Path: blob/master/thirdparty/libwebp/src/dsp/lossless_mips_dsp_r2.c
9913 views
// Copyright 2014 Google Inc. All Rights Reserved.1//2// Use of this source code is governed by a BSD-style license3// that can be found in the COPYING file in the root of the source4// tree. An additional intellectual property rights grant can be found5// in the file PATENTS. All contributing project authors may6// be found in the AUTHORS file in the root of the source tree.7// -----------------------------------------------------------------------------8//9// Image transforms and color space conversion methods for lossless decoder.10//11// Author(s): Djordje Pesut ([email protected])12// Jovan Zelincevic ([email protected])1314#include "src/dsp/dsp.h"1516#if defined(WEBP_USE_MIPS_DSP_R2)1718#include "src/dsp/lossless.h"19#include "src/dsp/lossless_common.h"2021#define MAP_COLOR_FUNCS(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE) \22static void FUNC_NAME(const TYPE* src, \23const uint32_t* const color_map, \24TYPE* dst, int y_start, int y_end, \25int width) { \26int y; \27for (y = y_start; y < y_end; ++y) { \28int x; \29for (x = 0; x < (width >> 2); ++x) { \30int tmp1, tmp2, tmp3, tmp4; \31__asm__ volatile ( \32".ifc " #TYPE ", uint8_t \n\t" \33"lbu %[tmp1], 0(%[src]) \n\t" \34"lbu %[tmp2], 1(%[src]) \n\t" \35"lbu %[tmp3], 2(%[src]) \n\t" \36"lbu %[tmp4], 3(%[src]) \n\t" \37"addiu %[src], %[src], 4 \n\t" \38".endif \n\t" \39".ifc " #TYPE ", uint32_t \n\t" \40"lw %[tmp1], 0(%[src]) \n\t" \41"lw %[tmp2], 4(%[src]) \n\t" \42"lw %[tmp3], 8(%[src]) \n\t" \43"lw %[tmp4], 12(%[src]) \n\t" \44"ext %[tmp1], %[tmp1], 8, 8 \n\t" \45"ext %[tmp2], %[tmp2], 8, 8 \n\t" \46"ext %[tmp3], %[tmp3], 8, 8 \n\t" \47"ext %[tmp4], %[tmp4], 8, 8 \n\t" \48"addiu %[src], %[src], 16 \n\t" \49".endif \n\t" \50"sll %[tmp1], %[tmp1], 2 \n\t" \51"sll %[tmp2], %[tmp2], 2 \n\t" \52"sll %[tmp3], %[tmp3], 2 \n\t" \53"sll %[tmp4], %[tmp4], 2 \n\t" \54"lwx %[tmp1], %[tmp1](%[color_map]) \n\t" \55"lwx %[tmp2], %[tmp2](%[color_map]) \n\t" \56"lwx %[tmp3], %[tmp3](%[color_map]) \n\t" \57"lwx %[tmp4], %[tmp4](%[color_map]) \n\t" \58".ifc " #TYPE ", uint8_t \n\t" \59"ext %[tmp1], %[tmp1], 8, 8 \n\t" \60"ext %[tmp2], %[tmp2], 8, 8 \n\t" \61"ext %[tmp3], %[tmp3], 8, 8 \n\t" \62"ext %[tmp4], %[tmp4], 8, 8 \n\t" \63"sb %[tmp1], 0(%[dst]) \n\t" \64"sb %[tmp2], 1(%[dst]) \n\t" \65"sb %[tmp3], 2(%[dst]) \n\t" \66"sb %[tmp4], 3(%[dst]) \n\t" \67"addiu %[dst], %[dst], 4 \n\t" \68".endif \n\t" \69".ifc " #TYPE ", uint32_t \n\t" \70"sw %[tmp1], 0(%[dst]) \n\t" \71"sw %[tmp2], 4(%[dst]) \n\t" \72"sw %[tmp3], 8(%[dst]) \n\t" \73"sw %[tmp4], 12(%[dst]) \n\t" \74"addiu %[dst], %[dst], 16 \n\t" \75".endif \n\t" \76: [tmp1]"=&r"(tmp1), [tmp2]"=&r"(tmp2), [tmp3]"=&r"(tmp3), \77[tmp4]"=&r"(tmp4), [src]"+&r"(src), [dst]"+r"(dst) \78: [color_map]"r"(color_map) \79: "memory" \80); \81} \82for (x = 0; x < (width & 3); ++x) { \83*dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]); \84} \85} \86}8788MAP_COLOR_FUNCS(MapARGB_MIPSdspR2, uint32_t, VP8GetARGBIndex, VP8GetARGBValue)89MAP_COLOR_FUNCS(MapAlpha_MIPSdspR2, uint8_t, VP8GetAlphaIndex, VP8GetAlphaValue)9091#undef MAP_COLOR_FUNCS9293static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,94uint32_t c2) {95int temp0, temp1, temp2, temp3, temp4, temp5;96__asm__ volatile (97"preceu.ph.qbr %[temp1], %[c0] \n\t"98"preceu.ph.qbl %[temp2], %[c0] \n\t"99"preceu.ph.qbr %[temp3], %[c1] \n\t"100"preceu.ph.qbl %[temp4], %[c1] \n\t"101"preceu.ph.qbr %[temp5], %[c2] \n\t"102"preceu.ph.qbl %[temp0], %[c2] \n\t"103"subq.ph %[temp3], %[temp3], %[temp5] \n\t"104"subq.ph %[temp4], %[temp4], %[temp0] \n\t"105"addq.ph %[temp1], %[temp1], %[temp3] \n\t"106"addq.ph %[temp2], %[temp2], %[temp4] \n\t"107"shll_s.ph %[temp1], %[temp1], 7 \n\t"108"shll_s.ph %[temp2], %[temp2], 7 \n\t"109"precrqu_s.qb.ph %[temp2], %[temp2], %[temp1] \n\t"110: [temp0]"=r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),111[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5)112: [c0]"r"(c0), [c1]"r"(c1), [c2]"r"(c2)113: "memory"114);115return temp2;116}117118static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,119uint32_t c2) {120int temp0, temp1, temp2, temp3, temp4, temp5;121__asm__ volatile (122"adduh.qb %[temp5], %[c0], %[c1] \n\t"123"preceu.ph.qbr %[temp3], %[c2] \n\t"124"preceu.ph.qbr %[temp1], %[temp5] \n\t"125"preceu.ph.qbl %[temp2], %[temp5] \n\t"126"preceu.ph.qbl %[temp4], %[c2] \n\t"127"subq.ph %[temp3], %[temp1], %[temp3] \n\t"128"subq.ph %[temp4], %[temp2], %[temp4] \n\t"129"shrl.ph %[temp5], %[temp3], 15 \n\t"130"shrl.ph %[temp0], %[temp4], 15 \n\t"131"addq.ph %[temp3], %[temp3], %[temp5] \n\t"132"addq.ph %[temp4], %[temp0], %[temp4] \n\t"133"shra.ph %[temp3], %[temp3], 1 \n\t"134"shra.ph %[temp4], %[temp4], 1 \n\t"135"addq.ph %[temp1], %[temp1], %[temp3] \n\t"136"addq.ph %[temp2], %[temp2], %[temp4] \n\t"137"shll_s.ph %[temp1], %[temp1], 7 \n\t"138"shll_s.ph %[temp2], %[temp2], 7 \n\t"139"precrqu_s.qb.ph %[temp1], %[temp2], %[temp1] \n\t"140: [temp0]"=r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),141[temp3]"=&r"(temp3), [temp4]"=r"(temp4), [temp5]"=&r"(temp5)142: [c0]"r"(c0), [c1]"r"(c1), [c2]"r"(c2)143: "memory"144);145return temp1;146}147148static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {149int temp0, temp1, temp2, temp3, temp4, temp5;150__asm__ volatile (151"cmpgdu.lt.qb %[temp1], %[c], %[b] \n\t"152"pick.qb %[temp1], %[b], %[c] \n\t"153"pick.qb %[temp2], %[c], %[b] \n\t"154"cmpgdu.lt.qb %[temp4], %[c], %[a] \n\t"155"pick.qb %[temp4], %[a], %[c] \n\t"156"pick.qb %[temp5], %[c], %[a] \n\t"157"subu.qb %[temp3], %[temp1], %[temp2] \n\t"158"subu.qb %[temp0], %[temp4], %[temp5] \n\t"159"raddu.w.qb %[temp3], %[temp3] \n\t"160"raddu.w.qb %[temp0], %[temp0] \n\t"161"subu %[temp3], %[temp3], %[temp0] \n\t"162"slti %[temp0], %[temp3], 0x1 \n\t"163"movz %[a], %[b], %[temp0] \n\t"164: [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),165[temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp0]"=&r"(temp0),166[a]"+&r"(a)167: [b]"r"(b), [c]"r"(c)168);169return a;170}171172static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {173__asm__ volatile (174"adduh.qb %[a0], %[a0], %[a1] \n\t"175: [a0]"+r"(a0)176: [a1]"r"(a1)177);178return a0;179}180181static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {182return Average2(Average2(a0, a2), a1);183}184185static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,186uint32_t a2, uint32_t a3) {187return Average2(Average2(a0, a1), Average2(a2, a3));188}189190static uint32_t Predictor5_MIPSdspR2(const uint32_t* const left,191const uint32_t* const top) {192return Average3(*left, top[0], top[1]);193}194195static uint32_t Predictor6_MIPSdspR2(const uint32_t* const left,196const uint32_t* const top) {197return Average2(*left, top[-1]);198}199200static uint32_t Predictor7_MIPSdspR2(const uint32_t* const left,201const uint32_t* const top) {202return Average2(*left, top[0]);203}204205static uint32_t Predictor8_MIPSdspR2(const uint32_t* const left,206const uint32_t* const top) {207(void)left;208return Average2(top[-1], top[0]);209}210211static uint32_t Predictor9_MIPSdspR2(const uint32_t* const left,212const uint32_t* const top) {213(void)left;214return Average2(top[0], top[1]);215}216217static uint32_t Predictor10_MIPSdspR2(const uint32_t* const left,218const uint32_t* const top) {219return Average4(*left, top[-1], top[0], top[1]);220}221222static uint32_t Predictor11_MIPSdspR2(const uint32_t* const left,223const uint32_t* const top) {224return Select(top[0], *left, top[-1]);225}226227static uint32_t Predictor12_MIPSdspR2(const uint32_t* const left,228const uint32_t* const top) {229return ClampedAddSubtractFull(*left, top[0], top[-1]);230}231232static uint32_t Predictor13_MIPSdspR2(const uint32_t* const left,233const uint32_t* const top) {234return ClampedAddSubtractHalf(*left, top[0], top[-1]);235}236237// Add green to blue and red channels (i.e. perform the inverse transform of238// 'subtract green').239static void AddGreenToBlueAndRed_MIPSdspR2(const uint32_t* src, int num_pixels,240uint32_t* dst) {241uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;242const uint32_t* const p_loop1_end = src + (num_pixels & ~3);243const uint32_t* const p_loop2_end = src + num_pixels;244__asm__ volatile (245".set push \n\t"246".set noreorder \n\t"247"beq %[src], %[p_loop1_end], 3f \n\t"248" nop \n\t"249"0: \n\t"250"lw %[temp0], 0(%[src]) \n\t"251"lw %[temp1], 4(%[src]) \n\t"252"lw %[temp2], 8(%[src]) \n\t"253"lw %[temp3], 12(%[src]) \n\t"254"ext %[temp4], %[temp0], 8, 8 \n\t"255"ext %[temp5], %[temp1], 8, 8 \n\t"256"ext %[temp6], %[temp2], 8, 8 \n\t"257"ext %[temp7], %[temp3], 8, 8 \n\t"258"addiu %[src], %[src], 16 \n\t"259"addiu %[dst], %[dst], 16 \n\t"260"replv.ph %[temp4], %[temp4] \n\t"261"replv.ph %[temp5], %[temp5] \n\t"262"replv.ph %[temp6], %[temp6] \n\t"263"replv.ph %[temp7], %[temp7] \n\t"264"addu.qb %[temp0], %[temp0], %[temp4] \n\t"265"addu.qb %[temp1], %[temp1], %[temp5] \n\t"266"addu.qb %[temp2], %[temp2], %[temp6] \n\t"267"addu.qb %[temp3], %[temp3], %[temp7] \n\t"268"sw %[temp0], -16(%[dst]) \n\t"269"sw %[temp1], -12(%[dst]) \n\t"270"sw %[temp2], -8(%[dst]) \n\t"271"bne %[src], %[p_loop1_end], 0b \n\t"272" sw %[temp3], -4(%[dst]) \n\t"273"3: \n\t"274"beq %[src], %[p_loop2_end], 2f \n\t"275" nop \n\t"276"1: \n\t"277"lw %[temp0], 0(%[src]) \n\t"278"addiu %[src], %[src], 4 \n\t"279"addiu %[dst], %[dst], 4 \n\t"280"ext %[temp4], %[temp0], 8, 8 \n\t"281"replv.ph %[temp4], %[temp4] \n\t"282"addu.qb %[temp0], %[temp0], %[temp4] \n\t"283"bne %[src], %[p_loop2_end], 1b \n\t"284" sw %[temp0], -4(%[dst]) \n\t"285"2: \n\t"286".set pop \n\t"287: [dst]"+&r"(dst), [src]"+&r"(src), [temp0]"=&r"(temp0),288[temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),289[temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6),290[temp7]"=&r"(temp7)291: [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)292: "memory"293);294}295296static void TransformColorInverse_MIPSdspR2(const VP8LMultipliers* const m,297const uint32_t* src, int num_pixels,298uint32_t* dst) {299int temp0, temp1, temp2, temp3, temp4, temp5;300uint32_t argb, argb1, new_red;301const uint32_t G_to_R = m->green_to_red_;302const uint32_t G_to_B = m->green_to_blue_;303const uint32_t R_to_B = m->red_to_blue_;304const uint32_t* const p_loop_end = src + (num_pixels & ~1);305__asm__ volatile (306".set push \n\t"307".set noreorder \n\t"308"beq %[src], %[p_loop_end], 1f \n\t"309" nop \n\t"310"replv.ph %[temp0], %[G_to_R] \n\t"311"replv.ph %[temp1], %[G_to_B] \n\t"312"replv.ph %[temp2], %[R_to_B] \n\t"313"shll.ph %[temp0], %[temp0], 8 \n\t"314"shll.ph %[temp1], %[temp1], 8 \n\t"315"shll.ph %[temp2], %[temp2], 8 \n\t"316"shra.ph %[temp0], %[temp0], 8 \n\t"317"shra.ph %[temp1], %[temp1], 8 \n\t"318"shra.ph %[temp2], %[temp2], 8 \n\t"319"0: \n\t"320"lw %[argb], 0(%[src]) \n\t"321"lw %[argb1], 4(%[src]) \n\t"322"sw %[argb], 0(%[dst]) \n\t"323"sw %[argb1], 4(%[dst]) \n\t"324"addiu %[src], %[src], 8 \n\t"325"addiu %[dst], %[dst], 8 \n\t"326"precrq.qb.ph %[temp3], %[argb], %[argb1] \n\t"327"preceu.ph.qbra %[temp3], %[temp3] \n\t"328"shll.ph %[temp3], %[temp3], 8 \n\t"329"shra.ph %[temp3], %[temp3], 8 \n\t"330"mul.ph %[temp5], %[temp3], %[temp0] \n\t"331"mul.ph %[temp3], %[temp3], %[temp1] \n\t"332"precrq.ph.w %[new_red], %[argb], %[argb1] \n\t"333"ins %[argb1], %[argb], 16, 16 \n\t"334"shra.ph %[temp5], %[temp5], 5 \n\t"335"shra.ph %[temp3], %[temp3], 5 \n\t"336"addu.ph %[new_red], %[new_red], %[temp5] \n\t"337"addu.ph %[argb1], %[argb1], %[temp3] \n\t"338"preceu.ph.qbra %[temp5], %[new_red] \n\t"339"shll.ph %[temp4], %[temp5], 8 \n\t"340"shra.ph %[temp4], %[temp4], 8 \n\t"341"mul.ph %[temp4], %[temp4], %[temp2] \n\t"342"sb %[temp5], -2(%[dst]) \n\t"343"sra %[temp5], %[temp5], 16 \n\t"344"shra.ph %[temp4], %[temp4], 5 \n\t"345"addu.ph %[argb1], %[argb1], %[temp4] \n\t"346"preceu.ph.qbra %[temp3], %[argb1] \n\t"347"sb %[temp5], -6(%[dst]) \n\t"348"sb %[temp3], -4(%[dst]) \n\t"349"sra %[temp3], %[temp3], 16 \n\t"350"bne %[src], %[p_loop_end], 0b \n\t"351" sb %[temp3], -8(%[dst]) \n\t"352"1: \n\t"353".set pop \n\t"354: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),355[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),356[new_red]"=&r"(new_red), [argb]"=&r"(argb),357[argb1]"=&r"(argb1), [dst]"+&r"(dst), [src]"+&r"(src)358: [G_to_R]"r"(G_to_R), [R_to_B]"r"(R_to_B),359[G_to_B]"r"(G_to_B), [p_loop_end]"r"(p_loop_end)360: "memory", "hi", "lo"361);362363// Fall-back to C-version for left-overs.364if (num_pixels & 1) VP8LTransformColorInverse_C(m, src, 1, dst);365}366367static void ConvertBGRAToRGB_MIPSdspR2(const uint32_t* src,368int num_pixels, uint8_t* dst) {369int temp0, temp1, temp2, temp3;370const uint32_t* const p_loop1_end = src + (num_pixels & ~3);371const uint32_t* const p_loop2_end = src + num_pixels;372__asm__ volatile (373".set push \n\t"374".set noreorder \n\t"375"beq %[src], %[p_loop1_end], 3f \n\t"376" nop \n\t"377"0: \n\t"378"lw %[temp3], 12(%[src]) \n\t"379"lw %[temp2], 8(%[src]) \n\t"380"lw %[temp1], 4(%[src]) \n\t"381"lw %[temp0], 0(%[src]) \n\t"382"ins %[temp3], %[temp2], 24, 8 \n\t"383"sll %[temp2], %[temp2], 8 \n\t"384"rotr %[temp3], %[temp3], 16 \n\t"385"ins %[temp2], %[temp1], 0, 16 \n\t"386"sll %[temp1], %[temp1], 8 \n\t"387"wsbh %[temp3], %[temp3] \n\t"388"balign %[temp0], %[temp1], 1 \n\t"389"wsbh %[temp2], %[temp2] \n\t"390"wsbh %[temp0], %[temp0] \n\t"391"usw %[temp3], 8(%[dst]) \n\t"392"rotr %[temp0], %[temp0], 16 \n\t"393"usw %[temp2], 4(%[dst]) \n\t"394"addiu %[src], %[src], 16 \n\t"395"usw %[temp0], 0(%[dst]) \n\t"396"bne %[src], %[p_loop1_end], 0b \n\t"397" addiu %[dst], %[dst], 12 \n\t"398"3: \n\t"399"beq %[src], %[p_loop2_end], 2f \n\t"400" nop \n\t"401"1: \n\t"402"lw %[temp0], 0(%[src]) \n\t"403"addiu %[src], %[src], 4 \n\t"404"wsbh %[temp1], %[temp0] \n\t"405"addiu %[dst], %[dst], 3 \n\t"406"ush %[temp1], -2(%[dst]) \n\t"407"sra %[temp0], %[temp0], 16 \n\t"408"bne %[src], %[p_loop2_end], 1b \n\t"409" sb %[temp0], -3(%[dst]) \n\t"410"2: \n\t"411".set pop \n\t"412: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),413[temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src)414: [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)415: "memory"416);417}418419static void ConvertBGRAToRGBA_MIPSdspR2(const uint32_t* src,420int num_pixels, uint8_t* dst) {421int temp0, temp1, temp2, temp3;422const uint32_t* const p_loop1_end = src + (num_pixels & ~3);423const uint32_t* const p_loop2_end = src + num_pixels;424__asm__ volatile (425".set push \n\t"426".set noreorder \n\t"427"beq %[src], %[p_loop1_end], 3f \n\t"428" nop \n\t"429"0: \n\t"430"lw %[temp0], 0(%[src]) \n\t"431"lw %[temp1], 4(%[src]) \n\t"432"lw %[temp2], 8(%[src]) \n\t"433"lw %[temp3], 12(%[src]) \n\t"434"wsbh %[temp0], %[temp0] \n\t"435"wsbh %[temp1], %[temp1] \n\t"436"wsbh %[temp2], %[temp2] \n\t"437"wsbh %[temp3], %[temp3] \n\t"438"addiu %[src], %[src], 16 \n\t"439"balign %[temp0], %[temp0], 1 \n\t"440"balign %[temp1], %[temp1], 1 \n\t"441"balign %[temp2], %[temp2], 1 \n\t"442"balign %[temp3], %[temp3], 1 \n\t"443"usw %[temp0], 0(%[dst]) \n\t"444"usw %[temp1], 4(%[dst]) \n\t"445"usw %[temp2], 8(%[dst]) \n\t"446"usw %[temp3], 12(%[dst]) \n\t"447"bne %[src], %[p_loop1_end], 0b \n\t"448" addiu %[dst], %[dst], 16 \n\t"449"3: \n\t"450"beq %[src], %[p_loop2_end], 2f \n\t"451" nop \n\t"452"1: \n\t"453"lw %[temp0], 0(%[src]) \n\t"454"wsbh %[temp0], %[temp0] \n\t"455"addiu %[src], %[src], 4 \n\t"456"balign %[temp0], %[temp0], 1 \n\t"457"usw %[temp0], 0(%[dst]) \n\t"458"bne %[src], %[p_loop2_end], 1b \n\t"459" addiu %[dst], %[dst], 4 \n\t"460"2: \n\t"461".set pop \n\t"462: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),463[temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src)464: [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)465: "memory"466);467}468469static void ConvertBGRAToRGBA4444_MIPSdspR2(const uint32_t* src,470int num_pixels, uint8_t* dst) {471int temp0, temp1, temp2, temp3, temp4, temp5;472const uint32_t* const p_loop1_end = src + (num_pixels & ~3);473const uint32_t* const p_loop2_end = src + num_pixels;474__asm__ volatile (475".set push \n\t"476".set noreorder \n\t"477"beq %[src], %[p_loop1_end], 3f \n\t"478" nop \n\t"479"0: \n\t"480"lw %[temp0], 0(%[src]) \n\t"481"lw %[temp1], 4(%[src]) \n\t"482"lw %[temp2], 8(%[src]) \n\t"483"lw %[temp3], 12(%[src]) \n\t"484"ext %[temp4], %[temp0], 28, 4 \n\t"485"ext %[temp5], %[temp0], 12, 4 \n\t"486"ins %[temp0], %[temp4], 0, 4 \n\t"487"ext %[temp4], %[temp1], 28, 4 \n\t"488"ins %[temp0], %[temp5], 16, 4 \n\t"489"ext %[temp5], %[temp1], 12, 4 \n\t"490"ins %[temp1], %[temp4], 0, 4 \n\t"491"ext %[temp4], %[temp2], 28, 4 \n\t"492"ins %[temp1], %[temp5], 16, 4 \n\t"493"ext %[temp5], %[temp2], 12, 4 \n\t"494"ins %[temp2], %[temp4], 0, 4 \n\t"495"ext %[temp4], %[temp3], 28, 4 \n\t"496"ins %[temp2], %[temp5], 16, 4 \n\t"497"ext %[temp5], %[temp3], 12, 4 \n\t"498"ins %[temp3], %[temp4], 0, 4 \n\t"499"precr.qb.ph %[temp1], %[temp1], %[temp0] \n\t"500"ins %[temp3], %[temp5], 16, 4 \n\t"501"addiu %[src], %[src], 16 \n\t"502"precr.qb.ph %[temp3], %[temp3], %[temp2] \n\t"503#if (WEBP_SWAP_16BIT_CSP == 1)504"usw %[temp1], 0(%[dst]) \n\t"505"usw %[temp3], 4(%[dst]) \n\t"506#else507"wsbh %[temp1], %[temp1] \n\t"508"wsbh %[temp3], %[temp3] \n\t"509"usw %[temp1], 0(%[dst]) \n\t"510"usw %[temp3], 4(%[dst]) \n\t"511#endif512"bne %[src], %[p_loop1_end], 0b \n\t"513" addiu %[dst], %[dst], 8 \n\t"514"3: \n\t"515"beq %[src], %[p_loop2_end], 2f \n\t"516" nop \n\t"517"1: \n\t"518"lw %[temp0], 0(%[src]) \n\t"519"ext %[temp4], %[temp0], 28, 4 \n\t"520"ext %[temp5], %[temp0], 12, 4 \n\t"521"ins %[temp0], %[temp4], 0, 4 \n\t"522"ins %[temp0], %[temp5], 16, 4 \n\t"523"addiu %[src], %[src], 4 \n\t"524"precr.qb.ph %[temp0], %[temp0], %[temp0] \n\t"525#if (WEBP_SWAP_16BIT_CSP == 1)526"ush %[temp0], 0(%[dst]) \n\t"527#else528"wsbh %[temp0], %[temp0] \n\t"529"ush %[temp0], 0(%[dst]) \n\t"530#endif531"bne %[src], %[p_loop2_end], 1b \n\t"532" addiu %[dst], %[dst], 2 \n\t"533"2: \n\t"534".set pop \n\t"535: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),536[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),537[dst]"+&r"(dst), [src]"+&r"(src)538: [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)539: "memory"540);541}542543static void ConvertBGRAToRGB565_MIPSdspR2(const uint32_t* src,544int num_pixels, uint8_t* dst) {545int temp0, temp1, temp2, temp3, temp4, temp5;546const uint32_t* const p_loop1_end = src + (num_pixels & ~3);547const uint32_t* const p_loop2_end = src + num_pixels;548__asm__ volatile (549".set push \n\t"550".set noreorder \n\t"551"beq %[src], %[p_loop1_end], 3f \n\t"552" nop \n\t"553"0: \n\t"554"lw %[temp0], 0(%[src]) \n\t"555"lw %[temp1], 4(%[src]) \n\t"556"lw %[temp2], 8(%[src]) \n\t"557"lw %[temp3], 12(%[src]) \n\t"558"ext %[temp4], %[temp0], 8, 16 \n\t"559"ext %[temp5], %[temp0], 5, 11 \n\t"560"ext %[temp0], %[temp0], 3, 5 \n\t"561"ins %[temp4], %[temp5], 0, 11 \n\t"562"ext %[temp5], %[temp1], 5, 11 \n\t"563"ins %[temp4], %[temp0], 0, 5 \n\t"564"ext %[temp0], %[temp1], 8, 16 \n\t"565"ext %[temp1], %[temp1], 3, 5 \n\t"566"ins %[temp0], %[temp5], 0, 11 \n\t"567"ext %[temp5], %[temp2], 5, 11 \n\t"568"ins %[temp0], %[temp1], 0, 5 \n\t"569"ext %[temp1], %[temp2], 8, 16 \n\t"570"ext %[temp2], %[temp2], 3, 5 \n\t"571"ins %[temp1], %[temp5], 0, 11 \n\t"572"ext %[temp5], %[temp3], 5, 11 \n\t"573"ins %[temp1], %[temp2], 0, 5 \n\t"574"ext %[temp2], %[temp3], 8, 16 \n\t"575"ext %[temp3], %[temp3], 3, 5 \n\t"576"ins %[temp2], %[temp5], 0, 11 \n\t"577"append %[temp0], %[temp4], 16 \n\t"578"ins %[temp2], %[temp3], 0, 5 \n\t"579"addiu %[src], %[src], 16 \n\t"580"append %[temp2], %[temp1], 16 \n\t"581#if (WEBP_SWAP_16BIT_CSP == 1)582"usw %[temp0], 0(%[dst]) \n\t"583"usw %[temp2], 4(%[dst]) \n\t"584#else585"wsbh %[temp0], %[temp0] \n\t"586"wsbh %[temp2], %[temp2] \n\t"587"usw %[temp0], 0(%[dst]) \n\t"588"usw %[temp2], 4(%[dst]) \n\t"589#endif590"bne %[src], %[p_loop1_end], 0b \n\t"591" addiu %[dst], %[dst], 8 \n\t"592"3: \n\t"593"beq %[src], %[p_loop2_end], 2f \n\t"594" nop \n\t"595"1: \n\t"596"lw %[temp0], 0(%[src]) \n\t"597"ext %[temp4], %[temp0], 8, 16 \n\t"598"ext %[temp5], %[temp0], 5, 11 \n\t"599"ext %[temp0], %[temp0], 3, 5 \n\t"600"ins %[temp4], %[temp5], 0, 11 \n\t"601"addiu %[src], %[src], 4 \n\t"602"ins %[temp4], %[temp0], 0, 5 \n\t"603#if (WEBP_SWAP_16BIT_CSP == 1)604"ush %[temp4], 0(%[dst]) \n\t"605#else606"wsbh %[temp4], %[temp4] \n\t"607"ush %[temp4], 0(%[dst]) \n\t"608#endif609"bne %[src], %[p_loop2_end], 1b \n\t"610" addiu %[dst], %[dst], 2 \n\t"611"2: \n\t"612".set pop \n\t"613: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),614[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),615[dst]"+&r"(dst), [src]"+&r"(src)616: [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)617: "memory"618);619}620621static void ConvertBGRAToBGR_MIPSdspR2(const uint32_t* src,622int num_pixels, uint8_t* dst) {623int temp0, temp1, temp2, temp3;624const uint32_t* const p_loop1_end = src + (num_pixels & ~3);625const uint32_t* const p_loop2_end = src + num_pixels;626__asm__ volatile (627".set push \n\t"628".set noreorder \n\t"629"beq %[src], %[p_loop1_end], 3f \n\t"630" nop \n\t"631"0: \n\t"632"lw %[temp0], 0(%[src]) \n\t"633"lw %[temp1], 4(%[src]) \n\t"634"lw %[temp2], 8(%[src]) \n\t"635"lw %[temp3], 12(%[src]) \n\t"636"ins %[temp0], %[temp1], 24, 8 \n\t"637"sra %[temp1], %[temp1], 8 \n\t"638"ins %[temp1], %[temp2], 16, 16 \n\t"639"sll %[temp2], %[temp2], 8 \n\t"640"balign %[temp3], %[temp2], 1 \n\t"641"addiu %[src], %[src], 16 \n\t"642"usw %[temp0], 0(%[dst]) \n\t"643"usw %[temp1], 4(%[dst]) \n\t"644"usw %[temp3], 8(%[dst]) \n\t"645"bne %[src], %[p_loop1_end], 0b \n\t"646" addiu %[dst], %[dst], 12 \n\t"647"3: \n\t"648"beq %[src], %[p_loop2_end], 2f \n\t"649" nop \n\t"650"1: \n\t"651"lw %[temp0], 0(%[src]) \n\t"652"addiu %[src], %[src], 4 \n\t"653"addiu %[dst], %[dst], 3 \n\t"654"ush %[temp0], -3(%[dst]) \n\t"655"sra %[temp0], %[temp0], 16 \n\t"656"bne %[src], %[p_loop2_end], 1b \n\t"657" sb %[temp0], -1(%[dst]) \n\t"658"2: \n\t"659".set pop \n\t"660: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),661[temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src)662: [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)663: "memory"664);665}666667//------------------------------------------------------------------------------668// Entry point669670extern void VP8LDspInitMIPSdspR2(void);671672WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitMIPSdspR2(void) {673VP8LMapColor32b = MapARGB_MIPSdspR2;674VP8LMapColor8b = MapAlpha_MIPSdspR2;675676VP8LPredictors[5] = Predictor5_MIPSdspR2;677VP8LPredictors[6] = Predictor6_MIPSdspR2;678VP8LPredictors[7] = Predictor7_MIPSdspR2;679VP8LPredictors[8] = Predictor8_MIPSdspR2;680VP8LPredictors[9] = Predictor9_MIPSdspR2;681VP8LPredictors[10] = Predictor10_MIPSdspR2;682VP8LPredictors[11] = Predictor11_MIPSdspR2;683VP8LPredictors[12] = Predictor12_MIPSdspR2;684VP8LPredictors[13] = Predictor13_MIPSdspR2;685686VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed_MIPSdspR2;687VP8LTransformColorInverse = TransformColorInverse_MIPSdspR2;688689VP8LConvertBGRAToRGB = ConvertBGRAToRGB_MIPSdspR2;690VP8LConvertBGRAToRGBA = ConvertBGRAToRGBA_MIPSdspR2;691VP8LConvertBGRAToRGBA4444 = ConvertBGRAToRGBA4444_MIPSdspR2;692VP8LConvertBGRAToRGB565 = ConvertBGRAToRGB565_MIPSdspR2;693VP8LConvertBGRAToBGR = ConvertBGRAToBGR_MIPSdspR2;694}695696#else // !WEBP_USE_MIPS_DSP_R2697698WEBP_DSP_INIT_STUB(VP8LDspInitMIPSdspR2)699700#endif // WEBP_USE_MIPS_DSP_R2701702703