// Path: blob/master/thirdparty/libwebp/src/dsp/dec_mips32.c
// (source-viewer residue: 9913 views)
// Copyright 2014 Google Inc. All Rights Reserved.1//2// Use of this source code is governed by a BSD-style license3// that can be found in the COPYING file in the root of the source4// tree. An additional intellectual property rights grant can be found5// in the file PATENTS. All contributing project authors may6// be found in the AUTHORS file in the root of the source tree.7// -----------------------------------------------------------------------------8//9// MIPS version of dsp functions10//11// Author(s): Djordje Pesut ([email protected])12// Jovan Zelincevic ([email protected])1314#include "src/dsp/dsp.h"1516#if defined(WEBP_USE_MIPS32)1718#include "src/dsp/mips_macro.h"1920static const int kC1 = WEBP_TRANSFORM_AC3_C1;21static const int kC2 = WEBP_TRANSFORM_AC3_C2;2223static WEBP_INLINE int abs_mips32(int x) {24const int sign = x >> 31;25return (x ^ sign) - sign;26}2728// 4 pixels in, 2 pixels out29static WEBP_INLINE void do_filter2(uint8_t* p, int step) {30const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];31const int a = 3 * (q0 - p0) + VP8ksclip1[p1 - q1];32const int a1 = VP8ksclip2[(a + 4) >> 3];33const int a2 = VP8ksclip2[(a + 3) >> 3];34p[-step] = VP8kclip1[p0 + a2];35p[ 0] = VP8kclip1[q0 - a1];36}3738// 4 pixels in, 4 pixels out39static WEBP_INLINE void do_filter4(uint8_t* p, int step) {40const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];41const int a = 3 * (q0 - p0);42const int a1 = VP8ksclip2[(a + 4) >> 3];43const int a2 = VP8ksclip2[(a + 3) >> 3];44const int a3 = (a1 + 1) >> 1;45p[-2 * step] = VP8kclip1[p1 + a3];46p[- step] = VP8kclip1[p0 + a2];47p[ 0] = VP8kclip1[q0 - a1];48p[ step] = VP8kclip1[q1 - a3];49}5051// 6 pixels in, 6 pixels out52static WEBP_INLINE void do_filter6(uint8_t* p, int step) {53const int p2 = p[-3 * step], p1 = p[-2 * step], p0 = p[-step];54const int q0 = p[0], q1 = p[step], q2 = p[2 * step];55const int a = VP8ksclip1[3 * (q0 - p0) + VP8ksclip1[p1 - q1]];56// a is in [-128,127], a1 in [-27,27], 
a2 in [-18,18] and a3 in [-9,9]57const int a1 = (27 * a + 63) >> 7; // eq. to ((3 * a + 7) * 9) >> 758const int a2 = (18 * a + 63) >> 7; // eq. to ((2 * a + 7) * 9) >> 759const int a3 = (9 * a + 63) >> 7; // eq. to ((1 * a + 7) * 9) >> 760p[-3 * step] = VP8kclip1[p2 + a3];61p[-2 * step] = VP8kclip1[p1 + a2];62p[- step] = VP8kclip1[p0 + a1];63p[ 0] = VP8kclip1[q0 - a1];64p[ step] = VP8kclip1[q1 - a2];65p[ 2 * step] = VP8kclip1[q2 - a3];66}6768static WEBP_INLINE int hev(const uint8_t* p, int step, int thresh) {69const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];70return (abs_mips32(p1 - p0) > thresh) || (abs_mips32(q1 - q0) > thresh);71}7273static WEBP_INLINE int needs_filter(const uint8_t* p, int step, int t) {74const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];75return ((4 * abs_mips32(p0 - q0) + abs_mips32(p1 - q1)) <= t);76}7778static WEBP_INLINE int needs_filter2(const uint8_t* p,79int step, int t, int it) {80const int p3 = p[-4 * step], p2 = p[-3 * step];81const int p1 = p[-2 * step], p0 = p[-step];82const int q0 = p[0], q1 = p[step], q2 = p[2 * step], q3 = p[3 * step];83if ((4 * abs_mips32(p0 - q0) + abs_mips32(p1 - q1)) > t) {84return 0;85}86return abs_mips32(p3 - p2) <= it && abs_mips32(p2 - p1) <= it &&87abs_mips32(p1 - p0) <= it && abs_mips32(q3 - q2) <= it &&88abs_mips32(q2 - q1) <= it && abs_mips32(q1 - q0) <= it;89}9091static WEBP_INLINE void FilterLoop26(uint8_t* p,92int hstride, int vstride, int size,93int thresh, int ithresh, int hev_thresh) {94const int thresh2 = 2 * thresh + 1;95while (size-- > 0) {96if (needs_filter2(p, hstride, thresh2, ithresh)) {97if (hev(p, hstride, hev_thresh)) {98do_filter2(p, hstride);99} else {100do_filter6(p, hstride);101}102}103p += vstride;104}105}106107static WEBP_INLINE void FilterLoop24(uint8_t* p,108int hstride, int vstride, int size,109int thresh, int ithresh, int hev_thresh) {110const int thresh2 = 2 * thresh + 1;111while (size-- > 0) {112if (needs_filter2(p, hstride, thresh2, 
ithresh)) {113if (hev(p, hstride, hev_thresh)) {114do_filter2(p, hstride);115} else {116do_filter4(p, hstride);117}118}119p += vstride;120}121}122123// on macroblock edges124static void VFilter16(uint8_t* p, int stride,125int thresh, int ithresh, int hev_thresh) {126FilterLoop26(p, stride, 1, 16, thresh, ithresh, hev_thresh);127}128129static void HFilter16(uint8_t* p, int stride,130int thresh, int ithresh, int hev_thresh) {131FilterLoop26(p, 1, stride, 16, thresh, ithresh, hev_thresh);132}133134// 8-pixels wide variant, for chroma filtering135static void VFilter8(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,136int stride, int thresh, int ithresh, int hev_thresh) {137FilterLoop26(u, stride, 1, 8, thresh, ithresh, hev_thresh);138FilterLoop26(v, stride, 1, 8, thresh, ithresh, hev_thresh);139}140141static void HFilter8(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,142int stride, int thresh, int ithresh, int hev_thresh) {143FilterLoop26(u, 1, stride, 8, thresh, ithresh, hev_thresh);144FilterLoop26(v, 1, stride, 8, thresh, ithresh, hev_thresh);145}146147static void VFilter8i(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,148int stride, int thresh, int ithresh, int hev_thresh) {149FilterLoop24(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);150FilterLoop24(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);151}152153static void HFilter8i(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,154int stride, int thresh, int ithresh, int hev_thresh) {155FilterLoop24(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh);156FilterLoop24(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);157}158159// on three inner edges160static void VFilter16i(uint8_t* p, int stride,161int thresh, int ithresh, int hev_thresh) {162int k;163for (k = 3; k > 0; --k) {164p += 4 * stride;165FilterLoop24(p, stride, 1, 16, thresh, ithresh, hev_thresh);166}167}168169static void HFilter16i(uint8_t* p, int stride,170int thresh, int ithresh, int hev_thresh) {171int k;172for (k = 
3; k > 0; --k) {173p += 4;174FilterLoop24(p, 1, stride, 16, thresh, ithresh, hev_thresh);175}176}177178//------------------------------------------------------------------------------179// Simple In-loop filtering (Paragraph 15.2)180181static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {182int i;183const int thresh2 = 2 * thresh + 1;184for (i = 0; i < 16; ++i) {185if (needs_filter(p + i, stride, thresh2)) {186do_filter2(p + i, stride);187}188}189}190191static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {192int i;193const int thresh2 = 2 * thresh + 1;194for (i = 0; i < 16; ++i) {195if (needs_filter(p + i * stride, 1, thresh2)) {196do_filter2(p + i * stride, 1);197}198}199}200201static void SimpleVFilter16i(uint8_t* p, int stride, int thresh) {202int k;203for (k = 3; k > 0; --k) {204p += 4 * stride;205SimpleVFilter16(p, stride, thresh);206}207}208209static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {210int k;211for (k = 3; k > 0; --k) {212p += 4;213SimpleHFilter16(p, stride, thresh);214}215}216217static void TransformOne(const int16_t* WEBP_RESTRICT in,218uint8_t* WEBP_RESTRICT dst) {219int temp0, temp1, temp2, temp3, temp4;220int temp5, temp6, temp7, temp8, temp9;221int temp10, temp11, temp12, temp13, temp14;222int temp15, temp16, temp17, temp18, temp19;223int16_t* p_in = (int16_t*)in;224225// loops unrolled and merged to avoid usage of tmp buffer226// and to reduce number of stalls. 
MUL macro is written227// in assembler and inlined228__asm__ volatile(229"lh %[temp0], 0(%[in]) \n\t"230"lh %[temp8], 16(%[in]) \n\t"231"lh %[temp4], 8(%[in]) \n\t"232"lh %[temp12], 24(%[in]) \n\t"233"addu %[temp16], %[temp0], %[temp8] \n\t"234"subu %[temp0], %[temp0], %[temp8] \n\t"235"mul %[temp8], %[temp4], %[kC2] \n\t"236MUL_SHIFT_C1(temp17, temp12)237MUL_SHIFT_C1_IO(temp4, temp19)238"mul %[temp12], %[temp12], %[kC2] \n\t"239"lh %[temp1], 2(%[in]) \n\t"240"lh %[temp5], 10(%[in]) \n\t"241"lh %[temp9], 18(%[in]) \n\t"242"lh %[temp13], 26(%[in]) \n\t"243"sra %[temp8], %[temp8], 16 \n\t"244"sra %[temp12], %[temp12], 16 \n\t"245"lh %[temp2], 4(%[in]) \n\t"246"lh %[temp6], 12(%[in]) \n\t"247"lh %[temp10], 20(%[in]) \n\t"248"lh %[temp14], 28(%[in]) \n\t"249"subu %[temp17], %[temp8], %[temp17] \n\t"250"addu %[temp4], %[temp4], %[temp12] \n\t"251"addu %[temp8], %[temp16], %[temp4] \n\t"252"subu %[temp4], %[temp16], %[temp4] \n\t"253"addu %[temp16], %[temp1], %[temp9] \n\t"254"subu %[temp1], %[temp1], %[temp9] \n\t"255"lh %[temp3], 6(%[in]) \n\t"256"lh %[temp7], 14(%[in]) \n\t"257"lh %[temp11], 22(%[in]) \n\t"258"lh %[temp15], 30(%[in]) \n\t"259"addu %[temp12], %[temp0], %[temp17] \n\t"260"subu %[temp0], %[temp0], %[temp17] \n\t"261"mul %[temp9], %[temp5], %[kC2] \n\t"262MUL_SHIFT_C1(temp17, temp13)263MUL_SHIFT_C1_IO(temp5, temp19)264"mul %[temp13], %[temp13], %[kC2] \n\t"265"sra %[temp9], %[temp9], 16 \n\t"266"subu %[temp17], %[temp9], %[temp17] \n\t"267"sra %[temp13], %[temp13], 16 \n\t"268"addu %[temp5], %[temp5], %[temp13] \n\t"269"addu %[temp13], %[temp1], %[temp17] \n\t"270"subu %[temp1], %[temp1], %[temp17] \n\t"271MUL_SHIFT_C1(temp17, temp14)272"mul %[temp14], %[temp14], %[kC2] \n\t"273"addu %[temp9], %[temp16], %[temp5] \n\t"274"subu %[temp5], %[temp16], %[temp5] \n\t"275"addu %[temp16], %[temp2], %[temp10] \n\t"276"subu %[temp2], %[temp2], %[temp10] \n\t"277"mul %[temp10], %[temp6], %[kC2] \n\t"278MUL_SHIFT_C1_IO(temp6, temp19)279"sra %[temp14], %[temp14], 16 
\n\t"280"sra %[temp10], %[temp10], 16 \n\t"281"subu %[temp17], %[temp10], %[temp17] \n\t"282"addu %[temp6], %[temp6], %[temp14] \n\t"283"addu %[temp10], %[temp16], %[temp6] \n\t"284"subu %[temp6], %[temp16], %[temp6] \n\t"285"addu %[temp14], %[temp2], %[temp17] \n\t"286"subu %[temp2], %[temp2], %[temp17] \n\t"287MUL_SHIFT_C1(temp17, temp15)288"mul %[temp15], %[temp15], %[kC2] \n\t"289"addu %[temp16], %[temp3], %[temp11] \n\t"290"subu %[temp3], %[temp3], %[temp11] \n\t"291"mul %[temp11], %[temp7], %[kC2] \n\t"292MUL_SHIFT_C1_IO(temp7, temp19)293"addiu %[temp8], %[temp8], 4 \n\t"294"addiu %[temp12], %[temp12], 4 \n\t"295"addiu %[temp0], %[temp0], 4 \n\t"296"addiu %[temp4], %[temp4], 4 \n\t"297"sra %[temp15], %[temp15], 16 \n\t"298"sra %[temp11], %[temp11], 16 \n\t"299"subu %[temp17], %[temp11], %[temp17] \n\t"300"addu %[temp7], %[temp7], %[temp15] \n\t"301"addu %[temp15], %[temp3], %[temp17] \n\t"302"subu %[temp3], %[temp3], %[temp17] \n\t"303"addu %[temp11], %[temp16], %[temp7] \n\t"304"subu %[temp7], %[temp16], %[temp7] \n\t"305"addu %[temp16], %[temp8], %[temp10] \n\t"306"subu %[temp8], %[temp8], %[temp10] \n\t"307"mul %[temp10], %[temp9], %[kC2] \n\t"308MUL_SHIFT_C1(temp17, temp11)309MUL_SHIFT_C1_IO(temp9, temp19)310"mul %[temp11], %[temp11], %[kC2] \n\t"311"sra %[temp10], %[temp10], 16 \n\t"312"sra %[temp11], %[temp11], 16 \n\t"313"subu %[temp17], %[temp10], %[temp17] \n\t"314"addu %[temp11], %[temp9], %[temp11] \n\t"315"addu %[temp10], %[temp12], %[temp14] \n\t"316"subu %[temp12], %[temp12], %[temp14] \n\t"317"mul %[temp14], %[temp13], %[kC2] \n\t"318MUL_SHIFT_C1(temp9, temp15)319MUL_SHIFT_C1_IO(temp13, temp19)320"mul %[temp15], %[temp15], %[kC2] \n\t"321"sra %[temp14], %[temp14], 16 \n\t"322"sra %[temp15], %[temp15], 16 \n\t"323"subu %[temp9], %[temp14], %[temp9] \n\t"324"addu %[temp15], %[temp13], %[temp15] \n\t"325"addu %[temp14], %[temp0], %[temp2] \n\t"326"subu %[temp0], %[temp0], %[temp2] \n\t"327"mul %[temp2], %[temp1], %[kC2] 
\n\t"328MUL_SHIFT_C1(temp13, temp3)329MUL_SHIFT_C1_IO(temp1, temp19)330"mul %[temp3], %[temp3], %[kC2] \n\t"331"sra %[temp2], %[temp2], 16 \n\t"332"sra %[temp3], %[temp3], 16 \n\t"333"subu %[temp13], %[temp2], %[temp13] \n\t"334"addu %[temp3], %[temp1], %[temp3] \n\t"335"addu %[temp2], %[temp4], %[temp6] \n\t"336"subu %[temp4], %[temp4], %[temp6] \n\t"337"mul %[temp6], %[temp5], %[kC2] \n\t"338MUL_SHIFT_C1(temp1, temp7)339MUL_SHIFT_C1_IO(temp5, temp19)340"mul %[temp7], %[temp7], %[kC2] \n\t"341"sra %[temp6], %[temp6], 16 \n\t"342"sra %[temp7], %[temp7], 16 \n\t"343"subu %[temp1], %[temp6], %[temp1] \n\t"344"addu %[temp7], %[temp5], %[temp7] \n\t"345"addu %[temp5], %[temp16], %[temp11] \n\t"346"subu %[temp16], %[temp16], %[temp11] \n\t"347"addu %[temp11], %[temp8], %[temp17] \n\t"348"subu %[temp8], %[temp8], %[temp17] \n\t"349"sra %[temp5], %[temp5], 3 \n\t"350"sra %[temp16], %[temp16], 3 \n\t"351"sra %[temp11], %[temp11], 3 \n\t"352"sra %[temp8], %[temp8], 3 \n\t"353"addu %[temp17], %[temp10], %[temp15] \n\t"354"subu %[temp10], %[temp10], %[temp15] \n\t"355"addu %[temp15], %[temp12], %[temp9] \n\t"356"subu %[temp12], %[temp12], %[temp9] \n\t"357"sra %[temp17], %[temp17], 3 \n\t"358"sra %[temp10], %[temp10], 3 \n\t"359"sra %[temp15], %[temp15], 3 \n\t"360"sra %[temp12], %[temp12], 3 \n\t"361"addu %[temp9], %[temp14], %[temp3] \n\t"362"subu %[temp14], %[temp14], %[temp3] \n\t"363"addu %[temp3], %[temp0], %[temp13] \n\t"364"subu %[temp0], %[temp0], %[temp13] \n\t"365"sra %[temp9], %[temp9], 3 \n\t"366"sra %[temp14], %[temp14], 3 \n\t"367"sra %[temp3], %[temp3], 3 \n\t"368"sra %[temp0], %[temp0], 3 \n\t"369"addu %[temp13], %[temp2], %[temp7] \n\t"370"subu %[temp2], %[temp2], %[temp7] \n\t"371"addu %[temp7], %[temp4], %[temp1] \n\t"372"subu %[temp4], %[temp4], %[temp1] \n\t"373"sra %[temp13], %[temp13], 3 \n\t"374"sra %[temp2], %[temp2], 3 \n\t"375"sra %[temp7], %[temp7], 3 \n\t"376"sra %[temp4], %[temp4], 3 \n\t"377"addiu %[temp6], $zero, 255 \n\t"378"lbu %[temp1], 
0+0*" XSTR(BPS) "(%[dst]) \n\t"379"addu %[temp1], %[temp1], %[temp5] \n\t"380"sra %[temp5], %[temp1], 8 \n\t"381"sra %[temp18], %[temp1], 31 \n\t"382"beqz %[temp5], 1f \n\t"383"xor %[temp1], %[temp1], %[temp1] \n\t"384"movz %[temp1], %[temp6], %[temp18] \n\t"385"1: \n\t"386"lbu %[temp18], 1+0*" XSTR(BPS) "(%[dst]) \n\t"387"sb %[temp1], 0+0*" XSTR(BPS) "(%[dst]) \n\t"388"addu %[temp18], %[temp18], %[temp11] \n\t"389"sra %[temp11], %[temp18], 8 \n\t"390"sra %[temp1], %[temp18], 31 \n\t"391"beqz %[temp11], 2f \n\t"392"xor %[temp18], %[temp18], %[temp18] \n\t"393"movz %[temp18], %[temp6], %[temp1] \n\t"394"2: \n\t"395"lbu %[temp1], 2+0*" XSTR(BPS) "(%[dst]) \n\t"396"sb %[temp18], 1+0*" XSTR(BPS) "(%[dst]) \n\t"397"addu %[temp1], %[temp1], %[temp8] \n\t"398"sra %[temp8], %[temp1], 8 \n\t"399"sra %[temp18], %[temp1], 31 \n\t"400"beqz %[temp8], 3f \n\t"401"xor %[temp1], %[temp1], %[temp1] \n\t"402"movz %[temp1], %[temp6], %[temp18] \n\t"403"3: \n\t"404"lbu %[temp18], 3+0*" XSTR(BPS) "(%[dst]) \n\t"405"sb %[temp1], 2+0*" XSTR(BPS) "(%[dst]) \n\t"406"addu %[temp18], %[temp18], %[temp16] \n\t"407"sra %[temp16], %[temp18], 8 \n\t"408"sra %[temp1], %[temp18], 31 \n\t"409"beqz %[temp16], 4f \n\t"410"xor %[temp18], %[temp18], %[temp18] \n\t"411"movz %[temp18], %[temp6], %[temp1] \n\t"412"4: \n\t"413"sb %[temp18], 3+0*" XSTR(BPS) "(%[dst]) \n\t"414"lbu %[temp5], 0+1*" XSTR(BPS) "(%[dst]) \n\t"415"lbu %[temp8], 1+1*" XSTR(BPS) "(%[dst]) \n\t"416"lbu %[temp11], 2+1*" XSTR(BPS) "(%[dst]) \n\t"417"lbu %[temp16], 3+1*" XSTR(BPS) "(%[dst]) \n\t"418"addu %[temp5], %[temp5], %[temp17] \n\t"419"addu %[temp8], %[temp8], %[temp15] \n\t"420"addu %[temp11], %[temp11], %[temp12] \n\t"421"addu %[temp16], %[temp16], %[temp10] \n\t"422"sra %[temp18], %[temp5], 8 \n\t"423"sra %[temp1], %[temp5], 31 \n\t"424"beqz %[temp18], 5f \n\t"425"xor %[temp5], %[temp5], %[temp5] \n\t"426"movz %[temp5], %[temp6], %[temp1] \n\t"427"5: \n\t"428"sra %[temp18], %[temp8], 8 \n\t"429"sra %[temp1], %[temp8], 31 
\n\t"430"beqz %[temp18], 6f \n\t"431"xor %[temp8], %[temp8], %[temp8] \n\t"432"movz %[temp8], %[temp6], %[temp1] \n\t"433"6: \n\t"434"sra %[temp18], %[temp11], 8 \n\t"435"sra %[temp1], %[temp11], 31 \n\t"436"sra %[temp17], %[temp16], 8 \n\t"437"sra %[temp15], %[temp16], 31 \n\t"438"beqz %[temp18], 7f \n\t"439"xor %[temp11], %[temp11], %[temp11] \n\t"440"movz %[temp11], %[temp6], %[temp1] \n\t"441"7: \n\t"442"beqz %[temp17], 8f \n\t"443"xor %[temp16], %[temp16], %[temp16] \n\t"444"movz %[temp16], %[temp6], %[temp15] \n\t"445"8: \n\t"446"sb %[temp5], 0+1*" XSTR(BPS) "(%[dst]) \n\t"447"sb %[temp8], 1+1*" XSTR(BPS) "(%[dst]) \n\t"448"sb %[temp11], 2+1*" XSTR(BPS) "(%[dst]) \n\t"449"sb %[temp16], 3+1*" XSTR(BPS) "(%[dst]) \n\t"450"lbu %[temp5], 0+2*" XSTR(BPS) "(%[dst]) \n\t"451"lbu %[temp8], 1+2*" XSTR(BPS) "(%[dst]) \n\t"452"lbu %[temp11], 2+2*" XSTR(BPS) "(%[dst]) \n\t"453"lbu %[temp16], 3+2*" XSTR(BPS) "(%[dst]) \n\t"454"addu %[temp5], %[temp5], %[temp9] \n\t"455"addu %[temp8], %[temp8], %[temp3] \n\t"456"addu %[temp11], %[temp11], %[temp0] \n\t"457"addu %[temp16], %[temp16], %[temp14] \n\t"458"sra %[temp18], %[temp5], 8 \n\t"459"sra %[temp1], %[temp5], 31 \n\t"460"sra %[temp17], %[temp8], 8 \n\t"461"sra %[temp15], %[temp8], 31 \n\t"462"sra %[temp12], %[temp11], 8 \n\t"463"sra %[temp10], %[temp11], 31 \n\t"464"sra %[temp9], %[temp16], 8 \n\t"465"sra %[temp3], %[temp16], 31 \n\t"466"beqz %[temp18], 9f \n\t"467"xor %[temp5], %[temp5], %[temp5] \n\t"468"movz %[temp5], %[temp6], %[temp1] \n\t"469"9: \n\t"470"beqz %[temp17], 10f \n\t"471"xor %[temp8], %[temp8], %[temp8] \n\t"472"movz %[temp8], %[temp6], %[temp15] \n\t"473"10: \n\t"474"beqz %[temp12], 11f \n\t"475"xor %[temp11], %[temp11], %[temp11] \n\t"476"movz %[temp11], %[temp6], %[temp10] \n\t"477"11: \n\t"478"beqz %[temp9], 12f \n\t"479"xor %[temp16], %[temp16], %[temp16] \n\t"480"movz %[temp16], %[temp6], %[temp3] \n\t"481"12: \n\t"482"sb %[temp5], 0+2*" XSTR(BPS) "(%[dst]) \n\t"483"sb %[temp8], 1+2*" XSTR(BPS) 
"(%[dst]) \n\t"484"sb %[temp11], 2+2*" XSTR(BPS) "(%[dst]) \n\t"485"sb %[temp16], 3+2*" XSTR(BPS) "(%[dst]) \n\t"486"lbu %[temp5], 0+3*" XSTR(BPS) "(%[dst]) \n\t"487"lbu %[temp8], 1+3*" XSTR(BPS) "(%[dst]) \n\t"488"lbu %[temp11], 2+3*" XSTR(BPS) "(%[dst]) \n\t"489"lbu %[temp16], 3+3*" XSTR(BPS) "(%[dst]) \n\t"490"addu %[temp5], %[temp5], %[temp13] \n\t"491"addu %[temp8], %[temp8], %[temp7] \n\t"492"addu %[temp11], %[temp11], %[temp4] \n\t"493"addu %[temp16], %[temp16], %[temp2] \n\t"494"sra %[temp18], %[temp5], 8 \n\t"495"sra %[temp1], %[temp5], 31 \n\t"496"sra %[temp17], %[temp8], 8 \n\t"497"sra %[temp15], %[temp8], 31 \n\t"498"sra %[temp12], %[temp11], 8 \n\t"499"sra %[temp10], %[temp11], 31 \n\t"500"sra %[temp9], %[temp16], 8 \n\t"501"sra %[temp3], %[temp16], 31 \n\t"502"beqz %[temp18], 13f \n\t"503"xor %[temp5], %[temp5], %[temp5] \n\t"504"movz %[temp5], %[temp6], %[temp1] \n\t"505"13: \n\t"506"beqz %[temp17], 14f \n\t"507"xor %[temp8], %[temp8], %[temp8] \n\t"508"movz %[temp8], %[temp6], %[temp15] \n\t"509"14: \n\t"510"beqz %[temp12], 15f \n\t"511"xor %[temp11], %[temp11], %[temp11] \n\t"512"movz %[temp11], %[temp6], %[temp10] \n\t"513"15: \n\t"514"beqz %[temp9], 16f \n\t"515"xor %[temp16], %[temp16], %[temp16] \n\t"516"movz %[temp16], %[temp6], %[temp3] \n\t"517"16: \n\t"518"sb %[temp5], 0+3*" XSTR(BPS) "(%[dst]) \n\t"519"sb %[temp8], 1+3*" XSTR(BPS) "(%[dst]) \n\t"520"sb %[temp11], 2+3*" XSTR(BPS) "(%[dst]) \n\t"521"sb %[temp16], 3+3*" XSTR(BPS) "(%[dst]) \n\t"522523: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),524[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),525[temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),526[temp9]"=&r"(temp9), [temp10]"=&r"(temp10), [temp11]"=&r"(temp11),527[temp12]"=&r"(temp12), [temp13]"=&r"(temp13), [temp14]"=&r"(temp14),528[temp15]"=&r"(temp15), [temp16]"=&r"(temp16), [temp17]"=&r"(temp17),529[temp18]"=&r"(temp18), [temp19]"=&r"(temp19)530: [in]"r"(p_in), [kC1]"r"(kC1), [kC2]"r"(kC2), 
[dst]"r"(dst)531: "memory", "hi", "lo"532);533}534535static void TransformTwo(const int16_t* WEBP_RESTRICT in,536uint8_t* WEBP_RESTRICT dst, int do_two) {537TransformOne(in, dst);538if (do_two) {539TransformOne(in + 16, dst + 4);540}541}542543//------------------------------------------------------------------------------544// Entry point545546extern void VP8DspInitMIPS32(void);547548WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitMIPS32(void) {549VP8InitClipTables();550551VP8Transform = TransformTwo;552553VP8VFilter16 = VFilter16;554VP8HFilter16 = HFilter16;555VP8VFilter8 = VFilter8;556VP8HFilter8 = HFilter8;557VP8VFilter16i = VFilter16i;558VP8HFilter16i = HFilter16i;559VP8VFilter8i = VFilter8i;560VP8HFilter8i = HFilter8i;561562VP8SimpleVFilter16 = SimpleVFilter16;563VP8SimpleHFilter16 = SimpleHFilter16;564VP8SimpleVFilter16i = SimpleVFilter16i;565VP8SimpleHFilter16i = SimpleHFilter16i;566}567568#else // !WEBP_USE_MIPS32569570WEBP_DSP_INIT_STUB(VP8DspInitMIPS32)571572#endif // WEBP_USE_MIPS32573574575