CoCalc -- dec_mips32.c

GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/libwebp/src/dsp/dec_mips32.c
9913 views
1
// Copyright 2014 Google Inc. All Rights Reserved.
2
//
3
// Use of this source code is governed by a BSD-style license
4
// that can be found in the COPYING file in the root of the source
5
// tree. An additional intellectual property rights grant can be found
6
// in the file PATENTS. All contributing project authors may
7
// be found in the AUTHORS file in the root of the source tree.
8
// -----------------------------------------------------------------------------
9
//
10
// MIPS version of dsp functions
11
//
12
// Author(s):  Djordje Pesut    ([email protected])
13
//             Jovan Zelincevic ([email protected])
14

15
#include "src/dsp/dsp.h"
16

17
#if defined(WEBP_USE_MIPS32)
18

19
#include "src/dsp/mips_macro.h"
20

21
// Value of WEBP_TRANSFORM_AC3_C1 (defined outside this file). It is handed to
// the inline assembly in TransformOne() as the [kC1] register operand.
static const int kC1 = WEBP_TRANSFORM_AC3_C1;
22
// Value of WEBP_TRANSFORM_AC3_C2 (defined outside this file). It is the
// multiplier of the "mul ..., %[kC2]" instructions in TransformOne(), whose
// products are then shifted right by 16.
static const int kC2 = WEBP_TRANSFORM_AC3_C2;
23

24
// Branch-free absolute value of 'x'.
// 'mask' is x shifted right by 31 bits; the code assumes a 32-bit int and an
// arithmetic right shift, so that 'mask' is -1 for negative x and 0 otherwise.
static WEBP_INLINE int abs_mips32(int x) {
  const int mask = x >> 31;
  const int flipped = x ^ mask;  // x when mask == 0, ~x when mask == -1
  return flipped - mask;         // ~x + 1 == -x for negative x
}
28

29
// 4 pixels in, 2 pixels out
30
static WEBP_INLINE void do_filter2(uint8_t* p, int step) {
31
  const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
32
  const int a = 3 * (q0 - p0) + VP8ksclip1[p1 - q1];
33
  const int a1 = VP8ksclip2[(a + 4) >> 3];
34
  const int a2 = VP8ksclip2[(a + 3) >> 3];
35
  p[-step] = VP8kclip1[p0 + a2];
36
  p[    0] = VP8kclip1[q0 - a1];
37
}
38

39
// 4 pixels in, 4 pixels out
40
static WEBP_INLINE void do_filter4(uint8_t* p, int step) {
41
  const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
42
  const int a = 3 * (q0 - p0);
43
  const int a1 = VP8ksclip2[(a + 4) >> 3];
44
  const int a2 = VP8ksclip2[(a + 3) >> 3];
45
  const int a3 = (a1 + 1) >> 1;
46
  p[-2 * step] = VP8kclip1[p1 + a3];
47
  p[-    step] = VP8kclip1[p0 + a2];
48
  p[        0] = VP8kclip1[q0 - a1];
49
  p[     step] = VP8kclip1[q1 - a3];
50
}
51

52
// 6 pixels in, 6 pixels out
53
static WEBP_INLINE void do_filter6(uint8_t* p, int step) {
54
  const int p2 = p[-3 * step], p1 = p[-2 * step], p0 = p[-step];
55
  const int q0 = p[0], q1 = p[step], q2 = p[2 * step];
56
  const int a = VP8ksclip1[3 * (q0 - p0) + VP8ksclip1[p1 - q1]];
57
  // a is in [-128,127], a1 in [-27,27], a2 in [-18,18] and a3 in [-9,9]
58
  const int a1 = (27 * a + 63) >> 7;  // eq. to ((3 * a + 7) * 9) >> 7
59
  const int a2 = (18 * a + 63) >> 7;  // eq. to ((2 * a + 7) * 9) >> 7
60
  const int a3 = (9  * a + 63) >> 7;  // eq. to ((1 * a + 7) * 9) >> 7
61
  p[-3 * step] = VP8kclip1[p2 + a3];
62
  p[-2 * step] = VP8kclip1[p1 + a2];
63
  p[-    step] = VP8kclip1[p0 + a1];
64
  p[        0] = VP8kclip1[q0 - a1];
65
  p[     step] = VP8kclip1[q1 - a2];
66
  p[ 2 * step] = VP8kclip1[q2 - a3];
67
}
68

69
// Returns 1 when |p1 - p0| or |q1 - q0| exceeds 'thresh', 0 otherwise.
// p1, p0, q0, q1 are the samples at p[-2*step], p[-step], p[0] and p[step].
static WEBP_INLINE int hev(const uint8_t* p, int step, int thresh) {
  const int p1 = p[-2 * step];
  const int p0 = p[-step];
  const int q0 = p[0];
  const int q1 = p[step];
  if (abs_mips32(p1 - p0) > thresh) {
    return 1;
  }
  return abs_mips32(q1 - q0) > thresh;
}
73

74
// Returns 1 when 4 * |p0 - q0| + |p1 - q1| is at most 't', 0 otherwise.
// p1, p0, q0, q1 are the samples at p[-2*step], p[-step], p[0] and p[step].
static WEBP_INLINE int needs_filter(const uint8_t* p, int step, int t) {
  const int p1 = p[-2 * step];
  const int p0 = p[-step];
  const int q0 = p[0];
  const int q1 = p[step];
  const int edge_activity = 4 * abs_mips32(p0 - q0) + abs_mips32(p1 - q1);
  return edge_activity <= t;
}
78

79
// Stricter variant of needs_filter() looking at eight samples,
// p[-4*step] .. p[3*step].
// Returns 1 only if 4 * |p0 - q0| + |p1 - q1| <= t AND every difference
// between neighbouring samples on each side of the edge is at most 'it'.
static WEBP_INLINE int needs_filter2(const uint8_t* p,
                                     int step, int t, int it) {
  const int p3 = p[-4 * step];
  const int p2 = p[-3 * step];
  const int p1 = p[-2 * step];
  const int p0 = p[-step];
  const int q0 = p[0];
  const int q1 = p[step];
  const int q2 = p[2 * step];
  const int q3 = p[3 * step];
  if ((4 * abs_mips32(p0 - q0) + abs_mips32(p1 - q1)) > t) return 0;
  // Interior differences, checked in the same order as before.
  if (abs_mips32(p3 - p2) > it) return 0;
  if (abs_mips32(p2 - p1) > it) return 0;
  if (abs_mips32(p1 - p0) > it) return 0;
  if (abs_mips32(q3 - q2) > it) return 0;
  if (abs_mips32(q2 - q1) > it) return 0;
  if (abs_mips32(q1 - q0) > it) return 0;
  return 1;
}
91

92
// Visits 'size' positions starting at 'p', 'vstride' bytes apart. At each
// position that passes needs_filter2() (with limit 2 * thresh + 1 and
// interior limit 'ithresh'), applies do_filter2() when hev() reports a
// difference above 'hev_thresh', and do_filter6() otherwise. 'hstride' is the
// distance between the samples each filter reads.
static WEBP_INLINE void FilterLoop26(uint8_t* p,
                                     int hstride, int vstride, int size,
                                     int thresh, int ithresh, int hev_thresh) {
  const int limit = 2 * thresh + 1;
  int i;
  for (i = 0; i < size; ++i, p += vstride) {
    if (!needs_filter2(p, hstride, limit, ithresh)) {
      continue;
    }
    if (hev(p, hstride, hev_thresh)) {
      do_filter2(p, hstride);
    } else {
      do_filter6(p, hstride);
    }
  }
}
107

108
// Same walk as FilterLoop26(), but positions without a difference above
// 'hev_thresh' are filtered with do_filter4() instead of do_filter6().
// 'size' positions are visited, 'vstride' bytes apart; 'hstride' is the
// distance between the samples each filter reads.
static WEBP_INLINE void FilterLoop24(uint8_t* p,
                                     int hstride, int vstride, int size,
                                     int thresh, int ithresh, int hev_thresh) {
  const int limit = 2 * thresh + 1;
  int i;
  for (i = 0; i < size; ++i, p += vstride) {
    if (!needs_filter2(p, hstride, limit, ithresh)) {
      continue;
    }
    if (hev(p, hstride, hev_thresh)) {
      do_filter2(p, hstride);
    } else {
      do_filter4(p, hstride);
    }
  }
}
123

124
// on macroblock edges
125
// Runs FilterLoop26() over 16 consecutive bytes starting at 'p'; each filter
// reads samples 'stride' bytes apart.
static void VFilter16(uint8_t* p, int stride,
                      int thresh, int ithresh, int hev_thresh) {
  enum { kNumPositions = 16 };
  const int sample_step = stride;  // distance between filtered samples
  const int position_step = 1;     // distance between successive positions
  FilterLoop26(p, sample_step, position_step, kNumPositions,
               thresh, ithresh, hev_thresh);
}
129

130
// Runs FilterLoop26() over 16 positions spaced 'stride' bytes apart starting
// at 'p'; each filter reads adjacent bytes.
static void HFilter16(uint8_t* p, int stride,
                      int thresh, int ithresh, int hev_thresh) {
  enum { kNumPositions = 16 };
  const int sample_step = 1;         // distance between filtered samples
  const int position_step = stride;  // distance between successive positions
  FilterLoop26(p, sample_step, position_step, kNumPositions,
               thresh, ithresh, hev_thresh);
}
134

135
// 8-pixels wide variant, for chroma filtering
136
// Applies FilterLoop26() to 8 consecutive bytes of 'u', then of 'v'; each
// filter reads samples 'stride' bytes apart.
static void VFilter8(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
                     int stride, int thresh, int ithresh, int hev_thresh) {
  enum { kNumPositions = 8 };
  FilterLoop26(u, stride, 1, kNumPositions, thresh, ithresh, hev_thresh);
  FilterLoop26(v, stride, 1, kNumPositions, thresh, ithresh, hev_thresh);
}
141

142
// Applies FilterLoop26() to 8 positions spaced 'stride' bytes apart, first in
// 'u' and then in 'v'; each filter reads adjacent bytes.
static void HFilter8(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
                     int stride, int thresh, int ithresh, int hev_thresh) {
  enum { kNumPositions = 8 };
  FilterLoop26(u, 1, stride, kNumPositions, thresh, ithresh, hev_thresh);
  FilterLoop26(v, 1, stride, kNumPositions, thresh, ithresh, hev_thresh);
}
147

148
// Applies FilterLoop24() to 8 consecutive bytes located 4 * stride bytes past
// 'u', then the same for 'v'; each filter reads samples 'stride' bytes apart.
static void VFilter8i(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
                      int stride, int thresh, int ithresh, int hev_thresh) {
  uint8_t* const u_edge = u + 4 * stride;
  uint8_t* const v_edge = v + 4 * stride;
  FilterLoop24(u_edge, stride, 1, 8, thresh, ithresh, hev_thresh);
  FilterLoop24(v_edge, stride, 1, 8, thresh, ithresh, hev_thresh);
}
153

154
// Applies FilterLoop24() to 8 positions spaced 'stride' bytes apart, starting
// 4 bytes past 'u' and then 4 bytes past 'v'; each filter reads adjacent
// bytes.
static void HFilter8i(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
                      int stride, int thresh, int ithresh, int hev_thresh) {
  uint8_t* const u_edge = u + 4;
  uint8_t* const v_edge = v + 4;
  FilterLoop24(u_edge, 1, stride, 8, thresh, ithresh, hev_thresh);
  FilterLoop24(v_edge, 1, stride, 8, thresh, ithresh, hev_thresh);
}
159

160
// on three inner edges
161
// Runs FilterLoop24() three times over 16 consecutive bytes, at
// p + 4 * stride, p + 8 * stride and p + 12 * stride; each filter reads
// samples 'stride' bytes apart.
static void VFilter16i(uint8_t* p, int stride,
                       int thresh, int ithresh, int hev_thresh) {
  uint8_t* edge = p;
  int n;
  for (n = 0; n < 3; ++n) {
    edge += 4 * stride;
    FilterLoop24(edge, stride, 1, 16, thresh, ithresh, hev_thresh);
  }
}
169

170
// Runs FilterLoop24() three times over 16 positions spaced 'stride' bytes
// apart, starting at p + 4, p + 8 and p + 12; each filter reads adjacent
// bytes.
static void HFilter16i(uint8_t* p, int stride,
                       int thresh, int ithresh, int hev_thresh) {
  uint8_t* edge = p;
  int n;
  for (n = 0; n < 3; ++n) {
    edge += 4;
    FilterLoop24(edge, 1, stride, 16, thresh, ithresh, hev_thresh);
  }
}
178

179
//------------------------------------------------------------------------------
180
// Simple In-loop filtering (Paragraph 15.2)
181

182
// For each of the 16 bytes p[0..15]: if needs_filter() accepts the position
// (limit 2 * thresh + 1, samples 'stride' bytes apart), apply do_filter2().
static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
  const int limit = 2 * thresh + 1;
  int col = 0;
  while (col < 16) {
    uint8_t* const px = p + col;
    if (needs_filter(px, stride, limit)) {
      do_filter2(px, stride);
    }
    ++col;
  }
}
191

192
// For each of the 16 positions p + i * stride (i = 0..15): if needs_filter()
// accepts the position (limit 2 * thresh + 1, adjacent samples), apply
// do_filter2().
static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
  const int limit = 2 * thresh + 1;
  int row = 0;
  while (row < 16) {
    uint8_t* const px = p + row * stride;
    if (needs_filter(px, 1, limit)) {
      do_filter2(px, 1);
    }
    ++row;
  }
}
201

202
// Calls SimpleVFilter16() three times, at p + 4 * stride, p + 8 * stride and
// p + 12 * stride.
static void SimpleVFilter16i(uint8_t* p, int stride, int thresh) {
  uint8_t* edge = p;
  int n;
  for (n = 0; n < 3; ++n) {
    edge += 4 * stride;
    SimpleVFilter16(edge, stride, thresh);
  }
}
209

210
// Calls SimpleHFilter16() three times, at p + 4, p + 8 and p + 12.
static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
  uint8_t* edge = p;
  int n;
  for (n = 0; n < 3; ++n) {
    edge += 4;
    SimpleHFilter16(edge, stride, thresh);
  }
}
217

218
// Reads the sixteen int16_t values in[0..15] (byte offsets 0..30), transforms
// them with the unrolled assembly below and adds the result to the 4x4 block
// of bytes at 'dst' (rows are BPS bytes apart). Every sum is clamped to
// [0, 255] before being stored back.
//   in:  16 input coefficients; only read.
//   dst: 4x4 destination block, read and rewritten in place.
// MUL_SHIFT_C1, MUL_SHIFT_C1_IO, XSTR and BPS come from project headers.
static void TransformOne(const int16_t* WEBP_RESTRICT in,
                         uint8_t* WEBP_RESTRICT dst) {
  int temp0, temp1, temp2, temp3, temp4;
  int temp5, temp6, temp7, temp8, temp9;
  int temp10, temp11, temp12, temp13, temp14;
  int temp15, temp16, temp17, temp18, temp19;
  // The assembly only loads through this pointer, so it stays const-qualified
  // (no cast that discards the qualifier).
  const int16_t* const p_in = in;

  // loops unrolled and merged to avoid usage of tmp buffer
  // and to reduce number of stalls. MUL macro is written
  // in assembler and inlined
  __asm__ volatile(
    // First stage: loads of the 16 inputs are interleaved with the arithmetic.
    "lh       %[temp0],  0(%[in])                      \n\t"
    "lh       %[temp8],  16(%[in])                     \n\t"
    "lh       %[temp4],  8(%[in])                      \n\t"
    "lh       %[temp12], 24(%[in])                     \n\t"
    "addu     %[temp16], %[temp0],  %[temp8]           \n\t"
    "subu     %[temp0],  %[temp0],  %[temp8]           \n\t"
    "mul      %[temp8],  %[temp4],  %[kC2]             \n\t"
    MUL_SHIFT_C1(temp17, temp12)
    MUL_SHIFT_C1_IO(temp4, temp19)
    "mul      %[temp12], %[temp12], %[kC2]             \n\t"
    "lh       %[temp1],  2(%[in])                      \n\t"
    "lh       %[temp5],  10(%[in])                     \n\t"
    "lh       %[temp9],  18(%[in])                     \n\t"
    "lh       %[temp13], 26(%[in])                     \n\t"
    "sra      %[temp8],  %[temp8],  16                 \n\t"
    "sra      %[temp12], %[temp12], 16                 \n\t"
    "lh       %[temp2],  4(%[in])                      \n\t"
    "lh       %[temp6],  12(%[in])                     \n\t"
    "lh       %[temp10], 20(%[in])                     \n\t"
    "lh       %[temp14], 28(%[in])                     \n\t"
    "subu     %[temp17], %[temp8],  %[temp17]          \n\t"
    "addu     %[temp4],  %[temp4],  %[temp12]          \n\t"
    "addu     %[temp8],  %[temp16], %[temp4]           \n\t"
    "subu     %[temp4],  %[temp16], %[temp4]           \n\t"
    "addu     %[temp16], %[temp1],  %[temp9]           \n\t"
    "subu     %[temp1],  %[temp1],  %[temp9]           \n\t"
    "lh       %[temp3],  6(%[in])                      \n\t"
    "lh       %[temp7],  14(%[in])                     \n\t"
    "lh       %[temp11], 22(%[in])                     \n\t"
    "lh       %[temp15], 30(%[in])                     \n\t"
    "addu     %[temp12], %[temp0],  %[temp17]          \n\t"
    "subu     %[temp0],  %[temp0],  %[temp17]          \n\t"
    "mul      %[temp9],  %[temp5],  %[kC2]             \n\t"
    MUL_SHIFT_C1(temp17, temp13)
    MUL_SHIFT_C1_IO(temp5, temp19)
    "mul      %[temp13], %[temp13], %[kC2]             \n\t"
    "sra      %[temp9],  %[temp9],  16                 \n\t"
    "subu     %[temp17], %[temp9],  %[temp17]          \n\t"
    "sra      %[temp13], %[temp13], 16                 \n\t"
    "addu     %[temp5],  %[temp5],  %[temp13]          \n\t"
    "addu     %[temp13], %[temp1],  %[temp17]          \n\t"
    "subu     %[temp1],  %[temp1],  %[temp17]          \n\t"
    MUL_SHIFT_C1(temp17, temp14)
    "mul      %[temp14], %[temp14], %[kC2]             \n\t"
    "addu     %[temp9],  %[temp16], %[temp5]           \n\t"
    "subu     %[temp5],  %[temp16], %[temp5]           \n\t"
    "addu     %[temp16], %[temp2],  %[temp10]          \n\t"
    "subu     %[temp2],  %[temp2],  %[temp10]          \n\t"
    "mul      %[temp10], %[temp6],  %[kC2]             \n\t"
    MUL_SHIFT_C1_IO(temp6, temp19)
    "sra      %[temp14], %[temp14], 16                 \n\t"
    "sra      %[temp10], %[temp10], 16                 \n\t"
    "subu     %[temp17], %[temp10], %[temp17]          \n\t"
    "addu     %[temp6],  %[temp6],  %[temp14]          \n\t"
    "addu     %[temp10], %[temp16], %[temp6]           \n\t"
    "subu     %[temp6],  %[temp16], %[temp6]           \n\t"
    "addu     %[temp14], %[temp2],  %[temp17]          \n\t"
    "subu     %[temp2],  %[temp2],  %[temp17]          \n\t"
    MUL_SHIFT_C1(temp17, temp15)
    "mul      %[temp15], %[temp15], %[kC2]             \n\t"
    "addu     %[temp16], %[temp3],  %[temp11]          \n\t"
    "subu     %[temp3],  %[temp3],  %[temp11]          \n\t"
    "mul      %[temp11], %[temp7],  %[kC2]             \n\t"
    MUL_SHIFT_C1_IO(temp7, temp19)
    // Bias of 4, added ahead of the arithmetic shifts right by 3 further down.
    "addiu    %[temp8],  %[temp8],  4                  \n\t"
    "addiu    %[temp12], %[temp12], 4                  \n\t"
    "addiu    %[temp0],  %[temp0],  4                  \n\t"
    "addiu    %[temp4],  %[temp4],  4                  \n\t"
    "sra      %[temp15], %[temp15], 16                 \n\t"
    "sra      %[temp11], %[temp11], 16                 \n\t"
    "subu     %[temp17], %[temp11], %[temp17]          \n\t"
    "addu     %[temp7],  %[temp7],  %[temp15]          \n\t"
    "addu     %[temp15], %[temp3],  %[temp17]          \n\t"
    "subu     %[temp3],  %[temp3],  %[temp17]          \n\t"
    "addu     %[temp11], %[temp16], %[temp7]           \n\t"
    "subu     %[temp7],  %[temp16], %[temp7]           \n\t"
    // Second stage: operates on the first-stage results held in registers.
    "addu     %[temp16], %[temp8],  %[temp10]          \n\t"
    "subu     %[temp8],  %[temp8],  %[temp10]          \n\t"
    "mul      %[temp10], %[temp9],  %[kC2]             \n\t"
    MUL_SHIFT_C1(temp17, temp11)
    MUL_SHIFT_C1_IO(temp9, temp19)
    "mul      %[temp11], %[temp11], %[kC2]             \n\t"
    "sra      %[temp10], %[temp10], 16                 \n\t"
    "sra      %[temp11], %[temp11], 16                 \n\t"
    "subu     %[temp17], %[temp10], %[temp17]          \n\t"
    "addu     %[temp11], %[temp9],  %[temp11]          \n\t"
    "addu     %[temp10], %[temp12], %[temp14]          \n\t"
    "subu     %[temp12], %[temp12], %[temp14]          \n\t"
    "mul      %[temp14], %[temp13], %[kC2]             \n\t"
    MUL_SHIFT_C1(temp9, temp15)
    MUL_SHIFT_C1_IO(temp13, temp19)
    "mul      %[temp15], %[temp15], %[kC2]             \n\t"
    "sra      %[temp14], %[temp14], 16                 \n\t"
    "sra      %[temp15], %[temp15], 16                 \n\t"
    "subu     %[temp9],  %[temp14], %[temp9]           \n\t"
    "addu     %[temp15], %[temp13], %[temp15]          \n\t"
    "addu     %[temp14], %[temp0],  %[temp2]           \n\t"
    "subu     %[temp0],  %[temp0],  %[temp2]           \n\t"
    "mul      %[temp2],  %[temp1],  %[kC2]             \n\t"
    MUL_SHIFT_C1(temp13, temp3)
    MUL_SHIFT_C1_IO(temp1, temp19)
    "mul      %[temp3],  %[temp3],  %[kC2]             \n\t"
    "sra      %[temp2],  %[temp2],  16                 \n\t"
    "sra      %[temp3],  %[temp3],  16                 \n\t"
    "subu     %[temp13], %[temp2],  %[temp13]          \n\t"
    "addu     %[temp3],  %[temp1],  %[temp3]           \n\t"
    "addu     %[temp2],  %[temp4],  %[temp6]           \n\t"
    "subu     %[temp4],  %[temp4],  %[temp6]           \n\t"
    "mul      %[temp6],  %[temp5],  %[kC2]             \n\t"
    MUL_SHIFT_C1(temp1, temp7)
    MUL_SHIFT_C1_IO(temp5, temp19)
    "mul      %[temp7],  %[temp7],  %[kC2]             \n\t"
    "sra      %[temp6],  %[temp6],  16                 \n\t"
    "sra      %[temp7],  %[temp7],  16                 \n\t"
    "subu     %[temp1],  %[temp6],  %[temp1]           \n\t"
    "addu     %[temp7],  %[temp5],  %[temp7]           \n\t"
    // Final sums / differences, each shifted right by 3.
    "addu     %[temp5],  %[temp16], %[temp11]          \n\t"
    "subu     %[temp16], %[temp16], %[temp11]          \n\t"
    "addu     %[temp11], %[temp8],  %[temp17]          \n\t"
    "subu     %[temp8],  %[temp8],  %[temp17]          \n\t"
    "sra      %[temp5],  %[temp5],  3                  \n\t"
    "sra      %[temp16], %[temp16], 3                  \n\t"
    "sra      %[temp11], %[temp11], 3                  \n\t"
    "sra      %[temp8],  %[temp8],  3                  \n\t"
    "addu     %[temp17], %[temp10], %[temp15]          \n\t"
    "subu     %[temp10], %[temp10], %[temp15]          \n\t"
    "addu     %[temp15], %[temp12], %[temp9]           \n\t"
    "subu     %[temp12], %[temp12], %[temp9]           \n\t"
    "sra      %[temp17], %[temp17], 3                  \n\t"
    "sra      %[temp10], %[temp10], 3                  \n\t"
    "sra      %[temp15], %[temp15], 3                  \n\t"
    "sra      %[temp12], %[temp12], 3                  \n\t"
    "addu     %[temp9],  %[temp14], %[temp3]           \n\t"
    "subu     %[temp14], %[temp14], %[temp3]           \n\t"
    "addu     %[temp3],  %[temp0],  %[temp13]          \n\t"
    "subu     %[temp0],  %[temp0],  %[temp13]          \n\t"
    "sra      %[temp9],  %[temp9],  3                  \n\t"
    "sra      %[temp14], %[temp14], 3                  \n\t"
    "sra      %[temp3],  %[temp3],  3                  \n\t"
    "sra      %[temp0],  %[temp0],  3                  \n\t"
    "addu     %[temp13], %[temp2],  %[temp7]           \n\t"
    "subu     %[temp2],  %[temp2],  %[temp7]           \n\t"
    "addu     %[temp7],  %[temp4],  %[temp1]           \n\t"
    "subu     %[temp4],  %[temp4],  %[temp1]           \n\t"
    "sra      %[temp13], %[temp13], 3                  \n\t"
    "sra      %[temp2],  %[temp2],  3                  \n\t"
    "sra      %[temp7],  %[temp7],  3                  \n\t"
    "sra      %[temp4],  %[temp4],  3                  \n\t"
    // temp6 = 255, the upper clamp bound used by every movz below.
    // Clamp pattern: if (sum >> 8) is zero the sum is stored unchanged;
    // otherwise it is zeroed and, when (sum >> 31) is zero, replaced by 255.
    "addiu    %[temp6],  $zero,     255                \n\t"
    // Row 0 of dst: load, add, clamp and store one byte at a time.
    "lbu      %[temp1],  0+0*" XSTR(BPS) "(%[dst])     \n\t"
    "addu     %[temp1],  %[temp1],  %[temp5]           \n\t"
    "sra      %[temp5],  %[temp1],  8                  \n\t"
    "sra      %[temp18], %[temp1],  31                 \n\t"
    "beqz     %[temp5],  1f                            \n\t"
    "xor      %[temp1],  %[temp1],  %[temp1]           \n\t"
    "movz     %[temp1],  %[temp6],  %[temp18]          \n\t"
  "1:                                                  \n\t"
    "lbu      %[temp18], 1+0*" XSTR(BPS) "(%[dst])     \n\t"
    "sb       %[temp1],  0+0*" XSTR(BPS) "(%[dst])     \n\t"
    "addu     %[temp18], %[temp18], %[temp11]          \n\t"
    "sra      %[temp11], %[temp18], 8                  \n\t"
    "sra      %[temp1],  %[temp18], 31                 \n\t"
    "beqz     %[temp11], 2f                            \n\t"
    "xor      %[temp18], %[temp18], %[temp18]          \n\t"
    "movz     %[temp18], %[temp6],  %[temp1]           \n\t"
  "2:                                                  \n\t"
    "lbu      %[temp1],  2+0*" XSTR(BPS) "(%[dst])     \n\t"
    "sb       %[temp18], 1+0*" XSTR(BPS) "(%[dst])     \n\t"
    "addu     %[temp1],  %[temp1],  %[temp8]           \n\t"
    "sra      %[temp8],  %[temp1],  8                  \n\t"
    "sra      %[temp18], %[temp1],  31                 \n\t"
    "beqz     %[temp8],  3f                            \n\t"
    "xor      %[temp1],  %[temp1],  %[temp1]           \n\t"
    "movz     %[temp1],  %[temp6],  %[temp18]          \n\t"
  "3:                                                  \n\t"
    "lbu      %[temp18], 3+0*" XSTR(BPS) "(%[dst])     \n\t"
    "sb       %[temp1],  2+0*" XSTR(BPS) "(%[dst])     \n\t"
    "addu     %[temp18], %[temp18], %[temp16]          \n\t"
    "sra      %[temp16], %[temp18], 8                  \n\t"
    "sra      %[temp1],  %[temp18], 31                 \n\t"
    "beqz     %[temp16], 4f                            \n\t"
    "xor      %[temp18], %[temp18], %[temp18]          \n\t"
    "movz     %[temp18], %[temp6],  %[temp1]           \n\t"
  "4:                                                  \n\t"
    "sb       %[temp18], 3+0*" XSTR(BPS) "(%[dst])     \n\t"
    // Row 1 of dst: load four bytes, add, clamp, store.
    "lbu      %[temp5],  0+1*" XSTR(BPS) "(%[dst])     \n\t"
    "lbu      %[temp8],  1+1*" XSTR(BPS) "(%[dst])     \n\t"
    "lbu      %[temp11], 2+1*" XSTR(BPS) "(%[dst])     \n\t"
    "lbu      %[temp16], 3+1*" XSTR(BPS) "(%[dst])     \n\t"
    "addu     %[temp5],  %[temp5],  %[temp17]          \n\t"
    "addu     %[temp8],  %[temp8],  %[temp15]          \n\t"
    "addu     %[temp11], %[temp11], %[temp12]          \n\t"
    "addu     %[temp16], %[temp16], %[temp10]          \n\t"
    "sra      %[temp18], %[temp5],  8                  \n\t"
    "sra      %[temp1],  %[temp5],  31                 \n\t"
    "beqz     %[temp18], 5f                            \n\t"
    "xor      %[temp5],  %[temp5],  %[temp5]           \n\t"
    "movz     %[temp5],  %[temp6],  %[temp1]           \n\t"
  "5:                                                  \n\t"
    "sra      %[temp18], %[temp8],  8                  \n\t"
    "sra      %[temp1],  %[temp8],  31                 \n\t"
    "beqz     %[temp18], 6f                            \n\t"
    "xor      %[temp8],  %[temp8],  %[temp8]           \n\t"
    "movz     %[temp8],  %[temp6],  %[temp1]           \n\t"
  "6:                                                  \n\t"
    "sra      %[temp18], %[temp11], 8                  \n\t"
    "sra      %[temp1],  %[temp11], 31                 \n\t"
    "sra      %[temp17], %[temp16], 8                  \n\t"
    "sra      %[temp15], %[temp16], 31                 \n\t"
    "beqz     %[temp18], 7f                            \n\t"
    "xor      %[temp11], %[temp11], %[temp11]          \n\t"
    "movz     %[temp11], %[temp6],  %[temp1]           \n\t"
  "7:                                                  \n\t"
    "beqz     %[temp17], 8f                            \n\t"
    "xor      %[temp16], %[temp16], %[temp16]          \n\t"
    "movz     %[temp16], %[temp6],  %[temp15]          \n\t"
  "8:                                                  \n\t"
    "sb       %[temp5],  0+1*" XSTR(BPS) "(%[dst])     \n\t"
    "sb       %[temp8],  1+1*" XSTR(BPS) "(%[dst])     \n\t"
    "sb       %[temp11], 2+1*" XSTR(BPS) "(%[dst])     \n\t"
    "sb       %[temp16], 3+1*" XSTR(BPS) "(%[dst])     \n\t"
    // Row 2 of dst.
    "lbu      %[temp5],  0+2*" XSTR(BPS) "(%[dst])     \n\t"
    "lbu      %[temp8],  1+2*" XSTR(BPS) "(%[dst])     \n\t"
    "lbu      %[temp11], 2+2*" XSTR(BPS) "(%[dst])     \n\t"
    "lbu      %[temp16], 3+2*" XSTR(BPS) "(%[dst])     \n\t"
    "addu     %[temp5],  %[temp5],  %[temp9]           \n\t"
    "addu     %[temp8],  %[temp8],  %[temp3]           \n\t"
    "addu     %[temp11], %[temp11], %[temp0]           \n\t"
    "addu     %[temp16], %[temp16], %[temp14]          \n\t"
    "sra      %[temp18], %[temp5],  8                  \n\t"
    "sra      %[temp1],  %[temp5],  31                 \n\t"
    "sra      %[temp17], %[temp8],  8                  \n\t"
    "sra      %[temp15], %[temp8],  31                 \n\t"
    "sra      %[temp12], %[temp11], 8                  \n\t"
    "sra      %[temp10], %[temp11], 31                 \n\t"
    "sra      %[temp9],  %[temp16], 8                  \n\t"
    "sra      %[temp3],  %[temp16], 31                 \n\t"
    "beqz     %[temp18], 9f                            \n\t"
    "xor      %[temp5],  %[temp5],  %[temp5]           \n\t"
    "movz     %[temp5],  %[temp6],  %[temp1]           \n\t"
  "9:                                                  \n\t"
    "beqz     %[temp17], 10f                           \n\t"
    "xor      %[temp8],  %[temp8],  %[temp8]           \n\t"
    "movz     %[temp8],  %[temp6],  %[temp15]          \n\t"
  "10:                                                 \n\t"
    "beqz     %[temp12], 11f                           \n\t"
    "xor      %[temp11], %[temp11], %[temp11]          \n\t"
    "movz     %[temp11], %[temp6],  %[temp10]          \n\t"
  "11:                                                 \n\t"
    "beqz     %[temp9],  12f                           \n\t"
    "xor      %[temp16], %[temp16], %[temp16]          \n\t"
    "movz     %[temp16], %[temp6],  %[temp3]           \n\t"
  "12:                                                 \n\t"
    "sb       %[temp5],  0+2*" XSTR(BPS) "(%[dst])     \n\t"
    "sb       %[temp8],  1+2*" XSTR(BPS) "(%[dst])     \n\t"
    "sb       %[temp11], 2+2*" XSTR(BPS) "(%[dst])     \n\t"
    "sb       %[temp16], 3+2*" XSTR(BPS) "(%[dst])     \n\t"
    // Row 3 of dst.
    "lbu      %[temp5],  0+3*" XSTR(BPS) "(%[dst])     \n\t"
    "lbu      %[temp8],  1+3*" XSTR(BPS) "(%[dst])     \n\t"
    "lbu      %[temp11], 2+3*" XSTR(BPS) "(%[dst])     \n\t"
    "lbu      %[temp16], 3+3*" XSTR(BPS) "(%[dst])     \n\t"
    "addu     %[temp5],  %[temp5],  %[temp13]          \n\t"
    "addu     %[temp8],  %[temp8],  %[temp7]           \n\t"
    "addu     %[temp11], %[temp11], %[temp4]           \n\t"
    "addu     %[temp16], %[temp16], %[temp2]           \n\t"
    "sra      %[temp18], %[temp5],  8                  \n\t"
    "sra      %[temp1],  %[temp5],  31                 \n\t"
    "sra      %[temp17], %[temp8],  8                  \n\t"
    "sra      %[temp15], %[temp8],  31                 \n\t"
    "sra      %[temp12], %[temp11], 8                  \n\t"
    "sra      %[temp10], %[temp11], 31                 \n\t"
    "sra      %[temp9],  %[temp16], 8                  \n\t"
    "sra      %[temp3],  %[temp16], 31                 \n\t"
    "beqz     %[temp18], 13f                           \n\t"
    "xor      %[temp5],  %[temp5],  %[temp5]           \n\t"
    "movz     %[temp5],  %[temp6],  %[temp1]           \n\t"
  "13:                                                 \n\t"
    "beqz     %[temp17], 14f                           \n\t"
    "xor      %[temp8],  %[temp8],  %[temp8]           \n\t"
    "movz     %[temp8],  %[temp6],  %[temp15]          \n\t"
  "14:                                                 \n\t"
    "beqz     %[temp12], 15f                           \n\t"
    "xor      %[temp11], %[temp11], %[temp11]          \n\t"
    "movz     %[temp11], %[temp6],  %[temp10]          \n\t"
  "15:                                                 \n\t"
    "beqz     %[temp9],  16f                           \n\t"
    "xor      %[temp16], %[temp16], %[temp16]          \n\t"
    "movz     %[temp16], %[temp6],  %[temp3]           \n\t"
  "16:                                                 \n\t"
    "sb       %[temp5],  0+3*" XSTR(BPS) "(%[dst])     \n\t"
    "sb       %[temp8],  1+3*" XSTR(BPS) "(%[dst])     \n\t"
    "sb       %[temp11], 2+3*" XSTR(BPS) "(%[dst])     \n\t"
    "sb       %[temp16], 3+3*" XSTR(BPS) "(%[dst])     \n\t"

    // All twenty temporaries are early-clobber outputs ("=&r"), so none of
    // them may share a register with the inputs below.
    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
      [temp9]"=&r"(temp9), [temp10]"=&r"(temp10), [temp11]"=&r"(temp11),
      [temp12]"=&r"(temp12), [temp13]"=&r"(temp13), [temp14]"=&r"(temp14),
      [temp15]"=&r"(temp15), [temp16]"=&r"(temp16), [temp17]"=&r"(temp17),
      [temp18]"=&r"(temp18), [temp19]"=&r"(temp19)
    : [in]"r"(p_in), [kC1]"r"(kC1), [kC2]"r"(kC2), [dst]"r"(dst)
    : "memory", "hi", "lo"
  );
}
535

536
// Applies TransformOne() to (in, dst) and, when 'do_two' is non-zero, a second
// time to the next 16 coefficients (in + 16) and the block 4 bytes to the
// right (dst + 4).
static void TransformTwo(const int16_t* WEBP_RESTRICT in,
                         uint8_t* WEBP_RESTRICT dst, int do_two) {
  TransformOne(in, dst);
  if (!do_two) {
    return;
  }
  TransformOne(in + 16, dst + 4);
}
543

544
//------------------------------------------------------------------------------
545
// Entry point
546

547
extern void VP8DspInitMIPS32(void);
548

549
WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitMIPS32(void) {
550
  VP8InitClipTables();
551

552
  VP8Transform = TransformTwo;
553

554
  VP8VFilter16 = VFilter16;
555
  VP8HFilter16 = HFilter16;
556
  VP8VFilter8 = VFilter8;
557
  VP8HFilter8 = HFilter8;
558
  VP8VFilter16i = VFilter16i;
559
  VP8HFilter16i = HFilter16i;
560
  VP8VFilter8i = VFilter8i;
561
  VP8HFilter8i = HFilter8i;
562

563
  VP8SimpleVFilter16 = SimpleVFilter16;
564
  VP8SimpleHFilter16 = SimpleHFilter16;
565
  VP8SimpleVFilter16i = SimpleVFilter16i;
566
  VP8SimpleHFilter16i = SimpleHFilter16i;
567
}
568

569
#else  // !WEBP_USE_MIPS32
570

571
// Used when WEBP_USE_MIPS32 is not defined, in place of the real
// VP8DspInitMIPS32() above. NOTE(review): the macro presumably expands to an
// empty definition of VP8DspInitMIPS32 -- confirm against its definition.
WEBP_DSP_INIT_STUB(VP8DspInitMIPS32)
572

573
#endif  // WEBP_USE_MIPS32
574

575
Product

Resources

Company