Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/libwebp/src/dsp/dec_mips32.c
9913 views
1
// Copyright 2014 Google Inc. All Rights Reserved.
2
//
3
// Use of this source code is governed by a BSD-style license
4
// that can be found in the COPYING file in the root of the source
5
// tree. An additional intellectual property rights grant can be found
6
// in the file PATENTS. All contributing project authors may
7
// be found in the AUTHORS file in the root of the source tree.
8
// -----------------------------------------------------------------------------
9
//
10
// MIPS version of dsp functions
11
//
12
// Author(s): Djordje Pesut ([email protected])
13
// Jovan Zelincevic ([email protected])
14
15
#include "src/dsp/dsp.h"
16
17
#if defined(WEBP_USE_MIPS32)
18
19
#include "src/dsp/mips_macro.h"
20
21
static const int kC1 = WEBP_TRANSFORM_AC3_C1;
22
static const int kC2 = WEBP_TRANSFORM_AC3_C2;
23
24
// Branch-free absolute value of 'x'.
// 'sign' is 0 for x >= 0 and all-ones for x < 0, so '(x ^ sign) - sign'
// leaves non-negative values untouched and negates negative ones.
// NOTE: relies on 'int' being 32 bits wide and on '>>' of a negative int
// being an arithmetic shift (implementation-defined in C).
static WEBP_INLINE int abs_mips32(int x) {
  const int sign = x >> 31;
  return (x ^ sign) - sign;
}
28
29
// 4 pixels in, 2 pixels out
30
static WEBP_INLINE void do_filter2(uint8_t* p, int step) {
31
const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
32
const int a = 3 * (q0 - p0) + VP8ksclip1[p1 - q1];
33
const int a1 = VP8ksclip2[(a + 4) >> 3];
34
const int a2 = VP8ksclip2[(a + 3) >> 3];
35
p[-step] = VP8kclip1[p0 + a2];
36
p[ 0] = VP8kclip1[q0 - a1];
37
}
38
39
// 4 pixels in, 4 pixels out
40
static WEBP_INLINE void do_filter4(uint8_t* p, int step) {
41
const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
42
const int a = 3 * (q0 - p0);
43
const int a1 = VP8ksclip2[(a + 4) >> 3];
44
const int a2 = VP8ksclip2[(a + 3) >> 3];
45
const int a3 = (a1 + 1) >> 1;
46
p[-2 * step] = VP8kclip1[p1 + a3];
47
p[- step] = VP8kclip1[p0 + a2];
48
p[ 0] = VP8kclip1[q0 - a1];
49
p[ step] = VP8kclip1[q1 - a3];
50
}
51
52
// 6 pixels in, 6 pixels out
53
static WEBP_INLINE void do_filter6(uint8_t* p, int step) {
54
const int p2 = p[-3 * step], p1 = p[-2 * step], p0 = p[-step];
55
const int q0 = p[0], q1 = p[step], q2 = p[2 * step];
56
const int a = VP8ksclip1[3 * (q0 - p0) + VP8ksclip1[p1 - q1]];
57
// a is in [-128,127], a1 in [-27,27], a2 in [-18,18] and a3 in [-9,9]
58
const int a1 = (27 * a + 63) >> 7; // eq. to ((3 * a + 7) * 9) >> 7
59
const int a2 = (18 * a + 63) >> 7; // eq. to ((2 * a + 7) * 9) >> 7
60
const int a3 = (9 * a + 63) >> 7; // eq. to ((1 * a + 7) * 9) >> 7
61
p[-3 * step] = VP8kclip1[p2 + a3];
62
p[-2 * step] = VP8kclip1[p1 + a2];
63
p[- step] = VP8kclip1[p0 + a1];
64
p[ 0] = VP8kclip1[q0 - a1];
65
p[ step] = VP8kclip1[q1 - a2];
66
p[ 2 * step] = VP8kclip1[q2 - a3];
67
}
68
69
// Returns non-zero when either |p1 - p0| or |q1 - q0| exceeds 'thresh',
// i.e. when the samples on at least one side of the edge differ strongly.
// The four samples are read at p[-2 * step] .. p[step].
static WEBP_INLINE int hev(const uint8_t* p, int step, int thresh) {
  const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
  return (abs_mips32(p1 - p0) > thresh) || (abs_mips32(q1 - q0) > thresh);
}
73
74
// Edge test used by the simple filters: returns non-zero when
// 4 * |p0 - q0| + |p1 - q1| does not exceed 't'.
// The four samples are read at p[-2 * step] .. p[step].
static WEBP_INLINE int needs_filter(const uint8_t* p, int step, int t) {
  const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
  return ((4 * abs_mips32(p0 - q0) + abs_mips32(p1 - q1)) <= t);
}
78
79
// Edge test used by the complex filters. Reads the eight samples
// p3..p0, q0..q3 at p[-4 * step] .. p[3 * step].
// Returns 0 as soon as 4 * |p0 - q0| + |p1 - q1| exceeds 't'. Otherwise
// returns non-zero only if every difference between adjacent samples on
// the same side of the edge is at most 'it'.
static WEBP_INLINE int needs_filter2(const uint8_t* p,
                                     int step, int t, int it) {
  const int p3 = p[-4 * step], p2 = p[-3 * step];
  const int p1 = p[-2 * step], p0 = p[-step];
  const int q0 = p[0], q1 = p[step], q2 = p[2 * step], q3 = p[3 * step];
  if ((4 * abs_mips32(p0 - q0) + abs_mips32(p1 - q1)) > t) {
    return 0;
  }
  return abs_mips32(p3 - p2) <= it && abs_mips32(p2 - p1) <= it &&
         abs_mips32(p1 - p0) <= it && abs_mips32(q3 - q2) <= it &&
         abs_mips32(q2 - q1) <= it && abs_mips32(q1 - q0) <= it;
}
91
92
// Filters 'size' consecutive edge positions, starting at 'p' and advancing
// by 'vstride' after each one. 'hstride' is the distance between the
// samples read across the edge at a single position.
// A position is filtered only when needs_filter2() accepts it (with the
// limit 2 * thresh + 1 and the interior limit 'ithresh'). It then gets
// the 2-sample filter if hev() fires, else the 6-sample filter.
static WEBP_INLINE void FilterLoop26(uint8_t* p,
                                     int hstride, int vstride, int size,
                                     int thresh, int ithresh, int hev_thresh) {
  const int thresh2 = 2 * thresh + 1;
  while (size-- > 0) {
    if (needs_filter2(p, hstride, thresh2, ithresh)) {
      if (hev(p, hstride, hev_thresh)) {
        do_filter2(p, hstride);
      } else {
        do_filter6(p, hstride);
      }
    }
    p += vstride;
  }
}
107
108
// Same traversal and edge test as FilterLoop26, except that positions
// where hev() does not fire get the 4-sample filter (do_filter4) instead
// of the 6-sample one.
// 'hstride' is the distance between samples across the edge and 'vstride'
// the step from one filtered position to the next.
static WEBP_INLINE void FilterLoop24(uint8_t* p,
                                     int hstride, int vstride, int size,
                                     int thresh, int ithresh, int hev_thresh) {
  const int thresh2 = 2 * thresh + 1;
  while (size-- > 0) {
    if (needs_filter2(p, hstride, thresh2, ithresh)) {
      if (hev(p, hstride, hev_thresh)) {
        do_filter2(p, hstride);
      } else {
        do_filter4(p, hstride);
      }
    }
    p += vstride;
  }
}
123
124
// on macroblock edges
// Filters the 16 positions p[0] .. p[15]. Samples across the edge are
// 'stride' bytes apart, so the neighbours lie in the rows above and below.
static void VFilter16(uint8_t* p, int stride,
                      int thresh, int ithresh, int hev_thresh) {
  FilterLoop26(p, stride, 1, 16, thresh, ithresh, hev_thresh);
}
129
130
// Counterpart of VFilter16 for the other direction: filters 16 positions
// spaced 'stride' bytes apart, with the samples across the edge being
// adjacent bytes (p[-1], p[0], ...).
static void HFilter16(uint8_t* p, int stride,
                      int thresh, int ithresh, int hev_thresh) {
  FilterLoop26(p, 1, stride, 16, thresh, ithresh, hev_thresh);
}
134
135
// 8-pixels wide variant, for chroma filtering
136
static void VFilter8(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
137
int stride, int thresh, int ithresh, int hev_thresh) {
138
FilterLoop26(u, stride, 1, 8, thresh, ithresh, hev_thresh);
139
FilterLoop26(v, stride, 1, 8, thresh, ithresh, hev_thresh);
140
}
141
142
// Chroma counterpart of HFilter16: 8 positions spaced 'stride' bytes apart
// in each of the planes 'u' and 'v', with samples across the edge being
// adjacent bytes.
static void HFilter8(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
                     int stride, int thresh, int ithresh, int hev_thresh) {
  FilterLoop26(u, 1, stride, 8, thresh, ithresh, hev_thresh);
  FilterLoop26(v, 1, stride, 8, thresh, ithresh, hev_thresh);
}
147
148
// Inner-edge chroma filter: runs the 2/4-sample loop (FilterLoop24) on the
// edge located 4 rows below the start of each plane ('u + 4 * stride',
// 'v + 4 * stride'), over 8 positions.
static void VFilter8i(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
                      int stride, int thresh, int ithresh, int hev_thresh) {
  FilterLoop24(u + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
  FilterLoop24(v + 4 * stride, stride, 1, 8, thresh, ithresh, hev_thresh);
}
153
154
// Inner-edge chroma filter for the other direction: runs the 2/4-sample
// loop (FilterLoop24) on the edge 4 bytes into each plane ('u + 4',
// 'v + 4'), over 8 positions spaced 'stride' bytes apart.
static void HFilter8i(uint8_t* WEBP_RESTRICT u, uint8_t* WEBP_RESTRICT v,
                      int stride, int thresh, int ithresh, int hev_thresh) {
  FilterLoop24(u + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
  FilterLoop24(v + 4, 1, stride, 8, thresh, ithresh, hev_thresh);
}
159
160
// on three inner edges
// Runs the 2/4-sample loop (FilterLoop24) over 16 positions on the edges
// located 4, 8 and 12 rows below 'p'. 'p' is advanced by 4 * stride before
// each pass, so the edge at 'p' itself is not touched.
static void VFilter16i(uint8_t* p, int stride,
                       int thresh, int ithresh, int hev_thresh) {
  int k;
  for (k = 3; k > 0; --k) {
    p += 4 * stride;
    FilterLoop24(p, stride, 1, 16, thresh, ithresh, hev_thresh);
  }
}
169
170
// Counterpart of VFilter16i for the other direction: runs FilterLoop24
// over 16 positions (spaced 'stride' bytes apart) on the edges located
// 4, 8 and 12 bytes after 'p'. 'p' is advanced by 4 before each pass.
static void HFilter16i(uint8_t* p, int stride,
                       int thresh, int ithresh, int hev_thresh) {
  int k;
  for (k = 3; k > 0; --k) {
    p += 4;
    FilterLoop24(p, 1, stride, 16, thresh, ithresh, hev_thresh);
  }
}
178
179
//------------------------------------------------------------------------------
180
// Simple In-loop filtering (Paragraph 15.2)
181
182
// Simple filter across the edge at 'p', for the 16 positions p[0] .. p[15].
// Samples across the edge are 'stride' bytes apart. A position is filtered
// with do_filter2() only when needs_filter() accepts it against the limit
// 2 * thresh + 1.
static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
  int i;
  const int thresh2 = 2 * thresh + 1;
  for (i = 0; i < 16; ++i) {
    if (needs_filter(p + i, stride, thresh2)) {
      do_filter2(p + i, stride);
    }
  }
}
191
192
// Simple filter for the other direction: visits 16 positions spaced
// 'stride' bytes apart (p, p + stride, ...), with the samples across the
// edge being adjacent bytes. Uses the same needs_filter() / do_filter2()
// pair and the same limit 2 * thresh + 1 as SimpleVFilter16.
static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
  int i;
  const int thresh2 = 2 * thresh + 1;
  for (i = 0; i < 16; ++i) {
    if (needs_filter(p + i * stride, 1, thresh2)) {
      do_filter2(p + i * stride, 1);
    }
  }
}
201
202
// Applies SimpleVFilter16 to the three edges located 4, 8 and 12 rows
// below 'p'. 'p' is advanced by 4 * stride before each call, so the edge
// at 'p' itself is not touched.
static void SimpleVFilter16i(uint8_t* p, int stride, int thresh) {
  int k;
  for (k = 3; k > 0; --k) {
    p += 4 * stride;
    SimpleVFilter16(p, stride, thresh);
  }
}
209
210
// Applies SimpleHFilter16 to the three edges located 4, 8 and 12 bytes
// after 'p'. 'p' is advanced by 4 before each call, so the edge at 'p'
// itself is not touched.
static void SimpleHFilter16i(uint8_t* p, int stride, int thresh) {
  int k;
  for (k = 3; k > 0; --k) {
    p += 4;
    SimpleHFilter16(p, stride, thresh);
  }
}
217
218
// Transforms the 16 coefficients at 'in' and adds the result to the 4x4
// block of bytes at 'dst', saturating each sum to [0, 255].
//
// The whole routine is a single MIPS32 inline-assembly statement:
//  - The 16 coefficients are loaded with 'lh' from byte offsets 0..30 of
//    'in'. The loads are interleaved with add/sub butterflies, multiplies
//    by kC2 (followed by 'sra ..., 16') and the MUL_SHIFT_C1 /
//    MUL_SHIFT_C1_IO macros. Those macros come from src/dsp/mips_macro.h,
//    which is not visible here; presumably they are the kC1 counterpart of
//    the multiply-and-shift.
//  - Four intermediate values get +4 added ('addiu ..., 4') and all 16
//    results are shifted right by 3, i.e. the +4 acts as a rounding bias.
//  - Each result is added to the matching 'dst' byte at column + row * BPS.
//    BPS and XSTR come from project headers; XSTR presumably stringifies
//    BPS into the address expression. The sum is clamped and stored back.
//
// 'in' is only read, so it is handed to the asm directly, with no
// const-discarding cast.
static void TransformOne(const int16_t* WEBP_RESTRICT in,
                         uint8_t* WEBP_RESTRICT dst) {
  int temp0, temp1, temp2, temp3, temp4;
  int temp5, temp6, temp7, temp8, temp9;
  int temp10, temp11, temp12, temp13, temp14;
  int temp15, temp16, temp17, temp18, temp19;

  // loops unrolled and merged to avoid usage of tmp buffer
  // and to reduce number of stalls. MUL macro is written
  // in assembler and inlined
  __asm__ volatile(
    // Byte offsets 0/16/8/24 are in[0], in[8], in[4], in[12].
    "lh %[temp0], 0(%[in]) \n\t"
    "lh %[temp8], 16(%[in]) \n\t"
    "lh %[temp4], 8(%[in]) \n\t"
    "lh %[temp12], 24(%[in]) \n\t"
    "addu %[temp16], %[temp0], %[temp8] \n\t"
    "subu %[temp0], %[temp0], %[temp8] \n\t"
    "mul %[temp8], %[temp4], %[kC2] \n\t"
    MUL_SHIFT_C1(temp17, temp12)
    MUL_SHIFT_C1_IO(temp4, temp19)
    "mul %[temp12], %[temp12], %[kC2] \n\t"
    "lh %[temp1], 2(%[in]) \n\t"
    "lh %[temp5], 10(%[in]) \n\t"
    "lh %[temp9], 18(%[in]) \n\t"
    "lh %[temp13], 26(%[in]) \n\t"
    "sra %[temp8], %[temp8], 16 \n\t"
    "sra %[temp12], %[temp12], 16 \n\t"
    "lh %[temp2], 4(%[in]) \n\t"
    "lh %[temp6], 12(%[in]) \n\t"
    "lh %[temp10], 20(%[in]) \n\t"
    "lh %[temp14], 28(%[in]) \n\t"
    "subu %[temp17], %[temp8], %[temp17] \n\t"
    "addu %[temp4], %[temp4], %[temp12] \n\t"
    "addu %[temp8], %[temp16], %[temp4] \n\t"
    "subu %[temp4], %[temp16], %[temp4] \n\t"
    "addu %[temp16], %[temp1], %[temp9] \n\t"
    "subu %[temp1], %[temp1], %[temp9] \n\t"
    "lh %[temp3], 6(%[in]) \n\t"
    "lh %[temp7], 14(%[in]) \n\t"
    "lh %[temp11], 22(%[in]) \n\t"
    "lh %[temp15], 30(%[in]) \n\t"
    "addu %[temp12], %[temp0], %[temp17] \n\t"
    "subu %[temp0], %[temp0], %[temp17] \n\t"
    "mul %[temp9], %[temp5], %[kC2] \n\t"
    MUL_SHIFT_C1(temp17, temp13)
    MUL_SHIFT_C1_IO(temp5, temp19)
    "mul %[temp13], %[temp13], %[kC2] \n\t"
    "sra %[temp9], %[temp9], 16 \n\t"
    "subu %[temp17], %[temp9], %[temp17] \n\t"
    "sra %[temp13], %[temp13], 16 \n\t"
    "addu %[temp5], %[temp5], %[temp13] \n\t"
    "addu %[temp13], %[temp1], %[temp17] \n\t"
    "subu %[temp1], %[temp1], %[temp17] \n\t"
    MUL_SHIFT_C1(temp17, temp14)
    "mul %[temp14], %[temp14], %[kC2] \n\t"
    "addu %[temp9], %[temp16], %[temp5] \n\t"
    "subu %[temp5], %[temp16], %[temp5] \n\t"
    "addu %[temp16], %[temp2], %[temp10] \n\t"
    "subu %[temp2], %[temp2], %[temp10] \n\t"
    "mul %[temp10], %[temp6], %[kC2] \n\t"
    MUL_SHIFT_C1_IO(temp6, temp19)
    "sra %[temp14], %[temp14], 16 \n\t"
    "sra %[temp10], %[temp10], 16 \n\t"
    "subu %[temp17], %[temp10], %[temp17] \n\t"
    "addu %[temp6], %[temp6], %[temp14] \n\t"
    "addu %[temp10], %[temp16], %[temp6] \n\t"
    "subu %[temp6], %[temp16], %[temp6] \n\t"
    "addu %[temp14], %[temp2], %[temp17] \n\t"
    "subu %[temp2], %[temp2], %[temp17] \n\t"
    MUL_SHIFT_C1(temp17, temp15)
    "mul %[temp15], %[temp15], %[kC2] \n\t"
    "addu %[temp16], %[temp3], %[temp11] \n\t"
    "subu %[temp3], %[temp3], %[temp11] \n\t"
    "mul %[temp11], %[temp7], %[kC2] \n\t"
    MUL_SHIFT_C1_IO(temp7, temp19)
    // Rounding bias for the final 'sra ..., 3'.
    "addiu %[temp8], %[temp8], 4 \n\t"
    "addiu %[temp12], %[temp12], 4 \n\t"
    "addiu %[temp0], %[temp0], 4 \n\t"
    "addiu %[temp4], %[temp4], 4 \n\t"
    "sra %[temp15], %[temp15], 16 \n\t"
    "sra %[temp11], %[temp11], 16 \n\t"
    "subu %[temp17], %[temp11], %[temp17] \n\t"
    "addu %[temp7], %[temp7], %[temp15] \n\t"
    "addu %[temp15], %[temp3], %[temp17] \n\t"
    "subu %[temp3], %[temp3], %[temp17] \n\t"
    "addu %[temp11], %[temp16], %[temp7] \n\t"
    "subu %[temp7], %[temp16], %[temp7] \n\t"
    "addu %[temp16], %[temp8], %[temp10] \n\t"
    "subu %[temp8], %[temp8], %[temp10] \n\t"
    "mul %[temp10], %[temp9], %[kC2] \n\t"
    MUL_SHIFT_C1(temp17, temp11)
    MUL_SHIFT_C1_IO(temp9, temp19)
    "mul %[temp11], %[temp11], %[kC2] \n\t"
    "sra %[temp10], %[temp10], 16 \n\t"
    "sra %[temp11], %[temp11], 16 \n\t"
    "subu %[temp17], %[temp10], %[temp17] \n\t"
    "addu %[temp11], %[temp9], %[temp11] \n\t"
    "addu %[temp10], %[temp12], %[temp14] \n\t"
    "subu %[temp12], %[temp12], %[temp14] \n\t"
    "mul %[temp14], %[temp13], %[kC2] \n\t"
    MUL_SHIFT_C1(temp9, temp15)
    MUL_SHIFT_C1_IO(temp13, temp19)
    "mul %[temp15], %[temp15], %[kC2] \n\t"
    "sra %[temp14], %[temp14], 16 \n\t"
    "sra %[temp15], %[temp15], 16 \n\t"
    "subu %[temp9], %[temp14], %[temp9] \n\t"
    "addu %[temp15], %[temp13], %[temp15] \n\t"
    "addu %[temp14], %[temp0], %[temp2] \n\t"
    "subu %[temp0], %[temp0], %[temp2] \n\t"
    "mul %[temp2], %[temp1], %[kC2] \n\t"
    MUL_SHIFT_C1(temp13, temp3)
    MUL_SHIFT_C1_IO(temp1, temp19)
    "mul %[temp3], %[temp3], %[kC2] \n\t"
    "sra %[temp2], %[temp2], 16 \n\t"
    "sra %[temp3], %[temp3], 16 \n\t"
    "subu %[temp13], %[temp2], %[temp13] \n\t"
    "addu %[temp3], %[temp1], %[temp3] \n\t"
    "addu %[temp2], %[temp4], %[temp6] \n\t"
    "subu %[temp4], %[temp4], %[temp6] \n\t"
    "mul %[temp6], %[temp5], %[kC2] \n\t"
    MUL_SHIFT_C1(temp1, temp7)
    MUL_SHIFT_C1_IO(temp5, temp19)
    "mul %[temp7], %[temp7], %[kC2] \n\t"
    "sra %[temp6], %[temp6], 16 \n\t"
    "sra %[temp7], %[temp7], 16 \n\t"
    "subu %[temp1], %[temp6], %[temp1] \n\t"
    "addu %[temp7], %[temp5], %[temp7] \n\t"
    // Final butterflies; every result is scaled down by '>> 3'.
    "addu %[temp5], %[temp16], %[temp11] \n\t"
    "subu %[temp16], %[temp16], %[temp11] \n\t"
    "addu %[temp11], %[temp8], %[temp17] \n\t"
    "subu %[temp8], %[temp8], %[temp17] \n\t"
    "sra %[temp5], %[temp5], 3 \n\t"
    "sra %[temp16], %[temp16], 3 \n\t"
    "sra %[temp11], %[temp11], 3 \n\t"
    "sra %[temp8], %[temp8], 3 \n\t"
    "addu %[temp17], %[temp10], %[temp15] \n\t"
    "subu %[temp10], %[temp10], %[temp15] \n\t"
    "addu %[temp15], %[temp12], %[temp9] \n\t"
    "subu %[temp12], %[temp12], %[temp9] \n\t"
    "sra %[temp17], %[temp17], 3 \n\t"
    "sra %[temp10], %[temp10], 3 \n\t"
    "sra %[temp15], %[temp15], 3 \n\t"
    "sra %[temp12], %[temp12], 3 \n\t"
    "addu %[temp9], %[temp14], %[temp3] \n\t"
    "subu %[temp14], %[temp14], %[temp3] \n\t"
    "addu %[temp3], %[temp0], %[temp13] \n\t"
    "subu %[temp0], %[temp0], %[temp13] \n\t"
    "sra %[temp9], %[temp9], 3 \n\t"
    "sra %[temp14], %[temp14], 3 \n\t"
    "sra %[temp3], %[temp3], 3 \n\t"
    "sra %[temp0], %[temp0], 3 \n\t"
    "addu %[temp13], %[temp2], %[temp7] \n\t"
    "subu %[temp2], %[temp2], %[temp7] \n\t"
    "addu %[temp7], %[temp4], %[temp1] \n\t"
    "subu %[temp4], %[temp4], %[temp1] \n\t"
    "sra %[temp13], %[temp13], 3 \n\t"
    "sra %[temp2], %[temp2], 3 \n\t"
    "sra %[temp7], %[temp7], 3 \n\t"
    "sra %[temp4], %[temp4], 3 \n\t"
    // Clamp-and-store stage. temp6 holds the upper bound 255.
    // For each pixel: sum = dst byte + residual. If 'sum >> 8' is zero the
    // sum is already in [0, 255] and the branch skips the fix-up. Otherwise
    // the value is zeroed and then replaced by 255 when 'sum >> 31' is
    // zero, i.e. when the sum was positive.
    "addiu %[temp6], $zero, 255 \n\t"
    // Row 0 (offsets 0..3 + 0 * BPS), one pixel at a time.
    "lbu %[temp1], 0+0*" XSTR(BPS) "(%[dst]) \n\t"
    "addu %[temp1], %[temp1], %[temp5] \n\t"
    "sra %[temp5], %[temp1], 8 \n\t"
    "sra %[temp18], %[temp1], 31 \n\t"
    "beqz %[temp5], 1f \n\t"
    "xor %[temp1], %[temp1], %[temp1] \n\t"
    "movz %[temp1], %[temp6], %[temp18] \n\t"
  "1: \n\t"
    "lbu %[temp18], 1+0*" XSTR(BPS) "(%[dst]) \n\t"
    "sb %[temp1], 0+0*" XSTR(BPS) "(%[dst]) \n\t"
    "addu %[temp18], %[temp18], %[temp11] \n\t"
    "sra %[temp11], %[temp18], 8 \n\t"
    "sra %[temp1], %[temp18], 31 \n\t"
    "beqz %[temp11], 2f \n\t"
    "xor %[temp18], %[temp18], %[temp18] \n\t"
    "movz %[temp18], %[temp6], %[temp1] \n\t"
  "2: \n\t"
    "lbu %[temp1], 2+0*" XSTR(BPS) "(%[dst]) \n\t"
    "sb %[temp18], 1+0*" XSTR(BPS) "(%[dst]) \n\t"
    "addu %[temp1], %[temp1], %[temp8] \n\t"
    "sra %[temp8], %[temp1], 8 \n\t"
    "sra %[temp18], %[temp1], 31 \n\t"
    "beqz %[temp8], 3f \n\t"
    "xor %[temp1], %[temp1], %[temp1] \n\t"
    "movz %[temp1], %[temp6], %[temp18] \n\t"
  "3: \n\t"
    "lbu %[temp18], 3+0*" XSTR(BPS) "(%[dst]) \n\t"
    "sb %[temp1], 2+0*" XSTR(BPS) "(%[dst]) \n\t"
    "addu %[temp18], %[temp18], %[temp16] \n\t"
    "sra %[temp16], %[temp18], 8 \n\t"
    "sra %[temp1], %[temp18], 31 \n\t"
    "beqz %[temp16], 4f \n\t"
    "xor %[temp18], %[temp18], %[temp18] \n\t"
    "movz %[temp18], %[temp6], %[temp1] \n\t"
  "4: \n\t"
    "sb %[temp18], 3+0*" XSTR(BPS) "(%[dst]) \n\t"
    // Row 1 (offsets 0..3 + 1 * BPS): load four, add, clamp, store four.
    "lbu %[temp5], 0+1*" XSTR(BPS) "(%[dst]) \n\t"
    "lbu %[temp8], 1+1*" XSTR(BPS) "(%[dst]) \n\t"
    "lbu %[temp11], 2+1*" XSTR(BPS) "(%[dst]) \n\t"
    "lbu %[temp16], 3+1*" XSTR(BPS) "(%[dst]) \n\t"
    "addu %[temp5], %[temp5], %[temp17] \n\t"
    "addu %[temp8], %[temp8], %[temp15] \n\t"
    "addu %[temp11], %[temp11], %[temp12] \n\t"
    "addu %[temp16], %[temp16], %[temp10] \n\t"
    "sra %[temp18], %[temp5], 8 \n\t"
    "sra %[temp1], %[temp5], 31 \n\t"
    "beqz %[temp18], 5f \n\t"
    "xor %[temp5], %[temp5], %[temp5] \n\t"
    "movz %[temp5], %[temp6], %[temp1] \n\t"
  "5: \n\t"
    "sra %[temp18], %[temp8], 8 \n\t"
    "sra %[temp1], %[temp8], 31 \n\t"
    "beqz %[temp18], 6f \n\t"
    "xor %[temp8], %[temp8], %[temp8] \n\t"
    "movz %[temp8], %[temp6], %[temp1] \n\t"
  "6: \n\t"
    "sra %[temp18], %[temp11], 8 \n\t"
    "sra %[temp1], %[temp11], 31 \n\t"
    "sra %[temp17], %[temp16], 8 \n\t"
    "sra %[temp15], %[temp16], 31 \n\t"
    "beqz %[temp18], 7f \n\t"
    "xor %[temp11], %[temp11], %[temp11] \n\t"
    "movz %[temp11], %[temp6], %[temp1] \n\t"
  "7: \n\t"
    "beqz %[temp17], 8f \n\t"
    "xor %[temp16], %[temp16], %[temp16] \n\t"
    "movz %[temp16], %[temp6], %[temp15] \n\t"
  "8: \n\t"
    "sb %[temp5], 0+1*" XSTR(BPS) "(%[dst]) \n\t"
    "sb %[temp8], 1+1*" XSTR(BPS) "(%[dst]) \n\t"
    "sb %[temp11], 2+1*" XSTR(BPS) "(%[dst]) \n\t"
    "sb %[temp16], 3+1*" XSTR(BPS) "(%[dst]) \n\t"
    // Row 2 (offsets 0..3 + 2 * BPS).
    "lbu %[temp5], 0+2*" XSTR(BPS) "(%[dst]) \n\t"
    "lbu %[temp8], 1+2*" XSTR(BPS) "(%[dst]) \n\t"
    "lbu %[temp11], 2+2*" XSTR(BPS) "(%[dst]) \n\t"
    "lbu %[temp16], 3+2*" XSTR(BPS) "(%[dst]) \n\t"
    "addu %[temp5], %[temp5], %[temp9] \n\t"
    "addu %[temp8], %[temp8], %[temp3] \n\t"
    "addu %[temp11], %[temp11], %[temp0] \n\t"
    "addu %[temp16], %[temp16], %[temp14] \n\t"
    "sra %[temp18], %[temp5], 8 \n\t"
    "sra %[temp1], %[temp5], 31 \n\t"
    "sra %[temp17], %[temp8], 8 \n\t"
    "sra %[temp15], %[temp8], 31 \n\t"
    "sra %[temp12], %[temp11], 8 \n\t"
    "sra %[temp10], %[temp11], 31 \n\t"
    "sra %[temp9], %[temp16], 8 \n\t"
    "sra %[temp3], %[temp16], 31 \n\t"
    "beqz %[temp18], 9f \n\t"
    "xor %[temp5], %[temp5], %[temp5] \n\t"
    "movz %[temp5], %[temp6], %[temp1] \n\t"
  "9: \n\t"
    "beqz %[temp17], 10f \n\t"
    "xor %[temp8], %[temp8], %[temp8] \n\t"
    "movz %[temp8], %[temp6], %[temp15] \n\t"
  "10: \n\t"
    "beqz %[temp12], 11f \n\t"
    "xor %[temp11], %[temp11], %[temp11] \n\t"
    "movz %[temp11], %[temp6], %[temp10] \n\t"
  "11: \n\t"
    "beqz %[temp9], 12f \n\t"
    "xor %[temp16], %[temp16], %[temp16] \n\t"
    "movz %[temp16], %[temp6], %[temp3] \n\t"
  "12: \n\t"
    "sb %[temp5], 0+2*" XSTR(BPS) "(%[dst]) \n\t"
    "sb %[temp8], 1+2*" XSTR(BPS) "(%[dst]) \n\t"
    "sb %[temp11], 2+2*" XSTR(BPS) "(%[dst]) \n\t"
    "sb %[temp16], 3+2*" XSTR(BPS) "(%[dst]) \n\t"
    // Row 3 (offsets 0..3 + 3 * BPS).
    "lbu %[temp5], 0+3*" XSTR(BPS) "(%[dst]) \n\t"
    "lbu %[temp8], 1+3*" XSTR(BPS) "(%[dst]) \n\t"
    "lbu %[temp11], 2+3*" XSTR(BPS) "(%[dst]) \n\t"
    "lbu %[temp16], 3+3*" XSTR(BPS) "(%[dst]) \n\t"
    "addu %[temp5], %[temp5], %[temp13] \n\t"
    "addu %[temp8], %[temp8], %[temp7] \n\t"
    "addu %[temp11], %[temp11], %[temp4] \n\t"
    "addu %[temp16], %[temp16], %[temp2] \n\t"
    "sra %[temp18], %[temp5], 8 \n\t"
    "sra %[temp1], %[temp5], 31 \n\t"
    "sra %[temp17], %[temp8], 8 \n\t"
    "sra %[temp15], %[temp8], 31 \n\t"
    "sra %[temp12], %[temp11], 8 \n\t"
    "sra %[temp10], %[temp11], 31 \n\t"
    "sra %[temp9], %[temp16], 8 \n\t"
    "sra %[temp3], %[temp16], 31 \n\t"
    "beqz %[temp18], 13f \n\t"
    "xor %[temp5], %[temp5], %[temp5] \n\t"
    "movz %[temp5], %[temp6], %[temp1] \n\t"
  "13: \n\t"
    "beqz %[temp17], 14f \n\t"
    "xor %[temp8], %[temp8], %[temp8] \n\t"
    "movz %[temp8], %[temp6], %[temp15] \n\t"
  "14: \n\t"
    "beqz %[temp12], 15f \n\t"
    "xor %[temp11], %[temp11], %[temp11] \n\t"
    "movz %[temp11], %[temp6], %[temp10] \n\t"
  "15: \n\t"
    "beqz %[temp9], 16f \n\t"
    "xor %[temp16], %[temp16], %[temp16] \n\t"
    "movz %[temp16], %[temp6], %[temp3] \n\t"
  "16: \n\t"
    "sb %[temp5], 0+3*" XSTR(BPS) "(%[dst]) \n\t"
    "sb %[temp8], 1+3*" XSTR(BPS) "(%[dst]) \n\t"
    "sb %[temp11], 2+3*" XSTR(BPS) "(%[dst]) \n\t"
    "sb %[temp16], 3+3*" XSTR(BPS) "(%[dst]) \n\t"

    // All temporaries are early-clobber outputs ("=&r") so none of them
    // can share a register with the 'in', 'dst', 'kC1' or 'kC2' inputs.
    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7), [temp8]"=&r"(temp8),
      [temp9]"=&r"(temp9), [temp10]"=&r"(temp10), [temp11]"=&r"(temp11),
      [temp12]"=&r"(temp12), [temp13]"=&r"(temp13), [temp14]"=&r"(temp14),
      [temp15]"=&r"(temp15), [temp16]"=&r"(temp16), [temp17]"=&r"(temp17),
      [temp18]"=&r"(temp18), [temp19]"=&r"(temp19)
    : [in]"r"(in), [kC1]"r"(kC1), [kC2]"r"(kC2), [dst]"r"(dst)
    : "memory", "hi", "lo"
  );
}
535
536
// Transforms one block, or two side-by-side blocks when 'do_two' is
// non-zero. The second block takes its 16 coefficients from 'in + 16' and
// writes to 'dst + 4'.
static void TransformTwo(const int16_t* WEBP_RESTRICT in,
                         uint8_t* WEBP_RESTRICT dst, int do_two) {
  TransformOne(in, dst);
  if (do_two) {
    TransformOne(in + 16, dst + 4);
  }
}
543
544
//------------------------------------------------------------------------------
545
// Entry point
546
547
extern void VP8DspInitMIPS32(void);
548
549
WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitMIPS32(void) {
550
VP8InitClipTables();
551
552
VP8Transform = TransformTwo;
553
554
VP8VFilter16 = VFilter16;
555
VP8HFilter16 = HFilter16;
556
VP8VFilter8 = VFilter8;
557
VP8HFilter8 = HFilter8;
558
VP8VFilter16i = VFilter16i;
559
VP8HFilter16i = HFilter16i;
560
VP8VFilter8i = VFilter8i;
561
VP8HFilter8i = HFilter8i;
562
563
VP8SimpleVFilter16 = SimpleVFilter16;
564
VP8SimpleHFilter16 = SimpleHFilter16;
565
VP8SimpleVFilter16i = SimpleVFilter16i;
566
VP8SimpleHFilter16i = SimpleHFilter16i;
567
}
568
569
#else // !WEBP_USE_MIPS32
570
571
// Build without WEBP_USE_MIPS32: WEBP_DSP_INIT_STUB is defined outside this
// file; presumably it emits an empty VP8DspInitMIPS32 so that the symbol
// still exists -- TODO confirm against src/dsp/dsp.h.
WEBP_DSP_INIT_STUB(VP8DspInitMIPS32)
572
573
#endif // WEBP_USE_MIPS32
574
575