// Source: GitHub repository godotengine/godot,
// path: blob/master/thirdparty/libwebp/src/dsp/lossless_mips_dsp_r2.c
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Image transforms and color space conversion methods for lossless decoder.
//
// Author(s): Djordje Pesut ([email protected])
//            Jovan Zelincevic ([email protected])

#include "src/dsp/dsp.h"

// The rest of this file is compiled only when WEBP_USE_MIPS_DSP_R2 is defined.
#if defined(WEBP_USE_MIPS_DSP_R2)

#include "src/dsp/lossless.h"
#include "src/dsp/lossless_common.h"

// MAP_COLOR_FUNCS(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE)
//
// Defines
//   static void FUNC_NAME(const TYPE* src, const uint32_t* const color_map,
//                         TYPE* dst, int y_start, int y_end, int width);
// which, for every row in [y_start, y_end), reads 'width' elements from 'src',
// looks each one up in 'color_map' and writes the result to 'dst'. 'src' and
// 'dst' simply keep advancing from one row to the next.
//
// Each row is processed in two steps:
//  - groups of four elements (width >> 2 of them) in inline assembly. The
//    '.ifc' assembler directives select the instructions by comparing the
//    stringified TYPE against the literal names, so TYPE has to be spelled
//    exactly 'uint8_t' or 'uint32_t':
//      uint32_t: index = bits 8..15 of the source word; the whole 32-bit
//                table entry is stored.
//      uint8_t : index = the source byte; bits 8..15 of the table entry are
//                stored.
//    The index is shifted left by 2 to turn it into a byte offset into the
//    32-bit table before the indexed load (lwx).
//  - the remaining (width & 3) elements in C, using GET_INDEX / GET_VALUE.
//    NOTE(review): the assembly path hard-codes its own index/value
//    extraction, so GET_INDEX / GET_VALUE are expected to agree with it.
#define MAP_COLOR_FUNCS(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE) \
static void FUNC_NAME(const TYPE* src, \
                      const uint32_t* const color_map, \
                      TYPE* dst, int y_start, int y_end, \
                      int width) { \
  int y; \
  for (y = y_start; y < y_end; ++y) { \
    int x; \
    for (x = 0; x < (width >> 2); ++x) { \
      int tmp1, tmp2, tmp3, tmp4; \
      __asm__ volatile ( \
      ".ifc " #TYPE ", uint8_t \n\t" \
        "lbu %[tmp1], 0(%[src]) \n\t" \
        "lbu %[tmp2], 1(%[src]) \n\t" \
        "lbu %[tmp3], 2(%[src]) \n\t" \
        "lbu %[tmp4], 3(%[src]) \n\t" \
        "addiu %[src], %[src], 4 \n\t" \
      ".endif \n\t" \
      ".ifc " #TYPE ", uint32_t \n\t" \
        "lw %[tmp1], 0(%[src]) \n\t" \
        "lw %[tmp2], 4(%[src]) \n\t" \
        "lw %[tmp3], 8(%[src]) \n\t" \
        "lw %[tmp4], 12(%[src]) \n\t" \
        "ext %[tmp1], %[tmp1], 8, 8 \n\t" \
        "ext %[tmp2], %[tmp2], 8, 8 \n\t" \
        "ext %[tmp3], %[tmp3], 8, 8 \n\t" \
        "ext %[tmp4], %[tmp4], 8, 8 \n\t" \
        "addiu %[src], %[src], 16 \n\t" \
      ".endif \n\t" \
        "sll %[tmp1], %[tmp1], 2 \n\t" \
        "sll %[tmp2], %[tmp2], 2 \n\t" \
        "sll %[tmp3], %[tmp3], 2 \n\t" \
        "sll %[tmp4], %[tmp4], 2 \n\t" \
        "lwx %[tmp1], %[tmp1](%[color_map]) \n\t" \
        "lwx %[tmp2], %[tmp2](%[color_map]) \n\t" \
        "lwx %[tmp3], %[tmp3](%[color_map]) \n\t" \
        "lwx %[tmp4], %[tmp4](%[color_map]) \n\t" \
      ".ifc " #TYPE ", uint8_t \n\t" \
        "ext %[tmp1], %[tmp1], 8, 8 \n\t" \
        "ext %[tmp2], %[tmp2], 8, 8 \n\t" \
        "ext %[tmp3], %[tmp3], 8, 8 \n\t" \
        "ext %[tmp4], %[tmp4], 8, 8 \n\t" \
        "sb %[tmp1], 0(%[dst]) \n\t" \
        "sb %[tmp2], 1(%[dst]) \n\t" \
        "sb %[tmp3], 2(%[dst]) \n\t" \
        "sb %[tmp4], 3(%[dst]) \n\t" \
        "addiu %[dst], %[dst], 4 \n\t" \
      ".endif \n\t" \
      ".ifc " #TYPE ", uint32_t \n\t" \
        "sw %[tmp1], 0(%[dst]) \n\t" \
        "sw %[tmp2], 4(%[dst]) \n\t" \
        "sw %[tmp3], 8(%[dst]) \n\t" \
        "sw %[tmp4], 12(%[dst]) \n\t" \
        "addiu %[dst], %[dst], 16 \n\t" \
      ".endif \n\t" \
        : [tmp1]"=&r"(tmp1), [tmp2]"=&r"(tmp2), [tmp3]"=&r"(tmp3), \
          [tmp4]"=&r"(tmp4), [src]"+&r"(src), [dst]"+r"(dst) \
        : [color_map]"r"(color_map) \
        : "memory" \
      ); \
    } \
    for (x = 0; x < (width & 3); ++x) { \
      *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]); \
    } \
  } \
}

// 32-bit (ARGB) and 8-bit (alpha) flavours of the color-map lookup.
MAP_COLOR_FUNCS(MapARGB_MIPSdspR2, uint32_t, VP8GetARGBIndex, VP8GetARGBValue)
MAP_COLOR_FUNCS(MapAlpha_MIPSdspR2, uint8_t, VP8GetAlphaIndex, VP8GetAlphaValue)

#undef MAP_COLOR_FUNCS
93
94
static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
95
uint32_t c2) {
96
int temp0, temp1, temp2, temp3, temp4, temp5;
97
__asm__ volatile (
98
"preceu.ph.qbr %[temp1], %[c0] \n\t"
99
"preceu.ph.qbl %[temp2], %[c0] \n\t"
100
"preceu.ph.qbr %[temp3], %[c1] \n\t"
101
"preceu.ph.qbl %[temp4], %[c1] \n\t"
102
"preceu.ph.qbr %[temp5], %[c2] \n\t"
103
"preceu.ph.qbl %[temp0], %[c2] \n\t"
104
"subq.ph %[temp3], %[temp3], %[temp5] \n\t"
105
"subq.ph %[temp4], %[temp4], %[temp0] \n\t"
106
"addq.ph %[temp1], %[temp1], %[temp3] \n\t"
107
"addq.ph %[temp2], %[temp2], %[temp4] \n\t"
108
"shll_s.ph %[temp1], %[temp1], 7 \n\t"
109
"shll_s.ph %[temp2], %[temp2], 7 \n\t"
110
"precrqu_s.qb.ph %[temp2], %[temp2], %[temp1] \n\t"
111
: [temp0]"=r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
112
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5)
113
: [c0]"r"(c0), [c1]"r"(c1), [c2]"r"(c2)
114
: "memory"
115
);
116
return temp2;
117
}
118
119
static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
120
uint32_t c2) {
121
int temp0, temp1, temp2, temp3, temp4, temp5;
122
__asm__ volatile (
123
"adduh.qb %[temp5], %[c0], %[c1] \n\t"
124
"preceu.ph.qbr %[temp3], %[c2] \n\t"
125
"preceu.ph.qbr %[temp1], %[temp5] \n\t"
126
"preceu.ph.qbl %[temp2], %[temp5] \n\t"
127
"preceu.ph.qbl %[temp4], %[c2] \n\t"
128
"subq.ph %[temp3], %[temp1], %[temp3] \n\t"
129
"subq.ph %[temp4], %[temp2], %[temp4] \n\t"
130
"shrl.ph %[temp5], %[temp3], 15 \n\t"
131
"shrl.ph %[temp0], %[temp4], 15 \n\t"
132
"addq.ph %[temp3], %[temp3], %[temp5] \n\t"
133
"addq.ph %[temp4], %[temp0], %[temp4] \n\t"
134
"shra.ph %[temp3], %[temp3], 1 \n\t"
135
"shra.ph %[temp4], %[temp4], 1 \n\t"
136
"addq.ph %[temp1], %[temp1], %[temp3] \n\t"
137
"addq.ph %[temp2], %[temp2], %[temp4] \n\t"
138
"shll_s.ph %[temp1], %[temp1], 7 \n\t"
139
"shll_s.ph %[temp2], %[temp2], 7 \n\t"
140
"precrqu_s.qb.ph %[temp1], %[temp2], %[temp1] \n\t"
141
: [temp0]"=r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
142
[temp3]"=&r"(temp3), [temp4]"=r"(temp4), [temp5]"=&r"(temp5)
143
: [c0]"r"(c0), [c1]"r"(c1), [c2]"r"(c2)
144
: "memory"
145
);
146
return temp1;
147
}
148
149
static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
150
int temp0, temp1, temp2, temp3, temp4, temp5;
151
__asm__ volatile (
152
"cmpgdu.lt.qb %[temp1], %[c], %[b] \n\t"
153
"pick.qb %[temp1], %[b], %[c] \n\t"
154
"pick.qb %[temp2], %[c], %[b] \n\t"
155
"cmpgdu.lt.qb %[temp4], %[c], %[a] \n\t"
156
"pick.qb %[temp4], %[a], %[c] \n\t"
157
"pick.qb %[temp5], %[c], %[a] \n\t"
158
"subu.qb %[temp3], %[temp1], %[temp2] \n\t"
159
"subu.qb %[temp0], %[temp4], %[temp5] \n\t"
160
"raddu.w.qb %[temp3], %[temp3] \n\t"
161
"raddu.w.qb %[temp0], %[temp0] \n\t"
162
"subu %[temp3], %[temp3], %[temp0] \n\t"
163
"slti %[temp0], %[temp3], 0x1 \n\t"
164
"movz %[a], %[b], %[temp0] \n\t"
165
: [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
166
[temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp0]"=&r"(temp0),
167
[a]"+&r"(a)
168
: [b]"r"(b), [c]"r"(c)
169
);
170
return a;
171
}
172
173
static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {
174
__asm__ volatile (
175
"adduh.qb %[a0], %[a0], %[a1] \n\t"
176
: [a0]"+r"(a0)
177
: [a1]"r"(a1)
178
);
179
return a0;
180
}
181
182
static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
183
return Average2(Average2(a0, a2), a1);
184
}
185
186
static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
187
uint32_t a2, uint32_t a3) {
188
return Average2(Average2(a0, a1), Average2(a2, a3));
189
}

// Predictor 5: Average3 of *left, top[0] and top[1].
static uint32_t Predictor5_MIPSdspR2(const uint32_t* const left,
                                     const uint32_t* const top) {
  return Average3(*left, top[0], top[1]);
}

// Predictor 6: Average2 of *left and top[-1].
static uint32_t Predictor6_MIPSdspR2(const uint32_t* const left,
                                     const uint32_t* const top) {
  return Average2(*left, top[-1]);
}

// Predictor 7: Average2 of *left and top[0].
static uint32_t Predictor7_MIPSdspR2(const uint32_t* const left,
                                     const uint32_t* const top) {
  return Average2(*left, top[0]);
}

// Predictor 8: Average2 of top[-1] and top[0]. 'left' is not used.
static uint32_t Predictor8_MIPSdspR2(const uint32_t* const left,
                                     const uint32_t* const top) {
  (void)left;
  return Average2(top[-1], top[0]);
}

// Predictor 9: Average2 of top[0] and top[1]. 'left' is not used.
static uint32_t Predictor9_MIPSdspR2(const uint32_t* const left,
                                     const uint32_t* const top) {
  (void)left;
  return Average2(top[0], top[1]);
}

// Predictor 10: Average4 of *left, top[-1], top[0] and top[1].
static uint32_t Predictor10_MIPSdspR2(const uint32_t* const left,
                                      const uint32_t* const top) {
  return Average4(*left, top[-1], top[0], top[1]);
}

// Predictor 11: Select() between top[0] and *left, using top[-1] as the
// reference pixel.
static uint32_t Predictor11_MIPSdspR2(const uint32_t* const left,
                                      const uint32_t* const top) {
  return Select(top[0], *left, top[-1]);
}

// Predictor 12: ClampedAddSubtractFull(*left, top[0], top[-1]).
static uint32_t Predictor12_MIPSdspR2(const uint32_t* const left,
                                      const uint32_t* const top) {
  return ClampedAddSubtractFull(*left, top[0], top[-1]);
}

// Predictor 13: ClampedAddSubtractHalf(*left, top[0], top[-1]).
static uint32_t Predictor13_MIPSdspR2(const uint32_t* const left,
                                      const uint32_t* const top) {
  return ClampedAddSubtractHalf(*left, top[0], top[-1]);
}

// Add green to blue and red channels (i.e. perform the inverse transform of
// 'subtract green').
//
// Reads 'num_pixels' 32-bit pixels from 'src' and writes them to 'dst' with
// bits 8..15 (green) added, modulo 256, to bits 0..7 and bits 16..23. Bits
// 8..15 and 24..31 are stored unchanged.
//
// Per pixel: 'ext' isolates the green byte, 'replv.ph' replicates it into
// both halfwords (0x00GG00GG) and 'addu.qb' performs the wrapping per-byte
// add. The first loop (label 0) handles four pixels per iteration up to
// src + (num_pixels & ~3); the second loop (label 1) handles the remaining
// pixels one at a time. The code runs under '.set noreorder', so the
// instruction following each branch is its delay slot and always executes.
static void AddGreenToBlueAndRed_MIPSdspR2(const uint32_t* src, int num_pixels,
                                           uint32_t* dst) {
  uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
  const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
  const uint32_t* const p_loop2_end = src + num_pixels;
  __asm__ volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    "beq %[src], %[p_loop1_end], 3f \n\t"
    " nop \n\t"
  "0: \n\t"
    "lw %[temp0], 0(%[src]) \n\t"
    "lw %[temp1], 4(%[src]) \n\t"
    "lw %[temp2], 8(%[src]) \n\t"
    "lw %[temp3], 12(%[src]) \n\t"
    "ext %[temp4], %[temp0], 8, 8 \n\t"
    "ext %[temp5], %[temp1], 8, 8 \n\t"
    "ext %[temp6], %[temp2], 8, 8 \n\t"
    "ext %[temp7], %[temp3], 8, 8 \n\t"
    "addiu %[src], %[src], 16 \n\t"
    "addiu %[dst], %[dst], 16 \n\t"
    "replv.ph %[temp4], %[temp4] \n\t"
    "replv.ph %[temp5], %[temp5] \n\t"
    "replv.ph %[temp6], %[temp6] \n\t"
    "replv.ph %[temp7], %[temp7] \n\t"
    "addu.qb %[temp0], %[temp0], %[temp4] \n\t"
    "addu.qb %[temp1], %[temp1], %[temp5] \n\t"
    "addu.qb %[temp2], %[temp2], %[temp6] \n\t"
    "addu.qb %[temp3], %[temp3], %[temp7] \n\t"
    // 'dst' was already advanced, hence the negative store offsets.
    "sw %[temp0], -16(%[dst]) \n\t"
    "sw %[temp1], -12(%[dst]) \n\t"
    "sw %[temp2], -8(%[dst]) \n\t"
    "bne %[src], %[p_loop1_end], 0b \n\t"
    " sw %[temp3], -4(%[dst]) \n\t"
  "3: \n\t"
    "beq %[src], %[p_loop2_end], 2f \n\t"
    " nop \n\t"
  "1: \n\t"
    "lw %[temp0], 0(%[src]) \n\t"
    "addiu %[src], %[src], 4 \n\t"
    "addiu %[dst], %[dst], 4 \n\t"
    "ext %[temp4], %[temp0], 8, 8 \n\t"
    "replv.ph %[temp4], %[temp4] \n\t"
    "addu.qb %[temp0], %[temp0], %[temp4] \n\t"
    "bne %[src], %[p_loop2_end], 1b \n\t"
    " sw %[temp0], -4(%[dst]) \n\t"
  "2: \n\t"
    ".set pop \n\t"
    : [dst]"+&r"(dst), [src]"+&r"(src), [temp0]"=&r"(temp0),
      [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
      [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6),
      [temp7]"=&r"(temp7)
    : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
    : "memory"
  );
}
296
297
static void TransformColorInverse_MIPSdspR2(const VP8LMultipliers* const m,
298
const uint32_t* src, int num_pixels,
299
uint32_t* dst) {
300
int temp0, temp1, temp2, temp3, temp4, temp5;
301
uint32_t argb, argb1, new_red;
302
const uint32_t G_to_R = m->green_to_red_;
303
const uint32_t G_to_B = m->green_to_blue_;
304
const uint32_t R_to_B = m->red_to_blue_;
305
const uint32_t* const p_loop_end = src + (num_pixels & ~1);
306
__asm__ volatile (
307
".set push \n\t"
308
".set noreorder \n\t"
309
"beq %[src], %[p_loop_end], 1f \n\t"
310
" nop \n\t"
311
"replv.ph %[temp0], %[G_to_R] \n\t"
312
"replv.ph %[temp1], %[G_to_B] \n\t"
313
"replv.ph %[temp2], %[R_to_B] \n\t"
314
"shll.ph %[temp0], %[temp0], 8 \n\t"
315
"shll.ph %[temp1], %[temp1], 8 \n\t"
316
"shll.ph %[temp2], %[temp2], 8 \n\t"
317
"shra.ph %[temp0], %[temp0], 8 \n\t"
318
"shra.ph %[temp1], %[temp1], 8 \n\t"
319
"shra.ph %[temp2], %[temp2], 8 \n\t"
320
"0: \n\t"
321
"lw %[argb], 0(%[src]) \n\t"
322
"lw %[argb1], 4(%[src]) \n\t"
323
"sw %[argb], 0(%[dst]) \n\t"
324
"sw %[argb1], 4(%[dst]) \n\t"
325
"addiu %[src], %[src], 8 \n\t"
326
"addiu %[dst], %[dst], 8 \n\t"
327
"precrq.qb.ph %[temp3], %[argb], %[argb1] \n\t"
328
"preceu.ph.qbra %[temp3], %[temp3] \n\t"
329
"shll.ph %[temp3], %[temp3], 8 \n\t"
330
"shra.ph %[temp3], %[temp3], 8 \n\t"
331
"mul.ph %[temp5], %[temp3], %[temp0] \n\t"
332
"mul.ph %[temp3], %[temp3], %[temp1] \n\t"
333
"precrq.ph.w %[new_red], %[argb], %[argb1] \n\t"
334
"ins %[argb1], %[argb], 16, 16 \n\t"
335
"shra.ph %[temp5], %[temp5], 5 \n\t"
336
"shra.ph %[temp3], %[temp3], 5 \n\t"
337
"addu.ph %[new_red], %[new_red], %[temp5] \n\t"
338
"addu.ph %[argb1], %[argb1], %[temp3] \n\t"
339
"preceu.ph.qbra %[temp5], %[new_red] \n\t"
340
"shll.ph %[temp4], %[temp5], 8 \n\t"
341
"shra.ph %[temp4], %[temp4], 8 \n\t"
342
"mul.ph %[temp4], %[temp4], %[temp2] \n\t"
343
"sb %[temp5], -2(%[dst]) \n\t"
344
"sra %[temp5], %[temp5], 16 \n\t"
345
"shra.ph %[temp4], %[temp4], 5 \n\t"
346
"addu.ph %[argb1], %[argb1], %[temp4] \n\t"
347
"preceu.ph.qbra %[temp3], %[argb1] \n\t"
348
"sb %[temp5], -6(%[dst]) \n\t"
349
"sb %[temp3], -4(%[dst]) \n\t"
350
"sra %[temp3], %[temp3], 16 \n\t"
351
"bne %[src], %[p_loop_end], 0b \n\t"
352
" sb %[temp3], -8(%[dst]) \n\t"
353
"1: \n\t"
354
".set pop \n\t"
355
: [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
356
[temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
357
[new_red]"=&r"(new_red), [argb]"=&r"(argb),
358
[argb1]"=&r"(argb1), [dst]"+&r"(dst), [src]"+&r"(src)
359
: [G_to_R]"r"(G_to_R), [R_to_B]"r"(R_to_B),
360
[G_to_B]"r"(G_to_B), [p_loop_end]"r"(p_loop_end)
361
: "memory", "hi", "lo"
362
);
363
364
// Fall-back to C-version for left-overs.
365
if (num_pixels & 1) VP8LTransformColorInverse_C(m, src, 1, dst);
366
}

// Converts 'num_pixels' 32-bit pixels from 'src' into 3 bytes per pixel in
// 'dst'.
//
// The first loop (label 0) consumes four pixels (16 bytes) and produces 12
// output bytes per iteration with three unaligned word stores (usw); the
// byte shuffling uses ins/sll/rotr/wsbh/balign. The second loop (label 1)
// handles the remaining (num_pixels & 3) pixels one at a time: bits 16..23
// of the pixel go to dst[0] and the byte-swapped low halfword is stored to
// dst[1..2] with an unaligned halfword store (ush).
// NOTE(review): the resulting byte order (R, G, B for a pixel holding blue in
// bits 0..7) relies on a little-endian target -- confirm.
static void ConvertBGRAToRGB_MIPSdspR2(const uint32_t* src,
                                       int num_pixels, uint8_t* dst) {
  int temp0, temp1, temp2, temp3;
  const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
  const uint32_t* const p_loop2_end = src + num_pixels;
  __asm__ volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    "beq %[src], %[p_loop1_end], 3f \n\t"
    " nop \n\t"
  "0: \n\t"
    "lw %[temp3], 12(%[src]) \n\t"
    "lw %[temp2], 8(%[src]) \n\t"
    "lw %[temp1], 4(%[src]) \n\t"
    "lw %[temp0], 0(%[src]) \n\t"
    "ins %[temp3], %[temp2], 24, 8 \n\t"
    "sll %[temp2], %[temp2], 8 \n\t"
    "rotr %[temp3], %[temp3], 16 \n\t"
    "ins %[temp2], %[temp1], 0, 16 \n\t"
    "sll %[temp1], %[temp1], 8 \n\t"
    "wsbh %[temp3], %[temp3] \n\t"
    "balign %[temp0], %[temp1], 1 \n\t"
    "wsbh %[temp2], %[temp2] \n\t"
    "wsbh %[temp0], %[temp0] \n\t"
    "usw %[temp3], 8(%[dst]) \n\t"
    "rotr %[temp0], %[temp0], 16 \n\t"
    "usw %[temp2], 4(%[dst]) \n\t"
    "addiu %[src], %[src], 16 \n\t"
    "usw %[temp0], 0(%[dst]) \n\t"
    "bne %[src], %[p_loop1_end], 0b \n\t"
    " addiu %[dst], %[dst], 12 \n\t"
  "3: \n\t"
    "beq %[src], %[p_loop2_end], 2f \n\t"
    " nop \n\t"
  "1: \n\t"
    "lw %[temp0], 0(%[src]) \n\t"
    "addiu %[src], %[src], 4 \n\t"
    "wsbh %[temp1], %[temp0] \n\t"
    "addiu %[dst], %[dst], 3 \n\t"
    "ush %[temp1], -2(%[dst]) \n\t"
    "sra %[temp0], %[temp0], 16 \n\t"
    "bne %[src], %[p_loop2_end], 1b \n\t"
    " sb %[temp0], -3(%[dst]) \n\t"
  "2: \n\t"
    ".set pop \n\t"
    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
      [temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src)
    : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
    : "memory"
  );
}

// Converts 'num_pixels' 32-bit pixels from 'src' into 4 bytes per pixel in
// 'dst'.
//
// Every pixel goes through the same two steps: 'wsbh' swaps the bytes inside
// each halfword, then 'balign reg, reg, 1' with identical operands rotates
// the word left by one byte. The result is written with an unaligned word
// store (usw). The first loop (label 0) does this for four pixels per
// iteration, the second loop (label 1) for the remaining (num_pixels & 3)
// pixels.
// NOTE(review): the stored byte order (bits 16..23, 8..15, 0..7, 24..31 of
// the source pixel) assumes a little-endian target -- confirm.
static void ConvertBGRAToRGBA_MIPSdspR2(const uint32_t* src,
                                        int num_pixels, uint8_t* dst) {
  int temp0, temp1, temp2, temp3;
  const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
  const uint32_t* const p_loop2_end = src + num_pixels;
  __asm__ volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    "beq %[src], %[p_loop1_end], 3f \n\t"
    " nop \n\t"
  "0: \n\t"
    "lw %[temp0], 0(%[src]) \n\t"
    "lw %[temp1], 4(%[src]) \n\t"
    "lw %[temp2], 8(%[src]) \n\t"
    "lw %[temp3], 12(%[src]) \n\t"
    "wsbh %[temp0], %[temp0] \n\t"
    "wsbh %[temp1], %[temp1] \n\t"
    "wsbh %[temp2], %[temp2] \n\t"
    "wsbh %[temp3], %[temp3] \n\t"
    "addiu %[src], %[src], 16 \n\t"
    "balign %[temp0], %[temp0], 1 \n\t"
    "balign %[temp1], %[temp1], 1 \n\t"
    "balign %[temp2], %[temp2], 1 \n\t"
    "balign %[temp3], %[temp3], 1 \n\t"
    "usw %[temp0], 0(%[dst]) \n\t"
    "usw %[temp1], 4(%[dst]) \n\t"
    "usw %[temp2], 8(%[dst]) \n\t"
    "usw %[temp3], 12(%[dst]) \n\t"
    "bne %[src], %[p_loop1_end], 0b \n\t"
    " addiu %[dst], %[dst], 16 \n\t"
  "3: \n\t"
    "beq %[src], %[p_loop2_end], 2f \n\t"
    " nop \n\t"
  "1: \n\t"
    "lw %[temp0], 0(%[src]) \n\t"
    "wsbh %[temp0], %[temp0] \n\t"
    "addiu %[src], %[src], 4 \n\t"
    "balign %[temp0], %[temp0], 1 \n\t"
    "usw %[temp0], 0(%[dst]) \n\t"
    "bne %[src], %[p_loop2_end], 1b \n\t"
    " addiu %[dst], %[dst], 4 \n\t"
  "2: \n\t"
    ".set pop \n\t"
    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
      [temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src)
    : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
    : "memory"
  );
}

// Converts 'num_pixels' 32-bit pixels from 'src' into 2 bytes per pixel in
// 'dst', keeping the upper 4 bits of each 8-bit channel.
//
// For every pixel two bytes are built in place inside the loaded word:
//   bits 28..31 are inserted into bits 0..3   -> byte 0 = (bits 4..7 of the
//                                                pixel, then bits 28..31)
//   bits 12..15 are inserted into bits 16..19 -> byte 2 = (bits 20..23 of the
//                                                pixel, then bits 12..15)
// and 'precr.qb.ph' gathers bytes 2 and 0 of two pixels into one word.
// When WEBP_SWAP_16BIT_CSP == 1 that word is stored as is; otherwise 'wsbh'
// first swaps the two bytes of every 16-bit output value.
//
// The first loop (label 0) converts four pixels into 8 output bytes per
// iteration; the second loop (label 1) converts the remaining
// (num_pixels & 3) pixels one at a time with an unaligned halfword store.
static void ConvertBGRAToRGBA4444_MIPSdspR2(const uint32_t* src,
                                            int num_pixels, uint8_t* dst) {
  int temp0, temp1, temp2, temp3, temp4, temp5;
  const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
  const uint32_t* const p_loop2_end = src + num_pixels;
  __asm__ volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    "beq %[src], %[p_loop1_end], 3f \n\t"
    " nop \n\t"
  "0: \n\t"
    "lw %[temp0], 0(%[src]) \n\t"
    "lw %[temp1], 4(%[src]) \n\t"
    "lw %[temp2], 8(%[src]) \n\t"
    "lw %[temp3], 12(%[src]) \n\t"
    "ext %[temp4], %[temp0], 28, 4 \n\t"
    "ext %[temp5], %[temp0], 12, 4 \n\t"
    "ins %[temp0], %[temp4], 0, 4 \n\t"
    "ext %[temp4], %[temp1], 28, 4 \n\t"
    "ins %[temp0], %[temp5], 16, 4 \n\t"
    "ext %[temp5], %[temp1], 12, 4 \n\t"
    "ins %[temp1], %[temp4], 0, 4 \n\t"
    "ext %[temp4], %[temp2], 28, 4 \n\t"
    "ins %[temp1], %[temp5], 16, 4 \n\t"
    "ext %[temp5], %[temp2], 12, 4 \n\t"
    "ins %[temp2], %[temp4], 0, 4 \n\t"
    "ext %[temp4], %[temp3], 28, 4 \n\t"
    "ins %[temp2], %[temp5], 16, 4 \n\t"
    "ext %[temp5], %[temp3], 12, 4 \n\t"
    "ins %[temp3], %[temp4], 0, 4 \n\t"
    "precr.qb.ph %[temp1], %[temp1], %[temp0] \n\t"
    "ins %[temp3], %[temp5], 16, 4 \n\t"
    "addiu %[src], %[src], 16 \n\t"
    "precr.qb.ph %[temp3], %[temp3], %[temp2] \n\t"
#if (WEBP_SWAP_16BIT_CSP == 1)
    "usw %[temp1], 0(%[dst]) \n\t"
    "usw %[temp3], 4(%[dst]) \n\t"
#else
    "wsbh %[temp1], %[temp1] \n\t"
    "wsbh %[temp3], %[temp3] \n\t"
    "usw %[temp1], 0(%[dst]) \n\t"
    "usw %[temp3], 4(%[dst]) \n\t"
#endif
    "bne %[src], %[p_loop1_end], 0b \n\t"
    " addiu %[dst], %[dst], 8 \n\t"
  "3: \n\t"
    "beq %[src], %[p_loop2_end], 2f \n\t"
    " nop \n\t"
  "1: \n\t"
    "lw %[temp0], 0(%[src]) \n\t"
    "ext %[temp4], %[temp0], 28, 4 \n\t"
    "ext %[temp5], %[temp0], 12, 4 \n\t"
    "ins %[temp0], %[temp4], 0, 4 \n\t"
    "ins %[temp0], %[temp5], 16, 4 \n\t"
    "addiu %[src], %[src], 4 \n\t"
    "precr.qb.ph %[temp0], %[temp0], %[temp0] \n\t"
#if (WEBP_SWAP_16BIT_CSP == 1)
    "ush %[temp0], 0(%[dst]) \n\t"
#else
    "wsbh %[temp0], %[temp0] \n\t"
    "ush %[temp0], 0(%[dst]) \n\t"
#endif
    "bne %[src], %[p_loop2_end], 1b \n\t"
    " addiu %[dst], %[dst], 2 \n\t"
  "2: \n\t"
    ".set pop \n\t"
    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
      [dst]"+&r"(dst), [src]"+&r"(src)
    : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
    : "memory"
  );
}

// Converts 'num_pixels' 32-bit pixels from 'src' into one 16-bit value (2
// bytes) per pixel in 'dst'.
//
// The 16-bit value is assembled from three bit-field extracts of the pixel:
//   ext 8, 16 -> bits 8..23 as the starting halfword,
//   ext 5, 11 -> inserted into bits 0..10,
//   ext 3, 5  -> inserted into bits 0..4,
// which leaves bits 19..23 of the pixel in bits 11..15, bits 10..15 in bits
// 5..10 and bits 3..7 in bits 0..4 of the result. 'append ..., 16' packs two
// such values into one word. When WEBP_SWAP_16BIT_CSP == 1 the values are
// stored as is; otherwise 'wsbh' first swaps the two bytes of each value.
//
// The first loop (label 0) converts four pixels into 8 output bytes per
// iteration; the second loop (label 1) converts the remaining
// (num_pixels & 3) pixels one at a time with an unaligned halfword store.
static void ConvertBGRAToRGB565_MIPSdspR2(const uint32_t* src,
                                          int num_pixels, uint8_t* dst) {
  int temp0, temp1, temp2, temp3, temp4, temp5;
  const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
  const uint32_t* const p_loop2_end = src + num_pixels;
  __asm__ volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    "beq %[src], %[p_loop1_end], 3f \n\t"
    " nop \n\t"
  "0: \n\t"
    "lw %[temp0], 0(%[src]) \n\t"
    "lw %[temp1], 4(%[src]) \n\t"
    "lw %[temp2], 8(%[src]) \n\t"
    "lw %[temp3], 12(%[src]) \n\t"
    "ext %[temp4], %[temp0], 8, 16 \n\t"
    "ext %[temp5], %[temp0], 5, 11 \n\t"
    "ext %[temp0], %[temp0], 3, 5 \n\t"
    "ins %[temp4], %[temp5], 0, 11 \n\t"
    "ext %[temp5], %[temp1], 5, 11 \n\t"
    "ins %[temp4], %[temp0], 0, 5 \n\t"
    "ext %[temp0], %[temp1], 8, 16 \n\t"
    "ext %[temp1], %[temp1], 3, 5 \n\t"
    "ins %[temp0], %[temp5], 0, 11 \n\t"
    "ext %[temp5], %[temp2], 5, 11 \n\t"
    "ins %[temp0], %[temp1], 0, 5 \n\t"
    "ext %[temp1], %[temp2], 8, 16 \n\t"
    "ext %[temp2], %[temp2], 3, 5 \n\t"
    "ins %[temp1], %[temp5], 0, 11 \n\t"
    "ext %[temp5], %[temp3], 5, 11 \n\t"
    "ins %[temp1], %[temp2], 0, 5 \n\t"
    "ext %[temp2], %[temp3], 8, 16 \n\t"
    "ext %[temp3], %[temp3], 3, 5 \n\t"
    "ins %[temp2], %[temp5], 0, 11 \n\t"
    "append %[temp0], %[temp4], 16 \n\t"
    "ins %[temp2], %[temp3], 0, 5 \n\t"
    "addiu %[src], %[src], 16 \n\t"
    "append %[temp2], %[temp1], 16 \n\t"
#if (WEBP_SWAP_16BIT_CSP == 1)
    "usw %[temp0], 0(%[dst]) \n\t"
    "usw %[temp2], 4(%[dst]) \n\t"
#else
    "wsbh %[temp0], %[temp0] \n\t"
    "wsbh %[temp2], %[temp2] \n\t"
    "usw %[temp0], 0(%[dst]) \n\t"
    "usw %[temp2], 4(%[dst]) \n\t"
#endif
    "bne %[src], %[p_loop1_end], 0b \n\t"
    " addiu %[dst], %[dst], 8 \n\t"
  "3: \n\t"
    "beq %[src], %[p_loop2_end], 2f \n\t"
    " nop \n\t"
  "1: \n\t"
    "lw %[temp0], 0(%[src]) \n\t"
    "ext %[temp4], %[temp0], 8, 16 \n\t"
    "ext %[temp5], %[temp0], 5, 11 \n\t"
    "ext %[temp0], %[temp0], 3, 5 \n\t"
    "ins %[temp4], %[temp5], 0, 11 \n\t"
    "addiu %[src], %[src], 4 \n\t"
    "ins %[temp4], %[temp0], 0, 5 \n\t"
#if (WEBP_SWAP_16BIT_CSP == 1)
    "ush %[temp4], 0(%[dst]) \n\t"
#else
    "wsbh %[temp4], %[temp4] \n\t"
    "ush %[temp4], 0(%[dst]) \n\t"
#endif
    "bne %[src], %[p_loop2_end], 1b \n\t"
    " addiu %[dst], %[dst], 2 \n\t"
  "2: \n\t"
    ".set pop \n\t"
    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
      [dst]"+&r"(dst), [src]"+&r"(src)
    : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
    : "memory"
  );
}

// Converts 'num_pixels' 32-bit pixels from 'src' into 3 bytes per pixel in
// 'dst', i.e. the low three bytes of every pixel are kept and bits 24..31
// are dropped.
//
// The first loop (label 0) packs four pixels into 12 output bytes per
// iteration (ins/sra/sll/balign, then three unaligned word stores). The
// second loop (label 1) handles the remaining (num_pixels & 3) pixels one at
// a time: the low halfword is stored to dst[0..1] with 'ush' and bits 16..23
// to dst[2] with 'sb'.
// NOTE(review): the output byte order assumes a little-endian target --
// confirm.
static void ConvertBGRAToBGR_MIPSdspR2(const uint32_t* src,
                                       int num_pixels, uint8_t* dst) {
  int temp0, temp1, temp2, temp3;
  const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
  const uint32_t* const p_loop2_end = src + num_pixels;
  __asm__ volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    "beq %[src], %[p_loop1_end], 3f \n\t"
    " nop \n\t"
  "0: \n\t"
    "lw %[temp0], 0(%[src]) \n\t"
    "lw %[temp1], 4(%[src]) \n\t"
    "lw %[temp2], 8(%[src]) \n\t"
    "lw %[temp3], 12(%[src]) \n\t"
    "ins %[temp0], %[temp1], 24, 8 \n\t"
    "sra %[temp1], %[temp1], 8 \n\t"
    "ins %[temp1], %[temp2], 16, 16 \n\t"
    "sll %[temp2], %[temp2], 8 \n\t"
    "balign %[temp3], %[temp2], 1 \n\t"
    "addiu %[src], %[src], 16 \n\t"
    "usw %[temp0], 0(%[dst]) \n\t"
    "usw %[temp1], 4(%[dst]) \n\t"
    "usw %[temp3], 8(%[dst]) \n\t"
    "bne %[src], %[p_loop1_end], 0b \n\t"
    " addiu %[dst], %[dst], 12 \n\t"
  "3: \n\t"
    "beq %[src], %[p_loop2_end], 2f \n\t"
    " nop \n\t"
  "1: \n\t"
    "lw %[temp0], 0(%[src]) \n\t"
    "addiu %[src], %[src], 4 \n\t"
    "addiu %[dst], %[dst], 3 \n\t"
    "ush %[temp0], -3(%[dst]) \n\t"
    "sra %[temp0], %[temp0], 16 \n\t"
    "bne %[src], %[p_loop2_end], 1b \n\t"
    " sb %[temp0], -1(%[dst]) \n\t"
  "2: \n\t"
    ".set pop \n\t"
    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
      [temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src)
    : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
    : "memory"
  );
}
667
668
//------------------------------------------------------------------------------
669
// Entry point
670
671
extern void VP8LDspInitMIPSdspR2(void);
672
673
WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitMIPSdspR2(void) {
674
VP8LMapColor32b = MapARGB_MIPSdspR2;
675
VP8LMapColor8b = MapAlpha_MIPSdspR2;
676
677
VP8LPredictors[5] = Predictor5_MIPSdspR2;
678
VP8LPredictors[6] = Predictor6_MIPSdspR2;
679
VP8LPredictors[7] = Predictor7_MIPSdspR2;
680
VP8LPredictors[8] = Predictor8_MIPSdspR2;
681
VP8LPredictors[9] = Predictor9_MIPSdspR2;
682
VP8LPredictors[10] = Predictor10_MIPSdspR2;
683
VP8LPredictors[11] = Predictor11_MIPSdspR2;
684
VP8LPredictors[12] = Predictor12_MIPSdspR2;
685
VP8LPredictors[13] = Predictor13_MIPSdspR2;
686
687
VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed_MIPSdspR2;
688
VP8LTransformColorInverse = TransformColorInverse_MIPSdspR2;
689
690
VP8LConvertBGRAToRGB = ConvertBGRAToRGB_MIPSdspR2;
691
VP8LConvertBGRAToRGBA = ConvertBGRAToRGBA_MIPSdspR2;
692
VP8LConvertBGRAToRGBA4444 = ConvertBGRAToRGBA4444_MIPSdspR2;
693
VP8LConvertBGRAToRGB565 = ConvertBGRAToRGB565_MIPSdspR2;
694
VP8LConvertBGRAToBGR = ConvertBGRAToBGR_MIPSdspR2;
695
}
696
697
#else // !WEBP_USE_MIPS_DSP_R2
698
699
WEBP_DSP_INIT_STUB(VP8LDspInitMIPSdspR2)
700
701
#endif // WEBP_USE_MIPS_DSP_R2
702
703