CoCalc -- lossless_mips_dsp

GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/libwebp/src/dsp/lossless_mips_dsp_r2.c
⁹⁹¹³ views
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Image transforms and color space conversion methods for lossless decoder.
//
// Author(s):  Djordje Pesut    ([email protected])
//             Jovan Zelincevic ([email protected])
14

15
#include "src/dsp/dsp.h"
16

17
#if defined(WEBP_USE_MIPS_DSP_R2)
18

19
#include "src/dsp/lossless.h"
20
#include "src/dsp/lossless_common.h"
21

22
// Generates a color-map lookup function:
//   void FUNC_NAME(const TYPE* src, const uint32_t* const color_map,
//                  TYPE* dst, int y_start, int y_end, int width);
//
// For every row y in [y_start, y_end) the function maps 'width' consecutive
// elements of 'src' through 'color_map' into 'dst'. 'src' and 'dst' are simply
// advanced element by element; no per-row stride is applied.
//
// Each row is handled in two parts:
//  * (width >> 2) iterations of an assembly block that maps four elements at
//    a time. The assembler directive '.ifc' compares the stringified TYPE so
//    that only the matching load/store sequence is assembled:
//      - uint8_t : the source byte is the index, and bits 15..8 of the
//                  looked-up entry are stored as one byte;
//      - uint32_t: bits 15..8 of the source word are the index, and the whole
//                  32-bit entry is stored.
//    The index is scaled by 4 ('sll ..., 2') because 'lwx' takes a byte
//    offset into 'color_map'.
//  * (width & 3) scalar iterations using GET_INDEX / GET_VALUE, which are
//    defined outside this file section (presumably with the same meaning as
//    the assembly path -- confirm against their definitions).
#define MAP_COLOR_FUNCS(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE)                 \
static void FUNC_NAME(const TYPE* src,                                         \
                      const uint32_t* const color_map,                         \
                      TYPE* dst, int y_start, int y_end,                       \
                      int width) {                                             \
  int y;                                                                       \
  for (y = y_start; y < y_end; ++y) {                                          \
    int x;                                                                     \
    for (x = 0; x < (width >> 2); ++x) {                                       \
      int tmp1, tmp2, tmp3, tmp4;                                              \
      __asm__ volatile (                                                       \
      ".ifc        " #TYPE ",  uint8_t                  \n\t"                  \
        "lbu       %[tmp1],  0(%[src])                  \n\t"                  \
        "lbu       %[tmp2],  1(%[src])                  \n\t"                  \
        "lbu       %[tmp3],  2(%[src])                  \n\t"                  \
        "lbu       %[tmp4],  3(%[src])                  \n\t"                  \
        "addiu     %[src],   %[src],      4             \n\t"                  \
      ".endif                                           \n\t"                  \
      ".ifc        " #TYPE ",  uint32_t                 \n\t"                  \
        "lw        %[tmp1],  0(%[src])                  \n\t"                  \
        "lw        %[tmp2],  4(%[src])                  \n\t"                  \
        "lw        %[tmp3],  8(%[src])                  \n\t"                  \
        "lw        %[tmp4],  12(%[src])                 \n\t"                  \
        "ext       %[tmp1],  %[tmp1],     8,        8   \n\t"                  \
        "ext       %[tmp2],  %[tmp2],     8,        8   \n\t"                  \
        "ext       %[tmp3],  %[tmp3],     8,        8   \n\t"                  \
        "ext       %[tmp4],  %[tmp4],     8,        8   \n\t"                  \
        "addiu     %[src],   %[src],      16            \n\t"                  \
      ".endif                                           \n\t"                  \
        "sll       %[tmp1],  %[tmp1],     2             \n\t"                  \
        "sll       %[tmp2],  %[tmp2],     2             \n\t"                  \
        "sll       %[tmp3],  %[tmp3],     2             \n\t"                  \
        "sll       %[tmp4],  %[tmp4],     2             \n\t"                  \
        "lwx       %[tmp1],  %[tmp1](%[color_map])      \n\t"                  \
        "lwx       %[tmp2],  %[tmp2](%[color_map])      \n\t"                  \
        "lwx       %[tmp3],  %[tmp3](%[color_map])      \n\t"                  \
        "lwx       %[tmp4],  %[tmp4](%[color_map])      \n\t"                  \
      ".ifc        " #TYPE ",  uint8_t                  \n\t"                  \
        "ext       %[tmp1],  %[tmp1],     8,        8   \n\t"                  \
        "ext       %[tmp2],  %[tmp2],     8,        8   \n\t"                  \
        "ext       %[tmp3],  %[tmp3],     8,        8   \n\t"                  \
        "ext       %[tmp4],  %[tmp4],     8,        8   \n\t"                  \
        "sb        %[tmp1],  0(%[dst])                  \n\t"                  \
        "sb        %[tmp2],  1(%[dst])                  \n\t"                  \
        "sb        %[tmp3],  2(%[dst])                  \n\t"                  \
        "sb        %[tmp4],  3(%[dst])                  \n\t"                  \
        "addiu     %[dst],   %[dst],      4             \n\t"                  \
      ".endif                                           \n\t"                  \
      ".ifc        " #TYPE ",  uint32_t                 \n\t"                  \
        "sw        %[tmp1],  0(%[dst])                  \n\t"                  \
        "sw        %[tmp2],  4(%[dst])                  \n\t"                  \
        "sw        %[tmp3],  8(%[dst])                  \n\t"                  \
        "sw        %[tmp4],  12(%[dst])                 \n\t"                  \
        "addiu     %[dst],   %[dst],      16            \n\t"                  \
      ".endif                                           \n\t"                  \
        : [tmp1]"=&r"(tmp1), [tmp2]"=&r"(tmp2), [tmp3]"=&r"(tmp3),             \
          [tmp4]"=&r"(tmp4), [src]"+&r"(src), [dst]"+r"(dst)                   \
        : [color_map]"r"(color_map)                                            \
        : "memory"                                                             \
      );                                                                       \
    }                                                                          \
    for (x = 0; x < (width & 3); ++x) {                                        \
      *dst++ = GET_VALUE(color_map[GET_INDEX(*src++)]);                        \
    }                                                                          \
  }                                                                            \
}

MAP_COLOR_FUNCS(MapARGB_MIPSdspR2, uint32_t, VP8GetARGBIndex, VP8GetARGBValue)
MAP_COLOR_FUNCS(MapAlpha_MIPSdspR2, uint8_t, VP8GetAlphaIndex, VP8GetAlphaValue)

#undef MAP_COLOR_FUNCS
93

94
// Returns, independently for each of the four bytes packed in the arguments,
// the value c0 + c1 - c2 clamped to [0, 255].
static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
                                                   uint32_t c2) {
  int temp0, temp1, temp2, temp3, temp4, temp5;
  __asm__ volatile (
    // Widen each byte to an unsigned 16-bit lane: 'qbr' expands the two low
    // bytes of a word, 'qbl' the two high bytes.
    "preceu.ph.qbr   %[temp1],   %[c0]                 \n\t"
    "preceu.ph.qbl   %[temp2],   %[c0]                 \n\t"
    "preceu.ph.qbr   %[temp3],   %[c1]                 \n\t"
    "preceu.ph.qbl   %[temp4],   %[c1]                 \n\t"
    "preceu.ph.qbr   %[temp5],   %[c2]                 \n\t"
    "preceu.ph.qbl   %[temp0],   %[c2]                 \n\t"
    // Per lane: c0 + (c1 - c2).
    "subq.ph         %[temp3],   %[temp3],   %[temp5]  \n\t"
    "subq.ph         %[temp4],   %[temp4],   %[temp0]  \n\t"
    "addq.ph         %[temp1],   %[temp1],   %[temp3]  \n\t"
    "addq.ph         %[temp2],   %[temp2],   %[temp4]  \n\t"
    // Clamp: the saturating left shift by 7 pins lanes above 255 to the
    // maximum, and the unsigned-saturating pack maps negative lanes to 0
    // while narrowing the four lanes back to bytes.
    "shll_s.ph       %[temp1],   %[temp1],   7         \n\t"
    "shll_s.ph       %[temp2],   %[temp2],   7         \n\t"
    "precrqu_s.qb.ph %[temp2],   %[temp2],   %[temp1]  \n\t"
    : [temp0]"=r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5)
    : [c0]"r"(c0), [c1]"r"(c1), [c2]"r"(c2)
    : "memory"
  );
  return temp2;
}
118

119
// Returns, independently for each of the four bytes packed in the arguments,
// clamp(avg + (avg - c2) / 2, 0, 255), where avg is the truncating per-byte
// average (c0 + c1) >> 1 and the division by two rounds toward zero.
static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
                                                   uint32_t c2) {
  int temp0, temp1, temp2, temp3, temp4, temp5;
  __asm__ volatile (
    // avg = per-byte (c0 + c1) >> 1, then widen avg and c2 to 16-bit lanes
    // ('qbr': two low bytes, 'qbl': two high bytes).
    "adduh.qb         %[temp5],   %[c0],      %[c1]       \n\t"
    "preceu.ph.qbr    %[temp3],   %[c2]                   \n\t"
    "preceu.ph.qbr    %[temp1],   %[temp5]                \n\t"
    "preceu.ph.qbl    %[temp2],   %[temp5]                \n\t"
    "preceu.ph.qbl    %[temp4],   %[c2]                   \n\t"
    // diff = avg - c2 per lane.
    "subq.ph          %[temp3],   %[temp1],   %[temp3]    \n\t"
    "subq.ph          %[temp4],   %[temp2],   %[temp4]    \n\t"
    // Add the sign bit (1 for negative lanes) before the arithmetic shift so
    // that diff / 2 rounds toward zero instead of toward minus infinity.
    "shrl.ph          %[temp5],   %[temp3],   15          \n\t"
    "shrl.ph          %[temp0],   %[temp4],   15          \n\t"
    "addq.ph          %[temp3],   %[temp3],   %[temp5]    \n\t"
    "addq.ph          %[temp4],   %[temp0],   %[temp4]    \n\t"
    "shra.ph          %[temp3],   %[temp3],   1           \n\t"
    "shra.ph          %[temp4],   %[temp4],   1           \n\t"
    // avg + diff / 2, then clamp to [0, 255] and pack back to four bytes
    // (saturating shift handles overflow, saturating pack handles negatives).
    "addq.ph          %[temp1],   %[temp1],   %[temp3]    \n\t"
    "addq.ph          %[temp2],   %[temp2],   %[temp4]    \n\t"
    "shll_s.ph        %[temp1],   %[temp1],   7           \n\t"
    "shll_s.ph        %[temp2],   %[temp2],   7           \n\t"
    "precrqu_s.qb.ph  %[temp1],   %[temp2],   %[temp1]    \n\t"
    : [temp0]"=r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
      [temp3]"=&r"(temp3), [temp4]"=r"(temp4), [temp5]"=&r"(temp5)
    : [c0]"r"(c0), [c1]"r"(c1), [c2]"r"(c2)
    : "memory"
  );
  return temp1;
}
148

149
// Chooses between 'a' and 'b' by comparing their byte-wise distance to 'c'.
// Let Da = sum over the four bytes of |a - c| and Db = the same for 'b'.
// Returns 'b' when Db > Da, otherwise 'a'.
static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
  int temp0, temp1, temp2, temp3, temp4, temp5;
  __asm__ volatile (
    // Per byte: temp1 = max(b, c), temp2 = min(b, c). The compare sets the
    // condition bits that the two 'pick' instructions consume.
    "cmpgdu.lt.qb %[temp1], %[c],     %[b]             \n\t"
    "pick.qb      %[temp1], %[b],     %[c]             \n\t"
    "pick.qb      %[temp2], %[c],     %[b]             \n\t"
    // Per byte: temp4 = max(a, c), temp5 = min(a, c).
    "cmpgdu.lt.qb %[temp4], %[c],     %[a]             \n\t"
    "pick.qb      %[temp4], %[a],     %[c]             \n\t"
    "pick.qb      %[temp5], %[c],     %[a]             \n\t"
    // Absolute differences per byte, then horizontal sums: temp3 = Db,
    // temp0 = Da.
    "subu.qb      %[temp3], %[temp1], %[temp2]         \n\t"
    "subu.qb      %[temp0], %[temp4], %[temp5]         \n\t"
    "raddu.w.qb   %[temp3], %[temp3]                   \n\t"
    "raddu.w.qb   %[temp0], %[temp0]                   \n\t"
    // temp0 = (Db - Da < 1); 'a' is replaced by 'b' only when temp0 == 0.
    "subu         %[temp3], %[temp3], %[temp0]         \n\t"
    "slti         %[temp0], %[temp3], 0x1              \n\t"
    "movz         %[a],     %[b],     %[temp0]         \n\t"
    : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
      [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp0]"=&r"(temp0),
      [a]"+&r"(a)
    : [b]"r"(b), [c]"r"(c)
  );
  return a;
}
172

173
// Returns the per-byte truncating average of the two packed words:
// each result byte is (a0_byte + a1_byte) >> 1.
static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {
  __asm__ volatile (
    "adduh.qb    %[a0], %[a0], %[a1]       \n\t"
    : [a0]"+r"(a0)
    : [a1]"r"(a1)
  );
  return a0;
}
181

182
// Per-byte average of three packed words, computed as
// Average2(Average2(a0, a2), a1): a1 carries twice the weight of a0 and a2.
static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
  return Average2(Average2(a0, a2), a1);
}
185

186
// Per-byte average of four packed words: the average of the two pairwise
// averages Average2(a0, a1) and Average2(a2, a3).
static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
                                     uint32_t a2, uint32_t a3) {
  return Average2(Average2(a0, a1), Average2(a2, a3));
}
190

191
// Predictor 5: Average3 of *left, top[0] and top[1].
static uint32_t Predictor5_MIPSdspR2(const uint32_t* const left,
                                     const uint32_t* const top) {
  return Average3(*left, top[0], top[1]);
}
195

196
// Predictor 6: Average2 of *left and top[-1].
static uint32_t Predictor6_MIPSdspR2(const uint32_t* const left,
                                     const uint32_t* const top) {
  return Average2(*left, top[-1]);
}
200

201
// Predictor 7: Average2 of *left and top[0].
static uint32_t Predictor7_MIPSdspR2(const uint32_t* const left,
                                     const uint32_t* const top) {
  return Average2(*left, top[0]);
}
205

206
// Predictor 8: Average2 of top[-1] and top[0]. 'left' is not used.
static uint32_t Predictor8_MIPSdspR2(const uint32_t* const left,
                                     const uint32_t* const top) {
  (void)left;
  return Average2(top[-1], top[0]);
}
211

212
// Predictor 9: Average2 of top[0] and top[1]. 'left' is not used.
static uint32_t Predictor9_MIPSdspR2(const uint32_t* const left,
                                     const uint32_t* const top) {
  (void)left;
  return Average2(top[0], top[1]);
}
217

218
// Predictor 10: Average4 of *left, top[-1], top[0] and top[1].
static uint32_t Predictor10_MIPSdspR2(const uint32_t* const left,
                                      const uint32_t* const top) {
  return Average4(*left, top[-1], top[0], top[1]);
}
222

223
// Predictor 11: Select() between top[0] and *left, using top[-1] as the
// reference value both candidates are compared against.
static uint32_t Predictor11_MIPSdspR2(const uint32_t* const left,
                                      const uint32_t* const top) {
  return Select(top[0], *left, top[-1]);
}
227

228
// Predictor 12: per-byte clamp(*left + top[0] - top[-1]) via
// ClampedAddSubtractFull().
static uint32_t Predictor12_MIPSdspR2(const uint32_t* const left,
                                      const uint32_t* const top) {
  return ClampedAddSubtractFull(*left, top[0], top[-1]);
}
232

233
// Predictor 13: ClampedAddSubtractHalf() applied to the average of *left and
// top[0], with top[-1] as the subtracted term.
static uint32_t Predictor13_MIPSdspR2(const uint32_t* const left,
                                      const uint32_t* const top) {
  return ClampedAddSubtractHalf(*left, top[0], top[-1]);
}
237

238
// Add green to blue and red channels (i.e. perform the inverse transform of
// 'subtract green').
// For each of the 'num_pixels' words read from 'src', bits 15..8 (green) are
// added modulo 256 to bits 23..16 and to bits 7..0; bits 31..24 and 15..8 are
// left unchanged. The result is written to 'dst'.
// The first loop handles four pixels per iteration (num_pixels & ~3 pixels in
// total), the second loop handles the remaining pixels one at a time.
static void AddGreenToBlueAndRed_MIPSdspR2(const uint32_t* src, int num_pixels,
                                           uint32_t* dst) {
  uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
  const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
  const uint32_t* const p_loop2_end = src + num_pixels;
  __asm__ volatile (
    // 'noreorder': the instruction after each branch is its delay slot and is
    // executed whether or not the branch is taken.
    ".set       push                                          \n\t"
    ".set       noreorder                                     \n\t"
    "beq        %[src],          %[p_loop1_end],     3f       \n\t"
    " nop                                                     \n\t"
  // Main loop: four pixels per iteration.
  "0:                                                         \n\t"
    "lw         %[temp0],        0(%[src])                    \n\t"
    "lw         %[temp1],        4(%[src])                    \n\t"
    "lw         %[temp2],        8(%[src])                    \n\t"
    "lw         %[temp3],        12(%[src])                   \n\t"
    // Extract green (bits 15..8) of each pixel.
    "ext        %[temp4],        %[temp0],           8,    8  \n\t"
    "ext        %[temp5],        %[temp1],           8,    8  \n\t"
    "ext        %[temp6],        %[temp2],           8,    8  \n\t"
    "ext        %[temp7],        %[temp3],           8,    8  \n\t"
    "addiu      %[src],          %[src],             16       \n\t"
    "addiu      %[dst],          %[dst],             16       \n\t"
    // Replicate green into both halfwords (0x00GG00GG) so that the byte-wise
    // add only affects the red and blue bytes.
    "replv.ph   %[temp4],        %[temp4]                     \n\t"
    "replv.ph   %[temp5],        %[temp5]                     \n\t"
    "replv.ph   %[temp6],        %[temp6]                     \n\t"
    "replv.ph   %[temp7],        %[temp7]                     \n\t"
    "addu.qb    %[temp0],        %[temp0],           %[temp4] \n\t"
    "addu.qb    %[temp1],        %[temp1],           %[temp5] \n\t"
    "addu.qb    %[temp2],        %[temp2],           %[temp6] \n\t"
    "addu.qb    %[temp3],        %[temp3],           %[temp7] \n\t"
    // 'dst' was already advanced, hence the negative offsets.
    "sw         %[temp0],        -16(%[dst])                  \n\t"
    "sw         %[temp1],        -12(%[dst])                  \n\t"
    "sw         %[temp2],        -8(%[dst])                   \n\t"
    "bne        %[src],          %[p_loop1_end],     0b       \n\t"
    " sw        %[temp3],        -4(%[dst])                   \n\t"
  "3:                                                         \n\t"
    "beq        %[src],          %[p_loop2_end],     2f       \n\t"
    " nop                                                     \n\t"
  // Tail loop: one pixel per iteration.
  "1:                                                         \n\t"
    "lw         %[temp0],        0(%[src])                    \n\t"
    "addiu      %[src],          %[src],             4        \n\t"
    "addiu      %[dst],          %[dst],             4        \n\t"
    "ext        %[temp4],        %[temp0],           8,    8  \n\t"
    "replv.ph   %[temp4],        %[temp4]                     \n\t"
    "addu.qb    %[temp0],        %[temp0],           %[temp4] \n\t"
    "bne        %[src],          %[p_loop2_end],     1b       \n\t"
    " sw        %[temp0],        -4(%[dst])                   \n\t"
  "2:                                                         \n\t"
    ".set       pop                                           \n\t"
    : [dst]"+&r"(dst), [src]"+&r"(src), [temp0]"=&r"(temp0),
      [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
      [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6),
      [temp7]"=&r"(temp7)
    : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
    : "memory"
  );
}
296

297
static void TransformColorInverse_MIPSdspR2(const VP8LMultipliers* const m,
298
                                            const uint32_t* src, int num_pixels,
299
                                            uint32_t* dst) {
300
  int temp0, temp1, temp2, temp3, temp4, temp5;
301
  uint32_t argb, argb1, new_red;
302
  const uint32_t G_to_R = m->green_to_red_;
303
  const uint32_t G_to_B = m->green_to_blue_;
304
  const uint32_t R_to_B = m->red_to_blue_;
305
  const uint32_t* const p_loop_end = src + (num_pixels & ~1);
306
  __asm__ volatile (
307
    ".set            push                                    \n\t"
308
    ".set            noreorder                               \n\t"
309
    "beq             %[src],       %[p_loop_end],  1f        \n\t"
310
    " nop                                                    \n\t"
311
    "replv.ph        %[temp0],     %[G_to_R]                 \n\t"
312
    "replv.ph        %[temp1],     %[G_to_B]                 \n\t"
313
    "replv.ph        %[temp2],     %[R_to_B]                 \n\t"
314
    "shll.ph         %[temp0],     %[temp0],       8         \n\t"
315
    "shll.ph         %[temp1],     %[temp1],       8         \n\t"
316
    "shll.ph         %[temp2],     %[temp2],       8         \n\t"
317
    "shra.ph         %[temp0],     %[temp0],       8         \n\t"
318
    "shra.ph         %[temp1],     %[temp1],       8         \n\t"
319
    "shra.ph         %[temp2],     %[temp2],       8         \n\t"
320
  "0:                                                        \n\t"
321
    "lw              %[argb],      0(%[src])                 \n\t"
322
    "lw              %[argb1],     4(%[src])                 \n\t"
323
    "sw              %[argb],      0(%[dst])                 \n\t"
324
    "sw              %[argb1],     4(%[dst])                 \n\t"
325
    "addiu           %[src],       %[src],         8         \n\t"
326
    "addiu           %[dst],       %[dst],         8         \n\t"
327
    "precrq.qb.ph    %[temp3],     %[argb],        %[argb1]  \n\t"
328
    "preceu.ph.qbra  %[temp3],     %[temp3]                  \n\t"
329
    "shll.ph         %[temp3],     %[temp3],       8         \n\t"
330
    "shra.ph         %[temp3],     %[temp3],       8         \n\t"
331
    "mul.ph          %[temp5],     %[temp3],       %[temp0]  \n\t"
332
    "mul.ph          %[temp3],     %[temp3],       %[temp1]  \n\t"
333
    "precrq.ph.w     %[new_red],   %[argb],        %[argb1]  \n\t"
334
    "ins             %[argb1],     %[argb],        16,   16  \n\t"
335
    "shra.ph         %[temp5],     %[temp5],       5         \n\t"
336
    "shra.ph         %[temp3],     %[temp3],       5         \n\t"
337
    "addu.ph         %[new_red],   %[new_red],     %[temp5]  \n\t"
338
    "addu.ph         %[argb1],     %[argb1],       %[temp3]  \n\t"
339
    "preceu.ph.qbra  %[temp5],     %[new_red]                \n\t"
340
    "shll.ph         %[temp4],     %[temp5],       8         \n\t"
341
    "shra.ph         %[temp4],     %[temp4],       8         \n\t"
342
    "mul.ph          %[temp4],     %[temp4],       %[temp2]  \n\t"
343
    "sb              %[temp5],     -2(%[dst])                \n\t"
344
    "sra             %[temp5],     %[temp5],       16        \n\t"
345
    "shra.ph         %[temp4],     %[temp4],       5         \n\t"
346
    "addu.ph         %[argb1],     %[argb1],       %[temp4]  \n\t"
347
    "preceu.ph.qbra  %[temp3],     %[argb1]                  \n\t"
348
    "sb              %[temp5],     -6(%[dst])                \n\t"
349
    "sb              %[temp3],     -4(%[dst])                \n\t"
350
    "sra             %[temp3],     %[temp3],       16        \n\t"
351
    "bne             %[src],       %[p_loop_end],  0b        \n\t"
352
    " sb             %[temp3],     -8(%[dst])                \n\t"
353
  "1:                                                        \n\t"
354
    ".set            pop                                     \n\t"
355
    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
356
      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
357
      [new_red]"=&r"(new_red), [argb]"=&r"(argb),
358
      [argb1]"=&r"(argb1), [dst]"+&r"(dst), [src]"+&r"(src)
359
    : [G_to_R]"r"(G_to_R), [R_to_B]"r"(R_to_B),
360
      [G_to_B]"r"(G_to_B), [p_loop_end]"r"(p_loop_end)
361
    : "memory", "hi", "lo"
362
  );
363

364
  // Fall-back to C-version for left-overs.
365
  if (num_pixels & 1) VP8LTransformColorInverse_C(m, src, 1, dst);
366
}
367

368
// Converts 'num_pixels' 32-bit words from 'src' into 3 bytes per pixel in
// 'dst'. For each word the bytes emitted are bits 23..16, bits 15..8 and
// bits 7..0, in that order (i.e. R, G, B; the top byte is dropped).
// The first loop packs four pixels into three unaligned word stores
// (12 bytes); the second loop handles the remaining num_pixels & 3 pixels
// with one halfword and one byte store each.
// NOTE(review): the byte order described above was derived for a
// little-endian target; big-endian behaviour has not been examined.
static void ConvertBGRAToRGB_MIPSdspR2(const uint32_t* src,
                                       int num_pixels, uint8_t* dst) {
  int temp0, temp1, temp2, temp3;
  const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
  const uint32_t* const p_loop2_end = src + num_pixels;
  __asm__ volatile (
    // 'noreorder': the instruction after each branch is its delay slot.
    ".set       push                                       \n\t"
    ".set       noreorder                                  \n\t"
    "beq        %[src],      %[p_loop1_end],    3f         \n\t"
    " nop                                                  \n\t"
  // Main loop: 4 pixels in, 12 bytes out.
  "0:                                                      \n\t"
    "lw         %[temp3],    12(%[src])                    \n\t"
    "lw         %[temp2],    8(%[src])                     \n\t"
    "lw         %[temp1],    4(%[src])                     \n\t"
    "lw         %[temp0],    0(%[src])                     \n\t"
    // Shuffle the twelve colour bytes of the four pixels into three words,
    // discarding the top byte of each pixel.
    "ins        %[temp3],    %[temp2],          24,   8    \n\t"
    "sll        %[temp2],    %[temp2],          8          \n\t"
    "rotr       %[temp3],    %[temp3],          16         \n\t"
    "ins        %[temp2],    %[temp1],          0,    16   \n\t"
    "sll        %[temp1],    %[temp1],          8          \n\t"
    "wsbh       %[temp3],    %[temp3]                      \n\t"
    "balign     %[temp0],    %[temp1],          1          \n\t"
    "wsbh       %[temp2],    %[temp2]                      \n\t"
    "wsbh       %[temp0],    %[temp0]                      \n\t"
    // 'usw' is used because 'dst' is a byte pointer with no alignment
    // guarantee visible here.
    "usw        %[temp3],    8(%[dst])                     \n\t"
    "rotr       %[temp0],    %[temp0],          16         \n\t"
    "usw        %[temp2],    4(%[dst])                     \n\t"
    "addiu      %[src],      %[src],            16         \n\t"
    "usw        %[temp0],    0(%[dst])                     \n\t"
    "bne        %[src],      %[p_loop1_end],    0b         \n\t"
    " addiu     %[dst],      %[dst],            12         \n\t"
  "3:                                                      \n\t"
    "beq        %[src],      %[p_loop2_end],    2f         \n\t"
    " nop                                                  \n\t"
  // Tail loop: 1 pixel in, 3 bytes out.
  "1:                                                      \n\t"
    "lw         %[temp0],    0(%[src])                     \n\t"
    "addiu      %[src],      %[src],            4          \n\t"
    "wsbh       %[temp1],    %[temp0]                      \n\t"
    "addiu      %[dst],      %[dst],            3          \n\t"
    // Second and third output bytes come from the byte-swapped low halfword.
    "ush        %[temp1],    -2(%[dst])                    \n\t"
    // First output byte is bits 23..16 of the pixel.
    "sra        %[temp0],    %[temp0],          16         \n\t"
    "bne        %[src],      %[p_loop2_end],    1b         \n\t"
    " sb        %[temp0],    -3(%[dst])                    \n\t"
  "2:                                                      \n\t"
    ".set       pop                                        \n\t"
    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
      [temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src)
    : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
    : "memory"
  );
}
419

420
// Converts 'num_pixels' 32-bit words from 'src' into 4 bytes per pixel in
// 'dst'. Each word 0xAARRGGBB is rearranged to 0xAABBGGRR (byte swap within
// halfwords followed by a rotate left by 8 bits) and written with an
// unaligned word store.
// The first loop handles four pixels per iteration, the second loop the
// remaining num_pixels & 3 pixels.
// NOTE(review): on a little-endian target the stored bytes are R, G, B, A;
// big-endian behaviour has not been examined.
static void ConvertBGRAToRGBA_MIPSdspR2(const uint32_t* src,
                                        int num_pixels, uint8_t* dst) {
  int temp0, temp1, temp2, temp3;
  const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
  const uint32_t* const p_loop2_end = src + num_pixels;
  __asm__ volatile (
    // 'noreorder': the instruction after each branch is its delay slot.
    ".set       push                                       \n\t"
    ".set       noreorder                                  \n\t"
    "beq        %[src],      %[p_loop1_end],    3f         \n\t"
    " nop                                                  \n\t"
  // Main loop: four pixels per iteration.
  "0:                                                      \n\t"
    "lw         %[temp0],    0(%[src])                     \n\t"
    "lw         %[temp1],    4(%[src])                     \n\t"
    "lw         %[temp2],    8(%[src])                     \n\t"
    "lw         %[temp3],    12(%[src])                    \n\t"
    // 0xAARRGGBB -> 0xRRAABBGG
    "wsbh       %[temp0],    %[temp0]                      \n\t"
    "wsbh       %[temp1],    %[temp1]                      \n\t"
    "wsbh       %[temp2],    %[temp2]                      \n\t"
    "wsbh       %[temp3],    %[temp3]                      \n\t"
    "addiu      %[src],      %[src],            16         \n\t"
    // 'balign x, x, 1' rotates left by one byte: 0xRRAABBGG -> 0xAABBGGRR
    "balign     %[temp0],    %[temp0],          1          \n\t"
    "balign     %[temp1],    %[temp1],          1          \n\t"
    "balign     %[temp2],    %[temp2],          1          \n\t"
    "balign     %[temp3],    %[temp3],          1          \n\t"
    "usw        %[temp0],    0(%[dst])                     \n\t"
    "usw        %[temp1],    4(%[dst])                     \n\t"
    "usw        %[temp2],    8(%[dst])                     \n\t"
    "usw        %[temp3],    12(%[dst])                    \n\t"
    "bne        %[src],      %[p_loop1_end],    0b         \n\t"
    " addiu     %[dst],      %[dst],            16         \n\t"
  "3:                                                      \n\t"
    "beq        %[src],      %[p_loop2_end],    2f         \n\t"
    " nop                                                  \n\t"
  // Tail loop: one pixel per iteration.
  "1:                                                      \n\t"
    "lw         %[temp0],    0(%[src])                     \n\t"
    "wsbh       %[temp0],    %[temp0]                      \n\t"
    "addiu      %[src],      %[src],            4          \n\t"
    "balign     %[temp0],    %[temp0],          1          \n\t"
    "usw        %[temp0],    0(%[dst])                     \n\t"
    "bne        %[src],      %[p_loop2_end],    1b         \n\t"
    " addiu     %[dst],      %[dst],            4          \n\t"
  "2:                                                      \n\t"
    ".set       pop                                        \n\t"
    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
      [temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src)
    : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
    : "memory"
  );
}
469

470
// Converts 'num_pixels' 32-bit words from 'src' into 2 bytes per pixel in
// 'dst'. For a word 0xAARRGGBB the two bytes produced are
//   rg = (RR & 0xf0) | (GG >> 4)   and   ba = (BB & 0xf0) | (AA >> 4).
// When WEBP_SWAP_16BIT_CSP == 1 the packed halfwords are stored as produced;
// otherwise the two bytes of every halfword are swapped ('wsbh') before the
// store.
// The first loop handles four pixels per iteration (two unaligned word
// stores), the second loop the remaining num_pixels & 3 pixels (one
// unaligned halfword store each).
static void ConvertBGRAToRGBA4444_MIPSdspR2(const uint32_t* src,
                                            int num_pixels, uint8_t* dst) {
  int temp0, temp1, temp2, temp3, temp4, temp5;
  const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
  const uint32_t* const p_loop2_end = src + num_pixels;
  __asm__ volatile (
    // 'noreorder': the instruction after each branch is its delay slot.
    ".set           push                                       \n\t"
    ".set           noreorder                                  \n\t"
    "beq            %[src],      %[p_loop1_end],    3f         \n\t"
    " nop                                                      \n\t"
  // Main loop: four pixels per iteration.
  "0:                                                          \n\t"
    "lw             %[temp0],    0(%[src])                     \n\t"
    "lw             %[temp1],    4(%[src])                     \n\t"
    "lw             %[temp2],    8(%[src])                     \n\t"
    "lw             %[temp3],    12(%[src])                    \n\t"
    // For each pixel: the top nibble of alpha (bits 31..28) replaces the low
    // nibble of the blue byte, and the top nibble of green (bits 15..12)
    // replaces the low nibble of the red byte.
    "ext            %[temp4],    %[temp0],          28,   4    \n\t"
    "ext            %[temp5],    %[temp0],          12,   4    \n\t"
    "ins            %[temp0],    %[temp4],          0,    4    \n\t"
    "ext            %[temp4],    %[temp1],          28,   4    \n\t"
    "ins            %[temp0],    %[temp5],          16,   4    \n\t"
    "ext            %[temp5],    %[temp1],          12,   4    \n\t"
    "ins            %[temp1],    %[temp4],          0,    4    \n\t"
    "ext            %[temp4],    %[temp2],          28,   4    \n\t"
    "ins            %[temp1],    %[temp5],          16,   4    \n\t"
    "ext            %[temp5],    %[temp2],          12,   4    \n\t"
    "ins            %[temp2],    %[temp4],          0,    4    \n\t"
    "ext            %[temp4],    %[temp3],          28,   4    \n\t"
    "ins            %[temp2],    %[temp5],          16,   4    \n\t"
    "ext            %[temp5],    %[temp3],          12,   4    \n\t"
    "ins            %[temp3],    %[temp4],          0,    4    \n\t"
    // Keep only the patched red and blue bytes of two pixels per word.
    "precr.qb.ph    %[temp1],    %[temp1],          %[temp0]   \n\t"
    "ins            %[temp3],    %[temp5],          16,   4    \n\t"
    "addiu          %[src],      %[src],            16         \n\t"
    "precr.qb.ph    %[temp3],    %[temp3],          %[temp2]   \n\t"
#if (WEBP_SWAP_16BIT_CSP == 1)
    "usw            %[temp1],    0(%[dst])                     \n\t"
    "usw            %[temp3],    4(%[dst])                     \n\t"
#else
    "wsbh           %[temp1],    %[temp1]                      \n\t"
    "wsbh           %[temp3],    %[temp3]                      \n\t"
    "usw            %[temp1],    0(%[dst])                     \n\t"
    "usw            %[temp3],    4(%[dst])                     \n\t"
#endif
    "bne            %[src],      %[p_loop1_end],    0b         \n\t"
    " addiu         %[dst],      %[dst],            8          \n\t"
  "3:                                                          \n\t"
    "beq            %[src],      %[p_loop2_end],    2f         \n\t"
    " nop                                                      \n\t"
  // Tail loop: one pixel per iteration.
  "1:                                                          \n\t"
    "lw             %[temp0],    0(%[src])                     \n\t"
    "ext            %[temp4],    %[temp0],          28,   4    \n\t"
    "ext            %[temp5],    %[temp0],          12,   4    \n\t"
    "ins            %[temp0],    %[temp4],          0,    4    \n\t"
    "ins            %[temp0],    %[temp5],          16,   4    \n\t"
    "addiu          %[src],      %[src],            4          \n\t"
    "precr.qb.ph    %[temp0],    %[temp0],          %[temp0]   \n\t"
#if (WEBP_SWAP_16BIT_CSP == 1)
    "ush            %[temp0],    0(%[dst])                     \n\t"
#else
    "wsbh           %[temp0],    %[temp0]                      \n\t"
    "ush            %[temp0],    0(%[dst])                     \n\t"
#endif
    "bne            %[src],      %[p_loop2_end],    1b         \n\t"
    " addiu         %[dst],      %[dst],            2          \n\t"
  "2:                                                          \n\t"
    ".set           pop                                        \n\t"
    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
      [dst]"+&r"(dst), [src]"+&r"(src)
    : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
    : "memory"
  );
}
543

544
// Packs 'num_pixels' 32-bit pixels read from 'src' into 16-bit values written
// to 'dst' (2 bytes per pixel).
// For each input word the packed halfword is built as:
//   bits 15..11 <- input bits 23..19
//   bits 10..5  <- input bits 15..10
//   bits  4..0  <- input bits  7..3
// i.e. 5-6-5 taken from the top bits of bytes 2, 1 and 0 of the pixel (R, G, B
// if the word is laid out as 0xAARRGGBB, as the function name suggests).
// The first loop consumes four pixels per iteration up to 'p_loop1_end'
// (num_pixels rounded down to a multiple of 4); the second loop handles the
// remaining pixels one at a time up to 'p_loop2_end'.
// When WEBP_SWAP_16BIT_CSP == 1 the halfwords are stored as computed; otherwise
// 'wsbh' swaps the two bytes of every halfword before the store.
// NOTE(review): the resulting byte order in 'dst' depends on target endianness;
// the code appears to be written for little-endian MIPS -- confirm.
static void ConvertBGRAToRGB565_MIPSdspR2(const uint32_t* src,
545
                                          int num_pixels, uint8_t* dst) {
546
  int temp0, temp1, temp2, temp3, temp4, temp5;
547
  // End of the 4-pixels-at-a-time portion of the input.
  const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
548
  // End of the whole input.
  const uint32_t* const p_loop2_end = src + num_pixels;
549
  __asm__ volatile (
550
    ".set           push                                       \n\t"
551
    // noreorder: the instruction written right after each branch (indented by
    // one extra space) executes in the branch delay slot.
    ".set           noreorder                                  \n\t"
552
    // Skip the unrolled loop entirely when there are fewer than 4 pixels.
    "beq            %[src],      %[p_loop1_end],    3f         \n\t"
553
    " nop                                                      \n\t"
554
  // Loop 0: four pixels per iteration. The per-pixel ext/ins sequences are
  // interleaved; results end up in temp4 (pixel 0), temp0 (pixel 1),
  // temp1 (pixel 2) and temp2 (pixel 3).
  "0:                                                          \n\t"
555
    "lw             %[temp0],    0(%[src])                     \n\t"
556
    "lw             %[temp1],    4(%[src])                     \n\t"
557
    "lw             %[temp2],    8(%[src])                     \n\t"
558
    "lw             %[temp3],    12(%[src])                    \n\t"
559
    // Pixel 0: temp4 = bits 8..23, temp5 = bits 5..15, temp0 = bits 3..7.
    "ext            %[temp4],    %[temp0],          8,    16   \n\t"
560
    "ext            %[temp5],    %[temp0],          5,    11   \n\t"
561
    "ext            %[temp0],    %[temp0],          3,    5    \n\t"
562
    // Overwrite the low 11 bits of temp4, leaving input bits 19..23 on top.
    "ins            %[temp4],    %[temp5],          0,    11   \n\t"
563
    "ext            %[temp5],    %[temp1],          5,    11   \n\t"
564
    // Overwrite the low 5 bits with input bits 3..7: pixel 0 is now packed.
    "ins            %[temp4],    %[temp0],          0,    5    \n\t"
565
    // Pixel 1, packed into temp0.
    "ext            %[temp0],    %[temp1],          8,    16   \n\t"
566
    "ext            %[temp1],    %[temp1],          3,    5    \n\t"
567
    "ins            %[temp0],    %[temp5],          0,    11   \n\t"
568
    "ext            %[temp5],    %[temp2],          5,    11   \n\t"
569
    "ins            %[temp0],    %[temp1],          0,    5    \n\t"
570
    // Pixel 2, packed into temp1.
    "ext            %[temp1],    %[temp2],          8,    16   \n\t"
571
    "ext            %[temp2],    %[temp2],          3,    5    \n\t"
572
    "ins            %[temp1],    %[temp5],          0,    11   \n\t"
573
    "ext            %[temp5],    %[temp3],          5,    11   \n\t"
574
    "ins            %[temp1],    %[temp2],          0,    5    \n\t"
575
    // Pixel 3, packed into temp2.
    "ext            %[temp2],    %[temp3],          8,    16   \n\t"
576
    "ext            %[temp3],    %[temp3],          3,    5    \n\t"
577
    "ins            %[temp2],    %[temp5],          0,    11   \n\t"
578
    // temp0 = (pixel 1 << 16) | pixel 0.
    "append         %[temp0],    %[temp4],          16         \n\t"
579
    "ins            %[temp2],    %[temp3],          0,    5    \n\t"
580
    "addiu          %[src],      %[src],            16         \n\t"
581
    // temp2 = (pixel 3 << 16) | pixel 2.
    "append         %[temp2],    %[temp1],          16         \n\t"
582
#if (WEBP_SWAP_16BIT_CSP == 1)
583
    // usw: unaligned word store, 'dst' is a byte pointer.
    "usw            %[temp0],    0(%[dst])                     \n\t"
584
    "usw            %[temp2],    4(%[dst])                     \n\t"
585
#else
586
    // Swap the bytes inside each 16-bit half before storing.
    "wsbh           %[temp0],    %[temp0]                      \n\t"
587
    "wsbh           %[temp2],    %[temp2]                      \n\t"
588
    "usw            %[temp0],    0(%[dst])                     \n\t"
589
    "usw            %[temp2],    4(%[dst])                     \n\t"
590
#endif
591
    "bne            %[src],      %[p_loop1_end],    0b         \n\t"
592
    // Delay slot: advance 'dst' by the 8 bytes just written.
    " addiu         %[dst],      %[dst],            8          \n\t"
593
  "3:                                                          \n\t"
594
    // Nothing left over: jump to the end.
    "beq            %[src],      %[p_loop2_end],    2f         \n\t"
595
    " nop                                                      \n\t"
596
  // Loop 1: remaining pixels, one per iteration, same packing as above.
  "1:                                                          \n\t"
597
    "lw             %[temp0],    0(%[src])                     \n\t"
598
    "ext            %[temp4],    %[temp0],          8,    16   \n\t"
599
    "ext            %[temp5],    %[temp0],          5,    11   \n\t"
600
    "ext            %[temp0],    %[temp0],          3,    5    \n\t"
601
    "ins            %[temp4],    %[temp5],          0,    11   \n\t"
602
    "addiu          %[src],      %[src],            4          \n\t"
603
    "ins            %[temp4],    %[temp0],          0,    5    \n\t"
604
#if (WEBP_SWAP_16BIT_CSP == 1)
605
    // ush: unaligned halfword store of the low 16 bits.
    "ush            %[temp4],    0(%[dst])                     \n\t"
606
#else
607
    "wsbh           %[temp4],    %[temp4]                      \n\t"
608
    "ush            %[temp4],    0(%[dst])                     \n\t"
609
#endif
610
    "bne            %[src],      %[p_loop2_end],    1b         \n\t"
611
    // Delay slot: advance 'dst' by the 2 bytes just written.
    " addiu         %[dst],      %[dst],            2          \n\t"
612
  "2:                                                          \n\t"
613
    ".set           pop                                        \n\t"
614
    // Scratch registers are early-clobber outputs; 'dst' and 'src' are
    // read-write because the assembly advances them.
    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
615
      [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
616
      [dst]"+&r"(dst), [src]"+&r"(src)
617
    : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
618
    // The assembly reads and writes memory through 'src' and 'dst'.
    : "memory"
619
  );
620
}
621

622
// Copies the low 24 bits of each of 'num_pixels' 32-bit pixels from 'src' to
// 'dst' (3 bytes per pixel), dropping bits 24..31 of every input word.
// The first loop turns four input words (16 bytes) into three output words
// (12 bytes) per iteration, up to 'p_loop1_end' (num_pixels rounded down to a
// multiple of 4); the second loop handles the remaining pixels one at a time
// up to 'p_loop2_end'.
// NOTE(review): the byte order written to 'dst' depends on target endianness;
// the word packing appears to be written for little-endian MIPS -- confirm.
static void ConvertBGRAToBGR_MIPSdspR2(const uint32_t* src,
623
                                       int num_pixels, uint8_t* dst) {
624
  int temp0, temp1, temp2, temp3;
625
  // End of the 4-pixels-at-a-time portion of the input.
  const uint32_t* const p_loop1_end = src + (num_pixels & ~3);
626
  // End of the whole input.
  const uint32_t* const p_loop2_end = src + num_pixels;
627
  __asm__ volatile (
628
    ".set       push                                         \n\t"
629
    // noreorder: the instruction written right after each branch (indented by
    // one extra space) executes in the branch delay slot.
    ".set       noreorder                                    \n\t"
630
    // Skip the unrolled loop entirely when there are fewer than 4 pixels.
    "beq        %[src],      %[p_loop1_end],    3f           \n\t"
631
    " nop                                                    \n\t"
632
  // Loop 0: four pixels in, three words out.
  "0:                                                        \n\t"
633
    "lw         %[temp0],    0(%[src])                       \n\t"
634
    "lw         %[temp1],    4(%[src])                       \n\t"
635
    "lw         %[temp2],    8(%[src])                       \n\t"
636
    "lw         %[temp3],    12(%[src])                      \n\t"
637
    // Word 0: pixel 0 bits 0..23, with pixel 1 bits 0..7 replacing the top
    // byte.
    "ins        %[temp0],    %[temp1],          24,    8     \n\t"
638
    // Word 1: pixel 1 bits 8..23 in the low half, pixel 2 bits 0..15 in the
    // high half.
    "sra        %[temp1],    %[temp1],          8            \n\t"
639
    "ins        %[temp1],    %[temp2],          16,    16    \n\t"
640
    // Word 2: move pixel 2 bits 16..23 to the top byte of temp2, then
    // temp3 = (temp3 << 8) | (temp2 >> 24), i.e. pixel 3 bits 0..23 above
    // pixel 2 bits 16..23.
    "sll        %[temp2],    %[temp2],          8            \n\t"
641
    "balign     %[temp3],    %[temp2],          1            \n\t"
642
    "addiu      %[src],      %[src],            16           \n\t"
643
    // usw: unaligned word stores, 'dst' is a byte pointer.
    "usw        %[temp0],    0(%[dst])                       \n\t"
644
    "usw        %[temp1],    4(%[dst])                       \n\t"
645
    "usw        %[temp3],    8(%[dst])                       \n\t"
646
    "bne        %[src],      %[p_loop1_end],    0b           \n\t"
647
    // Delay slot: advance 'dst' by the 12 bytes just written.
    " addiu     %[dst],      %[dst],            12           \n\t"
648
  "3:                                                        \n\t"
649
    // Nothing left over: jump to the end.
    "beq        %[src],      %[p_loop2_end],    2f           \n\t"
650
    " nop                                                    \n\t"
651
  // Loop 1: remaining pixels, one per iteration.
  "1:                                                        \n\t"
652
    "lw         %[temp0],    0(%[src])                       \n\t"
653
    "addiu      %[src],      %[src],            4            \n\t"
654
    // 'dst' is advanced first, so the stores below use negative offsets.
    "addiu      %[dst],      %[dst],            3            \n\t"
655
    // Low 16 bits of the pixel go to the first two output bytes.
    "ush        %[temp0],    -3(%[dst])                      \n\t"
656
    "sra        %[temp0],    %[temp0],          16           \n\t"
657
    "bne        %[src],      %[p_loop2_end],    1b           \n\t"
658
    // Delay slot: bits 16..23 of the pixel go to the third output byte.
    " sb        %[temp0],    -1(%[dst])                      \n\t"
659
  "2:                                                        \n\t"
660
    ".set       pop                                          \n\t"
661
    // Scratch registers are early-clobber outputs; 'dst' and 'src' are
    // read-write because the assembly advances them.
    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
662
      [temp3]"=&r"(temp3), [dst]"+&r"(dst), [src]"+&r"(src)
663
    : [p_loop1_end]"r"(p_loop1_end), [p_loop2_end]"r"(p_loop2_end)
664
    // The assembly reads and writes memory through 'src' and 'dst'.
    : "memory"
665
  );
666
}
667

668
//------------------------------------------------------------------------------
669
// Entry point
670

671
extern void VP8LDspInitMIPSdspR2(void);
672

673
WEBP_TSAN_IGNORE_FUNCTION void VP8LDspInitMIPSdspR2(void) {
674
  VP8LMapColor32b = MapARGB_MIPSdspR2;
675
  VP8LMapColor8b = MapAlpha_MIPSdspR2;
676

677
  VP8LPredictors[5] = Predictor5_MIPSdspR2;
678
  VP8LPredictors[6] = Predictor6_MIPSdspR2;
679
  VP8LPredictors[7] = Predictor7_MIPSdspR2;
680
  VP8LPredictors[8] = Predictor8_MIPSdspR2;
681
  VP8LPredictors[9] = Predictor9_MIPSdspR2;
682
  VP8LPredictors[10] = Predictor10_MIPSdspR2;
683
  VP8LPredictors[11] = Predictor11_MIPSdspR2;
684
  VP8LPredictors[12] = Predictor12_MIPSdspR2;
685
  VP8LPredictors[13] = Predictor13_MIPSdspR2;
686

687
  VP8LAddGreenToBlueAndRed = AddGreenToBlueAndRed_MIPSdspR2;
688
  VP8LTransformColorInverse = TransformColorInverse_MIPSdspR2;
689

690
  VP8LConvertBGRAToRGB = ConvertBGRAToRGB_MIPSdspR2;
691
  VP8LConvertBGRAToRGBA = ConvertBGRAToRGBA_MIPSdspR2;
692
  VP8LConvertBGRAToRGBA4444 = ConvertBGRAToRGBA4444_MIPSdspR2;
693
  VP8LConvertBGRAToRGB565 = ConvertBGRAToRGB565_MIPSdspR2;
694
  VP8LConvertBGRAToBGR = ConvertBGRAToBGR_MIPSdspR2;
695
}
696

697
#else  // !WEBP_USE_MIPS_DSP_R2
698

699
// Fallback for builds without WEBP_USE_MIPS_DSP_R2.
// NOTE(review): WEBP_DSP_INIT_STUB is defined outside this file; presumably it
// emits a no-op VP8LDspInitMIPSdspR2() so the symbol still exists -- confirm.
WEBP_DSP_INIT_STUB(VP8LDspInitMIPSdspR2)
700

701
#endif  // WEBP_USE_MIPS_DSP_R2
702

703
Product

Resources

Company