CoCalc -- alpha_processing_mips_dsp

GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/libwebp/src/dsp/alpha_processing_mips_dsp_r2.c
²¹³⁶² views
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Utilities for processing transparent channel.
//
// Author(s): Branimir Vasic ([email protected])
//            Djordje Pesut  ([email protected])

15
#include "src/dsp/dsp.h"
16

17
#if defined(WEBP_USE_MIPS_DSP_R2)
18

19
static int DispatchAlpha_MIPSdspR2(const uint8_t* alpha, int alpha_stride,
20
                                   int width, int height,
21
                                   uint8_t* dst, int dst_stride) {
22
  uint32_t alpha_mask = 0xffffffff;
23
  int i, j, temp0;
24

25
  for (j = 0; j < height; ++j) {
26
    uint8_t* pdst = dst;
27
    const uint8_t* palpha = alpha;
28
    for (i = 0; i < (width >> 2); ++i) {
29
      int temp1, temp2, temp3;
30

31
      __asm__ volatile (
32
        "ulw    %[temp0],      0(%[palpha])                \n\t"
33
        "addiu  %[palpha],     %[palpha],     4            \n\t"
34
        "addiu  %[pdst],       %[pdst],       16           \n\t"
35
        "srl    %[temp1],      %[temp0],      8            \n\t"
36
        "srl    %[temp2],      %[temp0],      16           \n\t"
37
        "srl    %[temp3],      %[temp0],      24           \n\t"
38
        "and    %[alpha_mask], %[alpha_mask], %[temp0]     \n\t"
39
        "sb     %[temp0],      -16(%[pdst])                \n\t"
40
        "sb     %[temp1],      -12(%[pdst])                \n\t"
41
        "sb     %[temp2],      -8(%[pdst])                 \n\t"
42
        "sb     %[temp3],      -4(%[pdst])                 \n\t"
43
        : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
44
          [temp3]"=&r"(temp3), [palpha]"+r"(palpha), [pdst]"+r"(pdst),
45
          [alpha_mask]"+r"(alpha_mask)
46
        :
47
        : "memory"
48
      );
49
    }
50

51
    for (i = 0; i < (width & 3); ++i) {
52
      __asm__ volatile (
53
        "lbu    %[temp0],      0(%[palpha])                \n\t"
54
        "addiu  %[palpha],     %[palpha],     1            \n\t"
55
        "sb     %[temp0],      0(%[pdst])                  \n\t"
56
        "and    %[alpha_mask], %[alpha_mask], %[temp0]     \n\t"
57
        "addiu  %[pdst],       %[pdst],       4            \n\t"
58
        : [temp0]"=&r"(temp0), [palpha]"+r"(palpha), [pdst]"+r"(pdst),
59
          [alpha_mask]"+r"(alpha_mask)
60
        :
61
        : "memory"
62
      );
63
    }
64
    alpha += alpha_stride;
65
    dst += dst_stride;
66
  }
67

68
  __asm__ volatile (
69
    "ext    %[temp0],      %[alpha_mask], 0, 16            \n\t"
70
    "srl    %[alpha_mask], %[alpha_mask], 16               \n\t"
71
    "and    %[alpha_mask], %[alpha_mask], %[temp0]         \n\t"
72
    "ext    %[temp0],      %[alpha_mask], 0, 8             \n\t"
73
    "srl    %[alpha_mask], %[alpha_mask], 8                \n\t"
74
    "and    %[alpha_mask], %[alpha_mask], %[temp0]         \n\t"
75
    : [temp0]"=&r"(temp0), [alpha_mask]"+r"(alpha_mask)
76
    :
77
  );
78

79
  return (alpha_mask != 0xff);
80
}
81

82
static void MultARGBRow_MIPSdspR2(uint32_t* const ptr, int width,
83
                                  int inverse) {
84
  int x;
85
  const uint32_t c_00ffffff = 0x00ffffffu;
86
  const uint32_t c_ff000000 = 0xff000000u;
87
  const uint32_t c_8000000  = 0x00800000u;
88
  const uint32_t c_8000080  = 0x00800080u;
89
  for (x = 0; x < width; ++x) {
90
    const uint32_t argb = ptr[x];
91
    if (argb < 0xff000000u) {      // alpha < 255
92
      if (argb <= 0x00ffffffu) {   // alpha == 0
93
        ptr[x] = 0;
94
      } else {
95
        int temp0, temp1, temp2, temp3, alpha;
96
        __asm__ volatile (
97
          "srl          %[alpha],   %[argb],       24                \n\t"
98
          "replv.qb     %[temp0],   %[alpha]                         \n\t"
99
          "and          %[temp0],   %[temp0],      %[c_00ffffff]     \n\t"
100
          "beqz         %[inverse], 0f                               \n\t"
101
          "divu         $zero,      %[c_ff000000], %[alpha]          \n\t"
102
          "mflo         %[temp0]                                     \n\t"
103
        "0:                                                          \n\t"
104
          "andi         %[temp1],   %[argb],       0xff              \n\t"
105
          "ext          %[temp2],   %[argb],       8,             8  \n\t"
106
          "ext          %[temp3],   %[argb],       16,            8  \n\t"
107
          "mul          %[temp1],   %[temp1],      %[temp0]          \n\t"
108
          "mul          %[temp2],   %[temp2],      %[temp0]          \n\t"
109
          "mul          %[temp3],   %[temp3],      %[temp0]          \n\t"
110
          "precrq.ph.w  %[temp1],   %[temp2],      %[temp1]          \n\t"
111
          "addu         %[temp3],   %[temp3],      %[c_8000000]      \n\t"
112
          "addu         %[temp1],   %[temp1],      %[c_8000080]      \n\t"
113
          "precrq.ph.w  %[temp3],   %[argb],       %[temp3]          \n\t"
114
          "precrq.qb.ph %[temp1],   %[temp3],      %[temp1]          \n\t"
115
          : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
116
            [temp3]"=&r"(temp3), [alpha]"=&r"(alpha)
117
          : [inverse]"r"(inverse), [c_00ffffff]"r"(c_00ffffff),
118
            [c_8000000]"r"(c_8000000), [c_8000080]"r"(c_8000080),
119
            [c_ff000000]"r"(c_ff000000), [argb]"r"(argb)
120
          : "memory", "hi", "lo"
121
        );
122
        ptr[x] = temp1;
123
      }
124
    }
125
  }
126
}
127

128
#ifdef WORDS_BIGENDIAN
129
static void PackARGB_MIPSdspR2(const uint8_t* a, const uint8_t* r,
130
                               const uint8_t* g, const uint8_t* b, int len,
131
                               uint32_t* out) {
132
  int temp0, temp1, temp2, temp3, offset;
133
  const int rest = len & 1;
134
  const uint32_t* const loop_end = out + len - rest;
135
  const int step = 4;
136
  __asm__ volatile (
137
    "xor          %[offset],   %[offset], %[offset]    \n\t"
138
    "beq          %[loop_end], %[out],    0f           \n\t"
139
  "2:                                                  \n\t"
140
    "lbux         %[temp0],    %[offset](%[a])         \n\t"
141
    "lbux         %[temp1],    %[offset](%[r])         \n\t"
142
    "lbux         %[temp2],    %[offset](%[g])         \n\t"
143
    "lbux         %[temp3],    %[offset](%[b])         \n\t"
144
    "ins          %[temp1],    %[temp0],  16,     16   \n\t"
145
    "ins          %[temp3],    %[temp2],  16,     16   \n\t"
146
    "addiu        %[out],      %[out],    4            \n\t"
147
    "precr.qb.ph  %[temp0],    %[temp1],  %[temp3]     \n\t"
148
    "sw           %[temp0],    -4(%[out])              \n\t"
149
    "addu         %[offset],   %[offset], %[step]      \n\t"
150
    "bne          %[loop_end], %[out],    2b           \n\t"
151
  "0:                                                  \n\t"
152
    "beq          %[rest],     $zero,     1f           \n\t"
153
    "lbux         %[temp0],    %[offset](%[a])         \n\t"
154
    "lbux         %[temp1],    %[offset](%[r])         \n\t"
155
    "lbux         %[temp2],    %[offset](%[g])         \n\t"
156
    "lbux         %[temp3],    %[offset](%[b])         \n\t"
157
    "ins          %[temp1],    %[temp0],  16,     16   \n\t"
158
    "ins          %[temp3],    %[temp2],  16,     16   \n\t"
159
    "precr.qb.ph  %[temp0],    %[temp1],  %[temp3]     \n\t"
160
    "sw           %[temp0],    0(%[out])               \n\t"
161
  "1:                                                  \n\t"
162
    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
163
      [temp3]"=&r"(temp3), [offset]"=&r"(offset), [out]"+&r"(out)
164
    : [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step),
165
      [loop_end]"r"(loop_end), [rest]"r"(rest)
166
    : "memory"
167
  );
168
}
169
#endif  // WORDS_BIGENDIAN
170

171
static void PackRGB_MIPSdspR2(const uint8_t* r, const uint8_t* g,
172
                              const uint8_t* b, int len, int step,
173
                              uint32_t* out) {
174
  int temp0, temp1, temp2, offset;
175
  const int rest = len & 1;
176
  const int a = 0xff;
177
  const uint32_t* const loop_end = out + len - rest;
178
  __asm__ volatile (
179
    "xor          %[offset],   %[offset], %[offset]    \n\t"
180
    "beq          %[loop_end], %[out],    0f           \n\t"
181
  "2:                                                  \n\t"
182
    "lbux         %[temp0],    %[offset](%[r])         \n\t"
183
    "lbux         %[temp1],    %[offset](%[g])         \n\t"
184
    "lbux         %[temp2],    %[offset](%[b])         \n\t"
185
    "ins          %[temp0],    %[a],      16,     16   \n\t"
186
    "ins          %[temp2],    %[temp1],  16,     16   \n\t"
187
    "addiu        %[out],      %[out],    4            \n\t"
188
    "precr.qb.ph  %[temp0],    %[temp0],  %[temp2]     \n\t"
189
    "sw           %[temp0],    -4(%[out])              \n\t"
190
    "addu         %[offset],   %[offset], %[step]      \n\t"
191
    "bne          %[loop_end], %[out],    2b           \n\t"
192
  "0:                                                  \n\t"
193
    "beq          %[rest],     $zero,     1f           \n\t"
194
    "lbux         %[temp0],    %[offset](%[r])         \n\t"
195
    "lbux         %[temp1],    %[offset](%[g])         \n\t"
196
    "lbux         %[temp2],    %[offset](%[b])         \n\t"
197
    "ins          %[temp0],    %[a],      16,     16   \n\t"
198
    "ins          %[temp2],    %[temp1],  16,     16   \n\t"
199
    "precr.qb.ph  %[temp0],    %[temp0],  %[temp2]     \n\t"
200
    "sw           %[temp0],    0(%[out])               \n\t"
201
  "1:                                                  \n\t"
202
    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
203
      [offset]"=&r"(offset), [out]"+&r"(out)
204
    : [a]"r"(a), [r]"r"(r), [g]"r"(g), [b]"r"(b), [step]"r"(step),
205
      [loop_end]"r"(loop_end), [rest]"r"(rest)
206
    : "memory"
207
  );
208
}
209

210
//------------------------------------------------------------------------------
211
// Entry point
212

213
extern void WebPInitAlphaProcessingMIPSdspR2(void);
214

215
WEBP_TSAN_IGNORE_FUNCTION void WebPInitAlphaProcessingMIPSdspR2(void) {
216
  WebPDispatchAlpha = DispatchAlpha_MIPSdspR2;
217
  WebPMultARGBRow = MultARGBRow_MIPSdspR2;
218
#ifdef WORDS_BIGENDIAN
219
  WebPPackARGB = PackARGB_MIPSdspR2;
220
#endif
221
  WebPPackRGB = PackRGB_MIPSdspR2;
222
}
223

224
#else  // !WEBP_USE_MIPS_DSP_R2
225

226
// Used when WEBP_USE_MIPS_DSP_R2 is not defined.
// NOTE(review): the macro is defined outside this file; presumably it expands
// to an empty WebPInitAlphaProcessingMIPSdspR2() so the symbol still exists
// -- confirm against its definition.
WEBP_DSP_INIT_STUB(WebPInitAlphaProcessingMIPSdspR2)
227

228
#endif  // WEBP_USE_MIPS_DSP_R2
229

230
Product

Resources

Company