Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Tetragramm
GitHub Repository: Tetragramm/opencv
Path: blob/master/3rdparty/libwebp/src/dsp/mips_macro.h
16348 views
1
// Copyright 2014 Google Inc. All Rights Reserved.
2
//
3
// Use of this source code is governed by a BSD-style license
4
// that can be found in the COPYING file in the root of the source
5
// tree. An additional intellectual property rights grant can be found
6
// in the file PATENTS. All contributing project authors may
7
// be found in the AUTHORS file in the root of the source tree.
8
// -----------------------------------------------------------------------------
9
//
10
// MIPS common macros
11
12
#ifndef WEBP_DSP_MIPS_MACRO_H_
13
#define WEBP_DSP_MIPS_MACRO_H_
14
15
// Defines WORK_AROUND_GCC when compiling with a GNU-compatible compiler for
// Android and LOCAL_GCC_VERSION equals 0x409.
// LOCAL_GCC_VERSION is not defined in this header; it has to be provided by
// the including code. If it is undefined it evaluates to 0 inside #if, so
// WORK_AROUND_GCC stays undefined.
// NOTE(review): 0x409 presumably encodes GCC 4.9 -- confirm against the
// definition of LOCAL_GCC_VERSION.
#if defined(__GNUC__) && defined(__ANDROID__) && LOCAL_GCC_VERSION == 0x409
#define WORK_AROUND_GCC
#endif
18
19
// STR(s) turns its argument into a string literal exactly as written.
// XSTR(s) macro-expands its argument first and then stringifies the result,
// so a macro passed to XSTR contributes its value rather than its name.
#define STR(s) #s
#define XSTR(s) STR(s)
21
22
// Emits a paired-halfword add and subtract of the same two inputs:
//   O0[31..16 | 15..0] = I0[31..16 | 15..0] + I1[31..16 | 15..0]
//   O1[31..16 | 15..0] = I0[31..16 | 15..0] - I1[31..16 | 15..0]
// O - output
// I - input (macro doesn't change it)
// Every argument is the name of an inline-asm operand (emitted as %[name]).
// The macro expands to a string literal holding two instructions.
#define ADD_SUB_HALVES(O0, O1, \
                       I0, I1) \
  "addq.ph %[" #O0 "], %[" #I0 "], %[" #I1 "] \n\t" \
  "subq.ph %[" #O1 "], %[" #I0 "], %[" #I1 "] \n\t"
30
31
// Emits two halfword loads (lh) relative to the inline-asm operand named "in".
// The operand name "in" is fixed by the macro, so the enclosing asm statement
// must declare it.
// O - output (name of an inline-asm operand)
// I - input (macro doesn't change it)
// I[0/1] - offset in bytes, pasted verbatim in front of "(%[in])"
#define LOAD_IN_X2(O0, O1, \
                   I0, I1) \
  "lh %[" #O0 "], " #I0 "(%[in]) \n\t" \
  "lh %[" #O1 "], " #I1 "(%[in]) \n\t"
38
39
// Emits four unaligned word loads (ulw) relative to the operand named I0.
// O0..O3 - outputs (names of inline-asm operands)
// I0     - location (name of the base-address operand)
// I1..I4 - offsets in bytes, one per load
// I5..I8 - multipliers applied to I9, one per load
// I9     - stride; it is passed through XSTR, so it may itself be a macro
//          and is expanded before being pasted into the instruction
// The displacement of load k is emitted as the text "I<k>+I9*I<k+4>", i.e.
// O0 is loaded from I1 + I9 * I5 bytes past I0, O1 from I2 + I9 * I6, etc.
#define LOAD_WITH_OFFSET_X4(O0, O1, O2, O3, \
                            I0, I1, I2, I3, I4, I5, I6, I7, I8, I9) \
  "ulw %[" #O0 "], " #I1 "+" XSTR(I9) "*" #I5 "(%[" #I0 "]) \n\t" \
  "ulw %[" #O1 "], " #I2 "+" XSTR(I9) "*" #I6 "(%[" #I0 "]) \n\t" \
  "ulw %[" #O2 "], " #I3 "+" XSTR(I9) "*" #I7 "(%[" #I0 "]) \n\t" \
  "ulw %[" #O3 "], " #I4 "+" XSTR(I9) "*" #I8 "(%[" #I0 "]) \n\t"
47
48
// Emits, in this order:
//   O0 = I0 * kC2, O1 = I0 * kC1, O2 = I1 * kC2, O3 = I1 * kC1,
//   O4 = I2 * kC2, O5 = I2 * kC1, O6 = I3 * kC2, O7 = I3 * kC1   (mul)
//   O0..O7 = O0..O7 >> 16                       (sra, arithmetic shift)
//   IO0 = IO0 + I4, IO1 = IO1 + I5              (addu)
//   IO2 = IO2 - I6, IO3 = IO3 - I7              (subu)
// kC1 and kC2 are operand names fixed by the macro; the enclosing asm
// statement must declare them.
// O - output
// IO - input/output
// I - input (macro doesn't change it)
// Every argument is the name of an inline-asm operand (emitted as %[name]).
#define MUL_SHIFT_SUM(O0, O1, O2, O3, O4, O5, O6, O7, \
                      IO0, IO1, IO2, IO3, \
                      I0, I1, I2, I3, I4, I5, I6, I7) \
  "mul %[" #O0 "], %[" #I0 "], %[kC2] \n\t" \
  "mul %[" #O1 "], %[" #I0 "], %[kC1] \n\t" \
  "mul %[" #O2 "], %[" #I1 "], %[kC2] \n\t" \
  "mul %[" #O3 "], %[" #I1 "], %[kC1] \n\t" \
  "mul %[" #O4 "], %[" #I2 "], %[kC2] \n\t" \
  "mul %[" #O5 "], %[" #I2 "], %[kC1] \n\t" \
  "mul %[" #O6 "], %[" #I3 "], %[kC2] \n\t" \
  "mul %[" #O7 "], %[" #I3 "], %[kC1] \n\t" \
  "sra %[" #O0 "], %[" #O0 "], 16 \n\t" \
  "sra %[" #O1 "], %[" #O1 "], 16 \n\t" \
  "sra %[" #O2 "], %[" #O2 "], 16 \n\t" \
  "sra %[" #O3 "], %[" #O3 "], 16 \n\t" \
  "sra %[" #O4 "], %[" #O4 "], 16 \n\t" \
  "sra %[" #O5 "], %[" #O5 "], 16 \n\t" \
  "sra %[" #O6 "], %[" #O6 "], 16 \n\t" \
  "sra %[" #O7 "], %[" #O7 "], 16 \n\t" \
  "addu %[" #IO0 "], %[" #IO0 "], %[" #I4 "] \n\t" \
  "addu %[" #IO1 "], %[" #IO1 "], %[" #I5 "] \n\t" \
  "subu %[" #IO2 "], %[" #IO2 "], %[" #I6 "] \n\t" \
  "subu %[" #IO3 "], %[" #IO3 "], %[" #I7 "] \n\t"
74
75
// Emits two "ins" instructions that copy the low halfword of an input into
// the upper halfword of the matching output:
//   O0 = I0[15..0] | O0[15..0]
//   O1 = I1[15..0] | O1[15..0]
// O - output (its lower halfword is kept, so its previous value matters)
// I - input (macro doesn't change it)
// Every argument is the name of an inline-asm operand (emitted as %[name]).
#define INSERT_HALF_X2(O0, O1, \
                       I0, I1) \
  "ins %[" #O0 "], %[" #I0 "], 16, 16 \n\t" \
  "ins %[" #O1 "], %[" #I1 "], 16, 16 \n\t"
81
82
// Emits four arithmetic right shifts by 16 bits: O<k> = I<k> >> 16.
// O - output
// I - input (macro doesn't change it)
// Every argument is the name of an inline-asm operand (emitted as %[name]).
#define SRA_16(O0, O1, O2, O3, \
               I0, I1, I2, I3) \
  "sra %[" #O0 "], %[" #I0 "], 16 \n\t" \
  "sra %[" #O1 "], %[" #I1 "], 16 \n\t" \
  "sra %[" #O2 "], %[" #I2 "], 16 \n\t" \
  "sra %[" #O3 "], %[" #I3 "], 16 \n\t"
90
91
// Paired-halfword butterfly followed by an arithmetic shift right by 3.
// For k = 0..3 (both 16-bit lanes are processed independently):
//   O(2k)  [31..16 | 15..0] = I<k>[31..16 | 15..0] + I<k+4>[31..16 | 15..0]
//   O(2k+1)[31..16 | 15..0] = I<k>[31..16 | 15..0] - I<k+4>[31..16 | 15..0]
// then, for every output O0..O7:
//   O[31..16 | 15..0] = O[31..16 >> 3 | 15..0 >> 3]
// O - output
// I - input (macro doesn't change it)
// Every argument is the name of an inline-asm operand (emitted as %[name]).
#define SHIFT_R_SUM_X2(O0, O1, O2, O3, O4, O5, O6, O7, \
                       I0, I1, I2, I3, I4, I5, I6, I7) \
  "addq.ph %[" #O0 "], %[" #I0 "], %[" #I4 "] \n\t" \
  "subq.ph %[" #O1 "], %[" #I0 "], %[" #I4 "] \n\t" \
  "addq.ph %[" #O2 "], %[" #I1 "], %[" #I5 "] \n\t" \
  "subq.ph %[" #O3 "], %[" #I1 "], %[" #I5 "] \n\t" \
  "addq.ph %[" #O4 "], %[" #I2 "], %[" #I6 "] \n\t" \
  "subq.ph %[" #O5 "], %[" #I2 "], %[" #I6 "] \n\t" \
  "addq.ph %[" #O6 "], %[" #I3 "], %[" #I7 "] \n\t" \
  "subq.ph %[" #O7 "], %[" #I3 "], %[" #I7 "] \n\t" \
  "shra.ph %[" #O0 "], %[" #O0 "], 3 \n\t" \
  "shra.ph %[" #O1 "], %[" #O1 "], 3 \n\t" \
  "shra.ph %[" #O2 "], %[" #O2 "], 3 \n\t" \
  "shra.ph %[" #O3 "], %[" #O3 "], 3 \n\t" \
  "shra.ph %[" #O4 "], %[" #O4 "], 3 \n\t" \
  "shra.ph %[" #O5 "], %[" #O5 "], 3 \n\t" \
  "shra.ph %[" #O6 "], %[" #O6 "], 3 \n\t" \
  "shra.ph %[" #O7 "], %[" #O7 "], 3 \n\t"
115
116
// Regroups the halfwords of each (I, IO) pair: O receives the two upper
// halfwords, IO is rewritten to hold the two lower halfwords.
// precrq.ph.w temp0, temp8, temp2
//   temp0 = temp8[31..16] | temp2[31..16]
// ins temp2, temp8, 16, 16
//   temp2 = temp8[15..0] | temp2[15..0]
// Each precrq.ph.w is emitted before the ins that overwrites the same IO, so
// it still sees the original upper halfword of IO.
// O - output
// IO - input/output
// I - input (macro doesn't change it)
// Every argument is the name of an inline-asm operand (emitted as %[name]).
#define PACK_2_HALVES_TO_WORD(O0, O1, O2, O3, \
                              IO0, IO1, IO2, IO3, \
                              I0, I1, I2, I3) \
  "precrq.ph.w %[" #O0 "], %[" #I0 "], %[" #IO0 "] \n\t" \
  "precrq.ph.w %[" #O1 "], %[" #I1 "], %[" #IO1 "] \n\t" \
  "ins %[" #IO0 "], %[" #I0 "], 16, 16 \n\t" \
  "ins %[" #IO1 "], %[" #I1 "], 16, 16 \n\t" \
  "precrq.ph.w %[" #O2 "], %[" #I2 "], %[" #IO2 "] \n\t" \
  "precrq.ph.w %[" #O3 "], %[" #I3 "], %[" #IO3 "] \n\t" \
  "ins %[" #IO2 "], %[" #I2 "], 16, 16 \n\t" \
  "ins %[" #IO3 "], %[" #I3 "], 16, 16 \n\t"
134
135
// Zero-extends the four bytes of each input word into two words of paired
// unsigned halfwords (low byte pair first, then high byte pair).
// preceu.ph.qbr temp0, temp8
//   temp0 = 0 | temp8[15..8] | 0 | temp8[7..0]
// preceu.ph.qbl temp1, temp8
//   temp1 = 0 | temp8[31..24] | 0 | temp8[23..16]
// For k = 0..3, O(2k) gets the "qbr" result and O(2k+1) the "qbl" result
// of I<k>.
// O - output
// I - input (macro doesn't change it)
// Every argument is the name of an inline-asm operand (emitted as %[name]).
#define CONVERT_2_BYTES_TO_HALF(O0, O1, O2, O3, O4, O5, O6, O7, \
                                I0, I1, I2, I3) \
  "preceu.ph.qbr %[" #O0 "], %[" #I0 "] \n\t" \
  "preceu.ph.qbl %[" #O1 "], %[" #I0 "] \n\t" \
  "preceu.ph.qbr %[" #O2 "], %[" #I1 "] \n\t" \
  "preceu.ph.qbl %[" #O3 "], %[" #I1 "] \n\t" \
  "preceu.ph.qbr %[" #O4 "], %[" #I2 "] \n\t" \
  "preceu.ph.qbl %[" #O5 "], %[" #I2 "] \n\t" \
  "preceu.ph.qbr %[" #O6 "], %[" #I3 "] \n\t" \
  "preceu.ph.qbl %[" #O7 "], %[" #I3 "] \n\t"
151
152
// Adds, saturates to unsigned bytes, packs and stores four words.
// temp0[31..16 | 15..0] = temp0[31..16 | 15..0] + temp8[31..16 | 15..0]
// temp0[31..16 | 15..0] = temp0[31..16 <<(s) 7 | 15..0 <<(s) 7]
// temp1..temp7 same as temp0
// precrqu_s.qb.ph temp0, temp1, temp0:
//   temp0 = sat(temp1[31..16]) | sat(temp1[15..0]) |
//           sat(temp0[31..16]) | sat(temp0[15..0])
//   where sat() reduces a halfword to an unsigned byte taken from bits
//   14..7, giving 0 for negative values and 0xff on overflow. After the
//   saturating shift by 7 this amounts to clamping each sum to 0..255.
// store temp0 to dst (unaligned word store, usw)
// IO - input/output (IO0..IO7; all are overwritten)
// I - input (macro doesn't change it)
//   I0..I7   - addends for IO0..IO7
//   I8       - name of the destination base-address operand
//   I9..I12  - multipliers applied to I13, one per stored word
//   I13      - stride; it is passed through XSTR, so it may itself be a macro
// The four stores write IO0, IO2, IO4 and IO6 at byte offsets I13 * I9,
// I13 * I10, I13 * I11 and I13 * I12 from I8.
#define STORE_SAT_SUM_X2(IO0, IO1, IO2, IO3, IO4, IO5, IO6, IO7, \
                         I0, I1, I2, I3, I4, I5, I6, I7, \
                         I8, I9, I10, I11, I12, I13) \
  "addq.ph %[" #IO0 "], %[" #IO0 "], %[" #I0 "] \n\t" \
  "addq.ph %[" #IO1 "], %[" #IO1 "], %[" #I1 "] \n\t" \
  "addq.ph %[" #IO2 "], %[" #IO2 "], %[" #I2 "] \n\t" \
  "addq.ph %[" #IO3 "], %[" #IO3 "], %[" #I3 "] \n\t" \
  "addq.ph %[" #IO4 "], %[" #IO4 "], %[" #I4 "] \n\t" \
  "addq.ph %[" #IO5 "], %[" #IO5 "], %[" #I5 "] \n\t" \
  "addq.ph %[" #IO6 "], %[" #IO6 "], %[" #I6 "] \n\t" \
  "addq.ph %[" #IO7 "], %[" #IO7 "], %[" #I7 "] \n\t" \
  "shll_s.ph %[" #IO0 "], %[" #IO0 "], 7 \n\t" \
  "shll_s.ph %[" #IO1 "], %[" #IO1 "], 7 \n\t" \
  "shll_s.ph %[" #IO2 "], %[" #IO2 "], 7 \n\t" \
  "shll_s.ph %[" #IO3 "], %[" #IO3 "], 7 \n\t" \
  "shll_s.ph %[" #IO4 "], %[" #IO4 "], 7 \n\t" \
  "shll_s.ph %[" #IO5 "], %[" #IO5 "], 7 \n\t" \
  "shll_s.ph %[" #IO6 "], %[" #IO6 "], 7 \n\t" \
  "shll_s.ph %[" #IO7 "], %[" #IO7 "], 7 \n\t" \
  "precrqu_s.qb.ph %[" #IO0 "], %[" #IO1 "], %[" #IO0 "] \n\t" \
  "precrqu_s.qb.ph %[" #IO2 "], %[" #IO3 "], %[" #IO2 "] \n\t" \
  "precrqu_s.qb.ph %[" #IO4 "], %[" #IO5 "], %[" #IO4 "] \n\t" \
  "precrqu_s.qb.ph %[" #IO6 "], %[" #IO7 "], %[" #IO6 "] \n\t" \
  "usw %[" #IO0 "], " XSTR(I13) "*" #I9 "(%[" #I8 "]) \n\t" \
  "usw %[" #IO2 "], " XSTR(I13) "*" #I10 "(%[" #I8 "]) \n\t" \
  "usw %[" #IO4 "], " XSTR(I13) "*" #I11 "(%[" #I8 "]) \n\t" \
  "usw %[" #IO6 "], " XSTR(I13) "*" #I12 "(%[" #I8 "]) \n\t"
187
188
// Expands to the output-operand section of an extended asm statement,
// including the leading ':'. It declares temp1..temp10 as named operands
// with the "=&r" constraint (write-only register, early clobber).
// The C variables temp1..temp10 must be in scope where the macro is used.
#define OUTPUT_EARLY_CLOBBER_REGS_10() \
  : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), \
    [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6), \
    [temp7]"=&r"(temp7), [temp8]"=&r"(temp8), [temp9]"=&r"(temp9), \
    [temp10]"=&r"(temp10)
193
194
// Extends OUTPUT_EARLY_CLOBBER_REGS_10() with temp11..temp18, giving an
// output-operand section (leading ':' included) that declares temp1..temp18
// as named "=&r" (write-only register, early clobber) operands.
// The C variables temp1..temp18 must be in scope where the macro is used.
#define OUTPUT_EARLY_CLOBBER_REGS_18() \
  OUTPUT_EARLY_CLOBBER_REGS_10(), \
  [temp11]"=&r"(temp11), [temp12]"=&r"(temp12), [temp13]"=&r"(temp13), \
  [temp14]"=&r"(temp14), [temp15]"=&r"(temp15), [temp16]"=&r"(temp16), \
  [temp17]"=&r"(temp17), [temp18]"=&r"(temp18)
199
200
#endif // WEBP_DSP_MIPS_MACRO_H_
201
202