Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Tetragramm
GitHub Repository: Tetragramm/opencv
Path: blob/master/3rdparty/carotene/src/sobel.cpp
16337 views
1
/*
2
* By downloading, copying, installing or using the software you agree to this license.
3
* If you do not agree to this license, do not download, install,
4
* copy or use the software.
5
*
6
*
7
* License Agreement
8
* For Open Source Computer Vision Library
9
* (3-clause BSD License)
10
*
11
* Copyright (C) 2012-2015, NVIDIA Corporation, all rights reserved.
12
* Third party copyrights are property of their respective owners.
13
*
14
* Redistribution and use in source and binary forms, with or without modification,
15
* are permitted provided that the following conditions are met:
16
*
17
* * Redistributions of source code must retain the above copyright notice,
18
* this list of conditions and the following disclaimer.
19
*
20
* * Redistributions in binary form must reproduce the above copyright notice,
21
* this list of conditions and the following disclaimer in the documentation
22
* and/or other materials provided with the distribution.
23
*
24
* * Neither the names of the copyright holders nor the names of the contributors
25
* may be used to endorse or promote products derived from this software
26
* without specific prior written permission.
27
*
28
* This software is provided by the copyright holders and contributors "as is" and
29
* any express or implied warranties, including, but not limited to, the implied
30
* warranties of merchantability and fitness for a particular purpose are disclaimed.
31
* In no event shall copyright holders or contributors be liable for any direct,
32
* indirect, incidental, special, exemplary, or consequential damages
33
* (including, but not limited to, procurement of substitute goods or services;
34
* loss of use, data, or profits; or business interruption) however caused
35
* and on any theory of liability, whether in contract, strict liability,
36
* or tort (including negligence or otherwise) arising in any way out of
37
* the use of this software, even if advised of the possibility of such damage.
38
*/
39
40
#include <vector>
41
42
#include "common.hpp"
43
44
namespace CAROTENE_NS {
45
46
bool isSobel3x3Supported(const Size2D &size, BORDER_MODE border,
47
s32 dx, s32 dy, Margin borderMargin)
48
{
49
return dx < 3 && dx >= 0 &&
50
dy < 3 && dy >= 0 &&
51
(dx + dy) > 0 &&
52
isSeparableFilter3x3Supported(size, border, dx, dy, borderMargin);
53
}
54
55
void Sobel3x3(const Size2D &size,
56
const u8 * srcBase, ptrdiff_t srcStride,
57
s16 * dstBase, ptrdiff_t dstStride,
58
s32 dx, s32 dy,
59
BORDER_MODE borderType, u8 borderValue, Margin borderMargin)
60
{
61
internal::assertSupportedConfiguration(isSobel3x3Supported(size, borderType, dx, dy, borderMargin));
62
#ifdef CAROTENE_NEON
63
SeparableFilter3x3(size, srcBase, srcStride, dstBase, dstStride,
64
dx, dy, 0, 0,
65
borderType, borderValue, borderMargin);
66
#else
67
(void)srcBase;
68
(void)srcStride;
69
(void)dstBase;
70
(void)dstStride;
71
(void)borderValue;
72
#endif
73
}
74
75
bool isSobel3x3f32Supported(const Size2D &size, BORDER_MODE border,
76
s32 dx, s32 dy)
77
{
78
return isSupportedConfiguration() &&
79
dx < 3 && dx >= 0 &&
80
dy < 3 && dy >= 0 &&
81
(dx + dy) > 0 &&
82
size.width >= 4 && size.height >= 2 &&
83
(border == BORDER_MODE_CONSTANT ||
84
border == BORDER_MODE_REFLECT ||
85
border == BORDER_MODE_REFLECT101 ||
86
border == BORDER_MODE_REPLICATE );
87
}
88
89
void Sobel3x3(const Size2D &size,
90
const f32 * srcBase, ptrdiff_t srcStride,
91
f32 * dstBase, ptrdiff_t dstStride,
92
s32 dx, s32 dy,
93
BORDER_MODE borderType, f32 borderValue)
94
{
95
internal::assertSupportedConfiguration(isSobel3x3f32Supported(size, borderType, dx, dy));
96
#ifdef CAROTENE_NEON
97
std::vector<f32> _tmp;
98
f32 *tmp = 0;
99
if (borderType == BORDER_MODE_CONSTANT)
100
{
101
_tmp.assign(size.width + 2, borderValue);
102
tmp = &_tmp[1];
103
}
104
105
ptrdiff_t delta = (ptrdiff_t)((size.width + 2 + 31) & -32);//align size
106
std::vector<f32> _tempBuf((delta << 1) + 64);
107
f32 *trow0 = internal::alignPtr(&_tempBuf[1], 32), *trow1 = internal::alignPtr(trow0 + delta, 32);
108
109
for( size_t y = 0; y < size.height; y++ )
110
{
111
const f32* srow0;
112
const f32* srow1 = internal::getRowPtr(srcBase, srcStride, y);
113
const f32* srow2;
114
f32* drow = internal::getRowPtr(dstBase, dstStride, y > 0 ? y-1 : 0);
115
f32* drow1 = internal::getRowPtr(dstBase, dstStride, y);
116
if (borderType == BORDER_MODE_REFLECT101) {
117
srow0 = internal::getRowPtr(srcBase, srcStride, y > 0 ? y-1 : 1);
118
srow2 = internal::getRowPtr(srcBase, srcStride, y < size.height-1 ? y+1 : size.height-2);
119
} else if (borderType == BORDER_MODE_CONSTANT) {
120
srow0 = y > 0 ? internal::getRowPtr(srcBase, srcStride, y-1) : tmp;
121
srow2 = y < size.height-1 ? internal::getRowPtr(srcBase, srcStride, y+1) : tmp;
122
} else { // BORDER_MODE_REFLECT || BORDER_MODE_REPLICATE
123
srow0 = internal::getRowPtr(srcBase, srcStride, y > 0 ? y-1 : 0);
124
srow2 = internal::getRowPtr(srcBase, srcStride, y < size.height-1 ? y+1 : size.height-1);
125
}
126
127
float32x4_t tprev = vmovq_n_f32(0.f);
128
float32x4_t tcurr = vmovq_n_f32(0.f);
129
float32x4_t tnext = vmovq_n_f32(0.f);
130
float32x4_t t0, t1, t2;
131
// do vertical convolution
132
size_t x = 0, bcolsn = y + 2 < size.height ? size.width : (size.width - 4);
133
for( ; x <= bcolsn; x += 4 )
134
{
135
internal::prefetch(srow0 + x);
136
internal::prefetch(srow1 + x);
137
internal::prefetch(srow2 + x);
138
139
float32x4_t x0 = vld1q_f32(srow0 + x);
140
float32x4_t x1 = vld1q_f32(srow1 + x);
141
float32x4_t x2 = vld1q_f32(srow2 + x);
142
143
tprev = tcurr;
144
tcurr = tnext;
145
if(!dy)
146
{
147
tnext = vaddq_f32(vaddq_f32(vaddq_f32(x1, x1), x2), x0);
148
}
149
else if(dy == 2)
150
{
151
tnext = vsubq_f32(vsubq_f32(x2, x1), vsubq_f32(x1, x0));
152
}
153
else
154
{
155
tnext = vsubq_f32(x2, x0);
156
}
157
158
if(!x) {
159
tcurr = tnext;
160
// make border
161
if (borderType == BORDER_MODE_CONSTANT)
162
{
163
tcurr = vsetq_lane_f32(borderValue,tcurr, 3);
164
}
165
else if (borderType == BORDER_MODE_REFLECT101)
166
{
167
tcurr = vsetq_lane_f32(vgetq_lane_f32(tcurr, 1),tcurr, 3);
168
}
169
else // BORDER_MODE_REFLECT || BORDER_MODE_REPLICATE
170
{
171
tcurr = vsetq_lane_f32(vgetq_lane_f32(tcurr, 0),tcurr, 3);
172
}
173
continue;
174
}
175
176
internal::prefetch(trow0 + x);
177
internal::prefetch(trow1 + x);
178
179
t0 = vextq_f32(tprev, tcurr, 3);
180
t1 = tcurr;
181
t2 = vextq_f32(tcurr, tnext, 1);
182
if(!dx)
183
{
184
t0 = vaddq_f32(t0, vaddq_f32(vaddq_f32(t1, t1), t2));
185
}
186
else if(dx == 2)
187
{
188
t0 = vsubq_f32(vsubq_f32(t2, t1), vsubq_f32(t1, t0));
189
}
190
else
191
{
192
t0 = vsubq_f32(t2, t0);
193
}
194
195
if(!(y%2))
196
{
197
vst1q_f32(trow0 + x - 4, t0);
198
}
199
else
200
{
201
vst1q_f32(trow1 + x - 4, t0);
202
}
203
}
204
x -= 4;
205
if(x == size.width){
206
x--;
207
}
208
f32 prevx = 0, rowx = 0, nextx = 0;
209
if(!dy)
210
{
211
prevx = x > 0 ? srow2[x-1] + 2*srow1[x-1] + srow0[x-1] :
212
(borderType == BORDER_MODE_REFLECT101 ? srow2[1] + 2*srow1[1] + srow0[1] :
213
(borderType == BORDER_MODE_CONSTANT ? 4*borderValue :
214
srow2[0] + 2*srow1[0] + srow0[0]) );
215
rowx = srow2[x] + 2*srow1[x] + srow0[x];
216
}
217
else if(dy == 2)
218
{
219
prevx = x > 0 ? srow2[x-1] - 2*srow1[x-1] + srow0[x-1] :
220
(borderType == BORDER_MODE_REFLECT101 ? srow2[1] - 2*srow1[1] + srow0[1] :
221
(borderType == BORDER_MODE_CONSTANT ? 0.f :
222
srow2[0] - 2*srow1[0] + srow0[0]) );
223
rowx = srow2[x] - 2*srow1[x] + srow0[x];
224
}
225
else
226
{
227
prevx = x > 0 ? srow2[x-1] - srow0[x-1] :
228
(borderType == BORDER_MODE_REFLECT101 ? srow2[1] - srow0[1] :
229
(borderType == BORDER_MODE_CONSTANT ? 0.f :
230
srow2[0] - srow0[0]) );
231
rowx = srow2[x] - srow0[x];
232
}
233
234
for( ; x < size.width; x++ )
235
{
236
if(x+1 == size.width) {
237
// make border
238
if (borderType == BORDER_MODE_CONSTANT)
239
{
240
if(!dy) {
241
nextx = 4*borderValue;
242
} else {
243
nextx = 0.f;
244
}
245
} else if (borderType == BORDER_MODE_REFLECT101)
246
{
247
if(!dy) {
248
nextx = srow2[x-1] + 2*srow1[x-1] + srow0[x-1];
249
} else if(dy == 2) {
250
nextx = srow2[x-1] - 2*srow1[x-1] + srow0[x-1];
251
} else {
252
nextx = srow2[x-1] - srow0[x-1];
253
}
254
} else {
255
if(!dy) {
256
nextx = srow2[x] + 2*srow1[x] + srow0[x];
257
} else if(dy == 2) {
258
nextx = srow2[x] - 2*srow1[x] + srow0[x];
259
} else {
260
nextx = srow2[x] - srow0[x];
261
}
262
}
263
} else {
264
if(!dy) {
265
nextx = srow2[x+1] + 2*srow1[x+1] + srow0[x+1];
266
} else if(dy == 2) {
267
nextx = srow2[x+1] - 2*srow1[x+1] + srow0[x+1];
268
} else {
269
nextx = srow2[x+1] - srow0[x+1];
270
}
271
}
272
f32 res;
273
if(dx==1) {
274
res = nextx - prevx;
275
} else if(!dx) {
276
res = prevx + 2*rowx + nextx;
277
} else {
278
res = prevx - 2*rowx + nextx;
279
}
280
if(!(y%2)) {
281
*(trow0+x) = res;
282
} else {
283
*(trow1+x) = res;
284
}
285
prevx = rowx;
286
rowx = nextx;
287
}
288
289
if(y>0) {
290
for(size_t x1 = 0; x1 < size.width; x1++ )
291
{
292
if(y%2)
293
*(drow + x1) = trow0[x1];
294
else
295
*(drow + x1) = trow1[x1];
296
}
297
}
298
if(y == size.height-1) {
299
for(size_t x1 = 0; x1 < size.width; x1++ )
300
{
301
if(!(y%2))
302
*(drow1 + x1) = trow0[x1];
303
else
304
*(drow1 + x1) = trow1[x1];
305
}
306
}
307
}
308
#else
309
(void)srcBase;
310
(void)srcStride;
311
(void)dstBase;
312
(void)dstStride;
313
(void)borderValue;
314
#endif
315
}
316
317
} // namespace CAROTENE_NS
318
319