Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Tetragramm
GitHub Repository: Tetragramm/opencv
Path: blob/master/3rdparty/carotene/src/flip.cpp
16337 views
1
/*
2
* By downloading, copying, installing or using the software you agree to this license.
3
* If you do not agree to this license, do not download, install,
4
* copy or use the software.
5
*
6
*
7
* License Agreement
8
* For Open Source Computer Vision Library
9
* (3-clause BSD License)
10
*
11
* Copyright (C) 2014, NVIDIA Corporation, all rights reserved.
12
* Third party copyrights are property of their respective owners.
13
*
14
* Redistribution and use in source and binary forms, with or without modification,
15
* are permitted provided that the following conditions are met:
16
*
17
* * Redistributions of source code must retain the above copyright notice,
18
* this list of conditions and the following disclaimer.
19
*
20
* * Redistributions in binary form must reproduce the above copyright notice,
21
* this list of conditions and the following disclaimer in the documentation
22
* and/or other materials provided with the distribution.
23
*
24
* * Neither the names of the copyright holders nor the names of the contributors
25
* may be used to endorse or promote products derived from this software
26
* without specific prior written permission.
27
*
28
* This software is provided by the copyright holders and contributors "as is" and
29
* any express or implied warranties, including, but not limited to, the implied
30
* warranties of merchantability and fitness for a particular purpose are disclaimed.
31
* In no event shall copyright holders or contributors be liable for any direct,
32
* indirect, incidental, special, exemplary, or consequential damages
33
* (including, but not limited to, procurement of substitute goods or services;
34
* loss of use, data, or profits; or business interruption) however caused
35
* and on any theory of liability, whether in contract, strict liability,
36
* or tort (including negligence or otherwise) arising in any way out of
37
* the use of this software, even if advised of the possibility of such damage.
38
*/
39
40
#include "common.hpp"
41
#include "vtransform.hpp"
42
43
#include <cstring>
44
45
namespace CAROTENE_NS {
46
47
/*
 * Tells whether flip() accepts the given mode / element-size combination.
 *
 * Nothing is supported unless isSupportedConfiguration() holds. Given that:
 *   - FLIP_VERTICAL_MODE is accepted for any elemSize;
 *   - FLIP_HORIZONTAL_MODE and FLIP_BOTH_MODE are accepted only when
 *     elemSize is 1, 2, 3 or 4.
 */
bool isFlipSupported(FLIP_MODE flipMode, u32 elemSize)
{
    if (!isSupportedConfiguration())
        return false;

    // A purely vertical flip places no restriction on the element size.
    if (flipMode == FLIP_VERTICAL_MODE)
        return true;

    const bool reversesColumns = (flipMode == FLIP_HORIZONTAL_MODE) ||
                                 (flipMode == FLIP_BOTH_MODE);
    const bool elemSizeHandled = (elemSize >= 1) && (elemSize <= 4);

    return reversesColumns && elemSizeHandled;
}
54
55
#ifdef CAROTENE_NEON
56
57
namespace {
58
59
template <typename T>
60
void flip(const Size2D & size,
61
const void * srcBase, ptrdiff_t srcStride,
62
void * dstBase, ptrdiff_t dstStride,
63
FLIP_MODE flipMode)
64
{
65
using namespace internal;
66
67
typedef typename VecTraits<T>::vec128 vec128;
68
typedef typename VecTraits<T>::vec64 vec64;
69
70
u32 step_base = 16 / sizeof(T), step_tail = 8 / sizeof(T);
71
size_t roiw_base = size.width >= (step_base - 1) ? size.width - step_base + 1 : 0;
72
size_t roiw_tail = size.width >= (step_tail - 1) ? size.width - step_tail + 1 : 0;
73
74
for (size_t i = 0; i < size.height; ++i)
75
{
76
const T * src = getRowPtr((const T *)srcBase, srcStride, i);
77
T * dst = getRowPtr((T *)dstBase, dstStride, (flipMode & FLIP_VERTICAL_MODE) != 0 ? size.height - i - 1 : i);
78
size_t js = 0, jd = size.width;
79
80
for (; js < roiw_base; js += step_base, jd -= step_base)
81
{
82
prefetch(src + js);
83
84
vec128 v_src = vld1q(src + js);
85
vec128 v_dst = vrev64q(v_src);
86
v_dst = vcombine(vget_high(v_dst), vget_low(v_dst));
87
vst1q(dst + jd - step_base, v_dst);
88
}
89
for (; js < roiw_tail; js += step_tail, jd -= step_tail)
90
{
91
vec64 v_src = vld1(src + js);
92
vst1(dst + jd - step_tail, vrev64(v_src));
93
}
94
95
for (--jd; js < size.width; ++js, --jd)
96
dst[jd] = src[js];
97
}
98
}
99
100
template <typename T>
101
void flip3(const Size2D & size,
102
const void * srcBase, ptrdiff_t srcStride,
103
void * dstBase, ptrdiff_t dstStride,
104
FLIP_MODE flipMode)
105
{
106
using namespace internal;
107
108
#ifndef __ANDROID__
109
typedef typename VecTraits<T, 3>::vec128 vec128;
110
#endif
111
typedef typename VecTraits<T, 3>::vec64 vec64;
112
113
#ifndef __ANDROID__
114
u32 step_base = 16 / sizeof(T), step_base3 = step_base * 3;
115
size_t roiw_base = size.width >= (step_base - 1) ? size.width - step_base + 1 : 0;
116
#endif
117
u32 step_tail = 8 / sizeof(T), step_tail3 = step_tail * 3;
118
size_t roiw_tail = size.width >= (step_tail - 1) ? size.width - step_tail + 1 : 0;
119
120
for (size_t i = 0; i < size.height; ++i)
121
{
122
const T * src = getRowPtr((const T *)srcBase, srcStride, i);
123
T * dst = getRowPtr((T *)dstBase, dstStride, (flipMode & FLIP_VERTICAL_MODE) != 0 ? size.height - i - 1 : i);
124
size_t j = 0, js = 0, jd = size.width * 3;
125
126
#ifndef __ANDROID__
127
for (; j < roiw_base; j += step_base, js += step_base3, jd -= step_base3)
128
{
129
prefetch(src + js);
130
131
vec128 v_src = vld3q(src + js), v_dst;
132
v_src.val[0] = vrev64q(v_src.val[0]);
133
v_src.val[1] = vrev64q(v_src.val[1]);
134
v_src.val[2] = vrev64q(v_src.val[2]);
135
136
v_dst.val[0] = vcombine(vget_high(v_src.val[0]), vget_low(v_src.val[0]));
137
v_dst.val[1] = vcombine(vget_high(v_src.val[1]), vget_low(v_src.val[1]));
138
v_dst.val[2] = vcombine(vget_high(v_src.val[2]), vget_low(v_src.val[2]));
139
140
vst3q(dst + jd - step_base3, v_dst);
141
}
142
#endif // __ANDROID__
143
144
for (; j < roiw_tail; j += step_tail, js += step_tail3, jd -= step_tail3)
145
{
146
vec64 v_src = vld3(src + js), v_dst;
147
v_dst.val[0] = vrev64(v_src.val[0]);
148
v_dst.val[1] = vrev64(v_src.val[1]);
149
v_dst.val[2] = vrev64(v_src.val[2]);
150
151
vst3(dst + jd - step_tail3, v_dst);
152
}
153
154
for (jd -= 3; j < size.width; ++j, js += 3, jd -= 3)
155
{
156
dst[jd] = src[js];
157
dst[jd + 1] = src[js + 1];
158
dst[jd + 2] = src[js + 2];
159
}
160
}
161
}
162
163
typedef void (* flipFunc)(const Size2D &size,
164
const void * srcBase, ptrdiff_t srcStride,
165
void * dstBase, ptrdiff_t dstStride,
166
FLIP_MODE flipMode);
167
168
} // namespace
169
170
#endif
171
172
void flip(const Size2D &size,
173
const u8 * srcBase, ptrdiff_t srcStride,
174
u8 * dstBase, ptrdiff_t dstStride,
175
FLIP_MODE flipMode, u32 elemSize)
176
{
177
internal::assertSupportedConfiguration(isFlipSupported(flipMode, elemSize));
178
#ifdef CAROTENE_NEON
179
180
if (flipMode == FLIP_VERTICAL_MODE)
181
{
182
for (size_t y = 0; y < size.height; ++y)
183
{
184
const u8 * src_row = internal::getRowPtr(srcBase, srcStride, y);
185
u8 * dst_row = internal::getRowPtr(dstBase, dstStride, size.height - y - 1);
186
187
std::memcpy(dst_row, src_row, elemSize * size.width);
188
}
189
return;
190
}
191
192
flipFunc func = NULL;
193
194
if (elemSize == (u32)sizeof(u8))
195
func = &flip<u8>;
196
if (elemSize == (u32)sizeof(u16))
197
func = &flip<u16>;
198
if (elemSize == (u32)sizeof(u32))
199
func = &flip<u32>;
200
if (elemSize == (u32)sizeof(u8) * 3)
201
func = &flip3<u8>;
202
203
if (func == NULL)
204
return;
205
206
func(size,
207
srcBase, srcStride,
208
dstBase, dstStride,
209
flipMode);
210
211
#else
212
(void)size;
213
(void)srcBase;
214
(void)srcStride;
215
(void)dstBase;
216
(void)dstStride;
217
(void)flipMode;
218
(void)elemSize;
219
#endif
220
}
221
222
} // namespace CAROTENE_NS
223
224