// opencv/3rdparty/carotene/src/opticalflow.cpp
/*
 * By downloading, copying, installing or using the software you agree to this license.
 * If you do not agree to this license, do not download, install,
 * copy or use the software.
 *
 *
 *                           License Agreement
 *                For Open Source Computer Vision Library
 *                        (3-clause BSD License)
 *
 * Copyright (C) 2012-2015, NVIDIA Corporation, all rights reserved.
 * Third party copyrights are property of their respective owners.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *
 *   * Redistributions in binary form must reproduce the above copyright notice,
 *     this list of conditions and the following disclaimer in the documentation
 *     and/or other materials provided with the distribution.
 *
 *   * Neither the names of the copyright holders nor the names of the contributors
 *     may be used to endorse or promote products derived from this software
 *     without specific prior written permission.
 *
 * This software is provided by the copyright holders and contributors "as is" and
 * any express or implied warranties, including, but not limited to, the implied
 * warranties of merchantability and fitness for a particular purpose are disclaimed.
 * In no event shall copyright holders or contributors be liable for any direct,
 * indirect, incidental, special, exemplary, or consequential damages
 * (including, but not limited to, procurement of substitute goods or services;
 * loss of use, data, or profits; or business interruption) however caused
 * and on any theory of liability, whether in contract, strict liability,
 * or tort (including negligence or otherwise) arising in any way out of
 * the use of this software, even if advised of the possibility of such damage.
 */
39
40
#include "common.hpp"
41
#include "saturate_cast.hpp"
42
#include <vector>
43
#include <float.h> // For FLT_EPSILON
44
45
namespace CAROTENE_NS {
46
47
#define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n))
48
49
/*
50
* Pyramidal Lucas-Kanade Optical Flow level processing
51
*/
52
void pyrLKOptFlowLevel(const Size2D &size, s32 cn,
53
const u8 *prevData, ptrdiff_t prevStride,
54
const s16 *prevDerivData, ptrdiff_t prevDerivStride,
55
const u8 *nextData, ptrdiff_t nextStride,
56
u32 ptCount,
57
const f32 *prevPts, f32 *nextPts,
58
u8 *status, f32 *err,
59
const Size2D &winSize,
60
u32 terminationCount, f64 terminationEpsilon,
61
u32 level, u32 maxLevel, bool useInitialFlow, bool getMinEigenVals,
62
f32 minEigThreshold)
63
{
64
internal::assertSupportedConfiguration();
65
#ifdef CAROTENE_NEON
66
f32 halfWinX = (winSize.width-1)*0.5f, halfWinY = (winSize.height-1)*0.5f;
67
s32 cn2 = cn*2;
68
69
std::vector<s16> _buf(winSize.total()*(cn + cn2));
70
s16* IWinBuf = &_buf[0];
71
s32 IWinBufStride = winSize.width*cn;
72
s16* derivIWinBuf = &_buf[winSize.total()*cn];
73
s32 derivIWinBufStride = winSize.width*cn2;
74
75
for( u32 ptidx = 0; ptidx < ptCount; ptidx++ )
76
{
77
f32 levscale = (1./(1 << level));
78
u32 ptref = ptidx << 1;
79
f32 prevPtX = prevPts[ptref+0]*levscale;
80
f32 prevPtY = prevPts[ptref+1]*levscale;
81
f32 nextPtX;
82
f32 nextPtY;
83
if( level == maxLevel )
84
{
85
if( useInitialFlow )
86
{
87
nextPtX = nextPts[ptref+0]*levscale;
88
nextPtY = nextPts[ptref+1]*levscale;
89
}
90
else
91
{
92
nextPtX = prevPtX;
93
nextPtY = prevPtY;
94
}
95
}
96
else
97
{
98
nextPtX = nextPts[ptref+0]*2.f;
99
nextPtY = nextPts[ptref+1]*2.f;
100
}
101
nextPts[ptref+0] = nextPtX;
102
nextPts[ptref+1] = nextPtY;
103
104
s32 iprevPtX, iprevPtY;
105
s32 inextPtX, inextPtY;
106
prevPtX -= halfWinX;
107
prevPtY -= halfWinY;
108
iprevPtX = floor(prevPtX);
109
iprevPtY = floor(prevPtY);
110
111
if( iprevPtX < -(s32)winSize.width || iprevPtX >= (s32)size.width ||
112
iprevPtY < -(s32)winSize.height || iprevPtY >= (s32)size.height )
113
{
114
if( level == 0 )
115
{
116
if( status )
117
status[ptidx] = false;
118
if( err )
119
err[ptidx] = 0;
120
}
121
continue;
122
}
123
124
f32 a = prevPtX - iprevPtX;
125
f32 b = prevPtY - iprevPtY;
126
const s32 W_BITS = 14, W_BITS1 = 14;
127
const f32 FLT_SCALE = 1.f/(1 << 20);
128
s32 iw00 = round((1.f - a)*(1.f - b)*(1 << W_BITS));
129
s32 iw01 = round(a*(1.f - b)*(1 << W_BITS));
130
s32 iw10 = round((1.f - a)*b*(1 << W_BITS));
131
s32 iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
132
133
s32 dstep = prevDerivStride/sizeof(s16);
134
f32 A11 = 0, A12 = 0, A22 = 0;
135
136
int16x4_t viw00 = vmov_n_s16((s16)iw00);
137
int16x4_t viw01 = vmov_n_s16((s16)iw01);
138
int16x4_t viw10 = vmov_n_s16((s16)iw10);
139
int16x4_t viw11 = vmov_n_s16((s16)iw11);
140
141
float32x4_t vA11 = vmovq_n_f32(0);
142
float32x4_t vA12 = vmovq_n_f32(0);
143
float32x4_t vA22 = vmovq_n_f32(0);
144
145
s32 wwcn = winSize.width*cn;
146
147
// extract the patch from the first image, compute covariation matrix of derivatives
148
s32 x = 0;
149
for(s32 y = 0; y < (s32)winSize.height; y++ )
150
{
151
const u8* src = prevData + prevStride*(y + iprevPtY) + iprevPtX*cn;
152
const s16* dsrc = prevDerivData + dstep*(y + iprevPtY) + iprevPtX*cn2;
153
154
s16* Iptr = IWinBuf + y*IWinBufStride;
155
s16* dIptr = derivIWinBuf + y*derivIWinBufStride;
156
157
internal::prefetch(src + x + prevStride * 2, 0);
158
for(x = 0; x <= wwcn - 8; x += 8)
159
{
160
uint8x8_t vsrc00 = vld1_u8(src + x);
161
uint8x8_t vsrc10 = vld1_u8(src + x + prevStride);
162
uint8x8_t vsrc01 = vld1_u8(src + x + cn);
163
uint8x8_t vsrc11 = vld1_u8(src + x + prevStride + cn);
164
165
int16x8_t vs00 = vreinterpretq_s16_u16(vmovl_u8(vsrc00));
166
int16x8_t vs10 = vreinterpretq_s16_u16(vmovl_u8(vsrc10));
167
int16x8_t vs01 = vreinterpretq_s16_u16(vmovl_u8(vsrc01));
168
int16x8_t vs11 = vreinterpretq_s16_u16(vmovl_u8(vsrc11));
169
170
int32x4_t vsuml = vmull_s16(vget_low_s16(vs00), viw00);
171
int32x4_t vsumh = vmull_s16(vget_high_s16(vs10), viw10);
172
173
vsuml = vmlal_s16(vsuml, vget_low_s16(vs01), viw01);
174
vsumh = vmlal_s16(vsumh, vget_high_s16(vs11), viw11);
175
176
vsuml = vmlal_s16(vsuml, vget_low_s16(vs10), viw10);
177
vsumh = vmlal_s16(vsumh, vget_high_s16(vs00), viw00);
178
179
vsuml = vmlal_s16(vsuml, vget_low_s16(vs11), viw11);
180
vsumh = vmlal_s16(vsumh, vget_high_s16(vs01), viw01);
181
182
int16x4_t vsumnl = vrshrn_n_s32(vsuml, W_BITS1-5);
183
int16x4_t vsumnh = vrshrn_n_s32(vsumh, W_BITS1-5);
184
185
vst1q_s16(Iptr + x, vcombine_s16(vsumnl, vsumnh));
186
}
187
for(; x <= wwcn - 4; x += 4)
188
{
189
uint8x8_t vsrc00 = vld1_u8(src + x);
190
uint8x8_t vsrc10 = vld1_u8(src + x + prevStride);
191
uint8x8_t vsrc01 = vld1_u8(src + x + cn);
192
uint8x8_t vsrc11 = vld1_u8(src + x + prevStride + cn);
193
194
int16x4_t vs00 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(vsrc00)));
195
int16x4_t vs10 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(vsrc10)));
196
int16x4_t vs01 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(vsrc01)));
197
int16x4_t vs11 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(vsrc11)));
198
199
int32x4_t vsuml1 = vmull_s16(vs00, viw00);
200
int32x4_t vsuml2 = vmull_s16(vs01, viw01);
201
vsuml1 = vmlal_s16(vsuml1, vs10, viw10);
202
vsuml2 = vmlal_s16(vsuml2, vs11, viw11);
203
int32x4_t vsuml = vaddq_s32(vsuml1, vsuml2);
204
205
int16x4_t vsumnl = vrshrn_n_s32(vsuml, W_BITS1-5);
206
207
vst1_s16(Iptr + x, vsumnl);
208
}
209
210
internal::prefetch(dsrc + dstep * 2, 0);
211
for(x = 0; x <= wwcn - 4; x += 4, dsrc += 4*2, dIptr += 4*2 )
212
{
213
#if 0
214
__asm__ (
215
"vld2.16 {d0-d1}, [%[dsrc00]] \n\t"
216
"vld2.16 {d2-d3}, [%[dsrc10]] \n\t"
217
"vld2.16 {d4-d5}, [%[dsrc01]] \n\t"
218
"vld2.16 {d6-d7}, [%[dsrc11]] \n\t"
219
"vmull.s16 q4, d3, %P[viw10] \n\t"
220
"vmull.s16 q5, d0, %P[viw00] \n\t"
221
"vmlal.s16 q4, d7, %P[viw11] \n\t"
222
"vmlal.s16 q5, d4, %P[viw01] \n\t"
223
"vmlal.s16 q4, d1, %P[viw00] \n\t"
224
"vmlal.s16 q5, d2, %P[viw10] \n\t"
225
"vmlal.s16 q4, d5, %P[viw01] \n\t"
226
"vmlal.s16 q5, d6, %P[viw11] \n\t"
227
"vrshrn.s32 d13, q4, %[W_BITS1] \n\t"
228
"vrshrn.s32 d12, q5, %[W_BITS1] \n\t"
229
"vmull.s16 q3, d13, d13 \n\t"
230
"vmull.s16 q4, d12, d12 \n\t"
231
"vmull.s16 q5, d13, d12 \n\t"
232
"vcvt.f32.s32 q3, q3 \n\t"
233
"vcvt.f32.s32 q4, q4 \n\t"
234
"vcvt.f32.s32 q5, q5 \n\t"
235
"vadd.f32 %q[vA22], q3 \n\t"
236
"vadd.f32 %q[vA11], q4 \n\t"
237
"vadd.f32 %q[vA12], q5 \n\t"
238
"vst2.16 {d12-d13}, [%[out]] \n\t"
239
: [vA22] "=w" (vA22),
240
[vA11] "=w" (vA11),
241
[vA12] "=w" (vA12)
242
: "0" (vA22),
243
"1" (vA11),
244
"2" (vA12),
245
[out] "r" (dIptr),
246
[dsrc00] "r" (dsrc),
247
[dsrc10] "r" (dsrc + dstep),
248
[dsrc01] "r" (dsrc + cn2),
249
[dsrc11] "r" (dsrc + dstep + cn2),
250
[viw00] "w" (viw00),
251
[viw10] "w" (viw10),
252
[viw01] "w" (viw01),
253
[viw11] "w" (viw11),
254
[W_BITS1] "I" (W_BITS1)
255
: "d0","d1","d2","d3","d4","d5","d6","d7","d8","d9","d10","d11","d12","d13"
256
);
257
#else
258
int16x4x2_t vdsrc00 = vld2_s16(dsrc);
259
int16x4x2_t vdsrc10 = vld2_s16(dsrc + dstep);
260
int16x4x2_t vdsrc01 = vld2_s16(dsrc + cn2);
261
int16x4x2_t vdsrc11 = vld2_s16(dsrc + dstep + cn2);
262
263
int32x4_t vsumy = vmull_s16(vdsrc10.val[1], viw10);
264
int32x4_t vsumx = vmull_s16(vdsrc00.val[0], viw00);
265
266
vsumy = vmlal_s16(vsumy, vdsrc11.val[1], viw11);
267
vsumx = vmlal_s16(vsumx, vdsrc01.val[0], viw01);
268
269
vsumy = vmlal_s16(vsumy, vdsrc00.val[1], viw00);
270
vsumx = vmlal_s16(vsumx, vdsrc10.val[0], viw10);
271
272
vsumy = vmlal_s16(vsumy, vdsrc01.val[1], viw01);
273
vsumx = vmlal_s16(vsumx, vdsrc11.val[0], viw11);
274
275
int16x4_t vsumny = vrshrn_n_s32(vsumy, W_BITS1);
276
int16x4_t vsumnx = vrshrn_n_s32(vsumx, W_BITS1);
277
278
int32x4_t va22i = vmull_s16(vsumny, vsumny);
279
int32x4_t va11i = vmull_s16(vsumnx, vsumnx);
280
int32x4_t va12i = vmull_s16(vsumnx, vsumny);
281
282
float32x4_t va22f = vcvtq_f32_s32(va22i);
283
float32x4_t va11f = vcvtq_f32_s32(va11i);
284
float32x4_t va12f = vcvtq_f32_s32(va12i);
285
286
vA22 = vaddq_f32(vA22, va22f);
287
vA11 = vaddq_f32(vA11, va11f);
288
vA12 = vaddq_f32(vA12, va12f);
289
290
int16x4x2_t vsum;
291
vsum.val[0] = vsumnx;
292
vsum.val[1] = vsumny;
293
vst2_s16(dIptr, vsum);
294
#endif
295
}
296
297
for( ; x < wwcn; x++, dsrc += 2, dIptr += 2 )
298
{
299
s32 ival = CV_DESCALE(src[x]*iw00 + src[x+cn]*iw01 +
300
src[x+prevStride]*iw10 + src[x+prevStride+cn]*iw11, W_BITS1-5);
301
s32 ixval = CV_DESCALE(dsrc[0]*iw00 + dsrc[cn2]*iw01 +
302
dsrc[dstep]*iw10 + dsrc[dstep+cn2]*iw11, W_BITS1);
303
s32 iyval = CV_DESCALE(dsrc[1]*iw00 + dsrc[cn2+1]*iw01 + dsrc[dstep+1]*iw10 +
304
dsrc[dstep+cn2+1]*iw11, W_BITS1);
305
Iptr[x] = (s16)ival;
306
dIptr[0] = (s16)ixval;
307
dIptr[1] = (s16)iyval;
308
309
A11 += (f32)(ixval*ixval);
310
A12 += (f32)(ixval*iyval);
311
A22 += (f32)(iyval*iyval);
312
}
313
}
314
315
f32 A11buf[2], A12buf[2], A22buf[2];
316
vst1_f32(A11buf, vadd_f32(vget_low_f32(vA11), vget_high_f32(vA11)));
317
vst1_f32(A12buf, vadd_f32(vget_low_f32(vA12), vget_high_f32(vA12)));
318
vst1_f32(A22buf, vadd_f32(vget_low_f32(vA22), vget_high_f32(vA22)));
319
A11 += A11buf[0] + A11buf[1];
320
A12 += A12buf[0] + A12buf[1];
321
A22 += A22buf[0] + A22buf[1];
322
323
A11 *= FLT_SCALE;
324
A12 *= FLT_SCALE;
325
A22 *= FLT_SCALE;
326
327
f32 D = A11*A22 - A12*A12;
328
f32 minEig = (A22 + A11 - std::sqrt((A11-A22)*(A11-A22) +
329
4.f*A12*A12))/(2*winSize.width*winSize.height);
330
331
if( err && getMinEigenVals )
332
err[ptidx] = (f32)minEig;
333
334
if( minEig < minEigThreshold || D < FLT_EPSILON )
335
{
336
if( level == 0 && status )
337
status[ptidx] = false;
338
continue;
339
}
340
341
D = 1.f/D;
342
343
nextPtX -= halfWinX;
344
nextPtY -= halfWinY;
345
f32 prevDeltaX = 0;
346
f32 prevDeltaY = 0;
347
348
for(u32 j = 0; j < terminationCount; j++ )
349
{
350
inextPtX = floor(nextPtX);
351
inextPtY = floor(nextPtY);
352
353
if( inextPtX < -(s32)winSize.width || inextPtX >= (s32)size.width ||
354
inextPtY < -(s32)winSize.height || inextPtY >= (s32)size.height )
355
{
356
if( level == 0 && status )
357
status[ptidx] = false;
358
break;
359
}
360
361
a = nextPtX - inextPtX;
362
b = nextPtY - inextPtY;
363
iw00 = round((1.f - a)*(1.f - b)*(1 << W_BITS));
364
iw01 = round(a*(1.f - b)*(1 << W_BITS));
365
iw10 = round((1.f - a)*b*(1 << W_BITS));
366
iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
367
f32 b1 = 0, b2 = 0;
368
369
viw00 = vmov_n_s16((s16)iw00);
370
viw01 = vmov_n_s16((s16)iw01);
371
viw10 = vmov_n_s16((s16)iw10);
372
viw11 = vmov_n_s16((s16)iw11);
373
374
float32x4_t vb1 = vmovq_n_f32(0);
375
float32x4_t vb2 = vmovq_n_f32(0);
376
377
for(s32 y = 0; y < (s32)winSize.height; y++ )
378
{
379
const u8* Jptr = nextData + nextStride*(y + inextPtY) + inextPtX*cn;
380
const s16* Iptr = IWinBuf + y*IWinBufStride;
381
const s16* dIptr = derivIWinBuf + y*derivIWinBufStride;
382
383
x = 0;
384
385
internal::prefetch(Jptr, nextStride * 2);
386
internal::prefetch(Iptr, IWinBufStride/2);
387
internal::prefetch(dIptr, derivIWinBufStride/2);
388
389
for( ; x <= wwcn - 8; x += 8, dIptr += 8*2 )
390
{
391
uint8x8_t vj00 = vld1_u8(Jptr + x);
392
uint8x8_t vj10 = vld1_u8(Jptr + x + nextStride);
393
uint8x8_t vj01 = vld1_u8(Jptr + x + cn);
394
uint8x8_t vj11 = vld1_u8(Jptr + x + nextStride + cn);
395
int16x8_t vI = vld1q_s16(Iptr + x);
396
int16x8x2_t vDerivI = vld2q_s16(dIptr);
397
398
int16x8_t vs00 = vreinterpretq_s16_u16(vmovl_u8(vj00));
399
int16x8_t vs10 = vreinterpretq_s16_u16(vmovl_u8(vj10));
400
int16x8_t vs01 = vreinterpretq_s16_u16(vmovl_u8(vj01));
401
int16x8_t vs11 = vreinterpretq_s16_u16(vmovl_u8(vj11));
402
403
int32x4_t vsuml = vmull_s16(vget_low_s16(vs00), viw00);
404
int32x4_t vsumh = vmull_s16(vget_high_s16(vs10), viw10);
405
406
vsuml = vmlal_s16(vsuml, vget_low_s16(vs01), viw01);
407
vsumh = vmlal_s16(vsumh, vget_high_s16(vs11), viw11);
408
409
vsuml = vmlal_s16(vsuml, vget_low_s16(vs10), viw10);
410
vsumh = vmlal_s16(vsumh, vget_high_s16(vs00), viw00);
411
412
vsuml = vmlal_s16(vsuml, vget_low_s16(vs11), viw11);
413
vsumh = vmlal_s16(vsumh, vget_high_s16(vs01), viw01);
414
415
int16x4_t vsumnl = vrshrn_n_s32(vsuml, W_BITS1-5);
416
int16x4_t vsumnh = vrshrn_n_s32(vsumh, W_BITS1-5);
417
418
int16x8_t diff = vqsubq_s16(vcombine_s16(vsumnl, vsumnh), vI);
419
420
int32x4_t vb1l = vmull_s16(vget_low_s16(diff), vget_low_s16(vDerivI.val[0]));
421
int32x4_t vb2h = vmull_s16(vget_high_s16(diff), vget_high_s16(vDerivI.val[1]));
422
int32x4_t vb1i = vmlal_s16(vb1l, vget_high_s16(diff), vget_high_s16(vDerivI.val[0]));
423
int32x4_t vb2i = vmlal_s16(vb2h, vget_low_s16(diff), vget_low_s16(vDerivI.val[1]));
424
425
float32x4_t vb1f = vcvtq_f32_s32(vb1i);
426
float32x4_t vb2f = vcvtq_f32_s32(vb2i);
427
428
vb1 = vaddq_f32(vb1, vb1f);
429
vb2 = vaddq_f32(vb2, vb2f);
430
}
431
432
for( ; x < wwcn; x++, dIptr += 2 )
433
{
434
s32 diff = CV_DESCALE(Jptr[x]*iw00 + Jptr[x+cn]*iw01 +
435
Jptr[x+nextStride]*iw10 + Jptr[x+nextStride+cn]*iw11,
436
W_BITS1-5) - Iptr[x];
437
b1 += (f32)(diff*dIptr[0]);
438
b2 += (f32)(diff*dIptr[1]);
439
}
440
}
441
442
f32 bbuf[2];
443
float32x2_t vb = vpadd_f32(vadd_f32(vget_low_f32(vb1), vget_high_f32(vb1)), vadd_f32(vget_low_f32(vb2), vget_high_f32(vb2)));
444
vst1_f32(bbuf, vb);
445
b1 += bbuf[0];
446
b2 += bbuf[1];
447
448
b1 *= FLT_SCALE;
449
b2 *= FLT_SCALE;
450
451
f32 deltaX = (f32)((A12*b2 - A22*b1) * D);
452
f32 deltaY = (f32)((A12*b1 - A11*b2) * D);
453
454
nextPtX += deltaX;
455
nextPtY += deltaY;
456
nextPts[ptref+0] = nextPtX + halfWinX;
457
nextPts[ptref+1] = nextPtY + halfWinY;
458
459
if( ((double)deltaX*deltaX + (double)deltaY*deltaY) <= terminationEpsilon )
460
break;
461
462
if( j > 0 && std::abs(deltaX + prevDeltaX) < 0.01 &&
463
std::abs(deltaY + prevDeltaY) < 0.01 )
464
{
465
nextPts[ptref+0] -= deltaX*0.5f;
466
nextPts[ptref+1] -= deltaY*0.5f;
467
break;
468
}
469
prevDeltaX = deltaX;
470
prevDeltaY = deltaY;
471
}
472
473
if( status && status[ptidx] && err && level == 0 && !getMinEigenVals )
474
{
475
f32 nextPointX = nextPts[ptref+0] - halfWinX;
476
f32 nextPointY = nextPts[ptref+1] - halfWinY;
477
478
s32 inextPointX = floor(nextPointX);
479
s32 inextPointY = floor(nextPointY);
480
481
if( inextPointX < -(s32)winSize.width || inextPointX >= (s32)size.width ||
482
inextPointY < -(s32)winSize.height || inextPointY >= (s32)size.height )
483
{
484
if( status )
485
status[ptidx] = false;
486
continue;
487
}
488
489
f32 aa = nextPointX - inextPointX;
490
f32 bb = nextPointY - inextPointY;
491
iw00 = round((1.f - aa)*(1.f - bb)*(1 << W_BITS));
492
iw01 = round(aa*(1.f - bb)*(1 << W_BITS));
493
iw10 = round((1.f - aa)*bb*(1 << W_BITS));
494
iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
495
f32 errval = 0.f;
496
497
for(s32 y = 0; y < (s32)winSize.height; y++ )
498
{
499
const u8* Jptr = nextData + nextStride*(y + inextPointY) + inextPointX*cn;
500
const s16* Iptr = IWinBuf + y*IWinBufStride;
501
502
for( x = 0; x < wwcn; x++ )
503
{
504
s32 diff = CV_DESCALE(Jptr[x]*iw00 + Jptr[x+cn]*iw01 +
505
Jptr[x+nextStride]*iw10 + Jptr[x+nextStride+cn]*iw11,
506
W_BITS1-5) - Iptr[x];
507
errval += std::abs((f32)diff);
508
}
509
}
510
err[ptidx] = errval / (32*wwcn*winSize.height);
511
}
512
}
513
#else
514
(void)size;
515
(void)cn;
516
(void)prevData;
517
(void)prevStride;
518
(void)prevDerivData;
519
(void)prevDerivStride;
520
(void)nextData;
521
(void)nextStride;
522
(void)prevPts;
523
(void)nextPts;
524
(void)status;
525
(void)err;
526
(void)winSize;
527
(void)terminationCount;
528
(void)terminationEpsilon;
529
(void)level;
530
(void)maxLevel;
531
(void)useInitialFlow;
532
(void)getMinEigenVals;
533
(void)minEigThreshold;
534
(void)ptCount;
535
#endif
536
}
537
538
}//CAROTENE_NS
539
540
541