CoCalc -- opticalflow.cpp

GitHub Repository: Tetragramm/opencv
Path: blob/master/3rdparty/carotene/src/opticalflow.cpp
¹⁶³³⁷ views
1
/*
2
 * By downloading, copying, installing or using the software you agree to this license.
3
 * If you do not agree to this license, do not download, install,
4
 * copy or use the software.
5
 *
6
 *
7
 *                           License Agreement
8
 *                For Open Source Computer Vision Library
9
 *                        (3-clause BSD License)
10
 *
11
 * Copyright (C) 2012-2015, NVIDIA Corporation, all rights reserved.
12
 * Third party copyrights are property of their respective owners.
13
 *
14
 * Redistribution and use in source and binary forms, with or without modification,
15
 * are permitted provided that the following conditions are met:
16
 *
17
 *   * Redistributions of source code must retain the above copyright notice,
18
 *     this list of conditions and the following disclaimer.
19
 *
20
 *   * Redistributions in binary form must reproduce the above copyright notice,
21
 *     this list of conditions and the following disclaimer in the documentation
22
 *     and/or other materials provided with the distribution.
23
 *
24
 *   * Neither the names of the copyright holders nor the names of the contributors
25
 *     may be used to endorse or promote products derived from this software
26
 *     without specific prior written permission.
27
 *
28
 * This software is provided by the copyright holders and contributors "as is" and
29
 * any express or implied warranties, including, but not limited to, the implied
30
 * warranties of merchantability and fitness for a particular purpose are disclaimed.
31
 * In no event shall copyright holders or contributors be liable for any direct,
32
 * indirect, incidental, special, exemplary, or consequential damages
33
 * (including, but not limited to, procurement of substitute goods or services;
34
 * loss of use, data, or profits; or business interruption) however caused
35
 * and on any theory of liability, whether in contract, strict liability,
36
 * or tort (including negligence or otherwise) arising in any way out of
37
 * the use of this software, even if advised of the possibility of such damage.
38
 */
39

40
#include "common.hpp"
41
#include "saturate_cast.hpp"
42
#include <vector>
43
#include <float.h> // For FLT_EPSILON
44

45
namespace CAROTENE_NS {
46

47
// Rounding right shift: adds half of 2^shift to the value before shifting it
// right by `shift` bits. `shift` must be >= 1 so that (shift)-1 is a valid
// shift count.
#define CV_DESCALE(value, shift) \
    (((value) + (1 << ((shift) - 1))) >> (shift))
48

49
/*
50
 *        Pyramidal Lucas-Kanade Optical Flow level processing
51
 */
52
void pyrLKOptFlowLevel(const Size2D &size, s32 cn,
53
                       const u8 *prevData, ptrdiff_t prevStride,
54
                       const s16 *prevDerivData, ptrdiff_t prevDerivStride,
55
                       const u8 *nextData, ptrdiff_t nextStride,
56
                       u32 ptCount,
57
                       const f32 *prevPts, f32 *nextPts,
58
                       u8 *status, f32 *err,
59
                       const Size2D &winSize,
60
                       u32 terminationCount, f64 terminationEpsilon,
61
                       u32 level, u32 maxLevel, bool useInitialFlow, bool getMinEigenVals,
62
                       f32 minEigThreshold)
63
{
64
    internal::assertSupportedConfiguration();
65
#ifdef CAROTENE_NEON
66
    f32 halfWinX = (winSize.width-1)*0.5f, halfWinY = (winSize.height-1)*0.5f;
67
    s32 cn2 = cn*2;
68

69
    std::vector<s16> _buf(winSize.total()*(cn + cn2));
70
    s16* IWinBuf = &_buf[0];
71
    s32  IWinBufStride = winSize.width*cn;
72
    s16* derivIWinBuf = &_buf[winSize.total()*cn];
73
    s32  derivIWinBufStride = winSize.width*cn2;
74

75
    for( u32 ptidx = 0; ptidx < ptCount; ptidx++ )
76
    {
77
        f32 levscale = (1./(1 << level));
78
        u32 ptref = ptidx << 1;
79
        f32 prevPtX = prevPts[ptref+0]*levscale;
80
        f32 prevPtY = prevPts[ptref+1]*levscale;
81
        f32 nextPtX;
82
        f32 nextPtY;
83
        if( level == maxLevel )
84
        {
85
            if( useInitialFlow )
86
            {
87
                nextPtX = nextPts[ptref+0]*levscale;
88
                nextPtY = nextPts[ptref+1]*levscale;
89
            }
90
            else
91
            {
92
                nextPtX = prevPtX;
93
                nextPtY = prevPtY;
94
            }
95
        }
96
        else
97
        {
98
            nextPtX = nextPts[ptref+0]*2.f;
99
            nextPtY = nextPts[ptref+1]*2.f;
100
        }
101
        nextPts[ptref+0] = nextPtX;
102
        nextPts[ptref+1] = nextPtY;
103

104
        s32 iprevPtX, iprevPtY;
105
        s32 inextPtX, inextPtY;
106
        prevPtX -= halfWinX;
107
        prevPtY -= halfWinY;
108
        iprevPtX = floor(prevPtX);
109
        iprevPtY = floor(prevPtY);
110

111
        if( iprevPtX < -(s32)winSize.width || iprevPtX >= (s32)size.width ||
112
            iprevPtY < -(s32)winSize.height || iprevPtY >= (s32)size.height )
113
        {
114
            if( level == 0 )
115
            {
116
                if( status )
117
                    status[ptidx] = false;
118
                if( err )
119
                    err[ptidx] = 0;
120
            }
121
            continue;
122
        }
123

124
        f32 a = prevPtX - iprevPtX;
125
        f32 b = prevPtY - iprevPtY;
126
        const s32 W_BITS = 14, W_BITS1 = 14;
127
        const f32 FLT_SCALE = 1.f/(1 << 20);
128
        s32 iw00 = round((1.f - a)*(1.f - b)*(1 << W_BITS));
129
        s32 iw01 = round(a*(1.f - b)*(1 << W_BITS));
130
        s32 iw10 = round((1.f - a)*b*(1 << W_BITS));
131
        s32 iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
132

133
        s32 dstep = prevDerivStride/sizeof(s16);
134
        f32 A11 = 0, A12 = 0, A22 = 0;
135

136
        int16x4_t viw00 = vmov_n_s16((s16)iw00);
137
        int16x4_t viw01 = vmov_n_s16((s16)iw01);
138
        int16x4_t viw10 = vmov_n_s16((s16)iw10);
139
        int16x4_t viw11 = vmov_n_s16((s16)iw11);
140

141
        float32x4_t vA11 = vmovq_n_f32(0);
142
        float32x4_t vA12 = vmovq_n_f32(0);
143
        float32x4_t vA22 = vmovq_n_f32(0);
144

145
        s32 wwcn = winSize.width*cn;
146

147
        // extract the patch from the first image, compute covariation matrix of derivatives
148
        s32 x = 0;
149
        for(s32 y = 0; y < (s32)winSize.height; y++ )
150
        {
151
            const u8* src = prevData + prevStride*(y + iprevPtY) + iprevPtX*cn;
152
            const s16* dsrc = prevDerivData + dstep*(y + iprevPtY) + iprevPtX*cn2;
153

154
            s16* Iptr = IWinBuf + y*IWinBufStride;
155
            s16* dIptr = derivIWinBuf + y*derivIWinBufStride;
156

157
            internal::prefetch(src + x + prevStride * 2, 0);
158
            for(x = 0; x <= wwcn - 8; x += 8)
159
            {
160
                uint8x8_t vsrc00 = vld1_u8(src + x);
161
                uint8x8_t vsrc10 = vld1_u8(src + x + prevStride);
162
                uint8x8_t vsrc01 = vld1_u8(src + x + cn);
163
                uint8x8_t vsrc11 = vld1_u8(src + x + prevStride + cn);
164

165
                int16x8_t vs00 = vreinterpretq_s16_u16(vmovl_u8(vsrc00));
166
                int16x8_t vs10 = vreinterpretq_s16_u16(vmovl_u8(vsrc10));
167
                int16x8_t vs01 = vreinterpretq_s16_u16(vmovl_u8(vsrc01));
168
                int16x8_t vs11 = vreinterpretq_s16_u16(vmovl_u8(vsrc11));
169

170
                int32x4_t vsuml = vmull_s16(vget_low_s16(vs00), viw00);
171
                int32x4_t vsumh = vmull_s16(vget_high_s16(vs10), viw10);
172

173
                vsuml = vmlal_s16(vsuml, vget_low_s16(vs01), viw01);
174
                vsumh = vmlal_s16(vsumh, vget_high_s16(vs11), viw11);
175

176
                vsuml = vmlal_s16(vsuml, vget_low_s16(vs10), viw10);
177
                vsumh = vmlal_s16(vsumh, vget_high_s16(vs00), viw00);
178

179
                vsuml = vmlal_s16(vsuml, vget_low_s16(vs11), viw11);
180
                vsumh = vmlal_s16(vsumh, vget_high_s16(vs01), viw01);
181

182
                int16x4_t vsumnl = vrshrn_n_s32(vsuml, W_BITS1-5);
183
                int16x4_t vsumnh = vrshrn_n_s32(vsumh, W_BITS1-5);
184

185
                vst1q_s16(Iptr + x, vcombine_s16(vsumnl, vsumnh));
186
            }
187
            for(; x <= wwcn - 4; x += 4)
188
            {
189
                uint8x8_t vsrc00 = vld1_u8(src + x);
190
                uint8x8_t vsrc10 = vld1_u8(src + x + prevStride);
191
                uint8x8_t vsrc01 = vld1_u8(src + x + cn);
192
                uint8x8_t vsrc11 = vld1_u8(src + x + prevStride + cn);
193

194
                int16x4_t vs00 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(vsrc00)));
195
                int16x4_t vs10 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(vsrc10)));
196
                int16x4_t vs01 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(vsrc01)));
197
                int16x4_t vs11 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(vsrc11)));
198

199
                int32x4_t vsuml1 = vmull_s16(vs00, viw00);
200
                int32x4_t vsuml2 = vmull_s16(vs01, viw01);
201
                vsuml1 = vmlal_s16(vsuml1, vs10, viw10);
202
                vsuml2 = vmlal_s16(vsuml2, vs11, viw11);
203
                int32x4_t vsuml = vaddq_s32(vsuml1, vsuml2);
204

205
                int16x4_t vsumnl = vrshrn_n_s32(vsuml, W_BITS1-5);
206

207
                vst1_s16(Iptr + x, vsumnl);
208
            }
209

210
            internal::prefetch(dsrc + dstep * 2, 0);
211
            for(x = 0; x <= wwcn - 4; x += 4, dsrc += 4*2, dIptr += 4*2 )
212
            {
213
#if 0
214
                __asm__ (
215
                    "vld2.16 {d0-d1}, [%[dsrc00]]                         \n\t"
216
                    "vld2.16 {d2-d3}, [%[dsrc10]]                         \n\t"
217
                    "vld2.16 {d4-d5}, [%[dsrc01]]                         \n\t"
218
                    "vld2.16 {d6-d7}, [%[dsrc11]]                         \n\t"
219
                    "vmull.s16 q4, d3, %P[viw10]                           \n\t"
220
                    "vmull.s16 q5, d0, %P[viw00]                           \n\t"
221
                    "vmlal.s16 q4, d7, %P[viw11]                           \n\t"
222
                    "vmlal.s16 q5, d4, %P[viw01]                           \n\t"
223
                    "vmlal.s16 q4, d1, %P[viw00]                           \n\t"
224
                    "vmlal.s16 q5, d2, %P[viw10]                           \n\t"
225
                    "vmlal.s16 q4, d5, %P[viw01]                           \n\t"
226
                    "vmlal.s16 q5, d6, %P[viw11]                            \n\t"
227
                    "vrshrn.s32 d13, q4, %[W_BITS1]                       \n\t"
228
                    "vrshrn.s32 d12, q5, %[W_BITS1]                       \n\t"
229
                    "vmull.s16 q3, d13, d13                               \n\t"
230
                    "vmull.s16 q4, d12, d12                               \n\t"
231
                    "vmull.s16 q5, d13, d12                               \n\t"
232
                    "vcvt.f32.s32 q3, q3                                  \n\t"
233
                    "vcvt.f32.s32 q4, q4                                  \n\t"
234
                    "vcvt.f32.s32 q5, q5                                  \n\t"
235
                    "vadd.f32 %q[vA22], q3                                \n\t"
236
                    "vadd.f32 %q[vA11], q4                                \n\t"
237
                    "vadd.f32 %q[vA12], q5                                \n\t"
238
                    "vst2.16 {d12-d13}, [%[out]]                          \n\t"
239
                    : [vA22] "=w" (vA22),
240
                      [vA11] "=w" (vA11),
241
                      [vA12] "=w" (vA12)
242
                    : "0" (vA22),
243
                      "1" (vA11),
244
                      "2" (vA12),
245
                      [out] "r" (dIptr),
246
                      [dsrc00] "r" (dsrc),
247
                      [dsrc10] "r" (dsrc + dstep),
248
                      [dsrc01] "r" (dsrc + cn2),
249
                      [dsrc11] "r" (dsrc + dstep + cn2),
250
                      [viw00] "w" (viw00),
251
                      [viw10] "w" (viw10),
252
                      [viw01] "w" (viw01),
253
                      [viw11] "w" (viw11),
254
                      [W_BITS1] "I" (W_BITS1)
255
                    : "d0","d1","d2","d3","d4","d5","d6","d7","d8","d9","d10","d11","d12","d13"
256
                );
257
#else
258
                int16x4x2_t vdsrc00 = vld2_s16(dsrc);
259
                int16x4x2_t vdsrc10 = vld2_s16(dsrc + dstep);
260
                int16x4x2_t vdsrc01 = vld2_s16(dsrc + cn2);
261
                int16x4x2_t vdsrc11 = vld2_s16(dsrc + dstep + cn2);
262

263
                int32x4_t vsumy = vmull_s16(vdsrc10.val[1], viw10);
264
                int32x4_t vsumx = vmull_s16(vdsrc00.val[0], viw00);
265

266
                vsumy = vmlal_s16(vsumy, vdsrc11.val[1], viw11);
267
                vsumx = vmlal_s16(vsumx, vdsrc01.val[0], viw01);
268

269
                vsumy = vmlal_s16(vsumy, vdsrc00.val[1], viw00);
270
                vsumx = vmlal_s16(vsumx, vdsrc10.val[0], viw10);
271

272
                vsumy = vmlal_s16(vsumy, vdsrc01.val[1], viw01);
273
                vsumx = vmlal_s16(vsumx, vdsrc11.val[0], viw11);
274

275
                int16x4_t vsumny = vrshrn_n_s32(vsumy, W_BITS1);
276
                int16x4_t vsumnx = vrshrn_n_s32(vsumx, W_BITS1);
277

278
                int32x4_t va22i = vmull_s16(vsumny, vsumny);
279
                int32x4_t va11i = vmull_s16(vsumnx, vsumnx);
280
                int32x4_t va12i = vmull_s16(vsumnx, vsumny);
281

282
                float32x4_t va22f = vcvtq_f32_s32(va22i);
283
                float32x4_t va11f = vcvtq_f32_s32(va11i);
284
                float32x4_t va12f = vcvtq_f32_s32(va12i);
285

286
                vA22 = vaddq_f32(vA22, va22f);
287
                vA11 = vaddq_f32(vA11, va11f);
288
                vA12 = vaddq_f32(vA12, va12f);
289

290
                int16x4x2_t vsum;
291
                vsum.val[0] = vsumnx;
292
                vsum.val[1] = vsumny;
293
                vst2_s16(dIptr, vsum);
294
#endif
295
            }
296

297
            for( ; x < wwcn; x++, dsrc += 2, dIptr += 2 )
298
            {
299
                s32 ival = CV_DESCALE(src[x]*iw00 + src[x+cn]*iw01 +
300
                                      src[x+prevStride]*iw10 + src[x+prevStride+cn]*iw11, W_BITS1-5);
301
                s32 ixval = CV_DESCALE(dsrc[0]*iw00 + dsrc[cn2]*iw01 +
302
                                       dsrc[dstep]*iw10 + dsrc[dstep+cn2]*iw11, W_BITS1);
303
                s32 iyval = CV_DESCALE(dsrc[1]*iw00 + dsrc[cn2+1]*iw01 + dsrc[dstep+1]*iw10 +
304
                                       dsrc[dstep+cn2+1]*iw11, W_BITS1);
305
                Iptr[x] = (s16)ival;
306
                dIptr[0] = (s16)ixval;
307
                dIptr[1] = (s16)iyval;
308

309
                A11 += (f32)(ixval*ixval);
310
                A12 += (f32)(ixval*iyval);
311
                A22 += (f32)(iyval*iyval);
312
            }
313
        }
314

315
        f32 A11buf[2], A12buf[2], A22buf[2];
316
        vst1_f32(A11buf, vadd_f32(vget_low_f32(vA11), vget_high_f32(vA11)));
317
        vst1_f32(A12buf, vadd_f32(vget_low_f32(vA12), vget_high_f32(vA12)));
318
        vst1_f32(A22buf, vadd_f32(vget_low_f32(vA22), vget_high_f32(vA22)));
319
        A11 += A11buf[0] + A11buf[1];
320
        A12 += A12buf[0] + A12buf[1];
321
        A22 += A22buf[0] + A22buf[1];
322

323
        A11 *= FLT_SCALE;
324
        A12 *= FLT_SCALE;
325
        A22 *= FLT_SCALE;
326

327
        f32 D = A11*A22 - A12*A12;
328
        f32 minEig = (A22 + A11 - std::sqrt((A11-A22)*(A11-A22) +
329
                        4.f*A12*A12))/(2*winSize.width*winSize.height);
330

331
        if( err && getMinEigenVals )
332
            err[ptidx] = (f32)minEig;
333

334
        if( minEig < minEigThreshold || D < FLT_EPSILON )
335
        {
336
            if( level == 0 && status )
337
                status[ptidx] = false;
338
            continue;
339
        }
340

341
        D = 1.f/D;
342

343
        nextPtX -= halfWinX;
344
        nextPtY -= halfWinY;
345
        f32 prevDeltaX = 0;
346
        f32 prevDeltaY = 0;
347

348
        for(u32 j = 0; j < terminationCount; j++ )
349
        {
350
            inextPtX = floor(nextPtX);
351
            inextPtY = floor(nextPtY);
352

353
            if( inextPtX < -(s32)winSize.width || inextPtX >= (s32)size.width ||
354
               inextPtY < -(s32)winSize.height || inextPtY >= (s32)size.height )
355
            {
356
                if( level == 0 && status )
357
                    status[ptidx] = false;
358
                break;
359
            }
360

361
            a = nextPtX - inextPtX;
362
            b = nextPtY - inextPtY;
363
            iw00 = round((1.f - a)*(1.f - b)*(1 << W_BITS));
364
            iw01 = round(a*(1.f - b)*(1 << W_BITS));
365
            iw10 = round((1.f - a)*b*(1 << W_BITS));
366
            iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
367
            f32 b1 = 0, b2 = 0;
368

369
            viw00 = vmov_n_s16((s16)iw00);
370
            viw01 = vmov_n_s16((s16)iw01);
371
            viw10 = vmov_n_s16((s16)iw10);
372
            viw11 = vmov_n_s16((s16)iw11);
373

374
            float32x4_t vb1 = vmovq_n_f32(0);
375
            float32x4_t vb2 = vmovq_n_f32(0);
376

377
            for(s32 y = 0; y < (s32)winSize.height; y++ )
378
            {
379
                const u8* Jptr = nextData + nextStride*(y + inextPtY) + inextPtX*cn;
380
                const s16* Iptr = IWinBuf + y*IWinBufStride;
381
                const s16* dIptr = derivIWinBuf + y*derivIWinBufStride;
382

383
                x = 0;
384

385
                internal::prefetch(Jptr, nextStride * 2);
386
                internal::prefetch(Iptr, IWinBufStride/2);
387
                internal::prefetch(dIptr, derivIWinBufStride/2);
388

389
                for( ; x <= wwcn - 8; x += 8, dIptr += 8*2 )
390
                {
391
                    uint8x8_t vj00 = vld1_u8(Jptr + x);
392
                    uint8x8_t vj10 = vld1_u8(Jptr + x + nextStride);
393
                    uint8x8_t vj01 = vld1_u8(Jptr + x + cn);
394
                    uint8x8_t vj11 = vld1_u8(Jptr + x + nextStride + cn);
395
                    int16x8_t vI = vld1q_s16(Iptr + x);
396
                    int16x8x2_t vDerivI = vld2q_s16(dIptr);
397

398
                    int16x8_t vs00 = vreinterpretq_s16_u16(vmovl_u8(vj00));
399
                    int16x8_t vs10 = vreinterpretq_s16_u16(vmovl_u8(vj10));
400
                    int16x8_t vs01 = vreinterpretq_s16_u16(vmovl_u8(vj01));
401
                    int16x8_t vs11 = vreinterpretq_s16_u16(vmovl_u8(vj11));
402

403
                    int32x4_t vsuml = vmull_s16(vget_low_s16(vs00), viw00);
404
                    int32x4_t vsumh = vmull_s16(vget_high_s16(vs10), viw10);
405

406
                    vsuml = vmlal_s16(vsuml, vget_low_s16(vs01), viw01);
407
                    vsumh = vmlal_s16(vsumh, vget_high_s16(vs11), viw11);
408

409
                    vsuml = vmlal_s16(vsuml, vget_low_s16(vs10), viw10);
410
                    vsumh = vmlal_s16(vsumh, vget_high_s16(vs00), viw00);
411

412
                    vsuml = vmlal_s16(vsuml, vget_low_s16(vs11), viw11);
413
                    vsumh = vmlal_s16(vsumh, vget_high_s16(vs01), viw01);
414

415
                    int16x4_t vsumnl = vrshrn_n_s32(vsuml, W_BITS1-5);
416
                    int16x4_t vsumnh = vrshrn_n_s32(vsumh, W_BITS1-5);
417

418
                    int16x8_t diff = vqsubq_s16(vcombine_s16(vsumnl, vsumnh), vI);
419

420
                    int32x4_t vb1l = vmull_s16(vget_low_s16(diff), vget_low_s16(vDerivI.val[0]));
421
                    int32x4_t vb2h = vmull_s16(vget_high_s16(diff), vget_high_s16(vDerivI.val[1]));
422
                    int32x4_t vb1i = vmlal_s16(vb1l, vget_high_s16(diff), vget_high_s16(vDerivI.val[0]));
423
                    int32x4_t vb2i = vmlal_s16(vb2h, vget_low_s16(diff), vget_low_s16(vDerivI.val[1]));
424

425
                    float32x4_t vb1f = vcvtq_f32_s32(vb1i);
426
                    float32x4_t vb2f = vcvtq_f32_s32(vb2i);
427

428
                    vb1 = vaddq_f32(vb1, vb1f);
429
                    vb2 = vaddq_f32(vb2, vb2f);
430
                }
431

432
                for( ; x < wwcn; x++, dIptr += 2 )
433
                {
434
                    s32 diff = CV_DESCALE(Jptr[x]*iw00 + Jptr[x+cn]*iw01 +
435
                                          Jptr[x+nextStride]*iw10 + Jptr[x+nextStride+cn]*iw11,
436
                                          W_BITS1-5) - Iptr[x];
437
                    b1 += (f32)(diff*dIptr[0]);
438
                    b2 += (f32)(diff*dIptr[1]);
439
                }
440
            }
441

442
            f32 bbuf[2];
443
            float32x2_t vb = vpadd_f32(vadd_f32(vget_low_f32(vb1), vget_high_f32(vb1)), vadd_f32(vget_low_f32(vb2), vget_high_f32(vb2)));
444
            vst1_f32(bbuf, vb);
445
            b1 += bbuf[0];
446
            b2 += bbuf[1];
447

448
            b1 *= FLT_SCALE;
449
            b2 *= FLT_SCALE;
450

451
            f32 deltaX = (f32)((A12*b2 - A22*b1) * D);
452
            f32 deltaY = (f32)((A12*b1 - A11*b2) * D);
453

454
            nextPtX += deltaX;
455
            nextPtY += deltaY;
456
            nextPts[ptref+0] = nextPtX + halfWinX;
457
            nextPts[ptref+1] = nextPtY + halfWinY;
458

459
            if( ((double)deltaX*deltaX + (double)deltaY*deltaY) <= terminationEpsilon )
460
                break;
461

462
            if( j > 0 && std::abs(deltaX + prevDeltaX) < 0.01 &&
463
               std::abs(deltaY + prevDeltaY) < 0.01 )
464
            {
465
                nextPts[ptref+0] -= deltaX*0.5f;
466
                nextPts[ptref+1] -= deltaY*0.5f;
467
                break;
468
            }
469
            prevDeltaX = deltaX;
470
            prevDeltaY = deltaY;
471
        }
472

473
        if( status && status[ptidx] && err && level == 0 && !getMinEigenVals )
474
        {
475
            f32 nextPointX = nextPts[ptref+0] - halfWinX;
476
            f32 nextPointY = nextPts[ptref+1] - halfWinY;
477

478
            s32 inextPointX = floor(nextPointX);
479
            s32 inextPointY = floor(nextPointY);
480

481
            if( inextPointX < -(s32)winSize.width || inextPointX >= (s32)size.width ||
482
                inextPointY < -(s32)winSize.height || inextPointY >= (s32)size.height )
483
            {
484
                if( status )
485
                    status[ptidx] = false;
486
                continue;
487
            }
488

489
            f32 aa = nextPointX - inextPointX;
490
            f32 bb = nextPointY - inextPointY;
491
            iw00 = round((1.f - aa)*(1.f - bb)*(1 << W_BITS));
492
            iw01 = round(aa*(1.f - bb)*(1 << W_BITS));
493
            iw10 = round((1.f - aa)*bb*(1 << W_BITS));
494
            iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
495
            f32 errval = 0.f;
496

497
            for(s32 y = 0; y < (s32)winSize.height; y++ )
498
            {
499
                const u8* Jptr = nextData + nextStride*(y + inextPointY) + inextPointX*cn;
500
                const s16* Iptr = IWinBuf + y*IWinBufStride;
501

502
                for( x = 0; x < wwcn; x++ )
503
                {
504
                    s32 diff = CV_DESCALE(Jptr[x]*iw00 + Jptr[x+cn]*iw01 +
505
                                          Jptr[x+nextStride]*iw10 + Jptr[x+nextStride+cn]*iw11,
506
                                          W_BITS1-5) - Iptr[x];
507
                    errval += std::abs((f32)diff);
508
                }
509
            }
510
            err[ptidx] = errval / (32*wwcn*winSize.height);
511
        }
512
    }
513
#else
514
    (void)size;
515
    (void)cn;
516
    (void)prevData;
517
    (void)prevStride;
518
    (void)prevDerivData;
519
    (void)prevDerivStride;
520
    (void)nextData;
521
    (void)nextStride;
522
    (void)prevPts;
523
    (void)nextPts;
524
    (void)status;
525
    (void)err;
526
    (void)winSize;
527
    (void)terminationCount;
528
    (void)terminationEpsilon;
529
    (void)level;
530
    (void)maxLevel;
531
    (void)useInitialFlow;
532
    (void)getMinEigenVals;
533
    (void)minEigThreshold;
534
    (void)ptCount;
535
#endif
536
}
537

538
}//CAROTENE_NS
539

540

541
Product

Resources

Company