// Source: GitHub repository Tetragramm/opencv
// Path: blob/master/modules/calib3d/src/stereobm.cpp
//M*//////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
42
43
/****************************************************************************************\
* Very fast SAD-based (Sum-of-Absolute-Differences) stereo correspondence algorithm. *
* Contributed by Kurt Konolige *
\****************************************************************************************/
47
48
#include "precomp.hpp"
49
#include <stdio.h>
50
#include <limits>
51
#include "opencl_kernels_calib3d.hpp"
52
#include "opencv2/core/hal/intrin.hpp"
53
54
namespace cv
55
{
56
57
struct StereoBMParams
58
{
59
StereoBMParams(int _numDisparities=64, int _SADWindowSize=21)
60
{
61
preFilterType = StereoBM::PREFILTER_XSOBEL;
62
preFilterSize = 9;
63
preFilterCap = 31;
64
SADWindowSize = _SADWindowSize;
65
minDisparity = 0;
66
numDisparities = _numDisparities > 0 ? _numDisparities : 64;
67
textureThreshold = 10;
68
uniquenessRatio = 15;
69
speckleRange = speckleWindowSize = 0;
70
roi1 = roi2 = Rect(0,0,0,0);
71
disp12MaxDiff = -1;
72
dispType = CV_16S;
73
}
74
75
int preFilterType;
76
int preFilterSize;
77
int preFilterCap;
78
int SADWindowSize;
79
int minDisparity;
80
int numDisparities;
81
int textureThreshold;
82
int uniquenessRatio;
83
int speckleRange;
84
int speckleWindowSize;
85
Rect roi1, roi2;
86
int disp12MaxDiff;
87
int dispType;
88
};
89
90
#ifdef HAVE_OPENCL
// Runs the "prefilter_norm" OpenCL kernel on _input and writes the result into
// _output (created with the same size and type as the input).
//
// winsize      - prefilter window size; passed to the kernel build as -D WSZ and
//                used to derive the two fixed-point scale factors below.
// prefilterCap - clipping value forwarded to the kernel.
//
// Returns false when the kernel cannot be created or fails to run.
static bool ocl_prefilter_norm(InputArray _input, OutputArray _output, int winsize, int prefilterCap)
{
    ocl::Kernel k("prefilter_norm", ocl::calib3d::stereobm_oclsrc, cv::format("-D WSZ=%d", winsize));
    if(k.empty())
        return false;

    // Same scale computation as the CPU implementation (prefilterNorm).
    // NOTE(review): scale_g is 0 for winsize < 3, which would divide by zero here;
    // presumably callers validate the window size - confirm.
    int scale_g = winsize*winsize/8, scale_s = (1024 + scale_g)/(scale_g*2);
    scale_g *= scale_s;

    UMat input = _input.getUMat(), output;
    _output.create(input.size(), input.type());
    output = _output.getUMat();

    // One work item per pixel.
    size_t globalThreads[3] = { (size_t)input.cols, (size_t)input.rows, 1 };

    k.args(ocl::KernelArg::PtrReadOnly(input), ocl::KernelArg::PtrWriteOnly(output), input.rows, input.cols,
           prefilterCap, scale_g, scale_s);

    return k.run(2, globalThreads, NULL, false);
}
#endif
112
113
static void prefilterNorm( const Mat& src, Mat& dst, int winsize, int ftzero, uchar* buf )
114
{
115
int x, y, wsz2 = winsize/2;
116
int* vsum = (int*)alignPtr(buf + (wsz2 + 1)*sizeof(vsum[0]), 32);
117
int scale_g = winsize*winsize/8, scale_s = (1024 + scale_g)/(scale_g*2);
118
const int OFS = 256*5, TABSZ = OFS*2 + 256;
119
uchar tab[TABSZ];
120
const uchar* sptr = src.ptr();
121
int srcstep = (int)src.step;
122
Size size = src.size();
123
124
scale_g *= scale_s;
125
126
for( x = 0; x < TABSZ; x++ )
127
tab[x] = (uchar)(x - OFS < -ftzero ? 0 : x - OFS > ftzero ? ftzero*2 : x - OFS + ftzero);
128
129
for( x = 0; x < size.width; x++ )
130
vsum[x] = (ushort)(sptr[x]*(wsz2 + 2));
131
132
for( y = 1; y < wsz2; y++ )
133
{
134
for( x = 0; x < size.width; x++ )
135
vsum[x] = (ushort)(vsum[x] + sptr[srcstep*y + x]);
136
}
137
138
for( y = 0; y < size.height; y++ )
139
{
140
const uchar* top = sptr + srcstep*MAX(y-wsz2-1,0);
141
const uchar* bottom = sptr + srcstep*MIN(y+wsz2,size.height-1);
142
const uchar* prev = sptr + srcstep*MAX(y-1,0);
143
const uchar* curr = sptr + srcstep*y;
144
const uchar* next = sptr + srcstep*MIN(y+1,size.height-1);
145
uchar* dptr = dst.ptr<uchar>(y);
146
147
for( x = 0; x < size.width; x++ )
148
vsum[x] = (ushort)(vsum[x] + bottom[x] - top[x]);
149
150
for( x = 0; x <= wsz2; x++ )
151
{
152
vsum[-x-1] = vsum[0];
153
vsum[size.width+x] = vsum[size.width-1];
154
}
155
156
int sum = vsum[0]*(wsz2 + 1);
157
for( x = 1; x <= wsz2; x++ )
158
sum += vsum[x];
159
160
int val = ((curr[0]*5 + curr[1] + prev[0] + next[0])*scale_g - sum*scale_s) >> 10;
161
dptr[0] = tab[val + OFS];
162
163
for( x = 1; x < size.width-1; x++ )
164
{
165
sum += vsum[x+wsz2] - vsum[x-wsz2-1];
166
val = ((curr[x]*4 + curr[x-1] + curr[x+1] + prev[x] + next[x])*scale_g - sum*scale_s) >> 10;
167
dptr[x] = tab[val + OFS];
168
}
169
170
sum += vsum[x+wsz2] - vsum[x-wsz2-1];
171
val = ((curr[x]*5 + curr[x-1] + prev[x] + next[x])*scale_g - sum*scale_s) >> 10;
172
dptr[x] = tab[val + OFS];
173
}
174
}
175
176
#ifdef HAVE_OPENCL
// Runs the "prefilter_xsobel" OpenCL kernel on _input and writes the result
// into _output (created with the same size and type as the input).
//
// prefilterCap - clipping value forwarded to the kernel.
//
// Returns false when the kernel cannot be created or fails to run.
static bool ocl_prefilter_xsobel(InputArray _input, OutputArray _output, int prefilterCap)
{
    ocl::Kernel k("prefilter_xsobel", ocl::calib3d::stereobm_oclsrc);
    if(k.empty())
        return false;

    UMat input = _input.getUMat(), output;
    _output.create(input.size(), input.type());
    output = _output.getUMat();

    // One work item per pixel.
    size_t globalThreads[3] = { (size_t)input.cols, (size_t)input.rows, 1 };

    k.args(ocl::KernelArg::PtrReadOnly(input), ocl::KernelArg::PtrWriteOnly(output), input.rows, input.cols, prefilterCap);

    return k.run(2, globalThreads, NULL, false);
}
#endif
194
195
static void
196
prefilterXSobel( const Mat& src, Mat& dst, int ftzero )
197
{
198
int x, y;
199
const int OFS = 256*4, TABSZ = OFS*2 + 256;
200
uchar tab[TABSZ] = { 0 };
201
Size size = src.size();
202
203
for( x = 0; x < TABSZ; x++ )
204
tab[x] = (uchar)(x - OFS < -ftzero ? 0 : x - OFS > ftzero ? ftzero*2 : x - OFS + ftzero);
205
uchar val0 = tab[0 + OFS];
206
207
#if CV_SIMD128
208
bool useSIMD = hasSIMD128();
209
#endif
210
211
for( y = 0; y < size.height-1; y += 2 )
212
{
213
const uchar* srow1 = src.ptr<uchar>(y);
214
const uchar* srow0 = y > 0 ? srow1 - src.step : size.height > 1 ? srow1 + src.step : srow1;
215
const uchar* srow2 = y < size.height-1 ? srow1 + src.step : size.height > 1 ? srow1 - src.step : srow1;
216
const uchar* srow3 = y < size.height-2 ? srow1 + src.step*2 : srow1;
217
uchar* dptr0 = dst.ptr<uchar>(y);
218
uchar* dptr1 = dptr0 + dst.step;
219
220
dptr0[0] = dptr0[size.width-1] = dptr1[0] = dptr1[size.width-1] = val0;
221
x = 1;
222
223
#if CV_SIMD128
224
if( useSIMD )
225
{
226
v_int16x8 ftz = v_setall_s16((short) ftzero);
227
v_int16x8 ftz2 = v_setall_s16((short)(ftzero*2));
228
v_int16x8 z = v_setzero_s16();
229
230
for(; x <= (size.width - 1) - 8; x += 8 )
231
{
232
v_int16x8 s00 = v_reinterpret_as_s16(v_load_expand(srow0 + x + 1));
233
v_int16x8 s01 = v_reinterpret_as_s16(v_load_expand(srow0 + x - 1));
234
v_int16x8 s10 = v_reinterpret_as_s16(v_load_expand(srow1 + x + 1));
235
v_int16x8 s11 = v_reinterpret_as_s16(v_load_expand(srow1 + x - 1));
236
v_int16x8 s20 = v_reinterpret_as_s16(v_load_expand(srow2 + x + 1));
237
v_int16x8 s21 = v_reinterpret_as_s16(v_load_expand(srow2 + x - 1));
238
v_int16x8 s30 = v_reinterpret_as_s16(v_load_expand(srow3 + x + 1));
239
v_int16x8 s31 = v_reinterpret_as_s16(v_load_expand(srow3 + x - 1));
240
241
v_int16x8 d0 = s00 - s01;
242
v_int16x8 d1 = s10 - s11;
243
v_int16x8 d2 = s20 - s21;
244
v_int16x8 d3 = s30 - s31;
245
246
v_uint16x8 v0 = v_reinterpret_as_u16(v_max(v_min(d0 + d1 + d1 + d2 + ftz, ftz2), z));
247
v_uint16x8 v1 = v_reinterpret_as_u16(v_max(v_min(d1 + d2 + d2 + d3 + ftz, ftz2), z));
248
249
v_pack_store(dptr0 + x, v0);
250
v_pack_store(dptr1 + x, v1);
251
}
252
}
253
#endif
254
255
for( ; x < size.width-1; x++ )
256
{
257
int d0 = srow0[x+1] - srow0[x-1], d1 = srow1[x+1] - srow1[x-1],
258
d2 = srow2[x+1] - srow2[x-1], d3 = srow3[x+1] - srow3[x-1];
259
int v0 = tab[d0 + d1*2 + d2 + OFS];
260
int v1 = tab[d1 + d2*2 + d3 + OFS];
261
dptr0[x] = (uchar)v0;
262
dptr1[x] = (uchar)v1;
263
}
264
}
265
266
for( ; y < size.height; y++ )
267
{
268
uchar* dptr = dst.ptr<uchar>(y);
269
x = 0;
270
#if CV_SIMD128
271
if( useSIMD )
272
{
273
v_uint8x16 val0_16 = v_setall_u8(val0);
274
for(; x <= size.width-16; x+=16 )
275
v_store(dptr + x, val0_16);
276
}
277
#endif
278
for(; x < size.width; x++ )
279
dptr[x] = val0;
280
}
281
}
282
283
284
// Number of fractional bits stored in a disparity value, per output element type.
static const int DISPARITY_SHIFT_16S = 4;
static const int DISPARITY_SHIFT_32S = 8;

// Compile-time lookup of the fractional-bit count for a disparity element type.
// Only the `short` and `int` specializations provide `value`.
template <typename T>
struct dispShiftTemplate
{ };

template<>
struct dispShiftTemplate<short>
{
    enum { value = DISPARITY_SHIFT_16S };
};

template<>
struct dispShiftTemplate<int>
{
    enum { value = DISPARITY_SHIFT_32S };
};
302
303
// Converts an integer disparity v1 plus a sub-pixel correction v2/d into the
// fixed-point representation of the output type T. Both specializations first
// form the value with 8 fractional bits: v1*256 + v2*256/d (the correction is
// skipped when d == 0).
template <typename T>
inline T dispDescale(int /*v1*/, int /*v2*/, int /*d*/);

// 16-bit output: add 15 and shift right by 4, leaving 4 fractional bits.
template<>
inline short dispDescale<short>(int v1, int v2, int d)
{
    return (short)((v1*256 + (d != 0 ? v2*256/d : 0) + 15) >> 4);
}

// 32-bit output: keeps all 8 fractional bits.
template <>
inline int dispDescale<int>(int v1, int v2, int d)
{
    return (int)(v1*256 + (d != 0 ? v2*256/d : 0)); // no need to add 127, this will be converted to float
}
317
318
#if CV_SIMD128
// Block-matching stereo correspondence using 16-bit SAD accumulators and
// 128-bit universal intrinsics.
//
// left, right - prefiltered 8-bit images (read as uchar).
// disp        - output disparity map with element type dType; pixels that fail
//               the texture or uniqueness test, and the left/right border
//               columns, are set to FILTERED = (minDisparity-1) << shift.
// cost        - optional per-pixel best SAD (short); ignored if cost.data is null.
// state       - algorithm parameters (window size, disparity range, thresholds).
// buf         - scratch memory, carved below into sad / hsad / htext / cbuf.
// _dy0, _dy1  - number of extra rows available above / below the processed
//               stripe; clamped to wsz2+1.
//
// The disparity loops step by 16 with no scalar tail, so ndisp is presumably
// required to be a multiple of 16 by the caller - confirm.
//
// Internal index d maps to disparity (ndisp - d - 1 + mindisp).
template <typename dType>
static void findStereoCorrespondenceBM_SIMD( const Mat& left, const Mat& right,
                                            Mat& disp, Mat& cost, StereoBMParams& state,
                                            uchar* buf, int _dy0, int _dy1 )
{
    const int ALIGN = 16;
    int x, y, d;
    int wsz = state.SADWindowSize, wsz2 = wsz/2;
    int dy0 = MIN(_dy0, wsz2+1), dy1 = MIN(_dy1, wsz2+1);
    int ndisp = state.numDisparities;
    int mindisp = state.minDisparity;
    int lofs = MAX(ndisp - 1 + mindisp, 0);
    int rofs = -MIN(ndisp - 1 + mindisp, 0);
    int width = left.cols, height = left.rows;
    int width1 = width - rofs - ndisp + 1;
    int ftzero = state.preFilterCap;
    int textureThreshold = state.textureThreshold;
    int uniquenessRatio = state.uniquenessRatio;
    const int disp_shift = dispShiftTemplate<dType>::value;
    dType FILTERED = (dType)((mindisp - 1) << disp_shift);

    // sad   : SAD over the full window, one entry per disparity (current pixel)
    // hsad  : horizontal (per-row) SAD sums, ndisp entries per row
    // htext : per-row texture sums
    // cbuf  : circular buffer of wsz+1 columns of per-pixel absolute differences
    ushort *sad, *hsad0, *hsad, *hsad_sub;
    int *htext;
    uchar *cbuf0, *cbuf;
    const uchar* lptr0 = left.ptr() + lofs;
    const uchar* rptr0 = right.ptr() + rofs;
    const uchar *lptr, *lptr_sub, *rptr;
    dType* dptr = disp.ptr<dType>();
    int sstep = (int)left.step;
    int dstep = (int)(disp.step/sizeof(dptr[0]));
    int cstep = (height + dy0 + dy1)*ndisp;
    short costbuf = 0;
    // With no cost matrix, coststep is 0 and every write lands in costbuf.
    int coststep = cost.data ? (int)(cost.step/sizeof(costbuf)) : 0;
    const int TABSZ = 256;
    uchar tab[TABSZ];
    const v_int16x8 d0_8 = v_int16x8(0,1,2,3,4,5,6,7), dd_8 = v_setall_s16(8);

    sad = (ushort*)alignPtr(buf + sizeof(sad[0]), ALIGN);
    hsad0 = (ushort*)alignPtr(sad + ndisp + 1 + dy0*ndisp, ALIGN);
    htext = (int*)alignPtr((int*)(hsad0 + (height+dy1)*ndisp) + wsz2 + 2, ALIGN);
    cbuf0 = (uchar*)alignPtr((uchar*)(htext + height + wsz2 + 2) + dy0*ndisp, ALIGN);

    // Texture measure: distance of a prefiltered pixel from the "zero" level.
    for( x = 0; x < TABSZ; x++ )
        tab[x] = (uchar)std::abs(x - ftzero);

    // initialize buffers
    memset( hsad0 - dy0*ndisp, 0, (height + dy0 + dy1)*ndisp*sizeof(hsad0[0]) );
    memset( htext - wsz2 - 1, 0, (height + wsz + 1)*sizeof(htext[0]) );

    // Accumulate the horizontal sums for the window around x = 0
    // (columns -wsz2-1 .. wsz2-1, clamped to the image).
    for( x = -wsz2-1; x < wsz2; x++ )
    {
        hsad = hsad0 - dy0*ndisp; cbuf = cbuf0 + (x + wsz2 + 1)*cstep - dy0*ndisp;
        lptr = lptr0 + MIN(MAX(x, -lofs), width-lofs-1) - dy0*sstep;
        rptr = rptr0 + MIN(MAX(x, -rofs), width-rofs-ndisp) - dy0*sstep;

        for( y = -dy0; y < height + dy1; y++, hsad += ndisp, cbuf += ndisp, lptr += sstep, rptr += sstep )
        {
            int lval = lptr[0];
            v_uint8x16 lv = v_setall_u8((uchar)lval);
            for( d = 0; d < ndisp; d += 16 )
            {
                v_uint8x16 rv = v_load(rptr + d);
                v_uint16x8 hsad_l = v_load(hsad + d);
                v_uint16x8 hsad_h = v_load(hsad + d + 8);
                v_uint8x16 diff = v_absdiff(lv, rv);
                v_store(cbuf + d, diff);
                v_uint16x8 diff0, diff1;
                v_expand(diff, diff0, diff1);
                hsad_l += diff0;
                hsad_h += diff1;
                v_store(hsad + d, hsad_l);
                v_store(hsad + d + 8, hsad_h);
            }
            htext[y] += tab[lval];
        }
    }

    // initialize the left and right borders of the disparity map
    for( y = 0; y < height; y++ )
    {
        for( x = 0; x < lofs; x++ )
            dptr[y*dstep + x] = FILTERED;
        for( x = lofs + width1; x < width; x++ )
            dptr[y*dstep + x] = FILTERED;
    }
    dptr += lofs;

    for( x = 0; x < width1; x++, dptr++ )
    {
        short* costptr = cost.data ? cost.ptr<short>() + lofs + x : &costbuf;
        // x0 is the column leaving the window, x1 the column entering it.
        int x0 = x - wsz2 - 1, x1 = x + wsz2;
        const uchar* cbuf_sub = cbuf0 + ((x0 + wsz2 + 1) % (wsz + 1))*cstep - dy0*ndisp;
        cbuf = cbuf0 + ((x1 + wsz2 + 1) % (wsz + 1))*cstep - dy0*ndisp;
        hsad = hsad0 - dy0*ndisp;
        lptr_sub = lptr0 + MIN(MAX(x0, -lofs), width-1-lofs) - dy0*sstep;
        lptr = lptr0 + MIN(MAX(x1, -lofs), width-1-lofs) - dy0*sstep;
        rptr = rptr0 + MIN(MAX(x1, -rofs), width-ndisp-rofs) - dy0*sstep;

        // Slide the horizontal sums one column to the right.
        for( y = -dy0; y < height + dy1; y++, cbuf += ndisp, cbuf_sub += ndisp,
             hsad += ndisp, lptr += sstep, lptr_sub += sstep, rptr += sstep )
        {
            int lval = lptr[0];
            v_uint8x16 lv = v_setall_u8((uchar)lval);
            for( d = 0; d < ndisp; d += 16 )
            {
                v_uint8x16 rv = v_load(rptr + d);
                v_uint16x8 hsad_l = v_load(hsad + d);
                v_uint16x8 hsad_h = v_load(hsad + d + 8);
                v_uint8x16 cbs = v_load(cbuf_sub + d);
                v_uint8x16 diff = v_absdiff(lv, rv);
                v_int16x8 diff_l, diff_h, cbs_l, cbs_h;
                v_store(cbuf + d, diff);
                // NOTE(review): the signed 8-bit reinterpretation is only exact for
                // differences <= 127; presumably guaranteed because prefiltered
                // pixels lie in [0, 2*preFilterCap] - confirm.
                v_expand(v_reinterpret_as_s8(diff), diff_l, diff_h);
                v_expand(v_reinterpret_as_s8(cbs), cbs_l, cbs_h);
                diff_l -= cbs_l;
                diff_h -= cbs_h;
                hsad_h = v_reinterpret_as_u16(v_reinterpret_as_s16(hsad_h) + diff_h);
                hsad_l = v_reinterpret_as_u16(v_reinterpret_as_s16(hsad_l) + diff_l);
                v_store(hsad + d, hsad_l);
                v_store(hsad + d + 8, hsad_h);
            }
            htext[y] += tab[lval] - tab[lptr_sub[0]];
        }

        // fill borders
        for( y = dy1; y <= wsz2; y++ )
            htext[height+y] = htext[height+dy1-1];
        for( y = -wsz2-1; y < -dy0; y++ )
            htext[y] = htext[-dy0];

        // initialize sums
        // The topmost available row stands in for the rows missing above it.
        for( d = 0; d < ndisp; d++ )
            sad[d] = (ushort)(hsad0[d-ndisp*dy0]*(wsz2 + 2 - dy0));

        hsad = hsad0 + (1 - dy0)*ndisp;
        for( y = 1 - dy0; y < wsz2; y++, hsad += ndisp )
            for( d = 0; d <= ndisp-16; d += 16 )
            {
                v_uint16x8 s0 = v_load(sad + d);
                v_uint16x8 s1 = v_load(sad + d + 8);
                v_uint16x8 t0 = v_load(hsad + d);
                v_uint16x8 t1 = v_load(hsad + d + 8);
                s0 = s0 + t0;
                s1 = s1 + t1;
                v_store(sad + d, s0);
                v_store(sad + d + 8, s1);
            }
        int tsum = 0;
        for( y = -wsz2-1; y < wsz2; y++ )
            tsum += htext[y];

        // finally, start the real processing
        for( y = 0; y < height; y++ )
        {
            int minsad = INT_MAX, mind = -1;
            hsad = hsad0 + MIN(y + wsz2, height+dy1-1)*ndisp;
            hsad_sub = hsad0 + MAX(y - wsz2 - 1, -dy0)*ndisp;
            v_int16x8 minsad8 = v_setall_s16(SHRT_MAX);
            v_int16x8 mind8 = v_setall_s16(0), d8 = d0_8;

            // Slide the window down one row and track, per lane, the minimum
            // SAD and the disparity index at which it was last improved.
            for( d = 0; d < ndisp; d += 16 )
            {
                v_int16x8 u0 = v_reinterpret_as_s16(v_load(hsad_sub + d));
                v_int16x8 u1 = v_reinterpret_as_s16(v_load(hsad + d));

                v_int16x8 v0 = v_reinterpret_as_s16(v_load(hsad_sub + d + 8));
                v_int16x8 v1 = v_reinterpret_as_s16(v_load(hsad + d + 8));

                v_int16x8 usad8 = v_reinterpret_as_s16(v_load(sad + d));
                v_int16x8 vsad8 = v_reinterpret_as_s16(v_load(sad + d + 8));

                u1 -= u0;
                v1 -= v0;
                usad8 += u1;
                vsad8 += v1;

                v_int16x8 mask = minsad8 > usad8;
                minsad8 = v_min(minsad8, usad8);
                // d8 only grows, so max() keeps the most recent improving index.
                mind8 = v_max(mind8, (mask& d8));

                v_store(sad + d, v_reinterpret_as_u16(usad8));
                v_store(sad + d + 8, v_reinterpret_as_u16(vsad8));

                mask = minsad8 > vsad8;
                minsad8 = v_min(minsad8, vsad8);

                d8 = d8 + dd_8;
                mind8 = v_max(mind8, (mask & d8));
                d8 = d8 + dd_8;
            }

            tsum += htext[y + wsz2] - htext[y - wsz2 - 1];
            if( tsum < textureThreshold )
            {
                dptr[y*dstep] = FILTERED;
                continue;
            }

            // Reduce the 8 lanes: smallest SAD wins, ties go to the smaller index.
            ushort CV_DECL_ALIGNED(16) minsad_buf[8], mind_buf[8];
            v_store(minsad_buf, v_reinterpret_as_u16(minsad8));
            v_store(mind_buf, v_reinterpret_as_u16(mind8));
            for( d = 0; d < 8; d++ )
                if(minsad > (int)minsad_buf[d] || (minsad == (int)minsad_buf[d] && mind > mind_buf[d]))
                {
                    minsad = minsad_buf[d];
                    mind = mind_buf[d];
                }

            if( uniquenessRatio > 0 )
            {
                // Reject the match if any disparity outside [mind-1, mind+1]
                // has SAD <= thresh.
                int thresh = minsad + (minsad * uniquenessRatio/100);
                v_int32x4 thresh4 = v_setall_s32(thresh + 1);
                v_int32x4 d1 = v_setall_s32(mind-1), d2 = v_setall_s32(mind+1);
                v_int32x4 dd_4 = v_setall_s32(4);
                v_int32x4 d4 = v_int32x4(0,1,2,3);
                v_int32x4 mask4;

                for( d = 0; d < ndisp; d += 8 )
                {
                    v_int16x8 sad8 = v_reinterpret_as_s16(v_load(sad + d));
                    v_int32x4 sad4_l, sad4_h;
                    v_expand(sad8, sad4_l, sad4_h);
                    mask4 = thresh4 > sad4_l;
                    mask4 = mask4 & ((d1 > d4) | (d4 > d2));
                    if( v_signmask(mask4) )
                        break;
                    d4 += dd_4;
                    mask4 = thresh4 > sad4_h;
                    mask4 = mask4 & ((d1 > d4) | (d4 > d2));
                    if( v_signmask(mask4) )
                        break;
                    d4 += dd_4;
                }
                if( d < ndisp )
                {
                    dptr[y*dstep] = FILTERED;
                    continue;
                }
            }

            // Sub-pixel refinement from the two neighbouring SAD values; skipped
            // at the ends of the disparity range where a neighbour is missing.
            if( 0 < mind && mind < ndisp - 1 )
            {
                int p = sad[mind+1], n = sad[mind-1];
                d = p + n - 2*sad[mind] + std::abs(p - n);
                dptr[y*dstep] = dispDescale<dType>(ndisp - mind - 1 + mindisp, p-n, d);
            }
            else
                dptr[y*dstep] = dispDescale<dType>(ndisp - mind - 1 + mindisp, 0, 0);
            costptr[y*coststep] = sad[mind];
        }
    }
}
#endif
572
573
template <typename mType>
574
static void
575
findStereoCorrespondenceBM( const Mat& left, const Mat& right,
576
Mat& disp, Mat& cost, const StereoBMParams& state,
577
uchar* buf, int _dy0, int _dy1 )
578
{
579
580
const int ALIGN = 16;
581
int x, y, d;
582
int wsz = state.SADWindowSize, wsz2 = wsz/2;
583
int dy0 = MIN(_dy0, wsz2+1), dy1 = MIN(_dy1, wsz2+1);
584
int ndisp = state.numDisparities;
585
int mindisp = state.minDisparity;
586
int lofs = MAX(ndisp - 1 + mindisp, 0);
587
int rofs = -MIN(ndisp - 1 + mindisp, 0);
588
int width = left.cols, height = left.rows;
589
int width1 = width - rofs - ndisp + 1;
590
int ftzero = state.preFilterCap;
591
int textureThreshold = state.textureThreshold;
592
int uniquenessRatio = state.uniquenessRatio;
593
const int disp_shift = dispShiftTemplate<mType>::value;
594
mType FILTERED = (mType)((mindisp - 1) << disp_shift);
595
596
#if CV_SIMD128
597
bool useSIMD = hasSIMD128();
598
if( useSIMD )
599
{
600
CV_Assert (ndisp % 8 == 0);
601
}
602
#endif
603
604
int *sad, *hsad0, *hsad, *hsad_sub, *htext;
605
uchar *cbuf0, *cbuf;
606
const uchar* lptr0 = left.ptr() + lofs;
607
const uchar* rptr0 = right.ptr() + rofs;
608
const uchar *lptr, *lptr_sub, *rptr;
609
mType* dptr = disp.ptr<mType>();
610
int sstep = (int)left.step;
611
int dstep = (int)(disp.step/sizeof(dptr[0]));
612
int cstep = (height+dy0+dy1)*ndisp;
613
int costbuf = 0;
614
int coststep = cost.data ? (int)(cost.step/sizeof(costbuf)) : 0;
615
const int TABSZ = 256;
616
uchar tab[TABSZ];
617
618
sad = (int*)alignPtr(buf + sizeof(sad[0]), ALIGN);
619
hsad0 = (int*)alignPtr(sad + ndisp + 1 + dy0*ndisp, ALIGN);
620
htext = (int*)alignPtr((int*)(hsad0 + (height+dy1)*ndisp) + wsz2 + 2, ALIGN);
621
cbuf0 = (uchar*)alignPtr((uchar*)(htext + height + wsz2 + 2) + dy0*ndisp, ALIGN);
622
623
for( x = 0; x < TABSZ; x++ )
624
tab[x] = (uchar)std::abs(x - ftzero);
625
626
// initialize buffers
627
memset( hsad0 - dy0*ndisp, 0, (height + dy0 + dy1)*ndisp*sizeof(hsad0[0]) );
628
memset( htext - wsz2 - 1, 0, (height + wsz + 1)*sizeof(htext[0]) );
629
630
for( x = -wsz2-1; x < wsz2; x++ )
631
{
632
hsad = hsad0 - dy0*ndisp; cbuf = cbuf0 + (x + wsz2 + 1)*cstep - dy0*ndisp;
633
lptr = lptr0 + std::min(std::max(x, -lofs), width-lofs-1) - dy0*sstep;
634
rptr = rptr0 + std::min(std::max(x, -rofs), width-rofs-ndisp) - dy0*sstep;
635
for( y = -dy0; y < height + dy1; y++, hsad += ndisp, cbuf += ndisp, lptr += sstep, rptr += sstep )
636
{
637
int lval = lptr[0];
638
d = 0;
639
#if CV_SIMD128
640
if( useSIMD )
641
{
642
v_uint8x16 lv = v_setall_u8((uchar)lval);
643
644
for( ; d <= ndisp - 16; d += 16 )
645
{
646
v_uint8x16 rv = v_load(rptr + d);
647
v_int32x4 hsad_0 = v_load(hsad + d);
648
v_int32x4 hsad_1 = v_load(hsad + d + 4);
649
v_int32x4 hsad_2 = v_load(hsad + d + 8);
650
v_int32x4 hsad_3 = v_load(hsad + d + 12);
651
v_uint8x16 diff = v_absdiff(lv, rv);
652
v_store(cbuf + d, diff);
653
654
v_uint16x8 diff0, diff1;
655
v_uint32x4 diff00, diff01, diff10, diff11;
656
v_expand(diff, diff0, diff1);
657
v_expand(diff0, diff00, diff01);
658
v_expand(diff1, diff10, diff11);
659
660
hsad_0 += v_reinterpret_as_s32(diff00);
661
hsad_1 += v_reinterpret_as_s32(diff01);
662
hsad_2 += v_reinterpret_as_s32(diff10);
663
hsad_3 += v_reinterpret_as_s32(diff11);
664
665
v_store(hsad + d, hsad_0);
666
v_store(hsad + d + 4, hsad_1);
667
v_store(hsad + d + 8, hsad_2);
668
v_store(hsad + d + 12, hsad_3);
669
}
670
}
671
#endif
672
for( ; d < ndisp; d++ )
673
{
674
int diff = std::abs(lval - rptr[d]);
675
cbuf[d] = (uchar)diff;
676
hsad[d] = (int)(hsad[d] + diff);
677
}
678
htext[y] += tab[lval];
679
}
680
}
681
682
// initialize the left and right borders of the disparity map
683
for( y = 0; y < height; y++ )
684
{
685
for( x = 0; x < lofs; x++ )
686
dptr[y*dstep + x] = FILTERED;
687
for( x = lofs + width1; x < width; x++ )
688
dptr[y*dstep + x] = FILTERED;
689
}
690
dptr += lofs;
691
692
for( x = 0; x < width1; x++, dptr++ )
693
{
694
int* costptr = cost.data ? cost.ptr<int>() + lofs + x : &costbuf;
695
int x0 = x - wsz2 - 1, x1 = x + wsz2;
696
const uchar* cbuf_sub = cbuf0 + ((x0 + wsz2 + 1) % (wsz + 1))*cstep - dy0*ndisp;
697
cbuf = cbuf0 + ((x1 + wsz2 + 1) % (wsz + 1))*cstep - dy0*ndisp;
698
hsad = hsad0 - dy0*ndisp;
699
lptr_sub = lptr0 + MIN(MAX(x0, -lofs), width-1-lofs) - dy0*sstep;
700
lptr = lptr0 + MIN(MAX(x1, -lofs), width-1-lofs) - dy0*sstep;
701
rptr = rptr0 + MIN(MAX(x1, -rofs), width-ndisp-rofs) - dy0*sstep;
702
703
for( y = -dy0; y < height + dy1; y++, cbuf += ndisp, cbuf_sub += ndisp,
704
hsad += ndisp, lptr += sstep, lptr_sub += sstep, rptr += sstep )
705
{
706
int lval = lptr[0];
707
d = 0;
708
#if CV_SIMD128
709
if( useSIMD )
710
{
711
v_uint8x16 lv = v_setall_u8((uchar)lval);
712
for( ; d <= ndisp - 16; d += 16 )
713
{
714
v_uint8x16 rv = v_load(rptr + d);
715
v_int32x4 hsad_0 = v_load(hsad + d);
716
v_int32x4 hsad_1 = v_load(hsad + d + 4);
717
v_int32x4 hsad_2 = v_load(hsad + d + 8);
718
v_int32x4 hsad_3 = v_load(hsad + d + 12);
719
v_uint8x16 cbs = v_load(cbuf_sub + d);
720
v_uint8x16 diff = v_absdiff(lv, rv);
721
v_store(cbuf + d, diff);
722
723
v_uint16x8 diff0, diff1, cbs0, cbs1;
724
v_int32x4 diff00, diff01, diff10, diff11, cbs00, cbs01, cbs10, cbs11;
725
v_expand(diff, diff0, diff1);
726
v_expand(cbs, cbs0, cbs1);
727
v_expand(v_reinterpret_as_s16(diff0), diff00, diff01);
728
v_expand(v_reinterpret_as_s16(diff1), diff10, diff11);
729
v_expand(v_reinterpret_as_s16(cbs0), cbs00, cbs01);
730
v_expand(v_reinterpret_as_s16(cbs1), cbs10, cbs11);
731
732
v_int32x4 diff_0 = diff00 - cbs00;
733
v_int32x4 diff_1 = diff01 - cbs01;
734
v_int32x4 diff_2 = diff10 - cbs10;
735
v_int32x4 diff_3 = diff11 - cbs11;
736
hsad_0 += diff_0;
737
hsad_1 += diff_1;
738
hsad_2 += diff_2;
739
hsad_3 += diff_3;
740
741
v_store(hsad + d, hsad_0);
742
v_store(hsad + d + 4, hsad_1);
743
v_store(hsad + d + 8, hsad_2);
744
v_store(hsad + d + 12, hsad_3);
745
}
746
}
747
#endif
748
for( ; d < ndisp; d++ )
749
{
750
int diff = std::abs(lval - rptr[d]);
751
cbuf[d] = (uchar)diff;
752
hsad[d] = hsad[d] + diff - cbuf_sub[d];
753
}
754
htext[y] += tab[lval] - tab[lptr_sub[0]];
755
}
756
757
// fill borders
758
for( y = dy1; y <= wsz2; y++ )
759
htext[height+y] = htext[height+dy1-1];
760
for( y = -wsz2-1; y < -dy0; y++ )
761
htext[y] = htext[-dy0];
762
763
// initialize sums
764
for( d = 0; d < ndisp; d++ )
765
sad[d] = (int)(hsad0[d-ndisp*dy0]*(wsz2 + 2 - dy0));
766
767
hsad = hsad0 + (1 - dy0)*ndisp;
768
for( y = 1 - dy0; y < wsz2; y++, hsad += ndisp )
769
{
770
d = 0;
771
#if CV_SIMD128
772
if( useSIMD )
773
{
774
for( d = 0; d <= ndisp-8; d += 8 )
775
{
776
v_int32x4 s0 = v_load(sad + d);
777
v_int32x4 s1 = v_load(sad + d + 4);
778
v_int32x4 t0 = v_load(hsad + d);
779
v_int32x4 t1 = v_load(hsad + d + 4);
780
s0 += t0;
781
s1 += t1;
782
v_store(sad + d, s0);
783
v_store(sad + d + 4, s1);
784
}
785
}
786
#endif
787
for( ; d < ndisp; d++ )
788
sad[d] = (int)(sad[d] + hsad[d]);
789
}
790
int tsum = 0;
791
for( y = -wsz2-1; y < wsz2; y++ )
792
tsum += htext[y];
793
794
// finally, start the real processing
795
for( y = 0; y < height; y++ )
796
{
797
int minsad = INT_MAX, mind = -1;
798
hsad = hsad0 + MIN(y + wsz2, height+dy1-1)*ndisp;
799
hsad_sub = hsad0 + MAX(y - wsz2 - 1, -dy0)*ndisp;
800
d = 0;
801
#if CV_SIMD128
802
if( useSIMD )
803
{
804
v_int32x4 d0_4 = v_int32x4(0, 1, 2, 3);
805
v_int32x4 dd_4 = v_setall_s32(4);
806
v_int32x4 minsad4 = v_setall_s32(INT_MAX);
807
v_int32x4 mind4 = v_setall_s32(0), d4 = d0_4;
808
809
for( ; d <= ndisp - 8; d += 8 )
810
{
811
v_int32x4 u0 = v_load(hsad_sub + d);
812
v_int32x4 u1 = v_load(hsad + d);
813
814
v_int32x4 v0 = v_load(hsad_sub + d + 4);
815
v_int32x4 v1 = v_load(hsad + d + 4);
816
817
v_int32x4 usad4 = v_load(sad + d);
818
v_int32x4 vsad4 = v_load(sad + d + 4);
819
820
u1 -= u0;
821
v1 -= v0;
822
usad4 += u1;
823
vsad4 += v1;
824
825
v_store(sad + d, usad4);
826
v_store(sad + d + 4, vsad4);
827
828
v_int32x4 mask = minsad4 > usad4;
829
minsad4 = v_min(minsad4, usad4);
830
mind4 = v_select(mask, d4, mind4);
831
d4 += dd_4;
832
833
mask = minsad4 > vsad4;
834
minsad4 = v_min(minsad4, vsad4);
835
mind4 = v_select(mask, d4, mind4);
836
d4 += dd_4;
837
}
838
839
int CV_DECL_ALIGNED(16) minsad_buf[4], mind_buf[4];
840
v_store(minsad_buf, minsad4);
841
v_store(mind_buf, mind4);
842
if(minsad_buf[0] < minsad || (minsad == minsad_buf[0] && mind_buf[0] < mind)) { minsad = minsad_buf[0]; mind = mind_buf[0]; }
843
if(minsad_buf[1] < minsad || (minsad == minsad_buf[1] && mind_buf[1] < mind)) { minsad = minsad_buf[1]; mind = mind_buf[1]; }
844
if(minsad_buf[2] < minsad || (minsad == minsad_buf[2] && mind_buf[2] < mind)) { minsad = minsad_buf[2]; mind = mind_buf[2]; }
845
if(minsad_buf[3] < minsad || (minsad == minsad_buf[3] && mind_buf[3] < mind)) { minsad = minsad_buf[3]; mind = mind_buf[3]; }
846
}
847
#endif
848
for( ; d < ndisp; d++ )
849
{
850
int currsad = sad[d] + hsad[d] - hsad_sub[d];
851
sad[d] = currsad;
852
if( currsad < minsad )
853
{
854
minsad = currsad;
855
mind = d;
856
}
857
}
858
859
tsum += htext[y + wsz2] - htext[y - wsz2 - 1];
860
if( tsum < textureThreshold )
861
{
862
dptr[y*dstep] = FILTERED;
863
continue;
864
}
865
866
if( uniquenessRatio > 0 )
867
{
868
int thresh = minsad + (minsad * uniquenessRatio/100);
869
for( d = 0; d < ndisp; d++ )
870
{
871
if( (d < mind-1 || d > mind+1) && sad[d] <= thresh)
872
break;
873
}
874
if( d < ndisp )
875
{
876
dptr[y*dstep] = FILTERED;
877
continue;
878
}
879
}
880
881
{
882
sad[-1] = sad[1];
883
sad[ndisp] = sad[ndisp-2];
884
int p = sad[mind+1], n = sad[mind-1];
885
d = p + n - 2*sad[mind] + std::abs(p - n);
886
dptr[y*dstep] = dispDescale<mType>(ndisp - mind - 1 + mindisp, p-n, d);
887
888
costptr[y*coststep] = sad[mind];
889
}
890
}
891
}
892
}
893
894
#ifdef HAVE_OPENCL
// Applies the prefilter selected by state->preFilterType to both input images
// using the OpenCL kernels: the normalized-response filter for
// PREFILTER_NORMALIZED_RESPONSE, the X-Sobel filter for any other value.
//
// Returns false as soon as either kernel invocation fails (the right image is
// not processed if the left one failed); true when both succeeded.
static bool ocl_prefiltering(InputArray left0, InputArray right0, OutputArray left, OutputArray right, StereoBMParams* state)
{
    if( state->preFilterType == StereoBM::PREFILTER_NORMALIZED_RESPONSE )
    {
        if(!ocl_prefilter_norm( left0, left, state->preFilterSize, state->preFilterCap))
            return false;
        if(!ocl_prefilter_norm( right0, right, state->preFilterSize, state->preFilterCap))
            return false;
    }
    else
    {
        if(!ocl_prefilter_xsobel( left0, left, state->preFilterCap ))
            return false;
        if(!ocl_prefilter_xsobel( right0, right, state->preFilterCap))
            return false;
    }
    return true;
}
#endif
914
915
struct PrefilterInvoker : public ParallelLoopBody
916
{
917
PrefilterInvoker(const Mat& left0, const Mat& right0, Mat& left, Mat& right,
918
uchar* buf0, uchar* buf1, StereoBMParams* _state)
919
{
920
imgs0[0] = &left0; imgs0[1] = &right0;
921
imgs[0] = &left; imgs[1] = &right;
922
buf[0] = buf0; buf[1] = buf1;
923
state = _state;
924
}
925
926
void operator()(const Range& range) const CV_OVERRIDE
927
{
928
for( int i = range.start; i < range.end; i++ )
929
{
930
if( state->preFilterType == StereoBM::PREFILTER_NORMALIZED_RESPONSE )
931
prefilterNorm( *imgs0[i], *imgs[i], state->preFilterSize, state->preFilterCap, buf[i] );
932
else
933
prefilterXSobel( *imgs0[i], *imgs[i], state->preFilterCap );
934
}
935
}
936
937
const Mat* imgs0[2];
938
Mat* imgs[2];
939
uchar* buf[2];
940
StereoBMParams* state;
941
};
942
943
#ifdef HAVE_OPENCL
// OpenCL implementation of the block matcher.
//
// _left, _right - prefiltered input images.
// _disp         - output; created as CV_16S with the size of _left and
//                 pre-filled with (minDisparity - 1) << 4. Only the interior
//                 region `roi` is handed to the kernel.
// state         - parameters; disparity range, window size, texture threshold
//                 and uniqueness ratio are read.
//
// Returns false when the kernel cannot be built or fails to run.
static bool ocl_stereobm( InputArray _left, InputArray _right,
                          OutputArray _disp, StereoBMParams* state)
{
    int ndisp = state->numDisparities;
    int mindisp = state->minDisparity;
    int wsz = state->SADWindowSize;
    int wsz2 = wsz/2;

    // Tile size handled by one work group; chosen per device.
    ocl::Device devDef = ocl::Device::getDefault();
    int sizeX = devDef.isIntel() ? 32 : std::max(11, 27 - devDef.maxComputeUnits()),
        sizeY = sizeX - 1,
        N = ndisp * 2;

    cv::String opt = cv::format("-D DEFINE_KERNEL_STEREOBM -D MIN_DISP=%d -D NUM_DISP=%d"
                                " -D BLOCK_SIZE_X=%d -D BLOCK_SIZE_Y=%d -D WSZ=%d",
                                mindisp, ndisp,
                                sizeX, sizeY, wsz);
    ocl::Kernel k("stereoBM", ocl::calib3d::stereobm_oclsrc, opt);
    if(k.empty())
        return false;

    UMat left = _left.getUMat(), right = _right.getUMat();
    int cols = left.cols, rows = left.rows;

    _disp.create(_left.size(), CV_16S);
    _disp.setTo((mindisp - 1) << 4);
    // Region that is actually computed: everything outside keeps the fill value.
    Rect roi = Rect(Point(wsz2 + mindisp + ndisp - 1, wsz2), Point(cols-wsz2-mindisp, rows-wsz2) );
    UMat disp = (_disp.getUMat())(roi);

    // One work group of N items per sizeX x sizeY tile of the ROI.
    int globalX = (disp.cols + sizeX - 1) / sizeX,
        globalY = (disp.rows + sizeY - 1) / sizeY;
    size_t globalThreads[3] = {(size_t)N, (size_t)globalX, (size_t)globalY};
    size_t localThreads[3]  = {(size_t)N, 1, 1};

    int idx = 0;
    idx = k.set(idx, ocl::KernelArg::PtrReadOnly(left));
    idx = k.set(idx, ocl::KernelArg::PtrReadOnly(right));
    idx = k.set(idx, ocl::KernelArg::WriteOnlyNoSize(disp));
    idx = k.set(idx, rows);
    idx = k.set(idx, cols);
    idx = k.set(idx, state->textureThreshold);
    idx = k.set(idx, state->uniquenessRatio);
    return k.run(3, globalThreads, localThreads, false);
}
#endif
989
990
struct FindStereoCorrespInvoker : public ParallelLoopBody
991
{
992
FindStereoCorrespInvoker( const Mat& _left, const Mat& _right,
993
Mat& _disp, StereoBMParams* _state,
994
int _nstripes, size_t _stripeBufSize,
995
bool _useShorts, Rect _validDisparityRect,
996
Mat& _slidingSumBuf, Mat& _cost )
997
{
998
CV_Assert( _disp.type() == CV_16S || _disp.type() == CV_32S );
999
left = &_left; right = &_right;
1000
disp = &_disp; state = _state;
1001
nstripes = _nstripes; stripeBufSize = _stripeBufSize;
1002
useShorts = _useShorts;
1003
validDisparityRect = _validDisparityRect;
1004
slidingSumBuf = &_slidingSumBuf;
1005
cost = &_cost;
1006
#if CV_SIMD128
1007
useSIMD = hasSIMD128();
1008
#endif
1009
}
1010
1011
void operator()(const Range& range) const CV_OVERRIDE
1012
{
1013
int cols = left->cols, rows = left->rows;
1014
int _row0 = std::min(cvRound(range.start * rows / nstripes), rows);
1015
int _row1 = std::min(cvRound(range.end * rows / nstripes), rows);
1016
uchar *ptr = slidingSumBuf->ptr() + range.start * stripeBufSize;
1017
1018
int dispShift = disp->type() == CV_16S ? DISPARITY_SHIFT_16S :
1019
DISPARITY_SHIFT_32S;
1020
int FILTERED = (state->minDisparity - 1) << dispShift;
1021
1022
Rect roi = validDisparityRect & Rect(0, _row0, cols, _row1 - _row0);
1023
if( roi.height == 0 )
1024
return;
1025
int row0 = roi.y;
1026
int row1 = roi.y + roi.height;
1027
1028
Mat part;
1029
if( row0 > _row0 )
1030
{
1031
part = disp->rowRange(_row0, row0);
1032
part = Scalar::all(FILTERED);
1033
}
1034
if( _row1 > row1 )
1035
{
1036
part = disp->rowRange(row1, _row1);
1037
part = Scalar::all(FILTERED);
1038
}
1039
1040
Mat left_i = left->rowRange(row0, row1);
1041
Mat right_i = right->rowRange(row0, row1);
1042
Mat disp_i = disp->rowRange(row0, row1);
1043
Mat cost_i = state->disp12MaxDiff >= 0 ? cost->rowRange(row0, row1) : Mat();
1044
1045
#if CV_SIMD128
1046
if( useSIMD && useShorts )
1047
{
1048
if( disp_i.type() == CV_16S)
1049
findStereoCorrespondenceBM_SIMD<short>( left_i, right_i, disp_i, cost_i, *state, ptr, row0, rows - row1 );
1050
else
1051
findStereoCorrespondenceBM_SIMD<int>( left_i, right_i, disp_i, cost_i, *state, ptr, row0, rows - row1);
1052
}
1053
else
1054
#endif
1055
{
1056
if( disp_i.type() == CV_16S )
1057
findStereoCorrespondenceBM<short>( left_i, right_i, disp_i, cost_i, *state, ptr, row0, rows - row1 );
1058
else
1059
findStereoCorrespondenceBM<int>( left_i, right_i, disp_i, cost_i, *state, ptr, row0, rows - row1 );
1060
}
1061
1062
if( state->disp12MaxDiff >= 0 )
1063
validateDisparity( disp_i, cost_i, state->minDisparity, state->numDisparities, state->disp12MaxDiff );
1064
1065
if( roi.x > 0 )
1066
{
1067
part = disp_i.colRange(0, roi.x);
1068
part = Scalar::all(FILTERED);
1069
}
1070
if( roi.x + roi.width < cols )
1071
{
1072
part = disp_i.colRange(roi.x + roi.width, cols);
1073
part = Scalar::all(FILTERED);
1074
}
1075
}
1076
1077
protected:
1078
const Mat *left, *right;
1079
Mat* disp, *slidingSumBuf, *cost;
1080
StereoBMParams *state;
1081
1082
int nstripes;
1083
size_t stripeBufSize;
1084
bool useShorts;
1085
Rect validDisparityRect;
1086
bool useSIMD;
1087
};
1088
1089
// Block-matching stereo correspondence: the concrete implementation behind
// StereoBM. Holds the parameter set plus scratch matrices that are reused
// between compute() calls.
class StereoBMImpl CV_FINAL : public StereoBM
{
public:
    StereoBMImpl()
    {
        params = StereoBMParams();
    }

    StereoBMImpl( int _numDisparities, int _SADWindowSize )
    {
        params = StereoBMParams(_numDisparities, _SADWindowSize);
    }

    // Computes the disparity map for a pair of CV_8UC1 images of equal size.
    //
    // leftarr, rightarr - input images.
    // disparr           - output disparity map, CV_16SC1 (fixed point with
    //                     DISPARITY_SHIFT_16S fractional bits) or CV_32FC1.
    //                     If its type is not fixed, params.dispType is used.
    //
    // Raises StsUnmatchedSizes / StsUnsupportedFormat / StsOutOfRange when the
    // inputs or the parameters fail the checks below.
    void compute( InputArray leftarr, InputArray rightarr, OutputArray disparr ) CV_OVERRIDE
    {
        CV_INSTRUMENT_REGION();

        int dtype = disparr.fixedType() ? disparr.type() : params.dispType;
        Size leftsize = leftarr.size();

        if (leftarr.size() != rightarr.size())
            CV_Error( Error::StsUnmatchedSizes, "All the images must have the same size" );

        if (leftarr.type() != CV_8UC1 || rightarr.type() != CV_8UC1)
            CV_Error( Error::StsUnsupportedFormat, "Both input images must have CV_8UC1" );

        if (dtype != CV_16SC1 && dtype != CV_32FC1)
            CV_Error( Error::StsUnsupportedFormat, "Disparity image must have CV_16SC1 or CV_32FC1 format" );

        if( params.preFilterType != PREFILTER_NORMALIZED_RESPONSE &&
            params.preFilterType != PREFILTER_XSOBEL )
            CV_Error( Error::StsOutOfRange, "preFilterType must be PREFILTER_NORMALIZED_RESPONSE or PREFILTER_XSOBEL" );

        if( params.preFilterSize < 5 || params.preFilterSize > 255 || params.preFilterSize % 2 == 0 )
            CV_Error( Error::StsOutOfRange, "preFilterSize must be odd and be within 5..255" );

        if( params.preFilterCap < 1 || params.preFilterCap > 63 )
            CV_Error( Error::StsOutOfRange, "preFilterCap must be within 1..63" );

        if( params.SADWindowSize < 5 || params.SADWindowSize > 255 || params.SADWindowSize % 2 == 0 ||
            params.SADWindowSize >= std::min(leftsize.width, leftsize.height) )
            CV_Error( Error::StsOutOfRange, "SADWindowSize must be odd, be within 5..255 and be smaller than image width and height" );

        if( params.numDisparities <= 0 || params.numDisparities % 16 != 0 )
            CV_Error( Error::StsOutOfRange, "numDisparities must be positive and divisible by 16" );

        if( params.textureThreshold < 0 )
            CV_Error( Error::StsOutOfRange, "texture threshold must be non-negative" );

        if( params.uniquenessRatio < 0 )
            CV_Error( Error::StsOutOfRange, "uniqueness ratio must be non-negative" );

        // Number of fractional bits of the internal fixed-point disparity.
        int disp_shift;
        if (dtype == CV_16SC1)
            disp_shift = DISPARITY_SHIFT_16S;
        else
            disp_shift = DISPARITY_SHIFT_32S;

        // Marker value written to pixels without a valid disparity.
        int FILTERED = (params.minDisparity - 1) << disp_shift;

#ifdef HAVE_OPENCL
        if(ocl::isOpenCLActivated() && disparr.isUMat() && params.textureThreshold == 0)
        {
            UMat left, right;
            if(ocl_prefiltering(leftarr, rightarr, left, right, &params))
            {
                if(ocl_stereobm(left, right, disparr, &params))
                {
                    // The OpenCL kernel always produces CV_16S output.
                    disp_shift = DISPARITY_SHIFT_16S;
                    FILTERED = (params.minDisparity - 1) << disp_shift;

                    if( params.speckleRange >= 0 && params.speckleWindowSize > 0 )
                        filterSpeckles(disparr.getMat(), FILTERED, params.speckleWindowSize, params.speckleRange, slidingSumBuf);
                    if (dtype == CV_32F)
                        disparr.getUMat().convertTo(disparr, CV_32FC1, 1./(1 << disp_shift), 0);
                    CV_IMPL_ADD(CV_IMPL_OCL);
                    return;
                }
            }
        }
#endif

        Mat left0 = leftarr.getMat(), right0 = rightarr.getMat();
        disparr.create(left0.size(), dtype);
        Mat disp0 = disparr.getMat();

        preFilteredImg0.create( left0.size(), CV_8U );
        preFilteredImg1.create( left0.size(), CV_8U );
        cost.create( left0.size(), CV_16S );

        Mat left = preFilteredImg0, right = preFilteredImg1;

        int mindisp = params.minDisparity;
        int ndisp = params.numDisparities;

        int width = left0.cols;
        int height = left0.rows;
        int lofs = std::max(ndisp - 1 + mindisp, 0);
        int rofs = -std::min(ndisp - 1 + mindisp, 0);
        int width1 = width - rofs - ndisp + 1;

        // The disparity range leaves no column to match: everything is filtered.
        if( lofs >= width || rofs >= width || width1 < 1 )
        {
            disp0 = Scalar::all( FILTERED * ( disp0.type() < CV_32F ? 1 : 1./(1 << disp_shift) ) );
            return;
        }

        // For floating-point output, match into a CV_32S buffer and convert
        // at the end.
        Mat disp = disp0;
        if( dtype == CV_32F )
        {
            dispbuf.create(disp0.size(), CV_32S);
            disp = dispbuf;
        }

        // Scratch sizes are accumulated in size_t so that large images cannot
        // overflow int arithmetic and end up with an undersized buffer.
        int wsz = params.SADWindowSize;
        const size_t bufRows = (size_t)(height + wsz + 2);
        // bufSize0: scratch space per stripe of the correspondence search.
        size_t bufSize0 = (size_t)(ndisp + 2)*sizeof(int);
        bufSize0 += bufRows*ndisp*sizeof(int);
        bufSize0 += bufRows*sizeof(int);
        bufSize0 += bufRows*ndisp*(wsz+2)*sizeof(uchar) + 256;

        // bufSize1: scratch space per image for the prefilter.
        size_t bufSize1 = (size_t)(width + params.preFilterSize + 2) * sizeof(int) + 256;
        // bufSize2: scratch space for speckle filtering, if enabled.
        size_t bufSize2 = 0;
        if( params.speckleRange >= 0 && params.speckleWindowSize > 0 )
            bufSize2 = (size_t)width*height*(sizeof(Point_<short>) + sizeof(int) + sizeof(uchar));

        bool useShorts = params.preFilterCap <= 31 && params.SADWindowSize <= 21;
        const double SAD_overhead_coeff = 10.0;
        double N0 = 8000000 / (useShorts ? 1 : 4);  // approx tbb's min number instructions reasonable for one thread
        double maxStripeSize = std::min(std::max(N0 / (width * ndisp), (wsz-1) * SAD_overhead_coeff), (double)height);
        int nstripes = cvCeil(height / maxStripeSize);
        const size_t bufSizeTotal = std::max(bufSize0 * (size_t)nstripes, std::max(bufSize1 * 2, bufSize2));
        // The buffer is a single-row Mat, so its length must fit into int.
        CV_Assert( bufSizeTotal <= (size_t)INT_MAX );
        int bufSize = (int)bufSizeTotal;

        if( slidingSumBuf.cols < bufSize )
            slidingSumBuf.create( 1, bufSize, CV_8U );

        uchar *_buf = slidingSumBuf.ptr();

        // Prefilter the two images in parallel, each with its own scratch region.
        parallel_for_(Range(0, 2), PrefilterInvoker(left0, right0, left, right, _buf, _buf + bufSize1, &params), 1);

        Rect validDisparityRect(0, 0, width, height), R1 = params.roi1, R2 = params.roi2;
        validDisparityRect = getValidDisparityROI(!R1.empty() ? R1 : validDisparityRect,
                                                  !R2.empty() ? R2 : validDisparityRect,
                                                  params.minDisparity, params.numDisparities,
                                                  params.SADWindowSize);

        parallel_for_(Range(0, nstripes),
                      FindStereoCorrespInvoker(left, right, disp, &params, nstripes,
                                               bufSize0, useShorts, validDisparityRect,
                                               slidingSumBuf, cost));

        if( params.speckleRange >= 0 && params.speckleWindowSize > 0 )
            filterSpeckles(disp, FILTERED, params.speckleWindowSize, params.speckleRange, slidingSumBuf);

        // Convert the intermediate fixed-point buffer to the requested type.
        if (disp0.data != disp.data)
            disp.convertTo(disp0, disp0.type(), 1./(1 << disp_shift), 0);
    }

    int getMinDisparity() const CV_OVERRIDE { return params.minDisparity; }
    void setMinDisparity(int minDisparity) CV_OVERRIDE { params.minDisparity = minDisparity; }

    int getNumDisparities() const CV_OVERRIDE { return params.numDisparities; }
    void setNumDisparities(int numDisparities) CV_OVERRIDE { params.numDisparities = numDisparities; }

    int getBlockSize() const CV_OVERRIDE { return params.SADWindowSize; }
    void setBlockSize(int blockSize) CV_OVERRIDE { params.SADWindowSize = blockSize; }

    int getSpeckleWindowSize() const CV_OVERRIDE { return params.speckleWindowSize; }
    void setSpeckleWindowSize(int speckleWindowSize) CV_OVERRIDE { params.speckleWindowSize = speckleWindowSize; }

    int getSpeckleRange() const CV_OVERRIDE { return params.speckleRange; }
    void setSpeckleRange(int speckleRange) CV_OVERRIDE { params.speckleRange = speckleRange; }

    int getDisp12MaxDiff() const CV_OVERRIDE { return params.disp12MaxDiff; }
    void setDisp12MaxDiff(int disp12MaxDiff) CV_OVERRIDE { params.disp12MaxDiff = disp12MaxDiff; }

    int getPreFilterType() const CV_OVERRIDE { return params.preFilterType; }
    void setPreFilterType(int preFilterType) CV_OVERRIDE { params.preFilterType = preFilterType; }

    int getPreFilterSize() const CV_OVERRIDE { return params.preFilterSize; }
    void setPreFilterSize(int preFilterSize) CV_OVERRIDE { params.preFilterSize = preFilterSize; }

    int getPreFilterCap() const CV_OVERRIDE { return params.preFilterCap; }
    void setPreFilterCap(int preFilterCap) CV_OVERRIDE { params.preFilterCap = preFilterCap; }

    int getTextureThreshold() const CV_OVERRIDE { return params.textureThreshold; }
    void setTextureThreshold(int textureThreshold) CV_OVERRIDE { params.textureThreshold = textureThreshold; }

    int getUniquenessRatio() const CV_OVERRIDE { return params.uniquenessRatio; }
    void setUniquenessRatio(int uniquenessRatio) CV_OVERRIDE { params.uniquenessRatio = uniquenessRatio; }

    // This implementation has no smaller block size: the getter always
    // returns 0 and the setter ignores its argument.
    int getSmallerBlockSize() const CV_OVERRIDE { return 0; }
    void setSmallerBlockSize(int) CV_OVERRIDE {}

    Rect getROI1() const CV_OVERRIDE { return params.roi1; }
    void setROI1(Rect roi1) CV_OVERRIDE { params.roi1 = roi1; }

    Rect getROI2() const CV_OVERRIDE { return params.roi2; }
    void setROI2(Rect roi2) CV_OVERRIDE { params.roi2 = roi2; }

    // Stores the algorithm name and all scalar parameters (the ROIs are not
    // written).
    void write(FileStorage& fs) const CV_OVERRIDE
    {
        writeFormat(fs);
        fs << "name" << name_
        << "minDisparity" << params.minDisparity
        << "numDisparities" << params.numDisparities
        << "blockSize" << params.SADWindowSize
        << "speckleWindowSize" << params.speckleWindowSize
        << "speckleRange" << params.speckleRange
        << "disp12MaxDiff" << params.disp12MaxDiff
        << "preFilterType" << params.preFilterType
        << "preFilterSize" << params.preFilterSize
        << "preFilterCap" << params.preFilterCap
        << "textureThreshold" << params.textureThreshold
        << "uniquenessRatio" << params.uniquenessRatio;
    }

    // Loads the parameters written by write(); asserts that the stored name
    // matches this algorithm and resets both ROIs to empty rectangles.
    void read(const FileNode& fn) CV_OVERRIDE
    {
        FileNode n = fn["name"];
        CV_Assert( n.isString() && String(n) == name_ );
        params.minDisparity = (int)fn["minDisparity"];
        params.numDisparities = (int)fn["numDisparities"];
        params.SADWindowSize = (int)fn["blockSize"];
        params.speckleWindowSize = (int)fn["speckleWindowSize"];
        params.speckleRange = (int)fn["speckleRange"];
        params.disp12MaxDiff = (int)fn["disp12MaxDiff"];
        params.preFilterType = (int)fn["preFilterType"];
        params.preFilterSize = (int)fn["preFilterSize"];
        params.preFilterCap = (int)fn["preFilterCap"];
        params.textureThreshold = (int)fn["textureThreshold"];
        params.uniquenessRatio = (int)fn["uniquenessRatio"];
        params.roi1 = params.roi2 = Rect();
    }

    StereoBMParams params;
    // Scratch images reused between compute() calls.
    Mat preFilteredImg0, preFilteredImg1, cost, dispbuf;
    // Shared byte buffer for prefiltering, matching and speckle filtering.
    Mat slidingSumBuf;

    static const char* name_;
};
1329
1330
// Algorithm identifier: written under the "name" key by StereoBMImpl::write()
// and required to match by StereoBMImpl::read().
const char* StereoBMImpl::name_ = "StereoMatcher.BM";
1331
1332
// Factory for the block-matching stereo matcher: builds a StereoBMImpl with
// the given number of disparities and SAD window size and returns it through
// the StereoBM interface.
Ptr<StereoBM> StereoBM::create(int _numDisparities, int _SADWindowSize)
{
    Ptr<StereoBM> matcher = makePtr<StereoBMImpl>(_numDisparities, _SADWindowSize);
    return matcher;
}
1336
1337
}
1338
1339
/* End of file. */
1340
1341