Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Tetragramm
GitHub Repository: Tetragramm/opencv
Path: blob/master/samples/gpu/stereo_multi.cpp
16337 views
1
// This sample demonstrates working on one piece of data using two GPUs.
2
// It splits input into two parts and processes them separately on different GPUs.
3
4
#ifdef _WIN32
5
#define NOMINMAX
6
#include <windows.h>
7
#else
8
#include <pthread.h>
9
#include <unistd.h>
10
#endif
11
12
#include <iostream>
13
#include <iomanip>
14
15
#include "opencv2/core.hpp"
16
#include "opencv2/highgui.hpp"
17
#include "opencv2/imgproc.hpp"
18
#include "opencv2/cudastereo.hpp"
19
20
using namespace std;
21
using namespace cv;
22
using namespace cv::cuda;
23
24
///////////////////////////////////////////////////////////
25
// Thread
26
// OS-specific wrappers for multi-threading
27
28
#ifdef _WIN32
29
// Minimal joinable worker thread built on the Win32 API.
//
// The constructor starts `func(userData)` on a new thread; wait() blocks until
// that call has returned. The worker reads its arguments from `userData_`,
// which lives inside this object, so the object must outlive the worker:
// the destructor therefore waits for the thread before closing its handle.
class Thread
{
    // Function pointer and argument handed to the new thread.
    struct UserData
    {
        void (*func)(void* userData);
        void* param;
    };

    // Thread entry point with the signature CreateThread expects.
    static DWORD WINAPI WinThreadFunction(LPVOID lpParam)
    {
        UserData* userData = static_cast<UserData*>(lpParam);

        userData->func(userData->param);

        return 0;
    }

    UserData userData_;
    HANDLE thread_;   // NULL when no thread could be created
    DWORD threadId_;

    // Not copyable: a copy would close the same handle twice.
    Thread(const Thread&);
    Thread& operator=(const Thread&);

public:
    // Starts `func(userData)` on a new thread.
    // If the thread cannot be created, the function is run synchronously on
    // the calling thread instead, so the work is still done before wait() returns.
    Thread(void (*func)(void* userData), void* userData)
        : thread_(NULL), threadId_(0)
    {
        userData_.func = func;
        userData_.param = userData;

        thread_ = CreateThread(
            NULL,               // default security attributes
            0,                  // use default stack size
            WinThreadFunction,  // thread function name
            &userData_,         // argument to thread function
            0,                  // use default creation flags
            &threadId_);        // returns the thread identifier

        if (thread_ == NULL)
            func(userData);
    }

    // Waits for the worker (it still references userData_) and releases the handle.
    ~Thread()
    {
        if (thread_ != NULL)
        {
            WaitForSingleObject(thread_, INFINITE);
            CloseHandle(thread_);
        }
    }

    // Blocks until the worker function has returned. Safe to call repeatedly.
    void wait()
    {
        if (thread_ != NULL)
            WaitForSingleObject(thread_, INFINITE);
    }
};
75
#else
76
// Minimal joinable worker thread built on POSIX threads.
//
// The constructor starts `func(userData)` on a new thread; wait() blocks until
// that call has returned. The worker reads its arguments from `userData_`,
// which lives inside this object, so the object must outlive the worker:
// the destructor therefore joins the thread if wait() was not called.
class Thread
{
    // Function pointer and argument handed to the new thread.
    struct UserData
    {
        void (*func)(void* userData);
        void* param;
    };

    // Thread entry point with the signature pthread_create expects.
    static void* PThreadFunction(void* lpParam)
    {
        UserData* userData = static_cast<UserData*>(lpParam);

        userData->func(userData->param);

        return 0;
    }

    pthread_t thread_;
    UserData userData_;
    bool joinable_;   // true while thread_ refers to a started, not yet joined thread

    // Not copyable: a copy would join the same thread twice.
    Thread(const Thread&);
    Thread& operator=(const Thread&);

public:
    // Starts `func(userData)` on a new thread.
    // If the thread cannot be created, the function is run synchronously on
    // the calling thread instead, so the work is still done before wait() returns.
    Thread(void (*func)(void* userData), void* userData)
        : joinable_(false)
    {
        userData_.func = func;
        userData_.param = userData;

        if (pthread_create(&thread_, NULL, PThreadFunction, &userData_) == 0)
            joinable_ = true;
        else
            func(userData);
    }

    // Joins the worker if that has not happened yet; a thread that was already
    // joined must not be joined or detached again.
    ~Thread()
    {
        wait();
    }

    // Blocks until the worker function has returned. Safe to call repeatedly.
    void wait()
    {
        if (joinable_)
        {
            pthread_join(thread_, NULL);
            joinable_ = false;
        }
    }
};
115
#endif
116
117
///////////////////////////////////////////////////////////
118
// StereoSingleGpu
119
// Run Stereo algorithm on single GPU
120
121
class StereoSingleGpu
122
{
123
public:
124
explicit StereoSingleGpu(int deviceId = 0);
125
~StereoSingleGpu();
126
127
void compute(const Mat& leftFrame, const Mat& rightFrame, Mat& disparity);
128
129
private:
130
int deviceId_;
131
GpuMat d_leftFrame;
132
GpuMat d_rightFrame;
133
GpuMat d_disparity;
134
Ptr<cuda::StereoBM> d_alg;
135
};
136
137
// Remembers the device id, makes that device current and creates the
// block-matching object while it is selected.
StereoSingleGpu::StereoSingleGpu(int deviceId)
    : deviceId_(deviceId)
{
    const int matcherArg = 256;   // first argument passed to createStereoBM

    cuda::setDevice(deviceId_);
    d_alg = cuda::createStereoBM(matcherArg);
}
142
143
// Explicitly frees every device-side member while the owning device is
// current, instead of leaving it to the implicit member destructors.
StereoSingleGpu::~StereoSingleGpu()
{
    cuda::setDevice(deviceId_);

    // Buffers first (left, right, disparity), then the algorithm object.
    GpuMat* const buffers[] = { &d_leftFrame, &d_rightFrame, &d_disparity };
    for (size_t i = 0; i < sizeof(buffers) / sizeof(buffers[0]); ++i)
        buffers[i]->release();

    d_alg.release();
}
151
152
void StereoSingleGpu::compute(const Mat& leftFrame, const Mat& rightFrame, Mat& disparity)
153
{
154
cuda::setDevice(deviceId_);
155
d_leftFrame.upload(leftFrame);
156
d_rightFrame.upload(rightFrame);
157
d_alg->compute(d_leftFrame, d_rightFrame, d_disparity);
158
d_disparity.download(disparity);
159
}
160
161
///////////////////////////////////////////////////////////
162
// StereoMultiGpuThread
163
// Run Stereo algorithm on two GPUs using different host threads
164
165
class StereoMultiGpuThread
166
{
167
public:
168
StereoMultiGpuThread();
169
~StereoMultiGpuThread();
170
171
void compute(const Mat& leftFrame, const Mat& rightFrame, Mat& disparity);
172
173
private:
174
GpuMat d_leftFrames[2];
175
GpuMat d_rightFrames[2];
176
GpuMat d_disparities[2];
177
Ptr<cuda::StereoBM> d_algs[2];
178
179
struct StereoLaunchData
180
{
181
int deviceId;
182
Mat leftFrame;
183
Mat rightFrame;
184
Mat disparity;
185
GpuMat* d_leftFrame;
186
GpuMat* d_rightFrame;
187
GpuMat* d_disparity;
188
Ptr<cuda::StereoBM> d_alg;
189
};
190
191
static void launchGpuStereoAlg(void* userData);
192
};
193
194
// Creates one block-matching object per GPU, each while its device is current.
StereoMultiGpuThread::StereoMultiGpuThread()
{
    for (int dev = 0; dev < 2; ++dev)
    {
        cuda::setDevice(dev);
        d_algs[dev] = cuda::createStereoBM(256);
    }
}
202
203
// Frees the per-device buffers and matchers, selecting each device before
// releasing the resources that belong to it.
StereoMultiGpuThread::~StereoMultiGpuThread()
{
    for (int dev = 0; dev < 2; ++dev)
    {
        cuda::setDevice(dev);

        d_leftFrames[dev].release();
        d_rightFrames[dev].release();
        d_disparities[dev].release();
        d_algs[dev].release();
    }
}
217
218
void StereoMultiGpuThread::compute(const Mat& leftFrame, const Mat& rightFrame, Mat& disparity)
219
{
220
disparity.create(leftFrame.size(), CV_8UC1);
221
222
// Split input data onto two parts for each GPUs.
223
// We add small border for each part,
224
// because original algorithm doesn't calculate disparity on image borders.
225
// With such padding we will get output in the middle of final result.
226
227
StereoLaunchData launchDatas[2];
228
229
launchDatas[0].deviceId = 0;
230
launchDatas[0].leftFrame = leftFrame.rowRange(0, leftFrame.rows / 2 + 32);
231
launchDatas[0].rightFrame = rightFrame.rowRange(0, rightFrame.rows / 2 + 32);
232
launchDatas[0].disparity = disparity.rowRange(0, leftFrame.rows / 2);
233
launchDatas[0].d_leftFrame = &d_leftFrames[0];
234
launchDatas[0].d_rightFrame = &d_rightFrames[0];
235
launchDatas[0].d_disparity = &d_disparities[0];
236
launchDatas[0].d_alg = d_algs[0];
237
238
launchDatas[1].deviceId = 1;
239
launchDatas[1].leftFrame = leftFrame.rowRange(leftFrame.rows / 2 - 32, leftFrame.rows);
240
launchDatas[1].rightFrame = rightFrame.rowRange(leftFrame.rows / 2 - 32, leftFrame.rows);
241
launchDatas[1].disparity = disparity.rowRange(leftFrame.rows / 2, leftFrame.rows);
242
launchDatas[1].d_leftFrame = &d_leftFrames[1];
243
launchDatas[1].d_rightFrame = &d_rightFrames[1];
244
launchDatas[1].d_disparity = &d_disparities[1];
245
launchDatas[1].d_alg = d_algs[1];
246
247
Thread thread0(launchGpuStereoAlg, &launchDatas[0]);
248
Thread thread1(launchGpuStereoAlg, &launchDatas[1]);
249
250
thread0.wait();
251
thread1.wait();
252
}
253
254
void StereoMultiGpuThread::launchGpuStereoAlg(void* userData)
255
{
256
StereoLaunchData* data = static_cast<StereoLaunchData*>(userData);
257
258
cuda::setDevice(data->deviceId);
259
data->d_leftFrame->upload(data->leftFrame);
260
data->d_rightFrame->upload(data->rightFrame);
261
data->d_alg->compute(*data->d_leftFrame, *data->d_rightFrame, *data->d_disparity);
262
263
if (data->deviceId == 0)
264
data->d_disparity->rowRange(0, data->d_disparity->rows - 32).download(data->disparity);
265
else
266
data->d_disparity->rowRange(32, data->d_disparity->rows).download(data->disparity);
267
}
268
269
///////////////////////////////////////////////////////////
270
// StereoMultiGpuStream
271
// Run Stereo algorithm on two GPUs from single host thread using async API
272
273
class StereoMultiGpuStream
274
{
275
public:
276
StereoMultiGpuStream();
277
~StereoMultiGpuStream();
278
279
void compute(const HostMem& leftFrame, const HostMem& rightFrame, HostMem& disparity);
280
281
private:
282
GpuMat d_leftFrames[2];
283
GpuMat d_rightFrames[2];
284
GpuMat d_disparities[2];
285
Ptr<cuda::StereoBM> d_algs[2];
286
Ptr<Stream> streams[2];
287
};
288
289
// Creates a block-matching object and a stream for each GPU, each while its
// device is current.
StereoMultiGpuStream::StereoMultiGpuStream()
{
    for (int dev = 0; dev < 2; ++dev)
    {
        cuda::setDevice(dev);

        d_algs[dev] = cuda::createStereoBM(256);
        streams[dev] = makePtr<Stream>();
    }
}
299
300
// Frees the per-device buffers, matchers and streams, selecting each device
// before releasing the resources that belong to it.
StereoMultiGpuStream::~StereoMultiGpuStream()
{
    for (int dev = 0; dev < 2; ++dev)
    {
        cuda::setDevice(dev);

        d_leftFrames[dev].release();
        d_rightFrames[dev].release();
        d_disparities[dev].release();
        d_algs[dev].release();
        streams[dev].release();
    }
}
316
317
void StereoMultiGpuStream::compute(const HostMem& leftFrame, const HostMem& rightFrame, HostMem& disparity)
318
{
319
disparity.create(leftFrame.size(), CV_8UC1);
320
321
// Split input data onto two parts for each GPUs.
322
// We add small border for each part,
323
// because original algorithm doesn't calculate disparity on image borders.
324
// With such padding we will get output in the middle of final result.
325
326
Mat leftFrameHdr = leftFrame.createMatHeader();
327
Mat rightFrameHdr = rightFrame.createMatHeader();
328
Mat disparityHdr = disparity.createMatHeader();
329
Mat disparityPart0 = disparityHdr.rowRange(0, leftFrame.rows / 2);
330
Mat disparityPart1 = disparityHdr.rowRange(leftFrame.rows / 2, leftFrame.rows);
331
332
cuda::setDevice(0);
333
d_leftFrames[0].upload(leftFrameHdr.rowRange(0, leftFrame.rows / 2 + 32), *streams[0]);
334
d_rightFrames[0].upload(rightFrameHdr.rowRange(0, leftFrame.rows / 2 + 32), *streams[0]);
335
d_algs[0]->compute(d_leftFrames[0], d_rightFrames[0], d_disparities[0], *streams[0]);
336
d_disparities[0].rowRange(0, leftFrame.rows / 2).download(disparityPart0, *streams[0]);
337
338
cuda::setDevice(1);
339
d_leftFrames[1].upload(leftFrameHdr.rowRange(leftFrame.rows / 2 - 32, leftFrame.rows), *streams[1]);
340
d_rightFrames[1].upload(rightFrameHdr.rowRange(leftFrame.rows / 2 - 32, leftFrame.rows), *streams[1]);
341
d_algs[1]->compute(d_leftFrames[1], d_rightFrames[1], d_disparities[1], *streams[1]);
342
d_disparities[1].rowRange(32, d_disparities[1].rows).download(disparityPart1, *streams[1]);
343
344
cuda::setDevice(0);
345
streams[0]->waitForCompletion();
346
347
cuda::setDevice(1);
348
streams[1]->waitForCompletion();
349
}
350
351
///////////////////////////////////////////////////////////
352
// main
353
354
int main(int argc, char** argv)
355
{
356
if (argc != 3)
357
{
358
cerr << "Usage: stereo_multi <left_video> <right_video>" << endl;
359
return -1;
360
}
361
362
const int numDevices = getCudaEnabledDeviceCount();
363
if (numDevices != 2)
364
{
365
cerr << "Two GPUs are required" << endl;
366
return -1;
367
}
368
369
for (int i = 0; i < numDevices; ++i)
370
{
371
DeviceInfo devInfo(i);
372
if (!devInfo.isCompatible())
373
{
374
cerr << "CUDA module wasn't built for GPU #" << i << " ("
375
<< devInfo.name() << ", CC " << devInfo.majorVersion()
376
<< devInfo.minorVersion() << endl;
377
return -1;
378
}
379
380
printShortCudaDeviceInfo(i);
381
}
382
383
VideoCapture leftVideo(argv[1]);
384
VideoCapture rightVideo(argv[2]);
385
386
if (!leftVideo.isOpened())
387
{
388
cerr << "Can't open " << argv[1] << " video file" << endl;
389
return -1;
390
}
391
392
if (!rightVideo.isOpened())
393
{
394
cerr << "Can't open " << argv[2] << " video file" << endl;
395
return -1;
396
}
397
398
cout << endl;
399
cout << "This sample demonstrates working on one piece of data using two GPUs." << endl;
400
cout << "It splits input into two parts and processes them separately on different GPUs." << endl;
401
cout << endl;
402
403
Mat leftFrame, rightFrame;
404
HostMem leftGrayFrame, rightGrayFrame;
405
406
StereoSingleGpu gpu0Alg(0);
407
StereoSingleGpu gpu1Alg(1);
408
StereoMultiGpuThread multiThreadAlg;
409
StereoMultiGpuStream multiStreamAlg;
410
411
Mat disparityGpu0;
412
Mat disparityGpu1;
413
Mat disparityMultiThread;
414
HostMem disparityMultiStream;
415
416
Mat disparityGpu0Show;
417
Mat disparityGpu1Show;
418
Mat disparityMultiThreadShow;
419
Mat disparityMultiStreamShow;
420
421
TickMeter tm;
422
423
cout << "-------------------------------------------------------------------" << endl;
424
cout << "| Frame | GPU 0 ms | GPU 1 ms | Multi Thread ms | Multi Stream ms |" << endl;
425
cout << "-------------------------------------------------------------------" << endl;
426
427
for (int i = 0;; ++i)
428
{
429
leftVideo >> leftFrame;
430
rightVideo >> rightFrame;
431
432
if (leftFrame.empty() || rightFrame.empty())
433
break;
434
435
if (leftFrame.size() != rightFrame.size())
436
{
437
cerr << "Frames have different sizes" << endl;
438
return -1;
439
}
440
441
leftGrayFrame.create(leftFrame.size(), CV_8UC1);
442
rightGrayFrame.create(leftFrame.size(), CV_8UC1);
443
444
cvtColor(leftFrame, leftGrayFrame.createMatHeader(), COLOR_BGR2GRAY);
445
cvtColor(rightFrame, rightGrayFrame.createMatHeader(), COLOR_BGR2GRAY);
446
447
tm.reset(); tm.start();
448
gpu0Alg.compute(leftGrayFrame.createMatHeader(), rightGrayFrame.createMatHeader(),
449
disparityGpu0);
450
tm.stop();
451
452
const double gpu0Time = tm.getTimeMilli();
453
454
tm.reset(); tm.start();
455
gpu1Alg.compute(leftGrayFrame.createMatHeader(), rightGrayFrame.createMatHeader(),
456
disparityGpu1);
457
tm.stop();
458
459
const double gpu1Time = tm.getTimeMilli();
460
461
tm.reset(); tm.start();
462
multiThreadAlg.compute(leftGrayFrame.createMatHeader(), rightGrayFrame.createMatHeader(),
463
disparityMultiThread);
464
tm.stop();
465
466
const double multiThreadTime = tm.getTimeMilli();
467
468
tm.reset(); tm.start();
469
multiStreamAlg.compute(leftGrayFrame, rightGrayFrame, disparityMultiStream);
470
tm.stop();
471
472
const double multiStreamTime = tm.getTimeMilli();
473
474
cout << "| " << setw(5) << i << " | "
475
<< setw(8) << setprecision(1) << fixed << gpu0Time << " | "
476
<< setw(8) << setprecision(1) << fixed << gpu1Time << " | "
477
<< setw(15) << setprecision(1) << fixed << multiThreadTime << " | "
478
<< setw(15) << setprecision(1) << fixed << multiStreamTime << " |" << endl;
479
480
resize(disparityGpu0, disparityGpu0Show, Size(1024, 768), 0, 0, INTER_AREA);
481
resize(disparityGpu1, disparityGpu1Show, Size(1024, 768), 0, 0, INTER_AREA);
482
resize(disparityMultiThread, disparityMultiThreadShow, Size(1024, 768), 0, 0, INTER_AREA);
483
resize(disparityMultiStream.createMatHeader(), disparityMultiStreamShow, Size(1024, 768), 0, 0, INTER_AREA);
484
485
imshow("disparityGpu0", disparityGpu0Show);
486
imshow("disparityGpu1", disparityGpu1Show);
487
imshow("disparityMultiThread", disparityMultiThreadShow);
488
imshow("disparityMultiStream", disparityMultiStreamShow);
489
490
const int key = waitKey(30) & 0xff;
491
if (key == 27)
492
break;
493
}
494
495
cout << "-------------------------------------------------------------------" << endl;
496
497
return 0;
498
}
499
500