Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Tetragramm
GitHub Repository: Tetragramm/opencv
Path: blob/master/modules/core/src/cuda_info.cpp
16337 views
1
/*M///////////////////////////////////////////////////////////////////////////////////////
2
//
3
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4
//
5
// By downloading, copying, installing or using the software you agree to this license.
6
// If you do not agree to this license, do not download, install,
7
// copy or use the software.
8
//
9
//
10
// License Agreement
11
// For Open Source Computer Vision Library
12
//
13
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15
// Third party copyrights are property of their respective owners.
16
//
17
// Redistribution and use in source and binary forms, with or without modification,
18
// are permitted provided that the following conditions are met:
19
//
20
// * Redistribution's of source code must retain the above copyright notice,
21
// this list of conditions and the following disclaimer.
22
//
23
// * Redistribution's in binary form must reproduce the above copyright notice,
24
// this list of conditions and the following disclaimer in the documentation
25
// and/or other materials provided with the distribution.
26
//
27
// * The name of the copyright holders may not be used to endorse or promote products
28
// derived from this software without specific prior written permission.
29
//
30
// This software is provided by the copyright holders and contributors "as is" and
31
// any express or implied warranties, including, but not limited to, the implied
32
// warranties of merchantability and fitness for a particular purpose are disclaimed.
33
// In no event shall the Intel Corporation or contributors be liable for any direct,
34
// indirect, incidental, special, exemplary, or consequential damages
35
// (including, but not limited to, procurement of substitute goods or services;
36
// loss of use, data, or profits; or business interruption) however caused
37
// and on any theory of liability, whether in contract, strict liability,
38
// or tort (including negligence or otherwise) arising in any way out of
39
// the use of this software, even if advised of the possibility of such damage.
40
//
41
//M*/
42
43
#include "precomp.hpp"
44
45
using namespace cv;
46
using namespace cv::cuda;
47
48
// Returns the device count reported by cudaGetDeviceCount().
// Returns 0 when built without HAVE_CUDA or when the runtime reports
// cudaErrorNoDevice, and -1 when it reports cudaErrorInsufficientDriver.
// Any other status is handed to cudaSafeCall() before the count is returned.
int cv::cuda::getCudaEnabledDeviceCount()
{
#ifdef HAVE_CUDA
    int deviceCount;
    const cudaError_t error = cudaGetDeviceCount(&deviceCount);

    // The two "expected" failures are mapped to sentinel values.
    if (error == cudaErrorInsufficientDriver || error == cudaErrorNoDevice)
        return (error == cudaErrorInsufficientDriver) ? -1 : 0;

    cudaSafeCall( error );
    return deviceCount;
#else
    return 0;
#endif
}
66
67
void cv::cuda::setDevice(int device)
68
{
69
#ifndef HAVE_CUDA
70
CV_UNUSED(device);
71
throw_no_cuda();
72
#else
73
cudaSafeCall( cudaSetDevice(device) );
74
cudaSafeCall( cudaFree(0) );
75
#endif
76
}
77
78
int cv::cuda::getDevice()
79
{
80
#ifndef HAVE_CUDA
81
throw_no_cuda();
82
#else
83
int device;
84
cudaSafeCall( cudaGetDevice(&device) );
85
return device;
86
#endif
87
}
88
89
void cv::cuda::resetDevice()
90
{
91
#ifndef HAVE_CUDA
92
throw_no_cuda();
93
#else
94
cudaSafeCall( cudaDeviceReset() );
95
#endif
96
}
97
98
// Returns true when TargetArchs::builtWith(feature_set) holds and the current
// device's version (major * 10 + minor) is >= feature_set.
// Versions of the first few device ids are memoised in a function-local table.
// Without HAVE_CUDA, calls throw_no_cuda().
bool cv::cuda::deviceSupports(FeatureSet feature_set)
{
#ifdef HAVE_CUDA
    // One slot per device id; -1 means "not queried yet".
    static int cachedVersions[] =
    {
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
    };
    static const int cacheCapacity = static_cast<int>(sizeof(cachedVersions) / sizeof(cachedVersions[0]));

    const int currentDevice = getDevice();
    const bool fitsInCache = currentDevice < cacheCapacity;

    int capability;
    if (fitsInCache && cachedVersions[currentDevice] >= 0)
    {
        capability = cachedVersions[currentDevice];
    }
    else
    {
        DeviceInfo info(currentDevice);
        capability = info.majorVersion() * 10 + info.minorVersion();

        // Devices beyond the table size are simply re-queried every call.
        if (fitsInCache)
            cachedVersions[currentDevice] = capability;
    }

    if (!TargetArchs::builtWith(feature_set))
        return false;

    return capability >= feature_set;
#else
    CV_UNUSED(feature_set);
    throw_no_cuda();
#endif
}
129
130
////////////////////////////////////////////////////////////////////////
131
// TargetArchs
132
133
#ifdef HAVE_CUDA
134
135
namespace
136
{
137
class CudaArch
138
{
139
public:
140
CudaArch();
141
142
bool builtWith(FeatureSet feature_set) const;
143
bool hasPtx(int major, int minor) const;
144
bool hasBin(int major, int minor) const;
145
bool hasEqualOrLessPtx(int major, int minor) const;
146
bool hasEqualOrGreaterPtx(int major, int minor) const;
147
bool hasEqualOrGreaterBin(int major, int minor) const;
148
149
private:
150
static void fromStr(const char* set_as_str, std::vector<int>& arr);
151
152
std::vector<int> bin;
153
std::vector<int> ptx;
154
std::vector<int> features;
155
};
156
157
const CudaArch cudaArch;
158
159
// Fills the three lists from the corresponding build-time strings.
CudaArch::CudaArch()
{
    fromStr(CUDA_ARCH_BIN,      bin);
    fromStr(CUDA_ARCH_PTX,      ptx);
    fromStr(CUDA_ARCH_FEATURES, features);
}
165
166
// True when the feature list is non-empty and its largest entry is
// >= feature_set.
bool CudaArch::builtWith(FeatureSet feature_set) const
{
    if (features.empty())
        return false;

    return features.back() >= feature_set;
}
170
171
bool CudaArch::hasPtx(int major, int minor) const
172
{
173
return std::find(ptx.begin(), ptx.end(), major * 10 + minor) != ptx.end();
174
}
175
176
bool CudaArch::hasBin(int major, int minor) const
177
{
178
return std::find(bin.begin(), bin.end(), major * 10 + minor) != bin.end();
179
}
180
181
bool CudaArch::hasEqualOrLessPtx(int major, int minor) const
182
{
183
return !ptx.empty() && (ptx.front() <= major * 10 + minor);
184
}
185
186
bool CudaArch::hasEqualOrGreaterPtx(int major, int minor) const
187
{
188
return !ptx.empty() && (ptx.back() >= major * 10 + minor);
189
}
190
191
bool CudaArch::hasEqualOrGreaterBin(int major, int minor) const
192
{
193
return !bin.empty() && (bin.back() >= major * 10 + minor);
194
}
195
196
void CudaArch::fromStr(const char* set_as_str, std::vector<int>& arr)
197
{
198
arr.clear();
199
200
const size_t len = strlen(set_as_str);
201
202
size_t pos = 0;
203
while (pos < len)
204
{
205
if (isspace(set_as_str[pos]))
206
{
207
++pos;
208
}
209
else
210
{
211
int cur_value;
212
int chars_read;
213
int args_read = sscanf(set_as_str + pos, "%d%n", &cur_value, &chars_read);
214
CV_Assert( args_read == 1 );
215
216
arr.push_back(cur_value);
217
pos += chars_read;
218
}
219
}
220
221
std::sort(arr.begin(), arr.end());
222
}
223
}
224
225
#endif
226
227
// Forwards to the file-local cudaArch.builtWith(feature_set).
// Without HAVE_CUDA, calls throw_no_cuda().
bool cv::cuda::TargetArchs::builtWith(cv::cuda::FeatureSet feature_set)
{
#ifdef HAVE_CUDA
    const bool supported = cudaArch.builtWith(feature_set);
    return supported;
#else
    CV_UNUSED(feature_set);
    throw_no_cuda();
#endif
}
236
237
bool cv::cuda::TargetArchs::hasPtx(int major, int minor)
238
{
239
#ifndef HAVE_CUDA
240
CV_UNUSED(major);
241
CV_UNUSED(minor);
242
throw_no_cuda();
243
#else
244
return cudaArch.hasPtx(major, minor);
245
#endif
246
}
247
248
bool cv::cuda::TargetArchs::hasBin(int major, int minor)
249
{
250
#ifndef HAVE_CUDA
251
CV_UNUSED(major);
252
CV_UNUSED(minor);
253
throw_no_cuda();
254
#else
255
return cudaArch.hasBin(major, minor);
256
#endif
257
}
258
259
bool cv::cuda::TargetArchs::hasEqualOrLessPtx(int major, int minor)
260
{
261
#ifndef HAVE_CUDA
262
CV_UNUSED(major);
263
CV_UNUSED(minor);
264
throw_no_cuda();
265
#else
266
return cudaArch.hasEqualOrLessPtx(major, minor);
267
#endif
268
}
269
270
bool cv::cuda::TargetArchs::hasEqualOrGreaterPtx(int major, int minor)
271
{
272
#ifndef HAVE_CUDA
273
CV_UNUSED(major);
274
CV_UNUSED(minor);
275
throw_no_cuda();
276
#else
277
return cudaArch.hasEqualOrGreaterPtx(major, minor);
278
#endif
279
}
280
281
bool cv::cuda::TargetArchs::hasEqualOrGreaterBin(int major, int minor)
282
{
283
#ifndef HAVE_CUDA
284
CV_UNUSED(major);
285
CV_UNUSED(minor);
286
throw_no_cuda();
287
#else
288
return cudaArch.hasEqualOrGreaterBin(major, minor);
289
#endif
290
}
291
292
////////////////////////////////////////////////////////////////////////
293
// DeviceInfo
294
295
#ifdef HAVE_CUDA
296
297
namespace
298
{
299
class DeviceProps
300
{
301
public:
302
DeviceProps();
303
304
const cudaDeviceProp* get(int devID) const;
305
306
private:
307
std::vector<cudaDeviceProp> props_;
308
};
309
310
// Queries cudaGetDeviceProperties() for every device id below
// getCudaEnabledDeviceCount(); leaves the table empty when that count is <= 0.
DeviceProps::DeviceProps()
{
    const int deviceCount = getCudaEnabledDeviceCount();
    if (deviceCount <= 0)
        return;

    props_.resize(deviceCount);

    for (int id = 0; id != deviceCount; ++id)
        cudaSafeCall( cudaGetDeviceProperties(&props_[id], id) );
}
324
325
// Returns the stored record for `devID`. The assertion rejects ids outside the
// table; a negative id becomes a huge value after the size_t cast and is
// rejected by the same comparison.
const cudaDeviceProp* DeviceProps::get(int devID) const
{
    CV_Assert( static_cast<size_t>(devID) < props_.size() );

    const cudaDeviceProp& record = props_[devID];
    return &record;
}
331
332
// Accessor for the function-local static DeviceProps instance, which is
// constructed the first time this function runs.
DeviceProps& deviceProps()
{
    static DeviceProps instance;
    return instance;
}
337
}
338
339
#endif
340
341
const char* cv::cuda::DeviceInfo::name() const
342
{
343
#ifndef HAVE_CUDA
344
throw_no_cuda();
345
#else
346
return deviceProps().get(device_id_)->name;
347
#endif
348
}
349
350
// Returns the `totalGlobalMem` field of the cudaDeviceProp record stored for
// device_id_. Without HAVE_CUDA, calls throw_no_cuda().
size_t cv::cuda::DeviceInfo::totalGlobalMem() const
{
#ifdef HAVE_CUDA
    const cudaDeviceProp* const prop = deviceProps().get(device_id_);
    return prop->totalGlobalMem;
#else
    throw_no_cuda();
#endif
}
358
359
// Returns the `sharedMemPerBlock` field of the cudaDeviceProp record stored
// for device_id_. Without HAVE_CUDA, calls throw_no_cuda().
size_t cv::cuda::DeviceInfo::sharedMemPerBlock() const
{
#ifdef HAVE_CUDA
    const cudaDeviceProp* const prop = deviceProps().get(device_id_);
    return prop->sharedMemPerBlock;
#else
    throw_no_cuda();
#endif
}
367
368
int cv::cuda::DeviceInfo::regsPerBlock() const
369
{
370
#ifndef HAVE_CUDA
371
throw_no_cuda();
372
#else
373
return deviceProps().get(device_id_)->regsPerBlock;
374
#endif
375
}
376
377
int cv::cuda::DeviceInfo::warpSize() const
378
{
379
#ifndef HAVE_CUDA
380
throw_no_cuda();
381
#else
382
return deviceProps().get(device_id_)->warpSize;
383
#endif
384
}
385
386
// Returns the `memPitch` field of the cudaDeviceProp record stored for
// device_id_. Without HAVE_CUDA, calls throw_no_cuda().
size_t cv::cuda::DeviceInfo::memPitch() const
{
#ifdef HAVE_CUDA
    const cudaDeviceProp* const prop = deviceProps().get(device_id_);
    return prop->memPitch;
#else
    throw_no_cuda();
#endif
}
394
395
int cv::cuda::DeviceInfo::maxThreadsPerBlock() const
396
{
397
#ifndef HAVE_CUDA
398
throw_no_cuda();
399
#else
400
return deviceProps().get(device_id_)->maxThreadsPerBlock;
401
#endif
402
}
403
404
// Returns the `maxThreadsDim` array of the cudaDeviceProp record stored for
// device_id_, wrapped in a Vec3i. Without HAVE_CUDA, calls throw_no_cuda().
Vec3i cv::cuda::DeviceInfo::maxThreadsDim() const
{
#ifdef HAVE_CUDA
    const cudaDeviceProp* const prop = deviceProps().get(device_id_);
    return Vec3i(prop->maxThreadsDim);
#else
    throw_no_cuda();
#endif
}
412
413
// Returns the `maxGridSize` array of the cudaDeviceProp record stored for
// device_id_, wrapped in a Vec3i. Without HAVE_CUDA, calls throw_no_cuda().
Vec3i cv::cuda::DeviceInfo::maxGridSize() const
{
#ifdef HAVE_CUDA
    const cudaDeviceProp* const prop = deviceProps().get(device_id_);
    return Vec3i(prop->maxGridSize);
#else
    throw_no_cuda();
#endif
}
421
422
int cv::cuda::DeviceInfo::clockRate() const
423
{
424
#ifndef HAVE_CUDA
425
throw_no_cuda();
426
#else
427
return deviceProps().get(device_id_)->clockRate;
428
#endif
429
}
430
431
// Returns the `totalConstMem` field of the cudaDeviceProp record stored for
// device_id_. Without HAVE_CUDA, calls throw_no_cuda().
size_t cv::cuda::DeviceInfo::totalConstMem() const
{
#ifdef HAVE_CUDA
    const cudaDeviceProp* const prop = deviceProps().get(device_id_);
    return prop->totalConstMem;
#else
    throw_no_cuda();
#endif
}
439
440
int cv::cuda::DeviceInfo::majorVersion() const
441
{
442
#ifndef HAVE_CUDA
443
throw_no_cuda();
444
#else
445
return deviceProps().get(device_id_)->major;
446
#endif
447
}
448
449
int cv::cuda::DeviceInfo::minorVersion() const
450
{
451
#ifndef HAVE_CUDA
452
throw_no_cuda();
453
#else
454
return deviceProps().get(device_id_)->minor;
455
#endif
456
}
457
458
// Returns the `textureAlignment` field of the cudaDeviceProp record stored for
// device_id_. Without HAVE_CUDA, calls throw_no_cuda().
size_t cv::cuda::DeviceInfo::textureAlignment() const
{
#ifdef HAVE_CUDA
    const cudaDeviceProp* const prop = deviceProps().get(device_id_);
    return prop->textureAlignment;
#else
    throw_no_cuda();
#endif
}
466
467
// Returns the `texturePitchAlignment` field of the cudaDeviceProp record
// stored for device_id_. Without HAVE_CUDA, calls throw_no_cuda().
size_t cv::cuda::DeviceInfo::texturePitchAlignment() const
{
#ifdef HAVE_CUDA
    const cudaDeviceProp* const prop = deviceProps().get(device_id_);
    return prop->texturePitchAlignment;
#else
    throw_no_cuda();
#endif
}
475
476
int cv::cuda::DeviceInfo::multiProcessorCount() const
477
{
478
#ifndef HAVE_CUDA
479
throw_no_cuda();
480
#else
481
return deviceProps().get(device_id_)->multiProcessorCount;
482
#endif
483
}
484
485
bool cv::cuda::DeviceInfo::kernelExecTimeoutEnabled() const
486
{
487
#ifndef HAVE_CUDA
488
throw_no_cuda();
489
#else
490
return deviceProps().get(device_id_)->kernelExecTimeoutEnabled != 0;
491
#endif
492
}
493
494
bool cv::cuda::DeviceInfo::integrated() const
495
{
496
#ifndef HAVE_CUDA
497
throw_no_cuda();
498
#else
499
return deviceProps().get(device_id_)->integrated != 0;
500
#endif
501
}
502
503
bool cv::cuda::DeviceInfo::canMapHostMemory() const
504
{
505
#ifndef HAVE_CUDA
506
throw_no_cuda();
507
#else
508
return deviceProps().get(device_id_)->canMapHostMemory != 0;
509
#endif
510
}
511
512
// Translates the `computeMode` field of the cudaDeviceProp record stored for
// device_id_ into a DeviceInfo::ComputeMode by table lookup.
// The field value is used directly as the table index (no range check).
// Without HAVE_CUDA, calls throw_no_cuda().
DeviceInfo::ComputeMode cv::cuda::DeviceInfo::computeMode() const
{
#ifdef HAVE_CUDA
    static const ComputeMode modeByRuntimeValue[] =
    {
        ComputeModeDefault,
        ComputeModeExclusive,
        ComputeModeProhibited,
        ComputeModeExclusiveProcess
    };

    const cudaDeviceProp* const prop = deviceProps().get(device_id_);
    return modeByRuntimeValue[prop->computeMode];
#else
    throw_no_cuda();
#endif
}
528
529
int cv::cuda::DeviceInfo::maxTexture1D() const
530
{
531
#ifndef HAVE_CUDA
532
throw_no_cuda();
533
#else
534
return deviceProps().get(device_id_)->maxTexture1D;
535
#endif
536
}
537
538
int cv::cuda::DeviceInfo::maxTexture1DMipmap() const
539
{
540
#ifndef HAVE_CUDA
541
throw_no_cuda();
542
#else
543
#if CUDA_VERSION >= 5000
544
return deviceProps().get(device_id_)->maxTexture1DMipmap;
545
#else
546
CV_Error(Error::StsNotImplemented, "This function requires CUDA 5.0");
547
return 0;
548
#endif
549
#endif
550
}
551
552
int cv::cuda::DeviceInfo::maxTexture1DLinear() const
553
{
554
#ifndef HAVE_CUDA
555
throw_no_cuda();
556
#else
557
return deviceProps().get(device_id_)->maxTexture1DLinear;
558
#endif
559
}
560
561
// Returns the `maxTexture2D` array of the cudaDeviceProp record stored for
// device_id_, wrapped in a Vec2i. Without HAVE_CUDA, calls throw_no_cuda().
Vec2i cv::cuda::DeviceInfo::maxTexture2D() const
{
#ifdef HAVE_CUDA
    const cudaDeviceProp* const prop = deviceProps().get(device_id_);
    return Vec2i(prop->maxTexture2D);
#else
    throw_no_cuda();
#endif
}
569
570
// Returns the `maxTexture2DMipmap` array of the cudaDeviceProp record stored
// for device_id_ (as a Vec2i) when CUDA_VERSION >= 5000; otherwise raises
// Error::StsNotImplemented. Without HAVE_CUDA, calls throw_no_cuda().
Vec2i cv::cuda::DeviceInfo::maxTexture2DMipmap() const
{
#ifdef HAVE_CUDA
    #if CUDA_VERSION >= 5000
        const cudaDeviceProp* const prop = deviceProps().get(device_id_);
        return Vec2i(prop->maxTexture2DMipmap);
    #else
        CV_Error(Error::StsNotImplemented, "This function requires CUDA 5.0");
        return Vec2i();
    #endif
#else
    throw_no_cuda();
#endif
}
583
584
// Returns the `maxTexture2DLinear` array of the cudaDeviceProp record stored
// for device_id_, wrapped in a Vec3i. Without HAVE_CUDA, calls throw_no_cuda().
Vec3i cv::cuda::DeviceInfo::maxTexture2DLinear() const
{
#ifdef HAVE_CUDA
    const cudaDeviceProp* const prop = deviceProps().get(device_id_);
    return Vec3i(prop->maxTexture2DLinear);
#else
    throw_no_cuda();
#endif
}
592
593
// Returns the `maxTexture2DGather` array of the cudaDeviceProp record stored
// for device_id_, wrapped in a Vec2i. Without HAVE_CUDA, calls throw_no_cuda().
Vec2i cv::cuda::DeviceInfo::maxTexture2DGather() const
{
#ifdef HAVE_CUDA
    const cudaDeviceProp* const prop = deviceProps().get(device_id_);
    return Vec2i(prop->maxTexture2DGather);
#else
    throw_no_cuda();
#endif
}
601
602
// Returns the `maxTexture3D` array of the cudaDeviceProp record stored for
// device_id_, wrapped in a Vec3i. Without HAVE_CUDA, calls throw_no_cuda().
Vec3i cv::cuda::DeviceInfo::maxTexture3D() const
{
#ifdef HAVE_CUDA
    const cudaDeviceProp* const prop = deviceProps().get(device_id_);
    return Vec3i(prop->maxTexture3D);
#else
    throw_no_cuda();
#endif
}
610
611
int cv::cuda::DeviceInfo::maxTextureCubemap() const
612
{
613
#ifndef HAVE_CUDA
614
throw_no_cuda();
615
#else
616
return deviceProps().get(device_id_)->maxTextureCubemap;
617
#endif
618
}
619
620
// Returns the `maxTexture1DLayered` array of the cudaDeviceProp record stored
// for device_id_, wrapped in a Vec2i. Without HAVE_CUDA, calls throw_no_cuda().
Vec2i cv::cuda::DeviceInfo::maxTexture1DLayered() const
{
#ifdef HAVE_CUDA
    const cudaDeviceProp* const prop = deviceProps().get(device_id_);
    return Vec2i(prop->maxTexture1DLayered);
#else
    throw_no_cuda();
#endif
}
628
629
// Returns the `maxTexture2DLayered` array of the cudaDeviceProp record stored
// for device_id_, wrapped in a Vec3i. Without HAVE_CUDA, calls throw_no_cuda().
Vec3i cv::cuda::DeviceInfo::maxTexture2DLayered() const
{
#ifdef HAVE_CUDA
    const cudaDeviceProp* const prop = deviceProps().get(device_id_);
    return Vec3i(prop->maxTexture2DLayered);
#else
    throw_no_cuda();
#endif
}
637
638
// Returns the `maxTextureCubemapLayered` array of the cudaDeviceProp record
// stored for device_id_, wrapped in a Vec2i.
// Without HAVE_CUDA, calls throw_no_cuda().
Vec2i cv::cuda::DeviceInfo::maxTextureCubemapLayered() const
{
#ifdef HAVE_CUDA
    const cudaDeviceProp* const prop = deviceProps().get(device_id_);
    return Vec2i(prop->maxTextureCubemapLayered);
#else
    throw_no_cuda();
#endif
}
646
647
int cv::cuda::DeviceInfo::maxSurface1D() const
648
{
649
#ifndef HAVE_CUDA
650
throw_no_cuda();
651
#else
652
return deviceProps().get(device_id_)->maxSurface1D;
653
#endif
654
}
655
656
// Returns the `maxSurface2D` array of the cudaDeviceProp record stored for
// device_id_, wrapped in a Vec2i. Without HAVE_CUDA, calls throw_no_cuda().
Vec2i cv::cuda::DeviceInfo::maxSurface2D() const
{
#ifdef HAVE_CUDA
    const cudaDeviceProp* const prop = deviceProps().get(device_id_);
    return Vec2i(prop->maxSurface2D);
#else
    throw_no_cuda();
#endif
}
664
665
// Returns the `maxSurface3D` array of the cudaDeviceProp record stored for
// device_id_, wrapped in a Vec3i. Without HAVE_CUDA, calls throw_no_cuda().
Vec3i cv::cuda::DeviceInfo::maxSurface3D() const
{
#ifdef HAVE_CUDA
    const cudaDeviceProp* const prop = deviceProps().get(device_id_);
    return Vec3i(prop->maxSurface3D);
#else
    throw_no_cuda();
#endif
}
673
674
// Returns the `maxSurface1DLayered` array of the cudaDeviceProp record stored
// for device_id_, wrapped in a Vec2i. Without HAVE_CUDA, calls throw_no_cuda().
Vec2i cv::cuda::DeviceInfo::maxSurface1DLayered() const
{
#ifdef HAVE_CUDA
    const cudaDeviceProp* const prop = deviceProps().get(device_id_);
    return Vec2i(prop->maxSurface1DLayered);
#else
    throw_no_cuda();
#endif
}
682
683
// Returns the `maxSurface2DLayered` array of the cudaDeviceProp record stored
// for device_id_, wrapped in a Vec3i. Without HAVE_CUDA, calls throw_no_cuda().
Vec3i cv::cuda::DeviceInfo::maxSurface2DLayered() const
{
#ifdef HAVE_CUDA
    const cudaDeviceProp* const prop = deviceProps().get(device_id_);
    return Vec3i(prop->maxSurface2DLayered);
#else
    throw_no_cuda();
#endif
}
691
692
int cv::cuda::DeviceInfo::maxSurfaceCubemap() const
693
{
694
#ifndef HAVE_CUDA
695
throw_no_cuda();
696
#else
697
return deviceProps().get(device_id_)->maxSurfaceCubemap;
698
#endif
699
}
700
701
// Returns the `maxSurfaceCubemapLayered` array of the cudaDeviceProp record
// stored for device_id_, wrapped in a Vec2i.
// Without HAVE_CUDA, calls throw_no_cuda().
Vec2i cv::cuda::DeviceInfo::maxSurfaceCubemapLayered() const
{
#ifdef HAVE_CUDA
    const cudaDeviceProp* const prop = deviceProps().get(device_id_);
    return Vec2i(prop->maxSurfaceCubemapLayered);
#else
    throw_no_cuda();
#endif
}
709
710
// Returns the `surfaceAlignment` field of the cudaDeviceProp record stored for
// device_id_. Without HAVE_CUDA, calls throw_no_cuda().
size_t cv::cuda::DeviceInfo::surfaceAlignment() const
{
#ifdef HAVE_CUDA
    const cudaDeviceProp* const prop = deviceProps().get(device_id_);
    return prop->surfaceAlignment;
#else
    throw_no_cuda();
#endif
}
718
719
bool cv::cuda::DeviceInfo::concurrentKernels() const
720
{
721
#ifndef HAVE_CUDA
722
throw_no_cuda();
723
#else
724
return deviceProps().get(device_id_)->concurrentKernels != 0;
725
#endif
726
}
727
728
bool cv::cuda::DeviceInfo::ECCEnabled() const
729
{
730
#ifndef HAVE_CUDA
731
throw_no_cuda();
732
#else
733
return deviceProps().get(device_id_)->ECCEnabled != 0;
734
#endif
735
}
736
737
int cv::cuda::DeviceInfo::pciBusID() const
738
{
739
#ifndef HAVE_CUDA
740
throw_no_cuda();
741
#else
742
return deviceProps().get(device_id_)->pciBusID;
743
#endif
744
}
745
746
int cv::cuda::DeviceInfo::pciDeviceID() const
747
{
748
#ifndef HAVE_CUDA
749
throw_no_cuda();
750
#else
751
return deviceProps().get(device_id_)->pciDeviceID;
752
#endif
753
}
754
755
int cv::cuda::DeviceInfo::pciDomainID() const
756
{
757
#ifndef HAVE_CUDA
758
throw_no_cuda();
759
#else
760
return deviceProps().get(device_id_)->pciDomainID;
761
#endif
762
}
763
764
bool cv::cuda::DeviceInfo::tccDriver() const
765
{
766
#ifndef HAVE_CUDA
767
throw_no_cuda();
768
#else
769
return deviceProps().get(device_id_)->tccDriver != 0;
770
#endif
771
}
772
773
int cv::cuda::DeviceInfo::asyncEngineCount() const
774
{
775
#ifndef HAVE_CUDA
776
throw_no_cuda();
777
#else
778
return deviceProps().get(device_id_)->asyncEngineCount;
779
#endif
780
}
781
782
bool cv::cuda::DeviceInfo::unifiedAddressing() const
783
{
784
#ifndef HAVE_CUDA
785
throw_no_cuda();
786
#else
787
return deviceProps().get(device_id_)->unifiedAddressing != 0;
788
#endif
789
}
790
791
int cv::cuda::DeviceInfo::memoryClockRate() const
792
{
793
#ifndef HAVE_CUDA
794
throw_no_cuda();
795
#else
796
return deviceProps().get(device_id_)->memoryClockRate;
797
#endif
798
}
799
800
int cv::cuda::DeviceInfo::memoryBusWidth() const
801
{
802
#ifndef HAVE_CUDA
803
throw_no_cuda();
804
#else
805
return deviceProps().get(device_id_)->memoryBusWidth;
806
#endif
807
}
808
809
int cv::cuda::DeviceInfo::l2CacheSize() const
810
{
811
#ifndef HAVE_CUDA
812
throw_no_cuda();
813
#else
814
return deviceProps().get(device_id_)->l2CacheSize;
815
#endif
816
}
817
818
int cv::cuda::DeviceInfo::maxThreadsPerMultiProcessor() const
819
{
820
#ifndef HAVE_CUDA
821
throw_no_cuda();
822
#else
823
return deviceProps().get(device_id_)->maxThreadsPerMultiProcessor;
824
#endif
825
}
826
827
void cv::cuda::DeviceInfo::queryMemory(size_t& _totalMemory, size_t& _freeMemory) const
828
{
829
#ifndef HAVE_CUDA
830
CV_UNUSED(_totalMemory);
831
CV_UNUSED(_freeMemory);
832
throw_no_cuda();
833
#else
834
int prevDeviceID = getDevice();
835
if (prevDeviceID != device_id_)
836
setDevice(device_id_);
837
838
cudaSafeCall( cudaMemGetInfo(&_freeMemory, &_totalMemory) );
839
840
if (prevDeviceID != device_id_)
841
setDevice(prevDeviceID);
842
#endif
843
}
844
845
bool cv::cuda::DeviceInfo::isCompatible() const
846
{
847
#ifndef HAVE_CUDA
848
throw_no_cuda();
849
#else
850
// Check PTX compatibility
851
if (TargetArchs::hasEqualOrLessPtx(majorVersion(), minorVersion()))
852
return true;
853
854
// Check BIN compatibility
855
for (int i = minorVersion(); i >= 0; --i)
856
if (TargetArchs::hasBin(majorVersion(), i))
857
return true;
858
859
return false;
860
#endif
861
}
862
863
////////////////////////////////////////////////////////////////////////
864
// print info
865
866
#ifdef HAVE_CUDA
867
868
namespace
869
{
870
// Maps an SM version (major, minor) to the number of CUDA cores per
// multiprocessor. Returns -1 when the version is not in the table, which
// callers in this file treat as "do not print a core count".
int convertSMVer2Cores(int major, int minor)
{
    // SM is encoded as 0xMm (hexadecimal): M = SM major version, m = SM minor version.
    struct SMtoCores
    {
        int SM;
        int Cores;
    };

    // Built once; values follow NVIDIA's _ConvertSMVer2Cores sample helper.
    static const SMtoCores gpuArchCoresPerSM[] =
    {
        { 0x10,   8 }, { 0x11,   8 }, { 0x12,   8 }, { 0x13,   8 },
        { 0x20,  32 }, { 0x21,  48 },
        { 0x30, 192 }, { 0x32, 192 }, { 0x35, 192 }, { 0x37, 192 },
        { 0x50, 128 }, { 0x52, 128 }, { 0x53, 128 },
        { 0x60,  64 }, { 0x61, 128 }, { 0x62, 128 },
        { 0x70,  64 }, { 0x72,  64 }, { 0x75,  64 },
        { 0x80,  64 }, { 0x86, 128 }, { 0x87, 128 }, { 0x89, 128 },
        { 0x90, 128 }
    };
    static const int entryCount = static_cast<int>(sizeof(gpuArchCoresPerSM) / sizeof(gpuArchCoresPerSM[0]));

    const int sm = (major << 4) + minor;

    for (int i = 0; i < entryCount; ++i)
    {
        if (gpuArchCoresPerSM[i].SM == sm)
            return gpuArchCoresPerSM[i].Cores;
    }

    return -1;
}
890
}
891
892
#endif
893
894
void cv::cuda::printCudaDeviceInfo(int device)
895
{
896
#ifndef HAVE_CUDA
897
CV_UNUSED(device);
898
throw_no_cuda();
899
#else
900
int count = getCudaEnabledDeviceCount();
901
bool valid = (device >= 0) && (device < count);
902
903
int beg = valid ? device : 0;
904
int end = valid ? device+1 : count;
905
906
printf("*** CUDA Device Query (Runtime API) version (CUDART static linking) *** \n\n");
907
printf("Device count: %d\n", count);
908
909
int driverVersion = 0, runtimeVersion = 0;
910
cudaSafeCall( cudaDriverGetVersion(&driverVersion) );
911
cudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );
912
913
const char *computeMode[] = {
914
"Default (multiple host threads can use ::cudaSetDevice() with device simultaneously)",
915
"Exclusive (only one host thread in one process is able to use ::cudaSetDevice() with this device)",
916
"Prohibited (no host thread can use ::cudaSetDevice() with this device)",
917
"Exclusive Process (many threads in one process is able to use ::cudaSetDevice() with this device)",
918
"Unknown",
919
NULL
920
};
921
922
for(int dev = beg; dev < end; ++dev)
923
{
924
cudaDeviceProp prop;
925
cudaSafeCall( cudaGetDeviceProperties(&prop, dev) );
926
927
printf("\nDevice %d: \"%s\"\n", dev, prop.name);
928
printf(" CUDA Driver Version / Runtime Version %d.%d / %d.%d\n", driverVersion/1000, driverVersion%100, runtimeVersion/1000, runtimeVersion%100);
929
printf(" CUDA Capability Major/Minor version number: %d.%d\n", prop.major, prop.minor);
930
printf(" Total amount of global memory: %.0f MBytes (%llu bytes)\n", (float)prop.totalGlobalMem/1048576.0f, (unsigned long long) prop.totalGlobalMem);
931
932
int cores = convertSMVer2Cores(prop.major, prop.minor);
933
if (cores > 0)
934
printf(" (%2d) Multiprocessors x (%2d) CUDA Cores/MP: %d CUDA Cores\n", prop.multiProcessorCount, cores, cores * prop.multiProcessorCount);
935
936
printf(" GPU Clock Speed: %.2f GHz\n", prop.clockRate * 1e-6f);
937
938
printf(" Max Texture Dimension Size (x,y,z) 1D=(%d), 2D=(%d,%d), 3D=(%d,%d,%d)\n",
939
prop.maxTexture1D, prop.maxTexture2D[0], prop.maxTexture2D[1],
940
prop.maxTexture3D[0], prop.maxTexture3D[1], prop.maxTexture3D[2]);
941
printf(" Max Layered Texture Size (dim) x layers 1D=(%d) x %d, 2D=(%d,%d) x %d\n",
942
prop.maxTexture1DLayered[0], prop.maxTexture1DLayered[1],
943
prop.maxTexture2DLayered[0], prop.maxTexture2DLayered[1], prop.maxTexture2DLayered[2]);
944
945
printf(" Total amount of constant memory: %u bytes\n", (int)prop.totalConstMem);
946
printf(" Total amount of shared memory per block: %u bytes\n", (int)prop.sharedMemPerBlock);
947
printf(" Total number of registers available per block: %d\n", prop.regsPerBlock);
948
printf(" Warp size: %d\n", prop.warpSize);
949
printf(" Maximum number of threads per block: %d\n", prop.maxThreadsPerBlock);
950
printf(" Maximum sizes of each dimension of a block: %d x %d x %d\n", prop.maxThreadsDim[0], prop.maxThreadsDim[1], prop.maxThreadsDim[2]);
951
printf(" Maximum sizes of each dimension of a grid: %d x %d x %d\n", prop.maxGridSize[0], prop.maxGridSize[1], prop.maxGridSize[2]);
952
printf(" Maximum memory pitch: %u bytes\n", (int)prop.memPitch);
953
printf(" Texture alignment: %u bytes\n", (int)prop.textureAlignment);
954
955
printf(" Concurrent copy and execution: %s with %d copy engine(s)\n", (prop.deviceOverlap ? "Yes" : "No"), prop.asyncEngineCount);
956
printf(" Run time limit on kernels: %s\n", prop.kernelExecTimeoutEnabled ? "Yes" : "No");
957
printf(" Integrated GPU sharing Host Memory: %s\n", prop.integrated ? "Yes" : "No");
958
printf(" Support host page-locked memory mapping: %s\n", prop.canMapHostMemory ? "Yes" : "No");
959
960
printf(" Concurrent kernel execution: %s\n", prop.concurrentKernels ? "Yes" : "No");
961
printf(" Alignment requirement for Surfaces: %s\n", prop.surfaceAlignment ? "Yes" : "No");
962
printf(" Device has ECC support enabled: %s\n", prop.ECCEnabled ? "Yes" : "No");
963
printf(" Device is using TCC driver mode: %s\n", prop.tccDriver ? "Yes" : "No");
964
printf(" Device supports Unified Addressing (UVA): %s\n", prop.unifiedAddressing ? "Yes" : "No");
965
printf(" Device PCI Bus ID / PCI location ID: %d / %d\n", prop.pciBusID, prop.pciDeviceID );
966
printf(" Compute Mode:\n");
967
printf(" %s \n", computeMode[prop.computeMode]);
968
}
969
970
printf("\n");
971
printf("deviceQuery, CUDA Driver = CUDART");
972
printf(", CUDA Driver Version = %d.%d", driverVersion / 1000, driverVersion % 100);
973
printf(", CUDA Runtime Version = %d.%d", runtimeVersion/1000, runtimeVersion%100);
974
printf(", NumDevs = %d\n\n", count);
975
976
fflush(stdout);
977
#endif
978
}
979
980
void cv::cuda::printShortCudaDeviceInfo(int device)
981
{
982
#ifndef HAVE_CUDA
983
CV_UNUSED(device);
984
throw_no_cuda();
985
#else
986
int count = getCudaEnabledDeviceCount();
987
bool valid = (device >= 0) && (device < count);
988
989
int beg = valid ? device : 0;
990
int end = valid ? device+1 : count;
991
992
int driverVersion = 0, runtimeVersion = 0;
993
cudaSafeCall( cudaDriverGetVersion(&driverVersion) );
994
cudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );
995
996
for(int dev = beg; dev < end; ++dev)
997
{
998
cudaDeviceProp prop;
999
cudaSafeCall( cudaGetDeviceProperties(&prop, dev) );
1000
1001
const char *arch_str = prop.major < 2 ? " (not Fermi)" : "";
1002
printf("Device %d: \"%s\" %.0fMb", dev, prop.name, (float)prop.totalGlobalMem/1048576.0f);
1003
printf(", sm_%d%d%s", prop.major, prop.minor, arch_str);
1004
1005
int cores = convertSMVer2Cores(prop.major, prop.minor);
1006
if (cores > 0)
1007
printf(", %d cores", cores * prop.multiProcessorCount);
1008
1009
printf(", Driver/Runtime ver.%d.%d/%d.%d\n", driverVersion/1000, driverVersion%100, runtimeVersion/1000, runtimeVersion%100);
1010
}
1011
1012
fflush(stdout);
1013
#endif
1014
}
1015
1016
////////////////////////////////////////////////////////////////////////
1017
// Error handling
1018
1019
#ifdef HAVE_CUDA
1020
1021
namespace
1022
{
1023
// Expands an error identifier into an { value, "identifier" } initializer
// for ErrorEntry.
#define error_entry(entry) { entry, #entry }

// One row of an error lookup table: numeric code and its textual name.
struct ErrorEntry
{
    int         code;
    const char* str;
};
1030
1031
struct ErrorEntryComparer
1032
{
1033
int code;
1034
ErrorEntryComparer(int code_) : code(code_) {}
1035
bool operator()(const ErrorEntry& e) const { return e.code == code; }
1036
};
1037
1038
const ErrorEntry npp_errors [] =
1039
{
1040
#if defined (_MSC_VER)
1041
error_entry( NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY ),
1042
#endif
1043
1044
#if NPP_VERSION < 5500
1045
error_entry( NPP_BAD_ARG_ERROR ),
1046
error_entry( NPP_COEFF_ERROR ),
1047
error_entry( NPP_RECT_ERROR ),
1048
error_entry( NPP_QUAD_ERROR ),
1049
error_entry( NPP_MEMFREE_ERR ),
1050
error_entry( NPP_MEMSET_ERR ),
1051
error_entry( NPP_MEM_ALLOC_ERR ),
1052
error_entry( NPP_HISTO_NUMBER_OF_LEVELS_ERROR ),
1053
error_entry( NPP_MIRROR_FLIP_ERR ),
1054
error_entry( NPP_INVALID_INPUT ),
1055
error_entry( NPP_POINTER_ERROR ),
1056
error_entry( NPP_WARNING ),
1057
error_entry( NPP_ODD_ROI_WARNING ),
1058
#else
1059
error_entry( NPP_INVALID_HOST_POINTER_ERROR ),
1060
error_entry( NPP_INVALID_DEVICE_POINTER_ERROR ),
1061
error_entry( NPP_LUT_PALETTE_BITSIZE_ERROR ),
1062
error_entry( NPP_ZC_MODE_NOT_SUPPORTED_ERROR ),
1063
error_entry( NPP_MEMFREE_ERROR ),
1064
error_entry( NPP_MEMSET_ERROR ),
1065
error_entry( NPP_QUALITY_INDEX_ERROR ),
1066
error_entry( NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR ),
1067
error_entry( NPP_CHANNEL_ORDER_ERROR ),
1068
error_entry( NPP_ZERO_MASK_VALUE_ERROR ),
1069
error_entry( NPP_QUADRANGLE_ERROR ),
1070
error_entry( NPP_RECTANGLE_ERROR ),
1071
error_entry( NPP_COEFFICIENT_ERROR ),
1072
error_entry( NPP_NUMBER_OF_CHANNELS_ERROR ),
1073
error_entry( NPP_COI_ERROR ),
1074
error_entry( NPP_DIVISOR_ERROR ),
1075
error_entry( NPP_CHANNEL_ERROR ),
1076
error_entry( NPP_STRIDE_ERROR ),
1077
error_entry( NPP_ANCHOR_ERROR ),
1078
error_entry( NPP_MASK_SIZE_ERROR ),
1079
error_entry( NPP_MIRROR_FLIP_ERROR ),
1080
error_entry( NPP_MOMENT_00_ZERO_ERROR ),
1081
error_entry( NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR ),
1082
error_entry( NPP_THRESHOLD_ERROR ),
1083
error_entry( NPP_CONTEXT_MATCH_ERROR ),
1084
error_entry( NPP_FFT_FLAG_ERROR ),
1085
error_entry( NPP_FFT_ORDER_ERROR ),
1086
error_entry( NPP_SCALE_RANGE_ERROR ),
1087
error_entry( NPP_DATA_TYPE_ERROR ),
1088
error_entry( NPP_OUT_OFF_RANGE_ERROR ),
1089
error_entry( NPP_DIVIDE_BY_ZERO_ERROR ),
1090
error_entry( NPP_MEMORY_ALLOCATION_ERR ),
1091
error_entry( NPP_RANGE_ERROR ),
1092
error_entry( NPP_BAD_ARGUMENT_ERROR ),
1093
error_entry( NPP_NO_MEMORY_ERROR ),
1094
error_entry( NPP_ERROR_RESERVED ),
1095
error_entry( NPP_NO_OPERATION_WARNING ),
1096
error_entry( NPP_DIVIDE_BY_ZERO_WARNING ),
1097
error_entry( NPP_WRONG_INTERSECTION_ROI_WARNING ),
1098
#endif
1099
1100
error_entry( NPP_NOT_SUPPORTED_MODE_ERROR ),
1101
error_entry( NPP_ROUND_MODE_NOT_SUPPORTED_ERROR ),
1102
error_entry( NPP_RESIZE_NO_OPERATION_ERROR ),
1103
error_entry( NPP_LUT_NUMBER_OF_LEVELS_ERROR ),
1104
error_entry( NPP_TEXTURE_BIND_ERROR ),
1105
error_entry( NPP_WRONG_INTERSECTION_ROI_ERROR ),
1106
error_entry( NPP_NOT_EVEN_STEP_ERROR ),
1107
error_entry( NPP_INTERPOLATION_ERROR ),
1108
error_entry( NPP_RESIZE_FACTOR_ERROR ),
1109
error_entry( NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR ),
1110
error_entry( NPP_MEMCPY_ERROR ),
1111
error_entry( NPP_ALIGNMENT_ERROR ),
1112
error_entry( NPP_STEP_ERROR ),
1113
error_entry( NPP_SIZE_ERROR ),
1114
error_entry( NPP_NULL_POINTER_ERROR ),
1115
error_entry( NPP_CUDA_KERNEL_EXECUTION_ERROR ),
1116
error_entry( NPP_NOT_IMPLEMENTED_ERROR ),
1117
error_entry( NPP_ERROR ),
1118
error_entry( NPP_NO_ERROR ),
1119
error_entry( NPP_SUCCESS ),
1120
error_entry( NPP_WRONG_INTERSECTION_QUAD_WARNING ),
1121
error_entry( NPP_MISALIGNED_DST_ROI_WARNING ),
1122
error_entry( NPP_AFFINE_QUAD_INCORRECT_WARNING ),
1123
error_entry( NPP_DOUBLE_SIZE_WARNING )
1124
};
1125
1126
const size_t npp_error_num = sizeof(npp_errors) / sizeof(npp_errors[0]);
1127
1128
const ErrorEntry cu_errors [] =
1129
{
1130
error_entry( CUDA_SUCCESS ),
1131
error_entry( CUDA_ERROR_INVALID_VALUE ),
1132
error_entry( CUDA_ERROR_OUT_OF_MEMORY ),
1133
error_entry( CUDA_ERROR_NOT_INITIALIZED ),
1134
error_entry( CUDA_ERROR_DEINITIALIZED ),
1135
error_entry( CUDA_ERROR_PROFILER_DISABLED ),
1136
error_entry( CUDA_ERROR_PROFILER_NOT_INITIALIZED ),
1137
error_entry( CUDA_ERROR_PROFILER_ALREADY_STARTED ),
1138
error_entry( CUDA_ERROR_PROFILER_ALREADY_STOPPED ),
1139
error_entry( CUDA_ERROR_NO_DEVICE ),
1140
error_entry( CUDA_ERROR_INVALID_DEVICE ),
1141
error_entry( CUDA_ERROR_INVALID_IMAGE ),
1142
error_entry( CUDA_ERROR_INVALID_CONTEXT ),
1143
error_entry( CUDA_ERROR_CONTEXT_ALREADY_CURRENT ),
1144
error_entry( CUDA_ERROR_MAP_FAILED ),
1145
error_entry( CUDA_ERROR_UNMAP_FAILED ),
1146
error_entry( CUDA_ERROR_ARRAY_IS_MAPPED ),
1147
error_entry( CUDA_ERROR_ALREADY_MAPPED ),
1148
error_entry( CUDA_ERROR_NO_BINARY_FOR_GPU ),
1149
error_entry( CUDA_ERROR_ALREADY_ACQUIRED ),
1150
error_entry( CUDA_ERROR_NOT_MAPPED ),
1151
error_entry( CUDA_ERROR_NOT_MAPPED_AS_ARRAY ),
1152
error_entry( CUDA_ERROR_NOT_MAPPED_AS_POINTER ),
1153
error_entry( CUDA_ERROR_ECC_UNCORRECTABLE ),
1154
error_entry( CUDA_ERROR_UNSUPPORTED_LIMIT ),
1155
error_entry( CUDA_ERROR_CONTEXT_ALREADY_IN_USE ),
1156
error_entry( CUDA_ERROR_INVALID_SOURCE ),
1157
error_entry( CUDA_ERROR_FILE_NOT_FOUND ),
1158
error_entry( CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND ),
1159
error_entry( CUDA_ERROR_SHARED_OBJECT_INIT_FAILED ),
1160
error_entry( CUDA_ERROR_OPERATING_SYSTEM ),
1161
error_entry( CUDA_ERROR_INVALID_HANDLE ),
1162
error_entry( CUDA_ERROR_NOT_FOUND ),
1163
error_entry( CUDA_ERROR_NOT_READY ),
1164
error_entry( CUDA_ERROR_LAUNCH_FAILED ),
1165
error_entry( CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES ),
1166
error_entry( CUDA_ERROR_LAUNCH_TIMEOUT ),
1167
error_entry( CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING ),
1168
error_entry( CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED ),
1169
error_entry( CUDA_ERROR_PEER_ACCESS_NOT_ENABLED ),
1170
error_entry( CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE ),
1171
error_entry( CUDA_ERROR_CONTEXT_IS_DESTROYED ),
1172
error_entry( CUDA_ERROR_ASSERT ),
1173
error_entry( CUDA_ERROR_TOO_MANY_PEERS ),
1174
error_entry( CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED ),
1175
error_entry( CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED ),
1176
error_entry( CUDA_ERROR_UNKNOWN )
1177
};
1178
1179
const size_t cu_errors_num = sizeof(cu_errors) / sizeof(cu_errors[0]);
1180
1181
// Formats "<name> [Code = <code>]" for `code`, where <name> is the `str` of
// the first of the `n` rows in `errors` whose code matches, or
// "Unknown error code" when no row matches.
cv::String getErrorString(int code, const ErrorEntry* errors, size_t n)
{
    const ErrorEntry* const tableEnd = errors + n;
    const ErrorEntry* const match = std::find_if(errors, tableEnd, ErrorEntryComparer(code));

    const char* text = "Unknown error code";
    if (match != tableEnd)
        text = match->str;

    return cv::format("%s [Code = %d]", text, code);
}
1190
}
1191
1192
#endif
1193
1194
// Returns the formatted name for an NPP status `code`, looked up in
// npp_errors. Without HAVE_CUDA, returns an empty String.
String cv::cuda::getNppErrorMessage(int code)
{
#ifdef HAVE_CUDA
    return getErrorString(code, npp_errors, npp_error_num);
#else
    CV_UNUSED(code);
    return String();
#endif
}
1203
1204
// Returns the formatted name for a CUDA driver API status `code`, looked up
// in cu_errors. Without HAVE_CUDA, returns an empty String.
String cv::cuda::getCudaDriverApiErrorMessage(int code)
{
#ifdef HAVE_CUDA
    return getErrorString(code, cu_errors, cu_errors_num);
#else
    CV_UNUSED(code);
    return String();
#endif
}
1213
1214