CoCalc -- cuda

GitHub Repository: Tetragramm/opencv
Path: blob/master/modules/core/src/cuda_info.cpp
¹⁶³³⁷ views
1
/*M///////////////////////////////////////////////////////////////////////////////////////
2
//
3
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4
//
5
//  By downloading, copying, installing or using the software you agree to this license.
6
//  If you do not agree to this license, do not download, install,
7
//  copy or use the software.
8
//
9
//
10
//                           License Agreement
11
//                For Open Source Computer Vision Library
12
//
13
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15
// Third party copyrights are property of their respective owners.
16
//
17
// Redistribution and use in source and binary forms, with or without modification,
18
// are permitted provided that the following conditions are met:
19
//
20
//   * Redistribution's of source code must retain the above copyright notice,
21
//     this list of conditions and the following disclaimer.
22
//
23
//   * Redistribution's in binary form must reproduce the above copyright notice,
24
//     this list of conditions and the following disclaimer in the documentation
25
//     and/or other materials provided with the distribution.
26
//
27
//   * The name of the copyright holders may not be used to endorse or promote products
28
//     derived from this software without specific prior written permission.
29
//
30
// This software is provided by the copyright holders and contributors "as is" and
31
// any express or implied warranties, including, but not limited to, the implied
32
// warranties of merchantability and fitness for a particular purpose are disclaimed.
33
// In no event shall the Intel Corporation or contributors be liable for any direct,
34
// indirect, incidental, special, exemplary, or consequential damages
35
// (including, but not limited to, procurement of substitute goods or services;
36
// loss of use, data, or profits; or business interruption) however caused
37
// and on any theory of liability, whether in contract, strict liability,
38
// or tort (including negligence or otherwise) arising in any way out of
39
// the use of this software, even if advised of the possibility of such damage.
40
//
41
//M*/
42

43
#include "precomp.hpp"
44

45
using namespace cv;
46
using namespace cv::cuda;
47

48
// Reports how many CUDA devices the runtime sees.
// Returns 0 when built without HAVE_CUDA or when cudaGetDeviceCount reports
// cudaErrorNoDevice, and -1 when it reports cudaErrorInsufficientDriver.
// Any other status is passed through cudaSafeCall before the count is returned.
int cv::cuda::getCudaEnabledDeviceCount()
{
#ifdef HAVE_CUDA
    int count;
    const cudaError_t error = cudaGetDeviceCount(&count);

    switch (error)
    {
    case cudaErrorInsufficientDriver:
        return -1;

    case cudaErrorNoDevice:
        return 0;

    default:
        break;
    }

    cudaSafeCall( error );
    return count;
#else
    return 0;
#endif
}
66

67
void cv::cuda::setDevice(int device)
68
{
69
#ifndef HAVE_CUDA
70
    CV_UNUSED(device);
71
    throw_no_cuda();
72
#else
73
    cudaSafeCall( cudaSetDevice(device) );
74
    cudaSafeCall( cudaFree(0) );
75
#endif
76
}
77

78
int cv::cuda::getDevice()
79
{
80
#ifndef HAVE_CUDA
81
    throw_no_cuda();
82
#else
83
    int device;
84
    cudaSafeCall( cudaGetDevice(&device) );
85
    return device;
86
#endif
87
}
88

89
void cv::cuda::resetDevice()
90
{
91
#ifndef HAVE_CUDA
92
    throw_no_cuda();
93
#else
94
    cudaSafeCall( cudaDeviceReset() );
95
#endif
96
}
97

98
// Tells whether the current device (getDevice()) can run code needing
// `feature_set`: the library must have been built with that feature set and
// the device's compute capability, encoded as major * 10 + minor, must be at
// least `feature_set`.
// The encoded capability of the first few device ids is remembered in a
// function-local static table so DeviceInfo is only consulted once per id.
// Built without HAVE_CUDA, the function only calls throw_no_cuda().
bool cv::cuda::deviceSupports(FeatureSet feature_set)
{
#ifdef HAVE_CUDA
    // -1 marks "not looked up yet"
    static int versions[] =
    {
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
    };
    static const int cache_size = static_cast<int>(sizeof(versions) / sizeof(versions[0]));

    const int devId = getDevice();
    const bool cacheable = devId < cache_size;

    // start from the cached value when the id fits into the table
    int version = cacheable ? versions[devId] : -1;

    if (version < 0)
    {
        DeviceInfo dev(devId);
        version = dev.majorVersion() * 10 + dev.minorVersion();

        if (cacheable)
            versions[devId] = version;
    }

    return TargetArchs::builtWith(feature_set) && (version >= feature_set);
#else
    CV_UNUSED(feature_set);
    throw_no_cuda();
#endif
}
129

130
////////////////////////////////////////////////////////////////////////
131
// TargetArchs
132

133
#ifdef HAVE_CUDA
134

135
namespace
136
{
137
    class CudaArch
138
    {
139
    public:
140
        CudaArch();
141

142
        bool builtWith(FeatureSet feature_set) const;
143
        bool hasPtx(int major, int minor) const;
144
        bool hasBin(int major, int minor) const;
145
        bool hasEqualOrLessPtx(int major, int minor) const;
146
        bool hasEqualOrGreaterPtx(int major, int minor) const;
147
        bool hasEqualOrGreaterBin(int major, int minor) const;
148

149
    private:
150
        static void fromStr(const char* set_as_str, std::vector<int>& arr);
151

152
        std::vector<int> bin;
153
        std::vector<int> ptx;
154
        std::vector<int> features;
155
    };
156

157
    const CudaArch cudaArch;
158

159
    CudaArch::CudaArch()
160
    {
161
        fromStr(CUDA_ARCH_BIN, bin);
162
        fromStr(CUDA_ARCH_PTX, ptx);
163
        fromStr(CUDA_ARCH_FEATURES, features);
164
    }
165

166
    bool CudaArch::builtWith(FeatureSet feature_set) const
167
    {
168
        return !features.empty() && (features.back() >= feature_set);
169
    }
170

171
    bool CudaArch::hasPtx(int major, int minor) const
172
    {
173
        return std::find(ptx.begin(), ptx.end(), major * 10 + minor) != ptx.end();
174
    }
175

176
    bool CudaArch::hasBin(int major, int minor) const
177
    {
178
        return std::find(bin.begin(), bin.end(), major * 10 + minor) != bin.end();
179
    }
180

181
    bool CudaArch::hasEqualOrLessPtx(int major, int minor) const
182
    {
183
        return !ptx.empty() && (ptx.front() <= major * 10 + minor);
184
    }
185

186
    bool CudaArch::hasEqualOrGreaterPtx(int major, int minor) const
187
    {
188
        return !ptx.empty() && (ptx.back() >= major * 10 + minor);
189
    }
190

191
    bool CudaArch::hasEqualOrGreaterBin(int major, int minor) const
192
    {
193
        return !bin.empty() && (bin.back() >= major * 10 + minor);
194
    }
195

196
    void CudaArch::fromStr(const char* set_as_str, std::vector<int>& arr)
197
    {
198
        arr.clear();
199

200
        const size_t len = strlen(set_as_str);
201

202
        size_t pos = 0;
203
        while (pos < len)
204
        {
205
            if (isspace(set_as_str[pos]))
206
            {
207
                ++pos;
208
            }
209
            else
210
            {
211
                int cur_value;
212
                int chars_read;
213
                int args_read = sscanf(set_as_str + pos, "%d%n", &cur_value, &chars_read);
214
                CV_Assert( args_read == 1 );
215

216
                arr.push_back(cur_value);
217
                pos += chars_read;
218
            }
219
        }
220

221
        std::sort(arr.begin(), arr.end());
222
    }
223
}
224

225
#endif
226

227
// Forwards to cudaArch.builtWith(feature_set).
// Built without HAVE_CUDA, the function only calls throw_no_cuda().
bool cv::cuda::TargetArchs::builtWith(cv::cuda::FeatureSet feature_set)
{
#ifdef HAVE_CUDA
    return cudaArch.builtWith(feature_set);
#else
    CV_UNUSED(feature_set);
    throw_no_cuda();
#endif
}
236

237
bool cv::cuda::TargetArchs::hasPtx(int major, int minor)
238
{
239
#ifndef HAVE_CUDA
240
    CV_UNUSED(major);
241
    CV_UNUSED(minor);
242
    throw_no_cuda();
243
#else
244
    return cudaArch.hasPtx(major, minor);
245
#endif
246
}
247

248
bool cv::cuda::TargetArchs::hasBin(int major, int minor)
249
{
250
#ifndef HAVE_CUDA
251
    CV_UNUSED(major);
252
    CV_UNUSED(minor);
253
    throw_no_cuda();
254
#else
255
    return cudaArch.hasBin(major, minor);
256
#endif
257
}
258

259
bool cv::cuda::TargetArchs::hasEqualOrLessPtx(int major, int minor)
260
{
261
#ifndef HAVE_CUDA
262
    CV_UNUSED(major);
263
    CV_UNUSED(minor);
264
    throw_no_cuda();
265
#else
266
    return cudaArch.hasEqualOrLessPtx(major, minor);
267
#endif
268
}
269

270
bool cv::cuda::TargetArchs::hasEqualOrGreaterPtx(int major, int minor)
271
{
272
#ifndef HAVE_CUDA
273
    CV_UNUSED(major);
274
    CV_UNUSED(minor);
275
    throw_no_cuda();
276
#else
277
    return cudaArch.hasEqualOrGreaterPtx(major, minor);
278
#endif
279
}
280

281
bool cv::cuda::TargetArchs::hasEqualOrGreaterBin(int major, int minor)
282
{
283
#ifndef HAVE_CUDA
284
    CV_UNUSED(major);
285
    CV_UNUSED(minor);
286
    throw_no_cuda();
287
#else
288
    return cudaArch.hasEqualOrGreaterBin(major, minor);
289
#endif
290
}
291

292
////////////////////////////////////////////////////////////////////////
293
// DeviceInfo
294

295
#ifdef HAVE_CUDA
296

297
namespace
298
{
299
    class DeviceProps
300
    {
301
    public:
302
        DeviceProps();
303

304
        const cudaDeviceProp* get(int devID) const;
305

306
    private:
307
        std::vector<cudaDeviceProp> props_;
308
    };
309

310
    DeviceProps::DeviceProps()
311
    {
312
        int count = getCudaEnabledDeviceCount();
313

314
        if (count > 0)
315
        {
316
            props_.resize(count);
317

318
            for (int devID = 0; devID < count; ++devID)
319
            {
320
                cudaSafeCall( cudaGetDeviceProperties(&props_[devID], devID) );
321
            }
322
        }
323
    }
324

325
    const cudaDeviceProp* DeviceProps::get(int devID) const
326
    {
327
        CV_Assert( static_cast<size_t>(devID) < props_.size() );
328

329
        return &props_[devID];
330
    }
331

332
    DeviceProps& deviceProps()
333
    {
334
        static DeviceProps props;
335
        return props;
336
    }
337
}
338

339
#endif
340

341
const char* cv::cuda::DeviceInfo::name() const
342
{
343
#ifndef HAVE_CUDA
344
    throw_no_cuda();
345
#else
346
    return deviceProps().get(device_id_)->name;
347
#endif
348
}
349

350
size_t cv::cuda::DeviceInfo::totalGlobalMem() const
351
{
352
#ifndef HAVE_CUDA
353
    throw_no_cuda();
354
#else
355
    return deviceProps().get(device_id_)->totalGlobalMem;
356
#endif
357
}
358

359
size_t cv::cuda::DeviceInfo::sharedMemPerBlock() const
360
{
361
#ifndef HAVE_CUDA
362
    throw_no_cuda();
363
#else
364
    return deviceProps().get(device_id_)->sharedMemPerBlock;
365
#endif
366
}
367

368
int cv::cuda::DeviceInfo::regsPerBlock() const
369
{
370
#ifndef HAVE_CUDA
371
    throw_no_cuda();
372
#else
373
    return deviceProps().get(device_id_)->regsPerBlock;
374
#endif
375
}
376

377
int cv::cuda::DeviceInfo::warpSize() const
378
{
379
#ifndef HAVE_CUDA
380
    throw_no_cuda();
381
#else
382
    return deviceProps().get(device_id_)->warpSize;
383
#endif
384
}
385

386
size_t cv::cuda::DeviceInfo::memPitch() const
387
{
388
#ifndef HAVE_CUDA
389
    throw_no_cuda();
390
#else
391
    return deviceProps().get(device_id_)->memPitch;
392
#endif
393
}
394

395
int cv::cuda::DeviceInfo::maxThreadsPerBlock() const
396
{
397
#ifndef HAVE_CUDA
398
    throw_no_cuda();
399
#else
400
    return deviceProps().get(device_id_)->maxThreadsPerBlock;
401
#endif
402
}
403

404
// Returns cudaDeviceProp::maxThreadsDim, wrapped in a Vec3i, from the
// deviceProps() record for device_id_.
// Built without HAVE_CUDA, the function only calls throw_no_cuda().
Vec3i cv::cuda::DeviceInfo::maxThreadsDim() const
{
#ifdef HAVE_CUDA
    const cudaDeviceProp* const prop = deviceProps().get(device_id_);
    return Vec3i(prop->maxThreadsDim);
#else
    throw_no_cuda();
#endif
}
412

413
// Returns cudaDeviceProp::maxGridSize, wrapped in a Vec3i, from the
// deviceProps() record for device_id_.
// Built without HAVE_CUDA, the function only calls throw_no_cuda().
Vec3i cv::cuda::DeviceInfo::maxGridSize() const
{
#ifdef HAVE_CUDA
    const cudaDeviceProp* const prop = deviceProps().get(device_id_);
    return Vec3i(prop->maxGridSize);
#else
    throw_no_cuda();
#endif
}
421

422
int cv::cuda::DeviceInfo::clockRate() const
423
{
424
#ifndef HAVE_CUDA
425
    throw_no_cuda();
426
#else
427
    return deviceProps().get(device_id_)->clockRate;
428
#endif
429
}
430

431
size_t cv::cuda::DeviceInfo::totalConstMem() const
432
{
433
#ifndef HAVE_CUDA
434
    throw_no_cuda();
435
#else
436
    return deviceProps().get(device_id_)->totalConstMem;
437
#endif
438
}
439

440
int cv::cuda::DeviceInfo::majorVersion() const
441
{
442
#ifndef HAVE_CUDA
443
    throw_no_cuda();
444
#else
445
    return deviceProps().get(device_id_)->major;
446
#endif
447
}
448

449
int cv::cuda::DeviceInfo::minorVersion() const
450
{
451
#ifndef HAVE_CUDA
452
    throw_no_cuda();
453
#else
454
    return deviceProps().get(device_id_)->minor;
455
#endif
456
}
457

458
size_t cv::cuda::DeviceInfo::textureAlignment() const
459
{
460
#ifndef HAVE_CUDA
461
    throw_no_cuda();
462
#else
463
    return deviceProps().get(device_id_)->textureAlignment;
464
#endif
465
}
466

467
size_t cv::cuda::DeviceInfo::texturePitchAlignment() const
468
{
469
#ifndef HAVE_CUDA
470
    throw_no_cuda();
471
#else
472
    return deviceProps().get(device_id_)->texturePitchAlignment;
473
#endif
474
}
475

476
int cv::cuda::DeviceInfo::multiProcessorCount() const
477
{
478
#ifndef HAVE_CUDA
479
    throw_no_cuda();
480
#else
481
    return deviceProps().get(device_id_)->multiProcessorCount;
482
#endif
483
}
484

485
bool cv::cuda::DeviceInfo::kernelExecTimeoutEnabled() const
486
{
487
#ifndef HAVE_CUDA
488
    throw_no_cuda();
489
#else
490
    return deviceProps().get(device_id_)->kernelExecTimeoutEnabled != 0;
491
#endif
492
}
493

494
bool cv::cuda::DeviceInfo::integrated() const
495
{
496
#ifndef HAVE_CUDA
497
    throw_no_cuda();
498
#else
499
    return deviceProps().get(device_id_)->integrated != 0;
500
#endif
501
}
502

503
bool cv::cuda::DeviceInfo::canMapHostMemory() const
504
{
505
#ifndef HAVE_CUDA
506
    throw_no_cuda();
507
#else
508
    return deviceProps().get(device_id_)->canMapHostMemory != 0;
509
#endif
510
}
511

512
// Translates cudaDeviceProp::computeMode of the deviceProps() record for
// device_id_ into a DeviceInfo::ComputeMode by using it as an index into a
// four-entry lookup table.
// Built without HAVE_CUDA, the function only calls throw_no_cuda().
DeviceInfo::ComputeMode cv::cuda::DeviceInfo::computeMode() const
{
#ifdef HAVE_CUDA
    static const ComputeMode tbl[] =
    {
        ComputeModeDefault,
        ComputeModeExclusive,
        ComputeModeProhibited,
        ComputeModeExclusiveProcess
    };

    // NOTE(review): the index is used unchecked; values outside 0..3 are not handled here
    const int modeIndex = deviceProps().get(device_id_)->computeMode;
    return tbl[modeIndex];
#else
    throw_no_cuda();
#endif
}
528

529
int cv::cuda::DeviceInfo::maxTexture1D() const
530
{
531
#ifndef HAVE_CUDA
532
    throw_no_cuda();
533
#else
534
    return deviceProps().get(device_id_)->maxTexture1D;
535
#endif
536
}
537

538
int cv::cuda::DeviceInfo::maxTexture1DMipmap() const
539
{
540
#ifndef HAVE_CUDA
541
    throw_no_cuda();
542
#else
543
    #if CUDA_VERSION >= 5000
544
        return deviceProps().get(device_id_)->maxTexture1DMipmap;
545
    #else
546
        CV_Error(Error::StsNotImplemented, "This function requires CUDA 5.0");
547
        return 0;
548
    #endif
549
#endif
550
}
551

552
int cv::cuda::DeviceInfo::maxTexture1DLinear() const
553
{
554
#ifndef HAVE_CUDA
555
    throw_no_cuda();
556
#else
557
    return deviceProps().get(device_id_)->maxTexture1DLinear;
558
#endif
559
}
560

561
// Returns cudaDeviceProp::maxTexture2D, wrapped in a Vec2i, from the
// deviceProps() record for device_id_.
// Built without HAVE_CUDA, the function only calls throw_no_cuda().
Vec2i cv::cuda::DeviceInfo::maxTexture2D() const
{
#ifdef HAVE_CUDA
    const cudaDeviceProp* const prop = deviceProps().get(device_id_);
    return Vec2i(prop->maxTexture2D);
#else
    throw_no_cuda();
#endif
}
569

570
// Returns cudaDeviceProp::maxTexture2DMipmap, wrapped in a Vec2i, from the
// deviceProps() record for device_id_ when compiled with CUDA_VERSION >= 5000;
// with an older toolkit it raises Error::StsNotImplemented through CV_Error.
// Built without HAVE_CUDA, the function only calls throw_no_cuda().
Vec2i cv::cuda::DeviceInfo::maxTexture2DMipmap() const
{
#ifdef HAVE_CUDA
    #if CUDA_VERSION < 5000
        CV_Error(Error::StsNotImplemented, "This function requires CUDA 5.0");
        return Vec2i();
    #else
        const cudaDeviceProp* const prop = deviceProps().get(device_id_);
        return Vec2i(prop->maxTexture2DMipmap);
    #endif
#else
    throw_no_cuda();
#endif
}
583

584
// Returns cudaDeviceProp::maxTexture2DLinear, wrapped in a Vec3i, from the
// deviceProps() record for device_id_.
// Built without HAVE_CUDA, the function only calls throw_no_cuda().
Vec3i cv::cuda::DeviceInfo::maxTexture2DLinear() const
{
#ifdef HAVE_CUDA
    const cudaDeviceProp* const prop = deviceProps().get(device_id_);
    return Vec3i(prop->maxTexture2DLinear);
#else
    throw_no_cuda();
#endif
}
592

593
// Returns cudaDeviceProp::maxTexture2DGather, wrapped in a Vec2i, from the
// deviceProps() record for device_id_.
// Built without HAVE_CUDA, the function only calls throw_no_cuda().
Vec2i cv::cuda::DeviceInfo::maxTexture2DGather() const
{
#ifdef HAVE_CUDA
    const cudaDeviceProp* const prop = deviceProps().get(device_id_);
    return Vec2i(prop->maxTexture2DGather);
#else
    throw_no_cuda();
#endif
}
601

602
// Returns cudaDeviceProp::maxTexture3D, wrapped in a Vec3i, from the
// deviceProps() record for device_id_.
// Built without HAVE_CUDA, the function only calls throw_no_cuda().
Vec3i cv::cuda::DeviceInfo::maxTexture3D() const
{
#ifdef HAVE_CUDA
    const cudaDeviceProp* const prop = deviceProps().get(device_id_);
    return Vec3i(prop->maxTexture3D);
#else
    throw_no_cuda();
#endif
}
610

611
int cv::cuda::DeviceInfo::maxTextureCubemap() const
612
{
613
#ifndef HAVE_CUDA
614
    throw_no_cuda();
615
#else
616
    return deviceProps().get(device_id_)->maxTextureCubemap;
617
#endif
618
}
619

620
// Returns cudaDeviceProp::maxTexture1DLayered, wrapped in a Vec2i, from the
// deviceProps() record for device_id_.
// Built without HAVE_CUDA, the function only calls throw_no_cuda().
Vec2i cv::cuda::DeviceInfo::maxTexture1DLayered() const
{
#ifdef HAVE_CUDA
    const cudaDeviceProp* const prop = deviceProps().get(device_id_);
    return Vec2i(prop->maxTexture1DLayered);
#else
    throw_no_cuda();
#endif
}
628

629
// Returns cudaDeviceProp::maxTexture2DLayered, wrapped in a Vec3i, from the
// deviceProps() record for device_id_.
// Built without HAVE_CUDA, the function only calls throw_no_cuda().
Vec3i cv::cuda::DeviceInfo::maxTexture2DLayered() const
{
#ifdef HAVE_CUDA
    const cudaDeviceProp* const prop = deviceProps().get(device_id_);
    return Vec3i(prop->maxTexture2DLayered);
#else
    throw_no_cuda();
#endif
}
637

638
// Returns cudaDeviceProp::maxTextureCubemapLayered, wrapped in a Vec2i, from
// the deviceProps() record for device_id_.
// Built without HAVE_CUDA, the function only calls throw_no_cuda().
Vec2i cv::cuda::DeviceInfo::maxTextureCubemapLayered() const
{
#ifdef HAVE_CUDA
    const cudaDeviceProp* const prop = deviceProps().get(device_id_);
    return Vec2i(prop->maxTextureCubemapLayered);
#else
    throw_no_cuda();
#endif
}
646

647
int cv::cuda::DeviceInfo::maxSurface1D() const
648
{
649
#ifndef HAVE_CUDA
650
    throw_no_cuda();
651
#else
652
    return deviceProps().get(device_id_)->maxSurface1D;
653
#endif
654
}
655

656
// Returns cudaDeviceProp::maxSurface2D, wrapped in a Vec2i, from the
// deviceProps() record for device_id_.
// Built without HAVE_CUDA, the function only calls throw_no_cuda().
Vec2i cv::cuda::DeviceInfo::maxSurface2D() const
{
#ifdef HAVE_CUDA
    const cudaDeviceProp* const prop = deviceProps().get(device_id_);
    return Vec2i(prop->maxSurface2D);
#else
    throw_no_cuda();
#endif
}
664

665
// Returns cudaDeviceProp::maxSurface3D, wrapped in a Vec3i, from the
// deviceProps() record for device_id_.
// Built without HAVE_CUDA, the function only calls throw_no_cuda().
Vec3i cv::cuda::DeviceInfo::maxSurface3D() const
{
#ifdef HAVE_CUDA
    const cudaDeviceProp* const prop = deviceProps().get(device_id_);
    return Vec3i(prop->maxSurface3D);
#else
    throw_no_cuda();
#endif
}
673

674
// Returns cudaDeviceProp::maxSurface1DLayered, wrapped in a Vec2i, from the
// deviceProps() record for device_id_.
// Built without HAVE_CUDA, the function only calls throw_no_cuda().
Vec2i cv::cuda::DeviceInfo::maxSurface1DLayered() const
{
#ifdef HAVE_CUDA
    const cudaDeviceProp* const prop = deviceProps().get(device_id_);
    return Vec2i(prop->maxSurface1DLayered);
#else
    throw_no_cuda();
#endif
}
682

683
// Returns cudaDeviceProp::maxSurface2DLayered, wrapped in a Vec3i, from the
// deviceProps() record for device_id_.
// Built without HAVE_CUDA, the function only calls throw_no_cuda().
Vec3i cv::cuda::DeviceInfo::maxSurface2DLayered() const
{
#ifdef HAVE_CUDA
    const cudaDeviceProp* const prop = deviceProps().get(device_id_);
    return Vec3i(prop->maxSurface2DLayered);
#else
    throw_no_cuda();
#endif
}
691

692
int cv::cuda::DeviceInfo::maxSurfaceCubemap() const
693
{
694
#ifndef HAVE_CUDA
695
    throw_no_cuda();
696
#else
697
    return deviceProps().get(device_id_)->maxSurfaceCubemap;
698
#endif
699
}
700

701
// Returns cudaDeviceProp::maxSurfaceCubemapLayered, wrapped in a Vec2i, from
// the deviceProps() record for device_id_.
// Built without HAVE_CUDA, the function only calls throw_no_cuda().
Vec2i cv::cuda::DeviceInfo::maxSurfaceCubemapLayered() const
{
#ifdef HAVE_CUDA
    const cudaDeviceProp* const prop = deviceProps().get(device_id_);
    return Vec2i(prop->maxSurfaceCubemapLayered);
#else
    throw_no_cuda();
#endif
}
709

710
size_t cv::cuda::DeviceInfo::surfaceAlignment() const
711
{
712
#ifndef HAVE_CUDA
713
    throw_no_cuda();
714
#else
715
    return deviceProps().get(device_id_)->surfaceAlignment;
716
#endif
717
}
718

719
bool cv::cuda::DeviceInfo::concurrentKernels() const
720
{
721
#ifndef HAVE_CUDA
722
    throw_no_cuda();
723
#else
724
    return deviceProps().get(device_id_)->concurrentKernels != 0;
725
#endif
726
}
727

728
bool cv::cuda::DeviceInfo::ECCEnabled() const
729
{
730
#ifndef HAVE_CUDA
731
    throw_no_cuda();
732
#else
733
    return deviceProps().get(device_id_)->ECCEnabled != 0;
734
#endif
735
}
736

737
int cv::cuda::DeviceInfo::pciBusID() const
738
{
739
#ifndef HAVE_CUDA
740
    throw_no_cuda();
741
#else
742
    return deviceProps().get(device_id_)->pciBusID;
743
#endif
744
}
745

746
int cv::cuda::DeviceInfo::pciDeviceID() const
747
{
748
#ifndef HAVE_CUDA
749
    throw_no_cuda();
750
#else
751
    return deviceProps().get(device_id_)->pciDeviceID;
752
#endif
753
}
754

755
int cv::cuda::DeviceInfo::pciDomainID() const
756
{
757
#ifndef HAVE_CUDA
758
    throw_no_cuda();
759
#else
760
    return deviceProps().get(device_id_)->pciDomainID;
761
#endif
762
}
763

764
bool cv::cuda::DeviceInfo::tccDriver() const
765
{
766
#ifndef HAVE_CUDA
767
    throw_no_cuda();
768
#else
769
    return deviceProps().get(device_id_)->tccDriver != 0;
770
#endif
771
}
772

773
int cv::cuda::DeviceInfo::asyncEngineCount() const
774
{
775
#ifndef HAVE_CUDA
776
    throw_no_cuda();
777
#else
778
    return deviceProps().get(device_id_)->asyncEngineCount;
779
#endif
780
}
781

782
bool cv::cuda::DeviceInfo::unifiedAddressing() const
783
{
784
#ifndef HAVE_CUDA
785
    throw_no_cuda();
786
#else
787
    return deviceProps().get(device_id_)->unifiedAddressing != 0;
788
#endif
789
}
790

791
int cv::cuda::DeviceInfo::memoryClockRate() const
792
{
793
#ifndef HAVE_CUDA
794
    throw_no_cuda();
795
#else
796
    return deviceProps().get(device_id_)->memoryClockRate;
797
#endif
798
}
799

800
int cv::cuda::DeviceInfo::memoryBusWidth() const
801
{
802
#ifndef HAVE_CUDA
803
    throw_no_cuda();
804
#else
805
    return deviceProps().get(device_id_)->memoryBusWidth;
806
#endif
807
}
808

809
int cv::cuda::DeviceInfo::l2CacheSize() const
810
{
811
#ifndef HAVE_CUDA
812
    throw_no_cuda();
813
#else
814
    return deviceProps().get(device_id_)->l2CacheSize;
815
#endif
816
}
817

818
int cv::cuda::DeviceInfo::maxThreadsPerMultiProcessor() const
819
{
820
#ifndef HAVE_CUDA
821
    throw_no_cuda();
822
#else
823
    return deviceProps().get(device_id_)->maxThreadsPerMultiProcessor;
824
#endif
825
}
826

827
void cv::cuda::DeviceInfo::queryMemory(size_t& _totalMemory, size_t& _freeMemory) const
828
{
829
#ifndef HAVE_CUDA
830
    CV_UNUSED(_totalMemory);
831
    CV_UNUSED(_freeMemory);
832
    throw_no_cuda();
833
#else
834
    int prevDeviceID = getDevice();
835
    if (prevDeviceID != device_id_)
836
        setDevice(device_id_);
837

838
    cudaSafeCall( cudaMemGetInfo(&_freeMemory, &_totalMemory) );
839

840
    if (prevDeviceID != device_id_)
841
        setDevice(prevDeviceID);
842
#endif
843
}
844

845
bool cv::cuda::DeviceInfo::isCompatible() const
846
{
847
#ifndef HAVE_CUDA
848
    throw_no_cuda();
849
#else
850
    // Check PTX compatibility
851
    if (TargetArchs::hasEqualOrLessPtx(majorVersion(), minorVersion()))
852
        return true;
853

854
    // Check BIN compatibility
855
    for (int i = minorVersion(); i >= 0; --i)
856
        if (TargetArchs::hasBin(majorVersion(), i))
857
            return true;
858

859
    return false;
860
#endif
861
}
862

863
////////////////////////////////////////////////////////////////////////
864
// print info
865

866
#ifdef HAVE_CUDA
867

868
namespace
{
    // Maps a compute capability to the number of CUDA cores per multiprocessor.
    //
    // major, minor  compute capability of the device (e.g. 3 and 5 for sm_35)
    // returns       cores per multiprocessor, or -1 when the capability is not
    //               present in the table below
    int convertSMVer2Cores(int major, int minor)
    {
        // SM is written as 0xMm: M = SM major version, m = SM minor version
        struct SMtoCores
        {
            int SM;
            int Cores;
        };

        static const SMtoCores gpuArchCoresPerSM[] =
        {
            { 0x10,   8 }, { 0x11,   8 }, { 0x12,   8 }, { 0x13,   8 },
            { 0x20,  32 }, { 0x21,  48 },
            { 0x30, 192 }, { 0x32, 192 }, { 0x35, 192 }, { 0x37, 192 },
            { 0x50, 128 }, { 0x52, 128 }, { 0x53, 128 },
            { 0x60,  64 }, { 0x61, 128 }, { 0x62, 128 },
            { 0x70,  64 }, { 0x72,  64 }, { 0x75,  64 },
            { 0x80,  64 }, { 0x86, 128 }, { 0x87, 128 }, { 0x89, 128 },
            { 0x90, 128 }
        };
        static const size_t entryCount = sizeof(gpuArchCoresPerSM) / sizeof(gpuArchCoresPerSM[0]);

        const int sm = (major << 4) + minor;

        for (size_t i = 0; i < entryCount; ++i)
        {
            if (gpuArchCoresPerSM[i].SM == sm)
                return gpuArchCoresPerSM[i].Cores;
        }

        return -1;
    }
}
891

892
#endif
893

894
void cv::cuda::printCudaDeviceInfo(int device)
895
{
896
#ifndef HAVE_CUDA
897
    CV_UNUSED(device);
898
    throw_no_cuda();
899
#else
900
    int count = getCudaEnabledDeviceCount();
901
    bool valid = (device >= 0) && (device < count);
902

903
    int beg = valid ? device   : 0;
904
    int end = valid ? device+1 : count;
905

906
    printf("*** CUDA Device Query (Runtime API) version (CUDART static linking) *** \n\n");
907
    printf("Device count: %d\n", count);
908

909
    int driverVersion = 0, runtimeVersion = 0;
910
    cudaSafeCall( cudaDriverGetVersion(&driverVersion) );
911
    cudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );
912

913
    const char *computeMode[] = {
914
        "Default (multiple host threads can use ::cudaSetDevice() with device simultaneously)",
915
        "Exclusive (only one host thread in one process is able to use ::cudaSetDevice() with this device)",
916
        "Prohibited (no host thread can use ::cudaSetDevice() with this device)",
917
        "Exclusive Process (many threads in one process is able to use ::cudaSetDevice() with this device)",
918
        "Unknown",
919
        NULL
920
    };
921

922
    for(int dev = beg; dev < end; ++dev)
923
    {
924
        cudaDeviceProp prop;
925
        cudaSafeCall( cudaGetDeviceProperties(&prop, dev) );
926

927
        printf("\nDevice %d: \"%s\"\n", dev, prop.name);
928
        printf("  CUDA Driver Version / Runtime Version          %d.%d / %d.%d\n", driverVersion/1000, driverVersion%100, runtimeVersion/1000, runtimeVersion%100);
929
        printf("  CUDA Capability Major/Minor version number:    %d.%d\n", prop.major, prop.minor);
930
        printf("  Total amount of global memory:                 %.0f MBytes (%llu bytes)\n", (float)prop.totalGlobalMem/1048576.0f, (unsigned long long) prop.totalGlobalMem);
931

932
        int cores = convertSMVer2Cores(prop.major, prop.minor);
933
        if (cores > 0)
934
            printf("  (%2d) Multiprocessors x (%2d) CUDA Cores/MP:     %d CUDA Cores\n", prop.multiProcessorCount, cores, cores * prop.multiProcessorCount);
935

936
        printf("  GPU Clock Speed:                               %.2f GHz\n", prop.clockRate * 1e-6f);
937

938
        printf("  Max Texture Dimension Size (x,y,z)             1D=(%d), 2D=(%d,%d), 3D=(%d,%d,%d)\n",
939
            prop.maxTexture1D, prop.maxTexture2D[0], prop.maxTexture2D[1],
940
            prop.maxTexture3D[0], prop.maxTexture3D[1], prop.maxTexture3D[2]);
941
        printf("  Max Layered Texture Size (dim) x layers        1D=(%d) x %d, 2D=(%d,%d) x %d\n",
942
            prop.maxTexture1DLayered[0], prop.maxTexture1DLayered[1],
943
            prop.maxTexture2DLayered[0], prop.maxTexture2DLayered[1], prop.maxTexture2DLayered[2]);
944

945
        printf("  Total amount of constant memory:               %u bytes\n", (int)prop.totalConstMem);
946
        printf("  Total amount of shared memory per block:       %u bytes\n", (int)prop.sharedMemPerBlock);
947
        printf("  Total number of registers available per block: %d\n", prop.regsPerBlock);
948
        printf("  Warp size:                                     %d\n", prop.warpSize);
949
        printf("  Maximum number of threads per block:           %d\n", prop.maxThreadsPerBlock);
950
        printf("  Maximum sizes of each dimension of a block:    %d x %d x %d\n", prop.maxThreadsDim[0], prop.maxThreadsDim[1], prop.maxThreadsDim[2]);
951
        printf("  Maximum sizes of each dimension of a grid:     %d x %d x %d\n", prop.maxGridSize[0], prop.maxGridSize[1],  prop.maxGridSize[2]);
952
        printf("  Maximum memory pitch:                          %u bytes\n", (int)prop.memPitch);
953
        printf("  Texture alignment:                             %u bytes\n", (int)prop.textureAlignment);
954

955
        printf("  Concurrent copy and execution:                 %s with %d copy engine(s)\n", (prop.deviceOverlap ? "Yes" : "No"), prop.asyncEngineCount);
956
        printf("  Run time limit on kernels:                     %s\n", prop.kernelExecTimeoutEnabled ? "Yes" : "No");
957
        printf("  Integrated GPU sharing Host Memory:            %s\n", prop.integrated ? "Yes" : "No");
958
        printf("  Support host page-locked memory mapping:       %s\n", prop.canMapHostMemory ? "Yes" : "No");
959

960
        printf("  Concurrent kernel execution:                   %s\n", prop.concurrentKernels ? "Yes" : "No");
961
        printf("  Alignment requirement for Surfaces:            %s\n", prop.surfaceAlignment ? "Yes" : "No");
962
        printf("  Device has ECC support enabled:                %s\n", prop.ECCEnabled ? "Yes" : "No");
963
        printf("  Device is using TCC driver mode:               %s\n", prop.tccDriver ? "Yes" : "No");
964
        printf("  Device supports Unified Addressing (UVA):      %s\n", prop.unifiedAddressing ? "Yes" : "No");
965
        printf("  Device PCI Bus ID / PCI location ID:           %d / %d\n", prop.pciBusID, prop.pciDeviceID );
966
        printf("  Compute Mode:\n");
967
        printf("      %s \n", computeMode[prop.computeMode]);
968
    }
969

970
    printf("\n");
971
    printf("deviceQuery, CUDA Driver = CUDART");
972
    printf(", CUDA Driver Version  = %d.%d", driverVersion / 1000, driverVersion % 100);
973
    printf(", CUDA Runtime Version = %d.%d", runtimeVersion/1000, runtimeVersion%100);
974
    printf(", NumDevs = %d\n\n", count);
975

976
    fflush(stdout);
977
#endif
978
}
979

980
void cv::cuda::printShortCudaDeviceInfo(int device)
981
{
982
#ifndef HAVE_CUDA
983
    CV_UNUSED(device);
984
    throw_no_cuda();
985
#else
986
    int count = getCudaEnabledDeviceCount();
987
    bool valid = (device >= 0) && (device < count);
988

989
    int beg = valid ? device   : 0;
990
    int end = valid ? device+1 : count;
991

992
    int driverVersion = 0, runtimeVersion = 0;
993
    cudaSafeCall( cudaDriverGetVersion(&driverVersion) );
994
    cudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );
995

996
    for(int dev = beg; dev < end; ++dev)
997
    {
998
        cudaDeviceProp prop;
999
        cudaSafeCall( cudaGetDeviceProperties(&prop, dev) );
1000

1001
        const char *arch_str = prop.major < 2 ? " (not Fermi)" : "";
1002
        printf("Device %d:  \"%s\"  %.0fMb", dev, prop.name, (float)prop.totalGlobalMem/1048576.0f);
1003
        printf(", sm_%d%d%s", prop.major, prop.minor, arch_str);
1004

1005
        int cores = convertSMVer2Cores(prop.major, prop.minor);
1006
        if (cores > 0)
1007
            printf(", %d cores", cores * prop.multiProcessorCount);
1008

1009
        printf(", Driver/Runtime ver.%d.%d/%d.%d\n", driverVersion/1000, driverVersion%100, runtimeVersion/1000, runtimeVersion%100);
1010
    }
1011

1012
    fflush(stdout);
1013
#endif
1014
}
1015

1016
////////////////////////////////////////////////////////////////////////
1017
// Error handling
1018

1019
#ifdef HAVE_CUDA
1020

1021
namespace
1022
{
1023
    // One row of an error lookup table: a numeric status code together with
    // the text that names it.
    struct ErrorEntry
    {
        int code;
        const char* str;
    };

    // Builds an ErrorEntry initializer from an identifier: the identifier's
    // value becomes the code and its stringified spelling becomes the text.
    #define error_entry(id)  { id, #id }
1030

1031
    struct ErrorEntryComparer
1032
    {
1033
        int code;
1034
        ErrorEntryComparer(int code_) : code(code_) {}
1035
        bool operator()(const ErrorEntry& e) const { return e.code == code; }
1036
    };
1037

1038
    const ErrorEntry npp_errors [] =
1039
    {
1040
    #if defined (_MSC_VER)
1041
        error_entry( NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY ),
1042
    #endif
1043

1044
    #if NPP_VERSION < 5500
1045
        error_entry( NPP_BAD_ARG_ERROR ),
1046
        error_entry( NPP_COEFF_ERROR ),
1047
        error_entry( NPP_RECT_ERROR ),
1048
        error_entry( NPP_QUAD_ERROR ),
1049
        error_entry( NPP_MEMFREE_ERR ),
1050
        error_entry( NPP_MEMSET_ERR ),
1051
        error_entry( NPP_MEM_ALLOC_ERR ),
1052
        error_entry( NPP_HISTO_NUMBER_OF_LEVELS_ERROR ),
1053
        error_entry( NPP_MIRROR_FLIP_ERR ),
1054
        error_entry( NPP_INVALID_INPUT ),
1055
        error_entry( NPP_POINTER_ERROR ),
1056
        error_entry( NPP_WARNING ),
1057
        error_entry( NPP_ODD_ROI_WARNING ),
1058
    #else
1059
        error_entry( NPP_INVALID_HOST_POINTER_ERROR ),
1060
        error_entry( NPP_INVALID_DEVICE_POINTER_ERROR ),
1061
        error_entry( NPP_LUT_PALETTE_BITSIZE_ERROR ),
1062
        error_entry( NPP_ZC_MODE_NOT_SUPPORTED_ERROR ),
1063
        error_entry( NPP_MEMFREE_ERROR ),
1064
        error_entry( NPP_MEMSET_ERROR ),
1065
        error_entry( NPP_QUALITY_INDEX_ERROR ),
1066
        error_entry( NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR ),
1067
        error_entry( NPP_CHANNEL_ORDER_ERROR ),
1068
        error_entry( NPP_ZERO_MASK_VALUE_ERROR ),
1069
        error_entry( NPP_QUADRANGLE_ERROR ),
1070
        error_entry( NPP_RECTANGLE_ERROR ),
1071
        error_entry( NPP_COEFFICIENT_ERROR ),
1072
        error_entry( NPP_NUMBER_OF_CHANNELS_ERROR ),
1073
        error_entry( NPP_COI_ERROR ),
1074
        error_entry( NPP_DIVISOR_ERROR ),
1075
        error_entry( NPP_CHANNEL_ERROR ),
1076
        error_entry( NPP_STRIDE_ERROR ),
1077
        error_entry( NPP_ANCHOR_ERROR ),
1078
        error_entry( NPP_MASK_SIZE_ERROR ),
1079
        error_entry( NPP_MIRROR_FLIP_ERROR ),
1080
        error_entry( NPP_MOMENT_00_ZERO_ERROR ),
1081
        error_entry( NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR ),
1082
        error_entry( NPP_THRESHOLD_ERROR ),
1083
        error_entry( NPP_CONTEXT_MATCH_ERROR ),
1084
        error_entry( NPP_FFT_FLAG_ERROR ),
1085
        error_entry( NPP_FFT_ORDER_ERROR ),
1086
        error_entry( NPP_SCALE_RANGE_ERROR ),
1087
        error_entry( NPP_DATA_TYPE_ERROR ),
1088
        error_entry( NPP_OUT_OFF_RANGE_ERROR ),
1089
        error_entry( NPP_DIVIDE_BY_ZERO_ERROR ),
1090
        error_entry( NPP_MEMORY_ALLOCATION_ERR ),
1091
        error_entry( NPP_RANGE_ERROR ),
1092
        error_entry( NPP_BAD_ARGUMENT_ERROR ),
1093
        error_entry( NPP_NO_MEMORY_ERROR ),
1094
        error_entry( NPP_ERROR_RESERVED ),
1095
        error_entry( NPP_NO_OPERATION_WARNING ),
1096
        error_entry( NPP_DIVIDE_BY_ZERO_WARNING ),
1097
        error_entry( NPP_WRONG_INTERSECTION_ROI_WARNING ),
1098
    #endif
1099

1100
        error_entry( NPP_NOT_SUPPORTED_MODE_ERROR ),
1101
        error_entry( NPP_ROUND_MODE_NOT_SUPPORTED_ERROR ),
1102
        error_entry( NPP_RESIZE_NO_OPERATION_ERROR ),
1103
        error_entry( NPP_LUT_NUMBER_OF_LEVELS_ERROR ),
1104
        error_entry( NPP_TEXTURE_BIND_ERROR ),
1105
        error_entry( NPP_WRONG_INTERSECTION_ROI_ERROR ),
1106
        error_entry( NPP_NOT_EVEN_STEP_ERROR ),
1107
        error_entry( NPP_INTERPOLATION_ERROR ),
1108
        error_entry( NPP_RESIZE_FACTOR_ERROR ),
1109
        error_entry( NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR ),
1110
        error_entry( NPP_MEMCPY_ERROR ),
1111
        error_entry( NPP_ALIGNMENT_ERROR ),
1112
        error_entry( NPP_STEP_ERROR ),
1113
        error_entry( NPP_SIZE_ERROR ),
1114
        error_entry( NPP_NULL_POINTER_ERROR ),
1115
        error_entry( NPP_CUDA_KERNEL_EXECUTION_ERROR ),
1116
        error_entry( NPP_NOT_IMPLEMENTED_ERROR ),
1117
        error_entry( NPP_ERROR ),
1118
        error_entry( NPP_NO_ERROR ),
1119
        error_entry( NPP_SUCCESS ),
1120
        error_entry( NPP_WRONG_INTERSECTION_QUAD_WARNING ),
1121
        error_entry( NPP_MISALIGNED_DST_ROI_WARNING ),
1122
        error_entry( NPP_AFFINE_QUAD_INCORRECT_WARNING ),
1123
        error_entry( NPP_DOUBLE_SIZE_WARNING )
1124
    };
1125

1126
    const size_t npp_error_num = sizeof(npp_errors) / sizeof(npp_errors[0]);
1127

1128
    const ErrorEntry cu_errors [] =
1129
    {
1130
        error_entry( CUDA_SUCCESS                              ),
1131
        error_entry( CUDA_ERROR_INVALID_VALUE                  ),
1132
        error_entry( CUDA_ERROR_OUT_OF_MEMORY                  ),
1133
        error_entry( CUDA_ERROR_NOT_INITIALIZED                ),
1134
        error_entry( CUDA_ERROR_DEINITIALIZED                  ),
1135
        error_entry( CUDA_ERROR_PROFILER_DISABLED              ),
1136
        error_entry( CUDA_ERROR_PROFILER_NOT_INITIALIZED       ),
1137
        error_entry( CUDA_ERROR_PROFILER_ALREADY_STARTED       ),
1138
        error_entry( CUDA_ERROR_PROFILER_ALREADY_STOPPED       ),
1139
        error_entry( CUDA_ERROR_NO_DEVICE                      ),
1140
        error_entry( CUDA_ERROR_INVALID_DEVICE                 ),
1141
        error_entry( CUDA_ERROR_INVALID_IMAGE                  ),
1142
        error_entry( CUDA_ERROR_INVALID_CONTEXT                ),
1143
        error_entry( CUDA_ERROR_CONTEXT_ALREADY_CURRENT        ),
1144
        error_entry( CUDA_ERROR_MAP_FAILED                     ),
1145
        error_entry( CUDA_ERROR_UNMAP_FAILED                   ),
1146
        error_entry( CUDA_ERROR_ARRAY_IS_MAPPED                ),
1147
        error_entry( CUDA_ERROR_ALREADY_MAPPED                 ),
1148
        error_entry( CUDA_ERROR_NO_BINARY_FOR_GPU              ),
1149
        error_entry( CUDA_ERROR_ALREADY_ACQUIRED               ),
1150
        error_entry( CUDA_ERROR_NOT_MAPPED                     ),
1151
        error_entry( CUDA_ERROR_NOT_MAPPED_AS_ARRAY            ),
1152
        error_entry( CUDA_ERROR_NOT_MAPPED_AS_POINTER          ),
1153
        error_entry( CUDA_ERROR_ECC_UNCORRECTABLE              ),
1154
        error_entry( CUDA_ERROR_UNSUPPORTED_LIMIT              ),
1155
        error_entry( CUDA_ERROR_CONTEXT_ALREADY_IN_USE         ),
1156
        error_entry( CUDA_ERROR_INVALID_SOURCE                 ),
1157
        error_entry( CUDA_ERROR_FILE_NOT_FOUND                 ),
1158
        error_entry( CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND ),
1159
        error_entry( CUDA_ERROR_SHARED_OBJECT_INIT_FAILED      ),
1160
        error_entry( CUDA_ERROR_OPERATING_SYSTEM               ),
1161
        error_entry( CUDA_ERROR_INVALID_HANDLE                 ),
1162
        error_entry( CUDA_ERROR_NOT_FOUND                      ),
1163
        error_entry( CUDA_ERROR_NOT_READY                      ),
1164
        error_entry( CUDA_ERROR_LAUNCH_FAILED                  ),
1165
        error_entry( CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES        ),
1166
        error_entry( CUDA_ERROR_LAUNCH_TIMEOUT                 ),
1167
        error_entry( CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING  ),
1168
        error_entry( CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED    ),
1169
        error_entry( CUDA_ERROR_PEER_ACCESS_NOT_ENABLED        ),
1170
        error_entry( CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE         ),
1171
        error_entry( CUDA_ERROR_CONTEXT_IS_DESTROYED           ),
1172
        error_entry( CUDA_ERROR_ASSERT                         ),
1173
        error_entry( CUDA_ERROR_TOO_MANY_PEERS                 ),
1174
        error_entry( CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED ),
1175
        error_entry( CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED     ),
1176
        error_entry( CUDA_ERROR_UNKNOWN                        )
1177
    };
1178

1179
    const size_t cu_errors_num = sizeof(cu_errors) / sizeof(cu_errors[0]);
1180

1181
    // Formats an error code as "<name> [Code = <code>]".
    //
    // code   - numeric status value to describe.
    // errors - lookup table to search.
    // n      - number of rows in errors.
    //
    // The name comes from the first row whose code matches; if none matches,
    // "Unknown error code" is used instead.
    cv::String getErrorString(int code, const ErrorEntry* errors, size_t n)
    {
        const char* name = "Unknown error code";

        // Linear scan that stops at the first matching row.
        for (size_t i = 0; i < n; ++i)
        {
            if (errors[i].code == code)
            {
                name = errors[i].str;
                break;
            }
        }

        return cv::format("%s [Code = %d]", name, code);
    }
1190
}
1191

1192
#endif
1193

1194
// Returns a readable description of an NPP status code, looked up in
// npp_errors. Without HAVE_CUDA the result is an empty String.
String cv::cuda::getNppErrorMessage(int code)
{
#ifdef HAVE_CUDA
    return getErrorString(code, npp_errors, npp_error_num);
#else
    CV_UNUSED(code);
    return String();
#endif
}
1203

1204
// Returns a readable description of a CUDA driver API result code, looked up
// in cu_errors. Without HAVE_CUDA the result is an empty String.
String cv::cuda::getCudaDriverApiErrorMessage(int code)
{
#ifdef HAVE_CUDA
    return getErrorString(code, cu_errors, cu_errors_num);
#else
    CV_UNUSED(code);
    return String();
#endif
}
1213

1214
Product

Resources

Company