CoCalc -- multiband

GitHub Repository: Tetragramm/opencv
Path: blob/master/modules/stitching/src/cuda/multiband_blend.cu
16337 views
1
#if !defined CUDA_DISABLER
2

3
#include "opencv2/core/cuda/common.hpp"
4
#include "opencv2/core/types.hpp"
5

6
namespace cv { namespace cuda { namespace device
7
{
8
    namespace blend
9
    {
10
        // Adds one weighted source image into the accumulation buffers (short weights).
        //
        // One thread processes one pixel (x, y) of a rows x cols region; the launch is expected
        // to be a 2D grid covering that region. Source and destination rows are accessed as
        // packed short3 pixels. Each channel is multiplied by the pixel's weight, shifted right
        // by 8 bits and added to dst; the weight itself is added to dst_weight.
        __global__ void addSrcWeightKernel16S(const PtrStep<short> src, const PtrStep<short> src_weight,
            PtrStep<short> dst, PtrStep<short> dst_weight, int rows, int cols)
        {
            const int x = blockIdx.x * blockDim.x + threadIdx.x;
            const int y = blockIdx.y * blockDim.y + threadIdx.y;

            // Threads that fall outside the region (partial edge blocks) do nothing.
            if (y >= rows || x >= cols)
                return;

            const short3 pixel = reinterpret_cast<const short3*>(src.ptr(y))[x];
            const short weight = src_weight.ptr(y)[x];

            // Resolve the destination pixel once and update its channels in place.
            short3& acc = reinterpret_cast<short3*>(dst.ptr(y))[x];
            acc.x += short((pixel.x * weight) >> 8);
            acc.y += short((pixel.y * weight) >> 8);
            acc.z += short((pixel.z * weight) >> 8);

            dst_weight.ptr(y)[x] += weight;
        }
26

27
        // Host launcher for addSrcWeightKernel16S.
        //
        // Covers an rc.width x rc.height region with 16x16 thread blocks; only the size of rc
        // is used here, not its origin. The kernel is launched without an explicit stream and
        // the launch status is checked through cudaSafeCall(cudaGetLastError()).
        void addSrcWeightGpu16S(const PtrStep<short> src, const PtrStep<short> src_weight,
            PtrStep<short> dst, PtrStep<short> dst_weight, cv::Rect &rc)
        {
            const dim3 block(16, 16);
            const dim3 grid(divUp(rc.width, block.x), divUp(rc.height, block.y));

            addSrcWeightKernel16S<<<grid, block>>>(src, src_weight, dst, dst_weight, rc.height, rc.width);
            cudaSafeCall(cudaGetLastError());
        }
35

36
        // Adds one weighted source image into the accumulation buffers (float weights).
        //
        // One thread processes one pixel (x, y) of a rows x cols region; the launch is expected
        // to be a 2D grid covering that region. Source and destination rows are accessed as
        // packed short3 pixels. Each channel is multiplied by the pixel's float weight, converted
        // to short and added to dst; the weight itself is added to dst_weight.
        __global__ void addSrcWeightKernel32F(const PtrStep<short> src, const PtrStepf src_weight,
            PtrStep<short> dst, PtrStepf dst_weight, int rows, int cols)
        {
            const int x = blockIdx.x * blockDim.x + threadIdx.x;
            const int y = blockIdx.y * blockDim.y + threadIdx.y;

            // Threads that fall outside the region (partial edge blocks) do nothing.
            if (y >= rows || x >= cols)
                return;

            const short3 pixel = reinterpret_cast<const short3*>(src.ptr(y))[x];
            const float weight = src_weight.ptr(y)[x];

            // Resolve the destination pixel once and update its channels in place.
            short3& acc = reinterpret_cast<short3*>(dst.ptr(y))[x];
            acc.x += static_cast<short>(pixel.x * weight);
            acc.y += static_cast<short>(pixel.y * weight);
            acc.z += static_cast<short>(pixel.z * weight);

            dst_weight.ptr(y)[x] += weight;
        }
52

53
        // Host launcher for addSrcWeightKernel32F.
        //
        // Covers an rc.width x rc.height region with 16x16 thread blocks; only the size of rc
        // is used here, not its origin. The kernel is launched without an explicit stream and
        // the launch status is checked through cudaSafeCall(cudaGetLastError()).
        void addSrcWeightGpu32F(const PtrStep<short> src, const PtrStepf src_weight,
            PtrStep<short> dst, PtrStepf dst_weight, cv::Rect &rc)
        {
            const dim3 block(16, 16);
            const dim3 grid(divUp(rc.width, block.x), divUp(rc.height, block.y));

            addSrcWeightKernel32F<<<grid, block>>>(src, src_weight, dst, dst_weight, rc.height, rc.width);
            cudaSafeCall(cudaGetLastError());
        }
61

62
        // Normalizes an accumulated image in place by its per-pixel short weight.
        //
        // One thread processes one pixel (x, y) of a width x height region; the launch is
        // expected to be a 2D grid covering that region. Rows of src are accessed as packed
        // short3 pixels. Each channel is scaled by 256 and divided by the pixel's weight.
        //
        // Pixels whose weight is 0 are written as (0, 0, 0): integer division by zero is
        // undefined, so the quotient must not be evaluated for them.
        __global__ void normalizeUsingWeightKernel16S(const PtrStep<short> weight, PtrStep<short> src,
            const int width, const int height)
        {
            int x = (blockIdx.x * blockDim.x) + threadIdx.x;
            int y = (blockIdx.y * blockDim.y) + threadIdx.y;

            if (x < width && y < height)
            {
                short3* pixel = (short3*)src.ptr(y) + x;
                const short3 v = *pixel;
                const int w = weight.ptr(y)[x];

                if (w == 0)
                {
                    // No weight was accumulated for this pixel; avoid dividing by zero.
                    *pixel = make_short3(0, 0, 0);
                    return;
                }

                // Multiply by 256 rather than shifting left by 8: the value is the same, but
                // the multiplication is well defined for negative channel values as well.
                *pixel = make_short3(short((v.x * 256) / w),
                    short((v.y * 256) / w), short((v.z * 256) / w));
            }
        }
76

77
        // Host launcher for normalizeUsingWeightKernel16S.
        //
        // Covers a width x height region with 16x16 thread blocks. The kernel is launched
        // without an explicit stream. The launch status is checked right after the launch,
        // in the same way as the addSrcWeightGpu* launchers, so that a bad launch
        // configuration is reported here instead of surfacing at a later, unrelated CUDA call.
        void normalizeUsingWeightMapGpu16S(const PtrStep<short> weight, PtrStep<short> src,
                                           const int width, const int height)
        {
            dim3 threads(16, 16);
            dim3 grid(divUp(width, threads.x), divUp(height, threads.y));
            normalizeUsingWeightKernel16S<<<grid, threads>>> (weight, src, width, height);
            cudaSafeCall(cudaGetLastError());
        }
84

85
        // Normalizes an accumulated image in place by its per-pixel float weight.
        //
        // One thread processes one pixel (x, y) of a width x height region; the launch is
        // expected to be a 2D grid covering that region. Rows of src are accessed as packed
        // short3 pixels. Each channel is divided by (weight + WEIGHT_EPS) and converted back
        // to short; the epsilon keeps the divisor non-zero when the weight is 0.
        __global__ void normalizeUsingWeightKernel32F(const PtrStepf weight, PtrStep<short> src,
            const int width, const int height)
        {
            const int x = (blockIdx.x * blockDim.x) + threadIdx.x;
            const int y = (blockIdx.y * blockDim.y) + threadIdx.y;

            // Threads that fall outside the region (partial edge blocks) do nothing.
            if (x >= width || y >= height)
                return;

            const float WEIGHT_EPS = 1e-5f;

            short3& pixel = reinterpret_cast<short3*>(src.ptr(y))[x];
            const short3 v = pixel;

            // The divisor is shared by all three channels.
            const float denom = weight.ptr(y)[x] + WEIGHT_EPS;

            pixel = make_short3(static_cast<short>(v.x / denom),
                static_cast<short>(v.y / denom),
                static_cast<short>(v.z / denom));
        }
101

102
        // Host launcher for normalizeUsingWeightKernel32F.
        //
        // Covers a width x height region with 16x16 thread blocks. The kernel is launched
        // without an explicit stream. The launch status is checked right after the launch,
        // in the same way as the addSrcWeightGpu* launchers, so that a bad launch
        // configuration is reported here instead of surfacing at a later, unrelated CUDA call.
        void normalizeUsingWeightMapGpu32F(const PtrStepf weight, PtrStep<short> src,
                                           const int width, const int height)
        {
            dim3 threads(16, 16);
            dim3 grid(divUp(width, threads.x), divUp(height, threads.y));
            normalizeUsingWeightKernel32F<<<grid, threads>>> (weight, src, width, height);
            cudaSafeCall(cudaGetLastError());
        }
109
    }
110
}}}
111

112
#endif
113

114
Product

Resources

Company