// Path: blob/master/modules/stitching/src/cuda/multiband_blend.cu
// (web viewer header: 16337 views)
#if !defined CUDA_DISABLER

#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/types.hpp"

namespace cv { namespace cuda { namespace device
{
    namespace blend
    {
        // Accumulates one weighted 3-channel short image into dst / dst_weight.
        //
        // Launch layout: 2D grid of 2D blocks, one thread per pixel; threads whose
        // (x, y) falls outside rows x cols do nothing.
        //
        // For every pixel:
        //   dst.c      += (src.c * w) >> 8     for c in {x, y, z}
        //   dst_weight += w
        // The >> 8 here is the counterpart of the << 8 applied in
        // normalizeUsingWeightKernel16S below.
        // NOTE(review): this suggests the short weights are fixed point with
        // 8 fractional bits - confirm against the caller.
        __global__ void addSrcWeightKernel16S(const PtrStep<short> src, const PtrStep<short> src_weight,
                                              PtrStep<short> dst, PtrStep<short> dst_weight, int rows, int cols)
        {
            const int x = blockIdx.x * blockDim.x + threadIdx.x;
            const int y = blockIdx.y * blockDim.y + threadIdx.y;

            if (y < rows && x < cols)
            {
                // Rows are reinterpreted as packed short3 pixels.
                const short3 v = ((const short3*)src.ptr(y))[x];
                const short w = src_weight.ptr(y)[x];

                short3* out = (short3*)dst.ptr(y) + x;
                // v.c * w is evaluated in int; only the shifted result is narrowed.
                out->x += short((v.x * w) >> 8);
                out->y += short((v.y * w) >> 8);
                out->z += short((v.z * w) >> 8);

                dst_weight.ptr(y)[x] += w;
            }
        }

        // Host wrapper: launches addSrcWeightKernel16S over an rc.height x rc.width
        // area using 16x16 thread blocks. Only the width and height of rc are read.
        // No stream argument is passed to the launch; the launch status is handed
        // to cudaSafeCall.
        void addSrcWeightGpu16S(const PtrStep<short> src, const PtrStep<short> src_weight,
                                PtrStep<short> dst, PtrStep<short> dst_weight, cv::Rect &rc)
        {
            dim3 threads(16, 16);
            dim3 grid(divUp(rc.width, threads.x), divUp(rc.height, threads.y));
            addSrcWeightKernel16S<<<grid, threads>>>(src, src_weight, dst, dst_weight, rc.height, rc.width);
            cudaSafeCall(cudaGetLastError());
        }

        // Float-weight variant of addSrcWeightKernel16S.
        //
        // Launch layout: 2D grid of 2D blocks, one thread per pixel; threads whose
        // (x, y) falls outside rows x cols do nothing.
        //
        // For every pixel:
        //   dst.c      += short(src.c * w)     for c in {x, y, z}
        //   dst_weight += w
        __global__ void addSrcWeightKernel32F(const PtrStep<short> src, const PtrStepf src_weight,
                                              PtrStep<short> dst, PtrStepf dst_weight, int rows, int cols)
        {
            const int x = blockIdx.x * blockDim.x + threadIdx.x;
            const int y = blockIdx.y * blockDim.y + threadIdx.y;

            if (y < rows && x < cols)
            {
                const short3 v = ((const short3*)src.ptr(y))[x];
                const float w = src_weight.ptr(y)[x];

                short3* out = (short3*)dst.ptr(y) + x;
                out->x += static_cast<short>(v.x * w);
                out->y += static_cast<short>(v.y * w);
                out->z += static_cast<short>(v.z * w);

                dst_weight.ptr(y)[x] += w;
            }
        }

        // Host wrapper: launches addSrcWeightKernel32F over an rc.height x rc.width
        // area using 16x16 thread blocks. Only the width and height of rc are read.
        // No stream argument is passed to the launch; the launch status is handed
        // to cudaSafeCall.
        void addSrcWeightGpu32F(const PtrStep<short> src, const PtrStepf src_weight,
                                PtrStep<short> dst, PtrStepf dst_weight, cv::Rect &rc)
        {
            dim3 threads(16, 16);
            dim3 grid(divUp(rc.width, threads.x), divUp(rc.height, threads.y));
            addSrcWeightKernel32F<<<grid, threads>>>(src, src_weight, dst, dst_weight, rc.height, rc.width);
            cudaSafeCall(cudaGetLastError());
        }

        // Divides every 3-channel short pixel of src, in place, by its short weight:
        //   src.c = short((src.c << 8) / w)    for c in {x, y, z}
        //
        // Launch layout: 2D grid of 2D blocks, one thread per pixel; threads whose
        // (x, y) falls outside width x height do nothing.
        //
        // Pixels whose weight is 0 are left untouched: integer division by zero is
        // undefined in device code, so there is no meaningful quotient to store.
        __global__ void normalizeUsingWeightKernel16S(const PtrStep<short> weight, PtrStep<short> src,
                                                      const int width, const int height)
        {
            const int x = (blockIdx.x * blockDim.x) + threadIdx.x;
            const int y = (blockIdx.y * blockDim.y) + threadIdx.y;

            if (x < width && y < height)
            {
                const short w = weight.ptr(y)[x];
                if (w == 0)
                    return; // nothing accumulated here; avoid dividing by zero

                short3* px = (short3*)src.ptr(y) + x;
                const short3 v = *px;
                // v.c << 8 is evaluated in int before the division.
                *px = make_short3(short((v.x << 8) / w),
                                  short((v.y << 8) / w),
                                  short((v.z << 8) / w));
            }
        }

        // Host wrapper: launches normalizeUsingWeightKernel16S over a
        // width x height area using 16x16 thread blocks.
        // An empty area is a no-op (a zero-sized grid is not a valid launch).
        // The launch status is handed to cudaSafeCall, matching addSrcWeightGpu16S,
        // so a failed launch is reported here rather than by a later CUDA call.
        void normalizeUsingWeightMapGpu16S(const PtrStep<short> weight, PtrStep<short> src,
                                           const int width, const int height)
        {
            if (width <= 0 || height <= 0)
                return;

            dim3 threads(16, 16);
            dim3 grid(divUp(width, threads.x), divUp(height, threads.y));
            normalizeUsingWeightKernel16S<<<grid, threads>>>(weight, src, width, height);
            cudaSafeCall(cudaGetLastError());
        }

        // Divides every 3-channel short pixel of src, in place, by its float weight:
        //   src.c = short(src.c / (w + WEIGHT_EPS))    for c in {x, y, z}
        //
        // Launch layout: 2D grid of 2D blocks, one thread per pixel; threads whose
        // (x, y) falls outside width x height do nothing.
        __global__ void normalizeUsingWeightKernel32F(const PtrStepf weight, PtrStep<short> src,
                                                      const int width, const int height)
        {
            const int x = (blockIdx.x * blockDim.x) + threadIdx.x;
            const int y = (blockIdx.y * blockDim.y) + threadIdx.y;

            if (x < width && y < height)
            {
                // Added to the weight so that a weight of exactly 0 does not
                // produce a division by zero.
                const float WEIGHT_EPS = 1e-5f;

                short3* px = (short3*)src.ptr(y) + x;
                const short3 v = *px;
                const float denom = weight.ptr(y)[x] + WEIGHT_EPS;
                *px = make_short3(static_cast<short>(v.x / denom),
                                  static_cast<short>(v.y / denom),
                                  static_cast<short>(v.z / denom));
            }
        }

        // Host wrapper: launches normalizeUsingWeightKernel32F over a
        // width x height area using 16x16 thread blocks.
        // An empty area is a no-op (a zero-sized grid is not a valid launch).
        // The launch status is handed to cudaSafeCall, matching addSrcWeightGpu32F,
        // so a failed launch is reported here rather than by a later CUDA call.
        void normalizeUsingWeightMapGpu32F(const PtrStepf weight, PtrStep<short> src,
                                           const int width, const int height)
        {
            if (width <= 0 || height <= 0)
                return;

            dim3 threads(16, 16);
            dim3 grid(divUp(width, threads.x), divUp(height, threads.y));
            normalizeUsingWeightKernel32F<<<grid, threads>>>(weight, src, width, height);
            cudaSafeCall(cudaGetLastError());
        }
    }
}}}

#endif