Path: blob/master/modules/superres/src/btv_l1_cuda.cpp
16339 views
/*M///////////////////////////////////////////////////////////////////////////////////////1//2// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.3//4// By downloading, copying, installing or using the software you agree to this license.5// If you do not agree to this license, do not download, install,6// copy or use the software.7//8//9// License Agreement10// For Open Source Computer Vision Library11//12// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.13// Copyright (C) 2009, Willow Garage Inc., all rights reserved.14// Third party copyrights are property of their respective owners.15//16// Redistribution and use in source and binary forms, with or without modification,17// are permitted provided that the following conditions are met:18//19// * Redistribution's of source code must retain the above copyright notice,20// this list of conditions and the following disclaimer.21//22// * Redistribution's in binary form must reproduce the above copyright notice,23// this list of conditions and the following disclaimer in the documentation24// and/or other materials provided with the distribution.25//26// * The name of the copyright holders may not be used to endorse or promote products27// derived from this software without specific prior written permission.28//29// This software is provided by the copyright holders and contributors "as is" and30// any express or implied warranties, including, but not limited to, the implied31// warranties of merchantability and fitness for a particular purpose are disclaimed.32// In no event shall the Intel Corporation or contributors be liable for any direct,33// indirect, incidental, special, exemplary, or consequential damages34// (including, but not limited to, procurement of substitute goods or services;35// loss of use, data, or profits; or business interruption) however caused36// and on any theory of liability, whether in contract, strict liability,37// or tort (including negligence or otherwise) arising in any way out of38// the use of this software, even if advised of the possibility of such damage.39//40//M*/4142// S. Farsiu , D. Robinson, M. Elad, P. Milanfar. Fast and robust multiframe super resolution.43// Dennis Mitzel, Thomas Pock, Thomas Schoenemann, Daniel Cremers. Video Super Resolution using Duality Based TV-L1 Optical Flow.4445#include "precomp.hpp"4647using namespace cv;48using namespace cv::cuda;49using namespace cv::superres;50using namespace cv::superres::detail;5152#if !defined(HAVE_CUDA) || !defined(HAVE_OPENCV_CUDAARITHM) || !defined(HAVE_OPENCV_CUDAWARPING) || !defined(HAVE_OPENCV_CUDAFILTERS)5354Ptr<SuperResolution> cv::superres::createSuperResolution_BTVL1_CUDA()55{56CV_Error(Error::StsNotImplemented, "The called functionality is disabled for current build or platform");57}5859#else // HAVE_CUDA6061namespace btv_l1_cudev62{63void buildMotionMaps(PtrStepSzf forwardMotionX, PtrStepSzf forwardMotionY,64PtrStepSzf backwardMotionX, PtrStepSzf bacwardMotionY,65PtrStepSzf forwardMapX, PtrStepSzf forwardMapY,66PtrStepSzf backwardMapX, PtrStepSzf backwardMapY);6768template <int cn>69void upscale(const PtrStepSzb src, PtrStepSzb dst, int scale, cudaStream_t stream);7071void diffSign(PtrStepSzf src1, PtrStepSzf src2, PtrStepSzf dst, cudaStream_t stream);7273void loadBtvWeights(const float* weights, size_t count);74template <int cn> void calcBtvRegularization(PtrStepSzb src, PtrStepSzb dst, int ksize);75}7677namespace78{79void calcRelativeMotions(const std::vector<std::pair<GpuMat, GpuMat> >& forwardMotions, const std::vector<std::pair<GpuMat, GpuMat> >& backwardMotions,80std::vector<std::pair<GpuMat, GpuMat> >& relForwardMotions, std::vector<std::pair<GpuMat, GpuMat> >& relBackwardMotions,81int baseIdx, Size size)82{83const int count = static_cast<int>(forwardMotions.size());8485relForwardMotions.resize(count);86relForwardMotions[baseIdx].first.create(size, CV_32FC1);87relForwardMotions[baseIdx].first.setTo(Scalar::all(0));88relForwardMotions[baseIdx].second.create(size, CV_32FC1);89relForwardMotions[baseIdx].second.setTo(Scalar::all(0));9091relBackwardMotions.resize(count);92relBackwardMotions[baseIdx].first.create(size, CV_32FC1);93relBackwardMotions[baseIdx].first.setTo(Scalar::all(0));94relBackwardMotions[baseIdx].second.create(size, CV_32FC1);95relBackwardMotions[baseIdx].second.setTo(Scalar::all(0));9697for (int i = baseIdx - 1; i >= 0; --i)98{99cuda::add(relForwardMotions[i + 1].first, forwardMotions[i].first, relForwardMotions[i].first);100cuda::add(relForwardMotions[i + 1].second, forwardMotions[i].second, relForwardMotions[i].second);101102cuda::add(relBackwardMotions[i + 1].first, backwardMotions[i + 1].first, relBackwardMotions[i].first);103cuda::add(relBackwardMotions[i + 1].second, backwardMotions[i + 1].second, relBackwardMotions[i].second);104}105106for (int i = baseIdx + 1; i < count; ++i)107{108cuda::add(relForwardMotions[i - 1].first, backwardMotions[i].first, relForwardMotions[i].first);109cuda::add(relForwardMotions[i - 1].second, backwardMotions[i].second, relForwardMotions[i].second);110111cuda::add(relBackwardMotions[i - 1].first, forwardMotions[i - 1].first, relBackwardMotions[i].first);112cuda::add(relBackwardMotions[i - 1].second, forwardMotions[i - 1].second, relBackwardMotions[i].second);113}114}115116void upscaleMotions(const std::vector<std::pair<GpuMat, GpuMat> >& lowResMotions, std::vector<std::pair<GpuMat, GpuMat> >& highResMotions, int scale)117{118highResMotions.resize(lowResMotions.size());119120for (size_t i = 0; i < lowResMotions.size(); ++i)121{122cuda::resize(lowResMotions[i].first, highResMotions[i].first, Size(), scale, scale, INTER_CUBIC);123cuda::resize(lowResMotions[i].second, highResMotions[i].second, Size(), scale, scale, INTER_CUBIC);124125cuda::multiply(highResMotions[i].first, Scalar::all(scale), highResMotions[i].first);126cuda::multiply(highResMotions[i].second, Scalar::all(scale), highResMotions[i].second);127}128}129130void buildMotionMaps(const std::pair<GpuMat, GpuMat>& forwardMotion, const std::pair<GpuMat, GpuMat>& backwardMotion,131std::pair<GpuMat, GpuMat>& forwardMap, std::pair<GpuMat, GpuMat>& backwardMap)132{133forwardMap.first.create(forwardMotion.first.size(), CV_32FC1);134forwardMap.second.create(forwardMotion.first.size(), CV_32FC1);135136backwardMap.first.create(forwardMotion.first.size(), CV_32FC1);137backwardMap.second.create(forwardMotion.first.size(), CV_32FC1);138139btv_l1_cudev::buildMotionMaps(forwardMotion.first, forwardMotion.second,140backwardMotion.first, backwardMotion.second,141forwardMap.first, forwardMap.second,142backwardMap.first, backwardMap.second);143}144145void upscale(const GpuMat& src, GpuMat& dst, int scale, Stream& stream)146{147typedef void (*func_t)(const PtrStepSzb src, PtrStepSzb dst, int scale, cudaStream_t stream);148static const func_t funcs[] =149{1500, btv_l1_cudev::upscale<1>, 0, btv_l1_cudev::upscale<3>, btv_l1_cudev::upscale<4>151};152153CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 );154155dst.create(src.rows * scale, src.cols * scale, src.type());156dst.setTo(Scalar::all(0));157158const func_t func = funcs[src.channels()];159160func(src, dst, scale, StreamAccessor::getStream(stream));161}162163void diffSign(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)164{165dst.create(src1.size(), src1.type());166167btv_l1_cudev::diffSign(src1.reshape(1), src2.reshape(1), dst.reshape(1), StreamAccessor::getStream(stream));168}169170void calcBtvWeights(int btvKernelSize, double alpha, std::vector<float>& btvWeights)171{172const size_t size = btvKernelSize * btvKernelSize;173174btvWeights.resize(size);175176const int ksize = (btvKernelSize - 1) / 2;177const float alpha_f = static_cast<float>(alpha);178179for (int m = 0, ind = 0; m <= ksize; ++m)180{181for (int l = ksize; l + m >= 0; --l, ++ind)182btvWeights[ind] = pow(alpha_f, std::abs(m) + std::abs(l));183}184185btv_l1_cudev::loadBtvWeights(&btvWeights[0], size);186}187188void calcBtvRegularization(const GpuMat& src, GpuMat& dst, int btvKernelSize)189{190typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, int ksize);191static const func_t funcs[] =192{1930,194btv_l1_cudev::calcBtvRegularization<1>,1950,196btv_l1_cudev::calcBtvRegularization<3>,197btv_l1_cudev::calcBtvRegularization<4>198};199200dst.create(src.size(), src.type());201dst.setTo(Scalar::all(0));202203const int ksize = (btvKernelSize - 1) / 2;204205funcs[src.channels()](src, dst, ksize);206}207208class BTVL1_CUDA_Base : public cv::superres::SuperResolution209{210public:211BTVL1_CUDA_Base();212213void process(const std::vector<GpuMat>& src, GpuMat& dst,214const std::vector<std::pair<GpuMat, GpuMat> >& forwardMotions, const std::vector<std::pair<GpuMat, GpuMat> >& backwardMotions,215int baseIdx);216217void collectGarbage();218219inline int getScale() const CV_OVERRIDE { return scale_; }220inline void setScale(int val) CV_OVERRIDE { scale_ = val; }221inline int getIterations() const CV_OVERRIDE { return iterations_; }222inline void setIterations(int val) CV_OVERRIDE { iterations_ = val; }223inline double getTau() const CV_OVERRIDE { return tau_; }224inline void setTau(double val) CV_OVERRIDE { tau_ = val; }225inline double getLabmda() const CV_OVERRIDE { return lambda_; }226inline void setLabmda(double val) CV_OVERRIDE { lambda_ = val; }227inline double getAlpha() const CV_OVERRIDE { return alpha_; }228inline void setAlpha(double val) CV_OVERRIDE { alpha_ = val; }229inline int getKernelSize() const CV_OVERRIDE { return btvKernelSize_; }230inline void setKernelSize(int val) CV_OVERRIDE { btvKernelSize_ = val; }231inline int getBlurKernelSize() const CV_OVERRIDE { return blurKernelSize_; }232inline void setBlurKernelSize(int val) CV_OVERRIDE { blurKernelSize_ = val; }233inline double getBlurSigma() const CV_OVERRIDE { return blurSigma_; }234inline void setBlurSigma(double val) CV_OVERRIDE { blurSigma_ = val; }235inline int getTemporalAreaRadius() const CV_OVERRIDE { return temporalAreaRadius_; }236inline void setTemporalAreaRadius(int val) CV_OVERRIDE { temporalAreaRadius_ = val; }237inline Ptr<cv::superres::DenseOpticalFlowExt> getOpticalFlow() const CV_OVERRIDE { return opticalFlow_; }238inline void setOpticalFlow(const Ptr<cv::superres::DenseOpticalFlowExt>& val) CV_OVERRIDE { opticalFlow_ = val; }239240protected:241int scale_;242int iterations_;243double lambda_;244double tau_;245double alpha_;246int btvKernelSize_;247int blurKernelSize_;248double blurSigma_;249int temporalAreaRadius_;250Ptr<cv::superres::DenseOpticalFlowExt> opticalFlow_;251252private:253std::vector<Ptr<cuda::Filter> > filters_;254int curBlurKernelSize_;255double curBlurSigma_;256int curSrcType_;257258std::vector<float> btvWeights_;259int curBtvKernelSize_;260double curAlpha_;261262std::vector<std::pair<GpuMat, GpuMat> > lowResForwardMotions_;263std::vector<std::pair<GpuMat, GpuMat> > lowResBackwardMotions_;264265std::vector<std::pair<GpuMat, GpuMat> > highResForwardMotions_;266std::vector<std::pair<GpuMat, GpuMat> > highResBackwardMotions_;267268std::vector<std::pair<GpuMat, GpuMat> > forwardMaps_;269std::vector<std::pair<GpuMat, GpuMat> > backwardMaps_;270271GpuMat highRes_;272273std::vector<Stream> streams_;274std::vector<GpuMat> diffTerms_;275std::vector<GpuMat> a_, b_, c_;276GpuMat regTerm_;277};278279BTVL1_CUDA_Base::BTVL1_CUDA_Base()280{281scale_ = 4;282iterations_ = 180;283lambda_ = 0.03;284tau_ = 1.3;285alpha_ = 0.7;286btvKernelSize_ = 7;287blurKernelSize_ = 5;288blurSigma_ = 0.0;289290#ifdef HAVE_OPENCV_CUDAOPTFLOW291opticalFlow_ = createOptFlow_Farneback_CUDA();292#else293opticalFlow_ = createOptFlow_Farneback();294#endif295temporalAreaRadius_ = 0;296297curBlurKernelSize_ = -1;298curBlurSigma_ = -1.0;299curSrcType_ = -1;300301curBtvKernelSize_ = -1;302curAlpha_ = -1.0;303}304305void BTVL1_CUDA_Base::process(const std::vector<GpuMat>& src, GpuMat& dst,306const std::vector<std::pair<GpuMat, GpuMat> >& forwardMotions, const std::vector<std::pair<GpuMat, GpuMat> >& backwardMotions,307int baseIdx)308{309CV_Assert( scale_ > 1 );310CV_Assert( iterations_ > 0 );311CV_Assert( tau_ > 0.0 );312CV_Assert( alpha_ > 0.0 );313CV_Assert( btvKernelSize_ > 0 && btvKernelSize_ <= 16 );314CV_Assert( blurKernelSize_ > 0 );315CV_Assert( blurSigma_ >= 0.0 );316317// update blur filter and btv weights318319if (filters_.size() != src.size() || blurKernelSize_ != curBlurKernelSize_ || blurSigma_ != curBlurSigma_ || src[0].type() != curSrcType_)320{321filters_.resize(src.size());322for (size_t i = 0; i < src.size(); ++i)323filters_[i] = cuda::createGaussianFilter(src[0].type(), -1, Size(blurKernelSize_, blurKernelSize_), blurSigma_);324curBlurKernelSize_ = blurKernelSize_;325curBlurSigma_ = blurSigma_;326curSrcType_ = src[0].type();327}328329if (btvWeights_.empty() || btvKernelSize_ != curBtvKernelSize_ || alpha_ != curAlpha_)330{331calcBtvWeights(btvKernelSize_, alpha_, btvWeights_);332curBtvKernelSize_ = btvKernelSize_;333curAlpha_ = alpha_;334}335336// calc motions between input frames337338calcRelativeMotions(forwardMotions, backwardMotions, lowResForwardMotions_, lowResBackwardMotions_, baseIdx, src[0].size());339340upscaleMotions(lowResForwardMotions_, highResForwardMotions_, scale_);341upscaleMotions(lowResBackwardMotions_, highResBackwardMotions_, scale_);342343forwardMaps_.resize(highResForwardMotions_.size());344backwardMaps_.resize(highResForwardMotions_.size());345for (size_t i = 0; i < highResForwardMotions_.size(); ++i)346buildMotionMaps(highResForwardMotions_[i], highResBackwardMotions_[i], forwardMaps_[i], backwardMaps_[i]);347348// initial estimation349350const Size lowResSize = src[0].size();351const Size highResSize(lowResSize.width * scale_, lowResSize.height * scale_);352353cuda::resize(src[baseIdx], highRes_, highResSize, 0, 0, INTER_CUBIC);354355// iterations356357streams_.resize(src.size());358diffTerms_.resize(src.size());359a_.resize(src.size());360b_.resize(src.size());361c_.resize(src.size());362363for (int i = 0; i < iterations_; ++i)364{365for (size_t k = 0; k < src.size(); ++k)366{367// a = M * Ih368cuda::remap(highRes_, a_[k], backwardMaps_[k].first, backwardMaps_[k].second, INTER_NEAREST, BORDER_REPLICATE, Scalar(), streams_[k]);369// b = HM * Ih370filters_[k]->apply(a_[k], b_[k], streams_[k]);371// c = DHF * Ih372cuda::resize(b_[k], c_[k], lowResSize, 0, 0, INTER_NEAREST, streams_[k]);373374diffSign(src[k], c_[k], c_[k], streams_[k]);375376// a = Dt * diff377upscale(c_[k], a_[k], scale_, streams_[k]);378// b = HtDt * diff379filters_[k]->apply(a_[k], b_[k], streams_[k]);380// diffTerm = MtHtDt * diff381cuda::remap(b_[k], diffTerms_[k], forwardMaps_[k].first, forwardMaps_[k].second, INTER_NEAREST, BORDER_REPLICATE, Scalar(), streams_[k]);382}383384if (lambda_ > 0)385{386calcBtvRegularization(highRes_, regTerm_, btvKernelSize_);387cuda::addWeighted(highRes_, 1.0, regTerm_, -tau_ * lambda_, 0.0, highRes_);388}389390for (size_t k = 0; k < src.size(); ++k)391{392streams_[k].waitForCompletion();393cuda::addWeighted(highRes_, 1.0, diffTerms_[k], tau_, 0.0, highRes_);394}395}396397Rect inner(btvKernelSize_, btvKernelSize_, highRes_.cols - 2 * btvKernelSize_, highRes_.rows - 2 * btvKernelSize_);398highRes_(inner).copyTo(dst);399}400401void BTVL1_CUDA_Base::collectGarbage()402{403filters_.clear();404405lowResForwardMotions_.clear();406lowResBackwardMotions_.clear();407408highResForwardMotions_.clear();409highResBackwardMotions_.clear();410411forwardMaps_.clear();412backwardMaps_.clear();413414highRes_.release();415416diffTerms_.clear();417a_.clear();418b_.clear();419c_.clear();420regTerm_.release();421}422423////////////////////////////////////////////////////////////424425class BTVL1_CUDA : public BTVL1_CUDA_Base426{427public:428BTVL1_CUDA();429430void collectGarbage();431432protected:433void initImpl(Ptr<FrameSource>& frameSource);434void processImpl(Ptr<FrameSource>& frameSource, OutputArray output);435436private:437void readNextFrame(Ptr<FrameSource>& frameSource);438void processFrame(int idx);439440GpuMat curFrame_;441GpuMat prevFrame_;442443std::vector<GpuMat> frames_;444std::vector<std::pair<GpuMat, GpuMat> > forwardMotions_;445std::vector<std::pair<GpuMat, GpuMat> > backwardMotions_;446std::vector<GpuMat> outputs_;447448int storePos_;449int procPos_;450int outPos_;451452std::vector<GpuMat> srcFrames_;453std::vector<std::pair<GpuMat, GpuMat> > srcForwardMotions_;454std::vector<std::pair<GpuMat, GpuMat> > srcBackwardMotions_;455GpuMat finalOutput_;456};457458BTVL1_CUDA::BTVL1_CUDA()459{460temporalAreaRadius_ = 4;461}462463void BTVL1_CUDA::collectGarbage()464{465curFrame_.release();466prevFrame_.release();467468frames_.clear();469forwardMotions_.clear();470backwardMotions_.clear();471outputs_.clear();472473srcFrames_.clear();474srcForwardMotions_.clear();475srcBackwardMotions_.clear();476finalOutput_.release();477478SuperResolution::collectGarbage();479BTVL1_CUDA_Base::collectGarbage();480}481482void BTVL1_CUDA::initImpl(Ptr<FrameSource>& frameSource)483{484const int cacheSize = 2 * temporalAreaRadius_ + 1;485486frames_.resize(cacheSize);487forwardMotions_.resize(cacheSize);488backwardMotions_.resize(cacheSize);489outputs_.resize(cacheSize);490491storePos_ = -1;492493for (int t = -temporalAreaRadius_; t <= temporalAreaRadius_; ++t)494readNextFrame(frameSource);495496for (int i = 0; i <= temporalAreaRadius_; ++i)497processFrame(i);498499procPos_ = temporalAreaRadius_;500outPos_ = -1;501}502503void BTVL1_CUDA::processImpl(Ptr<FrameSource>& frameSource, OutputArray _output)504{505if (outPos_ >= storePos_)506{507_output.release();508return;509}510511readNextFrame(frameSource);512513if (procPos_ < storePos_)514{515++procPos_;516processFrame(procPos_);517}518519++outPos_;520const GpuMat& curOutput = at(outPos_, outputs_);521522if (_output.kind() == _InputArray::CUDA_GPU_MAT)523curOutput.convertTo(_output.getGpuMatRef(), CV_8U);524else525{526curOutput.convertTo(finalOutput_, CV_8U);527arrCopy(finalOutput_, _output);528}529}530531void BTVL1_CUDA::readNextFrame(Ptr<FrameSource>& frameSource)532{533frameSource->nextFrame(curFrame_);534535if (curFrame_.empty())536return;537538++storePos_;539curFrame_.convertTo(at(storePos_, frames_), CV_32F);540541if (storePos_ > 0)542{543std::pair<GpuMat, GpuMat>& forwardMotion = at(storePos_ - 1, forwardMotions_);544std::pair<GpuMat, GpuMat>& backwardMotion = at(storePos_, backwardMotions_);545546opticalFlow_->calc(prevFrame_, curFrame_, forwardMotion.first, forwardMotion.second);547opticalFlow_->calc(curFrame_, prevFrame_, backwardMotion.first, backwardMotion.second);548}549550curFrame_.copyTo(prevFrame_);551}552553void BTVL1_CUDA::processFrame(int idx)554{555const int startIdx = std::max(idx - temporalAreaRadius_, 0);556const int procIdx = idx;557const int endIdx = std::min(startIdx + 2 * temporalAreaRadius_, storePos_);558559const int count = endIdx - startIdx + 1;560561srcFrames_.resize(count);562srcForwardMotions_.resize(count);563srcBackwardMotions_.resize(count);564565int baseIdx = -1;566567for (int i = startIdx, k = 0; i <= endIdx; ++i, ++k)568{569if (i == procIdx)570baseIdx = k;571572srcFrames_[k] = at(i, frames_);573574if (i < endIdx)575srcForwardMotions_[k] = at(i, forwardMotions_);576if (i > startIdx)577srcBackwardMotions_[k] = at(i, backwardMotions_);578}579580process(srcFrames_, at(idx, outputs_), srcForwardMotions_, srcBackwardMotions_, baseIdx);581}582}583584Ptr<SuperResolution> cv::superres::createSuperResolution_BTVL1_CUDA()585{586return makePtr<BTVL1_CUDA>();587}588589#endif // HAVE_CUDA590591592