// Path: blob/master/modules/core/src/cuda_host_mem.cpp
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"
#include <map>

using namespace cv;
using namespace cv::cuda;

#ifdef HAVE_CUDA

namespace {

// MatAllocator backed by CUDA page-locked (pinned) host memory.
// Memory obtained here is allocated with cudaHostAlloc using the flags
// supplied at construction (default / mapped / write-combined), so Mat
// objects created through this allocator can be used for fast async
// host<->device transfers.
class HostMemAllocator : public MatAllocator
{
public:
    // flags: the cudaHostAlloc flag word (e.g. cudaHostAllocDefault,
    // cudaHostAllocMapped, cudaHostAllocWriteCombined) used for every
    // allocation made by this allocator instance.
    explicit HostMemAllocator(unsigned int flags) : flags_(flags)
    {
    }

    // Allocate (or wrap) a buffer for a dims-dimensional matrix of the given
    // type. If data0 is non-null the user's buffer is wrapped and marked
    // USER_ALLOCATED (it will not be freed by deallocate()); otherwise pinned
    // memory of the computed total size is obtained via cudaHostAlloc.
    // On success returns a new UMatData owned by the caller's ref-counting.
    UMatData* allocate(int dims, const int* sizes, int type,
                       void* data0, size_t* step,
                       AccessFlag /*flags*/, UMatUsageFlags /*usageFlags*/) const CV_OVERRIDE
    {
        // Walk dimensions from innermost to outermost, accumulating the total
        // byte size and filling in (or validating) per-dimension steps.
        size_t total = CV_ELEM_SIZE(type);
        for (int i = dims-1; i >= 0; i--)
        {
            if (step)
            {
                if (data0 && step[i] != CV_AUTOSTEP)
                {
                    // Caller supplied both data and an explicit step: the step
                    // must be at least the tightly-packed row size.
                    CV_Assert(total <= step[i]);
                    total = step[i];
                }
                else
                {
                    // Auto step: rows are tightly packed.
                    step[i] = total;
                }
            }

            total *= sizes[i];
        }

        UMatData* u = new UMatData(this);
        u->size = total;

        if (data0)
        {
            // Wrap user memory without taking ownership.
            u->data = u->origdata = static_cast<uchar*>(data0);
            u->flags |= UMatData::USER_ALLOCATED;
        }
        else
        {
            void* ptr = 0;
            cudaSafeCall( cudaHostAlloc(&ptr, total, flags_) );

            u->data = u->origdata = static_cast<uchar*>(ptr);
        }

        return u;
    }

    // Second-phase allocate hook: nothing extra to do for already-created
    // UMatData; just report whether the descriptor exists.
    bool allocate(UMatData* u, AccessFlag /*accessFlags*/, UMatUsageFlags /*usageFlags*/) const CV_OVERRIDE
    {
        return (u != NULL);
    }

    // Release a UMatData previously produced by allocate(). Frees the pinned
    // buffer only when the refcount has dropped to zero and the memory was
    // allocated here (not USER_ALLOCATED).
    void deallocate(UMatData* u) const CV_OVERRIDE
    {
        if (!u)
            return;

        CV_Assert(u->urefcount >= 0);
        CV_Assert(u->refcount >= 0);

        if (u->refcount == 0)
        {
            if ( !(u->flags & UMatData::USER_ALLOCATED) )
            {
                // NOTE(review): the cudaFreeHost return code is intentionally
                // not routed through cudaSafeCall here — presumably because
                // deallocate can run during destruction where throwing is
                // unsafe; confirm against the project's error-handling policy.
                cudaFreeHost(u->origdata);
                u->origdata = 0;
            }

            delete u;
        }
    }

private:
    unsigned int flags_;  // cudaHostAlloc flag word used for all allocations
};

} // namespace

#endif

// Return a process-wide MatAllocator that hands out pinned host memory of the
// requested kind. One allocator per cudaHostAlloc flag value is cached in a
// static map and reused across calls; the returned pointer stays owned by
// that cache and must not be deleted by the caller.
// Throws (CV_Error / throw_no_cuda) for invalid alloc types or builds
// without CUDA support.
MatAllocator* cv::cuda::HostMem::getAllocator(AllocType alloc_type)
{
#ifndef HAVE_CUDA
    CV_UNUSED(alloc_type);
    throw_no_cuda();
#else
    // NOTE(review): this static map is created/mutated without any visible
    // synchronization — confirm that first use is serialized by callers or
    // that allocator lookup only happens under an existing lock.
    static std::map<unsigned int, Ptr<MatAllocator> > allocators;

    unsigned int flag = cudaHostAllocDefault;

    // Translate the public AllocType into the corresponding cudaHostAlloc flag.
    switch (alloc_type)
    {
    case PAGE_LOCKED:    flag = cudaHostAllocDefault; break;
    case SHARED:         flag = cudaHostAllocMapped; break;
    case WRITE_COMBINED: flag = cudaHostAllocWriteCombined; break;
    default:             CV_Error(cv::Error::StsBadFlag, "Invalid alloc type");
    }

    Ptr<MatAllocator>& a = allocators[flag];

    if (a.empty())
    {
        a = makePtr<HostMemAllocator>(flag);
    }

    return a.get();
#endif
}

#ifdef HAVE_CUDA
namespace
{
    // Round `what` up to the next multiple of `alignment`.
    // Uses the mask trick, so it assumes `alignment` is a power of two —
    // which holds for the device textureAlignment values it is called with
    // below (TODO confirm if reused elsewhere).
    size_t alignUpStep(size_t what, size_t alignment)
    {
        size_t alignMask = alignment - 1;
        size_t inverseAlignMask = ~alignMask;
        size_t res = (what + alignMask) & inverseAlignMask;
        return res;
    }
}
#endif

// (Re)allocate the HostMem buffer for a rows_ x cols_ matrix of type_.
// No-op if the current buffer already matches; otherwise releases the old
// buffer and allocates fresh pinned memory according to alloc_type.
// For SHARED allocations the row step is padded to the device texture
// alignment and the device must support mapped host memory.
// Throws CV_Error on overflow / bad alloc type; throw_no_cuda without CUDA.
void cv::cuda::HostMem::create(int rows_, int cols_, int type_)
{
#ifndef HAVE_CUDA
    CV_UNUSED(rows_);
    CV_UNUSED(cols_);
    CV_UNUSED(type_);
    throw_no_cuda();
#else
    if (alloc_type == SHARED)
    {
        // Mapped (zero-copy) memory only works if the device can map host memory.
        DeviceInfo devInfo;
        CV_Assert( devInfo.canMapHostMemory() );
    }

    type_ &= Mat::TYPE_MASK;

    // Fast path: existing buffer already has the requested geometry and type.
    if (rows == rows_ && cols == cols_ && type() == type_ && data)
        return;

    if (data)
        release();

    CV_DbgAssert( rows_ >= 0 && cols_ >= 0 );

    if (rows_ > 0 && cols_ > 0)
    {
        flags = Mat::MAGIC_VAL + type_;
        rows = rows_;
        cols = cols_;
        step = elemSize() * cols;
        int sz[] = { rows, cols };
        size_t steps[] = { step, CV_ELEM_SIZE(type_) };
        flags = updateContinuityFlag(flags, 2, sz, steps);

        if (alloc_type == SHARED)
        {
            // Pad the row step so the mapped buffer satisfies the device's
            // texture alignment requirement.
            DeviceInfo devInfo;
            step = alignUpStep(step, devInfo.textureAlignment());
        }

        // Compute the byte size in 64-bit first to detect size_t overflow
        // on 32-bit hosts.
        int64 _nettosize = (int64)step*rows;
        size_t nettosize = (size_t)_nettosize;

        if (_nettosize != (int64)nettosize)
            CV_Error(cv::Error::StsNoMem, "Too big buffer is allocated");

        size_t datasize = alignSize(nettosize, (int)sizeof(*refcount));

        void* ptr = 0;

        switch (alloc_type)
        {
        case PAGE_LOCKED:    cudaSafeCall( cudaHostAlloc(&ptr, datasize, cudaHostAllocDefault) ); break;
        case SHARED:         cudaSafeCall( cudaHostAlloc(&ptr, datasize, cudaHostAllocMapped) ); break;
        case WRITE_COMBINED: cudaSafeCall( cudaHostAlloc(&ptr, datasize, cudaHostAllocWriteCombined) ); break;
        default:             CV_Error(cv::Error::StsBadFlag, "Invalid alloc type");
        }

        datastart = data = (uchar*)ptr;
        dataend = data + nettosize;

        // The refcount lives in a separate host allocation (not the pinned
        // block) and starts at 1 for this owner.
        refcount = (int*)cv::fastMalloc(sizeof(*refcount));
        *refcount = 1;
    }
#endif
}

// Return a new header for the same data with a different channel count
// and/or row count. No data is copied; the result shares the buffer.
// Changing the number of rows requires a continuous matrix, and the total
// element count must stay divisible by the new geometry — otherwise
// CV_Error is raised.
HostMem cv::cuda::HostMem::reshape(int new_cn, int new_rows) const
{
    HostMem hdr = *this;

    int cn = channels();
    if (new_cn == 0)
        new_cn = cn;  // 0 means "keep the current channel count"

    int total_width = cols * cn;

    // If the requested channel count does not divide the current row width,
    // fold everything into a single implicit row-count change.
    if ((new_cn > total_width || total_width % new_cn != 0) && new_rows == 0)
        new_rows = rows * total_width / new_cn;

    if (new_rows != 0 && new_rows != rows)
    {
        int total_size = total_width * rows;

        if (!isContinuous())
            CV_Error(cv::Error::BadStep, "The matrix is not continuous, thus its number of rows can not be changed");

        if ((unsigned)new_rows > (unsigned)total_size)
            CV_Error(cv::Error::StsOutOfRange, "Bad new number of rows");

        total_width = total_size / new_rows;

        if (total_width * new_rows != total_size)
            CV_Error(cv::Error::StsBadArg, "The total number of matrix elements is not divisible by the new number of rows");

        hdr.rows = new_rows;
        hdr.step = total_width * elemSize1();
    }

    int new_width = total_width / new_cn;

    if (new_width * new_cn != total_width)
        CV_Error(cv::Error::BadNumChannels, "The total width is not divisible by the new number of channels");

    hdr.cols = new_width;
    // Rewrite the channel bits of the flag word to reflect new_cn.
    hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn - 1) << CV_CN_SHIFT);

    return hdr;
}

// Drop this header's reference to the buffer. The pinned memory and the
// refcount cell are freed only when the atomic decrement shows this was the
// last reference. Always resets this header's pointers/geometry to empty.
void cv::cuda::HostMem::release()
{
#ifdef HAVE_CUDA
    // CV_XADD returns the value *before* the decrement, so 1 means we held
    // the last reference.
    if (refcount && CV_XADD(refcount, -1) == 1)
    {
        cudaFreeHost(datastart);
        fastFree(refcount);
    }

    dataend = data = datastart = 0;
    step = rows = cols = 0;
    refcount = 0;
#endif
}

// Build a GpuMat header aliasing this buffer through the device-side pointer
// of the mapped allocation. Only valid for SHARED (cudaHostAllocMapped)
// memory; asserts otherwise. The returned GpuMat does not own the memory.
GpuMat cv::cuda::HostMem::createGpuMatHeader() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
#else
    CV_Assert( alloc_type == SHARED );

    void *pdev;
    cudaSafeCall( cudaHostGetDevicePointer(&pdev, data, 0) );

    return GpuMat(rows, cols, type(), pdev, step);
#endif
}

// Page-lock (pin) the memory of an existing, continuous Mat in place via
// cudaHostRegister, so it can participate in fast/async transfers.
// The Mat must stay alive and be unregistered before its memory is freed.
void cv::cuda::registerPageLocked(Mat& m)
{
#ifndef HAVE_CUDA
    CV_UNUSED(m);
    throw_no_cuda();
#else
    // Registration covers one contiguous range, hence the continuity requirement.
    CV_Assert( m.isContinuous() );
    cudaSafeCall( cudaHostRegister(m.data, m.step * m.rows, cudaHostRegisterPortable) );
#endif
}

// Undo a previous registerPageLocked() on the same Mat.
// In non-CUDA builds this is a silent no-op (unlike registerPageLocked,
// it does not throw).
void cv::cuda::unregisterPageLocked(Mat& m)
{
#ifndef HAVE_CUDA
    CV_UNUSED(m);
#else
    cudaSafeCall( cudaHostUnregister(m.data) );
#endif
}