cuda_host_mem.cpp
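Source of the OpenCV CUDA host-memory module. It implements cv::cuda::HostMem (pinned, shared/mapped, and write-combined host allocations), including getAllocator(), create(), reshape(), release(), and createGpuMatHeader(), plus the free functions cv::cuda::registerPageLocked() and cv::cuda::unregisterPageLocked().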
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"
#include <map>

using namespace cv;
using namespace cv::cuda;

#ifdef HAVE_CUDA

namespace {

// MatAllocator that backs HostMem with CUDA pinned (page-locked) host memory,
// allocated through cudaHostAlloc with the flags chosen in getAllocator().
class HostMemAllocator : public MatAllocator
{
public:
    explicit HostMemAllocator(unsigned int flags) : flags_(flags)
    {
    }

    UMatData* allocate(int dims, const int* sizes, int type,
                       void* data0, size_t* step,
                       int /*flags*/, UMatUsageFlags /*usageFlags*/) const
    {
        // Compute the total buffer size, filling in any step entries
        // that were left as CV_AUTOSTEP.
        size_t total = CV_ELEM_SIZE(type);
        for (int i = dims-1; i >= 0; i--)
        {
            if (step)
            {
                if (data0 && step[i] != CV_AUTOSTEP)
                {
                    CV_Assert(total <= step[i]);
                    total = step[i];
                }
                else
                {
                    step[i] = total;
                }
            }

            total *= sizes[i];
        }

        UMatData* u = new UMatData(this);
        u->size = total;

        if (data0)
        {
            // Wrap user-provided memory; ownership stays with the caller.
            u->data = u->origdata = static_cast<uchar*>(data0);
            u->flags |= UMatData::USER_ALLOCATED;
        }
        else
        {
            void* ptr = 0;
            cudaSafeCall( cudaHostAlloc(&ptr, total, flags_) );

            u->data = u->origdata = static_cast<uchar*>(ptr);
        }

        return u;
    }

    bool allocate(UMatData* u, int /*accessFlags*/, UMatUsageFlags /*usageFlags*/) const
    {
        return (u != NULL);
    }

    void deallocate(UMatData* u) const
    {
        if (!u)
            return;

        CV_Assert(u->urefcount >= 0);
        CV_Assert(u->refcount >= 0);

        if (u->refcount == 0)
        {
            // Only free memory this allocator actually allocated.
            if ( !(u->flags & UMatData::USER_ALLOCATED) )
            {
                cudaFreeHost(u->origdata);
                u->origdata = 0;
            }

            delete u;
        }
    }

private:
    unsigned int flags_;
};

} // namespace

#endif

MatAllocator* cv::cuda::HostMem::getAllocator(AllocType alloc_type)
{
#ifndef HAVE_CUDA
    (void) alloc_type;
    throw_no_cuda();
    return NULL;
#else
    // One shared allocator instance per cudaHostAlloc flag.
    static std::map<unsigned int, Ptr<MatAllocator> > allocators;

    unsigned int flag = cudaHostAllocDefault;

    switch (alloc_type)
    {
    case PAGE_LOCKED:    flag = cudaHostAllocDefault; break;
    case SHARED:         flag = cudaHostAllocMapped; break;
    case WRITE_COMBINED: flag = cudaHostAllocWriteCombined; break;
    default:             CV_Error(cv::Error::StsBadFlag, "Invalid alloc type");
    }

    Ptr<MatAllocator>& a = allocators[flag];

    if (a.empty())
    {
        a = makePtr<HostMemAllocator>(flag);
    }

    return a.get();
#endif
}

#ifdef HAVE_CUDA
namespace
{
    // Round `what` up to the nearest multiple of `alignment` (a power of two).
    size_t alignUpStep(size_t what, size_t alignment)
    {
        size_t alignMask = alignment - 1;
        size_t inverseAlignMask = ~alignMask;
        size_t res = (what + alignMask) & inverseAlignMask;
        return res;
    }
}
#endif

void cv::cuda::HostMem::create(int rows_, int cols_, int type_)
{
#ifndef HAVE_CUDA
    (void) rows_;
    (void) cols_;
    (void) type_;
    throw_no_cuda();
#else
    // Mapped (zero-copy) memory requires a device that can map host memory.
    if (alloc_type == SHARED)
    {
        DeviceInfo devInfo;
        CV_Assert( devInfo.canMapHostMemory() );
    }

    type_ &= Mat::TYPE_MASK;

    if (rows == rows_ && cols == cols_ && type() == type_ && data)
        return;

    if (data)
        release();

    CV_DbgAssert( rows_ >= 0 && cols_ >= 0 );

    if (rows_ > 0 && cols_ > 0)
    {
        flags = Mat::MAGIC_VAL + Mat::CONTINUOUS_FLAG + type_;
        rows = rows_;
        cols = cols_;
        step = elemSize() * cols;

        if (alloc_type == SHARED)
        {
            DeviceInfo devInfo;
            step = alignUpStep(step, devInfo.textureAlignment());
        }

        // Guard against size_t overflow of the requested buffer size.
        int64 _nettosize = (int64)step*rows;
        size_t nettosize = (size_t)_nettosize;

        if (_nettosize != (int64)nettosize)
            CV_Error(cv::Error::StsNoMem, "Too big buffer is allocated");

        size_t datasize = alignSize(nettosize, (int)sizeof(*refcount));

        void* ptr = 0;

        switch (alloc_type)
        {
        case PAGE_LOCKED:    cudaSafeCall( cudaHostAlloc(&ptr, datasize, cudaHostAllocDefault) ); break;
        case SHARED:         cudaSafeCall( cudaHostAlloc(&ptr, datasize, cudaHostAllocMapped) ); break;
        case WRITE_COMBINED: cudaSafeCall( cudaHostAlloc(&ptr, datasize, cudaHostAllocWriteCombined) ); break;
        default:             CV_Error(cv::Error::StsBadFlag, "Invalid alloc type");
        }

        datastart = data = (uchar*)ptr;
        dataend = data + nettosize;

        refcount = (int*)cv::fastMalloc(sizeof(*refcount));
        *refcount = 1;
    }
#endif
}

HostMem cv::cuda::HostMem::reshape(int new_cn, int new_rows) const
{
    HostMem hdr = *this;

    int cn = channels();
    if (new_cn == 0)
        new_cn = cn;

    int total_width = cols * cn;

    if ((new_cn > total_width || total_width % new_cn != 0) && new_rows == 0)
        new_rows = rows * total_width / new_cn;

    if (new_rows != 0 && new_rows != rows)
    {
        int total_size = total_width * rows;

        if (!isContinuous())
            CV_Error(cv::Error::BadStep, "The matrix is not continuous, thus its number of rows can not be changed");

        if ((unsigned)new_rows > (unsigned)total_size)
            CV_Error(cv::Error::StsOutOfRange, "Bad new number of rows");

        total_width = total_size / new_rows;

        if (total_width * new_rows != total_size)
            CV_Error(cv::Error::StsBadArg, "The total number of matrix elements is not divisible by the new number of rows");

        hdr.rows = new_rows;
        hdr.step = total_width * elemSize1();
    }

    int new_width = total_width / new_cn;

    if (new_width * new_cn != total_width)
        CV_Error(cv::Error::BadNumChannels, "The total width is not divisible by the new number of channels");

    hdr.cols = new_width;
    hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn - 1) << CV_CN_SHIFT);

    return hdr;
}

void cv::cuda::HostMem::release()
{
#ifdef HAVE_CUDA
    // Atomically decrement; the last owner frees the pinned buffer.
    if (refcount && CV_XADD(refcount, -1) == 1)
    {
        cudaFreeHost(datastart);
        fastFree(refcount);
    }

    dataend = data = datastart = 0;
    step = rows = cols = 0;
    refcount = 0;
#endif
}

GpuMat cv::cuda::HostMem::createGpuMatHeader() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return GpuMat();
#else
    // Only mapped (SHARED) buffers have a device-side address.
    CV_Assert( alloc_type == SHARED );

    void *pdev;
    cudaSafeCall( cudaHostGetDevicePointer(&pdev, data, 0) );

    return GpuMat(rows, cols, type(), pdev, step);
#endif
}

void cv::cuda::registerPageLocked(Mat& m)
{
#ifndef HAVE_CUDA
    (void) m;
    throw_no_cuda();
#else
    CV_Assert( m.isContinuous() );
    cudaSafeCall( cudaHostRegister(m.data, m.step * m.rows, cudaHostRegisterPortable) );
#endif
}

void cv::cuda::unregisterPageLocked(Mat& m)
{
#ifndef HAVE_CUDA
    (void) m;
#else
    cudaSafeCall( cudaHostUnregister(m.data) );
#endif
}
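
For reference, a minimal usage sketch of the API implemented above. This is not part of the file; it assumes an OpenCV build with CUDA support (HAVE_CUDA) and, for the SHARED path, a device for which DeviceInfo::canMapHostMemory() holds. Sizes and values are illustrative only.

#include <opencv2/core.hpp>
#include <opencv2/core/cuda.hpp>

int main()
{
    // PAGE_LOCKED: pinned host memory, so uploads/downloads queued on a
    // stream can run asynchronously with respect to the host.
    cv::cuda::HostMem pinned(480, 640, CV_8UC1, cv::cuda::HostMem::PAGE_LOCKED);
    cv::Mat host_view = pinned.createMatHeader();   // zero-copy CPU view
    host_view.setTo(cv::Scalar(0));

    cv::cuda::Stream stream;
    cv::cuda::GpuMat dev;
    dev.upload(pinned, stream);   // asynchronous copy: the source is page-locked

    // SHARED: mapped memory visible to both host and device, so
    // createGpuMatHeader() (defined above) yields a zero-copy GPU view.
    cv::cuda::HostMem shared(480, 640, CV_8UC1, cv::cuda::HostMem::SHARED);
    cv::cuda::GpuMat dev_view = shared.createGpuMatHeader();
    dev_view.setTo(cv::Scalar(255));   // writes land directly in host memory

    stream.waitForCompletion();
    return 0;
}

The sketch follows the file's own distinction between the allocation types: PAGE_LOCKED buffers are for fast staged transfers, while SHARED buffers trade transfer speed for a single allocation addressable from both sides.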
