Fork of gr-peach-opencv-project-sd-card
cuda_info.cpp
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"

using namespace cv;
using namespace cv::cuda;

int cv::cuda::getCudaEnabledDeviceCount()
{
#ifndef HAVE_CUDA
    return 0;
#else
    int count;
    cudaError_t error = cudaGetDeviceCount(&count);

    if (error == cudaErrorInsufficientDriver)
        return -1;

    if (error == cudaErrorNoDevice)
        return 0;

    cudaSafeCall( error );
    return count;
#endif
}

void cv::cuda::setDevice(int device)
{
#ifndef HAVE_CUDA
    (void) device;
    throw_no_cuda();
#else
    cudaSafeCall( cudaSetDevice(device) );
#endif
}

int cv::cuda::getDevice()
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    int device;
    cudaSafeCall( cudaGetDevice(&device) );
    return device;
#endif
}

void cv::cuda::resetDevice()
{
#ifndef HAVE_CUDA
    throw_no_cuda();
#else
    cudaSafeCall( cudaDeviceReset() );
#endif
}

bool cv::cuda::deviceSupports(FeatureSet feature_set)
{
#ifndef HAVE_CUDA
    (void) feature_set;
    throw_no_cuda();
    return false;
#else
    static int versions[] =
    {
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
    };
    static const int cache_size = static_cast<int>(sizeof(versions) / sizeof(versions[0]));

    const int devId = getDevice();

    int version;

    if (devId < cache_size && versions[devId] >= 0)
    {
        version = versions[devId];
    }
    else
    {
        DeviceInfo dev(devId);
        version = dev.majorVersion() * 10 + dev.minorVersion();
        if (devId < cache_size)
            versions[devId] = version;
    }

    return TargetArchs::builtWith(feature_set) && (version >= feature_set);
#endif
}

////////////////////////////////////////////////////////////////////////
// TargetArchs

#ifdef HAVE_CUDA

namespace
{
    class CudaArch
    {
    public:
        CudaArch();

        bool builtWith(FeatureSet feature_set) const;
        bool hasPtx(int major, int minor) const;
        bool hasBin(int major, int minor) const;
        bool hasEqualOrLessPtx(int major, int minor) const;
        bool hasEqualOrGreaterPtx(int major, int minor) const;
        bool hasEqualOrGreaterBin(int major, int minor) const;

    private:
        static void fromStr(const char* set_as_str, std::vector<int>& arr);

        std::vector<int> bin;
        std::vector<int> ptx;
        std::vector<int> features;
    };

    const CudaArch cudaArch;

    CudaArch::CudaArch()
    {
        fromStr(CUDA_ARCH_BIN, bin);
        fromStr(CUDA_ARCH_PTX, ptx);
        fromStr(CUDA_ARCH_FEATURES, features);
    }

    bool CudaArch::builtWith(FeatureSet feature_set) const
    {
        return !features.empty() && (features.back() >= feature_set);
    }

    bool CudaArch::hasPtx(int major, int minor) const
    {
        return std::find(ptx.begin(), ptx.end(), major * 10 + minor) != ptx.end();
    }

    bool CudaArch::hasBin(int major, int minor) const
    {
        return std::find(bin.begin(), bin.end(), major * 10 + minor) != bin.end();
    }

    bool CudaArch::hasEqualOrLessPtx(int major, int minor) const
    {
        return !ptx.empty() && (ptx.front() <= major * 10 + minor);
    }

    bool CudaArch::hasEqualOrGreaterPtx(int major, int minor) const
    {
        return !ptx.empty() && (ptx.back() >= major * 10 + minor);
    }

    bool CudaArch::hasEqualOrGreaterBin(int major, int minor) const
    {
        return !bin.empty() && (bin.back() >= major * 10 + minor);
    }

    void CudaArch::fromStr(const char* set_as_str, std::vector<int>& arr)
    {
        arr.clear();

        const size_t len = strlen(set_as_str);

        size_t pos = 0;
        while (pos < len)
        {
            if (isspace(set_as_str[pos]))
            {
                ++pos;
            }
            else
            {
                int cur_value;
                int chars_read;
                int args_read = sscanf(set_as_str + pos, "%d%n", &cur_value, &chars_read);
                CV_Assert( args_read == 1 );

                arr.push_back(cur_value);
                pos += chars_read;
            }
        }

        std::sort(arr.begin(), arr.end());
    }
}

#endif

bool cv::cuda::TargetArchs::builtWith(cv::cuda::FeatureSet feature_set)
{
#ifndef HAVE_CUDA
    (void) feature_set;
    throw_no_cuda();
    return false;
#else
    return cudaArch.builtWith(feature_set);
#endif
}

bool cv::cuda::TargetArchs::hasPtx(int major, int minor)
{
#ifndef HAVE_CUDA
    (void) major;
    (void) minor;
    throw_no_cuda();
    return false;
#else
    return cudaArch.hasPtx(major, minor);
#endif
}

bool cv::cuda::TargetArchs::hasBin(int major, int minor)
{
#ifndef HAVE_CUDA
    (void) major;
    (void) minor;
    throw_no_cuda();
    return false;
#else
    return cudaArch.hasBin(major, minor);
#endif
}

bool cv::cuda::TargetArchs::hasEqualOrLessPtx(int major, int minor)
{
#ifndef HAVE_CUDA
    (void) major;
    (void) minor;
    throw_no_cuda();
    return false;
#else
    return cudaArch.hasEqualOrLessPtx(major, minor);
#endif
}

bool cv::cuda::TargetArchs::hasEqualOrGreaterPtx(int major, int minor)
{
#ifndef HAVE_CUDA
    (void) major;
    (void) minor;
    throw_no_cuda();
    return false;
#else
    return cudaArch.hasEqualOrGreaterPtx(major, minor);
#endif
}

bool cv::cuda::TargetArchs::hasEqualOrGreaterBin(int major, int minor)
{
#ifndef HAVE_CUDA
    (void) major;
    (void) minor;
    throw_no_cuda();
    return false;
#else
    return cudaArch.hasEqualOrGreaterBin(major, minor);
#endif
}

////////////////////////////////////////////////////////////////////////
// DeviceInfo

#ifdef HAVE_CUDA

namespace
{
    class DeviceProps
    {
    public:
        DeviceProps();

        const cudaDeviceProp* get(int devID) const;

    private:
        std::vector<cudaDeviceProp> props_;
    };

    DeviceProps::DeviceProps()
    {
        int count = getCudaEnabledDeviceCount();

        if (count > 0)
        {
            props_.resize(count);

            for (int devID = 0; devID < count; ++devID)
            {
                cudaSafeCall( cudaGetDeviceProperties(&props_[devID], devID) );
            }
        }
    }

    const cudaDeviceProp* DeviceProps::get(int devID) const
    {
        CV_Assert( static_cast<size_t>(devID) < props_.size() );

        return &props_[devID];
    }

    DeviceProps& deviceProps()
    {
        static DeviceProps props;
        return props;
    }
}

#endif

const char* cv::cuda::DeviceInfo::name() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return "";
#else
    return deviceProps().get(device_id_)->name;
#endif
}

size_t cv::cuda::DeviceInfo::totalGlobalMem() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->totalGlobalMem;
#endif
}

size_t cv::cuda::DeviceInfo::sharedMemPerBlock() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->sharedMemPerBlock;
#endif
}

int cv::cuda::DeviceInfo::regsPerBlock() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->regsPerBlock;
#endif
}

int cv::cuda::DeviceInfo::warpSize() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->warpSize;
#endif
}

size_t cv::cuda::DeviceInfo::memPitch() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->memPitch;
#endif
}

int cv::cuda::DeviceInfo::maxThreadsPerBlock() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->maxThreadsPerBlock;
#endif
}

Vec3i cv::cuda::DeviceInfo::maxThreadsDim() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return Vec3i();
#else
    return Vec3i(deviceProps().get(device_id_)->maxThreadsDim);
#endif
}

Vec3i cv::cuda::DeviceInfo::maxGridSize() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return Vec3i();
#else
    return Vec3i(deviceProps().get(device_id_)->maxGridSize);
#endif
}

int cv::cuda::DeviceInfo::clockRate() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->clockRate;
#endif
}

size_t cv::cuda::DeviceInfo::totalConstMem() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->totalConstMem;
#endif
}

int cv::cuda::DeviceInfo::majorVersion() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->major;
#endif
}

int cv::cuda::DeviceInfo::minorVersion() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->minor;
#endif
}

size_t cv::cuda::DeviceInfo::textureAlignment() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->textureAlignment;
#endif
}

size_t cv::cuda::DeviceInfo::texturePitchAlignment() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->texturePitchAlignment;
#endif
}

int cv::cuda::DeviceInfo::multiProcessorCount() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->multiProcessorCount;
#endif
}

bool cv::cuda::DeviceInfo::kernelExecTimeoutEnabled() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return false;
#else
    return deviceProps().get(device_id_)->kernelExecTimeoutEnabled != 0;
#endif
}

bool cv::cuda::DeviceInfo::integrated() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return false;
#else
    return deviceProps().get(device_id_)->integrated != 0;
#endif
}

bool cv::cuda::DeviceInfo::canMapHostMemory() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return false;
#else
    return deviceProps().get(device_id_)->canMapHostMemory != 0;
#endif
}

DeviceInfo::ComputeMode cv::cuda::DeviceInfo::computeMode() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return ComputeModeDefault;
#else
    static const ComputeMode tbl[] =
    {
        ComputeModeDefault,
        ComputeModeExclusive,
        ComputeModeProhibited,
        ComputeModeExclusiveProcess
    };

    return tbl[deviceProps().get(device_id_)->computeMode];
#endif
}

int cv::cuda::DeviceInfo::maxTexture1D() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->maxTexture1D;
#endif
}

int cv::cuda::DeviceInfo::maxTexture1DMipmap() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
#if CUDA_VERSION >= 5000
    return deviceProps().get(device_id_)->maxTexture1DMipmap;
#else
    CV_Error(Error::StsNotImplemented, "This function requires CUDA 5.0");
    return 0;
#endif
#endif
}

int cv::cuda::DeviceInfo::maxTexture1DLinear() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->maxTexture1DLinear;
#endif
}

Vec2i cv::cuda::DeviceInfo::maxTexture2D() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return Vec2i();
#else
    return Vec2i(deviceProps().get(device_id_)->maxTexture2D);
#endif
}

Vec2i cv::cuda::DeviceInfo::maxTexture2DMipmap() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return Vec2i();
#else
#if CUDA_VERSION >= 5000
    return Vec2i(deviceProps().get(device_id_)->maxTexture2DMipmap);
#else
    CV_Error(Error::StsNotImplemented, "This function requires CUDA 5.0");
    return Vec2i();
#endif
#endif
}

Vec3i cv::cuda::DeviceInfo::maxTexture2DLinear() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return Vec3i();
#else
    return Vec3i(deviceProps().get(device_id_)->maxTexture2DLinear);
#endif
}

Vec2i cv::cuda::DeviceInfo::maxTexture2DGather() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return Vec2i();
#else
    return Vec2i(deviceProps().get(device_id_)->maxTexture2DGather);
#endif
}

Vec3i cv::cuda::DeviceInfo::maxTexture3D() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return Vec3i();
#else
    return Vec3i(deviceProps().get(device_id_)->maxTexture3D);
#endif
}

int cv::cuda::DeviceInfo::maxTextureCubemap() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->maxTextureCubemap;
#endif
}

Vec2i cv::cuda::DeviceInfo::maxTexture1DLayered() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return Vec2i();
#else
    return Vec2i(deviceProps().get(device_id_)->maxTexture1DLayered);
#endif
}

Vec3i cv::cuda::DeviceInfo::maxTexture2DLayered() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return Vec3i();
#else
    return Vec3i(deviceProps().get(device_id_)->maxTexture2DLayered);
#endif
}

Vec2i cv::cuda::DeviceInfo::maxTextureCubemapLayered() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return Vec2i();
#else
    return Vec2i(deviceProps().get(device_id_)->maxTextureCubemapLayered);
#endif
}

int cv::cuda::DeviceInfo::maxSurface1D() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->maxSurface1D;
#endif
}

Vec2i cv::cuda::DeviceInfo::maxSurface2D() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return Vec2i();
#else
    return Vec2i(deviceProps().get(device_id_)->maxSurface2D);
#endif
}

Vec3i cv::cuda::DeviceInfo::maxSurface3D() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return Vec3i();
#else
    return Vec3i(deviceProps().get(device_id_)->maxSurface3D);
#endif
}

Vec2i cv::cuda::DeviceInfo::maxSurface1DLayered() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return Vec2i();
#else
    return Vec2i(deviceProps().get(device_id_)->maxSurface1DLayered);
#endif
}

Vec3i cv::cuda::DeviceInfo::maxSurface2DLayered() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return Vec3i();
#else
    return Vec3i(deviceProps().get(device_id_)->maxSurface2DLayered);
#endif
}

int cv::cuda::DeviceInfo::maxSurfaceCubemap() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->maxSurfaceCubemap;
#endif
}

Vec2i cv::cuda::DeviceInfo::maxSurfaceCubemapLayered() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return Vec2i();
#else
    return Vec2i(deviceProps().get(device_id_)->maxSurfaceCubemapLayered);
#endif
}

size_t cv::cuda::DeviceInfo::surfaceAlignment() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->surfaceAlignment;
#endif
}

bool cv::cuda::DeviceInfo::concurrentKernels() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return false;
#else
    return deviceProps().get(device_id_)->concurrentKernels != 0;
#endif
}

bool cv::cuda::DeviceInfo::ECCEnabled() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return false;
#else
    return deviceProps().get(device_id_)->ECCEnabled != 0;
#endif
}

int cv::cuda::DeviceInfo::pciBusID() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->pciBusID;
#endif
}

int cv::cuda::DeviceInfo::pciDeviceID() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->pciDeviceID;
#endif
}

int cv::cuda::DeviceInfo::pciDomainID() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->pciDomainID;
#endif
}

bool cv::cuda::DeviceInfo::tccDriver() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return false;
#else
    return deviceProps().get(device_id_)->tccDriver != 0;
#endif
}

int cv::cuda::DeviceInfo::asyncEngineCount() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->asyncEngineCount;
#endif
}

bool cv::cuda::DeviceInfo::unifiedAddressing() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return false;
#else
    return deviceProps().get(device_id_)->unifiedAddressing != 0;
#endif
}

int cv::cuda::DeviceInfo::memoryClockRate() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->memoryClockRate;
#endif
}

int cv::cuda::DeviceInfo::memoryBusWidth() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->memoryBusWidth;
#endif
}

int cv::cuda::DeviceInfo::l2CacheSize() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->l2CacheSize;
#endif
}

int cv::cuda::DeviceInfo::maxThreadsPerMultiProcessor() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return 0;
#else
    return deviceProps().get(device_id_)->maxThreadsPerMultiProcessor;
#endif
}

void cv::cuda::DeviceInfo::queryMemory(size_t& _totalMemory, size_t& _freeMemory) const
{
#ifndef HAVE_CUDA
    (void) _totalMemory;
    (void) _freeMemory;
    throw_no_cuda();
#else
    int prevDeviceID = getDevice();
    if (prevDeviceID != device_id_)
        setDevice(device_id_);

    cudaSafeCall( cudaMemGetInfo(&_freeMemory, &_totalMemory) );

    if (prevDeviceID != device_id_)
        setDevice(prevDeviceID);
#endif
}

bool cv::cuda::DeviceInfo::isCompatible() const
{
#ifndef HAVE_CUDA
    throw_no_cuda();
    return false;
#else
    // Check PTX compatibility
    if (TargetArchs::hasEqualOrLessPtx(majorVersion(), minorVersion()))
        return true;

    // Check BIN compatibility
    for (int i = minorVersion(); i >= 0; --i)
        if (TargetArchs::hasBin(majorVersion(), i))
            return true;

    return false;
#endif
}

////////////////////////////////////////////////////////////////////////
// print info

#ifdef HAVE_CUDA

namespace
{
    int convertSMVer2Cores(int major, int minor)
    {
        // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM
        typedef struct {
            int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version
            int Cores;
        } SMtoCores;

        SMtoCores gpuArchCoresPerSM[] =  { { 0x10, 8 }, { 0x11, 8 }, { 0x12, 8 }, { 0x13, 8 }, { 0x20, 32 }, { 0x21, 48 }, {0x30, 192}, {0x35, 192}, { -1, -1 } };

        int index = 0;
        while (gpuArchCoresPerSM[index].SM != -1)
        {
            if (gpuArchCoresPerSM[index].SM == ((major << 4) + minor) )
                return gpuArchCoresPerSM[index].Cores;
            index++;
        }

        return -1;
    }
}

#endif

void cv::cuda::printCudaDeviceInfo(int device)
{
#ifndef HAVE_CUDA
    (void) device;
    throw_no_cuda();
#else
    int count = getCudaEnabledDeviceCount();
    bool valid = (device >= 0) && (device < count);

    int beg = valid ? device   : 0;
    int end = valid ? device+1 : count;

    printf("*** CUDA Device Query (Runtime API) version (CUDART static linking) *** \n\n");
    printf("Device count: %d\n", count);

    int driverVersion = 0, runtimeVersion = 0;
    cudaSafeCall( cudaDriverGetVersion(&driverVersion) );
    cudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );

    const char *computeMode[] = {
        "Default (multiple host threads can use ::cudaSetDevice() with device simultaneously)",
        "Exclusive (only one host thread in one process is able to use ::cudaSetDevice() with this device)",
        "Prohibited (no host thread can use ::cudaSetDevice() with this device)",
        "Exclusive Process (many threads in one process is able to use ::cudaSetDevice() with this device)",
        "Unknown",
        NULL
    };

    for(int dev = beg; dev < end; ++dev)
    {
        cudaDeviceProp prop;
        cudaSafeCall( cudaGetDeviceProperties(&prop, dev) );

        printf("\nDevice %d: \"%s\"\n", dev, prop.name);
        printf("  CUDA Driver Version / Runtime Version          %d.%d / %d.%d\n", driverVersion/1000, driverVersion%100, runtimeVersion/1000, runtimeVersion%100);
        printf("  CUDA Capability Major/Minor version number:    %d.%d\n", prop.major, prop.minor);
        printf("  Total amount of global memory:                 %.0f MBytes (%llu bytes)\n", (float)prop.totalGlobalMem/1048576.0f, (unsigned long long) prop.totalGlobalMem);

        int cores = convertSMVer2Cores(prop.major, prop.minor);
        if (cores > 0)
            printf("  (%2d) Multiprocessors x (%2d) CUDA Cores/MP:     %d CUDA Cores\n", prop.multiProcessorCount, cores, cores * prop.multiProcessorCount);

        printf("  GPU Clock Speed:                                %.2f GHz\n", prop.clockRate * 1e-6f);

        printf("  Max Texture Dimension Size (x,y,z)             1D=(%d), 2D=(%d,%d), 3D=(%d,%d,%d)\n",
               prop.maxTexture1D, prop.maxTexture2D[0], prop.maxTexture2D[1],
               prop.maxTexture3D[0], prop.maxTexture3D[1], prop.maxTexture3D[2]);
        printf("  Max Layered Texture Size (dim) x layers        1D=(%d) x %d, 2D=(%d,%d) x %d\n",
               prop.maxTexture1DLayered[0], prop.maxTexture1DLayered[1],
               prop.maxTexture2DLayered[0], prop.maxTexture2DLayered[1], prop.maxTexture2DLayered[2]);

        printf("  Total amount of constant memory:               %u bytes\n", (int)prop.totalConstMem);
        printf("  Total amount of shared memory per block:       %u bytes\n", (int)prop.sharedMemPerBlock);
        printf("  Total number of registers available per block: %d\n", prop.regsPerBlock);
        printf("  Warp size:                                     %d\n", prop.warpSize);
        printf("  Maximum number of threads per block:           %d\n", prop.maxThreadsPerBlock);
        printf("  Maximum sizes of each dimension of a block:    %d x %d x %d\n", prop.maxThreadsDim[0], prop.maxThreadsDim[1], prop.maxThreadsDim[2]);
        printf("  Maximum sizes of each dimension of a grid:     %d x %d x %d\n", prop.maxGridSize[0], prop.maxGridSize[1], prop.maxGridSize[2]);
        printf("  Maximum memory pitch:                          %u bytes\n", (int)prop.memPitch);
        printf("  Texture alignment:                             %u bytes\n", (int)prop.textureAlignment);

        printf("  Concurrent copy and execution:                 %s with %d copy engine(s)\n", (prop.deviceOverlap ? "Yes" : "No"), prop.asyncEngineCount);
        printf("  Run time limit on kernels:                     %s\n", prop.kernelExecTimeoutEnabled ? "Yes" : "No");
        printf("  Integrated GPU sharing Host Memory:            %s\n", prop.integrated ? "Yes" : "No");
        printf("  Support host page-locked memory mapping:       %s\n", prop.canMapHostMemory ? "Yes" : "No");

        printf("  Concurrent kernel execution:                   %s\n", prop.concurrentKernels ? "Yes" : "No");
        printf("  Alignment requirement for Surfaces:            %s\n", prop.surfaceAlignment ? "Yes" : "No");
        printf("  Device has ECC support enabled:                %s\n", prop.ECCEnabled ? "Yes" : "No");
        printf("  Device is using TCC driver mode:               %s\n", prop.tccDriver ? "Yes" : "No");
        printf("  Device supports Unified Addressing (UVA):      %s\n", prop.unifiedAddressing ? "Yes" : "No");
        printf("  Device PCI Bus ID / PCI location ID:           %d / %d\n", prop.pciBusID, prop.pciDeviceID );
        printf("  Compute Mode:\n");
        printf("      %s \n", computeMode[prop.computeMode]);
    }

    printf("\n");
    printf("deviceQuery, CUDA Driver = CUDART");
    printf(", CUDA Driver Version  = %d.%d", driverVersion / 1000, driverVersion % 100);
    printf(", CUDA Runtime Version = %d.%d", runtimeVersion/1000, runtimeVersion%100);
    printf(", NumDevs = %d\n\n", count);

    fflush(stdout);
#endif
}

void cv::cuda::printShortCudaDeviceInfo(int device)
{
#ifndef HAVE_CUDA
    (void) device;
    throw_no_cuda();
#else
    int count = getCudaEnabledDeviceCount();
    bool valid = (device >= 0) && (device < count);

    int beg = valid ? device   : 0;
    int end = valid ? device+1 : count;

    int driverVersion = 0, runtimeVersion = 0;
    cudaSafeCall( cudaDriverGetVersion(&driverVersion) );
    cudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );

    for(int dev = beg; dev < end; ++dev)
    {
        cudaDeviceProp prop;
        cudaSafeCall( cudaGetDeviceProperties(&prop, dev) );

        const char *arch_str = prop.major < 2 ? " (not Fermi)" : "";
        printf("Device %d:  \"%s\"  %.0fMb", dev, prop.name, (float)prop.totalGlobalMem/1048576.0f);
        printf(", sm_%d%d%s", prop.major, prop.minor, arch_str);

        int cores = convertSMVer2Cores(prop.major, prop.minor);
        if (cores > 0)
            printf(", %d cores", cores * prop.multiProcessorCount);

        printf(", Driver/Runtime ver.%d.%d/%d.%d\n", driverVersion/1000, driverVersion%100, runtimeVersion/1000, runtimeVersion%100);
    }

    fflush(stdout);
#endif
}

////////////////////////////////////////////////////////////////////////
// Error handling

#ifdef HAVE_CUDA

namespace
{
#define error_entry(entry)  { entry, #entry }

    struct ErrorEntry
    {
        int code;
        const char* str;
    };

    struct ErrorEntryComparer
    {
        int code;
        ErrorEntryComparer(int code_) : code(code_) {}
        bool operator()(const ErrorEntry& e) const { return e.code == code; }
    };

    const ErrorEntry npp_errors [] =
    {
#if defined (_MSC_VER)
        error_entry( NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY ),
#endif

#if NPP_VERSION < 5500
        error_entry( NPP_BAD_ARG_ERROR ),
        error_entry( NPP_COEFF_ERROR ),
        error_entry( NPP_RECT_ERROR ),
        error_entry( NPP_QUAD_ERROR ),
        error_entry( NPP_MEMFREE_ERR ),
        error_entry( NPP_MEMSET_ERR ),
        error_entry( NPP_MEM_ALLOC_ERR ),
        error_entry( NPP_HISTO_NUMBER_OF_LEVELS_ERROR ),
        error_entry( NPP_MIRROR_FLIP_ERR ),
        error_entry( NPP_INVALID_INPUT ),
        error_entry( NPP_POINTER_ERROR ),
        error_entry( NPP_WARNING ),
        error_entry( NPP_ODD_ROI_WARNING ),
#else
        error_entry( NPP_INVALID_HOST_POINTER_ERROR ),
        error_entry( NPP_INVALID_DEVICE_POINTER_ERROR ),
        error_entry( NPP_LUT_PALETTE_BITSIZE_ERROR ),
        error_entry( NPP_ZC_MODE_NOT_SUPPORTED_ERROR ),
        error_entry( NPP_MEMFREE_ERROR ),
        error_entry( NPP_MEMSET_ERROR ),
        error_entry( NPP_QUALITY_INDEX_ERROR ),
        error_entry( NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR ),
        error_entry( NPP_CHANNEL_ORDER_ERROR ),
        error_entry( NPP_ZERO_MASK_VALUE_ERROR ),
        error_entry( NPP_QUADRANGLE_ERROR ),
        error_entry( NPP_RECTANGLE_ERROR ),
        error_entry( NPP_COEFFICIENT_ERROR ),
        error_entry( NPP_NUMBER_OF_CHANNELS_ERROR ),
        error_entry( NPP_COI_ERROR ),
        error_entry( NPP_DIVISOR_ERROR ),
        error_entry( NPP_CHANNEL_ERROR ),
        error_entry( NPP_STRIDE_ERROR ),
        error_entry( NPP_ANCHOR_ERROR ),
        error_entry( NPP_MASK_SIZE_ERROR ),
        error_entry( NPP_MIRROR_FLIP_ERROR ),
        error_entry( NPP_MOMENT_00_ZERO_ERROR ),
        error_entry( NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR ),
        error_entry( NPP_THRESHOLD_ERROR ),
        error_entry( NPP_CONTEXT_MATCH_ERROR ),
        error_entry( NPP_FFT_FLAG_ERROR ),
        error_entry( NPP_FFT_ORDER_ERROR ),
        error_entry( NPP_SCALE_RANGE_ERROR ),
        error_entry( NPP_DATA_TYPE_ERROR ),
        error_entry( NPP_OUT_OFF_RANGE_ERROR ),
        error_entry( NPP_DIVIDE_BY_ZERO_ERROR ),
        error_entry( NPP_MEMORY_ALLOCATION_ERR ),
        error_entry( NPP_RANGE_ERROR ),
        error_entry( NPP_BAD_ARGUMENT_ERROR ),
        error_entry( NPP_NO_MEMORY_ERROR ),
        error_entry( NPP_ERROR_RESERVED ),
        error_entry( NPP_NO_OPERATION_WARNING ),
        error_entry( NPP_DIVIDE_BY_ZERO_WARNING ),
        error_entry( NPP_WRONG_INTERSECTION_ROI_WARNING ),
#endif

        error_entry( NPP_NOT_SUPPORTED_MODE_ERROR ),
        error_entry( NPP_ROUND_MODE_NOT_SUPPORTED_ERROR ),
        error_entry( NPP_RESIZE_NO_OPERATION_ERROR ),
        error_entry( NPP_LUT_NUMBER_OF_LEVELS_ERROR ),
        error_entry( NPP_TEXTURE_BIND_ERROR ),
        error_entry( NPP_WRONG_INTERSECTION_ROI_ERROR ),
        error_entry( NPP_NOT_EVEN_STEP_ERROR ),
        error_entry( NPP_INTERPOLATION_ERROR ),
        error_entry( NPP_RESIZE_FACTOR_ERROR ),
        error_entry( NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR ),
        error_entry( NPP_MEMCPY_ERROR ),
        error_entry( NPP_ALIGNMENT_ERROR ),
        error_entry( NPP_STEP_ERROR ),
        error_entry( NPP_SIZE_ERROR ),
        error_entry( NPP_NULL_POINTER_ERROR ),
        error_entry( NPP_CUDA_KERNEL_EXECUTION_ERROR ),
        error_entry( NPP_NOT_IMPLEMENTED_ERROR ),
        error_entry( NPP_ERROR ),
        error_entry( NPP_NO_ERROR ),
        error_entry( NPP_SUCCESS ),
        error_entry( NPP_WRONG_INTERSECTION_QUAD_WARNING ),
        error_entry( NPP_MISALIGNED_DST_ROI_WARNING ),
        error_entry( NPP_AFFINE_QUAD_INCORRECT_WARNING ),
        error_entry( NPP_DOUBLE_SIZE_WARNING )
    };

    const size_t npp_error_num = sizeof(npp_errors) / sizeof(npp_errors[0]);

    const ErrorEntry cu_errors [] =
    {
        error_entry( CUDA_SUCCESS ),
        error_entry( CUDA_ERROR_INVALID_VALUE ),
        error_entry( CUDA_ERROR_OUT_OF_MEMORY ),
        error_entry( CUDA_ERROR_NOT_INITIALIZED ),
        error_entry( CUDA_ERROR_DEINITIALIZED ),
        error_entry( CUDA_ERROR_PROFILER_DISABLED ),
        error_entry( CUDA_ERROR_PROFILER_NOT_INITIALIZED ),
        error_entry( CUDA_ERROR_PROFILER_ALREADY_STARTED ),
        error_entry( CUDA_ERROR_PROFILER_ALREADY_STOPPED ),
        error_entry( CUDA_ERROR_NO_DEVICE ),
        error_entry( CUDA_ERROR_INVALID_DEVICE ),
        error_entry( CUDA_ERROR_INVALID_IMAGE ),
        error_entry( CUDA_ERROR_INVALID_CONTEXT ),
        error_entry( CUDA_ERROR_CONTEXT_ALREADY_CURRENT ),
        error_entry( CUDA_ERROR_MAP_FAILED ),
        error_entry( CUDA_ERROR_UNMAP_FAILED ),
        error_entry( CUDA_ERROR_ARRAY_IS_MAPPED ),
        error_entry( CUDA_ERROR_ALREADY_MAPPED ),
        error_entry( CUDA_ERROR_NO_BINARY_FOR_GPU ),
        error_entry( CUDA_ERROR_ALREADY_ACQUIRED ),
        error_entry( CUDA_ERROR_NOT_MAPPED ),
        error_entry( CUDA_ERROR_NOT_MAPPED_AS_ARRAY ),
        error_entry( CUDA_ERROR_NOT_MAPPED_AS_POINTER ),
        error_entry( CUDA_ERROR_ECC_UNCORRECTABLE ),
        error_entry( CUDA_ERROR_UNSUPPORTED_LIMIT ),
        error_entry( CUDA_ERROR_CONTEXT_ALREADY_IN_USE ),
        error_entry( CUDA_ERROR_INVALID_SOURCE ),
        error_entry( CUDA_ERROR_FILE_NOT_FOUND ),
        error_entry( CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND ),
        error_entry( CUDA_ERROR_SHARED_OBJECT_INIT_FAILED ),
        error_entry( CUDA_ERROR_OPERATING_SYSTEM ),
        error_entry( CUDA_ERROR_INVALID_HANDLE ),
        error_entry( CUDA_ERROR_NOT_FOUND ),
        error_entry( CUDA_ERROR_NOT_READY ),
        error_entry( CUDA_ERROR_LAUNCH_FAILED ),
        error_entry( CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES ),
        error_entry( CUDA_ERROR_LAUNCH_TIMEOUT ),
        error_entry( CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING ),
        error_entry( CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED ),
        error_entry( CUDA_ERROR_PEER_ACCESS_NOT_ENABLED ),
        error_entry( CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE ),
        error_entry( CUDA_ERROR_CONTEXT_IS_DESTROYED ),
        error_entry( CUDA_ERROR_ASSERT ),
        error_entry( CUDA_ERROR_TOO_MANY_PEERS ),
        error_entry( CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED ),
        error_entry( CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED ),
        error_entry( CUDA_ERROR_UNKNOWN )
    };

    const size_t cu_errors_num = sizeof(cu_errors) / sizeof(cu_errors[0]);

    cv::String getErrorString(int code, const ErrorEntry* errors, size_t n)
    {
        size_t idx = std::find_if(errors, errors + n, ErrorEntryComparer(code)) - errors;

        const char* msg = (idx != n) ? errors[idx].str : "Unknown error code";
        cv::String str = cv::format("%s [Code = %d]", msg, code);

        return str;
    }
}

#endif

String cv::cuda::getNppErrorMessage(int code)
{
#ifndef HAVE_CUDA
    (void) code;
    return String();
#else
    return getErrorString(code, npp_errors, npp_error_num);
#endif
}

String cv::cuda::getCudaDriverApiErrorMessage(int code)
{
#ifndef HAVE_CUDA
    (void) code;
    return String();
#else
    return getErrorString(code, cu_errors, cu_errors_num);
#endif
}
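
The listing above only defines the device-query API. As a quick orientation, here is a minimal usage sketch (not part of cuda_info.cpp) showing how application code might enumerate devices through this interface. It assumes an OpenCV build with HAVE_CUDA enabled and the public header opencv2/core/cuda.hpp; device numbering and feature-set constants are the standard cv::cuda ones.

#include <opencv2/core/cuda.hpp>
#include <cstdio>

int main()
{
    // Number of CUDA-capable devices; 0 means none, -1 means the installed driver is too old.
    int count = cv::cuda::getCudaEnabledDeviceCount();
    std::printf("CUDA devices: %d\n", count);

    for (int dev = 0; dev < count; ++dev)
    {
        // DeviceInfo reads the cached cudaDeviceProp for the given device id.
        cv::cuda::DeviceInfo info(dev);
        std::printf("%d: %s, sm_%d%d, compatible with this OpenCV build: %s\n",
                    dev, info.name(), info.majorVersion(), info.minorVersion(),
                    info.isCompatible() ? "yes" : "no");
    }

    // deviceSupports() checks both the compiled CUDA_ARCH targets and the current device.
    if (count > 0 && cv::cuda::deviceSupports(cv::cuda::FEATURE_SET_COMPUTE_30))
        std::printf("Compute capability 3.0 features are available\n");

    return 0;
}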
