cuda.hpp
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef OPENCV_CORE_CUDA_HPP
#define OPENCV_CORE_CUDA_HPP

#ifndef __cplusplus
#  error cuda.hpp header must be compiled as C++
#endif

#include "opencv2/core.hpp"
#include "opencv2/core/cuda_types.hpp"

/**
  @defgroup cuda CUDA-accelerated Computer Vision
  @{
    @defgroup cudacore Core part
    @{
      @defgroup cudacore_init Initialization and Information
      @defgroup cudacore_struct Data Structures
    @}
  @}
 */

namespace cv { namespace cuda {

//! @addtogroup cudacore_struct
//! @{

//===================================================================================
// GpuMat
//===================================================================================

/** @brief Base storage class for GPU memory with reference counting.

Its interface matches the Mat interface with the following limitations:

-   no arbitrary dimensions support (only 2D)
-   no functions that return references to their data (because references on GPU are not valid for
    CPU)
-   no expression templates technique support

Beware that the latter limitation may lead to overloaded matrix operators that cause memory
allocations. The GpuMat class is convertible to cuda::PtrStepSz and cuda::PtrStep so it can be
passed directly to the kernel.

@note In contrast with Mat, in most cases GpuMat::isContinuous() == false. This means that rows are
aligned to a size depending on the hardware. Single-row GpuMat is always a continuous matrix.

@note It is not recommended to leave static or global GpuMat variables allocated, that is, to rely
on their destructors. The destruction order of such variables and the CUDA context is undefined, and
the GPU memory release function returns an error if the CUDA context has already been destroyed.

@sa Mat
*/
class CV_EXPORTS GpuMat
{
public:
    class CV_EXPORTS Allocator
    {
    public:
        virtual ~Allocator() {}

        // allocator must fill data, step and refcount fields
        virtual bool allocate(GpuMat* mat, int rows, int cols, size_t elemSize) = 0;
        virtual void free(GpuMat* mat) = 0;
    };

    //! default allocator
    static Allocator* defaultAllocator();
    static void setDefaultAllocator(Allocator* allocator);

    //! default constructor
    explicit GpuMat(Allocator* allocator = defaultAllocator());

    //! constructs GpuMat of the specified size and type
    GpuMat(int rows, int cols, int type, Allocator* allocator = defaultAllocator());
    GpuMat(Size size, int type, Allocator* allocator = defaultAllocator());

    //! constructs GpuMat and fills it with the specified value _s
    GpuMat(int rows, int cols, int type, Scalar s, Allocator* allocator = defaultAllocator());
    GpuMat(Size size, int type, Scalar s, Allocator* allocator = defaultAllocator());

    //! copy constructor
    GpuMat(const GpuMat& m);

    //! constructor for GpuMat headers pointing to user-allocated data
    GpuMat(int rows, int cols, int type, void* data, size_t step = Mat::AUTO_STEP);
    GpuMat(Size size, int type, void* data, size_t step = Mat::AUTO_STEP);

    //! creates a GpuMat header for a part of the bigger matrix
    GpuMat(const GpuMat& m, Range rowRange, Range colRange);
    GpuMat(const GpuMat& m, Rect roi);

    //! builds GpuMat from host memory (Blocking call)
    explicit GpuMat(InputArray arr, Allocator* allocator = defaultAllocator());

    //! destructor - calls release()
    ~GpuMat();

    //! assignment operators
    GpuMat& operator =(const GpuMat& m);

    //! allocates new GpuMat data unless the GpuMat already has specified size and type
    void create(int rows, int cols, int type);
    void create(Size size, int type);

    //! decreases reference counter, deallocates the data when reference counter reaches 0
    void release();

    //! swaps with other smart pointer
    void swap(GpuMat& mat);

    //! performs data upload to GpuMat (Blocking call)
    void upload(InputArray arr);

    //! performs data upload to GpuMat (Non-Blocking call)
    void upload(InputArray arr, Stream& stream);

    //! performs data download from device to host memory (Blocking call)
    void download(OutputArray dst) const;

    //! performs data download from device to host memory (Non-Blocking call)
    void download(OutputArray dst, Stream& stream) const;

    //! returns deep copy of the GpuMat, i.e. the data is copied
    GpuMat clone() const;

    //! copies the GpuMat content to device memory (Blocking call)
    void copyTo(OutputArray dst) const;

    //! copies the GpuMat content to device memory (Non-Blocking call)
    void copyTo(OutputArray dst, Stream& stream) const;

    //! copies those GpuMat elements to "m" that are marked with non-zero mask elements (Blocking call)
    void copyTo(OutputArray dst, InputArray mask) const;

    //! copies those GpuMat elements to "m" that are marked with non-zero mask elements (Non-Blocking call)
    void copyTo(OutputArray dst, InputArray mask, Stream& stream) const;

    //! sets some of the GpuMat elements to s (Blocking call)
    GpuMat& setTo(Scalar s);

    //! sets some of the GpuMat elements to s (Non-Blocking call)
    GpuMat& setTo(Scalar s, Stream& stream);

    //! sets some of the GpuMat elements to s, according to the mask (Blocking call)
    GpuMat& setTo(Scalar s, InputArray mask);

    //! sets some of the GpuMat elements to s, according to the mask (Non-Blocking call)
    GpuMat& setTo(Scalar s, InputArray mask, Stream& stream);

    //! converts GpuMat to another datatype (Blocking call)
    void convertTo(OutputArray dst, int rtype) const;

    //! converts GpuMat to another datatype (Non-Blocking call)
    void convertTo(OutputArray dst, int rtype, Stream& stream) const;

    //! converts GpuMat to another datatype with scaling (Blocking call)
    void convertTo(OutputArray dst, int rtype, double alpha, double beta = 0.0) const;

    //! converts GpuMat to another datatype with scaling (Non-Blocking call)
    void convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const;

    //! converts GpuMat to another datatype with scaling (Non-Blocking call)
    void convertTo(OutputArray dst, int rtype, double alpha, double beta, Stream& stream) const;

    void assignTo(GpuMat& m, int type = -1) const;

    //! returns pointer to y-th row
    uchar* ptr(int y = 0);
    const uchar* ptr(int y = 0) const;

    //! template version of the above method
    template<typename _Tp> _Tp* ptr(int y = 0);
    template<typename _Tp> const _Tp* ptr(int y = 0) const;

    template <typename _Tp> operator PtrStepSz<_Tp>() const;
    template <typename _Tp> operator PtrStep<_Tp>() const;

    //! returns a new GpuMat header for the specified row
    GpuMat row(int y) const;

    //! returns a new GpuMat header for the specified column
    GpuMat col(int x) const;

    //! ... for the specified row span
    GpuMat rowRange(int startrow, int endrow) const;
    GpuMat rowRange(Range r) const;

    //! ... for the specified column span
    GpuMat colRange(int startcol, int endcol) const;
    GpuMat colRange(Range r) const;

    //! extracts a rectangular sub-GpuMat (this is a generalized form of row, rowRange etc.)
    GpuMat operator ()(Range rowRange, Range colRange) const;
    GpuMat operator ()(Rect roi) const;

    //! creates alternative GpuMat header for the same data, with different
    //! number of channels and/or different number of rows
    GpuMat reshape(int cn, int rows = 0) const;

    //! locates GpuMat header within a parent GpuMat
    void locateROI(Size& wholeSize, Point& ofs) const;

    //! moves/resizes the current GpuMat ROI inside the parent GpuMat
    GpuMat& adjustROI(int dtop, int dbottom, int dleft, int dright);

    //! returns true iff the GpuMat data is continuous
    //! (i.e. when there are no gaps between successive rows)
    bool isContinuous() const;

    //! returns element size in bytes
    size_t elemSize() const;

    //! returns the size of element channel in bytes
    size_t elemSize1() const;

    //! returns element type
    int type() const;

    //! returns element depth
    int depth() const;

    //! returns number of channels
    int channels() const;

    //! returns step/elemSize1()
    size_t step1() const;

    //! returns GpuMat size : width == number of columns, height == number of rows
    Size size() const;

    //! returns true if GpuMat data is NULL
    bool empty() const;

    /*! includes several bit-fields:
    - the magic signature
    - continuity flag
    - depth
    - number of channels
    */
    int flags;

    //! the number of rows and columns
    int rows, cols;

    //! a distance between successive rows in bytes; includes the gap if any
    size_t step;

    //! pointer to the data
    uchar* data;

    //! pointer to the reference counter;
    //! when GpuMat points to user-allocated data, the pointer is NULL
    int* refcount;

    //! helper fields used in locateROI and adjustROI
    uchar* datastart;
    const uchar* dataend;

    //! allocator
    Allocator* allocator;
};
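
/* Illustrative sketch (not part of the original header): a typical blocking
   round trip through GpuMat — upload, process on the GPU, download. The
   function name is hypothetical; it assumes OpenCV was built with CUDA
   support and a CUDA device is available.

@code
    void roundTrip(const cv::Mat& hostInput)
    {
        cv::cuda::GpuMat device;
        device.upload(hostInput);                      // blocking host -> device copy

        cv::cuda::GpuMat scaled;
        device.convertTo(scaled, CV_32F, 1.0 / 255.0); // conversion runs on the GPU

        cv::Mat hostResult;
        scaled.download(hostResult);                   // blocking device -> host copy
    }
@endcode
*/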

/** @brief Creates a continuous matrix.

@param rows Row count.
@param cols Column count.
@param type Type of the matrix.
@param arr Destination matrix. This parameter changes only if it has a proper type and area (
\f$\texttt{rows} \times \texttt{cols}\f$ ).

Matrix is called continuous if its elements are stored continuously, that is, without gaps at the
end of each row.
*/
CV_EXPORTS void createContinuous(int rows, int cols, int type, OutputArray arr);

/** @brief Ensures that the size of a matrix is big enough and the matrix has a proper type.

@param rows Minimum desired number of rows.
@param cols Minimum desired number of columns.
@param type Desired matrix type.
@param arr Destination matrix.

The function does not reallocate memory if the matrix has proper attributes already.
*/
CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr);

//! BufferPool management (must be called before Stream creation)
CV_EXPORTS void setBufferPoolUsage(bool on);
CV_EXPORTS void setBufferPoolConfig(int deviceId, size_t stackSize, int stackCount);
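
/* Illustrative sketch (not part of the original header): reusing a GPU buffer
   across calls with ensureSizeIsEnough, and allocating a gap-free matrix with
   createContinuous. The function name and sizes are hypothetical.

@code
    void reuseBuffers()
    {
        cv::cuda::GpuMat buf;

        // Allocates on the first call; subsequent calls with the same or a
        // smaller size reuse the existing memory instead of reallocating.
        cv::cuda::ensureSizeIsEnough(480, 640, CV_8UC1, buf);

        // A continuous matrix has no padding between successive rows.
        cv::cuda::GpuMat cont;
        cv::cuda::createContinuous(480, 640, CV_8UC1, cont);
        CV_Assert(cont.isContinuous());
    }
@endcode
*/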

//===================================================================================
// HostMem
//===================================================================================

/** @brief Class with reference counting wrapping special memory type allocation functions from CUDA.

Its interface is also Mat-like but with additional memory type parameters.

-   **PAGE_LOCKED** sets a page locked memory type used commonly for fast and asynchronous
    uploading/downloading data from/to GPU.
-   **SHARED** specifies a zero copy memory allocation that enables mapping the host memory to GPU
    address space, if supported.
-   **WRITE_COMBINED** sets the write combined buffer that is not cached by CPU. Such buffers are
    used to supply GPU with data when GPU only reads it. The advantage is a better CPU cache
    utilization.

@note Allocation size of such memory types is usually limited. For more details, see the *CUDA 2.2
Pinned Memory APIs* document or the *CUDA C Programming Guide*.
*/
class CV_EXPORTS HostMem
{
public:
    enum AllocType { PAGE_LOCKED = 1, SHARED = 2, WRITE_COMBINED = 4 };

    static MatAllocator* getAllocator(AllocType alloc_type = PAGE_LOCKED);

    explicit HostMem(AllocType alloc_type = PAGE_LOCKED);

    HostMem(const HostMem& m);

    HostMem(int rows, int cols, int type, AllocType alloc_type = PAGE_LOCKED);
    HostMem(Size size, int type, AllocType alloc_type = PAGE_LOCKED);

    //! creates from host memory, copying the data
    explicit HostMem(InputArray arr, AllocType alloc_type = PAGE_LOCKED);

    ~HostMem();

    HostMem& operator =(const HostMem& m);

    //! swaps with other smart pointer
    void swap(HostMem& b);

    //! returns deep copy of the matrix, i.e. the data is copied
    HostMem clone() const;

    //! allocates new matrix data unless the matrix already has specified size and type.
    void create(int rows, int cols, int type);
    void create(Size size, int type);

    //! creates alternative HostMem header for the same data, with different
    //! number of channels and/or different number of rows
    HostMem reshape(int cn, int rows = 0) const;

    //! decrements reference counter and releases memory if needed.
    void release();

    //! returns matrix header with disabled reference counting for HostMem data.
    Mat createMatHeader() const;

    /** @brief Maps CPU memory to GPU address space and creates the cuda::GpuMat header without reference counting
    for it.

    This can be done only if memory was allocated with the SHARED flag and if it is supported by the
    hardware. Laptops often share video and CPU memory, so address spaces can be mapped, which
    eliminates an extra copy.
    */
    GpuMat createGpuMatHeader() const;

    // Please see cv::Mat for descriptions
    bool isContinuous() const;
    size_t elemSize() const;
    size_t elemSize1() const;
    int type() const;
    int depth() const;
    int channels() const;
    size_t step1() const;
    Size size() const;
    bool empty() const;

    // Please see cv::Mat for descriptions
    int flags;
    int rows, cols;
    size_t step;

    uchar* data;
    int* refcount;

    uchar* datastart;
    const uchar* dataend;

    AllocType alloc_type;
};
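
/* Illustrative sketch (not part of the original header): allocating page-locked
   host memory and using it for an asynchronous upload. The function name and
   sizes are hypothetical; truly asynchronous copies require pinned memory such
   as this.

@code
    void pinnedUpload()
    {
        // Allocate page-locked host memory; fill it through a Mat header (no copy).
        cv::cuda::HostMem pinned(480, 640, CV_8UC1, cv::cuda::HostMem::PAGE_LOCKED);
        pinned.createMatHeader().setTo(cv::Scalar::all(0));

        // Enqueue an asynchronous upload from the pinned buffer.
        cv::cuda::Stream stream;
        cv::cuda::GpuMat device;
        device.upload(pinned, stream);
        stream.waitForCompletion();
    }
@endcode
*/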

/** @brief Page-locks the memory of matrix and maps it for the device(s).

@param m Input matrix.
*/
CV_EXPORTS void registerPageLocked(Mat& m);

/** @brief Unmaps the memory of matrix and makes it pageable again.

@param m Input matrix.
*/
CV_EXPORTS void unregisterPageLocked(Mat& m);

//===================================================================================
// Stream
//===================================================================================

/** @brief This class encapsulates a queue of asynchronous calls.

@note Currently, you may face problems if an operation is enqueued twice with different data. Some
functions use the constant GPU memory, and the next call may update the memory before the previous
one has finished. But calling different operations asynchronously is safe because each operation
has its own constant buffer. Memory copy/upload/download/set operations to the buffers you hold are
also safe.

@note The Stream class is not thread-safe. Please use different Stream objects for different CPU threads.

@code
void thread1()
{
    cv::cuda::Stream stream1;
    cv::cuda::func1(..., stream1);
}

void thread2()
{
    cv::cuda::Stream stream2;
    cv::cuda::func2(..., stream2);
}
@endcode

@note By default all CUDA routines are launched in the Stream::Null() object if the stream is not
specified by the user. In a multi-threaded environment the stream objects must be passed explicitly
(see previous note).
*/
class CV_EXPORTS Stream
{
    typedef void (Stream::*bool_type)() const;
    void this_type_does_not_support_comparisons() const {}

public:
    typedef void (*StreamCallback)(int status, void* userData);

    //! creates a new asynchronous stream
    Stream();

    /** @brief Returns true if the current stream queue is finished. Otherwise, it returns false.
    */
    bool queryIfComplete() const;

    /** @brief Blocks the current CPU thread until all operations in the stream are complete.
    */
    void waitForCompletion();

    /** @brief Makes a compute stream wait on an event.
    */
    void waitEvent(const Event& event);

    /** @brief Adds a callback to be called on the host after all currently enqueued items in the stream have
    completed.

    @note Callbacks must not make any CUDA API calls. Callbacks must not perform any synchronization
    that may depend on outstanding device work or other callbacks that are not mandated to run earlier.
    Callbacks without a mandated order (in independent streams) execute in undefined order and may be
    serialized.
    */
    void enqueueHostCallback(StreamCallback callback, void* userData);

    //! return Stream object for default CUDA stream
    static Stream& Null();

    //! returns true if stream object is not default (!= 0)
    operator bool_type() const;

    class Impl;

private:
    Ptr<Impl> impl_;
    Stream(const Ptr<Impl>& impl);

    friend struct StreamAccessor;
    friend class BufferPool;
    friend class DefaultDeviceInitializer;
};
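
/* Illustrative sketch (not part of the original header): queueing a chain of
   asynchronous operations on one stream and synchronizing once at the end.
   The function name is hypothetical; for truly asynchronous copies the host
   buffers should be page-locked (see HostMem above).

@code
    void asyncPipeline(const cv::Mat& hostInput, cv::Mat& hostOutput)
    {
        cv::cuda::Stream stream;

        cv::cuda::GpuMat src, dst;
        src.upload(hostInput, stream);                    // enqueue host -> device copy
        src.convertTo(dst, CV_32F, 1.0 / 255.0, stream);  // enqueue conversion
        dst.download(hostOutput, stream);                 // enqueue device -> host copy

        stream.waitForCompletion(); // block until all queued work is done;
                                    // hostOutput is valid only after this returns
    }
@endcode
*/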

class CV_EXPORTS Event
{
public:
    enum CreateFlags
    {
        DEFAULT        = 0x00, /**< Default event flag */
        BLOCKING_SYNC  = 0x01, /**< Event uses blocking synchronization */
        DISABLE_TIMING = 0x02, /**< Event will not record timing data */
        INTERPROCESS   = 0x04  /**< Event is suitable for interprocess use. DisableTiming must be set */
    };

    explicit Event(CreateFlags flags = DEFAULT);

    //! records an event
    void record(Stream& stream = Stream::Null());

    //! queries an event's status
    bool queryIfComplete() const;

    //! waits for an event to complete
    void waitForCompletion();

    //! computes the elapsed time between events
    static float elapsedTime(const Event& start, const Event& end);

    class Impl;

private:
    Ptr<Impl> impl_;
    Event(const Ptr<Impl>& impl);

    friend struct EventAccessor;
};
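
/* Illustrative sketch (not part of the original header): timing GPU work with
   a pair of events recorded on the same stream. The function name is
   hypothetical.

@code
    float timeGpuWork()
    {
        cv::cuda::Stream stream;
        cv::cuda::Event start, stop;

        start.record(stream);
        // ... enqueue asynchronous GPU work on 'stream' here ...
        stop.record(stream);

        stop.waitForCompletion();
        return cv::cuda::Event::elapsedTime(start, stop); // milliseconds
    }
@endcode
*/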

//! @} cudacore_struct

//===================================================================================
// Initialization & Info
//===================================================================================

//! @addtogroup cudacore_init
//! @{

/** @brief Returns the number of installed CUDA-enabled devices.

Use this function before any other CUDA function calls. If OpenCV is compiled without CUDA support,
this function returns 0.
*/
CV_EXPORTS int getCudaEnabledDeviceCount();

/** @brief Sets a device and initializes it for the current thread.

@param device System index of a CUDA device starting with 0.

If the call of this function is omitted, a default device is initialized at the first CUDA usage.
*/
CV_EXPORTS void setDevice(int device);

/** @brief Returns the current device index set by cuda::setDevice or initialized by default.
*/
CV_EXPORTS int getDevice();

/** @brief Explicitly destroys and cleans up all resources associated with the current device in the current
process.

Any subsequent API call to this device will reinitialize the device.
*/
CV_EXPORTS void resetDevice();

/** @brief Enumeration providing CUDA computing features.
*/
enum FeatureSet
{
    FEATURE_SET_COMPUTE_10 = 10,
    FEATURE_SET_COMPUTE_11 = 11,
    FEATURE_SET_COMPUTE_12 = 12,
    FEATURE_SET_COMPUTE_13 = 13,
    FEATURE_SET_COMPUTE_20 = 20,
    FEATURE_SET_COMPUTE_21 = 21,
    FEATURE_SET_COMPUTE_30 = 30,
    FEATURE_SET_COMPUTE_32 = 32,
    FEATURE_SET_COMPUTE_35 = 35,
    FEATURE_SET_COMPUTE_50 = 50,

    GLOBAL_ATOMICS         = FEATURE_SET_COMPUTE_11,
    SHARED_ATOMICS         = FEATURE_SET_COMPUTE_12,
    NATIVE_DOUBLE          = FEATURE_SET_COMPUTE_13,
    WARP_SHUFFLE_FUNCTIONS = FEATURE_SET_COMPUTE_30,
    DYNAMIC_PARALLELISM    = FEATURE_SET_COMPUTE_35
};

//! checks whether current device supports the given feature
CV_EXPORTS bool deviceSupports(FeatureSet feature_set);

/** @brief Class providing a set of static methods to check what NVIDIA\* card architecture the CUDA module was
built for.

According to the CUDA C Programming Guide Version 3.2: "PTX code produced for some specific compute
capability can always be compiled to binary code of greater or equal compute capability".
*/
class CV_EXPORTS TargetArchs
{
public:
    /** @brief The following method checks whether the module was built with the support of the given feature:

    @param feature_set Features to be checked. See cuda::FeatureSet.
    */
    static bool builtWith(FeatureSet feature_set);

    /** @brief There is a set of methods to check whether the module contains intermediate (PTX) or binary CUDA
    code for the given architecture(s):

    @param major Major compute capability version.
    @param minor Minor compute capability version.
    */
    static bool has(int major, int minor);
    static bool hasPtx(int major, int minor);
    static bool hasBin(int major, int minor);

    static bool hasEqualOrLessPtx(int major, int minor);
    static bool hasEqualOrGreater(int major, int minor);
    static bool hasEqualOrGreaterPtx(int major, int minor);
    static bool hasEqualOrGreaterBin(int major, int minor);
};
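
/* Illustrative sketch (not part of the original header): checking CUDA
   availability and required capabilities before doing GPU work. The function
   name is hypothetical.

@code
    bool cudaUsable()
    {
        if (cv::cuda::getCudaEnabledDeviceCount() == 0)
            return false; // built without CUDA, or no device present

        // Does the current device support double-precision arithmetic?
        if (!cv::cuda::deviceSupports(cv::cuda::NATIVE_DOUBLE))
            return false;

        // Was the module built with PTX or binary code for compute capability >= 3.0?
        return cv::cuda::TargetArchs::hasEqualOrGreater(3, 0);
    }
@endcode
*/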

/** @brief Class providing functionality for querying the specified GPU properties.
*/
class CV_EXPORTS DeviceInfo
{
public:
    //! creates DeviceInfo object for the current GPU
    DeviceInfo();

    /** @brief The constructors.

    @param device_id System index of the CUDA device starting with 0.

    Constructs the DeviceInfo object for the specified device. If the device_id parameter is omitted,
    it constructs an object for the current device.
    */
    DeviceInfo(int device_id);

    /** @brief Returns system index of the CUDA device starting with 0.
    */
    int deviceID() const;

    //! ASCII string identifying device
    const char* name() const;

    //! global memory available on device in bytes
    size_t totalGlobalMem() const;

    //! shared memory available per block in bytes
    size_t sharedMemPerBlock() const;

    //! 32-bit registers available per block
    int regsPerBlock() const;

    //! warp size in threads
    int warpSize() const;

    //! maximum pitch in bytes allowed by memory copies
    size_t memPitch() const;

    //! maximum number of threads per block
    int maxThreadsPerBlock() const;

    //! maximum size of each dimension of a block
    Vec3i maxThreadsDim() const;

    //! maximum size of each dimension of a grid
    Vec3i maxGridSize() const;

    //! clock frequency in kilohertz
    int clockRate() const;

    //! constant memory available on device in bytes
    size_t totalConstMem() const;

    //! major compute capability
    int majorVersion() const;

    //! minor compute capability
    int minorVersion() const;

    //! alignment requirement for textures
    size_t textureAlignment() const;

    //! pitch alignment requirement for texture references bound to pitched memory
    size_t texturePitchAlignment() const;

    //! number of multiprocessors on device
    int multiProcessorCount() const;

    //! specifies whether there is a run time limit on kernels
    bool kernelExecTimeoutEnabled() const;

    //! device is integrated as opposed to discrete
    bool integrated() const;

    //! device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer
    bool canMapHostMemory() const;

    enum ComputeMode
    {
        ComputeModeDefault,          /**< default compute mode (Multiple threads can use cudaSetDevice with this device) */
        ComputeModeExclusive,        /**< compute-exclusive-thread mode (Only one thread in one process will be able to use cudaSetDevice with this device) */
        ComputeModeProhibited,       /**< compute-prohibited mode (No threads can use cudaSetDevice with this device) */
        ComputeModeExclusiveProcess  /**< compute-exclusive-process mode (Many threads in one process will be able to use cudaSetDevice with this device) */
    };

    //! compute mode
    ComputeMode computeMode() const;

    //! maximum 1D texture size
    int maxTexture1D() const;

    //! maximum 1D mipmapped texture size
    int maxTexture1DMipmap() const;

    //! maximum size for 1D textures bound to linear memory
    int maxTexture1DLinear() const;

    //! maximum 2D texture dimensions
    Vec2i maxTexture2D() const;

    //! maximum 2D mipmapped texture dimensions
    Vec2i maxTexture2DMipmap() const;

    //! maximum dimensions (width, height, pitch) for 2D textures bound to pitched memory
    Vec3i maxTexture2DLinear() const;

    //! maximum 2D texture dimensions if texture gather operations have to be performed
    Vec2i maxTexture2DGather() const;

    //! maximum 3D texture dimensions
    Vec3i maxTexture3D() const;

    //! maximum Cubemap texture dimensions
    int maxTextureCubemap() const;

    //! maximum 1D layered texture dimensions
    Vec2i maxTexture1DLayered() const;

    //! maximum 2D layered texture dimensions
    Vec3i maxTexture2DLayered() const;

    //! maximum Cubemap layered texture dimensions
    Vec2i maxTextureCubemapLayered() const;

    //! maximum 1D surface size
    int maxSurface1D() const;

    //! maximum 2D surface dimensions
    Vec2i maxSurface2D() const;

    //! maximum 3D surface dimensions
    Vec3i maxSurface3D() const;

    //! maximum 1D layered surface dimensions
    Vec2i maxSurface1DLayered() const;

    //! maximum 2D layered surface dimensions
    Vec3i maxSurface2DLayered() const;

    //! maximum Cubemap surface dimensions
    int maxSurfaceCubemap() const;

    //! maximum Cubemap layered surface dimensions
    Vec2i maxSurfaceCubemapLayered() const;

    //! alignment requirements for surfaces
    size_t surfaceAlignment() const;

    //! device can possibly execute multiple kernels concurrently
    bool concurrentKernels() const;

    //! device has ECC support enabled
    bool ECCEnabled() const;

    //! PCI bus ID of the device
    int pciBusID() const;

    //! PCI device ID of the device
    int pciDeviceID() const;

    //! PCI domain ID of the device
    int pciDomainID() const;

    //! true if device is a Tesla device using TCC driver, false otherwise
    bool tccDriver() const;

    //! number of asynchronous engines
    int asyncEngineCount() const;

    //! device shares a unified address space with the host
    bool unifiedAddressing() const;

    //! peak memory clock frequency in kilohertz
    int memoryClockRate() const;

    //! global memory bus width in bits
    int memoryBusWidth() const;

    //! size of L2 cache in bytes
    int l2CacheSize() const;

    //! maximum resident threads per multiprocessor
    int maxThreadsPerMultiProcessor() const;

    //! gets free and total device memory
    void queryMemory(size_t& totalMemory, size_t& freeMemory) const;
    size_t freeMemory() const;
    size_t totalMemory() const;

    /** @brief Provides information on CUDA feature support.

    @param feature_set Features to be checked. See cuda::FeatureSet.

    This function returns true if the device has the specified CUDA feature. Otherwise, it returns false.
    */
    bool supports(FeatureSet feature_set) const;

    /** @brief Checks the CUDA module and device compatibility.

    This function returns true if the CUDA module can be run on the specified device. Otherwise, it
    returns false.
    */
    bool isCompatible() const;

private:
    int device_id_;
};
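
/* Illustrative sketch (not part of the original header): querying a few
   properties of the current device. The function name is hypothetical and
   <iostream> is assumed to be included.

@code
    void printCurrentDevice()
    {
        cv::cuda::DeviceInfo info; // defaults to the current device

        std::cout << "Device " << info.deviceID() << ": " << info.name() << "\n"
                  << "  compute capability " << info.majorVersion() << "." << info.minorVersion() << "\n"
                  << "  " << info.multiProcessorCount() << " multiprocessors, "
                  << info.totalGlobalMem() / (1024 * 1024) << " MiB global memory\n";

        if (!info.isCompatible())
            std::cout << "  (this OpenCV CUDA build cannot run on this device)\n";
    }
@endcode
*/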

CV_EXPORTS void printCudaDeviceInfo(int device);
CV_EXPORTS void printShortCudaDeviceInfo(int device);

/** @brief Converts an array to half precision floating point numbers.

@param _src input array.
@param _dst output array.
@param stream Stream for the asynchronous version.
@sa convertFp16
*/
CV_EXPORTS void convertFp16(InputArray _src, OutputArray _dst, Stream& stream = Stream::Null());

//! @} cudacore_init

}} // namespace cv { namespace cuda {


#include "opencv2/core/cuda.inl.hpp"

#endif /* OPENCV_CORE_CUDA_HPP */
