Renesas / opencv-lib

Dependents:   RZ_A2M_Mbed_samples

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers cuda.hpp Source File

cuda.hpp

00001 /*M///////////////////////////////////////////////////////////////////////////////////////
00002 //
00003 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
00004 //
00005 //  By downloading, copying, installing or using the software you agree to this license.
00006 //  If you do not agree to this license, do not download, install,
00007 //  copy or use the software.
00008 //
00009 //
00010 //                          License Agreement
00011 //                For Open Source Computer Vision Library
00012 //
00013 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
00014 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
00015 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
00016 // Third party copyrights are property of their respective owners.
00017 //
00018 // Redistribution and use in source and binary forms, with or without modification,
00019 // are permitted provided that the following conditions are met:
00020 //
00021 //   * Redistribution's of source code must retain the above copyright notice,
00022 //     this list of conditions and the following disclaimer.
00023 //
00024 //   * Redistribution's in binary form must reproduce the above copyright notice,
00025 //     this list of conditions and the following disclaimer in the documentation
00026 //     and/or other materials provided with the distribution.
00027 //
00028 //   * The name of the copyright holders may not be used to endorse or promote products
00029 //     derived from this software without specific prior written permission.
00030 //
00031 // This software is provided by the copyright holders and contributors "as is" and
00032 // any express or implied warranties, including, but not limited to, the implied
00033 // warranties of merchantability and fitness for a particular purpose are disclaimed.
00034 // In no event shall the Intel Corporation or contributors be liable for any direct,
00035 // indirect, incidental, special, exemplary, or consequential damages
00036 // (including, but not limited to, procurement of substitute goods or services;
00037 // loss of use, data, or profits; or business interruption) however caused
00038 // and on any theory of liability, whether in contract, strict liability,
00039 // or tort (including negligence or otherwise) arising in any way out of
00040 // the use of this software, even if advised of the possibility of such damage.
00041 //
00042 //M*/
00043 
00044 #ifndef OPENCV_CORE_CUDA_HPP
00045 #define OPENCV_CORE_CUDA_HPP
00046 
00047 #ifndef __cplusplus
00048 #  error cuda.hpp header must be compiled as C++
00049 #endif
00050 
00051 #include "opencv2/core.hpp"
00052 #include "opencv2/core/cuda_types.hpp"
00053 
00054 /**
00055   @defgroup cuda CUDA-accelerated Computer Vision
00056   @{
00057     @defgroup cudacore Core part
00058     @{
00059       @defgroup cudacore_init Initialization and Information
00060       @defgroup cudacore_struct Data Structures
00061     @}
00062   @}
00063  */
00064 
00065 namespace cv { namespace cuda {
00066 
00067 //! @addtogroup cudacore_struct
00068 //! @{
00069 
00070 //===================================================================================
00071 // GpuMat
00072 //===================================================================================
00073 
00074 /** @brief Base storage class for GPU memory with reference counting.
00075 
00076 Its interface matches the Mat interface with the following limitations:
00077 
00078 -   no arbitrary dimensions support (only 2D)
00079 -   no functions that return references to their data (because references on GPU are not valid for
00080     CPU)
00081 -   no expression templates technique support
00082 
00083 Beware that the latter limitation may lead to overloaded matrix operators that cause memory
00084 allocations. The GpuMat class is convertible to cuda::PtrStepSz and cuda::PtrStep so it can be
00085 passed directly to the kernel.
00086 
00087 @note In contrast with Mat, in most cases GpuMat::isContinuous() == false . This means that rows are
00088 aligned to a size depending on the hardware. Single-row GpuMat is always a continuous matrix.
00089 
00090 @note You are not recommended to leave static or global GpuMat variables allocated, that is, to rely
00091 on its destructor. The destruction order of such variables and CUDA context is undefined. GPU memory
00092 release function returns error if the CUDA context has been destroyed before.
00093 
00094 @sa Mat
00095  */
00096 class CV_EXPORTS GpuMat
00097 {
00098 public:
00099     class CV_EXPORTS Allocator
00100     {
00101     public:
00102         virtual ~Allocator() {}
00103 
00104         // allocator must fill data, step and refcount fields
00105         virtual bool allocate(GpuMat* mat, int rows, int cols, size_t elemSize) = 0;
00106         virtual void free(GpuMat* mat) = 0;
00107     };
00108 
00109     //! default allocator
00110     static Allocator* defaultAllocator();
00111     static void setDefaultAllocator(Allocator* allocator);
00112 
00113     //! default constructor
00114     explicit GpuMat(Allocator* allocator = defaultAllocator());
00115 
00116     //! constructs GpuMat of the specified size and type
00117     GpuMat(int rows, int cols, int type, Allocator* allocator = defaultAllocator());
00118     GpuMat(Size size, int type, Allocator* allocator = defaultAllocator());
00119 
00120     //! constucts GpuMat and fills it with the specified value _s
00121     GpuMat(int rows, int cols, int type, Scalar  s, Allocator* allocator = defaultAllocator());
00122     GpuMat(Size size, int type, Scalar  s, Allocator* allocator = defaultAllocator());
00123 
00124     //! copy constructor
00125     GpuMat(const GpuMat& m);
00126 
00127     //! constructor for GpuMat headers pointing to user-allocated data
00128     GpuMat(int rows, int cols, int type, void* data, size_t step = Mat::AUTO_STEP);
00129     GpuMat(Size size, int type, void* data, size_t step = Mat::AUTO_STEP);
00130 
00131     //! creates a GpuMat header for a part of the bigger matrix
00132     GpuMat(const GpuMat& m, Range rowRange, Range colRange);
00133     GpuMat(const GpuMat& m, Rect roi);
00134 
00135     //! builds GpuMat from host memory (Blocking call)
00136     explicit GpuMat(InputArray arr, Allocator* allocator = defaultAllocator());
00137 
00138     //! destructor - calls release()
00139     ~GpuMat();
00140 
00141     //! assignment operators
00142     GpuMat& operator =(const GpuMat& m);
00143 
00144     //! allocates new GpuMat data unless the GpuMat already has specified size and type
00145     void create(int rows, int cols, int type);
00146     void create(Size size, int type);
00147 
00148     //! decreases reference counter, deallocate the data when reference counter reaches 0
00149     void release();
00150 
00151     //! swaps with other smart pointer
00152     void swap(GpuMat& mat);
00153 
00154     //! pefroms upload data to GpuMat (Blocking call)
00155     void upload(InputArray arr);
00156 
00157     //! pefroms upload data to GpuMat (Non-Blocking call)
00158     void upload(InputArray arr, Stream& stream);
00159 
00160     //! pefroms download data from device to host memory (Blocking call)
00161     void download(OutputArray dst) const;
00162 
00163     //! pefroms download data from device to host memory (Non-Blocking call)
00164     void download(OutputArray dst, Stream& stream) const;
00165 
00166     //! returns deep copy of the GpuMat, i.e. the data is copied
00167     GpuMat clone() const;
00168 
00169     //! copies the GpuMat content to device memory (Blocking call)
00170     void copyTo(OutputArray dst) const;
00171 
00172     //! copies the GpuMat content to device memory (Non-Blocking call)
00173     void copyTo(OutputArray dst, Stream& stream) const;
00174 
00175     //! copies those GpuMat elements to "m" that are marked with non-zero mask elements (Blocking call)
00176     void copyTo(OutputArray dst, InputArray mask) const;
00177 
00178     //! copies those GpuMat elements to "m" that are marked with non-zero mask elements (Non-Blocking call)
00179     void copyTo(OutputArray dst, InputArray mask, Stream& stream) const;
00180 
00181     //! sets some of the GpuMat elements to s (Blocking call)
00182     GpuMat& setTo(Scalar  s);
00183 
00184     //! sets some of the GpuMat elements to s (Non-Blocking call)
00185     GpuMat& setTo(Scalar  s, Stream& stream);
00186 
00187     //! sets some of the GpuMat elements to s, according to the mask (Blocking call)
00188     GpuMat& setTo(Scalar  s, InputArray mask);
00189 
00190     //! sets some of the GpuMat elements to s, according to the mask (Non-Blocking call)
00191     GpuMat& setTo(Scalar  s, InputArray mask, Stream& stream);
00192 
00193     //! converts GpuMat to another datatype (Blocking call)
00194     void convertTo(OutputArray dst, int rtype) const;
00195 
00196     //! converts GpuMat to another datatype (Non-Blocking call)
00197     void convertTo(OutputArray dst, int rtype, Stream& stream) const;
00198 
00199     //! converts GpuMat to another datatype with scaling (Blocking call)
00200     void convertTo(OutputArray dst, int rtype, double alpha, double beta = 0.0) const;
00201 
00202     //! converts GpuMat to another datatype with scaling (Non-Blocking call)
00203     void convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const;
00204 
00205     //! converts GpuMat to another datatype with scaling (Non-Blocking call)
00206     void convertTo(OutputArray dst, int rtype, double alpha, double beta, Stream& stream) const;
00207 
00208     void assignTo(GpuMat& m, int type=-1) const;
00209 
00210     //! returns pointer to y-th row
00211     uchar* ptr(int y = 0);
00212     const uchar* ptr(int y = 0) const;
00213 
00214     //! template version of the above method
00215     template<typename _Tp> _Tp* ptr(int y = 0);
00216     template<typename _Tp> const _Tp* ptr(int y = 0) const;
00217 
00218     template <typename _Tp> operator PtrStepSz<_Tp>() const;
00219     template <typename _Tp> operator PtrStep<_Tp>() const;
00220 
00221     //! returns a new GpuMat header for the specified row
00222     GpuMat row(int y) const;
00223 
00224     //! returns a new GpuMat header for the specified column
00225     GpuMat col(int x) const;
00226 
00227     //! ... for the specified row span
00228     GpuMat rowRange(int startrow, int endrow) const;
00229     GpuMat rowRange(Range r) const;
00230 
00231     //! ... for the specified column span
00232     GpuMat colRange(int startcol, int endcol) const;
00233     GpuMat colRange(Range r) const;
00234 
00235     //! extracts a rectangular sub-GpuMat (this is a generalized form of row, rowRange etc.)
00236     GpuMat operator ()(Range rowRange, Range colRange) const;
00237     GpuMat operator ()(Rect roi) const;
00238 
00239     //! creates alternative GpuMat header for the same data, with different
00240     //! number of channels and/or different number of rows
00241     GpuMat reshape(int cn, int rows = 0) const;
00242 
00243     //! locates GpuMat header within a parent GpuMat
00244     void locateROI(Size& wholeSize, Point& ofs) const;
00245 
00246     //! moves/resizes the current GpuMat ROI inside the parent GpuMat
00247     GpuMat& adjustROI(int dtop, int dbottom, int dleft, int dright);
00248 
00249     //! returns true iff the GpuMat data is continuous
00250     //! (i.e. when there are no gaps between successive rows)
00251     bool isContinuous() const;
00252 
00253     //! returns element size in bytes
00254     size_t elemSize() const;
00255 
00256     //! returns the size of element channel in bytes
00257     size_t elemSize1() const;
00258 
00259     //! returns element type
00260     int type() const;
00261 
00262     //! returns element type
00263     int depth() const;
00264 
00265     //! returns number of channels
00266     int channels() const;
00267 
00268     //! returns step/elemSize1()
00269     size_t step1() const;
00270 
00271     //! returns GpuMat size : width == number of columns, height == number of rows
00272     Size size() const;
00273 
00274     //! returns true if GpuMat data is NULL
00275     bool empty() const;
00276 
00277     /*! includes several bit-fields:
00278     - the magic signature
00279     - continuity flag
00280     - depth
00281     - number of channels
00282     */
00283     int flags ;
00284 
00285     //! the number of rows and columns
00286     int rows, cols;
00287 
00288     //! a distance between successive rows in bytes; includes the gap if any
00289     size_t step;
00290 
00291     //! pointer to the data
00292     uchar* data;
00293 
00294     //! pointer to the reference counter;
00295     //! when GpuMat points to user-allocated data, the pointer is NULL
00296     int* refcount;
00297 
00298     //! helper fields used in locateROI and adjustROI
00299     uchar* datastart;
00300     const uchar* dataend;
00301 
00302     //! allocator
00303     Allocator* allocator;
00304 };
00305 
00306 /** @brief Creates a continuous matrix.
00307 
00308 @param rows Row count.
00309 @param cols Column count.
00310 @param type Type of the matrix.
00311 @param arr Destination matrix. This parameter changes only if it has a proper type and area (
00312 \f$\texttt{rows} \times \texttt{cols}\f$ ).
00313 
00314 Matrix is called continuous if its elements are stored continuously, that is, without gaps at the
00315 end of each row. @sa ensureSizeIsEnough
00316  */
00317 CV_EXPORTS void createContinuous(int rows, int cols, int type, OutputArray arr);
00318 
00319 /** @brief Ensures that the size of a matrix is big enough and the matrix has a proper type.
00320 
00321 @param rows Minimum desired number of rows.
00322 @param cols Minimum desired number of columns.
00323 @param type Desired matrix type.
00324 @param arr Destination matrix.
00325 
00326 The function does not reallocate memory if the matrix has proper attributes already. @sa createContinuous
00327  */
00328 CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr);
00329 
00330 //! BufferPool management: setBufferPoolUsage enables/disables the pool, setBufferPoolConfig sets the
00330 //! per-device stack size and stack count it uses (both must be called before Stream creation)
00331 CV_EXPORTS void setBufferPoolUsage(bool on);
00332 CV_EXPORTS void setBufferPoolConfig(int deviceId, size_t stackSize, int stackCount);
00333 
00334 //===================================================================================
00335 // HostMem
00336 //===================================================================================
00337 
00338 /** @brief Class with reference counting wrapping special memory type allocation functions from CUDA.
00339 
00340 Its interface is also Mat-like but with additional memory type parameters.
00341 
00342 -   **PAGE_LOCKED** sets a page locked memory type used commonly for fast and asynchronous
00343     uploading/downloading data from/to GPU.
00344 -   **SHARED** specifies a zero copy memory allocation that enables mapping the host memory to GPU
00345     address space, if supported.
00346 -   **WRITE_COMBINED** sets the write combined buffer that is not cached by CPU. Such buffers are
00347     used to supply GPU with data when GPU only reads it. The advantage is a better CPU cache
00348     utilization.
00349 
00350 @note Allocation size of such memory types is usually limited. For more details, see *CUDA 2.2
00351 Pinned Memory APIs* document or *CUDA C Programming Guide*.
00352  */
00353 class CV_EXPORTS HostMem
00354 {
00355 public:
00356     enum AllocType { PAGE_LOCKED = 1, SHARED = 2, WRITE_COMBINED = 4 };
00357 
00358     static MatAllocator* getAllocator(AllocType alloc_type = PAGE_LOCKED);
00359 
00360     explicit HostMem(AllocType alloc_type = PAGE_LOCKED);
00361 
00362     HostMem(const HostMem& m);
00363 
00364     HostMem(int rows, int cols, int type, AllocType alloc_type = PAGE_LOCKED);
00365     HostMem(Size size, int type, AllocType alloc_type = PAGE_LOCKED);
00366 
00367     //! creates from host memory, copying the data
00368     explicit HostMem(InputArray arr, AllocType alloc_type = PAGE_LOCKED);
00369 
00370     ~HostMem();
00371 
00372     HostMem& operator =(const HostMem& m);
00373 
00374     //! swaps with other smart pointer
00375     void swap(HostMem& b);
00376 
00377     //! returns deep copy of the matrix, i.e. the data is copied
00378     HostMem clone() const;
00379 
00380     //! allocates new matrix data unless the matrix already has specified size and type.
00381     void create(int rows, int cols, int type);
00382     void create(Size size, int type);
00383 
00384     //! creates alternative HostMem header for the same data, with different
00385     //! number of channels and/or different number of rows
00386     HostMem reshape(int cn, int rows = 0) const;
00387 
00388     //! decrements reference counter and releases memory if needed.
00389     void release();
00390 
00391     //! returns matrix header with disabled reference counting for HostMem data.
00392     Mat createMatHeader() const;
00393 
00394     /** @brief Maps CPU memory to GPU address space and creates the cuda::GpuMat header without reference counting
00395     for it.
00396 
00397     This can be done only if memory was allocated with the SHARED flag and if it is supported by the
00398     hardware. Laptops often share video and CPU memory, so address spaces can be mapped, which
00399     eliminates an extra copy.
00400      */
00401     GpuMat createGpuMatHeader() const;
00402 
00403     // Please see cv::Mat for descriptions
00404     bool isContinuous() const;
00405     size_t elemSize() const;
00406     size_t elemSize1() const;
00407     int type() const;
00408     int depth() const;
00409     int channels() const;
00410     size_t step1() const;
00411     Size size() const;
00412     bool empty() const;
00413 
00414     // Please see cv::Mat for descriptions
00415     int flags;
00416     int rows, cols;
00417     size_t step;
00418 
00419     uchar* data;
00420     int* refcount;
00421 
00422     uchar* datastart;
00423     const uchar* dataend;
00424 
00425     AllocType alloc_type;
00426 };
00427 
00428 /** @brief Page-locks the memory of matrix and maps it for the device(s).
00429 
00430 @param m Input matrix. @sa unregisterPageLocked
00431  */
00432 CV_EXPORTS void registerPageLocked(Mat& m);
00433 
00434 /** @brief Unmaps the memory of matrix and makes it pageable again.
00435 
00436 @param m Input matrix. @sa registerPageLocked
00437  */
00438 CV_EXPORTS void unregisterPageLocked(Mat& m);
00439 
00440 //===================================================================================
00441 // Stream
00442 //===================================================================================
00443 
00444 /** @brief This class encapsulates a queue of asynchronous calls.
00445 
00446 @note Currently, you may face problems if an operation is enqueued twice with different data. Some
00447 functions use the constant GPU memory, and next call may update the memory before the previous one
00448 has been finished. But calling different operations asynchronously is safe because each operation
00449 has its own constant buffer. Memory copy/upload/download/set operations to the buffers you hold are
00450 also safe.
00451 
00452 @note The Stream class is not thread-safe. Please use different Stream objects for different CPU threads.
00453 
00454 @code
00455 void thread1()
00456 {
00457     cv::cuda::Stream stream1;
00458     cv::cuda::func1(..., stream1);
00459 }
00460 
00461 void thread2()
00462 {
00463     cv::cuda::Stream stream2;
00464     cv::cuda::func2(..., stream2);
00465 }
00466 @endcode
00467 
00468 @note By default all CUDA routines are launched in Stream::Null() object, if the stream is not specified by user.
00469 In multi-threading environment the stream objects must be passed explicitly (see previous note).
00470  */
00471 class CV_EXPORTS Stream
00472 {
00473     typedef void (Stream::*bool_type)() const; // safe-bool idiom: target type of operator bool_type() below
00474     void this_type_does_not_support_comparisons() const {} // safe-bool idiom: prevents accidental comparisons via bool_type
00475 
00476 public:
00477     typedef void (*StreamCallback)(int status, void* userData);
00478 
00479     //! creates a new asynchronous stream
00480     Stream();
00481 
00482     /** @brief Returns true if the current stream queue is finished. Otherwise, it returns false.
00483     */
00484     bool queryIfComplete() const;
00485 
00486     /** @brief Blocks the current CPU thread until all operations in the stream are complete.
00487     */
00488     void waitForCompletion();
00489 
00490     /** @brief Makes a compute stream wait on an event.
00491     */
00492     void waitEvent(const Event& event);
00493 
00494     /** @brief Adds a callback to be called on the host after all currently enqueued items in the stream have
00495     completed.
00496 
00497     @note Callbacks must not make any CUDA API calls. Callbacks must not perform any synchronization
00498     that may depend on outstanding device work or other callbacks that are not mandated to run earlier.
00499     Callbacks without a mandated order (in independent streams) execute in undefined order and may be
00500     serialized.
00501      */
00502     void enqueueHostCallback(StreamCallback callback, void* userData);
00503 
00504     //! return Stream object for default CUDA stream
00505     static Stream& Null();
00506 
00507     //! returns true if stream object is not default (!= 0)
00508     operator bool_type() const;
00509 
00510     class Impl;
00511 
00512 private:
00513     Ptr<Impl>  impl_;
00514     Stream(const Ptr<Impl> & impl);
00515 
00516     friend struct StreamAccessor;
00517     friend class BufferPool;
00518     friend class DefaultDeviceInitializer;
00519 };
00520 
/** @brief Wrapper for a CUDA event.

An Event can be recorded into a Stream, queried or waited on for completion, and
used to measure the elapsed time between two recorded events (see elapsedTime).
 */
00521 class CV_EXPORTS Event
00522 {
00523 public:
00524     enum CreateFlags
00525     {
00526         DEFAULT        = 0x00,  /**< Default event flag */
00527         BLOCKING_SYNC  = 0x01,  /**< Event uses blocking synchronization */
00528         DISABLE_TIMING = 0x02,  /**< Event will not record timing data */
00529         INTERPROCESS   = 0x04   /**< Event is suitable for interprocess use. DisableTiming must be set */
00530     };
00531 
00532     explicit Event(CreateFlags flags = DEFAULT);
00533 
00534     //! records an event
00535     void record(Stream& stream = Stream::Null());
00536 
00537     //! queries an event's status
00538     bool queryIfComplete() const;
00539 
00540     //! waits for an event to complete
00541     void waitForCompletion();
00542 
00543     //! computes the elapsed time between events
00544     static float elapsedTime(const Event& start, const Event& end);
00545 
00546     class Impl;
00547 
00548 private:
00549     Ptr<Impl>  impl_;
00550     Event(const Ptr<Impl> & impl);
00551 
00552     friend struct EventAccessor;
00553 };
00554 
00555 //! @} cudacore_struct
00556 
00557 //===================================================================================
00558 // Initialization & Info
00559 //===================================================================================
00560 
00561 //! @addtogroup cudacore_init
00562 //! @{
00563 
00564 /** @brief Returns the number of installed CUDA-enabled devices.
00565 
00566 Use this function before any other CUDA function calls. If OpenCV is compiled without CUDA support,
00567 this function returns 0.
00568  */
00569 CV_EXPORTS int getCudaEnabledDeviceCount();
00570 
00571 /** @brief Sets a device and initializes it for the current thread.
00572 
00573 @param device System index of a CUDA device starting with 0.
00574 
00575 If the call of this function is omitted, a default device is initialized at the first CUDA usage.
00576  */
00577 CV_EXPORTS void setDevice(int device);
00578 
00579 /** @brief Returns the current device index set by cuda::setDevice or initialized by default.
00580  */
00581 CV_EXPORTS int getDevice();
00582 
00583 /** @brief Explicitly destroys and cleans up all resources associated with the current device in the current
00584 process.
00585 
00586 Any subsequent API call to this device will reinitialize the device.
00587  */
00588 CV_EXPORTS void resetDevice();
00589 
00590 /** @brief Enumeration providing CUDA computing features.
00591 Each value encodes a compute capability as major*10 + minor (e.g. 35 == compute 3.5). */
00592 enum FeatureSet
00593 {
00594     FEATURE_SET_COMPUTE_10 = 10,
00595     FEATURE_SET_COMPUTE_11 = 11,
00596     FEATURE_SET_COMPUTE_12 = 12,
00597     FEATURE_SET_COMPUTE_13 = 13,
00598     FEATURE_SET_COMPUTE_20 = 20,
00599     FEATURE_SET_COMPUTE_21 = 21,
00600     FEATURE_SET_COMPUTE_30 = 30,
00601     FEATURE_SET_COMPUTE_32 = 32,
00602     FEATURE_SET_COMPUTE_35 = 35,
00603     FEATURE_SET_COMPUTE_50 = 50,
00604 
00605     GLOBAL_ATOMICS = FEATURE_SET_COMPUTE_11,
00606     SHARED_ATOMICS = FEATURE_SET_COMPUTE_12,
00607     NATIVE_DOUBLE = FEATURE_SET_COMPUTE_13,
00608     WARP_SHUFFLE_FUNCTIONS = FEATURE_SET_COMPUTE_30,
00609     DYNAMIC_PARALLELISM = FEATURE_SET_COMPUTE_35
00610 };
00611 
00612 //! checks whether current device supports the given feature
00613 CV_EXPORTS bool deviceSupports(FeatureSet feature_set);
00614 
00615 /** @brief Class providing a set of static methods to check what NVIDIA\* card architecture the CUDA module was
00616 built for.
00617 
00618 According to the CUDA C Programming Guide Version 3.2: "PTX code produced for some specific compute
00619 capability can always be compiled to binary code of greater or equal compute capability".
00620  */
00621 class CV_EXPORTS TargetArchs
00622 {
00623 public:
00624     /** @brief The following method checks whether the module was built with the support of the given feature:
00625 
00626     @param feature_set Features to be checked. See cv::cuda::FeatureSet.
00627      */
00628     static bool builtWith(FeatureSet feature_set);
00629 
00630     /** @brief There is a set of methods to check whether the module contains intermediate (PTX) or binary CUDA
00631     code for the given architecture(s):
00632 
00633     @param major Major compute capability version.
00634     @param minor Minor compute capability version.
00635      */
00636     static bool has(int major, int minor);
00637     static bool hasPtx(int major, int minor);
00638     static bool hasBin(int major, int minor);
00639 
00640     static bool hasEqualOrLessPtx(int major, int minor);
00641     static bool hasEqualOrGreater(int major, int minor);
00642     static bool hasEqualOrGreaterPtx(int major, int minor);
00643     static bool hasEqualOrGreaterBin(int major, int minor);
00644 };
00645 
00646 /** @brief Class providing functionality for querying the specified GPU properties.
00647  */
00648 class CV_EXPORTS DeviceInfo
00649 {
00650 public:
00651     //! creates DeviceInfo object for the current GPU
00652     DeviceInfo();
00653 
00654     /** @brief The constructors.
00655 
00656     @param device_id System index of the CUDA device starting with 0.
00657 
00658     Constructs the DeviceInfo object for the specified device. If device_id parameter is omitted, it
00659     constructs an object for the current device.
00660      */
00661     DeviceInfo(int device_id);
00662 
00663     /** @brief Returns system index of the CUDA device starting with 0.
00664     */
00665     int deviceID() const;
00666 
00667     //! ASCII string identifying device
00668     const char* name() const;
00669 
00670     //! global memory available on device in bytes
00671     size_t totalGlobalMem() const;
00672 
00673     //! shared memory available per block in bytes
00674     size_t sharedMemPerBlock() const;
00675 
00676     //! 32-bit registers available per block
00677     int regsPerBlock() const;
00678 
00679     //! warp size in threads
00680     int warpSize() const;
00681 
00682     //! maximum pitch in bytes allowed by memory copies
00683     size_t memPitch() const;
00684 
00685     //! maximum number of threads per block
00686     int maxThreadsPerBlock() const;
00687 
00688     //! maximum size of each dimension of a block
00689     Vec3i maxThreadsDim() const;
00690 
00691     //! maximum size of each dimension of a grid
00692     Vec3i maxGridSize() const;
00693 
00694     //! clock frequency in kilohertz
00695     int clockRate() const;
00696 
00697     //! constant memory available on device in bytes
00698     size_t totalConstMem() const;
00699 
00700     //! major compute capability
00701     int majorVersion() const;
00702 
00703     //! minor compute capability
00704     int minorVersion() const;
00705 
00706     //! alignment requirement for textures
00707     size_t textureAlignment() const;
00708 
00709     //! pitch alignment requirement for texture references bound to pitched memory
00710     size_t texturePitchAlignment() const;
00711 
00712     //! number of multiprocessors on device
00713     int multiProcessorCount() const;
00714 
00715     //! specifies whether there is a run time limit on kernels
00716     bool kernelExecTimeoutEnabled() const;
00717 
00718     //! device is integrated as opposed to discrete
00719     bool integrated() const;
00720 
00721     //! device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer
00722     bool canMapHostMemory() const;
00723 
00724     enum ComputeMode 
00725     {
00726         ComputeModeDefault,         /**< default compute mode (Multiple threads can use cudaSetDevice with this device) */
00727         ComputeModeExclusive,       /**< compute-exclusive-thread mode (Only one thread in one process will be able to use cudaSetDevice with this device) */
00728         ComputeModeProhibited,      /**< compute-prohibited mode (No threads can use cudaSetDevice with this device) */
00729         ComputeModeExclusiveProcess /**< compute-exclusive-process mode (Many threads in one process will be able to use cudaSetDevice with this device) */
00730     };
00731 
00732     //! compute mode
00733     ComputeMode computeMode() const;
00734 
00735     //! maximum 1D texture size
00736     int maxTexture1D() const;
00737 
00738     //! maximum 1D mipmapped texture size
00739     int maxTexture1DMipmap() const;
00740 
00741     //! maximum size for 1D textures bound to linear memory
00742     int maxTexture1DLinear() const;
00743 
00744     //! maximum 2D texture dimensions
00745     Vec2i maxTexture2D() const;
00746 
00747     //! maximum 2D mipmapped texture dimensions
00748     Vec2i maxTexture2DMipmap() const;
00749 
00750     //! maximum dimensions (width, height, pitch) for 2D textures bound to pitched memory
00751     Vec3i maxTexture2DLinear() const;
00752 
00753     //! maximum 2D texture dimensions if texture gather operations have to be performed
00754     Vec2i maxTexture2DGather() const;
00755 
00756     //! maximum 3D texture dimensions
00757     Vec3i maxTexture3D() const;
00758 
00759     //! maximum Cubemap texture dimensions
00760     int maxTextureCubemap() const;
00761 
00762     //! maximum 1D layered texture dimensions
00763     Vec2i maxTexture1DLayered() const;
00764 
00765     //! maximum 2D layered texture dimensions
00766     Vec3i maxTexture2DLayered() const;
00767 
00768     //! maximum Cubemap layered texture dimensions
00769     Vec2i maxTextureCubemapLayered() const;
00770 
00771     //! maximum 1D surface size
00772     int maxSurface1D() const;
00773 
00774     //! maximum 2D surface dimensions
00775     Vec2i maxSurface2D() const;
00776 
00777     //! maximum 3D surface dimensions
00778     Vec3i maxSurface3D() const;
00779 
00780     //! maximum 1D layered surface dimensions
00781     Vec2i maxSurface1DLayered() const;
00782 
00783     //! maximum 2D layered surface dimensions
00784     Vec3i maxSurface2DLayered() const;
00785 
00786     //! maximum Cubemap surface dimensions
00787     int maxSurfaceCubemap() const;
00788 
00789     //! maximum Cubemap layered surface dimensions
00790     Vec2i maxSurfaceCubemapLayered() const;
00791 
00792     //! alignment requirements for surfaces
00793     size_t surfaceAlignment() const;
00794 
00795     //! device can possibly execute multiple kernels concurrently
00796     bool concurrentKernels() const;
00797 
00798     //! device has ECC support enabled
00799     bool ECCEnabled() const;
00800 
00801     //! PCI bus ID of the device
00802     int pciBusID() const;
00803 
00804     //! PCI device ID of the device
00805     int pciDeviceID() const;
00806 
00807     //! PCI domain ID of the device
00808     int pciDomainID() const;
00809 
00810     //! true if device is a Tesla device using TCC driver, false otherwise
00811     bool tccDriver() const;
00812 
00813     //! number of asynchronous engines
00814     int asyncEngineCount() const;
00815 
00816     //! device shares a unified address space with the host
00817     bool unifiedAddressing() const;
00818 
00819     //! peak memory clock frequency in kilohertz
00820     int memoryClockRate() const;
00821 
00822     //! global memory bus width in bits
00823     int memoryBusWidth() const;
00824 
00825     //! size of L2 cache in bytes
00826     int l2CacheSize() const;
00827 
00828     //! maximum resident threads per multiprocessor
00829     int maxThreadsPerMultiProcessor() const;
00830 
00831     //! gets free and total device memory
00832     void queryMemory(size_t& totalMemory, size_t& freeMemory) const;
00833     size_t freeMemory() const;
00834     size_t totalMemory() const;
00835 
00836     /** @brief Provides information on CUDA feature support.
00837 
00838     @param feature_set Features to be checked. See cuda::FeatureSet.
00839 
00840     This function returns true if the device has the specified CUDA feature. Otherwise, it returns false.
00841      */
00842     bool supports(FeatureSet feature_set) const;
00843 
00844     /** @brief Checks the CUDA module and device compatibility.
00845 
00846     This function returns true if the CUDA module can be run on the specified device. Otherwise, it
00847     returns false .
00848      */
00849     bool isCompatible() const;
00850 
00851 private:
00852     int device_id_;
00853 };
00854 
00855 CV_EXPORTS void printCudaDeviceInfo(int device);      //!< prints detailed information about the CUDA device with the given system index
00856 CV_EXPORTS void printShortCudaDeviceInfo(int device); //!< prints short information about the CUDA device with the given system index
00857 
00858 /** @brief Converts an array to half precision floating number.
00859 
00860 @param _src input array.
00861 @param _dst output array.
00862 @param stream Stream for the asynchronous version.
00863 @sa convertFp16
00864 */
00865 CV_EXPORTS void convertFp16(InputArray _src, OutputArray _dst, Stream& stream = Stream::Null());
00866 
00867 //! @} cudacore_init
00868 
00869 }} // namespace cv { namespace cuda {
00870 
00871 
00872 #include "opencv2/core/cuda.inl.hpp"
00873 
00874 #endif /* OPENCV_CORE_CUDA_HPP */