openCV library for Renesas RZ/A
Dependents: RZ_A2M_Mbed_samples
Diff: include/opencv2/core/cuda.hpp
- Revision:
- 0:0e0631af0305
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/include/opencv2/core/cuda.hpp Fri Jan 29 04:53:38 2021 +0000
@@ -0,0 +1,874 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+// By downloading, copying, installing or using the software you agree to this license.
+// If you do not agree to this license, do not download, install,
+// copy or use the software.
+//
+//
+// License Agreement
+// For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+// * Redistribution's of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+//
+// * Redistribution's in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+//
+// * The name of the copyright holders may not be used to endorse or promote products
+// derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#ifndef OPENCV_CORE_CUDA_HPP
+#define OPENCV_CORE_CUDA_HPP
+
+#ifndef __cplusplus
+# error cuda.hpp header must be compiled as C++
+#endif
+
+#include "opencv2/core.hpp"
+#include "opencv2/core/cuda_types.hpp"
+
+/**
+ @defgroup cuda CUDA-accelerated Computer Vision
+ @{
+ @defgroup cudacore Core part
+ @{
+ @defgroup cudacore_init Initialization and Information
+ @defgroup cudacore_struct Data Structures
+ @}
+ @}
+ */
+
+namespace cv { namespace cuda {
+
+//! @addtogroup cudacore_struct
+//! @{
+
+//===================================================================================
+// GpuMat
+//===================================================================================
+
+/** @brief Base storage class for GPU memory with reference counting.
+
+Its interface matches the Mat interface with the following limitations:
+
+- no arbitrary dimensions support (only 2D)
+- no functions that return references to their data (because references on GPU are not valid for
+ CPU)
+- no expression templates technique support
+
+Beware that the latter limitation may lead to overloaded matrix operators that cause memory
+allocations. The GpuMat class is convertible to cuda::PtrStepSz and cuda::PtrStep so it can be
+passed directly to the kernel.
+
+@note In contrast with Mat, in most cases GpuMat::isContinuous() == false . This means that rows are
+aligned to a size depending on the hardware. Single-row GpuMat is always a continuous matrix.
+
+@note You are not recommended to leave static or global GpuMat variables allocated, that is, to rely
+on its destructor. The destruction order of such variables and CUDA context is undefined. GPU memory
+release function returns error if the CUDA context has been destroyed before.
+
+@sa Mat
+ */
+class CV_EXPORTS GpuMat
+{
+public:
+ class CV_EXPORTS Allocator
+ {
+ public:
+ virtual ~Allocator() {}
+
+ // an allocator implementation must fill the data, step and refcount fields of mat
+ virtual bool allocate(GpuMat* mat, int rows, int cols, size_t elemSize) = 0;
+ virtual void free(GpuMat* mat) = 0;
+ };
+
+ //! default allocator
+ static Allocator* defaultAllocator();
+ static void setDefaultAllocator(Allocator* allocator);
+
+ //! default constructor
+ explicit GpuMat(Allocator* allocator = defaultAllocator());
+
+ //! constructs GpuMat of the specified size and type
+ GpuMat(int rows, int cols, int type, Allocator* allocator = defaultAllocator());
+ GpuMat(Size size, int type, Allocator* allocator = defaultAllocator());
+
+ //! constructs GpuMat and fills it with the specified value s
+ GpuMat(int rows, int cols, int type, Scalar s, Allocator* allocator = defaultAllocator());
+ GpuMat(Size size, int type, Scalar s, Allocator* allocator = defaultAllocator());
+
+ //! copy constructor
+ GpuMat(const GpuMat& m);
+
+ //! constructor for GpuMat headers pointing to user-allocated data
+ GpuMat(int rows, int cols, int type, void* data, size_t step = Mat::AUTO_STEP);
+ GpuMat(Size size, int type, void* data, size_t step = Mat::AUTO_STEP);
+
+ //! creates a GpuMat header for a part of the bigger matrix
+ GpuMat(const GpuMat& m, Range rowRange, Range colRange);
+ GpuMat(const GpuMat& m, Rect roi);
+
+ //! builds GpuMat from host memory (Blocking call)
+ explicit GpuMat(InputArray arr, Allocator* allocator = defaultAllocator());
+
+ //! destructor - calls release()
+ ~GpuMat();
+
+ //! assignment operator
+ GpuMat& operator =(const GpuMat& m);
+
+ //! allocates new GpuMat data unless the GpuMat already has specified size and type
+ void create(int rows, int cols, int type);
+ void create(Size size, int type);
+
+ //! decreases reference counter, deallocates the data when reference counter reaches 0
+ void release();
+
+ //! swaps with other smart pointer
+ void swap(GpuMat& mat);
+
+ //! performs data upload to GpuMat (Blocking call)
+ void upload(InputArray arr);
+
+ //! performs data upload to GpuMat (Non-Blocking call)
+ void upload(InputArray arr, Stream& stream);
+
+ //! performs data download from device to host memory (Blocking call)
+ void download(OutputArray dst) const;
+
+ //! performs data download from device to host memory (Non-Blocking call)
+ void download(OutputArray dst, Stream& stream) const;
+
+ //! returns deep copy of the GpuMat, i.e. the data is copied
+ GpuMat clone() const;
+
+ //! copies the GpuMat content to device memory (Blocking call)
+ void copyTo(OutputArray dst) const;
+
+ //! copies the GpuMat content to device memory (Non-Blocking call)
+ void copyTo(OutputArray dst, Stream& stream) const;
+
+ //! copies those GpuMat elements to "dst" that are marked with non-zero mask elements (Blocking call)
+ void copyTo(OutputArray dst, InputArray mask) const;
+
+ //! copies those GpuMat elements to "dst" that are marked with non-zero mask elements (Non-Blocking call)
+ void copyTo(OutputArray dst, InputArray mask, Stream& stream) const;
+
+ //! sets some of the GpuMat elements to s (Blocking call)
+ GpuMat& setTo(Scalar s);
+
+ //! sets some of the GpuMat elements to s (Non-Blocking call)
+ GpuMat& setTo(Scalar s, Stream& stream);
+
+ //! sets some of the GpuMat elements to s, according to the mask (Blocking call)
+ GpuMat& setTo(Scalar s, InputArray mask);
+
+ //! sets some of the GpuMat elements to s, according to the mask (Non-Blocking call)
+ GpuMat& setTo(Scalar s, InputArray mask, Stream& stream);
+
+ //! converts GpuMat to another datatype (Blocking call)
+ void convertTo(OutputArray dst, int rtype) const;
+
+ //! converts GpuMat to another datatype (Non-Blocking call)
+ void convertTo(OutputArray dst, int rtype, Stream& stream) const;
+
+ //! converts GpuMat to another datatype with scaling (Blocking call)
+ void convertTo(OutputArray dst, int rtype, double alpha, double beta = 0.0) const;
+
+ //! converts GpuMat to another datatype with scaling by alpha only (Non-Blocking call)
+ void convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const;
+
+ //! converts GpuMat to another datatype with scaling by alpha and beta (Non-Blocking call)
+ void convertTo(OutputArray dst, int rtype, double alpha, double beta, Stream& stream) const;
+
+ void assignTo(GpuMat& m, int type=-1) const;
+
+ //! returns pointer to y-th row
+ uchar* ptr(int y = 0);
+ const uchar* ptr(int y = 0) const;
+
+ //! template version of the above method
+ template<typename _Tp> _Tp* ptr(int y = 0);
+ template<typename _Tp> const _Tp* ptr(int y = 0) const;
+
+ template <typename _Tp> operator PtrStepSz<_Tp>() const;
+ template <typename _Tp> operator PtrStep<_Tp>() const;
+
+ //! returns a new GpuMat header for the specified row
+ GpuMat row(int y) const;
+
+ //! returns a new GpuMat header for the specified column
+ GpuMat col(int x) const;
+
+ //! ... for the specified row span
+ GpuMat rowRange(int startrow, int endrow) const;
+ GpuMat rowRange(Range r) const;
+
+ //! ... for the specified column span
+ GpuMat colRange(int startcol, int endcol) const;
+ GpuMat colRange(Range r) const;
+
+ //! extracts a rectangular sub-GpuMat (this is a generalized form of row, rowRange etc.)
+ GpuMat operator ()(Range rowRange, Range colRange) const;
+ GpuMat operator ()(Rect roi) const;
+
+ //! creates alternative GpuMat header for the same data, with different
+ //! number of channels and/or different number of rows
+ GpuMat reshape(int cn, int rows = 0) const;
+
+ //! locates GpuMat header within a parent GpuMat
+ void locateROI(Size& wholeSize, Point& ofs) const;
+
+ //! moves/resizes the current GpuMat ROI inside the parent GpuMat
+ GpuMat& adjustROI(int dtop, int dbottom, int dleft, int dright);
+
+ //! returns true iff the GpuMat data is continuous
+ //! (i.e. when there are no gaps between successive rows)
+ bool isContinuous() const;
+
+ //! returns element size in bytes
+ size_t elemSize() const;
+
+ //! returns the size of element channel in bytes
+ size_t elemSize1() const;
+
+ //! returns element type
+ int type() const;
+
+ //! returns element depth
+ int depth() const;
+
+ //! returns number of channels
+ int channels() const;
+
+ //! returns step/elemSize1()
+ size_t step1() const;
+
+ //! returns GpuMat size : width == number of columns, height == number of rows
+ Size size() const;
+
+ //! returns true if GpuMat data is NULL
+ bool empty() const;
+
+ /*! includes several bit-fields:
+ - the magic signature
+ - continuity flag
+ - depth
+ - number of channels
+ */
+ int flags;
+
+ //! the number of rows and columns
+ int rows, cols;
+
+ //! the distance between successive rows in bytes; includes the gap if any
+ size_t step;
+
+ //! pointer to the data
+ uchar* data;
+
+ //! pointer to the reference counter;
+ //! when GpuMat points to user-allocated data, the pointer is NULL
+ int* refcount;
+
+ //! helper fields used in locateROI and adjustROI
+ uchar* datastart;
+ const uchar* dataend;
+
+ //! allocator
+ Allocator* allocator;
+};
+
+/** @brief Creates a continuous matrix.
+
+@param rows Row count.
+@param cols Column count.
+@param type Type of the matrix.
+@param arr Destination matrix. This parameter changes only if it has a proper type and area (
+\f$\texttt{rows} \times \texttt{cols}\f$ ).
+
+A matrix is called continuous if its elements are stored continuously, that is, without gaps at the
+end of each row.
+ */
+CV_EXPORTS void createContinuous(int rows, int cols, int type, OutputArray arr);
+
+/** @brief Ensures that the size of a matrix is big enough and the matrix has a proper type.
+
+@param rows Minimum desired number of rows.
+@param cols Minimum desired number of columns.
+@param type Desired matrix type.
+@param arr Destination matrix.
+
+The function does not reallocate memory if the matrix already has the proper attributes.
+ */
+CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr);
+
+//! BufferPool management (these functions must be called before Stream creation)
+CV_EXPORTS void setBufferPoolUsage(bool on);
+CV_EXPORTS void setBufferPoolConfig(int deviceId, size_t stackSize, int stackCount);
+
+//===================================================================================
+// HostMem
+//===================================================================================
+
+/** @brief Class with reference counting wrapping special memory type allocation functions from CUDA.
+
+Its interface is also Mat-like but with additional memory type parameters.
+
+- **PAGE_LOCKED** sets a page locked memory type used commonly for fast and asynchronous
+ uploading/downloading data from/to GPU.
+- **SHARED** specifies a zero copy memory allocation that enables mapping the host memory to GPU
+ address space, if supported.
+- **WRITE_COMBINED** sets the write combined buffer that is not cached by CPU. Such buffers are
+ used to supply GPU with data when GPU only reads it. The advantage is a better CPU cache
+ utilization.
+
+@note Allocation size of such memory types is usually limited. For more details, see *CUDA 2.2
+Pinned Memory APIs* document or *CUDA C Programming Guide*.
+ */
+class CV_EXPORTS HostMem
+{
+public:
+ enum AllocType { PAGE_LOCKED = 1, SHARED = 2, WRITE_COMBINED = 4 };
+
+ static MatAllocator* getAllocator(AllocType alloc_type = PAGE_LOCKED);
+
+ explicit HostMem(AllocType alloc_type = PAGE_LOCKED);
+
+ HostMem(const HostMem& m);
+
+ HostMem(int rows, int cols, int type, AllocType alloc_type = PAGE_LOCKED);
+ HostMem(Size size, int type, AllocType alloc_type = PAGE_LOCKED);
+
+ //! creates from host memory, copying the data
+ explicit HostMem(InputArray arr, AllocType alloc_type = PAGE_LOCKED);
+
+ ~HostMem();
+
+ HostMem& operator =(const HostMem& m);
+
+ //! swaps with other smart pointer
+ void swap(HostMem& b);
+
+ //! returns deep copy of the matrix, i.e. the data is copied
+ HostMem clone() const;
+
+ //! allocates new matrix data unless the matrix already has specified size and type.
+ void create(int rows, int cols, int type);
+ void create(Size size, int type);
+
+ //! creates alternative HostMem header for the same data, with different
+ //! number of channels and/or different number of rows
+ HostMem reshape(int cn, int rows = 0) const;
+
+ //! decrements reference counter and releases memory if needed.
+ void release();
+
+ //! returns matrix header with disabled reference counting for HostMem data.
+ Mat createMatHeader() const;
+
+ /** @brief Maps CPU memory to GPU address space and creates the cuda::GpuMat header without reference counting
+ for it.
+
+ This can be done only if memory was allocated with the SHARED flag and if it is supported by the
+ hardware. Laptops often share video and CPU memory, so address spaces can be mapped, which
+ eliminates an extra copy.
+ */
+ GpuMat createGpuMatHeader() const;
+
+ // Please see cv::Mat for descriptions of these methods
+ bool isContinuous() const;
+ size_t elemSize() const;
+ size_t elemSize1() const;
+ int type() const;
+ int depth() const;
+ int channels() const;
+ size_t step1() const;
+ Size size() const;
+ bool empty() const;
+
+ // Please see cv::Mat for descriptions of these fields
+ int flags;
+ int rows, cols;
+ size_t step;
+
+ uchar* data;
+ int* refcount;
+
+ uchar* datastart;
+ const uchar* dataend;
+
+ AllocType alloc_type;
+};
+
+/** @brief Page-locks the memory of the matrix and maps it for the device(s).
+
+@param m Input matrix.
+ */
+CV_EXPORTS void registerPageLocked(Mat& m);
+
+/** @brief Unmaps the memory of the matrix and makes it pageable again.
+
+@param m Input matrix.
+ */
+CV_EXPORTS void unregisterPageLocked(Mat& m);
+
+//===================================================================================
+// Stream
+//===================================================================================
+
+/** @brief This class encapsulates a queue of asynchronous calls.
+
+@note Currently, you may face problems if an operation is enqueued twice with different data. Some
+functions use the constant GPU memory, and next call may update the memory before the previous one
+has been finished. But calling different operations asynchronously is safe because each operation
+has its own constant buffer. Memory copy/upload/download/set operations to the buffers you hold are
+also safe.
+
+@note The Stream class is not thread-safe. Please use different Stream objects for different CPU threads.
+
+@code
+void thread1()
+{
+ cv::cuda::Stream stream1;
+ cv::cuda::func1(..., stream1);
+}
+
+void thread2()
+{
+ cv::cuda::Stream stream2;
+ cv::cuda::func2(..., stream2);
+}
+@endcode
+
+@note By default all CUDA routines are launched in Stream::Null() object, if the stream is not specified by user.
+In multi-threading environment the stream objects must be passed explicitly (see previous note).
+ */
+class CV_EXPORTS Stream
+{
+ typedef void (Stream::*bool_type)() const; // safe-bool idiom: pointer-to-member type used as the result of operator bool_type()
+ void this_type_does_not_support_comparisons() const {} // private no-op member matching bool_type; presumably what operator bool_type() points at (definition not in this header)
+
+public:
+ typedef void (*StreamCallback)(int status, void* userData);
+
+ //! creates a new asynchronous stream
+ Stream();
+
+ /** @brief Returns true if the current stream queue is finished. Otherwise, it returns false.
+ */
+ bool queryIfComplete() const;
+
+ /** @brief Blocks the current CPU thread until all operations in the stream are complete.
+ */
+ void waitForCompletion();
+
+ /** @brief Makes a compute stream wait on an event.
+ */
+ void waitEvent(const Event& event);
+
+ /** @brief Adds a callback to be called on the host after all currently enqueued items in the stream have
+ completed.
+
+ @note Callbacks must not make any CUDA API calls. Callbacks must not perform any synchronization
+ that may depend on outstanding device work or other callbacks that are not mandated to run earlier.
+ Callbacks without a mandated order (in independent streams) execute in undefined order and may be
+ serialized.
+ */
+ void enqueueHostCallback(StreamCallback callback, void* userData);
+
+ //! returns the Stream object for the default CUDA stream
+ static Stream& Null();
+
+ //! returns true if stream object is not default (!= 0)
+ operator bool_type() const;
+
+ class Impl;
+
+private:
+ Ptr<Impl> impl_;
+ Stream(const Ptr<Impl>& impl);
+
+ friend struct StreamAccessor;
+ friend class BufferPool;
+ friend class DefaultDeviceInitializer;
+};
+
+class CV_EXPORTS Event
+{
+public:
+ enum CreateFlags
+ {
+ DEFAULT = 0x00, /**< Default event flag */
+ BLOCKING_SYNC = 0x01, /**< Event uses blocking synchronization */
+ DISABLE_TIMING = 0x02, /**< Event will not record timing data */
+ INTERPROCESS = 0x04 /**< Event is suitable for interprocess use. DISABLE_TIMING must be set */
+ };
+
+ explicit Event(CreateFlags flags = DEFAULT);
+
+ //! records an event (on the default stream, Stream::Null(), unless another stream is given)
+ void record(Stream& stream = Stream::Null());
+
+ //! queries an event's status
+ bool queryIfComplete() const;
+
+ //! waits for an event to complete
+ void waitForCompletion();
+
+ //! computes the elapsed time between events
+ static float elapsedTime(const Event& start, const Event& end);
+
+ class Impl;
+
+private:
+ Ptr<Impl> impl_;
+ Event(const Ptr<Impl>& impl);
+
+ friend struct EventAccessor;
+};
+
+//! @} cudacore_struct
+
+//===================================================================================
+// Initialization & Info
+//===================================================================================
+
+//! @addtogroup cudacore_init
+//! @{
+
+/** @brief Returns the number of installed CUDA-enabled devices.
+
+Use this function before any other CUDA function calls. If OpenCV is compiled without CUDA support,
+this function returns 0.
+ */
+CV_EXPORTS int getCudaEnabledDeviceCount();
+
+/** @brief Sets a device and initializes it for the current thread.
+
+@param device System index of a CUDA device starting with 0.
+
+If the call of this function is omitted, a default device is initialized at the first CUDA usage.
+ */
+CV_EXPORTS void setDevice(int device);
+
+/** @brief Returns the current device index set by cuda::setDevice or initialized by default.
+ */
+CV_EXPORTS int getDevice();
+
+/** @brief Explicitly destroys and cleans up all resources associated with the current device in the current
+process.
+
+Any subsequent API call to this device will reinitialize the device.
+ */
+CV_EXPORTS void resetDevice();
+
+/** @brief Enumeration providing CUDA computing features.
+ */
+enum FeatureSet
+{
+ FEATURE_SET_COMPUTE_10 = 10,
+ FEATURE_SET_COMPUTE_11 = 11,
+ FEATURE_SET_COMPUTE_12 = 12,
+ FEATURE_SET_COMPUTE_13 = 13,
+ FEATURE_SET_COMPUTE_20 = 20,
+ FEATURE_SET_COMPUTE_21 = 21,
+ FEATURE_SET_COMPUTE_30 = 30,
+ FEATURE_SET_COMPUTE_32 = 32,
+ FEATURE_SET_COMPUTE_35 = 35,
+ FEATURE_SET_COMPUTE_50 = 50,
+ // The named features below are aliases of the FEATURE_SET_COMPUTE_* enumerators above.
+ GLOBAL_ATOMICS = FEATURE_SET_COMPUTE_11,
+ SHARED_ATOMICS = FEATURE_SET_COMPUTE_12,
+ NATIVE_DOUBLE = FEATURE_SET_COMPUTE_13,
+ WARP_SHUFFLE_FUNCTIONS = FEATURE_SET_COMPUTE_30,
+ DYNAMIC_PARALLELISM = FEATURE_SET_COMPUTE_35
+};
+
+//! checks whether the current device supports the given feature
+CV_EXPORTS bool deviceSupports(FeatureSet feature_set);
+
+/** @brief Class providing a set of static methods to check what NVIDIA\* card architecture the CUDA module was
+built for.
+
+According to the CUDA C Programming Guide Version 3.2: "PTX code produced for some specific compute
+capability can always be compiled to binary code of greater or equal compute capability".
+ */
+class CV_EXPORTS TargetArchs
+{
+public:
+ /** @brief The following method checks whether the module was built with the support of the given feature:
+
+ @param feature_set Features to be checked. See cuda::FeatureSet.
+ */
+ static bool builtWith(FeatureSet feature_set);
+
+ /** @brief There is a set of methods to check whether the module contains intermediate (PTX) or binary CUDA
+ code for the given architecture(s):
+
+ @param major Major compute capability version.
+ @param minor Minor compute capability version.
+ */
+ static bool has(int major, int minor);
+ static bool hasPtx(int major, int minor);
+ static bool hasBin(int major, int minor);
+
+ static bool hasEqualOrLessPtx(int major, int minor);
+ static bool hasEqualOrGreater(int major, int minor);
+ static bool hasEqualOrGreaterPtx(int major, int minor);
+ static bool hasEqualOrGreaterBin(int major, int minor);
+};
+
+/** @brief Class providing functionality for querying the specified GPU properties.
+ */
+class CV_EXPORTS DeviceInfo
+{
+public:
+ //! creates DeviceInfo object for the current GPU
+ DeviceInfo();
+
+ /** @brief The constructors.
+
+ @param device_id System index of the CUDA device starting with 0.
+
+ Constructs the DeviceInfo object for the specified device. If device_id parameter is omitted, it
+ constructs an object for the current device.
+ */
+ DeviceInfo(int device_id);
+
+ /** @brief Returns system index of the CUDA device starting with 0.
+ */
+ int deviceID() const;
+
+ //! ASCII string identifying device
+ const char* name() const;
+
+ //! global memory available on device in bytes
+ size_t totalGlobalMem() const;
+
+ //! shared memory available per block in bytes
+ size_t sharedMemPerBlock() const;
+
+ //! 32-bit registers available per block
+ int regsPerBlock() const;
+
+ //! warp size in threads
+ int warpSize() const;
+
+ //! maximum pitch in bytes allowed by memory copies
+ size_t memPitch() const;
+
+ //! maximum number of threads per block
+ int maxThreadsPerBlock() const;
+
+ //! maximum size of each dimension of a block
+ Vec3i maxThreadsDim() const;
+
+ //! maximum size of each dimension of a grid
+ Vec3i maxGridSize() const;
+
+ //! clock frequency in kilohertz
+ int clockRate() const;
+
+ //! constant memory available on device in bytes
+ size_t totalConstMem() const;
+
+ //! major compute capability
+ int majorVersion() const;
+
+ //! minor compute capability
+ int minorVersion() const;
+
+ //! alignment requirement for textures
+ size_t textureAlignment() const;
+
+ //! pitch alignment requirement for texture references bound to pitched memory
+ size_t texturePitchAlignment() const;
+
+ //! number of multiprocessors on device
+ int multiProcessorCount() const;
+
+ //! specifies whether there is a run time limit on kernels
+ bool kernelExecTimeoutEnabled() const;
+
+ //! device is integrated as opposed to discrete
+ bool integrated() const;
+
+ //! device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer
+ bool canMapHostMemory() const;
+
+ enum ComputeMode
+ {
+ ComputeModeDefault, /**< default compute mode (Multiple threads can use cudaSetDevice with this device) */
+ ComputeModeExclusive, /**< compute-exclusive-thread mode (Only one thread in one process will be able to use cudaSetDevice with this device) */
+ ComputeModeProhibited, /**< compute-prohibited mode (No threads can use cudaSetDevice with this device) */
+ ComputeModeExclusiveProcess /**< compute-exclusive-process mode (Many threads in one process will be able to use cudaSetDevice with this device) */
+ };
+
+ //! compute mode
+ ComputeMode computeMode() const;
+
+ //! maximum 1D texture size
+ int maxTexture1D() const;
+
+ //! maximum 1D mipmapped texture size
+ int maxTexture1DMipmap() const;
+
+ //! maximum size for 1D textures bound to linear memory
+ int maxTexture1DLinear() const;
+
+ //! maximum 2D texture dimensions
+ Vec2i maxTexture2D() const;
+
+ //! maximum 2D mipmapped texture dimensions
+ Vec2i maxTexture2DMipmap() const;
+
+ //! maximum dimensions (width, height, pitch) for 2D textures bound to pitched memory
+ Vec3i maxTexture2DLinear() const;
+
+ //! maximum 2D texture dimensions if texture gather operations have to be performed
+ Vec2i maxTexture2DGather() const;
+
+ //! maximum 3D texture dimensions
+ Vec3i maxTexture3D() const;
+
+ //! maximum Cubemap texture dimensions
+ int maxTextureCubemap() const;
+
+ //! maximum 1D layered texture dimensions
+ Vec2i maxTexture1DLayered() const;
+
+ //! maximum 2D layered texture dimensions
+ Vec3i maxTexture2DLayered() const;
+
+ //! maximum Cubemap layered texture dimensions
+ Vec2i maxTextureCubemapLayered() const;
+
+ //! maximum 1D surface size
+ int maxSurface1D() const;
+
+ //! maximum 2D surface dimensions
+ Vec2i maxSurface2D() const;
+
+ //! maximum 3D surface dimensions
+ Vec3i maxSurface3D() const;
+
+ //! maximum 1D layered surface dimensions
+ Vec2i maxSurface1DLayered() const;
+
+ //! maximum 2D layered surface dimensions
+ Vec3i maxSurface2DLayered() const;
+
+ //! maximum Cubemap surface dimensions
+ int maxSurfaceCubemap() const;
+
+ //! maximum Cubemap layered surface dimensions
+ Vec2i maxSurfaceCubemapLayered() const;
+
+ //! alignment requirements for surfaces
+ size_t surfaceAlignment() const;
+
+ //! device can possibly execute multiple kernels concurrently
+ bool concurrentKernels() const;
+
+ //! device has ECC support enabled
+ bool ECCEnabled() const;
+
+ //! PCI bus ID of the device
+ int pciBusID() const;
+
+ //! PCI device ID of the device
+ int pciDeviceID() const;
+
+ //! PCI domain ID of the device
+ int pciDomainID() const;
+
+ //! true if device is a Tesla device using TCC driver, false otherwise
+ bool tccDriver() const;
+
+ //! number of asynchronous engines
+ int asyncEngineCount() const;
+
+ //! device shares a unified address space with the host
+ bool unifiedAddressing() const;
+
+ //! peak memory clock frequency in kilohertz
+ int memoryClockRate() const;
+
+ //! global memory bus width in bits
+ int memoryBusWidth() const;
+
+ //! size of L2 cache in bytes
+ int l2CacheSize() const;
+
+ //! maximum resident threads per multiprocessor
+ int maxThreadsPerMultiProcessor() const;
+
+ //! gets total and free device memory (note the argument order: totalMemory first, then freeMemory)
+ void queryMemory(size_t& totalMemory, size_t& freeMemory) const;
+ size_t freeMemory() const;
+ size_t totalMemory() const;
+
+ /** @brief Provides information on CUDA feature support.
+
+ @param feature_set Features to be checked. See cuda::FeatureSet.
+
+ This function returns true if the device has the specified CUDA feature. Otherwise, it returns false.
+ */
+ bool supports(FeatureSet feature_set) const;
+
+ /** @brief Checks the CUDA module and device compatibility.
+
+ This function returns true if the CUDA module can be run on the specified device. Otherwise, it
+ returns false.
+ */
+ bool isCompatible() const;
+
+private:
+ int device_id_;
+};
+
+CV_EXPORTS void printCudaDeviceInfo(int device);
+CV_EXPORTS void printShortCudaDeviceInfo(int device);
+
+/** @brief Converts an array to half-precision floating-point numbers.
+
+@param _src input array.
+@param _dst output array.
+@param stream Stream for the asynchronous version; defaults to Stream::Null().
+@sa convertFp16
+*/
+CV_EXPORTS void convertFp16(InputArray _src, OutputArray _dst, Stream& stream = Stream::Null());
+
+//! @} cudacore_init
+
+}} // namespace cv { namespace cuda {
+
+
+#include "opencv2/core/cuda.inl.hpp"
+
+#endif /* OPENCV_CORE_CUDA_HPP */