opencv on mbed

Dependencies:   mbed

Committer: joeverbout
Date: Thu Mar 31 21:16:38 2016 +0000
Revision: 0:ea44dc9ed014
OpenCV on mbed attempt

Who changed what in which revision?

User    Revision    Line number    New contents of line
joeverbout 0:ea44dc9ed014 1 /*M///////////////////////////////////////////////////////////////////////////////////////
joeverbout 0:ea44dc9ed014 2 //
joeverbout 0:ea44dc9ed014 3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
joeverbout 0:ea44dc9ed014 4 //
joeverbout 0:ea44dc9ed014 5 // By downloading, copying, installing or using the software you agree to this license.
joeverbout 0:ea44dc9ed014 6 // If you do not agree to this license, do not download, install,
joeverbout 0:ea44dc9ed014 7 // copy or use the software.
joeverbout 0:ea44dc9ed014 8 //
joeverbout 0:ea44dc9ed014 9 //
joeverbout 0:ea44dc9ed014 10 // License Agreement
joeverbout 0:ea44dc9ed014 11 // For Open Source Computer Vision Library
joeverbout 0:ea44dc9ed014 12 //
joeverbout 0:ea44dc9ed014 13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
joeverbout 0:ea44dc9ed014 14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
joeverbout 0:ea44dc9ed014 15 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
joeverbout 0:ea44dc9ed014 16 // Third party copyrights are property of their respective owners.
joeverbout 0:ea44dc9ed014 17 //
joeverbout 0:ea44dc9ed014 18 // Redistribution and use in source and binary forms, with or without modification,
joeverbout 0:ea44dc9ed014 19 // are permitted provided that the following conditions are met:
joeverbout 0:ea44dc9ed014 20 //
joeverbout 0:ea44dc9ed014 21 // * Redistribution's of source code must retain the above copyright notice,
joeverbout 0:ea44dc9ed014 22 // this list of conditions and the following disclaimer.
joeverbout 0:ea44dc9ed014 23 //
joeverbout 0:ea44dc9ed014 24 // * Redistribution's in binary form must reproduce the above copyright notice,
joeverbout 0:ea44dc9ed014 25 // this list of conditions and the following disclaimer in the documentation
joeverbout 0:ea44dc9ed014 26 // and/or other materials provided with the distribution.
joeverbout 0:ea44dc9ed014 27 //
joeverbout 0:ea44dc9ed014 28 // * The name of the copyright holders may not be used to endorse or promote products
joeverbout 0:ea44dc9ed014 29 // derived from this software without specific prior written permission.
joeverbout 0:ea44dc9ed014 30 //
joeverbout 0:ea44dc9ed014 31 // This software is provided by the copyright holders and contributors "as is" and
joeverbout 0:ea44dc9ed014 32 // any express or implied warranties, including, but not limited to, the implied
joeverbout 0:ea44dc9ed014 33 // warranties of merchantability and fitness for a particular purpose are disclaimed.
joeverbout 0:ea44dc9ed014 34 // In no event shall the Intel Corporation or contributors be liable for any direct,
joeverbout 0:ea44dc9ed014 35 // indirect, incidental, special, exemplary, or consequential damages
joeverbout 0:ea44dc9ed014 36 // (including, but not limited to, procurement of substitute goods or services;
joeverbout 0:ea44dc9ed014 37 // loss of use, data, or profits; or business interruption) however caused
joeverbout 0:ea44dc9ed014 38 // and on any theory of liability, whether in contract, strict liability,
joeverbout 0:ea44dc9ed014 39 // or tort (including negligence or otherwise) arising in any way out of
joeverbout 0:ea44dc9ed014 40 // the use of this software, even if advised of the possibility of such damage.
joeverbout 0:ea44dc9ed014 41 //
joeverbout 0:ea44dc9ed014 42 //M*/
joeverbout 0:ea44dc9ed014 43
joeverbout 0:ea44dc9ed014 44 #ifndef __OPENCV_CORE_CUDA_HPP__
joeverbout 0:ea44dc9ed014 45 #define __OPENCV_CORE_CUDA_HPP__
joeverbout 0:ea44dc9ed014 46
joeverbout 0:ea44dc9ed014 47 #ifndef __cplusplus
joeverbout 0:ea44dc9ed014 48 # error cuda.hpp header must be compiled as C++
joeverbout 0:ea44dc9ed014 49 #endif
joeverbout 0:ea44dc9ed014 50
joeverbout 0:ea44dc9ed014 51 #include "opencv2/core.hpp"
joeverbout 0:ea44dc9ed014 52 #include "opencv2/core/cuda_types.hpp"
joeverbout 0:ea44dc9ed014 53
joeverbout 0:ea44dc9ed014 54 /**
joeverbout 0:ea44dc9ed014 55 @defgroup cuda CUDA-accelerated Computer Vision
joeverbout 0:ea44dc9ed014 56 @{
joeverbout 0:ea44dc9ed014 57 @defgroup cudacore Core part
joeverbout 0:ea44dc9ed014 58 @{
joeverbout 0:ea44dc9ed014 59 @defgroup cudacore_init Initialization and Information
joeverbout 0:ea44dc9ed014 60 @defgroup cudacore_struct Data Structures
joeverbout 0:ea44dc9ed014 61 @}
joeverbout 0:ea44dc9ed014 62 @}
joeverbout 0:ea44dc9ed014 63 */
joeverbout 0:ea44dc9ed014 64
joeverbout 0:ea44dc9ed014 65 namespace cv { namespace cuda {
joeverbout 0:ea44dc9ed014 66
joeverbout 0:ea44dc9ed014 67 //! @addtogroup cudacore_struct
joeverbout 0:ea44dc9ed014 68 //! @{
joeverbout 0:ea44dc9ed014 69
joeverbout 0:ea44dc9ed014 70 //===================================================================================
joeverbout 0:ea44dc9ed014 71 // GpuMat
joeverbout 0:ea44dc9ed014 72 //===================================================================================
joeverbout 0:ea44dc9ed014 73
joeverbout 0:ea44dc9ed014 74 /** @brief Base storage class for GPU memory with reference counting.
joeverbout 0:ea44dc9ed014 75
joeverbout 0:ea44dc9ed014 76 Its interface matches the Mat interface with the following limitations:
joeverbout 0:ea44dc9ed014 77
joeverbout 0:ea44dc9ed014 78 - no arbitrary dimensions support (only 2D)
joeverbout 0:ea44dc9ed014 79 - no functions that return references to their data (because references on GPU are not valid for
joeverbout 0:ea44dc9ed014 80 CPU)
joeverbout 0:ea44dc9ed014 81 - no expression templates technique support
joeverbout 0:ea44dc9ed014 82
joeverbout 0:ea44dc9ed014 83 Beware that the latter limitation may lead to overloaded matrix operators that cause memory
joeverbout 0:ea44dc9ed014 84 allocations. The GpuMat class is convertible to cuda::PtrStepSz and cuda::PtrStep so it can be
joeverbout 0:ea44dc9ed014 85 passed directly to the kernel.
joeverbout 0:ea44dc9ed014 86
joeverbout 0:ea44dc9ed014 87 @note In contrast with Mat, in most cases GpuMat::isContinuous() == false . This means that rows are
joeverbout 0:ea44dc9ed014 88 aligned to a size depending on the hardware. Single-row GpuMat is always a continuous matrix.
joeverbout 0:ea44dc9ed014 89
joeverbout 0:ea44dc9ed014 90 @note It is not recommended to leave static or global GpuMat variables allocated, that is, to rely
joeverbout 0:ea44dc9ed014 91 on their destructors. The destruction order of such variables and the CUDA context is undefined. The GPU memory
joeverbout 0:ea44dc9ed014 92 release function returns an error if the CUDA context has been destroyed earlier.
joeverbout 0:ea44dc9ed014 93
joeverbout 0:ea44dc9ed014 94 @sa Mat
joeverbout 0:ea44dc9ed014 95 */
joeverbout 0:ea44dc9ed014 96 class CV_EXPORTS GpuMat
joeverbout 0:ea44dc9ed014 97 {
joeverbout 0:ea44dc9ed014 98 public:
joeverbout 0:ea44dc9ed014 99 class CV_EXPORTS Allocator
joeverbout 0:ea44dc9ed014 100 {
joeverbout 0:ea44dc9ed014 101 public:
joeverbout 0:ea44dc9ed014 102 virtual ~Allocator() {}
joeverbout 0:ea44dc9ed014 103
joeverbout 0:ea44dc9ed014 104 // allocator must fill data, step and refcount fields
joeverbout 0:ea44dc9ed014 105 virtual bool allocate(GpuMat* mat, int rows, int cols, size_t elemSize) = 0;
joeverbout 0:ea44dc9ed014 106 virtual void free(GpuMat* mat) = 0;
joeverbout 0:ea44dc9ed014 107 };
joeverbout 0:ea44dc9ed014 108
joeverbout 0:ea44dc9ed014 109 //! default allocator
joeverbout 0:ea44dc9ed014 110 static Allocator* defaultAllocator();
joeverbout 0:ea44dc9ed014 111 static void setDefaultAllocator(Allocator* allocator);
joeverbout 0:ea44dc9ed014 112
joeverbout 0:ea44dc9ed014 113 //! default constructor
joeverbout 0:ea44dc9ed014 114 explicit GpuMat(Allocator* allocator = defaultAllocator());
joeverbout 0:ea44dc9ed014 115
joeverbout 0:ea44dc9ed014 116 //! constructs GpuMat of the specified size and type
joeverbout 0:ea44dc9ed014 117 GpuMat(int rows, int cols, int type, Allocator* allocator = defaultAllocator());
joeverbout 0:ea44dc9ed014 118 GpuMat(Size size, int type, Allocator* allocator = defaultAllocator());
joeverbout 0:ea44dc9ed014 119
joeverbout 0:ea44dc9ed014 120 //! constructs GpuMat and fills it with the specified value _s
joeverbout 0:ea44dc9ed014 121 GpuMat(int rows, int cols, int type, Scalar s, Allocator* allocator = defaultAllocator());
joeverbout 0:ea44dc9ed014 122 GpuMat(Size size, int type, Scalar s, Allocator* allocator = defaultAllocator());
joeverbout 0:ea44dc9ed014 123
joeverbout 0:ea44dc9ed014 124 //! copy constructor
joeverbout 0:ea44dc9ed014 125 GpuMat(const GpuMat& m);
joeverbout 0:ea44dc9ed014 126
joeverbout 0:ea44dc9ed014 127 //! constructor for GpuMat headers pointing to user-allocated data
joeverbout 0:ea44dc9ed014 128 GpuMat(int rows, int cols, int type, void* data, size_t step = Mat::AUTO_STEP);
joeverbout 0:ea44dc9ed014 129 GpuMat(Size size, int type, void* data, size_t step = Mat::AUTO_STEP);
joeverbout 0:ea44dc9ed014 130
joeverbout 0:ea44dc9ed014 131 //! creates a GpuMat header for a part of the bigger matrix
joeverbout 0:ea44dc9ed014 132 GpuMat(const GpuMat& m, Range rowRange, Range colRange);
joeverbout 0:ea44dc9ed014 133 GpuMat(const GpuMat& m, Rect roi);
joeverbout 0:ea44dc9ed014 134
joeverbout 0:ea44dc9ed014 135 //! builds GpuMat from host memory (Blocking call)
joeverbout 0:ea44dc9ed014 136 explicit GpuMat(InputArray arr, Allocator* allocator = defaultAllocator());
joeverbout 0:ea44dc9ed014 137
joeverbout 0:ea44dc9ed014 138 //! destructor - calls release()
joeverbout 0:ea44dc9ed014 139 ~GpuMat();
joeverbout 0:ea44dc9ed014 140
joeverbout 0:ea44dc9ed014 141 //! assignment operators
joeverbout 0:ea44dc9ed014 142 GpuMat& operator =(const GpuMat& m);
joeverbout 0:ea44dc9ed014 143
joeverbout 0:ea44dc9ed014 144 //! allocates new GpuMat data unless the GpuMat already has specified size and type
joeverbout 0:ea44dc9ed014 145 void create(int rows, int cols, int type);
joeverbout 0:ea44dc9ed014 146 void create(Size size, int type);
joeverbout 0:ea44dc9ed014 147
joeverbout 0:ea44dc9ed014 148 //! decreases the reference counter, deallocating the data when the reference counter reaches 0
joeverbout 0:ea44dc9ed014 149 void release();
joeverbout 0:ea44dc9ed014 150
joeverbout 0:ea44dc9ed014 151 //! swaps with other smart pointer
joeverbout 0:ea44dc9ed014 152 void swap(GpuMat& mat);
joeverbout 0:ea44dc9ed014 153
joeverbout 0:ea44dc9ed014 154 //! performs data upload to GpuMat (Blocking call)
joeverbout 0:ea44dc9ed014 155 void upload(InputArray arr);
joeverbout 0:ea44dc9ed014 156
joeverbout 0:ea44dc9ed014 157 //! performs data upload to GpuMat (Non-Blocking call)
joeverbout 0:ea44dc9ed014 158 void upload(InputArray arr, Stream& stream);
joeverbout 0:ea44dc9ed014 159
joeverbout 0:ea44dc9ed014 160 //! performs data download from device to host memory (Blocking call)
joeverbout 0:ea44dc9ed014 161 void download(OutputArray dst) const;
joeverbout 0:ea44dc9ed014 162
joeverbout 0:ea44dc9ed014 163 //! performs data download from device to host memory (Non-Blocking call)
joeverbout 0:ea44dc9ed014 164 void download(OutputArray dst, Stream& stream) const;
joeverbout 0:ea44dc9ed014 165
joeverbout 0:ea44dc9ed014 166 //! returns deep copy of the GpuMat, i.e. the data is copied
joeverbout 0:ea44dc9ed014 167 GpuMat clone() const;
joeverbout 0:ea44dc9ed014 168
joeverbout 0:ea44dc9ed014 169 //! copies the GpuMat content to device memory (Blocking call)
joeverbout 0:ea44dc9ed014 170 void copyTo(OutputArray dst) const;
joeverbout 0:ea44dc9ed014 171
joeverbout 0:ea44dc9ed014 172 //! copies the GpuMat content to device memory (Non-Blocking call)
joeverbout 0:ea44dc9ed014 173 void copyTo(OutputArray dst, Stream& stream) const;
joeverbout 0:ea44dc9ed014 174
joeverbout 0:ea44dc9ed014 175 //! copies those GpuMat elements to "dst" that are marked with non-zero mask elements (Blocking call)
joeverbout 0:ea44dc9ed014 176 void copyTo(OutputArray dst, InputArray mask) const;
joeverbout 0:ea44dc9ed014 177
joeverbout 0:ea44dc9ed014 178 //! copies those GpuMat elements to "dst" that are marked with non-zero mask elements (Non-Blocking call)
joeverbout 0:ea44dc9ed014 179 void copyTo(OutputArray dst, InputArray mask, Stream& stream) const;
joeverbout 0:ea44dc9ed014 180
joeverbout 0:ea44dc9ed014 181 //! sets some of the GpuMat elements to s (Blocking call)
joeverbout 0:ea44dc9ed014 182 GpuMat& setTo(Scalar s);
joeverbout 0:ea44dc9ed014 183
joeverbout 0:ea44dc9ed014 184 //! sets some of the GpuMat elements to s (Non-Blocking call)
joeverbout 0:ea44dc9ed014 185 GpuMat& setTo(Scalar s, Stream& stream);
joeverbout 0:ea44dc9ed014 186
joeverbout 0:ea44dc9ed014 187 //! sets some of the GpuMat elements to s, according to the mask (Blocking call)
joeverbout 0:ea44dc9ed014 188 GpuMat& setTo(Scalar s, InputArray mask);
joeverbout 0:ea44dc9ed014 189
joeverbout 0:ea44dc9ed014 190 //! sets some of the GpuMat elements to s, according to the mask (Non-Blocking call)
joeverbout 0:ea44dc9ed014 191 GpuMat& setTo(Scalar s, InputArray mask, Stream& stream);
joeverbout 0:ea44dc9ed014 192
joeverbout 0:ea44dc9ed014 193 //! converts GpuMat to another datatype (Blocking call)
joeverbout 0:ea44dc9ed014 194 void convertTo(OutputArray dst, int rtype) const;
joeverbout 0:ea44dc9ed014 195
joeverbout 0:ea44dc9ed014 196 //! converts GpuMat to another datatype (Non-Blocking call)
joeverbout 0:ea44dc9ed014 197 void convertTo(OutputArray dst, int rtype, Stream& stream) const;
joeverbout 0:ea44dc9ed014 198
joeverbout 0:ea44dc9ed014 199 //! converts GpuMat to another datatype with scaling (Blocking call)
joeverbout 0:ea44dc9ed014 200 void convertTo(OutputArray dst, int rtype, double alpha, double beta = 0.0) const;
joeverbout 0:ea44dc9ed014 201
joeverbout 0:ea44dc9ed014 202 //! converts GpuMat to another datatype with scaling (Non-Blocking call)
joeverbout 0:ea44dc9ed014 203 void convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const;
joeverbout 0:ea44dc9ed014 204
joeverbout 0:ea44dc9ed014 205 //! converts GpuMat to another datatype with scaling (Non-Blocking call)
joeverbout 0:ea44dc9ed014 206 void convertTo(OutputArray dst, int rtype, double alpha, double beta, Stream& stream) const;
joeverbout 0:ea44dc9ed014 207
joeverbout 0:ea44dc9ed014 208 void assignTo(GpuMat& m, int type=-1) const;
joeverbout 0:ea44dc9ed014 209
joeverbout 0:ea44dc9ed014 210 //! returns pointer to y-th row
joeverbout 0:ea44dc9ed014 211 uchar* ptr(int y = 0);
joeverbout 0:ea44dc9ed014 212 const uchar* ptr(int y = 0) const;
joeverbout 0:ea44dc9ed014 213
joeverbout 0:ea44dc9ed014 214 //! template version of the above method
joeverbout 0:ea44dc9ed014 215 template<typename _Tp> _Tp* ptr(int y = 0);
joeverbout 0:ea44dc9ed014 216 template<typename _Tp> const _Tp* ptr(int y = 0) const;
joeverbout 0:ea44dc9ed014 217
joeverbout 0:ea44dc9ed014 218 template <typename _Tp> operator PtrStepSz<_Tp>() const;
joeverbout 0:ea44dc9ed014 219 template <typename _Tp> operator PtrStep<_Tp>() const;
joeverbout 0:ea44dc9ed014 220
joeverbout 0:ea44dc9ed014 221 //! returns a new GpuMat header for the specified row
joeverbout 0:ea44dc9ed014 222 GpuMat row(int y) const;
joeverbout 0:ea44dc9ed014 223
joeverbout 0:ea44dc9ed014 224 //! returns a new GpuMat header for the specified column
joeverbout 0:ea44dc9ed014 225 GpuMat col(int x) const;
joeverbout 0:ea44dc9ed014 226
joeverbout 0:ea44dc9ed014 227 //! ... for the specified row span
joeverbout 0:ea44dc9ed014 228 GpuMat rowRange(int startrow, int endrow) const;
joeverbout 0:ea44dc9ed014 229 GpuMat rowRange(Range r) const;
joeverbout 0:ea44dc9ed014 230
joeverbout 0:ea44dc9ed014 231 //! ... for the specified column span
joeverbout 0:ea44dc9ed014 232 GpuMat colRange(int startcol, int endcol) const;
joeverbout 0:ea44dc9ed014 233 GpuMat colRange(Range r) const;
joeverbout 0:ea44dc9ed014 234
joeverbout 0:ea44dc9ed014 235 //! extracts a rectangular sub-GpuMat (this is a generalized form of row, rowRange etc.)
joeverbout 0:ea44dc9ed014 236 GpuMat operator ()(Range rowRange, Range colRange) const;
joeverbout 0:ea44dc9ed014 237 GpuMat operator ()(Rect roi) const;
joeverbout 0:ea44dc9ed014 238
joeverbout 0:ea44dc9ed014 239 //! creates alternative GpuMat header for the same data, with different
joeverbout 0:ea44dc9ed014 240 //! number of channels and/or different number of rows
joeverbout 0:ea44dc9ed014 241 GpuMat reshape(int cn, int rows = 0) const;
joeverbout 0:ea44dc9ed014 242
joeverbout 0:ea44dc9ed014 243 //! locates GpuMat header within a parent GpuMat
joeverbout 0:ea44dc9ed014 244 void locateROI(Size& wholeSize, Point& ofs) const;
joeverbout 0:ea44dc9ed014 245
joeverbout 0:ea44dc9ed014 246 //! moves/resizes the current GpuMat ROI inside the parent GpuMat
joeverbout 0:ea44dc9ed014 247 GpuMat& adjustROI(int dtop, int dbottom, int dleft, int dright);
joeverbout 0:ea44dc9ed014 248
joeverbout 0:ea44dc9ed014 249 //! returns true iff the GpuMat data is continuous
joeverbout 0:ea44dc9ed014 250 //! (i.e. when there are no gaps between successive rows)
joeverbout 0:ea44dc9ed014 251 bool isContinuous() const;
joeverbout 0:ea44dc9ed014 252
joeverbout 0:ea44dc9ed014 253 //! returns element size in bytes
joeverbout 0:ea44dc9ed014 254 size_t elemSize() const;
joeverbout 0:ea44dc9ed014 255
joeverbout 0:ea44dc9ed014 256 //! returns the size of element channel in bytes
joeverbout 0:ea44dc9ed014 257 size_t elemSize1() const;
joeverbout 0:ea44dc9ed014 258
joeverbout 0:ea44dc9ed014 259 //! returns element type
joeverbout 0:ea44dc9ed014 260 int type() const;
joeverbout 0:ea44dc9ed014 261
joeverbout 0:ea44dc9ed014 262 //! returns element depth
joeverbout 0:ea44dc9ed014 263 int depth() const;
joeverbout 0:ea44dc9ed014 264
joeverbout 0:ea44dc9ed014 265 //! returns number of channels
joeverbout 0:ea44dc9ed014 266 int channels() const;
joeverbout 0:ea44dc9ed014 267
joeverbout 0:ea44dc9ed014 268 //! returns step/elemSize1()
joeverbout 0:ea44dc9ed014 269 size_t step1() const;
joeverbout 0:ea44dc9ed014 270
joeverbout 0:ea44dc9ed014 271 //! returns GpuMat size : width == number of columns, height == number of rows
joeverbout 0:ea44dc9ed014 272 Size size() const;
joeverbout 0:ea44dc9ed014 273
joeverbout 0:ea44dc9ed014 274 //! returns true if GpuMat data is NULL
joeverbout 0:ea44dc9ed014 275 bool empty() const;
joeverbout 0:ea44dc9ed014 276
joeverbout 0:ea44dc9ed014 277 /*! includes several bit-fields:
joeverbout 0:ea44dc9ed014 278 - the magic signature
joeverbout 0:ea44dc9ed014 279 - continuity flag
joeverbout 0:ea44dc9ed014 280 - depth
joeverbout 0:ea44dc9ed014 281 - number of channels
joeverbout 0:ea44dc9ed014 282 */
joeverbout 0:ea44dc9ed014 283 int flags;
joeverbout 0:ea44dc9ed014 284
joeverbout 0:ea44dc9ed014 285 //! the number of rows and columns
joeverbout 0:ea44dc9ed014 286 int rows, cols;
joeverbout 0:ea44dc9ed014 287
joeverbout 0:ea44dc9ed014 288 //! a distance between successive rows in bytes; includes the gap if any
joeverbout 0:ea44dc9ed014 289 size_t step;
joeverbout 0:ea44dc9ed014 290
joeverbout 0:ea44dc9ed014 291 //! pointer to the data
joeverbout 0:ea44dc9ed014 292 uchar* data;
joeverbout 0:ea44dc9ed014 293
joeverbout 0:ea44dc9ed014 294 //! pointer to the reference counter;
joeverbout 0:ea44dc9ed014 295 //! when GpuMat points to user-allocated data, the pointer is NULL
joeverbout 0:ea44dc9ed014 296 int* refcount;
joeverbout 0:ea44dc9ed014 297
joeverbout 0:ea44dc9ed014 298 //! helper fields used in locateROI and adjustROI
joeverbout 0:ea44dc9ed014 299 uchar* datastart;
joeverbout 0:ea44dc9ed014 300 const uchar* dataend;
joeverbout 0:ea44dc9ed014 301
joeverbout 0:ea44dc9ed014 302 //! allocator
joeverbout 0:ea44dc9ed014 303 Allocator* allocator;
joeverbout 0:ea44dc9ed014 304 };
joeverbout 0:ea44dc9ed014 305
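As a quick orientation to the Mat-like interface documented above, a minimal host-to-device round trip could look like the sketch below. This is only an illustration, assuming OpenCV was built with CUDA support and a CUDA device is available; the function name and the scaling factor are arbitrary.

    #include "opencv2/core/cuda.hpp"

    void gpuMatRoundTrip(const cv::Mat& host)
    {
        cv::cuda::GpuMat d_img;
        d_img.upload(host);                                 // blocking host -> device copy

        cv::cuda::GpuMat d_float;
        d_img.convertTo(d_float, CV_32F, 1.0 / 255.0);      // conversion runs on the device

        cv::Mat result;
        d_float.download(result);                           // blocking device -> host copy
    }
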
joeverbout 0:ea44dc9ed014 306 /** @brief Creates a continuous matrix.
joeverbout 0:ea44dc9ed014 307
joeverbout 0:ea44dc9ed014 308 @param rows Row count.
joeverbout 0:ea44dc9ed014 309 @param cols Column count.
joeverbout 0:ea44dc9ed014 310 @param type Type of the matrix.
joeverbout 0:ea44dc9ed014 311 @param arr Destination matrix. This parameter changes only if it has a proper type and area (
joeverbout 0:ea44dc9ed014 312 \f$\texttt{rows} \times \texttt{cols}\f$ ).
joeverbout 0:ea44dc9ed014 313
joeverbout 0:ea44dc9ed014 314 Matrix is called continuous if its elements are stored continuously, that is, without gaps at the
joeverbout 0:ea44dc9ed014 315 end of each row.
joeverbout 0:ea44dc9ed014 316 */
joeverbout 0:ea44dc9ed014 317 CV_EXPORTS void createContinuous(int rows, int cols, int type, OutputArray arr);
joeverbout 0:ea44dc9ed014 318
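For illustration, a hedged sketch of using createContinuous to get a GpuMat with no row gaps, which is handy when device code indexes the buffer as one flat array; the size and type here are made up.

    #include "opencv2/core/cuda.hpp"

    void makeContinuousBuffer()
    {
        cv::cuda::GpuMat buf;
        cv::cuda::createContinuous(480, 640, CV_8UC1, buf);
        CV_Assert(buf.isContinuous());   // rows are packed: step == cols * elemSize()
    }
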
joeverbout 0:ea44dc9ed014 319 /** @brief Ensures that the size of a matrix is big enough and the matrix has a proper type.
joeverbout 0:ea44dc9ed014 320
joeverbout 0:ea44dc9ed014 321 @param rows Minimum desired number of rows.
joeverbout 0:ea44dc9ed014 322 @param cols Minimum desired number of columns.
joeverbout 0:ea44dc9ed014 323 @param type Desired matrix type.
joeverbout 0:ea44dc9ed014 324 @param arr Destination matrix.
joeverbout 0:ea44dc9ed014 325
joeverbout 0:ea44dc9ed014 326 The function does not reallocate memory if the matrix already has the proper attributes.
joeverbout 0:ea44dc9ed014 327 */
joeverbout 0:ea44dc9ed014 328 CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr);
joeverbout 0:ea44dc9ed014 329
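A possible use of ensureSizeIsEnough is to reuse one scratch buffer across iterations so the allocation happens only once; a sketch (the loop count and matrix parameters are arbitrary example values):

    #include "opencv2/core/cuda.hpp"

    void reuseScratchBuffer()
    {
        cv::cuda::GpuMat scratch;
        for (int i = 0; i < 100; ++i)
        {
            // reallocates only if the current buffer is too small or has the wrong type
            cv::cuda::ensureSizeIsEnough(480, 640, CV_32FC1, scratch);
            // ... fill and use scratch here ...
        }
    }
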
joeverbout 0:ea44dc9ed014 330 //! BufferPool management (must be called before Stream creation)
joeverbout 0:ea44dc9ed014 331 CV_EXPORTS void setBufferPoolUsage(bool on);
joeverbout 0:ea44dc9ed014 332 CV_EXPORTS void setBufferPoolConfig(int deviceId, size_t stackSize, int stackCount);
joeverbout 0:ea44dc9ed014 333
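A tentative sketch of enabling the buffer pool before any Stream is constructed; the 64 MB stack size and the stack count of 2 are arbitrary example values, not recommendations.

    #include "opencv2/core/cuda.hpp"

    void enableBufferPool()
    {
        cv::cuda::setBufferPoolUsage(true);                       // must precede Stream creation
        cv::cuda::setBufferPoolConfig(cv::cuda::getDevice(),
                                      64 * 1024 * 1024,           // stack size in bytes
                                      2);                         // number of stacks
        cv::cuda::Stream stream;                                  // streams created from here on can use the pool
    }
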
joeverbout 0:ea44dc9ed014 334 //===================================================================================
joeverbout 0:ea44dc9ed014 335 // HostMem
joeverbout 0:ea44dc9ed014 336 //===================================================================================
joeverbout 0:ea44dc9ed014 337
joeverbout 0:ea44dc9ed014 338 /** @brief Class with reference counting wrapping special memory type allocation functions from CUDA.
joeverbout 0:ea44dc9ed014 339
joeverbout 0:ea44dc9ed014 340 Its interface is also Mat-like but with additional memory type parameters.
joeverbout 0:ea44dc9ed014 341
joeverbout 0:ea44dc9ed014 342 - **PAGE_LOCKED** sets a page locked memory type used commonly for fast and asynchronous
joeverbout 0:ea44dc9ed014 343 uploading/downloading data from/to GPU.
joeverbout 0:ea44dc9ed014 344 - **SHARED** specifies a zero copy memory allocation that enables mapping the host memory to GPU
joeverbout 0:ea44dc9ed014 345 address space, if supported.
joeverbout 0:ea44dc9ed014 346 - **WRITE_COMBINED** sets the write combined buffer that is not cached by CPU. Such buffers are
joeverbout 0:ea44dc9ed014 347 used to supply GPU with data when GPU only reads it. The advantage is a better CPU cache
joeverbout 0:ea44dc9ed014 348 utilization.
joeverbout 0:ea44dc9ed014 349
joeverbout 0:ea44dc9ed014 350 @note Allocation size of such memory types is usually limited. For more details, see *CUDA 2.2
joeverbout 0:ea44dc9ed014 351 Pinned Memory APIs* document or *CUDA C Programming Guide*.
joeverbout 0:ea44dc9ed014 352 */
joeverbout 0:ea44dc9ed014 353 class CV_EXPORTS HostMem
joeverbout 0:ea44dc9ed014 354 {
joeverbout 0:ea44dc9ed014 355 public:
joeverbout 0:ea44dc9ed014 356 enum AllocType { PAGE_LOCKED = 1, SHARED = 2, WRITE_COMBINED = 4 };
joeverbout 0:ea44dc9ed014 357
joeverbout 0:ea44dc9ed014 358 static MatAllocator* getAllocator(AllocType alloc_type = PAGE_LOCKED);
joeverbout 0:ea44dc9ed014 359
joeverbout 0:ea44dc9ed014 360 explicit HostMem(AllocType alloc_type = PAGE_LOCKED);
joeverbout 0:ea44dc9ed014 361
joeverbout 0:ea44dc9ed014 362 HostMem(const HostMem& m);
joeverbout 0:ea44dc9ed014 363
joeverbout 0:ea44dc9ed014 364 HostMem(int rows, int cols, int type, AllocType alloc_type = PAGE_LOCKED);
joeverbout 0:ea44dc9ed014 365 HostMem(Size size, int type, AllocType alloc_type = PAGE_LOCKED);
joeverbout 0:ea44dc9ed014 366
joeverbout 0:ea44dc9ed014 367 //! creates from host memory, copying the data
joeverbout 0:ea44dc9ed014 368 explicit HostMem(InputArray arr, AllocType alloc_type = PAGE_LOCKED);
joeverbout 0:ea44dc9ed014 369
joeverbout 0:ea44dc9ed014 370 ~HostMem();
joeverbout 0:ea44dc9ed014 371
joeverbout 0:ea44dc9ed014 372 HostMem& operator =(const HostMem& m);
joeverbout 0:ea44dc9ed014 373
joeverbout 0:ea44dc9ed014 374 //! swaps with other smart pointer
joeverbout 0:ea44dc9ed014 375 void swap(HostMem& b);
joeverbout 0:ea44dc9ed014 376
joeverbout 0:ea44dc9ed014 377 //! returns deep copy of the matrix, i.e. the data is copied
joeverbout 0:ea44dc9ed014 378 HostMem clone() const;
joeverbout 0:ea44dc9ed014 379
joeverbout 0:ea44dc9ed014 380 //! allocates new matrix data unless the matrix already has specified size and type.
joeverbout 0:ea44dc9ed014 381 void create(int rows, int cols, int type);
joeverbout 0:ea44dc9ed014 382 void create(Size size, int type);
joeverbout 0:ea44dc9ed014 383
joeverbout 0:ea44dc9ed014 384 //! creates alternative HostMem header for the same data, with different
joeverbout 0:ea44dc9ed014 385 //! number of channels and/or different number of rows
joeverbout 0:ea44dc9ed014 386 HostMem reshape(int cn, int rows = 0) const;
joeverbout 0:ea44dc9ed014 387
joeverbout 0:ea44dc9ed014 388 //! decrements the reference counter and releases the memory if needed.
joeverbout 0:ea44dc9ed014 389 void release();
joeverbout 0:ea44dc9ed014 390
joeverbout 0:ea44dc9ed014 391 //! returns matrix header with disabled reference counting for HostMem data.
joeverbout 0:ea44dc9ed014 392 Mat createMatHeader() const;
joeverbout 0:ea44dc9ed014 393
joeverbout 0:ea44dc9ed014 394 /** @brief Maps CPU memory to GPU address space and creates the cuda::GpuMat header without reference counting
joeverbout 0:ea44dc9ed014 395 for it.
joeverbout 0:ea44dc9ed014 396
joeverbout 0:ea44dc9ed014 397 This can be done only if memory was allocated with the SHARED flag and if it is supported by the
joeverbout 0:ea44dc9ed014 398 hardware. Laptops often share video and CPU memory, so address spaces can be mapped, which
joeverbout 0:ea44dc9ed014 399 eliminates an extra copy.
joeverbout 0:ea44dc9ed014 400 */
joeverbout 0:ea44dc9ed014 401 GpuMat createGpuMatHeader() const;
joeverbout 0:ea44dc9ed014 402
joeverbout 0:ea44dc9ed014 403 // Please see cv::Mat for descriptions
joeverbout 0:ea44dc9ed014 404 bool isContinuous() const;
joeverbout 0:ea44dc9ed014 405 size_t elemSize() const;
joeverbout 0:ea44dc9ed014 406 size_t elemSize1() const;
joeverbout 0:ea44dc9ed014 407 int type() const;
joeverbout 0:ea44dc9ed014 408 int depth() const;
joeverbout 0:ea44dc9ed014 409 int channels() const;
joeverbout 0:ea44dc9ed014 410 size_t step1() const;
joeverbout 0:ea44dc9ed014 411 Size size() const;
joeverbout 0:ea44dc9ed014 412 bool empty() const;
joeverbout 0:ea44dc9ed014 413
joeverbout 0:ea44dc9ed014 414 // Please see cv::Mat for descriptions
joeverbout 0:ea44dc9ed014 415 int flags;
joeverbout 0:ea44dc9ed014 416 int rows, cols;
joeverbout 0:ea44dc9ed014 417 size_t step;
joeverbout 0:ea44dc9ed014 418
joeverbout 0:ea44dc9ed014 419 uchar* data;
joeverbout 0:ea44dc9ed014 420 int* refcount;
joeverbout 0:ea44dc9ed014 421
joeverbout 0:ea44dc9ed014 422 uchar* datastart;
joeverbout 0:ea44dc9ed014 423 const uchar* dataend;
joeverbout 0:ea44dc9ed014 424
joeverbout 0:ea44dc9ed014 425 AllocType alloc_type;
joeverbout 0:ea44dc9ed014 426 };
joeverbout 0:ea44dc9ed014 427
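To illustrate the PAGE_LOCKED allocation type described above, here is a sketch of pairing HostMem with an asynchronous upload. The frame size is arbitrary and filling the frame is left out; this is an assumption-laden example, not canonical usage.

    #include "opencv2/core/cuda.hpp"

    void uploadFromPinnedMemory()
    {
        cv::cuda::HostMem pinned(480, 640, CV_8UC1, cv::cuda::HostMem::PAGE_LOCKED);
        cv::Mat h_frame = pinned.createMatHeader();   // ordinary Mat header over the pinned buffer
        // ... fill h_frame ...

        cv::cuda::Stream stream;
        cv::cuda::GpuMat d_frame;
        d_frame.upload(h_frame, stream);              // can overlap with CPU work because the source is page-locked
        stream.waitForCompletion();
    }
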
joeverbout 0:ea44dc9ed014 428 /** @brief Page-locks the memory of matrix and maps it for the device(s).
joeverbout 0:ea44dc9ed014 429
joeverbout 0:ea44dc9ed014 430 @param m Input matrix.
joeverbout 0:ea44dc9ed014 431 */
joeverbout 0:ea44dc9ed014 432 CV_EXPORTS void registerPageLocked(Mat& m);
joeverbout 0:ea44dc9ed014 433
joeverbout 0:ea44dc9ed014 434 /** @brief Unmaps the memory of matrix and makes it pageable again.
joeverbout 0:ea44dc9ed014 435
joeverbout 0:ea44dc9ed014 436 @param m Input matrix.
joeverbout 0:ea44dc9ed014 437 */
joeverbout 0:ea44dc9ed014 438 CV_EXPORTS void unregisterPageLocked(Mat& m);
joeverbout 0:ea44dc9ed014 439
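A brief sketch of pinning an existing Mat with registerPageLocked for repeated transfers; the matrix size is an arbitrary example.

    #include "opencv2/core/cuda.hpp"

    void pinExistingMat()
    {
        cv::Mat frame(1080, 1920, CV_8UC3);
        cv::cuda::registerPageLocked(frame);      // page-lock the existing buffer
        // ... repeated uploads/downloads using frame ...
        cv::cuda::unregisterPageLocked(frame);    // make it pageable again before frame goes away
    }
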
joeverbout 0:ea44dc9ed014 440 //===================================================================================
joeverbout 0:ea44dc9ed014 441 // Stream
joeverbout 0:ea44dc9ed014 442 //===================================================================================
joeverbout 0:ea44dc9ed014 443
joeverbout 0:ea44dc9ed014 444 /** @brief This class encapsulates a queue of asynchronous calls.
joeverbout 0:ea44dc9ed014 445
joeverbout 0:ea44dc9ed014 446 @note Currently, you may face problems if an operation is enqueued twice with different data. Some
joeverbout 0:ea44dc9ed014 447 functions use constant GPU memory, and the next call may update that memory before the previous call
joeverbout 0:ea44dc9ed014 448 has finished. Calling different operations asynchronously is safe, however, because each operation
joeverbout 0:ea44dc9ed014 449 has its own constant buffer. Memory copy/upload/download/set operations on the buffers you hold are
joeverbout 0:ea44dc9ed014 450 also safe.
joeverbout 0:ea44dc9ed014 451 */
joeverbout 0:ea44dc9ed014 452 class CV_EXPORTS Stream
joeverbout 0:ea44dc9ed014 453 {
joeverbout 0:ea44dc9ed014 454 typedef void (Stream::*bool_type)() const;
joeverbout 0:ea44dc9ed014 455 void this_type_does_not_support_comparisons() const {}
joeverbout 0:ea44dc9ed014 456
joeverbout 0:ea44dc9ed014 457 public:
joeverbout 0:ea44dc9ed014 458 typedef void (*StreamCallback)(int status, void* userData);
joeverbout 0:ea44dc9ed014 459
joeverbout 0:ea44dc9ed014 460 //! creates a new asynchronous stream
joeverbout 0:ea44dc9ed014 461 Stream();
joeverbout 0:ea44dc9ed014 462
joeverbout 0:ea44dc9ed014 463 /** @brief Returns true if the current stream queue is finished. Otherwise, it returns false.
joeverbout 0:ea44dc9ed014 464 */
joeverbout 0:ea44dc9ed014 465 bool queryIfComplete() const;
joeverbout 0:ea44dc9ed014 466
joeverbout 0:ea44dc9ed014 467 /** @brief Blocks the current CPU thread until all operations in the stream are complete.
joeverbout 0:ea44dc9ed014 468 */
joeverbout 0:ea44dc9ed014 469 void waitForCompletion();
joeverbout 0:ea44dc9ed014 470
joeverbout 0:ea44dc9ed014 471 /** @brief Makes a compute stream wait on an event.
joeverbout 0:ea44dc9ed014 472 */
joeverbout 0:ea44dc9ed014 473 void waitEvent(const Event& event);
joeverbout 0:ea44dc9ed014 474
joeverbout 0:ea44dc9ed014 475 /** @brief Adds a callback to be called on the host after all currently enqueued items in the stream have
joeverbout 0:ea44dc9ed014 476 completed.
joeverbout 0:ea44dc9ed014 477
joeverbout 0:ea44dc9ed014 478 @note Callbacks must not make any CUDA API calls. Callbacks must not perform any synchronization
joeverbout 0:ea44dc9ed014 479 that may depend on outstanding device work or other callbacks that are not mandated to run earlier.
joeverbout 0:ea44dc9ed014 480 Callbacks without a mandated order (in independent streams) execute in undefined order and may be
joeverbout 0:ea44dc9ed014 481 serialized.
joeverbout 0:ea44dc9ed014 482 */
joeverbout 0:ea44dc9ed014 483 void enqueueHostCallback(StreamCallback callback, void* userData);
joeverbout 0:ea44dc9ed014 484
joeverbout 0:ea44dc9ed014 485 //! returns Stream object for the default CUDA stream
joeverbout 0:ea44dc9ed014 486 static Stream& Null();
joeverbout 0:ea44dc9ed014 487
joeverbout 0:ea44dc9ed014 488 //! returns true if stream object is not default (!= 0)
joeverbout 0:ea44dc9ed014 489 operator bool_type() const;
joeverbout 0:ea44dc9ed014 490
joeverbout 0:ea44dc9ed014 491 class Impl;
joeverbout 0:ea44dc9ed014 492
joeverbout 0:ea44dc9ed014 493 private:
joeverbout 0:ea44dc9ed014 494 Ptr<Impl> impl_;
joeverbout 0:ea44dc9ed014 495 Stream(const Ptr<Impl>& impl);
joeverbout 0:ea44dc9ed014 496
joeverbout 0:ea44dc9ed014 497 friend struct StreamAccessor;
joeverbout 0:ea44dc9ed014 498 friend class BufferPool;
joeverbout 0:ea44dc9ed014 499 friend class DefaultDeviceInitializer;
joeverbout 0:ea44dc9ed014 500 };
joeverbout 0:ea44dc9ed014 501
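As an illustration of queuing several operations on one Stream, the hedged sketch below chains an upload, a conversion, a download and a host callback. h_src and h_dst are placeholder host matrices; for truly asynchronous copies they should be page-locked, e.g. via HostMem or registerPageLocked.

    #include "opencv2/core/cuda.hpp"

    static void onStreamDone(int /*status*/, void* /*userData*/)
    {
        // runs on a driver thread once the work queued before it has completed;
        // per the note above it must not make any CUDA API calls
    }

    void pipelineOnStream(const cv::Mat& h_src, cv::Mat& h_dst)
    {
        cv::cuda::Stream stream;
        cv::cuda::GpuMat d_src, d_dst;

        d_src.upload(h_src, stream);             // queued on the stream
        d_src.convertTo(d_dst, CV_32F, stream);  // queued behind the upload
        d_dst.download(h_dst, stream);           // queued behind the conversion

        stream.enqueueHostCallback(onStreamDone, 0);
        stream.waitForCompletion();              // block this CPU thread until the queue drains
    }
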
joeverbout 0:ea44dc9ed014 502 class CV_EXPORTS Event
joeverbout 0:ea44dc9ed014 503 {
joeverbout 0:ea44dc9ed014 504 public:
joeverbout 0:ea44dc9ed014 505 enum CreateFlags
joeverbout 0:ea44dc9ed014 506 {
joeverbout 0:ea44dc9ed014 507 DEFAULT = 0x00, /**< Default event flag */
joeverbout 0:ea44dc9ed014 508 BLOCKING_SYNC = 0x01, /**< Event uses blocking synchronization */
joeverbout 0:ea44dc9ed014 509 DISABLE_TIMING = 0x02, /**< Event will not record timing data */
joeverbout 0:ea44dc9ed014 510 INTERPROCESS = 0x04 /**< Event is suitable for interprocess use. DisableTiming must be set */
joeverbout 0:ea44dc9ed014 511 };
joeverbout 0:ea44dc9ed014 512
joeverbout 0:ea44dc9ed014 513 explicit Event(CreateFlags flags = DEFAULT);
joeverbout 0:ea44dc9ed014 514
joeverbout 0:ea44dc9ed014 515 //! records an event
joeverbout 0:ea44dc9ed014 516 void record(Stream& stream = Stream::Null());
joeverbout 0:ea44dc9ed014 517
joeverbout 0:ea44dc9ed014 518 //! queries an event's status
joeverbout 0:ea44dc9ed014 519 bool queryIfComplete() const;
joeverbout 0:ea44dc9ed014 520
joeverbout 0:ea44dc9ed014 521 //! waits for an event to complete
joeverbout 0:ea44dc9ed014 522 void waitForCompletion();
joeverbout 0:ea44dc9ed014 523
joeverbout 0:ea44dc9ed014 524 //! computes the elapsed time between events
joeverbout 0:ea44dc9ed014 525 static float elapsedTime(const Event& start, const Event& end);
joeverbout 0:ea44dc9ed014 526
joeverbout 0:ea44dc9ed014 527 class Impl;
joeverbout 0:ea44dc9ed014 528
joeverbout 0:ea44dc9ed014 529 private:
joeverbout 0:ea44dc9ed014 530 Ptr<Impl> impl_;
joeverbout 0:ea44dc9ed014 531 Event(const Ptr<Impl>& impl);
joeverbout 0:ea44dc9ed014 532
joeverbout 0:ea44dc9ed014 533 friend struct EventAccessor;
joeverbout 0:ea44dc9ed014 534 };
joeverbout 0:ea44dc9ed014 535
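A sketch of timing device work with Event::elapsedTime; d_src is assumed to already hold data, and the conversion is just a stand-in workload.

    #include "opencv2/core/cuda.hpp"

    float timeConversion(const cv::cuda::GpuMat& d_src)
    {
        cv::cuda::Stream stream;
        cv::cuda::GpuMat d_dst;
        cv::cuda::Event start, stop;             // default flags record timing data

        start.record(stream);
        d_src.convertTo(d_dst, CV_32F, stream);  // the work being measured
        stop.record(stream);

        stop.waitForCompletion();
        return cv::cuda::Event::elapsedTime(start, stop);   // milliseconds
    }
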
joeverbout 0:ea44dc9ed014 536 //! @} cudacore_struct
joeverbout 0:ea44dc9ed014 537
joeverbout 0:ea44dc9ed014 538 //===================================================================================
joeverbout 0:ea44dc9ed014 539 // Initialization & Info
joeverbout 0:ea44dc9ed014 540 //===================================================================================
joeverbout 0:ea44dc9ed014 541
joeverbout 0:ea44dc9ed014 542 //! @addtogroup cudacore_init
joeverbout 0:ea44dc9ed014 543 //! @{
joeverbout 0:ea44dc9ed014 544
joeverbout 0:ea44dc9ed014 545 /** @brief Returns the number of installed CUDA-enabled devices.
joeverbout 0:ea44dc9ed014 546
joeverbout 0:ea44dc9ed014 547 Use this function before any other CUDA function calls. If OpenCV is compiled without CUDA support,
joeverbout 0:ea44dc9ed014 548 this function returns 0.
joeverbout 0:ea44dc9ed014 549 */
joeverbout 0:ea44dc9ed014 550 CV_EXPORTS int getCudaEnabledDeviceCount();
joeverbout 0:ea44dc9ed014 551
joeverbout 0:ea44dc9ed014 552 /** @brief Sets a device and initializes it for the current thread.
joeverbout 0:ea44dc9ed014 553
joeverbout 0:ea44dc9ed014 554 @param device System index of a CUDA device starting with 0.
joeverbout 0:ea44dc9ed014 555
joeverbout 0:ea44dc9ed014 556 If the call of this function is omitted, a default device is initialized at the first CUDA usage.
joeverbout 0:ea44dc9ed014 557 */
joeverbout 0:ea44dc9ed014 558 CV_EXPORTS void setDevice(int device);
joeverbout 0:ea44dc9ed014 559
joeverbout 0:ea44dc9ed014 560 /** @brief Returns the current device index set by cuda::setDevice or initialized by default.
joeverbout 0:ea44dc9ed014 561 */
joeverbout 0:ea44dc9ed014 562 CV_EXPORTS int getDevice();
joeverbout 0:ea44dc9ed014 563
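A minimal sketch of the recommended start-up sequence: check the device count first, then bind a device to the current thread. Device index 0 and the function name are arbitrary choices.

    #include "opencv2/core/cuda.hpp"

    bool initCuda()
    {
        if (cv::cuda::getCudaEnabledDeviceCount() == 0)
            return false;                       // built without CUDA, or no device present

        cv::cuda::setDevice(0);                 // initialize device 0 for this thread
        return cv::cuda::getDevice() == 0;
    }
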
joeverbout 0:ea44dc9ed014 564 /** @brief Explicitly destroys and cleans up all resources associated with the current device in the current
joeverbout 0:ea44dc9ed014 565 process.
joeverbout 0:ea44dc9ed014 566
joeverbout 0:ea44dc9ed014 567 Any subsequent API call to this device will reinitialize the device.
joeverbout 0:ea44dc9ed014 568 */
joeverbout 0:ea44dc9ed014 569 CV_EXPORTS void resetDevice();
joeverbout 0:ea44dc9ed014 570
joeverbout 0:ea44dc9ed014 571 /** @brief Enumeration providing CUDA computing features.
joeverbout 0:ea44dc9ed014 572 */
joeverbout 0:ea44dc9ed014 573 enum FeatureSet
joeverbout 0:ea44dc9ed014 574 {
joeverbout 0:ea44dc9ed014 575 FEATURE_SET_COMPUTE_10 = 10,
joeverbout 0:ea44dc9ed014 576 FEATURE_SET_COMPUTE_11 = 11,
joeverbout 0:ea44dc9ed014 577 FEATURE_SET_COMPUTE_12 = 12,
joeverbout 0:ea44dc9ed014 578 FEATURE_SET_COMPUTE_13 = 13,
joeverbout 0:ea44dc9ed014 579 FEATURE_SET_COMPUTE_20 = 20,
joeverbout 0:ea44dc9ed014 580 FEATURE_SET_COMPUTE_21 = 21,
joeverbout 0:ea44dc9ed014 581 FEATURE_SET_COMPUTE_30 = 30,
joeverbout 0:ea44dc9ed014 582 FEATURE_SET_COMPUTE_32 = 32,
joeverbout 0:ea44dc9ed014 583 FEATURE_SET_COMPUTE_35 = 35,
joeverbout 0:ea44dc9ed014 584 FEATURE_SET_COMPUTE_50 = 50,
joeverbout 0:ea44dc9ed014 585
joeverbout 0:ea44dc9ed014 586 GLOBAL_ATOMICS = FEATURE_SET_COMPUTE_11,
joeverbout 0:ea44dc9ed014 587 SHARED_ATOMICS = FEATURE_SET_COMPUTE_12,
joeverbout 0:ea44dc9ed014 588 NATIVE_DOUBLE = FEATURE_SET_COMPUTE_13,
joeverbout 0:ea44dc9ed014 589 WARP_SHUFFLE_FUNCTIONS = FEATURE_SET_COMPUTE_30,
joeverbout 0:ea44dc9ed014 590 DYNAMIC_PARALLELISM = FEATURE_SET_COMPUTE_35
joeverbout 0:ea44dc9ed014 591 };
joeverbout 0:ea44dc9ed014 592
joeverbout 0:ea44dc9ed014 593 //! checks whether current device supports the given feature
joeverbout 0:ea44dc9ed014 594 CV_EXPORTS bool deviceSupports(FeatureSet feature_set);
joeverbout 0:ea44dc9ed014 595
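For example, deviceSupports can gate a double-precision code path; a short sketch:

    #include "opencv2/core/cuda.hpp"

    bool canUseDoubles()
    {
        // true if the current device supports native double-precision arithmetic
        return cv::cuda::deviceSupports(cv::cuda::NATIVE_DOUBLE);
    }
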
joeverbout 0:ea44dc9ed014 596 /** @brief Class providing a set of static methods to check what NVIDIA\* card architecture the CUDA module was
joeverbout 0:ea44dc9ed014 597 built for.
joeverbout 0:ea44dc9ed014 598
joeverbout 0:ea44dc9ed014 599 According to the CUDA C Programming Guide Version 3.2: "PTX code produced for some specific compute
joeverbout 0:ea44dc9ed014 600 capability can always be compiled to binary code of greater or equal compute capability".
joeverbout 0:ea44dc9ed014 601 */
joeverbout 0:ea44dc9ed014 602 class CV_EXPORTS TargetArchs
joeverbout 0:ea44dc9ed014 603 {
joeverbout 0:ea44dc9ed014 604 public:
joeverbout 0:ea44dc9ed014 605 /** @brief The following method checks whether the module was built with the support of the given feature:
joeverbout 0:ea44dc9ed014 606
joeverbout 0:ea44dc9ed014 607 @param feature_set Features to be checked. See cuda::FeatureSet.
joeverbout 0:ea44dc9ed014 608 */
joeverbout 0:ea44dc9ed014 609 static bool builtWith(FeatureSet feature_set);
joeverbout 0:ea44dc9ed014 610
joeverbout 0:ea44dc9ed014 611 /** @brief There is a set of methods to check whether the module contains intermediate (PTX) or binary CUDA
joeverbout 0:ea44dc9ed014 612 code for the given architecture(s):
joeverbout 0:ea44dc9ed014 613
joeverbout 0:ea44dc9ed014 614 @param major Major compute capability version.
joeverbout 0:ea44dc9ed014 615 @param minor Minor compute capability version.
joeverbout 0:ea44dc9ed014 616 */
joeverbout 0:ea44dc9ed014 617 static bool has(int major, int minor);
joeverbout 0:ea44dc9ed014 618 static bool hasPtx(int major, int minor);
joeverbout 0:ea44dc9ed014 619 static bool hasBin(int major, int minor);
joeverbout 0:ea44dc9ed014 620
joeverbout 0:ea44dc9ed014 621 static bool hasEqualOrLessPtx(int major, int minor);
joeverbout 0:ea44dc9ed014 622 static bool hasEqualOrGreater(int major, int minor);
joeverbout 0:ea44dc9ed014 623 static bool hasEqualOrGreaterPtx(int major, int minor);
joeverbout 0:ea44dc9ed014 624 static bool hasEqualOrGreaterBin(int major, int minor);
joeverbout 0:ea44dc9ed014 625 };
joeverbout 0:ea44dc9ed014 626
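A hedged sketch of using TargetArchs together with DeviceInfo to ask whether the built module carries PTX that the current device's driver can JIT-compile, following the compute-capability rule quoted above:

    #include "opencv2/core/cuda.hpp"

    bool moduleHasUsablePtx()
    {
        cv::cuda::DeviceInfo info;   // current device
        // PTX built for a capability less than or equal to the device's can be compiled for it
        return cv::cuda::TargetArchs::hasEqualOrLessPtx(info.majorVersion(),
                                                        info.minorVersion());
    }
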
joeverbout 0:ea44dc9ed014 627 /** @brief Class providing functionality for querying the specified GPU properties.
joeverbout 0:ea44dc9ed014 628 */
joeverbout 0:ea44dc9ed014 629 class CV_EXPORTS DeviceInfo
joeverbout 0:ea44dc9ed014 630 {
joeverbout 0:ea44dc9ed014 631 public:
joeverbout 0:ea44dc9ed014 632 //! creates DeviceInfo object for the current GPU
joeverbout 0:ea44dc9ed014 633 DeviceInfo();
joeverbout 0:ea44dc9ed014 634
joeverbout 0:ea44dc9ed014 635 /** @brief The constructors.
joeverbout 0:ea44dc9ed014 636
joeverbout 0:ea44dc9ed014 637 @param device_id System index of the CUDA device starting with 0.
joeverbout 0:ea44dc9ed014 638
joeverbout 0:ea44dc9ed014 639 Constructs the DeviceInfo object for the specified device. If the device_id parameter is omitted, it
joeverbout 0:ea44dc9ed014 640 constructs an object for the current device.
joeverbout 0:ea44dc9ed014 641 */
joeverbout 0:ea44dc9ed014 642 DeviceInfo(int device_id);
joeverbout 0:ea44dc9ed014 643
joeverbout 0:ea44dc9ed014 644 /** @brief Returns system index of the CUDA device starting with 0.
joeverbout 0:ea44dc9ed014 645 */
joeverbout 0:ea44dc9ed014 646 int deviceID() const;
joeverbout 0:ea44dc9ed014 647
joeverbout 0:ea44dc9ed014 648 //! ASCII string identifying device
joeverbout 0:ea44dc9ed014 649 const char* name() const;
joeverbout 0:ea44dc9ed014 650
joeverbout 0:ea44dc9ed014 651 //! global memory available on device in bytes
joeverbout 0:ea44dc9ed014 652 size_t totalGlobalMem() const;
joeverbout 0:ea44dc9ed014 653
joeverbout 0:ea44dc9ed014 654 //! shared memory available per block in bytes
joeverbout 0:ea44dc9ed014 655 size_t sharedMemPerBlock() const;
joeverbout 0:ea44dc9ed014 656
joeverbout 0:ea44dc9ed014 657 //! 32-bit registers available per block
joeverbout 0:ea44dc9ed014 658 int regsPerBlock() const;
joeverbout 0:ea44dc9ed014 659
joeverbout 0:ea44dc9ed014 660 //! warp size in threads
joeverbout 0:ea44dc9ed014 661 int warpSize() const;
joeverbout 0:ea44dc9ed014 662
joeverbout 0:ea44dc9ed014 663 //! maximum pitch in bytes allowed by memory copies
joeverbout 0:ea44dc9ed014 664 size_t memPitch() const;
joeverbout 0:ea44dc9ed014 665
joeverbout 0:ea44dc9ed014 666 //! maximum number of threads per block
joeverbout 0:ea44dc9ed014 667 int maxThreadsPerBlock() const;
joeverbout 0:ea44dc9ed014 668
joeverbout 0:ea44dc9ed014 669 //! maximum size of each dimension of a block
joeverbout 0:ea44dc9ed014 670 Vec3i maxThreadsDim() const;
joeverbout 0:ea44dc9ed014 671
joeverbout 0:ea44dc9ed014 672 //! maximum size of each dimension of a grid
joeverbout 0:ea44dc9ed014 673 Vec3i maxGridSize() const;
joeverbout 0:ea44dc9ed014 674
joeverbout 0:ea44dc9ed014 675 //! clock frequency in kilohertz
joeverbout 0:ea44dc9ed014 676 int clockRate() const;
joeverbout 0:ea44dc9ed014 677
joeverbout 0:ea44dc9ed014 678 //! constant memory available on device in bytes
joeverbout 0:ea44dc9ed014 679 size_t totalConstMem() const;
joeverbout 0:ea44dc9ed014 680
joeverbout 0:ea44dc9ed014 681 //! major compute capability
joeverbout 0:ea44dc9ed014 682 int majorVersion() const;
joeverbout 0:ea44dc9ed014 683
joeverbout 0:ea44dc9ed014 684 //! minor compute capability
joeverbout 0:ea44dc9ed014 685 int minorVersion() const;
joeverbout 0:ea44dc9ed014 686
joeverbout 0:ea44dc9ed014 687 //! alignment requirement for textures
joeverbout 0:ea44dc9ed014 688 size_t textureAlignment() const;
joeverbout 0:ea44dc9ed014 689
joeverbout 0:ea44dc9ed014 690 //! pitch alignment requirement for texture references bound to pitched memory
joeverbout 0:ea44dc9ed014 691 size_t texturePitchAlignment() const;
joeverbout 0:ea44dc9ed014 692
joeverbout 0:ea44dc9ed014 693 //! number of multiprocessors on device
joeverbout 0:ea44dc9ed014 694 int multiProcessorCount() const;
joeverbout 0:ea44dc9ed014 695
joeverbout 0:ea44dc9ed014 696 //! specifies whether there is a run time limit on kernels
joeverbout 0:ea44dc9ed014 697 bool kernelExecTimeoutEnabled() const;
joeverbout 0:ea44dc9ed014 698
joeverbout 0:ea44dc9ed014 699 //! device is integrated as opposed to discrete
joeverbout 0:ea44dc9ed014 700 bool integrated() const;
joeverbout 0:ea44dc9ed014 701
joeverbout 0:ea44dc9ed014 702 //! device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer
joeverbout 0:ea44dc9ed014 703 bool canMapHostMemory() const;
joeverbout 0:ea44dc9ed014 704
joeverbout 0:ea44dc9ed014 705 enum ComputeMode
joeverbout 0:ea44dc9ed014 706 {
joeverbout 0:ea44dc9ed014 707 ComputeModeDefault, /**< default compute mode (Multiple threads can use cudaSetDevice with this device) */
joeverbout 0:ea44dc9ed014 708 ComputeModeExclusive, /**< compute-exclusive-thread mode (Only one thread in one process will be able to use cudaSetDevice with this device) */
joeverbout 0:ea44dc9ed014 709 ComputeModeProhibited, /**< compute-prohibited mode (No threads can use cudaSetDevice with this device) */
joeverbout 0:ea44dc9ed014 710 ComputeModeExclusiveProcess /**< compute-exclusive-process mode (Many threads in one process will be able to use cudaSetDevice with this device) */
joeverbout 0:ea44dc9ed014 711 };
joeverbout 0:ea44dc9ed014 712
joeverbout 0:ea44dc9ed014 713 //! compute mode
joeverbout 0:ea44dc9ed014 714 ComputeMode computeMode() const;
joeverbout 0:ea44dc9ed014 715
joeverbout 0:ea44dc9ed014 716 //! maximum 1D texture size
joeverbout 0:ea44dc9ed014 717 int maxTexture1D() const;
joeverbout 0:ea44dc9ed014 718
joeverbout 0:ea44dc9ed014 719 //! maximum 1D mipmapped texture size
joeverbout 0:ea44dc9ed014 720 int maxTexture1DMipmap() const;
joeverbout 0:ea44dc9ed014 721
joeverbout 0:ea44dc9ed014 722 //! maximum size for 1D textures bound to linear memory
joeverbout 0:ea44dc9ed014 723 int maxTexture1DLinear() const;
joeverbout 0:ea44dc9ed014 724
joeverbout 0:ea44dc9ed014 725 //! maximum 2D texture dimensions
joeverbout 0:ea44dc9ed014 726 Vec2i maxTexture2D() const;
joeverbout 0:ea44dc9ed014 727
joeverbout 0:ea44dc9ed014 728 //! maximum 2D mipmapped texture dimensions
joeverbout 0:ea44dc9ed014 729 Vec2i maxTexture2DMipmap() const;
joeverbout 0:ea44dc9ed014 730
joeverbout 0:ea44dc9ed014 731 //! maximum dimensions (width, height, pitch) for 2D textures bound to pitched memory
joeverbout 0:ea44dc9ed014 732 Vec3i maxTexture2DLinear() const;
joeverbout 0:ea44dc9ed014 733
joeverbout 0:ea44dc9ed014 734 //! maximum 2D texture dimensions if texture gather operations have to be performed
joeverbout 0:ea44dc9ed014 735 Vec2i maxTexture2DGather() const;
joeverbout 0:ea44dc9ed014 736
joeverbout 0:ea44dc9ed014 737 //! maximum 3D texture dimensions
joeverbout 0:ea44dc9ed014 738 Vec3i maxTexture3D() const;
joeverbout 0:ea44dc9ed014 739
joeverbout 0:ea44dc9ed014 740 //! maximum Cubemap texture dimensions
joeverbout 0:ea44dc9ed014 741 int maxTextureCubemap() const;
joeverbout 0:ea44dc9ed014 742
joeverbout 0:ea44dc9ed014 743 //! maximum 1D layered texture dimensions
joeverbout 0:ea44dc9ed014 744 Vec2i maxTexture1DLayered() const;
joeverbout 0:ea44dc9ed014 745
joeverbout 0:ea44dc9ed014 746 //! maximum 2D layered texture dimensions
joeverbout 0:ea44dc9ed014 747 Vec3i maxTexture2DLayered() const;
joeverbout 0:ea44dc9ed014 748
joeverbout 0:ea44dc9ed014 749 //! maximum Cubemap layered texture dimensions
joeverbout 0:ea44dc9ed014 750 Vec2i maxTextureCubemapLayered() const;
joeverbout 0:ea44dc9ed014 751
joeverbout 0:ea44dc9ed014 752 //! maximum 1D surface size
joeverbout 0:ea44dc9ed014 753 int maxSurface1D() const;
joeverbout 0:ea44dc9ed014 754
joeverbout 0:ea44dc9ed014 755 //! maximum 2D surface dimensions
joeverbout 0:ea44dc9ed014 756 Vec2i maxSurface2D() const;
joeverbout 0:ea44dc9ed014 757
joeverbout 0:ea44dc9ed014 758 //! maximum 3D surface dimensions
joeverbout 0:ea44dc9ed014 759 Vec3i maxSurface3D() const;
joeverbout 0:ea44dc9ed014 760
joeverbout 0:ea44dc9ed014 761 //! maximum 1D layered surface dimensions
joeverbout 0:ea44dc9ed014 762 Vec2i maxSurface1DLayered() const;
joeverbout 0:ea44dc9ed014 763
joeverbout 0:ea44dc9ed014 764 //! maximum 2D layered surface dimensions
joeverbout 0:ea44dc9ed014 765 Vec3i maxSurface2DLayered() const;
joeverbout 0:ea44dc9ed014 766
joeverbout 0:ea44dc9ed014 767 //! maximum Cubemap surface dimensions
joeverbout 0:ea44dc9ed014 768 int maxSurfaceCubemap() const;
joeverbout 0:ea44dc9ed014 769
joeverbout 0:ea44dc9ed014 770 //! maximum Cubemap layered surface dimensions
joeverbout 0:ea44dc9ed014 771 Vec2i maxSurfaceCubemapLayered() const;
joeverbout 0:ea44dc9ed014 772
joeverbout 0:ea44dc9ed014 773 //! alignment requirements for surfaces
joeverbout 0:ea44dc9ed014 774 size_t surfaceAlignment() const;
joeverbout 0:ea44dc9ed014 775
joeverbout 0:ea44dc9ed014 776 //! device can possibly execute multiple kernels concurrently
joeverbout 0:ea44dc9ed014 777 bool concurrentKernels() const;
joeverbout 0:ea44dc9ed014 778
joeverbout 0:ea44dc9ed014 779 //! device has ECC support enabled
joeverbout 0:ea44dc9ed014 780 bool ECCEnabled() const;
joeverbout 0:ea44dc9ed014 781
joeverbout 0:ea44dc9ed014 782 //! PCI bus ID of the device
joeverbout 0:ea44dc9ed014 783 int pciBusID() const;
joeverbout 0:ea44dc9ed014 784
joeverbout 0:ea44dc9ed014 785 //! PCI device ID of the device
joeverbout 0:ea44dc9ed014 786 int pciDeviceID() const;
joeverbout 0:ea44dc9ed014 787
joeverbout 0:ea44dc9ed014 788 //! PCI domain ID of the device
joeverbout 0:ea44dc9ed014 789 int pciDomainID() const;
joeverbout 0:ea44dc9ed014 790
joeverbout 0:ea44dc9ed014 791 //! true if device is a Tesla device using TCC driver, false otherwise
joeverbout 0:ea44dc9ed014 792 bool tccDriver() const;
joeverbout 0:ea44dc9ed014 793
joeverbout 0:ea44dc9ed014 794 //! number of asynchronous engines
joeverbout 0:ea44dc9ed014 795 int asyncEngineCount() const;
joeverbout 0:ea44dc9ed014 796
joeverbout 0:ea44dc9ed014 797 //! device shares a unified address space with the host
joeverbout 0:ea44dc9ed014 798 bool unifiedAddressing() const;
joeverbout 0:ea44dc9ed014 799
joeverbout 0:ea44dc9ed014 800 //! peak memory clock frequency in kilohertz
joeverbout 0:ea44dc9ed014 801 int memoryClockRate() const;
joeverbout 0:ea44dc9ed014 802
joeverbout 0:ea44dc9ed014 803 //! global memory bus width in bits
joeverbout 0:ea44dc9ed014 804 int memoryBusWidth() const;
joeverbout 0:ea44dc9ed014 805
joeverbout 0:ea44dc9ed014 806 //! size of L2 cache in bytes
joeverbout 0:ea44dc9ed014 807 int l2CacheSize() const;
joeverbout 0:ea44dc9ed014 808
joeverbout 0:ea44dc9ed014 809 //! maximum resident threads per multiprocessor
joeverbout 0:ea44dc9ed014 810 int maxThreadsPerMultiProcessor() const;
joeverbout 0:ea44dc9ed014 811
joeverbout 0:ea44dc9ed014 812 //! gets free and total device memory
joeverbout 0:ea44dc9ed014 813 void queryMemory(size_t& totalMemory, size_t& freeMemory) const;
joeverbout 0:ea44dc9ed014 814 size_t freeMemory() const;
joeverbout 0:ea44dc9ed014 815 size_t totalMemory() const;
joeverbout 0:ea44dc9ed014 816
joeverbout 0:ea44dc9ed014 817 /** @brief Provides information on CUDA feature support.
joeverbout 0:ea44dc9ed014 818
joeverbout 0:ea44dc9ed014 819 @param feature_set Features to be checked. See cuda::FeatureSet.
joeverbout 0:ea44dc9ed014 820
joeverbout 0:ea44dc9ed014 821 This function returns true if the device has the specified CUDA feature. Otherwise, it returns false.
joeverbout 0:ea44dc9ed014 822 */
joeverbout 0:ea44dc9ed014 823 bool supports(FeatureSet feature_set) const;
joeverbout 0:ea44dc9ed014 824
joeverbout 0:ea44dc9ed014 825 /** @brief Checks the CUDA module and device compatibility.
joeverbout 0:ea44dc9ed014 826
joeverbout 0:ea44dc9ed014 827 This function returns true if the CUDA module can be run on the specified device. Otherwise, it
joeverbout 0:ea44dc9ed014 828 returns false.
joeverbout 0:ea44dc9ed014 829 */
joeverbout 0:ea44dc9ed014 830 bool isCompatible() const;
joeverbout 0:ea44dc9ed014 831
joeverbout 0:ea44dc9ed014 832 private:
joeverbout 0:ea44dc9ed014 833 int device_id_;
joeverbout 0:ea44dc9ed014 834 };
joeverbout 0:ea44dc9ed014 835
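Finally, a sketch of querying the current device with DeviceInfo before committing to a large allocation; the free-memory check is only an example policy, and the function name is hypothetical.

    #include "opencv2/core/cuda.hpp"

    bool enoughFreeMemory(size_t requiredBytes)
    {
        cv::cuda::DeviceInfo info(cv::cuda::getDevice());
        if (!info.isCompatible())
            return false;                        // module was not built for this device

        size_t totalMem = 0, freeMem = 0;
        info.queryMemory(totalMem, freeMem);     // both values in bytes
        return freeMem >= requiredBytes;
    }
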
joeverbout 0:ea44dc9ed014 836 CV_EXPORTS void printCudaDeviceInfo(int device);
joeverbout 0:ea44dc9ed014 837 CV_EXPORTS void printShortCudaDeviceInfo(int device);
joeverbout 0:ea44dc9ed014 838
joeverbout 0:ea44dc9ed014 839 //! @} cudacore_init
joeverbout 0:ea44dc9ed014 840
joeverbout 0:ea44dc9ed014 841 }} // namespace cv { namespace cuda {
joeverbout 0:ea44dc9ed014 842
joeverbout 0:ea44dc9ed014 843
joeverbout 0:ea44dc9ed014 844 #include "opencv2/core/cuda.inl.hpp"
joeverbout 0:ea44dc9ed014 845
joeverbout 0:ea44dc9ed014 846 #endif /* __OPENCV_CORE_CUDA_HPP__ */
joeverbout 0:ea44dc9ed014 847