Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software download the repository Zip archive or clone locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of gr-peach-opencv-project-sd-card by
ocl.cpp
00001 /*M/////////////////////////////////////////////////////////////////////////////////////// 00002 // 00003 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 00004 // 00005 // By downloading, copying, installing or using the software you agree to this license. 00006 // If you do not agree to this license, do not download, install, 00007 // copy or use the software. 00008 // 00009 // 00010 // License Agreement 00011 // For Open Source Computer Vision Library 00012 // 00013 // Copyright (C) 2013, OpenCV Foundation, all rights reserved. 00014 // Third party copyrights are property of their respective owners. 00015 // 00016 // Redistribution and use in source and binary forms, with or without modification, 00017 // are permitted provided that the following conditions are met: 00018 // 00019 // * Redistribution's of source code must retain the above copyright notice, 00020 // this list of conditions and the following disclaimer. 00021 // 00022 // * Redistribution's in binary form must reproduce the above copyright notice, 00023 // this list of conditions and the following disclaimer in the documentation 00024 // and/or other materials provided with the distribution. 00025 // 00026 // * The name of the copyright holders may not be used to endorse or promote products 00027 // derived from this software without specific prior written permission. 00028 // 00029 // This software is provided by the copyright holders and contributors "as is" and 00030 // any express or implied warranties, including, but not limited to, the implied 00031 // warranties of merchantability and fitness for a particular purpose are disclaimed. 00032 // In no event shall the OpenCV Foundation or contributors be liable for any direct, 00033 // indirect, incidental, special, exemplary, or consequential damages 00034 // (including, but not limited to, procurement of substitute goods or services; 00035 // loss of use, data, or profits; or business interruption) however caused 00036 // and on any theory of liability, whether in contract, strict liability, 00037 // or tort (including negligence or otherwise) arising in any way out of 00038 // the use of this software, even if advised of the possibility of such damage. 00039 // 00040 //M*/ 00041 00042 #include "precomp.hpp" 00043 #include <list> 00044 #include <map> 00045 #include <string> 00046 #include <sstream> 00047 #include <iostream> // std::cerr 00048 #include "opencv2/core/ocl.hpp" 00049 00050 #define CV_OPENCL_ALWAYS_SHOW_BUILD_LOG 0 00051 #define CV_OPENCL_SHOW_RUN_ERRORS 0 00052 #define CV_OPENCL_SHOW_SVM_ERROR_LOG 1 00053 #define CV_OPENCL_SHOW_SVM_LOG 0 00054 00055 #include "opencv2/core/bufferpool.hpp" 00056 #ifndef LOG_BUFFER_POOL 00057 # if 0 00058 # define LOG_BUFFER_POOL printf 00059 # else 00060 # define LOG_BUFFER_POOL(...) 00061 # endif 00062 #endif 00063 00064 00065 // TODO Move to some common place 00066 static bool getBoolParameter(const char* name, bool defaultValue) 00067 { 00068 /* 00069 * If your system doesn't support getenv(), define NO_GETENV to disable 00070 * this feature. 00071 */ 00072 #ifdef NO_GETENV 00073 const char* envValue = NULL; 00074 #else 00075 const char* envValue = getenv(name); 00076 #endif 00077 if (envValue == NULL) 00078 { 00079 return defaultValue; 00080 } 00081 cv::String value = envValue; 00082 if (value == "1" || value == "True" || value == "true" || value == "TRUE") 00083 { 00084 return true; 00085 } 00086 if (value == "0" || value == "False" || value == "false" || value == "FALSE") 00087 { 00088 return false; 00089 } 00090 CV_ErrorNoReturn(cv::Error::StsBadArg, cv::format("Invalid value for %s parameter: %s", name, value.c_str())); 00091 } 00092 00093 00094 // TODO Move to some common place 00095 static size_t getConfigurationParameterForSize(const char* name, size_t defaultValue) 00096 { 00097 #ifdef NO_GETENV 00098 const char* envValue = NULL; 00099 #else 00100 const char* envValue = getenv(name); 00101 #endif 00102 if (envValue == NULL) 00103 { 00104 return defaultValue; 00105 } 00106 cv::String value = envValue; 00107 size_t pos = 0; 00108 for (; pos < value.size(); pos++) 00109 { 00110 if (!isdigit(value[pos])) 00111 break; 00112 } 00113 cv::String valueStr = value.substr(0, pos); 00114 cv::String suffixStr = value.substr(pos, value.length() - pos); 00115 int v = atoi(valueStr.c_str()); 00116 if (suffixStr.length() == 0) 00117 return v; 00118 else if (suffixStr == "MB" || suffixStr == "Mb" || suffixStr == "mb") 00119 return v * 1024 * 1024; 00120 else if (suffixStr == "KB" || suffixStr == "Kb" || suffixStr == "kb") 00121 return v * 1024; 00122 CV_ErrorNoReturn(cv::Error::StsBadArg, cv::format("Invalid value for %s parameter: %s", name, value.c_str())); 00123 } 00124 00125 #if CV_OPENCL_SHOW_SVM_LOG 00126 // TODO add timestamp logging 00127 #define CV_OPENCL_SVM_TRACE_P printf("line %d (ocl.cpp): ", __LINE__); printf 00128 #else 00129 #define CV_OPENCL_SVM_TRACE_P(...) 00130 #endif 00131 00132 #if CV_OPENCL_SHOW_SVM_ERROR_LOG 00133 // TODO add timestamp logging 00134 #define CV_OPENCL_SVM_TRACE_ERROR_P printf("Error on line %d (ocl.cpp): ", __LINE__); printf 00135 #else 00136 #define CV_OPENCL_SVM_TRACE_ERROR_P(...) 00137 #endif 00138 00139 #include "opencv2/core/opencl/runtime/opencl_clamdblas.hpp" 00140 #include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp" 00141 00142 #ifdef HAVE_OPENCL 00143 #include "opencv2/core/opencl/runtime/opencl_core.hpp" 00144 #else 00145 // TODO FIXIT: This file can't be build without OPENCL 00146 00147 /* 00148 Part of the file is an extract from the standard OpenCL headers from Khronos site. 00149 Below is the original copyright. 00150 */ 00151 00152 /******************************************************************************* 00153 * Copyright (c) 2008 - 2012 The Khronos Group Inc. 00154 * 00155 * Permission is hereby granted, free of charge, to any person obtaining a 00156 * copy of this software and/or associated documentation files (the 00157 * "Materials"), to deal in the Materials without restriction, including 00158 * without limitation the rights to use, copy, modify, merge, publish, 00159 * distribute, sublicense, and/or sell copies of the Materials, and to 00160 * permit persons to whom the Materials are furnished to do so, subject to 00161 * the following conditions: 00162 * 00163 * The above copyright notice and this permission notice shall be included 00164 * in all copies or substantial portions of the Materials. 00165 * 00166 * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 00167 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 00168 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 00169 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 00170 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 00171 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 00172 * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 00173 ******************************************************************************/ 00174 00175 #if 0 //defined __APPLE__ 00176 #define HAVE_OPENCL 1 00177 #else 00178 #undef HAVE_OPENCL 00179 #endif 00180 00181 #define OPENCV_CL_NOT_IMPLEMENTED -1000 00182 00183 #ifdef HAVE_OPENCL 00184 00185 #if defined __APPLE__ 00186 #include <OpenCL/opencl.h> 00187 #else 00188 #include <CL/opencl.h> 00189 #endif 00190 00191 static const bool g_haveOpenCL = true; 00192 00193 #else 00194 00195 extern "C" { 00196 00197 struct _cl_platform_id { int dummy; }; 00198 struct _cl_device_id { int dummy; }; 00199 struct _cl_context { int dummy; }; 00200 struct _cl_command_queue { int dummy; }; 00201 struct _cl_mem { int dummy; }; 00202 struct _cl_program { int dummy; }; 00203 struct _cl_kernel { int dummy; }; 00204 struct _cl_event { int dummy; }; 00205 struct _cl_sampler { int dummy; }; 00206 00207 typedef struct _cl_platform_id * cl_platform_id; 00208 typedef struct _cl_device_id * cl_device_id; 00209 typedef struct _cl_context * cl_context; 00210 typedef struct _cl_command_queue * cl_command_queue; 00211 typedef struct _cl_mem * cl_mem; 00212 typedef struct _cl_program * cl_program; 00213 typedef struct _cl_kernel * cl_kernel; 00214 typedef struct _cl_event * cl_event; 00215 typedef struct _cl_sampler * cl_sampler; 00216 00217 typedef int cl_int; 00218 typedef unsigned cl_uint; 00219 #if defined (_WIN32) && defined(_MSC_VER) 00220 typedef __int64 cl_long; 00221 typedef unsigned __int64 cl_ulong; 00222 #else 00223 typedef long cl_long; 00224 typedef unsigned long cl_ulong; 00225 #endif 00226 00227 typedef cl_uint cl_bool; /* WARNING! Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. */ 00228 typedef cl_ulong cl_bitfield; 00229 typedef cl_bitfield cl_device_type; 00230 typedef cl_uint cl_platform_info; 00231 typedef cl_uint cl_device_info; 00232 typedef cl_bitfield cl_device_fp_config; 00233 typedef cl_uint cl_device_mem_cache_type; 00234 typedef cl_uint cl_device_local_mem_type; 00235 typedef cl_bitfield cl_device_exec_capabilities; 00236 typedef cl_bitfield cl_command_queue_properties; 00237 typedef intptr_t cl_device_partition_property; 00238 typedef cl_bitfield cl_device_affinity_domain; 00239 00240 typedef intptr_t cl_context_properties; 00241 typedef cl_uint cl_context_info; 00242 typedef cl_uint cl_command_queue_info; 00243 typedef cl_uint cl_channel_order; 00244 typedef cl_uint cl_channel_type; 00245 typedef cl_bitfield cl_mem_flags; 00246 typedef cl_uint cl_mem_object_type; 00247 typedef cl_uint cl_mem_info; 00248 typedef cl_bitfield cl_mem_migration_flags; 00249 typedef cl_uint cl_image_info; 00250 typedef cl_uint cl_buffer_create_type; 00251 typedef cl_uint cl_addressing_mode; 00252 typedef cl_uint cl_filter_mode; 00253 typedef cl_uint cl_sampler_info; 00254 typedef cl_bitfield cl_map_flags; 00255 typedef cl_uint cl_program_info; 00256 typedef cl_uint cl_program_build_info; 00257 typedef cl_uint cl_program_binary_type; 00258 typedef cl_int cl_build_status; 00259 typedef cl_uint cl_kernel_info; 00260 typedef cl_uint cl_kernel_arg_info; 00261 typedef cl_uint cl_kernel_arg_address_qualifier; 00262 typedef cl_uint cl_kernel_arg_access_qualifier; 00263 typedef cl_bitfield cl_kernel_arg_type_qualifier; 00264 typedef cl_uint cl_kernel_work_group_info; 00265 typedef cl_uint cl_event_info; 00266 typedef cl_uint cl_command_type; 00267 typedef cl_uint cl_profiling_info; 00268 00269 00270 typedef struct _cl_image_format { 00271 cl_channel_order image_channel_order; 00272 cl_channel_type image_channel_data_type; 00273 } cl_image_format; 00274 00275 typedef struct _cl_image_desc { 00276 cl_mem_object_type image_type; 00277 size_t image_width; 00278 size_t image_height; 00279 size_t image_depth; 00280 size_t image_array_size; 00281 size_t image_row_pitch; 00282 size_t image_slice_pitch; 00283 cl_uint num_mip_levels; 00284 cl_uint num_samples; 00285 cl_mem buffer; 00286 } cl_image_desc; 00287 00288 typedef struct _cl_buffer_region { 00289 size_t origin; 00290 size_t size; 00291 } cl_buffer_region; 00292 00293 00294 ////////////////////////////////////////////////////////// 00295 00296 #define CL_SUCCESS 0 00297 #define CL_DEVICE_NOT_FOUND -1 00298 #define CL_DEVICE_NOT_AVAILABLE -2 00299 #define CL_COMPILER_NOT_AVAILABLE -3 00300 #define CL_MEM_OBJECT_ALLOCATION_FAILURE -4 00301 #define CL_OUT_OF_RESOURCES -5 00302 #define CL_OUT_OF_HOST_MEMORY -6 00303 #define CL_PROFILING_INFO_NOT_AVAILABLE -7 00304 #define CL_MEM_COPY_OVERLAP -8 00305 #define CL_IMAGE_FORMAT_MISMATCH -9 00306 #define CL_IMAGE_FORMAT_NOT_SUPPORTED -10 00307 #define CL_BUILD_PROGRAM_FAILURE -11 00308 #define CL_MAP_FAILURE -12 00309 #define CL_MISALIGNED_SUB_BUFFER_OFFSET -13 00310 #define CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST -14 00311 #define CL_COMPILE_PROGRAM_FAILURE -15 00312 #define CL_LINKER_NOT_AVAILABLE -16 00313 #define CL_LINK_PROGRAM_FAILURE -17 00314 #define CL_DEVICE_PARTITION_FAILED -18 00315 #define CL_KERNEL_ARG_INFO_NOT_AVAILABLE -19 00316 00317 #define CL_INVALID_VALUE -30 00318 #define CL_INVALID_DEVICE_TYPE -31 00319 #define CL_INVALID_PLATFORM -32 00320 #define CL_INVALID_DEVICE -33 00321 #define CL_INVALID_CONTEXT -34 00322 #define CL_INVALID_QUEUE_PROPERTIES -35 00323 #define CL_INVALID_COMMAND_QUEUE -36 00324 #define CL_INVALID_HOST_PTR -37 00325 #define CL_INVALID_MEM_OBJECT -38 00326 #define CL_INVALID_IMAGE_FORMAT_DESCRIPTOR -39 00327 #define CL_INVALID_IMAGE_SIZE -40 00328 #define CL_INVALID_SAMPLER -41 00329 #define CL_INVALID_BINARY -42 00330 #define CL_INVALID_BUILD_OPTIONS -43 00331 #define CL_INVALID_PROGRAM -44 00332 #define CL_INVALID_PROGRAM_EXECUTABLE -45 00333 #define CL_INVALID_KERNEL_NAME -46 00334 #define CL_INVALID_KERNEL_DEFINITION -47 00335 #define CL_INVALID_KERNEL -48 00336 #define CL_INVALID_ARG_INDEX -49 00337 #define CL_INVALID_ARG_VALUE -50 00338 #define CL_INVALID_ARG_SIZE -51 00339 #define CL_INVALID_KERNEL_ARGS -52 00340 #define CL_INVALID_WORK_DIMENSION -53 00341 #define CL_INVALID_WORK_GROUP_SIZE -54 00342 #define CL_INVALID_WORK_ITEM_SIZE -55 00343 #define CL_INVALID_GLOBAL_OFFSET -56 00344 #define CL_INVALID_EVENT_WAIT_LIST -57 00345 #define CL_INVALID_EVENT -58 00346 #define CL_INVALID_OPERATION -59 00347 #define CL_INVALID_GL_OBJECT -60 00348 #define CL_INVALID_BUFFER_SIZE -61 00349 #define CL_INVALID_MIP_LEVEL -62 00350 #define CL_INVALID_GLOBAL_WORK_SIZE -63 00351 #define CL_INVALID_PROPERTY -64 00352 #define CL_INVALID_IMAGE_DESCRIPTOR -65 00353 #define CL_INVALID_COMPILER_OPTIONS -66 00354 #define CL_INVALID_LINKER_OPTIONS -67 00355 #define CL_INVALID_DEVICE_PARTITION_COUNT -68 00356 00357 /*#define CL_VERSION_1_0 1 00358 #define CL_VERSION_1_1 1 00359 #define CL_VERSION_1_2 1*/ 00360 00361 #define CL_FALSE 0 00362 #define CL_TRUE 1 00363 #define CL_BLOCKING CL_TRUE 00364 #define CL_NON_BLOCKING CL_FALSE 00365 00366 #define CL_PLATFORM_PROFILE 0x0900 00367 #define CL_PLATFORM_VERSION 0x0901 00368 #define CL_PLATFORM_NAME 0x0902 00369 #define CL_PLATFORM_VENDOR 0x0903 00370 #define CL_PLATFORM_EXTENSIONS 0x0904 00371 00372 #define CL_DEVICE_TYPE_DEFAULT (1 << 0) 00373 #define CL_DEVICE_TYPE_CPU (1 << 1) 00374 #define CL_DEVICE_TYPE_GPU (1 << 2) 00375 #define CL_DEVICE_TYPE_ACCELERATOR (1 << 3) 00376 #define CL_DEVICE_TYPE_CUSTOM (1 << 4) 00377 #define CL_DEVICE_TYPE_ALL 0xFFFFFFFF 00378 #define CL_DEVICE_TYPE 0x1000 00379 #define CL_DEVICE_VENDOR_ID 0x1001 00380 #define CL_DEVICE_MAX_COMPUTE_UNITS 0x1002 00381 #define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS 0x1003 00382 #define CL_DEVICE_MAX_WORK_GROUP_SIZE 0x1004 00383 #define CL_DEVICE_MAX_WORK_ITEM_SIZES 0x1005 00384 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR 0x1006 00385 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT 0x1007 00386 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT 0x1008 00387 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG 0x1009 00388 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT 0x100A 00389 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE 0x100B 00390 #define CL_DEVICE_MAX_CLOCK_FREQUENCY 0x100C 00391 #define CL_DEVICE_ADDRESS_BITS 0x100D 00392 #define CL_DEVICE_MAX_READ_IMAGE_ARGS 0x100E 00393 #define CL_DEVICE_MAX_WRITE_IMAGE_ARGS 0x100F 00394 #define CL_DEVICE_MAX_MEM_ALLOC_SIZE 0x1010 00395 #define CL_DEVICE_IMAGE2D_MAX_WIDTH 0x1011 00396 #define CL_DEVICE_IMAGE2D_MAX_HEIGHT 0x1012 00397 #define CL_DEVICE_IMAGE3D_MAX_WIDTH 0x1013 00398 #define CL_DEVICE_IMAGE3D_MAX_HEIGHT 0x1014 00399 #define CL_DEVICE_IMAGE3D_MAX_DEPTH 0x1015 00400 #define CL_DEVICE_IMAGE_SUPPORT 0x1016 00401 #define CL_DEVICE_MAX_PARAMETER_SIZE 0x1017 00402 #define CL_DEVICE_MAX_SAMPLERS 0x1018 00403 #define CL_DEVICE_MEM_BASE_ADDR_ALIGN 0x1019 00404 #define CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE 0x101A 00405 #define CL_DEVICE_SINGLE_FP_CONFIG 0x101B 00406 #define CL_DEVICE_GLOBAL_MEM_CACHE_TYPE 0x101C 00407 #define CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE 0x101D 00408 #define CL_DEVICE_GLOBAL_MEM_CACHE_SIZE 0x101E 00409 #define CL_DEVICE_GLOBAL_MEM_SIZE 0x101F 00410 #define CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE 0x1020 00411 #define CL_DEVICE_MAX_CONSTANT_ARGS 0x1021 00412 #define CL_DEVICE_LOCAL_MEM_TYPE 0x1022 00413 #define CL_DEVICE_LOCAL_MEM_SIZE 0x1023 00414 #define CL_DEVICE_ERROR_CORRECTION_SUPPORT 0x1024 00415 #define CL_DEVICE_PROFILING_TIMER_RESOLUTION 0x1025 00416 #define CL_DEVICE_ENDIAN_LITTLE 0x1026 00417 #define CL_DEVICE_AVAILABLE 0x1027 00418 #define CL_DEVICE_COMPILER_AVAILABLE 0x1028 00419 #define CL_DEVICE_EXECUTION_CAPABILITIES 0x1029 00420 #define CL_DEVICE_QUEUE_PROPERTIES 0x102A 00421 #define CL_DEVICE_NAME 0x102B 00422 #define CL_DEVICE_VENDOR 0x102C 00423 #define CL_DRIVER_VERSION 0x102D 00424 #define CL_DEVICE_PROFILE 0x102E 00425 #define CL_DEVICE_VERSION 0x102F 00426 #define CL_DEVICE_EXTENSIONS 0x1030 00427 #define CL_DEVICE_PLATFORM 0x1031 00428 #define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032 00429 #define CL_DEVICE_HALF_FP_CONFIG 0x1033 00430 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF 0x1034 00431 #define CL_DEVICE_HOST_UNIFIED_MEMORY 0x1035 00432 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR 0x1036 00433 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT 0x1037 00434 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_INT 0x1038 00435 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG 0x1039 00436 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT 0x103A 00437 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE 0x103B 00438 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF 0x103C 00439 #define CL_DEVICE_OPENCL_C_VERSION 0x103D 00440 #define CL_DEVICE_LINKER_AVAILABLE 0x103E 00441 #define CL_DEVICE_BUILT_IN_KERNELS 0x103F 00442 #define CL_DEVICE_IMAGE_MAX_BUFFER_SIZE 0x1040 00443 #define CL_DEVICE_IMAGE_MAX_ARRAY_SIZE 0x1041 00444 #define CL_DEVICE_PARENT_DEVICE 0x1042 00445 #define CL_DEVICE_PARTITION_MAX_SUB_DEVICES 0x1043 00446 #define CL_DEVICE_PARTITION_PROPERTIES 0x1044 00447 #define CL_DEVICE_PARTITION_AFFINITY_DOMAIN 0x1045 00448 #define CL_DEVICE_PARTITION_TYPE 0x1046 00449 #define CL_DEVICE_REFERENCE_COUNT 0x1047 00450 #define CL_DEVICE_PREFERRED_INTEROP_USER_SYNC 0x1048 00451 #define CL_DEVICE_PRINTF_BUFFER_SIZE 0x1049 00452 #define CL_DEVICE_IMAGE_PITCH_ALIGNMENT 0x104A 00453 #define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT 0x104B 00454 00455 #define CL_FP_DENORM (1 << 0) 00456 #define CL_FP_INF_NAN (1 << 1) 00457 #define CL_FP_ROUND_TO_NEAREST (1 << 2) 00458 #define CL_FP_ROUND_TO_ZERO (1 << 3) 00459 #define CL_FP_ROUND_TO_INF (1 << 4) 00460 #define CL_FP_FMA (1 << 5) 00461 #define CL_FP_SOFT_FLOAT (1 << 6) 00462 #define CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT (1 << 7) 00463 00464 #define CL_NONE 0x0 00465 #define CL_READ_ONLY_CACHE 0x1 00466 #define CL_READ_WRITE_CACHE 0x2 00467 #define CL_LOCAL 0x1 00468 #define CL_GLOBAL 0x2 00469 #define CL_EXEC_KERNEL (1 << 0) 00470 #define CL_EXEC_NATIVE_KERNEL (1 << 1) 00471 #define CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE (1 << 0) 00472 #define CL_QUEUE_PROFILING_ENABLE (1 << 1) 00473 00474 #define CL_CONTEXT_REFERENCE_COUNT 0x1080 00475 #define CL_CONTEXT_DEVICES 0x1081 00476 #define CL_CONTEXT_PROPERTIES 0x1082 00477 #define CL_CONTEXT_NUM_DEVICES 0x1083 00478 #define CL_CONTEXT_PLATFORM 0x1084 00479 #define CL_CONTEXT_INTEROP_USER_SYNC 0x1085 00480 00481 #define CL_DEVICE_PARTITION_EQUALLY 0x1086 00482 #define CL_DEVICE_PARTITION_BY_COUNTS 0x1087 00483 #define CL_DEVICE_PARTITION_BY_COUNTS_LIST_END 0x0 00484 #define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN 0x1088 00485 #define CL_DEVICE_AFFINITY_DOMAIN_NUMA (1 << 0) 00486 #define CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE (1 << 1) 00487 #define CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE (1 << 2) 00488 #define CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE (1 << 3) 00489 #define CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE (1 << 4) 00490 #define CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE (1 << 5) 00491 #define CL_QUEUE_CONTEXT 0x1090 00492 #define CL_QUEUE_DEVICE 0x1091 00493 #define CL_QUEUE_REFERENCE_COUNT 0x1092 00494 #define CL_QUEUE_PROPERTIES 0x1093 00495 #define CL_MEM_READ_WRITE (1 << 0) 00496 #define CL_MEM_WRITE_ONLY (1 << 1) 00497 #define CL_MEM_READ_ONLY (1 << 2) 00498 #define CL_MEM_USE_HOST_PTR (1 << 3) 00499 #define CL_MEM_ALLOC_HOST_PTR (1 << 4) 00500 #define CL_MEM_COPY_HOST_PTR (1 << 5) 00501 // reserved (1 << 6) 00502 #define CL_MEM_HOST_WRITE_ONLY (1 << 7) 00503 #define CL_MEM_HOST_READ_ONLY (1 << 8) 00504 #define CL_MEM_HOST_NO_ACCESS (1 << 9) 00505 #define CL_MIGRATE_MEM_OBJECT_HOST (1 << 0) 00506 #define CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED (1 << 1) 00507 00508 #define CL_R 0x10B0 00509 #define CL_A 0x10B1 00510 #define CL_RG 0x10B2 00511 #define CL_RA 0x10B3 00512 #define CL_RGB 0x10B4 00513 #define CL_RGBA 0x10B5 00514 #define CL_BGRA 0x10B6 00515 #define CL_ARGB 0x10B7 00516 #define CL_INTENSITY 0x10B8 00517 #define CL_LUMINANCE 0x10B9 00518 #define CL_Rx 0x10BA 00519 #define CL_RGx 0x10BB 00520 #define CL_RGBx 0x10BC 00521 #define CL_DEPTH 0x10BD 00522 #define CL_DEPTH_STENCIL 0x10BE 00523 00524 #define CL_SNORM_INT8 0x10D0 00525 #define CL_SNORM_INT16 0x10D1 00526 #define CL_UNORM_INT8 0x10D2 00527 #define CL_UNORM_INT16 0x10D3 00528 #define CL_UNORM_SHORT_565 0x10D4 00529 #define CL_UNORM_SHORT_555 0x10D5 00530 #define CL_UNORM_INT_101010 0x10D6 00531 #define CL_SIGNED_INT8 0x10D7 00532 #define CL_SIGNED_INT16 0x10D8 00533 #define CL_SIGNED_INT32 0x10D9 00534 #define CL_UNSIGNED_INT8 0x10DA 00535 #define CL_UNSIGNED_INT16 0x10DB 00536 #define CL_UNSIGNED_INT32 0x10DC 00537 #define CL_HALF_FLOAT 0x10DD 00538 #define CL_FLOAT 0x10DE 00539 #define CL_UNORM_INT24 0x10DF 00540 00541 #define CL_MEM_OBJECT_BUFFER 0x10F0 00542 #define CL_MEM_OBJECT_IMAGE2D 0x10F1 00543 #define CL_MEM_OBJECT_IMAGE3D 0x10F2 00544 #define CL_MEM_OBJECT_IMAGE2D_ARRAY 0x10F3 00545 #define CL_MEM_OBJECT_IMAGE1D 0x10F4 00546 #define CL_MEM_OBJECT_IMAGE1D_ARRAY 0x10F5 00547 #define CL_MEM_OBJECT_IMAGE1D_BUFFER 0x10F6 00548 00549 #define CL_MEM_TYPE 0x1100 00550 #define CL_MEM_FLAGS 0x1101 00551 #define CL_MEM_SIZE 0x1102 00552 #define CL_MEM_HOST_PTR 0x1103 00553 #define CL_MEM_MAP_COUNT 0x1104 00554 #define CL_MEM_REFERENCE_COUNT 0x1105 00555 #define CL_MEM_CONTEXT 0x1106 00556 #define CL_MEM_ASSOCIATED_MEMOBJECT 0x1107 00557 #define CL_MEM_OFFSET 0x1108 00558 00559 #define CL_IMAGE_FORMAT 0x1110 00560 #define CL_IMAGE_ELEMENT_SIZE 0x1111 00561 #define CL_IMAGE_ROW_PITCH 0x1112 00562 #define CL_IMAGE_SLICE_PITCH 0x1113 00563 #define CL_IMAGE_WIDTH 0x1114 00564 #define CL_IMAGE_HEIGHT 0x1115 00565 #define CL_IMAGE_DEPTH 0x1116 00566 #define CL_IMAGE_ARRAY_SIZE 0x1117 00567 #define CL_IMAGE_BUFFER 0x1118 00568 #define CL_IMAGE_NUM_MIP_LEVELS 0x1119 00569 #define CL_IMAGE_NUM_SAMPLES 0x111A 00570 00571 #define CL_ADDRESS_NONE 0x1130 00572 #define CL_ADDRESS_CLAMP_TO_EDGE 0x1131 00573 #define CL_ADDRESS_CLAMP 0x1132 00574 #define CL_ADDRESS_REPEAT 0x1133 00575 #define CL_ADDRESS_MIRRORED_REPEAT 0x1134 00576 00577 #define CL_FILTER_NEAREST 0x1140 00578 #define CL_FILTER_LINEAR 0x1141 00579 00580 #define CL_SAMPLER_REFERENCE_COUNT 0x1150 00581 #define CL_SAMPLER_CONTEXT 0x1151 00582 #define CL_SAMPLER_NORMALIZED_COORDS 0x1152 00583 #define CL_SAMPLER_ADDRESSING_MODE 0x1153 00584 #define CL_SAMPLER_FILTER_MODE 0x1154 00585 00586 #define CL_MAP_READ (1 << 0) 00587 #define CL_MAP_WRITE (1 << 1) 00588 #define CL_MAP_WRITE_INVALIDATE_REGION (1 << 2) 00589 00590 #define CL_PROGRAM_REFERENCE_COUNT 0x1160 00591 #define CL_PROGRAM_CONTEXT 0x1161 00592 #define CL_PROGRAM_NUM_DEVICES 0x1162 00593 #define CL_PROGRAM_DEVICES 0x1163 00594 #define CL_PROGRAM_SOURCE 0x1164 00595 #define CL_PROGRAM_BINARY_SIZES 0x1165 00596 #define CL_PROGRAM_BINARIES 0x1166 00597 #define CL_PROGRAM_NUM_KERNELS 0x1167 00598 #define CL_PROGRAM_KERNEL_NAMES 0x1168 00599 #define CL_PROGRAM_BUILD_STATUS 0x1181 00600 #define CL_PROGRAM_BUILD_OPTIONS 0x1182 00601 #define CL_PROGRAM_BUILD_LOG 0x1183 00602 #define CL_PROGRAM_BINARY_TYPE 0x1184 00603 #define CL_PROGRAM_BINARY_TYPE_NONE 0x0 00604 #define CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT 0x1 00605 #define CL_PROGRAM_BINARY_TYPE_LIBRARY 0x2 00606 #define CL_PROGRAM_BINARY_TYPE_EXECUTABLE 0x4 00607 00608 #define CL_BUILD_SUCCESS 0 00609 #define CL_BUILD_NONE -1 00610 #define CL_BUILD_ERROR -2 00611 #define CL_BUILD_IN_PROGRESS -3 00612 00613 #define CL_KERNEL_FUNCTION_NAME 0x1190 00614 #define CL_KERNEL_NUM_ARGS 0x1191 00615 #define CL_KERNEL_REFERENCE_COUNT 0x1192 00616 #define CL_KERNEL_CONTEXT 0x1193 00617 #define CL_KERNEL_PROGRAM 0x1194 00618 #define CL_KERNEL_ATTRIBUTES 0x1195 00619 #define CL_KERNEL_ARG_ADDRESS_QUALIFIER 0x1196 00620 #define CL_KERNEL_ARG_ACCESS_QUALIFIER 0x1197 00621 #define CL_KERNEL_ARG_TYPE_NAME 0x1198 00622 #define CL_KERNEL_ARG_TYPE_QUALIFIER 0x1199 00623 #define CL_KERNEL_ARG_NAME 0x119A 00624 #define CL_KERNEL_ARG_ADDRESS_GLOBAL 0x119B 00625 #define CL_KERNEL_ARG_ADDRESS_LOCAL 0x119C 00626 #define CL_KERNEL_ARG_ADDRESS_CONSTANT 0x119D 00627 #define CL_KERNEL_ARG_ADDRESS_PRIVATE 0x119E 00628 #define CL_KERNEL_ARG_ACCESS_READ_ONLY 0x11A0 00629 #define CL_KERNEL_ARG_ACCESS_WRITE_ONLY 0x11A1 00630 #define CL_KERNEL_ARG_ACCESS_READ_WRITE 0x11A2 00631 #define CL_KERNEL_ARG_ACCESS_NONE 0x11A3 00632 #define CL_KERNEL_ARG_TYPE_NONE 0 00633 #define CL_KERNEL_ARG_TYPE_CONST (1 << 0) 00634 #define CL_KERNEL_ARG_TYPE_RESTRICT (1 << 1) 00635 #define CL_KERNEL_ARG_TYPE_VOLATILE (1 << 2) 00636 #define CL_KERNEL_WORK_GROUP_SIZE 0x11B0 00637 #define CL_KERNEL_COMPILE_WORK_GROUP_SIZE 0x11B1 00638 #define CL_KERNEL_LOCAL_MEM_SIZE 0x11B2 00639 #define CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x11B3 00640 #define CL_KERNEL_PRIVATE_MEM_SIZE 0x11B4 00641 #define CL_KERNEL_GLOBAL_WORK_SIZE 0x11B5 00642 00643 #define CL_EVENT_COMMAND_QUEUE 0x11D0 00644 #define CL_EVENT_COMMAND_TYPE 0x11D1 00645 #define CL_EVENT_REFERENCE_COUNT 0x11D2 00646 #define CL_EVENT_COMMAND_EXECUTION_STATUS 0x11D3 00647 #define CL_EVENT_CONTEXT 0x11D4 00648 00649 #define CL_COMMAND_NDRANGE_KERNEL 0x11F0 00650 #define CL_COMMAND_TASK 0x11F1 00651 #define CL_COMMAND_NATIVE_KERNEL 0x11F2 00652 #define CL_COMMAND_READ_BUFFER 0x11F3 00653 #define CL_COMMAND_WRITE_BUFFER 0x11F4 00654 #define CL_COMMAND_COPY_BUFFER 0x11F5 00655 #define CL_COMMAND_READ_IMAGE 0x11F6 00656 #define CL_COMMAND_WRITE_IMAGE 0x11F7 00657 #define CL_COMMAND_COPY_IMAGE 0x11F8 00658 #define CL_COMMAND_COPY_IMAGE_TO_BUFFER 0x11F9 00659 #define CL_COMMAND_COPY_BUFFER_TO_IMAGE 0x11FA 00660 #define CL_COMMAND_MAP_BUFFER 0x11FB 00661 #define CL_COMMAND_MAP_IMAGE 0x11FC 00662 #define CL_COMMAND_UNMAP_MEM_OBJECT 0x11FD 00663 #define CL_COMMAND_MARKER 0x11FE 00664 #define CL_COMMAND_ACQUIRE_GL_OBJECTS 0x11FF 00665 #define CL_COMMAND_RELEASE_GL_OBJECTS 0x1200 00666 #define CL_COMMAND_READ_BUFFER_RECT 0x1201 00667 #define CL_COMMAND_WRITE_BUFFER_RECT 0x1202 00668 #define CL_COMMAND_COPY_BUFFER_RECT 0x1203 00669 #define CL_COMMAND_USER 0x1204 00670 #define CL_COMMAND_BARRIER 0x1205 00671 #define CL_COMMAND_MIGRATE_MEM_OBJECTS 0x1206 00672 #define CL_COMMAND_FILL_BUFFER 0x1207 00673 #define CL_COMMAND_FILL_IMAGE 0x1208 00674 00675 #define CL_COMPLETE 0x0 00676 #define CL_RUNNING 0x1 00677 #define CL_SUBMITTED 0x2 00678 #define CL_QUEUED 0x3 00679 #define CL_BUFFER_CREATE_TYPE_REGION 0x1220 00680 00681 #define CL_PROFILING_COMMAND_QUEUED 0x1280 00682 #define CL_PROFILING_COMMAND_SUBMIT 0x1281 00683 #define CL_PROFILING_COMMAND_START 0x1282 00684 #define CL_PROFILING_COMMAND_END 0x1283 00685 00686 #define CL_CALLBACK CV_STDCALL 00687 00688 static volatile bool g_haveOpenCL = false; 00689 static const char* oclFuncToCheck = "clEnqueueReadBufferRect"; 00690 00691 #if defined(__APPLE__) 00692 #include <dlfcn.h> 00693 00694 static void* initOpenCLAndLoad(const char* funcname) 00695 { 00696 static bool initialized = false; 00697 static void* handle = 0; 00698 if (!handle) 00699 { 00700 if(!initialized) 00701 { 00702 const char* oclpath = getenv("OPENCV_OPENCL_RUNTIME"); 00703 oclpath = oclpath && strlen(oclpath) > 0 ? oclpath : 00704 "/System/Library/Frameworks/OpenCL.framework/Versions/Current/OpenCL"; 00705 handle = dlopen(oclpath, RTLD_LAZY); 00706 initialized = true; 00707 g_haveOpenCL = handle != 0 && dlsym(handle, oclFuncToCheck) != 0; 00708 if( g_haveOpenCL ) 00709 fprintf(stderr, "Successfully loaded OpenCL v1.1+ runtime from %s\n", oclpath); 00710 else 00711 fprintf(stderr, "Failed to load OpenCL runtime\n"); 00712 } 00713 if(!handle) 00714 return 0; 00715 } 00716 00717 return funcname && handle ? dlsym(handle, funcname) : 0; 00718 } 00719 00720 #elif defined WIN32 || defined _WIN32 00721 00722 #ifndef _WIN32_WINNT // This is needed for the declaration of TryEnterCriticalSection in winbase.h with Visual Studio 2005 (and older?) 00723 #define _WIN32_WINNT 0x0400 // http://msdn.microsoft.com/en-us/library/ms686857(VS.85).aspx 00724 #endif 00725 #include <windows.h> 00726 #if (_WIN32_WINNT >= 0x0602) 00727 #include <synchapi.h> 00728 #endif 00729 #undef small 00730 #undef min 00731 #undef max 00732 #undef abs 00733 00734 static void* initOpenCLAndLoad(const char* funcname) 00735 { 00736 static bool initialized = false; 00737 static HMODULE handle = 0; 00738 if (!handle) 00739 { 00740 #ifndef WINRT 00741 if(!initialized) 00742 { 00743 handle = LoadLibraryA("OpenCL.dll"); 00744 initialized = true; 00745 g_haveOpenCL = handle != 0 && GetProcAddress(handle, oclFuncToCheck) != 0; 00746 } 00747 #endif 00748 if(!handle) 00749 return 0; 00750 } 00751 00752 return funcname ? (void*)GetProcAddress(handle, funcname) : 0; 00753 } 00754 00755 #elif defined(__linux) 00756 00757 #include <dlfcn.h> 00758 #include <stdio.h> 00759 00760 static void* initOpenCLAndLoad(const char* funcname) 00761 { 00762 static bool initialized = false; 00763 static void* handle = 0; 00764 if (!handle) 00765 { 00766 if(!initialized) 00767 { 00768 handle = dlopen("libOpenCL.so", RTLD_LAZY); 00769 if(!handle) 00770 handle = dlopen("libCL.so", RTLD_LAZY); 00771 initialized = true; 00772 g_haveOpenCL = handle != 0 && dlsym(handle, oclFuncToCheck) != 0; 00773 } 00774 if(!handle) 00775 return 0; 00776 } 00777 00778 return funcname ? (void*)dlsym(handle, funcname) : 0; 00779 } 00780 00781 #else 00782 00783 static void* initOpenCLAndLoad(const char*) 00784 { 00785 return 0; 00786 } 00787 00788 #endif 00789 00790 00791 #define OCL_FUNC(rettype, funcname, argsdecl, args) \ 00792 typedef rettype (CV_STDCALL * funcname##_t) argsdecl; \ 00793 static rettype funcname argsdecl \ 00794 { \ 00795 static funcname##_t funcname##_p = 0; \ 00796 if( !funcname##_p ) \ 00797 { \ 00798 funcname##_p = (funcname##_t)initOpenCLAndLoad(#funcname); \ 00799 if( !funcname##_p ) \ 00800 return OPENCV_CL_NOT_IMPLEMENTED; \ 00801 } \ 00802 return funcname##_p args; \ 00803 } 00804 00805 00806 #define OCL_FUNC_P(rettype, funcname, argsdecl, args) \ 00807 typedef rettype (CV_STDCALL * funcname##_t) argsdecl; \ 00808 static rettype funcname argsdecl \ 00809 { \ 00810 static funcname##_t funcname##_p = 0; \ 00811 if( !funcname##_p ) \ 00812 { \ 00813 funcname##_p = (funcname##_t)initOpenCLAndLoad(#funcname); \ 00814 if( !funcname##_p ) \ 00815 { \ 00816 if( errcode_ret ) \ 00817 *errcode_ret = OPENCV_CL_NOT_IMPLEMENTED; \ 00818 return 0; \ 00819 } \ 00820 } \ 00821 return funcname##_p args; \ 00822 } 00823 00824 OCL_FUNC(cl_int, clGetPlatformIDs, 00825 (cl_uint num_entries, cl_platform_id* platforms, cl_uint* num_platforms), 00826 (num_entries, platforms, num_platforms)) 00827 00828 OCL_FUNC(cl_int, clGetPlatformInfo, 00829 (cl_platform_id platform, cl_platform_info param_name, 00830 size_t param_value_size, void * param_value, 00831 size_t * param_value_size_ret), 00832 (platform, param_name, param_value_size, param_value, param_value_size_ret)) 00833 00834 OCL_FUNC(cl_int, clGetDeviceInfo, 00835 (cl_device_id device, 00836 cl_device_info param_name, 00837 size_t param_value_size, 00838 void * param_value, 00839 size_t * param_value_size_ret), 00840 (device, param_name, param_value_size, param_value, param_value_size_ret)) 00841 00842 00843 OCL_FUNC(cl_int, clGetDeviceIDs, 00844 (cl_platform_id platform, 00845 cl_device_type device_type, 00846 cl_uint num_entries, 00847 cl_device_id * devices, 00848 cl_uint * num_devices), 00849 (platform, device_type, num_entries, devices, num_devices)) 00850 00851 OCL_FUNC_P(cl_context, clCreateContext, 00852 (const cl_context_properties * properties, 00853 cl_uint num_devices, 00854 const cl_device_id * devices, 00855 void (CL_CALLBACK * pfn_notify)(const char *, const void *, size_t, void *), 00856 void * user_data, 00857 cl_int * errcode_ret), 00858 (properties, num_devices, devices, pfn_notify, user_data, errcode_ret)) 00859 00860 OCL_FUNC(cl_int, clReleaseContext, (cl_context context), (context)) 00861 00862 00863 OCL_FUNC(cl_int, clRetainContext, (cl_context context), (context)) 00864 /* 00865 OCL_FUNC_P(cl_context, clCreateContextFromType, 00866 (const cl_context_properties * properties, 00867 cl_device_type device_type, 00868 void (CL_CALLBACK * pfn_notify)(const char *, const void *, size_t, void *), 00869 void * user_data, 00870 cl_int * errcode_ret), 00871 (properties, device_type, pfn_notify, user_data, errcode_ret)) 00872 00873 OCL_FUNC(cl_int, clGetContextInfo, 00874 (cl_context context, 00875 cl_context_info param_name, 00876 size_t param_value_size, 00877 void * param_value, 00878 size_t * param_value_size_ret), 00879 (context, param_name, param_value_size, 00880 param_value, param_value_size_ret)) 00881 */ 00882 OCL_FUNC_P(cl_command_queue, clCreateCommandQueue, 00883 (cl_context context, 00884 cl_device_id device, 00885 cl_command_queue_properties properties, 00886 cl_int * errcode_ret), 00887 (context, device, properties, errcode_ret)) 00888 00889 OCL_FUNC(cl_int, clReleaseCommandQueue, (cl_command_queue command_queue), (command_queue)) 00890 00891 OCL_FUNC_P(cl_mem, clCreateBuffer, 00892 (cl_context context, 00893 cl_mem_flags flags, 00894 size_t size, 00895 void * host_ptr, 00896 cl_int * errcode_ret), 00897 (context, flags, size, host_ptr, errcode_ret)) 00898 00899 /* 00900 OCL_FUNC(cl_int, clRetainCommandQueue, (cl_command_queue command_queue), (command_queue)) 00901 00902 OCL_FUNC(cl_int, clGetCommandQueueInfo, 00903 (cl_command_queue command_queue, 00904 cl_command_queue_info param_name, 00905 size_t param_value_size, 00906 void * param_value, 00907 size_t * param_value_size_ret), 00908 (command_queue, param_name, param_value_size, param_value, param_value_size_ret)) 00909 00910 OCL_FUNC_P(cl_mem, clCreateSubBuffer, 00911 (cl_mem buffer, 00912 cl_mem_flags flags, 00913 cl_buffer_create_type buffer_create_type, 00914 const void * buffer_create_info, 00915 cl_int * errcode_ret), 00916 (buffer, flags, buffer_create_type, buffer_create_info, errcode_ret)) 00917 */ 00918 00919 OCL_FUNC_P(cl_mem, clCreateImage, 00920 (cl_context context, 00921 cl_mem_flags flags, 00922 const cl_image_format * image_format, 00923 const cl_image_desc * image_desc, 00924 void * host_ptr, 00925 cl_int * errcode_ret), 00926 (context, flags, image_format, image_desc, host_ptr, errcode_ret)) 00927 00928 OCL_FUNC_P(cl_mem, clCreateImage2D, 00929 (cl_context context, 00930 cl_mem_flags flags, 00931 const cl_image_format * image_format, 00932 size_t image_width, 00933 size_t image_height, 00934 size_t image_row_pitch, 00935 void * host_ptr, 00936 cl_int *errcode_ret), 00937 (context, flags, image_format, image_width, image_height, image_row_pitch, host_ptr, errcode_ret)) 00938 00939 OCL_FUNC(cl_int, clGetSupportedImageFormats, 00940 (cl_context context, 00941 cl_mem_flags flags, 00942 cl_mem_object_type image_type, 00943 cl_uint num_entries, 00944 cl_image_format * image_formats, 00945 cl_uint * num_image_formats), 00946 (context, flags, image_type, num_entries, image_formats, num_image_formats)) 00947 00948 00949 OCL_FUNC(cl_int, clGetMemObjectInfo, 00950 (cl_mem memobj, 00951 cl_mem_info param_name, 00952 size_t param_value_size, 00953 void * param_value, 00954 size_t * param_value_size_ret), 00955 (memobj, param_name, param_value_size, param_value, param_value_size_ret)) 00956 00957 OCL_FUNC(cl_int, clGetImageInfo, 00958 (cl_mem image, 00959 cl_image_info param_name, 00960 size_t param_value_size, 00961 void * param_value, 00962 size_t * param_value_size_ret), 00963 (image, param_name, param_value_size, param_value, param_value_size_ret)) 00964 00965 /* 00966 OCL_FUNC(cl_int, clCreateKernelsInProgram, 00967 (cl_program program, 00968 cl_uint num_kernels, 00969 cl_kernel * kernels, 00970 cl_uint * num_kernels_ret), 00971 (program, num_kernels, kernels, num_kernels_ret)) 00972 00973 OCL_FUNC(cl_int, clRetainKernel, (cl_kernel kernel), (kernel)) 00974 00975 OCL_FUNC(cl_int, clGetKernelArgInfo, 00976 (cl_kernel kernel, 00977 cl_uint arg_indx, 00978 cl_kernel_arg_info param_name, 00979 size_t param_value_size, 00980 void * param_value, 00981 size_t * param_value_size_ret), 00982 (kernel, arg_indx, param_name, param_value_size, param_value, param_value_size_ret)) 00983 00984 OCL_FUNC(cl_int, clEnqueueReadImage, 00985 (cl_command_queue command_queue, 00986 cl_mem image, 00987 cl_bool blocking_read, 00988 const size_t * origin[3], 00989 const size_t * region[3], 00990 size_t row_pitch, 00991 size_t slice_pitch, 00992 void * ptr, 00993 cl_uint num_events_in_wait_list, 00994 const cl_event * event_wait_list, 00995 cl_event * event), 00996 (command_queue, image, blocking_read, origin, region, 00997 row_pitch, slice_pitch, 00998 ptr, 00999 num_events_in_wait_list, 01000 event_wait_list, 01001 event)) 01002 01003 OCL_FUNC(cl_int, clEnqueueWriteImage, 01004 (cl_command_queue command_queue, 01005 cl_mem image, 01006 cl_bool blocking_write, 01007 const size_t * origin[3], 01008 const size_t * region[3], 01009 size_t input_row_pitch, 01010 size_t input_slice_pitch, 01011 const void * ptr, 01012 cl_uint num_events_in_wait_list, 01013 const cl_event * event_wait_list, 01014 cl_event * event), 01015 (command_queue, image, blocking_write, origin, region, input_row_pitch, 01016 input_slice_pitch, ptr, num_events_in_wait_list, event_wait_list, event)) 01017 01018 OCL_FUNC(cl_int, clEnqueueFillImage, 01019 (cl_command_queue command_queue, 01020 cl_mem image, 01021 const void * fill_color, 01022 const size_t * origin[3], 01023 const size_t * region[3], 01024 cl_uint num_events_in_wait_list, 01025 const cl_event * event_wait_list, 01026 cl_event * event), 01027 (command_queue, image, fill_color, origin, region, 01028 num_events_in_wait_list, event_wait_list, event)) 01029 01030 OCL_FUNC(cl_int, clEnqueueCopyImage, 01031 (cl_command_queue command_queue, 01032 cl_mem src_image, 01033 cl_mem dst_image, 01034 const size_t * src_origin[3], 01035 const size_t * dst_origin[3], 01036 const size_t * region[3], 01037 cl_uint num_events_in_wait_list, 01038 const cl_event * event_wait_list, 01039 cl_event * event), 01040 (command_queue, src_image, dst_image, src_origin, dst_origin, 01041 region, num_events_in_wait_list, event_wait_list, event)) 01042 */ 01043 01044 OCL_FUNC(cl_int, clEnqueueCopyImageToBuffer, 01045 (cl_command_queue command_queue, 01046 cl_mem src_image, 01047 cl_mem dst_buffer, 01048 const size_t * src_origin, 01049 const size_t * region, 01050 size_t dst_offset, 01051 cl_uint num_events_in_wait_list, 01052 const cl_event * event_wait_list, 01053 cl_event * event), 01054 (command_queue, src_image, dst_buffer, src_origin, region, dst_offset, 01055 num_events_in_wait_list, event_wait_list, event)) 01056 01057 OCL_FUNC(cl_int, clEnqueueCopyBufferToImage, 01058 (cl_command_queue command_queue, 01059 cl_mem src_buffer, 01060 cl_mem dst_image, 01061 size_t src_offset, 01062 const size_t dst_origin[3], 01063 const size_t region[3], 01064 cl_uint num_events_in_wait_list, 01065 const cl_event * event_wait_list, 01066 cl_event * event), 01067 (command_queue, src_buffer, dst_image, src_offset, dst_origin, 01068 region, num_events_in_wait_list, event_wait_list, event)) 01069 01070 OCL_FUNC(cl_int, clFlush, 01071 (cl_command_queue command_queue), 01072 (command_queue)) 01073 01074 /* 01075 OCL_FUNC_P(void*, clEnqueueMapImage, 01076 (cl_command_queue command_queue, 01077 cl_mem image, 01078 cl_bool blocking_map, 01079 cl_map_flags map_flags, 01080 const size_t * origin[3], 01081 const size_t * region[3], 01082 size_t * image_row_pitch, 01083 size_t * image_slice_pitch, 01084 cl_uint num_events_in_wait_list, 01085 const cl_event * event_wait_list, 01086 cl_event * event, 01087 cl_int * errcode_ret), 01088 (command_queue, image, blocking_map, map_flags, origin, region, 01089 image_row_pitch, image_slice_pitch, num_events_in_wait_list, 01090 event_wait_list, event, errcode_ret)) 01091 */ 01092 01093 /* 01094 OCL_FUNC(cl_int, clRetainProgram, (cl_program program), (program)) 01095 01096 OCL_FUNC(cl_int, clGetKernelInfo, 01097 (cl_kernel kernel, 01098 cl_kernel_info param_name, 01099 size_t param_value_size, 01100 void * param_value, 01101 size_t * param_value_size_ret), 01102 (kernel, param_name, param_value_size, param_value, param_value_size_ret)) 01103 01104 */ 01105 01106 OCL_FUNC(cl_int, clRetainMemObject, (cl_mem memobj), (memobj)) 01107 01108 OCL_FUNC(cl_int, clReleaseMemObject, (cl_mem memobj), (memobj)) 01109 01110 01111 OCL_FUNC_P(cl_program, clCreateProgramWithSource, 01112 (cl_context context, 01113 cl_uint count, 01114 const char ** strings, 01115 const size_t * lengths, 01116 cl_int * errcode_ret), 01117 (context, count, strings, lengths, errcode_ret)) 01118 01119 OCL_FUNC_P(cl_program, clCreateProgramWithBinary, 01120 (cl_context context, 01121 cl_uint num_devices, 01122 const cl_device_id * device_list, 01123 const size_t * lengths, 01124 const unsigned char ** binaries, 01125 cl_int * binary_status, 01126 cl_int * errcode_ret), 01127 (context, num_devices, device_list, lengths, binaries, binary_status, errcode_ret)) 01128 01129 OCL_FUNC(cl_int, clReleaseProgram, (cl_program program), (program)) 01130 01131 OCL_FUNC(cl_int, clBuildProgram, 01132 (cl_program program, 01133 cl_uint num_devices, 01134 const cl_device_id * device_list, 01135 const char * options, 01136 void (CL_CALLBACK * pfn_notify)(cl_program, void *), 01137 void * user_data), 01138 (program, num_devices, device_list, options, pfn_notify, user_data)) 01139 01140 OCL_FUNC(cl_int, clGetProgramInfo, 01141 (cl_program program, 01142 cl_program_info param_name, 01143 size_t param_value_size, 01144 void * param_value, 01145 size_t * param_value_size_ret), 01146 (program, param_name, param_value_size, param_value, param_value_size_ret)) 01147 01148 OCL_FUNC(cl_int, clGetProgramBuildInfo, 01149 (cl_program program, 01150 cl_device_id device, 01151 cl_program_build_info param_name, 01152 size_t param_value_size, 01153 void * param_value, 01154 size_t * param_value_size_ret), 01155 (program, device, param_name, param_value_size, param_value, param_value_size_ret)) 01156 01157 OCL_FUNC_P(cl_kernel, clCreateKernel, 01158 (cl_program program, 01159 const char * kernel_name, 01160 cl_int * errcode_ret), 01161 (program, kernel_name, errcode_ret)) 01162 01163 OCL_FUNC(cl_int, clReleaseKernel, (cl_kernel kernel), (kernel)) 01164 01165 OCL_FUNC(cl_int, clSetKernelArg, 01166 (cl_kernel kernel, 01167 cl_uint arg_index, 01168 size_t arg_size, 01169 const void * arg_value), 01170 (kernel, arg_index, arg_size, arg_value)) 01171 01172 OCL_FUNC(cl_int, clGetKernelWorkGroupInfo, 01173 (cl_kernel kernel, 01174 cl_device_id device, 01175 cl_kernel_work_group_info param_name, 01176 size_t param_value_size, 01177 void * param_value, 01178 size_t * param_value_size_ret), 01179 (kernel, device, param_name, param_value_size, param_value, param_value_size_ret)) 01180 01181 OCL_FUNC(cl_int, clFinish, (cl_command_queue command_queue), (command_queue)) 01182 01183 OCL_FUNC(cl_int, clEnqueueReadBuffer, 01184 (cl_command_queue command_queue, 01185 cl_mem buffer, 01186 cl_bool blocking_read, 01187 size_t offset, 01188 size_t size, 01189 void * ptr, 01190 cl_uint num_events_in_wait_list, 01191 const cl_event * event_wait_list, 01192 cl_event * event), 01193 (command_queue, buffer, blocking_read, offset, size, ptr, 01194 num_events_in_wait_list, event_wait_list, event)) 01195 01196 OCL_FUNC(cl_int, clEnqueueReadBufferRect, 01197 (cl_command_queue command_queue, 01198 cl_mem buffer, 01199 cl_bool blocking_read, 01200 const size_t * buffer_offset, 01201 const size_t * host_offset, 01202 const size_t * region, 01203 size_t buffer_row_pitch, 01204 size_t buffer_slice_pitch, 01205 size_t host_row_pitch, 01206 size_t host_slice_pitch, 01207 void * ptr, 01208 cl_uint num_events_in_wait_list, 01209 const cl_event * event_wait_list, 01210 cl_event * event), 01211 (command_queue, buffer, blocking_read, buffer_offset, host_offset, region, buffer_row_pitch, 01212 buffer_slice_pitch, host_row_pitch, host_slice_pitch, ptr, num_events_in_wait_list, 01213 event_wait_list, event)) 01214 01215 OCL_FUNC(cl_int, clEnqueueWriteBuffer, 01216 (cl_command_queue command_queue, 01217 cl_mem buffer, 01218 cl_bool blocking_write, 01219 size_t offset, 01220 size_t size, 01221 const void * ptr, 01222 cl_uint num_events_in_wait_list, 01223 const cl_event * event_wait_list, 01224 cl_event * event), 01225 (command_queue, buffer, blocking_write, offset, size, ptr, 01226 num_events_in_wait_list, event_wait_list, event)) 01227 01228 OCL_FUNC(cl_int, clEnqueueWriteBufferRect, 01229 (cl_command_queue command_queue, 01230 cl_mem buffer, 01231 cl_bool blocking_write, 01232 const size_t * buffer_offset, 01233 const size_t * host_offset, 01234 const size_t * region, 01235 size_t buffer_row_pitch, 01236 size_t buffer_slice_pitch, 01237 size_t host_row_pitch, 01238 size_t host_slice_pitch, 01239 const void * ptr, 01240 cl_uint num_events_in_wait_list, 01241 const cl_event * event_wait_list, 01242 cl_event * event), 01243 (command_queue, buffer, blocking_write, buffer_offset, host_offset, 01244 region, buffer_row_pitch, buffer_slice_pitch, host_row_pitch, 01245 host_slice_pitch, ptr, num_events_in_wait_list, event_wait_list, event)) 01246 01247 /*OCL_FUNC(cl_int, clEnqueueFillBuffer, 01248 (cl_command_queue command_queue, 01249 cl_mem buffer, 01250 const void * pattern, 01251 size_t pattern_size, 01252 size_t offset, 01253 size_t size, 01254 cl_uint num_events_in_wait_list, 01255 const cl_event * event_wait_list, 01256 cl_event * event), 01257 (command_queue, buffer, pattern, pattern_size, offset, size, 01258 num_events_in_wait_list, event_wait_list, event))*/ 01259 01260 OCL_FUNC(cl_int, clEnqueueCopyBuffer, 01261 (cl_command_queue command_queue, 01262 cl_mem src_buffer, 01263 cl_mem dst_buffer, 01264 size_t src_offset, 01265 size_t dst_offset, 01266 size_t size, 01267 cl_uint num_events_in_wait_list, 01268 const cl_event * event_wait_list, 01269 cl_event * event), 01270 (command_queue, src_buffer, dst_buffer, src_offset, dst_offset, 01271 size, num_events_in_wait_list, event_wait_list, event)) 01272 01273 OCL_FUNC(cl_int, clEnqueueCopyBufferRect, 01274 (cl_command_queue command_queue, 01275 cl_mem src_buffer, 01276 cl_mem dst_buffer, 01277 const size_t * src_origin, 01278 const size_t * dst_origin, 01279 const size_t * region, 01280 size_t src_row_pitch, 01281 size_t src_slice_pitch, 01282 size_t dst_row_pitch, 01283 size_t dst_slice_pitch, 01284 cl_uint num_events_in_wait_list, 01285 const cl_event * event_wait_list, 01286 cl_event * event), 01287 (command_queue, src_buffer, dst_buffer, src_origin, dst_origin, 01288 region, src_row_pitch, src_slice_pitch, dst_row_pitch, dst_slice_pitch, 01289 num_events_in_wait_list, event_wait_list, event)) 01290 01291 OCL_FUNC_P(void*, clEnqueueMapBuffer, 01292 (cl_command_queue command_queue, 01293 cl_mem buffer, 01294 cl_bool blocking_map, 01295 cl_map_flags map_flags, 01296 size_t offset, 01297 size_t size, 01298 cl_uint num_events_in_wait_list, 01299 const cl_event * event_wait_list, 01300 cl_event * event, 01301 cl_int * errcode_ret), 01302 (command_queue, buffer, blocking_map, map_flags, offset, size, 01303 num_events_in_wait_list, event_wait_list, event, errcode_ret)) 01304 01305 OCL_FUNC(cl_int, clEnqueueUnmapMemObject, 01306 (cl_command_queue command_queue, 01307 cl_mem memobj, 01308 void * mapped_ptr, 01309 cl_uint num_events_in_wait_list, 01310 const cl_event * event_wait_list, 01311 cl_event * event), 01312 (command_queue, memobj, mapped_ptr, num_events_in_wait_list, event_wait_list, event)) 01313 01314 OCL_FUNC(cl_int, clEnqueueNDRangeKernel, 01315 (cl_command_queue command_queue, 01316 cl_kernel kernel, 01317 cl_uint work_dim, 01318 const size_t * global_work_offset, 01319 const size_t * global_work_size, 01320 const size_t * local_work_size, 01321 cl_uint num_events_in_wait_list, 01322 const cl_event * event_wait_list, 01323 cl_event * event), 01324 (command_queue, kernel, work_dim, global_work_offset, global_work_size, 01325 local_work_size, num_events_in_wait_list, event_wait_list, event)) 01326 01327 OCL_FUNC(cl_int, clEnqueueTask, 01328 (cl_command_queue command_queue, 01329 cl_kernel kernel, 01330 cl_uint num_events_in_wait_list, 01331 const cl_event * event_wait_list, 01332 cl_event * event), 01333 (command_queue, kernel, num_events_in_wait_list, event_wait_list, event)) 01334 01335 OCL_FUNC(cl_int, clSetEventCallback, 01336 (cl_event event, 01337 cl_int command_exec_callback_type , 01338 void (CL_CALLBACK *pfn_event_notify) (cl_event event, cl_int event_command_exec_status, void *user_data), 01339 void *user_data), 01340 (event, command_exec_callback_type, pfn_event_notify, user_data)) 01341 01342 OCL_FUNC(cl_int, clReleaseEvent, (cl_event event), (event)) 01343 01344 } 01345 01346 #endif 01347 01348 #ifndef CL_VERSION_1_2 01349 #define CL_VERSION_1_2 01350 #endif 01351 01352 #endif // HAVE_OPENCL 01353 01354 #ifdef _DEBUG 01355 #define CV_OclDbgAssert CV_DbgAssert 01356 #else 01357 static bool isRaiseError() 01358 { 01359 static bool initialized = false; 01360 static bool value = false; 01361 if (!initialized) 01362 { 01363 value = getBoolParameter("OPENCV_OPENCL_RAISE_ERROR", false); 01364 initialized = true; 01365 } 01366 return value; 01367 } 01368 #define CV_OclDbgAssert(expr) do { if (isRaiseError()) { CV_Assert(expr); } else { (void)(expr); } } while ((void)0, 0) 01369 #endif 01370 01371 #ifdef HAVE_OPENCL_SVM 01372 #include "opencv2/core/opencl/runtime/opencl_svm_20.hpp" 01373 #include "opencv2/core/opencl/runtime/opencl_svm_hsa_extension.hpp" 01374 #include "opencv2/core/opencl/opencl_svm.hpp" 01375 #endif 01376 01377 namespace cv { namespace ocl { 01378 01379 struct UMat2D 01380 { 01381 UMat2D(const UMat& m) 01382 { 01383 offset = (int)m.offset; 01384 step = (int)m.step; 01385 rows = m.rows; 01386 cols = m.cols; 01387 } 01388 int offset; 01389 int step; 01390 int rows; 01391 int cols; 01392 }; 01393 01394 struct UMat3D 01395 { 01396 UMat3D(const UMat& m) 01397 { 01398 offset = (int)m.offset; 01399 step = (int)m.step.p[1]; 01400 slicestep = (int)m.step.p[0]; 01401 slices = (int)m.size.p[0]; 01402 rows = m.size.p[1]; 01403 cols = m.size.p[2]; 01404 } 01405 int offset; 01406 int slicestep; 01407 int step; 01408 int slices; 01409 int rows; 01410 int cols; 01411 }; 01412 01413 // Computes 64-bit "cyclic redundancy check" sum, as specified in ECMA-182 01414 static uint64 crc64( const uchar* data, size_t size, uint64 crc0=0 ) 01415 { 01416 static uint64 table[256]; 01417 static bool initialized = false; 01418 01419 if( !initialized ) 01420 { 01421 for( int i = 0; i < 256; i++ ) 01422 { 01423 uint64 c = i; 01424 for( int j = 0; j < 8; j++ ) 01425 c = ((c & 1) ? CV_BIG_UINT(0xc96c5795d7870f42) : 0) ^ (c >> 1); 01426 table[i] = c; 01427 } 01428 initialized = true; 01429 } 01430 01431 uint64 crc = ~crc0; 01432 for( size_t idx = 0; idx < size; idx++ ) 01433 crc = table[(uchar)crc ^ data[idx]] ^ (crc >> 8); 01434 01435 return ~crc; 01436 } 01437 01438 struct HashKey 01439 { 01440 typedef uint64 part; 01441 HashKey(part _a, part _b) : a(_a), b(_b) {} 01442 part a, b; 01443 }; 01444 01445 inline bool operator == (const HashKey& h1, const HashKey& h2) 01446 { 01447 return h1.a == h2.a && h1.b == h2.b; 01448 } 01449 01450 inline bool operator < (const HashKey& h1, const HashKey& h2) 01451 { 01452 return h1.a < h2.a || (h1.a == h2.a && h1.b < h2.b); 01453 } 01454 01455 01456 bool haveOpenCL() 01457 { 01458 #ifdef HAVE_OPENCL 01459 static bool g_isOpenCLInitialized = false; 01460 static bool g_isOpenCLAvailable = false; 01461 01462 if (!g_isOpenCLInitialized) 01463 { 01464 try 01465 { 01466 cl_uint n = 0; 01467 g_isOpenCLAvailable = ::clGetPlatformIDs(0, NULL, &n) == CL_SUCCESS; 01468 } 01469 catch (...) 01470 { 01471 g_isOpenCLAvailable = false; 01472 } 01473 g_isOpenCLInitialized = true; 01474 } 01475 return g_isOpenCLAvailable; 01476 #else 01477 return false; 01478 #endif 01479 } 01480 01481 bool useOpenCL() 01482 { 01483 CoreTLSData* data = getCoreTlsData().get(); 01484 if( data->useOpenCL < 0 ) 01485 { 01486 //try 01487 // { 01488 data->useOpenCL = (int)haveOpenCL() && Device::getDefault().ptr() && Device::getDefault().available(); 01489 //} 01490 // catch (...) 01491 // { 01492 // data->useOpenCL = 0; 01493 // } 01494 } 01495 return data->useOpenCL > 0; 01496 } 01497 01498 void setUseOpenCL(bool flag) 01499 { 01500 if( haveOpenCL() ) 01501 { 01502 CoreTLSData* data = getCoreTlsData().get(); 01503 data->useOpenCL = (flag && Device::getDefault().ptr() != NULL) ? 1 : 0; 01504 } 01505 } 01506 01507 #ifdef HAVE_CLAMDBLAS 01508 01509 class AmdBlasHelper 01510 { 01511 public: 01512 static AmdBlasHelper & getInstance() 01513 { 01514 CV_SINGLETON_LAZY_INIT_REF(AmdBlasHelper, new AmdBlasHelper()) 01515 } 01516 01517 bool isAvailable() const 01518 { 01519 return g_isAmdBlasAvailable; 01520 } 01521 01522 ~AmdBlasHelper() 01523 { 01524 try 01525 { 01526 clAmdBlasTeardown(); 01527 } 01528 catch (...) { } 01529 } 01530 01531 protected: 01532 AmdBlasHelper() 01533 { 01534 if (!g_isAmdBlasInitialized) 01535 { 01536 AutoLock lock(getInitializationMutex()); 01537 01538 if (!g_isAmdBlasInitialized) 01539 { 01540 if (haveOpenCL()) 01541 { 01542 try 01543 { 01544 g_isAmdBlasAvailable = clAmdBlasSetup() == clAmdBlasSuccess; 01545 } 01546 catch (...) 01547 { 01548 g_isAmdBlasAvailable = false; 01549 } 01550 } 01551 else 01552 g_isAmdBlasAvailable = false; 01553 01554 g_isAmdBlasInitialized = true; 01555 } 01556 } 01557 } 01558 01559 private: 01560 static bool g_isAmdBlasInitialized; 01561 static bool g_isAmdBlasAvailable; 01562 }; 01563 01564 bool AmdBlasHelper::g_isAmdBlasAvailable = false; 01565 bool AmdBlasHelper::g_isAmdBlasInitialized = false; 01566 01567 bool haveAmdBlas() 01568 { 01569 return AmdBlasHelper::getInstance().isAvailable(); 01570 } 01571 01572 #else 01573 01574 bool haveAmdBlas() 01575 { 01576 return false; 01577 } 01578 01579 #endif 01580 01581 #ifdef HAVE_CLAMDFFT 01582 01583 class AmdFftHelper 01584 { 01585 public: 01586 static AmdFftHelper & getInstance() 01587 { 01588 CV_SINGLETON_LAZY_INIT_REF(AmdFftHelper, new AmdFftHelper()) 01589 } 01590 01591 bool isAvailable() const 01592 { 01593 return g_isAmdFftAvailable; 01594 } 01595 01596 ~AmdFftHelper() 01597 { 01598 try 01599 { 01600 // clAmdFftTeardown(); 01601 } 01602 catch (...) { } 01603 } 01604 01605 protected: 01606 AmdFftHelper() 01607 { 01608 if (!g_isAmdFftInitialized) 01609 { 01610 AutoLock lock(getInitializationMutex()); 01611 01612 if (!g_isAmdFftInitialized) 01613 { 01614 if (haveOpenCL()) 01615 { 01616 try 01617 { 01618 cl_uint major, minor, patch; 01619 CV_Assert(clAmdFftInitSetupData(&setupData) == CLFFT_SUCCESS); 01620 01621 // it throws exception in case AmdFft binaries are not found 01622 CV_Assert(clAmdFftGetVersion(&major, &minor, &patch) == CLFFT_SUCCESS); 01623 g_isAmdFftAvailable = true; 01624 } 01625 catch (const Exception &) 01626 { 01627 g_isAmdFftAvailable = false; 01628 } 01629 } 01630 else 01631 g_isAmdFftAvailable = false; 01632 01633 g_isAmdFftInitialized = true; 01634 } 01635 } 01636 } 01637 01638 private: 01639 static clAmdFftSetupData setupData; 01640 static bool g_isAmdFftInitialized; 01641 static bool g_isAmdFftAvailable; 01642 }; 01643 01644 clAmdFftSetupData AmdFftHelper::setupData; 01645 bool AmdFftHelper::g_isAmdFftAvailable = false; 01646 bool AmdFftHelper::g_isAmdFftInitialized = false; 01647 01648 bool haveAmdFft() 01649 { 01650 return AmdFftHelper::getInstance().isAvailable(); 01651 } 01652 01653 #else 01654 01655 bool haveAmdFft() 01656 { 01657 return false; 01658 } 01659 01660 #endif 01661 01662 bool haveSVM() 01663 { 01664 #ifdef HAVE_OPENCL_SVM 01665 return true; 01666 #else 01667 return false; 01668 #endif 01669 } 01670 01671 void finish() 01672 { 01673 Queue::getDefault().finish(); 01674 } 01675 01676 #define IMPLEMENT_REFCOUNTABLE() \ 01677 void addref() { CV_XADD(&refcount, 1); } \ 01678 void release() { if( CV_XADD(&refcount, -1) == 1 && !cv::__termination) delete this; } \ 01679 int refcount 01680 01681 /////////////////////////////////////////// Platform ///////////////////////////////////////////// 01682 01683 struct Platform::Impl 01684 { 01685 Impl() 01686 { 01687 refcount = 1; 01688 handle = 0; 01689 initialized = false; 01690 } 01691 01692 ~Impl() {} 01693 01694 void init() 01695 { 01696 if( !initialized ) 01697 { 01698 //cl_uint num_entries 01699 cl_uint n = 0; 01700 if( clGetPlatformIDs(1, &handle, &n) != CL_SUCCESS || n == 0 ) 01701 handle = 0; 01702 if( handle != 0 ) 01703 { 01704 char buf[1000]; 01705 size_t len = 0; 01706 CV_OclDbgAssert(clGetPlatformInfo(handle, CL_PLATFORM_VENDOR, sizeof(buf), buf, &len) == CL_SUCCESS); 01707 buf[len] = '\0'; 01708 vendor = String(buf); 01709 } 01710 01711 initialized = true; 01712 } 01713 } 01714 01715 IMPLEMENT_REFCOUNTABLE(); 01716 01717 cl_platform_id handle; 01718 String vendor; 01719 bool initialized; 01720 }; 01721 01722 Platform::Platform() 01723 { 01724 p = 0; 01725 } 01726 01727 Platform::~Platform() 01728 { 01729 if(p) 01730 p->release(); 01731 } 01732 01733 Platform::Platform(const Platform& pl) 01734 { 01735 p = (Impl*)pl.p; 01736 if(p) 01737 p->addref(); 01738 } 01739 01740 Platform& Platform::operator = (const Platform& pl) 01741 { 01742 Impl* newp = (Impl*)pl.p; 01743 if(newp) 01744 newp->addref(); 01745 if(p) 01746 p->release(); 01747 p = newp; 01748 return *this; 01749 } 01750 01751 void* Platform::ptr() const 01752 { 01753 return p ? p->handle : 0; 01754 } 01755 01756 Platform& Platform::getDefault() 01757 { 01758 static Platform p; 01759 if( !p.p ) 01760 { 01761 p.p = new Impl; 01762 p.p->init(); 01763 } 01764 return p; 01765 } 01766 01767 /////////////////////////////////////// Device //////////////////////////////////////////// 01768 01769 // deviceVersion has format 01770 // OpenCL<space><major_version.minor_version><space><vendor-specific information> 01771 // by specification 01772 // http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clGetDeviceInfo.html 01773 // http://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clGetDeviceInfo.html 01774 static void parseDeviceVersion(const String &deviceVersion, int &major, int &minor) 01775 { 01776 major = minor = 0; 01777 if (10 >= deviceVersion.length()) 01778 return; 01779 const char *pstr = deviceVersion.c_str(); 01780 if (0 != strncmp(pstr, "OpenCL ", 7)) 01781 return; 01782 size_t ppos = deviceVersion.find('.', 7); 01783 if (String::npos == ppos) 01784 return; 01785 String temp = deviceVersion.substr(7, ppos - 7); 01786 major = atoi(temp.c_str()); 01787 temp = deviceVersion.substr(ppos + 1); 01788 minor = atoi(temp.c_str()); 01789 } 01790 01791 struct Device::Impl 01792 { 01793 Impl(void* d) 01794 { 01795 handle = (cl_device_id)d; 01796 refcount = 1; 01797 01798 name_ = getStrProp(CL_DEVICE_NAME); 01799 version_ = getStrProp(CL_DEVICE_VERSION); 01800 doubleFPConfig_ = getProp<cl_device_fp_config, int>(CL_DEVICE_DOUBLE_FP_CONFIG); 01801 hostUnifiedMemory_ = getBoolProp(CL_DEVICE_HOST_UNIFIED_MEMORY); 01802 maxComputeUnits_ = getProp<cl_uint, int>(CL_DEVICE_MAX_COMPUTE_UNITS); 01803 maxWorkGroupSize_ = getProp<size_t, size_t>(CL_DEVICE_MAX_WORK_GROUP_SIZE); 01804 type_ = getProp<cl_device_type, int>(CL_DEVICE_TYPE); 01805 driverVersion_ = getStrProp(CL_DRIVER_VERSION); 01806 01807 String deviceVersion_ = getStrProp(CL_DEVICE_VERSION); 01808 parseDeviceVersion(deviceVersion_, deviceVersionMajor_, deviceVersionMinor_); 01809 01810 vendorName_ = getStrProp(CL_DEVICE_VENDOR); 01811 if (vendorName_ == "Advanced Micro Devices, Inc." || 01812 vendorName_ == "AMD") 01813 vendorID_ = VENDOR_AMD; 01814 else if (vendorName_ == "Intel(R) Corporation" || vendorName_ == "Intel" || strstr(name_.c_str(), "Iris") != 0) 01815 vendorID_ = VENDOR_INTEL; 01816 else if (vendorName_ == "NVIDIA Corporation") 01817 vendorID_ = VENDOR_NVIDIA; 01818 else 01819 vendorID_ = UNKNOWN_VENDOR; 01820 } 01821 01822 template<typename _TpCL, typename _TpOut> 01823 _TpOut getProp(cl_device_info prop) const 01824 { 01825 _TpCL temp=_TpCL(); 01826 size_t sz = 0; 01827 01828 return clGetDeviceInfo(handle, prop, sizeof(temp), &temp, &sz) == CL_SUCCESS && 01829 sz == sizeof(temp) ? _TpOut(temp) : _TpOut(); 01830 } 01831 01832 bool getBoolProp(cl_device_info prop) const 01833 { 01834 cl_bool temp = CL_FALSE; 01835 size_t sz = 0; 01836 01837 return clGetDeviceInfo(handle, prop, sizeof(temp), &temp, &sz) == CL_SUCCESS && 01838 sz == sizeof(temp) ? temp != 0 : false; 01839 } 01840 01841 String getStrProp(cl_device_info prop) const 01842 { 01843 char buf[1024]; 01844 size_t sz=0; 01845 return clGetDeviceInfo(handle, prop, sizeof(buf)-16, buf, &sz) == CL_SUCCESS && 01846 sz < sizeof(buf) ? String(buf) : String(); 01847 } 01848 01849 IMPLEMENT_REFCOUNTABLE(); 01850 cl_device_id handle; 01851 01852 String name_; 01853 String version_; 01854 int doubleFPConfig_; 01855 bool hostUnifiedMemory_; 01856 int maxComputeUnits_; 01857 size_t maxWorkGroupSize_; 01858 int type_; 01859 int deviceVersionMajor_; 01860 int deviceVersionMinor_; 01861 String driverVersion_; 01862 String vendorName_; 01863 int vendorID_; 01864 }; 01865 01866 01867 Device::Device() 01868 { 01869 p = 0; 01870 } 01871 01872 Device::Device(void* d) 01873 { 01874 p = 0; 01875 set(d); 01876 } 01877 01878 Device::Device(const Device& d) 01879 { 01880 p = d.p; 01881 if(p) 01882 p->addref(); 01883 } 01884 01885 Device& Device::operator = (const Device& d) 01886 { 01887 Impl* newp = (Impl*)d.p; 01888 if(newp) 01889 newp->addref(); 01890 if(p) 01891 p->release(); 01892 p = newp; 01893 return *this; 01894 } 01895 01896 Device::~Device() 01897 { 01898 if(p) 01899 p->release(); 01900 } 01901 01902 void Device::set(void* d) 01903 { 01904 if(p) 01905 p->release(); 01906 p = new Impl(d); 01907 } 01908 01909 void* Device::ptr() const 01910 { 01911 return p ? p->handle : 0; 01912 } 01913 01914 String Device::name() const 01915 { return p ? p->name_ : String(); } 01916 01917 String Device::extensions() const 01918 { return p ? p->getStrProp(CL_DEVICE_EXTENSIONS) : String(); } 01919 01920 String Device::version() const 01921 { return p ? p->version_ : String(); } 01922 01923 String Device::vendorName() const 01924 { return p ? p->vendorName_ : String(); } 01925 01926 int Device::vendorID() const 01927 { return p ? p->vendorID_ : 0; } 01928 01929 String Device::OpenCL_C_Version() const 01930 { return p ? p->getStrProp(CL_DEVICE_OPENCL_C_VERSION) : String(); } 01931 01932 String Device::OpenCLVersion() const 01933 { return p ? p->getStrProp(CL_DEVICE_EXTENSIONS) : String(); } 01934 01935 int Device::deviceVersionMajor() const 01936 { return p ? p->deviceVersionMajor_ : 0; } 01937 01938 int Device::deviceVersionMinor() const 01939 { return p ? p->deviceVersionMinor_ : 0; } 01940 01941 String Device::driverVersion() const 01942 { return p ? p->driverVersion_ : String(); } 01943 01944 int Device::type() const 01945 { return p ? p->type_ : 0; } 01946 01947 int Device::addressBits() const 01948 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_ADDRESS_BITS) : 0; } 01949 01950 bool Device::available() const 01951 { return p ? p->getBoolProp(CL_DEVICE_AVAILABLE) : false; } 01952 01953 bool Device::compilerAvailable() const 01954 { return p ? p->getBoolProp(CL_DEVICE_COMPILER_AVAILABLE) : false; } 01955 01956 bool Device::linkerAvailable() const 01957 #ifdef CL_VERSION_1_2 01958 { return p ? p->getBoolProp(CL_DEVICE_LINKER_AVAILABLE) : false; } 01959 #else 01960 { CV_REQUIRE_OPENCL_1_2_ERROR; } 01961 #endif 01962 01963 int Device::doubleFPConfig() const 01964 { return p ? p->doubleFPConfig_ : 0; } 01965 01966 int Device::singleFPConfig() const 01967 { return p ? p->getProp<cl_device_fp_config, int>(CL_DEVICE_SINGLE_FP_CONFIG) : 0; } 01968 01969 int Device::halfFPConfig() const 01970 #ifdef CL_VERSION_1_2 01971 { return p ? p->getProp<cl_device_fp_config, int>(CL_DEVICE_HALF_FP_CONFIG) : 0; } 01972 #else 01973 { CV_REQUIRE_OPENCL_1_2_ERROR; } 01974 #endif 01975 01976 bool Device::endianLittle() const 01977 { return p ? p->getBoolProp(CL_DEVICE_ENDIAN_LITTLE) : false; } 01978 01979 bool Device::errorCorrectionSupport() const 01980 { return p ? p->getBoolProp(CL_DEVICE_ERROR_CORRECTION_SUPPORT) : false; } 01981 01982 int Device::executionCapabilities() const 01983 { return p ? p->getProp<cl_device_exec_capabilities, int>(CL_DEVICE_EXECUTION_CAPABILITIES) : 0; } 01984 01985 size_t Device::globalMemCacheSize() const 01986 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_GLOBAL_MEM_CACHE_SIZE) : 0; } 01987 01988 int Device::globalMemCacheType() const 01989 { return p ? p->getProp<cl_device_mem_cache_type, int>(CL_DEVICE_GLOBAL_MEM_CACHE_TYPE) : 0; } 01990 01991 int Device::globalMemCacheLineSize() const 01992 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE) : 0; } 01993 01994 size_t Device::globalMemSize() const 01995 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_GLOBAL_MEM_SIZE) : 0; } 01996 01997 size_t Device::localMemSize() const 01998 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_LOCAL_MEM_SIZE) : 0; } 01999 02000 int Device::localMemType() const 02001 { return p ? p->getProp<cl_device_local_mem_type, int>(CL_DEVICE_LOCAL_MEM_TYPE) : 0; } 02002 02003 bool Device::hostUnifiedMemory() const 02004 { return p ? p->hostUnifiedMemory_ : false; } 02005 02006 bool Device::imageSupport() const 02007 { return p ? p->getBoolProp(CL_DEVICE_IMAGE_SUPPORT) : false; } 02008 02009 bool Device::imageFromBufferSupport() const 02010 { 02011 bool ret = false; 02012 if (p) 02013 { 02014 size_t pos = p->getStrProp(CL_DEVICE_EXTENSIONS).find("cl_khr_image2d_from_buffer"); 02015 if (pos != String::npos) 02016 { 02017 ret = true; 02018 } 02019 } 02020 return ret; 02021 } 02022 02023 uint Device::imagePitchAlignment() const 02024 { 02025 #ifdef CL_DEVICE_IMAGE_PITCH_ALIGNMENT 02026 return p ? p->getProp<cl_uint, uint>(CL_DEVICE_IMAGE_PITCH_ALIGNMENT) : 0; 02027 #else 02028 return 0; 02029 #endif 02030 } 02031 02032 uint Device::imageBaseAddressAlignment() const 02033 { 02034 #ifdef CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT 02035 return p ? p->getProp<cl_uint, uint>(CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT) : 0; 02036 #else 02037 return 0; 02038 #endif 02039 } 02040 02041 size_t Device::image2DMaxWidth() const 02042 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE2D_MAX_WIDTH) : 0; } 02043 02044 size_t Device::image2DMaxHeight() const 02045 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE2D_MAX_HEIGHT) : 0; } 02046 02047 size_t Device::image3DMaxWidth() const 02048 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_WIDTH) : 0; } 02049 02050 size_t Device::image3DMaxHeight() const 02051 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_HEIGHT) : 0; } 02052 02053 size_t Device::image3DMaxDepth() const 02054 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_DEPTH) : 0; } 02055 02056 size_t Device::imageMaxBufferSize() const 02057 #ifdef CL_VERSION_1_2 02058 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE_MAX_BUFFER_SIZE) : 0; } 02059 #else 02060 { CV_REQUIRE_OPENCL_1_2_ERROR; } 02061 #endif 02062 02063 size_t Device::imageMaxArraySize() const 02064 #ifdef CL_VERSION_1_2 02065 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE_MAX_ARRAY_SIZE) : 0; } 02066 #else 02067 { CV_REQUIRE_OPENCL_1_2_ERROR; } 02068 #endif 02069 02070 int Device::maxClockFrequency() const 02071 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_CLOCK_FREQUENCY) : 0; } 02072 02073 int Device::maxComputeUnits() const 02074 { return p ? p->maxComputeUnits_ : 0; } 02075 02076 int Device::maxConstantArgs() const 02077 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_CONSTANT_ARGS) : 0; } 02078 02079 size_t Device::maxConstantBufferSize() const 02080 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE) : 0; } 02081 02082 size_t Device::maxMemAllocSize() const 02083 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_MEM_ALLOC_SIZE) : 0; } 02084 02085 size_t Device::maxParameterSize() const 02086 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_PARAMETER_SIZE) : 0; } 02087 02088 int Device::maxReadImageArgs() const 02089 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_READ_IMAGE_ARGS) : 0; } 02090 02091 int Device::maxWriteImageArgs() const 02092 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_WRITE_IMAGE_ARGS) : 0; } 02093 02094 int Device::maxSamplers() const 02095 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_SAMPLERS) : 0; } 02096 02097 size_t Device::maxWorkGroupSize() const 02098 { return p ? p->maxWorkGroupSize_ : 0; } 02099 02100 int Device::maxWorkItemDims() const 02101 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS) : 0; } 02102 02103 void Device::maxWorkItemSizes(size_t* sizes) const 02104 { 02105 if(p) 02106 { 02107 const int MAX_DIMS = 32; 02108 size_t retsz = 0; 02109 CV_OclDbgAssert(clGetDeviceInfo(p->handle, CL_DEVICE_MAX_WORK_ITEM_SIZES, 02110 MAX_DIMS*sizeof(sizes[0]), &sizes[0], &retsz) == CL_SUCCESS); 02111 } 02112 } 02113 02114 int Device::memBaseAddrAlign() const 02115 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MEM_BASE_ADDR_ALIGN) : 0; } 02116 02117 int Device::nativeVectorWidthChar() const 02118 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR) : 0; } 02119 02120 int Device::nativeVectorWidthShort() const 02121 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT) : 0; } 02122 02123 int Device::nativeVectorWidthInt() const 02124 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_INT) : 0; } 02125 02126 int Device::nativeVectorWidthLong() const 02127 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG) : 0; } 02128 02129 int Device::nativeVectorWidthFloat() const 02130 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT) : 0; } 02131 02132 int Device::nativeVectorWidthDouble() const 02133 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE) : 0; } 02134 02135 int Device::nativeVectorWidthHalf() const 02136 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF) : 0; } 02137 02138 int Device::preferredVectorWidthChar() const 02139 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR) : 0; } 02140 02141 int Device::preferredVectorWidthShort() const 02142 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT) : 0; } 02143 02144 int Device::preferredVectorWidthInt() const 02145 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT) : 0; } 02146 02147 int Device::preferredVectorWidthLong() const 02148 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG) : 0; } 02149 02150 int Device::preferredVectorWidthFloat() const 02151 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT) : 0; } 02152 02153 int Device::preferredVectorWidthDouble() const 02154 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE) : 0; } 02155 02156 int Device::preferredVectorWidthHalf() const 02157 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF) : 0; } 02158 02159 size_t Device::printfBufferSize() const 02160 #ifdef CL_VERSION_1_2 02161 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_PRINTF_BUFFER_SIZE) : 0; } 02162 #else 02163 { CV_REQUIRE_OPENCL_1_2_ERROR; } 02164 #endif 02165 02166 02167 size_t Device::profilingTimerResolution() const 02168 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_PROFILING_TIMER_RESOLUTION) : 0; } 02169 02170 const Device& Device::getDefault() 02171 { 02172 const Context& ctx = Context::getDefault(); 02173 int idx = getCoreTlsData().get()->device; 02174 const Device& device = ctx.device(idx); 02175 return device; 02176 } 02177 02178 ////////////////////////////////////// Context /////////////////////////////////////////////////// 02179 02180 template <typename Functor, typename ObjectType> 02181 inline cl_int getStringInfo(Functor f, ObjectType obj, cl_uint name, std::string& param) 02182 { 02183 ::size_t required; 02184 cl_int err = f(obj, name, 0, NULL, &required); 02185 if (err != CL_SUCCESS) 02186 return err; 02187 02188 param.clear(); 02189 if (required > 0) 02190 { 02191 AutoBuffer<char> buf(required + 1); 02192 char* ptr = (char*)buf; // cleanup is not needed 02193 err = f(obj, name, required, ptr, NULL); 02194 if (err != CL_SUCCESS) 02195 return err; 02196 param = ptr; 02197 } 02198 02199 return CL_SUCCESS; 02200 } 02201 02202 static void split(const std::string &s, char delim, std::vector<std::string> &elems) 02203 { 02204 elems.clear(); 02205 if (s.size() == 0) 02206 return; 02207 std::istringstream ss(s); 02208 std::string item; 02209 while (!ss.eof()) 02210 { 02211 std::getline(ss, item, delim); 02212 elems.push_back(item); 02213 } 02214 } 02215 02216 // Layout: <Platform>:<CPU|GPU|ACCELERATOR|nothing=GPU/CPU>:<deviceName> 02217 // Sample: AMD:GPU: 02218 // Sample: AMD:GPU:Tahiti 02219 // Sample: :GPU|CPU: = '' = ':' = '::' 02220 static bool parseOpenCLDeviceConfiguration(const std::string& configurationStr, 02221 std::string& platform, std::vector<std::string>& deviceTypes, std::string& deviceNameOrID) 02222 { 02223 std::vector<std::string> parts; 02224 split(configurationStr, ':', parts); 02225 if (parts.size() > 3) 02226 { 02227 std::cerr << "ERROR: Invalid configuration string for OpenCL device" << std::endl; 02228 return false; 02229 } 02230 if (parts.size() > 2) 02231 deviceNameOrID = parts[2]; 02232 if (parts.size() > 1) 02233 { 02234 split(parts[1], '|', deviceTypes); 02235 } 02236 if (parts.size() > 0) 02237 { 02238 platform = parts[0]; 02239 } 02240 return true; 02241 } 02242 02243 #ifdef WINRT 02244 static cl_device_id selectOpenCLDevice() 02245 { 02246 return NULL; 02247 } 02248 #else 02249 static cl_device_id selectOpenCLDevice() 02250 { 02251 std::string platform, deviceName; 02252 std::vector<std::string> deviceTypes; 02253 02254 const char* configuration = getenv("OPENCV_OPENCL_DEVICE"); 02255 if (configuration && 02256 (strcmp(configuration, "disabled") == 0 || 02257 !parseOpenCLDeviceConfiguration(std::string(configuration), platform, deviceTypes, deviceName) 02258 )) 02259 return NULL; 02260 02261 bool isID = false; 02262 int deviceID = -1; 02263 if (deviceName.length() == 1) 02264 // We limit ID range to 0..9, because we want to write: 02265 // - '2500' to mean i5-2500 02266 // - '8350' to mean AMD FX-8350 02267 // - '650' to mean GeForce 650 02268 // To extend ID range change condition to '> 0' 02269 { 02270 isID = true; 02271 for (size_t i = 0; i < deviceName.length(); i++) 02272 { 02273 if (!isdigit(deviceName[i])) 02274 { 02275 isID = false; 02276 break; 02277 } 02278 } 02279 if (isID) 02280 { 02281 deviceID = atoi(deviceName.c_str()); 02282 if (deviceID < 0) 02283 return NULL; 02284 } 02285 } 02286 02287 std::vector<cl_platform_id> platforms; 02288 { 02289 cl_uint numPlatforms = 0; 02290 CV_OclDbgAssert(clGetPlatformIDs(0, NULL, &numPlatforms) == CL_SUCCESS); 02291 02292 if (numPlatforms == 0) 02293 return NULL; 02294 platforms.resize((size_t)numPlatforms); 02295 CV_OclDbgAssert(clGetPlatformIDs(numPlatforms, &platforms[0], &numPlatforms) == CL_SUCCESS); 02296 platforms.resize(numPlatforms); 02297 } 02298 02299 int selectedPlatform = -1; 02300 if (platform.length() > 0) 02301 { 02302 for (size_t i = 0; i < platforms.size(); i++) 02303 { 02304 std::string name; 02305 CV_OclDbgAssert(getStringInfo(clGetPlatformInfo, platforms[i], CL_PLATFORM_NAME, name) == CL_SUCCESS); 02306 if (name.find(platform) != std::string::npos) 02307 { 02308 selectedPlatform = (int)i; 02309 break; 02310 } 02311 } 02312 if (selectedPlatform == -1) 02313 { 02314 std::cerr << "ERROR: Can't find OpenCL platform by name: " << platform << std::endl; 02315 goto not_found; 02316 } 02317 } 02318 if (deviceTypes.size() == 0) 02319 { 02320 if (!isID) 02321 { 02322 deviceTypes.push_back("GPU"); 02323 if (configuration) 02324 deviceTypes.push_back("CPU"); 02325 } 02326 else 02327 deviceTypes.push_back("ALL"); 02328 } 02329 for (size_t t = 0; t < deviceTypes.size(); t++) 02330 { 02331 int deviceType = 0; 02332 std::string tempStrDeviceType = deviceTypes[t]; 02333 #ifdef HAVE_OPENCL 02334 std::transform( tempStrDeviceType.begin(), tempStrDeviceType.end(), tempStrDeviceType.begin(), tolower ); 02335 #endif 02336 02337 if (tempStrDeviceType == "gpu" || tempStrDeviceType == "dgpu" || tempStrDeviceType == "igpu") 02338 deviceType = Device::TYPE_GPU; 02339 else if (tempStrDeviceType == "cpu") 02340 deviceType = Device::TYPE_CPU; 02341 else if (tempStrDeviceType == "accelerator") 02342 deviceType = Device::TYPE_ACCELERATOR; 02343 else if (tempStrDeviceType == "all") 02344 deviceType = Device::TYPE_ALL; 02345 else 02346 { 02347 std::cerr << "ERROR: Unsupported device type for OpenCL device (GPU, CPU, ACCELERATOR): " << deviceTypes[t] << std::endl; 02348 goto not_found; 02349 } 02350 02351 std::vector<cl_device_id> devices; // TODO Use clReleaseDevice to cleanup 02352 for (int i = selectedPlatform >= 0 ? selectedPlatform : 0; 02353 (selectedPlatform >= 0 ? i == selectedPlatform : true) && (i < (int)platforms.size()); 02354 i++) 02355 { 02356 cl_uint count = 0; 02357 cl_int status = clGetDeviceIDs(platforms[i], deviceType, 0, NULL, &count); 02358 CV_OclDbgAssert(status == CL_SUCCESS || status == CL_DEVICE_NOT_FOUND); 02359 if (count == 0) 02360 continue; 02361 size_t base = devices.size(); 02362 devices.resize(base + count); 02363 status = clGetDeviceIDs(platforms[i], deviceType, count, &devices[base], &count); 02364 CV_OclDbgAssert(status == CL_SUCCESS || status == CL_DEVICE_NOT_FOUND); 02365 } 02366 02367 for (size_t i = (isID ? deviceID : 0); 02368 (isID ? (i == (size_t)deviceID) : true) && (i < devices.size()); 02369 i++) 02370 { 02371 std::string name; 02372 CV_OclDbgAssert(getStringInfo(clGetDeviceInfo, devices[i], CL_DEVICE_NAME, name) == CL_SUCCESS); 02373 cl_bool useGPU = true; 02374 if(tempStrDeviceType == "dgpu" || tempStrDeviceType == "igpu") 02375 { 02376 cl_bool isIGPU = CL_FALSE; 02377 clGetDeviceInfo(devices[i], CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof(isIGPU), &isIGPU, NULL); 02378 useGPU = tempStrDeviceType == "dgpu" ? !isIGPU : isIGPU; 02379 } 02380 if ( (isID || name.find(deviceName) != std::string::npos) && useGPU) 02381 { 02382 // TODO check for OpenCL 1.1 02383 return devices[i]; 02384 } 02385 } 02386 } 02387 02388 not_found: 02389 if (!configuration) 02390 return NULL; // suppress messages on stderr 02391 02392 std::cerr << "ERROR: Requested OpenCL device not found, check configuration: " << (configuration == NULL ? "" : configuration) << std::endl 02393 << " Platform: " << (platform.length() == 0 ? "any" : platform) << std::endl 02394 << " Device types: "; 02395 for (size_t t = 0; t < deviceTypes.size(); t++) 02396 std::cerr << deviceTypes[t] << " "; 02397 02398 std::cerr << std::endl << " Device name: " << (deviceName.length() == 0 ? "any" : deviceName) << std::endl; 02399 return NULL; 02400 } 02401 #endif 02402 02403 #ifdef HAVE_OPENCL_SVM 02404 namespace svm { 02405 02406 enum AllocatorFlags { // don't use first 16 bits 02407 OPENCL_SVM_COARSE_GRAIN_BUFFER = 1 << 16, // clSVMAlloc + SVM map/unmap 02408 OPENCL_SVM_FINE_GRAIN_BUFFER = 2 << 16, // clSVMAlloc 02409 OPENCL_SVM_FINE_GRAIN_SYSTEM = 3 << 16, // direct access 02410 OPENCL_SVM_BUFFER_MASK = 3 << 16, 02411 OPENCL_SVM_BUFFER_MAP = 4 << 16 02412 }; 02413 02414 static bool checkForceSVMUmatUsage() 02415 { 02416 static bool initialized = false; 02417 static bool force = false; 02418 if (!initialized) 02419 { 02420 force = getBoolParameter("OPENCV_OPENCL_SVM_FORCE_UMAT_USAGE", false); 02421 initialized = true; 02422 } 02423 return force; 02424 } 02425 static bool checkDisableSVMUMatUsage() 02426 { 02427 static bool initialized = false; 02428 static bool force = false; 02429 if (!initialized) 02430 { 02431 force = getBoolParameter("OPENCV_OPENCL_SVM_DISABLE_UMAT_USAGE", false); 02432 initialized = true; 02433 } 02434 return force; 02435 } 02436 static bool checkDisableSVM() 02437 { 02438 static bool initialized = false; 02439 static bool force = false; 02440 if (!initialized) 02441 { 02442 force = getBoolParameter("OPENCV_OPENCL_SVM_DISABLE", false); 02443 initialized = true; 02444 } 02445 return force; 02446 } 02447 // see SVMCapabilities 02448 static unsigned int getSVMCapabilitiesMask() 02449 { 02450 static bool initialized = false; 02451 static unsigned int mask = 0; 02452 if (!initialized) 02453 { 02454 const char* envValue = getenv("OPENCV_OPENCL_SVM_CAPABILITIES_MASK"); 02455 if (envValue == NULL) 02456 { 02457 return ~0U; // all bits 1 02458 } 02459 mask = atoi(envValue); 02460 initialized = true; 02461 } 02462 return mask; 02463 } 02464 } // namespace 02465 #endif 02466 02467 struct Context::Impl 02468 { 02469 static Context::Impl* get(Context& context) { return context.p; } 02470 02471 void __init() 02472 { 02473 refcount = 1; 02474 handle = 0; 02475 #ifdef HAVE_OPENCL_SVM 02476 svmInitialized = false; 02477 #endif 02478 } 02479 02480 Impl() 02481 { 02482 __init(); 02483 } 02484 02485 void setDefault() 02486 { 02487 CV_Assert(handle == NULL); 02488 02489 cl_device_id d = selectOpenCLDevice(); 02490 02491 if (d == NULL) 02492 return; 02493 02494 cl_platform_id pl = NULL; 02495 CV_OclDbgAssert(clGetDeviceInfo(d, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &pl, NULL) == CL_SUCCESS); 02496 02497 cl_context_properties prop[] = 02498 { 02499 CL_CONTEXT_PLATFORM, (cl_context_properties)pl, 02500 0 02501 }; 02502 02503 // !!! in the current implementation force the number of devices to 1 !!! 02504 cl_uint nd = 1; 02505 cl_int status; 02506 02507 handle = clCreateContext(prop, nd, &d, 0, 0, &status); 02508 02509 bool ok = handle != 0 && status == CL_SUCCESS; 02510 if( ok ) 02511 { 02512 devices.resize(nd); 02513 devices[0].set(d); 02514 } 02515 else 02516 handle = NULL; 02517 } 02518 02519 Impl(int dtype0) 02520 { 02521 __init(); 02522 02523 cl_int retval = 0; 02524 cl_platform_id pl = (cl_platform_id)Platform::getDefault().ptr(); 02525 cl_context_properties prop[] = 02526 { 02527 CL_CONTEXT_PLATFORM, (cl_context_properties)pl, 02528 0 02529 }; 02530 02531 cl_uint i, nd0 = 0, nd = 0; 02532 int dtype = dtype0 & 15; 02533 CV_OclDbgAssert(clGetDeviceIDs( pl, dtype, 0, 0, &nd0 ) == CL_SUCCESS); 02534 02535 AutoBuffer<void*> dlistbuf(nd0*2+1); 02536 cl_device_id* dlist = (cl_device_id*)(void**)dlistbuf; 02537 cl_device_id* dlist_new = dlist + nd0; 02538 CV_OclDbgAssert(clGetDeviceIDs( pl, dtype, nd0, dlist, &nd0 ) == CL_SUCCESS); 02539 String name0; 02540 02541 for(i = 0; i < nd0; i++) 02542 { 02543 Device d(dlist[i]); 02544 if( !d.available() || !d.compilerAvailable() ) 02545 continue; 02546 if( dtype0 == Device::TYPE_DGPU && d.hostUnifiedMemory() ) 02547 continue; 02548 if( dtype0 == Device::TYPE_IGPU && !d.hostUnifiedMemory() ) 02549 continue; 02550 String name = d.name(); 02551 if( nd != 0 && name != name0 ) 02552 continue; 02553 name0 = name; 02554 dlist_new[nd++] = dlist[i]; 02555 } 02556 02557 if(nd == 0) 02558 return; 02559 02560 // !!! in the current implementation force the number of devices to 1 !!! 02561 nd = 1; 02562 02563 handle = clCreateContext(prop, nd, dlist_new, 0, 0, &retval); 02564 bool ok = handle != 0 && retval == CL_SUCCESS; 02565 if( ok ) 02566 { 02567 devices.resize(nd); 02568 for( i = 0; i < nd; i++ ) 02569 devices[i].set(dlist_new[i]); 02570 } 02571 } 02572 02573 ~Impl() 02574 { 02575 if(handle) 02576 { 02577 clReleaseContext(handle); 02578 handle = NULL; 02579 } 02580 devices.clear(); 02581 } 02582 02583 Program getProg(const ProgramSource& src, 02584 const String& buildflags, String& errmsg) 02585 { 02586 String prefix = Program::getPrefix(buildflags); 02587 HashKey k(src.hash(), crc64((const uchar*)prefix.c_str(), prefix.size())); 02588 phash_t::iterator it = phash.find(k); 02589 if( it != phash.end() ) 02590 return it->second; 02591 //String filename = format("%08x%08x_%08x%08x.clb2", 02592 Program prog(src, buildflags, errmsg); 02593 if(prog.ptr()) 02594 phash.insert(std::pair<HashKey,Program>(k, prog)); 02595 return prog; 02596 } 02597 02598 IMPLEMENT_REFCOUNTABLE(); 02599 02600 cl_context handle; 02601 std::vector<Device> devices; 02602 02603 typedef ProgramSource::hash_t hash_t; 02604 02605 struct HashKey 02606 { 02607 HashKey(hash_t _a, hash_t _b) : a(_a), b(_b) {} 02608 bool operator < (const HashKey& k) const { return a < k.a || (a == k.a && b < k.b); } 02609 bool operator == (const HashKey& k) const { return a == k.a && b == k.b; } 02610 bool operator != (const HashKey& k) const { return a != k.a || b != k.b; } 02611 hash_t a, b; 02612 }; 02613 typedef std::map<HashKey, Program> phash_t; 02614 phash_t phash; 02615 02616 #ifdef HAVE_OPENCL_SVM 02617 bool svmInitialized; 02618 bool svmAvailable; 02619 bool svmEnabled; 02620 svm::SVMCapabilities svmCapabilities; 02621 svm::SVMFunctions svmFunctions; 02622 02623 void svmInit() 02624 { 02625 CV_Assert(handle != NULL); 02626 const Device& device = devices[0]; 02627 cl_device_svm_capabilities deviceCaps = 0; 02628 CV_Assert(((void)0, CL_DEVICE_SVM_CAPABILITIES == CL_DEVICE_SVM_CAPABILITIES_AMD)); // Check assumption 02629 cl_int status = clGetDeviceInfo((cl_device_id)device.ptr(), CL_DEVICE_SVM_CAPABILITIES, sizeof(deviceCaps), &deviceCaps, NULL); 02630 if (status != CL_SUCCESS) 02631 { 02632 CV_OPENCL_SVM_TRACE_ERROR_P("CL_DEVICE_SVM_CAPABILITIES via clGetDeviceInfo failed: %d\n", status); 02633 goto noSVM; 02634 } 02635 CV_OPENCL_SVM_TRACE_P("CL_DEVICE_SVM_CAPABILITIES returned: 0x%x\n", (int)deviceCaps); 02636 CV_Assert(((void)0, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER == CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_AMD)); // Check assumption 02637 svmCapabilities.value_ = 02638 ((deviceCaps & CL_DEVICE_SVM_COARSE_GRAIN_BUFFER) ? svm::SVMCapabilities::SVM_COARSE_GRAIN_BUFFER : 0) | 02639 ((deviceCaps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) ? svm::SVMCapabilities::SVM_FINE_GRAIN_BUFFER : 0) | 02640 ((deviceCaps & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM) ? svm::SVMCapabilities::SVM_FINE_GRAIN_SYSTEM : 0) | 02641 ((deviceCaps & CL_DEVICE_SVM_ATOMICS) ? svm::SVMCapabilities::SVM_ATOMICS : 0); 02642 svmCapabilities.value_ &= svm::getSVMCapabilitiesMask(); 02643 if (svmCapabilities.value_ == 0) 02644 { 02645 CV_OPENCL_SVM_TRACE_ERROR_P("svmCapabilities is empty\n"); 02646 goto noSVM; 02647 } 02648 try 02649 { 02650 // Try OpenCL 2.0 02651 CV_OPENCL_SVM_TRACE_P("Try SVM from OpenCL 2.0 ...\n"); 02652 void* ptr = clSVMAlloc(handle, CL_MEM_READ_WRITE, 100, 0); 02653 if (!ptr) 02654 { 02655 CV_OPENCL_SVM_TRACE_ERROR_P("clSVMAlloc returned NULL...\n"); 02656 CV_ErrorNoReturn(Error::StsBadArg, "clSVMAlloc returned NULL"); 02657 } 02658 try 02659 { 02660 bool error = false; 02661 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr(); 02662 if (CL_SUCCESS != clEnqueueSVMMap(q, CL_TRUE, CL_MAP_WRITE, ptr, 100, 0, NULL, NULL)) 02663 { 02664 CV_OPENCL_SVM_TRACE_ERROR_P("clEnqueueSVMMap failed...\n"); 02665 CV_ErrorNoReturn(Error::StsBadArg, "clEnqueueSVMMap FAILED"); 02666 } 02667 clFinish(q); 02668 try 02669 { 02670 ((int*)ptr)[0] = 100; 02671 } 02672 catch (...) 02673 { 02674 CV_OPENCL_SVM_TRACE_ERROR_P("SVM buffer access test FAILED\n"); 02675 error = true; 02676 } 02677 if (CL_SUCCESS != clEnqueueSVMUnmap(q, ptr, 0, NULL, NULL)) 02678 { 02679 CV_OPENCL_SVM_TRACE_ERROR_P("clEnqueueSVMUnmap failed...\n"); 02680 CV_ErrorNoReturn(Error::StsBadArg, "clEnqueueSVMUnmap FAILED"); 02681 } 02682 clFinish(q); 02683 if (error) 02684 { 02685 CV_ErrorNoReturn(Error::StsBadArg, "OpenCL SVM buffer access test was FAILED"); 02686 } 02687 } 02688 catch (...) 02689 { 02690 CV_OPENCL_SVM_TRACE_ERROR_P("OpenCL SVM buffer access test was FAILED\n"); 02691 clSVMFree(handle, ptr); 02692 throw; 02693 } 02694 clSVMFree(handle, ptr); 02695 svmFunctions.fn_clSVMAlloc = clSVMAlloc; 02696 svmFunctions.fn_clSVMFree = clSVMFree; 02697 svmFunctions.fn_clSetKernelArgSVMPointer = clSetKernelArgSVMPointer; 02698 //svmFunctions.fn_clSetKernelExecInfo = clSetKernelExecInfo; 02699 //svmFunctions.fn_clEnqueueSVMFree = clEnqueueSVMFree; 02700 svmFunctions.fn_clEnqueueSVMMemcpy = clEnqueueSVMMemcpy; 02701 svmFunctions.fn_clEnqueueSVMMemFill = clEnqueueSVMMemFill; 02702 svmFunctions.fn_clEnqueueSVMMap = clEnqueueSVMMap; 02703 svmFunctions.fn_clEnqueueSVMUnmap = clEnqueueSVMUnmap; 02704 } 02705 catch (...) 02706 { 02707 CV_OPENCL_SVM_TRACE_P("clSVMAlloc failed, trying HSA extension...\n"); 02708 try 02709 { 02710 // Try HSA extension 02711 String extensions = device.extensions(); 02712 if (extensions.find("cl_amd_svm") == String::npos) 02713 { 02714 CV_OPENCL_SVM_TRACE_P("Device extension doesn't have cl_amd_svm: %s\n", extensions.c_str()); 02715 goto noSVM; 02716 } 02717 cl_platform_id p = NULL; 02718 status = clGetDeviceInfo((cl_device_id)device.ptr(), CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &p, NULL); 02719 CV_Assert(status == CL_SUCCESS); 02720 svmFunctions.fn_clSVMAlloc = (clSVMAllocAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSVMAllocAMD"); 02721 svmFunctions.fn_clSVMFree = (clSVMFreeAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSVMFreeAMD"); 02722 svmFunctions.fn_clSetKernelArgSVMPointer = (clSetKernelArgSVMPointerAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSetKernelArgSVMPointerAMD"); 02723 //svmFunctions.fn_clSetKernelExecInfo = (clSetKernelExecInfoAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSetKernelExecInfoAMD"); 02724 //svmFunctions.fn_clEnqueueSVMFree = (clEnqueueSVMFreeAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMFreeAMD"); 02725 svmFunctions.fn_clEnqueueSVMMemcpy = (clEnqueueSVMMemcpyAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMMemcpyAMD"); 02726 svmFunctions.fn_clEnqueueSVMMemFill = (clEnqueueSVMMemFillAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMMemFillAMD"); 02727 svmFunctions.fn_clEnqueueSVMMap = (clEnqueueSVMMapAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMMapAMD"); 02728 svmFunctions.fn_clEnqueueSVMUnmap = (clEnqueueSVMUnmapAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMUnmapAMD"); 02729 CV_Assert(svmFunctions.isValid()); 02730 } 02731 catch (...) 02732 { 02733 CV_OPENCL_SVM_TRACE_P("Something is totally wrong\n"); 02734 goto noSVM; 02735 } 02736 } 02737 02738 svmAvailable = true; 02739 svmEnabled = !svm::checkDisableSVM(); 02740 svmInitialized = true; 02741 CV_OPENCL_SVM_TRACE_P("OpenCV OpenCL SVM support initialized\n"); 02742 return; 02743 noSVM: 02744 CV_OPENCL_SVM_TRACE_P("OpenCL SVM is not detected\n"); 02745 svmAvailable = false; 02746 svmEnabled = false; 02747 svmCapabilities.value_ = 0; 02748 svmInitialized = true; 02749 svmFunctions.fn_clSVMAlloc = NULL; 02750 return; 02751 } 02752 #endif 02753 }; 02754 02755 02756 Context::Context() 02757 { 02758 p = 0; 02759 } 02760 02761 Context::Context(int dtype) 02762 { 02763 p = 0; 02764 create(dtype); 02765 } 02766 02767 bool Context::create() 02768 { 02769 if( !haveOpenCL() ) 02770 return false; 02771 if(p) 02772 p->release(); 02773 p = new Impl(); 02774 if(!p->handle) 02775 { 02776 delete p; 02777 p = 0; 02778 } 02779 return p != 0; 02780 } 02781 02782 bool Context::create(int dtype0) 02783 { 02784 if( !haveOpenCL() ) 02785 return false; 02786 if(p) 02787 p->release(); 02788 p = new Impl(dtype0); 02789 if(!p->handle) 02790 { 02791 delete p; 02792 p = 0; 02793 } 02794 return p != 0; 02795 } 02796 02797 Context::~Context() 02798 { 02799 if (p) 02800 { 02801 p->release(); 02802 p = NULL; 02803 } 02804 } 02805 02806 Context::Context(const Context& c) 02807 { 02808 p = (Impl*)c.p; 02809 if(p) 02810 p->addref(); 02811 } 02812 02813 Context& Context::operator = (const Context& c) 02814 { 02815 Impl* newp = (Impl*)c.p; 02816 if(newp) 02817 newp->addref(); 02818 if(p) 02819 p->release(); 02820 p = newp; 02821 return *this; 02822 } 02823 02824 void* Context::ptr() const 02825 { 02826 return p == NULL ? NULL : p->handle; 02827 } 02828 02829 size_t Context::ndevices() const 02830 { 02831 return p ? p->devices.size() : 0; 02832 } 02833 02834 const Device& Context::device(size_t idx) const 02835 { 02836 static Device dummy; 02837 return !p || idx >= p->devices.size() ? dummy : p->devices[idx]; 02838 } 02839 02840 Context& Context::getDefault(bool initialize) 02841 { 02842 static Context* ctx = new Context(); 02843 if(!ctx->p && haveOpenCL()) 02844 { 02845 if (!ctx->p) 02846 ctx->p = new Impl(); 02847 if (initialize) 02848 { 02849 // do not create new Context right away. 02850 // First, try to retrieve existing context of the same type. 02851 // In its turn, Platform::getContext() may call Context::create() 02852 // if there is no such context. 02853 if (ctx->p->handle == NULL) 02854 ctx->p->setDefault(); 02855 } 02856 } 02857 02858 return *ctx; 02859 } 02860 02861 Program Context::getProg(const ProgramSource& prog, 02862 const String& buildopts, String& errmsg) 02863 { 02864 return p ? p->getProg(prog, buildopts, errmsg) : Program(); 02865 } 02866 02867 02868 02869 #ifdef HAVE_OPENCL_SVM 02870 bool Context::useSVM() const 02871 { 02872 Context::Impl* i = p; 02873 CV_Assert(i); 02874 if (!i->svmInitialized) 02875 i->svmInit(); 02876 return i->svmEnabled; 02877 } 02878 void Context::setUseSVM(bool enabled) 02879 { 02880 Context::Impl* i = p; 02881 CV_Assert(i); 02882 if (!i->svmInitialized) 02883 i->svmInit(); 02884 if (enabled && !i->svmAvailable) 02885 { 02886 CV_ErrorNoReturn(Error::StsError, "OpenCL Shared Virtual Memory (SVM) is not supported by OpenCL device"); 02887 } 02888 i->svmEnabled = enabled; 02889 } 02890 #else 02891 bool Context::useSVM() const { return false; } 02892 void Context::setUseSVM(bool enabled) { CV_Assert(!enabled); } 02893 #endif 02894 02895 #ifdef HAVE_OPENCL_SVM 02896 namespace svm { 02897 02898 const SVMCapabilities getSVMCapabilitites(const ocl::Context& context) 02899 { 02900 Context::Impl* i = context.p; 02901 CV_Assert(i); 02902 if (!i->svmInitialized) 02903 i->svmInit(); 02904 return i->svmCapabilities; 02905 } 02906 02907 CV_EXPORTS const SVMFunctions* getSVMFunctions(const ocl::Context& context) 02908 { 02909 Context::Impl* i = context.p; 02910 CV_Assert(i); 02911 CV_Assert(i->svmInitialized); // getSVMCapabilitites() must be called first 02912 CV_Assert(i->svmFunctions.fn_clSVMAlloc != NULL); 02913 return &i->svmFunctions; 02914 } 02915 02916 CV_EXPORTS bool useSVM(UMatUsageFlags usageFlags) 02917 { 02918 if (checkForceSVMUmatUsage()) 02919 return true; 02920 if (checkDisableSVMUMatUsage()) 02921 return false; 02922 if ((usageFlags & USAGE_ALLOCATE_SHARED_MEMORY) != 0) 02923 return true; 02924 return false; // don't use SVM by default 02925 } 02926 02927 } // namespace cv::ocl::svm 02928 #endif // HAVE_OPENCL_SVM 02929 02930 02931 static void get_platform_name(cl_platform_id id, String& name) 02932 { 02933 // get platform name string length 02934 size_t sz = 0; 02935 if (CL_SUCCESS != clGetPlatformInfo(id, CL_PLATFORM_NAME, 0, 0, &sz)) 02936 CV_ErrorNoReturn(cv::Error::OpenCLApiCallError, "clGetPlatformInfo failed!"); 02937 02938 // get platform name string 02939 AutoBuffer<char> buf(sz + 1); 02940 if (CL_SUCCESS != clGetPlatformInfo(id, CL_PLATFORM_NAME, sz, buf, 0)) 02941 CV_ErrorNoReturn(cv::Error::OpenCLApiCallError, "clGetPlatformInfo failed!"); 02942 02943 // just in case, ensure trailing zero for ASCIIZ string 02944 buf[sz] = 0; 02945 02946 name = (const char*)buf; 02947 } 02948 02949 /* 02950 // Attaches OpenCL context to OpenCV 02951 */ 02952 void attachContext(const String& platformName, void* platformID, void* context, void* deviceID) 02953 { 02954 cl_uint cnt = 0; 02955 02956 if(CL_SUCCESS != clGetPlatformIDs(0, 0, &cnt)) 02957 CV_ErrorNoReturn(cv::Error::OpenCLApiCallError, "clGetPlatformIDs failed!"); 02958 02959 if (cnt == 0) 02960 CV_ErrorNoReturn(cv::Error::OpenCLApiCallError, "no OpenCL platform available!"); 02961 02962 std::vector<cl_platform_id> platforms(cnt); 02963 02964 if(CL_SUCCESS != clGetPlatformIDs(cnt, &platforms[0], 0)) 02965 CV_ErrorNoReturn(cv::Error::OpenCLApiCallError, "clGetPlatformIDs failed!"); 02966 02967 bool platformAvailable = false; 02968 02969 // check if external platformName contained in list of available platforms in OpenCV 02970 for (unsigned int i = 0; i < cnt; i++) 02971 { 02972 String availablePlatformName; 02973 get_platform_name(platforms[i], availablePlatformName); 02974 // external platform is found in the list of available platforms 02975 if (platformName == availablePlatformName) 02976 { 02977 platformAvailable = true; 02978 break; 02979 } 02980 } 02981 02982 if (!platformAvailable) 02983 CV_ErrorNoReturn(cv::Error::OpenCLApiCallError, "No matched platforms available!"); 02984 02985 // check if platformID corresponds to platformName 02986 String actualPlatformName; 02987 get_platform_name((cl_platform_id)platformID, actualPlatformName); 02988 if (platformName != actualPlatformName) 02989 CV_ErrorNoReturn(cv::Error::OpenCLApiCallError, "No matched platforms available!"); 02990 02991 // do not initialize OpenCL context 02992 Context ctx = Context::getDefault(false); 02993 02994 // attach supplied context to OpenCV 02995 initializeContextFromHandle(ctx, platformID, context, deviceID); 02996 02997 if(CL_SUCCESS != clRetainContext((cl_context)context)) 02998 CV_ErrorNoReturn(cv::Error::OpenCLApiCallError, "clRetainContext failed!"); 02999 03000 // clear command queue, if any 03001 getCoreTlsData().get()->oclQueue.finish(); 03002 Queue q; 03003 getCoreTlsData().get()->oclQueue = q; 03004 03005 return; 03006 } // attachContext() 03007 03008 03009 void initializeContextFromHandle(Context& ctx, void* platform, void* _context, void* _device) 03010 { 03011 cl_context context = (cl_context)_context; 03012 cl_device_id device = (cl_device_id)_device; 03013 03014 // cleanup old context 03015 Context::Impl * impl = ctx.p; 03016 if (impl->handle) 03017 { 03018 CV_OclDbgAssert(clReleaseContext(impl->handle) == CL_SUCCESS); 03019 } 03020 impl->devices.clear(); 03021 03022 impl->handle = context; 03023 impl->devices.resize(1); 03024 impl->devices[0].set(device); 03025 03026 Platform& p = Platform::getDefault(); 03027 Platform::Impl* pImpl = p.p; 03028 pImpl->handle = (cl_platform_id)platform; 03029 } 03030 03031 /////////////////////////////////////////// Queue ///////////////////////////////////////////// 03032 03033 struct Queue::Impl 03034 { 03035 Impl(const Context& c, const Device& d) 03036 { 03037 refcount = 1; 03038 const Context* pc = &c; 03039 cl_context ch = (cl_context)pc->ptr(); 03040 if( !ch ) 03041 { 03042 pc = &Context::getDefault(); 03043 ch = (cl_context)pc->ptr(); 03044 } 03045 cl_device_id dh = (cl_device_id)d.ptr(); 03046 if( !dh ) 03047 dh = (cl_device_id)pc->device(0).ptr(); 03048 cl_int retval = 0; 03049 handle = clCreateCommandQueue(ch, dh, 0, &retval); 03050 CV_OclDbgAssert(retval == CL_SUCCESS); 03051 } 03052 03053 ~Impl() 03054 { 03055 #ifdef _WIN32 03056 if (!cv::__termination) 03057 #endif 03058 { 03059 if(handle) 03060 { 03061 clFinish(handle); 03062 clReleaseCommandQueue(handle); 03063 handle = NULL; 03064 } 03065 } 03066 } 03067 03068 IMPLEMENT_REFCOUNTABLE(); 03069 03070 cl_command_queue handle; 03071 }; 03072 03073 Queue::Queue() 03074 { 03075 p = 0; 03076 } 03077 03078 Queue::Queue(const Context& c, const Device& d) 03079 { 03080 p = 0; 03081 create(c, d); 03082 } 03083 03084 Queue::Queue(const Queue& q) 03085 { 03086 p = q.p; 03087 if(p) 03088 p->addref(); 03089 } 03090 03091 Queue& Queue::operator = (const Queue& q) 03092 { 03093 Impl* newp = (Impl*)q.p; 03094 if(newp) 03095 newp->addref(); 03096 if(p) 03097 p->release(); 03098 p = newp; 03099 return *this; 03100 } 03101 03102 Queue::~Queue() 03103 { 03104 if(p) 03105 p->release(); 03106 } 03107 03108 bool Queue::create(const Context& c, const Device& d) 03109 { 03110 if(p) 03111 p->release(); 03112 p = new Impl(c, d); 03113 return p->handle != 0; 03114 } 03115 03116 void Queue::finish() 03117 { 03118 if(p && p->handle) 03119 { 03120 CV_OclDbgAssert(clFinish(p->handle) == CL_SUCCESS); 03121 } 03122 } 03123 03124 void* Queue::ptr() const 03125 { 03126 return p ? p->handle : 0; 03127 } 03128 03129 Queue& Queue::getDefault() 03130 { 03131 Queue& q = getCoreTlsData().get()->oclQueue; 03132 if( !q.p && haveOpenCL() ) 03133 q.create(Context::getDefault()); 03134 return q; 03135 } 03136 03137 static cl_command_queue getQueue(const Queue& q) 03138 { 03139 cl_command_queue qq = (cl_command_queue)q.ptr(); 03140 if(!qq) 03141 qq = (cl_command_queue)Queue::getDefault().ptr(); 03142 return qq; 03143 } 03144 03145 /////////////////////////////////////////// KernelArg ///////////////////////////////////////////// 03146 03147 KernelArg::KernelArg() 03148 : flags(0), m(0), obj(0), sz(0), wscale(1), iwscale(1) 03149 { 03150 } 03151 03152 KernelArg::KernelArg(int _flags, UMat* _m, int _wscale, int _iwscale, const void* _obj, size_t _sz) 03153 : flags(_flags), m(_m), obj(_obj), sz(_sz), wscale(_wscale), iwscale(_iwscale) 03154 { 03155 } 03156 03157 KernelArg KernelArg::Constant(const Mat& m) 03158 { 03159 CV_Assert(m.isContinuous()); 03160 return KernelArg(CONSTANT, 0, 0, 0, m.ptr(), m.total()*m.elemSize()); 03161 } 03162 03163 /////////////////////////////////////////// Kernel ///////////////////////////////////////////// 03164 03165 struct Kernel::Impl 03166 { 03167 Impl(const char* kname, const Program& prog) : 03168 refcount(1), e(0), nu(0) 03169 { 03170 cl_program ph = (cl_program)prog.ptr(); 03171 cl_int retval = 0; 03172 handle = ph != 0 ? 03173 clCreateKernel(ph, kname, &retval) : 0; 03174 CV_OclDbgAssert(retval == CL_SUCCESS); 03175 for( int i = 0; i < MAX_ARRS; i++ ) 03176 u[i] = 0; 03177 haveTempDstUMats = false; 03178 } 03179 03180 void cleanupUMats() 03181 { 03182 for( int i = 0; i < MAX_ARRS; i++ ) 03183 if( u[i] ) 03184 { 03185 if( CV_XADD(&u[i]->urefcount, -1) == 1 ) 03186 u[i]->currAllocator->deallocate(u[i]); 03187 u[i] = 0; 03188 } 03189 nu = 0; 03190 haveTempDstUMats = false; 03191 } 03192 03193 void addUMat(const UMat& m, bool dst) 03194 { 03195 CV_Assert(nu < MAX_ARRS && m.u && m.u->urefcount > 0); 03196 u[nu] = m.u; 03197 CV_XADD(&m.u->urefcount, 1); 03198 nu++; 03199 if(dst && m.u->tempUMat()) 03200 haveTempDstUMats = true; 03201 } 03202 03203 void addImage(const Image2D& image) 03204 { 03205 images.push_back(image); 03206 } 03207 03208 void finit() 03209 { 03210 cleanupUMats(); 03211 images.clear(); 03212 if(e) { clReleaseEvent(e); e = 0; } 03213 release(); 03214 } 03215 03216 ~Impl() 03217 { 03218 if(handle) 03219 clReleaseKernel(handle); 03220 } 03221 03222 IMPLEMENT_REFCOUNTABLE(); 03223 03224 cl_kernel handle; 03225 cl_event e; 03226 enum { MAX_ARRS = 16 }; 03227 UMatData* u[MAX_ARRS]; 03228 int nu; 03229 std::list<Image2D> images; 03230 bool haveTempDstUMats; 03231 }; 03232 03233 }} // namespace cv::ocl 03234 03235 extern "C" { 03236 03237 static void CL_CALLBACK oclCleanupCallback(cl_event, cl_int, void *p) 03238 { 03239 ((cv::ocl::Kernel::Impl*)p)->finit(); 03240 } 03241 03242 } 03243 03244 namespace cv { namespace ocl { 03245 03246 Kernel::Kernel() 03247 { 03248 p = 0; 03249 } 03250 03251 Kernel::Kernel(const char* kname, const Program& prog) 03252 { 03253 p = 0; 03254 create(kname, prog); 03255 } 03256 03257 Kernel::Kernel(const char* kname, const ProgramSource& src, 03258 const String& buildopts, String* errmsg) 03259 { 03260 p = 0; 03261 create(kname, src, buildopts, errmsg); 03262 } 03263 03264 Kernel::Kernel(const Kernel& k) 03265 { 03266 p = k.p; 03267 if(p) 03268 p->addref(); 03269 } 03270 03271 Kernel& Kernel::operator = (const Kernel& k) 03272 { 03273 Impl* newp = (Impl*)k.p; 03274 if(newp) 03275 newp->addref(); 03276 if(p) 03277 p->release(); 03278 p = newp; 03279 return *this; 03280 } 03281 03282 Kernel::~Kernel() 03283 { 03284 if(p) 03285 p->release(); 03286 } 03287 03288 bool Kernel::create(const char* kname, const Program& prog) 03289 { 03290 if(p) 03291 p->release(); 03292 p = new Impl(kname, prog); 03293 if(p->handle == 0) 03294 { 03295 p->release(); 03296 p = 0; 03297 } 03298 #ifdef CV_OPENCL_RUN_ASSERT // check kernel compilation fails 03299 CV_Assert(p); 03300 #endif 03301 return p != 0; 03302 } 03303 03304 bool Kernel::create(const char* kname, const ProgramSource& src, 03305 const String& buildopts, String* errmsg) 03306 { 03307 if(p) 03308 { 03309 p->release(); 03310 p = 0; 03311 } 03312 String tempmsg; 03313 if( !errmsg ) errmsg = &tempmsg; 03314 const Program& prog = Context::getDefault().getProg(src, buildopts, *errmsg); 03315 return create(kname, prog); 03316 } 03317 03318 void* Kernel::ptr() const 03319 { 03320 return p ? p->handle : 0; 03321 } 03322 03323 bool Kernel::empty() const 03324 { 03325 return ptr() == 0; 03326 } 03327 03328 int Kernel::set(int i, const void* value, size_t sz) 03329 { 03330 if (!p || !p->handle) 03331 return -1; 03332 if (i < 0) 03333 return i; 03334 if( i == 0 ) 03335 p->cleanupUMats(); 03336 03337 cl_int retval = clSetKernelArg(p->handle, (cl_uint)i, sz, value); 03338 CV_OclDbgAssert(retval == CL_SUCCESS); 03339 if (retval != CL_SUCCESS) 03340 return -1; 03341 return i+1; 03342 } 03343 03344 int Kernel::set(int i, const Image2D& image2D) 03345 { 03346 p->addImage(image2D); 03347 cl_mem h = (cl_mem)image2D.ptr(); 03348 return set(i, &h, sizeof(h)); 03349 } 03350 03351 int Kernel::set(int i, const UMat& m) 03352 { 03353 return set(i, KernelArg(KernelArg::READ_WRITE, (UMat*)&m, 0, 0)); 03354 } 03355 03356 int Kernel::set(int i, const KernelArg& arg) 03357 { 03358 if( !p || !p->handle ) 03359 return -1; 03360 if (i < 0) 03361 return i; 03362 if( i == 0 ) 03363 p->cleanupUMats(); 03364 if( arg.m ) 03365 { 03366 int accessFlags = ((arg.flags & KernelArg::READ_ONLY) ? ACCESS_READ : 0) + 03367 ((arg.flags & KernelArg::WRITE_ONLY) ? ACCESS_WRITE : 0); 03368 bool ptronly = (arg.flags & KernelArg::PTR_ONLY) != 0; 03369 cl_mem h = (cl_mem)arg.m->handle(accessFlags); 03370 03371 if (!h) 03372 { 03373 p->release(); 03374 p = 0; 03375 return -1; 03376 } 03377 03378 #ifdef HAVE_OPENCL_SVM 03379 if ((arg.m->u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0) 03380 { 03381 const Context& ctx = Context::getDefault(); 03382 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx); 03383 uchar*& svmDataPtr = (uchar*&)arg.m->u->handle; 03384 CV_OPENCL_SVM_TRACE_P("clSetKernelArgSVMPointer: %p\n", svmDataPtr); 03385 #if 1 // TODO 03386 cl_int status = svmFns->fn_clSetKernelArgSVMPointer(p->handle, (cl_uint)i, svmDataPtr); 03387 #else 03388 cl_int status = svmFns->fn_clSetKernelArgSVMPointer(p->handle, (cl_uint)i, &svmDataPtr); 03389 #endif 03390 CV_Assert(status == CL_SUCCESS); 03391 } 03392 else 03393 #endif 03394 { 03395 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)i, sizeof(h), &h) == CL_SUCCESS); 03396 } 03397 03398 if (ptronly) 03399 { 03400 i++; 03401 } 03402 else if( arg.m->dims <= 2 ) 03403 { 03404 UMat2D u2d(*arg.m); 03405 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u2d.step), &u2d.step) == CL_SUCCESS); 03406 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u2d.offset), &u2d.offset) == CL_SUCCESS); 03407 i += 3; 03408 03409 if( !(arg.flags & KernelArg::NO_SIZE) ) 03410 { 03411 int cols = u2d.cols*arg.wscale/arg.iwscale; 03412 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)i, sizeof(u2d.rows), &u2d.rows) == CL_SUCCESS); 03413 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(cols), &cols) == CL_SUCCESS); 03414 i += 2; 03415 } 03416 } 03417 else 03418 { 03419 UMat3D u3d(*arg.m); 03420 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u3d.slicestep), &u3d.slicestep) == CL_SUCCESS); 03421 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u3d.step), &u3d.step) == CL_SUCCESS); 03422 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+3), sizeof(u3d.offset), &u3d.offset) == CL_SUCCESS); 03423 i += 4; 03424 if( !(arg.flags & KernelArg::NO_SIZE) ) 03425 { 03426 int cols = u3d.cols*arg.wscale/arg.iwscale; 03427 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)i, sizeof(u3d.slices), &u3d.rows) == CL_SUCCESS); 03428 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u3d.rows), &u3d.rows) == CL_SUCCESS); 03429 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u3d.cols), &cols) == CL_SUCCESS); 03430 i += 3; 03431 } 03432 } 03433 p->addUMat(*arg.m, (accessFlags & ACCESS_WRITE) != 0); 03434 return i; 03435 } 03436 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)i, arg.sz, arg.obj) == CL_SUCCESS); 03437 return i+1; 03438 } 03439 03440 03441 bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[], 03442 bool sync, const Queue& q) 03443 { 03444 if(!p || !p->handle || p->e != 0) 03445 return false; 03446 03447 cl_command_queue qq = getQueue(q); 03448 size_t offset[CV_MAX_DIM] = {0}, globalsize[CV_MAX_DIM] = {1,1,1}; 03449 size_t total = 1; 03450 CV_Assert(_globalsize != 0); 03451 for (int i = 0; i < dims; i++) 03452 { 03453 size_t val = _localsize ? _localsize[i] : 03454 dims == 1 ? 64 : dims == 2 ? (i == 0 ? 256 : 8) : dims == 3 ? (8>>(int)(i>0)) : 1; 03455 CV_Assert( val > 0 ); 03456 total *= _globalsize[i]; 03457 globalsize[i] = ((_globalsize[i] + val - 1)/val)*val; 03458 } 03459 if( total == 0 ) 03460 return true; 03461 if( p->haveTempDstUMats ) 03462 sync = true; 03463 cl_int retval = clEnqueueNDRangeKernel(qq, p->handle, (cl_uint)dims, 03464 offset, globalsize, _localsize, 0, 0, 03465 sync ? 0 : &p->e); 03466 #if CV_OPENCL_SHOW_RUN_ERRORS 03467 if (retval != CL_SUCCESS) 03468 { 03469 printf("OpenCL program returns error: %d\n", retval); 03470 fflush(stdout); 03471 } 03472 #endif 03473 if( sync || retval != CL_SUCCESS ) 03474 { 03475 CV_OclDbgAssert(clFinish(qq) == CL_SUCCESS); 03476 p->cleanupUMats(); 03477 } 03478 else 03479 { 03480 p->addref(); 03481 CV_OclDbgAssert(clSetEventCallback(p->e, CL_COMPLETE, oclCleanupCallback, p) == CL_SUCCESS); 03482 } 03483 return retval == CL_SUCCESS; 03484 } 03485 03486 bool Kernel::runTask(bool sync, const Queue& q) 03487 { 03488 if(!p || !p->handle || p->e != 0) 03489 return false; 03490 03491 cl_command_queue qq = getQueue(q); 03492 cl_int retval = clEnqueueTask(qq, p->handle, 0, 0, sync ? 0 : &p->e); 03493 if( sync || retval != CL_SUCCESS ) 03494 { 03495 CV_OclDbgAssert(clFinish(qq) == CL_SUCCESS); 03496 p->cleanupUMats(); 03497 } 03498 else 03499 { 03500 p->addref(); 03501 CV_OclDbgAssert(clSetEventCallback(p->e, CL_COMPLETE, oclCleanupCallback, p) == CL_SUCCESS); 03502 } 03503 return retval == CL_SUCCESS; 03504 } 03505 03506 03507 size_t Kernel::workGroupSize() const 03508 { 03509 if(!p || !p->handle) 03510 return 0; 03511 size_t val = 0, retsz = 0; 03512 cl_device_id dev = (cl_device_id)Device::getDefault().ptr(); 03513 return clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_WORK_GROUP_SIZE, 03514 sizeof(val), &val, &retsz) == CL_SUCCESS ? val : 0; 03515 } 03516 03517 size_t Kernel::preferedWorkGroupSizeMultiple() const 03518 { 03519 if(!p || !p->handle) 03520 return 0; 03521 size_t val = 0, retsz = 0; 03522 cl_device_id dev = (cl_device_id)Device::getDefault().ptr(); 03523 return clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, 03524 sizeof(val), &val, &retsz) == CL_SUCCESS ? val : 0; 03525 } 03526 03527 bool Kernel::compileWorkGroupSize(size_t wsz[]) const 03528 { 03529 if(!p || !p->handle || !wsz) 03530 return 0; 03531 size_t retsz = 0; 03532 cl_device_id dev = (cl_device_id)Device::getDefault().ptr(); 03533 return clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, 03534 sizeof(wsz[0])*3, wsz, &retsz) == CL_SUCCESS; 03535 } 03536 03537 size_t Kernel::localMemSize() const 03538 { 03539 if(!p || !p->handle) 03540 return 0; 03541 size_t retsz = 0; 03542 cl_ulong val = 0; 03543 cl_device_id dev = (cl_device_id)Device::getDefault().ptr(); 03544 return clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_LOCAL_MEM_SIZE, 03545 sizeof(val), &val, &retsz) == CL_SUCCESS ? (size_t)val : 0; 03546 } 03547 03548 /////////////////////////////////////////// Program ///////////////////////////////////////////// 03549 03550 struct Program::Impl 03551 { 03552 Impl(const ProgramSource& _src, 03553 const String& _buildflags, String& errmsg) 03554 { 03555 refcount = 1; 03556 const Context& ctx = Context::getDefault(); 03557 src = _src; 03558 buildflags = _buildflags; 03559 const String& srcstr = src.source(); 03560 const char* srcptr = srcstr.c_str(); 03561 size_t srclen = srcstr.size(); 03562 cl_int retval = 0; 03563 03564 handle = clCreateProgramWithSource((cl_context)ctx.ptr(), 1, &srcptr, &srclen, &retval); 03565 if( handle && retval == CL_SUCCESS ) 03566 { 03567 int i, n = (int)ctx.ndevices(); 03568 AutoBuffer<void*> deviceListBuf(n+1); 03569 void** deviceList = deviceListBuf; 03570 for( i = 0; i < n; i++ ) 03571 deviceList[i] = ctx.device(i).ptr(); 03572 03573 Device device = Device::getDefault(); 03574 if (device.isAMD()) 03575 buildflags += " -D AMD_DEVICE"; 03576 else if (device.isIntel()) 03577 buildflags += " -D INTEL_DEVICE"; 03578 03579 retval = clBuildProgram(handle, n, 03580 (const cl_device_id*)deviceList, 03581 buildflags.c_str(), 0, 0); 03582 #if !CV_OPENCL_ALWAYS_SHOW_BUILD_LOG 03583 if( retval != CL_SUCCESS ) 03584 #endif 03585 { 03586 size_t retsz = 0; 03587 cl_int buildInfo_retval = clGetProgramBuildInfo(handle, (cl_device_id)deviceList[0], 03588 CL_PROGRAM_BUILD_LOG, 0, 0, &retsz); 03589 if (buildInfo_retval == CL_SUCCESS && retsz > 1) 03590 { 03591 AutoBuffer<char> bufbuf(retsz + 16); 03592 char* buf = bufbuf; 03593 buildInfo_retval = clGetProgramBuildInfo(handle, (cl_device_id)deviceList[0], 03594 CL_PROGRAM_BUILD_LOG, retsz+1, buf, &retsz); 03595 if (buildInfo_retval == CL_SUCCESS) 03596 { 03597 // TODO It is useful to see kernel name & program file name also 03598 errmsg = String(buf); 03599 printf("OpenCL program build log: %s\n%s\n", buildflags.c_str(), errmsg.c_str()); 03600 fflush(stdout); 03601 } 03602 } 03603 if (retval != CL_SUCCESS && handle) 03604 { 03605 clReleaseProgram(handle); 03606 handle = NULL; 03607 } 03608 } 03609 } 03610 } 03611 03612 Impl(const String& _buf, const String& _buildflags) 03613 { 03614 refcount = 1; 03615 handle = 0; 03616 buildflags = _buildflags; 03617 if(_buf.empty()) 03618 return; 03619 String prefix0 = Program::getPrefix(buildflags); 03620 const Context& ctx = Context::getDefault(); 03621 const Device& dev = Device::getDefault(); 03622 const char* pos0 = _buf.c_str(); 03623 const char* pos1 = strchr(pos0, '\n'); 03624 if(!pos1) 03625 return; 03626 const char* pos2 = strchr(pos1+1, '\n'); 03627 if(!pos2) 03628 return; 03629 const char* pos3 = strchr(pos2+1, '\n'); 03630 if(!pos3) 03631 return; 03632 size_t prefixlen = (pos3 - pos0)+1; 03633 String prefix(pos0, prefixlen); 03634 if( prefix != prefix0 ) 03635 return; 03636 const uchar* bin = (uchar*)(pos3+1); 03637 void* devid = dev.ptr(); 03638 size_t codelen = _buf.length() - prefixlen; 03639 cl_int binstatus = 0, retval = 0; 03640 handle = clCreateProgramWithBinary((cl_context)ctx.ptr(), 1, (cl_device_id*)&devid, 03641 &codelen, &bin, &binstatus, &retval); 03642 CV_OclDbgAssert(retval == CL_SUCCESS); 03643 } 03644 03645 String store() 03646 { 03647 if(!handle) 03648 return String(); 03649 size_t progsz = 0, retsz = 0; 03650 String prefix = Program::getPrefix(buildflags); 03651 size_t prefixlen = prefix.length(); 03652 if(clGetProgramInfo(handle, CL_PROGRAM_BINARY_SIZES, sizeof(progsz), &progsz, &retsz) != CL_SUCCESS) 03653 return String(); 03654 AutoBuffer<uchar> bufbuf(prefixlen + progsz + 16); 03655 uchar* buf = bufbuf; 03656 memcpy(buf, prefix.c_str(), prefixlen); 03657 buf += prefixlen; 03658 if(clGetProgramInfo(handle, CL_PROGRAM_BINARIES, sizeof(buf), &buf, &retsz) != CL_SUCCESS) 03659 return String(); 03660 buf[progsz] = (uchar)'\0'; 03661 return String((const char*)(uchar*)bufbuf, prefixlen + progsz); 03662 } 03663 03664 ~Impl() 03665 { 03666 if( handle ) 03667 { 03668 #ifdef _WIN32 03669 if (!cv::__termination) 03670 #endif 03671 { 03672 clReleaseProgram(handle); 03673 } 03674 handle = NULL; 03675 } 03676 } 03677 03678 IMPLEMENT_REFCOUNTABLE(); 03679 03680 ProgramSource src; 03681 String buildflags; 03682 cl_program handle; 03683 }; 03684 03685 03686 Program::Program() { p = 0; } 03687 03688 Program::Program(const ProgramSource& src, 03689 const String& buildflags, String& errmsg) 03690 { 03691 p = 0; 03692 create(src, buildflags, errmsg); 03693 } 03694 03695 Program::Program(const Program& prog) 03696 { 03697 p = prog.p; 03698 if(p) 03699 p->addref(); 03700 } 03701 03702 Program& Program::operator = (const Program& prog) 03703 { 03704 Impl* newp = (Impl*)prog.p; 03705 if(newp) 03706 newp->addref(); 03707 if(p) 03708 p->release(); 03709 p = newp; 03710 return *this; 03711 } 03712 03713 Program::~Program() 03714 { 03715 if(p) 03716 p->release(); 03717 } 03718 03719 bool Program::create(const ProgramSource& src, 03720 const String& buildflags, String& errmsg) 03721 { 03722 if(p) 03723 p->release(); 03724 p = new Impl(src, buildflags, errmsg); 03725 if(!p->handle) 03726 { 03727 p->release(); 03728 p = 0; 03729 } 03730 return p != 0; 03731 } 03732 03733 const ProgramSource& Program::source() const 03734 { 03735 static ProgramSource dummy; 03736 return p ? p->src : dummy; 03737 } 03738 03739 void* Program::ptr() const 03740 { 03741 return p ? p->handle : 0; 03742 } 03743 03744 bool Program::read(const String& bin, const String& buildflags) 03745 { 03746 if(p) 03747 p->release(); 03748 p = new Impl(bin, buildflags); 03749 return p->handle != 0; 03750 } 03751 03752 bool Program::write(String& bin) const 03753 { 03754 if(!p) 03755 return false; 03756 bin = p->store(); 03757 return !bin.empty(); 03758 } 03759 03760 String Program::getPrefix() const 03761 { 03762 if(!p) 03763 return String(); 03764 return getPrefix(p->buildflags); 03765 } 03766 03767 String Program::getPrefix(const String& buildflags) 03768 { 03769 const Context& ctx = Context::getDefault(); 03770 const Device& dev = ctx.device(0); 03771 return format("name=%s\ndriver=%s\nbuildflags=%s\n", 03772 dev.name().c_str(), dev.driverVersion().c_str(), buildflags.c_str()); 03773 } 03774 03775 ///////////////////////////////////////// ProgramSource /////////////////////////////////////////////// 03776 03777 struct ProgramSource::Impl 03778 { 03779 Impl(const char* _src) 03780 { 03781 init(String(_src)); 03782 } 03783 Impl(const String& _src) 03784 { 03785 init(_src); 03786 } 03787 void init(const String& _src) 03788 { 03789 refcount = 1; 03790 src = _src; 03791 h = crc64((uchar*)src.c_str(), src.size()); 03792 } 03793 03794 IMPLEMENT_REFCOUNTABLE(); 03795 String src; 03796 ProgramSource::hash_t h; 03797 }; 03798 03799 03800 ProgramSource::ProgramSource() 03801 { 03802 p = 0; 03803 } 03804 03805 ProgramSource::ProgramSource(const char* prog) 03806 { 03807 p = new Impl(prog); 03808 } 03809 03810 ProgramSource::ProgramSource(const String& prog) 03811 { 03812 p = new Impl(prog); 03813 } 03814 03815 ProgramSource::~ProgramSource() 03816 { 03817 if(p) 03818 p->release(); 03819 } 03820 03821 ProgramSource::ProgramSource(const ProgramSource& prog) 03822 { 03823 p = prog.p; 03824 if(p) 03825 p->addref(); 03826 } 03827 03828 ProgramSource& ProgramSource::operator = (const ProgramSource& prog) 03829 { 03830 Impl* newp = (Impl*)prog.p; 03831 if(newp) 03832 newp->addref(); 03833 if(p) 03834 p->release(); 03835 p = newp; 03836 return *this; 03837 } 03838 03839 const String& ProgramSource::source() const 03840 { 03841 static String dummy; 03842 return p ? p->src : dummy; 03843 } 03844 03845 ProgramSource::hash_t ProgramSource::hash() const 03846 { 03847 return p ? p->h : 0; 03848 } 03849 03850 //////////////////////////////////////////// OpenCLAllocator ////////////////////////////////////////////////// 03851 03852 template<typename T> 03853 class OpenCLBufferPool 03854 { 03855 protected: 03856 ~OpenCLBufferPool() { } 03857 public: 03858 virtual T allocate(size_t size) = 0; 03859 virtual void release(T buffer) = 0; 03860 }; 03861 03862 template <typename Derived, typename BufferEntry, typename T> 03863 class OpenCLBufferPoolBaseImpl : public BufferPoolController, public OpenCLBufferPool<T> 03864 { 03865 private: 03866 inline Derived& derived() { return *static_cast<Derived*>(this); } 03867 protected: 03868 Mutex mutex_; 03869 03870 size_t currentReservedSize; 03871 size_t maxReservedSize; 03872 03873 std::list<BufferEntry> allocatedEntries_; // Allocated and used entries 03874 std::list<BufferEntry> reservedEntries_; // LRU order. Allocated, but not used entries 03875 03876 // synchronized 03877 bool _findAndRemoveEntryFromAllocatedList(CV_OUT BufferEntry& entry, T buffer) 03878 { 03879 typename std::list<BufferEntry>::iterator i = allocatedEntries_.begin(); 03880 for (; i != allocatedEntries_.end(); ++i) 03881 { 03882 BufferEntry& e = *i; 03883 if (e.clBuffer_ == buffer) 03884 { 03885 entry = e; 03886 allocatedEntries_.erase(i); 03887 return true; 03888 } 03889 } 03890 return false; 03891 } 03892 03893 // synchronized 03894 bool _findAndRemoveEntryFromReservedList(CV_OUT BufferEntry& entry, const size_t size) 03895 { 03896 if (reservedEntries_.empty()) 03897 return false; 03898 typename std::list<BufferEntry>::iterator i = reservedEntries_.begin(); 03899 typename std::list<BufferEntry>::iterator result_pos = reservedEntries_.end(); 03900 BufferEntry result; 03901 size_t minDiff = (size_t)(-1); 03902 for (; i != reservedEntries_.end(); ++i) 03903 { 03904 BufferEntry& e = *i; 03905 if (e.capacity_ >= size) 03906 { 03907 size_t diff = e.capacity_ - size; 03908 if (diff < std::max((size_t)4096, size / 8) && (result_pos == reservedEntries_.end() || diff < minDiff)) 03909 { 03910 minDiff = diff; 03911 result_pos = i; 03912 result = e; 03913 if (diff == 0) 03914 break; 03915 } 03916 } 03917 } 03918 if (result_pos != reservedEntries_.end()) 03919 { 03920 //CV_DbgAssert(result == *result_pos); 03921 reservedEntries_.erase(result_pos); 03922 entry = result; 03923 currentReservedSize -= entry.capacity_; 03924 allocatedEntries_.push_back(entry); 03925 return true; 03926 } 03927 return false; 03928 } 03929 03930 // synchronized 03931 void _checkSizeOfReservedEntries() 03932 { 03933 while (currentReservedSize > maxReservedSize) 03934 { 03935 CV_DbgAssert(!reservedEntries_.empty()); 03936 const BufferEntry& entry = reservedEntries_.back(); 03937 CV_DbgAssert(currentReservedSize >= entry.capacity_); 03938 currentReservedSize -= entry.capacity_; 03939 derived()._releaseBufferEntry(entry); 03940 reservedEntries_.pop_back(); 03941 } 03942 } 03943 03944 inline size_t _allocationGranularity(size_t size) 03945 { 03946 // heuristic values 03947 if (size < 1024*1024) 03948 return 4096; // don't work with buffers smaller than 4Kb (hidden allocation overhead issue) 03949 else if (size < 16*1024*1024) 03950 return 64*1024; 03951 else 03952 return 1024*1024; 03953 } 03954 03955 public: 03956 OpenCLBufferPoolBaseImpl() 03957 : currentReservedSize(0), 03958 maxReservedSize(0) 03959 { 03960 // nothing 03961 } 03962 virtual ~OpenCLBufferPoolBaseImpl() 03963 { 03964 freeAllReservedBuffers(); 03965 CV_Assert(reservedEntries_.empty()); 03966 } 03967 public: 03968 virtual T allocate(size_t size) 03969 { 03970 AutoLock locker(mutex_); 03971 BufferEntry entry; 03972 if (maxReservedSize > 0 && _findAndRemoveEntryFromReservedList(entry, size)) 03973 { 03974 CV_DbgAssert(size <= entry.capacity_); 03975 LOG_BUFFER_POOL("Reuse reserved buffer: %p\n", entry.clBuffer_); 03976 } 03977 else 03978 { 03979 derived()._allocateBufferEntry(entry, size); 03980 } 03981 return entry.clBuffer_; 03982 } 03983 virtual void release(T buffer) 03984 { 03985 AutoLock locker(mutex_); 03986 BufferEntry entry; 03987 CV_Assert(_findAndRemoveEntryFromAllocatedList(entry, buffer)); 03988 if (maxReservedSize == 0 || entry.capacity_ > maxReservedSize / 8) 03989 { 03990 derived()._releaseBufferEntry(entry); 03991 } 03992 else 03993 { 03994 reservedEntries_.push_front(entry); 03995 currentReservedSize += entry.capacity_; 03996 _checkSizeOfReservedEntries(); 03997 } 03998 } 03999 04000 virtual size_t getReservedSize() const { return currentReservedSize; } 04001 virtual size_t getMaxReservedSize() const { return maxReservedSize; } 04002 virtual void setMaxReservedSize(size_t size) 04003 { 04004 AutoLock locker(mutex_); 04005 size_t oldMaxReservedSize = maxReservedSize; 04006 maxReservedSize = size; 04007 if (maxReservedSize < oldMaxReservedSize) 04008 { 04009 typename std::list<BufferEntry>::iterator i = reservedEntries_.begin(); 04010 for (; i != reservedEntries_.end();) 04011 { 04012 const BufferEntry& entry = *i; 04013 if (entry.capacity_ > maxReservedSize / 8) 04014 { 04015 CV_DbgAssert(currentReservedSize >= entry.capacity_); 04016 currentReservedSize -= entry.capacity_; 04017 derived()._releaseBufferEntry(entry); 04018 i = reservedEntries_.erase(i); 04019 continue; 04020 } 04021 ++i; 04022 } 04023 _checkSizeOfReservedEntries(); 04024 } 04025 } 04026 virtual void freeAllReservedBuffers() 04027 { 04028 AutoLock locker(mutex_); 04029 typename std::list<BufferEntry>::const_iterator i = reservedEntries_.begin(); 04030 for (; i != reservedEntries_.end(); ++i) 04031 { 04032 const BufferEntry& entry = *i; 04033 derived()._releaseBufferEntry(entry); 04034 } 04035 reservedEntries_.clear(); 04036 currentReservedSize = 0; 04037 } 04038 }; 04039 04040 struct CLBufferEntry 04041 { 04042 cl_mem clBuffer_; 04043 size_t capacity_; 04044 CLBufferEntry() : clBuffer_((cl_mem)NULL), capacity_(0) { } 04045 }; 04046 04047 class OpenCLBufferPoolImpl : public OpenCLBufferPoolBaseImpl<OpenCLBufferPoolImpl, CLBufferEntry, cl_mem> 04048 { 04049 public: 04050 typedef struct CLBufferEntry BufferEntry; 04051 protected: 04052 int createFlags_; 04053 public: 04054 OpenCLBufferPoolImpl(int createFlags = 0) 04055 : createFlags_(createFlags) 04056 { 04057 } 04058 04059 void _allocateBufferEntry(BufferEntry& entry, size_t size) 04060 { 04061 CV_DbgAssert(entry.clBuffer_ == NULL); 04062 entry.capacity_ = alignSize(size, (int)_allocationGranularity(size)); 04063 Context& ctx = Context::getDefault(); 04064 cl_int retval = CL_SUCCESS; 04065 entry.clBuffer_ = clCreateBuffer((cl_context)ctx.ptr(), CL_MEM_READ_WRITE|createFlags_, entry.capacity_, 0, &retval); 04066 CV_Assert(retval == CL_SUCCESS); 04067 CV_Assert(entry.clBuffer_ != NULL); 04068 if(retval == CL_SUCCESS) 04069 { 04070 CV_IMPL_ADD(CV_IMPL_OCL); 04071 } 04072 LOG_BUFFER_POOL("OpenCL allocate %lld (0x%llx) bytes: %p\n", 04073 (long long)entry.capacity_, (long long)entry.capacity_, entry.clBuffer_); 04074 allocatedEntries_.push_back(entry); 04075 } 04076 04077 void _releaseBufferEntry(const BufferEntry& entry) 04078 { 04079 CV_Assert(entry.capacity_ != 0); 04080 CV_Assert(entry.clBuffer_ != NULL); 04081 LOG_BUFFER_POOL("OpenCL release buffer: %p, %lld (0x%llx) bytes\n", 04082 entry.clBuffer_, (long long)entry.capacity_, (long long)entry.capacity_); 04083 clReleaseMemObject(entry.clBuffer_); 04084 } 04085 }; 04086 04087 #ifdef HAVE_OPENCL_SVM 04088 struct CLSVMBufferEntry 04089 { 04090 void* clBuffer_; 04091 size_t capacity_; 04092 CLSVMBufferEntry() : clBuffer_(NULL), capacity_(0) { } 04093 }; 04094 class OpenCLSVMBufferPoolImpl : public OpenCLBufferPoolBaseImpl<OpenCLSVMBufferPoolImpl, CLSVMBufferEntry, void*> 04095 { 04096 public: 04097 typedef struct CLSVMBufferEntry BufferEntry; 04098 public: 04099 OpenCLSVMBufferPoolImpl() 04100 { 04101 } 04102 04103 void _allocateBufferEntry(BufferEntry& entry, size_t size) 04104 { 04105 CV_DbgAssert(entry.clBuffer_ == NULL); 04106 entry.capacity_ = alignSize(size, (int)_allocationGranularity(size)); 04107 04108 Context& ctx = Context::getDefault(); 04109 const svm::SVMCapabilities svmCaps = svm::getSVMCapabilitites(ctx); 04110 bool isFineGrainBuffer = svmCaps.isSupportFineGrainBuffer(); 04111 cl_svm_mem_flags memFlags = CL_MEM_READ_WRITE | 04112 (isFineGrainBuffer ? CL_MEM_SVM_FINE_GRAIN_BUFFER : 0); 04113 04114 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx); 04115 CV_DbgAssert(svmFns->isValid()); 04116 04117 CV_OPENCL_SVM_TRACE_P("clSVMAlloc: %d\n", (int)entry.capacity_); 04118 void *buf = svmFns->fn_clSVMAlloc((cl_context)ctx.ptr(), memFlags, entry.capacity_, 0); 04119 CV_Assert(buf); 04120 04121 entry.clBuffer_ = buf; 04122 { 04123 CV_IMPL_ADD(CV_IMPL_OCL); 04124 } 04125 LOG_BUFFER_POOL("OpenCL SVM allocate %lld (0x%llx) bytes: %p\n", 04126 (long long)entry.capacity_, (long long)entry.capacity_, entry.clBuffer_); 04127 allocatedEntries_.push_back(entry); 04128 } 04129 04130 void _releaseBufferEntry(const BufferEntry& entry) 04131 { 04132 CV_Assert(entry.capacity_ != 0); 04133 CV_Assert(entry.clBuffer_ != NULL); 04134 LOG_BUFFER_POOL("OpenCL release SVM buffer: %p, %lld (0x%llx) bytes\n", 04135 entry.clBuffer_, (long long)entry.capacity_, (long long)entry.capacity_); 04136 Context& ctx = Context::getDefault(); 04137 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx); 04138 CV_DbgAssert(svmFns->isValid()); 04139 CV_OPENCL_SVM_TRACE_P("clSVMFree: %p\n", entry.clBuffer_); 04140 svmFns->fn_clSVMFree((cl_context)ctx.ptr(), entry.clBuffer_); 04141 } 04142 }; 04143 #endif 04144 04145 04146 04147 #if defined _MSC_VER 04148 #pragma warning(disable:4127) // conditional expression is constant 04149 #endif 04150 template <bool readAccess, bool writeAccess> 04151 class AlignedDataPtr 04152 { 04153 protected: 04154 const size_t size_; 04155 uchar* const originPtr_; 04156 const size_t alignment_; 04157 uchar* ptr_; 04158 uchar* allocatedPtr_; 04159 04160 public: 04161 AlignedDataPtr(uchar* ptr, size_t size, size_t alignment) 04162 : size_(size), originPtr_(ptr), alignment_(alignment), ptr_(ptr), allocatedPtr_(NULL) 04163 { 04164 CV_DbgAssert((alignment & (alignment - 1)) == 0); // check for 2^n 04165 if (((size_t)ptr_ & (alignment - 1)) != 0) 04166 { 04167 allocatedPtr_ = new uchar[size_ + alignment - 1]; 04168 ptr_ = (uchar*)(((uintptr_t)allocatedPtr_ + (alignment - 1)) & ~(alignment - 1)); 04169 if (readAccess) 04170 { 04171 memcpy(ptr_, originPtr_, size_); 04172 } 04173 } 04174 } 04175 04176 uchar* getAlignedPtr() const 04177 { 04178 CV_DbgAssert(((size_t)ptr_ & (alignment_ - 1)) == 0); 04179 return ptr_; 04180 } 04181 04182 ~AlignedDataPtr() 04183 { 04184 if (allocatedPtr_) 04185 { 04186 if (writeAccess) 04187 { 04188 memcpy(originPtr_, ptr_, size_); 04189 } 04190 delete[] allocatedPtr_; 04191 allocatedPtr_ = NULL; 04192 } 04193 ptr_ = NULL; 04194 } 04195 private: 04196 AlignedDataPtr(const AlignedDataPtr&); // disabled 04197 AlignedDataPtr& operator=(const AlignedDataPtr&); // disabled 04198 }; 04199 04200 template <bool readAccess, bool writeAccess> 04201 class AlignedDataPtr2D 04202 { 04203 protected: 04204 const size_t size_; 04205 uchar* const originPtr_; 04206 const size_t alignment_; 04207 uchar* ptr_; 04208 uchar* allocatedPtr_; 04209 size_t rows_; 04210 size_t cols_; 04211 size_t step_; 04212 04213 public: 04214 AlignedDataPtr2D(uchar* ptr, size_t rows, size_t cols, size_t step, size_t alignment) 04215 : size_(rows*step), originPtr_(ptr), alignment_(alignment), ptr_(ptr), allocatedPtr_(NULL), rows_(rows), cols_(cols), step_(step) 04216 { 04217 CV_DbgAssert((alignment & (alignment - 1)) == 0); // check for 2^n 04218 if (((size_t)ptr_ & (alignment - 1)) != 0) 04219 { 04220 allocatedPtr_ = new uchar[size_ + alignment - 1]; 04221 ptr_ = (uchar*)(((uintptr_t)allocatedPtr_ + (alignment - 1)) & ~(alignment - 1)); 04222 if (readAccess) 04223 { 04224 for (size_t i = 0; i < rows_; i++) 04225 memcpy(ptr_ + i*step_, originPtr_ + i*step_, cols_); 04226 } 04227 } 04228 } 04229 04230 uchar* getAlignedPtr() const 04231 { 04232 CV_DbgAssert(((size_t)ptr_ & (alignment_ - 1)) == 0); 04233 return ptr_; 04234 } 04235 04236 ~AlignedDataPtr2D() 04237 { 04238 if (allocatedPtr_) 04239 { 04240 if (writeAccess) 04241 { 04242 for (size_t i = 0; i < rows_; i++) 04243 memcpy(originPtr_ + i*step_, ptr_ + i*step_, cols_); 04244 } 04245 delete[] allocatedPtr_; 04246 allocatedPtr_ = NULL; 04247 } 04248 ptr_ = NULL; 04249 } 04250 private: 04251 AlignedDataPtr2D(const AlignedDataPtr2D&); // disabled 04252 AlignedDataPtr2D& operator=(const AlignedDataPtr2D&); // disabled 04253 }; 04254 #if defined _MSC_VER 04255 #pragma warning(default:4127) // conditional expression is constant 04256 #endif 04257 04258 #ifndef CV_OPENCL_DATA_PTR_ALIGNMENT 04259 #define CV_OPENCL_DATA_PTR_ALIGNMENT 16 04260 #endif 04261 04262 class OpenCLAllocator : public MatAllocator 04263 { 04264 mutable OpenCLBufferPoolImpl bufferPool; 04265 mutable OpenCLBufferPoolImpl bufferPoolHostPtr; 04266 #ifdef HAVE_OPENCL_SVM 04267 mutable OpenCLSVMBufferPoolImpl bufferPoolSVM; 04268 #endif 04269 04270 enum AllocatorFlags 04271 { 04272 ALLOCATOR_FLAGS_BUFFER_POOL_USED = 1 << 0, 04273 ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED = 1 << 1 04274 #ifdef HAVE_OPENCL_SVM 04275 ,ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED = 1 << 2 04276 #endif 04277 }; 04278 public: 04279 OpenCLAllocator() 04280 : bufferPool(0), 04281 bufferPoolHostPtr(CL_MEM_ALLOC_HOST_PTR) 04282 { 04283 size_t defaultPoolSize, poolSize; 04284 defaultPoolSize = ocl::Device::getDefault().isIntel() ? 1 << 27 : 0; 04285 poolSize = getConfigurationParameterForSize("OPENCV_OPENCL_BUFFERPOOL_LIMIT", defaultPoolSize); 04286 bufferPool.setMaxReservedSize(poolSize); 04287 poolSize = getConfigurationParameterForSize("OPENCV_OPENCL_HOST_PTR_BUFFERPOOL_LIMIT", defaultPoolSize); 04288 bufferPoolHostPtr.setMaxReservedSize(poolSize); 04289 #ifdef HAVE_OPENCL_SVM 04290 poolSize = getConfigurationParameterForSize("OPENCV_OPENCL_SVM_BUFFERPOOL_LIMIT", defaultPoolSize); 04291 bufferPoolSVM.setMaxReservedSize(poolSize); 04292 #endif 04293 04294 matStdAllocator = Mat::getDefaultAllocator(); 04295 } 04296 04297 UMatData* defaultAllocate(int dims, const int* sizes, int type, void* data, size_t* step, 04298 int flags, UMatUsageFlags usageFlags) const 04299 { 04300 UMatData* u = matStdAllocator->allocate(dims, sizes, type, data, step, flags, usageFlags); 04301 return u; 04302 } 04303 04304 void getBestFlags(const Context& ctx, int /*flags*/, UMatUsageFlags usageFlags, int& createFlags, int& flags0) const 04305 { 04306 const Device& dev = ctx.device(0); 04307 createFlags = 0; 04308 if ((usageFlags & USAGE_ALLOCATE_HOST_MEMORY) != 0) 04309 createFlags |= CL_MEM_ALLOC_HOST_PTR; 04310 04311 if( dev.hostUnifiedMemory() ) 04312 flags0 = 0; 04313 else 04314 flags0 = UMatData::COPY_ON_MAP; 04315 } 04316 04317 UMatData* allocate(int dims, const int* sizes, int type, 04318 void* data, size_t* step, int flags, UMatUsageFlags usageFlags) const 04319 { 04320 if(!useOpenCL()) 04321 return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags); 04322 CV_Assert(data == 0); 04323 size_t total = CV_ELEM_SIZE(type); 04324 for( int i = dims-1; i >= 0; i-- ) 04325 { 04326 if( step ) 04327 step[i] = total; 04328 total *= sizes[i]; 04329 } 04330 04331 Context& ctx = Context::getDefault(); 04332 04333 int createFlags = 0, flags0 = 0; 04334 getBestFlags(ctx, flags, usageFlags, createFlags, flags0); 04335 04336 void* handle = NULL; 04337 int allocatorFlags = 0; 04338 04339 #ifdef HAVE_OPENCL_SVM 04340 const svm::SVMCapabilities svmCaps = svm::getSVMCapabilitites(ctx); 04341 if (ctx.useSVM() && svm::useSVM(usageFlags) && !svmCaps.isNoSVMSupport()) 04342 { 04343 allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED; 04344 handle = bufferPoolSVM.allocate(total); 04345 04346 // this property is constant, so single buffer pool can be used here 04347 bool isFineGrainBuffer = svmCaps.isSupportFineGrainBuffer(); 04348 allocatorFlags |= isFineGrainBuffer ? svm::OPENCL_SVM_FINE_GRAIN_BUFFER : svm::OPENCL_SVM_COARSE_GRAIN_BUFFER; 04349 } 04350 else 04351 #endif 04352 if (createFlags == 0) 04353 { 04354 allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_USED; 04355 handle = bufferPool.allocate(total); 04356 } 04357 else if (createFlags == CL_MEM_ALLOC_HOST_PTR) 04358 { 04359 allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED; 04360 handle = bufferPoolHostPtr.allocate(total); 04361 } 04362 else 04363 { 04364 CV_Assert(handle != NULL); // Unsupported, throw 04365 } 04366 04367 if (!handle) 04368 return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags); 04369 04370 UMatData* u = new UMatData(this); 04371 u->data = 0; 04372 u->size = total; 04373 u->handle = handle; 04374 u->flags = flags0; 04375 u->allocatorFlags_ = allocatorFlags; 04376 CV_DbgAssert(!u->tempUMat()); // for bufferPool.release() consistency in deallocate() 04377 u->markHostCopyObsolete(true); 04378 return u; 04379 } 04380 04381 bool allocate(UMatData* u, int accessFlags, UMatUsageFlags usageFlags) const 04382 { 04383 if(!u) 04384 return false; 04385 04386 UMatDataAutoLock lock(u); 04387 04388 if(u->handle == 0) 04389 { 04390 CV_Assert(u->origdata != 0); 04391 Context& ctx = Context::getDefault(); 04392 int createFlags = 0, flags0 = 0; 04393 getBestFlags(ctx, accessFlags, usageFlags, createFlags, flags0); 04394 04395 cl_context ctx_handle = (cl_context)ctx.ptr(); 04396 int allocatorFlags = 0; 04397 int tempUMatFlags = 0; 04398 void* handle = NULL; 04399 cl_int retval = CL_SUCCESS; 04400 04401 #ifdef HAVE_OPENCL_SVM 04402 svm::SVMCapabilities svmCaps = svm::getSVMCapabilitites(ctx); 04403 bool useSVM = ctx.useSVM() && svm::useSVM(usageFlags); 04404 if (useSVM && svmCaps.isSupportFineGrainSystem()) 04405 { 04406 allocatorFlags = svm::OPENCL_SVM_FINE_GRAIN_SYSTEM; 04407 tempUMatFlags = UMatData::TEMP_UMAT; 04408 handle = u->origdata; 04409 CV_OPENCL_SVM_TRACE_P("Use fine grain system: %d (%p)\n", (int)u->size, handle); 04410 } 04411 else if (useSVM && (svmCaps.isSupportFineGrainBuffer() || svmCaps.isSupportCoarseGrainBuffer())) 04412 { 04413 if (!(accessFlags & ACCESS_FAST)) // memcpy used 04414 { 04415 bool isFineGrainBuffer = svmCaps.isSupportFineGrainBuffer(); 04416 04417 cl_svm_mem_flags memFlags = createFlags | 04418 (isFineGrainBuffer ? CL_MEM_SVM_FINE_GRAIN_BUFFER : 0); 04419 04420 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx); 04421 CV_DbgAssert(svmFns->isValid()); 04422 04423 CV_OPENCL_SVM_TRACE_P("clSVMAlloc + copy: %d\n", (int)u->size); 04424 handle = svmFns->fn_clSVMAlloc((cl_context)ctx.ptr(), memFlags, u->size, 0); 04425 CV_Assert(handle); 04426 04427 cl_command_queue q = NULL; 04428 if (!isFineGrainBuffer) 04429 { 04430 q = (cl_command_queue)Queue::getDefault().ptr(); 04431 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", handle, (int)u->size); 04432 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_TRUE, CL_MAP_WRITE, 04433 handle, u->size, 04434 0, NULL, NULL); 04435 CV_Assert(status == CL_SUCCESS); 04436 04437 } 04438 memcpy(handle, u->origdata, u->size); 04439 if (!isFineGrainBuffer) 04440 { 04441 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", handle); 04442 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, handle, 0, NULL, NULL); 04443 CV_Assert(status == CL_SUCCESS); 04444 } 04445 04446 tempUMatFlags = UMatData::TEMP_UMAT | UMatData::TEMP_COPIED_UMAT; 04447 allocatorFlags |= isFineGrainBuffer ? svm::OPENCL_SVM_FINE_GRAIN_BUFFER 04448 : svm::OPENCL_SVM_COARSE_GRAIN_BUFFER; 04449 } 04450 } 04451 else 04452 #endif 04453 { 04454 tempUMatFlags = UMatData::TEMP_UMAT; 04455 if (u->origdata == cv::alignPtr(u->origdata, 4)) // There are OpenCL runtime issues for less aligned data 04456 { 04457 handle = clCreateBuffer(ctx_handle, CL_MEM_USE_HOST_PTR|createFlags, 04458 u->size, u->origdata, &retval); 04459 } 04460 if((!handle || retval < 0) && !(accessFlags & ACCESS_FAST)) 04461 { 04462 handle = clCreateBuffer(ctx_handle, CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE|createFlags, 04463 u->size, u->origdata, &retval); 04464 tempUMatFlags |= UMatData::TEMP_COPIED_UMAT; 04465 } 04466 } 04467 if(!handle || retval != CL_SUCCESS) 04468 return false; 04469 u->handle = handle; 04470 u->prevAllocator = u->currAllocator; 04471 u->currAllocator = this; 04472 u->flags |= tempUMatFlags; 04473 u->allocatorFlags_ = allocatorFlags; 04474 } 04475 if(accessFlags & ACCESS_WRITE) 04476 u->markHostCopyObsolete(true); 04477 return true; 04478 } 04479 04480 /*void sync(UMatData* u) const 04481 { 04482 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr(); 04483 UMatDataAutoLock lock(u); 04484 04485 if( u->hostCopyObsolete() && u->handle && u->refcount > 0 && u->origdata) 04486 { 04487 if( u->tempCopiedUMat() ) 04488 { 04489 clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0, 04490 u->size, u->origdata, 0, 0, 0); 04491 } 04492 else 04493 { 04494 cl_int retval = 0; 04495 void* data = clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE, 04496 (CL_MAP_READ | CL_MAP_WRITE), 04497 0, u->size, 0, 0, 0, &retval); 04498 clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0); 04499 clFinish(q); 04500 } 04501 u->markHostCopyObsolete(false); 04502 } 04503 else if( u->copyOnMap() && u->deviceCopyObsolete() && u->data ) 04504 { 04505 clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE, 0, 04506 u->size, u->data, 0, 0, 0); 04507 } 04508 }*/ 04509 04510 void deallocate(UMatData* u) const 04511 { 04512 if(!u) 04513 return; 04514 04515 CV_Assert(u->urefcount == 0); 04516 CV_Assert(u->refcount == 0 && "UMat deallocation error: some derived Mat is still alive"); 04517 04518 CV_Assert(u->handle != 0); 04519 CV_Assert(u->mapcount == 0); 04520 if(u->tempUMat()) 04521 { 04522 CV_Assert(u->origdata); 04523 // UMatDataAutoLock lock(u); 04524 04525 if (u->hostCopyObsolete()) 04526 { 04527 #ifdef HAVE_OPENCL_SVM 04528 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0) 04529 { 04530 Context& ctx = Context::getDefault(); 04531 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx); 04532 CV_DbgAssert(svmFns->isValid()); 04533 04534 if( u->tempCopiedUMat() ) 04535 { 04536 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER || 04537 (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER); 04538 bool isFineGrainBuffer = (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER; 04539 cl_command_queue q = NULL; 04540 if (!isFineGrainBuffer) 04541 { 04542 CV_DbgAssert(((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0)); 04543 q = (cl_command_queue)Queue::getDefault().ptr(); 04544 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size); 04545 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_READ, 04546 u->handle, u->size, 04547 0, NULL, NULL); 04548 CV_Assert(status == CL_SUCCESS); 04549 } 04550 clFinish(q); 04551 memcpy(u->origdata, u->handle, u->size); 04552 if (!isFineGrainBuffer) 04553 { 04554 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle); 04555 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle, 0, NULL, NULL); 04556 CV_Assert(status == CL_SUCCESS); 04557 } 04558 } 04559 else 04560 { 04561 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM); 04562 // nothing 04563 } 04564 } 04565 else 04566 #endif 04567 { 04568 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr(); 04569 if( u->tempCopiedUMat() ) 04570 { 04571 AlignedDataPtr<false, true> alignedPtr(u->origdata, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT); 04572 CV_OclDbgAssert(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0, 04573 u->size, alignedPtr.getAlignedPtr(), 0, 0, 0) == CL_SUCCESS); 04574 } 04575 else 04576 { 04577 cl_int retval = 0; 04578 if (u->tempUMat()) 04579 { 04580 CV_Assert(u->mapcount == 0); 04581 void* data = clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE, 04582 (CL_MAP_READ | CL_MAP_WRITE), 04583 0, u->size, 0, 0, 0, &retval); 04584 CV_Assert(u->origdata == data); 04585 CV_OclDbgAssert(retval == CL_SUCCESS); 04586 if (u->originalUMatData) 04587 { 04588 CV_Assert(u->originalUMatData->data == data); 04589 } 04590 CV_OclDbgAssert(clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0) == CL_SUCCESS); 04591 CV_OclDbgAssert(clFinish(q) == CL_SUCCESS); 04592 } 04593 } 04594 } 04595 u->markHostCopyObsolete(false); 04596 } 04597 else 04598 { 04599 // nothing 04600 } 04601 #ifdef HAVE_OPENCL_SVM 04602 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0) 04603 { 04604 if( u->tempCopiedUMat() ) 04605 { 04606 Context& ctx = Context::getDefault(); 04607 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx); 04608 CV_DbgAssert(svmFns->isValid()); 04609 04610 CV_OPENCL_SVM_TRACE_P("clSVMFree: %p\n", u->handle); 04611 svmFns->fn_clSVMFree((cl_context)ctx.ptr(), u->handle); 04612 } 04613 } 04614 else 04615 #endif 04616 { 04617 clReleaseMemObject((cl_mem)u->handle); 04618 } 04619 u->handle = 0; 04620 u->markDeviceCopyObsolete(true); 04621 u->currAllocator = u->prevAllocator; 04622 u->prevAllocator = NULL; 04623 if(u->data && u->copyOnMap() && u->data != u->origdata) 04624 fastFree(u->data); 04625 u->data = u->origdata; 04626 u->currAllocator->deallocate(u); 04627 u = NULL; 04628 } 04629 else 04630 { 04631 CV_Assert(u->origdata == NULL); 04632 if(u->data && u->copyOnMap() && u->data != u->origdata) 04633 { 04634 fastFree(u->data); 04635 u->data = 0; 04636 u->markHostCopyObsolete(true); 04637 } 04638 if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_USED) 04639 { 04640 bufferPool.release((cl_mem)u->handle); 04641 } 04642 else if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED) 04643 { 04644 bufferPoolHostPtr.release((cl_mem)u->handle); 04645 } 04646 #ifdef HAVE_OPENCL_SVM 04647 else if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED) 04648 { 04649 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM) 04650 { 04651 //nothing 04652 } 04653 else if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER || 04654 (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER) 04655 { 04656 Context& ctx = Context::getDefault(); 04657 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx); 04658 CV_DbgAssert(svmFns->isValid()); 04659 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr(); 04660 04661 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) != 0) 04662 { 04663 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle); 04664 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle, 0, NULL, NULL); 04665 CV_Assert(status == CL_SUCCESS); 04666 } 04667 } 04668 bufferPoolSVM.release((void*)u->handle); 04669 } 04670 #endif 04671 else 04672 { 04673 clReleaseMemObject((cl_mem)u->handle); 04674 } 04675 u->handle = 0; 04676 u->markDeviceCopyObsolete(true); 04677 delete u; 04678 u = NULL; 04679 } 04680 CV_Assert(u == NULL); 04681 } 04682 04683 // synchronized call (external UMatDataAutoLock, see UMat::getMat) 04684 void map(UMatData* u, int accessFlags) const 04685 { 04686 CV_Assert(u && u->handle); 04687 04688 if(accessFlags & ACCESS_WRITE) 04689 u->markDeviceCopyObsolete(true); 04690 04691 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr(); 04692 04693 { 04694 if( !u->copyOnMap() ) 04695 { 04696 // TODO 04697 // because there can be other map requests for the same UMat with different access flags, 04698 // we use the universal (read-write) access mode. 04699 #ifdef HAVE_OPENCL_SVM 04700 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0) 04701 { 04702 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER) 04703 { 04704 Context& ctx = Context::getDefault(); 04705 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx); 04706 CV_DbgAssert(svmFns->isValid()); 04707 04708 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0) 04709 { 04710 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size); 04711 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_READ | CL_MAP_WRITE, 04712 u->handle, u->size, 04713 0, NULL, NULL); 04714 CV_Assert(status == CL_SUCCESS); 04715 u->allocatorFlags_ |= svm::OPENCL_SVM_BUFFER_MAP; 04716 } 04717 } 04718 clFinish(q); 04719 u->data = (uchar*)u->handle; 04720 u->markHostCopyObsolete(false); 04721 u->markDeviceMemMapped(true); 04722 return; 04723 } 04724 #endif 04725 04726 cl_int retval = CL_SUCCESS; 04727 if (!u->deviceMemMapped()) 04728 { 04729 CV_Assert(u->refcount == 1); 04730 CV_Assert(u->mapcount++ == 0); 04731 u->data = (uchar*)clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE, 04732 (CL_MAP_READ | CL_MAP_WRITE), 04733 0, u->size, 0, 0, 0, &retval); 04734 } 04735 if (u->data && retval == CL_SUCCESS) 04736 { 04737 u->markHostCopyObsolete(false); 04738 u->markDeviceMemMapped(true); 04739 return; 04740 } 04741 04742 // TODO Is it really a good idea and was it tested well? 04743 // if map failed, switch to copy-on-map mode for the particular buffer 04744 u->flags |= UMatData::COPY_ON_MAP; 04745 } 04746 04747 if(!u->data) 04748 { 04749 u->data = (uchar*)fastMalloc(u->size); 04750 u->markHostCopyObsolete(true); 04751 } 04752 } 04753 04754 if( (accessFlags & ACCESS_READ) != 0 && u->hostCopyObsolete() ) 04755 { 04756 AlignedDataPtr<false, true> alignedPtr(u->data, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT); 04757 #ifdef HAVE_OPENCL_SVM 04758 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == 0); 04759 #endif 04760 CV_Assert( clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0, 04761 u->size, alignedPtr.getAlignedPtr(), 0, 0, 0) == CL_SUCCESS ); 04762 u->markHostCopyObsolete(false); 04763 } 04764 } 04765 04766 void unmap(UMatData* u) const 04767 { 04768 if(!u) 04769 return; 04770 04771 04772 CV_Assert(u->handle != 0); 04773 04774 UMatDataAutoLock autolock(u); 04775 04776 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr(); 04777 cl_int retval = 0; 04778 if( !u->copyOnMap() && u->deviceMemMapped() ) 04779 { 04780 CV_Assert(u->data != NULL); 04781 #ifdef HAVE_OPENCL_SVM 04782 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0) 04783 { 04784 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER) 04785 { 04786 Context& ctx = Context::getDefault(); 04787 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx); 04788 CV_DbgAssert(svmFns->isValid()); 04789 04790 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) != 0); 04791 { 04792 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle); 04793 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle, 04794 0, NULL, NULL); 04795 CV_Assert(status == CL_SUCCESS); 04796 clFinish(q); 04797 u->allocatorFlags_ &= ~svm::OPENCL_SVM_BUFFER_MAP; 04798 } 04799 } 04800 if (u->refcount == 0) 04801 u->data = 0; 04802 u->markDeviceCopyObsolete(false); 04803 u->markHostCopyObsolete(true); 04804 return; 04805 } 04806 #endif 04807 if (u->refcount == 0) 04808 { 04809 CV_Assert(u->mapcount-- == 1); 04810 CV_Assert((retval = clEnqueueUnmapMemObject(q, 04811 (cl_mem)u->handle, u->data, 0, 0, 0)) == CL_SUCCESS); 04812 if (Device::getDefault().isAMD()) 04813 { 04814 // required for multithreaded applications (see stitching test) 04815 CV_OclDbgAssert(clFinish(q) == CL_SUCCESS); 04816 } 04817 u->markDeviceMemMapped(false); 04818 u->data = 0; 04819 u->markDeviceCopyObsolete(false); 04820 u->markHostCopyObsolete(true); 04821 } 04822 } 04823 else if( u->copyOnMap() && u->deviceCopyObsolete() ) 04824 { 04825 AlignedDataPtr<true, false> alignedPtr(u->data, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT); 04826 #ifdef HAVE_OPENCL_SVM 04827 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == 0); 04828 #endif 04829 CV_Assert( (retval = clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE, 0, 04830 u->size, alignedPtr.getAlignedPtr(), 0, 0, 0)) == CL_SUCCESS ); 04831 u->markDeviceCopyObsolete(false); 04832 u->markHostCopyObsolete(true); 04833 } 04834 } 04835 04836 bool checkContinuous(int dims, const size_t sz[], 04837 const size_t srcofs[], const size_t srcstep[], 04838 const size_t dstofs[], const size_t dststep[], 04839 size_t& total, size_t new_sz[], 04840 size_t& srcrawofs, size_t new_srcofs[], size_t new_srcstep[], 04841 size_t& dstrawofs, size_t new_dstofs[], size_t new_dststep[]) const 04842 { 04843 bool iscontinuous = true; 04844 srcrawofs = srcofs ? srcofs[dims-1] : 0; 04845 dstrawofs = dstofs ? dstofs[dims-1] : 0; 04846 total = sz[dims-1]; 04847 for( int i = dims-2; i >= 0; i-- ) 04848 { 04849 if( i >= 0 && (total != srcstep[i] || total != dststep[i]) ) 04850 iscontinuous = false; 04851 total *= sz[i]; 04852 if( srcofs ) 04853 srcrawofs += srcofs[i]*srcstep[i]; 04854 if( dstofs ) 04855 dstrawofs += dstofs[i]*dststep[i]; 04856 } 04857 04858 if( !iscontinuous ) 04859 { 04860 // OpenCL uses {x, y, z} order while OpenCV uses {z, y, x} order. 04861 if( dims == 2 ) 04862 { 04863 new_sz[0] = sz[1]; new_sz[1] = sz[0]; new_sz[2] = 1; 04864 // we assume that new_... arrays are initialized by caller 04865 // with 0's, so there is no else branch 04866 if( srcofs ) 04867 { 04868 new_srcofs[0] = srcofs[1]; 04869 new_srcofs[1] = srcofs[0]; 04870 new_srcofs[2] = 0; 04871 } 04872 04873 if( dstofs ) 04874 { 04875 new_dstofs[0] = dstofs[1]; 04876 new_dstofs[1] = dstofs[0]; 04877 new_dstofs[2] = 0; 04878 } 04879 04880 new_srcstep[0] = srcstep[0]; new_srcstep[1] = 0; 04881 new_dststep[0] = dststep[0]; new_dststep[1] = 0; 04882 } 04883 else 04884 { 04885 // we could check for dims == 3 here, 04886 // but from user perspective this one is more informative 04887 CV_Assert(dims <= 3); 04888 new_sz[0] = sz[2]; new_sz[1] = sz[1]; new_sz[2] = sz[0]; 04889 if( srcofs ) 04890 { 04891 new_srcofs[0] = srcofs[2]; 04892 new_srcofs[1] = srcofs[1]; 04893 new_srcofs[2] = srcofs[0]; 04894 } 04895 04896 if( dstofs ) 04897 { 04898 new_dstofs[0] = dstofs[2]; 04899 new_dstofs[1] = dstofs[1]; 04900 new_dstofs[2] = dstofs[0]; 04901 } 04902 04903 new_srcstep[0] = srcstep[1]; new_srcstep[1] = srcstep[0]; 04904 new_dststep[0] = dststep[1]; new_dststep[1] = dststep[0]; 04905 } 04906 } 04907 return iscontinuous; 04908 } 04909 04910 void download(UMatData* u, void* dstptr, int dims, const size_t sz[], 04911 const size_t srcofs[], const size_t srcstep[], 04912 const size_t dststep[]) const 04913 { 04914 if(!u) 04915 return; 04916 UMatDataAutoLock autolock(u); 04917 04918 if( u->data && !u->hostCopyObsolete() ) 04919 { 04920 Mat::getDefaultAllocator()->download(u, dstptr, dims, sz, srcofs, srcstep, dststep); 04921 return; 04922 } 04923 CV_Assert( u->handle != 0 ); 04924 04925 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr(); 04926 04927 size_t total = 0, new_sz[] = {0, 0, 0}; 04928 size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0}; 04929 size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0}; 04930 04931 bool iscontinuous = checkContinuous(dims, sz, srcofs, srcstep, 0, dststep, 04932 total, new_sz, 04933 srcrawofs, new_srcofs, new_srcstep, 04934 dstrawofs, new_dstofs, new_dststep); 04935 04936 #ifdef HAVE_OPENCL_SVM 04937 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0) 04938 { 04939 CV_DbgAssert(u->data == NULL || u->data == u->handle); 04940 Context& ctx = Context::getDefault(); 04941 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx); 04942 CV_DbgAssert(svmFns->isValid()); 04943 04944 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0); 04945 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER) 04946 { 04947 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size); 04948 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_READ, 04949 u->handle, u->size, 04950 0, NULL, NULL); 04951 CV_Assert(status == CL_SUCCESS); 04952 } 04953 clFinish(q); 04954 if( iscontinuous ) 04955 { 04956 memcpy(dstptr, (uchar*)u->handle + srcrawofs, total); 04957 } 04958 else 04959 { 04960 // This code is from MatAllocator::download() 04961 int isz[CV_MAX_DIM]; 04962 uchar* srcptr = (uchar*)u->handle; 04963 for( int i = 0; i < dims; i++ ) 04964 { 04965 CV_Assert( sz[i] <= (size_t)INT_MAX ); 04966 if( sz[i] == 0 ) 04967 return; 04968 if( srcofs ) 04969 srcptr += srcofs[i]*(i <= dims-2 ? srcstep[i] : 1); 04970 isz[i] = (int)sz[i]; 04971 } 04972 04973 Mat src(dims, isz, CV_8U, srcptr, srcstep); 04974 Mat dst(dims, isz, CV_8U, dstptr, dststep); 04975 04976 const Mat* arrays[] = { &src, &dst }; 04977 uchar* ptrs[2]; 04978 NAryMatIterator it(arrays, ptrs, 2); 04979 size_t j, planesz = it.size; 04980 04981 for( j = 0; j < it.nplanes; j++, ++it ) 04982 memcpy(ptrs[1], ptrs[0], planesz); 04983 } 04984 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER) 04985 { 04986 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle); 04987 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle, 04988 0, NULL, NULL); 04989 CV_Assert(status == CL_SUCCESS); 04990 clFinish(q); 04991 } 04992 } 04993 else 04994 #endif 04995 { 04996 if( iscontinuous ) 04997 { 04998 AlignedDataPtr<false, true> alignedPtr((uchar*)dstptr, total, CV_OPENCL_DATA_PTR_ALIGNMENT); 04999 CV_Assert(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 05000 srcrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0) >= 0 ); 05001 } 05002 else 05003 { 05004 AlignedDataPtr2D<false, true> alignedPtr((uchar*)dstptr, new_sz[1], new_sz[0], new_dststep[0], CV_OPENCL_DATA_PTR_ALIGNMENT); 05005 uchar* ptr = alignedPtr.getAlignedPtr(); 05006 05007 CV_Assert( clEnqueueReadBufferRect(q, (cl_mem)u->handle, CL_TRUE, 05008 new_srcofs, new_dstofs, new_sz, 05009 new_srcstep[0], 0, 05010 new_dststep[0], 0, 05011 ptr, 0, 0, 0) >= 0 ); 05012 } 05013 } 05014 } 05015 05016 void upload(UMatData* u, const void* srcptr, int dims, const size_t sz[], 05017 const size_t dstofs[], const size_t dststep[], 05018 const size_t srcstep[]) const 05019 { 05020 if(!u) 05021 return; 05022 05023 // there should be no user-visible CPU copies of the UMat which we are going to copy to 05024 CV_Assert(u->refcount == 0 || u->tempUMat()); 05025 05026 size_t total = 0, new_sz[] = {0, 0, 0}; 05027 size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0}; 05028 size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0}; 05029 05030 bool iscontinuous = checkContinuous(dims, sz, 0, srcstep, dstofs, dststep, 05031 total, new_sz, 05032 srcrawofs, new_srcofs, new_srcstep, 05033 dstrawofs, new_dstofs, new_dststep); 05034 05035 UMatDataAutoLock autolock(u); 05036 05037 // if there is cached CPU copy of the GPU matrix, 05038 // we could use it as a destination. 05039 // we can do it in 2 cases: 05040 // 1. we overwrite the whole content 05041 // 2. we overwrite part of the matrix, but the GPU copy is out-of-date 05042 if( u->data && (u->hostCopyObsolete() < u->deviceCopyObsolete() || total == u->size)) 05043 { 05044 Mat::getDefaultAllocator()->upload(u, srcptr, dims, sz, dstofs, dststep, srcstep); 05045 u->markHostCopyObsolete(false); 05046 u->markDeviceCopyObsolete(true); 05047 return; 05048 } 05049 05050 CV_Assert( u->handle != 0 ); 05051 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr(); 05052 05053 #ifdef HAVE_OPENCL_SVM 05054 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0) 05055 { 05056 CV_DbgAssert(u->data == NULL || u->data == u->handle); 05057 Context& ctx = Context::getDefault(); 05058 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx); 05059 CV_DbgAssert(svmFns->isValid()); 05060 05061 CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0); 05062 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER) 05063 { 05064 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size); 05065 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_WRITE, 05066 u->handle, u->size, 05067 0, NULL, NULL); 05068 CV_Assert(status == CL_SUCCESS); 05069 } 05070 clFinish(q); 05071 if( iscontinuous ) 05072 { 05073 memcpy((uchar*)u->handle + dstrawofs, srcptr, total); 05074 } 05075 else 05076 { 05077 // This code is from MatAllocator::upload() 05078 int isz[CV_MAX_DIM]; 05079 uchar* dstptr = (uchar*)u->handle; 05080 for( int i = 0; i < dims; i++ ) 05081 { 05082 CV_Assert( sz[i] <= (size_t)INT_MAX ); 05083 if( sz[i] == 0 ) 05084 return; 05085 if( dstofs ) 05086 dstptr += dstofs[i]*(i <= dims-2 ? dststep[i] : 1); 05087 isz[i] = (int)sz[i]; 05088 } 05089 05090 Mat src(dims, isz, CV_8U, (void*)srcptr, srcstep); 05091 Mat dst(dims, isz, CV_8U, dstptr, dststep); 05092 05093 const Mat* arrays[] = { &src, &dst }; 05094 uchar* ptrs[2]; 05095 NAryMatIterator it(arrays, ptrs, 2); 05096 size_t j, planesz = it.size; 05097 05098 for( j = 0; j < it.nplanes; j++, ++it ) 05099 memcpy(ptrs[1], ptrs[0], planesz); 05100 } 05101 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER) 05102 { 05103 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle); 05104 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle, 05105 0, NULL, NULL); 05106 CV_Assert(status == CL_SUCCESS); 05107 clFinish(q); 05108 } 05109 } 05110 else 05111 #endif 05112 { 05113 if( iscontinuous ) 05114 { 05115 AlignedDataPtr<true, false> alignedPtr((uchar*)srcptr, total, CV_OPENCL_DATA_PTR_ALIGNMENT); 05116 CV_Assert(clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE, 05117 dstrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0) >= 0); 05118 } 05119 else 05120 { 05121 AlignedDataPtr2D<true, false> alignedPtr((uchar*)srcptr, new_sz[1], new_sz[0], new_srcstep[0], CV_OPENCL_DATA_PTR_ALIGNMENT); 05122 uchar* ptr = alignedPtr.getAlignedPtr(); 05123 05124 CV_Assert(clEnqueueWriteBufferRect(q, (cl_mem)u->handle, CL_TRUE, 05125 new_dstofs, new_srcofs, new_sz, 05126 new_dststep[0], 0, 05127 new_srcstep[0], 0, 05128 ptr, 0, 0, 0) >= 0 ); 05129 } 05130 } 05131 u->markHostCopyObsolete(true); 05132 #ifdef HAVE_OPENCL_SVM 05133 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER || 05134 (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM) 05135 { 05136 // nothing 05137 } 05138 else 05139 #endif 05140 { 05141 u->markHostCopyObsolete(true); 05142 } 05143 u->markDeviceCopyObsolete(false); 05144 } 05145 05146 void copy(UMatData* src, UMatData* dst, int dims, const size_t sz[], 05147 const size_t srcofs[], const size_t srcstep[], 05148 const size_t dstofs[], const size_t dststep[], bool _sync) const 05149 { 05150 if(!src || !dst) 05151 return; 05152 05153 size_t total = 0, new_sz[] = {0, 0, 0}; 05154 size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0}; 05155 size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0}; 05156 05157 bool iscontinuous = checkContinuous(dims, sz, srcofs, srcstep, dstofs, dststep, 05158 total, new_sz, 05159 srcrawofs, new_srcofs, new_srcstep, 05160 dstrawofs, new_dstofs, new_dststep); 05161 05162 UMatDataAutoLock src_autolock(src); 05163 UMatDataAutoLock dst_autolock(dst); 05164 05165 if( !src->handle || (src->data && src->hostCopyObsolete() < src->deviceCopyObsolete()) ) 05166 { 05167 upload(dst, src->data + srcrawofs, dims, sz, dstofs, dststep, srcstep); 05168 return; 05169 } 05170 if( !dst->handle || (dst->data && dst->hostCopyObsolete() < dst->deviceCopyObsolete()) ) 05171 { 05172 download(src, dst->data + dstrawofs, dims, sz, srcofs, srcstep, dststep); 05173 dst->markHostCopyObsolete(false); 05174 #ifdef HAVE_OPENCL_SVM 05175 if ((dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER || 05176 (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM) 05177 { 05178 // nothing 05179 } 05180 else 05181 #endif 05182 { 05183 dst->markDeviceCopyObsolete(true); 05184 } 05185 return; 05186 } 05187 05188 // there should be no user-visible CPU copies of the UMat which we are going to copy to 05189 CV_Assert(dst->refcount == 0); 05190 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr(); 05191 05192 cl_int retval = CL_SUCCESS; 05193 #ifdef HAVE_OPENCL_SVM 05194 if ((src->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0 || 05195 (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0) 05196 { 05197 if ((src->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0 && 05198 (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0) 05199 { 05200 Context& ctx = Context::getDefault(); 05201 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx); 05202 CV_DbgAssert(svmFns->isValid()); 05203 05204 if( iscontinuous ) 05205 { 05206 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMemcpy: %p <-- %p (%d)\n", 05207 (uchar*)dst->handle + dstrawofs, (uchar*)src->handle + srcrawofs, (int)total); 05208 cl_int status = svmFns->fn_clEnqueueSVMMemcpy(q, CL_TRUE, 05209 (uchar*)dst->handle + dstrawofs, (uchar*)src->handle + srcrawofs, 05210 total, 0, NULL, NULL); 05211 CV_Assert(status == CL_SUCCESS); 05212 } 05213 else 05214 { 05215 clFinish(q); 05216 // This code is from MatAllocator::download()/upload() 05217 int isz[CV_MAX_DIM]; 05218 uchar* srcptr = (uchar*)src->handle; 05219 for( int i = 0; i < dims; i++ ) 05220 { 05221 CV_Assert( sz[i] <= (size_t)INT_MAX ); 05222 if( sz[i] == 0 ) 05223 return; 05224 if( srcofs ) 05225 srcptr += srcofs[i]*(i <= dims-2 ? srcstep[i] : 1); 05226 isz[i] = (int)sz[i]; 05227 } 05228 Mat m_src(dims, isz, CV_8U, srcptr, srcstep); 05229 05230 uchar* dstptr = (uchar*)dst->handle; 05231 for( int i = 0; i < dims; i++ ) 05232 { 05233 if( dstofs ) 05234 dstptr += dstofs[i]*(i <= dims-2 ? dststep[i] : 1); 05235 } 05236 Mat m_dst(dims, isz, CV_8U, dstptr, dststep); 05237 05238 const Mat* arrays[] = { &m_src, &m_dst }; 05239 uchar* ptrs[2]; 05240 NAryMatIterator it(arrays, ptrs, 2); 05241 size_t j, planesz = it.size; 05242 05243 for( j = 0; j < it.nplanes; j++, ++it ) 05244 memcpy(ptrs[1], ptrs[0], planesz); 05245 } 05246 } 05247 else 05248 { 05249 if ((src->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0) 05250 { 05251 map(src, ACCESS_READ); 05252 upload(dst, src->data + srcrawofs, dims, sz, dstofs, dststep, srcstep); 05253 unmap(src); 05254 } 05255 else 05256 { 05257 map(dst, ACCESS_WRITE); 05258 download(src, dst->data + dstrawofs, dims, sz, srcofs, srcstep, dststep); 05259 unmap(dst); 05260 } 05261 } 05262 } 05263 else 05264 #endif 05265 { 05266 if( iscontinuous ) 05267 { 05268 CV_Assert( (retval = clEnqueueCopyBuffer(q, (cl_mem)src->handle, (cl_mem)dst->handle, 05269 srcrawofs, dstrawofs, total, 0, 0, 0)) == CL_SUCCESS ); 05270 } 05271 else 05272 { 05273 CV_Assert( (retval = clEnqueueCopyBufferRect(q, (cl_mem)src->handle, (cl_mem)dst->handle, 05274 new_srcofs, new_dstofs, new_sz, 05275 new_srcstep[0], 0, 05276 new_dststep[0], 0, 05277 0, 0, 0)) == CL_SUCCESS ); 05278 } 05279 } 05280 if (retval == CL_SUCCESS) 05281 { 05282 CV_IMPL_ADD(CV_IMPL_OCL) 05283 } 05284 05285 #ifdef HAVE_OPENCL_SVM 05286 if ((dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER || 05287 (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM) 05288 { 05289 // nothing 05290 } 05291 else 05292 #endif 05293 { 05294 dst->markHostCopyObsolete(true); 05295 } 05296 dst->markDeviceCopyObsolete(false); 05297 05298 if( _sync ) 05299 { 05300 CV_OclDbgAssert(clFinish(q) == CL_SUCCESS); 05301 } 05302 } 05303 05304 BufferPoolController* getBufferPoolController(const char* id) const { 05305 #ifdef HAVE_OPENCL_SVM 05306 if ((svm::checkForceSVMUmatUsage() && (id == NULL || strcmp(id, "OCL") == 0)) || (id != NULL && strcmp(id, "SVM") == 0)) 05307 { 05308 return &bufferPoolSVM; 05309 } 05310 #endif 05311 if (id != NULL && strcmp(id, "HOST_ALLOC") == 0) 05312 { 05313 return &bufferPoolHostPtr; 05314 } 05315 if (id != NULL && strcmp(id, "OCL") != 0) 05316 { 05317 CV_ErrorNoReturn(cv::Error::StsBadArg, "getBufferPoolController(): unknown BufferPool ID\n"); 05318 } 05319 return &bufferPool; 05320 } 05321 05322 MatAllocator* matStdAllocator; 05323 }; 05324 05325 MatAllocator* getOpenCLAllocator() 05326 { 05327 CV_SINGLETON_LAZY_INIT(MatAllocator, new OpenCLAllocator()) 05328 } 05329 05330 }} // namespace cv::ocl 05331 05332 05333 namespace cv { 05334 05335 // three funcs below are implemented in umatrix.cpp 05336 void setSize( UMat& m, int _dims, const int* _sz, const size_t* _steps, 05337 bool autoSteps = false ); 05338 05339 void updateContinuityFlag(UMat& m); 05340 void finalizeHdr(UMat& m); 05341 05342 } // namespace cv 05343 05344 05345 namespace cv { namespace ocl { 05346 05347 /* 05348 // Convert OpenCL buffer memory to UMat 05349 */ 05350 void convertFromBuffer(void* cl_mem_buffer, size_t step, int rows, int cols, int type, UMat& dst) 05351 { 05352 int d = 2; 05353 int sizes[] = { rows, cols }; 05354 05355 CV_Assert(0 <= d && d <= CV_MAX_DIM); 05356 05357 dst.release(); 05358 05359 dst.flags = (type & Mat::TYPE_MASK) | Mat::MAGIC_VAL; 05360 dst.usageFlags = USAGE_DEFAULT; 05361 05362 setSize(dst, d, sizes, 0, true); 05363 dst.offset = 0; 05364 05365 cl_mem memobj = (cl_mem)cl_mem_buffer; 05366 cl_mem_object_type mem_type = 0; 05367 05368 CV_Assert(clGetMemObjectInfo(memobj, CL_MEM_TYPE, sizeof(cl_mem_object_type), &mem_type, 0) == CL_SUCCESS); 05369 05370 CV_Assert(CL_MEM_OBJECT_BUFFER == mem_type); 05371 05372 size_t total = 0; 05373 CV_Assert(clGetMemObjectInfo(memobj, CL_MEM_SIZE, sizeof(size_t), &total, 0) == CL_SUCCESS); 05374 05375 CV_Assert(clRetainMemObject(memobj) == CL_SUCCESS); 05376 05377 CV_Assert((int)step >= cols * CV_ELEM_SIZE(type)); 05378 CV_Assert(total >= rows * step); 05379 05380 // attach clBuffer to UMatData 05381 dst.u = new UMatData(getOpenCLAllocator()); 05382 dst.u->data = 0; 05383 dst.u->allocatorFlags_ = 0; // not allocated from any OpenCV buffer pool 05384 dst.u->flags = 0; 05385 dst.u->handle = cl_mem_buffer; 05386 dst.u->origdata = 0; 05387 dst.u->prevAllocator = 0; 05388 dst.u->size = total; 05389 05390 finalizeHdr(dst); 05391 dst.addref(); 05392 05393 return; 05394 } // convertFromBuffer() 05395 05396 05397 /* 05398 // Convert OpenCL image2d_t memory to UMat 05399 */ 05400 void convertFromImage(void* cl_mem_image, UMat& dst) 05401 { 05402 cl_mem clImage = (cl_mem)cl_mem_image; 05403 cl_mem_object_type mem_type = 0; 05404 05405 CV_Assert(clGetMemObjectInfo(clImage, CL_MEM_TYPE, sizeof(cl_mem_object_type), &mem_type, 0) == CL_SUCCESS); 05406 05407 CV_Assert(CL_MEM_OBJECT_IMAGE2D == mem_type); 05408 05409 cl_image_format fmt = { 0, 0 }; 05410 CV_Assert(clGetImageInfo(clImage, CL_IMAGE_FORMAT, sizeof(cl_image_format), &fmt, 0) == CL_SUCCESS); 05411 05412 int depth = CV_8U; 05413 switch (fmt.image_channel_data_type) 05414 { 05415 case CL_UNORM_INT8: 05416 case CL_UNSIGNED_INT8: 05417 depth = CV_8U; 05418 break; 05419 05420 case CL_SNORM_INT8: 05421 case CL_SIGNED_INT8: 05422 depth = CV_8S; 05423 break; 05424 05425 case CL_UNORM_INT16: 05426 case CL_UNSIGNED_INT16: 05427 depth = CV_16U; 05428 break; 05429 05430 case CL_SNORM_INT16: 05431 case CL_SIGNED_INT16: 05432 depth = CV_16S; 05433 break; 05434 05435 case CL_SIGNED_INT32: 05436 depth = CV_32S; 05437 break; 05438 05439 case CL_FLOAT: 05440 depth = CV_32F; 05441 break; 05442 05443 default: 05444 CV_Error(cv::Error::OpenCLApiCallError, "Not supported image_channel_data_type"); 05445 } 05446 05447 int type = CV_8UC1; 05448 switch (fmt.image_channel_order) 05449 { 05450 case CL_R: 05451 type = CV_MAKE_TYPE(depth, 1); 05452 break; 05453 05454 case CL_RGBA: 05455 case CL_BGRA: 05456 case CL_ARGB: 05457 type = CV_MAKE_TYPE(depth, 4); 05458 break; 05459 05460 default: 05461 CV_Error(cv::Error::OpenCLApiCallError, "Not supported image_channel_order"); 05462 break; 05463 } 05464 05465 size_t step = 0; 05466 CV_Assert(clGetImageInfo(clImage, CL_IMAGE_ROW_PITCH, sizeof(size_t), &step, 0) == CL_SUCCESS); 05467 05468 size_t w = 0; 05469 CV_Assert(clGetImageInfo(clImage, CL_IMAGE_WIDTH, sizeof(size_t), &w, 0) == CL_SUCCESS); 05470 05471 size_t h = 0; 05472 CV_Assert(clGetImageInfo(clImage, CL_IMAGE_HEIGHT, sizeof(size_t), &h, 0) == CL_SUCCESS); 05473 05474 dst.create((int)h, (int)w, type); 05475 05476 cl_mem clBuffer = (cl_mem)dst.handle(ACCESS_READ); 05477 05478 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr(); 05479 05480 size_t offset = 0; 05481 size_t src_origin[3] = { 0, 0, 0 }; 05482 size_t region[3] = { w, h, 1 }; 05483 CV_Assert(clEnqueueCopyImageToBuffer(q, clImage, clBuffer, src_origin, region, offset, 0, NULL, NULL) == CL_SUCCESS); 05484 05485 CV_Assert(clFinish(q) == CL_SUCCESS); 05486 05487 return; 05488 } // convertFromImage() 05489 05490 05491 ///////////////////////////////////////////// Utility functions ///////////////////////////////////////////////// 05492 05493 static void getDevices(std::vector<cl_device_id>& devices, cl_platform_id platform) 05494 { 05495 cl_uint numDevices = 0; 05496 CV_OclDbgAssert(clGetDeviceIDs(platform, (cl_device_type)Device::TYPE_ALL, 05497 0, NULL, &numDevices) == CL_SUCCESS); 05498 05499 if (numDevices == 0) 05500 { 05501 devices.clear(); 05502 return; 05503 } 05504 05505 devices.resize((size_t)numDevices); 05506 CV_OclDbgAssert(clGetDeviceIDs(platform, (cl_device_type)Device::TYPE_ALL, 05507 numDevices, &devices[0], &numDevices) == CL_SUCCESS); 05508 } 05509 05510 struct PlatformInfo::Impl 05511 { 05512 Impl(void* id) 05513 { 05514 refcount = 1; 05515 handle = *(cl_platform_id*)id; 05516 getDevices(devices, handle); 05517 } 05518 05519 String getStrProp(cl_device_info prop) const 05520 { 05521 char buf[1024]; 05522 size_t sz=0; 05523 return clGetPlatformInfo(handle, prop, sizeof(buf)-16, buf, &sz) == CL_SUCCESS && 05524 sz < sizeof(buf) ? String(buf) : String(); 05525 } 05526 05527 IMPLEMENT_REFCOUNTABLE(); 05528 std::vector<cl_device_id> devices; 05529 cl_platform_id handle; 05530 }; 05531 05532 PlatformInfo::PlatformInfo() 05533 { 05534 p = 0; 05535 } 05536 05537 PlatformInfo::PlatformInfo(void* platform_id) 05538 { 05539 p = new Impl(platform_id); 05540 } 05541 05542 PlatformInfo::~PlatformInfo() 05543 { 05544 if(p) 05545 p->release(); 05546 } 05547 05548 PlatformInfo::PlatformInfo(const PlatformInfo& i) 05549 { 05550 if (i.p) 05551 i.p->addref(); 05552 p = i.p; 05553 } 05554 05555 PlatformInfo& PlatformInfo::operator =(const PlatformInfo& i) 05556 { 05557 if (i.p != p) 05558 { 05559 if (i.p) 05560 i.p->addref(); 05561 if (p) 05562 p->release(); 05563 p = i.p; 05564 } 05565 return *this; 05566 } 05567 05568 int PlatformInfo::deviceNumber() const 05569 { 05570 return p ? (int)p->devices.size() : 0; 05571 } 05572 05573 void PlatformInfo::getDevice(Device& device, int d) const 05574 { 05575 CV_Assert(p && d < (int)p->devices.size() ); 05576 if(p) 05577 device.set(p->devices[d]); 05578 } 05579 05580 String PlatformInfo::name() const 05581 { 05582 return p ? p->getStrProp(CL_PLATFORM_NAME) : String(); 05583 } 05584 05585 String PlatformInfo::vendor() const 05586 { 05587 return p ? p->getStrProp(CL_PLATFORM_VENDOR) : String(); 05588 } 05589 05590 String PlatformInfo::version() const 05591 { 05592 return p ? p->getStrProp(CL_PLATFORM_VERSION) : String(); 05593 } 05594 05595 static void getPlatforms(std::vector<cl_platform_id>& platforms) 05596 { 05597 cl_uint numPlatforms = 0; 05598 CV_OclDbgAssert(clGetPlatformIDs(0, NULL, &numPlatforms) == CL_SUCCESS); 05599 05600 if (numPlatforms == 0) 05601 { 05602 platforms.clear(); 05603 return; 05604 } 05605 05606 platforms.resize((size_t)numPlatforms); 05607 CV_OclDbgAssert(clGetPlatformIDs(numPlatforms, &platforms[0], &numPlatforms) == CL_SUCCESS); 05608 } 05609 05610 void getPlatfomsInfo(std::vector<PlatformInfo>& platformsInfo) 05611 { 05612 std::vector<cl_platform_id> platforms; 05613 getPlatforms(platforms); 05614 05615 for (size_t i = 0; i < platforms.size(); i++) 05616 platformsInfo.push_back( PlatformInfo((void*)&platforms[i]) ); 05617 } 05618 05619 const char* typeToStr(int type) 05620 { 05621 static const char* tab[]= 05622 { 05623 "uchar", "uchar2", "uchar3", "uchar4", 0, 0, 0, "uchar8", 0, 0, 0, 0, 0, 0, 0, "uchar16", 05624 "char", "char2", "char3", "char4", 0, 0, 0, "char8", 0, 0, 0, 0, 0, 0, 0, "char16", 05625 "ushort", "ushort2", "ushort3", "ushort4",0, 0, 0, "ushort8", 0, 0, 0, 0, 0, 0, 0, "ushort16", 05626 "short", "short2", "short3", "short4", 0, 0, 0, "short8", 0, 0, 0, 0, 0, 0, 0, "short16", 05627 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16", 05628 "float", "float2", "float3", "float4", 0, 0, 0, "float8", 0, 0, 0, 0, 0, 0, 0, "float16", 05629 "double", "double2", "double3", "double4", 0, 0, 0, "double8", 0, 0, 0, 0, 0, 0, 0, "double16", 05630 "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?" 05631 }; 05632 int cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type); 05633 return cn > 16 ? "?" : tab[depth*16 + cn-1]; 05634 } 05635 05636 const char* memopTypeToStr(int type) 05637 { 05638 static const char* tab[] = 05639 { 05640 "uchar", "uchar2", "uchar3", "uchar4", 0, 0, 0, "uchar8", 0, 0, 0, 0, 0, 0, 0, "uchar16", 05641 "char", "char2", "char3", "char4", 0, 0, 0, "char8", 0, 0, 0, 0, 0, 0, 0, "char16", 05642 "ushort", "ushort2", "ushort3", "ushort4",0, 0, 0, "ushort8", 0, 0, 0, 0, 0, 0, 0, "ushort16", 05643 "short", "short2", "short3", "short4", 0, 0, 0, "short8", 0, 0, 0, 0, 0, 0, 0, "short16", 05644 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16", 05645 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16", 05646 "ulong", "ulong2", "ulong3", "ulong4", 0, 0, 0, "ulong8", 0, 0, 0, 0, 0, 0, 0, "ulong16", 05647 "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?" 05648 }; 05649 int cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type); 05650 return cn > 16 ? "?" : tab[depth*16 + cn-1]; 05651 } 05652 05653 const char* vecopTypeToStr(int type) 05654 { 05655 static const char* tab[] = 05656 { 05657 "uchar", "short", "uchar3", "int", 0, 0, 0, "int2", 0, 0, 0, 0, 0, 0, 0, "int4", 05658 "char", "short", "char3", "int", 0, 0, 0, "int2", 0, 0, 0, 0, 0, 0, 0, "int4", 05659 "ushort", "int", "ushort3", "int2",0, 0, 0, "int4", 0, 0, 0, 0, 0, 0, 0, "int8", 05660 "short", "int", "short3", "int2", 0, 0, 0, "int4", 0, 0, 0, 0, 0, 0, 0, "int8", 05661 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16", 05662 "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16", 05663 "ulong", "ulong2", "ulong3", "ulong4", 0, 0, 0, "ulong8", 0, 0, 0, 0, 0, 0, 0, "ulong16", 05664 "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?" 05665 }; 05666 int cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type); 05667 return cn > 16 ? "?" : tab[depth*16 + cn-1]; 05668 } 05669 05670 const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf) 05671 { 05672 if( sdepth == ddepth ) 05673 return "noconvert"; 05674 const char *typestr = typeToStr(CV_MAKETYPE(ddepth, cn)); 05675 if( ddepth >= CV_32F || 05676 (ddepth == CV_32S && sdepth < CV_32S) || 05677 (ddepth == CV_16S && sdepth <= CV_8S) || 05678 (ddepth == CV_16U && sdepth == CV_8U)) 05679 { 05680 sprintf(buf, "convert_%s", typestr); 05681 } 05682 else if( sdepth >= CV_32F ) 05683 sprintf(buf, "convert_%s%s_rte", typestr, (ddepth < CV_32S ? "_sat" : "")); 05684 else 05685 sprintf(buf, "convert_%s_sat", typestr); 05686 05687 return buf; 05688 } 05689 05690 template <typename T> 05691 static std::string kerToStr(const Mat & k) 05692 { 05693 int width = k.cols - 1, depth = k.depth(); 05694 const T * const data = k.ptr<T>(); 05695 05696 std::ostringstream stream; 05697 stream.precision(10); 05698 05699 if (depth <= CV_8S) 05700 { 05701 for (int i = 0; i < width; ++i) 05702 stream << "DIG(" << (int)data[i] << ")"; 05703 stream << "DIG(" << (int)data[width] << ")"; 05704 } 05705 else if (depth == CV_32F) 05706 { 05707 stream.setf(std::ios_base::showpoint); 05708 for (int i = 0; i < width; ++i) 05709 stream << "DIG(" << data[i] << "f)"; 05710 stream << "DIG(" << data[width] << "f)"; 05711 } 05712 else 05713 { 05714 for (int i = 0; i < width; ++i) 05715 stream << "DIG(" << data[i] << ")"; 05716 stream << "DIG(" << data[width] << ")"; 05717 } 05718 05719 return stream.str(); 05720 } 05721 05722 String kernelToStr(InputArray _kernel, int ddepth, const char * name) 05723 { 05724 Mat kernel = _kernel.getMat().reshape(1, 1); 05725 05726 int depth = kernel.depth(); 05727 if (ddepth < 0) 05728 ddepth = depth; 05729 05730 if (ddepth != depth) 05731 kernel.convertTo(kernel, ddepth); 05732 05733 typedef std::string (* func_t)(const Mat &); 05734 static const func_t funcs[] = { kerToStr<uchar>, kerToStr<char>, kerToStr<ushort>, kerToStr<short>, 05735 kerToStr<int>, kerToStr<float>, kerToStr<double>, 0 }; 05736 const func_t func = funcs[ddepth]; 05737 CV_Assert(func != 0); 05738 05739 return cv::format(" -D %s=%s", name ? name : "COEFF", func(kernel).c_str()); 05740 } 05741 05742 #define PROCESS_SRC(src) \ 05743 do \ 05744 { \ 05745 if (!src.empty()) \ 05746 { \ 05747 CV_Assert(src.isMat() || src.isUMat()); \ 05748 Size csize = src.size(); \ 05749 int ctype = src.type(), ccn = CV_MAT_CN(ctype), cdepth = CV_MAT_DEPTH(ctype), \ 05750 ckercn = vectorWidths[cdepth], cwidth = ccn * csize.width; \ 05751 if (cwidth < ckercn || ckercn <= 0) \ 05752 return 1; \ 05753 cols.push_back(cwidth); \ 05754 if (strat == OCL_VECTOR_OWN && ctype != ref_type) \ 05755 return 1; \ 05756 offsets.push_back(src.offset()); \ 05757 steps.push_back(src.step()); \ 05758 dividers.push_back(ckercn * CV_ELEM_SIZE1(ctype)); \ 05759 kercns.push_back(ckercn); \ 05760 } \ 05761 } \ 05762 while ((void)0, 0) 05763 05764 int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3, 05765 InputArray src4, InputArray src5, InputArray src6, 05766 InputArray src7, InputArray src8, InputArray src9, 05767 OclVectorStrategy strat) 05768 { 05769 const ocl::Device & d = ocl::Device::getDefault(); 05770 05771 int vectorWidths[] = { d.preferredVectorWidthChar(), d.preferredVectorWidthChar(), 05772 d.preferredVectorWidthShort(), d.preferredVectorWidthShort(), 05773 d.preferredVectorWidthInt(), d.preferredVectorWidthFloat(), 05774 d.preferredVectorWidthDouble(), -1 }; 05775 05776 // if the device says don't use vectors 05777 if (vectorWidths[0] == 1) 05778 { 05779 // it's heuristic 05780 vectorWidths[CV_8U] = vectorWidths[CV_8S] = 4; 05781 vectorWidths[CV_16U] = vectorWidths[CV_16S] = 2; 05782 vectorWidths[CV_32S] = vectorWidths[CV_32F] = vectorWidths[CV_64F] = 1; 05783 } 05784 05785 return checkOptimalVectorWidth(vectorWidths, src1, src2, src3, src4, src5, src6, src7, src8, src9, strat); 05786 } 05787 05788 int checkOptimalVectorWidth(const int *vectorWidths, 05789 InputArray src1, InputArray src2, InputArray src3, 05790 InputArray src4, InputArray src5, InputArray src6, 05791 InputArray src7, InputArray src8, InputArray src9, 05792 OclVectorStrategy strat) 05793 { 05794 CV_Assert(vectorWidths); 05795 05796 int ref_type = src1.type(); 05797 05798 std::vector<size_t> offsets, steps, cols; 05799 std::vector<int> dividers, kercns; 05800 PROCESS_SRC(src1); 05801 PROCESS_SRC(src2); 05802 PROCESS_SRC(src3); 05803 PROCESS_SRC(src4); 05804 PROCESS_SRC(src5); 05805 PROCESS_SRC(src6); 05806 PROCESS_SRC(src7); 05807 PROCESS_SRC(src8); 05808 PROCESS_SRC(src9); 05809 05810 size_t size = offsets.size(); 05811 05812 for (size_t i = 0; i < size; ++i) 05813 while (offsets[i] % dividers[i] != 0 || steps[i] % dividers[i] != 0 || cols[i] % kercns[i] != 0) 05814 dividers[i] >>= 1, kercns[i] >>= 1; 05815 05816 // default strategy 05817 int kercn = *std::min_element(kercns.begin(), kercns.end()); 05818 05819 return kercn; 05820 } 05821 05822 int predictOptimalVectorWidthMax(InputArray src1, InputArray src2, InputArray src3, 05823 InputArray src4, InputArray src5, InputArray src6, 05824 InputArray src7, InputArray src8, InputArray src9) 05825 { 05826 return predictOptimalVectorWidth(src1, src2, src3, src4, src5, src6, src7, src8, src9, OCL_VECTOR_MAX); 05827 } 05828 05829 #undef PROCESS_SRC 05830 05831 05832 // TODO Make this as a method of OpenCL "BuildOptions" class 05833 void buildOptionsAddMatrixDescription(String& buildOptions, const String& name, InputArray _m) 05834 { 05835 if (!buildOptions.empty()) 05836 buildOptions += " "; 05837 int type = _m.type(), depth = CV_MAT_DEPTH(type); 05838 buildOptions += format( 05839 "-D %s_T=%s -D %s_T1=%s -D %s_CN=%d -D %s_TSIZE=%d -D %s_T1SIZE=%d -D %s_DEPTH=%d", 05840 name.c_str(), ocl::typeToStr(type), 05841 name.c_str(), ocl::typeToStr(CV_MAKE_TYPE(depth, 1)), 05842 name.c_str(), (int)CV_MAT_CN(type), 05843 name.c_str(), (int)CV_ELEM_SIZE(type), 05844 name.c_str(), (int)CV_ELEM_SIZE1(type), 05845 name.c_str(), (int)depth 05846 ); 05847 } 05848 05849 05850 struct Image2D::Impl 05851 { 05852 Impl(const UMat &src, bool norm, bool alias) 05853 { 05854 handle = 0; 05855 refcount = 1; 05856 init(src, norm, alias); 05857 } 05858 05859 ~Impl() 05860 { 05861 if (handle) 05862 clReleaseMemObject(handle); 05863 } 05864 05865 static cl_image_format getImageFormat(int depth, int cn, bool norm) 05866 { 05867 cl_image_format format; 05868 static const int channelTypes[] = { CL_UNSIGNED_INT8, CL_SIGNED_INT8, CL_UNSIGNED_INT16, 05869 CL_SIGNED_INT16, CL_SIGNED_INT32, CL_FLOAT, -1, -1 }; 05870 static const int channelTypesNorm[] = { CL_UNORM_INT8, CL_SNORM_INT8, CL_UNORM_INT16, 05871 CL_SNORM_INT16, -1, -1, -1, -1 }; 05872 static const int channelOrders[] = { -1, CL_R, CL_RG, -1, CL_RGBA }; 05873 05874 int channelType = norm ? channelTypesNorm[depth] : channelTypes[depth]; 05875 int channelOrder = channelOrders[cn]; 05876 format.image_channel_data_type = (cl_channel_type)channelType; 05877 format.image_channel_order = (cl_channel_order)channelOrder; 05878 return format; 05879 } 05880 05881 static bool isFormatSupported(cl_image_format format) 05882 { 05883 if (!haveOpenCL()) 05884 CV_Error(Error::OpenCLApiCallError, "OpenCL runtime not found!"); 05885 05886 cl_context context = (cl_context)Context::getDefault().ptr(); 05887 // Figure out how many formats are supported by this context. 05888 cl_uint numFormats = 0; 05889 cl_int err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, 05890 CL_MEM_OBJECT_IMAGE2D, numFormats, 05891 NULL, &numFormats); 05892 AutoBuffer<cl_image_format> formats(numFormats); 05893 err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE, 05894 CL_MEM_OBJECT_IMAGE2D, numFormats, 05895 formats, NULL); 05896 CV_OclDbgAssert(err == CL_SUCCESS); 05897 for (cl_uint i = 0; i < numFormats; ++i) 05898 { 05899 if (!memcmp(&formats[i], &format, sizeof(format))) 05900 { 05901 return true; 05902 } 05903 } 05904 return false; 05905 } 05906 05907 void init(const UMat &src, bool norm, bool alias) 05908 { 05909 if (!haveOpenCL()) 05910 CV_Error(Error::OpenCLApiCallError, "OpenCL runtime not found!"); 05911 05912 CV_Assert(!src.empty()); 05913 CV_Assert(ocl::Device::getDefault().imageSupport()); 05914 05915 int err, depth = src.depth(), cn = src.channels(); 05916 CV_Assert(cn <= 4); 05917 cl_image_format format = getImageFormat(depth, cn, norm); 05918 05919 if (!isFormatSupported(format)) 05920 CV_Error(Error::OpenCLApiCallError, "Image format is not supported"); 05921 05922 if (alias && !src.handle(ACCESS_RW)) 05923 CV_Error(Error::OpenCLApiCallError, "Incorrect UMat, handle is null"); 05924 05925 cl_context context = (cl_context)Context::getDefault().ptr(); 05926 cl_command_queue queue = (cl_command_queue)Queue::getDefault().ptr(); 05927 05928 #ifdef CL_VERSION_1_2 05929 // this enables backwards portability to 05930 // run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support 05931 const Device & d = ocl::Device::getDefault(); 05932 int minor = d.deviceVersionMinor(), major = d.deviceVersionMajor(); 05933 CV_Assert(!alias || canCreateAlias(src)); 05934 if (1 < major || (1 == major && 2 <= minor)) 05935 { 05936 cl_image_desc desc; 05937 desc.image_type = CL_MEM_OBJECT_IMAGE2D; 05938 desc.image_width = src.cols; 05939 desc.image_height = src.rows; 05940 desc.image_depth = 0; 05941 desc.image_array_size = 1; 05942 desc.image_row_pitch = alias ? src.step[0] : 0; 05943 desc.image_slice_pitch = 0; 05944 desc.buffer = alias ? (cl_mem)src.handle(ACCESS_RW) : 0; 05945 desc.num_mip_levels = 0; 05946 desc.num_samples = 0; 05947 handle = clCreateImage(context, CL_MEM_READ_WRITE, &format, &desc, NULL, &err); 05948 } 05949 else 05950 #endif 05951 { 05952 CV_SUPPRESS_DEPRECATED_START 05953 CV_Assert(!alias); // This is an OpenCL 1.2 extension 05954 handle = clCreateImage2D(context, CL_MEM_READ_WRITE, &format, src.cols, src.rows, 0, NULL, &err); 05955 CV_SUPPRESS_DEPRECATED_END 05956 } 05957 CV_OclDbgAssert(err == CL_SUCCESS); 05958 05959 size_t origin[] = { 0, 0, 0 }; 05960 size_t region[] = { static_cast<size_t>(src.cols), static_cast<size_t>(src.rows), 1 }; 05961 05962 cl_mem devData; 05963 if (!alias && !src.isContinuous()) 05964 { 05965 devData = clCreateBuffer(context, CL_MEM_READ_ONLY, src.cols * src.rows * src.elemSize(), NULL, &err); 05966 CV_OclDbgAssert(err == CL_SUCCESS); 05967 05968 const size_t roi[3] = {static_cast<size_t>(src.cols) * src.elemSize(), static_cast<size_t>(src.rows), 1}; 05969 CV_Assert(clEnqueueCopyBufferRect(queue, (cl_mem)src.handle(ACCESS_READ), devData, origin, origin, 05970 roi, src.step, 0, src.cols * src.elemSize(), 0, 0, NULL, NULL) == CL_SUCCESS); 05971 CV_OclDbgAssert(clFlush(queue) == CL_SUCCESS); 05972 } 05973 else 05974 { 05975 devData = (cl_mem)src.handle(ACCESS_READ); 05976 } 05977 CV_Assert(devData != NULL); 05978 05979 if (!alias) 05980 { 05981 CV_OclDbgAssert(clEnqueueCopyBufferToImage(queue, devData, handle, 0, origin, region, 0, NULL, 0) == CL_SUCCESS); 05982 if (!src.isContinuous()) 05983 { 05984 CV_OclDbgAssert(clFlush(queue) == CL_SUCCESS); 05985 CV_OclDbgAssert(clReleaseMemObject(devData) == CL_SUCCESS); 05986 } 05987 } 05988 } 05989 05990 IMPLEMENT_REFCOUNTABLE(); 05991 05992 cl_mem handle; 05993 }; 05994 05995 Image2D::Image2D() 05996 { 05997 p = NULL; 05998 } 05999 06000 Image2D::Image2D(const UMat &src, bool norm, bool alias) 06001 { 06002 p = new Impl(src, norm, alias); 06003 } 06004 06005 bool Image2D::canCreateAlias(const UMat &m) 06006 { 06007 bool ret = false; 06008 const Device & d = ocl::Device::getDefault(); 06009 if (d.imageFromBufferSupport() && !m.empty()) 06010 { 06011 // This is the required pitch alignment in pixels 06012 uint pitchAlign = d.imagePitchAlignment(); 06013 if (pitchAlign && !(m.step % (pitchAlign * m.elemSize()))) 06014 { 06015 // We don't currently handle the case where the buffer was created 06016 // with CL_MEM_USE_HOST_PTR 06017 if (!m.u->tempUMat()) 06018 { 06019 ret = true; 06020 } 06021 } 06022 } 06023 return ret; 06024 } 06025 06026 bool Image2D::isFormatSupported(int depth, int cn, bool norm) 06027 { 06028 cl_image_format format = Impl::getImageFormat(depth, cn, norm); 06029 06030 return Impl::isFormatSupported(format); 06031 } 06032 06033 Image2D::Image2D(const Image2D & i) 06034 { 06035 p = i.p; 06036 if (p) 06037 p->addref(); 06038 } 06039 06040 Image2D & Image2D::operator = (const Image2D & i) 06041 { 06042 if (i.p != p) 06043 { 06044 if (i.p) 06045 i.p->addref(); 06046 if (p) 06047 p->release(); 06048 p = i.p; 06049 } 06050 return *this; 06051 } 06052 06053 Image2D::~Image2D() 06054 { 06055 if (p) 06056 p->release(); 06057 } 06058 06059 void* Image2D::ptr() const 06060 { 06061 return p ? p->handle : 0; 06062 } 06063 06064 //CV_EXPORTS bool cv::ocl::internal::isPerformanceCheckBypassed() 06065 //{ 06066 // static bool initialized = false; 06067 // static bool value = false; 06068 // if (!initialized) 06069 // { 06070 // value = getBoolParameter("OPENCV_OPENCL_PERF_CHECK_BYPASS", false); 06071 // initialized = true; 06072 // } 06073 // return value; 06074 //} 06075 06076 //CV_EXPORTS bool ocl::internal::isCLBuffer(UMat& u) 06077 //{ 06078 // void* h = u.handle(ACCESS_RW); 06079 // if (!h) 06080 // return true; 06081 // CV_DbgAssert(u.u->currAllocator == getOpenCLAllocator()); 06082 //#if 1 06083 // if ((u.u->allocatorFlags_ & 0xffff0000) != 0) // OpenCL SVM flags are stored here 06084 // return false; 06085 //#else 06086 // cl_mem_object_type type = 0; 06087 // cl_int ret = clGetMemObjectInfo((cl_mem)h, CL_MEM_TYPE, sizeof(type), &type, NULL); 06088 // if (ret != CL_SUCCESS || type != CL_MEM_OBJECT_BUFFER) 06089 // return false; 06090 //#endif 06091 // return true; 06092 //} 06093 06094 }} 06095
Generated on Tue Jul 12 2022 14:47:30 by
1.7.2
