Renesas GR-PEACH OpenCV Development / gr-peach-opencv-project-sd-card_update

Fork of gr-peach-opencv-project-sd-card by the do

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers ocl.cpp Source File

ocl.cpp

00001 /*M///////////////////////////////////////////////////////////////////////////////////////
00002 //
00003 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
00004 //
00005 //  By downloading, copying, installing or using the software you agree to this license.
00006 //  If you do not agree to this license, do not download, install,
00007 //  copy or use the software.
00008 //
00009 //
00010 //                           License Agreement
00011 //                For Open Source Computer Vision Library
00012 //
00013 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
00014 // Third party copyrights are property of their respective owners.
00015 //
00016 // Redistribution and use in source and binary forms, with or without modification,
00017 // are permitted provided that the following conditions are met:
00018 //
00019 //   * Redistribution's of source code must retain the above copyright notice,
00020 //     this list of conditions and the following disclaimer.
00021 //
00022 //   * Redistribution's in binary form must reproduce the above copyright notice,
00023 //     this list of conditions and the following disclaimer in the documentation
00024 //     and/or other materials provided with the distribution.
00025 //
00026 //   * The name of the copyright holders may not be used to endorse or promote products
00027 //     derived from this software without specific prior written permission.
00028 //
00029 // This software is provided by the copyright holders and contributors "as is" and
00030 // any express or implied warranties, including, but not limited to, the implied
00031 // warranties of merchantability and fitness for a particular purpose are disclaimed.
00032 // In no event shall the OpenCV Foundation or contributors be liable for any direct,
00033 // indirect, incidental, special, exemplary, or consequential damages
00034 // (including, but not limited to, procurement of substitute goods or services;
00035 // loss of use, data, or profits; or business interruption) however caused
00036 // and on any theory of liability, whether in contract, strict liability,
00037 // or tort (including negligence or otherwise) arising in any way out of
00038 // the use of this software, even if advised of the possibility of such damage.
00039 //
00040 //M*/
00041 
00042 #include "precomp.hpp"
00043 #include <list>
00044 #include <map>
00045 #include <string>
00046 #include <sstream>
00047 #include <iostream> // std::cerr
00048 #include "opencv2/core/ocl.hpp"
00049 
00050 #define CV_OPENCL_ALWAYS_SHOW_BUILD_LOG 0
00051 #define CV_OPENCL_SHOW_RUN_ERRORS       0
00052 #define CV_OPENCL_SHOW_SVM_ERROR_LOG    1
00053 #define CV_OPENCL_SHOW_SVM_LOG          0
00054 
00055 #include "opencv2/core/bufferpool.hpp"
00056 #ifndef LOG_BUFFER_POOL
00057 # if 0
00058 #   define LOG_BUFFER_POOL printf
00059 # else
00060 #   define LOG_BUFFER_POOL(...)
00061 # endif
00062 #endif
00063 
00064 
00065 // TODO Move to some common place
00066 static bool getBoolParameter(const char* name, bool defaultValue)
00067 {
00068 /*
00069  * If your system doesn't support getenv(), define NO_GETENV to disable
00070  * this feature.
00071  */
00072 #ifdef NO_GETENV
00073     const char* envValue = NULL;
00074 #else
00075     const char* envValue = getenv(name);
00076 #endif
00077     if (envValue == NULL)
00078     {
00079         return defaultValue;
00080     }
00081     cv::String value = envValue;
00082     if (value == "1" || value == "True" || value == "true" || value == "TRUE")
00083     {
00084         return true;
00085     }
00086     if (value == "0" || value == "False" || value == "false" || value == "FALSE")
00087     {
00088         return false;
00089     }
00090     CV_ErrorNoReturn(cv::Error::StsBadArg, cv::format("Invalid value for %s parameter: %s", name, value.c_str()));
00091 }
00092 
00093 
00094 // TODO Move to some common place
00095 static size_t getConfigurationParameterForSize(const char* name, size_t defaultValue)
00096 {
00097 #ifdef NO_GETENV
00098     const char* envValue = NULL;
00099 #else
00100     const char* envValue = getenv(name);
00101 #endif
00102     if (envValue == NULL)
00103     {
00104         return defaultValue;
00105     }
00106     cv::String value = envValue;
00107     size_t pos = 0;
00108     for (; pos < value.size(); pos++)
00109     {
00110         if (!isdigit(value[pos]))
00111             break;
00112     }
00113     cv::String valueStr = value.substr(0, pos);
00114     cv::String suffixStr = value.substr(pos, value.length() - pos);
00115     int v = atoi(valueStr.c_str());
00116     if (suffixStr.length() == 0)
00117         return v;
00118     else if (suffixStr == "MB" || suffixStr == "Mb" || suffixStr == "mb")
00119         return v * 1024 * 1024;
00120     else if (suffixStr == "KB" || suffixStr == "Kb" || suffixStr == "kb")
00121         return v * 1024;
00122     CV_ErrorNoReturn(cv::Error::StsBadArg, cv::format("Invalid value for %s parameter: %s", name, value.c_str()));
00123 }
00124 
00125 #if CV_OPENCL_SHOW_SVM_LOG
00126 // TODO add timestamp logging
00127 #define CV_OPENCL_SVM_TRACE_P printf("line %d (ocl.cpp): ", __LINE__); printf
00128 #else
00129 #define CV_OPENCL_SVM_TRACE_P(...)
00130 #endif
00131 
00132 #if CV_OPENCL_SHOW_SVM_ERROR_LOG
00133 // TODO add timestamp logging
00134 #define CV_OPENCL_SVM_TRACE_ERROR_P printf("Error on line %d (ocl.cpp): ", __LINE__); printf
00135 #else
00136 #define CV_OPENCL_SVM_TRACE_ERROR_P(...)
00137 #endif
00138 
00139 #include "opencv2/core/opencl/runtime/opencl_clamdblas.hpp"
00140 #include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp"
00141 
00142 #ifdef HAVE_OPENCL
00143 #include "opencv2/core/opencl/runtime/opencl_core.hpp"
00144 #else
00145 // TODO FIXIT: This file can't be build without OPENCL
00146 
00147 /*
00148   Part of the file is an extract from the standard OpenCL headers from Khronos site.
00149   Below is the original copyright.
00150 */
00151 
00152 /*******************************************************************************
00153  * Copyright (c) 2008 - 2012 The Khronos Group Inc.
00154  *
00155  * Permission is hereby granted, free of charge, to any person obtaining a
00156  * copy of this software and/or associated documentation files (the
00157  * "Materials"), to deal in the Materials without restriction, including
00158  * without limitation the rights to use, copy, modify, merge, publish,
00159  * distribute, sublicense, and/or sell copies of the Materials, and to
00160  * permit persons to whom the Materials are furnished to do so, subject to
00161  * the following conditions:
00162  *
00163  * The above copyright notice and this permission notice shall be included
00164  * in all copies or substantial portions of the Materials.
00165  *
00166  * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
00167  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
00168  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
00169  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
00170  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
00171  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
00172  * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
00173  ******************************************************************************/
00174 
00175 #if 0 //defined __APPLE__
00176 #define HAVE_OPENCL 1
00177 #else
00178 #undef HAVE_OPENCL
00179 #endif
00180 
00181 #define OPENCV_CL_NOT_IMPLEMENTED -1000
00182 
00183 #ifdef HAVE_OPENCL
00184 
00185 #if defined __APPLE__
00186 #include <OpenCL/opencl.h>
00187 #else
00188 #include <CL/opencl.h>
00189 #endif
00190 
00191 static const bool g_haveOpenCL = true;
00192 
00193 #else
00194 
00195 extern "C" {
00196 
00197 struct _cl_platform_id { int dummy; };
00198 struct _cl_device_id { int dummy; };
00199 struct _cl_context { int dummy; };
00200 struct _cl_command_queue { int dummy; };
00201 struct _cl_mem { int dummy; };
00202 struct _cl_program { int dummy; };
00203 struct _cl_kernel { int dummy; };
00204 struct _cl_event { int dummy; };
00205 struct _cl_sampler { int dummy; };
00206 
00207 typedef struct _cl_platform_id *    cl_platform_id;
00208 typedef struct _cl_device_id *      cl_device_id;
00209 typedef struct _cl_context *        cl_context;
00210 typedef struct _cl_command_queue *  cl_command_queue;
00211 typedef struct _cl_mem *            cl_mem;
00212 typedef struct _cl_program *        cl_program;
00213 typedef struct _cl_kernel *         cl_kernel;
00214 typedef struct _cl_event *          cl_event;
00215 typedef struct _cl_sampler *        cl_sampler;
00216 
00217 typedef int cl_int;
00218 typedef unsigned cl_uint;
00219 #if defined (_WIN32) && defined(_MSC_VER)
00220     typedef __int64 cl_long;
00221     typedef unsigned __int64 cl_ulong;
00222 #else
00223     typedef long cl_long;
00224     typedef unsigned long cl_ulong;
00225 #endif
00226 
00227 typedef cl_uint             cl_bool; /* WARNING!  Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. */
00228 typedef cl_ulong            cl_bitfield;
00229 typedef cl_bitfield         cl_device_type;
00230 typedef cl_uint             cl_platform_info;
00231 typedef cl_uint             cl_device_info;
00232 typedef cl_bitfield         cl_device_fp_config;
00233 typedef cl_uint             cl_device_mem_cache_type;
00234 typedef cl_uint             cl_device_local_mem_type;
00235 typedef cl_bitfield         cl_device_exec_capabilities;
00236 typedef cl_bitfield         cl_command_queue_properties;
00237 typedef intptr_t            cl_device_partition_property;
00238 typedef cl_bitfield         cl_device_affinity_domain;
00239 
00240 typedef intptr_t            cl_context_properties;
00241 typedef cl_uint             cl_context_info;
00242 typedef cl_uint             cl_command_queue_info;
00243 typedef cl_uint             cl_channel_order;
00244 typedef cl_uint             cl_channel_type;
00245 typedef cl_bitfield         cl_mem_flags;
00246 typedef cl_uint             cl_mem_object_type;
00247 typedef cl_uint             cl_mem_info;
00248 typedef cl_bitfield         cl_mem_migration_flags;
00249 typedef cl_uint             cl_image_info;
00250 typedef cl_uint             cl_buffer_create_type;
00251 typedef cl_uint             cl_addressing_mode;
00252 typedef cl_uint             cl_filter_mode;
00253 typedef cl_uint             cl_sampler_info;
00254 typedef cl_bitfield         cl_map_flags;
00255 typedef cl_uint             cl_program_info;
00256 typedef cl_uint             cl_program_build_info;
00257 typedef cl_uint             cl_program_binary_type;
00258 typedef cl_int              cl_build_status;
00259 typedef cl_uint             cl_kernel_info;
00260 typedef cl_uint             cl_kernel_arg_info;
00261 typedef cl_uint             cl_kernel_arg_address_qualifier;
00262 typedef cl_uint             cl_kernel_arg_access_qualifier;
00263 typedef cl_bitfield         cl_kernel_arg_type_qualifier;
00264 typedef cl_uint             cl_kernel_work_group_info;
00265 typedef cl_uint             cl_event_info;
00266 typedef cl_uint             cl_command_type;
00267 typedef cl_uint             cl_profiling_info;
00268 
00269 
00270 typedef struct _cl_image_format {
00271     cl_channel_order        image_channel_order;
00272     cl_channel_type         image_channel_data_type;
00273 } cl_image_format;
00274 
00275 typedef struct _cl_image_desc {
00276     cl_mem_object_type      image_type;
00277     size_t                  image_width;
00278     size_t                  image_height;
00279     size_t                  image_depth;
00280     size_t                  image_array_size;
00281     size_t                  image_row_pitch;
00282     size_t                  image_slice_pitch;
00283     cl_uint                 num_mip_levels;
00284     cl_uint                 num_samples;
00285     cl_mem                  buffer;
00286 } cl_image_desc;
00287 
00288 typedef struct _cl_buffer_region {
00289     size_t                  origin;
00290     size_t                  size;
00291 } cl_buffer_region;
00292 
00293 
00294 //////////////////////////////////////////////////////////
00295 
00296 #define CL_SUCCESS                                  0
00297 #define CL_DEVICE_NOT_FOUND                         -1
00298 #define CL_DEVICE_NOT_AVAILABLE                     -2
00299 #define CL_COMPILER_NOT_AVAILABLE                   -3
00300 #define CL_MEM_OBJECT_ALLOCATION_FAILURE            -4
00301 #define CL_OUT_OF_RESOURCES                         -5
00302 #define CL_OUT_OF_HOST_MEMORY                       -6
00303 #define CL_PROFILING_INFO_NOT_AVAILABLE             -7
00304 #define CL_MEM_COPY_OVERLAP                         -8
00305 #define CL_IMAGE_FORMAT_MISMATCH                    -9
00306 #define CL_IMAGE_FORMAT_NOT_SUPPORTED               -10
00307 #define CL_BUILD_PROGRAM_FAILURE                    -11
00308 #define CL_MAP_FAILURE                              -12
00309 #define CL_MISALIGNED_SUB_BUFFER_OFFSET             -13
00310 #define CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST -14
00311 #define CL_COMPILE_PROGRAM_FAILURE                  -15
00312 #define CL_LINKER_NOT_AVAILABLE                     -16
00313 #define CL_LINK_PROGRAM_FAILURE                     -17
00314 #define CL_DEVICE_PARTITION_FAILED                  -18
00315 #define CL_KERNEL_ARG_INFO_NOT_AVAILABLE            -19
00316 
00317 #define CL_INVALID_VALUE                            -30
00318 #define CL_INVALID_DEVICE_TYPE                      -31
00319 #define CL_INVALID_PLATFORM                         -32
00320 #define CL_INVALID_DEVICE                           -33
00321 #define CL_INVALID_CONTEXT                          -34
00322 #define CL_INVALID_QUEUE_PROPERTIES                 -35
00323 #define CL_INVALID_COMMAND_QUEUE                    -36
00324 #define CL_INVALID_HOST_PTR                         -37
00325 #define CL_INVALID_MEM_OBJECT                       -38
00326 #define CL_INVALID_IMAGE_FORMAT_DESCRIPTOR          -39
00327 #define CL_INVALID_IMAGE_SIZE                       -40
00328 #define CL_INVALID_SAMPLER                          -41
00329 #define CL_INVALID_BINARY                           -42
00330 #define CL_INVALID_BUILD_OPTIONS                    -43
00331 #define CL_INVALID_PROGRAM                          -44
00332 #define CL_INVALID_PROGRAM_EXECUTABLE               -45
00333 #define CL_INVALID_KERNEL_NAME                      -46
00334 #define CL_INVALID_KERNEL_DEFINITION                -47
00335 #define CL_INVALID_KERNEL                           -48
00336 #define CL_INVALID_ARG_INDEX                        -49
00337 #define CL_INVALID_ARG_VALUE                        -50
00338 #define CL_INVALID_ARG_SIZE                         -51
00339 #define CL_INVALID_KERNEL_ARGS                      -52
00340 #define CL_INVALID_WORK_DIMENSION                   -53
00341 #define CL_INVALID_WORK_GROUP_SIZE                  -54
00342 #define CL_INVALID_WORK_ITEM_SIZE                   -55
00343 #define CL_INVALID_GLOBAL_OFFSET                    -56
00344 #define CL_INVALID_EVENT_WAIT_LIST                  -57
00345 #define CL_INVALID_EVENT                            -58
00346 #define CL_INVALID_OPERATION                        -59
00347 #define CL_INVALID_GL_OBJECT                        -60
00348 #define CL_INVALID_BUFFER_SIZE                      -61
00349 #define CL_INVALID_MIP_LEVEL                        -62
00350 #define CL_INVALID_GLOBAL_WORK_SIZE                 -63
00351 #define CL_INVALID_PROPERTY                         -64
00352 #define CL_INVALID_IMAGE_DESCRIPTOR                 -65
00353 #define CL_INVALID_COMPILER_OPTIONS                 -66
00354 #define CL_INVALID_LINKER_OPTIONS                   -67
00355 #define CL_INVALID_DEVICE_PARTITION_COUNT           -68
00356 
00357 /*#define CL_VERSION_1_0                              1
00358 #define CL_VERSION_1_1                              1
00359 #define CL_VERSION_1_2                              1*/
00360 
00361 #define CL_FALSE                                    0
00362 #define CL_TRUE                                     1
00363 #define CL_BLOCKING                                 CL_TRUE
00364 #define CL_NON_BLOCKING                             CL_FALSE
00365 
00366 #define CL_PLATFORM_PROFILE                         0x0900
00367 #define CL_PLATFORM_VERSION                         0x0901
00368 #define CL_PLATFORM_NAME                            0x0902
00369 #define CL_PLATFORM_VENDOR                          0x0903
00370 #define CL_PLATFORM_EXTENSIONS                      0x0904
00371 
00372 #define CL_DEVICE_TYPE_DEFAULT                      (1 << 0)
00373 #define CL_DEVICE_TYPE_CPU                          (1 << 1)
00374 #define CL_DEVICE_TYPE_GPU                          (1 << 2)
00375 #define CL_DEVICE_TYPE_ACCELERATOR                  (1 << 3)
00376 #define CL_DEVICE_TYPE_CUSTOM                       (1 << 4)
00377 #define CL_DEVICE_TYPE_ALL                          0xFFFFFFFF
00378 #define CL_DEVICE_TYPE                              0x1000
00379 #define CL_DEVICE_VENDOR_ID                         0x1001
00380 #define CL_DEVICE_MAX_COMPUTE_UNITS                 0x1002
00381 #define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS          0x1003
00382 #define CL_DEVICE_MAX_WORK_GROUP_SIZE               0x1004
00383 #define CL_DEVICE_MAX_WORK_ITEM_SIZES               0x1005
00384 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR       0x1006
00385 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT      0x1007
00386 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT        0x1008
00387 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG       0x1009
00388 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT      0x100A
00389 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE     0x100B
00390 #define CL_DEVICE_MAX_CLOCK_FREQUENCY               0x100C
00391 #define CL_DEVICE_ADDRESS_BITS                      0x100D
00392 #define CL_DEVICE_MAX_READ_IMAGE_ARGS               0x100E
00393 #define CL_DEVICE_MAX_WRITE_IMAGE_ARGS              0x100F
00394 #define CL_DEVICE_MAX_MEM_ALLOC_SIZE                0x1010
00395 #define CL_DEVICE_IMAGE2D_MAX_WIDTH                 0x1011
00396 #define CL_DEVICE_IMAGE2D_MAX_HEIGHT                0x1012
00397 #define CL_DEVICE_IMAGE3D_MAX_WIDTH                 0x1013
00398 #define CL_DEVICE_IMAGE3D_MAX_HEIGHT                0x1014
00399 #define CL_DEVICE_IMAGE3D_MAX_DEPTH                 0x1015
00400 #define CL_DEVICE_IMAGE_SUPPORT                     0x1016
00401 #define CL_DEVICE_MAX_PARAMETER_SIZE                0x1017
00402 #define CL_DEVICE_MAX_SAMPLERS                      0x1018
00403 #define CL_DEVICE_MEM_BASE_ADDR_ALIGN               0x1019
00404 #define CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE          0x101A
00405 #define CL_DEVICE_SINGLE_FP_CONFIG                  0x101B
00406 #define CL_DEVICE_GLOBAL_MEM_CACHE_TYPE             0x101C
00407 #define CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE         0x101D
00408 #define CL_DEVICE_GLOBAL_MEM_CACHE_SIZE             0x101E
00409 #define CL_DEVICE_GLOBAL_MEM_SIZE                   0x101F
00410 #define CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE          0x1020
00411 #define CL_DEVICE_MAX_CONSTANT_ARGS                 0x1021
00412 #define CL_DEVICE_LOCAL_MEM_TYPE                    0x1022
00413 #define CL_DEVICE_LOCAL_MEM_SIZE                    0x1023
00414 #define CL_DEVICE_ERROR_CORRECTION_SUPPORT          0x1024
00415 #define CL_DEVICE_PROFILING_TIMER_RESOLUTION        0x1025
00416 #define CL_DEVICE_ENDIAN_LITTLE                     0x1026
00417 #define CL_DEVICE_AVAILABLE                         0x1027
00418 #define CL_DEVICE_COMPILER_AVAILABLE                0x1028
00419 #define CL_DEVICE_EXECUTION_CAPABILITIES            0x1029
00420 #define CL_DEVICE_QUEUE_PROPERTIES                  0x102A
00421 #define CL_DEVICE_NAME                              0x102B
00422 #define CL_DEVICE_VENDOR                            0x102C
00423 #define CL_DRIVER_VERSION                           0x102D
00424 #define CL_DEVICE_PROFILE                           0x102E
00425 #define CL_DEVICE_VERSION                           0x102F
00426 #define CL_DEVICE_EXTENSIONS                        0x1030
00427 #define CL_DEVICE_PLATFORM                          0x1031
00428 #define CL_DEVICE_DOUBLE_FP_CONFIG                  0x1032
00429 #define CL_DEVICE_HALF_FP_CONFIG                    0x1033
00430 #define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF       0x1034
00431 #define CL_DEVICE_HOST_UNIFIED_MEMORY               0x1035
00432 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR          0x1036
00433 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT         0x1037
00434 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_INT           0x1038
00435 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG          0x1039
00436 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT         0x103A
00437 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE        0x103B
00438 #define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF          0x103C
00439 #define CL_DEVICE_OPENCL_C_VERSION                  0x103D
00440 #define CL_DEVICE_LINKER_AVAILABLE                  0x103E
00441 #define CL_DEVICE_BUILT_IN_KERNELS                  0x103F
00442 #define CL_DEVICE_IMAGE_MAX_BUFFER_SIZE             0x1040
00443 #define CL_DEVICE_IMAGE_MAX_ARRAY_SIZE              0x1041
00444 #define CL_DEVICE_PARENT_DEVICE                     0x1042
00445 #define CL_DEVICE_PARTITION_MAX_SUB_DEVICES         0x1043
00446 #define CL_DEVICE_PARTITION_PROPERTIES              0x1044
00447 #define CL_DEVICE_PARTITION_AFFINITY_DOMAIN         0x1045
00448 #define CL_DEVICE_PARTITION_TYPE                    0x1046
00449 #define CL_DEVICE_REFERENCE_COUNT                   0x1047
00450 #define CL_DEVICE_PREFERRED_INTEROP_USER_SYNC       0x1048
00451 #define CL_DEVICE_PRINTF_BUFFER_SIZE                0x1049
00452 #define CL_DEVICE_IMAGE_PITCH_ALIGNMENT             0x104A
00453 #define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT      0x104B
00454 
00455 #define CL_FP_DENORM                                (1 << 0)
00456 #define CL_FP_INF_NAN                               (1 << 1)
00457 #define CL_FP_ROUND_TO_NEAREST                      (1 << 2)
00458 #define CL_FP_ROUND_TO_ZERO                         (1 << 3)
00459 #define CL_FP_ROUND_TO_INF                          (1 << 4)
00460 #define CL_FP_FMA                                   (1 << 5)
00461 #define CL_FP_SOFT_FLOAT                            (1 << 6)
00462 #define CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT         (1 << 7)
00463 
00464 #define CL_NONE                                     0x0
00465 #define CL_READ_ONLY_CACHE                          0x1
00466 #define CL_READ_WRITE_CACHE                         0x2
00467 #define CL_LOCAL                                    0x1
00468 #define CL_GLOBAL                                   0x2
00469 #define CL_EXEC_KERNEL                              (1 << 0)
00470 #define CL_EXEC_NATIVE_KERNEL                       (1 << 1)
00471 #define CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE      (1 << 0)
00472 #define CL_QUEUE_PROFILING_ENABLE                   (1 << 1)
00473 
00474 #define CL_CONTEXT_REFERENCE_COUNT                  0x1080
00475 #define CL_CONTEXT_DEVICES                          0x1081
00476 #define CL_CONTEXT_PROPERTIES                       0x1082
00477 #define CL_CONTEXT_NUM_DEVICES                      0x1083
00478 #define CL_CONTEXT_PLATFORM                         0x1084
00479 #define CL_CONTEXT_INTEROP_USER_SYNC                0x1085
00480 
00481 #define CL_DEVICE_PARTITION_EQUALLY                 0x1086
00482 #define CL_DEVICE_PARTITION_BY_COUNTS               0x1087
00483 #define CL_DEVICE_PARTITION_BY_COUNTS_LIST_END      0x0
00484 #define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN      0x1088
00485 #define CL_DEVICE_AFFINITY_DOMAIN_NUMA                     (1 << 0)
00486 #define CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE                 (1 << 1)
00487 #define CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE                 (1 << 2)
00488 #define CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE                 (1 << 3)
00489 #define CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE                 (1 << 4)
00490 #define CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE       (1 << 5)
00491 #define CL_QUEUE_CONTEXT                            0x1090
00492 #define CL_QUEUE_DEVICE                             0x1091
00493 #define CL_QUEUE_REFERENCE_COUNT                    0x1092
00494 #define CL_QUEUE_PROPERTIES                         0x1093
00495 #define CL_MEM_READ_WRITE                           (1 << 0)
00496 #define CL_MEM_WRITE_ONLY                           (1 << 1)
00497 #define CL_MEM_READ_ONLY                            (1 << 2)
00498 #define CL_MEM_USE_HOST_PTR                         (1 << 3)
00499 #define CL_MEM_ALLOC_HOST_PTR                       (1 << 4)
00500 #define CL_MEM_COPY_HOST_PTR                        (1 << 5)
00501 // reserved                                         (1 << 6)
00502 #define CL_MEM_HOST_WRITE_ONLY                      (1 << 7)
00503 #define CL_MEM_HOST_READ_ONLY                       (1 << 8)
00504 #define CL_MEM_HOST_NO_ACCESS                       (1 << 9)
00505 #define CL_MIGRATE_MEM_OBJECT_HOST                  (1 << 0)
00506 #define CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED     (1 << 1)
00507 
00508 #define CL_R                                        0x10B0
00509 #define CL_A                                        0x10B1
00510 #define CL_RG                                       0x10B2
00511 #define CL_RA                                       0x10B3
00512 #define CL_RGB                                      0x10B4
00513 #define CL_RGBA                                     0x10B5
00514 #define CL_BGRA                                     0x10B6
00515 #define CL_ARGB                                     0x10B7
00516 #define CL_INTENSITY                                0x10B8
00517 #define CL_LUMINANCE                                0x10B9
00518 #define CL_Rx                                       0x10BA
00519 #define CL_RGx                                      0x10BB
00520 #define CL_RGBx                                     0x10BC
00521 #define CL_DEPTH                                    0x10BD
00522 #define CL_DEPTH_STENCIL                            0x10BE
00523 
00524 #define CL_SNORM_INT8                               0x10D0
00525 #define CL_SNORM_INT16                              0x10D1
00526 #define CL_UNORM_INT8                               0x10D2
00527 #define CL_UNORM_INT16                              0x10D3
00528 #define CL_UNORM_SHORT_565                          0x10D4
00529 #define CL_UNORM_SHORT_555                          0x10D5
00530 #define CL_UNORM_INT_101010                         0x10D6
00531 #define CL_SIGNED_INT8                              0x10D7
00532 #define CL_SIGNED_INT16                             0x10D8
00533 #define CL_SIGNED_INT32                             0x10D9
00534 #define CL_UNSIGNED_INT8                            0x10DA
00535 #define CL_UNSIGNED_INT16                           0x10DB
00536 #define CL_UNSIGNED_INT32                           0x10DC
00537 #define CL_HALF_FLOAT                               0x10DD
00538 #define CL_FLOAT                                    0x10DE
00539 #define CL_UNORM_INT24                              0x10DF
00540 
00541 #define CL_MEM_OBJECT_BUFFER                        0x10F0
00542 #define CL_MEM_OBJECT_IMAGE2D                       0x10F1
00543 #define CL_MEM_OBJECT_IMAGE3D                       0x10F2
00544 #define CL_MEM_OBJECT_IMAGE2D_ARRAY                 0x10F3
00545 #define CL_MEM_OBJECT_IMAGE1D                       0x10F4
00546 #define CL_MEM_OBJECT_IMAGE1D_ARRAY                 0x10F5
00547 #define CL_MEM_OBJECT_IMAGE1D_BUFFER                0x10F6
00548 
00549 #define CL_MEM_TYPE                                 0x1100
00550 #define CL_MEM_FLAGS                                0x1101
00551 #define CL_MEM_SIZE                                 0x1102
00552 #define CL_MEM_HOST_PTR                             0x1103
00553 #define CL_MEM_MAP_COUNT                            0x1104
00554 #define CL_MEM_REFERENCE_COUNT                      0x1105
00555 #define CL_MEM_CONTEXT                              0x1106
00556 #define CL_MEM_ASSOCIATED_MEMOBJECT                 0x1107
00557 #define CL_MEM_OFFSET                               0x1108
00558 
00559 #define CL_IMAGE_FORMAT                             0x1110
00560 #define CL_IMAGE_ELEMENT_SIZE                       0x1111
00561 #define CL_IMAGE_ROW_PITCH                          0x1112
00562 #define CL_IMAGE_SLICE_PITCH                        0x1113
00563 #define CL_IMAGE_WIDTH                              0x1114
00564 #define CL_IMAGE_HEIGHT                             0x1115
00565 #define CL_IMAGE_DEPTH                              0x1116
00566 #define CL_IMAGE_ARRAY_SIZE                         0x1117
00567 #define CL_IMAGE_BUFFER                             0x1118
00568 #define CL_IMAGE_NUM_MIP_LEVELS                     0x1119
00569 #define CL_IMAGE_NUM_SAMPLES                        0x111A
00570 
00571 #define CL_ADDRESS_NONE                             0x1130
00572 #define CL_ADDRESS_CLAMP_TO_EDGE                    0x1131
00573 #define CL_ADDRESS_CLAMP                            0x1132
00574 #define CL_ADDRESS_REPEAT                           0x1133
00575 #define CL_ADDRESS_MIRRORED_REPEAT                  0x1134
00576 
00577 #define CL_FILTER_NEAREST                           0x1140
00578 #define CL_FILTER_LINEAR                            0x1141
00579 
00580 #define CL_SAMPLER_REFERENCE_COUNT                  0x1150
00581 #define CL_SAMPLER_CONTEXT                          0x1151
00582 #define CL_SAMPLER_NORMALIZED_COORDS                0x1152
00583 #define CL_SAMPLER_ADDRESSING_MODE                  0x1153
00584 #define CL_SAMPLER_FILTER_MODE                      0x1154
00585 
00586 #define CL_MAP_READ                                 (1 << 0)
00587 #define CL_MAP_WRITE                                (1 << 1)
00588 #define CL_MAP_WRITE_INVALIDATE_REGION              (1 << 2)
00589 
00590 #define CL_PROGRAM_REFERENCE_COUNT                  0x1160
00591 #define CL_PROGRAM_CONTEXT                          0x1161
00592 #define CL_PROGRAM_NUM_DEVICES                      0x1162
00593 #define CL_PROGRAM_DEVICES                          0x1163
00594 #define CL_PROGRAM_SOURCE                           0x1164
00595 #define CL_PROGRAM_BINARY_SIZES                     0x1165
00596 #define CL_PROGRAM_BINARIES                         0x1166
00597 #define CL_PROGRAM_NUM_KERNELS                      0x1167
00598 #define CL_PROGRAM_KERNEL_NAMES                     0x1168
00599 #define CL_PROGRAM_BUILD_STATUS                     0x1181
00600 #define CL_PROGRAM_BUILD_OPTIONS                    0x1182
00601 #define CL_PROGRAM_BUILD_LOG                        0x1183
00602 #define CL_PROGRAM_BINARY_TYPE                      0x1184
00603 #define CL_PROGRAM_BINARY_TYPE_NONE                 0x0
00604 #define CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT      0x1
00605 #define CL_PROGRAM_BINARY_TYPE_LIBRARY              0x2
00606 #define CL_PROGRAM_BINARY_TYPE_EXECUTABLE           0x4
00607 
00608 #define CL_BUILD_SUCCESS                            0
00609 #define CL_BUILD_NONE                               -1
00610 #define CL_BUILD_ERROR                              -2
00611 #define CL_BUILD_IN_PROGRESS                        -3
00612 
00613 #define CL_KERNEL_FUNCTION_NAME                     0x1190
00614 #define CL_KERNEL_NUM_ARGS                          0x1191
00615 #define CL_KERNEL_REFERENCE_COUNT                   0x1192
00616 #define CL_KERNEL_CONTEXT                           0x1193
00617 #define CL_KERNEL_PROGRAM                           0x1194
00618 #define CL_KERNEL_ATTRIBUTES                        0x1195
00619 #define CL_KERNEL_ARG_ADDRESS_QUALIFIER             0x1196
00620 #define CL_KERNEL_ARG_ACCESS_QUALIFIER              0x1197
00621 #define CL_KERNEL_ARG_TYPE_NAME                     0x1198
00622 #define CL_KERNEL_ARG_TYPE_QUALIFIER                0x1199
00623 #define CL_KERNEL_ARG_NAME                          0x119A
00624 #define CL_KERNEL_ARG_ADDRESS_GLOBAL                0x119B
00625 #define CL_KERNEL_ARG_ADDRESS_LOCAL                 0x119C
00626 #define CL_KERNEL_ARG_ADDRESS_CONSTANT              0x119D
00627 #define CL_KERNEL_ARG_ADDRESS_PRIVATE               0x119E
00628 #define CL_KERNEL_ARG_ACCESS_READ_ONLY              0x11A0
00629 #define CL_KERNEL_ARG_ACCESS_WRITE_ONLY             0x11A1
00630 #define CL_KERNEL_ARG_ACCESS_READ_WRITE             0x11A2
00631 #define CL_KERNEL_ARG_ACCESS_NONE                   0x11A3
00632 #define CL_KERNEL_ARG_TYPE_NONE                     0
00633 #define CL_KERNEL_ARG_TYPE_CONST                    (1 << 0)
00634 #define CL_KERNEL_ARG_TYPE_RESTRICT                 (1 << 1)
00635 #define CL_KERNEL_ARG_TYPE_VOLATILE                 (1 << 2)
00636 #define CL_KERNEL_WORK_GROUP_SIZE                   0x11B0
00637 #define CL_KERNEL_COMPILE_WORK_GROUP_SIZE           0x11B1
00638 #define CL_KERNEL_LOCAL_MEM_SIZE                    0x11B2
00639 #define CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x11B3
00640 #define CL_KERNEL_PRIVATE_MEM_SIZE                  0x11B4
00641 #define CL_KERNEL_GLOBAL_WORK_SIZE                  0x11B5
00642 
00643 #define CL_EVENT_COMMAND_QUEUE                      0x11D0
00644 #define CL_EVENT_COMMAND_TYPE                       0x11D1
00645 #define CL_EVENT_REFERENCE_COUNT                    0x11D2
00646 #define CL_EVENT_COMMAND_EXECUTION_STATUS           0x11D3
00647 #define CL_EVENT_CONTEXT                            0x11D4
00648 
00649 #define CL_COMMAND_NDRANGE_KERNEL                   0x11F0
00650 #define CL_COMMAND_TASK                             0x11F1
00651 #define CL_COMMAND_NATIVE_KERNEL                    0x11F2
00652 #define CL_COMMAND_READ_BUFFER                      0x11F3
00653 #define CL_COMMAND_WRITE_BUFFER                     0x11F4
00654 #define CL_COMMAND_COPY_BUFFER                      0x11F5
00655 #define CL_COMMAND_READ_IMAGE                       0x11F6
00656 #define CL_COMMAND_WRITE_IMAGE                      0x11F7
00657 #define CL_COMMAND_COPY_IMAGE                       0x11F8
00658 #define CL_COMMAND_COPY_IMAGE_TO_BUFFER             0x11F9
00659 #define CL_COMMAND_COPY_BUFFER_TO_IMAGE             0x11FA
00660 #define CL_COMMAND_MAP_BUFFER                       0x11FB
00661 #define CL_COMMAND_MAP_IMAGE                        0x11FC
00662 #define CL_COMMAND_UNMAP_MEM_OBJECT                 0x11FD
00663 #define CL_COMMAND_MARKER                           0x11FE
00664 #define CL_COMMAND_ACQUIRE_GL_OBJECTS               0x11FF
00665 #define CL_COMMAND_RELEASE_GL_OBJECTS               0x1200
00666 #define CL_COMMAND_READ_BUFFER_RECT                 0x1201
00667 #define CL_COMMAND_WRITE_BUFFER_RECT                0x1202
00668 #define CL_COMMAND_COPY_BUFFER_RECT                 0x1203
00669 #define CL_COMMAND_USER                             0x1204
00670 #define CL_COMMAND_BARRIER                          0x1205
00671 #define CL_COMMAND_MIGRATE_MEM_OBJECTS              0x1206
00672 #define CL_COMMAND_FILL_BUFFER                      0x1207
00673 #define CL_COMMAND_FILL_IMAGE                       0x1208
00674 
00675 #define CL_COMPLETE                                 0x0
00676 #define CL_RUNNING                                  0x1
00677 #define CL_SUBMITTED                                0x2
00678 #define CL_QUEUED                                   0x3
00679 #define CL_BUFFER_CREATE_TYPE_REGION                0x1220
00680 
00681 #define CL_PROFILING_COMMAND_QUEUED                 0x1280
00682 #define CL_PROFILING_COMMAND_SUBMIT                 0x1281
00683 #define CL_PROFILING_COMMAND_START                  0x1282
00684 #define CL_PROFILING_COMMAND_END                    0x1283
00685 
00686 #define CL_CALLBACK CV_STDCALL
00687 
00688 static volatile bool g_haveOpenCL = false;
00689 static const char* oclFuncToCheck = "clEnqueueReadBufferRect";
00690 
00691 #if defined(__APPLE__)
00692 #include <dlfcn.h>
00693 
00694 static void* initOpenCLAndLoad(const char* funcname)
00695 {
00696     static bool initialized = false;
00697     static void* handle = 0;
00698     if (!handle)
00699     {
00700         if(!initialized)
00701         {
00702             const char* oclpath = getenv("OPENCV_OPENCL_RUNTIME");
00703             oclpath = oclpath && strlen(oclpath) > 0 ? oclpath :
00704                 "/System/Library/Frameworks/OpenCL.framework/Versions/Current/OpenCL";
00705             handle = dlopen(oclpath, RTLD_LAZY);
00706             initialized = true;
00707             g_haveOpenCL = handle != 0 && dlsym(handle, oclFuncToCheck) != 0;
00708             if( g_haveOpenCL )
00709                 fprintf(stderr, "Successfully loaded OpenCL v1.1+ runtime from %s\n", oclpath);
00710             else
00711                 fprintf(stderr, "Failed to load OpenCL runtime\n");
00712         }
00713         if(!handle)
00714             return 0;
00715     }
00716 
00717     return funcname && handle ? dlsym(handle, funcname) : 0;
00718 }
00719 
00720 #elif defined WIN32 || defined _WIN32
00721 
00722 #ifndef _WIN32_WINNT           // This is needed for the declaration of TryEnterCriticalSection in winbase.h with Visual Studio 2005 (and older?)
00723   #define _WIN32_WINNT 0x0400  // http://msdn.microsoft.com/en-us/library/ms686857(VS.85).aspx
00724 #endif
00725 #include <windows.h>
00726 #if (_WIN32_WINNT >= 0x0602)
00727   #include <synchapi.h>
00728 #endif
00729 #undef small
00730 #undef min
00731 #undef max
00732 #undef abs
00733 
00734 static void* initOpenCLAndLoad(const char* funcname)
00735 {
00736     static bool initialized = false;
00737     static HMODULE handle = 0;
00738     if (!handle)
00739     {
00740 #ifndef WINRT
00741         if(!initialized)
00742         {
00743             handle = LoadLibraryA("OpenCL.dll");
00744             initialized = true;
00745             g_haveOpenCL = handle != 0 && GetProcAddress(handle, oclFuncToCheck) != 0;
00746         }
00747 #endif
00748         if(!handle)
00749             return 0;
00750     }
00751 
00752     return funcname ? (void*)GetProcAddress(handle, funcname) : 0;
00753 }
00754 
00755 #elif defined(__linux)
00756 
00757 #include <dlfcn.h>
00758 #include <stdio.h>
00759 
00760 static void* initOpenCLAndLoad(const char* funcname)
00761 {
00762     static bool initialized = false;
00763     static void* handle = 0;
00764     if (!handle)
00765     {
00766         if(!initialized)
00767         {
00768             handle = dlopen("libOpenCL.so", RTLD_LAZY);
00769             if(!handle)
00770                 handle = dlopen("libCL.so", RTLD_LAZY);
00771             initialized = true;
00772             g_haveOpenCL = handle != 0 && dlsym(handle, oclFuncToCheck) != 0;
00773         }
00774         if(!handle)
00775             return 0;
00776     }
00777 
00778     return funcname ? (void*)dlsym(handle, funcname) : 0;
00779 }
00780 
00781 #else
00782 
00783 static void* initOpenCLAndLoad(const char*)
00784 {
00785     return 0;
00786 }
00787 
00788 #endif
00789 
00790 
00791 #define OCL_FUNC(rettype, funcname, argsdecl, args) \
00792     typedef rettype (CV_STDCALL * funcname##_t) argsdecl; \
00793     static rettype funcname argsdecl \
00794     { \
00795         static funcname##_t funcname##_p = 0; \
00796         if( !funcname##_p ) \
00797         { \
00798             funcname##_p = (funcname##_t)initOpenCLAndLoad(#funcname); \
00799             if( !funcname##_p ) \
00800                 return OPENCV_CL_NOT_IMPLEMENTED; \
00801         } \
00802         return funcname##_p args; \
00803     }
00804 
00805 
00806 #define OCL_FUNC_P(rettype, funcname, argsdecl, args) \
00807     typedef rettype (CV_STDCALL * funcname##_t) argsdecl; \
00808     static rettype funcname argsdecl \
00809     { \
00810         static funcname##_t funcname##_p = 0; \
00811         if( !funcname##_p ) \
00812         { \
00813             funcname##_p = (funcname##_t)initOpenCLAndLoad(#funcname); \
00814             if( !funcname##_p ) \
00815             { \
00816                 if( errcode_ret ) \
00817                     *errcode_ret = OPENCV_CL_NOT_IMPLEMENTED; \
00818                 return 0; \
00819             } \
00820         } \
00821         return funcname##_p args; \
00822     }
00823 
00824 OCL_FUNC(cl_int, clGetPlatformIDs,
00825     (cl_uint num_entries, cl_platform_id* platforms, cl_uint* num_platforms),
00826     (num_entries, platforms, num_platforms))
00827 
00828 OCL_FUNC(cl_int, clGetPlatformInfo,
00829     (cl_platform_id platform, cl_platform_info param_name,
00830     size_t param_value_size, void * param_value,
00831     size_t * param_value_size_ret),
00832     (platform, param_name, param_value_size, param_value, param_value_size_ret))
00833 
00834 OCL_FUNC(cl_int, clGetDeviceInfo,
00835          (cl_device_id device,
00836           cl_device_info param_name,
00837           size_t param_value_size,
00838           void * param_value,
00839           size_t * param_value_size_ret),
00840          (device, param_name, param_value_size, param_value, param_value_size_ret))
00841 
00842 
00843 OCL_FUNC(cl_int, clGetDeviceIDs,
00844     (cl_platform_id platform,
00845     cl_device_type device_type,
00846     cl_uint num_entries,
00847     cl_device_id * devices,
00848     cl_uint * num_devices),
00849     (platform, device_type, num_entries, devices, num_devices))
00850 
00851 OCL_FUNC_P(cl_context, clCreateContext,
00852     (const cl_context_properties * properties,
00853     cl_uint num_devices,
00854     const cl_device_id * devices,
00855     void (CL_CALLBACK * pfn_notify)(const char *, const void *, size_t, void *),
00856     void * user_data,
00857     cl_int * errcode_ret),
00858     (properties, num_devices, devices, pfn_notify, user_data, errcode_ret))
00859 
00860 OCL_FUNC(cl_int, clReleaseContext, (cl_context context), (context))
00861 
00862 
00863 OCL_FUNC(cl_int, clRetainContext, (cl_context context), (context))
00864 /*
00865 OCL_FUNC_P(cl_context, clCreateContextFromType,
00866     (const cl_context_properties * properties,
00867     cl_device_type device_type,
00868     void (CL_CALLBACK * pfn_notify)(const char *, const void *, size_t, void *),
00869     void * user_data,
00870     cl_int * errcode_ret),
00871     (properties, device_type, pfn_notify, user_data, errcode_ret))
00872 
00873 OCL_FUNC(cl_int, clGetContextInfo,
00874     (cl_context context,
00875     cl_context_info param_name,
00876     size_t param_value_size,
00877     void * param_value,
00878     size_t * param_value_size_ret),
00879     (context, param_name, param_value_size,
00880     param_value, param_value_size_ret))
00881 */
00882 OCL_FUNC_P(cl_command_queue, clCreateCommandQueue,
00883     (cl_context context,
00884     cl_device_id device,
00885     cl_command_queue_properties properties,
00886     cl_int * errcode_ret),
00887     (context, device, properties, errcode_ret))
00888 
00889 OCL_FUNC(cl_int, clReleaseCommandQueue, (cl_command_queue command_queue), (command_queue))
00890 
00891 OCL_FUNC_P(cl_mem, clCreateBuffer,
00892     (cl_context context,
00893     cl_mem_flags flags,
00894     size_t size,
00895     void * host_ptr,
00896     cl_int * errcode_ret),
00897     (context, flags, size, host_ptr, errcode_ret))
00898 
00899 /*
00900 OCL_FUNC(cl_int, clRetainCommandQueue, (cl_command_queue command_queue), (command_queue))
00901 
00902 OCL_FUNC(cl_int, clGetCommandQueueInfo,
00903  (cl_command_queue command_queue,
00904  cl_command_queue_info param_name,
00905  size_t param_value_size,
00906  void * param_value,
00907  size_t * param_value_size_ret),
00908  (command_queue, param_name, param_value_size, param_value, param_value_size_ret))
00909 
00910 OCL_FUNC_P(cl_mem, clCreateSubBuffer,
00911     (cl_mem buffer,
00912     cl_mem_flags flags,
00913     cl_buffer_create_type buffer_create_type,
00914     const void * buffer_create_info,
00915     cl_int * errcode_ret),
00916     (buffer, flags, buffer_create_type, buffer_create_info, errcode_ret))
00917 */
00918 
00919 OCL_FUNC_P(cl_mem, clCreateImage,
00920     (cl_context context,
00921     cl_mem_flags flags,
00922     const cl_image_format * image_format,
00923     const cl_image_desc * image_desc,
00924     void * host_ptr,
00925     cl_int * errcode_ret),
00926     (context, flags, image_format, image_desc, host_ptr, errcode_ret))
00927 
00928 OCL_FUNC_P(cl_mem, clCreateImage2D,
00929     (cl_context context,
00930     cl_mem_flags flags,
00931     const cl_image_format * image_format,
00932     size_t image_width,
00933     size_t image_height,
00934     size_t image_row_pitch,
00935     void * host_ptr,
00936     cl_int *errcode_ret),
00937     (context, flags, image_format, image_width, image_height, image_row_pitch, host_ptr, errcode_ret))
00938 
00939 OCL_FUNC(cl_int, clGetSupportedImageFormats,
00940  (cl_context context,
00941  cl_mem_flags flags,
00942  cl_mem_object_type image_type,
00943  cl_uint num_entries,
00944  cl_image_format * image_formats,
00945  cl_uint * num_image_formats),
00946  (context, flags, image_type, num_entries, image_formats, num_image_formats))
00947 
00948 
00949 OCL_FUNC(cl_int, clGetMemObjectInfo,
00950  (cl_mem memobj,
00951  cl_mem_info param_name,
00952  size_t param_value_size,
00953  void * param_value,
00954  size_t * param_value_size_ret),
00955  (memobj, param_name, param_value_size, param_value, param_value_size_ret))
00956 
00957 OCL_FUNC(cl_int, clGetImageInfo,
00958  (cl_mem image,
00959  cl_image_info param_name,
00960  size_t param_value_size,
00961  void * param_value,
00962  size_t * param_value_size_ret),
00963  (image, param_name, param_value_size, param_value, param_value_size_ret))
00964 
00965 /*
00966 OCL_FUNC(cl_int, clCreateKernelsInProgram,
00967  (cl_program program,
00968  cl_uint num_kernels,
00969  cl_kernel * kernels,
00970  cl_uint * num_kernels_ret),
00971  (program, num_kernels, kernels, num_kernels_ret))
00972 
00973 OCL_FUNC(cl_int, clRetainKernel, (cl_kernel kernel), (kernel))
00974 
00975 OCL_FUNC(cl_int, clGetKernelArgInfo,
00976  (cl_kernel kernel,
00977  cl_uint arg_indx,
00978  cl_kernel_arg_info param_name,
00979  size_t param_value_size,
00980  void * param_value,
00981  size_t * param_value_size_ret),
00982  (kernel, arg_indx, param_name, param_value_size, param_value, param_value_size_ret))
00983 
00984 OCL_FUNC(cl_int, clEnqueueReadImage,
00985  (cl_command_queue command_queue,
00986  cl_mem image,
00987  cl_bool blocking_read,
00988  const size_t * origin[3],
00989  const size_t * region[3],
00990  size_t row_pitch,
00991  size_t slice_pitch,
00992  void * ptr,
00993  cl_uint num_events_in_wait_list,
00994  const cl_event * event_wait_list,
00995  cl_event * event),
00996  (command_queue, image, blocking_read, origin, region,
00997  row_pitch, slice_pitch,
00998  ptr,
00999  num_events_in_wait_list,
01000  event_wait_list,
01001  event))
01002 
01003 OCL_FUNC(cl_int, clEnqueueWriteImage,
01004  (cl_command_queue command_queue,
01005  cl_mem image,
01006  cl_bool blocking_write,
01007  const size_t * origin[3],
01008  const size_t * region[3],
01009  size_t input_row_pitch,
01010  size_t input_slice_pitch,
01011  const void * ptr,
01012  cl_uint num_events_in_wait_list,
01013  const cl_event * event_wait_list,
01014  cl_event * event),
01015  (command_queue, image, blocking_write, origin, region, input_row_pitch,
01016  input_slice_pitch, ptr, num_events_in_wait_list, event_wait_list, event))
01017 
01018 OCL_FUNC(cl_int, clEnqueueFillImage,
01019  (cl_command_queue command_queue,
01020  cl_mem image,
01021  const void * fill_color,
01022  const size_t * origin[3],
01023  const size_t * region[3],
01024  cl_uint num_events_in_wait_list,
01025  const cl_event * event_wait_list,
01026  cl_event * event),
01027  (command_queue, image, fill_color, origin, region,
01028  num_events_in_wait_list, event_wait_list, event))
01029 
01030 OCL_FUNC(cl_int, clEnqueueCopyImage,
01031  (cl_command_queue command_queue,
01032  cl_mem src_image,
01033  cl_mem dst_image,
01034  const size_t * src_origin[3],
01035  const size_t * dst_origin[3],
01036  const size_t * region[3],
01037  cl_uint num_events_in_wait_list,
01038  const cl_event * event_wait_list,
01039  cl_event * event),
01040  (command_queue, src_image, dst_image, src_origin, dst_origin,
01041  region, num_events_in_wait_list, event_wait_list, event))
01042 */
01043 
01044 OCL_FUNC(cl_int, clEnqueueCopyImageToBuffer,
01045  (cl_command_queue command_queue,
01046  cl_mem src_image,
01047  cl_mem dst_buffer,
01048  const size_t * src_origin,
01049  const size_t * region,
01050  size_t dst_offset,
01051  cl_uint num_events_in_wait_list,
01052  const cl_event * event_wait_list,
01053  cl_event * event),
01054  (command_queue, src_image, dst_buffer, src_origin, region, dst_offset,
01055  num_events_in_wait_list, event_wait_list, event))
01056 
01057 OCL_FUNC(cl_int, clEnqueueCopyBufferToImage,
01058  (cl_command_queue command_queue,
01059  cl_mem src_buffer,
01060  cl_mem dst_image,
01061  size_t src_offset,
01062  const size_t dst_origin[3],
01063  const size_t region[3],
01064  cl_uint num_events_in_wait_list,
01065  const cl_event * event_wait_list,
01066  cl_event * event),
01067  (command_queue, src_buffer, dst_image, src_offset, dst_origin,
01068  region, num_events_in_wait_list, event_wait_list, event))
01069 
01070  OCL_FUNC(cl_int, clFlush,
01071  (cl_command_queue command_queue),
01072  (command_queue))
01073 
01074 /*
01075 OCL_FUNC_P(void*, clEnqueueMapImage,
01076  (cl_command_queue command_queue,
01077  cl_mem image,
01078  cl_bool blocking_map,
01079  cl_map_flags map_flags,
01080  const size_t * origin[3],
01081  const size_t * region[3],
01082  size_t * image_row_pitch,
01083  size_t * image_slice_pitch,
01084  cl_uint num_events_in_wait_list,
01085  const cl_event * event_wait_list,
01086  cl_event * event,
01087  cl_int * errcode_ret),
01088  (command_queue, image, blocking_map, map_flags, origin, region,
01089  image_row_pitch, image_slice_pitch, num_events_in_wait_list,
01090  event_wait_list, event, errcode_ret))
01091 */
01092 
01093 /*
01094 OCL_FUNC(cl_int, clRetainProgram, (cl_program program), (program))
01095 
01096 OCL_FUNC(cl_int, clGetKernelInfo,
01097  (cl_kernel kernel,
01098  cl_kernel_info param_name,
01099  size_t param_value_size,
01100  void * param_value,
01101  size_t * param_value_size_ret),
01102  (kernel, param_name, param_value_size, param_value, param_value_size_ret))
01103 
01104 */
01105 
01106 OCL_FUNC(cl_int, clRetainMemObject, (cl_mem memobj), (memobj))
01107 
01108 OCL_FUNC(cl_int, clReleaseMemObject, (cl_mem memobj), (memobj))
01109 
01110 
01111 OCL_FUNC_P(cl_program, clCreateProgramWithSource,
01112     (cl_context context,
01113     cl_uint count,
01114     const char ** strings,
01115     const size_t * lengths,
01116     cl_int * errcode_ret),
01117     (context, count, strings, lengths, errcode_ret))
01118 
01119 OCL_FUNC_P(cl_program, clCreateProgramWithBinary,
01120     (cl_context context,
01121     cl_uint num_devices,
01122     const cl_device_id * device_list,
01123     const size_t * lengths,
01124     const unsigned char ** binaries,
01125     cl_int * binary_status,
01126     cl_int * errcode_ret),
01127     (context, num_devices, device_list, lengths, binaries, binary_status, errcode_ret))
01128 
01129 OCL_FUNC(cl_int, clReleaseProgram, (cl_program program), (program))
01130 
01131 OCL_FUNC(cl_int, clBuildProgram,
01132     (cl_program program,
01133     cl_uint num_devices,
01134     const cl_device_id * device_list,
01135     const char * options,
01136     void (CL_CALLBACK * pfn_notify)(cl_program, void *),
01137     void * user_data),
01138     (program, num_devices, device_list, options, pfn_notify, user_data))
01139 
01140 OCL_FUNC(cl_int, clGetProgramInfo,
01141     (cl_program program,
01142     cl_program_info param_name,
01143     size_t param_value_size,
01144     void * param_value,
01145     size_t * param_value_size_ret),
01146     (program, param_name, param_value_size, param_value, param_value_size_ret))
01147 
01148 OCL_FUNC(cl_int, clGetProgramBuildInfo,
01149     (cl_program program,
01150     cl_device_id device,
01151     cl_program_build_info param_name,
01152     size_t param_value_size,
01153     void * param_value,
01154     size_t * param_value_size_ret),
01155     (program, device, param_name, param_value_size, param_value, param_value_size_ret))
01156 
01157 OCL_FUNC_P(cl_kernel, clCreateKernel,
01158     (cl_program program,
01159     const char * kernel_name,
01160     cl_int * errcode_ret),
01161     (program, kernel_name, errcode_ret))
01162 
01163 OCL_FUNC(cl_int, clReleaseKernel, (cl_kernel kernel), (kernel))
01164 
01165 OCL_FUNC(cl_int, clSetKernelArg,
01166     (cl_kernel kernel,
01167     cl_uint arg_index,
01168     size_t arg_size,
01169     const void * arg_value),
01170     (kernel, arg_index, arg_size, arg_value))
01171 
01172 OCL_FUNC(cl_int, clGetKernelWorkGroupInfo,
01173     (cl_kernel kernel,
01174     cl_device_id device,
01175     cl_kernel_work_group_info param_name,
01176     size_t param_value_size,
01177     void * param_value,
01178     size_t * param_value_size_ret),
01179     (kernel, device, param_name, param_value_size, param_value, param_value_size_ret))
01180 
01181 OCL_FUNC(cl_int, clFinish, (cl_command_queue command_queue), (command_queue))
01182 
01183 OCL_FUNC(cl_int, clEnqueueReadBuffer,
01184     (cl_command_queue command_queue,
01185     cl_mem buffer,
01186     cl_bool blocking_read,
01187     size_t offset,
01188     size_t size,
01189     void * ptr,
01190     cl_uint num_events_in_wait_list,
01191     const cl_event * event_wait_list,
01192     cl_event * event),
01193     (command_queue, buffer, blocking_read, offset, size, ptr,
01194     num_events_in_wait_list, event_wait_list, event))
01195 
01196 OCL_FUNC(cl_int, clEnqueueReadBufferRect,
01197     (cl_command_queue command_queue,
01198     cl_mem buffer,
01199     cl_bool blocking_read,
01200     const size_t * buffer_offset,
01201     const size_t * host_offset,
01202     const size_t * region,
01203     size_t buffer_row_pitch,
01204     size_t buffer_slice_pitch,
01205     size_t host_row_pitch,
01206     size_t host_slice_pitch,
01207     void * ptr,
01208     cl_uint num_events_in_wait_list,
01209     const cl_event * event_wait_list,
01210     cl_event * event),
01211     (command_queue, buffer, blocking_read, buffer_offset, host_offset, region, buffer_row_pitch,
01212     buffer_slice_pitch, host_row_pitch, host_slice_pitch, ptr, num_events_in_wait_list,
01213     event_wait_list, event))
01214 
01215 OCL_FUNC(cl_int, clEnqueueWriteBuffer,
01216     (cl_command_queue command_queue,
01217     cl_mem buffer,
01218     cl_bool blocking_write,
01219     size_t offset,
01220     size_t size,
01221     const void * ptr,
01222     cl_uint num_events_in_wait_list,
01223     const cl_event * event_wait_list,
01224     cl_event * event),
01225     (command_queue, buffer, blocking_write, offset, size, ptr,
01226     num_events_in_wait_list, event_wait_list, event))
01227 
01228 OCL_FUNC(cl_int, clEnqueueWriteBufferRect,
01229     (cl_command_queue command_queue,
01230     cl_mem buffer,
01231     cl_bool blocking_write,
01232     const size_t * buffer_offset,
01233     const size_t * host_offset,
01234     const size_t * region,
01235     size_t buffer_row_pitch,
01236     size_t buffer_slice_pitch,
01237     size_t host_row_pitch,
01238     size_t host_slice_pitch,
01239     const void * ptr,
01240     cl_uint num_events_in_wait_list,
01241     const cl_event * event_wait_list,
01242     cl_event * event),
01243     (command_queue, buffer, blocking_write, buffer_offset, host_offset,
01244     region, buffer_row_pitch, buffer_slice_pitch, host_row_pitch,
01245     host_slice_pitch, ptr, num_events_in_wait_list, event_wait_list, event))
01246 
01247 /*OCL_FUNC(cl_int, clEnqueueFillBuffer,
01248     (cl_command_queue command_queue,
01249     cl_mem buffer,
01250     const void * pattern,
01251     size_t pattern_size,
01252     size_t offset,
01253     size_t size,
01254     cl_uint num_events_in_wait_list,
01255     const cl_event * event_wait_list,
01256     cl_event * event),
01257     (command_queue, buffer, pattern, pattern_size, offset, size,
01258     num_events_in_wait_list, event_wait_list, event))*/
01259 
01260 OCL_FUNC(cl_int, clEnqueueCopyBuffer,
01261     (cl_command_queue command_queue,
01262     cl_mem src_buffer,
01263     cl_mem dst_buffer,
01264     size_t src_offset,
01265     size_t dst_offset,
01266     size_t size,
01267     cl_uint num_events_in_wait_list,
01268     const cl_event * event_wait_list,
01269     cl_event * event),
01270     (command_queue, src_buffer, dst_buffer, src_offset, dst_offset,
01271     size, num_events_in_wait_list, event_wait_list, event))
01272 
01273 OCL_FUNC(cl_int, clEnqueueCopyBufferRect,
01274     (cl_command_queue command_queue,
01275     cl_mem src_buffer,
01276     cl_mem dst_buffer,
01277     const size_t * src_origin,
01278     const size_t * dst_origin,
01279     const size_t * region,
01280     size_t src_row_pitch,
01281     size_t src_slice_pitch,
01282     size_t dst_row_pitch,
01283     size_t dst_slice_pitch,
01284     cl_uint num_events_in_wait_list,
01285     const cl_event * event_wait_list,
01286     cl_event * event),
01287     (command_queue, src_buffer, dst_buffer, src_origin, dst_origin,
01288     region, src_row_pitch, src_slice_pitch, dst_row_pitch, dst_slice_pitch,
01289     num_events_in_wait_list, event_wait_list, event))
01290 
01291 OCL_FUNC_P(void*, clEnqueueMapBuffer,
01292     (cl_command_queue command_queue,
01293     cl_mem buffer,
01294     cl_bool blocking_map,
01295     cl_map_flags map_flags,
01296     size_t offset,
01297     size_t size,
01298     cl_uint num_events_in_wait_list,
01299     const cl_event * event_wait_list,
01300     cl_event * event,
01301     cl_int * errcode_ret),
01302     (command_queue, buffer, blocking_map, map_flags, offset, size,
01303     num_events_in_wait_list, event_wait_list, event, errcode_ret))
01304 
01305 OCL_FUNC(cl_int, clEnqueueUnmapMemObject,
01306     (cl_command_queue command_queue,
01307     cl_mem memobj,
01308     void * mapped_ptr,
01309     cl_uint num_events_in_wait_list,
01310     const cl_event * event_wait_list,
01311     cl_event * event),
01312     (command_queue, memobj, mapped_ptr, num_events_in_wait_list, event_wait_list, event))
01313 
01314 OCL_FUNC(cl_int, clEnqueueNDRangeKernel,
01315     (cl_command_queue command_queue,
01316     cl_kernel kernel,
01317     cl_uint work_dim,
01318     const size_t * global_work_offset,
01319     const size_t * global_work_size,
01320     const size_t * local_work_size,
01321     cl_uint num_events_in_wait_list,
01322     const cl_event * event_wait_list,
01323     cl_event * event),
01324     (command_queue, kernel, work_dim, global_work_offset, global_work_size,
01325     local_work_size, num_events_in_wait_list, event_wait_list, event))
01326 
01327 OCL_FUNC(cl_int, clEnqueueTask,
01328     (cl_command_queue command_queue,
01329     cl_kernel kernel,
01330     cl_uint num_events_in_wait_list,
01331     const cl_event * event_wait_list,
01332     cl_event * event),
01333     (command_queue, kernel, num_events_in_wait_list, event_wait_list, event))
01334 
01335 OCL_FUNC(cl_int, clSetEventCallback,
01336     (cl_event event,
01337     cl_int command_exec_callback_type ,
01338     void (CL_CALLBACK  *pfn_event_notify) (cl_event event, cl_int event_command_exec_status, void *user_data),
01339     void *user_data),
01340     (event, command_exec_callback_type, pfn_event_notify, user_data))
01341 
01342 OCL_FUNC(cl_int, clReleaseEvent, (cl_event event), (event))
01343 
01344 }
01345 
01346 #endif
01347 
01348 #ifndef CL_VERSION_1_2
01349 #define CL_VERSION_1_2
01350 #endif
01351 
01352 #endif // HAVE_OPENCL
01353 
01354 #ifdef _DEBUG
01355 #define CV_OclDbgAssert CV_DbgAssert
01356 #else
01357 static bool isRaiseError()
01358 {
01359     static bool initialized = false;
01360     static bool value = false;
01361     if (!initialized)
01362     {
01363         value = getBoolParameter("OPENCV_OPENCL_RAISE_ERROR", false);
01364         initialized = true;
01365     }
01366     return value;
01367 }
01368 #define CV_OclDbgAssert(expr) do { if (isRaiseError()) { CV_Assert(expr); } else { (void)(expr); } } while ((void)0, 0)
01369 #endif
01370 
01371 #ifdef HAVE_OPENCL_SVM
01372 #include "opencv2/core/opencl/runtime/opencl_svm_20.hpp"
01373 #include "opencv2/core/opencl/runtime/opencl_svm_hsa_extension.hpp"
01374 #include "opencv2/core/opencl/opencl_svm.hpp"
01375 #endif
01376 
01377 namespace cv { namespace ocl {
01378 
01379 struct UMat2D
01380 {
01381     UMat2D(const UMat& m)
01382     {
01383         offset = (int)m.offset;
01384         step = (int)m.step;
01385         rows = m.rows;
01386         cols = m.cols;
01387     }
01388     int offset;
01389     int step;
01390     int rows;
01391     int cols;
01392 };
01393 
01394 struct UMat3D
01395 {
01396     UMat3D(const UMat& m)
01397     {
01398         offset = (int)m.offset;
01399         step = (int)m.step.p[1];
01400         slicestep = (int)m.step.p[0];
01401         slices = (int)m.size.p[0];
01402         rows = m.size.p[1];
01403         cols = m.size.p[2];
01404     }
01405     int offset;
01406     int slicestep;
01407     int step;
01408     int slices;
01409     int rows;
01410     int cols;
01411 };
01412 
01413 // Computes 64-bit "cyclic redundancy check" sum, as specified in ECMA-182
01414 static uint64 crc64( const uchar* data, size_t size, uint64 crc0=0 )
01415 {
01416     static uint64 table[256];
01417     static bool initialized = false;
01418 
01419     if( !initialized )
01420     {
01421         for( int i = 0; i < 256; i++ )
01422         {
01423             uint64 c = i;
01424             for( int j = 0; j < 8; j++ )
01425                 c = ((c & 1) ? CV_BIG_UINT(0xc96c5795d7870f42) : 0) ^ (c >> 1);
01426             table[i] = c;
01427         }
01428         initialized = true;
01429     }
01430 
01431     uint64 crc = ~crc0;
01432     for( size_t idx = 0; idx < size; idx++ )
01433         crc = table[(uchar)crc ^ data[idx]] ^ (crc >> 8);
01434 
01435     return ~crc;
01436 }
01437 
01438 struct HashKey
01439 {
01440     typedef uint64 part;
01441     HashKey(part _a, part _b) : a(_a), b(_b) {}
01442     part a, b;
01443 };
01444 
01445 inline bool operator == (const HashKey& h1, const HashKey& h2)
01446 {
01447     return h1.a == h2.a && h1.b == h2.b;
01448 }
01449 
01450 inline bool operator < (const HashKey& h1, const HashKey& h2)
01451 {
01452     return h1.a < h2.a || (h1.a == h2.a && h1.b < h2.b);
01453 }
01454 
01455 
01456 bool haveOpenCL()
01457 {
01458 #ifdef HAVE_OPENCL
01459     static bool g_isOpenCLInitialized = false;
01460     static bool g_isOpenCLAvailable = false;
01461 
01462     if (!g_isOpenCLInitialized)
01463     {
01464         try
01465         {
01466             cl_uint n = 0;
01467             g_isOpenCLAvailable = ::clGetPlatformIDs(0, NULL, &n) == CL_SUCCESS;
01468         }
01469         catch (...)
01470         {
01471             g_isOpenCLAvailable = false;
01472         }
01473         g_isOpenCLInitialized = true;
01474     }
01475     return g_isOpenCLAvailable;
01476 #else
01477     return false;
01478 #endif
01479 }
01480 
01481 bool useOpenCL()
01482 {
01483     CoreTLSData* data = getCoreTlsData().get();
01484     if( data->useOpenCL < 0 )
01485     {
01486         //try
01487 //        {
01488             data->useOpenCL = (int)haveOpenCL() && Device::getDefault().ptr() && Device::getDefault().available();
01489         //}
01490 //        catch (...)
01491 //        {
01492 //            data->useOpenCL = 0;
01493 //        }
01494     }
01495     return data->useOpenCL > 0;
01496 }
01497 
01498 void setUseOpenCL(bool flag)
01499 {
01500     if( haveOpenCL() )
01501     {
01502         CoreTLSData* data = getCoreTlsData().get();
01503         data->useOpenCL = (flag && Device::getDefault().ptr() != NULL) ? 1 : 0;
01504     }
01505 }
01506 
01507 #ifdef HAVE_CLAMDBLAS
01508 
01509 class AmdBlasHelper
01510 {
01511 public:
01512     static AmdBlasHelper & getInstance()
01513     {
01514         CV_SINGLETON_LAZY_INIT_REF(AmdBlasHelper, new AmdBlasHelper())
01515     }
01516 
01517     bool isAvailable() const
01518     {
01519         return g_isAmdBlasAvailable;
01520     }
01521 
01522     ~AmdBlasHelper()
01523     {
01524         try
01525         {
01526             clAmdBlasTeardown();
01527         }
01528         catch (...) { }
01529     }
01530 
01531 protected:
01532     AmdBlasHelper()
01533     {
01534         if (!g_isAmdBlasInitialized)
01535         {
01536             AutoLock lock(getInitializationMutex());
01537 
01538             if (!g_isAmdBlasInitialized)
01539             {
01540                 if (haveOpenCL())
01541                 {
01542                     try
01543                     {
01544                         g_isAmdBlasAvailable = clAmdBlasSetup() == clAmdBlasSuccess;
01545                     }
01546                     catch (...)
01547                     {
01548                         g_isAmdBlasAvailable = false;
01549                     }
01550                 }
01551                 else
01552                     g_isAmdBlasAvailable = false;
01553 
01554                 g_isAmdBlasInitialized = true;
01555             }
01556         }
01557     }
01558 
01559 private:
01560     static bool g_isAmdBlasInitialized;
01561     static bool g_isAmdBlasAvailable;
01562 };
01563 
01564 bool AmdBlasHelper::g_isAmdBlasAvailable = false;
01565 bool AmdBlasHelper::g_isAmdBlasInitialized = false;
01566 
01567 bool haveAmdBlas()
01568 {
01569     return AmdBlasHelper::getInstance().isAvailable();
01570 }
01571 
01572 #else
01573 
01574 bool haveAmdBlas()
01575 {
01576     return false;
01577 }
01578 
01579 #endif
01580 
01581 #ifdef HAVE_CLAMDFFT
01582 
01583 class AmdFftHelper
01584 {
01585 public:
01586     static AmdFftHelper & getInstance()
01587     {
01588         CV_SINGLETON_LAZY_INIT_REF(AmdFftHelper, new AmdFftHelper())
01589     }
01590 
01591     bool isAvailable() const
01592     {
01593         return g_isAmdFftAvailable;
01594     }
01595 
01596     ~AmdFftHelper()
01597     {
01598         try
01599         {
01600 //            clAmdFftTeardown();
01601         }
01602         catch (...) { }
01603     }
01604 
01605 protected:
01606     AmdFftHelper()
01607     {
01608         if (!g_isAmdFftInitialized)
01609         {
01610             AutoLock lock(getInitializationMutex());
01611 
01612             if (!g_isAmdFftInitialized)
01613             {
01614                 if (haveOpenCL())
01615                 {
01616                     try
01617                     {
01618                         cl_uint major, minor, patch;
01619                         CV_Assert(clAmdFftInitSetupData(&setupData) == CLFFT_SUCCESS);
01620 
01621                         // it throws exception in case AmdFft binaries are not found
01622                         CV_Assert(clAmdFftGetVersion(&major, &minor, &patch) == CLFFT_SUCCESS);
01623                         g_isAmdFftAvailable = true;
01624                     }
01625                     catch (const Exception &)
01626                     {
01627                         g_isAmdFftAvailable = false;
01628                     }
01629                 }
01630                 else
01631                     g_isAmdFftAvailable = false;
01632 
01633                 g_isAmdFftInitialized = true;
01634             }
01635         }
01636     }
01637 
01638 private:
01639     static clAmdFftSetupData setupData;
01640     static bool g_isAmdFftInitialized;
01641     static bool g_isAmdFftAvailable;
01642 };
01643 
01644 clAmdFftSetupData AmdFftHelper::setupData;
01645 bool AmdFftHelper::g_isAmdFftAvailable = false;
01646 bool AmdFftHelper::g_isAmdFftInitialized = false;
01647 
01648 bool haveAmdFft()
01649 {
01650     return AmdFftHelper::getInstance().isAvailable();
01651 }
01652 
01653 #else
01654 
01655 bool haveAmdFft()
01656 {
01657     return false;
01658 }
01659 
01660 #endif
01661 
01662 bool haveSVM()
01663 {
01664 #ifdef HAVE_OPENCL_SVM
01665     return true;
01666 #else
01667     return false;
01668 #endif
01669 }
01670 
01671 void finish()
01672 {
01673     Queue::getDefault().finish();
01674 }
01675 
01676 #define IMPLEMENT_REFCOUNTABLE() \
01677     void addref() { CV_XADD(&refcount, 1); } \
01678     void release() { if( CV_XADD(&refcount, -1) == 1 && !cv::__termination) delete this; } \
01679     int refcount
01680 
01681 /////////////////////////////////////////// Platform /////////////////////////////////////////////
01682 
01683 struct Platform::Impl
01684 {
01685     Impl()
01686     {
01687         refcount = 1;
01688         handle = 0;
01689         initialized = false;
01690     }
01691 
01692     ~Impl() {}
01693 
01694     void init()
01695     {
01696         if( !initialized )
01697         {
01698             //cl_uint num_entries
01699             cl_uint n = 0;
01700             if( clGetPlatformIDs(1, &handle, &n) != CL_SUCCESS || n == 0 )
01701                 handle = 0;
01702             if( handle != 0 )
01703             {
01704                 char buf[1000];
01705                 size_t len = 0;
01706                 CV_OclDbgAssert(clGetPlatformInfo(handle, CL_PLATFORM_VENDOR, sizeof(buf), buf, &len) == CL_SUCCESS);
01707                 buf[len] = '\0';
01708                 vendor = String(buf);
01709             }
01710 
01711             initialized = true;
01712         }
01713     }
01714 
01715     IMPLEMENT_REFCOUNTABLE();
01716 
01717     cl_platform_id handle;
01718     String vendor;
01719     bool initialized;
01720 };
01721 
01722 Platform::Platform()
01723 {
01724     p = 0;
01725 }
01726 
01727 Platform::~Platform()
01728 {
01729     if(p)
01730         p->release();
01731 }
01732 
01733 Platform::Platform(const Platform& pl)
01734 {
01735     p = (Impl*)pl.p;
01736     if(p)
01737         p->addref();
01738 }
01739 
01740 Platform& Platform::operator = (const Platform& pl)
01741 {
01742     Impl* newp = (Impl*)pl.p;
01743     if(newp)
01744         newp->addref();
01745     if(p)
01746         p->release();
01747     p = newp;
01748     return *this;
01749 }
01750 
01751 void* Platform::ptr() const
01752 {
01753     return p ? p->handle : 0;
01754 }
01755 
01756 Platform& Platform::getDefault()
01757 {
01758     static Platform p;
01759     if( !p.p )
01760     {
01761         p.p = new Impl;
01762         p.p->init();
01763     }
01764     return p;
01765 }
01766 
01767 /////////////////////////////////////// Device ////////////////////////////////////////////
01768 
01769 // deviceVersion has format
01770 //   OpenCL<space><major_version.minor_version><space><vendor-specific information>
01771 // by specification
01772 //   http://www.khronos.org/registry/cl/sdk/1.1/docs/man/xhtml/clGetDeviceInfo.html
01773 //   http://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clGetDeviceInfo.html
01774 static void parseDeviceVersion(const String &deviceVersion, int &major, int &minor)
01775 {
01776     major = minor = 0;
01777     if (10 >= deviceVersion.length())
01778         return;
01779     const char *pstr = deviceVersion.c_str();
01780     if (0 != strncmp(pstr, "OpenCL ", 7))
01781         return;
01782     size_t ppos = deviceVersion.find('.', 7);
01783     if (String::npos == ppos)
01784         return;
01785     String temp = deviceVersion.substr(7, ppos - 7);
01786     major = atoi(temp.c_str());
01787     temp = deviceVersion.substr(ppos + 1);
01788     minor = atoi(temp.c_str());
01789 }
01790 
01791 struct Device::Impl
01792 {
01793     Impl(void* d)
01794     {
01795         handle = (cl_device_id)d;
01796         refcount = 1;
01797 
01798         name_ = getStrProp(CL_DEVICE_NAME);
01799         version_ = getStrProp(CL_DEVICE_VERSION);
01800         doubleFPConfig_ = getProp<cl_device_fp_config, int>(CL_DEVICE_DOUBLE_FP_CONFIG);
01801         hostUnifiedMemory_ = getBoolProp(CL_DEVICE_HOST_UNIFIED_MEMORY);
01802         maxComputeUnits_ = getProp<cl_uint, int>(CL_DEVICE_MAX_COMPUTE_UNITS);
01803         maxWorkGroupSize_ = getProp<size_t, size_t>(CL_DEVICE_MAX_WORK_GROUP_SIZE);
01804         type_ = getProp<cl_device_type, int>(CL_DEVICE_TYPE);
01805         driverVersion_ = getStrProp(CL_DRIVER_VERSION);
01806 
01807         String deviceVersion_ = getStrProp(CL_DEVICE_VERSION);
01808         parseDeviceVersion(deviceVersion_, deviceVersionMajor_, deviceVersionMinor_);
01809 
01810         vendorName_ = getStrProp(CL_DEVICE_VENDOR);
01811         if (vendorName_ == "Advanced Micro Devices, Inc." ||
01812             vendorName_ == "AMD")
01813             vendorID_ = VENDOR_AMD;
01814         else if (vendorName_ == "Intel(R) Corporation" || vendorName_ == "Intel" || strstr(name_.c_str(), "Iris") != 0)
01815             vendorID_ = VENDOR_INTEL;
01816         else if (vendorName_ == "NVIDIA Corporation")
01817             vendorID_ = VENDOR_NVIDIA;
01818         else
01819             vendorID_ = UNKNOWN_VENDOR;
01820     }
01821 
01822     template<typename _TpCL, typename _TpOut>
01823     _TpOut getProp(cl_device_info prop) const
01824     {
01825         _TpCL temp=_TpCL();
01826         size_t sz = 0;
01827 
01828         return clGetDeviceInfo(handle, prop, sizeof(temp), &temp, &sz) == CL_SUCCESS &&
01829             sz == sizeof(temp) ? _TpOut(temp) : _TpOut();
01830     }
01831 
01832     bool getBoolProp(cl_device_info prop) const
01833     {
01834         cl_bool temp = CL_FALSE;
01835         size_t sz = 0;
01836 
01837         return clGetDeviceInfo(handle, prop, sizeof(temp), &temp, &sz) == CL_SUCCESS &&
01838             sz == sizeof(temp) ? temp != 0 : false;
01839     }
01840 
01841     String getStrProp(cl_device_info prop) const
01842     {
01843         char buf[1024];
01844         size_t sz=0;
01845         return clGetDeviceInfo(handle, prop, sizeof(buf)-16, buf, &sz) == CL_SUCCESS &&
01846             sz < sizeof(buf) ? String(buf) : String();
01847     }
01848 
01849     IMPLEMENT_REFCOUNTABLE();
01850     cl_device_id handle;
01851 
01852     String name_;
01853     String version_;
01854     int doubleFPConfig_;
01855     bool hostUnifiedMemory_;
01856     int maxComputeUnits_;
01857     size_t maxWorkGroupSize_;
01858     int type_;
01859     int deviceVersionMajor_;
01860     int deviceVersionMinor_;
01861     String driverVersion_;
01862     String vendorName_;
01863     int vendorID_;
01864 };
01865 
01866 
01867 Device::Device()
01868 {
01869     p = 0;
01870 }
01871 
01872 Device::Device(void* d)
01873 {
01874     p = 0;
01875     set(d);
01876 }
01877 
01878 Device::Device(const Device& d)
01879 {
01880     p = d.p;
01881     if(p)
01882         p->addref();
01883 }
01884 
01885 Device& Device::operator = (const Device& d)
01886 {
01887     Impl* newp = (Impl*)d.p;
01888     if(newp)
01889         newp->addref();
01890     if(p)
01891         p->release();
01892     p = newp;
01893     return *this;
01894 }
01895 
01896 Device::~Device()
01897 {
01898     if(p)
01899         p->release();
01900 }
01901 
01902 void Device::set(void* d)
01903 {
01904     if(p)
01905         p->release();
01906     p = new Impl(d);
01907 }
01908 
01909 void* Device::ptr() const
01910 {
01911     return p ? p->handle : 0;
01912 }
01913 
01914 String Device::name() const
01915 { return p ? p->name_ : String(); }
01916 
01917 String Device::extensions() const
01918 { return p ? p->getStrProp(CL_DEVICE_EXTENSIONS) : String(); }
01919 
01920 String Device::version() const
01921 { return p ? p->version_ : String(); }
01922 
01923 String Device::vendorName() const
01924 { return p ? p->vendorName_ : String(); }
01925 
01926 int Device::vendorID() const
01927 { return p ? p->vendorID_ : 0; }
01928 
01929 String Device::OpenCL_C_Version() const
01930 { return p ? p->getStrProp(CL_DEVICE_OPENCL_C_VERSION) : String(); }
01931 
01932 String Device::OpenCLVersion() const
01933 { return p ? p->getStrProp(CL_DEVICE_EXTENSIONS) : String(); }
01934 
01935 int Device::deviceVersionMajor() const
01936 { return p ? p->deviceVersionMajor_ : 0; }
01937 
01938 int Device::deviceVersionMinor() const
01939 { return p ? p->deviceVersionMinor_ : 0; }
01940 
01941 String Device::driverVersion() const
01942 { return p ? p->driverVersion_ : String(); }
01943 
01944 int Device::type() const
01945 { return p ? p->type_ : 0; }
01946 
01947 int Device::addressBits() const
01948 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_ADDRESS_BITS) : 0; }
01949 
01950 bool Device::available() const
01951 { return p ? p->getBoolProp(CL_DEVICE_AVAILABLE) : false; }
01952 
01953 bool Device::compilerAvailable() const
01954 { return p ? p->getBoolProp(CL_DEVICE_COMPILER_AVAILABLE) : false; }
01955 
01956 bool Device::linkerAvailable() const
01957 #ifdef CL_VERSION_1_2
01958 { return p ? p->getBoolProp(CL_DEVICE_LINKER_AVAILABLE) : false; }
01959 #else
01960 { CV_REQUIRE_OPENCL_1_2_ERROR; }
01961 #endif
01962 
01963 int Device::doubleFPConfig() const
01964 { return p ? p->doubleFPConfig_ : 0; }
01965 
01966 int Device::singleFPConfig() const
01967 { return p ? p->getProp<cl_device_fp_config, int>(CL_DEVICE_SINGLE_FP_CONFIG) : 0; }
01968 
01969 int Device::halfFPConfig() const
01970 #ifdef CL_VERSION_1_2
01971 { return p ? p->getProp<cl_device_fp_config, int>(CL_DEVICE_HALF_FP_CONFIG) : 0; }
01972 #else
01973 { CV_REQUIRE_OPENCL_1_2_ERROR; }
01974 #endif
01975 
01976 bool Device::endianLittle() const
01977 { return p ? p->getBoolProp(CL_DEVICE_ENDIAN_LITTLE) : false; }
01978 
01979 bool Device::errorCorrectionSupport() const
01980 { return p ? p->getBoolProp(CL_DEVICE_ERROR_CORRECTION_SUPPORT) : false; }
01981 
01982 int Device::executionCapabilities() const
01983 { return p ? p->getProp<cl_device_exec_capabilities, int>(CL_DEVICE_EXECUTION_CAPABILITIES) : 0; }
01984 
01985 size_t Device::globalMemCacheSize() const
01986 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_GLOBAL_MEM_CACHE_SIZE) : 0; }
01987 
01988 int Device::globalMemCacheType() const
01989 { return p ? p->getProp<cl_device_mem_cache_type, int>(CL_DEVICE_GLOBAL_MEM_CACHE_TYPE) : 0; }
01990 
01991 int Device::globalMemCacheLineSize() const
01992 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE) : 0; }
01993 
01994 size_t Device::globalMemSize() const
01995 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_GLOBAL_MEM_SIZE) : 0; }
01996 
01997 size_t Device::localMemSize() const
01998 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_LOCAL_MEM_SIZE) : 0; }
01999 
02000 int Device::localMemType() const
02001 { return p ? p->getProp<cl_device_local_mem_type, int>(CL_DEVICE_LOCAL_MEM_TYPE) : 0; }
02002 
02003 bool Device::hostUnifiedMemory() const
02004 { return p ? p->hostUnifiedMemory_ : false; }
02005 
02006 bool Device::imageSupport() const
02007 { return p ? p->getBoolProp(CL_DEVICE_IMAGE_SUPPORT) : false; }
02008 
02009 bool Device::imageFromBufferSupport() const
02010 {
02011     bool ret = false;
02012     if (p)
02013     {
02014         size_t pos = p->getStrProp(CL_DEVICE_EXTENSIONS).find("cl_khr_image2d_from_buffer");
02015         if (pos != String::npos)
02016         {
02017             ret = true;
02018         }
02019     }
02020     return ret;
02021 }
02022 
02023 uint Device::imagePitchAlignment() const
02024 {
02025 #ifdef CL_DEVICE_IMAGE_PITCH_ALIGNMENT
02026     return p ? p->getProp<cl_uint, uint>(CL_DEVICE_IMAGE_PITCH_ALIGNMENT) : 0;
02027 #else
02028     return 0;
02029 #endif
02030 }
02031 
02032 uint Device::imageBaseAddressAlignment() const
02033 {
02034 #ifdef CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT
02035     return p ? p->getProp<cl_uint, uint>(CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT) : 0;
02036 #else
02037     return 0;
02038 #endif
02039 }
02040 
02041 size_t Device::image2DMaxWidth() const
02042 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE2D_MAX_WIDTH) : 0; }
02043 
02044 size_t Device::image2DMaxHeight() const
02045 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE2D_MAX_HEIGHT) : 0; }
02046 
02047 size_t Device::image3DMaxWidth() const
02048 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_WIDTH) : 0; }
02049 
02050 size_t Device::image3DMaxHeight() const
02051 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_HEIGHT) : 0; }
02052 
02053 size_t Device::image3DMaxDepth() const
02054 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE3D_MAX_DEPTH) : 0; }
02055 
02056 size_t Device::imageMaxBufferSize() const
02057 #ifdef CL_VERSION_1_2
02058 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE_MAX_BUFFER_SIZE) : 0; }
02059 #else
02060 { CV_REQUIRE_OPENCL_1_2_ERROR; }
02061 #endif
02062 
02063 size_t Device::imageMaxArraySize() const
02064 #ifdef CL_VERSION_1_2
02065 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_IMAGE_MAX_ARRAY_SIZE) : 0; }
02066 #else
02067 { CV_REQUIRE_OPENCL_1_2_ERROR; }
02068 #endif
02069 
02070 int Device::maxClockFrequency() const
02071 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_CLOCK_FREQUENCY) : 0; }
02072 
02073 int Device::maxComputeUnits() const
02074 { return p ? p->maxComputeUnits_ : 0; }
02075 
02076 int Device::maxConstantArgs() const
02077 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_CONSTANT_ARGS) : 0; }
02078 
02079 size_t Device::maxConstantBufferSize() const
02080 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE) : 0; }
02081 
02082 size_t Device::maxMemAllocSize() const
02083 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_MEM_ALLOC_SIZE) : 0; }
02084 
02085 size_t Device::maxParameterSize() const
02086 { return p ? p->getProp<cl_ulong, size_t>(CL_DEVICE_MAX_PARAMETER_SIZE) : 0; }
02087 
02088 int Device::maxReadImageArgs() const
02089 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_READ_IMAGE_ARGS) : 0; }
02090 
02091 int Device::maxWriteImageArgs() const
02092 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_WRITE_IMAGE_ARGS) : 0; }
02093 
02094 int Device::maxSamplers() const
02095 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_SAMPLERS) : 0; }
02096 
02097 size_t Device::maxWorkGroupSize() const
02098 { return p ? p->maxWorkGroupSize_ : 0; }
02099 
02100 int Device::maxWorkItemDims() const
02101 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS) : 0; }
02102 
02103 void Device::maxWorkItemSizes(size_t* sizes) const
02104 {
02105     if(p)
02106     {
02107         const int MAX_DIMS = 32;
02108         size_t retsz = 0;
02109         CV_OclDbgAssert(clGetDeviceInfo(p->handle, CL_DEVICE_MAX_WORK_ITEM_SIZES,
02110                 MAX_DIMS*sizeof(sizes[0]), &sizes[0], &retsz) == CL_SUCCESS);
02111     }
02112 }
02113 
02114 int Device::memBaseAddrAlign() const
02115 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_MEM_BASE_ADDR_ALIGN) : 0; }
02116 
02117 int Device::nativeVectorWidthChar() const
02118 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR) : 0; }
02119 
02120 int Device::nativeVectorWidthShort() const
02121 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT) : 0; }
02122 
02123 int Device::nativeVectorWidthInt() const
02124 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_INT) : 0; }
02125 
02126 int Device::nativeVectorWidthLong() const
02127 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG) : 0; }
02128 
02129 int Device::nativeVectorWidthFloat() const
02130 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT) : 0; }
02131 
02132 int Device::nativeVectorWidthDouble() const
02133 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE) : 0; }
02134 
02135 int Device::nativeVectorWidthHalf() const
02136 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF) : 0; }
02137 
02138 int Device::preferredVectorWidthChar() const
02139 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR) : 0; }
02140 
02141 int Device::preferredVectorWidthShort() const
02142 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT) : 0; }
02143 
02144 int Device::preferredVectorWidthInt() const
02145 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT) : 0; }
02146 
02147 int Device::preferredVectorWidthLong() const
02148 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG) : 0; }
02149 
02150 int Device::preferredVectorWidthFloat() const
02151 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT) : 0; }
02152 
02153 int Device::preferredVectorWidthDouble() const
02154 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE) : 0; }
02155 
02156 int Device::preferredVectorWidthHalf() const
02157 { return p ? p->getProp<cl_uint, int>(CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF) : 0; }
02158 
02159 size_t Device::printfBufferSize() const
02160 #ifdef CL_VERSION_1_2
02161 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_PRINTF_BUFFER_SIZE) : 0; }
02162 #else
02163 { CV_REQUIRE_OPENCL_1_2_ERROR; }
02164 #endif
02165 
02166 
02167 size_t Device::profilingTimerResolution() const
02168 { return p ? p->getProp<size_t, size_t>(CL_DEVICE_PROFILING_TIMER_RESOLUTION) : 0; }
02169 
02170 const Device& Device::getDefault()
02171 {
02172     const Context& ctx = Context::getDefault();
02173     int idx = getCoreTlsData().get()->device;
02174     const Device& device = ctx.device(idx);
02175     return device;
02176 }
02177 
02178 ////////////////////////////////////// Context ///////////////////////////////////////////////////
02179 
02180 template <typename Functor, typename ObjectType>
02181 inline cl_int getStringInfo(Functor f, ObjectType obj, cl_uint name, std::string& param)
02182 {
02183     ::size_t required;
02184     cl_int err = f(obj, name, 0, NULL, &required);
02185     if (err != CL_SUCCESS)
02186         return err;
02187 
02188     param.clear();
02189     if (required > 0)
02190     {
02191         AutoBuffer<char> buf(required + 1);
02192         char* ptr = (char*)buf; // cleanup is not needed
02193         err = f(obj, name, required, ptr, NULL);
02194         if (err != CL_SUCCESS)
02195             return err;
02196         param = ptr;
02197     }
02198 
02199     return CL_SUCCESS;
02200 }
02201 
02202 static void split(const std::string &s, char delim, std::vector<std::string> &elems)
02203 {
02204     elems.clear();
02205     if (s.size() == 0)
02206         return;
02207     std::istringstream ss(s);
02208     std::string item;
02209     while (!ss.eof())
02210     {
02211         std::getline(ss, item, delim);
02212         elems.push_back(item);
02213     }
02214 }
02215 
02216 // Layout: <Platform>:<CPU|GPU|ACCELERATOR|nothing=GPU/CPU>:<deviceName>
02217 // Sample: AMD:GPU:
02218 // Sample: AMD:GPU:Tahiti
02219 // Sample: :GPU|CPU: = '' = ':' = '::'
02220 static bool parseOpenCLDeviceConfiguration(const std::string& configurationStr,
02221         std::string& platform, std::vector<std::string>& deviceTypes, std::string& deviceNameOrID)
02222 {
02223     std::vector<std::string> parts;
02224     split(configurationStr, ':', parts);
02225     if (parts.size() > 3)
02226     {
02227         std::cerr << "ERROR: Invalid configuration string for OpenCL device" << std::endl;
02228         return false;
02229     }
02230     if (parts.size() > 2)
02231         deviceNameOrID = parts[2];
02232     if (parts.size() > 1)
02233     {
02234         split(parts[1], '|', deviceTypes);
02235     }
02236     if (parts.size() > 0)
02237     {
02238         platform = parts[0];
02239     }
02240     return true;
02241 }
02242 
02243 #ifdef WINRT
02244 static cl_device_id selectOpenCLDevice()
02245 {
02246     return NULL;
02247 }
02248 #else
02249 static cl_device_id selectOpenCLDevice()
02250 {
02251     std::string platform, deviceName;
02252     std::vector<std::string> deviceTypes;
02253 
02254     const char* configuration = getenv("OPENCV_OPENCL_DEVICE");
02255     if (configuration &&
02256             (strcmp(configuration, "disabled") == 0 ||
02257              !parseOpenCLDeviceConfiguration(std::string(configuration), platform, deviceTypes, deviceName)
02258             ))
02259         return NULL;
02260 
02261     bool isID = false;
02262     int deviceID = -1;
02263     if (deviceName.length() == 1)
02264     // We limit ID range to 0..9, because we want to write:
02265     // - '2500' to mean i5-2500
02266     // - '8350' to mean AMD FX-8350
02267     // - '650' to mean GeForce 650
02268     // To extend ID range change condition to '> 0'
02269     {
02270         isID = true;
02271         for (size_t i = 0; i < deviceName.length(); i++)
02272         {
02273             if (!isdigit(deviceName[i]))
02274             {
02275                 isID = false;
02276                 break;
02277             }
02278         }
02279         if (isID)
02280         {
02281             deviceID = atoi(deviceName.c_str());
02282             if (deviceID < 0)
02283                 return NULL;
02284         }
02285     }
02286 
02287     std::vector<cl_platform_id> platforms;
02288     {
02289         cl_uint numPlatforms = 0;
02290         CV_OclDbgAssert(clGetPlatformIDs(0, NULL, &numPlatforms) == CL_SUCCESS);
02291 
02292         if (numPlatforms == 0)
02293             return NULL;
02294         platforms.resize((size_t)numPlatforms);
02295         CV_OclDbgAssert(clGetPlatformIDs(numPlatforms, &platforms[0], &numPlatforms) == CL_SUCCESS);
02296         platforms.resize(numPlatforms);
02297     }
02298 
02299     int selectedPlatform = -1;
02300     if (platform.length() > 0)
02301     {
02302         for (size_t i = 0; i < platforms.size(); i++)
02303         {
02304             std::string name;
02305             CV_OclDbgAssert(getStringInfo(clGetPlatformInfo, platforms[i], CL_PLATFORM_NAME, name) == CL_SUCCESS);
02306             if (name.find(platform) != std::string::npos)
02307             {
02308                 selectedPlatform = (int)i;
02309                 break;
02310             }
02311         }
02312         if (selectedPlatform == -1)
02313         {
02314             std::cerr << "ERROR: Can't find OpenCL platform by name: " << platform << std::endl;
02315             goto not_found;
02316         }
02317     }
02318     if (deviceTypes.size() == 0)
02319     {
02320         if (!isID)
02321         {
02322             deviceTypes.push_back("GPU");
02323             if (configuration)
02324                 deviceTypes.push_back("CPU");
02325         }
02326         else
02327             deviceTypes.push_back("ALL");
02328     }
02329     for (size_t t = 0; t < deviceTypes.size(); t++)
02330     {
02331         int deviceType = 0;
02332         std::string tempStrDeviceType = deviceTypes[t];
02333         #ifdef HAVE_OPENCL
02334         std::transform( tempStrDeviceType.begin(), tempStrDeviceType.end(), tempStrDeviceType.begin(), tolower );
02335         #endif
02336         
02337         if (tempStrDeviceType == "gpu" || tempStrDeviceType == "dgpu" || tempStrDeviceType == "igpu")
02338             deviceType = Device::TYPE_GPU;
02339         else if (tempStrDeviceType == "cpu")
02340             deviceType = Device::TYPE_CPU;
02341         else if (tempStrDeviceType == "accelerator")
02342             deviceType = Device::TYPE_ACCELERATOR;
02343         else if (tempStrDeviceType == "all")
02344             deviceType = Device::TYPE_ALL;
02345         else
02346         {
02347             std::cerr << "ERROR: Unsupported device type for OpenCL device (GPU, CPU, ACCELERATOR): " << deviceTypes[t] << std::endl;
02348             goto not_found;
02349         }
02350 
02351         std::vector<cl_device_id> devices; // TODO Use clReleaseDevice to cleanup
02352         for (int i = selectedPlatform >= 0 ? selectedPlatform : 0;
02353                 (selectedPlatform >= 0 ? i == selectedPlatform : true) && (i < (int)platforms.size());
02354                 i++)
02355         {
02356             cl_uint count = 0;
02357             cl_int status = clGetDeviceIDs(platforms[i], deviceType, 0, NULL, &count);
02358             CV_OclDbgAssert(status == CL_SUCCESS || status == CL_DEVICE_NOT_FOUND);
02359             if (count == 0)
02360                 continue;
02361             size_t base = devices.size();
02362             devices.resize(base + count);
02363             status = clGetDeviceIDs(platforms[i], deviceType, count, &devices[base], &count);
02364             CV_OclDbgAssert(status == CL_SUCCESS || status == CL_DEVICE_NOT_FOUND);
02365         }
02366 
02367         for (size_t i = (isID ? deviceID : 0);
02368              (isID ? (i == (size_t)deviceID) : true) && (i < devices.size());
02369              i++)
02370         {
02371             std::string name;
02372             CV_OclDbgAssert(getStringInfo(clGetDeviceInfo, devices[i], CL_DEVICE_NAME, name) == CL_SUCCESS);
02373             cl_bool useGPU = true;
02374             if(tempStrDeviceType == "dgpu" || tempStrDeviceType == "igpu")
02375             {
02376                 cl_bool isIGPU = CL_FALSE;
02377                 clGetDeviceInfo(devices[i], CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof(isIGPU), &isIGPU, NULL);
02378                 useGPU = tempStrDeviceType == "dgpu" ? !isIGPU : isIGPU;
02379             }
02380             if ( (isID || name.find(deviceName) != std::string::npos) && useGPU)
02381             {
02382                 // TODO check for OpenCL 1.1
02383                 return devices[i];
02384             }
02385         }
02386     }
02387 
02388 not_found:
02389     if (!configuration)
02390         return NULL; // suppress messages on stderr
02391 
02392     std::cerr << "ERROR: Requested OpenCL device not found, check configuration: " << (configuration == NULL ? "" : configuration) << std::endl
02393             << "    Platform: " << (platform.length() == 0 ? "any" : platform) << std::endl
02394             << "    Device types: ";
02395     for (size_t t = 0; t < deviceTypes.size(); t++)
02396         std::cerr << deviceTypes[t] << " ";
02397 
02398     std::cerr << std::endl << "    Device name: " << (deviceName.length() == 0 ? "any" : deviceName) << std::endl;
02399     return NULL;
02400 }
02401 #endif
02402 
02403 #ifdef HAVE_OPENCL_SVM
02404 namespace svm {
02405 
02406 enum AllocatorFlags { // don't use first 16 bits
02407         OPENCL_SVM_COARSE_GRAIN_BUFFER = 1 << 16, // clSVMAlloc + SVM map/unmap
02408         OPENCL_SVM_FINE_GRAIN_BUFFER = 2 << 16, // clSVMAlloc
02409         OPENCL_SVM_FINE_GRAIN_SYSTEM = 3 << 16, // direct access
02410         OPENCL_SVM_BUFFER_MASK = 3 << 16,
02411         OPENCL_SVM_BUFFER_MAP = 4 << 16
02412 };
02413 
02414 static bool checkForceSVMUmatUsage()
02415 {
02416     static bool initialized = false;
02417     static bool force = false;
02418     if (!initialized)
02419     {
02420         force = getBoolParameter("OPENCV_OPENCL_SVM_FORCE_UMAT_USAGE", false);
02421         initialized = true;
02422     }
02423     return force;
02424 }
02425 static bool checkDisableSVMUMatUsage()
02426 {
02427     static bool initialized = false;
02428     static bool force = false;
02429     if (!initialized)
02430     {
02431         force = getBoolParameter("OPENCV_OPENCL_SVM_DISABLE_UMAT_USAGE", false);
02432         initialized = true;
02433     }
02434     return force;
02435 }
02436 static bool checkDisableSVM()
02437 {
02438     static bool initialized = false;
02439     static bool force = false;
02440     if (!initialized)
02441     {
02442         force = getBoolParameter("OPENCV_OPENCL_SVM_DISABLE", false);
02443         initialized = true;
02444     }
02445     return force;
02446 }
02447 // see SVMCapabilities
02448 static unsigned int getSVMCapabilitiesMask()
02449 {
02450     static bool initialized = false;
02451     static unsigned int mask = 0;
02452     if (!initialized)
02453     {
02454         const char* envValue = getenv("OPENCV_OPENCL_SVM_CAPABILITIES_MASK");
02455         if (envValue == NULL)
02456         {
02457             return ~0U; // all bits 1
02458         }
02459         mask = atoi(envValue);
02460         initialized = true;
02461     }
02462     return mask;
02463 }
02464 } // namespace
02465 #endif
02466 
02467 struct Context::Impl
02468 {
02469     static Context::Impl* get(Context& context) { return context.p; }
02470 
02471     void __init()
02472     {
02473         refcount = 1;
02474         handle = 0;
02475 #ifdef HAVE_OPENCL_SVM
02476         svmInitialized = false;
02477 #endif
02478     }
02479 
02480     Impl()
02481     {
02482         __init();
02483     }
02484 
02485     void setDefault()
02486     {
02487         CV_Assert(handle == NULL);
02488 
02489         cl_device_id d = selectOpenCLDevice();
02490 
02491         if (d == NULL)
02492             return;
02493 
02494         cl_platform_id pl = NULL;
02495         CV_OclDbgAssert(clGetDeviceInfo(d, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &pl, NULL) == CL_SUCCESS);
02496 
02497         cl_context_properties prop[] =
02498         {
02499             CL_CONTEXT_PLATFORM, (cl_context_properties)pl,
02500             0
02501         };
02502 
02503         // !!! in the current implementation force the number of devices to 1 !!!
02504         cl_uint nd = 1;
02505         cl_int status;
02506 
02507         handle = clCreateContext(prop, nd, &d, 0, 0, &status);
02508 
02509         bool ok = handle != 0 && status == CL_SUCCESS;
02510         if( ok )
02511         {
02512             devices.resize(nd);
02513             devices[0].set(d);
02514         }
02515         else
02516             handle = NULL;
02517     }
02518 
02519     Impl(int dtype0)
02520     {
02521         __init();
02522 
02523         cl_int retval = 0;
02524         cl_platform_id pl = (cl_platform_id)Platform::getDefault().ptr();
02525         cl_context_properties prop[] =
02526         {
02527             CL_CONTEXT_PLATFORM, (cl_context_properties)pl,
02528             0
02529         };
02530 
02531         cl_uint i, nd0 = 0, nd = 0;
02532         int dtype = dtype0 & 15;
02533         CV_OclDbgAssert(clGetDeviceIDs( pl, dtype, 0, 0, &nd0 ) == CL_SUCCESS);
02534 
02535         AutoBuffer<void*> dlistbuf(nd0*2+1);
02536         cl_device_id* dlist = (cl_device_id*)(void**)dlistbuf;
02537         cl_device_id* dlist_new = dlist + nd0;
02538         CV_OclDbgAssert(clGetDeviceIDs( pl, dtype, nd0, dlist, &nd0 ) == CL_SUCCESS);
02539         String name0;
02540 
02541         for(i = 0; i < nd0; i++)
02542         {
02543             Device d(dlist[i]);
02544             if( !d.available() || !d.compilerAvailable() )
02545                 continue;
02546             if( dtype0 == Device::TYPE_DGPU && d.hostUnifiedMemory() )
02547                 continue;
02548             if( dtype0 == Device::TYPE_IGPU && !d.hostUnifiedMemory() )
02549                 continue;
02550             String name = d.name();
02551             if( nd != 0 && name != name0 )
02552                 continue;
02553             name0 = name;
02554             dlist_new[nd++] = dlist[i];
02555         }
02556 
02557         if(nd == 0)
02558             return;
02559 
02560         // !!! in the current implementation force the number of devices to 1 !!!
02561         nd = 1;
02562 
02563         handle = clCreateContext(prop, nd, dlist_new, 0, 0, &retval);
02564         bool ok = handle != 0 && retval == CL_SUCCESS;
02565         if( ok )
02566         {
02567             devices.resize(nd);
02568             for( i = 0; i < nd; i++ )
02569                 devices[i].set(dlist_new[i]);
02570         }
02571     }
02572 
02573     ~Impl()
02574     {
02575         if(handle)
02576         {
02577             clReleaseContext(handle);
02578             handle = NULL;
02579         }
02580         devices.clear();
02581     }
02582 
02583     Program getProg(const ProgramSource& src,
02584                     const String& buildflags, String& errmsg)
02585     {
02586         String prefix = Program::getPrefix(buildflags);
02587         HashKey k(src.hash(), crc64((const uchar*)prefix.c_str(), prefix.size()));
02588         phash_t::iterator it = phash.find(k);
02589         if( it != phash.end() )
02590             return it->second;
02591         //String filename = format("%08x%08x_%08x%08x.clb2",
02592         Program prog(src, buildflags, errmsg);
02593         if(prog.ptr())
02594             phash.insert(std::pair<HashKey,Program>(k, prog));
02595         return prog;
02596     }
02597 
02598     IMPLEMENT_REFCOUNTABLE();
02599 
02600     cl_context handle;
02601     std::vector<Device> devices;
02602 
02603     typedef ProgramSource::hash_t hash_t;
02604 
02605     struct HashKey
02606     {
02607         HashKey(hash_t _a, hash_t _b) : a(_a), b(_b) {}
02608         bool operator < (const HashKey& k) const { return a < k.a || (a == k.a && b < k.b); }
02609         bool operator == (const HashKey& k) const { return a == k.a && b == k.b; }
02610         bool operator != (const HashKey& k) const { return a != k.a || b != k.b; }
02611         hash_t a, b;
02612     };
02613     typedef std::map<HashKey, Program> phash_t;
02614     phash_t phash;
02615 
02616 #ifdef HAVE_OPENCL_SVM
02617     bool svmInitialized;
02618     bool svmAvailable;
02619     bool svmEnabled;
02620     svm::SVMCapabilities svmCapabilities;
02621     svm::SVMFunctions svmFunctions;
02622 
02623     void svmInit()
02624     {
02625         CV_Assert(handle != NULL);
02626         const Device& device = devices[0];
02627         cl_device_svm_capabilities deviceCaps = 0;
02628         CV_Assert(((void)0, CL_DEVICE_SVM_CAPABILITIES == CL_DEVICE_SVM_CAPABILITIES_AMD)); // Check assumption
02629         cl_int status = clGetDeviceInfo((cl_device_id)device.ptr(), CL_DEVICE_SVM_CAPABILITIES, sizeof(deviceCaps), &deviceCaps, NULL);
02630         if (status != CL_SUCCESS)
02631         {
02632             CV_OPENCL_SVM_TRACE_ERROR_P("CL_DEVICE_SVM_CAPABILITIES via clGetDeviceInfo failed: %d\n", status);
02633             goto noSVM;
02634         }
02635         CV_OPENCL_SVM_TRACE_P("CL_DEVICE_SVM_CAPABILITIES returned: 0x%x\n", (int)deviceCaps);
02636         CV_Assert(((void)0, CL_DEVICE_SVM_COARSE_GRAIN_BUFFER == CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_AMD)); // Check assumption
02637         svmCapabilities.value_ =
02638                 ((deviceCaps & CL_DEVICE_SVM_COARSE_GRAIN_BUFFER) ? svm::SVMCapabilities::SVM_COARSE_GRAIN_BUFFER : 0) |
02639                 ((deviceCaps & CL_DEVICE_SVM_FINE_GRAIN_BUFFER) ? svm::SVMCapabilities::SVM_FINE_GRAIN_BUFFER : 0) |
02640                 ((deviceCaps & CL_DEVICE_SVM_FINE_GRAIN_SYSTEM) ? svm::SVMCapabilities::SVM_FINE_GRAIN_SYSTEM : 0) |
02641                 ((deviceCaps & CL_DEVICE_SVM_ATOMICS) ? svm::SVMCapabilities::SVM_ATOMICS : 0);
02642         svmCapabilities.value_ &= svm::getSVMCapabilitiesMask();
02643         if (svmCapabilities.value_ == 0)
02644         {
02645             CV_OPENCL_SVM_TRACE_ERROR_P("svmCapabilities is empty\n");
02646             goto noSVM;
02647         }
02648         try
02649         {
02650             // Try OpenCL 2.0
02651             CV_OPENCL_SVM_TRACE_P("Try SVM from OpenCL 2.0 ...\n");
02652             void* ptr = clSVMAlloc(handle, CL_MEM_READ_WRITE, 100, 0);
02653             if (!ptr)
02654             {
02655                 CV_OPENCL_SVM_TRACE_ERROR_P("clSVMAlloc returned NULL...\n");
02656                 CV_ErrorNoReturn(Error::StsBadArg, "clSVMAlloc returned NULL");
02657             }
02658             try
02659             {
02660                 bool error = false;
02661                 cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
02662                 if (CL_SUCCESS != clEnqueueSVMMap(q, CL_TRUE, CL_MAP_WRITE, ptr, 100, 0, NULL, NULL))
02663                 {
02664                     CV_OPENCL_SVM_TRACE_ERROR_P("clEnqueueSVMMap failed...\n");
02665                     CV_ErrorNoReturn(Error::StsBadArg, "clEnqueueSVMMap FAILED");
02666                 }
02667                 clFinish(q);
02668                 try
02669                 {
02670                     ((int*)ptr)[0] = 100;
02671                 }
02672                 catch (...)
02673                 {
02674                     CV_OPENCL_SVM_TRACE_ERROR_P("SVM buffer access test FAILED\n");
02675                     error = true;
02676                 }
02677                 if (CL_SUCCESS != clEnqueueSVMUnmap(q, ptr, 0, NULL, NULL))
02678                 {
02679                     CV_OPENCL_SVM_TRACE_ERROR_P("clEnqueueSVMUnmap failed...\n");
02680                     CV_ErrorNoReturn(Error::StsBadArg, "clEnqueueSVMUnmap FAILED");
02681                 }
02682                 clFinish(q);
02683                 if (error)
02684                 {
02685                     CV_ErrorNoReturn(Error::StsBadArg, "OpenCL SVM buffer access test was FAILED");
02686                 }
02687             }
02688             catch (...)
02689             {
02690                 CV_OPENCL_SVM_TRACE_ERROR_P("OpenCL SVM buffer access test was FAILED\n");
02691                 clSVMFree(handle, ptr);
02692                 throw;
02693             }
02694             clSVMFree(handle, ptr);
02695             svmFunctions.fn_clSVMAlloc = clSVMAlloc;
02696             svmFunctions.fn_clSVMFree = clSVMFree;
02697             svmFunctions.fn_clSetKernelArgSVMPointer = clSetKernelArgSVMPointer;
02698             //svmFunctions.fn_clSetKernelExecInfo = clSetKernelExecInfo;
02699             //svmFunctions.fn_clEnqueueSVMFree = clEnqueueSVMFree;
02700             svmFunctions.fn_clEnqueueSVMMemcpy = clEnqueueSVMMemcpy;
02701             svmFunctions.fn_clEnqueueSVMMemFill = clEnqueueSVMMemFill;
02702             svmFunctions.fn_clEnqueueSVMMap = clEnqueueSVMMap;
02703             svmFunctions.fn_clEnqueueSVMUnmap = clEnqueueSVMUnmap;
02704         }
02705         catch (...)
02706         {
02707             CV_OPENCL_SVM_TRACE_P("clSVMAlloc failed, trying HSA extension...\n");
02708             try
02709             {
02710                 // Try HSA extension
02711                 String extensions = device.extensions();
02712                 if (extensions.find("cl_amd_svm") == String::npos)
02713                 {
02714                     CV_OPENCL_SVM_TRACE_P("Device extension doesn't have cl_amd_svm: %s\n", extensions.c_str());
02715                     goto noSVM;
02716                 }
02717                 cl_platform_id p = NULL;
02718                 status = clGetDeviceInfo((cl_device_id)device.ptr(), CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &p, NULL);
02719                 CV_Assert(status == CL_SUCCESS);
02720                 svmFunctions.fn_clSVMAlloc = (clSVMAllocAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSVMAllocAMD");
02721                 svmFunctions.fn_clSVMFree = (clSVMFreeAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSVMFreeAMD");
02722                 svmFunctions.fn_clSetKernelArgSVMPointer = (clSetKernelArgSVMPointerAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSetKernelArgSVMPointerAMD");
02723                 //svmFunctions.fn_clSetKernelExecInfo = (clSetKernelExecInfoAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clSetKernelExecInfoAMD");
02724                 //svmFunctions.fn_clEnqueueSVMFree = (clEnqueueSVMFreeAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMFreeAMD");
02725                 svmFunctions.fn_clEnqueueSVMMemcpy = (clEnqueueSVMMemcpyAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMMemcpyAMD");
02726                 svmFunctions.fn_clEnqueueSVMMemFill = (clEnqueueSVMMemFillAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMMemFillAMD");
02727                 svmFunctions.fn_clEnqueueSVMMap = (clEnqueueSVMMapAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMMapAMD");
02728                 svmFunctions.fn_clEnqueueSVMUnmap = (clEnqueueSVMUnmapAMD_fn)clGetExtensionFunctionAddressForPlatform(p, "clEnqueueSVMUnmapAMD");
02729                 CV_Assert(svmFunctions.isValid());
02730             }
02731             catch (...)
02732             {
02733                 CV_OPENCL_SVM_TRACE_P("Something is totally wrong\n");
02734                 goto noSVM;
02735             }
02736         }
02737 
02738         svmAvailable = true;
02739         svmEnabled = !svm::checkDisableSVM();
02740         svmInitialized = true;
02741         CV_OPENCL_SVM_TRACE_P("OpenCV OpenCL SVM support initialized\n");
02742         return;
02743     noSVM:
02744         CV_OPENCL_SVM_TRACE_P("OpenCL SVM is not detected\n");
02745         svmAvailable = false;
02746         svmEnabled = false;
02747         svmCapabilities.value_ = 0;
02748         svmInitialized = true;
02749         svmFunctions.fn_clSVMAlloc = NULL;
02750         return;
02751     }
02752 #endif
02753 };
02754 
02755 
02756 Context::Context()
02757 {
02758     p = 0;
02759 }
02760 
02761 Context::Context(int dtype)
02762 {
02763     p = 0;
02764     create(dtype);
02765 }
02766 
02767 bool Context::create()
02768 {
02769     if( !haveOpenCL() )
02770         return false;
02771     if(p)
02772         p->release();
02773     p = new Impl();
02774     if(!p->handle)
02775     {
02776         delete p;
02777         p = 0;
02778     }
02779     return p != 0;
02780 }
02781 
02782 bool Context::create(int dtype0)
02783 {
02784     if( !haveOpenCL() )
02785         return false;
02786     if(p)
02787         p->release();
02788     p = new Impl(dtype0);
02789     if(!p->handle)
02790     {
02791         delete p;
02792         p = 0;
02793     }
02794     return p != 0;
02795 }
02796 
02797 Context::~Context()
02798 {
02799     if (p)
02800     {
02801         p->release();
02802         p = NULL;
02803     }
02804 }
02805 
02806 Context::Context(const Context& c)
02807 {
02808     p = (Impl*)c.p;
02809     if(p)
02810         p->addref();
02811 }
02812 
02813 Context& Context::operator = (const Context& c)
02814 {
02815     Impl* newp = (Impl*)c.p;
02816     if(newp)
02817         newp->addref();
02818     if(p)
02819         p->release();
02820     p = newp;
02821     return *this;
02822 }
02823 
02824 void* Context::ptr() const
02825 {
02826     return p == NULL ? NULL : p->handle;
02827 }
02828 
02829 size_t Context::ndevices() const
02830 {
02831     return p ? p->devices.size() : 0;
02832 }
02833 
02834 const Device& Context::device(size_t idx) const
02835 {
02836     static Device dummy;
02837     return !p || idx >= p->devices.size() ? dummy : p->devices[idx];
02838 }
02839 
02840 Context& Context::getDefault(bool initialize)
02841 {
02842     static Context* ctx = new Context();
02843     if(!ctx->p && haveOpenCL())
02844     {
02845         if (!ctx->p)
02846             ctx->p = new Impl();
02847         if (initialize)
02848         {
02849             // do not create new Context right away.
02850             // First, try to retrieve existing context of the same type.
02851             // In its turn, Platform::getContext() may call Context::create()
02852             // if there is no such context.
02853             if (ctx->p->handle == NULL)
02854                 ctx->p->setDefault();
02855         }
02856     }
02857 
02858     return *ctx;
02859 }
02860 
02861 Program Context::getProg(const ProgramSource& prog,
02862                          const String& buildopts, String& errmsg)
02863 {
02864     return p ? p->getProg(prog, buildopts, errmsg) : Program();
02865 }
02866 
02867 
02868 
02869 #ifdef HAVE_OPENCL_SVM
02870 bool Context::useSVM() const
02871 {
02872     Context::Impl* i = p;
02873     CV_Assert(i);
02874     if (!i->svmInitialized)
02875         i->svmInit();
02876     return i->svmEnabled;
02877 }
02878 void Context::setUseSVM(bool enabled)
02879 {
02880     Context::Impl* i = p;
02881     CV_Assert(i);
02882     if (!i->svmInitialized)
02883         i->svmInit();
02884     if (enabled && !i->svmAvailable)
02885     {
02886         CV_ErrorNoReturn(Error::StsError, "OpenCL Shared Virtual Memory (SVM) is not supported by OpenCL device");
02887     }
02888     i->svmEnabled = enabled;
02889 }
02890 #else
02891 bool Context::useSVM() const { return false; }
02892 void Context::setUseSVM(bool enabled) { CV_Assert(!enabled); }
02893 #endif
02894 
02895 #ifdef HAVE_OPENCL_SVM
02896 namespace svm {
02897 
02898 const SVMCapabilities getSVMCapabilitites(const ocl::Context& context)
02899 {
02900     Context::Impl* i = context.p;
02901     CV_Assert(i);
02902     if (!i->svmInitialized)
02903         i->svmInit();
02904     return i->svmCapabilities;
02905 }
02906 
02907 CV_EXPORTS const SVMFunctions* getSVMFunctions(const ocl::Context& context)
02908 {
02909     Context::Impl* i = context.p;
02910     CV_Assert(i);
02911     CV_Assert(i->svmInitialized); // getSVMCapabilitites() must be called first
02912     CV_Assert(i->svmFunctions.fn_clSVMAlloc != NULL);
02913     return &i->svmFunctions;
02914 }
02915 
02916 CV_EXPORTS bool useSVM(UMatUsageFlags usageFlags)
02917 {
02918     if (checkForceSVMUmatUsage())
02919         return true;
02920     if (checkDisableSVMUMatUsage())
02921         return false;
02922     if ((usageFlags & USAGE_ALLOCATE_SHARED_MEMORY) != 0)
02923         return true;
02924     return false; // don't use SVM by default
02925 }
02926 
02927 } // namespace cv::ocl::svm
02928 #endif // HAVE_OPENCL_SVM
02929 
02930 
02931 static void get_platform_name(cl_platform_id id, String& name)
02932 {
02933     // get platform name string length
02934     size_t sz = 0;
02935     if (CL_SUCCESS != clGetPlatformInfo(id, CL_PLATFORM_NAME, 0, 0, &sz))
02936         CV_ErrorNoReturn(cv::Error::OpenCLApiCallError, "clGetPlatformInfo failed!");
02937 
02938     // get platform name string
02939     AutoBuffer<char> buf(sz + 1);
02940     if (CL_SUCCESS != clGetPlatformInfo(id, CL_PLATFORM_NAME, sz, buf, 0))
02941         CV_ErrorNoReturn(cv::Error::OpenCLApiCallError, "clGetPlatformInfo failed!");
02942 
02943     // just in case, ensure trailing zero for ASCIIZ string
02944     buf[sz] = 0;
02945 
02946     name = (const char*)buf;
02947 }
02948 
02949 /*
02950 // Attaches OpenCL context to OpenCV
02951 */
02952 void attachContext(const String& platformName, void* platformID, void* context, void* deviceID)
02953 {
02954     cl_uint cnt = 0;
02955 
02956     if(CL_SUCCESS != clGetPlatformIDs(0, 0, &cnt))
02957         CV_ErrorNoReturn(cv::Error::OpenCLApiCallError, "clGetPlatformIDs failed!");
02958 
02959     if (cnt == 0)
02960         CV_ErrorNoReturn(cv::Error::OpenCLApiCallError, "no OpenCL platform available!");
02961 
02962     std::vector<cl_platform_id> platforms(cnt);
02963 
02964     if(CL_SUCCESS != clGetPlatformIDs(cnt, &platforms[0], 0))
02965         CV_ErrorNoReturn(cv::Error::OpenCLApiCallError, "clGetPlatformIDs failed!");
02966 
02967     bool platformAvailable = false;
02968 
02969     // check if external platformName contained in list of available platforms in OpenCV
02970     for (unsigned int i = 0; i < cnt; i++)
02971     {
02972         String availablePlatformName;
02973         get_platform_name(platforms[i], availablePlatformName);
02974         // external platform is found in the list of available platforms
02975         if (platformName == availablePlatformName)
02976         {
02977             platformAvailable = true;
02978             break;
02979         }
02980     }
02981 
02982     if (!platformAvailable)
02983         CV_ErrorNoReturn(cv::Error::OpenCLApiCallError, "No matched platforms available!");
02984 
02985     // check if platformID corresponds to platformName
02986     String actualPlatformName;
02987     get_platform_name((cl_platform_id)platformID, actualPlatformName);
02988     if (platformName != actualPlatformName)
02989         CV_ErrorNoReturn(cv::Error::OpenCLApiCallError, "No matched platforms available!");
02990 
02991     // do not initialize OpenCL context
02992     Context ctx = Context::getDefault(false);
02993 
02994     // attach supplied context to OpenCV
02995     initializeContextFromHandle(ctx, platformID, context, deviceID);
02996 
02997     if(CL_SUCCESS != clRetainContext((cl_context)context))
02998         CV_ErrorNoReturn(cv::Error::OpenCLApiCallError, "clRetainContext failed!");
02999 
03000     // clear command queue, if any
03001     getCoreTlsData().get()->oclQueue.finish();
03002     Queue q;
03003     getCoreTlsData().get()->oclQueue = q;
03004 
03005     return;
03006 } // attachContext()
03007 
03008 
03009 void initializeContextFromHandle(Context& ctx, void* platform, void* _context, void* _device)
03010 {
03011     cl_context context = (cl_context)_context;
03012     cl_device_id device = (cl_device_id)_device;
03013 
03014     // cleanup old context
03015     Context::Impl * impl = ctx.p;
03016     if (impl->handle)
03017     {
03018         CV_OclDbgAssert(clReleaseContext(impl->handle) == CL_SUCCESS);
03019     }
03020     impl->devices.clear();
03021 
03022     impl->handle = context;
03023     impl->devices.resize(1);
03024     impl->devices[0].set(device);
03025 
03026     Platform& p = Platform::getDefault();
03027     Platform::Impl* pImpl = p.p;
03028     pImpl->handle = (cl_platform_id)platform;
03029 }
03030 
03031 /////////////////////////////////////////// Queue /////////////////////////////////////////////
03032 
03033 struct Queue::Impl
03034 {
03035     Impl(const Context& c, const Device& d)
03036     {
03037         refcount = 1;
03038         const Context* pc = &c;
03039         cl_context ch = (cl_context)pc->ptr();
03040         if( !ch )
03041         {
03042             pc = &Context::getDefault();
03043             ch = (cl_context)pc->ptr();
03044         }
03045         cl_device_id dh = (cl_device_id)d.ptr();
03046         if( !dh )
03047             dh = (cl_device_id)pc->device(0).ptr();
03048         cl_int retval = 0;
03049         handle = clCreateCommandQueue(ch, dh, 0, &retval);
03050         CV_OclDbgAssert(retval == CL_SUCCESS);
03051     }
03052 
03053     ~Impl()
03054     {
03055 #ifdef _WIN32
03056         if (!cv::__termination)
03057 #endif
03058         {
03059             if(handle)
03060             {
03061                 clFinish(handle);
03062                 clReleaseCommandQueue(handle);
03063                 handle = NULL;
03064             }
03065         }
03066     }
03067 
03068     IMPLEMENT_REFCOUNTABLE();
03069 
03070     cl_command_queue handle;
03071 };
03072 
03073 Queue::Queue()
03074 {
03075     p = 0;
03076 }
03077 
03078 Queue::Queue(const Context& c, const Device& d)
03079 {
03080     p = 0;
03081     create(c, d);
03082 }
03083 
03084 Queue::Queue(const Queue& q)
03085 {
03086     p = q.p;
03087     if(p)
03088         p->addref();
03089 }
03090 
03091 Queue& Queue::operator = (const Queue& q)
03092 {
03093     Impl* newp = (Impl*)q.p;
03094     if(newp)
03095         newp->addref();
03096     if(p)
03097         p->release();
03098     p = newp;
03099     return *this;
03100 }
03101 
03102 Queue::~Queue()
03103 {
03104     if(p)
03105         p->release();
03106 }
03107 
03108 bool Queue::create(const Context& c, const Device& d)
03109 {
03110     if(p)
03111         p->release();
03112     p = new Impl(c, d);
03113     return p->handle != 0;
03114 }
03115 
03116 void Queue::finish()
03117 {
03118     if(p && p->handle)
03119     {
03120         CV_OclDbgAssert(clFinish(p->handle) == CL_SUCCESS);
03121     }
03122 }
03123 
03124 void* Queue::ptr() const
03125 {
03126     return p ? p->handle : 0;
03127 }
03128 
03129 Queue& Queue::getDefault()
03130 {
03131     Queue& q = getCoreTlsData().get()->oclQueue;
03132     if( !q.p && haveOpenCL() )
03133         q.create(Context::getDefault());
03134     return q;
03135 }
03136 
03137 static cl_command_queue getQueue(const Queue& q)
03138 {
03139     cl_command_queue qq = (cl_command_queue)q.ptr();
03140     if(!qq)
03141         qq = (cl_command_queue)Queue::getDefault().ptr();
03142     return qq;
03143 }
03144 
03145 /////////////////////////////////////////// KernelArg /////////////////////////////////////////////
03146 
03147 KernelArg::KernelArg()
03148     : flags(0), m(0), obj(0), sz(0), wscale(1), iwscale(1)
03149 {
03150 }
03151 
03152 KernelArg::KernelArg(int _flags, UMat* _m, int _wscale, int _iwscale, const void* _obj, size_t _sz)
03153     : flags(_flags), m(_m), obj(_obj), sz(_sz), wscale(_wscale), iwscale(_iwscale)
03154 {
03155 }
03156 
03157 KernelArg KernelArg::Constant(const Mat& m)
03158 {
03159     CV_Assert(m.isContinuous());
03160     return KernelArg(CONSTANT, 0, 0, 0, m.ptr(), m.total()*m.elemSize());
03161 }
03162 
03163 /////////////////////////////////////////// Kernel /////////////////////////////////////////////
03164 
03165 struct Kernel::Impl
03166 {
03167     Impl(const char* kname, const Program& prog) :
03168         refcount(1), e(0), nu(0)
03169     {
03170         cl_program ph = (cl_program)prog.ptr();
03171         cl_int retval = 0;
03172         handle = ph != 0 ?
03173             clCreateKernel(ph, kname, &retval) : 0;
03174         CV_OclDbgAssert(retval == CL_SUCCESS);
03175         for( int i = 0; i < MAX_ARRS; i++ )
03176             u[i] = 0;
03177         haveTempDstUMats = false;
03178     }
03179 
03180     void cleanupUMats()
03181     {
03182         for( int i = 0; i < MAX_ARRS; i++ )
03183             if( u[i] )
03184             {
03185                 if( CV_XADD(&u[i]->urefcount, -1) == 1 )
03186                     u[i]->currAllocator->deallocate(u[i]);
03187                 u[i] = 0;
03188             }
03189         nu = 0;
03190         haveTempDstUMats = false;
03191     }
03192 
03193     void addUMat(const UMat& m, bool dst)
03194     {
03195         CV_Assert(nu < MAX_ARRS && m.u && m.u->urefcount > 0);
03196         u[nu] = m.u;
03197         CV_XADD(&m.u->urefcount, 1);
03198         nu++;
03199         if(dst && m.u->tempUMat())
03200             haveTempDstUMats = true;
03201     }
03202 
03203     void addImage(const Image2D& image)
03204     {
03205         images.push_back(image);
03206     }
03207 
03208     void finit()
03209     {
03210         cleanupUMats();
03211         images.clear();
03212         if(e) { clReleaseEvent(e); e = 0; }
03213         release();
03214     }
03215 
03216     ~Impl()
03217     {
03218         if(handle)
03219             clReleaseKernel(handle);
03220     }
03221 
03222     IMPLEMENT_REFCOUNTABLE();
03223 
03224     cl_kernel handle;
03225     cl_event e;
03226     enum { MAX_ARRS = 16 };
03227     UMatData* u[MAX_ARRS];
03228     int nu;
03229     std::list<Image2D> images;
03230     bool haveTempDstUMats;
03231 };
03232 
03233 }} // namespace cv::ocl
03234 
03235 extern "C" {
03236 
03237 static void CL_CALLBACK oclCleanupCallback(cl_event, cl_int, void *p)
03238 {
03239     ((cv::ocl::Kernel::Impl*)p)->finit();
03240 }
03241 
03242 }
03243 
03244 namespace cv { namespace ocl {
03245 
03246 Kernel::Kernel()
03247 {
03248     p = 0;
03249 }
03250 
03251 Kernel::Kernel(const char* kname, const Program& prog)
03252 {
03253     p = 0;
03254     create(kname, prog);
03255 }
03256 
03257 Kernel::Kernel(const char* kname, const ProgramSource& src,
03258                const String& buildopts, String* errmsg)
03259 {
03260     p = 0;
03261     create(kname, src, buildopts, errmsg);
03262 }
03263 
03264 Kernel::Kernel(const Kernel& k)
03265 {
03266     p = k.p;
03267     if(p)
03268         p->addref();
03269 }
03270 
03271 Kernel& Kernel::operator = (const Kernel& k)
03272 {
03273     Impl* newp = (Impl*)k.p;
03274     if(newp)
03275         newp->addref();
03276     if(p)
03277         p->release();
03278     p = newp;
03279     return *this;
03280 }
03281 
03282 Kernel::~Kernel()
03283 {
03284     if(p)
03285         p->release();
03286 }
03287 
03288 bool Kernel::create(const char* kname, const Program& prog)
03289 {
03290     if(p)
03291         p->release();
03292     p = new Impl(kname, prog);
03293     if(p->handle == 0)
03294     {
03295         p->release();
03296         p = 0;
03297     }
03298 #ifdef CV_OPENCL_RUN_ASSERT // check kernel compilation fails
03299     CV_Assert(p);
03300 #endif
03301     return p != 0;
03302 }
03303 
03304 bool Kernel::create(const char* kname, const ProgramSource& src,
03305                     const String& buildopts, String* errmsg)
03306 {
03307     if(p)
03308     {
03309         p->release();
03310         p = 0;
03311     }
03312     String tempmsg;
03313     if( !errmsg ) errmsg = &tempmsg;
03314     const Program& prog = Context::getDefault().getProg(src, buildopts, *errmsg);
03315     return create(kname, prog);
03316 }
03317 
03318 void* Kernel::ptr() const
03319 {
03320     return p ? p->handle : 0;
03321 }
03322 
03323 bool Kernel::empty() const
03324 {
03325     return ptr() == 0;
03326 }
03327 
03328 int Kernel::set(int i, const void* value, size_t sz)
03329 {
03330     if (!p || !p->handle)
03331         return -1;
03332     if (i < 0)
03333         return i;
03334     if( i == 0 )
03335         p->cleanupUMats();
03336 
03337     cl_int retval = clSetKernelArg(p->handle, (cl_uint)i, sz, value);
03338     CV_OclDbgAssert(retval == CL_SUCCESS);
03339     if (retval != CL_SUCCESS)
03340         return -1;
03341     return i+1;
03342 }
03343 
03344 int Kernel::set(int i, const Image2D& image2D)
03345 {
03346     p->addImage(image2D);
03347     cl_mem h = (cl_mem)image2D.ptr();
03348     return set(i, &h, sizeof(h));
03349 }
03350 
03351 int Kernel::set(int i, const UMat& m)
03352 {
03353     return set(i, KernelArg(KernelArg::READ_WRITE, (UMat*)&m, 0, 0));
03354 }
03355 
03356 int Kernel::set(int i, const KernelArg& arg)
03357 {
03358     if( !p || !p->handle )
03359         return -1;
03360     if (i < 0)
03361         return i;
03362     if( i == 0 )
03363         p->cleanupUMats();
03364     if( arg.m )
03365     {
03366         int accessFlags = ((arg.flags & KernelArg::READ_ONLY) ? ACCESS_READ : 0) +
03367                           ((arg.flags & KernelArg::WRITE_ONLY) ? ACCESS_WRITE : 0);
03368         bool ptronly = (arg.flags & KernelArg::PTR_ONLY) != 0;
03369         cl_mem h = (cl_mem)arg.m->handle(accessFlags);
03370 
03371         if (!h)
03372         {
03373             p->release();
03374             p = 0;
03375             return -1;
03376         }
03377 
03378 #ifdef HAVE_OPENCL_SVM
03379         if ((arg.m->u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
03380         {
03381             const Context& ctx = Context::getDefault();
03382             const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
03383             uchar*& svmDataPtr = (uchar*&)arg.m->u->handle;
03384             CV_OPENCL_SVM_TRACE_P("clSetKernelArgSVMPointer: %p\n", svmDataPtr);
03385 #if 1 // TODO
03386             cl_int status = svmFns->fn_clSetKernelArgSVMPointer(p->handle, (cl_uint)i, svmDataPtr);
03387 #else
03388             cl_int status = svmFns->fn_clSetKernelArgSVMPointer(p->handle, (cl_uint)i, &svmDataPtr);
03389 #endif
03390             CV_Assert(status == CL_SUCCESS);
03391         }
03392         else
03393 #endif
03394         {
03395             CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)i, sizeof(h), &h) == CL_SUCCESS);
03396         }
03397 
03398         if (ptronly)
03399         {
03400             i++;
03401         }
03402         else if( arg.m->dims <= 2 )
03403         {
03404             UMat2D u2d(*arg.m);
03405             CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u2d.step), &u2d.step) == CL_SUCCESS);
03406             CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u2d.offset), &u2d.offset) == CL_SUCCESS);
03407             i += 3;
03408 
03409             if( !(arg.flags & KernelArg::NO_SIZE) )
03410             {
03411                 int cols = u2d.cols*arg.wscale/arg.iwscale;
03412                 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)i, sizeof(u2d.rows), &u2d.rows) == CL_SUCCESS);
03413                 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(cols), &cols) == CL_SUCCESS);
03414                 i += 2;
03415             }
03416         }
03417         else
03418         {
03419             UMat3D u3d(*arg.m);
03420             CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u3d.slicestep), &u3d.slicestep) == CL_SUCCESS);
03421             CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u3d.step), &u3d.step) == CL_SUCCESS);
03422             CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+3), sizeof(u3d.offset), &u3d.offset) == CL_SUCCESS);
03423             i += 4;
03424             if( !(arg.flags & KernelArg::NO_SIZE) )
03425             {
03426                 int cols = u3d.cols*arg.wscale/arg.iwscale;
03427                 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)i, sizeof(u3d.slices), &u3d.rows) == CL_SUCCESS);
03428                 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u3d.rows), &u3d.rows) == CL_SUCCESS);
03429                 CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u3d.cols), &cols) == CL_SUCCESS);
03430                 i += 3;
03431             }
03432         }
03433         p->addUMat(*arg.m, (accessFlags & ACCESS_WRITE) != 0);
03434         return i;
03435     }
03436     CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)i, arg.sz, arg.obj) == CL_SUCCESS);
03437     return i+1;
03438 }
03439 
03440 
03441 bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
03442                  bool sync, const Queue& q)
03443 {
03444     if(!p || !p->handle || p->e != 0)
03445         return false;
03446 
03447     cl_command_queue qq = getQueue(q);
03448     size_t offset[CV_MAX_DIM] = {0}, globalsize[CV_MAX_DIM] = {1,1,1};
03449     size_t total = 1;
03450     CV_Assert(_globalsize != 0);
03451     for (int i = 0; i < dims; i++)
03452     {
03453         size_t val = _localsize ? _localsize[i] :
03454             dims == 1 ? 64 : dims == 2 ? (i == 0 ? 256 : 8) : dims == 3 ? (8>>(int)(i>0)) : 1;
03455         CV_Assert( val > 0 );
03456         total *= _globalsize[i];
03457         globalsize[i] = ((_globalsize[i] + val - 1)/val)*val;
03458     }
03459     if( total == 0 )
03460         return true;
03461     if( p->haveTempDstUMats )
03462         sync = true;
03463     cl_int retval = clEnqueueNDRangeKernel(qq, p->handle, (cl_uint)dims,
03464                                            offset, globalsize, _localsize, 0, 0,
03465                                            sync ? 0 : &p->e);
03466 #if CV_OPENCL_SHOW_RUN_ERRORS
03467     if (retval != CL_SUCCESS)
03468     {
03469         printf("OpenCL program returns error: %d\n", retval);
03470         fflush(stdout);
03471     }
03472 #endif
03473     if( sync || retval != CL_SUCCESS )
03474     {
03475         CV_OclDbgAssert(clFinish(qq) == CL_SUCCESS);
03476         p->cleanupUMats();
03477     }
03478     else
03479     {
03480         p->addref();
03481         CV_OclDbgAssert(clSetEventCallback(p->e, CL_COMPLETE, oclCleanupCallback, p) == CL_SUCCESS);
03482     }
03483     return retval == CL_SUCCESS;
03484 }
03485 
03486 bool Kernel::runTask(bool sync, const Queue& q)
03487 {
03488     if(!p || !p->handle || p->e != 0)
03489         return false;
03490 
03491     cl_command_queue qq = getQueue(q);
03492     cl_int retval = clEnqueueTask(qq, p->handle, 0, 0, sync ? 0 : &p->e);
03493     if( sync || retval != CL_SUCCESS )
03494     {
03495         CV_OclDbgAssert(clFinish(qq) == CL_SUCCESS);
03496         p->cleanupUMats();
03497     }
03498     else
03499     {
03500         p->addref();
03501         CV_OclDbgAssert(clSetEventCallback(p->e, CL_COMPLETE, oclCleanupCallback, p) == CL_SUCCESS);
03502     }
03503     return retval == CL_SUCCESS;
03504 }
03505 
03506 
03507 size_t Kernel::workGroupSize() const
03508 {
03509     if(!p || !p->handle)
03510         return 0;
03511     size_t val = 0, retsz = 0;
03512     cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
03513     return clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_WORK_GROUP_SIZE,
03514                                     sizeof(val), &val, &retsz) == CL_SUCCESS ? val : 0;
03515 }
03516 
03517 size_t Kernel::preferedWorkGroupSizeMultiple() const
03518 {
03519     if(!p || !p->handle)
03520         return 0;
03521     size_t val = 0, retsz = 0;
03522     cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
03523     return clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
03524                                     sizeof(val), &val, &retsz) == CL_SUCCESS ? val : 0;
03525 }
03526 
03527 bool Kernel::compileWorkGroupSize(size_t wsz[]) const
03528 {
03529     if(!p || !p->handle || !wsz)
03530         return 0;
03531     size_t retsz = 0;
03532     cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
03533     return clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_COMPILE_WORK_GROUP_SIZE,
03534                                     sizeof(wsz[0])*3, wsz, &retsz) == CL_SUCCESS;
03535 }
03536 
03537 size_t Kernel::localMemSize() const
03538 {
03539     if(!p || !p->handle)
03540         return 0;
03541     size_t retsz = 0;
03542     cl_ulong val = 0;
03543     cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
03544     return clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_LOCAL_MEM_SIZE,
03545                                     sizeof(val), &val, &retsz) == CL_SUCCESS ? (size_t)val : 0;
03546 }
03547 
03548 /////////////////////////////////////////// Program /////////////////////////////////////////////
03549 
03550 struct Program::Impl
03551 {
03552     Impl(const ProgramSource& _src,
03553          const String& _buildflags, String& errmsg)
03554     {
03555         refcount = 1;
03556         const Context& ctx = Context::getDefault();
03557         src = _src;
03558         buildflags = _buildflags;
03559         const String& srcstr = src.source();
03560         const char* srcptr = srcstr.c_str();
03561         size_t srclen = srcstr.size();
03562         cl_int retval = 0;
03563 
03564         handle = clCreateProgramWithSource((cl_context)ctx.ptr(), 1, &srcptr, &srclen, &retval);
03565         if( handle && retval == CL_SUCCESS )
03566         {
03567             int i, n = (int)ctx.ndevices();
03568             AutoBuffer<void*> deviceListBuf(n+1);
03569             void** deviceList = deviceListBuf;
03570             for( i = 0; i < n; i++ )
03571                 deviceList[i] = ctx.device(i).ptr();
03572 
03573             Device device = Device::getDefault();
03574             if (device.isAMD())
03575                 buildflags += " -D AMD_DEVICE";
03576             else if (device.isIntel())
03577                 buildflags += " -D INTEL_DEVICE";
03578 
03579             retval = clBuildProgram(handle, n,
03580                                     (const cl_device_id*)deviceList,
03581                                     buildflags.c_str(), 0, 0);
03582 #if !CV_OPENCL_ALWAYS_SHOW_BUILD_LOG
03583             if( retval != CL_SUCCESS )
03584 #endif
03585             {
03586                 size_t retsz = 0;
03587                 cl_int buildInfo_retval = clGetProgramBuildInfo(handle, (cl_device_id)deviceList[0],
03588                                                CL_PROGRAM_BUILD_LOG, 0, 0, &retsz);
03589                 if (buildInfo_retval == CL_SUCCESS && retsz > 1)
03590                 {
03591                     AutoBuffer<char> bufbuf(retsz + 16);
03592                     char* buf = bufbuf;
03593                     buildInfo_retval = clGetProgramBuildInfo(handle, (cl_device_id)deviceList[0],
03594                                                    CL_PROGRAM_BUILD_LOG, retsz+1, buf, &retsz);
03595                     if (buildInfo_retval == CL_SUCCESS)
03596                     {
03597                         // TODO It is useful to see kernel name & program file name also
03598                         errmsg = String(buf);
03599                         printf("OpenCL program build log: %s\n%s\n", buildflags.c_str(), errmsg.c_str());
03600                         fflush(stdout);
03601                     }
03602                 }
03603                 if (retval != CL_SUCCESS && handle)
03604                 {
03605                     clReleaseProgram(handle);
03606                     handle = NULL;
03607                 }
03608             }
03609         }
03610     }
03611 
03612     Impl(const String& _buf, const String& _buildflags)
03613     {
03614         refcount = 1;
03615         handle = 0;
03616         buildflags = _buildflags;
03617         if(_buf.empty())
03618             return;
03619         String prefix0 = Program::getPrefix(buildflags);
03620         const Context& ctx = Context::getDefault();
03621         const Device& dev = Device::getDefault();
03622         const char* pos0 = _buf.c_str();
03623         const char* pos1 = strchr(pos0, '\n');
03624         if(!pos1)
03625             return;
03626         const char* pos2 = strchr(pos1+1, '\n');
03627         if(!pos2)
03628             return;
03629         const char* pos3 = strchr(pos2+1, '\n');
03630         if(!pos3)
03631             return;
03632         size_t prefixlen = (pos3 - pos0)+1;
03633         String prefix(pos0, prefixlen);
03634         if( prefix != prefix0 )
03635             return;
03636         const uchar* bin = (uchar*)(pos3+1);
03637         void* devid = dev.ptr();
03638         size_t codelen = _buf.length() - prefixlen;
03639         cl_int binstatus = 0, retval = 0;
03640         handle = clCreateProgramWithBinary((cl_context)ctx.ptr(), 1, (cl_device_id*)&devid,
03641                                            &codelen, &bin, &binstatus, &retval);
03642         CV_OclDbgAssert(retval == CL_SUCCESS);
03643     }
03644 
03645     String store()
03646     {
03647         if(!handle)
03648             return String();
03649         size_t progsz = 0, retsz = 0;
03650         String prefix = Program::getPrefix(buildflags);
03651         size_t prefixlen = prefix.length();
03652         if(clGetProgramInfo(handle, CL_PROGRAM_BINARY_SIZES, sizeof(progsz), &progsz, &retsz) != CL_SUCCESS)
03653             return String();
03654         AutoBuffer<uchar> bufbuf(prefixlen + progsz + 16);
03655         uchar* buf = bufbuf;
03656         memcpy(buf, prefix.c_str(), prefixlen);
03657         buf += prefixlen;
03658         if(clGetProgramInfo(handle, CL_PROGRAM_BINARIES, sizeof(buf), &buf, &retsz) != CL_SUCCESS)
03659             return String();
03660         buf[progsz] = (uchar)'\0';
03661         return String((const char*)(uchar*)bufbuf, prefixlen + progsz);
03662     }
03663 
03664     ~Impl()
03665     {
03666         if( handle )
03667         {
03668 #ifdef _WIN32
03669             if (!cv::__termination)
03670 #endif
03671             {
03672                 clReleaseProgram(handle);
03673             }
03674             handle = NULL;
03675         }
03676     }
03677 
03678     IMPLEMENT_REFCOUNTABLE();
03679 
03680     ProgramSource src;
03681     String buildflags;
03682     cl_program handle;
03683 };
03684 
03685 
03686 Program::Program() { p = 0; }
03687 
03688 Program::Program(const ProgramSource& src,
03689         const String& buildflags, String& errmsg)
03690 {
03691     p = 0;
03692     create(src, buildflags, errmsg);
03693 }
03694 
03695 Program::Program(const Program& prog)
03696 {
03697     p = prog.p;
03698     if(p)
03699         p->addref();
03700 }
03701 
03702 Program& Program::operator = (const Program& prog)
03703 {
03704     Impl* newp = (Impl*)prog.p;
03705     if(newp)
03706         newp->addref();
03707     if(p)
03708         p->release();
03709     p = newp;
03710     return *this;
03711 }
03712 
03713 Program::~Program()
03714 {
03715     if(p)
03716         p->release();
03717 }
03718 
03719 bool Program::create(const ProgramSource& src,
03720             const String& buildflags, String& errmsg)
03721 {
03722     if(p)
03723         p->release();
03724     p = new Impl(src, buildflags, errmsg);
03725     if(!p->handle)
03726     {
03727         p->release();
03728         p = 0;
03729     }
03730     return p != 0;
03731 }
03732 
03733 const ProgramSource& Program::source() const
03734 {
03735     static ProgramSource dummy;
03736     return p ? p->src : dummy;
03737 }
03738 
03739 void* Program::ptr() const
03740 {
03741     return p ? p->handle : 0;
03742 }
03743 
03744 bool Program::read(const String& bin, const String& buildflags)
03745 {
03746     if(p)
03747         p->release();
03748     p = new Impl(bin, buildflags);
03749     return p->handle != 0;
03750 }
03751 
03752 bool Program::write(String& bin) const
03753 {
03754     if(!p)
03755         return false;
03756     bin = p->store();
03757     return !bin.empty();
03758 }
03759 
03760 String Program::getPrefix() const
03761 {
03762     if(!p)
03763         return String();
03764     return getPrefix(p->buildflags);
03765 }
03766 
03767 String Program::getPrefix(const String& buildflags)
03768 {
03769     const Context& ctx = Context::getDefault();
03770     const Device& dev = ctx.device(0);
03771     return format("name=%s\ndriver=%s\nbuildflags=%s\n",
03772                   dev.name().c_str(), dev.driverVersion().c_str(), buildflags.c_str());
03773 }
03774 
03775 ///////////////////////////////////////// ProgramSource ///////////////////////////////////////////////
03776 
03777 struct ProgramSource::Impl
03778 {
03779     Impl(const char* _src)
03780     {
03781         init(String(_src));
03782     }
03783     Impl(const String& _src)
03784     {
03785         init(_src);
03786     }
03787     void init(const String& _src)
03788     {
03789         refcount = 1;
03790         src = _src;
03791         h = crc64((uchar*)src.c_str(), src.size());
03792     }
03793 
03794     IMPLEMENT_REFCOUNTABLE();
03795     String src;
03796     ProgramSource::hash_t h;
03797 };
03798 
03799 
03800 ProgramSource::ProgramSource()
03801 {
03802     p = 0;
03803 }
03804 
03805 ProgramSource::ProgramSource(const char* prog)
03806 {
03807     p = new Impl(prog);
03808 }
03809 
03810 ProgramSource::ProgramSource(const String& prog)
03811 {
03812     p = new Impl(prog);
03813 }
03814 
03815 ProgramSource::~ProgramSource()
03816 {
03817     if(p)
03818         p->release();
03819 }
03820 
03821 ProgramSource::ProgramSource(const ProgramSource& prog)
03822 {
03823     p = prog.p;
03824     if(p)
03825         p->addref();
03826 }
03827 
03828 ProgramSource& ProgramSource::operator = (const ProgramSource& prog)
03829 {
03830     Impl* newp = (Impl*)prog.p;
03831     if(newp)
03832         newp->addref();
03833     if(p)
03834         p->release();
03835     p = newp;
03836     return *this;
03837 }
03838 
03839 const String& ProgramSource::source() const
03840 {
03841     static String dummy;
03842     return p ? p->src : dummy;
03843 }
03844 
03845 ProgramSource::hash_t ProgramSource::hash() const
03846 {
03847     return p ? p->h : 0;
03848 }
03849 
03850 //////////////////////////////////////////// OpenCLAllocator //////////////////////////////////////////////////
03851 
03852 template<typename T>
03853 class OpenCLBufferPool
03854 {
03855 protected:
03856     ~OpenCLBufferPool() { }
03857 public:
03858     virtual T allocate(size_t size) = 0;
03859     virtual void release(T buffer) = 0;
03860 };
03861 
03862 template <typename Derived, typename BufferEntry, typename T>
03863 class OpenCLBufferPoolBaseImpl : public BufferPoolController, public OpenCLBufferPool<T>
03864 {
03865 private:
03866     inline Derived& derived() { return *static_cast<Derived*>(this); }
03867 protected:
03868     Mutex mutex_;
03869 
03870     size_t currentReservedSize;
03871     size_t maxReservedSize;
03872 
03873     std::list<BufferEntry> allocatedEntries_; // Allocated and used entries
03874     std::list<BufferEntry> reservedEntries_; // LRU order. Allocated, but not used entries
03875 
03876     // synchronized
03877     bool _findAndRemoveEntryFromAllocatedList(CV_OUT BufferEntry& entry, T buffer)
03878     {
03879         typename std::list<BufferEntry>::iterator i = allocatedEntries_.begin();
03880         for (; i != allocatedEntries_.end(); ++i)
03881         {
03882             BufferEntry& e = *i;
03883             if (e.clBuffer_ == buffer)
03884             {
03885                 entry = e;
03886                 allocatedEntries_.erase(i);
03887                 return true;
03888             }
03889         }
03890         return false;
03891     }
03892 
03893     // synchronized
03894     bool _findAndRemoveEntryFromReservedList(CV_OUT BufferEntry& entry, const size_t size)
03895     {
03896         if (reservedEntries_.empty())
03897             return false;
03898         typename std::list<BufferEntry>::iterator i = reservedEntries_.begin();
03899         typename std::list<BufferEntry>::iterator result_pos = reservedEntries_.end();
03900         BufferEntry result;
03901         size_t minDiff = (size_t)(-1);
03902         for (; i != reservedEntries_.end(); ++i)
03903         {
03904             BufferEntry& e = *i;
03905             if (e.capacity_ >= size)
03906             {
03907                 size_t diff = e.capacity_ - size;
03908                 if (diff < std::max((size_t)4096, size / 8) && (result_pos == reservedEntries_.end() || diff < minDiff))
03909                 {
03910                     minDiff = diff;
03911                     result_pos = i;
03912                     result = e;
03913                     if (diff == 0)
03914                         break;
03915                 }
03916             }
03917         }
03918         if (result_pos != reservedEntries_.end())
03919         {
03920             //CV_DbgAssert(result == *result_pos);
03921             reservedEntries_.erase(result_pos);
03922             entry = result;
03923             currentReservedSize -= entry.capacity_;
03924             allocatedEntries_.push_back(entry);
03925             return true;
03926         }
03927         return false;
03928     }
03929 
03930     // synchronized
03931     void _checkSizeOfReservedEntries()
03932     {
03933         while (currentReservedSize > maxReservedSize)
03934         {
03935             CV_DbgAssert(!reservedEntries_.empty());
03936             const BufferEntry& entry = reservedEntries_.back();
03937             CV_DbgAssert(currentReservedSize >= entry.capacity_);
03938             currentReservedSize -= entry.capacity_;
03939             derived()._releaseBufferEntry(entry);
03940             reservedEntries_.pop_back();
03941         }
03942     }
03943 
03944     inline size_t _allocationGranularity(size_t size)
03945     {
03946         // heuristic values
03947         if (size < 1024*1024)
03948             return 4096;  // don't work with buffers smaller than 4Kb (hidden allocation overhead issue)
03949         else if (size < 16*1024*1024)
03950             return 64*1024;
03951         else
03952             return 1024*1024;
03953     }
03954 
03955 public:
03956     OpenCLBufferPoolBaseImpl()
03957         : currentReservedSize(0),
03958           maxReservedSize(0)
03959     {
03960         // nothing
03961     }
03962     virtual ~OpenCLBufferPoolBaseImpl()
03963     {
03964         freeAllReservedBuffers();
03965         CV_Assert(reservedEntries_.empty());
03966     }
03967 public:
03968     virtual T allocate(size_t size)
03969     {
03970         AutoLock locker(mutex_);
03971         BufferEntry entry;
03972         if (maxReservedSize > 0 && _findAndRemoveEntryFromReservedList(entry, size))
03973         {
03974             CV_DbgAssert(size <= entry.capacity_);
03975             LOG_BUFFER_POOL("Reuse reserved buffer: %p\n", entry.clBuffer_);
03976         }
03977         else
03978         {
03979             derived()._allocateBufferEntry(entry, size);
03980         }
03981         return entry.clBuffer_;
03982     }
03983     virtual void release(T buffer)
03984     {
03985         AutoLock locker(mutex_);
03986         BufferEntry entry;
03987         CV_Assert(_findAndRemoveEntryFromAllocatedList(entry, buffer));
03988         if (maxReservedSize == 0 || entry.capacity_ > maxReservedSize / 8)
03989         {
03990             derived()._releaseBufferEntry(entry);
03991         }
03992         else
03993         {
03994             reservedEntries_.push_front(entry);
03995             currentReservedSize += entry.capacity_;
03996             _checkSizeOfReservedEntries();
03997         }
03998     }
03999 
04000     virtual size_t getReservedSize() const { return currentReservedSize; }
04001     virtual size_t getMaxReservedSize() const { return maxReservedSize; }
04002     virtual void setMaxReservedSize(size_t size)
04003     {
04004         AutoLock locker(mutex_);
04005         size_t oldMaxReservedSize = maxReservedSize;
04006         maxReservedSize = size;
04007         if (maxReservedSize < oldMaxReservedSize)
04008         {
04009             typename std::list<BufferEntry>::iterator i = reservedEntries_.begin();
04010             for (; i != reservedEntries_.end();)
04011             {
04012                 const BufferEntry& entry = *i;
04013                 if (entry.capacity_ > maxReservedSize / 8)
04014                 {
04015                     CV_DbgAssert(currentReservedSize >= entry.capacity_);
04016                     currentReservedSize -= entry.capacity_;
04017                     derived()._releaseBufferEntry(entry);
04018                     i = reservedEntries_.erase(i);
04019                     continue;
04020                 }
04021                 ++i;
04022             }
04023             _checkSizeOfReservedEntries();
04024         }
04025     }
04026     virtual void freeAllReservedBuffers()
04027     {
04028         AutoLock locker(mutex_);
04029         typename std::list<BufferEntry>::const_iterator i = reservedEntries_.begin();
04030         for (; i != reservedEntries_.end(); ++i)
04031         {
04032             const BufferEntry& entry = *i;
04033             derived()._releaseBufferEntry(entry);
04034         }
04035         reservedEntries_.clear();
04036         currentReservedSize = 0;
04037     }
04038 };
04039 
04040 struct CLBufferEntry
04041 {
04042     cl_mem clBuffer_;
04043     size_t capacity_;
04044     CLBufferEntry() : clBuffer_((cl_mem)NULL), capacity_(0) { }
04045 };
04046 
04047 class OpenCLBufferPoolImpl : public OpenCLBufferPoolBaseImpl<OpenCLBufferPoolImpl, CLBufferEntry, cl_mem>
04048 {
04049 public:
04050     typedef struct CLBufferEntry BufferEntry;
04051 protected:
04052     int createFlags_;
04053 public:
04054     OpenCLBufferPoolImpl(int createFlags = 0)
04055         : createFlags_(createFlags)
04056     {
04057     }
04058 
04059     void _allocateBufferEntry(BufferEntry& entry, size_t size)
04060     {
04061         CV_DbgAssert(entry.clBuffer_ == NULL);
04062         entry.capacity_ = alignSize(size, (int)_allocationGranularity(size));
04063         Context& ctx = Context::getDefault();
04064         cl_int retval = CL_SUCCESS;
04065         entry.clBuffer_ = clCreateBuffer((cl_context)ctx.ptr(), CL_MEM_READ_WRITE|createFlags_, entry.capacity_, 0, &retval);
04066         CV_Assert(retval == CL_SUCCESS);
04067         CV_Assert(entry.clBuffer_ != NULL);
04068         if(retval == CL_SUCCESS)
04069         {
04070             CV_IMPL_ADD(CV_IMPL_OCL);
04071         }
04072         LOG_BUFFER_POOL("OpenCL allocate %lld (0x%llx) bytes: %p\n",
04073                 (long long)entry.capacity_, (long long)entry.capacity_, entry.clBuffer_);
04074         allocatedEntries_.push_back(entry);
04075     }
04076 
04077     void _releaseBufferEntry(const BufferEntry& entry)
04078     {
04079         CV_Assert(entry.capacity_ != 0);
04080         CV_Assert(entry.clBuffer_ != NULL);
04081         LOG_BUFFER_POOL("OpenCL release buffer: %p, %lld (0x%llx) bytes\n",
04082                 entry.clBuffer_, (long long)entry.capacity_, (long long)entry.capacity_);
04083         clReleaseMemObject(entry.clBuffer_);
04084     }
04085 };
04086 
04087 #ifdef HAVE_OPENCL_SVM
04088 struct CLSVMBufferEntry
04089 {
04090     void* clBuffer_;
04091     size_t capacity_;
04092     CLSVMBufferEntry() : clBuffer_(NULL), capacity_(0) { }
04093 };
04094 class OpenCLSVMBufferPoolImpl : public OpenCLBufferPoolBaseImpl<OpenCLSVMBufferPoolImpl, CLSVMBufferEntry, void*>
04095 {
04096 public:
04097     typedef struct CLSVMBufferEntry BufferEntry;
04098 public:
04099     OpenCLSVMBufferPoolImpl()
04100     {
04101     }
04102 
04103     void _allocateBufferEntry(BufferEntry& entry, size_t size)
04104     {
04105         CV_DbgAssert(entry.clBuffer_ == NULL);
04106         entry.capacity_ = alignSize(size, (int)_allocationGranularity(size));
04107 
04108         Context& ctx = Context::getDefault();
04109         const svm::SVMCapabilities svmCaps = svm::getSVMCapabilitites(ctx);
04110         bool isFineGrainBuffer = svmCaps.isSupportFineGrainBuffer();
04111         cl_svm_mem_flags memFlags = CL_MEM_READ_WRITE |
04112                 (isFineGrainBuffer ? CL_MEM_SVM_FINE_GRAIN_BUFFER : 0);
04113 
04114         const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
04115         CV_DbgAssert(svmFns->isValid());
04116 
04117         CV_OPENCL_SVM_TRACE_P("clSVMAlloc: %d\n", (int)entry.capacity_);
04118         void *buf = svmFns->fn_clSVMAlloc((cl_context)ctx.ptr(), memFlags, entry.capacity_, 0);
04119         CV_Assert(buf);
04120 
04121         entry.clBuffer_ = buf;
04122         {
04123             CV_IMPL_ADD(CV_IMPL_OCL);
04124         }
04125         LOG_BUFFER_POOL("OpenCL SVM allocate %lld (0x%llx) bytes: %p\n",
04126                 (long long)entry.capacity_, (long long)entry.capacity_, entry.clBuffer_);
04127         allocatedEntries_.push_back(entry);
04128     }
04129 
04130     void _releaseBufferEntry(const BufferEntry& entry)
04131     {
04132         CV_Assert(entry.capacity_ != 0);
04133         CV_Assert(entry.clBuffer_ != NULL);
04134         LOG_BUFFER_POOL("OpenCL release SVM buffer: %p, %lld (0x%llx) bytes\n",
04135                 entry.clBuffer_, (long long)entry.capacity_, (long long)entry.capacity_);
04136         Context& ctx = Context::getDefault();
04137         const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
04138         CV_DbgAssert(svmFns->isValid());
04139         CV_OPENCL_SVM_TRACE_P("clSVMFree: %p\n",  entry.clBuffer_);
04140         svmFns->fn_clSVMFree((cl_context)ctx.ptr(), entry.clBuffer_);
04141     }
04142 };
04143 #endif
04144 
04145 
04146 
04147 #if defined _MSC_VER
04148 #pragma warning(disable:4127) // conditional expression is constant
04149 #endif
04150 template <bool readAccess, bool writeAccess>
04151 class AlignedDataPtr
04152 {
04153 protected:
04154     const size_t size_;
04155     uchar* const originPtr_;
04156     const size_t alignment_;
04157     uchar* ptr_;
04158     uchar* allocatedPtr_;
04159 
04160 public:
04161     AlignedDataPtr(uchar* ptr, size_t size, size_t alignment)
04162         : size_(size), originPtr_(ptr), alignment_(alignment), ptr_(ptr), allocatedPtr_(NULL)
04163     {
04164         CV_DbgAssert((alignment & (alignment - 1)) == 0); // check for 2^n
04165         if (((size_t)ptr_ & (alignment - 1)) != 0)
04166         {
04167             allocatedPtr_ = new uchar[size_ + alignment - 1];
04168             ptr_ = (uchar*)(((uintptr_t)allocatedPtr_ + (alignment - 1)) & ~(alignment - 1));
04169             if (readAccess)
04170             {
04171                 memcpy(ptr_, originPtr_, size_);
04172             }
04173         }
04174     }
04175 
04176     uchar* getAlignedPtr() const
04177     {
04178         CV_DbgAssert(((size_t)ptr_ & (alignment_ - 1)) == 0);
04179         return ptr_;
04180     }
04181 
04182     ~AlignedDataPtr()
04183     {
04184         if (allocatedPtr_)
04185         {
04186             if (writeAccess)
04187             {
04188                 memcpy(originPtr_, ptr_, size_);
04189             }
04190             delete[] allocatedPtr_;
04191             allocatedPtr_ = NULL;
04192         }
04193         ptr_ = NULL;
04194     }
04195 private:
04196     AlignedDataPtr(const AlignedDataPtr&); // disabled
04197     AlignedDataPtr& operator=(const AlignedDataPtr&); // disabled
04198 };
04199 
04200 template <bool readAccess, bool writeAccess>
04201 class AlignedDataPtr2D
04202 {
04203 protected:
04204     const size_t size_;
04205     uchar* const originPtr_;
04206     const size_t alignment_;
04207     uchar* ptr_;
04208     uchar* allocatedPtr_;
04209     size_t rows_;
04210     size_t cols_;
04211     size_t step_;
04212 
04213 public:
04214     AlignedDataPtr2D(uchar* ptr, size_t rows, size_t cols, size_t step, size_t alignment)
04215         : size_(rows*step), originPtr_(ptr), alignment_(alignment), ptr_(ptr), allocatedPtr_(NULL), rows_(rows), cols_(cols), step_(step)
04216     {
04217         CV_DbgAssert((alignment & (alignment - 1)) == 0); // check for 2^n
04218         if (((size_t)ptr_ & (alignment - 1)) != 0)
04219         {
04220             allocatedPtr_ = new uchar[size_ + alignment - 1];
04221             ptr_ = (uchar*)(((uintptr_t)allocatedPtr_ + (alignment - 1)) & ~(alignment - 1));
04222             if (readAccess)
04223             {
04224                 for (size_t i = 0; i < rows_; i++)
04225                     memcpy(ptr_ + i*step_, originPtr_ + i*step_, cols_);
04226             }
04227         }
04228     }
04229 
04230     uchar* getAlignedPtr() const
04231     {
04232         CV_DbgAssert(((size_t)ptr_ & (alignment_ - 1)) == 0);
04233         return ptr_;
04234     }
04235 
04236     ~AlignedDataPtr2D()
04237     {
04238         if (allocatedPtr_)
04239         {
04240             if (writeAccess)
04241             {
04242                 for (size_t i = 0; i < rows_; i++)
04243                     memcpy(originPtr_ + i*step_, ptr_ + i*step_, cols_);
04244             }
04245             delete[] allocatedPtr_;
04246             allocatedPtr_ = NULL;
04247         }
04248         ptr_ = NULL;
04249     }
04250 private:
04251     AlignedDataPtr2D(const AlignedDataPtr2D&); // disabled
04252     AlignedDataPtr2D& operator=(const AlignedDataPtr2D&); // disabled
04253 };
04254 #if defined _MSC_VER
04255 #pragma warning(default:4127) // conditional expression is constant
04256 #endif
04257 
04258 #ifndef CV_OPENCL_DATA_PTR_ALIGNMENT
04259 #define CV_OPENCL_DATA_PTR_ALIGNMENT 16
04260 #endif
04261 
04262 class OpenCLAllocator : public MatAllocator
04263 {
04264     mutable OpenCLBufferPoolImpl bufferPool;
04265     mutable OpenCLBufferPoolImpl bufferPoolHostPtr;
04266 #ifdef  HAVE_OPENCL_SVM
04267     mutable OpenCLSVMBufferPoolImpl bufferPoolSVM;
04268 #endif
04269 
04270     enum AllocatorFlags
04271     {
04272         ALLOCATOR_FLAGS_BUFFER_POOL_USED = 1 << 0,
04273         ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED = 1 << 1
04274 #ifdef HAVE_OPENCL_SVM
04275         ,ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED = 1 << 2
04276 #endif
04277     };
04278 public:
04279     OpenCLAllocator()
04280         : bufferPool(0),
04281           bufferPoolHostPtr(CL_MEM_ALLOC_HOST_PTR)
04282     {
04283         size_t defaultPoolSize, poolSize;
04284         defaultPoolSize = ocl::Device::getDefault().isIntel() ? 1 << 27 : 0;
04285         poolSize = getConfigurationParameterForSize("OPENCV_OPENCL_BUFFERPOOL_LIMIT", defaultPoolSize);
04286         bufferPool.setMaxReservedSize(poolSize);
04287         poolSize = getConfigurationParameterForSize("OPENCV_OPENCL_HOST_PTR_BUFFERPOOL_LIMIT", defaultPoolSize);
04288         bufferPoolHostPtr.setMaxReservedSize(poolSize);
04289 #ifdef HAVE_OPENCL_SVM
04290         poolSize = getConfigurationParameterForSize("OPENCV_OPENCL_SVM_BUFFERPOOL_LIMIT", defaultPoolSize);
04291         bufferPoolSVM.setMaxReservedSize(poolSize);
04292 #endif
04293 
04294         matStdAllocator = Mat::getDefaultAllocator();
04295     }
04296 
04297     UMatData* defaultAllocate(int dims, const int* sizes, int type, void* data, size_t* step,
04298             int flags, UMatUsageFlags usageFlags) const
04299     {
04300         UMatData* u = matStdAllocator->allocate(dims, sizes, type, data, step, flags, usageFlags);
04301         return u;
04302     }
04303 
04304     void getBestFlags(const Context& ctx, int /*flags*/, UMatUsageFlags usageFlags, int& createFlags, int& flags0) const
04305     {
04306         const Device& dev = ctx.device(0);
04307         createFlags = 0;
04308         if ((usageFlags & USAGE_ALLOCATE_HOST_MEMORY) != 0)
04309             createFlags |= CL_MEM_ALLOC_HOST_PTR;
04310 
04311         if( dev.hostUnifiedMemory() )
04312             flags0 = 0;
04313         else
04314             flags0 = UMatData::COPY_ON_MAP;
04315     }
04316 
04317     UMatData* allocate(int dims, const int* sizes, int type,
04318                        void* data, size_t* step, int flags, UMatUsageFlags usageFlags) const
04319     {
04320         if(!useOpenCL())
04321             return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags);
04322         CV_Assert(data == 0);
04323         size_t total = CV_ELEM_SIZE(type);
04324         for( int i = dims-1; i >= 0; i-- )
04325         {
04326             if( step )
04327                 step[i] = total;
04328             total *= sizes[i];
04329         }
04330 
04331         Context& ctx = Context::getDefault();
04332 
04333         int createFlags = 0, flags0 = 0;
04334         getBestFlags(ctx, flags, usageFlags, createFlags, flags0);
04335 
04336         void* handle = NULL;
04337         int allocatorFlags = 0;
04338 
04339 #ifdef HAVE_OPENCL_SVM
04340         const svm::SVMCapabilities svmCaps = svm::getSVMCapabilitites(ctx);
04341         if (ctx.useSVM() && svm::useSVM(usageFlags) && !svmCaps.isNoSVMSupport())
04342         {
04343             allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED;
04344             handle = bufferPoolSVM.allocate(total);
04345 
04346             // this property is constant, so single buffer pool can be used here
04347             bool isFineGrainBuffer = svmCaps.isSupportFineGrainBuffer();
04348             allocatorFlags |= isFineGrainBuffer ? svm::OPENCL_SVM_FINE_GRAIN_BUFFER : svm::OPENCL_SVM_COARSE_GRAIN_BUFFER;
04349         }
04350         else
04351 #endif
04352         if (createFlags == 0)
04353         {
04354             allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_USED;
04355             handle = bufferPool.allocate(total);
04356         }
04357         else if (createFlags == CL_MEM_ALLOC_HOST_PTR)
04358         {
04359             allocatorFlags = ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED;
04360             handle = bufferPoolHostPtr.allocate(total);
04361         }
04362         else
04363         {
04364             CV_Assert(handle != NULL); // Unsupported, throw
04365         }
04366 
04367         if (!handle)
04368             return defaultAllocate(dims, sizes, type, data, step, flags, usageFlags);
04369 
04370         UMatData* u = new UMatData(this);
04371         u->data = 0;
04372         u->size = total;
04373         u->handle = handle;
04374         u->flags = flags0;
04375         u->allocatorFlags_ = allocatorFlags;
04376         CV_DbgAssert(!u->tempUMat()); // for bufferPool.release() consistency in deallocate()
04377         u->markHostCopyObsolete(true);
04378         return u;
04379     }
04380 
04381     bool allocate(UMatData* u, int accessFlags, UMatUsageFlags usageFlags) const
04382     {
04383         if(!u)
04384             return false;
04385 
04386         UMatDataAutoLock lock(u);
04387 
04388         if(u->handle == 0)
04389         {
04390             CV_Assert(u->origdata != 0);
04391             Context& ctx = Context::getDefault();
04392             int createFlags = 0, flags0 = 0;
04393             getBestFlags(ctx, accessFlags, usageFlags, createFlags, flags0);
04394 
04395             cl_context ctx_handle = (cl_context)ctx.ptr();
04396             int allocatorFlags = 0;
04397             int tempUMatFlags = 0;
04398             void* handle = NULL;
04399             cl_int retval = CL_SUCCESS;
04400 
04401 #ifdef HAVE_OPENCL_SVM
04402             svm::SVMCapabilities svmCaps = svm::getSVMCapabilitites(ctx);
04403             bool useSVM = ctx.useSVM() && svm::useSVM(usageFlags);
04404             if (useSVM && svmCaps.isSupportFineGrainSystem())
04405             {
04406                 allocatorFlags = svm::OPENCL_SVM_FINE_GRAIN_SYSTEM;
04407                 tempUMatFlags = UMatData::TEMP_UMAT;
04408                 handle = u->origdata;
04409                 CV_OPENCL_SVM_TRACE_P("Use fine grain system: %d (%p)\n", (int)u->size, handle);
04410             }
04411             else if (useSVM && (svmCaps.isSupportFineGrainBuffer() || svmCaps.isSupportCoarseGrainBuffer()))
04412             {
04413                 if (!(accessFlags & ACCESS_FAST)) // memcpy used
04414                 {
04415                     bool isFineGrainBuffer = svmCaps.isSupportFineGrainBuffer();
04416 
04417                     cl_svm_mem_flags memFlags = createFlags |
04418                             (isFineGrainBuffer ? CL_MEM_SVM_FINE_GRAIN_BUFFER : 0);
04419 
04420                     const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
04421                     CV_DbgAssert(svmFns->isValid());
04422 
04423                     CV_OPENCL_SVM_TRACE_P("clSVMAlloc + copy: %d\n", (int)u->size);
04424                     handle = svmFns->fn_clSVMAlloc((cl_context)ctx.ptr(), memFlags, u->size, 0);
04425                     CV_Assert(handle);
04426 
04427                     cl_command_queue q = NULL;
04428                     if (!isFineGrainBuffer)
04429                     {
04430                         q = (cl_command_queue)Queue::getDefault().ptr();
04431                         CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", handle, (int)u->size);
04432                         cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_TRUE, CL_MAP_WRITE,
04433                                 handle, u->size,
04434                                 0, NULL, NULL);
04435                         CV_Assert(status == CL_SUCCESS);
04436 
04437                     }
04438                     memcpy(handle, u->origdata, u->size);
04439                     if (!isFineGrainBuffer)
04440                     {
04441                         CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", handle);
04442                         cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, handle, 0, NULL, NULL);
04443                         CV_Assert(status == CL_SUCCESS);
04444                     }
04445 
04446                     tempUMatFlags = UMatData::TEMP_UMAT | UMatData::TEMP_COPIED_UMAT;
04447                     allocatorFlags |= isFineGrainBuffer ? svm::OPENCL_SVM_FINE_GRAIN_BUFFER
04448                                                 : svm::OPENCL_SVM_COARSE_GRAIN_BUFFER;
04449                 }
04450             }
04451             else
04452 #endif
04453             {
04454                 tempUMatFlags = UMatData::TEMP_UMAT;
04455                 if (u->origdata == cv::alignPtr(u->origdata, 4)) // There are OpenCL runtime issues for less aligned data
04456                 {
04457                     handle = clCreateBuffer(ctx_handle, CL_MEM_USE_HOST_PTR|createFlags,
04458                                             u->size, u->origdata, &retval);
04459                 }
04460                 if((!handle || retval < 0) && !(accessFlags & ACCESS_FAST))
04461                 {
04462                     handle = clCreateBuffer(ctx_handle, CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE|createFlags,
04463                                                u->size, u->origdata, &retval);
04464                     tempUMatFlags |= UMatData::TEMP_COPIED_UMAT;
04465                 }
04466             }
04467             if(!handle || retval != CL_SUCCESS)
04468                 return false;
04469             u->handle = handle;
04470             u->prevAllocator = u->currAllocator;
04471             u->currAllocator = this;
04472             u->flags |= tempUMatFlags;
04473             u->allocatorFlags_ = allocatorFlags;
04474         }
04475         if(accessFlags & ACCESS_WRITE)
04476             u->markHostCopyObsolete(true);
04477         return true;
04478     }
04479 
04480     /*void sync(UMatData* u) const
04481     {
04482         cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
04483         UMatDataAutoLock lock(u);
04484 
04485         if( u->hostCopyObsolete() && u->handle && u->refcount > 0 && u->origdata)
04486         {
04487             if( u->tempCopiedUMat() )
04488             {
04489                 clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
04490                                     u->size, u->origdata, 0, 0, 0);
04491             }
04492             else
04493             {
04494                 cl_int retval = 0;
04495                 void* data = clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
04496                                                 (CL_MAP_READ | CL_MAP_WRITE),
04497                                                 0, u->size, 0, 0, 0, &retval);
04498                 clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0);
04499                 clFinish(q);
04500             }
04501             u->markHostCopyObsolete(false);
04502         }
04503         else if( u->copyOnMap() && u->deviceCopyObsolete() && u->data )
04504         {
04505             clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
04506                                  u->size, u->data, 0, 0, 0);
04507         }
04508     }*/
04509 
04510     void deallocate(UMatData* u) const
04511     {
04512         if(!u)
04513             return;
04514 
04515         CV_Assert(u->urefcount == 0);
04516         CV_Assert(u->refcount == 0 && "UMat deallocation error: some derived Mat is still alive");
04517 
04518         CV_Assert(u->handle != 0);
04519         CV_Assert(u->mapcount == 0);
04520         if(u->tempUMat())
04521         {
04522             CV_Assert(u->origdata);
04523 //            UMatDataAutoLock lock(u);
04524 
04525             if (u->hostCopyObsolete())
04526             {
04527 #ifdef HAVE_OPENCL_SVM
04528                 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
04529                 {
04530                     Context& ctx = Context::getDefault();
04531                     const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
04532                     CV_DbgAssert(svmFns->isValid());
04533 
04534                     if( u->tempCopiedUMat() )
04535                     {
04536                         CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
04537                                 (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER);
04538                         bool isFineGrainBuffer = (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER;
04539                         cl_command_queue q = NULL;
04540                         if (!isFineGrainBuffer)
04541                         {
04542                             CV_DbgAssert(((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0));
04543                             q = (cl_command_queue)Queue::getDefault().ptr();
04544                             CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
04545                             cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_READ,
04546                                     u->handle, u->size,
04547                                     0, NULL, NULL);
04548                             CV_Assert(status == CL_SUCCESS);
04549                         }
04550                         clFinish(q);
04551                         memcpy(u->origdata, u->handle, u->size);
04552                         if (!isFineGrainBuffer)
04553                         {
04554                             CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
04555                             cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle, 0, NULL, NULL);
04556                             CV_Assert(status == CL_SUCCESS);
04557                         }
04558                     }
04559                     else
04560                     {
04561                         CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM);
04562                         // nothing
04563                     }
04564                 }
04565                 else
04566 #endif
04567                 {
04568                     cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
04569                     if( u->tempCopiedUMat() )
04570                     {
04571                         AlignedDataPtr<false, true> alignedPtr(u->origdata, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
04572                         CV_OclDbgAssert(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
04573                                             u->size, alignedPtr.getAlignedPtr(), 0, 0, 0) == CL_SUCCESS);
04574                     }
04575                     else
04576                     {
04577                         cl_int retval = 0;
04578                         if (u->tempUMat())
04579                         {
04580                             CV_Assert(u->mapcount == 0);
04581                             void* data = clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
04582                                 (CL_MAP_READ | CL_MAP_WRITE),
04583                                 0, u->size, 0, 0, 0, &retval);
04584                             CV_Assert(u->origdata == data);
04585                             CV_OclDbgAssert(retval == CL_SUCCESS);
04586                             if (u->originalUMatData)
04587                             {
04588                                 CV_Assert(u->originalUMatData->data == data);
04589                             }
04590                             CV_OclDbgAssert(clEnqueueUnmapMemObject(q, (cl_mem)u->handle, data, 0, 0, 0) == CL_SUCCESS);
04591                             CV_OclDbgAssert(clFinish(q) == CL_SUCCESS);
04592                         }
04593                     }
04594                 }
04595                 u->markHostCopyObsolete(false);
04596             }
04597             else
04598             {
04599                 // nothing
04600             }
04601 #ifdef HAVE_OPENCL_SVM
04602             if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
04603             {
04604                 if( u->tempCopiedUMat() )
04605                 {
04606                     Context& ctx = Context::getDefault();
04607                     const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
04608                     CV_DbgAssert(svmFns->isValid());
04609 
04610                     CV_OPENCL_SVM_TRACE_P("clSVMFree: %p\n", u->handle);
04611                     svmFns->fn_clSVMFree((cl_context)ctx.ptr(), u->handle);
04612                 }
04613             }
04614             else
04615 #endif
04616             {
04617                 clReleaseMemObject((cl_mem)u->handle);
04618             }
04619             u->handle = 0;
04620             u->markDeviceCopyObsolete(true);
04621             u->currAllocator = u->prevAllocator;
04622             u->prevAllocator = NULL;
04623             if(u->data && u->copyOnMap() && u->data != u->origdata)
04624                 fastFree(u->data);
04625             u->data = u->origdata;
04626             u->currAllocator->deallocate(u);
04627             u = NULL;
04628         }
04629         else
04630         {
04631             CV_Assert(u->origdata == NULL);
04632             if(u->data && u->copyOnMap() && u->data != u->origdata)
04633             {
04634                 fastFree(u->data);
04635                 u->data = 0;
04636                 u->markHostCopyObsolete(true);
04637             }
04638             if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_USED)
04639             {
04640                 bufferPool.release((cl_mem)u->handle);
04641             }
04642             else if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_HOST_PTR_USED)
04643             {
04644                 bufferPoolHostPtr.release((cl_mem)u->handle);
04645             }
04646 #ifdef HAVE_OPENCL_SVM
04647             else if (u->allocatorFlags_ & ALLOCATOR_FLAGS_BUFFER_POOL_SVM_USED)
04648             {
04649                 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
04650                 {
04651                     //nothing
04652                 }
04653                 else if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
04654                         (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
04655                 {
04656                     Context& ctx = Context::getDefault();
04657                     const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
04658                     CV_DbgAssert(svmFns->isValid());
04659                     cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
04660 
04661                     if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) != 0)
04662                     {
04663                         CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
04664                         cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle, 0, NULL, NULL);
04665                         CV_Assert(status == CL_SUCCESS);
04666                     }
04667                 }
04668                 bufferPoolSVM.release((void*)u->handle);
04669             }
04670 #endif
04671             else
04672             {
04673                 clReleaseMemObject((cl_mem)u->handle);
04674             }
04675             u->handle = 0;
04676             u->markDeviceCopyObsolete(true);
04677             delete u;
04678             u = NULL;
04679         }
04680         CV_Assert(u == NULL);
04681     }
04682 
04683     // synchronized call (external UMatDataAutoLock, see UMat::getMat)
04684     void map(UMatData* u, int accessFlags) const
04685     {
04686         CV_Assert(u && u->handle);
04687 
04688         if(accessFlags & ACCESS_WRITE)
04689             u->markDeviceCopyObsolete(true);
04690 
04691         cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
04692 
04693         {
04694             if( !u->copyOnMap() )
04695             {
04696                 // TODO
04697                 // because there can be other map requests for the same UMat with different access flags,
04698                 // we use the universal (read-write) access mode.
04699 #ifdef HAVE_OPENCL_SVM
04700                 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
04701                 {
04702                     if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
04703                     {
04704                         Context& ctx = Context::getDefault();
04705                         const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
04706                         CV_DbgAssert(svmFns->isValid());
04707 
04708                         if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0)
04709                         {
04710                             CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
04711                             cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_READ | CL_MAP_WRITE,
04712                                     u->handle, u->size,
04713                                     0, NULL, NULL);
04714                             CV_Assert(status == CL_SUCCESS);
04715                             u->allocatorFlags_ |= svm::OPENCL_SVM_BUFFER_MAP;
04716                         }
04717                     }
04718                     clFinish(q);
04719                     u->data = (uchar*)u->handle;
04720                     u->markHostCopyObsolete(false);
04721                     u->markDeviceMemMapped(true);
04722                     return;
04723                 }
04724 #endif
04725 
04726                 cl_int retval = CL_SUCCESS;
04727                 if (!u->deviceMemMapped())
04728                 {
04729                     CV_Assert(u->refcount == 1);
04730                     CV_Assert(u->mapcount++ == 0);
04731                     u->data = (uchar*)clEnqueueMapBuffer(q, (cl_mem)u->handle, CL_TRUE,
04732                                                          (CL_MAP_READ | CL_MAP_WRITE),
04733                                                          0, u->size, 0, 0, 0, &retval);
04734                 }
04735                 if (u->data && retval == CL_SUCCESS)
04736                 {
04737                     u->markHostCopyObsolete(false);
04738                     u->markDeviceMemMapped(true);
04739                     return;
04740                 }
04741 
04742                 // TODO Is it really a good idea and was it tested well?
04743                 // if map failed, switch to copy-on-map mode for the particular buffer
04744                 u->flags |= UMatData::COPY_ON_MAP;
04745             }
04746 
04747             if(!u->data)
04748             {
04749                 u->data = (uchar*)fastMalloc(u->size);
04750                 u->markHostCopyObsolete(true);
04751             }
04752         }
04753 
04754         if( (accessFlags & ACCESS_READ) != 0 && u->hostCopyObsolete() )
04755         {
04756             AlignedDataPtr<false, true> alignedPtr(u->data, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
04757 #ifdef HAVE_OPENCL_SVM
04758             CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == 0);
04759 #endif
04760             CV_Assert( clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
04761                                            u->size, alignedPtr.getAlignedPtr(), 0, 0, 0) == CL_SUCCESS );
04762             u->markHostCopyObsolete(false);
04763         }
04764     }
04765 
04766     void unmap(UMatData* u) const
04767     {
04768         if(!u)
04769             return;
04770 
04771 
04772         CV_Assert(u->handle != 0);
04773 
04774         UMatDataAutoLock autolock(u);
04775 
04776         cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
04777         cl_int retval = 0;
04778         if( !u->copyOnMap() && u->deviceMemMapped() )
04779         {
04780             CV_Assert(u->data != NULL);
04781 #ifdef HAVE_OPENCL_SVM
04782             if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
04783             {
04784                 if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
04785                 {
04786                     Context& ctx = Context::getDefault();
04787                     const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
04788                     CV_DbgAssert(svmFns->isValid());
04789 
04790                     CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) != 0);
04791                     {
04792                         CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
04793                         cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle,
04794                                 0, NULL, NULL);
04795                         CV_Assert(status == CL_SUCCESS);
04796                         clFinish(q);
04797                         u->allocatorFlags_ &= ~svm::OPENCL_SVM_BUFFER_MAP;
04798                     }
04799                 }
04800                 if (u->refcount == 0)
04801                     u->data = 0;
04802                 u->markDeviceCopyObsolete(false);
04803                 u->markHostCopyObsolete(true);
04804                 return;
04805             }
04806 #endif
04807             if (u->refcount == 0)
04808             {
04809                 CV_Assert(u->mapcount-- == 1);
04810                 CV_Assert((retval = clEnqueueUnmapMemObject(q,
04811                           (cl_mem)u->handle, u->data, 0, 0, 0)) == CL_SUCCESS);
04812                 if (Device::getDefault().isAMD())
04813                 {
04814                     // required for multithreaded applications (see stitching test)
04815                     CV_OclDbgAssert(clFinish(q) == CL_SUCCESS);
04816                 }
04817                 u->markDeviceMemMapped(false);
04818                 u->data = 0;
04819                 u->markDeviceCopyObsolete(false);
04820                 u->markHostCopyObsolete(true);
04821             }
04822         }
04823         else if( u->copyOnMap() && u->deviceCopyObsolete() )
04824         {
04825             AlignedDataPtr<true, false> alignedPtr(u->data, u->size, CV_OPENCL_DATA_PTR_ALIGNMENT);
04826 #ifdef HAVE_OPENCL_SVM
04827             CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == 0);
04828 #endif
04829             CV_Assert( (retval = clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE, 0,
04830                                 u->size, alignedPtr.getAlignedPtr(), 0, 0, 0)) == CL_SUCCESS );
04831             u->markDeviceCopyObsolete(false);
04832             u->markHostCopyObsolete(true);
04833         }
04834     }
04835 
04836     bool checkContinuous(int dims, const size_t sz[],
04837                          const size_t srcofs[], const size_t srcstep[],
04838                          const size_t dstofs[], const size_t dststep[],
04839                          size_t& total, size_t new_sz[],
04840                          size_t& srcrawofs, size_t new_srcofs[], size_t new_srcstep[],
04841                          size_t& dstrawofs, size_t new_dstofs[], size_t new_dststep[]) const
04842     {
04843         bool iscontinuous = true;
04844         srcrawofs = srcofs ? srcofs[dims-1] : 0;
04845         dstrawofs = dstofs ? dstofs[dims-1] : 0;
04846         total = sz[dims-1];
04847         for( int i = dims-2; i >= 0; i-- )
04848         {
04849             if( i >= 0 && (total != srcstep[i] || total != dststep[i]) )
04850                 iscontinuous = false;
04851             total *= sz[i];
04852             if( srcofs )
04853                 srcrawofs += srcofs[i]*srcstep[i];
04854             if( dstofs )
04855                 dstrawofs += dstofs[i]*dststep[i];
04856         }
04857 
04858         if( !iscontinuous )
04859         {
04860             // OpenCL uses {x, y, z} order while OpenCV uses {z, y, x} order.
04861             if( dims == 2 )
04862             {
04863                 new_sz[0] = sz[1]; new_sz[1] = sz[0]; new_sz[2] = 1;
04864                 // we assume that new_... arrays are initialized by caller
04865                 // with 0's, so there is no else branch
04866                 if( srcofs )
04867                 {
04868                     new_srcofs[0] = srcofs[1];
04869                     new_srcofs[1] = srcofs[0];
04870                     new_srcofs[2] = 0;
04871                 }
04872 
04873                 if( dstofs )
04874                 {
04875                     new_dstofs[0] = dstofs[1];
04876                     new_dstofs[1] = dstofs[0];
04877                     new_dstofs[2] = 0;
04878                 }
04879 
04880                 new_srcstep[0] = srcstep[0]; new_srcstep[1] = 0;
04881                 new_dststep[0] = dststep[0]; new_dststep[1] = 0;
04882             }
04883             else
04884             {
04885                 // we could check for dims == 3 here,
04886                 // but from user perspective this one is more informative
04887                 CV_Assert(dims <= 3);
04888                 new_sz[0] = sz[2]; new_sz[1] = sz[1]; new_sz[2] = sz[0];
04889                 if( srcofs )
04890                 {
04891                     new_srcofs[0] = srcofs[2];
04892                     new_srcofs[1] = srcofs[1];
04893                     new_srcofs[2] = srcofs[0];
04894                 }
04895 
04896                 if( dstofs )
04897                 {
04898                     new_dstofs[0] = dstofs[2];
04899                     new_dstofs[1] = dstofs[1];
04900                     new_dstofs[2] = dstofs[0];
04901                 }
04902 
04903                 new_srcstep[0] = srcstep[1]; new_srcstep[1] = srcstep[0];
04904                 new_dststep[0] = dststep[1]; new_dststep[1] = dststep[0];
04905             }
04906         }
04907         return iscontinuous;
04908     }
04909 
04910     void download(UMatData* u, void* dstptr, int dims, const size_t sz[],
04911                   const size_t srcofs[], const size_t srcstep[],
04912                   const size_t dststep[]) const
04913     {
04914         if(!u)
04915             return;
04916         UMatDataAutoLock autolock(u);
04917 
04918         if( u->data && !u->hostCopyObsolete() )
04919         {
04920             Mat::getDefaultAllocator()->download(u, dstptr, dims, sz, srcofs, srcstep, dststep);
04921             return;
04922         }
04923         CV_Assert( u->handle != 0 );
04924 
04925         cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
04926 
04927         size_t total = 0, new_sz[] = {0, 0, 0};
04928         size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
04929         size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
04930 
04931         bool iscontinuous = checkContinuous(dims, sz, srcofs, srcstep, 0, dststep,
04932                                             total, new_sz,
04933                                             srcrawofs, new_srcofs, new_srcstep,
04934                                             dstrawofs, new_dstofs, new_dststep);
04935 
04936 #ifdef HAVE_OPENCL_SVM
04937         if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
04938         {
04939             CV_DbgAssert(u->data == NULL || u->data == u->handle);
04940             Context& ctx = Context::getDefault();
04941             const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
04942             CV_DbgAssert(svmFns->isValid());
04943 
04944             CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0);
04945             if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
04946             {
04947                 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
04948                 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_READ,
04949                         u->handle, u->size,
04950                         0, NULL, NULL);
04951                 CV_Assert(status == CL_SUCCESS);
04952             }
04953             clFinish(q);
04954             if( iscontinuous )
04955             {
04956                 memcpy(dstptr, (uchar*)u->handle + srcrawofs, total);
04957             }
04958             else
04959             {
04960                 // This code is from MatAllocator::download()
04961                 int isz[CV_MAX_DIM];
04962                 uchar* srcptr = (uchar*)u->handle;
04963                 for( int i = 0; i < dims; i++ )
04964                 {
04965                     CV_Assert( sz[i] <= (size_t)INT_MAX );
04966                     if( sz[i] == 0 )
04967                     return;
04968                     if( srcofs )
04969                     srcptr += srcofs[i]*(i <= dims-2 ? srcstep[i] : 1);
04970                     isz[i] = (int)sz[i];
04971                 }
04972 
04973                 Mat src(dims, isz, CV_8U, srcptr, srcstep);
04974                 Mat dst(dims, isz, CV_8U, dstptr, dststep);
04975 
04976                 const Mat* arrays[] = { &src, &dst };
04977                 uchar* ptrs[2];
04978                 NAryMatIterator it(arrays, ptrs, 2);
04979                 size_t j, planesz = it.size;
04980 
04981                 for( j = 0; j < it.nplanes; j++, ++it )
04982                     memcpy(ptrs[1], ptrs[0], planesz);
04983             }
04984             if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
04985             {
04986                 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
04987                 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle,
04988                         0, NULL, NULL);
04989                 CV_Assert(status == CL_SUCCESS);
04990                 clFinish(q);
04991             }
04992         }
04993         else
04994 #endif
04995         {
04996             if( iscontinuous )
04997             {
04998                 AlignedDataPtr<false, true> alignedPtr((uchar*)dstptr, total, CV_OPENCL_DATA_PTR_ALIGNMENT);
04999                 CV_Assert(clEnqueueReadBuffer(q, (cl_mem)u->handle, CL_TRUE,
05000                     srcrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0) >= 0 );
05001             }
05002             else
05003             {
05004                 AlignedDataPtr2D<false, true> alignedPtr((uchar*)dstptr, new_sz[1], new_sz[0], new_dststep[0], CV_OPENCL_DATA_PTR_ALIGNMENT);
05005                 uchar* ptr = alignedPtr.getAlignedPtr();
05006 
05007                 CV_Assert( clEnqueueReadBufferRect(q, (cl_mem)u->handle, CL_TRUE,
05008                     new_srcofs, new_dstofs, new_sz,
05009                     new_srcstep[0], 0,
05010                     new_dststep[0], 0,
05011                     ptr, 0, 0, 0) >= 0 );
05012             }
05013         }
05014     }
05015 
05016     void upload(UMatData* u, const void* srcptr, int dims, const size_t sz[],
05017                 const size_t dstofs[], const size_t dststep[],
05018                 const size_t srcstep[]) const
05019     {
05020         if(!u)
05021             return;
05022 
05023         // there should be no user-visible CPU copies of the UMat which we are going to copy to
05024         CV_Assert(u->refcount == 0 || u->tempUMat());
05025 
05026         size_t total = 0, new_sz[] = {0, 0, 0};
05027         size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
05028         size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
05029 
05030         bool iscontinuous = checkContinuous(dims, sz, 0, srcstep, dstofs, dststep,
05031                                             total, new_sz,
05032                                             srcrawofs, new_srcofs, new_srcstep,
05033                                             dstrawofs, new_dstofs, new_dststep);
05034 
05035         UMatDataAutoLock autolock(u);
05036 
05037         // if there is cached CPU copy of the GPU matrix,
05038         // we could use it as a destination.
05039         // we can do it in 2 cases:
05040         //    1. we overwrite the whole content
05041         //    2. we overwrite part of the matrix, but the GPU copy is out-of-date
05042         if( u->data && (u->hostCopyObsolete() < u->deviceCopyObsolete() || total == u->size))
05043         {
05044             Mat::getDefaultAllocator()->upload(u, srcptr, dims, sz, dstofs, dststep, srcstep);
05045             u->markHostCopyObsolete(false);
05046             u->markDeviceCopyObsolete(true);
05047             return;
05048         }
05049 
05050         CV_Assert( u->handle != 0 );
05051         cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
05052 
05053 #ifdef HAVE_OPENCL_SVM
05054         if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
05055         {
05056             CV_DbgAssert(u->data == NULL || u->data == u->handle);
05057             Context& ctx = Context::getDefault();
05058             const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
05059             CV_DbgAssert(svmFns->isValid());
05060 
05061             CV_DbgAssert((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MAP) == 0);
05062             if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
05063             {
05064                 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMap: %p (%d)\n", u->handle, (int)u->size);
05065                 cl_int status = svmFns->fn_clEnqueueSVMMap(q, CL_FALSE, CL_MAP_WRITE,
05066                         u->handle, u->size,
05067                         0, NULL, NULL);
05068                 CV_Assert(status == CL_SUCCESS);
05069             }
05070             clFinish(q);
05071             if( iscontinuous )
05072             {
05073                 memcpy((uchar*)u->handle + dstrawofs, srcptr, total);
05074             }
05075             else
05076             {
05077                 // This code is from MatAllocator::upload()
05078                 int isz[CV_MAX_DIM];
05079                 uchar* dstptr = (uchar*)u->handle;
05080                 for( int i = 0; i < dims; i++ )
05081                 {
05082                     CV_Assert( sz[i] <= (size_t)INT_MAX );
05083                     if( sz[i] == 0 )
05084                     return;
05085                     if( dstofs )
05086                     dstptr += dstofs[i]*(i <= dims-2 ? dststep[i] : 1);
05087                     isz[i] = (int)sz[i];
05088                 }
05089 
05090                 Mat src(dims, isz, CV_8U, (void*)srcptr, srcstep);
05091                 Mat dst(dims, isz, CV_8U, dstptr, dststep);
05092 
05093                 const Mat* arrays[] = { &src, &dst };
05094                 uchar* ptrs[2];
05095                 NAryMatIterator it(arrays, ptrs, 2);
05096                 size_t j, planesz = it.size;
05097 
05098                 for( j = 0; j < it.nplanes; j++, ++it )
05099                     memcpy(ptrs[1], ptrs[0], planesz);
05100             }
05101             if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_COARSE_GRAIN_BUFFER)
05102             {
05103                 CV_OPENCL_SVM_TRACE_P("clEnqueueSVMUnmap: %p\n", u->handle);
05104                 cl_int status = svmFns->fn_clEnqueueSVMUnmap(q, u->handle,
05105                         0, NULL, NULL);
05106                 CV_Assert(status == CL_SUCCESS);
05107                 clFinish(q);
05108             }
05109         }
05110         else
05111 #endif
05112         {
05113             if( iscontinuous )
05114             {
05115                 AlignedDataPtr<true, false> alignedPtr((uchar*)srcptr, total, CV_OPENCL_DATA_PTR_ALIGNMENT);
05116                 CV_Assert(clEnqueueWriteBuffer(q, (cl_mem)u->handle, CL_TRUE,
05117                     dstrawofs, total, alignedPtr.getAlignedPtr(), 0, 0, 0) >= 0);
05118             }
05119             else
05120             {
05121                 AlignedDataPtr2D<true, false> alignedPtr((uchar*)srcptr, new_sz[1], new_sz[0], new_srcstep[0], CV_OPENCL_DATA_PTR_ALIGNMENT);
05122                 uchar* ptr = alignedPtr.getAlignedPtr();
05123 
05124                 CV_Assert(clEnqueueWriteBufferRect(q, (cl_mem)u->handle, CL_TRUE,
05125                     new_dstofs, new_srcofs, new_sz,
05126                     new_dststep[0], 0,
05127                     new_srcstep[0], 0,
05128                     ptr, 0, 0, 0) >= 0 );
05129             }
05130         }
05131         u->markHostCopyObsolete(true);
05132 #ifdef HAVE_OPENCL_SVM
05133         if ((u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
05134                 (u->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
05135         {
05136             // nothing
05137         }
05138         else
05139 #endif
05140         {
05141             u->markHostCopyObsolete(true);
05142         }
05143         u->markDeviceCopyObsolete(false);
05144     }
05145 
05146     void copy(UMatData* src, UMatData* dst, int dims, const size_t sz[],
05147               const size_t srcofs[], const size_t srcstep[],
05148               const size_t dstofs[], const size_t dststep[], bool _sync) const
05149     {
05150         if(!src || !dst)
05151             return;
05152 
05153         size_t total = 0, new_sz[] = {0, 0, 0};
05154         size_t srcrawofs = 0, new_srcofs[] = {0, 0, 0}, new_srcstep[] = {0, 0, 0};
05155         size_t dstrawofs = 0, new_dstofs[] = {0, 0, 0}, new_dststep[] = {0, 0, 0};
05156 
05157         bool iscontinuous = checkContinuous(dims, sz, srcofs, srcstep, dstofs, dststep,
05158                                             total, new_sz,
05159                                             srcrawofs, new_srcofs, new_srcstep,
05160                                             dstrawofs, new_dstofs, new_dststep);
05161 
05162         UMatDataAutoLock src_autolock(src);
05163         UMatDataAutoLock dst_autolock(dst);
05164 
05165         if( !src->handle || (src->data && src->hostCopyObsolete() < src->deviceCopyObsolete()) )
05166         {
05167             upload(dst, src->data + srcrawofs, dims, sz, dstofs, dststep, srcstep);
05168             return;
05169         }
05170         if( !dst->handle || (dst->data && dst->hostCopyObsolete() < dst->deviceCopyObsolete()) )
05171         {
05172             download(src, dst->data + dstrawofs, dims, sz, srcofs, srcstep, dststep);
05173             dst->markHostCopyObsolete(false);
05174 #ifdef HAVE_OPENCL_SVM
05175             if ((dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
05176                     (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
05177             {
05178                 // nothing
05179             }
05180             else
05181 #endif
05182             {
05183                 dst->markDeviceCopyObsolete(true);
05184             }
05185             return;
05186         }
05187 
05188         // there should be no user-visible CPU copies of the UMat which we are going to copy to
05189         CV_Assert(dst->refcount == 0);
05190         cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
05191 
05192         cl_int retval = CL_SUCCESS;
05193 #ifdef HAVE_OPENCL_SVM
05194         if ((src->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0 ||
05195                 (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
05196         {
05197             if ((src->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0 &&
05198                             (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
05199             {
05200                 Context& ctx = Context::getDefault();
05201                 const svm::SVMFunctions* svmFns = svm::getSVMFunctions(ctx);
05202                 CV_DbgAssert(svmFns->isValid());
05203 
05204                 if( iscontinuous )
05205                 {
05206                     CV_OPENCL_SVM_TRACE_P("clEnqueueSVMMemcpy: %p <-- %p (%d)\n",
05207                             (uchar*)dst->handle + dstrawofs, (uchar*)src->handle + srcrawofs, (int)total);
05208                     cl_int status = svmFns->fn_clEnqueueSVMMemcpy(q, CL_TRUE,
05209                             (uchar*)dst->handle + dstrawofs, (uchar*)src->handle + srcrawofs,
05210                             total, 0, NULL, NULL);
05211                     CV_Assert(status == CL_SUCCESS);
05212                 }
05213                 else
05214                 {
05215                     clFinish(q);
05216                     // This code is from MatAllocator::download()/upload()
05217                     int isz[CV_MAX_DIM];
05218                     uchar* srcptr = (uchar*)src->handle;
05219                     for( int i = 0; i < dims; i++ )
05220                     {
05221                         CV_Assert( sz[i] <= (size_t)INT_MAX );
05222                         if( sz[i] == 0 )
05223                         return;
05224                         if( srcofs )
05225                         srcptr += srcofs[i]*(i <= dims-2 ? srcstep[i] : 1);
05226                         isz[i] = (int)sz[i];
05227                     }
05228                     Mat m_src(dims, isz, CV_8U, srcptr, srcstep);
05229 
05230                     uchar* dstptr = (uchar*)dst->handle;
05231                     for( int i = 0; i < dims; i++ )
05232                     {
05233                         if( dstofs )
05234                         dstptr += dstofs[i]*(i <= dims-2 ? dststep[i] : 1);
05235                     }
05236                     Mat m_dst(dims, isz, CV_8U, dstptr, dststep);
05237 
05238                     const Mat* arrays[] = { &m_src, &m_dst };
05239                     uchar* ptrs[2];
05240                     NAryMatIterator it(arrays, ptrs, 2);
05241                     size_t j, planesz = it.size;
05242 
05243                     for( j = 0; j < it.nplanes; j++, ++it )
05244                         memcpy(ptrs[1], ptrs[0], planesz);
05245                 }
05246             }
05247             else
05248             {
05249                 if ((src->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) != 0)
05250                 {
05251                     map(src, ACCESS_READ);
05252                     upload(dst, src->data + srcrawofs, dims, sz, dstofs, dststep, srcstep);
05253                     unmap(src);
05254                 }
05255                 else
05256                 {
05257                     map(dst, ACCESS_WRITE);
05258                     download(src, dst->data + dstrawofs, dims, sz, srcofs, srcstep, dststep);
05259                     unmap(dst);
05260                 }
05261             }
05262         }
05263         else
05264 #endif
05265         {
05266             if( iscontinuous )
05267             {
05268                 CV_Assert( (retval = clEnqueueCopyBuffer(q, (cl_mem)src->handle, (cl_mem)dst->handle,
05269                                                srcrawofs, dstrawofs, total, 0, 0, 0)) == CL_SUCCESS );
05270             }
05271             else
05272             {
05273                 CV_Assert( (retval = clEnqueueCopyBufferRect(q, (cl_mem)src->handle, (cl_mem)dst->handle,
05274                                                    new_srcofs, new_dstofs, new_sz,
05275                                                    new_srcstep[0], 0,
05276                                                    new_dststep[0], 0,
05277                                                    0, 0, 0)) == CL_SUCCESS );
05278             }
05279         }
05280         if (retval == CL_SUCCESS)
05281         {
05282             CV_IMPL_ADD(CV_IMPL_OCL)
05283         }
05284 
05285 #ifdef HAVE_OPENCL_SVM
05286         if ((dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_BUFFER ||
05287             (dst->allocatorFlags_ & svm::OPENCL_SVM_BUFFER_MASK) == svm::OPENCL_SVM_FINE_GRAIN_SYSTEM)
05288         {
05289             // nothing
05290         }
05291         else
05292 #endif
05293         {
05294             dst->markHostCopyObsolete(true);
05295         }
05296         dst->markDeviceCopyObsolete(false);
05297 
05298         if( _sync )
05299         {
05300             CV_OclDbgAssert(clFinish(q) == CL_SUCCESS);
05301         }
05302     }
05303 
05304     BufferPoolController* getBufferPoolController(const char* id) const {
05305 #ifdef HAVE_OPENCL_SVM
05306         if ((svm::checkForceSVMUmatUsage() && (id == NULL || strcmp(id, "OCL") == 0)) || (id != NULL && strcmp(id, "SVM") == 0))
05307         {
05308             return &bufferPoolSVM;
05309         }
05310 #endif
05311         if (id != NULL && strcmp(id, "HOST_ALLOC") == 0)
05312         {
05313             return &bufferPoolHostPtr;
05314         }
05315         if (id != NULL && strcmp(id, "OCL") != 0)
05316         {
05317             CV_ErrorNoReturn(cv::Error::StsBadArg, "getBufferPoolController(): unknown BufferPool ID\n");
05318         }
05319         return &bufferPool;
05320     }
05321 
05322     MatAllocator* matStdAllocator;
05323 };
05324 
05325 MatAllocator* getOpenCLAllocator()
05326 {
05327     CV_SINGLETON_LAZY_INIT(MatAllocator, new OpenCLAllocator())
05328 }
05329 
05330 }} // namespace cv::ocl
05331 
05332 
05333 namespace cv {
05334 
05335 // three funcs below are implemented in umatrix.cpp
05336 void setSize( UMat& m, int _dims, const int* _sz, const size_t* _steps,
05337               bool autoSteps = false );
05338 
05339 void updateContinuityFlag(UMat& m);
05340 void finalizeHdr(UMat& m);
05341 
05342 } // namespace cv
05343 
05344 
05345 namespace cv { namespace ocl {
05346 
05347 /*
05348 // Convert OpenCL buffer memory to UMat
05349 */
05350 void convertFromBuffer(void* cl_mem_buffer, size_t step, int rows, int cols, int type, UMat& dst)
05351 {
05352     int d = 2;
05353     int sizes[] = { rows, cols };
05354 
05355     CV_Assert(0 <= d && d <= CV_MAX_DIM);
05356 
05357     dst.release();
05358 
05359     dst.flags      = (type & Mat::TYPE_MASK) | Mat::MAGIC_VAL;
05360     dst.usageFlags = USAGE_DEFAULT;
05361 
05362     setSize(dst, d, sizes, 0, true);
05363     dst.offset = 0;
05364 
05365     cl_mem             memobj = (cl_mem)cl_mem_buffer;
05366     cl_mem_object_type mem_type = 0;
05367 
05368     CV_Assert(clGetMemObjectInfo(memobj, CL_MEM_TYPE, sizeof(cl_mem_object_type), &mem_type, 0) == CL_SUCCESS);
05369 
05370     CV_Assert(CL_MEM_OBJECT_BUFFER == mem_type);
05371 
05372     size_t total = 0;
05373     CV_Assert(clGetMemObjectInfo(memobj, CL_MEM_SIZE, sizeof(size_t), &total, 0) == CL_SUCCESS);
05374 
05375     CV_Assert(clRetainMemObject(memobj) == CL_SUCCESS);
05376 
05377     CV_Assert((int)step >= cols * CV_ELEM_SIZE(type));
05378     CV_Assert(total >= rows * step);
05379 
05380     // attach clBuffer to UMatData
05381     dst.u = new UMatData(getOpenCLAllocator());
05382     dst.u->data            = 0;
05383     dst.u->allocatorFlags_ = 0; // not allocated from any OpenCV buffer pool
05384     dst.u->flags           = 0;
05385     dst.u->handle          = cl_mem_buffer;
05386     dst.u->origdata        = 0;
05387     dst.u->prevAllocator   = 0;
05388     dst.u->size            = total;
05389 
05390     finalizeHdr(dst);
05391     dst.addref();
05392 
05393     return;
05394 } // convertFromBuffer()
05395 
05396 
05397 /*
05398 // Convert OpenCL image2d_t memory to UMat
05399 */
05400 void convertFromImage(void* cl_mem_image, UMat& dst)
05401 {
05402     cl_mem             clImage = (cl_mem)cl_mem_image;
05403     cl_mem_object_type mem_type = 0;
05404 
05405     CV_Assert(clGetMemObjectInfo(clImage, CL_MEM_TYPE, sizeof(cl_mem_object_type), &mem_type, 0) == CL_SUCCESS);
05406 
05407     CV_Assert(CL_MEM_OBJECT_IMAGE2D == mem_type);
05408 
05409     cl_image_format fmt = { 0, 0 };
05410     CV_Assert(clGetImageInfo(clImage, CL_IMAGE_FORMAT, sizeof(cl_image_format), &fmt, 0) == CL_SUCCESS);
05411 
05412     int depth = CV_8U;
05413     switch (fmt.image_channel_data_type)
05414     {
05415     case CL_UNORM_INT8:
05416     case CL_UNSIGNED_INT8:
05417         depth = CV_8U;
05418         break;
05419 
05420     case CL_SNORM_INT8:
05421     case CL_SIGNED_INT8:
05422         depth = CV_8S;
05423         break;
05424 
05425     case CL_UNORM_INT16:
05426     case CL_UNSIGNED_INT16:
05427         depth = CV_16U;
05428         break;
05429 
05430     case CL_SNORM_INT16:
05431     case CL_SIGNED_INT16:
05432         depth = CV_16S;
05433         break;
05434 
05435     case CL_SIGNED_INT32:
05436         depth = CV_32S;
05437         break;
05438 
05439     case CL_FLOAT:
05440         depth = CV_32F;
05441         break;
05442 
05443     default:
05444         CV_Error(cv::Error::OpenCLApiCallError, "Not supported image_channel_data_type");
05445     }
05446 
05447     int type = CV_8UC1;
05448     switch (fmt.image_channel_order)
05449     {
05450     case CL_R:
05451         type = CV_MAKE_TYPE(depth, 1);
05452         break;
05453 
05454     case CL_RGBA:
05455     case CL_BGRA:
05456     case CL_ARGB:
05457          type = CV_MAKE_TYPE(depth, 4);
05458         break;
05459 
05460     default:
05461         CV_Error(cv::Error::OpenCLApiCallError, "Not supported image_channel_order");
05462         break;
05463     }
05464 
05465     size_t step = 0;
05466     CV_Assert(clGetImageInfo(clImage, CL_IMAGE_ROW_PITCH, sizeof(size_t), &step, 0) == CL_SUCCESS);
05467 
05468     size_t w = 0;
05469     CV_Assert(clGetImageInfo(clImage, CL_IMAGE_WIDTH, sizeof(size_t), &w, 0) == CL_SUCCESS);
05470 
05471     size_t h = 0;
05472     CV_Assert(clGetImageInfo(clImage, CL_IMAGE_HEIGHT, sizeof(size_t), &h, 0) == CL_SUCCESS);
05473 
05474     dst.create((int)h, (int)w, type);
05475 
05476     cl_mem clBuffer = (cl_mem)dst.handle(ACCESS_READ);
05477 
05478     cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
05479 
05480     size_t offset = 0;
05481     size_t src_origin[3] = { 0, 0, 0 };
05482     size_t region[3] = { w, h, 1 };
05483     CV_Assert(clEnqueueCopyImageToBuffer(q, clImage, clBuffer, src_origin, region, offset, 0, NULL, NULL) == CL_SUCCESS);
05484 
05485     CV_Assert(clFinish(q) == CL_SUCCESS);
05486 
05487     return;
05488 } // convertFromImage()
05489 
05490 
05491 ///////////////////////////////////////////// Utility functions /////////////////////////////////////////////////
05492 
05493 static void getDevices(std::vector<cl_device_id>& devices, cl_platform_id platform)
05494 {
05495     cl_uint numDevices = 0;
05496     CV_OclDbgAssert(clGetDeviceIDs(platform, (cl_device_type)Device::TYPE_ALL,
05497                                 0, NULL, &numDevices) == CL_SUCCESS);
05498 
05499     if (numDevices == 0)
05500     {
05501         devices.clear();
05502         return;
05503     }
05504 
05505     devices.resize((size_t)numDevices);
05506     CV_OclDbgAssert(clGetDeviceIDs(platform, (cl_device_type)Device::TYPE_ALL,
05507                                 numDevices, &devices[0], &numDevices) == CL_SUCCESS);
05508 }
05509 
05510 struct PlatformInfo::Impl
05511 {
05512     Impl(void* id)
05513     {
05514         refcount = 1;
05515         handle = *(cl_platform_id*)id;
05516         getDevices(devices, handle);
05517     }
05518 
05519     String getStrProp(cl_device_info prop) const
05520     {
05521         char buf[1024];
05522         size_t sz=0;
05523         return clGetPlatformInfo(handle, prop, sizeof(buf)-16, buf, &sz) == CL_SUCCESS &&
05524             sz < sizeof(buf) ? String(buf) : String();
05525     }
05526 
05527     IMPLEMENT_REFCOUNTABLE();
05528     std::vector<cl_device_id> devices;
05529     cl_platform_id handle;
05530 };
05531 
05532 PlatformInfo::PlatformInfo()
05533 {
05534     p = 0;
05535 }
05536 
05537 PlatformInfo::PlatformInfo(void* platform_id)
05538 {
05539     p = new Impl(platform_id);
05540 }
05541 
05542 PlatformInfo::~PlatformInfo()
05543 {
05544     if(p)
05545         p->release();
05546 }
05547 
05548 PlatformInfo::PlatformInfo(const PlatformInfo& i)
05549 {
05550     if (i.p)
05551         i.p->addref();
05552     p = i.p;
05553 }
05554 
05555 PlatformInfo& PlatformInfo::operator =(const PlatformInfo& i)
05556 {
05557     if (i.p != p)
05558     {
05559         if (i.p)
05560             i.p->addref();
05561         if (p)
05562             p->release();
05563         p = i.p;
05564     }
05565     return *this;
05566 }
05567 
05568 int PlatformInfo::deviceNumber() const
05569 {
05570     return p ? (int)p->devices.size() : 0;
05571 }
05572 
05573 void PlatformInfo::getDevice(Device& device, int d) const
05574 {
05575     CV_Assert(p && d < (int)p->devices.size() );
05576     if(p)
05577         device.set(p->devices[d]);
05578 }
05579 
05580 String PlatformInfo::name() const
05581 {
05582     return p ? p->getStrProp(CL_PLATFORM_NAME) : String();
05583 }
05584 
05585 String PlatformInfo::vendor() const
05586 {
05587     return p ? p->getStrProp(CL_PLATFORM_VENDOR) : String();
05588 }
05589 
05590 String PlatformInfo::version() const
05591 {
05592     return p ? p->getStrProp(CL_PLATFORM_VERSION) : String();
05593 }
05594 
05595 static void getPlatforms(std::vector<cl_platform_id>& platforms)
05596 {
05597     cl_uint numPlatforms = 0;
05598     CV_OclDbgAssert(clGetPlatformIDs(0, NULL, &numPlatforms) == CL_SUCCESS);
05599 
05600     if (numPlatforms == 0)
05601     {
05602         platforms.clear();
05603         return;
05604     }
05605 
05606     platforms.resize((size_t)numPlatforms);
05607     CV_OclDbgAssert(clGetPlatformIDs(numPlatforms, &platforms[0], &numPlatforms) == CL_SUCCESS);
05608 }
05609 
05610 void getPlatfomsInfo(std::vector<PlatformInfo>& platformsInfo)
05611 {
05612     std::vector<cl_platform_id> platforms;
05613     getPlatforms(platforms);
05614 
05615     for (size_t i = 0; i < platforms.size(); i++)
05616         platformsInfo.push_back( PlatformInfo((void*)&platforms[i]) );
05617 }
05618 
05619 const char* typeToStr(int type)
05620 {
05621     static const char* tab[]=
05622     {
05623         "uchar", "uchar2", "uchar3", "uchar4", 0, 0, 0, "uchar8", 0, 0, 0, 0, 0, 0, 0, "uchar16",
05624         "char", "char2", "char3", "char4", 0, 0, 0, "char8", 0, 0, 0, 0, 0, 0, 0, "char16",
05625         "ushort", "ushort2", "ushort3", "ushort4",0, 0, 0, "ushort8", 0, 0, 0, 0, 0, 0, 0, "ushort16",
05626         "short", "short2", "short3", "short4", 0, 0, 0, "short8", 0, 0, 0, 0, 0, 0, 0, "short16",
05627         "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
05628         "float", "float2", "float3", "float4", 0, 0, 0, "float8", 0, 0, 0, 0, 0, 0, 0, "float16",
05629         "double", "double2", "double3", "double4", 0, 0, 0, "double8", 0, 0, 0, 0, 0, 0, 0, "double16",
05630         "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?"
05631     };
05632     int cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
05633     return cn > 16 ? "?" : tab[depth*16 + cn-1];
05634 }
05635 
05636 const char* memopTypeToStr(int type)
05637 {
05638     static const char* tab[] =
05639     {
05640         "uchar", "uchar2", "uchar3", "uchar4", 0, 0, 0, "uchar8", 0, 0, 0, 0, 0, 0, 0, "uchar16",
05641         "char", "char2", "char3", "char4", 0, 0, 0, "char8", 0, 0, 0, 0, 0, 0, 0, "char16",
05642         "ushort", "ushort2", "ushort3", "ushort4",0, 0, 0, "ushort8", 0, 0, 0, 0, 0, 0, 0, "ushort16",
05643         "short", "short2", "short3", "short4", 0, 0, 0, "short8", 0, 0, 0, 0, 0, 0, 0, "short16",
05644         "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
05645         "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
05646         "ulong", "ulong2", "ulong3", "ulong4", 0, 0, 0, "ulong8", 0, 0, 0, 0, 0, 0, 0, "ulong16",
05647         "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?"
05648     };
05649     int cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
05650     return cn > 16 ? "?" : tab[depth*16 + cn-1];
05651 }
05652 
05653 const char* vecopTypeToStr(int type)
05654 {
05655     static const char* tab[] =
05656     {
05657         "uchar", "short", "uchar3", "int", 0, 0, 0, "int2", 0, 0, 0, 0, 0, 0, 0, "int4",
05658         "char", "short", "char3", "int", 0, 0, 0, "int2", 0, 0, 0, 0, 0, 0, 0, "int4",
05659         "ushort", "int", "ushort3", "int2",0, 0, 0, "int4", 0, 0, 0, 0, 0, 0, 0, "int8",
05660         "short", "int", "short3", "int2", 0, 0, 0, "int4", 0, 0, 0, 0, 0, 0, 0, "int8",
05661         "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
05662         "int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
05663         "ulong", "ulong2", "ulong3", "ulong4", 0, 0, 0, "ulong8", 0, 0, 0, 0, 0, 0, 0, "ulong16",
05664         "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?"
05665     };
05666     int cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
05667     return cn > 16 ? "?" : tab[depth*16 + cn-1];
05668 }
05669 
05670 const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf)
05671 {
05672     if( sdepth == ddepth )
05673         return "noconvert";
05674     const char *typestr = typeToStr(CV_MAKETYPE(ddepth, cn));
05675     if( ddepth >= CV_32F ||
05676         (ddepth == CV_32S && sdepth < CV_32S) ||
05677         (ddepth == CV_16S && sdepth <= CV_8S) ||
05678         (ddepth == CV_16U && sdepth == CV_8U))
05679     {
05680         sprintf(buf, "convert_%s", typestr);
05681     }
05682     else if( sdepth >= CV_32F )
05683         sprintf(buf, "convert_%s%s_rte", typestr, (ddepth < CV_32S ? "_sat" : ""));
05684     else
05685         sprintf(buf, "convert_%s_sat", typestr);
05686 
05687     return buf;
05688 }
05689 
05690 template <typename T>
05691 static std::string kerToStr(const Mat & k)
05692 {
05693     int width = k.cols - 1, depth = k.depth();
05694     const T * const data = k.ptr<T>();
05695 
05696     std::ostringstream stream;
05697     stream.precision(10);
05698 
05699     if (depth <= CV_8S)
05700     {
05701         for (int i = 0; i < width; ++i)
05702             stream << "DIG(" << (int)data[i] << ")";
05703         stream << "DIG(" << (int)data[width] << ")";
05704     }
05705     else if (depth == CV_32F)
05706     {
05707         stream.setf(std::ios_base::showpoint);
05708         for (int i = 0; i < width; ++i)
05709             stream << "DIG(" << data[i] << "f)";
05710         stream << "DIG(" << data[width] << "f)";
05711     }
05712     else
05713     {
05714         for (int i = 0; i < width; ++i)
05715             stream << "DIG(" << data[i] << ")";
05716         stream << "DIG(" << data[width] << ")";
05717     }
05718 
05719     return stream.str();
05720 }
05721 
05722 String kernelToStr(InputArray _kernel, int ddepth, const char * name)
05723 {
05724     Mat kernel = _kernel.getMat().reshape(1, 1);
05725 
05726     int depth = kernel.depth();
05727     if (ddepth < 0)
05728         ddepth = depth;
05729 
05730     if (ddepth != depth)
05731         kernel.convertTo(kernel, ddepth);
05732 
05733     typedef std::string (* func_t)(const Mat &);
05734     static const func_t funcs[] = { kerToStr<uchar>, kerToStr<char>, kerToStr<ushort>, kerToStr<short>,
05735                                     kerToStr<int>, kerToStr<float>, kerToStr<double>, 0 };
05736     const func_t func = funcs[ddepth];
05737     CV_Assert(func != 0);
05738 
05739     return cv::format(" -D %s=%s", name ? name : "COEFF", func(kernel).c_str());
05740 }
05741 
05742 #define PROCESS_SRC(src) \
05743     do \
05744     { \
05745         if (!src.empty()) \
05746         { \
05747             CV_Assert(src.isMat() || src.isUMat()); \
05748             Size csize = src.size(); \
05749             int ctype = src.type(), ccn = CV_MAT_CN(ctype), cdepth = CV_MAT_DEPTH(ctype), \
05750                 ckercn = vectorWidths[cdepth], cwidth = ccn * csize.width; \
05751             if (cwidth < ckercn || ckercn <= 0) \
05752                 return 1; \
05753             cols.push_back(cwidth); \
05754             if (strat == OCL_VECTOR_OWN && ctype != ref_type) \
05755                 return 1; \
05756             offsets.push_back(src.offset()); \
05757             steps.push_back(src.step()); \
05758             dividers.push_back(ckercn * CV_ELEM_SIZE1(ctype)); \
05759             kercns.push_back(ckercn); \
05760         } \
05761     } \
05762     while ((void)0, 0)
05763 
05764 int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3,
05765                               InputArray src4, InputArray src5, InputArray src6,
05766                               InputArray src7, InputArray src8, InputArray src9,
05767                               OclVectorStrategy strat)
05768 {
05769     const ocl::Device & d = ocl::Device::getDefault();
05770 
05771     int vectorWidths[] = { d.preferredVectorWidthChar(), d.preferredVectorWidthChar(),
05772         d.preferredVectorWidthShort(), d.preferredVectorWidthShort(),
05773         d.preferredVectorWidthInt(), d.preferredVectorWidthFloat(),
05774         d.preferredVectorWidthDouble(), -1 };
05775 
05776     // if the device says don't use vectors
05777     if (vectorWidths[0] == 1)
05778     {
05779         // it's heuristic
05780         vectorWidths[CV_8U] = vectorWidths[CV_8S] = 4;
05781         vectorWidths[CV_16U] = vectorWidths[CV_16S] = 2;
05782         vectorWidths[CV_32S] = vectorWidths[CV_32F] = vectorWidths[CV_64F] = 1;
05783     }
05784 
05785     return checkOptimalVectorWidth(vectorWidths, src1, src2, src3, src4, src5, src6, src7, src8, src9, strat);
05786 }
05787 
05788 int checkOptimalVectorWidth(const int *vectorWidths,
05789                             InputArray src1, InputArray src2, InputArray src3,
05790                             InputArray src4, InputArray src5, InputArray src6,
05791                             InputArray src7, InputArray src8, InputArray src9,
05792                             OclVectorStrategy strat)
05793 {
05794     CV_Assert(vectorWidths);
05795 
05796     int ref_type = src1.type();
05797 
05798     std::vector<size_t> offsets, steps, cols;
05799     std::vector<int> dividers, kercns;
05800     PROCESS_SRC(src1);
05801     PROCESS_SRC(src2);
05802     PROCESS_SRC(src3);
05803     PROCESS_SRC(src4);
05804     PROCESS_SRC(src5);
05805     PROCESS_SRC(src6);
05806     PROCESS_SRC(src7);
05807     PROCESS_SRC(src8);
05808     PROCESS_SRC(src9);
05809 
05810     size_t size = offsets.size();
05811 
05812     for (size_t i = 0; i < size; ++i)
05813         while (offsets[i] % dividers[i] != 0 || steps[i] % dividers[i] != 0 || cols[i] % kercns[i] != 0)
05814             dividers[i] >>= 1, kercns[i] >>= 1;
05815 
05816     // default strategy
05817     int kercn = *std::min_element(kercns.begin(), kercns.end());
05818 
05819     return kercn;
05820 }
05821 
05822 int predictOptimalVectorWidthMax(InputArray src1, InputArray src2, InputArray src3,
05823                                  InputArray src4, InputArray src5, InputArray src6,
05824                                  InputArray src7, InputArray src8, InputArray src9)
05825 {
05826     return predictOptimalVectorWidth(src1, src2, src3, src4, src5, src6, src7, src8, src9, OCL_VECTOR_MAX);
05827 }
05828 
05829 #undef PROCESS_SRC
05830 
05831 
05832 // TODO Make this as a method of OpenCL "BuildOptions" class
05833 void buildOptionsAddMatrixDescription(String& buildOptions, const String& name, InputArray _m)
05834 {
05835     if (!buildOptions.empty())
05836         buildOptions += " ";
05837     int type = _m.type(), depth = CV_MAT_DEPTH(type);
05838     buildOptions += format(
05839             "-D %s_T=%s -D %s_T1=%s -D %s_CN=%d -D %s_TSIZE=%d -D %s_T1SIZE=%d -D %s_DEPTH=%d",
05840             name.c_str(), ocl::typeToStr(type),
05841             name.c_str(), ocl::typeToStr(CV_MAKE_TYPE(depth, 1)),
05842             name.c_str(), (int)CV_MAT_CN(type),
05843             name.c_str(), (int)CV_ELEM_SIZE(type),
05844             name.c_str(), (int)CV_ELEM_SIZE1(type),
05845             name.c_str(), (int)depth
05846             );
05847 }
05848 
05849 
05850 struct Image2D::Impl
05851 {
05852     Impl(const UMat &src, bool norm, bool alias)
05853     {
05854         handle = 0;
05855         refcount = 1;
05856         init(src, norm, alias);
05857     }
05858 
05859     ~Impl()
05860     {
05861         if (handle)
05862             clReleaseMemObject(handle);
05863     }
05864 
05865     static cl_image_format getImageFormat(int depth, int cn, bool norm)
05866     {
05867         cl_image_format format;
05868         static const int channelTypes[] = { CL_UNSIGNED_INT8, CL_SIGNED_INT8, CL_UNSIGNED_INT16,
05869                                        CL_SIGNED_INT16, CL_SIGNED_INT32, CL_FLOAT, -1, -1 };
05870         static const int channelTypesNorm[] = { CL_UNORM_INT8, CL_SNORM_INT8, CL_UNORM_INT16,
05871                                                 CL_SNORM_INT16, -1, -1, -1, -1 };
05872         static const int channelOrders[] = { -1, CL_R, CL_RG, -1, CL_RGBA };
05873 
05874         int channelType = norm ? channelTypesNorm[depth] : channelTypes[depth];
05875         int channelOrder = channelOrders[cn];
05876         format.image_channel_data_type = (cl_channel_type)channelType;
05877         format.image_channel_order = (cl_channel_order)channelOrder;
05878         return format;
05879     }
05880 
05881     static bool isFormatSupported(cl_image_format format)
05882     {
05883         if (!haveOpenCL())
05884             CV_Error(Error::OpenCLApiCallError, "OpenCL runtime not found!");
05885 
05886         cl_context context = (cl_context)Context::getDefault().ptr();
05887         // Figure out how many formats are supported by this context.
05888         cl_uint numFormats = 0;
05889         cl_int err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE,
05890                                                 CL_MEM_OBJECT_IMAGE2D, numFormats,
05891                                                 NULL, &numFormats);
05892         AutoBuffer<cl_image_format> formats(numFormats);
05893         err = clGetSupportedImageFormats(context, CL_MEM_READ_WRITE,
05894                                          CL_MEM_OBJECT_IMAGE2D, numFormats,
05895                                          formats, NULL);
05896         CV_OclDbgAssert(err == CL_SUCCESS);
05897         for (cl_uint i = 0; i < numFormats; ++i)
05898         {
05899             if (!memcmp(&formats[i], &format, sizeof(format)))
05900             {
05901                 return true;
05902             }
05903         }
05904         return false;
05905     }
05906 
05907     void init(const UMat &src, bool norm, bool alias)
05908     {
05909         if (!haveOpenCL())
05910             CV_Error(Error::OpenCLApiCallError, "OpenCL runtime not found!");
05911 
05912         CV_Assert(!src.empty());
05913         CV_Assert(ocl::Device::getDefault().imageSupport());
05914 
05915         int err, depth = src.depth(), cn = src.channels();
05916         CV_Assert(cn <= 4);
05917         cl_image_format format = getImageFormat(depth, cn, norm);
05918 
05919         if (!isFormatSupported(format))
05920             CV_Error(Error::OpenCLApiCallError, "Image format is not supported");
05921 
05922         if (alias && !src.handle(ACCESS_RW))
05923             CV_Error(Error::OpenCLApiCallError, "Incorrect UMat, handle is null");
05924 
05925         cl_context context = (cl_context)Context::getDefault().ptr();
05926         cl_command_queue queue = (cl_command_queue)Queue::getDefault().ptr();
05927 
05928 #ifdef CL_VERSION_1_2
05929         // this enables backwards portability to
05930         // run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support
05931         const Device & d = ocl::Device::getDefault();
05932         int minor = d.deviceVersionMinor(), major = d.deviceVersionMajor();
05933         CV_Assert(!alias || canCreateAlias(src));
05934         if (1 < major || (1 == major && 2 <= minor))
05935         {
05936             cl_image_desc desc;
05937             desc.image_type       = CL_MEM_OBJECT_IMAGE2D;
05938             desc.image_width      = src.cols;
05939             desc.image_height     = src.rows;
05940             desc.image_depth      = 0;
05941             desc.image_array_size = 1;
05942             desc.image_row_pitch  = alias ? src.step[0] : 0;
05943             desc.image_slice_pitch = 0;
05944             desc.buffer           = alias ? (cl_mem)src.handle(ACCESS_RW) : 0;
05945             desc.num_mip_levels   = 0;
05946             desc.num_samples      = 0;
05947             handle = clCreateImage(context, CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
05948         }
05949         else
05950 #endif
05951         {
05952             CV_SUPPRESS_DEPRECATED_START
05953             CV_Assert(!alias);  // This is an OpenCL 1.2 extension
05954             handle = clCreateImage2D(context, CL_MEM_READ_WRITE, &format, src.cols, src.rows, 0, NULL, &err);
05955             CV_SUPPRESS_DEPRECATED_END
05956         }
05957         CV_OclDbgAssert(err == CL_SUCCESS);
05958 
05959         size_t origin[] = { 0, 0, 0 };
05960         size_t region[] = { static_cast<size_t>(src.cols), static_cast<size_t>(src.rows), 1 };
05961 
05962         cl_mem devData;
05963         if (!alias && !src.isContinuous())
05964         {
05965             devData = clCreateBuffer(context, CL_MEM_READ_ONLY, src.cols * src.rows * src.elemSize(), NULL, &err);
05966             CV_OclDbgAssert(err == CL_SUCCESS);
05967 
05968             const size_t roi[3] = {static_cast<size_t>(src.cols) * src.elemSize(), static_cast<size_t>(src.rows), 1};
05969             CV_Assert(clEnqueueCopyBufferRect(queue, (cl_mem)src.handle(ACCESS_READ), devData, origin, origin,
05970                 roi, src.step, 0, src.cols * src.elemSize(), 0, 0, NULL, NULL) == CL_SUCCESS);
05971             CV_OclDbgAssert(clFlush(queue) == CL_SUCCESS);
05972         }
05973         else
05974         {
05975             devData = (cl_mem)src.handle(ACCESS_READ);
05976         }
05977         CV_Assert(devData != NULL);
05978 
05979         if (!alias)
05980         {
05981             CV_OclDbgAssert(clEnqueueCopyBufferToImage(queue, devData, handle, 0, origin, region, 0, NULL, 0) == CL_SUCCESS);
05982             if (!src.isContinuous())
05983             {
05984                 CV_OclDbgAssert(clFlush(queue) == CL_SUCCESS);
05985                 CV_OclDbgAssert(clReleaseMemObject(devData) == CL_SUCCESS);
05986             }
05987         }
05988     }
05989 
05990     IMPLEMENT_REFCOUNTABLE();
05991 
05992     cl_mem handle;
05993 };
05994 
05995 Image2D::Image2D()
05996 {
05997     p = NULL;
05998 }
05999 
06000 Image2D::Image2D(const UMat &src, bool norm, bool alias)
06001 {
06002     p = new Impl(src, norm, alias);
06003 }
06004 
06005 bool Image2D::canCreateAlias(const UMat &m)
06006 {
06007     bool ret = false;
06008     const Device & d = ocl::Device::getDefault();
06009     if (d.imageFromBufferSupport() && !m.empty())
06010     {
06011         // This is the required pitch alignment in pixels
06012         uint pitchAlign = d.imagePitchAlignment();
06013         if (pitchAlign && !(m.step % (pitchAlign * m.elemSize())))
06014         {
06015             // We don't currently handle the case where the buffer was created
06016             // with CL_MEM_USE_HOST_PTR
06017             if (!m.u->tempUMat())
06018             {
06019                 ret = true;
06020             }
06021         }
06022     }
06023     return ret;
06024 }
06025 
06026 bool Image2D::isFormatSupported(int depth, int cn, bool norm)
06027 {
06028     cl_image_format format = Impl::getImageFormat(depth, cn, norm);
06029 
06030     return Impl::isFormatSupported(format);
06031 }
06032 
06033 Image2D::Image2D(const Image2D & i)
06034 {
06035     p = i.p;
06036     if (p)
06037         p->addref();
06038 }
06039 
06040 Image2D & Image2D::operator = (const Image2D & i)
06041 {
06042     if (i.p != p)
06043     {
06044         if (i.p)
06045             i.p->addref();
06046         if (p)
06047             p->release();
06048         p = i.p;
06049     }
06050     return *this;
06051 }
06052 
06053 Image2D::~Image2D()
06054 {
06055     if (p)
06056         p->release();
06057 }
06058 
06059 void* Image2D::ptr() const
06060 {
06061     return p ? p->handle : 0;
06062 }
06063 
06064 //CV_EXPORTS bool cv::ocl::internal::isPerformanceCheckBypassed()
06065 //{
06066 //    static bool initialized = false;
06067 //    static bool value = false;
06068 //    if (!initialized)
06069 //    {
06070 //        value = getBoolParameter("OPENCV_OPENCL_PERF_CHECK_BYPASS", false);
06071 //        initialized = true;
06072 //    }
06073 //    return value;
06074 //}
06075 
06076 //CV_EXPORTS bool ocl::internal::isCLBuffer(UMat& u)
06077 //{
06078 //    void* h = u.handle(ACCESS_RW);
06079 //    if (!h)
06080 //        return true;
06081 //    CV_DbgAssert(u.u->currAllocator == getOpenCLAllocator());
06082 //#if 1
06083 //    if ((u.u->allocatorFlags_ & 0xffff0000) != 0) // OpenCL SVM flags are stored here
06084 //        return false;
06085 //#else
06086 //    cl_mem_object_type type = 0;
06087 //    cl_int ret = clGetMemObjectInfo((cl_mem)h, CL_MEM_TYPE, sizeof(type), &type, NULL);
06088 //    if (ret != CL_SUCCESS || type != CL_MEM_OBJECT_BUFFER)
06089 //        return false;
06090 //#endif
06091 //    return true;
06092 //}
06093 
06094 }}
06095