the do / gr-peach-opencv-project

Fork of gr-peach-opencv-project by the do

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers clahe.cpp Source File

clahe.cpp

00001 /*M///////////////////////////////////////////////////////////////////////////////////////
00002 //
00003 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
00004 //
00005 //  By downloading, copying, installing or using the software you agree to this license.
00006 //  If you do not agree to this license, do not download, install,
00007 //  copy or use the software.
00008 //
00009 //
00010 //                           License Agreement
00011 //                For Open Source Computer Vision Library
00012 //
00013 // Copyright (C) 2013, NVIDIA Corporation, all rights reserved.
00014 // Copyright (C) 2014, Itseez Inc., all rights reserved.
00015 // Third party copyrights are property of their respective owners.
00016 //
00017 // Redistribution and use in source and binary forms, with or without modification,
00018 // are permitted provided that the following conditions are met:
00019 //
00020 //   * Redistribution's of source code must retain the above copyright notice,
00021 //     this list of conditions and the following disclaimer.
00022 //
00023 //   * Redistribution's in binary form must reproduce the above copyright notice,
00024 //     this list of conditions and the following disclaimer in the documentation
00025 //     and/or other materials provided with the distribution.
00026 //
00027 //   * The name of the copyright holders may not be used to endorse or promote products
00028 //     derived from this software without specific prior written permission.
00029 //
00030 // This software is provided by the copyright holders and contributors "as is" and
00031 // any express or implied warranties, including, but not limited to, the implied
00032 // warranties of merchantability and fitness for a particular purpose are disclaimed.
00033 // In no event shall the copyright holders or contributors be liable for any direct,
00034 // indirect, incidental, special, exemplary, or consequential damages
00035 // (including, but not limited to, procurement of substitute goods or services;
00036 // loss of use, data, or profits; or business interruption) however caused
00037 // and on any theory of liability, whether in contract, strict liability,
00038 // or tort (including negligence or otherwise) arising in any way out of
00039 // the use of this software, even if advised of the possibility of such damage.
00040 //
00041 //M*/
00042 
00043 #include "precomp.hpp"
00044 #include "opencl_kernels_imgproc.hpp"
00045 
00046 // ----------------------------------------------------------------------
00047 // CLAHE
00048 
00049 #ifdef HAVE_OPENCL
00050 
00051 namespace clahe
00052 {
00053     static bool calcLut(cv::InputArray _src, cv::OutputArray _dst,
00054         const int tilesX, const int tilesY, const cv::Size tileSize,
00055         const int clipLimit, const float lutScale)
00056     {
00057         cv::ocl::Kernel _k("calcLut", cv::ocl::imgproc::clahe_oclsrc);
00058 
00059         bool is_cpu = cv::ocl::Device::getDefault().type() == cv::ocl::Device::TYPE_CPU;
00060         cv::String opts;
00061         if(is_cpu)
00062             opts = "-D CPU ";
00063         else
00064             opts = cv::format("-D WAVE_SIZE=%d", _k.preferedWorkGroupSizeMultiple());
00065 
00066         cv::ocl::Kernel k("calcLut", cv::ocl::imgproc::clahe_oclsrc, opts);
00067         if(k.empty())
00068             return false;
00069 
00070         cv::UMat  src = _src.getUMat();
00071         _dst.create(tilesX * tilesY, 256, CV_8UC1);
00072         cv::UMat  dst = _dst.getUMat();
00073 
00074         int tile_size[2];
00075         tile_size[0] = tileSize.width;
00076         tile_size[1] = tileSize.height;
00077 
00078         size_t localThreads[3]  = { 32, 8, 1 };
00079         size_t globalThreads[3] = { tilesX * localThreads[0], tilesY * localThreads[1], 1 };
00080 
00081         int idx = 0;
00082         idx = k.set(idx, cv::ocl::KernelArg::ReadOnlyNoSize(src));
00083         idx = k.set(idx, cv::ocl::KernelArg::WriteOnlyNoSize(dst));
00084         idx = k.set(idx, tile_size);
00085         idx = k.set(idx, tilesX);
00086         idx = k.set(idx, clipLimit);
00087         k.set(idx, lutScale);
00088 
00089         return k.run(2, globalThreads, localThreads, false);
00090     }
00091 
00092     static bool transform(cv::InputArray _src, cv::OutputArray _dst, cv::InputArray _lut,
00093         const int tilesX, const int tilesY, const cv::Size & tileSize)
00094     {
00095 
00096         cv::ocl::Kernel k("transform", cv::ocl::imgproc::clahe_oclsrc);
00097         if(k.empty())
00098             return false;
00099 
00100         int tile_size[2];
00101         tile_size[0] = tileSize.width;
00102         tile_size[1] = tileSize.height;
00103 
00104         cv::UMat  src = _src.getUMat();
00105         _dst.create(src.size(), src.type());
00106         cv::UMat  dst = _dst.getUMat();
00107         cv::UMat  lut = _lut.getUMat();
00108 
00109         size_t localThreads[3]  = { 32, 8, 1 };
00110         size_t globalThreads[3] = { (size_t)src.cols, (size_t)src.rows, 1 };
00111 
00112         int idx = 0;
00113         idx = k.set(idx, cv::ocl::KernelArg::ReadOnlyNoSize(src));
00114         idx = k.set(idx, cv::ocl::KernelArg::WriteOnlyNoSize(dst));
00115         idx = k.set(idx, cv::ocl::KernelArg::ReadOnlyNoSize(lut));
00116         idx = k.set(idx, src.cols);
00117         idx = k.set(idx, src.rows);
00118         idx = k.set(idx, tile_size);
00119         idx = k.set(idx, tilesX);
00120         k.set(idx, tilesY);
00121 
00122         return k.run(2, globalThreads, localThreads, false);
00123     }
00124 }
00125 
00126 #endif
00127 
00128 namespace
00129 {
00130     template <class T, int histSize, int shift>
00131     class CLAHE_CalcLut_Body : public cv::ParallelLoopBody
00132     {
00133     public:
00134         CLAHE_CalcLut_Body(const cv::Mat& src, const cv::Mat& lut, const cv::Size& tileSize, const int& tilesX, const int& clipLimit, const float& lutScale) :
00135             src_(src), lut_(lut), tileSize_(tileSize), tilesX_(tilesX), clipLimit_(clipLimit), lutScale_(lutScale)
00136         {
00137         }
00138 
00139         void operator ()(const cv::Range& range) const;
00140 
00141     private:
00142         cv::Mat src_;
00143         mutable cv::Mat lut_;
00144 
00145         cv::Size tileSize_;
00146         int tilesX_;
00147         int clipLimit_;
00148         float lutScale_;
00149     };
00150 
00151     template <class T, int histSize, int shift>
00152     void CLAHE_CalcLut_Body<T,histSize,shift>::operator ()(const cv::Range& range) const
00153     {
00154         T* tileLut = lut_.ptr<T>(range.start);
00155         const size_t lut_step = lut_.step / sizeof(T);
00156 
00157         for (int k = range.start; k < range.end; ++k, tileLut += lut_step)
00158         {
00159             const int ty = k / tilesX_;
00160             const int tx = k % tilesX_;
00161 
00162             // retrieve tile submatrix
00163 
00164             cv::Rect tileROI;
00165             tileROI.x = tx * tileSize_.width;
00166             tileROI.y = ty * tileSize_.height;
00167             tileROI.width = tileSize_.width;
00168             tileROI.height = tileSize_.height;
00169 
00170             const cv::Mat tile = src_(tileROI);
00171 
00172             // calc histogram
00173 
00174             int tileHist[histSize] = {0, };
00175 
00176             int height = tileROI.height;
00177             const size_t sstep = src_.step / sizeof(T);
00178             for (const T* ptr = tile.ptr<T>(0); height--; ptr += sstep)
00179             {
00180                 int x = 0;
00181                 for (; x <= tileROI.width - 4; x += 4)
00182                 {
00183                     int t0 = ptr[x], t1 = ptr[x+1];
00184                     tileHist[t0 >> shift]++; tileHist[t1 >> shift]++;
00185                     t0 = ptr[x+2]; t1 = ptr[x+3];
00186                     tileHist[t0 >> shift]++; tileHist[t1 >> shift]++;
00187                 }
00188 
00189                 for (; x < tileROI.width; ++x)
00190                     tileHist[ptr[x] >> shift]++;
00191             }
00192 
00193             // clip histogram
00194 
00195             if (clipLimit_ > 0)
00196             {
00197                 // how many pixels were clipped
00198                 int clipped = 0;
00199                 for (int i = 0; i < histSize; ++i)
00200                 {
00201                     if (tileHist[i] > clipLimit_)
00202                     {
00203                         clipped += tileHist[i] - clipLimit_;
00204                         tileHist[i] = clipLimit_;
00205                     }
00206                 }
00207 
00208                 // redistribute clipped pixels
00209                 int redistBatch = clipped / histSize;
00210                 int residual = clipped - redistBatch * histSize;
00211 
00212                 for (int i = 0; i < histSize; ++i)
00213                     tileHist[i] += redistBatch;
00214 
00215                 for (int i = 0; i < residual; ++i)
00216                     tileHist[i]++;
00217             }
00218 
00219             // calc Lut
00220 
00221             int sum = 0;
00222             for (int i = 0; i < histSize; ++i)
00223             {
00224                 sum += tileHist[i];
00225                 tileLut[i] = cv::saturate_cast<T>(sum * lutScale_);
00226             }
00227         }
00228     }
00229 
00230     template <class T>
00231     class CLAHE_Interpolation_Body : public cv::ParallelLoopBody
00232     {
00233     public:
00234         CLAHE_Interpolation_Body(const cv::Mat& src, const cv::Mat& dst, const cv::Mat& lut, const cv::Size& tileSize, const int& tilesX, const int& tilesY) :
00235             src_(src), dst_(dst), lut_(lut), tileSize_(tileSize), tilesX_(tilesX), tilesY_(tilesY)
00236         {
00237             buf.allocate(src.cols << 2);
00238             ind1_p = (int *)buf;
00239             ind2_p = ind1_p + src.cols;
00240             xa_p = (float *)(ind2_p + src.cols);
00241             xa1_p = xa_p + src.cols;
00242 
00243             int lut_step = static_cast<int>(lut_.step / sizeof(T));
00244             float inv_tw = 1.0f / tileSize_.width;
00245 
00246             for (int x = 0; x < src.cols; ++x)
00247             {
00248                 float txf = x * inv_tw - 0.5f;
00249 
00250                 int tx1 = cvFloor(txf);
00251                 int tx2 = tx1 + 1;
00252 
00253                 xa_p[x] = txf - tx1;
00254                 xa1_p[x] = 1.0f - xa_p[x];
00255 
00256                 tx1 = std::max(tx1, 0);
00257                 tx2 = std::min(tx2, tilesX_ - 1);
00258 
00259                 ind1_p[x] = tx1 * lut_step;
00260                 ind2_p[x] = tx2 * lut_step;
00261             }
00262         }
00263 
00264         void operator ()(const cv::Range& range) const;
00265 
00266     private:
00267         cv::Mat src_;
00268         mutable cv::Mat dst_;
00269         cv::Mat lut_;
00270 
00271         cv::Size tileSize_;
00272         int tilesX_;
00273         int tilesY_;
00274 
00275         cv::AutoBuffer<int>  buf;
00276         int * ind1_p, * ind2_p;
00277         float * xa_p, * xa1_p;
00278     };
00279 
00280     template <class T>
00281     void CLAHE_Interpolation_Body<T>::operator ()(const cv::Range& range) const
00282     {
00283         float inv_th = 1.0f / tileSize_.height;
00284 
00285         for (int y = range.start; y < range.end; ++y)
00286         {
00287             const T* srcRow = src_.ptr<T>(y);
00288             T* dstRow = dst_.ptr<T>(y);
00289 
00290             float tyf = y * inv_th - 0.5f;
00291 
00292             int ty1 = cvFloor(tyf);
00293             int ty2 = ty1 + 1;
00294 
00295             float ya = tyf - ty1, ya1 = 1.0f - ya;
00296 
00297             ty1 = std::max(ty1, 0);
00298             ty2 = std::min(ty2, tilesY_ - 1);
00299 
00300             const T* lutPlane1 = lut_.ptr<T>(ty1 * tilesX_);
00301             const T* lutPlane2 = lut_.ptr<T>(ty2 * tilesX_);
00302 
00303             for (int x = 0; x < src_.cols; ++x)
00304             {
00305                 int srcVal = srcRow[x];
00306 
00307                 int ind1 = ind1_p[x] + srcVal;
00308                 int ind2 = ind2_p[x] + srcVal;
00309 
00310                 float res = (lutPlane1[ind1] * xa1_p[x] + lutPlane1[ind2] * xa_p[x]) * ya1 +
00311                             (lutPlane2[ind1] * xa1_p[x] + lutPlane2[ind2] * xa_p[x]) * ya;
00312 
00313                 dstRow[x] = cv::saturate_cast<T>(res);
00314             }
00315         }
00316     }
00317 
00318     class CLAHE_Impl : public cv::CLAHE
00319     {
00320     public:
00321         CLAHE_Impl(double clipLimit = 40.0, int tilesX = 8, int tilesY = 8);
00322 
00323         void apply(cv::InputArray src, cv::OutputArray dst);
00324 
00325         void setClipLimit(double clipLimit);
00326         double getClipLimit() const;
00327 
00328         void setTilesGridSize(cv::Size tileGridSize);
00329         cv::Size getTilesGridSize() const;
00330 
00331         void collectGarbage();
00332 
00333     private:
00334         double clipLimit_;
00335         int tilesX_;
00336         int tilesY_;
00337 
00338         cv::Mat srcExt_;
00339         cv::Mat lut_;
00340 
00341 #ifdef HAVE_OPENCL
00342         cv::UMat  usrcExt_;
00343         cv::UMat  ulut_;
00344 #endif
00345     };
00346 
00347     CLAHE_Impl::CLAHE_Impl(double clipLimit, int tilesX, int tilesY) :
00348         clipLimit_(clipLimit), tilesX_(tilesX), tilesY_(tilesY)
00349     {
00350     }
00351 
00352     void CLAHE_Impl::apply(cv::InputArray _src, cv::OutputArray _dst)
00353     {
00354         CV_Assert( _src.type() == CV_8UC1 || _src.type() == CV_16UC1 );
00355 
00356 #ifdef HAVE_OPENCL
00357         bool useOpenCL = cv::ocl::useOpenCL() && _src.isUMat() && _src.dims()<=2 && _src.type() == CV_8UC1;
00358 #endif
00359 
00360         int histSize = _src.type() == CV_8UC1 ? 256 : 4096;
00361 
00362         cv::Size tileSize;
00363         cv::_InputArray _srcForLut;
00364 
00365         if (_src.size().width % tilesX_ == 0 && _src.size().height % tilesY_ == 0)
00366         {
00367             tileSize = cv::Size(_src.size().width / tilesX_, _src.size().height / tilesY_);
00368             _srcForLut = _src;
00369         }
00370         else
00371         {
00372 #ifdef HAVE_OPENCL
00373             if(useOpenCL)
00374             {
00375                 cv::copyMakeBorder(_src, usrcExt_, 0, tilesY_ - (_src.size().height % tilesY_), 0, tilesX_ - (_src.size().width % tilesX_), cv::BORDER_REFLECT_101);
00376                 tileSize = cv::Size(usrcExt_.size().width / tilesX_, usrcExt_.size().height / tilesY_);
00377                 _srcForLut = usrcExt_;
00378             }
00379             else
00380 #endif
00381             {
00382                 cv::copyMakeBorder(_src, srcExt_, 0, tilesY_ - (_src.size().height % tilesY_), 0, tilesX_ - (_src.size().width % tilesX_), cv::BORDER_REFLECT_101);
00383                 tileSize = cv::Size(srcExt_.size().width / tilesX_, srcExt_.size().height / tilesY_);
00384                 _srcForLut = srcExt_;
00385             }
00386         }
00387 
00388         const int tileSizeTotal = tileSize.area();
00389         const float lutScale = static_cast<float>(histSize - 1) / tileSizeTotal;
00390 
00391         int clipLimit = 0;
00392         if (clipLimit_ > 0.0)
00393         {
00394             clipLimit = static_cast<int>(clipLimit_ * tileSizeTotal / histSize);
00395             clipLimit = std::max(clipLimit, 1);
00396         }
00397 
00398 #ifdef HAVE_OPENCL
00399         if (useOpenCL && clahe::calcLut(_srcForLut, ulut_, tilesX_, tilesY_, tileSize, clipLimit, lutScale) )
00400             if( clahe::transform(_src, _dst, ulut_, tilesX_, tilesY_, tileSize) )
00401             {
00402                 CV_IMPL_ADD(CV_IMPL_OCL);
00403                 return;
00404             }
00405 #endif
00406 
00407         cv::Mat src = _src.getMat();
00408         _dst.create( src.size(), src.type() );
00409         cv::Mat dst = _dst.getMat();
00410         cv::Mat srcForLut = _srcForLut.getMat();
00411         lut_.create(tilesX_ * tilesY_, histSize, _src.type());
00412 
00413         cv::Ptr<cv::ParallelLoopBody> calcLutBody;
00414         if (_src.type() == CV_8UC1)
00415             calcLutBody = cv::makePtr<CLAHE_CalcLut_Body<uchar, 256, 0> >(srcForLut, lut_, tileSize, tilesX_, clipLimit, lutScale);
00416         else if (_src.type() == CV_16UC1)
00417             calcLutBody = cv::makePtr<CLAHE_CalcLut_Body<ushort, 4096, 4> >(srcForLut, lut_, tileSize, tilesX_, clipLimit, lutScale);
00418         else
00419             CV_Error( CV_StsBadArg, "Unsupported type" );
00420 
00421         cv::parallel_for_(cv::Range(0, tilesX_ * tilesY_), *calcLutBody);
00422 
00423         cv::Ptr<cv::ParallelLoopBody> interpolationBody;
00424         if (_src.type() == CV_8UC1)
00425             interpolationBody = cv::makePtr<CLAHE_Interpolation_Body<uchar> >(src, dst, lut_, tileSize, tilesX_, tilesY_);
00426         else if (_src.type() == CV_16UC1)
00427             interpolationBody = cv::makePtr<CLAHE_Interpolation_Body<ushort> >(src, dst, lut_, tileSize, tilesX_, tilesY_);
00428 
00429         cv::parallel_for_(cv::Range(0, src.rows), *interpolationBody);
00430     }
00431 
00432     void CLAHE_Impl::setClipLimit(double clipLimit)
00433     {
00434         clipLimit_ = clipLimit;
00435     }
00436 
00437     double CLAHE_Impl::getClipLimit() const
00438     {
00439         return clipLimit_;
00440     }
00441 
00442     void CLAHE_Impl::setTilesGridSize(cv::Size tileGridSize)
00443     {
00444         tilesX_ = tileGridSize.width;
00445         tilesY_ = tileGridSize.height;
00446     }
00447 
00448     cv::Size CLAHE_Impl::getTilesGridSize() const
00449     {
00450         return cv::Size(tilesX_, tilesY_);
00451     }
00452 
00453     void CLAHE_Impl::collectGarbage()
00454     {
00455         srcExt_.release();
00456         lut_.release();
00457 #ifdef HAVE_OPENCL
00458         usrcExt_.release();
00459         ulut_.release();
00460 #endif
00461     }
00462 }
00463 
00464 cv::Ptr<cv::CLAHE> cv::createCLAHE(double clipLimit, cv::Size tileGridSize)
00465 {
00466     return makePtr<CLAHE_Impl>(clipLimit, tileGridSize.width, tileGridSize.height);
00467 }
00468