Renesas GR-PEACH OpenCV Development / gr-peach-opencv-project-sd-card_update

Fork of gr-peach-opencv-project-sd-card by the do

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers parallel.cpp Source File

parallel.cpp

00001 /*M///////////////////////////////////////////////////////////////////////////////////////
00002 //
00003 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
00004 //
00005 //  By downloading, copying, installing or using the software you agree to this license.
00006 //  If you do not agree to this license, do not download, install,
00007 //  copy or use the software.
00008 //
00009 //
00010 //                           License Agreement
00011 //                For Open Source Computer Vision Library
00012 //
00013 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
00014 // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
00015 // Third party copyrights are property of their respective owners.
00016 //
00017 // Redistribution and use in source and binary forms, with or without modification,
00018 // are permitted provided that the following conditions are met:
00019 //
00020 //   * Redistribution's of source code must retain the above copyright notice,
00021 //     this list of conditions and the following disclaimer.
00022 //
00023 //   * Redistribution's in binary form must reproduce the above copyright notice,
00024 //     this list of conditions and the following disclaimer in the documentation
00025 //     and/or other materials provided with the distribution.
00026 //
00027 //   * The name of the copyright holders may not be used to endorse or promote products
00028 //     derived from this software without specific prior written permission.
00029 //
00030 // This software is provided by the copyright holders and contributors "as is" and
00031 // any express or implied warranties, including, but not limited to, the implied
00032 // warranties of merchantability and fitness for a particular purpose are disclaimed.
00033 // In no event shall the Intel Corporation or contributors be liable for any direct,
00034 // indirect, incidental, special, exemplary, or consequential damages
00035 // (including, but not limited to, procurement of substitute goods or services;
00036 // loss of use, data, or profits; or business interruption) however caused
00037 // and on any theory of liability, whether in contract, strict liability,
00038 // or tort (including negligence or otherwise) arising in any way out of
00039 // the use of this software, even if advised of the possibility of such damage.
00040 //
00041 //M*/
00042 
00043 #include "precomp.hpp"
00044 
00045 #if defined WIN32 || defined WINCE
00046     #include <windows.h>
00047     #undef small
00048     #undef min
00049     #undef max
00050     #undef abs
00051 #endif
00052 
00053 #if defined __linux__ || defined __APPLE__
00054     #include <unistd.h>
00055     #include <stdio.h>
00056     #include <sys/types.h>
00057     #if defined ANDROID
00058         #include <sys/sysconf.h>
00059     #elif defined __APPLE__
00060         #include <sys/sysctl.h>
00061     #endif
00062 #endif
00063 
// Backend switches that are derived from compiler/platform macros alone.
#if defined(_OPENMP)
    #define HAVE_OPENMP
#endif

#if defined(__APPLE__)
    #define HAVE_GCD
#endif

// MSVC reports _MSC_VER >= 1600 when the Concurrency runtime switch is enabled here.
#if defined(_MSC_VER) && _MSC_VER >= 1600
    #define HAVE_CONCURRENCY
#endif
00075 
00076 /* IMPORTANT: always use the same order of defines
00077    1. HAVE_TBB         - 3rdparty library, should be explicitly enabled
00078    2. HAVE_CSTRIPES    - 3rdparty library, should be explicitly enabled
00079    3. HAVE_OPENMP      - integrated to compiler, should be explicitly enabled
00080    4. HAVE_GCD         - system wide, used automatically        (APPLE only)
00081    5. WINRT            - system wide, used automatically        (Windows RT only)
00082    6. HAVE_CONCURRENCY - part of runtime, used automatically    (Windows only - MSVS 10, MSVS 11)
00083    7. HAVE_PTHREADS_PF - pthreads if available
00084 */
00085 
00086 #if defined HAVE_TBB
00087     #include "tbb/tbb_stddef.h"
00088     #if TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR >= 202
00089         #include "tbb/tbb.h"
00090         #include "tbb/task.h"
00091         #if TBB_INTERFACE_VERSION >= 6100
00092             #include "tbb/task_arena.h"
00093         #endif
00094         #undef min
00095         #undef max
00096     #else
00097         #undef HAVE_TBB
00098     #endif // end TBB version
00099 #endif
00100 
00101 #ifndef HAVE_TBB
00102     #if defined HAVE_CSTRIPES
00103         #include "C=.h"
00104         #undef shared
00105     #elif defined HAVE_OPENMP
00106         #include <omp.h>
00107     #elif defined HAVE_GCD
00108         #include <dispatch/dispatch.h>
00109         #include <pthread.h>
00110     #elif defined WINRT && _MSC_VER < 1900
00111         #include <ppltasks.h>
00112     #elif defined HAVE_CONCURRENCY
00113         #include <ppl.h>
00114     #endif
00115 #endif
00116 
// Name of the backend chosen at compile time; the first matching switch wins.
// CV_PARALLEL_FRAMEWORK stays undefined when no backend is available.
#if defined(HAVE_TBB) && TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR >= 202
#  define CV_PARALLEL_FRAMEWORK "tbb"
#elif defined(HAVE_CSTRIPES)
#  define CV_PARALLEL_FRAMEWORK "cstripes"
#elif defined(HAVE_OPENMP)
#  define CV_PARALLEL_FRAMEWORK "openmp"
#elif defined(HAVE_GCD)
#  define CV_PARALLEL_FRAMEWORK "gcd"
#elif defined(WINRT)
#  define CV_PARALLEL_FRAMEWORK "winrt-concurrency"
#elif defined(HAVE_CONCURRENCY)
#  define CV_PARALLEL_FRAMEWORK "ms-concurrency"
#elif defined(HAVE_PTHREADS_PF)
#  define CV_PARALLEL_FRAMEWORK "pthreads"
#endif
00132 
00133 namespace cv
00134 {
00135     ParallelLoopBody::~ParallelLoopBody() {}
00136 #ifdef HAVE_PTHREADS_PF
00137     void parallel_for_pthreads(const cv::Range& range, const cv::ParallelLoopBody& body, double nstripes);
00138     size_t parallel_pthreads_get_threads_num();
00139     void parallel_pthreads_set_threads_num(int num);
00140 #endif
00141 }
00142 
00143 
00144 namespace
00145 {
00146 #ifdef CV_PARALLEL_FRAMEWORK
00147     class ParallelLoopBodyWrapper
00148     {
00149     public:
00150         ParallelLoopBodyWrapper(const cv::ParallelLoopBody& _body, const cv::Range& _r, double _nstripes)
00151         {
00152             body = &_body;
00153             wholeRange = _r;
00154             double len = wholeRange.end - wholeRange.start;
00155             nstripes = cvRound(_nstripes <= 0 ? len : MIN(MAX(_nstripes, 1.), len));
00156         }
00157         void operator()(const cv::Range& sr) const
00158         {
00159             cv::Range r;
00160             r.start = (int)(wholeRange.start +
00161                             ((uint64)sr.start*(wholeRange.end - wholeRange.start) + nstripes/2)/nstripes);
00162             r.end = sr.end >= nstripes ? wholeRange.end : (int)(wholeRange.start +
00163                             ((uint64)sr.end*(wholeRange.end - wholeRange.start) + nstripes/2)/nstripes);
00164             (*body)(r);
00165         }
00166         cv::Range stripeRange() const { return cv::Range(0, nstripes); }
00167 
00168     protected:
00169         const cv::ParallelLoopBody* body;
00170         cv::Range wholeRange;
00171         int nstripes;
00172     };
00173 
00174 #if defined HAVE_TBB
00175     class ProxyLoopBody : public ParallelLoopBodyWrapper
00176     {
00177     public:
00178         ProxyLoopBody(const cv::ParallelLoopBody& _body, const cv::Range& _r, double _nstripes)
00179         : ParallelLoopBodyWrapper(_body, _r, _nstripes)
00180         {}
00181 
00182         void operator ()(const tbb::blocked_range<int>& range) const
00183         {
00184             this->ParallelLoopBodyWrapper::operator()(cv::Range(range.begin(), range.end()));
00185         }
00186     };
00187 #elif defined HAVE_CSTRIPES || defined HAVE_OPENMP
00188     typedef ParallelLoopBodyWrapper ProxyLoopBody;
00189 #elif defined HAVE_GCD
00190     typedef ParallelLoopBodyWrapper ProxyLoopBody;
00191     static void block_function(void* context, size_t index)
00192     {
00193         ProxyLoopBody* ptr_body = static_cast<ProxyLoopBody*>(context);
00194         (*ptr_body)(cv::Range((int)index, (int)index + 1));
00195     }
00196 #elif defined WINRT || defined HAVE_CONCURRENCY
00197     class ProxyLoopBody : public ParallelLoopBodyWrapper
00198     {
00199     public:
00200         ProxyLoopBody(const cv::ParallelLoopBody& _body, const cv::Range& _r, double _nstripes)
00201         : ParallelLoopBodyWrapper(_body, _r, _nstripes)
00202         {}
00203 
00204         void operator ()(int i) const
00205         {
00206             this->ParallelLoopBodyWrapper::operator()(cv::Range(i, i + 1));
00207         }
00208     };
00209 #else
00210     typedef ParallelLoopBodyWrapper ProxyLoopBody;
00211 #endif
00212 
// Thread count last passed to cv::setNumThreads(); starts at -1.
// A value of 0 makes cv::parallel_for_ run the body on the calling thread.
static int numThreads = -1;

#if defined HAVE_TBB
// created in deferred mode; cv::setNumThreads() initializes it on demand
static tbb::task_scheduler_init tbbScheduler(tbb::task_scheduler_init::deferred);
#elif defined HAVE_CSTRIPES
// nothing for C=
#elif defined HAVE_OPENMP
// captured at static-initialization time; used by cv::setNumThreads() when threads <= 0
static int numThreadsMax = omp_get_max_threads();
#elif defined HAVE_GCD
// nothing for GCD
#elif defined WINRT
// nothing for WINRT
#elif defined HAVE_CONCURRENCY

// Minimal holder for a Concurrency::Scheduler pointer: assigning a new pointer
// releases the previously held scheduler. The destructor does not release anything.
class SchedPtr
{
public:
    SchedPtr() : sched_(0) {}
    ~SchedPtr() {}

    void operator=(Concurrency::Scheduler* sched)
    {
        if (sched_)
            sched_->Release();
        sched_ = sched;
    }

    operator Concurrency::Scheduler*() { return sched_; }
    Concurrency::Scheduler* operator->() { return sched_; }

private:
    Concurrency::Scheduler* sched_;
};
static SchedPtr pplScheduler;

#endif
00246 
00247 #endif // CV_PARALLEL_FRAMEWORK
00248 
00249 } //namespace
00250 
00251 /* ================================   parallel_for_  ================================ */
00252 
00253 void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body, double nstripes)
00254 {
00255 #ifdef CV_PARALLEL_FRAMEWORK
00256 
00257     if(numThreads != 0)
00258     {
00259         ProxyLoopBody pbody(body, range, nstripes);
00260         cv::Range stripeRange = pbody.stripeRange();
00261         if( stripeRange.end - stripeRange.start == 1 )
00262         {
00263             body(range);
00264             return;
00265         }
00266 
00267 #if defined HAVE_TBB
00268 
00269         tbb::parallel_for(tbb::blocked_range<int>(stripeRange.start, stripeRange.end), pbody);
00270 
00271 #elif defined HAVE_CSTRIPES
00272 
00273         parallel(MAX(0, numThreads))
00274         {
00275             int offset = stripeRange.start;
00276             int len = stripeRange.end - offset;
00277             Range r(offset + CPX_RANGE_START(len), offset + CPX_RANGE_END(len));
00278             pbody(r);
00279             barrier();
00280         }
00281 
00282 #elif defined HAVE_OPENMP
00283 
00284         #pragma omp parallel for schedule(dynamic)
00285         for (int i = stripeRange.start; i < stripeRange.end; ++i)
00286             pbody(Range(i, i + 1));
00287 
00288 #elif defined HAVE_GCD
00289 
00290         dispatch_queue_t concurrent_queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
00291         dispatch_apply_f(stripeRange.end - stripeRange.start, concurrent_queue, &pbody, block_function);
00292 
00293 #elif defined WINRT
00294 
00295         Concurrency::parallel_for(stripeRange.start, stripeRange.end, pbody);
00296 
00297 #elif defined HAVE_CONCURRENCY
00298 
00299         if(!pplScheduler || pplScheduler->Id() == Concurrency::CurrentScheduler::Id())
00300         {
00301             Concurrency::parallel_for(stripeRange.start, stripeRange.end, pbody);
00302         }
00303         else
00304         {
00305             pplScheduler->Attach();
00306             Concurrency::parallel_for(stripeRange.start, stripeRange.end, pbody);
00307             Concurrency::CurrentScheduler::Detach();
00308         }
00309 
00310 #elif defined HAVE_PTHREADS_PF
00311 
00312         parallel_for_pthreads(range, body, nstripes);
00313 
00314 #else
00315 
00316 #error You have hacked and compiling with unsupported parallel framework
00317 
00318 #endif
00319 
00320     }
00321     else
00322 
00323 #endif // CV_PARALLEL_FRAMEWORK
00324     {
00325         (void)nstripes;
00326         body(range);
00327     }
00328 }
00329 
00330 int cv::getNumThreads(void)
00331 {
00332 #ifdef CV_PARALLEL_FRAMEWORK
00333 
00334     if(numThreads == 0)
00335         return 1;
00336 
00337 #endif
00338 
00339 #if defined HAVE_TBB
00340 
00341     return tbbScheduler.is_active()
00342            ? numThreads
00343            : tbb::task_scheduler_init::default_num_threads();
00344 
00345 #elif defined HAVE_CSTRIPES
00346 
00347     return numThreads > 0
00348             ? numThreads
00349             : cv::getNumberOfCPUs();
00350 
00351 #elif defined HAVE_OPENMP
00352 
00353     return omp_get_max_threads();
00354 
00355 #elif defined HAVE_GCD
00356 
00357     return 512; // the GCD thread pool limit
00358 
00359 #elif defined WINRT
00360 
00361     return 0;
00362 
00363 #elif defined HAVE_CONCURRENCY
00364 
00365     return 1 + (pplScheduler == 0
00366         ? Concurrency::CurrentScheduler::Get()->GetNumberOfVirtualProcessors()
00367         : pplScheduler->GetNumberOfVirtualProcessors());
00368 
00369 #elif defined HAVE_PTHREADS_PF
00370 
00371         return parallel_pthreads_get_threads_num();
00372 
00373 #else
00374 
00375     return 1;
00376 
00377 #endif
00378 }
00379 
00380 void cv::setNumThreads( int threads )
00381 {
00382     (void)threads;
00383 #ifdef CV_PARALLEL_FRAMEWORK
00384     numThreads = threads;
00385 #endif
00386 
00387 #ifdef HAVE_TBB
00388 
00389     if(tbbScheduler.is_active()) tbbScheduler.terminate();
00390     if(threads > 0) tbbScheduler.initialize(threads);
00391 
00392 #elif defined HAVE_CSTRIPES
00393 
00394     return; // nothing needed
00395 
00396 #elif defined HAVE_OPENMP
00397 
00398     if(omp_in_parallel())
00399         return; // can't change number of openmp threads inside a parallel region
00400 
00401     omp_set_num_threads(threads > 0 ? threads : numThreadsMax);
00402 
00403 #elif defined HAVE_GCD
00404 
00405     // unsupported
00406     // there is only private dispatch_queue_set_width() and only for desktop
00407 
00408 #elif defined WINRT
00409 
00410     return;
00411 
00412 #elif defined HAVE_CONCURRENCY
00413 
00414     if (threads <= 0)
00415     {
00416         pplScheduler = 0;
00417     }
00418     else if (threads == 1)
00419     {
00420         // Concurrency always uses >=2 threads, so we just disable it if 1 thread is requested
00421         numThreads = 0;
00422     }
00423     else if (pplScheduler == 0 || 1 + pplScheduler->GetNumberOfVirtualProcessors() != (unsigned int)threads)
00424     {
00425         pplScheduler = Concurrency::Scheduler::Create(Concurrency::SchedulerPolicy(2,
00426                        Concurrency::MinConcurrency, threads-1,
00427                        Concurrency::MaxConcurrency, threads-1));
00428     }
00429 
00430 #elif defined HAVE_PTHREADS_PF
00431 
00432     parallel_pthreads_set_threads_num(threads);
00433 
00434 #endif
00435 }
00436 
00437 
00438 int cv::getThreadNum(void)
00439 {
00440 #if defined HAVE_TBB
00441     #if TBB_INTERFACE_VERSION >= 6100 && defined TBB_PREVIEW_TASK_ARENA && TBB_PREVIEW_TASK_ARENA
00442         return tbb::task_arena::current_slot();
00443     #else
00444         return 0;
00445     #endif
00446 #elif defined HAVE_CSTRIPES
00447     return pix();
00448 #elif defined HAVE_OPENMP
00449     return omp_get_thread_num();
00450 #elif defined HAVE_GCD
00451     return (int)(size_t)(void*)pthread_self(); // no zero-based indexing
00452 #elif defined WINRT
00453     return 0;
00454 #elif defined HAVE_CONCURRENCY
00455     return std::max(0, (int)Concurrency::Context::VirtualProcessorId()); // zero for master thread, unique number for others but not necessary 1,2,3,...
00456 #elif defined HAVE_PTHREADS_PF
00457     return (int)(size_t)(void*)pthread_self(); // no zero-based indexing
00458 #else
00459     return 0;
00460 #endif
00461 }
00462 
00463 #ifdef ANDROID
/**
 * Counts the CPUs listed in /sys/devices/system/cpu/possible.
 *
 * The first line of the file is parsed as a comma-separated list whose entries
 * are either a single number ("3") or an inclusive range ("5-7"), for example
 * "0-1,3,5-7,10,13-15". Entries that do not parse as a number, and ranges whose
 * end is below their start, are ignored instead of corrupting the total.
 *
 * @return number of CPUs found; 1 if the file cannot be opened or read, or if
 *         no valid entry was found
 */
static inline int getNumberOfCPUsImpl()
{
    FILE* cpuPossible = fopen("/sys/devices/system/cpu/possible", "r");
    if(!cpuPossible)
        return 1;

    char buf[2000]; //big enough for 1000 CPUs in worst possible configuration
    char* pbuf = fgets(buf, sizeof(buf), cpuPossible);
    fclose(cpuPossible);
    if(!pbuf)
        return 1;

    int cpusAvailable = 0;

    while(*pbuf)
    {
        // cut the next comma-separated entry out of the buffer
        const char* pos = pbuf;
        while(*pbuf && *pbuf != ',')
            ++pbuf;
        if(*pbuf) *pbuf++ = 0;

        int rstart = 0, rend = 0;
        const int parsed = sscanf(pos, "%d-%d", &rstart, &rend);
        if(parsed == 2)
        {
            // inclusive range; an inverted range contributes nothing
            if(rend >= rstart)
                cpusAvailable += rend - rstart + 1;
        }
        else if(parsed == 1)
        {
            // single CPU index
            ++cpusAvailable;
        }
        // parsed <= 0: empty or malformed entry (e.g. a bare newline) - skipped
    }
    return cpusAvailable > 0 ? cpusAvailable : 1;
}
00501 #endif
00502 
00503 int cv::getNumberOfCPUs(void)
00504 {
00505 #if defined WIN32 || defined _WIN32
00506     SYSTEM_INFO sysinfo;
00507 #if defined(_M_ARM) || defined(_M_X64) || defined(WINRT)
00508     GetNativeSystemInfo( &sysinfo );
00509 #else
00510     GetSystemInfo( &sysinfo );
00511 #endif
00512 
00513     return (int)sysinfo.dwNumberOfProcessors;
00514 #elif defined ANDROID
00515     static int ncpus = getNumberOfCPUsImpl();
00516     return ncpus;
00517 #elif defined __linux__
00518     return (int)sysconf( _SC_NPROCESSORS_ONLN );
00519 #elif defined __APPLE__
00520     int numCPU=0;
00521     int mib[4];
00522     size_t len = sizeof(numCPU);
00523 
00524     /* set the mib for hw.ncpu */
00525     mib[0] = CTL_HW;
00526     mib[1] = HW_AVAILCPU;  // alternatively, try HW_NCPU;
00527 
00528     /* get the number of CPUs from the system */
00529     sysctl(mib, 2, &numCPU, &len, NULL, 0);
00530 
00531     if( numCPU < 1 )
00532     {
00533         mib[1] = HW_NCPU;
00534         sysctl( mib, 2, &numCPU, &len, NULL, 0 );
00535 
00536         if( numCPU < 1 )
00537             numCPU = 1;
00538     }
00539 
00540     return (int)numCPU;
00541 #else
00542     return 1;
00543 #endif
00544 }
00545 
00546 const char* cv::currentParallelFramework() {
00547 #ifdef CV_PARALLEL_FRAMEWORK
00548     return CV_PARALLEL_FRAMEWORK;
00549 #else
00550     return NULL;
00551 #endif
00552 }
00553 
00554 CV_IMPL void cvSetNumThreads(int nt)
00555 {
00556     cv::setNumThreads(nt);
00557 }
00558 
00559 CV_IMPL int cvGetNumThreads()
00560 {
00561     return cv::getNumThreads();
00562 }
00563 
00564 CV_IMPL int cvGetThreadNum()
00565 {
00566     return cv::getThreadNum();
00567 }
00568