Important changes to repositories hosted on mbed.com
Mbed hosted mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software download the repository Zip archive or clone locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of gr-peach-opencv-project by
kmeans.cpp
00001 /*M/////////////////////////////////////////////////////////////////////////////////////// 00002 // 00003 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 00004 // 00005 // By downloading, copying, installing or using the software you agree to this license. 00006 // If you do not agree to this license, do not download, install, 00007 // copy or use the software. 00008 // 00009 // 00010 // License Agreement 00011 // For Open Source Computer Vision Library 00012 // 00013 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. 00014 // Copyright (C) 2009, Willow Garage Inc., all rights reserved. 00015 // Copyright (C) 2013, OpenCV Foundation, all rights reserved. 00016 // Third party copyrights are property of their respective owners. 00017 // 00018 // Redistribution and use in source and binary forms, with or without modification, 00019 // are permitted provided that the following conditions are met: 00020 // 00021 // * Redistribution's of source code must retain the above copyright notice, 00022 // this list of conditions and the following disclaimer. 00023 // 00024 // * Redistribution's in binary form must reproduce the above copyright notice, 00025 // this list of conditions and the following disclaimer in the documentation 00026 // and/or other materials provided with the distribution. 00027 // 00028 // * The name of the copyright holders may not be used to endorse or promote products 00029 // derived from this software without specific prior written permission. 00030 // 00031 // This software is provided by the copyright holders and contributors "as is" and 00032 // any express or implied warranties, including, but not limited to, the implied 00033 // warranties of merchantability and fitness for a particular purpose are disclaimed. 00034 // In no event shall the Intel Corporation or contributors be liable for any direct, 00035 // indirect, incidental, special, exemplary, or consequential damages 00036 // (including, but not limited to, procurement of substitute goods or services; 00037 // loss of use, data, or profits; or business interruption) however caused 00038 // and on any theory of liability, whether in contract, strict liability, 00039 // or tort (including negligence or otherwise) arising in any way out of 00040 // the use of this software, even if advised of the possibility of such damage. 00041 // 00042 //M*/ 00043 00044 #include "precomp.hpp" 00045 00046 ////////////////////////////////////////// kmeans //////////////////////////////////////////// 00047 00048 namespace cv 00049 { 00050 00051 static void generateRandomCenter(const std::vector<Vec2f>& box, float* center, RNG& rng) 00052 { 00053 size_t j, dims = box.size(); 00054 float margin = 1.f/dims; 00055 for( j = 0; j < dims; j++ ) 00056 center[j] = ((float)rng*(1.f+margin*2.f)-margin)*(box[j][1] - box[j][0]) + box[j][0]; 00057 } 00058 00059 class KMeansPPDistanceComputer : public ParallelLoopBody 00060 { 00061 public: 00062 KMeansPPDistanceComputer( float *_tdist2, 00063 const float *_data, 00064 const float *_dist, 00065 int _dims, 00066 size_t _step, 00067 size_t _stepci ) 00068 : tdist2(_tdist2), 00069 data(_data), 00070 dist(_dist), 00071 dims(_dims), 00072 step(_step), 00073 stepci(_stepci) { } 00074 00075 void operator()( const cv::Range& range ) const 00076 { 00077 const int begin = range.start; 00078 const int end = range.end; 00079 00080 for ( int i = begin; i<end; i++ ) 00081 { 00082 tdist2[i] = std::min(normL2Sqr(data + step*i, data + stepci, dims), dist[i]); 00083 } 00084 } 00085 00086 private: 00087 KMeansPPDistanceComputer& operator=(const KMeansPPDistanceComputer&); // to quiet MSVC 00088 00089 float *tdist2; 00090 const float *data; 00091 const float *dist; 00092 const int dims; 00093 const size_t step; 00094 const size_t stepci; 00095 }; 00096 00097 /* 00098 k-means center initialization using the following algorithm: 00099 Arthur & Vassilvitskii (2007) k-means++: The Advantages of Careful Seeding 00100 */ 00101 static void generateCentersPP(const Mat& _data, Mat& _out_centers, 00102 int K, RNG& rng, int trials) 00103 { 00104 int i, j, k, dims = _data.cols, N = _data.rows; 00105 const float* data = _data.ptr<float>(0); 00106 size_t step = _data.step/sizeof(data[0]); 00107 std::vector<int> _centers(K); 00108 int* centers = &_centers[0]; 00109 std::vector<float> _dist(N*3); 00110 float* dist = &_dist[0], *tdist = dist + N, *tdist2 = tdist + N; 00111 double sum0 = 0; 00112 00113 centers[0] = (unsigned)rng % N; 00114 00115 for( i = 0; i < N; i++ ) 00116 { 00117 dist[i] = normL2Sqr(data + step*i, data + step*centers[0], dims); 00118 sum0 += dist[i]; 00119 } 00120 00121 for( k = 1; k < K; k++ ) 00122 { 00123 double bestSum = DBL_MAX; 00124 int bestCenter = -1; 00125 00126 for( j = 0; j < trials; j++ ) 00127 { 00128 double p = (double)rng*sum0, s = 0; 00129 for( i = 0; i < N-1; i++ ) 00130 if( (p -= dist[i]) <= 0 ) 00131 break; 00132 int ci = i; 00133 00134 parallel_for_(Range(0, N), 00135 KMeansPPDistanceComputer(tdist2, data, dist, dims, step, step*ci)); 00136 for( i = 0; i < N; i++ ) 00137 { 00138 s += tdist2[i]; 00139 } 00140 00141 if( s < bestSum ) 00142 { 00143 bestSum = s; 00144 bestCenter = ci; 00145 std::swap(tdist, tdist2); 00146 } 00147 } 00148 centers[k] = bestCenter; 00149 sum0 = bestSum; 00150 std::swap(dist, tdist); 00151 } 00152 00153 for( k = 0; k < K; k++ ) 00154 { 00155 const float* src = data + step*centers[k]; 00156 float* dst = _out_centers.ptr<float>(k); 00157 for( j = 0; j < dims; j++ ) 00158 dst[j] = src[j]; 00159 } 00160 } 00161 00162 class KMeansDistanceComputer : public ParallelLoopBody 00163 { 00164 public: 00165 KMeansDistanceComputer( double *_distances, 00166 int *_labels, 00167 const Mat& _data, 00168 const Mat& _centers ) 00169 : distances(_distances), 00170 labels(_labels), 00171 data(_data), 00172 centers(_centers) 00173 { 00174 } 00175 00176 void operator()( const Range& range ) const 00177 { 00178 const int begin = range.start; 00179 const int end = range.end; 00180 const int K = centers.rows; 00181 const int dims = centers.cols; 00182 00183 for( int i = begin; i<end; ++i) 00184 { 00185 const float *sample = data.ptr<float>(i); 00186 int k_best = 0; 00187 double min_dist = DBL_MAX; 00188 00189 for( int k = 0; k < K; k++ ) 00190 { 00191 const float* center = centers.ptr<float>(k); 00192 const double dist = normL2Sqr(sample, center, dims); 00193 00194 if( min_dist > dist ) 00195 { 00196 min_dist = dist; 00197 k_best = k; 00198 } 00199 } 00200 00201 distances[i] = min_dist; 00202 labels[i] = k_best; 00203 } 00204 } 00205 00206 private: 00207 KMeansDistanceComputer& operator=(const KMeansDistanceComputer&); // to quiet MSVC 00208 00209 double *distances; 00210 int *labels; 00211 const Mat& data; 00212 const Mat& centers; 00213 }; 00214 00215 } 00216 00217 double cv::kmeans( InputArray _data, int K, 00218 InputOutputArray _bestLabels, 00219 TermCriteria criteria, int attempts, 00220 int flags, OutputArray _centers ) 00221 { 00222 const int SPP_TRIALS = 3; 00223 Mat data0 = _data.getMat(); 00224 bool isrow = data0.rows == 1; 00225 int N = isrow ? data0.cols : data0.rows; 00226 int dims = (isrow ? 1 : data0.cols)*data0.channels(); 00227 int type = data0.depth(); 00228 00229 attempts = std::max(attempts, 1); 00230 CV_Assert( data0.dims <= 2 && type == CV_32F && K > 0 ); 00231 CV_Assert( N >= K ); 00232 00233 Mat data(N, dims, CV_32F, data0.ptr(), isrow ? dims * sizeof(float) : static_cast<size_t>(data0.step)); 00234 00235 _bestLabels.create(N, 1, CV_32S, -1, true); 00236 00237 Mat _labels, best_labels = _bestLabels.getMat(); 00238 if( flags & CV_KMEANS_USE_INITIAL_LABELS ) 00239 { 00240 CV_Assert( (best_labels.cols == 1 || best_labels.rows == 1) && 00241 best_labels.cols*best_labels.rows == N && 00242 best_labels.type() == CV_32S && 00243 best_labels.isContinuous()); 00244 best_labels.copyTo(_labels); 00245 } 00246 else 00247 { 00248 if( !((best_labels.cols == 1 || best_labels.rows == 1) && 00249 best_labels.cols*best_labels.rows == N && 00250 best_labels.type() == CV_32S && 00251 best_labels.isContinuous())) 00252 best_labels.create(N, 1, CV_32S); 00253 _labels.create(best_labels.size(), best_labels.type()); 00254 } 00255 int* labels = _labels.ptr<int>(); 00256 00257 Mat centers(K, dims, type), old_centers(K, dims, type), temp(1, dims, type); 00258 std::vector<int> counters(K); 00259 std::vector<Vec2f> _box(dims); 00260 Vec2f* box = &_box[0]; 00261 double best_compactness = DBL_MAX, compactness = 0; 00262 RNG& rng = theRNG(); 00263 int a, iter, i, j, k; 00264 00265 if( criteria.type & TermCriteria::EPS ) 00266 criteria.epsilon = std::max(criteria.epsilon, 0.); 00267 else 00268 criteria.epsilon = FLT_EPSILON; 00269 criteria.epsilon *= criteria.epsilon; 00270 00271 if( criteria.type & TermCriteria::COUNT ) 00272 criteria.maxCount = std::min(std::max(criteria.maxCount, 2), 100); 00273 else 00274 criteria.maxCount = 100; 00275 00276 if( K == 1 ) 00277 { 00278 attempts = 1; 00279 criteria.maxCount = 2; 00280 } 00281 00282 const float* sample = data.ptr<float>(0); 00283 for( j = 0; j < dims; j++ ) 00284 box[j] = Vec2f(sample[j], sample[j]); 00285 00286 for( i = 1; i < N; i++ ) 00287 { 00288 sample = data.ptr<float>(i); 00289 for( j = 0; j < dims; j++ ) 00290 { 00291 float v = sample[j]; 00292 box[j][0] = std::min(box[j][0], v); 00293 box[j][1] = std::max(box[j][1], v); 00294 } 00295 } 00296 00297 for( a = 0; a < attempts; a++ ) 00298 { 00299 double max_center_shift = DBL_MAX; 00300 for( iter = 0;; ) 00301 { 00302 swap(centers, old_centers); 00303 00304 if( iter == 0 && (a > 0 || !(flags & KMEANS_USE_INITIAL_LABELS)) ) 00305 { 00306 if( flags & KMEANS_PP_CENTERS ) 00307 generateCentersPP(data, centers, K, rng, SPP_TRIALS); 00308 else 00309 { 00310 for( k = 0; k < K; k++ ) 00311 generateRandomCenter(_box, centers.ptr<float>(k), rng); 00312 } 00313 } 00314 else 00315 { 00316 if( iter == 0 && a == 0 && (flags & KMEANS_USE_INITIAL_LABELS) ) 00317 { 00318 for( i = 0; i < N; i++ ) 00319 CV_Assert( (unsigned)labels[i] < (unsigned)K ); 00320 } 00321 00322 // compute centers 00323 centers = Scalar (0); 00324 for( k = 0; k < K; k++ ) 00325 counters[k] = 0; 00326 00327 for( i = 0; i < N; i++ ) 00328 { 00329 sample = data.ptr<float>(i); 00330 k = labels[i]; 00331 float* center = centers.ptr<float>(k); 00332 j=0; 00333 #if CV_ENABLE_UNROLLED 00334 for(; j <= dims - 4; j += 4 ) 00335 { 00336 float t0 = center[j] + sample[j]; 00337 float t1 = center[j+1] + sample[j+1]; 00338 00339 center[j] = t0; 00340 center[j+1] = t1; 00341 00342 t0 = center[j+2] + sample[j+2]; 00343 t1 = center[j+3] + sample[j+3]; 00344 00345 center[j+2] = t0; 00346 center[j+3] = t1; 00347 } 00348 #endif 00349 for( ; j < dims; j++ ) 00350 center[j] += sample[j]; 00351 counters[k]++; 00352 } 00353 00354 if( iter > 0 ) 00355 max_center_shift = 0; 00356 00357 for( k = 0; k < K; k++ ) 00358 { 00359 if( counters[k] != 0 ) 00360 continue; 00361 00362 // if some cluster appeared to be empty then: 00363 // 1. find the biggest cluster 00364 // 2. find the farthest from the center point in the biggest cluster 00365 // 3. exclude the farthest point from the biggest cluster and form a new 1-point cluster. 00366 int max_k = 0; 00367 for( int k1 = 1; k1 < K; k1++ ) 00368 { 00369 if( counters[max_k] < counters[k1] ) 00370 max_k = k1; 00371 } 00372 00373 double max_dist = 0; 00374 int farthest_i = -1; 00375 float* new_center = centers.ptr<float>(k); 00376 float* old_center = centers.ptr<float>(max_k); 00377 float* _old_center = temp.ptr<float>(); // normalized 00378 float scale = 1.f/counters[max_k]; 00379 for( j = 0; j < dims; j++ ) 00380 _old_center[j] = old_center[j]*scale; 00381 00382 for( i = 0; i < N; i++ ) 00383 { 00384 if( labels[i] != max_k ) 00385 continue; 00386 sample = data.ptr<float>(i); 00387 double dist = normL2Sqr(sample, _old_center, dims); 00388 00389 if( max_dist <= dist ) 00390 { 00391 max_dist = dist; 00392 farthest_i = i; 00393 } 00394 } 00395 00396 counters[max_k]--; 00397 counters[k]++; 00398 labels[farthest_i] = k; 00399 sample = data.ptr<float>(farthest_i); 00400 00401 for( j = 0; j < dims; j++ ) 00402 { 00403 old_center[j] -= sample[j]; 00404 new_center[j] += sample[j]; 00405 } 00406 } 00407 00408 for( k = 0; k < K; k++ ) 00409 { 00410 float* center = centers.ptr<float>(k); 00411 CV_Assert( counters[k] != 0 ); 00412 00413 float scale = 1.f/counters[k]; 00414 for( j = 0; j < dims; j++ ) 00415 center[j] *= scale; 00416 00417 if( iter > 0 ) 00418 { 00419 double dist = 0; 00420 const float* old_center = old_centers.ptr<float>(k); 00421 for( j = 0; j < dims; j++ ) 00422 { 00423 double t = center[j] - old_center[j]; 00424 dist += t*t; 00425 } 00426 max_center_shift = std::max(max_center_shift, dist); 00427 } 00428 } 00429 } 00430 00431 if( ++iter == MAX(criteria.maxCount, 2) || max_center_shift <= criteria.epsilon ) 00432 break; 00433 00434 // assign labels 00435 Mat dists(1, N, CV_64F); 00436 double* dist = dists.ptr<double>(0); 00437 parallel_for_(Range(0, N), 00438 KMeansDistanceComputer(dist, labels, data, centers)); 00439 compactness = 0; 00440 for( i = 0; i < N; i++ ) 00441 { 00442 compactness += dist[i]; 00443 } 00444 } 00445 00446 if( compactness < best_compactness ) 00447 { 00448 best_compactness = compactness; 00449 if( _centers.needed() ) 00450 centers.copyTo(_centers); 00451 _labels.copyTo(best_labels); 00452 } 00453 } 00454 00455 return best_compactness; 00456 } 00457
Generated on Tue Jul 12 2022 15:17:26 by
1.7.2
