openCV library for Renesas RZ/A

Dependents:   RZ_A2M_Mbed_samples

Committer:
RyoheiHagimoto
Date:
Fri Jan 29 04:53:38 2021 +0000
Revision:
0:0e0631af0305
copied from https://github.com/d-kato/opencv-lib.

Who changed what in which revision?

UserRevisionLine numberNew contents of line
RyoheiHagimoto 0:0e0631af0305 1 /***********************************************************************
RyoheiHagimoto 0:0e0631af0305 2 * Software License Agreement (BSD License)
RyoheiHagimoto 0:0e0631af0305 3 *
RyoheiHagimoto 0:0e0631af0305 4 * Copyright 2008-2011 Marius Muja (mariusm@cs.ubc.ca). All rights reserved.
RyoheiHagimoto 0:0e0631af0305 5 * Copyright 2008-2011 David G. Lowe (lowe@cs.ubc.ca). All rights reserved.
RyoheiHagimoto 0:0e0631af0305 6 *
RyoheiHagimoto 0:0e0631af0305 7 * THE BSD LICENSE
RyoheiHagimoto 0:0e0631af0305 8 *
RyoheiHagimoto 0:0e0631af0305 9 * Redistribution and use in source and binary forms, with or without
RyoheiHagimoto 0:0e0631af0305 10 * modification, are permitted provided that the following conditions
RyoheiHagimoto 0:0e0631af0305 11 * are met:
RyoheiHagimoto 0:0e0631af0305 12 *
RyoheiHagimoto 0:0e0631af0305 13 * 1. Redistributions of source code must retain the above copyright
RyoheiHagimoto 0:0e0631af0305 14 * notice, this list of conditions and the following disclaimer.
RyoheiHagimoto 0:0e0631af0305 15 * 2. Redistributions in binary form must reproduce the above copyright
RyoheiHagimoto 0:0e0631af0305 16 * notice, this list of conditions and the following disclaimer in the
RyoheiHagimoto 0:0e0631af0305 17 * documentation and/or other materials provided with the distribution.
RyoheiHagimoto 0:0e0631af0305 18 *
RyoheiHagimoto 0:0e0631af0305 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
RyoheiHagimoto 0:0e0631af0305 20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
RyoheiHagimoto 0:0e0631af0305 21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
RyoheiHagimoto 0:0e0631af0305 22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
RyoheiHagimoto 0:0e0631af0305 23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
RyoheiHagimoto 0:0e0631af0305 24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
RyoheiHagimoto 0:0e0631af0305 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
RyoheiHagimoto 0:0e0631af0305 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
RyoheiHagimoto 0:0e0631af0305 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
RyoheiHagimoto 0:0e0631af0305 28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
RyoheiHagimoto 0:0e0631af0305 29 *************************************************************************/
RyoheiHagimoto 0:0e0631af0305 30
RyoheiHagimoto 0:0e0631af0305 31 #ifndef OPENCV_FLANN_HIERARCHICAL_CLUSTERING_INDEX_H_
RyoheiHagimoto 0:0e0631af0305 32 #define OPENCV_FLANN_HIERARCHICAL_CLUSTERING_INDEX_H_
RyoheiHagimoto 0:0e0631af0305 33
RyoheiHagimoto 0:0e0631af0305 34 #include <algorithm>
RyoheiHagimoto 0:0e0631af0305 35 #include <map>
RyoheiHagimoto 0:0e0631af0305 36 #include <cassert>
RyoheiHagimoto 0:0e0631af0305 37 #include <limits>
RyoheiHagimoto 0:0e0631af0305 38 #include <cmath>
RyoheiHagimoto 0:0e0631af0305 39
RyoheiHagimoto 0:0e0631af0305 40 #include "general.h"
RyoheiHagimoto 0:0e0631af0305 41 #include "nn_index.h"
RyoheiHagimoto 0:0e0631af0305 42 #include "dist.h"
RyoheiHagimoto 0:0e0631af0305 43 #include "matrix.h"
RyoheiHagimoto 0:0e0631af0305 44 #include "result_set.h"
RyoheiHagimoto 0:0e0631af0305 45 #include "heap.h"
RyoheiHagimoto 0:0e0631af0305 46 #include "allocator.h"
RyoheiHagimoto 0:0e0631af0305 47 #include "random.h"
RyoheiHagimoto 0:0e0631af0305 48 #include "saving.h"
RyoheiHagimoto 0:0e0631af0305 49
RyoheiHagimoto 0:0e0631af0305 50
RyoheiHagimoto 0:0e0631af0305 51 namespace cvflann
RyoheiHagimoto 0:0e0631af0305 52 {
RyoheiHagimoto 0:0e0631af0305 53
RyoheiHagimoto 0:0e0631af0305 54 struct HierarchicalClusteringIndexParams : public IndexParams
RyoheiHagimoto 0:0e0631af0305 55 {
RyoheiHagimoto 0:0e0631af0305 56 HierarchicalClusteringIndexParams(int branching = 32,
RyoheiHagimoto 0:0e0631af0305 57 flann_centers_init_t centers_init = FLANN_CENTERS_RANDOM,
RyoheiHagimoto 0:0e0631af0305 58 int trees = 4, int leaf_size = 100)
RyoheiHagimoto 0:0e0631af0305 59 {
RyoheiHagimoto 0:0e0631af0305 60 (*this)["algorithm"] = FLANN_INDEX_HIERARCHICAL;
RyoheiHagimoto 0:0e0631af0305 61 // The branching factor used in the hierarchical clustering
RyoheiHagimoto 0:0e0631af0305 62 (*this)["branching"] = branching;
RyoheiHagimoto 0:0e0631af0305 63 // Algorithm used for picking the initial cluster centers
RyoheiHagimoto 0:0e0631af0305 64 (*this)["centers_init"] = centers_init;
RyoheiHagimoto 0:0e0631af0305 65 // number of parallel trees to build
RyoheiHagimoto 0:0e0631af0305 66 (*this)["trees"] = trees;
RyoheiHagimoto 0:0e0631af0305 67 // maximum leaf size
RyoheiHagimoto 0:0e0631af0305 68 (*this)["leaf_size"] = leaf_size;
RyoheiHagimoto 0:0e0631af0305 69 }
RyoheiHagimoto 0:0e0631af0305 70 };
RyoheiHagimoto 0:0e0631af0305 71
RyoheiHagimoto 0:0e0631af0305 72
RyoheiHagimoto 0:0e0631af0305 73 /**
RyoheiHagimoto 0:0e0631af0305 74 * Hierarchical index
RyoheiHagimoto 0:0e0631af0305 75 *
RyoheiHagimoto 0:0e0631af0305 76 * Contains a tree constructed through a hierarchical clustering
RyoheiHagimoto 0:0e0631af0305 77 * and other information for indexing a set of points for nearest-neighbour matching.
RyoheiHagimoto 0:0e0631af0305 78 */
RyoheiHagimoto 0:0e0631af0305 79 template <typename Distance>
RyoheiHagimoto 0:0e0631af0305 80 class HierarchicalClusteringIndex : public NNIndex<Distance>
RyoheiHagimoto 0:0e0631af0305 81 {
RyoheiHagimoto 0:0e0631af0305 82 public:
RyoheiHagimoto 0:0e0631af0305 83 typedef typename Distance::ElementType ElementType;
RyoheiHagimoto 0:0e0631af0305 84 typedef typename Distance::ResultType DistanceType;
RyoheiHagimoto 0:0e0631af0305 85
RyoheiHagimoto 0:0e0631af0305 86 private:
RyoheiHagimoto 0:0e0631af0305 87
RyoheiHagimoto 0:0e0631af0305 88
RyoheiHagimoto 0:0e0631af0305 89 typedef void (HierarchicalClusteringIndex::* centersAlgFunction)(int, int*, int, int*, int&);
RyoheiHagimoto 0:0e0631af0305 90
RyoheiHagimoto 0:0e0631af0305 91 /**
RyoheiHagimoto 0:0e0631af0305 92 * The function used for choosing the cluster centers.
RyoheiHagimoto 0:0e0631af0305 93 */
RyoheiHagimoto 0:0e0631af0305 94 centersAlgFunction chooseCenters;
RyoheiHagimoto 0:0e0631af0305 95
RyoheiHagimoto 0:0e0631af0305 96
RyoheiHagimoto 0:0e0631af0305 97
RyoheiHagimoto 0:0e0631af0305 98 /**
RyoheiHagimoto 0:0e0631af0305 99 * Chooses the initial centers in the k-means clustering in a random manner.
RyoheiHagimoto 0:0e0631af0305 100 *
RyoheiHagimoto 0:0e0631af0305 101 * Params:
RyoheiHagimoto 0:0e0631af0305 102 * k = number of centers
RyoheiHagimoto 0:0e0631af0305 103 * vecs = the dataset of points
RyoheiHagimoto 0:0e0631af0305 104 * indices = indices in the dataset
RyoheiHagimoto 0:0e0631af0305 105 * indices_length = length of indices vector
RyoheiHagimoto 0:0e0631af0305 106 *
RyoheiHagimoto 0:0e0631af0305 107 */
RyoheiHagimoto 0:0e0631af0305 108 void chooseCentersRandom(int k, int* dsindices, int indices_length, int* centers, int& centers_length)
RyoheiHagimoto 0:0e0631af0305 109 {
RyoheiHagimoto 0:0e0631af0305 110 UniqueRandom r(indices_length);
RyoheiHagimoto 0:0e0631af0305 111
RyoheiHagimoto 0:0e0631af0305 112 int index;
RyoheiHagimoto 0:0e0631af0305 113 for (index=0; index<k; ++index) {
RyoheiHagimoto 0:0e0631af0305 114 bool duplicate = true;
RyoheiHagimoto 0:0e0631af0305 115 int rnd;
RyoheiHagimoto 0:0e0631af0305 116 while (duplicate) {
RyoheiHagimoto 0:0e0631af0305 117 duplicate = false;
RyoheiHagimoto 0:0e0631af0305 118 rnd = r.next();
RyoheiHagimoto 0:0e0631af0305 119 if (rnd<0) {
RyoheiHagimoto 0:0e0631af0305 120 centers_length = index;
RyoheiHagimoto 0:0e0631af0305 121 return;
RyoheiHagimoto 0:0e0631af0305 122 }
RyoheiHagimoto 0:0e0631af0305 123
RyoheiHagimoto 0:0e0631af0305 124 centers[index] = dsindices[rnd];
RyoheiHagimoto 0:0e0631af0305 125
RyoheiHagimoto 0:0e0631af0305 126 for (int j=0; j<index; ++j) {
RyoheiHagimoto 0:0e0631af0305 127 DistanceType sq = distance(dataset[centers[index]], dataset[centers[j]], dataset.cols);
RyoheiHagimoto 0:0e0631af0305 128 if (sq<1e-16) {
RyoheiHagimoto 0:0e0631af0305 129 duplicate = true;
RyoheiHagimoto 0:0e0631af0305 130 }
RyoheiHagimoto 0:0e0631af0305 131 }
RyoheiHagimoto 0:0e0631af0305 132 }
RyoheiHagimoto 0:0e0631af0305 133 }
RyoheiHagimoto 0:0e0631af0305 134
RyoheiHagimoto 0:0e0631af0305 135 centers_length = index;
RyoheiHagimoto 0:0e0631af0305 136 }
RyoheiHagimoto 0:0e0631af0305 137
RyoheiHagimoto 0:0e0631af0305 138
RyoheiHagimoto 0:0e0631af0305 139 /**
RyoheiHagimoto 0:0e0631af0305 140 * Chooses the initial centers in the k-means using Gonzales' algorithm
RyoheiHagimoto 0:0e0631af0305 141 * so that the centers are spaced apart from each other.
RyoheiHagimoto 0:0e0631af0305 142 *
RyoheiHagimoto 0:0e0631af0305 143 * Params:
RyoheiHagimoto 0:0e0631af0305 144 * k = number of centers
RyoheiHagimoto 0:0e0631af0305 145 * vecs = the dataset of points
RyoheiHagimoto 0:0e0631af0305 146 * indices = indices in the dataset
RyoheiHagimoto 0:0e0631af0305 147 * Returns:
RyoheiHagimoto 0:0e0631af0305 148 */
RyoheiHagimoto 0:0e0631af0305 149 void chooseCentersGonzales(int k, int* dsindices, int indices_length, int* centers, int& centers_length)
RyoheiHagimoto 0:0e0631af0305 150 {
RyoheiHagimoto 0:0e0631af0305 151 int n = indices_length;
RyoheiHagimoto 0:0e0631af0305 152
RyoheiHagimoto 0:0e0631af0305 153 int rnd = rand_int(n);
RyoheiHagimoto 0:0e0631af0305 154 assert(rnd >=0 && rnd < n);
RyoheiHagimoto 0:0e0631af0305 155
RyoheiHagimoto 0:0e0631af0305 156 centers[0] = dsindices[rnd];
RyoheiHagimoto 0:0e0631af0305 157
RyoheiHagimoto 0:0e0631af0305 158 int index;
RyoheiHagimoto 0:0e0631af0305 159 for (index=1; index<k; ++index) {
RyoheiHagimoto 0:0e0631af0305 160
RyoheiHagimoto 0:0e0631af0305 161 int best_index = -1;
RyoheiHagimoto 0:0e0631af0305 162 DistanceType best_val = 0;
RyoheiHagimoto 0:0e0631af0305 163 for (int j=0; j<n; ++j) {
RyoheiHagimoto 0:0e0631af0305 164 DistanceType dist = distance(dataset[centers[0]],dataset[dsindices[j]],dataset.cols);
RyoheiHagimoto 0:0e0631af0305 165 for (int i=1; i<index; ++i) {
RyoheiHagimoto 0:0e0631af0305 166 DistanceType tmp_dist = distance(dataset[centers[i]],dataset[dsindices[j]],dataset.cols);
RyoheiHagimoto 0:0e0631af0305 167 if (tmp_dist<dist) {
RyoheiHagimoto 0:0e0631af0305 168 dist = tmp_dist;
RyoheiHagimoto 0:0e0631af0305 169 }
RyoheiHagimoto 0:0e0631af0305 170 }
RyoheiHagimoto 0:0e0631af0305 171 if (dist>best_val) {
RyoheiHagimoto 0:0e0631af0305 172 best_val = dist;
RyoheiHagimoto 0:0e0631af0305 173 best_index = j;
RyoheiHagimoto 0:0e0631af0305 174 }
RyoheiHagimoto 0:0e0631af0305 175 }
RyoheiHagimoto 0:0e0631af0305 176 if (best_index!=-1) {
RyoheiHagimoto 0:0e0631af0305 177 centers[index] = dsindices[best_index];
RyoheiHagimoto 0:0e0631af0305 178 }
RyoheiHagimoto 0:0e0631af0305 179 else {
RyoheiHagimoto 0:0e0631af0305 180 break;
RyoheiHagimoto 0:0e0631af0305 181 }
RyoheiHagimoto 0:0e0631af0305 182 }
RyoheiHagimoto 0:0e0631af0305 183 centers_length = index;
RyoheiHagimoto 0:0e0631af0305 184 }
RyoheiHagimoto 0:0e0631af0305 185
RyoheiHagimoto 0:0e0631af0305 186
RyoheiHagimoto 0:0e0631af0305 187 /**
RyoheiHagimoto 0:0e0631af0305 188 * Chooses the initial centers in the k-means using the algorithm
RyoheiHagimoto 0:0e0631af0305 189 * proposed in the KMeans++ paper:
RyoheiHagimoto 0:0e0631af0305 190 * Arthur, David; Vassilvitskii, Sergei - k-means++: The Advantages of Careful Seeding
RyoheiHagimoto 0:0e0631af0305 191 *
RyoheiHagimoto 0:0e0631af0305 192 * Implementation of this function was converted from the one provided in Arthur's code.
RyoheiHagimoto 0:0e0631af0305 193 *
RyoheiHagimoto 0:0e0631af0305 194 * Params:
RyoheiHagimoto 0:0e0631af0305 195 * k = number of centers
RyoheiHagimoto 0:0e0631af0305 196 * vecs = the dataset of points
RyoheiHagimoto 0:0e0631af0305 197 * indices = indices in the dataset
RyoheiHagimoto 0:0e0631af0305 198 * Returns:
RyoheiHagimoto 0:0e0631af0305 199 */
RyoheiHagimoto 0:0e0631af0305 200 void chooseCentersKMeanspp(int k, int* dsindices, int indices_length, int* centers, int& centers_length)
RyoheiHagimoto 0:0e0631af0305 201 {
RyoheiHagimoto 0:0e0631af0305 202 int n = indices_length;
RyoheiHagimoto 0:0e0631af0305 203
RyoheiHagimoto 0:0e0631af0305 204 double currentPot = 0;
RyoheiHagimoto 0:0e0631af0305 205 DistanceType* closestDistSq = new DistanceType[n];
RyoheiHagimoto 0:0e0631af0305 206
RyoheiHagimoto 0:0e0631af0305 207 // Choose one random center and set the closestDistSq values
RyoheiHagimoto 0:0e0631af0305 208 int index = rand_int(n);
RyoheiHagimoto 0:0e0631af0305 209 assert(index >=0 && index < n);
RyoheiHagimoto 0:0e0631af0305 210 centers[0] = dsindices[index];
RyoheiHagimoto 0:0e0631af0305 211
RyoheiHagimoto 0:0e0631af0305 212 // Computing distance^2 will have the advantage of even higher probability further to pick new centers
RyoheiHagimoto 0:0e0631af0305 213 // far from previous centers (and this complies to "k-means++: the advantages of careful seeding" article)
RyoheiHagimoto 0:0e0631af0305 214 for (int i = 0; i < n; i++) {
RyoheiHagimoto 0:0e0631af0305 215 closestDistSq[i] = distance(dataset[dsindices[i]], dataset[dsindices[index]], dataset.cols);
RyoheiHagimoto 0:0e0631af0305 216 closestDistSq[i] = ensureSquareDistance<Distance>( closestDistSq[i] );
RyoheiHagimoto 0:0e0631af0305 217 currentPot += closestDistSq[i];
RyoheiHagimoto 0:0e0631af0305 218 }
RyoheiHagimoto 0:0e0631af0305 219
RyoheiHagimoto 0:0e0631af0305 220
RyoheiHagimoto 0:0e0631af0305 221 const int numLocalTries = 1;
RyoheiHagimoto 0:0e0631af0305 222
RyoheiHagimoto 0:0e0631af0305 223 // Choose each center
RyoheiHagimoto 0:0e0631af0305 224 int centerCount;
RyoheiHagimoto 0:0e0631af0305 225 for (centerCount = 1; centerCount < k; centerCount++) {
RyoheiHagimoto 0:0e0631af0305 226
RyoheiHagimoto 0:0e0631af0305 227 // Repeat several trials
RyoheiHagimoto 0:0e0631af0305 228 double bestNewPot = -1;
RyoheiHagimoto 0:0e0631af0305 229 int bestNewIndex = 0;
RyoheiHagimoto 0:0e0631af0305 230 for (int localTrial = 0; localTrial < numLocalTries; localTrial++) {
RyoheiHagimoto 0:0e0631af0305 231
RyoheiHagimoto 0:0e0631af0305 232 // Choose our center - have to be slightly careful to return a valid answer even accounting
RyoheiHagimoto 0:0e0631af0305 233 // for possible rounding errors
RyoheiHagimoto 0:0e0631af0305 234 double randVal = rand_double(currentPot);
RyoheiHagimoto 0:0e0631af0305 235 for (index = 0; index < n-1; index++) {
RyoheiHagimoto 0:0e0631af0305 236 if (randVal <= closestDistSq[index]) break;
RyoheiHagimoto 0:0e0631af0305 237 else randVal -= closestDistSq[index];
RyoheiHagimoto 0:0e0631af0305 238 }
RyoheiHagimoto 0:0e0631af0305 239
RyoheiHagimoto 0:0e0631af0305 240 // Compute the new potential
RyoheiHagimoto 0:0e0631af0305 241 double newPot = 0;
RyoheiHagimoto 0:0e0631af0305 242 for (int i = 0; i < n; i++) {
RyoheiHagimoto 0:0e0631af0305 243 DistanceType dist = distance(dataset[dsindices[i]], dataset[dsindices[index]], dataset.cols);
RyoheiHagimoto 0:0e0631af0305 244 newPot += std::min( ensureSquareDistance<Distance>(dist), closestDistSq[i] );
RyoheiHagimoto 0:0e0631af0305 245 }
RyoheiHagimoto 0:0e0631af0305 246
RyoheiHagimoto 0:0e0631af0305 247 // Store the best result
RyoheiHagimoto 0:0e0631af0305 248 if ((bestNewPot < 0)||(newPot < bestNewPot)) {
RyoheiHagimoto 0:0e0631af0305 249 bestNewPot = newPot;
RyoheiHagimoto 0:0e0631af0305 250 bestNewIndex = index;
RyoheiHagimoto 0:0e0631af0305 251 }
RyoheiHagimoto 0:0e0631af0305 252 }
RyoheiHagimoto 0:0e0631af0305 253
RyoheiHagimoto 0:0e0631af0305 254 // Add the appropriate center
RyoheiHagimoto 0:0e0631af0305 255 centers[centerCount] = dsindices[bestNewIndex];
RyoheiHagimoto 0:0e0631af0305 256 currentPot = bestNewPot;
RyoheiHagimoto 0:0e0631af0305 257 for (int i = 0; i < n; i++) {
RyoheiHagimoto 0:0e0631af0305 258 DistanceType dist = distance(dataset[dsindices[i]], dataset[dsindices[bestNewIndex]], dataset.cols);
RyoheiHagimoto 0:0e0631af0305 259 closestDistSq[i] = std::min( ensureSquareDistance<Distance>(dist), closestDistSq[i] );
RyoheiHagimoto 0:0e0631af0305 260 }
RyoheiHagimoto 0:0e0631af0305 261 }
RyoheiHagimoto 0:0e0631af0305 262
RyoheiHagimoto 0:0e0631af0305 263 centers_length = centerCount;
RyoheiHagimoto 0:0e0631af0305 264
RyoheiHagimoto 0:0e0631af0305 265 delete[] closestDistSq;
RyoheiHagimoto 0:0e0631af0305 266 }
RyoheiHagimoto 0:0e0631af0305 267
RyoheiHagimoto 0:0e0631af0305 268
RyoheiHagimoto 0:0e0631af0305 269 /**
RyoheiHagimoto 0:0e0631af0305 270 * Chooses the initial centers in a way inspired by Gonzales (by Pierre-Emmanuel Viel):
RyoheiHagimoto 0:0e0631af0305 271 * select the first point of the list as a candidate, then parse the points list. If another
RyoheiHagimoto 0:0e0631af0305 272 * point is further than current candidate from the other centers, test if it is a good center
RyoheiHagimoto 0:0e0631af0305 273 * of a local aggregation. If it is, replace current candidate by this point. And so on...
RyoheiHagimoto 0:0e0631af0305 274 *
RyoheiHagimoto 0:0e0631af0305 275 * Used with KMeansIndex that computes centers coordinates by averaging positions of clusters points,
RyoheiHagimoto 0:0e0631af0305 276 * this doesn't make a real difference with previous methods. But used with HierarchicalClusteringIndex
RyoheiHagimoto 0:0e0631af0305 277 * class that pick centers among existing points instead of computing the barycenters, there is a real
RyoheiHagimoto 0:0e0631af0305 278 * improvement.
RyoheiHagimoto 0:0e0631af0305 279 *
RyoheiHagimoto 0:0e0631af0305 280 * Params:
RyoheiHagimoto 0:0e0631af0305 281 * k = number of centers
RyoheiHagimoto 0:0e0631af0305 282 * vecs = the dataset of points
RyoheiHagimoto 0:0e0631af0305 283 * indices = indices in the dataset
RyoheiHagimoto 0:0e0631af0305 284 * Returns:
RyoheiHagimoto 0:0e0631af0305 285 */
RyoheiHagimoto 0:0e0631af0305 286 void GroupWiseCenterChooser(int k, int* dsindices, int indices_length, int* centers, int& centers_length)
RyoheiHagimoto 0:0e0631af0305 287 {
RyoheiHagimoto 0:0e0631af0305 288 const float kSpeedUpFactor = 1.3f;
RyoheiHagimoto 0:0e0631af0305 289
RyoheiHagimoto 0:0e0631af0305 290 int n = indices_length;
RyoheiHagimoto 0:0e0631af0305 291
RyoheiHagimoto 0:0e0631af0305 292 DistanceType* closestDistSq = new DistanceType[n];
RyoheiHagimoto 0:0e0631af0305 293
RyoheiHagimoto 0:0e0631af0305 294 // Choose one random center and set the closestDistSq values
RyoheiHagimoto 0:0e0631af0305 295 int index = rand_int(n);
RyoheiHagimoto 0:0e0631af0305 296 assert(index >=0 && index < n);
RyoheiHagimoto 0:0e0631af0305 297 centers[0] = dsindices[index];
RyoheiHagimoto 0:0e0631af0305 298
RyoheiHagimoto 0:0e0631af0305 299 for (int i = 0; i < n; i++) {
RyoheiHagimoto 0:0e0631af0305 300 closestDistSq[i] = distance(dataset[dsindices[i]], dataset[dsindices[index]], dataset.cols);
RyoheiHagimoto 0:0e0631af0305 301 }
RyoheiHagimoto 0:0e0631af0305 302
RyoheiHagimoto 0:0e0631af0305 303
RyoheiHagimoto 0:0e0631af0305 304 // Choose each center
RyoheiHagimoto 0:0e0631af0305 305 int centerCount;
RyoheiHagimoto 0:0e0631af0305 306 for (centerCount = 1; centerCount < k; centerCount++) {
RyoheiHagimoto 0:0e0631af0305 307
RyoheiHagimoto 0:0e0631af0305 308 // Repeat several trials
RyoheiHagimoto 0:0e0631af0305 309 double bestNewPot = -1;
RyoheiHagimoto 0:0e0631af0305 310 int bestNewIndex = 0;
RyoheiHagimoto 0:0e0631af0305 311 DistanceType furthest = 0;
RyoheiHagimoto 0:0e0631af0305 312 for (index = 0; index < n; index++) {
RyoheiHagimoto 0:0e0631af0305 313
RyoheiHagimoto 0:0e0631af0305 314 // We will test only the potential of the points further than current candidate
RyoheiHagimoto 0:0e0631af0305 315 if( closestDistSq[index] > kSpeedUpFactor * (float)furthest ) {
RyoheiHagimoto 0:0e0631af0305 316
RyoheiHagimoto 0:0e0631af0305 317 // Compute the new potential
RyoheiHagimoto 0:0e0631af0305 318 double newPot = 0;
RyoheiHagimoto 0:0e0631af0305 319 for (int i = 0; i < n; i++) {
RyoheiHagimoto 0:0e0631af0305 320 newPot += std::min( distance(dataset[dsindices[i]], dataset[dsindices[index]], dataset.cols)
RyoheiHagimoto 0:0e0631af0305 321 , closestDistSq[i] );
RyoheiHagimoto 0:0e0631af0305 322 }
RyoheiHagimoto 0:0e0631af0305 323
RyoheiHagimoto 0:0e0631af0305 324 // Store the best result
RyoheiHagimoto 0:0e0631af0305 325 if ((bestNewPot < 0)||(newPot <= bestNewPot)) {
RyoheiHagimoto 0:0e0631af0305 326 bestNewPot = newPot;
RyoheiHagimoto 0:0e0631af0305 327 bestNewIndex = index;
RyoheiHagimoto 0:0e0631af0305 328 furthest = closestDistSq[index];
RyoheiHagimoto 0:0e0631af0305 329 }
RyoheiHagimoto 0:0e0631af0305 330 }
RyoheiHagimoto 0:0e0631af0305 331 }
RyoheiHagimoto 0:0e0631af0305 332
RyoheiHagimoto 0:0e0631af0305 333 // Add the appropriate center
RyoheiHagimoto 0:0e0631af0305 334 centers[centerCount] = dsindices[bestNewIndex];
RyoheiHagimoto 0:0e0631af0305 335 for (int i = 0; i < n; i++) {
RyoheiHagimoto 0:0e0631af0305 336 closestDistSq[i] = std::min( distance(dataset[dsindices[i]], dataset[dsindices[bestNewIndex]], dataset.cols)
RyoheiHagimoto 0:0e0631af0305 337 , closestDistSq[i] );
RyoheiHagimoto 0:0e0631af0305 338 }
RyoheiHagimoto 0:0e0631af0305 339 }
RyoheiHagimoto 0:0e0631af0305 340
RyoheiHagimoto 0:0e0631af0305 341 centers_length = centerCount;
RyoheiHagimoto 0:0e0631af0305 342
RyoheiHagimoto 0:0e0631af0305 343 delete[] closestDistSq;
RyoheiHagimoto 0:0e0631af0305 344 }
RyoheiHagimoto 0:0e0631af0305 345
RyoheiHagimoto 0:0e0631af0305 346
RyoheiHagimoto 0:0e0631af0305 347 public:
RyoheiHagimoto 0:0e0631af0305 348
RyoheiHagimoto 0:0e0631af0305 349
RyoheiHagimoto 0:0e0631af0305 350 /**
RyoheiHagimoto 0:0e0631af0305 351 * Index constructor
RyoheiHagimoto 0:0e0631af0305 352 *
RyoheiHagimoto 0:0e0631af0305 353 * Params:
RyoheiHagimoto 0:0e0631af0305 354 * inputData = dataset with the input features
RyoheiHagimoto 0:0e0631af0305 355 * params = parameters passed to the hierarchical k-means algorithm
RyoheiHagimoto 0:0e0631af0305 356 */
RyoheiHagimoto 0:0e0631af0305 357 HierarchicalClusteringIndex(const Matrix<ElementType>& inputData, const IndexParams& index_params = HierarchicalClusteringIndexParams(),
RyoheiHagimoto 0:0e0631af0305 358 Distance d = Distance())
RyoheiHagimoto 0:0e0631af0305 359 : dataset(inputData), params(index_params), root(NULL), indices(NULL), distance(d)
RyoheiHagimoto 0:0e0631af0305 360 {
RyoheiHagimoto 0:0e0631af0305 361 memoryCounter = 0;
RyoheiHagimoto 0:0e0631af0305 362
RyoheiHagimoto 0:0e0631af0305 363 size_ = dataset.rows;
RyoheiHagimoto 0:0e0631af0305 364 veclen_ = dataset.cols;
RyoheiHagimoto 0:0e0631af0305 365
RyoheiHagimoto 0:0e0631af0305 366 branching_ = get_param(params,"branching",32);
RyoheiHagimoto 0:0e0631af0305 367 centers_init_ = get_param(params,"centers_init", FLANN_CENTERS_RANDOM);
RyoheiHagimoto 0:0e0631af0305 368 trees_ = get_param(params,"trees",4);
RyoheiHagimoto 0:0e0631af0305 369 leaf_size_ = get_param(params,"leaf_size",100);
RyoheiHagimoto 0:0e0631af0305 370
RyoheiHagimoto 0:0e0631af0305 371 if (centers_init_==FLANN_CENTERS_RANDOM) {
RyoheiHagimoto 0:0e0631af0305 372 chooseCenters = &HierarchicalClusteringIndex::chooseCentersRandom;
RyoheiHagimoto 0:0e0631af0305 373 }
RyoheiHagimoto 0:0e0631af0305 374 else if (centers_init_==FLANN_CENTERS_GONZALES) {
RyoheiHagimoto 0:0e0631af0305 375 chooseCenters = &HierarchicalClusteringIndex::chooseCentersGonzales;
RyoheiHagimoto 0:0e0631af0305 376 }
RyoheiHagimoto 0:0e0631af0305 377 else if (centers_init_==FLANN_CENTERS_KMEANSPP) {
RyoheiHagimoto 0:0e0631af0305 378 chooseCenters = &HierarchicalClusteringIndex::chooseCentersKMeanspp;
RyoheiHagimoto 0:0e0631af0305 379 }
RyoheiHagimoto 0:0e0631af0305 380 else if (centers_init_==FLANN_CENTERS_GROUPWISE) {
RyoheiHagimoto 0:0e0631af0305 381 chooseCenters = &HierarchicalClusteringIndex::GroupWiseCenterChooser;
RyoheiHagimoto 0:0e0631af0305 382 }
RyoheiHagimoto 0:0e0631af0305 383 else {
RyoheiHagimoto 0:0e0631af0305 384 throw FLANNException("Unknown algorithm for choosing initial centers.");
RyoheiHagimoto 0:0e0631af0305 385 }
RyoheiHagimoto 0:0e0631af0305 386
RyoheiHagimoto 0:0e0631af0305 387 trees_ = get_param(params,"trees",4);
RyoheiHagimoto 0:0e0631af0305 388 root = new NodePtr[trees_];
RyoheiHagimoto 0:0e0631af0305 389 indices = new int*[trees_];
RyoheiHagimoto 0:0e0631af0305 390
RyoheiHagimoto 0:0e0631af0305 391 for (int i=0; i<trees_; ++i) {
RyoheiHagimoto 0:0e0631af0305 392 root[i] = NULL;
RyoheiHagimoto 0:0e0631af0305 393 indices[i] = NULL;
RyoheiHagimoto 0:0e0631af0305 394 }
RyoheiHagimoto 0:0e0631af0305 395 }
RyoheiHagimoto 0:0e0631af0305 396
RyoheiHagimoto 0:0e0631af0305 397 HierarchicalClusteringIndex(const HierarchicalClusteringIndex&);
RyoheiHagimoto 0:0e0631af0305 398 HierarchicalClusteringIndex& operator=(const HierarchicalClusteringIndex&);
RyoheiHagimoto 0:0e0631af0305 399
RyoheiHagimoto 0:0e0631af0305 400 /**
RyoheiHagimoto 0:0e0631af0305 401 * Index destructor.
RyoheiHagimoto 0:0e0631af0305 402 *
RyoheiHagimoto 0:0e0631af0305 403 * Release the memory used by the index.
RyoheiHagimoto 0:0e0631af0305 404 */
RyoheiHagimoto 0:0e0631af0305 405 virtual ~HierarchicalClusteringIndex()
RyoheiHagimoto 0:0e0631af0305 406 {
RyoheiHagimoto 0:0e0631af0305 407 free_elements();
RyoheiHagimoto 0:0e0631af0305 408
RyoheiHagimoto 0:0e0631af0305 409 if (root!=NULL) {
RyoheiHagimoto 0:0e0631af0305 410 delete[] root;
RyoheiHagimoto 0:0e0631af0305 411 }
RyoheiHagimoto 0:0e0631af0305 412
RyoheiHagimoto 0:0e0631af0305 413 if (indices!=NULL) {
RyoheiHagimoto 0:0e0631af0305 414 delete[] indices;
RyoheiHagimoto 0:0e0631af0305 415 }
RyoheiHagimoto 0:0e0631af0305 416 }
RyoheiHagimoto 0:0e0631af0305 417
RyoheiHagimoto 0:0e0631af0305 418
RyoheiHagimoto 0:0e0631af0305 419 /**
RyoheiHagimoto 0:0e0631af0305 420 * Release the inner elements of indices[]
RyoheiHagimoto 0:0e0631af0305 421 */
RyoheiHagimoto 0:0e0631af0305 422 void free_elements()
RyoheiHagimoto 0:0e0631af0305 423 {
RyoheiHagimoto 0:0e0631af0305 424 if (indices!=NULL) {
RyoheiHagimoto 0:0e0631af0305 425 for(int i=0; i<trees_; ++i) {
RyoheiHagimoto 0:0e0631af0305 426 if (indices[i]!=NULL) {
RyoheiHagimoto 0:0e0631af0305 427 delete[] indices[i];
RyoheiHagimoto 0:0e0631af0305 428 indices[i] = NULL;
RyoheiHagimoto 0:0e0631af0305 429 }
RyoheiHagimoto 0:0e0631af0305 430 }
RyoheiHagimoto 0:0e0631af0305 431 }
RyoheiHagimoto 0:0e0631af0305 432 }
RyoheiHagimoto 0:0e0631af0305 433
RyoheiHagimoto 0:0e0631af0305 434
RyoheiHagimoto 0:0e0631af0305 435 /**
RyoheiHagimoto 0:0e0631af0305 436 * Returns size of index.
RyoheiHagimoto 0:0e0631af0305 437 */
RyoheiHagimoto 0:0e0631af0305 438 size_t size() const
RyoheiHagimoto 0:0e0631af0305 439 {
RyoheiHagimoto 0:0e0631af0305 440 return size_;
RyoheiHagimoto 0:0e0631af0305 441 }
RyoheiHagimoto 0:0e0631af0305 442
RyoheiHagimoto 0:0e0631af0305 443 /**
RyoheiHagimoto 0:0e0631af0305 444 * Returns the length of an index feature.
RyoheiHagimoto 0:0e0631af0305 445 */
RyoheiHagimoto 0:0e0631af0305 446 size_t veclen() const
RyoheiHagimoto 0:0e0631af0305 447 {
RyoheiHagimoto 0:0e0631af0305 448 return veclen_;
RyoheiHagimoto 0:0e0631af0305 449 }
RyoheiHagimoto 0:0e0631af0305 450
RyoheiHagimoto 0:0e0631af0305 451
RyoheiHagimoto 0:0e0631af0305 452 /**
RyoheiHagimoto 0:0e0631af0305 453 * Computes the inde memory usage
RyoheiHagimoto 0:0e0631af0305 454 * Returns: memory used by the index
RyoheiHagimoto 0:0e0631af0305 455 */
RyoheiHagimoto 0:0e0631af0305 456 int usedMemory() const
RyoheiHagimoto 0:0e0631af0305 457 {
RyoheiHagimoto 0:0e0631af0305 458 return pool.usedMemory+pool.wastedMemory+memoryCounter;
RyoheiHagimoto 0:0e0631af0305 459 }
RyoheiHagimoto 0:0e0631af0305 460
RyoheiHagimoto 0:0e0631af0305 461 /**
RyoheiHagimoto 0:0e0631af0305 462 * Builds the index
RyoheiHagimoto 0:0e0631af0305 463 */
RyoheiHagimoto 0:0e0631af0305 464 void buildIndex()
RyoheiHagimoto 0:0e0631af0305 465 {
RyoheiHagimoto 0:0e0631af0305 466 if (branching_<2) {
RyoheiHagimoto 0:0e0631af0305 467 throw FLANNException("Branching factor must be at least 2");
RyoheiHagimoto 0:0e0631af0305 468 }
RyoheiHagimoto 0:0e0631af0305 469
RyoheiHagimoto 0:0e0631af0305 470 free_elements();
RyoheiHagimoto 0:0e0631af0305 471
RyoheiHagimoto 0:0e0631af0305 472 for (int i=0; i<trees_; ++i) {
RyoheiHagimoto 0:0e0631af0305 473 indices[i] = new int[size_];
RyoheiHagimoto 0:0e0631af0305 474 for (size_t j=0; j<size_; ++j) {
RyoheiHagimoto 0:0e0631af0305 475 indices[i][j] = (int)j;
RyoheiHagimoto 0:0e0631af0305 476 }
RyoheiHagimoto 0:0e0631af0305 477 root[i] = pool.allocate<Node>();
RyoheiHagimoto 0:0e0631af0305 478 computeClustering(root[i], indices[i], (int)size_, branching_,0);
RyoheiHagimoto 0:0e0631af0305 479 }
RyoheiHagimoto 0:0e0631af0305 480 }
RyoheiHagimoto 0:0e0631af0305 481
RyoheiHagimoto 0:0e0631af0305 482
RyoheiHagimoto 0:0e0631af0305 483 flann_algorithm_t getType() const
RyoheiHagimoto 0:0e0631af0305 484 {
RyoheiHagimoto 0:0e0631af0305 485 return FLANN_INDEX_HIERARCHICAL;
RyoheiHagimoto 0:0e0631af0305 486 }
RyoheiHagimoto 0:0e0631af0305 487
RyoheiHagimoto 0:0e0631af0305 488
RyoheiHagimoto 0:0e0631af0305 489 void saveIndex(FILE* stream)
RyoheiHagimoto 0:0e0631af0305 490 {
RyoheiHagimoto 0:0e0631af0305 491 save_value(stream, branching_);
RyoheiHagimoto 0:0e0631af0305 492 save_value(stream, trees_);
RyoheiHagimoto 0:0e0631af0305 493 save_value(stream, centers_init_);
RyoheiHagimoto 0:0e0631af0305 494 save_value(stream, leaf_size_);
RyoheiHagimoto 0:0e0631af0305 495 save_value(stream, memoryCounter);
RyoheiHagimoto 0:0e0631af0305 496 for (int i=0; i<trees_; ++i) {
RyoheiHagimoto 0:0e0631af0305 497 save_value(stream, *indices[i], size_);
RyoheiHagimoto 0:0e0631af0305 498 save_tree(stream, root[i], i);
RyoheiHagimoto 0:0e0631af0305 499 }
RyoheiHagimoto 0:0e0631af0305 500
RyoheiHagimoto 0:0e0631af0305 501 }
RyoheiHagimoto 0:0e0631af0305 502
RyoheiHagimoto 0:0e0631af0305 503
RyoheiHagimoto 0:0e0631af0305 504 void loadIndex(FILE* stream)
RyoheiHagimoto 0:0e0631af0305 505 {
RyoheiHagimoto 0:0e0631af0305 506 free_elements();
RyoheiHagimoto 0:0e0631af0305 507
RyoheiHagimoto 0:0e0631af0305 508 if (root!=NULL) {
RyoheiHagimoto 0:0e0631af0305 509 delete[] root;
RyoheiHagimoto 0:0e0631af0305 510 }
RyoheiHagimoto 0:0e0631af0305 511
RyoheiHagimoto 0:0e0631af0305 512 if (indices!=NULL) {
RyoheiHagimoto 0:0e0631af0305 513 delete[] indices;
RyoheiHagimoto 0:0e0631af0305 514 }
RyoheiHagimoto 0:0e0631af0305 515
RyoheiHagimoto 0:0e0631af0305 516 load_value(stream, branching_);
RyoheiHagimoto 0:0e0631af0305 517 load_value(stream, trees_);
RyoheiHagimoto 0:0e0631af0305 518 load_value(stream, centers_init_);
RyoheiHagimoto 0:0e0631af0305 519 load_value(stream, leaf_size_);
RyoheiHagimoto 0:0e0631af0305 520 load_value(stream, memoryCounter);
RyoheiHagimoto 0:0e0631af0305 521
RyoheiHagimoto 0:0e0631af0305 522 indices = new int*[trees_];
RyoheiHagimoto 0:0e0631af0305 523 root = new NodePtr[trees_];
RyoheiHagimoto 0:0e0631af0305 524 for (int i=0; i<trees_; ++i) {
RyoheiHagimoto 0:0e0631af0305 525 indices[i] = new int[size_];
RyoheiHagimoto 0:0e0631af0305 526 load_value(stream, *indices[i], size_);
RyoheiHagimoto 0:0e0631af0305 527 load_tree(stream, root[i], i);
RyoheiHagimoto 0:0e0631af0305 528 }
RyoheiHagimoto 0:0e0631af0305 529
RyoheiHagimoto 0:0e0631af0305 530 params["algorithm"] = getType();
RyoheiHagimoto 0:0e0631af0305 531 params["branching"] = branching_;
RyoheiHagimoto 0:0e0631af0305 532 params["trees"] = trees_;
RyoheiHagimoto 0:0e0631af0305 533 params["centers_init"] = centers_init_;
RyoheiHagimoto 0:0e0631af0305 534 params["leaf_size"] = leaf_size_;
RyoheiHagimoto 0:0e0631af0305 535 }
RyoheiHagimoto 0:0e0631af0305 536
RyoheiHagimoto 0:0e0631af0305 537
RyoheiHagimoto 0:0e0631af0305 538 /**
RyoheiHagimoto 0:0e0631af0305 539 * Find set of nearest neighbors to vec. Their indices are stored inside
RyoheiHagimoto 0:0e0631af0305 540 * the result object.
RyoheiHagimoto 0:0e0631af0305 541 *
RyoheiHagimoto 0:0e0631af0305 542 * Params:
RyoheiHagimoto 0:0e0631af0305 543 * result = the result object in which the indices of the nearest-neighbors are stored
RyoheiHagimoto 0:0e0631af0305 544 * vec = the vector for which to search the nearest neighbors
RyoheiHagimoto 0:0e0631af0305 545 * searchParams = parameters that influence the search algorithm (checks)
RyoheiHagimoto 0:0e0631af0305 546 */
RyoheiHagimoto 0:0e0631af0305 547 void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& searchParams)
RyoheiHagimoto 0:0e0631af0305 548 {
RyoheiHagimoto 0:0e0631af0305 549
RyoheiHagimoto 0:0e0631af0305 550 int maxChecks = get_param(searchParams,"checks",32);
RyoheiHagimoto 0:0e0631af0305 551
RyoheiHagimoto 0:0e0631af0305 552 // Priority queue storing intermediate branches in the best-bin-first search
RyoheiHagimoto 0:0e0631af0305 553 Heap<BranchSt>* heap = new Heap<BranchSt>((int)size_);
RyoheiHagimoto 0:0e0631af0305 554
RyoheiHagimoto 0:0e0631af0305 555 std::vector<bool> checked(size_,false);
RyoheiHagimoto 0:0e0631af0305 556 int checks = 0;
RyoheiHagimoto 0:0e0631af0305 557 for (int i=0; i<trees_; ++i) {
RyoheiHagimoto 0:0e0631af0305 558 findNN(root[i], result, vec, checks, maxChecks, heap, checked);
RyoheiHagimoto 0:0e0631af0305 559 }
RyoheiHagimoto 0:0e0631af0305 560
RyoheiHagimoto 0:0e0631af0305 561 BranchSt branch;
RyoheiHagimoto 0:0e0631af0305 562 while (heap->popMin(branch) && (checks<maxChecks || !result.full())) {
RyoheiHagimoto 0:0e0631af0305 563 NodePtr node = branch.node;
RyoheiHagimoto 0:0e0631af0305 564 findNN(node, result, vec, checks, maxChecks, heap, checked);
RyoheiHagimoto 0:0e0631af0305 565 }
RyoheiHagimoto 0:0e0631af0305 566 assert(result.full());
RyoheiHagimoto 0:0e0631af0305 567
RyoheiHagimoto 0:0e0631af0305 568 delete heap;
RyoheiHagimoto 0:0e0631af0305 569
RyoheiHagimoto 0:0e0631af0305 570 }
RyoheiHagimoto 0:0e0631af0305 571
RyoheiHagimoto 0:0e0631af0305 572 IndexParams getParameters() const
RyoheiHagimoto 0:0e0631af0305 573 {
RyoheiHagimoto 0:0e0631af0305 574 return params;
RyoheiHagimoto 0:0e0631af0305 575 }
RyoheiHagimoto 0:0e0631af0305 576
RyoheiHagimoto 0:0e0631af0305 577
RyoheiHagimoto 0:0e0631af0305 578 private:
RyoheiHagimoto 0:0e0631af0305 579
RyoheiHagimoto 0:0e0631af0305 580 /**
RyoheiHagimoto 0:0e0631af0305 581 * Struture representing a node in the hierarchical k-means tree.
RyoheiHagimoto 0:0e0631af0305 582 */
RyoheiHagimoto 0:0e0631af0305 583 struct Node
RyoheiHagimoto 0:0e0631af0305 584 {
RyoheiHagimoto 0:0e0631af0305 585 /**
RyoheiHagimoto 0:0e0631af0305 586 * The cluster center index
RyoheiHagimoto 0:0e0631af0305 587 */
RyoheiHagimoto 0:0e0631af0305 588 int pivot;
RyoheiHagimoto 0:0e0631af0305 589 /**
RyoheiHagimoto 0:0e0631af0305 590 * The cluster size (number of points in the cluster)
RyoheiHagimoto 0:0e0631af0305 591 */
RyoheiHagimoto 0:0e0631af0305 592 int size;
RyoheiHagimoto 0:0e0631af0305 593 /**
RyoheiHagimoto 0:0e0631af0305 594 * Child nodes (only for non-terminal nodes)
RyoheiHagimoto 0:0e0631af0305 595 */
RyoheiHagimoto 0:0e0631af0305 596 Node** childs;
RyoheiHagimoto 0:0e0631af0305 597 /**
RyoheiHagimoto 0:0e0631af0305 598 * Node points (only for terminal nodes)
RyoheiHagimoto 0:0e0631af0305 599 */
RyoheiHagimoto 0:0e0631af0305 600 int* indices;
RyoheiHagimoto 0:0e0631af0305 601 /**
RyoheiHagimoto 0:0e0631af0305 602 * Level
RyoheiHagimoto 0:0e0631af0305 603 */
RyoheiHagimoto 0:0e0631af0305 604 int level;
RyoheiHagimoto 0:0e0631af0305 605 };
RyoheiHagimoto 0:0e0631af0305 606 typedef Node* NodePtr;
RyoheiHagimoto 0:0e0631af0305 607
RyoheiHagimoto 0:0e0631af0305 608
RyoheiHagimoto 0:0e0631af0305 609
RyoheiHagimoto 0:0e0631af0305 610 /**
RyoheiHagimoto 0:0e0631af0305 611 * Alias definition for a nicer syntax.
RyoheiHagimoto 0:0e0631af0305 612 */
RyoheiHagimoto 0:0e0631af0305 613 typedef BranchStruct<NodePtr, DistanceType> BranchSt;
RyoheiHagimoto 0:0e0631af0305 614
RyoheiHagimoto 0:0e0631af0305 615
RyoheiHagimoto 0:0e0631af0305 616
RyoheiHagimoto 0:0e0631af0305 617 void save_tree(FILE* stream, NodePtr node, int num)
RyoheiHagimoto 0:0e0631af0305 618 {
RyoheiHagimoto 0:0e0631af0305 619 save_value(stream, *node);
RyoheiHagimoto 0:0e0631af0305 620 if (node->childs==NULL) {
RyoheiHagimoto 0:0e0631af0305 621 int indices_offset = (int)(node->indices - indices[num]);
RyoheiHagimoto 0:0e0631af0305 622 save_value(stream, indices_offset);
RyoheiHagimoto 0:0e0631af0305 623 }
RyoheiHagimoto 0:0e0631af0305 624 else {
RyoheiHagimoto 0:0e0631af0305 625 for(int i=0; i<branching_; ++i) {
RyoheiHagimoto 0:0e0631af0305 626 save_tree(stream, node->childs[i], num);
RyoheiHagimoto 0:0e0631af0305 627 }
RyoheiHagimoto 0:0e0631af0305 628 }
RyoheiHagimoto 0:0e0631af0305 629 }
RyoheiHagimoto 0:0e0631af0305 630
RyoheiHagimoto 0:0e0631af0305 631
RyoheiHagimoto 0:0e0631af0305 632 void load_tree(FILE* stream, NodePtr& node, int num)
RyoheiHagimoto 0:0e0631af0305 633 {
RyoheiHagimoto 0:0e0631af0305 634 node = pool.allocate<Node>();
RyoheiHagimoto 0:0e0631af0305 635 load_value(stream, *node);
RyoheiHagimoto 0:0e0631af0305 636 if (node->childs==NULL) {
RyoheiHagimoto 0:0e0631af0305 637 int indices_offset;
RyoheiHagimoto 0:0e0631af0305 638 load_value(stream, indices_offset);
RyoheiHagimoto 0:0e0631af0305 639 node->indices = indices[num] + indices_offset;
RyoheiHagimoto 0:0e0631af0305 640 }
RyoheiHagimoto 0:0e0631af0305 641 else {
RyoheiHagimoto 0:0e0631af0305 642 node->childs = pool.allocate<NodePtr>(branching_);
RyoheiHagimoto 0:0e0631af0305 643 for(int i=0; i<branching_; ++i) {
RyoheiHagimoto 0:0e0631af0305 644 load_tree(stream, node->childs[i], num);
RyoheiHagimoto 0:0e0631af0305 645 }
RyoheiHagimoto 0:0e0631af0305 646 }
RyoheiHagimoto 0:0e0631af0305 647 }
RyoheiHagimoto 0:0e0631af0305 648
RyoheiHagimoto 0:0e0631af0305 649
RyoheiHagimoto 0:0e0631af0305 650
RyoheiHagimoto 0:0e0631af0305 651
RyoheiHagimoto 0:0e0631af0305 652 void computeLabels(int* dsindices, int indices_length, int* centers, int centers_length, int* labels, DistanceType& cost)
RyoheiHagimoto 0:0e0631af0305 653 {
RyoheiHagimoto 0:0e0631af0305 654 cost = 0;
RyoheiHagimoto 0:0e0631af0305 655 for (int i=0; i<indices_length; ++i) {
RyoheiHagimoto 0:0e0631af0305 656 ElementType* point = dataset[dsindices[i]];
RyoheiHagimoto 0:0e0631af0305 657 DistanceType dist = distance(point, dataset[centers[0]], veclen_);
RyoheiHagimoto 0:0e0631af0305 658 labels[i] = 0;
RyoheiHagimoto 0:0e0631af0305 659 for (int j=1; j<centers_length; ++j) {
RyoheiHagimoto 0:0e0631af0305 660 DistanceType new_dist = distance(point, dataset[centers[j]], veclen_);
RyoheiHagimoto 0:0e0631af0305 661 if (dist>new_dist) {
RyoheiHagimoto 0:0e0631af0305 662 labels[i] = j;
RyoheiHagimoto 0:0e0631af0305 663 dist = new_dist;
RyoheiHagimoto 0:0e0631af0305 664 }
RyoheiHagimoto 0:0e0631af0305 665 }
RyoheiHagimoto 0:0e0631af0305 666 cost += dist;
RyoheiHagimoto 0:0e0631af0305 667 }
RyoheiHagimoto 0:0e0631af0305 668 }
RyoheiHagimoto 0:0e0631af0305 669
RyoheiHagimoto 0:0e0631af0305 670 /**
RyoheiHagimoto 0:0e0631af0305 671 * The method responsible with actually doing the recursive hierarchical
RyoheiHagimoto 0:0e0631af0305 672 * clustering
RyoheiHagimoto 0:0e0631af0305 673 *
RyoheiHagimoto 0:0e0631af0305 674 * Params:
RyoheiHagimoto 0:0e0631af0305 675 * node = the node to cluster
RyoheiHagimoto 0:0e0631af0305 676 * indices = indices of the points belonging to the current node
RyoheiHagimoto 0:0e0631af0305 677 * branching = the branching factor to use in the clustering
RyoheiHagimoto 0:0e0631af0305 678 *
RyoheiHagimoto 0:0e0631af0305 679 * TODO: for 1-sized clusters don't store a cluster center (it's the same as the single cluster point)
RyoheiHagimoto 0:0e0631af0305 680 */
RyoheiHagimoto 0:0e0631af0305 681 void computeClustering(NodePtr node, int* dsindices, int indices_length, int branching, int level)
RyoheiHagimoto 0:0e0631af0305 682 {
RyoheiHagimoto 0:0e0631af0305 683 node->size = indices_length;
RyoheiHagimoto 0:0e0631af0305 684 node->level = level;
RyoheiHagimoto 0:0e0631af0305 685
RyoheiHagimoto 0:0e0631af0305 686 if (indices_length < leaf_size_) { // leaf node
RyoheiHagimoto 0:0e0631af0305 687 node->indices = dsindices;
RyoheiHagimoto 0:0e0631af0305 688 std::sort(node->indices,node->indices+indices_length);
RyoheiHagimoto 0:0e0631af0305 689 node->childs = NULL;
RyoheiHagimoto 0:0e0631af0305 690 return;
RyoheiHagimoto 0:0e0631af0305 691 }
RyoheiHagimoto 0:0e0631af0305 692
RyoheiHagimoto 0:0e0631af0305 693 std::vector<int> centers(branching);
RyoheiHagimoto 0:0e0631af0305 694 std::vector<int> labels(indices_length);
RyoheiHagimoto 0:0e0631af0305 695
RyoheiHagimoto 0:0e0631af0305 696 int centers_length;
RyoheiHagimoto 0:0e0631af0305 697 (this->*chooseCenters)(branching, dsindices, indices_length, &centers[0], centers_length);
RyoheiHagimoto 0:0e0631af0305 698
RyoheiHagimoto 0:0e0631af0305 699 if (centers_length<branching) {
RyoheiHagimoto 0:0e0631af0305 700 node->indices = dsindices;
RyoheiHagimoto 0:0e0631af0305 701 std::sort(node->indices,node->indices+indices_length);
RyoheiHagimoto 0:0e0631af0305 702 node->childs = NULL;
RyoheiHagimoto 0:0e0631af0305 703 return;
RyoheiHagimoto 0:0e0631af0305 704 }
RyoheiHagimoto 0:0e0631af0305 705
RyoheiHagimoto 0:0e0631af0305 706
RyoheiHagimoto 0:0e0631af0305 707 // assign points to clusters
RyoheiHagimoto 0:0e0631af0305 708 DistanceType cost;
RyoheiHagimoto 0:0e0631af0305 709 computeLabels(dsindices, indices_length, &centers[0], centers_length, &labels[0], cost);
RyoheiHagimoto 0:0e0631af0305 710
RyoheiHagimoto 0:0e0631af0305 711 node->childs = pool.allocate<NodePtr>(branching);
RyoheiHagimoto 0:0e0631af0305 712 int start = 0;
RyoheiHagimoto 0:0e0631af0305 713 int end = start;
RyoheiHagimoto 0:0e0631af0305 714 for (int i=0; i<branching; ++i) {
RyoheiHagimoto 0:0e0631af0305 715 for (int j=0; j<indices_length; ++j) {
RyoheiHagimoto 0:0e0631af0305 716 if (labels[j]==i) {
RyoheiHagimoto 0:0e0631af0305 717 std::swap(dsindices[j],dsindices[end]);
RyoheiHagimoto 0:0e0631af0305 718 std::swap(labels[j],labels[end]);
RyoheiHagimoto 0:0e0631af0305 719 end++;
RyoheiHagimoto 0:0e0631af0305 720 }
RyoheiHagimoto 0:0e0631af0305 721 }
RyoheiHagimoto 0:0e0631af0305 722
RyoheiHagimoto 0:0e0631af0305 723 node->childs[i] = pool.allocate<Node>();
RyoheiHagimoto 0:0e0631af0305 724 node->childs[i]->pivot = centers[i];
RyoheiHagimoto 0:0e0631af0305 725 node->childs[i]->indices = NULL;
RyoheiHagimoto 0:0e0631af0305 726 computeClustering(node->childs[i],dsindices+start, end-start, branching, level+1);
RyoheiHagimoto 0:0e0631af0305 727 start=end;
RyoheiHagimoto 0:0e0631af0305 728 }
RyoheiHagimoto 0:0e0631af0305 729 }
RyoheiHagimoto 0:0e0631af0305 730
RyoheiHagimoto 0:0e0631af0305 731
RyoheiHagimoto 0:0e0631af0305 732
RyoheiHagimoto 0:0e0631af0305 733 /**
RyoheiHagimoto 0:0e0631af0305 734 * Performs one descent in the hierarchical k-means tree. The branches not
RyoheiHagimoto 0:0e0631af0305 735 * visited are stored in a priority queue.
RyoheiHagimoto 0:0e0631af0305 736 *
RyoheiHagimoto 0:0e0631af0305 737 * Params:
RyoheiHagimoto 0:0e0631af0305 738 * node = node to explore
RyoheiHagimoto 0:0e0631af0305 739 * result = container for the k-nearest neighbors found
RyoheiHagimoto 0:0e0631af0305 740 * vec = query points
RyoheiHagimoto 0:0e0631af0305 741 * checks = how many points in the dataset have been checked so far
RyoheiHagimoto 0:0e0631af0305 742 * maxChecks = maximum dataset points to checks
RyoheiHagimoto 0:0e0631af0305 743 */
RyoheiHagimoto 0:0e0631af0305 744
RyoheiHagimoto 0:0e0631af0305 745
RyoheiHagimoto 0:0e0631af0305 746 void findNN(NodePtr node, ResultSet<DistanceType>& result, const ElementType* vec, int& checks, int maxChecks,
RyoheiHagimoto 0:0e0631af0305 747 Heap<BranchSt>* heap, std::vector<bool>& checked)
RyoheiHagimoto 0:0e0631af0305 748 {
RyoheiHagimoto 0:0e0631af0305 749 if (node->childs==NULL) {
RyoheiHagimoto 0:0e0631af0305 750 if (checks>=maxChecks) {
RyoheiHagimoto 0:0e0631af0305 751 if (result.full()) return;
RyoheiHagimoto 0:0e0631af0305 752 }
RyoheiHagimoto 0:0e0631af0305 753 for (int i=0; i<node->size; ++i) {
RyoheiHagimoto 0:0e0631af0305 754 int index = node->indices[i];
RyoheiHagimoto 0:0e0631af0305 755 if (!checked[index]) {
RyoheiHagimoto 0:0e0631af0305 756 DistanceType dist = distance(dataset[index], vec, veclen_);
RyoheiHagimoto 0:0e0631af0305 757 result.addPoint(dist, index);
RyoheiHagimoto 0:0e0631af0305 758 checked[index] = true;
RyoheiHagimoto 0:0e0631af0305 759 ++checks;
RyoheiHagimoto 0:0e0631af0305 760 }
RyoheiHagimoto 0:0e0631af0305 761 }
RyoheiHagimoto 0:0e0631af0305 762 }
RyoheiHagimoto 0:0e0631af0305 763 else {
RyoheiHagimoto 0:0e0631af0305 764 DistanceType* domain_distances = new DistanceType[branching_];
RyoheiHagimoto 0:0e0631af0305 765 int best_index = 0;
RyoheiHagimoto 0:0e0631af0305 766 domain_distances[best_index] = distance(vec, dataset[node->childs[best_index]->pivot], veclen_);
RyoheiHagimoto 0:0e0631af0305 767 for (int i=1; i<branching_; ++i) {
RyoheiHagimoto 0:0e0631af0305 768 domain_distances[i] = distance(vec, dataset[node->childs[i]->pivot], veclen_);
RyoheiHagimoto 0:0e0631af0305 769 if (domain_distances[i]<domain_distances[best_index]) {
RyoheiHagimoto 0:0e0631af0305 770 best_index = i;
RyoheiHagimoto 0:0e0631af0305 771 }
RyoheiHagimoto 0:0e0631af0305 772 }
RyoheiHagimoto 0:0e0631af0305 773 for (int i=0; i<branching_; ++i) {
RyoheiHagimoto 0:0e0631af0305 774 if (i!=best_index) {
RyoheiHagimoto 0:0e0631af0305 775 heap->insert(BranchSt(node->childs[i],domain_distances[i]));
RyoheiHagimoto 0:0e0631af0305 776 }
RyoheiHagimoto 0:0e0631af0305 777 }
RyoheiHagimoto 0:0e0631af0305 778 delete[] domain_distances;
RyoheiHagimoto 0:0e0631af0305 779 findNN(node->childs[best_index],result,vec, checks, maxChecks, heap, checked);
RyoheiHagimoto 0:0e0631af0305 780 }
RyoheiHagimoto 0:0e0631af0305 781 }
RyoheiHagimoto 0:0e0631af0305 782
RyoheiHagimoto 0:0e0631af0305 783 private:
RyoheiHagimoto 0:0e0631af0305 784
RyoheiHagimoto 0:0e0631af0305 785
RyoheiHagimoto 0:0e0631af0305 786 /**
RyoheiHagimoto 0:0e0631af0305 787 * The dataset used by this index
RyoheiHagimoto 0:0e0631af0305 788 */
RyoheiHagimoto 0:0e0631af0305 789 const Matrix<ElementType> dataset;
RyoheiHagimoto 0:0e0631af0305 790
RyoheiHagimoto 0:0e0631af0305 791 /**
RyoheiHagimoto 0:0e0631af0305 792 * Parameters used by this index
RyoheiHagimoto 0:0e0631af0305 793 */
RyoheiHagimoto 0:0e0631af0305 794 IndexParams params;
RyoheiHagimoto 0:0e0631af0305 795
RyoheiHagimoto 0:0e0631af0305 796
RyoheiHagimoto 0:0e0631af0305 797 /**
RyoheiHagimoto 0:0e0631af0305 798 * Number of features in the dataset.
RyoheiHagimoto 0:0e0631af0305 799 */
RyoheiHagimoto 0:0e0631af0305 800 size_t size_;
RyoheiHagimoto 0:0e0631af0305 801
RyoheiHagimoto 0:0e0631af0305 802 /**
RyoheiHagimoto 0:0e0631af0305 803 * Length of each feature.
RyoheiHagimoto 0:0e0631af0305 804 */
RyoheiHagimoto 0:0e0631af0305 805 size_t veclen_;
RyoheiHagimoto 0:0e0631af0305 806
RyoheiHagimoto 0:0e0631af0305 807 /**
RyoheiHagimoto 0:0e0631af0305 808 * The root node in the tree.
RyoheiHagimoto 0:0e0631af0305 809 */
RyoheiHagimoto 0:0e0631af0305 810 NodePtr* root;
RyoheiHagimoto 0:0e0631af0305 811
RyoheiHagimoto 0:0e0631af0305 812 /**
RyoheiHagimoto 0:0e0631af0305 813 * Array of indices to vectors in the dataset.
RyoheiHagimoto 0:0e0631af0305 814 */
RyoheiHagimoto 0:0e0631af0305 815 int** indices;
RyoheiHagimoto 0:0e0631af0305 816
RyoheiHagimoto 0:0e0631af0305 817
RyoheiHagimoto 0:0e0631af0305 818 /**
RyoheiHagimoto 0:0e0631af0305 819 * The distance
RyoheiHagimoto 0:0e0631af0305 820 */
RyoheiHagimoto 0:0e0631af0305 821 Distance distance;
RyoheiHagimoto 0:0e0631af0305 822
RyoheiHagimoto 0:0e0631af0305 823 /**
RyoheiHagimoto 0:0e0631af0305 824 * Pooled memory allocator.
RyoheiHagimoto 0:0e0631af0305 825 *
RyoheiHagimoto 0:0e0631af0305 826 * Using a pooled memory allocator is more efficient
RyoheiHagimoto 0:0e0631af0305 827 * than allocating memory directly when there is a large
RyoheiHagimoto 0:0e0631af0305 828 * number small of memory allocations.
RyoheiHagimoto 0:0e0631af0305 829 */
RyoheiHagimoto 0:0e0631af0305 830 PooledAllocator pool;
RyoheiHagimoto 0:0e0631af0305 831
RyoheiHagimoto 0:0e0631af0305 832 /**
RyoheiHagimoto 0:0e0631af0305 833 * Memory occupied by the index.
RyoheiHagimoto 0:0e0631af0305 834 */
RyoheiHagimoto 0:0e0631af0305 835 int memoryCounter;
RyoheiHagimoto 0:0e0631af0305 836
RyoheiHagimoto 0:0e0631af0305 837 /** index parameters */
RyoheiHagimoto 0:0e0631af0305 838 int branching_;
RyoheiHagimoto 0:0e0631af0305 839 int trees_;
RyoheiHagimoto 0:0e0631af0305 840 flann_centers_init_t centers_init_;
RyoheiHagimoto 0:0e0631af0305 841 int leaf_size_;
RyoheiHagimoto 0:0e0631af0305 842
RyoheiHagimoto 0:0e0631af0305 843
RyoheiHagimoto 0:0e0631af0305 844 };
RyoheiHagimoto 0:0e0631af0305 845
RyoheiHagimoto 0:0e0631af0305 846 }
RyoheiHagimoto 0:0e0631af0305 847
RyoheiHagimoto 0:0e0631af0305 848 #endif /* OPENCV_FLANN_HIERARCHICAL_CLUSTERING_INDEX_H_ */