Important changes to repositories hosted on mbed.com
Mbed-hosted Mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software, download the repository ZIP archive or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Dependents: RZ_A2M_Mbed_samples
lsh_index.h
00001 /*********************************************************************** 00002 * Software License Agreement (BSD License) 00003 * 00004 * Copyright 2008-2009 Marius Muja (mariusm@cs.ubc.ca). All rights reserved. 00005 * Copyright 2008-2009 David G. Lowe (lowe@cs.ubc.ca). All rights reserved. 00006 * 00007 * THE BSD LICENSE 00008 * 00009 * Redistribution and use in source and binary forms, with or without 00010 * modification, are permitted provided that the following conditions 00011 * are met: 00012 * 00013 * 1. Redistributions of source code must retain the above copyright 00014 * notice, this list of conditions and the following disclaimer. 00015 * 2. Redistributions in binary form must reproduce the above copyright 00016 * notice, this list of conditions and the following disclaimer in the 00017 * documentation and/or other materials provided with the distribution. 00018 * 00019 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 00020 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 00021 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 00022 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 00023 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 00024 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00025 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00026 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 00028 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
00029 *************************************************************************/ 00030 00031 /*********************************************************************** 00032 * Author: Vincent Rabaud 00033 *************************************************************************/ 00034 00035 #ifndef OPENCV_FLANN_LSH_INDEX_H_ 00036 #define OPENCV_FLANN_LSH_INDEX_H_ 00037 00038 #include <algorithm> 00039 #include <cassert> 00040 #include <cstring> 00041 #include <map> 00042 #include <vector> 00043 00044 #include "general.h" 00045 #include "nn_index.h" 00046 #include "matrix.h" 00047 #include "result_set.h" 00048 #include "heap.h" 00049 #include "lsh_table.h" 00050 #include "allocator.h" 00051 #include "random.h" 00052 #include "saving.h" 00053 00054 namespace cvflann 00055 { 00056 00057 struct LshIndexParams : public IndexParams 00058 { 00059 LshIndexParams(unsigned int table_number = 12, unsigned int key_size = 20, unsigned int multi_probe_level = 2) 00060 { 00061 (* this)["algorithm"] = FLANN_INDEX_LSH; 00062 // The number of hash tables to use 00063 (*this)["table_number"] = table_number; 00064 // The length of the key in the hash tables 00065 (*this)["key_size"] = key_size; 00066 // Number of levels to use in multi-probe (0 for standard LSH) 00067 (*this)["multi_probe_level"] = multi_probe_level; 00068 } 00069 }; 00070 00071 /** 00072 * Randomized kd-tree index 00073 * 00074 * Contains the k-d trees and other information for indexing a set of points 00075 * for nearest-neighbor matching. 
00076 */ 00077 template<typename Distance> 00078 class LshIndex : public NNIndex<Distance> 00079 { 00080 public: 00081 typedef typename Distance::ElementType ElementType; 00082 typedef typename Distance::ResultType DistanceType; 00083 00084 /** Constructor 00085 * @param input_data dataset with the input features 00086 * @param params parameters passed to the LSH algorithm 00087 * @param d the distance used 00088 */ 00089 LshIndex(const Matrix<ElementType> & input_data, const IndexParams& params = LshIndexParams(), 00090 Distance d = Distance()) : 00091 dataset_(input_data), index_params_(params), distance_(d) 00092 { 00093 // cv::flann::IndexParams sets integer params as 'int', so it is used with get_param 00094 // in place of 'unsigned int' 00095 table_number_ = (unsigned int)get_param<int>(index_params_,"table_number",12); 00096 key_size_ = (unsigned int)get_param<int>(index_params_,"key_size",20); 00097 multi_probe_level_ = (unsigned int)get_param<int>(index_params_,"multi_probe_level",2); 00098 00099 feature_size_ = (unsigned)dataset_.cols; 00100 fill_xor_mask(0, key_size_, multi_probe_level_, xor_masks_); 00101 } 00102 00103 00104 LshIndex(const LshIndex&); 00105 LshIndex& operator=(const LshIndex&); 00106 00107 /** 00108 * Builds the index 00109 */ 00110 void buildIndex() 00111 { 00112 tables_.resize(table_number_); 00113 for (unsigned int i = 0; i < table_number_; ++i) { 00114 lsh::LshTable<ElementType>& table = tables_[i]; 00115 table = lsh::LshTable<ElementType>(feature_size_, key_size_); 00116 00117 // Add the features to the table 00118 table.add(dataset_); 00119 } 00120 } 00121 00122 flann_algorithm_t getType () const 00123 { 00124 return FLANN_INDEX_LSH; 00125 } 00126 00127 00128 void saveIndex(FILE* stream) 00129 { 00130 save_value(stream,table_number_); 00131 save_value(stream,key_size_); 00132 save_value(stream,multi_probe_level_); 00133 save_value(stream, dataset_); 00134 } 00135 00136 void loadIndex(FILE* stream) 00137 { 00138 load_value(stream, 
table_number_); 00139 load_value(stream, key_size_); 00140 load_value(stream, multi_probe_level_); 00141 load_value(stream, dataset_); 00142 // Building the index is so fast we can afford not storing it 00143 buildIndex(); 00144 00145 index_params_["algorithm"] = getType (); 00146 index_params_["table_number"] = table_number_; 00147 index_params_["key_size"] = key_size_; 00148 index_params_["multi_probe_level"] = multi_probe_level_; 00149 } 00150 00151 /** 00152 * Returns size of index. 00153 */ 00154 size_t size() const 00155 { 00156 return dataset_.rows; 00157 } 00158 00159 /** 00160 * Returns the length of an index feature. 00161 */ 00162 size_t veclen() const 00163 { 00164 return feature_size_; 00165 } 00166 00167 /** 00168 * Computes the index memory usage 00169 * Returns: memory used by the index 00170 */ 00171 int usedMemory() const 00172 { 00173 return (int)(dataset_.rows * sizeof(int)); 00174 } 00175 00176 00177 IndexParams getParameters () const 00178 { 00179 return index_params_; 00180 } 00181 00182 /** 00183 * \brief Perform k-nearest neighbor search 00184 * \param[in] queries The query points for which to find the nearest neighbors 00185 * \param[out] indices The indices of the nearest neighbors found 00186 * \param[out] dists Distances to the nearest neighbors found 00187 * \param[in] knn Number of nearest neighbors to return 00188 * \param[in] params Search parameters 00189 */ 00190 virtual void knnSearch(const Matrix<ElementType> & queries, Matrix<int> & indices, Matrix<DistanceType>& dists, int knn, const SearchParams& params) 00191 { 00192 assert(queries.cols == veclen()); 00193 assert(indices.rows >= queries.rows); 00194 assert(dists.rows >= queries.rows); 00195 assert(int(indices.cols) >= knn); 00196 assert(int(dists.cols) >= knn); 00197 00198 00199 KNNUniqueResultSet<DistanceType> resultSet(knn); 00200 for (size_t i = 0; i < queries.rows; i++) { 00201 resultSet.clear(); 00202 std::fill_n(indices[i], knn, -1); 00203 std::fill_n(dists[i], knn, 
std::numeric_limits<DistanceType>::max()); 00204 findNeighbors(resultSet, queries[i], params); 00205 if (get_param(params,"sorted",true)) resultSet.sortAndCopy(indices[i], dists[i], knn); 00206 else resultSet.copy(indices[i], dists[i], knn); 00207 } 00208 } 00209 00210 00211 /** 00212 * Find set of nearest neighbors to vec. Their indices are stored inside 00213 * the result object. 00214 * 00215 * Params: 00216 * result = the result object in which the indices of the nearest-neighbors are stored 00217 * vec = the vector for which to search the nearest neighbors 00218 * maxCheck = the maximum number of restarts (in a best-bin-first manner) 00219 */ 00220 void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& /*searchParams*/) 00221 { 00222 getNeighbors(vec, result); 00223 } 00224 00225 private: 00226 /** Defines the comparator on score and index 00227 */ 00228 typedef std::pair<float, unsigned int> ScoreIndexPair; 00229 struct SortScoreIndexPairOnSecond 00230 { 00231 bool operator()(const ScoreIndexPair& left, const ScoreIndexPair& right) const 00232 { 00233 return left.second < right.second; 00234 } 00235 }; 00236 00237 /** Fills the different xor masks to use when getting the neighbors in multi-probe LSH 00238 * @param key the key we build neighbors from 00239 * @param lowest_index the lowest index of the bit set 00240 * @param level the multi-probe level we are at 00241 * @param xor_masks all the xor mask 00242 */ 00243 void fill_xor_mask(lsh::BucketKey key, int lowest_index, unsigned int level, 00244 std::vector<lsh::BucketKey>& xor_masks) 00245 { 00246 xor_masks.push_back(key); 00247 if (level == 0) return; 00248 for (int index = lowest_index - 1; index >= 0; --index) { 00249 // Create a new key 00250 lsh::BucketKey new_key = key | (1 << index); 00251 fill_xor_mask(new_key, index, level - 1, xor_masks); 00252 } 00253 } 00254 00255 /** Performs the approximate nearest-neighbor search. 
00256 * @param vec the feature to analyze 00257 * @param do_radius flag indicating if we check the radius too 00258 * @param radius the radius if it is a radius search 00259 * @param do_k flag indicating if we limit the number of nn 00260 * @param k_nn the number of nearest neighbors 00261 * @param checked_average used for debugging 00262 */ 00263 void getNeighbors(const ElementType* vec, bool /*do_radius*/, float radius, bool do_k, unsigned int k_nn, 00264 float& /*checked_average*/) 00265 { 00266 static std::vector<ScoreIndexPair> score_index_heap; 00267 00268 if (do_k) { 00269 unsigned int worst_score = std::numeric_limits<unsigned int>::max(); 00270 typename std::vector<lsh::LshTable<ElementType> >::const_iterator table = tables_.begin(); 00271 typename std::vector<lsh::LshTable<ElementType> >::const_iterator table_end = tables_.end(); 00272 for (; table != table_end; ++table) { 00273 size_t key = table->getKey(vec); 00274 std::vector<lsh::BucketKey>::const_iterator xor_mask = xor_masks_.begin(); 00275 std::vector<lsh::BucketKey>::const_iterator xor_mask_end = xor_masks_.end(); 00276 for (; xor_mask != xor_mask_end; ++xor_mask) { 00277 size_t sub_key = key ^ (*xor_mask); 00278 const lsh::Bucket* bucket = table->getBucketFromKey(sub_key); 00279 if (bucket == 0) continue; 00280 00281 // Go over each descriptor index 00282 std::vector<lsh::FeatureIndex>::const_iterator training_index = bucket->begin(); 00283 std::vector<lsh::FeatureIndex>::const_iterator last_training_index = bucket->end(); 00284 DistanceType hamming_distance; 00285 00286 // Process the rest of the candidates 00287 for (; training_index < last_training_index; ++training_index) { 00288 hamming_distance = distance_(vec, dataset_[*training_index], dataset_.cols); 00289 00290 if (hamming_distance < worst_score) { 00291 // Insert the new element 00292 score_index_heap.push_back(ScoreIndexPair(hamming_distance, training_index)); 00293 std::push_heap(score_index_heap.begin(), score_index_heap.end()); 
00294 00295 if (score_index_heap.size() > (unsigned int)k_nn) { 00296 // Remove the highest distance value as we have too many elements 00297 std::pop_heap(score_index_heap.begin(), score_index_heap.end()); 00298 score_index_heap.pop_back(); 00299 // Keep track of the worst score 00300 worst_score = score_index_heap.front().first; 00301 } 00302 } 00303 } 00304 } 00305 } 00306 } 00307 else { 00308 typename std::vector<lsh::LshTable<ElementType> >::const_iterator table = tables_.begin(); 00309 typename std::vector<lsh::LshTable<ElementType> >::const_iterator table_end = tables_.end(); 00310 for (; table != table_end; ++table) { 00311 size_t key = table->getKey(vec); 00312 std::vector<lsh::BucketKey>::const_iterator xor_mask = xor_masks_.begin(); 00313 std::vector<lsh::BucketKey>::const_iterator xor_mask_end = xor_masks_.end(); 00314 for (; xor_mask != xor_mask_end; ++xor_mask) { 00315 size_t sub_key = key ^ (*xor_mask); 00316 const lsh::Bucket* bucket = table->getBucketFromKey(sub_key); 00317 if (bucket == 0) continue; 00318 00319 // Go over each descriptor index 00320 std::vector<lsh::FeatureIndex>::const_iterator training_index = bucket->begin(); 00321 std::vector<lsh::FeatureIndex>::const_iterator last_training_index = bucket->end(); 00322 DistanceType hamming_distance; 00323 00324 // Process the rest of the candidates 00325 for (; training_index < last_training_index; ++training_index) { 00326 // Compute the Hamming distance 00327 hamming_distance = distance_(vec, dataset_[*training_index], dataset_.cols); 00328 if (hamming_distance < radius) score_index_heap.push_back(ScoreIndexPair(hamming_distance, training_index)); 00329 } 00330 } 00331 } 00332 } 00333 } 00334 00335 /** Performs the approximate nearest-neighbor search. 
00336 * This is a slower version than the above as it uses the ResultSet 00337 * @param vec the feature to analyze 00338 */ 00339 void getNeighbors(const ElementType* vec, ResultSet<DistanceType>& result) 00340 { 00341 typename std::vector<lsh::LshTable<ElementType> >::const_iterator table = tables_.begin(); 00342 typename std::vector<lsh::LshTable<ElementType> >::const_iterator table_end = tables_.end(); 00343 for (; table != table_end; ++table) { 00344 size_t key = table->getKey(vec); 00345 std::vector<lsh::BucketKey>::const_iterator xor_mask = xor_masks_.begin(); 00346 std::vector<lsh::BucketKey>::const_iterator xor_mask_end = xor_masks_.end(); 00347 for (; xor_mask != xor_mask_end; ++xor_mask) { 00348 size_t sub_key = key ^ (*xor_mask); 00349 const lsh::Bucket* bucket = table->getBucketFromKey((lsh::BucketKey)sub_key); 00350 if (bucket == 0) continue; 00351 00352 // Go over each descriptor index 00353 std::vector<lsh::FeatureIndex>::const_iterator training_index = bucket->begin(); 00354 std::vector<lsh::FeatureIndex>::const_iterator last_training_index = bucket->end(); 00355 DistanceType hamming_distance; 00356 00357 // Process the rest of the candidates 00358 for (; training_index < last_training_index; ++training_index) { 00359 // Compute the Hamming distance 00360 hamming_distance = distance_(vec, dataset_[*training_index], (int)dataset_.cols); 00361 result.addPoint(hamming_distance, *training_index); 00362 } 00363 } 00364 } 00365 } 00366 00367 /** The different hash tables */ 00368 std::vector<lsh::LshTable<ElementType> > tables_; 00369 00370 /** The data the LSH tables where built from */ 00371 Matrix<ElementType> dataset_; 00372 00373 /** The size of the features (as ElementType[]) */ 00374 unsigned int feature_size_; 00375 00376 IndexParams index_params_; 00377 00378 /** table number */ 00379 unsigned int table_number_; 00380 /** key size */ 00381 unsigned int key_size_; 00382 /** How far should we look for neighbors in multi-probe LSH */ 00383 unsigned 
int multi_probe_level_; 00384 00385 /** The XOR masks to apply to a key to get the neighboring buckets */ 00386 std::vector<lsh::BucketKey> xor_masks_; 00387 00388 Distance distance_; 00389 }; 00390 } 00391 00392 #endif //OPENCV_FLANN_LSH_INDEX_H_
Generated on Tue Jul 12 2022 18:20:18 by Doxygen 1.7.2