opencv on mbed

Dependencies: mbed

Committer: joeverbout
Date: Thu Mar 31 21:16:38 2016 +0000
Revision: 0:ea44dc9ed014
OpenCV on mbed attempt

/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Copyright (C) 2014, Itseez Inc, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __OPENCV_ML_HPP__
#define __OPENCV_ML_HPP__

#ifdef __cplusplus
# include "opencv2/core.hpp"
#endif

#ifdef __cplusplus

#include <float.h>
#include <map>
#include <iostream>

/**
@defgroup ml Machine Learning

The Machine Learning Library (MLL) is a set of classes and functions for statistical
classification, regression, and clustering of data.

Most of the classification and regression algorithms are implemented as C++ classes. As the
algorithms have different sets of features (like the ability to handle missing measurements or
categorical input variables), there is little common ground between the classes. This common
ground is defined by the class cv::ml::StatModel that all the other ML classes are derived from.

See detailed overview here: @ref ml_intro.
*/

namespace cv
{

namespace ml
{

//! @addtogroup ml
//! @{

/** @brief Variable types */
enum VariableTypes
{
    VAR_NUMERICAL   =0, //!< same as VAR_ORDERED
    VAR_ORDERED     =0, //!< ordered variables
    VAR_CATEGORICAL =1  //!< categorical variables
};

/** @brief %Error types */
enum ErrorTypes
{
    TEST_ERROR = 0,
    TRAIN_ERROR = 1
};

/** @brief Sample types */
enum SampleTypes
{
    ROW_SAMPLE = 0, //!< each training sample is a row of samples
    COL_SAMPLE = 1  //!< each training sample occupies a column of samples
};

/** @brief The structure represents the logarithmic grid range of statmodel parameters.

It is used for optimizing statmodel accuracy by varying model parameters, the accuracy estimate
being computed by cross-validation.
*/
class CV_EXPORTS ParamGrid
{
public:
    /** @brief Default constructor */
    ParamGrid();
    /** @brief Constructor with parameters */
    ParamGrid(double _minVal, double _maxVal, double _logStep);

    double minVal; //!< Minimum value of the statmodel parameter. Default value is 0.
    double maxVal; //!< Maximum value of the statmodel parameter. Default value is 0.
    /** @brief Logarithmic step for iterating the statmodel parameter.

    The grid determines the following iteration sequence of the statmodel parameter values:
    \f[(minVal, minVal*logStep, minVal*{logStep}^2, \dots, minVal*{logStep}^n),\f]
    where \f$n\f$ is the maximal index satisfying
    \f[\texttt{minVal} * \texttt{logStep} ^n < \texttt{maxVal}\f]
    The grid is logarithmic, so logStep must always be greater than 1. Default value is 1.
    */
    double logStep;
};
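
/* Example (added for illustration; not part of the original header): under the iteration
   rule above, ParamGrid(1, 1000, 10) enumerates the values 1, 10, 100. A minimal sketch:

    ParamGrid grid(1, 1000, 10);
    for (double v = grid.minVal; v < grid.maxVal; v *= grid.logStep)
        std::cout << v << std::endl;  // prints 1, 10, 100
*/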

/** @brief Class encapsulating training data.

Please note that the class only specifies the interface of training data, but not implementation.
All the statistical model classes in the _ml_ module accept Ptr\<TrainData\> as a parameter. In other
words, you can create your own class derived from TrainData and pass a smart pointer to an instance
of this class into StatModel::train.

@sa @ref ml_intro_data
*/
class CV_EXPORTS_W TrainData
{
public:
    static inline float missingValue() { return FLT_MAX; }
    virtual ~TrainData();

    CV_WRAP virtual int getLayout() const = 0;
    CV_WRAP virtual int getNTrainSamples() const = 0;
    CV_WRAP virtual int getNTestSamples() const = 0;
    CV_WRAP virtual int getNSamples() const = 0;
    CV_WRAP virtual int getNVars() const = 0;
    CV_WRAP virtual int getNAllVars() const = 0;

    CV_WRAP virtual void getSample(InputArray varIdx, int sidx, float* buf) const = 0;
    CV_WRAP virtual Mat getSamples() const = 0;
    CV_WRAP virtual Mat getMissing() const = 0;

    /** @brief Returns matrix of train samples

    @param layout The requested layout. If it's different from the initial one, the matrix is
        transposed. See ml::SampleTypes.
    @param compressSamples if true, the function returns only the training samples (specified by
        sampleIdx)
    @param compressVars if true, the function returns the shorter training samples, containing only
        the active variables.

    In the current implementation the function tries to avoid physical data copying and returns the
    matrix stored inside TrainData (unless transposition or compression is needed).
    */
    CV_WRAP virtual Mat getTrainSamples(int layout=ROW_SAMPLE,
                                        bool compressSamples=true,
                                        bool compressVars=true) const = 0;

    /** @brief Returns the vector of responses

    The function returns the ordered or the original categorical responses. Usually it's used in
    regression algorithms.
    */
    CV_WRAP virtual Mat getTrainResponses() const = 0;

    /** @brief Returns the vector of normalized categorical responses

    The function returns the vector of responses. Each response is an integer from `0` to `<number of
    classes>-1`. The actual label value can then be retrieved from the class label vector, see
    TrainData::getClassLabels.
    */
    CV_WRAP virtual Mat getTrainNormCatResponses() const = 0;
    CV_WRAP virtual Mat getTestResponses() const = 0;
    CV_WRAP virtual Mat getTestNormCatResponses() const = 0;
    CV_WRAP virtual Mat getResponses() const = 0;
    CV_WRAP virtual Mat getNormCatResponses() const = 0;
    CV_WRAP virtual Mat getSampleWeights() const = 0;
    CV_WRAP virtual Mat getTrainSampleWeights() const = 0;
    CV_WRAP virtual Mat getTestSampleWeights() const = 0;
    CV_WRAP virtual Mat getVarIdx() const = 0;
    CV_WRAP virtual Mat getVarType() const = 0;
    CV_WRAP virtual int getResponseType() const = 0;
    CV_WRAP virtual Mat getTrainSampleIdx() const = 0;
    CV_WRAP virtual Mat getTestSampleIdx() const = 0;
    CV_WRAP virtual void getValues(int vi, InputArray sidx, float* values) const = 0;
    virtual void getNormCatValues(int vi, InputArray sidx, int* values) const = 0;
    CV_WRAP virtual Mat getDefaultSubstValues() const = 0;

    CV_WRAP virtual int getCatCount(int vi) const = 0;

    /** @brief Returns the vector of class labels

    The function returns the vector of unique labels occurring in the responses.
    */
    CV_WRAP virtual Mat getClassLabels() const = 0;

    CV_WRAP virtual Mat getCatOfs() const = 0;
    CV_WRAP virtual Mat getCatMap() const = 0;

    /** @brief Splits the training data into the training and test parts
    @sa TrainData::setTrainTestSplitRatio
    */
    CV_WRAP virtual void setTrainTestSplit(int count, bool shuffle=true) = 0;

    /** @brief Splits the training data into the training and test parts

    The function selects a subset of specified relative size and then returns it as the training
    set. If the function is not called, all the data is used for training. Please note that for
    each of TrainData::getTrain\* there is a corresponding TrainData::getTest\*, so that the test
    subset can be retrieved and processed as well.
    @sa TrainData::setTrainTestSplit
    */
    CV_WRAP virtual void setTrainTestSplitRatio(double ratio, bool shuffle=true) = 0;
    CV_WRAP virtual void shuffleTrainTest() = 0;

    CV_WRAP static Mat getSubVector(const Mat& vec, const Mat& idx);

    /** @brief Reads the dataset from a .csv file and returns the ready-to-use training data.

    @param filename The input file name
    @param headerLineCount The number of lines in the beginning to skip; besides the header, the
        function also skips empty lines and lines starting with `#`
    @param responseStartIdx Index of the first output variable. If -1, the function considers the
        last variable as the response
    @param responseEndIdx Index of the last output variable + 1. If -1, then there is a single
        response variable at responseStartIdx.
    @param varTypeSpec The optional text string that specifies the variables' types. It has the
        format `ord[n1-n2,n3,n4-n5,...]cat[n6,n7-n8,...]`. That is, variables from `n1 to n2`
        (inclusive range), `n3`, `n4 to n5` ... are considered ordered and `n6`, `n7 to n8` ... are
        considered as categorical. The range `[n1..n2] + [n3] + [n4..n5] + ... + [n6] + [n7..n8]`
        should cover all the variables. If varTypeSpec is not specified, then the algorithm uses the
        following rules:
        - all input variables are considered ordered by default. If some column contains non-
          numerical values, e.g. 'apple', 'pear', 'apple', 'apple', 'mango', the corresponding
          variable is considered categorical.
        - if there are several output variables, they are all considered as ordered. An error is
          reported when non-numerical values are used.
        - if there is a single output variable, then if its values are non-numerical or are all
          integers, then it's considered categorical. Otherwise, it's considered ordered.
    @param delimiter The character used to separate values in each line.
    @param missch The character used to specify missing measurements. It should not be a digit.
        Although it's a non-numerical value, it surely does not affect the decision of whether the
        variable is ordered or categorical.
    @note If the dataset only contains input variables and no responses, use responseStartIdx = -2
        and responseEndIdx = 0. The output variables vector will just contain zeros.
    */
    static Ptr<TrainData> loadFromCSV(const String& filename,
                                      int headerLineCount,
                                      int responseStartIdx=-1,
                                      int responseEndIdx=-1,
                                      const String& varTypeSpec=String(),
                                      char delimiter=',',
                                      char missch='?');
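
    /* Example (added for illustration; not part of the original header): loading a CSV file
       whose last column is the response, assuming a file "data.csv" with one header line:

        Ptr<TrainData> data = TrainData::loadFromCSV("data.csv",
                                                     1,    // skip one header line
                                                     -1,   // last variable is the response
                                                     -1);  // single response variable
        if( !data.empty() )
            printf("%d samples, %d variables\n", data->getNSamples(), data->getNVars());
    */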

    /** @brief Creates training data from in-memory arrays.

    @param samples matrix of samples. It should have CV_32F type.
    @param layout see ml::SampleTypes.
    @param responses matrix of responses. If the responses are scalar, they should be stored as a
        single row or as a single column. The matrix should have type CV_32F or CV_32S (in the
        former case the responses are considered as ordered by default; in the latter case - as
        categorical)
    @param varIdx vector specifying which variables to use for training. It can be an integer vector
        (CV_32S) containing 0-based variable indices or a byte vector (CV_8U) containing a mask of
        active variables.
    @param sampleIdx vector specifying which samples to use for training. It can be an integer
        vector (CV_32S) containing 0-based sample indices or a byte vector (CV_8U) containing a mask
        of training samples.
    @param sampleWeights optional vector with weights for each sample. It should have CV_32F type.
    @param varType optional vector of type CV_8U and size `<number_of_variables_in_samples> +
        <number_of_variables_in_responses>`, containing types of each input and output variable. See
        ml::VariableTypes.
    */
    CV_WRAP static Ptr<TrainData> create(InputArray samples, int layout, InputArray responses,
                                         InputArray varIdx=noArray(), InputArray sampleIdx=noArray(),
                                         InputArray sampleWeights=noArray(), InputArray varType=noArray());
};
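
/* Example (added for illustration; not part of the original header): building TrainData from
   in-memory arrays and holding out part of the samples for testing. The matrices here are
   placeholders for real data:

    Mat samples(100, 4, CV_32F);  // 100 samples, 4 features each
    Mat labels(100, 1, CV_32S);   // CV_32S responses are treated as categorical
    randu(samples, 0, 1);
    randu(labels, 0, 3);          // class labels in {0, 1, 2}

    Ptr<TrainData> data = TrainData::create(samples, ROW_SAMPLE, labels);
    data->setTrainTestSplitRatio(0.8, true);  // 80% train, 20% test, shuffled
*/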

/** @brief Base class for statistical models in OpenCV ML.
*/
class CV_EXPORTS_W StatModel : public Algorithm
{
public:
    /** Predict options */
    enum Flags {
        UPDATE_MODEL = 1,
        RAW_OUTPUT=1, //!< makes the method return the raw results (the sum), not the class label
        COMPRESSED_INPUT=2,
        PREPROCESSED_INPUT=4
    };

    /** @brief Returns the number of variables in training samples */
    CV_WRAP virtual int getVarCount() const = 0;

    CV_WRAP virtual bool empty() const;

    /** @brief Returns true if the model is trained */
    CV_WRAP virtual bool isTrained() const = 0;
    /** @brief Returns true if the model is a classifier */
    CV_WRAP virtual bool isClassifier() const = 0;

    /** @brief Trains the statistical model

    @param trainData training data that can be loaded from file using TrainData::loadFromCSV or
        created with TrainData::create.
    @param flags optional flags, depending on the model. Some of the models can be updated with the
        new training samples, not completely overwritten (such as NormalBayesClassifier or ANN_MLP).
    */
    CV_WRAP virtual bool train( const Ptr<TrainData>& trainData, int flags=0 );

    /** @brief Trains the statistical model

    @param samples training samples
    @param layout See ml::SampleTypes.
    @param responses vector of responses associated with the training samples.
    */
    CV_WRAP virtual bool train( InputArray samples, int layout, InputArray responses );

    /** @brief Computes error on the training or test dataset

    @param data the training data
    @param test if true, the error is computed over the test subset of the data, otherwise it's
        computed over the training subset of the data. Please note that if you loaded a completely
        different dataset to evaluate an already trained classifier, you will probably not want to
        set the test subset at all with TrainData::setTrainTestSplitRatio; specify test=false, so
        that the error is computed for the whole new set. Yes, this sounds a bit confusing.
    @param resp the optional output responses.

    The method uses StatModel::predict to compute the error. For regression models the error is
    computed as RMS, for classifiers - as a percent of misclassified samples (0%-100%).
    */
    CV_WRAP virtual float calcError( const Ptr<TrainData>& data, bool test, OutputArray resp ) const;

    /** @brief Predicts response(s) for the provided sample(s)

    @param samples The input samples, floating-point matrix
    @param results The optional output matrix of results.
    @param flags The optional flags, model-dependent. See cv::ml::StatModel::Flags.
    */
    CV_WRAP virtual float predict( InputArray samples, OutputArray results=noArray(), int flags=0 ) const = 0;

    /** @brief Create and train model with default parameters

    The class must implement static `create()` method with no parameters or with all default parameter values
    */
    template<typename _Tp> static Ptr<_Tp> train(const Ptr<TrainData>& data, int flags=0)
    {
        Ptr<_Tp> model = _Tp::create();
        return !model.empty() && model->train(data, flags) ? model : Ptr<_Tp>();
    }
};
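
/* Example (added for illustration; not part of the original header): the convenience template
   above creates a model with default parameters and trains it in one call. Assuming `data` is
   a Ptr<TrainData> prepared as in the previous example (including the train/test split):

    Ptr<KNearest> knn = StatModel::train<KNearest>(data);
    if( !knn.empty() )
    {
        Mat resp;
        float testError = knn->calcError(data, true, resp);  // % misclassified on the test split
        printf("test error: %.1f%%\n", testError);
    }
*/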

/****************************************************************************************\
*                                 Normal Bayes Classifier                                *
\****************************************************************************************/

/** @brief Bayes classifier for normally distributed data.

@sa @ref ml_intro_bayes
*/
class CV_EXPORTS_W NormalBayesClassifier : public StatModel
{
public:
    /** @brief Predicts the response for sample(s).

    The method estimates the most probable classes for input vectors. Input vectors (one or more)
    are stored as rows of the matrix inputs. In case of multiple input vectors, there should be one
    output vector outputs. The predicted class for a single input vector is returned by the method.
    The vector outputProbs contains the output probabilities corresponding to each element of
    result.
    */
    CV_WRAP virtual float predictProb( InputArray inputs, OutputArray outputs,
                                       OutputArray outputProbs, int flags=0 ) const = 0;

    /** Creates empty model
    Use StatModel::train to train the model after creation. */
    CV_WRAP static Ptr<NormalBayesClassifier> create();
};
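
/* Example (added for illustration; not part of the original header): a minimal sketch of
   Normal Bayes prediction with per-class probabilities, assuming `data` is a prepared
   Ptr<TrainData>:

    Ptr<NormalBayesClassifier> nb = NormalBayesClassifier::create();
    nb->train(data);

    Mat query(1, data->getNVars(), CV_32F, Scalar(0.5f));  // one sample to classify
    Mat outputs, outputProbs;
    nb->predictProb(query, outputs, outputProbs);
    // outputs holds the predicted class; outputProbs holds the per-class probabilities
*/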

/****************************************************************************************\
*                            K-Nearest Neighbour Classifier                              *
\****************************************************************************************/

/** @brief The class implements K-Nearest Neighbors model

@sa @ref ml_intro_knn
*/
class CV_EXPORTS_W KNearest : public StatModel
{
public:

    /** Default number of neighbors to use in predict method. */
    /** @see setDefaultK */
    CV_WRAP virtual int getDefaultK() const = 0;
    /** @copybrief getDefaultK @see getDefaultK */
    CV_WRAP virtual void setDefaultK(int val) = 0;

    /** Whether classification or regression model should be trained. */
    /** @see setIsClassifier */
    CV_WRAP virtual bool getIsClassifier() const = 0;
    /** @copybrief getIsClassifier @see getIsClassifier */
    CV_WRAP virtual void setIsClassifier(bool val) = 0;

    /** Parameter for KDTree implementation. */
    /** @see setEmax */
    CV_WRAP virtual int getEmax() const = 0;
    /** @copybrief getEmax @see getEmax */
    CV_WRAP virtual void setEmax(int val) = 0;

    /** %Algorithm type, one of KNearest::Types. */
    /** @see setAlgorithmType */
    CV_WRAP virtual int getAlgorithmType() const = 0;
    /** @copybrief getAlgorithmType @see getAlgorithmType */
    CV_WRAP virtual void setAlgorithmType(int val) = 0;

    /** @brief Finds the neighbors and predicts responses for input vectors.

    @param samples Input samples stored by rows. It is a single-precision floating-point matrix of
        `<number_of_samples> * k` size.
    @param k Number of used nearest neighbors. Should be greater than 1.
    @param results Vector with results of prediction (regression or classification) for each input
        sample. It is a single-precision floating-point vector with `<number_of_samples>` elements.
    @param neighborResponses Optional output values for corresponding neighbors. It is a single-
        precision floating-point matrix of `<number_of_samples> * k` size.
    @param dist Optional output distances from the input vectors to the corresponding neighbors. It
        is a single-precision floating-point matrix of `<number_of_samples> * k` size.

    For each input vector (a row of the matrix samples), the method finds the k nearest neighbors.
    In case of regression, the predicted result is a mean value of the particular vector's neighbor
    responses. In case of classification, the class is determined by voting.

    For each input vector, the neighbors are sorted by their distances to the vector.

    In case of the C++ interface you can use output pointers to empty matrices and the function will
    allocate memory itself.

    If only a single input vector is passed, all output matrices are optional and the predicted
    value is returned by the method.

    The function is parallelized with the TBB library.
    */
    CV_WRAP virtual float findNearest( InputArray samples, int k,
                                       OutputArray results,
                                       OutputArray neighborResponses=noArray(),
                                       OutputArray dist=noArray() ) const = 0;

    /** @brief Implementations of KNearest algorithm
    */
    enum Types
    {
        BRUTE_FORCE=1,
        KDTREE=2
    };

    /** @brief Creates the empty model

    The static method creates an empty %KNearest classifier. It should then be trained using the StatModel::train method.
    */
    CV_WRAP static Ptr<KNearest> create();
};
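
/* Example (added for illustration; not part of the original header): brute-force k-NN
   classification with explicit neighbor responses and distances, assuming `data` is a
   prepared Ptr<TrainData>:

    Ptr<KNearest> knn = KNearest::create();
    knn->setAlgorithmType(KNearest::BRUTE_FORCE);
    knn->setIsClassifier(true);
    knn->train(data);

    Mat query(1, data->getNVars(), CV_32F, Scalar(0.5f));
    Mat results, neighborResponses, dist;
    float predicted = knn->findNearest(query, 3, results, neighborResponses, dist);
    // `predicted` is the voted class; neighborResponses/dist describe the 3 nearest neighbors
*/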

/****************************************************************************************\
*                                Support Vector Machines                                 *
\****************************************************************************************/

/** @brief Support Vector Machines.

@sa @ref ml_intro_svm
*/
class CV_EXPORTS_W SVM : public StatModel
{
public:

    class CV_EXPORTS Kernel : public Algorithm
    {
    public:
        virtual int getType() const = 0;
        virtual void calc( int vcount, int n, const float* vecs, const float* another, float* results ) = 0;
    };

    /** Type of a %SVM formulation.
    See SVM::Types. Default value is SVM::C_SVC. */
    /** @see setType */
    CV_WRAP virtual int getType() const = 0;
    /** @copybrief getType @see getType */
    CV_WRAP virtual void setType(int val) = 0;

    /** Parameter \f$\gamma\f$ of a kernel function.
    For SVM::POLY, SVM::RBF, SVM::SIGMOID or SVM::CHI2. Default value is 1. */
    /** @see setGamma */
    CV_WRAP virtual double getGamma() const = 0;
    /** @copybrief getGamma @see getGamma */
    CV_WRAP virtual void setGamma(double val) = 0;

    /** Parameter _coef0_ of a kernel function.
    For SVM::POLY or SVM::SIGMOID. Default value is 0.*/
    /** @see setCoef0 */
    CV_WRAP virtual double getCoef0() const = 0;
    /** @copybrief getCoef0 @see getCoef0 */
    CV_WRAP virtual void setCoef0(double val) = 0;

    /** Parameter _degree_ of a kernel function.
    For SVM::POLY. Default value is 0. */
    /** @see setDegree */
    CV_WRAP virtual double getDegree() const = 0;
    /** @copybrief getDegree @see getDegree */
    CV_WRAP virtual void setDegree(double val) = 0;

    /** Parameter _C_ of a %SVM optimization problem.
    For SVM::C_SVC, SVM::EPS_SVR or SVM::NU_SVR. Default value is 0. */
    /** @see setC */
    CV_WRAP virtual double getC() const = 0;
    /** @copybrief getC @see getC */
    CV_WRAP virtual void setC(double val) = 0;

    /** Parameter \f$\nu\f$ of a %SVM optimization problem.
    For SVM::NU_SVC, SVM::ONE_CLASS or SVM::NU_SVR. Default value is 0. */
    /** @see setNu */
    CV_WRAP virtual double getNu() const = 0;
    /** @copybrief getNu @see getNu */
    CV_WRAP virtual void setNu(double val) = 0;

    /** Parameter \f$\epsilon\f$ of a %SVM optimization problem.
    For SVM::EPS_SVR. Default value is 0. */
    /** @see setP */
    CV_WRAP virtual double getP() const = 0;
    /** @copybrief getP @see getP */
    CV_WRAP virtual void setP(double val) = 0;

    /** Optional weights in the SVM::C_SVC problem, assigned to particular classes.
    They are multiplied by _C_ so the parameter _C_ of class _i_ becomes `classWeights(i) * C`. Thus
    these weights affect the misclassification penalty for different classes. The larger the weight,
    the larger the penalty on misclassification of data from the corresponding class. Default value is
    empty Mat. */
    /** @see setClassWeights */
    CV_WRAP virtual cv::Mat getClassWeights() const = 0;
    /** @copybrief getClassWeights @see getClassWeights */
    CV_WRAP virtual void setClassWeights(const cv::Mat &val) = 0;

    /** Termination criteria of the iterative %SVM training procedure which solves a partial
    case of constrained quadratic optimization problem.
    You can specify tolerance and/or the maximum number of iterations. Default value is
    `TermCriteria( TermCriteria::MAX_ITER + TermCriteria::EPS, 1000, FLT_EPSILON )`; */
    /** @see setTermCriteria */
    CV_WRAP virtual cv::TermCriteria getTermCriteria() const = 0;
    /** @copybrief getTermCriteria @see getTermCriteria */
    CV_WRAP virtual void setTermCriteria(const cv::TermCriteria &val) = 0;

    /** Type of a %SVM kernel.
    See SVM::KernelTypes. Default value is SVM::RBF. */
    CV_WRAP virtual int getKernelType() const = 0;

    /** Initialize with one of predefined kernels.
    See SVM::KernelTypes. */
    CV_WRAP virtual void setKernel(int kernelType) = 0;

    /** Initialize with custom kernel.
    See SVM::Kernel class for implementation details */
    virtual void setCustomKernel(const Ptr<Kernel> &_kernel) = 0;

    //! %SVM type
    enum Types {
        /** C-Support Vector Classification. n-class classification (n \f$\geq\f$ 2), allows
        imperfect separation of classes with penalty multiplier C for outliers. */
        C_SVC=100,
        /** \f$\nu\f$-Support Vector Classification. n-class classification with possible
        imperfect separation. Parameter \f$\nu\f$ (in the range 0..1, the larger the value, the smoother
        the decision boundary) is used instead of C. */
        NU_SVC=101,
        /** Distribution Estimation (One-class %SVM). All the training data are from
        the same class, %SVM builds a boundary that separates the class from the rest of the feature
        space. */
        ONE_CLASS=102,
        /** \f$\epsilon\f$-Support Vector Regression. The distance between feature vectors
        from the training set and the fitting hyper-plane must be less than p. For outliers the
        penalty multiplier C is used. */
        EPS_SVR=103,
        /** \f$\nu\f$-Support Vector Regression. \f$\nu\f$ is used instead of p.
        See @cite LibSVM for details. */
        NU_SVR=104
    };

    /** @brief %SVM kernel type

    A comparison of different kernels on the following 2D test case with four classes. Four
    SVM::C_SVC SVMs have been trained (one against rest) with auto_train. Evaluation on three
    different kernels (SVM::CHI2, SVM::INTER, SVM::RBF). The color depicts the class with max score.
    Bright means max-score \> 0, dark means max-score \< 0.
    ![image](pics/SVM_Comparison.png)
    */
    enum KernelTypes {
        /** Returned by SVM::getKernelType in the case when a custom kernel has been set */
        CUSTOM=-1,
        /** Linear kernel. No mapping is done, linear discrimination (or regression) is
        done in the original feature space. It is the fastest option. \f$K(x_i, x_j) = x_i^T x_j\f$. */
        LINEAR=0,
        /** Polynomial kernel:
        \f$K(x_i, x_j) = (\gamma x_i^T x_j + coef0)^{degree}, \gamma > 0\f$. */
        POLY=1,
        /** Radial basis function (RBF), a good choice in most cases.
        \f$K(x_i, x_j) = e^{-\gamma ||x_i - x_j||^2}, \gamma > 0\f$. */
        RBF=2,
        /** Sigmoid kernel: \f$K(x_i, x_j) = \tanh(\gamma x_i^T x_j + coef0)\f$. */
        SIGMOID=3,
        /** Exponential Chi2 kernel, similar to the RBF kernel:
        \f$K(x_i, x_j) = e^{-\gamma \chi^2(x_i,x_j)}, \chi^2(x_i,x_j) = (x_i-x_j)^2/(x_i+x_j), \gamma > 0\f$. */
        CHI2=4,
        /** Histogram intersection kernel. A fast kernel. \f$K(x_i, x_j) = min(x_i,x_j)\f$. */
        INTER=5
    };

    //! %SVM params type
    enum ParamTypes {
        C=0,
        GAMMA=1,
        P=2,
        NU=3,
        COEF=4,
        DEGREE=5
    };

    /** @brief Trains an %SVM with optimal parameters.

    @param data the training data that can be constructed using TrainData::create or
        TrainData::loadFromCSV.
    @param kFold Cross-validation parameter. The training set is divided into kFold subsets. One
        subset is used to test the model, the others form the train set. So, the %SVM algorithm is
        executed kFold times.
    @param Cgrid grid for C
    @param gammaGrid grid for gamma
    @param pGrid grid for p
    @param nuGrid grid for nu
    @param coeffGrid grid for coeff
    @param degreeGrid grid for degree
    @param balanced If true and the problem is 2-class classification, then the method creates more
        balanced cross-validation subsets, that is, subsets in which the proportions between classes
        are close to the proportions in the whole train dataset.

    The method trains the %SVM model automatically by choosing the optimal parameters C, gamma, p,
    nu, coef0, degree. Parameters are considered optimal when the cross-validation
    estimate of the test set error is minimal.

    If there is no need to optimize a parameter, the corresponding grid step should be set to any
    value less than or equal to 1. For example, to avoid optimization in gamma, set
    `gammaGrid.logStep = 0` and leave `gammaGrid.minVal` and `gammaGrid.maxVal` as arbitrary
    numbers. In this case, the current value of gamma is used.

    And, finally, if the optimization in a parameter is required but the corresponding grid is
    unknown, you may call the function SVM::getDefaultGrid. To generate a grid, for example, for
    gamma, call `SVM::getDefaultGrid(SVM::GAMMA)`.

    This function works for the classification (SVM::C_SVC or SVM::NU_SVC) as well as for the
    regression (SVM::EPS_SVR or SVM::NU_SVR). If it is SVM::ONE_CLASS, no optimization is made and
    the usual %SVM with parameters specified in params is executed.
    */
    virtual bool trainAuto( const Ptr<TrainData>& data, int kFold = 10,
                            ParamGrid Cgrid = SVM::getDefaultGrid(SVM::C),
                            ParamGrid gammaGrid = SVM::getDefaultGrid(SVM::GAMMA),
                            ParamGrid pGrid = SVM::getDefaultGrid(SVM::P),
                            ParamGrid nuGrid = SVM::getDefaultGrid(SVM::NU),
                            ParamGrid coeffGrid = SVM::getDefaultGrid(SVM::COEF),
                            ParamGrid degreeGrid = SVM::getDefaultGrid(SVM::DEGREE),
                            bool balanced=false) = 0;
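
    /* Example (added for illustration; not part of the original header): running trainAuto with
       the gamma grid disabled, so the current gamma value is kept while the other parameters are
       searched by 10-fold cross-validation. `svm` and `data` are assumed to exist:

        ParamGrid gammaGrid = SVM::getDefaultGrid(SVM::GAMMA);
        gammaGrid.logStep = 0;  // a grid step <= 1 disables optimization of this parameter
        svm->trainAuto(data, 10, SVM::getDefaultGrid(SVM::C), gammaGrid);
    */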

    /** @brief Retrieves all the support vectors

    The method returns all the support vectors as a floating-point matrix, where support vectors are
    stored as matrix rows.
    */
    CV_WRAP virtual Mat getSupportVectors() const = 0;

    /** @brief Retrieves all the uncompressed support vectors of a linear %SVM

    The method returns all the uncompressed support vectors of a linear %SVM that the compressed
    support vector, used for prediction, was derived from. They are returned in a floating-point
    matrix, where the support vectors are stored as matrix rows.
    */
    CV_WRAP Mat getUncompressedSupportVectors() const;

    /** @brief Retrieves the decision function

    @param i the index of the decision function. If the problem solved is regression, 1-class or
        2-class classification, then there will be just one decision function and the index should
        always be 0. Otherwise, in the case of N-class classification, there will be \f$N(N-1)/2\f$
        decision functions.
    @param alpha the optional output vector for weights, corresponding to different support vectors.
        In the case of linear %SVM all the alpha's will be 1's.
    @param svidx the optional output vector of indices of support vectors within the matrix of
        support vectors (which can be retrieved by SVM::getSupportVectors). In the case of linear
        %SVM each decision function consists of a single "compressed" support vector.

    The method returns the rho parameter of the decision function, a scalar subtracted from the weighted
    sum of kernel responses.
    */
    CV_WRAP virtual double getDecisionFunction(int i, OutputArray alpha, OutputArray svidx) const = 0;
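
    /* Example (added for illustration; not part of the original header): reading back the single
       decision function of a trained 2-class linear %SVM. For a linear %SVM the decision value of
       a sample x is dot(sv.row(0), x) - rho:

        Mat alpha, svidx;
        double rho = svm->getDecisionFunction(0, alpha, svidx);
        Mat sv = svm->getSupportVectors();  // one "compressed" support vector per function
    */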

    /** @brief Generates a grid for %SVM parameters.

    @param param_id %SVM parameters IDs that must be one of the SVM::ParamTypes. The grid is
        generated for the parameter with this ID.

    The function generates a grid for the specified parameter of the %SVM algorithm. The grid may be
    passed to the function SVM::trainAuto.
    */
    static ParamGrid getDefaultGrid( int param_id );

    /** Creates empty model.
    Use StatModel::train to train the model. Since %SVM has several parameters, you may want to
    find the best parameters for your problem; this can be done with SVM::trainAuto. */
    CV_WRAP static Ptr<SVM> create();
};
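
/* Example (added for illustration; not part of the original header): a minimal C-SVC setup with
   an RBF kernel, assuming `data` is a prepared Ptr<TrainData> with integer class labels:

    Ptr<SVM> svm = SVM::create();
    svm->setType(SVM::C_SVC);
    svm->setKernel(SVM::RBF);
    svm->setC(1.0);
    svm->setGamma(0.5);
    svm->train(data);

    Mat query(1, data->getNVars(), CV_32F, Scalar(0.5f));
    float label = svm->predict(query);
*/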
joeverbout 0:ea44dc9ed014 723
joeverbout 0:ea44dc9ed014 724 /****************************************************************************************\
joeverbout 0:ea44dc9ed014 725 * Expectation - Maximization *
joeverbout 0:ea44dc9ed014 726 \****************************************************************************************/
joeverbout 0:ea44dc9ed014 727
joeverbout 0:ea44dc9ed014 728 /** @brief The class implements the Expectation Maximization algorithm.
joeverbout 0:ea44dc9ed014 729
joeverbout 0:ea44dc9ed014 730 @sa @ref ml_intro_em
joeverbout 0:ea44dc9ed014 731 */
joeverbout 0:ea44dc9ed014 732 class CV_EXPORTS_W EM : public StatModel
joeverbout 0:ea44dc9ed014 733 {
joeverbout 0:ea44dc9ed014 734 public:
joeverbout 0:ea44dc9ed014 735 //! Type of covariation matrices
joeverbout 0:ea44dc9ed014 736 enum Types {
joeverbout 0:ea44dc9ed014 737 /** A scaled identity matrix \f$\mu_k * I\f$. There is the only
joeverbout 0:ea44dc9ed014 738 parameter \f$\mu_k\f$ to be estimated for each matrix. The option may be used in special cases,
joeverbout 0:ea44dc9ed014 739 when the constraint is relevant, or as a first step in the optimization (for example in case
joeverbout 0:ea44dc9ed014 740 when the data is preprocessed with PCA). The results of such preliminary estimation may be
joeverbout 0:ea44dc9ed014 741 passed again to the optimization procedure, this time with
joeverbout 0:ea44dc9ed014 742 covMatType=EM::COV_MAT_DIAGONAL. */
joeverbout 0:ea44dc9ed014 743 COV_MAT_SPHERICAL=0,
joeverbout 0:ea44dc9ed014 744 /** A diagonal matrix with positive diagonal elements. The number of
joeverbout 0:ea44dc9ed014 745 free parameters is d for each matrix. This is most commonly used option yielding good
joeverbout 0:ea44dc9ed014 746 estimation results. */
joeverbout 0:ea44dc9ed014 747 COV_MAT_DIAGONAL=1,
joeverbout 0:ea44dc9ed014 748 /** A symmetric positively defined matrix. The number of free
joeverbout 0:ea44dc9ed014 749 parameters in each matrix is about \f$d^2/2\f$. It is not recommended to use this option, unless
joeverbout 0:ea44dc9ed014 750 there is pretty accurate initial estimation of the parameters and/or a huge number of
joeverbout 0:ea44dc9ed014 751 training samples. */
joeverbout 0:ea44dc9ed014 752 COV_MAT_GENERIC=2,
joeverbout 0:ea44dc9ed014 753 COV_MAT_DEFAULT=COV_MAT_DIAGONAL
joeverbout 0:ea44dc9ed014 754 };
joeverbout 0:ea44dc9ed014 755
joeverbout 0:ea44dc9ed014 756 //! Default parameters
joeverbout 0:ea44dc9ed014 757 enum {DEFAULT_NCLUSTERS=5, DEFAULT_MAX_ITERS=100};
joeverbout 0:ea44dc9ed014 758
joeverbout 0:ea44dc9ed014 759 //! The initial step
joeverbout 0:ea44dc9ed014 760 enum {START_E_STEP=1, START_M_STEP=2, START_AUTO_STEP=0};
joeverbout 0:ea44dc9ed014 761
joeverbout 0:ea44dc9ed014 762 /** The number of mixture components in the Gaussian mixture model.
joeverbout 0:ea44dc9ed014 763 Default value of the parameter is EM::DEFAULT_NCLUSTERS=5. Some of %EM implementation could
joeverbout 0:ea44dc9ed014 764 determine the optimal number of mixtures within a specified value range, but that is not the
joeverbout 0:ea44dc9ed014 765 case in ML yet. */
joeverbout 0:ea44dc9ed014 766 /** @see setClustersNumber */
joeverbout 0:ea44dc9ed014 767 CV_WRAP virtual int getClustersNumber() const = 0;
joeverbout 0:ea44dc9ed014 768 /** @copybrief getClustersNumber @see getClustersNumber */
joeverbout 0:ea44dc9ed014 769 CV_WRAP virtual void setClustersNumber(int val) = 0;
joeverbout 0:ea44dc9ed014 770
joeverbout 0:ea44dc9ed014 771 /** Constraint on covariance matrices which defines type of matrices.
joeverbout 0:ea44dc9ed014 772 See EM::Types. */
joeverbout 0:ea44dc9ed014 773 /** @see setCovarianceMatrixType */
joeverbout 0:ea44dc9ed014 774 CV_WRAP virtual int getCovarianceMatrixType() const = 0;
joeverbout 0:ea44dc9ed014 775 /** @copybrief getCovarianceMatrixType @see getCovarianceMatrixType */
joeverbout 0:ea44dc9ed014 776 CV_WRAP virtual void setCovarianceMatrixType(int val) = 0;
joeverbout 0:ea44dc9ed014 777
joeverbout 0:ea44dc9ed014 778 /** The termination criteria of the %EM algorithm.
joeverbout 0:ea44dc9ed014 779 The %EM algorithm can be terminated by the number of iterations termCrit.maxCount (number of
joeverbout 0:ea44dc9ed014 780 M-steps) or when relative change of likelihood logarithm is less than termCrit.epsilon. Default
joeverbout 0:ea44dc9ed014 781 maximum number of iterations is EM::DEFAULT_MAX_ITERS=100. */
joeverbout 0:ea44dc9ed014 782 /** @see setTermCriteria */
joeverbout 0:ea44dc9ed014 783 CV_WRAP virtual TermCriteria getTermCriteria() const = 0;
joeverbout 0:ea44dc9ed014 784 /** @copybrief getTermCriteria @see getTermCriteria */
joeverbout 0:ea44dc9ed014 785 CV_WRAP virtual void setTermCriteria(const TermCriteria &val) = 0;
joeverbout 0:ea44dc9ed014 786
joeverbout 0:ea44dc9ed014 787 /** @brief Returns weights of the mixtures
joeverbout 0:ea44dc9ed014 788
joeverbout 0:ea44dc9ed014 789 Returns vector with the number of elements equal to the number of mixtures.
joeverbout 0:ea44dc9ed014 790 */
joeverbout 0:ea44dc9ed014 791 CV_WRAP virtual Mat getWeights() const = 0;
joeverbout 0:ea44dc9ed014 792 /** @brief Returns the cluster centers (means of the Gaussian mixture)
joeverbout 0:ea44dc9ed014 793
joeverbout 0:ea44dc9ed014 794 Returns matrix with the number of rows equal to the number of mixtures and number of columns
joeverbout 0:ea44dc9ed014 795 equal to the space dimensionality.
joeverbout 0:ea44dc9ed014 796 */
joeverbout 0:ea44dc9ed014 797 CV_WRAP virtual Mat getMeans() const = 0;
joeverbout 0:ea44dc9ed014 798 /** @brief Returns covariance matrices
joeverbout 0:ea44dc9ed014 799
joeverbout 0:ea44dc9ed014 800 Returns a vector of covariance matrices. The number of matrices is the number of Gaussian mixtures;
joeverbout 0:ea44dc9ed014 801 each matrix is a square floating-point matrix \f$N \times N\f$, where N is the space dimensionality.
joeverbout 0:ea44dc9ed014 802 */
joeverbout 0:ea44dc9ed014 803 CV_WRAP virtual void getCovs(CV_OUT std::vector<Mat>& covs) const = 0;
joeverbout 0:ea44dc9ed014 804
joeverbout 0:ea44dc9ed014 805 /** @brief Returns a likelihood logarithm value and an index of the most probable mixture component
joeverbout 0:ea44dc9ed014 806 for the given sample.
joeverbout 0:ea44dc9ed014 807
joeverbout 0:ea44dc9ed014 808 @param sample A sample for classification. It should be a one-channel matrix of
joeverbout 0:ea44dc9ed014 809 \f$1 \times dims\f$ or \f$dims \times 1\f$ size.
joeverbout 0:ea44dc9ed014 810 @param probs Optional output matrix that contains posterior probabilities of each component
joeverbout 0:ea44dc9ed014 811 given the sample. It has \f$1 \times nclusters\f$ size and CV_64FC1 type.
joeverbout 0:ea44dc9ed014 812
joeverbout 0:ea44dc9ed014 813 The method returns a two-element double vector. The zeroth element is the likelihood logarithm value
joeverbout 0:ea44dc9ed014 814 for the sample, and the first element is the index of the most probable mixture component for the
joeverbout 0:ea44dc9ed014 815 given sample.
joeverbout 0:ea44dc9ed014 816 */
joeverbout 0:ea44dc9ed014 817 CV_WRAP virtual Vec2d predict2(InputArray sample, OutputArray probs) const = 0;
joeverbout 0:ea44dc9ed014 818
joeverbout 0:ea44dc9ed014 819 /** @brief Estimate the Gaussian mixture parameters from a samples set.
joeverbout 0:ea44dc9ed014 820
joeverbout 0:ea44dc9ed014 821 This variation starts with the Expectation step. Initial values of the model parameters will be
joeverbout 0:ea44dc9ed014 822 estimated by the k-means algorithm.
joeverbout 0:ea44dc9ed014 823
joeverbout 0:ea44dc9ed014 824 Unlike many of the ML models, %EM is an unsupervised learning algorithm and it does not take
joeverbout 0:ea44dc9ed014 825 responses (class labels or function values) as input. Instead, it computes the *Maximum
joeverbout 0:ea44dc9ed014 826 Likelihood Estimate* of the Gaussian mixture parameters from an input sample set, stores all the
joeverbout 0:ea44dc9ed014 827 parameters inside the structure: \f$p_{i,k}\f$ in probs, \f$a_k\f$ in means , \f$S_k\f$ in
joeverbout 0:ea44dc9ed014 828 covs[k], \f$\pi_k\f$ in weights , and optionally computes the output "class label" for each
joeverbout 0:ea44dc9ed014 829 sample: \f$\texttt{labels}_i=\texttt{arg max}_k(p_{i,k}), i=1..N\f$ (indices of the most
joeverbout 0:ea44dc9ed014 830 probable mixture component for each sample).
joeverbout 0:ea44dc9ed014 831
joeverbout 0:ea44dc9ed014 832 The trained model can then be used for prediction, just like any other classifier. The
joeverbout 0:ea44dc9ed014 833 trained model is similar to the NormalBayesClassifier.
joeverbout 0:ea44dc9ed014 834
joeverbout 0:ea44dc9ed014 835 @param samples Samples from which the Gaussian mixture model will be estimated. It should be a
joeverbout 0:ea44dc9ed014 836 one-channel matrix, each row of which is a sample. If the matrix does not have CV_64F type
joeverbout 0:ea44dc9ed014 837 it will be converted to an inner matrix of that type for further computation.
joeverbout 0:ea44dc9ed014 838 @param logLikelihoods The optional output matrix that contains a likelihood logarithm value for
joeverbout 0:ea44dc9ed014 839 each sample. It has \f$nsamples \times 1\f$ size and CV_64FC1 type.
joeverbout 0:ea44dc9ed014 840 @param labels The optional output "class label" for each sample:
joeverbout 0:ea44dc9ed014 841 \f$\texttt{labels}_i=\texttt{arg max}_k(p_{i,k}), i=1..N\f$ (indices of the most probable
joeverbout 0:ea44dc9ed014 842 mixture component for each sample). It has \f$nsamples \times 1\f$ size and CV_32SC1 type.
joeverbout 0:ea44dc9ed014 843 @param probs The optional output matrix that contains posterior probabilities of each Gaussian
joeverbout 0:ea44dc9ed014 844 mixture component given each sample. It has \f$nsamples \times nclusters\f$ size and
joeverbout 0:ea44dc9ed014 845 CV_64FC1 type.
joeverbout 0:ea44dc9ed014 846 */
joeverbout 0:ea44dc9ed014 847 CV_WRAP virtual bool trainEM(InputArray samples,
joeverbout 0:ea44dc9ed014 848 OutputArray logLikelihoods=noArray(),
joeverbout 0:ea44dc9ed014 849 OutputArray labels=noArray(),
joeverbout 0:ea44dc9ed014 850 OutputArray probs=noArray()) = 0;
joeverbout 0:ea44dc9ed014 851
joeverbout 0:ea44dc9ed014 852 /** @brief Estimate the Gaussian mixture parameters from a samples set.
joeverbout 0:ea44dc9ed014 853
joeverbout 0:ea44dc9ed014 854 This variation starts with the Expectation step. You need to provide initial means \f$a_k\f$ of
joeverbout 0:ea44dc9ed014 855 mixture components. Optionally you can pass initial weights \f$\pi_k\f$ and covariance matrices
joeverbout 0:ea44dc9ed014 856 \f$S_k\f$ of mixture components.
joeverbout 0:ea44dc9ed014 857
joeverbout 0:ea44dc9ed014 858 @param samples Samples from which the Gaussian mixture model will be estimated. It should be a
joeverbout 0:ea44dc9ed014 859 one-channel matrix, each row of which is a sample. If the matrix does not have CV_64F type
joeverbout 0:ea44dc9ed014 860 it will be converted to an inner matrix of that type for further computation.
joeverbout 0:ea44dc9ed014 861 @param means0 Initial means \f$a_k\f$ of mixture components. It is a one-channel matrix of
joeverbout 0:ea44dc9ed014 862 \f$nclusters \times dims\f$ size. If the matrix does not have CV_64F type it will be
joeverbout 0:ea44dc9ed014 863 converted to an inner matrix of that type for further computation.
joeverbout 0:ea44dc9ed014 864 @param covs0 The vector of initial covariance matrices \f$S_k\f$ of mixture components. Each of
joeverbout 0:ea44dc9ed014 865 the covariance matrices is a one-channel matrix of \f$dims \times dims\f$ size. If the matrices
joeverbout 0:ea44dc9ed014 866 do not have CV_64F type they will be converted to inner matrices of that type for
joeverbout 0:ea44dc9ed014 867 further computation.
joeverbout 0:ea44dc9ed014 868 @param weights0 Initial weights \f$\pi_k\f$ of mixture components. It should be a one-channel
joeverbout 0:ea44dc9ed014 869 floating-point matrix with \f$1 \times nclusters\f$ or \f$nclusters \times 1\f$ size.
joeverbout 0:ea44dc9ed014 870 @param logLikelihoods The optional output matrix that contains a likelihood logarithm value for
joeverbout 0:ea44dc9ed014 871 each sample. It has \f$nsamples \times 1\f$ size and CV_64FC1 type.
joeverbout 0:ea44dc9ed014 872 @param labels The optional output "class label" for each sample:
joeverbout 0:ea44dc9ed014 873 \f$\texttt{labels}_i=\texttt{arg max}_k(p_{i,k}), i=1..N\f$ (indices of the most probable
joeverbout 0:ea44dc9ed014 874 mixture component for each sample). It has \f$nsamples \times 1\f$ size and CV_32SC1 type.
joeverbout 0:ea44dc9ed014 875 @param probs The optional output matrix that contains posterior probabilities of each Gaussian
joeverbout 0:ea44dc9ed014 876 mixture component given each sample. It has \f$nsamples \times nclusters\f$ size and
joeverbout 0:ea44dc9ed014 877 CV_64FC1 type.
joeverbout 0:ea44dc9ed014 878 */
joeverbout 0:ea44dc9ed014 879 CV_WRAP virtual bool trainE(InputArray samples, InputArray means0,
joeverbout 0:ea44dc9ed014 880 InputArray covs0=noArray(),
joeverbout 0:ea44dc9ed014 881 InputArray weights0=noArray(),
joeverbout 0:ea44dc9ed014 882 OutputArray logLikelihoods=noArray(),
joeverbout 0:ea44dc9ed014 883 OutputArray labels=noArray(),
joeverbout 0:ea44dc9ed014 884 OutputArray probs=noArray()) = 0;
joeverbout 0:ea44dc9ed014 885
joeverbout 0:ea44dc9ed014 886 /** @brief Estimate the Gaussian mixture parameters from a samples set.
joeverbout 0:ea44dc9ed014 887
joeverbout 0:ea44dc9ed014 888 This variation starts with the Maximization step. You need to provide initial probabilities
joeverbout 0:ea44dc9ed014 889 \f$p_{i,k}\f$ to use this option.
joeverbout 0:ea44dc9ed014 890
joeverbout 0:ea44dc9ed014 891 @param samples Samples from which the Gaussian mixture model will be estimated. It should be a
joeverbout 0:ea44dc9ed014 892 one-channel matrix, each row of which is a sample. If the matrix does not have CV_64F type
joeverbout 0:ea44dc9ed014 893 it will be converted to an inner matrix of that type for further computation.
joeverbout 0:ea44dc9ed014 894 @param probs0 Initial probabilities \f$p_{i,k}\f$ of sample \f$i\f$ belonging to mixture component \f$k\f$. It is a one-channel floating-point matrix of \f$nsamples \times nclusters\f$ size.
joeverbout 0:ea44dc9ed014 895 @param logLikelihoods The optional output matrix that contains a likelihood logarithm value for
joeverbout 0:ea44dc9ed014 896 each sample. It has \f$nsamples \times 1\f$ size and CV_64FC1 type.
joeverbout 0:ea44dc9ed014 897 @param labels The optional output "class label" for each sample:
joeverbout 0:ea44dc9ed014 898 \f$\texttt{labels}_i=\texttt{arg max}_k(p_{i,k}), i=1..N\f$ (indices of the most probable
joeverbout 0:ea44dc9ed014 899 mixture component for each sample). It has \f$nsamples \times 1\f$ size and CV_32SC1 type.
joeverbout 0:ea44dc9ed014 900 @param probs The optional output matrix that contains posterior probabilities of each Gaussian
joeverbout 0:ea44dc9ed014 901 mixture component given each sample. It has \f$nsamples \times nclusters\f$ size and
joeverbout 0:ea44dc9ed014 902 CV_64FC1 type.
joeverbout 0:ea44dc9ed014 903 */
joeverbout 0:ea44dc9ed014 904 CV_WRAP virtual bool trainM(InputArray samples, InputArray probs0,
joeverbout 0:ea44dc9ed014 905 OutputArray logLikelihoods=noArray(),
joeverbout 0:ea44dc9ed014 906 OutputArray labels=noArray(),
joeverbout 0:ea44dc9ed014 907 OutputArray probs=noArray()) = 0;
joeverbout 0:ea44dc9ed014 908
joeverbout 0:ea44dc9ed014 909 /** Creates an empty %EM model.
joeverbout 0:ea44dc9ed014 910 The model should then be trained using the StatModel::train(traindata, flags) method. Alternatively, you
joeverbout 0:ea44dc9ed014 911 can use one of the EM::train\* methods or load it from file using Algorithm::load\<EM\>(filename).
joeverbout 0:ea44dc9ed014 912 */
joeverbout 0:ea44dc9ed014 913 CV_WRAP static Ptr<EM> create();
joeverbout 0:ea44dc9ed014 914 };
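/* A minimal usage sketch for the EM class above: it fits a two-component Gaussian
   mixture to synthetic 2-D data and classifies one point with predict2. The data,
   cluster count and termination values are illustrative choices, not prescribed ones.
@code{.cpp}
#include <opencv2/core.hpp>
#include <opencv2/ml.hpp>
using namespace cv;
using namespace cv::ml;

int main()
{
    // 100 two-dimensional samples drawn around two centers.
    Mat samples(100, 2, CV_32FC1);
    randn(samples.rowRange(0, 50), Scalar(0), Scalar(1));
    randn(samples.rowRange(50, 100), Scalar(5), Scalar(1));

    Ptr<EM> em = EM::create();
    em->setClustersNumber(2);
    em->setCovarianceMatrixType(EM::COV_MAT_DIAGONAL);
    em->setTermCriteria(TermCriteria(TermCriteria::COUNT + TermCriteria::EPS, 100, 1e-6));

    Mat labels;
    em->trainEM(samples, noArray(), labels, noArray());

    // ret[0] is the likelihood logarithm, ret[1] the index of the
    // most probable mixture component for the sample.
    Mat sample = (Mat_<double>(1, 2) << 4.8, 5.1);
    Mat probs;
    Vec2d ret = em->predict2(sample, probs);
    return (int)ret[1];
}
@endcode
*/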
joeverbout 0:ea44dc9ed014 915
joeverbout 0:ea44dc9ed014 916 /****************************************************************************************\
joeverbout 0:ea44dc9ed014 917 * Decision Tree *
joeverbout 0:ea44dc9ed014 918 \****************************************************************************************/
joeverbout 0:ea44dc9ed014 919
joeverbout 0:ea44dc9ed014 920 /** @brief The class represents a single decision tree or a collection of decision trees.
joeverbout 0:ea44dc9ed014 921
joeverbout 0:ea44dc9ed014 922 The current public interface of the class allows the user to train only a single decision tree;
joeverbout 0:ea44dc9ed014 923 however, the class is capable of storing multiple decision trees and using them for prediction (by
joeverbout 0:ea44dc9ed014 924 summing responses or using a voting scheme), and the classes derived from DTrees (such as RTrees
joeverbout 0:ea44dc9ed014 925 and Boost) use this capability to implement decision tree ensembles.
joeverbout 0:ea44dc9ed014 926
joeverbout 0:ea44dc9ed014 927 @sa @ref ml_intro_trees
joeverbout 0:ea44dc9ed014 928 */
joeverbout 0:ea44dc9ed014 929 class CV_EXPORTS_W DTrees : public StatModel
joeverbout 0:ea44dc9ed014 930 {
joeverbout 0:ea44dc9ed014 931 public:
joeverbout 0:ea44dc9ed014 932 /** Predict options */
joeverbout 0:ea44dc9ed014 933 enum Flags { PREDICT_AUTO=0, PREDICT_SUM=(1<<8), PREDICT_MAX_VOTE=(2<<8), PREDICT_MASK=(3<<8) };
joeverbout 0:ea44dc9ed014 934
joeverbout 0:ea44dc9ed014 935 /** Cluster possible values of a categorical variable into K\<=maxCategories clusters to
joeverbout 0:ea44dc9ed014 936 find a suboptimal split.
joeverbout 0:ea44dc9ed014 937 If a discrete variable, on which the training procedure tries to make a split, takes more than
joeverbout 0:ea44dc9ed014 938 maxCategories values, the precise best subset estimation may take a very long time because the
joeverbout 0:ea44dc9ed014 939 algorithm is exponential. Instead, many decision tree engines (including our implementation)
joeverbout 0:ea44dc9ed014 940 try to find a sub-optimal split in this case by clustering all the samples into maxCategories
joeverbout 0:ea44dc9ed014 941 clusters, that is, some categories are merged together. The clustering is applied only in n \>
joeverbout 0:ea44dc9ed014 942 2-class classification problems for categorical variables with N \> maxCategories possible
joeverbout 0:ea44dc9ed014 943 values. In the case of regression and 2-class classification the optimal split can be found
joeverbout 0:ea44dc9ed014 944 efficiently without employing clustering, thus the parameter is not used in these cases.
joeverbout 0:ea44dc9ed014 945 Default value is 10.*/
joeverbout 0:ea44dc9ed014 946 /** @see setMaxCategories */
joeverbout 0:ea44dc9ed014 947 CV_WRAP virtual int getMaxCategories() const = 0;
joeverbout 0:ea44dc9ed014 948 /** @copybrief getMaxCategories @see getMaxCategories */
joeverbout 0:ea44dc9ed014 949 CV_WRAP virtual void setMaxCategories(int val) = 0;
joeverbout 0:ea44dc9ed014 950
joeverbout 0:ea44dc9ed014 951 /** The maximum possible depth of the tree.
joeverbout 0:ea44dc9ed014 952 That is, the training algorithm attempts to split a node while its depth is less than maxDepth.
joeverbout 0:ea44dc9ed014 953 The root node has zero depth. The actual depth may be smaller if the other termination criteria
joeverbout 0:ea44dc9ed014 954 are met (see the outline of the training procedure @ref ml_intro_trees "here"), and/or if the
joeverbout 0:ea44dc9ed014 955 tree is pruned. Default value is INT_MAX.*/
joeverbout 0:ea44dc9ed014 956 /** @see setMaxDepth */
joeverbout 0:ea44dc9ed014 957 CV_WRAP virtual int getMaxDepth() const = 0;
joeverbout 0:ea44dc9ed014 958 /** @copybrief getMaxDepth @see getMaxDepth */
joeverbout 0:ea44dc9ed014 959 CV_WRAP virtual void setMaxDepth(int val) = 0;
joeverbout 0:ea44dc9ed014 960
joeverbout 0:ea44dc9ed014 961 /** If the number of samples in a node is less than this parameter then the node will not be split.
joeverbout 0:ea44dc9ed014 962
joeverbout 0:ea44dc9ed014 963 Default value is 10.*/
joeverbout 0:ea44dc9ed014 964 /** @see setMinSampleCount */
joeverbout 0:ea44dc9ed014 965 CV_WRAP virtual int getMinSampleCount() const = 0;
joeverbout 0:ea44dc9ed014 966 /** @copybrief getMinSampleCount @see getMinSampleCount */
joeverbout 0:ea44dc9ed014 967 CV_WRAP virtual void setMinSampleCount(int val) = 0;
joeverbout 0:ea44dc9ed014 968
joeverbout 0:ea44dc9ed014 969 /** If CVFolds \> 1 then the algorithm prunes the built decision tree using a K-fold
joeverbout 0:ea44dc9ed014 970 cross-validation procedure where K is equal to CVFolds.
joeverbout 0:ea44dc9ed014 971 Default value is 10.*/
joeverbout 0:ea44dc9ed014 972 /** @see setCVFolds */
joeverbout 0:ea44dc9ed014 973 CV_WRAP virtual int getCVFolds() const = 0;
joeverbout 0:ea44dc9ed014 974 /** @copybrief getCVFolds @see getCVFolds */
joeverbout 0:ea44dc9ed014 975 CV_WRAP virtual void setCVFolds(int val) = 0;
joeverbout 0:ea44dc9ed014 976
joeverbout 0:ea44dc9ed014 977 /** If true then surrogate splits will be built.
joeverbout 0:ea44dc9ed014 978 These splits allow the algorithm to work with missing data and to compute variable importance correctly.
joeverbout 0:ea44dc9ed014 979 Default value is false.
joeverbout 0:ea44dc9ed014 980 @note currently it's not implemented.*/
joeverbout 0:ea44dc9ed014 981 /** @see setUseSurrogates */
joeverbout 0:ea44dc9ed014 982 CV_WRAP virtual bool getUseSurrogates() const = 0;
joeverbout 0:ea44dc9ed014 983 /** @copybrief getUseSurrogates @see getUseSurrogates */
joeverbout 0:ea44dc9ed014 984 CV_WRAP virtual void setUseSurrogates(bool val) = 0;
joeverbout 0:ea44dc9ed014 985
joeverbout 0:ea44dc9ed014 986 /** If true then pruning will be harsher.
joeverbout 0:ea44dc9ed014 987 This will make a tree more compact and more resistant to training data noise, but a bit less
joeverbout 0:ea44dc9ed014 988 accurate. Default value is true.*/
joeverbout 0:ea44dc9ed014 989 /** @see setUse1SERule */
joeverbout 0:ea44dc9ed014 990 CV_WRAP virtual bool getUse1SERule() const = 0;
joeverbout 0:ea44dc9ed014 991 /** @copybrief getUse1SERule @see getUse1SERule */
joeverbout 0:ea44dc9ed014 992 CV_WRAP virtual void setUse1SERule(bool val) = 0;
joeverbout 0:ea44dc9ed014 993
joeverbout 0:ea44dc9ed014 994 /** If true then pruned branches are physically removed from the tree.
joeverbout 0:ea44dc9ed014 995 Otherwise they are retained and it is possible to get results from the original unpruned (or
joeverbout 0:ea44dc9ed014 996 pruned less aggressively) tree. Default value is true.*/
joeverbout 0:ea44dc9ed014 997 /** @see setTruncatePrunedTree */
joeverbout 0:ea44dc9ed014 998 CV_WRAP virtual bool getTruncatePrunedTree() const = 0;
joeverbout 0:ea44dc9ed014 999 /** @copybrief getTruncatePrunedTree @see getTruncatePrunedTree */
joeverbout 0:ea44dc9ed014 1000 CV_WRAP virtual void setTruncatePrunedTree(bool val) = 0;
joeverbout 0:ea44dc9ed014 1001
joeverbout 0:ea44dc9ed014 1002 /** Termination criteria for regression trees.
joeverbout 0:ea44dc9ed014 1003 If all absolute differences between an estimated value in a node and the values of the train samples
joeverbout 0:ea44dc9ed014 1004 in this node are less than this parameter, then the node will not be split further. Default
joeverbout 0:ea44dc9ed014 1005 value is 0.01f.*/
joeverbout 0:ea44dc9ed014 1006 /** @see setRegressionAccuracy */
joeverbout 0:ea44dc9ed014 1007 CV_WRAP virtual float getRegressionAccuracy() const = 0;
joeverbout 0:ea44dc9ed014 1008 /** @copybrief getRegressionAccuracy @see getRegressionAccuracy */
joeverbout 0:ea44dc9ed014 1009 CV_WRAP virtual void setRegressionAccuracy(float val) = 0;
joeverbout 0:ea44dc9ed014 1010
joeverbout 0:ea44dc9ed014 1011 /** @brief The array of a priori class probabilities, sorted by the class label value.
joeverbout 0:ea44dc9ed014 1012
joeverbout 0:ea44dc9ed014 1013 The parameter can be used to tune the decision tree preferences toward a certain class. For
joeverbout 0:ea44dc9ed014 1014 example, if you want to detect some rare anomaly occurrence, the training base will likely
joeverbout 0:ea44dc9ed014 1015 contain many more normal cases than anomalies, so a very good classification performance
joeverbout 0:ea44dc9ed014 1016 will be achieved just by considering every case as normal. To avoid this, the priors can be
joeverbout 0:ea44dc9ed014 1017 specified, where the anomaly probability is artificially increased (up to 0.5 or even
joeverbout 0:ea44dc9ed014 1018 greater), so the weight of the misclassified anomalies becomes much bigger, and the tree is
joeverbout 0:ea44dc9ed014 1019 adjusted properly.
joeverbout 0:ea44dc9ed014 1020
joeverbout 0:ea44dc9ed014 1021 You can also think of this parameter as weights of prediction categories that determine the
joeverbout 0:ea44dc9ed014 1022 relative cost you assign to misclassification. That is, if the weight of the first
joeverbout 0:ea44dc9ed014 1023 category is 1 and the weight of the second category is 10, then each mistake in predicting
joeverbout 0:ea44dc9ed014 1024 the second category is equivalent to making 10 mistakes in predicting the first category.
joeverbout 0:ea44dc9ed014 1025 Default value is empty Mat.*/
joeverbout 0:ea44dc9ed014 1026 /** @see setPriors */
joeverbout 0:ea44dc9ed014 1027 CV_WRAP virtual cv::Mat getPriors() const = 0;
joeverbout 0:ea44dc9ed014 1028 /** @copybrief getPriors @see getPriors */
joeverbout 0:ea44dc9ed014 1029 CV_WRAP virtual void setPriors(const cv::Mat &val) = 0;
joeverbout 0:ea44dc9ed014 1030
joeverbout 0:ea44dc9ed014 1031 /** @brief The class represents a decision tree node.
joeverbout 0:ea44dc9ed014 1032 */
joeverbout 0:ea44dc9ed014 1033 class CV_EXPORTS Node
joeverbout 0:ea44dc9ed014 1034 {
joeverbout 0:ea44dc9ed014 1035 public:
joeverbout 0:ea44dc9ed014 1036 Node();
joeverbout 0:ea44dc9ed014 1037 double value; //!< Value at the node: a class label in case of classification or estimated
joeverbout 0:ea44dc9ed014 1038 //!< function value in case of regression.
joeverbout 0:ea44dc9ed014 1039 int classIdx; //!< Class index normalized to 0..class_count-1 range and assigned to the
joeverbout 0:ea44dc9ed014 1040 //!< node. It is used internally in classification trees and tree ensembles.
joeverbout 0:ea44dc9ed014 1041 int parent; //!< Index of the parent node
joeverbout 0:ea44dc9ed014 1042 int left; //!< Index of the left child node
joeverbout 0:ea44dc9ed014 1043 int right; //!< Index of right child node
joeverbout 0:ea44dc9ed014 1044 int defaultDir; //!< Default direction where to go (-1: left or +1: right). It helps in the
joeverbout 0:ea44dc9ed014 1045 //!< case of missing values.
joeverbout 0:ea44dc9ed014 1046 int split; //!< Index of the first split
joeverbout 0:ea44dc9ed014 1047 };
joeverbout 0:ea44dc9ed014 1048
joeverbout 0:ea44dc9ed014 1049 /** @brief The class represents split in a decision tree.
joeverbout 0:ea44dc9ed014 1050 */
joeverbout 0:ea44dc9ed014 1051 class CV_EXPORTS Split
joeverbout 0:ea44dc9ed014 1052 {
joeverbout 0:ea44dc9ed014 1053 public:
joeverbout 0:ea44dc9ed014 1054 Split();
joeverbout 0:ea44dc9ed014 1055 int varIdx; //!< Index of variable on which the split is created.
joeverbout 0:ea44dc9ed014 1056 bool inversed; //!< If true, then the inverse split rule is used (i.e. left and right
joeverbout 0:ea44dc9ed014 1057 //!< branches are exchanged in the rule expressions below).
joeverbout 0:ea44dc9ed014 1058 float quality; //!< The split quality, a positive number. It is used to choose the best split.
joeverbout 0:ea44dc9ed014 1059 int next; //!< Index of the next split in the list of splits for the node
joeverbout 0:ea44dc9ed014 1060 float c; /**< The threshold value in case of split on an ordered variable.
joeverbout 0:ea44dc9ed014 1061 The rule is:
joeverbout 0:ea44dc9ed014 1062 @code{.none}
joeverbout 0:ea44dc9ed014 1063 if var_value < c
joeverbout 0:ea44dc9ed014 1064 then next_node <- left
joeverbout 0:ea44dc9ed014 1065 else next_node <- right
joeverbout 0:ea44dc9ed014 1066 @endcode */
joeverbout 0:ea44dc9ed014 1067 int subsetOfs; /**< Offset of the bitset used by the split on a categorical variable.
joeverbout 0:ea44dc9ed014 1068 The rule is:
joeverbout 0:ea44dc9ed014 1069 @code{.none}
joeverbout 0:ea44dc9ed014 1070 if bitset[var_value] == 1
joeverbout 0:ea44dc9ed014 1071 then next_node <- left
joeverbout 0:ea44dc9ed014 1072 else next_node <- right
joeverbout 0:ea44dc9ed014 1073 @endcode */
joeverbout 0:ea44dc9ed014 1074 };
joeverbout 0:ea44dc9ed014 1075
joeverbout 0:ea44dc9ed014 1076 /** @brief Returns indices of root nodes
joeverbout 0:ea44dc9ed014 1077 */
joeverbout 0:ea44dc9ed014 1078 virtual const std::vector<int>& getRoots() const = 0;
joeverbout 0:ea44dc9ed014 1079 /** @brief Returns all the nodes
joeverbout 0:ea44dc9ed014 1080
joeverbout 0:ea44dc9ed014 1081 all the node indices are indices in the returned vector
joeverbout 0:ea44dc9ed014 1082 */
joeverbout 0:ea44dc9ed014 1083 virtual const std::vector<Node>& getNodes() const = 0;
joeverbout 0:ea44dc9ed014 1084 /** @brief Returns all the splits
joeverbout 0:ea44dc9ed014 1085
joeverbout 0:ea44dc9ed014 1086 all the split indices are indices in the returned vector
joeverbout 0:ea44dc9ed014 1087 */
joeverbout 0:ea44dc9ed014 1088 virtual const std::vector<Split>& getSplits() const = 0;
joeverbout 0:ea44dc9ed014 1089 /** @brief Returns all the bitsets for categorical splits
joeverbout 0:ea44dc9ed014 1090
joeverbout 0:ea44dc9ed014 1091 Split::subsetOfs is an offset in the returned vector
joeverbout 0:ea44dc9ed014 1092 */
joeverbout 0:ea44dc9ed014 1093 virtual const std::vector<int>& getSubsets() const = 0;
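/* A sketch of how the accessors above fit together: routing a single sample through
   the first tree by hand. It assumes ordered (numerical) variables only and that a
   leaf is marked by a negative Node::split index; categorical splits would instead
   consult getSubsets() through Split::subsetOfs as described in the Split docs.
@code{.cpp}
double treeValue(const Ptr<DTrees>& model, const Mat& sample) // sample: 1 x nvars, CV_32F
{
    const std::vector<DTrees::Node>& nodes = model->getNodes();
    const std::vector<DTrees::Split>& splits = model->getSplits();

    int nidx = model->getRoots()[0];      // first (and possibly only) tree
    while (nodes[nidx].split >= 0)        // interior node: follow its first split
    {
        const DTrees::Split& split = splits[nodes[nidx].split];
        bool goLeft = sample.at<float>(split.varIdx) < split.c;
        if (split.inversed) goLeft = !goLeft;
        nidx = goLeft ? nodes[nidx].left : nodes[nidx].right;
    }
    return nodes[nidx].value;             // class label or regression estimate
}
@endcode
*/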
joeverbout 0:ea44dc9ed014 1094
joeverbout 0:ea44dc9ed014 1095 /** @brief Creates the empty model
joeverbout 0:ea44dc9ed014 1096
joeverbout 0:ea44dc9ed014 1097 The static method creates an empty decision tree with the specified parameters. It should then be
joeverbout 0:ea44dc9ed014 1098 trained using the train method (see StatModel::train). Alternatively, you can load the model from
joeverbout 0:ea44dc9ed014 1099 file using Algorithm::load\<DTrees\>(filename).
joeverbout 0:ea44dc9ed014 1100 */
joeverbout 0:ea44dc9ed014 1101 CV_WRAP static Ptr<DTrees> create();
joeverbout 0:ea44dc9ed014 1102 };
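/* A minimal training sketch for DTrees: a toy 2-D, two-class problem. The parameter
   values are illustrative; setCVFolds(0) disables the built-in cross-validation
   pruning, which is a common choice for very small training sets like this one.
@code{.cpp}
#include <opencv2/core.hpp>
#include <opencv2/ml.hpp>
using namespace cv;
using namespace cv::ml;

int main()
{
    float samplesData[] = { 0,0,  0,1,  1,0,  1,1 };
    int   labelsData[]  = { 0, 0, 1, 1 };
    Mat samples(4, 2, CV_32F, samplesData);
    Mat labels(4, 1, CV_32S, labelsData);

    Ptr<DTrees> dtree = DTrees::create();
    dtree->setMaxDepth(8);
    dtree->setMinSampleCount(1);
    dtree->setCVFolds(0);
    dtree->train(TrainData::create(samples, ROW_SAMPLE, labels));

    Mat query = (Mat_<float>(1, 2) << 0.9f, 0.1f);
    float prediction = dtree->predict(query);   // expected: 1
    return (int)prediction;
}
@endcode
*/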
joeverbout 0:ea44dc9ed014 1103
joeverbout 0:ea44dc9ed014 1104 /****************************************************************************************\
joeverbout 0:ea44dc9ed014 1105 * Random Trees Classifier *
joeverbout 0:ea44dc9ed014 1106 \****************************************************************************************/
joeverbout 0:ea44dc9ed014 1107
joeverbout 0:ea44dc9ed014 1108 /** @brief The class implements the random forest predictor.
joeverbout 0:ea44dc9ed014 1109
joeverbout 0:ea44dc9ed014 1110 @sa @ref ml_intro_rtrees
joeverbout 0:ea44dc9ed014 1111 */
joeverbout 0:ea44dc9ed014 1112 class CV_EXPORTS_W RTrees : public DTrees
joeverbout 0:ea44dc9ed014 1113 {
joeverbout 0:ea44dc9ed014 1114 public:
joeverbout 0:ea44dc9ed014 1115
joeverbout 0:ea44dc9ed014 1116 /** If true then variable importance will be calculated and can then be retrieved by RTrees::getVarImportance.
joeverbout 0:ea44dc9ed014 1117 Default value is false.*/
joeverbout 0:ea44dc9ed014 1118 /** @see setCalculateVarImportance */
joeverbout 0:ea44dc9ed014 1119 CV_WRAP virtual bool getCalculateVarImportance() const = 0;
joeverbout 0:ea44dc9ed014 1120 /** @copybrief getCalculateVarImportance @see getCalculateVarImportance */
joeverbout 0:ea44dc9ed014 1121 CV_WRAP virtual void setCalculateVarImportance(bool val) = 0;
joeverbout 0:ea44dc9ed014 1122
joeverbout 0:ea44dc9ed014 1123 /** The size of the randomly selected subset of features at each tree node, used
joeverbout 0:ea44dc9ed014 1124 to find the best split(s).
joeverbout 0:ea44dc9ed014 1125 If you set it to 0 then the size will be set to the square root of the total number of
joeverbout 0:ea44dc9ed014 1126 features. Default value is 0.*/
joeverbout 0:ea44dc9ed014 1127 /** @see setActiveVarCount */
joeverbout 0:ea44dc9ed014 1128 CV_WRAP virtual int getActiveVarCount() const = 0;
joeverbout 0:ea44dc9ed014 1129 /** @copybrief getActiveVarCount @see getActiveVarCount */
joeverbout 0:ea44dc9ed014 1130 CV_WRAP virtual void setActiveVarCount(int val) = 0;
joeverbout 0:ea44dc9ed014 1131
joeverbout 0:ea44dc9ed014 1132 /** The termination criteria that specify when the training algorithm stops:
joeverbout 0:ea44dc9ed014 1133 either when the specified number of trees is trained and added to the ensemble, or when
joeverbout 0:ea44dc9ed014 1134 sufficient accuracy (measured as OOB error) is achieved. Typically, the more trees you have, the
joeverbout 0:ea44dc9ed014 1135 better the accuracy. However, the improvement in accuracy generally diminishes and asymptotes
joeverbout 0:ea44dc9ed014 1136 past a certain number of trees. Also keep in mind that the number of trees increases the
joeverbout 0:ea44dc9ed014 1137 prediction time linearly. Default value is TermCriteria(TermCriteria::MAX_ITER +
joeverbout 0:ea44dc9ed014 1138 TermCriteria::EPS, 50, 0.1).*/
joeverbout 0:ea44dc9ed014 1139 /** @see setTermCriteria */
joeverbout 0:ea44dc9ed014 1140 CV_WRAP virtual TermCriteria getTermCriteria() const = 0;
joeverbout 0:ea44dc9ed014 1141 /** @copybrief getTermCriteria @see getTermCriteria */
joeverbout 0:ea44dc9ed014 1142 CV_WRAP virtual void setTermCriteria(const TermCriteria &val) = 0;
joeverbout 0:ea44dc9ed014 1143
joeverbout 0:ea44dc9ed014 1144 /** Returns the variable importance array.
joeverbout 0:ea44dc9ed014 1145 The method returns the variable importance vector, computed at the training stage when
joeverbout 0:ea44dc9ed014 1146 CalculateVarImportance is set to true. If this flag was set to false, an empty matrix is
joeverbout 0:ea44dc9ed014 1147 returned.
joeverbout 0:ea44dc9ed014 1148 */
joeverbout 0:ea44dc9ed014 1149 CV_WRAP virtual Mat getVarImportance() const = 0;
joeverbout 0:ea44dc9ed014 1150
joeverbout 0:ea44dc9ed014 1151 /** Creates the empty model.
joeverbout 0:ea44dc9ed014 1152 Use StatModel::train to train the model, or
joeverbout 0:ea44dc9ed014 1153 Algorithm::load\<RTrees\>(filename) to load the pre-trained model.
joeverbout 0:ea44dc9ed014 1154 */
joeverbout 0:ea44dc9ed014 1155 CV_WRAP static Ptr<RTrees> create();
joeverbout 0:ea44dc9ed014 1156 };
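/* A configuration sketch for RTrees, assuming samples/labels prepared as in the
   DTrees example above. The termination values simply restate the documented
   defaults; getVarImportance returns an empty Mat unless the flag is enabled first.
@code{.cpp}
Ptr<RTrees> rtrees = RTrees::create();
rtrees->setCalculateVarImportance(true);           // enable the importance vector
rtrees->setActiveVarCount(0);                      // 0 -> sqrt(total feature count)
rtrees->setTermCriteria(TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 50, 0.1));
rtrees->train(TrainData::create(samples, ROW_SAMPLE, labels));
Mat importance = rtrees->getVarImportance();       // one entry per variable
@endcode
*/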
joeverbout 0:ea44dc9ed014 1157
joeverbout 0:ea44dc9ed014 1158 /****************************************************************************************\
joeverbout 0:ea44dc9ed014 1159 * Boosted tree classifier *
joeverbout 0:ea44dc9ed014 1160 \****************************************************************************************/
joeverbout 0:ea44dc9ed014 1161
joeverbout 0:ea44dc9ed014 1162 /** @brief Boosted tree classifier derived from DTrees
joeverbout 0:ea44dc9ed014 1163
joeverbout 0:ea44dc9ed014 1164 @sa @ref ml_intro_boost
joeverbout 0:ea44dc9ed014 1165 */
joeverbout 0:ea44dc9ed014 1166 class CV_EXPORTS_W Boost : public DTrees
joeverbout 0:ea44dc9ed014 1167 {
joeverbout 0:ea44dc9ed014 1168 public:
joeverbout 0:ea44dc9ed014 1169 /** Type of the boosting algorithm.
joeverbout 0:ea44dc9ed014 1170 See Boost::Types. Default value is Boost::REAL. */
joeverbout 0:ea44dc9ed014 1171 /** @see setBoostType */
joeverbout 0:ea44dc9ed014 1172 CV_WRAP virtual int getBoostType() const = 0;
joeverbout 0:ea44dc9ed014 1173 /** @copybrief getBoostType @see getBoostType */
joeverbout 0:ea44dc9ed014 1174 CV_WRAP virtual void setBoostType(int val) = 0;
joeverbout 0:ea44dc9ed014 1175
joeverbout 0:ea44dc9ed014 1176 /** The number of weak classifiers.
joeverbout 0:ea44dc9ed014 1177 Default value is 100. */
joeverbout 0:ea44dc9ed014 1178 /** @see setWeakCount */
joeverbout 0:ea44dc9ed014 1179 CV_WRAP virtual int getWeakCount() const = 0;
joeverbout 0:ea44dc9ed014 1180 /** @copybrief getWeakCount @see getWeakCount */
joeverbout 0:ea44dc9ed014 1181 CV_WRAP virtual void setWeakCount(int val) = 0;
joeverbout 0:ea44dc9ed014 1182
joeverbout 0:ea44dc9ed014 1183 /** A threshold between 0 and 1 used to save computational time.
joeverbout 0:ea44dc9ed014 1184 Samples with summary weight \f$\leq 1 - \texttt{weight\_trim\_rate}\f$ do not participate in the *next*
joeverbout 0:ea44dc9ed014 1185 iteration of training. Set this parameter to 0 to turn off this functionality. Default value is 0.95.*/
joeverbout 0:ea44dc9ed014 1186 /** @see setWeightTrimRate */
joeverbout 0:ea44dc9ed014 1187 CV_WRAP virtual double getWeightTrimRate() const = 0;
joeverbout 0:ea44dc9ed014 1188 /** @copybrief getWeightTrimRate @see getWeightTrimRate */
joeverbout 0:ea44dc9ed014 1189 CV_WRAP virtual void setWeightTrimRate(double val) = 0;
joeverbout 0:ea44dc9ed014 1190
joeverbout 0:ea44dc9ed014 1191 /** Boosting type.
joeverbout 0:ea44dc9ed014 1192 Gentle AdaBoost and Real AdaBoost are often the preferable choices. */
joeverbout 0:ea44dc9ed014 1193 enum Types {
joeverbout 0:ea44dc9ed014 1194 DISCRETE=0, //!< Discrete AdaBoost.
joeverbout 0:ea44dc9ed014 1195 REAL=1, //!< Real AdaBoost. It is a technique that utilizes confidence-rated predictions
joeverbout 0:ea44dc9ed014 1196 //!< and works well with categorical data.
joeverbout 0:ea44dc9ed014 1197 LOGIT=2, //!< LogitBoost. It can produce good regression fits.
joeverbout 0:ea44dc9ed014 1198 GENTLE=3 //!< Gentle AdaBoost. It puts less weight on outlier data points and for that
joeverbout 0:ea44dc9ed014 1199 //!< reason is often good with regression data.
joeverbout 0:ea44dc9ed014 1200 };
joeverbout 0:ea44dc9ed014 1201
joeverbout 0:ea44dc9ed014 1202 /** Creates the empty model.
joeverbout 0:ea44dc9ed014 1203 Use StatModel::train to train the model, Algorithm::load\<Boost\>(filename) to load the pre-trained model. */
joeverbout 0:ea44dc9ed014 1204 CV_WRAP static Ptr<Boost> create();
joeverbout 0:ea44dc9ed014 1205 };
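/* A configuration sketch for Boost, again assuming samples/labels as in the DTrees
   example. Depth-1 trees (stumps) are a conventional weak learner choice for
   boosting, not a requirement of the API.
@code{.cpp}
Ptr<Boost> boost = Boost::create();
boost->setBoostType(Boost::GENTLE);   // Gentle AdaBoost, see Boost::Types
boost->setWeakCount(100);
boost->setWeightTrimRate(0.95);
boost->setMaxDepth(1);                // decision stumps as weak classifiers
boost->train(TrainData::create(samples, ROW_SAMPLE, labels));
@endcode
*/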
joeverbout 0:ea44dc9ed014 1206
joeverbout 0:ea44dc9ed014 1207 /****************************************************************************************\
joeverbout 0:ea44dc9ed014 1208 * Gradient Boosted Trees *
joeverbout 0:ea44dc9ed014 1209 \****************************************************************************************/
joeverbout 0:ea44dc9ed014 1210
joeverbout 0:ea44dc9ed014 1211 /*class CV_EXPORTS_W GBTrees : public DTrees
joeverbout 0:ea44dc9ed014 1212 {
joeverbout 0:ea44dc9ed014 1213 public:
joeverbout 0:ea44dc9ed014 1214 struct CV_EXPORTS_W_MAP Params : public DTrees::Params
joeverbout 0:ea44dc9ed014 1215 {
joeverbout 0:ea44dc9ed014 1216 CV_PROP_RW int weakCount;
joeverbout 0:ea44dc9ed014 1217 CV_PROP_RW int lossFunctionType;
joeverbout 0:ea44dc9ed014 1218 CV_PROP_RW float subsamplePortion;
joeverbout 0:ea44dc9ed014 1219 CV_PROP_RW float shrinkage;
joeverbout 0:ea44dc9ed014 1220
joeverbout 0:ea44dc9ed014 1221 Params();
joeverbout 0:ea44dc9ed014 1222 Params( int lossFunctionType, int weakCount, float shrinkage,
joeverbout 0:ea44dc9ed014 1223 float subsamplePortion, int maxDepth, bool useSurrogates );
joeverbout 0:ea44dc9ed014 1224 };
joeverbout 0:ea44dc9ed014 1225
joeverbout 0:ea44dc9ed014 1226 enum {SQUARED_LOSS=0, ABSOLUTE_LOSS, HUBER_LOSS=3, DEVIANCE_LOSS};
joeverbout 0:ea44dc9ed014 1227
joeverbout 0:ea44dc9ed014 1228 virtual void setK(int k) = 0;
joeverbout 0:ea44dc9ed014 1229
joeverbout 0:ea44dc9ed014 1230 virtual float predictSerial( InputArray samples,
joeverbout 0:ea44dc9ed014 1231 OutputArray weakResponses, int flags) const = 0;
joeverbout 0:ea44dc9ed014 1232
joeverbout 0:ea44dc9ed014 1233 static Ptr<GBTrees> create(const Params& p);
joeverbout 0:ea44dc9ed014 1234 };*/
joeverbout 0:ea44dc9ed014 1235
joeverbout 0:ea44dc9ed014 1236 /****************************************************************************************\
joeverbout 0:ea44dc9ed014 1237 * Artificial Neural Networks (ANN) *
joeverbout 0:ea44dc9ed014 1238 \****************************************************************************************/
joeverbout 0:ea44dc9ed014 1239
joeverbout 0:ea44dc9ed014 1240 /////////////////////////////////// Multi-Layer Perceptrons //////////////////////////////
joeverbout 0:ea44dc9ed014 1241
joeverbout 0:ea44dc9ed014 1242 /** @brief Artificial Neural Networks - Multi-Layer Perceptrons.
joeverbout 0:ea44dc9ed014 1243
joeverbout 0:ea44dc9ed014 1244 Unlike many other models in ML that are constructed and trained at once, in the MLP model these
joeverbout 0:ea44dc9ed014 1245 steps are separated. First, a network with the specified topology is created using the non-default
joeverbout 0:ea44dc9ed014 1246 constructor or the method ANN_MLP::create. All the weights are set to zero. Then, the network is
joeverbout 0:ea44dc9ed014 1247 trained using a set of input and output vectors. The training procedure can be repeated more than
joeverbout 0:ea44dc9ed014 1248 once, that is, the weights can be adjusted based on the new training data.
joeverbout 0:ea44dc9ed014 1249
joeverbout 0:ea44dc9ed014 1250 Additional flags for StatModel::train are available: ANN_MLP::TrainFlags.
joeverbout 0:ea44dc9ed014 1251
joeverbout 0:ea44dc9ed014 1252 @sa @ref ml_intro_ann
joeverbout 0:ea44dc9ed014 1253 */
joeverbout 0:ea44dc9ed014 1254 class CV_EXPORTS_W ANN_MLP : public StatModel
joeverbout 0:ea44dc9ed014 1255 {
joeverbout 0:ea44dc9ed014 1256 public:
joeverbout 0:ea44dc9ed014 1257 /** Available training methods */
joeverbout 0:ea44dc9ed014 1258 enum TrainingMethods {
joeverbout 0:ea44dc9ed014 1259 BACKPROP=0, //!< The back-propagation algorithm.
joeverbout 0:ea44dc9ed014 1260 RPROP=1 //!< The RPROP algorithm. See @cite RPROP93 for details.
joeverbout 0:ea44dc9ed014 1261 };
joeverbout 0:ea44dc9ed014 1262
joeverbout 0:ea44dc9ed014 1263 /** Sets training method and common parameters.
joeverbout 0:ea44dc9ed014 1264 @param method Default value is ANN_MLP::RPROP. See ANN_MLP::TrainingMethods.
joeverbout 0:ea44dc9ed014 1265 @param param1 passed to setRpropDW0 for ANN_MLP::RPROP and to setBackpropWeightScale for ANN_MLP::BACKPROP
joeverbout 0:ea44dc9ed014 1266 @param param2 passed to setRpropDWMin for ANN_MLP::RPROP and to setBackpropMomentumScale for ANN_MLP::BACKPROP.
joeverbout 0:ea44dc9ed014 1267 */
joeverbout 0:ea44dc9ed014 1268 CV_WRAP virtual void setTrainMethod(int method, double param1 = 0, double param2 = 0) = 0;
joeverbout 0:ea44dc9ed014 1269
joeverbout 0:ea44dc9ed014 1270 /** Returns current training method */
joeverbout 0:ea44dc9ed014 1271 CV_WRAP virtual int getTrainMethod() const = 0;
joeverbout 0:ea44dc9ed014 1272
joeverbout 0:ea44dc9ed014 1273 /** Initialize the activation function for each neuron.
joeverbout 0:ea44dc9ed014 1274 Currently the default and the only fully supported activation function is ANN_MLP::SIGMOID_SYM.
joeverbout 0:ea44dc9ed014 1275 @param type The type of activation function. See ANN_MLP::ActivationFunctions.
joeverbout 0:ea44dc9ed014 1276 @param param1 The first parameter of the activation function, \f$\alpha\f$. Default value is 0.
joeverbout 0:ea44dc9ed014 1277 @param param2 The second parameter of the activation function, \f$\beta\f$. Default value is 0.
joeverbout 0:ea44dc9ed014 1278 */
joeverbout 0:ea44dc9ed014 1279 CV_WRAP virtual void setActivationFunction(int type, double param1 = 0, double param2 = 0) = 0;
joeverbout 0:ea44dc9ed014 1280
joeverbout 0:ea44dc9ed014 1281 /** Integer vector specifying the number of neurons in each layer including the input and output layers.
joeverbout 0:ea44dc9ed014 1282 The very first element specifies the number of elements in the input layer.
joeverbout 0:ea44dc9ed014 1283 The last element specifies the number of elements in the output layer. Default value is empty Mat.
joeverbout 0:ea44dc9ed014 1284 @sa getLayerSizes */
joeverbout 0:ea44dc9ed014 1285 CV_WRAP virtual void setLayerSizes(InputArray _layer_sizes) = 0;
joeverbout 0:ea44dc9ed014 1286
joeverbout 0:ea44dc9ed014 1287 /** Integer vector specifying the number of neurons in each layer including the input and output layers.
joeverbout 0:ea44dc9ed014 1288 The very first element specifies the number of elements in the input layer.
joeverbout 0:ea44dc9ed014 1289 The last element specifies the number of elements in the output layer.
joeverbout 0:ea44dc9ed014 1290 @sa setLayerSizes */
joeverbout 0:ea44dc9ed014 1291 CV_WRAP virtual cv::Mat getLayerSizes() const = 0;
joeverbout 0:ea44dc9ed014 1292
joeverbout 0:ea44dc9ed014 1293 /** Termination criteria of the training algorithm.
joeverbout 0:ea44dc9ed014 1294 You can specify the maximum number of iterations (maxCount) and/or how much the error could
joeverbout 0:ea44dc9ed014 1295 change between the iterations to make the algorithm continue (epsilon). Default value is
joeverbout 0:ea44dc9ed014 1296 TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 1000, 0.01).*/
joeverbout 0:ea44dc9ed014 1297 /** @see setTermCriteria */
joeverbout 0:ea44dc9ed014 1298 CV_WRAP virtual TermCriteria getTermCriteria() const = 0;
joeverbout 0:ea44dc9ed014 1299 /** @copybrief getTermCriteria @see getTermCriteria */
joeverbout 0:ea44dc9ed014 1300 CV_WRAP virtual void setTermCriteria(TermCriteria val) = 0;
joeverbout 0:ea44dc9ed014 1301
joeverbout 0:ea44dc9ed014 1302 /** BPROP: Strength of the weight gradient term.
joeverbout 0:ea44dc9ed014 1303 The recommended value is about 0.1. Default value is 0.1.*/
joeverbout 0:ea44dc9ed014 1304 /** @see setBackpropWeightScale */
joeverbout 0:ea44dc9ed014 1305 CV_WRAP virtual double getBackpropWeightScale() const = 0;
joeverbout 0:ea44dc9ed014 1306 /** @copybrief getBackpropWeightScale @see getBackpropWeightScale */
joeverbout 0:ea44dc9ed014 1307 CV_WRAP virtual void setBackpropWeightScale(double val) = 0;
joeverbout 0:ea44dc9ed014 1308
joeverbout 0:ea44dc9ed014 1309 /** BPROP: Strength of the momentum term (the difference between weights on the 2 previous iterations).
joeverbout 0:ea44dc9ed014 1310 This parameter provides some inertia to smooth the random fluctuations of the weights. It can
joeverbout 0:ea44dc9ed014 1311 vary from 0 (the feature is disabled) to 1 and beyond. The value 0.1 or so is good enough.
joeverbout 0:ea44dc9ed014 1312 Default value is 0.1.*/
joeverbout 0:ea44dc9ed014 1313 /** @see setBackpropMomentumScale */
joeverbout 0:ea44dc9ed014 1314 CV_WRAP virtual double getBackpropMomentumScale() const = 0;
joeverbout 0:ea44dc9ed014 1315 /** @copybrief getBackpropMomentumScale @see getBackpropMomentumScale */
joeverbout 0:ea44dc9ed014 1316 CV_WRAP virtual void setBackpropMomentumScale(double val) = 0;
joeverbout 0:ea44dc9ed014 1317
joeverbout 0:ea44dc9ed014 1318 /** RPROP: Initial value \f$\Delta_0\f$ of update-values \f$\Delta_{ij}\f$.
joeverbout 0:ea44dc9ed014 1319 Default value is 0.1.*/
joeverbout 0:ea44dc9ed014 1320 /** @see setRpropDW0 */
joeverbout 0:ea44dc9ed014 1321 CV_WRAP virtual double getRpropDW0() const = 0;
joeverbout 0:ea44dc9ed014 1322 /** @copybrief getRpropDW0 @see getRpropDW0 */
joeverbout 0:ea44dc9ed014 1323 CV_WRAP virtual void setRpropDW0(double val) = 0;
joeverbout 0:ea44dc9ed014 1324
joeverbout 0:ea44dc9ed014 1325 /** RPROP: Increase factor \f$\eta^+\f$.
joeverbout 0:ea44dc9ed014 1326 It must be \>1. Default value is 1.2.*/
joeverbout 0:ea44dc9ed014 1327 /** @see setRpropDWPlus */
joeverbout 0:ea44dc9ed014 1328 CV_WRAP virtual double getRpropDWPlus() const = 0;
joeverbout 0:ea44dc9ed014 1329 /** @copybrief getRpropDWPlus @see getRpropDWPlus */
joeverbout 0:ea44dc9ed014 1330 CV_WRAP virtual void setRpropDWPlus(double val) = 0;
joeverbout 0:ea44dc9ed014 1331
joeverbout 0:ea44dc9ed014 1332 /** RPROP: Decrease factor \f$\eta^-\f$.
joeverbout 0:ea44dc9ed014 1333 It must be \<1. Default value is 0.5.*/
joeverbout 0:ea44dc9ed014 1334 /** @see setRpropDWMinus */
joeverbout 0:ea44dc9ed014 1335 CV_WRAP virtual double getRpropDWMinus() const = 0;
joeverbout 0:ea44dc9ed014 1336 /** @copybrief getRpropDWMinus @see getRpropDWMinus */
joeverbout 0:ea44dc9ed014 1337 CV_WRAP virtual void setRpropDWMinus(double val) = 0;
joeverbout 0:ea44dc9ed014 1338
joeverbout 0:ea44dc9ed014 1339 /** RPROP: Update-values lower limit \f$\Delta_{min}\f$.
joeverbout 0:ea44dc9ed014 1340 It must be positive. Default value is FLT_EPSILON.*/
joeverbout 0:ea44dc9ed014 1341 /** @see setRpropDWMin */
joeverbout 0:ea44dc9ed014 1342 CV_WRAP virtual double getRpropDWMin() const = 0;
joeverbout 0:ea44dc9ed014 1343 /** @copybrief getRpropDWMin @see getRpropDWMin */
joeverbout 0:ea44dc9ed014 1344 CV_WRAP virtual void setRpropDWMin(double val) = 0;
joeverbout 0:ea44dc9ed014 1345
joeverbout 0:ea44dc9ed014 1346 /** RPROP: Update-values upper limit \f$\Delta_{max}\f$.
joeverbout 0:ea44dc9ed014 1347 It must be \>1. Default value is 50.*/
joeverbout 0:ea44dc9ed014 1348 /** @see setRpropDWMax */
joeverbout 0:ea44dc9ed014 1349 CV_WRAP virtual double getRpropDWMax() const = 0;
joeverbout 0:ea44dc9ed014 1350 /** @copybrief getRpropDWMax @see getRpropDWMax */
joeverbout 0:ea44dc9ed014 1351 CV_WRAP virtual void setRpropDWMax(double val) = 0;
joeverbout 0:ea44dc9ed014 1352
joeverbout 0:ea44dc9ed014 1353 /** possible activation functions */
joeverbout 0:ea44dc9ed014 1354 enum ActivationFunctions {
joeverbout 0:ea44dc9ed014 1355 /** Identity function: \f$f(x)=x\f$ */
joeverbout 0:ea44dc9ed014 1356 IDENTITY = 0,
joeverbout 0:ea44dc9ed014 1357 /** Symmetrical sigmoid: \f$f(x)=\beta*(1-e^{-\alpha x})/(1+e^{-\alpha x})\f$
joeverbout 0:ea44dc9ed014 1358 @note
joeverbout 0:ea44dc9ed014 1359 If you are using the default sigmoid activation function with the default parameter values
joeverbout 0:ea44dc9ed014 1360 fparam1=0 and fparam2=0, then the function used is y = 1.7159\*tanh(2/3 \* x), so the output
joeverbout 0:ea44dc9ed014 1361 will range over [-1.7159, 1.7159] instead of [0,1].*/
joeverbout 0:ea44dc9ed014 1362 SIGMOID_SYM = 1,
joeverbout 0:ea44dc9ed014 1363 /** Gaussian function: \f$f(x)=\beta e^{-\alpha x^2}\f$ */
joeverbout 0:ea44dc9ed014 1364 GAUSSIAN = 2
joeverbout 0:ea44dc9ed014 1365 };
joeverbout 0:ea44dc9ed014 1366
joeverbout 0:ea44dc9ed014 1367 /** Train options */
joeverbout 0:ea44dc9ed014 1368 enum TrainFlags {
joeverbout 0:ea44dc9ed014 1369 /** Update the network weights, rather than compute them from scratch. In the latter case
joeverbout 0:ea44dc9ed014 1370 the weights are initialized using the Nguyen-Widrow algorithm. */
joeverbout 0:ea44dc9ed014 1371 UPDATE_WEIGHTS = 1,
joeverbout 0:ea44dc9ed014 1372 /** Do not normalize the input vectors. If this flag is not set, the training algorithm
joeverbout 0:ea44dc9ed014 1373 normalizes each input feature independently, shifting its mean value to 0 and making the
joeverbout 0:ea44dc9ed014 1374 standard deviation equal to 1. If the network is assumed to be updated frequently, the new
joeverbout 0:ea44dc9ed014 1375 training data could be much different from the original one. In this case, you should take care
joeverbout 0:ea44dc9ed014 1376 of proper normalization yourself. */
joeverbout 0:ea44dc9ed014 1377 NO_INPUT_SCALE = 2,
joeverbout 0:ea44dc9ed014 1378 /** Do not normalize the output vectors. If the flag is not set, the training algorithm
joeverbout 0:ea44dc9ed014 1379 normalizes each output feature independently, by transforming it to a certain range
joeverbout 0:ea44dc9ed014 1380 depending on the activation function used. */
joeverbout 0:ea44dc9ed014 1381 NO_OUTPUT_SCALE = 4
joeverbout 0:ea44dc9ed014 1382 };
joeverbout 0:ea44dc9ed014 1383
joeverbout 0:ea44dc9ed014 1384 CV_WRAP virtual Mat getWeights(int layerIdx) const = 0;
joeverbout 0:ea44dc9ed014 1385
joeverbout 0:ea44dc9ed014 1386 /** @brief Creates empty model
joeverbout 0:ea44dc9ed014 1387
joeverbout 0:ea44dc9ed014 1388 Use StatModel::train to train the model, Algorithm::load\<ANN_MLP\>(filename) to load the pre-trained model.
joeverbout 0:ea44dc9ed014 1389 Note that the train method has optional flags: ANN_MLP::TrainFlags.
joeverbout 0:ea44dc9ed014 1390 */
joeverbout 0:ea44dc9ed014 1391 CV_WRAP static Ptr<ANN_MLP> create();
joeverbout 0:ea44dc9ed014 1392 };
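/* A minimal sketch for ANN_MLP on an XOR-like problem. Layer sizes, targets in
   {-1, 1} (matching the symmetric sigmoid's output range) and the BACKPROP
   parameters are illustrative choices.
@code{.cpp}
#include <opencv2/core.hpp>
#include <opencv2/ml.hpp>
using namespace cv;
using namespace cv::ml;

int main()
{
    Ptr<ANN_MLP> mlp = ANN_MLP::create();
    Mat layerSizes = (Mat_<int>(1, 3) << 2, 4, 1);  // 2 inputs, 4 hidden, 1 output
    mlp->setLayerSizes(layerSizes);
    mlp->setActivationFunction(ANN_MLP::SIGMOID_SYM, 0, 0);
    mlp->setTrainMethod(ANN_MLP::BACKPROP, 0.1, 0.1);
    mlp->setTermCriteria(TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 1000, 0.01));

    // Responses must be floating-point: one row per sample, one column per output neuron.
    Mat samples   = (Mat_<float>(4, 2) << 0,0,  0,1,  1,0,  1,1);
    Mat responses = (Mat_<float>(4, 1) << -1, 1, 1, -1);
    mlp->train(TrainData::create(samples, ROW_SAMPLE, responses));

    Mat output;
    Mat query = (Mat_<float>(1, 2) << 1, 0);
    mlp->predict(query, output);   // output approaches 1 for this input
    return 0;
}
@endcode
*/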
joeverbout 0:ea44dc9ed014 1393
joeverbout 0:ea44dc9ed014 1394 /****************************************************************************************\
joeverbout 0:ea44dc9ed014 1395 * Logistic Regression *
joeverbout 0:ea44dc9ed014 1396 \****************************************************************************************/
joeverbout 0:ea44dc9ed014 1397
joeverbout 0:ea44dc9ed014 1398 /** @brief Implements Logistic Regression classifier.
joeverbout 0:ea44dc9ed014 1399
joeverbout 0:ea44dc9ed014 1400 @sa @ref ml_intro_lr
joeverbout 0:ea44dc9ed014 1401 */
joeverbout 0:ea44dc9ed014 1402 class CV_EXPORTS_W LogisticRegression : public StatModel
joeverbout 0:ea44dc9ed014 1403 {
joeverbout 0:ea44dc9ed014 1404 public:
joeverbout 0:ea44dc9ed014 1405
joeverbout 0:ea44dc9ed014 1406 /** Learning rate. */
joeverbout 0:ea44dc9ed014 1407 /** @see setLearningRate */
joeverbout 0:ea44dc9ed014 1408 CV_WRAP virtual double getLearningRate() const = 0;
joeverbout 0:ea44dc9ed014 1409 /** @copybrief getLearningRate @see getLearningRate */
joeverbout 0:ea44dc9ed014 1410 CV_WRAP virtual void setLearningRate(double val) = 0;
joeverbout 0:ea44dc9ed014 1411
joeverbout 0:ea44dc9ed014 1412 /** Number of iterations. */
joeverbout 0:ea44dc9ed014 1413 /** @see setIterations */
joeverbout 0:ea44dc9ed014 1414 CV_WRAP virtual int getIterations() const = 0;
joeverbout 0:ea44dc9ed014 1415 /** @copybrief getIterations @see getIterations */
joeverbout 0:ea44dc9ed014 1416 CV_WRAP virtual void setIterations(int val) = 0;
joeverbout 0:ea44dc9ed014 1417
joeverbout 0:ea44dc9ed014 1418 /** Kind of regularization to be applied. See LogisticRegression::RegKinds. */
joeverbout 0:ea44dc9ed014 1419 /** @see setRegularization */
joeverbout 0:ea44dc9ed014 1420 CV_WRAP virtual int getRegularization() const = 0;
joeverbout 0:ea44dc9ed014 1421 /** @copybrief getRegularization @see getRegularization */
joeverbout 0:ea44dc9ed014 1422 CV_WRAP virtual void setRegularization(int val) = 0;
joeverbout 0:ea44dc9ed014 1423
joeverbout 0:ea44dc9ed014 1424 /** Kind of training method used. See LogisticRegression::Methods. */
joeverbout 0:ea44dc9ed014 1425 /** @see setTrainMethod */
joeverbout 0:ea44dc9ed014 1426 CV_WRAP virtual int getTrainMethod() const = 0;
joeverbout 0:ea44dc9ed014 1427 /** @copybrief getTrainMethod @see getTrainMethod */
joeverbout 0:ea44dc9ed014 1428 CV_WRAP virtual void setTrainMethod(int val) = 0;
joeverbout 0:ea44dc9ed014 1429
joeverbout 0:ea44dc9ed014 1430 /** Specifies the number of training samples taken in each step of Mini-Batch Gradient
joeverbout 0:ea44dc9ed014 1431 Descent. It is used only with the LogisticRegression::MINI_BATCH training method and must be
joeverbout 0:ea44dc9ed014 1432 less than the total number of training samples. */
joeverbout 0:ea44dc9ed014 1433 /** @see setMiniBatchSize */
joeverbout 0:ea44dc9ed014 1434 CV_WRAP virtual int getMiniBatchSize() const = 0;
joeverbout 0:ea44dc9ed014 1435 /** @copybrief getMiniBatchSize @see getMiniBatchSize */
joeverbout 0:ea44dc9ed014 1436 CV_WRAP virtual void setMiniBatchSize(int val) = 0;
joeverbout 0:ea44dc9ed014 1437
joeverbout 0:ea44dc9ed014 1438 /** Termination criteria of the algorithm. */
joeverbout 0:ea44dc9ed014 1439 /** @see setTermCriteria */
joeverbout 0:ea44dc9ed014 1440 CV_WRAP virtual TermCriteria getTermCriteria() const = 0;
joeverbout 0:ea44dc9ed014 1441 /** @copybrief getTermCriteria @see getTermCriteria */
joeverbout 0:ea44dc9ed014 1442 CV_WRAP virtual void setTermCriteria(TermCriteria val) = 0;
joeverbout 0:ea44dc9ed014 1443
joeverbout 0:ea44dc9ed014 1444 //! Regularization kinds
joeverbout 0:ea44dc9ed014 1445 enum RegKinds {
joeverbout 0:ea44dc9ed014 1446 REG_DISABLE = -1, //!< Regularization disabled
joeverbout 0:ea44dc9ed014 1447 REG_L1 = 0, //!< %L1 norm
joeverbout 0:ea44dc9ed014 1448 REG_L2 = 1 //!< %L2 norm
joeverbout 0:ea44dc9ed014 1449 };
joeverbout 0:ea44dc9ed014 1450
joeverbout 0:ea44dc9ed014 1451 //! Training methods
joeverbout 0:ea44dc9ed014 1452 enum Methods {
joeverbout 0:ea44dc9ed014 1453 BATCH = 0,
joeverbout 0:ea44dc9ed014 1454 MINI_BATCH = 1 //!< Set MiniBatchSize to a positive integer when using this method.
joeverbout 0:ea44dc9ed014 1455 };
joeverbout 0:ea44dc9ed014 1456
joeverbout 0:ea44dc9ed014 1457 /** @brief Predicts responses for input samples and returns a float value.
joeverbout 0:ea44dc9ed014 1458
joeverbout 0:ea44dc9ed014 1459 @param samples The input data for the prediction algorithm. Matrix [m x n], where each row
joeverbout 0:ea44dc9ed014 1460 contains variables (features) of one object being classified. Should have data type CV_32F.
joeverbout 0:ea44dc9ed014 1461 @param results Predicted labels as a column matrix of type CV_32S.
joeverbout 0:ea44dc9ed014 1462 @param flags Not used.
joeverbout 0:ea44dc9ed014 1463 */
joeverbout 0:ea44dc9ed014 1464 CV_WRAP virtual float predict( InputArray samples, OutputArray results=noArray(), int flags=0 ) const = 0;
joeverbout 0:ea44dc9ed014 1465
joeverbout 0:ea44dc9ed014 1466 /** @brief This function returns the trained parameters arranged across rows.
joeverbout 0:ea44dc9ed014 1467
joeverbout 0:ea44dc9ed014 1468 For a two-class classification problem, it returns a row matrix. It returns the learnt parameters of
joeverbout 0:ea44dc9ed014 1469 the Logistic Regression as a matrix of type CV_32F.
joeverbout 0:ea44dc9ed014 1470 */
joeverbout 0:ea44dc9ed014 1471 CV_WRAP virtual Mat get_learnt_thetas() const = 0;
joeverbout 0:ea44dc9ed014 1472
joeverbout 0:ea44dc9ed014 1473 /** @brief Creates empty model.
joeverbout 0:ea44dc9ed014 1474
joeverbout 0:ea44dc9ed014 1475 Creates a Logistic Regression model with default parameters.
joeverbout 0:ea44dc9ed014 1476 */
joeverbout 0:ea44dc9ed014 1477 CV_WRAP static Ptr<LogisticRegression> create();
joeverbout 0:ea44dc9ed014 1478 };
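/* A minimal sketch for LogisticRegression. It assumes, per this implementation,
   CV_32F samples and CV_32F training labels; the hyper-parameter values are
   illustrative only.
@code{.cpp}
#include <opencv2/core.hpp>
#include <opencv2/ml.hpp>
using namespace cv;
using namespace cv::ml;

int main()
{
    Ptr<LogisticRegression> lr = LogisticRegression::create();
    lr->setLearningRate(0.001);
    lr->setIterations(1000);
    lr->setRegularization(LogisticRegression::REG_L2);
    lr->setTrainMethod(LogisticRegression::BATCH);

    Mat samples = (Mat_<float>(4, 2) << 0,0,  0,1,  1,0,  1,1);
    Mat labels  = (Mat_<float>(4, 1) << 0, 0, 1, 1);
    lr->train(samples, ROW_SAMPLE, labels);

    Mat results;                                     // CV_32S column of predicted labels
    Mat query = (Mat_<float>(1, 2) << 0.9f, 0.8f);
    lr->predict(query, results);
    Mat thetas = lr->get_learnt_thetas();            // trained parameters, CV_32F
    return 0;
}
@endcode
*/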
joeverbout 0:ea44dc9ed014 1479
joeverbout 0:ea44dc9ed014 1480 /****************************************************************************************\
joeverbout 0:ea44dc9ed014 1481 *                            Auxiliary function declarations                            *
joeverbout 0:ea44dc9ed014 1482 \****************************************************************************************/
joeverbout 0:ea44dc9ed014 1483
joeverbout 0:ea44dc9ed014 1484 /** @brief Generates _samples_ from a multivariate normal distribution
joeverbout 0:ea44dc9ed014 1485
joeverbout 0:ea44dc9ed014 1486 @param mean an average row vector
joeverbout 0:ea44dc9ed014 1487 @param cov symmetric covariance matrix
joeverbout 0:ea44dc9ed014 1488 @param nsamples number of samples to generate
joeverbout 0:ea44dc9ed014 1489 @param samples output samples array
joeverbout 0:ea44dc9ed014 1490 */
joeverbout 0:ea44dc9ed014 1491 CV_EXPORTS void randMVNormal( InputArray mean, InputArray cov, int nsamples, OutputArray samples);
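/* A short sketch of randMVNormal: drawing 100 points from a 2-D normal distribution
   with a diagonal covariance. The CV_32F element type is an assumption chosen to
   match the rest of the ML samples API.
@code{.cpp}
Mat mean = (Mat_<float>(1, 2) << 10.f, 5.f);   // average row vector
Mat cov  = (Mat_<float>(2, 2) << 2.f, 0.f,
                                 0.f, 1.f);    // symmetric covariance matrix
Mat samples;
randMVNormal(mean, cov, 100, samples);         // samples: 100 x 2
@endcode
*/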
joeverbout 0:ea44dc9ed014 1492
joeverbout 0:ea44dc9ed014 1493 /** @brief Creates a test set */
joeverbout 0:ea44dc9ed014 1494 CV_EXPORTS void createConcentricSpheresTestSet( int nsamples, int nfeatures, int nclasses,
joeverbout 0:ea44dc9ed014 1495 OutputArray samples, OutputArray responses);
joeverbout 0:ea44dc9ed014 1496
joeverbout 0:ea44dc9ed014 1497 //! @} ml
joeverbout 0:ea44dc9ed014 1498
joeverbout 0:ea44dc9ed014 1499 }
joeverbout 0:ea44dc9ed014 1500 }
joeverbout 0:ea44dc9ed014 1501
joeverbout 0:ea44dc9ed014 1502 #endif // __cplusplus
joeverbout 0:ea44dc9ed014 1503 #endif // __OPENCV_ML_HPP__
joeverbout 0:ea44dc9ed014 1504
joeverbout 0:ea44dc9ed014 1505 /* End of file. */
joeverbout 0:ea44dc9ed014 1506