//Trains a forest of decision trees, each one on a bootstrapped resample of the training data.
//
//trainingData: the labelled training data. It is taken by value, so the optional [0 1] scaling
//              below only modifies this local copy.
//
//Returns true if every tree in the forest was trained. Returns false (after writing to errorLog)
//if the training data contains no samples or if any single tree fails to train.
bool RandomForests::train(LabelledClassificationData trainingData){

    //Clear any previous model
    clear();

    const unsigned int M = trainingData.getNumSamples();
    const unsigned int N = trainingData.getNumDimensions();
    const unsigned int K = trainingData.getNumClasses();

    if( M == 0 ){
        errorLog << "train(LabelledClassificationData labelledTrainingData) - Training data has zero samples!" << endl;
        return false;
    }

    numInputDimensions = N;
    numClasses = K;
    classLabels = trainingData.getClassLabels();

    //The ranges are captured before the data is (optionally) rescaled
    ranges = trainingData.getRanges();

    //Scale the training data if needed
    if( useScaling ){
        //Scale the training data between 0 and 1
        trainingData.scale(0, 1);
    }

    //Use the forest size that has been configured on this instance. Previously this function
    //unconditionally overwrote forestSize with 10, which discarded any configured value; 10 is
    //now only used as a fallback when no usable (non-zero) size has been set.
    if( forestSize == 0 ){
        forestSize = 10;
    }

    //NOTE(review): this generator is never referenced below; it is kept in case its construction
    //matters elsewhere - confirm and remove if it does not.
    Random random;

    //A single tree object is configured once and then retrained for every entry in the forest
    DecisionTree tree;
    tree.enableScaling( false ); //We have already scaled the training data so we do not need to scale it again
    tree.setTrainingMode( DecisionTree::BEST_RANDOM_SPLIT );
    tree.setNumSplittingSteps( numRandomSplits );
    tree.setMinNumSamplesPerNode( minNumSamplesPerNode );
    tree.setMaxDepth( maxDepth );

    //Train the random forest
    for(UINT i=0; i<forestSize; i++){

        //Each tree gets its own bootstrapped dataset
        LabelledClassificationData data = trainingData.getBootstrappedDataset();

        if( !tree.train( data ) ){
            errorLog << "train(LabelledClassificationData labelledTrainingData) - Failed to train tree at forest index: " << i << endl;
            return false;
        }

        //Deep copy the tree into the forest (the tree object itself is reused on the next iteration)
        forest.push_back( tree.deepCopyTree() );
    }

    //Flag that the algorithm has been trained
    trained = true;
    return trained;
}
//Trains one softmax regression model for each class found in the training data.
//
//trainingData: the labelled training data. It is taken by value, so the optional [0 1] scaling
//              below only modifies this local copy.
//
//Returns true once a model has been trained for every class. Returns false (after writing to
//errorLog) if the training data contains no samples or if training any per-class model fails.
bool Softmax::train(LabelledClassificationData trainingData){

    //Drop any previously trained model before starting
    clear();

    const unsigned int numSamples = trainingData.getNumSamples();
    const unsigned int numDimensions = trainingData.getNumDimensions();
    const unsigned int numLabels = trainingData.getNumClasses();

    //There is nothing to learn from an empty dataset
    if( numSamples == 0 ){
        errorLog << "train(LabelledClassificationData labelledTrainingData) - Training data has zero samples!" << endl;
        return false;
    }

    //Size the classifier so that there is one model and one label slot per class
    numFeatures = numDimensions;
    numClasses = numLabels;
    models.resize(numLabels);
    classLabels.resize(numLabels);

    //The ranges are captured before the data is (optionally) rescaled
    ranges = trainingData.getRanges();

    //Rescale the local copy of the training data between 0 and 1 if scaling is enabled
    if( useScaling ) trainingData.scale(0, 1);

    //Walk over the classes, recording each label and then fitting the model for it
    UINT k = 0;
    while( k < numClasses ){

        //The label for this slot comes from the matching entry in the class tracker
        classLabels[k] = trainingData.getClassTracker()[k].classLabel;

        //Give up on the first model that cannot be trained
        if( !trainSoftmaxModel(classLabels[k],models[k],trainingData) ){
            errorLog << "train(LabelledClassificationData labelledTrainingData) - Failed to train model for class: " << classLabels[k] << endl;
            return false;
        }

        k++;
    }

    //Every class has a model, so the classifier can be flagged as trained
    trained = true;
    return true;
}
//Fits an independent Gaussian mixture model to the samples of each class in the training data,
//then derives the per-class statistics used for null rejection.
//
//trainingData: the labelled training data (taken by value).
//
//Returns true if a mixture model was stored for every class. Returns false (after writing to
//errorLog) if the training data is empty, if scaling a class's data fails, if the mixture model
//for a class cannot be trained, or if a sigma matrix cannot be inverted. A failure to recompute
//a rejection threshold does not abort training; it only produces a warning, and only when
//null rejection is enabled.
bool GMM::train(LabelledClassificationData trainingData){

    //Reset the state left behind by any earlier training run
    models.clear();
    trained = false;
    numFeatures = 0;
    numClasses = 0;

    if( trainingData.getNumSamples() == 0 ){
        errorLog << "train(LabelledClassificationData &trainingData) - Training data is empty!" << endl;
        return false;
    }

    //Record the dimensionality and class count, and make room for one model per class
    numFeatures = trainingData.getNumDimensions();
    numClasses = trainingData.getNumClasses();
    models.resize(numClasses);

    //Warn (but carry on) when the data has six or more features
    if( numFeatures >= 6 ){
        warningLog << "train(LabelledClassificationData &trainingData) - The number of features in your training data is high (" << numFeatures << "). The GMMClassifier does not work well with high dimensional data, you might get better results from one of the other classifiers." << endl;
    }

    //The ranges are only needed (and only stored) when scaling is enabled
    if( useScaling ){
        ranges = trainingData.getRanges();
    }

    //Each class is modelled on its own, using only the samples that carry its label
    for(UINT classIndex=0; classIndex<numClasses; classIndex++){

        UINT classLabel = trainingData.getClassTracker()[classIndex].classLabel;
        LabelledClassificationData samplesForClass = trainingData.getClassData( classLabel );

        //When scaling is enabled, the class data is scaled with the ranges of the whole dataset
        if( useScaling && !samplesForClass.scale(ranges,GMM_MIN_SCALE_VALUE, GMM_MAX_SCALE_VALUE) ){
            errorLog << "train(LabelledClassificationData &trainingData) - Failed to scale training data!" << endl;
            return false;
        }

        //The mixture model trainer works on unlabelled data
        UnlabelledClassificationData samplesWithoutLabels = samplesForClass.reformatAsUnlabelledClassificationData();

        //Fit the mixture model for this class
        GaussianMixtureModels mixtureTrainer;
        mixtureTrainer.setMinChange( minChange );
        mixtureTrainer.setMaxIter( maxIter );

        if( !mixtureTrainer.train(samplesWithoutLabels, numMixtureModels) ){
            errorLog << "train(LabelledClassificationData &trainingData) - Failed to train Mixture Model for class " << classLabel << endl;
            return false;
        }

        //Prepare the container that holds this class's mixture components
        models[classIndex].resize( numMixtureModels );
        models[classIndex].setClassLabel( classLabel );

        //Copy every component out of the trainer and precompute what realtime prediction needs
        for(UINT mixtureIndex=0; mixtureIndex<numMixtureModels; mixtureIndex++){
            models[classIndex][mixtureIndex].mu = mixtureTrainer.getMu().getRowVector(mixtureIndex);
            models[classIndex][mixtureIndex].sigma = mixtureTrainer.getSigma()[mixtureIndex];

            //The inverse and the determinant both come from the same LU decomposition of sigma
            LUDecomposition ludcmp(models[classIndex][mixtureIndex].sigma);

            if( !ludcmp.inverse( models[classIndex][mixtureIndex].invSigma ) ){
                //This is the only failure path that also empties the models buffer
                models.clear();
                errorLog << "train(LabelledClassificationData &trainingData) - Failed to invert Matrix for class " << classLabel << "!" << endl;
                return false;
            }
            models[classIndex][mixtureIndex].det = ludcmp.det();
        }

        //The normalization factor depends on the components stored above
        models[classIndex].recomputeNormalizationFactor();

        //Evaluate the new model on its own training samples to get the statistics for the rejection threshold
        const UINT numClassSamples = samplesForClass.getNumSamples();
        double meanLikelihood = 0;
        double likelihoodSpread = 0;
        VectorDouble likelihoods(numClassSamples,0);

        for(UINT sampleIndex=0; sampleIndex<numClassSamples; sampleIndex++){
            vector< double > sample = samplesForClass[sampleIndex].getSample();
            likelihoods[sampleIndex] = models[classIndex].computeMixtureLikelihood( sample );
            meanLikelihood += likelihoods[sampleIndex];
        }

        //Turn the running sum into the mean
        meanLikelihood /= double( numClassSamples );

        //Sample standard deviation of the likelihoods (N-1 in the denominator)
        for(UINT sampleIndex=0; sampleIndex<numClassSamples; sampleIndex++){
            likelihoodSpread += SQR( (likelihoods[sampleIndex]-meanLikelihood) );
        }
        likelihoodSpread = sqrt( likelihoodSpread / (double(numClassSamples)-1.0) );

        //NOTE(review): the standard deviation computed above is immediately replaced by the constant 0.2,
        //so the model always receives 0.2. This looks like a leftover override - confirm before changing,
        //because every rejection threshold depends on it.
        likelihoodSpread = 0.2;

        //Hand the training statistics to the model
        models[classIndex].setTrainingMuAndSigma(meanLikelihood,likelihoodSpread);

        //The threshold is always recomputed; a failure is only reported when null rejection is in use
        const bool thresholdUpdated = models[classIndex].recomputeNullRejectionThreshold(nullRejectionCoeff);
        if( useNullRejection && !thresholdUpdated ){
            warningLog << "train(LabelledClassificationData &trainingData) - Failed to recompute rejection threshold for class " << classLabel << " - the nullRjectionCoeff value is too high!" << endl;
        }
    }

    //Mirror each model's class label and rejection threshold into the classifier's own buffers
    classLabels.resize(numClasses);
    nullRejectionThresholds.resize(numClasses);
    for(UINT classIndex=0; classIndex<numClasses; classIndex++){
        classLabels[classIndex] = models[classIndex].getClassLabel();
        nullRejectionThresholds[classIndex] = models[classIndex].getNullRejectionThreshold();
    }

    //Flag that the models have been trained
    trained = true;
    return true;
}