bool KMeans::train_(UnlabelledData &trainingData){

    //Convert the training data into one matrix
    UINT M = trainingData.getNumSamples();
    UINT N = trainingData.getNumDimensions();
    MatrixFloat data(M,N);
    for(UINT i=0; i<M; i++){
        for(UINT j=0; j<N; j++){
            data[i][j] = trainingData[i][j];
        }
    }

    return train_(data);
}
bool HierarchicalClustering::train_(ClassificationData &trainingData){

    if( trainingData.getNumSamples() == 0 ){
        return false;
    }

    //Convert the labelled training data to a training matrix
    M = trainingData.getNumSamples();
    N = trainingData.getNumDimensions();
    MatrixFloat data(M,N);
    for(UINT i=0; i<M; i++){
        for(UINT j=0; j<N; j++){
            data[i][j] = trainingData[i][j];
        }
    }

    return train_( data );
}
bool KMeans::train_(ClassificationData &trainingData){

    if( trainingData.getNumSamples() == 0 ){
        errorLog << "train_(ClassificationData &trainingData) - The training data is empty!" << std::endl;
        return false;
    }

    //Set numClusters to the number of classes in the training data
    numClusters = trainingData.getNumClasses();

    //Convert the labelled training data to a training matrix
    UINT M = trainingData.getNumSamples();
    UINT N = trainingData.getNumDimensions();
    MatrixFloat data(M,N);
    for(UINT i=0; i<M; i++){
        for(UINT j=0; j<N; j++){
            data[i][j] = trainingData[i][j];
        }
    }

    //Run the K-Means algorithm
    return train_( data );
}
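//A minimal usage sketch for the ClassificationData overload above, assuming the GRT
//headers and namespace. The dimension count and sample values are illustrative only;
//note that numClusters is derived from the number of classes in the data.
#include <GRT/GRT.h>
using namespace GRT;

int main(){
    ClassificationData trainingData;
    trainingData.setNumDimensions( 2 );

    VectorFloat sample( 2 );
    sample[0] = 0.1; sample[1] = 0.2;
    trainingData.addSample( 1, sample ); //a sample from class 1
    sample[0] = 0.9; sample[1] = 0.8;
    trainingData.addSample( 2, sample ); //a sample from class 2

    KMeans kmeans;
    if( !kmeans.train( trainingData ) ){ //sets numClusters to the class count, then clusters
        return 1;
    }
    return 0;
}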
bool GaussianMixtureModels::train_(ClassificationData &trainingData){
    MatrixFloat data = trainingData.getDataAsMatrixFloat();
    return train_( data );
}
bool MLBase::train(RegressionData trainingData){ return train_( trainingData ); }
bool MLBase::train(MatrixFloat data){ return train_( data ); }
bool SelfOrganizingMap::train_(UnlabelledData &trainingData){
    MatrixFloat data = trainingData.getDataAsMatrixFloat();
    return train_(data);
}
bool KMeansFeatures::train_(UnlabelledData &trainingData){
    MatrixDouble data = trainingData.getDataAsMatrixDouble();
    return train_( data );
}
bool KNN::train(LabelledClassificationData &trainingData){

    if( !searchForBestKValue ){
        return train_(trainingData,K);
    }

    UINT index = 0;
    double bestAccuracy = 0;
    vector< IndexedDouble > trainingAccuracyLog;

    for(UINT k=minKSearchValue; k<=maxKSearchValue; k++){
        //Randomly split the data and use 80% to train the algorithm and 20% to test it
        LabelledClassificationData trainingSet(trainingData);
        LabelledClassificationData testSet = trainingSet.partition(80,true);

        if( !train_(trainingSet, k) ){
            errorLog << "Failed to train model for a k value of " << k << endl;
        }else{
            //Compute the classification accuracy
            double accuracy = 0;
            for(UINT i=0; i<testSet.getNumSamples(); i++){
                vector< double > sample = testSet[i].getSample();
                if( !predict( sample ) ){
                    errorLog << "Failed to predict label for test sample with a k value of " << k << endl;
                    return false;
                }
                if( testSet[i].getClassLabel() == predictedClassLabel ){
                    accuracy++;
                }
            }
            accuracy = accuracy / double( testSet.getNumSamples() ) * 100.0;
            trainingAccuracyLog.push_back( IndexedDouble(k,accuracy) );

            trainingLog << "K:\t" << k << "\tAccuracy:\t" << accuracy << endl;

            if( accuracy > bestAccuracy ){
                bestAccuracy = accuracy;
            }
            index++;
        }
    }

    if( bestAccuracy > 0 ){
        //Sort the training log by accuracy
        std::sort(trainingAccuracyLog.begin(),trainingAccuracyLog.end(),IndexedDouble::sortIndexedDoubleByValueDescending);

        //Copy the top matching values into a temporary buffer
        vector< IndexedDouble > tempLog;

        //Add the first value
        tempLog.push_back( trainingAccuracyLog[0] );

        //Keep adding values until the accuracy changes
        for(UINT i=1; i<trainingAccuracyLog.size(); i++){
            if( trainingAccuracyLog[i].value == tempLog[0].value ){
                tempLog.push_back( trainingAccuracyLog[i] );
            }else break;
        }

        //Sort the temp values by index (the index is the K value, so this finds the minimum K value with the maximum accuracy)
        std::sort(tempLog.begin(),tempLog.end(),IndexedDouble::sortIndexedDoubleByIndexAscending);

        trainingLog << "Best K Value: " << tempLog[0].index << "\tAccuracy:\t" << tempLog[0].value << endl;

        //Use the minimum index; this should give the best accuracy with the minimum K value
        return train_(trainingData,tempLog[0].index);
    }

    return false;
}
bool KNN::train_(ClassificationData &trainingData){

    //Clear any previous models
    clear();

    if( trainingData.getNumSamples() == 0 ){
        errorLog << "train_(ClassificationData &trainingData) - Training data has zero samples!" << endl;
        return false;
    }

    //Get the ranges of the data
    ranges = trainingData.getRanges();

    if( useScaling ){
        //Scale the training data between 0 and 1
        trainingData.scale(0, 1);
    }

    //Store the number of features, classes and the training data
    this->numInputDimensions = trainingData.getNumDimensions();
    this->numClasses = trainingData.getNumClasses();

    //TODO: In the future we need to build a kd-tree from the training data to allow better realtime prediction
    this->trainingData = trainingData;

    //Set the class labels
    classLabels.resize(numClasses);
    for(UINT k=0; k<numClasses; k++){
        classLabels[k] = trainingData.getClassTracker()[k].classLabel;
    }

    //If we do not need to search for the best K value, then call the sub training function and return the result
    if( !searchForBestKValue ){
        return train_(trainingData,K);
    }

    //If we have got this far then we are going to search for the best K value
    UINT index = 0;
    double bestAccuracy = 0;
    vector< IndexedDouble > trainingAccuracyLog;

    for(UINT k=minKSearchValue; k<=maxKSearchValue; k++){
        //Randomly split the data and use 80% to train the algorithm and 20% to test it
        ClassificationData trainingSet(trainingData);
        ClassificationData testSet = trainingSet.partition(80,true);

        if( !train_(trainingSet, k) ){
            errorLog << "Failed to train model for a k value of " << k << endl;
        }else{
            //Compute the classification accuracy
            double accuracy = 0;
            for(UINT i=0; i<testSet.getNumSamples(); i++){
                VectorDouble sample = testSet[i].getSample();
                if( !predict( sample, k ) ){
                    errorLog << "Failed to predict label for test sample with a k value of " << k << endl;
                    return false;
                }
                if( testSet[i].getClassLabel() == predictedClassLabel ){
                    accuracy++;
                }
            }
            accuracy = accuracy / double( testSet.getNumSamples() ) * 100.0;
            trainingAccuracyLog.push_back( IndexedDouble(k,accuracy) );

            trainingLog << "K:\t" << k << "\tAccuracy:\t" << accuracy << endl;

            if( accuracy > bestAccuracy ){
                bestAccuracy = accuracy;
            }
            index++;
        }
    }

    if( bestAccuracy > 0 ){
        //Sort the training log by accuracy
        std::sort(trainingAccuracyLog.begin(),trainingAccuracyLog.end(),IndexedDouble::sortIndexedDoubleByValueDescending);

        //Copy the top matching values into a temporary buffer
        vector< IndexedDouble > tempLog;

        //Add the first value
        tempLog.push_back( trainingAccuracyLog[0] );

        //Keep adding values until the accuracy changes
        for(UINT i=1; i<trainingAccuracyLog.size(); i++){
            if( trainingAccuracyLog[i].value == tempLog[0].value ){
                tempLog.push_back( trainingAccuracyLog[i] );
            }else break;
        }

        //Sort the temp values by index (the index is the K value, so this finds the minimum K value with the maximum accuracy)
        std::sort(tempLog.begin(),tempLog.end(),IndexedDouble::sortIndexedDoubleByIndexAscending);

        trainingLog << "Best K Value: " << tempLog[0].index << "\tAccuracy:\t" << tempLog[0].value << endl;

        //Use the minimum index; this should give the best accuracy with the minimum K value.
        //We now need to train the model again to make sure all the training metrics are computed correctly.
        return train_(trainingData,tempLog[0].index);
    }

    return false;
}
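//A hedged sketch of driving the best-K search implemented above, given a populated
//ClassificationData instance (trainingData) as built in the KMeans example earlier.
//The setter names (enableBestKValueSearch, setMinKSearchValue, setMaxKSearchValue)
//follow the GRT KNN header, but treat them as assumptions if your GRT version differs.
KNN knn;
knn.enableBestKValueSearch( true ); //search a range of K values instead of using a fixed K
knn.setMinKSearchValue( 1 );
knn.setMaxKSearchValue( 10 );
if( !knn.train( trainingData ) ){   //runs the 80/20 split search, then retrains with the best K
    std::cout << "Failed to train the KNN classifier!" << std::endl;
}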
bool KMeansFeatures::train_(LabelledContinuousTimeSeriesClassificationData &trainingData){
    MatrixDouble data = trainingData.getDataAsMatrixDouble();
    return train_( data );
}
bool RBMQuantizer::train_(TimeSeriesClassificationDataStream &trainingData){
    MatrixDouble data = trainingData.getDataAsMatrixDouble();
    return train_( data );
}
bool RBMQuantizer::train_(ClassificationData &trainingData){
    MatrixDouble data = trainingData.getDataAsMatrixDouble();
    return train_( data );
}
bool train(const Ptr<TrainData>& data, int)
{
    Mat samples = data->getTrainSamples(), labels;
    return train_(samples, labels, noArray(), noArray());
}
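//The overload above follows the cv::ml pattern from OpenCV (an unsupervised model
//ignoring the labels). A hedged sketch of reaching it through the public
//StatModel::train interface, here via the EM model; the sample matrix contents are
//illustrative, and passing noArray() for the responses is an assumption that holds
//for unsupervised models like EM.
#include <opencv2/ml.hpp>
using namespace cv;
using namespace cv::ml;

int main(){
    Mat samples = (Mat_<float>(4,1) << 0.f, 0.1f, 5.f, 5.1f);               //four 1-D samples
    Ptr<TrainData> data = TrainData::create( samples, ROW_SAMPLE, noArray() ); //no labels
    Ptr<EM> em = EM::create();
    em->setClustersNumber( 2 );
    return em->train( data ) ? 0 : 1; //dispatches to a train(data, flags) overload like the one above
}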
bool KMeansFeatures::train_(ClassificationData &trainingData){
    MatrixDouble data = trainingData.getDataAsMatrixDouble();
    return train_( data );
}
bool KMeansFeatures::train_(TimeSeriesClassificationDataStream &trainingData){
    MatrixDouble data = trainingData.getDataAsMatrixDouble();
    return train_( data );
}
bool MLBase::train(RegressionData trainingData,RegressionData validationData){ return train_( trainingData, validationData ); }
bool SelfOrganizingMap::train_(ClassificationData &trainingData){
    MatrixFloat data = trainingData.getDataAsMatrixFloat();
    return train_(data);
}
bool HiddenMarkovModel::train(const vector< vector<UINT> > &trainingData){

    //Clear any previous models
    modelTrained = false;
    observationSequence.clear();
    estimatedStates.clear();
    trainingIterationLog.clear();

    UINT n, currentIter, bestIndex = 0;
    double newLoglikelihood, bestLogValue = 0;

    if( numRandomTrainingIterations > 1 ){

        //Buffers to keep track of the A and B matrices for each random restart
        vector< MatrixDouble > aTracker( numRandomTrainingIterations );
        vector< MatrixDouble > bTracker( numRandomTrainingIterations );
        vector< double > loglikelihoodTracker( numRandomTrainingIterations );

        UINT maxNumTestIter = maxNumIter > 10 ? 10 : maxNumIter;

        //Try to find the best starting point
        for(n=0; n<numRandomTrainingIterations; n++){
            //Reset the model to new random starting values
            randomizeMatrices(numStates,numSymbols);

            if( !train_(trainingData,maxNumTestIter,currentIter,newLoglikelihood) ){
                return false;
            }
            aTracker[n] = a;
            bTracker[n] = b;
            loglikelihoodTracker[n] = newLoglikelihood;
        }

        //Get the best result and set it as the a and b starting values
        bestIndex = 0;
        bestLogValue = loglikelihoodTracker[0];
        for(n=1; n<numRandomTrainingIterations; n++){
            if( bestLogValue < loglikelihoodTracker[n] ){
                bestLogValue = loglikelihoodTracker[n];
                bestIndex = n;
            }
        }

        //Set a and b
        a = aTracker[bestIndex];
        b = bTracker[bestIndex];

    }else{
        randomizeMatrices(numStates,numSymbols);
    }

    //Perform the actual training
    if( !train_(trainingData,maxNumIter,currentIter,newLoglikelihood) ){
        return false;
    }

    //Calculate the observationSequence buffer length
    const UINT numObs = (unsigned int)trainingData.size();
    UINT k = 0;
    UINT averageObsLength = 0;
    for(k=0; k<numObs; k++){
        const UINT T = (unsigned int)trainingData[k].size();
        averageObsLength += T;
    }
    averageObsLength = (UINT)floor( averageObsLength/double(numObs) );
    observationSequence.resize( averageObsLength );
    estimatedStates.resize( averageObsLength );

    //Finally, flag that the model was trained
    modelTrained = true;

    return true;
}
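//A small sketch of calling the batch trainer above with two discrete observation
//sequences. The model (hmm) is assumed to already be configured with numStates and
//numSymbols via its constructor or setters, which vary between GRT versions, so
//this is illustrative rather than a definitive invocation.
vector< vector<UINT> > trainingData;
vector<UINT> seq1;
seq1.push_back( 0 ); seq1.push_back( 1 ); seq1.push_back( 2 );
vector<UINT> seq2;
seq2.push_back( 0 ); seq2.push_back( 2 ); seq2.push_back( 1 );
trainingData.push_back( seq1 );
trainingData.push_back( seq2 );

if( !hmm.train( trainingData ) ){ //hmm: a configured HiddenMarkovModel instance
    //training failed, e.g. the Baum-Welch updates did not produce a valid model
}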
bool MLBase::train(UnlabelledData trainingData){ return train_( trainingData ); }
bool RBMQuantizer::train_(TimeSeriesClassificationData &trainingData){
    MatrixFloat data = trainingData.getDataAsMatrixFloat();
    return train_( data );
}
bool MLBase::train(ClassificationData trainingData){ return train_( trainingData ); }
bool RBMQuantizer::train_(ClassificationDataStream &trainingData){
    MatrixFloat data = trainingData.getDataAsMatrixFloat();
    return train_( data );
}
bool MLBase::train(TimeSeriesClassificationData trainingData){ return train_( trainingData ); }
bool RBMQuantizer::train_(UnlabelledData &trainingData){
    MatrixFloat data = trainingData.getDataAsMatrixFloat();
    return train_( data );
}
bool GaussianMixtureModels::train_(UnlabelledData &trainingData){
    MatrixFloat data = trainingData.getDataAsMatrixFloat();
    return train_( data );
}
bool MLBase::train(MatrixDouble data){ return train_( data ); }