/**
 Builds the training set for one round of k-fold cross validation.
 Every sample whose index is stored in a fold other than foldIndex is copied into the returned dataset.

 @param foldIndex: the fold that is held out, this must be less than kFoldValue
 @return the training data for the fold. If cross validation has not been set up, or foldIndex is out of range, the returned dataset has had no samples added to it
 */
LabelledTimeSeriesClassificationData LabelledTimeSeriesClassificationData::getTrainingFoldData(const UINT foldIndex) const {

    LabelledTimeSeriesClassificationData foldTrainingSet;

    //Guard: the fold indexes only exist after the data has been split
    if( !crossValidationSetup ){
        errorLog << "getTrainingFoldData(UINT foldIndex) - Cross Validation has not been setup! You need to call the spiltDataIntoKFolds(UINT K,bool useStratifiedSampling) function first before calling this function!" << endl;
        return foldTrainingSet;
    }

    //Guard: an out of range fold silently yields the untouched dataset
    if( foldIndex >= kFoldValue ){
        return foldTrainingSet;
    }

    foldTrainingSet.setNumDimensions( numDimensions );

    //Copy across every sample that does NOT belong to the held-out fold
    for(UINT fold=0; fold<kFoldValue; fold++){
        if( fold == foldIndex ){
            continue;
        }

        const UINT samplesInFold = crossValidationIndexs[fold].size();
        for(UINT n=0; n<samplesInFold; n++){
            const UINT sampleIndex = crossValidationIndexs[fold][n];
            foldTrainingSet.addSample( data[ sampleIndex ].getClassLabel(), data[ sampleIndex ].getData() );
        }
    }

    return foldTrainingSet;
}
/**
 Builds a class-balanced training set for one round of k-fold cross validation.
 The sample indexes of every fold except foldIndex are pooled per class, then up to
 numSamplesPerClass samples are drawn at random, without replacement, from each class pool.

 @param foldIndex: the fold that is held out, this must be less than kFoldValue
 @param numSamplesPerClass: the number of samples to draw for each class
 @return the training data. If cross validation has not been set up, or foldIndex is out of range, the returned dataset has had no samples added to it
 */
LabelledTimeSeriesClassificationData KfoldTimeSeriesData::getTrainingFoldData(const UINT foldIndex, const UINT numSamplesPerClass) const {

    LabelledTimeSeriesClassificationData sampledTrainingSet;

    if( !crossValidationSetup ) {
        cout << "getTrainingFoldData(UINT foldIndex) - Cross Validation has not been setup! You need to call the spiltDataIntoKFolds(UINT K,bool useStratifiedSampling) function first before calling this function!" << endl;
        return sampledTrainingSet;
    }

    if( foldIndex >= kFoldValue ) {
        cout << "Fold index too big" << endl;
        return sampledTrainingSet;
    }

    Random random;

    sampledTrainingSet.setNumDimensions( numDimensions );

    /* Pool the indexes of the K-1 training folds, keeping one pool per class */
    vector< vector< UINT > > pooledIndexs( inputDataset.getNumClasses() );

    for(UINT fold = 0; fold < kFoldValue; fold++) {
        if( fold != foldIndex ) {
            for(UINT c = 0; c < crossValidationIndexs[fold].size(); c++) {
                pooledIndexs[c].insert( pooledIndexs[c].end(),
                                        crossValidationIndexs[fold][c].begin(),
                                        crossValidationIndexs[fold][c].end() );
            }
        }
    }

    /* For each class pick "numSamplesPerClass" samples at random */
    for(unsigned int c = 0; c < inputDataset.getNumClasses(); c++) {

        vector< UINT > &pool = pooledIndexs[c];

        for(unsigned int picked = 0; picked < numSamplesPerClass; picked++) {

            const unsigned int remaining = pool.size();

            //The pool ran dry before the requested number of samples was reached
            if( remaining == 0 ) {
                printf("The \"numSamplesPerClass\" variable is bigger that the samples for this class");
                break;
            }

            const unsigned int position = random.getRandomNumberInt(0, remaining);
            const UINT sampleIndex = pool[position];

            /* Drop the chosen index from the pool so that it can not be drawn twice */
            pool.erase( pool.begin() + position );

            sampledTrainingSet.addSample( inputDataset[ sampleIndex ].getClassLabel(), inputDataset[ sampleIndex ].getData() );
        }
    }

    return sampledTrainingSet;
}
/**
 Applies the two-argument znormData overload to every sample in the dataset,
 using each sample's own time series as both the input and the output.

 @param trainingData: the dataset whose samples are normalised in place
 */
void DTW::znormData(LabelledTimeSeriesClassificationData &trainingData){

    UINT sampleIndex = 0;
    while( sampleIndex < trainingData.getNumSamples() ){
        //Source and destination are the same matrix, so the sample is overwritten
        MatrixDouble &series = trainingData[ sampleIndex ].getData();
        znormData( series, series );
        sampleIndex++;
    }
}
/**
 Loads a labelled time series dataset from file, configures the recognition pipeline and trains it.
 The pipeline is given a DTW classifier (null rejection, training data trimming and first-sample
 offsetting all enabled) followed by a ClassLabelChangeFilter post processing module.

 @param trainingdatafile: the file the training dataset is loaded from
 @param dimension: currently unused, it is only referenced by the commented-out pre-processing and feature-extraction modules below
 @return true if the dataset was loaded and the pipeline was trained successfully, false otherwise
 */
bool GRT_Recognizer::initPipeline(string trainingdatafile, int dimension) {

    (void)dimension; //Only needed by the optional modules that are commented out below

    //Initialize the training and info variables
    // infoText = "";
    // trainingClassLabel = 1;
    // noOfHands = 2;
    //noOfTrackedHands = 0;

    //The input to the training data will be the R[x y z]L[x y z] from the left and right hand.
    //The number of dimensions is taken from the dataset file rather than being set explicitly.
    LabelledTimeSeriesClassificationData trainingData;
    //trainingData.setNumDimensions(6);

    //Remember whether the load worked, the pipeline is still configured below so that its setup does not depend on the file
    const bool datasetLoaded = trainingData.loadDatasetFromFile(trainingdatafile);

    //Initialize the DTW classifier
    DTW dtw;

    //Turn on null rejection, this lets the classifier output the predicted class label of 0 when the likelihood of a gesture is low
    dtw.enableNullRejection( true);

    //Set the null rejection coefficient to 2, this controls the thresholds for the automatic null rejection
    //You can increase this value if you find that your real-time gestures are not being recognized
    //If you are getting too many false positives then you should decrease this value
    dtw.setNullRejectionCoeff(2);

    //Turn on the automatic data trimming, this will remove any sections of non-movement from the start and end of the training samples
    dtw.enableTrimTrainingData(true, 0.1, 90);

    //Offset the timeseries data by the first sample, this makes your gestures (more) invariant to the location the gesture is performed
    dtw.setOffsetTimeseriesUsingFirstSample(true);

    //Add the classifier to the pipeline (after we do this, we don't need the DTW classifier anymore)
    pipeline.setClassifier( dtw );

    //pipeline.addPreProcessingModule(MovingAverageFilter(5,dimension));
    //pipeline.addFeatureExtractionModule(FFT(16,1, dimension));
    /*ClassLabelFilter myFilter = ClassLabelFilter();
    myFilter.setBufferSize(6);
    myFilter.setBufferSize(2);*/
    pipeline.addPostProcessingModule(ClassLabelChangeFilter());

    //Report failures to the caller instead of always claiming success
    if( !datasetLoaded ){
        cout << "initPipeline(string trainingdatafile, int dimension) - Failed to load the training data from file: " << trainingdatafile << endl;
        return false;
    }

    if( !pipeline.train(trainingData) ){
        cout << "initPipeline(string trainingdatafile, int dimension) - Failed to train the pipeline!" << endl;
        return false;
    }

    return true;
}
/**
 Applies the two-argument scaleData overload to every sample in the dataset,
 using each sample's own time series as both the input and the output.

 @param trainingData: the dataset whose samples are scaled in place
 */
void DTW::scaleData(LabelledTimeSeriesClassificationData &trainingData){

    UINT sampleIndex = 0;
    while( sampleIndex < trainingData.getNumSamples() ){
        //Source and destination are the same matrix, so the sample is overwritten
        MatrixDouble &series = trainingData[ sampleIndex ].getData();
        scaleData( series, series );
        sampleIndex++;
    }
}
bool HMM::convertDataToObservationSequence( LabelledTimeSeriesClassificationData &classData, vector< vector< UINT > > &observationSequences ){ observationSequences.resize( classData.getNumSamples() ); for(UINT i=0; i<classData.getNumSamples(); i++){ MatrixDouble ×eries = classData[i].getData(); observationSequences[i].resize( timeseries.getNumRows() ); for(UINT j=0; j<timeseries.getNumRows(); j++){ if( timeseries[j][0] >= numSymbols ){ errorLog << "train(LabelledTimeSeriesClassificationData &trainingData) - Found an observation sequence with a value outside of the symbol range! Value: " << timeseries[j][0] << endl; return false; } observationSequences[i][j] = (UINT)timeseries[j][0]; } } return true; }
/**
 Builds the test set for one round of k-fold cross validation by copying every sample whose index is stored in fold foldIndex.

 @param foldIndex: the fold to use as the test set, this must be less than kFoldValue
 @return the test data for the fold. If cross validation has not been set up, or foldIndex is out of range, the returned dataset has had no samples added to it
 */
LabelledTimeSeriesClassificationData LabelledTimeSeriesClassificationData::getTestFoldData(const UINT foldIndex) const {

    LabelledTimeSeriesClassificationData foldTestSet;

    //Nothing can be returned without a valid split and a valid fold
    const bool foldIsValid = crossValidationSetup && foldIndex < kFoldValue;
    if( !foldIsValid ){
        return foldTestSet;
    }

    foldTestSet.setNumDimensions( numDimensions );

    //Copy across the samples that belong to the requested fold
    const UINT samplesInFold = crossValidationIndexs[ foldIndex ].size();
    for(UINT n=0; n<samplesInFold; n++){
        const UINT sampleIndex = crossValidationIndexs[ foldIndex ][n];
        foldTestSet.addSample( data[ sampleIndex ].getClassLabel(), data[ sampleIndex ].getData() );
    }

    return foldTestSet;
}
/**
 Returns the number of samples in the dataset minus the number of samples in the largest fold.
 NOTE(review): this looks like the size of the smallest training set that the K-1 remaining folds can form - confirm against the callers.

 The cross validation buffer is indexed as [fold][class][sample], so the size of a fold is the sum
 of the sizes of its per-class index lists (the size of crossValidationIndexs[k] itself is only the number of classes).

 @return the dataset size minus the size of the largest fold, or 0 if cross validation has not been set up
 */
UINT KfoldTimeSeriesData::getFoldSize() {

    if( !crossValidationSetup ){
        return 0;
    }

    UINT largestFold = 0;
    for(UINT k = 0; k < crossValidationIndexs.size(); k++) {

        //Count the samples of this fold across all of its classes
        UINT samplesInFold = 0;
        for(UINT c = 0; c < crossValidationIndexs[k].size(); c++) {
            samplesInFold += (UINT)crossValidationIndexs[k][c].size();
        }

        if( samplesInFold > largestFold ){
            largestFold = samplesInFold;
        }
    }

    return inputDataset.getNumSamples() - largestFold;
}
bool LabelledTimeSeriesClassificationData::merge(const LabelledTimeSeriesClassificationData &labelledData){ if( labelledData.getNumDimensions() != numDimensions ){ errorLog << "merge(LabelledTimeSeriesClassificationData &labelledData) - The number of dimensions in the labelledData (" << labelledData.getNumDimensions() << ") does not match the number of dimensions of this dataset (" << numDimensions << ")" << endl; return false; } //The dataset has changed so flag that any previous cross validation setup will now not work crossValidationSetup = false; crossValidationIndexs.clear(); //Add the data from the labelledData to this instance for(UINT i=0; i<labelledData.getNumSamples(); i++){ addSample(labelledData[i].getClassLabel(), labelledData[i].getData()); } //Set the class names from the dataset vector< ClassTracker > classTracker = labelledData.getClassTracker(); for(UINT i=0; i<classTracker.size(); i++){ setClassNameForCorrespondingClassLabel(classTracker[i].className, classTracker[i].classLabel); } return true; }
bool KfoldTimeSeriesData::spiltDataIntoKFolds(const GRT::UINT K) { kFoldValue = K; //K can not be zero if( K == 0 ){ std::cout << "spiltDataIntoKFolds(UINT K) - K can not be zero!" << std::endl; return false; } //K can not be larger than the number of examples if( K > inputDataset.getNumSamples()){ std::cout << "spiltDataIntoKFolds(UINT K,bool useStratifiedSampling) - K can not be 0!" << std::endl; return false; } //K can not be larger than the number of examples in a specific class if the stratified sampling option is true for(UINT c=0; c < inputDataset.getNumClasses(); c++) { if( K > classTracker[c].counter ){ cout << "spiltDataIntoKFolds(UINT K,bool useStratifiedSampling) - K can not be larger than the number of samples in any given class!" << std::endl; return false; } } //Setup the dataset for k-fold cross validation kFoldValue = K; vector< UINT > indexs( inputDataset.getNumSamples() ); //Work out how many samples are in each fold, the last fold might have more samples than the others UINT numSamplesPerFold = (UINT) floor( inputDataset.getNumSamples() / double(K) ); //Create the random partion indexs Random random; UINT randomIndex = 0; //Break the data into seperate classes vector< vector< UINT > > classData( inputDataset.getNumClasses() ); //Add the indexs to their respective classes for(UINT i = 0; i < inputDataset.getNumSamples(); i++) { classData[ inputDataset.getClassLabelIndexValue( inputDataset[i].getClassLabel() ) ].push_back( i ); } //Randomize the order of the indexs in each of the class index buffers for(UINT c = 0; c < inputDataset.getNumClasses(); c++) { UINT numSamples = (UINT)classData[c].size(); for(UINT x = 0; x < numSamples; x++) { //Pick a random index randomIndex = random.getRandomNumberInt(0, numSamples); //Swap the indexs SWAP( classData[c][ x ] , classData[c][ randomIndex ] ); } } //Resize the cross validation indexs buffer crossValidationIndexs.resize( K ); for (UINT k = 0; k < K; k++) { 
crossValidationIndexs[k].resize(inputDataset.getNumClasses()); } //Loop over each of the classes and add the data equally to each of the k folds until there is no data left vector< UINT >::iterator iter; for(UINT c = 0; c < inputDataset.getNumClasses(); c++){ iter = classData[ c ].begin(); UINT k = 0; while( iter != classData[c].end() ){ crossValidationIndexs[ k ][c].push_back( *iter ); iter++; k = ++k % K; } } crossValidationSetup = true; return true; }
/**
 Trains the quantizer from a labelled time series dataset.
 The dataset is converted with getDataAsMatrixDouble() and handed to the MatrixDouble overload of train.

 @param trainingData: the dataset to train the quantizer with
 @return the result of the MatrixDouble overload of train
 */
bool KMeansQuantizer::train(LabelledTimeSeriesClassificationData &trainingData){

    //A named local is kept so that the matrix can be passed on as an lvalue
    MatrixDouble flattenedSamples = trainingData.getDataAsMatrixDouble();

    const bool result = train( flattenedSamples );
    return result;
}
bool DTW::train_NDDTW(LabelledTimeSeriesClassificationData &trainingData,DTWTemplate &dtwTemplate,UINT &bestIndex){ UINT numExamples = trainingData.getNumSamples(); VectorDouble results(numExamples,0.0); MatrixDouble distanceResults(numExamples,numExamples); dtwTemplate.averageTemplateLength = 0; for(UINT m=0; m<numExamples; m++){ MatrixDouble templateA; //The m'th template MatrixDouble templateB; //The n'th template dtwTemplate.averageTemplateLength += trainingData[m].getLength(); //Smooth the data if required if( useSmoothing ) smoothData(trainingData[m].getData(),smoothingFactor,templateA); else templateA = trainingData[m].getData(); if( offsetUsingFirstSample ){ offsetTimeseries(templateA); } for(UINT n=0; n<numExamples; n++){ if(m!=n){ //Smooth the data if required if( useSmoothing ) smoothData(trainingData[n].getData(),smoothingFactor,templateB); else templateB = trainingData[n].getData(); if( offsetUsingFirstSample ){ offsetTimeseries(templateB); } //Compute the distance between the two time series MatrixDouble distanceMatrix(templateA.getNumRows(),templateB.getNumRows()); vector< IndexDist > warpPath; double dist = computeDistance(templateA,templateB,distanceMatrix,warpPath); trainingLog << "Template: " << m << " Timeseries: " << n << " Dist: " << dist << endl; //Update the results values distanceResults[m][n] = dist; results[m] += dist; }else distanceResults[m][n] = 0; //The distance is zero because the two timeseries are the same } } for(UINT m=0; m<numExamples; m++) results[m]/=(numExamples-1); //Find the best average result, this is the result with the minimum value bestIndex = 0; double bestAverage = results[0]; for(UINT m=1; m<numExamples; m++){ if( results[m] < bestAverage ){ bestAverage = results[m]; bestIndex = m; } } if( numExamples > 2 ){ //Work out the threshold value for the best template dtwTemplate.trainingMu = results[bestIndex]; dtwTemplate.trainingSigma = 0.0; for(UINT n=0; n<numExamples; n++){ if(n!=bestIndex){ dtwTemplate.trainingSigma 
+= SQR( distanceResults[ bestIndex ][n] - dtwTemplate.trainingMu ); } } dtwTemplate.trainingSigma = sqrt( dtwTemplate.trainingSigma / double(numExamples-2) ); }else{ warningLog << "_train_NDDTW(LabelledTimeSeriesClassificationData &trainingData,DTWTemplate &dtwTemplate,UINT &bestIndex - There are not enough examples to compute the trainingMu and trainingSigma for the template for class " << dtwTemplate.classLabel << endl; dtwTemplate.trainingMu = 0.0; dtwTemplate.trainingSigma = 0.0; } //Set the average length of the training examples dtwTemplate.averageTemplateLength = (UINT) (dtwTemplate.averageTemplateLength/double(numExamples)); trainingLog << "AverageTemplateLength: " << dtwTemplate.averageTemplateLength << endl; //Flag that the training was successfull return true; }
////////////////////////// TRAINING FUNCTIONS ////////////////////////// bool DTW::train(LabelledTimeSeriesClassificationData labelledTrainingData){ UINT bestIndex = 0; //Cleanup Memory templatesBuffer.clear(); classLabels.clear(); trained = false; continuousInputDataBuffer.clear(); if( trimTrainingData ){ LabelledTimeSeriesClassificationSampleTrimmer timeSeriesTrimmer(trimThreshold,maximumTrimPercentage); LabelledTimeSeriesClassificationData tempData; tempData.setNumDimensions( labelledTrainingData.getNumDimensions() ); for(UINT i=0; i<labelledTrainingData.getNumSamples(); i++){ if( timeSeriesTrimmer.trimTimeSeries( labelledTrainingData[i] ) ){ tempData.addSample(labelledTrainingData[i].getClassLabel(), labelledTrainingData[i].getData()); }else{ trainingLog << "Removing training sample " << i << " from the dataset as it could not be trimmed!" << endl; } } //Overwrite the original training data with the trimmed dataset labelledTrainingData = tempData; } if( labelledTrainingData.getNumSamples() == 0 ){ errorLog << "_train(LabelledTimeSeriesClassificationData &labelledTrainingData) - Can't train model as there are no samples in training data!" 
<< endl; return false; } //Assign numClasses = labelledTrainingData.getNumClasses(); numTemplates = labelledTrainingData.getNumClasses(); numFeatures = labelledTrainingData.getNumDimensions(); templatesBuffer.resize( numClasses ); classLabels.resize( numClasses ); nullRejectionThresholds.resize( numClasses ); averageTemplateLength = 0; //Need to copy the labelled training data incase we need to scale it or znorm it LabelledTimeSeriesClassificationData trainingData( labelledTrainingData ); //Perform any scaling or normalisation rangesBuffer = trainingData.getRanges(); if( useScaling ) scaleData( trainingData ); if( useZNormalisation ) znormData( trainingData ); //For each class, run a one-to-one DTW and find the template the best describes the data for(UINT k=0; k<numTemplates; k++){ //Get the class label for the cth class UINT classLabel = trainingData.getClassTracker()[k].classLabel; LabelledTimeSeriesClassificationData classData = trainingData.getClassData( classLabel ); UINT numExamples = classData.getNumSamples(); bestIndex = 0; //Set the class label of this template templatesBuffer[k].classLabel = classLabel; //Set the kth class label classLabels[k] = classLabel; trainingLog << "Training Template: " << k << " Class: " << classLabel << endl; //Check to make sure we actually have some training examples if(numExamples<1){ errorLog << "_train(LabelledTimeSeriesClassificationData &labelledTrainingData) - Can not train model: Num of Example is < 1! Class: " << classLabel << endl; return false; } if(numExamples==1){//If we have just one training example then we have to use it as the template bestIndex = 0; nullRejectionThresholds[k] = 0.0;//TODO-We need a better way of calculating this! 
warningLog << "_train(LabelledTimeSeriesClassificationData &labelledTrainingData) - Can't compute reject thresholds for class " << classLabel << " as there is only 1 training example" << endl; }else{ //Search for the best training example for this class if( !train_NDDTW(classData,templatesBuffer[k],bestIndex) ){ errorLog << "_train(LabelledTimeSeriesClassificationData &labelledTrainingData) - Failed to train template for class with label: " << classLabel << endl; return false; } } //Add the template with the best index to the buffer int trainingMethod = 0; if(useSmoothing) trainingMethod = 1; switch (trainingMethod) { case(0)://Standard Training templatesBuffer[k].timeSeries = classData[bestIndex].getData(); break; case(1)://Training using Smoothing //Smooth the data, reducing its size by a factor set by smoothFactor smoothData(classData[ bestIndex ].getData(),smoothingFactor,templatesBuffer[k].timeSeries); break; default: cout<<"Can not train model: Unknown training method \n"; return false; break; } if( offsetUsingFirstSample ){ offsetTimeseries( templatesBuffer[k].timeSeries ); } //Add the average length of the training examples for this template to the overall averageTemplateLength averageTemplateLength += templatesBuffer[k].averageTemplateLength; } //Flag that the models have been trained trained = true; averageTemplateLength = (UINT) averageTemplateLength/double(numTemplates); //Recompute the null rejection thresholds recomputeNullRejectionThresholds(); //Resize the prediction results to make sure it is setup for realtime prediction continuousInputDataBuffer.clear(); continuousInputDataBuffer.resize(averageTemplateLength,vector<double>(numFeatures,0)); classLikelihoods.resize(numTemplates,DEFAULT_NULL_LIKELIHOOD_VALUE); classDistances.resize(numTemplates,0); predictedClassLabel = GRT_DEFAULT_NULL_CLASS_LABEL; maxLikelihood = DEFAULT_NULL_LIKELIHOOD_VALUE; //Training complete return true; }
/**
 Trains one HMM per class and then computes a null rejection threshold for each class.

 The training data must be 1 dimensional: every value is converted to a discrete observation symbol with
 convertDataToObservationSequence. After all the models are trained, the threshold of a class is set to
 the negated average of the absolute values that its model predicts for its own training sequences.

 @param trainingData: the 1 dimensional training data
 @return true if every model was trained, false if the data is empty, is not 1 dimensional, could not be converted or a model failed to train
 */
bool HMM::train(LabelledTimeSeriesClassificationData trainingData){

    const bool hasSamples = trainingData.getNumSamples() != 0;
    if( !hasSamples ){
        errorLog << "train(LabelledTimeSeriesClassificationData trainingData) - There are no training samples to train the HMM classifer!" << endl;
        return false;
    }

    const bool isOneDimensional = trainingData.getNumDimensions() == 1;
    if( !isOneDimensional ){
        errorLog << "train(LabelledTimeSeriesClassificationData trainingData) - The number of dimensions in the training data must be 1. If your training data is not 1 dimensional then you must quantize the training data using one of the GRT quantization algorithms" << endl;
        return false;
    }

    //Reset the HMM
    trained = false;
    useScaling = false;
    numFeatures = trainingData.getNumDimensions();
    numClasses = trainingData.getNumClasses();
    models.clear();
    classLabels.clear();
    models.resize( numClasses );
    classLabels.resize( numClasses );

    //Give every model the same structure and training settings
    for(UINT c=0; c<numClasses; c++){
        models[c].resetModel(numStates,numSymbols,modelType,delta);
        models[c].maxNumIter = maxNumIter;
        models[c].minImprovement = minImprovement;
    }

    //First pass: train the model of each class
    for(UINT c=0; c<numClasses; c++){

        const UINT label = trainingData.getClassTracker()[c].classLabel;
        classLabels[c] = label;

        //Turn the training data of this class into a list of observation sequences
        LabelledTimeSeriesClassificationData samplesOfClass = trainingData.getClassData( label );
        vector< vector< UINT > > sequences;
        const bool converted = convertDataToObservationSequence( samplesOfClass, sequences );
        if( !converted ){
            return false;
        }

        const bool modelTrained = models[c].train( sequences );
        if( !modelTrained ){
            errorLog << "train(LabelledTimeSeriesClassificationData &trainingData) - Failed to train HMM for class " << label << endl;
            return false;
        }
    }

    //Second pass: compute the rejection threshold of each class from its own training data
    nullRejectionThresholds.resize(numClasses);

    for(UINT c=0; c<numClasses; c++){

        const UINT label = trainingData.getClassTracker()[c].classLabel;
        classLabels[c] = label;

        //Turn the training data of this class into a list of observation sequences
        LabelledTimeSeriesClassificationData samplesOfClass = trainingData.getClassData( label );
        vector< vector< UINT > > sequences;
        const bool converted = convertDataToObservationSequence( samplesOfClass, sequences );
        if( !converted ){
            return false;
        }

        //Run the model over its own training sequences and accumulate the absolute predictions
        double summedMagnitude = 0;
        for(UINT s=0; s<sequences.size(); s++){
            const double prediction = models[c].predict( sequences[s] );
            summedMagnitude += fabs( prediction );
            cout << "Class: " << label << " PredictedLogLikelihood: " << -prediction << endl;
        }

        nullRejectionThresholds[c] = -( summedMagnitude / double( sequences.size() ) );

        cout << "Class: " << label << " NullRejectionThreshold: " << nullRejectionThresholds[c] << endl;
    }

    //Print the parameters of every trained model
    for(UINT c=0; c<numClasses; c++){
        models[c].printAB();
    }

    trained = true;

    return true;
}