/**
 Builds the training set for one cross validation fold.

 The sample indexs of every fold except foldIndex are pooled per class. Up to
 numSamplesPerClass indexs are then drawn at random from each class pool, each drawn
 index being removed from the pool so it cannot be picked twice, and the matching
 samples of inputDataset are added to the returned dataset.

 @param foldIndex: the fold that is left out of the training set, must be less than kFoldValue
 @param numSamplesPerClass: the number of samples to draw for each class
 @return the sampled training data, or a default constructed dataset if cross validation has not been setup or foldIndex is not less than kFoldValue
 */
LabelledTimeSeriesClassificationData KfoldTimeSeriesData::getTrainingFoldData(const UINT foldIndex, const UINT numSamplesPerClass) const {

    LabelledTimeSeriesClassificationData trainingData;

    //The folds must exist before any of them can be used
    if( !crossValidationSetup ) {
        cout << "getTrainingFoldData(UINT foldIndex) - Cross Validation has not been setup! You need to call the spiltDataIntoKFolds(UINT K,bool useStratifiedSampling) function first before calling this function!" << endl;
        return trainingData;
    }

    if( foldIndex >= kFoldValue ) {
        cout << "Fold index too big" << endl;
        return trainingData;
    }

    Random random;
    trainingData.setNumDimensions( numDimensions );

    //Pool the indexs of the K-1 training folds, keeping one pool per class
    const UINT numClasses = inputDataset.getNumClasses();
    vector< vector< UINT > > pooledIndexs( numClasses );
    for(UINT fold = 0; fold < kFoldValue; fold++) {
        if( fold != foldIndex ) {
            for(UINT c = 0; c < crossValidationIndexs[fold].size(); c++) {
                pooledIndexs[c].insert( pooledIndexs[c].end(),
                                        crossValidationIndexs[fold][c].begin(),
                                        crossValidationIndexs[fold][c].end() );
            }
        }
    }

    //Draw the requested number of samples from each class pool
    for(UINT c = 0; c < numClasses; c++) {
        vector< UINT > &pool = pooledIndexs[c];

        for(UINT drawn = 0; drawn < numSamplesPerClass; drawn++) {
            const unsigned int numRemaining = pool.size();

            //Stop early for this class if the pool has run dry
            if( numRemaining == 0 ) {
                printf("The \"numSamplesPerClass\" variable is bigger that the samples for this class");
                break;
            }

            const unsigned int pick = random.getRandomNumberInt(0, numRemaining);
            const UINT sampleIndex = pool[ pick ];

            //Take the index out of the pool so the same sample is not added again
            pool.erase( pool.begin() + pick );

            trainingData.addSample( inputDataset[ sampleIndex ].getClassLabel(), inputDataset[ sampleIndex ].getData() );
        }
    }

    return trainingData;
}
bool KfoldTimeSeriesData::spiltDataIntoKFolds(const GRT::UINT K) { kFoldValue = K; //K can not be zero if( K == 0 ){ std::cout << "spiltDataIntoKFolds(UINT K) - K can not be zero!" << std::endl; return false; } //K can not be larger than the number of examples if( K > inputDataset.getNumSamples()){ std::cout << "spiltDataIntoKFolds(UINT K,bool useStratifiedSampling) - K can not be 0!" << std::endl; return false; } //K can not be larger than the number of examples in a specific class if the stratified sampling option is true for(UINT c=0; c < inputDataset.getNumClasses(); c++) { if( K > classTracker[c].counter ){ cout << "spiltDataIntoKFolds(UINT K,bool useStratifiedSampling) - K can not be larger than the number of samples in any given class!" << std::endl; return false; } } //Setup the dataset for k-fold cross validation kFoldValue = K; vector< UINT > indexs( inputDataset.getNumSamples() ); //Work out how many samples are in each fold, the last fold might have more samples than the others UINT numSamplesPerFold = (UINT) floor( inputDataset.getNumSamples() / double(K) ); //Create the random partion indexs Random random; UINT randomIndex = 0; //Break the data into seperate classes vector< vector< UINT > > classData( inputDataset.getNumClasses() ); //Add the indexs to their respective classes for(UINT i = 0; i < inputDataset.getNumSamples(); i++) { classData[ inputDataset.getClassLabelIndexValue( inputDataset[i].getClassLabel() ) ].push_back( i ); } //Randomize the order of the indexs in each of the class index buffers for(UINT c = 0; c < inputDataset.getNumClasses(); c++) { UINT numSamples = (UINT)classData[c].size(); for(UINT x = 0; x < numSamples; x++) { //Pick a random index randomIndex = random.getRandomNumberInt(0, numSamples); //Swap the indexs SWAP( classData[c][ x ] , classData[c][ randomIndex ] ); } } //Resize the cross validation indexs buffer crossValidationIndexs.resize( K ); for (UINT k = 0; k < K; k++) { 
crossValidationIndexs[k].resize(inputDataset.getNumClasses()); } //Loop over each of the classes and add the data equally to each of the k folds until there is no data left vector< UINT >::iterator iter; for(UINT c = 0; c < inputDataset.getNumClasses(); c++){ iter = classData[ c ].begin(); UINT k = 0; while( iter != classData[c].end() ){ crossValidationIndexs[ k ][c].push_back( *iter ); iter++; k = ++k % K; } } crossValidationSetup = true; return true; }
////////////////////////// TRAINING FUNCTIONS ////////////////////////// bool DTW::train(LabelledTimeSeriesClassificationData labelledTrainingData){ UINT bestIndex = 0; //Cleanup Memory templatesBuffer.clear(); classLabels.clear(); trained = false; continuousInputDataBuffer.clear(); if( trimTrainingData ){ LabelledTimeSeriesClassificationSampleTrimmer timeSeriesTrimmer(trimThreshold,maximumTrimPercentage); LabelledTimeSeriesClassificationData tempData; tempData.setNumDimensions( labelledTrainingData.getNumDimensions() ); for(UINT i=0; i<labelledTrainingData.getNumSamples(); i++){ if( timeSeriesTrimmer.trimTimeSeries( labelledTrainingData[i] ) ){ tempData.addSample(labelledTrainingData[i].getClassLabel(), labelledTrainingData[i].getData()); }else{ trainingLog << "Removing training sample " << i << " from the dataset as it could not be trimmed!" << endl; } } //Overwrite the original training data with the trimmed dataset labelledTrainingData = tempData; } if( labelledTrainingData.getNumSamples() == 0 ){ errorLog << "_train(LabelledTimeSeriesClassificationData &labelledTrainingData) - Can't train model as there are no samples in training data!" 
<< endl; return false; } //Assign numClasses = labelledTrainingData.getNumClasses(); numTemplates = labelledTrainingData.getNumClasses(); numFeatures = labelledTrainingData.getNumDimensions(); templatesBuffer.resize( numClasses ); classLabels.resize( numClasses ); nullRejectionThresholds.resize( numClasses ); averageTemplateLength = 0; //Need to copy the labelled training data incase we need to scale it or znorm it LabelledTimeSeriesClassificationData trainingData( labelledTrainingData ); //Perform any scaling or normalisation rangesBuffer = trainingData.getRanges(); if( useScaling ) scaleData( trainingData ); if( useZNormalisation ) znormData( trainingData ); //For each class, run a one-to-one DTW and find the template the best describes the data for(UINT k=0; k<numTemplates; k++){ //Get the class label for the cth class UINT classLabel = trainingData.getClassTracker()[k].classLabel; LabelledTimeSeriesClassificationData classData = trainingData.getClassData( classLabel ); UINT numExamples = classData.getNumSamples(); bestIndex = 0; //Set the class label of this template templatesBuffer[k].classLabel = classLabel; //Set the kth class label classLabels[k] = classLabel; trainingLog << "Training Template: " << k << " Class: " << classLabel << endl; //Check to make sure we actually have some training examples if(numExamples<1){ errorLog << "_train(LabelledTimeSeriesClassificationData &labelledTrainingData) - Can not train model: Num of Example is < 1! Class: " << classLabel << endl; return false; } if(numExamples==1){//If we have just one training example then we have to use it as the template bestIndex = 0; nullRejectionThresholds[k] = 0.0;//TODO-We need a better way of calculating this! 
warningLog << "_train(LabelledTimeSeriesClassificationData &labelledTrainingData) - Can't compute reject thresholds for class " << classLabel << " as there is only 1 training example" << endl; }else{ //Search for the best training example for this class if( !train_NDDTW(classData,templatesBuffer[k],bestIndex) ){ errorLog << "_train(LabelledTimeSeriesClassificationData &labelledTrainingData) - Failed to train template for class with label: " << classLabel << endl; return false; } } //Add the template with the best index to the buffer int trainingMethod = 0; if(useSmoothing) trainingMethod = 1; switch (trainingMethod) { case(0)://Standard Training templatesBuffer[k].timeSeries = classData[bestIndex].getData(); break; case(1)://Training using Smoothing //Smooth the data, reducing its size by a factor set by smoothFactor smoothData(classData[ bestIndex ].getData(),smoothingFactor,templatesBuffer[k].timeSeries); break; default: cout<<"Can not train model: Unknown training method \n"; return false; break; } if( offsetUsingFirstSample ){ offsetTimeseries( templatesBuffer[k].timeSeries ); } //Add the average length of the training examples for this template to the overall averageTemplateLength averageTemplateLength += templatesBuffer[k].averageTemplateLength; } //Flag that the models have been trained trained = true; averageTemplateLength = (UINT) averageTemplateLength/double(numTemplates); //Recompute the null rejection thresholds recomputeNullRejectionThresholds(); //Resize the prediction results to make sure it is setup for realtime prediction continuousInputDataBuffer.clear(); continuousInputDataBuffer.resize(averageTemplateLength,vector<double>(numFeatures,0)); classLikelihoods.resize(numTemplates,DEFAULT_NULL_LIKELIHOOD_VALUE); classDistances.resize(numTemplates,0); predictedClassLabel = GRT_DEFAULT_NULL_CLASS_LABEL; maxLikelihood = DEFAULT_NULL_LIKELIHOOD_VALUE; //Training complete return true; }
/**
 Trains one hidden markov model per class and then derives a null rejection threshold for
 each of them.

 The training data must be one dimensional. Each class has its samples converted into
 observation sequences with convertDataToObservationSequence and the model for that class
 is trained on them. In a second pass every sequence is run back through the model of its
 class and the threshold of that class is set to minus the mean absolute value returned by
 predict. The predicted values, the thresholds and the model matrices are printed along
 the way.

 @param trainingData: the labelled time series used to train the models
 @return true if every model was trained, false if the data is empty, is not one dimensional, could not be converted to observation sequences, or a model failed to train
 */
bool HMM::train(LabelledTimeSeriesClassificationData trainingData){

    //Reject unusable data before any of the model state is reset
    if( trainingData.getNumSamples() == 0 ){
        errorLog << "train(LabelledTimeSeriesClassificationData trainingData) - There are no training samples to train the HMM classifer!" << endl;
        return false;
    }

    if( trainingData.getNumDimensions() != 1 ){
        errorLog << "train(LabelledTimeSeriesClassificationData trainingData) - The number of dimensions in the training data must be 1. If your training data is not 1 dimensional then you must quantize the training data using one of the GRT quantization algorithms" << endl;
        return false;
    }

    //Start again from a clean, untrained state
    trained = false;
    useScaling = false;
    numFeatures = trainingData.getNumDimensions();
    numClasses = trainingData.getNumClasses();
    models.clear();
    classLabels.clear();
    models.resize( numClasses );
    classLabels.resize( numClasses );

    //Give every model the same configuration
    for(UINT m=0; m<numClasses; ++m){
        models[m].resetModel(numStates,numSymbols,modelType,delta);
        models[m].maxNumIter = maxNumIter;
        models[m].minImprovement = minImprovement;
    }

    //First pass: train the model of each class on that class's observation sequences
    for(UINT m=0; m<numClasses; ++m){
        const UINT label = trainingData.getClassTracker()[m].classLabel;
        classLabels[m] = label;

        LabelledTimeSeriesClassificationData samplesForClass = trainingData.getClassData( label );
        vector< vector< UINT > > sequences;
        if( !convertDataToObservationSequence( samplesForClass, sequences ) ){
            return false;
        }

        if( !models[m].train( sequences ) ){
            errorLog << "train(LabelledTimeSeriesClassificationData &trainingData) - Failed to train HMM for class " << label << endl;
            return false;
        }
    }

    //Second pass: compute the rejection threshold of each class from its own training data
    //NOTE(review): the class data is fetched and converted again here rather than reused from the first pass
    nullRejectionThresholds.resize(numClasses);
    for(UINT m=0; m<numClasses; ++m){
        const UINT label = trainingData.getClassTracker()[m].classLabel;
        classLabels[m] = label;

        LabelledTimeSeriesClassificationData samplesForClass = trainingData.getClassData( label );
        vector< vector< UINT > > sequences;
        if( !convertDataToObservationSequence( samplesForClass, sequences ) ){
            return false;
        }

        //Accumulate the absolute value predicted for every sequence of this class
        double absLogLikelihoodSum = 0;
        for(UINT s=0; s<sequences.size(); ++s){
            const double logLikelihood = models[m].predict( sequences[s] );
            absLogLikelihoodSum += fabs( logLikelihood );
            cout << "Class: " << label << " PredictedLogLikelihood: " << -logLikelihood << endl;
        }

        nullRejectionThresholds[m] = -( absLogLikelihoodSum / double( sequences.size() ) );
        cout << "Class: " << label << " NullRejectionThreshold: " << nullRejectionThresholds[m] << endl;
    }

    //Print the matrices of every trained model
    for(UINT m=0; m<numClasses; ++m){
        models[m].printAB();
    }

    trained = true;
    return true;
}