bool LabelledTimeSeriesClassificationData::merge(const LabelledTimeSeriesClassificationData &labelledData){ if( labelledData.getNumDimensions() != numDimensions ){ errorLog << "merge(LabelledTimeSeriesClassificationData &labelledData) - The number of dimensions in the labelledData (" << labelledData.getNumDimensions() << ") does not match the number of dimensions of this dataset (" << numDimensions << ")" << endl; return false; } //The dataset has changed so flag that any previous cross validation setup will now not work crossValidationSetup = false; crossValidationIndexs.clear(); //Add the data from the labelledData to this instance for(UINT i=0; i<labelledData.getNumSamples(); i++){ addSample(labelledData[i].getClassLabel(), labelledData[i].getData()); } //Set the class names from the dataset vector< ClassTracker > classTracker = labelledData.getClassTracker(); for(UINT i=0; i<classTracker.size(); i++){ setClassNameForCorrespondingClassLabel(classTracker[i].className, classTracker[i].classLabel); } return true; }
////////////////////////// TRAINING FUNCTIONS ////////////////////////// bool DTW::train(LabelledTimeSeriesClassificationData labelledTrainingData){ UINT bestIndex = 0; //Cleanup Memory templatesBuffer.clear(); classLabels.clear(); trained = false; continuousInputDataBuffer.clear(); if( trimTrainingData ){ LabelledTimeSeriesClassificationSampleTrimmer timeSeriesTrimmer(trimThreshold,maximumTrimPercentage); LabelledTimeSeriesClassificationData tempData; tempData.setNumDimensions( labelledTrainingData.getNumDimensions() ); for(UINT i=0; i<labelledTrainingData.getNumSamples(); i++){ if( timeSeriesTrimmer.trimTimeSeries( labelledTrainingData[i] ) ){ tempData.addSample(labelledTrainingData[i].getClassLabel(), labelledTrainingData[i].getData()); }else{ trainingLog << "Removing training sample " << i << " from the dataset as it could not be trimmed!" << endl; } } //Overwrite the original training data with the trimmed dataset labelledTrainingData = tempData; } if( labelledTrainingData.getNumSamples() == 0 ){ errorLog << "_train(LabelledTimeSeriesClassificationData &labelledTrainingData) - Can't train model as there are no samples in training data!" 
<< endl; return false; } //Assign numClasses = labelledTrainingData.getNumClasses(); numTemplates = labelledTrainingData.getNumClasses(); numFeatures = labelledTrainingData.getNumDimensions(); templatesBuffer.resize( numClasses ); classLabels.resize( numClasses ); nullRejectionThresholds.resize( numClasses ); averageTemplateLength = 0; //Need to copy the labelled training data incase we need to scale it or znorm it LabelledTimeSeriesClassificationData trainingData( labelledTrainingData ); //Perform any scaling or normalisation rangesBuffer = trainingData.getRanges(); if( useScaling ) scaleData( trainingData ); if( useZNormalisation ) znormData( trainingData ); //For each class, run a one-to-one DTW and find the template the best describes the data for(UINT k=0; k<numTemplates; k++){ //Get the class label for the cth class UINT classLabel = trainingData.getClassTracker()[k].classLabel; LabelledTimeSeriesClassificationData classData = trainingData.getClassData( classLabel ); UINT numExamples = classData.getNumSamples(); bestIndex = 0; //Set the class label of this template templatesBuffer[k].classLabel = classLabel; //Set the kth class label classLabels[k] = classLabel; trainingLog << "Training Template: " << k << " Class: " << classLabel << endl; //Check to make sure we actually have some training examples if(numExamples<1){ errorLog << "_train(LabelledTimeSeriesClassificationData &labelledTrainingData) - Can not train model: Num of Example is < 1! Class: " << classLabel << endl; return false; } if(numExamples==1){//If we have just one training example then we have to use it as the template bestIndex = 0; nullRejectionThresholds[k] = 0.0;//TODO-We need a better way of calculating this! 
warningLog << "_train(LabelledTimeSeriesClassificationData &labelledTrainingData) - Can't compute reject thresholds for class " << classLabel << " as there is only 1 training example" << endl; }else{ //Search for the best training example for this class if( !train_NDDTW(classData,templatesBuffer[k],bestIndex) ){ errorLog << "_train(LabelledTimeSeriesClassificationData &labelledTrainingData) - Failed to train template for class with label: " << classLabel << endl; return false; } } //Add the template with the best index to the buffer int trainingMethod = 0; if(useSmoothing) trainingMethod = 1; switch (trainingMethod) { case(0)://Standard Training templatesBuffer[k].timeSeries = classData[bestIndex].getData(); break; case(1)://Training using Smoothing //Smooth the data, reducing its size by a factor set by smoothFactor smoothData(classData[ bestIndex ].getData(),smoothingFactor,templatesBuffer[k].timeSeries); break; default: cout<<"Can not train model: Unknown training method \n"; return false; break; } if( offsetUsingFirstSample ){ offsetTimeseries( templatesBuffer[k].timeSeries ); } //Add the average length of the training examples for this template to the overall averageTemplateLength averageTemplateLength += templatesBuffer[k].averageTemplateLength; } //Flag that the models have been trained trained = true; averageTemplateLength = (UINT) averageTemplateLength/double(numTemplates); //Recompute the null rejection thresholds recomputeNullRejectionThresholds(); //Resize the prediction results to make sure it is setup for realtime prediction continuousInputDataBuffer.clear(); continuousInputDataBuffer.resize(averageTemplateLength,vector<double>(numFeatures,0)); classLikelihoods.resize(numTemplates,DEFAULT_NULL_LIKELIHOOD_VALUE); classDistances.resize(numTemplates,0); predictedClassLabel = GRT_DEFAULT_NULL_CLASS_LABEL; maxLikelihood = DEFAULT_NULL_LIKELIHOOD_VALUE; //Training complete return true; }
/**
 Trains one hidden markov model per class found in the training data and then computes a null
 rejection threshold for each class.

 The training data must contain at least one sample and must be 1 dimensional, otherwise an
 error is logged and nothing is trained. Scaling is always disabled by this function.

 The threshold of a class is the negated mean of the absolute log likelihoods that the trained
 model of that class predicts for the class's own observation sequences.

 @param trainingData: the labelled time series data used to train the models (taken by value)
 @return true if every model was trained and every threshold was computed, false otherwise
*/
bool HMM::train(LabelledTimeSeriesClassificationData trainingData){

    //Validate the training data before touching any of the existing state
    if( trainingData.getNumSamples() == 0 ){
        errorLog << "train(LabelledTimeSeriesClassificationData trainingData) - There are no training samples to train the HMM classifer!" << endl;
        return false;
    }

    if( trainingData.getNumDimensions() != 1 ){
        errorLog << "train(LabelledTimeSeriesClassificationData trainingData) - The number of dimensions in the training data must be 1. If your training data is not 1 dimensional then you must quantize the training data using one of the GRT quantization algorithms" << endl;
        return false;
    }

    //Reset the HMM
    trained = false;
    useScaling = false;
    numFeatures = trainingData.getNumDimensions();
    numClasses = trainingData.getNumClasses();

    models.clear();
    models.resize( numClasses );

    classLabels.clear();
    classLabels.resize( numClasses );

    //Give every model the current configuration
    for(UINT modelIndex=0; modelIndex<numClasses; modelIndex++){
        models[modelIndex].resetModel(numStates,numSymbols,modelType,delta);
        models[modelIndex].maxNumIter = maxNumIter;
        models[modelIndex].minImprovement = minImprovement;
    }

    //Train the model of each class on the observation sequences of that class
    for(UINT modelIndex=0; modelIndex<numClasses; modelIndex++){

        //Look up the label of this class
        UINT classID = trainingData.getClassTracker()[modelIndex].classLabel;
        classLabels[modelIndex] = classID;

        //Turn the training data of this class into a list of observation sequences
        LabelledTimeSeriesClassificationData classData = trainingData.getClassData( classID );
        vector< vector< UINT > > observationSequences;
        if( !convertDataToObservationSequence( classData, observationSequences ) ){
            return false;
        }

        if( !models[modelIndex].train( observationSequences ) ){
            errorLog << "train(LabelledTimeSeriesClassificationData &trainingData) - Failed to train HMM for class " << classID << endl;
            return false;
        }
    }

    //Compute the rejection thresholds
    nullRejectionThresholds.resize(numClasses);
    for(UINT modelIndex=0; modelIndex<numClasses; modelIndex++){

        //Look up the label of this class
        UINT classID = trainingData.getClassTracker()[modelIndex].classLabel;
        classLabels[modelIndex] = classID;

        //Turn the training data of this class into a list of observation sequences
        LabelledTimeSeriesClassificationData classData = trainingData.getClassData( classID );
        vector< vector< UINT > > observationSequences;
        if( !convertDataToObservationSequence( classData, observationSequences ) ){
            return false;
        }

        //Run the trained model over its own training sequences, summing the absolute log likelihoods
        double absLogLikelihoodSum = 0;
        for(UINT sequenceIndex=0; sequenceIndex<observationSequences.size(); sequenceIndex++){
            const double logLikelihood = models[modelIndex].predict( observationSequences[sequenceIndex] );
            absLogLikelihoodSum += fabs( logLikelihood );
            cout << "Class: " << classID << " PredictedLogLikelihood: " << -logLikelihood << endl;
        }

        //The threshold is the negated mean of the absolute log likelihoods
        nullRejectionThresholds[modelIndex] = -( absLogLikelihoodSum / double( observationSequences.size() ) );
        cout << "Class: " << classID << " NullRejectionThreshold: " << nullRejectionThresholds[modelIndex] << endl;
    }

    //Print the A and B matrices of every trained model
    for(UINT modelIndex=0; modelIndex<numClasses; modelIndex++){
        models[modelIndex].printAB();
    }

    trained = true;
    return true;
}