// Applies the matrix overload of znormData to every sample of the dataset.
// Each sample's data matrix is passed as both the input and the output argument,
// so the result is written back over the sample.
// @param trainingData: the dataset whose samples are processed in place
void DTW::znormData(LabelledTimeSeriesClassificationData &trainingData){

    UINT sampleIndex = 0;
    while( sampleIndex < trainingData.getNumSamples() ){
        //Same matrix on both sides: the normalised data replaces the original
        MatrixDouble &sample = trainingData[ sampleIndex ].getData();
        znormData( sample, sample );
        ++sampleIndex;
    }
}
// Applies the matrix overload of scaleData to every sample of the dataset.
// Each sample's data matrix is passed as both the input and the output argument,
// so the scaled data is written back over the sample.
// NOTE(review): the scaling itself (presumably min/max based) lives in the matrix overload.
// @param trainingData: the dataset whose samples are processed in place
void DTW::scaleData(LabelledTimeSeriesClassificationData &trainingData){

    UINT sampleIndex = 0;
    while( sampleIndex < trainingData.getNumSamples() ){
        //Same matrix on both sides: the scaled data replaces the original
        MatrixDouble &sample = trainingData[ sampleIndex ].getData();
        scaleData( sample, sample );
        ++sampleIndex;
    }
}
bool HMM::convertDataToObservationSequence( LabelledTimeSeriesClassificationData &classData, vector< vector< UINT > > &observationSequences ){ observationSequences.resize( classData.getNumSamples() ); for(UINT i=0; i<classData.getNumSamples(); i++){ MatrixDouble ×eries = classData[i].getData(); observationSequences[i].resize( timeseries.getNumRows() ); for(UINT j=0; j<timeseries.getNumRows(); j++){ if( timeseries[j][0] >= numSymbols ){ errorLog << "train(LabelledTimeSeriesClassificationData &trainingData) - Found an observation sequence with a value outside of the symbol range! Value: " << timeseries[j][0] << endl; return false; } observationSequences[i][j] = (UINT)timeseries[j][0]; } } return true; }
// Returns the number of samples left in the dataset once the largest fold is held out.
// crossValidationIndexs is organised as [fold][class] -> list of sample indices, so the
// size of a fold is the sum of its per-class index lists (the size of the fold's outer
// vector is only the number of classes).
// @return inputDataset.getNumSamples() minus the size of the largest fold, or 0 if the
//         cross validation folds have not been set up
UINT KfoldTimeSeriesData::getFoldSize() {

    if( !crossValidationSetup ) return 0;

    UINT maxSize = 0;

    //Iterate over the buffer itself rather than kFoldValue so a stale K can never index out of range
    for(UINT k = 0; k < crossValidationIndexs.size(); k++) {
        UINT foldSize = 0;
        for(UINT c = 0; c < crossValidationIndexs[k].size(); c++) {
            foldSize += (UINT)crossValidationIndexs[k][c].size();
        }
        if( foldSize > maxSize ) {
            maxSize = foldSize;
        }
    }

    return inputDataset.getNumSamples() - maxSize;
}
bool LabelledTimeSeriesClassificationData::merge(const LabelledTimeSeriesClassificationData &labelledData){ if( labelledData.getNumDimensions() != numDimensions ){ errorLog << "merge(LabelledTimeSeriesClassificationData &labelledData) - The number of dimensions in the labelledData (" << labelledData.getNumDimensions() << ") does not match the number of dimensions of this dataset (" << numDimensions << ")" << endl; return false; } //The dataset has changed so flag that any previous cross validation setup will now not work crossValidationSetup = false; crossValidationIndexs.clear(); //Add the data from the labelledData to this instance for(UINT i=0; i<labelledData.getNumSamples(); i++){ addSample(labelledData[i].getClassLabel(), labelledData[i].getData()); } //Set the class names from the dataset vector< ClassTracker > classTracker = labelledData.getClassTracker(); for(UINT i=0; i<classTracker.size(); i++){ setClassNameForCorrespondingClassLabel(classTracker[i].className, classTracker[i].classLabel); } return true; }
bool KfoldTimeSeriesData::spiltDataIntoKFolds(const GRT::UINT K) { kFoldValue = K; //K can not be zero if( K == 0 ){ std::cout << "spiltDataIntoKFolds(UINT K) - K can not be zero!" << std::endl; return false; } //K can not be larger than the number of examples if( K > inputDataset.getNumSamples()){ std::cout << "spiltDataIntoKFolds(UINT K,bool useStratifiedSampling) - K can not be 0!" << std::endl; return false; } //K can not be larger than the number of examples in a specific class if the stratified sampling option is true for(UINT c=0; c < inputDataset.getNumClasses(); c++) { if( K > classTracker[c].counter ){ cout << "spiltDataIntoKFolds(UINT K,bool useStratifiedSampling) - K can not be larger than the number of samples in any given class!" << std::endl; return false; } } //Setup the dataset for k-fold cross validation kFoldValue = K; vector< UINT > indexs( inputDataset.getNumSamples() ); //Work out how many samples are in each fold, the last fold might have more samples than the others UINT numSamplesPerFold = (UINT) floor( inputDataset.getNumSamples() / double(K) ); //Create the random partion indexs Random random; UINT randomIndex = 0; //Break the data into seperate classes vector< vector< UINT > > classData( inputDataset.getNumClasses() ); //Add the indexs to their respective classes for(UINT i = 0; i < inputDataset.getNumSamples(); i++) { classData[ inputDataset.getClassLabelIndexValue( inputDataset[i].getClassLabel() ) ].push_back( i ); } //Randomize the order of the indexs in each of the class index buffers for(UINT c = 0; c < inputDataset.getNumClasses(); c++) { UINT numSamples = (UINT)classData[c].size(); for(UINT x = 0; x < numSamples; x++) { //Pick a random index randomIndex = random.getRandomNumberInt(0, numSamples); //Swap the indexs SWAP( classData[c][ x ] , classData[c][ randomIndex ] ); } } //Resize the cross validation indexs buffer crossValidationIndexs.resize( K ); for (UINT k = 0; k < K; k++) { 
crossValidationIndexs[k].resize(inputDataset.getNumClasses()); } //Loop over each of the classes and add the data equally to each of the k folds until there is no data left vector< UINT >::iterator iter; for(UINT c = 0; c < inputDataset.getNumClasses(); c++){ iter = classData[ c ].begin(); UINT k = 0; while( iter != classData[c].end() ){ crossValidationIndexs[ k ][c].push_back( *iter ); iter++; k = ++k % K; } } crossValidationSetup = true; return true; }
bool DTW::train_NDDTW(LabelledTimeSeriesClassificationData &trainingData,DTWTemplate &dtwTemplate,UINT &bestIndex){ UINT numExamples = trainingData.getNumSamples(); VectorDouble results(numExamples,0.0); MatrixDouble distanceResults(numExamples,numExamples); dtwTemplate.averageTemplateLength = 0; for(UINT m=0; m<numExamples; m++){ MatrixDouble templateA; //The m'th template MatrixDouble templateB; //The n'th template dtwTemplate.averageTemplateLength += trainingData[m].getLength(); //Smooth the data if required if( useSmoothing ) smoothData(trainingData[m].getData(),smoothingFactor,templateA); else templateA = trainingData[m].getData(); if( offsetUsingFirstSample ){ offsetTimeseries(templateA); } for(UINT n=0; n<numExamples; n++){ if(m!=n){ //Smooth the data if required if( useSmoothing ) smoothData(trainingData[n].getData(),smoothingFactor,templateB); else templateB = trainingData[n].getData(); if( offsetUsingFirstSample ){ offsetTimeseries(templateB); } //Compute the distance between the two time series MatrixDouble distanceMatrix(templateA.getNumRows(),templateB.getNumRows()); vector< IndexDist > warpPath; double dist = computeDistance(templateA,templateB,distanceMatrix,warpPath); trainingLog << "Template: " << m << " Timeseries: " << n << " Dist: " << dist << endl; //Update the results values distanceResults[m][n] = dist; results[m] += dist; }else distanceResults[m][n] = 0; //The distance is zero because the two timeseries are the same } } for(UINT m=0; m<numExamples; m++) results[m]/=(numExamples-1); //Find the best average result, this is the result with the minimum value bestIndex = 0; double bestAverage = results[0]; for(UINT m=1; m<numExamples; m++){ if( results[m] < bestAverage ){ bestAverage = results[m]; bestIndex = m; } } if( numExamples > 2 ){ //Work out the threshold value for the best template dtwTemplate.trainingMu = results[bestIndex]; dtwTemplate.trainingSigma = 0.0; for(UINT n=0; n<numExamples; n++){ if(n!=bestIndex){ dtwTemplate.trainingSigma 
+= SQR( distanceResults[ bestIndex ][n] - dtwTemplate.trainingMu ); } } dtwTemplate.trainingSigma = sqrt( dtwTemplate.trainingSigma / double(numExamples-2) ); }else{ warningLog << "_train_NDDTW(LabelledTimeSeriesClassificationData &trainingData,DTWTemplate &dtwTemplate,UINT &bestIndex - There are not enough examples to compute the trainingMu and trainingSigma for the template for class " << dtwTemplate.classLabel << endl; dtwTemplate.trainingMu = 0.0; dtwTemplate.trainingSigma = 0.0; } //Set the average length of the training examples dtwTemplate.averageTemplateLength = (UINT) (dtwTemplate.averageTemplateLength/double(numExamples)); trainingLog << "AverageTemplateLength: " << dtwTemplate.averageTemplateLength << endl; //Flag that the training was successfull return true; }
////////////////////////// TRAINING FUNCTIONS ////////////////////////// bool DTW::train(LabelledTimeSeriesClassificationData labelledTrainingData){ UINT bestIndex = 0; //Cleanup Memory templatesBuffer.clear(); classLabels.clear(); trained = false; continuousInputDataBuffer.clear(); if( trimTrainingData ){ LabelledTimeSeriesClassificationSampleTrimmer timeSeriesTrimmer(trimThreshold,maximumTrimPercentage); LabelledTimeSeriesClassificationData tempData; tempData.setNumDimensions( labelledTrainingData.getNumDimensions() ); for(UINT i=0; i<labelledTrainingData.getNumSamples(); i++){ if( timeSeriesTrimmer.trimTimeSeries( labelledTrainingData[i] ) ){ tempData.addSample(labelledTrainingData[i].getClassLabel(), labelledTrainingData[i].getData()); }else{ trainingLog << "Removing training sample " << i << " from the dataset as it could not be trimmed!" << endl; } } //Overwrite the original training data with the trimmed dataset labelledTrainingData = tempData; } if( labelledTrainingData.getNumSamples() == 0 ){ errorLog << "_train(LabelledTimeSeriesClassificationData &labelledTrainingData) - Can't train model as there are no samples in training data!" 
<< endl; return false; } //Assign numClasses = labelledTrainingData.getNumClasses(); numTemplates = labelledTrainingData.getNumClasses(); numFeatures = labelledTrainingData.getNumDimensions(); templatesBuffer.resize( numClasses ); classLabels.resize( numClasses ); nullRejectionThresholds.resize( numClasses ); averageTemplateLength = 0; //Need to copy the labelled training data incase we need to scale it or znorm it LabelledTimeSeriesClassificationData trainingData( labelledTrainingData ); //Perform any scaling or normalisation rangesBuffer = trainingData.getRanges(); if( useScaling ) scaleData( trainingData ); if( useZNormalisation ) znormData( trainingData ); //For each class, run a one-to-one DTW and find the template the best describes the data for(UINT k=0; k<numTemplates; k++){ //Get the class label for the cth class UINT classLabel = trainingData.getClassTracker()[k].classLabel; LabelledTimeSeriesClassificationData classData = trainingData.getClassData( classLabel ); UINT numExamples = classData.getNumSamples(); bestIndex = 0; //Set the class label of this template templatesBuffer[k].classLabel = classLabel; //Set the kth class label classLabels[k] = classLabel; trainingLog << "Training Template: " << k << " Class: " << classLabel << endl; //Check to make sure we actually have some training examples if(numExamples<1){ errorLog << "_train(LabelledTimeSeriesClassificationData &labelledTrainingData) - Can not train model: Num of Example is < 1! Class: " << classLabel << endl; return false; } if(numExamples==1){//If we have just one training example then we have to use it as the template bestIndex = 0; nullRejectionThresholds[k] = 0.0;//TODO-We need a better way of calculating this! 
warningLog << "_train(LabelledTimeSeriesClassificationData &labelledTrainingData) - Can't compute reject thresholds for class " << classLabel << " as there is only 1 training example" << endl; }else{ //Search for the best training example for this class if( !train_NDDTW(classData,templatesBuffer[k],bestIndex) ){ errorLog << "_train(LabelledTimeSeriesClassificationData &labelledTrainingData) - Failed to train template for class with label: " << classLabel << endl; return false; } } //Add the template with the best index to the buffer int trainingMethod = 0; if(useSmoothing) trainingMethod = 1; switch (trainingMethod) { case(0)://Standard Training templatesBuffer[k].timeSeries = classData[bestIndex].getData(); break; case(1)://Training using Smoothing //Smooth the data, reducing its size by a factor set by smoothFactor smoothData(classData[ bestIndex ].getData(),smoothingFactor,templatesBuffer[k].timeSeries); break; default: cout<<"Can not train model: Unknown training method \n"; return false; break; } if( offsetUsingFirstSample ){ offsetTimeseries( templatesBuffer[k].timeSeries ); } //Add the average length of the training examples for this template to the overall averageTemplateLength averageTemplateLength += templatesBuffer[k].averageTemplateLength; } //Flag that the models have been trained trained = true; averageTemplateLength = (UINT) averageTemplateLength/double(numTemplates); //Recompute the null rejection thresholds recomputeNullRejectionThresholds(); //Resize the prediction results to make sure it is setup for realtime prediction continuousInputDataBuffer.clear(); continuousInputDataBuffer.resize(averageTemplateLength,vector<double>(numFeatures,0)); classLikelihoods.resize(numTemplates,DEFAULT_NULL_LIKELIHOOD_VALUE); classDistances.resize(numTemplates,0); predictedClassLabel = GRT_DEFAULT_NULL_CLASS_LABEL; maxLikelihood = DEFAULT_NULL_LIKELIHOOD_VALUE; //Training complete return true; }
// Trains one model per class found in the training data and then derives a null rejection
// threshold for each class from the predictions of its model on that class's own sequences.
// @param trainingData: the training data; must contain at least one sample and be 1 dimensional
// @return true if every model was trained, false otherwise
bool HMM::train(LabelledTimeSeriesClassificationData trainingData){

    //Guard: there has to be something to train on
    if( trainingData.getNumSamples() == 0 ){
        errorLog << "train(LabelledTimeSeriesClassificationData trainingData) - There are no training samples to train the HMM classifer!" << endl;
        return false;
    }

    //Guard: only 1 dimensional (already quantized) data is accepted
    if( trainingData.getNumDimensions() != 1 ){
        errorLog << "train(LabelledTimeSeriesClassificationData trainingData) - The number of dimensions in the training data must be 1. If your training data is not 1 dimensional then you must quantize the training data using one of the GRT quantization algorithms" << endl;
        return false;
    }

    //Put the classifier back into its untrained state
    trained = false;
    useScaling = false;
    numFeatures = trainingData.getNumDimensions();
    numClasses = trainingData.getNumClasses();
    models.clear();
    classLabels.clear();
    models.resize( numClasses );
    classLabels.resize( numClasses );

    //Give every model the current settings
    for(UINT modelIndex=0; modelIndex<numClasses; ++modelIndex){
        models[modelIndex].resetModel(numStates,numSymbols,modelType,delta);
        models[modelIndex].maxNumIter = maxNumIter;
        models[modelIndex].minImprovement = minImprovement;
    }

    //First pass: train the model of each class on that class's observation sequences
    UINT classIndex = 0;
    while( classIndex < numClasses ){
        const UINT label = trainingData.getClassTracker()[classIndex].classLabel;
        classLabels[classIndex] = label;

        LabelledTimeSeriesClassificationData samplesOfClass = trainingData.getClassData( label );
        vector< vector< UINT > > sequences;
        if( !convertDataToObservationSequence( samplesOfClass, sequences ) ){
            return false;
        }

        if( !models[classIndex].train( sequences ) ){
            errorLog << "train(LabelledTimeSeriesClassificationData &trainingData) - Failed to train HMM for class " << label << endl;
            return false;
        }
        ++classIndex;
    }

    //Second pass: the rejection threshold of a class is minus the mean absolute value
    //that its model's predict call returns over the class's own sequences
    nullRejectionThresholds.resize(numClasses);
    for(classIndex=0; classIndex<numClasses; ++classIndex){
        const UINT label = trainingData.getClassTracker()[classIndex].classLabel;
        classLabels[classIndex] = label;

        LabelledTimeSeriesClassificationData samplesOfClass = trainingData.getClassData( label );
        vector< vector< UINT > > sequences;
        if( !convertDataToObservationSequence( samplesOfClass, sequences ) ){
            return false;
        }

        double summedAbsLogLikelihood = 0;
        for(UINT s=0; s<sequences.size(); ++s){
            const double sequenceLogLikelihood = models[classIndex].predict( sequences[s] );
            summedAbsLogLikelihood += fabs( sequenceLogLikelihood );
            cout << "Class: " << label << " PredictedLogLikelihood: " << -sequenceLogLikelihood << endl;
        }

        nullRejectionThresholds[classIndex] = -( summedAbsLogLikelihood / double( sequences.size() ) );
        cout << "Class: " << label << " NullRejectionThreshold: " << nullRejectionThresholds[classIndex] << endl;
    }

    //Print the A and B members of every trained model
    for(UINT modelIndex=0; modelIndex<numClasses; ++modelIndex){
        models[modelIndex].printAB();
    }

    trained = true;
    return true;
}