//Builds the training dataset for one cross-validation fold.
//The result contains every sample whose index is stored in a fold other than foldIndex.
//@param foldIndex: the fold that is held out (its samples are excluded); must be less than kFoldValue
//@return the training data for the fold, or an empty dataset if cross validation has not been
//        set up (an error is logged) or if foldIndex is out of range (nothing is logged)
LabelledTimeSeriesClassificationData LabelledTimeSeriesClassificationData::getTrainingFoldData(const UINT foldIndex) const {

    LabelledTimeSeriesClassificationData foldTrainingSet;

    //The fold indexes only exist once the data has been split into K folds
    if( !crossValidationSetup ){
        errorLog << "getTrainingFoldData(UINT foldIndex) - Cross Validation has not been setup! You need to call the spiltDataIntoKFolds(UINT K,bool useStratifiedSampling) function first before calling this function!" << endl;
        return foldTrainingSet;
    }

    //An out-of-range fold silently yields the empty dataset
    if( foldIndex >= kFoldValue ){
        return foldTrainingSet;
    }

    foldTrainingSet.setNumDimensions( numDimensions );

    //Walk every fold, skipping the held-out one, and copy its samples across
    for(UINT fold=0; fold<kFoldValue; ++fold){

        if( fold == foldIndex ){
            continue;
        }

        for(UINT n=0; n<crossValidationIndexs[fold].size(); ++n){
            const UINT sampleIndex = crossValidationIndexs[fold][n];
            foldTrainingSet.addSample( data[ sampleIndex ].getClassLabel(), data[ sampleIndex ].getData() );
        }
    }

    return foldTrainingSet;
}
/**
 Builds a randomly sub-sampled training set for one cross-validation fold.

 The indexes of every fold except foldIndex are merged into one list per class; then up to
 numSamplesPerClass samples are drawn at random, without replacement, from each class list
 and copied from inputDataset into the returned dataset.

 @param foldIndex: the fold that is held out (its samples are never drawn); must be less than kFoldValue
 @param numSamplesPerClass: the maximum number of samples drawn for each class. If a class has fewer
        samples available, all of them are used and a message is printed
 @return the sampled training data, or an empty dataset if cross validation has not been set up
         or foldIndex is out of range (a message is printed to stdout in both cases)
*/
LabelledTimeSeriesClassificationData KfoldTimeSeriesData::getTrainingFoldData(const UINT foldIndex, const UINT numSamplesPerClass) const {

    LabelledTimeSeriesClassificationData trainingData;

    if( !crossValidationSetup ) {
        cout << "getTrainingFoldData(UINT foldIndex) - Cross Validation has not been setup! You need to call the spiltDataIntoKFolds(UINT K,bool useStratifiedSampling) function first before calling this function!" << endl;
        return trainingData;
    }

    if( foldIndex >= kFoldValue ) {
        cout << "Fold index too big" << endl;
        return trainingData;
    }

    Random random;

    trainingData.setNumDimensions( numDimensions );

    /* Put all K-1 training folds in one data set, keeping one list of sample indexes per class */
    vector< vector< UINT > > MergedIndexs( inputDataset.getNumClasses() );

    for(UINT k = 0; k < kFoldValue; k++) {

        if( k == foldIndex ) {
            continue;
        }

        /* A fold may hold more per-class lists than getNumClasses() reports; grow the merged
           table instead of writing past its end */
        if( crossValidationIndexs[k].size() > MergedIndexs.size() ) {
            MergedIndexs.resize( crossValidationIndexs[k].size() );
        }

        for (UINT classLabel = 0; classLabel < crossValidationIndexs[k].size(); classLabel++) {
            for (UINT i = 0; i < crossValidationIndexs[k][classLabel].size(); i++) {
                MergedIndexs[classLabel].push_back( crossValidationIndexs[k][classLabel][i] );
            }
        }
    }

    /* For each class pick randomly up to "numSamplesPerClass" samples */
    for (UINT classLabel = 0; classLabel < MergedIndexs.size(); classLabel++) {

        /* Counting from zero with '<' keeps the loop finite even when numSamplesPerClass is the largest UINT */
        for (UINT numSamples = 0; numSamples < numSamplesPerClass; numSamples++) {

            const unsigned int numSamplesRemaining = MergedIndexs[classLabel].size();

            if (numSamplesRemaining == 0) {
                printf("The \"numSamplesPerClass\" variable is bigger that the samples for this class");
                break;
            }

            unsigned int randomNumber = random.getRandomNumberInt(0, numSamplesRemaining);

            /* NOTE(review): the draw is presumably in [0, numSamplesRemaining) - confirm against Random.
               Clamp anyway so an inclusive upper bound can never index past the end of the list */
            if( randomNumber >= numSamplesRemaining ) {
                randomNumber = numSamplesRemaining - 1;
            }

            const UINT index = MergedIndexs[classLabel][randomNumber];

            /* Remove added sample so that it is not added again */
            MergedIndexs[classLabel].erase( MergedIndexs[classLabel].begin() + randomNumber );

            trainingData.addSample( inputDataset[ index ].getClassLabel(), inputDataset[ index ].getData() );
        }
    }

    return trainingData;
}
//Builds the test dataset for one cross-validation fold.
//The result contains exactly the samples whose indexes are stored in fold foldIndex.
//@param foldIndex: the fold to extract; must be less than kFoldValue
//@return the test data for the fold, or an empty dataset if cross validation has not been
//        set up or foldIndex is out of range (nothing is logged in either case)
LabelledTimeSeriesClassificationData LabelledTimeSeriesClassificationData::getTestFoldData(const UINT foldIndex) const {

    LabelledTimeSeriesClassificationData foldTestSet;

    //Both failure cases hand back the empty dataset
    if( !crossValidationSetup || foldIndex >= kFoldValue ){
        return foldTestSet;
    }

    foldTestSet.setNumDimensions( numDimensions );

    //Copy every sample referenced by the requested fold into the test set
    for(UINT n=0; n<crossValidationIndexs[ foldIndex ].size(); ++n){
        const UINT sampleIndex = crossValidationIndexs[ foldIndex ][n];
        foldTestSet.addSample( data[ sampleIndex ].getClassLabel(), data[ sampleIndex ].getData() );
    }

    return foldTestSet;
}
////////////////////////// TRAINING FUNCTIONS ////////////////////////// bool DTW::train(LabelledTimeSeriesClassificationData labelledTrainingData){ UINT bestIndex = 0; //Cleanup Memory templatesBuffer.clear(); classLabels.clear(); trained = false; continuousInputDataBuffer.clear(); if( trimTrainingData ){ LabelledTimeSeriesClassificationSampleTrimmer timeSeriesTrimmer(trimThreshold,maximumTrimPercentage); LabelledTimeSeriesClassificationData tempData; tempData.setNumDimensions( labelledTrainingData.getNumDimensions() ); for(UINT i=0; i<labelledTrainingData.getNumSamples(); i++){ if( timeSeriesTrimmer.trimTimeSeries( labelledTrainingData[i] ) ){ tempData.addSample(labelledTrainingData[i].getClassLabel(), labelledTrainingData[i].getData()); }else{ trainingLog << "Removing training sample " << i << " from the dataset as it could not be trimmed!" << endl; } } //Overwrite the original training data with the trimmed dataset labelledTrainingData = tempData; } if( labelledTrainingData.getNumSamples() == 0 ){ errorLog << "_train(LabelledTimeSeriesClassificationData &labelledTrainingData) - Can't train model as there are no samples in training data!" 
<< endl; return false; } //Assign numClasses = labelledTrainingData.getNumClasses(); numTemplates = labelledTrainingData.getNumClasses(); numFeatures = labelledTrainingData.getNumDimensions(); templatesBuffer.resize( numClasses ); classLabels.resize( numClasses ); nullRejectionThresholds.resize( numClasses ); averageTemplateLength = 0; //Need to copy the labelled training data incase we need to scale it or znorm it LabelledTimeSeriesClassificationData trainingData( labelledTrainingData ); //Perform any scaling or normalisation rangesBuffer = trainingData.getRanges(); if( useScaling ) scaleData( trainingData ); if( useZNormalisation ) znormData( trainingData ); //For each class, run a one-to-one DTW and find the template the best describes the data for(UINT k=0; k<numTemplates; k++){ //Get the class label for the cth class UINT classLabel = trainingData.getClassTracker()[k].classLabel; LabelledTimeSeriesClassificationData classData = trainingData.getClassData( classLabel ); UINT numExamples = classData.getNumSamples(); bestIndex = 0; //Set the class label of this template templatesBuffer[k].classLabel = classLabel; //Set the kth class label classLabels[k] = classLabel; trainingLog << "Training Template: " << k << " Class: " << classLabel << endl; //Check to make sure we actually have some training examples if(numExamples<1){ errorLog << "_train(LabelledTimeSeriesClassificationData &labelledTrainingData) - Can not train model: Num of Example is < 1! Class: " << classLabel << endl; return false; } if(numExamples==1){//If we have just one training example then we have to use it as the template bestIndex = 0; nullRejectionThresholds[k] = 0.0;//TODO-We need a better way of calculating this! 
warningLog << "_train(LabelledTimeSeriesClassificationData &labelledTrainingData) - Can't compute reject thresholds for class " << classLabel << " as there is only 1 training example" << endl; }else{ //Search for the best training example for this class if( !train_NDDTW(classData,templatesBuffer[k],bestIndex) ){ errorLog << "_train(LabelledTimeSeriesClassificationData &labelledTrainingData) - Failed to train template for class with label: " << classLabel << endl; return false; } } //Add the template with the best index to the buffer int trainingMethod = 0; if(useSmoothing) trainingMethod = 1; switch (trainingMethod) { case(0)://Standard Training templatesBuffer[k].timeSeries = classData[bestIndex].getData(); break; case(1)://Training using Smoothing //Smooth the data, reducing its size by a factor set by smoothFactor smoothData(classData[ bestIndex ].getData(),smoothingFactor,templatesBuffer[k].timeSeries); break; default: cout<<"Can not train model: Unknown training method \n"; return false; break; } if( offsetUsingFirstSample ){ offsetTimeseries( templatesBuffer[k].timeSeries ); } //Add the average length of the training examples for this template to the overall averageTemplateLength averageTemplateLength += templatesBuffer[k].averageTemplateLength; } //Flag that the models have been trained trained = true; averageTemplateLength = (UINT) averageTemplateLength/double(numTemplates); //Recompute the null rejection thresholds recomputeNullRejectionThresholds(); //Resize the prediction results to make sure it is setup for realtime prediction continuousInputDataBuffer.clear(); continuousInputDataBuffer.resize(averageTemplateLength,vector<double>(numFeatures,0)); classLikelihoods.resize(numTemplates,DEFAULT_NULL_LIKELIHOOD_VALUE); classDistances.resize(numTemplates,0); predictedClassLabel = GRT_DEFAULT_NULL_CLASS_LABEL; maxLikelihood = DEFAULT_NULL_LIKELIHOOD_VALUE; //Training complete return true; }