Example #1
0
void DTW::znormData(LabelledTimeSeriesClassificationData &trainingData){

    for(UINT i=0; i<trainingData.getNumSamples(); i++){
        znormData( trainingData[i].getData(), trainingData[i].getData() );
    }

}
Example #2
0
void DTW::scaleData(LabelledTimeSeriesClassificationData &trainingData){

	//Scale the data using the min and max values
    for(UINT i=0; i<trainingData.getNumSamples(); i++){
        scaleData( trainingData[i].getData(), trainingData[i].getData() );
    }

}
Example #3
0
bool HMM::convertDataToObservationSequence( LabelledTimeSeriesClassificationData &classData, vector< vector< UINT > > &observationSequences ){
        
    observationSequences.resize( classData.getNumSamples() );
    
    for(UINT i=0; i<classData.getNumSamples(); i++){
        MatrixDouble &timeseries = classData[i].getData();
        observationSequences[i].resize( timeseries.getNumRows() );
        for(UINT j=0; j<timeseries.getNumRows(); j++){
            if( timeseries[j][0] >= numSymbols ){
                errorLog << "train(LabelledTimeSeriesClassificationData &trainingData) - Found an observation sequence with a value outside of the symbol range! Value: " << timeseries[j][0] << endl;
                return false;
            }
            observationSequences[i][j] = (UINT)timeseries[j][0];
        }
    }
    
    return true;
}
UINT KfoldTimeSeriesData::getFoldSize() {
	if (crossValidationSetup) {
		UINT maxSize = crossValidationIndexs[0].size();
		for (UINT k = 0; k < kFoldValue; k++) {
			if (crossValidationIndexs[k].size() > maxSize) {
				maxSize = crossValidationIndexs[k].size();
			}
		}
		return inputDataset.getNumSamples() - maxSize;
	}
	return 0;
}
bool LabelledTimeSeriesClassificationData::merge(const LabelledTimeSeriesClassificationData &labelledData){

    if( labelledData.getNumDimensions() != numDimensions ){
        errorLog << "merge(LabelledTimeSeriesClassificationData &labelledData) - The number of dimensions in the labelledData (" << labelledData.getNumDimensions() << ") does not match the number of dimensions of this dataset (" << numDimensions << ")" << endl;
        return false;
    }

    //The dataset has changed so flag that any previous cross validation setup will now not work
    crossValidationSetup = false;
    crossValidationIndexs.clear();

    //Add the data from the labelledData to this instance
    for(UINT i=0; i<labelledData.getNumSamples(); i++){
        addSample(labelledData[i].getClassLabel(), labelledData[i].getData());
    }

    //Set the class names from the dataset
    vector< ClassTracker > classTracker = labelledData.getClassTracker();
    for(UINT i=0; i<classTracker.size(); i++){
        setClassNameForCorrespondingClassLabel(classTracker[i].className, classTracker[i].classLabel);
    }

    return true;
}
bool KfoldTimeSeriesData::spiltDataIntoKFolds(const GRT::UINT K) {

	kFoldValue = K;

    //K can not be zero
    if( K == 0 ){
        std::cout << "spiltDataIntoKFolds(UINT K) - K can not be zero!" << std::endl;
        return false;
    }

    //K can not be larger than the number of examples
    if( K  > inputDataset.getNumSamples()){
        std::cout << "spiltDataIntoKFolds(UINT K,bool useStratifiedSampling) - K can not be 0!" << std::endl;
        return false;
    }

    //K can not be larger than the number of examples in a specific class if the stratified sampling option is true
	for(UINT c=0; c < inputDataset.getNumClasses(); c++) {
		if( K > classTracker[c].counter ){
			cout << "spiltDataIntoKFolds(UINT K,bool useStratifiedSampling) - K can not be larger than the number of samples in any given class!" << std::endl;
			return false;
		}
	}


    //Setup the dataset for k-fold cross validation
    kFoldValue = K;
    vector< UINT > indexs( inputDataset.getNumSamples() );

    //Work out how many samples are in each fold, the last fold might have more samples than the others
    UINT numSamplesPerFold = (UINT) floor( inputDataset.getNumSamples() / double(K) );

    //Create the random partion indexs
    Random random;
    UINT randomIndex = 0;

	//Break the data into seperate classes
	vector< vector< UINT > > classData( inputDataset.getNumClasses() );

	//Add the indexs to their respective classes
	for(UINT i = 0; i < inputDataset.getNumSamples(); i++) {
		classData[ inputDataset.getClassLabelIndexValue(
				inputDataset[i].getClassLabel() ) ].push_back( i );
	}

	//Randomize the order of the indexs in each of the class index buffers
	for(UINT c = 0; c < inputDataset.getNumClasses(); c++) {
		UINT numSamples = (UINT)classData[c].size();
		for(UINT x = 0; x < numSamples; x++) {
			//Pick a random index
			randomIndex = random.getRandomNumberInt(0, numSamples);

			//Swap the indexs
			SWAP( classData[c][ x ] , classData[c][ randomIndex ] );
		}
	}


    //Resize the cross validation indexs buffer
    crossValidationIndexs.resize( K );
    for (UINT k = 0; k < K; k++) {
    	crossValidationIndexs[k].resize(inputDataset.getNumClasses());
    }

    //Loop over each of the classes and add the data equally to each of the k folds until there is no data left
    vector< UINT >::iterator iter;
    for(UINT c = 0; c < inputDataset.getNumClasses(); c++){
        iter = classData[ c ].begin();
        UINT k = 0;
        while( iter != classData[c].end() ){
            crossValidationIndexs[ k ][c].push_back( *iter );
            iter++;
            k = ++k % K;
        }
    }

    crossValidationSetup = true;
    return true;

}
Example #7
0
bool DTW::train_NDDTW(LabelledTimeSeriesClassificationData &trainingData,DTWTemplate &dtwTemplate,UINT &bestIndex){

   UINT numExamples = trainingData.getNumSamples();
   VectorDouble results(numExamples,0.0);
   MatrixDouble distanceResults(numExamples,numExamples);
   dtwTemplate.averageTemplateLength = 0;
    
   for(UINT m=0; m<numExamples; m++){
       
	   MatrixDouble templateA; //The m'th template
	   MatrixDouble templateB; //The n'th template
	   dtwTemplate.averageTemplateLength += trainingData[m].getLength();

	   //Smooth the data if required
	   if( useSmoothing ) smoothData(trainingData[m].getData(),smoothingFactor,templateA);
	   else templateA = trainingData[m].getData();
       
       if( offsetUsingFirstSample ){
           offsetTimeseries(templateA);
       }

	   for(UINT n=0; n<numExamples; n++){
		if(m!=n){
		    //Smooth the data if required
		    if( useSmoothing ) smoothData(trainingData[n].getData(),smoothingFactor,templateB);
		    else templateB = trainingData[n].getData();
            
            if( offsetUsingFirstSample ){
                offsetTimeseries(templateB);
            }

			//Compute the distance between the two time series
            MatrixDouble distanceMatrix(templateA.getNumRows(),templateB.getNumRows());
            vector< IndexDist > warpPath;
			double dist = computeDistance(templateA,templateB,distanceMatrix,warpPath);
            
            trainingLog << "Template: " << m << " Timeseries: " << n << " Dist: " << dist << endl;

			//Update the results values
			distanceResults[m][n] = dist;
			results[m] += dist;
		}else distanceResults[m][n] = 0; //The distance is zero because the two timeseries are the same
	   }
   }

	for(UINT m=0; m<numExamples; m++) results[m]/=(numExamples-1);
	//Find the best average result, this is the result with the minimum value
	bestIndex = 0;
	double bestAverage = results[0];
	for(UINT m=1; m<numExamples; m++){
		if( results[m] < bestAverage ){
			bestAverage = results[m];
			bestIndex = m;
		}
	}

    if( numExamples > 2 ){

        //Work out the threshold value for the best template
        dtwTemplate.trainingMu = results[bestIndex];
        dtwTemplate.trainingSigma = 0.0;

        for(UINT n=0; n<numExamples; n++){
            if(n!=bestIndex){
                dtwTemplate.trainingSigma += SQR( distanceResults[ bestIndex ][n] - dtwTemplate.trainingMu );
            }
        }
        dtwTemplate.trainingSigma = sqrt( dtwTemplate.trainingSigma / double(numExamples-2) );
    }else{
        warningLog << "_train_NDDTW(LabelledTimeSeriesClassificationData &trainingData,DTWTemplate &dtwTemplate,UINT &bestIndex - There are not enough examples to compute the trainingMu and trainingSigma for the template for class " << dtwTemplate.classLabel << endl;
        dtwTemplate.trainingMu = 0.0;
        dtwTemplate.trainingSigma = 0.0;
    }

	//Set the average length of the training examples
	dtwTemplate.averageTemplateLength = (UINT) (dtwTemplate.averageTemplateLength/double(numExamples));
    
    trainingLog << "AverageTemplateLength: " << dtwTemplate.averageTemplateLength << endl;

    //Flag that the training was successfull
	return true;
}
Example #8
0
////////////////////////// TRAINING FUNCTIONS //////////////////////////
bool DTW::train(LabelledTimeSeriesClassificationData labelledTrainingData){

	UINT bestIndex = 0;

	//Cleanup Memory
	templatesBuffer.clear();
    classLabels.clear();
	trained = false;
    continuousInputDataBuffer.clear();

    if( trimTrainingData ){
        LabelledTimeSeriesClassificationSampleTrimmer timeSeriesTrimmer(trimThreshold,maximumTrimPercentage);
        LabelledTimeSeriesClassificationData tempData;
        tempData.setNumDimensions( labelledTrainingData.getNumDimensions() );
        
        for(UINT i=0; i<labelledTrainingData.getNumSamples(); i++){
            if( timeSeriesTrimmer.trimTimeSeries( labelledTrainingData[i] ) ){
                tempData.addSample(labelledTrainingData[i].getClassLabel(), labelledTrainingData[i].getData());
            }else{
                trainingLog << "Removing training sample " << i << " from the dataset as it could not be trimmed!" << endl;
            }
        }
        //Overwrite the original training data with the trimmed dataset
        labelledTrainingData = tempData;
    }
    
    if( labelledTrainingData.getNumSamples() == 0 ){
        errorLog << "_train(LabelledTimeSeriesClassificationData &labelledTrainingData) - Can't train model as there are no samples in training data!" << endl;
        return false;
    }

	//Assign
    numClasses = labelledTrainingData.getNumClasses();
	numTemplates = labelledTrainingData.getNumClasses();
    numFeatures = labelledTrainingData.getNumDimensions();
	templatesBuffer.resize( numClasses );
    classLabels.resize( numClasses );
	nullRejectionThresholds.resize( numClasses );
	averageTemplateLength = 0;

	//Need to copy the labelled training data incase we need to scale it or znorm it
	LabelledTimeSeriesClassificationData trainingData( labelledTrainingData );

	//Perform any scaling or normalisation
    rangesBuffer = trainingData.getRanges();
	if( useScaling ) scaleData( trainingData );
	if( useZNormalisation ) znormData( trainingData );

	//For each class, run a one-to-one DTW and find the template the best describes the data
	for(UINT k=0; k<numTemplates; k++){
        //Get the class label for the cth class
        UINT classLabel = trainingData.getClassTracker()[k].classLabel;
        LabelledTimeSeriesClassificationData classData = trainingData.getClassData( classLabel );
		UINT numExamples = classData.getNumSamples();
		bestIndex = 0;

        //Set the class label of this template
        templatesBuffer[k].classLabel = classLabel;

        //Set the kth class label
        classLabels[k] = classLabel;
        
        trainingLog << "Training Template: " << k << " Class: " << classLabel << endl;

		//Check to make sure we actually have some training examples
		if(numExamples<1){
            errorLog << "_train(LabelledTimeSeriesClassificationData &labelledTrainingData) - Can not train model: Num of Example is < 1! Class: " << classLabel << endl;
			return false;
		}

		if(numExamples==1){//If we have just one training example then we have to use it as the template
            bestIndex = 0;

            nullRejectionThresholds[k] = 0.0;//TODO-We need a better way of calculating this!
            warningLog << "_train(LabelledTimeSeriesClassificationData &labelledTrainingData) - Can't compute reject thresholds for class " << classLabel << " as there is only 1 training example" << endl;
		}else{
            //Search for the best training example for this class
			if( !train_NDDTW(classData,templatesBuffer[k],bestIndex) ){
                errorLog << "_train(LabelledTimeSeriesClassificationData &labelledTrainingData) - Failed to train template for class with label: " << classLabel << endl;
                return false;
            }
		}

		//Add the template with the best index to the buffer
		int trainingMethod = 0;
		if(useSmoothing) trainingMethod = 1;

		switch (trainingMethod) {
			case(0)://Standard Training
				templatesBuffer[k].timeSeries = classData[bestIndex].getData();
				break;
			case(1)://Training using Smoothing
				//Smooth the data, reducing its size by a factor set by smoothFactor
				smoothData(classData[ bestIndex ].getData(),smoothingFactor,templatesBuffer[k].timeSeries);
				break;
			default:
				cout<<"Can not train model: Unknown training method \n";
				return false;
				break;
		}
        
        if( offsetUsingFirstSample ){
            offsetTimeseries( templatesBuffer[k].timeSeries );
        }

		//Add the average length of the training examples for this template to the overall averageTemplateLength
		averageTemplateLength += templatesBuffer[k].averageTemplateLength;
	}

    //Flag that the models have been trained
	trained = true;
	averageTemplateLength = (UINT) averageTemplateLength/double(numTemplates);

    //Recompute the null rejection thresholds
    recomputeNullRejectionThresholds();

    //Resize the prediction results to make sure it is setup for realtime prediction
    continuousInputDataBuffer.clear();
    continuousInputDataBuffer.resize(averageTemplateLength,vector<double>(numFeatures,0));
    classLikelihoods.resize(numTemplates,DEFAULT_NULL_LIKELIHOOD_VALUE);
    classDistances.resize(numTemplates,0);
    predictedClassLabel = GRT_DEFAULT_NULL_CLASS_LABEL;
    maxLikelihood = DEFAULT_NULL_LIKELIHOOD_VALUE;

	//Training complete
	return true;
}
Example #9
0
bool HMM::train(LabelledTimeSeriesClassificationData trainingData){
    
    if( trainingData.getNumSamples() == 0 ){
        errorLog << "train(LabelledTimeSeriesClassificationData trainingData) - There are no training samples to train the HMM classifer!" << endl;
        return false;
    }
    
    if( trainingData.getNumDimensions() != 1 ){
        errorLog << "train(LabelledTimeSeriesClassificationData trainingData) - The number of dimensions in the training data must be 1. If your training data is not 1 dimensional then you must quantize the training data using one of the GRT quantization algorithms" << endl;
        return false;
    }

	//Reset the HMM
    trained = false;
    useScaling = false;
    numFeatures = trainingData.getNumDimensions();
	numClasses = trainingData.getNumClasses();
	models.clear();
    classLabels.clear();
	models.resize( numClasses );
    classLabels.resize( numClasses );

	//Init the models
	for(UINT k=0; k<numClasses; k++){
		models[k].resetModel(numStates,numSymbols,modelType,delta);
		models[k].maxNumIter = maxNumIter;
		models[k].minImprovement = minImprovement;
	}
    
    //Train each of the models
    for(UINT k=0; k<numClasses; k++){
        //Get the class ID of this gesture
        UINT classID = trainingData.getClassTracker()[k].classLabel;
        classLabels[k] = classID;
        
        //Convert this classes training data into a list of observation sequences
        LabelledTimeSeriesClassificationData classData = trainingData.getClassData( classID );
        vector< vector< UINT > > observationSequences;
        if( !convertDataToObservationSequence( classData, observationSequences ) ){
            return false;
        }
        
        //Train the model
		if( !models[k].train( observationSequences ) ){
            errorLog << "train(LabelledTimeSeriesClassificationData &trainingData) - Failed to train HMM for class " << classID << endl;
            return false;
        }
	}
    
    //Compute the rejection thresholds
    nullRejectionThresholds.resize(numClasses);
    
    for(UINT k=0; k<numClasses; k++){
        //Get the class ID of this gesture
        UINT classID = trainingData.getClassTracker()[k].classLabel;
        classLabels[k] = classID;
        
        //Convert this classes training data into a list of observation sequences
        LabelledTimeSeriesClassificationData classData = trainingData.getClassData( classID );
        vector< vector< UINT > > observationSequences;
        if( !convertDataToObservationSequence( classData, observationSequences ) ){
            return false;
        }
        
        //Test the model
        double loglikelihood = 0;
        double avgLoglikelihood = 0;
        for(UINT i=0; i<observationSequences.size(); i++){
            loglikelihood = models[k].predict( observationSequences[i] );
            avgLoglikelihood += fabs( loglikelihood );
            cout << "Class: " << classID << " PredictedLogLikelihood: " << -loglikelihood << endl;
        }
        nullRejectionThresholds[k] = -( avgLoglikelihood / double( observationSequences.size() ) );
        cout << "Class: " << classID << " NullRejectionThreshold: " << nullRejectionThresholds[k] << endl;
	}
    
    for(UINT k=0; k<numClasses; k++){
        models[k].printAB();
    }
    
    trained = true;

	return true;
}