//Builds the training dataset for one cross validation fold.
//The returned dataset contains a copy of every sample whose index is stored in any fold other than foldIndex.
//An empty dataset is returned if cross validation has not been setup, or if foldIndex is not less than kFoldValue.
LabelledTimeSeriesClassificationData LabelledTimeSeriesClassificationData::getTrainingFoldData(const UINT foldIndex) const {

    LabelledTimeSeriesClassificationData foldTrainingSet;

    //The folds only exist after the data has been partitioned
    if( !crossValidationSetup ){
        errorLog << "getTrainingFoldData(UINT foldIndex) - Cross Validation has not been setup! You need to call the spiltDataIntoKFolds(UINT K,bool useStratifiedSampling) function first before calling this function!" << endl;
        return foldTrainingSet;
    }

    //Reject a fold index that is out of range
    if( foldIndex >= kFoldValue ){
        return foldTrainingSet;
    }

    foldTrainingSet.setNumDimensions( numDimensions );

    //Copy across the samples of every fold except the one that is held out
    for(UINT fold=0; fold<kFoldValue; fold++){
        if( fold == foldIndex ){
            continue;
        }

        for(UINT n=0; n<crossValidationIndexs[fold].size(); n++){
            const UINT sampleIndex = crossValidationIndexs[fold][n];
            foldTrainingSet.addSample( data[ sampleIndex ].getClassLabel(), data[ sampleIndex ].getData() );
        }
    }

    return foldTrainingSet;
}
Exemplo n.º 2
0
//Builds a class-balanced training dataset for one cross validation fold.
//The sample indexs of every fold except foldIndex are pooled per class, then up to numSamplesPerClass
//samples are drawn at random, without replacement, from each class pool.
//An empty dataset is returned if cross validation has not been setup or if foldIndex is not less than kFoldValue.
LabelledTimeSeriesClassificationData KfoldTimeSeriesData::getTrainingFoldData(const UINT foldIndex, const UINT numSamplesPerClass) const {

    LabelledTimeSeriesClassificationData trainingSubset;

    if( !crossValidationSetup ) {
        cout << "getTrainingFoldData(UINT foldIndex) - Cross Validation has not been setup! You need to call the spiltDataIntoKFolds(UINT K,bool useStratifiedSampling) function first before calling this function!" << endl;
        return trainingSubset;
    }

    if( foldIndex >= kFoldValue ) {
        cout << "Fold index too big" << endl;
        return trainingSubset;
    }

    Random rng;

    trainingSubset.setNumDimensions( numDimensions );

    //Pool, per class, the sample indexs of the K-1 folds that are not held out
    vector< vector< UINT > > pooledIndexs( inputDataset.getNumClasses() );
    for (UINT fold = 0; fold < kFoldValue; fold++) {
        if( fold != foldIndex ) {
            for (UINT c = 0; c < crossValidationIndexs[fold].size(); c++) {
                for (UINT n = 0; n < crossValidationIndexs[fold][c].size(); n++) {
                    pooledIndexs[c].push_back( crossValidationIndexs[fold][c][n] );
                }
            }
        }
    }

    //Draw the requested number of samples from each class pool
    for (unsigned int c = 0; c < inputDataset.getNumClasses(); c++) {

        vector< UINT > &pool = pooledIndexs[c];
        unsigned int numDrawn = 0;

        while (numDrawn < numSamplesPerClass) {

            const unsigned int numRemaining = pool.size();
            if (numRemaining == 0) {
                //The pool ran dry before numSamplesPerClass samples could be drawn
                printf("The \"numSamplesPerClass\" variable is bigger that the samples for this class");
                break;
            }

            //The drawn value is used directly as a position in the pool
            const unsigned int pick = rng.getRandomNumberInt(0, numRemaining);
            const UINT sampleIndex = pool[pick];

            //Take the drawn index out of the pool so the same sample can not be drawn twice
            pool.erase(pool.begin() + pick);

            trainingSubset.addSample( inputDataset[ sampleIndex ].getClassLabel(),
                                      inputDataset[ sampleIndex ].getData() );
            numDrawn++;
        }
    }

    return trainingSubset;
}
Exemplo n.º 3
0
//Applies the per-timeseries znormData overload to every sample in trainingData.
//Each sample's data matrix is passed as both the input and the output, so the dataset is modified in place.
void DTW::znormData(LabelledTimeSeriesClassificationData &trainingData){

    UINT sampleIndex = 0;
    while( sampleIndex < trainingData.getNumSamples() ){
        MatrixDouble &timeSeries = trainingData[sampleIndex].getData();
        znormData( timeSeries, timeSeries );
        sampleIndex++;
    }

}
//Loads a labelled time series dataset from file, configures a DTW classifier and trains the pipeline with it.
//
//trainingdatafile: the path of the training dataset to load
//dimension: currently unused, it is only referenced by the disabled pre-processing / feature-extraction modules below
//
//Returns true if the dataset was loaded and the pipeline was trained, false otherwise.
//If the dataset can not be loaded the pipeline is left untouched.
bool GRT_Recognizer::initPipeline(string trainingdatafile, int dimension)
{
    (void)dimension;

    //Load the training data, there is nothing to train on if this fails
    LabelledTimeSeriesClassificationData trainingData;
    if( !trainingData.loadDatasetFromFile(trainingdatafile) ){
        return false;
    }

    //Initialize the DTW classifier
    DTW dtw;

    //Turn on null rejection, this lets the classifier output the predicted class label of 0 when the likelihood of a gesture is low
    dtw.enableNullRejection(true);

    //Set the null rejection coefficient to 2, this controls the thresholds for the automatic null rejection
    //You can increase this value if you find that your real-time gestures are not being recognized
    //If you are getting too many false positives then you should decrease this value
    dtw.setNullRejectionCoeff(2);

    //Turn on the automatic data trimming, this will remove any sections of none movement from the start and end of the training samples
    dtw.enableTrimTrainingData(true, 0.1, 90);

    //Offset the timeseries data by the first sample, this makes your gestures (more) invariant to the location the gesture is performed
    dtw.setOffsetTimeseriesUsingFirstSample(true);

    //Add the classifier to the pipeline (after we do this, we don't need the DTW classifier anymore)
    pipeline.setClassifier( dtw );
    //pipeline.addPreProcessingModule(MovingAverageFilter(5,dimension));
    //pipeline.addFeatureExtractionModule(FFT(16,1, dimension));

    pipeline.addPostProcessingModule(ClassLabelChangeFilter());

    //Report the real outcome of the training instead of always claiming success
    return pipeline.train(trainingData);
}
Exemplo n.º 5
0
//Applies the per-timeseries scaleData overload to every sample in trainingData.
//Each sample's data matrix is passed as both the input and the output, so the dataset is modified in place.
void DTW::scaleData(LabelledTimeSeriesClassificationData &trainingData){

    UINT sampleIndex = 0;
    while( sampleIndex < trainingData.getNumSamples() ){
        MatrixDouble &timeSeries = trainingData[sampleIndex].getData();
        scaleData( timeSeries, timeSeries );
        ++sampleIndex;
    }

}
Exemplo n.º 6
0
//Converts every timeseries in classData into a sequence of discrete observation symbols.
//
//classData: the dataset to convert, only the first column of each timeseries is read
//observationSequences: resized to one sequence per sample, each holding one symbol per row of the timeseries
//
//Returns false (and logs an error) as soon as a value outside of the range [0 numSymbols-1] is found,
//in which case observationSequences is only partially filled.
bool HMM::convertDataToObservationSequence( LabelledTimeSeriesClassificationData &classData, vector< vector< UINT > > &observationSequences ){

    observationSequences.resize( classData.getNumSamples() );

    for(UINT i=0; i<classData.getNumSamples(); i++){
        MatrixDouble &timeseries = classData[i].getData();
        const UINT numRows = timeseries.getNumRows();
        observationSequences[i].resize( numRows );

        for(UINT j=0; j<numRows; j++){
            const double value = timeseries[j][0];

            //Written as a negated range test so that negative values and NaN are rejected as well,
            //casting either of those to an unsigned integer would be undefined
            if( !( value >= 0 && value < numSymbols ) ){
                errorLog << "train(LabelledTimeSeriesClassificationData &trainingData) - Found an observation sequence with a value outside of the symbol range! Value: " << value << endl;
                return false;
            }
            observationSequences[i][j] = (UINT)value;
        }
    }

    return true;
}
//Builds the test dataset for one cross validation fold.
//The returned dataset contains a copy of every sample whose index is stored in fold foldIndex.
//An empty dataset is returned if cross validation has not been setup, or if foldIndex is not less than kFoldValue.
LabelledTimeSeriesClassificationData LabelledTimeSeriesClassificationData::getTestFoldData(const UINT foldIndex) const {
    LabelledTimeSeriesClassificationData foldTestSet;

    //Nothing can be returned without a valid partition and a valid fold index
    if( !crossValidationSetup || foldIndex >= kFoldValue ){
        return foldTestSet;
    }

    foldTestSet.setNumDimensions( numDimensions );

    //Copy across the samples of the held-out fold
    UINT n = 0;
    while( n < crossValidationIndexs[ foldIndex ].size() ){
        const UINT sampleIndex = crossValidationIndexs[ foldIndex ][n];
        foldTestSet.addSample( data[ sampleIndex ].getClassLabel(), data[ sampleIndex ].getData() );
        n++;
    }

    return foldTestSet;
}
Exemplo n.º 8
0
//Returns the number of samples in the dataset minus the number of samples in the largest fold,
//i.e. the size of the smallest training set that is left over when one fold is held out.
//Returns 0 if cross validation has not been setup.
UINT KfoldTimeSeriesData::getFoldSize() {
	if (!crossValidationSetup) {
		return 0;
	}

	//crossValidationIndexs is organised as [fold][class][sample], so the size of a fold is the sum of its
	//per-class index lists (crossValidationIndexs[k].size() alone is only the number of classes)
	UINT largestFold = 0;
	for (UINT k = 0; k < crossValidationIndexs.size(); k++) {
		UINT samplesInFold = 0;
		for (UINT c = 0; c < crossValidationIndexs[k].size(); c++) {
			samplesInFold += static_cast<UINT>(crossValidationIndexs[k][c].size());
		}
		if (samplesInFold > largestFold) {
			largestFold = samplesInFold;
		}
	}

	return inputDataset.getNumSamples() - largestFold;
}
bool LabelledTimeSeriesClassificationData::merge(const LabelledTimeSeriesClassificationData &labelledData){

    if( labelledData.getNumDimensions() != numDimensions ){
        errorLog << "merge(LabelledTimeSeriesClassificationData &labelledData) - The number of dimensions in the labelledData (" << labelledData.getNumDimensions() << ") does not match the number of dimensions of this dataset (" << numDimensions << ")" << endl;
        return false;
    }

    //The dataset has changed so flag that any previous cross validation setup will now not work
    crossValidationSetup = false;
    crossValidationIndexs.clear();

    //Add the data from the labelledData to this instance
    for(UINT i=0; i<labelledData.getNumSamples(); i++){
        addSample(labelledData[i].getClassLabel(), labelledData[i].getData());
    }

    //Set the class names from the dataset
    vector< ClassTracker > classTracker = labelledData.getClassTracker();
    for(UINT i=0; i<classTracker.size(); i++){
        setClassNameForCorrespondingClassLabel(classTracker[i].className, classTracker[i].classLabel);
    }

    return true;
}
Exemplo n.º 10
0
//Partitions inputDataset into K stratified folds for cross validation.
//
//The sample indexs are grouped by class, shuffled within each class, and then dealt out to the folds in turn,
//so that crossValidationIndexs[fold][class] holds the sample indexs of that class that belong to that fold.
//
//K: the number of folds, must be greater than zero and no larger than the number of samples in any class
//
//Returns true if the data was partitioned. Returns false if K is invalid, in which case any previous
//partition (kFoldValue, crossValidationIndexs, crossValidationSetup) is left untouched.
bool KfoldTimeSeriesData::spiltDataIntoKFolds(const GRT::UINT K) {

    //K can not be zero
    if( K == 0 ){
        std::cout << "spiltDataIntoKFolds(UINT K) - K can not be zero!" << std::endl;
        return false;
    }

    const UINT totalSamples = inputDataset.getNumSamples();
    const UINT classCount = inputDataset.getNumClasses();

    //K can not be larger than the number of examples
    if( K > totalSamples ){
        std::cout << "spiltDataIntoKFolds(UINT K) - K can not be larger than the number of samples in the dataset!" << std::endl;
        return false;
    }

    //K can not be larger than the number of examples in any specific class, as every fold needs a sample of every class
    for(UINT c = 0; c < classCount; c++) {
        if( K > classTracker[c].counter ){
            cout << "spiltDataIntoKFolds(UINT K,bool useStratifiedSampling) - K can not be larger than the number of samples in any given class!" << std::endl;
            return false;
        }
    }

    //All of the checks passed, only now is the previous partition invalidated
    crossValidationSetup = false;
    kFoldValue = K;

    Random random;

    //Break the sample indexs into separate classes
    vector< vector< UINT > > classData( classCount );
    for(UINT i = 0; i < totalSamples; i++) {
        classData[ inputDataset.getClassLabelIndexValue(
                inputDataset[i].getClassLabel() ) ].push_back( i );
    }

    //Randomize the order of the indexs in each of the class index buffers.
    //The swap partner is drawn from [x numSamples-1] (Fisher-Yates) so that every ordering is equally likely
    for(UINT c = 0; c < classCount; c++) {
        const UINT numSamples = (UINT)classData[c].size();
        for(UINT x = 0; x < numSamples; x++) {
            const UINT randomIndex = random.getRandomNumberInt(x, numSamples);
            SWAP( classData[c][ x ] , classData[c][ randomIndex ] );
        }
    }

    //Rebuild the cross validation indexs buffer from scratch, otherwise the indexs of an
    //earlier call would still be in the buffer and the new indexs would be appended to them
    crossValidationIndexs.clear();
    crossValidationIndexs.resize( K );
    for (UINT k = 0; k < K; k++) {
        crossValidationIndexs[k].resize( classCount );
    }

    //Loop over each of the classes and deal the indexs out to the K folds in turn until there is no data left
    for(UINT c = 0; c < classCount; c++){
        UINT k = 0;
        for(UINT i = 0; i < classData[c].size(); i++){
            crossValidationIndexs[ k ][c].push_back( classData[c][i] );
            k = (k + 1) % K;
        }
    }

    crossValidationSetup = true;
    return true;

}
Exemplo n.º 11
0
//Trains the quantizer from a labelled time series dataset.
//The dataset is first converted with getDataAsMatrixDouble() and the result is handed to the matrix based train overload.
bool KMeansQuantizer::train(LabelledTimeSeriesClassificationData &trainingData){
    MatrixDouble samples = trainingData.getDataAsMatrixDouble();
    const bool trainedOk = train( samples );
    return trainedOk;
}
Exemplo n.º 12
0
//Searches the examples of one class for the example that is, on average, closest to all the others.
//
//Every example is compared against every other example with computeDistance, after the optional smoothing
//and first-sample offset have been applied. The example with the smallest average distance is selected.
//
//trainingData: the examples of a single class
//dtwTemplate: receives averageTemplateLength, trainingMu and trainingSigma
//bestIndex: receives the index of the selected example
//
//trainingMu and trainingSigma are only computed when there are more than 2 examples, otherwise both are set to zero.
//Returns false if trainingData contains no examples, true otherwise.
bool DTW::train_NDDTW(LabelledTimeSeriesClassificationData &trainingData,DTWTemplate &dtwTemplate,UINT &bestIndex){

    const UINT numExamples = trainingData.getNumSamples();

    //Without this guard results[0] would be read from an empty buffer below
    if( numExamples == 0 ){
        errorLog << "_train_NDDTW(LabelledTimeSeriesClassificationData &trainingData,DTWTemplate &dtwTemplate,UINT &bestIndex) - There are no training examples!" << endl;
        bestIndex = 0;
        return false;
    }

    VectorDouble results(numExamples,0.0);
    MatrixDouble distanceResults(numExamples,numExamples);
    dtwTemplate.averageTemplateLength = 0;

    for(UINT m=0; m<numExamples; m++){

        MatrixDouble templateA; //The m'th template
        MatrixDouble templateB; //The n'th template
        dtwTemplate.averageTemplateLength += trainingData[m].getLength();

        //Smooth the data if required
        if( useSmoothing ) smoothData(trainingData[m].getData(),smoothingFactor,templateA);
        else templateA = trainingData[m].getData();

        if( offsetUsingFirstSample ){
            offsetTimeseries(templateA);
        }

        for(UINT n=0; n<numExamples; n++){

            //The distance is zero because the two timeseries are the same
            if( m == n ){
                distanceResults[m][n] = 0;
                continue;
            }

            //Smooth the data if required
            if( useSmoothing ) smoothData(trainingData[n].getData(),smoothingFactor,templateB);
            else templateB = trainingData[n].getData();

            if( offsetUsingFirstSample ){
                offsetTimeseries(templateB);
            }

            //Compute the distance between the two time series
            MatrixDouble distanceMatrix(templateA.getNumRows(),templateB.getNumRows());
            vector< IndexDist > warpPath;
            double dist = computeDistance(templateA,templateB,distanceMatrix,warpPath);

            trainingLog << "Template: " << m << " Timeseries: " << n << " Dist: " << dist << endl;

            //Update the results values
            distanceResults[m][n] = dist;
            results[m] += dist;
        }
    }

    //Turn the summed distances into averages, each example was compared against numExamples-1 others.
    //With a single example there were no comparisons, so the sum (zero) is left as it is instead of dividing by zero
    if( numExamples > 1 ){
        for(UINT m=0; m<numExamples; m++) results[m] /= double(numExamples-1);
    }

    //Find the best average result, this is the result with the minimum value
    bestIndex = 0;
    double bestAverage = results[0];
    for(UINT m=1; m<numExamples; m++){
        if( results[m] < bestAverage ){
            bestAverage = results[m];
            bestIndex = m;
        }
    }

    if( numExamples > 2 ){

        //Work out the threshold value for the best template
        dtwTemplate.trainingMu = results[bestIndex];
        dtwTemplate.trainingSigma = 0.0;

        for(UINT n=0; n<numExamples; n++){
            if(n!=bestIndex){
                dtwTemplate.trainingSigma += SQR( distanceResults[ bestIndex ][n] - dtwTemplate.trainingMu );
            }
        }
        dtwTemplate.trainingSigma = sqrt( dtwTemplate.trainingSigma / double(numExamples-2) );
    }else{
        warningLog << "_train_NDDTW(LabelledTimeSeriesClassificationData &trainingData,DTWTemplate &dtwTemplate,UINT &bestIndex - There are not enough examples to compute the trainingMu and trainingSigma for the template for class " << dtwTemplate.classLabel << endl;
        dtwTemplate.trainingMu = 0.0;
        dtwTemplate.trainingSigma = 0.0;
    }

    //Set the average length of the training examples
    dtwTemplate.averageTemplateLength = (UINT) (dtwTemplate.averageTemplateLength/double(numExamples));

    trainingLog << "AverageTemplateLength: " << dtwTemplate.averageTemplateLength << endl;

    //Flag that the training was successfull
    return true;
}
Exemplo n.º 13
0
////////////////////////// TRAINING FUNCTIONS //////////////////////////
//Trains one DTW template per class from the labelled training data.
//
//The steps are:
// - clear any previous model
// - optionally trim each training sample, dropping the samples that can not be trimmed
// - optionally scale and/or z-normalise the data
// - for each class, select the example that best represents the class and store it (optionally smoothed
//   and offset by its first sample) as the template of that class
// - recompute the null rejection thresholds and reset the buffers used for realtime prediction
//
//labelledTrainingData: the training data, taken by value so that it can be trimmed and scaled freely
//
//Returns true if a template was trained for every class, false if there is no data or a class could not be trained.
bool DTW::train(LabelledTimeSeriesClassificationData labelledTrainingData){

    UINT bestIndex = 0;

    //Cleanup Memory
    templatesBuffer.clear();
    classLabels.clear();
    trained = false;
    continuousInputDataBuffer.clear();

    if( trimTrainingData ){
        LabelledTimeSeriesClassificationSampleTrimmer timeSeriesTrimmer(trimThreshold,maximumTrimPercentage);
        LabelledTimeSeriesClassificationData tempData;
        tempData.setNumDimensions( labelledTrainingData.getNumDimensions() );

        for(UINT i=0; i<labelledTrainingData.getNumSamples(); i++){
            if( timeSeriesTrimmer.trimTimeSeries( labelledTrainingData[i] ) ){
                tempData.addSample(labelledTrainingData[i].getClassLabel(), labelledTrainingData[i].getData());
            }else{
                trainingLog << "Removing training sample " << i << " from the dataset as it could not be trimmed!" << endl;
            }
        }
        //Overwrite the original training data with the trimmed dataset
        labelledTrainingData = tempData;
    }

    if( labelledTrainingData.getNumSamples() == 0 ){
        errorLog << "_train(LabelledTimeSeriesClassificationData &labelledTrainingData) - Can't train model as there are no samples in training data!" << endl;
        return false;
    }

    //Assign
    numClasses = labelledTrainingData.getNumClasses();
    numTemplates = labelledTrainingData.getNumClasses();
    numFeatures = labelledTrainingData.getNumDimensions();
    templatesBuffer.resize( numClasses );
    classLabels.resize( numClasses );
    nullRejectionThresholds.resize( numClasses );
    averageTemplateLength = 0;

    //The argument was passed by value, so it is already a private copy that can be scaled or
    //z-normalised in place, there is no need to copy the whole dataset a second time
    LabelledTimeSeriesClassificationData &trainingData = labelledTrainingData;

    //Perform any scaling or normalisation
    rangesBuffer = trainingData.getRanges();
    if( useScaling ) scaleData( trainingData );
    if( useZNormalisation ) znormData( trainingData );

    //For each class, run a one-to-one DTW and find the template the best describes the data
    for(UINT k=0; k<numTemplates; k++){
        //Get the class label for the kth class
        UINT classLabel = trainingData.getClassTracker()[k].classLabel;
        LabelledTimeSeriesClassificationData classData = trainingData.getClassData( classLabel );
        UINT numExamples = classData.getNumSamples();
        bestIndex = 0;

        //Set the class label of this template
        templatesBuffer[k].classLabel = classLabel;

        //Set the kth class label
        classLabels[k] = classLabel;

        trainingLog << "Training Template: " << k << " Class: " << classLabel << endl;

        //Check to make sure we actually have some training examples
        if(numExamples<1){
            errorLog << "_train(LabelledTimeSeriesClassificationData &labelledTrainingData) - Can not train model: Num of Example is < 1! Class: " << classLabel << endl;
            return false;
        }

        if(numExamples==1){//If we have just one training example then we have to use it as the template
            bestIndex = 0;

            //train_NDDTW is skipped for this class, so record the length of the only example here,
            //otherwise this template would not contribute to the overall averageTemplateLength
            templatesBuffer[k].averageTemplateLength = classData[0].getLength();

            nullRejectionThresholds[k] = 0.0;//TODO-We need a better way of calculating this!
            warningLog << "_train(LabelledTimeSeriesClassificationData &labelledTrainingData) - Can't compute reject thresholds for class " << classLabel << " as there is only 1 training example" << endl;
        }else{
            //Search for the best training example for this class
            if( !train_NDDTW(classData,templatesBuffer[k],bestIndex) ){
                errorLog << "_train(LabelledTimeSeriesClassificationData &labelledTrainingData) - Failed to train template for class with label: " << classLabel << endl;
                return false;
            }
        }

        //Add the template with the best index to the buffer
        if( useSmoothing ){
            //Smooth the data, reducing its size by a factor set by smoothingFactor
            smoothData(classData[ bestIndex ].getData(),smoothingFactor,templatesBuffer[k].timeSeries);
        }else{
            templatesBuffer[k].timeSeries = classData[bestIndex].getData();
        }

        if( offsetUsingFirstSample ){
            offsetTimeseries( templatesBuffer[k].timeSeries );
        }

        //Add the average length of the training examples for this template to the overall averageTemplateLength
        averageTemplateLength += templatesBuffer[k].averageTemplateLength;
    }

    //Flag that the models have been trained
    trained = true;
    averageTemplateLength = (UINT)( averageTemplateLength / double(numTemplates) );

    //Recompute the null rejection thresholds
    recomputeNullRejectionThresholds();

    //Resize the prediction results to make sure it is setup for realtime prediction
    continuousInputDataBuffer.clear();
    continuousInputDataBuffer.resize(averageTemplateLength,vector<double>(numFeatures,0));
    classLikelihoods.resize(numTemplates,DEFAULT_NULL_LIKELIHOOD_VALUE);
    classDistances.resize(numTemplates,0);
    predictedClassLabel = GRT_DEFAULT_NULL_CLASS_LABEL;
    maxLikelihood = DEFAULT_NULL_LIKELIHOOD_VALUE;

    //Training complete
    return true;
}
Exemplo n.º 14
0
//Trains one discrete HMM per class and computes a null rejection threshold for each class.
//
//trainingData: the training data, must contain at least one sample and be 1 dimensional, with every value
//              being a symbol that convertDataToObservationSequence accepts
//
//For each class the samples are converted into observation sequences and used to train the model of that class.
//The null rejection threshold of a class is then set to minus the average absolute value returned by the
//model's predict function over the training sequences of that class.
//
//Returns true if every model was trained, false otherwise.
bool HMM::train(LabelledTimeSeriesClassificationData trainingData){

    if( trainingData.getNumSamples() == 0 ){
        errorLog << "train(LabelledTimeSeriesClassificationData trainingData) - There are no training samples to train the HMM classifer!" << endl;
        return false;
    }

    if( trainingData.getNumDimensions() != 1 ){
        errorLog << "train(LabelledTimeSeriesClassificationData trainingData) - The number of dimensions in the training data must be 1. If your training data is not 1 dimensional then you must quantize the training data using one of the GRT quantization algorithms" << endl;
        return false;
    }

    //Reset the HMM
    trained = false;
    useScaling = false;
    numFeatures = trainingData.getNumDimensions();
    numClasses = trainingData.getNumClasses();
    models.clear();
    classLabels.clear();
    models.resize( numClasses );
    classLabels.resize( numClasses );

    //Init the models
    for(UINT k=0; k<numClasses; k++){
        models[k].resetModel(numStates,numSymbols,modelType,delta);
        models[k].maxNumIter = maxNumIter;
        models[k].minImprovement = minImprovement;
    }

    //The observation sequences of each class are kept so that the rejection thresholds can be computed
    //below without extracting and converting the data of every class a second time
    vector< vector< vector< UINT > > > classObservationSequences( numClasses );

    //Train each of the models
    for(UINT k=0; k<numClasses; k++){
        //Get the class ID of this gesture
        UINT classID = trainingData.getClassTracker()[k].classLabel;
        classLabels[k] = classID;

        //Convert this classes training data into a list of observation sequences
        LabelledTimeSeriesClassificationData classData = trainingData.getClassData( classID );
        vector< vector< UINT > > &observationSequences = classObservationSequences[k];
        if( !convertDataToObservationSequence( classData, observationSequences ) ){
            return false;
        }

        //Train the model
        if( !models[k].train( observationSequences ) ){
            errorLog << "train(LabelledTimeSeriesClassificationData &trainingData) - Failed to train HMM for class " << classID << endl;
            return false;
        }
    }

    //Compute the rejection thresholds
    nullRejectionThresholds.resize(numClasses);

    for(UINT k=0; k<numClasses; k++){
        const UINT classID = classLabels[k];
        vector< vector< UINT > > &observationSequences = classObservationSequences[k];

        //Test the model on the data it was trained with
        double loglikelihood = 0;
        double avgLoglikelihood = 0;
        for(UINT i=0; i<observationSequences.size(); i++){
            loglikelihood = models[k].predict( observationSequences[i] );
            avgLoglikelihood += fabs( loglikelihood );
            cout << "Class: " << classID << " PredictedLogLikelihood: " << -loglikelihood << endl;
        }
        nullRejectionThresholds[k] = -( avgLoglikelihood / double( observationSequences.size() ) );
        cout << "Class: " << classID << " NullRejectionThreshold: " << nullRejectionThresholds[k] << endl;
    }

    for(UINT k=0; k<numClasses; k++){
        models[k].printAB();
    }

    trained = true;

    return true;
}