LabelledTimeSeriesClassificationData LabelledTimeSeriesClassificationData::getTrainingFoldData(const UINT foldIndex) const {

    LabelledTimeSeriesClassificationData trainingData;

    if( !crossValidationSetup ){
        errorLog << "getTrainingFoldData(UINT foldIndex) - Cross Validation has not been setup! You need to call the spiltDataIntoKFolds(UINT K,bool useStratifiedSampling) function first before calling this function!" << endl;
        return trainingData;
    }

    if( foldIndex >= kFoldValue ) return trainingData;

    trainingData.setNumDimensions( numDimensions );

    //Add the data to the training set, this will consist of all the data that is NOT in the foldIndex
    UINT index = 0;
    for(UINT k=0; k<kFoldValue; k++){
        if( k != foldIndex ){
            for(UINT i=0; i<crossValidationIndexs[k].size(); i++){

                index = crossValidationIndexs[k][i];
                trainingData.addSample( data[ index ].getClassLabel(), data[ index ].getData() );
            }
        }
    }

    return trainingData;
}
LabelledTimeSeriesClassificationData KfoldTimeSeriesData::getTrainingFoldData(const UINT foldIndex, const UINT numSamplesPerClass) const {

    UINT index = 0;
	unsigned int randomNumber;
	unsigned int indexClassLabel;
	unsigned int numSamplesRemaining;
    LabelledTimeSeriesClassificationData trainingData;

    if( !crossValidationSetup ) {
        cout << "getTrainingFoldData(UINT foldIndex) - Cross Validation has not been setup! You need to call the spiltDataIntoKFolds(UINT K,bool useStratifiedSampling) function first before calling this function!" << endl;
        return trainingData;
    }

    if( foldIndex >= kFoldValue ) {
    	cout << "Fold index too big" << endl;
    	return trainingData;
    }

    Random random;

    trainingData.setNumDimensions( numDimensions );

    /* Put all K-1 training folds in one data set */
    vector <vector< UINT > >  MergedIndexs(inputDataset.getNumClasses());
    for(UINT k = 0; k < kFoldValue; k++) {
           if( k == foldIndex ) {
           	continue;
           }
           for (UINT classLabel = 0 ; classLabel < crossValidationIndexs[k].size(); classLabel++) {
        	   for (UINT i = 0; i < crossValidationIndexs[k][classLabel].size(); i++) {
        		   MergedIndexs[classLabel].push_back(crossValidationIndexs[k][classLabel][i]);
        	   }
           }
    }

	/* For each class peak randomly "numSamplesPerClass" samples */
	for (unsigned int classLabel = 0; classLabel < inputDataset.getNumClasses() ; classLabel++) {

		for (unsigned int numSamples = 1; numSamples <= numSamplesPerClass; numSamples++) {

			numSamplesRemaining = MergedIndexs[classLabel].size();
			if (numSamplesRemaining == 0) {
				printf("The \"numSamplesPerClass\" variable is bigger that the samples for this class");
				break;
			}
			randomNumber = random.getRandomNumberInt(0, numSamplesRemaining);
			index = MergedIndexs[classLabel][randomNumber];

			/* Remove added sample so that it is not added again */
			MergedIndexs[classLabel].erase(MergedIndexs[classLabel].begin() + randomNumber);

			trainingData.addSample( inputDataset[ index ].getClassLabel(),
					inputDataset[ index ].getData() );
		}
	}


    return trainingData;
}
LabelledTimeSeriesClassificationData LabelledTimeSeriesClassificationData::getTestFoldData(const UINT foldIndex) const {
    LabelledTimeSeriesClassificationData testData;

    if( !crossValidationSetup ) return testData;

    if( foldIndex >= kFoldValue ) return testData;

    //Add the data to the training
    testData.setNumDimensions( numDimensions );

    UINT index = 0;
    for(UINT i=0; i<crossValidationIndexs[ foldIndex ].size(); i++){

        index = crossValidationIndexs[ foldIndex ][i];
        testData.addSample( data[ index ].getClassLabel(), data[ index ].getData() );
    }

    return testData;
}
Esempio n. 4
0
////////////////////////// TRAINING FUNCTIONS //////////////////////////
bool DTW::train(LabelledTimeSeriesClassificationData labelledTrainingData){

	UINT bestIndex = 0;

	//Cleanup Memory
	templatesBuffer.clear();
    classLabels.clear();
	trained = false;
    continuousInputDataBuffer.clear();

    if( trimTrainingData ){
        LabelledTimeSeriesClassificationSampleTrimmer timeSeriesTrimmer(trimThreshold,maximumTrimPercentage);
        LabelledTimeSeriesClassificationData tempData;
        tempData.setNumDimensions( labelledTrainingData.getNumDimensions() );
        
        for(UINT i=0; i<labelledTrainingData.getNumSamples(); i++){
            if( timeSeriesTrimmer.trimTimeSeries( labelledTrainingData[i] ) ){
                tempData.addSample(labelledTrainingData[i].getClassLabel(), labelledTrainingData[i].getData());
            }else{
                trainingLog << "Removing training sample " << i << " from the dataset as it could not be trimmed!" << endl;
            }
        }
        //Overwrite the original training data with the trimmed dataset
        labelledTrainingData = tempData;
    }
    
    if( labelledTrainingData.getNumSamples() == 0 ){
        errorLog << "_train(LabelledTimeSeriesClassificationData &labelledTrainingData) - Can't train model as there are no samples in training data!" << endl;
        return false;
    }

	//Assign
    numClasses = labelledTrainingData.getNumClasses();
	numTemplates = labelledTrainingData.getNumClasses();
    numFeatures = labelledTrainingData.getNumDimensions();
	templatesBuffer.resize( numClasses );
    classLabels.resize( numClasses );
	nullRejectionThresholds.resize( numClasses );
	averageTemplateLength = 0;

	//Need to copy the labelled training data incase we need to scale it or znorm it
	LabelledTimeSeriesClassificationData trainingData( labelledTrainingData );

	//Perform any scaling or normalisation
    rangesBuffer = trainingData.getRanges();
	if( useScaling ) scaleData( trainingData );
	if( useZNormalisation ) znormData( trainingData );

	//For each class, run a one-to-one DTW and find the template the best describes the data
	for(UINT k=0; k<numTemplates; k++){
        //Get the class label for the cth class
        UINT classLabel = trainingData.getClassTracker()[k].classLabel;
        LabelledTimeSeriesClassificationData classData = trainingData.getClassData( classLabel );
		UINT numExamples = classData.getNumSamples();
		bestIndex = 0;

        //Set the class label of this template
        templatesBuffer[k].classLabel = classLabel;

        //Set the kth class label
        classLabels[k] = classLabel;
        
        trainingLog << "Training Template: " << k << " Class: " << classLabel << endl;

		//Check to make sure we actually have some training examples
		if(numExamples<1){
            errorLog << "_train(LabelledTimeSeriesClassificationData &labelledTrainingData) - Can not train model: Num of Example is < 1! Class: " << classLabel << endl;
			return false;
		}

		if(numExamples==1){//If we have just one training example then we have to use it as the template
            bestIndex = 0;

            nullRejectionThresholds[k] = 0.0;//TODO-We need a better way of calculating this!
            warningLog << "_train(LabelledTimeSeriesClassificationData &labelledTrainingData) - Can't compute reject thresholds for class " << classLabel << " as there is only 1 training example" << endl;
		}else{
            //Search for the best training example for this class
			if( !train_NDDTW(classData,templatesBuffer[k],bestIndex) ){
                errorLog << "_train(LabelledTimeSeriesClassificationData &labelledTrainingData) - Failed to train template for class with label: " << classLabel << endl;
                return false;
            }
		}

		//Add the template with the best index to the buffer
		int trainingMethod = 0;
		if(useSmoothing) trainingMethod = 1;

		switch (trainingMethod) {
			case(0)://Standard Training
				templatesBuffer[k].timeSeries = classData[bestIndex].getData();
				break;
			case(1)://Training using Smoothing
				//Smooth the data, reducing its size by a factor set by smoothFactor
				smoothData(classData[ bestIndex ].getData(),smoothingFactor,templatesBuffer[k].timeSeries);
				break;
			default:
				cout<<"Can not train model: Unknown training method \n";
				return false;
				break;
		}
        
        if( offsetUsingFirstSample ){
            offsetTimeseries( templatesBuffer[k].timeSeries );
        }

		//Add the average length of the training examples for this template to the overall averageTemplateLength
		averageTemplateLength += templatesBuffer[k].averageTemplateLength;
	}

    //Flag that the models have been trained
	trained = true;
	averageTemplateLength = (UINT) averageTemplateLength/double(numTemplates);

    //Recompute the null rejection thresholds
    recomputeNullRejectionThresholds();

    //Resize the prediction results to make sure it is setup for realtime prediction
    continuousInputDataBuffer.clear();
    continuousInputDataBuffer.resize(averageTemplateLength,vector<double>(numFeatures,0));
    classLikelihoods.resize(numTemplates,DEFAULT_NULL_LIKELIHOOD_VALUE);
    classDistances.resize(numTemplates,0);
    predictedClassLabel = GRT_DEFAULT_NULL_CLASS_LABEL;
    maxLikelihood = DEFAULT_NULL_LIKELIHOOD_VALUE;

	//Training complete
	return true;
}