Example #1
#include <tclap/CmdLine.h>

#include <iostream>
#include <string>

#include "LCCRF.h" // project-local header assumed to provide the LCCRF class

int main(int argc, char** argv)
{

	try {

		TCLAP::CmdLine cmd("Command description message", ' ', "0.9");

		TCLAP::ValueArg<std::string> trainingData("d", "data", "training data", true, "", "string");
		cmd.add(trainingData);
		TCLAP::ValueArg<int> maxIteration("i", "iter", "max iterations", true, 10, "int");
		cmd.add(maxIteration);
		TCLAP::ValueArg<double> learningRate("s", "step", "learning rate", true, 0.001, "double");
		cmd.add(learningRate);
		TCLAP::ValueArg<double> variance("l", "l1", "variance", true, 0.001, "double");
		cmd.add(variance);
		TCLAP::ValueArg<std::string> modelPath("m", "model", "model path", true, "./lccrf.weights.txt", "string");
		cmd.add(modelPath);

		// Parse the argv array.
		cmd.parse(argc, argv);

		LCCRF lccrf;
		lccrf.Fit(trainingData.getValue(), maxIteration.getValue(), learningRate.getValue(), variance.getValue());
		lccrf.Save(modelPath.getValue());
		return 0;
	}
	catch (TCLAP::ArgException &e)  // catch any exceptions
	{
		std::cerr << "error: " << e.error() << " for arg " << e.argId() << std::endl;
		return 1;
	}
}
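
The arguments registered above make -d/--data, -i/--iter, -s/--step, -l/--l1, and -m/--model all required; TCLAP also generates --help and --version automatically from the CmdLine description. A typical invocation might look like this (the binary name lccrf_train is hypothetical):

./lccrf_train -d ./train.data -i 100 -s 0.001 -l 0.001 -m ./lccrf.weights.txt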
Example #2
////////////////////////// TRAINING FUNCTIONS //////////////////////////
bool DTW::train(LabelledTimeSeriesClassificationData labelledTrainingData){

	UINT bestIndex = 0;

	//Cleanup Memory
	templatesBuffer.clear();
    classLabels.clear();
	trained = false;
    continuousInputDataBuffer.clear();

    if( trimTrainingData ){
        LabelledTimeSeriesClassificationSampleTrimmer timeSeriesTrimmer(trimThreshold,maximumTrimPercentage);
        LabelledTimeSeriesClassificationData tempData;
        tempData.setNumDimensions( labelledTrainingData.getNumDimensions() );
        
        for(UINT i=0; i<labelledTrainingData.getNumSamples(); i++){
            if( timeSeriesTrimmer.trimTimeSeries( labelledTrainingData[i] ) ){
                tempData.addSample(labelledTrainingData[i].getClassLabel(), labelledTrainingData[i].getData());
            }else{
                trainingLog << "Removing training sample " << i << " from the dataset as it could not be trimmed!" << endl;
            }
        }
        //Overwrite the original training data with the trimmed dataset
        labelledTrainingData = tempData;
    }
    
    if( labelledTrainingData.getNumSamples() == 0 ){
        errorLog << "train(LabelledTimeSeriesClassificationData labelledTrainingData) - Can't train model as there are no samples in the training data!" << endl;
        return false;
    }

	//Assign
    numClasses = labelledTrainingData.getNumClasses();
	numTemplates = labelledTrainingData.getNumClasses();
    numFeatures = labelledTrainingData.getNumDimensions();
	templatesBuffer.resize( numClasses );
    classLabels.resize( numClasses );
	nullRejectionThresholds.resize( numClasses );
	averageTemplateLength = 0;

	//Need to copy the labelled training data in case we need to scale it or znorm it
	LabelledTimeSeriesClassificationData trainingData( labelledTrainingData );

	//Perform any scaling or normalisation
    rangesBuffer = trainingData.getRanges();
	if( useScaling ) scaleData( trainingData );
	if( useZNormalisation ) znormData( trainingData );

	//For each class, run a one-to-one DTW and find the template that best describes the data
	for(UINT k=0; k<numTemplates; k++){
        //Get the class label for the kth class
        UINT classLabel = trainingData.getClassTracker()[k].classLabel;
        LabelledTimeSeriesClassificationData classData = trainingData.getClassData( classLabel );
		UINT numExamples = classData.getNumSamples();
		bestIndex = 0;

        //Set the class label of this template
        templatesBuffer[k].classLabel = classLabel;

        //Set the kth class label
        classLabels[k] = classLabel;
        
        trainingLog << "Training Template: " << k << " Class: " << classLabel << endl;

		//Check to make sure we actually have some training examples
		if(numExamples<1){
            errorLog << "train(LabelledTimeSeriesClassificationData labelledTrainingData) - Can not train model: the number of examples is < 1! Class: " << classLabel << endl;
			return false;
		}

		if(numExamples==1){//If we have just one training example then we have to use it as the template
            bestIndex = 0;

            nullRejectionThresholds[k] = 0.0;//TODO-We need a better way of calculating this!
            warningLog << "train(LabelledTimeSeriesClassificationData labelledTrainingData) - Can't compute reject thresholds for class " << classLabel << " as there is only 1 training example" << endl;
		}else{
            //Search for the best training example for this class
			if( !train_NDDTW(classData,templatesBuffer[k],bestIndex) ){
                errorLog << "train(LabelledTimeSeriesClassificationData labelledTrainingData) - Failed to train template for class with label: " << classLabel << endl;
                return false;
            }
		}

		//Add the template with the best index to the buffer
		int trainingMethod = 0;
		if(useSmoothing) trainingMethod = 1;

		switch (trainingMethod) {
			case(0)://Standard Training
				templatesBuffer[k].timeSeries = classData[bestIndex].getData();
				break;
			case(1)://Training using Smoothing
				//Smooth the data, reducing its size by a factor set by smoothingFactor
				smoothData(classData[ bestIndex ].getData(),smoothingFactor,templatesBuffer[k].timeSeries);
				break;
			default:
				errorLog << "Can not train model: unknown training method!" << endl;
				return false;
		}
        
        if( offsetUsingFirstSample ){
            offsetTimeseries( templatesBuffer[k].timeSeries );
        }

		//Add the average length of the training examples for this template to the overall averageTemplateLength
		averageTemplateLength += templatesBuffer[k].averageTemplateLength;
	}

    //Flag that the models have been trained
	trained = true;
	averageTemplateLength = (UINT)(averageTemplateLength/double(numTemplates));

    //Recompute the null rejection thresholds
    recomputeNullRejectionThresholds();

    //Resize the prediction results to make sure it is setup for realtime prediction
    continuousInputDataBuffer.clear();
    continuousInputDataBuffer.resize(averageTemplateLength,vector<double>(numFeatures,0));
    classLikelihoods.resize(numTemplates,DEFAULT_NULL_LIKELIHOOD_VALUE);
    classDistances.resize(numTemplates,0);
    predictedClassLabel = GRT_DEFAULT_NULL_CLASS_LABEL;
    maxLikelihood = DEFAULT_NULL_LIKELIHOOD_VALUE;

	//Training complete
	return true;
}
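
For context, a minimal caller for the train() method above might look like the following sketch. It assumes the GRT API of the same vintage as the snippet (this dataset class was later renamed TimeSeriesClassificationData), and the file name train.grt is hypothetical:

#include "GRT.h"
using namespace GRT;

int main(){
    //Load a labelled time-series dataset (file name is hypothetical)
    LabelledTimeSeriesClassificationData data;
    if( !data.loadDatasetFromFile("train.grt") ) return 1;

    //Train the DTW classifier via the train() method above
    DTW dtw;
    if( !dtw.train( data ) ) return 1;

    //Smoke test: classify the first training time series
    if( dtw.predict( data[0].getData() ) ){
        std::cout << "Predicted class: " << dtw.getPredictedClassLabel() << std::endl;
    }
    return 0;
}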
Example #3
void bootstrapTrain(const int verbose, const bool doBootstrap = true)
{

    //read files for training
    const std::string
            trainSetPath = Parameters::getParameter<std::string>("train.trainSet"),
            testSetPath = Parameters::getParameter<std::string>("train.testSet");


    const int
            backgroundClassLabel = Parameters::getParameter<int>("backgroundClassLabel"),
            numBootstrappingSamples = Parameters::getParameter<int>("bootstrapTrain.numBootstrappingSamples");

    std::vector<int> stages, maxNumSamplesPerImage;

    if(doBootstrap)
    {
        stages = Parameters::getParameter<std::vector<int> >("bootstrapTrain.classifiersPerStage");
        maxNumSamplesPerImage = Parameters::getParameter<std::vector<int> >("bootstrapTrain.maxNumSamplesPerImage");
    }
    else
    {
        const int numIterations = Parameters::getParameter<int>("train.numIterations");
        stages.push_back(numIterations);
        maxNumSamplesPerImage.push_back(-1);
    }


    if (stages.size() != maxNumSamplesPerImage.size())
    {
        throw runtime_error("Size mismatch between the vectors classifiersPerStage and maxNumSamplesPerImage");
    }


    const TrainingData::point_t
            modelWindowSize = getModelWindowSize(),
            trainDataOffset = getTrainingDataOffset();

    const TrainingData::rectangle_t objectWindow = getObjectWindow();

    std::vector<std::string> filenamesPositives, filenamesBackground;
    getImageFileNames(trainSetPath, backgroundClassLabel, filenamesPositives, filenamesBackground);

    const int trainNumNegativeSamples = Parameters::getParameter<int>("train.numNegativeSamples");

    size_t maxNumExamples = filenamesPositives.size() + trainNumNegativeSamples;
    if(stages.size() > 1)
    {
        maxNumExamples += numBootstrappingSamples*(stages.size() - 1);
    }

    const std::string initialBootstrapFileName = Parameters::getParameter<std::string>("train.bootStrapLearnerFile");
    if (!initialBootstrapFileName.empty())
    {
        maxNumExamples += numBootstrappingSamples;
    }


    // Compute all feature configurations available for training.
    const size_t featuresPoolSize = Parameters::getParameter<int>("train.featuresPoolSize");
    FeaturesSharedPointer featuresConfigurations(new Features());
    // First, build the basic feature pool
    computeRandomFeaturesConfigurations(modelWindowSize, featuresPoolSize,  *featuresConfigurations);

    //all features are valid for this setup
    std::vector<bool> valid_features(featuresConfigurations->size(),true);
    //fill (valid_features.begin(),valid_features.begin()+featuresPoolSize,true);

    TrainingData::shared_ptr trainingData(new TrainingData(featuresConfigurations, valid_features, maxNumExamples,
                                                           modelWindowSize, objectWindow));
    trainingData->addPositiveSamples(filenamesPositives, modelWindowSize, trainDataOffset);
    trainingData->addNegativeSamples(filenamesBackground, modelWindowSize, trainDataOffset, trainNumNegativeSamples);

    if (!initialBootstrapFileName.empty())
    {
        // For a weak model, avoid sampling all the hard negatives from a single image
        const int maxFalsePositivesPerImage = 5;
        trainingData->addBootstrappingSamples(initialBootstrapFileName, filenamesBackground,
                                              modelWindowSize, trainDataOffset,
                                              numBootstrappingSamples, maxFalsePositivesPerImage);
    }

    const bool check_bootstrapping = false; // for debugging only
    if(check_bootstrapping)
    {
        ModelIO modelReader;
        modelReader.readModel(initialBootstrapFileName);
        StrongClassifier classifier = modelReader.read();

        int tp, fp, fn, tn;
        classifier.classify(*trainingData, tp, fp, fn, tn);
        std::cout << "Classification Results (TestData): " << std::endl;
        std::cout << "Detection Rate: " << double(tp + tn) / (tp + tn + fp + fn) * 100 << " %" <<  std::endl;
        std::cout << "Error Rate: " << double(fp + fn) / (tp + tn + fp + fn) * 100 << " %" <<  std::endl;
        std::cout << "Error Positives: " <<  double(fn) / (tp + fn) * 100 << " %" <<  std::endl;
        std::cout << "Error Negatives: " <<  double(fp) / (tn + fp) * 100 << " %" <<  std::endl;
        std::cout << std::endl;
        throw std::runtime_error("End of game, just doing a mini-test");
    }

    AdaboostLearner Learner(verbose, trainingData);

    if (not testSetPath.empty())
    {
        // FIXME test data should use TrainingData class instead of the deprecated LabeledData class
        const TrainingData::point_t testDataOffset = getTestingDataOffset();

        std::vector<std::string> filenamesPositives, filenamesBackground;
        getImageFileNames(testSetPath, backgroundClassLabel, filenamesPositives, filenamesBackground);

        printf("\nCollecting test data...\n");
        LabeledData::shared_ptr labeledTestData(new LabeledData(verbose, backgroundClassLabel));
        //printf("testDataOffsets: %i, %i\n", testDataOffset.x(), testDataOffset.y() );
        labeledTestData->createIntegralImages(filenamesPositives, filenamesBackground,
                                              modelWindowSize, testDataOffset.x(), testDataOffset.y());
        Learner.setTestData(labeledTestData);
    }

    const std::string baseOuputModelFilename = Learner.getOuputModelFileName();

    for (size_t k = 0; k < stages.size(); ++k)
    {
        // bootstrap new negatives
        if (k != 0)
        {
            const std::string bootstrapFile =
                    boost::str(boost::format("%s.bootstrap%i") % baseOuputModelFilename % (k - 1));

            // sample new (hard) negatives using bootstrapping
            trainingData->addBootstrappingSamples(bootstrapFile, filenamesBackground,
                                                  modelWindowSize, trainDataOffset,
                                                  numBootstrappingSamples, maxNumSamplesPerImage[k]);
        }

        Learner.setNumIterations(stages[k]);
        Learner.setOutputModelFileName(boost::str(boost::format("%s.bootstrap%i") % baseOuputModelFilename % (k)));

        const bool isLastStage = (k == stages.size() - 1);
        Learner.train(isLastStage);
    } // end of "for each stage"

    boost::filesystem::copy_file(Learner.getOuputModelFileName(), baseOuputModelFilename);

    printf("Finished the %zi bootstrapping stages. Model was trained over %zi samples (%zi positives, %zi negatives).\n"
           "Final model saved at %s\n",
           stages.size(),
           trainingData->getNumExamples(),
           trainingData->getNumPositiveExamples(), trainingData->getNumNegativeExamples(),
           //Learner.getOuputModelFileName().c_str()
           baseOuputModelFilename.c_str());


    return;
}
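
For reference, the Parameters::getParameter calls above imply a configuration file roughly like the excerpt below. The key names are taken directly from the code; the values, and the exact syntax for the vector-valued keys (classifiersPerStage, maxNumSamplesPerImage), are illustrative assumptions that depend on how Parameters parses its input:

backgroundClassLabel = 0

train.trainSet = /path/to/trainSet.txt
train.testSet = /path/to/testSet.txt
train.numNegativeSamples = 5000
train.featuresPoolSize = 30000
train.bootStrapLearnerFile =
train.numIterations = 2000   # used only when bootstrapping is disabled

bootstrapTrain.numBootstrappingSamples = 5000
bootstrapTrain.classifiersPerStage = 1000 2000 4000
bootstrapTrain.maxNumSamplesPerImage = 5 10 -1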