int main(int argc, char** argv) { try { TCLAP::CmdLine cmd("Command description message", ' ', "0.9"); TCLAP::ValueArg<std::string> trainingData("d", "data", "training data", true, "", "string"); cmd.add(trainingData); TCLAP::ValueArg<int> maxIteration("i", "iter", "max iterations", true, 10, "int"); cmd.add(maxIteration); TCLAP::ValueArg<double> learningRate("s", "step", "leraning rate", true, 0.001, "double"); cmd.add(learningRate); TCLAP::ValueArg<double> variance("l", "l1", "variance", true, 0.001, "double"); cmd.add(variance); TCLAP::ValueArg<string> modelPath("m", "model", "model path", true, "./lccrf.weights.txt", "string"); cmd.add(modelPath); // Parse the argv array. cmd.parse(argc, argv); LCCRF lccrf; lccrf.Fit(trainingData.getValue(), maxIteration.getValue(), learningRate.getValue(), variance.getValue()); lccrf.Save(modelPath.getValue()); return 0; } catch (TCLAP::ArgException &e) // catch any exceptions { std::cerr << "error: " << e.error() << " for arg " << e.argId() << std::endl; return 1; } }
////////////////////////// TRAINING FUNCTIONS ////////////////////////// bool DTW::train(LabelledTimeSeriesClassificationData labelledTrainingData){ UINT bestIndex = 0; //Cleanup Memory templatesBuffer.clear(); classLabels.clear(); trained = false; continuousInputDataBuffer.clear(); if( trimTrainingData ){ LabelledTimeSeriesClassificationSampleTrimmer timeSeriesTrimmer(trimThreshold,maximumTrimPercentage); LabelledTimeSeriesClassificationData tempData; tempData.setNumDimensions( labelledTrainingData.getNumDimensions() ); for(UINT i=0; i<labelledTrainingData.getNumSamples(); i++){ if( timeSeriesTrimmer.trimTimeSeries( labelledTrainingData[i] ) ){ tempData.addSample(labelledTrainingData[i].getClassLabel(), labelledTrainingData[i].getData()); }else{ trainingLog << "Removing training sample " << i << " from the dataset as it could not be trimmed!" << endl; } } //Overwrite the original training data with the trimmed dataset labelledTrainingData = tempData; } if( labelledTrainingData.getNumSamples() == 0 ){ errorLog << "_train(LabelledTimeSeriesClassificationData &labelledTrainingData) - Can't train model as there are no samples in training data!" 
<< endl; return false; } //Assign numClasses = labelledTrainingData.getNumClasses(); numTemplates = labelledTrainingData.getNumClasses(); numFeatures = labelledTrainingData.getNumDimensions(); templatesBuffer.resize( numClasses ); classLabels.resize( numClasses ); nullRejectionThresholds.resize( numClasses ); averageTemplateLength = 0; //Need to copy the labelled training data incase we need to scale it or znorm it LabelledTimeSeriesClassificationData trainingData( labelledTrainingData ); //Perform any scaling or normalisation rangesBuffer = trainingData.getRanges(); if( useScaling ) scaleData( trainingData ); if( useZNormalisation ) znormData( trainingData ); //For each class, run a one-to-one DTW and find the template the best describes the data for(UINT k=0; k<numTemplates; k++){ //Get the class label for the cth class UINT classLabel = trainingData.getClassTracker()[k].classLabel; LabelledTimeSeriesClassificationData classData = trainingData.getClassData( classLabel ); UINT numExamples = classData.getNumSamples(); bestIndex = 0; //Set the class label of this template templatesBuffer[k].classLabel = classLabel; //Set the kth class label classLabels[k] = classLabel; trainingLog << "Training Template: " << k << " Class: " << classLabel << endl; //Check to make sure we actually have some training examples if(numExamples<1){ errorLog << "_train(LabelledTimeSeriesClassificationData &labelledTrainingData) - Can not train model: Num of Example is < 1! Class: " << classLabel << endl; return false; } if(numExamples==1){//If we have just one training example then we have to use it as the template bestIndex = 0; nullRejectionThresholds[k] = 0.0;//TODO-We need a better way of calculating this! 
warningLog << "_train(LabelledTimeSeriesClassificationData &labelledTrainingData) - Can't compute reject thresholds for class " << classLabel << " as there is only 1 training example" << endl; }else{ //Search for the best training example for this class if( !train_NDDTW(classData,templatesBuffer[k],bestIndex) ){ errorLog << "_train(LabelledTimeSeriesClassificationData &labelledTrainingData) - Failed to train template for class with label: " << classLabel << endl; return false; } } //Add the template with the best index to the buffer int trainingMethod = 0; if(useSmoothing) trainingMethod = 1; switch (trainingMethod) { case(0)://Standard Training templatesBuffer[k].timeSeries = classData[bestIndex].getData(); break; case(1)://Training using Smoothing //Smooth the data, reducing its size by a factor set by smoothFactor smoothData(classData[ bestIndex ].getData(),smoothingFactor,templatesBuffer[k].timeSeries); break; default: cout<<"Can not train model: Unknown training method \n"; return false; break; } if( offsetUsingFirstSample ){ offsetTimeseries( templatesBuffer[k].timeSeries ); } //Add the average length of the training examples for this template to the overall averageTemplateLength averageTemplateLength += templatesBuffer[k].averageTemplateLength; } //Flag that the models have been trained trained = true; averageTemplateLength = (UINT) averageTemplateLength/double(numTemplates); //Recompute the null rejection thresholds recomputeNullRejectionThresholds(); //Resize the prediction results to make sure it is setup for realtime prediction continuousInputDataBuffer.clear(); continuousInputDataBuffer.resize(averageTemplateLength,vector<double>(numFeatures,0)); classLikelihoods.resize(numTemplates,DEFAULT_NULL_LIKELIHOOD_VALUE); classDistances.resize(numTemplates,0); predictedClassLabel = GRT_DEFAULT_NULL_CLASS_LABEL; maxLikelihood = DEFAULT_NULL_LIKELIHOOD_VALUE; //Training complete return true; }
// Trains a boosted detector over several bootstrapping stages: each stage
// trains a classifier, and from stage 1 onwards hard negatives are mined
// with the previous stage's model and appended to the training set.
// @param verbose      verbosity level forwarded to the AdaboostLearner
// @param doBootstrap  when false, a single stage of train.numIterations is
//                     run with no per-image sampling limit
void bootstrapTrain(const int verbose, const bool doBootstrap = true)
{
    //read files for training
    const std::string
            trainSetPath = Parameters::getParameter<std::string>("train.trainSet"),
            testSetPath = Parameters::getParameter<std::string>("train.testSet");

    const int
            backgroundClassLabel = Parameters::getParameter<int>("backgroundClassLabel"),
            numBootstrappingSamples = Parameters::getParameter<int>("bootstrapTrain.numBootstrappingSamples");

    std::vector<int> stages, maxNumSamplesPerImage;
    if(doBootstrap)
    {
        stages = Parameters::getParameter<std::vector<int> >("bootstrapTrain.classifiersPerStage");
        maxNumSamplesPerImage = Parameters::getParameter<std::vector<int> >("bootstrapTrain.maxNumSamplesPerImage");
    }
    else
    {
        //no bootstrapping: one stage, -1 means no per-image cap
        const int numIterations = Parameters::getParameter<int>("train.numIterations");
        stages.push_back(numIterations);
        maxNumSamplesPerImage.push_back(-1);
    }

    //both vectors are indexed by stage, so they must be the same length
    if (stages.size() != maxNumSamplesPerImage.size())
    {
        throw runtime_error("Size miss match between the vectors classifiersperStage and maxNumSamplesPerImage");
    }

    const TrainingData::point_t
            modelWindowSize = getModelWindowSize(),
            trainDataOffset = getTrainingDataOffset();
    const TrainingData::rectangle_t objectWindow = getObjectWindow();

    std::vector<std::string> filenamesPositives, filenamesBackground;
    getImageFileNames(trainSetPath, backgroundClassLabel, filenamesPositives, filenamesBackground);

    const int trainNumNegativeSamples = Parameters::getParameter<int>("train.numNegativeSamples");

    //upper bound on the samples ever held at once: positives + initial
    //negatives + one batch of bootstrapped negatives per stage after the first
    size_t maxNumExamples = filenamesPositives.size() + trainNumNegativeSamples;
    if(stages.size() > 1)
    {
        maxNumExamples += numBootstrappingSamples*(stages.size() - 1);
    }

    const std::string initialBootstrapFileName = Parameters::getParameter<std::string>("train.bootStrapLearnerFile");
    if (!initialBootstrapFileName.empty())
    {
        //extra room for the negatives mined with the provided seed model
        maxNumExamples += numBootstrappingSamples;
    }

    // computed all feature configurations available for training.
    const size_t featuresPoolSize = Parameters::getParameter<int>("train.featuresPoolSize");
    FeaturesSharedPointer featuresConfigurations(new Features());

    //first basic feature pool
    computeRandomFeaturesConfigurations(modelWindowSize, featuresPoolSize, *featuresConfigurations);

    //all features are valid for this setup
    std::vector<bool> valid_features(featuresConfigurations->size(),true);
    //fill (valid_features.begin(),valid_features.begin()+featuresPoolSize,true);

    TrainingData::shared_ptr trainingData(new TrainingData(featuresConfigurations, valid_features, maxNumExamples, modelWindowSize, objectWindow));

    trainingData->addPositiveSamples(filenamesPositives, modelWindowSize, trainDataOffset);
    trainingData->addNegativeSamples(filenamesBackground, modelWindowSize, trainDataOffset, trainNumNegativeSamples);

    if (!initialBootstrapFileName.empty())
    {
        //for a weak model it should be avoided to sample all hard negatives from a single image
        const int maxFalsePositivesPerImage = 5;
        trainingData->addBootstrappingSamples(initialBootstrapFileName, filenamesBackground, modelWindowSize, trainDataOffset, numBootstrappingSamples, maxFalsePositivesPerImage);
    }

    const bool check_boostrapping = false; // for debugging only
    if(check_boostrapping)
    {
        //sanity check: classify the freshly built training set with the seed
        //model, print the error breakdown, then abort the run
        ModelIO modelReader;
        modelReader.readModel(initialBootstrapFileName);
        StrongClassifier classifier = modelReader.read();
        int tp, fp, fn, tn;
        classifier.classify(*trainingData, tp, fp, fn, tn);
        std::cout << "Classification Results (TestData): " << std::endl;
        std::cout << "Detection Rate: " << double(tp + tn) / (tp + tn + fp + fn) * 100 << " %" << std::endl;
        std::cout << "Error Rate: " << double(fp + fn) / (tp + tn + fp + fn) * 100 << " %" << std::endl;
        std::cout << "Error Positives: " << double(fn) / (tp + fn) * 100 << " %" << std::endl;
        std::cout << "Error Negatives: " << double(fp) / (tn + fp) * 100 << " %" << std::endl;
        std::cout << std::endl;
        throw std::runtime_error("End of game, just doing a mini-test");
    }

    AdaboostLearner Learner(verbose, trainingData);

    if (not testSetPath.empty())
    {
        // FIXME test data should use TrainingData class instead of the deprecated LabeledData class
        const TrainingData::point_t testDataOffset = getTestingDataOffset();

        //these locals deliberately shadow the training-set filename vectors above
        std::vector<std::string> filenamesPositives, filenamesBackground;
        getImageFileNames(testSetPath, backgroundClassLabel, filenamesPositives, filenamesBackground);
        printf("\nCollecting test data...\n");
        LabeledData::shared_ptr labeledTestData(new LabeledData(verbose, backgroundClassLabel));
        //printf("testDataOffsets: %i, %i\n", testDataOffset.x(), testDataOffset.y() );
        labeledTestData->createIntegralImages(filenamesPositives, filenamesBackground, modelWindowSize, testDataOffset.x(), testDataOffset.y());
        Learner.setTestData(labeledTestData);
    }

    const std::string baseOuputModelFilename = Learner.getOuputModelFileName();

    for (size_t k = 0; k < stages.size(); ++k)
    {
        // bootstrap new negatives
        if (k != 0)
        {
            //the model written by the previous stage is used to mine negatives
            const std::string bootstrapFile = boost::str(boost::format("%s.bootstrap%i") % baseOuputModelFilename % (k - 1));
            // sample new (hard) negatives using bootstrapping
            trainingData->addBootstrappingSamples(bootstrapFile, filenamesBackground, modelWindowSize, trainDataOffset, numBootstrappingSamples, maxNumSamplesPerImage[k]);
        }

        Learner.setNumIterations(stages[k]);
        //each stage writes its model to <base>.bootstrap<k>
        Learner.setOutputModelFileName(boost::str(boost::format("%s.bootstrap%i") % baseOuputModelFilename % (k)));

        //NOTE(review): only the last stage passes true to train() — the exact
        //meaning of that flag is defined in AdaboostLearner::train; confirm there
        if (k == stages.size()-1)
            Learner.train(true);
        else
            Learner.train(false);

    } // end of "for each stage"

    //publish the last stage's model under the base output filename
    boost::filesystem::copy_file(Learner.getOuputModelFileName(), baseOuputModelFilename);

    printf("Finished the %zi bootstrapping stages. Model was trained over %zi samples (%zi positives, %zi negatives).\n"
           "Final model saved at %s\n",
           stages.size(), trainingData->getNumExamples(),
           trainingData->getNumPositiveExamples(), trainingData->getNumNegativeExamples(),
           //Learner.getOuputModelFileName().c_str()
           baseOuputModelFilename.c_str());

    return;
}