/**
 Converts this classification dataset into a regression dataset so that regression
 algorithms (such as the MLP) can be used as classifiers.

 The number of regression targets is set to the number of classes in this dataset. Each
 sample's target vector is all zeros except for a 1 at index (classLabel - 1).

 For the conversion to work every class label must lie in the range [1, getNumClasses()]:
 a label of 0 has no target index, and a label larger than the number of classes would
 index past the end of the target vector.

 @return the regression dataset; a default-constructed RegressionData if this dataset has
 no samples, or a cleared RegressionData if any sample has a class label outside the valid range
*/
RegressionData ClassificationData::reformatAsRegressionData() const{

    RegressionData regressionData;

    if( totalNumSamples == 0 ){
        return regressionData;
    }

    const UINT numInputDimensions = numDimensions;
    const UINT numTargetDimensions = getNumClasses();

    regressionData.setInputAndTargetDimensions(numInputDimensions, numTargetDimensions);

    for(UINT i=0; i<totalNumSamples; i++){
        const UINT classLabel = data[i].getClassLabel();

        //Reject labels that cannot be mapped to a valid target index. The upper bound check stops
        //an out-of-bounds write when the labels are not the contiguous values 1..numTargetDimensions
        if( classLabel == 0 || classLabel > numTargetDimensions ){
            regressionData.clear();
            return regressionData;
        }

        //Set the class index in the target vector to 1 and all other values in the target vector to 0
        VectorDouble targetVector(numTargetDimensions,0);
        targetVector[ classLabel-1 ] = 1;

        regressionData.addSample(data[i].getSample(),targetVector);
    }

    return regressionData;
}
/**
 Trains one copy of the configured regression module per target dimension.

 Any previous training state is discarded first. If useScaling is enabled the input and
 target ranges are recorded and trainingData.scale(...) is called on the supplied dataset
 (note that the dataset is passed by non-const reference). A 1-dimensional-target dataset
 is then built for each target dimension k and used to train the k'th module.

 @param trainingData: the multi-target regression data used to train the modules
 @return true if every module was copied and trained successfully, false otherwise
*/
bool MultidimensionalRegression::train_(RegressionData &trainingData){

    const unsigned int M = trainingData.getNumSamples();
    const unsigned int N = trainingData.getNumInputDimensions();
    const unsigned int K = trainingData.getNumTargetDimensions();

    //Reset any previous training state
    trained = false;
    trainingResults.clear();
    deleteRegressionModules();

    if( !getIsRegressionModuleSet() ){
        errorLog << "train_(RegressionData &trainingData) - The regression module has not been set!" << std::endl;
        return false;
    }

    if( M == 0 ){
        errorLog << "train_(RegressionData &trainingData) - Training data has zero samples!" << std::endl;
        return false;
    }

    numInputDimensions = N;
    numOutputDimensions = K;
    inputVectorRanges.clear();
    targetVectorRanges.clear();

    //Scale the training data, if needed
    if( useScaling ){
        //Find the ranges for the input data
        inputVectorRanges = trainingData.getInputRanges();

        //Find the ranges for the target data
        targetVectorRanges = trainingData.getTargetRanges();

        //Scale the training data
        trainingData.scale(inputVectorRanges,targetVectorRanges,0.0,1.0);
    }

    //Setup the regression modules
    regressionModules.resize( K, NULL );

    //Any scaling will happen at the meta level, not the regression module level, so ensure scaling is turned off for the modules
    regressifier->enableScaling( false );

    for(UINT k=0; k<K; k++){
        regressionModules[k] = regressifier->deepCopy();
        if( regressionModules[k] == NULL ){
            errorLog << "train_(RegressionData &trainingData) - Failed to deep copy module " << k << std::endl;
            return false;
        }
    }

    //Train each regression module
    for(UINT k=0; k<K; k++){

        trainingLog << "Training regression module: " << k << std::endl;

        //We need to create a 1 dimensional training dataset for the k'th target dimension
        RegressionData data;
        data.setInputAndTargetDimensions(N, 1);

        for(UINT i=0; i<M; i++){
            if( !data.addSample(trainingData[i].getInputVector(), VectorFloat(1,trainingData[i].getTargetVector()[k]) ) ){
                errorLog << "train_(RegressionData &trainingData) - Failed to add sample to dataset for regression module " << k << std::endl;
                return false;
            }
        }

        if( !regressionModules[k]->train( data ) ){
            errorLog << "train_(RegressionData &trainingData) - Failed to train regression module " << k << std::endl;
            return false;
        }
    }

    //Size the regression output buffer to one value per target dimension
    regressionData.resize(K,0);

    //Flag that the algorithm has been trained
    trained = true;
    return trained;
}
/**
 Trains a multidimensional logistic regression model from the command line options and
 saves the resulting pipeline to a file.

 Options read from the parser: "filename" (required), "model-filename", "learning-rate",
 "min-change", "max-epoch", "batch-size", "num-inputs" and "num-targets". The input and
 target dimensions must be supplied when the training file ends with ".csv".

 @param parser: the command line parser holding the user options
 @return false if the filename is missing, the dimensions are missing for a CSV file, the
 training data cannot be loaded, or training fails; true otherwise. Note that a failure to
 save the model only logs a warning and the function still returns true.
*/
bool train( CommandLineParser &parser ){

    infoLog << "Training regression model..." << endl;

    string trainDatasetFilename = "";
    string modelFilename = "";
    float learningRate = 0;
    float minChange = 0;
    unsigned int maxEpoch = 0;
    unsigned int batchSize = 0;

    //Get the filename
    if( !parser.get("filename",trainDatasetFilename) ){
        errorLog << "Failed to parse filename from command line! You can set the filename using the -f." << endl;
        printHelp();
        return false;
    }

    //Get the parameters from the parser
    parser.get("model-filename",modelFilename);
    parser.get( "learning-rate", learningRate );
    parser.get( "min-change", minChange );
    parser.get( "max-epoch", maxEpoch );
    parser.get( "batch-size", batchSize );

    infoLog << "settings: learning-rate: " << learningRate << " min-change: " << minChange << " max-epoch: " << maxEpoch << " batch-size: " << batchSize << endl;

    //Load the training data to train the model
    RegressionData trainingData;

    //Try and parse the input and target dimensions
    unsigned int numInputDimensions = 0;
    unsigned int numTargetDimensions = 0;
    if( parser.get("num-inputs",numInputDimensions) && parser.get("num-targets",numTargetDimensions) ){
        infoLog << "num input dimensions: " << numInputDimensions << " num target dimensions: " << numTargetDimensions << endl;
        trainingData.setInputAndTargetDimensions( numInputDimensions, numTargetDimensions );
    }

    //The dimensions must be supplied by the user when the training file is a CSV file
    if( (numInputDimensions == 0 || numTargetDimensions == 0) && Util::stringEndsWith( trainDatasetFilename, ".csv" ) ){
        errorLog << "Failed to parse num input dimensions and num target dimensions from input arguments. You must supply the input and target dimensions if the data format is CSV!" << endl;
        printHelp();
        return false;
    }

    infoLog << "- Loading Training Data..." << endl;
    if( !trainingData.load( trainDatasetFilename ) ){
        errorLog << "Failed to load training data!\n";
        return false;
    }

    const unsigned int N = trainingData.getNumInputDimensions();
    const unsigned int T = trainingData.getNumTargetDimensions();
    infoLog << "- Num training samples: " << trainingData.getNumSamples() << endl;
    infoLog << "- Num input dimensions: " << N << endl;
    infoLog << "- Num target dimensions: " << T << endl;

    //Create a new regression instance
    LogisticRegression regression;

    regression.setMaxNumEpochs( maxEpoch );
    regression.setMinChange( minChange );
    regression.setUseValidationSet( true );
    regression.setValidationSetSize( 20 );
    regression.setRandomiseTrainingOrder( true );
    regression.enableScaling( true );

    //Apply the user supplied learning rate (previously this option was parsed and logged but never used).
    //A non-positive value is skipped so the algorithm keeps its own default learning rate
    if( learningRate > 0 ){
        regression.setLearningRate( learningRate );
    }

    //NOTE(review): batch-size is parsed and logged above but is not applied to the regression instance
    //in this function - confirm whether a matching setter should be called here

    //Create a new pipeline that will hold the regression algorithm
    GestureRecognitionPipeline pipeline;

    //Add a multidimensional regression instance and set the regression algorithm to Logistic Regression
    pipeline.setRegressifier( MultidimensionalRegression( regression, true ) );

    infoLog << "- Training model...\n";

    //Train the regression model
    if( !pipeline.train( trainingData ) ){
        errorLog << "Failed to train model!" << endl;
        return false;
    }

    infoLog << "- Model trained!" << endl;

    infoLog << "- Saving model to: " << modelFilename << endl;

    //Save the pipeline; a failure to save is reported as a warning only
    if( pipeline.save( modelFilename ) ){
        infoLog << "- Model saved." << endl;
    }else warningLog << "Failed to save model to file: " << modelFilename << endl;

    infoLog << "- TrainingTime: " << pipeline.getTrainingTime() << endl;

    return true;
}