//Turns the classification data into regression data, so regression algorithms like the MLP can be used as a classifier.
//The number of targets in the regression data is set to the number of classes in the classification data (getNumClasses()).
//The target vector of each regression sample is all 0's, except for the index (classLabel-1), which is set to 1.
//For this to work every sample must have a classLabel in the range [1 numClasses]:
// - a classLabel of 0 has no matching target index
// - a classLabel greater than the number of classes would index past the end of the target vector
//If any sample has a classLabel outside of this range then the regression data is cleared and returned as-is.
//If the classification data has no samples then a default-constructed RegressionData is returned.
RegressionData ClassificationData::reformatAsRegressionData() const{

    RegressionData regressionData;

    if( totalNumSamples == 0 ){
        return regressionData;
    }

    const UINT numInputDimensions = numDimensions;
    const UINT numTargetDimensions = getNumClasses();
    regressionData.setInputAndTargetDimensions(numInputDimensions, numTargetDimensions);

    for(UINT i=0; i<totalNumSamples; i++){
        const UINT classLabel = data[i].getClassLabel();

        //Validate the label before it is used as an index, the target vector only has numTargetDimensions elements
        if( classLabel == 0 || classLabel > numTargetDimensions ){
            regressionData.clear();
            return regressionData;
        }

        //Set the class index in the target vector to 1 and all other values in the target vector to 0
        VectorDouble targetVector(numTargetDimensions,0);
        targetVector[ classLabel-1 ] = 1;

        regressionData.addSample(data[i].getSample(),targetVector);
    }

    return regressionData;
}
//Trains one copy of the regression module per target dimension of the training data.
//Any previous training results and regression modules are discarded first.
//If useScaling is true then the input and target ranges are stored and trainingData is scaled in place to [0 1].
//@param trainingData: the regression training data, note that this is modified if scaling is enabled
//@return true if every regression module was trained, false otherwise (in which case trained remains false)
bool MultidimensionalRegression::train_(RegressionData &trainingData){

    const unsigned int M = trainingData.getNumSamples();
    const unsigned int N = trainingData.getNumInputDimensions();
    const unsigned int K = trainingData.getNumTargetDimensions();

    //Reset any state from a previous training run
    trained = false;
    trainingResults.clear();
    deleteRegressionModules();

    if( !getIsRegressionModuleSet() ){
        errorLog << "train_(RegressionData &trainingData) - The regression module has not been set!" << std::endl;
        return false;
    }

    if( M == 0 ){
        errorLog << "train_(RegressionData &trainingData) - Training data has zero samples!" << std::endl;
        return false;
    }

    numInputDimensions = N;
    numOutputDimensions = K;
    inputVectorRanges.clear();
    targetVectorRanges.clear();

    //Scale the training data, if needed
    if( useScaling ){
        //Find the ranges for the input data
        inputVectorRanges = trainingData.getInputRanges();

        //Find the ranges for the target data
        targetVectorRanges = trainingData.getTargetRanges();

        //Scale the training data
        trainingData.scale(inputVectorRanges,targetVectorRanges,0.0,1.0);
    }

    //Setup the regression modules, one per target dimension
    regressionModules.resize( K, NULL );

    //Any scaling will happen at the meta level, not the regression module level, so ensure scaling is turned off for the modules
    //This is set on the template module before it is copied, so every copy made below is created from it
    regressifier->enableScaling( false );

    for(UINT k=0; k<K; k++){
        regressionModules[k] = regressifier->deepCopy();
        if( regressionModules[k] == NULL ){
            errorLog << "train_(RegressionData &trainingData) - Failed to deep copy module " << k << std::endl;
            return false;
        }
    }

    //Train each regression module
    for(UINT k=0; k<K; k++){

        trainingLog << "Training regression module: " << k << std::endl;

        //We need to create a 1 dimensional training dataset for the k'th target dimension
        RegressionData data;
        data.setInputAndTargetDimensions(N, 1);

        for(UINT i=0; i<M; i++){
            if( !data.addSample(trainingData[i].getInputVector(), VectorFloat(1,trainingData[i].getTargetVector()[k]) ) ){
                errorLog << "train_(RegressionData &trainingData) - Failed to add sample to dataset for regression module " << k << std::endl;
                return false;
            }
        }

        if( !regressionModules[k]->train( data ) ){
            errorLog << "train_(RegressionData &trainingData) - Failed to train regression module " << k << std::endl;
            return false;
        }
    }

    //Size the regression output buffer to one value per target dimension and flag that the algorithm has been trained
    regressionData.resize(K,0);
    trained = true;
    return trained;
}