int main (int argc, char* argv[]){ RegressionDataset dataset; dataset.setName("IAM-sequenced10px"); fillImageVector(argv[1], 0, dataset, 10); cout << dataset.getMean(); cout << dataset.getStandardDeviation(); dataset.save("../xml/IAM-10.xml"); return EXIT_SUCCESS; }
int main(int argc, char **argv) { RegressionDataset data; importCSV(data, "blogData_train.csv", LAST_COLUMN,1,',','#', 2<<16); LinearRegression trainer(100); LinearModel<> model; Timer time; trainer.train(model, data); double time_taken = time.stop(); SquaredLoss<> loss; cout << "Residual sum of squares:" << loss(data.labels(),model(data.inputs()))<<std::endl; cout << "Time:\n" << time_taken << endl; cout << time_taken << endl; }
//Train model with a regression dataset void CARTTrainer::train(ModelType& model, RegressionDataset const& dataset) { //Store the number of input dimensions m_inputDimension = inputDimension(dataset); //Pass input dimension (i.e., number of attributes) to tree model model.setInputDimension(m_inputDimension); //Store the size of the labels m_labelDimension = labelDimension(dataset); // create cross-validation folds RegressionDataset set=dataset; CVFolds<RegressionDataset > folds = createCVSameSize(set, m_numberOfFolds); double bestErrorRate = std::numeric_limits<double>::max(); CARTClassifier<RealVector>::TreeType bestTree; for (unsigned fold = 0; fold < m_numberOfFolds; ++fold){ //Run through all the cross validation sets RegressionDataset dataTrain = folds.training(fold); RegressionDataset dataTest = folds.validation(fold); std::size_t numTrainElements = dataTrain.numberOfElements(); AttributeTables tables = createAttributeTables(dataTrain.inputs()); std::vector < RealVector > labels(numTrainElements); boost::copy(dataTrain.labels().elements(),labels.begin()); //Build tree form this fold CARTClassifier<RealVector>::TreeType tree = buildTree(tables, dataTrain, labels, 0, dataTrain.numberOfElements()); //Add the tree to the model and prune model.setTree(tree); while(true){ //evaluate the error of current tree SquaredLoss<> loss; double error = loss.eval(dataTest.labels(), model(dataTest.inputs())); if(error < bestErrorRate){ //We have found a subtree that has a smaller error rate when tested! bestErrorRate = error; bestTree = tree; } if(tree.size() == 1) break; pruneTree(tree); model.setTree(tree); } } SHARK_CHECK(bestTree.size() > 0, "We should never set a tree that is empty."); model.setTree(bestTree); }
void fillImageVector(char* _directoryName,int _mode, RegressionDataset& _dataset, int _sectionLength){ DIR *dp; struct dirent *ep; dp = opendir (_directoryName); if (dp != NULL){ while (ep = readdir (dp)){ //append an image to the vector only if it has a .png extension if(strstr(ep->d_name,".png")!=NULL){ char str[200]=""; strcat(str,_directoryName); strcat(str,"/"); strcat(str,ep->d_name); Mat image = imread(str,_mode); int subparts=floor(((float)image.cols)/((float)_sectionLength)); vector<FeatureVector> features; for(int i=0;i<subparts;i++){ FeatureVector fv(_sectionLength*image.rows); for(int j=0;j<_sectionLength;j++){ for(int k=0;k<image.rows;k++){ if((int)image.at<uchar>(k,i*_sectionLength+j)==255){ fv[j*image.rows+k]=1.0; } else{ fv[j*image.rows+k]=0.0; } } } features.push_back(fv); } _dataset.addSequence(features,features); } } (void) closedir (dp); } else{ cerr << "ERROR in CreateImageVector : Couldn't open the directory"; exit(1); } }
// Regression void RFTrainer::train(RFClassifier& model, RegressionDataset const& dataset) { model.clearModels(); // added by TG 23.02.2015 //TODO O.K.: i am just fixing these things for now so that they are working. //Store the number of input dimensions m_inputDimension = inputDimension(dataset); //Store the size of the labels m_labelDimension = labelDimension(dataset); model.setInputDimension(m_inputDimension); model.setLabelDimension(m_labelDimension); m_regressionLearner = true; setDefaults(); //we need direct element access sicne we need to generate elementwise subsets std::size_t subsetSize = static_cast<std::size_t>(dataset.numberOfElements()*m_OOBratio); DataView<RegressionDataset const> elements(dataset); //Generate m_B trees SHARK_PARALLEL_FOR(int i = 0; i < (int)m_B; ++i){ //For each tree generate a subset of the dataset //generate indices of the dataset (pick k out of n elements) std::vector<std::size_t> subsetIndices(dataset.numberOfElements()); boost::iota(subsetIndices,0); boost::random_shuffle(subsetIndices); // create oob indices std::vector<std::size_t>::iterator oobStart = subsetIndices.begin() + subsetSize; std::vector<std::size_t>::iterator oobEnd = subsetIndices.end(); //generate the dataset by copying (TODO: this is a quick fix! 
subsetIndices.erase(oobStart, oobEnd); RegressionDataset dataTrain = toDataset(subset(elements,subsetIndices)); AttributeTables tables; createAttributeTables(dataTrain.inputs(), tables); std::size_t dataTrainSize = dataTrain.numberOfElements(); std::vector<RealVector> labels; for(std::size_t i = 0; i < dataTrainSize; i++){ labels.push_back(dataTrain.element(i).label); } CARTClassifier<RealVector>::TreeType tree = buildTree(tables, dataTrain, labels, 0); CARTClassifier<RealVector> cart(tree, m_inputDimension); // if oob error or importances have to be computed, create an oob sample if(m_computeOOBerror || m_computeFeatureImportances){ std::vector<std::size_t> subsetIndicesOOB(oobStart, oobEnd); RegressionDataset dataOOB = toDataset(subset(elements, subsetIndicesOOB)); // if importances should be computed, oob errors are computed implicitly if(m_computeFeatureImportances){ cart.computeFeatureImportances(dataOOB); } // if importances should not be computed, only compute the oob errors else{ cart.computeOOBerror(dataOOB); } } SHARK_CRITICAL_REGION{ model.addModel(cart); } } if(m_computeOOBerror){ model.computeOOBerror(); } if(m_computeFeatureImportances){ model.computeFeatureImportances(); } }
/**
 * Recursively grows one regression tree for the random forest.
 *
 * A node becomes a leaf when it holds at most m_nodeSize cases or when no
 * admissible split exists among the randomly selected attributes. Otherwise
 * the split with the smallest weighted total sum of squares is taken and both
 * halves are built recursively with child ids 2*nodeId+1 and 2*nodeId+2.
 *
 * @param tables  sorted attribute tables of the cases in this node; cleared
 *                by this function once a split has been applied
 * @param dataset data the table ids refer to
 * @param labels  labels of the cases in this node (averaged for the node label)
 * @param nodeId  id given to the node created by this call
 * @return the node followed by its left and then its right subtree
 */
CARTClassifier<RealVector>::TreeType RFTrainer::buildTree(AttributeTables& tables, RegressionDataset const& dataset, std::vector<RealVector> const& labels, std::size_t nodeId ){
    // Start from a leaf description; split fields are filled in later.
    CARTClassifier<RealVector>::NodeInfo node;
    node.nodeId = nodeId;
    node.attributeIndex = 0;
    node.attributeValue = 0.0;
    node.leftNodeId = 0;
    node.rightNodeId = 0;
    node.label = average(labels);
    node.misclassProp = 0.0;
    node.r = 0;
    node.g = 0.0;

    CARTClassifier<RealVector>::TreeType leftSubtree, rightSubtree;

    // Number of cases in this node.
    std::size_t const caseCount = tables[0].size();

    if(caseCount > m_nodeSize){
        std::vector<RealVector> winningLabels, sortedLabels;
        RealVector sumLeft(m_labelDimension), sumRight(m_labelDimension);

        // Draw the attributes that are examined for this node.
        set<std::size_t> candidateAttributes;
        generateRandomTableIndicies(candidateAttributes);

        std::size_t winningAttribute = 0;
        std::size_t winningPosition = 0;
        double winningValue = 0.0;
        double lowestImpurity = -1;
        bool splitFound = false;

        for (set<std::size_t>::iterator pos = candidateAttributes.begin(); pos != candidateAttributes.end(); ++pos){
            std::size_t const attribute = *pos;

            sumRight.clear();
            sumLeft.clear();
            sortedLabels.clear();

            // Labels in the order of the sorted attribute; everything starts on the right.
            for(std::size_t row = 0; row < tables[attribute].size(); ++row){
                sortedLabels.push_back(dataset.element(tables[attribute][row].id).label);
                sumRight += sortedLabels.back();
            }

            // Move the first case to the left side.
            sumLeft += sortedLabels[0];
            sumRight -= sortedLabels[0];

            for(std::size_t cut = 1; cut < caseCount; ++cut){
                std::size_t const before = cut - 1;
                // A cut is only possible between two distinct attribute values.
                if(tables[attribute][before].value != tables[attribute][cut].value){
                    std::size_t leftCount = cut;
                    std::size_t rightCount = caseCount - leftCount;

                    // Weighted squared error of the two halves.
                    double impurity = (leftCount*totalSumOfSquares(sortedLabels,0,leftCount,sumLeft)+rightCount*totalSumOfSquares(sortedLabels,leftCount,rightCount,sumRight))/(double)(caseCount);

                    if(lowestImpurity < 0 || impurity < lowestImpurity){
                        splitFound = true;
                        lowestImpurity = impurity;
                        winningAttribute = attribute;
                        winningPosition = before;
                        winningValue = tables[attribute][winningPosition].value;
                        winningLabels = sortedLabels;
                    }
                }

                // Shift case `cut` from the right to the left side.
                sumLeft += sortedLabels[cut];
                sumRight -= sortedLabels[cut];
            }
        }

        if(splitFound){
            AttributeTables rightTables, leftTables;
            splitAttributeTables(tables, winningAttribute, winningPosition, leftTables, rightTables);
            tables.clear();//save memory

            // Positions 0..winningPosition go left, the rest goes right.
            std::vector<RealVector> leftLabels(winningLabels.begin(), winningLabels.begin() + (winningPosition + 1));
            std::vector<RealVector> rightLabels(winningLabels.begin() + (winningPosition + 1), winningLabels.end());

            node.attributeIndex = winningAttribute;
            node.attributeValue = winningValue;
            node.leftNodeId = 2*nodeId+1;
            node.rightNodeId = 2*nodeId+2;

            leftSubtree = buildTree(leftTables, dataset, leftLabels, node.leftNodeId);
            rightSubtree = buildTree(rightTables, dataset, rightLabels, node.rightNodeId);
        }
    }

    // For a leaf both subtrees are empty, so only the node itself is stored.
    CARTClassifier<RealVector>::TreeType tree;
    tree.push_back(node);
    tree.insert(tree.end(), leftSubtree.begin(), leftSubtree.end());
    tree.insert(tree.end(), rightSubtree.begin(), rightSubtree.end());
    return tree;
}
//Build CART tree in the regression case CARTTrainer::TreeType CARTTrainer::buildTree(AttributeTables const& tables, RegressionDataset const& dataset, std::vector<RealVector> const& labels, std::size_t nodeId, std::size_t trainSize){ size_t nextId = 0; std::queue<BuildData> bd; bd.emplace(tables, labels, nextId++); TreeType tree; while(!bd.empty()) { BuildData current(std::move(bd.front())); bd.pop(); //Construct tree CARTClassifier<RealVector>::NodeInfo nodeInfo; nodeInfo.nodeId = current.nodeId; nodeInfo.label = mean(current.labels); nodeInfo.leftNodeId = 0; nodeInfo.rightNodeId = 0; //Store the Total Sum of Squares (TSS) RealVector labelSum = current.labels[0]; for(std::size_t i=1; i< current.labels.size(); i++){ labelSum += current.labels[0]; } nodeInfo.misclassProp = totalSumOfSquares(current.labels, 0, current.labels.size(), labelSum)*((double)dataset.numberOfElements()/trainSize); //n = Total number of cases in the dataset //n1 = Number of cases to the left child node //n2 = number of cases to the right child node std::size_t n, n1, n2; n = current.tables[0].size(); size_t splitcount = n/m_numSplits; splitcount = splitcount ? 
splitcount : 1; // Make sure splitcount is never 0 std::cout << labels.size() << " " << splitcount << " " << m_nodeSize << std::endl; if(n > m_nodeSize){ //label vectors std::vector<RealVector> bestLabels, tmpLabels; RealVector labelSumAbove(m_labelDimension), labelSumBelow(m_labelDimension); //Index of attributes std::size_t bestAttributeIndex = 0; std::size_t bestAttributeValIndex = m_nodeSize; //Attribute values double bestAttributeVal = current.tables[bestAttributeIndex][bestAttributeValIndex-1].value; double impurity, fullImpurity, bestImpurity = -1; bool doSplit = false; for (size_t attributeIndex = 0; attributeIndex< m_inputDimension; attributeIndex++){ labelSumBelow.clear(); labelSumAbove.clear(); tmpLabels.clear(); //Create a labels table, that corresponds to the sorted attribute for(std::size_t k=0; k<current.tables[attributeIndex].size(); k++){ tmpLabels.push_back(dataset.element(current.tables[attributeIndex][k].id).label); noalias(labelSumBelow) += dataset.element(current.tables[attributeIndex][k].id).label; } for(std::size_t i=splitcount; i<n; i += splitcount){ // cerr << "Trying split at att: " << attributeIndex << " and point: " << i << endl; for(std::size_t j = i-splitcount; j < i; j++) { noalias(labelSumAbove) += tmpLabels[j]; noalias(labelSumBelow) -= tmpLabels[j]; } if(current.tables[attributeIndex][i-splitcount].value!=current.tables[attributeIndex][i].value){ n1=i; n2 = n-n1; //Calculate the squared error of the split fullImpurity = totalSumOfSquares(tmpLabels,0,n,labelSumBelow + labelSumAbove); impurity = (n1*totalSumOfSquares(tmpLabels,0,n1,labelSumAbove)+n2*totalSumOfSquares(tmpLabels,n1,n2,labelSumBelow))/(double)(n); double improvement = (fullImpurity - impurity) / fullImpurity; if(improvement*100 >= m_splitImpurityGain && (impurity<bestImpurity || bestImpurity<0)){ //Found a more pure split, store the attribute index and value doSplit = true; bestImpurity = impurity; bestAttributeIndex = attributeIndex; bestAttributeValIndex = i; 
bestAttributeVal = current.tables[attributeIndex][bestAttributeValIndex-1].value; bestLabels = tmpLabels; } } } } if(doSplit){ BuildData leftNode; BuildData rightNode; //Split the attribute tables splitAttributeTables(current.tables, bestAttributeIndex, bestAttributeValIndex-1, leftNode.tables, rightNode.tables); //Split the labels for(std::size_t i = 0; i < bestAttributeValIndex; i++){ leftNode.labels.push_back(bestLabels[i]); } for(std::size_t i = bestAttributeValIndex; i < bestLabels.size(); i++){ rightNode.labels.push_back(bestLabels[i]); } //Continue recursively nodeInfo.attributeIndex = bestAttributeIndex; nodeInfo.attributeValue = bestAttributeVal; nodeInfo.leftNodeId = nextId++; nodeInfo.rightNodeId = nextId++; leftNode.nodeId = nodeInfo.leftNodeId; rightNode.nodeId = nodeInfo.rightNodeId; bd.push(std::move(leftNode)); bd.push(std::move(rightNode)); } } tree.push_back(nodeInfo); } std::cerr << "Tree size: " << tree.size() << std::endl; cerr << "Will return\n"; return tree; }
int main(int argc, char* argv[]) { RegressionDataset dataset; dataset.load(argv[2]); RegressionDataset dataset2; dataset2.load(argv[3]); cout << "Dataset loaded, total elements : " << dataset.getNumSamples() << endl; Mask mask; LearningParams params; PBDNN pop; ifstream inStream(argv[1]); inStream >> pop; inStream >> params; vector<Vec3b> colors = createColorRepartition(pop.getPopulation().size()); ofstream log("training.log"); PopulationClusterBP pbp(pop, dataset, params, dataset2,mask, mask,log); AEMeasurer mae; DiversityMeasurer diversity(pop, dataset, mae); diversity.measurePerformance(); cout << "Starting diversity" << endl << diversity.getDisagreementMatrix() << endl; cout << "Starting overall diversity" << endl << diversity.getDisagreementScalar() << endl; double t = (double) getTickCount(); pbp.train(); t = ((double) getTickCount() - t) / getTickFrequency(); log << "Time :" << t << endl; cout << endl << "Saving network" << endl; ofstream outStream("IAMpop.txt"); outStream << pop; DiversityMeasurer diversity2(pop, dataset2, mae); vector<NeuralNetworkPtr> population = pop.getPopulation(); vector<vector<int> > assignedTo = diversity2.findBestNetwork(); vector<vector<FeatureVector> > recomposed = diversity2.buildBestOutput(); vector<int> pngParams = vector<int>(); pngParams.push_back(CV_IMWRITE_PNG_COMPRESSION); pngParams.push_back(3); cout << "Recording Data" << endl; for (uint i = 0; i < population.size(); i++) { ostringstream dir; dir << "network" << i; if (mkdir(dir.str().c_str(), S_IRWXU) == 0) { for (uint j = 0; j < dataset2.getNumSequences(); j++) { ostringstream name; name << "network" << i << "\/neuralNet" << i << "sample" << j << ".png"; vector<FeatureVector> features; for (uint k = 0; k < dataset2[j].size(); k++) { population[i]->forward(dataset2[j][k]); features.push_back(population[i]->getOutputSignal()); } vector<int> color = vector<int>(features.size(), i); Mat image = buildColorMapImage(features, 3, color, colors); imwrite(name.str(), image, 
pngParams); } } else { throw invalid_argument("pbdnnCluster : could not create directory"); } } ostringstream dirR; dirR << "recomposed"; if (mkdir(dirR.str().c_str(), S_IRWXU) == 0) { for (uint j = 0; j < recomposed.size(); j++) { ostringstream name; name << "recomposed\/recomposedSample" << j << ".png"; vector<FeatureVector> features; Mat image = buildColorMapImage(recomposed[j], 3, assignedTo[j], colors); imwrite(name.str(), image, pngParams); } } log.close(); return EXIT_SUCCESS; }
int main(int argc, char* argv[]) { vector<string> arguments; arguments.push_back("population size"); arguments.push_back("number of hidden units"); arguments.push_back("number of iterations"); arguments.push_back("learning dataset"); arguments.push_back("validation dataset"); arguments.push_back("simple load mode"); cout << helper("Pbdnn cluster", "Train a population of neural networks on a regression task.", arguments) << endl; if (argc != arguments.size() + 1) { cerr << "Not enough arguments, " << argc - 1 << " given and " << arguments.size() << " required" << endl; return EXIT_FAILURE; } int simpleMode = atoi(argv[6]); RegressionDataset dataset; RegressionDataset dataset2; if(simpleMode!=0) { dataset.simpleLoad(argv[4]); dataset2.simpleLoad(argv[5]); } else { dataset.load(argv[4]); dataset2.load(argv[5]); } cout << "Learning dataset loaded, total elements : " << dataset.getNumSamples() << endl; cout << "Validation dataset loaded, total elements : " << dataset2.getNumSamples() << endl; int populationSize = atoi(argv[1]); int numberOfHiddenUnits = atoi(argv[2]); int iterations = atoi(argv[3]); vector<Vec3b> colors = createColorRepartition(populationSize); AEMeasurer mae; PBDNN pop = PBDNN(populationSize, dataset.getFeatureVectorLength(), numberOfHiddenUnits, dataset.getMean(), dataset.getStandardDeviation()); DiversityMeasurer diversity(pop, dataset2, mae,0.01); // 07/02/13 : Not sure if useful or not so stop doing it /*do { pop = PBDNN(populationSize, dataset.getFeatureVectorLength(), numberOfHiddenUnits, dataset.getMean(), dataset.getStandardDeviation()); diversity.measurePerformance(); } while (diversity.getDisagreementScalar() < 0.17);*/ Mask mask; LearningParams params; params.setActualIteration(0); params.setMaxIterations(iterations); params.setLearningRate(0.001); params.setMaxTrainedPercentage(0.1); params.setSavedDuringProcess(true); params.setValidateEveryNIteration(100); ofstream log("training.log"); PopulationClusterBP pbp(pop, dataset, params, 
dataset2, mask, mask, log); // 07/02/13 : Not sure if useful or not so stop doing it /*cout << "Starting diversity" << endl << diversity.getDisagreementMatrix() << endl; cout << "Starting overall diversity : " << diversity.getDisagreementScalar() << endl;*/ cout << "Training" << endl; double t = (double) getTickCount(); pbp.train(); t = ((double) getTickCount() - t) / getTickFrequency(); cout << "Time :" << t << endl; cout << endl << "Saving network" << endl; ofstream outStream("IAMpop.pop"); outStream << pop; if(simpleMode == 0) { cout << "Recording Data" << endl; vector<NeuralNetworkPtr> population = pop.getPopulation(); vector<vector<int> > assignedTo = diversity.findBestNetwork(); vector<vector<FeatureVector> > recomposed = diversity.buildBestOutput(); vector<int> pngParams = vector<int>(); pngParams.push_back(CV_IMWRITE_PNG_COMPRESSION); pngParams.push_back(3); for (uint i = 0; i < population.size(); i++) { ostringstream dir; dir << "network" << i; if (mkdir(dir.str().c_str(), S_IRWXU) == 0) { for (uint j = 0; j < dataset2.getNumSequences(); j++) { ostringstream name; name << "network" << i << "\/neuralNet" << i << "sample" << j << ".png"; vector<FeatureVector> features; for (uint k = 0; k < dataset2[j].size(); k++) { population[i]->forward(dataset2[j][k]); features.push_back(population[i]->getOutputSignal()); } vector<int> color = vector<int>(features.size(), i); Mat image = buildColorMapImage(features, 3, color, colors); imwrite(name.str(), image, pngParams); } } else { throw invalid_argument("pbdnnCluster : could not create directory"); } } ostringstream dirR; dirR << "recomposed"; if (mkdir(dirR.str().c_str(), S_IRWXU) == 0) { for (uint j = 0; j < recomposed.size(); j++) { ostringstream name; name << "recomposed\/recomposedSample" << j << ".png"; vector<FeatureVector> features; Mat image = buildColorMapImage(recomposed[j], 3, assignedTo[j], colors); imwrite(name.str(), image, pngParams); } } } return EXIT_SUCCESS; }
//Build CART tree in the regression case CARTTrainer::TreeType CARTTrainer::buildTree(AttributeTables const& tables, RegressionDataset const& dataset, std::vector<RealVector> const& labels, std::size_t nodeId, std::size_t trainSize){ //Construct tree CARTClassifier<RealVector>::NodeInfo nodeInfo; nodeInfo.nodeId = nodeId; nodeInfo.label = mean(labels); nodeInfo.leftNodeId = 0; nodeInfo.rightNodeId = 0; //Store the Total Sum of Squares (TSS) RealVector labelSum = labels[0]; for(std::size_t i=1; i< labels.size(); i++){ labelSum += labels[0]; } nodeInfo.misclassProp = totalSumOfSquares(labels, 0, labels.size(), labelSum)*((double)dataset.numberOfElements()/trainSize); TreeType tree, lTree, rTree; //n = Total number of cases in the dataset //n1 = Number of cases to the left child node //n2 = number of cases to the right child node std::size_t n, n1, n2; n = tables[0].size(); if(n > m_nodeSize){ //label vectors std::vector<RealVector> bestLabels, tmpLabels; RealVector labelSumAbove(m_labelDimension), labelSumBelow(m_labelDimension); //Index of attributes std::size_t attributeIndex, bestAttributeIndex, bestAttributeValIndex; //Attribute values double bestAttributeVal; double impurity, bestImpurity = -1; std::size_t prev; bool doSplit = false; for ( attributeIndex = 0; attributeIndex< m_inputDimension; attributeIndex++){ labelSumBelow.clear(); labelSumAbove.clear(); tmpLabels.clear(); //Create a labels table, that corresponds to the sorted attribute for(std::size_t k=0; k<tables[attributeIndex].size(); k++){ tmpLabels.push_back(dataset.element(tables[attributeIndex][k].id).label); labelSumBelow += dataset.element(tables[attributeIndex][k].id).label; } labelSumAbove += tmpLabels[0]; labelSumBelow -= tmpLabels[0]; for(std::size_t i=1; i<n; i++){ prev = i-1; if(tables[attributeIndex][prev].value!=tables[attributeIndex][i].value){ n1=i; n2 = n-n1; //Calculate the squared error of the split impurity = 
(n1*totalSumOfSquares(tmpLabels,0,n1,labelSumAbove)+n2*totalSumOfSquares(tmpLabels,n1,n2,labelSumBelow))/(double)(n); if(impurity<bestImpurity || bestImpurity<0){ //Found a more pure split, store the attribute index and value doSplit = true; bestImpurity = impurity; bestAttributeIndex = attributeIndex; bestAttributeValIndex = prev; bestAttributeVal = tables[attributeIndex][bestAttributeValIndex].value; bestLabels = tmpLabels; } } labelSumAbove += tmpLabels[i]; labelSumBelow -= tmpLabels[i]; } } if(doSplit){ //Split the attribute tables AttributeTables rTables, lTables; splitAttributeTables(tables, bestAttributeIndex, bestAttributeValIndex, lTables, rTables); //Split the labels std::vector<RealVector> lLabels, rLabels; for(std::size_t i = 0; i <= bestAttributeValIndex; i++){ lLabels.push_back(bestLabels[i]); } for(std::size_t i = bestAttributeValIndex+1; i < bestLabels.size(); i++){ rLabels.push_back(bestLabels[i]); } //Continue recursively nodeInfo.attributeIndex = bestAttributeIndex; nodeInfo.attributeValue = bestAttributeVal; nodeInfo.leftNodeId = nodeId+1; lTree = buildTree(lTables, dataset, lLabels, nodeInfo.leftNodeId, trainSize); nodeInfo.rightNodeId = nodeInfo.leftNodeId + lTree.size(); rTree = buildTree(rTables, dataset, rLabels, nodeInfo.rightNodeId, trainSize); } } tree.push_back(nodeInfo); tree.insert(tree.end(), lTree.begin(), lTree.end()); tree.insert(tree.end(), rTree.begin(), rTree.end()); //Store entry in the tree return tree; }