void Forest::TrainLarge(ClassificationData& trainingData,
                        bool allNodesStoreLabelDistribution,
                        bool refineWithAllTrainingData,
                        int verbosityLevel)
{
    // get available labels from training data
    labels = trainingData.GetAvailableLabels();

    if(verbosityLevel > 0) {
        std::cout << "Train forest with " << nTrees << " trees..." << std::endl;
        if(verbosityLevel > 1) {
            // list all labels the forest is trained for
            std::cout << "Used label IDs:" << std::endl;
            for(unsigned int i = 0; i < labels.size(); ++i) {
                std::cout << labels[i] << std::endl;
            }
        }
    }

    // train every tree independently
    for(int i = 0; i < nTrees; ++i) {
        if(verbosityLevel > 0) {
            std::cout << "Tree " << i+1 << "/" << nTrees << std::endl;
        }
        if(verbosityLevel > 1) {
            std::cout << "- Bag training data..." << std::endl;
        }

        // storage for the indices of the data points used for this tree (bagging)
        std::vector<unsigned int> dataPointIndices = trainingData.NewBag(baggingRatio);

        if(verbosityLevel > 1) {
            std::cout << "- Train tree with " << dataPointIndices.size()
                      << " datapoints..." << std::endl;
        }

        // create and train tree
        Tree t(&randomGenerator);
        t.TrainParallel(trainingData, dataPointIndices, maxDepth,
                        testedSplittingFunctions, minInformationGain,
                        minPointsForSplit, allNodesStoreLabelDistribution,
                        verbosityLevel);
        trees.push_back(t);
    }

    if(refineWithAllTrainingData) {
        if(verbosityLevel > 0) {
            std::cout << "Refine all trees with all available training data..." << std::endl;
        }
        RefineLeafNodes(trainingData, verbosityLevel);
    }

    splitNodesStoreLabelDistribution = allNodesStoreLabelDistribution;

    if(verbosityLevel > 0) {
        std::cout << "### TRAINING DONE ###" << std::endl;
    }
}
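// RefineLeafNodes is called above but its implementation lives elsewhere in
// this class. A minimal sketch of what the refinement step could look like,
// assuming Tree exposes a GetLeafForPoint() accessor and leaves expose
// AddLabelCount()/NormalizeLeafDistributions() (all hypothetical names, not
// confirmed by this file): every training point is routed down every tree,
// and the label histograms in the reached leaves are re-estimated from the
// full data set instead of only the bagged subset.
//
// void Forest::RefineLeafNodes(ClassificationData& trainingData, int verbosityLevel)
// {
//     for(unsigned int t = 0; t < trees.size(); ++t) {
//         for(unsigned int p = 0; p < trainingData.GetCount(); ++p) {
//             // route the point to its leaf and update the label histogram
//             Node* leaf = trees[t].GetLeafForPoint(trainingData, p);
//             leaf->AddLabelCount(trainingData.GetLabel(p));
//         }
//         // turn the raw counts back into a probability distribution
//         trees[t].NormalizeLeafDistributions();
//     }
// }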
void Forest::Train(ClassificationData& trainingData, int verbosityLevel)
{
    // get available labels from training data
    labels = trainingData.GetAvailableLabels();

    if(verbosityLevel > 0) {
        std::cout << "Train forest with " << nTrees << " trees..." << std::endl;
        if(verbosityLevel > 1) {
            // list all labels the forest is trained for
            std::cout << "Used label IDs:" << std::endl;
            for(unsigned int i = 0; i < labels.size(); ++i) {
                std::cout << labels[i] << std::endl;
            }
        }
    }

    // create trees up front so the parallel loop can index into the vector
    for(int i = 0; i < nTrees; ++i) {
        trees.push_back(Tree(&randomGenerator));
    }

    // distribution over data point indices for bagging of training data
    boost::uniform_int<int> intDist(0, trainingData.GetCount() - 1);

    // how many data points for every tree?
    const int nDataPoints = static_cast<int>(floor(trainingData.GetCount() * baggingRatio));

    // train every tree independently
    #pragma omp parallel for
    for(int i = 0; i < nTrees; ++i) {
        // storage for the indices of the data points used for this tree (bagging)
        std::vector<unsigned int> dataPointIndices;
        dataPointIndices.reserve(nDataPoints);

        // the shared random generator is not thread-safe, so draw the bag
        // (and print progress) inside a critical section; note that the trees
        // also hold a pointer to the same generator, so Tree::Train may still
        // contend for it (see the sketch after this function)
        #pragma omp critical
        {
            if(verbosityLevel > 0) {
                std::cout << "Tree " << i+1 << "/" << nTrees << std::endl;
            }
            if(verbosityLevel > 1) {
                std::cout << "- Bag training data..." << std::endl;
                std::cout << "- Train tree with " << nDataPoints << " datapoints..." << std::endl;
            }

            // randomly select training points with replacement (bagging)
            for(int j = 0; j < nDataPoints; ++j) {
                dataPointIndices.push_back(intDist(randomGenerator));
            }
        }

        trees[i].Train(trainingData, dataPointIndices, maxDepth,
                       testedSplittingFunctions, minInformationGain,
                       minPointsForSplit, verbosityLevel);
    }

    if(verbosityLevel > 0) {
        std::cout << "### TRAINING DONE ###" << std::endl;
    }
}
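// The parallel loop above funnels all random number generation through the
// single shared generator, which serializes the bagging step and still lets
// the trees contend for the generator they were handed a pointer to. A
// minimal sketch of a race-free alternative, assuming Tree accepts a pointer
// to any boost::mt19937 and that a seed value is available (baseSeed is a
// hypothetical member, not part of the actual class): give every tree its
// own generator, seeded deterministically from the tree index.
//
// std::vector<boost::mt19937> treeGenerators;
// treeGenerators.reserve(nTrees); // keep the pointers handed to the trees valid
// for(int i = 0; i < nTrees; ++i) {
//     treeGenerators.push_back(boost::mt19937(baseSeed + i));
//     trees.push_back(Tree(&treeGenerators[i]));
// }
//
// #pragma omp parallel for
// for(int i = 0; i < nTrees; ++i) {
//     std::vector<unsigned int> dataPointIndices;
//     dataPointIndices.reserve(nDataPoints);
//     boost::uniform_int<int> intDist(0, trainingData.GetCount() - 1);
//     for(int j = 0; j < nDataPoints; ++j) {
//         dataPointIndices.push_back(intDist(treeGenerators[i])); // no shared state
//     }
//     trees[i].Train(trainingData, dataPointIndices, maxDepth,
//                    testedSplittingFunctions, minInformationGain,
//                    minPointsForSplit, verbosityLevel);
// }
//
// This trades a little memory for fully independent, reproducible per-tree
// sampling and removes the need for the critical section entirely.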