Beispiel #1
0
// Trains the forest one tree after another.
//
// For each of the nTrees trees a fresh bag of data point indices is requested
// from trainingData (NewBag with baggingRatio), a Tree is trained on that bag
// via Tree::TrainParallel and then appended to `trees`.
//
// trainingData                   - data set the trees are trained on; also
//                                  supplies the label list stored in `labels`
// allNodesStoreLabelDistribution - forwarded to Tree::TrainParallel and
//                                  recorded in splitNodesStoreLabelDistribution
// refineWithAllTrainingData      - when true, RefineLeafNodes is called with
//                                  the full training data after all trees
//                                  have been trained
// verbosityLevel                 - > 0 prints progress, > 1 additionally
//                                  prints the label list and per-tree details
void Forest::TrainLarge(ClassificationData& trainingData, bool allNodesStoreLabelDistribution, bool refineWithAllTrainingData, int verbosityLevel)
{
  const bool logProgress = verbosityLevel > 0;
  const bool logDetails  = verbosityLevel > 1;

  // Remember which labels occur in the training data.
  labels = trainingData.GetAvailableLabels();

  if(logProgress)
    std::cout << "Train forest with " << nTrees << " trees..." << std::endl;

  if(logDetails)
  {
    // Dump every label the forest is trained for, one per line.
    std::cout << "Used label IDs:" << std::endl;
    for(unsigned int labelIdx = 0; labelIdx < labels.size(); ++labelIdx)
      std::cout << labels[labelIdx] << std::endl;
  }

  // Trees are built sequentially; each one gets its own bag.
  for(int treeIdx = 0; treeIdx < nTrees; ++treeIdx)
  {
    if(logProgress)
      std::cout << "Tree " << treeIdx+1 << "/" << nTrees << std::endl;

    if(logDetails)
      std::cout << "- Bag training data..." << std::endl;

    // Indices of the data points this tree is trained on (bagging).
    std::vector<unsigned int> bagIndices = trainingData.NewBag(baggingRatio);

    if(logDetails)
      std::cout << "- Train tree with " << bagIndices.size() << " datapoints..." << std::endl;

    // Build the tree, train it on its bag and add a copy to the forest.
    Tree newTree(&randomGenerator);
    newTree.TrainParallel(trainingData, bagIndices, maxDepth, testedSplittingFunctions, minInformationGain, minPointsForSplit, allNodesStoreLabelDistribution, verbosityLevel);
    trees.push_back(newTree);
  }

  if(refineWithAllTrainingData)
  {
    if(logProgress)
      std::cout << "Refine all trees with all available training data..." << std::endl;

    RefineLeafNodes(trainingData, verbosityLevel);
  }

  // Recorded only after the optional refinement step, as in the original flow.
  splitNodesStoreLabelDistribution = allNodesStoreLabelDistribution;

  if(logProgress)
    std::cout << "### TRAINING DONE ###" << std::endl;
}
Beispiel #2
0
// Trains nTrees new trees on bagged subsets of the training data. The
// per-tree work runs inside an OpenMP "parallel for" loop.
//
// trainingData   - data set the trees are trained on; also supplies the
//                  label list stored in `labels`
// verbosityLevel - > 0 prints progress, > 1 additionally prints the label
//                  list and per-tree details
//
// Each tree is trained on floor(GetCount() * baggingRatio) data point indices
// drawn uniformly (with replacement) from [0, GetCount()-1].
void Forest::Train(ClassificationData& trainingData, int verbosityLevel)
{
  // get available labels from training data
  labels = trainingData.GetAvailableLabels();

  if(verbosityLevel > 0)
  {
    std::cout << "Train forest with " << nTrees << " trees..." << std::endl;

    if(verbosityLevel > 1)
    {
      // List all labels the forest is trained for
      std::cout << "Used label IDs:" << std::endl;
      for(unsigned int i=0; i<labels.size(); ++i)
      {
        std::cout << labels[i] << std::endl;
      }
    }
  }

  // The new trees are appended, so they start at this index. Training
  // trees[firstNewTree + i] instead of trees[i] makes sure the trees created
  // by THIS call are the ones being trained even if the forest already
  // contained trees from an earlier call.
  const int firstNewTree = static_cast<int>(trees.size());

  // create trees
  for(int i=0; i<nTrees; ++i)
    trees.push_back(Tree(&randomGenerator));

  // random generator for bagging of training data
  boost::uniform_int<int> intDist(0, trainingData.GetCount()-1);

  // how many data points for every tree?
  const int nDataPoints = static_cast<int>(floor(trainingData.GetCount() * baggingRatio));

  // train every tree independently
  #pragma omp parallel for
  for(int i=0; i < nTrees; ++i)
  {
    // storage for the indices of the used data points for each tree (bagging)
    std::vector<unsigned int> dataPointIndices;
    if(nDataPoints > 0)
      dataPointIndices.reserve(nDataPoints);

    // keep the log lines of one tree together
    #pragma omp critical
    {
        if(verbosityLevel > 0)
        {
          std::cout << "Tree " << i+1 << "/" << nTrees << std::endl;
        }
        if(verbosityLevel > 1)
        {
          std::cout << "- Bag training data..." << std::endl;
          std::cout << "- Train tree with " << nDataPoints << " datapoints..." << std::endl;
        }
    }

    // randomly select training points for each tree (bagging).
    // randomGenerator and intDist are shared by all threads of the parallel
    // loop, so drawing from them must be serialized to avoid a data race.
    #pragma omp critical(forest_train_bagging)
    {
      for(int j=0; j < nDataPoints; ++j)
        dataPointIndices.push_back(intDist(randomGenerator));
    }

    // NOTE(review): every Tree is constructed with a pointer to the same
    // randomGenerator; if Tree::Train draws from it, those draws are still
    // unsynchronized across threads - confirm against Tree's implementation.
    trees[firstNewTree + i].Train(trainingData, dataPointIndices, maxDepth, testedSplittingFunctions, minInformationGain, minPointsForSplit, verbosityLevel);
  }

  if(verbosityLevel > 0)
  {
    std::cout << "### TRAINING DONE ###" << std::endl;
  }
}