PProbabilityEstimator TProbabilityEstimatorConstructor_m::operator()(PDistribution frequencies, PDistribution apriori, PExampleGenerator, const long &weightID, const int &) const
{ TProbabilityEstimator_FromDistribution *pefd = mlnew TProbabilityEstimator_FromDistribution(CLONE(TDistribution, frequencies));
  PProbabilityEstimator estimator = pefd;
  
  TDiscDistribution *ddist = pefd->probabilities.AS(TDiscDistribution);  
  if (ddist && (ddist->cases > 1e-20) && apriori) {
    TDiscDistribution *dapriori = apriori.AS(TDiscDistribution);
    if (!dapriori || (dapriori->abs < 1e-20))
      raiseError("invalid apriori distribution");
    
    float mabs = m/dapriori->abs;
    const float &abs = ddist->abs;
    const float &cases = ddist->cases;
    const float div = cases + m;
    if ((abs==cases) || !renormalize) {
      int i = 0;
      for(TDiscDistribution::iterator di(ddist->begin()), de(ddist->end()), ai(dapriori->begin());
          di != de;
          di++, ai++, i++)
         ddist->setint(i, (*di+*ai*mabs)/div);
    }
    else {
      int i = 0;
      for(TDiscDistribution::iterator di(ddist->begin()), de(ddist->end()), ai(dapriori->begin());
          di != de;
          di++, ai++, i++)
         ddist->setint(i, (*di / abs * cases + *ai*mabs)/div);
    }
  }
  else
    pefd->probabilities->normalize();
    
  return estimator;
}
Exemple #2
0
// rejects the split if there are less than two non-empty branches
// or there is a non-empty branch with less then minSubset examples
bool checkDistribution(const TDiscDistribution &dist, const float &minSubset)
{
  int nonzero = 0;
  for(TDiscDistribution::const_iterator dvi(dist.begin()), dve(dist.end()); dvi!=dve; dvi++)
    if (*dvi > 0) {
      if  (*dvi < minSubset)
        return false;
      nonzero++;
    }

  return nonzero >= 2;
}