// Builds a probability estimator from a copy of `frequencies`.
//
// When the copy is a discrete distribution with a non-negligible number of
// cases and an `apriori` distribution is supplied, every entry is replaced by
// the m-estimate
//
//     p_i = (n_i + m * a_i / apriori->abs) / (cases + m)
//
// where n_i is the observed frequency and a_i the matching apriori entry.
// If `renormalize` is set and the distribution's `abs` differs from its
// `cases`, n_i is first rescaled by cases/abs.
//
// In every other situation (non-discrete distribution, no cases, or no
// apriori distribution) the copy is simply normalized.
//
// Parameters:
//   frequencies - observed distribution; it is cloned, never modified.
//   apriori     - prior distribution; may be null, in which case no smoothing
//                 is applied.
//   weightID and the two unnamed parameters are not used by this constructor.
//
// Raises an error ("invalid apriori distribution") when `apriori` is given
// but is not a discrete distribution or has `abs` below 1e-20.
//
// NOTE(review): apriori entries beyond the length of `frequencies` are
// ignored, so in that case the resulting probabilities need not sum to 1.
PProbabilityEstimator TProbabilityEstimatorConstructor_m::operator()(PDistribution frequencies, PDistribution apriori, PExampleGenerator, const long &weightID, const int &) const
{
  TProbabilityEstimator_FromDistribution *pefd = mlnew TProbabilityEstimator_FromDistribution(CLONE(TDistribution, frequencies));
  // Hand the raw pointer to the wrapper right away so that it is owned even
  // if raiseError below leaves the function early.
  PProbabilityEstimator estimator = pefd;

  TDiscDistribution *ddist = pefd->probabilities.AS(TDiscDistribution);
  if (ddist && (ddist->cases > 1e-20) && apriori) {
    TDiscDistribution *dapriori = apriori.AS(TDiscDistribution);
    if (!dapriori || (dapriori->abs < 1e-20))
      raiseError("invalid apriori distribution");

    const float mabs = m/dapriori->abs;

    // Take the totals by value, not by reference: the loop below rewrites
    // ddist through setint(), which may update ddist->abs and ddist->cases
    // (setint is defined elsewhere). The formula needs the totals as they
    // were before any entry was replaced.
    const float freqAbs = ddist->abs;
    const float freqCases = ddist->cases;
    const float div = freqCases + m;

    // Rescale the observed frequencies only when asked to and when the two
    // totals actually differ.
    const bool rescale = renormalize && (freqAbs != freqCases);

    int i = 0;
    TDiscDistribution::iterator ai(dapriori->begin()), ae(dapriori->end());
    for(TDiscDistribution::iterator di(ddist->begin()), de(ddist->end()); di != de; ++di, ++i) {
      // A prior shorter than the frequency distribution contributes zero
      // prior mass for the missing values instead of being read past its end.
      float prior = 0.0f;
      if (ai != ae) {
        prior = *ai;
        ++ai;
      }

      if (rescale)
        ddist->setint(i, (*di / freqAbs * freqCases + prior*mabs)/div);
      else
        ddist->setint(i, (*di + prior*mabs)/div);
    }
  }
  else
    pefd->probabilities->normalize();

  return estimator;
}
// rejects the split if there are less than two non-empty branches // or there is a non-empty branch with less then minSubset examples bool checkDistribution(const TDiscDistribution &dist, const float &minSubset) { int nonzero = 0; for(TDiscDistribution::const_iterator dvi(dist.begin()), dve(dist.end()); dvi!=dve; dvi++) if (*dvi > 0) { if (*dvi < minSubset) return false; nonzero++; } return nonzero >= 2; }