/* Releases the data cached in this node — the class distribution, the
   contingency matrix and the stored examples — and recursively does the
   same for every non-null subtree, freeing the memory they occupy. */
void TTreeNode::removeStoredInfo()
{
  distribution = PDistribution();
  contingency = PDomainContingency();
  examples = PExampleGenerator();

  if (branches)
    const_PITERATE(TTreeNodeList, bi, branches)
      if (*bi)
        // BUG FIX: the recursion must call removeStoredInfo(), not
        // treeSize(); treeSize() only counts nodes and its result was
        // discarded, so the subtrees' cached data was never released.
        (*bi)->removeStoredInfo();
}
/* Learns a naive-Bayesian model: checks that a discrete class is present,
   estimates the apriori class distribution and, per attribute, either a
   conditional contingency or a conditional estimator.
   (NOTE: this chunk is truncated at the end of the attribute loop; the
   classifier construction that follows lives outside this view.) */
PClassifier TBayesLearner::operator()(PExampleGenerator gen, const int &weight)
{
  if (!gen->domain->classVar)
    raiseError("class-less domain");
  if (gen->domain->classVar->varType != TValue::INTVAR)
    raiseError("discrete class attribute expected");

  // Fall back to the default estimator constructors where none were given:
  // relative frequencies for the apriori class probabilities, by-rows for
  // discrete attributes and loess for continuous ones.
  PProbabilityEstimatorConstructor estConst =
    estimatorConstructor ? estimatorConstructor
                         : PProbabilityEstimatorConstructor(mlnew TProbabilityEstimatorConstructor_relative());
  PConditionalProbabilityEstimatorConstructor condEstConst =
    conditionalEstimatorConstructor ? conditionalEstimatorConstructor
                                    : PConditionalProbabilityEstimatorConstructor(mlnew TConditionalProbabilityEstimatorConstructor_ByRows(estConst));
  PConditionalProbabilityEstimatorConstructor condEstConstCont =
    conditionalEstimatorConstructorContinuous ? conditionalEstimatorConstructorContinuous
                                              : PConditionalProbabilityEstimatorConstructor(mlnew TConditionalProbabilityEstimatorConstructor_loess());

  PDomainContingency stat(mlnew TDomainContingency(gen, weight));

  PProbabilityEstimator estimator = estConst->call(stat->classes, PDistribution(), gen, weight);
  PDistribution distribution = estimator->call();
  if (distribution)
    // The estimator could precompute the whole apriori distribution, so the
    // estimator object itself is no longer needed.
    estimator = PProbabilityEstimator();

  int i = 0;
  bool haveContingencies = false;
  bool haveEstimators = false;
  PDomainContingency condProbs = mlnew TDomainContingency();
  condProbs->classes = distribution;
  PConditionalProbabilityEstimatorList condProbEstList = mlnew TConditionalProbabilityEstimatorList();

  PITERATE(TDomainContingency, di, stat) {
    PConditionalProbabilityEstimator condEst;
    PContingency condProp;
    try {
      condEst = (((*di)->varType==TValue::FLOATVAR) ? condEstConstCont : condEstConst)->call(*di, stat->classes, gen, weight, i++);
      condProp = condEst->call();
    }
    // FIX: catch by const reference instead of by value (avoids copying and
    // potential slicing of the exception object).
    catch (const mlexception &err) {
      // Tolerate only the known loess failure for empty/degenerate value
      // distributions (the attribute then gets a null contingency below);
      // anything else is rethrown.
      if (strcmp(err.what(), "'orange.ConditionalProbabilityEstimatorConstructor_loess': distribution (of attribute values, probably) is empty or has only a single value"))
        throw;
    }
    condProbs->push_back(condProp);
    // BUG FIX: the original tested 'condProbs' — the freshly allocated list,
    // which is always non-null — so the else branch was unreachable and
    // 'haveEstimators' could never be set. The test must ask whether a
    // precomputed contingency ('condProp') was obtained for this attribute.
    if (condProp) {
      condProbEstList->push_back(PConditionalProbabilityEstimator());
      haveContingencies = true;
    }
    else {
      condProbEstList->push_back(condEst);
      haveEstimators = true;
    }
  }
/* Returns the class distribution conditioned on the given attribute value.
   Discrete conditions index the stored contingency directly; continuous
   conditions are answered by linear interpolation between the two stored
   points that bracket the value. Raises for undefined or mismatched
   condition values. */
PDistribution TConditionalProbabilityEstimator_FromDistribution::operator()(const TValue &condition) const
{
  if (condition.varType == TValue::INTVAR)
    // Discrete condition: look up the row for this value.
    return probabilities->operator[](condition);

  else if (condition.varType == TValue::FLOATVAR) {
    if (condition.isSpecial())
      raiseError("undefined attribute value for condition");
    if (probabilities->varType != TValue::FLOATVAR)
      raiseError("invalid attribute value type for condition");

    const float &x = condition.floatV;
    const TDistributionMap *dm = probabilities->continuous;

    // First stored point whose key is strictly greater than x.
    TDistributionMap::const_iterator rb = dm->upper_bound(x);
    if (rb==dm->end())
      // NOTE(review): x above the last stored point wraps to the first
      // point; with the zeroing branch below this yields an all-zero
      // distribution — confirm this extrapolation behavior is intended.
      rb = dm->begin();

    // Start from a clone of the upper bracketing distribution (y2);
    // the interpolation below is then done in place.
    TDistribution *result = CLONE(TDistribution, (*rb).second);
    PDistribution wresult = result;

    // x lies below the first stored point (and does not hit it exactly):
    // answer with an all-zero distribution.
    if ((rb==dm->begin()) && ((*rb).first!=x)) {
      (*result) *= 0;
      return wresult;
    }

    const float &x2 = (*rb).first;
    rb--;
    const float &x1 = (*rb).first;
    const PDistribution &y1 = (*rb).second;

    // Degenerate interval: average the two distributions.
    if (x1 == x2) {
      *result += y1;
      *result *= 0.5;
      return wresult;
    }

    // The normal formula for this is in the function above
    // Linear interpolation, result = y1 + (x-x1)/(x2-x1) * (y2-y1),
    // computed in place starting from result == y2.
    *result -= y1;
    *result *= (x-x1)/(x2-x1);
    *result += y1;
    return wresult;
  }

  raiseError("invalid attribute value for condition");
  return PDistribution();
}
/* Base-class fallback for the no-argument call: a plain
   TProbabilityEstimator has no precomputed class distribution to hand
   out, so it answers with a null PDistribution. */
PDistribution TProbabilityEstimator::operator()() const
{
  PDistribution noPrecomputedDistribution;
  return noPrecomputedDistribution;
}
#include "classify.hpp"
#include "stladdon.hpp"

WRAPPER(TransformValue);

/* A classifier whose answer is derived from a single attribute ('whichVar')
   of the example, optionally passed through 'transformer'.
   (Inferred from the fields — confirm against the .cpp implementation.) */
class ORANGE_API TClassifierFromVar : public TClassifier {
public:
  __REGISTER_CLASS

  PVariable whichVar; //P(+variable) variable
  PTransformValue transformer; //P transformer
  PDistribution distributionForUnknown; //P distribution for unknown value
  bool transformUnknowns; //P if false (default), unknowns stay unknown or are changed into distribution if given

  TClassifierFromVar(PVariable classVar=PVariable(), PDistribution = PDistribution());
  TClassifierFromVar(PVariable classVar, PVariable whichVar, PDistribution = PDistribution());
  TClassifierFromVar(PVariable classVar, PVariable whichVar, PDistribution, PTransformValue);
  TClassifierFromVar(const TClassifierFromVar &);

  virtual TValue operator ()(const TExample &);
  virtual PDistribution classDistribution(const TExample &exam);
  virtual void predictionAndDistribution(const TExample &ex, TValue &val, PDistribution &classDist);

protected:
  // NOTE(review): these look like a cached lookup of whichVar's position
  // within the most recently seen domain (keyed by its version) — confirm
  // in the .cpp.
  int lastDomainVersion;
  PVariable lastWhichVar;
  int position;
};
/* Recursively induces a (sub)tree from the given examples.
   'candidates' flags the attributes still available for splitting and is
   modified and restored in place during the recursion; 'depth' is the
   current recursion depth, checked against maxDepth.
   (NOTE: this chunk is truncated inside the final else branch; the rest of
   the function lives outside this view.) */
PTreeNode TTreeLearner::operator()(PExampleGenerator examples, const int &weightID, PDistribution apriorClass, vector<bool> &candidates, const int &depth)
{
  PDomainContingency contingency;
  PDomainDistributions domainDistributions;
  PDistribution classDistribution;

  // No examples -> no node.
  if (!examples->numberOfExamples())
    return PTreeNode();

  // Contingencies are computed when a dedicated computer is given or when
  // they are to be stored in the node; the latter assignment wins.
  if (contingencyComputer)
    contingency = contingencyComputer->call(examples, weightID);
  if (storeContingencies)
    contingency = mlnew TDomainContingency(examples, weightID);

  if (contingency)
    classDistribution = contingency->classes;
  else if (storeDistributions)
    classDistribution = getClassDistribution(examples, weightID);

  // Zero total example weight also means an empty node.
  if (classDistribution) {
    if (!classDistribution->abs)
      return PTreeNode();
  }
  else if (examples->weightOfExamples() < 1e-10)
    return PTreeNode();

  TTreeNode *utreeNode = mlnew TTreeNode();
  PTreeNode treeNode = utreeNode;
  utreeNode->weightID = weightID;

  // The node is a leaf when the depth limit is reached or the stop
  // criterion fires.
  bool isLeaf = ((maxDepth>=0) && (depth == maxDepth)) || stop->call(examples, weightID, contingency);

  if (isLeaf || storeNodeClassifier) {
    utreeNode->nodeClassifier =
      nodeLearner ? nodeLearner->smartLearn(examples, weightID, contingency, domainDistributions, classDistribution)
                  : TMajorityLearner().smartLearn(examples, weightID, contingency, domainDistributions, classDistribution);
    if (isLeaf) {
      if (storeContingencies)
        utreeNode->contingency = contingency;
      if (storeDistributions)
        utreeNode->distribution = classDistribution;
      return treeNode;
    }
  }

  // Attach the statistics for the split constructor's benefit; they are
  // cleared again below unless the corresponding store* flags are set.
  utreeNode->contingency = contingency;
  utreeNode->distribution = classDistribution;

  float quality;
  int spentAttribute;
  utreeNode->branchSelector = split->call(utreeNode->branchDescriptions, utreeNode->branchSizes, quality, spentAttribute, examples, weightID, contingency, apriorClass ? apriorClass : classDistribution, candidates, utreeNode->nodeClassifier);

  isLeaf = !utreeNode->branchSelector;
  bool hasNullNodes = false;

  if (!isLeaf) {
    // Mark the attribute spent by the split unavailable for descendants;
    // if it was already unavailable, remember there is nothing to restore.
    if (spentAttribute>=0)
      if (candidates[spentAttribute])
        candidates[spentAttribute] = false;
      else
        spentAttribute = -1;
    /* BEWARE: If you add an additional 'return' in the code below, do not forget to restore the candidate's flag. */
    utreeNode->branches = mlnew TTreeNodeList();
    vector<int> newWeights;
    PExampleGeneratorList subsets = exampleSplitter->call(treeNode, examples, weightID, newWeights);
    if (!utreeNode->branchSizes)
      utreeNode->branchSizes = branchSizesFromSubsets(subsets, weightID, newWeights);
    if (!storeContingencies)
      utreeNode->contingency = PDomainContingency();
    if (!storeDistributions)
      utreeNode->distribution = PDistribution();

    vector<int>::iterator nwi = newWeights.begin(), nwe = newWeights.end();
    PITERATE(TExampleGeneratorList, gi, subsets) {
      if ((*gi)->numberOfExamples()) {
        // Recurse into the non-empty subset, using the per-branch weight id
        // when the splitter produced one.
        utreeNode->branches->push_back(call(*gi, (nwi!=nwe) ? *nwi : weightID, apriorClass, candidates, depth+1));
        // Can store pointers to examples: the original is stored in the root
        if (storeExamples && utreeNode->branches->back())
          utreeNode->branches->back()->examples = *gi;
        else if ((nwi!=nwe) && *nwi && (*nwi != weightID))
          examples->removeMetaAttribute(*nwi);
      }
      else {
        // Empty subset: record a null branch.
        utreeNode->branches->push_back(PTreeNode());
        hasNullNodes = true;
      }
      if (nwi!=nwe)
        nwi++;
    }
    /* If I set it to false, it must had been true before (otherwise, my TreeSplitConstructor wouldn't be allowed to spend it). Hence, I can simply set it back to true... */
    if (spentAttribute>=0)
      candidates[spentAttribute] = true;
  }
  else { // no split constructed
    if (!utreeNode->contingency)
enum {NeedsNothing, NeedsClassDistribution, NeedsDomainDistribution, NeedsDomainContingency, NeedsExampleGenerator}; int needs; //PR the kind of data that learner needs TLearner(const int & = NeedsExampleGenerator); virtual PClassifier operator()(PVariable); virtual PClassifier operator()(PDistribution); virtual PClassifier operator()(PDomainDistributions); virtual PClassifier operator()(PDomainContingency); virtual PClassifier operator()(PExampleGenerator, const int &weight = 0); virtual PClassifier smartLearn(PExampleGenerator, const int &weight, PDomainContingency = PDomainContingency(), PDomainDistributions = PDomainDistributions(), PDistribution = PDistribution()); }; class ORANGE_API TLearnerFD : public TLearner { public: __REGISTER_CLASS PDomain domain; //P domain TLearnerFD(); TLearnerFD(PDomain); }; WRAPPER(Learner)