Пример #1
0
/* Fits a logistic regression model to the given examples and returns it
 * wrapped as a classifier.
 *
 * Steps, in order:
 *   1. build an imputer (only if imputerConstructor is set) and impute `gen`;
 *   2. create the TLogRegClassifier and attach the data description and imputer;
 *   3. continuize the (imputed) domain and convert the examples to it;
 *   4. run the fitter (a TLogRegFitter_Cholesky is installed into the `fitter`
 *      member if none is set) and copy beta and its standard errors into the
 *      classifier;
 *   5. unless the fitter reported an error, compute the Wald Z and P values.
 *
 * gen      examples to fit the model to
 * weight   forwarded unchanged to the imputer, continuizer and fitter
 * error    forwarded by reference to the fitter; its value after the call is
 *          also stored in the classifier's fit_status
 * errorAt  forwarded by reference to the fitter (presumably the attribute the
 *          error refers to -- confirm against the fitter)
 *
 * Returns the classifier. If error >= TLogRegFitter::Constant, wald_Z and P
 * are not computed.
 */
PClassifier TLogRegLearner::fitModel(PExampleGenerator gen, const int &weight, int &error, PVariable &errorAt)
{
  PImputer imputer = imputerConstructor ? imputerConstructor->call(gen, weight) : PImputer();
  PExampleGenerator imputed = imputer ? imputer->call(gen, weight) : gen;

  // Construct the classifier and hand it to its wrapper right away, so that
  // it is already held by `cl` if any of the constructions below throws.
  // `lrc` is kept only as a typed alias for filling in the fields.
  TLogRegClassifier *lrc = mlnew TLogRegClassifier(imputed->domain);
  PClassifier cl = lrc;

  lrc->dataDescription = mlnew TEFMDataDescription(gen->domain, mlnew TDomainDistributions(gen), 0, getMetaID());
  lrc->imputer = imputer;

  // Continuization is applied unconditionally: with the user-supplied
  // continuizer if there is one, otherwise with the default one.
  lrc->continuizedDomain = domainContinuizer ? domainContinuizer->call(imputed, weight)
                                             : (*logisticRegressionDomainContinuizer)(imputed, weight);
  imputed = mlnew TExampleTable(lrc->continuizedDomain, imputed);

  // Make sure there is a fitter; note that the default is stored in the
  // member and thus stays there for subsequent calls.
  if (!fitter)
    fitter = PLogRegFitter(mlnew TLogRegFitter_Cholesky());

  // Fit the logistic regression.
  PAttributedFloatList temp_beta, temp_beta_se;
  temp_beta = fitter->call(imputed, weight, temp_beta_se, lrc->likelihood, error, errorAt);
  lrc->fit_status = error;

  // Variables labelling the coefficient lists: the class variable first,
  // followed by the attributes of the continuized domain.
  PVarList enum_attributes = mlnew TVarList();
  enum_attributes->push_back(imputed->domain->classVar);
  PITERATE(TVarList, vl, imputed->domain->attributes)
    enum_attributes->push_back(*vl);

  // Transform the fitter's output into attributed lists owned by the classifier.
  lrc->beta = mlnew TAttributedFloatList(enum_attributes);
  lrc->beta_se = mlnew TAttributedFloatList(enum_attributes);

  PITERATE(TAttributedFloatList, fi, temp_beta)
    lrc->beta->push_back(*fi);

  PITERATE(TAttributedFloatList, fi_se, temp_beta_se)
    lrc->beta_se->push_back(*fi_se);

  // On a fitting error the classifier is returned without Wald statistics.
  if (error >= TLogRegFitter::Constant)
    return cl;

  lrc->wald_Z = computeWaldZ(lrc->beta, lrc->beta_se);
  lrc->P = computeP(lrc->wald_Z);

  // Classifier with domain, beta and standard errors of beta.
  return cl;
}
Пример #2
0
/* Clears `component` with a default-constructed value of its own type, but
 * only if it was installed temporarily. Default-constructed wrappers are what
 * this code uses as "not set" values. */
template<class TWrapped>
static inline void clearTemporaryTreeComponent(const bool temporary, TWrapped &component)
{
  if (temporary)
    component = TWrapped();
}


/* Induces a tree from the given examples and returns it as a TTreeClassifier.
 *
 * Components that are not set on the learner (split, stop, exampleSplitter)
 * are filled in with defaults for the duration of the call and cleared again
 * before the function is left, whether it returns or an exception propagates.
 *
 * ogen    examples to learn from; must be non-null and have a class variable
 * weight  forwarded unchanged to the tree-building call
 *
 * Errors are reported through raiseError for: a null generator, a class-less
 * domain, a class that is neither INTVAR nor FLOATVAR when the default split
 * constructor has to be chosen, and an empty class distribution.
 */
PClassifier TTreeLearner::operator()(PExampleGenerator ogen, const int &weight)
{
  if (!ogen)
    raiseError("invalid example generator");

  PVariable &classVar = ogen->domain->classVar;

  if (!classVar)
    raiseError("class-less domain");

  // Remember which components are missing, so that only those are cleared
  // afterwards and user-supplied ones are left alone.
  const bool tempSplit = !split;
  const bool tempStop = !stop;
  const bool tempSplitter = !exampleSplitter;

  try {
    // The defaults are installed inside the try block so that a failure
    // while installing them is rolled back like any other failure.
    if (tempSplit) {
      if (classVar->varType == TValue::INTVAR)
        split = defaultDiscreteTreeSplitConstructor;
      else if (classVar->varType == TValue::FLOATVAR)
        split = defaultContinuousTreeSplitConstructor;
      else
        raiseError("invalid class type (discrete or continuous expected)");
    }

    if (tempStop)
      stop = defaultStop;

    if (tempSplitter)
      exampleSplitter = mlnew TTreeExampleSplitter_UnknownsAsSelector;

    PExampleGenerator examples;
    /* If we don't intend to store them, we'll copy them if they're not in a table.
       If we must store examples, we'll copy them in any case... */
    if (storeExamples)
      examples = mlnew TExampleTable(ogen);
    else
      examples = toExampleTable(ogen);

    PDistribution apriorClass = getClassDistribution(examples);
    if (apriorClass->abs == 0)
      raiseError("no examples");

    // Initially every attribute is a candidate.
    vector<bool> candidates(examples->domain->attributes->size(), true);

    PTreeNode root = call(examples, weight, apriorClass, candidates, 0);
    if (storeExamples)
      root->examples = examples;

    // Clear the temporary components before the classifier is constructed.
    clearTemporaryTreeComponent(tempSplit, split);
    clearTemporaryTreeComponent(tempStop, stop);
    clearTemporaryTreeComponent(tempSplitter, exampleSplitter);

    return mlnew TTreeClassifier(examples->domain, root,
                               descender ? descender : mlnew TTreeDescender_UnknownMergeAsSelector);
  }
  catch (...) {
    // Catch everything (not only std::exception) so the learner is never
    // left with temporary components installed; the original exception is
    // rethrown unchanged.
    clearTemporaryTreeComponent(tempSplit, split);
    clearTemporaryTreeComponent(tempStop, stop);
    clearTemporaryTreeComponent(tempSplitter, exampleSplitter);
    throw;
  }
}