Beispiel #1
0
float TEFMDataDescription::getExampleMatch(const TExample &ex1, const TExample &ex2)
{ 
  if ((ex1.domain != domain) && (ex2.domain != domain))
    raiseError("example's domain doesn't match the data descriptor's");

  float weight=1.0;
  TExample::iterator e1i(ex1.begin()), e2i(ex2.end());

  if (domainDistributions) {
    if (matchProbabilities.size() != domainDistributions->size())
      matchProbabilities = vector<float>(domainDistributions->size(), -1);

    vector<float>::iterator mi(matchProbabilities.begin());
    TDomainDistributions::const_iterator di(domainDistributions->begin()), de(domainDistributions->end());

    for(; di!=de; e1i++, e2i++, di++, mi++) {
      if ((*e1i).varType == TValue::INTVAR) {
        if ((*e1i).isDK()) {
          if ((*e2i).isDK()) {
            if (*mi == -1) {
              float mp = 0.0;
              ITERATE(TDiscDistribution, ddi, ((TDiscDistribution &)((*di).getReference())))
                mp += *ddi * *ddi;
              *mi = mp;
            }
            weight *= *mi;
          }
          else if (!(*e2i).isSpecial())
            weight *= (*di)->p(*e2i);
        }
        else if ((*e2i).isDK() && !(*e1i).isSpecial())
          weight *= (*di)->p(*e1i);
      }
    }
  }
  else {
    TVarList::const_iterator vi(domain->attributes->begin()), vie(domain->attributes->end());
    for(; vi!=vie; e1i++, e2i++, vi++)
      if (((*e1i).varType == TValue::INTVAR) && ((*e1i).isDK() && !(*e2i).isSpecial()   ||   (*e2i).isDK() && !(*e1i).isSpecial()))
        weight /= (*vi)->noOfValues();
  }

  return weight;
}
PDistribution TLogRegClassifier::classDistribution(const TExample &origexam)
{   
  checkProperty(domain);
  TExample cexample(domain, origexam);

  TExample *example2;

  if (imputer)
    example2 = imputer->call(cexample);
  else {
    if (dataDescription)
      for(TExample::const_iterator ei(cexample.begin()), ee(cexample.end()-1); ei!=ee; ei++)
        if ((*ei).isSpecial())
          return TClassifier::classDistribution(cexample, dataDescription);

    example2 = &cexample;
  }

  TExample *example = continuizedDomain ? mlnew TExample(continuizedDomain, *example2) : example2;

  float prob1;
  try {
    // multiply example with beta
    TAttributedFloatList::const_iterator b(beta->begin()), be(beta->end());

    // get beta 0
    prob1 = *b;
    b++;
    // multiply beta with example
    TVarList::const_iterator vi(example->domain->attributes->begin());
    TExample::const_iterator ei(example->begin()), ee(example->end());
    for (; (b!=be) && (ei!=ee); ei++, b++, vi++) {
      if ((*ei).isSpecial())
        raiseError("unknown value in attribute '%s'", (*vi)->get_name().c_str());
      prob1 += (*ei).floatV * (*b); 
    }

    prob1 = exp(prob1)/(1+exp(prob1));
  }
  catch (...) {
    if (imputer)
      mldelete example2;
    if (continuizedDomain)
      mldelete example;
    throw;
  }

  if (imputer)
    mldelete example2;
  if (continuizedDomain)
    mldelete example;

  if (classVar->varType == TValue::INTVAR) {
      TDiscDistribution *dist = mlnew TDiscDistribution(classVar);
      PDistribution res = dist;
      dist->addint(0, 1-prob1);
      dist->addint(1, prob1);
      return res;
  }
  else {
      TContDistribution *dist = mlnew TContDistribution(classVar);
      PDistribution res = dist;
      dist->addfloat(prob1, 1.0);
      return res;
  }
}