Пример #1
0
float TEFMDataDescription::getExampleMatch(const TExample &ex1, const TExample &ex2)
{ 
  if ((ex1.domain != domain) && (ex2.domain != domain))
    raiseError("example's domain doesn't match the data descriptor's");

  float weight=1.0;
  TExample::iterator e1i(ex1.begin()), e2i(ex2.end());

  if (domainDistributions) {
    if (matchProbabilities.size() != domainDistributions->size())
      matchProbabilities = vector<float>(domainDistributions->size(), -1);

    vector<float>::iterator mi(matchProbabilities.begin());
    TDomainDistributions::const_iterator di(domainDistributions->begin()), de(domainDistributions->end());

    for(; di!=de; e1i++, e2i++, di++, mi++) {
      if ((*e1i).varType == TValue::INTVAR) {
        if ((*e1i).isDK()) {
          if ((*e2i).isDK()) {
            if (*mi == -1) {
              float mp = 0.0;
              ITERATE(TDiscDistribution, ddi, ((TDiscDistribution &)((*di).getReference())))
                mp += *ddi * *ddi;
              *mi = mp;
            }
            weight *= *mi;
          }
          else if (!(*e2i).isSpecial())
            weight *= (*di)->p(*e2i);
        }
        else if ((*e2i).isDK() && !(*e1i).isSpecial())
          weight *= (*di)->p(*e1i);
      }
    }
  }
  else {
    TVarList::const_iterator vi(domain->attributes->begin()), vie(domain->attributes->end());
    for(; vi!=vie; e1i++, e2i++, vi++)
      if (((*e1i).varType == TValue::INTVAR) && ((*e1i).isDK() && !(*e2i).isSpecial()   ||   (*e2i).isDK() && !(*e1i).isSpecial()))
        weight /= (*vi)->noOfValues();
  }

  return weight;
}
Пример #2
0
float TEFMDataDescription::getExampleWeight(const TExample &example) const
{ 
  if (example.domain != domain)
    raiseError("example's domain doesn't match the data descriptor's");

  float weight=1.0;
  TVarList::const_iterator vi(domain->attributes->begin()), vie(domain->attributes->end());
  TExample::iterator ei(example.begin());
  for(; vi!=vie; ei++, vi++)
    if ((*ei).isDK() && ((*ei).varType == TValue::INTVAR))
      weight /= (*vi)->noOfValues();

  return weight;
}
bool convertFromPythonExisting(PyObject *lst, TExample &example)
{
  PDomain dom=example.domain;

  if (PyOrExample_Check(lst)) {
    const TExample &orex = PyExample_AS_ExampleReference(lst);
    if (orex.domain != dom)
      dom->convert(example, orex);
    else
      example = orex;
    return true;
  }

  if (!PyList_Check(lst)) {
    PyErr_Format(PyExc_TypeError, "invalid argument type (expected list, got '%s)", lst ? lst->ob_type->tp_name : "None");
    return false;
  }

  int const nvars = dom->variables->size() + dom->classVars->size();
  if (Py_ssize_t(nvars) != PyList_Size(lst)) {
    PyErr_Format(PyExc_IndexError, "invalid list size (got %i, expected %i items)",
        PyList_Size(lst), nvars);
    return false;
  }

  Py_ssize_t pos = 0;
  TExample::iterator ei(example.begin());
  TVarList::iterator vi(dom->variables->begin());
  TVarList::const_iterator const ve(dom->variables->end());
  TVarList::const_iterator const ce(dom->classVars->end());
  while(vi != ce && vi != ve) {
    PyObject *li=PyList_GetItem(lst, pos++);
    if (!li)
      PYERROR(PyExc_SystemError, "can't read the list", false);

    if (PyOrValue_Check(li))
      if (PyValue_AS_Variable(li) ? (PyValue_AS_Variable(li) != *vi) : (PyValue_AS_Value(li).varType=!(*vi)->varType) ) {
        PyErr_Format(PyExc_TypeError, "wrong value type for attribute no. %i (%s)", pos, (*vi)->get_name().c_str());
        return false;
      }
      else
        *(ei++)=PyValue_AS_Value(li);

    else {
      if (li == Py_None) {
        *(ei++) = (*vi)->DK();
      }
        
      else if (PyString_Check(li))
          (*vi)->str2val(string(PyString_AsString(li)), *(ei++));

      else if ((*vi)->varType==TValue::INTVAR) {
        if (PyInt_Check(li))
          *(ei++)=TValue(int(PyInt_AsLong(li)));
        else {
          PyErr_Format(PyExc_TypeError, "attribute no. %i (%s) is ordinal, string value expected", pos, (*vi)->get_name().c_str());
          return false;
        }
      }
      else if ((*vi)->varType==TValue::FLOATVAR) {
        float f;
        if (PyNumber_ToFloat(li, f))
          *(ei++) = TValue(f);
        else {
          PyErr_Format(PyExc_TypeError, "attribute no. %i (%s) is continuous, float value expected", pos, (*vi)->get_name().c_str());
          return false;
        }
      }
      else
        ei++;
    }
    if (++vi == ve) {
        vi = dom->classVars->begin();
    }
  }

  return true;
}
Пример #4
0
PDistribution TLogRegClassifier::classDistribution(const TExample &origexam)
{   
  checkProperty(domain);
  TExample cexample(domain, origexam);

  TExample *example2;

  if (imputer)
    example2 = imputer->call(cexample);
  else {
    if (dataDescription)
      for(TExample::const_iterator ei(cexample.begin()), ee(cexample.end()-1); ei!=ee; ei++)
        if ((*ei).isSpecial())
          return TClassifier::classDistribution(cexample, dataDescription);

    example2 = &cexample;
  }

  TExample *example = continuizedDomain ? mlnew TExample(continuizedDomain, *example2) : example2;

  float prob1;
  try {
    // multiply example with beta
    TAttributedFloatList::const_iterator b(beta->begin()), be(beta->end());

    // get beta 0
    prob1 = *b;
    b++;
    // multiply beta with example
    TVarList::const_iterator vi(example->domain->attributes->begin());
    TExample::const_iterator ei(example->begin()), ee(example->end());
    for (; (b!=be) && (ei!=ee); ei++, b++, vi++) {
      if ((*ei).isSpecial())
        raiseError("unknown value in attribute '%s'", (*vi)->get_name().c_str());
      prob1 += (*ei).floatV * (*b); 
    }

    prob1 = exp(prob1)/(1+exp(prob1));
  }
  catch (...) {
    if (imputer)
      mldelete example2;
    if (continuizedDomain)
      mldelete example;
    throw;
  }

  if (imputer)
    mldelete example2;
  if (continuizedDomain)
    mldelete example;

  if (classVar->varType == TValue::INTVAR) {
      TDiscDistribution *dist = mlnew TDiscDistribution(classVar);
      PDistribution res = dist;
      dist->addint(0, 1-prob1);
      dist->addint(1, prob1);
      return res;
  }
  else {
      TContDistribution *dist = mlnew TContDistribution(classVar);
      PDistribution res = dist;
      dist->addfloat(prob1, 1.0);
      return res;
  }
}
Пример #5
0
bool TTabDelimExampleGenerator::readExample(TFileExampleIteratorData &fei, TExample &exam)
{
  vector<string> atoms;
  // read lines until eof or a non-empty line
  while(!feof(fei.file) && ((readTabAtom(fei, atoms, true, csv)>0) || atomsEmpty(atoms))) {
    vector<string>::iterator ii(atoms.begin()), ie(atoms.end());
    while ((ii!=ie) && !(*ii).length())
      ii++;
    if (ii==ie)
      atoms.clear();
    else
      break;
  }
  
  if (!atoms.size())
    return false;

  // Add an appropriate number of empty atoms, if needed
  while (atoms.size()<attributeTypes->size())
    atoms.push_back(string(""));
  _ASSERT(exam.domain==domain);

  exam.removeMetas();

  TExample::iterator ei(exam.begin());
  TVarList::iterator vi(domain->attributes->begin());
  vector<string>::iterator ai(atoms.begin());
  TIntList::iterator si(attributeTypes->begin()), se(attributeTypes->end());
  TIntList::iterator cb, cp, ce;
  if (classPoses) {
      cb = cp = classPoses->begin();
      ce = classPoses->end();
  }
  int pos=0;
  for (; (si!=se); pos++, si++, ai++) {
    if (*si) { // if attribute is not to be skipped and is not a basket
      string valstr;

      // Check for don't care
      valstr = *ai;
      if (!valstr.length() || (valstr == "NA") || (valstr == ".") || (DC && (valstr == DC)))
        valstr = "?";
      else if ((valstr == "*") || (DK && (valstr == DK)))
        valstr = "~";

      try {
        if (*si==-1)
          if (pos==classPos) { // if this is class value
            TValue cval;
            domain->classVar->filestr2val(valstr, cval, exam);
            exam.setClass(cval);
          }
          else if (classPoses && (cp != ce) && (pos == *cp)) {
              const int ind = cp - cb;
              domain->classVars->at(ind)->filestr2val(valstr, exam.values_end[ind], exam);
              cp++;
          }
          else { // if this is a normal value
            (*vi++)->filestr2val(valstr, *ei++, exam);
          }
        else { // if this is a meta value
          TMetaDescriptor *md = domain->metas[*si];
          _ASSERT(md!=NULL);
          TValue mval;
          md->variable->filestr2val(valstr, mval, exam);

          exam.setMeta(*si, mval);
        }
      }
      catch (mlexception &err) {
        raiseError("file '%s', line '%i': %s", fei.filename.c_str(), fei.line, err.what());
      }
    }

    // the attribute is marked to be skipped, but may also be a basket
    else { 
      if (pos == basketPos) {
        TSplits splits;
        split(*ai, splits);
        ITERATE(TSplits, si, splits)
          basketFeeder->addItem(exam, string(si->first, si->second), fei.line);
      }
    }
  }

  if (pos==classPos) // if class is the last value in the line, it is set here
    domain->classVar->filestr2val(ai==atoms.end() ? "?" : *(ai++), exam[domain->variables->size()-1], exam);
  /* I'm not sure that this is needed; this code is a mess but I don't wish to
     waste time studying it since we are moving to 3.0 */
  else if (classPoses && (cp != ce) && (pos == *cp)) {
    const int ind = cp - cb;
    domain->classVars->at(ind)->filestr2val(ai==atoms.end() ? "?" : *(ai++), exam.values_end[ind], exam);
  }

  while ((ai!=atoms.end()) && !(*ai).length()) ai++; // line must be empty from now on

  if (ai!=atoms.end()) {
	vector<string>::iterator ii=atoms.begin();
	string s=*ii;
	while(++ii!=atoms.end()) s+=" "+*ii;
    raiseError("example of invalid length (%s)", s.c_str());
  }

  return true;
}