float TEFMDataDescription::getExampleWeight(const TExample &example) const { if (example.domain != domain) raiseError("example's domain doesn't match the data descriptor's"); float weight=1.0; TVarList::const_iterator vi(domain->attributes->begin()), vie(domain->attributes->end()); TExample::iterator ei(example.begin()); for(; vi!=vie; ei++, vi++) if ((*ei).isDK() && ((*ei).varType == TValue::INTVAR)) weight /= (*vi)->noOfValues(); return weight; }
float TEFMDataDescription::getExampleMatch(const TExample &ex1, const TExample &ex2) { if ((ex1.domain != domain) && (ex2.domain != domain)) raiseError("example's domain doesn't match the data descriptor's"); float weight=1.0; TExample::iterator e1i(ex1.begin()), e2i(ex2.end()); if (domainDistributions) { if (matchProbabilities.size() != domainDistributions->size()) matchProbabilities = vector<float>(domainDistributions->size(), -1); vector<float>::iterator mi(matchProbabilities.begin()); TDomainDistributions::const_iterator di(domainDistributions->begin()), de(domainDistributions->end()); for(; di!=de; e1i++, e2i++, di++, mi++) { if ((*e1i).varType == TValue::INTVAR) { if ((*e1i).isDK()) { if ((*e2i).isDK()) { if (*mi == -1) { float mp = 0.0; ITERATE(TDiscDistribution, ddi, ((TDiscDistribution &)((*di).getReference()))) mp += *ddi * *ddi; *mi = mp; } weight *= *mi; } else if (!(*e2i).isSpecial()) weight *= (*di)->p(*e2i); } else if ((*e2i).isDK() && !(*e1i).isSpecial()) weight *= (*di)->p(*e1i); } } } else { TVarList::const_iterator vi(domain->attributes->begin()), vie(domain->attributes->end()); for(; vi!=vie; e1i++, e2i++, vi++) if (((*e1i).varType == TValue::INTVAR) && ((*e1i).isDK() && !(*e2i).isSpecial() || (*e2i).isDK() && !(*e1i).isSpecial())) weight /= (*vi)->noOfValues(); } return weight; }
bool convertFromPythonExisting(PyObject *lst, TExample &example) { PDomain dom=example.domain; if (PyOrExample_Check(lst)) { const TExample &orex = PyExample_AS_ExampleReference(lst); if (orex.domain != dom) dom->convert(example, orex); else example = orex; return true; } if (!PyList_Check(lst)) { PyErr_Format(PyExc_TypeError, "invalid argument type (expected list, got '%s)", lst ? lst->ob_type->tp_name : "None"); return false; } int const nvars = dom->variables->size() + dom->classVars->size(); if (Py_ssize_t(nvars) != PyList_Size(lst)) { PyErr_Format(PyExc_IndexError, "invalid list size (got %i, expected %i items)", PyList_Size(lst), nvars); return false; } Py_ssize_t pos = 0; TExample::iterator ei(example.begin()); TVarList::iterator vi(dom->variables->begin()); TVarList::const_iterator const ve(dom->variables->end()); TVarList::const_iterator const ce(dom->classVars->end()); while(vi != ce && vi != ve) { PyObject *li=PyList_GetItem(lst, pos++); if (!li) PYERROR(PyExc_SystemError, "can't read the list", false); if (PyOrValue_Check(li)) if (PyValue_AS_Variable(li) ? (PyValue_AS_Variable(li) != *vi) : (PyValue_AS_Value(li).varType=!(*vi)->varType) ) { PyErr_Format(PyExc_TypeError, "wrong value type for attribute no. %i (%s)", pos, (*vi)->get_name().c_str()); return false; } else *(ei++)=PyValue_AS_Value(li); else { if (li == Py_None) { *(ei++) = (*vi)->DK(); } else if (PyString_Check(li)) (*vi)->str2val(string(PyString_AsString(li)), *(ei++)); else if ((*vi)->varType==TValue::INTVAR) { if (PyInt_Check(li)) *(ei++)=TValue(int(PyInt_AsLong(li))); else { PyErr_Format(PyExc_TypeError, "attribute no. %i (%s) is ordinal, string value expected", pos, (*vi)->get_name().c_str()); return false; } } else if ((*vi)->varType==TValue::FLOATVAR) { float f; if (PyNumber_ToFloat(li, f)) *(ei++) = TValue(f); else { PyErr_Format(PyExc_TypeError, "attribute no. 
%i (%s) is continuous, float value expected", pos, (*vi)->get_name().c_str()); return false; } } else ei++; } if (++vi == ve) { vi = dom->classVars->begin(); } } return true; }
// Predicts the class distribution for an example with the fitted logistic
// regression coefficients (`beta`).
//
// Steps:
//  1. The example is converted to the classifier's domain.
//  2. If an imputer is set, it produces the example to be used. Otherwise,
//     if a data description is available and any value except the last one
//     is special, prediction is delegated to TClassifier::classDistribution.
//  3. If a continuized domain is set, the example is converted to it.
//  4. The linear predictor beta[0] + sum(value_i * beta[i+1]) is computed and
//     passed through the logistic function.
//
// Returns a discrete distribution with weights (1-p, p) for values 0 and 1
// when the class variable is TValue::INTVAR, otherwise a continuous
// distribution holding p with weight 1.0.
//
// Raises an error (via raiseError) if a value used in the linear predictor
// is special.
PDistribution TLogRegClassifier::classDistribution(const TExample &origexam)
{
  checkProperty(domain);
  TExample cexample(domain, origexam);

  TExample *example2;
  if (imputer)
    example2 = imputer->call(cexample);
  else {
    if (dataDescription)
      for(TExample::const_iterator ei(cexample.begin()), ee(cexample.end()-1); ei!=ee; ei++)
        if ((*ei).isSpecial())
          return TClassifier::classDistribution(cexample, dataDescription);
    example2 = &cexample;
  }

  // `example` aliases `example2` unless a continuized copy gets allocated
  // below; `example != example2` therefore tells whether it must be freed.
  TExample *example = example2;
  float prob1;
  try {
    // Allocated inside the try block so that the imputed example is released
    // if this conversion throws.
    if (continuizedDomain)
      example = mlnew TExample(continuizedDomain, *example2);

    // multiply example with beta
    TAttributedFloatList::const_iterator b(beta->begin()), be(beta->end());

    // get beta 0
    prob1 = *b;
    b++;

    // multiply beta with example
    TVarList::const_iterator vi(example->domain->attributes->begin());
    TExample::const_iterator ei(example->begin()), ee(example->end());
    for (; (b!=be) && (ei!=ee); ei++, b++, vi++) {
      if ((*ei).isSpecial())
        raiseError("unknown value in attribute '%s'", (*vi)->get_name().c_str());
      prob1 += (*ei).floatV * (*b);
    }

    // Logistic function. This form equals exp(x)/(1+exp(x)) but does not
    // turn into inf/inf (NaN) when exp(x) overflows for large positive x;
    // for large negative x, exp(-x) overflows to inf and the result is 0.
    prob1 = 1/(1+exp(-prob1));
  }
  catch (...) {
    if (example != example2)
      mldelete example;
    if (imputer)
      mldelete example2;
    throw;
  }

  if (example != example2)
    mldelete example;
  if (imputer)
    mldelete example2;

  if (classVar->varType == TValue::INTVAR) {
    TDiscDistribution *dist = mlnew TDiscDistribution(classVar);
    PDistribution res = dist;
    dist->addint(0, 1-prob1);
    dist->addint(1, prob1);
    return res;
  }
  else {
    TContDistribution *dist = mlnew TContDistribution(classVar);
    PDistribution res = dist;
    dist->addfloat(prob1, 1.0);
    return res;
  }
}
bool TTabDelimExampleGenerator::readExample(TFileExampleIteratorData &fei, TExample &exam) { vector<string> atoms; // read lines until eof or a non-empty line while(!feof(fei.file) && ((readTabAtom(fei, atoms, true, csv)>0) || atomsEmpty(atoms))) { vector<string>::iterator ii(atoms.begin()), ie(atoms.end()); while ((ii!=ie) && !(*ii).length()) ii++; if (ii==ie) atoms.clear(); else break; } if (!atoms.size()) return false; // Add an appropriate number of empty atoms, if needed while (atoms.size()<attributeTypes->size()) atoms.push_back(string("")); _ASSERT(exam.domain==domain); exam.removeMetas(); TExample::iterator ei(exam.begin()); TVarList::iterator vi(domain->attributes->begin()); vector<string>::iterator ai(atoms.begin()); TIntList::iterator si(attributeTypes->begin()), se(attributeTypes->end()); TIntList::iterator cb, cp, ce; if (classPoses) { cb = cp = classPoses->begin(); ce = classPoses->end(); } int pos=0; for (; (si!=se); pos++, si++, ai++) { if (*si) { // if attribute is not to be skipped and is not a basket string valstr; // Check for don't care valstr = *ai; if (!valstr.length() || (valstr == "NA") || (valstr == ".") || (DC && (valstr == DC))) valstr = "?"; else if ((valstr == "*") || (DK && (valstr == DK))) valstr = "~"; try { if (*si==-1) if (pos==classPos) { // if this is class value TValue cval; domain->classVar->filestr2val(valstr, cval, exam); exam.setClass(cval); } else if (classPoses && (cp != ce) && (pos == *cp)) { const int ind = cp - cb; domain->classVars->at(ind)->filestr2val(valstr, exam.values_end[ind], exam); cp++; } else { // if this is a normal value (*vi++)->filestr2val(valstr, *ei++, exam); } else { // if this is a meta value TMetaDescriptor *md = domain->metas[*si]; _ASSERT(md!=NULL); TValue mval; md->variable->filestr2val(valstr, mval, exam); exam.setMeta(*si, mval); } } catch (mlexception &err) { raiseError("file '%s', line '%i': %s", fei.filename.c_str(), fei.line, err.what()); } } // the attribute is marked to be skipped, but 
may also be a basket else { if (pos == basketPos) { TSplits splits; split(*ai, splits); ITERATE(TSplits, si, splits) basketFeeder->addItem(exam, string(si->first, si->second), fei.line); } } } if (pos==classPos) // if class is the last value in the line, it is set here domain->classVar->filestr2val(ai==atoms.end() ? "?" : *(ai++), exam[domain->variables->size()-1], exam); /* I'm not sure that this is needed; this code is a mess but I don't wish to waste time studying it since we are moving to 3.0 */ else if (classPoses && (cp != ce) && (pos == *cp)) { const int ind = cp - cb; domain->classVars->at(ind)->filestr2val(ai==atoms.end() ? "?" : *(ai++), exam.values_end[ind], exam); } while ((ai!=atoms.end()) && !(*ai).length()) ai++; // line must be empty from now on if (ai!=atoms.end()) { vector<string>::iterator ii=atoms.begin(); string s=*ii; while(++ii!=atoms.end()) s+=" "+*ii; raiseError("example of invalid length (%s)", s.c_str()); } return true; }