// Computes one error-probability estimate per selected score and stores the
// result in `peps`.
//
// Parameters:
//   combined         - (score, flag) pairs. Entries whose flag is true are
//                      counted as targets, the rest as decoys. Presumably
//                      already ordered by score - TODO confirm with callers.
//   pi0              - multiplier applied to the target/decoy ratio.
//   peps             - output; overwritten by lr.predict() and then transformed
//                      in place.
//   include_negative - when true, decoy scores are evaluated as well, so `peps`
//                      has one entry per element of `combined`; when false it
//                      has one entry per target only.
//
// Steps: fit via estimate(), predict a value per selected score, map each
// prediction p to pi0 * (nTargets / nDecoys) * exp(p), cap at `top`, and
// finally fold the vector from back to front with mymin.
void PosteriorEstimator::estimatePEP(
    vector<pair<double, bool> >& combined,
    double pi0,
    vector<double>& peps,
    bool include_negative) {
  // Logistic regression on the data
  LogisticRegression lr;
  estimate(combined, lr);

  size_t nTargets = 0, nDecoys = 0;
  vector<double> xvals;
  xvals.reserve(combined.size());
  for (vector<pair<double, bool> >::const_iterator elem = combined.begin();
       elem != combined.end(); ++elem) {
    if (elem->second) {
      xvals.push_back(elem->first);
      ++nTargets;
    } else {
      if (include_negative) {
        xvals.push_back(elem->first);
      }
      ++nDecoys;
    }
  }
  lr.predict(xvals, peps);

// The define/undef pair is a manual switch: delete the #undef line to enable
// the debug dump. The #undef also applies to later code in this file.
#define OUTPUT_DEBUG_FILES
#undef OUTPUT_DEBUG_FILES
#ifdef OUTPUT_DEBUG_FILES
  ofstream drFile("decoyRate.all", ios::out), xvalFile("xvals.all", ios::out);
  ostream_iterator<double> drIt(drFile, "\n"), xvalIt(xvalFile, "\n");
  copy(peps.begin(), peps.end(), drIt);
  copy(xvals.begin(), xvals.end(), xvalIt);
#endif

  // Nothing was predicted (e.g. no targets and decoys excluded). Bail out
  // before max_element() hands back end(), which must not be dereferenced.
  if (peps.empty()) {
    return;
  }

  // NOTE(review): with nDecoys == 0 this is a floating-point division by zero
  // (inf or NaN); callers presumably guarantee at least one decoy - confirm.
  const double factor =
      pi0 * (static_cast<double>(nTargets) / static_cast<double>(nDecoys));
  // Upper bound for every output value; never larger than 1.
  const double top =
      min(1.0, factor * exp(*max_element(peps.begin(), peps.end())));

  // Once one value reaches the cap, every later entry is pinned to the cap.
  bool saturated = false;
  for (vector<double>::iterator pep = peps.begin(); pep != peps.end(); ++pep) {
    if (saturated) {
      *pep = top;
      continue;
    }
    *pep = factor * exp(*pep);
    if (*pep >= top) {
      *pep = top;
      saturated = true;
    }
  }

  // Accumulate with mymin from the last element towards the first
  // (presumably a running minimum, making the sequence monotone).
  partial_sum(peps.rbegin(), peps.rend(), peps.rbegin(), mymin);
}
void PosteriorEstimator::estimatePEPGeneralized( vector<pair<double, bool> >& combined, vector<double>& peps, bool include_negative) { // Logistic regression on the data size_t nTargets = 0, nDecoys = 0; LogisticRegression lr; estimate(combined, lr); vector<double> xvals(0); vector<pair<double, bool> >::const_iterator elem = combined.begin(); for (; elem != combined.end(); ++elem) { xvals.push_back(elem->first); if (elem->second) { ++nTargets; } else { if (include_negative) { xvals.push_back(elem->first); } ++nDecoys; } } lr.predict(xvals, peps); #ifdef OUTPUT_DEBUG_FILES ofstream drFile("decoyRate.all", ios::out), xvalFile("xvals.all", ios::out); ostream_iterator<double> drIt(drFile, "\n"), xvalIt(xvalFile, "\n"); copy(peps.begin(), peps.end(), drIt); copy(xvals.begin(), xvals.end(), xvalIt); #endif double top = exp(*max_element(peps.begin(), peps.end())); top = top/(1+top); bool crap = false; vector<double>::iterator pep = peps.begin(); for (; pep != peps.end(); ++pep) { if (crap) { *pep = top; continue; } // eg = p/(1-p) // eg - egp = p // p = eg/(1+eg) double eg = exp(*pep); *pep = eg/(1+eg); if (*pep >= top) { *pep = top; crap = true; } } partial_sum(peps.rbegin(), peps.rend(), peps.rbegin(), mymin); double high = *max_element(peps.begin(), peps.end()); double low = *min_element(peps.begin(), peps.end()); assert(high>low); if (VERB > 2) { cerr << "Highest generalized decoy rate =" << high << ", low rate = " << low << endl; } pep = peps.begin(); for (; pep != peps.end(); ++pep) { *pep = (*pep - low)/(high-low); } }