예제 #1
0
/**
 * Computes estimated and empirical q-values for a list of protein groups.
 *
 * Each group k is paired with probabilities[k] and a flag that is true when
 * countTargets() reports at least one target in the group. When usePi0_ is
 * set, pi0_ is re-estimated from the p-values of these pairs. The estimated
 * q-values come from PosteriorEstimator::getQValuesFromPEP() and the empirical
 * ones from PosteriorEstimator::getQValues().
 *
 * @param proteinNames   protein groups; proteinNames[k] holds the names of group k
 * @param probabilities  one value per group, indexed in parallel with
 *                       proteinNames (must hold at least proteinNames.size()
 *                       entries; this is not checked)
 * @param empq           output; cleared, then filled with the empirical q-values
 * @param estq           output; cleared, then filled with the estimated q-values
 */
void FidoInterface::getEstimated_and_Empirical_FDR(
    const std::vector<std::vector<string> >& proteinNames,
    const std::vector<double>& probabilities,
    std::vector<double>& empq, 
    std::vector<double>& estq) {
  empq.clear();
  estq.clear();
  
  std::vector<std::pair<double, bool> > combined;
  std::vector<double> peps;
  combined.reserve(proteinNames.size());
  peps.reserve(proteinNames.size());
  for (unsigned int k = 0; k < proteinNames.size(); ++k) {
    // A group without any target is treated as a decoy; the pair stores the
    // opposite flag (true == the group contains at least one target).
    const bool isDecoy = (countTargets(proteinNames[k]) == 0);
    combined.push_back(std::make_pair(probabilities[k], !isDecoy));
    peps.push_back(probabilities[k]);
  }
  
  if (usePi0_) {
    std::vector<double> pvals;
    PosteriorEstimator::getPValues(combined, pvals);
    pi0_ = PosteriorEstimator::estimatePi0(pvals);
  }
  
  PosteriorEstimator::setNegative(true); // also get q-values for decoys
  PosteriorEstimator::getQValuesFromPEP(peps, estq);
  PosteriorEstimator::getQValues(pi0_, combined, empq);
}
예제 #2
0
/**
 * Computes estimated and empirical q-values for a list of protein groups
 * using an FDRCalculator.
 *
 * When usePi0_ is set, the target/decoy ratio is taken from
 * numberTargetProteins_ / numberDecoyProteins_ and pi0_ is re-estimated from
 * the p-values of the (probability, has-target) pairs. The groups are then fed
 * one by one to FDRCalculator::calcFDRs(), which appends to empq and estq.
 * Iteration stops as soon as the calculator's previous estimated q-value
 * exceeds mseThreshold_.
 *
 * @param proteinNames   protein groups; proteinNames[k] holds the names of group k
 * @param probabilities  one value per group, indexed in parallel with
 *                       proteinNames (size is not checked)
 * @param empq           output; cleared, then filled by the FDR calculator
 * @param estq           output; cleared, then filled by the FDR calculator
 */
void FidoInterface::getEstimated_and_Empirical_FDR(
    const std::vector<std::vector<string> >& proteinNames,
    const std::vector<double>& probabilities,
    std::vector<double>& empq, 
    std::vector<double>& estq) {
  empq.clear();
  estq.clear();
  
  double targetDecoyRatio = 1.0;
  
  if (usePi0_) {
    // NOTE(review): nothing here guards against numberDecoyProteins_ == 0,
    // which would make this ratio inf/NaN -- confirm callers guarantee decoys.
    targetDecoyRatio = static_cast<double>(numberTargetProteins_) / numberDecoyProteins_;
    std::vector<std::pair<double, bool> > combined;
    combined.reserve(proteinNames.size());
    for (unsigned int k = 0; k < proteinNames.size(); ++k) {
      // A group without any target is treated as a decoy; the pair stores
      // the opposite flag (true == the group contains at least one target).
      const bool isDecoy = (countTargets(proteinNames[k]) == 0);
      combined.push_back(std::make_pair(probabilities[k], !isDecoy));
    }
    
    std::vector<double> pvals;
    PosteriorEstimator::getPValues(combined, pvals);
    pi0_ = PosteriorEstimator::estimatePi0(pvals);
  }
  FDRCalculator fdrCalculator(usePi0_, targetDecoyRatio, pi0_ * absenceRatio_, countDecoyQvalue_);
  
  //NOTE no need to store more q values since they will not be taken into account while estimating MSE FDR divergence
  for (unsigned int k = 0; (k < proteinNames.size() && 
        (fdrCalculator.getPreviousEstQ() <= mseThreshold_)); k++) {
    double prob = probabilities[k];
    unsigned tpChange = countTargets(proteinNames[k]);
    unsigned fpChange = proteinNames[k].size() - tpChange;
    // With trivial grouping a group contributes at most one target and at
    // most one decoy, regardless of how many proteins it contains.
    if (trivialGrouping_) {
      if (tpChange > 0) tpChange = 1;
      if (fpChange > 0) fpChange = 1;
    }
    fdrCalculator.calcFDRs(fpChange, tpChange, prob, empq, estq);
  }
  if (kUpdateRocN) rocN_ = fdrCalculator.getRocN();
}
예제 #3
0
/**
 * Recounts numberTargetProteins_ and numberDecoyProteins_ from the protein
 * groups currently held by proteinGraph_.
 *
 * For every group the number of targets is obtained from countTargets() and
 * the remaining members are counted as decoys. When trivialGrouping_ is set,
 * a group contributes at most one target and at most one decoy; otherwise
 * every protein in the group is counted individually.
 *
 * Previously the counters were only updated inside the trivialGrouping_
 * branch, so with grouping disabled both totals stayed at zero.
 */
void FidoInterface::updateTargetDecoySizes() {
  std::vector<std::vector<std::string> > proteinNames;
  proteinGraph_->getProteinNames(proteinNames);
  
  numberTargetProteins_ = 0;
  numberDecoyProteins_ = 0;
  for (unsigned int k = 0; k < proteinNames.size(); ++k) {
    unsigned tpChange = countTargets(proteinNames[k]);
    unsigned fpChange = proteinNames[k].size() - tpChange;
    // Same per-group collapsing rule as the FDR and ROC computations.
    if (trivialGrouping_) {
      if (tpChange > 0) tpChange = 1;
      if (fpChange > 0) fpChange = 1;
    }
    numberTargetProteins_ += tpChange;
    numberDecoyProteins_ += fpChange;
  }
}
예제 #4
0
/**
 * Computes a normalised area under the ROC curve for the ranked protein
 * groups and stores it in auc.
 *
 * Groups are visited in the given order while the running decoy count does
 * not exceed rocN_. Per-segment areas returned by trapezoid_area() are summed
 * and the sum is divided by (total targets * total decoys) seen; if that
 * product is zero, auc is set to 0.
 *
 * @param names          protein groups; names[k] holds the names of group k
 * @param probabilities  one value per group, indexed in parallel with names
 *                       (assumed to have the same size; not checked)
 * @param auc            output; receives the normalised area
 */
void FidoInterface::getROC_AUC(const std::vector<std::vector<string> > &names,
            const std::vector<double> &probabilities, double &auc) {
  /* Background: ways to estimate the ROC area.
   *
   * Rank based: auc1 = (So - no*(no + 1) / 2) / (no * n1)
   *   no = number of targets
   *   n1 = number of decoys
   *   So = SUM ri, where ri is the rank of the i-th target in the ranked list
   *        of targets and decoys
   *
   * Trapezoids (integral of the absolute value): auc2 = sum of segment areas
   *   A_segment(i) = abs(X1 - Xo) * abs((yo + y1) / 2)
   *   yo = number of TP at segment i,  y1 = number of TP at segment i + 1
   *   Xo = number of FP at segment i,  X1 = number of FP at segment i + 1
   *   Total Area = Total Area / (total_TP * total_FP)
   *
   * Antiderivative per segment (absolute value of the integral): auc3
   *   A_segment(i) = ((yo - m*Xo)*X1 + m/2 * X1^2) - ((yo - m*Xo)*Xo + m/2 * Xo^2)
   *   with yo, y1, Xo, X1 as above and m = (y1 - yo) / (X1 - Xo)
   *   Total Area = abs(Total Area / (total_TP * total_FP))
   *
   * This function sums trapezoid_area() over segments and normalises by
   * total_TP * total_FP; which per-segment formula trapezoid_area() uses is
   * not visible from here.
   */
  
  unsigned prev_tp = 0;
  unsigned prev_fp = 0;
  unsigned tp = 0;
  unsigned fp = 0;
  double prev_prob = -1; // -1 marks "no group processed yet"
  auc = 0.0;
  
  // assuming names and probabilities same size; rocN_ set by getEstimated_and_Empirical_FDR()
  for (unsigned k = 0; k < names.size() && fp <= rocN_; k++) {
    double prob = probabilities[k];
    unsigned tpChange = countTargets(names[k]);
    unsigned fpChange = names[k].size() - tpChange;
    //if ties activated count groups as 1 protein
    if (trivialGrouping_) {
      if (tpChange > 0) tpChange = 1;
      if (fpChange > 0) fpChange = 1;
    }
    tp += tpChange;
    fp += fpChange;
    // Add a segment only after the first group, once both counts are
    // non-zero and the decoy count has advanced. The previous point is moved
    // only when a segment is added.
    if (prev_prob != -1 && fp != 0 && tp != 0 && fp != prev_fp) {
      double trapezoid = trapezoid_area(fp,prev_fp,tp,prev_tp);
      prev_fp = fp;
      prev_tp = tp;
      auc += trapezoid;
    }    
    prev_prob = prob;
  }

  // Computed in double so that tp * fp cannot wrap around in unsigned
  // arithmetic for large inputs.
  const double normalizer = static_cast<double>(tp) * static_cast<double>(fp);
  
  if (normalizer > 0) {
    auc /= normalizer;
  } else {
    auc = 0.0;
  }
}
예제 #5
0
void FidoInterface::getEstimated_and_Empirical_FDR(const std::vector<std::vector<string> > &names,
							     const std::vector<double> &probabilities,
							     std::vector<double> &empq,
							     std::vector<double> &estq) 
{
  empq.clear();
  estq.clear();
  double fpCount = 0.0, tpCount = 0.0;
  double totalFDR = 0.0, estFDR = 0.0, empFDR = 0.0;
  double TargetDecoyRatio = (double)numberTargetProteins / (double)numberDecoyProteins;
  double previousEmpQ = 0.0;
  double previousEstQ = 0.0;
  
  if(updateRocN) rocN = 50;
  
  //NOTE no need to store more q values since they will not be taken into account while estimating MSE FDR divergence
  for (unsigned int k=0; (k<names.size() && (estFDR <= threshold)); k++)
    {
      double prob = probabilities[k];

      if(tiesAsOneProtein)
      {
	unsigned tpChange = countTargets(names[k]);
	unsigned fpChange = names[k].size() - tpChange;
	
	fpCount += (double)fpChange;
	tpCount += (double)tpChange;
	
	if(countDecoyQvalue)
	{
	  totalFDR += (prob) * (double)(tpChange + fpChange);
	  estFDR = totalFDR / (tpCount + fpCount);
	}
	else
	{
	  totalFDR += (prob) * (double)(tpChange);
	  estFDR = totalFDR / (tpCount);  
	}

	if(tpCount) empFDR = (fpCount * pi0 * TargetDecoyRatio) / tpCount; 
	
	if(empFDR > 1.0 || std::isnan(empFDR) || std::isinf(empFDR)) empFDR = 1.0;
	if(estFDR > 1.0 || std::isnan(estFDR) || std::isinf(estFDR)) estFDR = 1.0;
	    
	if(estFDR < previousEstQ) estFDR = previousEstQ;
	else previousEstQ = estFDR;
	    
	if(empFDR < previousEmpQ) empFDR = previousEmpQ;
	else previousEmpQ = empFDR;
	
	if(updateRocN)
	{ 
	  rocN = (unsigned)std::max(rocN,(unsigned)std::max(50,std::min((int)fpCount,500)));
	}
	
	estq.push_back(estFDR);
	empq.push_back(empFDR);

      }
      else
      {
	for(unsigned i=0; i<names[k].size(); i++)
	{
	    std::string protein = names[k][i];
	    
	    bool isdecoy = isDecoy(protein);
	    
	    if(isdecoy)
	    {
	      fpCount++;
	    }
	    else
	    {
	      tpCount++;
	    }
	    
	    if(countDecoyQvalue)
	    {
	      totalFDR += (prob);
	      estFDR = totalFDR / (tpCount + fpCount);
	    }
	    else if(tpCount)
	    {
	      if(!((bool)isdecoy)) totalFDR += (prob);
	      estFDR = totalFDR / (tpCount);
	    }
	    
	    if(tpCount) empFDR = (fpCount * pi0 * TargetDecoyRatio) / tpCount; 
	    
	    if(empFDR > 1.0 || std::isnan(empFDR) || std::isinf(empFDR)) empFDR = 1.0;
	    if(estFDR > 1.0 || std::isnan(estFDR) || std::isinf(estFDR)) estFDR = 1.0;
	    
	    if(estFDR < previousEstQ) estFDR = previousEstQ;
	    else previousEstQ = estFDR;
	    
	    if(empFDR < previousEmpQ) empFDR = previousEmpQ;
	    else previousEmpQ = empFDR;
	    
	    if(updateRocN)
	    {
	      rocN = (unsigned)std::max(rocN,(unsigned)std::max(50,std::min((int)fpCount,500)));
	    }
	    
	    estq.push_back(estFDR);
	    empq.push_back(empFDR);
	    
	 }
      }
	
    }
   
  return;
}