/**
  @brief Appends a simplified fragment ladder for @p sequence to @p spec.

  Walks over all residues except the last one. The running mass b_pos sums
  aa_to_weight_ from the front of the sequence (starting at @p prefix), the
  running mass y_pos sums it from the back (starting at H2O + @p suffix).
  Whenever a running mass lies strictly between min_mz_ and max_mz_, a peak
  with intensity 1.0 is appended at that mass plus one proton mass.
  Finally @p spec is sorted by position.

  @param spec     Spectrum the peaks are appended to (existing peaks are kept).
  @param sequence Residue string; sequences with fewer than two residues add no peaks.
  @param prefix   Mass offset added to the front-side running mass.
  @param suffix   Mass offset added to the back-side running mass.
*/
void CompNovoIdentificationBase::getCIDSpectrumLight_(PeakSpectrum & spec, const String & sequence, DoubleReal prefix, DoubleReal suffix)
  {
    static DoubleReal h2o_mass = EmpiricalFormula("H2O").getMonoWeight();
    Peak1D p;
    DoubleReal b_pos(0.0 + prefix);
    DoubleReal y_pos(h2o_mass + suffix);

    // "i + 1 < size()" instead of "i != size() - 1": the latter underflows for
    // an empty sequence and would index far out of bounds.
    for (Size i = 0; i + 1 < sequence.size(); ++i)
    {
      char aa(sequence[i]);
      b_pos += aa_to_weight_[aa];

      char aa2(sequence[sequence.size() - i - 1]);
      y_pos += aa_to_weight_[aa2];

      // the range check is done on the running mass, the proton is added afterwards
      if (b_pos > min_mz_ && b_pos < max_mz_)
      {
        p.setPosition(b_pos + Constants::PROTON_MASS_U);
        p.setIntensity(1.0f);
        spec.push_back(p);
      }

      if (y_pos > min_mz_ && y_pos < max_mz_)
      {
        p.setPosition(y_pos + Constants::PROTON_MASS_U);
        p.setIntensity(1.0f);
        spec.push_back(p);
      }
    }

    spec.sortByPosition();
    return;
  }
 /**
   @brief Loads a spectrum from a DTA file and looks up candidates for its peak positions.

   The spectrum is read with DTAFile::load(), sorted by position, and the first
   coordinate of every peak position is collected into a vector that is handed
   to the getCandidates() overload taking a vector of doubles.

   @param candidates Output container, filled by the vector-based overload.
   @param DTA_file   Path of the DTA file to load.
 */
 void SuffixArrayPeptideFinder::getCandidates(vector<vector<pair<SuffixArrayPeptideFinder::FASTAEntry, String> > >& candidates, const String& DTA_file)
 {
   DTAFile dta_file;
   PeakSpectrum s;
   dta_file.load(DTA_file, s);
   s.sortByPosition();

   vector<double> positions;
   positions.reserve(s.size()); // one allocation instead of repeated growth
   for (PeakSpectrum::ConstIterator it = s.begin(); it != s.end(); ++it)
   {
     positions.push_back(it->getPosition()[0]);
   }

   // Pass a const view instead of a const copy: overload resolution sees the
   // same argument type as before, but the vector is not duplicated.
   const vector<double>& positions_view = positions;
   getCandidates(candidates, positions_view);
   return;
 }
  /**
    @brief Keeps at most @p no_peaks of the most intense peaks in every sliding window.

    A window is opened at every peak of @p spec and extends over all following
    peaks whose position is less than @p windowsize away from the window start.
    Within each window the peaks are sorted by intensity (descending) and all
    peaks beyond the first @p no_peaks are marked for removal. Window scanning
    stops as soon as a window has reached the last peak. Afterwards @p spec is
    rebuilt from all peaks that were not marked (comparison via Peak1D equality)
    and sorted by position.

    @param spec       Spectrum to filter in place; presumably expected to be sorted by position - TODO confirm with callers.
    @param windowsize Width of the sliding window.
    @param no_peaks   Number of peaks retained per window.
  */
  void CompNovoIdentificationBase::windowMower_(PeakSpectrum & spec, DoubleReal windowsize, Size no_peaks)
  {
    // snapshot of the input; spec itself is cleared and refilled below
    const PeakSpectrum original(spec);
    vector<Peak1D> rejected;

    const Size peak_count = spec.size();
    bool reached_end = false;
    for (Size start = 0; start < peak_count && !reached_end; ++start)
    {
      // collect all peaks belonging to the window starting at 'start'
      PeakSpectrum window;
      Size cursor = start;
      while (spec[cursor].getPosition()[0] - spec[start].getPosition()[0] < windowsize)
      {
        window.push_back(spec[cursor]);
        ++cursor;
        if (cursor == peak_count)
        {
          // this window touched the last peak, no further windows are needed
          reached_end = true;
          break;
        }
      }

      // most intense peaks first; everything after the first no_peaks is rejected
      window.sortByIntensity(true);
      Size idx = no_peaks;
      while (idx < window.size())
      {
        rejected.push_back(Peak1D(window[idx]));
        ++idx;
      }
    }

    // rebuild the spectrum from the peaks that were never rejected
    spec.clear(false);
    for (Size n = 0; n != original.size(); ++n)
    {
      const bool is_rejected = (find(rejected.begin(), rejected.end(), original[n]) != rejected.end());
      if (!is_rejected)
      {
        spec.push_back(original[n]);
      }
    }

    spec.sortByPosition();

  }
Beispiel #4
0
 /**
   @brief Computes site-localization scores for the phosphorylations of a peptide hit.

   Works on a copy of @p hit whose score is first reset to -1. The copy is
   returned unchanged (score -1) if
   - @p real_spectrum is empty,
   - the unmodified sequence is longer than @p max_peptide_len,
   - there are no phosphorylation events, no candidate sites, or as many events as sites,
   - the number of site permutations exceeds @p max_num_perm,
   - fewer than two permutations could be ranked.

   Otherwise the sequence of the returned hit is set to the name of the
   best-ranked theoretical spectrum, and these meta values are written:
   "search_engine_sequence" (original sequence), "AScore_pep_score" (best
   weighted peptide score) and "AScore_<rank>" for each entry produced by
   determineHighestScoringPermutations_(). The hit's score is set to the
   smallest of the per-site Ascore values (or stays at
   std::numeric_limits<double>::max() if no site entry was produced).

   @param hit                      Input hit; not modified.
   @param real_spectrum            Experimental spectrum; sorted by position in place if it is not sorted yet.
   @param fragment_mass_tolerance  Tolerance forwarded to the matching helpers.
   @param fragment_mass_unit_ppm   Unit flag forwarded to the matching helpers.
   @param max_peptide_len          Maximum allowed length of the unmodified sequence.
   @param max_num_perm             Maximum allowed number of site permutations.
   @return The re-scored copy of @p hit.
 */
 PeptideHit AScore::compute(const PeptideHit & hit, PeakSpectrum & real_spectrum, double fragment_mass_tolerance, bool fragment_mass_unit_ppm, Size max_peptide_len, Size max_num_perm)
 {
   PeptideHit phospho = hit;
   
   //reset phospho
   phospho.setScore(-1);
   if (real_spectrum.empty())
   {
     return phospho;
   }
   
   String sequence_str = phospho.getSequence().toString();
   
   Size number_of_phosphorylation_events = numberOfPhosphoEvents_(sequence_str);
   AASequence seq_without_phospho = removePhosphositesFromSequence_(sequence_str);
   
   if (seq_without_phospho.toUnmodifiedString().size() > max_peptide_len)
   {
     LOG_DEBUG << "\tcalculation aborted: peptide too long: " << seq_without_phospho.toString() << std::endl;
     return phospho;
   }
   
   // determine all phospho sites
   vector<Size> sites(getSites_(seq_without_phospho));
   Size number_of_STY = sites.size();
   
   // nothing to localize: no events, no sites, or every site is occupied
   if (number_of_phosphorylation_events == 0 || number_of_STY == 0 || number_of_STY == number_of_phosphorylation_events)
   {
     return phospho;
   }
   
   vector<vector<Size> > permutations(computePermutations_(sites, (Int)number_of_phosphorylation_events));
   LOG_DEBUG << "\tnumber of permutations: " << permutations.size() << std::endl;
   
   // TODO: using a heuristic to calculate the best phospho sites if the number of permutations are exceeding the maximum.
   // A heuristic could be to calculate the best site for the first phosphorylation and based on this the best site for the second 
   // phosphorylation and so on until every site is determined
   if (permutations.size() > max_num_perm) 
   {
     LOG_DEBUG << "\tcalculation aborted: number of permutations exceeded" << std::endl;
     return phospho;
   }
     
   vector<PeakSpectrum> th_spectra(createTheoreticalSpectra_(permutations, seq_without_phospho));
   
   // prepare real spectrum windows
   if (!real_spectrum.isSorted())
   {
     real_spectrum.sortByPosition();
   }
   vector<PeakSpectrum> windows_top10(peakPickingPerWindowsInSpectrum_(real_spectrum));
   
   // calculate peptide score for each possible phospho site permutation
   vector<vector<double> > peptide_site_scores(calculatePermutationPeptideScores_(th_spectra, windows_top10, fragment_mass_tolerance, fragment_mass_unit_ppm));
   
   // rank peptide permutations ascending
   multimap<double, Size> ranking(rankWeightedPermutationPeptideScores_(peptide_site_scores));
   
   // The best and the second-best entry are dereferenced below. With fewer than
   // two ranked permutations (e.g. more phospho events than candidate sites)
   // that would walk past rend(), so abort before 'phospho' is modified.
   if (ranking.size() < 2)
   {
     LOG_DEBUG << "\tcalculation aborted: fewer than two ranked permutations" << std::endl;
     return phospho;
   }
   
   // highest key of the multimap == best permutation
   multimap<double, Size>::reverse_iterator rev = ranking.rbegin();
   String seq1 = th_spectra[rev->second].getName();
   phospho.setSequence(AASequence::fromString(seq1));
   phospho.setMetaValue("search_engine_sequence", hit.getSequence().toString());
   
   double peptide1_score = rev->first;
   phospho.setMetaValue("AScore_pep_score", peptide1_score); // initialize score with highest peptide score (aka highest weighted score)
   
   // second-best permutation
   ++rev;
   String seq2 = th_spectra[rev->second].getName();
   double peptide2_score = rev->first;
   
   vector<ProbablePhosphoSites> phospho_sites;
   determineHighestScoringPermutations_(peptide_site_scores, phospho_sites, permutations, ranking);
   
   Int rank = 1;
   double best_Ascore = std::numeric_limits<double>::max(); // the lower the better
   for (vector<ProbablePhosphoSites>::iterator s_it = phospho_sites.begin(); s_it != phospho_sites.end(); ++s_it)
   {
     double Ascore = 0;
     if (peptide1_score == peptide2_score) // set Ascore = 0 for each phosphorylation site
     {
       LOG_DEBUG << "\tscore of best (" << seq1 << ") and second best peptide (" << seq2 << ") are equal (" << peptide1_score << ")" << std::endl;
     }
     else
     {
       vector<PeakSpectrum> site_determining_ions;
       
       // NOTE(review): entries [0] and [1] are used below, i.e. this assumes
       // computeSiteDeterminingIons_ fills at least two spectra - confirm.
       computeSiteDeterminingIons_(th_spectra, *s_it, site_determining_ions, fragment_mass_tolerance, fragment_mass_unit_ppm);
       Size N = site_determining_ions[0].size(); // number of site-determining ions of the first peptide
       double p = static_cast<double>(s_it->peak_depth) / 100.0;
       
       Size n_first = 0; // number of matching peaks for first peptide
       for (Size window_idx = 0; window_idx != windows_top10.size(); ++window_idx) // for each 100 m/z window
       {
         n_first += numberOfMatchedIons_(site_determining_ions[0], windows_top10[window_idx], s_it->peak_depth, fragment_mass_tolerance, fragment_mass_unit_ppm);        
       }
       double P_first = computeCumulativeScore_(N, n_first, p);
       
       Size n_second = 0; // number of matching peaks for second peptide
       for (Size window_idx = 0; window_idx <  windows_top10.size(); ++window_idx) //each 100 m/z window
       {
         n_second += numberOfMatchedIons_(site_determining_ions[1], windows_top10[window_idx], s_it->peak_depth, fragment_mass_tolerance, fragment_mass_unit_ppm);        
       }
       Size N2 = site_determining_ions[1].size(); // number of site-determining ions of the second peptide
       double P_second = computeCumulativeScore_(N2, n_second, p);
       
       //abs is used to avoid -0 score values
       // NOTE(review): relies on the floating-point overload of abs being visible here - confirm.
       double score_first = abs(-10 * log10(P_first));
       double score_second = abs(-10 * log10(P_second));
       
       LOG_DEBUG << "\tfirst - N: " << N << ",p: " << p << ",n: " << n_first << ", score: " << score_first << std::endl;
       LOG_DEBUG << "\tsecond - N: " << N2 << ",p: " << p << ",n: " << n_second << ", score: " << score_second << std::endl;
       
       Ascore = score_first - score_second;
       LOG_DEBUG << "\tAscore_" << rank << ": " << Ascore << std::endl;
     }
     // the hit's final score is the minimum over all per-site Ascores
     if (Ascore < best_Ascore)
     {
       best_Ascore = Ascore;
     }
     phospho.setMetaValue("AScore_" + String(rank), Ascore);
     ++rank;      
   }
   phospho.setScore(best_Ascore);
   return phospho;
 }
  /**
    @brief Appends a theoretical fragment spectrum for @p sequence to @p spec.

    Walks over all residues except the last one. b_pos accumulates
    aa_to_weight_ from the front of the sequence (starting at @p prefix),
    y_pos accumulates it from the back (starting at H2O + @p suffix). For each
    step and each charge z in 1..min(charge, 2) the following peaks are
    considered; with the current guards only z == 1 actually adds peaks:
    - b-ion isotope peaks (max_isotope_ of them, intensity from isotope_distributions_ * 0.8),
    - b-ion H2O loss (once S/T/E/D was seen in range) and NH3 loss (once Q/N/R/K was seen in range), intensity 0.02,
    - a-ion (b minus CO), intensity 0.1,
    - y-ion isotope peaks (intensity from isotope_distributions_),
    - y-ion H2O loss (intensity 0.1, or 0.5 if the current residue is Q),
    - y-ion NH3 loss (once Q/N/R/K was seen in range), intensity 0.1.
    Finally @p spec is sorted by position.

    @param spec     Spectrum the peaks are appended to (existing peaks are kept).
    @param sequence Residue string; sequences with fewer than two residues add no peaks.
    @param charge   Upper bound for the charge loop (capped at 2).
    @param prefix   Mass offset added to the front-side running mass.
    @param suffix   Mass offset added to the back-side running mass.
  */
  void CompNovoIdentificationBase::getCIDSpectrum_(PeakSpectrum & spec, const String & sequence, Size charge, DoubleReal prefix, DoubleReal suffix)
  {
    static DoubleReal h2o_mass = EmpiricalFormula("H2O").getMonoWeight();
    static DoubleReal nh3_mass = EmpiricalFormula("NH3").getMonoWeight();
    static DoubleReal co_mass = EmpiricalFormula("CO").getMonoWeight();
    Peak1D p;
    DoubleReal b_pos(0 + prefix);
    DoubleReal y_pos(h2o_mass + suffix);
    // sticky flags: once a loss-prone residue produced a loss peak, all longer fragments get one too
    bool b_H2O_loss(false), b_NH3_loss(false), y_NH3_loss(false);

    // "i + 1 < size()" instead of "i != size() - 1": the latter underflows for
    // an empty sequence and would index far out of bounds.
    for (Size i = 0; i + 1 < sequence.size(); ++i)
    {
      char aa(sequence[i]);
      b_pos += aa_to_weight_[aa];

      char aa2(sequence[sequence.size() - i - 1]);
      y_pos += aa_to_weight_[aa2];
      for (Size z = 1; z <= charge && z < 3; ++z)
      {
        // b-ions
        if (b_pos >= min_mz_ && b_pos <= max_mz_)
        {
          for (Size j = 0; j != max_isotope_; ++j)
          {
            if (z == 1 /*|| b_pos > MIN_DOUBLE_MZ*/)
            {
              // j-th isotope peak is shifted by j neutron masses (was "j + NEUTRON_MASS_U",
              // which displaced every b-ion peak; now matches the y-ion computation below)
              p.setPosition((b_pos + (DoubleReal)z * Constants::PROTON_MASS_U + (DoubleReal)j * Constants::NEUTRON_MASS_U) / (DoubleReal)z);
              p.setIntensity(isotope_distributions_[(Size)b_pos][j] * 0.8 / (z * z));
              spec.push_back(p);
            }
          }
        }

        // b-ion losses
        if (b_pos - h2o_mass > min_mz_ && b_pos - h2o_mass < max_mz_)
        {
          if (b_H2O_loss || aa == 'S' || aa == 'T' || aa == 'E' || aa == 'D')
          {
            b_H2O_loss = true;
            p.setPosition((b_pos + z * Constants::PROTON_MASS_U - h2o_mass) / z);
            p.setIntensity(0.02 / (DoubleReal)(z * z));
            if (z == 1 /* || b_pos > MIN_DOUBLE_MZ*/)
            {
              spec.push_back(p);
            }
          }
          if (b_NH3_loss || aa == 'Q' || aa == 'N' || aa == 'R' || aa == 'K')
          {
            b_NH3_loss = true;
            p.setPosition((b_pos + z * Constants::PROTON_MASS_U - nh3_mass) / z);
            p.setIntensity(0.02 / (DoubleReal)(z * z));

            if (z == 1 /* || b_pos > MIN_DOUBLE_MZ*/)
            {
              spec.push_back(p);
            }
          }
        }

        // a-ions only for charge 1
        if (z == 1)
        {
          if (b_pos - co_mass > min_mz_ && b_pos - co_mass < max_mz_)
          {
            // a-ions
            p.setPosition((b_pos + z * Constants::PROTON_MASS_U - co_mass) / (DoubleReal)z);
            p.setIntensity(0.1f);
            spec.push_back(p);
          }
        }



        if (y_pos > min_mz_ && y_pos < max_mz_)
        {
          // y-ions
          for (Size j = 0; j != max_isotope_; ++j)
          {
            if (z == 1 /* || y_pos > MIN_DOUBLE_MZ*/)
            {
              p.setPosition((y_pos + (DoubleReal)z * Constants::PROTON_MASS_U + (DoubleReal)j * Constants::NEUTRON_MASS_U) / (DoubleReal)z);
              p.setIntensity(isotope_distributions_[(Size)y_pos][j] / (DoubleReal) (z * z));
              spec.push_back(p);
            }
          }

          // H2O loss
          p.setPosition((y_pos + z * Constants::PROTON_MASS_U - h2o_mass) / (DoubleReal)z);
          p.setIntensity(0.1 / (DoubleReal)(z * z));
          if (aa2 == 'Q')           // pyroglutamic acid formation
          {
            p.setIntensity(0.5f);
          }
          if (z == 1 /* || y_pos > MIN_DOUBLE_MZ*/)
          {
            spec.push_back(p);
          }

          // NH3 loss
          if (y_NH3_loss || aa2 == 'Q' || aa2 == 'N' || aa2 == 'R' || aa2 == 'K')
          {
            y_NH3_loss = true;
            p.setPosition((y_pos + z * Constants::PROTON_MASS_U - nh3_mass) / (DoubleReal)z);
            p.setIntensity(0.1 / (DoubleReal)(z * z));

            if (z == 1 /*|| y_pos > MIN_DOUBLE_MZ*/)
            {
              spec.push_back(p);
            }
          }
        }
      }
    }

    // if Q1 abundant loss of water -> pyroglutamic acid formation
    //
    // Disabled special case, formerly guarded by
    //   if (sequence[0] == 'Q' && prefix == 0 && suffix == 0)
    // The guard had an empty body (and read sequence[0] even for an empty
    // sequence), so only the disabled code is kept here for reference:
    //
    //   for (PeakSpectrum::Iterator it = spec.begin(); it != spec.end(); ++it)
    //   {
    //     it->setIntensity(it->getIntensity() * 0.5);
    //   }
    //
    //   for (Size j = 0; j != max_isotope; ++j)
    //   {
    //     p.setPosition((precursor_weight + charge - 1 + j)/(DoubleReal)charge);
    //     p.setIntensity(isotope_distributions_[(Int)p.getPosition()[0]][j] * 0.1);
    //     spec.push_back(p);
    //   }


    spec.sortByPosition();

    return;
  }