/**
  @brief Appends a light-weight theoretical CID spectrum of @p sequence to @p spec.

  The sequence is walked from the front (b-type positions, starting at @p prefix)
  and simultaneously from the back (y-type positions, starting at the H2O mass
  plus @p suffix). Every accumulated position lying strictly inside
  (min_mz_, max_mz_) is added as a peak of intensity 1.0 at
  position + Constants::PROTON_MASS_U. The loop stops one residue short of the
  full length, so the full-length fragment of either series is never generated.
  Existing peaks of @p spec are kept; the spectrum is sorted by position at the end.

  @param spec Spectrum the peaks are appended to
  @param sequence Residue letters, each looked up in aa_to_weight_
  @param prefix Offset added to every b-type position
  @param suffix Offset added to every y-type position
*/
void CompNovoIdentificationBase::getCIDSpectrumLight_(PeakSpectrum & spec, const String & sequence, DoubleReal prefix, DoubleReal suffix)
  {
    static const DoubleReal h2o_mass = EmpiricalFormula("H2O").getMonoWeight();

    // all generated peaks share the same intensity
    Peak1D p;
    p.setIntensity(1.0f);

    DoubleReal b_pos(0.0 + prefix);
    DoubleReal y_pos(h2o_mass + suffix);

    const Size length = sequence.size();
    // "i + 1 < length" rather than "i != length - 1": the latter underflows for
    // an empty sequence and would index far out of bounds
    for (Size i = 0; i + 1 < length; ++i)
    {
      char aa(sequence[i]);
      b_pos += aa_to_weight_[aa];

      char aa2(sequence[length - i - 1]);
      y_pos += aa_to_weight_[aa2];

      // the range check is done on the position without the added proton mass
      if (b_pos > min_mz_ && b_pos < max_mz_)
      {
        p.setPosition(b_pos + Constants::PROTON_MASS_U);
        spec.push_back(p);
      }

      if (y_pos > min_mz_ && y_pos < max_mz_)
      {
        p.setPosition(y_pos + Constants::PROTON_MASS_U);
        spec.push_back(p);
      }
    }

    spec.sortByPosition();
  }
Ejemplo n.º 2
0
 /**
   @brief Splits a spectrum into consecutive windows of width 100 (in getMZ() units) and keeps up to ten peaks per window.

   The window grid starts at the first peak's m/z rounded down to a multiple of
   100 and ends at the last peak's m/z rounded up to a multiple of 100. A peak
   whose m/z equals a window's upper bound is assigned to that (lower) window.
   Within each window the peaks are ordered with sortByIntensity(true) and the
   first ten (or fewer) are kept.

   NOTE(review): the walk assumes @p real_spectrum is sorted by ascending m/z - confirm at the call sites.

   @param real_spectrum Input spectrum (not modified here)
   @return One spectrum per window; empty if @p real_spectrum has no peaks
 */
 std::vector<PeakSpectrum> AScore::peakPickingPerWindowsInSpectrum_(PeakSpectrum &real_spectrum) const
 {
   vector<PeakSpectrum> windows_top10;

   // front()/back() below must not be called on an empty spectrum
   if (real_spectrum.empty())
   {
     return windows_top10;
   }

   const double spect_lower_bound = floor(real_spectrum.front().getMZ() / 100) * 100;
   const double spect_upper_bound = ceil(real_spectrum.back().getMZ() / 100) * 100;

   const Size number_of_windows = static_cast<Size>(ceil((spect_upper_bound - spect_lower_bound) / 100));
   windows_top10.resize(number_of_windows);

   PeakSpectrum::Iterator it_current_peak = real_spectrum.begin();
   // kept as double: it is only ever compared against m/z values
   double window_upper_bound = spect_lower_bound + 100;

   for (Size current_window = 0; current_window < number_of_windows; ++current_window)
   {
     PeakSpectrum real_window;
     // test for end() first so that an exhausted iterator is never dereferenced
     while (it_current_peak != real_spectrum.end() && it_current_peak->getMZ() <= window_upper_bound)
     {
       real_window.push_back(*it_current_peak);
       ++it_current_peak;
     }

     // presumably 'true' sorts descending, so the leading entries are the most intense
     real_window.sortByIntensity(true);
     for (Size i = 0; i < 10 && i < real_window.size(); ++i)
     {
       windows_top10[current_window].push_back(real_window[i]);
     }

     window_upper_bound += 100;
   }
   return windows_top10;
 }
  /**
    @brief Similarity of two spectra based on the peak pairs found by SpectrumAlignment.

    The score is
      sum over aligned pairs of sqrt(I1 * I2 * factor)
      divided by sqrt(sum(I1^2) * sum(I2^2)),
    where the second line sums over all peaks of each spectrum. 'factor' is 1
    unless 'use_linear_factor' or 'use_gaussian_factor' is set, in which case it
    is obtained from getFactor_() using the pair's m/z difference and the
    tolerance (scaled by the s1 peak position / 1e6 when 'is_relative_tolerance' is set).

    @param s1 First spectrum
    @param s2 Second spectrum
    @return The score; 0 if the normalisation term is zero (e.g. an empty or all-zero spectrum)
  */
  double SpectrumAlignmentScore::operator()(const PeakSpectrum & s1, const PeakSpectrum & s2) const
  {
    const double tolerance = (double)param_.getValue("tolerance");
    const bool is_relative_tolerance = param_.getValue("is_relative_tolerance").toBool();
    const bool use_linear_factor = param_.getValue("use_linear_factor").toBool();
    const bool use_gaussian_factor = param_.getValue("use_gaussian_factor").toBool();

    if (use_linear_factor && use_gaussian_factor)
    {
      cerr << "Warning: SpectrumAlignmentScore, use either 'use_linear_factor' or 'use_gaussian_factor'!" << endl;
    }

    // the aligner is run with the same tolerance settings as this score
    SpectrumAlignment aligner;
    Param p;
    p.setValue("tolerance", tolerance);
    p.setValue("is_relative_tolerance", (String)param_.getValue("is_relative_tolerance"));
    aligner.setParameters(p);

    vector<pair<Size, Size> > alignment;
    aligner.getSpectrumAlignment(alignment, s1, s2);

    // squared-intensity sums used for normalisation
    double sum1(0), sum2(0);
    for (PeakSpectrum::ConstIterator it1 = s1.begin(); it1 != s1.end(); ++it1)
    {
      sum1 += it1->getIntensity() * it1->getIntensity();
    }

    for (PeakSpectrum::ConstIterator it2 = s2.begin(); it2 != s2.end(); ++it2)
    {
      sum2 += it2->getIntensity() * it2->getIntensity();
    }

    // without this guard an empty/all-zero spectrum yields 0/0 = NaN
    const double denominator = sqrt(sum1 * sum2);
    if (denominator == 0.0)
    {
      return 0.0;
    }

    const bool use_factor = use_linear_factor || use_gaussian_factor;
    double sum(0);
    for (vector<pair<Size, Size> >::const_iterator it = alignment.begin(); it != alignment.end(); ++it)
    {
      double mz_tolerance(tolerance);
      if (is_relative_tolerance)
      {
        // ppm -> absolute tolerance at the position of the s1 peak
        mz_tolerance = mz_tolerance * s1[it->first].getPosition()[0] / 1e6;
      }

      double mz_difference(fabs(s1[it->first].getPosition()[0] - s2[it->second].getPosition()[0]));
      double factor = 1.0;
      if (use_factor)
      {
        factor = getFactor_(mz_tolerance, mz_difference, use_gaussian_factor);
      }
      sum += sqrt(s1[it->first].getIntensity() * s2[it->second].getIntensity() * factor);
    }

    return sum / denominator;
  }
Ejemplo n.º 4
0
  double XQuestScores::xCorrelationPrescore(const PeakSpectrum & spec1, const PeakSpectrum & spec2, double tolerance)
  {
    // return 0 = no correlation, when one of the spectra is empty
    if (spec1.size() == 0 || spec2.size() == 0) {
      return 0.0;
    }

    double maxionsize = std::max(spec1[spec1.size()-1].getMZ(), spec2[spec2.size()-1].getMZ());
    Int table_size = ceil(maxionsize / tolerance)+1;
    std::vector< double > ion_table1(table_size, 0);
    std::vector< double > ion_table2(table_size, 0);

    // Build tables of the same size, each bin has the size of the tolerance
    for (Size i = 0; i < spec1.size(); ++i)
    {
      Size pos = static_cast<Size>(ceil(spec1[i].getMZ() / tolerance));
      ion_table1[pos] = 1;
    }
    for (Size i = 0; i < spec2.size(); ++i)
    {
      Size pos =static_cast<Size>(ceil(spec2[i].getMZ() / tolerance));
      ion_table2[pos] = 1;

    }

    double dot_product = 0.0;
    for (Size i = 0; i < ion_table1.size(); ++i)
    {
      dot_product += ion_table1[i] * ion_table2[i];
    }

    // determine the smaller spectrum and normalize by the number of peaks in it
    double peaks = std::min(spec1.size(), spec2.size());
    return dot_product / peaks;
  }
Ejemplo n.º 5
0
 /**
   @brief Convenience overload: reads a spectrum from a DTA file and searches candidates for its peak positions.

   The file is loaded with DTAFile, the spectrum is sorted by position and the
   first coordinate of every peak position is handed to the overload taking a
   vector of doubles.

   @param candidates Output, filled by the vector-based overload
   @param DTA_file Path of the DTA file to load
 */
 void SuffixArrayPeptideFinder::getCandidates(vector<vector<pair<SuffixArrayPeptideFinder::FASTAEntry, String> > >& candidates, const String& DTA_file)
 {
   PeakSpectrum loaded;
   DTAFile reader;
   reader.load(DTA_file, loaded);
   loaded.sortByPosition();

   // collect the positions in (sorted) peak order
   vector<double> positions;
   for (Size idx = 0; idx < loaded.size(); ++idx)
   {
     positions.push_back(loaded[idx].getPosition()[0]);
   }

   // pass as a const vector so the same overload as before is selected
   const vector<double>& positions_view = positions;
   getCandidates(candidates, positions_view);
 }
Ejemplo n.º 6
0
  /**
    @brief Builds one spectrum per peak level, each holding the peaks whose rank does not exceed that level.

    For every index i in @p ranks, peak spec[i] is appended to all levels L with
    max(ranks[i], min_level) <= L <= max_level. Levels that receive no peak do
    not appear in the result.

    NOTE(review): spec[i] is accessed for every i < ranks.size(); presumably
    @p ranks has one entry per peak of @p spec - confirm.

    @param spec Source spectrum
    @param ranks Zero-based rank of each peak
    @param min_level Lowest (most restrictive) level
    @param max_level Highest (least restrictive) level
    @return Map from level to the spectrum of that level; empty if @p spec is empty
  */
  map<Size, PeakSpectrum > PScore::calculatePeakLevelSpectra(const PeakSpectrum& spec, const vector<Size>& ranks, Size min_level, Size max_level)
  {
    map<Size, MSSpectrum<Peak1D> > spectra_by_level;

    if (!spec.empty())
    {
      // signed copies so that counting down past level 0 terminates
      const int lowest_level = static_cast<int>(min_level);
      const int highest_level = static_cast<int>(max_level);

      for (Size peak = 0; peak < ranks.size(); ++peak)
      {
        const int rank = static_cast<int>(ranks[peak]);

        // descend from the least restrictive level; once the rank exceeds a
        // level it also exceeds every lower one
        int level = highest_level;
        while (level >= lowest_level && rank <= level)
        {
          spectra_by_level[level].push_back(spec[peak]);
          --level;
        }
      }
    }

    return spectra_by_level;
  }
  /**
    @brief Removes peaks that are not among the first @p no_peaks of some sliding window after sorting that window by intensity.

    A window is opened at every peak and extends over all following peaks whose
    position is less than @p windowsize away. Each window is ordered with
    sortByIntensity(true); every peak beyond the first @p no_peaks is marked
    for removal. Once a window reaches the last peak of the spectrum no
    further windows are opened. Finally @p spec is rebuilt from all peaks that
    compare unequal to every marked peak and is sorted by position.

    @param spec Spectrum to filter (modified in place)
    @param windowsize Width of the sliding window
    @param no_peaks Number of peaks retained per window
  */
  void CompNovoIdentificationBase::windowMower_(PeakSpectrum & spec, DoubleReal windowsize, Size no_peaks)
  {
    const PeakSpectrum original(spec);
    vector<Peak1D> rejected;

    for (Size first = 0; first < spec.size(); ++first)
    {
      PeakSpectrum window;
      bool hit_end(false);

      // gather all peaks closer than windowsize to the window's first peak
      Size last = first;
      while (spec[last].getPosition()[0] - spec[first].getPosition()[0] < windowsize)
      {
        window.push_back(spec[last]);
        ++last;
        if (last == spec.size())
        {
          hit_end = true;
          break;
        }
      }

      window.sortByIntensity(true);

      // everything after the first no_peaks entries is scheduled for removal
      for (Size k = no_peaks; k < window.size(); ++k)
      {
        rejected.push_back(window[k]);
      }

      // the window touched the end of the spectrum: later windows are not examined
      if (hit_end)
      {
        break;
      }
    }

    // rebuild the spectrum from the untouched copy, skipping rejected peaks
    spec.clear(false);
    for (Size idx = 0; idx < original.size(); ++idx)
    {
      if (find(rejected.begin(), rejected.end(), original[idx]) != rejected.end())
      {
        continue;
      }
      spec.push_back(original[idx]);
    }

    spec.sortByPosition();
  }
Ejemplo n.º 8
0
  /**
    @brief Negative log of the binomial tail probability of observing more than @p matched_size random matches.

    The a priori probability of a random match is
      p = 1 - (1 - 2 * tolerance / range)^N,
    with N the number of theoretical peaks. In ppm mode the range is taken on a
    log scale (log(last m/z) - log(first m/z)) and the tolerance is divided by
    1e6; otherwise both are used as given. The score is
    -log(1 - cdf(Binomial(N, p), matched_size) + DBL_MIN), floored at 0.

    @param theoretical_spec Theoretical spectrum; first and last peak define the range
    @param matched_size Number of matched peaks
    @param fragment_mass_tolerance Tolerance, absolute or ppm
    @param fragment_mass_tolerance_unit_ppm True if the tolerance is given in ppm
    @return The score; 0 if nothing matched, the spectrum is empty, or the range is not positive
  */
  double XQuestScores::logOccupancyProb(const PeakSpectrum& theoretical_spec,  const Size matched_size, double fragment_mass_tolerance, bool fragment_mass_tolerance_unit_ppm)
  {
    using boost::math::binomial;
    const Size theo_size = theoretical_spec.size();

    if (matched_size < 1 || theo_size < 1)
    {
      return 0;
    }

    double range;
    double used_tolerance;

    if (fragment_mass_tolerance_unit_ppm)
    {
      range = std::log(theoretical_spec.back().getMZ()) - std::log(theoretical_spec[0].getMZ());
      used_tolerance = fragment_mass_tolerance / 1e6;
    }
    else
    {
      range = theoretical_spec.back().getMZ() - theoretical_spec[0].getMZ();
      used_tolerance = fragment_mass_tolerance;
    }

    // A single peak (or equal first/last m/z) gives range == 0 and a division by
    // zero below; a NaN range (log of a non-positive m/z) is rejected as well.
    if (!(range > 0.0))
    {
      return 0;
    }

    // Probability that one theoretical peak is NOT hit at random. A tolerance
    // window wider than the range would make this negative and lead to an
    // a priori probability outside [0, 1], which the binomial distribution rejects.
    double miss_p = 1 - 2 * used_tolerance / range;
    if (miss_p < 0.0)
    {
      miss_p = 0.0;
    }
    else if (miss_p > 1.0)
    {
      miss_p = 1.0;
    }

    // A priori probability of a random match given info about the theoretical spectrum
    const double a_priori_p = 1 - pow(miss_p, static_cast<double>(theo_size));

    binomial flip(theo_size, a_priori_p);
    // min double number to avoid 0 values, causing scores with the value "inf"
    const double log_occu_prob = -log(1 - cdf(flip, matched_size) + std::numeric_limits<double>::min());

    // a score below 0 does not make sense, but can happen: if cdf = 0, then -log(1 + <double>::min()) < 0
    if (log_occu_prob >= 0.0)
    {
      return log_occu_prob;
    }
    return 0;
  }
Ejemplo n.º 9
0
  std::vector< double > XQuestScores::xCorrelation(const PeakSpectrum & spec1, const PeakSpectrum & spec2, Int maxshift, double tolerance)
  {
    // generate vector of results, filled with zeroes
    std::vector< double > results(maxshift * 2 + 1, 0);

    // return 0 = no correlation, when one of the spectra is empty
    if (spec1.size() == 0 || spec2.size() == 0) {
      return results;
    }

    double maxionsize = std::max(spec1[spec1.size()-1].getMZ(), spec2[spec2.size()-1].getMZ());
    Int table_size = ceil(maxionsize / tolerance)+1;
    std::vector< double > ion_table1(table_size, 0);
    std::vector< double > ion_table2(table_size, 0);

    // Build tables of the same size, each bin has the size of the tolerance
    for (Size i = 0; i < spec1.size(); ++i)
    {
      Size pos = static_cast<Size>(ceil(spec1[i].getMZ() / tolerance));
      ion_table1[pos] = 10.0;
    }
    for (Size i = 0; i < spec2.size(); ++i)
    {
      Size pos =static_cast<Size>(ceil(spec2[i].getMZ() / tolerance));
      ion_table2[pos] = 10.0;
    }

    // Compute means
    double mean1 = (std::accumulate(ion_table1.begin(), ion_table1.end(), 0.0)) / table_size;
    double mean2 = (std::accumulate(ion_table2.begin(), ion_table2.end(), 0.0)) / table_size;

    // Compute denominator
    double s1 = 0;
    double s2 = 0;
    for (Int i = 0; i < table_size; ++i)
    {
      s1 += pow((ion_table1[i] - mean1), 2);
      s2 += pow((ion_table2[i] - mean2), 2);
    }
    double denom = sqrt(s1 * s2);

    // Calculate correlation for each shift
    for (Int shift = -maxshift; shift <= maxshift; ++shift)
    {
      double s = 0;
      for (Int i = 0; i < table_size; ++i)
      {
        Int j = i + shift;
        if ( (j >= 0) && (j < table_size))
        {
          s += (ion_table1[i] - mean1) * (ion_table2[j] - mean2);
        }
      }
      if (denom > 0)
      {
        results[shift + maxshift] = s / denom;
      }
    }
    return results;
  }
  /**
  @brief Pairwise similarity score of two spectra.

  All peak pairs (one peak from each spectrum) whose m/z values differ by at
  most 2 * 'tolerance' contribute the product of their intensities to a sum.
  From that sum the term (tolerance / 10000) * sum(I1) * sum(I2) is subtracted
  and the result is divided by sqrt(sum(I1^2) * sum(I2^2)). Scores below the
  'threshold' parameter are reported as 0.

  NOTE(review): the inner scan breaks at the first non-matching s2 peak right of
  the s1 peak and remembers the last s2 peak left of it, so both spectra are
  presumably expected to be sorted by ascending m/z - confirm.

  @param s1  const PeakSpectrum Spectrum 1
  @param s2  const PeakSpectrum Spectrum 2
  @return The score; 0 if below the threshold or if the normalisation term is zero
  */
  double SteinScottImproveScore::operator()(const PeakSpectrum & s1, const PeakSpectrum & s2) const
  {
    const double epsilon = (double)param_.getValue("tolerance");
    const double constant = epsilon / 10000;

    // sum1/sum2: squared intensities, sum3/sum4: plain intensities
    double sum(0), sum1(0), sum2(0), sum3(0), sum4(0);

    for (PeakSpectrum::ConstIterator it1 = s1.begin(); it1 != s1.end(); ++it1)
    {
      const double temp = it1->getIntensity();
      sum1 += temp * temp;
      sum3 += temp;
    }

    for (PeakSpectrum::ConstIterator it2 = s2.begin(); it2 != s2.end(); ++it2)
    {
      const double temp = it2->getIntensity();
      sum2 += temp * temp;
      sum4 += temp;
    }

    // an empty or all-zero spectrum would otherwise produce 0/0 = NaN, which
    // also slips through the threshold comparison below
    const double denominator = std::sqrt(sum1 * sum2);
    if (denominator == 0.0)
    {
      return 0;
    }

    const double z = constant * (sum3 * sum4);

    // j_left: s2 index from which the scan for the next s1 peak starts
    Size j_left(0);
    for (Size i = 0; i != s1.size(); ++i)
    {
      const double pos1(s1[i].getMZ());
      for (Size j = j_left; j != s2.size(); ++j)
      {
        const double pos2(s2[j].getMZ());
        if (std::abs(pos1 - pos2) <= 2 * epsilon)
        {
          sum += s1[i].getIntensity() * s2[j].getIntensity();
        }
        else if (pos2 > pos1)
        {
          // s2 peak is already right of the match window
          break;
        }
        else
        {
          // s2 peak is left of the match window: later s1 peaks can start here
          j_left = j;
        }
      }
    }

    double score = (sum - z) / denominator;
    if (score < (float)param_.getValue("threshold"))
    {
      score = 0;
    }

    return score;
  }
Ejemplo n.º 11
0
  /**
    @brief Negative log of the binomial tail probability of observing more than @p matched_size random matches.

    The a priori probability of a random match is
      p = 1 - (1 - 2 * tol / (0.5 * range))^E,
    where range is last m/z - first m/z of the theoretical spectrum, tol is the
    fragment tolerance (for ppm: converted at the mean m/z of the theoretical
    peaks, a rough approximation) and E is the number of theoretical peaks,
    divided by @p n_charges for cross-link spectra. The score is
    -log(1 - cdf(Binomial(N, p), matched_size) + DBL_MIN), floored at 0.

    @param theoretical_spec Theoretical spectrum; first and last peak define the range
    @param matched_size Number of matched peaks
    @param fragment_mass_tolerance Tolerance, absolute or ppm
    @param fragment_mass_tolerance_unit_ppm True if the tolerance is given in ppm
    @param is_xlink_spectrum Selects the exponent divided by @p n_charges
    @param n_charges Divisor of the exponent for cross-link spectra
    @return The score; 0 if nothing matched, the spectrum is empty, or the range is not positive
  */
  double XQuestScores::matchOddsScore(const PeakSpectrum& theoretical_spec,  const Size matched_size, double fragment_mass_tolerance, bool fragment_mass_tolerance_unit_ppm, bool is_xlink_spectrum, Size n_charges)
  {
    using boost::math::binomial;
    const Size theo_size = theoretical_spec.size();

    if (matched_size < 1 || theo_size < 1)
    {
      return 0;
    }

    const double range = theoretical_spec[theo_size-1].getMZ() -  theoretical_spec[0].getMZ();

    // A single peak (or equal first/last m/z) gives range == 0 and a division by zero below
    if (!(range > 0.0))
    {
      return 0;
    }

    // Compute fragment tolerance in Da for the mean of MZ values, if tolerance in ppm (rough approximation)
    double tolerance_Th = fragment_mass_tolerance;
    if (fragment_mass_tolerance_unit_ppm)
    {
      double mean = 0.0;
      for (Size i = 0; i < theo_size; ++i)
      {
        mean += theoretical_spec[i].getMZ();
      }
      mean = mean / theo_size;
      tolerance_Th = mean * 1e-6 * fragment_mass_tolerance;
    }

    // Probability that one theoretical peak is NOT hit at random. A tolerance
    // window wider than the (halved) range would make this negative and lead to
    // NaN or an a priori probability outside [0, 1], which the binomial distribution rejects.
    double miss_p = 1 - 2 * tolerance_Th / (0.5 * range);
    if (miss_p < 0.0)
    {
      miss_p = 0.0;
    }
    else if (miss_p > 1.0)
    {
      miss_p = 1.0;
    }

    // both branches now use a double exponent (one used to be cast to int)
    double exponent = static_cast<double>(theo_size);
    if (is_xlink_spectrum)
    {
      exponent = static_cast<double>(theo_size) / static_cast<double>(n_charges);
    }

    // A priori probability of a random match given info about the theoretical spectrum
    const double a_priori_p = 1 - pow(miss_p, exponent);

    binomial flip(theo_size, a_priori_p);
    // min double number to avoid 0 values, causing scores with the value "inf"
    const double match_odds = -log(1 - cdf(flip, matched_size) + std::numeric_limits<double>::min());

    // a score below 0 does not make sense, but can happen: if cdf = 0, -log(1 + min()) < 0
    if (match_odds >= 0.0)
    {
      return match_odds;
    }
    return 0;
  }
  // Compares two spectra in one simultaneous pass over both peak lists.
  // s1 should be the original spectrum: only its intensities enter the score.
  // Whenever the two current peaks are closer than fragment_mass_tolerance_, the
  // s1 intensity is added and the match counter is increased. The summed
  // intensity is divided by sqrt(number of matches); 0 is returned without any match.
  DoubleReal CompNovoIdentificationBase::compareSpectra_(const PeakSpectrum & s1, const PeakSpectrum & s2)
  {
    DoubleReal matched_intensity(0.0);
    Size hits(0);

    PeakSpectrum::ConstIterator left = s1.begin();
    PeakSpectrum::ConstIterator right = s2.begin();

    for (; left != s1.end() && right != s2.end(); )
    {
      const DoubleReal mz_left(left->getPosition()[0]);
      const DoubleReal mz_right(right->getPosition()[0]);

      if (fabs(mz_left - mz_right) < fragment_mass_tolerance_)
      {
        matched_intensity += left->getIntensity();
        ++hits;
      }

      // advance the side with the smaller position; s1 advances on ties
      if (mz_left <= mz_right)
      {
        ++left;
      }
      else
      {
        ++right;
      }
    }

    if (hits == 0)
    {
      return 0;
    }

    return matched_intensity / sqrt((DoubleReal)hits);
  }
Ejemplo n.º 13
0
 /**
   @brief Counts the theoretical peaks that have a peak of the (truncated) window within the mass tolerance.

   Only the first @p depth peaks of @p window are considered; they are then
   sorted by position. For every peak of @p th the nearest window peak is
   looked up and counted as matched if the m/z error is strictly below
   @p fragment_mass_tolerance. In ppm mode the error is expressed relative to
   the window peak's m/z.

   NOTE(review): truncating to @p depth before sorting presumably relies on
   @p window being ordered by descending intensity - confirm at the call sites.

   @param th Theoretical spectrum
   @param window Observed peaks of one window
   @param depth Maximum number of window peaks used
   @param fragment_mass_tolerance Tolerance, absolute or ppm
   @param fragment_mass_tolerance_ppm True if the tolerance is given in ppm
   @return Number of matched theoretical peaks
 */
 Size AScore::numberOfMatchedIons_(const PeakSpectrum & th, const PeakSpectrum & window, Size depth, double fragment_mass_tolerance, bool fragment_mass_tolerance_ppm) const
 {
   PeakSpectrum window_reduced = window;
   if (window_reduced.size() > depth)
   {
     window_reduced.resize(depth);
   }

   Size n = 0;
   // nothing can match an empty window; avoids driving the loop through exceptions
   if (window_reduced.empty())
   {
     return n;
   }

   window_reduced.sortByPosition();

   for (Size i = 0; i < th.size(); ++i)
   {
     const double th_mz = th[i].getMZ();

     // any index >= size() means "no peak found"
     Size nearest_peak = window_reduced.size();
     try
     {
       nearest_peak = window_reduced.findNearest(th_mz);
     }
     catch (const Exception::Precondition &)
     {
       // best effort: treat a failed lookup as "no match" for this peak
     }

     if (nearest_peak < window_reduced.size())
     {
       const double window_mz = window_reduced[nearest_peak].getMZ();
       // std::fabs: unqualified abs() may resolve to the int overload and truncate the error
       double error = std::fabs(window_mz - th_mz);

       if (fragment_mass_tolerance_ppm)
       {
         error = error / window_mz * 1e6;
       }
       if (error < fragment_mass_tolerance)
       {
         ++n;
       }
     }
   }
   return n;
 }
Ejemplo n.º 14
0
  /**
    @brief Scores every CID peak for b-ion and y-ion evidence, using the CID spectrum itself and the ETD spectrum.

    For each CID peak two sums are accumulated and finally stored in
    ion_scores[position].s_bion and ion_scores[position].s_yion:
    - b_sum: CID peaks lying 28.0 below the peak ("a-ions") and ETD peaks lying 17.0 above it
    - y_sum: ETD peaks lying 16.0 below it
    Every contribution is weighted by a linear factor that is 1 for an exact mass
    match and falls to 0 at fragment_mass_tolerance_. ETD contributions are
    additionally multiplied by the number of entries collected by the isotope scan
    and are only added if is_isotope_1_mono of the CID peak is not -1.

    @param charge Unused (the parameter name is commented out)
    @param precursor_weight Used to skip ETD peaks in the range of the unfragmented precursor
    @param ion_scores Per-position score table; is_isotope_1_mono is read, s_bion and s_yion are written
    @param CID_spec Spectrum whose peaks are scored
    @param ETD_spec Spectrum searched for partner peaks
  */
  void CompNovoIonScoring::scoreETDFeatures_(Size /*charge*/, double precursor_weight, Map<double, IonScore> & ion_scores, const PeakSpectrum & CID_spec, const PeakSpectrum & ETD_spec)
  {
    //double fragment_mass_tolerance((double)param_.getValue("fragment_mass_tolerance"));
    // upper limit for the number of entries collected per isotope scan
    Size max_isotope_to_score(param_.getValue("max_isotope_to_score"));

    for (PeakSpectrum::ConstIterator it1 = CID_spec.begin(); it1 != CID_spec.end(); ++it1)
    {
      double pos1(it1->getPosition()[0]);
      double b_sum(0.0), y_sum(0.0);

      // score a-ions
      // (a CID peak 28.0 below pos1 supports pos1 being a b-ion)
      for (PeakSpectrum::ConstIterator it2 = CID_spec.begin(); it2 != CID_spec.end(); ++it2)
      {
        double pos2(it2->getPosition()[0]);
        if (fabs(pos1 - pos2 - 28.0) < fragment_mass_tolerance_)
        {
          // linear weight: 1 at an exact match, 0 at the tolerance border
          double factor((fragment_mass_tolerance_ - fabs(pos1 - pos2 - 28.0)) / fragment_mass_tolerance_);
#ifdef SCORE_ETDFEATURES_DEBUG
          cerr << "scoreETDFeatures: found a-ion " << pos1 << " (" << pos2 << ") (factor=" << factor << ") " << b_sum << " -> ";
#endif
          b_sum += it2->getIntensity() * factor;
#ifdef SCORE_ETDFEATURES_DEBUG
          cerr << endl;
#endif
        }
      }

      for (PeakSpectrum::ConstIterator it2 = ETD_spec.begin(); it2 != ETD_spec.end(); ++it2)
      {
        double pos2(it2->getPosition()[0]);

        // check if pos2 is precursor doubly charged, which has not fragmented
        // (range: (M + 1 proton) / 2 - tolerance  ..  (M + 4 protons) / 2 + tolerance)
        double pre_diff_lower = (precursor_weight + Constants::PROTON_MASS_U) / 2.0 - fragment_mass_tolerance_;
        double pre_diff_upper = (precursor_weight + 4.0 * Constants::PROTON_MASS_U) / 2.0 + fragment_mass_tolerance_;
        if (pos2 > pre_diff_lower && pos2 < pre_diff_upper)
        {
#ifdef SCORE_ETDFEATURES_DEBUG
          cerr << "scoreETDFeatures: pre-range: " << pos2 << " is in precursor peak range: " << pre_diff_lower << " <-> " << pre_diff_upper << endl;
#endif
          continue;
        }

        //double diff(pos2 - pos1);

        // pos1 is CID ion; pos2 is ETD ion
        // pos1 b-ion, pos2 c-ion
        // (the ETD peak is expected 17.0 above the CID peak)
        if (fabs(pos1 + 17.0 - pos2) < fragment_mass_tolerance_)
        {
          // now test if the ETD peak has "isotope" pattern
          double factor((fragment_mass_tolerance_ - fabs(pos1 + 17.0 - pos2)) / fragment_mass_tolerance_);
#ifdef SCORE_ETDFEATURES_DEBUG
          cerr << "scoreETDFeatures: is b-ion: " << pos1 << " (" << pos2 << ") (factor=" << factor << ") " << b_sum << " -> ";
#endif
          // NOTE(review): the pattern is seeded with the intensity and position of the CID
          // peak (it1), while the scan below walks the ETD peaks starting at it2 - confirm
          // this is intended. Only the number of collected entries is used afterwards.
          vector<double> iso_pattern;
          iso_pattern.push_back(it1->getIntensity());
          double actual_pos = it1->getPosition()[0];
          for (PeakSpectrum::ConstIterator it3 = it2; it3 != ETD_spec.end(); ++it3)
          {
            double it3_pos(it3->getPosition()[0]);
            // next entry: a peak one neutron mass away from the previously accepted position
            if (fabs(fabs(actual_pos - it3_pos) - Constants::NEUTRON_MASS_U) < fragment_mass_tolerance_)
            {
              iso_pattern.push_back(it3->getIntensity());
              actual_pos = it3_pos;
            }
            if (iso_pattern.size() == max_isotope_to_score)
            {
              break;
            }
          }

          // the ETD evidence only counts if is_isotope_1_mono of the CID peak is not -1
          if (ion_scores[it1->getPosition()[0]].is_isotope_1_mono != -1)
          {
            b_sum += it2->getIntensity() * iso_pattern.size() * factor;
          }
#ifdef SCORE_ETDFEATURES_DEBUG
          cerr << b_sum << endl;
#endif
        }



        // pos1 z-ion, pos2 y-ion
        // NOTE(review): per the comment further up pos1 is the CID ion and pos2 the ETD ion,
        // so the labels in the line above look swapped - confirm.
        // (the ETD peak is expected 16.0 below the CID peak)
        if (fabs(pos2 + 16.0 - pos1) < fragment_mass_tolerance_)
        {
          double factor((fragment_mass_tolerance_ - fabs(pos2 + 16.0 - pos1)) / fragment_mass_tolerance_);
          // now test if the ETD peak has "isotope" pattern
#ifdef SCORE_ETDFEATURES_DEBUG
          cerr << "scoreETDFeatures: is y-ion: " << pos1 << " (" << pos2 << ") (factor=" << factor << ") " << y_sum << " -> ";
#endif
          // same isotope scan as in the b-ion branch above
          vector<double> iso_pattern;
          iso_pattern.push_back(it1->getIntensity());
          double actual_pos = it1->getPosition()[0];
          for (PeakSpectrum::ConstIterator it3 = it2; it3 != ETD_spec.end(); ++it3)
          {
            double it3_pos(it3->getPosition()[0]);
            if (fabs(fabs(actual_pos - it3_pos) - Constants::NEUTRON_MASS_U) < fragment_mass_tolerance_)
            {
              iso_pattern.push_back(it3->getIntensity());
              actual_pos = it3_pos;
            }
            if (iso_pattern.size() == max_isotope_to_score)
            {
              break;
            }
          }
#ifdef SCORE_ETDFEATURES_DEBUG
          cerr << ion_scores[it1->getPosition()[0]].is_isotope_1_mono << " ";
#endif
          if (ion_scores[it1->getPosition()[0]].is_isotope_1_mono != -1)
          {
            y_sum += it2->getIntensity() * iso_pattern.size() * factor;
          }
#ifdef SCORE_ETDFEATURES_DEBUG
          cerr << y_sum << endl;
#endif
        }
      }
      // store both sums under the position of the CID peak
      ion_scores[it1->getPosition()[0]].s_bion = b_sum;
      ion_scores[it1->getPosition()[0]].s_yion = y_sum;
    }
    return;
  }
Ejemplo n.º 15
0
  /**
    @brief Computes a "witness" score for every CID peak and stores it in ion_scores[position].s_witness.

    The score of a peak starts with its own intensity and collects weighted
    intensities of other CID peaks that support it:
    - for charge > 1: a peak at the position expected for the doubly charged form
    - peaks 17.0 or 18.0 below the peak (the entries of 'diffs')
    - a complementary peak (both positions sum to precursor_weight + one proton mass),
      plus that peak's s_bion if non-zero
    Each contribution is weighted by a linear factor that is 1 for an exact mass
    match and falls to 0 at fragment_mass_tolerance_. Afterwards the score is
    scaled up by the peak's own isotope pattern score, increased by its s_yion and
    reduced by its s_bion (set to 0 if s_bion is not smaller than the score).

    @param charge Precursor charge; the doubly charged check is only done for charge > 1
    @param precursor_weight Used for the complementary-ion check
    @param ion_scores Per-position score table; several fields are read, s_witness is written
    @param CID_spec Spectrum whose peaks are scored
  */
  void CompNovoIonScoring::scoreWitnessSet_(Size charge, double precursor_weight, Map<double, IonScore> & ion_scores, const PeakSpectrum & CID_spec)
  {
    // mass differences at which a supporting "loss" peak is looked for
    vector<double> diffs;
    //diffs.push_back(28.0);
    diffs.push_back(17.0);
    diffs.push_back(18.0);

    // witnesses of CID spec (diffs)
    for (PeakSpectrum::ConstIterator it1 = CID_spec.begin(); it1 != CID_spec.end(); ++it1)
    {
      //Size num_wit(0);
      double wit_score(0.0);
      double pos1(it1->getPosition()[0]);
      // every peak is its own first witness
      wit_score += it1->getIntensity();
      for (PeakSpectrum::ConstIterator it2 = CID_spec.begin(); it2 != CID_spec.end(); ++it2)
      {
        double pos2(it2->getPosition()[0]);

        // direct ++
        if (charge > 1)
        {
          // pos2 * 2 - proton mass converts a doubly charged position to the singly charged one
          if (fabs(pos2 * 2 - Constants::PROTON_MASS_U - pos1) < fragment_mass_tolerance_)
          {
            double factor((fragment_mass_tolerance_ - fabs(pos2 * 2 - Constants::PROTON_MASS_U - pos1)) / fragment_mass_tolerance_);
            // pos1 is ion, pos2 is ++ion
#ifdef SCORE_WITNESSSET_DEBUG
            cerr << "scoreWitnessSet: ++ion " << pos1 << " " << pos2 << " (factor=" << factor << ") " << wit_score << " -> ";
#endif
            // s_isotope_pattern_2 below 0.2: plain weighted intensity;
            // otherwise the intensity is additionally scaled by s_isotope_pattern_2
            if (ion_scores[it2->getPosition()[0]].s_isotope_pattern_2 < 0.2)
            {
              wit_score += it2->getIntensity() * /* 0.2 */ factor;
            }
            else
            {
              wit_score += it2->getIntensity() * ion_scores[it2->getPosition()[0]].s_isotope_pattern_2 * factor;
            }
#ifdef SCORE_WITNESSSET_DEBUG
            cerr << wit_score << endl;
#endif
          }
        }

        // diffs?
        for (vector<double>::const_iterator it = diffs.begin(); it != diffs.end(); ++it)
        {
          // pos1 is ion, pos2 loss peak
          if (fabs(pos1 - pos2 - *it) < fragment_mass_tolerance_)
          {
            double factor((fragment_mass_tolerance_ - fabs(pos1 - pos2 - *it)) / fragment_mass_tolerance_);
#ifdef SCORE_WITNESSSET_DEBUG
            cerr << "scoreWitnessSet: diff " << pos1 << " (" << pos2 << ") " << *it << " (factor=" << factor << ") " << wit_score << " -> ";
#endif
            wit_score += it2->getIntensity() /* / 5.0*/ * factor;
#ifdef SCORE_WITNESSSET_DEBUG
            cerr << wit_score << endl;
#endif
          }
        }

        // is there a b-ion?; pos1 is ion, pos2 complementary ion
        // (complementary: pos1 + pos2 equals precursor_weight + one proton mass)
        if (fabs(pos1 + pos2 - 1 * Constants::PROTON_MASS_U - precursor_weight) < fragment_mass_tolerance_)
        {
          double factor((fragment_mass_tolerance_ - fabs(pos1 + pos2 - Constants::PROTON_MASS_U - precursor_weight)) / fragment_mass_tolerance_);
          /*factor *= 0.2;*/
#ifdef SCORE_WITNESSSET_DEBUG
          cerr << "scoreWitnessSet: complementary " << pos1 << " (" << pos2 << ") (factor=" << factor << ") " << wit_score << " -> ";
#endif
          // found complementary ion
          // its isotope pattern score only scales the contribution when it is at least 0.5
          // and is_isotope_1_mono equals 1
          if (ion_scores[it2->getPosition()[0]].s_isotope_pattern_1 < 0.5 || ion_scores[it2->getPosition()[0]].is_isotope_1_mono != 1)
          {
            wit_score += it2->getIntensity() /* * 0.5*/ * factor;
          }
          else
          {
            wit_score += it2->getIntensity() * ion_scores[it2->getPosition()[0]].s_isotope_pattern_1 * factor;
          }
#ifdef SCORE_WITNESSSET_DEBUG
          cerr << wit_score << endl;
#endif

          // a complementary peak with a non-zero s_bion adds that score as well
          if (ion_scores[it2->getPosition()[0]].s_bion != 0)
          {
#ifdef SCORE_WITNESSSET_DEBUG
            cerr << "scoreWitnessSet: complementary is b-ion " << pos1 << "(" << pos2 << ")" << wit_score << " -> ";
#endif
            wit_score += ion_scores[it2->getPosition()[0]].s_bion * factor;
#ifdef SCORE_WITNESSSET_DEBUG
            cerr << wit_score << endl;
#endif
          }

        }
      }

      // isotope pattern ok?
      // (scales the collected score by 1 + s_isotope_pattern_1)
      if (ion_scores[it1->getPosition()[0]].s_isotope_pattern_1 > 0 && ion_scores[it1->getPosition()[0]].is_isotope_1_mono == 1)
      {
#ifdef SCORE_WITNESSSET_DEBUG
        cerr << "scoreWitnessSet: isotope pattern: " << pos1 << " " << wit_score << " -> ";
#endif
        wit_score += ion_scores[it1->getPosition()[0]].s_isotope_pattern_1 * wit_score;
#ifdef SCORE_WITNESSSET_DEBUG
        cerr << wit_score << endl;
#endif
      }

      // a positive s_yion of the peak itself is added to the score
      if (ion_scores[it1->getPosition()[0]].s_yion > 0)
      {
#ifdef SCORE_WITNESSSET_DEBUG
        cerr << "scoreWitnessSet: is y-ion: " << pos1 << " " << wit_score << " -> ";
#endif
        wit_score += ion_scores[it1->getPosition()[0]].s_yion;
#ifdef SCORE_WITNESSSET_DEBUG
        cerr << wit_score << endl;
#endif
      }

      // a positive s_bion of the peak itself is subtracted; the score never drops below 0
      if (ion_scores[it1->getPosition()[0]].s_bion > 0)
      {
#ifdef SCORE_WITNESSSET_DEBUG
        cerr << "scoreWitnessSet: is b-ion: " << pos1 << " " << wit_score << " -> ";
#endif
        if (ion_scores[it1->getPosition()[0]].s_bion < wit_score)
        {
          wit_score -= ion_scores[it1->getPosition()[0]].s_bion;
        }
        else
        {
          wit_score = 0;
        }
      }

      ion_scores[it1->getPosition()[0]].s_witness = wit_score;
    }
    return;
  }
Ejemplo n.º 16
0
// Copy constructor: a copy of *e_ptr must report the same parameters and name.
START_SECTION((WindowMower(const WindowMower& source)))
	WindowMower copy(*e_ptr);
	TEST_EQUAL(copy.getParameters(), e_ptr->getParameters())
	TEST_EQUAL(copy.getName(), e_ptr->getName())
END_SECTION

// Assignment operator: after assigning *e_ptr to a default-constructed object,
// parameters and name must be equal.
START_SECTION((WindowMower& operator = (const WindowMower& source)))
	WindowMower copy;
	copy = *e_ptr;
	TEST_EQUAL(copy.getParameters(), e_ptr->getParameters())
	TEST_EQUAL(copy.getName(), e_ptr->getName())
END_SECTION

// Sliding-window filter: with windowsize 50.0 and peakcount 2 the 121 peaks of
// the test spectrum are expected to be reduced to 56.
START_SECTION((template<typename SpectrumType> void filterPeakSpectrumForTopNInSlidingWindow(SpectrumType& spectrum)))
	DTAFile dta_file;
	PeakSpectrum spec;
	dta_file.load(OPENMS_GET_TEST_DATA_PATH("Transformers_tests.dta"), spec);
	// sanity check on the input file before filtering
	TEST_EQUAL(spec.size(), 121)

	Param p(e_ptr->getParameters());
	p.setValue("windowsize", 50.0); // default
	p.setValue("peakcount", 2);  // default
	p.setValue("movetype", "slide"); // default and not needed as we directly call sliding window function
	e_ptr->setParameters(p);
	
	e_ptr->filterPeakSpectrumForTopNInSlidingWindow(spec);
	
	// number of peaks remaining after filtering
	TEST_EQUAL(spec.size(), 56)
	
END_SECTION
Ejemplo n.º 17
0
  /**
    @brief Serialises a spectrum as text, Base64-encodes it and wraps the result at 76 characters.

    Text layout before encoding:
    - with a non-empty @p header: three lines (header, precursor m/z, precursor charge)
    - with an empty @p header: one line "precursor m/z <TAB> precursor charge"
    - then one line per peak: "m/z <TAB> intensity <TAB> charge"
    m/z values are passed through Math::roundDecimal(value, -9). Peak charges are
    taken from the first integer data array of @p spec; a peak without an entry
    there (no array, or array shorter than the peak list) is written with charge "0".
    Precursor m/z and charge are 0 if the spectrum has no precursor.

    @param spec Spectrum to serialise
    @param header Optional first line (e.g. the names of the source spectra)
    @return The wrapped Base64 string
  */
  String XQuestResultXMLFile::getxQuestBase64EncodedSpectrum_(const PeakSpectrum& spec, String header)
  {
    double precursor_mz = 0;
    double precursor_z = 0;
    if (spec.getPrecursors().size() > 0)
    {
      precursor_mz = Math::roundDecimal(spec.getPrecursors()[0].getMZ(), -9);
      precursor_z = spec.getPrecursors()[0].getCharge();
    }

    String out;

    // header lines
    if (!header.empty()) // common or xlinker spectrum will be reported
    {
      out += header + "\n"; // e.g. GUA1372-S14-A-LRRK2_DSS_1A3.03873.03873.3.dta,GUA1372-S14-A-LRRK2_DSS_1A3.03863.03863.3.dta
      out += String(precursor_mz) + "\n";
      out += String(precursor_z) + "\n";
    }
    else // light or heavy spectrum will be reported
    {
      out += String(precursor_mz) + "\t" + String(precursor_z) + "\n";
    }

    PeakSpectrum::IntegerDataArray charges;
    if (spec.getIntegerDataArrays().size() > 0)
    {
      charges = spec.getIntegerDataArrays()[0];
    }

    // write peaks
    for (Size i = 0; i != spec.size(); ++i)
    {
      out += String(Math::roundDecimal(spec[i].getMZ(), -9)) + "\t";
      out += String(spec[i].getIntensity()) + "\t";

      // bounds check per peak: the charge array may be shorter than the peak list
      if (i < charges.size())
      {
        out += String(charges[i]);
      }
      else
      {
        out += "0";
      }

      out += "\n";
    }

    // the encoder expects a list of strings; the whole text is passed as one entry
    std::vector<String> in_strings;
    in_strings.push_back(out);

    String out_encoded;
    Base64().encodeStrings(in_strings, out_encoded, false, false);
    String out_wrapped;
    wrap_(out_encoded, 76, out_wrapped);
    return out_wrapped;
  }
Ejemplo n.º 18
0
void CompNovoIdentificationCID::getIdentification(PeptideIdentification & id, const PeakSpectrum & CID_spec)
{
    //if (CID_spec.getPrecursors().begin()->getMZ() > 1000.0)
    //{
    //cerr << "Weight of precursor has been estimated to exceed 2000.0 Da which is the current limit" << endl;
    //return;
    //}

    PeakSpectrum new_CID_spec(CID_spec);
    windowMower_(new_CID_spec, 0.3, 1);

    Param zhang_param;
    zhang_param = zhang_.getParameters();
    zhang_param.setValue("tolerance", fragment_mass_tolerance_);
    zhang_param.setValue("use_gaussian_factor", "true");
    zhang_param.setValue("use_linear_factor", "false");
    zhang_.setParameters(zhang_param);


    Normalizer normalizer;
    Param n_param(normalizer.getParameters());
    n_param.setValue("method", "to_one");
    normalizer.setParameters(n_param);
    normalizer.filterSpectrum(new_CID_spec);

    Size charge(2);
    double precursor_weight(0);     // [M+H]+
    if (!CID_spec.getPrecursors().empty())
    {
        // believe charge of spectrum?
        if (CID_spec.getPrecursors().begin()->getCharge() != 0)
        {
            charge = CID_spec.getPrecursors().begin()->getCharge();
        }
        else
        {
            // TODO estimate charge state
        }
        precursor_weight = CID_spec.getPrecursors().begin()->getMZ() * charge - ((charge - 1) * Constants::PROTON_MASS_U);
    }

    //cerr << "charge=" << charge << ", [M+H]=" << precursor_weight << endl;

    // now delete all peaks that are right of the estimated precursor weight
    Size peak_counter(0);
    for (PeakSpectrum::ConstIterator it = new_CID_spec.begin(); it != new_CID_spec.end(); ++it, ++peak_counter)
    {
        if (it->getPosition()[0] > precursor_weight)
        {
            break;
        }
    }
    if (peak_counter < new_CID_spec.size())
    {
        new_CID_spec.resize(peak_counter);
    }


    static double oxonium_mass = EmpiricalFormula("H2O+").getMonoWeight();

    Peak1D p;
    p.setIntensity(1);
    p.setPosition(oxonium_mass);

    new_CID_spec.push_back(p);

    p.setPosition(precursor_weight);
    new_CID_spec.push_back(p);

    // add complement to spectrum
    /*
    for (PeakSpectrum::ConstIterator it1 = CID_spec.begin(); it1 != CID_spec.end(); ++it1)
    {
    // get m/z of complement
    double mz_comp = precursor_weight - it1->getPosition()[0] + Constants::PROTON_MASS_U;

    // search if peaks are available that have similar m/z values
    Size count(0);
    bool found(false);
    for (PeakSpectrum::ConstIterator it2 = CID_spec.begin(); it2 != CID_spec.end(); ++it2, ++count)
    {
    if (fabs(mz_comp - it2->getPosition()[0]) < fragment_mass_tolerance)
    {
      // add peak intensity to corresponding peak in new_CID_spec
      new_CID_spec[count].setIntensity(new_CID_spec[count].getIntensity());
    }
    }
    if (!found)
    {
    // infer this peak
    Peak1D p;
    p.setIntensity(it1->getIntensity());
    p.setPosition(mz_comp);
    new_CID_spec.push_back(p);
    }
    }*/

    CompNovoIonScoringCID ion_scoring;
    Param ion_scoring_param(ion_scoring.getParameters());
    ion_scoring_param.setValue("fragment_mass_tolerance", fragment_mass_tolerance_);
    ion_scoring_param.setValue("precursor_mass_tolerance", precursor_mass_tolerance_);
    ion_scoring_param.setValue("decomp_weights_precision", decomp_weights_precision_);
    ion_scoring_param.setValue("double_charged_iso_threshold", (double)param_.getValue("double_charged_iso_threshold"));
    ion_scoring_param.setValue("max_isotope_to_score", param_.getValue("max_isotope_to_score"));
    ion_scoring_param.setValue("max_isotope", max_isotope_);
    ion_scoring.setParameters(ion_scoring_param);

    Map<double, IonScore> ion_scores;
    ion_scoring.scoreSpectrum(ion_scores, new_CID_spec, precursor_weight, charge);

    new_CID_spec.sortByPosition();

    /*
    cerr << "Size of ion_scores " << ion_scores.size() << endl;
    for (Map<double, IonScore>::const_iterator it = ion_scores.begin(); it != ion_scores.end(); ++it)
    {
        cerr << it->first << " " << it->second.score << endl;
    }*/

#ifdef WRITE_SCORED_SPEC
    PeakSpectrum filtered_spec(new_CID_spec);
    filtered_spec.clear();
    for (Map<double, CompNovoIonScoringCID::IonScore>::const_iterator it = ion_scores.begin(); it != ion_scores.end(); ++it)
    {
        Peak1D p;
        p.setIntensity(it->second.score);
        p.setPosition(it->first);
        filtered_spec.push_back(p);
    }
    DTAFile().store("spec_scored.dta", filtered_spec);
#endif

    set<String> sequences;
    getDecompositionsDAC_(sequences, 0, new_CID_spec.size() - 1, precursor_weight, new_CID_spec, ion_scores);

#ifdef SPIKE_IN
    sequences.insert("AFCVDGEGR");
    sequences.insert("APEFAAPWPDFVPR");
    sequences.insert("AVKQFEESQGR");
    sequences.insert("CCTESLVNR");
    sequences.insert("DAFLGSFLYEYSR");
    sequences.insert("DAIPENLPPLTADFAEDK");
    sequences.insert("DDNKVEDIWSFLSK");
    sequences.insert("DDPHACYSTVFDK");
    sequences.insert("DEYELLCLDGSR");
    sequences.insert("DGAESYKELSVLLPNR");
    sequences.insert("DGASCWCVDADGR");
    sequences.insert("DLFIPTCLETGEFAR");
    sequences.insert("DTHKSEIAHR");
    sequences.insert("DVCKNYQEAK");
    sequences.insert("EACFAVEGPK");
    sequences.insert("ECCHGDLLECADDR");
    sequences.insert("EFLGDKFYTVISSLK");
    sequences.insert("EFTPVLQADFQK");
    sequences.insert("ELFLDSGIFQPMLQGR");
    sequences.insert("ETYGDMADCCEK");
    sequences.insert("EVGCPSSSVQEMVSCLR");
    sequences.insert("EYEATLEECCAK");
    sequences.insert("FADLIQSGTFQLHLDSK");
    sequences.insert("FFSASCVPGATIEQK");
    sequences.insert("FLANVSTVLTSK");
    sequences.insert("FLSGSDYAIR");
    sequences.insert("FTASCPPSIK");
    sequences.insert("GAIEWEGIESGSVEQAVAK");
    sequences.insert("GDVAFIQHSTVEENTGGK");
    sequences.insert("GEPPSCAEDQSCPSER");
    sequences.insert("GEYVPTSLTAR");
    sequences.insert("GQEFTITGQKR");
    sequences.insert("GTFAALSELHCDK");
    sequences.insert("HLVDEPQNLIK");
    sequences.insert("HQDCLVTTLQTQPGAVR");
    sequences.insert("HTTVNENAPDQK");
    sequences.insert("ILDCGSPDTEVR");
    sequences.insert("KCPSPCQLQAER");
    sequences.insert("KGTEFTVNDLQGK");
    sequences.insert("KQTALVELLK");
    sequences.insert("KVPQVSTPTLVEVSR");
    sequences.insert("LALQFTTNAKR");
    sequences.insert("LCVLHEKTPVSEK");
    sequences.insert("LFTFHADICTLPDTEK");
    sequences.insert("LGEYGFQNALIVR");
    sequences.insert("LHVDPENFK");
    sequences.insert("LKECCDKPLLEK");
    sequences.insert("LKHLVDEPQNLIK");
    sequences.insert("LKPDPNTLCDEFK");
    sequences.insert("LLGNVLVVVLAR");
    sequences.insert("LLVVYPWTQR");
    sequences.insert("LRVDPVNFK");
    sequences.insert("LTDEELAFPPLSPSR");
    sequences.insert("LVNELTEFAK");
    sequences.insert("MFLSFPTTK");
    sequences.insert("MPCTEDYLSLILNR");
    sequences.insert("NAPYSGYSGAFHCLK");
    sequences.insert("NECFLSHKDDSPDLPK");
    sequences.insert("NEPNKVPACPGSCEEVK");
    sequences.insert("NLQMDDFELLCTDGR");
    sequences.insert("QAGVQAEPSPK");
    sequences.insert("RAPEFAAPWPDFVPR");
    sequences.insert("RHPEYAVSVLLR");
    sequences.insert("RPCFSALTPDETYVPK");
    sequences.insert("RSLLLAPEEGPVSQR");
    sequences.insert("SAFPPEPLLCSVQR");
    sequences.insert("SAGWNIPIGTLLHR");
    sequences.insert("SCWCVDEAGQK");
    sequences.insert("SGNPNYPHEFSR");
    sequences.insert("SHCIAEVEK");
    sequences.insert("SISSGFFECER");
    sequences.insert("SKYLASASTMDHAR");
    sequences.insert("SLHTLFGDELCK");
    sequences.insert("SLLLAPEEGPVSQR");
    sequences.insert("SPPQCSPDGAFRPVQCK");
    sequences.insert("SREGDPLAVYLK");
    sequences.insert("SRQIPQCPTSCER");
    sequences.insert("TAGTPVSIPVCDDSSVK");
    sequences.insert("TCVADESHAGCEK");
    sequences.insert("TQFGCLEGFGR");
    sequences.insert("TVMENFVAFVDK");
    sequences.insert("TYFPHFDLSHGSAQVK");
    sequences.insert("TYMLAFDVNDEK");
    sequences.insert("VDEVGGEALGR");
    sequences.insert("VDLLIGSSQDDGLINR");
    sequences.insert("VEDIWSFLSK");
    sequences.insert("VGGHAAEYGAEALER");
    sequences.insert("VGTRCCTKPESER");
    sequences.insert("VKVDEVGGEALGR");
    sequences.insert("VKVDLLIGSSQDDGLINR");
    sequences.insert("VLDSFSNGMK");
    sequences.insert("VLSAADKGNVK");
    sequences.insert("VPQVSTPTLVEVSR");
    sequences.insert("VTKCCTESLVNR");
    sequences.insert("VVAASDASQDALGCVK");
    sequences.insert("VVAGVANALAHR");
    sequences.insert("YICDNQDTISSK");
    sequences.insert("YLASASTMDHAR");
    sequences.insert("YNGVFQECCQAEDK");
#endif

    SpectrumAlignmentScore spectra_zhang;
    spectra_zhang.setParameters(zhang_param);

    vector<PeptideHit> hits;
    Size missed_cleavages = param_.getValue("missed_cleavages");
    for (set<String>::const_iterator it = sequences.begin(); it != sequences.end(); ++it)
    {

        Size num_missed = countMissedCleavagesTryptic_(*it);
        if (missed_cleavages < num_missed)
        {
            //cerr << "Two many missed cleavages: " << *it << ", found " << num_missed << ", allowed " << missed_cleavages << endl;
            continue;
        }
        PeakSpectrum CID_sim_spec;
        getCIDSpectrum_(CID_sim_spec, *it, charge);

        //normalizer.filterSpectrum(CID_sim_spec);

        double cid_score = zhang_(CID_sim_spec, CID_spec);

        PeptideHit hit;
        hit.setScore(cid_score);

        hit.setSequence(getModifiedAASequence_(*it));
        hit.setCharge((Int)charge);   //TODO unify charge interface: int or size?
        hits.push_back(hit);
        //cerr << getModifiedAASequence_(*it) << " " << cid_score << " " << endl;
    }

    // rescore the top hits
    id.setHits(hits);
    id.assignRanks();

    hits = id.getHits();

    SpectrumAlignmentScore alignment_score;
    Param align_param(alignment_score.getParameters());
    align_param.setValue("tolerance", fragment_mass_tolerance_);
    align_param.setValue("use_linear_factor", "true");
    alignment_score.setParameters(align_param);

    for (vector<PeptideHit>::iterator it = hits.begin(); it != hits.end(); ++it)
    {
        //cerr << "Pre: " << it->getRank() << " " << it->getSequence() << " " << it->getScore() << " " << endl;
    }

    Size number_of_prescoring_hits = param_.getValue("number_of_prescoring_hits");
    if (hits.size() > number_of_prescoring_hits)
    {
        hits.resize(number_of_prescoring_hits);
    }

    for (vector<PeptideHit>::iterator it = hits.begin(); it != hits.end(); ++it)
    {
        PeakSpectrum CID_sim_spec;
        getCIDSpectrum_(CID_sim_spec, getModifiedStringFromAASequence_(it->getSequence()), charge);

        normalizer.filterSpectrum(CID_sim_spec);

        //DTAFile().store("sim_specs/" + it->getSequence().toUnmodifiedString() + "_sim_CID.dta", CID_sim_spec);

        //double cid_score = spectra_zhang(CID_sim_spec, CID_spec);
        double cid_score = alignment_score(CID_sim_spec, CID_spec);

        //cerr << "Final: " << it->getSequence() << " " << cid_score << endl;

        it->setScore(cid_score);
    }

    id.setHits(hits);
    id.assignRanks();
    hits = id.getHits();

    for (vector<PeptideHit>::iterator it = hits.begin(); it != hits.end(); ++it)
    {
        //cerr << "Fin: " << it->getRank() << " " << it->getSequence() << " " << it->getScore() << " " << endl;
    }

    Size number_of_hits = param_.getValue("number_of_hits");
    if (id.getHits().size() > number_of_hits)
    {
        hits.resize(number_of_hits);
    }

    id.setHits(hits);
    id.assignRanks();

    return;
}
  /**
    @brief Tool entry point: matches MS2 query spectra against a spectral library.

    Reads the tool options, loads the MSP library given by "lib" and buckets its
    spectra by the integer key floor(precursor m/z * "round_precursor_to_integer").
    Every input file is then loaded, each MS2 spectrum is filtered and scored
    against the library entries inside the precursor tolerance window with the
    similarity functor named by "compare_function", and the ranked hits are
    stored as idXML in the output file at the same list position.

    @return ILLEGAL_PARAMETERS if "top_hits" is below -1 or the number of output
            files differs from the number of input files, EXECUTION_OK otherwise.
  */
  ExitCodes main_(int, const char**)
  {
    //-------------------------------------------------------------
    // parameter handling
    //-------------------------------------------------------------

    StringList in_spec = getStringList_("in");
    StringList out = getStringList_("out");
    String in_lib = getStringOption_("lib");
    String compare_function = getStringOption_("compare_function");
    Int precursor_mass_multiplier = getIntOption_("round_precursor_to_integer");
    float precursor_mass_tolerance = getDoubleOption_("precursor_mass_tolerance");
    //Int min_precursor_charge = getIntOption_("min_precursor_charge");
    //Int max_precursor_charge = getIntOption_("max_precursor_charge");
    float remove_peaks_below_threshold = getDoubleOption_("filter:remove_peaks_below_threshold");
    UInt min_peaks = getIntOption_("filter:min_peaks");
    UInt max_peaks = getIntOption_("filter:max_peaks");
    Int cut_peaks_below = getIntOption_("filter:cut_peaks_below");
    StringList fixed_modifications = getStringList_("fixed_modifications");
    StringList variable_modifications = getStringList_("variable_modifications");
    Int top_hits  = getIntOption_("top_hits");
    if (top_hits < -1)
    {
      writeLog_("top_hits (should be  >= -1 )");
      return ILLEGAL_PARAMETERS;
    }
    //-------------------------------------------------------------
    // loading input
    //-------------------------------------------------------------
    if (out.size() != in_spec.size())
    {
      writeLog_("out (should be as many as input files)");
      return ILLEGAL_PARAMETERS;
    }

    time_t prog_time = time(NULL);
    MSPFile spectral_library;
    RichPeakMap query, library;
    //spectrum which will be identified
    MzMLFile spectra;
    spectra.setLogType(log_type_);

    time_t start_build_time = time(NULL);
    //-------------------------------------------------------------
    //building map for faster search
    //-------------------------------------------------------------

    //library containing already identified peptide spectra
    vector<PeptideIdentification> ids;
    spectral_library.load(in_lib, ids, library);

    // key: floor(precursor m/z * multiplier), value: all accepted library spectra with that key
    map<Size, vector<PeakSpectrum> > MSLibrary;
    {
      RichPeakMap::iterator s;
      vector<PeptideIdentification>::iterator i;
      ModificationsDB* mdb = ModificationsDB::getInstance();
      // walk spectra and identifications in lockstep; stop at the shorter of the two
      for (s = library.begin(), i = ids.begin(); s < library.end() && i != ids.end(); ++s, ++i)
      {
        // an entry without precursor or without peptide hit can neither be indexed nor reported
        if (s->getPrecursors().empty() || i->getHits().empty())
        {
          continue;
        }

        double precursor_MZ = (*s).getPrecursors()[0].getMZ();
        // scale first, truncate afterwards: the lookup below builds its keys from the
        // scaled m/z as well, so truncating before scaling would produce keys that
        // the search window never visits when the tolerance is small
        Size MZ_multi = (Size)(precursor_MZ * precursor_mass_multiplier);

        PeakSpectrum librar;
        bool variable_modifications_ok = true;
        bool fixed_modifications_ok = true;
        const AASequence& aaseq = i->getHits()[0].getSequence();
        //fixed modifications: a residue that is the origin of a fixed modification must carry exactly it
        if (!fixed_modifications.empty())
        {
          for (Size pos = 0; pos < aaseq.size(); ++pos)
          {
            const   Residue& mod  = aaseq.getResidue(pos);
            for (Size m = 0; m < fixed_modifications.size(); ++m)
            {
              if (mod.getOneLetterCode() == mdb->getModification(fixed_modifications[m]).getOrigin() && fixed_modifications[m] != mod.getModification())
              {
                fixed_modifications_ok = false;
                break;
              }
            }
          }
        }
        //variable modifications: only modified residues are checked
        if (aaseq.isModified() && (!variable_modifications.empty()))
        {
          for (Size pos = 0; pos < aaseq.size(); ++pos)
          {
            if (aaseq.isModified(pos))
            {
              const   Residue& mod  = aaseq.getResidue(pos);
              for (Size m = 0; m < variable_modifications.size(); ++m)
              {
                if (mod.getOneLetterCode() == mdb->getModification(variable_modifications[m]).getOrigin() && variable_modifications[m] != mod.getModification())
                {
                  variable_modifications_ok = false;
                  break;
                }
              }
            }
          }
        }
        if (variable_modifications_ok && fixed_modifications_ok)
        {
          PeptideIdentification& translocate_pid = *i;
          librar.getPeptideIdentifications().push_back(translocate_pid);
          librar.setPrecursors(s->getPrecursors());
          //library entry transformation
          for (UInt l = 0; l < s->size(); ++l)
          {
            Peak1D peak;
            if ((*s)[l].getIntensity() >  remove_peaks_below_threshold)
            {
              const String& info = (*s)[l].getMetaValue("MSPPeakInfo");
              // peaks whose MSP annotation starts with '?' are down-weighted to 20%
              if (info[0] == '?')
              {
                peak.setIntensity(sqrt(0.2 * (*s)[l].getIntensity()));
              }
              else
              {
                peak.setIntensity(sqrt((*s)[l].getIntensity()));
              }

              peak.setMZ((*s)[l].getMZ());
              peak.setPosition((*s)[l].getPosition());
              librar.push_back(peak);
            }
          }
          // operator[] creates the bucket on first use
          MSLibrary[MZ_multi].push_back(librar);
        }
      }
    }
    time_t end_build_time = time(NULL);
    cout << "Time needed for preprocessing data: " << (end_build_time - start_build_time) << "\n";
    //compare function (the factory hands over ownership; released before returning)
    PeakSpectrumCompareFunctor* comparor = Factory<PeakSpectrumCompareFunctor>::create(compare_function);
    //-------------------------------------------------------------
    // calculations
    //-------------------------------------------------------------
    double score;
    StringList::iterator in, out_file;
    for (in  = in_spec.begin(), out_file  = out.begin(); in < in_spec.end(); ++in, ++out_file)
    {
      time_t start_time = time(NULL);
      spectra.load(*in, query);
      //Will hold valuable hits
      vector<PeptideIdentification> peptide_ids;
      vector<ProteinIdentification> protein_ids;
      // Write parameters to ProteinIdentifcation
      ProteinIdentification prot_id;
      //Parameters of identificaion
      prot_id.setIdentifier("test");
      prot_id.setSearchEngineVersion("SpecLibSearcher");
      prot_id.setDateTime(DateTime::now());
      prot_id.setScoreType(compare_function);
      ProteinIdentification::SearchParameters searchparam;
      searchparam.precursor_tolerance = precursor_mass_tolerance;
      prot_id.setSearchParameters(searchparam);
      /***********SEARCH**********/
      for (UInt j = 0; j < query.size(); ++j)
      {
        //Set identifier for each identifications
        PeptideIdentification pid;
        pid.setIdentifier("test");
        pid.setScoreType(compare_function);
        ProteinHit pr_hit;
        pr_hit.setAccession(j);
        prot_id.insertHit(pr_hit);
        //RichPeak1D to Peak1D transformation for the compare function query
        PeakSpectrum quer;
        query[j].sortByIntensity(true);
        double min_high_intensity = 0;

        if (query[j].empty() || query[j].getMSLevel() != 2)
        {
          continue;
        }
        if (query[j].getPrecursors().empty())
        {
          writeLog_("Warning MS2 spectrum without precursor information");
          continue;
        }

        // relative cutoff: peaks below 1/cut_peaks_below of the most intense peak are dropped.
        // Computed in floating point (integer 1 / n is 0 for every n > 1) and skipped
        // for non-positive values, which would otherwise divide by zero.
        if (cut_peaks_below > 0)
        {
          min_high_intensity = (1.0 / cut_peaks_below) * query[j][0].getIntensity();
        }

        query[j].sortByPosition();
        for (UInt k = 0; k < query[j].size() && k < max_peaks; ++k)
        {
          if (query[j][k].getIntensity() >  remove_peaks_below_threshold && query[j][k].getIntensity() >= min_high_intensity)
          {
            Peak1D peak;
            peak.setIntensity(sqrt(query[j][k].getIntensity()));
            peak.setMZ(query[j][k].getMZ());
            peak.setPosition(query[j][k].getPosition());
            quer.push_back(peak);
          }
        }
        const bool peak_ok = (quer.size() >= min_peaks);
        double query_MZ = query[j].getPrecursors()[0].getMZ();
        if (peak_ok)
        {
          // charge-one heuristic: inspect the highest-m/z 3% of the peaks; if a peak
          // below the precursor m/z is met while more than 1% is still unvisited,
          // only singly charged library entries are considered
          bool charge_one = false;
          Int percent = (Int) Math::round((query[j].size() / 100.0) * 3.0);
          Int margin  = (Int) Math::round((query[j].size() / 100.0) * 1.0);
          const Size n_peaks = query[j].size();
          // index-based walk from the back; never steps before the first peak
          for (Size back = 0; percent >= 0 && back < n_peaks; ++back, --percent)
          {
            if (query[j][n_peaks - 1 - back].getMZ() < query_MZ)
            {
              break;
            }
          }
          if (percent > margin)
          {
            charge_one = true;
          }
          float min_MZ = (query_MZ - precursor_mass_tolerance) * precursor_mass_multiplier;
          float max_MZ = (query_MZ + precursor_mass_tolerance) * precursor_mass_multiplier;
          for (Size mz = (Size)min_MZ; mz <= ((Size)max_MZ) + 1; ++mz)
          {
            map<Size, vector<PeakSpectrum> >::iterator found;
            found = MSLibrary.find(mz);
            if (found != MSLibrary.end())
            {
              vector<PeakSpectrum>& library = found->second;
              for (Size i = 0; i < library.size(); ++i)
              {
                float this_MZ  = library[i].getPrecursors()[0].getMZ() * precursor_mass_multiplier;
                if (this_MZ >= min_MZ && max_MZ >= this_MZ && ((charge_one == true && library[i].getPeptideIdentifications()[0].getHits()[0].getCharge() == 1) || charge_one == false))
                {
                  PeptideHit hit = library[i].getPeptideIdentifications()[0].getHits()[0];
                  PeakSpectrum& librar = library[i];
                  //Special treatment for SpectraST score as it computes a score based on the whole library
                  if (compare_function == "SpectraSTSimilarityScore")
                  {
                    SpectraSTSimilarityScore* sp = static_cast<SpectraSTSimilarityScore*>(comparor);
                    BinnedSpectrum quer_bin = sp->transform(quer);
                    BinnedSpectrum librar_bin = sp->transform(librar);
                    score = (*sp)(quer, librar); //(*sp)(quer_bin,librar_bin);
                    double dot_bias = sp->dot_bias(quer_bin, librar_bin, score);
                    hit.setMetaValue("DOTBIAS", dot_bias);
                  }
                  else
                  {
                    score = (*comparor)(quer, librar);
                  }

                  DataValue RT(library[i].getRT());
                  DataValue MZ(library[i].getPrecursors()[0].getMZ());
                  hit.setMetaValue("RT", RT);
                  hit.setMetaValue("MZ", MZ);
                  hit.setScore(score);
                  PeptideEvidence pe;
                  pe.setProteinAccession(pr_hit.getAccession());
                  hit.addPeptideEvidence(pe);
                  pid.insertHit(hit);
                }
              }
            }
          }
        }
        pid.setHigherScoreBetter(true);
        pid.sort();
        if (compare_function == "SpectraSTSimilarityScore")
        {
          if (!pid.empty() && !pid.getHits().empty())
          {
            vector<PeptideHit> final_hits;
            final_hits.resize(pid.getHits().size());
            SpectraSTSimilarityScore* sp = static_cast<SpectraSTSimilarityScore*>(comparor);
            // runner-up: first hit with a different unmodified sequence, looked for among the next few ranks
            Size runner_up = 1;
            for (; runner_up < pid.getHits().size(); ++runner_up)
            {
              if (pid.getHits()[0].getSequence().toUnmodifiedString() != pid.getHits()[runner_up].getSequence().toUnmodifiedString() || runner_up > 5)
              {
                break;
              }
            }
            // The search can end without a runner-up (single hit, or every hit shares the
            // top sequence); indexing the hit list then would read past its end.
            // NOTE(review): a runner-up score of 0 is used in that case - confirm this is
            // the desired delta for an uncontested hit.
            const double top_score = pid.getHits()[0].getScore();
            const double runner_up_score = (runner_up < pid.getHits().size()) ? pid.getHits()[runner_up].getScore() : 0.0;
            double delta_D = sp->delta_D(top_score, runner_up_score);
            for (Size s = 0; s < pid.getHits().size(); ++s)
            {
              final_hits[s] = pid.getHits()[s];
              final_hits[s].setMetaValue("delta D", delta_D);
              final_hits[s].setMetaValue("dot product", pid.getHits()[s].getScore());
              final_hits[s].setScore(sp->compute_F(pid.getHits()[s].getScore(), delta_D, pid.getHits()[s].getMetaValue("DOTBIAS")));

              //final_hits[s].removeMetaValue("DOTBIAS");
            }
            pid.setHits(final_hits);
            pid.sort();
            pid.setMZ(query[j].getPrecursors()[0].getMZ());
            // retention time of the query spectrum (previously the precursor m/z was stored here)
            pid.setRT(query[j].getRT());
          }
        }
        // keep only the best top_hits hits; -1 means "keep all"
        if (top_hits != -1 && (UInt)top_hits < pid.getHits().size())
        {
          vector<PeptideHit> hits;
          hits.resize(top_hits);
          for (Size i = 0; i < (UInt)top_hits; ++i)
          {
            hits[i] = pid.getHits()[i];
          }
          pid.setHits(hits);
        }
        peptide_ids.push_back(pid);
      }
      protein_ids.push_back(prot_id);
      //-------------------------------------------------------------
      // writing output
      //-------------------------------------------------------------
      IdXMLFile id_xml_file;
      id_xml_file.store(*out_file, protein_ids, peptide_ids);
      time_t end_time = time(NULL);
      cout << "Search time: " << difftime(end_time, start_time) << " seconds for " << *in << "\n";
    }
    // the functor was allocated by the factory for this run only
    delete comparor;
    time_t end_time = time(NULL);
    cout << "Total time: " << difftime(end_time, prog_time) << " secconds\n";
    return EXECUTION_OK;
  }
START_SECTION((Normalizer(const Normalizer& source)))
	// Copy-construct from the shared instance behind e_ptr (set up outside this section)
	// and check that the copy reports the same parameters and the same name.
	Normalizer copy(*e_ptr);
	TEST_EQUAL(copy.getParameters(), e_ptr->getParameters())
	TEST_EQUAL(copy.getName(), e_ptr->getName())
END_SECTION

START_SECTION((Normalizer& operator = (const Normalizer& source)))
	// Assign the shared instance behind e_ptr to a default-constructed object
	// and check that parameters and name were taken over.
	Normalizer copy;
	copy = *e_ptr;
	TEST_EQUAL(copy.getParameters(), e_ptr->getParameters())
	TEST_EQUAL(copy.getName(), e_ptr->getName())
END_SECTION

START_SECTION((template<typename SpectrumType> void filterSpectrum(SpectrumType& spectrum)))
	// Load the reference spectrum from the test data directory.
	DTAFile dta_file;
	PeakSpectrum spec;
	dta_file.load(OPENMS_GET_TEST_DATA_PATH("Transformers_tests.dta"), spec);

	// After sorting by intensity the last peak is inspected
	// (presumably the most intense one - confirm the default sort order).
	spec.sortByIntensity();

	// Before filtering, the last peak is expected to have intensity 46.
	TEST_EQUAL(spec.rbegin()->getIntensity(), 46)

	// Apply the filter with the parameters currently set on the instance behind e_ptr.
	e_ptr->filterSpectrum(spec);

	spec.sortByIntensity();
	
	// After filtering, the last peak is expected to have intensity 1.
	TEST_EQUAL(spec.rbegin()->getIntensity(), 1)

	// Switch the "method" parameter to "to_TIC" for the checks that follow.
	Param p(e_ptr->getParameters());
	p.setValue("method", "to_TIC");
	e_ptr->setParameters(p);
  /**
    @brief Appends a simulated CID fragment spectrum of @p sequence to @p spec.

    Walks the sequence from both ends at once, accumulating the b-ion mass from
    the front and the y-ion mass from the back, and adds for each cleavage site
    the isotope peaks of the b- and y-ion, their water and ammonia losses and
    the a-ion. Only fragments whose (uncharged) mass lies inside the
    min_mz_/max_mz_ window are generated. The spectrum is sorted by position
    before returning.

    @param spec     spectrum the peaks are appended to
    @param sequence one-letter residue string; residue masses are taken from aa_to_weight_
    @param charge   precursor charge; charge states 1 and 2 are iterated, but
                    peaks are currently emitted for charge 1 only
    @param prefix   mass added to every b-ion position
    @param suffix   mass added to every y-ion position
  */
  void CompNovoIdentificationBase::getCIDSpectrum_(PeakSpectrum & spec, const String & sequence, Size charge, DoubleReal prefix, DoubleReal suffix)
  {
    static DoubleReal h2o_mass = EmpiricalFormula("H2O").getMonoWeight();
    static DoubleReal nh3_mass = EmpiricalFormula("NH3").getMonoWeight();
    static DoubleReal co_mass = EmpiricalFormula("CO").getMonoWeight();

    // an empty sequence has no cleavage sites; sequence.size() - 1 would wrap
    // around and sequence[0] below would be out of range
    if (sequence.empty())
    {
      spec.sortByPosition();
      return;
    }

    Peak1D p;
    DoubleReal b_pos(0 + prefix);
    DoubleReal y_pos(h2o_mass + suffix);
    // once a residue capable of the loss was seen, all longer fragments of that series keep showing it
    bool b_H2O_loss(false), b_NH3_loss(false), y_NH3_loss(false);

    for (Size i = 0; i != sequence.size() - 1; ++i)
    {
      char aa(sequence[i]);
      b_pos += aa_to_weight_[aa];

      char aa2(sequence[sequence.size() - i - 1]);
      y_pos += aa_to_weight_[aa2];
      for (Size z = 1; z <= charge && z < 3; ++z)
      {
        // b-ions
        if (b_pos >= min_mz_ && b_pos <= max_mz_)
        {
          for (Size j = 0; j != max_isotope_; ++j)
          {
            if (z == 1 /*|| b_pos > MIN_DOUBLE_MZ*/)
            {
              // j-th isotope peak lies j neutron masses above the monoisotopic peak
              // (same formula as for the y-ions below)
              p.setPosition((b_pos + (DoubleReal)z * Constants::PROTON_MASS_U + (DoubleReal)j * Constants::NEUTRON_MASS_U) / (DoubleReal)z);
              p.setIntensity(isotope_distributions_[(Size)b_pos][j] * 0.8 / (z * z));
              spec.push_back(p);
            }
          }
        }

        // b-ion losses
        if (b_pos - h2o_mass > min_mz_ && b_pos - h2o_mass < max_mz_)
        {
          if (b_H2O_loss || aa == 'S' || aa == 'T' || aa == 'E' || aa == 'D')
          {
            b_H2O_loss = true;
            p.setPosition((b_pos + z * Constants::PROTON_MASS_U - h2o_mass) / z);
            p.setIntensity(0.02 / (DoubleReal)(z * z));
            if (z == 1 /* || b_pos > MIN_DOUBLE_MZ*/)
            {
              spec.push_back(p);
            }
          }
          if (b_NH3_loss || aa == 'Q' || aa == 'N' || aa == 'R' || aa == 'K')
          {
            b_NH3_loss = true;
            p.setPosition((b_pos + z * Constants::PROTON_MASS_U - nh3_mass) / z);
            p.setIntensity(0.02 / (DoubleReal)(z * z));

            if (z == 1 /* || b_pos > MIN_DOUBLE_MZ*/)
            {
              spec.push_back(p);
            }
          }
        }

        // a-ions only for charge 1
        if (z == 1)
        {
          if (b_pos - co_mass > min_mz_ && b_pos - co_mass < max_mz_)
          {
            // a-ions
            p.setPosition((b_pos + z * Constants::PROTON_MASS_U - co_mass) / (DoubleReal)z);
            p.setIntensity(0.1f);
            spec.push_back(p);
          }
        }



        if (y_pos > min_mz_ && y_pos < max_mz_)
        {
          // y-ions
          for (Size j = 0; j != max_isotope_; ++j)
          {
            if (z == 1 /* || y_pos > MIN_DOUBLE_MZ*/)
            {
              p.setPosition((y_pos + (DoubleReal)z * Constants::PROTON_MASS_U + (DoubleReal)j * Constants::NEUTRON_MASS_U) / (DoubleReal)z);
              p.setIntensity(isotope_distributions_[(Size)y_pos][j] / (DoubleReal) (z * z));
              spec.push_back(p);
            }
          }

          // H2O loss
          p.setPosition((y_pos + z * Constants::PROTON_MASS_U - h2o_mass) / (DoubleReal)z);
          p.setIntensity(0.1 / (DoubleReal)(z * z));
          if (aa2 == 'Q')           // pyroglutamic acid formation
          {
            p.setIntensity(0.5f);
          }
          if (z == 1 /* || y_pos > MIN_DOUBLE_MZ*/)
          {
            spec.push_back(p);
          }

          // NH3 loss
          if (y_NH3_loss || aa2 == 'Q' || aa2 == 'N' || aa2 == 'R' || aa2 == 'K')
          {
            y_NH3_loss = true;
            p.setPosition((y_pos + z * Constants::PROTON_MASS_U - nh3_mass) / (DoubleReal)z);
            p.setIntensity(0.1 / (DoubleReal)(z * z));

            if (z == 1 /*|| y_pos > MIN_DOUBLE_MZ*/)
            {
              spec.push_back(p);
            }
          }
        }
      }
    }

    // if Q1 abundant loss of water -> pyroglutamic acid formation
    // (currently a placeholder: both candidate treatments are disabled)

    if (sequence[0] == 'Q' && prefix == 0 && suffix == 0)
    {
      /*
      for (PeakSpectrum::Iterator it = spec.begin(); it != spec.end(); ++it)
      {
          it->setIntensity(it->getIntensity() * 0.5);
      }*/

      /*
      for (Size j = 0; j != max_isotope; ++j)
      {
  p.setPosition((precursor_weight + charge - 1 + j)/(DoubleReal)charge);
  p.setIntensity(isotope_distributions_[(Int)p.getPosition()[0]][j] * 0.1);
  spec.push_back(p);
      }
      */
    }


    spec.sortByPosition();

    return;
  }
Ejemplo n.º 22
0
START_SECTION((ThresholdMower(const ThresholdMower& source)))
	// Copy-construct from the shared instance behind e_ptr (set up outside this section)
	// and check that the copy reports the same parameters and the same name.
	ThresholdMower copy(*e_ptr);
	TEST_EQUAL(copy.getParameters(), e_ptr->getParameters())
	TEST_EQUAL(copy.getName(), e_ptr->getName())
END_SECTION

START_SECTION((ThresholdMower& operator=(const ThresholdMower& source)))
	// Assign the shared instance behind e_ptr to a default-constructed object
	// and check that parameters and name were taken over.
	ThresholdMower copy;
	copy = *e_ptr;
	TEST_EQUAL(copy.getParameters(), e_ptr->getParameters())
	TEST_EQUAL(copy.getName(), e_ptr->getName());
END_SECTION

START_SECTION((template<typename SpectrumType> void filterSpectrum(SpectrumType& spectrum)))
	// Load the reference spectrum from the test data directory.
	DTAFile dta_file;
	PeakSpectrum spec;
	dta_file.load(OPENMS_GET_TEST_DATA_PATH("Transformers_tests.dta"), spec);
	
	// The unfiltered spectrum is expected to contain 121 peaks.
	TEST_EQUAL(spec.size(), 121)

	// With "threshold" set to 1.0 the filter is expected to keep all 121 peaks.
	Param p(e_ptr->getParameters());
	p.setValue("threshold", 1.0);
	e_ptr->setParameters(p);

	e_ptr->filterSpectrum(spec);
	TEST_EQUAL(spec.size(), 121)

	// Raise "threshold" to 10.0 and filter the same spectrum again.
	p.setValue("threshold", 10.0);
	e_ptr->setParameters(p);

	e_ptr->filterSpectrum(spec);
Ejemplo n.º 23
0
  /**
    @brief Similarity of two spectra from a gapped global alignment of their peaks.

    Steps:
    - precursor shortcut: returns 0 if both spectra carry a precursor and the
      precursor m/z values differ by more than "precursor_mass_tolerance"
    - heuristic shortcut (only if "heuristic_level" > 0): returns 0 unless the
      heuristic_level most intense peaks of both spectra share at least one
      m/z within "epsilon"
    - dynamic-programming alignment in which two peaks may be matched only if
      their m/z values differ by at most "epsilon"; "epsilon" is also used as gap cost
    - the best score in the last row/column is normalized by the geometric
      mean of the two self-alignment scores

    @param spec1 first spectrum (peaks are read in the stored order)
    @param spec2 second spectrum (peaks are read in the stored order)
    @return normalized alignment score; 0 if a shortcut applies or a spectrum is empty
  */
  double PeakAlignment::operator()(const PeakSpectrum& spec1, const PeakSpectrum& spec2) const
  {
    // shortcut similarity calculation by comparing PrecursorPeaks (PrecursorPeaks more than delta away from each other are supposed to be from another peptide)
    // Each spectrum is checked for its own precursor list before it is indexed. The
    // comparison is skipped when precursor information is missing on either side, so a
    // spectrum without precursor is never rejected just because its m/z defaults to 0.
    if (!spec1.getPrecursors().empty() && !spec2.getPrecursors().empty())
    {
      const DoubleReal pre_mz1 = spec1.getPrecursors()[0].getMZ();
      const DoubleReal pre_mz2 = spec2.getPrecursors()[0].getMZ();
      if (fabs(pre_mz1 - pre_mz2) > (double)param_.getValue("precursor_mass_tolerance"))
      {
        return 0;
      }
    }

    // nothing to align; also avoids 0/0 in the mean/variance computation below
    if (spec1.empty() || spec2.empty())
    {
      return 0;
    }

    // heuristic shortcut
    const double epsilon = (double)param_.getValue("epsilon");
    const UInt heuristic_level = (UInt)param_.getValue("heuristic_level");
    if (heuristic_level)
    {
      // intensity-sorted copies are needed for the heuristic only
      PeakSpectrum s1(spec1), s2(spec2);
      s1.sortByIntensity(true);
      s2.sortByIntensity(true);

      //heuristic filters (and shortcuts) if spec1 and spec2 have NOT at least one peak in the sets of |heuristic_level|-many highest peaks in common
      bool shared_top_peak(false);
      for (PeakSpectrum::ConstIterator it_s1 = s1.begin(); !shared_top_peak && Size(it_s1 - s1.begin()) < heuristic_level && it_s1 != s1.end(); ++it_s1)
      {
        for (PeakSpectrum::ConstIterator it_s2 = s2.begin(); Size(it_s2 - s2.begin()) < heuristic_level && it_s2 != s2.end(); ++it_s2)
        {
          // determine if it is a match, i.e. mutual peak at certain m/z with epsilon tolerance
          if (fabs((*it_s2).getMZ() - (*it_s1).getMZ()) < epsilon)
          {
            shared_top_peak = true;
            break;
          }
        }
      }
      if (!shared_top_peak)
      {
        return 0;
      }
    }

    //TODO gapcost dependence on distance ?
    const double gap = (double)param_.getValue("epsilon");

    //initialize alignment matrix with 0 in (0,0) and a multiple of gapcost in the first row/col matrix(row,col,values)
    Matrix<double> matrix(spec1.size() + 1, spec2.size() + 1, 0);
    for (Size i = 1; i < matrix.rows(); i++)
    {
      matrix.setValue(i, 0, -gap * i);
    }
    for (Size i = 1; i < matrix.cols(); i++)
    {
      matrix.setValue(0, i, -gap * i);
    }

    //get sigma - the standard deviation (sqrt of variance) of all pairwise peak distances
    double mid(0);
    for (Size i = 0; i < spec1.size(); ++i)
    {
      for (Size j = 0; j < spec2.size(); ++j)
      {
        double pos1(spec1[i].getMZ()), pos2(spec2[j].getMZ());
        mid += fabs(pos1 - pos2);
      }
    }
    // average peak distance
    mid /= (spec1.size() * spec2.size());

    /* to manually retrace
    cout << "average peak distance " << mid << endl;
    */


    double var(0);
    for (Size i = 0; i < spec1.size(); ++i)
    {
      for (Size j = 0; j < spec2.size(); ++j)
      {
        double pos1(spec1[i].getMZ()), pos2(spec2[j].getMZ());
        const double deviation = fabs(pos1 - pos2) - mid;
        var += deviation * deviation;
      }
    }
    // peak distance variance
    var /= (spec1.size() * spec2.size());

    /* to manually retrace
    cout << "peak distance variance " << var << endl;
    */

    //only in case of only two equal peaks in the spectra sigma is 0
    // (the smallest positive double is substituted then, so sigma is never exactly 0)


    const double sigma((var == 0) ? numeric_limits<double>::min() : sqrt(var));

    /* to manually retrace
    cout << "peak standard deviation " << sigma << endl;
    */

    //fill alignment matrix
    for (Size i = 1; i < spec1.size() + 1; ++i)
    {
      for (Size j = 1; j < spec2.size() + 1; ++j)
      {
        double pos1(spec1[i - 1].getMZ()), pos2(spec2[j - 1].getMZ());
        // both gap moves are always allowed
        double from_left(matrix.getValue(i, j - 1) - gap);
        double from_above(matrix.getValue(i - 1, j) - gap);
        double best(max(from_left, from_above));
        //only if peaks are in reasonable proximity alignment is considered else only gaps
        if (fabs(pos1 - pos2) <= epsilon)
        {
          // actual cell = max(upper left cell+score, left cell-gap, upper cell-gap)
          double int1(spec1[i - 1].getIntensity()), int2(spec2[j - 1].getIntensity());
          double from_diagonal(matrix.getValue(i - 1, j - 1) + peakPairScore_(pos1, int1, pos2, int2, sigma));
          best = max(from_left, max(from_above, from_diagonal));
        }
        matrix.setValue(i, j, best);
      }
    }

    /* to manually retrace
    cout << endl << matrix << endl;
    */

    //get best overall score and return
    // NOTE(review): numeric_limits<double>::min() is the smallest POSITIVE double, so
    // an all-negative border yields a score of practically 0 instead of a negative
    // one. Kept as is because callers may rely on the non-negative result - confirm.
    double best_score(numeric_limits<double>::min());
    for (Size i = 0; i < matrix.cols(); i++)
    {
      best_score = max(best_score, matrix.getValue(matrix.rows() - 1, i));
    }
    for (Size i = 0; i < matrix.rows(); i++)
    {
      best_score = max(best_score, matrix.getValue(i, matrix.cols() - 1));
    }

    //calculate selfalignment-scores for both input spectra
    double score_spec1(0), score_spec2(0);
    for (Size i = 0; i < spec1.size(); ++i)
    {
      double int_i(spec1[i].getIntensity());
      double pos_i(spec1[i].getMZ());
      score_spec1 += peakPairScore_(pos_i, int_i, pos_i, int_i, sigma);
    }
    for (Size i = 0; i < spec2.size(); ++i)
    {
      double int_i(spec2[i].getIntensity());
      double pos_i(spec2[i].getMZ());
      score_spec2 += peakPairScore_(pos_i, int_i, pos_i, int_i, sigma);
    }


    /* to manually retrace
    cout << "score_spec1: " << score_spec1 << "score_spec2: " << score_spec2 << endl;
    */

    //normalize score to interval [0,1] with geometric mean
    double best_score_normalized(best_score / sqrt(score_spec1 * score_spec2));

    /*
    cout << "score_spec1: " << score_spec1 << " score_spec2: " << score_spec2 <<  " best_score: " << best_score << endl;

    //normalize score to interval [0,1] with arithmeic mean
    double best_score_normalized( (best_score*2) / (score_spec1 + score_spec2) );
    */

    return best_score_normalized;
  }
Ejemplo n.º 24
0
  /**
    @brief Scores every peak of the CID spectrum and stores the result in @p ion_scores.

    For each CID peak an IonScore entry keyed by the peak position is created
    and filled with the isotope-pattern scores for charge 1 and, for peaks below
    half the precursor weight, charge 2 (-1 otherwise). ETD support and witness
    set scoring are then applied and the witness score becomes the peak score.
    Peaks whose mass (or whose complement to the precursor weight) is below
    "max_decomp_weight" but has no mass decomposition get a score of 0.
    Finally the first and the last CID peak are given a score of 1.

    @param ion_scores       output map: peak position -> score record
    @param CID_spec         CID spectrum to be scored
    @param ETD_spec         ETD spectrum passed on to the ETD feature scoring
    @param precursor_weight weight of the precursor
    @param charge           precursor charge
  */
  void CompNovoIonScoring::scoreSpectra(Map<double, IonScore> & ion_scores, PeakSpectrum & CID_spec, PeakSpectrum & ETD_spec, double precursor_weight, Size charge)
  {

    // adds single charged variants of putative single charged ions
    //addSingleChargedIons_(ion_scores, CID_spec);

    // start every peak with a fresh (default) score record
    for (PeakSpectrum::ConstIterator it = CID_spec.begin(); it != CID_spec.end(); ++it)
    {
      ion_scores[it->getPosition()[0]] = IonScore();
    }

    for (PeakSpectrum::ConstIterator it = CID_spec.begin(); it != CID_spec.end(); ++it)
    {
      const double it_pos(it->getPosition()[0]);
      ion_scores[it_pos].s_isotope_pattern_1 = scoreIsotopes_(CID_spec, it, ion_scores, 1);
      // a doubly charged pattern is only evaluated for peaks below half the precursor weight
      if (it_pos < precursor_weight / 2.0)
      {
        ion_scores[it_pos].s_isotope_pattern_2 =  scoreIsotopes_(CID_spec, it, ion_scores, 2);
      }
      else
      {
        ion_scores[it_pos].s_isotope_pattern_2 = -1;
      }
    }

    // find possible supporting ions from ETD spec to CID spec
    scoreETDFeatures_(charge, precursor_weight, ion_scores, CID_spec, ETD_spec);

    // combine the features and give b-ion scores
    scoreWitnessSet_(charge, precursor_weight, ion_scores, CID_spec);

    for (Map<double, IonScore>::iterator it = ion_scores.begin(); it != ion_scores.end(); ++it)
    {
      it->second.score = it->second.s_witness;
    }


    MassDecompositionAlgorithm decomp_algo;


    // check whether a PRMNode_ can be decomposed into amino acids
    // rescore the peaks that cannot be possible y-ion candidates
    double max_decomp_weight((double)param_.getValue("max_decomp_weight"));
    for (Map<double, IonScore>::iterator it = ion_scores.begin(); it != ion_scores.end(); ++it)
    {
      // NOTE(review): 19.0 is presumably the y-ion offset (water + proton) - confirm
      if (it->first > 19.0 && (it->first - 19.0) < max_decomp_weight)
      {
        vector<MassDecomposition> decomps;
        decomp_algo.getDecompositions(decomps, it->first - 19.0);
#ifdef ION_SCORING_DEBUG
        cerr << "Decomps: " << it->first <<  " " << it->first - 19.0 << " " << decomps.size() << " " << it->second.score << endl;
#endif
        if (decomps.empty())
        {
          it->second.score = 0;
        }
      }

      // same check for the complementary mass
      if (it->first < precursor_weight && precursor_weight - it->first < max_decomp_weight)
      {
        vector<MassDecomposition> decomps;
        decomp_algo.getDecompositions(decomps, precursor_weight - it->first);
#ifdef ION_SCORING_DEBUG
        cerr << "Decomps: " << it->first << " " << precursor_weight - it->first << " " << decomps.size() << " " << it->second.score << endl;
#endif
        if (decomps.empty())
        {
          it->second.score = 0;
        }
      }
    }

    // the first and the last peak always get score 1; an empty spectrum has
    // neither, and dereferencing begin()/end() - 1 would be invalid
    if (!CID_spec.empty())
    {
      ion_scores[CID_spec.begin()->getPosition()[0]].score = 1;
      ion_scores[(CID_spec.end() - 1)->getPosition()[0]].score = 1;
    }
  }
Ejemplo n.º 25
0
  // Aligns the peaks of spec1 and spec2 by dynamic programming and returns the
  // index pairs (index in spec1, index in spec2) of the peaks that were aligned
  // to each other, ordered from the start of the spectra to the end.
  //
  // Two peaks may only be aligned if their m/z values differ by at most the
  // "epsilon" parameter; the same parameter is currently also used as gap cost.
  // The alignment ends in the best scoring cell of the last row or last column
  // of the score matrix and is traced back until the first row or column is hit.
  vector<pair<Size, Size> > PeakAlignment::getAlignmentTraceback(const PeakSpectrum& spec1, const PeakSpectrum& spec2) const
  {
    const double epsilon = (double)param_.getValue("epsilon");

    //TODO gapcost dependence on distance ?
    const double gap = (double)param_.getValue("epsilon");

    const Size size1 = spec1.size();
    const Size size2 = spec2.size();

    //initialize alignment matrix with 0 in (0,0) and a multiple of gapcost in the first row/col matrix(row,col,values)
    Matrix<double> matrix(size1 + 1, size2 + 1, 0);
    for (Size row = 1; row < matrix.rows(); row++)
    {
      matrix.setValue(row, 0, -gap * row);
    }
    for (Size col = 1; col < matrix.cols(); col++)
    {
      matrix.setValue(0, col, -gap * col);
    }

    // gives the direction of the matrix cell that originated the respective cell
    // e.g. matrix(i+1,j+1) could have originated from matrix(i,j), matrix(i+1,j) or matrix(i,j+1)
    // so traceback(i,j) represents matrix(i+1,j+1) and contains a "1"-from diagonal, a "0"-from left or a "2"-from above
    Matrix<Size> traceback(size1, size2);

    //get sigma - the standard deviation (sqrt of variance) of the absolute m/z differences of all peak pairs
    double mid(0);
    for (Size i = 0; i < size1; ++i)
    {
      for (Size j = 0; j < size2; ++j)
      {
        mid += fabs(spec1[i].getMZ() - spec2[j].getMZ());
      }
    }
    mid /= (size1 * size2);

    double var(0);
    for (Size i = 0; i < size1; ++i)
    {
      for (Size j = 0; j < size2; ++j)
      {
        const double deviation = fabs(spec1[i].getMZ() - spec2[j].getMZ()) - mid;
        var += deviation * deviation;
      }
    }
    var /= (size1 * size2);

    const double sigma(sqrt(var));

    /* to manually retrace
        cout << mid << endl << var << endl << sigma << endl;
    */

    //fill alignment matrix
    for (Size i = 1; i < size1 + 1; ++i)
    {
      for (Size j = 1; j < size2 + 1; ++j)
      {
        const double pos1(spec1[i - 1].getMZ()), pos2(spec2[j - 1].getMZ());
        const double from_left(matrix.getValue(i, j - 1) - gap);
        const double from_above(matrix.getValue(i - 1, j) - gap);

        //only if peaks are in reasonable proximity alignment is considered else only gaps
        if (fabs(pos1 - pos2) <= epsilon)
        {
          // actual cell = max(upper left cell+score, left cell-gap, upper cell-gap)
          const double int1(spec1[i - 1].getIntensity()), int2(spec2[j - 1].getIntensity());
          const double from_diagonal(matrix.getValue(i - 1, j - 1) + peakPairScore_(pos1, int1, pos2, int2, sigma));
          matrix.setValue(i, j, max(from_left, max(from_above, from_diagonal)));

          // TODO the cases where all or two values are equal
          // Only a strict maximum is recorded; on ties the cell keeps its initial value
          // (NOTE(review): presumably 0, i.e. "from left" - confirm the Matrix default).
          if (from_diagonal > from_left && from_diagonal > from_above)
          {
            traceback.setValue(i - 1, j - 1, 1);
          }
          else if (from_left > from_diagonal && from_left > from_above)
          {
            traceback.setValue(i - 1, j - 1, 0);
          }
          else if (from_above > from_diagonal && from_above > from_left)
          {
            traceback.setValue(i - 1, j - 1, 2);
          }
        }
        else
        {
          // actual cell = max(left cell-gap, upper cell-gap)
          matrix.setValue(i, j, max(from_left, from_above));
          if (from_left > from_above)
          {
            traceback.setValue(i - 1, j - 1, 0);
          }
          else           //from_left <= from_above
          {
            traceback.setValue(i - 1, j - 1, 2);
          }
        }
      }
    }

    //return track from best alloverscore to 0,0
    vector<pair<Size, Size> > ret_val;

    //get matrix coordinates from best alloverscore
    // The start value has to be below every possible score. numeric_limits<double>::min()
    // is the smallest POSITIVE double and would make every score <= 0 lose the comparison,
    // so the most negative finite double is used instead.
    Size row_index(0), col_index(0);
    double best_score(-numeric_limits<double>::max());
    const Size last_row = matrix.rows() - 1;
    const Size last_col = matrix.cols() - 1;
    for (Size col = 0; col < matrix.cols(); col++)
    {
      if (best_score < matrix.getValue(last_row, col))
      {
        best_score = matrix.getValue(last_row, col);
        row_index = last_row;
        col_index = col;
      }
    }
    for (Size row = 0; row < matrix.rows(); row++)
    {
      if (best_score < matrix.getValue(row, last_col))
      {
        best_score = matrix.getValue(row, last_col);
        row_index = row;
        col_index = last_col;
      }
    }

    // TODO check the invariant!
    while (row_index > 0 && col_index > 0)
    {
      const Size direction = traceback.getValue(row_index - 1, col_index - 1);
      if (direction == 1)
      {
        //from diagonal - peaks aligned; register aligned peaks only
        ret_val.insert(ret_val.begin(), pair<Size, Size>(row_index - 1, col_index - 1));
        row_index = row_index - 1;
        col_index = col_index - 1;
      }
      else if (direction == 0)
      {
        // gap alignment, step to the left
        col_index = col_index - 1;
      }
      else
      {
        // gap alignment, step upwards
        row_index = row_index - 1;
      }
    }

    /* to manually retrace
    cout << endl << matrix << endl << traceback << endl;
    */

    return ret_val;
  }
Ejemplo n.º 26
0
 // Computes the AScore for the phosphorylation sites of a peptide hit.
 //
 // Returns a copy of hit. If the calculation is not possible or is aborted, the
 // copy only differs from hit by its score, which is set to -1. This happens for
 // an empty spectrum, a peptide longer than max_peptide_len, no phospho events,
 // no candidate sites, as many events as candidate sites, more than max_num_perm
 // permutations, or fewer than two ranked permutations.
 // Otherwise the copy carries the best ranked sequence, the meta values
 // "search_engine_sequence", "AScore_pep_score" and "AScore_<rank>" (one per
 // probable phospho site), and the lowest of the per-site AScores as score.
 // If no probable phospho site is reported, the score stays at
 // numeric_limits<double>::max().
 //
 // Side effect: real_spectrum is sorted by position if it is not sorted yet.
 PeptideHit AScore::compute(const PeptideHit & hit, PeakSpectrum & real_spectrum, double fragment_mass_tolerance, bool fragment_mass_unit_ppm, Size max_peptide_len, Size max_num_perm)
 {
   PeptideHit phospho = hit;

   //reset phospho
   phospho.setScore(-1);
   if (real_spectrum.empty())
   {
     return phospho;
   }

   String sequence_str = phospho.getSequence().toString();

   Size number_of_phosphorylation_events = numberOfPhosphoEvents_(sequence_str);
   AASequence seq_without_phospho = removePhosphositesFromSequence_(sequence_str);

   if (seq_without_phospho.toUnmodifiedString().size() > max_peptide_len)
   {
     LOG_DEBUG << "\tcalculation aborted: peptide too long: " << seq_without_phospho.toString() << std::endl;
     return phospho;
   }

   // determine all phospho sites
   vector<Size> sites(getSites_(seq_without_phospho));
   Size number_of_STY = sites.size();

   // nothing to localise: no event, no candidate site, or every candidate site is occupied
   if (number_of_phosphorylation_events == 0 || number_of_STY == 0 || number_of_STY == number_of_phosphorylation_events)
   {
     return phospho;
   }

   vector<vector<Size> > permutations(computePermutations_(sites, (Int)number_of_phosphorylation_events));
   LOG_DEBUG << "\tnumber of permutations: " << permutations.size() << std::endl;

   // TODO: using a heuristic to calculate the best phospho sites if the number of permutations are exceeding the maximum.
   // A heuristic could be to calculate the best site for the first phosphorylation and based on this the best site for the second
   // phosphorylation and so on until every site is determined
   if (permutations.size() > max_num_perm)
   {
     LOG_DEBUG << "\tcalculation aborted: number of permutations exceeded" << std::endl;
     return phospho;
   }

   vector<PeakSpectrum> th_spectra(createTheoreticalSpectra_(permutations, seq_without_phospho));

   // prepare real spectrum windows
   if (!real_spectrum.isSorted())
   {
     real_spectrum.sortByPosition();
   }
   vector<PeakSpectrum> windows_top10(peakPickingPerWindowsInSpectrum_(real_spectrum));

   // calculate peptide score for each possible phospho site permutation
   vector<vector<double> > peptide_site_scores(calculatePermutationPeptideScores_(th_spectra, windows_top10, fragment_mass_tolerance, fragment_mass_unit_ppm));

   // rank peptide permutations ascending
   multimap<double, Size> ranking(rankWeightedPermutationPeptideScores_(peptide_site_scores));

   // The best and the second best entry are both dereferenced below; with fewer
   // than two entries that would read past rend().
   if (ranking.size() < 2)
   {
     LOG_DEBUG << "\tcalculation aborted: less than two ranked permutations" << std::endl;
     return phospho;
   }

   // highest ranked permutation
   multimap<double, Size>::reverse_iterator rev = ranking.rbegin();
   String seq1 = th_spectra[rev->second].getName();
   phospho.setSequence(AASequence::fromString(seq1));
   phospho.setMetaValue("search_engine_sequence", hit.getSequence().toString());

   double peptide1_score = rev->first;
   phospho.setMetaValue("AScore_pep_score", peptide1_score); // initialize score with highest peptide score (aka highest weighted score)

   // second highest ranked permutation
   ++rev;
   String seq2 = th_spectra[rev->second].getName();
   double peptide2_score = rev->first;

   vector<ProbablePhosphoSites> phospho_sites;
   determineHighestScoringPermutations_(peptide_site_scores, phospho_sites, permutations, ranking);

   Int rank = 1;
   double best_Ascore = std::numeric_limits<double>::max(); // the lower the better
   for (vector<ProbablePhosphoSites>::iterator s_it = phospho_sites.begin(); s_it != phospho_sites.end(); ++s_it)
   {
     double Ascore = 0;
     if (peptide1_score == peptide2_score) // set Ascore = 0 for each phosphorylation site
     {
       LOG_DEBUG << "\tscore of best (" << seq1 << ") and second best peptide (" << seq2 << ") are equal (" << peptide1_score << ")" << std::endl;
     }
     else
     {
       vector<PeakSpectrum> site_determining_ions;

       computeSiteDeterminingIons_(th_spectra, *s_it, site_determining_ions, fragment_mass_tolerance, fragment_mass_unit_ppm);
       Size N = site_determining_ions[0].size(); // all possibilities have the same number so take the first one
       double p = static_cast<double>(s_it->peak_depth) / 100.0;

       Size n_first = 0; // number of matching peaks for first peptide
       for (Size window_idx = 0; window_idx != windows_top10.size(); ++window_idx) // for each 100 m/z window
       {
         n_first += numberOfMatchedIons_(site_determining_ions[0], windows_top10[window_idx], s_it->peak_depth, fragment_mass_tolerance, fragment_mass_unit_ppm);
       }
       double P_first = computeCumulativeScore_(N, n_first, p);

       Size n_second = 0; // number of matching peaks for second peptide
       for (Size window_idx = 0; window_idx != windows_top10.size(); ++window_idx) // for each 100 m/z window
       {
         n_second += numberOfMatchedIons_(site_determining_ions[1], windows_top10[window_idx], s_it->peak_depth, fragment_mass_tolerance, fragment_mass_unit_ppm);
       }
       Size N2 = site_determining_ions[1].size(); // all possibilities have the same number so take the first one
       double P_second = computeCumulativeScore_(N2, n_second, p);

       // absolute value is used to avoid -0 score values;
       // fabs always works on double, an integer abs overload can never be picked
       double score_first = fabs(-10 * log10(P_first));
       double score_second = fabs(-10 * log10(P_second));

       LOG_DEBUG << "\tfirst - N: " << N << ",p: " << p << ",n: " << n_first << ", score: " << score_first << std::endl;
       LOG_DEBUG << "\tsecond - N: " << N2 << ",p: " << p << ",n: " << n_second << ", score: " << score_second << std::endl;

       Ascore = score_first - score_second;
       LOG_DEBUG << "\tAscore_" << rank << ": " << Ascore << std::endl;
     }
     if (Ascore < best_Ascore)
     {
       best_Ascore = Ascore;
     }
     phospho.setMetaValue("AScore_" + String(rank), Ascore);
     ++rank;
   }
   phospho.setScore(best_Ascore);
   return phospho;
 }
Ejemplo n.º 27
0
  // Similarity of two spectra: the sum over all peak pairs (one peak from each
  // spectrum) whose m/z values differ by less than "tolerance" of
  // sqrt(intensity1 * intensity2 * factor), divided by
  // sqrt(total intensity of s1 * total intensity of s2).
  //
  // factor is 1 unless "use_linear_factor" or "use_gaussian_factor" is set, in
  // which case it comes from getFactor_. Returns 0 if the normalisation term is
  // zero (e.g. an empty spectrum), where the quotient would otherwise be NaN.
  // Throws Exception::NotImplemented if "is_relative_tolerance" is set.
  //
  // NOTE(review): the pair search skips ahead in s2 and breaks early, which
  // presumably relies on both spectra being sorted by m/z - confirm with callers.
  double ZhangSimilarityScore::operator()(const PeakSpectrum & s1, const PeakSpectrum & s2) const
  {
    const double tolerance = (double)param_.getValue("tolerance");
    const bool use_linear_factor = param_.getValue("use_linear_factor").toBool();
    const bool use_gaussian_factor = param_.getValue("use_gaussian_factor").toBool();
    double sum(0), sum1(0), sum2(0);

    // TODO remove parameter
    if (param_.getValue("is_relative_tolerance").toBool() )
    {
      throw Exception::NotImplemented(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION);
    }

    // total intensity of each spectrum, used for the normalisation
    for (PeakSpectrum::ConstIterator it1 = s1.begin(); it1 != s1.end(); ++it1)
    {
      sum1 += it1->getIntensity();
    }

    for (PeakSpectrum::ConstIterator it2 = s2.begin(); it2 != s2.end(); ++it2)
    {
      sum2 += it2->getIntensity();
    }

    // accumulate the contributions of all peak pairs within the tolerance
    Size j_left(0);
    for (Size i = 0; i != s1.size(); ++i)
    {
      const double pos1(s1[i].getMZ());
      for (Size j = j_left; j != s2.size(); ++j)
      {
        const double pos2(s2[j].getMZ());
        const double distance = fabs(pos1 - pos2);
        if (distance < tolerance)
        {
          double factor = 1.0;

          if (use_linear_factor || use_gaussian_factor)
          {
            factor = getFactor_(tolerance, distance, use_gaussian_factor);
          }
          sum += sqrt(s1[i].getIntensity() * s2[j].getIntensity() * factor);
        }
        else if (pos2 > pos1)
        {
          // s2 peak lies beyond the tolerance window of the current s1 peak
          break;
        }
        else
        {
          // s2 peak lies below the window; later s1 peaks start searching from here
          j_left = j;
        }
      }
    }

    const double norm = sqrt(sum1 * sum2);
    if (norm == 0.0)
    {
      // no intensity in at least one spectrum: nothing can match, avoid 0/0
      return 0.0;
    }

    return sum / norm;
  }
Ejemplo n.º 28
0
// Copy constructor: the copy is expected to report the same parameters and name as *e_ptr.
START_SECTION((BernNorm(const BernNorm& source)))
	BernNorm copy(*e_ptr);
	TEST_EQUAL(copy.getParameters(), e_ptr->getParameters())
	TEST_EQUAL(copy.getName(), e_ptr->getName())
END_SECTION

// Assignment operator: a default-constructed object assigned from *e_ptr is
// expected to report the same parameters and name as *e_ptr.
START_SECTION((BernNorm& operator=(const BernNorm& source)))
	BernNorm copy;
	copy = *e_ptr;
	TEST_EQUAL(copy.getParameters(), e_ptr->getParameters())
	TEST_EQUAL(copy.getName(), e_ptr->getName())
END_SECTION

// filterSpectrum: checks the peak count of the spectrum loaded from
// "Transformers_tests.dta" before and after filtering.
// NOTE(review): this excerpt ends without the matching END_SECTION.
START_SECTION((template<typename SpectrumType> void filterSpectrum(SpectrumType& spectrum)))
	DTAFile dta_file;
	PeakSpectrum spec;
	dta_file.load(OPENMS_GET_TEST_DATA_PATH("Transformers_tests.dta"), spec);

	// the test file is expected to contain 121 peaks
	TEST_EQUAL(spec.size(), 121)

	// with the parameters currently set on e_ptr no peak is expected to be removed
	e_ptr->filterSpectrum(spec);
	
	TEST_EQUAL(spec.size(), 121)

	// after setting parameter "C2" to 2000.0, 28 peaks are expected to remain
	Param p(e_ptr->getParameters());
	p.setValue("C2", 2000.0);
	e_ptr->setParameters(p);
	e_ptr->filterSpectrum(spec);

	TEST_EQUAL(spec.size(), 28)
Ejemplo n.º 29
0
// fresh SqrtMower instance used by the following sections
e_ptr = new SqrtMower();

// Copy constructor: the copy is expected to compare equal to *e_ptr.
START_SECTION((SqrtMower(const SqrtMower& source)))
	SqrtMower copy(*e_ptr);
	TEST_EQUAL(*e_ptr == copy, true)
END_SECTION

// Assignment operator: a default-constructed object assigned from *e_ptr is
// expected to compare equal to *e_ptr.
START_SECTION((SqrtMower& operator=(const SqrtMower& source)))
	SqrtMower copy;
	copy = *e_ptr;
	TEST_EQUAL(*e_ptr == copy, true)
END_SECTION

// filterSpectrum: the intensity of the peak at index 40 is expected to change
// from 37.5 to sqrt(37.5).
START_SECTION((template<typename SpectrumType> void filterSpectrum(SpectrumType& spectrum)))
	DTAFile dta_file;
	PeakSpectrum spec;
	dta_file.load(OPENMS_GET_TEST_DATA_PATH("Transformers_tests.dta"), spec);
	
	// intensity before filtering
	TEST_REAL_SIMILAR((spec.begin() + 40)->getIntensity(), 37.5)

	// intensity after filtering
	e_ptr->filterSpectrum(spec);
	TEST_REAL_SIMILAR((spec.begin() + 40)->getIntensity(), sqrt(37.5))
END_SECTION

// filterPeakMap: loads the test spectrum and wraps it in a PeakMap.
// NOTE(review): this excerpt stops after the setup; no check and no END_SECTION are visible.
START_SECTION((void filterPeakMap(PeakMap& exp)))
	DTAFile dta_file;
  PeakSpectrum spec;
	dta_file.load(OPENMS_GET_TEST_DATA_PATH("Transformers_tests.dta"), spec);

	// map containing the single loaded spectrum
	PeakMap pm;
	pm.addSpectrum(spec);
// Destructor: deletes the instance ptr points to.
START_SECTION(~TheoreticalSpectrumGenerator())
  delete ptr;
END_SECTION

// fresh generator and the peptide "IFSQVGK" for the sections below
ptr = new TheoreticalSpectrumGenerator();
AASequence peptide = AASequence::fromString("IFSQVGK");

// Assignment operator: a default-constructed generator assigned from *ptr is
// expected to report the same parameters as *ptr.
START_SECTION(TheoreticalSpectrumGenerator& operator = (const TheoreticalSpectrumGenerator& tsg))
  TheoreticalSpectrumGenerator copy;
  copy = *ptr;
  TEST_EQUAL(copy.getParameters(), ptr->getParameters())
END_SECTION

START_SECTION(void getSpectrum(PeakSpectrum& spec, const AASequence& peptide, Int min_charge = 1, Int max_charge = 1))
  PeakSpectrum spec;
  ptr->getSpectrum(spec, peptide, 1, 1);
  TEST_EQUAL(spec.size(), 11)

  TOLERANCE_ABSOLUTE(0.001)

  double result[] = {/*114.091,*/ 147.113, 204.135, 261.16, 303.203, 348.192, 431.262, 476.251, 518.294, 575.319, 632.341, 665.362};
  for (Size i = 0; i != spec.size(); ++i)
  {
    TEST_REAL_SIMILAR(spec[i].getPosition()[0], result[i])
  }

  spec.clear(true);
  ptr->getSpectrum(spec, peptide, 1, 2);
  TEST_EQUAL(spec.size(), 22)