/**
  @brief Appends a minimal theoretical fragment spectrum for @p sequence to @p spec.

  Walks the sequence once, accumulating residue weights (looked up in
  aa_to_weight_) from the front into a prefix mass (b_pos) and from the back
  into a suffix mass (y_pos). For every split position a peak with intensity
  1.0 is appended for each of the two masses that lies strictly between
  min_mz_ and max_mz_; the stored position is the mass plus one proton mass.
  The last residue is never added to either sum, i.e. only the
  sequence.size() - 1 internal split positions are generated.

  @param spec      Spectrum the peaks are appended to; sorted by position on return.
  @param sequence  One-letter residue string; may be empty (no peaks are added then).
  @param prefix    Mass offset added to every prefix (b_pos) mass.
  @param suffix    Mass offset added to every suffix (y_pos) mass (on top of H2O).
*/
void CompNovoIdentificationBase::getCIDSpectrumLight_(PeakSpectrum & spec, const String & sequence, DoubleReal prefix, DoubleReal suffix)
{
  static DoubleReal h2o_mass = EmpiricalFormula("H2O").getMonoWeight();
  Peak1D p;
  DoubleReal b_pos(0.0 + prefix);
  DoubleReal y_pos(h2o_mass + suffix);

  // 'i + 1 < size()' instead of 'i != size() - 1': for an empty sequence the
  // unsigned expression 'size() - 1' wraps around, which made the loop run
  // (and index out of bounds) practically forever.
  for (Size i = 0; i + 1 < sequence.size(); ++i)
  {
    // residue i counted from the front extends the prefix mass ...
    char aa(sequence[i]);
    b_pos += aa_to_weight_[aa];

    // ... and residue i counted from the back extends the suffix mass
    char aa2(sequence[sequence.size() - i - 1]);
    y_pos += aa_to_weight_[aa2];

    // the m/z window is checked on the uncharged mass, the proton is added afterwards
    if (b_pos > min_mz_ && b_pos < max_mz_)
    {
      p.setPosition(b_pos + Constants::PROTON_MASS_U);
      p.setIntensity(1.0f);
      spec.push_back(p);
    }

    if (y_pos > min_mz_ && y_pos < max_mz_)
    {
      p.setPosition(y_pos + Constants::PROTON_MASS_U);
      p.setIntensity(1.0f);
      spec.push_back(p);
    }
  }

  spec.sortByPosition();
  return;
}
/**
  @brief Splits a spectrum into consecutive 100 m/z wide windows and keeps up to ten peaks per window.

  The window grid starts at the first peak's m/z rounded down to a multiple of
  100 and ends at the last peak's m/z rounded up to a multiple of 100. Peaks
  are consumed front to back, so the spectrum is assumed to be sorted by m/z
  (TODO confirm with callers). Inside each window the peaks are ordered with
  sortByIntensity(true) and the first ten (or fewer) are copied to the result.

  @param real_spectrum  Input spectrum; it is only read here.
  @return One spectrum per window, in ascending window order. Empty if
          @p real_spectrum is empty.
*/
std::vector<PeakSpectrum> AScore::peakPickingPerWindowsInSpectrum_(PeakSpectrum &real_spectrum) const
{
  vector<PeakSpectrum> windows_top10;

  // front()/back() must not be called on an empty spectrum
  if (real_spectrum.empty())
  {
    return windows_top10;
  }

  double spect_lower_bound = floor(real_spectrum.front().getMZ() / 100) * 100;
  double spect_upper_bound = ceil(real_spectrum.back().getMZ() / 100) * 100;

  Size number_of_windows = static_cast<Size>(ceil((spect_upper_bound - spect_lower_bound) / 100));
  windows_top10.resize(number_of_windows);

  PeakSpectrum::Iterator it_current_peak = real_spectrum.begin();
  Size window_upper_bound = static_cast<Size>(spect_lower_bound + 100);

  for (Size current_window = 0; current_window < number_of_windows; ++current_window)
  {
    PeakSpectrum real_window;

    // The end check has to come first: the original dereferenced the iterator
    // before testing it, reading past the last peak in the final window.
    while ((it_current_peak != real_spectrum.end()) && (it_current_peak->getMZ() <= window_upper_bound))
    {
      real_window.push_back(*it_current_peak);
      ++it_current_peak;
    }

    // presumably 'true' requests descending order so the most intense peaks come first -- TODO confirm
    real_window.sortByIntensity(true);
    for (Size i = 0; (i < 10) && (i < real_window.size()); ++i)
    {
      windows_top10[current_window].push_back(real_window[i]);
    }

    window_upper_bound += 100;
  }
  return windows_top10;
}
double SpectrumAlignmentScore::operator()(const PeakSpectrum & s1, const PeakSpectrum & s2) const { const double tolerance = (double)param_.getValue("tolerance"); bool is_relative_tolerance = param_.getValue("is_relative_tolerance").toBool(); bool use_linear_factor = param_.getValue("use_linear_factor").toBool(); bool use_gaussian_factor = param_.getValue("use_gaussian_factor").toBool(); if (use_linear_factor && use_gaussian_factor) { cerr << "Warning: SpectrumAlignmentScore, use either 'use_linear_factor' or 'use_gaussian_factor'!" << endl; } SpectrumAlignment aligner; Param p; p.setValue("tolerance", tolerance); p.setValue("is_relative_tolerance", (String)param_.getValue("is_relative_tolerance")); aligner.setParameters(p); vector<pair<Size, Size> > alignment; aligner.getSpectrumAlignment(alignment, s1, s2); double score(0), sum(0), sum1(0), sum2(0); for (PeakSpectrum::ConstIterator it1 = s1.begin(); it1 != s1.end(); ++it1) { sum1 += it1->getIntensity() * it1->getIntensity(); } for (PeakSpectrum::ConstIterator it1 = s2.begin(); it1 != s2.end(); ++it1) { sum2 += it1->getIntensity() * it1->getIntensity(); } for (vector<pair<Size, Size> >::const_iterator it = alignment.begin(); it != alignment.end(); ++it) { //double factor(0.0); //factor = (epsilon - fabs(s1[it->first].getPosition()[0] - s2[it->second].getPosition()[0])) / epsilon; double mz_tolerance(tolerance); if (is_relative_tolerance) { mz_tolerance = mz_tolerance * s1[it->first].getPosition()[0] / 1e6; } double mz_difference(fabs(s1[it->first].getPosition()[0] - s2[it->second].getPosition()[0])); double factor = 1.0; if (use_linear_factor || use_gaussian_factor) { factor = getFactor_(mz_tolerance, mz_difference, use_gaussian_factor); } sum += sqrt(s1[it->first].getIntensity() * s2[it->second].getIntensity() * factor); } score = sum / (sqrt(sum1 * sum2)); return score; }
double XQuestScores::xCorrelationPrescore(const PeakSpectrum & spec1, const PeakSpectrum & spec2, double tolerance) { // return 0 = no correlation, when one of the spectra is empty if (spec1.size() == 0 || spec2.size() == 0) { return 0.0; } double maxionsize = std::max(spec1[spec1.size()-1].getMZ(), spec2[spec2.size()-1].getMZ()); Int table_size = ceil(maxionsize / tolerance)+1; std::vector< double > ion_table1(table_size, 0); std::vector< double > ion_table2(table_size, 0); // Build tables of the same size, each bin has the size of the tolerance for (Size i = 0; i < spec1.size(); ++i) { Size pos = static_cast<Size>(ceil(spec1[i].getMZ() / tolerance)); ion_table1[pos] = 1; } for (Size i = 0; i < spec2.size(); ++i) { Size pos =static_cast<Size>(ceil(spec2[i].getMZ() / tolerance)); ion_table2[pos] = 1; } double dot_product = 0.0; for (Size i = 0; i < ion_table1.size(); ++i) { dot_product += ion_table1[i] * ion_table2[i]; } // determine the smaller spectrum and normalize by the number of peaks in it double peaks = std::min(spec1.size(), spec2.size()); return dot_product / peaks; }
/**
  @brief Convenience overload: reads a spectrum from a DTA file and searches candidates for it.

  The file is loaded with DTAFile, the peaks are sorted by position, and their
  positions are handed to the getCandidates() overload taking a vector of doubles.

  @param candidates Output container, filled by the delegated overload.
  @param DTA_file   Path of the DTA file to load.
*/
void SuffixArrayPeptideFinder::getCandidates(vector<vector<pair<SuffixArrayPeptideFinder::FASTAEntry, String> > >& candidates, const String& DTA_file)
{
  DTAFile reader;
  PeakSpectrum spectrum;
  reader.load(DTA_file, spectrum);
  spectrum.sortByPosition();

  // collect the (sorted) peak positions
  vector<double> positions;
  for (Size idx = 0; idx < spectrum.size(); ++idx)
  {
    positions.push_back(spectrum[idx].getPosition()[0]);
  }

  const vector<double> sorted_positions(positions);
  getCandidates(candidates, sorted_positions);
}
/**
  @brief Builds one spectrum per peak level from ranked peaks.

  Peak i of @p spec (with zero-based rank ranks[i]) is added to the spectrum of
  every level L in [min_level, max_level] for which ranks[i] <= L.

  @param spec      Source spectrum.
  @param ranks     Rank of each peak; indexed in parallel with @p spec.
  @param min_level Lowest level to generate.
  @param max_level Highest level to generate.
  @return Map from level to the spectrum holding all peaks admitted at that
          level. Empty if @p spec is empty; levels without peaks have no entry.
*/
map<Size, PeakSpectrum > PScore::calculatePeakLevelSpectra(const PeakSpectrum& spec, const vector<Size>& ranks, Size min_level, Size max_level)
{
  map<Size, MSSpectrum<Peak1D> > spectra_by_level;

  if (spec.empty())
  {
    return spectra_by_level;
  }

  const int lowest_level = static_cast<int>(min_level);
  const int highest_level = static_cast<int>(max_level);

  for (Size peak_idx = 0; peak_idx != ranks.size(); ++peak_idx)
  {
    const int peak_rank = static_cast<int>(ranks[peak_idx]);

    // Descend from the least restrictive level; as soon as the rank is too
    // high for a level it is too high for all lower levels as well.
    int level = highest_level;
    while (level >= lowest_level && peak_rank <= level)
    {
      spectra_by_level[level].push_back(spec[peak_idx]);
      --level;
    }
  }

  return spectra_by_level;
}
/**
  @brief Sliding-window peak filter.

  For every peak a window of width @p windowsize starting at that peak is
  collected. The window is ordered with sortByIntensity(true) and every entry
  from index @p no_peaks onwards is marked for deletion. Scanning stops after
  the first window that reaches the end of the spectrum. Afterwards @p spec is
  rebuilt from all peaks that do not compare equal to any marked peak and is
  sorted by position.

  @param spec       Spectrum to filter (modified in place).
  @param windowsize Width of the window in position units.
  @param no_peaks   Number of leading peaks (after the intensity sort) kept per window.
*/
void CompNovoIdentificationBase::windowMower_(PeakSpectrum & spec, DoubleReal windowsize, Size no_peaks)
{
  PeakSpectrum original(spec);
  vector<Peak1D> rejected;

  for (Size start = 0; start < spec.size(); ++start)
  {
    // gather all peaks closer than 'windowsize' to the peak at 'start'
    PeakSpectrum window;
    bool hit_end = false;
    Size cursor = start;
    while (spec[cursor].getPosition()[0] - spec[start].getPosition()[0] < windowsize)
    {
      window.push_back(spec[cursor]);
      ++cursor;
      if (cursor == spec.size())
      {
        hit_end = true;
        break;
      }
    }

    // everything behind the first 'no_peaks' entries of the sorted window is rejected
    window.sortByIntensity(true);
    for (Size rank = no_peaks; rank < window.size(); ++rank)
    {
      rejected.push_back(Peak1D(window[rank]));
    }

    if (hit_end)
    {
      break;
    }
  }

  // rebuild the spectrum from the peaks that were never rejected
  spec.clear(false);
  PeakSpectrum::ConstIterator peak = original.begin();
  while (peak != original.end())
  {
    const bool is_rejected = find(rejected.begin(), rejected.end(), *peak) != rejected.end();
    if (!is_rejected)
    {
      spec.push_back(*peak);
    }
    ++peak;
  }

  spec.sortByPosition();
}
double XQuestScores::logOccupancyProb(const PeakSpectrum& theoretical_spec, const Size matched_size, double fragment_mass_tolerance, bool fragment_mass_tolerance_unit_ppm) { using boost::math::binomial; Size theo_size = theoretical_spec.size(); if (matched_size < 1 || theo_size < 1) { return 0; } double range; double used_tolerance; if (fragment_mass_tolerance_unit_ppm) { range = std::log(theoretical_spec.back().getMZ()) - std::log(theoretical_spec[0].getMZ()); used_tolerance = fragment_mass_tolerance / 1e6; } else { range = theoretical_spec.back().getMZ() - theoretical_spec[0].getMZ(); used_tolerance = fragment_mass_tolerance; } // A priori probability of a random match given info about the theoretical spectrum double a_priori_p = 0; a_priori_p = 1 - pow(1 - 2 * used_tolerance / range, static_cast<double>(theo_size)); double log_occu_prob = 0; binomial flip(theo_size, a_priori_p); // min double number to avoid 0 values, causing scores with the value "inf" log_occu_prob = -log(1 - cdf(flip, matched_size) + std::numeric_limits<double>::min()); // score lower than 0 does not make sense, but can happen, if cfd = 0, then -log( 1 + <double>::min() ) < 0 if (log_occu_prob >= 0.0) { return log_occu_prob; } else // underflow warning? { return 0; } }
std::vector< double > XQuestScores::xCorrelation(const PeakSpectrum & spec1, const PeakSpectrum & spec2, Int maxshift, double tolerance) { // generate vector of results, filled with zeroes std::vector< double > results(maxshift * 2 + 1, 0); // return 0 = no correlation, when one of the spectra is empty if (spec1.size() == 0 || spec2.size() == 0) { return results; } double maxionsize = std::max(spec1[spec1.size()-1].getMZ(), spec2[spec2.size()-1].getMZ()); Int table_size = ceil(maxionsize / tolerance)+1; std::vector< double > ion_table1(table_size, 0); std::vector< double > ion_table2(table_size, 0); // Build tables of the same size, each bin has the size of the tolerance for (Size i = 0; i < spec1.size(); ++i) { Size pos = static_cast<Size>(ceil(spec1[i].getMZ() / tolerance)); ion_table1[pos] = 10.0; } for (Size i = 0; i < spec2.size(); ++i) { Size pos =static_cast<Size>(ceil(spec2[i].getMZ() / tolerance)); ion_table2[pos] = 10.0; } // Compute means double mean1 = (std::accumulate(ion_table1.begin(), ion_table1.end(), 0.0)) / table_size; double mean2 = (std::accumulate(ion_table2.begin(), ion_table2.end(), 0.0)) / table_size; // Compute denominator double s1 = 0; double s2 = 0; for (Int i = 0; i < table_size; ++i) { s1 += pow((ion_table1[i] - mean1), 2); s2 += pow((ion_table2[i] - mean2), 2); } double denom = sqrt(s1 * s2); // Calculate correlation for each shift for (Int shift = -maxshift; shift <= maxshift; ++shift) { double s = 0; for (Int i = 0; i < table_size; ++i) { Int j = i + shift; if ( (j >= 0) && (j < table_size)) { s += (ion_table1[i] - mean1) * (ion_table2[j] - mean2); } } if (denom > 0) { results[shift + maxshift] = s / denom; } } return results; }
/**
  @brief Similarity pairwise score

  This function returns the similarity score of two spectra based on SteinScott.

  The score is (sum - z) / sqrt(sum1 * sum2), where 'sum' is the sum of
  intensity products of all peak pairs whose m/z differ by at most twice the
  'tolerance' parameter, sum1/sum2 are the squared-intensity sums of the two
  spectra, and z = (tolerance / 10000) * (total intensity of s1) * (total
  intensity of s2). The pairing loop assumes both spectra are sorted by m/z
  (it stops scanning s2 once a peak lies beyond the window).

  @param s1  const PeakSpectrum Spectrum 1
  @param s2  const PeakSpectrum Spectrum 2
  @return The score, or 0 if it is below the 'threshold' parameter or if the
          normalization term is zero (e.g. an empty spectrum).
  @see SteinScottImproveScore()
*/
double SteinScottImproveScore::operator()(const PeakSpectrum & s1, const PeakSpectrum & s2) const
{
  const double epsilon = (double)param_.getValue("tolerance");
  const double constant = epsilon / 10000;

  double score(0), sum(0), sum1(0), sum2(0), sum3(0), sum4(0);

  // squared and plain intensity sums of s1
  for (PeakSpectrum::ConstIterator it1 = s1.begin(); it1 != s1.end(); ++it1)
  {
    double temp = it1->getIntensity();
    sum1 += temp * temp;
    sum3 += temp;
  }

  // squared and plain intensity sums of s2
  for (PeakSpectrum::ConstIterator it1 = s2.begin(); it1 != s2.end(); ++it1)
  {
    double temp = it1->getIntensity();
    sum2 += temp * temp;
    sum4 += temp;
  }

  double z = constant * (sum3 * sum4);

  // j_left remembers the last s2 peak known to lie left of the window, so the
  // inner scan does not restart at the beginning for every s1 peak
  Size j_left(0);
  for (Size i = 0; i != s1.size(); ++i)
  {
    const double pos1(s1[i].getMZ());
    for (Size j = j_left; j != s2.size(); ++j)
    {
      const double pos2(s2[j].getMZ());
      if (std::abs(pos1 - pos2) <= 2 * epsilon)
      {
        sum += s1[i].getIntensity() * s2[j].getIntensity();
      }
      else
      {
        if (pos2 > pos1)
        {
          break;
        }
        else
        {
          j_left = j;
        }
      }
    }
  }

  // Guard against 0/0: with an empty or all-zero spectrum the original
  // returned NaN, which also slipped through the threshold check below.
  const double norm = std::sqrt(sum1 * sum2);
  if (norm <= 0)
  {
    return 0;
  }

  score = (sum - z) / norm;
  if (score < (float)param_.getValue("threshold"))
  {
    score = 0;
  }

  return score;
}
double XQuestScores::matchOddsScore(const PeakSpectrum& theoretical_spec, const Size matched_size, double fragment_mass_tolerance, bool fragment_mass_tolerance_unit_ppm, bool is_xlink_spectrum, Size n_charges) { using boost::math::binomial; Size theo_size = theoretical_spec.size(); if (matched_size < 1 || theo_size < 1) { return 0; } double range = theoretical_spec[theo_size-1].getMZ() - theoretical_spec[0].getMZ(); // Compute fragment tolerance in Da for the mean of MZ values, if tolerance in ppm (rough approximation) double mean = 0.0; for (Size i = 0; i < theo_size; ++i) { mean += theoretical_spec[i].getMZ(); } mean = mean / theo_size; double tolerance_Th = fragment_mass_tolerance_unit_ppm ? mean * 1e-6 * fragment_mass_tolerance : fragment_mass_tolerance; // A priori probability of a random match given info about the theoretical spectrum double a_priori_p = 0; if (is_xlink_spectrum) { a_priori_p = (1 - ( pow( (1 - 2 * tolerance_Th / (0.5 * range)), (static_cast<double>(theo_size) / static_cast<double>(n_charges))))); } else { a_priori_p = (1 - ( pow( (1 - 2 * tolerance_Th / (0.5 * range)), static_cast<int>(theo_size)))); } double match_odds = 0; binomial flip(theo_size, a_priori_p); // min double number to avoid 0 values, causing scores with the value "inf" match_odds = -log(1 - cdf(flip, matched_size) + std::numeric_limits<double>::min()); // score lower than 0 does not make sense, but can happen if cfd = 0, -log( 1 + min() ) < 0 if (match_odds >= 0.0) { return match_odds; } else { return 0; } }
// s1 should be the original spectrum DoubleReal CompNovoIdentificationBase::compareSpectra_(const PeakSpectrum & s1, const PeakSpectrum & s2) { DoubleReal score(0.0); PeakSpectrum::ConstIterator it1 = s1.begin(); PeakSpectrum::ConstIterator it2 = s2.begin(); Size num_matches(0); while (it1 != s1.end() && it2 != s2.end()) { DoubleReal pos1(it1->getPosition()[0]), pos2(it2->getPosition()[0]); if (fabs(pos1 - pos2) < fragment_mass_tolerance_) { score += it1->getIntensity(); ++num_matches; } if (pos1 <= pos2) { ++it1; } else { ++it2; } } if (num_matches == 0) { return 0; } score /= sqrt((DoubleReal)num_matches); return score; }
/**
  @brief Counts theoretical peaks that have a matching peak among the first @p depth peaks of a window.

  The window is truncated to its first @p depth entries (in their given order)
  and then sorted by position. For every theoretical peak the nearest window
  peak is looked up; it counts as matched if the absolute m/z error (converted
  to ppm relative to the window peak if requested) is below the tolerance.

  @param th                          Theoretical spectrum.
  @param window                      Observed peaks of one window.
  @param depth                       Number of leading window peaks to consider.
  @param fragment_mass_tolerance     Tolerance in Da, or ppm if the flag is set.
  @param fragment_mass_tolerance_ppm Whether the tolerance is given in ppm.
  @return Number of matched theoretical peaks.
*/
Size AScore::numberOfMatchedIons_(const PeakSpectrum & th, const PeakSpectrum & window, Size depth, double fragment_mass_tolerance, bool fragment_mass_tolerance_ppm) const
{
  PeakSpectrum window_reduced = window;
  if (window_reduced.size() > depth)
  {
    window_reduced.resize(depth);
  }

  // Nothing to match against. Previously this case was handled by letting the
  // nearest-peak lookup throw (and be caught) once per theoretical peak.
  if (window_reduced.empty())
  {
    return 0;
  }

  window_reduced.sortByPosition();
  Size n = 0;
  for (Size i = 0; i < th.size(); ++i)
  {
    const double th_mz = th[i].getMZ();

    // sentinel larger than any valid index, kept if the lookup fails
    Size nearest_peak = static_cast<Size>(-1);
    try
    {
      nearest_peak = window_reduced.findNearest(th_mz);
    }
    catch (const Exception::Precondition &) // by const reference: no copy, no slicing
    {
    }

    if (nearest_peak < window_reduced.size())
    {
      double window_mz = window_reduced[nearest_peak].getMZ();

      // std::fabs: an unqualified abs() may resolve to the integer overload
      // and truncate sub-Dalton errors to 0
      double error = std::fabs(window_mz - th_mz);

      if (fragment_mass_tolerance_ppm)
      {
        error = error / window_mz * 1e6;
      }

      if (error < fragment_mass_tolerance)
      {
        ++n;
      }
    }
  }
  return n;
}
/**
  @brief Computes the s_bion and s_yion entries of @p ion_scores for every peak of the CID spectrum.

  For each CID peak (pos1) two sums are accumulated and finally stored under
  the key pos1 in @p ion_scores:
  - b_sum: intensity of CID peaks 28.0 below pos1, plus intensity of ETD peaks
    17.0 above pos1
  - y_sum: intensity of ETD peaks 16.0 below pos1

  Every contribution is weighted by a linear factor that is 1 for an exact
  mass match and falls to 0 at fragment_mass_tolerance_. The ETD contributions
  are additionally multiplied by the length of a collected "isotope" chain and
  are only added if the CID peak's is_isotope_1_mono flag is not -1. ETD peaks
  inside the (unfragmented, doubly charged) precursor range are ignored.

  Note: ion_scores is accessed with operator[], so looking up a position may
  insert a default entry (assuming Map behaves like std::map -- TODO confirm).

  @param precursor_weight Precursor weight used to derive the excluded precursor range.
  @param ion_scores       Per-position scores; s_bion and s_yion are written.
  @param CID_spec         CID spectrum whose peaks are scored.
  @param ETD_spec         ETD spectrum searched for supporting peaks.
*/
void CompNovoIonScoring::scoreETDFeatures_(Size /*charge*/, double precursor_weight, Map<double, IonScore> & ion_scores, const PeakSpectrum & CID_spec, const PeakSpectrum & ETD_spec)
{
  //double fragment_mass_tolerance((double)param_.getValue("fragment_mass_tolerance"));
  // upper limit for the length of the isotope chain collected below
  Size max_isotope_to_score(param_.getValue("max_isotope_to_score"));

  for (PeakSpectrum::ConstIterator it1 = CID_spec.begin(); it1 != CID_spec.end(); ++it1)
  {
    double pos1(it1->getPosition()[0]);
    double b_sum(0.0), y_sum(0.0);

    // score a-ions: CID peaks that lie 28.0 below pos1 support it as a b-ion
    for (PeakSpectrum::ConstIterator it2 = CID_spec.begin(); it2 != CID_spec.end(); ++it2)
    {
      double pos2(it2->getPosition()[0]);
      if (fabs(pos1 - pos2 - 28.0) < fragment_mass_tolerance_)
      {
        // linear weight: 1 at an exact match, approaching 0 at the tolerance
        double factor((fragment_mass_tolerance_ - fabs(pos1 - pos2 - 28.0)) / fragment_mass_tolerance_);
#ifdef SCORE_ETDFEATURES_DEBUG
        cerr << "scoreETDFeatures: found a-ion " << pos1 << " (" << pos2 << ") (factor=" << factor << ") " << b_sum << " -> ";
#endif
        b_sum += it2->getIntensity() * factor;
#ifdef SCORE_ETDFEATURES_DEBUG
        cerr << endl;
#endif
      }
    }

    for (PeakSpectrum::ConstIterator it2 = ETD_spec.begin(); it2 != ETD_spec.end(); ++it2)
    {
      double pos2(it2->getPosition()[0]);

      // check if pos2 is precursor doubly charged, which has not fragmented
      // (the range spans from one to four added proton masses, widened by the tolerance)
      double pre_diff_lower = (precursor_weight + Constants::PROTON_MASS_U) / 2.0 - fragment_mass_tolerance_;
      double pre_diff_upper = (precursor_weight + 4.0 * Constants::PROTON_MASS_U) / 2.0 + fragment_mass_tolerance_;
      if (pos2 > pre_diff_lower && pos2 < pre_diff_upper)
      {
#ifdef SCORE_ETDFEATURES_DEBUG
        cerr << "scoreETDFeatures: pre-range: " << pos2 << " is in precursor peak range: " << pre_diff_lower << " <-> " << pre_diff_upper << endl;
#endif
        continue;
      }

      //double diff(pos2 - pos1);

      // pos1 is CID ion; pos2 is ETD ion
      // pos1 b-ion, pos2 c-ion
      if (fabs(pos1 + 17.0 - pos2) < fragment_mass_tolerance_)
      {
        // now test if the ETD peak has "isotope" pattern
        double factor((fragment_mass_tolerance_ - fabs(pos1 + 17.0 - pos2)) / fragment_mass_tolerance_);
#ifdef SCORE_ETDFEATURES_DEBUG
        cerr << "scoreETDFeatures: is b-ion: " << pos1 << " (" << pos2 << ") (factor=" << factor << ") " << b_sum << " -> ";
#endif
        // The chain is seeded with the CID peak; ETD peaks from it2 onwards are
        // appended whenever they lie one neutron mass away from the last
        // accepted position. Only the chain length is used afterwards.
        vector<double> iso_pattern;
        iso_pattern.push_back(it1->getIntensity());
        double actual_pos = it1->getPosition()[0];
        for (PeakSpectrum::ConstIterator it3 = it2; it3 != ETD_spec.end(); ++it3)
        {
          double it3_pos(it3->getPosition()[0]);
          if (fabs(fabs(actual_pos - it3_pos) - Constants::NEUTRON_MASS_U) < fragment_mass_tolerance_)
          {
            iso_pattern.push_back(it3->getIntensity());
            actual_pos = it3_pos;
          }
          if (iso_pattern.size() == max_isotope_to_score)
          {
            break;
          }
        }

        // peaks flagged with is_isotope_1_mono == -1 receive no ETD support
        if (ion_scores[it1->getPosition()[0]].is_isotope_1_mono != -1)
        {
          b_sum += it2->getIntensity() * iso_pattern.size() * factor;
        }
#ifdef SCORE_ETDFEATURES_DEBUG
        cerr << b_sum << endl;
#endif
      }

      // pos1 z-ion, pos2 y-ion
      if (fabs(pos2 + 16.0 - pos1) < fragment_mass_tolerance_)
      {
        double factor((fragment_mass_tolerance_ - fabs(pos2 + 16.0 - pos1)) / fragment_mass_tolerance_);
        // now test if the ETD peak has "isotope" pattern
#ifdef SCORE_ETDFEATURES_DEBUG
        cerr << "scoreETDFeatures: is y-ion: " << pos1 << " (" << pos2 << ") (factor=" << factor << ") " << y_sum << " -> ";
#endif
        // same chain collection as in the b-ion branch above
        vector<double> iso_pattern;
        iso_pattern.push_back(it1->getIntensity());
        double actual_pos = it1->getPosition()[0];
        for (PeakSpectrum::ConstIterator it3 = it2; it3 != ETD_spec.end(); ++it3)
        {
          double it3_pos(it3->getPosition()[0]);
          if (fabs(fabs(actual_pos - it3_pos) - Constants::NEUTRON_MASS_U) < fragment_mass_tolerance_)
          {
            iso_pattern.push_back(it3->getIntensity());
            actual_pos = it3_pos;
          }
          if (iso_pattern.size() == max_isotope_to_score)
          {
            break;
          }
        }
#ifdef SCORE_ETDFEATURES_DEBUG
        cerr << ion_scores[it1->getPosition()[0]].is_isotope_1_mono << " ";
#endif
        if (ion_scores[it1->getPosition()[0]].is_isotope_1_mono != -1)
        {
          y_sum += it2->getIntensity() * iso_pattern.size() * factor;
        }
#ifdef SCORE_ETDFEATURES_DEBUG
        cerr << y_sum << endl;
#endif
      }
    }

    // store the accumulated evidence for this CID peak
    ion_scores[it1->getPosition()[0]].s_bion = b_sum;
    ion_scores[it1->getPosition()[0]].s_yion = y_sum;
  }
  return;
}
/**
  @brief Computes the s_witness entry of @p ion_scores for every peak of the CID spectrum.

  The witness score of a peak (pos1) starts with its own intensity and is
  increased by the (linearly tolerance-weighted) intensity of other CID peaks
  that support it:
  - for charge > 1: a peak at the position expected for the doubly charged ion,
  - peaks 17.0 or 18.0 below pos1 (loss peaks),
  - the complementary peak, i.e. pos1 + pos2 - proton mass == precursor weight;
    if that peak has a non-zero s_bion, this value is added as well.

  Afterwards the score is modified using entries already present for pos1:
  it is increased by s_isotope_pattern_1 * score if the isotope pattern is
  flagged as monoisotopic, increased by s_yion, and decreased by s_bion
  (clamped to 0 if s_bion is not smaller than the score).

  Note: ion_scores is accessed with operator[], so looking up a position may
  insert a default entry (assuming Map behaves like std::map -- TODO confirm).

  @param charge           Precursor charge; the doubly charged check is skipped for charge <= 1.
  @param precursor_weight Precursor weight used for the complementary-ion check.
  @param ion_scores       Per-position scores; several fields are read, s_witness is written.
  @param CID_spec         CID spectrum whose peaks are scored.
*/
void CompNovoIonScoring::scoreWitnessSet_(Size charge, double precursor_weight, Map<double, IonScore> & ion_scores, const PeakSpectrum & CID_spec)
{
  // mass differences that are searched below each peak
  vector<double> diffs;
  //diffs.push_back(28.0);
  diffs.push_back(17.0);
  diffs.push_back(18.0);

  // witnesses of CID spec (diffs)
  for (PeakSpectrum::ConstIterator it1 = CID_spec.begin(); it1 != CID_spec.end(); ++it1)
  {
    //Size num_wit(0);
    double wit_score(0.0);
    double pos1(it1->getPosition()[0]);
    // the peak's own intensity is the base of its witness score
    wit_score += it1->getIntensity();

    for (PeakSpectrum::ConstIterator it2 = CID_spec.begin(); it2 != CID_spec.end(); ++it2)
    {
      double pos2(it2->getPosition()[0]);

      // direct ++
      if (charge > 1)
      {
        if (fabs(pos2 * 2 - Constants::PROTON_MASS_U - pos1) < fragment_mass_tolerance_)
        {
          // linear weight: 1 at an exact match, approaching 0 at the tolerance
          double factor((fragment_mass_tolerance_ - fabs(pos2 * 2 - Constants::PROTON_MASS_U - pos1)) / fragment_mass_tolerance_);
          // pos1 is ion, pos2 is ++ion
#ifdef SCORE_WITNESSSET_DEBUG
          cerr << "scoreWitnessSet: ++ion " << pos1 << " " << pos2 << " (factor=" << factor << ") " << wit_score << " -> ";
#endif
          // a weak doubly charged isotope pattern score (< 0.2) is not used as a multiplier
          if (ion_scores[it2->getPosition()[0]].s_isotope_pattern_2 < 0.2)
          {
            wit_score += it2->getIntensity() * /* 0.2 */ factor;
          }
          else
          {
            wit_score += it2->getIntensity() * ion_scores[it2->getPosition()[0]].s_isotope_pattern_2 * factor;
          }
#ifdef SCORE_WITNESSSET_DEBUG
          cerr << wit_score << endl;
#endif
        }
      }

      // diffs?
      for (vector<double>::const_iterator it = diffs.begin(); it != diffs.end(); ++it)
      {
        // pos1 is ion, pos2 loss peak
        if (fabs(pos1 - pos2 - *it) < fragment_mass_tolerance_)
        {
          double factor((fragment_mass_tolerance_ - fabs(pos1 - pos2 - *it)) / fragment_mass_tolerance_);
#ifdef SCORE_WITNESSSET_DEBUG
          cerr << "scoreWitnessSet: diff " << pos1 << " (" << pos2 << ") " << *it << " (factor=" << factor << ") " << wit_score << " -> ";
#endif
          wit_score += it2->getIntensity() /* / 5.0*/ * factor;
#ifdef SCORE_WITNESSSET_DEBUG
          cerr << wit_score << endl;
#endif
        }
      }

      // is there a b-ion?; pos1 is ion, pos2 complementary ion
      if (fabs(pos1 + pos2 - 1 * Constants::PROTON_MASS_U - precursor_weight) < fragment_mass_tolerance_)
      {
        double factor((fragment_mass_tolerance_ - fabs(pos1 + pos2 - Constants::PROTON_MASS_U - precursor_weight)) / fragment_mass_tolerance_);
        /*factor *= 0.2;*/
#ifdef SCORE_WITNESSSET_DEBUG
        cerr << "scoreWitnessSet: complementary " << pos1 << " (" << pos2 << ") (factor=" << factor << ") " << wit_score << " -> ";
#endif
        // found complementary ion
        // its isotope pattern score is only used as a multiplier if it is at
        // least 0.5 and the pattern is flagged as monoisotopic
        if (ion_scores[it2->getPosition()[0]].s_isotope_pattern_1 < 0.5 || ion_scores[it2->getPosition()[0]].is_isotope_1_mono != 1)
        {
          wit_score += it2->getIntensity() /* * 0.5*/ * factor;
        }
        else
        {
          wit_score += it2->getIntensity() * ion_scores[it2->getPosition()[0]].s_isotope_pattern_1 * factor;
        }
#ifdef SCORE_WITNESSSET_DEBUG
        cerr << wit_score << endl;
#endif

        // b-ion evidence of the complementary peak also supports this peak
        if (ion_scores[it2->getPosition()[0]].s_bion != 0)
        {
#ifdef SCORE_WITNESSSET_DEBUG
          cerr << "scoreWitnessSet: complementary is b-ion " << pos1 << "(" << pos2 << ")" << wit_score << " -> ";
#endif
          wit_score += ion_scores[it2->getPosition()[0]].s_bion * factor;
#ifdef SCORE_WITNESSSET_DEBUG
          cerr << wit_score << endl;
#endif
        }
      }
    }

    // isotope pattern ok?
    if (ion_scores[it1->getPosition()[0]].s_isotope_pattern_1 > 0 && ion_scores[it1->getPosition()[0]].is_isotope_1_mono == 1)
    {
#ifdef SCORE_WITNESSSET_DEBUG
      cerr << "scoreWitnessSet: isotope pattern: " << pos1 << " " << wit_score << " -> ";
#endif
      // scales the score by (1 + s_isotope_pattern_1)
      wit_score += ion_scores[it1->getPosition()[0]].s_isotope_pattern_1 * wit_score;
#ifdef SCORE_WITNESSSET_DEBUG
      cerr << wit_score << endl;
#endif
    }

    if (ion_scores[it1->getPosition()[0]].s_yion > 0)
    {
#ifdef SCORE_WITNESSSET_DEBUG
      cerr << "scoreWitnessSet: is y-ion: " << pos1 << " " << wit_score << " -> ";
#endif
      wit_score += ion_scores[it1->getPosition()[0]].s_yion;
#ifdef SCORE_WITNESSSET_DEBUG
      cerr << wit_score << endl;
#endif
    }

    // b-ion evidence for the peak itself is subtracted; the score never drops below 0
    if (ion_scores[it1->getPosition()[0]].s_bion > 0)
    {
#ifdef SCORE_WITNESSSET_DEBUG
      cerr << "scoreWitnessSet: is b-ion: " << pos1 << " " << wit_score << " -> ";
#endif
      if (ion_scores[it1->getPosition()[0]].s_bion < wit_score)
      {
        wit_score -= ion_scores[it1->getPosition()[0]].s_bion;
      }
      else
      {
        wit_score = 0;
      }
    }

    ion_scores[it1->getPosition()[0]].s_witness = wit_score;
  }
  return;
}
START_SECTION((WindowMower(const WindowMower& source))) WindowMower copy(*e_ptr); TEST_EQUAL(copy.getParameters(), e_ptr->getParameters()) TEST_EQUAL(copy.getName(), e_ptr->getName()) END_SECTION START_SECTION((WindowMower& operator = (const WindowMower& source))) WindowMower copy; copy = *e_ptr; TEST_EQUAL(copy.getParameters(), e_ptr->getParameters()) TEST_EQUAL(copy.getName(), e_ptr->getName()) END_SECTION START_SECTION((template<typename SpectrumType> void filterPeakSpectrumForTopNInSlidingWindow(SpectrumType& spectrum))) DTAFile dta_file; PeakSpectrum spec; dta_file.load(OPENMS_GET_TEST_DATA_PATH("Transformers_tests.dta"), spec); TEST_EQUAL(spec.size(), 121) Param p(e_ptr->getParameters()); p.setValue("windowsize", 50.0); // default p.setValue("peakcount", 2); // default p.setValue("movetype", "slide"); // default and not needed as we directly call sliding window function e_ptr->setParameters(p); e_ptr->filterPeakSpectrumForTopNInSlidingWindow(spec); TEST_EQUAL(spec.size(), 56) END_SECTION
/**
  @brief Serializes a spectrum as text, Base64-encodes it and wraps the result to 76 columns.

  Text layout before encoding:
  - if @p header is not empty: the header, the precursor m/z and the precursor
    charge, each on its own line
  - otherwise: one line "<precursor m/z>\t<precursor charge>"
  - then one line per peak: "<m/z>\t<intensity>\t<charge>"

  Precursor values are taken from the first precursor (0 if there is none).
  Peak charges are taken from the first integer data array of the spectrum;
  "0" is written if there is no such array or it has no entry for a peak.

  @param spec   Spectrum to encode.
  @param header Optional header line (e.g. the spectrum file names).
  @return The wrapped Base64 string.
*/
String XQuestResultXMLFile::getxQuestBase64EncodedSpectrum_(const PeakSpectrum& spec, String header)
{
  std::vector<String> in_strings;
  StringList sl;

  double precursor_mz = 0;
  double precursor_z = 0;
  if (spec.getPrecursors().size() > 0)
  {
    precursor_mz = Math::roundDecimal(spec.getPrecursors()[0].getMZ(), -9);
    precursor_z = spec.getPrecursors()[0].getCharge();
  }

  // header lines
  if (!header.empty()) // common or xlinker spectrum will be reported
  {
    sl.push_back(header + "\n"); // e.g. GUA1372-S14-A-LRRK2_DSS_1A3.03873.03873.3.dta,GUA1372-S14-A-LRRK2_DSS_1A3.03863.03863.3.dta
    sl.push_back(String(precursor_mz) + "\n");
    sl.push_back(String(precursor_z) + "\n");
  }
  else // light or heavy spectrum will be reported
  {
    sl.push_back(String(precursor_mz) + "\t" + String(precursor_z) + "\n");
  }

  PeakSpectrum::IntegerDataArray charges;
  if (spec.getIntegerDataArrays().size() > 0)
  {
    charges = spec.getIntegerDataArrays()[0];
  }

  // write peaks
  for (Size i = 0; i != spec.size(); ++i)
  {
    String s;
    s += String(Math::roundDecimal(spec[i].getMZ(), -9)) + "\t";
    s += String(spec[i].getIntensity()) + "\t";

    // Bounds-checked: a charge array shorter than the spectrum used to be
    // indexed out of range. Missing charges are written as 0.
    if (i < charges.size())
    {
      s += String(charges[i]);
    }
    else
    {
      s += "0";
    }

    s += "\n";

    sl.push_back(s);
  }

  // join all lines into one string and encode it
  String out;
  out.concatenate(sl.begin(), sl.end(), "");
  in_strings.push_back(out);

  String out_encoded;
  Base64().encodeStrings(in_strings, out_encoded, false, false);
  String out_wrapped;
  wrap_(out_encoded, 76, out_wrapped);
  return out_wrapped;
}
void CompNovoIdentificationCID::getIdentification(PeptideIdentification & id, const PeakSpectrum & CID_spec) { //if (CID_spec.getPrecursors().begin()->getMZ() > 1000.0) //{ //cerr << "Weight of precursor has been estimated to exceed 2000.0 Da which is the current limit" << endl; //return; //} PeakSpectrum new_CID_spec(CID_spec); windowMower_(new_CID_spec, 0.3, 1); Param zhang_param; zhang_param = zhang_.getParameters(); zhang_param.setValue("tolerance", fragment_mass_tolerance_); zhang_param.setValue("use_gaussian_factor", "true"); zhang_param.setValue("use_linear_factor", "false"); zhang_.setParameters(zhang_param); Normalizer normalizer; Param n_param(normalizer.getParameters()); n_param.setValue("method", "to_one"); normalizer.setParameters(n_param); normalizer.filterSpectrum(new_CID_spec); Size charge(2); double precursor_weight(0); // [M+H]+ if (!CID_spec.getPrecursors().empty()) { // believe charge of spectrum? if (CID_spec.getPrecursors().begin()->getCharge() != 0) { charge = CID_spec.getPrecursors().begin()->getCharge(); } else { // TODO estimate charge state } precursor_weight = CID_spec.getPrecursors().begin()->getMZ() * charge - ((charge - 1) * Constants::PROTON_MASS_U); } //cerr << "charge=" << charge << ", [M+H]=" << precursor_weight << endl; // now delete all peaks that are right of the estimated precursor weight Size peak_counter(0); for (PeakSpectrum::ConstIterator it = new_CID_spec.begin(); it != new_CID_spec.end(); ++it, ++peak_counter) { if (it->getPosition()[0] > precursor_weight) { break; } } if (peak_counter < new_CID_spec.size()) { new_CID_spec.resize(peak_counter); } static double oxonium_mass = EmpiricalFormula("H2O+").getMonoWeight(); Peak1D p; p.setIntensity(1); p.setPosition(oxonium_mass); new_CID_spec.push_back(p); p.setPosition(precursor_weight); new_CID_spec.push_back(p); // add complement to spectrum /* for (PeakSpectrum::ConstIterator it1 = CID_spec.begin(); it1 != CID_spec.end(); ++it1) { // get m/z of complement double mz_comp = 
precursor_weight - it1->getPosition()[0] + Constants::PROTON_MASS_U; // search if peaks are available that have similar m/z values Size count(0); bool found(false); for (PeakSpectrum::ConstIterator it2 = CID_spec.begin(); it2 != CID_spec.end(); ++it2, ++count) { if (fabs(mz_comp - it2->getPosition()[0]) < fragment_mass_tolerance) { // add peak intensity to corresponding peak in new_CID_spec new_CID_spec[count].setIntensity(new_CID_spec[count].getIntensity()); } } if (!found) { // infer this peak Peak1D p; p.setIntensity(it1->getIntensity()); p.setPosition(mz_comp); new_CID_spec.push_back(p); } }*/ CompNovoIonScoringCID ion_scoring; Param ion_scoring_param(ion_scoring.getParameters()); ion_scoring_param.setValue("fragment_mass_tolerance", fragment_mass_tolerance_); ion_scoring_param.setValue("precursor_mass_tolerance", precursor_mass_tolerance_); ion_scoring_param.setValue("decomp_weights_precision", decomp_weights_precision_); ion_scoring_param.setValue("double_charged_iso_threshold", (double)param_.getValue("double_charged_iso_threshold")); ion_scoring_param.setValue("max_isotope_to_score", param_.getValue("max_isotope_to_score")); ion_scoring_param.setValue("max_isotope", max_isotope_); ion_scoring.setParameters(ion_scoring_param); Map<double, IonScore> ion_scores; ion_scoring.scoreSpectrum(ion_scores, new_CID_spec, precursor_weight, charge); new_CID_spec.sortByPosition(); /* cerr << "Size of ion_scores " << ion_scores.size() << endl; for (Map<double, IonScore>::const_iterator it = ion_scores.begin(); it != ion_scores.end(); ++it) { cerr << it->first << " " << it->second.score << endl; }*/ #ifdef WRITE_SCORED_SPEC PeakSpectrum filtered_spec(new_CID_spec); filtered_spec.clear(); for (Map<double, CompNovoIonScoringCID::IonScore>::const_iterator it = ion_scores.begin(); it != ion_scores.end(); ++it) { Peak1D p; p.setIntensity(it->second.score); p.setPosition(it->first); filtered_spec.push_back(p); } DTAFile().store("spec_scored.dta", filtered_spec); #endif 
set<String> sequences; getDecompositionsDAC_(sequences, 0, new_CID_spec.size() - 1, precursor_weight, new_CID_spec, ion_scores); #ifdef SPIKE_IN sequences.insert("AFCVDGEGR"); sequences.insert("APEFAAPWPDFVPR"); sequences.insert("AVKQFEESQGR"); sequences.insert("CCTESLVNR"); sequences.insert("DAFLGSFLYEYSR"); sequences.insert("DAIPENLPPLTADFAEDK"); sequences.insert("DDNKVEDIWSFLSK"); sequences.insert("DDPHACYSTVFDK"); sequences.insert("DEYELLCLDGSR"); sequences.insert("DGAESYKELSVLLPNR"); sequences.insert("DGASCWCVDADGR"); sequences.insert("DLFIPTCLETGEFAR"); sequences.insert("DTHKSEIAHR"); sequences.insert("DVCKNYQEAK"); sequences.insert("EACFAVEGPK"); sequences.insert("ECCHGDLLECADDR"); sequences.insert("EFLGDKFYTVISSLK"); sequences.insert("EFTPVLQADFQK"); sequences.insert("ELFLDSGIFQPMLQGR"); sequences.insert("ETYGDMADCCEK"); sequences.insert("EVGCPSSSVQEMVSCLR"); sequences.insert("EYEATLEECCAK"); sequences.insert("FADLIQSGTFQLHLDSK"); sequences.insert("FFSASCVPGATIEQK"); sequences.insert("FLANVSTVLTSK"); sequences.insert("FLSGSDYAIR"); sequences.insert("FTASCPPSIK"); sequences.insert("GAIEWEGIESGSVEQAVAK"); sequences.insert("GDVAFIQHSTVEENTGGK"); sequences.insert("GEPPSCAEDQSCPSER"); sequences.insert("GEYVPTSLTAR"); sequences.insert("GQEFTITGQKR"); sequences.insert("GTFAALSELHCDK"); sequences.insert("HLVDEPQNLIK"); sequences.insert("HQDCLVTTLQTQPGAVR"); sequences.insert("HTTVNENAPDQK"); sequences.insert("ILDCGSPDTEVR"); sequences.insert("KCPSPCQLQAER"); sequences.insert("KGTEFTVNDLQGK"); sequences.insert("KQTALVELLK"); sequences.insert("KVPQVSTPTLVEVSR"); sequences.insert("LALQFTTNAKR"); sequences.insert("LCVLHEKTPVSEK"); sequences.insert("LFTFHADICTLPDTEK"); sequences.insert("LGEYGFQNALIVR"); sequences.insert("LHVDPENFK"); sequences.insert("LKECCDKPLLEK"); sequences.insert("LKHLVDEPQNLIK"); sequences.insert("LKPDPNTLCDEFK"); sequences.insert("LLGNVLVVVLAR"); sequences.insert("LLVVYPWTQR"); sequences.insert("LRVDPVNFK"); sequences.insert("LTDEELAFPPLSPSR"); 
sequences.insert("LVNELTEFAK"); sequences.insert("MFLSFPTTK"); sequences.insert("MPCTEDYLSLILNR"); sequences.insert("NAPYSGYSGAFHCLK"); sequences.insert("NECFLSHKDDSPDLPK"); sequences.insert("NEPNKVPACPGSCEEVK"); sequences.insert("NLQMDDFELLCTDGR"); sequences.insert("QAGVQAEPSPK"); sequences.insert("RAPEFAAPWPDFVPR"); sequences.insert("RHPEYAVSVLLR"); sequences.insert("RPCFSALTPDETYVPK"); sequences.insert("RSLLLAPEEGPVSQR"); sequences.insert("SAFPPEPLLCSVQR"); sequences.insert("SAGWNIPIGTLLHR"); sequences.insert("SCWCVDEAGQK"); sequences.insert("SGNPNYPHEFSR"); sequences.insert("SHCIAEVEK"); sequences.insert("SISSGFFECER"); sequences.insert("SKYLASASTMDHAR"); sequences.insert("SLHTLFGDELCK"); sequences.insert("SLLLAPEEGPVSQR"); sequences.insert("SPPQCSPDGAFRPVQCK"); sequences.insert("SREGDPLAVYLK"); sequences.insert("SRQIPQCPTSCER"); sequences.insert("TAGTPVSIPVCDDSSVK"); sequences.insert("TCVADESHAGCEK"); sequences.insert("TQFGCLEGFGR"); sequences.insert("TVMENFVAFVDK"); sequences.insert("TYFPHFDLSHGSAQVK"); sequences.insert("TYMLAFDVNDEK"); sequences.insert("VDEVGGEALGR"); sequences.insert("VDLLIGSSQDDGLINR"); sequences.insert("VEDIWSFLSK"); sequences.insert("VGGHAAEYGAEALER"); sequences.insert("VGTRCCTKPESER"); sequences.insert("VKVDEVGGEALGR"); sequences.insert("VKVDLLIGSSQDDGLINR"); sequences.insert("VLDSFSNGMK"); sequences.insert("VLSAADKGNVK"); sequences.insert("VPQVSTPTLVEVSR"); sequences.insert("VTKCCTESLVNR"); sequences.insert("VVAASDASQDALGCVK"); sequences.insert("VVAGVANALAHR"); sequences.insert("YICDNQDTISSK"); sequences.insert("YLASASTMDHAR"); sequences.insert("YNGVFQECCQAEDK"); #endif SpectrumAlignmentScore spectra_zhang; spectra_zhang.setParameters(zhang_param); vector<PeptideHit> hits; Size missed_cleavages = param_.getValue("missed_cleavages"); for (set<String>::const_iterator it = sequences.begin(); it != sequences.end(); ++it) { Size num_missed = countMissedCleavagesTryptic_(*it); if (missed_cleavages < num_missed) { //cerr << "Two many missed 
cleavages: " << *it << ", found " << num_missed << ", allowed " << missed_cleavages << endl; continue; } PeakSpectrum CID_sim_spec; getCIDSpectrum_(CID_sim_spec, *it, charge); //normalizer.filterSpectrum(CID_sim_spec); double cid_score = zhang_(CID_sim_spec, CID_spec); PeptideHit hit; hit.setScore(cid_score); hit.setSequence(getModifiedAASequence_(*it)); hit.setCharge((Int)charge); //TODO unify charge interface: int or size? hits.push_back(hit); //cerr << getModifiedAASequence_(*it) << " " << cid_score << " " << endl; } // rescore the top hits id.setHits(hits); id.assignRanks(); hits = id.getHits(); SpectrumAlignmentScore alignment_score; Param align_param(alignment_score.getParameters()); align_param.setValue("tolerance", fragment_mass_tolerance_); align_param.setValue("use_linear_factor", "true"); alignment_score.setParameters(align_param); for (vector<PeptideHit>::iterator it = hits.begin(); it != hits.end(); ++it) { //cerr << "Pre: " << it->getRank() << " " << it->getSequence() << " " << it->getScore() << " " << endl; } Size number_of_prescoring_hits = param_.getValue("number_of_prescoring_hits"); if (hits.size() > number_of_prescoring_hits) { hits.resize(number_of_prescoring_hits); } for (vector<PeptideHit>::iterator it = hits.begin(); it != hits.end(); ++it) { PeakSpectrum CID_sim_spec; getCIDSpectrum_(CID_sim_spec, getModifiedStringFromAASequence_(it->getSequence()), charge); normalizer.filterSpectrum(CID_sim_spec); //DTAFile().store("sim_specs/" + it->getSequence().toUnmodifiedString() + "_sim_CID.dta", CID_sim_spec); //double cid_score = spectra_zhang(CID_sim_spec, CID_spec); double cid_score = alignment_score(CID_sim_spec, CID_spec); //cerr << "Final: " << it->getSequence() << " " << cid_score << endl; it->setScore(cid_score); } id.setHits(hits); id.assignRanks(); hits = id.getHits(); for (vector<PeptideHit>::iterator it = hits.begin(); it != hits.end(); ++it) { //cerr << "Fin: " << it->getRank() << " " << it->getSequence() << " " << it->getScore() << " " 
<< endl; } Size number_of_hits = param_.getValue("number_of_hits"); if (id.getHits().size() > number_of_hits) { hits.resize(number_of_hits); } id.setHits(hits); id.assignRanks(); return; }
ExitCodes main_(int, const char**) { //------------------------------------------------------------- // parameter handling //------------------------------------------------------------- StringList in_spec = getStringList_("in"); StringList out = getStringList_("out"); String in_lib = getStringOption_("lib"); String compare_function = getStringOption_("compare_function"); Int precursor_mass_multiplier = getIntOption_("round_precursor_to_integer"); float precursor_mass_tolerance = getDoubleOption_("precursor_mass_tolerance"); //Int min_precursor_charge = getIntOption_("min_precursor_charge"); //Int max_precursor_charge = getIntOption_("max_precursor_charge"); float remove_peaks_below_threshold = getDoubleOption_("filter:remove_peaks_below_threshold"); UInt min_peaks = getIntOption_("filter:min_peaks"); UInt max_peaks = getIntOption_("filter:max_peaks"); Int cut_peaks_below = getIntOption_("filter:cut_peaks_below"); StringList fixed_modifications = getStringList_("fixed_modifications"); StringList variable_modifications = getStringList_("variable_modifications"); Int top_hits = getIntOption_("top_hits"); if (top_hits < -1) { writeLog_("top_hits (should be >= -1 )"); return ILLEGAL_PARAMETERS; } //------------------------------------------------------------- // loading input //------------------------------------------------------------- if (out.size() != in_spec.size()) { writeLog_("out (should be as many as input files)"); return ILLEGAL_PARAMETERS; } time_t prog_time = time(NULL); MSPFile spectral_library; RichPeakMap query, library; //spectrum which will be identified MzMLFile spectra; spectra.setLogType(log_type_); time_t start_build_time = time(NULL); //------------------------------------------------------------- //building map for faster search //------------------------------------------------------------- //library containing already identified peptide spectra vector<PeptideIdentification> ids; spectral_library.load(in_lib, ids, library); map<Size, 
vector<PeakSpectrum> > MSLibrary; { RichPeakMap::iterator s; vector<PeptideIdentification>::iterator i; ModificationsDB* mdb = ModificationsDB::getInstance(); for (s = library.begin(), i = ids.begin(); s < library.end(); ++s, ++i) { double precursor_MZ = (*s).getPrecursors()[0].getMZ(); Size MZ_multi = (Size)precursor_MZ * precursor_mass_multiplier; map<Size, vector<PeakSpectrum> >::iterator found; found = MSLibrary.find(MZ_multi); PeakSpectrum librar; bool variable_modifications_ok = true; bool fixed_modifications_ok = true; const AASequence& aaseq = i->getHits()[0].getSequence(); //variable fixed modifications if (!fixed_modifications.empty()) { for (Size i = 0; i < aaseq.size(); ++i) { const Residue& mod = aaseq.getResidue(i); for (Size s = 0; s < fixed_modifications.size(); ++s) { if (mod.getOneLetterCode() == mdb->getModification(fixed_modifications[s]).getOrigin() && fixed_modifications[s] != mod.getModification()) { fixed_modifications_ok = false; break; } } } } //variable modifications if (aaseq.isModified() && (!variable_modifications.empty())) { for (Size i = 0; i < aaseq.size(); ++i) { if (aaseq.isModified(i)) { const Residue& mod = aaseq.getResidue(i); for (Size s = 0; s < variable_modifications.size(); ++s) { if (mod.getOneLetterCode() == mdb->getModification(variable_modifications[s]).getOrigin() && variable_modifications[s] != mod.getModification()) { variable_modifications_ok = false; break; } } } } } if (variable_modifications_ok && fixed_modifications_ok) { PeptideIdentification& translocate_pid = *i; librar.getPeptideIdentifications().push_back(translocate_pid); librar.setPrecursors(s->getPrecursors()); //library entry transformation for (UInt l = 0; l < s->size(); ++l) { Peak1D peak; if ((*s)[l].getIntensity() > remove_peaks_below_threshold) { const String& info = (*s)[l].getMetaValue("MSPPeakInfo"); if (info[0] == '?') { peak.setIntensity(sqrt(0.2 * (*s)[l].getIntensity())); } else { peak.setIntensity(sqrt((*s)[l].getIntensity())); } 
peak.setMZ((*s)[l].getMZ()); peak.setPosition((*s)[l].getPosition()); librar.push_back(peak); } } if (found != MSLibrary.end()) { found->second.push_back(librar); } else { vector<PeakSpectrum> tmp; tmp.push_back(librar); MSLibrary.insert(make_pair(MZ_multi, tmp)); } } } } time_t end_build_time = time(NULL); cout << "Time needed for preprocessing data: " << (end_build_time - start_build_time) << "\n"; //compare function PeakSpectrumCompareFunctor* comparor = Factory<PeakSpectrumCompareFunctor>::create(compare_function); //------------------------------------------------------------- // calculations //------------------------------------------------------------- double score; StringList::iterator in, out_file; for (in = in_spec.begin(), out_file = out.begin(); in < in_spec.end(); ++in, ++out_file) { time_t start_time = time(NULL); spectra.load(*in, query); //Will hold valuable hits vector<PeptideIdentification> peptide_ids; vector<ProteinIdentification> protein_ids; // Write parameters to ProteinIdentifcation ProteinIdentification prot_id; //Parameters of identificaion prot_id.setIdentifier("test"); prot_id.setSearchEngineVersion("SpecLibSearcher"); prot_id.setDateTime(DateTime::now()); prot_id.setScoreType(compare_function); ProteinIdentification::SearchParameters searchparam; searchparam.precursor_tolerance = precursor_mass_tolerance; prot_id.setSearchParameters(searchparam); /***********SEARCH**********/ for (UInt j = 0; j < query.size(); ++j) { //Set identifier for each identifications PeptideIdentification pid; pid.setIdentifier("test"); pid.setScoreType(compare_function); ProteinHit pr_hit; pr_hit.setAccession(j); prot_id.insertHit(pr_hit); //RichPeak1D to Peak1D transformation for the compare function query PeakSpectrum quer; bool peak_ok = true; query[j].sortByIntensity(true); double min_high_intensity = 0; if (query[j].empty() || query[j].getMSLevel() != 2) { continue; } if (query[j].getPrecursors().empty()) { writeLog_("Warning MS2 spectrum without 
precursor information"); continue; } min_high_intensity = (1 / cut_peaks_below) * query[j][0].getIntensity(); query[j].sortByPosition(); for (UInt k = 0; k < query[j].size() && k < max_peaks; ++k) { if (query[j][k].getIntensity() > remove_peaks_below_threshold && query[j][k].getIntensity() >= min_high_intensity) { Peak1D peak; peak.setIntensity(sqrt(query[j][k].getIntensity())); peak.setMZ(query[j][k].getMZ()); peak.setPosition(query[j][k].getPosition()); quer.push_back(peak); } } if (quer.size() >= min_peaks) { peak_ok = true; } else { peak_ok = false; } double query_MZ = query[j].getPrecursors()[0].getMZ(); if (peak_ok) { bool charge_one = false; Int percent = (Int) Math::round((query[j].size() / 100.0) * 3.0); Int margin = (Int) Math::round((query[j].size() / 100.0) * 1.0); for (vector<RichPeak1D>::iterator peak = query[j].end() - 1; percent >= 0; --peak, --percent) { if (peak->getMZ() < query_MZ) { break; } } if (percent > margin) { charge_one = true; } float min_MZ = (query_MZ - precursor_mass_tolerance) * precursor_mass_multiplier; float max_MZ = (query_MZ + precursor_mass_tolerance) * precursor_mass_multiplier; for (Size mz = (Size)min_MZ; mz <= ((Size)max_MZ) + 1; ++mz) { map<Size, vector<PeakSpectrum> >::iterator found; found = MSLibrary.find(mz); if (found != MSLibrary.end()) { vector<PeakSpectrum>& library = found->second; for (Size i = 0; i < library.size(); ++i) { float this_MZ = library[i].getPrecursors()[0].getMZ() * precursor_mass_multiplier; if (this_MZ >= min_MZ && max_MZ >= this_MZ && ((charge_one == true && library[i].getPeptideIdentifications()[0].getHits()[0].getCharge() == 1) || charge_one == false)) { PeptideHit hit = library[i].getPeptideIdentifications()[0].getHits()[0]; PeakSpectrum& librar = library[i]; //Special treatment for SpectraST score as it computes a score based on the whole library if (compare_function == "SpectraSTSimilarityScore") { SpectraSTSimilarityScore* sp = static_cast<SpectraSTSimilarityScore*>(comparor); 
BinnedSpectrum quer_bin = sp->transform(quer); BinnedSpectrum librar_bin = sp->transform(librar); score = (*sp)(quer, librar); //(*sp)(quer_bin,librar_bin); double dot_bias = sp->dot_bias(quer_bin, librar_bin, score); hit.setMetaValue("DOTBIAS", dot_bias); } else { score = (*comparor)(quer, librar); } DataValue RT(library[i].getRT()); DataValue MZ(library[i].getPrecursors()[0].getMZ()); hit.setMetaValue("RT", RT); hit.setMetaValue("MZ", MZ); hit.setScore(score); PeptideEvidence pe; pe.setProteinAccession(pr_hit.getAccession()); hit.addPeptideEvidence(pe); pid.insertHit(hit); } } } } } pid.setHigherScoreBetter(true); pid.sort(); if (compare_function == "SpectraSTSimilarityScore") { if (!pid.empty() && !pid.getHits().empty()) { vector<PeptideHit> final_hits; final_hits.resize(pid.getHits().size()); SpectraSTSimilarityScore* sp = static_cast<SpectraSTSimilarityScore*>(comparor); Size runner_up = 1; for (; runner_up < pid.getHits().size(); ++runner_up) { if (pid.getHits()[0].getSequence().toUnmodifiedString() != pid.getHits()[runner_up].getSequence().toUnmodifiedString() || runner_up > 5) { break; } } double delta_D = sp->delta_D(pid.getHits()[0].getScore(), pid.getHits()[runner_up].getScore()); for (Size s = 0; s < pid.getHits().size(); ++s) { final_hits[s] = pid.getHits()[s]; final_hits[s].setMetaValue("delta D", delta_D); final_hits[s].setMetaValue("dot product", pid.getHits()[s].getScore()); final_hits[s].setScore(sp->compute_F(pid.getHits()[s].getScore(), delta_D, pid.getHits()[s].getMetaValue("DOTBIAS"))); //final_hits[s].removeMetaValue("DOTBIAS"); } pid.setHits(final_hits); pid.sort(); pid.setMZ(query[j].getPrecursors()[0].getMZ()); pid.setRT(query_MZ); } } if (top_hits != -1 && (UInt)top_hits < pid.getHits().size()) { vector<PeptideHit> hits; hits.resize(top_hits); for (Size i = 0; i < (UInt)top_hits; ++i) { hits[i] = pid.getHits()[i]; } pid.setHits(hits); } peptide_ids.push_back(pid); } protein_ids.push_back(prot_id); 
//------------------------------------------------------------- // writing output //------------------------------------------------------------- IdXMLFile id_xml_file; id_xml_file.store(*out_file, protein_ids, peptide_ids); time_t end_time = time(NULL); cout << "Search time: " << difftime(end_time, start_time) << " seconds for " << *in << "\n"; } time_t end_time = time(NULL); cout << "Total time: " << difftime(end_time, prog_time) << " secconds\n"; return EXECUTION_OK; }
// Copy construction: the copy must carry the same parameters and name as the source object.
START_SECTION((Normalizer(const Normalizer& source)))
  Normalizer copy(*e_ptr);
  TEST_EQUAL(copy.getParameters(), e_ptr->getParameters())
  TEST_EQUAL(copy.getName(), e_ptr->getName())
END_SECTION

// Assignment: a default-constructed object that is assigned from the source must compare equal
// to it in parameters and name.
START_SECTION((Normalizer& operator = (const Normalizer& source)))
  Normalizer copy;
  copy = *e_ptr;
  TEST_EQUAL(copy.getParameters(), e_ptr->getParameters())
  TEST_EQUAL(copy.getName(), e_ptr->getName())
END_SECTION

// filterSpectrum: checks the intensity of the last peak after sorting by intensity
// (presumably the most intense peak), before and after normalization.
START_SECTION((template<typename SpectrumType> void filterSpectrum(SpectrumType& spectrum)))
  DTAFile dta_file;
  PeakSpectrum spec;
  dta_file.load(OPENMS_GET_TEST_DATA_PATH("Transformers_tests.dta"), spec);

  // before filtering the last peak of the sorted test spectrum has intensity 46
  spec.sortByIntensity();
  TEST_EQUAL(spec.rbegin()->getIntensity(), 46)

  // the spectrum is filtered in place with the current parameters
  e_ptr->filterSpectrum(spec);

  // after filtering the last peak of the sorted spectrum is expected to have intensity 1
  spec.sortByIntensity();
  TEST_EQUAL(spec.rbegin()->getIntensity(), 1)

  // switch the normalization method for the checks that follow
  Param p(e_ptr->getParameters());
  p.setValue("method", "to_TIC");
  e_ptr->setParameters(p);
void CompNovoIdentificationBase::getCIDSpectrum_(PeakSpectrum & spec, const String & sequence, Size charge, DoubleReal prefix, DoubleReal suffix) { static DoubleReal h2o_mass = EmpiricalFormula("H2O").getMonoWeight(); static DoubleReal nh3_mass = EmpiricalFormula("NH3").getMonoWeight(); static DoubleReal co_mass = EmpiricalFormula("CO").getMonoWeight(); Peak1D p; DoubleReal b_pos(0 + prefix); DoubleReal y_pos(h2o_mass + suffix); bool b_H2O_loss(false), b_NH3_loss(false), y_NH3_loss(false); for (Size i = 0; i != sequence.size() - 1; ++i) { char aa(sequence[i]); b_pos += aa_to_weight_[aa]; char aa2(sequence[sequence.size() - i - 1]); y_pos += aa_to_weight_[aa2]; for (Size z = 1; z <= charge && z < 3; ++z) { // b-ions if (b_pos >= min_mz_ && b_pos <= max_mz_) { for (Size j = 0; j != max_isotope_; ++j) { if (z == 1 /*|| b_pos > MIN_DOUBLE_MZ*/) { p.setPosition((b_pos + (DoubleReal)z * Constants::PROTON_MASS_U + (DoubleReal)j + Constants::NEUTRON_MASS_U) / (DoubleReal)z); p.setIntensity(isotope_distributions_[(Size)b_pos][j] * 0.8 / (z * z)); spec.push_back(p); } } } // b-ion losses if (b_pos - h2o_mass > min_mz_ && b_pos - h2o_mass < max_mz_) { if (b_H2O_loss || aa == 'S' || aa == 'T' || aa == 'E' || aa == 'D') { b_H2O_loss = true; p.setPosition((b_pos + z * Constants::PROTON_MASS_U - h2o_mass) / z); p.setIntensity(0.02 / (DoubleReal)(z * z)); if (z == 1 /* || b_pos > MIN_DOUBLE_MZ*/) { spec.push_back(p); } } if (b_NH3_loss || aa == 'Q' || aa == 'N' || aa == 'R' || aa == 'K') { b_NH3_loss = true; p.setPosition((b_pos + z * Constants::PROTON_MASS_U - nh3_mass) / z); p.setIntensity(0.02 / (DoubleReal)(z * z)); if (z == 1 /* || b_pos > MIN_DOUBLE_MZ*/) { spec.push_back(p); } } } // a-ions only for charge 1 if (z == 1) { if (b_pos - co_mass > min_mz_ && b_pos - co_mass < max_mz_) { // a-ions p.setPosition((b_pos + z * Constants::PROTON_MASS_U - co_mass) / (DoubleReal)z); p.setIntensity(0.1f); spec.push_back(p); } } if (y_pos > min_mz_ && y_pos < max_mz_) { // y-ions for 
(Size j = 0; j != max_isotope_; ++j) { if (z == 1 /* || y_pos > MIN_DOUBLE_MZ*/) { p.setPosition((y_pos + (DoubleReal)z * Constants::PROTON_MASS_U + (DoubleReal)j * Constants::NEUTRON_MASS_U) / (DoubleReal)z); p.setIntensity(isotope_distributions_[(Size)y_pos][j] / (DoubleReal) (z * z)); spec.push_back(p); } } // H2O loss p.setPosition((y_pos + z * Constants::PROTON_MASS_U - h2o_mass) / (DoubleReal)z); p.setIntensity(0.1 / (DoubleReal)(z * z)); if (aa2 == 'Q') // pyroglutamic acid formation { p.setIntensity(0.5f); } if (z == 1 /* || y_pos > MIN_DOUBLE_MZ*/) { spec.push_back(p); } // NH3 loss if (y_NH3_loss || aa2 == 'Q' || aa2 == 'N' || aa2 == 'R' || aa2 == 'K') { y_NH3_loss = true; p.setPosition((y_pos + z * Constants::PROTON_MASS_U - nh3_mass) / (DoubleReal)z); p.setIntensity(0.1 / (DoubleReal)(z * z)); if (z == 1 /*|| y_pos > MIN_DOUBLE_MZ*/) { spec.push_back(p); } } } } } // if Q1 abundant loss of water -> pyroglutamic acid formation if (sequence[0] == 'Q' && prefix == 0 && suffix == 0) { /* for (PeakSpectrum::Iterator it = spec.begin(); it != spec.end(); ++it) { it->setIntensity(it->getIntensity() * 0.5); }*/ /* for (Size j = 0; j != max_isotope; ++j) { p.setPosition((precursor_weight + charge - 1 + j)/(DoubleReal)charge); p.setIntensity(isotope_distributions_[(Int)p.getPosition()[0]][j] * 0.1); spec.push_back(p); } */ } spec.sortByPosition(); return; }
// Copy construction: the copy must carry the same parameters and name as the source object.
START_SECTION((ThresholdMower(const ThresholdMower& source)))
  ThresholdMower copy(*e_ptr);
  TEST_EQUAL(copy.getParameters(), e_ptr->getParameters())
  TEST_EQUAL(copy.getName(), e_ptr->getName())
END_SECTION

// Assignment: a default-constructed object that is assigned from the source must compare equal
// to it in parameters and name.
START_SECTION((ThresholdMower& operator=(const ThresholdMower& source)))
  ThresholdMower copy;
  copy = *e_ptr;
  TEST_EQUAL(copy.getParameters(), e_ptr->getParameters())
  TEST_EQUAL(copy.getName(), e_ptr->getName());
END_SECTION

// filterSpectrum: filters the shared test spectrum (121 peaks) with increasing thresholds.
START_SECTION((template<typename SpectrumType> void filterSpectrum(SpectrumType& spectrum)))
  DTAFile dta_file;
  PeakSpectrum spec;
  dta_file.load(OPENMS_GET_TEST_DATA_PATH("Transformers_tests.dta"), spec);
  // sanity check on the input before filtering
  TEST_EQUAL(spec.size(), 121)

  // with a threshold of 1.0 all 121 peaks are expected to remain
  Param p(e_ptr->getParameters());
  p.setValue("threshold", 1.0);
  e_ptr->setParameters(p);
  e_ptr->filterSpectrum(spec);
  TEST_EQUAL(spec.size(), 121)

  // filter the same (already filtered) spectrum again with a threshold of 10.0
  p.setValue("threshold", 10.0);
  e_ptr->setParameters(p);
  e_ptr->filterSpectrum(spec);
// Similarity of two spectra based on a gapped alignment of their peaks.
//
// Returns 0 without aligning if
//  - the first precursor m/z values (0 when a spectrum has no precursor) differ by more than
//    "precursor_mass_tolerance",
//  - one of the spectra has no peaks, or
//  - "heuristic_level" is non-zero and the |heuristic_level| most intense peaks of both spectra
//    share no m/z within "epsilon".
// Otherwise a dynamic-programming matrix is filled in which two peaks may be paired only if their
// m/z differ by at most "epsilon" (scored by peakPairScore_) and every gap costs "epsilon". The
// best value in the last row/column is divided by the geometric mean of the two self-alignment scores.
//
// The peaks are aligned in the order in which they are stored in spec1/spec2.
double PeakAlignment::operator()(const PeakSpectrum& spec1, const PeakSpectrum& spec2) const
{
  // shortcut similarity calculation by comparing PrecursorPeaks (PrecursorPeaks more than delta away from each other are supposed to be from another peptide)
  DoubleReal pre_mz1 = 0.0;
  if (!spec1.getPrecursors().empty())
  {
    pre_mz1 = spec1.getPrecursors()[0].getMZ();
  }
  DoubleReal pre_mz2 = 0.0;
  // check spec2's own precursor list before reading from it
  if (!spec2.getPrecursors().empty())
  {
    pre_mz2 = spec2.getPrecursors()[0].getMZ();
  }
  if (fabs(pre_mz1 - pre_mz2) > (double)param_.getValue("precursor_mass_tolerance"))
  {
    return 0;
  }

  // Nothing can be aligned without peaks; returning here also avoids the divisions by
  // spec1.size() * spec2.size() and by the self-alignment scores below.
  if (spec1.empty() || spec2.empty())
  {
    return 0;
  }

  // heuristic shortcut
  const double epsilon = (double)param_.getValue("epsilon");
  const UInt heuristic_level = (UInt)param_.getValue("heuristic_level");
  if (heuristic_level)
  {
    // sorted copies are only needed for this check
    PeakSpectrum s1(spec1), s2(spec2);
    s1.sortByIntensity(true);
    s2.sortByIntensity(true);

    //heuristic filters (and shortcuts) if spec1 and spec2 have NOT at least one peak in the sets of |heuristic_level|-many highest peaks in common
    bool shared_top_peak(false);
    for (PeakSpectrum::ConstIterator it_s1 = s1.begin(); !shared_top_peak && Size(it_s1 - s1.begin()) < heuristic_level && it_s1 != s1.end(); ++it_s1)
    {
      for (PeakSpectrum::ConstIterator it_s2 = s2.begin(); Size(it_s2 - s2.begin()) < heuristic_level && it_s2 != s2.end(); ++it_s2)
      {
        // determine if it is a match, i.e. mutual peak at certain m/z with epsilon tolerance
        if (fabs((*it_s2).getMZ() - (*it_s1).getMZ()) < epsilon)
        {
          shared_top_peak = true;
          break;
        }
      }
    }
    if (!shared_top_peak)
    {
      return 0;
    }
  }

  //TODO gapcost dependence on distance ?
  const double gap = (double)param_.getValue("epsilon");

  //initialize alignment matrix with 0 in (0,0) and a multiple of gapcost in the first row/col matrix(row,col,values)
  Matrix<double> matrix(spec1.size() + 1, spec2.size() + 1, 0);
  for (Size i = 1; i < matrix.rows(); i++)
  {
    matrix.setValue(i, 0, -gap * i);
  }
  for (Size i = 1; i < matrix.cols(); i++)
  {
    matrix.setValue(0, i, -gap * i);
  }

  //get sigma - the standard deviation (sqrt of variance) of all pairwise peak distances
  double mid(0);
  for (Size i = 0; i < spec1.size(); ++i)
  {
    for (Size j = 0; j < spec2.size(); ++j)
    {
      double pos1(spec1[i].getMZ()), pos2(spec2[j].getMZ());
      mid += fabs(pos1 - pos2);
    }
  }
  // average peak distance
  mid /= (spec1.size() * spec2.size());

  double var(0);
  for (Size i = 0; i < spec1.size(); ++i)
  {
    for (Size j = 0; j < spec2.size(); ++j)
    {
      double pos1(spec1[i].getMZ()), pos2(spec2[j].getMZ());
      var += (fabs(pos1 - pos2) - mid) * (fabs(pos1 - pos2) - mid);
    }
  }
  // peak distance variance
  var /= (spec1.size() * spec2.size());

  //only in case of only two equal peaks in the spectra sigma is 0
  const double sigma((var == 0) ? numeric_limits<double>::min() : sqrt(var));

  //fill alignment matrix
  for (Size i = 1; i < spec1.size() + 1; ++i)
  {
    for (Size j = 1; j < spec2.size() + 1; ++j)
    {
      double pos1(spec1[i - 1].getMZ()), pos2(spec2[j - 1].getMZ());
      // every cell can be reached through a gap from the left or from above
      double from_left(matrix.getValue(i, j - 1) - gap);
      double from_above(matrix.getValue(i - 1, j) - gap);
      double cell(max(from_left, from_above));

      //only if peaks are in reasonable proximity alignment is considered else only gaps
      if (fabs(pos1 - pos2) <= epsilon)
      {
        double int1(spec1[i - 1].getIntensity()), int2(spec2[j - 1].getIntensity());
        double from_diagonal(matrix.getValue(i - 1, j - 1) + peakPairScore_(pos1, int1, pos2, int2, sigma));
        cell = max(cell, from_diagonal);
      }
      matrix.setValue(i, j, cell);
    }
  }

  //get best overall score and return
  // numeric_limits<double>::min() is the smallest positive double, so border cells with a
  // negative score never become the best score.
  double best_score(numeric_limits<double>::min());
  for (Size i = 0; i < matrix.cols(); i++)
  {
    best_score = max(best_score, matrix.getValue(matrix.rows() - 1, i));
  }
  for (Size i = 0; i < matrix.rows(); i++)
  {
    best_score = max(best_score, matrix.getValue(i, matrix.cols() - 1));
  }

  //calculate selfalignment-scores for both input spectra
  double score_spec1(0), score_spec2(0);
  for (Size i = 0; i < spec1.size(); ++i)
  {
    double int_i(spec1[i].getIntensity());
    double pos_i(spec1[i].getMZ());
    score_spec1 += peakPairScore_(pos_i, int_i, pos_i, int_i, sigma);
  }
  for (Size i = 0; i < spec2.size(); ++i)
  {
    double int_i(spec2[i].getIntensity());
    double pos_i(spec2[i].getMZ());
    score_spec2 += peakPairScore_(pos_i, int_i, pos_i, int_i, sigma);
  }

  //normalize score to interval [0,1] with geometric mean
  double best_score_normalized(best_score / sqrt(score_spec1 * score_spec2));

  return best_score_normalized;
}
void CompNovoIonScoring::scoreSpectra(Map<double, IonScore> & ion_scores, PeakSpectrum & CID_spec, PeakSpectrum & ETD_spec, double precursor_weight, Size charge) { // adds single charged variants of putative single charged ions //addSingleChargedIons_(ion_scores, CID_spec); for (PeakSpectrum::ConstIterator it = CID_spec.begin(); it != CID_spec.end(); ++it) { double it_pos(it->getPosition()[0]); IonScore ion_score; ion_scores[it_pos] = ion_score; } for (PeakSpectrum::ConstIterator it = CID_spec.begin(); it != CID_spec.end(); ++it) { ion_scores[it->getPosition()[0]].s_isotope_pattern_1 = scoreIsotopes_(CID_spec, it, ion_scores, 1); if (it->getPosition()[0] < precursor_weight / 2.0) { ion_scores[it->getPosition()[0]].s_isotope_pattern_2 = scoreIsotopes_(CID_spec, it, ion_scores, 2); } else { ion_scores[it->getPosition()[0]].s_isotope_pattern_2 = -1; } } // find possible supporting ions from ETD spec to CID spec scoreETDFeatures_(charge, precursor_weight, ion_scores, CID_spec, ETD_spec); // combine the features and give b-ion scores scoreWitnessSet_(charge, precursor_weight, ion_scores, CID_spec); for (Map<double, IonScore>::iterator it = ion_scores.begin(); it != ion_scores.end(); ++it) { it->second.score = it->second.s_witness; } MassDecompositionAlgorithm decomp_algo; // check whether a PRMNode_ can be decomposed into amino acids // rescore the peaks that cannot be possible y-ion candidates double max_decomp_weight((double)param_.getValue("max_decomp_weight")); for (Map<double, IonScore>::iterator it = ion_scores.begin(); it != ion_scores.end(); ++it) { if (it->first > 19.0 && (it->first - 19.0) < max_decomp_weight) { vector<MassDecomposition> decomps; decomp_algo.getDecompositions(decomps, it->first - 19.0); #ifdef ION_SCORING_DEBUG cerr << "Decomps: " << it->first << " " << it->first - 19.0 << " " << decomps.size() << " " << it->second.score << endl; #endif if (decomps.empty()) { it->second.score = 0; } } if (it->first < precursor_weight && precursor_weight - 
it->first < max_decomp_weight) { vector<MassDecomposition> decomps; decomp_algo.getDecompositions(decomps, precursor_weight - it->first); #ifdef ION_SCORING_DEBUG cerr << "Decomps: " << it->first << " " << precursor_weight - it->first << " " << decomps.size() << " " << it->second.score << endl; #endif if (decomps.empty()) { it->second.score = 0; } } } ion_scores[CID_spec.begin()->getPosition()[0]].score = 1; ion_scores[(CID_spec.end() - 1)->getPosition()[0]].score = 1; }
// Aligns the peaks of two spectra by dynamic programming and returns the aligned peak pairs.
//
// A (spec1.size() + 1) x (spec2.size() + 1) score matrix is filled: two peaks may only be paired
// (diagonal step, scored by peakPairScore_()) if their m/z values differ by at most "epsilon";
// every gap costs "epsilon" as well. The trace starts at the best cell of the last row / last
// column and is followed back until row 0 or column 0 is reached.
//
// @param spec1  first spectrum (row dimension)
// @param spec2  second spectrum (column dimension)
// @return pairs (index in spec1, index in spec2) of aligned peaks, in ascending index order;
//         empty if one of the spectra has no peaks or the trace contains no diagonal step
vector<pair<Size, Size> > PeakAlignment::getAlignmentTraceback(const PeakSpectrum& spec1, const PeakSpectrum& spec2) const
{
  // no peaks, no aligned pairs; returning here also avoids the 0/0 in the mean/variance below
  if (spec1.empty() || spec2.empty())
  {
    return vector<pair<Size, Size> >();
  }

  const double epsilon = (double)param_.getValue("epsilon");

  //TODO gapcost dependence on distance ?
  const double gap = (double)param_.getValue("epsilon");

  //initialize alignment matrix with 0 in (0,0) and a multiple of gapcost in the first row/col matrix(row,col,values)
  Matrix<double> matrix(spec1.size() + 1, spec2.size() + 1, 0);
  for (Size i = 1; i < matrix.rows(); i++)
  {
    matrix.setValue(i, 0, -gap * i);
  }
  for (Size i = 1; i < matrix.cols(); i++)
  {
    matrix.setValue(0, i, -gap * i);
  }

  // gives the direction of the matrix cell that originated the respective cell
  // e.g. matrix(i+1,j+1) could have originated from matrix(i,j), matrix(i+1,j) or matrix(i,j+1)
  // so traceback(i,j) represents matrix(i+1,j+1) and contains a "1"-from diagonal, a "0"-from left or a "2"-from above
  Matrix<Size> traceback(spec1.size(), spec2.size());

  //get sigma - the standard deviation (sqrt of variance) of all pairwise m/z distances
  double mid(0);
  for (Size i = 0; i < spec1.size(); ++i)
  {
    for (Size j = 0; j < spec2.size(); ++j)
    {
      double pos1(spec1[i].getMZ()), pos2(spec2[j].getMZ());
      mid += fabs(pos1 - pos2);
    }
  }
  mid /= (spec1.size() * spec2.size());

  double var(0);
  for (Size i = 0; i < spec1.size(); ++i)
  {
    for (Size j = 0; j < spec2.size(); ++j)
    {
      double pos1(spec1[i].getMZ()), pos2(spec2[j].getMZ());
      var += (fabs(pos1 - pos2) - mid) * (fabs(pos1 - pos2) - mid);
    }
  }
  var /= (spec1.size() * spec2.size());

  // if all pairwise distances are equal (e.g. one peak per spectrum) the variance is 0;
  // fall back to the smallest positive double so that sigma is never exactly zero
  const double sigma((var == 0) ? numeric_limits<double>::min() : sqrt(var));

  //fill alignment matrix
  for (Size i = 1; i < spec1.size() + 1; ++i)
  {
    for (Size j = 1; j < spec2.size() + 1; ++j)
    {
      double pos1(spec1[i - 1].getMZ()), pos2(spec2[j - 1].getMZ());
      const double from_left(matrix.getValue(i, j - 1) - gap);
      const double from_above(matrix.getValue(i - 1, j) - gap);

      //only if peaks are in reasonable proximity alignment is considered else only gaps
      if (fabs(pos1 - pos2) <= epsilon)
      {
        // actual cell = max(upper left cell+score, left cell-gap, upper cell-gap)
        double int1(spec1[i - 1].getIntensity()), int2(spec2[j - 1].getIntensity());
        const double from_diagonal(matrix.getValue(i - 1, j - 1) + peakPairScore_(pos1, int1, pos2, int2, sigma));
        matrix.setValue(i, j, max(from_left, max(from_above, from_diagonal)));

        // TODO the cases where all or two values are equal
        // NOTE(review): on a tie no direction is written, so the cell keeps the value the
        // Matrix constructor stored - presumably 0 ("from left"); confirm
        if (from_diagonal > from_left && from_diagonal > from_above)
        {
          traceback.setValue(i - 1, j - 1, 1);
        }
        else if (from_left > from_diagonal && from_left > from_above)
        {
          traceback.setValue(i - 1, j - 1, 0);
        }
        else if (from_above > from_diagonal && from_above > from_left)
        {
          traceback.setValue(i - 1, j - 1, 2);
        }
      }
      else
      {
        // actual cell = max(left cell-gap, upper cell-gap)
        matrix.setValue(i, j, max(from_left, from_above));
        if (from_left > from_above)
        {
          traceback.setValue(i - 1, j - 1, 0);
        }
        else //from_left <= from_above
        {
          traceback.setValue(i - 1, j - 1, 2);
        }
      }
    }
  }

  //get matrix coordinates from best alloverscore (searched in the last row and the last column)
  // NOTE(review): numeric_limits<double>::min() is the smallest POSITIVE double, so only cells
  // with a positive score can be selected; otherwise the start stays at (0,0) and the result is
  // empty. Kept as is because it defines when an empty alignment is returned - confirm intent.
  Size row_index(0), col_index(0);
  double best_score(numeric_limits<double>::min());
  for (Size i = 0; i < matrix.cols(); i++)
  {
    if (best_score < matrix.getValue(matrix.rows() - 1, i))
    {
      best_score = matrix.getValue(matrix.rows() - 1, i);
      row_index = matrix.rows() - 1;
      col_index = i;
    }
  }
  for (Size i = 0; i < matrix.rows(); i++)
  {
    if (best_score < matrix.getValue(i, matrix.cols() - 1))
    {
      best_score = matrix.getValue(i, matrix.cols() - 1);
      row_index = i;
      col_index = matrix.cols() - 1;
    }
  }

  //follow the track from best alloverscore towards 0,0
  // pairs are found back-to-front; they are appended (constant time each) and the order is
  // reversed once at the end instead of inserting every pair at the front of the vector
  // TODO check the invariant!
  vector<pair<Size, Size> > reversed_pairs;
  while (row_index > 0 && col_index > 0)
  {
    const Size direction(traceback.getValue(row_index - 1, col_index - 1));
    if (direction == 1)
    {
      //from diagonal - peaks aligned, register aligned peaks only
      reversed_pairs.push_back(pair<Size, Size>(row_index - 1, col_index - 1));
      row_index = row_index - 1;
      col_index = col_index - 1;
    }
    else if (direction == 0)
    {
      // gap alignment, came from the left
      col_index = col_index - 1;
    }
    else
    {
      // gap alignment, came from above
      row_index = row_index - 1;
    }
  }

  return vector<pair<Size, Size> >(reversed_pairs.rbegin(), reversed_pairs.rend());
}
PeptideHit AScore::compute(const PeptideHit & hit, PeakSpectrum & real_spectrum, double fragment_mass_tolerance, bool fragment_mass_unit_ppm, Size max_peptide_len, Size max_num_perm) { PeptideHit phospho = hit; //reset phospho phospho.setScore(-1); if (real_spectrum.empty()) { return phospho; } String sequence_str = phospho.getSequence().toString(); Size number_of_phosphorylation_events = numberOfPhosphoEvents_(sequence_str); AASequence seq_without_phospho = removePhosphositesFromSequence_(sequence_str); if (seq_without_phospho.toUnmodifiedString().size() > max_peptide_len) { LOG_DEBUG << "\tcalculation aborted: peptide too long: " << seq_without_phospho.toString() << std::endl; return phospho; } // determine all phospho sites vector<Size> sites(getSites_(seq_without_phospho)); Size number_of_STY = sites.size(); if (number_of_phosphorylation_events == 0 || number_of_STY == 0 || number_of_STY == number_of_phosphorylation_events) { return phospho; } vector<vector<Size> > permutations(computePermutations_(sites, (Int)number_of_phosphorylation_events)); LOG_DEBUG << "\tnumber of permutations: " << permutations.size() << std::endl; // TODO: using a heuristic to calculate the best phospho sites if the number of permutations are exceeding the maximum. 
// A heuristic could be to calculate the best site for the first phosphorylation and based on this the best site for the second // phosphorylation and so on until every site is determined if (permutations.size() > max_num_perm) { LOG_DEBUG << "\tcalculation aborted: number of permutations exceeded" << std::endl; return phospho; } vector<PeakSpectrum> th_spectra(createTheoreticalSpectra_(permutations, seq_without_phospho)); // prepare real spectrum windows if (!real_spectrum.isSorted()) { real_spectrum.sortByPosition(); } vector<PeakSpectrum> windows_top10(peakPickingPerWindowsInSpectrum_(real_spectrum)); // calculate peptide score for each possible phospho site permutation vector<vector<double> > peptide_site_scores(calculatePermutationPeptideScores_(th_spectra, windows_top10, fragment_mass_tolerance, fragment_mass_unit_ppm)); // rank peptide permutations ascending multimap<double, Size> ranking(rankWeightedPermutationPeptideScores_(peptide_site_scores)); multimap<double, Size>::reverse_iterator rev = ranking.rbegin(); String seq1 = th_spectra[rev->second].getName(); phospho.setSequence(AASequence::fromString(seq1)); phospho.setMetaValue("search_engine_sequence", hit.getSequence().toString()); double peptide1_score = rev->first; phospho.setMetaValue("AScore_pep_score", peptide1_score); // initialize score with highest peptide score (aka highest weighted score) ++rev; String seq2 = th_spectra[rev->second].getName(); double peptide2_score = rev->first; vector<ProbablePhosphoSites> phospho_sites; determineHighestScoringPermutations_(peptide_site_scores, phospho_sites, permutations, ranking); Int rank = 1; double best_Ascore = std::numeric_limits<double>::max(); // the lower the better for (vector<ProbablePhosphoSites>::iterator s_it = phospho_sites.begin(); s_it != phospho_sites.end(); ++s_it) { double Ascore = 0; if (peptide1_score == peptide2_score) // set Ascore = 0 for each phosphorylation site { LOG_DEBUG << "\tscore of best (" << seq1 << ") and second best 
peptide (" << seq2 << ") are equal (" << peptide1_score << ")" << std::endl; } else { vector<PeakSpectrum> site_determining_ions; computeSiteDeterminingIons_(th_spectra, *s_it, site_determining_ions, fragment_mass_tolerance, fragment_mass_unit_ppm); Size N = site_determining_ions[0].size(); // all possibilities have the same number so take the first one double p = static_cast<double>(s_it->peak_depth) / 100.0; Size n_first = 0; // number of matching peaks for first peptide for (Size window_idx = 0; window_idx != windows_top10.size(); ++window_idx) // for each 100 m/z window { n_first += numberOfMatchedIons_(site_determining_ions[0], windows_top10[window_idx], s_it->peak_depth, fragment_mass_tolerance, fragment_mass_unit_ppm); } double P_first = computeCumulativeScore_(N, n_first, p); Size n_second = 0; // number of matching peaks for second peptide for (Size window_idx = 0; window_idx < windows_top10.size(); ++window_idx) //each 100 m/z window { n_second += numberOfMatchedIons_(site_determining_ions[1], windows_top10[window_idx], s_it->peak_depth, fragment_mass_tolerance, fragment_mass_unit_ppm); } Size N2 = site_determining_ions[1].size(); // all possibilities have the same number so take the first one double P_second = computeCumulativeScore_(N2, n_second, p); //abs is used to avoid -0 score values double score_first = abs(-10 * log10(P_first)); double score_second = abs(-10 * log10(P_second)); LOG_DEBUG << "\tfirst - N: " << N << ",p: " << p << ",n: " << n_first << ", score: " << score_first << std::endl; LOG_DEBUG << "\tsecond - N: " << N2 << ",p: " << p << ",n: " << n_second << ", score: " << score_second << std::endl; Ascore = score_first - score_second; LOG_DEBUG << "\tAscore_" << rank << ": " << Ascore << std::endl; } if (Ascore < best_Ascore) { best_Ascore = Ascore; } phospho.setMetaValue("AScore_" + String(rank), Ascore); ++rank; } phospho.setScore(best_Ascore); return phospho; }
double ZhangSimilarityScore::operator()(const PeakSpectrum & s1, const PeakSpectrum & s2) const { const double tolerance = (double)param_.getValue("tolerance"); bool use_linear_factor = param_.getValue("use_linear_factor").toBool(); bool use_gaussian_factor = param_.getValue("use_gaussian_factor").toBool(); double score(0), sum(0), sum1(0), sum2(0) /*, squared_sum1(0), squared_sum2(0)*/; // TODO remove parameter if (param_.getValue("is_relative_tolerance").toBool() ) { throw Exception::NotImplemented(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION); } for (PeakSpectrum::ConstIterator it1 = s1.begin(); it1 != s1.end(); ++it1) { sum1 += it1->getIntensity(); /* for (PeakSpectrum::ConstIterator it2 = s1.begin(); it2 != s1.end(); ++it2) { if (abs(it1->getPosition()[0] - it2->getPosition()[0]) <= 2 * tolerance) { squared_sum1 += it1->getIntensity() * it2->getIntensity(); } }*/ } /* UInt i_left(0); for (Size i = 0; i != s1.size(); ++i) { sum1 += s1[i].getIntensity(); for (Size j = i_left; j != s1.size(); ++j) { double pos1(s1[i].getPosition()[0]), pos2(s1[j].getPosition()[0]); if (abs(pos1 - pos2) <= 2 * tolerance) { squared_sum1 += s1[i].getIntensity() * s1[j].getIntensity(); } else { if (pos2 > pos1) { break; } else { i_left = i; } } } }*/ /* i_left = 0; for (Size i = 0; i != s2.size(); ++i) { sum2 += s2[i].getIntensity(); for (Size j = i_left; j != s2.size(); ++j) { double pos1(s2[i].getPosition()[0]), pos2(s2[j].getPosition()[0]); if (abs(pos1 - pos2) <= 2 * tolerance) { squared_sum1 += s2[i].getIntensity() * s2[j].getIntensity(); } else { if (pos2 > pos1) { break; } else { i_left = i; } } } }*/ for (PeakSpectrum::ConstIterator it1 = s2.begin(); it1 != s2.end(); ++it1) { sum2 += it1->getIntensity(); /* for (PeakSpectrum::ConstIterator it2 = s2.begin(); it2 != s2.end(); ++it2) { if (abs(it1->getPosition()[0] - it2->getPosition()[0]) <= 2 * tolerance) { squared_sum2 += it1->getIntensity() * it2->getIntensity(); } } */ } Size j_left(0); for (Size i = 0; i != s1.size(); ++i) { 
for (Size j = j_left; j != s2.size(); ++j) { double pos1(s1[i].getMZ()), pos2(s2[j].getMZ()); if (fabs(pos1 - pos2) < tolerance) { //double factor((tolerance - fabs(pos1 - pos2)) / tolerance); double factor = 1.0; if (use_linear_factor || use_gaussian_factor) { factor = getFactor_(tolerance, fabs(pos1 - pos2), use_gaussian_factor); } sum += sqrt(s1[i].getIntensity() * s2[j].getIntensity() * factor); } else { if (pos2 > pos1) { break; } else { j_left = j; } } } } /* for (PeakSpectrum::ConstIterator it1 = s1.begin(); it1 != s1.end(); ++it1) { for (PeakSpectrum::ConstIterator it2 = s2.begin(); it2 != s2.end(); ++it2) { if (abs(it1->getPosition()[0] - it2->getPosition()[0]) <= 2 * tolerance) { sum += sqrt(it1->getIntensity() * it2->getIntensity()); } } }*/ score = sum / (sqrt(sum1 * sum2)); return score; }
// Class-test sections for BernNorm (instance under test: e_ptr).
//  - copy constructor and assignment operator: the copy must report the same parameters and name.
//  - filterSpectrum: "Transformers_tests.dta" is loaded (121 peaks expected); filtering with the
//    current parameters keeps all 121 peaks, and after setting parameter "C2" to 2000.0 a second
//    filtering leaves 28 peaks.
// NOTE(review): the END_SECTION of the filterSpectrum section is not part of this span.
START_SECTION((BernNorm(const BernNorm& source))) BernNorm copy(*e_ptr); TEST_EQUAL(copy.getParameters(), e_ptr->getParameters()) TEST_EQUAL(copy.getName(), e_ptr->getName()) END_SECTION START_SECTION((BernNorm& operator=(const BernNorm& source))) BernNorm copy; copy = *e_ptr; TEST_EQUAL(copy.getParameters(), e_ptr->getParameters()) TEST_EQUAL(copy.getName(), e_ptr->getName()) END_SECTION START_SECTION((template<typename SpectrumType> void filterSpectrum(SpectrumType& spectrum))) DTAFile dta_file; PeakSpectrum spec; dta_file.load(OPENMS_GET_TEST_DATA_PATH("Transformers_tests.dta"), spec); TEST_EQUAL(spec.size(), 121) e_ptr->filterSpectrum(spec); TEST_EQUAL(spec.size(), 121) Param p(e_ptr->getParameters()); p.setValue("C2", 2000.0); e_ptr->setParameters(p); e_ptr->filterSpectrum(spec); TEST_EQUAL(spec.size(), 28)
// Class-test sections for SqrtMower; e_ptr is pointed at a freshly allocated instance first.
//  - copy constructor and assignment operator: the copy must compare equal (operator==) to *e_ptr.
//  - filterSpectrum: in "Transformers_tests.dta" the peak at index 40 has intensity 37.5 before
//    filtering and sqrt(37.5) afterwards.
//  - filterPeakMap: loads the same file and adds the spectrum to a PeakMap.
// NOTE(review): the remainder of the filterPeakMap section is not part of this span.
e_ptr = new SqrtMower(); START_SECTION((SqrtMower(const SqrtMower& source))) SqrtMower copy(*e_ptr); TEST_EQUAL(*e_ptr == copy, true) END_SECTION START_SECTION((SqrtMower& operator=(const SqrtMower& source))) SqrtMower copy; copy = *e_ptr; TEST_EQUAL(*e_ptr == copy, true) END_SECTION START_SECTION((template<typename SpectrumType> void filterSpectrum(SpectrumType& spectrum))) DTAFile dta_file; PeakSpectrum spec; dta_file.load(OPENMS_GET_TEST_DATA_PATH("Transformers_tests.dta"), spec); TEST_REAL_SIMILAR((spec.begin() + 40)->getIntensity(), 37.5) e_ptr->filterSpectrum(spec); TEST_REAL_SIMILAR((spec.begin() + 40)->getIntensity(), sqrt(37.5)) END_SECTION START_SECTION((void filterPeakMap(PeakMap& exp))) DTAFile dta_file; PeakSpectrum spec; dta_file.load(OPENMS_GET_TEST_DATA_PATH("Transformers_tests.dta"), spec); PeakMap pm; pm.addSpectrum(spec);
// Class-test sections for TheoreticalSpectrumGenerator (instance under test: ptr).
//  - destructor: deletes ptr; a new instance and the test peptide "IFSQVGK" are created afterwards.
//  - assignment operator: the copy must report the same parameters as *ptr.
//  - getSpectrum: with charge range 1..1 the spectrum must contain 11 peaks whose positions match
//    the listed values within an absolute tolerance of 0.001 (114.091 is commented out of the
//    expected list); after clearing the spectrum, charge range 1..2 must yield 22 peaks.
// NOTE(review): the remainder of the getSpectrum section is not part of this span.
START_SECTION(~TheoreticalSpectrumGenerator()) delete ptr; END_SECTION ptr = new TheoreticalSpectrumGenerator(); AASequence peptide = AASequence::fromString("IFSQVGK"); START_SECTION(TheoreticalSpectrumGenerator& operator = (const TheoreticalSpectrumGenerator& tsg)) TheoreticalSpectrumGenerator copy; copy = *ptr; TEST_EQUAL(copy.getParameters(), ptr->getParameters()) END_SECTION START_SECTION(void getSpectrum(PeakSpectrum& spec, const AASequence& peptide, Int min_charge = 1, Int max_charge = 1)) PeakSpectrum spec; ptr->getSpectrum(spec, peptide, 1, 1); TEST_EQUAL(spec.size(), 11) TOLERANCE_ABSOLUTE(0.001) double result[] = {/*114.091,*/ 147.113, 204.135, 261.16, 303.203, 348.192, 431.262, 476.251, 518.294, 575.319, 632.341, 665.362}; for (Size i = 0; i != spec.size(); ++i) { TEST_REAL_SIMILAR(spec[i].getPosition()[0], result[i]) } spec.clear(true); ptr->getSpectrum(spec, peptide, 1, 2); TEST_EQUAL(spec.size(), 22)