/**
  @brief Similarity pairwise score

  This function returns the similarity score of two spectra based on SteinScott.

  Every pair of peaks (one from each spectrum) whose m/z values differ by at
  most twice the "tolerance" parameter contributes the product of its two
  intensities to a raw sum. From that sum the correction term
  (tolerance / 10000) * (total intensity of s1) * (total intensity of s2)
  is subtracted, and the result is divided by the square root of the product
  of the two sums of squared intensities.

  @param s1 const PeakSpectrum Spectrum 1
  @param s2 const PeakSpectrum Spectrum 2

  @return The score; 0 if the score is below the "threshold" parameter, or if
          the normalization term is not positive (an empty spectrum or a
          spectrum whose intensities are all zero).

  @note The inner search starts at a moving left cursor and stops at the first
        peak of s2 that lies too far to the right, so both spectra are
        presumably expected to be sorted by m/z -- confirm with callers.

  @see SteinScottImproveScore()
*/
double SteinScottImproveScore::operator()(const PeakSpectrum & s1, const PeakSpectrum & s2) const
{
  const double epsilon = static_cast<double>(param_.getValue("tolerance"));
  const double constant = epsilon / 10000;

  // sum1/sum2: sums of squared intensities, sum3/sum4: plain intensity sums
  double sum(0), sum1(0), sum2(0), sum3(0), sum4(0);

  for (PeakSpectrum::ConstIterator it1 = s1.begin(); it1 != s1.end(); ++it1)
  {
    const double temp = it1->getIntensity();
    sum1 += temp * temp;
    sum3 += temp;
  }

  for (PeakSpectrum::ConstIterator it1 = s2.begin(); it1 != s2.end(); ++it1)
  {
    const double temp = it1->getIntensity();
    sum2 += temp * temp;
    sum4 += temp;
  }

  // Without this guard an empty (or all-zero) spectrum leads to 0/0 = NaN,
  // which is not caught by the threshold test below because every comparison
  // with NaN is false.
  const double norm = std::sqrt(sum1 * sum2);
  if (!(norm > 0))
  {
    return 0;
  }

  const double z = constant * (sum3 * sum4);

  Size j_left(0);
  for (Size i = 0; i != s1.size(); ++i)
  {
    const double pos1(s1[i].getMZ());
    for (Size j = j_left; j != s2.size(); ++j)
    {
      const double pos2(s2[j].getMZ());
      if (std::abs(pos1 - pos2) <= 2 * epsilon)
      {
        sum += s1[i].getIntensity() * s2[j].getIntensity();
      }
      else if (pos2 > pos1)
      {
        // s2[j] is already beyond the window of s1[i]: nothing further can match
        break;
      }
      else
      {
        // s2[j] is left of the window of s1[i]: remember it as the new start
        j_left = j;
      }
    }
  }

  double score = (sum - z) / norm;

  // the comparison is deliberately made against the float value of the parameter
  if (score < static_cast<float>(param_.getValue("threshold")))
  {
    score = 0;
  }

  return score;
}
double XQuestScores::xCorrelationPrescore(const PeakSpectrum & spec1, const PeakSpectrum & spec2, double tolerance) { // return 0 = no correlation, when one of the spectra is empty if (spec1.size() == 0 || spec2.size() == 0) { return 0.0; } double maxionsize = std::max(spec1[spec1.size()-1].getMZ(), spec2[spec2.size()-1].getMZ()); Int table_size = ceil(maxionsize / tolerance)+1; std::vector< double > ion_table1(table_size, 0); std::vector< double > ion_table2(table_size, 0); // Build tables of the same size, each bin has the size of the tolerance for (Size i = 0; i < spec1.size(); ++i) { Size pos = static_cast<Size>(ceil(spec1[i].getMZ() / tolerance)); ion_table1[pos] = 1; } for (Size i = 0; i < spec2.size(); ++i) { Size pos =static_cast<Size>(ceil(spec2[i].getMZ() / tolerance)); ion_table2[pos] = 1; } double dot_product = 0.0; for (Size i = 0; i < ion_table1.size(); ++i) { dot_product += ion_table1[i] * ion_table2[i]; } // determine the smaller spectrum and normalize by the number of peaks in it double peaks = std::min(spec1.size(), spec2.size()); return dot_product / peaks; }
/**
  @brief Keeps only the most intense peaks inside sliding m/z windows.

  For every peak a window is opened that collects the peaks starting at that
  peak whose position is less than @p windowsize away from it. Within the
  window the peaks are ordered by intensity (sortByIntensity(true)) and all
  peaks from rank @p no_peaks onwards are marked for removal. Once a window
  reaches the last peak of the spectrum no further windows are opened.
  Finally the spectrum is rebuilt from all peaks that were never marked and
  sorted by position.

  @param spec spectrum to filter (modified in place)
  @param windowsize width of the sliding window
  @param no_peaks number of peaks that survive per window
*/
void CompNovoIdentificationBase::windowMower_(PeakSpectrum & spec, DoubleReal windowsize, Size no_peaks)
{
  // untouched snapshot from which the filtered spectrum is rebuilt at the end
  const PeakSpectrum original(spec);
  vector<Peak1D> rejected;

  const Size peak_count = spec.size();
  bool reached_end = false;
  for (Size start = 0; start < peak_count && !reached_end; ++start)
  {
    // gather the peaks belonging to the window that begins at 'start'
    PeakSpectrum window;
    Size next = start;
    while (spec[next].getPosition()[0] - spec[start].getPosition()[0] < windowsize)
    {
      window.push_back(spec[next]);
      ++next;
      if (next == peak_count)
      {
        // the window touched the last peak: this is the final window
        reached_end = true;
        break;
      }
    }

    // everything ranked at or beyond 'no_peaks' in this window is rejected
    window.sortByIntensity(true);
    for (Size rank = no_peaks; rank < window.size(); ++rank)
    {
      rejected.push_back(window[rank]);
    }
  }

  // rebuild the spectrum from the peaks that were not rejected in any window
  spec.clear(false);
  for (PeakSpectrum::ConstIterator peak = original.begin(); peak != original.end(); ++peak)
  {
    const bool is_rejected = find(rejected.begin(), rejected.end(), *peak) != rejected.end();
    if (!is_rejected)
    {
      spec.push_back(*peak);
    }
  }

  spec.sortByPosition();
}
std::vector< double > XQuestScores::xCorrelation(const PeakSpectrum & spec1, const PeakSpectrum & spec2, Int maxshift, double tolerance) { // generate vector of results, filled with zeroes std::vector< double > results(maxshift * 2 + 1, 0); // return 0 = no correlation, when one of the spectra is empty if (spec1.size() == 0 || spec2.size() == 0) { return results; } double maxionsize = std::max(spec1[spec1.size()-1].getMZ(), spec2[spec2.size()-1].getMZ()); Int table_size = ceil(maxionsize / tolerance)+1; std::vector< double > ion_table1(table_size, 0); std::vector< double > ion_table2(table_size, 0); // Build tables of the same size, each bin has the size of the tolerance for (Size i = 0; i < spec1.size(); ++i) { Size pos = static_cast<Size>(ceil(spec1[i].getMZ() / tolerance)); ion_table1[pos] = 10.0; } for (Size i = 0; i < spec2.size(); ++i) { Size pos =static_cast<Size>(ceil(spec2[i].getMZ() / tolerance)); ion_table2[pos] = 10.0; } // Compute means double mean1 = (std::accumulate(ion_table1.begin(), ion_table1.end(), 0.0)) / table_size; double mean2 = (std::accumulate(ion_table2.begin(), ion_table2.end(), 0.0)) / table_size; // Compute denominator double s1 = 0; double s2 = 0; for (Int i = 0; i < table_size; ++i) { s1 += pow((ion_table1[i] - mean1), 2); s2 += pow((ion_table2[i] - mean2), 2); } double denom = sqrt(s1 * s2); // Calculate correlation for each shift for (Int shift = -maxshift; shift <= maxshift; ++shift) { double s = 0; for (Int i = 0; i < table_size; ++i) { Int j = i + shift; if ( (j >= 0) && (j < table_size)) { s += (ion_table1[i] - mean1) * (ion_table2[j] - mean2); } } if (denom > 0) { results[shift + maxshift] = s / denom; } } return results; }
double XQuestScores::matchOddsScore(const PeakSpectrum& theoretical_spec, const Size matched_size, double fragment_mass_tolerance, bool fragment_mass_tolerance_unit_ppm, bool is_xlink_spectrum, Size n_charges) { using boost::math::binomial; Size theo_size = theoretical_spec.size(); if (matched_size < 1 || theo_size < 1) { return 0; } double range = theoretical_spec[theo_size-1].getMZ() - theoretical_spec[0].getMZ(); // Compute fragment tolerance in Da for the mean of MZ values, if tolerance in ppm (rough approximation) double mean = 0.0; for (Size i = 0; i < theo_size; ++i) { mean += theoretical_spec[i].getMZ(); } mean = mean / theo_size; double tolerance_Th = fragment_mass_tolerance_unit_ppm ? mean * 1e-6 * fragment_mass_tolerance : fragment_mass_tolerance; // A priori probability of a random match given info about the theoretical spectrum double a_priori_p = 0; if (is_xlink_spectrum) { a_priori_p = (1 - ( pow( (1 - 2 * tolerance_Th / (0.5 * range)), (static_cast<double>(theo_size) / static_cast<double>(n_charges))))); } else { a_priori_p = (1 - ( pow( (1 - 2 * tolerance_Th / (0.5 * range)), static_cast<int>(theo_size)))); } double match_odds = 0; binomial flip(theo_size, a_priori_p); // min double number to avoid 0 values, causing scores with the value "inf" match_odds = -log(1 - cdf(flip, matched_size) + std::numeric_limits<double>::min()); // score lower than 0 does not make sense, but can happen if cfd = 0, -log( 1 + min() ) < 0 if (match_odds >= 0.0) { return match_odds; } else { return 0; } }
double XQuestScores::logOccupancyProb(const PeakSpectrum& theoretical_spec, const Size matched_size, double fragment_mass_tolerance, bool fragment_mass_tolerance_unit_ppm) { using boost::math::binomial; Size theo_size = theoretical_spec.size(); if (matched_size < 1 || theo_size < 1) { return 0; } double range; double used_tolerance; if (fragment_mass_tolerance_unit_ppm) { range = std::log(theoretical_spec.back().getMZ()) - std::log(theoretical_spec[0].getMZ()); used_tolerance = fragment_mass_tolerance / 1e6; } else { range = theoretical_spec.back().getMZ() - theoretical_spec[0].getMZ(); used_tolerance = fragment_mass_tolerance; } // A priori probability of a random match given info about the theoretical spectrum double a_priori_p = 0; a_priori_p = 1 - pow(1 - 2 * used_tolerance / range, static_cast<double>(theo_size)); double log_occu_prob = 0; binomial flip(theo_size, a_priori_p); // min double number to avoid 0 values, causing scores with the value "inf" log_occu_prob = -log(1 - cdf(flip, matched_size) + std::numeric_limits<double>::min()); // score lower than 0 does not make sense, but can happen, if cfd = 0, then -log( 1 + <double>::min() ) < 0 if (log_occu_prob >= 0.0) { return log_occu_prob; } else // underflow warning? { return 0; } }
/**
  @brief Counts the theoretical peaks that have a matching peak in a (depth-reduced) window.

  A copy of @p window is truncated to its first @p depth peaks and sorted by
  position. For every peak of @p th the nearest peak of that reduced window
  is looked up; it counts as a match if the m/z difference is strictly below
  the tolerance.

  @param th theoretical spectrum
  @param window window spectrum; only its first @p depth peaks (in the order
         given) are considered
  @param depth maximal number of window peaks to use
  @param fragment_mass_tolerance tolerance, in ppm or in m/z units
  @param fragment_mass_tolerance_ppm true if the tolerance is given in ppm; the
         error is then taken relative to the m/z of the window peak

  @return number of matched theoretical peaks
*/
Size AScore::numberOfMatchedIons_(const PeakSpectrum & th, const PeakSpectrum & window, Size depth, double fragment_mass_tolerance, bool fragment_mass_tolerance_ppm) const
{
  PeakSpectrum window_reduced = window;
  if (window_reduced.size() > depth)
  {
    window_reduced.resize(depth);
  }

  // without any window peak nothing can match; this also avoids running into
  // the exception handler below once per theoretical peak
  if (window_reduced.empty())
  {
    return 0;
  }

  window_reduced.sortByPosition();
  Size n = 0;
  for (Size i = 0; i < th.size(); ++i)
  {
    // sentinel "no peak found": the largest Size is never a valid index
    Size nearest_peak = static_cast<Size>(-1);
    try
    {
      nearest_peak = window_reduced.findNearest(th[i].getMZ());
    }
    catch (const Exception::Precondition&)
    {
      // deliberately ignored: the sentinel stays in place and the peak is skipped
    }

    if (nearest_peak < window_reduced.size())
    {
      const double window_mz = window_reduced[nearest_peak].getMZ();

      // std::fabs: an unqualified abs() may resolve to the integer overload
      // and silently truncate the (usually tiny) m/z difference
      double error = std::fabs(window_mz - th[i].getMZ());

      if (fragment_mass_tolerance_ppm)
      {
        error = error / window_mz * 1e6;
      }

      if (error < fragment_mass_tolerance)
      {
        ++n;
      }
    }
  }
  return n;
}
/**
  @brief Similarity of two spectra based on a gapped alignment of their peaks.

  Steps:
  - shortcut: 0 is returned if the first precursor m/z values of the two
    spectra differ by more than the "precursor_mass_tolerance" parameter
    (a spectrum without precursor counts as m/z 0);
  - heuristic shortcut: if the "heuristic_level" parameter is non-zero, 0 is
    returned unless the two sets of heuristic_level-many most intense peaks
    share at least one peak (m/z difference below "epsilon");
  - a dynamic programming matrix is filled in which two peaks may only be
    aligned if their m/z values differ by at most "epsilon"; the gap cost
    equals "epsilon" as well;
  - the best value of the last row / last column is normalized by the
    geometric mean of the self-alignment scores of both spectra.

  @param spec1 first spectrum
  @param spec2 second spectrum

  @return the normalized alignment score; 0 if one of the shortcuts applies
          or if one of the spectra is empty.
*/
double PeakAlignment::operator()(const PeakSpectrum& spec1, const PeakSpectrum& spec2) const
{
  // An empty spectrum cannot be aligned. Without this guard the average peak
  // distance below is a division by zero and so is the final normalization.
  if (spec1.empty() || spec2.empty())
  {
    return 0;
  }

  // shortcut similarity calculation by comparing PrecursorPeaks (PrecursorPeaks more than delta away from each other are supposed to be from another peptide)
  DoubleReal pre_mz1 = 0.0;
  if (!spec1.getPrecursors().empty())
  {
    pre_mz1 = spec1.getPrecursors()[0].getMZ();
  }
  DoubleReal pre_mz2 = 0.0;
  // the precursors of spec2 have to be checked here (not those of spec1),
  // otherwise a spec2 without precursors is accessed out of bounds
  if (!spec2.getPrecursors().empty())
  {
    pre_mz2 = spec2.getPrecursors()[0].getMZ();
  }
  if (fabs(pre_mz1 - pre_mz2) > static_cast<double>(param_.getValue("precursor_mass_tolerance")))
  {
    return 0;
  }

  // heuristic shortcut
  const double epsilon = static_cast<double>(param_.getValue("epsilon"));
  const UInt heuristic_level = static_cast<UInt>(param_.getValue("heuristic_level"));
  if (heuristic_level)
  {
    // intensity-sorted copies are only needed (and only made) for the heuristic
    PeakSpectrum s1(spec1), s2(spec2);
    s1.sortByIntensity(true);
    s2.sortByIntensity(true);

    // heuristic filters (and shortcuts) if spec1 and spec2 have NOT at least one peak in the sets of |heuristic_level|-many highest peaks in common
    bool common_top_peak(false);
    for (PeakSpectrum::ConstIterator it_s1 = s1.begin(); !common_top_peak && Size(it_s1 - s1.begin()) < heuristic_level && it_s1 != s1.end(); ++it_s1)
    {
      for (PeakSpectrum::ConstIterator it_s2 = s2.begin(); Size(it_s2 - s2.begin()) < heuristic_level && it_s2 != s2.end(); ++it_s2)
      {
        // determine if it is a match, i.e. mutual peak at certain m/z with epsilon tolerance
        if (fabs((*it_s2).getMZ() - (*it_s1).getMZ()) < epsilon)
        {
          common_top_peak = true;
          break;
        }
      }
    }
    if (!common_top_peak)
    {
      return 0;
    }
  }

  //TODO gapcost dependence on distance ?
  const double gap = epsilon;

  //initialize alignment matrix with 0 in (0,0) and a multiple of gapcost in the first row/col matrix(row,col,values)
  Matrix<double> matrix(spec1.size() + 1, spec2.size() + 1, 0);
  for (Size i = 1; i < matrix.rows(); i++)
  {
    matrix.setValue(i, 0, -gap * i);
  }
  for (Size i = 1; i < matrix.cols(); i++)
  {
    matrix.setValue(0, i, -gap * i);
  }

  //get sigma - the standard deviation (sqrt of variance)
  double mid(0);
  for (Size i = 0; i < spec1.size(); ++i)
  {
    for (Size j = 0; j < spec2.size(); ++j)
    {
      double pos1(spec1[i].getMZ()), pos2(spec2[j].getMZ());
      mid += fabs(pos1 - pos2);
    }
  }
  // average peak distance
  mid /= (spec1.size() * spec2.size());

  double var(0);
  for (Size i = 0; i < spec1.size(); ++i)
  {
    for (Size j = 0; j < spec2.size(); ++j)
    {
      double pos1(spec1[i].getMZ()), pos2(spec2[j].getMZ());
      var += (fabs(pos1 - pos2) - mid) * (fabs(pos1 - pos2) - mid);
    }
  }
  // peak distance variance
  var /= (spec1.size() * spec2.size());

  //only in case of only two equal peaks in the spectra sigma is 0
  const double sigma((var == 0) ? numeric_limits<double>::min() : sqrt(var));

  //fill alignment matrix
  for (Size i = 1; i < spec1.size() + 1; ++i)
  {
    for (Size j = 1; j < spec2.size() + 1; ++j)
    {
      double pos1(spec1[i - 1].getMZ()), pos2(spec2[j - 1].getMZ());
      double from_left(matrix.getValue(i, j - 1) - gap);
      double from_above(matrix.getValue(i - 1, j) - gap);

      //only if peaks are in reasonable proximity alignment is considered else only gaps
      if (fabs(pos1 - pos2) <= epsilon)
      {
        // actual cell = max(upper left cell+score, left cell-gap, upper cell-gap)
        double int1(spec1[i - 1].getIntensity()), int2(spec2[j - 1].getIntensity());
        double from_diagonal(matrix.getValue(i - 1, j - 1) + peakPairScore_(pos1, int1, pos2, int2, sigma));
        matrix.setValue(i, j, max(from_left, max(from_above, from_diagonal)));
      }
      else
      {
        // actual cell = max(left cell-gap, upper cell-gap)
        matrix.setValue(i, j, max(from_left, from_above));
      }
    }
  }

  //get best overall score and return
  // NOTE: numeric_limits<double>::min() is the smallest POSITIVE double, so a
  // best score that would be negative is effectively floored at (almost) 0.
  double best_score(numeric_limits<double>::min());
  for (Size i = 0; i < matrix.cols(); i++)
  {
    best_score = max(best_score, matrix.getValue(matrix.rows() - 1, i));
  }
  for (Size i = 0; i < matrix.rows(); i++)
  {
    best_score = max(best_score, matrix.getValue(i, matrix.cols() - 1));
  }

  //calculate selfalignment-scores for both input spectra
  double score_spec1(0), score_spec2(0);
  for (Size i = 0; i < spec1.size(); ++i)
  {
    double int_i(spec1[i].getIntensity());
    double pos_i(spec1[i].getMZ());
    score_spec1 += peakPairScore_(pos_i, int_i, pos_i, int_i, sigma);
  }
  for (Size i = 0; i < spec2.size(); ++i)
  {
    double int_i(spec2[i].getIntensity());
    double pos_i(spec2[i].getMZ());
    score_spec2 += peakPairScore_(pos_i, int_i, pos_i, int_i, sigma);
  }

  //normalize score to interval [0,1] with geometric mean
  double best_score_normalized(best_score / sqrt(score_spec1 * score_spec2));

  return best_score_normalized;
}
/**
  @brief Returns the aligned peak pairs of the gapped peak alignment of two spectra.

  The same alignment matrix as in operator() is filled (peaks may only be
  aligned if their m/z values differ by at most the "epsilon" parameter, the
  gap cost equals "epsilon"). In addition the direction every cell originated
  from is recorded, and the path is followed back from the best cell of the
  last row / last column until the first row or column is reached.

  @param spec1 first spectrum
  @param spec2 second spectrum

  @return pairs (index in spec1, index in spec2) of the aligned peaks in
          ascending order; gaps are not reported. Empty if one of the spectra
          is empty.
*/
vector<pair<Size, Size> > PeakAlignment::getAlignmentTraceback(const PeakSpectrum& spec1, const PeakSpectrum& spec2) const
{
  vector<pair<Size, Size> > ret_val;

  // nothing can be aligned to an empty spectrum (this also avoids the
  // division by zero in the average peak distance below)
  if (spec1.size() == 0 || spec2.size() == 0)
  {
    return ret_val;
  }

  const double epsilon = static_cast<double>(param_.getValue("epsilon"));

  //TODO gapcost dependence on distance ?
  const double gap = epsilon;

  //initialize alignment matrix with 0 in (0,0) and a multiple of gapcost in the first row/col matrix(row,col,values)
  Matrix<double> matrix(spec1.size() + 1, spec2.size() + 1, 0);
  for (Size i = 1; i < matrix.rows(); i++)
  {
    matrix.setValue(i, 0, -gap * i);
  }
  for (Size i = 1; i < matrix.cols(); i++)
  {
    matrix.setValue(0, i, -gap * i);
  }

  // gives the direction of the matrix cell that originated the respective cell
  // e.g. matrix(i+1,j+1) could have originated from matrix(i,j), matrix(i+1,j) or matrix(i,j+1)
  // so traceback(i,j) represents matrix(i+1,j+1) and contains a "1"-from diagonal, a "0"-from left or a "2"-from above
  Matrix<Size> traceback(spec1.size(), spec2.size());

  //get sigma - the standard deviation (sqrt of variance)
  double mid(0);
  for (Size i = 0; i < spec1.size(); ++i)
  {
    for (Size j = 0; j < spec2.size(); ++j)
    {
      double pos1(spec1[i].getMZ()), pos2(spec2[j].getMZ());
      mid += fabs(pos1 - pos2);
    }
  }
  // average peak distance
  mid /= (spec1.size() * spec2.size());

  double var(0);
  for (Size i = 0; i < spec1.size(); ++i)
  {
    for (Size j = 0; j < spec2.size(); ++j)
    {
      double pos1(spec1[i].getMZ()), pos2(spec2[j].getMZ());
      var += (fabs(pos1 - pos2) - mid) * (fabs(pos1 - pos2) - mid);
    }
  }
  // peak distance variance
  var /= (spec1.size() * spec2.size());

  // same guard as in operator(): a sigma of exactly 0 is replaced by the
  // smallest positive double so that both functions score peak pairs alike
  const double sigma((var == 0) ? numeric_limits<double>::min() : sqrt(var));

  //fill alignment matrix
  for (Size i = 1; i < spec1.size() + 1; ++i)
  {
    for (Size j = 1; j < spec2.size() + 1; ++j)
    {
      double pos1(spec1[i - 1].getMZ()), pos2(spec2[j - 1].getMZ());
      double from_left(matrix.getValue(i, j - 1) - gap);
      double from_above(matrix.getValue(i - 1, j) - gap);

      //only if peaks are in reasonable proximity alignment is considered else only gaps
      if (fabs(pos1 - pos2) <= epsilon)
      {
        // actual cell = max(upper left cell+score, left cell-gap, upper cell-gap)
        double int1(spec1[i - 1].getIntensity()), int2(spec2[j - 1].getIntensity());
        double from_diagonal(matrix.getValue(i - 1, j - 1) + peakPairScore_(pos1, int1, pos2, int2, sigma));
        matrix.setValue(i, j, max(from_left, max(from_above, from_diagonal)));

        // 0 = from left; it is also what ties fall back to
        Size direction = 0;
        if (from_diagonal > from_left && from_diagonal > from_above)
        {
          direction = 1;
        }
        else if (from_left > from_diagonal && from_left > from_above)
        {
          direction = 0;
        }
        else if (from_above > from_diagonal && from_above > from_left)
        {
          direction = 2;
        }
        else if (from_diagonal == from_above && from_diagonal > from_left)
        {
          // the only tie in which "from left" is NOT one of the maxima;
          // falling back to it would record a direction the cell value did not come from
          direction = 1;
        }
        traceback.setValue(i - 1, j - 1, direction);
      }
      else
      {
        // actual cell = max(left cell-gap, upper cell-gap)
        matrix.setValue(i, j, max(from_left, from_above));
        if (from_left > from_above)
        {
          traceback.setValue(i - 1, j - 1, 0);
        }
        else //from_left <= from_above
        {
          traceback.setValue(i - 1, j - 1, 2);
        }
      }
    }
  }

  //get matrix coordinates from best alloverscore
  // NOTE: numeric_limits<double>::min() is the smallest POSITIVE double, so if
  // no border cell exceeds it the start stays at (0,0) and the trace is empty.
  Size row_index(0), col_index(0);
  double best_score(numeric_limits<double>::min());
  for (Size i = 0; i < matrix.cols(); i++)
  {
    if (best_score < matrix.getValue(matrix.rows() - 1, i))
    {
      best_score = matrix.getValue(matrix.rows() - 1, i);
      row_index = matrix.rows() - 1;
      col_index = i;
    }
  }
  for (Size i = 0; i < matrix.rows(); i++)
  {
    if (best_score < matrix.getValue(i, matrix.cols() - 1))
    {
      best_score = matrix.getValue(i, matrix.cols() - 1);
      row_index = i;
      col_index = matrix.cols() - 1;
    }
  }

  //return track from best alloverscore to 0,0
  // TODO check the invariant!
  while (row_index > 0 && col_index > 0)
  {
    const Size direction = traceback.getValue(row_index - 1, col_index - 1);
    if (direction == 1)
    {
      //from diagonal - peaks aligned, register aligned peaks only
      ret_val.push_back(pair<Size, Size>(row_index - 1, col_index - 1));
      row_index = row_index - 1;
      col_index = col_index - 1;
    }
    else if (direction == 0)
    {
      // gap alignment - from left
      col_index = col_index - 1;
    }
    else
    {
      // gap alignment - from above
      row_index = row_index - 1;
    }
  }

  // the pairs were collected back to front; appending and reversing once
  // avoids shifting the whole vector for every aligned pair
  std::reverse(ret_val.begin(), ret_val.end());

  return ret_val;
}
TEST_EQUAL(copy.getParameters(), e_ptr->getParameters()) TEST_EQUAL(copy.getName(), e_ptr->getName()) END_SECTION START_SECTION((WindowMower& operator = (const WindowMower& source))) WindowMower copy; copy = *e_ptr; TEST_EQUAL(copy.getParameters(), e_ptr->getParameters()) TEST_EQUAL(copy.getName(), e_ptr->getName()) END_SECTION START_SECTION((template<typename SpectrumType> void filterPeakSpectrumForTopNInSlidingWindow(SpectrumType& spectrum))) DTAFile dta_file; PeakSpectrum spec; dta_file.load(OPENMS_GET_TEST_DATA_PATH("Transformers_tests.dta"), spec); TEST_EQUAL(spec.size(), 121) Param p(e_ptr->getParameters()); p.setValue("windowsize", 50.0); // default p.setValue("peakcount", 2); // default p.setValue("movetype", "slide"); // default and not needed as we directly call sliding window function e_ptr->setParameters(p); e_ptr->filterPeakSpectrumForTopNInSlidingWindow(spec); TEST_EQUAL(spec.size(), 56) END_SECTION START_SECTION((template<typename SpectrumType> void filterPeakSpectrumForTopNInJumpingWindow(SpectrumType& spectrum))) DTAFile dta_file;
/**
  @brief Serializes a spectrum into the text layout used by xQuest and returns it base64 encoded.

  The text consists of header lines followed by one line per peak
  ("m/z <tab> intensity <tab> charge"). m/z values are passed through
  Math::roundDecimal(value, -9). The charge is taken from the first integer
  data array of the spectrum; "0" is written if the spectrum has no integer
  data array or if that array has no entry for the peak. The encoded string
  is passed to wrap_() with a width of 76.

  @param spec spectrum to serialize
  @param header if not empty, three header lines are written (the header,
         the precursor m/z and the precursor charge); otherwise a single
         line "precursor m/z <tab> precursor charge". A spectrum without
         precursor is reported with 0 for both values.

  @return the wrapped, base64 encoded spectrum
*/
String XQuestResultXMLFile::getxQuestBase64EncodedSpectrum_(const PeakSpectrum& spec, String header)
{
  std::vector<String> in_strings;
  StringList sl;

  double precursor_mz = 0;
  double precursor_z = 0;
  if (spec.getPrecursors().size() > 0)
  {
    precursor_mz = Math::roundDecimal(spec.getPrecursors()[0].getMZ(), -9);
    precursor_z = spec.getPrecursors()[0].getCharge();
  }

  // header lines
  if (!header.empty()) // common or xlinker spectrum will be reported
  {
    sl.push_back(header + "\n"); // e.g. GUA1372-S14-A-LRRK2_DSS_1A3.03873.03873.3.dta,GUA1372-S14-A-LRRK2_DSS_1A3.03863.03863.3.dta
    sl.push_back(String(precursor_mz) + "\n");
    sl.push_back(String(precursor_z) + "\n");
  }
  else // light or heavy spectrum will be reported
  {
    sl.push_back(String(precursor_mz) + "\t" + String(precursor_z) + "\n");
  }

  // Number of available charge annotations. The array is read in place (no
  // copy) and every access below is checked against this count, so an array
  // that is shorter than the spectrum is never read out of bounds.
  Size charge_count = 0;
  if (spec.getIntegerDataArrays().size() > 0)
  {
    charge_count = spec.getIntegerDataArrays()[0].size();
  }

  // write peaks
  for (Size i = 0; i != spec.size(); ++i)
  {
    String s;
    s += String(Math::roundDecimal(spec[i].getMZ(), -9)) + "\t";
    s += String(spec[i].getIntensity()) + "\t";

    if (i < charge_count)
    {
      s += String(spec.getIntegerDataArrays()[0][i]);
    }
    else
    {
      s += "0";
    }

    s += "\n";

    sl.push_back(s);
  }

  String out;
  out.concatenate(sl.begin(), sl.end(), "");
  in_strings.push_back(out);

  String out_encoded;
  Base64().encodeStrings(in_strings, out_encoded, false, false);
  String out_wrapped;
  wrap_(out_encoded, 76, out_wrapped);
  return out_wrapped;
}
ExitCodes main_(int, const char**) { //------------------------------------------------------------- // parameter handling //------------------------------------------------------------- StringList in_spec = getStringList_("in"); StringList out = getStringList_("out"); String in_lib = getStringOption_("lib"); String compare_function = getStringOption_("compare_function"); Int precursor_mass_multiplier = getIntOption_("round_precursor_to_integer"); float precursor_mass_tolerance = getDoubleOption_("precursor_mass_tolerance"); //Int min_precursor_charge = getIntOption_("min_precursor_charge"); //Int max_precursor_charge = getIntOption_("max_precursor_charge"); float remove_peaks_below_threshold = getDoubleOption_("filter:remove_peaks_below_threshold"); UInt min_peaks = getIntOption_("filter:min_peaks"); UInt max_peaks = getIntOption_("filter:max_peaks"); Int cut_peaks_below = getIntOption_("filter:cut_peaks_below"); StringList fixed_modifications = getStringList_("fixed_modifications"); StringList variable_modifications = getStringList_("variable_modifications"); Int top_hits = getIntOption_("top_hits"); if (top_hits < -1) { writeLog_("top_hits (should be >= -1 )"); return ILLEGAL_PARAMETERS; } //------------------------------------------------------------- // loading input //------------------------------------------------------------- if (out.size() != in_spec.size()) { writeLog_("out (should be as many as input files)"); return ILLEGAL_PARAMETERS; } time_t prog_time = time(NULL); MSPFile spectral_library; RichPeakMap query, library; //spectrum which will be identified MzMLFile spectra; spectra.setLogType(log_type_); time_t start_build_time = time(NULL); //------------------------------------------------------------- //building map for faster search //------------------------------------------------------------- //library containing already identified peptide spectra vector<PeptideIdentification> ids; spectral_library.load(in_lib, ids, library); map<Size, 
vector<PeakSpectrum> > MSLibrary; { RichPeakMap::iterator s; vector<PeptideIdentification>::iterator i; ModificationsDB* mdb = ModificationsDB::getInstance(); for (s = library.begin(), i = ids.begin(); s < library.end(); ++s, ++i) { double precursor_MZ = (*s).getPrecursors()[0].getMZ(); Size MZ_multi = (Size)precursor_MZ * precursor_mass_multiplier; map<Size, vector<PeakSpectrum> >::iterator found; found = MSLibrary.find(MZ_multi); PeakSpectrum librar; bool variable_modifications_ok = true; bool fixed_modifications_ok = true; const AASequence& aaseq = i->getHits()[0].getSequence(); //variable fixed modifications if (!fixed_modifications.empty()) { for (Size i = 0; i < aaseq.size(); ++i) { const Residue& mod = aaseq.getResidue(i); for (Size s = 0; s < fixed_modifications.size(); ++s) { if (mod.getOneLetterCode() == mdb->getModification(fixed_modifications[s]).getOrigin() && fixed_modifications[s] != mod.getModification()) { fixed_modifications_ok = false; break; } } } } //variable modifications if (aaseq.isModified() && (!variable_modifications.empty())) { for (Size i = 0; i < aaseq.size(); ++i) { if (aaseq.isModified(i)) { const Residue& mod = aaseq.getResidue(i); for (Size s = 0; s < variable_modifications.size(); ++s) { if (mod.getOneLetterCode() == mdb->getModification(variable_modifications[s]).getOrigin() && variable_modifications[s] != mod.getModification()) { variable_modifications_ok = false; break; } } } } } if (variable_modifications_ok && fixed_modifications_ok) { PeptideIdentification& translocate_pid = *i; librar.getPeptideIdentifications().push_back(translocate_pid); librar.setPrecursors(s->getPrecursors()); //library entry transformation for (UInt l = 0; l < s->size(); ++l) { Peak1D peak; if ((*s)[l].getIntensity() > remove_peaks_below_threshold) { const String& info = (*s)[l].getMetaValue("MSPPeakInfo"); if (info[0] == '?') { peak.setIntensity(sqrt(0.2 * (*s)[l].getIntensity())); } else { peak.setIntensity(sqrt((*s)[l].getIntensity())); } 
peak.setMZ((*s)[l].getMZ()); peak.setPosition((*s)[l].getPosition()); librar.push_back(peak); } } if (found != MSLibrary.end()) { found->second.push_back(librar); } else { vector<PeakSpectrum> tmp; tmp.push_back(librar); MSLibrary.insert(make_pair(MZ_multi, tmp)); } } } } time_t end_build_time = time(NULL); cout << "Time needed for preprocessing data: " << (end_build_time - start_build_time) << "\n"; //compare function PeakSpectrumCompareFunctor* comparor = Factory<PeakSpectrumCompareFunctor>::create(compare_function); //------------------------------------------------------------- // calculations //------------------------------------------------------------- double score; StringList::iterator in, out_file; for (in = in_spec.begin(), out_file = out.begin(); in < in_spec.end(); ++in, ++out_file) { time_t start_time = time(NULL); spectra.load(*in, query); //Will hold valuable hits vector<PeptideIdentification> peptide_ids; vector<ProteinIdentification> protein_ids; // Write parameters to ProteinIdentifcation ProteinIdentification prot_id; //Parameters of identificaion prot_id.setIdentifier("test"); prot_id.setSearchEngineVersion("SpecLibSearcher"); prot_id.setDateTime(DateTime::now()); prot_id.setScoreType(compare_function); ProteinIdentification::SearchParameters searchparam; searchparam.precursor_tolerance = precursor_mass_tolerance; prot_id.setSearchParameters(searchparam); /***********SEARCH**********/ for (UInt j = 0; j < query.size(); ++j) { //Set identifier for each identifications PeptideIdentification pid; pid.setIdentifier("test"); pid.setScoreType(compare_function); ProteinHit pr_hit; pr_hit.setAccession(j); prot_id.insertHit(pr_hit); //RichPeak1D to Peak1D transformation for the compare function query PeakSpectrum quer; bool peak_ok = true; query[j].sortByIntensity(true); double min_high_intensity = 0; if (query[j].empty() || query[j].getMSLevel() != 2) { continue; } if (query[j].getPrecursors().empty()) { writeLog_("Warning MS2 spectrum without 
precursor information"); continue; } min_high_intensity = (1 / cut_peaks_below) * query[j][0].getIntensity(); query[j].sortByPosition(); for (UInt k = 0; k < query[j].size() && k < max_peaks; ++k) { if (query[j][k].getIntensity() > remove_peaks_below_threshold && query[j][k].getIntensity() >= min_high_intensity) { Peak1D peak; peak.setIntensity(sqrt(query[j][k].getIntensity())); peak.setMZ(query[j][k].getMZ()); peak.setPosition(query[j][k].getPosition()); quer.push_back(peak); } } if (quer.size() >= min_peaks) { peak_ok = true; } else { peak_ok = false; } double query_MZ = query[j].getPrecursors()[0].getMZ(); if (peak_ok) { bool charge_one = false; Int percent = (Int) Math::round((query[j].size() / 100.0) * 3.0); Int margin = (Int) Math::round((query[j].size() / 100.0) * 1.0); for (vector<RichPeak1D>::iterator peak = query[j].end() - 1; percent >= 0; --peak, --percent) { if (peak->getMZ() < query_MZ) { break; } } if (percent > margin) { charge_one = true; } float min_MZ = (query_MZ - precursor_mass_tolerance) * precursor_mass_multiplier; float max_MZ = (query_MZ + precursor_mass_tolerance) * precursor_mass_multiplier; for (Size mz = (Size)min_MZ; mz <= ((Size)max_MZ) + 1; ++mz) { map<Size, vector<PeakSpectrum> >::iterator found; found = MSLibrary.find(mz); if (found != MSLibrary.end()) { vector<PeakSpectrum>& library = found->second; for (Size i = 0; i < library.size(); ++i) { float this_MZ = library[i].getPrecursors()[0].getMZ() * precursor_mass_multiplier; if (this_MZ >= min_MZ && max_MZ >= this_MZ && ((charge_one == true && library[i].getPeptideIdentifications()[0].getHits()[0].getCharge() == 1) || charge_one == false)) { PeptideHit hit = library[i].getPeptideIdentifications()[0].getHits()[0]; PeakSpectrum& librar = library[i]; //Special treatment for SpectraST score as it computes a score based on the whole library if (compare_function == "SpectraSTSimilarityScore") { SpectraSTSimilarityScore* sp = static_cast<SpectraSTSimilarityScore*>(comparor); 
BinnedSpectrum quer_bin = sp->transform(quer); BinnedSpectrum librar_bin = sp->transform(librar); score = (*sp)(quer, librar); //(*sp)(quer_bin,librar_bin); double dot_bias = sp->dot_bias(quer_bin, librar_bin, score); hit.setMetaValue("DOTBIAS", dot_bias); } else { score = (*comparor)(quer, librar); } DataValue RT(library[i].getRT()); DataValue MZ(library[i].getPrecursors()[0].getMZ()); hit.setMetaValue("RT", RT); hit.setMetaValue("MZ", MZ); hit.setScore(score); PeptideEvidence pe; pe.setProteinAccession(pr_hit.getAccession()); hit.addPeptideEvidence(pe); pid.insertHit(hit); } } } } } pid.setHigherScoreBetter(true); pid.sort(); if (compare_function == "SpectraSTSimilarityScore") { if (!pid.empty() && !pid.getHits().empty()) { vector<PeptideHit> final_hits; final_hits.resize(pid.getHits().size()); SpectraSTSimilarityScore* sp = static_cast<SpectraSTSimilarityScore*>(comparor); Size runner_up = 1; for (; runner_up < pid.getHits().size(); ++runner_up) { if (pid.getHits()[0].getSequence().toUnmodifiedString() != pid.getHits()[runner_up].getSequence().toUnmodifiedString() || runner_up > 5) { break; } } double delta_D = sp->delta_D(pid.getHits()[0].getScore(), pid.getHits()[runner_up].getScore()); for (Size s = 0; s < pid.getHits().size(); ++s) { final_hits[s] = pid.getHits()[s]; final_hits[s].setMetaValue("delta D", delta_D); final_hits[s].setMetaValue("dot product", pid.getHits()[s].getScore()); final_hits[s].setScore(sp->compute_F(pid.getHits()[s].getScore(), delta_D, pid.getHits()[s].getMetaValue("DOTBIAS"))); //final_hits[s].removeMetaValue("DOTBIAS"); } pid.setHits(final_hits); pid.sort(); pid.setMZ(query[j].getPrecursors()[0].getMZ()); pid.setRT(query_MZ); } } if (top_hits != -1 && (UInt)top_hits < pid.getHits().size()) { vector<PeptideHit> hits; hits.resize(top_hits); for (Size i = 0; i < (UInt)top_hits; ++i) { hits[i] = pid.getHits()[i]; } pid.setHits(hits); } peptide_ids.push_back(pid); } protein_ids.push_back(prot_id); 
//------------------------------------------------------------- // writing output //------------------------------------------------------------- IdXMLFile id_xml_file; id_xml_file.store(*out_file, protein_ids, peptide_ids); time_t end_time = time(NULL); cout << "Search time: " << difftime(end_time, start_time) << " seconds for " << *in << "\n"; } time_t end_time = time(NULL); cout << "Total time: " << difftime(end_time, prog_time) << " secconds\n"; return EXECUTION_OK; }
delete ptr; END_SECTION ptr = new TheoreticalSpectrumGenerator(); AASequence peptide = AASequence::fromString("IFSQVGK"); START_SECTION(TheoreticalSpectrumGenerator& operator = (const TheoreticalSpectrumGenerator& tsg)) TheoreticalSpectrumGenerator copy; copy = *ptr; TEST_EQUAL(copy.getParameters(), ptr->getParameters()) END_SECTION START_SECTION(void getSpectrum(PeakSpectrum& spec, const AASequence& peptide, Int min_charge = 1, Int max_charge = 1)) PeakSpectrum spec; ptr->getSpectrum(spec, peptide, 1, 1); TEST_EQUAL(spec.size(), 11) TOLERANCE_ABSOLUTE(0.001) double result[] = {/*114.091,*/ 147.113, 204.135, 261.16, 303.203, 348.192, 431.262, 476.251, 518.294, 575.319, 632.341, 665.362}; for (Size i = 0; i != spec.size(); ++i) { TEST_REAL_SIMILAR(spec[i].getPosition()[0], result[i]) } spec.clear(true); ptr->getSpectrum(spec, peptide, 1, 2); TEST_EQUAL(spec.size(), 22) spec.clear(true); Param param(ptr->getParameters());
PeakSpectrum spec; Param p = ptr->getDefaults(); p.setValue ("hide_losses", "true"); p.setValue ("add_metainfo", "true"); ptr->setParameters (p); ptr->load(); ptr->simulate(spec, peptide, rnd_gen, 1); PeakMap exp; MzMLFile mz_file; #if OPENMS_BOOST_VERSION_MINOR < 56 mz_file.load(OPENMS_GET_TEST_DATA_PATH("SvmTheoreticalSpectrumGenerator_test.mzML"),exp); TEST_EQUAL(spec.size(), 7); #else mz_file.load(OPENMS_GET_TEST_DATA_PATH("SvmTheoreticalSpectrumGenerator_test_boost58.mzML"),exp); TEST_EQUAL(spec.size(), 8); // the extra peak: TEST_EQUAL(spec.getStringDataArrays()[0][2], "YIon 0++") // TODO: ion_nr is always zero, its actually y4++ TEST_EQUAL(spec.getIntegerDataArrays()[0][2], 2) #endif TEST_EQUAL(exp.size(), 1); if(exp.size()) { TEST_EQUAL(spec.size(), exp[0].size()); Size min_size = min(spec.size(), exp[0].size()); for(Size i = 0; i<min_size; ++i)
PeakSpectrum theo_spec_1, theo_spec_2, exp_spec_1, exp_spec_2; specGen.getCommonIonSpectrum(exp_spec_1, AASequence::fromString("PEPTIDE"), 2, true, 3); specGen.getCommonIonSpectrum(exp_spec_2, AASequence::fromString("PEPTEDI"), 3, true, 3); param.setValue("add_metainfo", "true"); specGen.setParameters(param); specGen.getCommonIonSpectrum(theo_spec_1, AASequence::fromString("PEPTIDE"), 3, true, 3); specGen.getCommonIonSpectrum(theo_spec_2, AASequence::fromString("PEPTEDI"), 4, true, 3); START_SECTION(static PeakSpectrum mergeAnnotatedSpectra(PeakSpectrum & first_spectrum, PeakSpectrum & second_spectrum)) PeakSpectrum merged_spec = OPXLSpectrumProcessingAlgorithms::mergeAnnotatedSpectra(theo_spec_1, theo_spec_2); TEST_EQUAL(merged_spec.size(), 36) TEST_EQUAL(merged_spec.getIntegerDataArrays().size(), 1) TEST_EQUAL(merged_spec.getIntegerDataArrays()[0].size(), 36) TEST_EQUAL(merged_spec.getStringDataArrays()[0].size(), 36) TEST_EQUAL(merged_spec.getIntegerDataArrays()[0][10], 3) TEST_EQUAL(merged_spec.getStringDataArrays()[0][10], "[alpha|ci$y2]") TEST_EQUAL(merged_spec.getIntegerDataArrays()[0][20], 2) TEST_EQUAL(merged_spec.getStringDataArrays()[0][20], "[alpha|ci$y2]") TEST_REAL_SIMILAR(merged_spec[10].getMZ(), 83.04780) TEST_REAL_SIMILAR(merged_spec[20].getMZ(), 132.04732) for (Size i = 0; i < merged_spec.size()-1; ++i) { TEST_EQUAL(merged_spec[i].getMZ() <= merged_spec[i+1].getMZ(), true) }
double ZhangSimilarityScore::operator()(const PeakSpectrum & s1, const PeakSpectrum & s2) const { const double tolerance = (double)param_.getValue("tolerance"); bool use_linear_factor = param_.getValue("use_linear_factor").toBool(); bool use_gaussian_factor = param_.getValue("use_gaussian_factor").toBool(); double score(0), sum(0), sum1(0), sum2(0) /*, squared_sum1(0), squared_sum2(0)*/; // TODO remove parameter if (param_.getValue("is_relative_tolerance").toBool() ) { throw Exception::NotImplemented(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION); } for (PeakSpectrum::ConstIterator it1 = s1.begin(); it1 != s1.end(); ++it1) { sum1 += it1->getIntensity(); /* for (PeakSpectrum::ConstIterator it2 = s1.begin(); it2 != s1.end(); ++it2) { if (abs(it1->getPosition()[0] - it2->getPosition()[0]) <= 2 * tolerance) { squared_sum1 += it1->getIntensity() * it2->getIntensity(); } }*/ } /* UInt i_left(0); for (Size i = 0; i != s1.size(); ++i) { sum1 += s1[i].getIntensity(); for (Size j = i_left; j != s1.size(); ++j) { double pos1(s1[i].getPosition()[0]), pos2(s1[j].getPosition()[0]); if (abs(pos1 - pos2) <= 2 * tolerance) { squared_sum1 += s1[i].getIntensity() * s1[j].getIntensity(); } else { if (pos2 > pos1) { break; } else { i_left = i; } } } }*/ /* i_left = 0; for (Size i = 0; i != s2.size(); ++i) { sum2 += s2[i].getIntensity(); for (Size j = i_left; j != s2.size(); ++j) { double pos1(s2[i].getPosition()[0]), pos2(s2[j].getPosition()[0]); if (abs(pos1 - pos2) <= 2 * tolerance) { squared_sum1 += s2[i].getIntensity() * s2[j].getIntensity(); } else { if (pos2 > pos1) { break; } else { i_left = i; } } } }*/ for (PeakSpectrum::ConstIterator it1 = s2.begin(); it1 != s2.end(); ++it1) { sum2 += it1->getIntensity(); /* for (PeakSpectrum::ConstIterator it2 = s2.begin(); it2 != s2.end(); ++it2) { if (abs(it1->getPosition()[0] - it2->getPosition()[0]) <= 2 * tolerance) { squared_sum2 += it1->getIntensity() * it2->getIntensity(); } } */ } Size j_left(0); for (Size i = 0; i != s1.size(); ++i) { 
for (Size j = j_left; j != s2.size(); ++j) { double pos1(s1[i].getMZ()), pos2(s2[j].getMZ()); if (fabs(pos1 - pos2) < tolerance) { //double factor((tolerance - fabs(pos1 - pos2)) / tolerance); double factor = 1.0; if (use_linear_factor || use_gaussian_factor) { factor = getFactor_(tolerance, fabs(pos1 - pos2), use_gaussian_factor); } sum += sqrt(s1[i].getIntensity() * s2[j].getIntensity() * factor); } else { if (pos2 > pos1) { break; } else { j_left = j; } } } } /* for (PeakSpectrum::ConstIterator it1 = s1.begin(); it1 != s1.end(); ++it1) { for (PeakSpectrum::ConstIterator it2 = s2.begin(); it2 != s2.end(); ++it2) { if (abs(it1->getPosition()[0] - it2->getPosition()[0]) <= 2 * tolerance) { sum += sqrt(it1->getIntensity() * it2->getIntensity()); } } }*/ score = sum / (sqrt(sum1 * sum2)); return score; }
// ThresholdMower class-test fragment. The first TEST_EQUAL/END_SECTION closes a
// section that begins before this line.
// Assignment-operator section: a default-constructed ThresholdMower assigned
// from *e_ptr must report the same parameters and name as *e_ptr.
// filterSpectrum section: the spectrum loaded from Transformers_tests.dta has
// 121 peaks; filtering with "threshold" = 1.0 leaves 121 peaks, and filtering
// the same spectrum again with "threshold" = 10.0 leaves 14.
TEST_EQUAL(copy.getName(), e_ptr->getName()) END_SECTION START_SECTION((ThresholdMower& operator=(const ThresholdMower& source))) ThresholdMower copy; copy = *e_ptr; TEST_EQUAL(copy.getParameters(), e_ptr->getParameters()) TEST_EQUAL(copy.getName(), e_ptr->getName()); END_SECTION START_SECTION((template<typename SpectrumType> void filterSpectrum(SpectrumType& spectrum))) DTAFile dta_file; PeakSpectrum spec; dta_file.load(OPENMS_GET_TEST_DATA_PATH("Transformers_tests.dta"), spec); TEST_EQUAL(spec.size(), 121) Param p(e_ptr->getParameters()); p.setValue("threshold", 1.0); e_ptr->setParameters(p); e_ptr->filterSpectrum(spec); TEST_EQUAL(spec.size(), 121) p.setValue("threshold", 10.0); e_ptr->setParameters(p); e_ptr->filterSpectrum(spec); TEST_EQUAL(spec.size(), 14) END_SECTION
TEST_EQUAL(copy.getName(), e_ptr->getName()) END_SECTION START_SECTION((BernNorm& operator=(const BernNorm& source))) BernNorm copy; copy = *e_ptr; TEST_EQUAL(copy.getParameters(), e_ptr->getParameters()) TEST_EQUAL(copy.getName(), e_ptr->getName()) END_SECTION START_SECTION((template<typename SpectrumType> void filterSpectrum(SpectrumType& spectrum))) DTAFile dta_file; PeakSpectrum spec; dta_file.load(OPENMS_GET_TEST_DATA_PATH("Transformers_tests.dta"), spec); TEST_EQUAL(spec.size(), 121) e_ptr->filterSpectrum(spec); TEST_EQUAL(spec.size(), 121) Param p(e_ptr->getParameters()); p.setValue("C2", 2000.0); e_ptr->setParameters(p); e_ptr->filterSpectrum(spec); TEST_EQUAL(spec.size(), 28) END_SECTION START_SECTION((void filterPeakMap(PeakMap& exp)))