String XQuestResultXMLFile::getxQuestBase64EncodedSpectrum_(const PeakSpectrum& spec, String header) { std::vector<String> in_strings; StringList sl; double precursor_mz = 0; double precursor_z = 0; if (spec.getPrecursors().size() > 0) { precursor_mz = Math::roundDecimal(spec.getPrecursors()[0].getMZ(), -9); precursor_z = spec.getPrecursors()[0].getCharge(); } // header lines if (!header.empty()) // common or xlinker spectrum will be reported { sl.push_back(header + "\n"); // e.g. GUA1372-S14-A-LRRK2_DSS_1A3.03873.03873.3.dta,GUA1372-S14-A-LRRK2_DSS_1A3.03863.03863.3.dta sl.push_back(String(precursor_mz) + "\n"); sl.push_back(String(precursor_z) + "\n"); } else // light or heavy spectrum will be reported { sl.push_back(String(precursor_mz) + "\t" + String(precursor_z) + "\n"); } PeakSpectrum::IntegerDataArray charges; if (spec.getIntegerDataArrays().size() > 0) { charges = spec.getIntegerDataArrays()[0]; } // write peaks for (Size i = 0; i != spec.size(); ++i) { String s; s += String(Math::roundDecimal(spec[i].getMZ(), -9)) + "\t"; s += String(spec[i].getIntensity()) + "\t"; if (charges.size() > 0) { s += String(charges[i]); } else { s += "0"; } s += "\n"; sl.push_back(s); } String out; out.concatenate(sl.begin(), sl.end(), ""); in_strings.push_back(out); String out_encoded; Base64().encodeStrings(in_strings, out_encoded, false, false); String out_wrapped; wrap_(out_encoded, 76, out_wrapped); return out_wrapped; }
double PeakAlignment::operator()(const PeakSpectrum& spec1, const PeakSpectrum& spec2) const { PeakSpectrum s1(spec1), s2(spec2); // shortcut similarity calculation by comparing PrecursorPeaks (PrecursorPeaks more than delta away from each other are supposed to be from another peptide) DoubleReal pre_mz1 = 0.0; if (!spec1.getPrecursors().empty()) pre_mz1 = spec1.getPrecursors()[0].getMZ(); DoubleReal pre_mz2 = 0.0; if (!spec1.getPrecursors().empty()) pre_mz2 = spec2.getPrecursors()[0].getMZ(); if (fabs(pre_mz1 - pre_mz2) > (double)param_.getValue("precursor_mass_tolerance")) { return 0; } // heuristic shortcut const double epsilon = (double)param_.getValue("epsilon"); const UInt heuristic_level = (UInt)param_.getValue("heuristic_level"); bool heuristic_filters(true); if (heuristic_level) { s1.sortByIntensity(true); s2.sortByIntensity(true); //heuristic filters (and shortcuts) if spec1 and spec2 have NOT at least one peak in the sets of |heuristic_level|-many highest peaks in common for (PeakSpectrum::ConstIterator it_s1 = s1.begin(); Size(it_s1 - s1.begin()) < heuristic_level && it_s1 != s1.end(); ++it_s1) { for (PeakSpectrum::ConstIterator it_s2 = s2.begin(); Size(it_s2 - s2.begin()) < heuristic_level && it_s2 != s2.end(); ++it_s2) { // determine if it is a match, i.e. mutual peak at certain m/z with epsilon tolerance if (fabs((*it_s2).getMZ() - (*it_s1).getMZ()) < epsilon) { heuristic_filters = false; break; } } } } if (heuristic_filters && heuristic_level) { return 0; } //TODO gapcost dependence on distance ? const double gap = (double)param_.getValue("epsilon"); //initialize alignment matrix with 0 in (0,0) and a multiple of gapcost in the first row/col matrix(row,col,values) Matrix<double> matrix(spec1.size() + 1, spec2.size() + 1, 0); for (Size i = 1; i < matrix.rows(); i++) { matrix.setValue(i, 0, -gap * i); } for (Size i = 1; i < matrix.cols(); i++) { matrix.setValue(0, i, -gap * i); } //get sigma - the standard deviation (sqrt of variance) double mid(0); for (Size i = 0; i < spec1.size(); ++i) { for (Size j = 0; j < spec2.size(); ++j) { double pos1(spec1[i].getMZ()), pos2(spec2[j].getMZ()); mid += fabs(pos1 - pos2); } } // average peak distance mid /= (spec1.size() * spec2.size()); /* to manually retrace cout << "average peak distance " << mid << endl; */ double var(0); for (Size i = 0; i < spec1.size(); ++i) { for (Size j = 0; j < spec2.size(); ++j) { double pos1(spec1[i].getMZ()), pos2(spec2[j].getMZ()); var += (fabs(pos1 - pos2) - mid) * (fabs(pos1 - pos2) - mid); } } // peak distance variance var /= (spec1.size() * spec2.size()); /* to manually retrace cout << "peak distance variance " << var << endl; */ //only in case of only two equal peaks in the spectra sigma is 0 const double sigma((var == 0) ? numeric_limits<double>::min() : sqrt(var)); /* to manually retrace cout << "peak standard deviation " << sigma << endl; */ //fill alignment matrix for (Size i = 1; i < spec1.size() + 1; ++i) { for (Size j = 1; j < spec2.size() + 1; ++j) { double pos1(spec1[i - 1].getMZ()), pos2(spec2[j - 1].getMZ()); //only if peaks are in reasonable proximity alignment is considered else only gaps if (fabs(pos1 - pos2) <= epsilon) { // actual cell = max(upper left cell+score, left cell-gap, upper cell-gap) double from_left(matrix.getValue(i, j - 1) - gap); double from_above(matrix.getValue(i - 1, j) - gap); double int1(spec1[i - 1].getIntensity()), int2(spec2[j - 1].getIntensity()); double from_diagonal(matrix.getValue(i - 1, j - 1) + peakPairScore_(pos1, int1, pos2, int2, sigma)); matrix.setValue(i, j, max(from_left, max(from_above, from_diagonal))); } else { // actual cell = max(left cell-gap, upper cell-gap) double from_left(matrix.getValue(i, j - 1) - gap); double from_above(matrix.getValue(i - 1, j) - gap); matrix.setValue(i, j, max(from_left, from_above)); } } } /* to manually retrace cout << endl << matrix << endl; */ //get best overall score and return double best_score(numeric_limits<double>::min()); for (Size i = 0; i < matrix.cols(); i++) { best_score = max(best_score, matrix.getValue(matrix.rows() - 1, i)); } for (Size i = 0; i < matrix.rows(); i++) { best_score = max(best_score, matrix.getValue(i, matrix.cols() - 1)); } //calculate selfalignment-scores for both input spectra double score_spec1(0), score_spec2(0); for (Size i = 0; i < spec1.size(); ++i) { double int_i(spec1[i].getIntensity()); double pos_i(spec1[i].getMZ()); score_spec1 += peakPairScore_(pos_i, int_i, pos_i, int_i, sigma); } for (Size i = 0; i < spec2.size(); ++i) { double int_i(spec2[i].getIntensity()); double pos_i(spec2[i].getMZ()); score_spec2 += peakPairScore_(pos_i, int_i, pos_i, int_i, sigma); } /* to manually retrace cout << "score_spec1: " << score_spec1 << "score_spec2: " << score_spec2 << endl; */ //normalize score to interval [0,1] with geometric mean double best_score_normalized(best_score / sqrt(score_spec1 * score_spec2)); /* cout << "score_spec1: " << score_spec1 << " score_spec2: " << score_spec2 << " best_score: " << best_score << endl; //normalize score to interval [0,1] with arithmeic mean double best_score_normalized( (best_score*2) / (score_spec1 + score_spec2) ); */ return best_score_normalized; }
void CompNovoIdentificationCID::getIdentification(PeptideIdentification & id, const PeakSpectrum & CID_spec) { //if (CID_spec.getPrecursors().begin()->getMZ() > 1000.0) //{ //cerr << "Weight of precursor has been estimated to exceed 2000.0 Da which is the current limit" << endl; //return; //} PeakSpectrum new_CID_spec(CID_spec); windowMower_(new_CID_spec, 0.3, 1); Param zhang_param; zhang_param = zhang_.getParameters(); zhang_param.setValue("tolerance", fragment_mass_tolerance_); zhang_param.setValue("use_gaussian_factor", "true"); zhang_param.setValue("use_linear_factor", "false"); zhang_.setParameters(zhang_param); Normalizer normalizer; Param n_param(normalizer.getParameters()); n_param.setValue("method", "to_one"); normalizer.setParameters(n_param); normalizer.filterSpectrum(new_CID_spec); Size charge(2); double precursor_weight(0); // [M+H]+ if (!CID_spec.getPrecursors().empty()) { // believe charge of spectrum? if (CID_spec.getPrecursors().begin()->getCharge() != 0) { charge = CID_spec.getPrecursors().begin()->getCharge(); } else { // TODO estimate charge state } precursor_weight = CID_spec.getPrecursors().begin()->getMZ() * charge - ((charge - 1) * Constants::PROTON_MASS_U); } //cerr << "charge=" << charge << ", [M+H]=" << precursor_weight << endl; // now delete all peaks that are right of the estimated precursor weight Size peak_counter(0); for (PeakSpectrum::ConstIterator it = new_CID_spec.begin(); it != new_CID_spec.end(); ++it, ++peak_counter) { if (it->getPosition()[0] > precursor_weight) { break; } } if (peak_counter < new_CID_spec.size()) { new_CID_spec.resize(peak_counter); } static double oxonium_mass = EmpiricalFormula("H2O+").getMonoWeight(); Peak1D p; p.setIntensity(1); p.setPosition(oxonium_mass); new_CID_spec.push_back(p); p.setPosition(precursor_weight); new_CID_spec.push_back(p); // add complement to spectrum /* for (PeakSpectrum::ConstIterator it1 = CID_spec.begin(); it1 != CID_spec.end(); ++it1) { // get m/z of complement double mz_comp = precursor_weight - it1->getPosition()[0] + Constants::PROTON_MASS_U; // search if peaks are available that have similar m/z values Size count(0); bool found(false); for (PeakSpectrum::ConstIterator it2 = CID_spec.begin(); it2 != CID_spec.end(); ++it2, ++count) { if (fabs(mz_comp - it2->getPosition()[0]) < fragment_mass_tolerance) { // add peak intensity to corresponding peak in new_CID_spec new_CID_spec[count].setIntensity(new_CID_spec[count].getIntensity()); } } if (!found) { // infer this peak Peak1D p; p.setIntensity(it1->getIntensity()); p.setPosition(mz_comp); new_CID_spec.push_back(p); } }*/ CompNovoIonScoringCID ion_scoring; Param ion_scoring_param(ion_scoring.getParameters()); ion_scoring_param.setValue("fragment_mass_tolerance", fragment_mass_tolerance_); ion_scoring_param.setValue("precursor_mass_tolerance", precursor_mass_tolerance_); ion_scoring_param.setValue("decomp_weights_precision", decomp_weights_precision_); ion_scoring_param.setValue("double_charged_iso_threshold", (double)param_.getValue("double_charged_iso_threshold")); ion_scoring_param.setValue("max_isotope_to_score", param_.getValue("max_isotope_to_score")); ion_scoring_param.setValue("max_isotope", max_isotope_); ion_scoring.setParameters(ion_scoring_param); Map<double, IonScore> ion_scores; ion_scoring.scoreSpectrum(ion_scores, new_CID_spec, precursor_weight, charge); new_CID_spec.sortByPosition(); /* cerr << "Size of ion_scores " << ion_scores.size() << endl; for (Map<double, IonScore>::const_iterator it = ion_scores.begin(); it != ion_scores.end(); ++it) { cerr << it->first << " " << it->second.score << endl; }*/ #ifdef WRITE_SCORED_SPEC PeakSpectrum filtered_spec(new_CID_spec); filtered_spec.clear(); for (Map<double, CompNovoIonScoringCID::IonScore>::const_iterator it = ion_scores.begin(); it != ion_scores.end(); ++it) { Peak1D p; p.setIntensity(it->second.score); p.setPosition(it->first); filtered_spec.push_back(p); } DTAFile().store("spec_scored.dta", filtered_spec); #endif set<String> sequences; getDecompositionsDAC_(sequences, 0, new_CID_spec.size() - 1, precursor_weight, new_CID_spec, ion_scores); #ifdef SPIKE_IN sequences.insert("AFCVDGEGR"); sequences.insert("APEFAAPWPDFVPR"); sequences.insert("AVKQFEESQGR"); sequences.insert("CCTESLVNR"); sequences.insert("DAFLGSFLYEYSR"); sequences.insert("DAIPENLPPLTADFAEDK"); sequences.insert("DDNKVEDIWSFLSK"); sequences.insert("DDPHACYSTVFDK"); sequences.insert("DEYELLCLDGSR"); sequences.insert("DGAESYKELSVLLPNR"); sequences.insert("DGASCWCVDADGR"); sequences.insert("DLFIPTCLETGEFAR"); sequences.insert("DTHKSEIAHR"); sequences.insert("DVCKNYQEAK"); sequences.insert("EACFAVEGPK"); sequences.insert("ECCHGDLLECADDR"); sequences.insert("EFLGDKFYTVISSLK"); sequences.insert("EFTPVLQADFQK"); sequences.insert("ELFLDSGIFQPMLQGR"); sequences.insert("ETYGDMADCCEK"); sequences.insert("EVGCPSSSVQEMVSCLR"); sequences.insert("EYEATLEECCAK"); sequences.insert("FADLIQSGTFQLHLDSK"); sequences.insert("FFSASCVPGATIEQK"); sequences.insert("FLANVSTVLTSK"); sequences.insert("FLSGSDYAIR"); sequences.insert("FTASCPPSIK"); sequences.insert("GAIEWEGIESGSVEQAVAK"); sequences.insert("GDVAFIQHSTVEENTGGK"); sequences.insert("GEPPSCAEDQSCPSER"); sequences.insert("GEYVPTSLTAR"); sequences.insert("GQEFTITGQKR"); sequences.insert("GTFAALSELHCDK"); sequences.insert("HLVDEPQNLIK"); sequences.insert("HQDCLVTTLQTQPGAVR"); sequences.insert("HTTVNENAPDQK"); sequences.insert("ILDCGSPDTEVR"); sequences.insert("KCPSPCQLQAER"); sequences.insert("KGTEFTVNDLQGK"); sequences.insert("KQTALVELLK"); sequences.insert("KVPQVSTPTLVEVSR"); sequences.insert("LALQFTTNAKR"); sequences.insert("LCVLHEKTPVSEK"); sequences.insert("LFTFHADICTLPDTEK"); sequences.insert("LGEYGFQNALIVR"); sequences.insert("LHVDPENFK"); sequences.insert("LKECCDKPLLEK"); sequences.insert("LKHLVDEPQNLIK"); sequences.insert("LKPDPNTLCDEFK"); sequences.insert("LLGNVLVVVLAR"); sequences.insert("LLVVYPWTQR"); sequences.insert("LRVDPVNFK"); sequences.insert("LTDEELAFPPLSPSR"); sequences.insert("LVNELTEFAK"); sequences.insert("MFLSFPTTK"); sequences.insert("MPCTEDYLSLILNR"); sequences.insert("NAPYSGYSGAFHCLK"); sequences.insert("NECFLSHKDDSPDLPK"); sequences.insert("NEPNKVPACPGSCEEVK"); sequences.insert("NLQMDDFELLCTDGR"); sequences.insert("QAGVQAEPSPK"); sequences.insert("RAPEFAAPWPDFVPR"); sequences.insert("RHPEYAVSVLLR"); sequences.insert("RPCFSALTPDETYVPK"); sequences.insert("RSLLLAPEEGPVSQR"); sequences.insert("SAFPPEPLLCSVQR"); sequences.insert("SAGWNIPIGTLLHR"); sequences.insert("SCWCVDEAGQK"); sequences.insert("SGNPNYPHEFSR"); sequences.insert("SHCIAEVEK"); sequences.insert("SISSGFFECER"); sequences.insert("SKYLASASTMDHAR"); sequences.insert("SLHTLFGDELCK"); sequences.insert("SLLLAPEEGPVSQR"); sequences.insert("SPPQCSPDGAFRPVQCK"); sequences.insert("SREGDPLAVYLK"); sequences.insert("SRQIPQCPTSCER"); sequences.insert("TAGTPVSIPVCDDSSVK"); sequences.insert("TCVADESHAGCEK"); sequences.insert("TQFGCLEGFGR"); sequences.insert("TVMENFVAFVDK"); sequences.insert("TYFPHFDLSHGSAQVK"); sequences.insert("TYMLAFDVNDEK"); sequences.insert("VDEVGGEALGR"); sequences.insert("VDLLIGSSQDDGLINR"); sequences.insert("VEDIWSFLSK"); sequences.insert("VGGHAAEYGAEALER"); sequences.insert("VGTRCCTKPESER"); sequences.insert("VKVDEVGGEALGR"); sequences.insert("VKVDLLIGSSQDDGLINR"); sequences.insert("VLDSFSNGMK"); sequences.insert("VLSAADKGNVK"); sequences.insert("VPQVSTPTLVEVSR"); sequences.insert("VTKCCTESLVNR"); sequences.insert("VVAASDASQDALGCVK"); sequences.insert("VVAGVANALAHR"); sequences.insert("YICDNQDTISSK"); sequences.insert("YLASASTMDHAR"); sequences.insert("YNGVFQECCQAEDK"); #endif SpectrumAlignmentScore spectra_zhang; spectra_zhang.setParameters(zhang_param); vector<PeptideHit> hits; Size missed_cleavages = param_.getValue("missed_cleavages"); for (set<String>::const_iterator it = sequences.begin(); it != sequences.end(); ++it) { Size num_missed = countMissedCleavagesTryptic_(*it); if (missed_cleavages < num_missed) { //cerr << "Two many missed cleavages: " << *it << ", found " << num_missed << ", allowed " << missed_cleavages << endl; continue; } PeakSpectrum CID_sim_spec; getCIDSpectrum_(CID_sim_spec, *it, charge); //normalizer.filterSpectrum(CID_sim_spec); double cid_score = zhang_(CID_sim_spec, CID_spec); PeptideHit hit; hit.setScore(cid_score); hit.setSequence(getModifiedAASequence_(*it)); hit.setCharge((Int)charge); //TODO unify charge interface: int or size? hits.push_back(hit); //cerr << getModifiedAASequence_(*it) << " " << cid_score << " " << endl; } // rescore the top hits id.setHits(hits); id.assignRanks(); hits = id.getHits(); SpectrumAlignmentScore alignment_score; Param align_param(alignment_score.getParameters()); align_param.setValue("tolerance", fragment_mass_tolerance_); align_param.setValue("use_linear_factor", "true"); alignment_score.setParameters(align_param); for (vector<PeptideHit>::iterator it = hits.begin(); it != hits.end(); ++it) { //cerr << "Pre: " << it->getRank() << " " << it->getSequence() << " " << it->getScore() << " " << endl; } Size number_of_prescoring_hits = param_.getValue("number_of_prescoring_hits"); if (hits.size() > number_of_prescoring_hits) { hits.resize(number_of_prescoring_hits); } for (vector<PeptideHit>::iterator it = hits.begin(); it != hits.end(); ++it) { PeakSpectrum CID_sim_spec; getCIDSpectrum_(CID_sim_spec, getModifiedStringFromAASequence_(it->getSequence()), charge); normalizer.filterSpectrum(CID_sim_spec); //DTAFile().store("sim_specs/" + it->getSequence().toUnmodifiedString() + "_sim_CID.dta", CID_sim_spec); //double cid_score = spectra_zhang(CID_sim_spec, CID_spec); double cid_score = alignment_score(CID_sim_spec, CID_spec); //cerr << "Final: " << it->getSequence() << " " << cid_score << endl; it->setScore(cid_score); } id.setHits(hits); id.assignRanks(); hits = id.getHits(); for (vector<PeptideHit>::iterator it = hits.begin(); it != hits.end(); ++it) { //cerr << "Fin: " << it->getRank() << " " << it->getSequence() << " " << it->getScore() << " " << endl; } Size number_of_hits = param_.getValue("number_of_hits"); if (id.getHits().size() > number_of_hits) { hits.resize(number_of_hits); } id.setHits(hits); id.assignRanks(); return; }