void CompNovoIdentificationBase::getCIDSpectrumLight_(PeakSpectrum & spec, const String & sequence, DoubleReal prefix, DoubleReal suffix) { static DoubleReal h2o_mass = EmpiricalFormula("H2O").getMonoWeight(); Peak1D p; DoubleReal b_pos(0.0 + prefix); DoubleReal y_pos(h2o_mass + suffix); for (Size i = 0; i != sequence.size() - 1; ++i) { char aa(sequence[i]); b_pos += aa_to_weight_[aa]; char aa2(sequence[sequence.size() - i - 1]); y_pos += aa_to_weight_[aa2]; if (b_pos > min_mz_ && b_pos < max_mz_) { p.setPosition(b_pos + Constants::PROTON_MASS_U); p.setIntensity(1.0f); spec.push_back(p); } if (y_pos > min_mz_ && y_pos < max_mz_) { p.setPosition(y_pos + Constants::PROTON_MASS_U); p.setIntensity(1.0f); spec.push_back(p); } } spec.sortByPosition(); return; }
void OpenSwathDataAccessHelper::convertToOpenMSSpectrum(const OpenSwath::SpectrumPtr sptr, OpenMS::MSSpectrum<> & spectrum) { // recreate a spectrum from the data arrays! OpenSwath::BinaryDataArrayPtr mz_arr = sptr->getMZArray(); OpenSwath::BinaryDataArrayPtr int_arr = sptr->getIntensityArray(); spectrum.reserve(mz_arr->data.size()); for (Size i = 0; i < mz_arr->data.size(); i++) { Peak1D p; p.setMZ(mz_arr->data[i]); p.setIntensity(int_arr->data[i]); spectrum.push_back(p); } }
void getSwathFile(PeakMap& exp, int nr_swathes=32, bool ms1=true) { if (ms1) { MSSpectrum s; s.setMSLevel(1); Peak1D p; p.setMZ(100); p.setIntensity(200); s.push_back(p); exp.addSpectrum(s); } for (int i = 0; i< nr_swathes; i++) { MSSpectrum s; s.setMSLevel(2); std::vector<Precursor> prec(1); prec[0].setIsolationWindowLowerOffset(12.5); prec[0].setIsolationWindowUpperOffset(12.5); prec[0].setMZ(400 + i*25 + 12.5); s.setPrecursors(prec); Peak1D p; p.setMZ(101 + i); p.setIntensity(201 + i); s.push_back(p); exp.addSpectrum(s); } }
bool IDEvaluationBase::getPoints(std::vector<PeptideIdentification>& peptides /* cannot be const, to avoid copy */, const std::vector<double>& q_value_thresholds, MSSpectrum<>& points) { points.clear(true); FalseDiscoveryRate fdr; fdr.setParameters(param_.copy("fdr:", true)); try { fdr.apply(peptides); // computes a q-value (if its params are correct) } catch (Exception::MissingInformation) { LOG_FATAL_ERROR << "Tool failed due to missing information (see above)." << std::endl; return false; } // get list of q-values and sort them std::vector<double> q_values; q_values.reserve(peptides.size()); for (vector<PeptideIdentification>::iterator it = peptides.begin(); it != peptides.end(); ++it) { it->assignRanks(); if (it->getHits().size() > 0) q_values.push_back(it->getHits()[0].getScore()); } std::sort(q_values.begin(), q_values.end()); for (Size i = 0; i < q_value_thresholds.size(); ++i) { // get position in sorted q-values where cutoff is reached std::vector<double>::iterator pos = std::upper_bound(q_values.begin(), q_values.end(), q_value_thresholds[i]); Peak1D p; p.setMZ(q_value_thresholds[i] * 100); p.setIntensity(std::distance(q_values.begin(), pos)); points.push_back(p); } return true; }
void PeakPickerHiRes::pick(const MSChromatogram& input, MSChromatogram& output, std::vector<PeakBoundary>& boundaries) const { // copy meta data of the input chromatogram output.clear(true); output.ChromatogramSettings::operator=(input); output.MetaInfoInterface::operator=(input); output.setName(input.getName()); MSSpectrum input_spectrum; MSSpectrum output_spectrum; for (MSChromatogram::const_iterator it = input.begin(); it != input.end(); ++it) { Peak1D p; p.setMZ(it->getRT()); p.setIntensity(it->getIntensity()); input_spectrum.push_back(p); } pick(input_spectrum, output_spectrum, boundaries, false); // no spacing checks! for (MSSpectrum::const_iterator it = output_spectrum.begin(); it != output_spectrum.end(); ++it) { ChromatogramPeak p; p.setRT(it->getMZ()); p.setIntensity(it->getIntensity()); output.push_back(p); } // copy float data arrays (for FWHM) output.getFloatDataArrays().resize(output_spectrum.getFloatDataArrays().size()); for (Size i = 0; i < output_spectrum.getFloatDataArrays().size(); ++i) { output.getFloatDataArrays()[i].insert(output.getFloatDataArrays()[i].begin(), output_spectrum.getFloatDataArrays()[i].begin(), output_spectrum.getFloatDataArrays()[i].end()); output.getFloatDataArrays()[i].setName(output_spectrum.getFloatDataArrays()[i].getName()); } }
ExitCodes main_(int, const char**) { //------------------------------------------------------------- // parsing parameters //------------------------------------------------------------- String in(getStringOption_("in")); String out(getStringOption_("out")); String pair_in(getStringOption_("pair_in")); String feature_out(getStringOption_("feature_out")); double precursor_mass_tolerance(getDoubleOption_("precursor_mass_tolerance")); double RT_tolerance(getDoubleOption_("RT_tolerance")); double expansion_range(getDoubleOption_("expansion_range")); Size max_isotope(getIntOption_("max_isotope")); Int debug(getIntOption_("debug")); //------------------------------------------------------------- // reading input //------------------------------------------------------------- PeakMap exp; MzMLFile().load(in, exp); exp.sortSpectra(); exp.updateRanges(); // read pair file ifstream is(pair_in.c_str()); String line; vector<SILAC_pair> pairs; while (getline(is, line)) { line.trim(); if (line.empty() || line[0] == '#') { continue; } vector<String> split; line.split(' ', split); if (split.size() != 4) { cerr << "missformated line ('" << line << "') should be (space separated) 'm/z-light m/z-heavy charge rt'" << endl; } SILAC_pair p; p.mz_light = split[0].toDouble(); p.mz_heavy = split[1].toDouble(); p.charge = split[2].toInt(); p.rt = split[3].toDouble(); pairs.push_back(p); } is.close(); //------------------------------------------------------------- // calculations //------------------------------------------------------------- ConsensusMap results_map; results_map.getFileDescriptions()[0].label = "light"; results_map.getFileDescriptions()[0].filename = in; results_map.getFileDescriptions()[1].label = "heavy"; results_map.getFileDescriptions()[1].filename = in; FeatureFinderAlgorithmIsotopeWavelet iso_ff; Param ff_param(iso_ff.getParameters()); ff_param.setValue("max_charge", 3); ff_param.setValue("intensity_threshold", -1.0); iso_ff.setParameters(ff_param); FeatureFinder ff; ff.setLogType(ProgressLogger::NONE); vector<SILACQuantitation> quantlets; FeatureMap all_features; for (PeakMap::ConstIterator it = exp.begin(); it != exp.end(); ++it) { if (it->size() == 0 || it->getMSLevel() != 1 || !it->getInstrumentSettings().getZoomScan()) { continue; } PeakSpectrum new_spec = *it; // get spacing from data double min_spacing(numeric_limits<double>::max()); double last_mz(0); for (PeakSpectrum::ConstIterator pit = new_spec.begin(); pit != new_spec.end(); ++pit) { if (pit->getMZ() - last_mz < min_spacing) { min_spacing = pit->getMZ() - last_mz; } last_mz = pit->getMZ(); } writeDebug_("Min-spacing=" + String(min_spacing), 1); // split the spectrum into two subspectra, by using different hypothesis of // the SILAC pairs Size idx = 0; for (vector<SILAC_pair>::const_iterator pit = pairs.begin(); pit != pairs.end(); ++pit, ++idx) { // in RT window? if (fabs(it->getRT() - pit->rt) >= RT_tolerance) { continue; } // now excise the two ranges for the pair, complete isotope distributions of both, light and heavy PeakSpectrum light_spec, heavy_spec; light_spec.setRT(it->getRT()); heavy_spec.setRT(it->getRT()); for (PeakSpectrum::ConstIterator sit = it->begin(); sit != it->end(); ++sit) { double mz(sit->getMZ()); if (mz - (pit->mz_light - precursor_mass_tolerance) > 0 && (pit->mz_light + (double)max_isotope * Constants::NEUTRON_MASS_U / (double)pit->charge + precursor_mass_tolerance) - mz > 0) { light_spec.push_back(*sit); } if (mz - (pit->mz_heavy - precursor_mass_tolerance) > 0 && (pit->mz_heavy + (double)max_isotope * Constants::NEUTRON_MASS_U / (double)pit->charge + precursor_mass_tolerance) - mz > 0) { heavy_spec.push_back(*sit); } } // expand light spectrum Peak1D p; p.setIntensity(0); if (light_spec.size() > 0) { double lower_border = light_spec.begin()->getMZ() - expansion_range; for (double pos = light_spec.begin()->getMZ(); pos > lower_border; pos -= min_spacing) { p.setMZ(pos); light_spec.insert(light_spec.begin(), p); } double upper_border = light_spec.begin()->getMZ() - expansion_range; for (double pos = light_spec.rbegin()->getMZ(); pos < upper_border; pos += min_spacing) { p.setMZ(pos); light_spec.push_back(p); } } if (heavy_spec.size() > 0) { // expand heavy spectrum double lower_border = heavy_spec.begin()->getMZ() - expansion_range; for (double pos = heavy_spec.begin()->getMZ(); pos > lower_border; pos -= min_spacing) { p.setMZ(pos); heavy_spec.insert(heavy_spec.begin(), p); } double upper_border = heavy_spec.begin()->getMZ() - expansion_range; for (double pos = heavy_spec.rbegin()->getMZ(); pos < upper_border; pos += min_spacing) { p.setMZ(pos); heavy_spec.push_back(p); } } // create experiments for feature finding PeakMap new_exp_light, new_exp_heavy; new_exp_light.addSpectrum(light_spec); new_exp_heavy.addSpectrum(heavy_spec); if (debug > 9) { MzMLFile().store(String(it->getRT()) + "_debugging_light.mzML", new_exp_light); MzMLFile().store(String(it->getRT()) + "_debugging_heavy.mzML", new_exp_heavy); } writeDebug_("Spectrum-id: " + it->getNativeID() + " @ " + String(it->getRT()) + "s", 1); new_exp_light.updateRanges(); new_exp_heavy.updateRanges(); FeatureMap feature_map_light, feature_map_heavy, seeds; if (light_spec.size() > 0) { ff.run("isotope_wavelet", new_exp_light, feature_map_light, ff_param, seeds); } writeDebug_("#light_features=" + String(feature_map_light.size()), 1); if (heavy_spec.size() > 0) { ff.run("isotope_wavelet", new_exp_heavy, feature_map_heavy, ff_param, seeds); } writeDebug_("#heavy_features=" + String(feature_map_heavy.size()), 1); // search if feature maps to m/z value of pair vector<MatchedFeature> light, heavy; for (FeatureMap::const_iterator fit = feature_map_light.begin(); fit != feature_map_light.end(); ++fit) { all_features.push_back(*fit); light.push_back(MatchedFeature(*fit, idx)); } for (FeatureMap::const_iterator fit = feature_map_heavy.begin(); fit != feature_map_heavy.end(); ++fit) { all_features.push_back(*fit); heavy.push_back(MatchedFeature(*fit, idx)); } if (!heavy.empty() && !light.empty()) { writeDebug_("Finding best feature pair out of " + String(light.size()) + " light and " + String(heavy.size()) + " heavy matching features.", 1); // now find "good" matches, means the pair with the smallest m/z deviation Feature best_light, best_heavy; double best_deviation(numeric_limits<double>::max()); Size best_idx(pairs.size()); for (vector<MatchedFeature>::const_iterator fit1 = light.begin(); fit1 != light.end(); ++fit1) { for (vector<MatchedFeature>::const_iterator fit2 = heavy.begin(); fit2 != heavy.end(); ++fit2) { if (fit1->idx != fit2->idx || fit1->f.getCharge() != fit2->f.getCharge() || fabs(fit1->f.getMZ() - pairs[fit1->idx].mz_light) > precursor_mass_tolerance || fabs(fit2->f.getMZ() - pairs[fit2->idx].mz_heavy) > precursor_mass_tolerance) { continue; } double deviation(0); deviation = fabs((fit1->f.getMZ() - pairs[fit1->idx].mz_light) - (fit2->f.getMZ() - pairs[fit2->idx].mz_heavy)); if (deviation < best_deviation && deviation < precursor_mass_tolerance) { best_light = fit1->f; best_heavy = fit2->f; best_idx = fit1->idx; } } } if (best_idx == pairs.size()) { continue; } writeDebug_("Ratio: " + String(best_heavy.getIntensity() / best_light.getIntensity()), 1); ConsensusFeature SILAC_feature; SILAC_feature.setMZ((best_light.getMZ() + best_heavy.getMZ()) / 2.0); SILAC_feature.setRT((best_light.getRT() + best_heavy.getRT()) / 2.0); SILAC_feature.insert(0, best_light); SILAC_feature.insert(1, best_heavy); results_map.push_back(SILAC_feature); quantlets.push_back(SILACQuantitation(best_light.getIntensity(), best_heavy.getIntensity(), best_idx)); } } } // now calculate the final quantitation values from the quantlets Map<Size, vector<SILACQuantitation> > idx_to_quantlet; for (vector<SILACQuantitation>::const_iterator it = quantlets.begin(); it != quantlets.end(); ++it) { idx_to_quantlet[it->idx].push_back(*it); } for (Map<Size, vector<SILACQuantitation> >::ConstIterator it1 = idx_to_quantlet.begin(); it1 != idx_to_quantlet.end(); ++it1) { SILAC_pair silac_pair = pairs[it1->first]; // simply add up all intensities and calculate the final ratio double light_sum(0), heavy_sum(0); vector<double> light_ints, heavy_ints, ratios; for (vector<SILACQuantitation>::const_iterator it2 = it1->second.begin(); it2 != it1->second.end(); ++it2) { light_sum += it2->light_intensity; light_ints.push_back(it2->light_intensity); heavy_sum += it2->heavy_intensity; heavy_ints.push_back(it2->heavy_intensity); ratios.push_back(it2->heavy_intensity / it2->light_intensity * (it2->heavy_intensity + it2->light_intensity)); } double absdev_ratios = Math::absdev(ratios.begin(), ratios.begin() + (ratios.size()) / (heavy_sum + light_sum)); cout << "Ratio: " << silac_pair.mz_light << " <-> " << silac_pair.mz_heavy << " @ " << silac_pair.rt << " s, ratio(h/l) " << heavy_sum / light_sum << " +/- " << absdev_ratios << " (#scans for quantation: " << String(it1->second.size()) << " )" << endl; } //------------------------------------------------------------- // writing output //------------------------------------------------------------- if (feature_out != "") { FeatureXMLFile().store(feature_out, all_features); } writeDebug_("Writing output", 1); ConsensusXMLFile().store(out, results_map); return EXECUTION_OK; }
ExitCodes main_(int, const char**) { //------------------------------------------------------------- // parameter handling //------------------------------------------------------------- StringList in = getStringList_("in"); String edta = getStringOption_("pos"); String out = getStringOption_("out"); String out_sep = getStringOption_("out_separator"); String out_TIC_debug = getStringOption_("auto_rt:out_debug_TIC"); StringList in_header = getStringList_("in_header"); // number of out_debug_TIC files and input files must be identical /*if (out_TIC_debug.size() > 0 && in.size() != out_TIC_debug.size()) { LOG_FATAL_ERROR << "Error: number of input file 'in' and auto_rt:out_debug_TIC files must be identical!" << std::endl; return ILLEGAL_PARAMETERS; }*/ // number of header files and input files must be identical if (in_header.size() > 0 && in.size() != in_header.size()) { LOG_FATAL_ERROR << "Error: number of input file 'in' and 'in_header' files must be identical!" << std::endl; return ILLEGAL_PARAMETERS; } if (!getFlag_("auto_rt:enabled") && !out_TIC_debug.empty()) { LOG_FATAL_ERROR << "Error: TIC output file requested, but auto_rt is not enabled! Either do not request the file or switch on 'auto_rt:enabled'." << std::endl; return ILLEGAL_PARAMETERS; } double rttol = getDoubleOption_("rt_tol"); double mztol = getDoubleOption_("mz_tol"); Size rt_collect = getIntOption_("rt_collect"); //------------------------------------------------------------- // loading input //------------------------------------------------------------- MzMLFile mzml_file; mzml_file.setLogType(log_type_); MSExperiment<Peak1D> exp, exp_pp; EDTAFile ed; ConsensusMap cm; ed.load(edta, cm); StringList tf_single_header0, tf_single_header1, tf_single_header2; // header content, for each column std::vector<String> vec_single; // one line for each compound, multiple columns per experiment vec_single.resize(cm.size()); for (Size fi = 0; fi < in.size(); ++fi) { // load raw data mzml_file.load(in[fi], exp); exp.sortSpectra(true); if (exp.empty()) { LOG_WARN << "The given file does not contain any conventional peak data, but might" " contain chromatograms. This tool currently cannot handle them, sorry." << std::endl; return INCOMPATIBLE_INPUT_DATA; } // try to detect RT peaks (only for the first input file -- all others should align!) // cm.size() might change in here... if (getFlag_("auto_rt:enabled") && fi == 0) { ConsensusMap cm_local = cm; // we might have different RT peaks for each map if 'auto_rt' is enabled cm.clear(false); // reset global list (about to be filled) // compute TIC MSChromatogram<> tic = exp.getTIC(); MSSpectrum<> tics, tic_gf, tics_pp, tics_sn; for (Size ic = 0; ic < tic.size(); ++ic) { // rewrite Chromatogram to MSSpectrum (GaussFilter requires it) Peak1D peak; peak.setMZ(tic[ic].getRT()); peak.setIntensity(tic[ic].getIntensity()); tics.push_back(peak); } // smooth (no PP_CWT here due to efficiency reasons -- large FWHM take longer!) double fwhm = getDoubleOption_("auto_rt:FHWM"); GaussFilter gf; Param p = gf.getParameters(); p.setValue("gaussian_width", fwhm * 2); // wider than FWHM, just to be sure we have a fully smoothed peak. Merging two peaks is unlikely p.setValue("use_ppm_tolerance", "false"); gf.setParameters(p); tic_gf = tics; gf.filter(tic_gf); // pick peaks PeakPickerHiRes pp; p = pp.getParameters(); p.setValue("signal_to_noise", getDoubleOption_("auto_rt:SNThreshold")); pp.setParameters(p); pp.pick(tic_gf, tics_pp); if (tics_pp.size()) { LOG_INFO << "Found " << tics_pp.size() << " auto-rt peaks at: "; for (Size ipp = 0; ipp != tics_pp.size(); ++ipp) LOG_INFO << " " << tics_pp[ipp].getMZ(); } else { LOG_INFO << "Found no auto-rt peaks. Change threshold parameters!"; } LOG_INFO << std::endl; if (!out_TIC_debug.empty()) // if debug file was given { // store intermediate steps for debug MSExperiment<> out_debug; out_debug.addChromatogram(toChromatogram(tics)); out_debug.addChromatogram(toChromatogram(tic_gf)); SignalToNoiseEstimatorMedian<MSSpectrum<> > snt; snt.init(tics); for (Size is = 0; is < tics.size(); ++is) { Peak1D peak; peak.setMZ(tic[is].getMZ()); peak.setIntensity(snt.getSignalToNoise(tics[is])); tics_sn.push_back(peak); } out_debug.addChromatogram(toChromatogram(tics_sn)); out_debug.addChromatogram(toChromatogram(tics_pp)); // get rid of "native-id" missing warning for (Size id = 0; id < out_debug.size(); ++id) out_debug[id].setNativeID(String("spectrum=") + id); mzml_file.store(out_TIC_debug, out_debug); LOG_DEBUG << "Storing debug AUTO-RT: " << out_TIC_debug << std::endl; } // add target EICs: for each m/z with no/negative RT, add all combinations of that m/z with auto-RTs // duplicate m/z entries will be ignored! // all other lines with positive RT values are copied unaffected //do not allow doubles std::set<double> mz_doubles; for (ConsensusMap::Iterator cit = cm_local.begin(); cit != cm_local.end(); ++cit) { if (cit->getRT() < 0) { if (mz_doubles.find(cit->getMZ()) == mz_doubles.end()) { mz_doubles.insert(cit->getMZ()); } else { LOG_INFO << "Found duplicate m/z entry (" << cit->getMZ() << ") for auto-rt. Skipping ..." << std::endl; continue; } ConsensusMap cm_RT_multiplex; for (MSSpectrum<>::ConstIterator itp = tics_pp.begin(); itp != tics_pp.end(); ++itp) { ConsensusFeature f = *cit; f.setRT(itp->getMZ()); cm.push_back(f); } } else { // default feature with no auto-rt LOG_INFO << "copying feature with RT " << cit->getRT() << std::endl; cm.push_back(*cit); } } // resize, since we have more positions now vec_single.resize(cm.size()); } // search for each EIC and add up Int not_found(0); Map<Size, double> quant; String description; if (fi < in_header.size()) { HeaderInfo info(in_header[fi]); description = info.header_description; } if (fi == 0) { // two additional columns for first file (theoretical RT and m/z) tf_single_header0 << "" << ""; tf_single_header1 << "" << ""; tf_single_header2 << "RT" << "mz"; } // 5 entries for each input file tf_single_header0 << File::basename(in[fi]) << "" << "" << "" << ""; tf_single_header1 << description << "" << "" << "" << ""; tf_single_header2 << "RTobs" << "dRT" << "mzobs" << "dppm" << "intensity"; for (Size i = 0; i < cm.size(); ++i) { //std::cerr << "Rt" << cm[i].getRT() << " mz: " << cm[i].getMZ() << " R " << cm[i].getMetaValue("rank") << "\n"; double mz_da = mztol * cm[i].getMZ() / 1e6; // mz tolerance in Dalton MSExperiment<>::ConstAreaIterator it = exp.areaBeginConst(cm[i].getRT() - rttol / 2, cm[i].getRT() + rttol / 2, cm[i].getMZ() - mz_da, cm[i].getMZ() + mz_da); Peak2D max_peak; max_peak.setIntensity(0); max_peak.setRT(cm[i].getRT()); max_peak.setMZ(cm[i].getMZ()); for (; it != exp.areaEndConst(); ++it) { if (max_peak.getIntensity() < it->getIntensity()) { max_peak.setIntensity(it->getIntensity()); max_peak.setRT(it.getRT()); max_peak.setMZ(it->getMZ()); } } double ppm = 0; // observed m/z offset if (max_peak.getIntensity() == 0) { ++not_found; } else { // take median for m/z found std::vector<double> mz; MSExperiment<>::Iterator itm = exp.RTBegin(max_peak.getRT()); SignedSize low = std::min<SignedSize>(std::distance(exp.begin(), itm), rt_collect); SignedSize high = std::min<SignedSize>(std::distance(itm, exp.end()) - 1, rt_collect); MSExperiment<>::AreaIterator itt = exp.areaBegin((itm - low)->getRT() - 0.01, (itm + high)->getRT() + 0.01, cm[i].getMZ() - mz_da, cm[i].getMZ() + mz_da); for (; itt != exp.areaEnd(); ++itt) { mz.push_back(itt->getMZ()); //std::cerr << "ppm: " << itt.getRT() << " " << itt->getMZ() << " " << itt->getIntensity() << std::endl; } if ((SignedSize)mz.size() > (low + high + 1)) LOG_WARN << "Compound " << i << " has overlapping peaks [" << mz.size() << "/" << low + high + 1 << "]" << std::endl; if (!mz.empty()) { double avg_mz = std::accumulate(mz.begin(), mz.end(), 0.0) / double(mz.size()); //std::cerr << "avg: " << avg_mz << "\n"; ppm = (avg_mz - cm[i].getMZ()) / cm[i].getMZ() * 1e6; } } // appending the second column set requires separator String append_sep = (fi == 0 ? "" : out_sep); vec_single[i] += append_sep; // new line if (fi == 0) { vec_single[i] += String(cm[i].getRT()) + out_sep + String(cm[i].getMZ()) + out_sep; } vec_single[i] += String(max_peak.getRT()) + out_sep + String(max_peak.getRT() - cm[i].getRT()) + out_sep + String(max_peak.getMZ()) + out_sep + String(ppm) + out_sep + String(max_peak.getIntensity()); } if (not_found) LOG_INFO << "Missing peaks for " << not_found << " compounds in file '" << in[fi] << "'.\n"; } //------------------------------------------------------------- // create header //------------------------------------------------------------- vec_single.insert(vec_single.begin(), ListUtils::concatenate(tf_single_header2, out_sep)); vec_single.insert(vec_single.begin(), ListUtils::concatenate(tf_single_header1, out_sep)); vec_single.insert(vec_single.begin(), ListUtils::concatenate(tf_single_header0, out_sep)); //------------------------------------------------------------- // writing output //------------------------------------------------------------- TextFile tf; for (std::vector<String>::iterator v_it = vec_single.begin(); v_it != vec_single.end(); ++v_it) { tf.addLine(*v_it); } tf.store(out); return EXECUTION_OK; }
void CompNovoIdentificationCID::getIdentification(PeptideIdentification & id, const PeakSpectrum & CID_spec) { //if (CID_spec.getPrecursors().begin()->getMZ() > 1000.0) //{ //cerr << "Weight of precursor has been estimated to exceed 2000.0 Da which is the current limit" << endl; //return; //} PeakSpectrum new_CID_spec(CID_spec); windowMower_(new_CID_spec, 0.3, 1); Param zhang_param; zhang_param = zhang_.getParameters(); zhang_param.setValue("tolerance", fragment_mass_tolerance_); zhang_param.setValue("use_gaussian_factor", "true"); zhang_param.setValue("use_linear_factor", "false"); zhang_.setParameters(zhang_param); Normalizer normalizer; Param n_param(normalizer.getParameters()); n_param.setValue("method", "to_one"); normalizer.setParameters(n_param); normalizer.filterSpectrum(new_CID_spec); Size charge(2); double precursor_weight(0); // [M+H]+ if (!CID_spec.getPrecursors().empty()) { // believe charge of spectrum? if (CID_spec.getPrecursors().begin()->getCharge() != 0) { charge = CID_spec.getPrecursors().begin()->getCharge(); } else { // TODO estimate charge state } precursor_weight = CID_spec.getPrecursors().begin()->getMZ() * charge - ((charge - 1) * Constants::PROTON_MASS_U); } //cerr << "charge=" << charge << ", [M+H]=" << precursor_weight << endl; // now delete all peaks that are right of the estimated precursor weight Size peak_counter(0); for (PeakSpectrum::ConstIterator it = new_CID_spec.begin(); it != new_CID_spec.end(); ++it, ++peak_counter) { if (it->getPosition()[0] > precursor_weight) { break; } } if (peak_counter < new_CID_spec.size()) { new_CID_spec.resize(peak_counter); } static double oxonium_mass = EmpiricalFormula("H2O+").getMonoWeight(); Peak1D p; p.setIntensity(1); p.setPosition(oxonium_mass); new_CID_spec.push_back(p); p.setPosition(precursor_weight); new_CID_spec.push_back(p); // add complement to spectrum /* for (PeakSpectrum::ConstIterator it1 = CID_spec.begin(); it1 != CID_spec.end(); ++it1) { // get m/z of complement double mz_comp = precursor_weight - it1->getPosition()[0] + Constants::PROTON_MASS_U; // search if peaks are available that have similar m/z values Size count(0); bool found(false); for (PeakSpectrum::ConstIterator it2 = CID_spec.begin(); it2 != CID_spec.end(); ++it2, ++count) { if (fabs(mz_comp - it2->getPosition()[0]) < fragment_mass_tolerance) { // add peak intensity to corresponding peak in new_CID_spec new_CID_spec[count].setIntensity(new_CID_spec[count].getIntensity()); } } if (!found) { // infer this peak Peak1D p; p.setIntensity(it1->getIntensity()); p.setPosition(mz_comp); new_CID_spec.push_back(p); } }*/ CompNovoIonScoringCID ion_scoring; Param ion_scoring_param(ion_scoring.getParameters()); ion_scoring_param.setValue("fragment_mass_tolerance", fragment_mass_tolerance_); ion_scoring_param.setValue("precursor_mass_tolerance", precursor_mass_tolerance_); ion_scoring_param.setValue("decomp_weights_precision", decomp_weights_precision_); ion_scoring_param.setValue("double_charged_iso_threshold", (double)param_.getValue("double_charged_iso_threshold")); ion_scoring_param.setValue("max_isotope_to_score", param_.getValue("max_isotope_to_score")); ion_scoring_param.setValue("max_isotope", max_isotope_); ion_scoring.setParameters(ion_scoring_param); Map<double, IonScore> ion_scores; ion_scoring.scoreSpectrum(ion_scores, new_CID_spec, precursor_weight, charge); new_CID_spec.sortByPosition(); /* cerr << "Size of ion_scores " << ion_scores.size() << endl; for (Map<double, IonScore>::const_iterator it = ion_scores.begin(); it != ion_scores.end(); ++it) { cerr << it->first << " " << it->second.score << endl; }*/ #ifdef WRITE_SCORED_SPEC PeakSpectrum filtered_spec(new_CID_spec); filtered_spec.clear(); for (Map<double, CompNovoIonScoringCID::IonScore>::const_iterator it = ion_scores.begin(); it != ion_scores.end(); ++it) { Peak1D p; p.setIntensity(it->second.score); p.setPosition(it->first); filtered_spec.push_back(p); } DTAFile().store("spec_scored.dta", filtered_spec); #endif set<String> sequences; getDecompositionsDAC_(sequences, 0, new_CID_spec.size() - 1, precursor_weight, new_CID_spec, ion_scores); #ifdef SPIKE_IN sequences.insert("AFCVDGEGR"); sequences.insert("APEFAAPWPDFVPR"); sequences.insert("AVKQFEESQGR"); sequences.insert("CCTESLVNR"); sequences.insert("DAFLGSFLYEYSR"); sequences.insert("DAIPENLPPLTADFAEDK"); sequences.insert("DDNKVEDIWSFLSK"); sequences.insert("DDPHACYSTVFDK"); sequences.insert("DEYELLCLDGSR"); sequences.insert("DGAESYKELSVLLPNR"); sequences.insert("DGASCWCVDADGR"); sequences.insert("DLFIPTCLETGEFAR"); sequences.insert("DTHKSEIAHR"); sequences.insert("DVCKNYQEAK"); sequences.insert("EACFAVEGPK"); sequences.insert("ECCHGDLLECADDR"); sequences.insert("EFLGDKFYTVISSLK"); sequences.insert("EFTPVLQADFQK"); sequences.insert("ELFLDSGIFQPMLQGR"); sequences.insert("ETYGDMADCCEK"); sequences.insert("EVGCPSSSVQEMVSCLR"); sequences.insert("EYEATLEECCAK"); sequences.insert("FADLIQSGTFQLHLDSK"); sequences.insert("FFSASCVPGATIEQK"); sequences.insert("FLANVSTVLTSK"); sequences.insert("FLSGSDYAIR"); sequences.insert("FTASCPPSIK"); sequences.insert("GAIEWEGIESGSVEQAVAK"); sequences.insert("GDVAFIQHSTVEENTGGK"); sequences.insert("GEPPSCAEDQSCPSER"); sequences.insert("GEYVPTSLTAR"); sequences.insert("GQEFTITGQKR"); sequences.insert("GTFAALSELHCDK"); sequences.insert("HLVDEPQNLIK"); sequences.insert("HQDCLVTTLQTQPGAVR"); sequences.insert("HTTVNENAPDQK"); sequences.insert("ILDCGSPDTEVR"); sequences.insert("KCPSPCQLQAER"); sequences.insert("KGTEFTVNDLQGK"); sequences.insert("KQTALVELLK"); sequences.insert("KVPQVSTPTLVEVSR"); sequences.insert("LALQFTTNAKR"); sequences.insert("LCVLHEKTPVSEK"); sequences.insert("LFTFHADICTLPDTEK"); sequences.insert("LGEYGFQNALIVR"); sequences.insert("LHVDPENFK"); sequences.insert("LKECCDKPLLEK"); sequences.insert("LKHLVDEPQNLIK"); sequences.insert("LKPDPNTLCDEFK"); sequences.insert("LLGNVLVVVLAR"); sequences.insert("LLVVYPWTQR"); sequences.insert("LRVDPVNFK"); sequences.insert("LTDEELAFPPLSPSR"); sequences.insert("LVNELTEFAK"); sequences.insert("MFLSFPTTK"); sequences.insert("MPCTEDYLSLILNR"); sequences.insert("NAPYSGYSGAFHCLK"); sequences.insert("NECFLSHKDDSPDLPK"); sequences.insert("NEPNKVPACPGSCEEVK"); sequences.insert("NLQMDDFELLCTDGR"); sequences.insert("QAGVQAEPSPK"); sequences.insert("RAPEFAAPWPDFVPR"); sequences.insert("RHPEYAVSVLLR"); sequences.insert("RPCFSALTPDETYVPK"); sequences.insert("RSLLLAPEEGPVSQR"); sequences.insert("SAFPPEPLLCSVQR"); sequences.insert("SAGWNIPIGTLLHR"); sequences.insert("SCWCVDEAGQK"); sequences.insert("SGNPNYPHEFSR"); sequences.insert("SHCIAEVEK"); sequences.insert("SISSGFFECER"); sequences.insert("SKYLASASTMDHAR"); sequences.insert("SLHTLFGDELCK"); sequences.insert("SLLLAPEEGPVSQR"); sequences.insert("SPPQCSPDGAFRPVQCK"); sequences.insert("SREGDPLAVYLK"); sequences.insert("SRQIPQCPTSCER"); sequences.insert("TAGTPVSIPVCDDSSVK"); sequences.insert("TCVADESHAGCEK"); sequences.insert("TQFGCLEGFGR"); sequences.insert("TVMENFVAFVDK"); sequences.insert("TYFPHFDLSHGSAQVK"); sequences.insert("TYMLAFDVNDEK"); sequences.insert("VDEVGGEALGR"); sequences.insert("VDLLIGSSQDDGLINR"); sequences.insert("VEDIWSFLSK"); sequences.insert("VGGHAAEYGAEALER"); sequences.insert("VGTRCCTKPESER"); sequences.insert("VKVDEVGGEALGR"); sequences.insert("VKVDLLIGSSQDDGLINR"); sequences.insert("VLDSFSNGMK"); sequences.insert("VLSAADKGNVK"); sequences.insert("VPQVSTPTLVEVSR"); sequences.insert("VTKCCTESLVNR"); sequences.insert("VVAASDASQDALGCVK"); sequences.insert("VVAGVANALAHR"); sequences.insert("YICDNQDTISSK"); sequences.insert("YLASASTMDHAR"); sequences.insert("YNGVFQECCQAEDK"); #endif SpectrumAlignmentScore spectra_zhang; spectra_zhang.setParameters(zhang_param); vector<PeptideHit> hits; Size missed_cleavages = param_.getValue("missed_cleavages"); for (set<String>::const_iterator it = sequences.begin(); it != sequences.end(); ++it) { Size num_missed = countMissedCleavagesTryptic_(*it); if (missed_cleavages < num_missed) { //cerr << "Two many missed cleavages: " << *it << ", found " << num_missed << ", allowed " << missed_cleavages << endl; continue; } PeakSpectrum CID_sim_spec; getCIDSpectrum_(CID_sim_spec, *it, charge); //normalizer.filterSpectrum(CID_sim_spec); double cid_score = zhang_(CID_sim_spec, CID_spec); PeptideHit hit; hit.setScore(cid_score); hit.setSequence(getModifiedAASequence_(*it)); hit.setCharge((Int)charge); //TODO unify charge interface: int or size? hits.push_back(hit); //cerr << getModifiedAASequence_(*it) << " " << cid_score << " " << endl; } // rescore the top hits id.setHits(hits); id.assignRanks(); hits = id.getHits(); SpectrumAlignmentScore alignment_score; Param align_param(alignment_score.getParameters()); align_param.setValue("tolerance", fragment_mass_tolerance_); align_param.setValue("use_linear_factor", "true"); alignment_score.setParameters(align_param); for (vector<PeptideHit>::iterator it = hits.begin(); it != hits.end(); ++it) { //cerr << "Pre: " << it->getRank() << " " << it->getSequence() << " " << it->getScore() << " " << endl; } Size number_of_prescoring_hits = param_.getValue("number_of_prescoring_hits"); if (hits.size() > number_of_prescoring_hits) { hits.resize(number_of_prescoring_hits); } for (vector<PeptideHit>::iterator it = hits.begin(); it != hits.end(); ++it) { PeakSpectrum CID_sim_spec; getCIDSpectrum_(CID_sim_spec, getModifiedStringFromAASequence_(it->getSequence()), charge); normalizer.filterSpectrum(CID_sim_spec); //DTAFile().store("sim_specs/" + it->getSequence().toUnmodifiedString() + "_sim_CID.dta", CID_sim_spec); //double cid_score = spectra_zhang(CID_sim_spec, CID_spec); double cid_score = alignment_score(CID_sim_spec, CID_spec); //cerr << "Final: " << it->getSequence() << " " << cid_score << endl; it->setScore(cid_score); } id.setHits(hits); id.assignRanks(); hits = id.getHits(); for (vector<PeptideHit>::iterator it = hits.begin(); it != hits.end(); ++it) { //cerr << "Fin: " << it->getRank() << " " << it->getSequence() << " " << it->getScore() << " " << endl; } Size number_of_hits = param_.getValue("number_of_hits"); if (id.getHits().size() > number_of_hits) { hits.resize(number_of_hits); } id.setHits(hits); id.assignRanks(); return; }
ExitCodes main_(int, const char**) { //------------------------------------------------------------- // parameter handling //------------------------------------------------------------- StringList in_spec = getStringList_("in"); StringList out = getStringList_("out"); String in_lib = getStringOption_("lib"); String compare_function = getStringOption_("compare_function"); Int precursor_mass_multiplier = getIntOption_("round_precursor_to_integer"); float precursor_mass_tolerance = getDoubleOption_("precursor_mass_tolerance"); //Int min_precursor_charge = getIntOption_("min_precursor_charge"); //Int max_precursor_charge = getIntOption_("max_precursor_charge"); float remove_peaks_below_threshold = getDoubleOption_("filter:remove_peaks_below_threshold"); UInt min_peaks = getIntOption_("filter:min_peaks"); UInt max_peaks = getIntOption_("filter:max_peaks"); Int cut_peaks_below = getIntOption_("filter:cut_peaks_below"); StringList fixed_modifications = getStringList_("fixed_modifications"); StringList variable_modifications = getStringList_("variable_modifications"); Int top_hits = getIntOption_("top_hits"); if (top_hits < -1) { writeLog_("top_hits (should be >= -1 )"); return ILLEGAL_PARAMETERS; } //------------------------------------------------------------- // loading input //------------------------------------------------------------- if (out.size() != in_spec.size()) { writeLog_("out (should be as many as input files)"); return ILLEGAL_PARAMETERS; } time_t prog_time = time(NULL); MSPFile spectral_library; RichPeakMap query, library; //spectrum which will be identified MzMLFile spectra; spectra.setLogType(log_type_); time_t start_build_time = time(NULL); //------------------------------------------------------------- //building map for faster search //------------------------------------------------------------- //library containing already identified peptide spectra vector<PeptideIdentification> ids; spectral_library.load(in_lib, ids, library); map<Size, vector<PeakSpectrum> > MSLibrary; { RichPeakMap::iterator s; vector<PeptideIdentification>::iterator i; ModificationsDB* mdb = ModificationsDB::getInstance(); for (s = library.begin(), i = ids.begin(); s < library.end(); ++s, ++i) { double precursor_MZ = (*s).getPrecursors()[0].getMZ(); Size MZ_multi = (Size)precursor_MZ * precursor_mass_multiplier; map<Size, vector<PeakSpectrum> >::iterator found; found = MSLibrary.find(MZ_multi); PeakSpectrum librar; bool variable_modifications_ok = true; bool fixed_modifications_ok = true; const AASequence& aaseq = i->getHits()[0].getSequence(); //variable fixed modifications if (!fixed_modifications.empty()) { for (Size i = 0; i < aaseq.size(); ++i) { const Residue& mod = aaseq.getResidue(i); for (Size s = 0; s < fixed_modifications.size(); ++s) { if (mod.getOneLetterCode() == mdb->getModification(fixed_modifications[s]).getOrigin() && fixed_modifications[s] != mod.getModification()) { fixed_modifications_ok = false; break; } } } } //variable modifications if (aaseq.isModified() && (!variable_modifications.empty())) { for (Size i = 0; i < aaseq.size(); ++i) { if (aaseq.isModified(i)) { const Residue& mod = aaseq.getResidue(i); for (Size s = 0; s < variable_modifications.size(); ++s) { if (mod.getOneLetterCode() == mdb->getModification(variable_modifications[s]).getOrigin() && variable_modifications[s] != mod.getModification()) { variable_modifications_ok = false; break; } } } } } if (variable_modifications_ok && fixed_modifications_ok) { PeptideIdentification& translocate_pid = *i; librar.getPeptideIdentifications().push_back(translocate_pid); librar.setPrecursors(s->getPrecursors()); //library entry transformation for (UInt l = 0; l < s->size(); ++l) { Peak1D peak; if ((*s)[l].getIntensity() > remove_peaks_below_threshold) { const String& info = (*s)[l].getMetaValue("MSPPeakInfo"); if (info[0] == '?') { peak.setIntensity(sqrt(0.2 * (*s)[l].getIntensity())); } else { peak.setIntensity(sqrt((*s)[l].getIntensity())); } peak.setMZ((*s)[l].getMZ()); peak.setPosition((*s)[l].getPosition()); librar.push_back(peak); } } if (found != MSLibrary.end()) { found->second.push_back(librar); } else { vector<PeakSpectrum> tmp; tmp.push_back(librar); MSLibrary.insert(make_pair(MZ_multi, tmp)); } } } } time_t end_build_time = time(NULL); cout << "Time needed for preprocessing data: " << (end_build_time - start_build_time) << "\n"; //compare function PeakSpectrumCompareFunctor* comparor = Factory<PeakSpectrumCompareFunctor>::create(compare_function); //------------------------------------------------------------- // calculations //------------------------------------------------------------- double score; StringList::iterator in, out_file; for (in = in_spec.begin(), out_file = out.begin(); in < in_spec.end(); ++in, ++out_file) { time_t start_time = time(NULL); spectra.load(*in, query); //Will hold valuable hits vector<PeptideIdentification> peptide_ids; vector<ProteinIdentification> protein_ids; // Write parameters to ProteinIdentifcation ProteinIdentification prot_id; //Parameters of identificaion prot_id.setIdentifier("test"); prot_id.setSearchEngineVersion("SpecLibSearcher"); prot_id.setDateTime(DateTime::now()); prot_id.setScoreType(compare_function); ProteinIdentification::SearchParameters searchparam; searchparam.precursor_tolerance = precursor_mass_tolerance; prot_id.setSearchParameters(searchparam); /***********SEARCH**********/ for (UInt j = 0; j < query.size(); ++j) { //Set identifier for each identifications PeptideIdentification pid; pid.setIdentifier("test"); pid.setScoreType(compare_function); ProteinHit pr_hit; pr_hit.setAccession(j); prot_id.insertHit(pr_hit); //RichPeak1D to Peak1D transformation for the compare function query PeakSpectrum quer; bool peak_ok = true; query[j].sortByIntensity(true); double min_high_intensity = 0; if (query[j].empty() || query[j].getMSLevel() != 2) { continue; } if (query[j].getPrecursors().empty()) { writeLog_("Warning MS2 spectrum without precursor information"); continue; } min_high_intensity = (1 / cut_peaks_below) * query[j][0].getIntensity(); query[j].sortByPosition(); for (UInt k = 0; k < query[j].size() && k < max_peaks; ++k) { if (query[j][k].getIntensity() > remove_peaks_below_threshold && query[j][k].getIntensity() >= min_high_intensity) { Peak1D peak; peak.setIntensity(sqrt(query[j][k].getIntensity())); peak.setMZ(query[j][k].getMZ()); peak.setPosition(query[j][k].getPosition()); quer.push_back(peak); } } if (quer.size() >= min_peaks) { peak_ok = true; } else { peak_ok = false; } double query_MZ = query[j].getPrecursors()[0].getMZ(); if (peak_ok) { bool charge_one = false; Int percent = (Int) Math::round((query[j].size() / 100.0) * 3.0); Int margin = (Int) Math::round((query[j].size() / 100.0) * 1.0); for (vector<RichPeak1D>::iterator peak = query[j].end() - 1; percent >= 0; --peak, --percent) { if (peak->getMZ() < query_MZ) { break; } } if (percent > margin) { charge_one = true; } float min_MZ = (query_MZ - precursor_mass_tolerance) * precursor_mass_multiplier; float max_MZ = (query_MZ + precursor_mass_tolerance) * precursor_mass_multiplier; for (Size mz = (Size)min_MZ; mz <= ((Size)max_MZ) + 1; ++mz) { map<Size, vector<PeakSpectrum> >::iterator found; found = MSLibrary.find(mz); if (found != MSLibrary.end()) { vector<PeakSpectrum>& library = found->second; for (Size i = 0; i < library.size(); ++i) { float this_MZ = library[i].getPrecursors()[0].getMZ() * precursor_mass_multiplier; if (this_MZ >= min_MZ && max_MZ >= this_MZ && ((charge_one == true && library[i].getPeptideIdentifications()[0].getHits()[0].getCharge() == 1) || charge_one == false)) { PeptideHit hit = library[i].getPeptideIdentifications()[0].getHits()[0]; PeakSpectrum& librar = library[i]; //Special treatment for SpectraST score as it computes a score based on the whole library if (compare_function == "SpectraSTSimilarityScore") { SpectraSTSimilarityScore* sp = static_cast<SpectraSTSimilarityScore*>(comparor); BinnedSpectrum quer_bin = sp->transform(quer); BinnedSpectrum librar_bin = sp->transform(librar); score = (*sp)(quer, librar); //(*sp)(quer_bin,librar_bin); double dot_bias = sp->dot_bias(quer_bin, librar_bin, score); hit.setMetaValue("DOTBIAS", dot_bias); } else { score = (*comparor)(quer, librar); } DataValue RT(library[i].getRT()); DataValue MZ(library[i].getPrecursors()[0].getMZ()); hit.setMetaValue("RT", RT); hit.setMetaValue("MZ", MZ); hit.setScore(score); PeptideEvidence pe; pe.setProteinAccession(pr_hit.getAccession()); hit.addPeptideEvidence(pe); pid.insertHit(hit); } } } } } pid.setHigherScoreBetter(true); pid.sort(); if (compare_function == "SpectraSTSimilarityScore") { if (!pid.empty() && !pid.getHits().empty()) { vector<PeptideHit> final_hits; final_hits.resize(pid.getHits().size()); SpectraSTSimilarityScore* sp = static_cast<SpectraSTSimilarityScore*>(comparor); Size runner_up = 1; for (; runner_up < pid.getHits().size(); ++runner_up) { if (pid.getHits()[0].getSequence().toUnmodifiedString() != pid.getHits()[runner_up].getSequence().toUnmodifiedString() || runner_up > 5) { break; } } double delta_D = sp->delta_D(pid.getHits()[0].getScore(), pid.getHits()[runner_up].getScore()); for (Size s = 0; s < pid.getHits().size(); ++s) { final_hits[s] = pid.getHits()[s]; final_hits[s].setMetaValue("delta D", delta_D); final_hits[s].setMetaValue("dot product", pid.getHits()[s].getScore()); final_hits[s].setScore(sp->compute_F(pid.getHits()[s].getScore(), delta_D, pid.getHits()[s].getMetaValue("DOTBIAS"))); //final_hits[s].removeMetaValue("DOTBIAS"); } pid.setHits(final_hits); pid.sort(); pid.setMZ(query[j].getPrecursors()[0].getMZ()); pid.setRT(query_MZ); } } if (top_hits != -1 && (UInt)top_hits < pid.getHits().size()) { vector<PeptideHit> hits; hits.resize(top_hits); for (Size i = 0; i < (UInt)top_hits; ++i) { hits[i] = pid.getHits()[i]; } pid.setHits(hits); } peptide_ids.push_back(pid); } protein_ids.push_back(prot_id); //------------------------------------------------------------- // writing output //------------------------------------------------------------- IdXMLFile id_xml_file; id_xml_file.store(*out_file, protein_ids, peptide_ids); time_t end_time = time(NULL); cout << "Search time: " << difftime(end_time, start_time) << " seconds for " << *in << "\n"; } time_t end_time = time(NULL); cout << "Total time: " << difftime(end_time, prog_time) << " secconds\n"; return EXECUTION_OK; }
bool ShiftModel1D::finish() { if (!isValid()) { return false; } if (!system_) { Log.info() << "No valid system found!" << std::endl; return false; } // compute the shift model if necessary if (compute_shifts_) { BALL::ShiftModel sm(parameters_.getFilename()); system_->apply(sm); } String element = ""; // Peter Bayer proposed as peak width // for H 15Hz // for N 10hz // for C 5Hz // peakwidth is meassured in ppm, since // experiments were done in Hz, we convert the values // according to the formular // // offset [Hz] = offset[ppm] * basic frequency // // for our prediction we assume a basic frequency of 700 MHz float peakwidth = 0.0; switch(type_) { case H: case H_ON_BACKBONE: element = "H"; //peakwidth = 0.02142; // Peter Bayers estimation peakwidth = 0.0032; // this is the former BALL estimation break; case N: case N_BACKBONE: element = "N"; peakwidth = 0.01428; break; case C: case C_BACKBONE: element = "C"; peakwidth = 0.00714; break; } int counter = 0; if (element == "" ) return true; for (BALL::ResidueIterator r_it = system_->beginResidue(); +r_it; ++r_it) { Atom* atom = NULL; for (BALL::AtomIterator at_it = r_it->beginAtom(); +at_it; ++at_it) { if (hasType_(&(*at_it), type_)) { counter++; atom = &(*at_it); // we have, get the shift float shift = atom->getProperty(BALL::ShiftModule::PROPERTY__SHIFT).getFloat(); Peak1D peak; float pos = shift; peak.setPosition(pos); peak.setWidth(peakwidth); peak.setIntensity(peak.getIntensity()+1); //setAtom(); peaks_.push_back(peak); } } } std::cout << "Number of considered atoms: "<< counter << std::endl; return true; }
void CompNovoIdentificationBase::getCIDSpectrum_(PeakSpectrum & spec, const String & sequence, Size charge, DoubleReal prefix, DoubleReal suffix) { static DoubleReal h2o_mass = EmpiricalFormula("H2O").getMonoWeight(); static DoubleReal nh3_mass = EmpiricalFormula("NH3").getMonoWeight(); static DoubleReal co_mass = EmpiricalFormula("CO").getMonoWeight(); Peak1D p; DoubleReal b_pos(0 + prefix); DoubleReal y_pos(h2o_mass + suffix); bool b_H2O_loss(false), b_NH3_loss(false), y_NH3_loss(false); for (Size i = 0; i != sequence.size() - 1; ++i) { char aa(sequence[i]); b_pos += aa_to_weight_[aa]; char aa2(sequence[sequence.size() - i - 1]); y_pos += aa_to_weight_[aa2]; for (Size z = 1; z <= charge && z < 3; ++z) { // b-ions if (b_pos >= min_mz_ && b_pos <= max_mz_) { for (Size j = 0; j != max_isotope_; ++j) { if (z == 1 /*|| b_pos > MIN_DOUBLE_MZ*/) { p.setPosition((b_pos + (DoubleReal)z * Constants::PROTON_MASS_U + (DoubleReal)j + Constants::NEUTRON_MASS_U) / (DoubleReal)z); p.setIntensity(isotope_distributions_[(Size)b_pos][j] * 0.8 / (z * z)); spec.push_back(p); } } } // b-ion losses if (b_pos - h2o_mass > min_mz_ && b_pos - h2o_mass < max_mz_) { if (b_H2O_loss || aa == 'S' || aa == 'T' || aa == 'E' || aa == 'D') { b_H2O_loss = true; p.setPosition((b_pos + z * Constants::PROTON_MASS_U - h2o_mass) / z); p.setIntensity(0.02 / (DoubleReal)(z * z)); if (z == 1 /* || b_pos > MIN_DOUBLE_MZ*/) { spec.push_back(p); } } if (b_NH3_loss || aa == 'Q' || aa == 'N' || aa == 'R' || aa == 'K') { b_NH3_loss = true; p.setPosition((b_pos + z * Constants::PROTON_MASS_U - nh3_mass) / z); p.setIntensity(0.02 / (DoubleReal)(z * z)); if (z == 1 /* || b_pos > MIN_DOUBLE_MZ*/) { spec.push_back(p); } } } // a-ions only for charge 1 if (z == 1) { if (b_pos - co_mass > min_mz_ && b_pos - co_mass < max_mz_) { // a-ions p.setPosition((b_pos + z * Constants::PROTON_MASS_U - co_mass) / (DoubleReal)z); p.setIntensity(0.1f); spec.push_back(p); } } if (y_pos > min_mz_ && y_pos < max_mz_) { // y-ions for (Size j = 0; j != max_isotope_; ++j) { if (z == 1 /* || y_pos > MIN_DOUBLE_MZ*/) { p.setPosition((y_pos + (DoubleReal)z * Constants::PROTON_MASS_U + (DoubleReal)j * Constants::NEUTRON_MASS_U) / (DoubleReal)z); p.setIntensity(isotope_distributions_[(Size)y_pos][j] / (DoubleReal) (z * z)); spec.push_back(p); } } // H2O loss p.setPosition((y_pos + z * Constants::PROTON_MASS_U - h2o_mass) / (DoubleReal)z); p.setIntensity(0.1 / (DoubleReal)(z * z)); if (aa2 == 'Q') // pyroglutamic acid formation { p.setIntensity(0.5f); } if (z == 1 /* || y_pos > MIN_DOUBLE_MZ*/) { spec.push_back(p); } // NH3 loss if (y_NH3_loss || aa2 == 'Q' || aa2 == 'N' || aa2 == 'R' || aa2 == 'K') { y_NH3_loss = true; p.setPosition((y_pos + z * Constants::PROTON_MASS_U - nh3_mass) / (DoubleReal)z); p.setIntensity(0.1 / (DoubleReal)(z * z)); if (z == 1 /*|| y_pos > MIN_DOUBLE_MZ*/) { spec.push_back(p); } } } } } // if Q1 abundant loss of water -> pyroglutamic acid formation if (sequence[0] == 'Q' && prefix == 0 && suffix == 0) { /* for (PeakSpectrum::Iterator it = spec.begin(); it != spec.end(); ++it) { it->setIntensity(it->getIntensity() * 0.5); }*/ /* for (Size j = 0; j != max_isotope; ++j) { p.setPosition((precursor_weight + charge - 1 + j)/(DoubleReal)charge); p.setIntensity(isotope_distributions_[(Int)p.getPosition()[0]][j] * 0.1); spec.push_back(p); } */ } spec.sortByPosition(); return; }
void PeakPickerHiRes::pick(const MSSpectrum& input, MSSpectrum& output, std::vector<PeakBoundary>& boundaries, bool check_spacings) const { // copy meta data of the input spectrum output.clear(true); output.SpectrumSettings::operator=(input); output.MetaInfoInterface::operator=(input); output.setRT(input.getRT()); output.setMSLevel(input.getMSLevel()); output.setName(input.getName()); output.setType(SpectrumSettings::CENTROID); if (report_FWHM_) { output.getFloatDataArrays().resize(1); output.getFloatDataArrays()[0].setName( report_FWHM_as_ppm_ ? "FWHM_ppm" : "FWHM"); } // don't pick a spectrum with less than 5 data points if (input.size() < 5) return; // if both spacing constraints are disabled, don't check spacings at all: if ((spacing_difference_ == std::numeric_limits<double>::infinity()) && (spacing_difference_gap_ == std::numeric_limits<double>::infinity())) { check_spacings = false; } // signal-to-noise estimation SignalToNoiseEstimatorMedian<MSSpectrum > snt; snt.setParameters(param_.copy("SignalToNoise:", true)); if (signal_to_noise_ > 0.0) { snt.init(input); } // find local maxima in profile data for (Size i = 2; i < input.size() - 2; ++i) { double central_peak_mz = input[i].getMZ(), central_peak_int = input[i].getIntensity(); double left_neighbor_mz = input[i - 1].getMZ(), left_neighbor_int = input[i - 1].getIntensity(); double right_neighbor_mz = input[i + 1].getMZ(), right_neighbor_int = input[i + 1].getIntensity(); // do not interpolate when the left or right support is a zero-data-point if (std::fabs(left_neighbor_int) < std::numeric_limits<double>::epsilon()) continue; if (std::fabs(right_neighbor_int) < std::numeric_limits<double>::epsilon()) continue; // MZ spacing sanity checks double left_to_central = 0.0, central_to_right = 0.0, min_spacing = 0.0; if (check_spacings) { left_to_central = central_peak_mz - left_neighbor_mz; central_to_right = right_neighbor_mz - central_peak_mz; min_spacing = (left_to_central < central_to_right) ? left_to_central : central_to_right; } double act_snt = 0.0, act_snt_l1 = 0.0, act_snt_r1 = 0.0; if (signal_to_noise_ > 0.0) { act_snt = snt.getSignalToNoise(input[i]); act_snt_l1 = snt.getSignalToNoise(input[i - 1]); act_snt_r1 = snt.getSignalToNoise(input[i + 1]); } // look for peak cores meeting MZ and intensity/SNT criteria if ((central_peak_int > left_neighbor_int) && (central_peak_int > right_neighbor_int) && (act_snt >= signal_to_noise_) && (act_snt_l1 >= signal_to_noise_) && (act_snt_r1 >= signal_to_noise_) && (!check_spacings || ((left_to_central < spacing_difference_ * min_spacing) && (central_to_right < spacing_difference_ * min_spacing)))) { // special case: if a peak core is surrounded by more intense // satellite peaks (indicates oscillation rather than // real peaks) -> remove double act_snt_l2 = 0.0, act_snt_r2 = 0.0; if (signal_to_noise_ > 0.0) { act_snt_l2 = snt.getSignalToNoise(input[i - 2]); act_snt_r2 = snt.getSignalToNoise(input[i + 2]); } // checking signal-to-noise? if ((i > 1) && (i + 2 < input.size()) && (left_neighbor_int < input[i - 2].getIntensity()) && (right_neighbor_int < input[i + 2].getIntensity()) && (act_snt_l2 >= signal_to_noise_) && (act_snt_r2 >= signal_to_noise_) && (!check_spacings || ((left_neighbor_mz - input[i - 2].getMZ() < spacing_difference_ * min_spacing) && (input[i + 2].getMZ() - right_neighbor_mz < spacing_difference_ * min_spacing)))) { ++i; continue; } std::map<double, double> peak_raw_data; peak_raw_data[central_peak_mz] = central_peak_int; peak_raw_data[left_neighbor_mz] = left_neighbor_int; peak_raw_data[right_neighbor_mz] = right_neighbor_int; // peak core found, now extend it // to the left Size k = 2; bool previous_zero_left(false); // no need to extend peak if previous intensity was zero Size missing_left(0); Size left_boundary(i - 1); // index of the left boundary for the spline interpolation while ((k <= i) && // prevent underflow (i - k + 1 > 0) && !previous_zero_left && (missing_left <= missing_) && (input[i - k].getIntensity() <= peak_raw_data.begin()->second) && (!check_spacings || (peak_raw_data.begin()->first - input[i - k].getMZ() < spacing_difference_gap_ * min_spacing))) { double act_snt_lk = 0.0; if (signal_to_noise_ > 0.0) { act_snt_lk = snt.getSignalToNoise(input[i - k]); } if ((act_snt_lk >= signal_to_noise_) && (!check_spacings || (peak_raw_data.begin()->first - input[i - k].getMZ() < spacing_difference_ * min_spacing))) { peak_raw_data[input[i - k].getMZ()] = input[i - k].getIntensity(); } else { ++missing_left; if (missing_left <= missing_) { peak_raw_data[input[i - k].getMZ()] = input[i - k].getIntensity(); } } previous_zero_left = (input[i - k].getIntensity() == 0); left_boundary = i - k; ++k; } // to the right k = 2; bool previous_zero_right(false); // no need to extend peak if previous intensity was zero Size missing_right(0); Size right_boundary(i+1); // index of the right boundary for the spline interpolation while ((i + k < input.size()) && !previous_zero_right && (missing_right <= missing_) && (input[i + k].getIntensity() <= peak_raw_data.rbegin()->second) && (!check_spacings || (input[i + k].getMZ() - peak_raw_data.rbegin()->first < spacing_difference_gap_ * min_spacing))) { double act_snt_rk = 0.0; if (signal_to_noise_ > 0.0) { act_snt_rk = snt.getSignalToNoise(input[i + k]); } if ((act_snt_rk >= signal_to_noise_) && (!check_spacings || (input[i + k].getMZ() - peak_raw_data.rbegin()->first < spacing_difference_ * min_spacing))) { peak_raw_data[input[i + k].getMZ()] = input[i + k].getIntensity(); } else { ++missing_right; if (missing_right <= missing_) { peak_raw_data[input[i + k].getMZ()] = input[i + k].getIntensity(); } } previous_zero_right = (input[i + k].getIntensity() == 0); right_boundary = i + k; ++k; } // skip if the minimal number of 3 points for fitting is not reached if (peak_raw_data.size() < 3) continue; CubicSpline2d peak_spline (peak_raw_data); // calculate maximum by evaluating the spline's 1st derivative // (bisection method) double max_peak_mz = central_peak_mz; double max_peak_int = central_peak_int; double threshold = 1e-6; OpenMS::Math::spline_bisection(peak_spline, left_neighbor_mz, right_neighbor_mz, max_peak_mz, max_peak_int, threshold); // // compute FWHM // if (report_FWHM_) { double fwhm_int = max_peak_int / 2.0; threshold = 0.01 * fwhm_int; double mz_mid, int_mid; // left: double mz_left = peak_raw_data.begin()->first; double mz_center = max_peak_mz; if (peak_spline.eval(mz_left) > fwhm_int) { // the spline ends before half max is reached -- take the leftmost point (probably an underestimation) mz_mid = mz_left; } else { do { mz_mid = mz_left / 2 + mz_center / 2; int_mid = peak_spline.eval(mz_mid); if (int_mid < fwhm_int) { mz_left = mz_mid; } else { mz_center = mz_mid; } } while (fabs(int_mid - fwhm_int) > threshold); } const double fwhm_left_mz = mz_mid; // right ... double mz_right = peak_raw_data.rbegin()->first; mz_center = max_peak_mz; if (peak_spline.eval(mz_right) > fwhm_int) { // the spline ends before half max is reached -- take the rightmost point (probably an underestimation) mz_mid = mz_right; } else { do { mz_mid = mz_right / 2 + mz_center / 2; int_mid = peak_spline.eval(mz_mid); if (int_mid < fwhm_int) { mz_right = mz_mid; } else { mz_center = mz_mid; } } while (fabs(int_mid - fwhm_int) > threshold); } const double fwhm_right_mz = mz_mid; const double fwhm_absolute = fwhm_right_mz - fwhm_left_mz; output.getFloatDataArrays()[0].push_back( report_FWHM_as_ppm_ ? fwhm_absolute / max_peak_mz * 1e6 : fwhm_absolute); } // FWHM // save picked peak into output spectrum Peak1D peak; PeakBoundary peak_boundary; peak.setMZ(max_peak_mz); peak.setIntensity(max_peak_int); peak_boundary.mz_min = input[left_boundary].getMZ(); peak_boundary.mz_max = input[right_boundary].getMZ(); output.push_back(peak); boundaries.push_back(peak_boundary); // jump over profile data points that have been considered already i = i + k - 1; } } return; }