/// Applies the retention time transformation @p trafo to all spectra and all
/// chromatogram data points of @p msexp.
///
/// The ranges of @p msexp are cleared first and recomputed via updateRanges()
/// once all retention times have been rewritten.
void MapAlignmentTransformer::transformSinglePeakMap(MSExperiment<> & msexp,
                                                     const TransformationDescription & trafo)
{
  msexp.clearRanges();

  // Spectra: replace each RT by its transformed value
  for (MSExperiment<>::iterator spec_it = msexp.begin(); spec_it != msexp.end(); ++spec_it)
  {
    const DoubleReal old_rt = spec_it->getRT();
    spec_it->setRT(trafo.apply(old_rt));
  }

  // Chromatograms: work on copies, then hand the transformed set back to the experiment
  std::vector<MSChromatogram<ChromatogramPeak> > transformed;
  for (Size c = 0; c < msexp.getChromatograms().size(); ++c)
  {
    transformed.push_back(msexp.getChromatograms()[c]);
    MSChromatogram<ChromatogramPeak> & current = transformed.back();
    for (Size p = 0; p < current.size(); ++p)
    {
      const DoubleReal old_rt = current[p].getRT();
      current[p].setRT(trafo.apply(old_rt));
    }
  }
  msexp.setChromatograms(transformed);

  msexp.updateRanges();
}
/// Fill up transition group with paired Transitions and Chromatograms void getTransitionGroup(OpenSwath::SpectrumAccessPtr input, MRMTransitionGroupType& transition_group, String id) { transition_group.setTransitionGroupID(id); // Go through all transitions for (Size i = 0; i < assay_map[id].size(); i++) { // Check first whether we have a mapping (e.g. see -force option) const TransitionType* transition = assay_map[id][i]; if (chromatogram_map.find(transition->getNativeID()) == chromatogram_map.end()) { LOG_DEBUG << "Found no matching chromatogram for id " << transition->getNativeID() << std::endl; continue; } OpenSwath::ChromatogramPtr cptr = input->getChromatogramById(chromatogram_map[transition->getNativeID()]); MSChromatogram chromatogram; OpenSwathDataAccessHelper::convertToOpenMSChromatogram(cptr, chromatogram); chromatogram.setMetaValue("product_mz", transition->getProductMZ()); chromatogram.setMetaValue("precursor_mz", transition->getPrecursorMZ()); chromatogram.setNativeID(transition->getNativeID()); // Now add the transition and the chromatogram to the group transition_group.addTransition(*transition, transition->getNativeID()); transition_group.addChromatogram(chromatogram, chromatogram.getNativeID()); } }
/// Copies the data points of spectrum @p in into a new chromatogram.
///
/// Position (via getMZ()/setMZ()) and intensity of every point are carried over;
/// the result is typed as SELECTED_ION_CURRENT_CHROMATOGRAM.
MSChromatogram<> toChromatogram(const MSSpectrum<>& in)
{
  MSChromatogram<> out;
  out.setChromatogramType(ChromatogramSettings::SELECTED_ION_CURRENT_CHROMATOGRAM);

  for (MSSpectrum<>::ConstIterator src = in.begin(); src != in.end(); ++src)
  {
    ChromatogramPeak point;
    point.setMZ(src->getMZ());
    point.setIntensity(src->getIntensity());
    out.push_back(point);
  }

  return out;
}
double MRMTransitionGroupPicker::calculateBgEstimation_(const MSChromatogram<>& chromatogram, double best_left, double best_right) { // determine (in the chromatogram) the intensity at the left / right border MSChromatogram<>::const_iterator it = chromatogram.begin(); int nr_points = 0; for (; it != chromatogram.end(); ++it) { if (it->getMZ() > best_left) { nr_points++; break; } } double intensity_left = it->getIntensity(); for (; it != chromatogram.end(); ++it) { if (it->getMZ() > best_right) { break; } nr_points++; } if (it == chromatogram.begin() || nr_points < 1) { // something is fishy, the endpoint of the peak is the beginning of the chromatogram std::cerr << "Tried to calculate background but no points were found " << std::endl; return 0; } // decrease the iterator and the nr_points by one (because we went one too far) double intensity_right = (it--)->getIntensity(); nr_points--; double avg_noise_level = (intensity_right + intensity_left) / 2; return avg_noise_level * nr_points; }
void PeakPickerHiRes::pick(const MSChromatogram& input, MSChromatogram& output, std::vector<PeakBoundary>& boundaries) const { // copy meta data of the input chromatogram output.clear(true); output.ChromatogramSettings::operator=(input); output.MetaInfoInterface::operator=(input); output.setName(input.getName()); MSSpectrum input_spectrum; MSSpectrum output_spectrum; for (MSChromatogram::const_iterator it = input.begin(); it != input.end(); ++it) { Peak1D p; p.setMZ(it->getRT()); p.setIntensity(it->getIntensity()); input_spectrum.push_back(p); } pick(input_spectrum, output_spectrum, boundaries, false); // no spacing checks! for (MSSpectrum::const_iterator it = output_spectrum.begin(); it != output_spectrum.end(); ++it) { ChromatogramPeak p; p.setRT(it->getMZ()); p.setIntensity(it->getIntensity()); output.push_back(p); } // copy float data arrays (for FWHM) output.getFloatDataArrays().resize(output_spectrum.getFloatDataArrays().size()); for (Size i = 0; i < output_spectrum.getFloatDataArrays().size(); ++i) { output.getFloatDataArrays()[i].insert(output.getFloatDataArrays()[i].begin(), output_spectrum.getFloatDataArrays()[i].begin(), output_spectrum.getFloatDataArrays()[i].end()); output.getFloatDataArrays()[i].setName(output_spectrum.getFloatDataArrays()[i].getName()); } }
/// Tool entry point: extracts, for every target position, the most intense raw
/// data point within the RT / m/z tolerance window of each input file and writes
/// one text line per target.
///
/// Steps:
///  - read options and validate that 'in_header' (if given) matches 'in' in
///    length and that a TIC debug file is only requested with 'auto_rt:enabled'
///  - load the target positions ('pos') through EDTAFile into a ConsensusMap
///  - for the first input file only, if 'auto_rt:enabled' is set: derive RT
///    peaks from the smoothed TIC and expand every target with negative RT into
///    one target per detected RT peak (duplicate m/z entries are skipped)
///  - per input file and target: find the most intense point in the window,
///    compute the ppm offset of the mean m/z around it and append five columns
///  - prepend three header lines and store everything to 'out'
///
/// @return ILLEGAL_PARAMETERS on inconsistent options, INCOMPATIBLE_INPUT_DATA if
///         an input file yields an empty experiment, EXECUTION_OK otherwise
ExitCodes main_(int, const char**)
{
  //-------------------------------------------------------------
  // parameter handling
  //-------------------------------------------------------------
  StringList in = getStringList_("in");
  String edta = getStringOption_("pos");
  String out = getStringOption_("out");
  String out_sep = getStringOption_("out_separator");
  String out_TIC_debug = getStringOption_("auto_rt:out_debug_TIC");

  StringList in_header = getStringList_("in_header");

  // number of out_debug_TIC files and input files must be identical
  /*if (out_TIC_debug.size() > 0 && in.size() != out_TIC_debug.size())
  {
    LOG_FATAL_ERROR << "Error: number of input file 'in' and auto_rt:out_debug_TIC files must be identical!" << std::endl;
    return ILLEGAL_PARAMETERS;
  }*/

  // number of header files and input files must be identical
  if (in_header.size() > 0 && in.size() != in_header.size())
  {
    LOG_FATAL_ERROR << "Error: number of input file 'in' and 'in_header' files must be identical!" << std::endl;
    return ILLEGAL_PARAMETERS;
  }

  if (!getFlag_("auto_rt:enabled") && !out_TIC_debug.empty())
  {
    LOG_FATAL_ERROR << "Error: TIC output file requested, but auto_rt is not enabled! Either do not request the file or switch on 'auto_rt:enabled'." << std::endl;
    return ILLEGAL_PARAMETERS;
  }

  double rttol = getDoubleOption_("rt_tol");
  double mztol = getDoubleOption_("mz_tol");
  Size rt_collect = getIntOption_("rt_collect");

  //-------------------------------------------------------------
  // loading input
  //-------------------------------------------------------------
  MzMLFile mzml_file;
  mzml_file.setLogType(log_type_);
  MSExperiment<Peak1D> exp;

  EDTAFile ed;
  ConsensusMap cm;
  ed.load(edta, cm);

  StringList tf_single_header0, tf_single_header1, tf_single_header2; // header content, for each column

  std::vector<String> vec_single; // one line for each compound, multiple columns per experiment
  vec_single.resize(cm.size());

  for (Size fi = 0; fi < in.size(); ++fi)
  {
    // load raw data
    mzml_file.load(in[fi], exp);
    exp.sortSpectra(true);

    if (exp.empty())
    {
      LOG_WARN << "The given file does not contain any conventional peak data, but might"
                  " contain chromatograms. This tool currently cannot handle them, sorry." << std::endl;
      return INCOMPATIBLE_INPUT_DATA;
    }

    // try to detect RT peaks (only for the first input file -- all others should align!)
    // cm.size() might change in here...
    if (getFlag_("auto_rt:enabled") && fi == 0)
    {
      ConsensusMap cm_local = cm; // we might have different RT peaks for each map if 'auto_rt' is enabled
      cm.clear(false); // reset global list (about to be filled)

      // compute TIC
      MSChromatogram<> tic = exp.getTIC();
      MSSpectrum<> tics, tic_gf, tics_pp, tics_sn;
      for (Size ic = 0; ic < tic.size(); ++ic)
      {
        // rewrite Chromatogram to MSSpectrum (GaussFilter requires it)
        Peak1D peak;
        peak.setMZ(tic[ic].getRT());
        peak.setIntensity(tic[ic].getIntensity());
        tics.push_back(peak);
      }

      // smooth (no PP_CWT here due to efficiency reasons -- large FWHM take longer!)
      double fwhm = getDoubleOption_("auto_rt:FHWM");
      GaussFilter gf;
      Param p = gf.getParameters();
      p.setValue("gaussian_width", fwhm * 2); // wider than FWHM, just to be sure we have a fully smoothed peak. Merging two peaks is unlikely
      p.setValue("use_ppm_tolerance", "false");
      gf.setParameters(p);
      tic_gf = tics;
      gf.filter(tic_gf);

      // pick peaks
      PeakPickerHiRes pp;
      p = pp.getParameters();
      p.setValue("signal_to_noise", getDoubleOption_("auto_rt:SNThreshold"));
      pp.setParameters(p);
      pp.pick(tic_gf, tics_pp);

      if (tics_pp.size())
      {
        LOG_INFO << "Found " << tics_pp.size() << " auto-rt peaks at: ";
        for (Size ipp = 0; ipp != tics_pp.size(); ++ipp)
        {
          LOG_INFO << " " << tics_pp[ipp].getMZ();
        }
      }
      else
      {
        LOG_INFO << "Found no auto-rt peaks. Change threshold parameters!";
      }
      LOG_INFO << std::endl;

      if (!out_TIC_debug.empty()) // if debug file was given
      {
        // store intermediate steps for debug
        MSExperiment<> out_debug;
        out_debug.addChromatogram(toChromatogram(tics));
        out_debug.addChromatogram(toChromatogram(tic_gf));

        SignalToNoiseEstimatorMedian<MSSpectrum<> > snt;
        snt.init(tics);
        for (Size is = 0; is < tics.size(); ++is)
        {
          Peak1D peak;
          peak.setMZ(tic[is].getMZ());
          peak.setIntensity(snt.getSignalToNoise(tics[is]));
          tics_sn.push_back(peak);
        }
        out_debug.addChromatogram(toChromatogram(tics_sn));

        out_debug.addChromatogram(toChromatogram(tics_pp));

        // get rid of "native-id" missing warning
        // NOTE(review): out_debug only received chromatograms above; if size() and
        // operator[] address spectra (as exp.empty() above suggests), this loop
        // never executes -- confirm and assign the IDs to the chromatograms instead.
        for (Size id = 0; id < out_debug.size(); ++id)
        {
          out_debug[id].setNativeID(String("spectrum=") + id);
        }

        mzml_file.store(out_TIC_debug, out_debug);
        LOG_DEBUG << "Storing debug AUTO-RT: " << out_TIC_debug << std::endl;
      }

      // add target EICs: for each m/z with no/negative RT, add all combinations of that m/z with auto-RTs
      // duplicate m/z entries will be ignored!
      // all other lines with positive RT values are copied unaffected
      // do not allow doubles
      std::set<double> mz_doubles;
      for (ConsensusMap::Iterator cit = cm_local.begin(); cit != cm_local.end(); ++cit)
      {
        if (cit->getRT() < 0)
        {
          if (mz_doubles.find(cit->getMZ()) == mz_doubles.end())
          {
            mz_doubles.insert(cit->getMZ());
          }
          else
          {
            LOG_INFO << "Found duplicate m/z entry (" << cit->getMZ() << ") for auto-rt. Skipping ..." << std::endl;
            continue;
          }

          // one copy of the target per detected RT peak
          for (MSSpectrum<>::ConstIterator itp = tics_pp.begin(); itp != tics_pp.end(); ++itp)
          {
            ConsensusFeature f = *cit;
            f.setRT(itp->getMZ());
            cm.push_back(f);
          }
        }
        else
        {
          // default feature with no auto-rt
          LOG_INFO << "copying feature with RT " << cit->getRT() << std::endl;
          cm.push_back(*cit);
        }
      }

      // resize, since we have more positions now
      vec_single.resize(cm.size());
    }

    // search for each EIC and add up
    Int not_found(0);

    String description;
    if (fi < in_header.size())
    {
      HeaderInfo info(in_header[fi]);
      description = info.header_description;
    }

    if (fi == 0)
    {
      // two additional columns for first file (theoretical RT and m/z)
      tf_single_header0 << "" << "";
      tf_single_header1 << "" << "";
      tf_single_header2 << "RT" << "mz";
    }

    // 5 entries for each input file
    tf_single_header0 << File::basename(in[fi]) << "" << "" << "" << "";
    tf_single_header1 << description << "" << "" << "" << "";
    tf_single_header2 << "RTobs" << "dRT" << "mzobs" << "dppm" << "intensity";

    for (Size i = 0; i < cm.size(); ++i)
    {
      //std::cerr << "Rt" << cm[i].getRT() << " mz: " << cm[i].getMZ() << " R " << cm[i].getMetaValue("rank") << "\n";

      double mz_da = mztol * cm[i].getMZ() / 1e6; // mz tolerance in Dalton
      MSExperiment<>::ConstAreaIterator it = exp.areaBeginConst(cm[i].getRT() - rttol / 2,
                                                                cm[i].getRT() + rttol / 2,
                                                                cm[i].getMZ() - mz_da,
                                                                cm[i].getMZ() + mz_da);

      // most intense point in the window; defaults to the theoretical position with intensity 0
      Peak2D max_peak;
      max_peak.setIntensity(0);
      max_peak.setRT(cm[i].getRT());
      max_peak.setMZ(cm[i].getMZ());
      for (; it != exp.areaEndConst(); ++it)
      {
        if (max_peak.getIntensity() < it->getIntensity())
        {
          max_peak.setIntensity(it->getIntensity());
          max_peak.setRT(it.getRT());
          max_peak.setMZ(it->getMZ());
        }
      }

      double ppm = 0; // observed m/z offset

      if (max_peak.getIntensity() == 0)
      {
        ++not_found;
      }
      else
      {
        // take the mean of the m/z values found in up to 'rt_collect' spectra on either side
        std::vector<double> mz;
        MSExperiment<>::Iterator itm = exp.RTBegin(max_peak.getRT());
        SignedSize low = std::min<SignedSize>(std::distance(exp.begin(), itm), rt_collect);
        SignedSize high = std::min<SignedSize>(std::distance(itm, exp.end()) - 1, rt_collect);
        MSExperiment<>::AreaIterator itt = exp.areaBegin((itm - low)->getRT() - 0.01,
                                                         (itm + high)->getRT() + 0.01,
                                                         cm[i].getMZ() - mz_da,
                                                         cm[i].getMZ() + mz_da);
        for (; itt != exp.areaEnd(); ++itt)
        {
          mz.push_back(itt->getMZ());
          //std::cerr << "ppm: " << itt.getRT() << " " << itt->getMZ() << " " << itt->getIntensity() << std::endl;
        }

        if (static_cast<SignedSize>(mz.size()) > (low + high + 1))
        {
          LOG_WARN << "Compound " << i << " has overlapping peaks [" << mz.size() << "/" << low + high + 1 << "]" << std::endl;
        }

        if (!mz.empty())
        {
          double avg_mz = std::accumulate(mz.begin(), mz.end(), 0.0) / double(mz.size());
          //std::cerr << "avg: " << avg_mz << "\n";
          ppm = (avg_mz - cm[i].getMZ()) / cm[i].getMZ() * 1e6;
        }
      }

      // appending the second column set requires separator
      String append_sep = (fi == 0 ? "" : out_sep);

      vec_single[i] += append_sep; // new line
      if (fi == 0)
      {
        vec_single[i] += String(cm[i].getRT()) + out_sep +
                         String(cm[i].getMZ()) + out_sep;
      }
      vec_single[i] += String(max_peak.getRT()) + out_sep +
                       String(max_peak.getRT() - cm[i].getRT()) + out_sep +
                       String(max_peak.getMZ()) + out_sep +
                       String(ppm) + out_sep +
                       String(max_peak.getIntensity());
    }

    if (not_found)
    {
      LOG_INFO << "Missing peaks for " << not_found << " compounds in file '" << in[fi] << "'.\n";
    }
  }

  //-------------------------------------------------------------
  // create header
  //-------------------------------------------------------------
  // inserted in reverse so that header0 ends up as the first line
  vec_single.insert(vec_single.begin(), ListUtils::concatenate(tf_single_header2, out_sep));
  vec_single.insert(vec_single.begin(), ListUtils::concatenate(tf_single_header1, out_sep));
  vec_single.insert(vec_single.begin(), ListUtils::concatenate(tf_single_header0, out_sep));

  //-------------------------------------------------------------
  // writing output
  //-------------------------------------------------------------
  TextFile tf;
  for (std::vector<String>::iterator v_it = vec_single.begin(); v_it != vec_single.end(); ++v_it)
  {
    tf.addLine(*v_it);
  }
  tf.store(out);

  return EXECUTION_OK;
}