vector<vector<Size> > PScore::calculateRankMap(const PeakMap& peak_map, double mz_window)
{
  vector<std::vector<Size> > rank_map; // note: ranks are zero-based
  rank_map.reserve(peak_map.size());
  for (Size i = 0; i != peak_map.size(); ++i)
  {
    const PeakSpectrum& spec = peak_map[i];
    vector<double> mz;
    vector<double> intensities;
    for (Size j = 0; j != spec.size(); ++j)
    {
      mz.push_back(spec[j].getMZ());
      intensities.push_back(spec[j].getIntensity());
    }
    rank_map.push_back(calculateIntensityRankInMZWindow(mz, intensities, mz_window));
  }
  return rank_map;
}
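// Minimal usage sketch (not part of the original source): it assumes an already loaded,
// centroided PeakMap and shows how the per-peak ranks returned by PScore::calculateRankMap
// could be consumed. The 10 Da window is an arbitrary choice and the header path may
// differ between OpenMS versions.
#include <OpenMS/ANALYSIS/RNPXL/PScore.h>
#include <OpenMS/KERNEL/StandardTypes.h>
#include <iostream>

void printPeakRanks(const OpenMS::PeakMap& peak_map)
{
  using namespace OpenMS;
  // rank_map[spectrum_index][peak_index] holds the zero-based intensity rank
  // of each peak within its local m/z window
  std::vector<std::vector<Size> > rank_map = PScore::calculateRankMap(peak_map, 10.0);
  for (Size s = 0; s < peak_map.size(); ++s)
  {
    for (Size p = 0; p < peak_map[s].size(); ++p)
    {
      std::cout << "spectrum " << s << ", m/z " << peak_map[s][p].getMZ()
                << " -> rank " << rank_map[s][p] << "\n";
    }
  }
}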
// version for label-free linkers
void XQuestResultXMLFile::writeXQuestXMLSpec(String out_file, String base_name, const std::vector< std::vector< OPXLDataStructs::CrossLinkSpectrumMatch > >& all_top_csms, const PeakMap& spectra)
{
  // String spec_xml_filename = base_name + "_matched.spec.xml";
  // XML header
  std::ofstream spec_xml_file;
  std::cout << "Writing spec.xml to " << out_file << std::endl;
  spec_xml_file.open(out_file.c_str(), std::ios::trunc); // ios::app = append to file, ios::trunc = overwrites file
  // TODO write actual data
  spec_xml_file << "<?xml version=\"1.0\" encoding=\"UTF-8\"?><xquest_spectra compare_peaks_version=\"3.4\" date=\"Tue Nov 24 12:41:18 2015\" author=\"Thomas Walzthoeni,Oliver Rinner\" homepage=\"http://proteomics.ethz.ch\" resultdir=\"aleitner_M1012_004_matched\" deffile=\"xquest.def\" >" << std::endl;

  // collect indices of spectra that need to be written out
  std::vector<Size> spectrum_indices;
  for (Size i = 0; i < all_top_csms.size(); ++i)
  {
    if (!all_top_csms[i].empty())
    {
      if (all_top_csms[i][0].scan_index_light < spectra.size())
      {
        spectrum_indices.push_back(all_top_csms[i][0].scan_index_light);
      }
    }
  }

  // loop over list of indices and write out spectra
  for (Size i = 0; i < spectrum_indices.size(); ++i)
  {
    String spectrum_light_name = base_name + ".light." + spectrum_indices[i];
    String spectrum_heavy_name = base_name + ".heavy." + spectrum_indices[i];
    String spectrum_name = spectrum_light_name + String("_") + spectrum_heavy_name;

    // 4 spectra result from a light/heavy spectrum pair. Write them for each spectrum that is written to
    // xquest.xml (should be all considered pairs, or better only those with at least one sensible hit,
    // i.e. a score was computed)
    spec_xml_file << "<spectrum filename=\"" << spectrum_light_name << ".dta" << "\" type=\"light\">" << std::endl;
    spec_xml_file << getxQuestBase64EncodedSpectrum_(spectra[spectrum_indices[i]], String(""));
    spec_xml_file << "</spectrum>" << std::endl;

    spec_xml_file << "<spectrum filename=\"" << spectrum_heavy_name << ".dta" << "\" type=\"heavy\">" << std::endl;
    spec_xml_file << getxQuestBase64EncodedSpectrum_(spectra[spectrum_indices[i]], String(""));
    spec_xml_file << "</spectrum>" << std::endl;

    String spectrum_common_name = spectrum_name + String("_common.txt");
    spec_xml_file << "<spectrum filename=\"" << spectrum_common_name << "\" type=\"common\">" << std::endl;
    spec_xml_file << getxQuestBase64EncodedSpectrum_(spectra[spectrum_indices[i]], spectrum_light_name + ".dta," + spectrum_heavy_name + ".dta");
    spec_xml_file << "</spectrum>" << std::endl;

    String spectrum_xlink_name = spectrum_name + String("_xlinker.txt");
    spec_xml_file << "<spectrum filename=\"" << spectrum_xlink_name << "\" type=\"xlinker\">" << std::endl;
    spec_xml_file << getxQuestBase64EncodedSpectrum_(spectra[spectrum_indices[i]], spectrum_light_name + ".dta," + spectrum_heavy_name + ".dta");
    spec_xml_file << "</spectrum>" << std::endl;
  }

  spec_xml_file << "</xquest_spectra>" << std::endl;
  spec_xml_file.close();
  return;
}
void getPrecursors_(const PeakMap& exp, vector<Precursor>& precursors, vector<double>& precursors_rt)
{
  for (Size i = 0; i != exp.size(); ++i)
  {
    vector<Precursor> pcs = exp[i].getPrecursors();
    if (pcs.empty())
    {
      continue;
    }
    vector<double> pcs_rt(pcs.size(), exp[i].getRT());
    copy(pcs.begin(), pcs.end(), back_inserter(precursors));
    copy(pcs_rt.begin(), pcs_rt.end(), back_inserter(precursors_rt));
  }
}
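// Hypothetical usage sketch (not from the original source): load an mzML file and collect
// all precursors together with the retention times of their fragment spectra via the helper
// above. The file name is a placeholder and the header paths are approximate.
#include <OpenMS/FORMAT/MzMLFile.h>
#include <OpenMS/KERNEL/StandardTypes.h>
#include <iostream>

int main()
{
  using namespace OpenMS;
  PeakMap exp;
  MzMLFile().load("example.mzML", exp);

  std::vector<Precursor> precursors;
  std::vector<double> precursors_rt;
  getPrecursors_(exp, precursors, precursors_rt); // fills both vectors in parallel

  for (Size i = 0; i < precursors.size(); ++i)
  {
    std::cout << "RT " << precursors_rt[i] << " s, precursor m/z " << precursors[i].getMZ() << "\n";
  }
  return 0;
}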
void FeatureFinder::run(const String& algorithm_name, PeakMap& input_map, FeatureMap& features, const Param& param, const FeatureMap& seeds) { // Nothing to do if there is no data if ((algorithm_name != "mrm" && input_map.empty()) || (algorithm_name == "mrm" && input_map.getChromatograms().empty())) { features.clear(true); return; } // check input { // We need updated ranges => check number of peaks if (algorithm_name != "mrm" && input_map.getSize() == 0) { throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "FeatureFinder needs updated ranges on input map. Aborting."); } // We need MS1 data only => check levels if (algorithm_name != "mrm" && (input_map.getMSLevels().size() != 1 || input_map.getMSLevels()[0] != 1)) { throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "FeatureFinder can only operate on MS level 1 data. Please do not use MS/MS data. Aborting."); } //Check if the peaks are sorted according to m/z if (!input_map.isSorted(true)) { LOG_WARN << "Input map is not sorted by RT and m/z! This is done now, before applying the algorithm!" << std::endl; input_map.sortSpectra(true); input_map.sortChromatograms(true); } for (Size s = 0; s < input_map.size(); ++s) { if (input_map[s].empty()) continue; if (input_map[s][0].getMZ() < 0) { throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "FeatureFinder can only operate on spectra that contain peaks with positive m/z values. Filter the data accordingly beforehand! Aborting."); } } } // initialize if (algorithm_name != "mrm" && algorithm_name != "centroided") { // Resize peak flag vector flags_.resize(input_map.size()); for (Size i = 0; i < input_map.size(); ++i) { flags_[i].assign(input_map[i].size(), UNUSED); } } // do the work if (algorithm_name != "none") { FeatureFinderAlgorithm* algorithm = Factory<FeatureFinderAlgorithm>::create(algorithm_name); algorithm->setParameters(param); algorithm->setData(input_map, features, *this); algorithm->setSeeds(seeds); algorithm->run(); delete(algorithm); } if (algorithm_name != "mrm") // mrm works on chromatograms; the next section is only for conventional data { //report RT apex spectrum index and native ID for each feature for (Size i = 0; i < features.size(); ++i) { //index Size spectrum_index = input_map.RTBegin(features[i].getRT()) - input_map.begin(); features[i].setMetaValue("spectrum_index", spectrum_index); //native id if (spectrum_index < input_map.size()) { String native_id = input_map[spectrum_index].getNativeID(); features[i].setMetaValue("spectrum_native_id", native_id); } else { /// @todo that happens sometimes using IsotopeWaveletFeatureFinder (Rene, Marc, Andreas, Clemens) std::cerr << "FeatureFinderAlgorithm_impl, line=" << __LINE__ << "; FixMe this cannot be, but happens" << std::endl; } } } }
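// Hedged sketch of driving FeatureFinder::run (as defined above) with the "centroided"
// algorithm; it is not taken from the original source. It assumes the classic FeatureFinder
// interface where getParameters(name) returns the defaults of the named algorithm and
// updateRanges() is called on the input beforehand; file names are placeholders.
#include <OpenMS/TRANSFORMATIONS/FEATUREFINDER/FeatureFinder.h>
#include <OpenMS/FORMAT/MzMLFile.h>
#include <OpenMS/FORMAT/FeatureXMLFile.h>

void pickFeatures(const OpenMS::String& in_mzml, const OpenMS::String& out_featurexml)
{
  using namespace OpenMS;
  PeakMap input;
  MzMLFile().load(in_mzml, input);
  input.updateRanges(); // FeatureFinder requires up-to-date ranges on the input map

  FeatureFinder ff;
  Param param = ff.getParameters("centroided"); // defaults of the chosen algorithm
  FeatureMap features;
  FeatureMap seeds; // empty: no user-supplied seeds
  ff.run("centroided", input, features, param, seeds);

  FeatureXMLFile().store(out_featurexml, features);
}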
// version for labeled linkers
void XQuestResultXMLFile::writeXQuestXMLSpec(String out_file, String base_name, const OPXLDataStructs::PreprocessedPairSpectra& preprocessed_pair_spectra, const std::vector< std::pair<Size, Size> >& spectrum_pairs, const std::vector< std::vector< OPXLDataStructs::CrossLinkSpectrumMatch > >& all_top_csms, const PeakMap& spectra)
{
  //String spec_xml_filename = base_name + "_matched.spec.xml";
  // XML header
  std::ofstream spec_xml_file;
  std::cout << "Writing spec.xml to " << out_file << std::endl;
  spec_xml_file.open(out_file.c_str(), std::ios::trunc); // ios::app = append to file, ios::trunc = overwrites file
  // TODO write actual data
  spec_xml_file << "<?xml version=\"1.0\" encoding=\"UTF-8\"?><xquest_spectra compare_peaks_version=\"3.4\" date=\"Tue Nov 24 12:41:18 2015\" author=\"Thomas Walzthoeni,Oliver Rinner\" homepage=\"http://proteomics.ethz.ch\" resultdir=\"aleitner_M1012_004_matched\" deffile=\"xquest.def\" >" << std::endl;

  // collect indices of spectra that need to be written out
  std::vector <std::pair <Size, Size> > spectrum_indices;
  for (Size i = 0; i < all_top_csms.size(); ++i)
  {
    if (!all_top_csms[i].empty())
    {
      if (all_top_csms[i][0].scan_index_light < spectra.size() && all_top_csms[i][0].scan_index_heavy < spectra.size())
      {
        spectrum_indices.push_back( std::make_pair(all_top_csms[i][0].scan_index_light, all_top_csms[i][0].scan_index_heavy) );
      }
    }
  }

  // loop over list of indices and write out spectra
  for (Size i = 0; i < spectrum_indices.size(); ++i)
  {
    Size scan_index_light = spectrum_indices[i].first;
    Size scan_index_heavy = spectrum_indices[i].second;
    // TODO more correct alternative
    String spectrum_light_name = base_name + ".light." + scan_index_light;
    String spectrum_heavy_name = base_name + ".heavy." + scan_index_heavy;
    String spectrum_name = spectrum_light_name + String("_") + spectrum_heavy_name;

    if (scan_index_light < spectra.size() && scan_index_heavy < spectra.size() && i < preprocessed_pair_spectra.spectra_common_peaks.size() && i < preprocessed_pair_spectra.spectra_xlink_peaks.size())
    {
      // 4 spectra result from a light/heavy spectrum pair. Write them for each spectrum that is written to
      // xquest.xml (should be all considered pairs, or better only those with at least one sensible hit,
      // i.e. a score was computed)
      spec_xml_file << "<spectrum filename=\"" << spectrum_light_name << ".dta" << "\" type=\"light\">" << std::endl;
      spec_xml_file << getxQuestBase64EncodedSpectrum_(spectra[scan_index_light], String(""));
      spec_xml_file << "</spectrum>" << std::endl;

      spec_xml_file << "<spectrum filename=\"" << spectrum_heavy_name << ".dta" << "\" type=\"heavy\">" << std::endl;
      spec_xml_file << getxQuestBase64EncodedSpectrum_(spectra[scan_index_heavy], String(""));
      spec_xml_file << "</spectrum>" << std::endl;

      // the preprocessed pair spectra are sorted by another index
      // because some pairs do not yield any reasonable hits, the index from the spectrum matches or spectrum_indices does not address the right pair anymore
      // use find with the pair of spectrum indices to find the correct index for the preprocessed common and cross-linked ion spectra
      std::vector<std::pair <Size, Size> >::const_iterator pair_it = std::find(spectrum_pairs.begin(), spectrum_pairs.end(), spectrum_indices[i]);
      Size pair_index = std::distance(spectrum_pairs.begin(), pair_it);

      String spectrum_common_name = spectrum_name + String("_common.txt");
      spec_xml_file << "<spectrum filename=\"" << spectrum_common_name << "\" type=\"common\">" << std::endl;
      spec_xml_file << getxQuestBase64EncodedSpectrum_(preprocessed_pair_spectra.spectra_common_peaks[pair_index], spectrum_light_name + ".dta," + spectrum_heavy_name + ".dta");
      spec_xml_file << "</spectrum>" << std::endl;

      String spectrum_xlink_name = spectrum_name + String("_xlinker.txt");
      spec_xml_file << "<spectrum filename=\"" << spectrum_xlink_name << "\" type=\"xlinker\">" << std::endl;
      spec_xml_file << getxQuestBase64EncodedSpectrum_(preprocessed_pair_spectra.spectra_xlink_peaks[pair_index], spectrum_light_name + ".dta," + spectrum_heavy_name + ".dta");
      spec_xml_file << "</spectrum>" << std::endl;
    }
  }

  spec_xml_file << "</xquest_spectra>" << std::endl;
  spec_xml_file.close();
  return;
}
ExitCodes main_(int, const char **)
{
  //-------------------------------------------------------------
  // parsing parameters
  //-------------------------------------------------------------
  String in(getStringOption_("in"));
  String out(getStringOption_("out"));
  Size num_spots_per_row(getIntOption_("num_spots_per_row"));
  double RT_distance(getDoubleOption_("RT_distance"));

  //-------------------------------------------------------------
  // reading input
  //-------------------------------------------------------------
  PeakMap exp;
  MzMLFile f;
  f.setLogType(log_type_);
  f.load(in, exp);

  //-------------------------------------------------------------
  // calculations
  //-------------------------------------------------------------
  ProgressLogger pl;
  pl.setLogType(log_type_);
  pl.startProgress(0, exp.size(), "Assigning pseudo RTs.");
  Size num_ms1(0), num_ms1_base(0), row_counter(0);
  bool row_to_reverse(false);
  double actual_RT(0);
  for (Size i = 0; i != exp.size(); ++i)
  {
    pl.setProgress(i);
    if (row_to_reverse)
    {
      actual_RT = (double)(num_ms1_base + (num_spots_per_row - row_counter)) * RT_distance;
      writeDebug_("RT=" + String(actual_RT) + " (modified, row_counter=" + String(row_counter) + ")", 1);
    }
    else
    {
      actual_RT = (double)num_ms1 * RT_distance;
      writeDebug_("RT=" + String(actual_RT), 1);
    }
    exp[i].setRT(actual_RT);
    if (exp[i].getMSLevel() == 1)
    {
      if (++row_counter >= num_spots_per_row)
      {
        row_counter = 0;
        if (row_to_reverse)
        {
          row_to_reverse = false;
        }
        else
        {
          row_to_reverse = true;
        }
      }
      ++num_ms1;
      if (!row_to_reverse)
      {
        num_ms1_base = num_ms1;
      }
    }
  }
  pl.endProgress();

  // sort the spectra according to their new RT
  exp.sortSpectra();

  //-------------------------------------------------------------
  // writing output
  //-------------------------------------------------------------
  f.store(out, exp);

  return EXECUTION_OK;
}
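// Small standalone sketch (an illustration, not from the original tool) of the serpentine
// "pseudo RT" numbering used above: spots are counted row by row, and every other row is
// traversed in reverse so that neighbouring spots on the target plate get neighbouring
// retention times. All values are arbitrary.
#include <cstdio>

int main()
{
  const unsigned num_spots_per_row = 4;
  const double rt_distance = 10.0;

  unsigned num_ms1 = 0, num_ms1_base = 0, row_counter = 0;
  bool row_to_reverse = false;

  for (unsigned spot = 0; spot < 12; ++spot) // three rows of four spots
  {
    const double rt = row_to_reverse
      ? (num_ms1_base + (num_spots_per_row - row_counter)) * rt_distance
      : num_ms1 * rt_distance;
    std::printf("spot %2u -> pseudo RT %6.1f\n", spot, rt); // 0,10,20,30, 70,60,50,40, 80,...

    if (++row_counter >= num_spots_per_row)
    {
      row_counter = 0;
      row_to_reverse = !row_to_reverse;
    }
    ++num_ms1;
    if (!row_to_reverse) num_ms1_base = num_ms1;
  }
  return 0;
}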
/**
 * @brief Applies the peak-picking algorithm to a map (MSExperiment). This
 * method picks peaks for each scan in the map consecutively. The resulting
 * picked peaks are written to the output map.
 *
 * @param input input map in profile mode
 * @param output output map with picked peaks
 * @param boundaries_spec boundaries of the picked peaks in spectra
 * @param boundaries_chrom boundaries of the picked peaks in chromatograms
 * @param check_spectrum_type if set, checks spectrum type and throws an exception if a centroided spectrum is passed
 */
void PeakPickerHiRes::pickExperiment(const PeakMap& input, PeakMap& output, std::vector<std::vector<PeakBoundary> >& boundaries_spec, std::vector<std::vector<PeakBoundary> >& boundaries_chrom, const bool check_spectrum_type) const
{
  // make sure that output is clear
  output.clear(true);

  // copy experimental settings
  static_cast<ExperimentalSettings &>(output) = input;

  // resize output with respect to input
  output.resize(input.size());

  Size progress = 0;
  startProgress(0, input.size() + input.getChromatograms().size(), "picking peaks");

  if (input.getNrSpectra() > 0)
  {
    for (Size scan_idx = 0; scan_idx != input.size(); ++scan_idx)
    {
      if (ms_levels_.empty()) // auto mode
      {
        SpectrumSettings::SpectrumType spectrum_type = input[scan_idx].getType();
        if (spectrum_type == SpectrumSettings::CENTROID)
        {
          output[scan_idx] = input[scan_idx];
        }
        else
        {
          std::vector<PeakBoundary> boundaries_s; // peak boundaries of a single spectrum
          pick(input[scan_idx], output[scan_idx], boundaries_s);
          boundaries_spec.push_back(boundaries_s);
        }
      }
      else if (!ListUtils::contains(ms_levels_, input[scan_idx].getMSLevel())) // manual mode
      {
        output[scan_idx] = input[scan_idx];
      }
      else
      {
        std::vector<PeakBoundary> boundaries_s; // peak boundaries of a single spectrum

        // determine type of spectral data (profile or centroided)
        SpectrumSettings::SpectrumType spectrum_type = input[scan_idx].getType();
        if (spectrum_type == SpectrumSettings::CENTROID && check_spectrum_type)
        {
          throw OpenMS::Exception::IllegalArgument(__FILE__, __LINE__, __FUNCTION__, "Error: Centroided data provided but profile spectra expected.");
        }

        pick(input[scan_idx], output[scan_idx], boundaries_s);
        boundaries_spec.push_back(boundaries_s);
      }
      setProgress(++progress);
    }
  }

  for (Size i = 0; i < input.getChromatograms().size(); ++i)
  {
    MSChromatogram chromatogram;
    std::vector<PeakBoundary> boundaries_c; // peak boundaries of a single chromatogram
    pick(input.getChromatograms()[i], chromatogram, boundaries_c);
    output.addChromatogram(chromatogram);
    boundaries_chrom.push_back(boundaries_c);
    setProgress(++progress);
  }

  endProgress();

  return;
}
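// Hedged usage sketch (not from the original file): centroid a profile mzML file with
// PeakPickerHiRes via the pickExperiment() overload shown above. The parameter name
// "signal_to_noise" and the file names are assumptions for illustration.
#include <OpenMS/TRANSFORMATIONS/RAW2PEAK/PeakPickerHiRes.h>
#include <OpenMS/FORMAT/MzMLFile.h>

void centroidFile(const OpenMS::String& in_profile, const OpenMS::String& out_centroided)
{
  using namespace OpenMS;
  PeakMap profile, centroided;
  MzMLFile().load(in_profile, profile);

  PeakPickerHiRes picker;
  Param p = picker.getParameters();
  p.setValue("signal_to_noise", 0.0); // keep all peaks; raise this for noisy data
  picker.setParameters(p);

  std::vector<std::vector<PeakPickerHiRes::PeakBoundary> > bounds_spec, bounds_chrom;
  picker.pickExperiment(profile, centroided, bounds_spec, bounds_chrom, true);

  MzMLFile().store(out_centroided, centroided);
}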
ExitCodes main_(int, const char **) override { //------------------------------------------------------------- // parameter handling //------------------------------------------------------------- //input/output files String in(getStringOption_("in")); String out(getStringOption_("out")); //------------------------------------------------------------- // loading input //------------------------------------------------------------- PeakMap exp; MzMLFile f; f.setLogType(log_type_); PeakFileOptions options; options.clearMSLevels(); options.addMSLevel(2); f.getOptions() = options; f.load(in, exp); writeDebug_("Data set contains " + String(exp.size()) + " spectra", 1); //------------------------------------------------------------- // calculations //------------------------------------------------------------- vector<PeptideIdentification> pep_ids; CompNovoIdentificationCID comp_novo_id; // set the options Param algorithm_param = getParam_().copy("algorithm:", true); comp_novo_id.setParameters(algorithm_param); comp_novo_id.getIdentifications(pep_ids, exp); algorithm_param = comp_novo_id.getParameters(); //------------------------------------------------------------- // writing output //------------------------------------------------------------- DateTime now = DateTime::now(); String date_string = now.get(); String identifier("CompNovoCID_" + date_string); for (vector<PeptideIdentification>::iterator it = pep_ids.begin(); it != pep_ids.end(); ++it) { it->assignRanks(); it->setIdentifier(identifier); } vector<ProteinIdentification> prot_ids; ProteinIdentification prot_id; prot_id.setIdentifier(identifier); prot_id.setDateTime(now); StringList ms_runs; exp.getPrimaryMSRunPath(ms_runs); prot_id.setPrimaryMSRunPath(ms_runs); ProteinIdentification::SearchParameters search_parameters; search_parameters.charges = "+2-+3"; if (algorithm_param.getValue("tryptic_only").toBool()) { search_parameters.digestion_enzyme = *(ProteaseDB::getInstance()->getEnzyme("Trypsin")); } else { search_parameters.digestion_enzyme = *(ProteaseDB::getInstance()->getEnzyme("no cleavage")); } search_parameters.mass_type = ProteinIdentification::MONOISOTOPIC; search_parameters.fixed_modifications = algorithm_param.getValue("fixed_modifications"); search_parameters.variable_modifications = algorithm_param.getValue("variable_modifications"); search_parameters.missed_cleavages = (UInt)algorithm_param.getValue("missed_cleavages"); search_parameters.fragment_mass_tolerance = (double)algorithm_param.getValue("fragment_mass_tolerance"); search_parameters.precursor_mass_tolerance = (double)algorithm_param.getValue("precursor_mass_tolerance"); search_parameters.fragment_mass_tolerance_ppm = false; search_parameters.precursor_mass_tolerance_ppm = false; prot_id.setSearchParameters(search_parameters); prot_id.setSearchEngineVersion("0.9beta"); prot_id.setSearchEngine("CompNovo"); prot_ids.push_back(prot_id); IdXMLFile().store(out, prot_ids, pep_ids); return EXECUTION_OK; }
// Wrong assignment of the mono-isotopic mass for precursors are assumed: // - if precursor_mz matches the mz of a non-monoisotopic feature mass trace // - and in the case that believe_charge is true: if feature_charge matches the precursor_charge // In the case of wrong mono-isotopic assignment several options for correction are available: // keep_original will create a copy of the precursor and tandem spectrum for the new mono-isotopic mass trace and retain the original one // all_matching_features does this not for only the closest feature but all features in a question set<Size> correctToNearestFeature(const FeatureMap& features, PeakMap & exp, double rt_tolerance_s = 0.0, double mz_tolerance = 0.0, bool ppm = true, bool believe_charge = false, bool keep_original = false, bool all_matching_features = false, int max_trace = 2) { set<Size> corrected_precursors; // for each precursor/MS2 find all features that are in the given tolerance window (bounding box + rt tolerances) // if believe_charge is set, only add features that match the precursor charge map<Size, set<Size> > scan_idx_to_feature_idx; for (Size scan = 0; scan != exp.size(); ++scan) { // skip non-tandem mass spectra if (exp[scan].getMSLevel() != 2 || exp[scan].getPrecursors().empty()) continue; // extract precusor / MS2 information const double pc_mz = exp[scan].getPrecursors()[0].getMZ(); const double rt = exp[scan].getRT(); const int pc_charge = exp[scan].getPrecursors()[0].getCharge(); for (Size f = 0; f != features.size(); ++f) { // feature is incompatible if believe_charge is set and charges don't match if (believe_charge && features[f].getCharge() != pc_charge) continue; // check if precursor/MS2 position overlap with feature if (overlaps_(features[f], rt, pc_mz, rt_tolerance_s)) { scan_idx_to_feature_idx[scan].insert(f); } } } // filter sets to retain compatible features: // if precursor_mz = feature_mz + n * feature_charge (+/- mz_tolerance) a feature is compatible, others are removed from the set for (map<Size, set<Size> >::iterator it = scan_idx_to_feature_idx.begin(); it != scan_idx_to_feature_idx.end(); ++it) { const Size scan = it->first; const double pc_mz = exp[scan].getPrecursors()[0].getMZ(); const double mz_tolerance_da = ppm ? pc_mz * mz_tolerance * 1e-6 : mz_tolerance; // Note: This is the "delete while iterating" pattern so mind the pre- and postincrement for (set<Size>::iterator sit = it->second.begin(); sit != it->second.end(); ) { if (!compatible_(features[*sit], pc_mz, mz_tolerance_da, max_trace)) { it->second.erase(sit++); } else { ++sit; } } } // remove entries with no compatible features (empty sets). 
// Note: This is the "delete while iterating" pattern so mind the pre- and postincrement for (map<Size, set<Size> >::iterator it = scan_idx_to_feature_idx.begin(); it != scan_idx_to_feature_idx.end(); ) { if (it->second.empty()) { scan_idx_to_feature_idx.erase(it++); } else { ++it; } } if (debug_level_ > 0) { LOG_INFO << "Number of precursors with compatible features: " << scan_idx_to_feature_idx.size() << endl; } if (!all_matching_features) { // keep only nearest features in set for (map<Size, set<Size> >::iterator it = scan_idx_to_feature_idx.begin(); it != scan_idx_to_feature_idx.end(); ++it) { const Size scan = it->first; const double pc_rt = exp[scan].getRT(); double min_distance = 1e16; set<Size>::iterator best_feature = it->second.begin(); // determine nearest/best feature for (set<Size>::iterator sit = it->second.begin(); sit != it->second.end(); ++sit) { const double current_distance = fabs(pc_rt - features[*sit].getRT()); if (current_distance < min_distance) { min_distance = current_distance; best_feature = sit; } } // delete all except the nearest/best feature // Note: This is the "delete while iterating" pattern so mind the pre- and postincrement for (set<Size>::iterator sit = it->second.begin(); sit != it->second.end(); ) { if (sit != best_feature) { it->second.erase(sit++); } else { ++sit; } } } } // depending on all_matching_features option, only the nearest or all features are contained in the sets // depending on options: move/copy corrected precursor and tandem spectrum if (keep_original) { // duplicate spectra for each feature in set and adapt precursor_mz and precursor_charge to feature_mz and feature_charge for (map<Size, set<Size> >::iterator it = scan_idx_to_feature_idx.begin(); it != scan_idx_to_feature_idx.end(); ++it) { const Size scan = it->first; MSSpectrum<> spectrum = exp[scan]; corrected_precursors.insert(scan); for (set<Size>::iterator f_it = it->second.begin(); f_it != it->second.end(); ++f_it) { spectrum.getPrecursors()[0].setMZ(features[*f_it].getMZ()); spectrum.getPrecursors()[0].setCharge(features[*f_it].getCharge()); exp.addSpectrum(spectrum); } } } else { // set precursor_mz and _charge to the feature_mz and _charge for (map<Size, set<Size> >::iterator it = scan_idx_to_feature_idx.begin(); it != scan_idx_to_feature_idx.end(); ++it) { const Size scan = it->first; exp[scan].getPrecursors()[0].setMZ(features[*it->second.begin()].getMZ()); exp[scan].getPrecursors()[0].setCharge(features[*it->second.begin()].getCharge()); corrected_precursors.insert(scan); } } return corrected_precursors; }
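// The function above repeatedly uses the pre-C++11 "delete while iterating" idiom on
// associative containers. A tiny standalone illustration of that pattern (not OpenMS
// specific): the iterator is post-incremented *before* erase() invalidates it.
#include <map>
#include <set>
#include <cstdio>

int main()
{
  std::map<int, std::set<int> > scan_to_features;
  scan_to_features[1] = std::set<int>(); // empty -> will be removed
  scan_to_features[2].insert(7);         // non-empty -> kept

  for (std::map<int, std::set<int> >::iterator it = scan_to_features.begin();
       it != scan_to_features.end(); )
  {
    if (it->second.empty())
    {
      scan_to_features.erase(it++); // erase the current element, then advance
    }
    else
    {
      ++it;
    }
  }
  std::printf("remaining entries: %zu\n", scan_to_features.size()); // prints 1
  return 0;
}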
ExitCodes main_(int argc, const char** argv) { //------------------------------------------------------------- // parameter handling //------------------------------------------------------------- //input/output files String in(getStringOption_("in")), out(getStringOption_("out")); FileHandler fh; FileTypes::Type in_type = fh.getType(in); //------------------------------------------------------------- // loading input //------------------------------------------------------------- PeakMap exp; // keep only MS2 spectra fh.getOptions().addMSLevel(2); fh.loadExperiment(in, exp, in_type, log_type_); writeDebug_(String("Spectra loaded: ") + exp.size(), 2); if (exp.getSpectra().empty()) { throw OpenMS::Exception::FileEmpty(__FILE__, __LINE__, __FUNCTION__, "Error: No MS2 spectra in input file."); } // determine type of spectral data (profile or centroided) SpectrumSettings::SpectrumType spectrum_type = exp[0].getType(); if (spectrum_type == SpectrumSettings::RAWDATA) { if (!getFlag_("force")) { throw OpenMS::Exception::IllegalArgument(__FILE__, __LINE__, __FUNCTION__, "Error: Profile data provided but centroided MS2 spectra expected. To enforce processing of the data set the -force flag."); } } //------------------------------------------------------------- // calculations //------------------------------------------------------------- Param mascot_param = getParam_().copy("Mascot_parameters:", true); MascotGenericFile mgf_file; Param p; // TODO: switch this to mzML (much smaller) p.setValue("internal:format", "Mascot generic", "Sets the format type of the peak list, this should not be changed unless you write the header only.", ListUtils::create<String>("advanced")); p.setValue("internal:HTTP_format", "true", "Write header with MIME boundaries instead of simple key-value pairs. For HTTP submission only.", ListUtils::create<String>("advanced")); p.setValue("internal:content", "all", "Use parameter header + the peak lists with BEGIN IONS... 
or only one of them.", ListUtils::create<String>("advanced")); mgf_file.setParameters(mascot_param); // get the spectra into string stream writeDebug_("Writing MGF file to stream", 1); stringstream ss; mgf_file.store(ss, in, exp, true); // write in compact format // Usage of a QCoreApplication is overkill here (and ugly too), but we just use the // QEventLoop to process the signals and slots and grab the results afterwards from // the MascotRemotQuery instance char** argv2 = const_cast<char**>(argv); QCoreApplication event_loop(argc, argv2); MascotRemoteQuery* mascot_query = new MascotRemoteQuery(&event_loop); Param mascot_query_param = getParam_().copy("Mascot_server:", true); writeDebug_("Setting parameters for Mascot query", 1); mascot_query->setParameters(mascot_query_param); writeDebug_("Setting spectra for Mascot query", 1); mascot_query->setQuerySpectra(ss.str()); // remove unnecessary spectra ss.clear(); QObject::connect(mascot_query, SIGNAL(done()), &event_loop, SLOT(quit())); QTimer::singleShot(1000, mascot_query, SLOT(run())); writeDebug_("Fire off Mascot query", 1); event_loop.exec(); writeDebug_("Mascot query finished", 1); if (mascot_query->hasError()) { writeLog_("An error occurred during the query: " + mascot_query->getErrorMessage()); delete mascot_query; return EXTERNAL_PROGRAM_ERROR; } // write Mascot response to file String mascot_tmp_file_name(File::getTempDirectory() + "/" + File::getUniqueName() + "_Mascot_response"); QFile mascot_tmp_file(mascot_tmp_file_name.c_str()); mascot_tmp_file.open(QIODevice::WriteOnly); mascot_tmp_file.write(mascot_query->getMascotXMLResponse()); mascot_tmp_file.close(); // clean up delete mascot_query; vector<PeptideIdentification> pep_ids; ProteinIdentification prot_id; // set up mapping between scan numbers and retention times: MascotXMLFile::RTMapping rt_mapping; MascotXMLFile::generateRTMapping(exp.begin(), exp.end(), rt_mapping); // read the response MascotXMLFile().load(mascot_tmp_file_name, prot_id, pep_ids, rt_mapping); writeDebug_("Read " + String(pep_ids.size()) + " peptide ids and " + String(prot_id.getHits().size()) + " protein identifications from Mascot", 5); // for debugging errors relating to unexpected response files if (this->debug_level_ >= 100) { writeDebug_(String("\nMascot Server Response file saved to: '") + mascot_tmp_file_name + "'. If an error occurs, send this file to the OpenMS team.\n", 100); } else { // delete file mascot_tmp_file.remove(); } // keep or delete protein identifications?! vector<ProteinIdentification> prot_ids; if (!getFlag_("keep_protein_links")) { // remove protein links from peptides for (Size i = 0; i < pep_ids.size(); ++i) { std::vector<PeptideHit> hits = pep_ids[i].getHits(); for (Size h = 0; h < hits.size(); ++h) { hits[h].setPeptideEvidences(vector<PeptideEvidence>()); } pep_ids[i].setHits(hits); } // remove proteins std::vector<ProteinHit> p_hit; prot_id.setHits(p_hit); } prot_ids.push_back(prot_id); //------------------------------------------------------------- // writing output //------------------------------------------------------------- IdXMLFile().store(out, prot_ids, pep_ids); return EXECUTION_OK; }
ExitCodes main_(int, const char**) { //------------------------------------------------------------- // general variables and data //------------------------------------------------------------- FileHandler fh; vector<PeptideIdentification> peptide_identifications; vector<ProteinIdentification> protein_identifications; //------------------------------------------------------------- // reading input //------------------------------------------------------------- const String in = getStringOption_("in"); ProgressLogger logger; logger.setLogType(ProgressLogger::CMD); logger.startProgress(0, 1, "Loading..."); if (File::isDirectory(in)) { const String in_directory = File::absolutePath(in).ensureLastChar('/'); const String mz_file = getStringOption_("mz_file"); const bool ignore_proteins_per_peptide = getFlag_("ignore_proteins_per_peptide"); UInt i = 0; FileHandler fh; FileTypes::Type type; MSExperiment<Peak1D> msexperiment; // Note: we had issues with leading zeroes, so let us represent scan numbers as Int (next line used to be map<String, float> num_and_rt;) However, now String::toInt() might throw. map<Int, float> num_and_rt; vector<String> NativeID; // The mz-File (if given) if (!mz_file.empty()) { type = fh.getTypeByFileName(mz_file); fh.loadExperiment(mz_file, msexperiment, type); for (MSExperiment<Peak1D>::Iterator spectra_it = msexperiment.begin(); spectra_it != msexperiment.end(); ++spectra_it) { String(spectra_it->getNativeID()).split('=', NativeID); try { num_and_rt[NativeID[1].toInt()] = spectra_it->getRT(); // cout << "num_and_rt: " << NativeID[1] << " = " << NativeID[1].toInt() << " : " << num_and_rt[NativeID[1].toInt()] << endl; // CG debuggging 2009-07-01 } catch (Exception::ConversionError& e) { writeLog_(String("Error: Cannot read scan number as integer. '") + e.getMessage()); } } } // Get list of the actual Sequest .out-Files StringList in_files; if (!File::fileList(in_directory, String("*.out"), in_files)) { writeLog_(String("Error: No .out files found in '") + in_directory + "'. Aborting!"); } // Now get to work ... for (vector<String>::const_iterator in_files_it = in_files.begin(); in_files_it != in_files.end(); ++in_files_it) { vector<PeptideIdentification> peptide_ids_seq; ProteinIdentification protein_id_seq; vector<double> pvalues_seq; vector<String> in_file_vec; SequestOutfile sequest_outfile; writeDebug_(String("Reading file ") + *in_files_it, 3); try { sequest_outfile.load((String) (in_directory + *in_files_it), peptide_ids_seq, protein_id_seq, 1.0, pvalues_seq, "Sequest", ignore_proteins_per_peptide); in_files_it->split('.', in_file_vec); for (Size j = 0; j < peptide_ids_seq.size(); ++j) { // We have to explicitly set the identifiers, because the normal set ones are composed of search engine name and date, which is the same for a bunch of sequest out-files. peptide_ids_seq[j].setIdentifier(*in_files_it + "_" + i); Int scan_number = 0; if (!mz_file.empty()) { try { scan_number = in_file_vec[2].toInt(); peptide_ids_seq[j].setRT(num_and_rt[scan_number]); } catch (Exception::ConversionError& e) { writeLog_(String("Error: Cannot read scan number as integer. '") + e.getMessage()); } catch (exception& e) { writeLog_(String("Error: Cannot read scan number as integer. '") + e.what()); } //double real_mz = ( peptide_ids_seq[j].getMZ() - hydrogen_mass )/ (double)peptide_ids_seq[j].getHits()[0].getCharge(); // ???? 
semantics of mz const double real_mz = peptide_ids_seq[j].getMZ() / (double) peptide_ids_seq[j].getHits()[0].getCharge(); peptide_ids_seq[j].setMZ(real_mz); } writeDebug_(String("scan: ") + String(scan_number) + String(" RT: ") + String(peptide_ids_seq[j].getRT()) + " MZ: " + String(peptide_ids_seq[j].getMZ()) + " Ident: " + peptide_ids_seq[j].getIdentifier(), 4); peptide_identifications.push_back(peptide_ids_seq[j]); } protein_id_seq.setIdentifier(*in_files_it + "_" + i); protein_identifications.push_back(protein_id_seq); ++i; } catch (Exception::ParseError& pe) { writeLog_(pe.getMessage() + String("(file: ") + *in_files_it + ")"); throw; } catch (...) { writeLog_(String("Error reading file: ") + *in_files_it); throw; } } writeDebug_("All files processed.", 3); } // ! directory else { FileTypes::Type in_type = fh.getType(in); if (in_type == FileTypes::PEPXML) { String exp_name = getStringOption_("mz_file"); String orig_name = getStringOption_("mz_name"); bool use_precursor_data = getFlag_("use_precursor_data"); if (exp_name.empty()) { PepXMLFile().load(in, protein_identifications, peptide_identifications, orig_name); } else { MSExperiment<> exp; fh.loadExperiment(exp_name, exp); if (!orig_name.empty()) { exp_name = orig_name; } PepXMLFile().load(in, protein_identifications, peptide_identifications, exp_name, exp, use_precursor_data); } } else if (in_type == FileTypes::IDXML) { IdXMLFile().load(in, protein_identifications, peptide_identifications); } else if (in_type == FileTypes::MZIDENTML) { LOG_WARN << "Converting from mzid: you might experience loss of information depending on the capabilities of the target format." << endl; MzIdentMLFile().load(in, protein_identifications, peptide_identifications); } else if (in_type == FileTypes::PROTXML) { protein_identifications.resize(1); peptide_identifications.resize(1); ProtXMLFile().load(in, protein_identifications[0], peptide_identifications[0]); } else if (in_type == FileTypes::OMSSAXML) { protein_identifications.resize(1); OMSSAXMLFile().load(in, protein_identifications[0], peptide_identifications, true); } else if (in_type == FileTypes::MASCOTXML) { String scan_regex = getStringOption_("scan_regex"); String exp_name = getStringOption_("mz_file"); MascotXMLFile::RTMapping rt_mapping; if (!exp_name.empty()) { PeakMap exp; // load only MS2 spectra: fh.getOptions().addMSLevel(2); fh.loadExperiment(exp_name, exp, FileTypes::MZML, log_type_); MascotXMLFile::generateRTMapping(exp.begin(), exp.end(), rt_mapping); } protein_identifications.resize(1); MascotXMLFile().load(in, protein_identifications[0], peptide_identifications, rt_mapping, scan_regex); } else if (in_type == FileTypes::XML) { ProteinIdentification protein_id; XTandemXMLFile().load(in, protein_id, peptide_identifications); protein_id.setSearchEngineVersion(""); protein_id.setSearchEngine("XTandem"); protein_identifications.push_back(protein_id); String exp_name = getStringOption_("mz_file"); if (!exp_name.empty()) { PeakMap exp; fh.getOptions().addMSLevel(2); fh.loadExperiment(exp_name, exp, FileTypes::MZML, log_type_); for (vector<PeptideIdentification>::iterator it = peptide_identifications.begin(); it != peptide_identifications.end(); ++it) { UInt id = (Int)it->getMetaValue("spectrum_id"); --id; // native IDs were written 1-based if (id < exp.size()) { it->setRT(exp[id].getRT()); double pre_mz(0.0); if (!exp[id].getPrecursors().empty()) pre_mz = exp[id].getPrecursors()[0].getMZ(); it->setMZ(pre_mz); it->removeMetaValue("spectrum_id"); } else { LOG_ERROR << "XTandem xml: Error: id 
'" << id << "' not found in peak map!" << endl; } } } } else { writeLog_("Unknown input file type given. Aborting!"); printUsage_(); return ILLEGAL_PARAMETERS; } } logger.endProgress(); //------------------------------------------------------------- // writing output //------------------------------------------------------------- const String out = getStringOption_("out"); FileTypes::Type out_type = FileTypes::nameToType(getStringOption_("out_type")); if (out_type == FileTypes::UNKNOWN) { out_type = fh.getTypeByFileName(out); } if (out_type == FileTypes::UNKNOWN) { writeLog_("Error: Could not determine output file type!"); return PARSE_ERROR; } logger.startProgress(0, 1, "Storing..."); if (out_type == FileTypes::PEPXML) { bool peptideprophet_analyzed = getFlag_("peptideprophet_analyzed"); String mz_file = getStringOption_("mz_file"); String mz_name = getStringOption_("mz_name"); PepXMLFile().store(out, protein_identifications, peptide_identifications, mz_file, mz_name, peptideprophet_analyzed); } else if (out_type == FileTypes::IDXML) { IdXMLFile().store(out, protein_identifications, peptide_identifications); } else if (out_type == FileTypes::MZIDENTML) { MzIdentMLFile().store(out, protein_identifications, peptide_identifications); } else if (out_type == FileTypes::FASTA) { Size count = 0; ofstream fasta(out.c_str(), ios::out); for (Size i = 0; i < peptide_identifications.size(); ++i) { for (Size l = 0; l < peptide_identifications[i].getHits().size(); ++l) { const PeptideHit& hit = peptide_identifications[i].getHits()[l]; fasta << ">" << hit.getSequence().toUnmodifiedString() << "|" << count++ << "|" << hit.getSequence().toString() << endl; String seq = hit.getSequence().toUnmodifiedString(); // FASTA files should have at most 60 characters of sequence info per line for (Size j = 0; j < seq.size(); j += 60) { Size k = min(j + 60, seq.size()); fasta << string(seq[j], seq[k]) << endl; } } } } else { writeLog_("Unsupported output file type given. Aborting!"); printUsage_(); return ILLEGAL_PARAMETERS; } logger.endProgress(); return EXECUTION_OK; }
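// Side note and sketch (not from the original source): the FASTA export above intends to
// print at most 60 sequence characters per line, but std::string(seq[j], seq[k]) constructs
// a string of seq[j] copies of the character seq[k] rather than a substring. A substring-based
// version of the same wrapping would look like this:
#include <string>
#include <iostream>
#include <algorithm>

void writeWrappedSequence(std::ostream& os, const std::string& seq)
{
  for (std::size_t j = 0; j < seq.size(); j += 60)
  {
    const std::size_t k = std::min<std::size_t>(j + 60, seq.size());
    os << seq.substr(j, k - j) << "\n"; // one chunk of up to 60 residues per line
  }
}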
TOLERANCE_ABSOLUTE(0.01)

MzDataFile file;
PeakMap e;

// real test
file.load(OPENMS_GET_TEST_DATA_PATH("MzDataFile_1.mzData"), e);

//test DocumentIdentifier addition
TEST_STRING_EQUAL(e.getLoadedFilePath(), OPENMS_GET_TEST_DATA_PATH("MzDataFile_1.mzData"));
TEST_STRING_EQUAL(FileTypes::typeToName(e.getLoadedFileType()), "mzData");

//---------------------------------------------------------------------------
// ms-level, RT, native ID
//---------------------------------------------------------------------------
TEST_EQUAL(e.size(), 3)
TEST_EQUAL(e[0].getMSLevel(), 1)
TEST_EQUAL(e[1].getMSLevel(), 2)
TEST_EQUAL(e[2].getMSLevel(), 1)
TEST_REAL_SIMILAR(e[0].getRT(), 60)
TEST_REAL_SIMILAR(e[1].getRT(), 120)
TEST_REAL_SIMILAR(e[2].getRT(), 180)
TEST_STRING_EQUAL(e[0].getNativeID(), "spectrum=10")
TEST_STRING_EQUAL(e[1].getNativeID(), "spectrum=11")
TEST_STRING_EQUAL(e[2].getNativeID(), "spectrum=12")
TEST_EQUAL(e[0].getType(), SpectrumSettings::UNKNOWN)

//---------------------------------------------------------------------------
// meta data array meta data
//---------------------------------------------------------------------------
TEST_EQUAL(e[0].getFloatDataArrays()[0].getMetaValue("URL"), "www.open-ms.de")
TOLERANCE_ABSOLUTE(0.01)

PeakMap e;
DTA2DFile file;

//test exception
TEST_EXCEPTION(Exception::FileNotFound, file.load("dummy/dummy.dta2d", e))

// real test
file.load(OPENMS_GET_TEST_DATA_PATH("DTA2DFile_test_1.dta2d"), e);

//test DocumentIdentifier addition
TEST_STRING_EQUAL(e.getLoadedFilePath(), OPENMS_GET_TEST_DATA_PATH("DTA2DFile_test_1.dta2d"));
TEST_STRING_EQUAL(FileTypes::typeToName(e.getLoadedFileType()), "dta2d");

TEST_EQUAL(e.size(), 9);
ABORT_IF(e.size() != 9)

TEST_STRING_EQUAL(e[0].getNativeID(), "index=0")
TEST_STRING_EQUAL(e[1].getNativeID(), "index=1")
TEST_STRING_EQUAL(e[2].getNativeID(), "index=2")
TEST_STRING_EQUAL(e[3].getNativeID(), "index=3")
TEST_STRING_EQUAL(e[4].getNativeID(), "index=4")
TEST_STRING_EQUAL(e[5].getNativeID(), "index=5")
TEST_STRING_EQUAL(e[6].getNativeID(), "index=6")
TEST_STRING_EQUAL(e[7].getNativeID(), "index=7")
TEST_STRING_EQUAL(e[8].getNativeID(), "index=8")

PeakMap::const_iterator it(e.begin());
TEST_REAL_SIMILAR((*it)[0].getPosition()[0], 230.02)
TEST_REAL_SIMILAR(it->getRT(), 4711.1)
  chrom2.setChromatogramType(ChromatogramSettings::SELECTED_REACTION_MONITORING_CHROMATOGRAM);

  ChromatogramPeak peak1, peak2, peak3;
  peak1.setRT(0.1);
  peak2.setRT(0.2);
  peak3.setRT(0.3);
  chrom1.push_back(peak1);
  chrom1.push_back(peak2);
  chrom2.push_back(peak2);
  chrom2.push_back(peak2);
  exp.addChromatogram(chrom1);
  exp.addChromatogram(chrom2);

  TEST_EQUAL(exp.size(), 0)
  TEST_EQUAL(exp.getChromatograms().size(), 2)

  ChromatogramTools().convertChromatogramsToSpectra(exp);
  TEST_EQUAL(exp.size(), 4)
  TEST_EQUAL(exp.getChromatograms().size(), 0)
  TEST_REAL_SIMILAR(exp[0][0].getMZ(), 200.1)
  TEST_EQUAL(exp[0].getPrecursors().size(), 1)
  TEST_REAL_SIMILAR(exp[0].getPrecursors().begin()->getMZ(), 100.1)
}
END_SECTION

START_SECTION(template <typename ExperimentType> void convertSpectraToChromatograms(ExperimentType& exp, bool remove_spectra = false))
{
ExitCodes main_(int, const char**) { // instance specific location of settings in INI file (e.g. 'TOPP_Skeleton:1:') String ini_location; // path to the log file String logfile(getStringOption_("log")); String xtandem_executable(getStringOption_("xtandem_executable")); String inputfile_name; String outputfile_name; //------------------------------------------------------------- // parsing parameters //------------------------------------------------------------- inputfile_name = getStringOption_("in"); writeDebug_(String("Input file: ") + inputfile_name, 1); if (inputfile_name == "") { writeLog_("No input file specified. Aborting!"); printUsage_(); return ILLEGAL_PARAMETERS; } outputfile_name = getStringOption_("out"); writeDebug_(String("Output file: ") + outputfile_name, 1); if (outputfile_name == "") { writeLog_("No output file specified. Aborting!"); printUsage_(); return ILLEGAL_PARAMETERS; } // write input xml file String temp_directory = QDir::toNativeSeparators((File::getTempDirectory() + "/" + File::getUniqueName() + "/").toQString()); // body for the tmp files { QDir d; d.mkpath(temp_directory.toQString()); } String input_filename(temp_directory + "_tandem_input_file.xml"); String tandem_input_filename(temp_directory + "_tandem_input_file.mzData"); String tandem_output_filename(temp_directory + "_tandem_output_file.xml"); String tandem_taxonomy_filename(temp_directory + "_tandem_taxonomy_file.xml"); //------------------------------------------------------------- // Validate user parameters //------------------------------------------------------------- if (getIntOption_("min_precursor_charge") > getIntOption_("max_precursor_charge")) { LOG_ERROR << "Given charge range is invalid: max_precursor_charge needs to be >= min_precursor_charge." << std::endl; return ILLEGAL_PARAMETERS; } //------------------------------------------------------------- // reading input //------------------------------------------------------------- String db_name(getStringOption_("database")); if (!File::readable(db_name)) { String full_db_name; try { full_db_name = File::findDatabase(db_name); } catch (...) { printUsage_(); return ILLEGAL_PARAMETERS; } db_name = full_db_name; } PeakMap exp; MzMLFile mzml_file; mzml_file.getOptions().addMSLevel(2); // only load msLevel 2 mzml_file.setLogType(log_type_); mzml_file.load(inputfile_name, exp); if (exp.getSpectra().empty()) { throw OpenMS::Exception::FileEmpty(__FILE__, __LINE__, __FUNCTION__, "Error: No MS2 spectra in input file."); } // determine type of spectral data (profile or centroided) SpectrumSettings::SpectrumType spectrum_type = exp[0].getType(); if (spectrum_type == SpectrumSettings::RAWDATA) { if (!getFlag_("force")) { throw OpenMS::Exception::IllegalArgument(__FILE__, __LINE__, __FUNCTION__, "Error: Profile data provided but centroided MS2 spectra expected. To enforce processing of the data set the -force flag."); } } // we need to replace the native id with a simple numbering schema, to be able to // map the IDs back to the spectra (RT, and MZ information) Size native_id(0); for (PeakMap::Iterator it = exp.begin(); it != exp.end(); ++it) { it->setNativeID(++native_id); } // We store the file in mzData file format, because MGF files somehow produce in most // of the cases IDs with charge 2+. We do not use the input file directly // because XTandem sometimes stumbles over misleading substrings in the filename, // e.g. mzXML ... 
MzDataFile mzdata_outfile; mzdata_outfile.store(tandem_input_filename, exp); XTandemInfile infile; infile.setInputFilename(tandem_input_filename); infile.setOutputFilename(tandem_output_filename); ofstream tax_out(tandem_taxonomy_filename.c_str()); tax_out << "<?xml version=\"1.0\"?>" << "\n"; tax_out << "\t<bioml label=\"x! taxon-to-file matching list\">" << "\n"; tax_out << "\t\t<taxon label=\"OpenMS_dummy_taxonomy\">" << "\n"; tax_out << "\t\t\t<file format=\"peptide\" URL=\"" << db_name << "\" />" << "\n"; tax_out << "\t</taxon>" << "\n"; tax_out << "</bioml>" << "\n"; tax_out.close(); infile.setTaxonomyFilename(tandem_taxonomy_filename); if (getStringOption_("precursor_error_units") == "Da") { infile.setPrecursorMassErrorUnit(XTandemInfile::DALTONS); } else { infile.setPrecursorMassErrorUnit(XTandemInfile::PPM); } if (getStringOption_("fragment_error_units") == "Da") { infile.setFragmentMassErrorUnit(XTandemInfile::DALTONS); } else { infile.setFragmentMassErrorUnit(XTandemInfile::PPM); } if (getStringOption_("default_input_file") != "") { infile.load(getStringOption_("default_input_file")); infile.setDefaultParametersFilename(getStringOption_("default_input_file")); } else { String default_file = File::find("CHEMISTRY/XTandem_default_input.xml"); infile.load(default_file); infile.setDefaultParametersFilename(default_file); } infile.setPrecursorMassTolerancePlus(getDoubleOption_("precursor_mass_tolerance")); infile.setPrecursorMassToleranceMinus(getDoubleOption_("precursor_mass_tolerance")); infile.setFragmentMassTolerance(getDoubleOption_("fragment_mass_tolerance")); infile.setMaxPrecursorCharge(getIntOption_("max_precursor_charge")); infile.setNumberOfThreads(getIntOption_("threads")); infile.setModifications(ModificationDefinitionsSet(getStringList_("fixed_modifications"), getStringList_("variable_modifications"))); infile.setTaxon("OpenMS_dummy_taxonomy"); infile.setOutputResults(getStringOption_("output_results")); infile.setMaxValidEValue(getDoubleOption_("max_valid_expect")); infile.setCleavageSite(getStringOption_("cleavage_site")); infile.setNumberOfMissedCleavages(getIntOption_("missed_cleavages")); infile.setRefine(getFlag_("refinement")); infile.setSemiCleavage(getFlag_("semi_cleavage")); bool allow_isotope_error = getStringOption_("allow_isotope_error") == "yes" ? true : false; infile.setAllowIsotopeError(allow_isotope_error); infile.write(input_filename); //------------------------------------------------------------- // calculations //------------------------------------------------------------- int status = QProcess::execute(xtandem_executable.toQString(), QStringList(input_filename.toQString())); // does automatic escaping etc... if (status != 0) { writeLog_("XTandem problem. Aborting! Calling command was: '" + xtandem_executable + " \"" + input_filename + "\"'.\nDoes the !XTandem executable exist?"); // clean temporary files if (this->debug_level_ < 2) { File::removeDirRecursively(temp_directory); LOG_WARN << "Set debug level to >=2 to keep the temporary files at '" << temp_directory << "'" << std::endl; } else { LOG_WARN << "Keeping the temporary files at '" << temp_directory << "'. Set debug level to <2 to remove them." 
<< std::endl; } return EXTERNAL_PROGRAM_ERROR; } vector<ProteinIdentification> protein_ids; ProteinIdentification protein_id; vector<PeptideIdentification> peptide_ids; // read the output of X!Tandem and write it to idXML XTandemXMLFile tandem_output; tandem_output.setModificationDefinitionsSet(ModificationDefinitionsSet(getStringList_("fixed_modifications"), getStringList_("variable_modifications"))); // find the file, because XTandem extends the filename with a timestamp we do not know (exactly) StringList files; File::fileList(temp_directory, "_tandem_output_file*.xml", files); if (files.size() != 1) { throw Exception::FileNotFound(__FILE__, __LINE__, __PRETTY_FUNCTION__, tandem_output_filename); } tandem_output.load(temp_directory + files[0], protein_id, peptide_ids); // now put the RTs into the peptide_ids from the spectrum ids for (vector<PeptideIdentification>::iterator it = peptide_ids.begin(); it != peptide_ids.end(); ++it) { UInt id = (Int)it->getMetaValue("spectrum_id"); --id; // native IDs were written 1-based if (id < exp.size()) { it->setRT(exp[id].getRT()); double pre_mz(0.0); if (!exp[id].getPrecursors().empty()) pre_mz = exp[id].getPrecursors()[0].getMZ(); it->setMZ(pre_mz); //it->removeMetaValue("spectrum_id"); } else { LOG_ERROR << "XTandemAdapter: Error: id '" << id << "' not found in peak map!" << endl; } } //------------------------------------------------------------- // writing output //------------------------------------------------------------- // handle the search parameters ProteinIdentification::SearchParameters search_parameters; search_parameters.db = getStringOption_("database"); search_parameters.charges = "+" + String(getIntOption_("min_precursor_charge")) + "-+" + String(getIntOption_("max_precursor_charge")); ProteinIdentification::PeakMassType mass_type = ProteinIdentification::MONOISOTOPIC; search_parameters.mass_type = mass_type; search_parameters.fixed_modifications = getStringList_("fixed_modifications"); search_parameters.variable_modifications = getStringList_("variable_modifications"); search_parameters.missed_cleavages = getIntOption_("missed_cleavages"); search_parameters.peak_mass_tolerance = getDoubleOption_("fragment_mass_tolerance"); search_parameters.precursor_tolerance = getDoubleOption_("precursor_mass_tolerance"); protein_id.setSearchParameters(search_parameters); protein_id.setSearchEngineVersion(""); protein_id.setSearchEngine("XTandem"); protein_ids.push_back(protein_id); IdXMLFile().store(outputfile_name, protein_ids, peptide_ids); /// Deletion of temporary files if (this->debug_level_ < 2) { File::removeDirRecursively(temp_directory); LOG_WARN << "Set debug level to >=2 to keep the temporary files at '" << temp_directory << "'" << std::endl; } else { LOG_WARN << "Keeping the temporary files at '" << temp_directory << "'. Set debug level to <2 to remove them." << std::endl; } // some stats LOG_INFO << "Statistics:\n" << " identified MS2 spectra: " << peptide_ids.size() << " / " << exp.size() << " = " << int(peptide_ids.size() * 100.0 / exp.size()) << "% (with e-value < " << String(getDoubleOption_("max_valid_expect")) << ")" << std::endl; return EXECUTION_OK; }
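// Hedged helper sketch (illustration only, not part of the adapter above): the RT/m/z
// annotation loop maps the 1-based "spectrum_id" meta value back into the peak map.
// Factored out, that lookup could look like this; header paths are approximate.
#include <OpenMS/KERNEL/StandardTypes.h>
#include <OpenMS/METADATA/PeptideIdentification.h>

// Returns false if the id is missing or does not point into the experiment.
bool annotateFromSpectrum(OpenMS::PeptideIdentification& pep_id, const OpenMS::PeakMap& exp)
{
  using namespace OpenMS;
  if (!pep_id.metaValueExists("spectrum_id")) return false;
  Size id = (Int)pep_id.getMetaValue("spectrum_id");
  if (id == 0 || id > exp.size()) return false; // ids were written 1-based
  --id;
  pep_id.setRT(exp[id].getRT());
  double pre_mz = 0.0;
  if (!exp[id].getPrecursors().empty()) pre_mz = exp[id].getPrecursors()[0].getMZ();
  pep_id.setMZ(pre_mz);
  return true;
}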
START_SECTION((SpectraMerger(const SpectraMerger& source))) SpectraMerger copy(*e_ptr); TEST_EQUAL(copy.getParameters(), e_ptr->getParameters()) END_SECTION START_SECTION((SpectraMerger& operator=(const SpectraMerger& source))) SpectraMerger copy; copy = *e_ptr; TEST_EQUAL(copy.getParameters(), e_ptr->getParameters()) END_SECTION START_SECTION((template < typename MapType > void mergeSpectraBlockWise(MapType &exp))) PeakMap exp, exp2; MzMLFile().load(OPENMS_GET_TEST_DATA_PATH("SpectraMerger_input_2.mzML"), exp); TEST_EQUAL(exp.size(), 144) exp2 = exp; SpectraMerger merger; Param p; p.setValue("block_method:rt_block_size", 5); p.setValue("block_method:ms_levels", IntList::create(StringList::create("1"))); merger.setParameters(p); merger.mergeSpectraBlockWise(exp); TEST_EQUAL(exp.size(), 130); exp=exp2; p.setValue("block_method:rt_block_size", 4); p.setValue("block_method:ms_levels", IntList::create(StringList::create("2"))); merger.setParameters(p);
ExitCodes main_(int, const char **)
{
  //-------------------------------------------------------------
  // parameter handling
  //-------------------------------------------------------------
  in = getStringOption_("in");
  out = getStringOption_("out");
  String process_option = getStringOption_("processOption");

  Param filter_param = getParam_().copy("algorithm:", true);
  writeDebug_("Parameters passed to filter", filter_param, 3);

  SavitzkyGolayFilter sgolay;
  sgolay.setLogType(log_type_);
  sgolay.setParameters(filter_param);

  if (process_option == "lowmemory")
  {
    return doLowMemAlgorithm(sgolay);
  }

  //-------------------------------------------------------------
  // loading input
  //-------------------------------------------------------------
  MzMLFile mz_data_file;
  mz_data_file.setLogType(log_type_);
  PeakMap exp;
  mz_data_file.load(in, exp);

  if (exp.empty() && exp.getChromatograms().size() == 0)
  {
    LOG_WARN << "The given file does not contain any conventional peak data, but might"
                " contain chromatograms. This tool currently cannot handle them, sorry.";
    return INCOMPATIBLE_INPUT_DATA;
  }

  //check for peak type (profile data required)
  if (!exp.empty() && PeakTypeEstimator().estimateType(exp[0].begin(), exp[0].end()) == SpectrumSettings::PEAKS)
  {
    writeLog_("Warning: OpenMS peak type estimation indicates that this is not profile data!");
  }

  //check if spectra are sorted
  for (Size i = 0; i < exp.size(); ++i)
  {
    if (!exp[i].isSorted())
    {
      writeLog_("Error: Not all spectra are sorted according to peak m/z positions. Use FileFilter to sort the input!");
      return INCOMPATIBLE_INPUT_DATA;
    }
  }

  //check if chromatograms are sorted
  for (Size i = 0; i < exp.getChromatograms().size(); ++i)
  {
    if (!exp.getChromatogram(i).isSorted())
    {
      writeLog_("Error: Not all chromatograms are sorted according to peak m/z positions. Use FileFilter to sort the input!");
      return INCOMPATIBLE_INPUT_DATA;
    }
  }

  //-------------------------------------------------------------
  // calculations
  //-------------------------------------------------------------
  sgolay.filterExperiment(exp);

  //-------------------------------------------------------------
  // writing output
  //-------------------------------------------------------------
  //annotate output with data processing info
  addDataProcessing_(exp, getProcessingInfo_(DataProcessing::SMOOTHING));
  mz_data_file.store(out, exp);

  return EXECUTION_OK;
}
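// Hedged usage sketch (not from the TOPP tool above): smooth a profile-mode PeakMap with
// SavitzkyGolayFilter directly. The parameter names "frame_length" and "polynomial_order"
// follow the usual OpenMS defaults but should be checked against the installed version;
// file names are placeholders.
#include <OpenMS/FILTERING/SMOOTHING/SavitzkyGolayFilter.h>
#include <OpenMS/FORMAT/MzMLFile.h>

void smoothProfileData(const OpenMS::String& in_file, const OpenMS::String& out_file)
{
  using namespace OpenMS;
  PeakMap exp;
  MzMLFile().load(in_file, exp);

  SavitzkyGolayFilter sgolay;
  Param p = sgolay.getParameters();
  p.setValue("frame_length", 11);    // odd window width in data points
  p.setValue("polynomial_order", 4); // must be smaller than frame_length
  sgolay.setParameters(p);

  sgolay.filterExperiment(exp); // smooths every spectrum (and chromatogram) in place
  MzMLFile().store(out_file, exp);
}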
ExitCodes main_(int , const char**) override { // path to the log file String logfile(getStringOption_("log")); String pepnovo_executable(getStringOption_("pepnovo_executable")); PeakMap exp; String inputfile_name = getStringOption_("in"); writeDebug_(String("Input file: ") + inputfile_name, 1); String outputfile_name = getStringOption_("out"); writeDebug_(String("Output file: ") + outputfile_name, 1); String model_directory = getStringOption_("model_directory"); writeDebug_(String("model directory: ") + model_directory, 1); String model_name = getStringOption_("model"); writeDebug_(String("model directory: ") + model_name, 1); double fragment_tolerance = getDoubleOption_("fragment_tolerance"); if (fragment_tolerance!=-1.0 && (fragment_tolerance<0 || fragment_tolerance>0.75)) { writeLog_("Invalid fragment tolerance"); printUsage_(); return ILLEGAL_PARAMETERS; } double pm_tolerance = getDoubleOption_("pm_tolerance"); if (pm_tolerance!=-1.0 && (pm_tolerance<0.0 || pm_tolerance>5.0)) { writeLog_("Invalid fragment tolerance"); printUsage_(); return ILLEGAL_PARAMETERS; } Int tag_length = getIntOption_("tag_length"); if ( tag_length!=-1 && (tag_length<3 || tag_length>6)) { writeLog_("Invalid fragment tolerance"); printUsage_(); return ILLEGAL_PARAMETERS; } String digest = getStringOption_("digest"); Size num_solutions=getIntOption_("num_solutions"); //------------------------------------------------------------- // reading input //------------------------------------------------------------- // only load msLevel 2 MzMLFile mzml_infile; mzml_infile.getOptions().addMSLevel(2); mzml_infile.setLogType(log_type_); mzml_infile.load(inputfile_name, exp); // we map the native id to the MZ and RT to be able to // map the IDs back to the spectra (RT, and MZ Meta Information) PepNovoOutfile::IndexPosMappingType index_to_precursor; for (Size i = 0; i < exp.size(); ++i) { index_to_precursor[i]= make_pair(exp[i].getRT(), exp[i].getPrecursors()[0].getPosition()[0]); //set entry <RT, MZ> } logfile = getStringOption_("log"); QDir qdir_models_source(model_directory.c_str()); if (!qdir_models_source.exists()) { writeLog_("The model directory does not exist"); return INPUT_FILE_NOT_FOUND; } // create temp directory QDir qdir_temp(File::getTempDirectory().toQString()); String temp_data_directory = File::getUniqueName(); qdir_temp.mkdir(temp_data_directory.toQString()); qdir_temp.cd(temp_data_directory.toQString()); temp_data_directory = File::getTempDirectory() + "/" + temp_data_directory; // delete later String mgf_file = temp_data_directory + "/" + File::getUniqueName() + ".mgf"; // the mzXML parser of PepNovo is somewhat broken.. don't use mzXML MascotGenericFile().store(mgf_file, exp); bool error(false); try { //temporary File to store PepNovo output String temp_pepnovo_outfile = qdir_temp.absoluteFilePath("tmp_pepnovo_out.txt"); String tmp_models_dir = qdir_temp.absoluteFilePath("Models"); std::map<String, String>mods_and_keys; //, key_to_id; if (qdir_temp.cd("Models")) { writeLog_("The temporary directory already contains \"Model\" Folder. Please delete it and re-run. Aborting!"); return CANNOT_WRITE_OUTPUT_FILE; } else { qdir_temp.mkdir("Models"); qdir_temp.cd("Models"); } //copy the Models folder of OpenMS into the temp_data_directory QStringList pepnovo_files = qdir_models_source.entryList(QDir::Dirs | QDir::Files|QDir::NoDotAndDotDot); if (pepnovo_files.empty()) { writeLog_("The \"Model\" directory does not contain model files. 
Aborting!"); return INPUT_FILE_NOT_FOUND; } for (QStringList::ConstIterator file_it=pepnovo_files.begin(); file_it!=pepnovo_files.end(); ++file_it) { if (qdir_models_source.cd(*file_it)) { qdir_temp.mkdir(*file_it); qdir_temp.cd(*file_it); QStringList subdir_files = qdir_models_source.entryList(QDir::Dirs | QDir::Files|QDir::NoDotAndDotDot); for (QStringList::ConstIterator subdir_file_it=subdir_files.begin(); subdir_file_it!=subdir_files.end(); ++subdir_file_it) { QFile::copy(qdir_models_source.filePath(*subdir_file_it), qdir_temp.filePath(*subdir_file_it)); } qdir_temp.cdUp(); qdir_models_source.cdUp(); } else { QFile::copy(qdir_models_source.filePath(*file_it), qdir_temp.filePath(*file_it)); } } //generate PTM File and store in temp directory PepNovoInfile p_novo_infile; String ptm_command; if (!getStringList_("fixed_modifications").empty() || !getStringList_("variable_modifications").empty()) { p_novo_infile.setModifications(getStringList_("fixed_modifications"), getStringList_("variable_modifications")); p_novo_infile.store(qdir_temp.filePath("PepNovo_PTMs.txt")); pepnovo_files.append("PepNovo_PTMs.txt"); p_novo_infile.getModifications(mods_and_keys); for (std::map<String, String>::const_iterator key_it=mods_and_keys.begin(); key_it!=mods_and_keys.end();++key_it) { if (ptm_command!="") { ptm_command+=":"; } ptm_command+= key_it->first; //key_to_id[key_it->second]=key_it->first; } } //------------------------------------------------------------- // (3) running program according to parameters //------------------------------------------------------------- QStringList arguments; arguments << "-file" << mgf_file.toQString(); arguments << "-model" << model_name.toQString(); if (pm_tolerance != -1 ) arguments << "-pm_tolerance"<<String(pm_tolerance).toQString(); if (fragment_tolerance != -1 ) arguments << "-fragment_tolerance" <<String(fragment_tolerance).toQString(); if (!ptm_command.empty()) arguments <<"-PTMs" <<ptm_command.toQString(); if (getFlag_("correct_pm")) arguments << "-correct_pm"; if (getFlag_("use_spectrum_charge")) arguments << "-use_spectrum_charge"; if (getFlag_("use_spectrum_mz")) arguments << "-use_spectrum_mz"; if (getFlag_("no_quality_filter")) arguments << "-no_quality_filter"; arguments << "-digest" << digest.toQString(); arguments << "-num_solutions" << String(num_solutions).toQString(); if (tag_length!=-1) arguments<<"-tag_length" << String(tag_length).toQString(); arguments<<"-model_dir" << tmp_models_dir.toQString(); //arguments<<">" << temp_pepnovo_outfile.toQString(); writeDebug_("Use this line to call PepNovo: ", 1); writeDebug_(pepnovo_executable + " " + String(arguments.join(" ")), 1); QProcess process; process.setStandardOutputFile(temp_pepnovo_outfile.toQString()); process.setStandardErrorFile(temp_pepnovo_outfile.toQString()); process.start(pepnovo_executable.toQString(), arguments); // does automatic escaping etc... 
if (process.waitForFinished(-1)) { //if PepNovo finished successfully, use PepNovoOutfile to parse the results and generate idXML std::vector< PeptideIdentification > peptide_identifications; ProteinIdentification protein_identification; StringList ms_runs; exp.getPrimaryMSRunPath(ms_runs); protein_identification.setPrimaryMSRunPath(ms_runs); PepNovoOutfile p_novo_outfile; //resolve PTMs (match them back to the OpenMS identifier string) std::vector<ProteinIdentification> prot_ids; p_novo_outfile.load(temp_pepnovo_outfile, peptide_identifications, protein_identification, -1e5, index_to_precursor, mods_and_keys); prot_ids.push_back(protein_identification); IdXMLFile().store(outputfile_name, prot_ids, peptide_identifications); } if (process.exitStatus() != QProcess::NormalExit || process.exitCode() != 0) { error = true; } } catch(Exception::BaseException &exc) { writeLog_(exc.what()); LOG_ERROR << "Error occurred: " << exc.what() << std::endl; error = true; } if (!error) { File::removeDirRecursively(temp_data_directory); return EXECUTION_OK; } else { writeLog_("PepNovo problem. Aborting! (Details can be seen in the output files in: '" + temp_data_directory + "')"); return EXTERNAL_PROGRAM_ERROR; } }
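// --- Hedged sketch: checking the result of an external process -------------
// The adapter above only trusts the PepNovo output file if the QProcess call
// finished normally. The exit handling in isolation could look like this;
// runExternalTool is a hypothetical helper, not part of the adapter, and the
// Qt calls used (start, waitForFinished, exitStatus, exitCode) are standard
// QProcess API.
#include <QtCore/QProcess>
#include <QtCore/QStringList>

bool runExternalTool(const QString& executable, const QStringList& arguments, const QString& log_file)
{
  QProcess process;
  process.setStandardOutputFile(log_file);  // capture stdout for later parsing
  process.setStandardErrorFile(log_file);   // merge stderr into the same file
  process.start(executable, arguments);     // QProcess performs argument escaping itself

  if (!process.waitForFinished(-1))         // -1: wait without timeout
  {
    return false;                           // process could not be started or never finished
  }
  // a crash and a non-zero return value are both treated as failure
  return process.exitStatus() == QProcess::NormalExit && process.exitCode() == 0;
}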
ExitCodes main_(int, const char**) { // parsing parameters String in(getStringOption_("in")); String feature_in(getStringOption_("feature_in")); String out(getStringOption_("out")); double precursor_mass_tolerance(getDoubleOption_("precursor_mass_tolerance")); // reading input FileHandler fh; FileTypes::Type in_type = fh.getType(in); PeakMap exp; fh.loadExperiment(in, exp, in_type, log_type_, false, false); exp.sortSpectra(); FeatureMap feature_map; if (feature_in != "") { FeatureXMLFile().load(feature_in, feature_map); } // calculations FeatureFinderAlgorithmIsotopeWavelet iso_ff; Param ff_param(iso_ff.getParameters()); ff_param.setValue("max_charge", getIntOption_("max_charge")); ff_param.setValue("intensity_threshold", getDoubleOption_("intensity_threshold")); iso_ff.setParameters(ff_param); FeatureFinder ff; ff.setLogType(ProgressLogger::NONE); PeakMap exp2 = exp; exp2.clear(false); for (PeakMap::ConstIterator it = exp.begin(); it != exp.end(); ++it) { if (it->size() != 0) { exp2.addSpectrum(*it); } } exp = exp2; exp.updateRanges(); // TODO check MS2 and MS1 counts ProgressLogger progresslogger; progresslogger.setLogType(log_type_); progresslogger.startProgress(0, exp.size(), "Correcting precursor masses"); for (PeakMap::Iterator it = exp.begin(); it != exp.end(); ++it) { progresslogger.setProgress(it - exp.begin()); if (it->getMSLevel() != 2) { continue; } // find the closest preceding MS1 scan of the MS/MS scan PeakMap::Iterator ms1_it = it; while (ms1_it != exp.begin() && ms1_it->getMSLevel() != 1) { --ms1_it; } if (ms1_it == exp.begin() && ms1_it->getMSLevel() != 1) { writeLog_("Did not find an MS1 scan for the MS/MS scan at RT=" + String(it->getRT())); continue; } if (ms1_it->size() == 0) { writeDebug_("No peaks in scan at RT=" + String(ms1_it->getRT()) + String(", skipping"), 1); continue; } PeakMap::Iterator ms2_it = ms1_it; ++ms2_it; while (ms2_it != exp.end() && ms2_it->getMSLevel() == 2) { // first: error checks if (ms2_it->getPrecursors().empty()) { writeDebug_("Warning: found no precursors of spectrum RT=" + String(ms2_it->getRT()) + ", skipping it.", 1); ++ms2_it; continue; } else if (ms2_it->getPrecursors().size() > 1) { writeLog_("Warning: found more than one precursor of spectrum RT=" + String(ms2_it->getRT()) + ", using first one."); } Precursor prec = *ms2_it->getPrecursors().begin(); double prec_pos = prec.getMZ(); PeakMap new_exp; // now excise a small region from the MS1 spectrum for the feature finder (the isotope pattern must be covered...) 
PeakSpectrum zoom_spec; for (PeakSpectrum::ConstIterator pit = ms1_it->begin(); pit != ms1_it->end(); ++pit) { if (pit->getMZ() > prec_pos - 3 && pit->getMZ() < prec_pos + 3) { zoom_spec.push_back(*pit); } } new_exp.addSpectrum(zoom_spec); new_exp.updateRanges(); FeatureMap features, seeds; ff.run("isotope_wavelet", new_exp, features, ff_param, seeds); if (features.empty()) { writeDebug_("No features found for scan RT=" + String(ms1_it->getRT()), 1); ++ms2_it; continue; } double max_int(numeric_limits<double>::min()); double min_dist(numeric_limits<double>::max()); Size max_int_feat_idx(0); for (Size i = 0; i != features.size(); ++i) { if (fabs(features[i].getMZ() - prec_pos) < precursor_mass_tolerance && features[i].getIntensity() > max_int) { max_int_feat_idx = i; max_int = features[i].getIntensity(); min_dist = fabs(features[i].getMZ() - prec_pos); } } writeDebug_(" max_int=" + String(max_int) + " mz=" + String(features[max_int_feat_idx].getMZ()) + " charge=" + String(features[max_int_feat_idx].getCharge()), 5); if (min_dist < precursor_mass_tolerance) { prec.setMZ(features[max_int_feat_idx].getMZ()); prec.setCharge(features[max_int_feat_idx].getCharge()); vector<Precursor> precs; precs.push_back(prec); ms2_it->setPrecursors(precs); writeDebug_("Correcting precursor mass of spectrum RT=" + String(ms2_it->getRT()) + " from " + String(prec_pos) + " to " + String(prec.getMZ()) + " (z=" + String(prec.getCharge()) + ")", 1); } ++ms2_it; } it = --ms2_it; } progresslogger.endProgress(); // writing output fh.storeExperiment(out, exp, log_type_); return EXECUTION_OK; }
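// --- Hedged sketch: locating the parent MS1 scan of an MS/MS spectrum ------
// The correction loop above walks backwards from each MS2 spectrum to the
// closest preceding MS1 survey scan before excising the region around the
// precursor. The same lookup in isolation, assuming an RT-sorted PeakMap as
// used above; findPrecedingMS1 is a hypothetical helper name.
#include <OpenMS/KERNEL/StandardTypes.h>
using namespace OpenMS;

PeakMap::ConstIterator findPrecedingMS1(const PeakMap& exp, PeakMap::ConstIterator ms2_it)
{
  PeakMap::ConstIterator it = ms2_it;
  while (it != exp.begin() && it->getMSLevel() != 1)
  {
    --it;  // walk towards earlier retention times
  }
  if (it->getMSLevel() != 1)
  {
    return exp.end();  // no MS1 scan precedes this MS/MS spectrum
  }
  return it;
}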
void MassTraceDetection::run_(const MapIdxSortedByInt& chrom_apices, const Size total_peak_count, const PeakMap& work_exp, const std::vector<Size>& spec_offsets, std::vector<MassTrace>& found_masstraces) { boost::dynamic_bitset<> peak_visited(total_peak_count); Size trace_number(1); // check presence of FWHM meta data int fwhm_meta_idx(-1); Size fwhm_meta_count(0); for (Size i = 0; i < work_exp.size(); ++i) { if (work_exp[i].getFloatDataArrays().size() > 0 && work_exp[i].getFloatDataArrays()[0].getName() == "FWHM_ppm") { if (work_exp[i].getFloatDataArrays()[0].size() != work_exp[i].size()) { // float data should always have the same size as the corresponding array throw Exception::InvalidSize(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, work_exp[i].size()); } fwhm_meta_idx = 0; ++fwhm_meta_count; } } if (fwhm_meta_count > 0 && fwhm_meta_count != work_exp.size()) { throw Exception::Precondition(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, String("FWHM meta arrays are expected to be missing or present for all MS spectra [") + fwhm_meta_count + "/" + work_exp.size() + "]."); } this->startProgress(0, total_peak_count, "mass trace detection"); Size peaks_detected(0); for (MapIdxSortedByInt::const_reverse_iterator m_it = chrom_apices.rbegin(); m_it != chrom_apices.rend(); ++m_it) { Size apex_scan_idx(m_it->second.first); Size apex_peak_idx(m_it->second.second); if (peak_visited[spec_offsets[apex_scan_idx] + apex_peak_idx]) { continue; } Peak2D apex_peak; apex_peak.setRT(work_exp[apex_scan_idx].getRT()); apex_peak.setMZ(work_exp[apex_scan_idx][apex_peak_idx].getMZ()); apex_peak.setIntensity(work_exp[apex_scan_idx][apex_peak_idx].getIntensity()); Size trace_up_idx(apex_scan_idx); Size trace_down_idx(apex_scan_idx); std::list<PeakType> current_trace; current_trace.push_back(apex_peak); std::vector<double> fwhms_mz; // peak-FWHM meta values of collected peaks // Initialization for the iterative version of weighted m/z mean calculation double centroid_mz(apex_peak.getMZ()); double prev_counter(apex_peak.getIntensity() * apex_peak.getMZ()); double prev_denom(apex_peak.getIntensity()); updateIterativeWeightedMeanMZ(apex_peak.getMZ(), apex_peak.getIntensity(), centroid_mz, prev_counter, prev_denom); std::vector<std::pair<Size, Size> > gathered_idx; gathered_idx.push_back(std::make_pair(apex_scan_idx, apex_peak_idx)); if (fwhm_meta_idx != -1) { fwhms_mz.push_back(work_exp[apex_scan_idx].getFloatDataArrays()[fwhm_meta_idx][apex_peak_idx]); } Size up_hitting_peak(0), down_hitting_peak(0); Size up_scan_counter(0), down_scan_counter(0); bool toggle_up = true, toggle_down = true; Size conseq_missed_peak_up(0), conseq_missed_peak_down(0); Size max_consecutive_missing(trace_termination_outliers_); double current_sample_rate(1.0); // Size min_scans_to_consider(std::floor((min_sample_rate_ /2)*10)); Size min_scans_to_consider(5); // double outlier_ratio(0.3); // double ftl_mean(centroid_mz); double ftl_sd((centroid_mz / 1e6) * mass_error_ppm_); double intensity_so_far(apex_peak.getIntensity()); while (((trace_down_idx > 0) && toggle_down) || ((trace_up_idx < work_exp.size() - 1) && toggle_up) ) { // *********************************************************** // // Step 2.1 MOVE DOWN in RT dim // *********************************************************** // if ((trace_down_idx > 0) && toggle_down) { const MSSpectrum<>& spec_trace_down = work_exp[trace_down_idx - 1]; if (!spec_trace_down.empty()) { Size next_down_peak_idx = spec_trace_down.findNearest(centroid_mz); double next_down_peak_mz = 
spec_trace_down[next_down_peak_idx].getMZ(); double next_down_peak_int = spec_trace_down[next_down_peak_idx].getIntensity(); double right_bound = centroid_mz + 3 * ftl_sd; double left_bound = centroid_mz - 3 * ftl_sd; if ((next_down_peak_mz <= right_bound) && (next_down_peak_mz >= left_bound) && !peak_visited[spec_offsets[trace_down_idx - 1] + next_down_peak_idx] ) { Peak2D next_peak; next_peak.setRT(spec_trace_down.getRT()); next_peak.setMZ(next_down_peak_mz); next_peak.setIntensity(next_down_peak_int); current_trace.push_front(next_peak); // FWHM average if (fwhm_meta_idx != -1) { fwhms_mz.push_back(spec_trace_down.getFloatDataArrays()[fwhm_meta_idx][next_down_peak_idx]); } // Update the m/z mean of the current trace as we added a new peak updateIterativeWeightedMeanMZ(next_down_peak_mz, next_down_peak_int, centroid_mz, prev_counter, prev_denom); gathered_idx.push_back(std::make_pair(trace_down_idx - 1, next_down_peak_idx)); // Update the m/z variance dynamically if (reestimate_mt_sd_) // && (down_hitting_peak+1 > min_flank_scans)) { // if (ftl_t > min_fwhm_scans) { updateWeightedSDEstimateRobust(next_peak, centroid_mz, ftl_sd, intensity_so_far); } } ++down_hitting_peak; conseq_missed_peak_down = 0; } else { ++conseq_missed_peak_down; } } --trace_down_idx; ++down_scan_counter; // trace termination criterion: max allowed number of // consecutive outliers reached OR cancel extension if // sampling_rate falls below min_sample_rate_ if (trace_termination_criterion_ == "outlier") { if (conseq_missed_peak_down > max_consecutive_missing) { toggle_down = false; } } else if (trace_termination_criterion_ == "sample_rate") { current_sample_rate = (double)(down_hitting_peak + up_hitting_peak + 1) / (double)(down_scan_counter + up_scan_counter + 1); if (down_scan_counter > min_scans_to_consider && current_sample_rate < min_sample_rate_) { // std::cout << "stopping down..." 
<< std::endl; toggle_down = false; } } } // *********************************************************** // // Step 2.2 MOVE UP in RT dim // *********************************************************** // if ((trace_up_idx < work_exp.size() - 1) && toggle_up) { const MSSpectrum<>& spec_trace_up = work_exp[trace_up_idx + 1]; if (!spec_trace_up.empty()) { Size next_up_peak_idx = spec_trace_up.findNearest(centroid_mz); double next_up_peak_mz = spec_trace_up[next_up_peak_idx].getMZ(); double next_up_peak_int = spec_trace_up[next_up_peak_idx].getIntensity(); double right_bound = centroid_mz + 3 * ftl_sd; double left_bound = centroid_mz - 3 * ftl_sd; if ((next_up_peak_mz <= right_bound) && (next_up_peak_mz >= left_bound) && !peak_visited[spec_offsets[trace_up_idx + 1] + next_up_peak_idx]) { Peak2D next_peak; next_peak.setRT(spec_trace_up.getRT()); next_peak.setMZ(next_up_peak_mz); next_peak.setIntensity(next_up_peak_int); current_trace.push_back(next_peak); if (fwhm_meta_idx != -1) { fwhms_mz.push_back(spec_trace_up.getFloatDataArrays()[fwhm_meta_idx][next_up_peak_idx]); } // Update the m/z mean of the current trace as we added a new peak updateIterativeWeightedMeanMZ(next_up_peak_mz, next_up_peak_int, centroid_mz, prev_counter, prev_denom); gathered_idx.push_back(std::make_pair(trace_up_idx + 1, next_up_peak_idx)); // Update the m/z variance dynamically if (reestimate_mt_sd_) // && (up_hitting_peak+1 > min_flank_scans)) { // if (ftl_t > min_fwhm_scans) { updateWeightedSDEstimateRobust(next_peak, centroid_mz, ftl_sd, intensity_so_far); } } ++up_hitting_peak; conseq_missed_peak_up = 0; } else { ++conseq_missed_peak_up; } } ++trace_up_idx; ++up_scan_counter; if (trace_termination_criterion_ == "outlier") { if (conseq_missed_peak_up > max_consecutive_missing) { toggle_up = false; } } else if (trace_termination_criterion_ == "sample_rate") { current_sample_rate = (double)(down_hitting_peak + up_hitting_peak + 1) / (double)(down_scan_counter + up_scan_counter + 1); if (up_scan_counter > min_scans_to_consider && current_sample_rate < min_sample_rate_) { // std::cout << "stopping up" << std::endl; toggle_up = false; } } } } // std::cout << "current sr: " << current_sample_rate << std::endl; double num_scans(down_scan_counter + up_scan_counter + 1 - conseq_missed_peak_down - conseq_missed_peak_up); double mt_quality((double)current_trace.size() / (double)num_scans); // std::cout << "mt quality: " << mt_quality << std::endl; double rt_range(std::fabs(current_trace.rbegin()->getRT() - current_trace.begin()->getRT())); // *********************************************************** // // Step 2.3 check if minimum length and quality of mass trace criteria are met // *********************************************************** // bool max_trace_criteria = (max_trace_length_ < 0.0 || rt_range < max_trace_length_); if (rt_range >= min_trace_length_ && max_trace_criteria && mt_quality >= min_sample_rate_) { // std::cout << "T" << trace_number << "\t" << mt_quality << std::endl; // mark all peaks as visited for (Size i = 0; i < gathered_idx.size(); ++i) { peak_visited[spec_offsets[gathered_idx[i].first] + gathered_idx[i].second] = true; } // create new MassTrace object and store collected peaks from list current_trace MassTrace new_trace(current_trace); new_trace.updateWeightedMeanRT(); new_trace.updateWeightedMeanMZ(); if (!fwhms_mz.empty()) new_trace.fwhm_mz_avg = Math::median(fwhms_mz.begin(), fwhms_mz.end()); new_trace.setQuantMethod(quant_method_); //new_trace.setCentroidSD(ftl_sd); 
new_trace.updateWeightedMZsd(); new_trace.setLabel("T" + String(trace_number)); ++trace_number; found_masstraces.push_back(new_trace); peaks_detected += new_trace.getSize(); this->setProgress(peaks_detected); } } this->endProgress(); }
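// --- Hedged sketch: intensity-weighted running mean of m/z -----------------
// While a trace is extended above, updateIterativeWeightedMeanMZ() keeps the
// centroid m/z up to date from the running sums prev_counter (intensity * m/z)
// and prev_denom (intensity). A standalone sketch of that running mean, under
// the assumption that this is the quantity being maintained (RunningWeightedMean
// is an illustrative type, not part of MassTraceDetection):
struct RunningWeightedMean
{
  double weighted_sum = 0.0;  // sum over intensity_i * mz_i
  double weight_sum = 0.0;    // sum over intensity_i

  // add one peak and return the updated centroid m/z
  double add(double mz, double intensity)
  {
    weighted_sum += intensity * mz;
    weight_sum += intensity;
    return weighted_sum / weight_sum;
  }
};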
} END_SECTION START_SECTION(virtual ~MascotGenericFile()) { delete ptr; } END_SECTION ptr = new MascotGenericFile(); START_SECTION((template < typename MapType > void load(const String &filename, MapType &exp))) { PeakMap exp; ptr->load(OPENMS_GET_TEST_DATA_PATH("MascotInfile_test.mascot_in"), exp); TEST_EQUAL(exp.size(), 1) TEST_EQUAL(exp.begin()->size(), 9) } END_SECTION START_SECTION((void store(std::ostream &os, const String &filename, const PeakMap &experiment, bool compact = false))) { PeakMap exp; ptr->load(OPENMS_GET_TEST_DATA_PATH("MascotInfile_test.mascot_in"), exp); // handling of modifications: Param params = ptr->getParameters(); params.setValue("fixed_modifications", ListUtils::create<String>("Carbamidomethyl (C),Phospho (S)")); params.setValue("variable_modifications", ListUtils::create<String>("Oxidation (M),Deamidated (N),Deamidated (Q)")); ptr->setParameters(params);
ExitCodes main_(int, const char**) override { //------------------------------------------------------------- // parameter handling //------------------------------------------------------------- //input file names String in = getStringOption_("in"); String read_method = getStringOption_("read_method"); bool load_data = getStringOption_("loadData") == "true"; if (read_method == "streaming") { std::cout << "Read method: streaming" << std::endl; // Create the consumer, set output file name, transform TICConsumer consumer; MzMLFile mzml; mzml.setLogType(log_type_); PeakFileOptions opt = mzml.getOptions(); opt.setFillData(load_data); // whether to actually load any data opt.setSkipXMLChecks(true); // save time by not checking base64 strings for whitespaces opt.setMaxDataPoolSize(100); opt.setAlwaysAppendData(false); mzml.setOptions(opt); mzml.transform(in, &consumer, true, true); std::cout << "There are " << consumer.nr_spectra << " spectra and " << consumer.nr_peaks << " peaks in the input file." << std::endl; std::cout << "The total ion current is " << consumer.TIC << std::endl; size_t after; SysInfo::getProcessMemoryConsumption(after); std::cout << " Memory consumption after " << after << std::endl; } else if (read_method == "regular") { std::cout << "Read method: regular" << std::endl; MzMLFile mzml; mzml.setLogType(log_type_); PeakFileOptions opt = mzml.getOptions(); opt.setFillData(load_data); // whether to actually load any data opt.setSkipXMLChecks(true); // save time by not checking base64 strings for whitespaces mzml.setOptions(opt); PeakMap map; mzml.load(in, map); double TIC = 0.0; long int nr_peaks = 0; for (Size i =0; i < map.size(); i++) { nr_peaks += map[i].size(); for (Size j = 0; j < map[i].size(); j++) { TIC += map[i][j].getIntensity(); } } std::cout << "There are " << map.size() << " spectra and " << nr_peaks << " peaks in the input file." << std::endl; std::cout << "The total ion current is " << TIC << std::endl; size_t after; SysInfo::getProcessMemoryConsumption(after); std::cout << " Memory consumption after " << after << std::endl; } else if (read_method == "indexed") { std::cout << "Read method: indexed" << std::endl; IndexedMzMLFileLoader imzml; // load data from an indexed MzML file OnDiscPeakMap map; imzml.load(in, map); double TIC = 0.0; long int nr_peaks = 0; if (load_data) { for (Size i =0; i < map.getNrSpectra(); i++) { OpenMS::Interfaces::SpectrumPtr sptr = map.getSpectrumById(i); nr_peaks += sptr->getIntensityArray()->data.size(); TIC += std::accumulate(sptr->getIntensityArray()->data.begin(), sptr->getIntensityArray()->data.end(), 0.0); } } std::cout << "There are " << map.getNrSpectra() << " spectra and " << nr_peaks << " peaks in the input file." 
<< std::endl; std::cout << "The total ion current is " << TIC << std::endl; size_t after; SysInfo::getProcessMemoryConsumption(after); std::cout << " Memory consumption after " << after << std::endl; } else if (read_method == "indexed_parallel") { std::cout << "Read method: indexed (parallel)" << std::endl; IndexedMzMLFileLoader imzml; PeakFileOptions opt = imzml.getOptions(); opt.setFillData(load_data); // whether to actually load any data imzml.setOptions(opt); // load data from an indexed MzML file OnDiscPeakMap map; map.openFile(in, true); map.setSkipXMLChecks(true); double TIC = 0.0; long int nr_peaks = 0; if (load_data) { // firstprivate means that each thread has its own instance of the // variable, each copy initialized with the initial value #ifdef _OPENMP #pragma omp parallel for firstprivate(map) #endif for (SignedSize i =0; i < (SignedSize)map.getNrSpectra(); i++) { OpenMS::Interfaces::SpectrumPtr sptr = map.getSpectrumById(i); double nr_peaks_l = sptr->getIntensityArray()->data.size(); double TIC_l = std::accumulate(sptr->getIntensityArray()->data.begin(), sptr->getIntensityArray()->data.end(), 0.0); #ifdef _OPENMP #pragma omp critical (indexed) #endif { TIC += TIC_l; nr_peaks += nr_peaks_l; } } } std::cout << "There are " << map.getNrSpectra() << " spectra and " << nr_peaks << " peaks in the input file." << std::endl; std::cout << "The total ion current is " << TIC << std::endl; size_t after; SysInfo::getProcessMemoryConsumption(after); std::cout << " Memory consumption after " << after << std::endl; } else if (read_method == "cached") { std::cout << "Read method: cached" << std::endl; // Special handling of cached mzML as input types: // we expect two paired input files which we should read into exp std::vector<String> split_out; in.split(".cachedMzML", split_out); if (split_out.size() != 2) { LOG_ERROR << "Cannot deduce base path from input '" << in << "' (note that '.cachedMzML' should only occur once as the final ending)" << std::endl; return ILLEGAL_PARAMETERS; } String in_meta = split_out[0] + ".mzML"; MzMLFile f; f.setLogType(log_type_); CachedmzML cacher; cacher.setLogType(log_type_); CachedmzML cache; cache.createMemdumpIndex(in); const std::vector<std::streampos> spectra_index = cache.getSpectraIndex(); std::ifstream ifs_; ifs_.open(in.c_str(), std::ios::binary); double TIC = 0.0; long int nr_peaks = 0; for (Size i=0; i < spectra_index.size(); ++i) { BinaryDataArrayPtr mz_array(new BinaryDataArray); BinaryDataArrayPtr intensity_array(new BinaryDataArray); int ms_level = -1; double rt = -1.0; ifs_.seekg(spectra_index[i]); CachedmzML::readSpectrumFast(mz_array, intensity_array, ifs_, ms_level, rt); nr_peaks += intensity_array->data.size(); for (Size j = 0; j < intensity_array->data.size(); j++) { TIC += intensity_array->data[j]; } } std::cout << "There are " << spectra_index.size() << " spectra and " << nr_peaks << " peaks in the input file." 
<< std::endl; std::cout << "The total ion current is " << TIC << std::endl; size_t after; SysInfo::getProcessMemoryConsumption(after); std::cout << " Memory consumption after " << after << std::endl; } else if (read_method == "cached_parallel") { std::cout << "Read method: cached parallel" << std::endl; // Special handling of cached mzML as input types: // we expect two paired input files which we should read into exp std::vector<String> split_out; in.split(".cachedMzML", split_out); if (split_out.size() != 2) { LOG_ERROR << "Cannot deduce base path from input '" << in << "' (note that '.cachedMzML' should only occur once as the final ending)" << std::endl; return ILLEGAL_PARAMETERS; } String in_meta = split_out[0] + ".mzML"; MzMLFile f; f.setLogType(log_type_); CachedmzML cacher; cacher.setLogType(log_type_); CachedmzML cache; cache.createMemdumpIndex(in); const std::vector<std::streampos> spectra_index = cache.getSpectraIndex(); FileAbstraction filestream(in); double TIC = 0.0; long int nr_peaks = 0; #ifdef _OPENMP #pragma omp parallel for firstprivate(filestream) #endif for (SignedSize i=0; i < (SignedSize)spectra_index.size(); ++i) { BinaryDataArrayPtr mz_array(new BinaryDataArray); BinaryDataArrayPtr intensity_array(new BinaryDataArray); int ms_level = -1; double rt = -1.0; // we only change the position of the thread-local filestream filestream.getStream().seekg(spectra_index[i]); CachedmzML::readSpectrumFast(mz_array, intensity_array, filestream.getStream(), ms_level, rt); double nr_peaks_l = intensity_array->data.size(); double TIC_l = std::accumulate(intensity_array->data.begin(), intensity_array->data.end(), 0.0); #ifdef _OPENMP #pragma omp critical (indexed) #endif { TIC += TIC_l; nr_peaks += nr_peaks_l; } } std::cout << "There are " << spectra_index.size() << " spectra and " << nr_peaks << " peaks in the input file." << std::endl; std::cout << "The total ion current is " << TIC << std::endl; size_t after; SysInfo::getProcessMemoryConsumption(after); std::cout << " Memory consumption after " << after << std::endl; } return EXECUTION_OK; }
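// --- Hedged sketch: summing the TIC with an OpenMP reduction ----------------
// The parallel read methods above merge per-thread TIC and peak-count sums
// inside an "omp critical" section. For plain sums, an OpenMP reduction clause
// expresses the same accumulation without explicit locking; the sketch below is
// generic over pre-loaded intensity vectors (totalIonCurrent is an illustrative
// helper, not part of the tool).
#include <vector>
#include <numeric>

double totalIonCurrent(const std::vector<std::vector<double> >& intensities_per_spectrum)
{
  double tic = 0.0;
#ifdef _OPENMP
#pragma omp parallel for reduction(+:tic)
#endif
  for (long i = 0; i < (long)intensities_per_spectrum.size(); ++i)
  {
    tic += std::accumulate(intensities_per_spectrum[i].begin(),
                           intensities_per_spectrum[i].end(), 0.0);
  }
  return tic;
}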