// Tool entry point: reads the decoy-generation options, loads the target
// assay library from TraML, generates decoys with MRMDecoy and writes either
// the decoys alone or (with "append") targets plus decoys to the output file.
//
// Throws Exception::IllegalArgument when "method" is not one of
// "shuffle", "pseudo-reverse", "reverse" or "shift".
ExitCodes main_(int, const char **)
{
  // --- file and method options ---
  String in = getStringOption_("in");
  String out = getStringOption_("out");
  String method = getStringOption_("method");
  String decoy_tag = getStringOption_("decoy_tag");

  // --- numeric thresholds and flags (read in the same order as before) ---
  double mz_threshold = getDoubleOption_("mz_threshold");
  bool exclude_similar = getFlag_("exclude_similar");
  double similarity_threshold = getDoubleOption_("similarity_threshold");
  bool append = getFlag_("append");
  bool remove_CNterm_mods = getFlag_("remove_CNterm_mods");
  bool remove_unannotated = getFlag_("remove_unannotated");
  double identity_threshold = getDoubleOption_("identity_threshold");
  Int max_attempts = getIntOption_("max_attempts");
  double mz_shift = getDoubleOption_("mz_shift");
  double precursor_mass_shift = getDoubleOption_("precursor_mass_shift");

  // --- fragment selection: both options are comma-separated lists ---
  String fragment_types_csv = getStringOption_("allowed_fragment_types");
  String fragment_charges_csv = getStringOption_("allowed_fragment_charges");
  bool enable_detection_specific_losses = getFlag_("enable_detection_specific_losses");
  bool enable_detection_unspecific_losses = getFlag_("enable_detection_unspecific_losses");

  std::vector<String> fragment_types;
  fragment_types_csv.split(",", fragment_types);

  std::vector<String> charge_tokens;
  std::vector<size_t> fragment_charges;
  fragment_charges_csv.split(",", charge_tokens);
  for (std::vector<String>::const_iterator token = charge_tokens.begin(); token != charge_tokens.end(); ++token)
  {
    // atoi yields 0 for tokens that are not numbers
    fragment_charges.push_back(static_cast<size_t>(std::atoi(token->c_str())));
  }

  // reject unknown generation methods before touching any file
  const bool method_is_known = (method == "shuffle" || method == "pseudo-reverse" ||
                                method == "reverse" || method == "shift");
  if (!method_is_known)
  {
    throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__, "No valid decoy generation method selected!");
  }

  TraMLFile traml_io;
  TargetedExperiment targets;
  TargetedExperiment decoy_exp;

  std::cout << "Loading " << in << std::endl;
  traml_io.load(in, targets);

  MRMDecoy decoy_generator;
  std::cout << "Generate decoys" << std::endl;
  decoy_generator.generateDecoys(targets, decoy_exp, method, decoy_tag,
                                 identity_threshold, max_attempts, mz_threshold,
                                 mz_shift, exclude_similar, similarity_threshold,
                                 remove_CNterm_mods, precursor_mass_shift,
                                 fragment_types, fragment_charges,
                                 enable_detection_specific_losses,
                                 enable_detection_unspecific_losses,
                                 remove_unannotated);

  if (!append)
  {
    // decoys only
    traml_io.store(out, decoy_exp);
  }
  else
  {
    // targets followed by decoys in one output file
    TargetedExperiment merged;
    merged += targets + decoy_exp;
    traml_io.store(out, merged);
  }

  return EXECUTION_OK;
}
ExitCodes main_(int, const char**) { //------------------------------------------------------------- // parameter handling //------------------------------------------------------------- //file list StringList file_list = getStringList_("in"); //file type FileHandler fh; FileTypes::Type force_type; if (getStringOption_("in_type").size() > 0) { force_type = FileTypes::nameToType(getStringOption_("in_type")); } else { force_type = fh.getType(file_list[0]); } //output file names and types String out_file = getStringOption_("out"); //------------------------------------------------------------- // calculations //------------------------------------------------------------- bool annotate_file_origin = getFlag_("annotate_file_origin"); if (force_type == FileTypes::FEATUREXML) { FeatureMap<> out; for (Size i = 0; i < file_list.size(); ++i) { FeatureMap<> map; FeatureXMLFile fh; fh.load(file_list[i], map); if (annotate_file_origin) { for (FeatureMap<>::iterator it = map.begin(); it != map.end(); ++it) { it->setMetaValue("file_origin", DataValue(file_list[i])); } } out += map; } //------------------------------------------------------------- // writing output //------------------------------------------------------------- //annotate output with data processing info addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION)); FeatureXMLFile f; f.store(out_file, out); } else if (force_type == FileTypes::CONSENSUSXML) { ConsensusMap out; ConsensusXMLFile fh; fh.load(file_list[0], out); //skip first file for (Size i = 1; i < file_list.size(); ++i) { ConsensusMap map; ConsensusXMLFile fh; fh.load(file_list[i], map); if (annotate_file_origin) { for (ConsensusMap::iterator it = map.begin(); it != map.end(); ++it) { it->setMetaValue("file_origin", DataValue(file_list[i])); } } out += map; } //------------------------------------------------------------- // writing output //------------------------------------------------------------- //annotate output with data 
processing info addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION)); ConsensusXMLFile f; f.store(out_file, out); } else if (force_type == FileTypes::TRAML) { TargetedExperiment out; for (Size i = 0; i < file_list.size(); ++i) { TargetedExperiment map; TraMLFile fh; fh.load(file_list[i], map); out += map; } //------------------------------------------------------------- // writing output //------------------------------------------------------------- //annotate output with data processing info Software software; software.setName("FileMerger"); software.setVersion(VersionInfo::getVersion()); out.addSoftware(software); TraMLFile f; f.store(out_file, out); } else { // we might want to combine different types, thus we only // query in_type (which applies to all files) // and not the suffix or content of a single file force_type = FileTypes::nameToType(getStringOption_("in_type")); //rt bool rt_auto_number = getFlag_("raw:rt_auto"); bool rt_filename = getFlag_("raw:rt_filename"); bool rt_custom = false; DoubleList custom_rts = getDoubleList_("raw:rt_custom"); if (custom_rts.size() != 0) { rt_custom = true; if (custom_rts.size() != file_list.size()) { writeLog_("Custom retention time list must have as many elements as there are input files!"); printUsage_(); return ILLEGAL_PARAMETERS; } } //ms level bool user_ms_level = getFlag_("raw:user_ms_level"); MSExperiment<> out; out.reserve(file_list.size()); UInt rt_auto = 0; UInt native_id = 0; std::vector<MSChromatogram<ChromatogramPeak> > all_chromatograms; for (Size i = 0; i < file_list.size(); ++i) { String filename = file_list[i]; //load file MSExperiment<> in; fh.loadExperiment(filename, in, force_type, log_type_); if (in.empty() && in.getChromatograms().empty()) { writeLog_(String("Warning: Empty file '") + filename + "'!"); continue; } out.reserve(out.size() + in.size()); //warn if custom RT and more than one scan in input file if (rt_custom && in.size() > 1) { writeLog_(String("Warning: More 
than one scan in file '") + filename + "'! All scans will have the same retention time!"); } for (MSExperiment<>::const_iterator it2 = in.begin(); it2 != in.end(); ++it2) { //handle rt Real rt_final = it2->getRT(); if (rt_auto_number) { rt_final = ++rt_auto; } else if (rt_custom) { rt_final = custom_rts[i]; } else if (rt_filename) { if (!filename.hasSubstring("rt")) { writeLog_(String("Warning: cannot guess retention time from filename as it does not contain 'rt'")); } for (Size i = 0; i < filename.size(); ++i) { if (filename[i] == 'r' && ++i != filename.size() && filename[i] == 't' && ++i != filename.size() && isdigit(filename[i])) { String rt; while (i != filename.size() && (filename[i] == '.' || isdigit(filename[i]))) { rt += filename[i++]; } if (rt.size() > 0) { // remove dot from rt3892.98.dta // ^ if (rt[rt.size() - 1] == '.') { // remove last character rt.erase(rt.end() - 1); } } try { float tmp = rt.toFloat(); rt_final = tmp; } catch (Exception::ConversionError) { writeLog_(String("Warning: cannot convert the found retention time in a value '" + rt + "'.")); } } } } // none of the rt methods were successful if (rt_final == -1) { writeLog_(String("Warning: No valid retention time for output scan '") + rt_auto + "' from file '" + filename + "'"); } out.addSpectrum(*it2); out.getSpectra().back().setRT(rt_final); out.getSpectra().back().setNativeID(native_id); if (user_ms_level) { out.getSpectra().back().setMSLevel((int)getIntOption_("raw:ms_level")); } ++native_id; } // if we had only one spectrum, we can annotate it directly, for more spectra, we just name the source file leaving the spectra unannotated (to avoid a long and redundant list of sourceFiles) if (in.size() == 1) { out.getSpectra().back().setSourceFile(in.getSourceFiles()[0]); in.getSourceFiles().clear(); // delete source file annotated from source file (its in the spectrum anyways) } // copy experimental settings from first file if (i == 0) { out.ExperimentalSettings::operator=(in); } else // 
otherwise append { out.getSourceFiles().insert(out.getSourceFiles().end(), in.getSourceFiles().begin(), in.getSourceFiles().end()); // could be emtpty if spectrum was annotated above, but that's ok then } // also add the chromatograms for (std::vector<MSChromatogram<ChromatogramPeak> >::const_iterator it2 = in.getChromatograms().begin(); it2 != in.getChromatograms().end(); ++it2) { all_chromatograms.push_back(*it2); } } // set the chromatograms out.setChromatograms(all_chromatograms); //------------------------------------------------------------- // writing output //------------------------------------------------------------- //annotate output with data processing info addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION)); MzMLFile f; f.setLogType(log_type_); f.store(out_file, out); } return EXECUTION_OK; }
// Tool entry point: runs the DIA pre-scoring for every file in "swath_files".
//
// The transition list "tr" is loaded once and converted to the light-weight
// OpenSwath representation. For each SWATH file the transitions falling into
// that file's isolation window are selected and handed, together with a CSV
// writer, to DiaPrescore. The result path is the input path with its
// extension replaced by ".tsv".
//
// Files without spectra/precursors, or without matching transitions, are
// skipped with a warning on stderr. Always returns EXECUTION_OK.
ExitCodes main_(int, const char**)
{
  OpenMS::StringList file_list = getStringList_("swath_files");
  std::string tr_file = getStringOption_("tr");
  std::cout << tr_file << std::endl;
  double min_upper_edge_dist = getDoubleOption_("min_upper_edge_dist");

  // Load the input TraML file and convert it
  OpenSwath::LightTargetedExperiment transition_exp;
  std::cout << "Loading TraML file" << std::endl;
  {
    // the full TargetedExperiment is only needed for the conversion
    OpenMS::TargetedExperiment transition_exp_;
    TraMLFile t;
    t.load(tr_file, transition_exp_);
    OpenSwathDataAccessHelper::convertTargetedExp(transition_exp_, transition_exp);
    int ltrans = transition_exp.transitions.size();
    std::cout << ltrans << std::endl;
  }

  // Here we deal with SWATH files (can be multiple files)
  for (Size i = 0; i < file_list.size(); ++i)
  {
    MzMLFile swath_file;
    MapTypePtr swath_map(new MapType);
    std::cout << "Loading file " << file_list[i] << std::endl;

    // output goes next to the input: same path, extension replaced by ".tsv"
    std::string fileout = file_list[i];
    std::string fname = File::removeExtension(fileout);
    fname += ".tsv";

    swath_file.setLogType(log_type_);
    swath_file.load(file_list[i], *swath_map);
    if (swath_map->size() == 0 || (*swath_map)[0].getPrecursors().size() == 0)
    {
      std::cerr << "WARNING: File " << swath_map->getLoadedFilePath()
                << " does not have any experiments or any precursors. Is it a SWATH map?"
                << std::endl;
      continue;
    }

    // Find the transitions to extract and extract them; the isolation window
    // is taken from the first precursor of the first spectrum
    OpenSwath::LightTargetedExperiment transition_exp_used;
    const std::vector<Precursor>& prec = (*swath_map)[0].getPrecursors();
    double lower = prec[0].getMZ() - prec[0].getIsolationWindowLowerOffset();
    double upper = prec[0].getMZ() + prec[0].getIsolationWindowUpperOffset();
    OpenSwathHelper::selectSwathTransitions(transition_exp, transition_exp_used,
                                            min_upper_edge_dist, lower, upper);
    if (transition_exp_used.getTransitions().size() == 0)
    {
      std::cerr << "WARNING: For file " << swath_map->getLoadedFilePath()
                << " there are no transitions to extract." << std::endl;
      continue;
    }

    std::cout << "Using Spectrum Interface!" << std::endl;
    OpenSwath::SpectrumAccessPtr spectrumAccess =
      SimpleOpenMSSpectraFactory::getSpectrumAccessOpenMSPtr(swath_map);

    {
      // The writer lives on the stack so that it is destroyed even if the
      // scoring throws; the previous new/delete pair leaked it in that case.
      OpenSwath::CSVWriter writer(fname);
      OpenSwath::IDataFrameWriter* dfw = &writer;
      OpenMS::DiaPrescore dp;
      dp.operator()(spectrumAccess, transition_exp_used, dfw);
    }
  } // end of for loop

  return EXECUTION_OK;
} // end of _main
ExitCodes main_(int, const char **) { StringList file_list = getStringList_("in"); String tr_file_str = getStringOption_("tr"); String out = getStringOption_("out"); bool is_swath = getFlag_("is_swath"); bool ppm = getFlag_("ppm"); bool extract_MS1 = getFlag_("extract_MS1"); double min_upper_edge_dist = getDoubleOption_("min_upper_edge_dist"); double mz_extraction_window = getDoubleOption_("mz_window"); double rt_extraction_window = getDoubleOption_("rt_window"); String extraction_function = getStringOption_("extraction_function"); // If we have a transformation file, trafo will transform the RT in the // scoring according to the model. If we dont have one, it will apply the // null transformation. String trafo_in = getStringOption_("rt_norm"); TransformationDescription trafo; if (trafo_in.size() > 0) { TransformationXMLFile trafoxml; String model_type = getStringOption_("model:type"); Param model_params = getParam_().copy("model:", true); trafoxml.load(trafo_in, trafo); trafo.fitModel(model_type, model_params); } TransformationDescription trafo_inverse = trafo; trafo_inverse.invert(); const char * tr_file = tr_file_str.c_str(); MapType out_exp; std::vector< OpenMS::MSChromatogram > chromatograms; TraMLFile traml; OpenMS::TargetedExperiment targeted_exp; std::cout << "Loading TraML file" << std::endl; traml.load(tr_file, targeted_exp); std::cout << "Loaded TraML file" << std::endl; // Do parallelization over the different input files // Only in OpenMP 3.0 are unsigned loop variables allowed #ifdef _OPENMP #pragma omp parallel for #endif for (SignedSize i = 0; i < boost::numeric_cast<SignedSize>(file_list.size()); ++i) { boost::shared_ptr<PeakMap > exp(new PeakMap); MzMLFile f; // Logging and output to the console // IF_MASTERTHREAD f.setLogType(log_type_); // Find the transitions to extract and extract them MapType tmp_out; OpenMS::TargetedExperiment transition_exp_used; f.load(file_list[i], *exp); if (exp->empty() ) { continue; } // if empty, go on 
OpenSwath::SpectrumAccessPtr expptr = SimpleOpenMSSpectraFactory::getSpectrumAccessOpenMSPtr(exp); bool do_continue = true; if (is_swath) { do_continue = OpenSwathHelper::checkSwathMapAndSelectTransitions(*exp, targeted_exp, transition_exp_used, min_upper_edge_dist); } else { transition_exp_used = targeted_exp; } #ifdef _OPENMP #pragma omp critical (OpenSwathChromatogramExtractor_metadata) #endif // after loading the first file, copy the meta data from that experiment // this may happen *after* chromatograms were already added to the // output, thus we do NOT fill the experiment here but rather store all // the chromatograms in the "chromatograms" array and store them in // out_exp afterwards. if (i == 0) { out_exp = *exp; out_exp.clear(false); } std::cout << "Extracting " << transition_exp_used.getTransitions().size() << " transitions" << std::endl; std::vector< OpenSwath::ChromatogramPtr > chromatogram_ptrs; std::vector< ChromatogramExtractor::ExtractionCoordinates > coordinates; // continue if the map is not empty if (do_continue) { // Prepare the coordinates (with or without rt extraction) and then extract the chromatograms ChromatogramExtractor extractor; if (rt_extraction_window < 0) { extractor.prepare_coordinates(chromatogram_ptrs, coordinates, transition_exp_used, rt_extraction_window, extract_MS1); } else { // Use an rt extraction window of 0.0 which will just write the retention time in start / end positions extractor.prepare_coordinates(chromatogram_ptrs, coordinates, transition_exp_used, 0.0, extract_MS1); for (std::vector< ChromatogramExtractor::ExtractionCoordinates >::iterator it = coordinates.begin(); it != coordinates.end(); ++it) { it->rt_start = trafo_inverse.apply(it->rt_start) - rt_extraction_window / 2.0; it->rt_end = trafo_inverse.apply(it->rt_end) + rt_extraction_window / 2.0; } } extractor.extractChromatograms(expptr, chromatogram_ptrs, coordinates, mz_extraction_window, ppm, extraction_function); #ifdef _OPENMP #pragma omp critical 
(OpenSwathChromatogramExtractor_insertMS1) #endif { // Remove potential meta value indicating cached data SpectrumSettings exp_settings = (*exp)[0]; for (Size j = 0; j < exp_settings.getDataProcessing().size(); j++) { if (exp_settings.getDataProcessing()[j]->metaValueExists("cached_data")) { exp_settings.getDataProcessing()[j]->removeMetaValue("cached_data"); } } extractor.return_chromatogram(chromatogram_ptrs, coordinates, transition_exp_used, exp_settings, chromatograms, extract_MS1); } } // end of do_continue } // end of loop over all files / end of OpenMP // TODO check that no chromatogram IDs occur multiple times ! // store the output out_exp.setChromatograms(chromatograms); MzMLFile mzf; mzf.setLogType(log_type_); addDataProcessing_(out_exp, getProcessingInfo_(DataProcessing::SMOOTHING)); mzf.store(out, out_exp); return EXECUTION_OK; }
ExitCodes main_(int, const char**) { //------------------------------------------------------------- // parameter handling //------------------------------------------------------------- // file list StringList file_list = getStringList_("in"); // file type FileHandler file_handler; FileTypes::Type force_type; if (getStringOption_("in_type").size() > 0) { force_type = FileTypes::nameToType(getStringOption_("in_type")); } else { force_type = file_handler.getType(file_list[0]); } // output file names and types String out_file = getStringOption_("out"); bool annotate_file_origin = getFlag_("annotate_file_origin"); rt_gap_ = getDoubleOption_("rt_concat:gap"); vector<String> trafo_out = getStringList_("rt_concat:trafo_out"); if (trafo_out.empty()) { // resize now so we don't have to worry about indexing out of bounds: trafo_out.resize(file_list.size()); } else if (trafo_out.size() != file_list.size()) { writeLog_("Error: Number of transformation output files must equal the number of input files (parameters 'rt_concat:trafo_out'/'in')!"); return ILLEGAL_PARAMETERS; } //------------------------------------------------------------- // calculations //------------------------------------------------------------- if (force_type == FileTypes::FEATUREXML) { FeatureMap out; FeatureXMLFile fh; for (Size i = 0; i < file_list.size(); ++i) { FeatureMap map; fh.load(file_list[i], map); if (annotate_file_origin) { for (FeatureMap::iterator it = map.begin(); it != map.end(); ++it) { it->setMetaValue("file_origin", DataValue(file_list[i])); } } if (rt_gap_ > 0.0) // concatenate in RT { adjustRetentionTimes_(map, trafo_out[i], i == 0); } out += map; } //------------------------------------------------------------- // writing output //------------------------------------------------------------- // annotate output with data processing info addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION)); fh.store(out_file, out); } else if (force_type == 
FileTypes::CONSENSUSXML) { ConsensusMap out; ConsensusXMLFile fh; fh.load(file_list[0], out); // skip first file for (Size i = 1; i < file_list.size(); ++i) { ConsensusMap map; fh.load(file_list[i], map); if (annotate_file_origin) { for (ConsensusMap::iterator it = map.begin(); it != map.end(); ++it) { it->setMetaValue("file_origin", DataValue(file_list[i])); } } if (rt_gap_ > 0.0) // concatenate in RT { adjustRetentionTimes_(map, trafo_out[i], i == 0); } out += map; } //------------------------------------------------------------- // writing output //------------------------------------------------------------- // annotate output with data processing info addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION)); fh.store(out_file, out); } else if (force_type == FileTypes::TRAML) { TargetedExperiment out; TraMLFile fh; for (Size i = 0; i < file_list.size(); ++i) { TargetedExperiment map; fh.load(file_list[i], map); out += map; } //------------------------------------------------------------- // writing output //------------------------------------------------------------- // annotate output with data processing info Software software; software.setName("FileMerger"); software.setVersion(VersionInfo::getVersion()); out.addSoftware(software); fh.store(out_file, out); } else // raw data input (e.g. 
mzML) { // RT bool rt_auto_number = getFlag_("raw:rt_auto"); bool rt_filename = getFlag_("raw:rt_filename"); bool rt_custom = false; DoubleList custom_rts = getDoubleList_("raw:rt_custom"); if (!custom_rts.empty()) { rt_custom = true; if (custom_rts.size() != file_list.size()) { writeLog_("Custom retention time list (parameter 'raw:rt_custom') must have as many elements as there are input files (parameter 'in')!"); return ILLEGAL_PARAMETERS; } } // MS level Int ms_level = getIntOption_("raw:ms_level"); MSExperiment<> out; UInt rt_auto = 0; UInt native_id = 0; for (Size i = 0; i < file_list.size(); ++i) { String filename = file_list[i]; // load file force_type = file_handler.getType(file_list[i]); MSExperiment<> in; file_handler.loadExperiment(filename, in, force_type, log_type_); if (in.empty() && in.getChromatograms().empty()) { writeLog_(String("Warning: Empty file '") + filename + "'!"); continue; } out.reserve(out.size() + in.size()); // warn if custom RT and more than one scan in input file if (rt_custom && in.size() > 1) { writeLog_(String("Warning: More than one scan in file '") + filename + "'! 
All scans will have the same retention time!"); } // handle special raw data options: for (MSExperiment<>::iterator spec_it = in.begin(); spec_it != in.end(); ++spec_it) { float rt_final = spec_it->getRT(); if (rt_auto_number) { rt_final = ++rt_auto; } else if (rt_custom) { rt_final = custom_rts[i]; } else if (rt_filename) { static const boost::regex re("rt(\\d+(\\.\\d+)?)"); boost::smatch match; bool found = boost::regex_search(filename, match, re); if (found) { rt_final = String(match[1]).toFloat(); } else { writeLog_("Warning: could not extract retention time from filename '" + filename + "'"); } } // none of the rt methods were successful if (rt_final < 0) { writeLog_(String("Warning: No valid retention time for output scan '") + rt_auto + "' from file '" + filename + "'"); } spec_it->setRT(rt_final); spec_it->setNativeID("spectrum=" + String(native_id)); if (ms_level > 0) { spec_it->setMSLevel(ms_level); } ++native_id; } // if we have only one spectrum, we can annotate it directly, for more spectra, we just name the source file leaving the spectra unannotated (to avoid a long and redundant list of sourceFiles) if (in.size() == 1) { in[0].setSourceFile(in.getSourceFiles()[0]); in.getSourceFiles().clear(); // delete source file annotated from source file (it's in the spectrum anyways) } if (rt_gap_ > 0.0) // concatenate in RT { adjustRetentionTimes_(in, trafo_out[i], i == 0); } // add spectra to output for (MSExperiment<>::const_iterator spec_it = in.begin(); spec_it != in.end(); ++spec_it) { out.addSpectrum(*spec_it); } // also add the chromatograms for (vector<MSChromatogram<ChromatogramPeak> >::const_iterator chrom_it = in.getChromatograms().begin(); chrom_it != in.getChromatograms().end(); ++chrom_it) { out.addChromatogram(*chrom_it); } // copy experimental settings from first file if (i == 0) { out.ExperimentalSettings::operator=(in); } else // otherwise append { out.getSourceFiles().insert(out.getSourceFiles().end(), in.getSourceFiles().begin(), 
in.getSourceFiles().end()); // could be emtpty if spectrum was annotated above, but that's ok then } } //------------------------------------------------------------- // writing output //------------------------------------------------------------- // annotate output with data processing info addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION)); MzMLFile f; f.setLogType(log_type_); f.store(out_file, out); } return EXECUTION_OK; }