/**
  @brief Tool entry point: loads the mzML input and the TraML transitions,
         passes both to run_() and stores the resulting features as featureXML.

  @return EXECUTION_OK once the feature file has been written.
*/
ExitCodes main_(int, const char **)
{
  // fetch all command line options before any file is touched
  String mzml_path = getStringOption_("in");
  String featurexml_path = getStringOption_("out");
  String traml_path = getStringOption_("tr");
  bool force = getFlag_("force");

  // the peak map is heap-allocated because the spectrum access factory
  // below is handed a shared pointer to it
  boost::shared_ptr<PeakMap> peak_map(new PeakMap);
  MzMLFile mzml_reader;
  mzml_reader.setLogType(log_type_);
  mzml_reader.load(mzml_path, *peak_map);

  TargetedExpType transitions;
  TraMLFile().load(traml_path, transitions);

  FeatureMap features;
  OpenSwath::SpectrumAccessPtr spectrum_access =
    SimpleOpenMSSpectraFactory::getSpectrumAccessOpenMSPtr(peak_map);
  run_(spectrum_access, features, transitions, force);
  features.ensureUniqueId();

  // carry the primary MS run path of the input over to the output map
  StringList run_paths;
  peak_map->getPrimaryMSRunPath(run_paths);
  features.setPrimaryMSRunPath(run_paths);

  FeatureXMLFile().store(featurexml_path, features);
  return EXECUTION_OK;
}
/**
  @brief Creates the accessor for the file @p filename.

  The file is loaded into meta_ms_experiment_ through MzMLFile, and the name
  of the companion file is derived by appending ".cached" to @p filename.

  @param filename Path of the mzML file to load.
*/
SpectrumAccessOpenMSCached::SpectrumAccessOpenMSCached(String filename)
{
  filename_ = filename;
  filename_cached_ = filename + ".cached";

  // No stream is kept open here: currently the file stream is re-opened
  // with each read access, i.e. instead of
  //   std::ifstream ifs_((filename_cached_).c_str(), std::ios::binary);

  MzMLFile meta_reader;
  meta_reader.load(filename, meta_ms_experiment_);
}
/**
  @brief Tool entry point: connects to the database and then either calls
         DBAdapter::createDB() (flag 'init') or stores the mzML file given
         via 'in' in the database.

  @return EXECUTION_OK after the requested action has finished.
*/
ExitCodes main_(int, const char **)
{
  //-------------------------------------------------------------
  // parameter handling
  //-------------------------------------------------------------
  String input_path;
  bool init_only = getFlag_("init");
  if (!init_only)
  {
    // the input file option is only read when a file is actually imported
    input_path = getStringOption_("in");
  }
  String db_name = getStringOption_("db");
  String db_user = getStringOption_("user");
  String db_password = getStringOption_("password");
  String db_host = getStringOption_("host");
  Int db_port = getIntOption_("port");

  //-------------------------------------------------------------
  // connecting
  //-------------------------------------------------------------
  DBConnection connection;
  connection.connect(db_name, db_user, db_password, db_host, db_port);
  DBAdapter adapter(connection);

  if (init_only)
  {
    adapter.createDB();
    return EXECUTION_OK;
  }

  //-------------------------------------------------------------
  // reading input and writing it to the DB
  //-------------------------------------------------------------
  MSExperiment<Peak1D> experiment;
  MzMLFile mzml_reader;
  mzml_reader.setLogType(log_type_);
  mzml_reader.load(input_path, experiment);

  // annotate output with data processing info
  addDataProcessing_(experiment, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION));

  adapter.storeExperiment(experiment);
  writeLog_(String(" written file to DB (id: ") + static_cast<double>(experiment.getPersistenceId()) + ")");

  return EXECUTION_OK;
}
/**
  @brief Tool entry point: loads an mzML file, applies the BernNorm filter
         configured from the "algorithm:" parameter section and stores the
         result as mzML.

  @return EXECUTION_OK once the output file has been written.
*/
ExitCodes main_(int, const char **)
{
  // input/output files
  String input_path = getStringOption_("in");
  String output_path = getStringOption_("out");

  // load the input
  MSExperiment<> experiment;
  MzMLFile mzml_file;
  mzml_file.setLogType(log_type_);
  mzml_file.load(input_path, experiment);

  // if meta data arrays are present, remove them and warn
  bool arrays_removed = experiment.clearMetaDataArrays();
  if (arrays_removed)
  {
    writeLog_("Warning: Spectrum meta data arrays cannot be sorted. They are deleted.");
  }

  // configure and run the filter
  Param algorithm_param = getParam_().copy("algorithm:", true);
  writeDebug_("Used filter parameters", algorithm_param, 3);

  BernNorm bern_norm;
  bern_norm.setParameters(algorithm_param);
  bern_norm.filterPeakMap(experiment);

  // annotate output with data processing info, then write it
  addDataProcessing_(experiment, getProcessingInfo_(DataProcessing::FILTERING));
  mzml_file.store(output_path, experiment);

  return EXECUTION_OK;
}
int main(int argc, const char** argv) { if (argc < 2) return 1; // the path to the data should be given on the command line String tutorial_data_path(argv[1]); MzXMLFile mzxml; MzMLFile mzml; // temporary data storage MSExperiment<Peak1D> map; // convert MzXML to MzML mzxml.load(tutorial_data_path + "/data/Tutorial_FileIO.mzXML", map); mzml.store("Tutorial_FileIO.mzML", map); return 0; } //end of main
int main(int argc, const char** argv) { if (argc < 2) return 1; // the path to the data should be given on the command line String tutorial_data_path(argv[1]); MSExperiment spectra; MzMLFile f; // load mzML from code examples folder f.load(tutorial_data_path + "/data/Tutorial_GaussFilter.mzML", spectra); // iterate over map and output MS2 precursor information for (auto s_it = spectra.begin(); s_it != spectra.end(); ++s_it) { // we are only interested in MS2 spectra so we skip all other levels if (s_it->getMSLevel() != 2) continue; // get a reference to the precursor information const MSSpectrum& spectrum = *s_it; const vector<Precursor>& precursors = spectrum.getPrecursors(); // size check & throw exception if needed if (precursors.empty()) throw Exception::InvalidSize(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, precursors.size()); // get m/z and intensity of precursor double precursor_mz = precursors[0].getMZ(); float precursor_int = precursors[0].getIntensity(); // retrieve the precursor spectrum (the most recent MS1 spectrum) PeakMap::ConstIterator precursor_spectrum = spectra.getPrecursorSpectrum(s_it); double precursor_rt = precursor_spectrum->getRT(); // output precursor information std::cout << " precusor m/z: " << precursor_mz << " intensity: " << precursor_int << " retention time (sec.): " << precursor_rt << std::endl; } return 0; } // end of main
/**
  @brief Tool entry point: loads an mzML file, applies the Normalizer filter
         configured from the "algorithm:" parameter section and stores the
         result as mzML.

  @return EXECUTION_OK once the output file has been written.
*/
ExitCodes main_(int, const char **)
{
  // input/output files
  String input_path = getStringOption_("in");
  String output_path = getStringOption_("out");

  // load the input
  MSExperiment<> experiment;
  MzMLFile mzml_file;
  mzml_file.setLogType(log_type_);
  mzml_file.load(input_path, experiment);

  // configure and run the filter
  Param algorithm_param = getParam_().copy("algorithm:", true);
  writeDebug_("Used filter parameters", algorithm_param, 3);

  Normalizer normalizer;
  normalizer.setParameters(algorithm_param);
  normalizer.filterPeakMap(experiment);

  // annotate output with data processing info, then write it
  addDataProcessing_(experiment, getProcessingInfo_(DataProcessing::FILTERING));
  mzml_file.store(output_path, experiment);

  return EXECUTION_OK;
}
/**
  @brief Tool entry point: loads an mzML file, rasters every spectrum with a
         LinearResampler whose "spacing" is taken from the 'sampling_rate'
         option, and stores the result as mzML.

  @return EXECUTION_OK once the output file has been written.
*/
ExitCodes main_(int, const char **)
{
  // load data
  String input_path = getStringOption_("in");
  String output_path = getStringOption_("out");

  MSExperiment<> experiment;
  MzMLFile mzml_file;
  mzml_file.setLogType(log_type_);
  mzml_file.load(input_path, experiment);

  // set up the resampler
  DoubleReal spacing = getDoubleOption_("sampling_rate");
  Param spacing_param;
  spacing_param.setValue("spacing", spacing);

  LinearResampler resampler;
  resampler.setParameters(spacing_param);

  // resample every scan
  for (MSExperiment<>::Iterator scan = experiment.begin(); scan != experiment.end(); ++scan)
  {
    resampler.raster(*scan);
  }

  // clear meta data because they are no longer meaningful
  experiment.clearMetaDataArrays();

  // annotate output with data processing info
  addDataProcessing_(experiment, getProcessingInfo_(DataProcessing::DATA_PROCESSING));

  // store output
  mzml_file.store(output_path, experiment);

  return EXECUTION_OK;
}
/**
  @brief Tool entry point: loads an mzML file, runs PeakPickerIterative
         (configured from the "algorithm:" parameter section) on it and
         stores the picked experiment as mzML.

  @return EXECUTION_OK once the output file has been written.
*/
ExitCodes main_(int , const char**) override
{
  String input_path = getStringOption_("in");
  String output_path = getStringOption_("out");

  MapType input_map;
  MapType picked_map;

  Param algorithm_param = getParam_().copy("algorithm:", true);

  // read the input
  MzMLFile mzml_file;
  mzml_file.setLogType(log_type_);
  mzml_file.load(input_path, input_map);

  // pick peaks into a separate output map
  PeakPickerIterative picker;
  picker.setParameters(algorithm_param);
  picker.setLogType(log_type_);
  picker.pickExperiment(input_map, picked_map);

  // annotate output with data processing info, then write it
  addDataProcessing_(picked_map, getProcessingInfo_(DataProcessing::PEAK_PICKING));
  mzml_file.store(output_path, picked_map);

  return EXECUTION_OK;
}
/**
  @brief Tool entry point: renders the MS1 peaks of an mzML file into an image.

  Steps, in order:
  - determine the image format from 'out_type' or, if that is empty, from the
    suffix of 'out' (must be contained in out_formats_),
  - load the mzML file and accumulate all MS1 peaks into a bilinear
    interpolation grid of 'height' x 'width' cells (a value of 0 means: number
    of spectra resp. ceil(max m/z - min m/z)); flag 'transpose' swaps the axes,
  - colour every cell through a gradient (optionally on log intensities),
  - optionally mark MS2 precursor and feature positions,
  - save the image.

  @return EXECUTION_OK on success, ILLEGAL_PARAMETERS for an unusable format
          or non-positive image dimensions, CANNOT_WRITE_OUTPUT_FILE when
          saving the image fails.
*/
ExitCodes main_(int, const char **)
{
  //----------------------------------------------------------------
  // load data
  //----------------------------------------------------------------
  String in = getStringOption_("in");
  String in_featureXML = getStringOption_("in_featureXML");
  String out = getStringOption_("out");
  String format = getStringOption_("out_type");
  if (format.trim() == "") // get from filename
  {
    try
    {
      format = out.suffix('.');
    }
    catch (Exception::ElementNotFound & /*e*/)
    {
      format = "nosuffix";
    }
    StringListUtils::toUpper(out_formats_);
    if (!ListUtils::contains(out_formats_, format.toUpper()))
    {
      LOG_ERROR << "No explicit image output format was provided via 'out_type', and the suffix ('" << format << "') does not resemble a valid type. Please fix one of them." << std::endl;
      return ILLEGAL_PARAMETERS;
    }
  }

  MSExperiment<> exp;
  MzMLFile f;
  f.setLogType(log_type_);
  f.load(in, exp);

  exp.updateRanges(1);

  // a height of 0 requests one row per spectrum
  SignedSize rows = getIntOption_("height");
  if (rows == 0)
  {
    rows = exp.size();
  }
  if (rows <= 0)
  {
    writeLog_("Error: Zero rows is not possible.");
    return ILLEGAL_PARAMETERS;
  }

  // a width of 0 requests one column per m/z unit of the covered range
  SignedSize cols = getIntOption_("width");
  if (cols == 0)
  {
    cols = UInt(ceil(exp.getMaxMZ() - exp.getMinMZ()));
  }
  if (cols <= 0)
  {
    writeLog_("Error: Zero columns is not possible.");
    return ILLEGAL_PARAMETERS;
  }

  //----------------------------------------------------------------
  //Do the actual resampling
  BilinearInterpolation<double, double> bilip;
  bilip.getData().resize(rows, cols);

  if (!getFlag_("transpose"))
  {
    // scans run bottom-up:
    bilip.setMapping_0(0, exp.getMaxRT(), rows - 1, exp.getMinRT());
    // peaks run left-right:
    bilip.setMapping_1(0, exp.getMinMZ(), cols - 1, exp.getMaxMZ());

    for (MSExperiment<>::Iterator spec_iter = exp.begin();
         spec_iter != exp.end(); ++spec_iter)
    {
      if (spec_iter->getMSLevel() != 1) continue;
      for (MSExperiment<>::SpectrumType::ConstIterator peak1_iter =
             spec_iter->begin();
           peak1_iter != spec_iter->end(); ++peak1_iter)
      {
        bilip.addValue(spec_iter->getRT(), peak1_iter->getMZ(),
                       peak1_iter->getIntensity());
      }
    }
  }
  else // transpose
  {
    // spectra run bottom-up:
    bilip.setMapping_0(0, exp.getMaxMZ(), rows - 1, exp.getMinMZ());
    // scans run left-right:
    bilip.setMapping_1(0, exp.getMinRT(), cols - 1, exp.getMaxRT());

    for (MSExperiment<>::Iterator spec_iter = exp.begin();
         spec_iter != exp.end(); ++spec_iter)
    {
      if (spec_iter->getMSLevel() != 1) continue;
      for (MSExperiment<>::SpectrumType::ConstIterator peak1_iter =
             spec_iter->begin();
           peak1_iter != spec_iter->end(); ++peak1_iter)
      {
        bilip.addValue(peak1_iter->getMZ(), spec_iter->getRT(),
                       peak1_iter->getIntensity());
      }
    }
  }

  //----------------------------------------------------------------
  //create and store image
  int scans = (int) bilip.getData().sizePair().first;
  int peaks = (int) bilip.getData().sizePair().second;

  MultiGradient gradient;
  String gradient_str = getStringOption_("gradient");
  if (gradient_str != "")
  {
    gradient.fromString(String("Linear|") + gradient_str);
  }
  else
  {
    gradient.fromString("Linear|0,#FFFFFF;2,#FFFF00;11,#FFAA00;32,#FF0000;55,#AA00FF;78,#5500FF;100,#000000");
  }

  bool use_log = getFlag_("log_intensity");
  writeDebug_("log_intensity: " + String(use_log), 1);

  QImage image(peaks, scans, QImage::Format_RGB32);
  string s = getStringOption_("background_color");
  QColor background_color(s.c_str());

  string feature_color_string = getStringOption_("feature_color");
  QColor feature_color(feature_color_string.c_str());

  {
    // The painter is a scoped stack object (previously new/delete): it is
    // released on every exit path and is guaranteed to be gone before the
    // pixels of the image are written directly below.
    QPainter painter(&image);
    painter.setPen(background_color);
    painter.fillRect(0, 0, peaks, scans, Qt::SolidPattern);
  }

  // 'max_intensity' of 0 means: scale to the largest value in the grid
  double factor = getDoubleOption_("max_intensity");
  if (factor == 0)
  {
    factor = (*std::max_element(bilip.getData().begin(), bilip.getData().end()));
  }
  // logarithmize max. intensity as well:
  if (use_log) factor = std::log(factor);

  // the gradient is addressed in percent of the maximum
  factor /= 100.0;
  for (int i = 0; i < scans; ++i)
  {
    for (int j = 0; j < peaks; ++j)
    {
      double value = bilip.getData().getValue(i, j);
      if (use_log) value = std::log(value);
      if (value > 1e-4)
      {
        image.setPixel(j, i, gradient.interpolatedColorAt(value / factor).rgb());
      }
      else
      {
        // (near-)empty cells, including log(0), get the background colour
        image.setPixel(j, i, background_color.rgb());
      }
    }
  }

  if (getFlag_("precursors"))
  {
    markMS2Locations_(exp, image, getFlag_("transpose"),
                      getStringOption_("precursor_color").toQString(),
                      Size(getIntOption_("precursor_size")));
  }

  if (!in_featureXML.empty())
  {
    FeatureMap feature_map;
    FeatureXMLFile ff;
    ff.load(in_featureXML, feature_map);
    markFeatureLocations_(feature_map, exp, image, getFlag_("transpose"), feature_color);
  }

  if (image.save(out.toQString(), format.c_str())) return EXECUTION_OK;
  else return CANNOT_WRITE_OUTPUT_FILE;
}
/**
  @brief Tool entry point: runs the FeatureFinder (algorithm name taken from
         FeatureFinderAlgorithmPicked::getProductName()) on the MS1 spectra of
         an mzML file and stores the features as featureXML and, if 'out_mzq'
         is set, additionally as mzQuantML.

  Throws Exception::FileEmpty when no MS1 spectrum was loaded and
  Exception::IllegalArgument when the first spectrum is of type RAWDATA and
  the 'force' flag is not set.

  @return EXECUTION_OK once all requested output files have been written.
*/
ExitCodes main_(int, const char**)
{
  // input/output file names
  String input_path = getStringOption_("in");
  String output_path = getStringOption_("out");
  String mzq_path = getStringOption_("out_mzq");

  // prevent loading of fragment spectra: restrict the reader to MS level 1
  PeakFileOptions ms1_only;
  ms1_only.setMSLevels(vector<Int>(1, 1));

  // reading input data
  MzMLFile mzml_reader;
  mzml_reader.getOptions() = ms1_only;
  mzml_reader.setLogType(log_type_);

  PeakMap peak_map;
  mzml_reader.load(input_path, peak_map);
  peak_map.updateRanges();

  if (peak_map.getSpectra().empty())
  {
    throw OpenMS::Exception::FileEmpty(__FILE__, __LINE__, __FUNCTION__, "Error: No MS1 spectra in input file.");
  }

  // determine type of spectral data (profile or centroided) from the first spectrum
  SpectrumSettings::SpectrumType first_type = peak_map[0].getType();
  if (first_type == SpectrumSettings::RAWDATA && !getFlag_("force"))
  {
    throw OpenMS::Exception::IllegalArgument(__FILE__, __LINE__, __FUNCTION__, "Error: Profile data provided but centroided spectra expected. To enforce processing of the data set the -force flag.");
  }

  // load seeds (only when a seed file was given)
  FeatureMap seed_map;
  if (getStringOption_("seeds") != "")
  {
    FeatureXMLFile().load(getStringOption_("seeds"), seed_map);
  }

  // setup of FeatureFinder
  FeatureFinder finder;
  finder.setLogType(log_type_);

  // a map for the resulting features
  FeatureMap feature_map;

  // get parameters specific for the feature finder
  Param finder_param = getParam_().copy("algorithm:", true);
  writeDebug_("Parameters passed to FeatureFinder", finder_param, 3);

  // apply the feature finder and give every feature a unique id
  finder.run(FeatureFinderAlgorithmPicked::getProductName(), peak_map, feature_map, finder_param, seed_map);
  feature_map.applyMemberFunction(&UniqueIdInterface::setUniqueId);

  // DEBUG: dump the meta values of every feature that has any
  if (debug_level_ > 10)
  {
    for (FeatureMap::Iterator feat = feature_map.begin(); feat != feature_map.end(); ++feat)
    {
      if (feat->isMetaEmpty())
      {
        continue;
      }
      vector<String> meta_keys;
      feat->getKeys(meta_keys);
      LOG_INFO << "Feature " << feat->getUniqueId() << endl;
      for (Size k = 0; k < meta_keys.size(); ++k)
      {
        LOG_INFO << " " << meta_keys[k] << " = " << feat->getMetaValue(meta_keys[k]) << endl;
      }
    }
  }

  //-------------------------------------------------------------
  // writing files
  //-------------------------------------------------------------

  // annotate output with data processing info
  addDataProcessing_(feature_map, getProcessingInfo_(DataProcessing::QUANTITATION));

  // writer for the user specified output file
  FeatureXMLFile featurexml_writer;

  // Remove detailed convex hull information and subordinate features
  // to reduce the file size of feature files, unless debugging is turned on.
  if (debug_level_ < 5)
  {
    for (FeatureMap::Iterator feat = feature_map.begin(); feat != feature_map.end(); ++feat)
    {
      feat->getConvexHull().expandToBoundingBox();
      for (Size h = 0; h < feat->getConvexHulls().size(); ++h)
      {
        feat->getConvexHulls()[h].expandToBoundingBox();
      }
      feat->getSubordinates().clear();
    }
  }

  featurexml_writer.store(output_path, feature_map);

  // optional mzQuantML output
  if (!mzq_path.trim().empty())
  {
    MSQuantifications quantifications(feature_map, peak_map.getExperimentalSettings(), peak_map[0].getDataProcessing());
    quantifications.assignUIDs();
    MzQuantMLFile mzq_writer;
    mzq_writer.store(mzq_path, quantifications);
  }

  return EXECUTION_OK;
}
/**
  @brief Tool entry point: aligns the peak maps given via 'in' with
         MapAlignmentAlgorithmSpectrumAlignment, applies the resulting
         retention time transformations and writes the maps to 'out'
         (and the transformations to 'trafo_out', if given).

  @return The result of checkParameters_() if that is not EXECUTION_OK,
          otherwise EXECUTION_OK after all output has been written.
*/
ExitCodes main_(int, const char**)
{
  ExitCodes param_check = checkParameters_();
  if (param_check != EXECUTION_OK)
  {
    return param_check;
  }

  // set up the alignment algorithm
  MapAlignmentAlgorithmSpectrumAlignment aligner;
  Param aligner_param = getParam_().copy("algorithm:", true);
  aligner.setParameters(aligner_param);
  aligner.setLogType(log_type_);

  StringList input_files = getStringList_("in");
  StringList output_files = getStringList_("out");
  StringList trafo_files = getStringList_("trafo_out");

  // model parameters: keep only the subsection that belongs to the chosen model type
  Param model_params = getParam_().copy("model:", true);
  String model_type = model_params.getValue("type");
  model_params = model_params.copy(model_type + ":", true);

  std::vector<TransformationDescription> trafo_descriptions;

  //-------------------------------------------------------------
  // perform peak alignment
  //-------------------------------------------------------------
  ProgressLogger progress;
  progress.setLogType(log_type_);

  // load input
  std::vector<MSExperiment<> > maps(input_files.size());
  MzMLFile mzml_file;
  mzml_file.setLogType(log_type_);

  progress.startProgress(0, input_files.size(), "loading input files");
  for (Size idx = 0; idx < input_files.size(); ++idx)
  {
    progress.setProgress(idx);
    mzml_file.load(input_files[idx], maps[idx]);
  }
  progress.endProgress();

  // try to align
  aligner.align(maps, trafo_descriptions);

  // fit the requested model to every transformation (skipped for "none")
  if (model_type != "none")
  {
    for (Size idx = 0; idx < trafo_descriptions.size(); ++idx)
    {
      trafo_descriptions[idx].fitModel(model_type, model_params);
    }
  }

  // write output
  progress.startProgress(0, output_files.size(), "applying RT transformations and writing output files");
  for (Size idx = 0; idx < output_files.size(); ++idx)
  {
    progress.setProgress(idx);

    MapAlignmentTransformer::transformRetentionTimes(maps[idx], trafo_descriptions[idx]);

    // annotate output with data processing info
    addDataProcessing_(maps[idx], getProcessingInfo_(DataProcessing::ALIGNMENT));

    mzml_file.store(output_files[idx], maps[idx]);
  }
  progress.endProgress();

  // optionally store the transformations themselves
  if (!trafo_files.empty())
  {
    TransformationXMLFile trafo_writer;
    for (Size idx = 0; idx < trafo_descriptions.size(); ++idx)
    {
      trafo_writer.store(trafo_files[idx], trafo_descriptions[idx]);
    }
  }

  return EXECUTION_OK;
}
/**
  @brief Tool entry point of the SILAC analysis.

  Stages, in the order they are executed:
  - parameter handling (handleParameters_*()) and initialisation of the SILACAnalyzer,
  - loading of the mzML input, removal of spectra via InMSLevelRange and sorting,
  - optional registration of the experiment and its labels for mzQuantML ('out_mzq'),
  - peak width estimation,
  - filtering (or loading of stored filter results when 'in_filters' is set),
  - clustering,
  - writing of the requested outputs (debug CSV, consensus map, mzQuantML,
    cluster consensus map, feature map).

  Option values such as in, out, out_mzq, in_filters, out_filters, out_debug,
  out_clusters, out_features and selected_labels are not declared in this
  function; they are read from the enclosing scope.

  @return ILLEGAL_PARAMETERS when 'out' is used without any label,
          INCOMPATIBLE_INPUT_DATA when the peak width cannot be estimated,
          EXECUTION_OK otherwise.
*/
ExitCodes main_(int, const char **)
{
  // data to be passed through the algorithm
  vector<vector<SILACPattern> > data;
  MSQuantifications msq;
  // NOTE(review): raw pointers; nothing in this function deletes them -
  // confirm who owns the Clustering objects.
  vector<Clustering *> cluster_data;

  //
  // Parameter handling
  //
  map<String, DoubleReal> label_identifiers; // list defining the mass shifts of each label (e.g. "Arg6" => 6.0201290268)
  handleParameters_sample();
  handleParameters_algorithm();
  handleParameters_labels(label_identifiers);
  handleParameters();

  if (selected_labels.empty() && !out.empty()) // incompatible parameters
  {
    writeLog_("Error: The 'out' parameter cannot be used without a label (parameter 'sample:labels'). Use 'out_features' instead.");
    return ILLEGAL_PARAMETERS;
  }

  //
  // Initializing the SILACAnalyzer with our parameters
  //
  SILACAnalyzer analyzer;
  analyzer.setLogType(log_type_);
  analyzer.initialize(
    // section "sample"
    selected_labels,
    charge_min,
    charge_max,
    missed_cleavages,
    isotopes_per_peptide_min,
    isotopes_per_peptide_max,
    // section "algorithm"
    rt_threshold,
    rt_min,
    intensity_cutoff,
    intensity_correlation,
    model_deviation,
    allow_missing_peaks,
    // labels
    label_identifiers);

  //--------------------------------------------------
  // loading input from .mzML
  //--------------------------------------------------

  MzMLFile file;
  MSExperiment<Peak1D> exp;

  // only read MS1 spectra ...
  // (disabled: restricting the reader itself; the spectra are filtered after loading instead)
  /*
  std::vector<int> levels;
  levels.push_back(1);
  file.getOptions().setMSLevels(levels);
  */
  LOG_DEBUG << "Loading input..." << endl;
  file.setLogType(log_type_);
  file.load(in, exp);

  // set size of input map
  exp.updateRanges();

  // extract level 1 spectra (erase everything matched by the InMSLevelRange predicate)
  exp.getSpectra().erase(remove_if(exp.begin(), exp.end(), InMSLevelRange<MSExperiment<Peak1D>::SpectrumType>(IntList::create("1"), true)), exp.end());

  // sort according to RT and MZ
  exp.sortSpectra();

  // mzQuantML output requested: register the experiment together with its labels
  if (out_mzq != "")
  {
    vector<vector<String> > SILAClabels = analyzer.getSILAClabels(); // list of SILAC labels, e.g. selected_labels="[Lys4,Arg6][Lys8,Arg10]" => SILAClabels[0][1]="Arg6"

    std::vector<std::vector<std::pair<String, DoubleReal> > > labels;
    // add none label (the unlabelled entry with mass shift 0 comes first)
    labels.push_back(std::vector<std::pair<String, DoubleReal> >(1, std::make_pair<String, DoubleReal>(String("none"), DoubleReal(0))));
    for (Size i = 0; i < SILAClabels.size(); ++i) // SILACLabels MUST be in weight order!!!
    {
      std::vector<std::pair<String, DoubleReal> > one_label;
      for (UInt j = 0; j < SILAClabels[i].size(); ++j)
      {
        one_label.push_back(*(label_identifiers.find(SILAClabels[i][j]))); // this dereferencing would break if all SILAClabels would not have been checked before!
      }
      labels.push_back(one_label);
    }
    msq.registerExperiment(exp, labels); // add assays
    msq.assignUIDs();
  }
  MSQuantifications::QUANT_TYPES quant_type = MSQuantifications::MS1LABEL;
  msq.setAnalysisSummaryQuantType(quant_type); // add analysis_summary_

  //--------------------------------------------------
  // estimate peak width
  //--------------------------------------------------

  LOG_DEBUG << "Estimating peak width..." << endl;
  PeakWidthEstimator::Result peak_width;
  try
  {
    peak_width = analyzer.estimatePeakWidth(exp);
  }
  catch (Exception::InvalidSize &)
  {
    writeLog_("Error: Unable to estimate peak width of input data.");
    return INCOMPATIBLE_INPUT_DATA;
  }

  if (in_filters == "")
  {
    //--------------------------------------------------
    // filter input data
    //--------------------------------------------------

    LOG_DEBUG << "Filtering input data..." << endl;
    analyzer.filterData(exp, peak_width, data);

    //--------------------------------------------------
    // store filter results
    //--------------------------------------------------

    if (out_filters != "")
    {
      LOG_DEBUG << "Storing filtering results..." << endl;
      ConsensusMap map;
      for (std::vector<std::vector<SILACPattern> >::const_iterator it = data.begin(); it != data.end(); ++it)
      {
        analyzer.generateFilterConsensusByPattern(map, *it);
      }
      analyzer.writeConsensus(out_filters, map);
    }
  }
  else
  {
    //--------------------------------------------------
    // load filter results
    //--------------------------------------------------

    LOG_DEBUG << "Loading filtering results..." << endl;
    ConsensusMap map;
    analyzer.readConsensus(in_filters, map);
    analyzer.readFilterConsensusByPattern(map, data);
  }

  //--------------------------------------------------
  // clustering
  //--------------------------------------------------

  LOG_DEBUG << "Clustering data..." << endl;
  analyzer.clusterData(exp, peak_width, cluster_data, data);

  //--------------------------------------------------------------
  // write output
  //--------------------------------------------------------------

  if (out_debug != "")
  {
    LOG_DEBUG << "Writing debug output file..." << endl;

    // note: within this scope the local stream 'out' hides the outer 'out'
    // that is used elsewhere in this function
    std::ofstream out((out_debug + ".clusters.csv").c_str());

    // NOTE(review): massShifts[0] is read below without checking that
    // massShifts is non-empty - confirm getMassShifts() always returns at
    // least one entry.
    vector<vector<DoubleReal> > massShifts = analyzer.getMassShifts(); // list of mass shifts

    // generate header
    out
    << std::fixed << std::setprecision(8)
    << "ID,RT,MZ_PEAK,CHARGE";
    for (UInt i = 1; i <= massShifts[0].size(); ++i)
    {
      out << ",DELTA_MASS_" << i + 1;
    }
    for (UInt i = 0; i <= massShifts[0].size(); ++i)
    {
      for (UInt j = 1; j <= isotopes_per_peptide_max; ++j)
      {
        out << ",INT_PEAK_" << i + 1 << '_' << j;
      }
    }
    out << ",MZ_RAW";
    for (UInt i = 0; i <= massShifts[0].size(); ++i)
    {
      for (UInt j = 1; j <= isotopes_per_peptide_max; ++j)
      {
        out << ",INT_RAW_" << i + 1 << '_' << j;
      }
    }
    for (UInt i = 0; i <= massShifts[0].size(); ++i)
    {
      for (UInt j = 1; j <= isotopes_per_peptide_max; ++j)
      {
        out << ",MZ_RAW_" << i + 1 << '_' << j;
      }
    }
    out << '\n';

    // write data (one cluster_id counter is passed to every generateClusterDebug() call)
    UInt cluster_id = 0;
    for (vector<Clustering *>::const_iterator it = cluster_data.begin(); it != cluster_data.end(); ++it)
    {
      analyzer.generateClusterDebug(out, **it, cluster_id);
    }
  }

  if (out != "")
  {
    LOG_DEBUG << "Generating output consensus map..." << endl;
    ConsensusMap map;

    for (vector<Clustering *>::const_iterator it = cluster_data.begin(); it != cluster_data.end(); ++it)
    {
      analyzer.generateClusterConsensusByCluster(map, **it);
    }

    LOG_DEBUG << "Adding meta data..." << endl;
    // XXX: Need a map per mass shift
    ConsensusMap::FileDescriptions& desc = map.getFileDescriptions();
    Size id = 0;
    for (ConsensusMap::FileDescriptions::iterator it = desc.begin(); it != desc.end(); ++it)
    {
      if (test_mode_) it->second.filename = in; // skip path, since its not cross platform and complicates verification
      else it->second.filename = File::basename(in);
      // Write correct label
      // (this would crash if used without a label!)
      if (id > 0) it->second.label = StringList(analyzer.getSILAClabels()[id - 1]).concatenate(""); // skip first round (empty label is not listed)
      ++id;
    }

    std::set<DataProcessing::ProcessingAction> actions;
    actions.insert(DataProcessing::DATA_PROCESSING);
    actions.insert(DataProcessing::PEAK_PICKING);
    actions.insert(DataProcessing::FILTERING);
    actions.insert(DataProcessing::QUANTITATION);

    addDataProcessing_(map, getProcessingInfo_(actions));

    analyzer.writeConsensus(out, map);

    // mzQuantML is only written when a consensus output ('out') was requested as well
    if (out_mzq != "")
    {
      LOG_DEBUG << "Generating output mzQuantML file..." << endl;
      ConsensusMap numap(map);
      // calculate ratios
      for (ConsensusMap::iterator cit = numap.begin(); cit != numap.end(); ++cit)
      {
        //~ make ratio templates: one ratio of the first assay against each further assay
        std::vector<ConsensusFeature::Ratio> rts;
        for (std::vector<MSQuantifications::Assay>::const_iterator ait = msq.getAssays().begin() + 1; ait != msq.getAssays().end(); ++ait)
        {
          ConsensusFeature::Ratio r;
          r.numerator_ref_ = String(msq.getAssays().begin()->uid_);
          r.denominator_ref_ = String(ait->uid_);
          r.description_.push_back("Simple ratio calc");
          r.description_.push_back("light to medium/.../heavy");
          //~ "<cvParam cvRef=\"PSI-MS\" accession=\"MS:1001132\" name=\"peptide ratio\"/>"
          rts.push_back(r);
        }

        const ConsensusFeature::HandleSetType& feature_handles = cit->getFeatures();
        if (feature_handles.size() > 1)
        {
          std::set<FeatureHandle, FeatureHandle::IndexLess>::const_iterator fit = feature_handles.begin(); // this is unlabeled
          fit++;
          for (; fit != feature_handles.end(); ++fit)
          {
            // position of the handle within the set selects the ratio template (offset by the unlabelled handle)
            Size ri = std::distance(feature_handles.begin(), fit);
            rts[ri - 1].ratio_value_ = feature_handles.begin()->getIntensity() / fit->getIntensity(); // a proper SILACAnalyzer algorithm should never have 0-intensities, so no division by zero ...
          }
        }

        cit->setRatios(rts);
      }
      msq.addConsensusMap(numap); // add SILACAnalyzer result

      //~ msq.addFeatureMap();//add SILACAnalyzer evidence trail as soon as it is clear what is really contained in the feature map
      //~ add AuditCollection - no such concept in TOPPTools yet
      analyzer.writeMzQuantML(out_mzq, msq);
    }
  }

  if (out_clusters != "")
  {
    LOG_DEBUG << "Generating cluster output file..." << endl;
    ConsensusMap map;
    for (vector<Clustering *>::const_iterator it = cluster_data.begin(); it != cluster_data.end(); ++it)
    {
      // NOTE(review): cluster_id restarts at 0 for every Clustering here,
      // unlike the debug output above where one counter spans all of them -
      // confirm this is intended.
      UInt cluster_id = 0;
      analyzer.generateClusterConsensusByPattern(map, **it, cluster_id);
    }

    ConsensusMap::FileDescription & desc = map.getFileDescriptions()[0];
    desc.filename = in;
    desc.label = "Cluster";

    analyzer.writeConsensus(out_clusters, map);
  }

  if (out_features != "")
  {
    LOG_DEBUG << "Generating output feature map..." << endl;
    FeatureMap<> map;
    for (vector<Clustering *>::const_iterator it = cluster_data.begin(); it != cluster_data.end(); ++it)
    {
      analyzer.generateClusterFeatureByCluster(map, **it);
    }

    analyzer.writeFeatures(out_features, map);
  }

  return EXECUTION_OK;
}
ExitCodes main_(int, const char**) override { String tmp_dir = QDir::toNativeSeparators((File::getTempDirectory() + "/" + File::getUniqueName() + "/").toQString()); // body for the tmp files { QDir d; d.mkpath(tmp_dir.toQString()); } String logfile(getStringOption_("log")); String myrimatch_executable(getStringOption_("myrimatch_executable")); //------------------------------------------------------------- // get version of MyriMatch //------------------------------------------------------------- QProcess qp; String myrimatch_version; MyriMatchVersion myrimatch_version_i; // we invoke myrimatch w/o arguments. that yields a return code != 0. but // there is no other way for version 2.1 to get the version number qp.start(myrimatch_executable.toQString(), QStringList(), QIODevice::ReadOnly); // does automatic escaping etc... qp.waitForFinished(); String output(QString(qp.readAllStandardOutput())); vector<String> lines; vector<String> version_split; output.split('\n', lines); // the version number is expected to be in the second line if (lines.size() < 2) { writeLog_("Warning: MyriMatch version output (" + output + ") not formatted as expected!"); return EXTERNAL_PROGRAM_ERROR; } // the version is expected to be something like: // MyriMatch 2.1.111 (2011-12-27) lines[1].split(' ', version_split); if (version_split.size() == 3 && getVersion_(version_split[1], myrimatch_version_i)) { myrimatch_version = version_split[1].removeWhitespaces(); writeDebug_("Setting MyriMatch version to " + myrimatch_version, 1); } else { writeLog_("Warning: MyriMatch version output (" + output + ") not formatted as expected!"); return EXTERNAL_PROGRAM_ERROR; } if (! ( (myrimatch_version_i.myrimatch_major == 2) && // major must be 2 (myrimatch_version_i.myrimatch_minor == 1 || myrimatch_version_i.myrimatch_minor == 2) // minor .1 or .2 )) { writeLog_("Warning: unsupported MyriMatch version (" + myrimatch_version + "). Tested only for MyriMatch 2.1.x and 2.2.x." 
"\nIf you encounter parameter errors, you can try the flag 'ignoreConfigErrors', but be aware that MyriMatch might be misconfigured."); } //------------------------------------------------------------- // parsing parameters //------------------------------------------------------------- String inputfile_name = File::absolutePath(getStringOption_("in")); String outputfile_name = getStringOption_("out"); String db_name = File::absolutePath(String(getStringOption_("database"))); // building parameter String StringList parameters; if (getFlag_("ignoreConfigErrors")) parameters << "-ignoreConfigErrors"; // Common Identification engine options StringList static_mod_list; StringList dynamic_mod_list; translateModifications(static_mod_list, dynamic_mod_list); if (!static_mod_list.empty()) parameters << "-StaticMods" << ListUtils::concatenate(static_mod_list, " "); if (!dynamic_mod_list.empty()) parameters << "-DynamicMods" << ListUtils::concatenate(dynamic_mod_list, " "); parameters << "-ProteinDatabase" << File::absolutePath(db_name); if (getFlag_("precursor_mass_tolerance_avg")) { parameters << "-AvgPrecursorMzTolerance"; } else { parameters << "-MonoPrecursorMzTolerance"; } String precursor_mass_tolerance_unit = getStringOption_("precursor_mass_tolerance_unit") == "Da" ? " m/z" : " ppm"; parameters << String(getDoubleOption_("precursor_mass_tolerance")) + precursor_mass_tolerance_unit; String fragment_mass_tolerance_unit = getStringOption_("fragment_mass_tolerance_unit"); if (fragment_mass_tolerance_unit == "Da") { fragment_mass_tolerance_unit = "m/z"; } parameters << "-FragmentMzTolerance" << String(getDoubleOption_("fragment_mass_tolerance")) + " " + fragment_mass_tolerance_unit; StringList slf = getStringList_("SpectrumListFilters"); if (slf.size() > 0) { if (myrimatch_version_i.myrimatch_minor <= 1) { // use quotes around the slf arguments (will be added automatically by Qt during call), i.e. 
"-SpectrumListFilters" "peakPicking false 2-" parameters << "-SpectrumListFilters" << ListUtils::concatenate(slf, ";") << ""; } else { // no quotes -- pass a single argument, i.e. "-SpectrumListFilters peakPicking false 2-" parameters << "-SpectrumListFilters " + ListUtils::concatenate(slf, ";") << ""; } } //parameters << "-ThreadCountMultiplier" << String(getIntOption_("threads")); // MyriMatch does not recognise this, even though it's in the manual. // MyriMatch specific parameters parameters << "-NumChargeStates" << getIntOption_("NumChargeStates"); parameters << "-TicCutoffPercentage" << String(getDoubleOption_("TicCutoffPercentage")); parameters << "-MaxDynamicMods" << getIntOption_("MaxDynamicMods"); parameters << "-MaxResultRank" << getIntOption_("MaxResultRank"); parameters << "-MinTerminiCleavages" << getIntOption_("MinTerminiCleavages"); parameters << "-MaxMissedCleavages" << getIntOption_("MaxMissedCleavages"); String cleavage_rule = getStringOption_("CleavageRules"); if (cleavage_rule.empty()) { cleavage_rule = "Trypsin/P"; } parameters << "-CleavageRules" << cleavage_rule; // advanced parameters parameters << "-MinPeptideMass" << getDoubleOption_("MinPeptideMass"); parameters << "-MaxPeptideMass" << getDoubleOption_("MaxPeptideMass"); parameters << "-MinPeptideLength" << getIntOption_("MinPeptideLength"); parameters << "-MaxPeptideLength" << getIntOption_("MaxPeptideLength"); parameters << "-NumIntensityClasses" << getIntOption_("NumIntensityClasses"); parameters << "-ClassSizeMultiplier" << getDoubleOption_("ClassSizeMultiplier"); parameters << "-MonoisotopeAdjustmentSet" << getStringOption_("MonoisotopeAdjustmentSet"); parameters << "-cpus" << getIntOption_("threads"); // Constant parameters // DecoyPrefix worked only when set through the config file String cfg_file = tmp_dir + "myrimatch.cfg"; ofstream f(cfg_file.c_str()); f << "DecoyPrefix=\"\"\n"; f.close(); parameters << "-cfg" << cfg_file; // path to input file must be the last parameter 
parameters << inputfile_name; //------------------------------------------------------------- // calculations //------------------------------------------------------------- QStringList qparam; writeDebug_("MyriMatch arguments:", 1); writeDebug_(String("\"") + ListUtils::concatenate(parameters, "\" \"") + "\"", 1); for (Size i = 0; i < parameters.size(); ++i) { qparam << parameters[i].toQString(); } QProcess process; // Bad style, because it breaks relative paths? process.setWorkingDirectory(tmp_dir.toQString()); process.start(myrimatch_executable.toQString(), qparam, QIODevice::ReadOnly); bool success = process.waitForFinished(-1); String myri_msg(QString(process.readAllStandardOutput())); String myri_err(QString(process.readAllStandardError())); writeDebug_(myri_msg, 1); writeDebug_(myri_err, 0); if (!success || process.exitStatus() != 0 || process.exitCode() != 0) { writeLog_("Error: MyriMatch problem! (Details can be seen in the logfile: \"" + logfile + "\")"); writeLog_("Note: This message can also be triggered if you run out of space in your tmp directory"); return EXTERNAL_PROGRAM_ERROR; } //------------------------------------------------------------- // reading MyriMatch output //------------------------------------------------------------- writeDebug_("Reading output of MyriMatch", 5); String exp_name = File::basename(inputfile_name); String pep_file = tmp_dir + File::removeExtension(exp_name) + ".pepXML"; vector<ProteinIdentification> protein_identifications; vector<PeptideIdentification> peptide_identifications; PeakMap exp; if (File::exists(pep_file)) { MzMLFile fh; fh.load(inputfile_name, exp); SpectrumMetaDataLookup lookup; lookup.readSpectra(exp.getSpectra()); PepXMLFile().load(pep_file, protein_identifications, peptide_identifications, exp_name, lookup); } else { writeLog_("Error: MyriMatch problem! 
No pepXML output file (expected as '" + pep_file + "') was generated by MyriMatch."); writeLog_("Note: This message can be triggered if no MS2 spectra were found or no identifications were made."); writeLog_(" Myrimatch expects MS2 spectra in mzML files to contain the MSn tag. MSSpectrum with MS level 2 is not sufficient. You can use FileConverter to create such an mzML file by converting from mzML --> mzXML --> mzML."); return EXTERNAL_PROGRAM_ERROR; } if (debug_level_ == 0) { QFile(pep_file.toQString()).remove(); QFile(cfg_file.toQString()).remove(); } else { writeDebug_(String("Not removing '") + pep_file + "' for debugging purposes. Please delete manually!", 1); writeDebug_(String("Not removing '") + cfg_file + "' for debugging purposes. Please delete manually!", 1); } //------------------------------------------------------------- // writing results //------------------------------------------------------------- ProteinIdentification::SearchParameters search_parameters; search_parameters.db = getStringOption_("database"); ProteinIdentification::PeakMassType mass_type = getFlag_("precursor_mass_tolerance_avg") == true ? ProteinIdentification::AVERAGE : ProteinIdentification::MONOISOTOPIC; search_parameters.mass_type = mass_type; search_parameters.fixed_modifications = getStringList_("fixed_modifications"); search_parameters.variable_modifications = getStringList_("variable_modifications"); search_parameters.missed_cleavages = getIntOption_("MaxMissedCleavages"); search_parameters.fragment_mass_tolerance = getDoubleOption_("fragment_mass_tolerance"); search_parameters.precursor_mass_tolerance = getDoubleOption_("precursor_mass_tolerance"); search_parameters.precursor_mass_tolerance_ppm = getStringOption_("precursor_mass_tolerance_unit") == "ppm" ? true : false; search_parameters.fragment_mass_tolerance_ppm = getStringOption_("fragment_mass_tolerance_unit") == "ppm" ? 
true : false; protein_identifications[0].setSearchParameters(search_parameters); protein_identifications[0].setSearchEngineVersion(myrimatch_version); protein_identifications[0].setSearchEngine("MyriMatch"); if (!protein_identifications.empty()) { StringList ms_runs; exp.getPrimaryMSRunPath(ms_runs); protein_identifications[0].setPrimaryMSRunPath(ms_runs); } IdXMLFile().store(outputfile_name, protein_identifications, peptide_identifications); return EXECUTION_OK; }
ExitCodes main_(int, const char **) override { //------------------------------------------------------------- // Parsing parameters //------------------------------------------------------------- String in = getStringOption_("in"); String out_sirius = getStringOption_("out_sirius"); String out_csifingerid = getStringOption_("out_fingerid"); // needed for counting int number_compounds = getIntOption_("number"); // Parameter for Sirius3 QString executable = getStringOption_("executable").toQString(); const QString profile = getStringOption_("profile").toQString(); const QString elements = getStringOption_("elements").toQString(); const QString database = getStringOption_("database").toQString(); const QString isotope = getStringOption_("isotope").toQString(); const QString noise = QString::number(getIntOption_("noise")); const QString ppm_max = QString::number(getIntOption_("ppm_max")); const QString candidates = QString::number(getIntOption_("candidates")); bool auto_charge = getFlag_("auto_charge"); bool no_recalibration = getFlag_("no_recalibration"); bool iontree = getFlag_("iontree"); //------------------------------------------------------------- // Determination of the Executable //------------------------------------------------------------- // Parameter executable not provided if (executable.isEmpty()) { const QProcessEnvironment env; const QString & qsiriuspathenv = env.systemEnvironment().value("SIRIUS_PATH"); if (qsiriuspathenv.isEmpty()) { writeLog_( "FATAL: Executable of Sirius could not be found. 
Please either use SIRIUS_PATH env variable or provide with -executable"); return MISSING_PARAMETERS; } executable = qsiriuspathenv; } // Normalize file path QFileInfo file_info(executable); executable = file_info.canonicalFilePath(); writeLog_("Executable is: " + executable); const QString & path_to_executable = File::path(executable).toQString(); //------------------------------------------------------------- // Calculations //------------------------------------------------------------- PeakMap spectra; MzMLFile f; f.setLogType(log_type_); f.load(in, spectra); std::vector<String> subdirs; QString tmp_base_dir = File::getTempDirectory().toQString(); QString tmp_dir = QDir(tmp_base_dir).filePath(File::getUniqueName().toQString()); String tmp_ms_file = QDir(tmp_base_dir).filePath((File::getUniqueName() + ".ms").toQString()); String out_dir = QDir(tmp_dir).filePath("sirius_out"); //Write msfile SiriusMSFile::store(spectra, tmp_ms_file); // Assemble SIRIUS parameters QStringList process_params; process_params << "-p" << profile << "-e" << elements << "-d" << database << "-s" << isotope << "--noise" << noise << "--candidates" << candidates << "--ppm-max" << ppm_max << "--quiet" << "--output" << out_dir.toQString(); //internal output folder for temporary SIRIUS output file storage // Add flags if (no_recalibration) { process_params << "--no-recalibration"; } if (!out_csifingerid.empty()) { process_params << "--fingerid"; } if (iontree) { process_params << "--iontree"; } if (auto_charge) { process_params << "--auto-charge"; } process_params << tmp_ms_file.toQString(); // The actual process QProcess qp; qp.setWorkingDirectory(path_to_executable); //since library paths are relative to sirius executable path qp.start(executable, process_params); // does automatic escaping etc... 
start std::stringstream ss; ss << "COMMAND: " << executable.toStdString(); for (QStringList::const_iterator it = process_params.begin(); it != process_params.end(); ++it) { ss << " " << it->toStdString(); } LOG_DEBUG << ss.str() << endl; writeLog_("Executing: " + String(executable)); writeLog_("Working Dir is: " + path_to_executable); const bool success = qp.waitForFinished(-1); // wait till job is finished qp.close(); if (success == false || qp.exitStatus() != 0 || qp.exitCode() != 0) { writeLog_( "FATAL: External invocation of Sirius failed. Standard output and error were:"); const QString sirius_stdout(qp.readAllStandardOutput()); const QString sirius_stderr(qp.readAllStandardOutput()); writeLog_(sirius_stdout); writeLog_(sirius_stderr); writeLog_(String(qp.exitCode())); return EXTERNAL_PROGRAM_ERROR; } //------------------------------------------------------------- // writing output //------------------------------------------------------------- //Extract path to subfolders (sirius internal folder structure) QDirIterator it(out_dir.toQString(), QDir::Dirs | QDir::NoDotAndDotDot, QDirIterator::NoIteratorFlags); while (it.hasNext()) { subdirs.push_back(it.next()); } //sort vector path list std::sort(subdirs.begin(), subdirs.end(), sortByScanIndex); //Convert sirius_output to mztab and store file MzTab sirius_result; MzTabFile siriusfile; SiriusMzTabWriter::read(subdirs, number_compounds, sirius_result); siriusfile.store(out_sirius, sirius_result); //Convert sirius_output to mztab and store file if (out_csifingerid.empty() == false) { MzTab csi_result; MzTabFile csifile; CsiFingerIdMzTabWriter::read(subdirs, number_compounds, csi_result); csifile.store(out_csifingerid, csi_result); } //clean tmp directory if debug level < 2 if (debug_level_ >= 2) { writeDebug_("Keeping temporary files in directory '" + String(tmp_dir) + " and msfile at this location "+ tmp_ms_file + ". 
Set debug level to 1 or lower to remove them.", 2); } else { if (tmp_dir.isEmpty() == false) { writeDebug_("Deleting temporary directory '" + String(tmp_dir) + "'. Set debug level to 2 or higher to keep it.", 0); File::removeDir(tmp_dir); } if (tmp_ms_file.empty() == false) { writeDebug_("Deleting temporary msfile '" + tmp_ms_file + "'. Set debug level to 2 or higher to keep it.", 0); File::remove(tmp_ms_file); // remove msfile } } return EXECUTION_OK; }
ExitCodes main_(int, const char**) { //------------------------------------------------------------- // parameter handling //------------------------------------------------------------- //input file names String in = getStringOption_("in"); //input file type FileHandler fh; FileTypes::Type in_type = FileTypes::nameToType(getStringOption_("in_type")); if (in_type == FileTypes::UNKNOWN) { in_type = fh.getType(in); writeDebug_(String("Input file type: ") + FileTypes::typeToName(in_type), 2); } if (in_type == FileTypes::UNKNOWN) { writeLog_("Error: Could not determine input file type!"); return PARSE_ERROR; } //output file names and types String out = getStringOption_("out"); FileTypes::Type out_type = FileTypes::nameToType(getStringOption_("out_type")); if (out_type == FileTypes::UNKNOWN) { out_type = fh.getTypeByFileName(out); } if (out_type == FileTypes::UNKNOWN) { writeLog_("Error: Could not determine output file type!"); return PARSE_ERROR; } bool TIC_DTA2D = getFlag_("TIC_DTA2D"); writeDebug_(String("Output file type: ") + FileTypes::typeToName(out_type), 1); //------------------------------------------------------------- // reading input //------------------------------------------------------------- typedef MSExperiment<Peak1D> MSExperimentType; MSExperimentType exp; typedef MSExperimentType::SpectrumType SpectrumType; typedef FeatureMap<> FeatureMapType; FeatureMapType fm; ConsensusMap cm; writeDebug_(String("Loading input file"), 1); if (in_type == FileTypes::CONSENSUSXML) { ConsensusXMLFile().load(in, cm); cm.sortByPosition(); if ((out_type != FileTypes::FEATUREXML) && (out_type != FileTypes::CONSENSUSXML)) { // You you will lose information and waste memory. Enough reasons to issue a warning! writeLog_("Warning: Converting consensus features to peaks. 
You will lose information!"); exp.set2DData(cm); } } else if (in_type == FileTypes::EDTA) { EDTAFile().load(in, cm); cm.sortByPosition(); if ((out_type != FileTypes::FEATUREXML) && (out_type != FileTypes::CONSENSUSXML)) { // You you will lose information and waste memory. Enough reasons to issue a warning! writeLog_("Warning: Converting consensus features to peaks. You will lose information!"); exp.set2DData(cm); } } else if (in_type == FileTypes::FEATUREXML || in_type == FileTypes::TSV || in_type == FileTypes::PEPLIST || in_type == FileTypes::KROENIK) { fh.loadFeatures(in, fm, in_type); fm.sortByPosition(); if ((out_type != FileTypes::FEATUREXML) && (out_type != FileTypes::CONSENSUSXML)) { // You will lose information and waste memory. Enough reasons to issue a warning! writeLog_("Warning: Converting features to peaks. You will lose information! Mass traces are added, if present as 'num_of_masstraces' and 'masstrace_intensity_<X>' (X>=0) meta values."); exp.set2DData<true>(fm); } } else { fh.loadExperiment(in, exp, in_type, log_type_); } //------------------------------------------------------------- // writing output //------------------------------------------------------------- writeDebug_(String("Writing output file"), 1); if (out_type == FileTypes::MZML) { //add data processing entry addDataProcessing_(exp, getProcessingInfo_(DataProcessing:: CONVERSION_MZML)); MzMLFile f; f.setLogType(log_type_); ChromatogramTools().convertSpectraToChromatograms(exp, true); f.store(out, exp); } else if (out_type == FileTypes::MZDATA) { //annotate output with data processing info addDataProcessing_(exp, getProcessingInfo_(DataProcessing:: CONVERSION_MZDATA)); MzDataFile f; f.setLogType(log_type_); ChromatogramTools().convertChromatogramsToSpectra<MSExperimentType>(exp); f.store(out, exp); } else if (out_type == FileTypes::MZXML) { //annotate output with data processing info addDataProcessing_(exp, getProcessingInfo_(DataProcessing:: CONVERSION_MZXML)); MzXMLFile f; 
f.setLogType(log_type_); ChromatogramTools().convertChromatogramsToSpectra<MSExperimentType>(exp); f.store(out, exp); } else if (out_type == FileTypes::DTA2D) { //add data processing entry addDataProcessing_(exp, getProcessingInfo_(DataProcessing:: FORMAT_CONVERSION)); DTA2DFile f; f.setLogType(log_type_); ChromatogramTools().convertChromatogramsToSpectra<MSExperimentType>(exp); if (TIC_DTA2D) { // store the total ion chromatogram (TIC) f.storeTIC(out, exp); } else { // store entire experiment f.store(out, exp); } } else if (out_type == FileTypes::MGF) { //add data processing entry addDataProcessing_(exp, getProcessingInfo_(DataProcessing:: FORMAT_CONVERSION)); MascotGenericFile f; f.setLogType(log_type_); f.store(out, exp); } else if (out_type == FileTypes::FEATUREXML) { if ((in_type == FileTypes::FEATUREXML) || (in_type == FileTypes::TSV) || (in_type == FileTypes::PEPLIST) || (in_type == FileTypes::KROENIK)) { fm.applyMemberFunction(&UniqueIdInterface::setUniqueId); } else if (in_type == FileTypes::CONSENSUSXML || in_type == FileTypes::EDTA) { ConsensusMap::convert(cm, true, fm); } else // not loaded as feature map or consensus map { // The feature specific information is only defaulted. Enough reasons to issue a warning! 
writeLog_("Warning: Converting peaks to features will lead to incomplete features!"); fm.clear(); fm.reserve(exp.getSize()); typedef FeatureMapType::FeatureType FeatureType; FeatureType feature; feature.setQuality(0, 1); // override default feature.setQuality(1, 1); // override default feature.setOverallQuality(1); // override default for (MSExperimentType::ConstIterator spec_iter = exp.begin(); spec_iter != exp.end(); ++spec_iter ) { feature.setRT(spec_iter->getRT()); for (SpectrumType::ConstIterator peak1_iter = spec_iter->begin(); peak1_iter != spec_iter->end(); ++peak1_iter ) { feature.setMZ(peak1_iter->getMZ()); feature.setIntensity(peak1_iter->getIntensity()); feature.setUniqueId(); fm.push_back(feature); } } fm.updateRanges(); } addDataProcessing_(fm, getProcessingInfo_(DataProcessing:: FORMAT_CONVERSION)); FeatureXMLFile().store(out, fm); } else if (out_type == FileTypes::CONSENSUSXML) { if ((in_type == FileTypes::FEATUREXML) || (in_type == FileTypes::TSV) || (in_type == FileTypes::PEPLIST) || (in_type == FileTypes::KROENIK)) { fm.applyMemberFunction(&UniqueIdInterface::setUniqueId); ConsensusMap::convert(0, fm, cm); } // nothing to do for consensus input else if (in_type == FileTypes::CONSENSUSXML || in_type == FileTypes::EDTA) { } else // experimental data { ConsensusMap::convert(0, exp, cm, exp.size()); } addDataProcessing_(cm, getProcessingInfo_(DataProcessing:: FORMAT_CONVERSION)); ConsensusXMLFile().store(out, cm); } else if (out_type == FileTypes::EDTA) { if (fm.size() > 0 && cm.size() > 0) { LOG_ERROR << "Internal error: cannot decide on container (Consensus or Feature)! This is a bug. Please report it!"; return INTERNAL_ERROR; } if (fm.size() > 0) EDTAFile().store(out, fm); else if (cm.size() > 0) EDTAFile().store(out, cm); } else { writeLog_("Unknown output file type given. Aborting!"); printUsage_(); return ILLEGAL_PARAMETERS; } return EXECUTION_OK; }
ExitCodes main_(int, const char**) { //------------------------------------------------------------- // parameter handling //------------------------------------------------------------- StringList in_spec = getStringList_("in"); StringList out = getStringList_("out"); String in_lib = getStringOption_("lib"); String compare_function = getStringOption_("compare_function"); Int precursor_mass_multiplier = getIntOption_("round_precursor_to_integer"); float precursor_mass_tolerance = getDoubleOption_("precursor_mass_tolerance"); //Int min_precursor_charge = getIntOption_("min_precursor_charge"); //Int max_precursor_charge = getIntOption_("max_precursor_charge"); float remove_peaks_below_threshold = getDoubleOption_("filter:remove_peaks_below_threshold"); UInt min_peaks = getIntOption_("filter:min_peaks"); UInt max_peaks = getIntOption_("filter:max_peaks"); Int cut_peaks_below = getIntOption_("filter:cut_peaks_below"); StringList fixed_modifications = getStringList_("fixed_modifications"); StringList variable_modifications = getStringList_("variable_modifications"); Int top_hits = getIntOption_("top_hits"); if (top_hits < -1) { writeLog_("top_hits (should be >= -1 )"); return ILLEGAL_PARAMETERS; } //------------------------------------------------------------- // loading input //------------------------------------------------------------- if (out.size() != in_spec.size()) { writeLog_("out (should be as many as input files)"); return ILLEGAL_PARAMETERS; } time_t prog_time = time(NULL); MSPFile spectral_library; RichPeakMap query, library; //spectrum which will be identified MzMLFile spectra; spectra.setLogType(log_type_); time_t start_build_time = time(NULL); //------------------------------------------------------------- //building map for faster search //------------------------------------------------------------- //library containing already identified peptide spectra vector<PeptideIdentification> ids; spectral_library.load(in_lib, ids, library); map<Size, 
vector<PeakSpectrum> > MSLibrary; { RichPeakMap::iterator s; vector<PeptideIdentification>::iterator i; ModificationsDB* mdb = ModificationsDB::getInstance(); for (s = library.begin(), i = ids.begin(); s < library.end(); ++s, ++i) { double precursor_MZ = (*s).getPrecursors()[0].getMZ(); Size MZ_multi = (Size)precursor_MZ * precursor_mass_multiplier; map<Size, vector<PeakSpectrum> >::iterator found; found = MSLibrary.find(MZ_multi); PeakSpectrum librar; bool variable_modifications_ok = true; bool fixed_modifications_ok = true; const AASequence& aaseq = i->getHits()[0].getSequence(); //variable fixed modifications if (!fixed_modifications.empty()) { for (Size i = 0; i < aaseq.size(); ++i) { const Residue& mod = aaseq.getResidue(i); for (Size s = 0; s < fixed_modifications.size(); ++s) { if (mod.getOneLetterCode() == mdb->getModification(fixed_modifications[s]).getOrigin() && fixed_modifications[s] != mod.getModification()) { fixed_modifications_ok = false; break; } } } } //variable modifications if (aaseq.isModified() && (!variable_modifications.empty())) { for (Size i = 0; i < aaseq.size(); ++i) { if (aaseq.isModified(i)) { const Residue& mod = aaseq.getResidue(i); for (Size s = 0; s < variable_modifications.size(); ++s) { if (mod.getOneLetterCode() == mdb->getModification(variable_modifications[s]).getOrigin() && variable_modifications[s] != mod.getModification()) { variable_modifications_ok = false; break; } } } } } if (variable_modifications_ok && fixed_modifications_ok) { PeptideIdentification& translocate_pid = *i; librar.getPeptideIdentifications().push_back(translocate_pid); librar.setPrecursors(s->getPrecursors()); //library entry transformation for (UInt l = 0; l < s->size(); ++l) { Peak1D peak; if ((*s)[l].getIntensity() > remove_peaks_below_threshold) { const String& info = (*s)[l].getMetaValue("MSPPeakInfo"); if (info[0] == '?') { peak.setIntensity(sqrt(0.2 * (*s)[l].getIntensity())); } else { peak.setIntensity(sqrt((*s)[l].getIntensity())); } 
peak.setMZ((*s)[l].getMZ()); peak.setPosition((*s)[l].getPosition()); librar.push_back(peak); } } if (found != MSLibrary.end()) { found->second.push_back(librar); } else { vector<PeakSpectrum> tmp; tmp.push_back(librar); MSLibrary.insert(make_pair(MZ_multi, tmp)); } } } } time_t end_build_time = time(NULL); cout << "Time needed for preprocessing data: " << (end_build_time - start_build_time) << "\n"; //compare function PeakSpectrumCompareFunctor* comparor = Factory<PeakSpectrumCompareFunctor>::create(compare_function); //------------------------------------------------------------- // calculations //------------------------------------------------------------- double score; StringList::iterator in, out_file; for (in = in_spec.begin(), out_file = out.begin(); in < in_spec.end(); ++in, ++out_file) { time_t start_time = time(NULL); spectra.load(*in, query); //Will hold valuable hits vector<PeptideIdentification> peptide_ids; vector<ProteinIdentification> protein_ids; // Write parameters to ProteinIdentifcation ProteinIdentification prot_id; //Parameters of identificaion prot_id.setIdentifier("test"); prot_id.setSearchEngineVersion("SpecLibSearcher"); prot_id.setDateTime(DateTime::now()); prot_id.setScoreType(compare_function); ProteinIdentification::SearchParameters searchparam; searchparam.precursor_tolerance = precursor_mass_tolerance; prot_id.setSearchParameters(searchparam); /***********SEARCH**********/ for (UInt j = 0; j < query.size(); ++j) { //Set identifier for each identifications PeptideIdentification pid; pid.setIdentifier("test"); pid.setScoreType(compare_function); ProteinHit pr_hit; pr_hit.setAccession(j); prot_id.insertHit(pr_hit); //RichPeak1D to Peak1D transformation for the compare function query PeakSpectrum quer; bool peak_ok = true; query[j].sortByIntensity(true); double min_high_intensity = 0; if (query[j].empty() || query[j].getMSLevel() != 2) { continue; } if (query[j].getPrecursors().empty()) { writeLog_("Warning MS2 spectrum without 
precursor information"); continue; } min_high_intensity = (1 / cut_peaks_below) * query[j][0].getIntensity(); query[j].sortByPosition(); for (UInt k = 0; k < query[j].size() && k < max_peaks; ++k) { if (query[j][k].getIntensity() > remove_peaks_below_threshold && query[j][k].getIntensity() >= min_high_intensity) { Peak1D peak; peak.setIntensity(sqrt(query[j][k].getIntensity())); peak.setMZ(query[j][k].getMZ()); peak.setPosition(query[j][k].getPosition()); quer.push_back(peak); } } if (quer.size() >= min_peaks) { peak_ok = true; } else { peak_ok = false; } double query_MZ = query[j].getPrecursors()[0].getMZ(); if (peak_ok) { bool charge_one = false; Int percent = (Int) Math::round((query[j].size() / 100.0) * 3.0); Int margin = (Int) Math::round((query[j].size() / 100.0) * 1.0); for (vector<RichPeak1D>::iterator peak = query[j].end() - 1; percent >= 0; --peak, --percent) { if (peak->getMZ() < query_MZ) { break; } } if (percent > margin) { charge_one = true; } float min_MZ = (query_MZ - precursor_mass_tolerance) * precursor_mass_multiplier; float max_MZ = (query_MZ + precursor_mass_tolerance) * precursor_mass_multiplier; for (Size mz = (Size)min_MZ; mz <= ((Size)max_MZ) + 1; ++mz) { map<Size, vector<PeakSpectrum> >::iterator found; found = MSLibrary.find(mz); if (found != MSLibrary.end()) { vector<PeakSpectrum>& library = found->second; for (Size i = 0; i < library.size(); ++i) { float this_MZ = library[i].getPrecursors()[0].getMZ() * precursor_mass_multiplier; if (this_MZ >= min_MZ && max_MZ >= this_MZ && ((charge_one == true && library[i].getPeptideIdentifications()[0].getHits()[0].getCharge() == 1) || charge_one == false)) { PeptideHit hit = library[i].getPeptideIdentifications()[0].getHits()[0]; PeakSpectrum& librar = library[i]; //Special treatment for SpectraST score as it computes a score based on the whole library if (compare_function == "SpectraSTSimilarityScore") { SpectraSTSimilarityScore* sp = static_cast<SpectraSTSimilarityScore*>(comparor); 
BinnedSpectrum quer_bin = sp->transform(quer); BinnedSpectrum librar_bin = sp->transform(librar); score = (*sp)(quer, librar); //(*sp)(quer_bin,librar_bin); double dot_bias = sp->dot_bias(quer_bin, librar_bin, score); hit.setMetaValue("DOTBIAS", dot_bias); } else { score = (*comparor)(quer, librar); } DataValue RT(library[i].getRT()); DataValue MZ(library[i].getPrecursors()[0].getMZ()); hit.setMetaValue("RT", RT); hit.setMetaValue("MZ", MZ); hit.setScore(score); PeptideEvidence pe; pe.setProteinAccession(pr_hit.getAccession()); hit.addPeptideEvidence(pe); pid.insertHit(hit); } } } } } pid.setHigherScoreBetter(true); pid.sort(); if (compare_function == "SpectraSTSimilarityScore") { if (!pid.empty() && !pid.getHits().empty()) { vector<PeptideHit> final_hits; final_hits.resize(pid.getHits().size()); SpectraSTSimilarityScore* sp = static_cast<SpectraSTSimilarityScore*>(comparor); Size runner_up = 1; for (; runner_up < pid.getHits().size(); ++runner_up) { if (pid.getHits()[0].getSequence().toUnmodifiedString() != pid.getHits()[runner_up].getSequence().toUnmodifiedString() || runner_up > 5) { break; } } double delta_D = sp->delta_D(pid.getHits()[0].getScore(), pid.getHits()[runner_up].getScore()); for (Size s = 0; s < pid.getHits().size(); ++s) { final_hits[s] = pid.getHits()[s]; final_hits[s].setMetaValue("delta D", delta_D); final_hits[s].setMetaValue("dot product", pid.getHits()[s].getScore()); final_hits[s].setScore(sp->compute_F(pid.getHits()[s].getScore(), delta_D, pid.getHits()[s].getMetaValue("DOTBIAS"))); //final_hits[s].removeMetaValue("DOTBIAS"); } pid.setHits(final_hits); pid.sort(); pid.setMZ(query[j].getPrecursors()[0].getMZ()); pid.setRT(query_MZ); } } if (top_hits != -1 && (UInt)top_hits < pid.getHits().size()) { vector<PeptideHit> hits; hits.resize(top_hits); for (Size i = 0; i < (UInt)top_hits; ++i) { hits[i] = pid.getHits()[i]; } pid.setHits(hits); } peptide_ids.push_back(pid); } protein_ids.push_back(prot_id); 
//------------------------------------------------------------- // writing output //------------------------------------------------------------- IdXMLFile id_xml_file; id_xml_file.store(*out_file, protein_ids, peptide_ids); time_t end_time = time(NULL); cout << "Search time: " << difftime(end_time, start_time) << " seconds for " << *in << "\n"; } time_t end_time = time(NULL); cout << "Total time: " << difftime(end_time, prog_time) << " secconds\n"; return EXECUTION_OK; }
/// Detects features in the MS1 spectra of the input mzML file and stores
/// them as featureXML.
///
/// @return EXECUTION_OK on success; INCOMPATIBLE_INPUT_DATA if no spectra
///         were loaded or if at least one spectrum is not sorted.
ExitCodes main_(int, const char **)
{
  //-------------------------------------------------------------
  // parameter handling
  //-------------------------------------------------------------
  const String input_path = getStringOption_("in");
  const String output_path = getStringOption_("out");

  //-------------------------------------------------------------
  // loading input
  //-------------------------------------------------------------
  MzMLFile mzml_reader;
  mzml_reader.setLogType(log_type_);
  // restrict loading to MS level 1
  mzml_reader.getOptions().addMSLevel(1);

  MSExperiment<Peak1D> spectra;
  mzml_reader.load(input_path, spectra);

  if (spectra.empty())
  {
    LOG_WARN << "The given file does not contain any conventional peak data, but might"
                " contain chromatograms. This tool currently cannot handle them, sorry.";
    return INCOMPATIBLE_INPUT_DATA;
  }

  // every spectrum has to be sorted before the algorithm is run
  for (MSExperiment<Peak1D>::const_iterator spec = spectra.begin(); spec != spectra.end(); ++spec)
  {
    if (spec->isSorted())
    {
      continue;
    }
    writeLog_("Error: Not all spectra are sorted according to peak m/z positions. Use FileFilter to sort the input!");
    return INCOMPATIBLE_INPUT_DATA;
  }

  //-------------------------------------------------------------
  // pick
  //-------------------------------------------------------------
  FeatureMap<> features;
  FeatureFinder finder;
  Param algorithm_param = getParam_().copy("algorithm:", true);

  FFSH algorithm;
  algorithm.setParameters(algorithm_param);
  algorithm.setData(spectra, features, finder);
  algorithm.run();

  //-------------------------------------------------------------
  // writing output
  //-------------------------------------------------------------
  // annotate output with data processing info
  addDataProcessing_(features, getProcessingInfo_(DataProcessing::PEAK_PICKING));
  addDataProcessing_(features, getProcessingInfo_(DataProcessing::QUANTITATION));

  // the map itself and each contained feature get a unique id
  features.ensureUniqueId();
  for (FeatureMap<>::iterator feat = features.begin(); feat != features.end(); ++feat)
  {
    feat->ensureUniqueId();
  }

  FeatureXMLFile().store(output_path, features);

  return EXECUTION_OK;
}
ExitCodes main_(int, const char**) { //------------------------------------------------------------- // parameter handling //------------------------------------------------------------- //file list StringList file_list = getStringList_("in"); //file type FileHandler fh; FileTypes::Type force_type; if (getStringOption_("in_type").size() > 0) { force_type = FileTypes::nameToType(getStringOption_("in_type")); } else { force_type = fh.getType(file_list[0]); } //output file names and types String out_file = getStringOption_("out"); //------------------------------------------------------------- // calculations //------------------------------------------------------------- bool annotate_file_origin = getFlag_("annotate_file_origin"); if (force_type == FileTypes::FEATUREXML) { FeatureMap<> out; for (Size i = 0; i < file_list.size(); ++i) { FeatureMap<> map; FeatureXMLFile fh; fh.load(file_list[i], map); if (annotate_file_origin) { for (FeatureMap<>::iterator it = map.begin(); it != map.end(); ++it) { it->setMetaValue("file_origin", DataValue(file_list[i])); } } out += map; } //------------------------------------------------------------- // writing output //------------------------------------------------------------- //annotate output with data processing info addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION)); FeatureXMLFile f; f.store(out_file, out); } else if (force_type == FileTypes::CONSENSUSXML) { ConsensusMap out; ConsensusXMLFile fh; fh.load(file_list[0], out); //skip first file for (Size i = 1; i < file_list.size(); ++i) { ConsensusMap map; ConsensusXMLFile fh; fh.load(file_list[i], map); if (annotate_file_origin) { for (ConsensusMap::iterator it = map.begin(); it != map.end(); ++it) { it->setMetaValue("file_origin", DataValue(file_list[i])); } } out += map; } //------------------------------------------------------------- // writing output //------------------------------------------------------------- //annotate output with data 
processing info addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION)); ConsensusXMLFile f; f.store(out_file, out); } else if (force_type == FileTypes::TRAML) { TargetedExperiment out; for (Size i = 0; i < file_list.size(); ++i) { TargetedExperiment map; TraMLFile fh; fh.load(file_list[i], map); out += map; } //------------------------------------------------------------- // writing output //------------------------------------------------------------- //annotate output with data processing info Software software; software.setName("FileMerger"); software.setVersion(VersionInfo::getVersion()); out.addSoftware(software); TraMLFile f; f.store(out_file, out); } else { // we might want to combine different types, thus we only // query in_type (which applies to all files) // and not the suffix or content of a single file force_type = FileTypes::nameToType(getStringOption_("in_type")); //rt bool rt_auto_number = getFlag_("raw:rt_auto"); bool rt_filename = getFlag_("raw:rt_filename"); bool rt_custom = false; DoubleList custom_rts = getDoubleList_("raw:rt_custom"); if (custom_rts.size() != 0) { rt_custom = true; if (custom_rts.size() != file_list.size()) { writeLog_("Custom retention time list must have as many elements as there are input files!"); printUsage_(); return ILLEGAL_PARAMETERS; } } //ms level bool user_ms_level = getFlag_("raw:user_ms_level"); MSExperiment<> out; out.reserve(file_list.size()); UInt rt_auto = 0; UInt native_id = 0; std::vector<MSChromatogram<ChromatogramPeak> > all_chromatograms; for (Size i = 0; i < file_list.size(); ++i) { String filename = file_list[i]; //load file MSExperiment<> in; fh.loadExperiment(filename, in, force_type, log_type_); if (in.empty() && in.getChromatograms().empty()) { writeLog_(String("Warning: Empty file '") + filename + "'!"); continue; } out.reserve(out.size() + in.size()); //warn if custom RT and more than one scan in input file if (rt_custom && in.size() > 1) { writeLog_(String("Warning: More 
than one scan in file '") + filename + "'! All scans will have the same retention time!"); } for (MSExperiment<>::const_iterator it2 = in.begin(); it2 != in.end(); ++it2) { //handle rt Real rt_final = it2->getRT(); if (rt_auto_number) { rt_final = ++rt_auto; } else if (rt_custom) { rt_final = custom_rts[i]; } else if (rt_filename) { if (!filename.hasSubstring("rt")) { writeLog_(String("Warning: cannot guess retention time from filename as it does not contain 'rt'")); } for (Size i = 0; i < filename.size(); ++i) { if (filename[i] == 'r' && ++i != filename.size() && filename[i] == 't' && ++i != filename.size() && isdigit(filename[i])) { String rt; while (i != filename.size() && (filename[i] == '.' || isdigit(filename[i]))) { rt += filename[i++]; } if (rt.size() > 0) { // remove dot from rt3892.98.dta // ^ if (rt[rt.size() - 1] == '.') { // remove last character rt.erase(rt.end() - 1); } } try { float tmp = rt.toFloat(); rt_final = tmp; } catch (Exception::ConversionError) { writeLog_(String("Warning: cannot convert the found retention time in a value '" + rt + "'.")); } } } } // none of the rt methods were successful if (rt_final == -1) { writeLog_(String("Warning: No valid retention time for output scan '") + rt_auto + "' from file '" + filename + "'"); } out.addSpectrum(*it2); out.getSpectra().back().setRT(rt_final); out.getSpectra().back().setNativeID(native_id); if (user_ms_level) { out.getSpectra().back().setMSLevel((int)getIntOption_("raw:ms_level")); } ++native_id; } // if we had only one spectrum, we can annotate it directly, for more spectra, we just name the source file leaving the spectra unannotated (to avoid a long and redundant list of sourceFiles) if (in.size() == 1) { out.getSpectra().back().setSourceFile(in.getSourceFiles()[0]); in.getSourceFiles().clear(); // delete source file annotated from source file (its in the spectrum anyways) } // copy experimental settings from first file if (i == 0) { out.ExperimentalSettings::operator=(in); } else // 
otherwise append { out.getSourceFiles().insert(out.getSourceFiles().end(), in.getSourceFiles().begin(), in.getSourceFiles().end()); // could be emtpty if spectrum was annotated above, but that's ok then } // also add the chromatograms for (std::vector<MSChromatogram<ChromatogramPeak> >::const_iterator it2 = in.getChromatograms().begin(); it2 != in.getChromatograms().end(); ++it2) { all_chromatograms.push_back(*it2); } } // set the chromatograms out.setChromatograms(all_chromatograms); //------------------------------------------------------------- // writing output //------------------------------------------------------------- //annotate output with data processing info addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION)); MzMLFile f; f.setLogType(log_type_); f.store(out_file, out); } return EXECUTION_OK; }
/// Converts between mzML and its cached representation, and between mzML
/// and sqMass.
///
/// - sqMass -> mzML and mzML -> sqMass are converted directly.
/// - Otherwise, without "convert_back", the input mzML is written as a memory
///   dump ("<out>.cached") plus a metadata file ("<out>").
/// - With "convert_back", the metadata mzML ("<in>") and the memory dump
///   ("<in>.cached") are combined again and stored as "<out>".
///
/// @return EXECUTION_OK on success; PARSE_ERROR if the input or output file
///         type cannot be determined; INCOMPATIBLE_INPUT_DATA if metadata and
///         memory dump do not contain the same number of spectra or
///         chromatograms.
ExitCodes main_(int , const char**)
{
  String out_meta = getStringOption_("out");
  String out_cached = out_meta + ".cached";
  bool convert_back = getFlag_("convert_back");

  FileHandler fh;

  //input file type
  String in = getStringOption_("in");
  String in_cached = in + ".cached";
  FileTypes::Type in_type = FileTypes::nameToType(getStringOption_("in_type"));

  if (in_type == FileTypes::UNKNOWN)
  {
    in_type = fh.getType(in);
    writeDebug_(String("Input file type: ") + FileTypes::typeToName(in_type), 2);
  }

  if (in_type == FileTypes::UNKNOWN)
  {
    writeLog_("Error: Could not determine input file type!");
    return PARSE_ERROR;
  }

  //output file names and types
  String out = getStringOption_("out");
  FileTypes::Type out_type = FileTypes::nameToType(getStringOption_("out_type"));

  if (out_type == FileTypes::UNKNOWN)
  {
    out_type = fh.getTypeByFileName(out);
  }

  if (out_type == FileTypes::UNKNOWN)
  {
    writeLog_("Error: Could not determine output file type!");
    return PARSE_ERROR;
  }

  // direct conversions between sqMass and mzML
  if (in_type == FileTypes::SQMASS && out_type == FileTypes::MZML)
  {
    MapType exp;
    SqMassFile sqfile;
    MzMLFile f;
    sqfile.load(in, exp);
    f.store(out, exp);
    return EXECUTION_OK;
  }
  else if (in_type == FileTypes::MZML && out_type == FileTypes::SQMASS)
  {
    MzMLFile f;
    SqMassFile sqfile;
    MapType exp;
    f.load(in, exp);
    sqfile.store(out, exp);
    return EXECUTION_OK;
  }

  if (!convert_back)
  {
    MapType exp;
    CachedmzML cacher;
    MzMLFile f;

    cacher.setLogType(log_type_);
    f.setLogType(log_type_);

    f.load(in,exp);
    cacher.writeMemdump(exp, out_cached);
    cacher.writeMetadata(exp, out_meta, true);
  }
  else
  {
    MzMLFile f;
    MapType meta_exp;
    CachedmzML cacher;
    MapType exp_reading;

    cacher.setLogType(log_type_);
    f.setLogType(log_type_);

    f.load(in,meta_exp);
    cacher.readMemdump(exp_reading, in_cached);

    std::cout << " read back, got " << exp_reading.size() << " spectra " << exp_reading.getChromatograms().size() << " chromats " << std::endl;

    // The merge below indexes both experiments in parallel, so a size
    // mismatch must abort instead of running past the end of a container.
    if (meta_exp.size() != exp_reading.size())
    {
      std::cerr << " Both experiments need to have the same size!" << std::endl;
      return INCOMPATIBLE_INPUT_DATA;
    }
    if (meta_exp.getChromatograms().size() != exp_reading.getChromatograms().size())
    {
      std::cerr << " Both experiments need to have the same number of chromatograms!" << std::endl;
      return INCOMPATIBLE_INPUT_DATA;
    }

    // remove the meta value that marks spectra / chromatograms as cached
    for (Size i=0; i<meta_exp.size(); ++i)
    {
      for (Size j = 0; j < meta_exp[i].getDataProcessing().size(); j++)
      {
        if (meta_exp[i].getDataProcessing()[j]->metaValueExists("cached_data"))
        {
          meta_exp[i].getDataProcessing()[j]->removeMetaValue("cached_data");
        }
      }
    }
    for (Size i=0; i < meta_exp.getNrChromatograms(); ++i)
    {
      for (Size j = 0; j < meta_exp.getChromatogram(i).getDataProcessing().size(); j++)
      {
        if (meta_exp.getChromatogram(i).getDataProcessing()[j]->metaValueExists("cached_data"))
        {
          meta_exp.getChromatogram(i).getDataProcessing()[j]->removeMetaValue("cached_data");
        }
      }
    }

    // append the peak data from the memory dump to the metadata spectra
    for (Size i=0; i<exp_reading.size(); ++i)
    {
      for (Size j = 0; j < exp_reading[i].size(); j++)
      {
        meta_exp[i].push_back(exp_reading[i][j]);
      }
    }

    // same for the chromatograms; they are edited on a copy and written back
    std::vector<MSChromatogram<ChromatogramPeak> > chromatograms = exp_reading.getChromatograms();
    std::vector<MSChromatogram<ChromatogramPeak> > old_chromatograms = meta_exp.getChromatograms();
    for (Size i=0; i<chromatograms.size(); ++i)
    {
      for (Size j = 0; j < chromatograms[i].size(); j++)
      {
        old_chromatograms[i].push_back(chromatograms[i][j]);
      }
    }
    meta_exp.setChromatograms(old_chromatograms);

    f.store(out_meta,meta_exp);
  }

  return EXECUTION_OK;
}
/// Computes an AScore-based result for every peptide hit of the given
/// identifications, using the MS2 spectra of the input mzML file, and
/// stores the rescored identifications as idXML.
///
/// The spectrum for an identification is looked up by the identification's
/// retention time. The original score of each hit is backed up via
/// addScoreToMetaValues_() before scoring.
///
/// @return EXECUTION_OK
ExitCodes main_(int, const char**)
{
  //-------------------------------------------------------------
  // parameter handling
  //-------------------------------------------------------------
  String spectra_path(getStringOption_("in"));
  String id_path(getStringOption_("id"));
  String output_path(getStringOption_("out"));
  double tolerance(getDoubleOption_("fragment_mass_tolerance"));
  // every unit other than "Da" is treated as ppm
  bool tolerance_is_ppm = (getStringOption_("fragment_mass_unit") != "Da");
  Size max_length = getIntOption_("max_peptide_length");
  Size max_permutations = getIntOption_("max_num_perm");

  AScore scorer;

  //-------------------------------------------------------------
  // loading input
  //-------------------------------------------------------------
  vector<PeptideIdentification> input_ids;
  vector<ProteinIdentification> protein_ids;
  vector<PeptideIdentification> rescored_ids;
  IdXMLFile().load(id_path, protein_ids, input_ids);

  MSExperiment<> experiment;
  MzMLFile mzml;
  mzml.setLogType(log_type_);

  // load MS level 2 spectra only
  PeakFileOptions load_options;
  load_options.clearMSLevels();
  load_options.addMSLevel(2);
  mzml.getOptions() = load_options;
  mzml.load(spectra_path, experiment);
  experiment.sortSpectra(true);

  SpectrumLookup spectrum_lookup;
  spectrum_lookup.readSpectra(experiment.getSpectra());

  //-------------------------------------------------------------
  // scoring
  //-------------------------------------------------------------
  for (Size id_index = 0; id_index < input_ids.size(); ++id_index)
  {
    const PeptideIdentification& current_id = input_ids[id_index];

    Size spectrum_index = spectrum_lookup.findByRT(current_id.getRT());
    PeakSpectrum& spectrum = experiment.getSpectrum(spectrum_index);

    const vector<PeptideHit>& original_hits = current_id.getHits();
    vector<PeptideHit> rescored_hits;
    for (Size hit_index = 0; hit_index < original_hits.size(); ++hit_index)
    {
      PeptideHit working_hit = original_hits[hit_index];
      addScoreToMetaValues_(working_hit, current_id.getScoreType()); // backup score value

      LOG_DEBUG << "starting to compute AScore RT=" << current_id.getRT() << " SEQUENCE: " << working_hit.getSequence().toString() << std::endl;

      PeptideHit result = scorer.compute(working_hit, spectrum, tolerance, tolerance_is_ppm, max_length, max_permutations);
      rescored_hits.push_back(result);
    }

    // the output identification is a copy of the input one with new hits
    PeptideIdentification updated_id(current_id);
    updated_id.setScoreType("PhosphoScore");
    updated_id.setHigherScoreBetter(true);
    updated_id.setHits(rescored_hits);
    rescored_ids.push_back(updated_id);
  }

  //-------------------------------------------------------------
  // writing output
  //-------------------------------------------------------------
  IdXMLFile().store(output_path, protein_ids, rescored_ids);

  return EXECUTION_OK;
}
/// Trains a PILIS model from annotated spectra.
///
/// Spectra are read from MSP or mzML files ("in"); identifications may
/// additionally be read from idXML files ("id_in") and are then mapped onto
/// the spectra. For every spectrum the first hit of the first peptide
/// identification is used for training, provided it passes the modification,
/// length, score and charge filters. The trained model is written to
/// "trained_model_file" if that option is set.
///
/// @return EXECUTION_OK on success; INCOMPATIBLE_INPUT_DATA if no spectra
///         file is given or if the number of id files differs from the number
///         of spectra files.
ExitCodes main_(int, const char **)
{
  //-------------------------------------------------------------
  // parameter handling
  //-------------------------------------------------------------
  StringList spectra_files(getStringList_("in"));
  StringList id_files(getStringList_("id_in"));
  String trained_model_path(getStringOption_("trained_model_file"));
  String base_model_path(getStringOption_("model_file"));
  bool filter_by_score(getFlag_("score_filtering"));
  double score_cutoff(getDoubleOption_("score_threshold"));
  Int lowest_charge(getIntOption_("min_charge"));
  Int highest_charge(getIntOption_("max_charge"));

  if (spectra_files.empty())
  {
    writeLog_("For 'training' mode spectra and identifications are needed.");
    return INCOMPATIBLE_INPUT_DATA;
  }

  // create model, either read from a model file, or initialize with default parameters
  PILISModel model;
  if (base_model_path.empty())
  {
    writeDebug_("Initializing model", 1);
    model.setParameters(getParam_().copy("PILIS_parameters:", true));
    model.init();
  }
  else
  {
    writeDebug_("Reading model from file '" + base_model_path + "'", 1);
    model.readFromFile(base_model_path);
  }

  Param pilis_param(model.getParameters());
  ModificationDefinitionsSet mod_set(pilis_param.getValue("fixed_modifications"), pilis_param.getValue("variable_modifications"));

  //-------------------------------------------------------------
  // loading input
  //-------------------------------------------------------------
  vector<RichPeakMap> maps;
  vector<vector<ProteinIdentification> > protein_ids;
  vector<vector<PeptideIdentification> > peptide_ids;

  // the type of the first file decides how all files are read
  FileTypes::Type spectra_type = FileHandler().getType(spectra_files[0]);
  writeDebug_("File type of parameter 'in' estimated as '" + FileTypes::typeToName(spectra_type) + "'", 1);
  // TODO check all types
  if (spectra_type == FileTypes::MSP)
  {
    writeDebug_("Reading MSP file", 1);
    MSPFile msp;
    maps.resize(spectra_files.size());
    peptide_ids.resize(spectra_files.size());
    for (Size file = 0; file != spectra_files.size(); ++file)
    {
      msp.load(spectra_files[file], peptide_ids[file], maps[file]);
      // attach the identification with the same index to each spectrum
      for (Size spec = 0; spec != maps[file].size(); ++spec)
      {
        maps[file][spec].getPeptideIdentifications().push_back(peptide_ids[file][spec]);
      }
    }
  }

  if (spectra_type == FileTypes::MZML)
  {
    MzMLFile mzml;
    mzml.setLogType(log_type_);
    maps.resize(spectra_files.size());
    for (Size file = 0; file != spectra_files.size(); ++file)
    {
      mzml.load(spectra_files[file], maps[file]);
    }
  }

  if (!id_files.empty())
  {
    protein_ids.resize(id_files.size());
    peptide_ids.resize(id_files.size());
    IdXMLFile idxml;
    for (Size file = 0; file != id_files.size(); ++file)
    {
      idxml.load(id_files[file], protein_ids[file], peptide_ids[file]);
    }

    // ids are mapped file by file, so both lists need the same length
    if (id_files.size() != spectra_files.size())
    {
      writeLog_("If in parameter contains mzML files and id_in contains idXML files, the number should be equal to allow mapping of the identification to the spectra");
      return INCOMPATIBLE_INPUT_DATA;
    }

    // map the ids to the spectra
    IDMapper mapper;
    for (Size file = 0; file != maps.size(); ++file)
    {
      mapper.annotate(maps[file], peptide_ids[file], protein_ids[file]);
    }
  }

  //-------------------------------------------------------------
  // collect the peptides and spectra used for training
  //-------------------------------------------------------------
  vector<PILISCrossValidation::Peptide> peptides;
  for (vector<RichPeakMap>::const_iterator map_it = maps.begin(); map_it != maps.end(); ++map_it)
  {
    for (RichPeakMap::ConstIterator spec_it = map_it->begin(); spec_it != map_it->end(); ++spec_it)
    {
      if (spec_it->getPeptideIdentifications().empty())
      {
        continue;
      }

      // only the first identification and its first hit are considered
      const PeptideIdentification& first_id = spec_it->getPeptideIdentifications().front();
      if (first_id.getHits().empty())
      {
        continue;
      }
      PeptideHit hit = first_id.getHits().front();

      // check whether the sequence contains a modification not modelled
      if (!mod_set.isCompatible(hit.getSequence()) || hit.getSequence().size() > static_cast<UInt>(pilis_param.getValue("visible_model_depth")))
      {
        continue;
      }

      if (filter_by_score)
      {
        const bool higher_is_better = first_id.isHigherScoreBetter();
        const bool below_cutoff = hit.getScore() < score_cutoff && higher_is_better;
        const bool above_cutoff = hit.getScore() > score_cutoff && !higher_is_better;
        if (below_cutoff || above_cutoff)
        {
          continue;
        }
      }

      PILISCrossValidation::Peptide pep_struct;
      pep_struct.sequence = hit.getSequence();
      pep_struct.charge = hit.getCharge();
      pep_struct.spec = *spec_it;
      pep_struct.hits = first_id.getHits();

      // check charges
      if (pep_struct.charge < lowest_charge || pep_struct.charge > highest_charge)
      {
        continue;
      }

      peptides.push_back(pep_struct);
    }
  }

  getUniquePeptides(peptides);
  writeDebug_("Number of (unique) peptides for training: " + String(peptides.size()), 1);

  //-------------------------------------------------------------
  // training
  //-------------------------------------------------------------
  //model.writeToFile("pilis_tmp.dat");

  model.setParameters(pilis_param);
  for (Size p = 0; p < peptides.size(); ++p)
  {
    model.train(peptides[p].spec, peptides[p].sequence, peptides[p].charge);
  }
  model.evaluate();

  if (!trained_model_path.empty())
  {
    model.writeToFile(trained_model_path);
  }

  return EXECUTION_OK;
}
/// Identifies the MS2 spectra of the input mzML file with PILIS and stores
/// the result as idXML.
///
/// Candidate peptides are fetched from a suffix array built on
/// "peptide_db_file" for the precursor weights of charge states 1 to 3,
/// scored per spectrum with PILISIdentification and, unless disabled,
/// rescored with PILISScoring. At most "max_candidates" hits are kept per
/// spectrum.
///
/// The model and the suffix array are held as automatic objects so that they
/// are released on every exit path, including exceptions.
///
/// @return EXECUTION_OK
ExitCodes main_(int, const char **)
{
  //-------------------------------------------------------------
  // parameter handling
  //-------------------------------------------------------------
  //input/output files
  String in(getStringOption_("in"));
  String out(getStringOption_("out"));

  //-------------------------------------------------------------
  // loading input
  //-------------------------------------------------------------
  RichPeakMap exp;
  MzMLFile f;
  f.setLogType(log_type_);
  f.load(in, exp);

  writeDebug_("Data set contains " + String(exp.size()) + " spectra", 1);

  //-------------------------------------------------------------
  // calculations
  //-------------------------------------------------------------
  writeDebug_("Reading model file", 2);

  // create model and set the given options
  // (declared before PILIS_id, which keeps a pointer to it)
  PILISModel model;
  model.readFromFile(getStringOption_("model_file"));
  Param model_param(model.getParameters());
  model_param.setValue("upper_mz", getDoubleOption_("model:upper_mz"));
  model_param.setValue("lower_mz", getDoubleOption_("model:lower_mz"));
  model_param.setValue("charge_directed_threshold", getDoubleOption_("model:charge_directed_threshold"));
  model_param.setValue("charge_remote_threshold", getDoubleOption_("model:charge_remote_threshold"));
  //model_param.setValue("min_main_ion_intensity", getDoubleOption_("model:min_main_ion_intensity"));
  //model_param.setValue("min_loss_ion_intensity", getDoubleOption_("model:min_loss_ion_intensity"));
  model_param.setValue("min_y_ion_intensity", getDoubleOption_("model:min_y_ion_intensity"));
  model_param.setValue("min_b_ion_intensity", getDoubleOption_("model:min_b_ion_intensity"));
  model_param.setValue("min_a_ion_intensity", getDoubleOption_("model:min_a_ion_intensity"));
  model_param.setValue("min_y_loss_intensity", getDoubleOption_("model:min_y_loss_intensity"));
  model_param.setValue("min_b_loss_intensity", getDoubleOption_("model:min_b_loss_intensity"));
  model_param.setValue("charge_loss_factor", getDoubleOption_("model:charge_loss_factor"));
  model_param.setValue("visible_model_depth", getIntOption_("model:visible_model_depth"));
  model_param.setValue("model_depth", getIntOption_("model:model_depth"));
  model_param.setValue("fixed_modifications", getStringOption_("fixed_modifications"));
  model.setParameters(model_param);

  writeDebug_("Reading sequence db", 2);

  //exp.resize(50); // TODO

  UInt max_charge(3), min_charge(1);         // TODO
  vector<double> pre_weights;
  vector<vector<pair<pair<String, String>, String> > > candidates;
  {
    // the suffix array only lives in this scope and is released as soon as
    // the candidates have been fetched
    SuffixArrayPeptideFinder sapf(getStringOption_("peptide_db_file"), "trypticCompressed");
    sapf.setTolerance(getDoubleOption_("precursor_mass_tolerance"));
    sapf.setNumberOfModifications(0);
    sapf.setUseTags(false);

    for (RichPeakMap::Iterator it = exp.begin(); it != exp.end(); ++it)
    {
      // this loop visits spectra of every MS level; those without a
      // precursor have no weight to contribute
      if (it->getPrecursors().empty())
      {
        continue;
      }
      double pre_weight(it->getPrecursors()[0].getMZ());
      for (Size z = min_charge; z <= max_charge; ++z)
      {
        pre_weights.push_back((pre_weight * (double)z) - (double)z);
      }
    }

    sort(pre_weights.begin(), pre_weights.end());

    cerr << "Getting candidates from SA...";
    sapf.getCandidates(candidates, pre_weights);
    cerr << "done" << endl;
  }

  // candidates[k] is stored under the k-th (sorted) precursor weight
  map<double, vector<pair<pair<String, String>, String> > > sorted_candidates;
  for (Size c = 0; c != candidates.size(); ++c)
  {
    sorted_candidates[pre_weights[c]] = candidates[c];
  }
  candidates.clear();

  // create ProteinIdentification and set the options
  PILISIdentification PILIS_id;

  PILIS_id.setModel(&model);

  Param id_param(PILIS_id.getParameters());
  id_param.setValue("precursor_mass_tolerance", getDoubleOption_("precursor_mass_tolerance"));
  id_param.setValue("max_candidates", getIntOption_("max_pre_candidates"));
  // disable evalue scoring, this is done separately to allow for a single id per spectrum
  id_param.setValue("use_evalue_scoring", 0);
  id_param.setValue("fixed_modifications", getStringOption_("fixed_modifications"));
  PILIS_id.setParameters(id_param);

  vector<PeptideIdentification> ids;

  // perform the ProteinIdentification of the given spectra
  UInt no(0);
  for (RichPeakMap::Iterator it = exp.begin(); it != exp.end(); ++it, ++no)
  {
    if (it->getMSLevel() == 0)
    {
      writeLog_("Warning: MSLevel is 0, assuming MSLevel 2");
      it->setMSLevel(2);
    }

    if (it->getMSLevel() == 2)
    {
      writeDebug_(String(no) + "/" + String(exp.size()), 1);
      PeptideIdentification id;

      map<String, UInt> cand;

      for (UInt z = min_charge; z <= max_charge; ++z)
      {
        double pre_weight = (it->getPrecursors()[0].getMZ() * (double)z) - (double)z;
        for (vector<pair<pair<String, String>, String> >::const_iterator cit = sorted_candidates[pre_weight].begin(); cit != sorted_candidates[pre_weight].end(); ++cit)
        {
          String seq = cit->first.second;
          if (seq.size() > 39)
          {
            continue;
          }
          // count K/R residues that are neither last nor followed by P
          UInt num_cleavages_sites(0);
          for (Size k = 0; k != seq.size(); ++k)
          {
            if (k != seq.size() - 1)
            {
              if ((seq[k] == 'K' || seq[k] == 'R') && seq[k + 1] != 'P')
              {
                ++num_cleavages_sites;
              }
            }
          }

          if (num_cleavages_sites > 1)
          {
            continue;
          }

          cand[seq] = z;
        }
      }

      cerr << "#cand=" << cand.size() << endl;
      PILIS_id.getIdentification(cand, id, *it);

      id.setMetaValue("RT", it->getRT());
      id.setMetaValue("MZ", it->getPrecursors()[0].getMZ());

      ids.push_back(id);

      if (!id.getHits().empty())
      {
        cerr << it->getPrecursors()[0].getMZ() << " " << AASequence(id.getHits().begin()->getSequence()).getAverageWeight() << endl;
        writeDebug_(id.getHits().begin()->getSequence().toString() + " (z=" + id.getHits().begin()->getCharge() + "), score=" + String(id.getHits().begin()->getScore()), 10);
      }
    }
  }

  // perform the PILIS scoring to the spectra
  if (!getFlag_("scoring:do_not_use_evalue_scoring"))
  {
    PILISScoring scoring;
    Param scoring_param(scoring.getParameters());
    scoring_param.setValue("use_local_scoring", (int)getFlag_("scoring:use_local_scoring"));
    scoring_param.setValue("survival_function_bin_size", getIntOption_("scoring:survival_function_bin_size"));
    scoring_param.setValue("global_linear_fitting_threshold", getDoubleOption_("scoring:global_linear_fitting_threshold"));
    scoring_param.setValue("local_linear_fitting_threshold", getDoubleOption_("scoring:local_linear_fitting_threshold"));
    scoring.setParameters(scoring_param);

    scoring.getScores(ids);
  }

  // write the result to the IdentificationData structure for the storing
  UInt max_candidates = getIntOption_("max_candidates");
  for (Size i = 0; i != ids.size(); ++i)
  {
    if (ids[i].getHits().size() > max_candidates)
    {
      vector<PeptideHit> hits = ids[i].getHits();
      hits.resize(max_candidates);
      ids[i].setHits(hits);
    }
  }

  //-------------------------------------------------------------
  // writing output
  //-------------------------------------------------------------
  DateTime now;
  // NOTE(review): if DateTime::now() returns the current time instead of
  // modifying the object, this call has no effect - confirm against DateTime.
  now.now();
  String date_string;
  //now.get(date_string); // @todo Fix it (Andreas)
  String identifier("PILIS_" + date_string);

  // ids holds one entry per MS level 2 spectrum, in spectrum order
  Size id_index = 0;
  for (RichPeakMap::ConstIterator it = exp.begin(); it != exp.end(); ++it)
  {
    if (it->getMSLevel() == 2)
    {
      ids[id_index].setMetaValue("RT", it->getRT());
      ids[id_index].setMetaValue("MZ", it->getPrecursors()[0].getMZ());

      ids[id_index].setIdentifier(identifier);
      ids[id_index].setHigherScoreBetter(false);
      ++id_index;
    }
  }

  // search parameters
  ProteinIdentification::SearchParameters search_parameters;
  search_parameters.db = getStringOption_("peptide_db_file");
  search_parameters.db_version = "";
  search_parameters.taxonomy = "";
  //search_parameters.charges = getStringOption_("charges");
  search_parameters.mass_type = ProteinIdentification::MONOISOTOPIC;
  vector<String> fixed_mods;
  getStringOption_("fixed_modifications").split(',', fixed_mods);
  search_parameters.fixed_modifications = fixed_mods;
  search_parameters.enzyme = ProteinIdentification::TRYPSIN;
  search_parameters.missed_cleavages = 1;
  search_parameters.peak_mass_tolerance = getDoubleOption_("peak_mass_tolerance");
  search_parameters.precursor_tolerance = getDoubleOption_("precursor_mass_tolerance");

  ProteinIdentification protein_identification;
  protein_identification.setDateTime(now);
  protein_identification.setSearchEngine("PILIS");
  protein_identification.setSearchEngineVersion("beta");
  protein_identification.setSearchParameters(search_parameters);
  protein_identification.setIdentifier(identifier);

  vector<ProteinIdentification> protein_identifications;
  protein_identifications.push_back(protein_identification);
  IdXMLFile().store(out, protein_identifications, ids);

  return EXECUTION_OK;
}
ExitCodes main_(int, const char **) { StringList file_list = getStringList_("in"); String tr_file_str = getStringOption_("tr"); String out = getStringOption_("out"); bool is_swath = getFlag_("is_swath"); bool ppm = getFlag_("ppm"); bool extract_MS1 = getFlag_("extract_MS1"); double min_upper_edge_dist = getDoubleOption_("min_upper_edge_dist"); double mz_extraction_window = getDoubleOption_("mz_window"); double rt_extraction_window = getDoubleOption_("rt_window"); String extraction_function = getStringOption_("extraction_function"); // If we have a transformation file, trafo will transform the RT in the // scoring according to the model. If we dont have one, it will apply the // null transformation. String trafo_in = getStringOption_("rt_norm"); TransformationDescription trafo; if (trafo_in.size() > 0) { TransformationXMLFile trafoxml; String model_type = getStringOption_("model:type"); Param model_params = getParam_().copy("model:", true); trafoxml.load(trafo_in, trafo); trafo.fitModel(model_type, model_params); } TransformationDescription trafo_inverse = trafo; trafo_inverse.invert(); const char * tr_file = tr_file_str.c_str(); MapType out_exp; std::vector< OpenMS::MSChromatogram > chromatograms; TraMLFile traml; OpenMS::TargetedExperiment targeted_exp; std::cout << "Loading TraML file" << std::endl; traml.load(tr_file, targeted_exp); std::cout << "Loaded TraML file" << std::endl; // Do parallelization over the different input files // Only in OpenMP 3.0 are unsigned loop variables allowed #ifdef _OPENMP #pragma omp parallel for #endif for (SignedSize i = 0; i < boost::numeric_cast<SignedSize>(file_list.size()); ++i) { boost::shared_ptr<PeakMap > exp(new PeakMap); MzMLFile f; // Logging and output to the console // IF_MASTERTHREAD f.setLogType(log_type_); // Find the transitions to extract and extract them MapType tmp_out; OpenMS::TargetedExperiment transition_exp_used; f.load(file_list[i], *exp); if (exp->empty() ) { continue; } // if empty, go on 
OpenSwath::SpectrumAccessPtr expptr = SimpleOpenMSSpectraFactory::getSpectrumAccessOpenMSPtr(exp); bool do_continue = true; if (is_swath) { do_continue = OpenSwathHelper::checkSwathMapAndSelectTransitions(*exp, targeted_exp, transition_exp_used, min_upper_edge_dist); } else { transition_exp_used = targeted_exp; } #ifdef _OPENMP #pragma omp critical (OpenSwathChromatogramExtractor_metadata) #endif // after loading the first file, copy the meta data from that experiment // this may happen *after* chromatograms were already added to the // output, thus we do NOT fill the experiment here but rather store all // the chromatograms in the "chromatograms" array and store them in // out_exp afterwards. if (i == 0) { out_exp = *exp; out_exp.clear(false); } std::cout << "Extracting " << transition_exp_used.getTransitions().size() << " transitions" << std::endl; std::vector< OpenSwath::ChromatogramPtr > chromatogram_ptrs; std::vector< ChromatogramExtractor::ExtractionCoordinates > coordinates; // continue if the map is not empty if (do_continue) { // Prepare the coordinates (with or without rt extraction) and then extract the chromatograms ChromatogramExtractor extractor; if (rt_extraction_window < 0) { extractor.prepare_coordinates(chromatogram_ptrs, coordinates, transition_exp_used, rt_extraction_window, extract_MS1); } else { // Use an rt extraction window of 0.0 which will just write the retention time in start / end positions extractor.prepare_coordinates(chromatogram_ptrs, coordinates, transition_exp_used, 0.0, extract_MS1); for (std::vector< ChromatogramExtractor::ExtractionCoordinates >::iterator it = coordinates.begin(); it != coordinates.end(); ++it) { it->rt_start = trafo_inverse.apply(it->rt_start) - rt_extraction_window / 2.0; it->rt_end = trafo_inverse.apply(it->rt_end) + rt_extraction_window / 2.0; } } extractor.extractChromatograms(expptr, chromatogram_ptrs, coordinates, mz_extraction_window, ppm, extraction_function); #ifdef _OPENMP #pragma omp critical 
(OpenSwathChromatogramExtractor_insertMS1) #endif { // Remove potential meta value indicating cached data SpectrumSettings exp_settings = (*exp)[0]; for (Size j = 0; j < exp_settings.getDataProcessing().size(); j++) { if (exp_settings.getDataProcessing()[j]->metaValueExists("cached_data")) { exp_settings.getDataProcessing()[j]->removeMetaValue("cached_data"); } } extractor.return_chromatogram(chromatogram_ptrs, coordinates, transition_exp_used, exp_settings, chromatograms, extract_MS1); } } // end of do_continue } // end of loop over all files / end of OpenMP // TODO check that no chromatogram IDs occur multiple times ! // store the output out_exp.setChromatograms(chromatograms); MzMLFile mzf; mzf.setLogType(log_type_); addDataProcessing_(out_exp, getProcessingInfo_(DataProcessing::SMOOTHING)); mzf.store(out, out_exp); return EXECUTION_OK; }
ExitCodes main_(int , const char**) { String in = getStringOption_("in"); String out_meta = getStringOption_("out"); String in_cached = in + ".cached"; String out_cached = out_meta + ".cached"; bool convert_back = getFlag_("convert_back"); if (!convert_back) { MapType exp; CachedmzML cacher; MzMLFile f; cacher.setLogType(log_type_); f.setLogType(log_type_); f.load(in,exp); cacher.writeMemdump(exp, out_cached); DataProcessing dp; std::set<DataProcessing::ProcessingAction> actions; actions.insert(DataProcessing::FORMAT_CONVERSION); dp.setProcessingActions(actions); dp.setMetaValue("cached_data", "true"); for (Size i=0; i<exp.size(); ++i) { exp[i].getDataProcessing().push_back(dp); } std::vector<MSChromatogram<ChromatogramPeak> > chromatograms = exp.getChromatograms(); for (Size i=0; i<chromatograms.size(); ++i) { chromatograms[i].getDataProcessing().push_back(dp); } exp.setChromatograms(chromatograms); cacher.writeMetadata(exp, out_meta); } else { MzMLFile f; MapType meta_exp; CachedmzML cacher; MapType exp_reading; cacher.setLogType(log_type_); f.setLogType(log_type_); f.load(in,meta_exp); cacher.readMemdump(exp_reading, in_cached); std::cout << " read back, got " << exp_reading.size() << " spectra " << exp_reading.getChromatograms().size() << " chromats " << std::endl; { for (Size i=0; i<meta_exp.size(); ++i) { for (Size j = 0; j < meta_exp[i].getDataProcessing().size(); j++) { DataProcessing& dp = meta_exp[i].getDataProcessing()[j]; if (dp.metaValueExists("cached_data")) { dp.removeMetaValue("cached_data"); } } } std::vector<MSChromatogram<ChromatogramPeak> > chromatograms = meta_exp.getChromatograms(); for (Size i=0; i<chromatograms.size(); ++i) { for (Size j = 0; j < chromatograms[i].getDataProcessing().size(); j++) { DataProcessing& dp = chromatograms[i].getDataProcessing()[j]; if (dp.metaValueExists("cached_data")) { dp.removeMetaValue("cached_data"); } } } } if (meta_exp.size() != exp_reading.size()) { std::cerr << " Both experiments need to have the same 
size!"; } for (Size i=0; i<exp_reading.size(); ++i) { for (Size j = 0; j < exp_reading[i].size(); j++) { meta_exp[i].push_back(exp_reading[i][j]); } } std::vector<MSChromatogram<ChromatogramPeak> > chromatograms = exp_reading.getChromatograms(); std::vector<MSChromatogram<ChromatogramPeak> > old_chromatograms = meta_exp.getChromatograms(); for (Size i=0; i<chromatograms.size(); ++i) { for (Size j = 0; j < chromatograms[i].size(); j++) { old_chromatograms[i].push_back(chromatograms[i][j]); } } meta_exp.setChromatograms(old_chromatograms); f.store(out_meta,meta_exp); } return EXECUTION_OK; }
/// Assigns pseudo retention times to all spectra of the input mzML file,
/// sorts the spectra by the new retention time and stores them to "out".
///
/// The retention time of a spectrum is derived from the number of MS1 spectra
/// seen so far, multiplied by "RT_distance". After every "num_spots_per_row"
/// MS1 spectra the row direction is toggled; in a reversed row the retention
/// time is computed from the row base and the remaining spots of the row.
ExitCodes main_(int, const char **)
{
  //-------------------------------------------------------------
  // parsing parameters
  //-------------------------------------------------------------
  String in(getStringOption_("in"));
  String out(getStringOption_("out"));
  Size num_spots_per_row(getIntOption_("num_spots_per_row"));
  double RT_distance(getDoubleOption_("RT_distance"));

  //-------------------------------------------------------------
  // reading input
  //-------------------------------------------------------------
  PeakMap exp;
  MzMLFile mzml;
  mzml.setLogType(log_type_);
  mzml.load(in, exp);

  //-------------------------------------------------------------
  // calculations
  //-------------------------------------------------------------
  ProgressLogger progress;
  progress.setLogType(log_type_);
  progress.startProgress(0, exp.size(), "Assigning pseudo RTs.");

  Size ms1_seen = 0;      // number of MS1 spectra processed so far
  Size row_base = 0;      // value of ms1_seen when the last forward row ended
  Size spots_in_row = 0;  // MS1 spectra seen in the current row
  bool reversed_row = false;

  for (Size idx = 0; idx < exp.size(); ++idx)
  {
    progress.setProgress(idx);

    double pseudo_rt = 0;
    if (!reversed_row)
    {
      pseudo_rt = static_cast<double>(ms1_seen) * RT_distance;
      writeDebug_("RT=" + String(pseudo_rt), 1);
    }
    else
    {
      pseudo_rt = static_cast<double>(row_base + (num_spots_per_row - spots_in_row)) * RT_distance;
      writeDebug_("RT=" + String(pseudo_rt) + " (modified, row_counter=" + String(spots_in_row) + ")", 1);
    }
    exp[idx].setRT(pseudo_rt);

    // only MS1 spectra advance the spot counters
    if (exp[idx].getMSLevel() != 1)
    {
      continue;
    }

    ++spots_in_row;
    if (spots_in_row >= num_spots_per_row)
    {
      // row finished: start a new one running in the opposite direction
      spots_in_row = 0;
      reversed_row = !reversed_row;
    }

    ++ms1_seen;
    if (!reversed_row)
    {
      row_base = ms1_seen;
    }
  }
  progress.endProgress();

  // sort the spectra according to their new RT
  exp.sortSpectra();

  //-------------------------------------------------------------
  // writing output
  //-------------------------------------------------------------
  mzml.store(out, exp);

  return EXECUTION_OK;
}
/// Runs the isotope wavelet feature finder on the MS1 spectra of the input
/// mzML file and stores the detected features as featureXML in "out".
ExitCodes main_(int, const char **)
{
  // input and output file names
  String in_path = getStringOption_("in");
  String out_path = getStringOption_("out");

  // only MS level 1 is loaded, fragment spectra are skipped
  PeakFileOptions ms1_only;
  ms1_only.setMSLevels(vector<Int>(1, 1));

  // read the input data
  MzMLFile mzml;
  mzml.getOptions() = ms1_only;
  mzml.setLogType(log_type_);

  PeakMap peak_map;
  mzml.load(in_path, peak_map);
  peak_map.updateRanges();

  // seeds are not supported, so an empty map is handed to the finder
  FeatureMap<> no_seeds;

  // set up the FeatureFinder
  FeatureFinder finder;
  finder.setLogType(log_type_);

  // receives the detected features
  FeatureMap<> found;

  // parameters of the "algorithm:" section are forwarded to the finder
  Param finder_param = getParam_().copy("algorithm:", true);
  writeDebug_("Parameters passed to FeatureFinder", finder_param, 3);

  // run the feature finder
  finder.run(FeatureFinderAlgorithmIsotopeWavelet<Peak1D, Feature>::getProductName(), peak_map, found, finder_param, no_seeds);

  found.applyMemberFunction(&UniqueIdInterface::setUniqueId);

  // at high debug levels, print the meta values of every feature that has any
  if (debug_level_ > 10)
  {
    for (FeatureMap<>::Iterator feat = found.begin(); feat != found.end(); ++feat)
    {
      if (feat->isMetaEmpty())
      {
        continue;
      }

      vector<String> meta_keys;
      feat->getKeys(meta_keys);
      LOG_INFO << "Feature " << feat->getUniqueId() << endl;
      for (Size k = 0; k < meta_keys.size(); k++)
      {
        LOG_INFO << " " << meta_keys[k] << " = " << feat->getMetaValue(meta_keys[k]) << endl;
      }
    }
  }

  //-------------------------------------------------------------
  // writing files
  //-------------------------------------------------------------

  // annotate output with data processing info
  addDataProcessing_(found, getProcessingInfo_(DataProcessing::QUANTITATION));

  // write the features to the output file given by the user
  FeatureXMLFile feature_file;
  feature_file.store(out_path, found);

  return EXECUTION_OK;
}
ExitCodes main_(int, const char**) { //------------------------------------------------------------- // parameter handling //------------------------------------------------------------- // file list StringList file_list = getStringList_("in"); // file type FileHandler file_handler; FileTypes::Type force_type; if (getStringOption_("in_type").size() > 0) { force_type = FileTypes::nameToType(getStringOption_("in_type")); } else { force_type = file_handler.getType(file_list[0]); } // output file names and types String out_file = getStringOption_("out"); bool annotate_file_origin = getFlag_("annotate_file_origin"); rt_gap_ = getDoubleOption_("rt_concat:gap"); vector<String> trafo_out = getStringList_("rt_concat:trafo_out"); if (trafo_out.empty()) { // resize now so we don't have to worry about indexing out of bounds: trafo_out.resize(file_list.size()); } else if (trafo_out.size() != file_list.size()) { writeLog_("Error: Number of transformation output files must equal the number of input files (parameters 'rt_concat:trafo_out'/'in')!"); return ILLEGAL_PARAMETERS; } //------------------------------------------------------------- // calculations //------------------------------------------------------------- if (force_type == FileTypes::FEATUREXML) { FeatureMap out; FeatureXMLFile fh; for (Size i = 0; i < file_list.size(); ++i) { FeatureMap map; fh.load(file_list[i], map); if (annotate_file_origin) { for (FeatureMap::iterator it = map.begin(); it != map.end(); ++it) { it->setMetaValue("file_origin", DataValue(file_list[i])); } } if (rt_gap_ > 0.0) // concatenate in RT { adjustRetentionTimes_(map, trafo_out[i], i == 0); } out += map; } //------------------------------------------------------------- // writing output //------------------------------------------------------------- // annotate output with data processing info addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION)); fh.store(out_file, out); } else if (force_type == 
FileTypes::CONSENSUSXML) { ConsensusMap out; ConsensusXMLFile fh; fh.load(file_list[0], out); // skip first file for (Size i = 1; i < file_list.size(); ++i) { ConsensusMap map; fh.load(file_list[i], map); if (annotate_file_origin) { for (ConsensusMap::iterator it = map.begin(); it != map.end(); ++it) { it->setMetaValue("file_origin", DataValue(file_list[i])); } } if (rt_gap_ > 0.0) // concatenate in RT { adjustRetentionTimes_(map, trafo_out[i], i == 0); } out += map; } //------------------------------------------------------------- // writing output //------------------------------------------------------------- // annotate output with data processing info addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION)); fh.store(out_file, out); } else if (force_type == FileTypes::TRAML) { TargetedExperiment out; TraMLFile fh; for (Size i = 0; i < file_list.size(); ++i) { TargetedExperiment map; fh.load(file_list[i], map); out += map; } //------------------------------------------------------------- // writing output //------------------------------------------------------------- // annotate output with data processing info Software software; software.setName("FileMerger"); software.setVersion(VersionInfo::getVersion()); out.addSoftware(software); fh.store(out_file, out); } else // raw data input (e.g. 
mzML) { // RT bool rt_auto_number = getFlag_("raw:rt_auto"); bool rt_filename = getFlag_("raw:rt_filename"); bool rt_custom = false; DoubleList custom_rts = getDoubleList_("raw:rt_custom"); if (!custom_rts.empty()) { rt_custom = true; if (custom_rts.size() != file_list.size()) { writeLog_("Custom retention time list (parameter 'raw:rt_custom') must have as many elements as there are input files (parameter 'in')!"); return ILLEGAL_PARAMETERS; } } // MS level Int ms_level = getIntOption_("raw:ms_level"); MSExperiment<> out; UInt rt_auto = 0; UInt native_id = 0; for (Size i = 0; i < file_list.size(); ++i) { String filename = file_list[i]; // load file force_type = file_handler.getType(file_list[i]); MSExperiment<> in; file_handler.loadExperiment(filename, in, force_type, log_type_); if (in.empty() && in.getChromatograms().empty()) { writeLog_(String("Warning: Empty file '") + filename + "'!"); continue; } out.reserve(out.size() + in.size()); // warn if custom RT and more than one scan in input file if (rt_custom && in.size() > 1) { writeLog_(String("Warning: More than one scan in file '") + filename + "'! 
All scans will have the same retention time!"); } // handle special raw data options: for (MSExperiment<>::iterator spec_it = in.begin(); spec_it != in.end(); ++spec_it) { float rt_final = spec_it->getRT(); if (rt_auto_number) { rt_final = ++rt_auto; } else if (rt_custom) { rt_final = custom_rts[i]; } else if (rt_filename) { static const boost::regex re("rt(\\d+(\\.\\d+)?)"); boost::smatch match; bool found = boost::regex_search(filename, match, re); if (found) { rt_final = String(match[1]).toFloat(); } else { writeLog_("Warning: could not extract retention time from filename '" + filename + "'"); } } // none of the rt methods were successful if (rt_final < 0) { writeLog_(String("Warning: No valid retention time for output scan '") + rt_auto + "' from file '" + filename + "'"); } spec_it->setRT(rt_final); spec_it->setNativeID("spectrum=" + String(native_id)); if (ms_level > 0) { spec_it->setMSLevel(ms_level); } ++native_id; } // if we have only one spectrum, we can annotate it directly, for more spectra, we just name the source file leaving the spectra unannotated (to avoid a long and redundant list of sourceFiles) if (in.size() == 1) { in[0].setSourceFile(in.getSourceFiles()[0]); in.getSourceFiles().clear(); // delete source file annotated from source file (it's in the spectrum anyways) } if (rt_gap_ > 0.0) // concatenate in RT { adjustRetentionTimes_(in, trafo_out[i], i == 0); } // add spectra to output for (MSExperiment<>::const_iterator spec_it = in.begin(); spec_it != in.end(); ++spec_it) { out.addSpectrum(*spec_it); } // also add the chromatograms for (vector<MSChromatogram<ChromatogramPeak> >::const_iterator chrom_it = in.getChromatograms().begin(); chrom_it != in.getChromatograms().end(); ++chrom_it) { out.addChromatogram(*chrom_it); } // copy experimental settings from first file if (i == 0) { out.ExperimentalSettings::operator=(in); } else // otherwise append { out.getSourceFiles().insert(out.getSourceFiles().end(), in.getSourceFiles().begin(), 
in.getSourceFiles().end()); // could be emtpty if spectrum was annotated above, but that's ok then } } //------------------------------------------------------------- // writing output //------------------------------------------------------------- // annotate output with data processing info addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION)); MzMLFile f; f.setLogType(log_type_); f.store(out_file, out); } return EXECUTION_OK; }
/// Runs CompNovoIdentificationCID on the MS2 spectra of the input mzML file
/// and stores the resulting peptide identifications, together with one
/// protein identification describing the search, as idXML in "out".
ExitCodes main_(int, const char **) override
{
  //-------------------------------------------------------------
  // parameter handling
  //-------------------------------------------------------------

  // input/output files
  String in(getStringOption_("in"));
  String out(getStringOption_("out"));

  //-------------------------------------------------------------
  // loading input
  //-------------------------------------------------------------

  PeakMap spectra;
  MzMLFile mzml;
  mzml.setLogType(log_type_);

  // restrict loading to MS level 2
  PeakFileOptions ms2_only;
  ms2_only.clearMSLevels();
  ms2_only.addMSLevel(2);
  mzml.getOptions() = ms2_only;
  mzml.load(in, spectra);

  writeDebug_("Data set contains " + String(spectra.size()) + " spectra", 1);

  //-------------------------------------------------------------
  // calculations
  //-------------------------------------------------------------

  vector<PeptideIdentification> peptide_ids;
  CompNovoIdentificationCID identifier_algo;

  // hand the "algorithm:" section to the algorithm
  Param algo_param = getParam_().copy("algorithm:", true);
  identifier_algo.setParameters(algo_param);
  identifier_algo.getIdentifications(peptide_ids, spectra);

  // read the parameters back from the algorithm for the search annotation
  algo_param = identifier_algo.getParameters();

  //-------------------------------------------------------------
  // writing output
  //-------------------------------------------------------------

  DateTime now = DateTime::now();
  String run_id("CompNovoCID_" + now.get());

  for (PeptideIdentification& pep : peptide_ids)
  {
    pep.assignRanks();
    pep.setIdentifier(run_id);
  }

  ProteinIdentification protein_id;
  protein_id.setIdentifier(run_id);
  protein_id.setDateTime(now);

  StringList ms_runs;
  spectra.getPrimaryMSRunPath(ms_runs);
  protein_id.setPrimaryMSRunPath(ms_runs);

  // describe the search that produced the identifications
  ProteinIdentification::SearchParameters search;
  search.charges = "+2-+3";
  const char* enzyme_name = algo_param.getValue("tryptic_only").toBool() ? "Trypsin" : "no cleavage";
  search.digestion_enzyme = *(ProteaseDB::getInstance()->getEnzyme(enzyme_name));
  search.mass_type = ProteinIdentification::MONOISOTOPIC;
  search.fixed_modifications = algo_param.getValue("fixed_modifications");
  search.variable_modifications = algo_param.getValue("variable_modifications");
  search.missed_cleavages = (UInt)algo_param.getValue("missed_cleavages");
  search.fragment_mass_tolerance = (double)algo_param.getValue("fragment_mass_tolerance");
  search.precursor_mass_tolerance = (double)algo_param.getValue("precursor_mass_tolerance");
  search.fragment_mass_tolerance_ppm = false;
  search.precursor_mass_tolerance_ppm = false;

  protein_id.setSearchParameters(search);
  protein_id.setSearchEngineVersion("0.9beta");
  protein_id.setSearchEngine("CompNovo");

  vector<ProteinIdentification> protein_ids(1, protein_id);

  IdXMLFile().store(out, protein_ids, peptide_ids);

  return EXECUTION_OK;
}
/// Scales the peak intensities of all spectra with an MS level below 2 so
/// that the maximum intensity reported for the experiment corresponds to 100,
/// and stores the result to "out".
///
/// If the experiment has no positive maximum intensity (e.g. no peaks, or
/// only zero intensities) the data is written unchanged, because scaling
/// would divide by zero or flip the sign of the intensities.
ExitCodes main_(int, const char **) override
{
  //-------------------------------------------------------------
  // parameter handling
  //-------------------------------------------------------------

  String in = getStringOption_("in");
  String out = getStringOption_("out");

  //-------------------------------------------------------------
  // loading input
  //-------------------------------------------------------------

  PeakMap exp;
  MzMLFile f;
  f.setLogType(log_type_);
  f.load(in, exp);

  //-------------------------------------------------------------
  // calculations
  //-------------------------------------------------------------

  // determine maximum peak; one percent of it is the divisor for all peaks
  exp.updateRanges();
  const double one_percent = exp.getMaxInt() / 100.0;

  if (one_percent > 0.0)
  {
    for (PeakMap::Iterator it = exp.begin(); it != exp.end(); ++it)
    {
      if (it->getMSLevel() < 2)
      {
        for (PeakMap::SpectrumType::Iterator it2 = it->begin(); it2 != it->end(); ++it2)
        {
          it2->setIntensity(it2->getIntensity() / one_percent);
        }
      }
    }
  }
  else
  {
    writeLog_("Warning: No positive peak intensity found, intensities are left unchanged.");
  }

  /// @todo add chromatogram support for normalization, e.g. for MRM stuff (Andreas)
  /*
    vector<MSChromatogram > chroms = exp.getChromatograms();
    double sum(0);
    for (vector<MSChromatogram >::iterator it = chroms.begin(); it != chroms.end(); ++it)
    {
      for (MSChromatogram::Iterator it2 = it->begin(); it2 != it->end(); ++it2)
      {
        sum += it2->getIntensity();
      }
    }

    for (vector<MSChromatogram >::iterator it = chroms.begin(); it != chroms.end(); ++it)
    {
      for (MSChromatogram::Iterator it2 = it->begin(); it2 != it->end(); ++it2)
      {
        it2->setIntensity(it2->getIntensity() / sum * 1000000.0);
      }
    }

    exp.setChromatograms(chroms);
  */

  //-------------------------------------------------------------
  // writing output
  //-------------------------------------------------------------

  // annotate output with data processing info
  addDataProcessing_(exp, getProcessingInfo_(DataProcessing::NORMALIZATION));

  f.store(out, exp);

  return EXECUTION_OK;
}