/**
  @brief Loads all given consensusXML files, tags their features with the
         experiment name and appends them to @p out.

  @param out         Map receiving the features of every file (appended with operator+=).
  @param experiment  Value stored in the "experiment" meta value of each loaded feature.
  @param file_paths  Paths of the consensusXML files to load.
*/
void QuantitativeExperimentalDesign::mergeConsensusMaps_(ConsensusMap & out, const String & experiment, StringList & file_paths)
{
  LOG_INFO << "Merge consensus maps: " << endl;

  for (StringList::Iterator file_it = file_paths.begin(); file_it != file_paths.end(); ++file_it)
  {
    // One fresh map per file: the result no longer depends on load() clearing
    // whatever the previous iteration left behind.
    ConsensusMap map;
    ConsensusXMLFile().load(*file_it, map);

    for (ConsensusMap::iterator it = map.begin(); it != map.end(); ++it)
    {
      it->setMetaValue("experiment", DataValue(experiment));
    }
    out += map;
  }

  LOG_INFO << endl;
}
ExitCodes main_(int, const char**) { //------------------------------------------------------------- // parameter handling //------------------------------------------------------------- //file list StringList file_list = getStringList_("in"); //file type FileHandler fh; FileTypes::Type force_type; if (getStringOption_("in_type").size() > 0) { force_type = FileTypes::nameToType(getStringOption_("in_type")); } else { force_type = fh.getType(file_list[0]); } //output file names and types String out_file = getStringOption_("out"); //------------------------------------------------------------- // calculations //------------------------------------------------------------- bool annotate_file_origin = getFlag_("annotate_file_origin"); if (force_type == FileTypes::FEATUREXML) { FeatureMap<> out; for (Size i = 0; i < file_list.size(); ++i) { FeatureMap<> map; FeatureXMLFile fh; fh.load(file_list[i], map); if (annotate_file_origin) { for (FeatureMap<>::iterator it = map.begin(); it != map.end(); ++it) { it->setMetaValue("file_origin", DataValue(file_list[i])); } } out += map; } //------------------------------------------------------------- // writing output //------------------------------------------------------------- //annotate output with data processing info addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION)); FeatureXMLFile f; f.store(out_file, out); } else if (force_type == FileTypes::CONSENSUSXML) { ConsensusMap out; ConsensusXMLFile fh; fh.load(file_list[0], out); //skip first file for (Size i = 1; i < file_list.size(); ++i) { ConsensusMap map; ConsensusXMLFile fh; fh.load(file_list[i], map); if (annotate_file_origin) { for (ConsensusMap::iterator it = map.begin(); it != map.end(); ++it) { it->setMetaValue("file_origin", DataValue(file_list[i])); } } out += map; } //------------------------------------------------------------- // writing output //------------------------------------------------------------- //annotate output with data 
processing info addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION)); ConsensusXMLFile f; f.store(out_file, out); } else if (force_type == FileTypes::TRAML) { TargetedExperiment out; for (Size i = 0; i < file_list.size(); ++i) { TargetedExperiment map; TraMLFile fh; fh.load(file_list[i], map); out += map; } //------------------------------------------------------------- // writing output //------------------------------------------------------------- //annotate output with data processing info Software software; software.setName("FileMerger"); software.setVersion(VersionInfo::getVersion()); out.addSoftware(software); TraMLFile f; f.store(out_file, out); } else { // we might want to combine different types, thus we only // query in_type (which applies to all files) // and not the suffix or content of a single file force_type = FileTypes::nameToType(getStringOption_("in_type")); //rt bool rt_auto_number = getFlag_("raw:rt_auto"); bool rt_filename = getFlag_("raw:rt_filename"); bool rt_custom = false; DoubleList custom_rts = getDoubleList_("raw:rt_custom"); if (custom_rts.size() != 0) { rt_custom = true; if (custom_rts.size() != file_list.size()) { writeLog_("Custom retention time list must have as many elements as there are input files!"); printUsage_(); return ILLEGAL_PARAMETERS; } } //ms level bool user_ms_level = getFlag_("raw:user_ms_level"); MSExperiment<> out; out.reserve(file_list.size()); UInt rt_auto = 0; UInt native_id = 0; std::vector<MSChromatogram<ChromatogramPeak> > all_chromatograms; for (Size i = 0; i < file_list.size(); ++i) { String filename = file_list[i]; //load file MSExperiment<> in; fh.loadExperiment(filename, in, force_type, log_type_); if (in.empty() && in.getChromatograms().empty()) { writeLog_(String("Warning: Empty file '") + filename + "'!"); continue; } out.reserve(out.size() + in.size()); //warn if custom RT and more than one scan in input file if (rt_custom && in.size() > 1) { writeLog_(String("Warning: More 
than one scan in file '") + filename + "'! All scans will have the same retention time!"); } for (MSExperiment<>::const_iterator it2 = in.begin(); it2 != in.end(); ++it2) { //handle rt Real rt_final = it2->getRT(); if (rt_auto_number) { rt_final = ++rt_auto; } else if (rt_custom) { rt_final = custom_rts[i]; } else if (rt_filename) { if (!filename.hasSubstring("rt")) { writeLog_(String("Warning: cannot guess retention time from filename as it does not contain 'rt'")); } for (Size i = 0; i < filename.size(); ++i) { if (filename[i] == 'r' && ++i != filename.size() && filename[i] == 't' && ++i != filename.size() && isdigit(filename[i])) { String rt; while (i != filename.size() && (filename[i] == '.' || isdigit(filename[i]))) { rt += filename[i++]; } if (rt.size() > 0) { // remove dot from rt3892.98.dta // ^ if (rt[rt.size() - 1] == '.') { // remove last character rt.erase(rt.end() - 1); } } try { float tmp = rt.toFloat(); rt_final = tmp; } catch (Exception::ConversionError) { writeLog_(String("Warning: cannot convert the found retention time in a value '" + rt + "'.")); } } } } // none of the rt methods were successful if (rt_final == -1) { writeLog_(String("Warning: No valid retention time for output scan '") + rt_auto + "' from file '" + filename + "'"); } out.addSpectrum(*it2); out.getSpectra().back().setRT(rt_final); out.getSpectra().back().setNativeID(native_id); if (user_ms_level) { out.getSpectra().back().setMSLevel((int)getIntOption_("raw:ms_level")); } ++native_id; } // if we had only one spectrum, we can annotate it directly, for more spectra, we just name the source file leaving the spectra unannotated (to avoid a long and redundant list of sourceFiles) if (in.size() == 1) { out.getSpectra().back().setSourceFile(in.getSourceFiles()[0]); in.getSourceFiles().clear(); // delete source file annotated from source file (its in the spectrum anyways) } // copy experimental settings from first file if (i == 0) { out.ExperimentalSettings::operator=(in); } else // 
otherwise append { out.getSourceFiles().insert(out.getSourceFiles().end(), in.getSourceFiles().begin(), in.getSourceFiles().end()); // could be emtpty if spectrum was annotated above, but that's ok then } // also add the chromatograms for (std::vector<MSChromatogram<ChromatogramPeak> >::const_iterator it2 = in.getChromatograms().begin(); it2 != in.getChromatograms().end(); ++it2) { all_chromatograms.push_back(*it2); } } // set the chromatograms out.setChromatograms(all_chromatograms); //------------------------------------------------------------- // writing output //------------------------------------------------------------- //annotate output with data processing info addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION)); MzMLFile f; f.setLogType(log_type_); f.store(out_file, out); } return EXECUTION_OK; }
/**
  @brief Runs the SILACAnalyzer pipeline: load the mzML input, estimate the peak
         width, filter (or load previously stored filter results), cluster, and
         write whichever outputs were requested.

  Outputs are written only for non-empty parameters: 'out_filters', 'out_debug'
  (as "<out_debug>.clusters.csv"), 'out' (plus 'out_mzq'), 'out_clusters' and
  'out_features'.

  @return ILLEGAL_PARAMETERS if 'out' is requested without labels or a selected
          label has no entry in the label table; INCOMPATIBLE_INPUT_DATA if the
          peak width cannot be estimated; EXECUTION_OK otherwise.
*/
ExitCodes main_(int, const char **)
{
  // data to be passed through the algorithm
  vector<vector<SILACPattern> > data;
  MSQuantifications msq;
  // NOTE(review): raw pointers filled by clusterData() below; who releases them
  // is not visible in this function - confirm ownership.
  vector<Clustering *> cluster_data;

  //
  // Parameter handling
  //
  map<String, DoubleReal> label_identifiers; // list defining the mass shifts of each label (e.g. "Arg6" => 6.0201290268)
  handleParameters_sample();
  handleParameters_algorithm();
  handleParameters_labels(label_identifiers);
  handleParameters();

  if (selected_labels.empty() && !out.empty()) // incompatible parameters
  {
    writeLog_("Error: The 'out' parameter cannot be used without a label (parameter 'sample:labels'). Use 'out_features' instead.");
    return ILLEGAL_PARAMETERS;
  }

  //
  // Initializing the SILACAnalyzer with our parameters
  //
  SILACAnalyzer analyzer;
  analyzer.setLogType(log_type_);
  analyzer.initialize(
    // section "sample"
    selected_labels,
    charge_min,
    charge_max,
    missed_cleavages,
    isotopes_per_peptide_min,
    isotopes_per_peptide_max,
    // section "algorithm"
    rt_threshold,
    rt_min,
    intensity_cutoff,
    intensity_correlation,
    model_deviation,
    allow_missing_peaks,
    // labels
    label_identifiers);

  //--------------------------------------------------
  // loading input from .mzML
  //--------------------------------------------------
  MzMLFile file;
  MSExperiment<Peak1D> exp;

  // only read MS1 spectra ...
  /*
  std::vector<int> levels;
  levels.push_back(1);
  file.getOptions().setMSLevels(levels);
  */
  LOG_DEBUG << "Loading input..." << endl;
  file.setLogType(log_type_);
  file.load(in, exp);

  // set size of input map
  exp.updateRanges();

  // extract level 1 spectra
  exp.getSpectra().erase(remove_if(exp.begin(), exp.end(), InMSLevelRange<MSExperiment<Peak1D>::SpectrumType>(IntList::create("1"), true)), exp.end());

  // sort according to RT and MZ
  exp.sortSpectra();

  if (out_mzq != "")
  {
    // list of SILAC labels, e.g. selected_labels="[Lys4,Arg6][Lys8,Arg10]" => SILAClabels[0][1]="Arg6"
    vector<vector<String> > SILAClabels = analyzer.getSILAClabels();

    std::vector<std::vector<std::pair<String, DoubleReal> > > labels;
    // add none label
    labels.push_back(std::vector<std::pair<String, DoubleReal> >(1, std::make_pair(String("none"), DoubleReal(0))));

    for (Size i = 0; i < SILAClabels.size(); ++i) // SILACLabels MUST be in weight order!!!
    {
      std::vector<std::pair<String, DoubleReal> > one_label;
      for (UInt j = 0; j < SILAClabels[i].size(); ++j)
      {
        // check the lookup instead of dereferencing a possible end() iterator
        map<String, DoubleReal>::const_iterator label_it = label_identifiers.find(SILAClabels[i][j]);
        if (label_it == label_identifiers.end())
        {
          writeLog_(String("Error: Label '") + SILAClabels[i][j] + "' has no entry in the label table.");
          return ILLEGAL_PARAMETERS;
        }
        one_label.push_back(*label_it);
      }
      labels.push_back(one_label);
    }
    msq.registerExperiment(exp, labels); // add assays
    msq.assignUIDs();
  }
  MSQuantifications::QUANT_TYPES quant_type = MSQuantifications::MS1LABEL;
  msq.setAnalysisSummaryQuantType(quant_type); // add analysis_summary_

  //--------------------------------------------------
  // estimate peak width
  //--------------------------------------------------
  LOG_DEBUG << "Estimating peak width..." << endl;
  PeakWidthEstimator::Result peak_width;
  try
  {
    peak_width = analyzer.estimatePeakWidth(exp);
  }
  catch (Exception::InvalidSize &)
  {
    writeLog_("Error: Unable to estimate peak width of input data.");
    return INCOMPATIBLE_INPUT_DATA;
  }

  if (in_filters == "")
  {
    //--------------------------------------------------
    // filter input data
    //--------------------------------------------------
    LOG_DEBUG << "Filtering input data..." << endl;
    analyzer.filterData(exp, peak_width, data);

    //--------------------------------------------------
    // store filter results
    //--------------------------------------------------
    if (out_filters != "")
    {
      LOG_DEBUG << "Storing filtering results..." << endl;
      ConsensusMap filter_map;
      for (std::vector<std::vector<SILACPattern> >::const_iterator it = data.begin(); it != data.end(); ++it)
      {
        analyzer.generateFilterConsensusByPattern(filter_map, *it);
      }
      analyzer.writeConsensus(out_filters, filter_map);
    }
  }
  else
  {
    //--------------------------------------------------
    // load filter results
    //--------------------------------------------------
    LOG_DEBUG << "Loading filtering results..." << endl;
    ConsensusMap filter_map;
    analyzer.readConsensus(in_filters, filter_map);
    analyzer.readFilterConsensusByPattern(filter_map, data);
  }

  //--------------------------------------------------
  // clustering
  //--------------------------------------------------
  LOG_DEBUG << "Clustering data..." << endl;
  analyzer.clusterData(exp, peak_width, cluster_data, data);

  //--------------------------------------------------------------
  // write output
  //--------------------------------------------------------------
  if (out_debug != "")
  {
    LOG_DEBUG << "Writing debug output file..." << endl;

    // named 'debug_out' so it does not hide the 'out' path tested further down
    std::ofstream debug_out((out_debug + ".clusters.csv").c_str());
    if (!debug_out)
    {
      writeLog_(String("Warning: Cannot open debug output file '") + out_debug + ".clusters.csv'!");
    }

    vector<vector<DoubleReal> > massShifts = analyzer.getMassShifts(); // list of mass shifts
    // number of mass shifts per pattern; 0 if there is no pattern at all
    const Size shift_count = massShifts.empty() ? 0 : massShifts[0].size();

    // generate header
    debug_out << std::fixed << std::setprecision(8) << "ID,RT,MZ_PEAK,CHARGE";
    for (UInt i = 1; i <= shift_count; ++i)
    {
      debug_out << ",DELTA_MASS_" << i + 1;
    }
    for (UInt i = 0; i <= shift_count; ++i)
    {
      for (UInt j = 1; j <= isotopes_per_peptide_max; ++j)
      {
        debug_out << ",INT_PEAK_" << i + 1 << '_' << j;
      }
    }
    debug_out << ",MZ_RAW";
    for (UInt i = 0; i <= shift_count; ++i)
    {
      for (UInt j = 1; j <= isotopes_per_peptide_max; ++j)
      {
        debug_out << ",INT_RAW_" << i + 1 << '_' << j;
      }
    }
    for (UInt i = 0; i <= shift_count; ++i)
    {
      for (UInt j = 1; j <= isotopes_per_peptide_max; ++j)
      {
        debug_out << ",MZ_RAW_" << i + 1 << '_' << j;
      }
    }
    debug_out << '\n';

    // write data
    UInt cluster_id = 0;
    for (vector<Clustering *>::const_iterator it = cluster_data.begin(); it != cluster_data.end(); ++it)
    {
      analyzer.generateClusterDebug(debug_out, **it, cluster_id);
    }
  }

  if (out != "")
  {
    LOG_DEBUG << "Generating output consensus map..." << endl;
    ConsensusMap consensus;

    for (vector<Clustering *>::const_iterator it = cluster_data.begin(); it != cluster_data.end(); ++it)
    {
      analyzer.generateClusterConsensusByCluster(consensus, **it);
    }

    LOG_DEBUG << "Adding meta data..." << endl;
    // XXX: Need a map per mass shift
    ConsensusMap::FileDescriptions& desc = consensus.getFileDescriptions();
    const vector<vector<String> > silac_labels = analyzer.getSILAClabels();
    Size id = 0;
    for (ConsensusMap::FileDescriptions::iterator it = desc.begin(); it != desc.end(); ++it)
    {
      // NOTE(review): test mode keeps 'in' as given and normal mode strips the
      // path, although the original remark ("skip path, since its not cross
      // platform and complicates verification") was attached to the test-mode
      // branch - confirm which branch is intended.
      if (test_mode_)
      {
        it->second.filename = in;
      }
      else
      {
        it->second.filename = File::basename(in);
      }

      // Write correct label; the first entry is skipped (empty label is not listed).
      // The upper bound keeps the index valid if there are more file
      // descriptions than labels.
      if (id > 0 && id - 1 < silac_labels.size())
      {
        it->second.label = StringList(silac_labels[id - 1]).concatenate("");
      }
      ++id;
    }

    std::set<DataProcessing::ProcessingAction> actions;
    actions.insert(DataProcessing::DATA_PROCESSING);
    actions.insert(DataProcessing::PEAK_PICKING);
    actions.insert(DataProcessing::FILTERING);
    actions.insert(DataProcessing::QUANTITATION);

    addDataProcessing_(consensus, getProcessingInfo_(actions));

    analyzer.writeConsensus(out, consensus);

    if (out_mzq != "")
    {
      LOG_DEBUG << "Generating output mzQuantML file..." << endl;
      ConsensusMap numap(consensus);

      // Ratio templates (first assay vs. each further assay) are the same for
      // every consensus feature, so they are built once.
      const std::vector<MSQuantifications::Assay>& assays = msq.getAssays();
      std::vector<ConsensusFeature::Ratio> ratio_templates;
      if (!assays.empty())
      {
        for (std::vector<MSQuantifications::Assay>::const_iterator ait = assays.begin() + 1; ait != assays.end(); ++ait)
        {
          ConsensusFeature::Ratio r;
          r.numerator_ref_ = String(assays.begin()->uid_);
          r.denominator_ref_ = String(ait->uid_);
          r.description_.push_back("Simple ratio calc");
          r.description_.push_back("light to medium/.../heavy");
          //~ "<cvParam cvRef=\"PSI-MS\" accession=\"MS:1001132\" name=\"peptide ratio\"/>"
          ratio_templates.push_back(r);
        }
      }

      // calc. ratios
      for (ConsensusMap::iterator cit = numap.begin(); cit != numap.end(); ++cit)
      {
        std::vector<ConsensusFeature::Ratio> rts(ratio_templates);

        const ConsensusFeature::HandleSetType& feature_handles = cit->getFeatures();
        if (feature_handles.size() > 1)
        {
          std::set<FeatureHandle, FeatureHandle::IndexLess>::const_iterator fit = feature_handles.begin(); // this is unlabeled
          ++fit;
          // 'ri' counts the handles directly (same value std::distance would give)
          for (Size ri = 1; fit != feature_handles.end(); ++fit, ++ri)
          {
            if (ri - 1 >= rts.size())
            {
              break; // more handles than ratio templates: nothing left to fill
            }
            // a proper silacalanyzer algo should never have 0-intensities so no 0devison ...
            rts[ri - 1].ratio_value_ = feature_handles.begin()->getIntensity() / fit->getIntensity();
          }
        }

        cit->setRatios(rts);
      }
      msq.addConsensusMap(numap); // add SILACAnalyzer result

      //~ msq.addFeatureMap();//add SILACAnalyzer evidencetrail as soon as clear what is realy contained in the featuremap
      //~ add AuditCollection - no such concept in TOPPTools yet
      analyzer.writeMzQuantML(out_mzq, msq);
    }
  }

  if (out_clusters != "")
  {
    LOG_DEBUG << "Generating cluster output file..." << endl;
    ConsensusMap cluster_map;
    for (vector<Clustering *>::const_iterator it = cluster_data.begin(); it != cluster_data.end(); ++it)
    {
      // NOTE(review): restarted at 0 for every clustering here, whereas the
      // debug output above uses one running counter - confirm this is intended.
      UInt cluster_id = 0;
      analyzer.generateClusterConsensusByPattern(cluster_map, **it, cluster_id);
    }

    ConsensusMap::FileDescription & desc = cluster_map.getFileDescriptions()[0];
    desc.filename = in;
    desc.label = "Cluster";

    analyzer.writeConsensus(out_clusters, cluster_map);
  }

  if (out_features != "")
  {
    LOG_DEBUG << "Generating output feature map..." << endl;
    FeatureMap<> feature_map;
    for (vector<Clustering *>::const_iterator it = cluster_data.begin(); it != cluster_data.end(); ++it)
    {
      analyzer.generateClusterFeatureByCluster(feature_map, **it);
    }

    analyzer.writeFeatures(out_features, feature_map);
  }

  return EXECUTION_OK;
}
ExitCodes main_(int, const char**) { //------------------------------------------------------------- // parameter handling //------------------------------------------------------------- // file list StringList file_list = getStringList_("in"); // file type FileHandler file_handler; FileTypes::Type force_type; if (getStringOption_("in_type").size() > 0) { force_type = FileTypes::nameToType(getStringOption_("in_type")); } else { force_type = file_handler.getType(file_list[0]); } // output file names and types String out_file = getStringOption_("out"); bool annotate_file_origin = getFlag_("annotate_file_origin"); rt_gap_ = getDoubleOption_("rt_concat:gap"); vector<String> trafo_out = getStringList_("rt_concat:trafo_out"); if (trafo_out.empty()) { // resize now so we don't have to worry about indexing out of bounds: trafo_out.resize(file_list.size()); } else if (trafo_out.size() != file_list.size()) { writeLog_("Error: Number of transformation output files must equal the number of input files (parameters 'rt_concat:trafo_out'/'in')!"); return ILLEGAL_PARAMETERS; } //------------------------------------------------------------- // calculations //------------------------------------------------------------- if (force_type == FileTypes::FEATUREXML) { FeatureMap out; FeatureXMLFile fh; for (Size i = 0; i < file_list.size(); ++i) { FeatureMap map; fh.load(file_list[i], map); if (annotate_file_origin) { for (FeatureMap::iterator it = map.begin(); it != map.end(); ++it) { it->setMetaValue("file_origin", DataValue(file_list[i])); } } if (rt_gap_ > 0.0) // concatenate in RT { adjustRetentionTimes_(map, trafo_out[i], i == 0); } out += map; } //------------------------------------------------------------- // writing output //------------------------------------------------------------- // annotate output with data processing info addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION)); fh.store(out_file, out); } else if (force_type == 
FileTypes::CONSENSUSXML) { ConsensusMap out; ConsensusXMLFile fh; fh.load(file_list[0], out); // skip first file for (Size i = 1; i < file_list.size(); ++i) { ConsensusMap map; fh.load(file_list[i], map); if (annotate_file_origin) { for (ConsensusMap::iterator it = map.begin(); it != map.end(); ++it) { it->setMetaValue("file_origin", DataValue(file_list[i])); } } if (rt_gap_ > 0.0) // concatenate in RT { adjustRetentionTimes_(map, trafo_out[i], i == 0); } out += map; } //------------------------------------------------------------- // writing output //------------------------------------------------------------- // annotate output with data processing info addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION)); fh.store(out_file, out); } else if (force_type == FileTypes::TRAML) { TargetedExperiment out; TraMLFile fh; for (Size i = 0; i < file_list.size(); ++i) { TargetedExperiment map; fh.load(file_list[i], map); out += map; } //------------------------------------------------------------- // writing output //------------------------------------------------------------- // annotate output with data processing info Software software; software.setName("FileMerger"); software.setVersion(VersionInfo::getVersion()); out.addSoftware(software); fh.store(out_file, out); } else // raw data input (e.g. 
mzML) { // RT bool rt_auto_number = getFlag_("raw:rt_auto"); bool rt_filename = getFlag_("raw:rt_filename"); bool rt_custom = false; DoubleList custom_rts = getDoubleList_("raw:rt_custom"); if (!custom_rts.empty()) { rt_custom = true; if (custom_rts.size() != file_list.size()) { writeLog_("Custom retention time list (parameter 'raw:rt_custom') must have as many elements as there are input files (parameter 'in')!"); return ILLEGAL_PARAMETERS; } } // MS level Int ms_level = getIntOption_("raw:ms_level"); MSExperiment<> out; UInt rt_auto = 0; UInt native_id = 0; for (Size i = 0; i < file_list.size(); ++i) { String filename = file_list[i]; // load file force_type = file_handler.getType(file_list[i]); MSExperiment<> in; file_handler.loadExperiment(filename, in, force_type, log_type_); if (in.empty() && in.getChromatograms().empty()) { writeLog_(String("Warning: Empty file '") + filename + "'!"); continue; } out.reserve(out.size() + in.size()); // warn if custom RT and more than one scan in input file if (rt_custom && in.size() > 1) { writeLog_(String("Warning: More than one scan in file '") + filename + "'! 
All scans will have the same retention time!"); } // handle special raw data options: for (MSExperiment<>::iterator spec_it = in.begin(); spec_it != in.end(); ++spec_it) { float rt_final = spec_it->getRT(); if (rt_auto_number) { rt_final = ++rt_auto; } else if (rt_custom) { rt_final = custom_rts[i]; } else if (rt_filename) { static const boost::regex re("rt(\\d+(\\.\\d+)?)"); boost::smatch match; bool found = boost::regex_search(filename, match, re); if (found) { rt_final = String(match[1]).toFloat(); } else { writeLog_("Warning: could not extract retention time from filename '" + filename + "'"); } } // none of the rt methods were successful if (rt_final < 0) { writeLog_(String("Warning: No valid retention time for output scan '") + rt_auto + "' from file '" + filename + "'"); } spec_it->setRT(rt_final); spec_it->setNativeID("spectrum=" + String(native_id)); if (ms_level > 0) { spec_it->setMSLevel(ms_level); } ++native_id; } // if we have only one spectrum, we can annotate it directly, for more spectra, we just name the source file leaving the spectra unannotated (to avoid a long and redundant list of sourceFiles) if (in.size() == 1) { in[0].setSourceFile(in.getSourceFiles()[0]); in.getSourceFiles().clear(); // delete source file annotated from source file (it's in the spectrum anyways) } if (rt_gap_ > 0.0) // concatenate in RT { adjustRetentionTimes_(in, trafo_out[i], i == 0); } // add spectra to output for (MSExperiment<>::const_iterator spec_it = in.begin(); spec_it != in.end(); ++spec_it) { out.addSpectrum(*spec_it); } // also add the chromatograms for (vector<MSChromatogram<ChromatogramPeak> >::const_iterator chrom_it = in.getChromatograms().begin(); chrom_it != in.getChromatograms().end(); ++chrom_it) { out.addChromatogram(*chrom_it); } // copy experimental settings from first file if (i == 0) { out.ExperimentalSettings::operator=(in); } else // otherwise append { out.getSourceFiles().insert(out.getSourceFiles().end(), in.getSourceFiles().begin(), 
in.getSourceFiles().end()); // could be emtpty if spectrum was annotated above, but that's ok then } } //------------------------------------------------------------- // writing output //------------------------------------------------------------- // annotate output with data processing info addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION)); MzMLFile f; f.setLogType(log_type_); f.store(out_file, out); } return EXECUTION_OK; }