/**
  @brief Tool entry point: charge-deconvolutes a feature map.

  Reads the featureXML given by "in", runs FeatureDeconvolution on it and
  stores the resulting consensus map to "out_cm". The pair map ("outpairs")
  and the decharged feature map ("out_fm") are only written when the
  corresponding option is non-empty.

  @return EXECUTION_OK after all requested outputs were stored.
*/
ExitCodes main_(int, const char **)
{
  //-------------------------------------------------------------
  // parameter handling
  //-------------------------------------------------------------
  String infile = getStringOption_("in");
  String outfile_fm = getStringOption_("out_fm");
  String outfile_cm = getStringOption_("out_cm");
  String outfile_p = getStringOption_("outpairs");

  FeatureDeconvolution deconvolution;
  const Param deconvolution_param = getParam_().copy("algorithm:FeatureDeconvolution:", true);
  writeDebug_("Parameters passed to Decharger", deconvolution_param, 3);
  deconvolution.setParameters(deconvolution_param);

  //-------------------------------------------------------------
  // loading input
  //-------------------------------------------------------------
  writeDebug_("Loading input file", 1);

  FeatureMap<> features_in;
  FeatureMap<> features_out;
  FeatureXMLFile().load(infile, features_in);

  //-------------------------------------------------------------
  // calculations
  //-------------------------------------------------------------
  ConsensusMap consensus;
  ConsensusMap pairs;

  // the run time is measured but currently not reported
  StopWatch timer;
  timer.start();
  deconvolution.compute(features_in, features_out, consensus, pairs);
  timer.stop();

  //-------------------------------------------------------------
  // writing output
  //-------------------------------------------------------------
  writeDebug_("Saving output files", 1);

  // both consensus maps originate from the single input file
  consensus.getFileDescriptions()[0].filename = infile;
  pairs.getFileDescriptions()[0].filename = infile;

  // annotate output with data processing info
  addDataProcessing_(features_out, getProcessingInfo_(DataProcessing::CHARGE_DECONVOLUTION));
  addDataProcessing_(consensus, getProcessingInfo_(DataProcessing::CHARGE_DECONVOLUTION));
  addDataProcessing_(pairs, getProcessingInfo_(DataProcessing::CHARGE_DECONVOLUTION));

  ConsensusXMLFile consensus_file;
  consensus_file.store(outfile_cm, consensus);

  // optional outputs
  if (!outfile_p.empty())
  {
    consensus_file.store(outfile_p, pairs);
  }
  if (!outfile_fm.empty())
  {
    FeatureXMLFile().store(outfile_fm, features_out);
  }

  return EXECUTION_OK;
}
/**
  @brief Tool entry point: normalizes the maps of a consensusXML file.

  Loads the consensus map given by "in", applies the normalization selected
  by "algorithm_type" ("robust_regression", "median", "median_shift" or
  "quantile") and stores the result to "out".

  @return EXECUTION_OK on success, ILLEGAL_PARAMETERS if "algorithm_type"
          names none of the supported algorithms.
*/
ExitCodes main_(int, const char **) override
{
  String in = getStringOption_("in");
  String out = getStringOption_("out");
  String algo_type = getStringOption_("algorithm_type");
  String acc_filter = getStringOption_("accession_filter");
  String desc_filter = getStringOption_("description_filter");
  double ratio_threshold = getDoubleOption_("ratio_threshold");

  ConsensusXMLFile consensus_file;
  consensus_file.setLogType(log_type_);

  ConsensusMap consensus;
  consensus_file.load(in, consensus);

  // map normalization
  const bool median_scale = (algo_type == "median");
  const bool median_shift = (algo_type == "median_shift");

  if (algo_type == "robust_regression")
  {
    consensus.sortBySize();
    vector<double> ratios = ConsensusMapNormalizerAlgorithmThreshold::computeCorrelation(consensus, ratio_threshold, acc_filter, desc_filter);
    ConsensusMapNormalizerAlgorithmThreshold::normalizeMaps(consensus, ratios);
  }
  else if (median_scale || median_shift)
  {
    // both median variants share one implementation and differ only in the mode
    ConsensusMapNormalizerAlgorithmMedian::normalizeMaps(consensus,
                                                         median_shift ? ConsensusMapNormalizerAlgorithmMedian::NM_SHIFT
                                                                      : ConsensusMapNormalizerAlgorithmMedian::NM_SCALE,
                                                         acc_filter, desc_filter);
  }
  else if (algo_type == "quantile")
  {
    const bool no_filter_given = (acc_filter == "" && desc_filter == "");
    if (!no_filter_given)
    {
      LOG_WARN << endl << "NOTE: Accession / description filtering is not supported in quantile normalization mode. Ignoring filters." << endl << endl;
    }
    ConsensusMapNormalizerAlgorithmQuantile::normalizeMaps(consensus);
  }
  else
  {
    cerr << "Unknown algorithm type '" << algo_type.c_str() << "'." << endl;
    return ILLEGAL_PARAMETERS;
  }

  // annotate output with data processing info and save output file
  addDataProcessing_(consensus, getProcessingInfo_(DataProcessing::NORMALIZATION));
  consensus_file.store(out, consensus);

  return EXECUTION_OK;
}
/**
  @brief Tool entry point: normalizes the maps of a consensusXML file.

  Loads the consensus map given by "in", applies the normalization selected
  by "algorithm_type" ("robust_regression", "median" or "quantile") and
  stores the result to "out".

  @return EXECUTION_OK on success, ILLEGAL_PARAMETERS if "algorithm_type"
          names none of the supported algorithms.
*/
ExitCodes main_(int, const char **)
{
  String in = getStringOption_("in");
  String out = getStringOption_("out");
  String algo_type = getStringOption_("algorithm_type");
  double ratio_threshold = getDoubleOption_("ratio_threshold");

  ConsensusXMLFile consensus_file;
  consensus_file.setLogType(log_type_);

  ConsensusMap consensus;
  consensus_file.load(in, consensus);

  // map normalization
  const bool use_regression = (algo_type == "robust_regression");
  const bool use_median = (algo_type == "median");
  const bool use_quantile = (algo_type == "quantile");

  if (!(use_regression || use_median || use_quantile))
  {
    cerr << "Unknown algorithm type '" << algo_type.c_str() << "'." << endl;
    return ILLEGAL_PARAMETERS;
  }

  if (use_regression)
  {
    consensus.sortBySize();
    vector<double> ratios = ConsensusMapNormalizerAlgorithmThreshold::computeCorrelation(consensus, ratio_threshold);
    ConsensusMapNormalizerAlgorithmThreshold::normalizeMaps(consensus, ratios);
  }
  else if (use_median)
  {
    ConsensusMapNormalizerAlgorithmMedian::normalizeMaps(consensus);
  }
  else
  {
    ConsensusMapNormalizerAlgorithmQuantile::normalizeMaps(consensus);
  }

  // annotate output with data processing info and save output file
  addDataProcessing_(consensus, getProcessingInfo_(DataProcessing::NORMALIZATION));
  consensus_file.store(out, consensus);

  return EXECUTION_OK;
}
ExitCodes main_(int, const char**) { //------------------------------------------------------------- // parameter handling //------------------------------------------------------------- //file list StringList file_list = getStringList_("in"); //file type FileHandler fh; FileTypes::Type force_type; if (getStringOption_("in_type").size() > 0) { force_type = FileTypes::nameToType(getStringOption_("in_type")); } else { force_type = fh.getType(file_list[0]); } //output file names and types String out_file = getStringOption_("out"); //------------------------------------------------------------- // calculations //------------------------------------------------------------- bool annotate_file_origin = getFlag_("annotate_file_origin"); if (force_type == FileTypes::FEATUREXML) { FeatureMap<> out; for (Size i = 0; i < file_list.size(); ++i) { FeatureMap<> map; FeatureXMLFile fh; fh.load(file_list[i], map); if (annotate_file_origin) { for (FeatureMap<>::iterator it = map.begin(); it != map.end(); ++it) { it->setMetaValue("file_origin", DataValue(file_list[i])); } } out += map; } //------------------------------------------------------------- // writing output //------------------------------------------------------------- //annotate output with data processing info addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION)); FeatureXMLFile f; f.store(out_file, out); } else if (force_type == FileTypes::CONSENSUSXML) { ConsensusMap out; ConsensusXMLFile fh; fh.load(file_list[0], out); //skip first file for (Size i = 1; i < file_list.size(); ++i) { ConsensusMap map; ConsensusXMLFile fh; fh.load(file_list[i], map); if (annotate_file_origin) { for (ConsensusMap::iterator it = map.begin(); it != map.end(); ++it) { it->setMetaValue("file_origin", DataValue(file_list[i])); } } out += map; } //------------------------------------------------------------- // writing output //------------------------------------------------------------- //annotate output with data 
processing info addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION)); ConsensusXMLFile f; f.store(out_file, out); } else if (force_type == FileTypes::TRAML) { TargetedExperiment out; for (Size i = 0; i < file_list.size(); ++i) { TargetedExperiment map; TraMLFile fh; fh.load(file_list[i], map); out += map; } //------------------------------------------------------------- // writing output //------------------------------------------------------------- //annotate output with data processing info Software software; software.setName("FileMerger"); software.setVersion(VersionInfo::getVersion()); out.addSoftware(software); TraMLFile f; f.store(out_file, out); } else { // we might want to combine different types, thus we only // query in_type (which applies to all files) // and not the suffix or content of a single file force_type = FileTypes::nameToType(getStringOption_("in_type")); //rt bool rt_auto_number = getFlag_("raw:rt_auto"); bool rt_filename = getFlag_("raw:rt_filename"); bool rt_custom = false; DoubleList custom_rts = getDoubleList_("raw:rt_custom"); if (custom_rts.size() != 0) { rt_custom = true; if (custom_rts.size() != file_list.size()) { writeLog_("Custom retention time list must have as many elements as there are input files!"); printUsage_(); return ILLEGAL_PARAMETERS; } } //ms level bool user_ms_level = getFlag_("raw:user_ms_level"); MSExperiment<> out; out.reserve(file_list.size()); UInt rt_auto = 0; UInt native_id = 0; std::vector<MSChromatogram<ChromatogramPeak> > all_chromatograms; for (Size i = 0; i < file_list.size(); ++i) { String filename = file_list[i]; //load file MSExperiment<> in; fh.loadExperiment(filename, in, force_type, log_type_); if (in.empty() && in.getChromatograms().empty()) { writeLog_(String("Warning: Empty file '") + filename + "'!"); continue; } out.reserve(out.size() + in.size()); //warn if custom RT and more than one scan in input file if (rt_custom && in.size() > 1) { writeLog_(String("Warning: More 
than one scan in file '") + filename + "'! All scans will have the same retention time!"); } for (MSExperiment<>::const_iterator it2 = in.begin(); it2 != in.end(); ++it2) { //handle rt Real rt_final = it2->getRT(); if (rt_auto_number) { rt_final = ++rt_auto; } else if (rt_custom) { rt_final = custom_rts[i]; } else if (rt_filename) { if (!filename.hasSubstring("rt")) { writeLog_(String("Warning: cannot guess retention time from filename as it does not contain 'rt'")); } for (Size i = 0; i < filename.size(); ++i) { if (filename[i] == 'r' && ++i != filename.size() && filename[i] == 't' && ++i != filename.size() && isdigit(filename[i])) { String rt; while (i != filename.size() && (filename[i] == '.' || isdigit(filename[i]))) { rt += filename[i++]; } if (rt.size() > 0) { // remove dot from rt3892.98.dta // ^ if (rt[rt.size() - 1] == '.') { // remove last character rt.erase(rt.end() - 1); } } try { float tmp = rt.toFloat(); rt_final = tmp; } catch (Exception::ConversionError) { writeLog_(String("Warning: cannot convert the found retention time in a value '" + rt + "'.")); } } } } // none of the rt methods were successful if (rt_final == -1) { writeLog_(String("Warning: No valid retention time for output scan '") + rt_auto + "' from file '" + filename + "'"); } out.addSpectrum(*it2); out.getSpectra().back().setRT(rt_final); out.getSpectra().back().setNativeID(native_id); if (user_ms_level) { out.getSpectra().back().setMSLevel((int)getIntOption_("raw:ms_level")); } ++native_id; } // if we had only one spectrum, we can annotate it directly, for more spectra, we just name the source file leaving the spectra unannotated (to avoid a long and redundant list of sourceFiles) if (in.size() == 1) { out.getSpectra().back().setSourceFile(in.getSourceFiles()[0]); in.getSourceFiles().clear(); // delete source file annotated from source file (its in the spectrum anyways) } // copy experimental settings from first file if (i == 0) { out.ExperimentalSettings::operator=(in); } else // 
otherwise append { out.getSourceFiles().insert(out.getSourceFiles().end(), in.getSourceFiles().begin(), in.getSourceFiles().end()); // could be emtpty if spectrum was annotated above, but that's ok then } // also add the chromatograms for (std::vector<MSChromatogram<ChromatogramPeak> >::const_iterator it2 = in.getChromatograms().begin(); it2 != in.getChromatograms().end(); ++it2) { all_chromatograms.push_back(*it2); } } // set the chromatograms out.setChromatograms(all_chromatograms); //------------------------------------------------------------- // writing output //------------------------------------------------------------- //annotate output with data processing info addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION)); MzMLFile f; f.setLogType(log_type_); f.store(out_file, out); } return EXECUTION_OK; }
ExitCodes main_(int, const char**) { vector<ProteinIdentification> prot_ids; vector<PeptideIdentification> pep_ids; ProteinHit temp_protein_hit; //------------------------------------------------------------- // parsing parameters //------------------------------------------------------------- String inputfile_id = getStringOption_("id"); String inputfile_feature = getStringOption_("feature"); String inputfile_consensus = getStringOption_("consensus"); String inputfile_raw = getStringOption_("in"); String outputfile_name = getStringOption_("out"); //~ bool Ms1(getFlag_("MS1")); //~ bool Ms2(getFlag_("MS2")); bool remove_duplicate_features(getFlag_("remove_duplicate_features")); //------------------------------------------------------------- // fetch vocabularies //------------------------------------------------------------ ControlledVocabulary cv; cv.loadFromOBO("PSI-MS", File::find("/CV/psi-ms.obo")); cv.loadFromOBO("QC", File::find("/CV/qc-cv.obo")); QcMLFile qcmlfile; //------------------------------------------------------------- // MS aqiusition //------------------------------------------------------------ String base_name = QFileInfo(QString::fromStdString(inputfile_raw)).baseName(); cout << "Reading mzML file..." 
<< endl; MzMLFile mz_data_file; MSExperiment<Peak1D> exp; MzMLFile().load(inputfile_raw, exp); //---prep input exp.sortSpectra(); UInt min_mz = std::numeric_limits<UInt>::max(); UInt max_mz = 0; std::map<Size, UInt> mslevelcounts; qcmlfile.registerRun(base_name,base_name); //TODO use UIDs //---base MS aquisition qp String msaq_ref = base_name + "_msaq"; QcMLFile::QualityParameter qp; qp.id = msaq_ref; ///< Identifier qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000004"; try { //~ const ControlledVocabulary::CVTerm& test = cv.getTermByName("MS aquisition result details"); //~ cout << test.name << test.id << endl; const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); //~ const ControlledVocabulary::CVTerm& term = cv.getTerm("0000004"); qp.name = term.name; ///< Name } catch (...) { qp.name = "mzML file"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); //---file origin qp qp = QcMLFile::QualityParameter(); qp.name = "mzML file"; ///< Name qp.id = base_name + "_run_name"; ///< Identifier qp.cvRef = "MS"; ///< cv reference qp.cvAcc = "MS:1000577"; qp.value = base_name; qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.name = "instrument model"; ///< Name qp.id = base_name + "_instrument_name"; ///< Identifier qp.cvRef = "MS"; ///< cv reference qp.cvAcc = "MS:1000031"; qp.value = exp.getInstrument().getName(); qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.name = "completion time"; ///< Name qp.id = base_name + "_date"; ///< Identifier qp.cvRef = "MS"; ///< cv reference qp.cvAcc = "MS:1000747"; qp.value = exp.getDateTime().getDate(); qcmlfile.addRunQualityParameter(base_name, qp); //---precursors at QcMLFile::Attachment at; at.cvRef = "QC"; ///< cv reference at.cvAcc = "QC:0000044"; at.qualityRef = msaq_ref; at.id = base_name + "_precursors"; ///< Identifier try { const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc); at.name = term.name; ///< Name } catch 
(...) { at.name = "precursors"; ///< Name } at.colTypes.push_back("MS:1000894_[sec]"); //RT at.colTypes.push_back("MS:1000040"); //MZ for (Size i = 0; i < exp.size(); ++i) { mslevelcounts[exp[i].getMSLevel()]++; if (exp[i].getMSLevel() == 2) { if (exp[i].getPrecursors().front().getMZ() < min_mz) { min_mz = exp[i].getPrecursors().front().getMZ(); } if (exp[i].getPrecursors().front().getMZ() > max_mz) { max_mz = exp[i].getPrecursors().front().getMZ(); } std::vector<String> row; row.push_back(exp[i].getRT()); row.push_back(exp[i].getPrecursors().front().getMZ()); at.tableRows.push_back(row); } } qcmlfile.addRunAttachment(base_name, at); //---aquisition results qp qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000006"; ///< cv accession for "aquisition results" qp.id = base_name + "_ms1aquisition"; ///< Identifier qp.value = String(mslevelcounts[1]); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "number of ms1 spectra"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000007"; ///< cv accession for "aquisition results" qp.id = base_name + "_ms2aquisition"; ///< Identifier qp.value = String(mslevelcounts[2]); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "number of ms2 spectra"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000008"; ///< cv accession for "aquisition results" qp.id = base_name + "_Chromaquisition"; ///< Identifier qp.value = String(exp.getChromatograms().size()); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) 
{ qp.name = "number of chromatograms"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); at = QcMLFile::Attachment(); at.cvRef = "QC"; ///< cv reference at.cvAcc = "QC:0000009"; at.qualityRef = msaq_ref; at.id = base_name + "_mzrange"; ///< Identifier try { const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc); at.name = term.name; ///< Name } catch (...) { at.name = "MS MZ aquisition ranges"; ///< Name } at.colTypes.push_back("QC:0000010"); //MZ at.colTypes.push_back("QC:0000011"); //MZ std::vector<String> rowmz; rowmz.push_back(String(min_mz)); rowmz.push_back(String(max_mz)); at.tableRows.push_back(rowmz); qcmlfile.addRunAttachment(base_name, at); at = QcMLFile::Attachment(); at.cvRef = "QC"; ///< cv reference at.cvAcc = "QC:0000012"; at.qualityRef = msaq_ref; at.id = base_name + "_rtrange"; ///< Identifier try { const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc); at.name = term.name; ///< Name } catch (...) { at.name = "MS RT aquisition ranges"; ///< Name } at.colTypes.push_back("QC:0000013"); //MZ at.colTypes.push_back("QC:0000014"); //MZ std::vector<String> rowrt; rowrt.push_back(String(exp.begin()->getRT())); rowrt.push_back(String(exp.getSpectra().back().getRT())); at.tableRows.push_back(rowrt); qcmlfile.addRunAttachment(base_name, at); //---ion current stability ( & tic ) qp at = QcMLFile::Attachment(); at.cvRef = "QC"; ///< cv reference at.cvAcc = "QC:0000022"; at.qualityRef = msaq_ref; at.id = base_name + "_tics"; ///< Identifier try { const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc); at.name = term.name; ///< Name } catch (...) 
{ at.name = "MS TICs"; ///< Name } at.colTypes.push_back("MS:1000894_[sec]"); at.colTypes.push_back("MS:1000285"); UInt max = 0; Size below_10k = 0; for (Size i = 0; i < exp.size(); ++i) { if (exp[i].getMSLevel() == 1) { UInt sum = 0; for (Size j = 0; j < exp[i].size(); ++j) { sum += exp[i][j].getIntensity(); } if (sum > max) { max = sum; } if (sum < 10000) { ++below_10k; } std::vector<String> row; row.push_back(exp[i].getRT()); row.push_back(sum); at.tableRows.push_back(row); } } qcmlfile.addRunAttachment(base_name, at); qp = QcMLFile::QualityParameter(); qp.id = base_name + "_ticslump"; ///< Identifier qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000023"; qp.value = String((100 / exp.size()) * below_10k); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "percentage of tic slumps"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); //------------------------------------------------------------- // MS id //------------------------------------------------------------ if (inputfile_id != "") { IdXMLFile().load(inputfile_id, prot_ids, pep_ids); cerr << "idXML read ended. Found " << pep_ids.size() << " peptide identifications." << endl; ProteinIdentification::SearchParameters params = prot_ids[0].getSearchParameters(); vector<String> var_mods = params.variable_modifications; //~ boost::regex re("(?<=[KR])(?=[^P])"); String msid_ref = base_name + "_msid"; QcMLFile::QualityParameter qp; qp.id = msid_ref; ///< Identifier qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000025"; try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) 
{ qp.name = "MS identification result details"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); at = QcMLFile::Attachment(); at.cvRef = "QC"; ///< cv reference at.cvAcc = "QC:0000026"; at.qualityRef = msid_ref; at.id = base_name + "_idsetting"; ///< Identifier try { const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc); at.name = term.name; ///< Name } catch (...) { at.name = "MS id settings"; ///< Name } at.colTypes.push_back("MS:1001013"); //MS:1001013 db name MS:1001016 version MS:1001020 taxonomy at.colTypes.push_back("MS:1001016"); at.colTypes.push_back("MS:1001020"); std::vector<String> row; row.push_back(String(prot_ids.front().getSearchParameters().db)); row.push_back(String(prot_ids.front().getSearchParameters().db_version)); row.push_back(String(prot_ids.front().getSearchParameters().taxonomy)); at.tableRows.push_back(row); qcmlfile.addRunAttachment(base_name, at); UInt spectrum_count = 0; Size peptide_hit_count = 0; UInt runs_count = 0; Size protein_hit_count = 0; set<String> peptides; set<String> proteins; Size missedcleavages = 0; for (Size i = 0; i < pep_ids.size(); ++i) { if (!pep_ids[i].empty()) { ++spectrum_count; peptide_hit_count += pep_ids[i].getHits().size(); const vector<PeptideHit>& temp_hits = pep_ids[i].getHits(); for (Size j = 0; j < temp_hits.size(); ++j) { peptides.insert(temp_hits[j].getSequence().toString()); } } } for (set<String>::iterator it = peptides.begin(); it != peptides.end(); ++it) { for (String::const_iterator st = it->begin(); st != it->end() - 1; ++st) { if (*st == 'K' || *st == 'R') { ++missedcleavages; } } } for (Size i = 0; i < prot_ids.size(); ++i) { ++runs_count; protein_hit_count += prot_ids[i].getHits().size(); const vector<ProteinHit>& temp_hits = prot_ids[i].getHits(); for (Size j = 0; j < temp_hits.size(); ++j) { proteins.insert(temp_hits[j].getAccession()); } } qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000037"; ///< cv accession qp.id = base_name 
+ "_misscleave"; ///< Identifier qp.value = missedcleavages; try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "total number of missed cleavages"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000032"; ///< cv accession qp.id = base_name + "_totprot"; ///< Identifier qp.value = protein_hit_count; try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "total number of identified proteins"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000033"; ///< cv accession qp.id = base_name + "_totuniqprot"; ///< Identifier qp.value = String(proteins.size()); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "total number of uniquely identified proteins"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000029"; ///< cv accession qp.id = base_name + "_psms"; ///< Identifier qp.value = String(spectrum_count); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "total number of PSM"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000030"; ///< cv accession qp.id = base_name + "_totpeps"; ///< Identifier qp.value = String(peptide_hit_count); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) 
{ qp.name = "total number of identified peptides"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000031"; ///< cv accession qp.id = base_name + "_totuniqpeps"; ///< Identifier qp.value = String(peptides.size()); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "total number of uniquely identified peptides"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); at = QcMLFile::Attachment(); at.cvRef = "QC"; ///< cv reference at.cvAcc = "QC:0000038"; at.qualityRef = msid_ref; at.id = base_name + "_massacc"; ///< Identifier try { const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc); at.name = term.name; ///< Name } catch (...) { at.name = "delta ppm tables"; } //~ delta ppm QC:0000039 RT MZ uniqueness ProteinID MS:1000885 target/decoy Score PeptideSequence MS:1000889 Annots string Similarity Charge UO:0000219 TheoreticalWeight UO:0000221 Oxidation_(M) at.colTypes.push_back("RT"); at.colTypes.push_back("MZ"); at.colTypes.push_back("Score"); at.colTypes.push_back("PeptideSequence"); at.colTypes.push_back("Charge"); at.colTypes.push_back("TheoreticalWeight"); at.colTypes.push_back("delta_ppm"); for (UInt w = 0; w < var_mods.size(); ++w) { at.colTypes.push_back(String(var_mods[w]).substitute(' ', '_')); } std::vector<double> deltas; //~ prot_ids[0].getSearchParameters(); for (vector<PeptideIdentification>::iterator it = pep_ids.begin(); it != pep_ids.end(); ++it) { if (it->getHits().size() > 0) { std::vector<String> row; row.push_back(it->getRT()); row.push_back(it->getMZ()); PeptideHit tmp = it->getHits().front(); //TODO depends on score & sort vector<UInt> pep_mods; for (UInt w = 0; w < var_mods.size(); ++w) { pep_mods.push_back(0); } for (AASequence::ConstIterator z = tmp.getSequence().begin(); z != tmp.getSequence().end(); ++z) { Residue res = *z; String temp; if 
(res.getModification().size() > 0 && res.getModification() != "Carbamidomethyl") { temp = res.getModification() + " (" + res.getOneLetterCode() + ")"; //cout<<res.getModification()<<endl; for (UInt w = 0; w < var_mods.size(); ++w) { if (temp == var_mods[w]) { //cout<<temp; pep_mods[w] += 1; } } } } row.push_back(tmp.getScore()); row.push_back(tmp.getSequence().toString().removeWhitespaces()); row.push_back(tmp.getCharge()); row.push_back(String((tmp.getSequence().getMonoWeight() + tmp.getCharge() * Constants::PROTON_MASS_U) / tmp.getCharge())); double dppm = /* std::abs */ (getMassDifference(((tmp.getSequence().getMonoWeight() + tmp.getCharge() * Constants::PROTON_MASS_U) / tmp.getCharge()), it->getMZ(), true)); row.push_back(String(dppm)); deltas.push_back(dppm); for (UInt w = 0; w < var_mods.size(); ++w) { row.push_back(pep_mods[w]); } at.tableRows.push_back(row); } } qcmlfile.addRunAttachment(base_name, at); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000040"; ///< cv accession qp.id = base_name + "_mean_delta"; ///< Identifier qp.value = String(OpenMS::Math::mean(deltas.begin(), deltas.end())); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "mean delta ppm"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000041"; ///< cv accession qp.id = base_name + "_median_delta"; ///< Identifier qp.value = String(OpenMS::Math::median(deltas.begin(), deltas.end(), false)); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) 
{ qp.name = "median delta ppm"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000035"; ///< cv accession qp.id = base_name + "_ratio_id"; ///< Identifier qp.value = String(double(pep_ids.size()) / double(mslevelcounts[2])); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "id ratio"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); } //------------------------------------------------------------- // MS quantitation //------------------------------------------------------------ FeatureMap map; String msqu_ref = base_name + "_msqu"; if (inputfile_feature != "") { FeatureXMLFile f; f.load(inputfile_feature, map); cout << "Read featureXML file..." << endl; //~ UInt fiter = 0; map.sortByRT(); map.updateRanges(); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000045"; ///< cv accession qp.id = msqu_ref; ///< Identifier try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "MS quantification result details"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000046"; ///< cv accession qp.id = base_name + "_feature_count"; ///< Identifier qp.value = String(map.size()); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) 
{ qp.name = "number of features"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); } if (inputfile_feature != "" && !remove_duplicate_features) { QcMLFile::Attachment at; at = QcMLFile::Attachment(); at.cvRef = "QC"; ///< cv reference at.cvAcc = "QC:0000047"; at.qualityRef = msqu_ref; at.id = base_name + "_features"; ///< Identifier try { const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc); at.name = term.name; ///< Name } catch (...) { at.name = "features"; ///< Name } at.colTypes.push_back("MZ"); at.colTypes.push_back("RT"); at.colTypes.push_back("Intensity"); at.colTypes.push_back("Charge"); at.colTypes.push_back("Quality"); at.colTypes.push_back("FWHM"); at.colTypes.push_back("IDs"); UInt fiter = 0; map.sortByRT(); //ofstream out(outputfile_name.c_str()); while (fiter < map.size()) { std::vector<String> row; row.push_back(map[fiter].getMZ()); row.push_back(map[fiter].getRT()); row.push_back(map[fiter].getIntensity()); row.push_back(map[fiter].getCharge()); row.push_back(map[fiter].getOverallQuality()); row.push_back(map[fiter].getWidth()); row.push_back(map[fiter].getPeptideIdentifications().size()); fiter++; at.tableRows.push_back(row); } qcmlfile.addRunAttachment(base_name, at); } else if (inputfile_feature != "" && remove_duplicate_features) { QcMLFile::Attachment at; at = QcMLFile::Attachment(); at.cvRef = "QC"; ///< cv reference at.cvAcc = "QC:0000047"; at.qualityRef = msqu_ref; at.id = base_name + "_features"; ///< Identifier try { const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc); at.name = term.name; ///< Name } catch (...) 
{ at.name = "features"; ///< Name } at.colTypes.push_back("MZ"); at.colTypes.push_back("RT"); at.colTypes.push_back("Intensity"); at.colTypes.push_back("Charge"); FeatureMap map, map_out; FeatureXMLFile f; f.load(inputfile_feature, map); UInt fiter = 0; map.sortByRT(); while (fiter < map.size()) { FeatureMap map_tmp; for (UInt k = fiter; k <= map.size(); ++k) { if (abs(map[fiter].getRT() - map[k].getRT()) < 0.1) { //~ cout << fiter << endl; map_tmp.push_back(map[k]); } else { fiter = k; break; } } map_tmp.sortByMZ(); UInt retif = 1; map_out.push_back(map_tmp[0]); while (retif < map_tmp.size()) { if (abs(map_tmp[retif].getMZ() - map_tmp[retif - 1].getMZ()) > 0.01) { cout << "equal RT, but mass different" << endl; map_out.push_back(map_tmp[retif]); } retif++; } } qcmlfile.addRunAttachment(base_name, at); } if (inputfile_consensus != "") { cout << "Reading consensusXML file..." << endl; ConsensusXMLFile f; ConsensusMap map; f.load(inputfile_consensus, map); //~ String CONSENSUS_NAME = "_consensus.tsv"; //~ String combined_out = outputfile_name + CONSENSUS_NAME; //~ ofstream out(combined_out.c_str()); at = QcMLFile::Attachment(); qp.name = "consensuspoints"; ///< Name //~ qp.id = base_name + "_consensuses"; ///< Identifier qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:xxxxxxxx"; ///< cv accession "featuremapper results" at.colTypes.push_back("Native_spectrum_ID"); at.colTypes.push_back("DECON_RT_(sec)"); at.colTypes.push_back("DECON_MZ_(Th)"); at.colTypes.push_back("DECON_Intensity"); at.colTypes.push_back("Feature_RT_(sec)"); at.colTypes.push_back("Feature_MZ_(Th)"); at.colTypes.push_back("Feature_Intensity"); at.colTypes.push_back("Feature_Charge"); for (ConsensusMap::const_iterator cmit = map.begin(); cmit != map.end(); ++cmit) { const ConsensusFeature& CF = *cmit; for (ConsensusFeature::const_iterator cfit = CF.begin(); cfit != CF.end(); ++cfit) { std::vector<String> row; FeatureHandle FH = *cfit; row.push_back(CF.getMetaValue("spectrum_native_id")); 
row.push_back(CF.getRT()); row.push_back(CF.getMZ()); row.push_back(CF.getIntensity()); row.push_back(FH.getRT()); row.push_back(FH.getMZ()); row.push_back(FH.getCharge()); at.tableRows.push_back(row); } } qcmlfile.addRunAttachment(base_name, at); } //------------------------------------------------------------- // finalize //------------------------------------------------------------ qcmlfile.store(outputfile_name); return EXECUTION_OK; }
/**
  @brief Shared driver: loads the input maps, runs the grouping algorithm and writes the consensus map.

  All input files must be of one type. featureXML inputs are converted to
  consensus maps before grouping; any other type is loaded as consensusXML.

  @param algorithm Grouping algorithm; it is configured from the "algorithm:"
         parameter section and then run. It is not deleted here.
  @param labeled If true, "in" is read as a single file name and (featureXML
         input only) the file description of that file is duplicated and
         labeled "light"/"heavy". Otherwise "in" is read as a list of files.
  @return ILLEGAL_PARAMETERS if the input files differ in type, EXECUTION_OK otherwise.
*/
ExitCodes common_main_(FeatureGroupingAlgorithm * algorithm, bool labeled = false)
{
  //-------------------------------------------------------------
  // parameter handling
  //-------------------------------------------------------------
  StringList ins;
  if (!labeled)
  {
    ins = getStringList_("in");
  }
  else
  {
    ins.push_back(getStringOption_("in"));
  }
  String out = getStringOption_("out");

  //-------------------------------------------------------------
  // check for valid input
  //-------------------------------------------------------------
  // every input must have the type of the first one
  const FileTypes::Type file_type = FileHandler::getType(ins[0]);
  Size checked = 0;
  while (checked < ins.size())
  {
    if (FileHandler::getType(ins[checked]) != file_type)
    {
      writeLog_("Error: All input files must be of the same type!");
      return ILLEGAL_PARAMETERS;
    }
    ++checked;
  }

  //-------------------------------------------------------------
  // set up algorithm
  //-------------------------------------------------------------
  Param algo_settings = getParam_().copy("algorithm:", true);
  writeDebug_("Used algorithm parameters", algo_settings, 3);
  algorithm->setParameters(algo_settings);

  //-------------------------------------------------------------
  // perform grouping
  //-------------------------------------------------------------
  ConsensusMap out_map;
  StringList ms_run_locations;
  if (file_type != FileTypes::FEATUREXML)
  {
    // inputs are consensus maps already
    vector<ConsensusMap> maps(ins.size());
    ConsensusXMLFile cxml;
    for (Size idx = 0; idx < ins.size(); ++idx)
    {
      cxml.load(ins[idx], maps[idx]);
      maps[idx].updateRanges();

      // collect the primary MS run information
      const StringList& run_paths = maps[idx].getPrimaryMSRunPath();
      ms_run_locations.insert(ms_run_locations.end(), run_paths.begin(), run_paths.end());
    }

    // group
    algorithm->group(maps, out_map);

    // set file descriptions
    if (getFlag_("keep_subelements"))
    {
      // components of the output map are not the input maps themselves, but
      // the components of the input maps
      algorithm->transferSubelements(maps, out_map);
    }
    else
    {
      for (Size idx = 0; idx < ins.size(); ++idx)
      {
        out_map.getFileDescriptions()[idx].filename = ins[idx];
        out_map.getFileDescriptions()[idx].size = maps[idx].size();
        out_map.getFileDescriptions()[idx].unique_id = maps[idx].getUniqueId();
      }
    }
  }
  else
  {
    vector<ConsensusMap> maps(ins.size());

    // to save memory don't load convex hulls and subordinates
    FeatureXMLFile fxml;
    FeatureFileOptions load_options = fxml.getOptions();
    load_options.setLoadSubordinates(false);
    load_options.setLoadConvexHull(false);
    fxml.setOptions(load_options);

    setLogType(ProgressLogger::CMD);
    startProgress(0, ins.size(), "reading input");
    for (Size idx = 0; idx < ins.size(); ++idx)
    {
      FeatureMap features;
      fxml.load(ins[idx], features);

      out_map.getFileDescriptions()[idx].filename = ins[idx];
      out_map.getFileDescriptions()[idx].size = features.size();
      out_map.getFileDescriptions()[idx].unique_id = features.getUniqueId();

      // collect the primary MS run information
      const StringList& run_paths = features.getPrimaryMSRunPath();
      ms_run_locations.insert(ms_run_locations.end(), run_paths.begin(), run_paths.end());

      // to save memory, strip convex hulls, subordinates and meta info
      for (FeatureMap::Iterator feat = features.begin(); feat != features.end(); ++feat)
      {
        feat->getSubordinates().clear();
        feat->getConvexHulls().clear();
        feat->clearMetaInfo();
      }

      MapConversion::convert(idx, features, maps[idx]);
      maps[idx].updateRanges();

      // the reported progress is the index of the file just processed
      setProgress(idx);
    }
    endProgress();

    // exception for "labeled" algorithms: copy file descriptions
    if (labeled)
    {
      out_map.getFileDescriptions()[1] = out_map.getFileDescriptions()[0];
      out_map.getFileDescriptions()[0].label = "light";
      out_map.getFileDescriptions()[1].label = "heavy";
    }

    // group
    algorithm->group(maps, out_map);
  }

  // assign unique ids
  out_map.applyMemberFunction(&UniqueIdInterface::setUniqueId);

  // annotate output with data processing info
  addDataProcessing_(out_map, getProcessingInfo_(DataProcessing::FEATURE_GROUPING));

  // set primary MS runs
  out_map.setPrimaryMSRunPath(ms_run_locations);

  // write output
  ConsensusXMLFile().store(out, out_map);

  // some statistics: count consensus features per size, report largest first
  map<Size, UInt> size_histogram;
  for (ConsensusMap::const_iterator cf = out_map.begin(); cf != out_map.end(); ++cf)
  {
    ++size_histogram[cf->size()];
  }

  LOG_INFO << "Number of consensus features:" << endl;
  map<Size, UInt>::reverse_iterator bucket = size_histogram.rbegin();
  while (bucket != size_histogram.rend())
  {
    LOG_INFO << "  of size " << setw(2) << bucket->first << ": " << setw(6) << bucket->second << endl;
    ++bucket;
  }
  LOG_INFO << "  total:      " << setw(6) << out_map.size() << endl;

  return EXECUTION_OK;
}
/**
  @brief Groups the features of all "in" files with FeatureGroupingAlgorithmUnlabeled and writes a consensusXML to "out".

  For featureXML input, the file with the most features is used as the
  reference and the remaining files are added to it one at a time, so only
  one feature map is held in memory alongside the result. Any other input
  type is loaded as consensusXML and grouped in one call.

  @return ILLEGAL_PARAMETERS if the input files differ in type, EXECUTION_OK otherwise.
*/
ExitCodes main_(int, const char **)
{
  // Automatic storage: the algorithm object is released on every exit path,
  // including the early return below and exceptions thrown while loading or
  // storing files.
  FeatureGroupingAlgorithmUnlabeled algorithm;

  //-------------------------------------------------------------
  // parameter handling
  //-------------------------------------------------------------
  StringList ins;
  ins = getStringList_("in");
  String out = getStringOption_("out");

  //-------------------------------------------------------------
  // check for valid input
  //-------------------------------------------------------------
  // check if all input files have the correct type
  FileTypes::Type file_type = FileHandler::getType(ins[0]);
  for (Size i = 0; i < ins.size(); ++i)
  {
    if (FileHandler::getType(ins[i]) != file_type)
    {
      writeLog_("Error: All input files must be of the same type!");
      return ILLEGAL_PARAMETERS;
    }
  }

  //-------------------------------------------------------------
  // set up algorithm
  //-------------------------------------------------------------
  Param algorithm_param = getParam_().copy("algorithm:", true);
  writeDebug_("Used algorithm parameters", algorithm_param, 3);
  algorithm.setParameters(algorithm_param);

  Size reference_index(0);

  //-------------------------------------------------------------
  // perform grouping
  //-------------------------------------------------------------
  // load input
  ConsensusMap out_map;
  StringList ms_run_locations;
  if (file_type == FileTypes::FEATUREXML)
  {
    // use map with highest number of features as reference:
    Size max_count(0);
    FeatureXMLFile f;
    for (Size i = 0; i < ins.size(); ++i)
    {
      Size s = f.loadSize(ins[i]);
      if (s > max_count)
      {
        max_count = s;
        reference_index = i;
      }
    }

    // Load reference map and input it to the algorithm. The map lives in its
    // own scope; only the meta data needed later is copied out of it.
    UInt64 ref_id(0);
    Size ref_size(0);
    std::vector<PeptideIdentification> ref_pepids;
    std::vector<ProteinIdentification> ref_protids;
    {
      FeatureMap map_ref;
      FeatureXMLFile f_fxml_tmp;
      f_fxml_tmp.getOptions().setLoadConvexHull(false);
      f_fxml_tmp.getOptions().setLoadSubordinates(false);
      f_fxml_tmp.load(ins[reference_index], map_ref);
      algorithm.setReference(reference_index, map_ref);
      ref_id = map_ref.getUniqueId();
      ref_size = map_ref.size();
      ref_pepids = map_ref.getUnassignedPeptideIdentifications();
      ref_protids = map_ref.getProteinIdentifications();
    }

    ConsensusMap dummy;
    // go through all input files and add them to the result one by one
    for (Size i = 0; i < ins.size(); ++i)
    {
      FeatureXMLFile f_fxml_tmp;
      FeatureMap tmp_map;
      f_fxml_tmp.getOptions().setLoadConvexHull(false);
      f_fxml_tmp.getOptions().setLoadSubordinates(false);
      f_fxml_tmp.load(ins[i], tmp_map);

      // copy over information on the primary MS run
      StringList ms_runs;
      tmp_map.getPrimaryMSRunPath(ms_runs);
      ms_run_locations.insert(ms_run_locations.end(), ms_runs.begin(), ms_runs.end());

      if (i != reference_index)
      {
        algorithm.addToGroup(i, tmp_map);

        // store some meta-data about the maps in the "dummy" object -> try to
        // keep the same order as they were given in the input independent of
        // which map is the reference.
        dummy.getFileDescriptions()[i].filename = ins[i];
        dummy.getFileDescriptions()[i].size = tmp_map.size();
        dummy.getFileDescriptions()[i].unique_id = tmp_map.getUniqueId();

        // add protein identifications to result map
        dummy.getProteinIdentifications().insert(
          dummy.getProteinIdentifications().end(),
          tmp_map.getProteinIdentifications().begin(),
          tmp_map.getProteinIdentifications().end());

        // add unassigned peptide identifications to result map
        dummy.getUnassignedPeptideIdentifications().insert(
          dummy.getUnassignedPeptideIdentifications().end(),
          tmp_map.getUnassignedPeptideIdentifications().begin(),
          tmp_map.getUnassignedPeptideIdentifications().end());
      }
      else
      {
        // copy the meta-data from the reference map
        dummy.getFileDescriptions()[i].filename = ins[i];
        dummy.getFileDescriptions()[i].size = ref_size;
        dummy.getFileDescriptions()[i].unique_id = ref_id;

        // add protein identifications to result map
        dummy.getProteinIdentifications().insert(
          dummy.getProteinIdentifications().end(),
          ref_protids.begin(),
          ref_protids.end());

        // add unassigned peptide identifications to result map
        dummy.getUnassignedPeptideIdentifications().insert(
          dummy.getUnassignedPeptideIdentifications().end(),
          ref_pepids.begin(),
          ref_pepids.end());
      }
    }

    // get the resulting map
    out_map = algorithm.getResultMap();

    //
    // Copy back meta-data (Protein / Peptide ids / File descriptions)
    //

    // add protein identifications to result map
    out_map.getProteinIdentifications().insert(
      out_map.getProteinIdentifications().end(),
      dummy.getProteinIdentifications().begin(),
      dummy.getProteinIdentifications().end());

    // add unassigned peptide identifications to result map
    out_map.getUnassignedPeptideIdentifications().insert(
      out_map.getUnassignedPeptideIdentifications().end(),
      dummy.getUnassignedPeptideIdentifications().begin(),
      dummy.getUnassignedPeptideIdentifications().end());

    out_map.setFileDescriptions(dummy.getFileDescriptions());

    // canonical ordering for checking the results, and the ids have no real meaning anyway
    // the way this was done in DelaunayPairFinder and StablePairFinder
    // -> the same ordering as FeatureGroupingAlgorithmUnlabeled::group applies!
    out_map.sortByMZ();
    out_map.updateRanges();
  }
  else
  {
    vector<ConsensusMap> maps(ins.size());
    ConsensusXMLFile f;
    for (Size i = 0; i < ins.size(); ++i)
    {
      f.load(ins[i], maps[i]);
      StringList ms_runs;
      maps[i].getPrimaryMSRunPath(ms_runs);
      ms_run_locations.insert(ms_run_locations.end(), ms_runs.begin(), ms_runs.end());
    }

    // group (explicitly the base class overload taking consensus maps)
    algorithm.FeatureGroupingAlgorithm::group(maps, out_map);

    // set file descriptions:
    bool keep_subelements = getFlag_("keep_subelements");
    if (!keep_subelements)
    {
      for (Size i = 0; i < ins.size(); ++i)
      {
        out_map.getFileDescriptions()[i].filename = ins[i];
        out_map.getFileDescriptions()[i].size = maps[i].size();
        out_map.getFileDescriptions()[i].unique_id = maps[i].getUniqueId();
      }
    }
    else
    {
      // components of the output map are not the input maps themselves, but
      // the components of the input maps:
      algorithm.transferSubelements(maps, out_map);
    }
  }

  // assign unique ids
  out_map.applyMemberFunction(&UniqueIdInterface::setUniqueId);

  // annotate output with data processing info
  addDataProcessing_(out_map, getProcessingInfo_(DataProcessing::FEATURE_GROUPING));

  // set primary MS runs
  out_map.setPrimaryMSRunPath(ms_run_locations);

  // write output
  ConsensusXMLFile().store(out, out_map);

  // some statistics: number of consensus features per size, largest first
  map<Size, UInt> num_consfeat_of_size;
  for (ConsensusMap::const_iterator cmit = out_map.begin(); cmit != out_map.end(); ++cmit)
  {
    ++num_consfeat_of_size[cmit->size()];
  }

  LOG_INFO << "Number of consensus features:" << endl;
  for (map<Size, UInt>::reverse_iterator i = num_consfeat_of_size.rbegin(); i != num_consfeat_of_size.rend(); ++i)
  {
    LOG_INFO << "  of size " << setw(2) << i->first << ": " << setw(6) << i->second << endl;
  }
  LOG_INFO << "  total:      " << setw(6) << out_map.size() << endl;

  return EXECUTION_OK;
}
/**
  @brief Maps the identifications from the "id" file onto the map given by "in" and stores the annotated map in "out".

  The "id" file may be idXML or mzIdentML. The "in" file is handled when it
  is consensusXML, featureXML or mzQuantML; for any other type nothing is
  written.

  @return EXECUTION_OK
  @throws Exception::IllegalArgument if the "id" file is neither idXML nor mzIdentML.
*/
ExitCodes main_(int, const char**)
{
  //----------------------------------------------------------------
  // load ids
  //----------------------------------------------------------------
  String id_path = getStringOption_("id");
  vector<ProteinIdentification> protein_ids;
  vector<PeptideIdentification> peptide_ids;

  FileTypes::Type id_type = FileHandler::getType(id_path);
  if (id_type != FileTypes::IDXML && id_type != FileTypes::MZIDENTML)
  {
    throw Exception::IllegalArgument(__FILE__, __LINE__, __PRETTY_FUNCTION__, "wrong id fileformat");
  }
  if (id_type == FileTypes::IDXML)
  {
    IdXMLFile().load(id_path, protein_ids, peptide_ids);
  }
  else
  {
    MzIdentMLFile().load(id_path, protein_ids, peptide_ids);
  }

  String in = getStringOption_("in");
  String out = getStringOption_("out");
  FileTypes::Type in_type = FileHandler::getType(in);

  //----------------------------------------------------------------
  // create mapper
  //----------------------------------------------------------------
  IDMapper mapper;
  Param mapper_settings = mapper.getParameters();
  mapper_settings.setValue("rt_tolerance", getDoubleOption_("rt_tolerance"));
  mapper_settings.setValue("mz_tolerance", getDoubleOption_("mz_tolerance"));
  mapper_settings.setValue("mz_measure", getStringOption_("mz_measure"));
  mapper_settings.setValue("mz_reference", getStringOption_("mz_reference"));
  // the flag is passed on as the string "true" or "false"
  if (getFlag_("ignore_charge"))
  {
    mapper_settings.setValue("ignore_charge", "true");
  }
  else
  {
    mapper_settings.setValue("ignore_charge", "false");
  }
  mapper.setParameters(mapper_settings);

  // The input types are mutually exclusive, so at most one branch runs.
  if (in_type == FileTypes::CONSENSUSXML)
  {
    //----------------------------------------------------------------
    // consensusXML
    //----------------------------------------------------------------
    ConsensusXMLFile cxml;
    ConsensusMap consensus;
    cxml.load(in, consensus);

    bool measure_from_subelements = getFlag_("consensus:use_subelements");
    bool annotate_ids_with_subelements = getFlag_("consensus:annotate_ids_with_subelements");
    mapper.annotate(consensus, peptide_ids, protein_ids, measure_from_subelements, annotate_ids_with_subelements);

    // annotate output with data processing info
    addDataProcessing_(consensus, getProcessingInfo_(DataProcessing::IDENTIFICATION_MAPPING));

    cxml.store(out, consensus);
  }
  else if (in_type == FileTypes::FEATUREXML)
  {
    //----------------------------------------------------------------
    // featureXML
    //----------------------------------------------------------------
    FeatureXMLFile fxml;
    FeatureMap features;
    fxml.load(in, features);

    mapper.annotate(features, peptide_ids, protein_ids,
                    getFlag_("feature:use_centroid_rt"),
                    getFlag_("feature:use_centroid_mz"));

    // annotate output with data processing info
    addDataProcessing_(features, getProcessingInfo_(DataProcessing::IDENTIFICATION_MAPPING));

    fxml.store(out, features);
  }
  else if (in_type == FileTypes::MZQUANTML)
  {
    //----------------------------------------------------------------
    // MzQuantML
    //----------------------------------------------------------------
    MzQuantMLFile mzq_file;
    MSQuantifications msq;
    mzq_file.load(in, msq);

    bool measure_from_subelements = getFlag_("consensus:use_subelements");

    // annotate every consensus map contained in the quantification in place
    std::vector<ConsensusMap>& consensus_maps = msq.getConsensusMaps();
    for (Size idx = 0; idx < consensus_maps.size(); ++idx)
    {
      mapper.annotate(consensus_maps[idx], peptide_ids, protein_ids, measure_from_subelements);

      // annotate output with data processing info
      addDataProcessing_(consensus_maps[idx], getProcessingInfo_(DataProcessing::IDENTIFICATION_MAPPING));
    }

    mzq_file.store(out, msq);
  }

  return EXECUTION_OK;
}
ExitCodes main_(int, const char**) { //------------------------------------------------------------- // parameter handling //------------------------------------------------------------- // file list StringList file_list = getStringList_("in"); // file type FileHandler file_handler; FileTypes::Type force_type; if (getStringOption_("in_type").size() > 0) { force_type = FileTypes::nameToType(getStringOption_("in_type")); } else { force_type = file_handler.getType(file_list[0]); } // output file names and types String out_file = getStringOption_("out"); bool annotate_file_origin = getFlag_("annotate_file_origin"); rt_gap_ = getDoubleOption_("rt_concat:gap"); vector<String> trafo_out = getStringList_("rt_concat:trafo_out"); if (trafo_out.empty()) { // resize now so we don't have to worry about indexing out of bounds: trafo_out.resize(file_list.size()); } else if (trafo_out.size() != file_list.size()) { writeLog_("Error: Number of transformation output files must equal the number of input files (parameters 'rt_concat:trafo_out'/'in')!"); return ILLEGAL_PARAMETERS; } //------------------------------------------------------------- // calculations //------------------------------------------------------------- if (force_type == FileTypes::FEATUREXML) { FeatureMap out; FeatureXMLFile fh; for (Size i = 0; i < file_list.size(); ++i) { FeatureMap map; fh.load(file_list[i], map); if (annotate_file_origin) { for (FeatureMap::iterator it = map.begin(); it != map.end(); ++it) { it->setMetaValue("file_origin", DataValue(file_list[i])); } } if (rt_gap_ > 0.0) // concatenate in RT { adjustRetentionTimes_(map, trafo_out[i], i == 0); } out += map; } //------------------------------------------------------------- // writing output //------------------------------------------------------------- // annotate output with data processing info addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION)); fh.store(out_file, out); } else if (force_type == 
FileTypes::CONSENSUSXML) { ConsensusMap out; ConsensusXMLFile fh; fh.load(file_list[0], out); // skip first file for (Size i = 1; i < file_list.size(); ++i) { ConsensusMap map; fh.load(file_list[i], map); if (annotate_file_origin) { for (ConsensusMap::iterator it = map.begin(); it != map.end(); ++it) { it->setMetaValue("file_origin", DataValue(file_list[i])); } } if (rt_gap_ > 0.0) // concatenate in RT { adjustRetentionTimes_(map, trafo_out[i], i == 0); } out += map; } //------------------------------------------------------------- // writing output //------------------------------------------------------------- // annotate output with data processing info addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION)); fh.store(out_file, out); } else if (force_type == FileTypes::TRAML) { TargetedExperiment out; TraMLFile fh; for (Size i = 0; i < file_list.size(); ++i) { TargetedExperiment map; fh.load(file_list[i], map); out += map; } //------------------------------------------------------------- // writing output //------------------------------------------------------------- // annotate output with data processing info Software software; software.setName("FileMerger"); software.setVersion(VersionInfo::getVersion()); out.addSoftware(software); fh.store(out_file, out); } else // raw data input (e.g. 
mzML) { // RT bool rt_auto_number = getFlag_("raw:rt_auto"); bool rt_filename = getFlag_("raw:rt_filename"); bool rt_custom = false; DoubleList custom_rts = getDoubleList_("raw:rt_custom"); if (!custom_rts.empty()) { rt_custom = true; if (custom_rts.size() != file_list.size()) { writeLog_("Custom retention time list (parameter 'raw:rt_custom') must have as many elements as there are input files (parameter 'in')!"); return ILLEGAL_PARAMETERS; } } // MS level Int ms_level = getIntOption_("raw:ms_level"); MSExperiment<> out; UInt rt_auto = 0; UInt native_id = 0; for (Size i = 0; i < file_list.size(); ++i) { String filename = file_list[i]; // load file force_type = file_handler.getType(file_list[i]); MSExperiment<> in; file_handler.loadExperiment(filename, in, force_type, log_type_); if (in.empty() && in.getChromatograms().empty()) { writeLog_(String("Warning: Empty file '") + filename + "'!"); continue; } out.reserve(out.size() + in.size()); // warn if custom RT and more than one scan in input file if (rt_custom && in.size() > 1) { writeLog_(String("Warning: More than one scan in file '") + filename + "'! 
All scans will have the same retention time!"); } // handle special raw data options: for (MSExperiment<>::iterator spec_it = in.begin(); spec_it != in.end(); ++spec_it) { float rt_final = spec_it->getRT(); if (rt_auto_number) { rt_final = ++rt_auto; } else if (rt_custom) { rt_final = custom_rts[i]; } else if (rt_filename) { static const boost::regex re("rt(\\d+(\\.\\d+)?)"); boost::smatch match; bool found = boost::regex_search(filename, match, re); if (found) { rt_final = String(match[1]).toFloat(); } else { writeLog_("Warning: could not extract retention time from filename '" + filename + "'"); } } // none of the rt methods were successful if (rt_final < 0) { writeLog_(String("Warning: No valid retention time for output scan '") + rt_auto + "' from file '" + filename + "'"); } spec_it->setRT(rt_final); spec_it->setNativeID("spectrum=" + String(native_id)); if (ms_level > 0) { spec_it->setMSLevel(ms_level); } ++native_id; } // if we have only one spectrum, we can annotate it directly, for more spectra, we just name the source file leaving the spectra unannotated (to avoid a long and redundant list of sourceFiles) if (in.size() == 1) { in[0].setSourceFile(in.getSourceFiles()[0]); in.getSourceFiles().clear(); // delete source file annotated from source file (it's in the spectrum anyways) } if (rt_gap_ > 0.0) // concatenate in RT { adjustRetentionTimes_(in, trafo_out[i], i == 0); } // add spectra to output for (MSExperiment<>::const_iterator spec_it = in.begin(); spec_it != in.end(); ++spec_it) { out.addSpectrum(*spec_it); } // also add the chromatograms for (vector<MSChromatogram<ChromatogramPeak> >::const_iterator chrom_it = in.getChromatograms().begin(); chrom_it != in.getChromatograms().end(); ++chrom_it) { out.addChromatogram(*chrom_it); } // copy experimental settings from first file if (i == 0) { out.ExperimentalSettings::operator=(in); } else // otherwise append { out.getSourceFiles().insert(out.getSourceFiles().end(), in.getSourceFiles().begin(), 
in.getSourceFiles().end()); // could be emtpty if spectrum was annotated above, but that's ok then } } //------------------------------------------------------------- // writing output //------------------------------------------------------------- // annotate output with data processing info addDataProcessing_(out, getProcessingInfo_(DataProcessing::FORMAT_CONVERSION)); MzMLFile f; f.setLogType(log_type_); f.store(out_file, out); } return EXECUTION_OK; }
ConsensusXMLFile * ptr = 0; ConsensusXMLFile* nullPointer = 0; START_SECTION((ConsensusXMLFile())) ptr = new ConsensusXMLFile(); TEST_NOT_EQUAL(ptr, nullPointer) END_SECTION START_SECTION((~ConsensusXMLFile())) delete ptr; END_SECTION TOLERANCE_ABSOLUTE(0.01) START_SECTION(const PeakFileOptions& getOptions() const) ConsensusXMLFile file; TEST_EQUAL(file.getOptions().hasMSLevels(), false) END_SECTION START_SECTION(PeakFileOptions& getOptions()) ConsensusXMLFile file; file.getOptions().addMSLevel(1); TEST_EQUAL(file.getOptions().hasMSLevels(), true); END_SECTION START_SECTION((void load(const String &filename, ConsensusMap & map))) ConsensusMap map; ConsensusXMLFile file; file.load(OPENMS_GET_TEST_DATA_PATH("ConsensusXMLFile_1.consensusXML"), map); //test DocumentIdentifier addition