bool TraMLFile::isSemanticallyValid(const String & filename, StringList & errors, StringList & warnings) { //load mapping CVMappings mapping; CVMappingFile().load(File::find("/MAPPING/TraML-mapping.xml"), mapping); //load cvs ControlledVocabulary cv; cv.loadFromOBO("MS", File::find("/CV/psi-ms.obo")); cv.loadFromOBO("UO", File::find("/CV/unit.obo")); //validate Internal::TraMLValidator v(mapping, cv); bool result = v.validate(filename, errors, warnings); return result; }
bool MzQuantMLFile::isSemanticallyValid(const String & filename, StringList & errors, StringList & warnings) { //load mapping CVMappings mapping; CVMappingFile().load(File::find("/MAPPING/mzQuantML-mapping_1.0.0-rc2-general.xml"), mapping); //load cvs ControlledVocabulary cv; cv.loadFromOBO("MS", File::find("/CV/psi-ms.obo")); cv.loadFromOBO("PATO", File::find("/CV/quality.obo")); cv.loadFromOBO("UO", File::find("/CV/unit.obo")); cv.loadFromOBO("BTO", File::find("/CV/brenda.obo")); cv.loadFromOBO("GO", File::find("/CV/goslim_goa.obo")); //validate TODO Internal::MzQuantMLValidator v(mapping, cv); bool result = v.validate(filename, errors, warnings); return result; }
/**
  @brief Validates the file given by "in" against the CV mapping given by "mapping_file".

  The vocabularies are taken from the "cv" string list if it is non-empty;
  otherwise a fixed set of OBO files located through File::find() is loaded.
  Term value types and units are checked as well. All warnings, then all
  errors, are printed to standard output.

  @return EXECUTION_OK only if the validator reports success and neither
          warnings nor errors were collected; PARSE_ERROR otherwise.
*/
ExitCodes main_(int, const char**) override
{
  const String in_file = getStringOption_("in");
  const String mapping_file = getStringOption_("mapping_file");
  const StringList cv_list = getStringList_("cv");

  CVMappings mappings;
  CVMappingFile().load(mapping_file, mappings, false);

  // Vocabulary files may be given on the command line;
  // without any, the hard-coded OBO files are used.
  ControlledVocabulary cv;
  if (cv_list.empty())
  {
    cv.loadFromOBO("PSI-MOD", File::find("/CHEMISTRY/PSI-MOD.obo"));
    cv.loadFromOBO("PATO", File::find("/CV/quality.obo"));
    cv.loadFromOBO("UO", File::find("/CV/unit.obo"));
    cv.loadFromOBO("brenda", File::find("/CV/brenda.obo"));
    cv.loadFromOBO("GO", File::find("/CV/goslim_goa.obo"));
    cv.loadFromOBO("UNIMOD", File::find("/CV/unimod.obo"));
    cv.loadFromOBO("PSI-MS", File::find("/CV/psi-ms.obo"));
  }
  else
  {
    for (const String& obo_file : cv_list)
    {
      // TODO do we need to provide the name of the namespace here?
      cv.loadFromOBO("", obo_file);
    }
  }

  // check cv params, including value types and units
  Internal::SemanticValidator semantic_validator(mappings, cv);
  semantic_validator.setCheckTermValueTypes(true);
  semantic_validator.setCheckUnits(true);

  StringList errors;
  StringList warnings;
  const bool valid = semantic_validator.validate(in_file, errors, warnings);

  for (const String& message : warnings)
  {
    cout << "Warning: " << message << endl;
  }
  for (const String& message : errors)
  {
    cout << "Error: " << message << endl;
  }

  // anything short of a completely clean run counts as a failure
  if (!valid || !warnings.empty() || !errors.empty())
  {
    return PARSE_ERROR;
  }

  cout << "Congratulations, the file is valid!" << endl;
  return EXECUTION_OK;
}
/**
  @brief Attaches a plot (stored base64-encoded) or a table (read from a CSV file)
         to a quality parameter of a qcML file and writes the result to "out".

  The target run is taken from option "name"; if option "run" is set, the base
  name of that file takes precedence. If neither yields a run name, the run is
  picked automatically when the input qcML holds exactly one run.

  A readable plot file wins over a table; the table is only used when no plot
  data could be read. The attachment is added to the run quality parameter
  matching "qp_att_acc" or, if there is none, to the matching set quality
  parameter.

  @return EXECUTION_OK after the file was stored; ILLEGAL_PARAMETERS if no run
          can be determined, if there is neither plot data nor a table, if
          "cv_acc" is not a known CV term, or if no matching quality parameter
          exists.
*/
ExitCodes main_(int, const char**)
{
  //-------------------------------------------------------------
  // parsing parameters
  //-------------------------------------------------------------
  const String in = getStringOption_("in");
  const String out = getStringOption_("out");
  const String target_qp = getStringOption_("qp_att_acc");
  const String target_acc = getStringOption_("cv_acc");
  String target_run = getStringOption_("name");
  const String target_file = getStringOption_("run");
  const String plot_file = getStringOption_("plot");
  const String tab = getStringOption_("table");

  //-------------------------------------------------------------
  // fetch vocabularies
  //-------------------------------------------------------------
  ControlledVocabulary cv;
  cv.loadFromOBO("PSI-MS", File::find("/CV/psi-ms.obo"));
  cv.loadFromOBO("QC", File::find("/CV/qc-cv.obo"));

  //-------------------------------------------------------------
  // reading input
  //-------------------------------------------------------------
  if (!target_file.empty())
  {
    // "run" has precedence over "name"
    target_run = QFileInfo(QString::fromStdString(target_file)).baseName();
  }

  QcMLFile qcmlfile;
  if (!in.empty())
  {
    qcmlfile.load(in);
  }

  if (target_run.empty())
  {
    // no run given: only unambiguous if the file holds exactly one run
    std::vector<String> run_names;
    qcmlfile.getRunNames(run_names);
    if (run_names.size() != 1)
    {
      cerr << "Error: You have to give at least one of the following parameter (in ascending precedence): name, run. Aborting!" << endl;
      return ILLEGAL_PARAMETERS;
    }
    target_run = run_names.front();
  }

  // read the plot (if it can be opened) and keep it base64-encoded
  QFile f(plot_file.c_str());
  String plot_b64;
  if (f.open(QIODevice::ReadOnly))
  {
    QByteArray ba = f.readAll();
    f.close();
    plot_b64 = String(QString(ba.toBase64()));
  }

  QcMLFile::Attachment at;
  at.cvAcc = target_acc;
  at.id = String(UniqueIdGenerator::getUniqueId());
  at.cvRef = "QC"; //TODO assign right cv reference

  // Without plot data and without a table there is nothing to embed. This
  // check must come first; nested below the "plot or table" test it could
  // never fire and the tool would report success without attaching anything.
  if (plot_b64.empty() && tab.empty())
  {
    cerr << "Error: Nothing valid to attach. Aborting!" << endl;
    return ILLEGAL_PARAMETERS;
  }

  // the attachment is named after the CV term, so the accession has to exist
  try
  {
    const ControlledVocabulary::CVTerm& term = cv.getTerm(target_acc);
    at.name = term.name; ///< Name
    //~ at.unitRef; //TODO MIME type
    //~ at.unitAcc;
  }
  catch (...)
  {
    cerr << "Error: You have to give the accession of a existing cv term. Aborting!" << endl;
    return ILLEGAL_PARAMETERS;
  }

  if (!plot_b64.empty())
  {
    at.binary = plot_b64;
  }
  else
  {
    // table: first row holds the column types, all further rows the data
    CsvFile csv_file(tab);
    if (csv_file.size() > 1)
    {
      StringList header;
      csv_file.getRow(0, header);
      at.colTypes.insert(at.colTypes.end(), header.begin(), header.end());

      for (Size row_index = 1; row_index < csv_file.size(); ++row_index)
      {
        StringList cells;
        csv_file.getRow(row_index, cells);
        //TODO throw error if cells.size() != at.colTypes.size()
        at.tableRows.push_back(std::vector<String>(cells.begin(), cells.end()));
      }
    }
  }

  // prefer a run quality parameter, fall back to a set quality parameter
  std::vector<String> ids;
  qcmlfile.existsRunQualityParameter(target_run, target_qp, ids);
  if (!ids.empty())
  {
    at.qualityRef = ids.front();
    qcmlfile.addRunAttachment(target_run, at);
  }
  else
  {
    qcmlfile.existsSetQualityParameter(target_run, target_qp, ids);
    if (ids.empty())
    {
      cerr << "Error: You have to give the accession of a existing cv term to attacht to. Aborting!" << endl;
      return ILLEGAL_PARAMETERS;
    }
    at.qualityRef = ids.front();
    qcmlfile.addSetAttachment(target_run, at);
  }

  qcmlfile.store(out);
  return EXECUTION_OK;
}
ExitCodes main_(int, const char**) { vector<ProteinIdentification> prot_ids; vector<PeptideIdentification> pep_ids; ProteinHit temp_protein_hit; //------------------------------------------------------------- // parsing parameters //------------------------------------------------------------- String inputfile_id = getStringOption_("id"); String inputfile_feature = getStringOption_("feature"); String inputfile_consensus = getStringOption_("consensus"); String inputfile_raw = getStringOption_("in"); String outputfile_name = getStringOption_("out"); //~ bool Ms1(getFlag_("MS1")); //~ bool Ms2(getFlag_("MS2")); bool remove_duplicate_features(getFlag_("remove_duplicate_features")); //------------------------------------------------------------- // fetch vocabularies //------------------------------------------------------------ ControlledVocabulary cv; cv.loadFromOBO("PSI-MS", File::find("/CV/psi-ms.obo")); cv.loadFromOBO("QC", File::find("/CV/qc-cv.obo")); QcMLFile qcmlfile; //------------------------------------------------------------- // MS aqiusition //------------------------------------------------------------ String base_name = QFileInfo(QString::fromStdString(inputfile_raw)).baseName(); cout << "Reading mzML file..." 
<< endl; MzMLFile mz_data_file; MSExperiment<Peak1D> exp; MzMLFile().load(inputfile_raw, exp); //---prep input exp.sortSpectra(); UInt min_mz = std::numeric_limits<UInt>::max(); UInt max_mz = 0; std::map<Size, UInt> mslevelcounts; qcmlfile.registerRun(base_name,base_name); //TODO use UIDs //---base MS aquisition qp String msaq_ref = base_name + "_msaq"; QcMLFile::QualityParameter qp; qp.id = msaq_ref; ///< Identifier qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000004"; try { //~ const ControlledVocabulary::CVTerm& test = cv.getTermByName("MS aquisition result details"); //~ cout << test.name << test.id << endl; const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); //~ const ControlledVocabulary::CVTerm& term = cv.getTerm("0000004"); qp.name = term.name; ///< Name } catch (...) { qp.name = "mzML file"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); //---file origin qp qp = QcMLFile::QualityParameter(); qp.name = "mzML file"; ///< Name qp.id = base_name + "_run_name"; ///< Identifier qp.cvRef = "MS"; ///< cv reference qp.cvAcc = "MS:1000577"; qp.value = base_name; qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.name = "instrument model"; ///< Name qp.id = base_name + "_instrument_name"; ///< Identifier qp.cvRef = "MS"; ///< cv reference qp.cvAcc = "MS:1000031"; qp.value = exp.getInstrument().getName(); qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.name = "completion time"; ///< Name qp.id = base_name + "_date"; ///< Identifier qp.cvRef = "MS"; ///< cv reference qp.cvAcc = "MS:1000747"; qp.value = exp.getDateTime().getDate(); qcmlfile.addRunQualityParameter(base_name, qp); //---precursors at QcMLFile::Attachment at; at.cvRef = "QC"; ///< cv reference at.cvAcc = "QC:0000044"; at.qualityRef = msaq_ref; at.id = base_name + "_precursors"; ///< Identifier try { const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc); at.name = term.name; ///< Name } catch 
(...) { at.name = "precursors"; ///< Name } at.colTypes.push_back("MS:1000894_[sec]"); //RT at.colTypes.push_back("MS:1000040"); //MZ for (Size i = 0; i < exp.size(); ++i) { mslevelcounts[exp[i].getMSLevel()]++; if (exp[i].getMSLevel() == 2) { if (exp[i].getPrecursors().front().getMZ() < min_mz) { min_mz = exp[i].getPrecursors().front().getMZ(); } if (exp[i].getPrecursors().front().getMZ() > max_mz) { max_mz = exp[i].getPrecursors().front().getMZ(); } std::vector<String> row; row.push_back(exp[i].getRT()); row.push_back(exp[i].getPrecursors().front().getMZ()); at.tableRows.push_back(row); } } qcmlfile.addRunAttachment(base_name, at); //---aquisition results qp qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000006"; ///< cv accession for "aquisition results" qp.id = base_name + "_ms1aquisition"; ///< Identifier qp.value = String(mslevelcounts[1]); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "number of ms1 spectra"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000007"; ///< cv accession for "aquisition results" qp.id = base_name + "_ms2aquisition"; ///< Identifier qp.value = String(mslevelcounts[2]); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "number of ms2 spectra"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000008"; ///< cv accession for "aquisition results" qp.id = base_name + "_Chromaquisition"; ///< Identifier qp.value = String(exp.getChromatograms().size()); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) 
{ qp.name = "number of chromatograms"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); at = QcMLFile::Attachment(); at.cvRef = "QC"; ///< cv reference at.cvAcc = "QC:0000009"; at.qualityRef = msaq_ref; at.id = base_name + "_mzrange"; ///< Identifier try { const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc); at.name = term.name; ///< Name } catch (...) { at.name = "MS MZ aquisition ranges"; ///< Name } at.colTypes.push_back("QC:0000010"); //MZ at.colTypes.push_back("QC:0000011"); //MZ std::vector<String> rowmz; rowmz.push_back(String(min_mz)); rowmz.push_back(String(max_mz)); at.tableRows.push_back(rowmz); qcmlfile.addRunAttachment(base_name, at); at = QcMLFile::Attachment(); at.cvRef = "QC"; ///< cv reference at.cvAcc = "QC:0000012"; at.qualityRef = msaq_ref; at.id = base_name + "_rtrange"; ///< Identifier try { const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc); at.name = term.name; ///< Name } catch (...) { at.name = "MS RT aquisition ranges"; ///< Name } at.colTypes.push_back("QC:0000013"); //MZ at.colTypes.push_back("QC:0000014"); //MZ std::vector<String> rowrt; rowrt.push_back(String(exp.begin()->getRT())); rowrt.push_back(String(exp.getSpectra().back().getRT())); at.tableRows.push_back(rowrt); qcmlfile.addRunAttachment(base_name, at); //---ion current stability ( & tic ) qp at = QcMLFile::Attachment(); at.cvRef = "QC"; ///< cv reference at.cvAcc = "QC:0000022"; at.qualityRef = msaq_ref; at.id = base_name + "_tics"; ///< Identifier try { const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc); at.name = term.name; ///< Name } catch (...) 
{ at.name = "MS TICs"; ///< Name } at.colTypes.push_back("MS:1000894_[sec]"); at.colTypes.push_back("MS:1000285"); UInt max = 0; Size below_10k = 0; for (Size i = 0; i < exp.size(); ++i) { if (exp[i].getMSLevel() == 1) { UInt sum = 0; for (Size j = 0; j < exp[i].size(); ++j) { sum += exp[i][j].getIntensity(); } if (sum > max) { max = sum; } if (sum < 10000) { ++below_10k; } std::vector<String> row; row.push_back(exp[i].getRT()); row.push_back(sum); at.tableRows.push_back(row); } } qcmlfile.addRunAttachment(base_name, at); qp = QcMLFile::QualityParameter(); qp.id = base_name + "_ticslump"; ///< Identifier qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000023"; qp.value = String((100 / exp.size()) * below_10k); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "percentage of tic slumps"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); //------------------------------------------------------------- // MS id //------------------------------------------------------------ if (inputfile_id != "") { IdXMLFile().load(inputfile_id, prot_ids, pep_ids); cerr << "idXML read ended. Found " << pep_ids.size() << " peptide identifications." << endl; ProteinIdentification::SearchParameters params = prot_ids[0].getSearchParameters(); vector<String> var_mods = params.variable_modifications; //~ boost::regex re("(?<=[KR])(?=[^P])"); String msid_ref = base_name + "_msid"; QcMLFile::QualityParameter qp; qp.id = msid_ref; ///< Identifier qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000025"; try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) 
{ qp.name = "MS identification result details"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); at = QcMLFile::Attachment(); at.cvRef = "QC"; ///< cv reference at.cvAcc = "QC:0000026"; at.qualityRef = msid_ref; at.id = base_name + "_idsetting"; ///< Identifier try { const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc); at.name = term.name; ///< Name } catch (...) { at.name = "MS id settings"; ///< Name } at.colTypes.push_back("MS:1001013"); //MS:1001013 db name MS:1001016 version MS:1001020 taxonomy at.colTypes.push_back("MS:1001016"); at.colTypes.push_back("MS:1001020"); std::vector<String> row; row.push_back(String(prot_ids.front().getSearchParameters().db)); row.push_back(String(prot_ids.front().getSearchParameters().db_version)); row.push_back(String(prot_ids.front().getSearchParameters().taxonomy)); at.tableRows.push_back(row); qcmlfile.addRunAttachment(base_name, at); UInt spectrum_count = 0; Size peptide_hit_count = 0; UInt runs_count = 0; Size protein_hit_count = 0; set<String> peptides; set<String> proteins; Size missedcleavages = 0; for (Size i = 0; i < pep_ids.size(); ++i) { if (!pep_ids[i].empty()) { ++spectrum_count; peptide_hit_count += pep_ids[i].getHits().size(); const vector<PeptideHit>& temp_hits = pep_ids[i].getHits(); for (Size j = 0; j < temp_hits.size(); ++j) { peptides.insert(temp_hits[j].getSequence().toString()); } } } for (set<String>::iterator it = peptides.begin(); it != peptides.end(); ++it) { for (String::const_iterator st = it->begin(); st != it->end() - 1; ++st) { if (*st == 'K' || *st == 'R') { ++missedcleavages; } } } for (Size i = 0; i < prot_ids.size(); ++i) { ++runs_count; protein_hit_count += prot_ids[i].getHits().size(); const vector<ProteinHit>& temp_hits = prot_ids[i].getHits(); for (Size j = 0; j < temp_hits.size(); ++j) { proteins.insert(temp_hits[j].getAccession()); } } qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000037"; ///< cv accession qp.id = base_name 
+ "_misscleave"; ///< Identifier qp.value = missedcleavages; try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "total number of missed cleavages"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000032"; ///< cv accession qp.id = base_name + "_totprot"; ///< Identifier qp.value = protein_hit_count; try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "total number of identified proteins"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000033"; ///< cv accession qp.id = base_name + "_totuniqprot"; ///< Identifier qp.value = String(proteins.size()); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "total number of uniquely identified proteins"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000029"; ///< cv accession qp.id = base_name + "_psms"; ///< Identifier qp.value = String(spectrum_count); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "total number of PSM"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000030"; ///< cv accession qp.id = base_name + "_totpeps"; ///< Identifier qp.value = String(peptide_hit_count); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) 
{ qp.name = "total number of identified peptides"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000031"; ///< cv accession qp.id = base_name + "_totuniqpeps"; ///< Identifier qp.value = String(peptides.size()); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "total number of uniquely identified peptides"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); at = QcMLFile::Attachment(); at.cvRef = "QC"; ///< cv reference at.cvAcc = "QC:0000038"; at.qualityRef = msid_ref; at.id = base_name + "_massacc"; ///< Identifier try { const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc); at.name = term.name; ///< Name } catch (...) { at.name = "delta ppm tables"; } //~ delta ppm QC:0000039 RT MZ uniqueness ProteinID MS:1000885 target/decoy Score PeptideSequence MS:1000889 Annots string Similarity Charge UO:0000219 TheoreticalWeight UO:0000221 Oxidation_(M) at.colTypes.push_back("RT"); at.colTypes.push_back("MZ"); at.colTypes.push_back("Score"); at.colTypes.push_back("PeptideSequence"); at.colTypes.push_back("Charge"); at.colTypes.push_back("TheoreticalWeight"); at.colTypes.push_back("delta_ppm"); for (UInt w = 0; w < var_mods.size(); ++w) { at.colTypes.push_back(String(var_mods[w]).substitute(' ', '_')); } std::vector<double> deltas; //~ prot_ids[0].getSearchParameters(); for (vector<PeptideIdentification>::iterator it = pep_ids.begin(); it != pep_ids.end(); ++it) { if (it->getHits().size() > 0) { std::vector<String> row; row.push_back(it->getRT()); row.push_back(it->getMZ()); PeptideHit tmp = it->getHits().front(); //TODO depends on score & sort vector<UInt> pep_mods; for (UInt w = 0; w < var_mods.size(); ++w) { pep_mods.push_back(0); } for (AASequence::ConstIterator z = tmp.getSequence().begin(); z != tmp.getSequence().end(); ++z) { Residue res = *z; String temp; if 
(res.getModification().size() > 0 && res.getModification() != "Carbamidomethyl") { temp = res.getModification() + " (" + res.getOneLetterCode() + ")"; //cout<<res.getModification()<<endl; for (UInt w = 0; w < var_mods.size(); ++w) { if (temp == var_mods[w]) { //cout<<temp; pep_mods[w] += 1; } } } } row.push_back(tmp.getScore()); row.push_back(tmp.getSequence().toString().removeWhitespaces()); row.push_back(tmp.getCharge()); row.push_back(String((tmp.getSequence().getMonoWeight() + tmp.getCharge() * Constants::PROTON_MASS_U) / tmp.getCharge())); double dppm = /* std::abs */ (getMassDifference(((tmp.getSequence().getMonoWeight() + tmp.getCharge() * Constants::PROTON_MASS_U) / tmp.getCharge()), it->getMZ(), true)); row.push_back(String(dppm)); deltas.push_back(dppm); for (UInt w = 0; w < var_mods.size(); ++w) { row.push_back(pep_mods[w]); } at.tableRows.push_back(row); } } qcmlfile.addRunAttachment(base_name, at); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000040"; ///< cv accession qp.id = base_name + "_mean_delta"; ///< Identifier qp.value = String(OpenMS::Math::mean(deltas.begin(), deltas.end())); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "mean delta ppm"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000041"; ///< cv accession qp.id = base_name + "_median_delta"; ///< Identifier qp.value = String(OpenMS::Math::median(deltas.begin(), deltas.end(), false)); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) 
{ qp.name = "median delta ppm"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000035"; ///< cv accession qp.id = base_name + "_ratio_id"; ///< Identifier qp.value = String(double(pep_ids.size()) / double(mslevelcounts[2])); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "id ratio"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); } //------------------------------------------------------------- // MS quantitation //------------------------------------------------------------ FeatureMap map; String msqu_ref = base_name + "_msqu"; if (inputfile_feature != "") { FeatureXMLFile f; f.load(inputfile_feature, map); cout << "Read featureXML file..." << endl; //~ UInt fiter = 0; map.sortByRT(); map.updateRanges(); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000045"; ///< cv accession qp.id = msqu_ref; ///< Identifier try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) { qp.name = "MS quantification result details"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); qp = QcMLFile::QualityParameter(); qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:0000046"; ///< cv accession qp.id = base_name + "_feature_count"; ///< Identifier qp.value = String(map.size()); try { const ControlledVocabulary::CVTerm& term = cv.getTerm(qp.cvAcc); qp.name = term.name; ///< Name } catch (...) 
{ qp.name = "number of features"; ///< Name } qcmlfile.addRunQualityParameter(base_name, qp); } if (inputfile_feature != "" && !remove_duplicate_features) { QcMLFile::Attachment at; at = QcMLFile::Attachment(); at.cvRef = "QC"; ///< cv reference at.cvAcc = "QC:0000047"; at.qualityRef = msqu_ref; at.id = base_name + "_features"; ///< Identifier try { const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc); at.name = term.name; ///< Name } catch (...) { at.name = "features"; ///< Name } at.colTypes.push_back("MZ"); at.colTypes.push_back("RT"); at.colTypes.push_back("Intensity"); at.colTypes.push_back("Charge"); at.colTypes.push_back("Quality"); at.colTypes.push_back("FWHM"); at.colTypes.push_back("IDs"); UInt fiter = 0; map.sortByRT(); //ofstream out(outputfile_name.c_str()); while (fiter < map.size()) { std::vector<String> row; row.push_back(map[fiter].getMZ()); row.push_back(map[fiter].getRT()); row.push_back(map[fiter].getIntensity()); row.push_back(map[fiter].getCharge()); row.push_back(map[fiter].getOverallQuality()); row.push_back(map[fiter].getWidth()); row.push_back(map[fiter].getPeptideIdentifications().size()); fiter++; at.tableRows.push_back(row); } qcmlfile.addRunAttachment(base_name, at); } else if (inputfile_feature != "" && remove_duplicate_features) { QcMLFile::Attachment at; at = QcMLFile::Attachment(); at.cvRef = "QC"; ///< cv reference at.cvAcc = "QC:0000047"; at.qualityRef = msqu_ref; at.id = base_name + "_features"; ///< Identifier try { const ControlledVocabulary::CVTerm& term = cv.getTerm(at.cvAcc); at.name = term.name; ///< Name } catch (...) 
{ at.name = "features"; ///< Name } at.colTypes.push_back("MZ"); at.colTypes.push_back("RT"); at.colTypes.push_back("Intensity"); at.colTypes.push_back("Charge"); FeatureMap map, map_out; FeatureXMLFile f; f.load(inputfile_feature, map); UInt fiter = 0; map.sortByRT(); while (fiter < map.size()) { FeatureMap map_tmp; for (UInt k = fiter; k <= map.size(); ++k) { if (abs(map[fiter].getRT() - map[k].getRT()) < 0.1) { //~ cout << fiter << endl; map_tmp.push_back(map[k]); } else { fiter = k; break; } } map_tmp.sortByMZ(); UInt retif = 1; map_out.push_back(map_tmp[0]); while (retif < map_tmp.size()) { if (abs(map_tmp[retif].getMZ() - map_tmp[retif - 1].getMZ()) > 0.01) { cout << "equal RT, but mass different" << endl; map_out.push_back(map_tmp[retif]); } retif++; } } qcmlfile.addRunAttachment(base_name, at); } if (inputfile_consensus != "") { cout << "Reading consensusXML file..." << endl; ConsensusXMLFile f; ConsensusMap map; f.load(inputfile_consensus, map); //~ String CONSENSUS_NAME = "_consensus.tsv"; //~ String combined_out = outputfile_name + CONSENSUS_NAME; //~ ofstream out(combined_out.c_str()); at = QcMLFile::Attachment(); qp.name = "consensuspoints"; ///< Name //~ qp.id = base_name + "_consensuses"; ///< Identifier qp.cvRef = "QC"; ///< cv reference qp.cvAcc = "QC:xxxxxxxx"; ///< cv accession "featuremapper results" at.colTypes.push_back("Native_spectrum_ID"); at.colTypes.push_back("DECON_RT_(sec)"); at.colTypes.push_back("DECON_MZ_(Th)"); at.colTypes.push_back("DECON_Intensity"); at.colTypes.push_back("Feature_RT_(sec)"); at.colTypes.push_back("Feature_MZ_(Th)"); at.colTypes.push_back("Feature_Intensity"); at.colTypes.push_back("Feature_Charge"); for (ConsensusMap::const_iterator cmit = map.begin(); cmit != map.end(); ++cmit) { const ConsensusFeature& CF = *cmit; for (ConsensusFeature::const_iterator cfit = CF.begin(); cfit != CF.end(); ++cfit) { std::vector<String> row; FeatureHandle FH = *cfit; row.push_back(CF.getMetaValue("spectrum_native_id")); 
row.push_back(CF.getRT()); row.push_back(CF.getMZ()); row.push_back(CF.getIntensity()); row.push_back(FH.getRT()); row.push_back(FH.getMZ()); row.push_back(FH.getCharge()); at.tableRows.push_back(row); } } qcmlfile.addRunAttachment(base_name, at); } //------------------------------------------------------------- // finalize //------------------------------------------------------------ qcmlfile.store(outputfile_name); return EXECUTION_OK; }
ExitCodes main_(int, const char**) { StringList cv_files = getStringList_("cv_files"); StringList cv_names = getStringList_("cv_names"); if (cv_files.size() != cv_names.size()) { cerr << "Error: You have to specify an identifier for each CV file. Aborting!" << endl; return ILLEGAL_PARAMETERS; } // load cv terms ControlledVocabulary cv; for (Size i = 0; i < cv_files.size(); ++i) { cv.loadFromOBO(cv_names[i], cv_files[i]); } Map<String, ControlledVocabulary::CVTerm> terms = cv.getTerms(); // load mappings from mapping file String mapping_file = getStringOption_("mapping_file"); CVMappings mappings; CVMappingFile().load(mapping_file, mappings); //store HTML version of mapping and CV if (getStringOption_("html") != "") { TextFile file; file.addLine("<HTML>"); file.addLine(" <HEAD>"); file.addLine(" <TITLE>CV mapping file</TITLE>"); file.addLine(" <SCRIPT language=javascript type='text/javascript'>"); file.addLine(" function toggleDiv(layer_ref,force_state) "); file.addLine(" {"); file.addLine(" if (document.getElementById(layer_ref).style.display=='none' || force_state=='true')"); file.addLine(" {"); file.addLine(" document.getElementById(layer_ref).style.display = 'block';"); file.addLine(" }"); file.addLine(" else if (document.getElementById(layer_ref).style.display=='block' || force_state=='false')"); file.addLine(" {"); file.addLine(" document.getElementById(layer_ref).style.display = 'none';"); file.addLine(" }"); file.addLine(" }"); file.addLine(" </SCRIPT>"); file.addLine(" </HEAD>"); file.addLine(" <BODY>"); //count the number of terms and add button to expend/collaps all terms Int term_count = 0; for (vector<CVMappingRule>::const_iterator it = mappings.getMappingRules().begin(); it != mappings.getMappingRules().end(); ++it) { for (vector<CVMappingTerm>::const_iterator tit = it->getCVTerms().begin(); tit != it->getCVTerms().end(); ++tit) { ++term_count; } } String expand_all = " <a href=\"javascript:toggleDiv('div0','true')"; String collapse_all = " <a 
href=\"javascript:toggleDiv('div0','false')"; for (Int i = 1; i < term_count; ++i) { expand_all += String(";toggleDiv('div") + i + "','true')"; collapse_all += String(";toggleDiv('div") + i + "','false')"; } file.addLine(expand_all + "\">Expand all</a><BR>"); file.addLine(collapse_all + "\">Collapse all</a>"); file.addLine(" <TABLE width=100% border=0>"); term_count = -1; for (vector<CVMappingRule>::const_iterator it = mappings.getMappingRules().begin(); it != mappings.getMappingRules().end(); ++it) { //create rule line file.addLine(" <TR><TD colspan=\"2\"><HR></TD></TR>"); file.addLine(String(" <TR><TD>Identifier:</TD><TD><B>") + it->getIdentifier() + "</B></TD></TR>"); file.addLine(String(" <TR><TD>Element:</TD><TD><B>") + it->getElementPath() + "</B></TD></TR>"); if (it->getRequirementLevel() == CVMappingRule::MUST) { file.addLine(" <TR><TD>Requirement level:</TD><TD><FONT color=\"red\">MUST</FONT></TD></TR>"); } else if (it->getRequirementLevel() == CVMappingRule::SHOULD) { file.addLine(" <TR><TD>Requirement level:</TD><TD><FONT color=\"orange\">SHOULD</FONT></TD></TR>"); } else if (it->getRequirementLevel() == CVMappingRule::MAY) { file.addLine(" <TR><TD>Requirement level:</TD><TD><FONT color=\"green\">MAY</FONT></TD></TR>"); } if (it->getCombinationsLogic() == CVMappingRule::AND) { file.addLine(" <TR><TD>Combination logic:</TD><TD><FONT color=\"red\">AND</FONT></TD></TR>"); } else if (it->getCombinationsLogic() == CVMappingRule::XOR) { file.addLine(" <TR><TD>Combination logic:</TD><TD><FONT color=\"orange\">XOR</FONT></TD></TR>"); } else if (it->getCombinationsLogic() == CVMappingRule::OR) { file.addLine(" <TR><TD>Combination logic:</TD><TD><FONT color=\"green\">OR</FONT></TD></TR>"); } //create table with terms for (vector<CVMappingTerm>::const_iterator tit = it->getCVTerms().begin(); tit != it->getCVTerms().end(); ++tit) { //create term line String term_line = String(" <TR><TD valign=\"top\">Term:</TD><TD>"); if (tit->getAllowChildren()) { ++term_count; 
term_line += String("<a href=\"javascript:toggleDiv('div") + term_count + "','')\" style=\"text-decoration:none\" >+</a> "; } else { term_line += String(" "); } //add Term accession, name and description (as popup) if (cv.exists(tit->getAccession())) { const ControlledVocabulary::CVTerm& child_term = cv.getTerm(tit->getAccession()); String description = child_term.description; if (child_term.synonyms.size() != 0) { description += String(" -- Synonyms: '") + ListUtils::concatenate(child_term.synonyms, ", ") + "'"; } term_line += "<span title=\"" + description + "\">"; } term_line += tit->getAccession() + " ! " + tit->getTermName(); if (cv.exists(tit->getAccession())) { term_line += "</span>"; //check if term accession and term name correspond to the CV const ControlledVocabulary::CVTerm& main_term = cv.getTerm(tit->getAccession()); if (main_term.name != tit->getTermName()) { cerr << "Warning: Accession '" << tit->getAccession() << "' and name '" << tit->getTermName() << "' do not match. Name should be '" << main_term.name << "'." << endl; } } //tags StringList tags; if (!tit->getUseTerm()) { tags.push_back("children only"); } if (tit->getIsRepeatable()) { tags.push_back("repeatable"); } if (cv.exists(tit->getAccession())) { const ControlledVocabulary::CVTerm& term = cv.getTerm(tit->getAccession()); if (term.obsolete) { tags.push_back("<font color=darkred>obsolete</font>"); } if (term.xref_type != ControlledVocabulary::CVTerm::NONE) { tags.push_back("value-type=" + ControlledVocabulary::CVTerm::getXRefTypeName(term.xref_type)); } if (term.units.size() > 0) { StringList units; for (set<String>::const_iterator u_it = term.units.begin(); u_it != term.units.end(); ++u_it) { units.push_back(*u_it + "!" 
+ cv.getTerm(*u_it).name); } tags.push_back(String("units=") + ListUtils::concatenate(units, ",")); } if (term.xref_binary.size() > 0) { StringList types; for (StringList::const_iterator u_it = term.xref_binary.begin(); u_it != term.xref_binary.end(); ++u_it) { types.push_back(*u_it + "!" + cv.getTerm(*u_it).name); } tags.push_back(String("binary-array-types=") + ListUtils::concatenate(types, ",")); } } if (tags.size() != 0) { term_line += String("<FONT color=\"grey\"> (") + ListUtils::concatenate(tags, ", ") + ")</FONT>"; } file.addLine(term_line); // check whether we need the whole tree, or just the term itself if (tit->getAllowChildren()) { file.addLine(String(" <div id=\"div") + term_count + "\" style=\"display: none\">"); if (cv.exists(tit->getAccession())) { writeTermTree_(tit->getAccession(), cv, file, 1); //BEGIN - THIS IS NEEDED FOR WRITING PARSERS ONLY /* set<String> allowed_terms; cv.getAllChildTerms(allowed_terms, tit->getAccession()); for (set<String>::const_iterator atit=allowed_terms.begin(); atit!=allowed_terms.end(); ++atit) { const ControlledVocabulary::CVTerm& child_term = cv.getTerm(*atit); String parser_string = String("os << \"<cvParam cvRef=\\\"MS\\\" accession=\\\"") + child_term.id + "\\\" name=\\\"" + child_term.name + "\\\""; for (Size i=0; i<child_term.unparsed.size(); ++i) { //TODO this does not work anymore. The type is now stored as a member if (child_term.unparsed[i].hasSubstring("value-type:xsd\\:int") || child_term.unparsed[i].hasSubstring("value-type:xsd\\:float") || child_term.unparsed[i].hasSubstring("value-type:xsd\\:string")) { parser_string += " value=\\\"\" << << \"\\\""; } } parser_string += "/>\\n\";<BR>"; file.push_back(parser_string); }*/ } else { file.addLine(" - Missing terms, CV not loaded..."); cerr << "Warning: no child terms for " << tit->getAccession() << " found!" 
<< endl; } file.addLine(" </div>"); file.addLine(" </TD></TD></TR>"); } } } file.addLine(" </TABLE>"); file.addLine(" </BODY>"); file.addLine("</HTML>"); file.store(getStringOption_("html")); return EXECUTION_OK; } // iterator over all mapping rules and store the mentioned terms StringList ignore_namespaces = getStringList_("ignore_cv"); set<String> ignore_cv_list; for (StringList::const_iterator it = ignore_namespaces.begin(); it != ignore_namespaces.end(); ++it) { ignore_cv_list.insert(*it); } set<String> used_terms; for (vector<CVMappingRule>::const_iterator it = mappings.getMappingRules().begin(); it != mappings.getMappingRules().end(); ++it) { set<String> allowed_terms; // iterate over all allowed terms for (vector<CVMappingTerm>::const_iterator tit = it->getCVTerms().begin(); tit != it->getCVTerms().end(); ++tit) { // check whether the term itself it allowed, or only its children if (tit->getUseTerm()) { allowed_terms.insert(tit->getAccession()); } // check whether we need the whole tree, or just the term itself if (tit->getAllowChildren()) { // check whether we want to ignore this term if (!(tit->getAccession().has(':') && ignore_cv_list.find(tit->getAccession().prefix(':')) != ignore_cv_list.end())) { cv.getAllChildTerms(allowed_terms, tit->getAccession()); } // also add the term itself to the used_terms, because all the children are allowed used_terms.insert(tit->getAccession()); } } // print the allowed terms for the rule cout << "MappingRule: id=" << it->getIdentifier() << ", elementPath=" << it->getElementPath() << ", #terms=" << it->getCVTerms().size() << endl; for (set<String>::const_iterator ait = allowed_terms.begin(); ait != allowed_terms.end(); ++ait) { cout << *ait << " " << terms[*ait].name << endl; } used_terms.insert(allowed_terms.begin(), allowed_terms.end()); } // find unused terms, which CANNOT be used in the XML due to the mapping file set<String> unused_terms; for (Map<String, ControlledVocabulary::CVTerm>::ConstIterator it = 
terms.begin(); it != terms.end(); ++it) { if (used_terms.find(it->first) == used_terms.end()) { unused_terms.insert(it->first); } } cout << "\n\nCVTerms which are unused in the mapping file and therefore MUST NOT be used in an instance document" << endl; for (set<String>::const_iterator it = unused_terms.begin(); it != unused_terms.end(); ++it) { cout << *it << " " << terms[*it].name; // print also parent names for (set<String>::const_iterator pit = terms[*it].parents.begin(); pit != terms[*it].parents.end(); ++pit) { cout << " " << terms[*pit].id << " " << terms[*pit].name; } cout << endl; } return EXECUTION_OK; }
/**
  @brief Imports quality parameters from a CSV table into a qcML file.

  Reads the options "in" (optional existing qcML file), "out" (qcML file to
  write), "mapping" (CSV file mapping table columns to CV terms) and "table"
  (CSV file holding the values).

  The mapping file must contain at least two rows of equal length: the first
  is the table header, the second names the CV term (accession or name) for
  each column. One column has to resolve to the term named "raw data file";
  its cell value selects the run or set the row's parameters are attached to.
  Unknown identifiers are registered as new runs.

  Table cells beyond the mapped columns are ignored, because no CV term is
  available for them.

  @return EXECUTION_OK on success, ILLEGAL_PARAMETERS if the mapping is
          malformed, a CV term cannot be resolved, no run/set column is
          mapped, or a table row has fewer cells than mapped columns.
*/
ExitCodes main_(int, const char**)
{
  //-------------------------------------------------------------
  // parsing parameters
  //-------------------------------------------------------------
  String in = getStringOption_("in");
  String out = getStringOption_("out");
  String mappi = getStringOption_("mapping");
  String tab = getStringOption_("table");

  ControlledVocabulary cv;
  cv.loadFromOBO("PSI-MS", File::find("/CV/psi-ms.obo"));
  cv.loadFromOBO("QC", File::find("/CV/qc-cv.obo"));

  //-------------------------------------------------------------
  // reading input
  //-------------------------------------------------------------
  QcMLFile qcmlfile;
  if (!in.empty())
  {
    qcmlfile.load(in);
  }

  // the import is only attempted when both the mapping and the table are given
  if (!mappi.empty() && !tab.empty())
  {
    CsvFile csv_file(tab);
    CsvFile map_file(mappi);

    // first row is assumed to be the table header, second row the according qc term
    if (map_file.size() < 2)
    {
      cerr << "Error: You have to give a mapping of your table (first row is the header of table and second row is the according qc). Aborting!" << endl;
      return ILLEGAL_PARAMETERS;
    }

    StringList header, according;
    map_file.getRow(0, header);
    map_file.getRow(1, according);

    if (header.size() != according.size())
    {
      cerr << "Error: You have to give a mapping of your table (first row is the header of table and second row is the according qc). Aborting!" << endl;
      return ILLEGAL_PARAMETERS;
    }

    // number of table columns that have a CV term assigned
    const Size n_columns = according.size();

    // resolve every mapped column to a CV term; afterwards header[i] holds the
    // term name and according[i] the term accession
    int runset_col = -1;
    for (Size i = 0; i < n_columns; ++i)
    {
      if (!cv.exists(according[i]))
      {
        // not a known accession - try to interpret the entry as a term name
        // NOTE(review): getTermByName presumably throws for unknown names - confirm
        try
        {
          const ControlledVocabulary::CVTerm& term = cv.getTermByName(according[i]);
          header[i] = term.name;
          according[i] = term.id;
        }
        catch (...)
        {
          cerr << "Error: You have to specify a correct cv with accession or name in col " << String(i) << ". Aborting!" << endl;
          return ILLEGAL_PARAMETERS;
        }
      }
      else
      {
        const ControlledVocabulary::CVTerm& term = cv.getTerm(according[i]);
        header[i] = term.name;
      }

      if (header[i] == "raw data file") //TODO add set name as possibility!
      {
        runset_col = static_cast<int>(i);
      }
    }

    if (runset_col < 0)
    {
      cerr << "Error: You have to give a mapping of your table - rows to runs/sets. Aborting!" << endl;
      return ILLEGAL_PARAMETERS;
    }
    const Size id_column = static_cast<Size>(runset_col);

    // row 0 of the table is its header; data rows start at index 1
    for (Size i = 1; i < csv_file.size(); ++i)
    {
      StringList li;
      csv_file.getRow(i, li);
      if (li.size() < n_columns)
      {
        cerr << "Error: You have to give a correct mapping of your table - row " << String(i+1) << " is too short. Aborting!" << endl;
        return ILLEGAL_PARAMETERS;
      }

      std::vector<QcMLFile::QualityParameter> qps;
      qps.reserve(n_columns);
      String id;
      bool is_set = false;

      // Only the mapped columns are visited: header and according have exactly
      // n_columns entries, so indexing them with a longer row's column index
      // would read out of bounds.
      for (Size j = 0; j < n_columns; ++j)
      {
        if (j == id_column)
        {
          id = li[j];
          if (qcmlfile.existsRun(id)) //TODO this only works for real run IDs
          {
            // known run - nothing to register
          }
          else if (qcmlfile.existsSet(id)) //TODO this only works for real set IDs
          {
            is_set = true;
          }
          else
          {
            qcmlfile.registerRun(id, id); //TODO warn that if this was supposed to be a set - now it is not!
          }
        }

        // the run/set column itself is stored as a quality parameter as well
        QcMLFile::QualityParameter def;
        def.name = header[j];
        def.id = String(UniqueIdGenerator::getUniqueId());
        // NOTE(review): the reference is always "QC", even if the term was found in the PSI-MS CV - confirm
        def.cvRef = "QC";
        def.cvAcc = according[j];
        def.value = li[j];
        qps.push_back(def);
      }

      // rows with an empty run/set identifier are skipped
      if (!id.empty())
      {
        for (const QcMLFile::QualityParameter& qp : qps)
        {
          if (is_set)
          {
            qcmlfile.addSetQualityParameter(id, qp);
          }
          else
          {
            qcmlfile.addRunQualityParameter(id, qp);
          }
        }
      }
    }
  }

  qcmlfile.store(out);
  return EXECUTION_OK;
}