//This method needed to be reimplemented to // - check CV term values // - handle referenceableParamGroups // - check if binaryDataArray name and type match void MzMLValidator::startElement(const XMLCh * const /*uri*/, const XMLCh * const /*local_name*/, const XMLCh * const qname, const Attributes & attributes) { String tag = sm_.convert(qname); String parent_tag; if (open_tags_.size() > 0) parent_tag = open_tags_.back(); String path = getPath_() + "/" + cv_tag_ + "/@" + accession_att_; open_tags_.push_back(tag); if (tag == "referenceableParamGroup") { current_id_ = attributeAsString_(attributes, "id"); } else if (tag == "referenceableParamGroupRef") { const std::vector<CVTerm> & terms = param_groups_[attributeAsString_(attributes, "ref")]; for (Size i = 0; i < terms.size(); ++i) { handleTerm_(path, terms[i]); } } else if (tag == "binaryDataArray") { binary_data_array_ = ""; binary_data_type_ = ""; } else if (tag == cv_tag_) { //extract accession, name and value CVTerm parsed_term; getCVTerm_(attributes, parsed_term); //check if the term is unknown if (!cv_.exists(parsed_term.accession)) { warnings_.push_back(String("Unknown CV term: '") + parsed_term.accession + " - " + parsed_term.name + "' at element '" + getPath_(1) + "'"); return; } //check if the term is obsolete if (cv_.getTerm(parsed_term.accession).obsolete) { warnings_.push_back(String("Obsolete CV term: '") + parsed_term.accession + " - " + parsed_term.name + "' at element '" + getPath_(1) + "'"); } //actual handling of the term if (parent_tag == "referenceableParamGroup") { param_groups_[current_id_].push_back(parsed_term); } else { handleTerm_(path, parsed_term); } } }
void CVMappingFile::startElement(const XMLCh * const /*uri*/, const XMLCh * const /*local_name*/, const XMLCh * const qname, const Attributes & attributes) { tag_ = String(sm_.convert(qname)); if (tag_ == "CvReference") { // CvReference cvName="PSI-PI" cvIdentifier="PSI-PI"/> CVReference ref; ref.setName(attributeAsString_(attributes, "cvName")); ref.setIdentifier(attributeAsString_(attributes, "cvIdentifier")); cv_references_.push_back(ref); return; } if (tag_ == "CvMappingRule") { // id="R1" cvElementPath="/psi-pi:MzIdentML/psi-pi:AnalysisSoftwareList/psi-pi:AnalysisSoftware/pf:ContactRole/pf:role/pf:cvParam" requirementLevel="MUST" scopePath="" cvTermsCombinationLogic="OR actual_rule_.setIdentifier(attributeAsString_(attributes, "id")); String element_path = attributeAsString_(attributes, "cvElementPath"); if (strip_namespaces_) { vector<String> slash_split; element_path.split('/', slash_split); if (slash_split.empty()) { slash_split.push_back(element_path); } element_path = ""; for (vector<String>::const_iterator it = slash_split.begin(); it != slash_split.end(); ++it) { if (it->empty()) { continue; } vector<String> split; it->split(':', split); if (split.empty()) { element_path += "/" + *it; } else { if (split.size() == 2) { element_path += "/" + split[1]; } else { fatalError(LOAD, String("Cannot parse namespaces of path: '") + element_path + "'"); } } } } actual_rule_.setElementPath(element_path); CVMappingRule::RequirementLevel level = CVMappingRule::MUST; String lvl = attributeAsString_(attributes, "requirementLevel"); if (lvl == "MAY") { level = CVMappingRule::MAY; } else { if (lvl == "SHOULD") { level = CVMappingRule::SHOULD; } else { if (lvl == "MUST") { level = CVMappingRule::MUST; } else { // throw Exception } } } actual_rule_.setRequirementLevel(level); actual_rule_.setScopePath(attributeAsString_(attributes, "scopePath")); CVMappingRule::CombinationsLogic logic = CVMappingRule::OR; String lgc = attributeAsString_(attributes, "cvTermsCombinationLogic"); if (lgc == "OR") { logic = CVMappingRule::OR; } else { if (lgc == "AND") { logic = CVMappingRule::AND; } else { if (lgc == "XOR") { logic = CVMappingRule::XOR; } else { // throw Exception; } } } actual_rule_.setCombinationsLogic(logic); return; } if (tag_ == "CvTerm") { // termAccession="PI:00266" useTermName="false" useTerm="false" termName="role type" isRepeatable="true" allowChildren="true" cvIdentifierRef="PSI-PI" CVMappingTerm term; term.setAccession(attributeAsString_(attributes, "termAccession")); term.setUseTerm(DataValue(attributeAsString_(attributes, "useTerm")).toBool()); String use_term_name; optionalAttributeAsString_(use_term_name, attributes, "useTermName"); if (use_term_name != "") { term.setUseTermName(DataValue(use_term_name).toBool()); } else { term.setUseTermName(false); } term.setTermName(attributeAsString_(attributes, "termName")); String is_repeatable; optionalAttributeAsString_(is_repeatable, attributes, "isRepeatable"); if (is_repeatable != "") { term.setIsRepeatable(DataValue(is_repeatable).toBool()); } else { term.setIsRepeatable(true); } term.setAllowChildren(DataValue(attributeAsString_(attributes, "allowChildren")).toBool()); term.setCVIdentifierRef(attributeAsString_(attributes, "cvIdentifierRef")); actual_rule_.addCVTerm(term); return; } return; }
void ToolDescriptionHandler::startElement(const XMLCh * const uri, const XMLCh * const local_name, const XMLCh * const qname, const xercesc::Attributes & attributes) { if (in_ini_section_) { ParamXMLHandler::startElement(uri, local_name, qname, attributes); return; } tag_ = sm_.convert(qname); open_tags_.push_back(tag_); //std::cout << "starting tag " << tag_ << "\n"; if (tag_ == "tool") { String status = attributeAsString_(attributes, "status"); if (status == "external") td_.is_internal = false; else if (status == "internal") td_.is_internal = true; else error(LOAD, "ToolDescriptionHandler::startElement: Element 'status' if tag 'tool' has unknown value " + status + "'."); return; } if (tag_ == "mapping") { Int id = attributeAsInt_(attributes, "id"); String command = attributeAsString_(attributes, "cl"); tde_.tr_table.mapping[id] = command; return; } if (tag_ == "file_post") { Internal::FileMapping fm; fm.location = attributeAsString_(attributes, "location"); fm.target = attributeAsString_(attributes, "target"); tde_.tr_table.post_moves.push_back(fm); return; } if (tag_ == "file_pre") { Internal::FileMapping fm; fm.location = attributeAsString_(attributes, "location"); fm.target = attributeAsString_(attributes, "target"); tde_.tr_table.pre_moves.push_back(fm); return; } if (tag_ == "ini_param") { in_ini_section_ = true; p_ = Param(); // reset Param return; } if (tag_ == "ttd" || tag_ == "category" || tag_ == "e_category" || tag_ == "type") return; if (td_.is_internal) { if (tag_ == "name") { return; } } else if (!td_.is_internal) { if (tag_ == "external" || tag_ == "cloptions" || tag_ == "path" || tag_ == "mappings" || tag_ == "mapping" || tag_ == "ini_param" || tag_ == "text" || tag_ == "onstartup" || tag_ == "onfail" || tag_ == "onfinish" || tag_ == "workingdirectory") return; } error(LOAD, "ToolDescriptionHandler::startElement(): Unkown element found: '" + tag_ + "', ignoring."); }
void ProtXMLFile::startElement(const XMLCh* const /*uri*/, const XMLCh* const /*local_name*/, const XMLCh* const qname, const xercesc::Attributes& attributes) { String tag = sm_.convert(qname); if (tag == "protein_summary_header") { String db = attributeAsString_(attributes, "reference_database"); String enzyme = attributeAsString_(attributes, "sample_enzyme"); ProteinIdentification::SearchParameters sp = prot_id_->getSearchParameters(); sp.db = db; // find a matching enzyme name sp.digestion_enzyme = *(ProteaseDB::getInstance()->getEnzyme(enzyme)); prot_id_->setSearchParameters(sp); prot_id_->setScoreType("ProteinProphet probability"); prot_id_->setHigherScoreBetter(true); pep_id_->setScoreType("ProteinProphet probability"); pep_id_->setHigherScoreBetter(true); } // identifier for Protein & PeptideIdentification // <program_details analysis="proteinprophet" time="2009-11-29T18:30:03" ... if (tag == "program_details") { String analysis = attributeAsString_(attributes, "analysis"); String time = attributeAsString_(attributes, "time"); String version = attributeAsString_(attributes, "version"); QDateTime date = QDateTime::fromString(time.toQString()); if (!date.isValid()) date = QDateTime::fromString(time.toQString(), Qt::ISODate); if (!date.isValid()) LOG_WARN << "Warning: Cannot parse 'time'='" << time << "'.\n"; prot_id_->setDateTime(date); prot_id_->setSearchEngine(analysis); prot_id_->setSearchEngineVersion(version); String id = String(UniqueIdGenerator::getUniqueId()); // was: analysis + "_" + time; prot_id_->setIdentifier(id); pep_id_->setIdentifier(id); } if (tag == "protein_group") { // we group all <protein>'s and <indistinguishable_protein>'s in our // internal group structure protein_group_ = ProteinGroup(); protein_group_.probability = attributeAsDouble_(attributes, "probability"); } else if (tag == "protein") { // usually there will be just one <protein> per <protein_group>, but more // are possible; each <protein> is distinguishable from the other, we // nevertheless group them String protein_name = attributeAsString_(attributes, "protein_name"); // open new "indistinguishable" group: prot_id_->insertIndistinguishableProteins(ProteinGroup()); registerProtein_(protein_name); // create new protein // fill protein with life double pc_coverage; if (optionalAttributeAsDouble_(pc_coverage, attributes, "percent_coverage")) { prot_id_->getHits().back().setCoverage(pc_coverage); } else { LOG_WARN << "Required attribute 'percent_coverage' missing\n"; } prot_id_->getHits().back().setScore(attributeAsDouble_(attributes, "probability")); } else if (tag == "indistinguishable_protein") { String protein_name = attributeAsString_(attributes, "protein_name"); // current last protein is from the same "indistinguishable" group: double score = prot_id_->getHits().back().getScore(); registerProtein_(protein_name); // score of group leader might technically not be transferable (due to // protein length etc.), but we still transfer it to allow filtering of // proteins by score without disrupting the groups: prot_id_->getHits().back().setScore(score); } else if (tag == "peptide") { // If a peptide is degenerate it will show in multiple groups, but have different statistics (e.g. 'nsp_adjusted_probability') // We thus treat each instance as a separate peptide // todo/improvement: link them by a group in PeptideIdentification?! pep_hit_ = new PeptideHit; pep_hit_->setSequence(AASequence::fromString(String(attributeAsString_(attributes, "peptide_sequence")))); pep_hit_->setScore(attributeAsDouble_(attributes, "nsp_adjusted_probability")); Int charge; if (optionalAttributeAsInt_(charge, attributes, "charge")) { pep_hit_->setCharge(charge); } else { LOG_WARN << "Required attribute 'charge' missing\n"; } // add accessions of all indistinguishable proteins the peptide belongs to ProteinIdentification::ProteinGroup& indist = prot_id_->getIndistinguishableProteins().back(); for (StringList::const_iterator accession = indist.accessions.begin(); accession != indist.accessions.end(); ++accession) { PeptideEvidence pe; pe.setProteinAccession(*accession); pep_hit_->addPeptideEvidence(pe); } pep_hit_->setMetaValue("is_unique", String(attributeAsString_(attributes, "is_nondegenerate_evidence")) == "Y" ? 1 : 0); pep_hit_->setMetaValue("is_contributing", String(attributeAsString_(attributes, "is_contributing_evidence")) == "Y" ? 1 : 0); } else if (tag == "mod_aminoacid_mass") { // relates to the last seen peptide (we hope) Size position = attributeAsInt_(attributes, "position"); double mass = attributeAsDouble_(attributes, "mass"); AASequence temp_aa_sequence(pep_hit_->getSequence()); String temp_description = ""; String origin = temp_aa_sequence[position - 1].getOneLetterCode(); matchModification_(mass, origin, temp_description); if (temp_description.size() > 0) // only if a mod was found { // e.g. Carboxymethyl (C) vector<String> mod_split; temp_description.split(' ', mod_split); if (mod_split.size() == 2) { if (mod_split[1] == "(C-term)" || ModificationsDB::getInstance()->getModification(temp_description).getTermSpecificity() == ResidueModification::C_TERM) { temp_aa_sequence.setCTerminalModification(mod_split[0]); } else { if (mod_split[1] == "(N-term)" || ModificationsDB::getInstance()->getModification(temp_description).getTermSpecificity() == ResidueModification::N_TERM) { temp_aa_sequence.setNTerminalModification(mod_split[0]); } else { // search this mod, if not directly use a general one temp_aa_sequence.setModification(position - 1, mod_split[0]); } } } else { error(LOAD, String("Cannot parse modification '") + temp_description + "@" + position + "'"); } } else { error(LOAD, String("Cannot find modification '") + String(mass) + " " + String(origin) + "' @" + String(position)); } pep_hit_->setSequence(temp_aa_sequence); } }