Esempio n. 1
0
    //This method needed to be reimplemented to
    // - check CV term values
    // - handle referenceableParamGroups
    // - check if binaryDataArray name and type match
    void MzMLValidator::startElement(const XMLCh * const /*uri*/, const XMLCh * const /*local_name*/, const XMLCh * const qname, const Attributes & attributes)
    {
      String tag = sm_.convert(qname);
      String parent_tag;
      if (open_tags_.size() > 0)
        parent_tag = open_tags_.back();
      String path = getPath_() + "/" + cv_tag_ + "/@" + accession_att_;
      open_tags_.push_back(tag);

      if (tag == "referenceableParamGroup")
      {
        current_id_ = attributeAsString_(attributes, "id");
      }
      else if (tag == "referenceableParamGroupRef")
      {
        const std::vector<CVTerm> & terms = param_groups_[attributeAsString_(attributes, "ref")];
        for (Size i = 0; i < terms.size(); ++i)
        {
          handleTerm_(path, terms[i]);
        }
      }
      else if (tag == "binaryDataArray")
      {
        binary_data_array_ = "";
        binary_data_type_ = "";
      }
      else if (tag == cv_tag_)
      {
        //extract accession, name and value
        CVTerm parsed_term;
        getCVTerm_(attributes, parsed_term);

        //check if the term is unknown
        if (!cv_.exists(parsed_term.accession))
        {
          warnings_.push_back(String("Unknown CV term: '") + parsed_term.accession + " - " + parsed_term.name + "' at element '" + getPath_(1) + "'");
          return;
        }

        //check if the term is obsolete
        if (cv_.getTerm(parsed_term.accession).obsolete)
        {
          warnings_.push_back(String("Obsolete CV term: '") + parsed_term.accession + " - " + parsed_term.name + "' at element '" + getPath_(1) + "'");
        }

        //actual handling of the term
        if (parent_tag == "referenceableParamGroup")
        {
          param_groups_[current_id_].push_back(parsed_term);
        }
        else
        {
          handleTerm_(path, parsed_term);
        }
      }
    }
Esempio n. 2
0
  void CVMappingFile::startElement(const XMLCh * const /*uri*/, const XMLCh * const /*local_name*/, const XMLCh * const qname, const Attributes & attributes)
  {

    tag_ = String(sm_.convert(qname));

    if (tag_ == "CvReference")
    {
      // CvReference cvName="PSI-PI" cvIdentifier="PSI-PI"/>
      CVReference ref;
      ref.setName(attributeAsString_(attributes, "cvName"));
      ref.setIdentifier(attributeAsString_(attributes, "cvIdentifier"));
      cv_references_.push_back(ref);
      return;
    }

    if (tag_ == "CvMappingRule")
    {
      // id="R1" cvElementPath="/psi-pi:MzIdentML/psi-pi:AnalysisSoftwareList/psi-pi:AnalysisSoftware/pf:ContactRole/pf:role/pf:cvParam" requirementLevel="MUST"  scopePath="" cvTermsCombinationLogic="OR
      actual_rule_.setIdentifier(attributeAsString_(attributes, "id"));
      String element_path = attributeAsString_(attributes, "cvElementPath");
      if (strip_namespaces_)
      {
        vector<String> slash_split;
        element_path.split('/', slash_split);
        if (slash_split.empty())
        {
          slash_split.push_back(element_path);
        }
        element_path = "";
        for (vector<String>::const_iterator it = slash_split.begin(); it != slash_split.end(); ++it)
        {
          if (it->empty())
          {
            continue;
          }

          vector<String> split;
          it->split(':', split);
          if (split.empty())
          {
            element_path += "/" + *it;
          }
          else
          {
            if (split.size() == 2)
            {
              element_path += "/" + split[1];
            }
            else
            {
              fatalError(LOAD, String("Cannot parse namespaces of path: '") + element_path + "'");
            }
          }
        }
      }
      actual_rule_.setElementPath(element_path);
      CVMappingRule::RequirementLevel level = CVMappingRule::MUST;
      String lvl = attributeAsString_(attributes, "requirementLevel");
      if (lvl == "MAY")
      {
        level = CVMappingRule::MAY;
      }
      else
      {
        if (lvl == "SHOULD")
        {
          level = CVMappingRule::SHOULD;
        }
        else
        {
          if (lvl == "MUST")
          {
            level = CVMappingRule::MUST;
          }
          else
          {
            // throw Exception
          }
        }
      }

      actual_rule_.setRequirementLevel(level);

      actual_rule_.setScopePath(attributeAsString_(attributes, "scopePath"));
      CVMappingRule::CombinationsLogic logic = CVMappingRule::OR;
      String lgc = attributeAsString_(attributes, "cvTermsCombinationLogic");
      if (lgc == "OR")
      {
        logic = CVMappingRule::OR;
      }
      else
      {
        if (lgc == "AND")
        {
          logic = CVMappingRule::AND;
        }
        else
        {
          if (lgc == "XOR")
          {
            logic = CVMappingRule::XOR;
          }
          else
          {
            // throw Exception;
          }
        }
      }
      actual_rule_.setCombinationsLogic(logic);
      return;
    }

    if (tag_ == "CvTerm")
    {
      // termAccession="PI:00266" useTermName="false" useTerm="false" termName="role type" isRepeatable="true" allowChildren="true" cvIdentifierRef="PSI-PI"
      CVMappingTerm term;

      term.setAccession(attributeAsString_(attributes, "termAccession"));
      term.setUseTerm(DataValue(attributeAsString_(attributes, "useTerm")).toBool());

      String use_term_name;
      optionalAttributeAsString_(use_term_name, attributes, "useTermName");
      if (use_term_name != "")
      {
        term.setUseTermName(DataValue(use_term_name).toBool());
      }
      else
      {
        term.setUseTermName(false);
      }
      term.setTermName(attributeAsString_(attributes, "termName"));

      String is_repeatable;
      optionalAttributeAsString_(is_repeatable, attributes, "isRepeatable");
      if (is_repeatable != "")
      {
        term.setIsRepeatable(DataValue(is_repeatable).toBool());
      }
      else
      {
        term.setIsRepeatable(true);
      }
      term.setAllowChildren(DataValue(attributeAsString_(attributes, "allowChildren")).toBool());
      term.setCVIdentifierRef(attributeAsString_(attributes, "cvIdentifierRef"));

      actual_rule_.addCVTerm(term);
      return;
    }

    return;
  }
Esempio n. 3
0
    // SAX start-of-element callback for tool description files.
    //
    // While in_ini_section_ is set, every element is forwarded unchanged to
    // ParamXMLHandler::startElement(). Otherwise the element name is stored in tag_, pushed onto
    // open_tags_ and handled as follows:
    //  - tool:       attribute 'status' ("external"/"internal") sets td_.is_internal
    //  - mapping:    stores attribute 'cl' under key 'id' in tde_.tr_table.mapping
    //  - file_post / file_pre: append a FileMapping (location, target) to the post/pre move list
    //  - ini_param:  switches to the ini section and resets p_
    //  - a fixed set of known elements is accepted without further action
    // Any other element is reported through error(LOAD, ...).
    //
    // uri, local_name - only forwarded to ParamXMLHandler inside the ini section
    // qname           - qualified element name
    // attributes      - attributes of the element
    void ToolDescriptionHandler::startElement(const XMLCh * const uri, const XMLCh * const local_name, const XMLCh * const qname, const xercesc::Attributes & attributes)
    {
      if (in_ini_section_)
      {
        ParamXMLHandler::startElement(uri, local_name, qname, attributes);
        return;
      }

      tag_ = sm_.convert(qname);
      open_tags_.push_back(tag_);
      //std::cout << "starting tag " << tag_ << "\n";

      if (tag_ == "tool")
      {
        String status = attributeAsString_(attributes, "status");
        if (status == "external")
        {
          td_.is_internal = false;
        }
        else if (status == "internal")
        {
          td_.is_internal = true;
        }
        else
        {
          error(LOAD, "ToolDescriptionHandler::startElement: Attribute 'status' of tag 'tool' has unknown value '" + status + "'.");
        }
        return;
      }
      if (tag_ == "mapping")
      {
        Int id = attributeAsInt_(attributes, "id");
        String command = attributeAsString_(attributes, "cl");
        tde_.tr_table.mapping[id] = command;
        return;
      }
      if (tag_ == "file_post")
      {
        Internal::FileMapping fm;
        fm.location = attributeAsString_(attributes, "location");
        fm.target = attributeAsString_(attributes, "target");
        tde_.tr_table.post_moves.push_back(fm);
        return;
      }
      if (tag_ == "file_pre")
      {
        Internal::FileMapping fm;
        fm.location = attributeAsString_(attributes, "location");
        fm.target = attributeAsString_(attributes, "target");
        tde_.tr_table.pre_moves.push_back(fm);
        return;
      }

      if (tag_ == "ini_param")
      {
        in_ini_section_ = true;
        p_ = Param(); // reset Param
        return;
      }

      // elements that are valid for every tool and need no action here
      if (tag_ == "ttd" || tag_ == "category" || tag_ == "e_category" || tag_ == "type")
      {
        return;
      }

      if (td_.is_internal)
      {
        if (tag_ == "name")
        {
          return;
        }
      }
      else
      {
        // elements accepted for external tools only
        // ("mapping" and "ini_param" have already been handled above and cannot reach this point)
        if (tag_ == "external" || tag_ == "cloptions" || tag_ == "path" || tag_ == "mappings" ||
            tag_ == "text" || tag_ == "onstartup" || tag_ == "onfail" || tag_ == "onfinish" || tag_ == "workingdirectory")
        {
          return;
        }
      }

      error(LOAD, "ToolDescriptionHandler::startElement(): Unknown element found: '" + tag_ + "', ignoring.");
    }
Esempio n. 4
0
  // SAX start-of-element callback for protXML files.
  //
  // Handled elements:
  //  - protein_summary_header:    database and enzyme go into the search parameters of prot_id_;
  //                               score type and orientation are set on prot_id_ and pep_id_
  //  - program_details:           date, search engine and version are stored in prot_id_; a freshly
  //                               generated unique identifier is assigned to prot_id_ and pep_id_
  //  - protein_group:             resets protein_group_ and stores its probability
  //  - protein:                   opens a new indistinguishable group, registers the protein and sets
  //                               coverage and score on the newest hit
  //  - indistinguishable_protein: registers the protein and copies the score of the previous hit
  //  - peptide:                   allocates a new PeptideHit in pep_hit_ and fills it
  //  - mod_aminoacid_mass:        applies a modification to the sequence of pep_hit_
  // All other elements are ignored.
  //
  // qname      - qualified element name
  // attributes - attributes of the element
  void ProtXMLFile::startElement(const XMLCh* const /*uri*/, const XMLCh* const /*local_name*/, const XMLCh* const qname, const xercesc::Attributes& attributes)
  {
    String tag = sm_.convert(qname);

    if (tag == "protein_summary_header")
    {
      String db = attributeAsString_(attributes, "reference_database");
      String enzyme = attributeAsString_(attributes, "sample_enzyme");
      ProteinIdentification::SearchParameters sp = prot_id_->getSearchParameters();
      sp.db = db;
      // find a matching enzyme name
      // NOTE(review): the result of getEnzyme() is dereferenced unchecked; what happens for an
      // unknown enzyme name is not visible here - confirm.
      sp.digestion_enzyme = *(ProteaseDB::getInstance()->getEnzyme(enzyme));
      prot_id_->setSearchParameters(sp);
      prot_id_->setScoreType("ProteinProphet probability");
      prot_id_->setHigherScoreBetter(true);
      pep_id_->setScoreType("ProteinProphet probability");
      pep_id_->setHigherScoreBetter(true);
    }
    // identifier for Protein & PeptideIdentification
    // <program_details analysis="proteinprophet" time="2009-11-29T18:30:03" ...
    if (tag == "program_details")
    {
      String analysis = attributeAsString_(attributes, "analysis");
      String time = attributeAsString_(attributes, "time");
      String version = attributeAsString_(attributes, "version");

      // try the default Qt text format first, then ISO 8601
      QDateTime date = QDateTime::fromString(time.toQString());
      if (!date.isValid())
      {
        date = QDateTime::fromString(time.toQString(), Qt::ISODate);
      }
      if (!date.isValid())
      {
        LOG_WARN << "Warning: Cannot parse 'time'='" << time << "'.\n";
      }
      // an unparsable date is stored as-is (invalid QDateTime)
      prot_id_->setDateTime(date);
      prot_id_->setSearchEngine(analysis);
      prot_id_->setSearchEngineVersion(version);
      String id = String(UniqueIdGenerator::getUniqueId()); // was: analysis + "_" + time;
      prot_id_->setIdentifier(id);
      pep_id_->setIdentifier(id);
    }

    if (tag == "protein_group")
    {
      // we group all <protein>'s and <indistinguishable_protein>'s in our
      // internal group structure
      protein_group_ = ProteinGroup();
      protein_group_.probability = attributeAsDouble_(attributes, "probability");
    }
    else if (tag == "protein")
    {
      // usually there will be just one <protein> per <protein_group>, but more
      // are possible; each <protein> is distinguishable from the other, we
      // nevertheless group them

      String protein_name = attributeAsString_(attributes, "protein_name");
      // open new "indistinguishable" group:
      prot_id_->insertIndistinguishableProteins(ProteinGroup());
      registerProtein_(protein_name); // create new protein

      // fill protein with life
      // NOTE(review): assumes registerProtein_() appended a hit, so getHits().back() is valid - confirm.
      double pc_coverage;
      if (optionalAttributeAsDouble_(pc_coverage, attributes, "percent_coverage"))
      {
        prot_id_->getHits().back().setCoverage(pc_coverage);
      }
      else
      {
        LOG_WARN << "Required attribute 'percent_coverage' missing\n";
      }
      prot_id_->getHits().back().setScore(attributeAsDouble_(attributes, "probability"));

    }
    else if (tag == "indistinguishable_protein")
    {
      String protein_name = attributeAsString_(attributes, "protein_name");
      // current last protein is from the same "indistinguishable" group:
      // NOTE(review): relies on a preceding <protein> having created at least one hit.
      double score = prot_id_->getHits().back().getScore();
      registerProtein_(protein_name);
      // score of group leader might technically not be transferable (due to
      // protein length etc.), but we still transfer it to allow filtering of
      // proteins by score without disrupting the groups:
      prot_id_->getHits().back().setScore(score);
    }
    else if (tag == "peptide")
    {
      // If a peptide is degenerate it will show in multiple groups, but have different statistics (e.g. 'nsp_adjusted_probability')
      // We thus treat each instance as a separate peptide
      // todo/improvement: link them by a group in PeptideIdentification?!
      // NOTE(review): pep_hit_ is allocated here and not released in this function; presumably it
      // is consumed and deleted when the element is closed - confirm.
      pep_hit_ = new PeptideHit;
      pep_hit_->setSequence(AASequence::fromString(String(attributeAsString_(attributes, "peptide_sequence"))));
      pep_hit_->setScore(attributeAsDouble_(attributes, "nsp_adjusted_probability"));

      Int charge;
      if (optionalAttributeAsInt_(charge, attributes, "charge"))
      {
        pep_hit_->setCharge(charge);
      }
      else
      {
        LOG_WARN << "Required attribute 'charge' missing\n";
      }

      // add accessions of all indistinguishable proteins the peptide belongs to
      // NOTE(review): relies on a preceding <protein> having opened a group.
      ProteinIdentification::ProteinGroup& indist = prot_id_->getIndistinguishableProteins().back();
      for (StringList::const_iterator accession = indist.accessions.begin(); accession != indist.accessions.end(); ++accession)
      {
        PeptideEvidence pe;
        pe.setProteinAccession(*accession);
        pep_hit_->addPeptideEvidence(pe);
      }
      pep_hit_->setMetaValue("is_unique", String(attributeAsString_(attributes, "is_nondegenerate_evidence")) == "Y" ? 1 : 0);
      pep_hit_->setMetaValue("is_contributing", String(attributeAsString_(attributes, "is_contributing_evidence")) == "Y" ? 1 : 0);
    }
    else if (tag == "mod_aminoacid_mass")
    {
      // relates to the last seen peptide (we hope)
      const Int raw_position = attributeAsInt_(attributes, "position");
      double mass = attributeAsDouble_(attributes, "mass");
      AASequence temp_aa_sequence(pep_hit_->getSequence());

      // 'position' is 1-based. Reject values outside of the sequence before they are turned into
      // an (unsigned) index: 0 or a negative number would otherwise wrap around to a huge index.
      if (raw_position < 1 || static_cast<Size>(raw_position) > temp_aa_sequence.size())
      {
        error(LOAD, String("Modification position '") + String(raw_position) + "' is outside of the peptide sequence");
        return;
      }
      const Size position = static_cast<Size>(raw_position);

      String temp_description = "";
      String origin = temp_aa_sequence[position - 1].getOneLetterCode();
      matchModification_(mass, origin, temp_description);
      if (temp_description.size() > 0) // only if a mod was found
      {
        // e.g. Carboxymethyl (C)
        vector<String> mod_split;
        temp_description.split(' ', mod_split);
        if (mod_split.size() == 2)
        {
          if (mod_split[1] == "(C-term)" || ModificationsDB::getInstance()->getModification(temp_description).getTermSpecificity() == ResidueModification::C_TERM)
          {
            temp_aa_sequence.setCTerminalModification(mod_split[0]);
          }
          else if (mod_split[1] == "(N-term)" || ModificationsDB::getInstance()->getModification(temp_description).getTermSpecificity() == ResidueModification::N_TERM)
          {
            temp_aa_sequence.setNTerminalModification(mod_split[0]);
          }
          else
          {
            // search this mod, if not directly use a general one
            temp_aa_sequence.setModification(position - 1, mod_split[0]);
          }
        }
        else
        {
          error(LOAD, String("Cannot parse modification '") + temp_description + "@" + position + "'");
        }
      }
      else
      {
        error(LOAD, String("Cannot find modification '") + String(mass) + " " + String(origin) + "' @" + String(position));
      }

      pep_hit_->setSequence(temp_aa_sequence);
    }
  }