예제 #1
0
void MzidentmlReader::getMaxMinCharge(const std::string &fn, bool isDecoy) {
  ifstream ifs;
  ifs.exceptions(ifstream::badbit | ifstream::failbit);
  try {
    ifs.open(fn.c_str());
    parser p;
    bool schemaVal = true;
    xml_schema::dom::auto_ptr<xercesc_3_1::DOMDocument> doc
        (p.start(ifs, fn.c_str(), schemaVal, schemaDefinition, schema_major, 
                 schema_minor, scheme_namespace));

    // MT: This seems to be a bit slow for doing nothing
    for (doc = p.next(); doc.get() != 0
            && !XMLString::equals(spectrumIdentificationResultStr, doc->getDocumentElement()->getTagName()); doc = p.next()) {
      // Let's skip some sub trees that we are not interested, e.g. AnalysisCollection
    }
    // For each SpectrumIdentificationResult
    int itemCount = 1;
    for (; doc.get() != 0 && XMLString::equals(spectrumIdentificationResultStr,
            doc->getDocumentElement()->getTagName()); doc = p.next()) {
      ::mzIdentML_ns::SpectrumIdentificationResultType specIdResult(*doc->getDocumentElement());
      // For each SpectrumIdentificationItem
      BOOST_FOREACH(const ::mzIdentML_ns::SpectrumIdentificationItemType & item, specIdResult.SpectrumIdentificationItem()) {
        minCharge = (std::min)(item.chargeState(), minCharge);
        maxCharge = (std::max)(item.chargeState(), maxCharge);
        searchEngineSpecificParsing(item, itemCount);  // Virtual function that potentially checks the features
        ++itemCount;
      }
    }
  } catch (ifstream::failure e) {
    cerr << "Exception opening/reading file :" << fn << endl;
  } catch (const xercesc::DOMException& e) {
    char * tmpStr = XMLString::transcode(e.getMessage());
    std::cerr << "catch xercesc_3_1::DOMException=" << tmpStr << std::endl;
    XMLString::release(&tmpStr);
  } catch (const xml_schema::exception& e) {
    cerr << "XML schema exception in getMaxMinCharge: " << e << endl;
  } catch (MyException e) {
	  cerr << e.what() << endl;
	  exit(1);
  } catch (std::exception e) {
    cerr << "Some unknown exception in getMaxMinCharge: " << e.what() << endl;
  }
  ifs.close();
  return;
}
예제 #2
0
void MzidentmlReader::read(const std::string &fn, bool isDecoy, boost::shared_ptr<FragSpectrumScanDatabase> database)
{
  namespace xml = xsd::cxx::xml;
  scanNumberMapType scanNumberMap;
  ifstream ifs;
  try
  {
    ifs.exceptions(ifstream::badbit | ifstream::failbit);
    ifs.open(fn.c_str());
    parser p;
    xml_schema::dom::auto_ptr<xercesc_3_1::DOMDocument> doc
            (p.start(ifs, fn.c_str(), true, schemaDefinition, schema_major, schema_minor, scheme_namespace));

    //NOTE wouldnt be  better to use the get tag by Name to jump SequenceCollenction directly?
    while (doc.get() != 0 && !XMLString::equals(sequenceCollectionStr,
            doc->getDocumentElement()->getTagName())) {
      doc = p.next(); // Let's skip some sub trees that we are not interested, e.g. AuditCollection
    }

    assert(doc.get());
    mzIdentML_ns::SequenceCollectionType sequenceCollection(*doc->getDocumentElement());

    peptideMap.clear();
    proteinMap.clear();
    peptideEvidenceMap.clear();

    //NOTE probably I can get rid of these hash tables with a proper access to elements by tag and id

    BOOST_FOREACH(const mzIdentML_ns::SequenceCollectionType::Peptide_type &peptide, sequenceCollection.Peptide()) {
      //PEPTIDE
      mzIdentML_ns::SequenceCollectionType::Peptide_type *pept =
              new mzIdentML_ns::SequenceCollectionType::Peptide_type(peptide);
      peptideMap.insert(std::make_pair(peptide.id(), pept));
    }

    BOOST_FOREACH(const mzIdentML_ns::SequenceCollectionType::DBSequence_type &protein, sequenceCollection.DBSequence()) {
      //PROTEIN
      mzIdentML_ns::SequenceCollectionType::DBSequence_type *prot =
              new mzIdentML_ns::SequenceCollectionType::DBSequence_type(protein);
      proteinMap.insert(std::make_pair(protein.id(), prot));
    }

    BOOST_FOREACH(const ::mzIdentML_ns::PeptideEvidenceType &peptideE, sequenceCollection.PeptideEvidence()) {
      //PEPTIDE EVIDENCE
      ::mzIdentML_ns::PeptideEvidenceType *peptE = new mzIdentML_ns::PeptideEvidenceType(peptideE);
      peptideEvidenceMap.insert(std::make_pair(peptideE.id(), peptE));
    }

    for (doc = p.next(); doc.get() != 0 && !XMLString::equals(spectrumIdentificationResultStr,
            doc->getDocumentElement()->getTagName()); doc = p.next()) {
      // Let's skip some sub trees that we are not interested, e.g. AnalysisCollection
    }

    unsigned scanNumber = 0;
    bool useRankedScanNumbers = false;  	/* True scan numbers are used,
    										if they can't be found, use ranked scan numbers from 1 and up. */
    for (; doc.get() != 0 && XMLString::equals(spectrumIdentificationResultStr,
            doc->getDocumentElement()->getTagName()); doc = p.next()) {
      ::mzIdentML_ns::SpectrumIdentificationResultType specIdResult(*doc->getDocumentElement());
      assert(specIdResult.SpectrumIdentificationItem().size() > 0);
      unsigned numberHitsSpectra = 0;

      //Find scan number from the cvParam element in spetrumIdentificationResults
      if(!useRankedScanNumbers) {
    	  bool foundScanNumber = false;  // Indicates whether a proper scan number was found
    	  BOOST_FOREACH(const ::mzIdentML_ns::CVParamType & cv, specIdResult.cvParam()) {
    		  std::string param_name(cv.name().c_str());
    		  std::string expected_name("scan number(s)");
    		  if (param_name == expected_name) {
    			  scanNumber = boost::lexical_cast<unsigned>(cv.value().get().c_str());
    			  foundScanNumber = true;
    		  }
    	  }
    	  if(!foundScanNumber || scanNumber == 0) {
    		  std::cerr << "No scan number was found for a PSM (or it equaled 0), scans are ranked from 1 and up" << std::endl;
    		  useRankedScanNumbers = true;
    	  }
      }
      // If no scan numbers were found, or a scan of 0 was found, just rank them
      if(useRankedScanNumbers) {
    	  ++scanNumber;
      }

      BOOST_FOREACH(const ::mzIdentML_ns::SpectrumIdentificationItemType & item, specIdResult.SpectrumIdentificationItem()) {
	      if(++numberHitsSpectra <= po->hitsPerSpectrum) {
	        assert(item.experimentalMassToCharge());
          int charge = item.chargeState();
	        ::percolatorInNs::fragSpectrumScan::experimentalMass_type experimentalMass = item.experimentalMassToCharge()*charge - proton_mass*charge;
	        createPSM(item, experimentalMass, isDecoy, scanNumber, database, fn);
	      }
      }
    }

    cleanHashMaps();
    ifs.close();
  }