void MzMLSpectrumDecoder::domParseString(const std::string& in, std::vector<BinaryData>& data_) { //------------------------------------------------------------- // Create parser from input string using MemBufInputSource //------------------------------------------------------------- xercesc::MemBufInputSource myxml_buf(reinterpret_cast<const unsigned char*>(in.c_str()), in.length(), "myxml (in memory)"); xercesc::XercesDOMParser* parser = new xercesc::XercesDOMParser(); parser->setDoNamespaces(false); parser->setDoSchema(false); parser->setLoadExternalDTD(false); parser->parse(myxml_buf); //------------------------------------------------------------- // Start parsing // see http://www.yolinux.com/TUTORIALS/XML-Xerces-C.html //------------------------------------------------------------- // no need to free this pointer - owned by the parent parser object xercesc::DOMDocument* doc = parser->getDocument(); // Get the top-level element (for example <spectrum> or <chromatogram>) xercesc::DOMElement* elementRoot = doc->getDocumentElement(); if (!elementRoot) { delete parser; } // Extract the binaryDataArray tag (there may be multiple) XMLCh* tag = xercesc::XMLString::transcode("binaryDataArray"); xercesc::DOMNodeList* li = elementRoot->getElementsByTagName(tag); xercesc::XMLString::release(&tag); /// Do the processing of the DOM for (Size i = 0; i < li->getLength(); i++) { handleBinaryDataArray(li->item(i), data_); } delete parser; }
int IndexedMzMLDecoder::domParseIndexedEnd_(std::string in, OffsetVector& spectra_offsets, OffsetVector& chromatograms_offsets) { /* We parse something like <indexedmzML> <indexList count="1"> <index name="chromatogram"> <offset idRef="1">9752</offset> </index> </indexList> <indexListOffset>26795</indexListOffset> <fileChecksum>0</fileChecksum> </indexedmzML> */ //------------------------------------------------------------- // Create parser from input string using MemBufInputSource //------------------------------------------------------------- xercesc::MemBufInputSource myxml_buf( reinterpret_cast<const unsigned char*>(in.c_str()), in.length(), "myxml (in memory)"); xercesc::XercesDOMParser parser; parser.setDoNamespaces(false); parser.setDoSchema(false); parser.setLoadExternalDTD(false); parser.parse(myxml_buf); //------------------------------------------------------------- // Start parsing // see http://www.yolinux.com/TUTORIALS/XML-Xerces-C.html //------------------------------------------------------------- // no need to free this pointer - owned by the parent parser object xercesc::DOMDocument* doc = parser.getDocument(); // Get the top-level element ("indexedmzML") xercesc::DOMElement* elementRoot = doc->getDocumentElement(); if (!elementRoot) { std::cerr << "IndexedMzMLDecoder::domParseIndexedEnd Error: " << "No root element found:" << std::endl << std::endl << in << std::endl; return -1; } // Extract the indexList tag (there should only be one) XMLCh* x_tag = xercesc::XMLString::transcode("indexList"); xercesc::DOMNodeList* li = elementRoot->getElementsByTagName(x_tag); xercesc::XMLString::release(&x_tag); if (li->getLength() != 1) { std::cerr << "IndexedMzMLDecoder::domParseIndexedEnd Error: " << "no indexList element found:" << std::endl << std::endl << in << std::endl; return -1; } xercesc::DOMNode* indexListNode = li->item(0); XMLCh* x_idref_tag = xercesc::XMLString::transcode("idRef"); XMLCh* x_name_tag = xercesc::XMLString::transcode("name"); xercesc::DOMNodeList* index_elems = indexListNode->getChildNodes(); const XMLSize_t nodeCount_ = index_elems->getLength(); // Iterate through indexList elements (only two elements should be present // which should be either spectrum or chromatogram offsets) for (XMLSize_t j = 0; j < nodeCount_; ++j) { xercesc::DOMNode* currentNode = index_elems->item(j); if (currentNode->getNodeType() && // true is not NULL currentNode->getNodeType() == xercesc::DOMNode::ELEMENT_NODE) // is element { std::vector<std::pair<std::string, std::streampos> > result; xercesc::DOMNode* firstChild = currentNode->getFirstChild(); xercesc::DOMNode* lastChild = currentNode->getLastChild(); xercesc::DOMNode* iter = firstChild; // Iterate through children // NOTE: Using xercesc::DOMNodeList and "item" is a very bad idea since // each "item" call has complexity of O(n), see the // implementation in DOMNodeListImpl.cpp : // https://svn.apache.org/repos/asf/xerces/c/trunk/src/xercesc/dom/impl/DOMNodeListImpl.cpp // while (iter != lastChild) { iter = iter->getNextSibling(); xercesc::DOMNode* currentONode = iter; if (currentONode->getNodeType() && // true is not NULL currentONode->getNodeType() == xercesc::DOMNode::ELEMENT_NODE) // is element { xercesc::DOMElement* currentElement = dynamic_cast<xercesc::DOMElement*>(currentONode); char* x_name = xercesc::XMLString::transcode(currentElement->getAttribute(x_idref_tag)); char* x_offset = xercesc::XMLString::transcode(currentONode->getTextContent()); std::streampos thisOffset = OpenMS::IndexedMzMLUtils::stringToStreampos( String(x_offset) ); result.push_back(std::make_pair(String(x_name), thisOffset)); xercesc::XMLString::release(&x_name); xercesc::XMLString::release(&x_offset); } } // should be either spectrum or chromatogram ... xercesc::DOMElement* currentElement = dynamic_cast<xercesc::DOMElement*>(currentNode); char* x_indexName = xercesc::XMLString::transcode(currentElement->getAttribute(x_name_tag)); std::string name(x_indexName); xercesc::XMLString::release(&x_indexName); if (name == "spectrum") { spectra_offsets = result; } else if (name == "chromatogram") { chromatograms_offsets = result; } else { std::cerr << "IndexedMzMLDecoder::domParseIndexedEnd Error: expected only " << "'spectrum' or 'chromatogram' below indexList but found instead '" << name << "'." << std::endl; xercesc::XMLString::release(&x_idref_tag); xercesc::XMLString::release(&x_name_tag); return -1; } } } xercesc::XMLString::release(&x_idref_tag); xercesc::XMLString::release(&x_name_tag); return 0; }