// // t e s t // void DinoXmlScanner::test(){ bool terminate = false; XmlToken currentToken; while (!terminate){ cout << "Line " << getInputFileLineCounter() << ": "; currentToken = getNextToken(); switch (currentToken){ case openingBracket: cout << "<" << endl; break; case closingBracket: cout << ">" << endl; break; case questionMark: cout << "?" << endl; break; case exclamationMark: cout << "!" << endl; break; case minus: cout << "-" << endl; break; case slash: cout << "/" << endl; break; case equalSign: cout << "<" << endl; break; case identifier: cout << "Identifier: " << m_pCurrentTokenString << endl; break; case attributeValue: cout << "Attribute value: " << m_pCurrentTokenString << endl; break; case quotedValue: cout << "Quoted value: \"" << m_pCurrentTokenString << "\"" << endl; break; case endOfFile: cout << "EOF" << endl; terminate = true; break; default: cout << "Invalid token!" << endl; } // switch } // while } // testScanner
// // p a r s e // // Take a look at the state machine of parse() to understand // what is going on here. // // TODO: It seems to be useful that this function throws an exception // if something goes wrong. XmlTagObject *XmlParser::parse() { // Increment recursion depth ++m_recursionDepth; // currentTagObject is the tag object we want to create // in this invocation of parse() XmlTagObject *currentTagObject = 0; // Now we are in the start state of the state machine for( ; ; ) { XmlToken token = m_pScanner->getNextToken(); // Expect "<", otherwise failure if (token != openingBracket) { reportError("XmlParser::parse", __LINE__, "Opening Bracket expected!", getInputFileLineCounter()); } // Let's look what comes after "<" token = m_pScanner->getNextToken(); // Read "?", i.e. we have the XML header line <? ... ?> if (token == questionMark) { // Skip until we reach the matching question mark if (!m_pScanner->skipUntil('?')) { reportError("XmlParser::parse", __LINE__, "Could not found the matching '?'", getInputFileLineCounter()); } // Consume ">", otherwise failure token = m_pScanner->getNextToken(); if (token != closingBracket) { reportError("XmlParser::parse", __LINE__, "Closing Bracket expected!", getInputFileLineCounter()); } // Go to start state of the state machine continue; } // end of Read "?" // Read "!", i.e. we have a XML comment <!-- bla --> if (token == exclamationMark) { // A preambel comment <!lala > which could be also nested if ((m_pScanner->getNextToken() != minus) || (m_pScanner->getNextToken() != minus)) { if (!m_pScanner->skipUntilMatchingClosingBracket()) { reportError("XmlParser::parse", __LINE__, "Could not find closing comment bracket!", getInputFileLineCounter()); } continue; } // Find end of comment bool endOfCommentFound = false; while (!endOfCommentFound) { // Skip until we find a - (and skip over it) if (!m_pScanner->skipUntil('-', true)) { reportError("XmlParser::parse", __LINE__, "Closing --> of comment not found!", getInputFileLineCounter()); } // The next characters must be -> (note that one minus is already consumed) if ((m_pScanner->getNextToken() == minus) && (m_pScanner->getNextToken() == closingBracket)) { endOfCommentFound = true; } } // while // Go to start state of the state machine continue; } // end of Read "!" // We have found an identifier, i.e. a tag name if (token == identifier) { // Get hash element of token string HashedString *tagName = hashString(m_pScanner->getCurrentTokenString()); // Create new tag object currentTagObject = new XmlTagObject(tagName); if (currentTagObject == 0) { OGDF_THROW(InsufficientMemoryException); } //push (opening) tagName to stack m_tagObserver.push(tagName->key()); // set depth of current tag object currentTagObject->setDepth(m_recursionDepth); // set line of the tag object in the parsed xml document currentTagObject->setLine(getInputFileLineCounter()); // Next token token = m_pScanner->getNextToken(); // Again we found an identifier, so it must be an attribute if (token == identifier) { // Read list of attributes do { // Save the attribute name HashedString *attributeName = hashString(m_pScanner->getCurrentTokenString()); // Consume "=", otherwise failure token = m_pScanner->getNextToken(); if (token != equalSign) { reportError("XmlParser::parse", __LINE__, "Equal Sign expected!", getInputFileLineCounter()); } // Read value token = m_pScanner->getNextToken(); if ((token != quotedValue) && (token != identifier) && (token != attributeValue)) { reportError("XmlParser::parse", __LINE__, "No valid attribute value!", getInputFileLineCounter()); } // Create a new XmlAttributeObject XmlAttributeObject *currentAttributeObject = new XmlAttributeObject(attributeName, hashString(m_pScanner->getCurrentTokenString())); if (currentAttributeObject == 0) { OGDF_THROW(InsufficientMemoryException); } // Append attribute to attribute list of the current tag object appendAttributeObject(currentTagObject, currentAttributeObject); // Get next token token = m_pScanner->getNextToken(); } while (token == identifier); } // Found an identifier of an attribute // Read "/", i.e. the tag is ended immeadiately, e.g. // <A ... /> without a closing tag </A> if (token == slash) { // Consume ">", otherwise failure token = m_pScanner->getNextToken(); if (token != closingBracket) { reportError("XmlParser::parse", __LINE__, "Closing Bracket expected!", getInputFileLineCounter()); } // The tag is closed and ended so we return string s = m_tagObserver.pop(); --m_recursionDepth; return currentTagObject; } // end of Read "/" // Read ">", i.e. the tag is closed and we // expect some content if (token == closingBracket) { // We read something different from "<", so we have to // deal with a tag value now, i.e. a string inbetween the // opening and the closing tag, e.g. <A ...> lalala </A> if (m_pScanner->testNextToken() != openingBracket) { // Read the characters until "<" is reached and put them into // currentTagObject m_pScanner->readStringUntil('<'); currentTagObject->m_pTagValue = hashString(m_pScanner->getCurrentTokenString()); // We expect a closing tag now, i.e. </id> token = m_pScanner->getNextToken(); if (token != openingBracket) { reportError("XmlParser::parse", __LINE__, "Opening Bracket expected!", getInputFileLineCounter()); } token = m_pScanner->getNextToken(); if (token != slash) { reportError("XmlParser::parse", __LINE__, "Slash expected!", getInputFileLineCounter()); } token = m_pScanner->getNextToken(); if (token != identifier) { reportError("XmlParser::parse", __LINE__, "Identifier expected!", getInputFileLineCounter()); } // next token is the closing tag string nextTag(m_pScanner->getCurrentTokenString()); // pop corresponding tag from stack string s = m_tagObserver.pop(); // compare the two tags if (s != nextTag) { // the closing tag doesn't correspond to the opening tag: reportError("XmlParser::parse", __LINE__, "wrong closing tag!", getInputFileLineCounter()); } token = m_pScanner->getNextToken(); if (token != closingBracket) { reportError("XmlParser::parse", __LINE__, "Closing Bracket expected!", getInputFileLineCounter()); } // The tag is closed so we return --m_recursionDepth; return currentTagObject; } // end of read something different from "<" // Found "<", so a (series of) new tag begins and we have to perform // recursive invocation of parse() // // There are two exceptions: // - a slash follows afer <, i.e. we have a closing tag // - an exclamation mark follows after <, i.e. we have a comment while (m_pScanner->testNextToken() == openingBracket) { // Leave the while loop if a closing tag occurs if (m_pScanner->testNextNextToken() == slash) { break; } // Ignore comments if (m_pScanner->testNextNextToken() == exclamationMark) { // Comment must start with <!-- if ((m_pScanner->getNextToken() != openingBracket) || (m_pScanner->getNextToken() != exclamationMark) || (m_pScanner->getNextToken() != minus) || (m_pScanner->getNextToken() != minus)) { reportError("XmlParser::parse", __LINE__, "Comment must start with <!--", getInputFileLineCounter()); } // Find end of comment bool endOfCommentFound = false; while (!endOfCommentFound) { // Skip until we find a - (and skip over it) if (!m_pScanner->skipUntil('-', true)) { reportError("XmlParser::parse", __LINE__, "Closing --> of comment not found!", getInputFileLineCounter()); } // The next characters must be -> (note that one minus is already consumed) if ((m_pScanner->getNextToken() == minus) && (m_pScanner->getNextToken() == closingBracket)) { endOfCommentFound = true; } } // while // Proceed with outer while loop continue; } // Ignore comments // The new tag object is a son of the current tag object XmlTagObject *sonTagObject = parse(); appendSonTagObject(currentTagObject, sonTagObject); } // while // Now we have found all tags. // We expect a closing tag now, i.e. </id> token = m_pScanner->getNextToken(); if (token != openingBracket) { reportError("XmlParser::parse", __LINE__, "Opening Bracket expected!", getInputFileLineCounter()); } token = m_pScanner->getNextToken(); if (token != slash) { reportError("XmlParser::parse", __LINE__, "Slash expected!", getInputFileLineCounter()); } token = m_pScanner->getNextToken(); if (token != identifier) { reportError("XmlParser::parse", __LINE__, "Identifier expected!", getInputFileLineCounter()); } // next token is the closing tag string nextTag(m_pScanner->getCurrentTokenString()); // pop corresponding tag from stack string s = m_tagObserver.pop(); // compare the two tags if (s != nextTag) { // the closing tag doesn't correspond to the opening tag: reportError("XmlParser::parse", __LINE__, "wrong closing tag!", getInputFileLineCounter()); } token = m_pScanner->getNextToken(); if (token != closingBracket) { reportError("XmlParser::parse", __LINE__, "Closing Bracket expected!", getInputFileLineCounter()); } --m_recursionDepth; // check if Document contains code after the last closing bracket if (m_recursionDepth == 0) { token = m_pScanner->getNextToken(); if (token != endOfFile) { reportError("XmlParser::parse", __LINE__, "Document contains code after the last closing bracket!", getInputFileLineCounter()); } } return currentTagObject; } // end of Read ">" OGDF_ASSERT(false) //continue; } // end of found identifier OGDF_ASSERT(false) } // end of while (true)