Exemplo n.º 1
0
/**
 * Parses the given XML String and returns the main XMLNode
 * @param xml_string XML String
 * @param tag (?)
 * @param pResults XMLResult object to write in on error or success
 * @return The main XMLNode or empty XMLNode on error
 */
XMLNode *
XML::ParseString(const TCHAR *xml_string, Results *pResults)
{
  // If String is empty
  if (xml_string == nullptr) {
    // If XML::Results object exists
    if (pResults) {
      // -> Save the error type
      pResults->error = eXMLErrorNoElements;
      pResults->line = 0;
      pResults->column = 0;
    }

    // -> Return empty XMLNode
    return nullptr;
  }

  Error error;
  XMLNode xnode = XMLNode::Null();
  Parser xml = { nullptr, 0, eXMLErrorNone, nullptr, 0, true, };

  xml.lpXML = xml_string;

  // Fill the XMLNode xnode with the parsed data of xml
  // note: xnode is now the document node, not the main XMLNode
  ParseXMLElement(xnode, &xml);
  error = xml.error;

  // If the document node does not have childnodes
  XMLNode *child = xnode.GetFirstChild();
  if (child == nullptr) {
    // If XML::Results object exists
    if (pResults) {
      // -> Save the error type
      pResults->error = eXMLErrorNoElements;
      pResults->line = 0;
      pResults->column = 0;
    }

    // -> Return empty XMLNode
    return nullptr;
  } else {
    // Set the document's first childnode as new main node
    xnode = std::move(*child);
  }

  // If the new main node is the xml declaration
  // -> try to take the first childnode again
  if (xnode.IsDeclaration()) {
    // If the declaration does not have childnodes
    child = xnode.GetFirstChild();
    if (child == nullptr) {
      // If XML::Results object exists
      if (pResults) {
        // -> Save the error type
        pResults->error = eXMLErrorNoElements;
        pResults->line = 0;
        pResults->column = 0;
      }

      // -> Return empty XMLNode
      return nullptr;
    } else {
      // Set the declaration's first childnode as new main node
      xnode = std::move(*child);
    }
  }

  // If an XML::Results object exists
  // -> save the result (error/success)
  if (pResults) {
    pResults->error = error;

    // If we have an error
    if (error != eXMLErrorNone) {
      // Find which line and column it starts on and
      // save it in the XML::Results object
      CountLinesAndColumns(xml.lpXML, xml.nIndex, pResults);
    }
  }

  // If error occurred -> set node to empty
  if (error != eXMLErrorNone)
    return nullptr;

  // Return the node (empty, main or child of main that equals tag)
  return new XMLNode(std::move(xnode));
}
Exemplo n.º 2
0
/**
 * Recursively parse an XML element.
 */
static bool
XML::ParseXMLElement(XMLNode &node, Parser *pXML)
{
  bool is_declaration;
  const TCHAR *text = nullptr;
  XMLNode *pNew;
  enum Status status; // inside or outside a tag
  enum Attrib attrib = eAttribName;

  /* the name of the attribute that is currently being */
  tstring attribute_name;

  assert(pXML);

  // If this is the first call to the function
  if (pXML->nFirst) {
    // Assume we are outside of a tag definition
    pXML->nFirst = false;
    status = eOutsideTag;
  } else {
    // If this is not the first call then we should only be called when inside a tag.
    status = eInsideTag;
  }

  // Iterate through the tokens in the document
  while (true) {
    // Obtain the next token
    NextToken token = GetNextToken(pXML);
    if (gcc_unlikely(token.type == eTokenError))
      return false;

    // Check the current status
    switch (status) {
      // If we are outside of a tag definition
    case eOutsideTag:

      // Check what type of token we obtained
      switch (token.type) {
        // If we have found text or quoted text
      case eTokenText:
      case eTokenQuotedText:
      case eTokenEquals:
        if (text == nullptr)
          text = token.pStr;

        break;

        // If we found a start tag '<' and declarations '<?'
      case eTokenTagStart:
      case eTokenDeclaration:
        // Cache whether this new element is a declaration or not
        is_declaration = token.type == eTokenDeclaration;

        // If we have node text then add this to the element
        if (text != nullptr) {
          size_t length = StripRight(text, token.pStr - text);
          node.AddText(text, length);
          text = nullptr;
        }

        // Find the name of the tag
        token = GetNextToken(pXML);

        // Return an error if we couldn't obtain the next token or
        // it wasnt text
        if (token.type != eTokenText) {
          pXML->error = eXMLErrorMissingTagName;
          return false;
        }

        // If the name of the new element differs from the name of
        // the current element we need to add the new element to
        // the current one and recurse
        pNew = &node.AddChild(token.pStr, token.length,
                              is_declaration);

        while (true) {
          // Callself to process the new node.  If we return
          // FALSE this means we dont have any more
          // processing to do...

          if (!ParseXMLElement(*pNew, pXML)) {
            return false;
          } else {
            // If the call to recurse this function
            // evented in a end tag specified in XML then
            // we need to unwind the calls to this
            // function until we find the appropriate node
            // (the element name and end tag name must
            // match)
            if (pXML->cbEndTag) {
              // If we are back at the root node then we
              // have an unmatched end tag
              if (node.GetName() == nullptr) {
                pXML->error = eXMLErrorUnmatchedEndTag;
                return false;
              }

              // If the end tag matches the name of this
              // element then we only need to unwind
              // once more...

              if (CompareTagName(node.GetName(), pXML->lpEndTag)) {
                pXML->cbEndTag = 0;
              }

              return true;
            } else {
              // If we didn't have a new element to create
              break;
            }
          }
        }
        break;

        // If we found an end tag
      case eTokenTagEnd:

        // If we have node text then add this to the element
        if (text != nullptr) {
          size_t length = StripRight(text, token.pStr - text);
          TCHAR *text2 = FromXMLString(text, length);
          if (text2 == nullptr) {
            pXML->error = eXMLErrorUnexpectedToken;
            return false;
          }

          node.AddText(text2);
          free(text2);
          text = nullptr;
        }

        // Find the name of the end tag
        token = GetNextToken(pXML);

        // The end tag should be text
        if (token.type != eTokenText) {
          pXML->error = eXMLErrorMissingEndTagName;
          return false;
        }

        // After the end tag we should find a closing tag
        if (GetNextToken(pXML).type != eTokenCloseTag) {
          pXML->error = eXMLErrorMissingEndTagName;
          return false;
        }

        // We need to return to the previous caller.  If the name
        // of the tag cannot be found we need to keep returning to
        // caller until we find a match
        if (!CompareTagName(node.GetName(), token.pStr)) {
          pXML->lpEndTag = token.pStr;
          pXML->cbEndTag = token.length;
        }

        // Return to the caller
        return true;

        // Errors...
      case eTokenCloseTag: /* '>'         */
      case eTokenShortHandClose: /* '/>'        */
        pXML->error = eXMLErrorUnexpectedToken;
        return false;
      default:
        break;
      }
      break;

      // If we are inside a tag definition we need to search for attributes
    case eInsideTag:
      // Check what part of the attribute (name, equals, value) we
      // are looking for.
      switch (attrib) {
        // If we are looking for a new attribute
      case eAttribName:
        // Check what the current token type is
        switch (token.type) {
          // If the current type is text...
          // Eg.  'attribute'
        case eTokenText:
          // Cache the token then indicate that we are next to
          // look for the equals
          attribute_name.assign(token.pStr, token.length);
          attrib = eAttribEquals;
          break;

          // If we found a closing tag...
          // Eg.  '>'
        case eTokenCloseTag:
          // We are now outside the tag
          status = eOutsideTag;
          break;

          // If we found a short hand '/>' closing tag then we can
          // return to the caller
        case eTokenShortHandClose:
          return true;

          // Errors...
        case eTokenQuotedText: /* '"SomeText"'   */
        case eTokenTagStart: /* '<'            */
        case eTokenTagEnd: /* '</'           */
        case eTokenEquals: /* '='            */
        case eTokenDeclaration: /* '<?'           */
          pXML->error = eXMLErrorUnexpectedToken;
          return false;
        default:
          break;
        }
        break;

        // If we are looking for an equals
      case eAttribEquals:
        // Check what the current token type is
        switch (token.type) {
          // If the current type is text...
          // Eg.  'Attribute AnotherAttribute'
        case eTokenText:
          // Add the unvalued attribute to the list
          node.AddAttribute(std::move(attribute_name), _T(""), 0);
          // Cache the token then indicate.  We are next to
          // look for the equals attribute
          attribute_name.assign(token.pStr, token.length);
          break;

          // If we found a closing tag 'Attribute >' or a short hand
          // closing tag 'Attribute />'
        case eTokenShortHandClose:
        case eTokenCloseTag:
          assert(!attribute_name.empty());

          // If we are a declaration element '<?' then we need
          // to remove extra closing '?' if it exists
          if (node.IsDeclaration() && attribute_name.back() == _T('?')) {
            attribute_name.pop_back();
          }

          if (!attribute_name.empty())
            // Add the unvalued attribute to the list
            node.AddAttribute(std::move(attribute_name), _T(""), 0);

          // If this is the end of the tag then return to the caller
          if (token.type == eTokenShortHandClose)
            return true;

          // We are now outside the tag
          status = eOutsideTag;
          break;

          // If we found the equals token...
          // Eg.  'Attribute ='
        case eTokenEquals:
          // Indicate that we next need to search for the value
          // for the attribute
          attrib = eAttribValue;
          break;

          // Errors...
        case eTokenQuotedText: /* 'Attribute "InvalidAttr"'*/
        case eTokenTagStart: /* 'Attribute <'            */
        case eTokenTagEnd: /* 'Attribute </'           */
        case eTokenDeclaration: /* 'Attribute <?'           */
          pXML->error = eXMLErrorUnexpectedToken;
          return false;
        default:
          break;
        }
        break;

        // If we are looking for an attribute value
      case eAttribValue:
        // Check what the current token type is
        switch (token.type) {
          // If the current type is text or quoted text...
          // Eg.  'Attribute = "Value"' or 'Attribute = Value' or
          // 'Attribute = 'Value''.
        case eTokenText:
        case eTokenQuotedText:
          // If we are a declaration element '<?' then we need
          // to remove extra closing '?' if it exists
          if (node.IsDeclaration() && (token.pStr[token.length - 1]) == _T('?')) {
            token.length--;
          }

          // Add the valued attribute to the list
          if (token.type == eTokenQuotedText) {
            token.pStr++;
            token.length -= 2;
          }

          assert(!attribute_name.empty());

          {
            TCHAR *value = FromXMLString(token.pStr, token.length);
            if (value == nullptr) {
              pXML->error = eXMLErrorUnexpectedToken;
              return false;
            }

            node.AddAttribute(std::move(attribute_name),
                              value, _tcslen(value));
            free(value);
          }

          // Indicate we are searching for a new attribute
          attrib = eAttribName;
          break;

          // Errors...
        case eTokenTagStart: /* 'Attr = <'          */
        case eTokenTagEnd: /* 'Attr = </'         */
        case eTokenCloseTag: /* 'Attr = >'          */
        case eTokenShortHandClose: /* "Attr = />"         */
        case eTokenEquals: /* 'Attr = ='          */
        case eTokenDeclaration: /* 'Attr = <?'         */
          pXML->error = eXMLErrorUnexpectedToken;
          return false;
        default:
          break;
        }
      }
    }
  }
}