/**
 * Parse an XML document held in a string and return its main element.
 *
 * The string is parsed into a temporary document node.  The first
 * child of that node becomes the main node; if that child is an XML
 * declaration, the declaration's first child is used instead.
 *
 * @param xml_string the XML text; nullptr is reported as
 * eXMLErrorNoElements
 * @param pResults optional object that receives the error code (or
 * eXMLErrorNone on success); on a parser error the position is filled
 * in by CountLinesAndColumns()
 * @return the main node, allocated with "new", or nullptr on error
 */
XMLNode *
XML::ParseString(const TCHAR *xml_string, Results *pResults)
{
  // Common failure path: there is nothing that could serve as the
  // main node.  The position is reported as 0:0.
  const auto report_no_elements = [pResults]() {
    if (pResults) {
      pResults->error = eXMLErrorNoElements;
      pResults->line = 0;
      pResults->column = 0;
    }
  };

  // No input at all
  if (xml_string == nullptr) {
    report_no_elements();
    return nullptr;
  }

  Parser parser = { nullptr, 0, eXMLErrorNone, nullptr, 0, true, };
  parser.lpXML = xml_string;

  // Parse everything into "main_node"; at this point it is still the
  // document node, not the main element
  XMLNode main_node = XMLNode::Null();
  ParseXMLElement(main_node, &parser);
  const Error error = parser.error;

  // NOTE(review): the checks below run before "error" is looked at, so
  // a parser error on a document without any child is reported as
  // eXMLErrorNoElements at 0:0 - confirm this is intended.

  // Descend from the document node to its first child
  XMLNode *child = main_node.GetFirstChild();
  if (child == nullptr) {
    report_no_elements();
    return nullptr;
  }

  main_node = std::move(*child);

  // An XML declaration is not the main element; descend once more
  if (main_node.IsDeclaration()) {
    child = main_node.GetFirstChild();
    if (child == nullptr) {
      report_no_elements();
      return nullptr;
    }

    main_node = std::move(*child);
  }

  // Publish the outcome; the position is only computed for errors
  if (pResults) {
    pResults->error = error;

    if (error != eXMLErrorNone)
      CountLinesAndColumns(parser.lpXML, parser.nIndex, pResults);
  }

  // Hand out a node only if parsing succeeded
  return error == eXMLErrorNone
    ? new XMLNode(std::move(main_node))
    : nullptr;
}
/** * Recursively parse an XML element. */ static bool XML::ParseXMLElement(XMLNode &node, Parser *pXML) { bool is_declaration; const TCHAR *text = nullptr; XMLNode *pNew; enum Status status; // inside or outside a tag enum Attrib attrib = eAttribName; /* the name of the attribute that is currently being */ tstring attribute_name; assert(pXML); // If this is the first call to the function if (pXML->nFirst) { // Assume we are outside of a tag definition pXML->nFirst = false; status = eOutsideTag; } else { // If this is not the first call then we should only be called when inside a tag. status = eInsideTag; } // Iterate through the tokens in the document while (true) { // Obtain the next token NextToken token = GetNextToken(pXML); if (gcc_unlikely(token.type == eTokenError)) return false; // Check the current status switch (status) { // If we are outside of a tag definition case eOutsideTag: // Check what type of token we obtained switch (token.type) { // If we have found text or quoted text case eTokenText: case eTokenQuotedText: case eTokenEquals: if (text == nullptr) text = token.pStr; break; // If we found a start tag '<' and declarations '<?' case eTokenTagStart: case eTokenDeclaration: // Cache whether this new element is a declaration or not is_declaration = token.type == eTokenDeclaration; // If we have node text then add this to the element if (text != nullptr) { size_t length = StripRight(text, token.pStr - text); node.AddText(text, length); text = nullptr; } // Find the name of the tag token = GetNextToken(pXML); // Return an error if we couldn't obtain the next token or // it wasnt text if (token.type != eTokenText) { pXML->error = eXMLErrorMissingTagName; return false; } // If the name of the new element differs from the name of // the current element we need to add the new element to // the current one and recurse pNew = &node.AddChild(token.pStr, token.length, is_declaration); while (true) { // Callself to process the new node. 
If we return // FALSE this means we dont have any more // processing to do... if (!ParseXMLElement(*pNew, pXML)) { return false; } else { // If the call to recurse this function // evented in a end tag specified in XML then // we need to unwind the calls to this // function until we find the appropriate node // (the element name and end tag name must // match) if (pXML->cbEndTag) { // If we are back at the root node then we // have an unmatched end tag if (node.GetName() == nullptr) { pXML->error = eXMLErrorUnmatchedEndTag; return false; } // If the end tag matches the name of this // element then we only need to unwind // once more... if (CompareTagName(node.GetName(), pXML->lpEndTag)) { pXML->cbEndTag = 0; } return true; } else { // If we didn't have a new element to create break; } } } break; // If we found an end tag case eTokenTagEnd: // If we have node text then add this to the element if (text != nullptr) { size_t length = StripRight(text, token.pStr - text); TCHAR *text2 = FromXMLString(text, length); if (text2 == nullptr) { pXML->error = eXMLErrorUnexpectedToken; return false; } node.AddText(text2); free(text2); text = nullptr; } // Find the name of the end tag token = GetNextToken(pXML); // The end tag should be text if (token.type != eTokenText) { pXML->error = eXMLErrorMissingEndTagName; return false; } // After the end tag we should find a closing tag if (GetNextToken(pXML).type != eTokenCloseTag) { pXML->error = eXMLErrorMissingEndTagName; return false; } // We need to return to the previous caller. If the name // of the tag cannot be found we need to keep returning to // caller until we find a match if (!CompareTagName(node.GetName(), token.pStr)) { pXML->lpEndTag = token.pStr; pXML->cbEndTag = token.length; } // Return to the caller return true; // Errors... 
case eTokenCloseTag: /* '>' */ case eTokenShortHandClose: /* '/>' */ pXML->error = eXMLErrorUnexpectedToken; return false; default: break; } break; // If we are inside a tag definition we need to search for attributes case eInsideTag: // Check what part of the attribute (name, equals, value) we // are looking for. switch (attrib) { // If we are looking for a new attribute case eAttribName: // Check what the current token type is switch (token.type) { // If the current type is text... // Eg. 'attribute' case eTokenText: // Cache the token then indicate that we are next to // look for the equals attribute_name.assign(token.pStr, token.length); attrib = eAttribEquals; break; // If we found a closing tag... // Eg. '>' case eTokenCloseTag: // We are now outside the tag status = eOutsideTag; break; // If we found a short hand '/>' closing tag then we can // return to the caller case eTokenShortHandClose: return true; // Errors... case eTokenQuotedText: /* '"SomeText"' */ case eTokenTagStart: /* '<' */ case eTokenTagEnd: /* '</' */ case eTokenEquals: /* '=' */ case eTokenDeclaration: /* '<?' */ pXML->error = eXMLErrorUnexpectedToken; return false; default: break; } break; // If we are looking for an equals case eAttribEquals: // Check what the current token type is switch (token.type) { // If the current type is text... // Eg. 'Attribute AnotherAttribute' case eTokenText: // Add the unvalued attribute to the list node.AddAttribute(std::move(attribute_name), _T(""), 0); // Cache the token then indicate. We are next to // look for the equals attribute attribute_name.assign(token.pStr, token.length); break; // If we found a closing tag 'Attribute >' or a short hand // closing tag 'Attribute />' case eTokenShortHandClose: case eTokenCloseTag: assert(!attribute_name.empty()); // If we are a declaration element '<?' then we need // to remove extra closing '?' 
if it exists if (node.IsDeclaration() && attribute_name.back() == _T('?')) { attribute_name.pop_back(); } if (!attribute_name.empty()) // Add the unvalued attribute to the list node.AddAttribute(std::move(attribute_name), _T(""), 0); // If this is the end of the tag then return to the caller if (token.type == eTokenShortHandClose) return true; // We are now outside the tag status = eOutsideTag; break; // If we found the equals token... // Eg. 'Attribute =' case eTokenEquals: // Indicate that we next need to search for the value // for the attribute attrib = eAttribValue; break; // Errors... case eTokenQuotedText: /* 'Attribute "InvalidAttr"'*/ case eTokenTagStart: /* 'Attribute <' */ case eTokenTagEnd: /* 'Attribute </' */ case eTokenDeclaration: /* 'Attribute <?' */ pXML->error = eXMLErrorUnexpectedToken; return false; default: break; } break; // If we are looking for an attribute value case eAttribValue: // Check what the current token type is switch (token.type) { // If the current type is text or quoted text... // Eg. 'Attribute = "Value"' or 'Attribute = Value' or // 'Attribute = 'Value''. case eTokenText: case eTokenQuotedText: // If we are a declaration element '<?' then we need // to remove extra closing '?' if it exists if (node.IsDeclaration() && (token.pStr[token.length - 1]) == _T('?')) { token.length--; } // Add the valued attribute to the list if (token.type == eTokenQuotedText) { token.pStr++; token.length -= 2; } assert(!attribute_name.empty()); { TCHAR *value = FromXMLString(token.pStr, token.length); if (value == nullptr) { pXML->error = eXMLErrorUnexpectedToken; return false; } node.AddAttribute(std::move(attribute_name), value, _tcslen(value)); free(value); } // Indicate we are searching for a new attribute attrib = eAttribName; break; // Errors... 
case eTokenTagStart: /* 'Attr = <' */ case eTokenTagEnd: /* 'Attr = </' */ case eTokenCloseTag: /* 'Attr = >' */ case eTokenShortHandClose: /* "Attr = />" */ case eTokenEquals: /* 'Attr = =' */ case eTokenDeclaration: /* 'Attr = <?' */ pXML->error = eXMLErrorUnexpectedToken; return false; default: break; } } } } }