static bool sReadMLIdentifier(const ZStrimU& s, string& oText) { oText.clear(); UTF32 curCP; if (!s.ReadCP(curCP)) return false; if (!ZUnicode::sIsAlpha(curCP) && curCP != '_' && curCP != '?' && curCP != '!') { s.Unread(); return false; } oText += curCP; for (;;) { if (!s.ReadCP(curCP)) { break; } else if (!ZUnicode::sIsAlphaDigit(curCP) && curCP != '_' && curCP != '-' && curCP != ':') { s.Unread(); break; } else { oText += curCP; } } return true; }
static bool sReadMLAttributeValue(const ZStrimU& s, ZML::Reader::EntityCallback iCallback, void* iRefcon, string& oValue) { oValue.clear(); UTF32 curCP; if (!s.ReadCP(curCP)) return false; if (curCP == '"') { return sReadUntil(s, iCallback, iRefcon, '"', oValue); } else if (curCP == '\'') { return sReadUntil(s, iCallback, iRefcon, '\'', oValue); } else { s.Unread(); ZUtil_Strim::sSkip_WS(s); for (;;) { if (!s.ReadCP(curCP)) { break; } else if (curCP == '>') { s.Unread(); break; } else if (ZUnicode::sIsWhitespace(curCP)) { break; } else if (curCP == '&') { oValue += sReadReference(s, iCallback, iRefcon); } else { oValue += curCP; } } } return true; }
static bool sReadUntil(const ZStrimU& s, ZML::Reader::EntityCallback iCallback, void* iRefcon, UTF32 iTerminator, string& oText) { oText.clear(); for (;;) { UTF32 theCP; if (!s.ReadCP(theCP)) { return false; } else if (theCP == iTerminator) { return true; } else if (theCP == '&') { oText += sReadReference(s, iCallback, iRefcon); } else { oText += theCP; } } }
// Reports whether iStrimU has no more code points to deliver.
//
// Probes by attempting to read one code point; if that succeeds the code
// point is pushed back with Unread so the stream position is unchanged.
static bool sAtEnd(ZStrimU& iStrimU)
{
	UTF32 probeCP;
	if (iStrimU.ReadCP(probeCP))
	{
		// There was something there -- put it back.
		iStrimU.Unread();
		return false;
	}
	return true;
}
static bool sReadMLAttributeName(const ZStrimU& s, string& oName) { oName.clear(); UTF32 curCP; if (!s.ReadCP(curCP)) return false; if (curCP == '"') { return sReadUntil(s, '"', oName); } else if (curCP == '\'') { return sReadUntil(s, '\'', oName); } else { if (!ZUnicode::sIsAlpha(curCP) && curCP != '_' && curCP != '?' && curCP != '!') { s.Unread(); return false; } oName += curCP; for (;;) { if (!s.ReadCP(curCP)) break; if (!ZUnicode::sIsAlphaDigit(curCP) && curCP != '_' && curCP != '-' && curCP != ':') { s.Unread(); break; } oName += curCP; } } return true; }
static bool sReadUntil(const ZStrimU& s, UTF32 iTerminator, string& oText) { oText.clear(); for (;;) { UTF32 theCP; if (!s.ReadCP(theCP)) return false; if (theCP == iTerminator) return true; oText += theCP; } }
bool ZUtil_Tuple::sFromStrim(const ZStrimU& iStrimU, ZTuple& oTuple) { using namespace ZUtil_Strim; sSkip_WSAndCPlusPlusComments(iStrimU); UTF32 theCP; if (!iStrimU.ReadCP(theCP)) return false; if (theCP != '{') throw ParseException("Expected '{' to open tuple"); sFromStrim_BodyOfTuple(iStrimU, oTuple); sSkip_WSAndCPlusPlusComments(iStrimU); if (!sTryRead_CP(iStrimU, '}')) throw ParseException("Expected '}' to close tuple"); return true; }
bool ZUtil_Tuple::sFromStrim(const ZStrimU& iStrimU, vector<ZTupleValue>& oVector) { using namespace ZUtil_Strim; sSkip_WSAndCPlusPlusComments(iStrimU); UTF32 theCP; if (!iStrimU.ReadCP(theCP)) return false; if (theCP != '[') throw ParseException("Expected '[' to open vector"); sFromStrim_BodyOfVector(iStrimU, oVector); sSkip_WSAndCPlusPlusComments(iStrimU); if (!sTryRead_CP(iStrimU, ']')) throw ParseException("Expected ']' to close vector"); return true; }
bool ZUtil_Tuple::sRead_Identifier(const ZStrimU& iStrimU, string* oStringLC, string* oStringExact) { bool gotAny = false; for (;;) { UTF32 theCP; if (!iStrimU.ReadCP(theCP)) break; if (!ZUnicode::sIsAlphaDigit(theCP) && theCP != '_') { iStrimU.Unread(); break; } gotAny = true; if (oStringLC) *oStringLC += ZUnicode::sToLower(theCP); if (oStringExact) *oStringExact += theCP; } return gotAny; }
// Reads the remainder of a character or entity reference from iStrim and
// returns its expansion. The caller has already consumed the leading '&'.
//
// Numeric references ("#123;" or "#x7B;"/"#X7B;"): the number is parsed
// with sTryRead_DecimalInteger / sTryRead_HexInteger and must be followed by
// ';'. The value is appended to the result only if it lies in the Unicode
// code point range 0..0x10FFFF. Without that check the int64 value would be
// narrowed by the cast to UTF32, so an out-of-range reference could silently
// turn into some unrelated character.
//
// Named references ("name;"): code points are collected up to ';'. If the
// stream ends or whitespace is seen before ';', the name is discarded.
// A non-empty name is passed to iCallback(iRefcon, name) when iCallback is
// non-null; otherwise only quot, lt, gt, amp, apos and nbsp are recognized.
//
// Returns an empty string for any malformed, out-of-range or unrecognized
// reference. Code points consumed while examining a malformed reference are
// not pushed back.
static string sReadReference(const ZStrimU& iStrim,
	ZML::Reader::EntityCallback iCallback, void* iRefcon)
{
	using namespace ZUtil_Strim;

	string result;

	if (sTryRead_CP(iStrim, '#'))
	{
		// It's a character reference.
		int64 theInt;
		bool gotIt = false;
		if (sTryRead_CP(iStrim, 'x') || sTryRead_CP(iStrim, 'X'))
			gotIt = sTryRead_HexInteger(iStrim, theInt);
		else
			gotIt = sTryRead_DecimalInteger(iStrim, theInt);

		// The ';' is consumed whenever it is present, even if the value is
		// then rejected, so the stream position does not depend on the value.
		if (gotIt && sTryRead_CP(iStrim, ';'))
		{
			const int64 kMaxCodePoint = 0x10FFFF;
			if (theInt >= 0 && theInt <= kMaxCodePoint)
				result += static_cast<UTF32>(theInt);
		}
	}
	else
	{
		// It's a named entity reference; gather the name up to ';'.
		string theEntity;
		for (;;)
		{
			UTF32 theCP;
			if (!iStrim.ReadCP(theCP))
			{
				// Stream ended before ';' -- not a usable reference.
				theEntity.clear();
				break;
			}

			if (theCP == ';')
				break;

			if (ZUnicode::sIsWhitespace(theCP))
			{
				// Whitespace cannot appear inside an entity name.
				theEntity.clear();
				break;
			}

			theEntity += theCP;
		}

		if (!theEntity.empty())
		{
			if (iCallback)
				result = iCallback(iRefcon, theEntity);
			else if (theEntity == "quot")
				result = "\"";
			else if (theEntity == "lt")
				result = "<";
			else if (theEntity == "gt")
				result = ">";
			else if (theEntity == "amp")
				result = "&";
			else if (theEntity == "apos")
				result = "'";
			else if (theEntity == "nbsp")
				result = "\xC2\xA0";
		}
	}

	return result;
}
static void sReadQuotedString_Quote(const ZStrimU& iStrimU, ZTupleValue& oTupleValue) { using namespace ZUtil_Strim; string theString; theString.reserve(100); ZStrimW_String theStrimW(theString); for (;;) { // We've read, and could un-read, a quote mark. if (sTryRead_CP(iStrimU, '"')) { // We've now seen a second quote, abutting the first. if (sTryRead_CP(iStrimU, '"')) { // We have three quotes in a row, which opens a verbatim string. // If the next character is an EOL then absorb it, so the verbatim // text can start on a fresh line, but not be parsed as // beginning with an EOL. UTF32 theCP = iStrimU.ReadCP(); if (!ZUnicode::sIsEOL(theCP)) iStrimU.Unread(); // Now copy everything till we see three quotes in a row again. ZStrimR_Boundary theStrimR_Boundary("\"\"\"", iStrimU); theStrimW.CopyAllFrom(theStrimR_Boundary); if (!theStrimR_Boundary.HitBoundary()) throw ParseException("Expected \"\"\" to close a string"); if (sTryRead_CP(iStrimU, '"')) { // We have another quote, so there were at least four in a row, // which we get with a quote in the text immediately followed // by the triple quote. So emit a quote. theStrimW.WriteCP('"'); if (sTryRead_CP(iStrimU, '"')) { // Same again -- five quotes in a row, which is two content // quotes followed by the closing triple. theStrimW.WriteCP('"'); // This is why it's essential that when using triple quotes // you put whitespace before the opening, and after the closing // triple, so we don't mistake included quotes for ones that // are (say) opening a subsequent regular quoted sequence. } } } else { // We have two quotes in a row, followed by something else, so // we had an empty string segment. } } else { sCopy_EscapedString(iStrimU, '"', theStrimW); if (!sTryRead_CP(iStrimU, '"')) throw ParseException("Expected \" to close a string"); } sSkip_WSAndCPlusPlusComments(iStrimU); if (!sTryRead_CP(iStrimU, '"')) break; } oTupleValue.SetString(theString); }