// We expect: // whitespace | attribute name | = | attribute value // where attribute value can be quoted AttrInfo *HtmlToken::NextAttr() { // start after the last attribute found (or the beginning) const char *curr = nextAttr; if (!curr) curr = s + nLen; const char *end = s + sLen; // parse attribute name SkipWs(curr, end); if (curr == end) { NoNextAttr: nextAttr = NULL; return NULL; } attrInfo.name = curr; SkipName(curr, end); attrInfo.nameLen = curr - attrInfo.name; if (0 == attrInfo.nameLen) goto NoNextAttr; SkipWs(curr, end); if ((curr == end) || ('=' != *curr)) { // attributes without values get their names as value in HTML attrInfo.val = attrInfo.name; attrInfo.valLen = attrInfo.nameLen; nextAttr = curr; return &attrInfo; } // parse attribute value ++curr; // skip '=' SkipWs(curr, end); if (curr == end) { // attribute with implicit empty value attrInfo.val = curr; attrInfo.valLen = 0; } else if (('\'' == *curr) || ('\"' == *curr)) { // attribute with quoted value ++curr; attrInfo.val = curr; if (!SkipUntil(curr, end, *(curr - 1))) goto NoNextAttr; attrInfo.valLen = curr - attrInfo.val; ++curr; } else { attrInfo.val = curr; SkipNonWs(curr, end); attrInfo.valLen = curr - attrInfo.val; } nextAttr = curr; return &attrInfo; }
static char *SkipWsAndComments(char *s) { do { s = SkipWs(s); if ('#' == *s || ';' == *s) { // skip entire comment line for (; *s && *s != '\n'; s++); } } while (str::IsWs(*s)); return s; }
void HtmlFormatter::HandleText(const char* s, size_t sLen) { const char* curr = s; const char* end = s + sLen; if (preFormatted) { // don't collapse whitespace and respect text newlines while (curr < end) { const char* text = curr; currReparseIdx = curr - htmlParser->Start(); // skip to the next newline for (; curr < end && *curr != '\n'; curr++) ; if (curr < end && curr > text && *(curr - 1) == '\r') curr--; EmitTextRun(text, curr); if ('\n' == *curr || '\r' == *curr) { curr += '\r' == *curr ? 2 : 1; HandleTagBr(); } } return; } // break text into runs i.e. chunks that are either all // whitespace or all non-whitespace while (curr < end) { // collapse multiple, consecutive white-spaces into a single space currReparseIdx = curr - htmlParser->Start(); bool skipped = SkipWs(curr, end); if (skipped) EmitElasticSpace(); const char* text = curr; currReparseIdx = curr - htmlParser->Start(); skipped = SkipNonWs(curr, end); if (skipped) EmitTextRun(text, curr); } }
// Recursively parses one level of a square-bracket tree and returns it
// as a newly allocated SquareTreeNode.
//
// data       - cursor into a NUL-terminated, writable buffer; passed by
//              reference and advanced past everything consumed. The
//              buffer is modified in place: '\0' terminators are written
//              behind keys and values, and the DataItems appended to the
//              node are constructed from pointers into this buffer.
// isTopLevel - true only for the outermost call (recursive calls use the
//              default false). A nested call returns at a closing ']' or
//              at an INI-style "[Section]" header; the top-level call
//              ignores stray ']' and turns "[Section]" headers into
//              child nodes.
//
// The node is created with plain `new`; NOTE(review): presumably the
// caller (or the parent node) takes ownership - confirm.
static SquareTreeNode *ParseSquareTreeRec(char *& data, bool isTopLevel=false)
{
    SquareTreeNode *node = new SquareTreeNode();

    // each iteration handles one line; stops at the terminating NUL
    while (*(data = SkipWsAndComments(data))) {
        // all non-empty non-comment lines contain a key-value pair
        // where the value is either a string (separated by '=' or ':')
        // or a list of child nodes (if the key is followed by '[' alone)
        char *key = data;
        // look for the first separator candidate on the current line
        for (data = key; *data && *data != '=' && *data != ':' && *data != '[' && *data != ']' && *data != '\n'; data++);
        if (!*data || '\n' == *data) {
            // use first whitespace as a fallback separator
            for (data = key; *data && !str::IsWs(*data); data++);
        }
        char *separator = data;
        if (*data && *data != '\n') {
            // skip to the first non-whitespace character on the same line (value)
            // NOTE(review): the `true` argument presumably keeps SkipWs
            // from crossing the line end - confirm against SkipWs
            data = SkipWs(data + 1, true);
        }
        char *value = data;
        // skip to the end of the line
        for (; *data && *data != '\n'; data++);
        if (IsBracketLine(separator) ||
            // also tolerate "key \n [ \n ... \n ]" (else the key
            // gets an empty value and the child node an empty key)
            str::IsWs(*separator) && '\n' == *value && IsBracketLine(SkipWsAndComments(data))) {
            // parse child node(s)
            // continue right behind the opening bracket
            data = SkipWsAndComments(separator) + 1;
            // terminate the key in place
            // NOTE(review): SkipWsRev presumably returns the position
            // behind the last non-whitespace character - confirm
            *SkipWsRev(key, separator) = '\0';
            node->data.Append(SquareTreeNode::DataItem(key, ParseSquareTreeRec(data)));
            // arrays are created by either reusing the same key for a different child
            // or by concatenating multiple children ("[ \n ] [ \n ] [ \n ]")
            while (IsBracketLine((data = SkipWsAndComments(data)))) {
                data++;
                node->data.Append(SquareTreeNode::DataItem(key, ParseSquareTreeRec(data)));
            }
        }
        else if (']' == *key) {
            // finish parsing child node
            data = key + 1;
            if (!isTopLevel)
                return node;
            // ignore superfluous closing square brackets instead of
            // ignoring all content following them
        }
        else if ('[' == *key && ']' == SkipWsRev(value, data)[-1]) {
            // treat INI section headers as top-level node names
            // (else "[Section]" would be ignored)
            if (!isTopLevel) {
                // rewind to the header so that the calling level sees it again
                data = key;
                return node;
            }
            // trim whitespace around section name (for consistency with GetPrivateProfileString)
            key = SkipWs(key + 1);
            *SkipWsRev(key, SkipWsRev(value, data) - 1) = '\0';
            node->data.Append(SquareTreeNode::DataItem(key, ParseSquareTreeRec(data)));
        }
        else if ('[' == *separator || ']' == *separator) {
            // invalid line (ignored)
        }
        else {
            // string value (decoding is left to the consumer)
            // remember whether a line break follows before the buffer is
            // modified: terminating the value may overwrite that '\n'
            bool hasMoreLines = '\n' == *data;
            *SkipWsRev(key, separator) = '\0';
            *SkipWsRev(value, data) = '\0';
            node->data.Append(SquareTreeNode::DataItem(key, value));
            if (hasMoreLines)
                data++;
        }
    }

    // assume that all square brackets have been properly balanced
    return node;
}
// return true if s consists only of whitespace bool IsSpaceOnly(const char *s, const char *end) { SkipWs(s, end); return s == end; }