//////////////////////////////////////////////////////////////////////////////// // Lexer::Type::dom // [ <isUUID> | <isDigit>+ . ] <isIdentifier> [ . <isIdentifier> ]* bool Lexer::isDOM (std::string& token, Lexer::Type& type) { std::size_t marker = _cursor; std::string extractedToken; Lexer::Type extractedType; if (isUUID (extractedToken, extractedType)) { if (_text[_cursor] == '.') ++_cursor; else { _cursor = marker; return false; } } else { if (isDigit (_text[_cursor])) { ++_cursor; while (isDigit (_text[_cursor])) ++_cursor; if (_text[_cursor] == '.') ++_cursor; else { _cursor = marker; return false; } } } if (! isOperator (extractedToken, extractedType) && isIdentifier (extractedToken, extractedType)) { while (1) { if (_text[_cursor] == '.') ++_cursor; else break; if (isOperator (extractedToken, extractedType) || ! isIdentifier (extractedToken, extractedType)) { _cursor = marker; return false; } } type = Lexer::Type::dom; token = _text.substr (marker, _cursor - marker); return true; } _cursor = marker; return false; }
//////////////////////////////////////////////////////////////////////////////// // When a Lexer object is constructed with a string, this method walks through // the stream of low-level tokens. bool Lexer::token (std::string& token, Lexer::Type& type) { // Eat white space. while (isWhitespace (_text[_cursor])) utf8_next_char (_text, _cursor); // Terminate at EOS. if (isEOS ()) return false; // The sequence is specific, and must follow these rules: // - date < duration < uuid < identifier // - dom < uuid // - uuid < hex < number // - url < pair < identifier // - hex < number // - separator < tag < operator // - path < substitution < pattern // - set < number // - word last if (isString (token, type, "'\"") || isDate (token, type) || isDuration (token, type) || isURL (token, type) || isPair (token, type) || isUUID (token, type, true) || isSet (token, type) || isDOM (token, type) || isHexNumber (token, type) || isNumber (token, type) || isSeparator (token, type) || isTag (token, type) || isPath (token, type) || isSubstitution (token, type) || isPattern (token, type) || isOperator (token, type) || isIdentifier (token, type) || isWord (token, type)) return true; return false; }
//////////////////////////////////////////////////////////////////////////////// // Lexer::Type::dom // [ <isUUID> | <isDigit>+ . ] <isIdentifier> [ . <isIdentifier> ]* // // Configuration: // rc.<name> // // System: // context.program // context.args // context.width // context.height // system.version // system.os // // Relative or absolute attribute: // <attribute> // <id>.<attribute> // <uuid>.<attribute> // // Single tag: // tags.<word> // // Date type: // <date>.year // <date>.month // <date>.day // <date>.week // <date>.weekday // <date>.julian // <date>.hour // <date>.minute // <date>.second // // Annotations (entry is a date): // annotations.<N>.entry // annotations.<N>.description // bool Lexer::isDOM (std::string& token, Lexer::Type& type) { std::size_t marker = _cursor; std::string partialToken; Lexer::Type partialType; if (isLiteral ("rc.", false, false) && isWord (partialToken, partialType)) { token = _text.substr (marker, _cursor - marker); type = Lexer::Type::dom; return true; } else _cursor = marker; if (isOneOf ({"context.program", "context.args", "context.width", "context.height", "system.version", "system.os"}, false, true)) { token = _text.substr (marker, _cursor - marker); type = Lexer::Type::dom; return true; } // Optional: // <uuid>. // <id>. std::string extractedToken; Lexer::Type extractedType; if (isUUID (extractedToken, extractedType, false) || isInteger (extractedToken, extractedType)) { if (! isLiteral (".", false, false)) { _cursor = marker; return false; } } // Any failure after this line should rollback to the checkpoint. std::size_t checkpoint = _cursor; // [prefix]tags.<word> if (isLiteral ("tags", false, false) && isLiteral (".", false, false) && isWord (partialToken, partialType)) { token = _text.substr (marker, _cursor - marker); type = Lexer::Type::dom; return true; } else _cursor = checkpoint; // [prefix]attribute if (isOneOf (attributes, false, true)) { token = _text.substr (marker, _cursor - marker); type = Lexer::Type::dom; return true; } // [prefix]attribute. if (isOneOf (attributes, false, false)) { if (isLiteral (".", false, false)) { std::string attribute = _text.substr (checkpoint, _cursor - checkpoint - 1); // if attribute type is 'date', then it has sub-elements. if (attributes[attribute] == "date" && isOneOf ({"year", "month", "day", "week", "weekday", "julian", "hour", "minute", "second"}, false, true)) { token = _text.substr (marker, _cursor - marker); type = Lexer::Type::dom; return true; } } else { token = _text.substr (marker, _cursor - marker); type = Lexer::Type::dom; return true; } } // [prefix]annotations. if (isLiteral ("annotations", true, false) && isLiteral (".", false, false)) { std::string extractedToken; Lexer::Type extractedType; if (isInteger (extractedToken, extractedType)) { if (isLiteral (".", false, false)) { if (isLiteral ("description", false, true)) { token = _text.substr (marker, _cursor - marker); type = Lexer::Type::dom; return true; } else if (isLiteral ("entry", false, true)) { token = _text.substr (marker, _cursor - marker); type = Lexer::Type::dom; return true; } else if (isLiteral ("entry", false, false) && isLiteral (".", false, false) && isOneOf ({"year", "month", "day", "week", "weekday", "julian", "hour", "minute", "second"}, false, true)) { token = _text.substr (marker, _cursor - marker); type = Lexer::Type::dom; return true; } } } } _cursor = marker; return false; }