// Exercises the tokenizer on single-character punctuation. TEST_TOKENS is
// defined elsewhere; presumably it tokenizes the string literal and checks the
// result against the listed tokens -- confirm against its definition.
TEST(CSSTokenizerTest, SingleCharacterTokens) {
  // Each delimiter on its own.
  TEST_TOKENS("(", leftParenthesis());
  TEST_TOKENS(")", rightParenthesis());
  TEST_TOKENS("[", leftBracket());
  TEST_TOKENS("]", rightBracket());
  TEST_TOKENS(",", comma());
  TEST_TOKENS(":", colon());
  TEST_TOKENS(";", semicolon());

  // Two delimiters back to back, including mismatched pairs. Braces are only
  // covered here, as a pair.
  TEST_TOKENS(")[", rightParenthesis(), leftBracket());
  TEST_TOKENS("[)", leftBracket(), rightParenthesis());
  TEST_TOKENS("{}", leftBrace(), rightBrace());
  TEST_TOKENS(",,", comma(), comma());
}
/**
 * Scans one Prolog token out of `str`, starting at offset `idx`.
 *
 * Leading white space and `%` line comments are skipped first. On return,
 * `idx`, `line` and `col` have been advanced past the token that was read
 * (a '\n' increments `line` and resets `col` to 1; any other character
 * increments `col`). The token itself is stamped with the line/column at
 * which it started.
 *
 * An atom, operator, quoted atom or `[]` that is immediately followed by '('
 * swallows that parenthesis and is created with its start-of-term flag set.
 *
 * @param str      Text being tokenized.
 * @param idx      In/out: offset of the next unread character in `str`.
 * @param fileName Passed through to the token factories for diagnostics.
 * @param line     In/out: current line number.
 * @param col      In/out: current column number.
 * @return The next token, or the end-of-file token once `idx` has reached
 *         the end of `str`.
 *
 * Malformed input (bad number base, unterminated string/quoted atom/comment,
 * backslash inside a string, unknown character) is reported on std::cerr and
 * terminates the process via abort().
 */
PrologToken getToken(const std::string& str, unsigned int& idx,
                     const std::string& fileName, int& line, int& col) {
  const char normalOperatorChars[] = "+-*/\\^<>=`~:.?@#&";

top:
  if (idx >= str.size()) { // End of input
    return endOfFile(fileName, line, col);
  }

  // Position of the token about to be read (re-captured after every
  // white-space/comment skip because those paths jump back to `top`).
  int oldLine = line, oldCol = col;

// Current and next character; both read as '\0' at or beyond the end of the
// input so that no access ever goes out of bounds.
#define STAR_P (idx < str.size() ? str[idx] : '\0')
#define P_SUB_1 (idx + 1 < str.size() ? str[idx + 1] : '\0')
// Consume one character, keeping line/col in step.
#define INC_INDEX do { \
    if (STAR_P == '\n') { \
      ++line; \
      col = 1; \
    } else { \
      ++col; \
    } \
    ++idx; \
  } while (false)
// The <cctype> classifiers are undefined for negative arguments, so plain
// char values are routed through unsigned char first.
#define IS_LOWER(c) (islower(static_cast<unsigned char>(c)) != 0)
#define IS_UPPER(c) (isupper(static_cast<unsigned char>(c)) != 0)
#define IS_ALNUM(c) (isalnum(static_cast<unsigned char>(c)) != 0)
#define IS_DIGIT(c) (isdigit(static_cast<unsigned char>(c)) != 0)
#define IS_SPACE(c) (isspace(static_cast<unsigned char>(c)) != 0)
// strchr() treats the terminating '\0' as part of the searched string, so
// '\0' (our end-of-input marker) has to be excluded explicitly.
#define IS_OPERATOR_CHAR(c) \
  ((c) != '\0' && strchr(normalOperatorChars, (c)) != 0)

  if (IS_LOWER(STAR_P)) { // Normal atoms
    unsigned int oldIdx = idx;
    do {
      INC_INDEX;
    } while (IS_ALNUM(STAR_P) || STAR_P == '_');
    bool startOfTerm = false;
    if (STAR_P == '(') {
      startOfTerm = true;
      INC_INDEX;
    }
    // The swallowed '(' is not part of the atom's name.
    return atom(str.substr(oldIdx, idx - oldIdx - startOfTerm), startOfTerm,
                fileName, oldLine, oldCol);
  } else if (STAR_P == '_' && !IS_ALNUM(P_SUB_1)) { // Wildcard
    INC_INDEX;
    return wildcard(fileName, oldLine, oldCol);
  } else if (IS_UPPER(STAR_P) || STAR_P == '_' || STAR_P == '$') { // Variables
    unsigned int oldIdx = idx;
    do {
      INC_INDEX;
    } while (IS_ALNUM(STAR_P) || STAR_P == '_' || STAR_P == '$');
    return variable(str.substr(oldIdx, idx - oldIdx), fileName, oldLine,
                    oldCol);
  } else if (IS_DIGIT(STAR_P) ||
             (STAR_P == '-' && IS_DIGIT(P_SUB_1))) { // Numbers
    int isNegative = 1;
    if (STAR_P == '-') {
      isNegative = -1;
      INC_INDEX;
    }
    unsigned int oldIdx = idx;
    do {
      INC_INDEX;
    } while (IS_DIGIT(STAR_P));

    if (STAR_P == '.' && IS_DIGIT(P_SUB_1)) { // Floating point
      INC_INDEX;
      while (IS_DIGIT(STAR_P)) INC_INDEX;
      if (STAR_P == 'e' || STAR_P == 'E') {
        // Only a '-' sign is recognised on the exponent.
        INC_INDEX;
        if (STAR_P == '-') INC_INDEX;
        while (IS_DIGIT(STAR_P)) INC_INDEX;
      }
      return floatingPoint(
          isNegative * stringToDouble(str.substr(oldIdx, idx - oldIdx),
                                      fileName, oldLine, oldCol),
          fileName, oldLine, oldCol);
    } else if (STAR_P == '\'') { // Base or character definition
      int base = stringToInt(str.substr(oldIdx, idx - oldIdx), 10);
      if (base == 0 && P_SUB_1) { // Character: 0'c
        INC_INDEX; // skip the quote
        const char c = STAR_P;
        INC_INDEX; // consume the character itself so it is not re-tokenized
        return integer(isNegative * c, fileName, oldLine, oldCol);
      } else if (base >= 2 && base <= 36) { // Based integer: base'digits
        INC_INDEX;
        oldIdx = idx;
        while (IS_ALNUM(STAR_P)) INC_INDEX;
        return integer(
            isNegative * stringToInt(str.substr(oldIdx, idx - oldIdx), base),
            fileName, oldLine, oldCol);
      } else {
        std::cerr << "Invalid number base " << base << std::endl;
        abort();
      }
    } else { // Normal integer
      return integer(
          isNegative * stringToInt(str.substr(oldIdx, idx - oldIdx), 10),
          fileName, oldLine, oldCol);
    }
  } else if (STAR_P == '.' && P_SUB_1 != '\0' && !IS_OPERATOR_CHAR(P_SUB_1)) {
    // Clause-terminating period: a '.' followed by a non-operator character.
    // NOTE(review): a '.' that is the very last character of the input is
    // still handled by the operator branch below (yielding the atom "."),
    // matching the original condition -- confirm this is what the parser
    // expects for files without a trailing newline.
    INC_INDEX;
    return period(fileName, oldLine, oldCol);
  } else if (IS_OPERATOR_CHAR(STAR_P)) { // Operators
    unsigned int oldIdx = idx;
    do {
      INC_INDEX;
    } while (IS_OPERATOR_CHAR(STAR_P));
    bool startOfTerm = false;
    if (STAR_P == '(') {
      startOfTerm = true;
      INC_INDEX;
    }
    return atom(str.substr(oldIdx, idx - oldIdx - startOfTerm), startOfTerm,
                fileName, oldLine, oldCol);
  } else if (STAR_P == '!' || STAR_P == ';') { // Single-character operators
    char c = STAR_P;
    INC_INDEX;
    bool startOfTerm = false;
    if (STAR_P == '(') {
      startOfTerm = true;
      INC_INDEX;
    }
    return atom(std::string(1, c), startOfTerm, fileName, oldLine, oldCol);
  } else if (STAR_P == '\'') { // Quoted atoms
    std::string result;
    INC_INDEX;
    unsigned int oldIdx = idx;
    for (;;) {
      while (idx < str.size() && STAR_P != '\'') INC_INDEX;
      if (idx >= str.size()) {
        std::cerr << "Unterminated quoted atom" << std::endl;
        abort();
      }
      result += str.substr(oldIdx, idx - oldIdx);
      if (P_SUB_1 != '\'') break; // Closing quote
      // Escaped single quote ('' inside the atom)
      result += "'";
      INC_INDEX;
      INC_INDEX;
      oldIdx = idx; // resume copying after the escape, not from the start
    }
    INC_INDEX; // closing quote
    bool startOfTerm = false;
    if (STAR_P == '(') {
      startOfTerm = true;
      INC_INDEX;
    }
    return atom(result, startOfTerm, fileName, oldLine, oldCol);
  } else if (STAR_P == '"') { // Strings
    std::string result;
    INC_INDEX;
    unsigned int oldIdx = idx;
    for (;;) {
      while (STAR_P && STAR_P != '"' && STAR_P != '\\') INC_INDEX;
      result += str.substr(oldIdx, idx - oldIdx);
      if (STAR_P == '"') {
        if (P_SUB_1 == '"') { // Escaped double quote
          result += "\"";
          INC_INDEX;
          INC_INDEX;
          oldIdx = idx;
          continue;
        }
        // End of string
        INC_INDEX;
        return stringToken(result, fileName, oldLine, oldCol);
      } else if (STAR_P == '\\') {
        std::cerr << "Cannot handle backslash escapes in strings yet"
                  << std::endl;
        abort();
      } else { // '\0': ran out of input
        std::cerr << "Unterminated string constant" << std::endl;
        abort();
      }
    }
  } else if (STAR_P == '(') {
    INC_INDEX;
    return leftParen(fileName, oldLine, oldCol);
  } else if (STAR_P == ')') {
    INC_INDEX;
    return rightParen(fileName, oldLine, oldCol);
  } else if (STAR_P == '[') {
    if (P_SUB_1 == ']') { // "[]" is a single atom
      INC_INDEX;
      INC_INDEX;
      bool startOfTerm = false;
      if (STAR_P == '(') {
        startOfTerm = true;
        INC_INDEX;
      }
      return atom("[]", startOfTerm, fileName, oldLine, oldCol);
    } else {
      INC_INDEX;
      return leftBracket(fileName, oldLine, oldCol);
    }
  } else if (STAR_P == ']') {
    INC_INDEX;
    return rightBracket(fileName, oldLine, oldCol);
  } else if (STAR_P == '|') {
    INC_INDEX;
    return verticalBar(fileName, oldLine, oldCol);
  } else if (STAR_P == ',') {
    INC_INDEX;
    return comma(fileName, oldLine, oldCol);
  } else if (IS_SPACE(STAR_P)) { // White space
    INC_INDEX;
    goto top;
  } else if (STAR_P == '%') { // Comment (runs to end of line)
    do {
      INC_INDEX;
    } while (STAR_P && STAR_P != '\n' && STAR_P != '\r');
    // NOTE(review): a comment that ends at end-of-input (no trailing newline)
    // is rejected here; kept as in the original -- confirm that is intended.
    if (!STAR_P) {
      std::cerr << "Unterminated comment" << std::endl;
      abort();
    }
    goto top;
  } else {
    std::cerr << "Bad character '" << STAR_P << "'" << std::endl;
    abort();
  }

#undef STAR_P
#undef P_SUB_1
#undef INC_INDEX
#undef IS_LOWER
#undef IS_UPPER
#undef IS_ALNUM
#undef IS_DIGIT
#undef IS_SPACE
#undef IS_OPERATOR_CHAR

  std::cerr << "Should not get here" << std::endl;
  abort();
  return endOfFile(fileName, line, col); // Should not get here
}