// Exercises tokenizer input made of one- and two-character punctuation.
// TEST_TOKENS and the token factory helpers (leftParenthesis(), comma(), ...)
// are defined outside this snippet; presumably the macro tokenizes the string
// literal and compares the result with the listed tokens in order -- confirm
// against the macro definition.
TEST(CSSTokenizerTest, SingleCharacterTokens)
{
    // Inputs consisting of a single punctuation character.
    TEST_TOKENS("(", leftParenthesis());
    TEST_TOKENS(")", rightParenthesis());
    TEST_TOKENS("[", leftBracket());
    TEST_TOKENS("]", rightBracket());
    TEST_TOKENS(",", comma());
    TEST_TOKENS(":", colon());
    TEST_TOKENS(";", semicolon());
    // Two adjacent punctuation characters, including mismatched bracket kinds
    // and a repeated character; each input lists two expected tokens.
    TEST_TOKENS(")[", rightParenthesis(), leftBracket());
    TEST_TOKENS("[)", leftBracket(), rightParenthesis());
    // NOTE(review): braces are only covered as the pair "{}"; there is no
    // single-character "{" or "}" case in this test.
    TEST_TOKENS("{}", leftBrace(), rightBrace());
    TEST_TOKENS(",,", comma(), comma());
}
// Example no. 2
// 0
// Character-classification helpers for getToken.  The <cctype> functions have
// undefined behaviour for negative arguments other than EOF, so plain char
// values are converted to unsigned char before being classified.
static bool tokenIsLower(char c) { return islower(static_cast<unsigned char>(c)) != 0; }
static bool tokenIsUpper(char c) { return isupper(static_cast<unsigned char>(c)) != 0; }
static bool tokenIsAlnum(char c) { return isalnum(static_cast<unsigned char>(c)) != 0; }
static bool tokenIsDigit(char c) { return isdigit(static_cast<unsigned char>(c)) != 0; }
static bool tokenIsSpace(char c) { return isspace(static_cast<unsigned char>(c)) != 0; }

// Returns true if c is one of the characters that may appear in a
// multi-character symbolic operator atom.  NUL is rejected explicitly because
// strchr() also matches the terminator of the table, which would otherwise
// make the end-of-input marker look like an operator character.
static bool tokenIsOperatorChar(char c) {
  static const char normalOperatorChars[] = "+-*/\\^<>=`~:.?@#&";
  return c != '\0' && strchr(normalOperatorChars, c) != 0;
}

// Scans one token from str starting at idx and returns it.
//
//   str       text being tokenized
//   idx       in/out: index of the next unread character; advanced past the
//             returned token (and past any skipped white space and comments)
//   fileName  passed through to the token constructors
//   line/col  in/out: current position; a '\n' increments line and resets col
//             to 1, any other consumed character increments col
//
// Every token is created with the line/column at which it started.  When idx
// is at (or beyond) the end of str an endOfFile token is returned.  Lexical
// errors (bad character, unterminated string/atom/comment, invalid number
// base, backslash in a string) are reported on std::cerr and followed by
// abort().
//
// Atoms, operators and "[]" that are immediately followed by '(' consume the
// parenthesis as well and are flagged as the start of a compound term.
PrologToken getToken(const std::string& str, unsigned int& idx,
                     const std::string& fileName, int& line, int& col) {
top:
  if (idx >= str.size()) { // End of string
    return endOfFile(fileName, line, col);
  }
  int oldLine = line, oldCol = col;
// Current and next character; both yield '\0' once the index runs off the
// end of str, so no branch below can read out of bounds.
#define STAR_P (idx < str.size() ? str[idx] : '\0')
#define P_SUB_1 (idx + 1 < str.size() ? str[idx + 1] : '\0')
// Consume the current character, keeping line/col in step.
#define INC_INDEX do { \
                    if (STAR_P == '\n') { \
                      ++line; \
                      col = 1; \
                    } else { \
                      ++col; \
                    } \
                    ++idx; \
                  } while (false)
  if (tokenIsLower(STAR_P)) { // Normal atoms
    unsigned int oldIdx = idx;
    do {INC_INDEX;} while (tokenIsAlnum(STAR_P) || STAR_P == '_');
    bool startOfTerm = false;
    if (STAR_P == '(') {startOfTerm = true; INC_INDEX;}
    // When the '(' was consumed it must not become part of the atom's name.
    return atom(str.substr(oldIdx, idx - oldIdx - startOfTerm), startOfTerm,
                fileName, oldLine, oldCol);
  } else if (STAR_P == '_' && !tokenIsAlnum(P_SUB_1)) { // Wildcard
    INC_INDEX;
    return wildcard(fileName, oldLine, oldCol);
  } else if (tokenIsUpper(STAR_P) || STAR_P == '_' || STAR_P == '$') { // Variables
    unsigned int oldIdx = idx;
    do {INC_INDEX;} while (tokenIsAlnum(STAR_P) || STAR_P == '_' || STAR_P == '$');
    return variable(str.substr(oldIdx, idx - oldIdx), fileName, oldLine, oldCol);
  } else if (tokenIsDigit(STAR_P) ||
             (STAR_P == '-' && tokenIsDigit(P_SUB_1))) { // Numbers
    int isNegative = 1;
    if (STAR_P == '-') {isNegative = -1; INC_INDEX;}
    unsigned int oldIdx = idx;
    do {INC_INDEX;} while (tokenIsDigit(STAR_P));
    if (STAR_P == '.' && tokenIsDigit(P_SUB_1)) { // Floating point
      INC_INDEX;
      while (tokenIsDigit(STAR_P)) INC_INDEX;
      if (STAR_P == 'e' || STAR_P == 'E') {
        INC_INDEX;
        if (STAR_P == '-') INC_INDEX;
        while (tokenIsDigit(STAR_P)) INC_INDEX;
      }
      return floatingPoint(isNegative *
                           stringToDouble(str.substr(oldIdx, idx - oldIdx),
                                          fileName, oldLine, oldCol),
                           fileName, oldLine, oldCol);
    } else if (STAR_P == '\'') { // Base or character definition
      int base = stringToInt(str.substr(oldIdx, idx - oldIdx), 10);
      if (base == 0 && P_SUB_1) { // Character: 0'c
        INC_INDEX; // Skip the quote
        const char c = STAR_P;
        INC_INDEX; // Consume the character itself so it is not scanned again
        return integer(isNegative * c, fileName, oldLine, oldCol);
      } else if (base >= 2 && base <= 36) { // Based integer: base'digits
        INC_INDEX;
        oldIdx = idx;
        while (tokenIsAlnum(STAR_P)) INC_INDEX;
        return integer(isNegative *
                       stringToInt(str.substr(oldIdx, idx - oldIdx), base),
                       fileName, oldLine, oldCol);
      } else {
        std::cerr << "Invalid number base " << base << std::endl;
        abort();
      }
    } else { // Normal integer
      return integer(isNegative *
                     stringToInt(str.substr(oldIdx, idx - oldIdx), 10),
                     fileName, oldLine, oldCol);
    }
  } else if (STAR_P == '.' && P_SUB_1 && !tokenIsOperatorChar(P_SUB_1)) {
    // A '.' followed by a non-operator character is a period token.  A '.' at
    // the very end of the input does not match here and is scanned as an
    // operator atom by the next branch.
    INC_INDEX;
    return period(fileName, oldLine, oldCol);
  } else if (tokenIsOperatorChar(STAR_P)) { // Operators
    unsigned int oldIdx = idx;
    // tokenIsOperatorChar() is false for the '\0' returned at end of input,
    // so this loop stops at the end of str.
    do {INC_INDEX;} while (tokenIsOperatorChar(STAR_P));
    bool startOfTerm = false;
    if (STAR_P == '(') {startOfTerm = true; INC_INDEX;}
    return atom(str.substr(oldIdx, idx - oldIdx - startOfTerm), startOfTerm,
                fileName, oldLine, oldCol);
  } else if (STAR_P == '!' || STAR_P == ';') { // Single-character operators
    char c = STAR_P;
    INC_INDEX;
    bool startOfTerm = false;
    if (STAR_P == '(') {startOfTerm = true; INC_INDEX;}
    return atom(std::string(1, c), startOfTerm, fileName, oldLine, oldCol);
  } else if (STAR_P == '\'') { // Quoted atoms
    std::string result;
    INC_INDEX;
    unsigned int oldIdx = idx;
get_more_chars_atom:
    while (idx < str.size() && STAR_P != '\'') INC_INDEX;
    if (idx >= str.size()) {
      std::cerr << "Unterminated quoted atom" << std::endl;
      abort();
    }
    result += str.substr(oldIdx, idx - oldIdx);
    if (P_SUB_1 == '\'') { // Escaped single quote
      result += "'";
      INC_INDEX; INC_INDEX;
      // Restart the pending segment after the escape; otherwise the text
      // already copied would be appended a second time.
      oldIdx = idx;
      goto get_more_chars_atom;
    } else { // End of atom
      INC_INDEX;
      bool startOfTerm = false;
      if (STAR_P == '(') {startOfTerm = true; INC_INDEX;}
      return atom(result, startOfTerm, fileName, oldLine, oldCol);
    }
  } else if (STAR_P == '"') { // Strings
    std::string result;
    INC_INDEX;
    unsigned int oldIdx = idx;
get_more_chars_string:
    while (STAR_P && STAR_P != '"' && STAR_P != '\\') INC_INDEX;
    result += str.substr(oldIdx, idx - oldIdx);
    if (STAR_P == '"') {
      if (P_SUB_1 == '"') { // Escaped double quote
        result += "\"";
        INC_INDEX; INC_INDEX;
        oldIdx = idx;
        goto get_more_chars_string;
      } else { // End of string
        INC_INDEX;
        return stringToken(result, fileName, oldLine, oldCol);
      }
    } else if (STAR_P == '\\') {
      std::cerr << "Cannot handle backslash escapes in strings yet" << std::endl;
      abort();
    } else if (!STAR_P) {
      std::cerr << "Unterminated string constant" << std::endl;
      abort();
    }
  } else if (STAR_P == '(') {
    INC_INDEX;
    return leftParen(fileName, oldLine, oldCol);
  } else if (STAR_P == ')') {
    INC_INDEX;
    return rightParen(fileName, oldLine, oldCol);
  } else if (STAR_P == '[') {
    if (P_SUB_1 == ']') { // "[]" is scanned as a single atom
      INC_INDEX; INC_INDEX;
      bool startOfTerm = false;
      if (STAR_P == '(') {startOfTerm = true; INC_INDEX;}
      return atom("[]", startOfTerm, fileName, oldLine, oldCol);
    } else {
      INC_INDEX;
      return leftBracket(fileName, oldLine, oldCol);
    }
  } else if (STAR_P == ']') {
    INC_INDEX;
    return rightBracket(fileName, oldLine, oldCol);
  } else if (STAR_P == '|') {
    INC_INDEX;
    return verticalBar(fileName, oldLine, oldCol);
  } else if (STAR_P == ',') {
    INC_INDEX;
    return comma(fileName, oldLine, oldCol);
  } else if (tokenIsSpace(STAR_P)) { // White space
    INC_INDEX;
    goto top;
  } else if (STAR_P == '%') { // Comment
    // Skip to the end of the line; the line terminator itself is consumed as
    // white space on the next pass.  A comment that runs to the end of the
    // input without a line terminator is reported as an error.
    do {INC_INDEX;} while (STAR_P && STAR_P != '\n' && STAR_P != '\r');
    if (!STAR_P) {
      std::cerr << "Unterminated comment" << std::endl;
      abort();
    }
    goto top;
  } else {
    std::cerr << "Bad character '" << STAR_P << "'" << std::endl;
    abort();
  }
#undef STAR_P
#undef P_SUB_1
#undef INC_INDEX
  std::cerr << "Should not get here" << std::endl;
  abort();
  return endOfFile(fileName, line, col); // Should not get here
}