Beispiel #1
0
// Test-support overload (this is really only used for testing).
// Attempts to read an identifier starting at s and then have it reclassified
// by tokeniseReservedWord(tok). s is advanced only when both steps succeed;
// on failure s is left where it was.
// NOTE(review): tok is handed to both helpers, so it may have been modified
// even when this returns false - confirm against the helpers.
bool tokeniseReservedWord(std::string::const_iterator& s, std::string::const_iterator& e, Token& tok)
{
    // Work on a scratch iterator so the caller's position is untouched on failure
    std::string::const_iterator cursor = s;

    if (!tokeniseIdentifier(cursor, e, tok)) {
        return false;
    }
    if (!tokeniseReservedWord(tok)) {
        return false;
    }

    s = cursor;
    return true;
}
Beispiel #2
0
// The <cctype> classification functions have undefined behaviour when given a
// negative value other than EOF, which is what a non-ASCII byte becomes where
// plain char is signed. These wrappers convert through unsigned char first.
static inline bool tokIsSpace(char c) { return std::isspace(static_cast<unsigned char>(c)) != 0; }
static inline bool tokIsDigit(char c) { return std::isdigit(static_cast<unsigned char>(c)) != 0; }
static inline bool tokIsHexDigit(char c) { return std::isxdigit(static_cast<unsigned char>(c)) != 0; }

// Scan a single token from the range [s, e).
//
// Leading whitespace is skipped (s is advanced past it as it is skipped, so
// this happens even if the scan is later rejected). Then:
//  - at end of input, tok is set to a T_EOS token and true is returned;
//  - a quote character hands the rest of the scan over to processString()
//    (single quote -> T_STRING, double quote -> T_IDENTIFIER) and its result
//    is returned;
//  - punctuation/operators, identifiers and numeric literals are recognised by
//    the state machine below; on acceptance tok is built from the iterator
//    range [s, t), s is moved to the end of the token and true is returned;
//  - anything else returns false.
//
// Identifiers are additionally passed through tokeniseReservedWord(tok); its
// result is not used here.
//
// Numeric literals: digits may contain '_'; a trailing 'l'/'L' is consumed as
// part of an exact literal; a trailing 'f'/'F'/'d'/'D' is consumed as part of
// an approximate literal; a leading "0x"/"0X" or "0b"/"0B" selects hex or
// binary digits; a '.' or an exponent makes the literal T_NUMERIC_APPROX.
bool tokenise(std::string::const_iterator& s, std::string::const_iterator& e, Token& tok)
{
    // t is the scan position; s stays at the start of the current token
    std::string::const_iterator t = s;

    // Hand constructed state machine recogniser
    enum {
        START,
        REJECT,
        IDENTIFIER,
        ZERO,
        DIGIT,
        HEXDIGIT_START,
        HEXDIGIT,
        OCTDIGIT,
        BINDIGIT_START,
        BINDIGIT,
        DECIMAL_START,
        DECIMAL,
        EXPONENT_SIGN,
        EXPONENT_START,
        EXPONENT,
        ACCEPT_IDENTIFIER,
        ACCEPT_INC,       // accept after consuming the character at t
        ACCEPT_NOINC      // accept without consuming the character at t
    } state = START;

    TokenType tokType = T_EOS;
    // Every path through the switch either returns or uses 'continue' to
    // re-dispatch on the (possibly updated) state.
    while (true) {
    switch (state) {
    case START:
        if (t==e) {tok = Token(T_EOS, s, "<END>"); return true;}
        // Skip whitespace, keeping the token start in step with the scan position
        else if (tokIsSpace(*t)) {++t; ++s; continue;}
        else switch (*t) {
        case '(': tokType = T_LPAREN; state = ACCEPT_INC; continue;
        case ')': tokType = T_RPAREN; state = ACCEPT_INC; continue;
        case ',': tokType = T_COMMA; state = ACCEPT_INC; continue;
        case '+': tokType = T_PLUS; state = ACCEPT_INC; continue;
        case '-': tokType = T_MINUS; state = ACCEPT_INC; continue;
        case '*': tokType = T_MULT; state = ACCEPT_INC; continue;
        case '/': tokType = T_DIV; state = ACCEPT_INC; continue;
        case '=': tokType = T_EQUAL; state = ACCEPT_INC; continue;
        case '<':
            // One character of lookahead distinguishes '<', '<>' and '<='
            ++t;
            if (t==e || (*t!='>' && *t!='='))
                {tokType = T_LESS; state = ACCEPT_NOINC; continue; }
            else
                {tokType = (*t=='>') ? T_NEQ : T_LSEQ; state = ACCEPT_INC; continue; }
        case '>':
            // One character of lookahead distinguishes '>' and '>='
            ++t;
            if (t==e || *t!='=')
                {tokType = T_GRT; state = ACCEPT_NOINC; continue;}
            else
                {tokType = T_GREQ; state = ACCEPT_INC; continue;}
        default:
            break;
        }
        // Not an operator: classify by first character
        if (isIdentifierStart(*t)) {++t; state = IDENTIFIER;}
        else if (*t=='\'') {return processString(s, e, '\'', T_STRING, tok);}
        else if (*t=='\"') {return processString(s, e, '\"', T_IDENTIFIER, tok);}
        else if (*t=='0') {++t; state = ZERO;}
        else if (tokIsDigit(*t)) {++t; state = DIGIT;}
        else if (*t=='.') {++t; state = DECIMAL_START;}
        else state = REJECT;
        continue;
    case IDENTIFIER:
        if (t==e) {state = ACCEPT_IDENTIFIER;}
        else if (isIdentifierPart(*t)) {++t; state = IDENTIFIER;}
        else state = ACCEPT_IDENTIFIER;
        continue;
    case DECIMAL_START:
        // Seen a leading '.': at least one digit must follow
        if (t==e) {state = REJECT;}
        else if (tokIsDigit(*t)) {++t; state = DECIMAL;}
        else state = REJECT;
        continue;
    case EXPONENT_SIGN:
        // Seen the exponent marker: optional sign, then at least one digit
        if (t==e) {state = REJECT;}
        else if (*t=='-' || *t=='+') {++t; state = EXPONENT_START;}
        else if (tokIsDigit(*t)) {++t; state = EXPONENT;}
        else state = REJECT;
        continue;
    case EXPONENT_START:
        // Seen the exponent sign: at least one digit must follow
        if (t==e) {state = REJECT;}
        else if (tokIsDigit(*t)) {++t; state = EXPONENT;}
        else state = REJECT;
        continue;
    case ZERO:
        // Seen a leading '0': decide between decimal fraction, hex, binary and octal
        if (t==e) {tokType = T_NUMERIC_EXACT; state = ACCEPT_NOINC;}
        else if (*t=='.') {++t; state = DECIMAL;}
        else if (*t=='x' || *t=='X') {++t; state = HEXDIGIT_START;}
        else if (*t=='b' || *t=='B') {++t; state = BINDIGIT_START;}
        else state = OCTDIGIT;
        continue;
    case HEXDIGIT_START:
        // Seen "0x": at least one hex digit must follow
        if (t==e) {state = REJECT;}
        else if (tokIsHexDigit(*t)) {++t; state = HEXDIGIT;}
        else state = REJECT;
        continue;
    case HEXDIGIT:
        if (t==e) {tokType = T_NUMERIC_EXACT; state = ACCEPT_NOINC;}
        else if (*t=='l' || *t=='L') {tokType = T_NUMERIC_EXACT; state = ACCEPT_INC;}
        else if (tokIsHexDigit(*t) || *t=='_') {++t; state = HEXDIGIT;}
        // 'p'/'P' introduces the exponent of a hex literal
        else if (*t=='p' || *t=='P') {++t; state = EXPONENT_SIGN;}
        else {tokType = T_NUMERIC_EXACT; state = ACCEPT_NOINC;}
        continue;
    case BINDIGIT_START:
        // Seen "0b": at least one binary digit must follow
        if (t==e) {state = REJECT;}
        else if (*t=='0' || *t=='1') {++t; state = BINDIGIT;}
        else state = REJECT;
        continue;
    case BINDIGIT:
        if (t==e) {tokType = T_NUMERIC_EXACT; state = ACCEPT_NOINC;}
        else if (*t=='l' || *t=='L') {tokType = T_NUMERIC_EXACT; state = ACCEPT_INC;}
        else if (*t=='0' || *t=='1' || *t=='_') {++t; state = BINDIGIT;}
        else {tokType = T_NUMERIC_EXACT; state = ACCEPT_NOINC;}
        continue;
    case OCTDIGIT:
        if (t==e) {tokType = T_NUMERIC_EXACT; state = ACCEPT_NOINC;}
        else if (*t=='l' || *t=='L') {tokType = T_NUMERIC_EXACT; state = ACCEPT_INC;}
        else if ((tokIsDigit(*t) && *t<'8') || *t=='_') {++t; state = OCTDIGIT;}
        else {tokType = T_NUMERIC_EXACT; state = ACCEPT_NOINC;}
        continue;
    case DIGIT:
        if (t==e) {tokType = T_NUMERIC_EXACT; state = ACCEPT_NOINC;}
        else if (*t=='l' || *t=='L') {tokType = T_NUMERIC_EXACT; state = ACCEPT_INC;}
        else if (*t=='f' || *t=='F' || *t=='d' || *t=='D') {tokType = T_NUMERIC_APPROX; state = ACCEPT_INC;}
        else if (tokIsDigit(*t) || *t=='_') {++t; state = DIGIT;}
        else if (*t=='.') {++t; state = DECIMAL;}
        else if (*t=='e' || *t=='E') {++t; state = EXPONENT_SIGN;}
        else {tokType = T_NUMERIC_EXACT; state = ACCEPT_NOINC;}
        continue;
    case DECIMAL:
        if (t==e) {tokType = T_NUMERIC_APPROX; state = ACCEPT_NOINC;}
        else if (tokIsDigit(*t) || *t=='_') {++t; state = DECIMAL;}
        else if (*t=='e' || *t=='E') {++t; state = EXPONENT_SIGN;}
        else if (*t=='f' || *t=='F' || *t=='d' || *t=='D') {tokType = T_NUMERIC_APPROX; state = ACCEPT_INC;}
        else {tokType = T_NUMERIC_APPROX; state = ACCEPT_NOINC;}
        continue;
    case EXPONENT:
        if (t==e) {tokType = T_NUMERIC_APPROX; state = ACCEPT_NOINC;}
        else if (tokIsDigit(*t)) {++t; state = EXPONENT;}
        else if (*t=='f' || *t=='F' || *t=='d' || *t=='D') {tokType = T_NUMERIC_APPROX; state = ACCEPT_INC;}
        else {tokType = T_NUMERIC_APPROX; state = ACCEPT_NOINC;}
        continue;
    case ACCEPT_INC:
        // Consume the final character, then share the accept logic below
        ++t;
        // fall through
    case ACCEPT_NOINC:
        tok = Token(tokType, s, t);
        s = t;
        return true;
    case ACCEPT_IDENTIFIER:
        tok = Token(T_IDENTIFIER, s, t);
        s = t;
        // Give the reserved-word check a chance to adjust tok; the identifier
        // is accepted regardless of its result
        tokeniseReservedWord(tok);
        return true;
    case REJECT:
        return false;
    }
    }
}
Beispiel #3
0
// Reads an identifier via tokeniseIdentifier(s, e, tok) and, if that succeeds,
// passes the token through tokeniseReservedWord(tok). The outcome of the
// reserved-word step is deliberately discarded: the return value reflects only
// whether an identifier was read.
bool tokeniseIdentifierOrReservedWord(std::string::const_iterator& s, std::string::const_iterator& e, Token& tok)
{
    if (!tokeniseIdentifier(s, e, tok)) {
        return false;
    }

    // Result intentionally ignored - an identifier was read either way
    static_cast<void>(tokeniseReservedWord(tok));
    return true;
}