/*
    Get the next token from the compiler's input stream.

    The token object is obtained from getLexToken(). If it already carries a non-zero token ID, that ID is
    returned without reading any input (presumably a token that was put back -- confirm against getLexToken).
    Otherwise characters are read from cp->stream until one token has been recognized. White space, a
    backslash immediately followed by a newline, and comments are skipped.

    Multi-character operators are assembled by adding the leading characters to the token with
    addCharToToken/addStringToToken and handing the final character to makeToken/makeSubToken.
    NOTE(review): this assumes makeToken appends its (non-zero) character argument to the token text;
    the helper is not visible here -- confirm.

    @param cp Compiler object. Supplies the token (via getLexToken) and the input stream (cp->stream).
    @return T_ERR if no token object is available, the existing tp->tokenId if one is already set,
        otherwise whatever the make*Token helper returns for the recognized token (presumably its token ID).
 */
PUBLIC int ecGetToken(EcCompiler *cp)
{
    EcToken     *tp;
    EcStream    *stream;
    int         c;

    if ((tp = getLexToken(cp)) == NULL) {
        return T_ERR;
    }
    if (tp->tokenId) {
        return tp->tokenId;
    }
    stream = cp->stream;

    while (1) {
        c = getNextChar(stream);

        /*
            Overloadable operators

            + - ~ * / % < > <= >= == << >> >>> & | === != !==

            TODO FUTURE, we could allow also: ".", "[", "(" and unary !, ^
         */
        switch (c) {
        default:
            if (isdigit((uchar) c)) {
                return makeNumberToken(cp, tp, c);

            } else if (c == '\\') {
                c = getNextChar(stream);
                if (c == '\n') {
                    /* Backslash-newline: skip it and keep scanning */
                    break;
                }
                putBackChar(stream, c);
                c = '\n';
            }
            if (isalpha((uchar) c) || c == '_' || c == '\\' || c == '$') {
                return makeAlphaToken(cp, tp, c);
            }
            return makeToken(tp, 0, T_ERR, 0);

        case -1:
            return makeToken(tp, 0, T_ERR, 0);

        case 0:
            /* End of the current input: T_NOP when the stream is flagged EC_STREAM_EOL, otherwise T_EOF */
            if (stream->flags & EC_STREAM_EOL) {
                return makeToken(tp, 0, T_NOP, 0);
            }
            return makeToken(tp, 0, T_EOF, 0);

        case ' ':
        case '\f':
        case '\t':
        case '\v':
        case 0xA0:      /* No break space */
            break;

        case '\r':
        case '\n':
            break;

        case '"':
        case '\'':
            return makeQuotedToken(cp, tp, c);

        case '#':
            return makeToken(tp, c, T_HASH, 0);

        case '[':
            //  EJS extension to consider this an operator
            return makeToken(tp, c, T_LBRACKET, G_OPERATOR);

        case ']':
            return makeToken(tp, c, T_RBRACKET, 0);

        case '(':
            //  EJS extension to consider this an operator
            return makeToken(tp, c, T_LPAREN, G_OPERATOR);

        case ')':
            return makeToken(tp, c, T_RPAREN, 0);

        case '{':
            return makeToken(tp, c, T_LBRACE, 0);

        case '}':
            return makeToken(tp, c, T_RBRACE, 0);

        case '@':
            return makeToken(tp, c, T_AT, 0);

        case ';':
            return makeToken(tp, c, T_SEMICOLON, 0);

        case ',':
            return makeToken(tp, c, T_COMMA, 0);

        case '?':
            return makeToken(tp, c, T_QUERY, 0);

        case '~':
            return makeToken(tp, c, T_TILDE, G_OPERATOR);

        case '+':
            c = getNextChar(stream);
            if (c == '+') {
                addCharToToken(tp, '+');
                return makeToken(tp, c, T_PLUS_PLUS, G_OPERATOR);
            } else if (c == '=') {
                addCharToToken(tp, '+');
                return makeSubToken(tp, c, T_ASSIGN, T_PLUS_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
            }
            putBackChar(stream, c);
            return makeToken(tp, '+', T_PLUS, G_OPERATOR);

        case '-':
            /*
                A following digit is not folded into the token here: "-" is always returned as an
                operator and the look-ahead character is pushed back.
             */
            c = getNextChar(stream);
            if (c == '-') {
                addCharToToken(tp, '-');
                return makeToken(tp, c, T_MINUS_MINUS, G_OPERATOR);
            } else if (c == '=') {
                addCharToToken(tp, '-');
                return makeSubToken(tp, c, T_ASSIGN, T_MINUS_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
            }
            putBackChar(stream, c);
            return makeToken(tp, '-', T_MINUS, G_OPERATOR);

        case '*':
            c = getNextChar(stream);
            if (c == '=') {
                addCharToToken(tp, '*');
                return makeSubToken(tp, c, T_ASSIGN, T_MUL_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
            }
            putBackChar(stream, c);
            return makeToken(tp, '*', T_MUL, G_OPERATOR);

        case '/':
            c = getNextChar(stream);
            if (c == '=') {
                addCharToToken(tp, '/');
                return makeSubToken(tp, c, T_ASSIGN, T_DIV_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);

            } else if (c == '>') {
                addCharToToken(tp, '/');
                return makeToken(tp, c, T_SLASH_GT, G_OPERATOR);

            } else if (c == '*' || c == '/') {
                /*
                    C and C++ comments
                 */
                if (getComment(cp, tp, c) < 0) {
                    return tp->tokenId;
                }
                /*
                    Doc comments are: [slash]**. The second "*" becomes the first char of the comment.
                    Don't regard: [slash]*** (three stars) as a comment.
                    NOTE(review): the copy is tp->length wide characters long; whether that includes a
                    terminator depends on how tp->length is maintained -- confirm.
                 */
                if (cp->doc) {
                    if (tp->text && tp->text[0] == '*' && tp->text[1] != '*') {
                        cp->docToken = mprMemdup(tp->text, tp->length * sizeof(wchar));
                    }
                }
                /* Discard the comment text and resume scanning for a real token */
                initializeToken(tp, stream);
                break;
            }
            putBackChar(stream, c);
            return makeToken(tp, '/', T_DIV, G_OPERATOR);

        case '%':
            c = getNextChar(stream);
            if (c == '=') {
                addCharToToken(tp, '%');
                return makeSubToken(tp, c, T_ASSIGN, T_MOD_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
            }
            putBackChar(stream, c);
            return makeToken(tp, '%', T_MOD, G_OPERATOR);

        case '.':
            c = getNextChar(stream);
            if (c == '.') {
                c = getNextChar(stream);
                if (c == '.') {
                    addStringToToken(tp, "..");
                    return makeToken(tp, c, T_ELIPSIS, 0);
                }
                putBackChar(stream, c);
                addCharToToken(tp, '.');
                return makeToken(tp, '.', T_DOT_DOT, 0);
#if FUTURE
            } else if (c == '<') {
                addCharToToken(tp, '.');
                return makeToken(tp, c, T_DOT_LESS, 0);
#endif
            } else if (isdigit((uchar) c)) {
                /* Number with a leading decimal point: push the digit back and start from the '.' */
                putBackChar(stream, c);
                return makeNumberToken(cp, tp, '.');
            }
            putBackChar(stream, c);
            //  EJS extension to consider this an operator
            return makeToken(tp, '.', T_DOT, G_OPERATOR);

        case ':':
            c = getNextChar(stream);
            if (c == ':') {
                addCharToToken(tp, ':');
                return makeToken(tp, c, T_COLON_COLON, 0);
            }
            putBackChar(stream, c);
            return makeToken(tp, ':', T_COLON, 0);

        case '!':
            c = getNextChar(stream);
            if (c == '=') {
                c = getNextChar(stream);
                if (c == '=') {
                    addStringToToken(tp, "!=");
                    return makeToken(tp, c, T_STRICT_NE, G_OPERATOR);
                }
                putBackChar(stream, c);
                addCharToToken(tp, '!');
                return makeToken(tp, '=', T_NE, G_OPERATOR);
            }
            putBackChar(stream, c);
            return makeToken(tp, '!', T_LOGICAL_NOT, G_OPERATOR);

        case '&':
            c = getNextChar(stream);
            if (c == '&') {
                addCharToToken(tp, '&');
                c = getNextChar(stream);
                if (c == '=') {
                    addCharToToken(tp, '&');
                    return makeSubToken(tp, '=', T_ASSIGN, T_LOGICAL_AND_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
                }
                putBackChar(stream, c);
                return makeToken(tp, '&', T_LOGICAL_AND, G_OPERATOR);

            } else if (c == '=') {
                addCharToToken(tp, '&');
                return makeSubToken(tp, c, T_ASSIGN, T_BIT_AND_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
            }
            putBackChar(stream, c);
            return makeToken(tp, '&', T_BIT_AND, G_OPERATOR);

        case '<':
            c = getNextChar(stream);
            if (c == '=') {
                addCharToToken(tp, '<');
                return makeToken(tp, c, T_LE, G_OPERATOR);

            } else if (c == '<') {
                c = getNextChar(stream);
                if (c == '=') {
                    addStringToToken(tp, "<<");
                    return makeSubToken(tp, c, T_ASSIGN, T_LSH_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
                }
                putBackChar(stream, c);
                addCharToToken(tp, '<');
                /* The look-ahead char was pushed back; the token text ends with the second '<' */
                return makeToken(tp, '<', T_LSH, G_OPERATOR);

            } else if (c == '/') {
                addCharToToken(tp, '<');
                return makeToken(tp, c, T_LT_SLASH, 0);
            }
            putBackChar(stream, c);
            return makeToken(tp, '<', T_LT, G_OPERATOR);

        case '=':
            c = getNextChar(stream);
            if (c == '=') {
                c = getNextChar(stream);
                if (c == '=') {
                    addStringToToken(tp, "==");
                    return makeToken(tp, c, T_STRICT_EQ, G_OPERATOR);
                }
                putBackChar(stream, c);
                addCharToToken(tp, '=');
                /* The look-ahead char was pushed back; the token text ends with the second '=' */
                return makeToken(tp, '=', T_EQ, G_OPERATOR);
            }
            putBackChar(stream, c);
            return makeToken(tp, '=', T_ASSIGN, G_OPERATOR);

        case '>':
            c = getNextChar(stream);
            if (c == '=') {
                /* Lead character of ">=" is '>' */
                addCharToToken(tp, '>');
                return makeToken(tp, c, T_GE, G_OPERATOR);

            } else if (c == '>') {
                c = getNextChar(stream);
                if (c == '=') {
                    addStringToToken(tp, ">>");
                    return makeSubToken(tp, c, T_ASSIGN, T_RSH_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);

                } else if (c == '>') {
                    c = getNextChar(stream);
                    if (c == '=') {
                        addStringToToken(tp, ">>>");
                        return makeSubToken(tp, c, T_ASSIGN, T_RSH_ZERO_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
                    }
                    putBackChar(stream, c);
                    addStringToToken(tp, ">>");
                    return makeToken(tp, '>', T_RSH_ZERO, G_OPERATOR);
                }
                putBackChar(stream, c);
                addCharToToken(tp, '>');
                return makeToken(tp, '>', T_RSH, G_OPERATOR);
            }
            putBackChar(stream, c);
            return makeToken(tp, '>', T_GT, G_OPERATOR);

        case '^':
            c = getNextChar(stream);
            if (c == '^') {
                addCharToToken(tp, '^');
                c = getNextChar(stream);
                if (c == '=') {
                    addCharToToken(tp, '^');
                    return makeSubToken(tp, '=', T_ASSIGN, T_LOGICAL_XOR_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
                }
                putBackChar(stream, c);
                return makeToken(tp, '^', T_LOGICAL_XOR, G_OPERATOR);

            } else if (c == '=') {
                addCharToToken(tp, '^');
                return makeSubToken(tp, '=', T_ASSIGN, T_BIT_XOR_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
            }
            putBackChar(stream, c);
            return makeToken(tp, '^', T_BIT_XOR, G_OPERATOR);

        case '|':
            c = getNextChar(stream);
            if (c == '|') {
                addCharToToken(tp, '|');
                c = getNextChar(stream);
                if (c == '=') {
                    addCharToToken(tp, '|');
                    return makeSubToken(tp, '=', T_ASSIGN, T_LOGICAL_OR_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
                }
                putBackChar(stream, c);
                return makeToken(tp, '|', T_LOGICAL_OR, G_OPERATOR);

            } else if (c == '=') {
                addCharToToken(tp, '|');
                return makeSubToken(tp, '=', T_ASSIGN, T_BIT_OR_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
            }
            putBackChar(stream, c);
            return makeToken(tp, '|', T_BIT_OR, G_OPERATOR);
        }
    }
}
/*
 *  Get the next token from the input.
 *
 *  If a token is waiting on the input->putBack list, it is unlinked, made the current token
 *  (input->token) and its ID is returned; the previously current token, if any, is pushed onto
 *  input->freeTokens. Otherwise the current token is reused (one is allocated on first use, with
 *  lineNumber set to 1), reset with initializeToken() and filled by reading characters from
 *  input->stream until one token has been recognized. White space, a backslash immediately followed
 *  by a newline, and comments are skipped.
 *
 *  Multi-character operators are assembled by adding the leading characters to the token with
 *  addCharToToken/addStringToToken and handing the final character to makeToken/makeSubToken.
 *  NOTE(review): this assumes makeToken appends its (non-zero) character argument to the token text;
 *  the helper is not visible here -- confirm.
 *
 *  @param input Lexer input state: current token, put-back list, free list and character stream.
 *  @return The ID of a put-back token, -1 if a token object cannot be allocated, otherwise whatever
 *      the token-building helper returns for the recognized token (presumably its token ID).
 */
int ecGetToken(EcInput *input)
{
    EcToken     *token, *tp;
    EcStream    *stream;
    int         c;

    //  TODO - functionalize this section

    token = input->token;

    if ((tp = input->putBack) != 0) {
        input->putBack = tp->next;
        input->token = tp;

        /*
         *  Move any old token to free list
         */
        if (token) {
            token->next = input->freeTokens;
            input->freeTokens = token;
        }
        return tp->tokenId;
    }

    if (token == 0) {
        //  TBD -- need an API for this
        input->token = mprAllocObjZeroed(input, EcToken);
        if (input->token == 0) {
            //  TBD -- err code
            return -1;
        }
        input->token->lineNumber = 1;
    }

    stream = input->stream;
    tp = input->token;
    mprAssert(tp);

    initializeToken(tp, stream);

    while (1) {
        c = getNextChar(stream);

        /*
         *  Overloadable operators
         *
         *  + - ~ * / % < > <= >= == << >> >>> & | === != !==
         *
         *  TODO FUTURE, we could allow also: ".", "[", "("
         *
         *  TODO: what about unary !, ^
         */
        switch (c) {
        default:
            /*
             *  The <ctype.h> classifiers are only defined for EOF and values representable as
             *  unsigned char, so the character is cast before classification.
             */
            if (isdigit((unsigned char) c)) {
                return getNumberToken(input, tp, c);

            } else if (c == '\\') {
                c = getNextChar(stream);
                if (c == '\n') {
                    /* Backslash-newline: skip it and keep scanning */
                    break;
                }
                putBackChar(stream, c);
                c = '\n';
            }
            if (isalpha((unsigned char) c) || c == '_' || c == '\\' || c == '$') {
                return getAlphaToken(input, tp, c);
            }
            return makeToken(tp, 0, T_ERR, 0);

        case -1:
            return makeToken(tp, 0, T_ERR, 0);

        case 0:
            /* End of the current input: T_NOP when the stream is flagged EC_STREAM_EOL, otherwise T_EOF */
            if (stream->flags & EC_STREAM_EOL) {
                return makeToken(tp, 0, T_NOP, 0);
            }
            return makeToken(tp, 0, T_EOF, 0);

        case ' ':
        case '\t':
            break;

        case '\r':
        case '\n':
            /*
             *  No token text gathered yet and the stream's line number differs from the one recorded
             *  in the token: clear the token's currentLine reference.
             */
            if (tp->textLen == 0 && tp->lineNumber != stream->lineNumber) {
                tp->currentLine = 0;
            }
            break;

        case '"':
        case '\'':
            return getQuotedToken(input, tp, c);

        case '#':
            return makeToken(tp, c, T_HASH, 0);

        case '[':
            //  EJS extension to consider this an operator
            return makeToken(tp, c, T_LBRACKET, G_OPERATOR);

        case ']':
            return makeToken(tp, c, T_RBRACKET, 0);

        case '(':
            //  EJS extension to consider this an operator
            return makeToken(tp, c, T_LPAREN, G_OPERATOR);

        case ')':
            return makeToken(tp, c, T_RPAREN, 0);

        case '{':
            return makeToken(tp, c, T_LBRACE, 0);

        case '}':
            return makeToken(tp, c, T_RBRACE, 0);

        case '@':
            return makeToken(tp, c, T_AT, 0);

        case ';':
            return makeToken(tp, c, T_SEMICOLON, 0);

        case ',':
            return makeToken(tp, c, T_COMMA, 0);

        case '?':
            return makeToken(tp, c, T_QUERY, 0);

        case '~':
            return makeToken(tp, c, T_TILDE, G_OPERATOR);

        case '+':
            c = getNextChar(stream);
            if (c == '+') {
                addCharToToken(tp, '+');
                return makeToken(tp, c, T_PLUS_PLUS, G_OPERATOR);
            } else if (c == '=') {
                addCharToToken(tp, '+');
                return makeSubToken(tp, c, T_ASSIGN, T_PLUS_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
            }
            putBackChar(stream, c);
            return makeToken(tp, '+', T_PLUS, G_OPERATOR);

        case '-':
            /*
             *  A following digit is not folded into the token here: "-" is always returned as an
             *  operator and the look-ahead character is pushed back.
             */
            c = getNextChar(stream);
            if (c == '-') {
                addCharToToken(tp, '-');
                return makeToken(tp, c, T_MINUS_MINUS, G_OPERATOR);
            } else if (c == '=') {
                addCharToToken(tp, '-');
                return makeSubToken(tp, c, T_ASSIGN, T_MINUS_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
            }
            putBackChar(stream, c);
            return makeToken(tp, '-', T_MINUS, G_OPERATOR);

        case '*':
            c = getNextChar(stream);
            if (c == '=') {
                addCharToToken(tp, '*');
                return makeSubToken(tp, c, T_ASSIGN, T_MUL_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
            }
            putBackChar(stream, c);
            return makeToken(tp, '*', T_MUL, G_OPERATOR);

        case '/':
            c = getNextChar(stream);
            if (c == '=') {
                addCharToToken(tp, '/');
                return makeSubToken(tp, c, T_ASSIGN, T_DIV_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);

            } else if (c == '>') {
                addCharToToken(tp, '/');
                return makeToken(tp, c, T_SLASH_GT, G_OPERATOR);

            } else if (c == '*' || c == '/') {
                /*
                 *  C and C++ comments
                 */
                if (getComment(input, tp, c) < 0) {
                    return tp->tokenId;
                }
#if BLD_FEATURE_EJS_DOC
                /* Comment text starting with '*' is kept as the pending doc string, replacing any earlier one */
                if (tp->text && tp->text[0] == '*') {
                    mprFree(input->doc);
                    input->doc = mprStrdup(input, (char*) tp->text);
                }
#endif
                /* Discard the comment text and resume scanning for a real token */
                initializeToken(tp, stream);
                break;
            }
            putBackChar(stream, c);
            return makeToken(tp, '/', T_DIV, G_OPERATOR);

        case '%':
            c = getNextChar(stream);
            if (c == '=') {
                addCharToToken(tp, '%');
                return makeSubToken(tp, c, T_ASSIGN, T_MOD_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
            }
            putBackChar(stream, c);
            return makeToken(tp, '%', T_MOD, G_OPERATOR);

        case '.':
            c = getNextChar(stream);
            if (c == '.') {
                c = getNextChar(stream);
                if (c == '.') {
                    addStringToToken(tp, "..");
                    return makeToken(tp, c, T_ELIPSIS, 0);
                }
                putBackChar(stream, c);
                addCharToToken(tp, '.');
                return makeToken(tp, '.', T_DOT_DOT, 0);

            } else if (c == '<') {
                addCharToToken(tp, '.');
                return makeToken(tp, c, T_DOT_LESS, 0);

            } else if (isdigit((unsigned char) c)) {
                /*
                 *  NOTE(review): the digit is pushed back onto the stream and also passed as the lead
                 *  character, while the '.' itself is not passed on. This looks like the digit may be
                 *  consumed twice and the decimal point dropped -- confirm against getNumberToken.
                 */
                putBackChar(stream, c);
                return getNumberToken(input, tp, c);
            }
            putBackChar(stream, c);
            //  EJS extension to consider this an operator
            return makeToken(tp, '.', T_DOT, G_OPERATOR);

        case ':':
            c = getNextChar(stream);
            if (c == ':') {
                addCharToToken(tp, ':');
                return makeToken(tp, c, T_COLON_COLON, 0);
            }
            putBackChar(stream, c);
            return makeToken(tp, ':', T_COLON, 0);

        case '!':
            c = getNextChar(stream);
            if (c == '=') {
                c = getNextChar(stream);
                if (c == '=') {
                    addStringToToken(tp, "!=");
                    return makeToken(tp, c, T_STRICT_NE, G_OPERATOR);
                }
                putBackChar(stream, c);
                addCharToToken(tp, '!');
                return makeToken(tp, '=', T_NE, G_OPERATOR);
            }
            putBackChar(stream, c);
            return makeToken(tp, '!', T_LOGICAL_NOT, G_OPERATOR);

        case '&':
            c = getNextChar(stream);
            if (c == '&') {
                addCharToToken(tp, '&');
                c = getNextChar(stream);
                if (c == '=') {
                    addCharToToken(tp, '&');
                    return makeSubToken(tp, '=', T_ASSIGN, T_LOGICAL_AND_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
                }
                putBackChar(stream, c);
                return makeToken(tp, '&', T_LOGICAL_AND, G_OPERATOR);

            } else if (c == '=') {
                addCharToToken(tp, '&');
                return makeSubToken(tp, c, T_ASSIGN, T_BIT_AND_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
            }
            putBackChar(stream, c);
            return makeToken(tp, '&', T_BIT_AND, G_OPERATOR);

        case '<':
            c = getNextChar(stream);
            if (c == '=') {
                addCharToToken(tp, '<');
                return makeToken(tp, c, T_LE, G_OPERATOR);

            } else if (c == '<') {
                c = getNextChar(stream);
                if (c == '=') {
                    addStringToToken(tp, "<<");
                    return makeSubToken(tp, c, T_ASSIGN, T_LSH_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
                }
                putBackChar(stream, c);
                addCharToToken(tp, '<');
                /* The look-ahead char was pushed back; the token text ends with the second '<' */
                return makeToken(tp, '<', T_LSH, G_OPERATOR);

            } else if (c == '/') {
                addCharToToken(tp, '<');
                return makeToken(tp, c, T_LT_SLASH, 0);
            }
            putBackChar(stream, c);
            return makeToken(tp, '<', T_LT, G_OPERATOR);

        case '=':
            c = getNextChar(stream);
            if (c == '=') {
                c = getNextChar(stream);
                if (c == '=') {
                    addStringToToken(tp, "==");
                    return makeToken(tp, c, T_STRICT_EQ, G_OPERATOR);
                }
                putBackChar(stream, c);
                addCharToToken(tp, '=');
                /* The look-ahead char was pushed back; the token text ends with the second '=' */
                return makeToken(tp, '=', T_EQ, G_OPERATOR);
            }
            putBackChar(stream, c);
            return makeToken(tp, '=', T_ASSIGN, G_OPERATOR);

        case '>':
            c = getNextChar(stream);
            if (c == '=') {
                /* Lead character of ">=" is '>' */
                addCharToToken(tp, '>');
                return makeToken(tp, c, T_GE, G_OPERATOR);

            } else if (c == '>') {
                c = getNextChar(stream);
                if (c == '=') {
                    addStringToToken(tp, ">>");
                    return makeSubToken(tp, c, T_ASSIGN, T_RSH_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);

                } else if (c == '>') {
                    c = getNextChar(stream);
                    if (c == '=') {
                        addStringToToken(tp, ">>>");
                        return makeSubToken(tp, c, T_ASSIGN, T_RSH_ZERO_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
                    }
                    putBackChar(stream, c);
                    addStringToToken(tp, ">>");
                    return makeToken(tp, '>', T_RSH_ZERO, G_OPERATOR);
                }
                putBackChar(stream, c);
                addCharToToken(tp, '>');
                return makeToken(tp, '>', T_RSH, G_OPERATOR);
            }
            putBackChar(stream, c);
            return makeToken(tp, '>', T_GT, G_OPERATOR);

        case '^':
            c = getNextChar(stream);
            if (c == '^') {
                addCharToToken(tp, '^');
                c = getNextChar(stream);
                if (c == '=') {
                    addCharToToken(tp, '^');
                    return makeSubToken(tp, '=', T_ASSIGN, T_LOGICAL_XOR_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
                }
                putBackChar(stream, c);
                return makeToken(tp, '^', T_LOGICAL_XOR, G_OPERATOR);

            } else if (c == '=') {
                addCharToToken(tp, '^');
                return makeSubToken(tp, '=', T_ASSIGN, T_BIT_XOR_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
            }
            putBackChar(stream, c);
            return makeToken(tp, '^', T_BIT_XOR, G_OPERATOR);

        case '|':
            c = getNextChar(stream);
            if (c == '|') {
                addCharToToken(tp, '|');
                c = getNextChar(stream);
                if (c == '=') {
                    addCharToToken(tp, '|');
                    return makeSubToken(tp, '=', T_ASSIGN, T_LOGICAL_OR_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
                }
                putBackChar(stream, c);
                return makeToken(tp, '|', T_LOGICAL_OR, G_OPERATOR);

            } else if (c == '=') {
                addCharToToken(tp, '|');
                return makeSubToken(tp, '=', T_ASSIGN, T_BIT_OR_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
            }
            putBackChar(stream, c);
            return makeToken(tp, '|', T_BIT_OR, G_OPERATOR);
        }
    }
}