// Scans a line of tokens using yylex token * BlarbVM_scanLine(BlarbVM *vm) { BlarbVM_WORD tokenCount = 0; // 1024 tokens *should* be way more than enough. token *line = malloc(sizeof(token) * 1024); token_t tokenType; int label_call_present = 0; int newline_present = 0; while ((tokenType = yylex())) { token *t = &line[tokenCount++]; // In general, we just care about the type. t->type = tokenType; if (tokenType == NEWLINE) { newline_present = 1; break; // No tokens (except newlines) are allowed after a function call } else if (label_call_present) { fprintf(stderr, "Parse error: " "No tokens are allowed after calls to labels.\n" "Error on line %d in %s\n", yylineno, yyfilename); exit(1); } else if (tokenType == INTEGER) { t->val = strtoull(yytext, 0, 10); } else if (tokenType == LABEL_CALL) { addStringToToken(t, yytext, strlen(yytext)); label_call_present = 1; } else if (tokenType == LABEL) { // Exclude the hash addStringToToken(t, yytext + 1, strlen(yytext) - 1); BlarbVM_addLabelPointer(vm, t->str, vm->lineCount); } else if (tokenType == STR) { addStringLiteralToToken(t, yytext); } else if (tokenType == CHR) { t->type = INTEGER; char *s = yytext + 1; t->val = parseChar(&s); } } // If this is the EOF (no more tokens) if (tokenCount == 0) { free(line); return NULL; } // In case we hit an EOF on a valid line (we use newlines for terminators) if ( ! newline_present) { token *t = &line[tokenCount++]; t->type = NEWLINE; } line = BlarbVM_optimizeLine(line, &tokenCount); // Resize the memory chunk so we don't waste memory on small lines return realloc(line, sizeof(token) * tokenCount); }
/*
    Format a printf-style message and hand the resulting string to addStringToToken.
    The formatting is done by sfmtv. The result of addStringToToken is ignored and
    this routine always returns zero.
 */
static int addFormattedStringToToken(EcToken *tp, char *fmt, ...)
{
    va_list     ap;

    va_start(ap, fmt);
    addStringToToken(tp, sfmtv(fmt, ap));
    va_end(ap);

    return 0;
}
/*
    Format a printf-style message into a temporary buffer (via mprAllocVsprintf, limited
    to MPR_MAX_STRING), pass it to addStringToToken and then release the buffer with
    mprFree. The results of the helper calls are ignored; always returns zero.
 */
static int addFormattedStringToToken(EcToken *tp, char *fmt, ...)
{
    char        *text;
    va_list     ap;

    va_start(ap, fmt);
    mprAllocVsprintf(tp, &text, MPR_MAX_STRING, fmt, ap);
    va_end(ap);

    addStringToToken(tp, text);
    mprFree(text);

    return 0;
}
/*
    Get the next input token.

    Obtains the current token object via getLexToken. If that token already carries a
    token id it is returned as-is. Otherwise characters are read from cp->stream until
    a complete token has been recognised. White space and comments are skipped.

    Returns the token id produced by makeToken / makeSubToken / makeNumberToken /
    makeAlphaToken / makeQuotedToken, or T_ERR if no token object is available, the
    stream reports -1, or an unrecognised character is seen.

    NOTE(review): throughout this routine the second argument to makeToken/makeSubToken
    is used as "the final character to append to the token text" (zero meaning none).
    That helper is not visible here -- confirm against its definition.
 */
PUBLIC int ecGetToken(EcCompiler *cp)
{
    EcToken     *tp;
    EcStream    *stream;
    int         c;

    if ((tp = getLexToken(cp)) == NULL) {
        return T_ERR;
    }
    if (tp->tokenId) {
        return tp->tokenId;
    }
    stream = cp->stream;

    while (1) {
        c = getNextChar(stream);

        /*
            Overloadable operators
            + - ~ * / % < > <= >= == << >> >>> & | === != !==
            TODO FUTURE, we could allow also: ".", "[", "(" and unary !, ^
         */
        switch (c) {
        default:
            if (isdigit((uchar) c)) {
                return makeNumberToken(cp, tp, c);

            } else if (c == '\\') {
                c = getNextChar(stream);
                if (c == '\n') {
                    /* Backslash-newline: skip it and keep scanning */
                    break;
                }
                /*
                    A backslash followed by anything else: push the char back and continue
                    with c set to newline, which fails the identifier test below and so
                    yields T_ERR.
                 */
                putBackChar(stream, c);
                c = '\n';
            }
            if (isalpha((uchar) c) || c == '_' || c == '\\' || c == '$') {
                return makeAlphaToken(cp, tp, c);
            }
            return makeToken(tp, 0, T_ERR, 0);

        case -1:
            return makeToken(tp, 0, T_ERR, 0);

        case 0:
            if (stream->flags & EC_STREAM_EOL) {
                return makeToken(tp, 0, T_NOP, 0);
            }
            return makeToken(tp, 0, T_EOF, 0);

        case ' ':
        case '\f':
        case '\t':
        case '\v':
        case 0xA0:       /* No break space */
            break;

        case '\r':
        case '\n':
            break;

        case '"':
        case '\'':
            return makeQuotedToken(cp, tp, c);

        case '#':
            return makeToken(tp, c, T_HASH, 0);

        case '[':
            //  EJS extension to consider this an operator
            return makeToken(tp, c, T_LBRACKET, G_OPERATOR);

        case ']':
            return makeToken(tp, c, T_RBRACKET, 0);

        case '(':
            //  EJS extension to consider this an operator
            return makeToken(tp, c, T_LPAREN, G_OPERATOR);

        case ')':
            return makeToken(tp, c, T_RPAREN, 0);

        case '{':
            return makeToken(tp, c, T_LBRACE, 0);

        case '}':
            return makeToken(tp, c, T_RBRACE, 0);

        case '@':
            return makeToken(tp, c, T_AT, 0);

        case ';':
            return makeToken(tp, c, T_SEMICOLON, 0);

        case ',':
            return makeToken(tp, c, T_COMMA, 0);

        case '?':
            return makeToken(tp, c, T_QUERY, 0);

        case '~':
            return makeToken(tp, c, T_TILDE, G_OPERATOR);

        case '+':
            c = getNextChar(stream);
            if (c == '+') {
                addCharToToken(tp, '+');
                return makeToken(tp, c, T_PLUS_PLUS, G_OPERATOR);
            } else if (c == '=') {
                addCharToToken(tp, '+');
                return makeSubToken(tp, c, T_ASSIGN, T_PLUS_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
            }
            putBackChar(stream, c);
            return makeToken(tp, '+', T_PLUS, G_OPERATOR);

        case '-':
            c = getNextChar(stream);
            if (isdigit((uchar) c)) {
                /* "-<digit>" is lexed as a minus operator; the digit is re-read as a separate token */
                putBackChar(stream, c);
                return makeToken(tp, '-', T_MINUS, G_OPERATOR);
            } else if (c == '-') {
                addCharToToken(tp, '-');
                return makeToken(tp, c, T_MINUS_MINUS, G_OPERATOR);
            } else if (c == '=') {
                addCharToToken(tp, '-');
                return makeSubToken(tp, c, T_ASSIGN, T_MINUS_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
            }
            putBackChar(stream, c);
            return makeToken(tp, '-', T_MINUS, G_OPERATOR);

        case '*':
            c = getNextChar(stream);
            if (c == '=') {
                addCharToToken(tp, '*');
                return makeSubToken(tp, c, T_ASSIGN, T_MUL_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
            }
            putBackChar(stream, c);
            return makeToken(tp, '*', T_MUL, G_OPERATOR);

        case '/':
            c = getNextChar(stream);
            if (c == '=') {
                addCharToToken(tp, '/');
                return makeSubToken(tp, c, T_ASSIGN, T_DIV_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);

            } else if (c == '>') {
                addCharToToken(tp, '/');
                return makeToken(tp, c, T_SLASH_GT, G_OPERATOR);

            } else if (c == '*' || c == '/') {
                /*
                    C and C++ comments
                 */
                if (getComment(cp, tp, c) < 0) {
                    return tp->tokenId;
                }
                /*
                    Doc comments are: [slash]**. The second "*' becomes the first char of the comment.
                    Don't regard: [slash]*** (three stars) as a comment.
                 */
                if (cp->doc) {
                    if (tp->text && tp->text[0] == '*' && tp->text[1] != '*') {
                        cp->docToken = mprMemdup(tp->text, tp->length * sizeof(wchar));
                    }
                }
                /* Discard the comment text and resume scanning for a real token */
                initializeToken(tp, stream);
                break;
            }
            putBackChar(stream, c);
            return makeToken(tp, '/', T_DIV, G_OPERATOR);

        case '%':
            c = getNextChar(stream);
            if (c == '=') {
                addCharToToken(tp, '%');
                return makeSubToken(tp, c, T_ASSIGN, T_MOD_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
            }
            putBackChar(stream, c);
            return makeToken(tp, '%', T_MOD, G_OPERATOR);

        case '.':
            c = getNextChar(stream);
            if (c == '.') {
                c = getNextChar(stream);
                if (c == '.') {
                    addStringToToken(tp, "..");
                    return makeToken(tp, c, T_ELIPSIS, 0);
                }
                putBackChar(stream, c);
                addCharToToken(tp, '.');
                return makeToken(tp, '.', T_DOT_DOT, 0);
#if FUTURE
            } else if (c == '<') {
                addCharToToken(tp, '.');
                return makeToken(tp, c, T_DOT_LESS, 0);
#endif
            } else if (isdigit((uchar) c)) {
                /* Number with a leading decimal point */
                putBackChar(stream, c);
                return makeNumberToken(cp, tp, '.');
            }
            putBackChar(stream, c);
            //  EJS extension to consider this an operator
            return makeToken(tp, '.', T_DOT, G_OPERATOR);

        case ':':
            c = getNextChar(stream);
            if (c == ':') {
                addCharToToken(tp, ':');
                return makeToken(tp, c, T_COLON_COLON, 0);
            }
            putBackChar(stream, c);
            return makeToken(tp, ':', T_COLON, 0);

        case '!':
            c = getNextChar(stream);
            if (c == '=') {
                c = getNextChar(stream);
                if (c == '=') {
                    addStringToToken(tp, "!=");
                    return makeToken(tp, c, T_STRICT_NE, G_OPERATOR);
                }
                putBackChar(stream, c);
                addCharToToken(tp, '!');
                return makeToken(tp, '=', T_NE, G_OPERATOR);
            }
            putBackChar(stream, c);
            return makeToken(tp, '!', T_LOGICAL_NOT, G_OPERATOR);

        case '&':
            c = getNextChar(stream);
            if (c == '&') {
                addCharToToken(tp, '&');
                c = getNextChar(stream);
                if (c == '=') {
                    addCharToToken(tp, '&');
                    return makeSubToken(tp, '=', T_ASSIGN, T_LOGICAL_AND_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
                }
                putBackChar(stream, c);
                return makeToken(tp, '&', T_LOGICAL_AND, G_OPERATOR);

            } else if (c == '=') {
                addCharToToken(tp, '&');
                return makeSubToken(tp, c, T_ASSIGN, T_BIT_AND_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
            }
            putBackChar(stream, c);
            return makeToken(tp, '&', T_BIT_AND, G_OPERATOR);

        case '<':
            c = getNextChar(stream);
            if (c == '=') {
                addCharToToken(tp, '<');
                return makeToken(tp, c, T_LE, G_OPERATOR);

            } else if (c == '<') {
                c = getNextChar(stream);
                if (c == '=') {
                    addStringToToken(tp, "<<");
                    return makeSubToken(tp, c, T_ASSIGN, T_LSH_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
                }
                putBackChar(stream, c);
                addCharToToken(tp, '<');
                /*
                    Append the second '<' literally. The lookahead char in "c" has been pushed
                    back and must not become part of the "<<" token text.
                 */
                return makeToken(tp, '<', T_LSH, G_OPERATOR);

            } else if (c == '/') {
                addCharToToken(tp, '<');
                return makeToken(tp, c, T_LT_SLASH, 0);
            }
            putBackChar(stream, c);
            return makeToken(tp, '<', T_LT, G_OPERATOR);

        case '=':
            c = getNextChar(stream);
            if (c == '=') {
                c = getNextChar(stream);
                if (c == '=') {
                    addStringToToken(tp, "==");
                    return makeToken(tp, c, T_STRICT_EQ, G_OPERATOR);
                }
                putBackChar(stream, c);
                addCharToToken(tp, '=');
                /*
                    Append the second '=' literally. The lookahead char in "c" has been pushed
                    back and must not become part of the "==" token text.
                 */
                return makeToken(tp, '=', T_EQ, G_OPERATOR);
            }
            putBackChar(stream, c);
            return makeToken(tp, '=', T_ASSIGN, G_OPERATOR);

        case '>':
            c = getNextChar(stream);
            if (c == '=') {
                /* Token text is ">=": the leading char is '>' */
                addCharToToken(tp, '>');
                return makeToken(tp, c, T_GE, G_OPERATOR);

            } else if (c == '>') {
                c = getNextChar(stream);
                if (c == '=') {
                    addStringToToken(tp, ">>");
                    return makeSubToken(tp, c, T_ASSIGN, T_RSH_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);

                } else if (c == '>') {
                    c = getNextChar(stream);
                    if (c == '=') {
                        addStringToToken(tp, ">>>");
                        return makeSubToken(tp, c, T_ASSIGN, T_RSH_ZERO_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
                    }
                    putBackChar(stream, c);
                    addStringToToken(tp, ">>");
                    return makeToken(tp, '>', T_RSH_ZERO, G_OPERATOR);
                }
                putBackChar(stream, c);
                addCharToToken(tp, '>');
                return makeToken(tp, '>', T_RSH, G_OPERATOR);
            }
            putBackChar(stream, c);
            return makeToken(tp, '>', T_GT, G_OPERATOR);

        case '^':
            c = getNextChar(stream);
            if (c == '^') {
                addCharToToken(tp, '^');
                c = getNextChar(stream);
                if (c == '=') {
                    addCharToToken(tp, '^');
                    return makeSubToken(tp, '=', T_ASSIGN, T_LOGICAL_XOR_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
                }
                putBackChar(stream, c);
                return makeToken(tp, '^', T_LOGICAL_XOR, G_OPERATOR);

            } else if (c == '=') {
                addCharToToken(tp, '^');
                return makeSubToken(tp, '=', T_ASSIGN, T_BIT_XOR_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
            }
            putBackChar(stream, c);
            return makeToken(tp, '^', T_BIT_XOR, G_OPERATOR);

        case '|':
            c = getNextChar(stream);
            if (c == '|') {
                addCharToToken(tp, '|');
                c = getNextChar(stream);
                if (c == '=') {
                    addCharToToken(tp, '|');
                    return makeSubToken(tp, '=', T_ASSIGN, T_LOGICAL_OR_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
                }
                putBackChar(stream, c);
                return makeToken(tp, '|', T_LOGICAL_OR, G_OPERATOR);

            } else if (c == '=') {
                addCharToToken(tp, '|');
                return makeSubToken(tp, '=', T_ASSIGN, T_BIT_OR_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
            }
            putBackChar(stream, c);
            return makeToken(tp, '|', T_BIT_OR, G_OPERATOR);
        }
    }
}
/*
 *  Get the next input token.
 *
 *  If a token has been put back (input->putBack), it is popped, becomes input->token and its
 *  token id is returned; any previous current token is moved onto input->freeTokens.
 *  Otherwise the current token object is (allocated if needed and) re-initialized, and
 *  characters are read from input->stream until a complete token has been recognised.
 *  White space and comments are skipped.
 *
 *  Returns the token id, T_ERR for an unrecognised character or a -1 from the stream, or
 *  -1 if the token object cannot be allocated.
 *
 *  NOTE(review): throughout this routine the second argument to makeToken/makeSubToken is
 *  used as "the final character to append to the token text" (zero meaning none). That
 *  helper is not visible here -- confirm against its definition.
 */
int ecGetToken(EcInput *input)
{
    EcToken     *token, *tp;
    EcStream    *stream;
    int         c;

    //  TODO - functionalize this section

    token = input->token;

    if ((tp = input->putBack) != 0) {
        input->putBack = tp->next;
        input->token = tp;

        /*
         *  Move any old token to free list
         */
        if (token) {
            token->next = input->freeTokens;
            input->freeTokens = token;
        }
        return tp->tokenId;
    }

    if (token == 0) {
        //  TBD -- need an API for this
        input->token = mprAllocObjZeroed(input, EcToken);
        if (input->token == 0) {
            //  TBD -- err code
            return -1;
        }
        input->token->lineNumber = 1;
    }

    stream = input->stream;
    tp = input->token;
    mprAssert(tp);

    initializeToken(tp, stream);

    while (1) {
        c = getNextChar(stream);

        /*
         *  Overloadable operators
         *
         *  + - ~ * / % < > <= >= == << >> >>> & | === != !==
         *
         *  TODO FUTURE, we could allow also: ".", "[", "("
         *
         *  TODO: what about unary !, ^
         */
        switch (c) {
        default:
        number:
            /*
             *  The <ctype.h> classifiers require a value representable as unsigned char (or EOF),
             *  so cast before calling them.
             */
            if (isdigit((unsigned char) c)) {
                return getNumberToken(input, tp, c);

            } else if (c == '\\') {
                c = getNextChar(stream);
                if (c == '\n') {
                    /* Backslash-newline: skip it and keep scanning */
                    break;
                }
                /*
                 *  A backslash followed by anything else: push the char back and continue with
                 *  c set to newline, which fails the identifier test below and so yields T_ERR.
                 */
                putBackChar(stream, c);
                c = '\n';
            }
            if (isalpha((unsigned char) c) || c == '_' || c == '\\' || c == '$') {
                return getAlphaToken(input, tp, c);
            }
            return makeToken(tp, 0, T_ERR, 0);

        case -1:
            return makeToken(tp, 0, T_ERR, 0);

        case 0:
            if (stream->flags & EC_STREAM_EOL) {
                return makeToken(tp, 0, T_NOP, 0);
            } else {
                return makeToken(tp, 0, T_EOF, 0);
            }

        case ' ':
        case '\t':
            break;

        case '\r':
        case '\n':
            /* No token text gathered yet and the stream has moved to another line: clear currentLine */
            if (tp->textLen == 0 && tp->lineNumber != stream->lineNumber) {
                tp->currentLine = 0;
            }
            break;

        case '"':
        case '\'':
            return getQuotedToken(input, tp, c);

        case '#':
            return makeToken(tp, c, T_HASH, 0);

        case '[':
            //  EJS extension to consider this an operator
            return makeToken(tp, c, T_LBRACKET, G_OPERATOR);

        case ']':
            return makeToken(tp, c, T_RBRACKET, 0);

        case '(':
            //  EJS extension to consider this an operator
            return makeToken(tp, c, T_LPAREN, G_OPERATOR);

        case ')':
            return makeToken(tp, c, T_RPAREN, 0);

        case '{':
            return makeToken(tp, c, T_LBRACE, 0);

        case '}':
            return makeToken(tp, c, T_RBRACE, 0);

        case '@':
            return makeToken(tp, c, T_AT, 0);

        case ';':
            return makeToken(tp, c, T_SEMICOLON, 0);

        case ',':
            return makeToken(tp, c, T_COMMA, 0);

        case '?':
            return makeToken(tp, c, T_QUERY, 0);

        case '~':
            return makeToken(tp, c, T_TILDE, G_OPERATOR);

        case '+':
            c = getNextChar(stream);
            if (c == '+') {
                addCharToToken(tp, '+');
                return makeToken(tp, c, T_PLUS_PLUS, G_OPERATOR);
            } else if (c == '=') {
                addCharToToken(tp, '+');
                return makeSubToken(tp, c, T_ASSIGN, T_PLUS_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
            }
            putBackChar(stream, c);
            return makeToken(tp, '+', T_PLUS, G_OPERATOR);

        case '-':
            c = getNextChar(stream);
            if (isdigit((unsigned char) c)) {
                /* "-<digit>" is lexed as a minus operator; the digit is re-read as a separate token */
                putBackChar(stream, c);
                return makeToken(tp, '-', T_MINUS, G_OPERATOR);
            } else if (c == '-') {
                addCharToToken(tp, '-');
                return makeToken(tp, c, T_MINUS_MINUS, G_OPERATOR);
            } else if (c == '=') {
                addCharToToken(tp, '-');
                return makeSubToken(tp, c, T_ASSIGN, T_MINUS_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
            }
            putBackChar(stream, c);
            return makeToken(tp, '-', T_MINUS, G_OPERATOR);

        case '*':
            c = getNextChar(stream);
            if (c == '=') {
                addCharToToken(tp, '*');
                return makeSubToken(tp, c, T_ASSIGN, T_MUL_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
            }
            putBackChar(stream, c);
            return makeToken(tp, '*', T_MUL, G_OPERATOR);

        case '/':
            c = getNextChar(stream);
            if (c == '=') {
                addCharToToken(tp, '/');
                return makeSubToken(tp, c, T_ASSIGN, T_DIV_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);

            } else if (c == '>') {
                addCharToToken(tp, '/');
                return makeToken(tp, c, T_SLASH_GT, G_OPERATOR);

            } else if (c == '*' || c == '/') {
                /*
                 *  C and C++ comments
                 */
                if (getComment(input, tp, c) < 0) {
                    return tp->tokenId;
                }
#if BLD_FEATURE_EJS_DOC
                /* Comment text starting with '*' is kept as the pending doc string */
                if (tp->text && tp->text[0] == '*') {
                    mprFree(input->doc);
                    input->doc = mprStrdup(input, (char*) tp->text);
                }
#endif
                /* Discard the comment text and resume scanning for a real token */
                initializeToken(tp, stream);
                break;
            }
            putBackChar(stream, c);
            return makeToken(tp, '/', T_DIV, G_OPERATOR);

        case '%':
            c = getNextChar(stream);
            if (c == '=') {
                addCharToToken(tp, '%');
                return makeSubToken(tp, c, T_ASSIGN, T_MOD_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
            }
            putBackChar(stream, c);
            return makeToken(tp, '%', T_MOD, G_OPERATOR);

        case '.':
            c = getNextChar(stream);
            if (c == '.') {
                c = getNextChar(stream);
                if (c == '.') {
                    addStringToToken(tp, "..");
                    return makeToken(tp, c, T_ELIPSIS, 0);
                }
                putBackChar(stream, c);
                addCharToToken(tp, '.');
                return makeToken(tp, '.', T_DOT_DOT, 0);

            } else if (c == '<') {
                addCharToToken(tp, '.');
                return makeToken(tp, c, T_DOT_LESS, 0);

            } else if (isdigit((unsigned char) c)) {
                /*
                 *  NOTE(review): the digit is pushed back AND passed to getNumberToken via the
                 *  "number" label, and the leading '.' is not added to the token. Looks like
                 *  ".5" may be mis-scanned -- confirm against getNumberToken before changing.
                 */
                putBackChar(stream, c);
                goto number;
            }
            putBackChar(stream, c);
            //  EJS extension to consider this an operator
            return makeToken(tp, '.', T_DOT, G_OPERATOR);

        case ':':
            c = getNextChar(stream);
            if (c == ':') {
                addCharToToken(tp, ':');
                return makeToken(tp, c, T_COLON_COLON, 0);
            }
            putBackChar(stream, c);
            return makeToken(tp, ':', T_COLON, 0);

        case '!':
            c = getNextChar(stream);
            if (c == '=') {
                c = getNextChar(stream);
                if (c == '=') {
                    addStringToToken(tp, "!=");
                    return makeToken(tp, c, T_STRICT_NE, G_OPERATOR);
                }
                putBackChar(stream, c);
                addCharToToken(tp, '!');
                return makeToken(tp, '=', T_NE, G_OPERATOR);
            }
            putBackChar(stream, c);
            return makeToken(tp, '!', T_LOGICAL_NOT, G_OPERATOR);

#if UNUSED
        case '~':
            c = getNextChar(stream);
            if (c == '=') {
                addStringToToken(tp, "~=");
                return makeSubToken(tp, c, T_ASSIGN, T_BIT_NEG_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
            }
            putBackChar(stream, c);
            return makeToken(tp, '~', T_BIT_NEG, G_OPERATOR);
#endif

        case '&':
            c = getNextChar(stream);
            if (c == '&') {
                addCharToToken(tp, '&');
                c = getNextChar(stream);
                if (c == '=') {
                    addCharToToken(tp, '&');
                    return makeSubToken(tp, '=', T_ASSIGN, T_LOGICAL_AND_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
                }
                putBackChar(stream, c);
                return makeToken(tp, '&', T_LOGICAL_AND, G_OPERATOR);

            } else if (c == '=') {
                addCharToToken(tp, '&');
                return makeSubToken(tp, c, T_ASSIGN, T_BIT_AND_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
            }
            putBackChar(stream, c);
            return makeToken(tp, '&', T_BIT_AND, G_OPERATOR);

        case '<':
            c = getNextChar(stream);
            if (c == '=') {
                addCharToToken(tp, '<');
                return makeToken(tp, c, T_LE, G_OPERATOR);

            } else if (c == '<') {
                c = getNextChar(stream);
                if (c == '=') {
                    addStringToToken(tp, "<<");
                    return makeSubToken(tp, c, T_ASSIGN, T_LSH_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
                }
                putBackChar(stream, c);
                addCharToToken(tp, '<');
                /*
                 *  Append the second '<' literally. The lookahead char in "c" has been pushed
                 *  back and must not become part of the "<<" token text.
                 */
                return makeToken(tp, '<', T_LSH, G_OPERATOR);

            } else if (c == '/') {
                addCharToToken(tp, '<');
                return makeToken(tp, c, T_LT_SLASH, 0);
            }
            putBackChar(stream, c);
            return makeToken(tp, '<', T_LT, G_OPERATOR);

        case '=':
            c = getNextChar(stream);
            if (c == '=') {
                c = getNextChar(stream);
                if (c == '=') {
                    addStringToToken(tp, "==");
                    return makeToken(tp, c, T_STRICT_EQ, G_OPERATOR);
                }
                putBackChar(stream, c);
                addCharToToken(tp, '=');
                /*
                 *  Append the second '=' literally. The lookahead char in "c" has been pushed
                 *  back and must not become part of the "==" token text.
                 */
                return makeToken(tp, '=', T_EQ, G_OPERATOR);
            }
            putBackChar(stream, c);
            return makeToken(tp, '=', T_ASSIGN, G_OPERATOR);

        case '>':
            c = getNextChar(stream);
            if (c == '=') {
                /* Token text is ">=": the leading char is '>' */
                addCharToToken(tp, '>');
                return makeToken(tp, c, T_GE, G_OPERATOR);

            } else if (c == '>') {
                c = getNextChar(stream);
                if (c == '=') {
                    addStringToToken(tp, ">>");
                    return makeSubToken(tp, c, T_ASSIGN, T_RSH_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);

                } else if (c == '>') {
                    c = getNextChar(stream);
                    if (c == '=') {
                        addStringToToken(tp, ">>>");
                        return makeSubToken(tp, c, T_ASSIGN, T_RSH_ZERO_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
                    }
                    putBackChar(stream, c);
                    addStringToToken(tp, ">>");
                    return makeToken(tp, '>', T_RSH_ZERO, G_OPERATOR);
                }
                putBackChar(stream, c);
                addCharToToken(tp, '>');
                return makeToken(tp, '>', T_RSH, G_OPERATOR);
            }
            putBackChar(stream, c);
            return makeToken(tp, '>', T_GT, G_OPERATOR);

        case '^':
            c = getNextChar(stream);
            if (c == '^') {
                addCharToToken(tp, '^');
                c = getNextChar(stream);
                if (c == '=') {
                    addCharToToken(tp, '^');
                    return makeSubToken(tp, '=', T_ASSIGN, T_LOGICAL_XOR_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
                }
                putBackChar(stream, c);
                return makeToken(tp, '^', T_LOGICAL_XOR, G_OPERATOR);

            } else if (c == '=') {
                addCharToToken(tp, '^');
                return makeSubToken(tp, '=', T_ASSIGN, T_BIT_XOR_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
            }
            putBackChar(stream, c);
            return makeToken(tp, '^', T_BIT_XOR, G_OPERATOR);

        case '|':
            c = getNextChar(stream);
            if (c == '|') {
                addCharToToken(tp, '|');
                c = getNextChar(stream);
                if (c == '=') {
                    addCharToToken(tp, '|');
                    return makeSubToken(tp, '=', T_ASSIGN, T_LOGICAL_OR_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
                }
                putBackChar(stream, c);
                return makeToken(tp, '|', T_LOGICAL_OR, G_OPERATOR);

            } else if (c == '=') {
                addCharToToken(tp, '|');
                return makeSubToken(tp, '=', T_ASSIGN, T_BIT_OR_ASSIGN, G_OPERATOR | G_COMPOUND_ASSIGN);
            }
            putBackChar(stream, c);
            return makeToken(tp, '|', T_BIT_OR, G_OPERATOR);
        }
    }
}