/*
 *  Get the next lexical token from the current input (ep->input).
 *
 *  ep      Interpreter instance. ep->token is pointed at the text of the
 *          token and ep->tokenNumber is set for numeric tokens.
 *  state   Parser state; it is only passed through to checkReservedWord().
 *
 *  Returns one of the EJS_TOK_* identifiers visible below (EJS_TOK_EOF at
 *  end of input, EJS_TOK_ERR after ejsError() has been called), or whatever
 *  checkReservedWord() returns for identifiers and put-back tokens.
 *
 *  NOTE(review): the text of this definition that is visible here stops
 *  after the closing braces of the "switch" and the "for" loop. No trailing
 *  statements, "return" or closing brace of the function body are visible,
 *  so the definition looks truncated -- confirm against the full file.
 */
static int getLexicalToken(Ejs *ep, int state)
{
    MprType     type;
    EjsInput    *ip;
    int         done, tid, c, quote, style, idx;

    mprAssert(ep);
    ip = ep->input;
    mprAssert(ip);

    ep->tid = -1;
    tid = -1;
    type = BLD_FEATURE_NUM_TYPE_ID;

    /*
     *  Use a put-back token first. Don't free strings as the caller needs
     *  access. The put-back id is re-checked by checkReservedWord().
     */
    if (ip->putBackIndex >= 0) {
        idx = ip->putBackIndex;
        tid = ip->putBack[idx].id;
        ep->token = (char*) ip->putBack[idx].token;
        tid = checkReservedWord(ep, state, 0, tid);
        ip->putBackIndex--;
        return tid;
    }

    /*
     *  Reset the token buffer to an empty string
     */
    ep->token = ip->tokServp = ip->tokEndp = ip->tokbuf;
    *ip->tokServp = '\0';

    if ((c = inputGetc(ep)) < 0) {
        return EJS_TOK_EOF;
    }

    /*
     *  Main lexical analyser. Each pass dispatches on the current character.
     *  A "break" out of the switch re-runs the loop with whatever is now in
     *  "c"; only the identifier case sets "done".
     */
    for (done = 0; !done; ) {
        switch (c) {
        case -1:
            return EJS_TOK_EOF;

        case ' ':
        case '\t':
        case '\r':
            /*
             *  Skip horizontal white space. Newlines are not skipped here as
             *  they are returned as tokens.
             */
            do {
                if ((c = inputGetc(ep)) < 0)
                    break;
            } while (c == ' ' || c == '\t' || c == '\r');
            break;

        case '\n':
            return EJS_TOK_NEWLINE;

        case '(':
            tokenAddChar(ep, c);
            return EJS_TOK_LPAREN;

        case ')':
            tokenAddChar(ep, c);
            return EJS_TOK_RPAREN;

        case '[':
            tokenAddChar(ep, c);
            return EJS_TOK_LBRACKET;

        case ']':
            tokenAddChar(ep, c);
            return EJS_TOK_RBRACKET;

        case '.':
            tokenAddChar(ep, c);
            return EJS_TOK_PERIOD;

        case '{':
            tokenAddChar(ep, c);
            return EJS_TOK_LBRACE;

        case '}':
            tokenAddChar(ep, c);
            return EJS_TOK_RBRACE;

        case '+':                                   /* "+" or "++" */
            if ((c = inputGetc(ep)) < 0) {
                ejsError(ep, "Syntax Error");
                return EJS_TOK_ERR;
            }
            if (c != '+' ) {
                inputPutback(ep, c);
                tokenAddChar(ep, EJS_EXPR_PLUS);
                return EJS_TOK_EXPR;
            }
            tokenAddChar(ep, EJS_EXPR_INC);
            return EJS_TOK_INC_DEC;

        case '-':                                   /* "-" or "--" */
            if ((c = inputGetc(ep)) < 0) {
                ejsError(ep, "Syntax Error");
                return EJS_TOK_ERR;
            }
            if (c != '-' ) {
                inputPutback(ep, c);
                tokenAddChar(ep, EJS_EXPR_MINUS);
                return EJS_TOK_EXPR;
            }
            tokenAddChar(ep, EJS_EXPR_DEC);
            return EJS_TOK_INC_DEC;

        case '*':
            tokenAddChar(ep, EJS_EXPR_MUL);
            return EJS_TOK_EXPR;

        case '%':
            tokenAddChar(ep, EJS_EXPR_MOD);
            return EJS_TOK_EXPR;

        case '/':
            /*
             *  Handle the division operator and comments
             */
            if ((c = inputGetc(ep)) < 0) {
                ejsError(ep, "Syntax Error");
                return EJS_TOK_ERR;
            }
            if (c != '*' && c != '/') {
                inputPutback(ep, c);
                tokenAddChar(ep, EJS_EXPR_DIV);
                return EJS_TOK_EXPR;
            }
            /* "style" is '*' for a block comment and '/' for a line comment */
            style = c;
            /*
             *  Eat comments. Both C and C++ comment styles are supported.
             *  End of input ends a line comment with EJS_TOK_EOF but is a
             *  syntax error inside a block comment.
             *
             *  NOTE(review): the character read after a '*' is not examined
             *  again, so a block comment ending in a run of stars followed
             *  by '/' does not appear to be recognised as terminated.
             */
            while (1) {
                if ((c = inputGetc(ep)) < 0) {
                    if (style == '/') {
                        return EJS_TOK_EOF;
                    }
                    ejsError(ep, "Syntax Error");
                    return EJS_TOK_ERR;
                }
                if (c == '\n' && style == '/') {
                    break;
                } else if (c == '*') {
                    c = inputGetc(ep);
                    if (style == '/') {
                        if (c == '\n') {
                            break;
                        }
                    } else {
                        if (c == '/') {
                            break;
                        }
                    }
                }
            }
            /*
             *  Continue looking for a token, so get the next character
             */
            if ((c = inputGetc(ep)) < 0) {
                return EJS_TOK_EOF;
            }
            break;

        case '<':                                   /* "<", "<<" and "<=" */
            if ((c = inputGetc(ep)) < 0) {
                ejsError(ep, "Syntax Error");
                return EJS_TOK_ERR;
            }
            if (c == '<') {
                tokenAddChar(ep, EJS_EXPR_LSHIFT);
                return EJS_TOK_EXPR;
            } else if (c == '=') {
                tokenAddChar(ep, EJS_EXPR_LESSEQ);
                return EJS_TOK_EXPR;
            }
            tokenAddChar(ep, EJS_EXPR_LESS);
            inputPutback(ep, c);
            return EJS_TOK_EXPR;

        case '>':                                   /* ">", ">>" and ">=" */
            if ((c = inputGetc(ep)) < 0) {
                ejsError(ep, "Syntax Error");
                return EJS_TOK_ERR;
            }
            if (c == '>') {
                tokenAddChar(ep, EJS_EXPR_RSHIFT);
                return EJS_TOK_EXPR;
            } else if (c == '=') {
                tokenAddChar(ep, EJS_EXPR_GREATEREQ);
                return EJS_TOK_EXPR;
            }
            tokenAddChar(ep, EJS_EXPR_GREATER);
            inputPutback(ep, c);
            return EJS_TOK_EXPR;

        case '=':                                   /* "==" or "=" */
            if ((c = inputGetc(ep)) < 0) {
                ejsError(ep, "Syntax Error");
                return EJS_TOK_ERR;
            }
            if (c == '=') {
                tokenAddChar(ep, EJS_EXPR_EQ);
                return EJS_TOK_EXPR;
            }
            inputPutback(ep, c);
            return EJS_TOK_ASSIGNMENT;

        case '!':                                   /* "!=" or "!" */
            if ((c = inputGetc(ep)) < 0) {
                ejsError(ep, "Syntax Error");
                return EJS_TOK_ERR;
            }
            if (c == '=') {
                tokenAddChar(ep, EJS_EXPR_NOTEQ);
                return EJS_TOK_EXPR;
            }
            inputPutback(ep, c);
            tokenAddChar(ep, EJS_EXPR_BOOL_COMP);
            return EJS_TOK_EXPR;

        case ';':
            tokenAddChar(ep, c);
            return EJS_TOK_SEMI;

        case ',':
            tokenAddChar(ep, c);
            return EJS_TOK_COMMA;

        case '|':                                   /* "||" only; a lone "|" is an error */
            if ((c = inputGetc(ep)) < 0 || c != '|') {
                ejsError(ep, "Syntax Error");
                return EJS_TOK_ERR;
            }
            tokenAddChar(ep, EJS_COND_OR);
            return EJS_TOK_LOGICAL;

        case '&':                                   /* "&&" only; a lone "&" is an error */
            if ((c = inputGetc(ep)) < 0 || c != '&') {
                ejsError(ep, "Syntax Error");
                return EJS_TOK_ERR;
            }
            tokenAddChar(ep, EJS_COND_AND);
            return EJS_TOK_LOGICAL;

        case '\"':                                  /* String quote */
        case '\'':
            /*
             *  The literal runs until the same quote character that opened it
             */
            quote = c;
            if ((c = inputGetc(ep)) < 0) {
                ejsError(ep, "Syntax Error");
                return EJS_TOK_ERR;
            }

            while (c != quote) {
                /*
                 *  Check for escape sequence characters
                 */
                if (c == '\\') {
                    c = inputGetc(ep);

                    if (isdigit(c)) {
                        /*
                         *  Octal support, \101 maps to 65 = 'A'. Put first
                         *  char back so converter will work properly.
                         */
                        inputPutback(ep, c);
                        c = charConvert(ep, 8, 3);

                    } else {
                        switch (c) {
                        case 'n':
                            c = '\n'; break;
                        case 'b':
                            c = '\b'; break;
                        case 'f':
                            c = '\f'; break;
                        case 'r':
                            c = '\r'; break;
                        case 't':
                            c = '\t'; break;
                        case 'x':
                            /*
                             *  Hex support, \x41 maps to 65 = 'A'
                             */
                            c = charConvert(ep, 16, 2);
                            break;
                        case 'u':
                            /*
                             *  Unicode support: two conversions of two hex
                             *  digits each are combined as first*16 + second.
                             *  NOTE(review): two hex digits span 8 bits, so a
                             *  multiplier of 256 may have been intended --
                             *  confirm before relying on \u escapes.
                             */
                            c = charConvert(ep, 16, 2);
                            c = c*16 + charConvert(ep, 16, 2);
                            break;
                        case '\'':
                        case '\"':
                        case '\\':
                            /* Escaped quote or backslash stands for itself */
                            break;
                        default:
                            ejsError(ep, "Invalid Escape Sequence");
                            return EJS_TOK_ERR;
                        }
                    }
                    if (tokenAddChar(ep, c) < 0) {
                        return EJS_TOK_ERR;
                    }
                } else {
                    if (tokenAddChar(ep, c) < 0) {
                        return EJS_TOK_ERR;
                    }
                }
                if ((c = inputGetc(ep)) < 0) {
                    ejsError(ep, "Unmatched Quote");
                    return EJS_TOK_ERR;
                }
            }
            return EJS_TOK_LITERAL;

        case '0':
            /*
             *  A leading zero may start a hex number ("0x..."), be a number
             *  on its own, or be the first digit of a longer number.
             */
            if (tokenAddChar(ep, c) < 0) {
                return EJS_TOK_ERR;
            }
            /*
             *  NOTE(review): on end of input this leaves the switch without
             *  returning a number token; if c is -1 the next pass of the loop
             *  returns EJS_TOK_EOF -- confirm this is intended.
             */
            if ((c = inputGetc(ep)) < 0) {
                break;
            }
            if (tolower(c) == 'x') {
                /* Collect the 'x' and the hex digits that follow it */
                do {
                    if (tokenAddChar(ep, c) < 0) {
                        return EJS_TOK_ERR;
                    }
                    if ((c = inputGetc(ep)) < 0) {
                        break;
                    }
                } while (isdigit(c) || (tolower(c) >= 'a' && tolower(c) <= 'f'));

                mprDestroyVar(&ep->tokenNumber);
                ep->tokenNumber = mprParseVar(ep->token, type);
                inputPutback(ep, c);
                return EJS_TOK_NUMBER;
            }
            if (! isdigit(c)) {
#if BLD_FEATURE_FLOATING_POINT
                if (c == '.' || tolower(c) == 'e' || c == '+' || c == '-') {
                    /* Fall through */
                    type = MPR_TYPE_FLOAT;
                } else
#endif
                {
                    /* The number is just "0": parse it and put back the look-ahead */
                    mprDestroyVar(&ep->tokenNumber);
                    ep->tokenNumber = mprParseVar(ep->token, type);
                    inputPutback(ep, c);
                    return EJS_TOK_NUMBER;
                }
            }
            /* Fall through to get more digits */

        case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
            /*
             *  Accumulate the characters of the number. With floating point
             *  enabled, '.', 'e' or 'f' (either case) switch the type to
             *  MPR_TYPE_FLOAT, after which '+' and '-' are also accepted.
             */
            do {
                if (tokenAddChar(ep, c) < 0) {
                    return EJS_TOK_ERR;
                }
                if ((c = inputGetc(ep)) < 0) {
                    break;
                }
#if BLD_FEATURE_FLOATING_POINT
                if (c == '.' || tolower(c) == 'e' || tolower(c) == 'f') {
                    type = MPR_TYPE_FLOAT;
                }
            } while (isdigit(c) || c == '.' || tolower(c) == 'e' ||
                tolower(c) == 'f' ||
                ((type == MPR_TYPE_FLOAT) && (c == '+' || c == '-')));
#else
            } while (isdigit(c));
#endif

            mprDestroyVar(&ep->tokenNumber);
            ep->tokenNumber = mprParseVar(ep->token, type);
            inputPutback(ep, c);
            return EJS_TOK_NUMBER;

        default:
            /*
             *  Identifiers or function names. A backslash is never added to
             *  the token: the character after it is read, and a newline or
             *  carriage return at that point ends the scan.
             */
            while (1) {
                if (c == '\\') {
                    if ((c = inputGetc(ep)) < 0) {
                        break;
                    }
                    if (c == '\n' || c == '\r') {
                        break;
                    }
                } else if (tokenAddChar(ep, c) < 0) {
                    break;
                }
                if ((c = inputGetc(ep)) < 0) {
                    break;
                }
                if (!isalnum(c) && c != '$' && c != '_' && c != '\\') {
                    break;
                }
            }
            /*
             *  Nothing was collected: fetch the next character and go around
             *  the main loop again.
             */
            if (*ep->token == '\0') {
                c = inputGetc(ep);
                break;
            }
            if (! isalpha((int) *ep->token) && *ep->token != '$' &&
                    *ep->token != '_') {
                ejsError(ep, "Invalid identifier %s", ep->token);
                return EJS_TOK_ERR;
            }

            /*
             *  Any id other than EJS_TOK_ID from checkReservedWord() is
             *  returned as is. The look-ahead character is passed to it.
             */
            tid = checkReservedWord(ep, state, c, EJS_TOK_ID);
            if (tid != EJS_TOK_ID) {
                return tid;
            }

            /*
             *  Skip white space after token to find out whether this is
             *  a function or not.
             */
            while (c == ' ' || c == '\t' || c == '\r' || c == '\n') {
                if ((c = inputGetc(ep)) < 0)
                    break;
            }

            tid = EJS_TOK_ID;
            done++;
        }
    }
/*
 *  Get the next lexical token from the current input (ep->input).
 *
 *  ep      Interpreter instance. ep->tid is reset to -1 and ep->token is
 *          reset to an empty string before scanning; characters of the
 *          token are accumulated with tokenAddChar().
 *  state   Parser state. Reserved words ("if", "else", "var", "for" and
 *          "return") are only recognised when state == STATE_STMT.
 *
 *  Returns a TOK_* identifier: TOK_EOF at end of input, TOK_ERR after
 *  ejError() has been called (or when tokenAddChar() fails), otherwise the
 *  id of the token read. An identifier whose next non-white-space character
 *  is '(' is returned as TOK_FUNCTION, any other identifier as TOK_ID.
 *
 *  A token put back earlier (ip->putBackTokenId > 0) is returned first.
 */
static int getLexicalToken(ej_t* ep, int state)
{
    ringq_t     *tokq;
    ejinput_t   *ip;
    int         done, tid, c, quote, style;

    a_assert(ep);
    ip = ep->input;
    a_assert(ip);

    tokq = &ip->tokbuf;
    ep->tid = -1;
    tid = -1;
    ep->token = T("");

    ringqFlush(tokq);

    /*
     *  Use a put-back token first
     */
    if (ip->putBackTokenId > 0) {
        ringqPutStr(tokq, ip->putBackToken);
        tid = ip->putBackTokenId;
        ip->putBackTokenId = 0;
        ep->token = (char_t*) tokq->servp;
        return tid;
    }

    if ((c = inputGetc(ep)) < 0) {
        return TOK_EOF;
    }

    /*
     *  Main lexical analyser. Each pass dispatches on the current character.
     *  A "break" out of the switch re-runs the loop with whatever is now in
     *  "c"; only the identifier case sets "done".
     */
    for (done = 0; !done; ) {
        switch (c) {
        case -1:
            return TOK_EOF;

        case ' ':
        case '\t':
        case '\r':
            /*
             *  Skip horizontal white space. Newlines are returned as tokens.
             */
            do {
                if ((c = inputGetc(ep)) < 0) {
                    break;
                }
            } while (c == ' ' || c == '\t' || c == '\r');
            break;

        case '\n':
            return TOK_NEWLINE;

        case '(':
            tokenAddChar(ep, c);
            return TOK_LPAREN;

        case ')':
            tokenAddChar(ep, c);
            return TOK_RPAREN;

        case '{':
            tokenAddChar(ep, c);
            return TOK_LBRACE;

        case '}':
            tokenAddChar(ep, c);
            return TOK_RBRACE;

        case '+':                                   /* "+" or "++" */
            if ((c = inputGetc(ep)) < 0) {
                ejError(ep, T("Syntax Error"));
                return TOK_ERR;
            }
            if (c != '+') {
                inputPutback(ep, c);
                tokenAddChar(ep, EXPR_PLUS);
                return TOK_EXPR;
            }
            tokenAddChar(ep, EXPR_INC);
            return TOK_INC_DEC;

        case '-':                                   /* "-" or "--" */
            if ((c = inputGetc(ep)) < 0) {
                ejError(ep, T("Syntax Error"));
                return TOK_ERR;
            }
            if (c != '-') {
                inputPutback(ep, c);
                tokenAddChar(ep, EXPR_MINUS);
                return TOK_EXPR;
            }
            tokenAddChar(ep, EXPR_DEC);
            return TOK_INC_DEC;

        case '*':
            tokenAddChar(ep, EXPR_MUL);
            return TOK_EXPR;

        case '%':
            tokenAddChar(ep, EXPR_MOD);
            return TOK_EXPR;

        case '/':
            /*
             *  Handle the division operator and comments
             */
            if ((c = inputGetc(ep)) < 0) {
                ejError(ep, T("Syntax Error"));
                return TOK_ERR;
            }
            if (c != '*' && c != '/') {
                inputPutback(ep, c);
                tokenAddChar(ep, EXPR_DIV);
                return TOK_EXPR;
            }
            /* "style" is '*' for a block comment and '/' for a line comment */
            style = c;
            /*
             *  Eat comments. Both C and C++ comment styles are supported.
             */
            while (1) {
                c = inputGetc(ep);
                if (c == '*') {
                    /*
                     *  Step over the whole run of stars so that a block
                     *  comment ending in several stars and a '/' is still
                     *  seen as closed by that '/'.
                     */
                    do {
                        c = inputGetc(ep);
                    } while (c == '*');
                    if (style == '*' && c == '/') {
                        break;
                    }
                }
                if (c < 0) {
                    /*
                     *  A line comment may simply run to the end of the
                     *  input. An unterminated block comment is an error.
                     */
                    if (style == '/') {
                        return TOK_EOF;
                    }
                    ejError(ep, T("Syntax Error"));
                    return TOK_ERR;
                }
                if (style == '/' && c == '\n') {
                    break;
                }
            }
            /*
             *  Continue looking for a token, so get the next character
             */
            if ((c = inputGetc(ep)) < 0) {
                return TOK_EOF;
            }
            break;

        case '<':                                   /* "<", "<<" and "<=" */
            if ((c = inputGetc(ep)) < 0) {
                ejError(ep, T("Syntax Error"));
                return TOK_ERR;
            }
            if (c == '<') {
                tokenAddChar(ep, EXPR_LSHIFT);
                return TOK_EXPR;
            } else if (c == '=') {
                tokenAddChar(ep, EXPR_LESSEQ);
                return TOK_EXPR;
            }
            tokenAddChar(ep, EXPR_LESS);
            inputPutback(ep, c);
            return TOK_EXPR;

        case '>':                                   /* ">", ">>" and ">=" */
            if ((c = inputGetc(ep)) < 0) {
                ejError(ep, T("Syntax Error"));
                return TOK_ERR;
            }
            if (c == '>') {
                tokenAddChar(ep, EXPR_RSHIFT);
                return TOK_EXPR;
            } else if (c == '=') {
                tokenAddChar(ep, EXPR_GREATEREQ);
                return TOK_EXPR;
            }
            tokenAddChar(ep, EXPR_GREATER);
            inputPutback(ep, c);
            return TOK_EXPR;

        case '=':                                   /* "==" or "=" */
            if ((c = inputGetc(ep)) < 0) {
                ejError(ep, T("Syntax Error"));
                return TOK_ERR;
            }
            if (c == '=') {
                tokenAddChar(ep, EXPR_EQ);
                return TOK_EXPR;
            }
            inputPutback(ep, c);
            return TOK_ASSIGNMENT;

        case '!':                                   /* "!=" or "!" */
            if ((c = inputGetc(ep)) < 0) {
                ejError(ep, T("Syntax Error"));
                return TOK_ERR;
            }
            if (c == '=') {
                tokenAddChar(ep, EXPR_NOTEQ);
                return TOK_EXPR;
            }
            inputPutback(ep, c);
            tokenAddChar(ep, EXPR_BOOL_COMP);
            return TOK_EXPR;

        case ';':
            tokenAddChar(ep, c);
            return TOK_SEMI;

        case ',':
            tokenAddChar(ep, c);
            return TOK_COMMA;

        case '|':                                   /* "||" only; a lone "|" is an error */
            if ((c = inputGetc(ep)) < 0 || c != '|') {
                ejError(ep, T("Syntax Error"));
                return TOK_ERR;
            }
            tokenAddChar(ep, COND_OR);
            return TOK_LOGICAL;

        case '&':                                   /* "&&" only; a lone "&" is an error */
            if ((c = inputGetc(ep)) < 0 || c != '&') {
                ejError(ep, T("Syntax Error"));
                return TOK_ERR;
            }
            tokenAddChar(ep, COND_AND);
            return TOK_LOGICAL;

        case '\"':                                  /* String quote */
        case '\'':
            /*
             *  The literal runs until the same quote character that opened it
             */
            quote = c;
            if ((c = inputGetc(ep)) < 0) {
                ejError(ep, T("Syntax Error"));
                return TOK_ERR;
            }

            while (c != quote) {
                /*
                 *  Check for escape sequence characters
                 */
                if (c == '\\') {
                    c = inputGetc(ep);

                    if (gisdigit(c)) {
                        /*
                         *  Octal support, \101 maps to 65 = 'A'. Put first
                         *  char back so converter will work properly.
                         */
                        inputPutback(ep, c);
                        c = charConvert(ep, OCTAL, 3);

                    } else {
                        switch (c) {
                        case 'n':
                            c = '\n';
                            break;
                        case 'b':
                            c = '\b';
                            break;
                        case 'f':
                            c = '\f';
                            break;
                        case 'r':
                            c = '\r';
                            break;
                        case 't':
                            c = '\t';
                            break;
                        case 'x':
                            /*
                             *  Hex support, \x41 maps to 65 = 'A'
                             */
                            c = charConvert(ep, HEX, 2);
                            break;
                        case 'u':
                            /*
                             *  Unicode support: two conversions of two hex
                             *  digits each are combined as first*16 + second.
                             *  NOTE(review): two hex digits span 8 bits, so a
                             *  multiplier of 256 may have been intended. Left
                             *  unchanged because existing scripts may depend
                             *  on the current mapping -- confirm.
                             */
                            c = charConvert(ep, HEX, 2);
                            c = c*16 + charConvert(ep, HEX, 2);
                            break;
                        case '\'':
                        case '\"':
                        case '\\':
                            /* Escaped quote or backslash stands for itself */
                            break;
                        default:
                            ejError(ep, T("Invalid Escape Sequence"));
                            return TOK_ERR;
                        }
                    }
                    if (tokenAddChar(ep, c) < 0) {
                        return TOK_ERR;
                    }
                } else {
                    if (tokenAddChar(ep, c) < 0) {
                        return TOK_ERR;
                    }
                }
                if ((c = inputGetc(ep)) < 0) {
                    ejError(ep, T("Unmatched Quote"));
                    return TOK_ERR;
                }
            }
            return TOK_LITERAL;

        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
            /*
             *  A run of decimal digits is returned as a literal
             */
            do {
                if (tokenAddChar(ep, c) < 0) {
                    return TOK_ERR;
                }
                if ((c = inputGetc(ep)) < 0) {
                    break;
                }
            } while (gisdigit(c));
            inputPutback(ep, c);
            return TOK_LITERAL;

        default:
            /*
             *  Identifiers or function names. Backslash characters are
             *  skipped and never become part of the token.
             */
            while (1) {
                if (c == '\\') {
                    /*
                     *  Just ignore any \ characters.
                     */
                } else if (tokenAddChar(ep, c) < 0) {
                    break;
                }
                if ((c = inputGetc(ep)) < 0) {
                    break;
                }
                if (!gisalnum(c) && c != '$' && c != '_' && c != '\\') {
                    break;
                }
            }
            if (! gisalpha(*tokq->servp) && *tokq->servp != '$' &&
                    *tokq->servp != '_') {
                ejError(ep, T("Invalid identifier %s"), tokq->servp);
                return TOK_ERR;
            }
            /*
             *  Check for reserved words (only "if", "else", "var", "for"
             *  and "return" at the moment)
             */
            if (state == STATE_STMT) {
                int     reserved = 1;

                if (gstrcmp(ep->token, T("if")) == 0) {
                    tid = TOK_IF;
                } else if (gstrcmp(ep->token, T("else")) == 0) {
                    tid = TOK_ELSE;
                } else if (gstrcmp(ep->token, T("var")) == 0) {
                    tid = TOK_VAR;
                } else if (gstrcmp(ep->token, T("for")) == 0) {
                    tid = TOK_FOR;
                } else if (gstrcmp(ep->token, T("return")) == 0) {
                    tid = TOK_RETURN;
                } else {
                    reserved = 0;
                }
                if (reserved) {
                    /*
                     *  "c" is the character that ended the word. Hand it
                     *  back unless it is white space (or end of input), so
                     *  that input such as "if(x)", "else{" or "return-1"
                     *  does not lose the character right after the keyword.
                     *  White space is still swallowed here.
                     */
                    if (c >= 0 && c != ' ' && c != '\t' && c != '\r' &&
                            c != '\n') {
                        inputPutback(ep, c);
                    }
                    return tid;
                }
            }

            /*
             *  Skip white space after token to find out whether this is
             *  a function or not.
             */
            while (c == ' ' || c == '\t' || c == '\r' || c == '\n') {
                if ((c = inputGetc(ep)) < 0) {
                    break;
                }
            }

            tid = (c == '(') ? TOK_FUNCTION : TOK_ID;
            done++;
        }
    }

    /*
     *  Putback the last extra character for next time
     */
    inputPutback(ep, c);
    return tid;
}