Beispiel #1
0
static int getLexicalToken(Ejs *ep, int state)
{
	MprType		type;
	EjsInput	*ip;
	int			done, tid, c, quote, style, idx;

	mprAssert(ep);
	ip = ep->input;
	mprAssert(ip);

	ep->tid = -1;
	tid = -1;
	type = BLD_FEATURE_NUM_TYPE_ID;

	/*
 	 *	Use a putback tokens first. Don't free strings as caller needs access.
	 */
	if (ip->putBackIndex >= 0) {
		idx = ip->putBackIndex;
		tid = ip->putBack[idx].id;
		ep->token = (char*) ip->putBack[idx].token;
		tid = checkReservedWord(ep, state, 0, tid);
		ip->putBackIndex--;
		return tid;
	}
	ep->token = ip->tokServp = ip->tokEndp = ip->tokbuf;
	*ip->tokServp = '\0';

	if ((c = inputGetc(ep)) < 0) {
		return EJS_TOK_EOF;
	}

	/*
 	 *	Main lexical analyser
	 */
	for (done = 0; !done; ) {
		switch (c) {
		case -1:
			return EJS_TOK_EOF;

		case ' ':
		case '\t':
		case '\r':
			do {
				if ((c = inputGetc(ep)) < 0)
					break;
			} while (c == ' ' || c == '\t' || c == '\r');
			break;

		case '\n':
			return EJS_TOK_NEWLINE;

		case '(':
			tokenAddChar(ep, c);
			return EJS_TOK_LPAREN;

		case ')':
			tokenAddChar(ep, c);
			return EJS_TOK_RPAREN;

		case '[':
			tokenAddChar(ep, c);
			return EJS_TOK_LBRACKET;

		case ']':
			tokenAddChar(ep, c);
			return EJS_TOK_RBRACKET;

		case '.':
			tokenAddChar(ep, c);
			return EJS_TOK_PERIOD;

		case '{':
			tokenAddChar(ep, c);
			return EJS_TOK_LBRACE;

		case '}':
			tokenAddChar(ep, c);
			return EJS_TOK_RBRACE;

		case '+':
			if ((c = inputGetc(ep)) < 0) {
				ejsError(ep, "Syntax Error");
				return EJS_TOK_ERR;
			}
			if (c != '+' ) {
				inputPutback(ep, c);
				tokenAddChar(ep, EJS_EXPR_PLUS);
				return EJS_TOK_EXPR;
			}
			tokenAddChar(ep, EJS_EXPR_INC);
			return EJS_TOK_INC_DEC;

		case '-':
			if ((c = inputGetc(ep)) < 0) {
				ejsError(ep, "Syntax Error");
				return EJS_TOK_ERR;
			}
			if (c != '-' ) {
				inputPutback(ep, c);
				tokenAddChar(ep, EJS_EXPR_MINUS);
				return EJS_TOK_EXPR;
			}
			tokenAddChar(ep, EJS_EXPR_DEC);
			return EJS_TOK_INC_DEC;

		case '*':
			tokenAddChar(ep, EJS_EXPR_MUL);
			return EJS_TOK_EXPR;

		case '%':
			tokenAddChar(ep, EJS_EXPR_MOD);
			return EJS_TOK_EXPR;

		case '/':
			/*
			 *	Handle the division operator and comments
			 */
			if ((c = inputGetc(ep)) < 0) {
				ejsError(ep, "Syntax Error");
				return EJS_TOK_ERR;
			}
			if (c != '*' && c != '/') {
				inputPutback(ep, c);
				tokenAddChar(ep, EJS_EXPR_DIV);
				return EJS_TOK_EXPR;
			}
			style = c;
			/*
			 *	Eat comments. Both C and C++ comment styles are supported.
			 */
			while (1) {
				if ((c = inputGetc(ep)) < 0) {
					if (style == '/') {
						return EJS_TOK_EOF;
					}
					ejsError(ep, "Syntax Error");
					return EJS_TOK_ERR;
				}
				if (c == '\n' && style == '/') {
					break;
				} else if (c == '*') {
					c = inputGetc(ep);
					if (style == '/') {
						if (c == '\n') {
							break;
						}
					} else {
						if (c == '/') {
							break;
						}
					}
				}
			}
			/*
			 *	Continue looking for a token, so get the next character
			 */
			if ((c = inputGetc(ep)) < 0) {
				return EJS_TOK_EOF;
			}
			break;

		case '<':									/* < and <= */
			if ((c = inputGetc(ep)) < 0) {
				ejsError(ep, "Syntax Error");
				return EJS_TOK_ERR;
			}
			if (c == '<') {
				tokenAddChar(ep, EJS_EXPR_LSHIFT);
				return EJS_TOK_EXPR;
			} else if (c == '=') {
				tokenAddChar(ep, EJS_EXPR_LESSEQ);
				return EJS_TOK_EXPR;
			}
			tokenAddChar(ep, EJS_EXPR_LESS);
			inputPutback(ep, c);
			return EJS_TOK_EXPR;

		case '>':									/* > and >= */
			if ((c = inputGetc(ep)) < 0) {
				ejsError(ep, "Syntax Error");
				return EJS_TOK_ERR;
			}
			if (c == '>') {
				tokenAddChar(ep, EJS_EXPR_RSHIFT);
				return EJS_TOK_EXPR;
			} else if (c == '=') {
				tokenAddChar(ep, EJS_EXPR_GREATEREQ);
				return EJS_TOK_EXPR;
			}
			tokenAddChar(ep, EJS_EXPR_GREATER);
			inputPutback(ep, c);
			return EJS_TOK_EXPR;

		case '=':									/* "==" */
			if ((c = inputGetc(ep)) < 0) {
				ejsError(ep, "Syntax Error");
				return EJS_TOK_ERR;
			}
			if (c == '=') {
				tokenAddChar(ep, EJS_EXPR_EQ);
				return EJS_TOK_EXPR;
			}
			inputPutback(ep, c);
			return EJS_TOK_ASSIGNMENT;

		case '!':									/* "!=" or "!"*/
			if ((c = inputGetc(ep)) < 0) {
				ejsError(ep, "Syntax Error");
				return EJS_TOK_ERR;
			}
			if (c == '=') {
				tokenAddChar(ep, EJS_EXPR_NOTEQ);
				return EJS_TOK_EXPR;
			}
			inputPutback(ep, c);
			tokenAddChar(ep, EJS_EXPR_BOOL_COMP);
			return EJS_TOK_EXPR;

		case ';':
			tokenAddChar(ep, c);
			return EJS_TOK_SEMI;

		case ',':
			tokenAddChar(ep, c);
			return EJS_TOK_COMMA;

		case '|':									/* "||" */
			if ((c = inputGetc(ep)) < 0 || c != '|') {
				ejsError(ep, "Syntax Error");
				return EJS_TOK_ERR;
			}
			tokenAddChar(ep, EJS_COND_OR);
			return EJS_TOK_LOGICAL;

		case '&':									/* "&&" */
			if ((c = inputGetc(ep)) < 0 || c != '&') {
				ejsError(ep, "Syntax Error");
				return EJS_TOK_ERR;
			}
			tokenAddChar(ep, EJS_COND_AND);
			return EJS_TOK_LOGICAL;

		case '\"':									/* String quote */
		case '\'':
			quote = c;
			if ((c = inputGetc(ep)) < 0) {
				ejsError(ep, "Syntax Error");
				return EJS_TOK_ERR;
			}

			while (c != quote) {
				/*
				 *	Check for escape sequence characters
				 */
				if (c == '\\') {
					c = inputGetc(ep);

					if (isdigit(c)) {
						/*
						 *	Octal support, \101 maps to 65 = 'A'. Put first 
						 *	char back so converter will work properly.
						 */
						inputPutback(ep, c);
						c = charConvert(ep, 8, 3);

					} else {
						switch (c) {
						case 'n':
							c = '\n'; break;
						case 'b':
							c = '\b'; break;
						case 'f':
							c = '\f'; break;
						case 'r':
							c = '\r'; break;
						case 't':
							c = '\t'; break;
						case 'x':
							/*
							 *	Hex support, \x41 maps to 65 = 'A'
							 */
							c = charConvert(ep, 16, 2);
							break;
						case 'u':
							/*
							 *	Unicode support, \x0401 maps to 65 = 'A'
							 */
							c = charConvert(ep, 16, 2);
							c = c*16 + charConvert(ep, 16, 2);

							break;
						case '\'':
						case '\"':
						case '\\':
							break;
						default:
							ejsError(ep, "Invalid Escape Sequence");
							return EJS_TOK_ERR;
						}
					}
					if (tokenAddChar(ep, c) < 0) {
						return EJS_TOK_ERR;
					}
				} else {
					if (tokenAddChar(ep, c) < 0) {
						return EJS_TOK_ERR;
					}
				}
				if ((c = inputGetc(ep)) < 0) {
					ejsError(ep, "Unmatched Quote");
					return EJS_TOK_ERR;
				}
			}
			return EJS_TOK_LITERAL;

		case '0': 
			if (tokenAddChar(ep, c) < 0) {
				return EJS_TOK_ERR;
			}
			if ((c = inputGetc(ep)) < 0) {
				break;
			}
			if (tolower(c) == 'x') {
				do {
					if (tokenAddChar(ep, c) < 0) {
						return EJS_TOK_ERR;
					}
					if ((c = inputGetc(ep)) < 0) {
						break;
					}
				} while (isdigit(c) || (tolower(c) >= 'a' && tolower(c) <= 'f'));

				mprDestroyVar(&ep->tokenNumber);
				ep->tokenNumber = mprParseVar(ep->token, type);
				inputPutback(ep, c);
				return EJS_TOK_NUMBER;
			}
			if (! isdigit(c)) {
#if BLD_FEATURE_FLOATING_POINT
				if (c == '.' || tolower(c) == 'e' || c == '+' || c == '-') {
					/* Fall through */
					type = MPR_TYPE_FLOAT;
				} else
#endif
				{
					mprDestroyVar(&ep->tokenNumber);
					ep->tokenNumber = mprParseVar(ep->token, type);
					inputPutback(ep, c);
					return EJS_TOK_NUMBER;
				}
			}
			/* Fall through to get more digits */

		case '1': case '2': case '3': case '4': 
		case '5': case '6': case '7': case '8': case '9':
			do {
				if (tokenAddChar(ep, c) < 0) {
					return EJS_TOK_ERR;
				}
				if ((c = inputGetc(ep)) < 0) {
					break;
				}
#if BLD_FEATURE_FLOATING_POINT
				if (c == '.' || tolower(c) == 'e' || tolower(c) == 'f') {
					type = MPR_TYPE_FLOAT;
				}
			} while (isdigit(c) || c == '.' || tolower(c) == 'e' || tolower(c) == 'f' ||
				((type == MPR_TYPE_FLOAT) && (c == '+' || c == '-')));
#else
			} while (isdigit(c));
#endif

			mprDestroyVar(&ep->tokenNumber);
			ep->tokenNumber = mprParseVar(ep->token, type);
			inputPutback(ep, c);
			return EJS_TOK_NUMBER;

		default:
			/*
			 *	Identifiers or a function names
			 */
			while (1) {
				if (c == '\\') {
					if ((c = inputGetc(ep)) < 0) {
						break;
					}
					if (c == '\n' || c == '\r') {
						break;
					}
				} else if (tokenAddChar(ep, c) < 0) {
						break;
				}
				if ((c = inputGetc(ep)) < 0) {
					break;
				}
				if (!isalnum(c) && c != '$' && c != '_' && c != '\\') {
					break;
				}
			}
			if (*ep->token == '\0') {
				c = inputGetc(ep);
				break;
			}
			if (! isalpha((int) *ep->token) && *ep->token != '$' && 
					*ep->token != '_') {
				ejsError(ep, "Invalid identifier %s", ep->token);
				return EJS_TOK_ERR;
			}

			tid = checkReservedWord(ep, state, c, EJS_TOK_ID);
			if (tid != EJS_TOK_ID) {
				return tid;
			}

			/* 
			 *	Skip white space after token to find out whether this is
			 * 	a function or not.
			 */ 
			while (c == ' ' || c == '\t' || c == '\r' || c == '\n') {
				if ((c = inputGetc(ep)) < 0)
					break;
			}

			tid = EJS_TOK_ID;
			done++;
		}
	}
Beispiel #2
0
static int getLexicalToken(ej_t* ep, int state)
{
    ringq_t		*inq, *tokq;
    ejinput_t*	ip;
    int			done, tid, c, quote, style;

    a_assert(ep);
    ip = ep->input;
    a_assert(ip);

    inq = &ip->script;
    tokq = &ip->tokbuf;

    ep->tid = -1;
    tid = -1;
    ep->token = T("");

    ringqFlush(tokq);

    if (ip->putBackTokenId > 0) {
        ringqPutStr(tokq, ip->putBackToken);
        tid = ip->putBackTokenId;
        ip->putBackTokenId = 0;
        ep->token = (char_t*) tokq->servp;
        return tid;
    }

    if ((c = inputGetc(ep)) < 0) {
        return TOK_EOF;
    }

    for (done = 0; !done; ) {
        switch (c) {
        case -1:
            return TOK_EOF;

        case ' ':
        case '\t':
        case '\r':
            do {
                if ((c = inputGetc(ep)) < 0)
                    break;
            } while (c == ' ' || c == '\t' || c == '\r');
            break;

        case '\n':
            return TOK_NEWLINE;

        case '(':
            tokenAddChar(ep, c);
            return TOK_LPAREN;

        case ')':
            tokenAddChar(ep, c);
            return TOK_RPAREN;

        case '{':
            tokenAddChar(ep, c);
            return TOK_LBRACE;

        case '}':
            tokenAddChar(ep, c);
            return TOK_RBRACE;

        case '+':
            if ((c = inputGetc(ep)) < 0) {
                ejError(ep, T("Syntax Error"));
                return TOK_ERR;
            }
            if (c != '+' ) {
                inputPutback(ep, c);
                tokenAddChar(ep, EXPR_PLUS);
                return TOK_EXPR;
            }
            tokenAddChar(ep, EXPR_INC);
            return TOK_INC_DEC;

        case '-':
            if ((c = inputGetc(ep)) < 0) {
                ejError(ep, T("Syntax Error"));
                return TOK_ERR;
            }
            if (c != '-' ) {
                inputPutback(ep, c);
                tokenAddChar(ep, EXPR_MINUS);
                return TOK_EXPR;
            }
            tokenAddChar(ep, EXPR_DEC);
            return TOK_INC_DEC;

        case '*':
            tokenAddChar(ep, EXPR_MUL);
            return TOK_EXPR;

        case '%':
            tokenAddChar(ep, EXPR_MOD);
            return TOK_EXPR;

        case '/':
            /*
             *			Handle the division operator and comments
             */
            if ((c = inputGetc(ep)) < 0) {
                ejError(ep, T("Syntax Error"));
                return TOK_ERR;
            }
            if (c != '*' && c != '/') {
                inputPutback(ep, c);
                tokenAddChar(ep, EXPR_DIV);
                return TOK_EXPR;
            }
            style = c;
            /*
             *			Eat comments. Both C and C++ comment styles are supported.
             */
            while (1) {
                if ((c = inputGetc(ep)) < 0) {
                    ejError(ep, T("Syntax Error"));
                    return TOK_ERR;
                }
                if (c == '\n' && style == '/') {
                    break;
                } else if (c == '*') {
                    c = inputGetc(ep);
                    if (style == '/') {
                        if (c == '\n') {
                            break;
                        }
                    } else {
                        if (c == '/') {
                            break;
                        }
                    }
                }
            }
            /*
             *			Continue looking for a token, so get the next character
             */
            if ((c = inputGetc(ep)) < 0) {
                return TOK_EOF;
            }
            break;

        case '<':									/* < and <= */
            if ((c = inputGetc(ep)) < 0) {
                ejError(ep, T("Syntax Error"));
                return TOK_ERR;
            }
            if (c == '<') {
                tokenAddChar(ep, EXPR_LSHIFT);
                return TOK_EXPR;
            } else if (c == '=') {
                tokenAddChar(ep, EXPR_LESSEQ);
                return TOK_EXPR;
            }
            tokenAddChar(ep, EXPR_LESS);
            inputPutback(ep, c);
            return TOK_EXPR;

        case '>':									/* > and >= */
            if ((c = inputGetc(ep)) < 0) {
                ejError(ep, T("Syntax Error"));
                return TOK_ERR;
            }
            if (c == '>') {
                tokenAddChar(ep, EXPR_RSHIFT);
                return TOK_EXPR;
            } else if (c == '=') {
                tokenAddChar(ep, EXPR_GREATEREQ);
                return TOK_EXPR;
            }
            tokenAddChar(ep, EXPR_GREATER);
            inputPutback(ep, c);
            return TOK_EXPR;

        case '=':									/* "==" */
            if ((c = inputGetc(ep)) < 0) {
                ejError(ep, T("Syntax Error"));
                return TOK_ERR;
            }
            if (c == '=') {
                tokenAddChar(ep, EXPR_EQ);
                return TOK_EXPR;
            }
            inputPutback(ep, c);
            return TOK_ASSIGNMENT;

        case '!':									/* "!=" or "!"*/
            if ((c = inputGetc(ep)) < 0) {
                ejError(ep, T("Syntax Error"));
                return TOK_ERR;
            }
            if (c == '=') {
                tokenAddChar(ep, EXPR_NOTEQ);
                return TOK_EXPR;
            }
            inputPutback(ep, c);
            tokenAddChar(ep, EXPR_BOOL_COMP);
            return TOK_EXPR;

        case ';':
            tokenAddChar(ep, c);
            return TOK_SEMI;

        case ',':
            tokenAddChar(ep, c);
            return TOK_COMMA;

        case '|':									/* "||" */
            if ((c = inputGetc(ep)) < 0 || c != '|') {
                ejError(ep, T("Syntax Error"));
                return TOK_ERR;
            }
            tokenAddChar(ep, COND_OR);
            return TOK_LOGICAL;

        case '&':									/* "&&" */
            if ((c = inputGetc(ep)) < 0 || c != '&') {
                ejError(ep, T("Syntax Error"));
                return TOK_ERR;
            }
            tokenAddChar(ep, COND_AND);
            return TOK_LOGICAL;

        case '\"':									/* String quote */
        case '\'':
            quote = c;
            if ((c = inputGetc(ep)) < 0) {
                ejError(ep, T("Syntax Error"));
                return TOK_ERR;
            }

            while (c != quote) {
                /*
                 *				check for escape sequence characters
                 */
                if (c == '\\') {
                    c = inputGetc(ep);

                    if (gisdigit(c)) {
                        /*
                         *						octal support, \101 maps to 65 = 'A'. put first char
                         *						back so converter will work properly.
                         */
                        inputPutback(ep, c);
                        c = charConvert(ep, OCTAL, 3);

                    } else {
                        switch (c) {
                        case 'n':
                            c = '\n';
                            break;
                        case 'b':
                            c = '\b';
                            break;
                        case 'f':
                            c = '\f';
                            break;
                        case 'r':
                            c = '\r';
                            break;
                        case 't':
                            c = '\t';
                            break;
                        case 'x':
                            /*
                             *							hex support, \x41 maps to 65 = 'A'
                             */
                            c = charConvert(ep, HEX, 2);
                            break;
                        case 'u':
                            /*
                             *							unicode support, \x0401 maps to 65 = 'A'
                             */
                            c = charConvert(ep, HEX, 2);
                            c = c*16 + charConvert(ep, HEX, 2);

                            break;
                        case '\'':
                        case '\"':
                        case '\\':
                            break;
                        default:
                            ejError(ep, T("Invalid Escape Sequence"));
                            return TOK_ERR;
                        }
                    }
                    if (tokenAddChar(ep, c) < 0) {
                        return TOK_ERR;
                    }
                } else {
                    if (tokenAddChar(ep, c) < 0) {
                        return TOK_ERR;
                    }
                }
                if ((c = inputGetc(ep)) < 0) {
                    ejError(ep, T("Unmatched Quote"));
                    return TOK_ERR;
                }
            }
            return TOK_LITERAL;

        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
            do {
                if (tokenAddChar(ep, c) < 0) {
                    return TOK_ERR;
                }
                if ((c = inputGetc(ep)) < 0)
                    break;
            } while (gisdigit(c));
            inputPutback(ep, c);
            return TOK_LITERAL;

        default:
            /*
             *			Identifiers or a function names
             */
            while (1) {
                if (c == '\\') {
                    /*
                     *					just ignore any \ characters.
                     */
                } else if (tokenAddChar(ep, c) < 0) {
                    break;
                }
                if ((c = inputGetc(ep)) < 0) {
                    break;
                }
                if (!gisalnum(c) && c != '$' && c != '_' &&
                    c != '\\') {
                    break;
                }
            }
            if (! gisalpha(*tokq->servp) && *tokq->servp != '$' &&
                *tokq->servp != '_') {
                ejError(ep, T("Invalid identifier %s"), tokq->servp);
                return TOK_ERR;
            }
            /*
             *			Check for reserved words (only "if", "else", "var", "for"
             *			and "return" at the moment)
             */
            if (state == STATE_STMT) {
                if (gstrcmp(ep->token, T("if")) == 0) {
                    return TOK_IF;
                } else if (gstrcmp(ep->token, T("else")) == 0) {
                    return TOK_ELSE;
                } else if (gstrcmp(ep->token, T("var")) == 0) {
                    return TOK_VAR;
                } else if (gstrcmp(ep->token, T("for")) == 0) {
                    return TOK_FOR;
                } else if (gstrcmp(ep->token, T("return")) == 0) {
                    if ((c == ';') || (c == '(')) {
                        inputPutback(ep, c);
                    }
                    return TOK_RETURN;
                }
            }

            /*
             * 			Skip white space after token to find out whether this is
             * 			a function or not.
             */
            while (c == ' ' || c == '\t' || c == '\r' || c == '\n') {
                if ((c = inputGetc(ep)) < 0)
                    break;
            }

            tid = (c == '(') ? TOK_FUNCTION : TOK_ID;
            done++;
        }
    }

    /*
     *	Putback the last extra character for next time
     */
    inputPutback(ep, c);
    return tid;
}