Exemple #1
0
/*
 * Scan the input of J and return the next token.
 *
 * Single-character punctuators are returned as their own character code,
 * multi-character operators as TK_* constants, and 0 is returned when the
 * current character is 0 (end of input).  Numbers, strings, regular
 * expressions and identifiers/keywords are delegated to lexnumber(),
 * lexstring(), lexregexp() and jsY_findkeyword() respectively.  White
 * space and comments are skipped.
 *
 * J->newline is cleared on entry and set to 1 when this call accepts a
 * '\n'.  J->lexline is refreshed from J->line every time the scan loop
 * (re)starts.
 */
static int jsY_lexx(js_State *J)
{
	int punct;

	J->newline = 0;

	for (;;) {
		/* remember the line number at which this scan attempt begins */
		J->lexline = J->line;

		while (jsY_iswhite(J->lexchar))
			jsY_next(J);

		/* a line break either turns into ';' or is skipped */
		if (jsY_accept(J, '\n')) {
			J->newline = 1;
			if (!isnlthcontext(J->lasttoken))
				continue;
			return ';';
		}

		/* '/' may open a comment, a regular expression, "/=" or be plain division */
		if (jsY_accept(J, '/')) {
			if (jsY_accept(J, '/')) {
				lexlinecomment(J);
				continue;
			}
			if (jsY_accept(J, '*')) {
				if (lexcomment(J))
					jsY_error(J, "multi-line comment not terminated");
				continue;
			}
			if (isregexpcontext(J->lasttoken))
				return lexregexp(J);
			return jsY_accept(J, '=') ? TK_DIV_ASS : '/';
		}

		switch (J->lexchar) {
		/* numeric literals, including those that start with '.' */
		case '0': case '1': case '2': case '3': case '4':
		case '5': case '6': case '7': case '8': case '9':
		case '.':
			return lexnumber(J);

		/* punctuators that never combine with a following character */
		case '(': case ')': case ',': case ':': case ';': case '?':
		case '[': case ']': case '{': case '}': case '~':
			punct = J->lexchar; /* grab it before jsY_next() moves on */
			jsY_next(J);
			return punct;

		case '\'':
		case '"':
			return lexstring(J);

		case '<': /* <  <=  <<  <<= */
			jsY_next(J);
			if (jsY_accept(J, '<'))
				return jsY_accept(J, '=') ? TK_SHL_ASS : TK_SHL;
			return jsY_accept(J, '=') ? TK_LE : '<';

		case '>': /* >  >=  >>  >>=  >>>  >>>= */
			jsY_next(J);
			if (!jsY_accept(J, '>'))
				return jsY_accept(J, '=') ? TK_GE : '>';
			if (jsY_accept(J, '>'))
				return jsY_accept(J, '=') ? TK_USHR_ASS : TK_USHR;
			return jsY_accept(J, '=') ? TK_SHR_ASS : TK_SHR;

		case '=': /* =  ==  === */
			jsY_next(J);
			if (!jsY_accept(J, '='))
				return '=';
			return jsY_accept(J, '=') ? TK_STRICTEQ : TK_EQ;

		case '!': /* !  !=  !== */
			jsY_next(J);
			if (!jsY_accept(J, '='))
				return '!';
			return jsY_accept(J, '=') ? TK_STRICTNE : TK_NE;

		case '+': /* +  ++  += */
			jsY_next(J);
			if (jsY_accept(J, '+'))
				return TK_INC;
			return jsY_accept(J, '=') ? TK_ADD_ASS : '+';

		case '-': /* -  --  -= */
			jsY_next(J);
			if (jsY_accept(J, '-'))
				return TK_DEC;
			return jsY_accept(J, '=') ? TK_SUB_ASS : '-';

		case '*': /* *  *= */
			jsY_next(J);
			return jsY_accept(J, '=') ? TK_MUL_ASS : '*';

		case '%': /* %  %= */
			jsY_next(J);
			return jsY_accept(J, '=') ? TK_MOD_ASS : '%';

		case '&': /* &  &&  &= */
			jsY_next(J);
			if (jsY_accept(J, '&'))
				return TK_AND;
			return jsY_accept(J, '=') ? TK_AND_ASS : '&';

		case '|': /* |  ||  |= */
			jsY_next(J);
			if (jsY_accept(J, '|'))
				return TK_OR;
			return jsY_accept(J, '=') ? TK_OR_ASS : '|';

		case '^': /* ^  ^= */
			jsY_next(J);
			return jsY_accept(J, '=') ? TK_XOR_ASS : '^';

		case 0:
			return 0; /* EOF */

		default:
			break;
		}

		/* Identifiers and keywords; \uXXXX escapes are decoded as we go */
		jsY_unescape(J);
		if (jsY_isidentifierstart(J->lexchar)) {
			textinit(J);
			do {
				textpush(J, J->lexchar);
				jsY_next(J);
				jsY_unescape(J);
			} while (jsY_isidentifierpart(J->lexchar));
			textend(J);

			return jsY_findkeyword(J, J->lexbuf.text);
		}

		/* Nothing matched: printable ASCII is quoted, the generic \u form follows */
		if (J->lexchar >= 0x20 && J->lexchar <= 0x7E)
			jsY_error(J, "unexpected character: '%c'", J->lexchar);
		jsY_error(J, "unexpected character: \\u%04X", J->lexchar);
	}
}
Exemple #2
0
/*
 * The lexical analyzer.
 *
 * Returns the next token as an int.  Besides the variables redelim,
 * catterm, lexlast and inprint (declared outside this function), the
 * following function-static state persists from one call to the next:
 *
 *	savetoken	a token held back by an earlier call; when non-zero
 *			it is consumed instead of reading new input
 *	wasfield	set to 2 when a PARM/CONSTANT arrives right after '$'
 *			(and catterm is clear); decremented on every call
 *			that reaches the end of the post-processing stage
 *	isfuncdef	set when DEFFUNC is seen, cleared by the next VAR
 *	nbrace, nparen, nbracket
 *			current nesting depth of {}, () and []
 *
 * The function works in three stages:
 *   1. obtain a raw token (held-back token, regular expression, or
 *      fresh input characters);
 *   2. post-process it, which may hold the token back in savetoken and
 *      return CONCAT or ';' in its place;
 *   3. map single-character tokens to symbolic names through ctosym[].
 *
 * The regular-expression path (redelim != 0) jumps straight to "out"
 * and therefore skips stages 2 and 3.
 */
int
yylex()
{
	wint_t c, c1;
	int i;
	static int savetoken = 0;
	static int wasfield;
	static int isfuncdef;
	static int nbrace, nparen, nbracket;
	/* Character -> symbolic token table used by stage 3; ends at c == 0. */
	static struct ctosymstruct {
		wint_t c, sym;
	} ctosym[] = {
		{ '|', BAR },		{ '^', CARAT },
		{ '~', TILDE },		{ '<', LANGLE },
		{ '>', RANGLE },	{ '+', PLUSC },
		{ '-', HYPHEN },	{ '*', STAR },
		{ '/', SLASH },		{ '%', PERCENT },
		{ '!', EXCLAMATION },	{ '$', DOLLAR },
		{ '[', LSQUARE },	{ ']', RSQUARE },
		{ '(', LPAREN },	{ ')', RPAREN },
		{ ';', SEMI },		{ '{', LBRACE },
		{ '}', RBRACE },	{   0, 0 }
	};

	if (savetoken) {
		/*
		 * Deliver the token a previous call held back.  It still
		 * passes through the post-processing switch below.
		 */
		c = savetoken;
		savetoken = 0;
	} else if (redelim != '\0') {
		/*
		 * redelim is non-zero (presumably the pending regular
		 * expression delimiter, set elsewhere -- confirm): return
		 * whatever lexregexp() produces now and hold the redelim
		 * value back so it is delivered by the next call.
		 */
		c = redelim;
		redelim = 0;
		catterm = 0;
		savetoken = c;
		c = lexlast = lexregexp(c);
		goto out;
	} else while ((c = lexgetc()) != WEOF) {
		/*
		 * Main scanning loop.  "continue" (also from inside the
		 * switches below) discards the current character and reads
		 * the next one; the "break" at the bottom of the loop body
		 * leaves the loop with the token in c.  If the loop ends
		 * because of WEOF, c == WEOF is what gets post-processed.
		 */
		if (iswalpha(c) || c == '_') {
			c = lexid(c);
		} else if (iswdigit(c) || c == '.') {
			c = lexnumber(c);
		} else if (isWblank(c)) {
			continue;
		} else switch (c) {
#if DOS || OS2
		case 032:		/* ^Z */
			continue;
#endif

		case '"':
			c = lexstring(c);
			break;

		case '#':
			/*
			 * Comment: discard up to, but not including, the end
			 * of the line.  The terminating '\n' (or WEOF) is
			 * pushed back so the next iteration sees it.
			 */
			while ((c = lexgetc()) != '\n' && c != WEOF)
				;
			lexungetc(c);
			continue;

		case '+':	/* "++" -> INC, "+=" -> AADD, else plain '+' */
			if ((c1 = lexgetc()) == '+')
				c = INC;
			else if (c1 == '=')
				c = AADD;
			else
				lexungetc(c1);
			break;

		case '-':	/* "--" -> DEC, "-=" -> ASUB, else plain '-' */
			if ((c1 = lexgetc()) == '-')
				c = DEC;
			else if (c1 == '=')
				c = ASUB;
			else
				lexungetc(c1);
			break;

		case '*':	/* "*=" -> AMUL, "**=" -> AEXP, "**" -> EXP */
			if ((c1 = lexgetc()) == '=')
				c = AMUL;
			else if (c1 == '*') {
				if ((c1 = lexgetc()) == '=')
					c = AEXP;
				else {
					c = EXP;
					lexungetc(c1);
				}
			} else
				lexungetc(c1);
			break;

		case '^':	/* same tokens as "**": "^=" -> AEXP, "^" -> EXP */
			if ((c1 = lexgetc()) == '=') {
				c = AEXP;
			} else {
				c = EXP;
				lexungetc(c1);
			}
			break;

		case '/':
			/*
			 * "/=" is ADIV only when the previous token is none
			 * of RE, NRE, ';', '\n', ',' or '('.  Otherwise the
			 * second character is pushed back and a lone '/' is
			 * returned (presumably because "/=" would then start
			 * a regular expression -- confirm against the parser).
			 */
			if ((c1 = lexgetc()) == '=' &&
			    lexlast != RE && lexlast != NRE &&
			    lexlast != ';' && lexlast != '\n' &&
			    lexlast != ',' && lexlast != '(')
				c = ADIV;
			else
				lexungetc(c1);
			break;

		case '%':	/* "%=" -> AREM, else plain '%' */
			if ((c1 = lexgetc()) == '=')
				c = AREM;
			else
				lexungetc(c1);
			break;

		case '&':	/* "&&" -> AND, else plain '&' */
			if ((c1 = lexgetc()) == '&')
				c = AND;
			else
				lexungetc(c1);
			break;

		case '|':
			/* "||" -> OR; a single '|' becomes PIPE while inprint is set */
			if ((c1 = lexgetc()) == '|')
				c = OR;
			else {
				lexungetc(c1);
				if (inprint)
					c = PIPE;
			}
			break;

		case '>':
			/*
			 * ">=" -> GE, ">>" -> APPEND; a single '>' becomes
			 * WRITE only while inprint is set and no parenthesis
			 * is open, otherwise it stays '>'.
			 */
			if ((c1 = lexgetc()) == '=')
				c = GE;
			else if (c1 == '>')
				c = APPEND;
			else {
				lexungetc(c1);
				if (nparen == 0 && inprint)
					c = WRITE;
			}
			break;

		case '<':	/* "<=" -> LE, else plain '<' */
			if ((c1 = lexgetc()) == '=')
				c = LE;
			else
				lexungetc(c1);
			break;

		case '!':	/* "!=" -> NE, "!~" -> NRE, else plain '!' */
			if ((c1 = lexgetc()) == '=')
				c = NE;
			else if (c1 == '~')
				c = NRE;
			else
				lexungetc(c1);
			break;

		case '=':	/* "==" -> EQ, else ASG */
			if ((c1 = lexgetc()) == '=')
				c = EQ;
			else {
				lexungetc(c1);
				c = ASG;
			}
			break;

		case '\n':
			/*
			 * A newline is either dropped ("continue") or turned
			 * into ';', depending on the previous token.
			 */
			switch (lexlast) {
			case ')':
				/* after ')' it only counts if catterm or inprint is set */
				if (catterm || inprint) {
					c = ';';
					break;
				}
			/* FALLTHROUGH */
			case AND:
			case OR:
			case COMMA:
			case '{':
			case ELSE:
			case ';':
			case DO:
				continue;

			case '}':
				/* dropped while still inside an open brace */
				if (nbrace != 0)
					continue;
				/* FALLTHROUGH */

			default:
				c = ';';
				break;
			}
			break;

		case ELSE:
			/*
			 * Make sure a ';' precedes ELSE: unless one was just
			 * returned, return ';' now and deliver ELSE on the
			 * next call.
			 * NOTE(review): this switch runs on raw input
			 * characters; how c can equal ELSE here is not
			 * visible in this function -- confirm.
			 */
			if (lexlast != ';') {
				savetoken = ELSE;
				c = ';';
			}
			break;

		case '(':
			++nparen;
			break;

		case ')':
			if (--nparen < 0)
				awkerr(unbal, "()");
			break;

		case '{':
			nbrace++;
			break;

		case '}':
			if (--nbrace < 0) {
				char brk[3];

				brk[0] = '{';
				brk[1] = '}';
				brk[2] = '\0';
				awkerr(unbal, brk);
			}
			/*
			 * Make sure a ';' precedes '}': unless one was just
			 * returned, return ';' now and deliver '}' on the
			 * next call.
			 */
			if (lexlast != ';') {
				savetoken = c;
				c = ';';
			}
			break;

		case '[':
			++nbracket;
			break;

		case ']':
			if (--nbracket < 0) {
				char brk[3];

				brk[0] = '[';
				brk[1] = ']';
				brk[2] = '\0';
				awkerr(unbal, brk);
			}
			break;

		case '\\':
			/* backslash-newline: drop both characters and keep scanning */
			if ((c1 = lexgetc()) == '\n')
				continue;
			lexungetc(c1);
			break;

		case ',':
			c = COMMA;
			break;

		case '?':
			c = QUEST;
			break;

		case ':':
			c = COLON;
			break;

		default:
			/* any other character is returned as itself; non-printable ones are reported */
			if (!iswprint(c))
				awkerr(
				    gettext("invalid character \"%s\""),
				    toprint(c));
			break;
		}
		break;
	}

	/*
	 * Post-processing.  In the branches below, a token that arrives
	 * while catterm is non-zero is held back in savetoken and CONCAT
	 * is returned in its place; the held-back token is delivered by
	 * the next call.
	 */
	switch (c) {
	case ']':
		++catterm;
		break;

	case VAR:
		if (catterm) {
			savetoken = c;
			c = CONCAT;
			catterm = 0;
		} else if (!isfuncdef) {
			/*
			 * Peek at the next character: catterm is raised
			 * unless the VAR is immediately followed by '('.
			 */
			if ((c1 = lexgetc()) != '(')
				++catterm;
			lexungetc(c1);
		}
		isfuncdef = 0;
		break;

	case PARM:
	case CONSTANT:
		if (catterm) {
			savetoken = c;
			c = CONCAT;
			catterm = 0;
		} else {
			/* remember for the next two calls that this followed '$' */
			if (lexlast == '$')
				wasfield = 2;
			++catterm;
		}
		break;

	case INC:
	case DEC:
		/*
		 * INC/DEC only take the CONCAT path below when catterm is
		 * set, the previous token was CONSTANT and wasfield is 0.
		 */
		if (!catterm || lexlast != CONSTANT || wasfield)
			break;

	/* FALLTHROUGH */
	case UFUNC:
	case FUNC:
	case GETLINE:
	case '!':
	case '$':
	case '(':
		if (catterm) {
			savetoken = c;
			c = CONCAT;
			catterm = 0;
		}
		break;

	case '}':
		/* at the outermost brace level, queue a ';' to follow the '}' */
		if (nbrace == 0)
			savetoken = ';';
	/* FALLTHROUGH */
	case ';':
		inprint = 0;
	/* FALLTHROUGH */
	default:
		if (c == DEFFUNC)
			isfuncdef = 1;
		catterm = 0;
	}
	/* lexlast records the token before it is mapped through ctosym[] */
	lexlast = c;
	if (wasfield)
		wasfield--;
	/*
	 * Map character constants to symbolic names.
	 */
	for (i = 0; ctosym[i].c != 0; i++)
		if (c == ctosym[i].c) {
			c = ctosym[i].sym;
			break;
		}
out:
#ifdef DEBUG
	if (dflag)
		(void) printf("%d\n", (int)c);
#endif
	return ((int)c);
}