Exemple #1
0
int jsY_lexjson(js_State *J)
{
	while (1) {
		J->lexline = J->line; /* save location of beginning of token */

		while (jsY_iswhite(J->lexchar) || J->lexchar == '\n')
			jsY_next(J);

		if (J->lexchar >= '0' && J->lexchar <= '9') {
			return lexnumber(J);
		}

		switch (J->lexchar) {
		case ',': jsY_next(J); return ',';
		case ':': jsY_next(J); return ':';
		case '[': jsY_next(J); return '[';
		case ']': jsY_next(J); return ']';
		case '{': jsY_next(J); return '{';
		case '}': jsY_next(J); return '}';

		case '"':
			return lexstring(J);

		case '.':
			return lexnumber(J);

		case 'f':
			jsY_next(J); jsY_expect(J, 'a'); jsY_expect(J, 'l'); jsY_expect(J, 's'); jsY_expect(J, 'e');
			return TK_FALSE;

		case 'n':
			jsY_next(J); jsY_expect(J, 'u'); jsY_expect(J, 'l'); jsY_expect(J, 'l');
			return TK_NULL;

		case 't':
			jsY_next(J); jsY_expect(J, 'r'); jsY_expect(J, 'u'); jsY_expect(J, 'e');
			return TK_TRUE;

		case 0:
			return 0; /* EOF */
		}

		if (J->lexchar >= 0x20 && J->lexchar <= 0x7E)
			jsY_error(J, "unexpected character: '%c'", J->lexchar);
		jsY_error(J, "unexpected character: \\u%04X", J->lexchar);
	}
}
Exemple #2
0
static int jsY_lexx(js_State *J)
{
	J->newline = 0;

	while (1) {
		J->lexline = J->line; /* save location of beginning of token */

		while (jsY_iswhite(J->lexchar))
			jsY_next(J);

		if (jsY_accept(J, '\n')) {
			J->newline = 1;
			if (isnlthcontext(J->lasttoken))
				return ';';
			continue;
		}

		if (jsY_accept(J, '/')) {
			if (jsY_accept(J, '/')) {
				lexlinecomment(J);
				continue;
			} else if (jsY_accept(J, '*')) {
				if (lexcomment(J))
					jsY_error(J, "multi-line comment not terminated");
				continue;
			} else if (isregexpcontext(J->lasttoken)) {
				return lexregexp(J);
			} else if (jsY_accept(J, '=')) {
				return TK_DIV_ASS;
			} else {
				return '/';
			}
		}

		if (J->lexchar >= '0' && J->lexchar <= '9') {
			return lexnumber(J);
		}

		switch (J->lexchar) {
		case '(': jsY_next(J); return '(';
		case ')': jsY_next(J); return ')';
		case ',': jsY_next(J); return ',';
		case ':': jsY_next(J); return ':';
		case ';': jsY_next(J); return ';';
		case '?': jsY_next(J); return '?';
		case '[': jsY_next(J); return '[';
		case ']': jsY_next(J); return ']';
		case '{': jsY_next(J); return '{';
		case '}': jsY_next(J); return '}';
		case '~': jsY_next(J); return '~';

		case '\'':
		case '"':
			return lexstring(J);

		case '.':
			return lexnumber(J);

		case '<':
			jsY_next(J);
			if (jsY_accept(J, '<')) {
				if (jsY_accept(J, '='))
					return TK_SHL_ASS;
				return TK_SHL;
			}
			if (jsY_accept(J, '='))
				return TK_LE;
			return '<';

		case '>':
			jsY_next(J);
			if (jsY_accept(J, '>')) {
				if (jsY_accept(J, '>')) {
					if (jsY_accept(J, '='))
						return TK_USHR_ASS;
					return TK_USHR;
				}
				if (jsY_accept(J, '='))
					return TK_SHR_ASS;
				return TK_SHR;
			}
			if (jsY_accept(J, '='))
				return TK_GE;
			return '>';

		case '=':
			jsY_next(J);
			if (jsY_accept(J, '=')) {
				if (jsY_accept(J, '='))
					return TK_STRICTEQ;
				return TK_EQ;
			}
			return '=';

		case '!':
			jsY_next(J);
			if (jsY_accept(J, '=')) {
				if (jsY_accept(J, '='))
					return TK_STRICTNE;
				return TK_NE;
			}
			return '!';

		case '+':
			jsY_next(J);
			if (jsY_accept(J, '+'))
				return TK_INC;
			if (jsY_accept(J, '='))
				return TK_ADD_ASS;
			return '+';

		case '-':
			jsY_next(J);
			if (jsY_accept(J, '-'))
				return TK_DEC;
			if (jsY_accept(J, '='))
				return TK_SUB_ASS;
			return '-';

		case '*':
			jsY_next(J);
			if (jsY_accept(J, '='))
				return TK_MUL_ASS;
			return '*';

		case '%':
			jsY_next(J);
			if (jsY_accept(J, '='))
				return TK_MOD_ASS;
			return '%';

		case '&':
			jsY_next(J);
			if (jsY_accept(J, '&'))
				return TK_AND;
			if (jsY_accept(J, '='))
				return TK_AND_ASS;
			return '&';

		case '|':
			jsY_next(J);
			if (jsY_accept(J, '|'))
				return TK_OR;
			if (jsY_accept(J, '='))
				return TK_OR_ASS;
			return '|';

		case '^':
			jsY_next(J);
			if (jsY_accept(J, '='))
				return TK_XOR_ASS;
			return '^';

		case 0:
			return 0; /* EOF */
		}

		/* Handle \uXXXX escapes in identifiers */
		jsY_unescape(J);
		if (jsY_isidentifierstart(J->lexchar)) {
			textinit(J);
			textpush(J, J->lexchar);

			jsY_next(J);
			jsY_unescape(J);
			while (jsY_isidentifierpart(J->lexchar)) {
				textpush(J, J->lexchar);
				jsY_next(J);
				jsY_unescape(J);
			}

			textend(J);

			return jsY_findkeyword(J, J->lexbuf.text);
		}

		if (J->lexchar >= 0x20 && J->lexchar <= 0x7E)
			jsY_error(J, "unexpected character: '%c'", J->lexchar);
		jsY_error(J, "unexpected character: \\u%04X", J->lexchar);
	}
}
Exemple #3
0
/*
 * The lexical analyzer.
 */
int
yylex()
{
	wint_t c, c1;
	int i;
	static int savetoken = 0;
	static int wasfield;
	static int isfuncdef;
	static int nbrace, nparen, nbracket;
	static struct ctosymstruct {
		wint_t c, sym;
	} ctosym[] = {
		{ '|', BAR },		{ '^', CARAT },
		{ '~', TILDE },		{ '<', LANGLE },
		{ '>', RANGLE },	{ '+', PLUSC },
		{ '-', HYPHEN },	{ '*', STAR },
		{ '/', SLASH },		{ '%', PERCENT },
		{ '!', EXCLAMATION },	{ '$', DOLLAR },
		{ '[', LSQUARE },	{ ']', RSQUARE },
		{ '(', LPAREN },	{ ')', RPAREN },
		{ ';', SEMI },		{ '{', LBRACE },
		{ '}', RBRACE },	{   0, 0 }
	};

	if (savetoken) {
		c = savetoken;
		savetoken = 0;
	} else if (redelim != '\0') {
		c = redelim;
		redelim = 0;
		catterm = 0;
		savetoken = c;
		c = lexlast = lexregexp(c);
		goto out;
	} else while ((c = lexgetc()) != WEOF) {
		if (iswalpha(c) || c == '_') {
			c = lexid(c);
		} else if (iswdigit(c) || c == '.') {
			c = lexnumber(c);
		} else if (isWblank(c)) {
			continue;
		} else switch (c) {
#if DOS || OS2
		case 032:		/* ^Z */
			continue;
#endif

		case '"':
			c = lexstring(c);
			break;

		case '#':
			while ((c = lexgetc()) != '\n' && c != WEOF)
				;
			lexungetc(c);
			continue;

		case '+':
			if ((c1 = lexgetc()) == '+')
				c = INC;
			else if (c1 == '=')
				c = AADD;
			else
				lexungetc(c1);
			break;

		case '-':
			if ((c1 = lexgetc()) == '-')
				c = DEC;
			else if (c1 == '=')
				c = ASUB;
			else
				lexungetc(c1);
			break;

		case '*':
			if ((c1 = lexgetc()) == '=')
				c = AMUL;
			else if (c1 == '*') {
				if ((c1 = lexgetc()) == '=')
					c = AEXP;
				else {
					c = EXP;
					lexungetc(c1);
				}
			} else
				lexungetc(c1);
			break;

		case '^':
			if ((c1 = lexgetc()) == '=') {
				c = AEXP;
			} else {
				c = EXP;
				lexungetc(c1);
			}
			break;

		case '/':
			if ((c1 = lexgetc()) == '=' &&
			    lexlast != RE && lexlast != NRE &&
			    lexlast != ';' && lexlast != '\n' &&
			    lexlast != ',' && lexlast != '(')
				c = ADIV;
			else
				lexungetc(c1);
			break;

		case '%':
			if ((c1 = lexgetc()) == '=')
				c = AREM;
			else
				lexungetc(c1);
			break;

		case '&':
			if ((c1 = lexgetc()) == '&')
				c = AND;
			else
				lexungetc(c1);
			break;

		case '|':
			if ((c1 = lexgetc()) == '|')
				c = OR;
			else {
				lexungetc(c1);
				if (inprint)
					c = PIPE;
			}
			break;

		case '>':
			if ((c1 = lexgetc()) == '=')
				c = GE;
			else if (c1 == '>')
				c = APPEND;
			else {
				lexungetc(c1);
				if (nparen == 0 && inprint)
					c = WRITE;
			}
			break;

		case '<':
			if ((c1 = lexgetc()) == '=')
				c = LE;
			else
				lexungetc(c1);
			break;

		case '!':
			if ((c1 = lexgetc()) == '=')
				c = NE;
			else if (c1 == '~')
				c = NRE;
			else
				lexungetc(c1);
			break;

		case '=':
			if ((c1 = lexgetc()) == '=')
				c = EQ;
			else {
				lexungetc(c1);
				c = ASG;
			}
			break;

		case '\n':
			switch (lexlast) {
			case ')':
				if (catterm || inprint) {
					c = ';';
					break;
				}
			/* FALLTHROUGH */
			case AND:
			case OR:
			case COMMA:
			case '{':
			case ELSE:
			case ';':
			case DO:
				continue;

			case '}':
				if (nbrace != 0)
					continue;
				/* FALLTHROUGH */

			default:
				c = ';';
				break;
			}
			break;

		case ELSE:
			if (lexlast != ';') {
				savetoken = ELSE;
				c = ';';
			}
			break;

		case '(':
			++nparen;
			break;

		case ')':
			if (--nparen < 0)
				awkerr(unbal, "()");
			break;

		case '{':
			nbrace++;
			break;

		case '}':
			if (--nbrace < 0) {
				char brk[3];

				brk[0] = '{';
				brk[1] = '}';
				brk[2] = '\0';
				awkerr(unbal, brk);
			}
			if (lexlast != ';') {
				savetoken = c;
				c = ';';
			}
			break;

		case '[':
			++nbracket;
			break;

		case ']':
			if (--nbracket < 0) {
				char brk[3];

				brk[0] = '[';
				brk[1] = ']';
				brk[2] = '\0';
				awkerr(unbal, brk);
			}
			break;

		case '\\':
			if ((c1 = lexgetc()) == '\n')
				continue;
			lexungetc(c1);
			break;

		case ',':
			c = COMMA;
			break;

		case '?':
			c = QUEST;
			break;

		case ':':
			c = COLON;
			break;

		default:
			if (!iswprint(c))
				awkerr(
				    gettext("invalid character \"%s\""),
				    toprint(c));
			break;
		}
		break;
	}

	switch (c) {
	case ']':
		++catterm;
		break;

	case VAR:
		if (catterm) {
			savetoken = c;
			c = CONCAT;
			catterm = 0;
		} else if (!isfuncdef) {
			if ((c1 = lexgetc()) != '(')
				++catterm;
			lexungetc(c1);
		}
		isfuncdef = 0;
		break;

	case PARM:
	case CONSTANT:
		if (catterm) {
			savetoken = c;
			c = CONCAT;
			catterm = 0;
		} else {
			if (lexlast == '$')
				wasfield = 2;
			++catterm;
		}
		break;

	case INC:
	case DEC:
		if (!catterm || lexlast != CONSTANT || wasfield)
			break;

	/* FALLTHROUGH */
	case UFUNC:
	case FUNC:
	case GETLINE:
	case '!':
	case '$':
	case '(':
		if (catterm) {
			savetoken = c;
			c = CONCAT;
			catterm = 0;
		}
		break;

	case '}':
		if (nbrace == 0)
			savetoken = ';';
	/* FALLTHROUGH */
	case ';':
		inprint = 0;
	/* FALLTHROUGH */
	default:
		if (c == DEFFUNC)
			isfuncdef = 1;
		catterm = 0;
	}
	lexlast = c;
	if (wasfield)
		wasfield--;
	/*
	 * Map character constants to symbolic names.
	 */
	for (i = 0; ctosym[i].c != 0; i++)
		if (c == ctosym[i].c) {
			c = ctosym[i].sym;
			break;
		}
out:
#ifdef DEBUG
	if (dflag)
		(void) printf("%d\n", (int)c);
#endif
	return ((int)c);
}
Exemple #4
0
fz_error
pdf_lex(int *tok, fz_stream *f, char *buf, int n, int *sl)
{
    while (1)
    {
        int c = fz_readbyte(f);
        switch (c)
        {
        case EOF:
            *tok = PDF_TEOF;
            return fz_okay;
        case ISWHITE:
            lexwhite(f);
            break;
        case '%':
            lexcomment(f);
            break;
        case '/':
            lexname(f, buf, n);
            *sl = strlen(buf);
            *tok = PDF_TNAME;
            return fz_okay;
        case '(':
            *sl = lexstring(f, buf, n);
            *tok = PDF_TSTRING;
            return fz_okay;
        case ')':
            *tok = PDF_TERROR;
            goto cleanuperror;
        case '<':
            c = fz_readbyte(f);
            if (c == '<')
            {
                *tok = PDF_TODICT;
            }
            else
            {
                fz_unreadbyte(f);
                *sl = lexhexstring(f, buf, n);
                *tok = PDF_TSTRING;
            }
            return fz_okay;
        case '>':
            c = fz_readbyte(f);
            if (c == '>')
            {
                *tok = PDF_TCDICT;
                return fz_okay;
            }
            *tok = PDF_TERROR;
            goto cleanuperror;
        case '[':
            *tok = PDF_TOARRAY;
            return fz_okay;
        case ']':
            *tok = PDF_TCARRAY;
            return fz_okay;
        case '{':
            *tok = PDF_TOBRACE;
            return fz_okay;
        case '}':
            *tok = PDF_TCBRACE;
            return fz_okay;
        case ISNUMBER:
            fz_unreadbyte(f);
            *sl = lexnumber(f, buf, n, tok);
            return fz_okay;
        default: /* isregular: !isdelim && !iswhite && c != EOF */
            fz_unreadbyte(f);
            lexname(f, buf, n);
            *sl = strlen(buf);
            *tok = pdf_tokenfromkeyword(buf);
            return fz_okay;
        }
    }

cleanuperror:
    *tok = PDF_TERROR;
    return fz_throw("lexical error");
}
Exemple #5
0
token_t lexnext(secd_parser_t *p) {
    /* skip spaces */
    while (isspace(p->lc))
        nextchar(p);

    switch (p->lc) {
      case EOF: return (p->token = TOK_EOF);
      case ';':
        /* consume comment */
        do nextchar(p); while (p->lc != '\n');
        return lexnext(p);

      case '(': case ')':
        p->token = p->lc;
        nextchar(p);
        return p->token;

      case '#':
        /* one-char tokens */
        p->token = p->lc;
        nextchar(p);
        switch (p->lc) {
            case 'f': case 't':
                p->symtok[0] = '#';
                p->symtok[1] = p->lc;
                p->symtok[2] = '\0';
                nextchar(p);
                return (p->token = TOK_SYM);
            case ';':
                nextchar(p);
                free_cell(p->secd, read_token(p->secd, p));
                return lexnext(p);
            case '|':
                lex_mltln_comment(p);
                return lexnext(p);
            case '!':
                do {
                    nextchar(p);
                } while (p->lc != '\n');
                return lexnext(p);
            /* chars */
            case '\\': nextchar(p); return lexchar(p);
            /* numbers */
            case 'x': case 'X': nextchar(p); return lexnumber(p, 16);
            case 'o': case 'O': nextchar(p); return lexnumber(p, 8);
            case 'b': case 'B': nextchar(p); return lexnumber(p, 2);
            case 'd': case 'D': nextchar(p); return lexnumber(p, 10);
        }
        return p->token;
      case '\'':
        nextchar(p);
        return (p->token = TOK_QUOTE);
      case '`':
        nextchar(p);
        return (p->token = TOK_QQ);
      case ',':
        /* may be ',' or ',@' */
        nextchar(p);
        if (p->lc == '@') {
            nextchar(p);
            return (p->token = TOK_UQSPL);
        }
        return (p->token = TOK_UQ);
      case '"':
        return lexstring(p);
    }

    if (isdigit(p->lc))
        return lexnumber(p, 10);

    if (p->issymbc[(unsigned char)p->lc])
        return lexsymbol(p);

    return TOK_ERR; /* nothing fits */
}
int
pdf_lex(fz_stream *f, unsigned char *buf, int n, int *sl)
{
	int c;

	while (1)
	{
		c = fz_peekbyte(f);

		if (c == EOF)
			return PDF_TEOF;

		else if (iswhite(c))
			lexwhite(f);

		else if (c == '%')
			lexcomment(f);


		else if (c == '/')
		{
			fz_readbyte(f);
			lexname(f, buf, n);
			*sl = strlen(buf);
			return PDF_TNAME;
		}

		else if (c == '(')
		{
			fz_readbyte(f);
			*sl = lexstring(f, buf, n);
			return PDF_TSTRING;
		}

		else if (c == '<')
		{
			fz_readbyte(f);
			c = fz_peekbyte(f);
			if (c == '<')
			{
				fz_readbyte(f);
				return PDF_TODICT;
			}
			else
			{
				*sl = lexhexstring(f, buf, n);
				return PDF_TSTRING;
			}
		}

		else if (c == '>')
		{
			fz_readbyte(f);
			c = fz_readbyte(f);
			if (c == '>')
				return PDF_TCDICT;
			return PDF_TERROR;
		}

		else if (c == '[')
		{
			fz_readbyte(f);
			return PDF_TOARRAY;
		}

		else if (c == ']')
		{
			fz_readbyte(f);
			return PDF_TCARRAY;
		}

		else if (c == '{')
		{
			fz_readbyte(f);
			return PDF_TOBRACE;
		}

		else if (c ==  '}')
		{
			fz_readbyte(f);
			return PDF_TCBRACE;
		}

		else if (isnumber(c))
		{
			lexnumber(f, buf, n);
			*sl = strlen(buf);
			if (strchr(buf, '.'))
				return PDF_TREAL;
			return PDF_TINT;
		}

		else if (isregular(c))
		{
			lexname(f, buf, n);
			*sl = strlen(buf);
			return tokenfromkeyword(buf);
		}

		else
			return PDF_TERROR;
	}
}
Exemple #7
0
fz_error
pdf_lex(pdf_token_e *tok, fz_stream *f, char *buf, int n, int *sl)
{
	fz_error error;
	int c;

	while (1)
	{
		c = fz_peekbyte(f);

		if (c == EOF)
		{
			*tok = PDF_TEOF;
			goto cleanupokay;
		}

		else if (iswhite(c))
			lexwhite(f);

		else if (c == '%')
			lexcomment(f);

		else if (c == '/')
		{
			fz_readbyte(f);
			lexname(f, buf, n);
			*sl = strlen(buf);
			*tok = PDF_TNAME;
			goto cleanupokay;
		}

		else if (c == '(')
		{
			fz_readbyte(f);
			*sl = lexstring(f, buf, n);
			*tok = PDF_TSTRING;
			goto cleanupokay;
		}

		else if (c == '<')
		{
			fz_readbyte(f);
			c = fz_peekbyte(f);
			if (c == '<')
			{
				fz_readbyte(f);
				*tok = PDF_TODICT;
				goto cleanupokay;
			}
			else
			{
				*sl = lexhexstring(f, buf, n);
				*tok = PDF_TSTRING;
				goto cleanupokay;
			}
		}

		else if (c == '>')
		{
			fz_readbyte(f);
			c = fz_readbyte(f);
			if (c == '>')
			{
				*tok = PDF_TCDICT;
				goto cleanupokay;
			}
			*tok = PDF_TERROR;
			goto cleanuperror;
		}

		else if (c == '[')
		{
			fz_readbyte(f);
			*tok = PDF_TOARRAY;
			goto cleanupokay;
		}

		else if (c == ']')
		{
			fz_readbyte(f);
			*tok = PDF_TCARRAY;
			goto cleanupokay;
		}

		else if (c == '{')
		{
			fz_readbyte(f);
			*tok = PDF_TOBRACE;
			goto cleanupokay;
		}

		else if (c == '}')
		{
			fz_readbyte(f);
			*tok = PDF_TCBRACE;
			goto cleanupokay;
		}

		else if (isnumber(c))
		{
			lexnumber(f, buf, n);
			*sl = strlen(buf);
			if (strchr(buf, '.'))
			{
				*tok = PDF_TREAL;
				goto cleanupokay;
			}
			*tok = PDF_TINT;
			goto cleanupokay;
		}

		else if (isregular(c))
		{
			lexname(f, buf, n);
			*sl = strlen(buf);
			*tok = pdf_tokenfromkeyword(buf);
			goto cleanupokay;
		}

		else
		{
			*tok = PDF_TERROR;
			goto cleanuperror;
		}
	}

cleanupokay:
	error = fz_readerror(f);
	if (error)
	{
		*tok = PDF_TERROR;
		return fz_rethrow(error, "cannot read token");
	}
	return fz_okay;

cleanuperror:
	error = fz_readerror(f);
	if (error)
	{
		*tok = PDF_TERROR;
		return fz_rethrow(error, "cannot read token");
	}
	*tok = PDF_TERROR;
	return fz_throw("lexical error");
}