Esempio n. 1
0
int
pdf_lex(fz_stream *f, pdf_lexbuf *buf)
{
	while (1)
	{
		int c = fz_read_byte(f);
		switch (c)
		{
		case EOF:
			return PDF_TOK_EOF;
		case IS_WHITE:
			lex_white(f);
			break;
		case '%':
			lex_comment(f);
			break;
		case '/':
			lex_name(f, buf);
			return PDF_TOK_NAME;
		case '(':
			return lex_string(f, buf);
		case ')':
			fz_warn(f->ctx, "lexical error (unexpected ')')");
			continue;
		case '<':
			c = fz_read_byte(f);
			if (c == '<')
			{
				return PDF_TOK_OPEN_DICT;
			}
			else
			{
				fz_unread_byte(f);
				return lex_hex_string(f, buf);
			}
		case '>':
			c = fz_read_byte(f);
			if (c == '>')
			{
				return PDF_TOK_CLOSE_DICT;
			}
			fz_warn(f->ctx, "lexical error (unexpected '>')");
			continue;
		case '[':
			return PDF_TOK_OPEN_ARRAY;
		case ']':
			return PDF_TOK_CLOSE_ARRAY;
		case '{':
			return PDF_TOK_OPEN_BRACE;
		case '}':
			return PDF_TOK_CLOSE_BRACE;
		case IS_NUMBER:
			return lex_number(f, buf, c);
		default: /* isregular: !isdelim && !iswhite && c != EOF */
			fz_unread_byte(f);
			lex_name(f, buf);
			return pdf_token_from_keyword(buf->scratch);
		}
	}
}
Esempio n. 2
0
pdf_token
pdf_lex(fz_stream *f, pdf_lexbuf *buf)
{
	while (1)
	{
		int c = fz_read_byte(f);
		switch (c)
		{
		case EOF:
			return PDF_TOK_EOF;
		case IS_WHITE:
			lex_white(f);
			break;
		case '%':
			lex_comment(f);
			break;
		case '/':
			lex_name(f, buf);
			return PDF_TOK_NAME;
		case '(':
			return lex_string(f, buf);
		case ')':
			fz_warn(f->ctx, "lexical error (unexpected ')')");
			continue;
		case '<':
			c = fz_read_byte(f);
			if (c == '<')
			{
				return PDF_TOK_OPEN_DICT;
			}
			else
			{
				fz_unread_byte(f);
				return lex_hex_string(f, buf);
			}
		case '>':
			c = fz_read_byte(f);
			if (c == '>')
			{
				return PDF_TOK_CLOSE_DICT;
			}
			fz_warn(f->ctx, "lexical error (unexpected '>')");
			if (c == EOF)
			{
				return PDF_TOK_EOF;
			}
			fz_unread_byte(f);
			continue;
		case '[':
			return PDF_TOK_OPEN_ARRAY;
		case ']':
			return PDF_TOK_CLOSE_ARRAY;
		case '{':
			return PDF_TOK_OPEN_BRACE;
		case '}':
			return PDF_TOK_CLOSE_BRACE;
		case IS_NUMBER:
			/* cf. https://code.google.com/p/sumatrapdf/issues/detail?id=2231 */
			{
				int tok = lex_number(f, buf, c);
				while (1)
				{
					c = fz_peek_byte(f);
					switch (c)
					{
					case IS_NUMBER:
						fz_warn(f->ctx, "ignoring invalid character after number: '%c'", c);
						fz_read_byte(f);
						continue;
					default:
						return tok;
					}
				}
			}
		default: /* isregular: !isdelim && !iswhite && c != EOF */
			fz_unread_byte(f);
			lex_name(f, buf);
			return pdf_token_from_keyword(buf->scratch);
		}
	}
}
Esempio n. 3
0
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 *
 *   LEXICAL SCANNER
 */
LOCAL te_dp_event
yylex(void)
{
    lastToken = DP_EV_INVALID;

scanAgain:
    /*
     *  Start the process of locating a token.
     *  We branch here after skipping over a comment
     *  or processing a directive (which may change our context).
     */
    if (IS_WHITESPACE_CHAR(*pCurCtx->pzScan))
        trim_whitespace();

    switch (*pCurCtx->pzScan) {
    case NUL:
        /*
         *  IF we are not inside an include context,
         *  THEN go finish.
         */
        if (pCurCtx->pCtx == NULL)
            goto lex_done;

        pop_context();
        goto scanAgain;

    case '#':
    {
        extern char * processDirective(char*);
        char * pz = processDirective(pCurCtx->pzScan+1);
        /*
         *  Ensure that the compiler doesn't try to save a copy of
         *  "pCurCtx" in a register.  It must be reloaded from memory.
         */
        pCurCtx->pzScan = pz;
        goto scanAgain;
    }

    case '{': SET_LIT_TKN(O_BRACE);   break;
    case '=': SET_LIT_TKN(EQ);        break;
    case '}': SET_LIT_TKN(C_BRACE);   break;
    case '[': SET_LIT_TKN(OPEN_BKT);  break;
    case ']': SET_LIT_TKN(CLOSE_BKT); break;
    case ';': SET_LIT_TKN(SEMI);      break;
    case ',': SET_LIT_TKN(COMMA);     break;

    case '\'':
    case '"':
    {
        char* pz = ao_string_cook(pCurCtx->pzScan, &(pCurCtx->lineNo));
        if (pz == NULL)
            goto NUL_error;

        pz_token = pCurCtx->pzScan;

        lastToken = DP_EV_STRING;
        pCurCtx->pzScan = pz;
        break;
    }

    case '<':
        switch (lex_here_string()) {
        case SUCCESS: break;
        case FAILURE: goto BrokenToken;
        case PROBLEM: return DP_EV_INVALID;
        }
        break;

    case '(':
        loadScheme();
        break;

    case '\\':
        if (strncmp(pCurCtx->pzScan+1, "'(", (size_t)2) == 0) {
            alist_to_autogen_def();
            goto scanAgain;
        }
        lex_escaped_char();
        break;

    case '`':
        switch (lex_backquote()) {
        case FAILURE: goto NUL_error;
        case PROBLEM: goto scanAgain;
        case SUCCESS: break;
        }
        break;

    case '/':
        switch (lex_comment()) {
        case SUCCESS: goto scanAgain;
        default: break;
        }
        /* FALLTHROUGH */ /* to Invalid input char */

    default:
    BrokenToken:
        pCurCtx->pzScan = assembleName(pCurCtx->pzScan, &lastToken);
        break;
    }   /* switch (*pCurCtx->pzScan) */

    return lastToken;

NUL_error:

    AG_ABEND(aprf(zErrMsg, pzProg, "unterminated quote in definition",
                  pCurCtx->pzCtxFname, pCurCtx->lineNo));
    return DP_EV_INVALID;

lex_done:
    /*
     *  First time through, return the DP_EV_END token.
     *  Second time through, we really finish.
     */
    if (pCurCtx->pzScan == zNil) {
        pCurCtx->pCtx = pDoneCtx;
        pDoneCtx      = pCurCtx;

        return DP_EV_INVALID;
    }

    pCurCtx->pzScan = (char*)zNil;
    return DP_EV_END;
}
Esempio n. 4
0
fz_error
pdf_lex(int *tok, fz_stream *f, char *buf, int n, int *sl)
{
	while (1)
	{
		int c = fz_read_byte(f);
		switch (c)
		{
		case EOF:
			*tok = PDF_TOK_EOF;
			return fz_okay;
		case IS_WHITE:
			lex_white(f);
			break;
		case '%':
			lex_comment(f);
			break;
		case '/':
			lex_name(f, buf, n);
			*sl = strlen(buf);
			*tok = PDF_TOK_NAME;
			return fz_okay;
		case '(':
			*sl = lex_string(f, buf, n);
			*tok = PDF_TOK_STRING;
			return fz_okay;
		case ')':
			*tok = PDF_TOK_ERROR;
			goto cleanuperror;
		case '<':
			c = fz_read_byte(f);
			if (c == '<')
			{
				*tok = PDF_TOK_OPEN_DICT;
			}
			else
			{
				fz_unread_byte(f);
				*sl = lex_hex_string(f, buf, n);
				*tok = PDF_TOK_STRING;
			}
			return fz_okay;
		case '>':
			c = fz_read_byte(f);
			if (c == '>')
			{
				*tok = PDF_TOK_CLOSE_DICT;
				return fz_okay;
			}
			*tok = PDF_TOK_ERROR;
			goto cleanuperror;
		case '[':
			*tok = PDF_TOK_OPEN_ARRAY;
			return fz_okay;
		case ']':
			*tok = PDF_TOK_CLOSE_ARRAY;
			return fz_okay;
		case '{':
			*tok = PDF_TOK_OPEN_BRACE;
			return fz_okay;
		case '}':
			*tok = PDF_TOK_CLOSE_BRACE;
			return fz_okay;
		case IS_NUMBER:
			fz_unread_byte(f);
			*sl = lex_number(f, buf, n, tok);
			return fz_okay;
		default: /* isregular: !isdelim && !iswhite && c != EOF */
			fz_unread_byte(f);
			lex_name(f, buf, n);
			*sl = strlen(buf);
			*tok = pdf_token_from_keyword(buf);
			return fz_okay;
		}
	}

cleanuperror:
	*tok = PDF_TOK_ERROR;
	return fz_throw("lexical error");
}