Beispiel #1
0
/*
 * Determine the source encoding of the file open on descriptor `fd`.
 *
 * The descriptor is duplicated first and only the duplicate is wrapped in a
 * stdio stream, so the caller's `fd` is never closed here.  A tokenizer is
 * run over the duplicate until its line counter reaches 2 or its status
 * leaves E_OK.
 *
 * fd:       descriptor of the file to inspect.
 * filename: when non-NULL, stored (with a new reference) as the tokenizer's
 *           filename; when NULL, the string "<string>" is used instead.
 *           Not used at all when PGEN is defined.
 *
 * Returns a copy of the tokenizer's encoding string allocated with
 * PyMem_MALLOC, or NULL when the tokenizer recorded no encoding or when any
 * step (dup, fdopen, tokenizer creation, allocation) fails.
 */
char *
PyTokenizer_FindEncodingFilename(int fd, PyObject *filename)
{
    struct tok_state *tok;
    FILE *fp;
    char *p_start = NULL, *p_end = NULL, *encoding = NULL;

#ifndef PGEN
    fd = _Py_dup(fd);
#else
    fd = dup(fd);
#endif
    if (fd < 0) {
        return NULL;
    }

    fp = fdopen(fd, "r");
    if (fp == NULL) {
        /* fdopen() leaves the descriptor open when it fails, so the
           duplicate must be closed here or it is leaked. */
        close(fd);
        return NULL;
    }
    tok = PyTokenizer_FromFile(fp, NULL, NULL, NULL);
    if (tok == NULL) {
        fclose(fp);
        return NULL;
    }
#ifndef PGEN
    if (filename != NULL) {
        Py_INCREF(filename);
        tok->filename = filename;
    }
    else {
        tok->filename = PyUnicode_FromString("<string>");
        if (tok->filename == NULL) {
            fclose(fp);
            PyTokenizer_Free(tok);
            return NULL;
        }
    }
#endif
    /* Only the tokens themselves matter for their side effect on
       tok->encoding; the token boundaries are discarded. */
    while (tok->lineno < 2 && tok->done == E_OK) {
        PyTokenizer_Get(tok, &p_start, &p_end);
    }
    fclose(fp);
    if (tok->encoding) {
        /* Copy the name out before the tokenizer state is freed. */
        encoding = (char *)PyMem_MALLOC(strlen(tok->encoding) + 1);
        if (encoding) {
            strcpy(encoding, tok->encoding);
        }
    }
    PyTokenizer_Free(tok);
    return encoding;
}
Beispiel #2
0
/*
 * Pull tokens from the tokenizer `tok` and feed them to a parser created
 * for grammar `g` with start symbol `start`.
 *
 * Errors are reported through `err_ret` (error code, offending token,
 * expected token, optional text).  `flags` is read for PyPARSE_* options.
 *
 * NOTE: this excerpt is truncated -- it ends right after the token loop and
 * the rest of the function (result handling, cleanup, return) is not part
 * of this listing.
 */
static node *
parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
         int *flags)
{
    parser_state *ps;
    node *n;
    int started = 0;

    /* Without a parser nothing can be done: report E_NOMEM and release the
       tokenizer before bailing out. */
    if ((ps = PyParser_New(g, start)) == NULL) {
        err_ret->error = E_NOMEM;
        PyTokenizer_Free(tok);
        return NULL;
    }
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
    /* Carry the caller's "Barry as BDFL" request over into the parser. */
    if (*flags & PyPARSE_BARRY_AS_BDFL)
        ps->p_flags |= CO_FUTURE_BARRY_AS_BDFL;
#endif

    /* Token loop: left only through `break`, with err_ret->error holding
       the reason. */
    for (;;) {
        char *a, *b;
        int type;
        size_t len;
        char *str;
        int col_offset;

        /* a and b receive the start and end of the token text. */
        type = PyTokenizer_Get(tok, &a, &b);
        if (type == ERRORTOKEN) {
            /* The tokenizer's own status becomes the reported error. */
            err_ret->error = tok->done;
            break;
        }
        if (type == ENDMARKER && started) {
            type = NEWLINE; /* Add an extra newline */
            started = 0;
            /* Add the right number of dedent tokens,
               except if a certain flag is given --
               codeop.py uses this. */
            if (tok->indent &&
                !(*flags & PyPARSE_DONT_IMPLY_DEDENT))
            {
                tok->pendin = -tok->indent;
                tok->indent = 0;
            }
        }
        else
            started = 1;
        /* Make a NUL-terminated private copy of the token text. */
        len = b - a; /* XXX this may compute NULL - NULL */
        str = (char *) PyObject_MALLOC(len + 1);
        if (str == NULL) {
            err_ret->error = E_NOMEM;
            break;
        }
        if (len > 0)
            strncpy(str, a, len);
        str[len] = '\0';

#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
        if (type == NOTEQUAL) {
            /* Without the BDFL flag the only accepted spelling is "!=". */
            if (!(ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) &&
                            strcmp(str, "!=")) {
                PyObject_FREE(str);
                err_ret->error = E_SYNTAX;
                break;
            }
            /* With the BDFL flag the only accepted spelling is "<>". */
            else if ((ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) &&
                            strcmp(str, "<>")) {
                PyObject_FREE(str);
                /* NOTE(review): err_ret->text is pointed at a string
                   literal here -- confirm that no consumer of this field
                   tries to free it. */
                err_ret->text = "with Barry as BDFL, use '<>' "
                                "instead of '!='";
                err_ret->error = E_SYNTAX;
                break;
            }
        }
#endif
        /* Column of the token within the current line, or -1 when the
           token starts before tok->line_start. */
        if (a >= tok->line_start)
            col_offset = Py_SAFE_DOWNCAST(a - tok->line_start,
                                          Py_intptr_t, int);
        else
            col_offset = -1;

        /* Any result other than E_OK ends the loop.  str is freed here
           only for results other than E_OK and E_DONE; presumably the
           parser keeps it otherwise -- verify against PyParser_AddToken. */
        if ((err_ret->error =
             PyParser_AddToken(ps, (int)type, str,
                               tok->lineno, col_offset,
                               &(err_ret->expected))) != E_OK) {
            if (err_ret->error != E_DONE) {
                PyObject_FREE(str);
                err_ret->token = type;
            }
            break;
        }
    }
/*
 * Pull tokens from the tokenizer `tok` and feed them to a parser created
 * for grammar `g` with start symbol `start`.
 *
 * tok:     tokenizer state; it is freed on every path before returning.
 * g:       grammar handed to PyParser_New.
 * start:   start symbol handed to PyParser_New.
 * err_ret: receives the error code and, on failure, the line number,
 *          offset and a heap-allocated copy of the tokenizer buffer.
 * flags:   in: PyPARSE_* options; out: overwritten with the parser's
 *          p_flags when PY_PARSER_REQUIRES_FUTURE_KEYWORD is defined.
 *
 * Returns the parse tree (wrapped in an encoding_decl node when the
 * tokenizer recorded an encoding), or NULL on failure.
 */
static node *
parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
         int *flags)
{
    parser_state *ps;
    node *n;
    int started = 0;

    if ((ps = PyParser_New(g, start)) == NULL) {
        fprintf(stderr, "no mem for new parser\n");
        err_ret->error = E_NOMEM;
        PyTokenizer_Free(tok);
        return NULL;
    }
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
    if (*flags & PyPARSE_PRINT_IS_FUNCTION) {
        ps->p_flags |= CO_FUTURE_PRINT_FUNCTION;
    }
    if (*flags & PyPARSE_UNICODE_LITERALS) {
        ps->p_flags |= CO_FUTURE_UNICODE_LITERALS;
    }

#endif

    /* Token loop: left only through `break`, with err_ret->error holding
       the reason. */
    for (;;) {
        char *a, *b;
        int type;
        size_t len;
        char *str;
        int col_offset;

        type = PyTokenizer_Get(tok, &a, &b);
        if (type == ERRORTOKEN) {
            err_ret->error = tok->done;
            break;
        }
        if (type == ENDMARKER && started) {
            type = NEWLINE; /* Add an extra newline */
            started = 0;
            /* Add the right number of dedent tokens,
               except if a certain flag is given --
               codeop.py uses this. */
            if (tok->indent &&
                !(*flags & PyPARSE_DONT_IMPLY_DEDENT))
            {
                tok->pendin = -tok->indent;
                tok->indent = 0;
            }
        }
        else {
            started = 1;
        }

        /* Make a NUL-terminated private copy of the token text. */
        len = b - a; /* XXX this may compute NULL - NULL */
        str = (char *) PyObject_MALLOC(len + 1);
        if (str == NULL) {
            fprintf(stderr, "no mem for next token\n");
            err_ret->error = E_NOMEM;
            break;
        }
        if (len > 0)
            strncpy(str, a, len);
        str[len] = '\0';

        /* Column of the token within the current line, or -1 when the
           token starts before tok->line_start. */
        if (a >= tok->line_start)
            col_offset = (int)(a - tok->line_start);
        else
            col_offset = -1;

        if ((err_ret->error =
             PyParser_AddToken(ps, (int)type, str, tok->lineno, col_offset,
                               &(err_ret->expected))) != E_OK) {
            /* str is released here only when the parser reported a real
               error; on E_DONE it is left alone. */
            if (err_ret->error != E_DONE) {
                PyObject_FREE(str);
                err_ret->token = type;
            }
            break;
        }
    }

    if (err_ret->error == E_DONE) {
        /* Take the finished tree away from the parser so that
           PyParser_Delete below does not see it. */
        n = ps->p_tree;
        ps->p_tree = NULL;
    }
    else
        n = NULL;

#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
    *flags = ps->p_flags;
#endif
    PyParser_Delete(ps);

    if (n == NULL) {
        if (tok->lineno <= 1 && tok->done == E_EOF)
            err_ret->error = E_EOF;
        err_ret->lineno = tok->lineno;
        if (tok->buf != NULL) {
            char *text = NULL;
            size_t len;
            assert(tok->cur - tok->buf < INT_MAX);
            err_ret->offset = (int)(tok->cur - tok->buf);
            len = tok->inp - tok->buf;
#ifdef Py_USING_UNICODE
            text = PyTokenizer_RestoreEncoding(tok, len, &err_ret->offset);

#endif
            /* Fall back to a plain copy of the tokenizer buffer. */
            if (text == NULL) {
                text = (char *) PyObject_MALLOC(len + 1);
                if (text != NULL) {
                    if (len > 0)
                        strncpy(text, tok->buf, len);
                    text[len] = '\0';
                }
            }
            err_ret->text = text;
        }
    } else if (tok->encoding != NULL) {
        node* r = PyNode_New(encoding_decl);
        if (!r) {
            err_ret->error = E_NOMEM;
            /* The tree was detached from the parser above, so nobody else
               will release it: free it before dropping the pointer. */
            PyNode_Free(n);
            n = NULL;
            goto done;
        }
        /* The encoding string moves from the tokenizer to the new node. */
        r->n_str = tok->encoding;
        r->n_nchildren = 1;
        r->n_child = n;
        tok->encoding = NULL;
        n = r;
    }

done:
    PyTokenizer_Free(tok);

    return n;
}
Beispiel #4
0
/*
 * Pull tokens from the tokenizer `tok` and feed them to a parser created
 * for grammar `g` with start symbol `start`.
 *
 * tok:     tokenizer state; it is freed on every path before returning.
 * g:       grammar handed to PyParser_New.
 * start:   start symbol handed to PyParser_New; for single_input the
 *          remaining buffer is additionally checked (unless PGEN).
 * err_ret: receives the error code and, on failure, the line number,
 *          offset and a heap-allocated copy of the tokenizer buffer.
 * flags:   in: PyPARSE_* options; out: overwritten with the parser's
 *          p_flags when PY_PARSER_REQUIRES_FUTURE_KEYWORD is defined.
 *
 * Returns the parse tree (wrapped in an encoding_decl node when the
 * tokenizer recorded an encoding), or NULL on failure.
 */
static node *
parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
         int *flags)
{
    parser_state *ps;
    node *n;
    int started = 0;

    if ((ps = PyParser_New(g, start)) == NULL) {
        err_ret->error = E_NOMEM;
        PyTokenizer_Free(tok);
        return NULL;
    }
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
    if (*flags & PyPARSE_BARRY_AS_BDFL)
        ps->p_flags |= CO_FUTURE_BARRY_AS_BDFL;
#endif

    /* Token loop: left only through `break`, with err_ret->error holding
       the reason. */
    for (;;) {
        char *a, *b;
        int type;
        size_t len;
        char *str;
        int col_offset;

        type = PyTokenizer_Get(tok, &a, &b);
        if (type == ERRORTOKEN) {
            err_ret->error = tok->done;
            break;
        }
        if (type == ENDMARKER && started) {
            type = NEWLINE; /* Add an extra newline */
            started = 0;
            /* Add the right number of dedent tokens,
               except if a certain flag is given --
               codeop.py uses this. */
            if (tok->indent &&
                !(*flags & PyPARSE_DONT_IMPLY_DEDENT))
            {
                tok->pendin = -tok->indent;
                tok->indent = 0;
            }
        }
        else {
            started = 1;
        }

        /* Make a NUL-terminated private copy of the token text. */
        len = b - a; /* XXX this may compute NULL - NULL */
        str = (char *) PyObject_MALLOC(len + 1);
        if (str == NULL) {
            err_ret->error = E_NOMEM;
            break;
        }
        if (len > 0)
            strncpy(str, a, len);
        str[len] = '\0';

#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
        if (type == NOTEQUAL) {
            /* Without the BDFL flag the only accepted spelling is "!=". */
            if (!(ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) &&
                            strcmp(str, "!=")) {
                PyObject_FREE(str);
                err_ret->error = E_SYNTAX;
                break;
            }
            /* With the BDFL flag the only accepted spelling is "<>". */
            else if ((ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) &&
                            strcmp(str, "<>")) {
                PyObject_FREE(str);
                /* err_ret->text is deliberately not pointed at a string
                   literal here: the failure path below stores
                   PyObject_MALLOC'ed memory in that field, so a literal
                   would either be overwritten or be mistaken for heap
                   memory by whoever releases the field. */
                err_ret->error = E_SYNTAX;
                break;
            }
        }
#endif
        /* Column of the token within the current line, or -1 when the
           token starts before tok->line_start. */
        if (a >= tok->line_start)
            col_offset = (int)(a - tok->line_start);
        else
            col_offset = -1;

        if ((err_ret->error =
             PyParser_AddToken(ps, (int)type, str,
                               tok->lineno, col_offset,
                               &(err_ret->expected))) != E_OK) {
            /* str is released here only when the parser reported a real
               error; on E_DONE it is left alone. */
            if (err_ret->error != E_DONE) {
                PyObject_FREE(str);
                err_ret->token = type;
            }
            break;
        }
    }

    if (err_ret->error == E_DONE) {
        /* Take the finished tree away from the parser so that
           PyParser_Delete below does not see it. */
        n = ps->p_tree;
        ps->p_tree = NULL;

#ifndef PGEN
        /* Check that the source for a single input statement really
           is a single statement by looking at what is left in the
           buffer after parsing.  Trailing whitespace and comments
           are OK.  */
        if (start == single_input) {
            char *cur = tok->cur;
            char c = *tok->cur;

            for (;;) {
                while (c == ' ' || c == '\t' || c == '\n' || c == '\014')
                    c = *++cur;

                if (!c)
                    break;

                if (c != '#') {
                    err_ret->error = E_BADSINGLE;
                    PyNode_Free(n);
                    n = NULL;
                    break;
                }

                /* Suck up comment. */
                while (c && c != '\n')
                    c = *++cur;
            }
        }
#endif
    }
    else
        n = NULL;

#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
    *flags = ps->p_flags;
#endif
    PyParser_Delete(ps);

    if (n == NULL) {
        if (tok->done == E_EOF)
            err_ret->error = E_EOF;
        err_ret->lineno = tok->lineno;
        if (tok->buf != NULL) {
            size_t len;
            assert(tok->cur - tok->buf < INT_MAX);
            err_ret->offset = (int)(tok->cur - tok->buf);
            len = tok->inp - tok->buf;
            err_ret->text = (char *) PyObject_MALLOC(len + 1);
            if (err_ret->text != NULL) {
                if (len > 0)
                    strncpy(err_ret->text, tok->buf, len);
                err_ret->text[len] = '\0';
            }
        }
    } else if (tok->encoding != NULL) {
        /* 'nodes->n_str' uses PyObject_*, while 'tok->encoding' was
         * allocated using PyMem_
         */
        node* r = PyNode_New(encoding_decl);
        if (r)
            r->n_str = PyObject_MALLOC(strlen(tok->encoding)+1);
        if (!r || !r->n_str) {
            err_ret->error = E_NOMEM;
            if (r)
                PyObject_FREE(r);
            /* The tree was detached from the parser above, so nobody else
               will release it: free it before dropping the pointer. */
            PyNode_Free(n);
            n = NULL;
            goto done;
        }
        strcpy(r->n_str, tok->encoding);
        PyMem_FREE(tok->encoding);
        tok->encoding = NULL;
        r->n_nchildren = 1;
        r->n_child = n;
        n = r;
    }

done:
    PyTokenizer_Free(tok);

    return n;
}
Beispiel #5
0
/*
 * Pull tokens from the tokenizer `tok` and feed them to a parser created
 * for grammar `g` with start symbol `start`.
 *
 * tok:     tokenizer state; it is freed on every path before returning.
 * g:       grammar handed to PyParser_New.
 * start:   start symbol handed to PyParser_New.
 * err_ret: receives the error code and, on failure, the line number,
 *          offset and a heap-allocated copy of the tokenizer buffer.
 * flags:   PyPARSE_* options (passed by value, never written back).
 *
 * Returns the parse tree (wrapped in an encoding_decl node when the
 * tokenizer recorded an encoding), or NULL on failure.
 */
static node *
parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
	 int flags)
{
	parser_state *ps;
	node *n;
	int started = 0;

	if ((ps = PyParser_New(g, start)) == NULL) {
		fprintf(stderr, "no mem for new parser\n");
		err_ret->error = E_NOMEM;
		/* Every other exit releases the tokenizer, so this one has
		   to as well; otherwise tok is leaked. */
		PyTokenizer_Free(tok);
		return NULL;
	}
#if 0 /* future keyword */
	if (flags & PyPARSE_YIELD_IS_KEYWORD)
		ps->p_generators = 1;
#endif

	/* Token loop: left only through `break`, with err_ret->error
	   holding the reason. */
	for (;;) {
		char *a, *b;
		int type;
		size_t len;
		char *str;

		type = PyTokenizer_Get(tok, &a, &b);
		if (type == ERRORTOKEN) {
			err_ret->error = tok->done;
			break;
		}
		if (type == ENDMARKER && started) {
			type = NEWLINE; /* Add an extra newline */
			started = 0;
			/* Add the right number of dedent tokens,
			   except if a certain flag is given --
			   codeop.py uses this. */
			if (tok->indent &&
			    !(flags & PyPARSE_DONT_IMPLY_DEDENT))
			{
				tok->pendin = -tok->indent;
				tok->indent = 0;
			}
		}
		else {
			started = 1;
		}

		/* Make a NUL-terminated private copy of the token text. */
		len = b - a; /* XXX this may compute NULL - NULL */
		str = (char *) PyObject_MALLOC(len + 1);
		if (str == NULL) {
			fprintf(stderr, "no mem for next token\n");
			err_ret->error = E_NOMEM;
			break;
		}
		if (len > 0)
			strncpy(str, a, len);
		str[len] = '\0';

#if 0 /* future keyword */
		/* Warn about yield as NAME */
		if (type == NAME && !ps->p_generators &&
		    len == 5 && str[0] == 'y' && strcmp(str, "yield") == 0)
			PySys_WriteStderr(yield_msg,
					  err_ret->filename==NULL ?
					  "<string>" : err_ret->filename,
					  tok->lineno);
#endif

		if ((err_ret->error =
		     PyParser_AddToken(ps, (int)type, str, tok->lineno,
				       &(err_ret->expected))) != E_OK) {
			/* str is released here only when the parser reported
			   a real error; on E_DONE it is left alone. */
			if (err_ret->error != E_DONE)
				PyObject_FREE(str);
			break;
		}
	}

	if (err_ret->error == E_DONE) {
		/* Take the finished tree away from the parser so that
		   PyParser_Delete below does not see it. */
		n = ps->p_tree;
		ps->p_tree = NULL;
	}
	else
		n = NULL;

	PyParser_Delete(ps);

	if (n == NULL) {
		if (tok->lineno <= 1 && tok->done == E_EOF)
			err_ret->error = E_EOF;
		err_ret->lineno = tok->lineno;
		err_ret->offset = tok->cur - tok->buf;
		if (tok->buf != NULL) {
			size_t len = tok->inp - tok->buf;
			err_ret->text = (char *) PyObject_MALLOC(len + 1);
			if (err_ret->text != NULL) {
				if (len > 0)
					strncpy(err_ret->text, tok->buf, len);
				err_ret->text[len] = '\0';
			}
		}
	} else if (tok->encoding != NULL) {
		node* r = PyNode_New(encoding_decl);
		if (r == NULL) {
			/* Allocation failed: report it instead of writing
			   through a NULL pointer, and release the tree that
			   was detached from the parser above. */
			err_ret->error = E_NOMEM;
			PyNode_Free(n);
			n = NULL;
		}
		else {
			/* The encoding string moves from the tokenizer to
			   the new node. */
			r->n_str = tok->encoding;
			r->n_nchildren = 1;
			r->n_child = n;
			tok->encoding = NULL;
			n = r;
		}
	}

	PyTokenizer_Free(tok);

	return n;
}
Beispiel #6
0
/*
 * Pull tokens from the tokenizer `tok` and feed them to a parser created
 * for grammar `g` with start symbol `start`.
 *
 * tok:     tokenizer state; it is freed on every path before returning.
 * g:       grammar handed to PyParser_New.
 * start:   start symbol handed to PyParser_New.
 * err_ret: receives the error code and, on failure, the line number,
 *          offset and a heap-allocated copy of the tokenizer buffer.
 * flags:   PyPARSE_* options (passed by value, never written back).
 *
 * Returns the parse tree (wrapped in an encoding_decl node when the
 * tokenizer recorded an encoding), or NULL on failure.
 */
static node *
parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
	 int flags)
{
	parser_state *ps;
	node *n;
	int started = 0, handling_import = 0, handling_with = 0;

	if ((ps = PyParser_New(g, start)) == NULL) {
		fprintf(stderr, "no mem for new parser\n");
		err_ret->error = E_NOMEM;
		PyTokenizer_Free(tok);
		return NULL;
	}
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
	if (flags & PyPARSE_WITH_IS_KEYWORD)
		ps->p_flags |= CO_FUTURE_WITH_STATEMENT;
#endif

	/* Token loop: left only through `break`, with err_ret->error
	   holding the reason. */
	for (;;) {
		char *a, *b;
		int type;
		size_t len;
		char *str;
		int col_offset;

		type = PyTokenizer_Get(tok, &a, &b);
		if (type == ERRORTOKEN) {
			err_ret->error = tok->done;
			break;
		}
		if (type == ENDMARKER && started) {
			type = NEWLINE; /* Add an extra newline */
			handling_with = handling_import = 0;
			started = 0;
			/* Add the right number of dedent tokens,
			   except if a certain flag is given --
			   codeop.py uses this. */
			if (tok->indent &&
			    !(flags & PyPARSE_DONT_IMPLY_DEDENT))
			{
				tok->pendin = -tok->indent;
				tok->indent = 0;
			}
		}
		else {
			started = 1;
		}

		/* Make a NUL-terminated private copy of the token text. */
		len = b - a; /* XXX this may compute NULL - NULL */
		str = (char *) PyObject_MALLOC(len + 1);
		if (str == NULL) {
			fprintf(stderr, "no mem for next token\n");
			err_ret->error = E_NOMEM;
			break;
		}
		if (len > 0)
			strncpy(str, a, len);
		str[len] = '\0';

#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
		/* This is only necessary to support the "as" warning, but
		   we don't want to warn about "as" in import statements. */
		if (type == NAME &&
		    len == 6 && str[0] == 'i' && strcmp(str, "import") == 0)
			handling_import = 1;

		/* Warn about with as NAME */
		if (type == NAME &&
		    !(ps->p_flags & CO_FUTURE_WITH_STATEMENT)) {
			if (len == 4 && str[0] == 'w' &&
			    strcmp(str, "with") == 0) {
				warn(with_msg, err_ret->filename, tok->lineno);
			}
			else if (!(handling_import || handling_with) &&
				 len == 2 && str[0] == 'a' &&
				 strcmp(str, "as") == 0) {
				warn(as_msg, err_ret->filename, tok->lineno);
			}
		}
		else if (type == NAME &&
			 (ps->p_flags & CO_FUTURE_WITH_STATEMENT) &&
			 len == 4 && str[0] == 'w' && strcmp(str, "with") == 0)
			handling_with = 1;
#endif
		/* Column of the token within the current line, or -1 when
		   the token starts before tok->line_start. */
		if (a >= tok->line_start)
			col_offset = (int)(a - tok->line_start);
		else
			col_offset = -1;

		if ((err_ret->error =
		     PyParser_AddToken(ps, (int)type, str, tok->lineno, col_offset,
				       &(err_ret->expected))) != E_OK) {
			/* str is released here only when the parser reported
			   a real error; on E_DONE it is left alone. */
			if (err_ret->error != E_DONE) {
				PyObject_FREE(str);
				err_ret->token = type;
			}
			break;
		}
	}

	if (err_ret->error == E_DONE) {
		/* Take the finished tree away from the parser so that
		   PyParser_Delete below does not see it. */
		n = ps->p_tree;
		ps->p_tree = NULL;
	}
	else
		n = NULL;

	PyParser_Delete(ps);

	if (n == NULL) {
		if (tok->lineno <= 1 && tok->done == E_EOF)
			err_ret->error = E_EOF;
		err_ret->lineno = tok->lineno;
		if (tok->buf != NULL) {
			size_t len;
			assert(tok->cur - tok->buf < INT_MAX);
			err_ret->offset = (int)(tok->cur - tok->buf);
			len = tok->inp - tok->buf;
			err_ret->text = (char *) PyObject_MALLOC(len + 1);
			if (err_ret->text != NULL) {
				if (len > 0)
					strncpy(err_ret->text, tok->buf, len);
				err_ret->text[len] = '\0';
			}
		}
	} else if (tok->encoding != NULL) {
		node* r = PyNode_New(encoding_decl);
		if (!r) {
			err_ret->error = E_NOMEM;
			/* The tree was detached from the parser above, so
			   nobody else will release it: free it before
			   dropping the pointer. */
			PyNode_Free(n);
			n = NULL;
			goto done;
		}
		/* The encoding string moves from the tokenizer to the new
		   node. */
		r->n_str = tok->encoding;
		r->n_nchildren = 1;
		r->n_child = n;
		tok->encoding = NULL;
		n = r;
	}

done:
	PyTokenizer_Free(tok);

	return n;
}