static void
translabel(grammar *g, label *lb)
{
	int i;
	
	if (Py_DebugFlag)
		printf("Translating label %s ...\n", PyGrammar_LabelRepr(lb));
	
	if (lb->lb_type == NAME) {
		for (i = 0; i < g->g_ndfas; i++) {
			if (strcmp(lb->lb_str, g->g_dfa[i].d_name) == 0) {
				if (Py_DebugFlag)
					printf(
					    "Label %s is non-terminal %d.\n",
					    lb->lb_str,
					    g->g_dfa[i].d_type);
				lb->lb_type = g->g_dfa[i].d_type;
				free(lb->lb_str);
				lb->lb_str = NULL;
				return;
			}
		}
		for (i = 0; i < (int)N_TOKENS; i++) {
			if (strcmp(lb->lb_str, _PyParser_TokenNames[i]) == 0) {
				if (Py_DebugFlag)
					printf("Label %s is terminal %d.\n",
						lb->lb_str, i);
				lb->lb_type = i;
				free(lb->lb_str);
				lb->lb_str = NULL;
				return;
			}
		}
		printf("Can't translate NAME label '%s'\n", lb->lb_str);
		return;
	}
	
	if (lb->lb_type == STRING) {
		if (isalpha(Py_CHARMASK(lb->lb_str[1])) ||
		    lb->lb_str[1] == '_') {
			char *p;
			char *src;
			char *dest;
			size_t name_len;
			if (Py_DebugFlag)
				printf("Label %s is a keyword\n", lb->lb_str);
			lb->lb_type = NAME;
			src = lb->lb_str + 1;
			p = strchr(src, '\'');
			if (p)
				name_len = p - src;
			else
				name_len = strlen(src);
			dest = (char *)malloc(name_len + 1);
			if (!dest) {
				printf("Can't alloc dest '%s'\n", src);
				return;
			}
			strncpy(dest, src, name_len);
			dest[name_len] = '\0';
			free(lb->lb_str);
			lb->lb_str = dest;
		}
		else if (lb->lb_str[2] == lb->lb_str[0]) {
			int type = (int) PyToken_OneChar(lb->lb_str[1]);
			if (type != OP) {
				lb->lb_type = type;
				free(lb->lb_str);
				lb->lb_str = NULL;
			}
			else
				printf("Unknown OP label %s\n",
					lb->lb_str);
		}
		else if (lb->lb_str[2] && lb->lb_str[3] == lb->lb_str[0]) {
			int type = (int) PyToken_TwoChars(lb->lb_str[1],
						   lb->lb_str[2]);
			if (type != OP) {
				lb->lb_type = type;
				free(lb->lb_str);
				lb->lb_str = NULL;
			}
			else
				printf("Unknown OP label %s\n",
					lb->lb_str);
		}
		else if (lb->lb_str[2] && lb->lb_str[3] && lb->lb_str[4] == lb->lb_str[0]) {
			int type = (int) PyToken_ThreeChars(lb->lb_str[1],
							    lb->lb_str[2],
							    lb->lb_str[3]);
			if (type != OP) {
				lb->lb_type = type;
				free(lb->lb_str);
				lb->lb_str = NULL;
			}
			else
				printf("Unknown OP label %s\n",
					lb->lb_str);
		}
		else
			printf("Can't translate STRING label %s\n",
				lb->lb_str);
	}
	else
		printf("Can't translate label '%s'\n",
		       PyGrammar_LabelRepr(lb));
}
Example #2
0
static void
translabel(grammar *g, label *lb)
{
    int i;

    if (lb->lb_type == NAME) {
        for (i = 0; i < g->g_ndfas; i++) {
            if (lb->lb_str_length == g->g_dfa[i].d_name_length &&
                memcmp(lb->lb_str, g->g_dfa[i].d_name, lb->lb_str_length) == 0) {
#ifndef NDEBUG
                printf("Label %.*s is non-terminal %d.\n",
                       (int)lb->lb_str_length, lb->lb_str,
                       g->g_dfa[i].d_type);
#endif
                lb->lb_type = g->g_dfa[i].d_type;
                lb->lb_str = NULL;
                lb->lb_str_length = 0;
                return;
            }
        }
        for (i = 0; i < (int)N_TOKENS; i++) {
            if (lb->lb_str_length == strlen((const char*)_PyParser_TokenNames[i]) &&
                memcmp(lb->lb_str, _PyParser_TokenNames[i], lb->lb_str_length) == 0) {
#ifndef NDEBUG
                printf("Label %.*s is terminal %d.\n",
                       (int)lb->lb_str_length, lb->lb_str,
                       i);
#endif
                lb->lb_type = i;
                lb->lb_str = NULL;
                lb->lb_str_length = 0;
                return;
            }
        }
        printf("Can't translate NAME label '%.*s'\n",
               (int)lb->lb_str_length, lb->lb_str);
        return;
    }

    if (lb->lb_type == STRING) {
        if (isalpha(Py_CHARMASK(lb->lb_str[1])) || lb->lb_str[1] == '_') {
            // Maintain the constraint of keywords to the ASCII character set.

          const unsigned char *p;
#ifndef NDEBUG
          printf("Label %.*s is a keyword\n",
                 (int)lb->lb_str_length, lb->lb_str);
#endif
            lb->lb_type = NAME;
            lb->lb_str += 1;
            p = CUC(strchr((const char *)lb->lb_str, '\''));
            if (p != NULL)
                lb->lb_str_length = p - lb->lb_str;
        }
        else {
            int type;
            unsigned int *utf;

            // Limit strings to `maximum_string_length` (+2 quotes).
            unsigned int utf_data[maximum_string_length+2];

            // Decode the label and bracket it [`utf`, `utf_end`).
            unsigned int *utf_end = &utf_data[0];
            const unsigned char *i=lb->lb_str;
            while (i - lb->lb_str < lb->lb_str_length) {
                i = decode(i, utf_end++);

                assert(utf_end - utf_data < maximum_string_length);
            }

            assert(utf_data[0] == (unsigned int)(unsigned char)'\'');

            utf_end -= 1;
            assert(*utf_end == (unsigned int)(unsigned char)'\'');

            utf = &utf_data[1];
            switch (utf_end - utf) {
              /*
               * Operators from the grammar are wrapped by single quotes, so I'm
               * looking for the operator's length plus two.
               */
            case 1:
                type = (int) PyToken_OneChar(utf[0]);
                if (type != OP) {
                    lb->lb_type = type;
                    lb->lb_str = NULL;
                    lb->lb_str_length = 0;
                }
                else printf("Unknown OP label %.*s\n",
                            (int)lb->lb_str_length, (const char *)lb->lb_str);
                break;
            case 2:
                type = (int) PyToken_TwoChars(utf[0], utf[1]);
                if (type != OP) {
                    lb->lb_type = type;
                    lb->lb_str = NULL;
                    lb->lb_str_length = 0;
                }
                else printf("Unknown OP label %.*s\n",
                            (int)lb->lb_str_length, (const char *)lb->lb_str);
                break;
            case 3:
                type = (int) PyToken_ThreeChars(utf[0], utf[1], utf[2]);
                if (type != OP) {
                    lb->lb_type = type;
                    lb->lb_str = NULL;
                    lb->lb_str_length = 0;
                }
                else printf("Unknown OP label %.*s\n",
                            (int)lb->lb_str_length, (const char *)lb->lb_str);
                break;
            default: printf("Can't translate STRING label %.*s\n",
                            (int)lb->lb_str_length, (const char*)lb->lb_str);
            }
        }
    }
    else
        printf("Can't translate label of type '%i'\n", lb->lb_type);
}
Example #3
0
static int
tok_get(register struct tok_state *tok, char **p_start, char **p_end)
{
	register int c;
	int blankline;

	*p_start = *p_end = NULL;
  nextline:
	tok->start = NULL;
	blankline = 0;

	/* Get indentation level */
	if (tok->atbol) {
		register int col = 0;
		register int altcol = 0;
		tok->atbol = 0;
		for (;;) {
			c = tok_nextc(tok);
			if (c == ' ')
				col++, altcol++;
			else if (c == '\t') {
				col = (col/tok->tabsize + 1) * tok->tabsize;
				altcol = (altcol/tok->alttabsize + 1)
					* tok->alttabsize;
			}
			else if (c == '\014') /* Control-L (formfeed) */
				col = altcol = 0; /* For Emacs users */
			else
				break;
		}
		tok_backup(tok, c);
		if (c == '#' || c == '\n') {
			/* Lines with only whitespace and/or comments
			   shouldn't affect the indentation and are
			   not passed to the parser as NEWLINE tokens,
			   except *totally* empty lines in interactive
			   mode, which signal the end of a command group. */
			if (col == 0 && c == '\n' && tok->prompt != NULL)
				blankline = 0; /* Let it through */
			else
				blankline = 1; /* Ignore completely */
			/* We can't jump back right here since we still
			   may need to skip to the end of a comment */
		}
		if (!blankline && tok->level == 0) {
			if (col == tok->indstack[tok->indent]) {
				/* No change */
				if (altcol != tok->altindstack[tok->indent]) {
					if (indenterror(tok))
						return ERRORTOKEN;
				}
			}
			else if (col > tok->indstack[tok->indent]) {
				/* Indent -- always one */
				if (tok->indent+1 >= MAXINDENT) {
					tok->done = E_TOODEEP;
					tok->cur = tok->inp;
					return ERRORTOKEN;
				}
				if (altcol <= tok->altindstack[tok->indent]) {
					if (indenterror(tok))
						return ERRORTOKEN;
				}
				tok->pendin++;
				tok->indstack[++tok->indent] = col;
				tok->altindstack[tok->indent] = altcol;
			}
			else /* col < tok->indstack[tok->indent] */ {
				/* Dedent -- any number, must be consistent */
				while (tok->indent > 0 &&
					col < tok->indstack[tok->indent]) {
					tok->pendin--;
					tok->indent--;
				}
				if (col != tok->indstack[tok->indent]) {
					tok->done = E_DEDENT;
					tok->cur = tok->inp;
					return ERRORTOKEN;
				}
				if (altcol != tok->altindstack[tok->indent]) {
					if (indenterror(tok))
						return ERRORTOKEN;
				}
			}
		}
	}

	tok->start = tok->cur;

	/* Return pending indents/dedents */
	if (tok->pendin != 0) {
		if (tok->pendin < 0) {
			tok->pendin++;
			return DEDENT;
		}
		else {
			tok->pendin--;
			return INDENT;
		}
	}

 again:
	tok->start = NULL;
	/* Skip spaces */
	do {
		c = tok_nextc(tok);
	} while (c == ' ' || c == '\t' || c == '\014');

	/* Set start of current token */
	tok->start = tok->cur - 1;

	/* Skip comment, while looking for tab-setting magic */
	if (c == '#') {
		static char *tabforms[] = {
			"tab-width:",		/* Emacs */
			":tabstop=",		/* vim, full form */
			":ts=",			/* vim, abbreviated form */
			"set tabsize=",		/* will vi never die? */
		/* more templates can be added here to support other editors */
		};
		char cbuf[80];
		char *tp, **cp;
		tp = cbuf;
		do {
			*tp++ = c = tok_nextc(tok);
		} while (c != EOF && c != '\n' &&
			 (size_t)(tp - cbuf + 1) < sizeof(cbuf));
		*tp = '\0';
		for (cp = tabforms;
		     cp < tabforms + sizeof(tabforms)/sizeof(tabforms[0]);
		     cp++) {
			if ((tp = strstr(cbuf, *cp))) {
				int newsize = atoi(tp + strlen(*cp));

				if (newsize >= 1 && newsize <= 40) {
					tok->tabsize = newsize;
					if (Py_VerboseFlag)
					    PySys_WriteStderr(
						"Tab size set to %d\n",
						newsize);
				}
			}
		}
		while (c != EOF && c != '\n')
			c = tok_nextc(tok);
	}

	/* Check for EOF and errors now */
	if (c == EOF) {
		return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN;
	}

	/* Identifier (most frequent token!) */
	if (isalpha(c) || c == '_') {
		/* Process r"", u"" and ur"" */
		switch (c) {
		case 'b':
		case 'B':
			c = tok_nextc(tok);
			if (c == 'r' || c == 'R')
				c = tok_nextc(tok);
			if (c == '"' || c == '\'')
				goto letter_quote;
			break;
		case 'r':
		case 'R':
			c = tok_nextc(tok);
			if (c == '"' || c == '\'')
				goto letter_quote;
			break;
		case 'u':
		case 'U':
			c = tok_nextc(tok);
			if (c == 'r' || c == 'R')
				c = tok_nextc(tok);
			if (c == '"' || c == '\'')
				goto letter_quote;
			break;
		}
		while (isalnum(c) || c == '_') {
			c = tok_nextc(tok);
		}
		tok_backup(tok, c);
		*p_start = tok->start;
		*p_end = tok->cur;
		return NAME;
	}

	/* Newline */
	if (c == '\n') {
		tok->atbol = 1;
		if (blankline || tok->level > 0)
			goto nextline;
		*p_start = tok->start;
		*p_end = tok->cur - 1; /* Leave '\n' out of the string */
                tok->cont_line = 0;
		return NEWLINE;
	}

	/* Period or number starting with period? */
	if (c == '.') {
		c = tok_nextc(tok);
		if (isdigit(c)) {
			goto fraction;
		}
		else {
			tok_backup(tok, c);
			*p_start = tok->start;
			*p_end = tok->cur;
			return DOT;
		}
	}

	/* Number */
	if (isdigit(c)) {
		if (c == '0') {
			/* Hex, octal or binary -- maybe. */
			c = tok_nextc(tok);
			if (c == '.')
				goto fraction;
#ifndef WITHOUT_COMPLEX
			if (c == 'j' || c == 'J')
				goto imaginary;
#endif
			if (c == 'x' || c == 'X') {

				/* Hex */
				c = tok_nextc(tok);
				if (!isxdigit(c)) {
					tok->done = E_TOKEN;
					tok_backup(tok, c);
					return ERRORTOKEN;
				}
				do {
					c = tok_nextc(tok);
				} while (isxdigit(c));
			}
                        else if (c == 'o' || c == 'O') {
				/* Octal */
				c = tok_nextc(tok);
				if (c < '0' || c >= '8') {
					tok->done = E_TOKEN;
					tok_backup(tok, c);
					return ERRORTOKEN;
				}
				do {
					c = tok_nextc(tok);
				} while ('0' <= c && c < '8');
			}
			else if (c == 'b' || c == 'B') {
				/* Binary */
				c = tok_nextc(tok);
				if (c != '0' && c != '1') {
					tok->done = E_TOKEN;
					tok_backup(tok, c);
					return ERRORTOKEN;
				}
				do {
					c = tok_nextc(tok);
				} while (c == '0' || c == '1');
			}
			else {
				int found_decimal = 0;
				/* Octal; c is first char of it */
				/* There's no 'isoctdigit' macro, sigh */
				while ('0' <= c && c < '8') {
					c = tok_nextc(tok);
				}
				if (isdigit(c)) {
					found_decimal = 1;
					do {
						c = tok_nextc(tok);
					} while (isdigit(c));
				}
				if (c == '.')
					goto fraction;
				else if (c == 'e' || c == 'E')
					goto exponent;
#ifndef WITHOUT_COMPLEX
				else if (c == 'j' || c == 'J')
					goto imaginary;
#endif
				else if (found_decimal) {
					tok->done = E_TOKEN;
					tok_backup(tok, c);
					return ERRORTOKEN;
				}
			}
			if (c == 'l' || c == 'L')
				c = tok_nextc(tok);
		}
		else {
			/* Decimal */
			do {
				c = tok_nextc(tok);
			} while (isdigit(c));
			if (c == 'l' || c == 'L')
				c = tok_nextc(tok);
			else {
				/* Accept floating point numbers. */
				if (c == '.') {
		fraction:
					/* Fraction */
					do {
						c = tok_nextc(tok);
					} while (isdigit(c));
				}
				if (c == 'e' || c == 'E') {
		exponent:
					/* Exponent part */
					c = tok_nextc(tok);
					if (c == '+' || c == '-')
						c = tok_nextc(tok);
					if (!isdigit(c)) {
						tok->done = E_TOKEN;
						tok_backup(tok, c);
						return ERRORTOKEN;
					}
					do {
						c = tok_nextc(tok);
					} while (isdigit(c));
				}
#ifndef WITHOUT_COMPLEX
				if (c == 'j' || c == 'J')
					/* Imaginary part */
		imaginary:
					c = tok_nextc(tok);
#endif
			}
		}
		tok_backup(tok, c);
		*p_start = tok->start;
		*p_end = tok->cur;
		return NUMBER;
	}

  letter_quote:
	/* String */
	if (c == '\'' || c == '"') {
		Py_ssize_t quote2 = tok->cur - tok->start + 1;
		int quote = c;
		int triple = 0;
		int tripcount = 0;
		for (;;) {
			c = tok_nextc(tok);
			if (c == '\n') {
				if (!triple) {
					tok->done = E_EOLS;
					tok_backup(tok, c);
					return ERRORTOKEN;
				}
				tripcount = 0;
                                tok->cont_line = 1; /* multiline string. */
			}
			else if (c == EOF) {
				if (triple)
					tok->done = E_EOFS;
				else
					tok->done = E_EOLS;
				tok->cur = tok->inp;
				return ERRORTOKEN;
			}
			else if (c == quote) {
				tripcount++;
				if (tok->cur - tok->start == quote2) {
					c = tok_nextc(tok);
					if (c == quote) {
						triple = 1;
						tripcount = 0;
						continue;
					}
					tok_backup(tok, c);
				}
				if (!triple || tripcount == 3)
					break;
			}
			else if (c == '\\') {
				tripcount = 0;
				c = tok_nextc(tok);
				if (c == EOF) {
					tok->done = E_EOLS;
					tok->cur = tok->inp;
					return ERRORTOKEN;
				}
			}
			else
				tripcount = 0;
		}
		*p_start = tok->start;
		*p_end = tok->cur;
		return STRING;
	}

	/* Line continuation */
	if (c == '\\') {
		c = tok_nextc(tok);
		if (c != '\n') {
			tok->done = E_LINECONT;
			tok->cur = tok->inp;
			return ERRORTOKEN;
		}
                tok->cont_line = 1;
		goto again; /* Read next line */
	}

	/* Check for two-character token */
	{
		int c2 = tok_nextc(tok);
		int token = PyToken_TwoChars(c, c2);
#ifndef PGEN
		if (Py_Py3kWarningFlag && token == NOTEQUAL && c == '<') {
			if (PyErr_WarnExplicit(PyExc_DeprecationWarning,
					       "<> not supported in 3.x; use !=",
					       tok->filename, tok->lineno,
					       NULL, NULL)) {
				return ERRORTOKEN;
			}
		}
#endif
		if (token != OP) {
			int c3 = tok_nextc(tok);
			int token3 = PyToken_ThreeChars(c, c2, c3);
			if (token3 != OP) {
				token = token3;
			} else {
				tok_backup(tok, c3);
			}
			*p_start = tok->start;
			*p_end = tok->cur;
			return token;
		}
		tok_backup(tok, c2);
	}

	/* Keep track of parentheses nesting level */
	switch (c) {
	case '(':
	case '[':
	case '{':
		tok->level++;
		break;
	case ')':
	case ']':
	case '}':
		tok->level--;
		break;
	}

	/* Punctuation character */
	*p_start = tok->start;
	*p_end = tok->cur;
	return PyToken_OneChar(c);
}
Example #4
0
static int
tok_get(struct tok_state *tok, char **p_start, char **p_end)
{
    int c;
    int blankline, nonascii;

    *p_start = *p_end = NULL;
  nextline:
    tok->start = NULL;
    blankline = 0;

    /* Get indentation level */
    if (tok->atbol) {
        int col = 0;
        int altcol = 0;
        tok->atbol = 0;
        for (;;) {
            c = tok_nextc(tok);
            if (c == ' ')
                col++, altcol++;
            else if (c == '\t') {
                col = (col/tok->tabsize + 1) * tok->tabsize;
                altcol = (altcol/tok->alttabsize + 1)
                    * tok->alttabsize;
            }
            else if (c == '\014') /* Control-L (formfeed) */
                col = altcol = 0; /* For Emacs users */
            else
                break;
        }
        tok_backup(tok, c);
        if (c == '#' || c == '\n') {
            /* Lines with only whitespace and/or comments
               shouldn't affect the indentation and are
               not passed to the parser as NEWLINE tokens,
               except *totally* empty lines in interactive
               mode, which signal the end of a command group. */
            if (col == 0 && c == '\n' && tok->prompt != NULL)
                blankline = 0; /* Let it through */
            else
                blankline = 1; /* Ignore completely */
            /* We can't jump back right here since we still
               may need to skip to the end of a comment */
        }
        if (!blankline && tok->level == 0) {
            if (col == tok->indstack[tok->indent]) {
                /* No change */
                if (altcol != tok->altindstack[tok->indent]) {
                    if (indenterror(tok))
                        return ERRORTOKEN;
                }
            }
            else if (col > tok->indstack[tok->indent]) {
                /* Indent -- always one */
                if (tok->indent+1 >= MAXINDENT) {
                    tok->done = E_TOODEEP;
                    tok->cur = tok->inp;
                    return ERRORTOKEN;
                }
                if (altcol <= tok->altindstack[tok->indent]) {
                    if (indenterror(tok))
                        return ERRORTOKEN;
                }
                tok->pendin++;
                tok->indstack[++tok->indent] = col;
                tok->altindstack[tok->indent] = altcol;
            }
            else /* col < tok->indstack[tok->indent] */ {
                /* Dedent -- any number, must be consistent */
                while (tok->indent > 0 &&
                    col < tok->indstack[tok->indent]) {
                    tok->pendin--;
                    tok->indent--;
                }
                if (col != tok->indstack[tok->indent]) {
                    tok->done = E_DEDENT;
                    tok->cur = tok->inp;
                    return ERRORTOKEN;
                }
                if (altcol != tok->altindstack[tok->indent]) {
                    if (indenterror(tok))
                        return ERRORTOKEN;
                }
            }
        }
    }

    tok->start = tok->cur;

    /* Return pending indents/dedents */
    if (tok->pendin != 0) {
        if (tok->pendin < 0) {
            tok->pendin++;
            return DEDENT;
        }
        else {
            tok->pendin--;
            return INDENT;
        }
    }

 again:
    tok->start = NULL;
    /* Skip spaces */
    do {
        c = tok_nextc(tok);
    } while (c == ' ' || c == '\t' || c == '\014');

    /* Set start of current token */
    tok->start = tok->cur - 1;

    /* Skip comment */
    if (c == '#')
        while (c != EOF && c != '\n')
            c = tok_nextc(tok);

    /* Check for EOF and errors now */
    if (c == EOF) {
        return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN;
    }

    /* Identifier (most frequent token!) */
    nonascii = 0;
    if (is_potential_identifier_start(c)) {
        /* Process b"", r"", u"", br"" and rb"" */
        int saw_b = 0, saw_r = 0, saw_u = 0;
        while (1) {
            if (!(saw_b || saw_u) && (c == 'b' || c == 'B'))
                saw_b = 1;
            /* Since this is a backwards compatibility support literal we don't
               want to support it in arbitrary order like byte literals. */
            else if (!(saw_b || saw_u || saw_r) && (c == 'u' || c == 'U'))
                saw_u = 1;
            /* ur"" and ru"" are not supported */
            else if (!(saw_r || saw_u) && (c == 'r' || c == 'R'))
                saw_r = 1;
            else
                break;
            c = tok_nextc(tok);
            if (c == '"' || c == '\'')
                goto letter_quote;
        }
        while (is_potential_identifier_char(c)) {
            if (c >= 128)
                nonascii = 1;
            c = tok_nextc(tok);
        }
        tok_backup(tok, c);
        if (nonascii && !verify_identifier(tok))
            return ERRORTOKEN;
        *p_start = tok->start;
        *p_end = tok->cur;
        return NAME;
    }

    /* Newline */
    if (c == '\n') {
        tok->atbol = 1;
        if (blankline || tok->level > 0)
            goto nextline;
        *p_start = tok->start;
        *p_end = tok->cur - 1; /* Leave '\n' out of the string */
        tok->cont_line = 0;
        return NEWLINE;
    }

    /* Period or number starting with period? */
    if (c == '.') {
        c = tok_nextc(tok);
        if (isdigit(c)) {
            goto fraction;
        } else if (c == '.') {
            c = tok_nextc(tok);
            if (c == '.') {
                *p_start = tok->start;
                *p_end = tok->cur;
                return ELLIPSIS;
            } else {
                tok_backup(tok, c);
            }
            tok_backup(tok, '.');
        } else {
            tok_backup(tok, c);
        }
        *p_start = tok->start;
        *p_end = tok->cur;
        return DOT;
    }

    /* Number */
    if (isdigit(c)) {
        if (c == '0') {
            /* Hex, octal or binary -- maybe. */
            c = tok_nextc(tok);
            if (c == '.')
                goto fraction;
            if (c == 'j' || c == 'J')
                goto imaginary;
            if (c == 'x' || c == 'X') {

                /* Hex */
                c = tok_nextc(tok);
                if (!isxdigit(c)) {
                    tok->done = E_TOKEN;
                    tok_backup(tok, c);
                    return ERRORTOKEN;
                }
                do {
                    c = tok_nextc(tok);
                } while (isxdigit(c));
            }
            else if (c == 'o' || c == 'O') {
                /* Octal */
                c = tok_nextc(tok);
                if (c < '0' || c >= '8') {
                    tok->done = E_TOKEN;
                    tok_backup(tok, c);
                    return ERRORTOKEN;
                }
                do {
                    c = tok_nextc(tok);
                } while ('0' <= c && c < '8');
            }
            else if (c == 'b' || c == 'B') {
                /* Binary */
                c = tok_nextc(tok);
                if (c != '0' && c != '1') {
                    tok->done = E_TOKEN;
                    tok_backup(tok, c);
                    return ERRORTOKEN;
                }
                do {
                    c = tok_nextc(tok);
                } while (c == '0' || c == '1');
            }
            else {
                int nonzero = 0;
                /* maybe old-style octal; c is first char of it */
                /* in any case, allow '0' as a literal */
                while (c == '0')
                    c = tok_nextc(tok);
                while (isdigit(c)) {
                    nonzero = 1;
                    c = tok_nextc(tok);
                }
                if (c == '.')
                    goto fraction;
                else if (c == 'e' || c == 'E')
                    goto exponent;
                else if (c == 'j' || c == 'J')
                    goto imaginary;
                else if (nonzero) {
                    tok->done = E_TOKEN;
                    tok_backup(tok, c);
                    return ERRORTOKEN;
                }
            }
        }
        else {
            /* Decimal */
            do {
                c = tok_nextc(tok);
            } while (isdigit(c));
            {
                /* Accept floating point numbers. */
                if (c == '.') {
        fraction:
                    /* Fraction */
                    do {
                        c = tok_nextc(tok);
                    } while (isdigit(c));
                }
                if (c == 'e' || c == 'E') {
                    int e;
                  exponent:
                    e = c;
                    /* Exponent part */
                    c = tok_nextc(tok);
                    if (c == '+' || c == '-') {
                        c = tok_nextc(tok);
                        if (!isdigit(c)) {
                            tok->done = E_TOKEN;
                            tok_backup(tok, c);
                            return ERRORTOKEN;
                        }
                    } else if (!isdigit(c)) {
                        tok_backup(tok, c);
                        tok_backup(tok, e);
                        *p_start = tok->start;
                        *p_end = tok->cur;
                        return NUMBER;
                    }
                    do {
                        c = tok_nextc(tok);
                    } while (isdigit(c));
                }
                if (c == 'j' || c == 'J')
                    /* Imaginary part */
        imaginary:
                    c = tok_nextc(tok);
            }
        }
        tok_backup(tok, c);
        *p_start = tok->start;
        *p_end = tok->cur;
        return NUMBER;
    }

  letter_quote:
    /* String */
    if (c == '\'' || c == '"') {
        int quote = c;
        int quote_size = 1;             /* 1 or 3 */
        int end_quote_size = 0;

        /* Find the quote size and start of string */
        c = tok_nextc(tok);
        if (c == quote) {
            c = tok_nextc(tok);
            if (c == quote)
                quote_size = 3;
            else
                end_quote_size = 1;     /* empty string found */
        }
        if (c != quote)
            tok_backup(tok, c);

        /* Get rest of string */
        while (end_quote_size != quote_size) {
            c = tok_nextc(tok);
            if (c == EOF) {
                if (quote_size == 3)
                    tok->done = E_EOFS;
                else
                    tok->done = E_EOLS;
                tok->cur = tok->inp;
                return ERRORTOKEN;
            }
            if (quote_size == 1 && c == '\n') {
                tok->done = E_EOLS;
                tok->cur = tok->inp;
                return ERRORTOKEN;
            }
            if (c == quote)
                end_quote_size += 1;
            else {
                end_quote_size = 0;
                if (c == '\\')
                c = tok_nextc(tok);  /* skip escaped char */
            }
        }

        *p_start = tok->start;
        *p_end = tok->cur;
        return STRING;
    }

    /* Line continuation */
    if (c == '\\') {
        c = tok_nextc(tok);
        if (c != '\n') {
            tok->done = E_LINECONT;
            tok->cur = tok->inp;
            return ERRORTOKEN;
        }
        tok->cont_line = 1;
        goto again; /* Read next line */
    }

    /* Check for two-character token */
    {
        int c2 = tok_nextc(tok);
        int token = PyToken_TwoChars(c, c2);
        if (token != OP) {
            int c3 = tok_nextc(tok);
            int token3 = PyToken_ThreeChars(c, c2, c3);
            if (token3 != OP) {
                token = token3;
            } else {
                tok_backup(tok, c3);
            }
            *p_start = tok->start;
            *p_end = tok->cur;
            return token;
        }
        tok_backup(tok, c2);
    }

    /* Keep track of parentheses nesting level */
    switch (c) {
    case '(':
    case '[':
    case '{':
        tok->level++;
        break;
    case ')':
    case ']':
    case '}':
        tok->level--;
        break;
    }

    /* Punctuation character */
    *p_start = tok->start;
    *p_end = tok->cur;
    return PyToken_OneChar(c);
}