/* Return True if the buffer of `len` bytes starting at `cptr` is non-empty
   and every byte satisfies Py_ISALNUM; return False otherwise.
   The result is returned through Py_RETURN_TRUE / Py_RETURN_FALSE. */
PyObject*
_Py_bytes_isalnum(const char *cptr, Py_ssize_t len)
{
    const unsigned char *bytes = (const unsigned char *) cptr;
    Py_ssize_t idx;

    /* An empty buffer is never alphanumeric. */
    if (len == 0) {
        Py_RETURN_FALSE;
    }

    /* Reject on the first byte that is not a letter or digit. */
    for (idx = 0; idx < len; idx++) {
        if (!Py_ISALNUM(bytes[idx])) {
            Py_RETURN_FALSE;
        }
    }

    Py_RETURN_TRUE;
}
/* all_name_chars(o): return 1 iff o is an ASCII string whose characters all
   match [a-zA-Z0-9_] (the empty string qualifies); return 0 otherwise. */
static int
all_name_chars(PyObject *o)
{
    const unsigned char *chars;
    Py_ssize_t count;
    Py_ssize_t idx;

    /* Non-ASCII strings are rejected outright. */
    if (!PyUnicode_IS_ASCII(o)) {
        return 0;
    }

    chars = PyUnicode_1BYTE_DATA(o);
    count = PyUnicode_GET_LENGTH(o);

    for (idx = 0; idx < count; idx++) {
        unsigned char ch = chars[idx];

        if (ch != '_' && !Py_ISALNUM(ch)) {
            return 0;
        }
    }
    return 1;
}
/* Scan one source line for a coding declaration of the form
 * "coding" followed by ':' or '=', optional blanks, and an encoding name
 * made of [A-Za-z0-9._-] characters.
 *
 *   s    - start of the line.  The search itself is bounded by `size`, but
 *          skipping blanks and collecting the name reads on until a
 *          non-matching byte (assumes the buffer is NUL-terminated --
 *          TODO confirm against callers).
 *   spec - out parameter.  Always reset to NULL first; on a match it
 *          receives a string allocated with new_string() holding the name
 *          as returned by get_normal_name().
 *   size - number of bytes in the line; lines of fewer than 7 bytes are
 *          never searched.
 *   tok  - tokenizer state, passed through to new_string().
 *
 * Returns 1 when the scan completed (with or without a match) and 0 when
 * new_string() returned NULL; *spec is NULL whenever 0 is returned.
 *
 * Only the FIRST declaration on the line is used.  The scan stops as soon
 * as a name has been stored, so a later "coding:" on the same line can
 * neither overwrite *spec nor leak the string already allocated for it.
 */
static int
get_coding_spec(const char *s, char **spec, Py_ssize_t size, struct tok_state *tok)
{
    Py_ssize_t i;

    *spec = NULL;
    /* Coding spec must be in a comment, and that comment must be
     * the only statement on the source code line. */
    for (i = 0; i < size - 6; i++) {
        if (s[i] == '#')
            break;
        if (s[i] != ' ' && s[i] != '\t' && s[i] != '\014')
            return 1;
    }
    for (; i < size - 6; i++) { /* XXX inefficient search */
        const char *t = s + i;
        const char *begin;
        char *r;
        char *q;

        if (strncmp(t, "coding", 6) != 0)
            continue;
        t += 6;
        if (t[0] != ':' && t[0] != '=')
            continue;

        /* Skip the separator and any blanks that follow it. */
        do {
            t++;
        } while (t[0] == '\x20' || t[0] == '\t');

        /* Collect the encoding name. */
        begin = t;
        while (Py_ISALNUM(t[0]) ||
               t[0] == '-' || t[0] == '_' || t[0] == '.')
            t++;

        if (begin == t)
            continue;           /* separator without a name: keep looking */

        r = new_string(begin, t - begin, tok);
        if (!r)
            return 0;
        q = get_normal_name(r);
        if (r != q) {
            /* get_normal_name() handed back a different string; replace
               our copy of the raw name with a copy of that one. */
            PyMem_FREE(r);
            r = new_string(q, strlen(q), tok);
            if (!r)
                return 0;
        }
        *spec = r;
        /* First declaration wins.  Continuing would let a second match
           overwrite *spec and leak r. */
        break;
    }
    return 1;
}
/* Read the next token from the tokenizer state `tok` and return its type.
 *
 *   tok     - tokenizer state; its cursor (tok->cur), token start
 *             (tok->start), indentation stacks, pending indent counter,
 *             bracket nesting level and error code (tok->done) are updated.
 *   p_start - out: set to the start of the token text (tok->start), or left
 *             NULL for tokens that carry no text in this function
 *             (INDENT, DEDENT, ENDMARKER, ERRORTOKEN).
 *   p_end   - out: set to one past the end of the token text (tok->cur; for
 *             NEWLINE the '\n' itself is excluded), or left NULL as above.
 *
 * Returns one of NAME, NUMBER, STRING, NEWLINE, INDENT, DEDENT, DOT,
 * ENDMARKER, ERRORTOKEN, or the value produced by PyToken_OneChar /
 * PyToken_TwoChars / PyToken_ThreeChars for operators and punctuation.
 * On the error paths that this function detects itself, tok->done is set
 * to an E_* code before ERRORTOKEN is returned.
 *
 * The statement order and the goto targets (nextline, again, fraction,
 * exponent, imaginary, letter_quote) are load-bearing; several of the
 * labels are jumped to from outside the block that contains them.
 */
static int
tok_get(register struct tok_state *tok, char **p_start, char **p_end)
{
    register int c;
    int blankline;

    *p_start = *p_end = NULL;
  nextline:
    tok->start = NULL;
    blankline = 0;

    /* Get indentation level */
    /* tok->atbol is set to 1 below whenever a '\n' is consumed, so this
       branch runs once at the start of each following line. */
    if (tok->atbol) {
        /* col and altcol measure the same leading whitespace using two
           different tab sizes (tok->tabsize and tok->alttabsize). */
        register int col = 0;
        register int altcol = 0;
        tok->atbol = 0;
        for (;;) {
            c = tok_nextc(tok);
            if (c == ' ')
                col++, altcol++;
            else if (c == '\t') {
                /* Advance to the next multiple of the tab size. */
                col = (col/tok->tabsize + 1) * tok->tabsize;
                altcol = (altcol/tok->alttabsize + 1) * tok->alttabsize;
            }
            else if (c == '\014') /* Control-L (formfeed) */
                col = altcol = 0; /* For Emacs users */
            else
                break;
        }
        /* Un-read the first non-whitespace character. */
        tok_backup(tok, c);
        if (c == '#' || c == '\n') {
            /* Lines with only whitespace and/or comments
               shouldn't affect the indentation and are
               not passed to the parser as NEWLINE tokens,
               except *totally* empty lines in interactive
               mode, which signal the end of a command group. */
            if (col == 0 && c == '\n' && tok->prompt != NULL)
                blankline = 0; /* Let it through */
            else
                blankline = 1; /* Ignore completely */
            /* We can't jump back right here since we still
               may need to skip to the end of a comment */
        }
        /* Indentation is only examined outside brackets (level == 0). */
        if (!blankline && tok->level == 0) {
            if (col == tok->indstack[tok->indent]) {
                /* No change */
                /* Same column with tabsize but a different one with
                   alttabsize: let indenterror() decide (defined
                   elsewhere; a nonzero result aborts the token). */
                if (altcol != tok->altindstack[tok->indent]) {
                    if (indenterror(tok))
                        return ERRORTOKEN;
                }
            }
            else if (col > tok->indstack[tok->indent]) {
                /* Indent -- always one */
                if (tok->indent+1 >= MAXINDENT) {
                    tok->done = E_TOODEEP;
                    tok->cur = tok->inp;
                    return ERRORTOKEN;
                }
                if (altcol <= tok->altindstack[tok->indent]) {
                    if (indenterror(tok))
                        return ERRORTOKEN;
                }
                /* Queue one INDENT and push the new level. */
                tok->pendin++;
                tok->indstack[++tok->indent] = col;
                tok->altindstack[tok->indent] = altcol;
            }
            else /* col < tok->indstack[tok->indent] */ {
                /* Dedent -- any number, must be consistent */
                /* Pop levels, queueing one DEDENT per level popped. */
                while (tok->indent > 0 && col < tok->indstack[tok->indent]) {
                    tok->pendin--;
                    tok->indent--;
                }
                /* The column must match a level that is on the stack. */
                if (col != tok->indstack[tok->indent]) {
                    tok->done = E_DEDENT;
                    tok->cur = tok->inp;
                    return ERRORTOKEN;
                }
                if (altcol != tok->altindstack[tok->indent]) {
                    if (indenterror(tok))
                        return ERRORTOKEN;
                }
            }
        }
    }

    tok->start = tok->cur;

    /* Return pending indents/dedents */
    /* pendin > 0 means INDENT tokens are owed, pendin < 0 means DEDENT
       tokens are owed; one is emitted per call until it reaches 0. */
    if (tok->pendin != 0) {
        if (tok->pendin < 0) {
            tok->pendin++;
            return DEDENT;
        }
        else {
            tok->pendin--;
            return INDENT;
        }
    }

  again:
    tok->start = NULL;
    /* Skip spaces */
    do {
        c = tok_nextc(tok);
    } while (c == ' ' || c == '\t' || c == '\014');

    /* Set start of current token */
    tok->start = tok->cur - 1;

    /* Skip comment, while looking for tab-setting magic */
    if (c == '#') {
        static char *tabforms[] = {
            "tab-width:", /* Emacs */
            ":tabstop=", /* vim, full form */
            ":ts=", /* vim, abbreviated form */
            "set tabsize=", /* will vi never die? */
            /* more templates can be added here to support other editors */
        };
        char cbuf[80];
        char *tp, **cp;
        /* Copy the beginning of the comment (at most sizeof(cbuf) - 1
           characters, including the terminating '\n' or EOF value if it
           is reached) into cbuf and NUL-terminate it. */
        tp = cbuf;
        do {
            *tp++ = c = tok_nextc(tok);
        } while (c != EOF && c != '\n' && (size_t)(tp - cbuf + 1) < sizeof(cbuf));
        *tp = '\0';
        /* Look for each editor directive; the number that follows it
           becomes the new tab size when it lies in 1..40. */
        for (cp = tabforms; cp < tabforms + sizeof(tabforms)/sizeof(tabforms[0]); cp++) {
            if ((tp = strstr(cbuf, *cp))) {
                int newsize = atoi(tp + strlen(*cp));

                if (newsize >= 1 && newsize <= 40) {
                    tok->tabsize = newsize;
                    if (Py_VerboseFlag)
                        PySys_WriteStderr( "Tab size set to %d\n", newsize);
                }
            }
        }
        /* Discard whatever is left of the comment line. */
        while (c != EOF && c != '\n')
            c = tok_nextc(tok);
    }

    /* Check for EOF and errors now */
    /* tok->done == E_EOF is the clean end of input; any other value is
       reported as an error token. */
    if (c == EOF) {
        return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN;
    }

    /* Identifier (most frequent token!) */
    if (Py_ISALPHA(c) || c == '_') {
        /* Process string prefixes: b"" and br"", r"", u"" and ur""
           (either case).  If the character after the prefix is a quote,
           jump to the string scanner; otherwise c already holds the
           character after the prefix and the identifier loop below
           continues from there. */
        switch (c) {
        case 'b':
        case 'B':
            c = tok_nextc(tok);
            if (c == 'r' || c == 'R')
                c = tok_nextc(tok);
            if (c == '"' || c == '\'')
                goto letter_quote;
            break;
        case 'r':
        case 'R':
            c = tok_nextc(tok);
            if (c == '"' || c == '\'')
                goto letter_quote;
            break;
        case 'u':
        case 'U':
            c = tok_nextc(tok);
            if (c == 'r' || c == 'R')
                c = tok_nextc(tok);
            if (c == '"' || c == '\'')
                goto letter_quote;
            break;
        }
        /* Consume the rest of the identifier. */
        while (c != EOF && (Py_ISALNUM(c) || c == '_')) {
            c = tok_nextc(tok);
        }
        tok_backup(tok, c);
        *p_start = tok->start;
        *p_end = tok->cur;
        return NAME;
    }

    /* Newline */
    if (c == '\n') {
        tok->atbol = 1;
        /* Blank/comment-only lines and newlines inside brackets do not
           produce a NEWLINE token; start over on the next line. */
        if (blankline || tok->level > 0)
            goto nextline;
        *p_start = tok->start;
        *p_end = tok->cur - 1; /* Leave '\n' out of the string */
        tok->cont_line = 0;
        return NEWLINE;
    }

    /* Period or number starting with period? */
    if (c == '.') {
        c = tok_nextc(tok);
        if (isdigit(c)) {
            goto fraction;
        }
        else {
            tok_backup(tok, c);
            *p_start = tok->start;
            *p_end = tok->cur;
            return DOT;
        }
    }

    /* Number */
    if (isdigit(c)) {
        if (c == '0') {
            /* Hex, octal or binary -- maybe. */
            c = tok_nextc(tok);
            if (c == '.')
                goto fraction;
#ifndef WITHOUT_COMPLEX
            if (c == 'j' || c == 'J')
                goto imaginary;
#endif
            if (c == 'x' || c == 'X') {
                /* Hex */
                /* At least one hex digit must follow the prefix. */
                c = tok_nextc(tok);
                if (!isxdigit(c)) {
                    tok->done = E_TOKEN;
                    tok_backup(tok, c);
                    return ERRORTOKEN;
                }
                do {
                    c = tok_nextc(tok);
                } while (isxdigit(c));
            }
            else if (c == 'o' || c == 'O') {
                /* Octal */
                /* At least one octal digit must follow the prefix. */
                c = tok_nextc(tok);
                if (c < '0' || c >= '8') {
                    tok->done = E_TOKEN;
                    tok_backup(tok, c);
                    return ERRORTOKEN;
                }
                do {
                    c = tok_nextc(tok);
                } while ('0' <= c && c < '8');
            }
            else if (c == 'b' || c == 'B') {
                /* Binary */
                /* At least one binary digit must follow the prefix. */
                c = tok_nextc(tok);
                if (c != '0' && c != '1') {
                    tok->done = E_TOKEN;
                    tok_backup(tok, c);
                    return ERRORTOKEN;
                }
                do {
                    c = tok_nextc(tok);
                } while (c == '0' || c == '1');
            }
            else {
                /* Set when a digit 8 or 9 is seen after the leading 0;
                   that is only accepted if the literal turns out to be a
                   float or imaginary number. */
                int found_decimal = 0;
                /* Octal; c is first char of it */
                /* There's no 'isoctdigit' macro, sigh */
                while ('0' <= c && c < '8') {
                    c = tok_nextc(tok);
                }
                if (isdigit(c)) {
                    found_decimal = 1;
                    do {
                        c = tok_nextc(tok);
                    } while (isdigit(c));
                }
                if (c == '.')
                    goto fraction;
                else if (c == 'e' || c == 'E')
                    goto exponent;
#ifndef WITHOUT_COMPLEX
                else if (c == 'j' || c == 'J')
                    goto imaginary;
#endif
                else if (found_decimal) {
                    tok->done = E_TOKEN;
                    tok_backup(tok, c);
                    return ERRORTOKEN;
                }
            }
            /* Optional trailing 'l'/'L' suffix. */
            if (c == 'l' || c == 'L')
                c = tok_nextc(tok);
        }
        else {
            /* Decimal */
            do {
                c = tok_nextc(tok);
            } while (isdigit(c));
            /* Optional trailing 'l'/'L' suffix. */
            if (c == 'l' || c == 'L')
                c = tok_nextc(tok);
            else {
                /* Accept floating point numbers. */
                if (c == '.') {
        fraction:
                    /* Fraction */
                    do {
                        c = tok_nextc(tok);
                    } while (isdigit(c));
                }
                if (c == 'e' || c == 'E') {
                    /* Remembers the 'e'/'E' so it can be pushed back. */
                    int e;
                  exponent:
                    e = c;
                    /* Exponent part */
                    c = tok_nextc(tok);
                    if (c == '+' || c == '-') {
                        /* A sign must be followed by a digit. */
                        c = tok_nextc(tok);
                        if (!isdigit(c)) {
                            tok->done = E_TOKEN;
                            tok_backup(tok, c);
                            return ERRORTOKEN;
                        }
                    } else if (!isdigit(c)) {
                        /* Not an exponent after all: push back both the
                           lookahead character and the 'e', and return the
                           number scanned so far. */
                        tok_backup(tok, c);
                        tok_backup(tok, e);
                        *p_start = tok->start;
                        *p_end = tok->cur;
                        return NUMBER;
                    }
                    do {
                        c = tok_nextc(tok);
                    } while (isdigit(c));
                }
#ifndef WITHOUT_COMPLEX
                if (c == 'j' || c == 'J')
                    /* Imaginary part */
        imaginary:
                    c = tok_nextc(tok);
#endif
            }
        }
        /* c is the first character past the literal; un-read it. */
        tok_backup(tok, c);
        *p_start = tok->start;
        *p_end = tok->cur;
        return NUMBER;
    }

  letter_quote:
    /* String */
    if (c == '\'' || c == '"') {
        /* quote2 is the value tok->cur - tok->start takes right after the
           character following the opening quote has been read; it is used
           to recognise a second quote immediately after the first. */
        Py_ssize_t quote2 = tok->cur - tok->start + 1;
        int quote = c;
        /* triple: nonzero once the opening delimiter is known to be three
           quotes.  tripcount: number of consecutive quote characters seen
           so far while looking for the closing delimiter. */
        int triple = 0;
        int tripcount = 0;
        for (;;) {
            c = tok_nextc(tok);
            if (c == '\n') {
                /* A raw newline is only allowed in triple-quoted strings. */
                if (!triple) {
                    tok->done = E_EOLS;
                    tok_backup(tok, c);
                    return ERRORTOKEN;
                }
                tripcount = 0;
                tok->cont_line = 1; /* multiline string. */
            }
            else if (c == EOF) {
                if (triple)
                    tok->done = E_EOFS;
                else
                    tok->done = E_EOLS;
                tok->cur = tok->inp;
                return ERRORTOKEN;
            }
            else if (c == quote) {
                tripcount++;
                if (tok->cur - tok->start == quote2) {
                    /* Second quote directly after the opening one: peek
                       at the next character.  A third quote opens a
                       triple-quoted string; anything else is pushed back
                       and the (empty) string ends below. */
                    c = tok_nextc(tok);
                    if (c == quote) {
                        triple = 1;
                        tripcount = 0;
                        continue;
                    }
                    tok_backup(tok, c);
                }
                /* Single-quoted: any quote closes.  Triple-quoted: three
                   consecutive quotes close. */
                if (!triple || tripcount == 3)
                    break;
            }
            else if (c == '\\') {
                /* Backslash: consume the next character unexamined. */
                tripcount = 0;
                c = tok_nextc(tok);
                if (c == EOF) {
                    tok->done = E_EOLS;
                    tok->cur = tok->inp;
                    return ERRORTOKEN;
                }
            }
            else
                tripcount = 0;
        }
        *p_start = tok->start;
        *p_end = tok->cur;
        return STRING;
    }

    /* Line continuation */
    if (c == '\\') {
        /* The backslash must be immediately followed by a newline. */
        c = tok_nextc(tok);
        if (c != '\n') {
            tok->done = E_LINECONT;
            tok->cur = tok->inp;
            return ERRORTOKEN;
        }
        tok->cont_line = 1;
        goto again; /* Read next line */
    }

    /* Check for two-character token */
    {
        int c2 = tok_nextc(tok);
        int token = PyToken_TwoChars(c, c2);
#ifndef PGEN
        /* With the py3k warning flag set, "<>" triggers a
           DeprecationWarning; a nonzero result from PyErr_WarnExplicit
           aborts the token. */
        if (Py_Py3kWarningFlag && token == NOTEQUAL && c == '<') {
            if (PyErr_WarnExplicit(PyExc_DeprecationWarning, "<> not supported in 3.x; use !=", tok->filename, tok->lineno, NULL, NULL)) {
                return ERRORTOKEN;
            }
        }
#endif
        /* Any result other than OP is treated here as a recognised
           two-character operator; try to extend it to three characters. */
        if (token != OP) {
            int c3 = tok_nextc(tok);
            int token3 = PyToken_ThreeChars(c, c2, c3);
            if (token3 != OP) {
                token = token3;
            } else {
                tok_backup(tok, c3);
            }
            *p_start = tok->start;
            *p_end = tok->cur;
            return token;
        }
        /* Not a multi-character operator: un-read the lookahead. */
        tok_backup(tok, c2);
    }

    /* Keep track of parentheses nesting level */
    /* tok->level > 0 suppresses NEWLINE tokens and indentation checks
       above.  Note that a stray closing bracket decrements it without a
       lower bound. */
    switch (c) {
    case '(':
    case '[':
    case '{':
        tok->level++;
        break;
    case ')':
    case ']':
    case '}':
        tok->level--;
        break;
    }

    /* Punctuation character */
    *p_start = tok->start;
    *p_end = tok->cur;
    return PyToken_OneChar(c);
}