static vString *parseNumeric (int c) { static vString *string = NULL; if (string == NULL) string = vStringNew (); vStringCopy (string, parseInteger (c)); c = fileGetc (); if (c == '.') { vStringPut (string, c); vStringCat (string, parseInteger ('\0')); c = fileGetc (); } if (tolower (c) == 'e') { vStringPut (string, c); vStringCat (string, parseInteger ('\0')); } else if (!isspace (c)) fileUngetc (c); vStringTerminate (string); return string; }
static int get_next_char() { int c, nxt; c = fileGetc(); if (c == EOF) return c; nxt = fileGetc(); if (nxt == EOF) return c; fileUngetc(nxt); if (c == '-' && nxt == '-') { skip_rest_of_line(); return get_next_char(); } if (c == '{' && nxt == '-') { int last = '\0'; do { last = c; c = get_next_char(); } while (! (c == EOF || (last == '-' && c == '}'))); return get_next_char(); } return c; }
static void parseRegExp (void) { int c; boolean in_range = FALSE; do { c = fileGetc (); if (! in_range && c == '/') { do /* skip flags */ { c = fileGetc (); } while (isalpha (c)); fileUngetc (c); break; } else if (c == '\\') c = fileGetc (); /* skip next character */ else if (c == '[') in_range = TRUE; else if (c == ']') in_range = FALSE; } while (c != EOF); }
/* If a numeric is passed in 'c', this is used as the first digit of the * numeric being parsed. */ static vString *parseInteger (int c) { static vString *string = NULL; if (string == NULL) string = vStringNew (); vStringClear (string); if (c == '\0') c = fileGetc (); if (c == '-') { vStringPut (string, c); c = fileGetc (); } else if (! isdigit (c)) c = fileGetc (); while (c != EOF && (isdigit (c) || c == '_')) { vStringPut (string, c); c = fileGetc (); } vStringTerminate (string); fileUngetc (c); return string; }
static void skipLine (void) { int c; do c = nextChar (); while (c != EOF && c != '\n'); if (c == '\n') fileUngetc (c); }
static void parseIdentifier (vString *const string, const int firstChar) { int c = firstChar; do { vStringPut (string, c); c = fileGetc (); } while (isIdentChar (c)); vStringTerminate (string); fileUngetc (c); /* always unget, LF might add a semicolon */ }
static void parseSelector (vString *const string, const int firstChar) { int c = firstChar; do { vStringPut (string, (char) c); c = fileGetc (); } while (isSelectorChar (c)); fileUngetc (c); vStringTerminate (string); }
static void parseString (vString *const string, const int delimiter) { boolean end = FALSE; while (! end) { int c = fileGetc (); if (c == EOF) end = TRUE; else if (c == '\\') { /* Eat the escape sequence (\", \', etc). We properly handle * <LineContinuation> by eating a whole \<CR><LF> not to see <LF> * as an unescaped character, which is invalid and handled below. * Also, handle the fact that <LineContinuation> produces an empty * sequence. * See ECMA-262 7.8.4 */ c = fileGetc(); if (c != '\r' && c != '\n') vStringPut(string, c); else if (c == '\r') { c = fileGetc(); if (c != '\n') fileUngetc (c); } } else if (c == delimiter) end = TRUE; else if (c == '\r' || c == '\n') { /* those are invalid when not escaped */ end = TRUE; /* we don't want to eat the newline itself to let the automatic * semicolon insertion code kick in */ fileUngetc (c); } else vStringPut (string, c); } vStringTerminate (string); }
/* Read a C identifier beginning with "firstChar" and places it into * "name". */ static void parseIdentifier (vString *const string, const int firstChar) { int c = firstChar; Assert (isIdentChar (c)); do { vStringPut (string, c); c = fileGetc (); } while (isIdentChar (c)); vStringTerminate (string); fileUngetc (c); /* unget non-identifier character */ }
static void readIdentifier (const int first, vString *const id) { int c = first; vStringClear (id); while (isIdentifier (c)) { vStringPut (id, c); c = nextChar (); } fileUngetc (c); vStringTerminate (id); }
static void parseFreeOperator (vString *const string, const int firstChar) { int c = firstChar; do { vStringPut (string, c); c = fileGetc (); } while (c > ' '); vStringTerminate (string); if (!isspace (c)) fileUngetc (c); /* unget non-identifier character */ }
static void readIdentifier (const int first, vString *const id) { int depth = 0; int c = first; vStringClear (id); while (isIdentifier (c) || (depth > 0 && c != EOF && c != '\n')) { if (c == '(' || c == '}') depth++; else if (depth > 0 && (c == ')' || c == '}')) depth--; vStringPut (id, c); c = nextChar (); } fileUngetc (c); vStringTerminate (id); }
static int get_token(char *token, int n) { int c = fileGetc(); int i = n; while (c != EOF && isident(c) && i < 1000) { token[i] = c; i++; c = fileGetc(); } if (c == EOF) return 0; if (i != n) { token[i] = '\0'; fileUngetc(c); return 1; } else { return 0; } }
static void parseTemplateString (vString *const string) { int c; do { c = fileGetc (); if (c == '`') break; vStringPut (string, c); if (c == '\\') { c = fileGetc(); vStringPut(string, c); } else if (c == '$') { c = fileGetc (); if (c != '{') fileUngetc (c); else { int depth = 1; /* we need to use the real token machinery to handle strings, * comments, regexes and whatnot */ tokenInfo *token = newToken (); LastTokenType = TOKEN_UNDEFINED; vStringPut(string, c); do { readTokenFull (token, FALSE, string); if (isType (token, TOKEN_OPEN_CURLY)) depth++; else if (isType (token, TOKEN_CLOSE_CURLY)) depth--; } while (! isType (token, TOKEN_EOF) && depth > 0); deleteToken (token); } } } while (c != EOF); vStringTerminate (string); }
static int vGetc (void) { int c; if (Ungetc == '\0') c = fileGetc (); else { c = Ungetc; Ungetc = '\0'; } if (c == '/') { int c2 = fileGetc (); if (c2 == EOF) longjmp (Exception, (int) ExceptionEOF); else if (c2 == '/') /* strip comment until end-of-line */ { do c = fileGetc (); while (c != '\n' && c != EOF); } else if (c2 == '*') /* strip block comment */ { c = skipOverCComment(); } else { fileUngetc (c2); } } else if (c == '"') /* strip string contents */ { int c2; do c2 = fileGetc (); while (c2 != '"' && c2 != EOF); c = '@'; } if (c == EOF) longjmp (Exception, (int) ExceptionEOF); return c; }
static void readToken (tokenInfo *const token) { int c; token->type = TOKEN_UNDEFINED; token->keyword = KEYWORD_NONE; vStringClear (token->string); getNextChar: do { c = fileGetc (); } while (c == '\t' || c == ' ' || c == '\n'); token->lineNumber = getSourceLineNumber (); token->filePosition = getInputFilePosition (); switch (c) { case EOF: longjmp (Exception, (int)ExceptionEOF); break; case '(': token->type = TOKEN_OPEN_PAREN; break; case ')': token->type = TOKEN_CLOSE_PAREN; break; case ';': token->type = TOKEN_SEMICOLON; break; case ',': token->type = TOKEN_COMMA; break; case '.': token->type = TOKEN_PERIOD; break; case ':': token->type = TOKEN_COLON; break; case '{': token->type = TOKEN_OPEN_CURLY; break; case '}': token->type = TOKEN_CLOSE_CURLY; break; case '=': token->type = TOKEN_EQUAL_SIGN; break; case '[': token->type = TOKEN_OPEN_SQUARE; break; case ']': token->type = TOKEN_CLOSE_SQUARE; break; case '\'': case '"': token->type = TOKEN_STRING; parseString (token->string, c); token->lineNumber = getSourceLineNumber (); token->filePosition = getInputFilePosition (); break; case '\\': c = fileGetc (); if (c != '\\' && c != '"' && !isspace (c)) fileUngetc (c); token->type = TOKEN_CHARACTER; token->lineNumber = getSourceLineNumber (); token->filePosition = getInputFilePosition (); break; case '/': { int d = fileGetc (); if ( (d != '*') && /* is this the start of a comment? */ (d != '/') ) /* is a one line comment? */ { fileUngetc (d); switch (LastTokenType) { case TOKEN_CHARACTER: case TOKEN_KEYWORD: case TOKEN_IDENTIFIER: case TOKEN_STRING: case TOKEN_CLOSE_CURLY: case TOKEN_CLOSE_PAREN: case TOKEN_CLOSE_SQUARE: token->type = TOKEN_FORWARD_SLASH; break; default: token->type = TOKEN_REGEXP; parseRegExp (); token->lineNumber = getSourceLineNumber (); token->filePosition = getInputFilePosition (); break; } } else { if (d == '*') { do { skipToCharacter ('*'); c = fileGetc (); if (c == '/') break; else fileUngetc (c); } while (c != EOF && c != '\0'); goto getNextChar; } else if (d == '/') /* is this the start of a comment? */ { skipToCharacter ('\n'); goto getNextChar; } } break; } default: if (! isIdentChar (c)) token->type = TOKEN_UNDEFINED; else { parseIdentifier (token->string, c); token->lineNumber = getSourceLineNumber (); token->filePosition = getInputFilePosition (); token->keyword = analyzeToken (token->string); if (isKeyword (token, KEYWORD_NONE)) token->type = TOKEN_IDENTIFIER; else token->type = TOKEN_KEYWORD; } break; } LastTokenType = token->type; }
static void readTokenFull (tokenInfo *const token, boolean includeStringRepr) { int c; token->type = TOKEN_UNDEFINED; vStringClear (token->string); do c = fileGetc (); while (c == '\t' || c == ' ' || c == '\r' || c == '\n'); token->lineNumber = getSourceLineNumber (); token->filePosition = getInputFilePosition (); switch (c) { case EOF: token->type = TOKEN_EOF; break; case '[': token->type = TOKEN_OPEN_SQUARE; break; case ']': token->type = TOKEN_CLOSE_SQUARE; break; case '{': token->type = TOKEN_OPEN_CURLY; break; case '}': token->type = TOKEN_CLOSE_CURLY; break; case ':': token->type = TOKEN_COLON; break; case ',': token->type = TOKEN_COMMA; break; case '"': { boolean escaped = FALSE; token->type = TOKEN_STRING; while (TRUE) { c = fileGetc (); /* we don't handle unicode escapes but they are safe */ if (escaped) escaped = FALSE; else if (c == '\\') escaped = TRUE; else if (c >= 0x00 && c <= 0x1F) break; /* break on invalid, unescaped, control characters */ else if (c == '"' || c == EOF) break; if (includeStringRepr) vStringPut (token->string, c); } vStringTerminate (token->string); break; } default: if (! isIdentChar (c)) token->type = TOKEN_UNDEFINED; else { do { vStringPut (token->string, c); c = fileGetc (); } while (c != EOF && isIdentChar (c)); vStringTerminate (token->string); fileUngetc (c); switch (lookupKeyword (vStringValue (token->string), Lang_json)) { case KEYWORD_true: token->type = TOKEN_TRUE; break; case KEYWORD_false: token->type = TOKEN_FALSE; break; case KEYWORD_null: token->type = TOKEN_NULL; break; default: token->type = TOKEN_NUMBER; break; } } break; } }
static void readTokenFull (tokenInfo *const token, boolean include_newlines, vString *const repr) { int c; int i; token->type = TOKEN_UNDEFINED; token->keyword = KEYWORD_NONE; vStringClear (token->string); getNextChar: i = 0; do { c = fileGetc (); i++; } while (c == '\t' || c == ' ' || ((c == '\r' || c == '\n') && ! include_newlines)); token->lineNumber = getSourceLineNumber (); token->filePosition = getInputFilePosition (); if (repr) { if (i > 1) vStringPut (repr, ' '); vStringPut (repr, c); } switch (c) { case EOF: token->type = TOKEN_EOF; break; case '(': token->type = TOKEN_OPEN_PAREN; break; case ')': token->type = TOKEN_CLOSE_PAREN; break; case ';': token->type = TOKEN_SEMICOLON; break; case ',': token->type = TOKEN_COMMA; break; case '.': token->type = TOKEN_PERIOD; break; case ':': token->type = TOKEN_COLON; break; case '{': token->type = TOKEN_OPEN_CURLY; break; case '}': token->type = TOKEN_CLOSE_CURLY; break; case '=': token->type = TOKEN_EQUAL_SIGN; break; case '[': token->type = TOKEN_OPEN_SQUARE; break; case ']': token->type = TOKEN_CLOSE_SQUARE; break; case '+': case '-': { int d = fileGetc (); if (d == c) /* ++ or -- */ token->type = TOKEN_POSTFIX_OPERATOR; else { fileUngetc (d); token->type = TOKEN_BINARY_OPERATOR; } break; } case '*': case '%': case '?': case '>': case '<': case '^': case '|': case '&': token->type = TOKEN_BINARY_OPERATOR; break; case '\r': case '\n': /* This isn't strictly correct per the standard, but following the * real rules means understanding all statements, and that's not * what the parser currently does. What we do here is a guess, by * avoiding inserting semicolons that would make the statement on * the left invalid. Hopefully this should not have false negatives * (e.g. should not miss insertion of a semicolon) but might have * false positives (e.g. it will wrongfully emit a semicolon for the * newline in "foo\n+bar"). * This should however be mostly harmless as we only deal with * newlines in specific situations where we know a false positive * wouldn't hurt too bad. */ switch (LastTokenType) { /* these cannot be the end of a statement, so hold the newline */ case TOKEN_EQUAL_SIGN: case TOKEN_COLON: case TOKEN_PERIOD: case TOKEN_FORWARD_SLASH: case TOKEN_BINARY_OPERATOR: /* and these already end one, no need to duplicate it */ case TOKEN_SEMICOLON: case TOKEN_COMMA: case TOKEN_CLOSE_CURLY: case TOKEN_OPEN_CURLY: include_newlines = FALSE; /* no need to recheck */ goto getNextChar; break; default: token->type = TOKEN_SEMICOLON; } break; case '\'': case '"': token->type = TOKEN_STRING; parseString (token->string, c); token->lineNumber = getSourceLineNumber (); token->filePosition = getInputFilePosition (); if (repr) { vStringCat (repr, token->string); vStringPut (repr, c); } break; case '\\': c = fileGetc (); if (c != '\\' && c != '"' && !isspace (c)) fileUngetc (c); token->type = TOKEN_CHARACTER; token->lineNumber = getSourceLineNumber (); token->filePosition = getInputFilePosition (); break; case '/': { int d = fileGetc (); if ( (d != '*') && /* is this the start of a comment? */ (d != '/') ) /* is a one line comment? */ { fileUngetc (d); switch (LastTokenType) { case TOKEN_CHARACTER: case TOKEN_IDENTIFIER: case TOKEN_STRING: case TOKEN_CLOSE_CURLY: case TOKEN_CLOSE_PAREN: case TOKEN_CLOSE_SQUARE: token->type = TOKEN_FORWARD_SLASH; break; default: token->type = TOKEN_REGEXP; parseRegExp (); token->lineNumber = getSourceLineNumber (); token->filePosition = getInputFilePosition (); break; } } else { if (repr) /* remove the / we added */ repr->buffer[--repr->length] = 0; if (d == '*') { do { skipToCharacter ('*'); c = fileGetc (); if (c == '/') break; else fileUngetc (c); } while (c != EOF && c != '\0'); goto getNextChar; } else if (d == '/') /* is this the start of a comment? */ { skipToCharacter ('\n'); /* if we care about newlines, put it back so it is seen */ if (include_newlines) fileUngetc ('\n'); goto getNextChar; } } break; } case '#': /* skip shebang in case of e.g. Node.js scripts */ if (token->lineNumber > 1) token->type = TOKEN_UNDEFINED; else if ((c = fileGetc ()) != '!') { fileUngetc (c); token->type = TOKEN_UNDEFINED; } else { skipToCharacter ('\n'); goto getNextChar; } break; default: if (! isIdentChar (c)) token->type = TOKEN_UNDEFINED; else { parseIdentifier (token->string, c); token->lineNumber = getSourceLineNumber (); token->filePosition = getInputFilePosition (); token->keyword = analyzeToken (token->string); if (isKeyword (token, KEYWORD_NONE)) token->type = TOKEN_IDENTIFIER; else token->type = TOKEN_KEYWORD; if (repr && vStringLength (token->string) > 1) vStringCatS (repr, vStringValue (token->string) + 1); } break; } LastTokenType = token->type; }
static void readToken (tokenInfo *const token) { int c; static tokenType lastTokenType = TOKEN_NONE; token->type = TOKEN_NONE; token->keyword = KEYWORD_NONE; vStringClear (token->string); getNextChar: do { c = fileGetc (); token->lineNumber = getSourceLineNumber (); token->filePosition = getInputFilePosition (); if (c == '\n' && (lastTokenType == TOKEN_IDENTIFIER || lastTokenType == TOKEN_STRING || lastTokenType == TOKEN_OTHER || lastTokenType == TOKEN_CLOSE_PAREN || lastTokenType == TOKEN_CLOSE_CURLY || lastTokenType == TOKEN_CLOSE_SQUARE)) { token->type = TOKEN_SEMICOLON; goto done; } } while (c == '\t' || c == ' ' || c == '\r' || c == '\n'); switch (c) { case EOF: token->type = TOKEN_EOF; break; case ';': token->type = TOKEN_SEMICOLON; break; case '/': { boolean hasNewline = FALSE; int d = fileGetc (); switch (d) { case '/': fileSkipToCharacter ('\n'); /* Line comments start with the * character sequence // and * continue through the next * newline. A line comment acts * like a newline. */ fileUngetc ('\n'); goto getNextChar; case '*': do { do { d = fileGetc (); if (d == '\n') { hasNewline = TRUE; } } while (d != EOF && d != '*'); c = fileGetc (); if (c == '/') break; else fileUngetc (c); } while (c != EOF && c != '\0'); fileUngetc (hasNewline ? '\n' : ' '); goto getNextChar; default: token->type = TOKEN_OTHER; fileUngetc (d); break; } } break; case '"': case '\'': case '`': token->type = TOKEN_STRING; parseString (token->string, c); token->lineNumber = getSourceLineNumber (); token->filePosition = getInputFilePosition (); break; case '<': { int d = fileGetc (); if (d == '-') token->type = TOKEN_LEFT_ARROW; else { fileUngetc (d); token->type = TOKEN_OTHER; } } break; case '(': token->type = TOKEN_OPEN_PAREN; break; case ')': token->type = TOKEN_CLOSE_PAREN; break; case '{': token->type = TOKEN_OPEN_CURLY; break; case '}': token->type = TOKEN_CLOSE_CURLY; break; case '[': token->type = TOKEN_OPEN_SQUARE; break; case ']': token->type = TOKEN_CLOSE_SQUARE; break; case '*': token->type = TOKEN_STAR; break; case '.': token->type = TOKEN_DOT; break; case ',': token->type = TOKEN_COMMA; break; default: if (isStartIdentChar (c)) { parseIdentifier (token->string, c); token->lineNumber = getSourceLineNumber (); token->filePosition = getInputFilePosition (); token->keyword = lookupKeyword (vStringValue (token->string), Lang_go); if (isKeyword (token, KEYWORD_NONE)) token->type = TOKEN_IDENTIFIER; else token->type = TOKEN_KEYWORD; } else token->type = TOKEN_OTHER; break; } done: lastTokenType = token->type; }
static void readToken (tokenInfo *const token) { int c; token->type = TOKEN_UNDEFINED; token->keyword = KEYWORD_NONE; vStringClear (token->string); getNextChar: do c = fileGetc (); while (c == '\t' || c == ' ' || c == '\n'); switch (c) { case EOF: longjmp (Exception, (int)ExceptionEOF); break; case '(': token->type = TOKEN_OPEN_PAREN; break; case ')': token->type = TOKEN_CLOSE_PAREN; break; case ';': token->type = TOKEN_SEMICOLON; break; case ',': token->type = TOKEN_COMMA; break; case '\'': case '"': token->type = TOKEN_STRING; parseString (token->string, c); break; case '-': c = fileGetc (); if (c == '-') /* is this the start of a comment? */ { skipToCharacter ('\n'); goto getNextChar; } else { if (!isspace (c)) fileUngetc (c); token->type = TOKEN_OPERATOR; } break; case '/': { int d = fileGetc (); if (d != '*') /* is this the start of a comment? */ fileUngetc (d); else { do { skipToCharacter ('*'); c = fileGetc (); if (c == '/') break; else fileUngetc (c); } while (c != '\0'); goto getNextChar; } break; } default: if (! isIdentChar1 (c)) token->type = TOKEN_UNDEFINED; else { parseIdentifier (token->string, c); token->keyword = analyzeToken (token->string); if (isKeyword (token, KEYWORD_rem)) { vStringClear (token->string); skipToCharacter ('\n'); goto getNextChar; } else if (isKeyword (token, KEYWORD_NONE)) token->type = TOKEN_IDENTIFIER; else token->type = TOKEN_KEYWORD; } break; } }
static void findMakeTags (void) { vString *name = vStringNew (); boolean newline = TRUE; boolean in_define = FALSE; boolean in_rule = FALSE; boolean variable_possible = TRUE; int c; while ((c = nextChar ()) != EOF) { if (newline) { if (in_rule) { if (c == '\t') { skipLine (); /* skip rule */ continue; } else in_rule = FALSE; } variable_possible = (boolean)(!in_rule); newline = FALSE; } if (c == '\n') newline = TRUE; else if (isspace (c)) continue; else if (c == '#') skipLine (); else if (c == '(') skipToMatch ("()"); else if (c == '{') skipToMatch ("{}"); else if (c == ':') { variable_possible = TRUE; in_rule = TRUE; } else if (variable_possible && isIdentifier (c)) { readIdentifier (c, name); if (strcmp (vStringValue (name), "endef") == 0) in_define = FALSE; else if (in_define) skipLine (); else if (strcmp (vStringValue (name), "define") == 0 && isIdentifier (c)) { in_define = TRUE; c = skipToNonWhite (); readIdentifier (c, name); makeSimpleTag (name, MakeKinds, K_MACRO); skipLine (); } else { c = skipToNonWhite (); if (strchr (":?+", c) != NULL) { boolean append = (boolean)(c == '+'); if (c == ':') in_rule = TRUE; c = nextChar (); if (c != '=') fileUngetc (c); else if (append) { skipLine (); continue; } } if (c == '=') { makeSimpleTag (name, MakeKinds, K_MACRO); in_rule = FALSE; skipLine (); } } } else variable_possible = FALSE; } vStringDelete (name); }
static void findMakeTags (void) { stringList *identifiers = stringListNew (); boolean newline = TRUE; boolean in_define = FALSE; boolean in_rule = FALSE; boolean variable_possible = TRUE; int c; while ((c = nextChar ()) != EOF) { if (newline) { if (in_rule) { if (c == '\t' || (c = skipToNonWhite (c)) == '#') { skipLine (); /* skip rule or comment */ c = nextChar (); } else if (c != '\n') in_rule = FALSE; } stringListClear (identifiers); variable_possible = (boolean)(!in_rule); newline = FALSE; } if (c == '\n') newline = TRUE; else if (isspace (c)) continue; else if (c == '#') skipLine (); else if (variable_possible && c == '?') { c = nextChar (); fileUngetc (c); variable_possible = (c == '='); } else if (variable_possible && c == ':' && stringListCount (identifiers) > 0) { c = nextChar (); fileUngetc (c); if (c != '=') { unsigned int i; for (i = 0; i < stringListCount (identifiers); i++) newTarget (stringListItem (identifiers, i)); stringListClear (identifiers); in_rule = TRUE; } } else if (variable_possible && c == '=' && stringListCount (identifiers) == 1) { newMacro (stringListItem (identifiers, 0)); skipLine (); in_rule = FALSE; } else if (variable_possible && isIdentifier (c)) { vString *name = vStringNew (); readIdentifier (c, name); stringListAdd (identifiers, name); if (stringListCount (identifiers) == 1) { if (in_define && ! strcmp (vStringValue (name), "endef")) in_define = FALSE; else if (in_define) skipLine (); else if (! strcmp (vStringValue (name), "define")) { in_define = TRUE; c = skipToNonWhite (nextChar ()); vStringClear (name); /* all remaining characters on the line are the name -- even spaces */ while (c != EOF && c != '\n') { vStringPut (name, c); c = nextChar (); } if (c == '\n') fileUngetc (c); vStringTerminate (name); vStringStripTrailing (name); newMacro (name); } else if (! strcmp (vStringValue (name), "export")) stringListClear (identifiers); } } else variable_possible = FALSE; } stringListDelete (identifiers); }
static void readToken (tokenInfo *const token) { int c; token->type = TOKEN_UNDEFINED; token->keyword = KEYWORD_NONE; vStringClear (token->string); getNextChar: do c = fileGetc (); while (c == '\t' || c == ' ' || c == '\n'); switch (c) { case EOF: longjmp (Exception, (int)ExceptionEOF); break; case '!': token->type = TOKEN_BANG; break; case '$': token->type = TOKEN_DOLLAR; break; case '(': token->type = TOKEN_OPEN_PAREN; break; case ')': token->type = TOKEN_CLOSE_PAREN; break; case ',': token->type = TOKEN_COMMA; break; case '.': token->type = TOKEN_DOT; break; case ';': goto getNextChar; case '[': token->type = TOKEN_OPEN_BRACKET; break; case ']': token->type = TOKEN_CLOSE_BRACKET; break; case '{': token->type = TOKEN_OPEN_BRACE; break; case '}': token->type = TOKEN_CLOSE_BRACE; break; case '~': token->type = TOKEN_TILDE; break; case '+': case '*': case '^': case '=': token->type = TOKEN_OPERATOR; break; case '-': c = fileGetc (); if (c == '>') token->type = TOKEN_CONSTRAINT; else if (c == '-') /* is this the start of a comment? */ { skipToCharacter ('\n'); goto getNextChar; } else { if (!isspace (c)) fileUngetc (c); token->type = TOKEN_OPERATOR; } break; case '?': case ':': c = fileGetc (); if (c == '=') token->type = TOKEN_OPERATOR; else { token->type = TOKEN_COLON; if (!isspace (c)) fileUngetc (c); } break; case '<': c = fileGetc (); if (c != '=' && c != '>' && !isspace (c)) fileUngetc (c); token->type = TOKEN_OPERATOR; break; case '>': c = fileGetc (); if (c != '=' && c != '>' && !isspace (c)) fileUngetc (c); token->type = TOKEN_OPERATOR; break; case '/': c = fileGetc (); if (c != '/' && c != '=' && !isspace (c)) fileUngetc (c); token->type = TOKEN_OPERATOR; break; case '\\': c = fileGetc (); if (c != '\\' && !isspace (c)) fileUngetc (c); token->type = TOKEN_OPERATOR; break; case '"': token->type = TOKEN_STRING; parseString (token->string); break; case '\'': token->type = TOKEN_CHARACTER; parseCharacter (); break; default: if (isalpha (c)) { parseIdentifier (token->string, c); token->keyword = analyzeToken (token->string); if (isKeyword (token, KEYWORD_NONE)) token->type = TOKEN_IDENTIFIER; else token->type = TOKEN_KEYWORD; } else if (isdigit (c)) { vStringCat (token->string, parseNumeric (c)); token->type = TOKEN_NUMERIC; } else if (isFreeOperatorChar (c)) { parseFreeOperator (token->string, c); token->type = TOKEN_OPERATOR; } else { token->type = TOKEN_UNDEFINED; Assert (! isType (token, TOKEN_UNDEFINED)); } break; } }
static void readTokenFull (tokenInfo *const token, boolean include_newlines, vString *const repr) { int c; int i; boolean newline_encountered = FALSE; /* if we've got a token held back, emit it */ if (NextToken) { copyToken (token, NextToken, FALSE); deleteToken (NextToken); NextToken = NULL; return; } token->type = TOKEN_UNDEFINED; token->keyword = KEYWORD_NONE; vStringClear (token->string); getNextChar: i = 0; do { c = fileGetc (); if (include_newlines && (c == '\r' || c == '\n')) newline_encountered = TRUE; i++; } while (c == '\t' || c == ' ' || c == '\r' || c == '\n'); token->lineNumber = getSourceLineNumber (); token->filePosition = getInputFilePosition (); if (repr) { if (i > 1) vStringPut (repr, ' '); vStringPut (repr, c); } switch (c) { case EOF: token->type = TOKEN_EOF; break; case '(': token->type = TOKEN_OPEN_PAREN; break; case ')': token->type = TOKEN_CLOSE_PAREN; break; case ';': token->type = TOKEN_SEMICOLON; break; case ',': token->type = TOKEN_COMMA; break; case '.': token->type = TOKEN_PERIOD; break; case ':': token->type = TOKEN_COLON; break; case '{': token->type = TOKEN_OPEN_CURLY; break; case '}': token->type = TOKEN_CLOSE_CURLY; break; case '=': token->type = TOKEN_EQUAL_SIGN; break; case '[': token->type = TOKEN_OPEN_SQUARE; break; case ']': token->type = TOKEN_CLOSE_SQUARE; break; case '+': case '-': { int d = fileGetc (); if (d == c) /* ++ or -- */ token->type = TOKEN_POSTFIX_OPERATOR; else { fileUngetc (d); token->type = TOKEN_BINARY_OPERATOR; } break; } case '*': case '%': case '?': case '>': case '<': case '^': case '|': case '&': token->type = TOKEN_BINARY_OPERATOR; break; case '\'': case '"': token->type = TOKEN_STRING; parseString (token->string, c); token->lineNumber = getSourceLineNumber (); token->filePosition = getInputFilePosition (); if (repr) { vStringCat (repr, token->string); vStringPut (repr, c); } break; case '`': token->type = TOKEN_TEMPLATE_STRING; parseTemplateString (token->string); token->lineNumber = getSourceLineNumber (); token->filePosition = getInputFilePosition (); if (repr) { vStringCat (repr, token->string); vStringPut (repr, c); } break; case '\\': c = fileGetc (); if (c != '\\' && c != '"' && !isspace (c)) fileUngetc (c); token->type = TOKEN_CHARACTER; token->lineNumber = getSourceLineNumber (); token->filePosition = getInputFilePosition (); break; case '/': { int d = fileGetc (); if ( (d != '*') && /* is this the start of a comment? */ (d != '/') ) /* is a one line comment? */ { fileUngetc (d); switch (LastTokenType) { case TOKEN_CHARACTER: case TOKEN_IDENTIFIER: case TOKEN_STRING: case TOKEN_TEMPLATE_STRING: case TOKEN_CLOSE_CURLY: case TOKEN_CLOSE_PAREN: case TOKEN_CLOSE_SQUARE: token->type = TOKEN_BINARY_OPERATOR; break; default: token->type = TOKEN_REGEXP; parseRegExp (); token->lineNumber = getSourceLineNumber (); token->filePosition = getInputFilePosition (); break; } } else { if (repr) /* remove the / we added */ repr->buffer[--repr->length] = 0; if (d == '*') { do { fileSkipToCharacter ('*'); c = fileGetc (); if (c == '/') break; else fileUngetc (c); } while (c != EOF && c != '\0'); goto getNextChar; } else if (d == '/') /* is this the start of a comment? */ { fileSkipToCharacter ('\n'); /* if we care about newlines, put it back so it is seen */ if (include_newlines) fileUngetc ('\n'); goto getNextChar; } } break; } case '#': /* skip shebang in case of e.g. Node.js scripts */ if (token->lineNumber > 1) token->type = TOKEN_UNDEFINED; else if ((c = fileGetc ()) != '!') { fileUngetc (c); token->type = TOKEN_UNDEFINED; } else { fileSkipToCharacter ('\n'); goto getNextChar; } break; default: if (! isIdentChar (c)) token->type = TOKEN_UNDEFINED; else { parseIdentifier (token->string, c); token->lineNumber = getSourceLineNumber (); token->filePosition = getInputFilePosition (); token->keyword = analyzeToken (token->string, Lang_js); if (isKeyword (token, KEYWORD_NONE)) token->type = TOKEN_IDENTIFIER; else token->type = TOKEN_KEYWORD; if (repr && vStringLength (token->string) > 1) vStringCatS (repr, vStringValue (token->string) + 1); } break; } if (include_newlines && newline_encountered) { /* This isn't strictly correct per the standard, but following the * real rules means understanding all statements, and that's not * what the parser currently does. What we do here is a guess, by * avoiding inserting semicolons that would make the statement on * the left or right obviously invalid. Hopefully this should not * have false negatives (e.g. should not miss insertion of a semicolon) * but might have false positives (e.g. it will wrongfully emit a * semicolon sometimes, i.e. for the newline in "foo\n(bar)"). * This should however be mostly harmless as we only deal with * newlines in specific situations where we know a false positive * wouldn't hurt too bad. */ /* these already end a statement, so no need to duplicate it */ #define IS_STMT_SEPARATOR(t) ((t) == TOKEN_SEMICOLON || \ (t) == TOKEN_EOF || \ (t) == TOKEN_COMMA || \ (t) == TOKEN_CLOSE_CURLY || \ (t) == TOKEN_OPEN_CURLY) /* these cannot be the start or end of a statement */ #define IS_BINARY_OPERATOR(t) ((t) == TOKEN_EQUAL_SIGN || \ (t) == TOKEN_COLON || \ (t) == TOKEN_PERIOD || \ (t) == TOKEN_BINARY_OPERATOR) if (! IS_STMT_SEPARATOR(LastTokenType) && ! IS_STMT_SEPARATOR(token->type) && ! IS_BINARY_OPERATOR(LastTokenType) && ! IS_BINARY_OPERATOR(token->type) && /* these cannot be followed by a semicolon */ ! (LastTokenType == TOKEN_OPEN_PAREN || LastTokenType == TOKEN_OPEN_SQUARE)) { /* hold the token... */ Assert (NextToken == NULL); NextToken = newToken (); copyToken (NextToken, token, FALSE); /* ...and emit a semicolon instead */ token->type = TOKEN_SEMICOLON; token->keyword = KEYWORD_NONE; vStringClear (token->string); if (repr) vStringPut (token->string, '\n'); } #undef IS_STMT_SEPARATOR #undef IS_BINARY_OPERATOR } LastTokenType = token->type; }
static void readToken (tokenInfo *const token) { int c; token->type = TOKEN_UNDEFINED; token->keyword = KEYWORD_NONE; vStringClear (token->string); getNextChar: do { c = fileGetc (); token->lineNumber = getSourceLineNumber (); token->filePosition = getInputFilePosition (); } while (c == '\t' || c == ' ' || c == '\n'); switch (c) { case EOF: longjmp (Exception, (int)ExceptionEOF); break; case '(': token->type = TOKEN_OPEN_PAREN; break; case ')': token->type = TOKEN_CLOSE_PAREN; break; case ';': token->type = TOKEN_SEMICOLON; break; case ',': token->type = TOKEN_COMMA; break; case '.': token->type = TOKEN_PERIOD; break; case ':': token->type = TOKEN_COLON; break; case '{': token->type = TOKEN_OPEN_CURLY; break; case '}': token->type = TOKEN_CLOSE_CURLY; break; case '=': token->type = TOKEN_EQUAL_SIGN; break; case '[': token->type = TOKEN_OPEN_SQUARE; break; case ']': token->type = TOKEN_CLOSE_SQUARE; break; case '?': token->type = TOKEN_QUESTION_MARK; break; case '*': token->type = TOKEN_STAR; break; case '\'': case '"': token->type = TOKEN_STRING; parseString (token->string, c); token->lineNumber = getSourceLineNumber (); token->filePosition = getInputFilePosition (); break; case '\\': /* * All Tex tags start with a backslash. * Check if the next character is an alpha character * else it is not a potential tex tag. */ c = fileGetc (); if (! isalpha (c)) fileUngetc (c); else { parseIdentifier (token->string, c); token->lineNumber = getSourceLineNumber (); token->filePosition = getInputFilePosition (); token->keyword = analyzeToken (token->string, Lang_js); if (isKeyword (token, KEYWORD_NONE)) token->type = TOKEN_IDENTIFIER; else token->type = TOKEN_KEYWORD; } break; case '%': fileSkipToCharacter ('\n'); /* % are single line comments */ goto getNextChar; break; default: if (! isIdentChar (c)) token->type = TOKEN_UNDEFINED; else { parseIdentifier (token->string, c); token->lineNumber = getSourceLineNumber (); token->filePosition = getInputFilePosition (); token->type = TOKEN_IDENTIFIER; } break; } }
static void readToken (tokenInfo *const token) { int c; vStringClear (token->string); getNextChar: c = fileGetc (); while (isspace (c)) c = fileGetc (); token->type = c; switch (c) { case EOF: token->type = TOKEN_EOF; break; case '\'': case '"': { const int delimiter = c; do { vStringPut (token->string, c); c = fileGetc (); if (c == '\\') c = fileGetc (); } while (c != EOF && c != delimiter); if (c != EOF) vStringPut (token->string, c); token->type = TOKEN_STRING; break; } case '/': /* maybe comment start */ { int d = fileGetc (); if (d != '*') { fileUngetc (d); vStringPut (token->string, c); token->type = c; } else { d = fileGetc (); do { c = d; d = fileGetc (); } while (d != EOF && ! (c == '*' && d == '/')); goto getNextChar; } break; } default: if (! isSelectorChar (c)) { vStringPut (token->string, c); token->type = c; } else { parseSelector (token->string, c); token->type = TOKEN_SELECTOR; } break; } }
static void readToken (tokenInfo * const token) { int c; token->type = TOKEN_NONE; token->keyword = KEYWORD_NONE; vStringClear (token->string); getNextChar: do { c = fileGetc (); token->lineNumber = getSourceLineNumber (); token->filePosition = getInputFilePosition (); } while (c == '\t' || c == ' ' || c == '\n'); switch (c) { case EOF: token->type = TOKEN_EOF; break; case '(': token->type = TOKEN_OPEN_PAREN; break; case ')': token->type = TOKEN_CLOSE_PAREN; break; case ';': token->type = TOKEN_SEMICOLON; break; case '.': token->type = TOKEN_PERIOD; break; case ',': token->type = TOKEN_COMMA; break; case '\'': /* only single char are inside simple quotes */ break; /* or it is for attributes so we don't care */ case '"': token->type = TOKEN_STRING; parseString (token->string, c); token->lineNumber = getSourceLineNumber (); token->filePosition = getInputFilePosition (); break; case '-': c = fileGetc (); if (c == '-') /* start of a comment */ { fileSkipToCharacter ('\n'); goto getNextChar; } else { if (!isspace (c)) fileUngetc (c); token->type = TOKEN_OPERATOR; } break; default: if (!isIdentChar1 (c)) token->type = TOKEN_NONE; else { parseIdentifier (token->string, c); token->lineNumber = getSourceLineNumber (); token->filePosition = getInputFilePosition (); token->keyword = analyzeToken (token->string, Lang_vhdl); if (isKeyword (token, KEYWORD_NONE)) token->type = TOKEN_IDENTIFIER; else token->type = TOKEN_KEYWORD; } break; } }
static void readToken (tokenInfo *const token) { int c; static tokenType lastTokenType = TOKEN_NONE; boolean firstWhitespace = TRUE; boolean whitespace; token->type = TOKEN_NONE; token->keyword = KEYWORD_NONE; vStringClear (token->string); getNextChar: do { c = fileGetc (); token->lineNumber = getSourceLineNumber (); token->filePosition = getInputFilePosition (); if (c == '\n' && (lastTokenType == TOKEN_IDENTIFIER || lastTokenType == TOKEN_STRING || lastTokenType == TOKEN_OTHER || lastTokenType == TOKEN_CLOSE_PAREN || lastTokenType == TOKEN_CLOSE_CURLY || lastTokenType == TOKEN_CLOSE_SQUARE)) { c = ';'; // semicolon injection } whitespace = c == '\t' || c == ' ' || c == '\r' || c == '\n'; if (signature && whitespace && firstWhitespace && vStringLength (signature) < MAX_SIGNATURE_LENGTH) { firstWhitespace = FALSE; vStringPut(signature, ' '); } } while (whitespace); switch (c) { case EOF: token->type = TOKEN_EOF; break; case ';': token->type = TOKEN_SEMICOLON; break; case '/': { boolean hasNewline = FALSE; int d = fileGetc (); switch (d) { case '/': fileSkipToCharacter ('\n'); /* Line comments start with the * character sequence // and * continue through the next * newline. A line comment acts * like a newline. */ fileUngetc ('\n'); goto getNextChar; case '*': do { do { d = fileGetc (); if (d == '\n') { hasNewline = TRUE; } } while (d != EOF && d != '*'); c = fileGetc (); if (c == '/') break; else fileUngetc (c); } while (c != EOF && c != '\0'); fileUngetc (hasNewline ? '\n' : ' '); goto getNextChar; default: token->type = TOKEN_OTHER; fileUngetc (d); break; } } break; case '"': case '\'': case '`': token->type = TOKEN_STRING; parseString (token->string, c); token->lineNumber = getSourceLineNumber (); token->filePosition = getInputFilePosition (); break; case '<': { int d = fileGetc (); if (d == '-') token->type = TOKEN_LEFT_ARROW; else { fileUngetc (d); token->type = TOKEN_OTHER; } } break; case '(': token->type = TOKEN_OPEN_PAREN; break; case ')': token->type = TOKEN_CLOSE_PAREN; break; case '{': token->type = TOKEN_OPEN_CURLY; break; case '}': token->type = TOKEN_CLOSE_CURLY; break; case '[': token->type = TOKEN_OPEN_SQUARE; break; case ']': token->type = TOKEN_CLOSE_SQUARE; break; case '*': token->type = TOKEN_STAR; break; case '.': token->type = TOKEN_DOT; break; case ',': token->type = TOKEN_COMMA; break; default: if (isStartIdentChar (c)) { parseIdentifier (token->string, c); token->lineNumber = getSourceLineNumber (); token->filePosition = getInputFilePosition (); token->keyword = lookupKeyword (vStringValue (token->string), Lang_go); if (isKeyword (token, KEYWORD_NONE)) token->type = TOKEN_IDENTIFIER; else token->type = TOKEN_KEYWORD; } else token->type = TOKEN_OTHER; break; } if (signature && vStringLength (signature) < MAX_SIGNATURE_LENGTH) { if (token->type == TOKEN_LEFT_ARROW) vStringCatS(signature, "<-"); else if (token->type == TOKEN_STRING) { // only struct member annotations can appear in function prototypes // so only `` type strings are possible vStringPut(signature, '`'); vStringCat(signature, token->string); vStringPut(signature, '`'); } else if (token->type == TOKEN_IDENTIFIER || token->type == TOKEN_KEYWORD) vStringCat(signature, token->string); else if (c != EOF) vStringPut(signature, c); } lastTokenType = token->type; }