/*
 * Skips a quoted region of the input.
 *
 * `c` is the character that opened the region (already read by the caller).
 * getCloseQuote() maps it to the matching closing character; when it returns
 * 0 nothing is consumed.  Otherwise characters are read until the nesting
 * depth drops back to zero or EOF is reached.  The character that brings the
 * depth back to zero is consumed.
 */
static void skipQuotes (int c)
{
	unsigned int depth = 0;
	const int openQuote = c;
	const int closeQuote = getCloseQuote (c);

	if (! closeQuote)
		return;

	for (; c != EOF; c = getcFromInputFile ())
	{
		/* A character only closes something while a region is open.  Without
		 * the depth test, an opener that is also its own closer would
		 * decrement the unsigned counter below zero on the very first
		 * character and the loop would run to EOF. */
		if (c == closeQuote && depth > 0)
			depth--;
		else if (c == openQuote)
			depth++;

		if (depth == 0)
			break;
	}
}
/*
 * Reads raw text up to (but not including) the next '<' or up to EOF.
 *
 * On '<' the character is pushed back and the token type is TOKEN_TEXT; on
 * EOF the type is TOKEN_EOF.  When `collectText` is set, the text is stored
 * in token->string with every whitespace character turned into a blank and
 * runs of blanks collapsed to one.
 */
static void readTokenText (tokenInfo *const token, bool collectText)
{
	int previous = 'X'; /* any non-blank value will do */

	vStringClear (token->string);

	for (;;)
	{
		int ch = getcFromInputFile ();

		if (ch == EOF)
		{
			token->type = TOKEN_EOF;
			return;
		}
		if (ch == '<')
		{
			ungetcToInputFile (ch);
			token->type = TOKEN_TEXT;
			return;
		}

		if (! collectText)
			continue;

		if (isspace (ch))
			ch = ' ';
		if (ch == ' ' && previous == ' ')
			continue; /* collapse consecutive blanks */

		vStringPut (token->string, ch);
		previous = ch;
	}
}
/* <script[:white:]+language[:white:]*=[:white:]*(php|'php'|"php")[:white:]*> * * This is ugly, but the whole "<script language=php>" tag is and we can't * really do better without adding a lot of code only for this */ static boolean isOpenScriptLanguagePhp (int c) { int quote = 0; /* <script[:white:]+language[:white:]*= */ if (c != '<' || tolower ((c = getcFromInputFile ())) != 's' || tolower ((c = getcFromInputFile ())) != 'c' || tolower ((c = getcFromInputFile ())) != 'r' || tolower ((c = getcFromInputFile ())) != 'i' || tolower ((c = getcFromInputFile ())) != 'p' || tolower ((c = getcFromInputFile ())) != 't' || ! isSpace ((c = getcFromInputFile ())) || tolower ((c = skipWhitespaces (c))) != 'l' || tolower ((c = getcFromInputFile ())) != 'a' || tolower ((c = getcFromInputFile ())) != 'n' || tolower ((c = getcFromInputFile ())) != 'g' || tolower ((c = getcFromInputFile ())) != 'u' || tolower ((c = getcFromInputFile ())) != 'a' || tolower ((c = getcFromInputFile ())) != 'g' || tolower ((c = getcFromInputFile ())) != 'e' || (c = skipWhitespaces (getcFromInputFile ())) != '=') return FALSE; /* (php|'php'|"php")> */ c = skipWhitespaces (getcFromInputFile ()); if (c == '"' || c == '\'') { quote = c; c = getcFromInputFile (); } if (tolower (c) != 'p' || tolower ((c = getcFromInputFile ())) != 'h' || tolower ((c = getcFromInputFile ())) != 'p' || (quote != 0 && (c = getcFromInputFile ()) != quote) || (c = skipWhitespaces (getcFromInputFile ())) != '>') return FALSE; return TRUE; }
/* Starting with `c`, reads from the input for as long as isSpace() holds and
 * returns the first character for which it does not (possibly `c` itself). */
static int skipWhitespaces (int c)
{
	for (;;)
	{
		if (! isSpace (c))
			return c;
		c = getcFromInputFile ();
	}
}
/* reads an HereDoc or a NowDoc (the part after the <<<).
 * 	<<<[ \t]*(ID|'ID'|"ID")
 * 	...
 * 	ID;?
 *
 * note that:
 *  1) starting ID must be immediately followed by a newline;
 *  2) closing ID is the same as opening one;
 *  3) closing ID must be immediately followed by a newline or a semicolon
 *     then a newline.
 *
 * Example of a *single* valid heredoc:
 * 	<<< FOO
 * 	something
 * 	something else
 * 	FOO this is not an end
 * 	FOO; this isn't either
 * 	FOO; # neither this is
 * 	FOO;
 * 	# previous line was the end, but the semicolon wasn't required
 *
 * The body is appended to `string`.  If the opening line is malformed (no
 * identifier, unbalanced quote, or no newline right after the identifier)
 * the last character read is pushed back and `string` is left untouched.
 */
static void parseHeredoc (vString *const string)
{
	int c;
	unsigned int len;
	char delimiter[64]; /* arbitrary limit, but more is crazy anyway */
	int quote = 0;

	/* skip blanks between <<< and the identifier */
	do
	{
		c = getcFromInputFile ();
	}
	while (c == ' ' || c == '\t');

	if (c == '\'' || c == '"')
	{
		quote = c;
		c = getcFromInputFile ();
	}
	for (len = 0; len < ARRAY_SIZE (delimiter) - 1; len++)
	{
		if (! isIdentChar (c))
			break;
		delimiter[len] = (char) c;
		c = getcFromInputFile ();
	}
	delimiter[len] = 0;

	if (len == 0) /* no delimiter, give up */
		goto error;
	if (quote)
	{
		if (c != quote) /* no closing quote for quoted identifier, give up */
			goto error;
		c = getcFromInputFile ();
	}
	if (c != '\r' && c != '\n') /* missing newline, give up */
		goto error;

	do
	{
		c = getcFromInputFile ();

		/* unterminated heredoc: stop here instead of storing EOF as if it
		 * were a character of the string */
		if (c == EOF)
			break;

		if (c != '\r' && c != '\n')
			vStringPut (string, (char) c);
		else
		{
			/* new line, check for a delimiter right after */
			int nl = c;
			int extra = EOF;

			c = getcFromInputFile ();
			/* compare as unsigned char: `c` is in 0..255 (or EOF) whereas a
			 * plain char may be signed, which would make high-bit bytes
			 * never match and let EOF match a 0xFF byte */
			for (len = 0;
			     c != 0 && c == (unsigned char) delimiter[len];
			     len++)
				c = getcFromInputFile ();

			if (delimiter[len] != 0)
				ungetcToInputFile (c);
			else
			{
				/* line start matched the delimiter, now check whether there
				 * is anything after it */
				if (c == '\r' || c == '\n')
				{
					ungetcToInputFile (c);
					break;
				}
				else if (c == ';')
				{
					int d = getcFromInputFile ();
					if (d == '\r' || d == '\n')
					{
						/* put back the semicolon since it's not part of the
						 * string.  we can't put back the newline, but it's a
						 * whitespace character nobody cares about it anyway */
						ungetcToInputFile (';');
						break;
					}
					else
					{
						/* put semicolon in the string and continue */
						extra = ';';
						ungetcToInputFile (d);
					}
				}
				else
				{
					/* something else follows the delimiter text (e.g.
					 * "FOO bar"): it is ordinary content, so hand it back
					 * to be read as part of the string rather than losing
					 * it */
					ungetcToInputFile (c);
				}
			}
			/* if we are here it wasn't a delimiter, so put everything in the
			 * string */
			vStringPut (string, (char) nl);
			vStringNCatS (string, delimiter, len);
			if (extra != EOF)
				vStringPut (string, (char) extra);
		}
	}
	while (c != EOF);

	vStringTerminate (string);

	return;

error:
	ungetcToInputFile (c);
}
/*
 * Reads the next Eiffel token from the input into `token`.
 *
 * Blanks, tabs and newlines are skipped, as are "--" comments (to the end of
 * the line).  Operators made of two characters consume their second
 * character; a look-ahead character that is not part of the operator is
 * pushed back unless it is whitespace.
 */
static void readToken (tokenInfo *const token)
{
	int c;

	token->type = TOKEN_UNDEFINED;
	token->keyword = KEYWORD_NONE;
	vStringClear (token->string);

getNextChar:

	c = getcFromInputFile ();
	while (c == ' ' || c == '\t' || c == '\n')
		c = getcFromInputFile ();

	switch (c)
	{
		/* single-character tokens */
		case EOF: token->type = TOKEN_EOF;           break;
		case '!': token->type = TOKEN_BANG;          break;
		case '$': token->type = TOKEN_DOLLAR;        break;
		case '(': token->type = TOKEN_OPEN_PAREN;    break;
		case ')': token->type = TOKEN_CLOSE_PAREN;   break;
		case ',': token->type = TOKEN_COMMA;         break;
		case '.': token->type = TOKEN_DOT;           break;
		case ';': token->type = TOKEN_SEMICOLON;     break;
		case '[': token->type = TOKEN_OPEN_BRACKET;  break;
		case ']': token->type = TOKEN_CLOSE_BRACKET; break;
		case '{': token->type = TOKEN_OPEN_BRACE;    break;
		case '}': token->type = TOKEN_CLOSE_BRACE;   break;
		case '~': token->type = TOKEN_TILDE;         break;

		case '+':
		case '*':
		case '^':
		case '=':
			token->type = TOKEN_OPERATOR;
			break;

		case '-':
		{
			const int next = getcFromInputFile ();

			if (next == '-') /* comment: drop the rest of the line */
			{
				skipToCharacter ('\n');
				goto getNextChar;
			}
			if (next == '>')
				token->type = TOKEN_CONSTRAINT;
			else
			{
				if (! isspace (next))
					ungetcToInputFile (next);
				token->type = TOKEN_OPERATOR;
			}
			break;
		}

		case '?':
		case ':':
		{
			const int next = getcFromInputFile ();

			if (next == '=')
				token->type = TOKEN_OPERATOR;
			else
			{
				if (! isspace (next))
					ungetcToInputFile (next);
				token->type = (c == ':') ? TOKEN_COLON : TOKEN_QUESTION;
			}
			break;
		}

		case '<':
		case '>':
		{
			/* may be followed by '=' or '>' */
			const int next = getcFromInputFile ();

			if (next != '=' && next != '>' && ! isspace (next))
				ungetcToInputFile (next);
			token->type = TOKEN_OPERATOR;
			break;
		}

		case '/':
		{
			/* may be followed by '/' or '=' */
			const int next = getcFromInputFile ();

			if (next != '/' && next != '=' && ! isspace (next))
				ungetcToInputFile (next);
			token->type = TOKEN_OPERATOR;
			break;
		}

		case '\\':
		{
			/* may be followed by a second backslash */
			const int next = getcFromInputFile ();

			if (next != '\\' && ! isspace (next))
				ungetcToInputFile (next);
			token->type = TOKEN_OPERATOR;
			break;
		}

		case '"':
			token->type = TOKEN_STRING;
			parseString (token->string);
			break;

		case '\'':
			token->type = TOKEN_CHARACTER;
			parseCharacter ();
			break;

		default:
			if (isalpha (c))
			{
				parseIdentifier (token->string, c);
				token->keyword = analyzeToken (token->string, Lang_eiffel);
				token->type = isKeyword (token, KEYWORD_NONE)
					? TOKEN_IDENTIFIER
					: TOKEN_KEYWORD;
			}
			else if (isdigit (c))
			{
				vString *const number = parseNumeric (c);

				vStringCat (token->string, number);
				vStringDelete (number);
				token->type = TOKEN_NUMERIC;
			}
			else if (isFreeOperatorChar (c))
			{
				parseFreeOperator (token->string, c);
				token->type = TOKEN_OPERATOR;
			}
			else
				token->type = TOKEN_UNDEFINED;
			break;
	}
}
/*
 * Reads a string literal (the opening '"' has already been consumed) and
 * stores its text in `string`.
 *
 * A plain string ends at the next '"' or at EOF.  When a newline directly
 * follows a '[', the string switches to verbatim mode: the text read so far
 * (minus the trailing '[') prefixed by ']' becomes the closer, `string` is
 * restarted, and a '"' only ends the string if the text of the current line
 * equals that closer.  '%' introduces an escape handled by
 * parseEscapedCharacter().
 */
static void parseString (vString *const string)
{
	boolean verbatim = FALSE;
	boolean align = FALSE;
	boolean end = FALSE;
	vString *verbatimCloser = vStringNew ();
	vString *lastLine = vStringNew ();
	int prev = '\0'; /* last character stored in `string` */
	int c;

	while (! end)
	{
		c = getcFromInputFile ();
		if (c == EOF)
			end = TRUE;
		else if (c == '"')
		{
			if (! verbatim)
				end = TRUE;
			else
				/* in verbatim mode the quote only closes the string when
				 * the current line so far is exactly the closer */
				end = (boolean) (strcmp (vStringValue (lastLine),
				                         vStringValue (verbatimCloser)) == 0);
		}
		else if (c == '\n')
		{
			if (verbatim)
				vStringClear (lastLine);
			/* the '{' opener is disabled here, so the '{' branch below is
			 * currently never taken */
			if (prev == '[' /* || prev == '{' */)
			{
				verbatim = TRUE;
				vStringClear (verbatimCloser);
				vStringClear (lastLine);
				if (prev == '{')
					vStringPut (verbatimCloser, '}');
				else
				{
					vStringPut (verbatimCloser, ']');
					align = TRUE;
				}
				/* closer = closing bracket + everything read before the
				 * opening bracket */
				vStringNCat (verbatimCloser, string, vStringLength (string) - 1);
				vStringClear (string);
			}
			if (verbatim && align)
			{
				/* skip the whitespace that starts the next line(s); `c` ends
				 * up holding the first non-space character */
				do
					c = getcFromInputFile ();
				while (isspace (c));
			}
		}
		else if (c == '%')
			c = parseEscapedCharacter ();
		if (! end)
		{
			vStringPut (string, c);
			if (verbatim)
			{
				vStringPut (lastLine, c);
				vStringTerminate (lastLine);
			}
			prev = c;
		}
	}
	vStringTerminate (string);
	vStringDelete (lastLine);
	vStringDelete (verbatimCloser);
}
/* Shifts the lexer's look-ahead by one character: the previous `next_c`
 * becomes `cur_c`, and a newly read input character becomes `next_c`. */
static void advanceChar (lexerState *lexer)
{
	const int incoming = getcFromInputFile();

	lexer->cur_c = lexer->next_c;
	lexer->next_c = incoming;
}
/*
 * Reads the next token into `token`.
 *
 * Whitespace and slash-star comments are skipped.  Quoted strings become
 * TOKEN_STRING (token->string holds the text including both delimiters,
 * with escape backslashes removed), runs of selector characters become
 * TOKEN_SELECTOR, and any other character is returned as a token whose type
 * is the character itself.
 */
static void readToken (tokenInfo *const token)
{
	int c;

	vStringClear (token->string);

getNextChar:

	c = getcFromInputFile ();
	while (isspace (c))
		c = getcFromInputFile ();

	token->type = c;
	switch (c)
	{
		case EOF: token->type = TOKEN_EOF; break;

		case '\'':
		case '"':
		{
			const int delimiter = c;

			vStringPut (token->string, c); /* opening delimiter */
			for (c = getcFromInputFile ();
			     c != EOF && c != delimiter;
			     c = getcFromInputFile ())
			{
				if (c == '\\')
				{
					/* the escaped character is taken literally; in
					 * particular an escaped delimiter must not end the
					 * string, so it is stored here before the loop
					 * condition gets to look at it */
					c = getcFromInputFile ();
					if (c == EOF)
						break;
				}
				vStringPut (token->string, c);
			}
			if (c != EOF)
				vStringPut (token->string, c); /* closing delimiter */
			token->type = TOKEN_STRING;
			break;
		}

		case '/': /* maybe comment start */
		{
			int d = getcFromInputFile ();
			if (d != '*')
			{
				ungetcToInputFile (d);
				vStringPut (token->string, c);
				token->type = c;
			}
			else
			{
				/* skip to the closing star-slash pair (or EOF), keeping the
				 * previous character in `c` and the current one in `d` */
				d = getcFromInputFile ();
				do
				{
					c = d;
					d = getcFromInputFile ();
				}
				while (d != EOF && ! (c == '*' && d == '/'));
				goto getNextChar;
			}
			break;
		}

		default:
			if (! isSelectorChar (c))
			{
				vStringPut (token->string, c);
				token->type = c;
			}
			else
			{
				parseSelector (token->string, c);
				token->type = TOKEN_SELECTOR;
			}
			break;
	}
}
/*
 * Reads the next markup token into `token`.
 *
 * Leading whitespace is skipped.  "<!-- ... -->" comments are either skipped
 * (`skipComments` set) or reported as TOKEN_COMMENT.  Quoted strings are
 * stored without their delimiters; names are stored lower-cased and end at
 * whitespace, EOF, or one of  < > / = ' "  (which is pushed back).
 */
static void readToken (tokenInfo *const token, bool skipComments)
{
	int c;

	vStringClear (token->string);

getNextChar:

	do
		c = getcFromInputFile ();
	while (isspace (c));

	switch (c)
	{
		case EOF: token->type = TOKEN_EOF;     break;
		case '>': token->type = TOKEN_TAG_END; break;
		case '=': token->type = TOKEN_EQUAL;   break;

		case '<':
		{
			const int lead = getcFromInputFile ();

			if (lead == '/')
				token->type = TOKEN_TAG_START2;
			else if (lead == '?')
				token->type = TOKEN_OTHER;
			else if (lead != '!')
			{
				ungetcToInputFile (lead);
				token->type = TOKEN_TAG_START;
			}
			else
			{
				/* "<!": a comment if followed by "--" */
				int next = getcFromInputFile ();

				if (next == '-')
				{
					next = getcFromInputFile ();
					if (next == '-')
					{
						/* consume up to and including "-->"; `dashes` is the
						 * number (capped at 2) of consecutive '-' seen
						 * right before the current character */
						int dashes = 0;
						int ch;

						while ((ch = getcFromInputFile ()) != EOF)
						{
							if (ch == '>' && dashes == 2)
								break;
							if (ch != '-')
								dashes = 0;
							else if (dashes < 2)
								dashes++;
						}

						if (skipComments)
							goto getNextChar;
						token->type = TOKEN_COMMENT;
						break;
					}
				}
				ungetcToInputFile (next);
				token->type = TOKEN_OTHER;
			}
			break;
		}

		case '/':
		{
			const int next = getcFromInputFile ();

			if (next == '>')
				token->type = TOKEN_TAG_END2;
			else
			{
				ungetcToInputFile (next);
				token->type = TOKEN_OTHER;
			}
			break;
		}

		case '"':
		case '\'':
		{
			const int delimiter = c;

			for (c = getcFromInputFile ();
			     c != EOF && c != delimiter;
			     c = getcFromInputFile ())
				vStringPut (token->string, c);
			token->type = TOKEN_STRING;
			break;
		}

		default:
		{
			for (;;)
			{
				vStringPut (token->string, tolower (c));
				c = getcFromInputFile ();
				if (isspace (c) || c == '<' || c == '>' || c == '/' ||
				    c == '=' || c == '\'' || c == '"' || c == EOF)
					break; /* leaves the for loop only */
			}

			if (c != EOF)
				ungetcToInputFile (c);
			token->type = TOKEN_NAME;
			break;
		}
	}
}
/*
 * Reads the next JSON token into `token`.
 *
 * Blanks, tabs, CR and LF are skipped, then the token's line number and file
 * position are recorded.  For strings the text between the quotes is stored
 * in token->string only when `includeStringRepr` is set (escape sequences are
 * kept as written).  Any run of identifier characters that is not `true`,
 * `false` or `null` is reported as TOKEN_NUMBER.
 */
static void readTokenFull (tokenInfo *const token,
                           boolean includeStringRepr)
{
	int c;

	token->type = TOKEN_UNDEFINED;
	vStringClear (token->string);

	do
		c = getcFromInputFile ();
	while (c == '\t' || c == ' ' || c == '\r' || c == '\n');

	token->lineNumber   = getInputLineNumber ();
	token->filePosition = getInputFilePosition ();

	switch (c)
	{
		case EOF: token->type = TOKEN_EOF;			break;
		case '[': token->type = TOKEN_OPEN_SQUARE;	break;
		case ']': token->type = TOKEN_CLOSE_SQUARE;	break;
		case '{': token->type = TOKEN_OPEN_CURLY;	break;
		case '}': token->type = TOKEN_CLOSE_CURLY;	break;
		case ':': token->type = TOKEN_COLON;		break;
		case ',': token->type = TOKEN_COMMA;		break;

		case '"':
		{
			boolean escaped = FALSE;
			token->type = TOKEN_STRING;
			while (TRUE)
			{
				c = getcFromInputFile ();
				/* EOF always ends the string, even right after a backslash;
				 * it must never be stored as if it were a character */
				if (c == EOF)
					break;
				/* we don't handle unicode escapes but they are safe */
				if (escaped)
					escaped = FALSE;
				else if (c == '\\')
					escaped = TRUE;
				else if (c >= 0x00 && c <= 0x1F)
					break; /* break on invalid, unescaped, control characters */
				else if (c == '"')
					break;
				if (includeStringRepr)
					vStringPut (token->string, c);
			}
			vStringTerminate (token->string);
			break;
		}

		default:
			if (! isIdentChar (c))
				token->type = TOKEN_UNDEFINED;
			else
			{
				do
				{
					vStringPut (token->string, c);
					c = getcFromInputFile ();
				}
				while (c != EOF && isIdentChar (c));
				vStringTerminate (token->string);
				/* hand back the character that ended the word */
				ungetcToInputFile (c);
				switch (lookupKeyword (vStringValue (token->string), Lang_json))
				{
					case KEYWORD_true:	token->type = TOKEN_TRUE;	break;
					case KEYWORD_false:	token->type = TOKEN_FALSE;	break;
					case KEYWORD_null:	token->type = TOKEN_NULL;	break;
					default:			token->type = TOKEN_NUMBER;	break;
				}
			}
			break;
	}
}
/*
 * Reads the next JavaScript token into `token`.
 *
 * token            receives type, keyword, text, line number and position.
 * include_newlines when set, a line break seen before the token may cause a
 *                  TOKEN_SEMICOLON to be returned in its place; the real
 *                  token is then kept in NextToken and returned by the next
 *                  call.
 * repr             if non-NULL, a textual representation of what was read is
 *                  appended to it (runs of whitespace become one blank).
 *
 * The type of the token returned is remembered in LastTokenType; it is used
 * to tell a division operator from the start of a regular expression.
 */
static void readTokenFull (tokenInfo *const token, boolean include_newlines, vString *const repr)
{
	int c;
	int i; /* number of characters read by the skipping loop below */
	boolean newline_encountered = FALSE;

	/* if we've got a token held back, emit it */
	if (NextToken)
	{
		copyToken (token, NextToken, FALSE);
		deleteToken (NextToken);
		NextToken = NULL;
		return;
	}

	token->type			= TOKEN_UNDEFINED;
	token->keyword		= KEYWORD_NONE;
	vStringClear (token->string);

getNextChar:
	i = 0;
	do
	{
		c = getcFromInputFile ();
		if (include_newlines && (c == '\r' || c == '\n'))
			newline_encountered = TRUE;
		i++;
	}
	while (c == '\t' || c == ' ' || c == '\r' || c == '\n');

	token->lineNumber   = getInputLineNumber ();
	token->filePosition = getInputFilePosition ();

	if (repr)
	{
		/* i > 1 means at least one whitespace character was skipped */
		if (i > 1)
			vStringPut (repr, ' ');
		vStringPut (repr, c);
	}

	switch (c)
	{
		case EOF: token->type = TOKEN_EOF;					break;
		case '(': token->type = TOKEN_OPEN_PAREN;			break;
		case ')': token->type = TOKEN_CLOSE_PAREN;			break;
		case ';': token->type = TOKEN_SEMICOLON;			break;
		case ',': token->type = TOKEN_COMMA;				break;
		case '.': token->type = TOKEN_PERIOD;				break;
		case ':': token->type = TOKEN_COLON;				break;
		case '{': token->type = TOKEN_OPEN_CURLY;			break;
		case '}': token->type = TOKEN_CLOSE_CURLY;			break;
		case '=': token->type = TOKEN_EQUAL_SIGN;			break;
		case '[': token->type = TOKEN_OPEN_SQUARE;			break;
		case ']': token->type = TOKEN_CLOSE_SQUARE;			break;

		case '+':
		case '-':
		{
			int d = getcFromInputFile ();
			if (d == c) /* ++ or -- */
				token->type = TOKEN_POSTFIX_OPERATOR;
			else
			{
				ungetcToInputFile (d);
				token->type = TOKEN_BINARY_OPERATOR;
			}
			break;
		}

		case '*':
		case '%':
		case '?':
		case '>':
		case '<':
		case '^':
		case '|':
		case '&':
			token->type = TOKEN_BINARY_OPERATOR;
			break;

		case '\'':
		case '"':
			token->type = TOKEN_STRING;
			parseString (token->string, c);
			token->lineNumber = getInputLineNumber ();
			token->filePosition = getInputFilePosition ();
			if (repr)
			{
				/* the opening quote is already in repr; add text and a
				 * closing quote */
				vStringCat (repr, token->string);
				vStringPut (repr, c);
			}
			break;

		case '`':
			token->type = TOKEN_TEMPLATE_STRING;
			parseTemplateString (token->string);
			token->lineNumber = getInputLineNumber ();
			token->filePosition = getInputFilePosition ();
			if (repr)
			{
				vStringCat (repr, token->string);
				vStringPut (repr, c);
			}
			break;

		case '\\':
			/* a backslash swallows a following backslash, double quote or
			 * whitespace character; anything else is pushed back */
			c = getcFromInputFile ();
			if (c != '\\' && c != '"' && !isspace (c))
				ungetcToInputFile (c);
			token->type = TOKEN_CHARACTER;
			token->lineNumber = getInputLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;

		case '/':
		{
			int d = getcFromInputFile ();
			if ( (d != '*') &&		/* is this the start of a comment? */
				 (d != '/') )		/* is a one line comment? */
			{
				ungetcToInputFile (d);
				/* after something that can end an operand the slash is a
				 * division, otherwise it starts a regular expression */
				switch (LastTokenType)
				{
					case TOKEN_CHARACTER:
					case TOKEN_IDENTIFIER:
					case TOKEN_STRING:
					case TOKEN_TEMPLATE_STRING:
					case TOKEN_CLOSE_CURLY:
					case TOKEN_CLOSE_PAREN:
					case TOKEN_CLOSE_SQUARE:
						token->type = TOKEN_BINARY_OPERATOR;
						break;

					default:
						token->type = TOKEN_REGEXP;
						parseRegExp ();
						token->lineNumber = getInputLineNumber ();
						token->filePosition = getInputFilePosition ();
						break;
				}
			}
			else
			{
				if (repr) /* remove the / we added */
					repr->buffer[--repr->length] = 0;
				if (d == '*')
				{
					/* block comment: look for a '*' directly followed by
					 * '/', giving up at EOF */
					do
					{
						skipToCharacterInInputFile ('*');
						c = getcFromInputFile ();
						if (c == '/')
							break;
						else
							ungetcToInputFile (c);
					} while (c != EOF && c != '\0');
					goto getNextChar;
				}
				else if (d == '/')	/* one line comment: skip to end of line */
				{
					skipToCharacterInInputFile ('\n');
					/* if we care about newlines, put it back so it is seen */
					if (include_newlines)
						ungetcToInputFile ('\n');
					goto getNextChar;
				}
			}
			break;
		}

		case '#':
			/* skip shebang in case of e.g. Node.js scripts */
			if (token->lineNumber > 1)
				token->type = TOKEN_UNDEFINED;
			else if ((c = getcFromInputFile ()) != '!')
			{
				ungetcToInputFile (c);
				token->type = TOKEN_UNDEFINED;
			}
			else
			{
				skipToCharacterInInputFile ('\n');
				goto getNextChar;
			}
			break;

		default:
			if (! isIdentChar (c))
				token->type = TOKEN_UNDEFINED;
			else
			{
				parseIdentifier (token->string, c);
				token->lineNumber = getInputLineNumber ();
				token->filePosition = getInputFilePosition ();
				token->keyword = analyzeToken (token->string, Lang_js);
				if (isKeyword (token, KEYWORD_NONE))
					token->type = TOKEN_IDENTIFIER;
				else
					token->type = TOKEN_KEYWORD;
				/* the first character is already in repr (added above), so
				 * only the rest of the identifier is appended */
				if (repr && vStringLength (token->string) > 1)
					vStringCatS (repr, vStringValue (token->string) + 1);
			}
			break;
	}

	if (include_newlines && newline_encountered)
	{
		/* This isn't strictly correct per the standard, but following the
		 * real rules means understanding all statements, and that's not
		 * what the parser currently does.  What we do here is a guess, by
		 * avoiding inserting semicolons that would make the statement on
		 * the left or right obviously invalid.  Hopefully this should not
		 * have false negatives (e.g. should not miss insertion of a semicolon)
		 * but might have false positives (e.g. it will wrongfully emit a
		 * semicolon sometimes, i.e. for the newline in "foo\n(bar)").
		 * This should however be mostly harmless as we only deal with
		 * newlines in specific situations where we know a false positive
		 * wouldn't hurt too bad. */

		/* these already end a statement, so no need to duplicate it */
		#define IS_STMT_SEPARATOR(t) ((t) == TOKEN_SEMICOLON    || \
		                              (t) == TOKEN_EOF          || \
		                              (t) == TOKEN_COMMA        || \
		                              (t) == TOKEN_CLOSE_CURLY  || \
		                              (t) == TOKEN_OPEN_CURLY)
		/* these cannot be the start or end of a statement */
		#define IS_BINARY_OPERATOR(t) ((t) == TOKEN_EQUAL_SIGN      || \
		                               (t) == TOKEN_COLON           || \
		                               (t) == TOKEN_PERIOD          || \
		                               (t) == TOKEN_BINARY_OPERATOR)

		if (! IS_STMT_SEPARATOR(LastTokenType) &&
		    ! IS_STMT_SEPARATOR(token->type) &&
		    ! IS_BINARY_OPERATOR(LastTokenType) &&
		    ! IS_BINARY_OPERATOR(token->type) &&
		    /* these cannot be followed by a semicolon */
		    ! (LastTokenType == TOKEN_OPEN_PAREN ||
		       LastTokenType == TOKEN_OPEN_SQUARE))
		{
			/* hold the token... */
			Assert (NextToken == NULL);
			NextToken = newToken ();
			copyToken (NextToken, token, FALSE);

			/* ...and emit a semicolon instead */
			token->type		= TOKEN_SEMICOLON;
			token->keyword	= KEYWORD_NONE;
			vStringClear (token->string);
			if (repr)
				vStringPut (token->string, '\n');
		}

		#undef IS_STMT_SEPARATOR
		#undef IS_BINARY_OPERATOR
	}

	/* remembered for the '/' disambiguation and the semicolon guess above */
	LastTokenType = token->type;
}
/*
 * Reads the next VHDL token into `token`.
 *
 * Blanks, tabs and newlines are skipped, as are "--" comments.  The token's
 * line number and file position are refreshed after every character read
 * while skipping, and again after a string or identifier has been parsed.
 * A single quote yields a token of type TOKEN_NONE.
 */
static void readToken (tokenInfo * const token)
{
	int c;

	token->type = TOKEN_NONE;
	token->keyword = KEYWORD_NONE;
	vStringClear (token->string);

getNextChar:

	for (;;)
	{
		c = getcFromInputFile ();
		token->lineNumber = getInputLineNumber ();
		token->filePosition = getInputFilePosition ();
		if (c != '\t' && c != ' ' && c != '\n')
			break;
	}

	switch (c)
	{
		case EOF: token->type = TOKEN_EOF;         break;
		case '(': token->type = TOKEN_OPEN_PAREN;  break;
		case ')': token->type = TOKEN_CLOSE_PAREN; break;
		case ';': token->type = TOKEN_SEMICOLON;   break;
		case '.': token->type = TOKEN_PERIOD;      break;
		case ',': token->type = TOKEN_COMMA;       break;

		case '\'':
			/* only single char are inside simple quotes, or it is for
			 * attributes, so we don't care */
			break;

		case '"':
			token->type = TOKEN_STRING;
			parseString (token->string, c);
			token->lineNumber = getInputLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;

		case '-':
		{
			const int next = getcFromInputFile ();

			if (next == '-') /* start of a comment */
			{
				skipToCharacterInInputFile ('\n');
				goto getNextChar;
			}
			if (! isspace (next))
				ungetcToInputFile (next);
			token->type = TOKEN_OPERATOR;
			break;
		}

		default:
			if (isIdentChar1 (c))
			{
				parseIdentifier (token->string, c);
				token->lineNumber = getInputLineNumber ();
				token->filePosition = getInputFilePosition ();
				token->keyword = lookupCaseKeyword (vStringValue (token->string),
				                                    Lang_vhdl);
				token->type = isKeyword (token, KEYWORD_NONE)
					? TOKEN_IDENTIFIER
					: TOKEN_KEYWORD;
			}
			else
				token->type = TOKEN_NONE;
			break;
	}
}
/*
 * Reads the next PHP token into `token`.
 *
 * While the global InPhp flag is clear, input is skipped up to the next PHP
 * start (findPhpStart()); "?>" and "</script>" clear the flag again.
 * Comments ('#', "//" and slash-star blocks) are skipped.  When the token
 * read is a semicolon or a curly brace, the access and implementation
 * details kept in CurrentStatement are reset.
 */
static void readToken (tokenInfo *const token)
{
	int c;

	token->type		= TOKEN_UNDEFINED;
	token->keyword		= KEYWORD_NONE;
	vStringClear (token->string);

getNextChar:

	if (! InPhp)
	{
		c = findPhpStart ();
		if (c != EOF)
			InPhp = TRUE;
	}
	else
		c = getcFromInputFile ();

	c = skipWhitespaces (c);

	token->lineNumber   = getInputLineNumber ();
	token->filePosition = getInputFilePosition ();

	switch (c)
	{
		case EOF: token->type = TOKEN_EOF;					break;
		case '(': token->type = TOKEN_OPEN_PAREN;			break;
		case ')': token->type = TOKEN_CLOSE_PAREN;			break;
		case ';': token->type = TOKEN_SEMICOLON;			break;
		case ',': token->type = TOKEN_COMMA;				break;
		case '.': token->type = TOKEN_PERIOD;				break;
		case ':': token->type = TOKEN_COLON;				break;
		case '{': token->type = TOKEN_OPEN_CURLY;			break;
		case '}': token->type = TOKEN_CLOSE_CURLY;			break;
		case '[': token->type = TOKEN_OPEN_SQUARE;			break;
		case ']': token->type = TOKEN_CLOSE_SQUARE;			break;
		case '&': token->type = TOKEN_AMPERSAND;			break;
		case '\\': token->type = TOKEN_BACKSLASH;			break;

		case '=':
		{
			/* "==" and "=>" are operators, a lone '=' is an assignment */
			int d = getcFromInputFile ();
			if (d == '=' || d == '>')
				token->type = TOKEN_OPERATOR;
			else
			{
				ungetcToInputFile (d);
				token->type = TOKEN_EQUAL_SIGN;
			}
			break;
		}

		case '\'':
		case '"':
			token->type = TOKEN_STRING;
			parseString (token->string, c);
			token->lineNumber = getInputLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;

		case '<':
		{
			int d = getcFromInputFile ();
			if (d == '/')
			{
				/* </script[:white:]*> */
				if (tolower ((d = getcFromInputFile ())) == 's' &&
					tolower ((d = getcFromInputFile ())) == 'c' &&
					tolower ((d = getcFromInputFile ())) == 'r' &&
					tolower ((d = getcFromInputFile ())) == 'i' &&
					tolower ((d = getcFromInputFile ())) == 'p' &&
					tolower ((d = getcFromInputFile ())) == 't' &&
					(d = skipWhitespaces (getcFromInputFile ())) == '>')
				{
					InPhp = FALSE;
					goto getNextChar;
				}
				else
				{
					/* only the last character read is pushed back */
					ungetcToInputFile (d);
					token->type = TOKEN_UNDEFINED;
				}
			}
			else if (d == '<' && (d = getcFromInputFile ()) == '<')
			{
				/* "<<<" starts a heredoc/nowdoc */
				token->type = TOKEN_STRING;
				parseHeredoc (token->string);
			}
			else
			{
				ungetcToInputFile (d);
				token->type = TOKEN_UNDEFINED;
			}
			break;
		}

		case '#': /* comment */
			skipSingleComment ();
			goto getNextChar;
			break;

		case '+':
		case '-':
		case '*':
		case '%':
		{
			/* swallow the second character of "+=", "-=", "*=", "%=" and
			 * "->" */
			int d = getcFromInputFile ();
			if (d != '=' && ! (c == '-' && d == '>'))
				ungetcToInputFile (d);
			token->type = TOKEN_OPERATOR;
			break;
		}

		case '/': /* division or comment start */
		{
			int d = getcFromInputFile ();
			if (d == '/') /* single-line comment */
			{
				skipSingleComment ();
				goto getNextChar;
			}
			else if (d == '*')
			{
				/* block comment: look for a '*' directly followed by '/',
				 * giving up at EOF */
				do
				{
					c = skipToCharacter ('*');
					if (c != EOF)
					{
						c = getcFromInputFile ();
						if (c == '/')
							break;
						else
							ungetcToInputFile (c);
					}
				} while (c != EOF && c != '\0');
				goto getNextChar;
			}
			else
			{
				/* '/' or "/=" */
				if (d != '=')
					ungetcToInputFile (d);
				token->type = TOKEN_OPERATOR;
			}
			break;
		}

		case '$': /* variable start */
		{
			int d = getcFromInputFile ();
			if (! isIdentChar (d))
			{
				ungetcToInputFile (d);
				token->type = TOKEN_UNDEFINED;
			}
			else
			{
				/* the stored name does not include the '$' */
				parseIdentifier (token->string, d);
				token->type = TOKEN_VARIABLE;
			}
			break;
		}

		case '?': /* maybe the end of the PHP chunk */
		{
			int d = getcFromInputFile ();
			if (d == '>')
			{
				InPhp = FALSE;
				goto getNextChar;
			}
			else
			{
				ungetcToInputFile (d);
				token->type = TOKEN_UNDEFINED;
			}
			break;
		}

		default:
			if (! isIdentChar (c))
				token->type = TOKEN_UNDEFINED;
			else
			{
				parseIdentifier (token->string, c);
				token->keyword = analyzeToken (token->string, getInputLanguage ());
				if (token->keyword == KEYWORD_NONE)
					token->type = TOKEN_IDENTIFIER;
				else
					token->type = TOKEN_KEYWORD;
			}
			break;
	}

	if (token->type == TOKEN_SEMICOLON ||
		token->type == TOKEN_OPEN_CURLY ||
		token->type == TOKEN_CLOSE_CURLY)
	{
		/* reset current statement details on statement end, and when entering
		 * a deeper scope.
		 * it is a bit ugly to do this in readToken(), but it makes everything
		 * a lot simpler. */
		CurrentStatement.access = ACCESS_UNDEFINED;
		CurrentStatement.impl = IMPL_UNDEFINED;
	}
}
/*
 * Scans the whole input line by line and emits BETA tags.
 *
 * A line that starts and ends with "--" (outside comments and strings) is a
 * fragment definition and yields a K_FRAGMENT tag.  Any other line is
 * scanned character by character by three small scanners joined with gotos
 * (program text, comment text, string text); program text yields K_SLOT tags
 * for "<<SLOT name: ...>>" and K_PATTERN tags for names in front of ':'.
 * Comment state carries over to the next line; string state is reset at
 * every end of line.
 */
static void findBetaTags (void)
{
	vString *line = vStringNew ();
	bool incomment = false;
	bool inquote = false;
	bool dovirtuals = BetaKinds [K_VIRTUAL].enabled;
	bool dopatterns = BetaKinds [K_PATTERN].enabled;
	int c;

	do
	{
		bool foundfragmenthere = false;
		/* find fragment definition (line that starts and ends with --) */
		int last;
		int first;

		vStringClear (line);

		/* read one line; the line terminator itself is not stored */
		while ((c = getcFromInputFile ()) != EOF && c != '\n' && c != '\r')
			vStringPut (line, c);

		last = vStringLength (line) - 1;
		first = 0;
		/* skip white space at start and end of line */
		while (last > 0 && isspace ((int) vStringChar (line, last))) last--;
		while (first < last && isspace ((int) vStringChar (line, first))) first++;
		/* if line still has a reasonable length and ... */
		if (last - first > 4 &&
			(vStringChar (line, first)     == '-' &&
			 vStringChar (line, first + 1) == '-' &&
			 vStringChar (line, last)      == '-' &&
			 vStringChar (line, last - 1)  == '-'))
		{
			if (!incomment && !inquote)
			{
				foundfragmenthere = true;
				/* skip past -- and whitespace.  Also skip back past 'dopart'
				   or 'attributes' to the :.  We have to do this because there
				   is no sensible way to include whitespace in a ctags token
				   so the conventional space after the ':' would mess us up */
				last -= 2;
				first += 2;
				while (last && vStringChar (line, last) != ':') last--;
				while (last && (isspace ((int) vStringChar (line, last-1)))) last--;
				while (first < last &&
					   (isspace ((int) vStringChar (line, first)) ||
						vStringChar (line, first) == '-'))
					first++;
				/* If there's anything left it is a fragment title */
				if (first < last - 1)
				{
					/* cut the line at `last` so the title is a C string */
					vStringChar (line, last) = 0;
					if (strcasecmp ("LIB", vStringValue (line) + first) &&
						strcasecmp ("PROGRAM", vStringValue (line) + first))
					{
						makeBetaTag (vStringValue (line) + first, K_FRAGMENT);
					}
				}
			}
		} else {
			int pos = 0;
			int len = vStringLength (line);
			/* resume in the scanner matching the state left by the previous
			 * line */
			if (inquote) goto stringtext;
			if (incomment) goto commenttext;
		programtext:
			for ( ; pos < len; pos++)
			{
				if (vStringChar (line, pos) == '\'')
				{
					pos++;
					inquote = true;
					goto stringtext;
				}
				if (vStringChar (line, pos) == '{')
				{
					pos++;
					incomment = true;
					goto commenttext;
				}
				if (vStringChar (line, pos) == '(' && pos < len - 1 &&
					vStringChar (line, pos+1) == '*')
				{
					pos +=2;
					incomment = true;
					goto commenttext;
				}
				/*
				 * SLOT definition looks like this:
				 * <<SLOT nameofslot: dopart>>
				 * or
				 * <<SLOT nameofslot: descriptor>>
				 */
				if (!foundfragmenthere &&
					vStringChar (line, pos) == '<' &&
					pos+1 < len &&
					vStringChar (line, pos+1) == '<' &&
					strstr (vStringValue (line) + pos, ">>"))
				{
					/* Found slot name, get start and end */
					int eoname;
					char c2;
					pos += 2; /* skip past << */
					/* skip past space before SLOT */
					while (pos < len && isspace ((int) vStringChar (line, pos)))
						pos++;
					/* skip past SLOT */
					if (pos+4 <= len &&
						!strncasecmp (vStringValue(line) + pos, "SLOT", (size_t)4))
						pos += 4;
					/* skip past space after SLOT */
					while (pos < len && isspace ((int) vStringChar (line, pos)))
						pos++;
					eoname = pos;
					/* skip to end of name */
					while (eoname < len &&
						   (c2 = vStringChar (line, eoname)) != '>' &&
						   c2 != ':' &&
						   !isspace ((int) c2))
						eoname++;
					if (eoname < len)
					{
						/* terminate the name in place; `len` still refers to
						 * the full line so scanning can go on after it */
						vStringChar (line, eoname) = 0;
						if (strcasecmp ("LIB", vStringValue (line) + pos) &&
							strcasecmp ("PROGRAM", vStringValue (line) + pos) &&
							strcasecmp ("SLOT", vStringValue (line) + pos))
						{
							makeBetaTag (vStringValue (line) + pos, K_SLOT);
						}
					}
					if (eoname+1 < len) {
						pos = eoname + 1;
					} else {
						pos = len;
						continue;
					}
				}
				/* Only patterns that are virtual, extensions of virtuals or
				 * final bindings are normally included so as not to overload
				 * totally.
				 * That means one of the forms name:: name:< or name::<
				 */
				if (!foundfragmenthere &&
					vStringChar (line, pos) == ':' &&
					(dopatterns ||
					 (dovirtuals &&
					  (vStringChar (line, pos+1) == ':' ||
					   vStringChar (line, pos+1) == '<')
					 )
					)
				   )
				{
					/* Found pattern name, get start and end */
					int eoname = pos;
					int soname;
					while (eoname && isspace ((int) vStringChar (line, eoname-1)))
						eoname--;
				foundanothername:
					/* terminate right after name */
					vStringChar (line, eoname) = 0;
					soname = eoname;
					/* walk back over the characters accepted by isbident() */
					while (soname && isbident (vStringChar (line, soname-1)))
					{
						soname--;
					}
					if (soname != eoname)
					{
						makeBetaTag (vStringValue (line) + soname, K_PATTERN);
						/* scan back past white space */
						while (soname && isspace ((int) vStringChar (line, soname-1)))
							soname--;
						if (soname && vStringChar (line, soname-1) == ',')
						{
							/* we found a new pattern name before comma */
							eoname = soname;
							goto foundanothername;
						}
					}
				}
			}
			goto endofline;
		commenttext:
			/* inside a comment: look for its end, either "*)" or '}' */
			for ( ; pos < len; pos++)
			{
				if (vStringChar (line, pos) == '*' && pos < len - 1 &&
					vStringChar (line, pos+1) == ')')
				{
					pos += 2;
					incomment = false;
					goto programtext;
				}
				if (vStringChar (line, pos) == '}')
				{
					pos++;
					incomment = false;
					goto programtext;
				}
			}
			goto endofline;
		stringtext:
			/* inside a string: a backslash skips the next character, a
			 * single quote ends the string unless it is doubled */
			for ( ; pos < len; pos++)
			{
				if (vStringChar (line, pos) == '\\')
				{
					if (pos < len - 1) pos++;
				}
				else if (vStringChar (line, pos) == '\'')
				{
					pos++;
					/* support obsolete '' syntax */
					if (pos < len && vStringChar (line, pos) == '\'')
					{
						continue;
					}
					inquote = false;
					goto programtext;
				}
			}
		}
		endofline:
		inquote = false;  /* This shouldn't really make a difference */
	} while (c != EOF);
	vStringDelete (line);
}
/*
 * Reads the next Go token into `token`.
 *
 * A newline that follows an identifier, string, "other" token or closing
 * bracket is turned into a TOKEN_SEMICOLON.  Comments are skipped and then
 * treated like a newline (line comments, block comments containing one) or
 * like a blank.  While the global `signature` is set and shorter than
 * MAX_SIGNATURE_LENGTH, a textual form of each token (and one blank for the
 * whitespace in front of it) is appended to it.
 */
static void readToken (tokenInfo *const token)
{
	int c;
	/* type of the token returned by the previous call; drives the semicolon
	 * injection below */
	static tokenType lastTokenType = TOKEN_NONE;
	boolean firstWhitespace = TRUE;
	boolean whitespace;

	token->type = TOKEN_NONE;
	token->keyword = KEYWORD_NONE;
	vStringClear (token->string);

getNextChar:
	do
	{
		c = getcFromInputFile ();
		token->lineNumber = getInputLineNumber ();
		token->filePosition = getInputFilePosition ();
		if (c == '\n' && (lastTokenType == TOKEN_IDENTIFIER ||
						  lastTokenType == TOKEN_STRING ||
						  lastTokenType == TOKEN_OTHER ||
						  lastTokenType == TOKEN_CLOSE_PAREN ||
						  lastTokenType == TOKEN_CLOSE_CURLY ||
						  lastTokenType == TOKEN_CLOSE_SQUARE))
		{
			c = ';';  /* semicolon injection */
		}
		whitespace = c == '\t'  ||  c == ' ' ||  c == '\r' || c == '\n';
		/* record at most one blank per call, however much whitespace is
		 * skipped */
		if (signature && whitespace && firstWhitespace && vStringLength (signature) < MAX_SIGNATURE_LENGTH)
		{
			firstWhitespace = FALSE;
			vStringPut(signature, ' ');
		}
	}
	while (whitespace);

	switch (c)
	{
		case EOF:
			token->type = TOKEN_EOF;
			break;

		case ';':
			token->type = TOKEN_SEMICOLON;
			break;

		case '/':
			{
				boolean hasNewline = FALSE;
				int d = getcFromInputFile ();
				switch (d)
				{
					case '/':
						skipToCharacterInInputFile ('\n');
						/* Line comments start with the
						 * character sequence // and
						 * continue through the next
						 * newline. A line comment acts
						 * like a newline.  */
						ungetcToInputFile ('\n');
						goto getNextChar;
					case '*':
						/* block comment: find a '*' directly followed by
						 * '/', remembering whether a newline was crossed */
						do
						{
							do
							{
								d = getcFromInputFile ();
								if (d == '\n')
								{
									hasNewline = TRUE;
								}
							} while (d != EOF && d != '*');

							c = getcFromInputFile ();
							if (c == '/')
								break;
							else
								ungetcToInputFile (c);
						} while (c != EOF && c != '\0');

						/* the comment stands for a newline if it contained
						 * one, for a blank otherwise */
						ungetcToInputFile (hasNewline ? '\n' : ' ');
						goto getNextChar;
					default:
						token->type = TOKEN_OTHER;
						ungetcToInputFile (d);
						break;
				}
			}
			break;

		case '"':
		case '\'':
		case '`':
			token->type = TOKEN_STRING;
			parseString (token->string, c);
			token->lineNumber = getInputLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;

		case '<':
			{
				int d = getcFromInputFile ();

				if (d == '-')
					token->type = TOKEN_LEFT_ARROW;
				else
				{
					ungetcToInputFile (d);
					token->type = TOKEN_OTHER;
				}
			}
			break;

		case '(':
			token->type = TOKEN_OPEN_PAREN;
			break;

		case ')':
			token->type = TOKEN_CLOSE_PAREN;
			break;

		case '{':
			token->type = TOKEN_OPEN_CURLY;
			break;

		case '}':
			token->type = TOKEN_CLOSE_CURLY;
			break;

		case '[':
			token->type = TOKEN_OPEN_SQUARE;
			break;

		case ']':
			token->type = TOKEN_CLOSE_SQUARE;
			break;

		case '*':
			token->type = TOKEN_STAR;
			break;

		case '.':
			token->type = TOKEN_DOT;
			break;

		case ',':
			token->type = TOKEN_COMMA;
			break;

		default:
			if (isStartIdentChar (c))
			{
				parseIdentifier (token->string, c);
				token->lineNumber = getInputLineNumber ();
				token->filePosition = getInputFilePosition ();
				token->keyword = lookupKeyword (vStringValue (token->string), Lang_go);
				if (isKeyword (token, KEYWORD_NONE))
					token->type = TOKEN_IDENTIFIER;
				else
					token->type = TOKEN_KEYWORD;
			}
			else
				token->type = TOKEN_OTHER;
			break;
	}

	if (signature && vStringLength (signature) < MAX_SIGNATURE_LENGTH)
	{
		if (token->type == TOKEN_LEFT_ARROW)
			vStringCatS(signature, "<-");
		else if (token->type == TOKEN_STRING)
		{
			/* only struct member annotations can appear in function
			 * prototypes so only `` type strings are possible */
			vStringPut(signature, '`');
			vStringCat(signature, token->string);
			vStringPut(signature, '`');
		}
		else if (token->type == TOKEN_IDENTIFIER || token->type == TOKEN_KEYWORD)
			vStringCat(signature, token->string);
		else if (c != EOF)
			/* every other token is represented by its first character */
			vStringPut(signature, c);
	}

	lastTokenType = token->type;
}
/*
 * Reads the next token into `token`.
 *
 * Whitespace, '#' line comments and "<# ... #>" block comments are skipped.
 * "$name" yields TOKEN_VARIABLE (the name is stored without the '$'); other
 * identifiers yield TOKEN_KEYWORD when isTokenFunction() accepts them and
 * TOKEN_IDENTIFIER otherwise.
 */
static void readToken (tokenInfo *const token)
{
	int c;

	token->type = TOKEN_UNDEFINED;
	vStringClear (token->string);

getNextChar:

	c = skipWhitespaces (getcFromInputFile ());

	token->lineNumber   = getInputLineNumber ();
	token->filePosition = getInputFilePosition ();

	switch (c)
	{
		case EOF: token->type = TOKEN_EOF;          break;
		case '(': token->type = TOKEN_OPEN_PAREN;   break;
		case ')': token->type = TOKEN_CLOSE_PAREN;  break;
		case ';': token->type = TOKEN_SEMICOLON;    break;
		case ',': token->type = TOKEN_COMMA;        break;
		case '.': token->type = TOKEN_PERIOD;       break;
		case ':': token->type = TOKEN_COLON;        break;
		case '{': token->type = TOKEN_OPEN_CURLY;   break;
		case '}': token->type = TOKEN_CLOSE_CURLY;  break;
		case '[': token->type = TOKEN_OPEN_SQUARE;  break;
		case ']': token->type = TOKEN_CLOSE_SQUARE; break;
		case '=': token->type = TOKEN_EQUAL_SIGN;   break;

		case '\'':
		case '"':
			token->type = TOKEN_STRING;
			parseString (token->string, c);
			token->lineNumber = getInputLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;

		case '<':
		{
			const int next = getcFromInputFile ();

			if (next != '#')
			{
				ungetcToInputFile (next);
				token->type = TOKEN_UNDEFINED;
				break;
			}

			/* <# ... #> multiline comment: look for a '#' directly followed
			 * by '>', giving up at EOF */
			for (;;)
			{
				int ch = skipToCharacterInInputFile ('#');

				if (ch == EOF)
					break;
				ch = getcFromInputFile ();
				if (ch == '>')
					break;
				ungetcToInputFile (ch);
				if (ch == EOF)
					break;
			}
			goto getNextChar;
		}

		case '#': /* comment */
			skipSingleComment ();
			goto getNextChar;

		case '+':
		case '-':
		case '*':
		case '/':
		case '%':
		{
			/* swallow the '=' of a compound assignment operator */
			const int next = getcFromInputFile ();

			if (next != '=')
				ungetcToInputFile (next);
			token->type = TOKEN_OPERATOR;
			break;
		}

		case '$': /* variable start */
		{
			const int first = getcFromInputFile ();

			if (isIdentChar (first))
			{
				parseIdentifier (token->string, first);
				token->type = TOKEN_VARIABLE;
			}
			else
			{
				ungetcToInputFile (first);
				token->type = TOKEN_UNDEFINED;
			}
			break;
		}

		default:
			if (isIdentChar (c))
			{
				parseIdentifier (token->string, c);
				token->type = isTokenFunction (token->string)
					? TOKEN_KEYWORD
					: TOKEN_IDENTIFIER;
			}
			else
				token->type = TOKEN_UNDEFINED;
			break;
	}
}