/* Allocate a fresh token, initialized to the "no token yet" state and
 * stamped with the current source location.  The caller owns the result
 * (and the vString inside it). */
static tokenInfo *newToken (void)
{
	tokenInfo *const tok = xMalloc (1, tokenInfo);

	tok->type = TOKEN_NONE;
	tok->keyword = KEYWORD_NONE;
	tok->string = vStringNew ();
	/* Record where this token starts in the input. */
	tok->lineNumber = getSourceLineNumber ();
	tok->filePosition = getInputFilePosition ();

	return tok;
}
/*****************************************************************************
 Build a function symbol from the text that follows the function keyword:
 parse the identifier out of `cp` into `def`, collect the argument list,
 and attach the new symbol to `parent` (or to the root symbol table when
 no parent is given).  `def` is cleared before returning.
 NOTE(review): the previous header said the text follows the "def" keyword,
 which looks borrowed from a Python parser — confirm which keyword this
 Perl parser actually dispatches on.
*****************************************************************************/
PerlSymbol* Parser_Perl::makeFunction (const char *cp, QString *def, Symbol *parent)
{
	/* The keyword was consumed on the previous call to the line reader,
	 * hence the -1 adjustment. */
	const int declLine = getSourceLineNumber() - 1;

	cp = parseIdentifier(cp, def);
	const QString signature = parseArgs();

	Symbol *owner = mSymbols;
	if (parent)
		owner = parent;

	PerlSymbol *fnSymbol = new PerlSymbol(Symbol::SymbolFunc, *def, owner);
	fnSymbol->setDetailedText(QString("%1 (%2)").arg(*def).arg(signature).simplified());
	fnSymbol->setLine(declLine);

	def->clear();
	return fnSymbol;
}
/* Create a token in the undefined state with empty string/scope buffers,
 * stamped with the current input position.  Caller owns the allocation. */
static tokenInfo *newToken (void)
{
	tokenInfo *const tok = xMalloc (1, tokenInfo);

	tok->type = TOKEN_UNDEFINED;
	tok->scopeKind = TAG_NONE;
	tok->string = vStringNew ();
	tok->scope = vStringNew ();
	/* Remember where in the input this token was created. */
	tok->lineNumber = getSourceLineNumber ();
	tok->filePosition = getInputFilePosition ();

	return tok;
}
/*****************************************************************************
 Build a class symbol from the text that follows the class keyword: parse
 the identifier out of `cp` and attach the new symbol to `parent` (or to
 the root symbol table when no parent is given).
 NOTE(review): the header originally said "class" keyword — in Perl the
 construct is usually "package"; confirm which keyword the caller matched.
*****************************************************************************/
PerlSymbol* Parser_Perl::makeClass (const char *cp, Symbol *parent)
{
	/* The keyword was consumed on the previous line read, hence -1. */
	int lineNum = getSourceLineNumber() - 1;

	/* Use an automatic QString.  The old code heap-allocated one and then
	 * guarded the delete with `if (name)` — dead code, since operator new
	 * never returns null (it throws on failure); the allocation itself was
	 * also unnecessary and would leak if an exception escaped first. */
	QString name;
	cp = parseIdentifier (cp, &name);

	Symbol * parentSymbol = (parent) ? parent : mSymbols;
	PerlSymbol *symbol = new PerlSymbol(Symbol::SymbolClass, name, parentSymbol);
	symbol->setDetailedText(QString("%1").arg(name).simplified());
	symbol->setLine(lineNum);
	return symbol;
}
static tokenInfo *newToken (void) { tokenInfo *const token = xMalloc (1, tokenInfo); token->type = TOKEN_UNDEFINED; token->keyword = KEYWORD_NONE; token->string = vStringNew (); token->scope = vStringNew (); token->nestLevel = 0; token->ignoreTag = FALSE; token->lineNumber = getSourceLineNumber (); token->filePosition = getInputFilePosition (); return token; }
static tokenInfo *newToken (void) { tokenInfo *const token = xMalloc (1, tokenInfo); token->kind = K_UNDEFINED; token->name = vStringNew (); token->lineNumber = getSourceLineNumber (); token->filePosition = getInputFilePosition (); token->scope = NULL; token->nestLevel = 0; token->lastKind = K_UNDEFINED; token->blockName = vStringNew (); token->inheritance = vStringNew (); token->singleStat = FALSE; return token; }
/* Read an identifier into token->name starting from character `c`.
 * Consumes identifier characters until a non-identifier character is
 * seen, which is pushed back.  Returns TRUE iff at least one identifier
 * character was read. */
static boolean readIdentifier (tokenInfo *const token, int c)
{
	vStringClear (token->name);

	/* Not an identifier start: leave the name empty and report failure. */
	if (! isIdentifierCharacter (c))
		return (boolean)(vStringLength (token->name) > 0);

	do
	{
		vStringPut (token->name, c);
		c = vGetc ();
	}
	while (isIdentifierCharacter (c));

	vUngetc (c);	/* first non-identifier char belongs to the next token */
	vStringTerminate (token->name);
	/* Position is stamped after the read; the name is short so this still
	 * points at the identifier's line. */
	token->lineNumber = getSourceLineNumber ();
	token->filePosition = getInputFilePosition ();

	return (boolean)(vStringLength (token->name) > 0);
}
/* Extract a namespace name from `dbp` and emit a namespace tag for it.
 * Also records the name in the module-level CurrentNamespace so later
 * function tags can be scoped under it.  No tag is emitted for an empty
 * name. */
static void makeNamespaceTag (vString * const name, const unsigned char *dbp)
{
	tagEntryInfo entry;

	vStringClear (CurrentNamespace);
	functionName (name, dbp);
	vStringCopy (CurrentNamespace, name);

	if (vStringLength (CurrentNamespace) == 0)
		return;	/* nothing parsed — emit no tag */

	initTagEntry (&entry, vStringValue (CurrentNamespace));
	entry.lineNumber = getSourceLineNumber ();
	entry.filePosition = getInputFilePosition ();
	entry.kindName = ClojureKinds[K_NAMESPACE].name;
	entry.kind = (char) ClojureKinds[K_NAMESPACE].letter;
	makeTagEntry (&entry);
}
/* Extract a function name from `dbp` and emit a function tag for it,
 * scoped under CurrentNamespace when one has been recorded.  No tag is
 * emitted for an empty name. */
static void makeFunctionTag (vString * const name, const unsigned char *dbp)
{
	tagEntryInfo entry;

	functionName (name, dbp);
	if (vStringLength (name) == 0)
		return;	/* nothing parsed — emit no tag */

	initTagEntry (&entry, vStringValue (name));
	entry.lineNumber = getSourceLineNumber ();
	entry.filePosition = getInputFilePosition ();
	entry.kindName = ClojureKinds[K_FUNCTION].name;
	entry.kind = (char) ClojureKinds[K_FUNCTION].letter;

	/* Attach the enclosing namespace as scope, if any was seen. */
	if (vStringLength (CurrentNamespace) > 0)
	{
		entry.extensionFields.scope[0] = ClojureKinds[K_NAMESPACE].name;
		entry.extensionFields.scope[1] = vStringValue (CurrentNamespace);
	}
	makeTagEntry (&entry);
}
/* Read the next token from the input into `token`.
 *
 * include_newlines: when TRUE, newlines may be turned into implicit
 *   semicolons (JavaScript ASI heuristic) instead of being skipped as
 *   whitespace.
 * repr: optional buffer accumulating a printable representation of the
 *   consumed input (used for signatures); may be NULL.
 *
 * Relies on the file-scope LastTokenType to disambiguate '/' (division
 * vs. regexp) and to decide whether a newline ends a statement. */
static void readTokenFull (tokenInfo *const token, boolean include_newlines, vString *const repr)
{
	int c;
	int i;

	token->type = TOKEN_UNDEFINED;
	token->keyword = KEYWORD_NONE;
	vStringClear (token->string);

getNextChar:
	/* Skip whitespace; newlines are only skipped when they cannot become
	 * implicit semicolons.  `i` counts consumed chars so repr can collapse
	 * a run of whitespace into a single space. */
	i = 0;
	do
	{
		c = fileGetc ();
		i++;
	}
	while (c == '\t' || c == ' ' ||
	       ((c == '\r' || c == '\n') && ! include_newlines));

	token->lineNumber = getSourceLineNumber ();
	token->filePosition = getInputFilePosition ();

	if (repr)
	{
		if (i > 1)
			vStringPut (repr, ' ');
		vStringPut (repr, c);
	}

	switch (c)
	{
		case EOF: token->type = TOKEN_EOF; break;
		case '(': token->type = TOKEN_OPEN_PAREN; break;
		case ')': token->type = TOKEN_CLOSE_PAREN; break;
		case ';': token->type = TOKEN_SEMICOLON; break;
		case ',': token->type = TOKEN_COMMA; break;
		case '.': token->type = TOKEN_PERIOD; break;
		case ':': token->type = TOKEN_COLON; break;
		case '{': token->type = TOKEN_OPEN_CURLY; break;
		case '}': token->type = TOKEN_CLOSE_CURLY; break;
		case '=': token->type = TOKEN_EQUAL_SIGN; break;
		case '[': token->type = TOKEN_OPEN_SQUARE; break;
		case ']': token->type = TOKEN_CLOSE_SQUARE; break;

		case '+':
		case '-':
		{
			int d = fileGetc ();
			if (d == c) /* ++ or -- */
				token->type = TOKEN_POSTFIX_OPERATOR;
			else
			{
				fileUngetc (d);
				token->type = TOKEN_BINARY_OPERATOR;
			}
			break;
		}

		case '*':
		case '%':
		case '?':
		case '>':
		case '<':
		case '^':
		case '|':
		case '&':
			token->type = TOKEN_BINARY_OPERATOR;
			break;

		case '\r':
		case '\n':
			/* This isn't strictly correct per the standard, but following the
			 * real rules means understanding all statements, and that's not
			 * what the parser currently does.  What we do here is a guess, by
			 * avoiding inserting semicolons that would make the statement on
			 * the left invalid.  Hopefully this should not have false negatives
			 * (e.g. should not miss insertion of a semicolon) but might have
			 * false positives (e.g. it will wrongfully emit a semicolon for the
			 * newline in "foo\n+bar").
			 * This should however be mostly harmless as we only deal with
			 * newlines in specific situations where we know a false positive
			 * wouldn't hurt too bad. */
			switch (LastTokenType)
			{
				/* these cannot be the end of a statement, so hold the newline */
				case TOKEN_EQUAL_SIGN:
				case TOKEN_COLON:
				case TOKEN_PERIOD:
				case TOKEN_FORWARD_SLASH:
				case TOKEN_BINARY_OPERATOR:
				/* and these already end one, no need to duplicate it */
				case TOKEN_SEMICOLON:
				case TOKEN_COMMA:
				case TOKEN_CLOSE_CURLY:
				case TOKEN_OPEN_CURLY:
					include_newlines = FALSE; /* no need to recheck */
					goto getNextChar;
					break;
				default:
					/* anything else: treat the newline as a statement end */
					token->type = TOKEN_SEMICOLON;
			}
			break;

		case '\'':
		case '"':
			token->type = TOKEN_STRING;
			parseString (token->string, c);
			/* re-stamp: the string may have spanned lines */
			token->lineNumber = getSourceLineNumber ();
			token->filePosition = getInputFilePosition ();
			if (repr)
			{
				vStringCat (repr, token->string);
				vStringPut (repr, c);
			}
			break;

		case '\\':
			/* escaped character; push back anything that isn't part of it */
			c = fileGetc ();
			if (c != '\\' && c != '"' && !isspace (c))
				fileUngetc (c);
			token->type = TOKEN_CHARACTER;
			token->lineNumber = getSourceLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;

		case '/':
		{
			int d = fileGetc ();
			if ( (d != '*') &&		/* is this the start of a comment? */
			     (d != '/') )		/* is a one line comment? */
			{
				fileUngetc (d);
				/* after a value-like token, '/' is division; otherwise it
				 * starts a regexp literal */
				switch (LastTokenType)
				{
					case TOKEN_CHARACTER:
					case TOKEN_IDENTIFIER:
					case TOKEN_STRING:
					case TOKEN_CLOSE_CURLY:
					case TOKEN_CLOSE_PAREN:
					case TOKEN_CLOSE_SQUARE:
						token->type = TOKEN_FORWARD_SLASH;
						break;

					default:
						token->type = TOKEN_REGEXP;
						parseRegExp ();
						token->lineNumber = getSourceLineNumber ();
						token->filePosition = getInputFilePosition ();
						break;
				}
			}
			else
			{
				if (repr) /* remove the / we added */
					repr->buffer[--repr->length] = 0;
				if (d == '*')
				{
					/* block comment: scan to the closing "*" "/" pair */
					do
					{
						skipToCharacter ('*');
						c = fileGetc ();
						if (c == '/')
							break;
						else
							fileUngetc (c);
					} while (c != EOF && c != '\0');
					goto getNextChar;
				}
				else if (d == '/')	/* is this the start of a comment? */
				{
					skipToCharacter ('\n');
					/* if we care about newlines, put it back so it is seen */
					if (include_newlines)
						fileUngetc ('\n');
					goto getNextChar;
				}
			}
			break;
		}

		case '#':
			/* skip shebang in case of e.g. Node.js scripts */
			if (token->lineNumber > 1)
				token->type = TOKEN_UNDEFINED;
			else if ((c = fileGetc ()) != '!')
			{
				fileUngetc (c);
				token->type = TOKEN_UNDEFINED;
			}
			else
			{
				skipToCharacter ('\n');
				goto getNextChar;
			}
			break;

		default:
			if (! isIdentChar (c))
				token->type = TOKEN_UNDEFINED;
			else
			{
				parseIdentifier (token->string, c);
				token->lineNumber = getSourceLineNumber ();
				token->filePosition = getInputFilePosition ();
				token->keyword = analyzeToken (token->string);
				if (isKeyword (token, KEYWORD_NONE))
					token->type = TOKEN_IDENTIFIER;
				else
					token->type = TOKEN_KEYWORD;
				/* first char already went into repr above; append the rest */
				if (repr && vStringLength (token->string) > 1)
					vStringCatS (repr, vStringValue (token->string) + 1);
			}
			break;
	}

	/* remembered for '/' disambiguation and newline handling next call */
	LastTokenType = token->type;
}
/* Advances the parser one token, optionally skipping whitespace
 * (otherwise it is concatenated and returned as a single whitespace token).
 * Whitespace is needed to properly render function signatures. Unrecognized
 * token starts are stored literally, e.g. token may equal to a character '#'.
 *
 * Returns the token id, which is also stored in lexer->cur_token. */
static int advanceToken (lexerState *lexer, boolean skip_whitspace)
{
	boolean have_whitespace = FALSE;
	lexer->line = getSourceLineNumber();
	lexer->pos = getInputFilePosition();
	/* Phase 1: consume the run of whitespace and comments in front of the
	 * next real token.  If whitespace was seen and the caller wants it,
	 * report it as one TOKEN_WHITESPACE.  Note: whitespace that runs into
	 * EOF falls through to the EOF return below. */
	while (lexer->cur_c != EOF)
	{
		if (isWhitespace(lexer->cur_c))
		{
			scanWhitespace(lexer);
			have_whitespace = TRUE;
		}
		else if (lexer->cur_c == '/' && (lexer->next_c == '/' || lexer->next_c == '*'))
		{
			scanComments(lexer);
			have_whitespace = TRUE;
		}
		else
		{
			if (have_whitespace && !skip_whitspace)
				return lexer->cur_token = TOKEN_WHITESPACE;
			break;
		}
	}
	/* Phase 2: re-stamp the position (leading trivia is gone) and lex the
	 * actual token. */
	lexer->line = getSourceLineNumber();
	lexer->pos = getInputFilePosition();
	while (lexer->cur_c != EOF)
	{
		if (lexer->cur_c == '"')
		{
			scanString(lexer);
			return lexer->cur_token = TOKEN_STRING;
		}
		/* r"..." / r#"..."# raw string literals */
		else if (lexer->cur_c == 'r' && (lexer->next_c == '#' || lexer->next_c == '"'))
		{
			scanRawString(lexer);
			return lexer->cur_token = TOKEN_STRING;
		}
		/* char literal or lifetime ('a); both reported as TOKEN_STRING */
		else if (lexer->cur_c == '\'')
		{
			scanCharacterOrLifetime(lexer);
			return lexer->cur_token = TOKEN_STRING;
		}
		else if (isIdentifierStart(lexer->cur_c))
		{
			scanIdentifier(lexer);
			return lexer->cur_token = TOKEN_IDENT;
		}
		/* These shift tokens aren't too important for tag-generation per se,
		 * but they confuse the skipUntil code which tracks the <> pairs. */
		else if (lexer->cur_c == '>' && lexer->next_c == '>')
		{
			advanceNChar(lexer, 2);
			return lexer->cur_token = TOKEN_RSHIFT;
		}
		else if (lexer->cur_c == '<' && lexer->next_c == '<')
		{
			advanceNChar(lexer, 2);
			return lexer->cur_token = TOKEN_LSHIFT;
		}
		else if (lexer->cur_c == '-' && lexer->next_c == '>')
		{
			advanceNChar(lexer, 2);
			return lexer->cur_token = TOKEN_RARROW;
		}
		else
		{
			/* unrecognized start: the character itself is the token id */
			int c = lexer->cur_c;
			advanceChar(lexer);
			return lexer->cur_token = c;
		}
	}
	return lexer->cur_token = TOKEN_EOF;
}
/* Main CSS parser loop: walks the token stream and emits one tag per
 * selector group (classified as class/id/selector by classifySelector).
 * At-rules are skipped, except that @media/@supports bodies are entered
 * so selectors inside them are tagged at the root level. */
static void findCssTags (void)
{
	/* readNextToken lets a branch hand an already-read token to the next
	 * loop iteration instead of consuming a new one. */
	boolean readNextToken = TRUE;
	tokenInfo token;

	token.string = vStringNew ();

	do
	{
		if (readNextToken)
			readToken (&token);
		readNextToken = TRUE;
		if (token.type == '@')
		{
			/* At-rules, from the "@" to the next block or semicolon */
			boolean useContents;
			readToken (&token);
			useContents = (strcmp (vStringValue (token.string), "media") == 0 ||
			               strcmp (vStringValue (token.string), "supports") == 0);
			while (token.type != TOKEN_EOF &&
			       token.type != ';' && token.type != '{')
			{
				readToken (&token);
			}
			/* HACK: we *eat* the opening '{' for medias and the like so that
			 * the content is parsed as if it was at the root */
			readNextToken = useContents && token.type == '{';
		}
		else if (token.type == TOKEN_SELECTOR)
		{
			/* collect selectors and make a tag */
			cssKind kind = K_SELECTOR;
			fpos_t filePosition;
			unsigned long lineNumber;
			vString *selector = vStringNew ();
			do
			{
				if (vStringLength (selector) > 0)
					vStringPut (selector, ' ');
				vStringCat (selector, token.string);

				/* the tag's kind is decided by the last selector part */
				kind = classifySelector (token.string);
				lineNumber = getSourceLineNumber ();
				filePosition = getInputFilePosition ();

				readToken (&token);

				/* handle attribute selectors */
				if (token.type == '[')
				{
					int depth = 1;
					while (depth > 0 && token.type != TOKEN_EOF)
					{
						vStringCat (selector, token.string);
						readToken (&token);
						if (token.type == '[')
							depth++;
						else if (token.type == ']')
							depth--;
					}
					if (token.type != TOKEN_EOF)
						vStringCat (selector, token.string);
					readToken (&token);
				}
			}
			while (token.type == TOKEN_SELECTOR);
			/* we already consumed the next token, don't read it twice */
			readNextToken = FALSE;

			vStringTerminate (selector);
			if (CssKinds[kind].enabled)
			{
				tagEntryInfo e;
				initTagEntry (&e, vStringValue (selector), &(CssKinds[kind]));

				e.lineNumber = lineNumber;
				e.filePosition = filePosition;

				makeTagEntry (&e);
			}
			vStringDelete (selector);
		}
		else if (token.type == '{')
		{
			/* skip over { ... } (rule bodies contribute no tags) */
			int depth = 1;
			while (depth > 0 && token.type != TOKEN_EOF)
			{
				readToken (&token);
				if (token.type == '{')
					depth++;
				else if (token.type == '}')
					depth--;
			}
		}
	}
	while (token.type != TOKEN_EOF);

	vStringDelete (token.string);
}
/* Read the next JSON token into `token`.
 *
 * includeStringRepr: when TRUE, the contents of string literals are
 *   copied into token->string (needed when the string will become a tag
 *   name); otherwise strings are consumed but their text is discarded.
 *
 * Bare words are classified via the keyword table into true/false/null;
 * anything else identifier-like is assumed to be a number. */
static void readTokenFull (tokenInfo *const token, boolean includeStringRepr)
{
	int c;

	token->type = TOKEN_UNDEFINED;
	vStringClear (token->string);

	/* skip inter-token whitespace */
	do
		c = fileGetc ();
	while (c == '\t' || c == ' ' || c == '\r' || c == '\n');

	token->lineNumber = getSourceLineNumber ();
	token->filePosition = getInputFilePosition ();

	switch (c)
	{
		case EOF: token->type = TOKEN_EOF; break;
		case '[': token->type = TOKEN_OPEN_SQUARE; break;
		case ']': token->type = TOKEN_CLOSE_SQUARE; break;
		case '{': token->type = TOKEN_OPEN_CURLY; break;
		case '}': token->type = TOKEN_CLOSE_CURLY; break;
		case ':': token->type = TOKEN_COLON; break;
		case ',': token->type = TOKEN_COMMA; break;

		case '"':
		{
			boolean escaped = FALSE;
			token->type = TOKEN_STRING;
			while (TRUE)
			{
				c = fileGetc ();
				/* we don't handle unicode escapes but they are safe */
				if (escaped)
					escaped = FALSE;
				else if (c == '\\')
					escaped = TRUE;
				else if (c >= 0x00 && c <= 0x1F)
					break; /* break on invalid, unescaped, control characters */
				else if (c == '"' || c == EOF)
					break;
				if (includeStringRepr)
					vStringPut (token->string, c);
			}
			vStringTerminate (token->string);
			break;
		}

		default:
			if (! isIdentChar (c))
				token->type = TOKEN_UNDEFINED;
			else
			{
				/* accumulate the bare word, then push back the char that
				 * ended it */
				do
				{
					vStringPut (token->string, c);
					c = fileGetc ();
				}
				while (c != EOF && isIdentChar (c));
				vStringTerminate (token->string);
				fileUngetc (c);
				switch (lookupKeyword (vStringValue (token->string), Lang_json))
				{
					case KEYWORD_true:  token->type = TOKEN_TRUE;  break;
					case KEYWORD_false: token->type = TOKEN_FALSE; break;
					case KEYWORD_null:  token->type = TOKEN_NULL;  break;
					/* not a keyword: numbers are the only other scalar */
					default:            token->type = TOKEN_NUMBER; break;
				}
			}
			break;
	}
}
/* Snapshot the current input location into the file-scope
 * recordedLineno/recordedPos, so a tag created later can be attributed
 * to this spot rather than to wherever parsing has advanced to. */
static void recordPosition(void)
{
	recordedLineno = getSourceLineNumber();
	recordedPos = getInputFilePosition();
}
/* Algorithm adapted from from GNU etags. * Perl support by Bart Robinson <*****@*****.**> * Perl sub names: look for /^ [ \t\n]sub [ \t\n]+ [^ \t\n{ (]+/ */ static void findPerlTags (void) { vString *name = vStringNew (); vString *package = NULL; bool skipPodDoc = false; const unsigned char *line; unsigned long podStart = 0UL; /* Core modules AutoLoader and SelfLoader support delayed compilation * by allowing Perl code that follows __END__ and __DATA__ tokens, * respectively. When we detect that one of these modules is used * in the file, we continue processing even after we see the * corresponding token that would usually terminate parsing of the * file. */ enum { RESPECT_END = (1 << 0), RESPECT_DATA = (1 << 1), } respect_token = RESPECT_END | RESPECT_DATA; while ((line = readLineFromInputFile ()) != NULL) { bool spaceRequired = false; bool qualified = false; const unsigned char *cp = line; perlKind kind = K_NONE; tagEntryInfo e; if (skipPodDoc) { if (strncmp ((const char*) line, "=cut", (size_t) 4) == 0) { skipPodDoc = false; if (podStart != 0UL) { makePromise ("Pod", podStart, 0, getInputLineNumber(), 0, getSourceLineNumber()); podStart = 0UL; } } continue; } else if (line [0] == '=') { skipPodDoc = isPodWord ((const char*)line + 1); if (skipPodDoc) podStart = getSourceLineNumber (); continue; } else if (strcmp ((const char*) line, "__DATA__") == 0) { if (respect_token & RESPECT_DATA) break; else continue; } else if (strcmp ((const char*) line, "__END__") == 0) { if (respect_token & RESPECT_END) break; else continue; } else if (line [0] == '#') continue; while (isspace (*cp)) cp++; if (strncmp((const char*) cp, "sub", (size_t) 3) == 0) { TRACE("this looks like a sub\n"); cp += 3; kind = K_SUBROUTINE; spaceRequired = true; qualified = true; } else if (strncmp((const char*) cp, "use", (size_t) 3) == 0) { cp += 3; if (!isspace(*cp)) continue; while (*cp && isspace (*cp)) ++cp; if (strncmp((const char*) cp, "AutoLoader", (size_t) 10) == 0) { respect_token &= 
~RESPECT_END; continue; } if (strncmp((const char*) cp, "SelfLoader", (size_t) 10) == 0) { respect_token &= ~RESPECT_DATA; continue; } if (strncmp((const char*) cp, "constant", (size_t) 8) != 0) continue; cp += 8; /* Skip up to the first non-space character, skipping empty * and comment lines. */ while (isspace(*cp)) cp++; while (!*cp || '#' == *cp) { cp = readLineFromInputFile (); if (!cp) goto END_MAIN_WHILE; while (isspace (*cp)) cp++; } if ('{' == *cp) { ++cp; if (0 == parseConstantsFromHashRef(cp, name, package)) { vStringClear(name); continue; } else goto END_MAIN_WHILE; } kind = K_CONSTANT; spaceRequired = false; qualified = true; } else if (strncmp((const char*) cp, "package", (size_t) 7) == 0 && ('\0' == cp[7] || isspace(cp[7]))) { cp += 7; while (isspace (*cp)) cp++; while (!*cp || '#' == *cp) { cp = readLineFromInputFile (); if (!cp) goto END_MAIN_WHILE; while (isspace (*cp)) cp++; } if (package == NULL) package = vStringNew (); else vStringClear (package); const unsigned char *const first = cp; while (*cp && (int) *cp != ';' && !isspace ((int) *cp)) { vStringPut (package, (int) *cp); cp++; } vStringCatS (package, "::"); cp = first; /* Rewind */ kind = K_PACKAGE; spaceRequired = false; qualified = true; } else if (strncmp((const char*) cp, "format", (size_t) 6) == 0) { cp += 6; kind = K_FORMAT; spaceRequired = true; qualified = true; } else { if (isIdentifier1 (*cp)) { const unsigned char *p = cp; while (isIdentifier (*p)) ++p; while (isspace (*p)) ++p; if ((int) *p == ':' && (int) *(p + 1) != ':') kind = K_LABEL; } } if (kind != K_NONE) { TRACE("cp0: %s\n", (const char *) cp); if (spaceRequired && *cp && !isspace (*cp)) continue; TRACE("cp1: %s\n", (const char *) cp); while (isspace (*cp)) cp++; while (!*cp || '#' == *cp) { /* Gobble up empty lines and comments */ cp = readLineFromInputFile (); if (!cp) goto END_MAIN_WHILE; while (isspace (*cp)) cp++; } while (isIdentifier (*cp) || (K_PACKAGE == kind && ':' == *cp)) { vStringPut (name, (int) *cp); cp++; 
} if (K_FORMAT == kind && vStringLength (name) == 0 && /* cp did not advance */ '=' == *cp) { /* format's name is optional. If it's omitted, 'STDOUT' is assumed. */ vStringCatS (name, "STDOUT"); } TRACE("name: %s\n", name->buffer); if (0 == vStringLength(name)) { vStringClear(name); continue; } if (K_SUBROUTINE == kind) { /* * isSubroutineDeclaration() may consume several lines. So * we record line positions. */ initTagEntry(&e, vStringValue(name), NULL); if (true == isSubroutineDeclaration(cp)) { if (true == PerlKinds[K_SUBROUTINE_DECLARATION].enabled) { kind = K_SUBROUTINE_DECLARATION; } else { vStringClear (name); continue; } } else if (! PerlKinds[kind].enabled) { continue; } e.kind = &(PerlKinds[kind]); makeTagEntry(&e); if (isXtagEnabled (XTAG_QUALIFIED_TAGS) && qualified && package != NULL && vStringLength (package) > 0) { vString *const qualifiedName = vStringNew (); vStringCopy (qualifiedName, package); vStringCat (qualifiedName, name); e.name = vStringValue(qualifiedName); markTagExtraBit (&e, XTAG_QUALIFIED_TAGS); makeTagEntry(&e); vStringDelete (qualifiedName); } } else if (vStringLength (name) > 0) { makeSimpleTag (name, PerlKinds, kind); if (isXtagEnabled(XTAG_QUALIFIED_TAGS) && qualified && K_PACKAGE != kind && package != NULL && vStringLength (package) > 0) { tagEntryInfo fqe; vString *const qualifiedName = vStringNew (); vStringCopy (qualifiedName, package); vStringCat (qualifiedName, name); initTagEntry (&fqe, vStringValue (qualifiedName), PerlKinds + kind); markTagExtraBit (&fqe, XTAG_QUALIFIED_TAGS); vStringDelete (qualifiedName); } } vStringClear (name); } } END_MAIN_WHILE: vStringDelete (name); if (package != NULL) vStringDelete (package); }
/* Read the next TeX token into `token`.  Skips whitespace and '%' line
 * comments; raises ExceptionEOF via longjmp at end of input.  TeX control
 * sequences (a backslash followed by letters) are classified through the
 * keyword table. */
static void readToken (tokenInfo *const token)
{
	int c;

	token->type = TOKEN_UNDEFINED;
	token->keyword = KEYWORD_NONE;
	vStringClear (token->string);

getNextChar:
	do
	{
		c = fileGetc ();
		token->lineNumber = getSourceLineNumber ();
		token->filePosition = getInputFilePosition ();
	}
	while (c == '\t' || c == ' ' || c == '\n');

	switch (c)
	{
		case EOF: longjmp (Exception, (int)ExceptionEOF); break;
		case '(': token->type = TOKEN_OPEN_PAREN; break;
		case ')': token->type = TOKEN_CLOSE_PAREN; break;
		case ';': token->type = TOKEN_SEMICOLON; break;
		case ',': token->type = TOKEN_COMMA; break;
		case '.': token->type = TOKEN_PERIOD; break;
		case ':': token->type = TOKEN_COLON; break;
		case '{': token->type = TOKEN_OPEN_CURLY; break;
		case '}': token->type = TOKEN_CLOSE_CURLY; break;
		case '=': token->type = TOKEN_EQUAL_SIGN; break;
		case '[': token->type = TOKEN_OPEN_SQUARE; break;
		case ']': token->type = TOKEN_CLOSE_SQUARE; break;
		case '?': token->type = TOKEN_QUESTION_MARK; break;
		case '*': token->type = TOKEN_STAR; break;

		case '\'':
		case '"':
			token->type = TOKEN_STRING;
			parseString (token->string, c);
			/* re-stamp: the string may have spanned lines */
			token->lineNumber = getSourceLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;

		case '\\':
			/*
			 * All Tex tags start with a backslash.
			 * Check if the next character is an alpha character
			 * else it is not a potential tex tag.
			 */
			c = fileGetc ();
			if (! isalpha (c))
				fileUngetc (c);
			else
			{
				parseIdentifier (token->string, c);
				token->lineNumber = getSourceLineNumber ();
				token->filePosition = getInputFilePosition ();
				/* NOTE(review): Lang_js in a TeX lexer looks like a
				 * copy-paste from the JavaScript parser — confirm whether
				 * this should be the TeX language index. */
				token->keyword = analyzeToken (token->string, Lang_js);
				if (isKeyword (token, KEYWORD_NONE))
					token->type = TOKEN_IDENTIFIER;
				else
					token->type = TOKEN_KEYWORD;
			}
			break;

		case '%':
			fileSkipToCharacter ('\n'); /* % are single line comments */
			goto getNextChar;
			break;

		default:
			if (! isIdentChar (c))
				token->type = TOKEN_UNDEFINED;
			else
			{
				parseIdentifier (token->string, c);
				token->lineNumber = getSourceLineNumber ();
				token->filePosition = getInputFilePosition ();
				token->type = TOKEN_IDENTIFIER;
			}
			break;
	}
}
/* Read the next token into `token`.  Skips whitespace and comments and
 * raises ExceptionEOF via longjmp at end of input.  Uses the file-scope
 * LastTokenType to decide whether '/' is division or the start of a
 * regexp literal. */
static void readToken (tokenInfo *const token)
{
	int c;

	token->type = TOKEN_UNDEFINED;
	token->keyword = KEYWORD_NONE;
	vStringClear (token->string);

getNextChar:
	do
	{
		c = fileGetc ();
	}
	while (c == '\t' || c == ' ' || c == '\n');

	token->lineNumber = getSourceLineNumber ();
	token->filePosition = getInputFilePosition ();

	switch (c)
	{
		case EOF: longjmp (Exception, (int)ExceptionEOF); break;
		case '(': token->type = TOKEN_OPEN_PAREN; break;
		case ')': token->type = TOKEN_CLOSE_PAREN; break;
		case ';': token->type = TOKEN_SEMICOLON; break;
		case ',': token->type = TOKEN_COMMA; break;
		case '.': token->type = TOKEN_PERIOD; break;
		case ':': token->type = TOKEN_COLON; break;
		case '{': token->type = TOKEN_OPEN_CURLY; break;
		case '}': token->type = TOKEN_CLOSE_CURLY; break;
		case '=': token->type = TOKEN_EQUAL_SIGN; break;
		case '[': token->type = TOKEN_OPEN_SQUARE; break;
		case ']': token->type = TOKEN_CLOSE_SQUARE; break;

		case '\'':
		case '"':
			token->type = TOKEN_STRING;
			parseString (token->string, c);
			/* re-stamp: the string may have spanned lines */
			token->lineNumber = getSourceLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;

		case '\\':
			/* escaped character; push back anything that isn't part of it */
			c = fileGetc ();
			if (c != '\\' && c != '"' && !isspace (c))
				fileUngetc (c);
			token->type = TOKEN_CHARACTER;
			token->lineNumber = getSourceLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;

		case '/':
		{
			int d = fileGetc ();
			if ( (d != '*') &&		/* is this the start of a comment? */
			     (d != '/') )		/* is a one line comment? */
			{
				fileUngetc (d);
				/* after a value-like token, '/' is division; otherwise it
				 * starts a regexp literal */
				switch (LastTokenType)
				{
					case TOKEN_CHARACTER:
					case TOKEN_KEYWORD:
					case TOKEN_IDENTIFIER:
					case TOKEN_STRING:
					case TOKEN_CLOSE_CURLY:
					case TOKEN_CLOSE_PAREN:
					case TOKEN_CLOSE_SQUARE:
						token->type = TOKEN_FORWARD_SLASH;
						break;

					default:
						token->type = TOKEN_REGEXP;
						parseRegExp ();
						token->lineNumber = getSourceLineNumber ();
						token->filePosition = getInputFilePosition ();
						break;
				}
			}
			else
			{
				if (d == '*')
				{
					/* block comment: scan to the closing "*" "/" pair */
					do
					{
						skipToCharacter ('*');
						c = fileGetc ();
						if (c == '/')
							break;
						else
							fileUngetc (c);
					} while (c != EOF && c != '\0');
					goto getNextChar;
				}
				else if (d == '/')	/* is this the start of a comment? */
				{
					skipToCharacter ('\n');
					goto getNextChar;
				}
			}
			break;
		}

		default:
			if (! isIdentChar (c))
				token->type = TOKEN_UNDEFINED;
			else
			{
				parseIdentifier (token->string, c);
				token->lineNumber = getSourceLineNumber ();
				token->filePosition = getInputFilePosition ();
				token->keyword = analyzeToken (token->string);
				if (isKeyword (token, KEYWORD_NONE))
					token->type = TOKEN_IDENTIFIER;
				else
					token->type = TOKEN_KEYWORD;
			}
			break;
	}

	/* remembered for '/' disambiguation on the next call */
	LastTokenType = token->type;
}
/* Read the next token into `token`.
 *
 * include_newlines: when TRUE, a newline between two tokens may cause an
 *   implicit semicolon to be emitted (JavaScript ASI heuristic); the real
 *   token is then held back in the file-scope NextToken and returned on
 *   the following call.
 * repr: optional buffer accumulating a printable representation of the
 *   consumed input (used for signatures); may be NULL.
 *
 * Uses the file-scope LastTokenType to disambiguate '/' and to decide
 * whether a newline ends a statement. */
static void readTokenFull (tokenInfo *const token, boolean include_newlines, vString *const repr)
{
	int c;
	int i;
	boolean newline_encountered = FALSE;

	/* if we've got a token held back, emit it */
	if (NextToken)
	{
		copyToken (token, NextToken, FALSE);
		deleteToken (NextToken);
		NextToken = NULL;
		return;
	}

	token->type = TOKEN_UNDEFINED;
	token->keyword = KEYWORD_NONE;
	vStringClear (token->string);

getNextChar:
	/* Skip whitespace, remembering whether a newline was crossed; `i`
	 * counts consumed chars so repr can collapse the run to one space. */
	i = 0;
	do
	{
		c = fileGetc ();
		if (include_newlines && (c == '\r' || c == '\n'))
			newline_encountered = TRUE;
		i++;
	}
	while (c == '\t' || c == ' ' || c == '\r' || c == '\n');

	token->lineNumber = getSourceLineNumber ();
	token->filePosition = getInputFilePosition ();

	if (repr)
	{
		if (i > 1)
			vStringPut (repr, ' ');
		vStringPut (repr, c);
	}

	switch (c)
	{
		case EOF: token->type = TOKEN_EOF; break;
		case '(': token->type = TOKEN_OPEN_PAREN; break;
		case ')': token->type = TOKEN_CLOSE_PAREN; break;
		case ';': token->type = TOKEN_SEMICOLON; break;
		case ',': token->type = TOKEN_COMMA; break;
		case '.': token->type = TOKEN_PERIOD; break;
		case ':': token->type = TOKEN_COLON; break;
		case '{': token->type = TOKEN_OPEN_CURLY; break;
		case '}': token->type = TOKEN_CLOSE_CURLY; break;
		case '=': token->type = TOKEN_EQUAL_SIGN; break;
		case '[': token->type = TOKEN_OPEN_SQUARE; break;
		case ']': token->type = TOKEN_CLOSE_SQUARE; break;

		case '+':
		case '-':
		{
			int d = fileGetc ();
			if (d == c) /* ++ or -- */
				token->type = TOKEN_POSTFIX_OPERATOR;
			else
			{
				fileUngetc (d);
				token->type = TOKEN_BINARY_OPERATOR;
			}
			break;
		}

		case '*':
		case '%':
		case '?':
		case '>':
		case '<':
		case '^':
		case '|':
		case '&':
			token->type = TOKEN_BINARY_OPERATOR;
			break;

		case '\'':
		case '"':
			token->type = TOKEN_STRING;
			parseString (token->string, c);
			/* re-stamp: the string may have spanned lines */
			token->lineNumber = getSourceLineNumber ();
			token->filePosition = getInputFilePosition ();
			if (repr)
			{
				vStringCat (repr, token->string);
				vStringPut (repr, c);
			}
			break;

		case '`':
			/* ES6 template literal */
			token->type = TOKEN_TEMPLATE_STRING;
			parseTemplateString (token->string);
			token->lineNumber = getSourceLineNumber ();
			token->filePosition = getInputFilePosition ();
			if (repr)
			{
				vStringCat (repr, token->string);
				vStringPut (repr, c);
			}
			break;

		case '\\':
			/* escaped character; push back anything that isn't part of it */
			c = fileGetc ();
			if (c != '\\' && c != '"' && !isspace (c))
				fileUngetc (c);
			token->type = TOKEN_CHARACTER;
			token->lineNumber = getSourceLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;

		case '/':
		{
			int d = fileGetc ();
			if ( (d != '*') &&		/* is this the start of a comment? */
			     (d != '/') )		/* is a one line comment? */
			{
				fileUngetc (d);
				/* after a value-like token, '/' is division; otherwise it
				 * starts a regexp literal */
				switch (LastTokenType)
				{
					case TOKEN_CHARACTER:
					case TOKEN_IDENTIFIER:
					case TOKEN_STRING:
					case TOKEN_TEMPLATE_STRING:
					case TOKEN_CLOSE_CURLY:
					case TOKEN_CLOSE_PAREN:
					case TOKEN_CLOSE_SQUARE:
						token->type = TOKEN_BINARY_OPERATOR;
						break;

					default:
						token->type = TOKEN_REGEXP;
						parseRegExp ();
						token->lineNumber = getSourceLineNumber ();
						token->filePosition = getInputFilePosition ();
						break;
				}
			}
			else
			{
				if (repr) /* remove the / we added */
					repr->buffer[--repr->length] = 0;
				if (d == '*')
				{
					/* block comment: scan to the closing "*" "/" pair */
					do
					{
						fileSkipToCharacter ('*');
						c = fileGetc ();
						if (c == '/')
							break;
						else
							fileUngetc (c);
					} while (c != EOF && c != '\0');
					goto getNextChar;
				}
				else if (d == '/')	/* is this the start of a comment? */
				{
					fileSkipToCharacter ('\n');
					/* if we care about newlines, put it back so it is seen */
					if (include_newlines)
						fileUngetc ('\n');
					goto getNextChar;
				}
			}
			break;
		}

		case '#':
			/* skip shebang in case of e.g. Node.js scripts */
			if (token->lineNumber > 1)
				token->type = TOKEN_UNDEFINED;
			else if ((c = fileGetc ()) != '!')
			{
				fileUngetc (c);
				token->type = TOKEN_UNDEFINED;
			}
			else
			{
				fileSkipToCharacter ('\n');
				goto getNextChar;
			}
			break;

		default:
			if (! isIdentChar (c))
				token->type = TOKEN_UNDEFINED;
			else
			{
				parseIdentifier (token->string, c);
				token->lineNumber = getSourceLineNumber ();
				token->filePosition = getInputFilePosition ();
				token->keyword = analyzeToken (token->string, Lang_js);
				if (isKeyword (token, KEYWORD_NONE))
					token->type = TOKEN_IDENTIFIER;
				else
					token->type = TOKEN_KEYWORD;
				/* first char already went into repr above; append the rest */
				if (repr && vStringLength (token->string) > 1)
					vStringCatS (repr, vStringValue (token->string) + 1);
			}
			break;
	}

	if (include_newlines && newline_encountered)
	{
		/* This isn't strictly correct per the standard, but following the
		 * real rules means understanding all statements, and that's not
		 * what the parser currently does.  What we do here is a guess, by
		 * avoiding inserting semicolons that would make the statement on
		 * the left or right obviously invalid.  Hopefully this should not
		 * have false negatives (e.g. should not miss insertion of a semicolon)
		 * but might have false positives (e.g. it will wrongfully emit a
		 * semicolon sometimes, i.e. for the newline in "foo\n(bar)").
		 * This should however be mostly harmless as we only deal with
		 * newlines in specific situations where we know a false positive
		 * wouldn't hurt too bad. */

		/* these already end a statement, so no need to duplicate it */
		#define IS_STMT_SEPARATOR(t) ((t) == TOKEN_SEMICOLON    || \
		                              (t) == TOKEN_EOF          || \
		                              (t) == TOKEN_COMMA        || \
		                              (t) == TOKEN_CLOSE_CURLY  || \
		                              (t) == TOKEN_OPEN_CURLY)
		/* these cannot be the start or end of a statement */
		#define IS_BINARY_OPERATOR(t) ((t) == TOKEN_EQUAL_SIGN  || \
		                               (t) == TOKEN_COLON       || \
		                               (t) == TOKEN_PERIOD      || \
		                               (t) == TOKEN_BINARY_OPERATOR)

		if (! IS_STMT_SEPARATOR(LastTokenType) &&
		    ! IS_STMT_SEPARATOR(token->type) &&
		    ! IS_BINARY_OPERATOR(LastTokenType) &&
		    ! IS_BINARY_OPERATOR(token->type) &&
		    /* these cannot be followed by a semicolon */
		    ! (LastTokenType == TOKEN_OPEN_PAREN ||
		       LastTokenType == TOKEN_OPEN_SQUARE))
		{
			/* hold the token... */
			Assert (NextToken == NULL);
			NextToken = newToken ();
			copyToken (NextToken, token, FALSE);

			/* ...and emit a semicolon instead */
			token->type = TOKEN_SEMICOLON;
			token->keyword = KEYWORD_NONE;
			vStringClear (token->string);
			/* NOTE(review): this writes the '\n' into token->string while
			 * being gated on `repr` — looks like it may have been meant to
			 * append to repr instead; confirm against upstream. */
			if (repr)
				vStringPut (token->string, '\n');
		}

		#undef IS_STMT_SEPARATOR
		#undef IS_BINARY_OPERATOR
	}

	/* remembered for '/' disambiguation and ASI on the next call */
	LastTokenType = token->type;
}
/* Reads the next token from the input stream for the Go parser.
 *
 * On return, token->type/keyword/string/lineNumber/filePosition describe the
 * token read.  Whitespace and comments are skipped.  Mirrors Go's automatic
 * semicolon insertion: a newline following a token that can legally end a
 * statement is reported as TOKEN_SEMICOLON.
 */
static void readToken (tokenInfo *const token)
{
	int c;
	/* Type of the previously returned token, kept across calls so the
	 * semicolon-insertion rule below can look one token back. */
	static tokenType lastTokenType = TOKEN_NONE;

	token->type = TOKEN_NONE;
	token->keyword = KEYWORD_NONE;
	vStringClear (token->string);

getNextChar:
	/* Skip whitespace, recording the position of each char read so the
	 * token carries the location of its first significant character. */
	do
	{
		c = fileGetc ();
		token->lineNumber = getSourceLineNumber ();
		token->filePosition = getInputFilePosition ();
		/* Go semicolon insertion: a newline ends the statement when the
		 * previous token could have ended one. */
		if (c == '\n' && (lastTokenType == TOKEN_IDENTIFIER ||
						  lastTokenType == TOKEN_STRING ||
						  lastTokenType == TOKEN_OTHER ||
						  lastTokenType == TOKEN_CLOSE_PAREN ||
						  lastTokenType == TOKEN_CLOSE_CURLY ||
						  lastTokenType == TOKEN_CLOSE_SQUARE))
		{
			token->type = TOKEN_SEMICOLON;
			goto done;
		}
	}
	while (c == '\t' || c == ' ' || c == '\r' || c == '\n');

	switch (c)
	{
		case EOF: token->type = TOKEN_EOF;       break;
		case ';': token->type = TOKEN_SEMICOLON; break;
		case '/':
		{
			/* Either a comment or a plain '/' operator. */
			boolean hasNewline = FALSE;
			int d = fileGetc ();
			switch (d)
			{
				case '/':
					fileSkipToCharacter ('\n');
					/* Line comments start with the
					 * character sequence // and
					 * continue through the next
					 * newline. A line comment acts
					 * like a newline. */
					fileUngetc ('\n');
					goto getNextChar;
				case '*':
					/* Block comment: scan to the closing '*' '/' pair.
					 * The inner loop stops on each '*'; the outer loop
					 * checks whether '/' follows. */
					do
					{
						do
						{
							d = fileGetc ();
							if (d == '\n')
							{
								hasNewline = TRUE;
							}
						} while (d != EOF && d != '*');
						c = fileGetc ();
						if (c == '/')
							break;
						else
							fileUngetc (c);
					} while (c != EOF && c != '\0');
					/* A block comment containing a newline acts like a
					 * newline (feeds semicolon insertion above); otherwise
					 * it acts like a single space. */
					fileUngetc (hasNewline ? '\n' : ' ');
					goto getNextChar;
				default:
					/* Lone '/': report as an uninteresting token and push
					 * the lookahead char back. */
					token->type = TOKEN_OTHER;
					fileUngetc (d);
					break;
			}
		}
			break;
		/* All three Go string forms: interpreted, rune, and raw. */
		case '"':
		case '\'':
		case '`':
			token->type = TOKEN_STRING;
			parseString (token->string, c);
			token->lineNumber = getSourceLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;
		case '<':
		{
			/* "<-" is the channel-receive arrow; a bare '<' is TOKEN_OTHER. */
			int d = fileGetc ();
			if (d == '-')
				token->type = TOKEN_LEFT_ARROW;
			else
			{
				fileUngetc (d);
				token->type = TOKEN_OTHER;
			}
		}
			break;
		case '(': token->type = TOKEN_OPEN_PAREN;   break;
		case ')': token->type = TOKEN_CLOSE_PAREN;  break;
		case '{': token->type = TOKEN_OPEN_CURLY;   break;
		case '}': token->type = TOKEN_CLOSE_CURLY;  break;
		case '[': token->type = TOKEN_OPEN_SQUARE;  break;
		case ']': token->type = TOKEN_CLOSE_SQUARE; break;
		case '*': token->type = TOKEN_STAR;         break;
		case '.': token->type = TOKEN_DOT;          break;
		case ',': token->type = TOKEN_COMMA;        break;
		default:
			if (isStartIdentChar (c))
			{
				/* Identifier or keyword: accumulate it, then consult the
				 * Go keyword table to classify. */
				parseIdentifier (token->string, c);
				token->lineNumber = getSourceLineNumber ();
				token->filePosition = getInputFilePosition ();
				token->keyword = lookupKeyword (vStringValue (token->string), Lang_go);
				if (isKeyword (token, KEYWORD_NONE))
					token->type = TOKEN_IDENTIFIER;
				else
					token->type = TOKEN_KEYWORD;
			}
			else
				/* Anything unrecognized is lumped into TOKEN_OTHER. */
				token->type = TOKEN_OTHER;
			break;
	}

done:
	/* Remember this token's type for the next call's semicolon insertion. */
	lastTokenType = token->type;
}
/* Parses one HTML start tag and (recursively, via readTagContent) its
 * content, emitting tags along the way:
 *  - <a name="..."> attributes produce K_ANCHOR tags;
 *  - <h1>/<h2>/<h3> element text produces heading tags;
 *  - <script> bodies yield a "JavaScript" sub-parser promise;
 *  - <style> bodies yield a "CSS" sub-parser promise.
 *
 * token: scratch token, already positioned just after the '<'.
 * text:  buffer collecting element text for an enclosing heading, or NULL;
 *        a local buffer is created (and freed at `out`) when this tag is
 *        itself a heading and no buffer was passed in.
 * depth: current element nesting depth; recursion stops at MAX_DEPTH.
 */
static void readTag (tokenInfo *token, vString *text, int depth)
{
	bool textCreated = false;

	readToken (token, true);
	if (token->type == TOKEN_NAME)
	{
		keywordId startTag;
		bool isHeading;
		bool isVoid;

		startTag = lookupKeyword (vStringValue (token->string), Lang_html);
		isHeading = (startTag == KEYWORD_h1 || startTag == KEYWORD_h2 || startTag == KEYWORD_h3);
		/* Void elements (no closing tag); relies on KEYWORD_area..KEYWORD_wbr
		 * being a contiguous range in the keyword enum. */
		isVoid = (startTag >= KEYWORD_area && startTag <= KEYWORD_wbr);

		/* Headings own a text buffer if the caller didn't supply one. */
		if (text == NULL && isHeading)
		{
			text = vStringNew ();
			textCreated = true;
		}

		/* Consume the rest of the start tag (attributes), watching for
		 * <a name="..."> to emit an anchor tag. */
		do
		{
			readToken (token, true);
			if (startTag == KEYWORD_a && token->type == TOKEN_NAME)
			{
				keywordId attribute = lookupKeyword (vStringValue (token->string), Lang_html);
				if (attribute == KEYWORD_name)
				{
					readToken (token, true);
					if (token->type == TOKEN_EQUAL)
					{
						readToken (token, true);
						/* Attribute value may be quoted (STRING) or bare (NAME). */
						if (token->type == TOKEN_STRING || token->type == TOKEN_NAME)
							makeSimpleTag (token->string, HtmlKinds, K_ANCHOR);
					}
				}
			}
		} while (token->type != TOKEN_TAG_END && token->type != TOKEN_TAG_END2 &&
				 token->type != TOKEN_EOF);

		/* Only non-void elements closed with '>' (not '/>') have content
		 * worth descending into; MAX_DEPTH bounds the recursion. */
		if (!isVoid && token->type == TOKEN_TAG_END && depth < MAX_DEPTH)
		{
			long startSourceLineNumber = getSourceLineNumber ();
			long startLineNumber = getInputLineNumber ();
			long startLineOffset = getInputLineOffset ();
			long endLineNumber;
			long endLineOffset;
			bool tag_start2;

			if (startTag == KEYWORD_script)
			{
				/* Hand the script body off to the JavaScript parser via a
				 * promise covering its source span. */
				bool script = skipScriptContent (token, &endLineNumber, &endLineOffset);
				if (script)
					makePromise ("JavaScript", startLineNumber, startLineOffset,
								 endLineNumber, endLineOffset, startSourceLineNumber);
				readToken (token, true);
				goto out;
			}

			/* Recurse through child content; true means we stopped at a
			 * closing-tag start ("</"). */
			tag_start2 = readTagContent (token, text, &endLineNumber, &endLineOffset, depth);
			if (tag_start2)
			{
				readToken (token, true);
				if (isHeading && textCreated && vStringLength (text) > 0)
				{
					keywordId endTag = lookupKeyword (vStringValue (token->string), Lang_html);
					/* Emit the heading only when the closing tag matches. */
					if (startTag == endTag)
					{
						htmlKind headingKind;

						if (startTag == KEYWORD_h1)
							headingKind = K_HEADING1;
						else if (startTag == KEYWORD_h2)
							headingKind = K_HEADING2;
						else
							headingKind = K_HEADING3;

						vStringStripTrailing (text);
						makeSimpleTag (text, HtmlKinds, headingKind);
					}
				}
				else if (startTag == KEYWORD_style)
				{
					keywordId endTag = lookupKeyword (vStringValue (token->string), Lang_html);
					/* Matched <style>...</style>: promise the span to CSS. */
					if (startTag == endTag)
						makePromise ("CSS", startLineNumber, startLineOffset,
									 endLineNumber, endLineOffset, startSourceLineNumber);
				}

				/* Consume the remainder of the closing tag. */
				readToken (token, true);
			}
		}
	}

 out:
	/* Free the text buffer only if this call allocated it. */
	if (textCreated)
		vStringDelete (text);
}
static void readToken (tokenInfo * const token) { int c; token->type = TOKEN_NONE; token->keyword = KEYWORD_NONE; vStringClear (token->string); getNextChar: do { c = fileGetc (); token->lineNumber = getSourceLineNumber (); token->filePosition = getInputFilePosition (); } while (c == '\t' || c == ' ' || c == '\n'); switch (c) { case EOF: token->type = TOKEN_EOF; break; case '(': token->type = TOKEN_OPEN_PAREN; break; case ')': token->type = TOKEN_CLOSE_PAREN; break; case ';': token->type = TOKEN_SEMICOLON; break; case '.': token->type = TOKEN_PERIOD; break; case ',': token->type = TOKEN_COMMA; break; case '\'': /* only single char are inside simple quotes */ break; /* or it is for attributes so we don't care */ case '"': token->type = TOKEN_STRING; parseString (token->string, c); token->lineNumber = getSourceLineNumber (); token->filePosition = getInputFilePosition (); break; case '-': c = fileGetc (); if (c == '-') /* start of a comment */ { fileSkipToCharacter ('\n'); goto getNextChar; } else { if (!isspace (c)) fileUngetc (c); token->type = TOKEN_OPERATOR; } break; default: if (!isIdentChar1 (c)) token->type = TOKEN_NONE; else { parseIdentifier (token->string, c); token->lineNumber = getSourceLineNumber (); token->filePosition = getInputFilePosition (); token->keyword = analyzeToken (token->string, Lang_vhdl); if (isKeyword (token, KEYWORD_NONE)) token->type = TOKEN_IDENTIFIER; else token->type = TOKEN_KEYWORD; } break; } }
/* Reads the next token from the input stream for the Go parser, while also
 * appending a textual rendering of the token to the file-scope `signature`
 * buffer (declared elsewhere in this file; presumably non-NULL only while a
 * function prototype is being captured — TODO confirm against callers).
 * Implements Go's automatic semicolon insertion by turning a newline after a
 * statement-ending token into ';'. */
static void readToken (tokenInfo *const token)
{
	int c;
	/* Type of the previously returned token, preserved across calls for
	 * the semicolon-insertion check below. */
	static tokenType lastTokenType = TOKEN_NONE;
	/* Collapse any run of whitespace into at most one ' ' in the
	 * signature; firstWhitespace guards against emitting more than one. */
	boolean firstWhitespace = TRUE;
	boolean whitespace;

	token->type = TOKEN_NONE;
	token->keyword = KEYWORD_NONE;
	vStringClear (token->string);

getNextChar:
	do
	{
		c = fileGetc ();
		token->lineNumber = getSourceLineNumber ();
		token->filePosition = getInputFilePosition ();
		/* Go semicolon insertion: a newline after a token that can end a
		 * statement is treated as an explicit ';'. */
		if (c == '\n' && (lastTokenType == TOKEN_IDENTIFIER ||
						  lastTokenType == TOKEN_STRING ||
						  lastTokenType == TOKEN_OTHER ||
						  lastTokenType == TOKEN_CLOSE_PAREN ||
						  lastTokenType == TOKEN_CLOSE_CURLY ||
						  lastTokenType == TOKEN_CLOSE_SQUARE))
		{
			c = ';';  // semicolon injection
		}
		whitespace = c == '\t' || c == ' ' || c == '\r' || c == '\n';
		/* Mirror the first whitespace of this gap into the signature,
		 * subject to the overall length cap. */
		if (signature && whitespace && firstWhitespace && vStringLength (signature) < MAX_SIGNATURE_LENGTH)
		{
			firstWhitespace = FALSE;
			vStringPut(signature, ' ');
		}
	}
	while (whitespace);

	switch (c)
	{
		case EOF: token->type = TOKEN_EOF;       break;
		case ';': token->type = TOKEN_SEMICOLON; break;
		case '/':
		{
			/* Comment or plain '/' operator. */
			boolean hasNewline = FALSE;
			int d = fileGetc ();
			switch (d)
			{
				case '/':
					fileSkipToCharacter ('\n');
					/* Line comments start with the
					 * character sequence // and
					 * continue through the next
					 * newline. A line comment acts
					 * like a newline. */
					fileUngetc ('\n');
					goto getNextChar;
				case '*':
					/* Block comment: scan for the closing '*' '/' pair.
					 * The inner loop halts at each '*'; the outer loop
					 * tests whether '/' follows. */
					do
					{
						do
						{
							d = fileGetc ();
							if (d == '\n')
							{
								hasNewline = TRUE;
							}
						} while (d != EOF && d != '*');
						c = fileGetc ();
						if (c == '/')
							break;
						else
							fileUngetc (c);
					} while (c != EOF && c != '\0');
					/* A block comment spanning a newline behaves as a
					 * newline (feeding semicolon insertion); otherwise it
					 * behaves as a space. */
					fileUngetc (hasNewline ? '\n' : ' ');
					goto getNextChar;
				default:
					token->type = TOKEN_OTHER;
					fileUngetc (d);
					break;
			}
		}
			break;
		/* Interpreted, rune, and raw string forms. */
		case '"':
		case '\'':
		case '`':
			token->type = TOKEN_STRING;
			parseString (token->string, c);
			token->lineNumber = getSourceLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;
		case '<':
		{
			/* "<-" is the channel-receive arrow. */
			int d = fileGetc ();
			if (d == '-')
				token->type = TOKEN_LEFT_ARROW;
			else
			{
				fileUngetc (d);
				token->type = TOKEN_OTHER;
			}
		}
			break;
		case '(': token->type = TOKEN_OPEN_PAREN;   break;
		case ')': token->type = TOKEN_CLOSE_PAREN;  break;
		case '{': token->type = TOKEN_OPEN_CURLY;   break;
		case '}': token->type = TOKEN_CLOSE_CURLY;  break;
		case '[': token->type = TOKEN_OPEN_SQUARE;  break;
		case ']': token->type = TOKEN_CLOSE_SQUARE; break;
		case '*': token->type = TOKEN_STAR;         break;
		case '.': token->type = TOKEN_DOT;          break;
		case ',': token->type = TOKEN_COMMA;        break;
		default:
			if (isStartIdentChar (c))
			{
				/* Identifier or keyword: accumulate, then classify via
				 * the Go keyword table. */
				parseIdentifier (token->string, c);
				token->lineNumber = getSourceLineNumber ();
				token->filePosition = getInputFilePosition ();
				token->keyword = lookupKeyword (vStringValue (token->string), Lang_go);
				if (isKeyword (token, KEYWORD_NONE))
					token->type = TOKEN_IDENTIFIER;
				else
					token->type = TOKEN_KEYWORD;
			}
			else
				token->type = TOKEN_OTHER;
			break;
	}

	/* Append the token's text to the signature being captured, if any. */
	if (signature && vStringLength (signature) < MAX_SIGNATURE_LENGTH)
	{
		if (token->type == TOKEN_LEFT_ARROW)
			vStringCatS(signature, "<-");
		else if (token->type == TOKEN_STRING)
		{
			// only struct member annotations can appear in function prototypes
			// so only `` type strings are possible
			vStringPut(signature, '`');
			vStringCat(signature, token->string);
			vStringPut(signature, '`');
		}
		else if (token->type == TOKEN_IDENTIFIER || token->type == TOKEN_KEYWORD)
			vStringCat(signature, token->string);
		else if (c != EOF)
			vStringPut(signature, c);
	}

	/* Remember this token's type for the next call's semicolon insertion. */
	lastTokenType = token->type;
}