static void parseModule (tokenInfo * const token)
{
	tokenInfo *const name = newToken ();
	const vhdlKind kind = isKeyword (token, KEYWORD_ENTITY) ?
		VHDLTAG_ENTITY : VHDLTAG_COMPONENT;
	Assert (isKeyword (token, KEYWORD_ENTITY) ||
		isKeyword (token, KEYWORD_COMPONENT));
	readToken (name);
	if (kind == VHDLTAG_COMPONENT)
	{
		makeVhdlTag (name, VHDLTAG_COMPONENT);
		skipToKeyword (KEYWORD_END);
		fileSkipToCharacter (';');
	}
	else
	{
		readToken (token);
		if (isKeyword (token, KEYWORD_IS))
		{
			makeVhdlTag (name, VHDLTAG_ENTITY);
			skipToKeyword (KEYWORD_END);
			fileSkipToCharacter (';');
		}
	}
	deleteToken (name);
}
static void parseKeywords (tokenInfo * const token, boolean local)
{
	switch (token->keyword)
	{
		case KEYWORD_END:
			fileSkipToCharacter (';');
			break;
		case KEYWORD_CONSTANT:
			parseConstant (local);
			break;
		case KEYWORD_TYPE:
			parseTypes (token);
			break;
		case KEYWORD_SUBTYPE:
			parseTypes (token);
			break;
		case KEYWORD_ENTITY:
			parseModule (token);
			break;
		case KEYWORD_COMPONENT:
			parseModule (token);
			break;
		case KEYWORD_FUNCTION:
			parseSubProgram (token);
			break;
		case KEYWORD_PROCEDURE:
			parseSubProgram (token);
			break;
		case KEYWORD_PACKAGE:
			parsePackage (token);
			break;
		default:
			break;
	}
}
/* records */
static void parseRecord (tokenInfo * const token)
{
	tokenInfo *const name = newToken ();
	Assert (isKeyword (token, KEYWORD_RECORD));
	readToken (name);
	do
	{
		readToken (token);	/* should be a colon */
		fileSkipToCharacter (';');
		makeVhdlTag (name, VHDLTAG_RECORD);
		readToken (name);
	} while (!isKeyword (name, KEYWORD_END) && !isType (name, TOKEN_EOF));
	fileSkipToCharacter (';');
	deleteToken (name);
}
static void parseConstant (boolean local)
{
	tokenInfo *const name = newToken ();
	readToken (name);
	if (local)
	{
		makeVhdlTag (name, VHDLTAG_LOCAL);
	}
	else
	{
		makeVhdlTag (name, VHDLTAG_CONSTANT);
	}
	fileSkipToCharacter (';');
	deleteToken (name);
}
static void parseSubProgram (tokenInfo * const token)
{
	tokenInfo *const name = newToken ();
	boolean endSubProgram = FALSE;
	const vhdlKind kind = isKeyword (token, KEYWORD_FUNCTION) ?
		VHDLTAG_FUNCTION : VHDLTAG_PROCEDURE;
	Assert (isKeyword (token, KEYWORD_FUNCTION) ||
		isKeyword (token, KEYWORD_PROCEDURE));
	readToken (name);	/* the name of the function or procedure */
	readToken (token);
	if (isType (token, TOKEN_OPEN_PAREN))
	{
		skipToMatched (token);
	}

	if (kind == VHDLTAG_FUNCTION)
	{
		if (isKeyword (token, KEYWORD_RETURN))
		{
			/* Read datatype */
			readToken (token);
			while (! isKeyword (token, KEYWORD_IS) &&
					! isType (token, TOKEN_SEMICOLON) &&
					! isType (token, TOKEN_EOF))
			{
				readToken (token);
			}
		}
	}

	if (isType (token, TOKEN_SEMICOLON))
	{
		makeVhdlTag (name, VHDLTAG_PROTOTYPE);
	}
	else if (isKeyword (token, KEYWORD_IS))
	{
		if (kind == VHDLTAG_FUNCTION)
		{
			makeVhdlTag (name, VHDLTAG_FUNCTION);
			do
			{
				readToken (token);
				if (isKeyword (token, KEYWORD_END))
				{
					readToken (token);
					endSubProgram = isKeywordOrIdent (token,
						KEYWORD_FUNCTION, name->string);
					fileSkipToCharacter (';');
				}
				else
				{
					if (isType (token, TOKEN_EOF))
					{
						endSubProgram = TRUE;
					}
					else
					{
						parseKeywords (token, TRUE);
					}
				}
			} while (!endSubProgram);
		}
		else
		{
			makeVhdlTag (name, VHDLTAG_PROCEDURE);
			do
			{
				readToken (token);
				if (isKeyword (token, KEYWORD_END))
				{
					readToken (token);
					endSubProgram = isKeywordOrIdent (token,
						KEYWORD_PROCEDURE, name->string);
					fileSkipToCharacter (';');
				}
				else
				{
					if (isType (token, TOKEN_EOF))
					{
						endSubProgram = TRUE;
					}
					else
					{
						parseKeywords (token, TRUE);
					}
				}
			} while (!endSubProgram);
		}
	}
	deleteToken (name);
}
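/* For orientation: the VHDL routines above are normally driven from a
 * top-level loop that pulls tokens and dispatches each one through
 * parseKeywords until end of input.  That driver is not part of this
 * excerpt, so the sketch below -- including the name parseVhdlFile --
 * is an assumption about its shape, not a verbatim quote. */
static void parseVhdlFile (tokenInfo * const token)
{
	do
	{
		readToken (token);
		parseKeywords (token, FALSE);	/* FALSE: not inside a subprogram, so constants are not tagged as locals */
	} while (!isType (token, TOKEN_EOF));
}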
static void readToken (tokenInfo * const token)
{
	int c;

	token->type = TOKEN_NONE;
	token->keyword = KEYWORD_NONE;
	vStringClear (token->string);

getNextChar:
	do
	{
		c = fileGetc ();
		token->lineNumber = getSourceLineNumber ();
		token->filePosition = getInputFilePosition ();
	} while (c == '\t' || c == ' ' || c == '\n');

	switch (c)
	{
		case EOF: token->type = TOKEN_EOF;          break;
		case '(': token->type = TOKEN_OPEN_PAREN;   break;
		case ')': token->type = TOKEN_CLOSE_PAREN;  break;
		case ';': token->type = TOKEN_SEMICOLON;    break;
		case '.': token->type = TOKEN_PERIOD;       break;
		case ',': token->type = TOKEN_COMMA;        break;
		case '\'':	/* only single chars appear inside simple quotes */
			break;	/* or it is for attributes, so we don't care */
		case '"':
			token->type = TOKEN_STRING;
			parseString (token->string, c);
			token->lineNumber = getSourceLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;
		case '-':
			c = fileGetc ();
			if (c == '-')	/* start of a comment */
			{
				fileSkipToCharacter ('\n');
				goto getNextChar;
			}
			else
			{
				if (!isspace (c))
					fileUngetc (c);
				token->type = TOKEN_OPERATOR;
			}
			break;
		default:
			if (!isIdentChar1 (c))
				token->type = TOKEN_NONE;
			else
			{
				parseIdentifier (token->string, c);
				token->lineNumber = getSourceLineNumber ();
				token->filePosition = getInputFilePosition ();
				token->keyword = analyzeToken (token->string, Lang_vhdl);
				if (isKeyword (token, KEYWORD_NONE))
					token->type = TOKEN_IDENTIFIER;
				else
					token->type = TOKEN_KEYWORD;
			}
			break;
	}
}
static void readToken (tokenInfo *const token)
{
	int c;

	token->type = TOKEN_UNDEFINED;
	token->keyword = KEYWORD_NONE;
	vStringClear (token->string);

getNextChar:
	do
	{
		c = fileGetc ();
		token->lineNumber = getSourceLineNumber ();
		token->filePosition = getInputFilePosition ();
	} while (c == '\t' || c == ' ' || c == '\n');

	switch (c)
	{
		case EOF: longjmp (Exception, (int)ExceptionEOF);  break;
		case '(': token->type = TOKEN_OPEN_PAREN;          break;
		case ')': token->type = TOKEN_CLOSE_PAREN;         break;
		case ';': token->type = TOKEN_SEMICOLON;           break;
		case ',': token->type = TOKEN_COMMA;               break;
		case '.': token->type = TOKEN_PERIOD;              break;
		case ':': token->type = TOKEN_COLON;               break;
		case '{': token->type = TOKEN_OPEN_CURLY;          break;
		case '}': token->type = TOKEN_CLOSE_CURLY;         break;
		case '=': token->type = TOKEN_EQUAL_SIGN;          break;
		case '[': token->type = TOKEN_OPEN_SQUARE;         break;
		case ']': token->type = TOKEN_CLOSE_SQUARE;        break;
		case '?': token->type = TOKEN_QUESTION_MARK;       break;
		case '*': token->type = TOKEN_STAR;                break;

		case '\'':
		case '"':
			token->type = TOKEN_STRING;
			parseString (token->string, c);
			token->lineNumber = getSourceLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;

		case '\\':
			/*
			 * All TeX tags start with a backslash.
			 * Check whether the next character is alphabetic;
			 * if not, this cannot be a TeX tag.
			 */
			c = fileGetc ();
			if (! isalpha (c))
				fileUngetc (c);
			else
			{
				parseIdentifier (token->string, c);
				token->lineNumber = getSourceLineNumber ();
				token->filePosition = getInputFilePosition ();
				token->keyword = analyzeToken (token->string, Lang_js);
				if (isKeyword (token, KEYWORD_NONE))
					token->type = TOKEN_IDENTIFIER;
				else
					token->type = TOKEN_KEYWORD;
			}
			break;

		case '%':
			fileSkipToCharacter ('\n');	/* % starts a single-line comment */
			goto getNextChar;
			break;

		default:
			if (! isIdentChar (c))
				token->type = TOKEN_UNDEFINED;
			else
			{
				parseIdentifier (token->string, c);
				token->lineNumber = getSourceLineNumber ();
				token->filePosition = getInputFilePosition ();
				token->type = TOKEN_IDENTIFIER;
			}
			break;
	}
}
static void readToken (tokenInfo *const token)
{
	int c;
	static tokenType lastTokenType = TOKEN_NONE;
	boolean firstWhitespace = TRUE;
	boolean whitespace;

	token->type = TOKEN_NONE;
	token->keyword = KEYWORD_NONE;
	vStringClear (token->string);

getNextChar:
	do
	{
		c = fileGetc ();
		token->lineNumber = getSourceLineNumber ();
		token->filePosition = getInputFilePosition ();
		if (c == '\n' && (lastTokenType == TOKEN_IDENTIFIER ||
						  lastTokenType == TOKEN_STRING ||
						  lastTokenType == TOKEN_OTHER ||
						  lastTokenType == TOKEN_CLOSE_PAREN ||
						  lastTokenType == TOKEN_CLOSE_CURLY ||
						  lastTokenType == TOKEN_CLOSE_SQUARE))
		{
			c = ';';  // semicolon injection
		}
		whitespace = c == '\t' || c == ' ' || c == '\r' || c == '\n';
		if (signature && whitespace && firstWhitespace &&
			vStringLength (signature) < MAX_SIGNATURE_LENGTH)
		{
			firstWhitespace = FALSE;
			vStringPut (signature, ' ');
		}
	} while (whitespace);

	switch (c)
	{
		case EOF:
			token->type = TOKEN_EOF;
			break;

		case ';':
			token->type = TOKEN_SEMICOLON;
			break;

		case '/':
			{
				boolean hasNewline = FALSE;
				int d = fileGetc ();
				switch (d)
				{
					case '/':
						fileSkipToCharacter ('\n');
						/* Line comments start with the
						 * character sequence // and
						 * continue through the next
						 * newline. A line comment acts
						 * like a newline. */
						fileUngetc ('\n');
						goto getNextChar;
					case '*':
						do
						{
							do
							{
								d = fileGetc ();
								if (d == '\n')
								{
									hasNewline = TRUE;
								}
							} while (d != EOF && d != '*');

							c = fileGetc ();
							if (c == '/')
								break;
							else
								fileUngetc (c);
						} while (c != EOF && c != '\0');

						fileUngetc (hasNewline ? '\n' : ' ');
						goto getNextChar;
					default:
						token->type = TOKEN_OTHER;
						fileUngetc (d);
						break;
				}
			}
			break;

		case '"':
		case '\'':
		case '`':
			token->type = TOKEN_STRING;
			parseString (token->string, c);
			token->lineNumber = getSourceLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;

		case '<':
			{
				int d = fileGetc ();
				if (d == '-')
					token->type = TOKEN_LEFT_ARROW;
				else
				{
					fileUngetc (d);
					token->type = TOKEN_OTHER;
				}
			}
			break;

		case '(': token->type = TOKEN_OPEN_PAREN;   break;
		case ')': token->type = TOKEN_CLOSE_PAREN;  break;
		case '{': token->type = TOKEN_OPEN_CURLY;   break;
		case '}': token->type = TOKEN_CLOSE_CURLY;  break;
		case '[': token->type = TOKEN_OPEN_SQUARE;  break;
		case ']': token->type = TOKEN_CLOSE_SQUARE; break;
		case '*': token->type = TOKEN_STAR;         break;
		case '.': token->type = TOKEN_DOT;          break;
		case ',': token->type = TOKEN_COMMA;        break;

		default:
			if (isStartIdentChar (c))
			{
				parseIdentifier (token->string, c);
				token->lineNumber = getSourceLineNumber ();
				token->filePosition = getInputFilePosition ();
				token->keyword = lookupKeyword (vStringValue (token->string), Lang_go);
				if (isKeyword (token, KEYWORD_NONE))
					token->type = TOKEN_IDENTIFIER;
				else
					token->type = TOKEN_KEYWORD;
			}
			else
				token->type = TOKEN_OTHER;
			break;
	}

	if (signature && vStringLength (signature) < MAX_SIGNATURE_LENGTH)
	{
		if (token->type == TOKEN_LEFT_ARROW)
			vStringCatS (signature, "<-");
		else if (token->type == TOKEN_STRING)
		{
			// only struct member annotations can appear in function prototypes
			// so only `` type strings are possible
			vStringPut (signature, '`');
			vStringCat (signature, token->string);
			vStringPut (signature, '`');
		}
		else if (token->type == TOKEN_IDENTIFIER || token->type == TOKEN_KEYWORD)
			vStringCat (signature, token->string);
		else if (c != EOF)
			vStringPut (signature, c);
	}

	lastTokenType = token->type;
}
static void readToken (tokenInfo *const token)
{
	int c;

	token->type = TOKEN_UNDEFINED;
	token->keyword = KEYWORD_NONE;
	vStringClear (token->string);

getNextChar:
	do
	{
		c = fileGetc ();
	} while (c == '\t' || c == ' ' || c == '\n');

	token->lineNumber = getSourceLineNumber ();
	token->filePosition = getInputFilePosition ();

	switch (c)
	{
		case EOF: longjmp (Exception, (int)ExceptionEOF);  break;
		case '(': token->type = TOKEN_OPEN_PAREN;          break;
		case ')': token->type = TOKEN_CLOSE_PAREN;         break;
		case ';': token->type = TOKEN_SEMICOLON;           break;
		case ',': token->type = TOKEN_COMMA;               break;
		case '.': token->type = TOKEN_PERIOD;              break;
		case ':': token->type = TOKEN_COLON;               break;
		case '{': token->type = TOKEN_OPEN_CURLY;          break;
		case '}': token->type = TOKEN_CLOSE_CURLY;         break;
		case '=': token->type = TOKEN_EQUAL_SIGN;          break;
		case '[': token->type = TOKEN_OPEN_SQUARE;         break;
		case ']': token->type = TOKEN_CLOSE_SQUARE;        break;

		case '\'':
		case '"':
			token->type = TOKEN_STRING;
			parseString (token->string, c);
			token->lineNumber = getSourceLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;

		case '\\':
			c = fileGetc ();
			if (c != '\\' && c != '"' && !isspace (c))
				fileUngetc (c);
			token->type = TOKEN_CHARACTER;
			token->lineNumber = getSourceLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;

		case '/':
		{
			int d = fileGetc ();
			if ((d != '*') &&	/* is this the start of a comment? */
				(d != '/'))		/* is this a one-line comment? */
			{
				fileUngetc (d);
				switch (LastTokenType)
				{
					case TOKEN_CHARACTER:
					case TOKEN_KEYWORD:
					case TOKEN_IDENTIFIER:
					case TOKEN_STRING:
					case TOKEN_CLOSE_CURLY:
					case TOKEN_CLOSE_PAREN:
					case TOKEN_CLOSE_SQUARE:
						token->type = TOKEN_FORWARD_SLASH;
						break;

					default:
						token->type = TOKEN_REGEXP;
						parseRegExp ();
						token->lineNumber = getSourceLineNumber ();
						token->filePosition = getInputFilePosition ();
						break;
				}
			}
			else
			{
				if (d == '*')
				{
					do
					{
						fileSkipToCharacter ('*');
						c = fileGetc ();
						if (c == '/')
							break;
						else
							fileUngetc (c);
					} while (c != EOF && c != '\0');
					goto getNextChar;
				}
				else if (d == '/')	/* is this the start of a comment? */
				{
					fileSkipToCharacter ('\n');
					goto getNextChar;
				}
			}
			break;
		}

		default:
			if (! isIdentChar (c))
				token->type = TOKEN_UNDEFINED;
			else
			{
				parseIdentifier (token->string, c);
				token->lineNumber = getSourceLineNumber ();
				token->filePosition = getInputFilePosition ();
				token->keyword = analyzeToken (token->string, Lang_js);
				if (isKeyword (token, KEYWORD_NONE))
					token->type = TOKEN_IDENTIFIER;
				else
					token->type = TOKEN_KEYWORD;
			}
			break;
	}

	LastTokenType = token->type;
}
static void readTokenFull (tokenInfo *const token, boolean include_newlines, vString *const repr)
{
	int c;
	int i;
	boolean newline_encountered = FALSE;

	/* if we've got a token held back, emit it */
	if (NextToken)
	{
		copyToken (token, NextToken, FALSE);
		deleteToken (NextToken);
		NextToken = NULL;
		return;
	}

	token->type = TOKEN_UNDEFINED;
	token->keyword = KEYWORD_NONE;
	vStringClear (token->string);

getNextChar:
	i = 0;
	do
	{
		c = fileGetc ();
		if (include_newlines && (c == '\r' || c == '\n'))
			newline_encountered = TRUE;
		i++;
	} while (c == '\t' || c == ' ' || c == '\r' || c == '\n');

	token->lineNumber = getSourceLineNumber ();
	token->filePosition = getInputFilePosition ();

	if (repr)
	{
		if (i > 1)
			vStringPut (repr, ' ');
		vStringPut (repr, c);
	}

	switch (c)
	{
		case EOF: token->type = TOKEN_EOF;          break;
		case '(': token->type = TOKEN_OPEN_PAREN;   break;
		case ')': token->type = TOKEN_CLOSE_PAREN;  break;
		case ';': token->type = TOKEN_SEMICOLON;    break;
		case ',': token->type = TOKEN_COMMA;        break;
		case '.': token->type = TOKEN_PERIOD;       break;
		case ':': token->type = TOKEN_COLON;        break;
		case '{': token->type = TOKEN_OPEN_CURLY;   break;
		case '}': token->type = TOKEN_CLOSE_CURLY;  break;
		case '=': token->type = TOKEN_EQUAL_SIGN;   break;
		case '[': token->type = TOKEN_OPEN_SQUARE;  break;
		case ']': token->type = TOKEN_CLOSE_SQUARE; break;

		case '+':
		case '-':
			{
				int d = fileGetc ();
				if (d == c)	/* ++ or -- */
					token->type = TOKEN_POSTFIX_OPERATOR;
				else
				{
					fileUngetc (d);
					token->type = TOKEN_BINARY_OPERATOR;
				}
				break;
			}

		case '*':
		case '%':
		case '?':
		case '>':
		case '<':
		case '^':
		case '|':
		case '&':
			token->type = TOKEN_BINARY_OPERATOR;
			break;

		case '\'':
		case '"':
			token->type = TOKEN_STRING;
			parseString (token->string, c);
			token->lineNumber = getSourceLineNumber ();
			token->filePosition = getInputFilePosition ();
			if (repr)
			{
				vStringCat (repr, token->string);
				vStringPut (repr, c);
			}
			break;

		case '`':
			token->type = TOKEN_TEMPLATE_STRING;
			parseTemplateString (token->string);
			token->lineNumber = getSourceLineNumber ();
			token->filePosition = getInputFilePosition ();
			if (repr)
			{
				vStringCat (repr, token->string);
				vStringPut (repr, c);
			}
			break;

		case '\\':
			c = fileGetc ();
			if (c != '\\' && c != '"' && !isspace (c))
				fileUngetc (c);
			token->type = TOKEN_CHARACTER;
			token->lineNumber = getSourceLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;

		case '/':
		{
			int d = fileGetc ();
			if ((d != '*') &&	/* is this the start of a comment? */
				(d != '/'))		/* is this a one-line comment? */
			{
				fileUngetc (d);
				switch (LastTokenType)
				{
					case TOKEN_CHARACTER:
					case TOKEN_IDENTIFIER:
					case TOKEN_STRING:
					case TOKEN_TEMPLATE_STRING:
					case TOKEN_CLOSE_CURLY:
					case TOKEN_CLOSE_PAREN:
					case TOKEN_CLOSE_SQUARE:
						token->type = TOKEN_BINARY_OPERATOR;
						break;

					default:
						token->type = TOKEN_REGEXP;
						parseRegExp ();
						token->lineNumber = getSourceLineNumber ();
						token->filePosition = getInputFilePosition ();
						break;
				}
			}
			else
			{
				if (repr)	/* remove the / we added */
					repr->buffer[--repr->length] = 0;
				if (d == '*')
				{
					do
					{
						fileSkipToCharacter ('*');
						c = fileGetc ();
						if (c == '/')
							break;
						else
							fileUngetc (c);
					} while (c != EOF && c != '\0');
					goto getNextChar;
				}
				else if (d == '/')	/* is this the start of a comment? */
				{
					fileSkipToCharacter ('\n');
					/* if we care about newlines, put it back so it is seen */
					if (include_newlines)
						fileUngetc ('\n');
					goto getNextChar;
				}
			}
			break;
		}

		case '#':
			/* skip shebang in case of e.g. Node.js scripts */
			if (token->lineNumber > 1)
				token->type = TOKEN_UNDEFINED;
			else if ((c = fileGetc ()) != '!')
			{
				fileUngetc (c);
				token->type = TOKEN_UNDEFINED;
			}
			else
			{
				fileSkipToCharacter ('\n');
				goto getNextChar;
			}
			break;

		default:
			if (! isIdentChar (c))
				token->type = TOKEN_UNDEFINED;
			else
			{
				parseIdentifier (token->string, c);
				token->lineNumber = getSourceLineNumber ();
				token->filePosition = getInputFilePosition ();
				token->keyword = analyzeToken (token->string, Lang_js);
				if (isKeyword (token, KEYWORD_NONE))
					token->type = TOKEN_IDENTIFIER;
				else
					token->type = TOKEN_KEYWORD;
				if (repr && vStringLength (token->string) > 1)
					vStringCatS (repr, vStringValue (token->string) + 1);
			}
			break;
	}

	if (include_newlines && newline_encountered)
	{
		/* This isn't strictly correct per the standard, but following the
		 * real rules means understanding all statements, and that's not
		 * what the parser currently does.  What we do here is a guess, by
		 * avoiding inserting semicolons that would make the statement on
		 * the left or right obviously invalid.  Hopefully this should not
		 * have false negatives (e.g. should not miss insertion of a semicolon)
		 * but might have false positives (e.g. it will wrongfully emit a
		 * semicolon sometimes, i.e. for the newline in "foo\n(bar)").
		 * This should however be mostly harmless as we only deal with
		 * newlines in specific situations where we know a false positive
		 * wouldn't hurt too bad. */

		/* these already end a statement, so no need to duplicate it */
		#define IS_STMT_SEPARATOR(t) ((t) == TOKEN_SEMICOLON    || \
		                              (t) == TOKEN_EOF          || \
		                              (t) == TOKEN_COMMA        || \
		                              (t) == TOKEN_CLOSE_CURLY  || \
		                              (t) == TOKEN_OPEN_CURLY)
		/* these cannot be the start or end of a statement */
		#define IS_BINARY_OPERATOR(t) ((t) == TOKEN_EQUAL_SIGN || \
		                               (t) == TOKEN_COLON      || \
		                               (t) == TOKEN_PERIOD     || \
		                               (t) == TOKEN_BINARY_OPERATOR)

		if (! IS_STMT_SEPARATOR(LastTokenType) &&
		    ! IS_STMT_SEPARATOR(token->type) &&
		    ! IS_BINARY_OPERATOR(LastTokenType) &&
		    ! IS_BINARY_OPERATOR(token->type) &&
		    /* these cannot be followed by a semicolon */
		    ! (LastTokenType == TOKEN_OPEN_PAREN ||
		       LastTokenType == TOKEN_OPEN_SQUARE))
		{
			/* hold the token... */
			Assert (NextToken == NULL);
			NextToken = newToken ();
			copyToken (NextToken, token, FALSE);

			/* ...and emit a semicolon instead */
			token->type = TOKEN_SEMICOLON;
			token->keyword = KEYWORD_NONE;
			vStringClear (token->string);
			if (repr)
				vStringPut (token->string, '\n');
		}

		#undef IS_STMT_SEPARATOR
		#undef IS_BINARY_OPERATOR
	}

	LastTokenType = token->type;
}
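/* readTokenFull is usually reached through a thin wrapper that fixes the
 * include_newlines and repr arguments for the common case.  The wrapper is
 * not shown in this excerpt, so the following is a sketch of its assumed
 * shape, not a verbatim quote. */
static void readToken (tokenInfo *const token)
{
	readTokenFull (token, FALSE, NULL);
}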
static void readToken (tokenInfo *const token)
{
	int c;
	static tokenType lastTokenType = TOKEN_NONE;

	token->type = TOKEN_NONE;
	token->keyword = KEYWORD_NONE;
	vStringClear (token->string);

getNextChar:
	do
	{
		c = fileGetc ();
		token->lineNumber = getSourceLineNumber ();
		token->filePosition = getInputFilePosition ();
		if (c == '\n' && (lastTokenType == TOKEN_IDENTIFIER ||
						  lastTokenType == TOKEN_STRING ||
						  lastTokenType == TOKEN_OTHER ||
						  lastTokenType == TOKEN_CLOSE_PAREN ||
						  lastTokenType == TOKEN_CLOSE_CURLY ||
						  lastTokenType == TOKEN_CLOSE_SQUARE))
		{
			token->type = TOKEN_SEMICOLON;
			goto done;
		}
	} while (c == '\t' || c == ' ' || c == '\r' || c == '\n');

	switch (c)
	{
		case EOF:
			token->type = TOKEN_EOF;
			break;

		case ';':
			token->type = TOKEN_SEMICOLON;
			break;

		case '/':
			{
				boolean hasNewline = FALSE;
				int d = fileGetc ();
				switch (d)
				{
					case '/':
						fileSkipToCharacter ('\n');
						/* Line comments start with the
						 * character sequence // and
						 * continue through the next
						 * newline. A line comment acts
						 * like a newline. */
						fileUngetc ('\n');
						goto getNextChar;
					case '*':
						do
						{
							do
							{
								d = fileGetc ();
								if (d == '\n')
								{
									hasNewline = TRUE;
								}
							} while (d != EOF && d != '*');

							c = fileGetc ();
							if (c == '/')
								break;
							else
								fileUngetc (c);
						} while (c != EOF && c != '\0');

						fileUngetc (hasNewline ? '\n' : ' ');
						goto getNextChar;
					default:
						token->type = TOKEN_OTHER;
						fileUngetc (d);
						break;
				}
			}
			break;

		case '"':
		case '\'':
		case '`':
			token->type = TOKEN_STRING;
			parseString (token->string, c);
			token->lineNumber = getSourceLineNumber ();
			token->filePosition = getInputFilePosition ();
			break;

		case '<':
			{
				int d = fileGetc ();
				if (d == '-')
					token->type = TOKEN_LEFT_ARROW;
				else
				{
					fileUngetc (d);
					token->type = TOKEN_OTHER;
				}
			}
			break;

		case '(': token->type = TOKEN_OPEN_PAREN;   break;
		case ')': token->type = TOKEN_CLOSE_PAREN;  break;
		case '{': token->type = TOKEN_OPEN_CURLY;   break;
		case '}': token->type = TOKEN_CLOSE_CURLY;  break;
		case '[': token->type = TOKEN_OPEN_SQUARE;  break;
		case ']': token->type = TOKEN_CLOSE_SQUARE; break;
		case '*': token->type = TOKEN_STAR;         break;
		case '.': token->type = TOKEN_DOT;          break;
		case ',': token->type = TOKEN_COMMA;        break;

		default:
			if (isStartIdentChar (c))
			{
				parseIdentifier (token->string, c);
				token->lineNumber = getSourceLineNumber ();
				token->filePosition = getInputFilePosition ();
				token->keyword = lookupKeyword (vStringValue (token->string), Lang_go);
				if (isKeyword (token, KEYWORD_NONE))
					token->type = TOKEN_IDENTIFIER;
				else
					token->type = TOKEN_KEYWORD;
			}
			else
				token->type = TOKEN_OTHER;
			break;
	}

done:
	lastTokenType = token->type;
}