/** Tokenize Jsonnet source text.
 *
 * Walks the NUL-terminated buffer `input` and produces one Token per lexeme, followed by a final
 * Token::END_OF_FILE.  Whitespace and comments are not tokens; they are collected as "fodder" and
 * attached to the next token that is emitted (or to the END_OF_FILE token).
 *
 * \param filename Name used in the LocationRange of every token and in every StaticError.
 * \param input NUL-terminated source text.
 * \returns The token list, always terminated by an END_OF_FILE token.
 * \throws StaticError on an unterminated string or multi-line comment, a malformed text block
 *         (including `|||` that is not immediately followed by a newline), or a character that
 *         does not start any token.
 */
Tokens jsonnet_lex(const std::string &filename, const char *input)
{
    unsigned long line_number = 1;
    const char *line_start = input;

    Tokens r;

    const char *c = input;

    Fodder fodder;
    bool fresh_line = true;  // Are we tokenizing from the beginning of a new line?

    while (*c != '\0') {
        Token::Kind kind;
        std::string data;
        std::string string_block_indent;
        std::string string_block_term_indent;

        unsigned new_lines, indent;
        lex_ws(c, new_lines, indent, line_start, line_number);

        // If it's the end of the file, discard final whitespace.
        if (*c == '\0')
            break;

        if (new_lines > 0) {
            // Otherwise store whitespace in fodder.
            unsigned blanks = new_lines - 1;
            fodder.emplace_back(FodderElement::LINE_END, blanks, indent, EMPTY);
            fresh_line = true;
        }

        // Columns are 1-based, hence the + 1.
        Location begin(line_number, c - line_start + 1);

        switch (*c) {
            // The following operators should never be combined with subsequent symbols.
            case '{':
                kind = Token::BRACE_L;
                c++;
                break;

            case '}':
                kind = Token::BRACE_R;
                c++;
                break;

            case '[':
                kind = Token::BRACKET_L;
                c++;
                break;

            case ']':
                kind = Token::BRACKET_R;
                c++;
                break;

            case ',':
                kind = Token::COMMA;
                c++;
                break;

            case '.':
                kind = Token::DOT;
                c++;
                break;

            case '(':
                kind = Token::PAREN_L;
                c++;
                break;

            case ')':
                kind = Token::PAREN_R;
                c++;
                break;

            case ';':
                kind = Token::SEMICOLON;
                c++;
                break;

            // Numeric literals.
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
                kind = Token::NUMBER;
                data = lex_number(c, filename, begin);
                break;

            // Double-quoted string literals.  The token data is the raw text between the
            // quotes; escape sequences are copied verbatim (backslash included).
            case '"': {
                c++;
                for (;; ++c) {
                    if (*c == '\0') {
                        throw StaticError(filename, begin, "Unterminated string");
                    }
                    if (*c == '"') {
                        break;
                    }
                    if (*c == '\\' && *(c + 1) != '\0') {
                        // Copy the backslash and step onto the escaped character so that an
                        // escaped quote does not terminate the literal.
                        data += *c;
                        ++c;
                    }
                    if (*c == '\n') {
                        // Maintain line/column counters.
                        line_number++;
                        line_start = c + 1;
                    }
                    data += *c;
                }
                c++;  // Advance beyond the ".
                kind = Token::STRING_DOUBLE;
            } break;

            // Single-quoted string literals.  Same rules as the double-quoted form.
            case '\'': {
                c++;
                for (;; ++c) {
                    if (*c == '\0') {
                        throw StaticError(filename, begin, "Unterminated string");
                    }
                    if (*c == '\'') {
                        break;
                    }
                    if (*c == '\\' && *(c + 1) != '\0') {
                        data += *c;
                        ++c;
                    }
                    if (*c == '\n') {
                        // Maintain line/column counters.
                        line_number++;
                        line_start = c + 1;
                    }
                    data += *c;
                }
                c++;  // Advance beyond the '.
                kind = Token::STRING_SINGLE;
            } break;

            // Keywords, identifiers, comments, text blocks and operators.
            default:
                if (is_identifier_first(*c)) {
                    std::string id;
                    for (; is_identifier(*c); ++c)
                        id += *c;
                    if (id == "assert") {
                        kind = Token::ASSERT;
                    } else if (id == "else") {
                        kind = Token::ELSE;
                    } else if (id == "error") {
                        kind = Token::ERROR;
                    } else if (id == "false") {
                        kind = Token::FALSE;
                    } else if (id == "for") {
                        kind = Token::FOR;
                    } else if (id == "function") {
                        kind = Token::FUNCTION;
                    } else if (id == "if") {
                        kind = Token::IF;
                    } else if (id == "import") {
                        kind = Token::IMPORT;
                    } else if (id == "importstr") {
                        kind = Token::IMPORTSTR;
                    } else if (id == "in") {
                        kind = Token::IN;
                    } else if (id == "local") {
                        kind = Token::LOCAL;
                    } else if (id == "null") {
                        kind = Token::NULL_LIT;
                    } else if (id == "self") {
                        kind = Token::SELF;
                    } else if (id == "super") {
                        kind = Token::SUPER;
                    } else if (id == "tailstrict") {
                        kind = Token::TAILSTRICT;
                    } else if (id == "then") {
                        kind = Token::THEN;
                    } else if (id == "true") {
                        kind = Token::TRUE;
                    } else {
                        // Not a keyword, must be an identifier.
                        kind = Token::IDENTIFIER;
                    }
                    data = id;

                } else if (is_symbol(*c) || *c == '#') {
                    // Single line C++ and Python style comments.
                    if (*c == '#' || (*c == '/' && *(c + 1) == '/')) {
                        std::vector<std::string> comment(1);
                        unsigned comment_blanks;
                        unsigned comment_indent;
                        lex_until_newline(
                            c, comment[0], comment_blanks, comment_indent, line_start, line_number);
                        // A comment alone on its line is a paragraph; one trailing a token is
                        // recorded as the end of that line.
                        auto fodder_kind =
                            fresh_line ? FodderElement::PARAGRAPH : FodderElement::LINE_END;
                        fodder.emplace_back(fodder_kind, comment_blanks, comment_indent, comment);
                        fresh_line = true;
                        continue;  // We've not got a token, just fodder, so keep scanning.
                    }

                    // Multi-line C style comment.
                    if (*c == '/' && *(c + 1) == '*') {
                        unsigned margin = c - line_start;

                        const char *initial_c = c;
                        c += 2;  // Avoid matching /*/: skip the /* before searching for */.

                        while (!(*c == '*' && *(c + 1) == '/')) {
                            if (*c == '\0') {
                                auto msg = "Multi-line comment has no terminating */.";
                                throw StaticError(filename, begin, msg);
                            }
                            if (*c == '\n') {
                                // Just keep track of the line / column counters.
                                line_number++;
                                line_start = c + 1;
                            }
                            ++c;
                        }
                        c += 2;  // Move the pointer to the char after the closing '/'.

                        // Includes the "/*" and "*/".
                        std::string comment(initial_c, c - initial_c);

                        // Lex whitespace after comment
                        unsigned new_lines_after, indent_after;
                        lex_ws(c, new_lines_after, indent_after, line_start, line_number);
                        std::vector<std::string> lines;
                        if (comment.find('\n') >= comment.length()) {
                            // Comment looks like /* foo */
                            lines.push_back(comment);
                            fodder.emplace_back(FodderElement::INTERSTITIAL, 0, 0, lines);
                            if (new_lines_after > 0) {
                                fodder.emplace_back(FodderElement::LINE_END,
                                                    new_lines_after - 1,
                                                    indent_after,
                                                    EMPTY);
                                fresh_line = true;
                            }
                        } else {
                            lines = line_split(comment, margin);
                            assert(lines[0][0] == '/');
                            // Little hack to support PARAGRAPHs with * down the LHS:
                            // Add a space to lines that start with a '*'
                            // NOTE(review): lines[0] starts with '/' (asserted above), so
                            // all_star appears to always end up false and the hack never
                            // fires -- confirm whether the first line should be excluded.
                            bool all_star = true;
                            for (auto &l : lines) {
                                if (l[0] != '*')
                                    all_star = false;
                            }
                            if (all_star) {
                                for (auto &l : lines) {
                                    if (l[0] == '*')
                                        l = " " + l;
                                }
                            }
                            if (new_lines_after == 0) {
                                // Ensure a line end after the paragraph.
                                new_lines_after = 1;
                                indent_after = 0;
                            }
                            if (!fresh_line)
                                // Ensure a line end before the comment.
                                fodder.emplace_back(FodderElement::LINE_END, 0, 0, EMPTY);
                            fodder.emplace_back(
                                FodderElement::PARAGRAPH, new_lines_after - 1, indent_after, lines);
                            fresh_line = true;
                        }
                        continue;  // We've not got a token, just fodder, so keep scanning.
                    }

                    // Text block
                    if (*c == '|' && *(c + 1) == '|' && *(c + 2) == '|') {
                        // The opening ||| must be directly followed by a newline.  Without this
                        // check the input would reach the operator loop below, which refuses to
                        // consume "|||", yielding an empty operator token without advancing c
                        // and therefore never terminating.
                        if (*(c + 3) != '\n') {
                            auto msg = "Text block requires new line after |||.";
                            throw StaticError(filename, begin, msg);
                        }
                        std::stringstream block;
                        c += 4;  // Skip the "|||\n"
                        line_number++;
                        // Skip any blank lines at the beginning of the block.
                        while (*c == '\n') {
                            line_number++;
                            ++c;
                            block << '\n';
                        }
                        line_start = c;
                        const char *first_line = c;
                        int ws_chars = whitespace_check(first_line, c);
                        string_block_indent = std::string(first_line, ws_chars);
                        if (ws_chars == 0) {
                            auto msg = "Text block's first line must start with whitespace.";
                            throw StaticError(filename, begin, msg);
                        }
                        while (true) {
                            assert(ws_chars > 0);
                            // Read up to the \n, dropping the ws_chars of indentation.
                            for (c = &c[ws_chars]; *c != '\n'; ++c) {
                                if (*c == '\0')
                                    throw StaticError(filename, begin, "Unexpected EOF");
                                block << *c;
                            }
                            // Add the \n
                            block << '\n';
                            ++c;
                            line_number++;
                            line_start = c;
                            // Skip any blank lines
                            // NOTE(review): line_start is not refreshed after these blank
                            // lines -- confirm this is intended for column reporting.
                            while (*c == '\n') {
                                line_number++;
                                ++c;
                                block << '\n';
                            }
                            // Examine next line
                            ws_chars = whitespace_check(first_line, c);
                            if (ws_chars == 0) {
                                // End of text block
                                // Skip over any whitespace
                                while (*c == ' ' || *c == '\t') {
                                    string_block_term_indent += *c;
                                    ++c;
                                }
                                // Expect |||
                                if (!(*c == '|' && *(c + 1) == '|' && *(c + 2) == '|')) {
                                    auto msg = "Text block not terminated with |||";
                                    throw StaticError(filename, begin, msg);
                                }
                                c += 3;  // Leave after the last |
                                data = block.str();
                                kind = Token::STRING_BLOCK;
                                break;  // Out of the while loop.
                            }
                        }

                        break;  // Out of the switch.
                    }

                    const char *operator_begin = c;
                    for (; is_symbol(*c); ++c) {
                        // Not allowed // in operators
                        if (*c == '/' && *(c + 1) == '/')
                            break;
                        // Not allowed /* in operators
                        if (*c == '/' && *(c + 1) == '*')
                            break;
                        // Not allowed ||| in operators
                        if (*c == '|' && *(c + 1) == '|' && *(c + 2) == '|')
                            break;
                    }
                    // Not allowed to end with a + - ~ ! unless a single char.
                    // So, wind it back if we need to (but not too far).
                    while (c > operator_begin + 1 &&
                           (*(c - 1) == '+' || *(c - 1) == '-' || *(c - 1) == '~' ||
                            *(c - 1) == '!')) {
                        c--;
                    }
                    data += std::string(operator_begin, c);
                    if (data == "$") {
                        kind = Token::DOLLAR;
                        data = "";
                    } else {
                        kind = Token::OPERATOR;
                    }
                } else {
                    std::stringstream ss;
                    ss << "Could not lex the character ";
                    auto uc = static_cast<unsigned char>(*c);
                    if (*c < 32)
                        ss << "code " << unsigned(uc);
                    else
                        ss << "'" << *c << "'";
                    throw StaticError(filename, begin, ss.str());
                }
        }

        // c is one past the token, so c - line_start is the 1-based column of its last char.
        Location end(line_number, c - line_start);
        r.emplace_back(kind,
                       fodder,
                       data,
                       string_block_indent,
                       string_block_term_indent,
                       LocationRange(filename, begin, end));
        fodder.clear();
        fresh_line = false;
    }

    // Any fodder still pending is attached to the END_OF_FILE token.
    Location end(line_number, c - line_start + 1);
    r.emplace_back(Token::END_OF_FILE, fodder, "", "", "", LocationRange(filename, end, end));
    return r;
}
/*
 * Returns 1 when the text at s begins a group token, 0 otherwise.
 *
 * A group token starts with '$' or '.', or is whatever is_identifier()
 * accepts.  is_identifier() is only consulted when the first character is
 * neither '$' nor '.'.
 */
inline static int is_group_token(char *s)
{
    const char first = *s;

    return (first == '$' || first == '.' || is_identifier(s)) ? 1 : 0;
}
/**
 * Determines the most likely type of the query delivered by the given token source by looking
 * at its first few tokens. This is not a validity check: the result is only a best guess based
 * on the leading keywords.
 *
 * @param tokenSource The token source to pull tokens from (via nextToken/skipDefiner).
 * @return QtUnknown if there is no token at all or the leading keywords are not recognized,
 *         QtAmbiguous if the input ends before a decision can be made, otherwise the specific
 *         query type (or a generic one such as QtShow/QtSet/QtReset where a default applies).
 */
MySQLQueryType MySQLQueryIdentifier::determineQueryType(pANTLR3_TOKEN_SOURCE tokenSource)
{
  pANTLR3_COMMON_TOKEN token;
  if (!nextToken(tokenSource, token))
    return QtUnknown;

  switch (token->type)
  {
  case ALTER_SYMBOL:
    if (!nextToken(tokenSource, token))
      return QtAmbiguous;

    switch (token->type)
    {
    case DATABASE_SYMBOL:
      return QtAlterDatabase;

    case LOGFILE_SYMBOL:
      return QtAlterLogFileGroup;

    case FUNCTION_SYMBOL:
      return QtAlterFunction;

    case PROCEDURE_SYMBOL:
      return QtAlterProcedure;

    case SERVER_SYMBOL:
      return QtAlterServer;

    case TABLE_SYMBOL:
    case ONLINE_SYMBOL:  // Optional part of ALTER TABLE.
    case OFFLINE_SYMBOL: // ditto
    case IGNORE_SYMBOL:
      return QtAlterTable;

    case TABLESPACE_SYMBOL:
      return QtAlterTableSpace;

    case EVENT_SYMBOL:
      return QtAlterEvent;

    case VIEW_SYMBOL:
      return QtAlterView;

    case DEFINER_SYMBOL: // Can be both event or view.
      if (!skipDefiner(tokenSource, token))
        return QtAmbiguous;

      switch (token->type)
      {
      case EVENT_SYMBOL:
        return QtAlterEvent;

      case SQL_SYMBOL:
      case VIEW_SYMBOL:
        return QtAlterView;
      }
      break;

    case ALGORITHM_SYMBOL: // Optional part of ALTER VIEW.
      return QtAlterView;

    case USER_SYMBOL:
      return QtAlterUser;
    }
    break;

  case CREATE_SYMBOL:
    if (!nextToken(tokenSource, token))
      return QtAmbiguous;

    switch (token->type)
    {
    case TEMPORARY_SYMBOL: // Optional part of CREATE TABLE.
    case TABLE_SYMBOL:
      return QtCreateTable;

    case ONLINE_SYMBOL:
    case OFFLINE_SYMBOL:
    case INDEX_SYMBOL:
    case UNIQUE_SYMBOL:
    case FULLTEXT_SYMBOL:
    case SPATIAL_SYMBOL:
      return QtCreateIndex;

    case DATABASE_SYMBOL:
      return QtCreateDatabase;

    case TRIGGER_SYMBOL:
      return QtCreateTrigger;

    case DEFINER_SYMBOL: // Can be event, view, procedure, function, UDF, trigger.
      {
        if (!skipDefiner(tokenSource, token))
          return QtAmbiguous;

        switch (token->type)
        {
        case EVENT_SYMBOL:
          return QtCreateEvent;

        case VIEW_SYMBOL:
        case SQL_SYMBOL:
          return QtCreateView;

        case PROCEDURE_SYMBOL:
          return QtCreateProcedure;

        case FUNCTION_SYMBOL:
          {
            if (!nextToken(tokenSource, token))
              return QtAmbiguous;

            if (token->type == UDF_NAME_TOKEN)
              return QtCreateUdf;
            return QtCreateFunction;
          }

        case AGGREGATE_SYMBOL:
          return QtCreateUdf;

        case TRIGGER_SYMBOL:
          return QtCreateTrigger;
        }

        // Nothing recognizable after the definer clause. Leave the CREATE switch here
        // (ending in QtUnknown, like the ALTER ... DEFINER branch) instead of falling
        // through into the VIEW case below and wrongly reporting a CREATE VIEW.
        break;
      }

    case VIEW_SYMBOL:
    case OR_SYMBOL:        // CREATE OR REPLACE ... VIEW
    case ALGORITHM_SYMBOL: // CREATE ALGORITHM ... VIEW
      return QtCreateView;

    case EVENT_SYMBOL:
      return QtCreateEvent;

    case FUNCTION_SYMBOL:
      return QtCreateFunction;

    case AGGREGATE_SYMBOL:
      return QtCreateUdf;

    case PROCEDURE_SYMBOL:
      return QtCreateProcedure;

    case LOGFILE_SYMBOL:
      return QtCreateLogFileGroup;

    case SERVER_SYMBOL:
      return QtCreateServer;

    case TABLESPACE_SYMBOL:
      return QtCreateTableSpace;

    case USER_SYMBOL:
      return QtCreateUser;
    }
    break;

  case DROP_SYMBOL:
    {
      if (!nextToken(tokenSource, token))
        return QtAmbiguous;

      switch (token->type)
      {
      case DATABASE_SYMBOL:
        return QtDropDatabase;

      case EVENT_SYMBOL:
        return QtDropEvent;

      case PROCEDURE_SYMBOL:
        return QtDropProcedure;

      case FUNCTION_SYMBOL:
        return QtDropFunction;

      case ONLINE_SYMBOL:
      case OFFLINE_SYMBOL:
      case INDEX_SYMBOL:
        return QtDropIndex;

      case LOGFILE_SYMBOL:
        return QtDropLogfileGroup;

      case SERVER_SYMBOL:
        return QtDropServer;

      case TEMPORARY_SYMBOL:
      case TABLE_SYMBOL:
      case TABLES_SYMBOL:
        return QtDropTable;

      case TABLESPACE_SYMBOL:
        return QtDropTablespace;

      case TRIGGER_SYMBOL:
        return QtDropTrigger;

      case VIEW_SYMBOL:
        return QtDropView;

      case PREPARE_SYMBOL:
        return QtDeallocate;

      case USER_SYMBOL:
        return QtDropUser;
      }

      // Unrecognized DROP variant: end in QtUnknown rather than falling through into
      // the TRUNCATE case and reporting QtTruncateTable.
      break;
    }

  case TRUNCATE_SYMBOL:
    return QtTruncateTable;

  case CALL_SYMBOL:
    return QtCall;

  case DELETE_SYMBOL:
    return QtDelete;

  case DO_SYMBOL:
    return QtDo;

  case HANDLER_SYMBOL:
    return QtHandler;

  case INSERT_SYMBOL:
    return QtInsert;

  case LOAD_SYMBOL:
    {
      if (!nextToken(tokenSource, token))
        return QtAmbiguous;

      switch (token->type)
      {
      case DATA_SYMBOL:
        {
          if (!nextToken(tokenSource, token))
            return QtAmbiguous;

          if (token->type == FROM_SYMBOL)
            return QtLoadDataMaster;
          return QtLoadData;
        }

      case XML_SYMBOL:
        return QtLoadXML;

      case TABLE_SYMBOL:
        return QtLoadTableMaster;

      case INDEX_SYMBOL:
        return QtLoadIndex;
      }

      // Unrecognized LOAD variant: end in QtUnknown rather than falling through into
      // the REPLACE case and reporting QtReplace.
      break;
    }

  case REPLACE_SYMBOL:
    return QtReplace;

  case SELECT_SYMBOL:
    return QtSelect;

  case UPDATE_SYMBOL:
    return QtUpdate;

  case OPEN_PAR_SYMBOL: // Either (((select ..))) or (partition...)
    {
      while (token->type == OPEN_PAR_SYMBOL)
      {
        if (!nextToken(tokenSource, token))
          return QtAmbiguous;
      }
      if (token->type == SELECT_SYMBOL)
        return QtSelect;
      return QtPartition;
    }

  case PARTITION_SYMBOL:
  case PARTITIONS_SYMBOL:
    return QtPartition;

  case START_SYMBOL:
    {
      if (!nextToken(tokenSource, token))
        return QtAmbiguous;

      if (token->type == TRANSACTION_SYMBOL)
        return QtStartTransaction;
      return QtStartSlave;
    }

  case BEGIN_SYMBOL: // Begin directly at the start of the query must be a transaction start.
    return QtBeginWork;

  case COMMIT_SYMBOL:
    return QtCommit;

  case ROLLBACK_SYMBOL:
    {
      // We assume a transaction statement here unless we exactly know it's about a savepoint.
      if (!nextToken(tokenSource, token))
        return QtRollbackWork;

      if (token->type == WORK_SYMBOL)
      {
        if (!nextToken(tokenSource, token))
          return QtRollbackWork;
      }

      if (token->type == TO_SYMBOL)
        return QtRollbackSavepoint;
      return QtRollbackWork;
    }

  case SET_SYMBOL:
    {
      if (!nextToken(tokenSource, token))
        return QtSet;

      switch (token->type)
      {
      case PASSWORD_SYMBOL:
        return QtSetPassword;

      case GLOBAL_SYMBOL:
      case LOCAL_SYMBOL:
      case SESSION_SYMBOL:
        // Skip the scope modifier; what follows decides between SET and SET TRANSACTION.
        if (!nextToken(tokenSource, token))
          return QtSet;
        break;

      case IDENTIFIER:
        if (isTokenText(token, "autocommit"))
          return QtSetAutoCommit;
        break;
      }

      if (token->type == TRANSACTION_SYMBOL)
        return QtSetTransaction;
      return QtSet;
    }

  case SAVEPOINT_SYMBOL:
    return QtSavepoint;

  case RELEASE_SYMBOL: // Release at the start of the query, obviously.
    return QtReleaseSavepoint;

  case LOCK_SYMBOL:
    return QtLock;

  case UNLOCK_SYMBOL:
    return QtUnlock;

  case XA_SYMBOL:
    return QtXA;

  case PURGE_SYMBOL:
    return QtPurge;

  case CHANGE_SYMBOL:
    return QtChangeMaster;

  case RESET_SYMBOL:
    {
      if (!nextToken(tokenSource, token))
        return QtReset;

      switch (token->type)
      {
      case MASTER_SYMBOL: // RESET MASTER is the documented form.
      case SERVER_SYMBOL: // Kept for backward compatibility with the previous behavior.
        return QtResetMaster;

      case SLAVE_SYMBOL:
        return QtResetSlave;

      default:
        return QtReset;
      }
    }

  case STOP_SYMBOL:
    return QtStopSlave;

  case PREPARE_SYMBOL:
    return QtPrepare;

  case EXECUTE_SYMBOL:
    return QtExecute;

  case DEALLOCATE_SYMBOL:
    return QtDeallocate;

  case GRANT_SYMBOL:
    {
      if (!nextToken(tokenSource, token))
        return QtAmbiguous;

      if (token->type == PROXY_SYMBOL)
        return QtGrantProxy;
      return QtGrant;
    }

  case RENAME_SYMBOL:
    {
      if (!nextToken(tokenSource, token))
        return QtAmbiguous;

      if (token->type == USER_SYMBOL)
        return QtRenameUser;
      return QtRenameTable;
    }

  case REVOKE_SYMBOL:
    {
      if (!nextToken(tokenSource, token))
        return QtAmbiguous;

      if (token->type == PROXY_SYMBOL)
        return QtRevokeProxy;
      return QtRevoke;
    }

  case ANALYZE_SYMBOL:
    return QtAnalyzeTable;

  case CHECK_SYMBOL:
    return QtCheckTable;

  case CHECKSUM_SYMBOL:
    return QtChecksumTable;

  case OPTIMIZE_SYMBOL:
    return QtOptimizeTable;

  case REPAIR_SYMBOL:
    return QtRepairTable;

  case BACKUP_SYMBOL:
    return QtBackUpTable;

  case RESTORE_SYMBOL:
    return QtRestoreTable;

  case INSTALL_SYMBOL:
    return QtInstallPlugin;

  case UNINSTALL_SYMBOL:
    return QtUninstallPlugin;

  case SHOW_SYMBOL:
    {
      if (!nextToken(tokenSource, token))
        return QtShow;

      if (token->type == FULL_SYMBOL)
      {
        // Not all SHOW cases allow an optional FULL keyword, but this is not about checking for
        // a valid query but to find the most likely type.
        if (!nextToken(tokenSource, token))
          return QtShow;
      }

      switch (token->type)
      {
      case GLOBAL_SYMBOL:
      case LOCAL_SYMBOL: // LOCAL is a synonym for SESSION in SHOW STATUS/VARIABLES.
      case LOCK_SYMBOL:  // Kept for backward compatibility with the previous behavior.
      case SESSION_SYMBOL:
        {
          if (!nextToken(tokenSource, token))
            return QtShow;

          if (token->type == STATUS_SYMBOL)
            return QtShowStatus;
          return QtShowVariables;
        }

      case AUTHORS_SYMBOL:
        return QtShowAuthors;

      case BINARY_SYMBOL:
        return QtShowBinaryLogs;

      case BINLOG_SYMBOL:
        return QtShowBinlogEvents;

      case RELAYLOG_SYMBOL:
        return QtShowRelaylogEvents;

      case CHAR_SYMBOL:
        return QtShowCharset;

      case COLLATION_SYMBOL:
        return QtShowCollation;

      case COLUMNS_SYMBOL:
        return QtShowColumns;

      case CONTRIBUTORS_SYMBOL:
        return QtShowContributors;

      case COUNT_SYMBOL: // SHOW COUNT(*) WARNINGS / ERRORS
        {
          if (!nextToken(tokenSource, token) || token->type != OPEN_PAR_SYMBOL)
            return QtShow;
          if (!nextToken(tokenSource, token) || token->type != MULT_OPERATOR)
            return QtShow;
          if (!nextToken(tokenSource, token) || token->type != CLOSE_PAR_SYMBOL)
            return QtShow;

          if (!nextToken(tokenSource, token))
            return QtShow;

          switch (token->type)
          {
          case WARNINGS_SYMBOL:
            return QtShowWarnings;

          case ERRORS_SYMBOL:
            return QtShowErrors;
          }

          return QtShow;
        }

      case CREATE_SYMBOL:
        {
          if (!nextToken(tokenSource, token))
            return QtShow;

          switch (token->type)
          {
          case DATABASE_SYMBOL:
            return QtShowCreateDatabase;

          case EVENT_SYMBOL:
            return QtShowCreateEvent;

          case FUNCTION_SYMBOL:
            return QtShowCreateFunction;

          case PROCEDURE_SYMBOL:
            return QtShowCreateProcedure;

          case TABLE_SYMBOL:
            return QtShowCreateTable;

          case TRIGGER_SYMBOL:
            return QtShowCreateTrigger;

          case VIEW_SYMBOL:
            return QtShowCreateView;
          }

          return QtShow;
        }

      case DATABASES_SYMBOL:
        return QtShowDatabases;

      case ENGINE_SYMBOL:
        return QtShowEngineStatus;

      case STORAGE_SYMBOL:
      case ENGINES_SYMBOL:
        return QtShowStorageEngines;

      case ERRORS_SYMBOL:
        return QtShowErrors;

      case EVENTS_SYMBOL:
        return QtShowEvents;

      case FUNCTION_SYMBOL:
        {
          if (!nextToken(tokenSource, token))
            return QtAmbiguous;

          if (token->type == CODE_SYMBOL)
            return QtShowFunctionCode;
          return QtShowFunctionStatus;
        }

      case GRANT_SYMBOL:
        return QtShowGrants;

      case INDEX_SYMBOL:
      case INDEXES_SYMBOL:
      case KEY_SYMBOL:
        return QtShowIndexes;

      case INNODB_SYMBOL:
        return QtShowInnoDBStatus;

      case MASTER_SYMBOL:
        return QtShowMasterStatus;

      case OPEN_SYMBOL:
        return QtShowOpenTables;

      case PLUGIN_SYMBOL:
      case PLUGINS_SYMBOL:
        return QtShowPlugins;

      case PROCEDURE_SYMBOL:
        {
          if (!nextToken(tokenSource, token))
            return QtShow;

          if (token->type == STATUS_SYMBOL)
            return QtShowProcedureStatus;
          return QtShowProcedureCode;
        }

      case PRIVILEGES_SYMBOL:
        return QtShowPrivileges;

      case PROCESSLIST_SYMBOL:
        return QtShowProcessList;

      case PROFILE_SYMBOL:
        return QtShowProfile;

      case PROFILES_SYMBOL:
        return QtShowProfiles;

      case SLAVE_SYMBOL:
        {
          if (!nextToken(tokenSource, token))
            return QtAmbiguous;

          if (token->type == HOSTS_SYMBOL)
            return QtShowSlaveHosts;
          return QtShowSlaveStatus;
        }

      case STATUS_SYMBOL:
        return QtShowStatus;

      case VARIABLES_SYMBOL:
        return QtShowVariables;

      case TABLE_SYMBOL:
        return QtShowTableStatus;

      case TABLES_SYMBOL:
        return QtShowTables;

      case TRIGGERS_SYMBOL:
        return QtShowTriggers;

      case WARNINGS_SYMBOL:
        return QtShowWarnings;
      }

      return QtShow;
    }

  case CACHE_SYMBOL:
    return QtCacheIndex;

  case FLUSH_SYMBOL:
    return QtFlush;

  case KILL_SYMBOL:
    return QtKill;

  case DESCRIBE_SYMBOL: // EXPLAIN is converted to DESCRIBE in the lexer.
  case DESC_SYMBOL:
    {
      if (!nextToken(tokenSource, token))
        return QtAmbiguous;

      if (is_identifier(token->type) || token->type == DOT_SYMBOL)
        return QtExplainTable;

      // EXTENDED is a bit special as it can be both, a table identifier or the keyword.
      if (token->type == EXTENDED_SYMBOL)
      {
        if (!nextToken(tokenSource, token))
          return QtExplainTable;

        switch (token->type)
        {
        case DELETE_SYMBOL:
        case INSERT_SYMBOL:
        case REPLACE_SYMBOL:
        case UPDATE_SYMBOL:
          return QtExplainStatement;

        default:
          return QtExplainTable;
        }
      }
      return QtExplainStatement;
    }

  case HELP_SYMBOL:
    return QtHelp;

  case USE_SYMBOL:
    return QtUse;
  }

  return QtUnknown;
}