/// TinyLog storage: one plain data file per column stream under <path>/<escaped name>/.
StorageTinyLog::StorageTinyLog(
    const std::string & path_,
    const std::string & name_,
    const ColumnsDescription & columns_,
    bool attach,
    size_t max_compress_block_size_)
    : IStorage{columns_},
    path(path_), name(name_),
    max_compress_block_size(max_compress_block_size_),
    file_checker(path + escapeForFileName(name) + '/' + "sizes.json"),
    log(&Logger::get("StorageTinyLog"))
{
    if (path.empty())
        throw Exception("Storage " + getName() + " requires data path", ErrorCodes::INCORRECT_FILE_NAME);

    const String full_path = path + escapeForFileName(name) + '/';

    /// On CREATE (as opposed to ATTACH) the table directory must be created first;
    /// an already existing directory is tolerated (EEXIST).
    if (!attach)
        if (0 != mkdir(full_path.c_str(), S_IRWXU | S_IRWXG | S_IRWXO) && errno != EEXIST)
            throwFromErrno("Cannot create directory " + full_path, ErrorCodes::CANNOT_CREATE_DIRECTORY);

    /// Register the per-column data files for every physical column.
    for (const auto & column : getColumns().getAllPhysical())
        addFiles(column.name, *column.type);
}
/// Builds the on-disk file name for one substream of a column
/// (e.g. "col.null", "col.size0", tuple elements joined with "%2E").
String IDataType::getFileNameForStream(const String & column_name, const IDataType::SubstreamPath & path)
{
    /// Array sizes of a nested table are shared between its columns,
    /// so they are stored under the nested table name rather than the full column name.
    const String nested_table_name = Nested::extractTableName(column_name);
    const bool is_sizes_of_nested_type =
        !path.empty()
        && path.back().type == IDataType::Substream::ArraySizes
        && nested_table_name != column_name;

    String stream_name = escapeForFileName(is_sizes_of_nested_type ? nested_table_name : column_name);

    size_t array_level = 0;
    for (const Substream & substream : path)
    {
        if (substream.type == Substream::NullMap)
            stream_name += ".null";
        else if (substream.type == Substream::ArraySizes)
            stream_name += ".size" + toString(array_level);
        else if (substream.type == Substream::ArrayElements)
            ++array_level;
        else if (substream.type == Substream::TupleElement)
        {
            /// For compatibility reasons, we use %2E instead of dot.
            /// Because nested data may be represented not by Array of Tuple,
            /// but by separate Array columns with names in a form of a.b,
            /// and name is encoded as a whole.
            stream_name += "%2E" + escapeForFileName(substream.tuple_element_name);
        }
    }

    return stream_name;
}
/// StripeLog storage constructor: validates the column list and prepares the table directory.
StorageStripeLog::StorageStripeLog(
    const std::string & path_,
    const std::string & name_,
    const NamesAndTypesList & columns_,
    const NamesAndTypesList & materialized_columns_,
    const NamesAndTypesList & alias_columns_,
    const ColumnDefaults & column_defaults_,
    bool attach,
    size_t max_compress_block_size_)
    : IStorage{materialized_columns_, alias_columns_, column_defaults_},
    path(path_), name(name_), columns(columns_),
    max_compress_block_size(max_compress_block_size_),
    file_checker(path + escapeForFileName(name) + '/' + "sizes.json"),
    log(&Logger::get("StorageStripeLog"))
{
    if (columns.empty())
        throw Exception("Empty list of columns passed to StorageStripeLog constructor", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED);

    const String full_path = path + escapeForFileName(name) + '/';

    /// On CREATE (not ATTACH) the table directory must be created; EEXIST is fine.
    if (!attach)
        if (0 != mkdir(full_path.c_str(), S_IRWXU | S_IRWXG | S_IRWXO) && errno != EEXIST)
            throwFromErrno("Cannot create directory " + full_path, ErrorCodes::CANNOT_CREATE_DIRECTORY);
}
void StorageStripeLog::rename(const String & new_path_to_db, const String & /*new_database_name*/, const String & new_table_name) { std::unique_lock<std::shared_mutex> lock(rwlock); /// Rename directory with data. Poco::File(path + escapeForFileName(name)).renameTo(new_path_to_db + escapeForFileName(new_table_name)); path = new_path_to_db; name = new_table_name; file_checker.setPath(path + escapeForFileName(name) + "/" + "sizes.json"); }
void StorageTinyLog::rename(const String & new_path_to_db, const String & /*new_database_name*/, const String & new_table_name) { /// Rename directory with data. Poco::File(path + escapeForFileName(name)).renameTo(new_path_to_db + escapeForFileName(new_table_name)); path = new_path_to_db; name = new_table_name; file_checker.setPath(path + escapeForFileName(name) + "/" + "sizes.json"); for (Files_t::iterator it = files.begin(); it != files.end(); ++it) it->second.data_file = Poco::File(path + escapeForFileName(name) + '/' + Poco::Path(it->second.data_file.path()).getFileName()); }
void StorageTinyLog::truncate(const ASTPtr &, const Context &) { if (name.empty()) throw Exception("Logical error: table name is empty", ErrorCodes::LOGICAL_ERROR); auto file = Poco::File(path + escapeForFileName(name)); file.remove(true); file.createDirectories(); files.clear(); file_checker = FileChecker{path + escapeForFileName(name) + '/' + "sizes.json"}; for (const auto &column : getColumns().getAllPhysical()) addFiles(column.name, *column.type); }
void DatabaseOrdinary::renameTable( const Context & context, const String & table_name, IDatabase & to_database, const String & to_table_name) { DatabaseOrdinary * to_database_concrete = typeid_cast<DatabaseOrdinary *>(&to_database); if (!to_database_concrete) throw Exception("Moving tables between databases of different engines is not supported", ErrorCodes::NOT_IMPLEMENTED); StoragePtr table = tryGetTable(table_name); if (!table) throw Exception("Table " + name + "." + table_name + " doesn't exist.", ErrorCodes::TABLE_ALREADY_EXISTS); /// Уведомляем таблицу о том, что она переименовывается. Если таблица не поддерживает переименование - кинется исключение. try { table->rename(context.getPath() + "/data/" + escapeForFileName(to_database_concrete->name) + "/", to_database_concrete->name, to_table_name); } catch (const Poco::Exception & e) { /// Более хорошая диагностика. throw Exception{e}; } ASTPtr ast = getCreateQueryImpl(path, table_name); ASTCreateQuery & ast_create_query = typeid_cast<ASTCreateQuery &>(*ast); ast_create_query.table = to_table_name; /// NOTE Неатомарно. to_database_concrete->createTable(to_table_name, table, ast, table->getName()); removeTable(table_name); }
/// Rewrites the table's .sql metadata file with a new column list (and optionally
/// a modified engine clause). The file is replaced atomically via a .tmp + rename.
void DatabaseOrdinary::alterTable(
    const Context & context,
    const String & name,
    const NamesAndTypesList & columns,
    const NamesAndTypesList & materialized_columns,
    const NamesAndTypesList & alias_columns,
    const ColumnDefaults & column_defaults,
    const ASTModifier & engine_modifier)
{
    /// Read the table definition and replace the needed parts with new ones.

    String table_name_escaped = escapeForFileName(name);
    String table_metadata_tmp_path = path + "/" + table_name_escaped + ".sql.tmp";
    String table_metadata_path = path + "/" + table_name_escaped + ".sql";
    String statement;

    {
        /// Read the whole metadata file into `statement`.
        char in_buf[METADATA_FILE_BUFFER_SIZE];
        ReadBufferFromFile in(table_metadata_path, METADATA_FILE_BUFFER_SIZE, -1, in_buf);
        WriteBufferFromString out(statement);
        copyData(in, out);
    }

    ParserCreateQuery parser;
    ASTPtr ast = parseQuery(parser, statement.data(), statement.data() + statement.size(), "in file " + table_metadata_path);

    ASTCreateQuery & ast_create_query = typeid_cast<ASTCreateQuery &>(*ast);

    ASTPtr new_columns = InterpreterCreateQuery::formatColumns(columns, materialized_columns, alias_columns, column_defaults);

    /// Swap the old columns AST for the new one, both in `children` and in the named pointer.
    auto it = std::find(ast_create_query.children.begin(), ast_create_query.children.end(), ast_create_query.columns);
    if (it == ast_create_query.children.end())
        throw Exception("Logical error: cannot find columns child in ASTCreateQuery", ErrorCodes::LOGICAL_ERROR);
    *it = new_columns;
    ast_create_query.columns = new_columns;

    if (engine_modifier)
        engine_modifier(ast_create_query.storage);

    statement = getTableDefinitionFromCreateQuery(ast);

    {
        /// O_EXCL guarantees no other thread is creating the same tmp file right now.
        WriteBufferFromFile out(table_metadata_tmp_path, statement.size(), O_WRONLY | O_CREAT | O_EXCL);
        writeString(statement, out);
        out.next();
        out.sync();
        out.close();
    }

    try
    {
        /// rename atomically replaces the old file with the new one.
        Poco::File(table_metadata_tmp_path).renameTo(table_metadata_path);
    }
    catch (...)
    {
        Poco::File(table_metadata_tmp_path).remove();
        throw;
    }
}
/// Ordinary database: metadata (.sql files) under metadata_path_, table data under
/// <server path>/data/<escaped database name>/. Ensures the data directory exists.
DatabaseOrdinary::DatabaseOrdinary(String name_, const String & metadata_path_, const Context & context)
    : DatabaseWithOwnTablesBase(std::move(name_))
    , metadata_path(metadata_path_)
    , data_path(context.getPath() + "data/" + escapeForFileName(name) + "/")
    , log(&Logger::get("DatabaseOrdinary (" + name + ")"))
{
    Poco::File(data_path).createDirectories();
}
void StorageSetOrJoinBase::rename(const String & new_path_to_db, const String & new_database_name, const String & new_table_name) { /// Переименовываем директорию с данными. String new_path = new_path_to_db + escapeForFileName(new_table_name); Poco::File(path).renameTo(new_path); path = new_path + "/"; name = new_table_name; }
void StorageSetOrJoinBase::rename(const String & new_path_to_db, const String & /*new_database_name*/, const String & new_table_name) { /// Rename directory with data. String new_path = new_path_to_db + escapeForFileName(new_table_name); Poco::File(path).renameTo(new_path); path = new_path + "/"; table_name = new_table_name; }
/// Registers a table in this database and persists its metadata (.sql) file.
void DatabaseOrdinary::createTable(const String & table_name, const StoragePtr & table, const ASTPtr & query, const String & engine)
{
    /// Create a metadata file if needed - if the query is not ATTACH.
    /// An ATTACH query for the table is written into it.

    /** The code relies on the assumption that all threads see the same order of actions:
      * - creation of the .sql.tmp file;
      * - adding the table to `tables`;
      * - renaming .sql.tmp to .sql.
      */

    /// A race condition would be possible if a table with the same name is created
    /// simultaneously via CREATE and via ATTACH. But there is protection against it -
    /// see the usage of DDLGuard in InterpreterCreateQuery.

    {
        std::lock_guard<std::mutex> lock(mutex);
        if (tables.count(table_name))
            throw Exception("Table " + name + "." + table_name + " already exists.", ErrorCodes::TABLE_ALREADY_EXISTS);
    }

    String table_name_escaped = escapeForFileName(table_name);
    String table_metadata_tmp_path = path + "/" + table_name_escaped + ".sql.tmp";
    String table_metadata_path = path + "/" + table_name_escaped + ".sql";
    String statement;

    {
        statement = getTableDefinitionFromCreateQuery(query);

        /// Exclusive create guarantees that the table is not being created right now by another thread.
        WriteBufferFromFile out(table_metadata_tmp_path, statement.size(), O_WRONLY | O_CREAT | O_EXCL);
        writeString(statement, out);
        out.next();
        out.sync();
        out.close();
    }

    try
    {
        /// Add the table to the in-memory set.
        {
            std::lock_guard<std::mutex> lock(mutex);
            if (!tables.emplace(table_name, table).second)
                throw Exception("Table " + name + "." + table_name + " already exists.", ErrorCodes::TABLE_ALREADY_EXISTS);
        }

        /// If the query is ATTACH and the table metadata already exists
        /// (that is, ATTACH was done after DETACH), rename atomically replaces the old file with the new one.
        Poco::File(table_metadata_tmp_path).renameTo(table_metadata_path);
    }
    catch (...)
    {
        Poco::File(table_metadata_tmp_path).remove();
        throw;
    }
}
/// Common base for Set and Join storages: data lives under <path_>/<escaped table name>/.
StorageSetOrJoinBase::StorageSetOrJoinBase(
    const String & path_,
    const String & table_name_,
    const ColumnsDescription & columns_)
    : IStorage{columns_}, table_name(table_name_)
{
    /// A data path is mandatory - these storages persist their state.
    if (path_.empty())
        throw Exception("Join and Set storages require data path", ErrorCodes::INCORRECT_FILE_NAME);

    path = path_ + escapeForFileName(table_name_) + '/';
}
/// Common base for Set and Join storages; data path is <path_>/<escaped name_>/.
StorageSetOrJoinBase::StorageSetOrJoinBase(
    const String & path_,
    const String & name_,
    NamesAndTypesListPtr columns_,
    const NamesAndTypesList & materialized_columns_,
    const NamesAndTypesList & alias_columns_,
    const ColumnDefaults & column_defaults_)
    : IStorage{materialized_columns_, alias_columns_, column_defaults_},
    path(path_ + escapeForFileName(name_) + '/'),
    name(name_),
    columns(columns_)
{
}
/// Rewrites the table's .sql metadata file with a new column description
/// (and optionally a modified storage clause). Replacement is atomic: .tmp + rename.
void DatabaseOrdinary::alterTable(
    const Context & context,
    const String & name,
    const ColumnsDescription & columns,
    const ASTModifier & storage_modifier)
{
    /// Read the definition of the table and replace the necessary parts with new ones.

    String table_name_escaped = escapeForFileName(name);
    String table_metadata_tmp_path = metadata_path + "/" + table_name_escaped + ".sql.tmp";
    String table_metadata_path = metadata_path + "/" + table_name_escaped + ".sql";
    String statement;

    {
        char in_buf[METADATA_FILE_BUFFER_SIZE];
        ReadBufferFromFile in(table_metadata_path, METADATA_FILE_BUFFER_SIZE, -1, in_buf);
        readStringUntilEOF(statement, in);
    }

    ParserCreateQuery parser;
    ASTPtr ast = parseQuery(parser, statement.data(), statement.data() + statement.size(), "in file " + table_metadata_path, 0);

    ASTCreateQuery & ast_create_query = typeid_cast<ASTCreateQuery &>(*ast);

    ASTPtr new_columns = InterpreterCreateQuery::formatColumns(columns);
    ast_create_query.replace(ast_create_query.columns, new_columns);

    if (storage_modifier)
        storage_modifier(*ast_create_query.storage);

    statement = getTableDefinitionFromCreateQuery(ast);

    {
        /// O_EXCL guarantees no concurrent writer of the same tmp file.
        WriteBufferFromFile out(table_metadata_tmp_path, statement.size(), O_WRONLY | O_CREAT | O_EXCL);
        writeString(statement, out);
        out.next();
        /// fsync only if the user requested durable metadata writes.
        if (context.getSettingsRef().fsync_metadata)
            out.sync();
        out.close();
    }

    try
    {
        /// rename atomically replaces the old file with the new one.
        Poco::File(table_metadata_tmp_path).renameTo(table_metadata_path);
    }
    catch (...)
    {
        Poco::File(table_metadata_tmp_path).remove();
        throw;
    }
}
/// Handles DROP/DETACH DATABASE (TRUNCATE DATABASE is rejected).
/// DROP first drops each table individually, then removes the database itself
/// and its metadata file under the context lock.
BlockIO InterpreterDropQuery::executeToDatabase(String & database_name, ASTDropQuery::Kind kind, bool if_exists)
{
    auto ddl_guard = context.getDDLGuard(database_name, "");

    if (auto database = tryGetDatabase(database_name, if_exists))
    {
        if (kind == ASTDropQuery::Kind::Truncate)
        {
            throw Exception("Unable to truncate database.", ErrorCodes::SYNTAX_ERROR);
        }
        else if (kind == ASTDropQuery::Kind::Detach)
        {
            context.detachDatabase(database_name);
            database->shutdown();
        }
        else if (kind == ASTDropQuery::Kind::Drop)
        {
            /// Drop all of the database's tables first (without holding the context lock).
            for (auto iterator = database->getIterator(context); iterator->isValid(); iterator->next())
            {
                String current_table_name = iterator->table()->getTableName();
                executeToTable(database_name, current_table_name, kind, false, false);
            }

            auto context_lock = context.getLock();

            /// Someone could have time to delete the database before us.
            context.assertDatabaseExists(database_name);

            /// Someone could have time to create a table in the database to be deleted while we deleted the tables without the context lock.
            if (!context.getDatabase(database_name)->empty(context))
                throw Exception("New table appeared in database being dropped. Try dropping it again.", ErrorCodes::DATABASE_NOT_EMPTY);

            /// Delete database information from the RAM
            context.detachDatabase(database_name);
            database->shutdown();

            /// Delete the database.
            database->drop();

            /// Old ClickHouse versions did not store database.sql files
            Poco::File database_metadata_file(context.getPath() + "metadata/" + escapeForFileName(database_name) + ".sql");
            if (database_metadata_file.exists())
                database_metadata_file.remove(false);
        }
    }

    return {};
}
/// Reads a table's metadata file (<path>/<escaped table_name>.sql) and parses it into a CREATE AST.
static ASTPtr getCreateQueryImpl(const String & path, const String & table_name)
{
    const String table_metadata_path = path + "/" + escapeForFileName(table_name) + ".sql";

    String query;
    {
        ReadBufferFromFile in(table_metadata_path, 4096);
        WriteBufferFromString out(query);
        copyData(in, out);
    }

    ParserCreateQuery parser;
    return parseQuery(parser, query.data(), query.data() + query.size(), "in file " + table_metadata_path);
}
void DatabaseOrdinary::removeTable(const String & table_name) { StoragePtr res = detachTable(table_name); String table_name_escaped = escapeForFileName(table_name); String table_metadata_path = path + "/" + table_name_escaped + ".sql"; try { Poco::File(table_metadata_path).remove(); } catch (...) { attachTable(table_name, res); throw; } }
/// Distributed table: routes reads/writes to `remote_database`.`remote_table` on the cluster's shards.
StorageDistributed::StorageDistributed(
    const std::string & name_,
    NamesAndTypesListPtr columns_,
    const String & remote_database_,
    const String & remote_table_,
    const Cluster & cluster_,
    Context & context_,
    const ASTPtr & sharding_key_,
    const String & data_path_)
    : name(name_), columns(columns_),
    remote_database(remote_database_), remote_table(remote_table_),
    context(context_), cluster(cluster_),
    /// The sharding key expression is compiled once here; nullptr if no key was given.
    sharding_key_expr(sharding_key_ ? ExpressionAnalyzer(sharding_key_, context, nullptr, *columns).getActions(false) : nullptr),
    sharding_key_column_name(sharding_key_ ? sharding_key_->getColumnName() : String{}),
    /// Writes are enabled only with a data path and either a single shard or a sharding key to route rows.
    write_enabled(!data_path_.empty() && (((cluster.getLocalShardCount() + cluster.getRemoteShardCount()) < 2) || sharding_key_)),
    /// Empty data path means no local spooling directory for this table.
    path(data_path_.empty() ? "" : (data_path_ + escapeForFileName(name) + '/'))
{
    createDirectoryMonitors();
}
/// Moves a table to another (same-engine) database: renames its data directory,
/// rewrites its metadata under the new name, then removes the old entry. Non-atomic.
void DatabaseOrdinary::renameTable(
    const Context & context, const String & table_name, IDatabase & to_database, const String & to_table_name)
{
    DatabaseOrdinary * to_database_concrete = typeid_cast<DatabaseOrdinary *>(&to_database);

    if (!to_database_concrete)
        throw Exception("Moving tables between databases of different engines is not supported", ErrorCodes::NOT_IMPLEMENTED);

    StoragePtr table = tryGetTable(context, table_name);

    if (!table)
        throw Exception("Table " + name + "." + table_name + " doesn't exist.", ErrorCodes::UNKNOWN_TABLE);

    /// Notify the table that it is renamed. If the table does not support renaming, exception is thrown.
    try
    {
        table->rename(context.getPath() + "/data/" + escapeForFileName(to_database_concrete->name) + "/",
            to_database_concrete->name,
            to_table_name);
    }
    catch (const Exception &)
    {
        /// Our own exceptions pass through unchanged.
        throw;
    }
    catch (const Poco::Exception & e)
    {
        /// Better diagnostics.
        throw Exception{e};
    }

    ASTPtr ast = getQueryFromMetadata(detail::getTableMetadataPath(metadata_path, table_name));
    if (!ast)
        throw Exception("There is no metadata file for table " + table_name, ErrorCodes::FILE_DOESNT_EXIST);
    ASTCreateQuery & ast_create_query = typeid_cast<ASTCreateQuery &>(*ast);
    ast_create_query.table = to_table_name;

    /// NOTE Non-atomic.
    to_database_concrete->createTable(context, to_table_name, table, ast);
    removeTable(context, table_name);
}
void StorageTinyLog::addFiles(const String & column_name, const IDataType & type) { if (files.end() != files.find(column_name)) throw Exception("Duplicate column with name " + column_name + " in constructor of StorageTinyLog.", ErrorCodes::DUPLICATE_COLUMN); IDataType::StreamCallback stream_callback = [&] (const IDataType::SubstreamPath & substream_path) { String stream_name = IDataType::getFileNameForStream(column_name, substream_path); if (!files.count(stream_name)) { ColumnData column_data; files.insert(std::make_pair(stream_name, column_data)); files[stream_name].data_file = Poco::File( path + escapeForFileName(name) + '/' + stream_name + DBMS_STORAGE_LOG_DATA_FILE_EXTENSION); } }; IDataType::SubstreamPath substream_path; type.enumerateStreams(stream_callback, substream_path); }
/// Persists the file-size map as JSON. Written to a tmp file first and swapped in;
/// a transient ".old" copy ensures a crash cannot lose both versions at once.
void FileChecker::save() const
{
    {
        WriteBufferFromFile out(tmp_files_info_path);

        /// The convoluted JSON structure is kept for compatibility with the old format.
        writeCString("{\"yandex\":{", out);

        for (auto it = map.begin(); it != map.end(); ++it)
        {
            if (it != map.begin())
                writeString(",", out);

            /// escapeForFileName is not actually needed here, but is kept for compatibility with old code.
            writeJSONString(escapeForFileName(it->first), out);
            writeString(":{\"size\":\"", out);
            writeIntText(it->second, out);
            writeString("\"}", out);
        }

        writeCString("}}", out);
        out.next();
    }

    Poco::File current_file(files_info_path);

    if (current_file.exists())
    {
        std::string old_file_name = files_info_path + ".old";
        current_file.renameTo(old_file_name);
        Poco::File(tmp_files_info_path).renameTo(files_info_path);
        Poco::File(old_file_name).remove();
    }
    else
        Poco::File(tmp_files_info_path).renameTo(files_info_path);
}
/// Executes CREATE/ATTACH TABLE (and views): resolves columns/engine, creates the
/// storage under a DDLGuard, starts it up, and runs the SELECT for CREATE ... SELECT.
BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
{
    /// ON CLUSTER: delegate to the distributed DDL machinery.
    if (!create.cluster.empty())
    {
        NameSet databases{create.database};
        if (!create.to_table.empty())
            databases.emplace(create.to_database);

        return executeDDLQueryOnCluster(query_ptr, context, std::move(databases));
    }

    String path = context.getPath();
    String current_database = context.getCurrentDatabase();

    String database_name = create.database.empty() ? current_database : create.database;
    String table_name = create.table;
    String table_name_escaped = escapeForFileName(table_name);

    // If this is a stub ATTACH query, read the query definition from the database
    if (create.attach && !create.storage && !create.columns)
    {
        // Table SQL definition is available even if the table is detached
        auto query = context.getCreateTableQuery(database_name, table_name);
        auto & as_create = typeid_cast<const ASTCreateQuery &>(*query);
        create = as_create; // Copy the saved create query, but use ATTACH instead of CREATE
        create.attach = true;
    }

    if (create.to_database.empty())
        create.to_database = current_database;

    /// For (materialized) views, qualify unqualified table names in the SELECT with the current database.
    if (create.select && (create.is_view || create.is_materialized_view))
    {
        AddDefaultDatabaseVisitor visitor(current_database);
        visitor.visit(*create.select);
    }

    Block as_select_sample;
    if (create.select && (!create.attach || !create.columns))
        as_select_sample = InterpreterSelectWithUnionQuery::getSampleBlock(create.select->clone(), context);

    String as_database_name = create.as_database.empty() ? current_database : create.as_database;
    String as_table_name = create.as_table;

    StoragePtr as_storage;
    TableStructureReadLockPtr as_storage_lock;
    /// CREATE ... AS other_table: hold a structure lock on the source while copying its schema.
    if (!as_table_name.empty())
    {
        as_storage = context.getTable(as_database_name, as_table_name);
        as_storage_lock = as_storage->lockStructure(false);
    }

    /// Set and retrieve list of columns.
    ColumnsDescription columns = setColumns(create, as_select_sample, as_storage);

    /// Some column types may be not allowed according to settings.
    if (!create.attach)
        checkSupportedTypes(columns, context);

    /// Set the table engine if it was not specified explicitly.
    setEngine(create);

    StoragePtr res;

    {
        std::unique_ptr<DDLGuard> guard;

        String data_path;
        DatabasePtr database;

        if (!create.temporary)
        {
            database = context.getDatabase(database_name);
            data_path = database->getDataPath();

            /** If the request specifies IF NOT EXISTS, we allow concurrent CREATE queries (which do nothing).
              * If table doesnt exist, one thread is creating table, while others wait in DDLGuard.
              */
            guard = context.getDDLGuard(database_name, table_name);

            /// Table can be created before or it can be created concurrently in another thread, while we were waiting in DDLGuard.
            if (database->isTableExist(context, table_name))
            {
                if (create.if_not_exists)
                    return {};
                else
                    throw Exception("Table " + database_name + "." + table_name + " already exists.", ErrorCodes::TABLE_ALREADY_EXISTS);
            }
        }
        else if (context.tryGetExternalTable(table_name) && create.if_not_exists)
            return {};

        res = StorageFactory::instance().get(create, data_path, table_name, database_name, context, context.getGlobalContext(), columns, create.attach, false);

        if (create.temporary)
            context.getSessionContext().addExternalTable(table_name, res, query_ptr);
        else
            database->createTable(context, table_name, res, query_ptr);

        /// We must call "startup" and "shutdown" while holding DDLGuard.
        /// Because otherwise method "shutdown" (from InterpreterDropQuery) can be called before startup
        /// (in case when table was created and instantly dropped before started up)
        ///
        /// Method "startup" may create background tasks and method "shutdown" will wait for them.
        /// But if "shutdown" is called before "startup", it will exit early, because there are no background tasks to wait.
        /// Then background task is created by "startup" method. And when destructor of a table object is called, background task is still active,
        /// and the task will use references to freed data.
        res->startup();
    }

    /// If the query is a CREATE SELECT, insert the data into the table.
    if (create.select && !create.attach && !create.is_view && (!create.is_materialized_view || create.is_populate))
    {
        auto insert = std::make_shared<ASTInsertQuery>();

        if (!create.temporary)
            insert->database = database_name;

        insert->table = table_name;
        insert->select = create.select->clone();

        if (create.temporary && !context.getSessionContext().hasQueryContext())
            context.getSessionContext().setQueryContext(context.getSessionContext());

        return InterpreterInsertQuery(insert,
            create.temporary ? context.getSessionContext() : context,
            context.getSettingsRef().insert_allow_materialized_columns).execute();
    }

    return {};
}
/// Executes CREATE/ATTACH TABLE (older variant using `is_temporary` and
/// getDDLGuardIfTableDoesntExist): resolves columns/engine, creates the storage,
/// starts it, and runs the SELECT for CREATE ... SELECT.
BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
{
    /// ON CLUSTER: delegate to the distributed DDL machinery.
    if (!create.cluster.empty())
    {
        NameSet databases{create.database};
        if (!create.to_table.empty())
            databases.emplace(create.to_database);

        return executeDDLQueryOnCluster(query_ptr, context, databases);
    }

    String path = context.getPath();
    String current_database = context.getCurrentDatabase();

    String database_name = create.database.empty() ? current_database : create.database;
    String table_name = create.table;
    String table_name_escaped = escapeForFileName(table_name);

    // If this is a stub ATTACH query, read the query definition from the database
    if (create.attach && !create.storage && !create.columns)
    {
        // Table SQL definition is available even if the table is detached
        auto query = context.getCreateTableQuery(database_name, table_name);
        auto & as_create = typeid_cast<const ASTCreateQuery &>(*query);
        create = as_create; // Copy the saved create query, but use ATTACH instead of CREATE
        create.attach = true;
    }

    if (create.to_database.empty())
        create.to_database = current_database;

    /// For (materialized) views, qualify table names in the SELECT with the current database.
    if (create.select && (create.is_view || create.is_materialized_view))
        create.select->setDatabaseIfNeeded(current_database);

    Block as_select_sample;
    if (create.select && (!create.attach || !create.columns))
        as_select_sample = InterpreterSelectWithUnionQuery::getSampleBlock(create.select->clone(), context);

    String as_database_name = create.as_database.empty() ? current_database : create.as_database;
    String as_table_name = create.as_table;

    StoragePtr as_storage;
    TableStructureReadLockPtr as_storage_lock;
    /// CREATE ... AS other_table: hold a structure lock on the source while copying its schema.
    if (!as_table_name.empty())
    {
        as_storage = context.getTable(as_database_name, as_table_name);
        as_storage_lock = as_storage->lockStructure(false, __PRETTY_FUNCTION__);
    }

    /// Set and retrieve list of columns.
    ColumnsDescription columns = setColumns(create, as_select_sample, as_storage);

    /// Set the table engine if it was not specified explicitly.
    setEngine(create);

    StoragePtr res;

    {
        std::unique_ptr<DDLGuard> guard;

        String data_path;
        DatabasePtr database;

        if (!create.is_temporary)
        {
            database = context.getDatabase(database_name);
            data_path = database->getDataPath();

            /** If the table already exists, and the request specifies IF NOT EXISTS,
              * then we allow concurrent CREATE queries (which do nothing).
              * Otherwise, concurrent queries for creating a table, if the table does not exist,
              * can throw an exception, even if IF NOT EXISTS is specified.
              */
            guard = context.getDDLGuardIfTableDoesntExist(database_name, table_name,
                "Table " + database_name + "." + table_name + " is creating or attaching right now");

            if (!guard)
            {
                if (create.if_not_exists)
                    return {};
                else
                    throw Exception("Table " + database_name + "." + table_name + " already exists.", ErrorCodes::TABLE_ALREADY_EXISTS);
            }
        }
        else if (context.tryGetExternalTable(table_name) && create.if_not_exists)
            return {};

        res = StorageFactory::instance().get(create, data_path, table_name, database_name, context, context.getGlobalContext(), columns, create.attach, false);

        if (create.is_temporary)
            context.getSessionContext().addExternalTable(table_name, res, query_ptr);
        else
            database->createTable(context, table_name, res, query_ptr);
    }

    res->startup();

    /// If the query is a CREATE SELECT, insert the data into the table.
    if (create.select && !create.attach && !create.is_view && (!create.is_materialized_view || create.is_populate))
    {
        auto insert = std::make_shared<ASTInsertQuery>();

        if (!create.is_temporary)
            insert->database = database_name;

        insert->table = table_name;
        insert->select = create.select->clone();

        return InterpreterInsertQuery(insert,
            create.is_temporary ? context.getSessionContext() : context,
            context.getSettingsRef().insert_allow_materialized_columns).execute();
    }

    return {};
}
/// Executes CREATE/ATTACH DATABASE: resolves the engine, creates metadata
/// directories, registers the database, and (for CREATE) writes its .sql file atomically.
BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create)
{
    if (!create.cluster.empty())
        return executeDDLQueryOnCluster(query_ptr, context, {create.database});

    String database_name = create.database;

    if (create.if_not_exists && context.isDatabaseExist(database_name))
        return {};

    String database_engine_name;
    if (!create.storage)
    {
        /// No ENGINE clause: fall back to the default database engine and synthesize its AST.
        database_engine_name = "Ordinary"; /// Default database engine.
        auto engine = std::make_shared<ASTFunction>();
        engine->name = database_engine_name;
        auto storage = std::make_shared<ASTStorage>();
        storage->set(storage->engine, engine);
        create.set(create.storage, storage);
    }
    else
    {
        const ASTStorage & storage = *create.storage;
        const ASTFunction & engine = *storage.engine;
        /// Currently, there are no database engines, that support any arguments.
        if (engine.arguments || engine.parameters || storage.partition_by || storage.order_by || storage.sample_by || storage.settings)
        {
            std::stringstream ostr;
            formatAST(storage, ostr, false, false);
            throw Exception("Unknown database engine: " + ostr.str(), ErrorCodes::UNKNOWN_DATABASE_ENGINE);
        }

        database_engine_name = engine.name;
    }

    String database_name_escaped = escapeForFileName(database_name);

    /// Create directories for tables metadata.
    String path = context.getPath();
    String metadata_path = path + "metadata/" + database_name_escaped + "/";
    Poco::File(metadata_path).createDirectory();

    DatabasePtr database = DatabaseFactory::get(database_engine_name, database_name, metadata_path, context);

    /// Will write file with database metadata, if needed.
    String metadata_file_tmp_path = path + "metadata/" + database_name_escaped + ".sql.tmp";
    String metadata_file_path = path + "metadata/" + database_name_escaped + ".sql";

    bool need_write_metadata = !create.attach;

    if (need_write_metadata)
    {
        /// The stored statement is always an ATTACH without IF NOT EXISTS.
        create.attach = true;
        create.if_not_exists = false;

        std::ostringstream statement_stream;
        formatAST(create, statement_stream, false);
        statement_stream << '\n';
        String statement = statement_stream.str();

        /// Exclusive flag guarantees, that database is not created right now in another thread.
        WriteBufferFromFile out(metadata_file_tmp_path, statement.size(), O_WRONLY | O_CREAT | O_EXCL);
        writeString(statement, out);

        out.next();
        if (context.getSettingsRef().fsync_metadata)
            out.sync();
        out.close();
    }

    try
    {
        context.addDatabase(database_name, database);

        if (need_write_metadata)
            Poco::File(metadata_file_tmp_path).renameTo(metadata_file_path);

        database->loadTables(context, thread_pool, has_force_restore_data_flag);
    }
    catch (...)
    {
        if (need_write_metadata)
            Poco::File(metadata_file_tmp_path).remove();

        throw;
    }

    return {};
}
/// Layout of a File-like table's data: <db_dir>/<escaped table name>/data.<escaped format name>
static std::string getTablePath(const std::string & db_dir_path, const std::string & table_name, const std::string & format_name)
{
    std::string table_dir = db_dir_path + escapeForFileName(table_name);
    return table_dir + "/data." + escapeForFileName(format_name);
}
/// Path of a table's metadata file: <base_path>/<escaped table name>.sql
/// (a trailing slash is appended to base_path only when missing).
String getTableMetadataPath(const String & base_path, const String & table_name)
{
    String result = base_path;
    if (!endsWith(result, "/"))
        result += "/";
    return result + escapeForFileName(table_name) + ".sql";
}
/// Handles DETACH/TRUNCATE/DROP of a single table (temporary tables are dispatched
/// to executeToTemporaryTable). All destructive work runs under an exclusive table lock.
BlockIO InterpreterDropQuery::executeToTable(String & database_name_, String & table_name, ASTDropQuery::Kind kind, bool if_exists, bool if_temporary)
{
    if (if_temporary || database_name_.empty())
    {
        auto & session_context = context.hasSessionContext() ? context.getSessionContext() : context;

        if (session_context.isExternalTableExist(table_name))
            return executeToTemporaryTable(table_name, kind);
    }

    String database_name = database_name_.empty() ? context.getCurrentDatabase() : database_name_;

    auto ddl_guard = context.getDDLGuard(database_name, table_name);

    DatabaseAndTable database_and_table = tryGetDatabaseAndTable(database_name, table_name, if_exists);

    if (database_and_table.first && database_and_table.second)
    {
        if (kind == ASTDropQuery::Kind::Detach)
        {
            database_and_table.second->shutdown();
            /// If table was already dropped by anyone, an exception will be thrown
            auto table_lock = database_and_table.second->lockExclusively(context.getCurrentQueryId());
            /// Drop table from memory, don't touch data and metadata
            database_and_table.first->detachTable(database_and_table.second->getTableName());
        }
        else if (kind == ASTDropQuery::Kind::Truncate)
        {
            database_and_table.second->checkTableCanBeDropped();

            /// If table was already dropped by anyone, an exception will be thrown
            auto table_lock = database_and_table.second->lockExclusively(context.getCurrentQueryId());
            /// Drop table data, don't touch metadata
            database_and_table.second->truncate(query_ptr, context);
        }
        else if (kind == ASTDropQuery::Kind::Drop)
        {
            database_and_table.second->checkTableCanBeDropped();

            database_and_table.second->shutdown();
            /// If table was already dropped by anyone, an exception will be thrown
            auto table_lock = database_and_table.second->lockExclusively(context.getCurrentQueryId());

            /// Delete table metadata and table itself from memory
            database_and_table.first->removeTable(context, database_and_table.second->getTableName());
            /// Delete table data
            database_and_table.second->drop();
            database_and_table.second->is_dropped = true;

            String database_data_path = database_and_table.first->getDataPath();

            /// If it is not virtual database like Dictionary then drop remaining data dir
            if (!database_data_path.empty())
            {
                String table_data_path = database_data_path + "/" + escapeForFileName(database_and_table.second->getTableName());

                if (Poco::File(table_data_path).exists())
                    Poco::File(table_data_path).remove(true);
            }
        }
    }

    return {};
}
/// Loads all tables of this database from metadata files (table_name.sql),
/// optionally in parallel via the supplied thread pool.
/// Skips dot-files and *.sql.bak, removes stale *.sql.tmp files, and throws
/// on any other file extension in the metadata directory.
void DatabaseOrdinary::loadTables(Context & context, boost::threadpool::pool * thread_pool)
{
    log = &Logger::get("DatabaseOrdinary (" + name + ")");

    using FileNames = std::vector<std::string>;
    FileNames file_names;

    Poco::DirectoryIterator dir_end;
    for (Poco::DirectoryIterator dir_it(path); dir_it != dir_end; ++dir_it)
    {
        /// For '.svn' directories and '.gitignore' files.
        if (dir_it.name().at(0) == '.')
            continue;

        /// There are '.sql.bak' files - skip them.
        if (endsWith(dir_it.name(), ".sql.bak"))
            continue;

        /// There are '.sql.tmp' files - delete them.
        if (endsWith(dir_it.name(), ".sql.tmp"))
        {
            LOG_INFO(log, "Removing file " << dir_it->path());
            Poco::File(dir_it->path()).remove();
            continue;
        }

        /// The required files have names like 'table_name.sql'.
        if (endsWith(dir_it.name(), ".sql"))
            file_names.push_back(dir_it.name());
        else
            throw Exception("Incorrect file extension: " + dir_it.name() + " in metadata directory " + path,
                ErrorCodes::INCORRECT_FILE_NAME);
    }

    /** Tables are loaded faster in sorted (by name) order.
      * Otherwise (for the ext4 filesystem) DirectoryIterator enumerates them in some order
      * that corresponds neither to the order of table creation nor to their layout on disk.
      */
    std::sort(file_names.begin(), file_names.end());

    size_t total_tables = file_names.size();
    LOG_INFO(log, "Total " << total_tables << " tables.");

    String data_path = context.getPath() + "/data/" + escapeForFileName(name) + "/";

    StopwatchWithLock watch;
    size_t tables_processed = 0;

    auto task_function = [&](FileNames::const_iterator begin, FileNames::const_iterator end)
    {
        for (FileNames::const_iterator it = begin; it != end; ++it)
        {
            const String & table = *it;

            /// Progress messages so that waiting for a slow server start is not boring.
            /// Use the value returned by the atomic increment instead of re-reading
            /// 'tables_processed' afterwards: a separate read would race with the
            /// increments done by other worker threads and could log a torn/stale value.
            size_t processed = __sync_add_and_fetch(&tables_processed, 1);
            if (processed % PRINT_MESSAGE_EACH_N_TABLES == 0
                || watch.lockTestAndRestart(PRINT_MESSAGE_EACH_N_SECONDS))
            {
                LOG_INFO(log, std::fixed << std::setprecision(2) << processed * 100.0 / total_tables << "%");
                watch.restart();
            }

            loadTable(context, path, *this, name, data_path, table);
        }
    };

    /** packaged_task is used so that exceptions are automatically propagated to the main thread.
      * Disadvantage - exceptions reach the main thread only after all tasks have finished.
      */

    const size_t bunch_size = TABLES_PARALLEL_LOAD_BUNCH_SIZE;
    size_t num_bunches = (total_tables + bunch_size - 1) / bunch_size;

    std::vector<std::packaged_task<void()>> tasks(num_bunches);
    for (size_t i = 0; i < num_bunches; ++i)
    {
        auto begin = file_names.begin() + i * bunch_size;
        auto end = (i + 1 == num_bunches)
            ? file_names.end()
            : (file_names.begin() + (i + 1) * bunch_size);

        tasks[i] = std::packaged_task<void()>(std::bind(task_function, begin, end));

        if (thread_pool)
            thread_pool->schedule([i, &tasks]{ tasks[i](); });
        else
            tasks[i]();
    }

    if (thread_pool)
        thread_pool->wait();

    /// Re-throws the first exception (if any) from each bunch into this thread.
    for (auto & task : tasks)
        task.get_future().get();
}
/// Executes DROP/DETACH for a single table or a whole database.
/// For a database: detach is a pure in-memory operation; drop first drops every
/// table (under per-table DDL guards), then removes the database itself together
/// with its data and metadata directories.
BlockIO InterpreterDropQuery::execute()
{
    String path = context.getPath();
    String current_database = context.getCurrentDatabase();

    ASTDropQuery & drop = typeid_cast<ASTDropQuery &>(*query_ptr);

    /// A non-empty database with an empty table name means the whole database is the target.
    bool drop_database = drop.table.empty() && !drop.database.empty();

    /// DETACH DATABASE only removes it from memory; data and metadata stay on disk.
    if (drop_database && drop.detach)
    {
        context.detachDatabase(drop.database);
        return {};
    }

    String database_name = drop.database.empty() ? current_database : drop.database;
    String database_name_escaped = escapeForFileName(database_name);

    String data_path = path + "data/" + database_name_escaped + "/";
    String metadata_path = path + "metadata/" + database_name_escaped + "/";

    auto database = context.tryGetDatabase(database_name);
    if (!database && !drop.if_exists)
        throw Exception("Database " + database_name + " doesn't exist", ErrorCodes::UNKNOWN_DATABASE);

    /// Each entry pairs a table with the DDL guard that blocks concurrent DDL on it
    /// until this query finishes.
    std::vector<std::pair< StoragePtr, std::unique_ptr<DDLGuard>>> tables_to_drop;

    if (!drop_database)
    {
        StoragePtr table;

        if (drop.if_exists)
            table = context.tryGetTable(database_name, drop.table);
        else
            table = context.getTable(database_name, drop.table);

        if (table)
            tables_to_drop.emplace_back(table,
                context.getDDLGuard(database_name, drop.table,
                    "Table " + database_name + "." + drop.table + " is dropping or detaching right now"));
        else
            return {};
    }
    else
    {
        if (!database)
        {
            if (!drop.if_exists)
                throw Exception("Database " + database_name + " doesn't exist", ErrorCodes::UNKNOWN_DATABASE);
            return {};
        }

        /// Collect every table of the database; each gets its own DDL guard.
        for (auto iterator = database->getIterator(); iterator->isValid(); iterator->next())
            tables_to_drop.emplace_back(iterator->table(),
                context.getDDLGuard(database_name, iterator->name(),
                    "Table " + database_name + "." + iterator->name() + " is dropping or detaching right now"));
    }

    for (auto & table : tables_to_drop)
    {
        /// Stop background activity before taking the alter lock.
        table.first->shutdown();

        /// If table was already dropped by anyone, an exception will be thrown
        auto table_lock = table.first->lockForAlter();

        String current_table_name = table.first->getTableName();

        if (drop.detach)
        {
            /// Drop table from memory, don't touch data and metadata
            database->detachTable(current_table_name);
        }
        else
        {
            if (!table.first->checkTableCanBeDropped())
                throw Exception("Table " + database_name + "." + current_table_name + " couldn't be dropped due to failed pre-drop check",
                    ErrorCodes::TABLE_WAS_NOT_DROPPED);

            /// Delete table metadata and table itself from memory
            database->removeTable(current_table_name);
            /// Delete table data
            table.first->drop();
            table.first->is_dropped = true;

            /// Remove whatever is left of the table's data directory, if any.
            String current_data_path = data_path + escapeForFileName(current_table_name);

            if (Poco::File(current_data_path).exists())
                Poco::File(current_data_path).remove(true);
        }
    }

    if (drop_database)
    {
        /// Delete the database. The tables in it have already been deleted.

        auto lock = context.getLock();

        /// Someone could have time to delete the database before us.
        context.assertDatabaseExists(database_name);

        /// Someone could have time to create a table in the database to be deleted while we deleted the tables without the context lock.
        if (!context.getDatabase(database_name)->empty())
            throw Exception("New table appeared in database being dropped. Try dropping it again.", ErrorCodes::DATABASE_NOT_EMPTY);

        /// Delete database information from the RAM
        auto database = context.detachDatabase(database_name);

        /// Delete the database.
        database->drop();

        Poco::File(data_path).remove(false);
        Poco::File(metadata_path).remove(false);
    }

    return {};
}