void test1() { Context context; StoragePtr table = StorageSystemNumbers::create("numbers"); Names column_names; column_names.push_back("number"); QueryProcessingStage::Enum stage1; QueryProcessingStage::Enum stage2; QueryProcessingStage::Enum stage3; BlockInputStreams streams; streams.emplace_back(std::make_shared<LimitBlockInputStream>(table->read(column_names, 0, context, Settings(), stage1, 1)[0], 30, 30000)); streams.emplace_back(std::make_shared<LimitBlockInputStream>(table->read(column_names, 0, context, Settings(), stage2, 1)[0], 30, 2000)); streams.emplace_back(std::make_shared<LimitBlockInputStream>(table->read(column_names, 0, context, Settings(), stage3, 1)[0], 30, 100)); UnionBlockInputStream<> union_stream(streams, nullptr, 2); WriteBufferFromFileDescriptor wb(STDERR_FILENO); Block sample = table->getSampleBlock(); BlockOutputStreamPtr out = context.getOutputFormat("TabSeparated", wb, sample); while (Block block = union_stream.read()) { out->write(block); wb.next(); } //copyData(union_stream, *out); }
// Inserts a new DataEntry under `deadline` into `to_load`, unless an entry with an
// equal title is already stored under the same deadline.
//
//   to_load    storage that receives the entry
//   deadline   key of the entry
//   title      title of the entry; also used for the duplicate check
//   orig_link  forwarded to the DataEntry constructor
//   is_new     forwarded to the DataEntry constructor
void Database::insertWithCheck(StoragePtr to_load, DatePtr deadline, const Title& title, const std::string& orig_link, bool is_new /* = false */)
{
    const StorageConstIteratorPair same_deadline = to_load->equal_range(deadline);

    // An absent key yields an empty range, so the loop needs no separate emptiness guard.
    for (StorageConstIterator it = same_deadline.first; it != same_deadline.second; ++it)
    {
        // Bind by reference: copying the whole DataEntry only to compare titles is wasted work.
        const DataEntry& existing = it->second;
        if (title == existing.getTitle())
        {
            // the same scholarship is already in, no need to insert
            return;
        }
    }

    // now insert
    DataEntry data_entry(title, orig_link, is_new);
    to_load->insert(std::pair<DatePtr, DataEntry>(deadline, data_entry));
}
int main(int argc, char ** argv) try { using namespace DB; StoragePtr table = StorageSystemNumbers::create("Numbers"); Names column_names; column_names.push_back("number"); Block sample; ColumnWithTypeAndName col; col.type = std::make_shared<DataTypeUInt64>(); sample.insert(std::move(col)); WriteBufferFromOStream out_buf(std::cout); QueryProcessingStage::Enum stage; LimitBlockInputStream input(table->read(column_names, 0, Context{}, Settings(), stage, 10)[0], 10, 96); RowOutputStreamPtr output_ = std::make_shared<TabSeparatedRowOutputStream>(out_buf, sample); BlockOutputStreamFromRowOutputStream output(output_); copyData(input, output); return 0; } catch (const DB::Exception & e) { std::cerr << e.what() << ", " << e.displayText() << std::endl; return 1; }
void DatabaseOrdinary::renameTable( const Context & context, const String & table_name, IDatabase & to_database, const String & to_table_name) { DatabaseOrdinary * to_database_concrete = typeid_cast<DatabaseOrdinary *>(&to_database); if (!to_database_concrete) throw Exception("Moving tables between databases of different engines is not supported", ErrorCodes::NOT_IMPLEMENTED); StoragePtr table = tryGetTable(table_name); if (!table) throw Exception("Table " + name + "." + table_name + " doesn't exist.", ErrorCodes::TABLE_ALREADY_EXISTS); /// Уведомляем таблицу о том, что она переименовывается. Если таблица не поддерживает переименование - кинется исключение. try { table->rename(context.getPath() + "/data/" + escapeForFileName(to_database_concrete->name) + "/", to_database_concrete->name, to_table_name); } catch (const Poco::Exception & e) { /// Более хорошая диагностика. throw Exception{e}; } ASTPtr ast = getCreateQueryImpl(path, table_name); ASTCreateQuery & ast_create_query = typeid_cast<ASTCreateQuery &>(*ast); ast_create_query.table = to_table_name; /// NOTE Неатомарно. to_database_concrete->createTable(to_table_name, table, ast, table->getName()); removeTable(table_name); }
int main(int argc, char ** argv) try { using namespace DB; size_t n = argc == 2 ? parse<UInt64>(argv[1]) : 10ULL; std::string input = "SELECT number, number / 3, number * number"; ParserSelectQuery parser; ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), ""); Context context; ExpressionAnalyzer analyzer(ast, context, {}, {NameAndTypePair("number", std::make_shared<DataTypeUInt64>())}); ExpressionActionsChain chain; analyzer.appendSelect(chain, false); analyzer.appendProjectResult(chain, false); chain.finalize(); ExpressionActionsPtr expression = chain.getLastActions(); StoragePtr table = StorageSystemNumbers::create("Numbers"); Names column_names; column_names.push_back("number"); QueryProcessingStage::Enum stage; BlockInputStreamPtr in; in = table->read(column_names, 0, context, Settings(), stage)[0]; in = std::make_shared<ExpressionBlockInputStream>(in, expression); in = std::make_shared<LimitBlockInputStream>(in, 10, std::max(static_cast<Int64>(0), static_cast<Int64>(n) - 10)); WriteBufferFromOStream out1(std::cout); RowOutputStreamPtr out2 = std::make_shared<TabSeparatedRowOutputStream>(out1, expression->getSampleBlock()); BlockOutputStreamFromRowOutputStream out(out2); { Stopwatch stopwatch; stopwatch.start(); copyData(*in, out); stopwatch.stop(); std::cout << std::fixed << std::setprecision(2) << "Elapsed " << stopwatch.elapsedSeconds() << " sec." << ", " << n / stopwatch.elapsedSeconds() << " rows/sec." << std::endl; } return 0; } catch (const DB::Exception & e) { std::cerr << e.what() << ", " << e.displayText() << std::endl; throw; }
void Database::writeNotification(bool single_line) { StoragePtr homepage = m_Storages[IDX_HOMEPAGE]; std::string filename = ConfigurationFindscholarships::instance()->pathDatabase() + currentDateTime(); if (single_line) { filename = filename + ".single"; } else { filename = filename + ".multiple"; } std::ofstream file_notification(filename.c_str()); if (file_notification.is_open()) { Storage::const_iterator beg = homepage->begin(); Storage::const_iterator end = homepage->end(); end--; for (Storage::const_iterator it = end; it != beg; it--) { DatePtr deadline = it->first; const DataEntry& data = it->second; if (data.isNew()) { if (single_line) { file_notification << data.getTitle().getSingleLineNotification(deadline) << std::endl << std::endl; } else { file_notification << data.getTitle().getMultipleLineNotification(deadline) << std::endl << std::endl; } } } DatePtr deadline = beg->first; const DataEntry& data = beg->second; if (data.isNew()) { if (single_line) { file_notification << data.getTitle().getSingleLineNotification(deadline) << std::endl << std::endl; } else { file_notification << data.getTitle().getMultipleLineNotification(deadline) << std::endl << std::endl; } } } else { DBGERR(__FUNCTION__ << ": Cannot open file \"" << filename << "\" for writing!") } }
// Prints the number of entries in `to_show` to std::cout, followed by one line per entry:
// the deadline as an ISO extended date string, a space, and the entry's title without spaces.
void Database::showStorage(StoragePtr to_show)
{
    std::cout << "Size = " << to_show->size() << std::endl;

    Storage::const_iterator cursor = to_show->begin();
    while (cursor != to_show->end())
    {
        const DatePtr deadline = cursor->first;
        const DataEntry& entry = cursor->second;

        std::cout << boost::gregorian::to_iso_extended_string(*deadline)
                  << " "
                  << entry.getTitle().getTitleNoSpace()
                  << std::endl;

        ++cursor;
    }
}
/** Builds the chain of output streams for an INSERT into the target table.
  * For a plain INSERT the chain is returned in BlockIO::out;
  * for INSERT SELECT the select pipeline is wired to the chain and returned in BlockIO::in.
  */
BlockIO InterpreterInsertQuery::execute()
{
    ASTInsertQuery & insert = typeid_cast<ASTInsertQuery &>(*query_ptr);
    checkAccess(insert);

    StoragePtr storage = getTable();

    /// Kept alive until the end of the function.
    auto structure_lock = storage->lockStructure(true, __PRETTY_FUNCTION__);

    NamesAndTypesList required_columns = storage->getColumnsList();
    auto && settings = context.getSettingsRef();

    /// The output chain is assembled from the innermost stream outwards.
    BlockOutputStreamPtr sink = std::make_shared<PushingToViewsBlockOutputStream>(
        insert.database, insert.table, storage, context, query_ptr, insert.no_destination);

    sink = std::make_shared<MaterializingBlockOutputStream>(sink);

    sink = std::make_shared<AddingDefaultBlockOutputStream>(
        sink, required_columns, storage->column_defaults, context,
        static_cast<bool>(settings.strict_insert_defaults));

    if (!allow_materialized)
        sink = std::make_shared<ProhibitColumnsBlockOutputStream>(sink, storage->materialized_columns);

    sink = std::make_shared<SquashingBlockOutputStream>(
        sink, settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes);

    /// Outermost wrapper is attached to the process list element of the query.
    auto counting = std::make_shared<CountingBlockOutputStream>(sink);
    counting->setProcessListElement(context.getProcessListElement());
    sink = std::move(counting);

    BlockIO res;
    res.out_sample = getSampleBlock();

    /// INSERT SELECT: feed the select result into the chain; plain INSERT: expose the chain itself.
    if (insert.select)
    {
        InterpreterSelectQuery interpreter_select{insert.select, context};
        res.in_sample = interpreter_select.getSampleBlock();

        res.in = interpreter_select.execute().in;
        res.in = std::make_shared<NullableAdapterBlockInputStream>(res.in, res.in_sample, res.out_sample);
        res.in = std::make_shared<CastTypeBlockInputStream>(context, res.in, res.out_sample);
        res.in = std::make_shared<NullAndDoCopyBlockInputStream>(res.in, sink);
    }
    else
    {
        res.out = sink;
    }

    return res;
}
/// Looks up the table named in `query` (falling back to the current database when no database
/// is given) and calls waitForShrinkingQueueSize(0, receive_timeout) on it.
/// Throws BAD_ARGUMENTS when the table is not a StorageReplicatedMergeTree.
void InterpreterSystemQuery::syncReplica(ASTSystemQuery & query)
{
    String database_name = query.target_database.empty() ? context.getCurrentDatabase() : query.target_database;
    const String & table_name = query.target_table;

    StoragePtr storage = context.getTable(database_name, table_name);

    auto * replicated = dynamic_cast<StorageReplicatedMergeTree *>(storage.get());
    if (replicated == nullptr)
        throw Exception("Table " + database_name + "." + table_name + " is not replicated", ErrorCodes::BAD_ARGUMENTS);

    const auto timeout_ms = context.getSettingsRef().receive_timeout.value.milliseconds();
    replicated->waitForShrinkingQueueSize(0, timeout_ms);
}
/** Creates and starts a StorageFile from the three arguments of the table function:
  * path, format and structure. The structure is a list of "name type" pairs separated
  * by spaces and/or commas.
  *
  * Throws when the argument list is malformed, when the number of arguments is not 3,
  * or when the structure has an odd number of tokens.
  */
StoragePtr TableFunctionFile::executeImpl(const ASTPtr & ast_function, const Context & context) const
{
    /// Extract and evaluate the arguments.
    ASTs & function_children = typeid_cast<ASTFunction &>(*ast_function).children;
    if (function_children.size() != 1)
        throw Exception("Table function '" + getName() + "' must have arguments.", ErrorCodes::LOGICAL_ERROR);

    ASTs & args = typeid_cast<ASTExpressionList &>(*function_children.at(0)).children;
    if (args.size() != 3)
        throw Exception("Table function '" + getName() + "' requires exactly 3 arguments: path, format and structure.",
            ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

    /// Exactly three elements at this point, so this visits indices 0..2.
    for (auto & arg : args)
        arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context);

    auto literal_as_string = [&args](size_t index) -> std::string
    {
        return static_cast<const ASTLiteral &>(*args[index]).value.safeGet<String>();
    };

    std::string path = literal_as_string(0);
    std::string format = literal_as_string(1);
    std::string structure = literal_as_string(2);

    /// Turn the structure string into a sample block.
    std::vector<std::string> tokens;
    boost::split(tokens, structure, boost::algorithm::is_any_of(" ,"), boost::algorithm::token_compress_on);

    const size_t token_count = tokens.size();
    if (token_count % 2 != 0)
        throw Exception("Odd number of elements in section structure: must be a list of name type pairs", ErrorCodes::LOGICAL_ERROR);

    const DataTypeFactory & type_factory = DataTypeFactory::instance();

    Block sample_block;
    for (size_t pos = 0; pos < token_count; pos += 2)
    {
        ColumnWithTypeAndName described;
        described.name = tokens[pos];
        described.type = type_factory.get(tokens[pos + 1]);
        described.column = described.type->createColumn();
        sample_block.insert(std::move(described));
    }

    /// Create and start the storage.
    StoragePtr result = StorageFile::create(
        path, -1, context.getUserFilesPath(), getName(), format,
        ColumnsDescription{sample_block.getNamesAndTypesList()}, const_cast<Context &>(context));

    result->startup();
    return result;
}
/** Detaches the given table and attaches it again from its CREATE query.
  * Returns the newly created storage, or nullptr when the table does not exist
  * or is not a StorageReplicatedMergeTree.
  */
StoragePtr InterpreterSystemQuery::tryRestartReplica(const String & database_name, const String & table_name, Context & context)
{
    auto database = context.getDatabase(database_name);
    auto ddl_guard = context.getDDLGuard(database_name, table_name,
        "Table " + database_name + "." + table_name + " is restarting right now");

    ASTPtr create_ast;

    /// Detach phase. The scope bounds the lifetime of the old storage pointer and of its alter lock.
    {
        auto old_table = context.tryGetTable(database_name, table_name);

        if (!old_table)
            return nullptr;
        if (!dynamic_cast<const StorageReplicatedMergeTree *>(old_table.get()))
            return nullptr;

        old_table->shutdown();

        /// If table was already dropped by anyone, an exception will be thrown
        auto alter_lock = old_table->lockForAlter(__PRETTY_FUNCTION__);

        create_ast = context.getCreateTableQuery(database_name, table_name);
        database->detachTable(table_name);
    }

    /// Attach phase.
    {
        /// getCreateTableQuery must return the canonical CREATE query representation, so no AST postprocessing is needed
        auto & create = typeid_cast<ASTCreateQuery &>(*create_ast);
        create.attach = true;

        std::string data_path = database->getDataPath();
        auto columns = InterpreterCreateQuery::getColumnsDescription(*create.columns, context);

        StoragePtr new_table = StorageFactory::instance().get(
            create, data_path, table_name, database_name,
            context, context.getGlobalContext(), columns, create.attach, false);

        database->createTable(context, table_name, new_table, create_ast);
        new_table->startup();

        return new_table;
    }
}
void Database::writeToCategoryFile(const std::string& filename, const std::string& title, StoragePtr to_write) { std::ofstream out(filename.c_str()); if (out.is_open()) { out << ConfigurationFindscholarships::instance()->categoryPart1() << std::endl; out << "<title>Findscholarships: " << title << "</title>" << std::endl; out << ConfigurationFindscholarships::instance()->categoryPart2() << std::endl; out << "<div><center><h2>" << title << "</h2></center></div>" << std::endl; out << ConfigurationFindscholarships::instance()->categoryPart3() << std::endl; std::string list_new = ""; std::string list_old = ""; for (Storage::const_iterator it = to_write->begin(); it != to_write->end(); ++it) { const DatePtr deadline = it->first; const DataEntry& data = it->second; const Title& title = data.getTitle(); if (data.isNew()) { list_new = "<p>" + list_new + title.getHtmlLink(deadline) + "<img src=\"images/new_icon.gif\"></p>\n\n"; } else { list_old = "<p>" + list_old + title.getHtmlLink(deadline) + "</p>\n\n"; } } out << list_new << list_old << std::endl; out << ConfigurationFindscholarships::instance()->categoryPart4() << std::endl; out.close(); } else { DBGERR(__FUNCTION__ << ": Cannot write to category file \"" << filename << "\"!") } }
void DatabaseOrdinary::renameTable( const Context & context, const String & table_name, IDatabase & to_database, const String & to_table_name) { DatabaseOrdinary * to_database_concrete = typeid_cast<DatabaseOrdinary *>(&to_database); if (!to_database_concrete) throw Exception("Moving tables between databases of different engines is not supported", ErrorCodes::NOT_IMPLEMENTED); StoragePtr table = tryGetTable(context, table_name); if (!table) throw Exception("Table " + name + "." + table_name + " doesn't exist.", ErrorCodes::UNKNOWN_TABLE); /// Notify the table that it is renamed. If the table does not support renaming, exception is thrown. try { table->rename(context.getPath() + "/data/" + escapeForFileName(to_database_concrete->name) + "/", to_database_concrete->name, to_table_name); } catch (const Exception &) { throw; } catch (const Poco::Exception & e) { /// Better diagnostics. throw Exception{e}; } ASTPtr ast = getQueryFromMetadata(detail::getTableMetadataPath(metadata_path, table_name)); if (!ast) throw Exception("There is no metadata file for table " + table_name, ErrorCodes::FILE_DOESNT_EXIST); ASTCreateQuery & ast_create_query = typeid_cast<ASTCreateQuery &>(*ast); ast_create_query.table = to_table_name; /// NOTE Non-atomic. to_database_concrete->createTable(context, to_table_name, table, ast); removeTable(context, table_name); }
/** Takes a structure lock on `storage`, creates a nested PushingToViewsBlockOutputStream for every
  * table returned by context.getDependencies(database, table) and, unless `no_destination` is set,
  * opens the write stream of `storage` itself.
  */
PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream(
    String database, String table, StoragePtr storage,
    const Context & context_, const ASTPtr & query_ptr_, bool no_destination)
    : context(context_), query_ptr(query_ptr_)
{
    /** TODO This line is very important: on every insertion into the table one of the streams must own the lock.
      * Currently every insertion goes through PushingToViewsBlockOutputStream,
      * but this is clearly not the best place for that functionality.
      */
    addTableLock(storage->lockStructure(true, __PRETTY_FUNCTION__));

    if (!table.empty())
    {
        Dependencies dependent_ids = context.getDependencies(database, table);

        if (!dependent_ids.empty())
        {
            /// Insertions into materialized views use a separate context copy
            /// with deduplication switched off: the main insertion has already succeeded.
            views_context = std::make_unique<Context>(context);
            views_context->getSettingsRef().insert_deduplicate = false;

            for (const auto & dependent_id : dependent_ids)
            {
                const auto & view_database = dependent_id.first;
                const auto & view_table = dependent_id.second;

                auto dependent_storage = context.getTable(view_database, view_table);
                auto & materialized_view = dynamic_cast<const StorageMaterializedView &>(*dependent_storage);

                auto inner_query = materialized_view.getInnerQuery();
                auto nested_out = std::make_shared<PushingToViewsBlockOutputStream>(
                    view_database, view_table, dependent_storage, *views_context, ASTPtr());

                views.emplace_back(ViewInfo{std::move(inner_query), view_database, view_table, std::move(nested_out)});
            }
        }
    }

    /// With the flag set nothing is written to the destination table itself.
    if (no_destination)
        return;

    output = storage->write(query_ptr, context.getSettingsRef());
    replicated_output = dynamic_cast<ReplicatedMergeTreeBlockOutputStream *>(output.get());
}
/** Executes TRUNCATE or DROP on the external (temporary) table `table_name`.
  * DETACH is rejected with an exception. Nothing happens when the table is not found.
  * Always returns an empty BlockIO.
  */
BlockIO InterpreterDropQuery::executeToTemporaryTable(String & table_name, ASTDropQuery::Kind kind)
{
    if (kind == ASTDropQuery::Kind::Detach)
        throw Exception("Unable to detach temporary table.", ErrorCodes::SYNTAX_ERROR);

    /// The session context is used when there is one, otherwise the query context.
    auto & context_handle = context.hasSessionContext() ? context.getSessionContext() : context;

    StoragePtr storage = context_handle.tryGetExternalTable(table_name);
    if (!storage)
        return {};

    if (kind == ASTDropQuery::Kind::Truncate)
    {
        /// If table was already dropped by anyone, an exception will be thrown
        auto exclusive_lock = storage->lockExclusively(context.getCurrentQueryId());

        /// Only the data is removed, metadata stays.
        storage->truncate(query_ptr, context);
    }
    else if (kind == ASTDropQuery::Kind::Drop)
    {
        context_handle.tryRemoveExternalTable(table_name);
        storage->shutdown();

        /// If table was already dropped by anyone, an exception will be thrown
        auto exclusive_lock = storage->lockExclusively(context.getCurrentQueryId());

        /// Remove the data and mark the storage as dropped.
        storage->drop();
        storage->is_dropped = true;
    }

    return {};
}
/** Collects the tables of `source_database` whose names match `table_name_regexp` (excluding this
  * table itself), optionally taking a structure lock on each one.
  * When `has_virtual_column` is set, the list is additionally filtered by the conditions
  * that `query` places on the "_table" virtual column.
  * Throws ILLEGAL_PREWHERE when the query has PREWHERE and a matching storage does not support it.
  */
StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables(const ASTPtr & query, bool has_virtual_column, bool get_lock) const
{
    StorageListWithLocks result;

    DatabasePtr database = global_context.getDatabase(source_database);
    DatabaseIteratorPtr table_it = database->getIterator(global_context);

    /// Names of the selected tables, used below as the values of the "_table" column.
    auto table_names = ColumnString::create();

    for (; table_it->isValid(); table_it->next())
    {
        if (!table_name_regexp.match(table_it->name()))
            continue;

        StoragePtr candidate = table_it->table();

        if (query && typeid_cast<ASTSelectQuery *>(query.get())->prewhere_expression && !candidate->supportsPrewhere())
            throw Exception("Storage " + candidate->getName() + " doesn't support PREWHERE.", ErrorCodes::ILLEGAL_PREWHERE);

        /// The merge table never selects itself.
        if (candidate.get() == this)
            continue;

        table_names->insert(candidate->getTableName());
        result.emplace_back(candidate, get_lock ? candidate->lockStructure(false) : TableStructureReadLockPtr{});
    }

    if (has_virtual_column)
    {
        Block names_block = Block{ColumnWithTypeAndName(std::move(table_names), std::make_shared<DataTypeString>(), "_table")};
        VirtualColumnUtils::filterBlockWithQuery(query, names_block, global_context);
        auto allowed_names = VirtualColumnUtils::extractSingleValueFromBlock<String>(names_block, "_table");

        /// Drop the tables whose names did not survive the filtering.
        result.remove_if([&] (const auto & entry)
        {
            return allowed_names.find(entry.first->getTableName()) == allowed_names.end();
        });
    }

    return result;
}
/** Builds the chain of output streams for an INSERT into the target table.
  * For a plain INSERT the chain is returned in BlockIO::out;
  * for INSERT SELECT the select result is wired to the chain and returned in BlockIO::in.
  */
BlockIO InterpreterInsertQuery::execute()
{
    ASTInsertQuery & insert = typeid_cast<ASTInsertQuery &>(*query_ptr);
    StoragePtr storage = getTable();

    /// Kept alive until the end of the function.
    auto structure_lock = storage->lockStructure(true);

    NamesAndTypesListPtr required_columns = std::make_shared<NamesAndTypesList>(storage->getColumnsList());

    /// Create the chain of streams into which the data will be written, innermost stream first.
    BlockOutputStreamPtr sink = std::make_shared<PushingToViewsBlockOutputStream>(
        insert.database, insert.table, context, query_ptr);

    sink = std::make_shared<MaterializingBlockOutputStream>(sink);

    sink = std::make_shared<AddingDefaultBlockOutputStream>(
        sink, required_columns, storage->column_defaults, context,
        static_cast<bool>(context.getSettingsRef().strict_insert_defaults));

    sink = std::make_shared<ProhibitColumnsBlockOutputStream>(sink, storage->materialized_columns);

    BlockIO res;
    res.out_sample = getSampleBlock();

    /// Which kind of query is it: INSERT or INSERT SELECT?
    if (insert.select)
    {
        InterpreterSelectQuery interpreter_select{insert.select, context};
        BlockInputStreamPtr select_stream{interpreter_select.execute().in};

        res.in = std::make_shared<NullAndDoCopyBlockInputStream>(select_stream, sink);
        res.in_sample = interpreter_select.getSampleBlock();
    }
    else
    {
        res.out = sink;
    }

    return res;
}
void Database::storeDatabase(const std::string& filename, StoragePtr to_store) { std::ofstream out(filename.c_str()); if (out.is_open()) { for (Storage::const_iterator it = to_store->begin(); it != to_store->end(); ++it) { DatePtr deadline = it->first; const DataEntry& data_entry = it->second; out << boost::gregorian::to_iso_extended_string(*deadline) << std::endl; out << data_entry.getTitle().getTitle() << std::endl; out << data_entry.getOrigLink() << std::endl; } out.close(); } else { DBGERR(__FUNCTION__ << ": Cannot open file \"" << filename << "\" to store database on disk!") } }
/** Determines the columns of the table being created and stores their canonical AST form in `create`.
  * The columns come from, in order of precedence: the explicit column list, the table named in AS,
  * or the sample block of the SELECT. Throws INCORRECT_QUERY when none of them is present and
  * DUPLICATE_COLUMN when a column name occurs more than once.
  */
ColumnsDescription InterpreterCreateQuery::setColumns(
    ASTCreateQuery & create, const Block & as_select_sample, const StoragePtr & as_storage) const
{
    ColumnsDescription description;

    if (create.columns)
    {
        description = getColumnsDescription(*create.columns, context);
    }
    else if (!create.as_table.empty())
    {
        description = as_storage->getColumns();
    }
    else if (create.select)
    {
        const size_t column_count = as_select_sample.columns();
        for (size_t pos = 0; pos < column_count; ++pos)
        {
            auto && sample_column = as_select_sample.safeGetByPosition(pos);
            description.ordinary.emplace_back(sample_column.name, sample_column.type);
        }
    }
    else
        throw Exception("Incorrect CREATE query: required list of column descriptions or AS section or SELECT.", ErrorCodes::INCORRECT_QUERY);

    /// The column list is canonicalized (Nested columns unfolded) even when the query already had one.
    ASTPtr canonical_columns = formatColumns(description);

    if (create.columns)
        create.replace(create.columns, canonical_columns);
    else
        create.set(create.columns, canonical_columns);

    /// Reject duplicate names across ordinary, materialized and alias columns.
    std::set<String> seen_names;

    auto ensure_unique = [&seen_names](const NameAndTypePair & column)
    {
        const bool inserted = seen_names.emplace(column.name).second;
        if (!inserted)
            throw Exception("Column " + backQuoteIfNeed(column.name) + " already exists", ErrorCodes::DUPLICATE_COLUMN);
    };

    for (const auto & column : description.ordinary)
        ensure_unique(column);

    for (const auto & column : description.materialized)
        ensure_unique(column);

    for (const auto & column : description.aliases)
        ensure_unique(column);

    return description;
}
/** Determines the columns of the table being created and stores their canonical AST form in `create`.
  * The columns come from, in order of precedence: the explicit column list, the table named in AS,
  * or the sample block of the SELECT. Throws INCORRECT_QUERY when none of them is present.
  */
InterpreterCreateQuery::ColumnsInfo InterpreterCreateQuery::setColumns(
    ASTCreateQuery & create, const Block & as_select_sample, const StoragePtr & as_storage) const
{
    ColumnsInfo info;

    if (create.columns)
    {
        info = getColumnsInfo(create.columns, context);
    }
    else if (!create.as_table.empty())
    {
        info.columns = std::make_shared<NamesAndTypesList>(as_storage->getColumnsListNonMaterialized());
        info.materialized_columns = as_storage->materialized_columns;
        info.alias_columns = as_storage->alias_columns;
        info.column_defaults = as_storage->column_defaults;
    }
    else if (create.select)
    {
        info.columns = std::make_shared<NamesAndTypesList>();

        for (size_t pos = 0; pos < as_select_sample.columns(); ++pos)
        {
            info.columns->push_back(NameAndTypePair(
                as_select_sample.getByPosition(pos).name,
                as_select_sample.getByPosition(pos).type));
        }
    }
    else
        throw Exception("Incorrect CREATE query: required list of column descriptions or AS section or SELECT.", ErrorCodes::INCORRECT_QUERY);

    /// Even if the query had a column list, bring it to canonical form just in case (unfold Nested).
    ASTPtr canonical_columns = formatColumns(*info.columns, info.materialized_columns, info.alias_columns, info.column_defaults);

    /// Replace the old column list among the children when it is present there; otherwise append the new one.
    auto & children = create.children;
    auto old_position = create.columns
        ? std::find(children.begin(), children.end(), create.columns)
        : children.end();

    if (old_position != children.end())
        *old_position = canonical_columns;
    else
        children.push_back(canonical_columns);

    create.columns = canonical_columns;

    return info;
}
/** Returns the name of the storage engine for the table being created and, when the query
  * does not specify one, fills `create.storage` accordingly:
  *  - from the table named in AS;
  *  - "Memory" for temporary tables, "View" for views, "MaterializedView" for materialized views.
  * Throws ENGINE_REQUIRED when no engine can be determined.
  */
String InterpreterCreateQuery::setEngine(
    ASTCreateQuery & create, const StoragePtr & as_storage) const
{
    /// An explicitly specified engine wins.
    if (create.storage)
        return typeid_cast<ASTFunction &>(*create.storage).name;

    if (!create.as_table.empty())
    {
        /// NOTE Getting the structure from the table specified in AS is not atomic with the table creation.
        String as_database_name = create.as_database.empty() ? context.getCurrentDatabase() : create.as_database;
        String as_table_name = create.as_table;

        String engine_name = as_storage->getName();
        create.storage = typeid_cast<const ASTCreateQuery &>(*context.getCreateQuery(as_database_name, as_table_name)).storage;
        return engine_name;
    }

    /// Implicit engines: put an ASTFunction with the engine name into the query and return that name.
    auto use_default_engine = [&create](const char * engine) -> String
    {
        auto engine_ast = std::make_shared<ASTFunction>();
        engine_ast->name = engine;
        create.storage = engine_ast;
        return engine;
    };

    if (create.is_temporary)
        return use_default_engine("Memory");

    if (create.is_view)
        return use_default_engine("View");

    if (create.is_materialized_view)
        return use_default_engine("MaterializedView");

    throw Exception("Incorrect CREATE query: required ENGINE.", ErrorCodes::ENGINE_REQUIRED);
}
void write(const Block & block) override { if (!block) return; size_t rows = block.rows(); if (!rows) return; StoragePtr destination; if (!storage.no_destination) { destination = storage.context.tryGetTable(storage.destination_database, storage.destination_table); if (destination) { if (destination.get() == &storage) throw Exception("Destination table is myself. Write will cause infinite loop.", ErrorCodes::INFINITE_LOOP); /// Check table structure. try { destination->check(block, true); } catch (Exception & e) { e.addMessage("(when looking at destination table " + storage.destination_database + "." + storage.destination_table + ")"); throw; } } } size_t bytes = block.bytes(); /// If the block already exceeds the maximum limit, then we skip the buffer. if (rows > storage.max_thresholds.rows || bytes > storage.max_thresholds.bytes) { if (!storage.no_destination) { LOG_TRACE(storage.log, "Writing block with " << rows << " rows, " << bytes << " bytes directly."); storage.writeBlockToDestination(block, destination); } return; } /// We distribute the load on the shards by the stream number. const auto start_shard_num = Poco::ThreadNumber::get() % storage.num_shards; /// We loop through the buffers, trying to lock mutex. No more than one lap. auto shard_num = start_shard_num; StorageBuffer::Buffer * least_busy_buffer = nullptr; std::unique_lock<std::mutex> least_busy_lock; size_t least_busy_shard_rows = 0; for (size_t try_no = 0; try_no < storage.num_shards; ++try_no) { std::unique_lock<std::mutex> lock(storage.buffers[shard_num].mutex, std::try_to_lock_t()); if (lock.owns_lock()) { size_t num_rows = storage.buffers[shard_num].data.rows(); if (!least_busy_buffer || num_rows < least_busy_shard_rows) { least_busy_buffer = &storage.buffers[shard_num]; least_busy_lock = std::move(lock); least_busy_shard_rows = num_rows; } } shard_num = (shard_num + 1) % storage.num_shards; } /// If you still can not lock anything at once, then we'll wait on mutex. 
if (!least_busy_buffer) insertIntoBuffer(block, storage.buffers[start_shard_num], std::unique_lock<std::mutex>(storage.buffers[start_shard_num].mutex)); else insertIntoBuffer(block, *least_busy_buffer, std::move(least_busy_lock)); }
/** Executes an ALTER query.
  * A query with a cluster clause is delegated to executeDDLQueryOnCluster. Otherwise the parsed
  * commands are applied to the table in this order: mutations, partition commands, alter commands.
  * Returns an empty BlockIO in the local case.
  */
BlockIO InterpreterAlterQuery::execute()
{
    auto & alter = typeid_cast<ASTAlterQuery &>(*query_ptr);

    if (!alter.cluster.empty())
        return executeDDLQueryOnCluster(query_ptr, context, {alter.table});

    const String & table_name = alter.table;
    String database_name = alter.database.empty() ? context.getCurrentDatabase() : alter.database;
    StoragePtr storage = context.getTable(database_name, table_name);

    AlterCommands alter_commands;
    PartitionCommands partition_commands;
    MutationCommands mutation_commands;
    parseAlter(alter.parameters, alter_commands, partition_commands, mutation_commands);

    /// 1. Mutations.
    if (!mutation_commands.commands.empty())
    {
        mutation_commands.validate(*storage, context);
        storage->mutate(mutation_commands, context);
    }

    /// 2. Partition commands, in the order they were given.
    partition_commands.validate(*storage);
    for (const PartitionCommand & cmd : partition_commands)
    {
        switch (cmd.type)
        {
            case PartitionCommand::DROP_PARTITION:
                storage->dropPartition(query_ptr, cmd.partition, cmd.detach, context);
                break;

            case PartitionCommand::ATTACH_PARTITION:
                storage->attachPartition(cmd.partition, cmd.part, context);
                break;

            case PartitionCommand::REPLACE_PARTITION:
            {
                /// The source table defaults to the current database when none is given.
                String source_database = cmd.from_database.empty() ? context.getCurrentDatabase() : cmd.from_database;
                auto source_storage = context.getTable(source_database, cmd.from_table);
                storage->replacePartitionFrom(source_storage, cmd.partition, cmd.replace, context);
            }
            break;

            case PartitionCommand::FETCH_PARTITION:
                storage->fetchPartition(cmd.partition, cmd.from_zookeeper_path, context);
                break;

            case PartitionCommand::FREEZE_PARTITION:
                storage->freezePartition(cmd.partition, cmd.with_name, context);
                break;

            case PartitionCommand::CLEAR_COLUMN:
                storage->clearColumnInPartition(cmd.partition, cmd.column_name, context);
                break;
        }
    }

    /// 3. Structure changes.
    if (!alter_commands.empty())
    {
        alter_commands.validate(*storage, context);
        storage->alter(alter_commands, database_name, table_name, context);
    }

    return {};
}
int main(int argc, char ** argv) try { using namespace DB; NamesAndTypesList names_and_types_list { {"WatchID", std::make_shared<DataTypeUInt64>()}, {"JavaEnable", std::make_shared<DataTypeUInt8>()}, {"Title", std::make_shared<DataTypeString>()}, {"EventTime", std::make_shared<DataTypeDateTime>()}, {"CounterID", std::make_shared<DataTypeUInt32>()}, {"ClientIP", std::make_shared<DataTypeUInt32>()}, {"RegionID", std::make_shared<DataTypeUInt32>()}, {"UniqID", std::make_shared<DataTypeUInt64>()}, {"CounterClass", std::make_shared<DataTypeUInt8>()}, {"OS", std::make_shared<DataTypeUInt8>()}, {"UserAgent", std::make_shared<DataTypeUInt8>()}, {"URL", std::make_shared<DataTypeString>()}, {"Referer", std::make_shared<DataTypeString>()}, {"ResolutionWidth", std::make_shared<DataTypeUInt16>()}, {"ResolutionHeight", std::make_shared<DataTypeUInt16>()}, {"ResolutionDepth", std::make_shared<DataTypeUInt8>()}, {"FlashMajor", std::make_shared<DataTypeUInt8>()}, {"FlashMinor", std::make_shared<DataTypeUInt8>()}, {"FlashMinor2", std::make_shared<DataTypeString>()}, {"NetMajor", std::make_shared<DataTypeUInt8>()}, {"NetMinor", std::make_shared<DataTypeUInt8>()}, {"UserAgentMajor", std::make_shared<DataTypeUInt16>()}, {"UserAgentMinor", std::make_shared<DataTypeFixedString>(2)}, {"CookieEnable", std::make_shared<DataTypeUInt8>()}, {"JavascriptEnable", std::make_shared<DataTypeUInt8>()}, {"IsMobile", std::make_shared<DataTypeUInt8>()}, {"MobilePhone", std::make_shared<DataTypeUInt8>()}, {"MobilePhoneModel", std::make_shared<DataTypeString>()}, {"Params", std::make_shared<DataTypeString>()}, {"IPNetworkID", std::make_shared<DataTypeUInt32>()}, {"TraficSourceID", std::make_shared<DataTypeInt8>()}, {"SearchEngineID", std::make_shared<DataTypeUInt16>()}, {"SearchPhrase", std::make_shared<DataTypeString>()}, {"AdvEngineID", std::make_shared<DataTypeUInt8>()}, {"IsArtifical", std::make_shared<DataTypeUInt8>()}, {"WindowClientWidth", std::make_shared<DataTypeUInt16>()}, 
{"WindowClientHeight", std::make_shared<DataTypeUInt16>()}, {"ClientTimeZone", std::make_shared<DataTypeInt16>()}, {"ClientEventTime", std::make_shared<DataTypeDateTime>()}, {"SilverlightVersion1", std::make_shared<DataTypeUInt8>()}, {"SilverlightVersion2", std::make_shared<DataTypeUInt8>()}, {"SilverlightVersion3", std::make_shared<DataTypeUInt32>()}, {"SilverlightVersion4", std::make_shared<DataTypeUInt16>()}, {"PageCharset", std::make_shared<DataTypeString>()}, {"CodeVersion", std::make_shared<DataTypeUInt32>()}, {"IsLink", std::make_shared<DataTypeUInt8>()}, {"IsDownload", std::make_shared<DataTypeUInt8>()}, {"IsNotBounce", std::make_shared<DataTypeUInt8>()}, {"FUniqID", std::make_shared<DataTypeUInt64>()}, {"OriginalURL", std::make_shared<DataTypeString>()}, {"HID", std::make_shared<DataTypeUInt32>()}, {"IsOldCounter", std::make_shared<DataTypeUInt8>()}, {"IsEvent", std::make_shared<DataTypeUInt8>()}, {"IsParameter", std::make_shared<DataTypeUInt8>()}, {"DontCountHits", std::make_shared<DataTypeUInt8>()}, {"WithHash", std::make_shared<DataTypeUInt8>()}, }; Names column_names; for (const auto & name_type : names_and_types_list) column_names.push_back(name_type.name); /// create an object of an existing hit log table StoragePtr table = StorageLog::create("./", "HitLog", std::make_shared<NamesAndTypesList>(names_and_types_list)); /// read from it if (argc == 2 && 0 == strcmp(argv[1], "read")) { QueryProcessingStage::Enum stage; BlockInputStreamPtr in = table->read(column_names, 0, Context{}, stage)[0]; WriteBufferFromFileDescriptor out1(STDOUT_FILENO); CompressedWriteBuffer out2(out1); NativeBlockOutputStream out3(out2, ClickHouseRevision::get()); copyData(*in, out3); } /// read the data from the native file and simultaneously write to the table if (argc == 2 && 0 == strcmp(argv[1], "write")) { ReadBufferFromFileDescriptor in1(STDIN_FILENO); CompressedReadBuffer in2(in1); NativeBlockInputStream in3(in2, ClickHouseRevision::get()); BlockOutputStreamPtr out = 
table->write({}, {}); copyData(in3, *out); } return 0; } catch (const DB::Exception & e) { std::cerr << e.what() << ", " << e.displayText() << std::endl; throw; }
/** Builds a single block describing the columns of the tables known to the context.
  *
  * The result has the columns: database, table, name, type, default_type,
  * default_expression, bytes. It is produced in three steps:
  *   1. collect database names and filter them with the query;
  *   2. expand every remaining database into its tables and filter again;
  *   3. for every remaining (database, table) pair emit one row per column.
  *
  * Sets processed_stage to FetchColumns. Returns an empty list of streams if
  * filtering leaves no rows, otherwise one stream holding the result block.
  */
BlockInputStreams StorageSystemColumns::read(
    const Names & column_names,
    ASTPtr query,
    const Context & context,
    const Settings & settings,
    QueryProcessingStage::Enum & processed_stage,
    const size_t max_block_size,
    const unsigned threads)
{
    check(column_names);
    processed_stage = QueryProcessingStage::FetchColumns;

    Block block;

    /// (database name, table name) -> storage; filled while tables are enumerated.
    std::map<std::pair<std::string, std::string>, StoragePtr> storages;

    {
        Databases databases = context.getDatabases();

        /// Add the `database` column.
        ColumnPtr databases_col = std::make_shared<ColumnString>();
        for (const auto & name_and_database : databases)
            databases_col->insert(name_and_database.first);

        block.insert(ColumnWithTypeAndName(databases_col, std::make_shared<DataTypeString>(), "database"));

        /// Filter the block that holds only the `database` column.
        VirtualColumnUtils::filterBlockWithQuery(query, block, context);

        if (!block.rows())
            return BlockInputStreams();

        databases_col = block.getByName("database").column;
        const size_t database_count = databases_col->size();

        /// Add the `table` column.
        ColumnPtr tables_col = std::make_shared<ColumnString>();

        /// offsets[i] is the cumulative number of tables in databases 0..i;
        /// it is used below to replicate each database row once per table.
        IColumn::Offsets_t offsets(database_count);
        size_t tables_seen = 0;

        for (size_t db_idx = 0; db_idx < database_count; ++db_idx)
        {
            const std::string database_name = (*databases_col)[db_idx].get<std::string>();
            const DatabasePtr database = databases.at(database_name);

            for (auto table_it = database->getIterator(); table_it->isValid(); table_it->next())
            {
                const String & table_name = table_it->name();

                storages.emplace(
                    std::piecewise_construct,
                    std::forward_as_tuple(database_name, table_name),
                    std::forward_as_tuple(table_it->table()));

                tables_col->insert(table_name);
                ++tables_seen;
            }

            offsets[db_idx] = tables_seen;
        }

        for (size_t pos = 0; pos < block.columns(); ++pos)
        {
            ColumnPtr & existing = block.getByPosition(pos).column;
            existing = existing->replicate(offsets);
        }

        block.insert(ColumnWithTypeAndName(tables_col, std::make_shared<DataTypeString>(), "table"));
    }

    /// Filter the block that holds the `database` and `table` columns.
    VirtualColumnUtils::filterBlockWithQuery(query, block, context);

    if (!block.rows())
        return BlockInputStreams();

    ColumnPtr filtered_database_column = block.getByName("database").column;
    ColumnPtr filtered_table_column = block.getByName("table").column;

    /// Compose the result.
    ColumnPtr res_database = std::make_shared<ColumnString>();
    ColumnPtr res_table = std::make_shared<ColumnString>();
    ColumnPtr res_name = std::make_shared<ColumnString>();
    ColumnPtr res_type = std::make_shared<ColumnString>();
    ColumnPtr res_default_type = std::make_shared<ColumnString>();
    ColumnPtr res_default_expression = std::make_shared<ColumnString>();
    ColumnPtr res_bytes = std::make_shared<ColumnUInt64>();

    const size_t table_count = filtered_database_column->size();
    for (size_t row = 0; row < table_count; ++row)
    {
        const std::string database_name = (*filtered_database_column)[row].get<std::string>();
        const std::string table_name = (*filtered_table_column)[row].get<std::string>();

        NamesAndTypesList columns;
        ColumnDefaults column_defaults;
        std::unordered_map<String, size_t> column_sizes;

        /// Everything needed from the storage is copied inside this scope,
        /// so the storage pointer and its structure lock do not outlive it.
        {
            StoragePtr storage = storages.at(std::make_pair(database_name, table_name));
            IStorage::TableStructureReadLockPtr table_lock;

            try
            {
                table_lock = storage->lockStructure(false);
            }
            catch (const Exception & e)
            {
                /** There is a case when IStorage::drop was called,
                  * but we still own the object.
                  * Then the table throws an exception on an attempt to lock it.
                  * Just skip the table.
                  */
                if (e.code() != ErrorCodes::TABLE_IS_DROPPED)
                    throw;

                continue;
            }

            columns = storage->getColumnsList();
            columns.insert(std::end(columns), std::begin(storage->alias_columns), std::end(storage->alias_columns));
            column_defaults = storage->column_defaults;

            /** Column size data for tables of the MergeTree family.
              * NOTE: later an interface could be added to obtain column sizes from IStorage.
              */
            if (auto merge_tree = dynamic_cast<StorageMergeTree *>(storage.get()))
            {
                column_sizes = merge_tree->getData().getColumnSizes();
            }
            else if (auto replicated_merge_tree = dynamic_cast<StorageReplicatedMergeTree *>(storage.get()))
            {
                column_sizes = replicated_merge_tree->getData().getColumnSizes();

                /// Sizes of the unreplicated data, if present, are added on top.
                auto unreplicated_data = replicated_merge_tree->getUnreplicatedData();
                if (unreplicated_data)
                {
                    auto unreplicated_column_sizes = unreplicated_data->getColumnSizes();
                    for (const auto & name_and_size : unreplicated_column_sizes)
                        column_sizes[name_and_size.first] += name_and_size.second;
                }
            }
        }

        for (const auto & column : columns)
        {
            res_database->insert(database_name);
            res_table->insert(table_name);
            res_name->insert(column.name);
            res_type->insert(column.type->getName());

            const auto default_it = column_defaults.find(column.name);
            if (default_it != std::end(column_defaults))
            {
                res_default_type->insert(toString(default_it->second.type));
                res_default_expression->insert(queryToString(default_it->second.expression));
            }
            else
            {
                res_default_type->insertDefault();
                res_default_expression->insertDefault();
            }

            const auto size_it = column_sizes.find(column.name);
            if (size_it != std::end(column_sizes))
                res_bytes->insert(size_it->second);
            else
                res_bytes->insertDefault();
        }
    }

    block.clear();

    block.insert(ColumnWithTypeAndName(res_database, std::make_shared<DataTypeString>(), "database"));
    block.insert(ColumnWithTypeAndName(res_table, std::make_shared<DataTypeString>(), "table"));
    block.insert(ColumnWithTypeAndName(res_name, std::make_shared<DataTypeString>(), "name"));
    block.insert(ColumnWithTypeAndName(res_type, std::make_shared<DataTypeString>(), "type"));
    block.insert(ColumnWithTypeAndName(res_default_type, std::make_shared<DataTypeString>(), "default_type"));
    block.insert(ColumnWithTypeAndName(res_default_expression, std::make_shared<DataTypeString>(), "default_expression"));
    block.insert(ColumnWithTypeAndName(res_bytes, std::make_shared<DataTypeUInt64>(), "bytes"));

    return BlockInputStreams{ 1, std::make_shared<OneBlockInputStream>(block) };
}
int main(int argc, char ** argv) { using namespace DB; try { NamesAndTypesList names_and_types_list { {"WatchID", std::make_shared<DataTypeUInt64>()}, {"JavaEnable", std::make_shared<DataTypeUInt8>()}, {"Title", std::make_shared<DataTypeString>()}, {"EventTime", std::make_shared<DataTypeDateTime>()}, {"CounterID", std::make_shared<DataTypeUInt32>()}, {"ClientIP", std::make_shared<DataTypeUInt32>()}, {"RegionID", std::make_shared<DataTypeUInt32>()}, {"UniqID", std::make_shared<DataTypeUInt64>()}, {"CounterClass", std::make_shared<DataTypeUInt8>()}, {"OS", std::make_shared<DataTypeUInt8>()}, {"UserAgent", std::make_shared<DataTypeUInt8>()}, {"URL", std::make_shared<DataTypeString>()}, {"Referer", std::make_shared<DataTypeString>()}, {"ResolutionWidth", std::make_shared<DataTypeUInt16>()}, {"ResolutionHeight", std::make_shared<DataTypeUInt16>()}, {"ResolutionDepth", std::make_shared<DataTypeUInt8>()}, {"FlashMajor", std::make_shared<DataTypeUInt8>()}, {"FlashMinor", std::make_shared<DataTypeUInt8>()}, {"FlashMinor2", std::make_shared<DataTypeString>()}, {"NetMajor", std::make_shared<DataTypeUInt8>()}, {"NetMinor", std::make_shared<DataTypeUInt8>()}, {"UserAgentMajor", std::make_shared<DataTypeUInt16>()}, {"UserAgentMinor", std::make_shared<DataTypeFixedString>(2)}, {"CookieEnable", std::make_shared<DataTypeUInt8>()}, {"JavascriptEnable", std::make_shared<DataTypeUInt8>()}, {"IsMobile", std::make_shared<DataTypeUInt8>()}, {"MobilePhone", std::make_shared<DataTypeUInt8>()}, {"MobilePhoneModel", std::make_shared<DataTypeString>()}, {"Params", std::make_shared<DataTypeString>()}, {"IPNetworkID", std::make_shared<DataTypeUInt32>()}, {"TraficSourceID", std::make_shared<DataTypeInt8>()}, {"SearchEngineID", std::make_shared<DataTypeUInt16>()}, {"SearchPhrase", std::make_shared<DataTypeString>()}, {"AdvEngineID", std::make_shared<DataTypeUInt8>()}, {"IsArtifical", std::make_shared<DataTypeUInt8>()}, {"WindowClientWidth", std::make_shared<DataTypeUInt16>()}, 
{"WindowClientHeight", std::make_shared<DataTypeUInt16>()}, {"ClientTimeZone", std::make_shared<DataTypeInt16>()}, {"ClientEventTime", std::make_shared<DataTypeDateTime>()}, {"SilverlightVersion1", std::make_shared<DataTypeUInt8>()}, {"SilverlightVersion2", std::make_shared<DataTypeUInt8>()}, {"SilverlightVersion3", std::make_shared<DataTypeUInt32>()}, {"SilverlightVersion4", std::make_shared<DataTypeUInt16>()}, {"PageCharset", std::make_shared<DataTypeString>()}, {"CodeVersion", std::make_shared<DataTypeUInt32>()}, {"IsLink", std::make_shared<DataTypeUInt8>()}, {"IsDownload", std::make_shared<DataTypeUInt8>()}, {"IsNotBounce", std::make_shared<DataTypeUInt8>()}, {"FUniqID", std::make_shared<DataTypeUInt64>()}, {"OriginalURL", std::make_shared<DataTypeString>()}, {"HID", std::make_shared<DataTypeUInt32>()}, {"IsOldCounter", std::make_shared<DataTypeUInt8>()}, {"IsEvent", std::make_shared<DataTypeUInt8>()}, {"IsParameter", std::make_shared<DataTypeUInt8>()}, {"DontCountHits", std::make_shared<DataTypeUInt8>()}, {"WithHash", std::make_shared<DataTypeUInt8>()}, }; Context context; std::string input = "SELECT UniqID, URL, CounterID, IsLink WHERE URL = 'http://mail.yandex.ru/neo2/#inbox'"; ParserSelectQuery parser; ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), ""); formatAST(*ast, std::cerr); std::cerr << std::endl; std::cerr << ast->getTreeID() << std::endl; /// create an object of an existing hit log table StoragePtr table = StorageLog::create("./", "HitLog", std::make_shared<NamesAndTypesList>(names_and_types_list)); /// read from it, apply the expression, filter, and write in tsv form to the console ExpressionAnalyzer analyzer(ast, context, nullptr, names_and_types_list); ExpressionActionsChain chain; analyzer.appendSelect(chain, false); analyzer.appendWhere(chain, false); chain.finalize(); ExpressionActionsPtr expression = chain.getLastActions(); Names column_names { "UniqID", "URL", "CounterID", "IsLink", }; QueryProcessingStage::Enum 
stage; BlockInputStreamPtr in = table->read(column_names, 0, context, stage)[0]; in = std::make_shared<FilterBlockInputStream>(in, expression, 4); //in = std::make_shared<LimitBlockInputStream>(in, 10, 0); WriteBufferFromOStream ob(std::cout); RowOutputStreamPtr out_ = std::make_shared<TabSeparatedRowOutputStream>(ob, expression->getSampleBlock()); BlockOutputStreamFromRowOutputStream out(out_); copyData(*in, out); } catch (const Exception & e) { std::cerr << e.what() << ", " << e.displayText() << std::endl; return 1; } return 0; }
/** Creates (or attaches) the table described by `create`.
  *
  * - If a cluster is specified, the query is handed over to executeDDLQueryOnCluster.
  * - A stub ATTACH query (no storage and no columns) is replaced in place by the
  *   saved CREATE query of the table, keeping the ATTACH flag.
  * - The column list and the engine are determined, the storage is built by
  *   StorageFactory, registered (in the database, or as an external table of the
  *   session for temporary tables) and started up.
  * - For CREATE ... SELECT the data is then inserted through InterpreterInsertQuery
  *   and its BlockIO is returned; in all other cases an empty BlockIO is returned.
  *
  * Throws TABLE_ALREADY_EXISTS if a non-temporary table already exists and
  * IF NOT EXISTS was not specified. Note that `create` is modified.
  */
BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
{
    if (!create.cluster.empty())
    {
        NameSet affected_databases{create.database};
        if (!create.to_table.empty())
            affected_databases.emplace(create.to_database);

        return executeDDLQueryOnCluster(query_ptr, context, std::move(affected_databases));
    }

    String path = context.getPath();
    String current_database = context.getCurrentDatabase();

    String database_name = create.database.empty() ? current_database : create.database;
    String table_name = create.table;
    String table_name_escaped = escapeForFileName(table_name);

    /// If this is a stub ATTACH query, read the query definition from the database.
    const bool is_stub_attach = create.attach && !create.storage && !create.columns;
    if (is_stub_attach)
    {
        /// The SQL definition of the table is available even if the table is detached.
        auto saved_query = context.getCreateTableQuery(database_name, table_name);

        /// Copy the saved create query, but use ATTACH instead of CREATE.
        create = typeid_cast<const ASTCreateQuery &>(*saved_query);
        create.attach = true;
    }

    if (create.to_database.empty())
        create.to_database = current_database;

    if (create.select && (create.is_view || create.is_materialized_view))
    {
        AddDefaultDatabaseVisitor visitor(current_database);
        visitor.visit(*create.select);
    }

    Block select_sample;
    if (create.select && !(create.attach && create.columns))
        select_sample = InterpreterSelectWithUnionQuery::getSampleBlock(create.select->clone(), context);

    String as_database_name = create.as_database.empty() ? current_database : create.as_database;
    String as_table_name = create.as_table;

    /// For "CREATE ... AS table" the source table structure is locked for the rest of the function.
    StoragePtr as_storage;
    TableStructureReadLockPtr as_storage_lock;
    if (!as_table_name.empty())
    {
        as_storage = context.getTable(as_database_name, as_table_name);
        as_storage_lock = as_storage->lockStructure(false);
    }

    /// Set and retrieve list of columns.
    ColumnsDescription columns = setColumns(create, select_sample, as_storage);

    /// Some column types may be not allowed according to settings.
    if (!create.attach)
        checkSupportedTypes(columns, context);

    /// Set the table engine if it was not specified explicitly.
    setEngine(create);

    StoragePtr storage;

    {
        std::unique_ptr<DDLGuard> ddl_guard;

        String data_path;
        DatabasePtr database;

        if (create.temporary)
        {
            if (context.tryGetExternalTable(table_name) && create.if_not_exists)
                return {};
        }
        else
        {
            database = context.getDatabase(database_name);
            data_path = database->getDataPath();

            /** If the request specifies IF NOT EXISTS, we allow concurrent CREATE queries (which do nothing).
              * If the table doesn't exist, one thread is creating the table, while others wait in DDLGuard.
              */
            ddl_guard = context.getDDLGuard(database_name, table_name);

            /// The table can have been created before, or concurrently in another thread
            /// while we were waiting in DDLGuard.
            if (database->isTableExist(context, table_name))
            {
                if (!create.if_not_exists)
                    throw Exception("Table " + database_name + "." + table_name + " already exists.", ErrorCodes::TABLE_ALREADY_EXISTS);

                return {};
            }
        }

        storage = StorageFactory::instance().get(
            create,
            data_path,
            table_name,
            database_name,
            context,
            context.getGlobalContext(),
            columns,
            create.attach,
            false);

        if (create.temporary)
            context.getSessionContext().addExternalTable(table_name, storage, query_ptr);
        else
            database->createTable(context, table_name, storage, query_ptr);

        /// We must call "startup" and "shutdown" while holding DDLGuard.
        /// Otherwise method "shutdown" (from InterpreterDropQuery) can be called before startup
        /// (in case the table was created and instantly dropped before it was started up).
        ///
        /// Method "startup" may create background tasks and method "shutdown" will wait for them.
        /// But if "shutdown" is called before "startup", it exits early, because there are no background tasks to wait for.
        /// Then a background task is created by "startup". And when the destructor of the table object is called,
        /// the background task is still active and will use references to freed data.
        storage->startup();
    }

    /// If the query is a CREATE SELECT, insert the data into the table.
    const bool needs_populate = create.select
        && !create.attach
        && !create.is_view
        && (!create.is_materialized_view || create.is_populate);

    if (needs_populate)
    {
        auto insert = std::make_shared<ASTInsertQuery>();

        if (!create.temporary)
            insert->database = database_name;

        insert->table = table_name;
        insert->select = create.select->clone();

        if (create.temporary && !context.getSessionContext().hasQueryContext())
            context.getSessionContext().setQueryContext(context.getSessionContext());

        return InterpreterInsertQuery(
            insert,
            create.temporary ? context.getSessionContext() : context,
            context.getSettingsRef().insert_allow_materialized_columns).execute();
    }

    return {};
}
int main(int argc, char ** argv) try { NamesAndTypesList names_and_types_list { {"WatchID", std::make_shared<DataTypeUInt64>()}, {"JavaEnable", std::make_shared<DataTypeUInt8>()}, {"Title", std::make_shared<DataTypeString>()}, {"EventTime", std::make_shared<DataTypeDateTime>()}, {"CounterID", std::make_shared<DataTypeUInt32>()}, {"ClientIP", std::make_shared<DataTypeUInt32>()}, {"RegionID", std::make_shared<DataTypeUInt32>()}, {"UniqID", std::make_shared<DataTypeUInt64>()}, {"CounterClass", std::make_shared<DataTypeUInt8>()}, {"OS", std::make_shared<DataTypeUInt8>()}, {"UserAgent", std::make_shared<DataTypeUInt8>()}, {"URL", std::make_shared<DataTypeString>()}, {"Referer", std::make_shared<DataTypeString>()}, {"ResolutionWidth", std::make_shared<DataTypeUInt16>()}, {"ResolutionHeight", std::make_shared<DataTypeUInt16>()}, {"ResolutionDepth", std::make_shared<DataTypeUInt8>()}, {"FlashMajor", std::make_shared<DataTypeUInt8>()}, {"FlashMinor", std::make_shared<DataTypeUInt8>()}, {"FlashMinor2", std::make_shared<DataTypeString>()}, {"NetMajor", std::make_shared<DataTypeUInt8>()}, {"NetMinor", std::make_shared<DataTypeUInt8>()}, {"UserAgentMajor", std::make_shared<DataTypeUInt16>()}, {"UserAgentMinor", std::make_shared<DataTypeFixedString>(2)}, {"CookieEnable", std::make_shared<DataTypeUInt8>()}, {"JavascriptEnable", std::make_shared<DataTypeUInt8>()}, {"IsMobile", std::make_shared<DataTypeUInt8>()}, {"MobilePhone", std::make_shared<DataTypeUInt8>()}, {"MobilePhoneModel", std::make_shared<DataTypeString>()}, {"Params", std::make_shared<DataTypeString>()}, {"IPNetworkID", std::make_shared<DataTypeUInt32>()}, {"TraficSourceID", std::make_shared<DataTypeInt8>()}, {"SearchEngineID", std::make_shared<DataTypeUInt16>()}, {"SearchPhrase", std::make_shared<DataTypeString>()}, {"AdvEngineID", std::make_shared<DataTypeUInt8>()}, {"IsArtifical", std::make_shared<DataTypeUInt8>()}, {"WindowClientWidth", std::make_shared<DataTypeUInt16>()}, {"WindowClientHeight", 
std::make_shared<DataTypeUInt16>()}, {"ClientTimeZone", std::make_shared<DataTypeInt16>()}, {"ClientEventTime", std::make_shared<DataTypeDateTime>()}, {"SilverlightVersion1", std::make_shared<DataTypeUInt8>()}, {"SilverlightVersion2", std::make_shared<DataTypeUInt8>()}, {"SilverlightVersion3", std::make_shared<DataTypeUInt32>()}, {"SilverlightVersion4", std::make_shared<DataTypeUInt16>()}, {"PageCharset", std::make_shared<DataTypeString>()}, {"CodeVersion", std::make_shared<DataTypeUInt32>()}, {"IsLink", std::make_shared<DataTypeUInt8>()}, {"IsDownload", std::make_shared<DataTypeUInt8>()}, {"IsNotBounce", std::make_shared<DataTypeUInt8>()}, {"FUniqID", std::make_shared<DataTypeUInt64>()}, {"OriginalURL", std::make_shared<DataTypeString>()}, {"HID", std::make_shared<DataTypeUInt32>()}, {"IsOldCounter", std::make_shared<DataTypeUInt8>()}, {"IsEvent", std::make_shared<DataTypeUInt8>()}, {"IsParameter", std::make_shared<DataTypeUInt8>()}, {"DontCountHits", std::make_shared<DataTypeUInt8>()}, {"WithHash", std::make_shared<DataTypeUInt8>()}, }; DataTypes data_types; Names column_names; for (const auto & name_type : names_and_types_list) { data_types.push_back(name_type.type); column_names.push_back(name_type.name); } /// create a hit log table StoragePtr table = StorageLog::create( "./", "HitLog", ColumnsDescription{names_and_types_list}, DEFAULT_MAX_COMPRESS_BLOCK_SIZE); table->startup(); /// create a description of how to read data from the tab separated dump Block sample; for (const auto & name_type : names_and_types_list) { ColumnWithTypeAndName elem; elem.name = name_type.name; elem.type = name_type.type; elem.column = elem.type->createColumn(); sample.insert(std::move(elem)); } FormatSettings format_settings; /// read the data from tsv file and simultaneously write to table if (argc == 2 && 0 == strcmp(argv[1], "write")) { ReadBufferFromFileDescriptor in_buf(STDIN_FILENO); RowInputStreamPtr in_ = std::make_shared<TabSeparatedRowInputStream>(in_buf, sample, false, 
false, format_settings); BlockInputStreamFromRowInputStream in(in_, sample, DEFAULT_INSERT_BLOCK_SIZE, 0, 0); BlockOutputStreamPtr out = table->write({}, {}); copyData(in, *out); } /// read from it if (argc == 2 && 0 == strcmp(argv[1], "read")) { WriteBufferFromFileDescriptor out_buf(STDOUT_FILENO); QueryProcessingStage::Enum stage; BlockInputStreamPtr in = table->read(column_names, {}, Context::createGlobal(), stage, 8192, 1)[0]; RowOutputStreamPtr out_ = std::make_shared<TabSeparatedRowOutputStream>(out_buf, sample, false, false, format_settings); BlockOutputStreamFromRowOutputStream out(out_, sample); copyData(*in, out); } return 0; } catch (const Exception & e) { std::cerr << e.what() << ", " << e.displayText() << std::endl; throw; }
int main(int argc, char ** argv) try { using namespace DB; const size_t rows = 10000000; /// создаём таблицу с парой столбцов NamesAndTypesListPtr names_and_types = std::make_shared<NamesAndTypesList>(); names_and_types->push_back(NameAndTypePair("a", std::make_shared<DataTypeUInt64>())); names_and_types->push_back(NameAndTypePair("b", std::make_shared<DataTypeUInt8>())); StoragePtr table = StorageLog::create("./", "test", names_and_types); /// пишем в неё { Block block; ColumnWithTypeAndName column1; column1.name = "a"; column1.type = table->getDataTypeByName("a"); column1.column = column1.type->createColumn(); ColumnUInt64::Container_t & vec1 = typeid_cast<ColumnUInt64&>(*column1.column).getData(); vec1.resize(rows); for (size_t i = 0; i < rows; ++i) vec1[i] = i; block.insert(column1); ColumnWithTypeAndName column2; column2.name = "b"; column2.type = table->getDataTypeByName("b"); column2.column = column2.type->createColumn(); ColumnUInt8::Container_t & vec2 = typeid_cast<ColumnUInt8&>(*column2.column).getData(); vec2.resize(rows); for (size_t i = 0; i < rows; ++i) vec2[i] = i * 2; block.insert(column2); BlockOutputStreamPtr out = table->write({}, {}); out->write(block); } /// читаем из неё { Names column_names; column_names.push_back("a"); column_names.push_back("b"); QueryProcessingStage::Enum stage; BlockInputStreamPtr in = table->read(column_names, 0, Context{}, Settings(), stage)[0]; Block sample; { ColumnWithTypeAndName col; col.type = std::make_shared<DataTypeUInt64>(); sample.insert(col); } { ColumnWithTypeAndName col; col.type = std::make_shared<DataTypeUInt8>(); sample.insert(col); } WriteBufferFromOStream out_buf(std::cout); LimitBlockInputStream in_limit(in, 10, 0); RowOutputStreamPtr output_ = std::make_shared<TabSeparatedRowOutputStream>(out_buf, sample); BlockOutputStreamFromRowOutputStream output(output_); copyData(in_limit, output); } return 0; } catch (const DB::Exception & e) { std::cerr << e.what() << ", " << e.displayText() << std::endl; 
return 1; }
void write(const Block & block) { if (!block) return; size_t rows = block.rowsInFirstColumn(); if (!rows) return; StoragePtr destination; if (!storage.no_destination) { destination = storage.context.tryGetTable(storage.destination_database, storage.destination_table); if (destination) { if (destination.get() == &storage) throw Exception("Destination table is myself. Write will cause infinite loop.", ErrorCodes::INFINITE_LOOP); /// Проверяем структуру таблицы. try { destination->check(block, true); } catch (Exception & e) { e.addMessage("(when looking at destination table " + storage.destination_database + "." + storage.destination_table + ")"); throw; } } } size_t bytes = block.bytes(); /// Если блок уже превышает максимальные ограничения, то пишем минуя буфер. if (rows > storage.max_thresholds.rows || bytes > storage.max_thresholds.bytes) { if (!storage.no_destination) { LOG_TRACE(storage.log, "Writing block with " << rows << " rows, " << bytes << " bytes directly."); storage.writeBlockToDestination(block, destination); } return; } /// Распределяем нагрузку по шардам по номеру потока. const auto start_shard_num = Poco::ThreadNumber::get() % storage.num_shards; /// Перебираем буферы по кругу, пытаясь заблокировать mutex. Не более одного круга. auto shard_num = start_shard_num; size_t try_no = 0; for (; try_no != storage.num_shards; ++try_no) { std::unique_lock<std::mutex> lock(storage.buffers[shard_num].mutex, std::try_to_lock_t()); if (lock.owns_lock()) { insertIntoBuffer(block, storage.buffers[shard_num], std::move(lock)); break; } ++shard_num; if (shard_num == storage.num_shards) shard_num = 0; } /// Если так и не удалось ничего сразу заблокировать, то будем ждать на mutex-е. if (try_no == storage.num_shards) insertIntoBuffer(block, storage.buffers[start_shard_num], std::unique_lock<std::mutex>(storage.buffers[start_shard_num].mutex)); }