MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPartition & block_with_partition)
{
    Block & block = block_with_partition.block;

    static const String TMP_PREFIX = "tmp_insert_";

    /// This will generate a unique name within the scope of the current server process.
    Int64 temp_index = data.insert_increment.get();

    MergeTreeDataPart::MinMaxIndex minmax_idx;
    minmax_idx.update(block, data.minmax_idx_columns);

    MergeTreePartition partition(std::move(block_with_partition.partition));

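    /// Both block numbers of the new part are set to temp_index, and the level is 0: a freshly written part has not been merged yet.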
    MergeTreePartInfo new_part_info(partition.getID(data), temp_index, temp_index, 0);
    String part_name;
    if (data.format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING)
    {
        DayNum min_date(minmax_idx.parallelogram[data.minmax_idx_date_column_pos].left.get<UInt64>());
        DayNum max_date(minmax_idx.parallelogram[data.minmax_idx_date_column_pos].right.get<UInt64>());

        const auto & date_lut = DateLUT::instance();

        DayNum min_month = date_lut.toFirstDayNumOfMonth(DayNum(min_date));
        DayNum max_month = date_lut.toFirstDayNumOfMonth(DayNum(max_date));

        if (min_month != max_month)
            throw Exception("Logical error: part spans more than one month.", ErrorCodes::LOGICAL_ERROR);

        part_name = new_part_info.getPartNameV0(min_date, max_date);
    }
    else
        part_name = new_part_info.getPartName();

    MergeTreeData::MutableDataPartPtr new_data_part = std::make_shared<MergeTreeData::DataPart>(data, part_name, new_part_info);
    new_data_part->partition = std::move(partition);
    new_data_part->minmax_idx = std::move(minmax_idx);
    new_data_part->relative_path = TMP_PREFIX + part_name;
    new_data_part->is_temp = true;

    /// The name could be non-unique in case of stale files from previous runs.
    String full_path = new_data_part->getFullPath();
    Poco::File dir(full_path);

    if (dir.exists())
    {
        LOG_WARNING(log, "Removing old temporary directory " + full_path);
        dir.remove(true);
    }

    dir.createDirectories();

    /// If we need to calculate some columns to sort by, do it here.
    if (data.hasSortingKey())
        data.sorting_key_expr->execute(block);

    Names sort_columns = data.sorting_key_columns;
    SortDescription sort_description;
    size_t sort_columns_size = sort_columns.size();
    sort_description.reserve(sort_columns_size);

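    /// Sort by each sorting key column in ascending order.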
    for (size_t i = 0; i < sort_columns_size; ++i)
        sort_description.emplace_back(block.getPositionByName(sort_columns[i]), 1, 1);

    ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterBlocks);

    /// Sort.
    IColumn::Permutation * perm_ptr = nullptr;
    IColumn::Permutation perm;
    if (!sort_description.empty())
    {
        if (!isAlreadySorted(block, sort_description))
        {
            stableGetPermutation(block, sort_description, perm);
            perm_ptr = &perm;
        }
        else
            ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterBlocksAlreadySorted);
    }

    /// This effectively chooses minimal compression method:
    ///  either default lz4 or compression method with zero thresholds on absolute and relative part size.
    auto compression_settings = data.context.chooseCompressionSettings(0, 0);

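    /// Write only the physical columns that are actually present in the block.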
    NamesAndTypesList columns = data.getColumns().getAllPhysical().filter(block.getNames());
    MergedBlockOutputStream out(data, new_data_part->getFullPath(), columns, compression_settings);

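    /// writeSuffixAndFinalizePart() computes the checksums and fills the remaining fields of new_data_part.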
    out.writePrefix();
    out.writeWithPermutation(block, perm_ptr);
    out.writeSuffixAndFinalizePart(new_data_part);

    ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterRows, block.rows());
    ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterUncompressedBytes, block.bytes());
    ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterCompressedBytes, new_data_part->bytes_on_disk);

    return new_data_part;
}
Example no. 2
MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithDateInterval & block_with_dates, Int64 temp_index)
{
    /// For timing the write, reported to the part log below.
    Stopwatch stopwatch;

    Block & block = block_with_dates.block;
    UInt16 min_date = block_with_dates.min_date;
    UInt16 max_date = block_with_dates.max_date;

    const auto & date_lut = DateLUT::instance();

    DayNum_t min_month = date_lut.toFirstDayNumOfMonth(DayNum_t(min_date));
    DayNum_t max_month = date_lut.toFirstDayNumOfMonth(DayNum_t(max_date));

    if (min_month != max_month)
        throw Exception("Logical error: part spans more than one month.");

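    /// Number of index granules (marks) in the part: the number of rows divided by index_granularity, rounded up.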
    size_t part_size = (block.rows() + data.index_granularity - 1) / data.index_granularity;

    static const String TMP_PREFIX = "tmp_insert_";
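    /// Old-style part name: <min_date>_<max_date>_<min_block>_<max_block>_<level>.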
    String part_name = ActiveDataPartSet::getPartName(DayNum_t(min_date), DayNum_t(max_date), temp_index, temp_index, 0);

    MergeTreeData::MutableDataPartPtr new_data_part = std::make_shared<MergeTreeData::DataPart>(data);
    new_data_part->name = part_name;
    new_data_part->relative_path = TMP_PREFIX + part_name;
    new_data_part->is_temp = true;

    Poco::File(new_data_part->getFullPath()).createDirectories();

    /// If we need to compute some columns to sort by, do it here.
    if (data.merging_params.mode != MergeTreeData::MergingParams::Unsorted)
        data.getPrimaryExpression()->execute(block);

    SortDescription sort_descr = data.getSortDescription();

    ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterBlocks);

    /// Sort.
    IColumn::Permutation * perm_ptr = nullptr;
    IColumn::Permutation perm;
    if (data.merging_params.mode != MergeTreeData::MergingParams::Unsorted)
    {
        if (!isAlreadySorted(block, sort_descr))
        {
            stableGetPermutation(block, sort_descr, perm);
            perm_ptr = &perm;
        }
        else
            ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterBlocksAlreadySorted);
    }

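    /// Write only the columns present in the block; this code path always compresses with LZ4.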
    NamesAndTypesList columns = data.getColumnsList().filter(block.getColumnsList().getNames());
    MergedBlockOutputStream out(data, new_data_part->getFullPath(), columns, CompressionMethod::LZ4);

    out.writePrefix();
    out.writeWithPermutation(block, perm_ptr);
    MergeTreeData::DataPart::Checksums checksums = out.writeSuffixAndGetChecksums();

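    /// Fill in the metadata of the freshly written part.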
    new_data_part->left_date = DayNum_t(min_date);
    new_data_part->right_date = DayNum_t(max_date);
    new_data_part->left = temp_index;
    new_data_part->right = temp_index;
    new_data_part->level = 0;
    new_data_part->size = part_size;
    new_data_part->modification_time = time(0);
    new_data_part->month = min_month;
    new_data_part->columns = columns;
    new_data_part->checksums = checksums;
    new_data_part->index.swap(out.getIndex());
    new_data_part->size_in_bytes = MergeTreeData::DataPart::calcTotalSize(new_data_part->getFullPath());

    ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterRows, block.rows());
    ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterUncompressedBytes, block.bytes());
    ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterCompressedBytes, new_data_part->size_in_bytes);

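    /// If the part log is enabled, record the creation of the new part.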
    if (auto part_log = context.getPartLog(data.getDatabaseName(), data.getTableName()))
    {
        PartLogElement elem;
        elem.event_time = time(0);

        elem.event_type = PartLogElement::NEW_PART;
        elem.size_in_bytes = new_data_part->size_in_bytes;
        elem.duration_ms = stopwatch.elapsed() / 1000000;

        elem.database_name = new_data_part->storage.getDatabaseName();
        elem.table_name = new_data_part->storage.getTableName();
        elem.part_name = part_name;

        part_log->add(elem);
    }

    return new_data_part;
}