Exemplo n.º 1
0
/// coalesce(arg0, arg1, ..., argN) is evaluated by delegating to multiIf:
///   multiIf(isNotNull(arg0), arg0, isNotNull(arg1), arg1, ..., isNotNull(argN), argN, NULL)
/// All intermediate columns are created in a private copy of the block;
/// only the column at position `result` is moved back into `block`.
void FunctionCoalesce::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result)
{
	Block scratch = block;

	FunctionIsNotNull not_null_check;
	ColumnNumbers branches;

	for (const auto argument : arguments)
	{
		/// Position of the new UInt8 column that receives isNotNull(argument).
		const size_t mask_pos = scratch.columns();
		scratch.insert({nullptr, std::make_shared<DataTypeUInt8>(), ""});
		not_null_check.executeImpl(scratch, {argument}, mask_pos);

		/// One (condition, value) pair of multiIf.
		branches.push_back(mask_pos);
		branches.push_back(argument);
	}

	/// The trailing "else" branch of multiIf: a column consisting of NULLs.
	const size_t fallback_pos = scratch.columns();

	ColumnWithTypeAndName fallback;
	fallback.column = std::make_shared<ColumnNull>(scratch.rows(), Null());
	fallback.type = std::make_shared<DataTypeNull>();
	fallback.name = "NULL";
	scratch.insert(fallback);

	branches.push_back(fallback_pos);

	FunctionMultiIf{}.executeImpl(scratch, branches, result);

	/// Hand the computed column over to the caller's block.
	auto & computed = scratch.safeGetByPosition(result).column;
	block.safeGetByPosition(result).column = std::move(computed);
}
Exemplo n.º 2
0
    /** Evaluates a chain of (condition, value) pairs followed by an "else" value:
      *  args = [cond_0, then_0, cond_1, then_1, ..., else].
      * For every row the value of the first branch whose condition holds is copied into the result column.
      *
      * block - holds the argument columns and receives the result column.
      * args - positions of the conditions and values inside the block.
      * result - position of the result column; its type is the common type all values are cast to.
      * input_rows_count - number of rows to produce.
      */
    void executeImpl(Block & block, const ColumnNumbers & args, size_t result, size_t input_rows_count) override
    {
        /** We will gather values from columns in branches to result column,
        *  depending on values of conditions.
        */
        struct Instruction
        {
            const IColumn * condition = nullptr;
            const IColumn * source = nullptr;

            bool condition_always_true = false;
            bool condition_is_nullable = false;
            bool source_is_constant = false;
        };

        /// Upper bound for the number of branches: one per (condition, value) pair plus the "else" branch.
        const size_t max_branches = args.size() / 2 + 1;

        std::vector<Instruction> instructions;
        instructions.reserve(max_branches);

        /// Owns the columns produced by casts, so that the raw pointers stored in instructions stay valid.
        /// At most one converted column per branch is needed.
        /// Note: the reservation must use the branch count; `instructions.size()` is still zero at this point.
        Columns converted_columns_holder;
        converted_columns_holder.reserve(max_branches);

        const DataTypePtr & return_type = block.getByPosition(result).type;

        for (size_t i = 0; i < args.size(); i += 2)
        {
            Instruction instruction;
            size_t source_idx = i + 1;

            if (source_idx == args.size())
            {
                /// The last, "else" branch can be treated as a branch with always true condition "else if (true)".
                --source_idx;
                instruction.condition_always_true = true;
            }
            else
            {
                const ColumnWithTypeAndName & cond_col = block.getByPosition(args[i]);

                /// We skip branches that are always false.
                /// If we encounter a branch that is always true, we can finish.

                if (cond_col.column->onlyNull())
                    continue;

                if (cond_col.column->isColumnConst())
                {
                    /// A constant condition is decided once for all rows: NULL and 0 mean "never taken".
                    Field value = typeid_cast<const ColumnConst &>(*cond_col.column).getField();
                    if (value.isNull())
                        continue;
                    if (value.get<UInt64>() == 0)
                        continue;
                    instruction.condition_always_true = true;
                }
                else
                {
                    if (cond_col.column->isColumnNullable())
                        instruction.condition_is_nullable = true;

                    instruction.condition = cond_col.column.get();
                }
            }

            const ColumnWithTypeAndName & source_col = block.getByPosition(args[source_idx]);
            if (source_col.type->equals(*return_type))
            {
                instruction.source = source_col.column.get();
            }
            else
            {
                /// Cast all columns to result type.
                converted_columns_holder.emplace_back(castColumn(source_col, return_type, context));
                instruction.source = converted_columns_holder.back().get();
            }

            if (instruction.source && instruction.source->isColumnConst())
                instruction.source_is_constant = true;

            instructions.emplace_back(std::move(instruction));

            /// Branches after an always-true one can never be reached.
            if (instructions.back().condition_always_true)
                break;
        }

        size_t rows = input_rows_count;
        MutableColumnPtr res = return_type->createColumn();

        for (size_t i = 0; i < rows; ++i)
        {
            /// Take the value from the first branch whose condition holds for this row.
            for (const auto & instruction : instructions)
            {
                bool insert = false;

                if (instruction.condition_always_true)
                    insert = true;
                else if (!instruction.condition_is_nullable)
                    insert = static_cast<const ColumnUInt8 &>(*instruction.condition).getData()[i];
                else
                {
                    const ColumnNullable & condition_nullable = static_cast<const ColumnNullable &>(*instruction.condition);
                    const ColumnUInt8 & condition_nested = static_cast<const ColumnUInt8 &>(condition_nullable.getNestedColumn());
                    const NullMap & condition_null_map = condition_nullable.getNullMapData();

                    /// A NULL condition is treated as false.
                    insert = !condition_null_map[i] && condition_nested.getData()[i];
                }

                if (insert)
                {
                    if (!instruction.source_is_constant)
                        res->insertFrom(*instruction.source, i);
                    else
                        /// A constant column stores its single value at index 0 of the nested data column.
                        res->insertFrom(static_cast<const ColumnConst &>(*instruction.source).getDataColumn(), 0);

                    break;
                }
            }
        }

        block.getByPosition(result).column = std::move(res);
    }
Exemplo n.º 3
0
/// Computes the intersection of the argument arrays and stores it in the column at position `result`.
/// Throws LOGICAL_ERROR if the type of the result column is not an array.
/// The arguments are first cast to a common type (see castColumns), unpacked by prepareArrays,
/// and then processed by an implementation chosen by the nested type of the result.
void FunctionArrayIntersect::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count)
{
    const auto & return_type = block.getByPosition(result).type;
    auto return_type_array = checkAndGetDataType<DataTypeArray>(return_type.get());

    /// The outcome of the checked cast must be tested here (not `return_type` itself),
    /// otherwise a non-array type would lead to a null pointer dereference right below.
    if (!return_type_array)
        throw Exception{"Return type for function " + getName() + " must be array.", ErrorCodes::LOGICAL_ERROR};

    const auto & nested_return_type = return_type_array->getNestedType();

    /// Array(Nothing): the result is a constant column with the default value of the return type.
    if (typeid_cast<const DataTypeNothing *>(nested_return_type.get()))
    {
        block.getByPosition(result).column = return_type->createColumnConstWithDefaultValue(input_rows_count);
        return;
    }

    auto num_args = arguments.size();
    DataTypes data_types;
    data_types.reserve(num_args);
    for (size_t i = 0; i < num_args; ++i)
        data_types.push_back(block.getByPosition(arguments[i]).type);

    auto return_type_with_nulls = getMostSubtype(data_types, true, true);

    Columns columns = castColumns(block, arguments, return_type, return_type_with_nulls);

    UnpackedArrays arrays = prepareArrays(columns);

    ColumnPtr result_column;
    auto not_nullable_nested_return_type = removeNullable(nested_return_type);

    /// Try the numeric implementations first; result_column stays empty if none of them handled the type
    /// (NOTE(review): inferred from the `if (!result_column)` check below - confirm against NumberExecutor).
    TypeListNumbers::forEach(NumberExecutor(arrays, not_nullable_nested_return_type, result_column));

    using DateMap = ClearableHashMap<DataTypeDate::FieldType, size_t, DefaultHash<DataTypeDate::FieldType>,
            HashTableGrower<INITIAL_SIZE_DEGREE>,
            HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(DataTypeDate::FieldType)>>;

    using DateTimeMap = ClearableHashMap<DataTypeDateTime::FieldType, size_t, DefaultHash<DataTypeDateTime::FieldType>,
            HashTableGrower<INITIAL_SIZE_DEGREE>,
            HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(DataTypeDateTime::FieldType)>>;

    using StringMap = ClearableHashMap<StringRef, size_t, StringRefHash, HashTableGrower<INITIAL_SIZE_DEGREE>,
            HashTableAllocatorWithStackMemory<(1ULL << INITIAL_SIZE_DEGREE) * sizeof(StringRef)>>;

    if (!result_column)
    {
        auto column = not_nullable_nested_return_type->createColumn();
        WhichDataType which(not_nullable_nested_return_type);

        if (which.isDate())
            result_column = execute<DateMap, ColumnVector<DataTypeDate::FieldType>, true>(arrays, std::move(column));
        else if (which.isDateTime())
            result_column = execute<DateTimeMap, ColumnVector<DataTypeDateTime::FieldType>, true>(arrays, std::move(column));
        else if (which.isString())
            result_column = execute<StringMap, ColumnString, false>(arrays, std::move(column));
        else if (which.isFixedString())
            result_column = execute<StringMap, ColumnFixedString, false>(arrays, std::move(column));
        else
        {
            /// Generic path: work on the type that may contain nullable parts,
            /// then convert the outcome to the declared return type.
            column = static_cast<const DataTypeArray &>(*return_type_with_nulls).getNestedType()->createColumn();
            result_column = castRemoveNullable(execute<StringMap, IColumn, false>(arrays, std::move(column)), return_type);
        }
    }

    block.getByPosition(result).column = std::move(result_column);
}
Exemplo n.º 4
0
/// Returns the argument columns, each converted (when its type differs) to the type the intersection works on:
///  - nested type is a number, date/datetime or string: Array(T) or Array(Nullable(T)) derived from return_type;
///  - any other nested type: return_type_with_nulls.
/// Columns that already have a suitable type are taken as is.
Columns FunctionArrayIntersect::castColumns(
        Block & block, const ColumnNumbers & arguments, const DataTypePtr & return_type,
        const DataTypePtr & return_type_with_nulls) const
{
    auto array_type = checkAndGetDataType<DataTypeArray>(return_type.get());
    const auto & nested_type = array_type->getNestedType();
    const auto plain_nested_type = removeNullable(nested_type);

    const bool simple_nested = isNumber(plain_nested_type)
                               || isDateOrDateTime(plain_nested_type)
                               || isStringOrFixedString(plain_nested_type);

    /// Array(Nullable(T)). Built only for the simple nested types - the only case where it is consulted.
    DataTypePtr nullable_return_type;
    if (simple_nested)
        nullable_return_type = std::make_shared<DataTypeArray>(makeNullable(nested_type));

    const bool nested_is_nullable = nested_type->isNullable();

    const size_t num_args = arguments.size();
    Columns columns(num_args);

    for (size_t i = 0; i < num_args; ++i)
    {
        const ColumnWithTypeAndName & arg = block.getByPosition(arguments[i]);

        /// Type the argument has to be cast to; remains null when the column can be used unchanged.
        const DataTypePtr * target_type = nullptr;

        if (!simple_nested)
        {
            /// return_type_with_nulls is the most common subtype with possible nullable parts.
            if (!arg.type->equals(*return_type_with_nulls))
                target_type = &return_type_with_nulls;
        }
        else if (nested_is_nullable)
        {
            /// The result is Array(Nullable(T)): everything is brought to exactly that type.
            if (!arg.type->equals(*return_type))
                target_type = &return_type;
        }
        else if (!arg.type->equals(*return_type) && !arg.type->equals(*nullable_return_type))
        {
            /// The result is Array(T), but Array(Nullable(U)) is still cast to Array(Nullable(T)),
            ///  because Nullable(T) cannot be cast to T.
            const bool arg_nested_is_nullable
                = static_cast<const DataTypeArray &>(*arg.type).getNestedType()->isNullable();

            if (arg_nested_is_nullable)
                target_type = &nullable_return_type;
            else
                target_type = &return_type;
        }

        if (target_type)
            columns[i] = castColumn(arg, *target_type, context);
        else
            columns[i] = arg.column;
    }

    return columns;
}
Exemplo n.º 5
0
/// coalesce(arg0, arg1, ..., argN) is computed as
///   multiIf(isNotNull(arg0), assumeNotNull(arg0), isNotNull(arg1), assumeNotNull(arg1), ..., argN)
/// after the arguments whose type can only be NULL have been dropped.
/// Auxiliary columns are created in a copy of the block; only the result column of `block` is assigned.
void FunctionCoalesce::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result)
{
    /// Collect the arguments that may contribute a value. Nothing after the first
    /// non-nullable argument can be reached, so the scan stops there.
    ColumnNumbers candidates;
    candidates.reserve(arguments.size());
    for (const auto & arg_pos : arguments)
    {
        const auto & arg_type = block.getByPosition(arg_pos).type;

        if (!arg_type->onlyNull())
        {
            candidates.push_back(arg_pos);

            if (!arg_type->isNullable())
                break;
        }
    }

    FunctionIsNotNull is_not_null;
    FunctionAssumeNotNull assume_not_null;
    ColumnNumbers branches;

    Block scratch = block;

    /// Every candidate except the last one becomes a (condition, value) pair of multiIf.
    const size_t num_candidates = candidates.size();
    for (size_t i = 0; i + 1 < num_candidates; ++i)
    {
        const size_t source_pos = candidates[i];
        const size_t cond_pos = scratch.columns();
        const size_t value_pos = cond_pos + 1;

        scratch.insert({nullptr, std::make_shared<DataTypeUInt8>(), ""});
        is_not_null.execute(scratch, {source_pos}, cond_pos);

        scratch.insert({nullptr, removeNullable(block.getByPosition(source_pos).type), ""});
        assume_not_null.execute(scratch, {source_pos}, value_pos);

        branches.push_back(cond_pos);
        branches.push_back(value_pos);
    }

    /// The last candidate is passed unchanged as the "else" branch.
    if (num_candidates != 0)
        branches.push_back(candidates.back());

    auto & result_entry = block.getByPosition(result);

    /// If all arguments appeared to be NULL.
    if (branches.empty())
    {
        result_entry.column = result_entry.type->createColumnConstWithDefaultValue(block.rows());
        return;
    }

    /// A single remaining argument is the answer itself.
    if (branches.size() == 1)
    {
        result_entry.column = block.getByPosition(branches.front()).column;
        return;
    }

    FunctionMultiIf{context}.execute(scratch, branches, result);

    ColumnPtr res = std::move(scratch.getByPosition(result).column);

    /// if last argument is not nullable, result should be also not nullable
    const bool last_is_nullable = block.getByPosition(branches.back()).column->isColumnNullable();
    if (!last_is_nullable && res->isColumnNullable())
        res = static_cast<const ColumnNullable &>(*res).getNestedColumnPtr();

    result_entry.column = std::move(res);
}
Exemplo n.º 6
0
    /// coalesce(arg0, arg1, ..., argN) is computed as
    ///   multiIf(isNotNull(arg0), assumeNotNull(arg0), isNotNull(arg1), assumeNotNull(arg1), ..., argN)
    /// after the arguments whose type can only be NULL have been dropped.
    /// The helper functions are obtained from FunctionFactory and run on a copy of the block;
    /// only the result column of `block` is assigned.
    void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
    {
        /// Arguments that may contribute a value. The scan stops at the first
        /// non-nullable argument, because nothing after it can be reached.
        ColumnNumbers useful_args;
        useful_args.reserve(arguments.size());
        for (const auto & position : arguments)
        {
            const auto & arg_type = block.getByPosition(position).type;

            if (!arg_type->onlyNull())
            {
                useful_args.push_back(position);

                if (!arg_type->isNullable())
                    break;
            }
        }

        auto is_not_null = FunctionFactory::instance().get("isNotNull", context);
        auto assume_not_null = FunctionFactory::instance().get("assumeNotNull", context);
        auto multi_if = FunctionFactory::instance().get("multiIf", context);

        ColumnNumbers branch_positions;

        Block work_block = block;

        /// Every useful argument except the last one yields a (condition, value) pair.
        const size_t useful_count = useful_args.size();
        for (size_t i = 0; i + 1 < useful_count; ++i)
        {
            const size_t arg_pos = useful_args[i];
            const size_t condition_pos = work_block.columns();
            const size_t value_pos = condition_pos + 1;

            work_block.insert({nullptr, std::make_shared<DataTypeUInt8>(), ""});
            is_not_null->build({work_block.getByPosition(arg_pos)})->execute(work_block, {arg_pos}, condition_pos, input_rows_count);

            work_block.insert({nullptr, removeNullable(block.getByPosition(arg_pos).type), ""});
            assume_not_null->build({work_block.getByPosition(arg_pos)})->execute(work_block, {arg_pos}, value_pos, input_rows_count);

            branch_positions.push_back(condition_pos);
            branch_positions.push_back(value_pos);
        }

        /// The last useful argument is passed unchanged as the "else" branch.
        if (useful_count != 0)
            branch_positions.push_back(useful_args.back());

        auto & result_entry = block.getByPosition(result);

        /// If all arguments appeared to be NULL.
        if (branch_positions.empty())
        {
            result_entry.column = result_entry.type->createColumnConstWithDefaultValue(input_rows_count);
            return;
        }

        /// A single remaining argument is the answer itself.
        if (branch_positions.size() == 1)
        {
            result_entry.column = block.getByPosition(branch_positions.front()).column;
            return;
        }

        /// multiIf is built from the descriptions of exactly the columns it is going to read.
        ColumnsWithTypeAndName branch_elems;
        branch_elems.reserve(branch_positions.size());
        for (auto position : branch_positions)
            branch_elems.emplace_back(work_block.getByPosition(position));

        multi_if->build(branch_elems)->execute(work_block, branch_positions, result, input_rows_count);

        ColumnPtr res = std::move(work_block.getByPosition(result).column);

        /// if last argument is not nullable, result should be also not nullable
        const bool last_is_nullable = block.getByPosition(branch_positions.back()).column->isColumnNullable();
        if (!last_is_nullable && res->isColumnNullable())
            res = static_cast<const ColumnNullable &>(*res).getNestedColumnPtr();

        result_entry.column = std::move(res);
    }
Exemplo n.º 7
0
    /// Builds an array column whose i-th row is [arg_0[i], arg_1[i], ..., arg_N[i]].
    /// With no arguments the result is a constant column holding the default value of the result type.
    void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
    {
        auto & result_entry = block.getByPosition(result);
        const size_t array_size = arguments.size();

        if (array_size == 0)
        {
            /// We should return constant empty array.
            result_entry.column = result_entry.type->createColumnConstWithDefaultValue(input_rows_count);
            return;
        }

        const DataTypePtr & nested_type = static_cast<const DataTypeArray &>(*result_entry.type).getNestedType();

        /** Arguments whose type differs from the element type of the result are cast to it,
          *  and constant columns are converted to full columns,
          *  so that every source can be read row by row.
          * `owned_sources` keeps the prepared columns alive, `sources` gives plain access to them.
          */
        Columns owned_sources;
        owned_sources.reserve(array_size);
        ColumnRawPtrs sources;
        sources.reserve(array_size);

        for (const auto position : arguments)
        {
            const auto & arg = block.getByPosition(position);

            ColumnPtr prepared;
            if (arg.type->equals(*nested_type))
                prepared = arg.column;
            else
                prepared = castColumn(arg, nested_type, context);

            owned_sources.push_back(prepared->convertToFullColumnIfConst());
            sources.push_back(owned_sources.back().get());
        }

        /// Create and fill the result array.

        auto out = ColumnArray::create(nested_type->createColumn());
        IColumn & out_data = out->getData();
        IColumn::Offsets & out_offsets = out->getOffsets();

        out_data.reserve(input_rows_count * array_size);
        out_offsets.resize(input_rows_count);

        for (size_t row = 0; row < input_rows_count; ++row)
        {
            for (const IColumn * source : sources)
                out_data.insertFrom(*source, row);

            /// Every row contributes exactly array_size elements, so the offsets grow uniformly.
            out_offsets[row] = (row + 1) * array_size;
        }

        result_entry.column = std::move(out);
    }
Exemplo n.º 8
0
/// Applies the aggregate function stored in `aggregate_function` to the array arguments, row by row:
/// for every row a fresh aggregation state is created, the elements of that row's arrays are added to it,
/// and the outcome is appended to the result column.
/// The first argument is not read as data here (only arguments[1..] are);
/// NOTE(review): presumably it carries the name of the aggregate function - confirm against the caller.
/// Throws ILLEGAL_COLUMN for a non-array argument or for a state function writing into a non-state column,
/// and SIZES_OF_ARRAYS_DOESNT_MATCH when the array arguments have different offsets.
void FunctionArrayReduce::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count)
{
    IAggregateFunction & agg_func = *aggregate_function.get();

    /// Memory for a single aggregation state, sized and aligned as the function requires; reused for every row.
    AlignedBuffer place_holder(agg_func.sizeOfData(), agg_func.alignOfData());
    AggregateDataPtr place = place_holder.data();

    /// The arena is created only for functions that report allocating memory in it.
    std::unique_ptr<Arena> arena = agg_func.allocatesMemoryInArena() ? std::make_unique<Arena>() : nullptr;

    size_t rows = input_rows_count;

    /// Aggregate functions do not support constant columns. Therefore, we materialize them.
    std::vector<ColumnPtr> materialized_columns;

    /// All arguments except the first one are data columns.
    const size_t num_arguments_columns = arguments.size() - 1;

    /// Nested data columns of the array arguments, in the form expected by agg_func.add().
    std::vector<const IColumn *> aggregate_arguments_vec(num_arguments_columns);
    /// Offsets of the first array argument; all the others must have equal offsets.
    const ColumnArray::Offsets * offsets = nullptr;

    for (size_t i = 0; i < num_arguments_columns; ++i)
    {
        const IColumn * col = block.getByPosition(arguments[i + 1]).column.get();
        const ColumnArray::Offsets * offsets_i = nullptr;
        if (const ColumnArray * arr = checkAndGetColumn<ColumnArray>(col))
        {
            aggregate_arguments_vec[i] = &arr->getData();
            offsets_i = &arr->getOffsets();
        }
        else if (const ColumnConst * const_arr = checkAndGetColumnConst<ColumnArray>(col))
        {
            /// The full column is stored in materialized_columns so that the pointers taken below stay valid.
            materialized_columns.emplace_back(const_arr->convertToFullColumn());
            const auto & arr = typeid_cast<const ColumnArray &>(*materialized_columns.back().get());
            aggregate_arguments_vec[i] = &arr.getData();
            offsets_i = &arr.getOffsets();
        }
        else
            throw Exception("Illegal column " + col->getName() + " as argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN);

        if (i == 0)
            offsets = offsets_i;
        else if (*offsets_i != *offsets)
            throw Exception("Lengths of all arrays passed to " + getName() + " must be equal.",
                ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH);
    }
    const IColumn ** aggregate_arguments = aggregate_arguments_vec.data();

    MutableColumnPtr result_holder = block.getByPosition(result).type->createColumn();
    IColumn & res_col = *result_holder;

    /// AggregateFunction's states should be inserted into column using specific way
    auto res_col_aggregate_function = typeid_cast<ColumnAggregateFunction *>(&res_col);

    if (!res_col_aggregate_function && agg_func.isState())
        throw Exception("State function " + agg_func.getName() + " inserts results into non-state column "
                        + block.getByPosition(result).type->getName(), ErrorCodes::ILLEGAL_COLUMN);

    /// Elements [current_offset, next_offset) of the nested data columns belong to the current row.
    ColumnArray::Offset current_offset = 0;
    for (size_t i = 0; i < rows; ++i)
    {
        agg_func.create(place);
        ColumnArray::Offset next_offset = (*offsets)[i];

        try
        {
            for (size_t j = current_offset; j < next_offset; ++j)
                agg_func.add(place, aggregate_arguments, j, arena.get());

            if (!res_col_aggregate_function)
                agg_func.insertResultInto(place, res_col);
            else
                res_col_aggregate_function->insertFrom(place);
        }
        catch (...)
        {
            /// The state was created above and must be destroyed before the exception leaves the function.
            agg_func.destroy(place);
            throw;
        }

        agg_func.destroy(place);
        current_offset = next_offset;
    }

    block.getByPosition(result).column = std::move(result_holder);
}