void CLASS::processNextRow()
{
    LOG_DEBUG(mLogger, "In OnDemandResultSet::processNextRow()");

    ColumnVector::iterator i;
    int count = 0;
    for (i = mColumns->begin(); i != mColumns->end(); i++, count++)
    {
        ColumnPtr column = *i;
        string columnName = column->getName();
        string result;
        try
        {
            result = mResults->GetString(columnName);
        }
        catch (std::invalid_argument &e)
        {
            result = "";
            LOG_DEBUG(mLogger, str_stream() << e.what() << " -- ignoring");
        }

        SQLSMALLINT type = column->getDataType();
        if (type == SQL_LONGVARBINARY || type == SQL_BINARY ||
            type == SQL_VARBINARY)
        {
            LOG_DEBUG(mLogger, str_stream() << count << " " <<
                      column->getName() << ": [omitting possible binary data]");
        }
        else
        {
            LOG_DEBUG(mLogger, str_stream() << count << " " <<
                      column->getName() << ": " << result);
        }

        if (column->isBound())
        {
            column->setData(count, result);
        }
    }
}
	/**
	 * Equi-join of this column with join_column_ by exhaustive comparison:
	 * every value of this column is compared with every value of join_column_.
	 *
	 * join_column_ must not be NULL (checked by assert). If its value type
	 * differs from Type, an error is printed to std::cout and the process
	 * terminates via exit(-1).
	 *
	 * Returns a pair of position lists of equal length: for each match,
	 * 'first' receives the index in this column and 'second' the index in
	 * join_column_.
	 */
	const PositionListPairPtr ColumnBaseTyped<Type>::nested_loop_join(ColumnPtr join_column_){
		assert(join_column_!=NULL);

		// values of differently typed columns cannot be compared: report and terminate
		if(join_column_->type()!=typeid(Type)){
			std::cout << "Fatal Error!!! Typemismatch for columns " << this->name_  << " and " << join_column_->getName() << std::endl;
			std::cout << "File: " << __FILE__ << " Line: " << __LINE__ << std::endl;
			exit(-1);
		}

		// the type check above makes the downcast to the typed column valid
		typedef shared_pointer_namespace::shared_ptr<ColumnBaseTyped<Type> > TypedColumnPtr;
		TypedColumnPtr other_column = shared_pointer_namespace::static_pointer_cast<ColumnBaseTyped<Type> >(join_column_);

		PositionListPairPtr result( new PositionListPair());
		result->first = PositionListPtr( new PositionList() );
		result->second = PositionListPtr( new PositionList() );
		PositionList& left_positions = *(result->first);
		PositionList& right_positions = *(result->second);

		for(unsigned int outer=0;outer<this->size();outer++){
			for(unsigned int inner=0;inner<other_column->size();inner++){
				// skip non-matching value pairs
				if(!((*this)[outer]==(*other_column)[inner])) continue;

				if(debug) std::cout << "MATCH: (" << outer << "," << inner << ")" << std::endl;
				left_positions.push_back(outer);
				right_positions.push_back(inner);
			}
		}

		return result;
	}
	/**
	 * Placeholder for a sort-merge equi-join of this column with join_column_.
	 *
	 * NOTE: the join itself is NOT implemented. After validating its argument
	 * this function always returns a pair of two EMPTY position lists, so
	 * callers currently never receive any matches from it.
	 *
	 * join_column_ must not be NULL (checked by assert, like in
	 * nested_loop_join). If its value type differs from Type, an error is
	 * printed to std::cout and the process terminates via exit(-1).
	 */
	const PositionListPairPtr ColumnBaseTyped<Type>::sort_merge_join(ColumnPtr join_column_){

		// fail fast on a null column instead of dereferencing it in the type check below
		assert(join_column_!=NULL);

		if(join_column_->type()!=typeid(Type)){
			std::cout << "Fatal Error!!! Typemismatch for columns " << this->name_  << " and " << join_column_->getName() << std::endl;
			std::cout << "File: " << __FILE__ << " Line: " << __LINE__ << std::endl;
			exit(-1);
		}

		// TODO: implement the sort and merge phases; until then the result stays empty
		PositionListPairPtr join_tids( new PositionListPair());
		join_tids->first = PositionListPtr( new PositionList() );
		join_tids->second = PositionListPtr( new PositionList() );

		return join_tids;
	}
	/**
	 * Equi-join of this column with join_column_ using a hash table.
	 *
	 * Build phase: every value of this column is inserted into a multimap
	 * that maps the value to its position (TID) in this column.
	 * Probe phase: every value of join_column_ is looked up in that table and
	 * each hit yields one (position in this column, position in join_column_)
	 * pair.
	 *
	 * join_column_ must not be NULL (checked by assert, like in
	 * nested_loop_join). If its value type differs from T, an error is
	 * printed to std::cout and the process terminates via exit(-1).
	 *
	 * Returns a pair of position lists of equal length: 'first' holds
	 * positions in this column, 'second' the matching positions in
	 * join_column_.
	 */
	const PositionListPairPtr ColumnBaseTyped<T>::hash_join(ColumnPtr join_column_){

		typedef boost::unordered_multimap < T, TID, boost::hash<T>, std::equal_to<T> > HashTable;

		// fail fast on a null column instead of dereferencing it in the type check below
		assert(join_column_!=NULL);

		if(join_column_->type()!=typeid(T)){
			std::cout << "Fatal Error!!! Typemismatch for columns " << this->name_  << " and " << join_column_->getName() << std::endl;
			std::cout << "File: " << __FILE__ << " Line: " << __LINE__ << std::endl;
			exit(-1);
		}

		// the type check above makes the downcast to the typed column valid
		shared_pointer_namespace::shared_ptr<ColumnBaseTyped<T> > join_column = shared_pointer_namespace::static_pointer_cast<ColumnBaseTyped<T> >(join_column_);

		PositionListPairPtr join_tids( new PositionListPair());
		join_tids->first = PositionListPtr( new PositionList() );
		join_tids->second = PositionListPtr( new PositionList() );

		//build phase: hash every value of this column together with its position
		HashTable hashtable;
		for(unsigned int i=0;i<this->size();i++)
			hashtable.insert(std::pair<T,TID> ((*this)[i],i));

		//probe phase: look up every value of the join column
		//(presumably the larger relation -- this is not enforced here)
		for(unsigned int i=0;i<join_column->size();i++){
			const T& probe_value = (*join_column)[i];
			std::pair<typename HashTable::iterator, typename HashTable::iterator> range = hashtable.equal_range(probe_value);
			//equal_range only yields entries whose key equals probe_value under
			//std::equal_to<T>, so every element of the range is a match
			for(typename HashTable::iterator it=range.first; it!=range.second; ++it){
				join_tids->first->push_back(it->second);
				join_tids->second->push_back(i);
			}
		}

		return join_tids;
	}
/** Converts `column` from `from_type` to `to_type`, where the two types are expected to differ
  * only in LowCardinality wrapping, descending into Const, Array and Tuple.
  *
  * - A null column, or equal types, returns `column` unchanged.
  * - ColumnConst: the data column is converted and wrapped into a new ColumnConst of the same size.
  * - LowCardinality(T) -> T: the column is converted to a full column.
  * - T -> LowCardinality(T): a new LowCardinality column is created and filled from the full column.
  * - Array -> Array: the nested data is converted, the offsets are reused.
  * - Tuple -> Tuple: every element is converted with the element types at the same position.
  *
  * Throws ILLEGAL_COLUMN if the column does not match an Array/Tuple `from_type`,
  * and TYPE_MISMATCH if none of the cases above applies.
  */
ColumnPtr recursiveLowCardinalityConversion(const ColumnPtr & column, const DataTypePtr & from_type, const DataTypePtr & to_type)
{
    /// Nothing to convert.
    if (!column || from_type->equals(*to_type))
        return column;

    /// Constant column: convert the underlying data column and keep the size.
    if (const auto * const_column = typeid_cast<const ColumnConst *>(column.get()))
    {
        auto converted_data = recursiveLowCardinalityConversion(const_column->getDataColumnPtr(), from_type, to_type);
        return ColumnConst::create(std::move(converted_data), const_column->size());
    }

    /// LowCardinality(T) -> T
    const auto * lc_from_type = typeid_cast<const DataTypeLowCardinality *>(from_type.get());
    if (lc_from_type && to_type->equals(*lc_from_type->getDictionaryType()))
        return column->convertToFullColumnIfLowCardinality();

    /// T -> LowCardinality(T)
    const auto * lc_to_type = typeid_cast<const DataTypeLowCardinality *>(to_type.get());
    if (lc_to_type && from_type->equals(*lc_to_type->getDictionaryType()))
    {
        auto lc_column = lc_to_type->createColumn();
        static_cast<ColumnLowCardinality &>(*lc_column).insertRangeFromFullColumn(*column, 0, column->size());
        return std::move(lc_column);
    }

    /// Array(...) -> Array(...): convert nested data, reuse offsets.
    const auto * array_from_type = typeid_cast<const DataTypeArray *>(from_type.get());
    const auto * array_to_type = typeid_cast<const DataTypeArray *>(to_type.get());
    if (array_from_type && array_to_type)
    {
        const auto * array_column = typeid_cast<const ColumnArray *>(column.get());
        if (!array_column)
            throw Exception("Unexpected column " + column->getName() + " for type " + from_type->getName(),
                            ErrorCodes::ILLEGAL_COLUMN);

        auto & from_nested = array_from_type->getNestedType();
        auto & to_nested = array_to_type->getNestedType();
        auto converted_nested = recursiveLowCardinalityConversion(array_column->getDataPtr(), from_nested, to_nested);

        return ColumnArray::create(std::move(converted_nested), array_column->getOffsetsPtr());
    }

    /// Tuple(...) -> Tuple(...): convert element by element.
    const auto * tuple_from_type = typeid_cast<const DataTypeTuple *>(from_type.get());
    const auto * tuple_to_type = typeid_cast<const DataTypeTuple *>(to_type.get());
    if (tuple_from_type && tuple_to_type)
    {
        const auto * tuple_column = typeid_cast<const ColumnTuple *>(column.get());
        if (!tuple_column)
            throw Exception("Unexpected column " + column->getName() + " for type " + from_type->getName(),
                            ErrorCodes::ILLEGAL_COLUMN);

        Columns converted = tuple_column->getColumns();
        auto & from_element_types = tuple_from_type->getElements();
        auto & to_element_types = tuple_to_type->getElements();

        /// .at() keeps the bounds check if the element type lists are shorter than the column list.
        for (size_t pos = 0; pos < converted.size(); ++pos)
            converted[pos] = recursiveLowCardinalityConversion(converted[pos], from_element_types.at(pos), to_element_types.at(pos));

        return ColumnTuple::create(converted);
    }

    throw Exception("Cannot convert: " + from_type->getName() + " to " + to_type->getName(), ErrorCodes::TYPE_MISMATCH);
}
Beispiel #6
0
/** Reads the next block from the mark ranges of the current task.
  *
  * With PREWHERE (prewhere_actions is set):
  *  - the columns needed for the PREWHERE expression are read with pre_reader;
  *  - the expression is executed and its result column is used as a filter;
  *  - the remaining columns are read with reader, for a non-constant filter only
  *    for runs of marks that contain at least one row passing the filter;
  *  - all columns are filtered;
  *  - this is repeated while the resulting block is empty, mark ranges remain
  *    and the stream is not cancelled.
  *
  * Without PREWHERE, marks are read with reader until the block size limit
  * (max_block_size_marks, optionally reduced by the size predictor) is reached.
  *
  * Mark ranges are consumed from the back of task->mark_ranges.
  * Returns an empty block if nothing was read (for example, when cancelled)
  * or if a constant PREWHERE filter is zero.
  * Throws ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER if the PREWHERE column is neither
  * ColumnUInt8 nor ColumnConstUInt8 (after unwrapping Nullable).
  */
Block MergeTreeBaseBlockInputStream::readFromPart()
{
    Block res;

    if (task->size_predictor)
        task->size_predictor->startBlock();

    if (prewhere_actions)
    {
        do
        {
            /// Let's read the full block of columns needed to calculate the expression in PREWHERE.
            /// At least one mark is always attempted.
            size_t space_left = std::max(1LU, max_block_size_marks);
            /// Sub-ranges actually read by pre_reader in this iteration; the other columns are read for them later.
            MarkRanges ranges_to_read;

            if (task->size_predictor)
            {
                /// FIXME: size prediction model is updated by filtered rows, but it predicts size of unfiltered rows also

                size_t recommended_marks = task->size_predictor->estimateNumMarks(preferred_block_size_bytes, storage.index_granularity);
                if (res && recommended_marks < 1)
                    break;

                space_left = std::min(space_left, std::max(1LU, recommended_marks));
            }

            /// Take marks from the beginning of the last range; drop the range once it is fully consumed.
            while (!task->mark_ranges.empty() && space_left && !isCancelled())
            {
                auto & range = task->mark_ranges.back();
                size_t marks_to_read = std::min(range.end - range.begin, space_left);

                pre_reader->readRange(range.begin, range.begin + marks_to_read, res);

                ranges_to_read.emplace_back(range.begin, range.begin + marks_to_read);
                space_left -= marks_to_read;
                range.begin += marks_to_read;
                if (range.begin == range.end)
                    task->mark_ranges.pop_back();
            }

            /// In case of isCancelled.
            if (!res)
                return res;

            progressImpl({ res.rows(), res.bytes() });
            pre_reader->fillMissingColumns(res, task->ordered_names, task->should_reorder);

            /// Compute the expression in PREWHERE.
            prewhere_actions->execute(res);

            /// Keep the filter column alive even if it is erased from the block below.
            ColumnPtr column = res.getByName(prewhere_column).column;
            if (task->remove_prewhere_column)
                res.erase(prewhere_column);

            /// Size of the block before the remaining columns are read; used to report only the added bytes as progress.
            const auto pre_bytes = res.bytes();

            /// For a Nullable filter column only its nested column is examined.
            ColumnPtr observed_column;
            if (column->isNullable())
            {
                ColumnNullable & nullable_col = static_cast<ColumnNullable &>(*column);
                observed_column = nullable_col.getNestedColumn();
            }
            else
                observed_column = column;

            /** If the filter is a constant (for example, it says PREWHERE 1),
                * then either return an empty block, or return the block unchanged.
                */
            if (const auto column_const = typeid_cast<const ColumnConstUInt8 *>(observed_column.get()))
            {
                if (!column_const->getData())
                {
                    res.clear();
                    return res;
                }

                /// Constant nonzero filter: read the other columns for everything that pre_reader has read.
                for (const auto & range : ranges_to_read)
                    reader->readRange(range.begin, range.end, res);

                progressImpl({ 0, res.bytes() - pre_bytes });
            }
            else if (const auto column_vec = typeid_cast<const ColumnUInt8 *>(observed_column.get()))
            {
                size_t index_granularity = storage.index_granularity;

                const auto & pre_filter = column_vec->getData();
                /// Allocated with the size of pre_filter; shrunk to the number of copied elements after the loop.
                IColumn::Filter post_filter(pre_filter.size());

                /// Let's read the rest of the columns in the required segments and compose our own filter for them.
                size_t pre_filter_pos = 0;
                size_t post_filter_pos = 0;

                for (const auto & range : ranges_to_read)
                {
                    /// 'begin' and 'pre_filter_begin_pos' mark the start (as a mark and as a position in pre_filter)
                    /// of the current run of consecutive marks that have at least one nonzero filter value.
                    auto begin = range.begin;
                    auto pre_filter_begin_pos = pre_filter_pos;

                    /// The loop intentionally makes one extra step with mark == range.end:
                    /// there 'nonzero' stays 0, so the run that reaches the end of the range is flushed.
                    for (auto mark = range.begin; mark <= range.end; ++mark)
                    {
                        UInt8 nonzero = 0;

                        if (mark != range.end)
                        {
                            /// Up to index_granularity filter values belong to this mark; bounded by the filter size.
                            const size_t limit = std::min(pre_filter.size(), pre_filter_pos + index_granularity);
                            for (size_t row = pre_filter_pos; row < limit; ++row)
                                nonzero |= pre_filter[row];
                        }

                        if (!nonzero)
                        {
                            /// Flush the accumulated run [begin, mark): copy its part of the filter and read its marks.
                            if (mark > begin)
                            {
                                memcpy(
                                    &post_filter[post_filter_pos],
                                    &pre_filter[pre_filter_begin_pos],
                                    pre_filter_pos - pre_filter_begin_pos);
                                post_filter_pos += pre_filter_pos - pre_filter_begin_pos;
                                reader->readRange(begin, mark, res);
                            }
                            /// The next run can start only after the current (all-zero) mark.
                            begin = mark + 1;
                            pre_filter_begin_pos = std::min(pre_filter_pos + index_granularity, pre_filter.size());
                        }

                        if (mark < range.end)
                            pre_filter_pos = std::min(pre_filter_pos + index_granularity, pre_filter.size());
                    }
                }

                /// Nothing was copied to post_filter, so no marks were read by 'reader'.
                /// 'continue' proceeds to the do-while condition, which decides whether to try the next marks.
                if (!post_filter_pos)
                {
                    res.clear();
                    continue;
                }

                progressImpl({ 0, res.bytes() - pre_bytes });

                post_filter.resize(post_filter_pos);

                /// Filter the columns related to PREWHERE using pre_filter,
                ///  other columns - using post_filter.
                size_t rows = 0;
                for (const auto i : ext::range(0, res.columns()))
                {
                    auto & col = res.safeGetByPosition(i);
                    /// The PREWHERE column itself is not filtered (it is replaced by a constant below),
                    /// unless it is the only column in the block.
                    if (col.name == prewhere_column && res.columns() > 1)
                        continue;
                    col.column =
                        col.column->filter(task->column_name_set.count(col.name) ? post_filter : pre_filter, -1);
                    /// Remembers the size of the last filtered column.
                    rows = col.column->size();
                }

                /// Replace column with condition value from PREWHERE to a constant.
                if (!task->remove_prewhere_column)
                    res.getByName(prewhere_column).column = std::make_shared<ColumnConstUInt8>(rows, 1);
            }
            else
                throw Exception{
                    "Illegal type " + column->getName() + " of column for filter. Must be ColumnUInt8 or ColumnConstUInt8.",
                    ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER
                };

            if (res)
            {
                if (task->size_predictor)
                    task->size_predictor->update(res);

                reader->fillMissingColumnsAndReorder(res, task->ordered_names);
            }
        }
        /// Try again while everything read so far was filtered out and there is still something to read.
        while (!task->mark_ranges.empty() && !res && !isCancelled());
    }
    else
    {
        /// No PREWHERE: read all columns at once. At least one mark is always attempted.
        size_t space_left = std::max(1LU, max_block_size_marks);

        while (!task->mark_ranges.empty() && space_left && !isCancelled())
        {
            auto & range = task->mark_ranges.back();

            size_t marks_to_read = std::min(range.end - range.begin, space_left);
            if (task->size_predictor)
            {
                size_t recommended_marks = task->size_predictor->estimateNumMarks(preferred_block_size_bytes, storage.index_granularity);
                /// Stop once something has been read and the predictor recommends no more marks.
                if (res && recommended_marks < 1)
                    break;

                marks_to_read = std::min(marks_to_read, std::max(1LU, recommended_marks));
            }

            reader->readRange(range.begin, range.begin + marks_to_read, res);

            /// The predictor is updated after every read, so the next estimate takes the data read so far into account.
            if (task->size_predictor)
                task->size_predictor->update(res);

            space_left -= marks_to_read;
            range.begin += marks_to_read;
            if (range.begin == range.end)
                task->mark_ranges.pop_back();
        }

        /// In the case of isCancelled.
        if (!res)
            return res;

        progressImpl({ res.rows(), res.bytes() });
        reader->fillMissingColumns(res, task->ordered_names, task->should_reorder);
    }

    return res;
}