void CLASS::processNextRow() { LOG_DEBUG(mLogger, "In OnDemandResultSet::processNextRow()"); ColumnVector::iterator i; int count = 0; for (i = mColumns->begin(); i != mColumns->end(); i++, count++) { ColumnPtr column = *i; string columnName = column->getName(); string result; try { result = mResults->GetString(columnName); } catch (std::invalid_argument &e) { result = ""; LOG_DEBUG(mLogger, str_stream() << e.what() << " -- ignoring"); } SQLSMALLINT type = column->getDataType(); if (type == SQL_LONGVARBINARY || type == SQL_BINARY || type == SQL_VARBINARY) { LOG_DEBUG(mLogger, str_stream() << count << " " << column->getName() << ": [omitting possible binary data]"); } else { LOG_DEBUG(mLogger, str_stream() << count << " " << column->getName() << ": " << result); } if (column->isBound()) { column->setData(count, result); } } }
// Joins this column with join_column_ by comparing every element of this
// column against every element of the other one using operator==.
//
// join_column_ must be non-null (asserted) and must hold elements of the same
// type as this column; on a type mismatch a diagnostic is printed to std::cout
// and the process is terminated with exit(-1).
//
// Returns a pair of position lists: for each match, the index into this column
// is appended to `first` and the index into join_column_ to `second`.
const PositionListPairPtr ColumnBaseTyped<Type>::nested_loop_join(ColumnPtr join_column_){
    assert(join_column_!=NULL);

    if(join_column_->type()!=typeid(Type)){
        std::cout << "Fatal Error!!! Typemismatch for columns " << this->name_ << " and " << join_column_->getName() << std::endl;
        std::cout << "File: " << __FILE__ << " Line: " << __LINE__ << std::endl;
        exit(-1);
    }

    // The type check above makes the downcast to the typed column valid.
    typedef shared_pointer_namespace::shared_ptr<ColumnBaseTyped<Type> > TypedColumnPtr;
    TypedColumnPtr join_column = shared_pointer_namespace::static_pointer_cast<ColumnBaseTyped<Type> >(join_column_);

    PositionListPairPtr join_tids( new PositionListPair() );
    join_tids->first = PositionListPtr( new PositionList() );
    join_tids->second = PositionListPtr( new PositionList() );
    PositionList& left_matches = *(join_tids->first);
    PositionList& right_matches = *(join_tids->second);

    for(unsigned int left=0; left<this->size(); left++){
        for(unsigned int right=0; right<join_column->size(); right++){
            if(!((*this)[left]==(*join_column)[right]))
                continue;
            if(debug)
                std::cout << "MATCH: (" << left << "," << right << ")" << std::endl;
            left_matches.push_back(left);
            right_matches.push_back(right);
        }
    }

    return join_tids;
}
// Sort-merge join entry point.
//
// NOTE(review): this is a stub. After validating its argument it returns a
// pair of EMPTY position lists; no sorting or merging is performed, so callers
// currently never see any matches from this function.
//
// join_column_ must be non-null (asserted, as in nested_loop_join) and must
// hold elements of the same type as this column; on a type mismatch a
// diagnostic is printed to std::cout and the process is terminated with
// exit(-1).
const PositionListPairPtr ColumnBaseTyped<Type>::sort_merge_join(ColumnPtr join_column_){
    // Guard the dereference below, consistent with nested_loop_join.
    assert(join_column_!=NULL);

    if(join_column_->type()!=typeid(Type)){
        std::cout << "Fatal Error!!! Typemismatch for columns " << this->name_ << " and " << join_column_->getName() << std::endl;
        std::cout << "File: " << __FILE__ << " Line: " << __LINE__ << std::endl;
        exit(-1);
    }

    // TODO: implement the actual sort-merge join; until then the result is empty.
    PositionListPairPtr join_tids( new PositionListPair());
    join_tids->first = PositionListPtr( new PositionList() );
    join_tids->second = PositionListPtr( new PositionList() );

    return join_tids;
}
// Joins this column with join_column_ using a hash table.
//
// Build phase: every value of THIS column is inserted into a multimap keyed by
// value and mapped to its position. Probe phase: every value of join_column_
// is looked up in that table. The code does not check which side is larger;
// presumably callers pass the larger relation as join_column_ — verify at the
// call sites.
//
// join_column_ must be non-null (asserted, as in nested_loop_join) and must
// hold elements of the same type as this column; on a type mismatch a
// diagnostic is printed to std::cout and the process is terminated with
// exit(-1).
//
// Returns a pair of position lists: for each match, the index into this column
// is appended to `first` and the index into join_column_ to `second`.
const PositionListPairPtr ColumnBaseTyped<T>::hash_join(ColumnPtr join_column_){

    typedef boost::unordered_multimap < T, TID, boost::hash<T>, std::equal_to<T> > HashTable;

    // Guard the dereference below, consistent with nested_loop_join.
    assert(join_column_!=NULL);

    if(join_column_->type()!=typeid(T)){
        std::cout << "Fatal Error!!! Typemismatch for columns " << this->name_ << " and " << join_column_->getName() << std::endl;
        std::cout << "File: " << __FILE__ << " Line: " << __LINE__ << std::endl;
        exit(-1);
    }

    // The type check above makes the downcast to the typed column valid.
    shared_pointer_namespace::shared_ptr<ColumnBaseTyped<T> > join_column = shared_pointer_namespace::static_pointer_cast<ColumnBaseTyped<T> >(join_column_);

    PositionListPairPtr join_tids( new PositionListPair());
    join_tids->first = PositionListPtr( new PositionList() );
    join_tids->second = PositionListPtr( new PositionList() );

    // Build: hash table over the values of this column.
    HashTable hashtable;
    for(unsigned int i=0;i<this->size();i++)
        hashtable.insert( std::pair<T,TID> ((*this)[i],i) );

    // Probe: look up each value of the join column.
    for(unsigned int i=0;i<join_column->size();i++){
        // Fetch the probe key once per row instead of once per candidate.
        const T& probe_key = (*join_column)[i];
        std::pair<typename HashTable::iterator, typename HashTable::iterator> range = hashtable.equal_range(probe_key);
        // equal_range() only yields entries whose key compares equal to the
        // probe key under std::equal_to<T>, so no further comparison is needed.
        for(typename HashTable::iterator it=range.first; it!=range.second; ++it){
            join_tids->first->push_back(it->second);
            join_tids->second->push_back(i);
        }
    }

    return join_tids;
}
/// Converts `column` from `from_type` to `to_type`, where the two types may differ
/// only in LowCardinality wrapping, recursing through constant columns, arrays and tuples.
///
/// Returns `column` unchanged when it is null or when the types are equal.
/// Throws ILLEGAL_COLUMN when an Array/Tuple type is paired with a column of another kind,
/// and TYPE_MISMATCH when no conversion rule applies.
ColumnPtr recursiveLowCardinalityConversion(const ColumnPtr & column, const DataTypePtr & from_type, const DataTypePtr & to_type)
{
    /// Nothing to do for a missing column or identical types.
    if (!column || from_type->equals(*to_type))
        return column;

    /// Constant column: convert the wrapped data column and keep the size.
    if (const auto * const_column = typeid_cast<const ColumnConst *>(column.get()))
    {
        ColumnPtr converted_data = recursiveLowCardinalityConversion(const_column->getDataColumnPtr(), from_type, to_type);
        return ColumnConst::create(std::move(converted_data), const_column->size());
    }

    /// LowCardinality(T) -> T.
    const auto * from_low_cardinality = typeid_cast<const DataTypeLowCardinality *>(from_type.get());
    if (from_low_cardinality && to_type->equals(*from_low_cardinality->getDictionaryType()))
        return column->convertToFullColumnIfLowCardinality();

    /// T -> LowCardinality(T).
    const auto * to_low_cardinality = typeid_cast<const DataTypeLowCardinality *>(to_type.get());
    if (to_low_cardinality && from_type->equals(*to_low_cardinality->getDictionaryType()))
    {
        auto result = to_low_cardinality->createColumn();
        auto & low_cardinality_column = static_cast<ColumnLowCardinality &>(*result);
        low_cardinality_column.insertRangeFromFullColumn(*column, 0, column->size());
        return std::move(result);
    }

    /// Array(X) -> Array(Y): convert the nested data, keep the offsets.
    const auto * from_array = typeid_cast<const DataTypeArray *>(from_type.get());
    const auto * to_array = typeid_cast<const DataTypeArray *>(to_type.get());
    if (from_array && to_array)
    {
        const auto * array_column = typeid_cast<const ColumnArray *>(column.get());
        if (!array_column)
            throw Exception("Unexpected column " + column->getName() + " for type " + from_type->getName(),
                            ErrorCodes::ILLEGAL_COLUMN);

        const auto & nested_from_type = from_array->getNestedType();
        const auto & nested_to_type = to_array->getNestedType();

        ColumnPtr converted_nested = recursiveLowCardinalityConversion(array_column->getDataPtr(), nested_from_type, nested_to_type);
        return ColumnArray::create(std::move(converted_nested), array_column->getOffsetsPtr());
    }

    /// Tuple(...) -> Tuple(...): convert element by element.
    const auto * from_tuple = typeid_cast<const DataTypeTuple *>(from_type.get());
    const auto * to_tuple = typeid_cast<const DataTypeTuple *>(to_type.get());
    if (from_tuple && to_tuple)
    {
        const auto * tuple_column = typeid_cast<const ColumnTuple *>(column.get());
        if (!tuple_column)
            throw Exception("Unexpected column " + column->getName() + " for type " + from_type->getName(),
                            ErrorCodes::ILLEGAL_COLUMN);

        Columns columns = tuple_column->getColumns();
        const auto & from_element_types = from_tuple->getElements();
        const auto & to_element_types = to_tuple->getElements();

        size_t position = 0;
        for (auto & element : columns)
        {
            element = recursiveLowCardinalityConversion(element, from_element_types.at(position), to_element_types.at(position));
            ++position;
        }

        return ColumnTuple::create(columns);
    }

    throw Exception("Cannot convert: " + from_type->getName() + " to " + to_type->getName(), ErrorCodes::TYPE_MISMATCH);
}
/** Reads the next block from the mark ranges of the current task.
  *
  * Ranges are consumed from the back of task->mark_ranges, advancing range.begin and
  * popping a range once it is exhausted. The number of marks read per call is bounded by
  * max_block_size_marks (at least 1) and, when a size predictor is present, by its estimate.
  *
  * With PREWHERE: first the columns needed for the PREWHERE expression are read through pre_reader
  * and the expression is executed; the remaining columns are then read through reader only for
  * the marks in which the filter has at least one non-zero row, and all columns are filtered.
  * If a whole block is filtered out, the next portion is read, until a non-empty block is
  * produced, the ranges are exhausted, or the stream is cancelled.
  *
  * Without PREWHERE: all columns are read through reader.
  *
  * Returns an empty block if nothing was read (for example, on cancellation) or if the PREWHERE
  * filter is a constant zero.
  * Throws ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER if the PREWHERE column is neither ColumnUInt8
  * nor ColumnConstUInt8 (after unwrapping Nullable).
  */
Block MergeTreeBaseBlockInputStream::readFromPart()
{
    Block res;

    if (task->size_predictor)
        task->size_predictor->startBlock();

    if (prewhere_actions)
    {
        do
        {
            /// Let's read the full block of columns needed to calculate the expression in PREWHERE.
            /// Note: std::max<size_t>(1, ...) instead of a 1LU literal, so that template argument
            ///  deduction does not depend on size_t being `unsigned long` on the target platform.
            size_t space_left = std::max<size_t>(1, max_block_size_marks);
            MarkRanges ranges_to_read;

            if (task->size_predictor)
            {
                /// FIXME: size prediction model is updated by filtered rows, but it predicts size of unfiltered rows also
                size_t recommended_marks = task->size_predictor->estimateNumMarks(preferred_block_size_bytes, storage.index_granularity);
                if (res && recommended_marks < 1)
                    break;

                space_left = std::min(space_left, std::max<size_t>(1, recommended_marks));
            }

            while (!task->mark_ranges.empty() && space_left && !isCancelled())
            {
                auto & range = task->mark_ranges.back();
                size_t marks_to_read = std::min(range.end - range.begin, space_left);

                pre_reader->readRange(range.begin, range.begin + marks_to_read, res);

                /// Remember what was read, to read the same marks for the remaining columns later.
                ranges_to_read.emplace_back(range.begin, range.begin + marks_to_read);
                space_left -= marks_to_read;
                range.begin += marks_to_read;

                if (range.begin == range.end)
                    task->mark_ranges.pop_back();
            }

            /// In case of isCancelled.
            if (!res)
                return res;

            progressImpl({ res.rows(), res.bytes() });
            pre_reader->fillMissingColumns(res, task->ordered_names, task->should_reorder);

            /// Compute the expression in PREWHERE.
            prewhere_actions->execute(res);

            ColumnPtr column = res.getByName(prewhere_column).column;
            if (task->remove_prewhere_column)
                res.erase(prewhere_column);

            /// Size of the block before the remaining columns are read; used to report only the added bytes.
            const auto pre_bytes = res.bytes();

            /// For a Nullable filter column, the nested column is inspected.
            ColumnPtr observed_column;
            if (column->isNullable())
            {
                ColumnNullable & nullable_col = static_cast<ColumnNullable &>(*column);
                observed_column = nullable_col.getNestedColumn();
            }
            else
                observed_column = column;

            /** If the filter is a constant (for example, it says PREWHERE 1),
              * then either return an empty block, or return the block unchanged.
              */
            if (const auto column_const = typeid_cast<const ColumnConstUInt8 *>(observed_column.get()))
            {
                if (!column_const->getData())
                {
                    res.clear();
                    return res;
                }

                for (const auto & range : ranges_to_read)
                    reader->readRange(range.begin, range.end, res);

                progressImpl({ 0, res.bytes() - pre_bytes });
            }
            else if (const auto column_vec = typeid_cast<const ColumnUInt8 *>(observed_column.get()))
            {
                size_t index_granularity = storage.index_granularity;

                const auto & pre_filter = column_vec->getData();
                IColumn::Filter post_filter(pre_filter.size());

                /// Let's read the rest of the columns in the required segments and compose our own filter for them.
                size_t pre_filter_pos = 0;
                size_t post_filter_pos = 0;

                for (const auto & range : ranges_to_read)
                {
                    /// [begin, mark) is the current run of consecutive marks that have non-zero filter rows.
                    auto begin = range.begin;
                    auto pre_filter_begin_pos = pre_filter_pos;

                    /// The loop intentionally visits range.end as well: there `nonzero` stays 0,
                    ///  which flushes the last pending run of marks.
                    for (auto mark = range.begin; mark <= range.end; ++mark)
                    {
                        UInt8 nonzero = 0;

                        if (mark != range.end)
                        {
                            /// Each mark covers up to index_granularity rows of the filter.
                            const size_t limit = std::min(pre_filter.size(), pre_filter_pos + index_granularity);
                            for (size_t row = pre_filter_pos; row < limit; ++row)
                                nonzero |= pre_filter[row];
                        }

                        if (!nonzero)
                        {
                            if (mark > begin)
                            {
                                /// Copy the filter slice for the pending run and read that run of marks.
                                memcpy(
                                    &post_filter[post_filter_pos],
                                    &pre_filter[pre_filter_begin_pos],
                                    pre_filter_pos - pre_filter_begin_pos);

                                post_filter_pos += pre_filter_pos - pre_filter_begin_pos;
                                reader->readRange(begin, mark, res);
                            }

                            /// Skip the all-zero mark: the next run starts after it.
                            begin = mark + 1;
                            pre_filter_begin_pos = std::min(pre_filter_pos + index_granularity, pre_filter.size());
                        }

                        if (mark < range.end)
                            pre_filter_pos = std::min(pre_filter_pos + index_granularity, pre_filter.size());
                    }
                }

                /// Nothing passed the filter: drop the block and try the next portion (if any).
                if (!post_filter_pos)
                {
                    res.clear();
                    continue;
                }

                progressImpl({ 0, res.bytes() - pre_bytes });

                post_filter.resize(post_filter_pos);

                /// Filter the columns related to PREWHERE using pre_filter,
                ///  other columns - using post_filter.
                size_t rows = 0;
                for (const auto i : ext::range(0, res.columns()))
                {
                    auto & col = res.safeGetByPosition(i);
                    if (col.name == prewhere_column && res.columns() > 1)
                        continue;

                    col.column = col.column->filter(task->column_name_set.count(col.name) ? post_filter : pre_filter, -1);
                    rows = col.column->size();
                }

                /// Replace column with condition value from PREWHERE to a constant.
                if (!task->remove_prewhere_column)
                    res.getByName(prewhere_column).column = std::make_shared<ColumnConstUInt8>(rows, 1);
            }
            else
                throw Exception{
                    "Illegal type " + column->getName() + " of column for filter. Must be ColumnUInt8 or ColumnConstUInt8.",
                    ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER
                };

            if (res)
            {
                if (task->size_predictor)
                    task->size_predictor->update(res);

                reader->fillMissingColumnsAndReorder(res, task->ordered_names);
            }
        }
        while (!task->mark_ranges.empty() && !res && !isCancelled());
    }
    else
    {
        size_t space_left = std::max<size_t>(1, max_block_size_marks);

        while (!task->mark_ranges.empty() && space_left && !isCancelled())
        {
            auto & range = task->mark_ranges.back();
            size_t marks_to_read = std::min(range.end - range.begin, space_left);

            if (task->size_predictor)
            {
                size_t recommended_marks = task->size_predictor->estimateNumMarks(preferred_block_size_bytes, storage.index_granularity);
                if (res && recommended_marks < 1)
                    break;

                marks_to_read = std::min(marks_to_read, std::max<size_t>(1, recommended_marks));
            }

            reader->readRange(range.begin, range.begin + marks_to_read, res);

            if (task->size_predictor)
                task->size_predictor->update(res);

            space_left -= marks_to_read;
            range.begin += marks_to_read;

            if (range.begin == range.end)
                task->mark_ranges.pop_back();
        }

        /// In the case of isCancelled.
        if (!res)
            return res;

        progressImpl({ res.rows(), res.bytes() });
        reader->fillMissingColumns(res, task->ordered_names, task->should_reorder);
    }

    return res;
}