storage::atable_ptr_t GroupByScan::createResultTableLayout() {
  metadata_list metadata;
  std::vector<AbstractTable::SharedDictionaryPtr> dictionaries;

  // creating fields from grouping fields
  storage::atable_ptr_t group_tab = getInputTable(0)->copy_structure_modifiable(&_field_definition);

  // creating fields from aggregate functions
  for (const auto& fun : _aggregate_functions) {
    ColumnMetadata* m = new ColumnMetadata(fun->columnName(getInputTable(0)->nameOfColumn(fun->getField())), fun->getType());
    metadata.push_back(m);
    dictionaries.push_back(AbstractDictionary::dictionaryWithType<DictionaryFactory<OrderIndifferentDictionary> >(fun->getType()));
  }

  storage::atable_ptr_t agg_tab = std::make_shared<Table<DEFAULT_STRATEGY>>(&metadata, &dictionaries, 0, false);

  // clean the metadata
  for (auto e : metadata)
    delete e;

  std::vector<storage::atable_ptr_t> vc;
  if (_field_definition.size() == 0 && _aggregate_functions.size() != 0) {
    return agg_tab;
  } else if (_field_definition.size() != 0 && _aggregate_functions.size() == 0) {
    return group_tab;
  } else {
    vc.push_back(group_tab);
    vc.push_back(agg_tab);
    storage::atable_ptr_t result = std::make_shared<MutableVerticalTable>(vc);
    return result;
  }
}
void MergePrefixSum::executePlanOperation() {
  // A single input table needs no merging; forward it directly.
  if (input.numberOfTables() == 1) {
    addResult(getInputTable());
    return;
  }

  const auto resultSize = getInputTable()->size();
  std::vector<storage::ColumnMetadata> meta {storage::ColumnMetadata::metadataFromString(types::integer_name, "count")};

  auto result = std::make_shared<storage::Table>(&meta, nullptr, resultSize, true, false);
  result->resize(resultSize);

  const auto& res_vec = getDataVector(result).first;

  // Collect the data vectors of all input tables.
  std::vector<std::shared_ptr<storage::FixedLengthVector<value_id_t>>> vecs;
  for (size_t i = 0, stop = input.numberOfTables(); i < stop; ++i) {
    vecs.emplace_back(getDataVector(getInputTable(i)).first);
  }

  // For each row, the merged value is the minimum over all input prefix sums.
  for (size_t i = 0; i < resultSize; ++i) {
    value_id_t pos = std::numeric_limits<value_id_t>::max();
    for (size_t j = 0, stop = vecs.size(); j < stop; ++j) {
      auto tmp = vecs[j]->get(0, i);
      pos = tmp < pos ? tmp : pos;
    }
    res_vec->set(0, i, pos);
  }

  addResult(result);
}
// calculates the prefix sum for a given table
void PrefixSum::executePlanOperation() {
  // get attribute vector of input table
  const auto& in = getInputTable();
  const size_t table_size = in->size();

  // get attribute vector of output table
  std::vector<storage::ColumnMetadata> metadata;
  metadata.push_back(in->metadataAt(0));

  auto output = std::make_shared<storage::Table>(&metadata, nullptr, table_size, true, false);
  output->resize(table_size);

  const auto& oavs = output->getAttributeVectors(0);
  auto ovector = std::dynamic_pointer_cast<storage::FixedLengthVector<value_id_t>>(oavs.at(0).attribute_vector);

  // Build ivector list to avoid lock contention while getting the vectors
  const size_t ivec_size = input.numberOfTables();
  std::vector<vec_ref_t> ivecs;
  for (size_t i = 0; i < ivec_size; ++i) {
    ivecs.emplace_back(getDataVector(getInputTable(i)).first);
  }

  // calculate the prefix sum based on the index and the number of inputs
  // we need to look at to calculate the correct offset
  value_id_t sum = 0;
  for (size_t i = 0; i < table_size; ++i) {
    sum += sumForIndex(ivec_size, ivecs, i);
    ovector->set(0, i, sum + sumForIndexPrev(ivec_size, ivecs, i));
  }

  addResult(output);
}
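// The following is a minimal, self-contained sketch (not HYRISE code) of the idea
// behind prefix-sum based offset computation over several per-worker histograms:
// the write offset of worker w for bucket b is the total count of all buckets
// before b plus the counts that workers 0..w-1 reported for bucket b itself.
// The function name and the flat vector-of-vectors layout are illustrative
// assumptions and do not correspond to the sumForIndex/sumForIndexPrev helpers.
#include <cstddef>
#include <vector>

std::vector<std::vector<size_t>> exclusiveOffsets(const std::vector<std::vector<size_t>>& histograms) {
  const size_t workers = histograms.size();
  const size_t buckets = workers ? histograms[0].size() : 0;
  std::vector<std::vector<size_t>> offsets(workers, std::vector<size_t>(buckets, 0));
  size_t running = 0;  // rows in all buckets before the current one
  for (size_t b = 0; b < buckets; ++b) {
    size_t within = 0;  // rows already claimed by earlier workers for this bucket
    for (size_t w = 0; w < workers; ++w) {
      offsets[w][b] = running + within;
      within += histograms[w][b];
    }
    running += within;
  }
  return offsets;
}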
void PipeliningHashBuild::executePlanOperation() {
  // if no input is available, do nothing.
  if (input.sizeOf<storage::AbstractTable>() == 0) {
    return;
  }

  size_t row_offset = 0;
  // check if the table is a TableRangeView; if yes, provide the offset to the HashTable
  // (named rangeView here to avoid shadowing the operator's input member)
  auto rangeView = std::dynamic_pointer_cast<const storage::TableRangeView>(getInputTable());
  if (rangeView)
    row_offset = rangeView->getStart();

  if (_key == "groupby" || _key == "selfjoin") {
    if (_field_definition.size() == 1)
      emitChunk(std::make_shared<storage::SingleAggregateHashTable>(getInputTable(), _field_definition, row_offset));
    else
      emitChunk(std::make_shared<storage::AggregateHashTable>(getInputTable(), _field_definition, row_offset));
  } else if (_key == "join") {
    if (_field_definition.size() == 1)
      emitChunk(std::make_shared<storage::SingleJoinHashTable>(getInputTable(), _field_definition, row_offset));
    else
      emitChunk(std::make_shared<storage::JoinHashTable>(getInputTable(), _field_definition, row_offset));
  } else {
    throw std::runtime_error("Type in plan operation HashBuild not supported; key: " + _key);
  }
}
void SmallestTableScan::executePlanOperation() {
  auto smallestTable = getInputTable(0);
  for (size_t i = 1; i < input.numberOfTables(); ++i) {
    auto nextTable = getInputTable(i);
    if (nextTable->size() < smallestTable->size()) {
      smallestTable = nextTable;
    }
  }
  addResult(smallestTable);
}
void GroupByScan::writeGroupResult(storage::atable_ptr_t& resultTab,
                                   const std::shared_ptr<storage::pos_list_t>& hit,
                                   const size_t row) {
  for (const auto& columnNr : _field_definition) {
    storage::write_group_functor fun(getInputTable(0), resultTab, hit->at(0), (size_t)columnNr, row);
    storage::type_switch<hyrise_basic_types> ts;
    ts(getInputTable(0)->typeOfColumn(columnNr), fun);
  }
  for (const auto& funct : _aggregate_functions) {
    funct->processValuesForRows(getInputTable(0), hit.get(), resultTab, row);
  }
}
void RadixCluster2ndPass::executePlanOperation() {
  // ProfilerStart("RadixCluster.prof");
  assert(_bits1 != 0);
  assert(_bits2 != 0);

  const auto& tab = getInputTable();
  auto tableSize = getInputTable()->size();

  auto result = getInputTable(1);

  // Get the prefix sum from the input
  const auto& in_data = getFixedDataVector(getInputTable(2)).first;
  auto prefix = std::dynamic_pointer_cast<storage::AbstractFixedLengthVector<value_id_t>>(in_data->copy());

  // Cast the vectors to the lowest part in the hierarchy
  auto data_hash = getFixedDataVector(result).first;
  auto data_pos = getFixedDataVector(result, 1).first;

  // Get the check data
  const auto& rx_hashes = getFixedDataVector(tab).first;
  const auto& rx_pos = getFixedDataVector(tab, 1).first;

  auto mask1 = ((1 << _bits1) - 1) << _significantOffset1;
  auto mask2 = ((1 << _bits2) - 1) << _significantOffset2;

  size_t _start = 0, _stop = tableSize;
  if (_count > 0) {
    _start = (tableSize / _count) * _part;
    _stop = (_count - 1) == _part ? tableSize : (tableSize / _count) * (_part + 1);
  }

  // Iterate over the first pass radix clustered table and write the
  // newly clustered results
  for (size_t row = _start; row < _stop; ++row) {
    const auto hash_value = rx_hashes->get(0, row);
    const auto part1 = (hash_value & mask1) >> _significantOffset1;
    const auto part2 = (hash_value & mask2) >> _significantOffset2;
    const auto lookup = part1 * (1 << _bits2) + part2;
    const auto pos_to_write = prefix->inc(0, lookup);
    data_hash->set(0, pos_to_write, hash_value);
    data_pos->set(0, pos_to_write, rx_pos->get(0, row));
  }
  // ProfilerStop();

  addResult(result);
}
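// A small standalone sketch (not HYRISE code) of the bucket arithmetic used above:
// two disjoint bit ranges of a hash value are extracted with masks and shifts and
// combined into a single bucket index for the second clustering pass. The function
// and parameter names are assumptions chosen to mirror _bits1/_bits2 and
// _significantOffset1/_significantOffset2.
#include <cstddef>
#include <cstdint>

size_t secondPassBucket(uint32_t hash, unsigned bits1, unsigned offset1, unsigned bits2, unsigned offset2) {
  const uint32_t mask1 = ((1u << bits1) - 1) << offset1;
  const uint32_t mask2 = ((1u << bits2) - 1) << offset2;
  const uint32_t part1 = (hash & mask1) >> offset1;  // cluster of the first pass
  const uint32_t part2 = (hash & mask2) >> offset2;  // sub-cluster of the second pass
  return static_cast<size_t>(part1) * (1u << bits2) + part2;  // part1 * 2^bits2 + part2
}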
void GroupByScan::setupPlanOperation() {
  _PlanOperation::setupPlanOperation();
  const auto& t = getInputTable(0);
  for (const auto& function : _aggregate_functions) {
    function->walk(*t);
  }
}
string AbstractPlanNode::debug(const string& spacer) const {
  std::ostringstream buffer;
  buffer << spacer << "* " << debug() << "\n";
  std::string info_spacer = spacer + " |";
  buffer << debugInfo(info_spacer);
  //
  // Inline PlanNodes
  //
  if (!m_inlineNodes.empty()) {
    buffer << info_spacer << "Inline Plannodes: " << m_inlineNodes.size() << "\n";
    string internal_spacer = info_spacer + " ";
    map<PlanNodeType, AbstractPlanNode*>::const_iterator it;
    for (it = m_inlineNodes.begin(); it != m_inlineNodes.end(); it++) {
      buffer << info_spacer << "Inline " << planNodeToString(it->second->getPlanNodeType()) << ":\n";
      buffer << it->second->debugInfo(internal_spacer);
    }
  }
  //
  // Output table
  //
  Table* outputTable = getOutputTable();
  buffer << info_spacer << "Output table:\n";
  if (outputTable != NULL) {
    buffer << outputTable->debug(spacer + " ");
  } else {
    buffer << " " << info_spacer << "<NULL>\n";
  }
  //
  // Input tables
  //
  for (int i = 0; i < getInputTableCount(); ++i) {
    Table* inputTable = getInputTable(i);
    buffer << info_spacer << "Input table " << i << ":\n";
    if (inputTable != NULL) {
      buffer << inputTable->debug(spacer + " ");
    } else {
      buffer << " " << info_spacer << "<NULL>\n";
    }
  }
  //
  // Traverse the tree
  //
  string child_spacer = spacer + " ";
  for (int ctr = 0, cnt = static_cast<int>(m_children.size()); ctr < cnt; ctr++) {
    buffer << child_spacer << m_children[ctr]->getPlanNodeType() << "\n";
    buffer << m_children[ctr]->debug(child_spacer);
  }
  return (buffer.str());
}
void PipeliningTableScan::executePlanOperation() {
  // TODO can we take this and move it into the PipelineObserver interface?
  if (!getInputTable()) {
    return;
  }

  size_t start, stop;
  const auto& tablerange = std::dynamic_pointer_cast<const storage::TableRangeView>(getInputTable());
  if (tablerange) {
    start = tablerange->getStart();
    stop = start + tablerange->size();
  } else {
    start = 0;
    stop = getInputTable()->size();
  }

  // When the input is empty, don't bother trying to generate results
  pos_list_t* positions = new pos_list_t();
  if (stop - start > 0) {
    // scan in 100K chunks
    for (size_t chunk = 0; chunk < ((stop - start) / 100000 + 1); ++chunk) {
      size_t partial_start = start + chunk * 100000;
      size_t partial_stop = std::min(partial_start + 100000, stop);
      _expr->match(positions, partial_start, partial_stop);
      if (positions->size() >= _chunkSize) {
        createAndEmitChunk(positions);
        positions = new pos_list_t();
      }
    }
  } else {
    createAndEmitChunk(positions);
    return;  // the position list has been handed off and nothing more can be emitted
  }

  // emit final chunk
  if (positions->size()) {
    createAndEmitChunk(positions);
  }
}
void GroupByScan::executePlanOperation() {
  if ((_field_definition.size() != 0) && (input.numberOfHashTables() >= 1)) {
    if (_field_definition.size() == 1)
      return executeGroupBy<SingleAggregateHashTable, aggregate_single_hash_map_t, aggregate_single_key_t>();
    else
      return executeGroupBy<AggregateHashTable, aggregate_hash_map_t, aggregate_key_t>();
  } else {
    auto resultTab = createResultTableLayout();
    resultTab->resize(1);
    for (const auto& funct : _aggregate_functions) {
      funct->processValuesForRows(getInputTable(0), nullptr, resultTab, 0);
    }
    this->addResult(resultTab);
  }
}
void PagedIndexScan::executePlanOperation() {
  auto start_time = std::chrono::high_resolution_clock::now();

  hyrise::io::StorageManager* sm = hyrise::io::StorageManager::getInstance();
  auto idx = sm->getInvertedIndex(_indexName);

  // Handle type of index and value
  storage::type_switch<hyrise_basic_types> ts;
  ScanPagedIndexFunctor fun(_value, idx, _field_definition[0], getInputTable());
  storage::pos_list_t* pos = ts(input.getTable(0)->typeOfColumn(_field_definition[0]), fun);

  addResult(hyrise::storage::PointerCalculator::create(input.getTable(0), pos));

  auto end_time = std::chrono::high_resolution_clock::now();
  std::cout << std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time).count()
            << " us (PagedIndexScan)" << std::endl;
}
void PipeliningTableScan::setupPlanOperation() {
  const auto& table = getInputTable();
  // TODO can this also be moved into the PipelineObserver interface?
  if (!table) {
    return;
  }
  auto tablerange = std::dynamic_pointer_cast<const storage::TableRangeView>(table);
  if (tablerange) {
    _table = tablerange->getActualTable();
    _expr->walk({tablerange->getActualTable()});
  } else {
    _table = table;
    _expr->walk({table});
  }
}
storage::c_atable_ptr_t HashJoinProbe::getProbeTable() const { return getInputTable(); }
void Barrier::executePlanOperation() {
  for (size_t i = 0; i < _field_definition.size(); ++i)
    addResult(getInputTable(i));
}
storage::c_atable_ptr_t PipeliningHashProbe::getProbeTable() const { return getInputTable(); }
const PlanOperation* PlanOperation::execute() {
  const bool recordPerformance = _performance_attr != nullptr;

  // Check if we really need this
  epoch_t startTime = 0;
  if (recordPerformance)
    startTime = get_epoch_nanoseconds();

  PapiTracer pt;

  // Start the execution
  refreshInput();
  setupPlanOperation();

  if (recordPerformance) {
    pt.addEvent("PAPI_TOT_CYC");
    pt.addEvent(getEvent());
    pt.start();
  }

  executePlanOperation();

  if (recordPerformance)
    pt.stop();

  teardownPlanOperation();

  if (recordPerformance) {
    epoch_t endTime = get_epoch_nanoseconds();
    std::string threadId = boost::lexical_cast<std::string>(std::this_thread::get_id());
    size_t cardinality;
    unsigned core = getCurrentCore();
    unsigned node = getCurrentNode();

    std::optional<size_t> in_size;
    if (const auto& in = getInputTable()) {
      in_size = in->size();
    }

    std::optional<size_t> out_size;
    if (const auto& out = getResultTable()) {
      out_size = out->size();
    }

    if (getResultTable() != empty_result)
      cardinality = getResultTable()->size();
    else
      // the cardinality is max(size_t) by convention if there is no return table
      cardinality = std::numeric_limits<size_t>::max();

    *_performance_attr = (performance_attributes_t) {pt.value("PAPI_TOT_CYC"), pt.value(getEvent()), getEvent(),
                                                     planOperationName(), _operatorId, startTime, endTime, threadId,
                                                     cardinality, core, node, in_size, out_size};
  }

  setState(OpSuccess);
  return this;
}
void MergeStore::executePlanOperation() {
  auto t = checked_pointer_cast<const storage::Store>(getInputTable());
  auto store = std::const_pointer_cast<storage::Store>(t);
  store->merge();
  addResult(store);
}