unsigned long Analyzer::getDistinctCardinality(const AttributeID& attr_id) { LogicalOperator* scan = new LogicalScan( Catalog::getInstance()->getTable(attr_id.table_id)->getProjectoin(0)); std::vector<Attribute> group_by_attributes; group_by_attributes.push_back( Catalog::getInstance()->getTable(attr_id.table_id)->getAttribute( attr_id.offset)); LogicalOperator* agg = new LogicalAggregation( group_by_attributes, std::vector<Attribute>(), std::vector<PhysicalAggregation::State::Aggregation>(), scan); std::vector<Attribute> aggregation_attributes; aggregation_attributes.push_back(Attribute(ATTRIBUTE_ANY)); std::vector<PhysicalAggregation::State::Aggregation> aggregation_function; aggregation_function.push_back(PhysicalAggregation::State::kCount); LogicalOperator* count_agg = new LogicalAggregation(std::vector<Attribute>(), aggregation_attributes, aggregation_function, agg); LogicalOperator* root = new LogicalQueryPlanRoot( 0, count_agg, LogicalQueryPlanRoot::kResultCollector); PhysicalOperatorBase* collector = root->GetPhysicalPlan(1024 * 64 - sizeof(unsigned)); collector->Open(); collector->Next(0); collector->Close(); ResultSet* resultset = collector->GetResultSet(); ResultSet::Iterator it = resultset->createIterator(); BlockStreamBase::BlockStreamTraverseIterator* b_it = it.nextBlock()->createIterator(); const unsigned long distinct_cardinality = *(unsigned long*)b_it->nextTuple(); resultset->destory(); collector->~PhysicalOperatorBase(); root->~LogicalOperator(); return distinct_cardinality; }
void Analyzer::compute_table_stat(const TableID& tab_id) { TableDescriptor* table = Catalog::getInstance()->getTable(tab_id); LogicalOperator* scan = new LogicalScan(table->getProjectoin(0)); std::vector<Attribute> group_by_attributes; std::vector<Attribute> aggregation_attributes; aggregation_attributes.push_back(Attribute(ATTRIBUTE_ANY)); std::vector<PhysicalAggregation::State::Aggregation> aggregation_function; aggregation_function.push_back(PhysicalAggregation::State::kCount); LogicalOperator* agg = new LogicalAggregation( group_by_attributes, aggregation_attributes, aggregation_function, scan); LogicalOperator* root = new LogicalQueryPlanRoot(0, agg, LogicalQueryPlanRoot::kResultCollector); PhysicalOperatorBase* collector = root->GetPhysicalPlan(1024 * 64 - sizeof(unsigned)); collector->Open(); collector->Next(0); collector->Close(); ResultSet* resultset = collector->GetResultSet(); ResultSet::Iterator it = resultset->createIterator(); BlockStreamBase::BlockStreamTraverseIterator* b_it = it.nextBlock()->createIterator(); const unsigned long tuple_count = *(unsigned long*)b_it->nextTuple(); BlockStreamBase* block; while (block = it.nextBlock()) { BlockStreamBase::BlockStreamTraverseIterator* b_it = block->createIterator(); } TableStatistic* tab_stat = new TableStatistic(); tab_stat->number_of_tuples_ = tuple_count; printf("Statistics for table %s is gathered!\n", Catalog::getInstance()->getTable(tab_id)->getTableName().c_str()); tab_stat->print(); StatManager::getInstance()->setTableStatistic(tab_id, tab_stat); resultset->destory(); root->~LogicalOperator(); }
void Analyzer::analyse(const AttributeID &attrID) { Catalog *catalog = Catalog::getInstance(); TableDescriptor* table = catalog->getTable(attrID.table_id); ProjectionDescriptor * projection = NULL; unsigned pidSize = table->getNumberOfProjection(); const Attribute attr = table->getAttribute(attrID.offset); for (unsigned i = 0; i < pidSize; ++i) { if (table->getProjectoin(i)->hasAttribute(attr)) { projection = table->getProjectoin(i); break; } } std::vector<Attribute> group_by_attributes; std::vector<Attribute> aggregation_attributes; group_by_attributes.push_back(attr); aggregation_attributes.push_back(attr); std::vector<BlockStreamAggregationIterator::State::aggregation> aggregation_function; aggregation_function.push_back( BlockStreamAggregationIterator::State::count); LogicalOperator* sb_payload_scan = new LogicalScan(projection); LogicalOperator* aggregation = new Aggregation(group_by_attributes, aggregation_attributes, aggregation_function, sb_payload_scan); const NodeID collector_node_id = 0; LogicalOperator* root = new LogicalQueryPlanRoot(collector_node_id, aggregation, LogicalQueryPlanRoot::RESULTCOLLECTOR); BlockStreamIteratorBase* collector = root->getIteratorTree( 1024 * 64 - sizeof(unsigned)); collector->open(); collector->next(0); collector->close(); ResultSet* resultset = collector->getResultSet(); ResultSet::Iterator it = resultset->createIterator(); BlockStreamBase* block; void* tuple; BlockStreamBase::BlockStreamTraverseIterator *block_it; unsigned long valueCount = resultset->getNumberOftuples(); unsigned long tupleCount = 0; TuplePtr *list = new TuplePtr[valueCount]; unsigned long i = 0; while (block = (BlockStreamBase*) it.atomicNextBlock()) { block_it = block->createIterator(); while (tuple = block_it->nextTuple()) { list[i++] = tuple; tupleCount += getFrequency(tuple, attr.attrType); } } int magicNumber = 100; StatisticOnTable *stat = new StatisticOnTable(magicNumber); stat->setValueCount(valueCount); stat->setTupleCount(tupleCount); qsort_r(list, valueCount, sizeof(void *), compare, (void *) (attr.attrType->operate)); mcvAnalyse(list, valueCount, attr, (Histogram *) stat); equiDepthAnalyse(list, valueCount, attr, (Histogram *) stat); // StatManager::getInstance()->addStat(attrID, stat); StatManager::getInstance()->getTableStatistic(attrID.table_id); delete list; resultset->destory(); }
Histogram* Analyzer::computeHistogram(const AttributeID& attr_id, const unsigned nbuckets) { printf("Compute for histogram for attribute %s (%d buckets)\n", Catalog::getInstance() ->getTable(attr_id.table_id) ->getAttribute(attr_id.offset) .attrName.c_str(), nbuckets); Catalog* catalog = Catalog::getInstance(); TableDescriptor* table = catalog->getTable(attr_id.table_id); ProjectionDescriptor* projection = NULL; unsigned pidSize = table->getNumberOfProjection(); const Attribute attr = table->getAttribute(attr_id.offset); for (unsigned i = 0; i < pidSize; ++i) { if (table->getProjectoin(i)->hasAttribute(attr)) { projection = table->getProjectoin(i); break; } } std::vector<Attribute> group_by_attributes; std::vector<Attribute> aggregation_attributes; group_by_attributes.push_back(attr); aggregation_attributes.push_back(attr); std::vector<PhysicalAggregation::State::Aggregation> aggregation_function; aggregation_function.push_back(PhysicalAggregation::State::kCount); LogicalOperator* sb_payload_scan = new LogicalScan(projection); LogicalOperator* aggregation = new LogicalAggregation(group_by_attributes, aggregation_attributes, aggregation_function, sb_payload_scan); const NodeID collector_node_id = 0; LogicalOperator* root = new LogicalQueryPlanRoot( collector_node_id, aggregation, LogicalQueryPlanRoot::kResultCollector); PhysicalOperatorBase* collector = root->GetPhysicalPlan(1024 * 64 - sizeof(unsigned)); collector->Open(); collector->Next(0); collector->Close(); ResultSet* resultset = collector->GetResultSet(); ResultSet::Iterator it = resultset->createIterator(); BlockStreamBase* block; void* tuple; BlockStreamBase::BlockStreamTraverseIterator* block_it; unsigned long valueCount = resultset->getNumberOftuples(); unsigned long tupleCount = 0; TuplePtr* list = new TuplePtr[valueCount]; unsigned long i = 0; while (block = (BlockStreamBase*)it.atomicNextBlock()) { block_it = block->createIterator(); while (tuple = block_it->nextTuple()) { list[i++] = tuple; tupleCount += getFrequency(tuple, attr.attrType); } } Histogram* stat = new Histogram(nbuckets); stat->setValueCount(valueCount); stat->setTupleCount(tupleCount); qsort_r(list, valueCount, sizeof(void*), compare, (void*)(attr.attrType->operate)); mcvAnalyse(list, valueCount, attr, (Histogram*)stat); equiDepthAnalyse(list, valueCount, attr, (Histogram*)stat); // StatManager::getInstance()->addStat(attrID, stat); // StatManager::getInstance()->getTableStatistic(attrID.table_id) delete list; resultset->destory(); return stat; }