void Analyzer::analyse(const AttributeID &attrID) { Catalog *catalog = Catalog::getInstance(); TableDescriptor* table = catalog->getTable(attrID.table_id); ProjectionDescriptor * projection = NULL; unsigned pidSize = table->getNumberOfProjection(); const Attribute attr = table->getAttribute(attrID.offset); for (unsigned i = 0; i < pidSize; ++i) { if (table->getProjectoin(i)->hasAttribute(attr)) { projection = table->getProjectoin(i); break; } } std::vector<Attribute> group_by_attributes; std::vector<Attribute> aggregation_attributes; group_by_attributes.push_back(attr); aggregation_attributes.push_back(attr); std::vector<BlockStreamAggregationIterator::State::aggregation> aggregation_function; aggregation_function.push_back( BlockStreamAggregationIterator::State::count); LogicalOperator* sb_payload_scan = new LogicalScan(projection); LogicalOperator* aggregation = new Aggregation(group_by_attributes, aggregation_attributes, aggregation_function, sb_payload_scan); const NodeID collector_node_id = 0; LogicalOperator* root = new LogicalQueryPlanRoot(collector_node_id, aggregation, LogicalQueryPlanRoot::RESULTCOLLECTOR); BlockStreamIteratorBase* collector = root->getIteratorTree( 1024 * 64 - sizeof(unsigned)); collector->open(); collector->next(0); collector->close(); ResultSet* resultset = collector->getResultSet(); ResultSet::Iterator it = resultset->createIterator(); BlockStreamBase* block; void* tuple; BlockStreamBase::BlockStreamTraverseIterator *block_it; unsigned long valueCount = resultset->getNumberOftuples(); unsigned long tupleCount = 0; TuplePtr *list = new TuplePtr[valueCount]; unsigned long i = 0; while (block = (BlockStreamBase*) it.atomicNextBlock()) { block_it = block->createIterator(); while (tuple = block_it->nextTuple()) { list[i++] = tuple; tupleCount += getFrequency(tuple, attr.attrType); } } int magicNumber = 100; StatisticOnTable *stat = new StatisticOnTable(magicNumber); stat->setValueCount(valueCount); stat->setTupleCount(tupleCount); 
qsort_r(list, valueCount, sizeof(void *), compare, (void *) (attr.attrType->operate)); mcvAnalyse(list, valueCount, attr, (Histogram *) stat); equiDepthAnalyse(list, valueCount, attr, (Histogram *) stat); // StatManager::getInstance()->addStat(attrID, stat); StatManager::getInstance()->getTableStatistic(attrID.table_id); delete list; resultset->destory(); }
/**
 * Computes a histogram for a single attribute.
 *
 * Builds and runs a logical plan (scan -> aggregation grouped on the
 * attribute with a count -> result collector), gathers pointers to every
 * result tuple, sorts them with the attribute type's comparison operator and
 * feeds the sorted list to mcvAnalyse() and equiDepthAnalyse().
 *
 * @param attr_id  identifies the table (table_id) and the attribute position
 *                 (offset) to analyse.
 * @param nbuckets value passed to the Histogram constructor; printed as the
 *                 bucket count.
 * @return the Histogram allocated with `new` in this function; it is not
 *         released here (presumably the caller takes ownership - confirm).
 */
Histogram* Analyzer::computeHistogram(const AttributeID& attr_id,
                                      const unsigned nbuckets) {
  // nbuckets is unsigned, so it is printed with %u.
  printf("Compute for histogram for attribute %s (%u buckets)\n",
         Catalog::getInstance()
             ->getTable(attr_id.table_id)
             ->getAttribute(attr_id.offset)
             .attrName.c_str(),
         nbuckets);

  Catalog* catalog = Catalog::getInstance();
  TableDescriptor* table = catalog->getTable(attr_id.table_id);
  const Attribute attr = table->getAttribute(attr_id.offset);

  // Pick the first projection of the table that contains the attribute.
  // NOTE(review): if no projection contains it, `projection` stays NULL and
  // is handed to LogicalScan below - confirm whether that can happen.
  ProjectionDescriptor* projection = NULL;
  unsigned pidSize = table->getNumberOfProjection();
  for (unsigned i = 0; i < pidSize; ++i) {
    if (table->getProjectoin(i)->hasAttribute(attr)) {
      projection = table->getProjectoin(i);
      break;
    }
  }

  // Group by the attribute and count it.
  std::vector<Attribute> group_by_attributes;
  std::vector<Attribute> aggregation_attributes;
  group_by_attributes.push_back(attr);
  aggregation_attributes.push_back(attr);

  std::vector<PhysicalAggregation::State::Aggregation> aggregation_function;
  aggregation_function.push_back(PhysicalAggregation::State::kCount);

  LogicalOperator* sb_payload_scan = new LogicalScan(projection);
  LogicalOperator* aggregation =
      new LogicalAggregation(group_by_attributes, aggregation_attributes,
                             aggregation_function, sb_payload_scan);

  const NodeID collector_node_id = 0;
  LogicalOperator* root = new LogicalQueryPlanRoot(
      collector_node_id, aggregation, LogicalQueryPlanRoot::kResultCollector);

  // Execute the plan and fetch the collected result set.
  PhysicalOperatorBase* collector =
      root->GetPhysicalPlan(1024 * 64 - sizeof(unsigned));
  collector->Open();
  collector->Next(0);
  collector->Close();
  ResultSet* resultset = collector->GetResultSet();

  ResultSet::Iterator it = resultset->createIterator();
  BlockStreamBase* block;
  void* tuple;
  BlockStreamBase::BlockStreamTraverseIterator* block_it;

  // One slot per result tuple reported by the result set.
  unsigned long valueCount = resultset->getNumberOftuples();
  unsigned long tupleCount = 0;
  TuplePtr* list = new TuplePtr[valueCount];

  // Record a pointer to every result tuple and sum up getFrequency() over
  // all of them.
  unsigned long i = 0;
  while ((block = (BlockStreamBase*)it.atomicNextBlock())) {
    block_it = block->createIterator();
    while ((tuple = block_it->nextTuple())) {
      list[i++] = tuple;
      tupleCount += getFrequency(tuple, attr.attrType);
    }
  }

  Histogram* stat = new Histogram(nbuckets);
  stat->setValueCount(valueCount);
  stat->setTupleCount(tupleCount);

  // Sort the tuple pointers; the attribute type's operator object is passed
  // to `compare` as the qsort_r context argument.
  qsort_r(list, valueCount, sizeof(void*), compare,
          (void*)(attr.attrType->operate));

  mcvAnalyse(list, valueCount, attr, (Histogram*)stat);
  equiDepthAnalyse(list, valueCount, attr, (Histogram*)stat);

  // StatManager::getInstance()->addStat(attrID, stat);
  // StatManager::getInstance()->getTableStatistic(attrID.table_id)

  // `list` was allocated with new[], so it must be released with delete[].
  delete[] list;
  resultset->destory();
  return stat;
}