Esempio n. 1
0
Histogram* Analyzer::computeHistogram(const AttributeID& attr_id,
                                      const unsigned nbuckets) {
  printf("Compute for histogram for attribute %s (%d buckets)\n",
         Catalog::getInstance()
             ->getTable(attr_id.table_id)
             ->getAttribute(attr_id.offset)
             .attrName.c_str(),
         nbuckets);
  Catalog* catalog = Catalog::getInstance();

  TableDescriptor* table = catalog->getTable(attr_id.table_id);
  ProjectionDescriptor* projection = NULL;

  unsigned pidSize = table->getNumberOfProjection();
  const Attribute attr = table->getAttribute(attr_id.offset);

  for (unsigned i = 0; i < pidSize; ++i) {
    if (table->getProjectoin(i)->hasAttribute(attr)) {
      projection = table->getProjectoin(i);
      break;
    }
  }

  std::vector<Attribute> group_by_attributes;
  std::vector<Attribute> aggregation_attributes;

  group_by_attributes.push_back(attr);
  aggregation_attributes.push_back(attr);

  std::vector<PhysicalAggregation::State::Aggregation> aggregation_function;
  aggregation_function.push_back(PhysicalAggregation::State::kCount);

  LogicalOperator* sb_payload_scan = new LogicalScan(projection);

  LogicalOperator* aggregation =
      new LogicalAggregation(group_by_attributes, aggregation_attributes,
                             aggregation_function, sb_payload_scan);
  const NodeID collector_node_id = 0;

  LogicalOperator* root = new LogicalQueryPlanRoot(
      collector_node_id, aggregation, LogicalQueryPlanRoot::kResultCollector);

  PhysicalOperatorBase* collector =
      root->GetPhysicalPlan(1024 * 64 - sizeof(unsigned));

  collector->Open();
  collector->Next(0);
  collector->Close();
  ResultSet* resultset = collector->GetResultSet();
  ResultSet::Iterator it = resultset->createIterator();

  BlockStreamBase* block;
  void* tuple;
  BlockStreamBase::BlockStreamTraverseIterator* block_it;

  unsigned long valueCount = resultset->getNumberOftuples();
  unsigned long tupleCount = 0;
  TuplePtr* list = new TuplePtr[valueCount];
  unsigned long i = 0;
  while (block = (BlockStreamBase*)it.atomicNextBlock()) {
    block_it = block->createIterator();
    while (tuple = block_it->nextTuple()) {
      list[i++] = tuple;
      tupleCount += getFrequency(tuple, attr.attrType);
    }
  }

  Histogram* stat = new Histogram(nbuckets);

  stat->setValueCount(valueCount);
  stat->setTupleCount(tupleCount);

  qsort_r(list, valueCount, sizeof(void*), compare,
          (void*)(attr.attrType->operate));

  mcvAnalyse(list, valueCount, attr, (Histogram*)stat);
  equiDepthAnalyse(list, valueCount, attr, (Histogram*)stat);

  //	StatManager::getInstance()->addStat(attrID, stat);
  //	StatManager::getInstance()->getTableStatistic(attrID.table_id)
  delete list;
  resultset->destory();
  return stat;
}