Пример #1
0
unsigned long Analyzer::getDistinctCardinality(const AttributeID& attr_id) {
  LogicalOperator* scan = new LogicalScan(
      Catalog::getInstance()->getTable(attr_id.table_id)->getProjectoin(0));

  std::vector<Attribute> group_by_attributes;
  group_by_attributes.push_back(
      Catalog::getInstance()->getTable(attr_id.table_id)->getAttribute(
          attr_id.offset));

  LogicalOperator* agg = new LogicalAggregation(
      group_by_attributes, std::vector<Attribute>(),
      std::vector<PhysicalAggregation::State::Aggregation>(), scan);

  std::vector<Attribute> aggregation_attributes;
  aggregation_attributes.push_back(Attribute(ATTRIBUTE_ANY));
  std::vector<PhysicalAggregation::State::Aggregation> aggregation_function;
  aggregation_function.push_back(PhysicalAggregation::State::kCount);

  LogicalOperator* count_agg =
      new LogicalAggregation(std::vector<Attribute>(), aggregation_attributes,
                             aggregation_function, agg);

  LogicalOperator* root = new LogicalQueryPlanRoot(
      0, count_agg, LogicalQueryPlanRoot::kResultCollector);

  PhysicalOperatorBase* collector =
      root->GetPhysicalPlan(1024 * 64 - sizeof(unsigned));
  collector->Open();
  collector->Next(0);
  collector->Close();
  ResultSet* resultset = collector->GetResultSet();
  ResultSet::Iterator it = resultset->createIterator();
  BlockStreamBase::BlockStreamTraverseIterator* b_it =
      it.nextBlock()->createIterator();
  const unsigned long distinct_cardinality = *(unsigned long*)b_it->nextTuple();

  resultset->destory();
  collector->~PhysicalOperatorBase();
  root->~LogicalOperator();
  return distinct_cardinality;
}
Пример #2
0
void Analyzer::compute_table_stat(const TableID& tab_id) {
  TableDescriptor* table = Catalog::getInstance()->getTable(tab_id);

  LogicalOperator* scan = new LogicalScan(table->getProjectoin(0));
  std::vector<Attribute> group_by_attributes;
  std::vector<Attribute> aggregation_attributes;

  aggregation_attributes.push_back(Attribute(ATTRIBUTE_ANY));

  std::vector<PhysicalAggregation::State::Aggregation> aggregation_function;
  aggregation_function.push_back(PhysicalAggregation::State::kCount);
  LogicalOperator* agg = new LogicalAggregation(
      group_by_attributes, aggregation_attributes, aggregation_function, scan);
  LogicalOperator* root =
      new LogicalQueryPlanRoot(0, agg, LogicalQueryPlanRoot::kResultCollector);

  PhysicalOperatorBase* collector =
      root->GetPhysicalPlan(1024 * 64 - sizeof(unsigned));
  collector->Open();
  collector->Next(0);
  collector->Close();
  ResultSet* resultset = collector->GetResultSet();
  ResultSet::Iterator it = resultset->createIterator();
  BlockStreamBase::BlockStreamTraverseIterator* b_it =
      it.nextBlock()->createIterator();
  const unsigned long tuple_count = *(unsigned long*)b_it->nextTuple();
  BlockStreamBase* block;
  while (block = it.nextBlock()) {
    BlockStreamBase::BlockStreamTraverseIterator* b_it =
        block->createIterator();
  }
  TableStatistic* tab_stat = new TableStatistic();
  tab_stat->number_of_tuples_ = tuple_count;
  printf("Statistics for table %s is gathered!\n",
         Catalog::getInstance()->getTable(tab_id)->getTableName().c_str());
  tab_stat->print();
  StatManager::getInstance()->setTableStatistic(tab_id, tab_stat);
  resultset->destory();
  root->~LogicalOperator();
}
Пример #3
0
void Analyzer::analyse(const AttributeID &attrID) {

	Catalog *catalog = Catalog::getInstance();

	TableDescriptor* table = catalog->getTable(attrID.table_id);
	ProjectionDescriptor * projection = NULL;

	unsigned pidSize = table->getNumberOfProjection();
	const Attribute attr = table->getAttribute(attrID.offset);

	for (unsigned i = 0; i < pidSize; ++i) {
		if (table->getProjectoin(i)->hasAttribute(attr)) {
			projection = table->getProjectoin(i);
			break;
		}
	}

	std::vector<Attribute> group_by_attributes;
	std::vector<Attribute> aggregation_attributes;

	group_by_attributes.push_back(attr);
	aggregation_attributes.push_back(attr);

	std::vector<BlockStreamAggregationIterator::State::aggregation> aggregation_function;
	aggregation_function.push_back(
			BlockStreamAggregationIterator::State::count);

	LogicalOperator* sb_payload_scan = new LogicalScan(projection);

	LogicalOperator* aggregation = new Aggregation(group_by_attributes,
			aggregation_attributes, aggregation_function, sb_payload_scan);
	const NodeID collector_node_id = 0;

	LogicalOperator* root = new LogicalQueryPlanRoot(collector_node_id,
			aggregation, LogicalQueryPlanRoot::RESULTCOLLECTOR);

	BlockStreamIteratorBase* collector = root->getIteratorTree(
			1024 * 64 - sizeof(unsigned));

	collector->open();
	collector->next(0);
	collector->close();
	ResultSet* resultset = collector->getResultSet();
	ResultSet::Iterator it = resultset->createIterator();

	BlockStreamBase* block;
	void* tuple;
	BlockStreamBase::BlockStreamTraverseIterator *block_it;

	unsigned long valueCount = resultset->getNumberOftuples();
	unsigned long tupleCount = 0;
	TuplePtr *list = new TuplePtr[valueCount];
	unsigned long i = 0;
	while (block = (BlockStreamBase*) it.atomicNextBlock()) {
		block_it = block->createIterator();
		while (tuple = block_it->nextTuple()) {

			list[i++] = tuple;
			tupleCount += getFrequency(tuple, attr.attrType);
		}
	}

	int magicNumber = 100;

	StatisticOnTable *stat = new StatisticOnTable(magicNumber);

	stat->setValueCount(valueCount);
	stat->setTupleCount(tupleCount);

	qsort_r(list, valueCount, sizeof(void *), compare,
			(void *) (attr.attrType->operate));

	mcvAnalyse(list, valueCount, attr, (Histogram *) stat);
	equiDepthAnalyse(list, valueCount, attr, (Histogram *) stat);

//	StatManager::getInstance()->addStat(attrID, stat);
	StatManager::getInstance()->getTableStatistic(attrID.table_id);
	delete list;
	resultset->destory();
}
Пример #4
0
Histogram* Analyzer::computeHistogram(const AttributeID& attr_id,
                                      const unsigned nbuckets) {
  printf("Compute for histogram for attribute %s (%d buckets)\n",
         Catalog::getInstance()
             ->getTable(attr_id.table_id)
             ->getAttribute(attr_id.offset)
             .attrName.c_str(),
         nbuckets);
  Catalog* catalog = Catalog::getInstance();

  TableDescriptor* table = catalog->getTable(attr_id.table_id);
  ProjectionDescriptor* projection = NULL;

  unsigned pidSize = table->getNumberOfProjection();
  const Attribute attr = table->getAttribute(attr_id.offset);

  for (unsigned i = 0; i < pidSize; ++i) {
    if (table->getProjectoin(i)->hasAttribute(attr)) {
      projection = table->getProjectoin(i);
      break;
    }
  }

  std::vector<Attribute> group_by_attributes;
  std::vector<Attribute> aggregation_attributes;

  group_by_attributes.push_back(attr);
  aggregation_attributes.push_back(attr);

  std::vector<PhysicalAggregation::State::Aggregation> aggregation_function;
  aggregation_function.push_back(PhysicalAggregation::State::kCount);

  LogicalOperator* sb_payload_scan = new LogicalScan(projection);

  LogicalOperator* aggregation =
      new LogicalAggregation(group_by_attributes, aggregation_attributes,
                             aggregation_function, sb_payload_scan);
  const NodeID collector_node_id = 0;

  LogicalOperator* root = new LogicalQueryPlanRoot(
      collector_node_id, aggregation, LogicalQueryPlanRoot::kResultCollector);

  PhysicalOperatorBase* collector =
      root->GetPhysicalPlan(1024 * 64 - sizeof(unsigned));

  collector->Open();
  collector->Next(0);
  collector->Close();
  ResultSet* resultset = collector->GetResultSet();
  ResultSet::Iterator it = resultset->createIterator();

  BlockStreamBase* block;
  void* tuple;
  BlockStreamBase::BlockStreamTraverseIterator* block_it;

  unsigned long valueCount = resultset->getNumberOftuples();
  unsigned long tupleCount = 0;
  TuplePtr* list = new TuplePtr[valueCount];
  unsigned long i = 0;
  while (block = (BlockStreamBase*)it.atomicNextBlock()) {
    block_it = block->createIterator();
    while (tuple = block_it->nextTuple()) {
      list[i++] = tuple;
      tupleCount += getFrequency(tuple, attr.attrType);
    }
  }

  Histogram* stat = new Histogram(nbuckets);

  stat->setValueCount(valueCount);
  stat->setTupleCount(tupleCount);

  qsort_r(list, valueCount, sizeof(void*), compare,
          (void*)(attr.attrType->operate));

  mcvAnalyse(list, valueCount, attr, (Histogram*)stat);
  equiDepthAnalyse(list, valueCount, attr, (Histogram*)stat);

  //	StatManager::getInstance()->addStat(attrID, stat);
  //	StatManager::getInstance()->getTableStatistic(attrID.table_id)
  delete list;
  resultset->destory();
  return stat;
}