unsigned long Analyzer::getDistinctCardinality(const AttributeID& attr_id){ LogicalOperator * scan=new LogicalScan(Catalog::getInstance()->getTable(attr_id.table_id)->getProjectoin(0)); std::vector<Attribute> group_by_attributes; group_by_attributes.push_back(Catalog::getInstance()->getTable(attr_id.table_id)->getAttribute(attr_id.offset)); LogicalOperator* agg=new Aggregation(group_by_attributes,std::vector<Attribute>(),std::vector<BlockStreamAggregationIterator::State::aggregation>(),scan); std::vector<Attribute> aggregation_attributes; aggregation_attributes.push_back(Attribute(ATTRIBUTE_ANY)); std::vector<BlockStreamAggregationIterator::State::aggregation> aggregation_function; aggregation_function.push_back( BlockStreamAggregationIterator::State::count); LogicalOperator* count_agg=new Aggregation(std::vector<Attribute>(),aggregation_attributes,aggregation_function,agg); LogicalOperator* root = new LogicalQueryPlanRoot(0, count_agg, LogicalQueryPlanRoot::RESULTCOLLECTOR); BlockStreamIteratorBase* collector = root->getIteratorTree( 1024 * 64 - sizeof(unsigned)); collector->open(); collector->next(0); collector->close(); ResultSet* resultset = collector->getResultSet(); ResultSet::Iterator it = resultset->createIterator(); BlockStreamBase::BlockStreamTraverseIterator* b_it=it.nextBlock()->createIterator(); const unsigned long distinct_cardinality=*(unsigned long*)b_it->nextTuple(); resultset->destory(); collector->~BlockStreamIteratorBase(); root->~LogicalOperator(); return distinct_cardinality; }
void Analyzer::compute_table_stat(const TableID& tab_id){ TableDescriptor* table=Catalog::getInstance()->getTable(tab_id); LogicalOperator * scan=new LogicalScan(table->getProjectoin(0)); std::vector<Attribute> group_by_attributes; std::vector<Attribute> aggregation_attributes; aggregation_attributes.push_back(Attribute(ATTRIBUTE_ANY)); std::vector<BlockStreamAggregationIterator::State::aggregation> aggregation_function; aggregation_function.push_back( BlockStreamAggregationIterator::State::count); LogicalOperator* agg=new Aggregation(group_by_attributes,aggregation_attributes,aggregation_function,scan); LogicalOperator* root = new LogicalQueryPlanRoot(0, agg, LogicalQueryPlanRoot::RESULTCOLLECTOR); BlockStreamIteratorBase* collector = root->getIteratorTree( 1024 * 64 - sizeof(unsigned)); collector->open(); collector->next(0); collector->close(); ResultSet* resultset = collector->getResultSet(); ResultSet::Iterator it = resultset->createIterator(); BlockStreamBase::BlockStreamTraverseIterator* b_it=it.nextBlock()->createIterator(); const unsigned long tuple_count=*(unsigned long*)b_it->nextTuple(); BlockStreamBase* block; while(block=it.nextBlock()){ BlockStreamBase::BlockStreamTraverseIterator* b_it=block->createIterator(); } TableStatistic* tab_stat=new TableStatistic(); tab_stat->number_of_tuples_=tuple_count; printf("Statistics for table %s is gathered!\n",Catalog::getInstance()->getTable(tab_id)->getTableName().c_str()); tab_stat->print(); StatManager::getInstance()->setTableStatistic(tab_id,tab_stat); resultset->destory(); root->~LogicalOperator(); }
//int main(int argc,const char** argv){ int main_combine(int argc,const char** argv){ const unsigned block_size=BLOCK_SIZE_CAO; const unsigned thread_count=4; const unsigned expander_buffer=4; std::vector<column_type> column_list,column_list_; column_list.push_back(column_type(t_int)); Schema* schema=new SchemaFix(column_list); ExpandableBlockStreamSingleColumnScan::State ebssc_state1("/home/imdb/temp/Uniform_0_99.column",schema,block_size); BlockStreamIteratorBase* ebssc1=new ExpandableBlockStreamSingleColumnScan(ebssc_state1); ExpandableBlockStreamSingleColumnScan::State ebssc_state2("/home/imdb/temp/Uniform_0_99.column",schema,block_size); BlockStreamIteratorBase* ebssc2=new ExpandableBlockStreamSingleColumnScan(ebssc_state2); std::vector<Schema *> inputs; inputs.push_back(schema); inputs.push_back(schema); column_list_.push_back(column_type(t_int)); column_list_.push_back(column_type(t_int)); Schema* output=new SchemaFix(column_list_); std::vector<BlockStreamIteratorBase *> children_; children_.push_back(ebssc1); children_.push_back(ebssc2); BlockStreamCombinedIterator::State bsci_state(inputs,output,children_); BlockStreamCombinedIterator *bsc=new BlockStreamCombinedIterator(bsci_state); BlockStreamExpander::State bse_state(schema,bsc,thread_count,block_size,expander_buffer); BlockStreamIteratorBase* bse=new BlockStreamExpander(bse_state); BlockStreamBase *block=new BlockStreamFix(block_size,8); int choice=0; std::ostringstream ostr; boost::archive::text_oarchive oa(ostr); oa.register_type(static_cast<BlockStreamCombinedIterator *>(NULL)); oa.register_type(static_cast<BlockStreamExpander *>(NULL)); oa.register_type(static_cast<ExpandableBlockStreamSingleColumnScan *>(NULL)); Register_Schemas<boost::archive::text_oarchive>(oa); // Register_Iterators(oa); oa<<bse; std::cout<<"Serialization Result:"<<ostr.str()<<std::endl; std::istringstream istr(ostr.str()); boost::archive::text_iarchive ia(istr); BlockStreamIteratorBase* des; ia.register_type(static_cast<BlockStreamCombinedIterator *>(NULL)); ia.register_type(static_cast<BlockStreamExpander *>(NULL)); ia.register_type(static_cast<ExpandableBlockStreamSingleColumnScan *>(NULL)); Register_Schemas<boost::archive::text_iarchive>(ia); ia>>des; // return 1; while(choice==0){ // bsf->open(); des->open(); cout<<"after open!"<<endl; unsigned long long int start=curtick(); cout<<"ready for the next"<<endl; unsigned tuple_count=0; while(des->next(block)){ BlockStreamBase::BlockStreamTraverseIterator* it=block->createIterator(); while(it->nextTuple()){ tuple_count++; } block->setEmpty(); } printf("Time=%f Throughput=%f.\n tuple=%d",getSecond(start),1024/getSecond(start),tuple_count); des->close(); printf("Continue(0) or Not(1) ?\n"); scanf("%d",&choice); } }
void Analyzer::analyse(const AttributeID &attrID) { Catalog *catalog = Catalog::getInstance(); TableDescriptor* table = catalog->getTable(attrID.table_id); ProjectionDescriptor * projection = NULL; unsigned pidSize = table->getNumberOfProjection(); const Attribute attr = table->getAttribute(attrID.offset); for (unsigned i = 0; i < pidSize; ++i) { if (table->getProjectoin(i)->hasAttribute(attr)) { projection = table->getProjectoin(i); break; } } std::vector<Attribute> group_by_attributes; std::vector<Attribute> aggregation_attributes; group_by_attributes.push_back(attr); aggregation_attributes.push_back(attr); std::vector<BlockStreamAggregationIterator::State::aggregation> aggregation_function; aggregation_function.push_back( BlockStreamAggregationIterator::State::count); LogicalOperator* sb_payload_scan = new LogicalScan(projection); LogicalOperator* aggregation = new Aggregation(group_by_attributes, aggregation_attributes, aggregation_function, sb_payload_scan); const NodeID collector_node_id = 0; LogicalOperator* root = new LogicalQueryPlanRoot(collector_node_id, aggregation, LogicalQueryPlanRoot::RESULTCOLLECTOR); BlockStreamIteratorBase* collector = root->getIteratorTree( 1024 * 64 - sizeof(unsigned)); collector->open(); collector->next(0); collector->close(); ResultSet* resultset = collector->getResultSet(); ResultSet::Iterator it = resultset->createIterator(); BlockStreamBase* block; void* tuple; BlockStreamBase::BlockStreamTraverseIterator *block_it; unsigned long valueCount = resultset->getNumberOftuples(); unsigned long tupleCount = 0; TuplePtr *list = new TuplePtr[valueCount]; unsigned long i = 0; while (block = (BlockStreamBase*) it.atomicNextBlock()) { block_it = block->createIterator(); while (tuple = block_it->nextTuple()) { list[i++] = tuple; tupleCount += getFrequency(tuple, attr.attrType); } } int magicNumber = 100; StatisticOnTable *stat = new StatisticOnTable(magicNumber); stat->setValueCount(valueCount); stat->setTupleCount(tupleCount); qsort_r(list, valueCount, sizeof(void *), compare, (void *) (attr.attrType->operate)); mcvAnalyse(list, valueCount, attr, (Histogram *) stat); equiDepthAnalyse(list, valueCount, attr, (Histogram *) stat); // StatManager::getInstance()->addStat(attrID, stat); StatManager::getInstance()->getTableStatistic(attrID.table_id); delete list; resultset->destory(); }
static int in_iterator_test () { cout << "test in\n"; std::vector<column_type> column_list; column_list.push_back(column_type(t_u_long)); column_list.push_back(column_type(t_int)); column_list.push_back(column_type(t_u_long)); column_list.push_back(column_type(t_int)); column_list.push_back(column_type(t_int)); column_list.push_back(column_type(t_int)); Schema* input=new SchemaFix(column_list); Schema* output=new SchemaFix(column_list); unsigned block_size = 64*1024-sizeof(unsigned); ExpandableBlockStreamSingleColumnScan::State ebsscs1_state("/home/claims/data/wangli/T0G0P0",input, block_size); BlockStreamIteratorBase* ebssc1=new ExpandableBlockStreamSingleColumnScan(ebsscs1_state); unsigned long f = 20000; AttributeComparator fA(column_type(t_u_long),Comparator::L,0,&f); std::vector<AttributeComparator> ComparatorList; ComparatorList.push_back(fA); ExpandableBlockStreamFilter::State ebsf_state(input, ebssc1, ComparatorList, block_size); BlockStreamIteratorBase* ebfs = new ExpandableBlockStreamFilter(ebsf_state); ExpandableBlockStreamSingleColumnScan::State ebsscs2_state("/home/claims/data/wangli/T0G0P0",input, block_size); BlockStreamIteratorBase* ebssc2=new ExpandableBlockStreamSingleColumnScan(ebsscs1_state); BlockStreamInIterator::State bsii_state(ebfs, ebssc2, input, input, 0, 0, block_size); BlockStreamIteratorBase* bsii = new BlockStreamInIterator(bsii_state); std::vector<string> attr_name; attr_name.push_back("rowid"); attr_name.push_back("Trade_Date"); attr_name.push_back("Order_No"); attr_name.push_back("Sec_Code"); attr_name.push_back("Trade_Dir"); attr_name.push_back("Order_Type"); BlockStreamPrint::State bsp1_state(input, ebfs, block_size, attr_name, "\t"); BlockStreamIteratorBase* bsp1 = new BlockStreamPrint(bsp1_state); bsp1->print(); bsp1->open(0); BlockStreamBase* block = BlockStreamBase::createBlock(input, block_size); while(bsp1->next(block)); { } bsp1->close(); BlockStreamPrint::State bsp_state(input, bsii, block_size, attr_name, "\t"); BlockStreamIteratorBase* bsp = new BlockStreamPrint(bsp_state); bsp->open(0); // BlockStreamBase* block = BlockStreamBase::createBlock(input, block_size); while(bsp->next(block)); { } bsp->close(); PhysicalQueryPlan IM(bsp); Message4K M4K = PhysicalQueryPlan::serialize4K(IM); PhysicalQueryPlan tmp = PhysicalQueryPlan::deserialize4K(M4K); tmp.run(); return 0; }