static void bulk_test_logical_index_scan() { vector<IndexScanIterator::query_range> q_range; int count = 1022; ifstream infile("/home/scdong/code/sec_code", ios::in); ofstream outfile("/home/scdong/code/fail_log.dat", ios::out); unsigned long int value = 0; unsigned long int expect_num; TableDescriptor* table = Catalog::getInstance()->getTable("cj"); IndexScanIterator::query_range q2; while (count > 0) { q_range.clear(); infile >> value >> expect_num; q2.value_low = malloc(sizeof(int)); // newmalloc q2.value_low = (void*)(&value); q2.comp_low = EQ; q2.value_high = malloc(sizeof(int)); // newmalloc q2.value_high = (void*)(&value); q2.comp_high = EQ; q2.c_type.type = t_int; q2.c_type.operate = new OperateInt(); q_range.push_back(q2); LogicalOperator* index_scan = new LogicalIndexScan(table->getProjectoin(0)->getProjectionID(), table->getAttribute("sec_code"), q_range); const NodeID collector_node_id = 0; LogicalOperator* root = new LogicalQueryPlanRoot( collector_node_id, index_scan, LogicalQueryPlanRoot::kResultCollector); PhysicalOperatorBase* executable_query_plan = root->GetPhysicalPlan(1024 * 64); executable_query_plan->Open(); while (executable_query_plan->Next(0)) ; executable_query_plan->Close(); ResultSet* result_set = executable_query_plan->getResultSet(); const unsigned long int number_of_tuples = result_set->getNumberOftuples(); executable_query_plan->~PhysicalOperatorBase(); root->~LogicalOperator(); cout << 1022 - count << ": Sec_code: " << value << "\t Result: " << number_of_tuples << endl; if (!print_test_name_result(number_of_tuples == expect_num, "Index Scan")) { printf("\tIndex Scan sec_code = %d, Expected:%d actual: %d\n", value, expect_num, number_of_tuples); outfile << "Index Scan sec_code = " << value << "\tFAIL!\n"; outfile << "\tExcepted: " << expect_num << "\tActual: " << number_of_tuples << endl; } count--; } }
static void test_index_filter_performance(int value_high) { unsigned long long int start = curtick(); vector<IndexScanIterator::query_range> q_range; q_range.clear(); int value_low = 10107; // int value_high = 600257; TableDescriptor* table = Catalog::getInstance()->getTable("cj"); IndexScanIterator::query_range q; q.value_low = malloc(sizeof(int)); // newmalloc q.value_low = (void*)(&value_low); q.comp_low = GEQ; q.value_high = malloc(sizeof(int)); // newmalloc q.value_high = (void*)(&value_high); q.comp_high = L; q.c_type.type = t_int; q.c_type.operate = new OperateInt(); q_range.push_back(q); LogicalOperator* index_scan = new LogicalIndexScan(table->getProjectoin(0)->getProjectionID(), table->getAttribute("sec_code"), q_range); const NodeID collector_node_id = 0; LogicalOperator* root = new LogicalQueryPlanRoot( collector_node_id, index_scan, LogicalQueryPlanRoot::PERFORMANCE); // root->print(); PerformanceMonitor* executable_query_plan = (PerformanceMonitor*)root->GetPhysicalPlan(1024 * 64); executable_query_plan->Open(); while (executable_query_plan->Next(0)) ; executable_query_plan->Close(); // ResultSet* result_set = executable_query_plan->getResultSet(); const unsigned long int number_of_tuples = executable_query_plan->GetNumberOfTuples(); delete executable_query_plan; root->~LogicalOperator(); // cout << "Sec_code: " << value_low << "\t Result: " << number_of_tuples //<< endl; printf("execution time: %4.4f seconds.\n", getSecond(start)); if (!print_test_name_result(number_of_tuples == 26820, "Index Scan")) { printf("\tIndex Scan sec_code = %d, Expected:%d actual: %d\n", value_low, 26820, number_of_tuples); } }
unsigned long Analyzer::getDistinctCardinality(const AttributeID& attr_id) { LogicalOperator* scan = new LogicalScan( Catalog::getInstance()->getTable(attr_id.table_id)->getProjectoin(0)); std::vector<Attribute> group_by_attributes; group_by_attributes.push_back( Catalog::getInstance()->getTable(attr_id.table_id)->getAttribute( attr_id.offset)); LogicalOperator* agg = new LogicalAggregation( group_by_attributes, std::vector<Attribute>(), std::vector<PhysicalAggregation::State::Aggregation>(), scan); std::vector<Attribute> aggregation_attributes; aggregation_attributes.push_back(Attribute(ATTRIBUTE_ANY)); std::vector<PhysicalAggregation::State::Aggregation> aggregation_function; aggregation_function.push_back(PhysicalAggregation::State::kCount); LogicalOperator* count_agg = new LogicalAggregation(std::vector<Attribute>(), aggregation_attributes, aggregation_function, agg); LogicalOperator* root = new LogicalQueryPlanRoot( 0, count_agg, LogicalQueryPlanRoot::kResultCollector); PhysicalOperatorBase* collector = root->GetPhysicalPlan(1024 * 64 - sizeof(unsigned)); collector->Open(); collector->Next(0); collector->Close(); ResultSet* resultset = collector->GetResultSet(); ResultSet::Iterator it = resultset->createIterator(); BlockStreamBase::BlockStreamTraverseIterator* b_it = it.nextBlock()->createIterator(); const unsigned long distinct_cardinality = *(unsigned long*)b_it->nextTuple(); resultset->destory(); collector->~PhysicalOperatorBase(); root->~LogicalOperator(); return distinct_cardinality; }
void Analyzer::compute_table_stat(const TableID& tab_id) { TableDescriptor* table = Catalog::getInstance()->getTable(tab_id); LogicalOperator* scan = new LogicalScan(table->getProjectoin(0)); std::vector<Attribute> group_by_attributes; std::vector<Attribute> aggregation_attributes; aggregation_attributes.push_back(Attribute(ATTRIBUTE_ANY)); std::vector<PhysicalAggregation::State::Aggregation> aggregation_function; aggregation_function.push_back(PhysicalAggregation::State::kCount); LogicalOperator* agg = new LogicalAggregation( group_by_attributes, aggregation_attributes, aggregation_function, scan); LogicalOperator* root = new LogicalQueryPlanRoot(0, agg, LogicalQueryPlanRoot::kResultCollector); PhysicalOperatorBase* collector = root->GetPhysicalPlan(1024 * 64 - sizeof(unsigned)); collector->Open(); collector->Next(0); collector->Close(); ResultSet* resultset = collector->GetResultSet(); ResultSet::Iterator it = resultset->createIterator(); BlockStreamBase::BlockStreamTraverseIterator* b_it = it.nextBlock()->createIterator(); const unsigned long tuple_count = *(unsigned long*)b_it->nextTuple(); BlockStreamBase* block; while (block = it.nextBlock()) { BlockStreamBase::BlockStreamTraverseIterator* b_it = block->createIterator(); } TableStatistic* tab_stat = new TableStatistic(); tab_stat->number_of_tuples_ = tuple_count; printf("Statistics for table %s is gathered!\n", Catalog::getInstance()->getTable(tab_id)->getTableName().c_str()); tab_stat->print(); StatManager::getInstance()->setTableStatistic(tab_id, tab_stat); resultset->destory(); root->~LogicalOperator(); }
static void test_logical_index_building() { TableDescriptor* table = Catalog::getInstance()->getTable("cj"); LogicalOperator* csb_building = new LogicalCSBIndexBuilding(table->getProjectoin(0)->getProjectionID(), table->getAttribute(3), "sec_code_index"); const NodeID collector_node_id = 0; LogicalOperator* root = new LogicalQueryPlanRoot( collector_node_id, csb_building, LogicalQueryPlanRoot::kResultCollector); root->Print(); PhysicalOperatorBase* executable_query_plan = root->GetPhysicalPlan(1024 * 64); executable_query_plan->Open(); while (executable_query_plan->Next(0)) ; executable_query_plan->Close(); // ResultSet* result_set = executable_query_plan->getResultSet(); executable_query_plan->~PhysicalOperatorBase(); root->~LogicalOperator(); cout << "index building finished!\n"; }
static void test_logical_index_scan() { vector<IndexScanIterator::query_range> q_range; TableDescriptor* table = Catalog::getInstance()->getTable("cj"); IndexScanIterator::query_range q; int value = 0; while (true) { q_range.clear(); cout << "Input the search sec_code: "; cin >> value; q.value_low = malloc(sizeof(int)); // newmalloc q.value_low = (void*)(&value); q.comp_low = EQ; q.value_high = malloc(sizeof(int)); // newmalloc q.value_high = (void*)(&value); q.comp_high = EQ; q.c_type.type = t_int; q.c_type.operate = new OperateInt(); q_range.push_back(q); LogicalOperator* index_scan = new LogicalIndexScan(table->getProjectoin(0)->getProjectionID(), table->getAttribute("sec_code"), q_range); const NodeID collector_node_id = 0; LogicalOperator* root = new LogicalQueryPlanRoot( collector_node_id, index_scan, LogicalQueryPlanRoot::kPrint); PhysicalOperatorBase* executable_query_plan = root->GetPhysicalPlan(1024 * 64); executable_query_plan->Open(); while (executable_query_plan->Next(0)) ; executable_query_plan->Close(); executable_query_plan->~PhysicalOperatorBase(); root->~LogicalOperator(); } }
static void test_scan_filter_performance(int value) { unsigned long long int start = curtick(); TableDescriptor* table = Catalog::getInstance()->getTable("cj"); LogicalOperator* cj_scan = new LogicalScan(table->getProjectoin(0)); LogicalFilter::Condition filter_condition_1; filter_condition_1.add(table->getAttribute(3), AttributeComparator::GEQ, std::string("10107")); filter_condition_1.add(table->getAttribute(3), AttributeComparator::L, (void*)&value); LogicalOperator* filter_1 = new LogicalFilter(filter_condition_1, cj_scan); const NodeID collector_node_id = 0; LogicalOperator* root = new LogicalQueryPlanRoot( collector_node_id, filter_1, LogicalQueryPlanRoot::PERFORMANCE); PerformanceMonitor* executable_query_plan = (PerformanceMonitor*)root->GetPhysicalPlan(1024 * 64); // executable_query_plan->print(); executable_query_plan->Open(); while (executable_query_plan->Next(0)) ; executable_query_plan->Close(); // ResultSet *result_set=executable_query_plan->getResultSet(); const unsigned long int number_of_tuples = executable_query_plan->GetNumberOfTuples(); printf("execution time: %4.4f seconds.\n", getSecond(start)); if (!print_test_name_result(number_of_tuples == 26820, "Low selectivity filter")) { printf("\tExpected:26695 actual: %d\n", number_of_tuples); } // result_set->~ResultSet(); delete executable_query_plan; root->~LogicalOperator(); }
// Builds and times a scan -> project plan over table "LINEITEM".
//
// A projection of ten expressions is created on top of the scan of
// projection 0. An aggregation node and a second projection are constructed
// as well, but the plan root is created over `project1`, so they are not part
// of the executed plan. The elapsed time since function entry is printed.
static void query_select_aggregation() {
  /*
   * select sum(a+1)+count(a),b
   * from T
   * group by b
   *
   * notation: p a p s
   * */
  // NOTE(review): the query sketched above does not match the plan that is
  // actually built below -- confirm which one is intended.
  unsigned long long int start = curtick();
  TableDescriptor* table =
      Environment::getInstance()->getCatalog()->getTable("LINEITEM");
  //===========================scan===========================
  LogicalOperator* scan = new LogicalScan(table->getProjectoin(0));
  //==========================project=========================
  vector<vector<ExpressionItem> > expr_list1;
  vector<ExpressionItem> expr1;
  vector<ExpressionItem> expr2;
  vector<ExpressionItem> expr3;
  vector<ExpressionItem> expr4;
  vector<ExpressionItem> expr5;
  vector<ExpressionItem> expr6;
  vector<ExpressionItem> expr7;
  vector<ExpressionItem> expr8;
  vector<ExpressionItem> expr9;
  vector<ExpressionItem> expr10;
  vector<ExpressionItem> expr11;
  vector<ExpressionItem> expr12;
  vector<ExpressionItem> expr13;
  vector<ExpressionItem> expr14;
  vector<ExpressionItem> expr15;
  vector<ExpressionItem> expr16;
  vector<ExpressionItem> expr17;
  ExpressionItem ei1;
  ExpressionItem ei1_1;
  ExpressionItem ei1_2;
  ExpressionItem ei1_3;
  ExpressionItem ei1_4;
  ExpressionItem ei1_5;
  ExpressionItem ei1_6;
  ExpressionItem ei1_7;
  ExpressionItem ei1_8;
  ExpressionItem ei1_9;
  ExpressionItem ei2;
  ExpressionItem ei3;
  ExpressionItem ei4;
  ExpressionItem ei5;
  ExpressionItem ei6;
  ExpressionItem ei7;
  ExpressionItem ei8;
  ExpressionItem ei9;
  ExpressionItem ei10;
  ExpressionItem ei11;
  ExpressionItem ei12;
  ExpressionItem ei13;
  ExpressionItem ei14;
  ExpressionItem ei15;
  ExpressionItem ei16;
  ExpressionItem ei17;
  // Items ei1_1 .. ei1_9 are the operands and operators that expr1 .. expr3
  // are assembled from.
  // NOTE(review): the push order below looks like postfix notation, e.g.
  // expr1 = L_EXTENDEDPRICE 1 L_DISCOUNT - *  -- confirm against
  // LogicalProject.
  ei1_1.setVariable("LINEITEM", "L_EXTENDEDPRICE");
  ei1_2.setIntValue("1");
  ei1_3.setVariable("LINEITEM", "L_DISCOUNT");
  ei1_4.setOperator("-");
  ei1_5.setOperator("*");
  ei1_6.setIntValue("1");
  ei1_7.setVariable("LINEITEM", "L_TEX");
  ei1_8.setOperator("+");
  ei1_9.setOperator("*");
  // One plain column reference per item.
  ei1.setVariable("LINEITEM", "row_id");
  ei2.setVariable("LINEITEM", "L_ORDERKEY");
  ei3.setVariable("LINEITEM", "L_PARTKEY");
  ei4.setVariable("LINEITEM", "L_SUPPKEY");
  ei5.setVariable("LINEITEM", "L_LINENUMBER");
  ei6.setVariable("LINEITEM", "L_QUANTITY");
  ei7.setVariable("LINEITEM", "L_EXTENDEDPRICE");
  ei8.setVariable("LINEITEM", "L_DISCOUNT");
  ei9.setVariable("LINEITEM", "L_TEX");
  ei10.setVariable("LINEITEM", "L_RETURNFLAG");
  // ei10.size=1;
  ei11.setVariable("LINEITEM", "L_LINESTATUS");
  // ei11.size=1;
  ei12.setVariable("LINEITEM", "L_SHIPDATE");
  ei13.setVariable("LINEITEM", "L_COMMITDATE");
  ei14.setVariable("LINEITEM", "L_RECEIPTDATE");
  ei15.setVariable("LINEITEM", "L_SHIPINSTRUCT");
  // ei15.size=25;
  ei16.setVariable("LINEITEM", "L_SHIPMODE");
  // ei16.size=10;
  ei17.setVariable("LINEITEM", "L_COMMENT");
  // ei17.size=44;
  // expr1: five items.
  expr1.push_back(ei1_1);
  expr1.push_back(ei1_2);
  expr1.push_back(ei1_3);
  expr1.push_back(ei1_4);
  expr1.push_back(ei1_5);
  // expr1.push_back(ei1_6);
  // expr1.push_back(ei1_7);
  // expr1.push_back(ei1_8);
  // expr1.push_back(ei1_9);
  // expr1.push_back(ei1);
  // expr2: the items of expr1 followed by ei1_6 .. ei1_9.
  expr2.push_back(ei1_1);
  expr2.push_back(ei1_2);
  expr2.push_back(ei1_3);
  expr2.push_back(ei1_4);
  expr2.push_back(ei1_5);
  expr2.push_back(ei1_6);
  expr2.push_back(ei1_7);
  expr2.push_back(ei1_8);
  expr2.push_back(ei1_9);
  // expr3: three items.
  expr3.push_back(ei1_2);
  expr3.push_back(ei1_3);
  expr3.push_back(ei1_4);
  // expr3.push_back(ei3);
  // expr4 .. expr17: one column reference each.
  expr4.push_back(ei4);
  expr5.push_back(ei5);
  expr6.push_back(ei6);
  expr7.push_back(ei7);
  expr8.push_back(ei8);
  expr9.push_back(ei9);
  expr10.push_back(ei10);
  expr11.push_back(ei11);
  expr12.push_back(ei12);
  expr13.push_back(ei13);
  expr14.push_back(ei14);
  expr15.push_back(ei15);
  expr16.push_back(ei16);
  expr17.push_back(ei17);
  // Output columns of project1, in order. expr10 and expr11 are added twice;
  // expr4, expr5, expr9 and expr12 .. expr17 are built above but not added.
  expr_list1.push_back(expr10);
  expr_list1.push_back(expr11);
  expr_list1.push_back(expr6);
  expr_list1.push_back(expr7);
  expr_list1.push_back(expr1);
  expr_list1.push_back(expr2);
  expr_list1.push_back(expr8);
  expr_list1.push_back(expr3);
  expr_list1.push_back(expr10);
  expr_list1.push_back(expr11);
  // expr_list1.push_back(expr3);
  // expr_list1.push_back(expr4);
  // expr_list1.push_back(expr5);
  // expr_list1.push_back(expr8);
  // expr_list1.push_back(expr9);
  // expr_list1.push_back(expr12);
  // expr_list1.push_back(expr13);
  // expr_list1.push_back(expr14);
  // expr_list1.push_back(expr15);
  // expr_list1.push_back(expr16);
  // expr_list1.push_back(expr17);
  LogicalOperator* project1 = new LogicalProject(scan, expr_list1);
  //========================aggregation=======================
  // Group by L_RETURNFLAG and L_LINESTATUS; three sums and one count.
  std::vector<Attribute> group_by_attributes;
  group_by_attributes.push_back(table->getAttribute("L_RETURNFLAG"));
  group_by_attributes.push_back(table->getAttribute("L_LINESTATUS"));
  std::vector<Attribute> aggregation_attributes;
  aggregation_attributes.push_back(table->getAttribute("L_QUANTITY"));
  aggregation_attributes.push_back(table->getAttribute("L_EXTENDEDPRICE"));
  aggregation_attributes.push_back(table->getAttribute("L_DISCOUNT"));
  aggregation_attributes.push_back(Attribute(ATTRIBUTE_ANY));
  std::vector<BlockStreamAggregationIterator::State::aggregation>
      aggregation_function;
  aggregation_function.push_back(BlockStreamAggregationIterator::State::sum);
  aggregation_function.push_back(BlockStreamAggregationIterator::State::sum);
  aggregation_function.push_back(BlockStreamAggregationIterator::State::sum);
  aggregation_function.push_back(BlockStreamAggregationIterator::State::count);
  LogicalOperator* aggregation =
      new LogicalAggregation(group_by_attributes, aggregation_attributes,
                             aggregation_function, project1);
  //==========================project=========================
  // project2 is created with an empty expression list.
  vector<vector<ExpressionItem> > expr_list2;
  LogicalOperator* project2 = new LogicalProject(aggregation, expr_list2);
  //===========================root===========================
  // NOTE(review): the root is built over project1, so `aggregation` and
  // `project2` are never executed or released here -- confirm the intent.
  LogicalOperator* root =
      new LogicalQueryPlanRoot(0, project1, LogicalQueryPlanRoot::PERFORMANCE);
  cout << "performance is ok!"
       << endl;
  PhysicalOperatorBase* physical_iterator_tree =
      root->GetPhysicalPlan(64 * 1024);
  // physical_iterator_tree->print();
  physical_iterator_tree->Open();
  // Drain the plan; the loop body is intentionally empty.
  while (physical_iterator_tree->Next(0))
    ;
  physical_iterator_tree->Close();
  // NOTE(review): neither `root` nor `physical_iterator_tree` is destroyed
  // in this function.
  printf("Q1: execution time: %4.4f second.\n", getSecond(start));
}
static void query_select_fzh() { /* * select sum(a+1)+count(a),b * from T * group by b * * notation: p a p s * */ unsigned long long int start = curtick(); TableDescriptor* table = Environment::getInstance()->getCatalog()->getTable("LINEITEM"); //===========================scan=========================== LogicalOperator* scan = new LogicalScan(table->getProjectoin(0)); //==========================project========================= vector<vector<ExpressionItem> > expr_list1; vector<ExpressionItem> expr1; vector<ExpressionItem> expr2; vector<ExpressionItem> expr3; vector<ExpressionItem> expr4; vector<ExpressionItem> expr5; vector<ExpressionItem> expr6; vector<ExpressionItem> expr7; vector<ExpressionItem> expr8; vector<ExpressionItem> expr9; vector<ExpressionItem> expr10; vector<ExpressionItem> expr11; vector<ExpressionItem> expr12; vector<ExpressionItem> expr13; vector<ExpressionItem> expr14; vector<ExpressionItem> expr15; vector<ExpressionItem> expr16; vector<ExpressionItem> expr17; ExpressionItem ei1; ExpressionItem ei1_1; ExpressionItem ei1_2; ExpressionItem ei1_3; ExpressionItem ei1_4; ExpressionItem ei1_5; ExpressionItem ei1_6; ExpressionItem ei1_7; ExpressionItem ei1_8; ExpressionItem ei1_9; ExpressionItem ei2; ExpressionItem ei3; ExpressionItem ei4; ExpressionItem ei5; ExpressionItem ei6; ExpressionItem ei7; ExpressionItem ei8; ExpressionItem ei9; ExpressionItem ei10; ExpressionItem ei11; ExpressionItem ei12; ExpressionItem ei13; ExpressionItem ei14; ExpressionItem ei15; ExpressionItem ei16; ExpressionItem ei17; ei1_1.setVariable("LINEITEM.row_id"); // ei1_2.setVariable("LINEITEM.L_ORDERKEY"); ei1_2.setIntValue("1"); ei1_3.setOperator("+"); expr1.push_back(ei1_1); expr1.push_back(ei1_2); expr1.push_back(ei1_3); expr_list1.push_back(expr1); LogicalOperator* project1 = new LogicalProject(scan, expr_list1); //========================aggregation======================= std::vector<Attribute> group_by_attributes; 
group_by_attributes.push_back(table->getAttribute("L_RETURNFLAG")); group_by_attributes.push_back(table->getAttribute("L_LINESTATUS")); std::vector<Attribute> aggregation_attributes; aggregation_attributes.push_back(table->getAttribute("L_QUANTITY")); aggregation_attributes.push_back(table->getAttribute("L_EXTENDEDPRICE")); aggregation_attributes.push_back(table->getAttribute("L_DISCOUNT")); aggregation_attributes.push_back(Attribute(ATTRIBUTE_ANY)); std::vector<PhysicalAggregation::State::Aggregation> aggregation_function; aggregation_function.push_back(PhysicalAggregation::State::kSum); aggregation_function.push_back(PhysicalAggregation::State::kSum); aggregation_function.push_back(PhysicalAggregation::State::kSum); aggregation_function.push_back(PhysicalAggregation::State::kCount); LogicalOperator* aggregation = new LogicalAggregation(group_by_attributes, aggregation_attributes, aggregation_function, project1); //==========================project========================= vector<vector<ExpressionItem> > expr_list2; ExpressionItem ei21_1; ei21_1.setVariable("LINEITEM.row_id+1"); vector<ExpressionItem> expr21; expr21.push_back(ei21_1); expr_list2.push_back(expr21); LogicalOperator* project2 = new LogicalProject(project1, expr_list2); //===========================root=========================== LogicalOperator* root = new LogicalQueryPlanRoot(0, project2, LogicalQueryPlanRoot::PRINT); cout << "performance is ok!" << endl; PhysicalOperatorBase* physical_iterator_tree = root->GetPhysicalPlan(64 * 1024); // physical_iterator_tree->print(); physical_iterator_tree->Open(); while (physical_iterator_tree->Next(0)) ; physical_iterator_tree->Close(); printf("Q1: execution time: %4.4f second.\n", getSecond(start)); }
void Analyzer::analyse(const AttributeID& attrID) { Catalog* catalog = Catalog::getInstance(); TableDescriptor* table = catalog->getTable(attrID.table_id); ProjectionDescriptor* projection = NULL; unsigned pidSize = table->getNumberOfProjection(); const Attribute attr = table->getAttribute(attrID.offset); for (unsigned i = 0; i < pidSize; ++i) { if (table->getProjectoin(i)->hasAttribute(attr)) { projection = table->getProjectoin(i); break; } } std::vector<Attribute> group_by_attributes; std::vector<Attribute> aggregation_attributes; group_by_attributes.push_back(attr); aggregation_attributes.push_back(attr); std::vector<PhysicalAggregation::State::Aggregation> aggregation_function; aggregation_function.push_back(PhysicalAggregation::State::kCount); LogicalOperator* sb_payload_scan = new LogicalScan(projection); LogicalOperator* aggregation = new LogicalAggregation(group_by_attributes, aggregation_attributes, aggregation_function, sb_payload_scan); const NodeID collector_node_id = 0; LogicalOperator* root = new LogicalQueryPlanRoot( collector_node_id, aggregation, LogicalQueryPlanRoot::kResultCollector); PhysicalOperatorBase* collector = root->GetPhysicalPlan(1024 * 64 - sizeof(unsigned)); collector->Open(); collector->Next(0); collector->Close(); ResultSet* resultset = collector->GetResultSet(); ResultSet::Iterator it = resultset->createIterator(); BlockStreamBase* block; void* tuple; BlockStreamBase::BlockStreamTraverseIterator* block_it; unsigned long valueCount = resultset->getNumberOftuples(); unsigned long tupleCount = 0; TuplePtr* list = new TuplePtr[valueCount]; unsigned long i = 0; while (block = (BlockStreamBase*)it.atomicNextBlock()) { block_it = block->createIterator(); while (tuple = block_it->nextTuple()) { list[i++] = tuple; tupleCount += getFrequency(tuple, attr.attrType); } } int magicNumber = 100; StatisticOnTable* stat = new StatisticOnTable(magicNumber); stat->setValueCount(valueCount); stat->setTupleCount(tupleCount); qsort_r(list, valueCount, 
sizeof(void*), compare, (void*)(attr.attrType->operate)); mcvAnalyse(list, valueCount, attr, (Histogram*)stat); equiDepthAnalyse(list, valueCount, attr, (Histogram*)stat); // StatManager::getInstance()->addStat(attrID, stat); StatManager::getInstance()->getTableStatistic(attrID.table_id); delete list; resultset->destory(); }