static void test_logical_index_scan() { vector<IndexScanIterator::query_range> q_range; TableDescriptor* table = Catalog::getInstance()->getTable("cj"); IndexScanIterator::query_range q; int value = 0; while (true) { q_range.clear(); cout << "Input the search sec_code: "; cin >> value; q.value_low = malloc(sizeof(int)); //newmalloc q.value_low = (void*)(&value); q.comp_low = EQ; q.value_high = malloc(sizeof(int)); //newmalloc q.value_high = (void*) (&value); q.comp_high = EQ; q.c_type.type = t_int; q.c_type.operate = new OperateInt(); q_range.push_back(q); LogicalOperator* index_scan = new LogicalIndexScan(table->getProjectoin(0)->getProjectionID(), table->getAttribute("sec_code"), q_range); const NodeID collector_node_id = 0; LogicalOperator* root = new LogicalQueryPlanRoot(collector_node_id, index_scan, LogicalQueryPlanRoot::PRINT); BlockStreamIteratorBase* executable_query_plan = root->getIteratorTree(1024 * 64); executable_query_plan->open(); while (executable_query_plan->next(0)); executable_query_plan->close(); executable_query_plan->~BlockStreamIteratorBase(); root->~LogicalOperator(); } }
static void bulk_test_logical_index_scan() { vector<IndexScanIterator::query_range> q_range; int count = 1022; ifstream infile("/home/scdong/code/sec_code", ios::in); ofstream outfile("/home/scdong/code/fail_log.dat", ios::out); unsigned long int value = 0; unsigned long int expect_num; TableDescriptor* table = Catalog::getInstance()->getTable("cj"); IndexScanIterator::query_range q2; while (count > 0) { q_range.clear(); infile >> value >> expect_num; q2.value_low = malloc(sizeof(int)); // newmalloc q2.value_low = (void*)(&value); q2.comp_low = EQ; q2.value_high = malloc(sizeof(int)); // newmalloc q2.value_high = (void*)(&value); q2.comp_high = EQ; q2.c_type.type = t_int; q2.c_type.operate = new OperateInt(); q_range.push_back(q2); LogicalOperator* index_scan = new LogicalIndexScan(table->getProjectoin(0)->getProjectionID(), table->getAttribute("sec_code"), q_range); const NodeID collector_node_id = 0; LogicalOperator* root = new LogicalQueryPlanRoot( collector_node_id, index_scan, LogicalQueryPlanRoot::kResultCollector); PhysicalOperatorBase* executable_query_plan = root->GetPhysicalPlan(1024 * 64); executable_query_plan->Open(); while (executable_query_plan->Next(0)) ; executable_query_plan->Close(); ResultSet* result_set = executable_query_plan->getResultSet(); const unsigned long int number_of_tuples = result_set->getNumberOftuples(); executable_query_plan->~PhysicalOperatorBase(); root->~LogicalOperator(); cout << 1022 - count << ": Sec_code: " << value << "\t Result: " << number_of_tuples << endl; if (!print_test_name_result(number_of_tuples == expect_num, "Index Scan")) { printf("\tIndex Scan sec_code = %d, Expected:%d actual: %d\n", value, expect_num, number_of_tuples); outfile << "Index Scan sec_code = " << value << "\tFAIL!\n"; outfile << "\tExcepted: " << expect_num << "\tActual: " << number_of_tuples << endl; } count--; } }
/**
 * Turns an "INS" expression whose right operand is a query statement into an
 * equal join between `input` and the grouped sub-query plan.
 *
 * @param exprnode expression node to inspect
 * @param input    logical plan producing the left-hand columns
 * @return a new LogicalEqualJoin, or NULL when `exprnode` is not a
 *         t_expr_cal node with sign "INS" and a t_query_stmt right operand
 */
static LogicalOperator *solve_insubquery(Node *exprnode,
                                         LogicalOperator *input) {
  if (exprnode->type != t_expr_cal) {
    return NULL;
  }
  Expr_cal *node = (Expr_cal *)exprnode;
  if (strcmp(node->sign, "INS") != 0 || node->rnext->type != t_query_stmt) {
    return NULL;
  }

  // 1. Build the logical plan of the sub-query.
  LogicalOperator *sublogicalplan = parsetree2logicalplan(node->rnext);
  Query_stmt *subquery = (Query_stmt *)(node->rnext);

  // 2.1 Collect the group-by attributes from the sub-query's select list.
  vector<Attribute> group_by_attributes;
  for (Node *p = subquery->select_list; p != NULL;) {
    Select_list *selectlist = (Select_list *)p;
    Select_expr *sexpr = (Select_expr *)selectlist->args;
    group_by_attributes.push_back(
        sublogicalplan->GetPlanContext().GetAttribute(sexpr->ascolname));
    p = selectlist->next;
  }

  // 2.2 Group the plan from step 1 by those attributes (no aggregates).
  LogicalOperator *aggrection_sublogicalplan = new LogicalAggregation(
      group_by_attributes, std::vector<Attribute>(),
      std::vector<PhysicalAggregation::State::Aggregation>(), sublogicalplan);

  // 3.1 Pair each left-hand column with the matching select-list column.
  // Both lists are walked in lock step; stop at the shorter one so a
  // mismatched length cannot dereference a NULL select-list node.
  vector<LogicalEqualJoin::JoinPair> join_pair_list;
  Node *lp = node->lnext;
  Node *sp = subquery->select_list;
  while (lp != NULL && sp != NULL) {
    Expr_list *lpexpr = (Expr_list *)lp;
    Columns *lcol = (Columns *)lpexpr->data;
    Select_list *spexpr = (Select_list *)sp;
    Columns *rcol = (Columns *)spexpr->args;
    join_pair_list.push_back(LogicalEqualJoin::JoinPair(
        input->GetPlanContext().GetAttribute(lcol->parameter2),
        sublogicalplan->GetPlanContext().GetAttribute(rcol->parameter2)));
    lp = lpexpr->next;
    sp = spexpr->next;
  }

  return new LogicalEqualJoin(join_pair_list, input,
                              aggrection_sublogicalplan);
}
static void test_index_filter_performance(int value_high) { unsigned long long int start = curtick(); vector<IndexScanIterator::query_range> q_range; q_range.clear(); int value_low = 10107; // int value_high = 600257; TableDescriptor* table = Catalog::getInstance()->getTable("cj"); IndexScanIterator::query_range q; q.value_low = malloc(sizeof(int)); // newmalloc q.value_low = (void*)(&value_low); q.comp_low = GEQ; q.value_high = malloc(sizeof(int)); // newmalloc q.value_high = (void*)(&value_high); q.comp_high = L; q.c_type.type = t_int; q.c_type.operate = new OperateInt(); q_range.push_back(q); LogicalOperator* index_scan = new LogicalIndexScan(table->getProjectoin(0)->getProjectionID(), table->getAttribute("sec_code"), q_range); const NodeID collector_node_id = 0; LogicalOperator* root = new LogicalQueryPlanRoot( collector_node_id, index_scan, LogicalQueryPlanRoot::PERFORMANCE); // root->print(); PerformanceMonitor* executable_query_plan = (PerformanceMonitor*)root->GetPhysicalPlan(1024 * 64); executable_query_plan->Open(); while (executable_query_plan->Next(0)) ; executable_query_plan->Close(); // ResultSet* result_set = executable_query_plan->getResultSet(); const unsigned long int number_of_tuples = executable_query_plan->GetNumberOfTuples(); delete executable_query_plan; root->~LogicalOperator(); // cout << "Sec_code: " << value_low << "\t Result: " << number_of_tuples //<< endl; printf("execution time: %4.4f seconds.\n", getSecond(start)); if (!print_test_name_result(number_of_tuples == 26820, "Index Scan")) { printf("\tIndex Scan sec_code = %d, Expected:%d actual: %d\n", value_low, 26820, number_of_tuples); } }
unsigned long Analyzer::getDistinctCardinality(const AttributeID& attr_id) { LogicalOperator* scan = new LogicalScan( Catalog::getInstance()->getTable(attr_id.table_id)->getProjectoin(0)); std::vector<Attribute> group_by_attributes; group_by_attributes.push_back( Catalog::getInstance()->getTable(attr_id.table_id)->getAttribute( attr_id.offset)); LogicalOperator* agg = new LogicalAggregation( group_by_attributes, std::vector<Attribute>(), std::vector<PhysicalAggregation::State::Aggregation>(), scan); std::vector<Attribute> aggregation_attributes; aggregation_attributes.push_back(Attribute(ATTRIBUTE_ANY)); std::vector<PhysicalAggregation::State::Aggregation> aggregation_function; aggregation_function.push_back(PhysicalAggregation::State::kCount); LogicalOperator* count_agg = new LogicalAggregation(std::vector<Attribute>(), aggregation_attributes, aggregation_function, agg); LogicalOperator* root = new LogicalQueryPlanRoot( 0, count_agg, LogicalQueryPlanRoot::kResultCollector); PhysicalOperatorBase* collector = root->GetPhysicalPlan(1024 * 64 - sizeof(unsigned)); collector->Open(); collector->Next(0); collector->Close(); ResultSet* resultset = collector->GetResultSet(); ResultSet::Iterator it = resultset->createIterator(); BlockStreamBase::BlockStreamTraverseIterator* b_it = it.nextBlock()->createIterator(); const unsigned long distinct_cardinality = *(unsigned long*)b_it->nextTuple(); resultset->destory(); collector->~PhysicalOperatorBase(); root->~LogicalOperator(); return distinct_cardinality; }
void Analyzer::compute_table_stat(const TableID& tab_id) { TableDescriptor* table = Catalog::getInstance()->getTable(tab_id); LogicalOperator* scan = new LogicalScan(table->getProjectoin(0)); std::vector<Attribute> group_by_attributes; std::vector<Attribute> aggregation_attributes; aggregation_attributes.push_back(Attribute(ATTRIBUTE_ANY)); std::vector<PhysicalAggregation::State::Aggregation> aggregation_function; aggregation_function.push_back(PhysicalAggregation::State::kCount); LogicalOperator* agg = new LogicalAggregation( group_by_attributes, aggregation_attributes, aggregation_function, scan); LogicalOperator* root = new LogicalQueryPlanRoot(0, agg, LogicalQueryPlanRoot::kResultCollector); PhysicalOperatorBase* collector = root->GetPhysicalPlan(1024 * 64 - sizeof(unsigned)); collector->Open(); collector->Next(0); collector->Close(); ResultSet* resultset = collector->GetResultSet(); ResultSet::Iterator it = resultset->createIterator(); BlockStreamBase::BlockStreamTraverseIterator* b_it = it.nextBlock()->createIterator(); const unsigned long tuple_count = *(unsigned long*)b_it->nextTuple(); BlockStreamBase* block; while (block = it.nextBlock()) { BlockStreamBase::BlockStreamTraverseIterator* b_it = block->createIterator(); } TableStatistic* tab_stat = new TableStatistic(); tab_stat->number_of_tuples_ = tuple_count; printf("Statistics for table %s is gathered!\n", Catalog::getInstance()->getTable(tab_id)->getTableName().c_str()); tab_stat->print(); StatManager::getInstance()->setTableStatistic(tab_id, tab_stat); resultset->destory(); root->~LogicalOperator(); }
static void test_logical_index_building() { TableDescriptor* table = Catalog::getInstance()->getTable("cj"); LogicalOperator* csb_building = new LogicalCSBIndexBuilding(table->getProjectoin(0)->getProjectionID(), table->getAttribute(3), "sec_code_index"); const NodeID collector_node_id = 0; LogicalOperator* root = new LogicalQueryPlanRoot( collector_node_id, csb_building, LogicalQueryPlanRoot::kResultCollector); root->Print(); PhysicalOperatorBase* executable_query_plan = root->GetPhysicalPlan(1024 * 64); executable_query_plan->Open(); while (executable_query_plan->Next(0)) ; executable_query_plan->Close(); // ResultSet* result_set = executable_query_plan->getResultSet(); executable_query_plan->~PhysicalOperatorBase(); root->~LogicalOperator(); cout << "index building finished!\n"; }
static void test_scan_filter_performance(int value) { unsigned long long int start = curtick(); TableDescriptor* table = Catalog::getInstance()->getTable("cj"); LogicalOperator* cj_scan = new LogicalScan(table->getProjectoin(0)); LogicalFilter::Condition filter_condition_1; filter_condition_1.add(table->getAttribute(3), AttributeComparator::GEQ, std::string("10107")); filter_condition_1.add(table->getAttribute(3), AttributeComparator::L, (void*)&value); LogicalOperator* filter_1 = new LogicalFilter(filter_condition_1, cj_scan); const NodeID collector_node_id = 0; LogicalOperator* root = new LogicalQueryPlanRoot( collector_node_id, filter_1, LogicalQueryPlanRoot::PERFORMANCE); PerformanceMonitor* executable_query_plan = (PerformanceMonitor*)root->GetPhysicalPlan(1024 * 64); // executable_query_plan->print(); executable_query_plan->Open(); while (executable_query_plan->Next(0)) ; executable_query_plan->Close(); // ResultSet *result_set=executable_query_plan->getResultSet(); const unsigned long int number_of_tuples = executable_query_plan->GetNumberOfTuples(); printf("execution time: %4.4f seconds.\n", getSecond(start)); if (!print_test_name_result(number_of_tuples == 26820, "Low selectivity filter")) { printf("\tExpected:26695 actual: %d\n", number_of_tuples); } // result_set->~ResultSet(); delete executable_query_plan; root->~LogicalOperator(); }
static void query_select_aggregation() { /* * select sum(a+1)+count(a),b * from T * group by b * * notation: p a p s * */ unsigned long long int start = curtick(); TableDescriptor* table = Environment::getInstance()->getCatalog()->getTable("LINEITEM"); //===========================scan=========================== LogicalOperator* scan = new LogicalScan(table->getProjectoin(0)); //==========================project========================= vector<vector<ExpressionItem> > expr_list1; vector<ExpressionItem> expr1; vector<ExpressionItem> expr2; vector<ExpressionItem> expr3; vector<ExpressionItem> expr4; vector<ExpressionItem> expr5; vector<ExpressionItem> expr6; vector<ExpressionItem> expr7; vector<ExpressionItem> expr8; vector<ExpressionItem> expr9; vector<ExpressionItem> expr10; vector<ExpressionItem> expr11; vector<ExpressionItem> expr12; vector<ExpressionItem> expr13; vector<ExpressionItem> expr14; vector<ExpressionItem> expr15; vector<ExpressionItem> expr16; vector<ExpressionItem> expr17; ExpressionItem ei1; ExpressionItem ei1_1; ExpressionItem ei1_2; ExpressionItem ei1_3; ExpressionItem ei1_4; ExpressionItem ei1_5; ExpressionItem ei1_6; ExpressionItem ei1_7; ExpressionItem ei1_8; ExpressionItem ei1_9; ExpressionItem ei2; ExpressionItem ei3; ExpressionItem ei4; ExpressionItem ei5; ExpressionItem ei6; ExpressionItem ei7; ExpressionItem ei8; ExpressionItem ei9; ExpressionItem ei10; ExpressionItem ei11; ExpressionItem ei12; ExpressionItem ei13; ExpressionItem ei14; ExpressionItem ei15; ExpressionItem ei16; ExpressionItem ei17; ei1_1.setVariable("LINEITEM", "L_EXTENDEDPRICE"); ei1_2.setIntValue("1"); ei1_3.setVariable("LINEITEM", "L_DISCOUNT"); ei1_4.setOperator("-"); ei1_5.setOperator("*"); ei1_6.setIntValue("1"); ei1_7.setVariable("LINEITEM", "L_TEX"); ei1_8.setOperator("+"); ei1_9.setOperator("*"); ei1.setVariable("LINEITEM", "row_id"); ei2.setVariable("LINEITEM", "L_ORDERKEY"); ei3.setVariable("LINEITEM", "L_PARTKEY"); ei4.setVariable("LINEITEM", "L_SUPPKEY"); 
ei5.setVariable("LINEITEM", "L_LINENUMBER"); ei6.setVariable("LINEITEM", "L_QUANTITY"); ei7.setVariable("LINEITEM", "L_EXTENDEDPRICE"); ei8.setVariable("LINEITEM", "L_DISCOUNT"); ei9.setVariable("LINEITEM", "L_TEX"); ei10.setVariable("LINEITEM", "L_RETURNFLAG"); // ei10.size=1; ei11.setVariable("LINEITEM", "L_LINESTATUS"); // ei11.size=1; ei12.setVariable("LINEITEM", "L_SHIPDATE"); ei13.setVariable("LINEITEM", "L_COMMITDATE"); ei14.setVariable("LINEITEM", "L_RECEIPTDATE"); ei15.setVariable("LINEITEM", "L_SHIPINSTRUCT"); // ei15.size=25; ei16.setVariable("LINEITEM", "L_SHIPMODE"); // ei16.size=10; ei17.setVariable("LINEITEM", "L_COMMENT"); // ei17.size=44; expr1.push_back(ei1_1); expr1.push_back(ei1_2); expr1.push_back(ei1_3); expr1.push_back(ei1_4); expr1.push_back(ei1_5); // expr1.push_back(ei1_6); // expr1.push_back(ei1_7); // expr1.push_back(ei1_8); // expr1.push_back(ei1_9); // expr1.push_back(ei1); expr2.push_back(ei1_1); expr2.push_back(ei1_2); expr2.push_back(ei1_3); expr2.push_back(ei1_4); expr2.push_back(ei1_5); expr2.push_back(ei1_6); expr2.push_back(ei1_7); expr2.push_back(ei1_8); expr2.push_back(ei1_9); expr3.push_back(ei1_2); expr3.push_back(ei1_3); expr3.push_back(ei1_4); // expr3.push_back(ei3); expr4.push_back(ei4); expr5.push_back(ei5); expr6.push_back(ei6); expr7.push_back(ei7); expr8.push_back(ei8); expr9.push_back(ei9); expr10.push_back(ei10); expr11.push_back(ei11); expr12.push_back(ei12); expr13.push_back(ei13); expr14.push_back(ei14); expr15.push_back(ei15); expr16.push_back(ei16); expr17.push_back(ei17); expr_list1.push_back(expr10); expr_list1.push_back(expr11); expr_list1.push_back(expr6); expr_list1.push_back(expr7); expr_list1.push_back(expr1); expr_list1.push_back(expr2); expr_list1.push_back(expr8); expr_list1.push_back(expr3); expr_list1.push_back(expr10); expr_list1.push_back(expr11); // expr_list1.push_back(expr3); // expr_list1.push_back(expr4); // expr_list1.push_back(expr5); // expr_list1.push_back(expr8); // 
expr_list1.push_back(expr9); // expr_list1.push_back(expr12); // expr_list1.push_back(expr13); // expr_list1.push_back(expr14); // expr_list1.push_back(expr15); // expr_list1.push_back(expr16); // expr_list1.push_back(expr17); LogicalOperator* project1 = new LogicalProject(scan, expr_list1); //========================aggregation======================= std::vector<Attribute> group_by_attributes; group_by_attributes.push_back(table->getAttribute("L_RETURNFLAG")); group_by_attributes.push_back(table->getAttribute("L_LINESTATUS")); std::vector<Attribute> aggregation_attributes; aggregation_attributes.push_back(table->getAttribute("L_QUANTITY")); aggregation_attributes.push_back(table->getAttribute("L_EXTENDEDPRICE")); aggregation_attributes.push_back(table->getAttribute("L_DISCOUNT")); aggregation_attributes.push_back(Attribute(ATTRIBUTE_ANY)); std::vector<BlockStreamAggregationIterator::State::aggregation> aggregation_function; aggregation_function.push_back(BlockStreamAggregationIterator::State::sum); aggregation_function.push_back(BlockStreamAggregationIterator::State::sum); aggregation_function.push_back(BlockStreamAggregationIterator::State::sum); aggregation_function.push_back(BlockStreamAggregationIterator::State::count); LogicalOperator* aggregation = new LogicalAggregation(group_by_attributes, aggregation_attributes, aggregation_function, project1); //==========================project========================= vector<vector<ExpressionItem> > expr_list2; LogicalOperator* project2 = new LogicalProject(aggregation, expr_list2); //===========================root=========================== LogicalOperator* root = new LogicalQueryPlanRoot(0, project1, LogicalQueryPlanRoot::PERFORMANCE); cout << "performance is ok!" 
<< endl; PhysicalOperatorBase* physical_iterator_tree = root->GetPhysicalPlan(64 * 1024); // physical_iterator_tree->print(); physical_iterator_tree->Open(); while (physical_iterator_tree->Next(0)) ; physical_iterator_tree->Close(); printf("Q1: execution time: %4.4f second.\n", getSecond(start)); }
void Analyzer::analyse(const AttributeID &attrID) { Catalog *catalog = Catalog::getInstance(); TableDescriptor* table = catalog->getTable(attrID.table_id); ProjectionDescriptor * projection = NULL; unsigned pidSize = table->getNumberOfProjection(); const Attribute attr = table->getAttribute(attrID.offset); for (unsigned i = 0; i < pidSize; ++i) { if (table->getProjectoin(i)->hasAttribute(attr)) { projection = table->getProjectoin(i); break; } } std::vector<Attribute> group_by_attributes; std::vector<Attribute> aggregation_attributes; group_by_attributes.push_back(attr); aggregation_attributes.push_back(attr); std::vector<BlockStreamAggregationIterator::State::aggregation> aggregation_function; aggregation_function.push_back( BlockStreamAggregationIterator::State::count); LogicalOperator* sb_payload_scan = new LogicalScan(projection); LogicalOperator* aggregation = new Aggregation(group_by_attributes, aggregation_attributes, aggregation_function, sb_payload_scan); const NodeID collector_node_id = 0; LogicalOperator* root = new LogicalQueryPlanRoot(collector_node_id, aggregation, LogicalQueryPlanRoot::RESULTCOLLECTOR); BlockStreamIteratorBase* collector = root->getIteratorTree( 1024 * 64 - sizeof(unsigned)); collector->open(); collector->next(0); collector->close(); ResultSet* resultset = collector->getResultSet(); ResultSet::Iterator it = resultset->createIterator(); BlockStreamBase* block; void* tuple; BlockStreamBase::BlockStreamTraverseIterator *block_it; unsigned long valueCount = resultset->getNumberOftuples(); unsigned long tupleCount = 0; TuplePtr *list = new TuplePtr[valueCount]; unsigned long i = 0; while (block = (BlockStreamBase*) it.atomicNextBlock()) { block_it = block->createIterator(); while (tuple = block_it->nextTuple()) { list[i++] = tuple; tupleCount += getFrequency(tuple, attr.attrType); } } int magicNumber = 100; StatisticOnTable *stat = new StatisticOnTable(magicNumber); stat->setValueCount(valueCount); stat->setTupleCount(tupleCount); 
qsort_r(list, valueCount, sizeof(void *), compare, (void *) (attr.attrType->operate)); mcvAnalyse(list, valueCount, attr, (Histogram *) stat); equiDepthAnalyse(list, valueCount, attr, (Histogram *) stat); // StatManager::getInstance()->addStat(attrID, stat); StatManager::getInstance()->getTableStatistic(attrID.table_id); delete list; resultset->destory(); }
/// Get the stringified AutochunkFixer so we can fix up the intervals in execute(). /// @see LogicalSlice::getInspectable() void inspectLogicalOp(LogicalOperator const& lop) override { setControlCookie(lop.getInspectable()); }
static void query_select_fzh() { /* * select sum(a+1)+count(a),b * from T * group by b * * notation: p a p s * */ unsigned long long int start = curtick(); TableDescriptor* table = Environment::getInstance()->getCatalog()->getTable("LINEITEM"); //===========================scan=========================== LogicalOperator* scan = new LogicalScan(table->getProjectoin(0)); //==========================project========================= vector<vector<ExpressionItem> > expr_list1; vector<ExpressionItem> expr1; vector<ExpressionItem> expr2; vector<ExpressionItem> expr3; vector<ExpressionItem> expr4; vector<ExpressionItem> expr5; vector<ExpressionItem> expr6; vector<ExpressionItem> expr7; vector<ExpressionItem> expr8; vector<ExpressionItem> expr9; vector<ExpressionItem> expr10; vector<ExpressionItem> expr11; vector<ExpressionItem> expr12; vector<ExpressionItem> expr13; vector<ExpressionItem> expr14; vector<ExpressionItem> expr15; vector<ExpressionItem> expr16; vector<ExpressionItem> expr17; ExpressionItem ei1; ExpressionItem ei1_1; ExpressionItem ei1_2; ExpressionItem ei1_3; ExpressionItem ei1_4; ExpressionItem ei1_5; ExpressionItem ei1_6; ExpressionItem ei1_7; ExpressionItem ei1_8; ExpressionItem ei1_9; ExpressionItem ei2; ExpressionItem ei3; ExpressionItem ei4; ExpressionItem ei5; ExpressionItem ei6; ExpressionItem ei7; ExpressionItem ei8; ExpressionItem ei9; ExpressionItem ei10; ExpressionItem ei11; ExpressionItem ei12; ExpressionItem ei13; ExpressionItem ei14; ExpressionItem ei15; ExpressionItem ei16; ExpressionItem ei17; ei1_1.setVariable("LINEITEM.row_id"); // ei1_2.setVariable("LINEITEM.L_ORDERKEY"); ei1_2.setIntValue("1"); ei1_3.setOperator("+"); expr1.push_back(ei1_1); expr1.push_back(ei1_2); expr1.push_back(ei1_3); expr_list1.push_back(expr1); LogicalOperator* project1 = new LogicalProject(scan, expr_list1); //========================aggregation======================= std::vector<Attribute> group_by_attributes; 
group_by_attributes.push_back(table->getAttribute("L_RETURNFLAG")); group_by_attributes.push_back(table->getAttribute("L_LINESTATUS")); std::vector<Attribute> aggregation_attributes; aggregation_attributes.push_back(table->getAttribute("L_QUANTITY")); aggregation_attributes.push_back(table->getAttribute("L_EXTENDEDPRICE")); aggregation_attributes.push_back(table->getAttribute("L_DISCOUNT")); aggregation_attributes.push_back(Attribute(ATTRIBUTE_ANY)); std::vector<PhysicalAggregation::State::Aggregation> aggregation_function; aggregation_function.push_back(PhysicalAggregation::State::kSum); aggregation_function.push_back(PhysicalAggregation::State::kSum); aggregation_function.push_back(PhysicalAggregation::State::kSum); aggregation_function.push_back(PhysicalAggregation::State::kCount); LogicalOperator* aggregation = new LogicalAggregation(group_by_attributes, aggregation_attributes, aggregation_function, project1); //==========================project========================= vector<vector<ExpressionItem> > expr_list2; ExpressionItem ei21_1; ei21_1.setVariable("LINEITEM.row_id+1"); vector<ExpressionItem> expr21; expr21.push_back(ei21_1); expr_list2.push_back(expr21); LogicalOperator* project2 = new LogicalProject(project1, expr_list2); //===========================root=========================== LogicalOperator* root = new LogicalQueryPlanRoot(0, project2, LogicalQueryPlanRoot::PRINT); cout << "performance is ok!" << endl; PhysicalOperatorBase* physical_iterator_tree = root->GetPhysicalPlan(64 * 1024); // physical_iterator_tree->print(); physical_iterator_tree->Open(); while (physical_iterator_tree->Next(0)) ; physical_iterator_tree->Close(); printf("Q1: execution time: %4.4f second.\n", getSecond(start)); }
static LogicalOperator *where_from2logicalplan( Node * parsetree) //实现where_from_parsetree(即将where转换到from_list后的)到logicalplan的转换 { if (parsetree == NULL) { return NULL; } switch (parsetree->type) { case t_table: // table节点获得scan 和在该节点上condition的filter { Table *node = (Table *)parsetree; LogicalOperator *tablescan; if (node->issubquery == 0) { tablescan = new LogicalScan(Environment::getInstance() ->getCatalog() ->getTable(std::string(node->tablename)) ->getProjectoin(0)); // todo // // change for selecting best projection // tablescan=new // LogicalScan(Environment::getInstance()->getCatalog()->getTable(std::string(node->tablename))->get_table_id());// } else // need to modify the output_schema_attrname from the subquery to // the form of subquery's alias.attrname { tablescan = parsetree2logicalplan(node->subquery); vector<Attribute> output_attribute = tablescan->GetPlanContext().attribute_list_; vector<QNode *> exprTree; string subquery_alias = string(node->astablename); for (int i = 0; i < output_attribute.size(); i++) { string attrname = output_attribute[i].attrName; int pos; for (pos = 0; pos < attrname.size() && attrname[pos] != '.'; pos++) ; if (pos < attrname.size()) { attrname = attrname.substr(pos + 1, attrname.size() - pos - 1); } exprTree.push_back( new QColcumns(subquery_alias.c_str(), attrname.c_str(), output_attribute[i].attrType->type, string(subquery_alias + "." 
+ attrname).c_str())); // cout<<"The "<<i<<" //"<<subquery_alias+"."+attrname<<endl; } tablescan = new LogicalProject(tablescan, exprTree); } Expr_list_header *whcdn = (Expr_list_header *)node->whcdn; if (whcdn->header != NULL) { assert(tablescan != NULL); Node *p; bool hasin = false; vector<QNode *> v_qual; for (p = whcdn->header; p != NULL; p = ((Expr_list *)p)->next) { QNode *qual = transformqual((Node *)((Expr_list *)p)->data, tablescan); v_qual.push_back(qual); } LogicalOperator *filter = new LogicalFilter(tablescan, v_qual); if (hasin == true) { for (p = whcdn->header; p != NULL; p = ((Expr_list *)p)->next) { filter = solve_insubquery(((Expr_list *)p)->data, filter); } } return filter; } else { return tablescan; } } break; case t_from_list: //由from_list递归进入args/next,并获得在其上的equaljoin { From_list *node = (From_list *)parsetree; LogicalOperator *filter_1 = where_from2logicalplan(node->args); LogicalOperator *filter_2 = where_from2logicalplan(node->next); // maybe NULL LogicalOperator *lopfrom = NULL; if (filter_2 == NULL) // a join b on c where a.a>0; { Expr_list_header *whcdn = (Expr_list_header *)node->whcdn; if (whcdn->header != NULL) { Node *p; vector<QNode *> v_qual; for ( p = whcdn->header; p != NULL; p = ((Expr_list *) p)->next) //应该根据getdataflow的信息确定joinpair跟filter1/2是否一致 { QNode *qual = transformqual((Node *)((Expr_list *)p)->data, filter_1); v_qual.push_back(qual); } if (v_qual.size() > 0) { lopfrom = new LogicalFilter(filter_1, v_qual); } else { lopfrom = filter_1; } return lopfrom; } else { return filter_1; } } Expr_list_header *whcdn = (Expr_list_header *)node->whcdn; if (whcdn->header != NULL) { vector<LogicalEqualJoin::JoinPair> join_pair_list; Node *p; vector<QNode *> v_qual; vector<Node *> raw_qual; for (p = whcdn->header; p != NULL; p = ((Expr_list *)p)->next) { int fg = getjoinpairlist((Node *)((Expr_list *)p)->data, join_pair_list, filter_1, filter_2); if (fg == 0) // get raw qualification from whcdn { raw_qual.push_back((Node *)((Expr_list 
*)p)->data); } } if (join_pair_list.size() > 0) { lopfrom = new LogicalEqualJoin(join_pair_list, filter_1, filter_2); } else // other join { lopfrom = new LogicalCrossJoin(filter_1, filter_2); } for (int i = 0; i < raw_qual.size(); i++) { v_qual.push_back(transformqual(raw_qual[i], lopfrom)); } if (v_qual.size() > 0) { lopfrom = new LogicalFilter(lopfrom, v_qual); } return lopfrom; } else // other to crossjoin { lopfrom = new LogicalCrossJoin(filter_1, filter_2); return lopfrom; } } break; case t_join: { Join *node = (Join *)parsetree; LogicalOperator *filter_1 = where_from2logicalplan(node->lnext); LogicalOperator *filter_2 = where_from2logicalplan(node->rnext); if (node->condition != NULL) { vector<LogicalEqualJoin::JoinPair> join_pair_list; Node *p; vector<QNode *> v_qual; vector<Node *> raw_qual; for (p = node->condition; p != NULL; p = ((Expr_list *)p)->next) { int fg = getjoinpairlist((Node *)((Expr_list *)p)->data, join_pair_list, filter_1, filter_2); if (fg == 0) // get raw qualification from whcdn { raw_qual.push_back((Node *)((Expr_list *)p)->data); } } LogicalOperator *join; if (join_pair_list.size() > 0) { join = new LogicalEqualJoin(join_pair_list, filter_1, filter_2); } else // other join { join = new LogicalCrossJoin(filter_1, filter_2); } for (int i = 0; i < raw_qual.size(); i++) { v_qual.push_back(transformqual(raw_qual[i], join)); } if (v_qual.size() > 0) { join = new LogicalFilter(join, v_qual); } return join; } else // other to crossjoin { LogicalOperator *join = new LogicalCrossJoin(filter_1, filter_2); return join; } } break; default: { SQLParse_elog("parsetree2logicalplan type error"); return NULL; } } return NULL; }
/**
 * Builds a histogram for one attribute.
 *
 * Picks the first projection of the table that contains the attribute, runs
 * scan -> group-by(attribute)/count -> result collector, gathers a pointer
 * to every result tuple, sorts the pointers with the attribute's operate
 * object and feeds them to mcvAnalyse() and equiDepthAnalyse().
 *
 * @param attr_id  identifies the table (table_id) and attribute (offset)
 * @param nbuckets bucket count passed to the Histogram constructor
 * @return the newly allocated Histogram
 */
Histogram* Analyzer::computeHistogram(const AttributeID& attr_id,
                                      const unsigned nbuckets) {
  Catalog* catalog = Catalog::getInstance();
  TableDescriptor* table = catalog->getTable(attr_id.table_id);
  const Attribute attr = table->getAttribute(attr_id.offset);

  // nbuckets is unsigned, hence %u.
  printf("Compute for histogram for attribute %s (%u buckets)\n",
         attr.attrName.c_str(), nbuckets);

  // First projection that carries the attribute.
  // NOTE(review): stays NULL when no projection has it — confirm that
  // LogicalScan tolerates that or that it cannot happen.
  ProjectionDescriptor* projection = NULL;
  unsigned pidSize = table->getNumberOfProjection();
  for (unsigned i = 0; i < pidSize; ++i) {
    if (table->getProjectoin(i)->hasAttribute(attr)) {
      projection = table->getProjectoin(i);
      break;
    }
  }

  std::vector<Attribute> group_by_attributes;
  std::vector<Attribute> aggregation_attributes;
  group_by_attributes.push_back(attr);
  aggregation_attributes.push_back(attr);
  std::vector<PhysicalAggregation::State::Aggregation> aggregation_function;
  aggregation_function.push_back(PhysicalAggregation::State::kCount);

  LogicalOperator* sb_payload_scan = new LogicalScan(projection);
  LogicalOperator* aggregation =
      new LogicalAggregation(group_by_attributes, aggregation_attributes,
                             aggregation_function, sb_payload_scan);
  const NodeID collector_node_id = 0;
  LogicalOperator* root = new LogicalQueryPlanRoot(
      collector_node_id, aggregation, LogicalQueryPlanRoot::kResultCollector);

  PhysicalOperatorBase* collector =
      root->GetPhysicalPlan(1024 * 64 - sizeof(unsigned));
  collector->Open();
  collector->Next(0);
  collector->Close();

  ResultSet* resultset = collector->GetResultSet();
  ResultSet::Iterator it = resultset->createIterator();
  BlockStreamBase* block;
  void* tuple;
  BlockStreamBase::BlockStreamTraverseIterator* block_it;

  unsigned long valueCount = resultset->getNumberOftuples();
  unsigned long tupleCount = 0;
  TuplePtr* list = new TuplePtr[valueCount];
  unsigned long i = 0;

  // Record every result tuple and accumulate its frequency.
  while ((block = (BlockStreamBase*)it.atomicNextBlock()) != NULL) {
    block_it = block->createIterator();
    while ((tuple = block_it->nextTuple()) != NULL) {
      list[i++] = tuple;
      tupleCount += getFrequency(tuple, attr.attrType);
    }
  }

  Histogram* stat = new Histogram(nbuckets);
  stat->setValueCount(valueCount);
  stat->setTupleCount(tupleCount);

  qsort_r(list, valueCount, sizeof(void*), compare,
          (void*)(attr.attrType->operate));
  mcvAnalyse(list, valueCount, attr, (Histogram*)stat);
  equiDepthAnalyse(list, valueCount, attr, (Histogram*)stat);

  // `list` came from new[], so it must be released with delete[].
  delete[] list;
  resultset->destory();
  return stat;
}