void Q1() { string sql = "\ select\ c_custkey,\ c_name,\ sum(l_extendedprice * (1 - l_discount)) as revenue,\ c_acctbal,\ n_name,\ c_address,\ c_phone,\ c_comment\ from\ customer,\ orders,\ lineitem,\ nation\ where\ c_custkey = o_custkey\ and l_orderkey = o_orderkey\ and o_orderdate >= date ':1'\ and o_orderdate < date ':1' + interval '3' month\ and l_returnflag = 'R'\ and c_nationkey = n_nationkey\ group by\ c_custkey,\ c_name,\ c_acctbal,\ c_phone,\ n_name,\ c_address,\ c_comment\ order by\ revenue desc;"; CalpontSelectExecutionPlan csep; // Returned columns CalpontSelectExecutionPlan::ReturnedColumnList returnedColumnList; SimpleColumn *c1 = new SimpleColumn("tpch.customer.l_returnflag"); returnedColumnList.push_back(c1); SimpleColumn *c2 = new SimpleColumn("tpch.customer.c_name"); returnedColumnList.push_back(c2); ArithmeticColumn *c3 = new ArithmeticColumn("sum(tpch.lineitem.l_extendedprice*(1-tpch.lineitem.l_discount))"); c3->alias("revenue"); returnedColumnList.push_back(c3); SimpleColumn *c4 = new SimpleColumn("tpch.customer.c_acctbal"); returnedColumnList.push_back(c4); SimpleColumn *c5 = new SimpleColumn("tpch.nation.n_name"); returnedColumnList.push_back(c5); SimpleColumn *c6 = new SimpleColumn("tpch.customer.c_address"); returnedColumnList.push_back(c6); SimpleColumn *c7 = new SimpleColumn("tpch.customer.c_phone"); returnedColumnList.push_back(c7); SimpleColumn *c8 = new SimpleColumn("tpch.costomer.c_comment"); returnedColumnList.push_back(c8); csep.returnedCols(returnedColumnList); // Filters CalpontSelectExecutionPlan::FilterTokenList filterTokenList; SimpleFilter *f1 = new SimpleFilter (new Operator("="), new SimpleColumn("tpch.customer.c_custkey"), new SimpleColumn("tpch.orders.o_custkey")); filterTokenList.push_back(f1); filterTokenList.push_back( new Operator ("and")); SimpleFilter *f2 = new SimpleFilter (new Operator("="), new SimpleColumn("tpch.lineitem.l_orderkey"), new SimpleColumn("tpch.orders.o_orderkey")); filterTokenList.push_back(f2); filterTokenList.push_back( new Operator ("and")); SimpleFilter *f3 = new SimpleFilter (new Operator(">="), new SimpleColumn("tpch.orders.o_orderdate"), new ArithmeticColumn("date(':1')")); filterTokenList.push_back(f3); filterTokenList.push_back( new Operator ("and")); SimpleFilter *f4 = new SimpleFilter (new Operator("<"), new SimpleColumn("tpch.orders.o_orderdate"), new ArithmeticColumn("date(':1') + interval ('3', month)")); filterTokenList.push_back(f4); filterTokenList.push_back( new Operator ("and")); SimpleFilter *f5 = new SimpleFilter (new Operator("="), new SimpleColumn("tpch.lineitem.l_returnflag"), new ConstantColumn("R")); filterTokenList.push_back(f5); filterTokenList.push_back( new Operator ("and")); SimpleFilter *f6 = new SimpleFilter (new Operator("="), new SimpleColumn("tpch.customer.c_nationkey"), new SimpleColumn("tpch.nation.n_nationkey")); filterTokenList.push_back(f6); csep.filterTokenList(filterTokenList); ParseTree *pt = const_cast<ParseTree*>(csep.filters()); pt->drawTree ("q10.dot"); // Group by CalpontSelectExecutionPlan::GroupByColumnList groupByList; groupByList.push_back(c1->clone()); groupByList.push_back(c2->clone()); groupByList.push_back(c4->clone()); groupByList.push_back(c7->clone()); groupByList.push_back(c5->clone()); groupByList.push_back(c6->clone()); groupByList.push_back(c8->clone()); csep.groupByCols (groupByList); // Order by CalpontSelectExecutionPlan::OrderByColumnList orderByList; ArithmeticColumn *o1 = new ArithmeticColumn(*c3); o1->asc(false); orderByList.push_back(o1); csep.orderByCols(orderByList); cout << csep; }
void derivedTableOptimization(SCSEP& csep) { // @bug5634. replace the unused column with ConstantColumn from derived table column list, // ExeMgr will not project ConstantColumn. Only count for local derived column. // subquery may carry main query derived table list for column reference, those // derived tables are not checked for optimization in this scope. CalpontSelectExecutionPlan::SelectList derivedTbList = csep->derivedTableList(); // @bug6156. Skip horizontal optimization for no table union. bool horizontalOptimization = true; for (uint i = 0; i < derivedTbList.size(); i++) { CalpontSelectExecutionPlan* plan = dynamic_cast<CalpontSelectExecutionPlan*>(derivedTbList[i].get()); CalpontSelectExecutionPlan::ReturnedColumnList cols = plan->returnedCols(); vector<CalpontSelectExecutionPlan::ReturnedColumnList> unionColVec; // only do vertical optimization for union all // @bug6134. Also skip the vertical optimization for select distinct // because all columns need to be projected to check the distinctness. bool verticalOptimization = false; if (plan->distinctUnionNum() == 0 && !plan->distinct()) { verticalOptimization = true; for (uint j = 0; j < plan->unionVec().size(); j++) { unionColVec.push_back( dynamic_cast<CalpontSelectExecutionPlan*>(plan->unionVec()[j].get())->returnedCols()); } } if (plan->tableList().empty()) horizontalOptimization = false; for (uint j = 0; j < plan->unionVec().size(); j++) { if (dynamic_cast<CalpontSelectExecutionPlan*>(plan->unionVec()[j].get())->tableList().empty()) { horizontalOptimization = false; break; } } if (verticalOptimization) { int64_t val = 1; for (uint i = 0; i < cols.size(); i++) { //if (cols[i]->derivedTable().empty()) if (cols[i]->refCount() == 0) { if (cols[i]->derivedRefCol()) cols[i]->derivedRefCol()->decRefCount(); cols[i].reset(new ConstantColumn(val)); for (uint j = 0; j < unionColVec.size(); j++) unionColVec[j][i].reset(new ConstantColumn(val)); } } // set back plan->returnedCols(cols); for (uint j = 0; j < unionColVec.size(); j++) dynamic_cast<CalpontSelectExecutionPlan*>(plan->unionVec()[j].get())->returnedCols(unionColVec[j]); } } /* * @bug5635. Move filters that only belongs to a derived table to inside the derived table. * 1. parse tree walk to populate derivedTableFilterMap and set null candidate on the tree. * 2. remove the null filters * 3. and the filters of derivedTableFilterMap and append to the WHERE filter of the derived table * * Note: * 1. Subquery filters is ignored because derived table can not be in subquery * 2. While walking tree, whenever a single derive simplefilter is encountered, * this filter is pushed to the corresponding stack * 2. Whenever an OR operator is encountered, all the filter stack of * that OR involving derived table are emptied and null candidate of each * stacked filter needs to be reset (not null) */ ParseTree* pt = csep->filters(); map<string, ParseTree*> derivedTbFilterMap; if (horizontalOptimization && pt) { pt->walk(setDerivedTable); setDerivedFilter(pt, derivedTbFilterMap, derivedTbList); csep->filters(pt); } // AND the filters of individual stack to the derived table filter tree // @todo union filters. // @todo outer join complication map<string, ParseTree*>::iterator mapIt; for (uint i = 0; i < derivedTbList.size(); i++) { CalpontSelectExecutionPlan* plan = dynamic_cast<CalpontSelectExecutionPlan*>(derivedTbList[i].get()); CalpontSelectExecutionPlan::ReturnedColumnList derivedColList = plan->returnedCols(); mapIt = derivedTbFilterMap.find(plan->derivedTbAlias()); if (mapIt != derivedTbFilterMap.end()) { // replace all derived column of this filter with real column from // derived table projection list. ParseTree* mainFilter = new ParseTree(); mainFilter->copyTree(*(mapIt->second)); replaceRefCol(mainFilter, derivedColList); ParseTree* derivedFilter = plan->filters(); if (derivedFilter) { LogicOperator* op = new LogicOperator("and"); ParseTree* filter = new ParseTree(op); filter->left(derivedFilter); filter->right(mainFilter); plan->filters(filter); } else { plan->filters(mainFilter); } // union filter handling for (uint j = 0; j < plan->unionVec().size(); j++) { CalpontSelectExecutionPlan* unionPlan = dynamic_cast<CalpontSelectExecutionPlan*>(plan->unionVec()[j].get()); CalpontSelectExecutionPlan::ReturnedColumnList unionColList = unionPlan->returnedCols(); ParseTree* mainFilterForUnion = new ParseTree(); mainFilterForUnion->copyTree(*(mapIt->second)); replaceRefCol(mainFilterForUnion, unionColList); ParseTree* unionFilter = unionPlan->filters(); if (unionFilter) { LogicOperator* op = new LogicOperator("and"); ParseTree* filter = new ParseTree(op); filter->left(unionFilter); filter->right(mainFilterForUnion); unionPlan->filters(filter); } else { unionPlan->filters(mainFilterForUnion); } } } } // clean derivedTbFilterMap because all the filters are copied for ( mapIt = derivedTbFilterMap.begin(); mapIt != derivedTbFilterMap.end(); ++mapIt ) delete (*mapIt).second; // recursively process the nested derived table for (uint i = 0; i < csep->subSelectList().size(); i++) { SCSEP subselect(boost::dynamic_pointer_cast<CalpontSelectExecutionPlan>(csep->subSelectList()[i])); derivedTableOptimization(subselect); } }
void Q1() { string sql = "\ select\ supp_nation,\ cust_nation,\ l_year,\ sum(volume) as revenue\ from\ (\ select\ n1.n_name as supp_nation,\ n2.n_name as cust_nation,\ extract(year from l_shipdate) as l_year,\ l_extendedprice * (1 - l_discount) as volume\ from\ supplier,\ lineitem,\ orders,\ customer,\ nation n1,\ nation n2\ where\ s_suppkey = l_suppkey\ and o_orderkey = l_orderkey\ and c_custkey = o_custkey\ and s_nationkey = n1.n_nationkey\ and c_nationkey = n2.n_nationkey\ and (\ (n1.n_name = ':1' and n2.n_name = ':2')\ or (n1.n_name = ':2' and n2.n_name = ':1')\ )\ and l_shipdate between date '1995-01-01' and date '1996-12-31'\ ) as shipping\ group by\ supp_nation,\ cust_nation,\ l_year\ order by\ supp_nation,\ cust_nation,\ l_year;"; CalpontSelectExecutionPlan csep; // Returned columns CalpontSelectExecutionPlan::ReturnedColumnList returnedColumnList; // these columns are from the temp table of FROM clause. // I hereby give schema name "calpont", table name "FROMTABLE", // and alias name "shipping" SimpleColumn *c1 = new SimpleColumn("calpont.FROMTABLE.supp_nation"); c1->tableAlias ("shipping"); returnedColumnList.push_back(c1); SimpleColumn *c2 = new SimpleColumn("calpont.FROMTABLE.cust_nation"); c2->tableAlias ("shipping"); returnedColumnList.push_back(c2); SimpleColumn *c3 = new SimpleColumn("tpch.lineitem.l_year"); returnedColumnList.push_back(c3); ArithmeticColumn *c4 = new ArithmeticColumn("sum(volumn)"); c4->alias("revenue"); returnedColumnList.push_back(c4); csep.returnedCols(returnedColumnList); // from subselect CalpontSelectExecutionPlan *subsep = new CalpontSelectExecutionPlan(CalpontSelectExecutionPlan::FROM); // subselect returned columns CalpontSelectExecutionPlan::ReturnedColumnList subReturnedColList; SimpleColumn *sc1 = new SimpleColumn("tpch.nation.n_name"); sc1->alias ("supp_nation"); sc1->tableAlias ("n1"); subReturnedColList.push_back(sc1); SimpleColumn *sc2 = new SimpleColumn("tpch.nation.n_name"); sc1->alias ("cust_nation"); sc1->tableAlias ("n2"); subReturnedColList.push_back(sc2); ArithmeticColumn *sc3 = new ArithmeticColumn("extract(year from tpch.lineitem.l_shipdate)"); sc3->alias("l_year"); subReturnedColList.push_back(sc3); ArithmeticColumn *sc4 = new ArithmeticColumn("tpch.lineitem.l_extendeprice * (1-tpch.lineitem.l_discount)"); sc3->alias("volume"); subReturnedColList.push_back(sc4); subsep->returnedCols(subReturnedColList); // subselect filters CalpontSelectExecutionPlan::FilterTokenList subFilterTokenList; SimpleFilter *sf1 = new SimpleFilter (new Operator("="), new SimpleColumn("tpch.supplier.s_suppkey"), new SimpleColumn("tpch.lineitem.l_suppkey")); subFilterTokenList.push_back(sf1); subFilterTokenList.push_back(new Operator("and")); SimpleFilter *sf2 = new SimpleFilter (new Operator("="), new SimpleColumn("tpch.orders.o_orderkey"), new SimpleColumn("tpch.lineitem.l_orderkey")); subFilterTokenList.push_back(sf2); subFilterTokenList.push_back(new Operator("and")); SimpleFilter *sf3 = new SimpleFilter (new Operator("="), new SimpleColumn("tpch.customer.c_custkey"), new SimpleColumn("tpch.orders.o_custkey")); subFilterTokenList.push_back(sf3); subFilterTokenList.push_back(new Operator("and")); SimpleColumn *n1 = new SimpleColumn ("tpch.nation.n_nationkey"); n1->tableAlias("n1"); SimpleFilter *sf4 = new SimpleFilter (new Operator("="), new SimpleColumn("tpch.supplier.s_nationkey"), n1); subFilterTokenList.push_back(sf4); subFilterTokenList.push_back(new Operator("and")); SimpleColumn *n2 = new SimpleColumn ("tpch.nation.n_nationkey"); n2->tableAlias("n2"); SimpleFilter *sf5 = new SimpleFilter (new Operator("="), new SimpleColumn("tpch.customer.c_nationkey"), n2); subFilterTokenList.push_back(sf5); subFilterTokenList.push_back(new Operator("and")); // ((n1.n_name = ':1' and n2.n_name = ':2') // or (n1.n_name = ':2' and n2.n_name = ':1')) subFilterTokenList.push_back(new Operator("(")); subFilterTokenList.push_back(new Operator("(")); SimpleColumn *n1_name = new SimpleColumn("tpch.nation.n_name"); n1_name->tableAlias ("n1"); SimpleFilter *sf6 = new SimpleFilter ( new Operator("="), n1_name, new ConstantColumn (":1")); subFilterTokenList.push_back(sf6); subFilterTokenList.push_back(new Operator("and")); SimpleColumn *n2_name = new SimpleColumn("tpch.nation.n_name"); n1_name->tableAlias ("n2"); SimpleFilter *sf7 = new SimpleFilter ( new Operator("="), n2_name, new ConstantColumn (":2")); subFilterTokenList.push_back(sf7); subFilterTokenList.push_back(new Operator (")")); subFilterTokenList.push_back(new Operator("or")); subFilterTokenList.push_back(new Operator("(")); SimpleFilter *sf8 = new SimpleFilter ( new Operator("="), new SimpleColumn(*n1_name), new ConstantColumn (":2")); subFilterTokenList.push_back(sf8); subFilterTokenList.push_back(new Operator("and")); SimpleFilter *sf9 = new SimpleFilter ( new Operator("="), new SimpleColumn(*n2_name), new ConstantColumn (":1")); subFilterTokenList.push_back(sf9); subFilterTokenList.push_back(new Operator (")")); subFilterTokenList.push_back(new Operator (")")); subFilterTokenList.push_back(new Operator("and")); SimpleFilter *sf10 = new SimpleFilter (new Operator (">="), new SimpleColumn ("tpch.lineitem.l_shipdate"), new ConstantColumn ("1995-01-01")); subFilterTokenList.push_back(sf10); subFilterTokenList.push_back(new Operator("and")); SimpleFilter *sf11 = new SimpleFilter (new Operator ("<="), new SimpleColumn ("tpch.lineitem.l_shipdate"), new ConstantColumn ("1995-01-06")); subFilterTokenList.push_back(sf11); subsep->filterTokenList(subFilterTokenList); // end of subselect in FROM. push FROM subselect to selectList // NOTE: only FROM subselect needs to be pushed into selectList. // Subselects in WHERE or HAVING clause are in where or having // filter parse tree. It may make more sense to change the member // fSelectList of CSEP class to fFromSubSelect (type CSEP*) CalpontSelectExecutionPlan::SelectList fromSubSelectList; fromSubSelectList.push_back(subsep); csep.subSelects(fromSubSelectList); ParseTree* pt = const_cast<ParseTree*>(subsep->filters()); pt->drawTree("q7.dot"); // Group by CalpontSelectExecutionPlan::GroupByColumnList groupByList; SimpleColumn *g1 = new SimpleColumn (*c1); groupByList.push_back (g1); SimpleColumn *g2 = new SimpleColumn (*c2); groupByList.push_back (g2); SimpleColumn *g3 = new SimpleColumn (*c3); groupByList.push_back (g3); csep.groupByCols(groupByList); // Order by CalpontSelectExecutionPlan::OrderByColumnList orderByList; SimpleColumn *o1 = new SimpleColumn(*c1); orderByList.push_back(o1); SimpleColumn *o2 = new SimpleColumn(*c2); orderByList.push_back(o2); SimpleColumn *o3 = new SimpleColumn(*c3); orderByList.push_back(o3); csep.orderByCols(orderByList); cout << csep; }
void selectExecutionPlan_1() { cout << "SQL: select r_regionkey from region, nation where n_regionkey = r_regionkey and n_regionkey = 2;" << endl; CalpontSelectExecutionPlan csep; CalpontSelectExecutionPlan::ReturnedColumnList colList; ParseTree* filterList; // returned columns SimpleColumn r_regionkey("tpch.region.r_regionkey"); SimpleColumn n_regionkey("tpch.nation.n_regionkey"); SimpleColumn p_partkey("tpch.part.p_partkey"); SimpleColumn n_name("tpch.nation.n_name"); SimpleColumn c_custkey("tpch.customer.c_custkey"); colList.push_back(new SimpleColumn(r_regionkey)); // filters CalpontSelectExecutionPlan::Parser parser; std::vector<Token> tokens; //tokens.push_back(Token(new Operator("("))); //tokens.push_back(Token(new Operator(")"))); tokens.push_back(Token(new SimpleFilter(new Operator("="), new SimpleColumn(r_regionkey), new SimpleColumn(n_regionkey)))); tokens.push_back(Token(new Operator("and"))); tokens.push_back(Token(new SimpleFilter(new Operator("="), new SimpleColumn(r_regionkey), new SimpleColumn(c_custkey)))); tokens.push_back(Token(new Operator("and"))); tokens.push_back(Token(new Operator("("))); tokens.push_back(Token(new SimpleFilter(new Operator("="), new SimpleColumn(n_regionkey), new ConstantColumn("779")))); tokens.push_back(Token(new Operator("or"))); tokens.push_back(Token(new SimpleFilter(new Operator("!="), new SimpleColumn(n_name), new ConstantColumn("'ASIA'")))); tokens.push_back(Token(new Operator(")"))); tokens.push_back(Token(new Operator ("and"))); tokens.push_back(Token(new Operator("("))); tokens.push_back(Token(new SimpleFilter(new Operator("<"), new SimpleColumn(n_regionkey), new ConstantColumn("77")))); tokens.push_back(Token(new Operator("or"))); tokens.push_back(Token(new SimpleFilter(new Operator(">"), new SimpleColumn(p_partkey), new ConstantColumn("7007")))); tokens.push_back(Token(new Operator(")"))); filterList = parser.parse(tokens.begin(), tokens.end()); // draw filterList tree filterList->drawTree("selectExecutionPlan_1.dot"); // calpont execution plan csep.returnedCols (colList); csep.filters (filterList); cout << "\nCalpont Execution Plan:" << endl; cout << csep << endl; cout << " --- end of test 1 ---" << endl; filterList->walk(walkfnString); }
void Q1() { string sql = "\ select\ l_orderkey,\ sum(l_extendedprice * (1 - l_discount)) as revenue,\ o_orderdate,\ o_shippriority\ from\ customer,\ orders,\ lineitem\ where\ c_mktsegment = ':1'\ and c_custkey = o_custkey\ and l_orderkey = o_orderkey\ and o_orderdate < date ':2'\ and l_shipdate > date ':2'\ group by\ l_orderkey,\ o_orderdate,\ o_shippriority\ order by\ revenue desc,\ o_orderdate;"; CalpontSelectExecutionPlan csep; // Returned columns CalpontSelectExecutionPlan::ReturnedColumnList returnedColumnList; SimpleColumn *c1 = new SimpleColumn("tpch.lineitem.l_orderkey"); returnedColumnList.push_back(c1); ArithmeticColumn *c2 = new ArithmeticColumn ("sum(tpch.lineitem.l_extendedprice * (1 - tpch.lineitem.l_discount))"); c2->alias("revenue"); returnedColumnList.push_back(c2); SimpleColumn *c3 = new SimpleColumn("tpch.orders.o_orderdate"); returnedColumnList.push_back(c3); SimpleColumn *c4 = new SimpleColumn("tpch.orders.o_shippriority"); returnedColumnList.push_back(c4); csep.returnedCols(returnedColumnList); // Filters CalpontSelectExecutionPlan::FilterTokenList filterTokenList; SimpleFilter *f1 = new SimpleFilter (new Operator("="), new SimpleColumn("tpch.customer.c_mktsegment"), new ConstantColumn(":1")); filterTokenList.push_back(f1); filterTokenList.push_back(new Operator("and")); SimpleFilter *f2 = new SimpleFilter (new Operator("="), new SimpleColumn("tpch.customer.c_custkey"), new SimpleColumn("tpch.orders.o_custkey")); filterTokenList.push_back(f2); filterTokenList.push_back(new Operator("and")); SimpleFilter *f3 = new SimpleFilter (new Operator("="), new SimpleColumn("tpch.lineitem.l_orderkey"), new SimpleColumn("tpch.orders.o_orderkey")); filterTokenList.push_back(f3); filterTokenList.push_back(new Operator("and")); SimpleFilter *f4 = new SimpleFilter (new Operator("<"), new SimpleColumn("tpch.orders.o_orderdate"), new ArithmeticColumn("date(':2')")); filterTokenList.push_back(f4); filterTokenList.push_back(new Operator("and")); SimpleFilter *f5 = new SimpleFilter (new Operator(">"), new SimpleColumn("tpch.lineitem.l_shipdate"), new ArithmeticColumn("date(':2')")); filterTokenList.push_back(f5); csep.filterTokenList(filterTokenList); ParseTree* pt = const_cast<ParseTree*>(csep.filters()); pt->drawTree("q3.dot"); // Group by CalpontSelectExecutionPlan::GroupByColumnList groupByList; SimpleColumn *g1 = new SimpleColumn(*c1); groupByList.push_back(g1); SimpleColumn *g2 = new SimpleColumn(*c3); groupByList.push_back(g2); SimpleColumn *g3 = new SimpleColumn(*c4); groupByList.push_back(g3); csep.groupByCols(groupByList); // Order by CalpontSelectExecutionPlan::OrderByColumnList orderByList; //ArithmeticColumn *test = new ArithmeticColumn("a+b"); ArithmeticColumn *o1 = new ArithmeticColumn(*c2); o1->asc(false); orderByList.push_back(o1); SimpleColumn *o2 = new SimpleColumn(*c3); orderByList.push_back(o2); csep.orderByCols(orderByList); cout << csep; }