void makeAntiJoin(const ParseTree* n) { TreeNode* tn = n->data(); SimpleFilter* sf = dynamic_cast<SimpleFilter*>(tn); if (!sf) return; uint64_t lJoinInfo = sf->lhs()->joinInfo(); if (lJoinInfo & JOIN_SEMI) { lJoinInfo &= ~JOIN_SEMI; lJoinInfo |= JOIN_ANTI; if (lJoinInfo & JOIN_NULLMATCH_CANDIDATE) lJoinInfo |= JOIN_NULL_MATCH; sf->lhs()->joinInfo(lJoinInfo); } uint64_t rJoinInfo = sf->rhs()->joinInfo(); if (rJoinInfo & JOIN_SEMI) { rJoinInfo &= ~JOIN_SEMI; rJoinInfo |= JOIN_ANTI; if (rJoinInfo & JOIN_NULLMATCH_CANDIDATE) rJoinInfo |= JOIN_NULL_MATCH; sf->rhs()->joinInfo(rJoinInfo); } }
/**
 * Build the filter "<tcn column> <opstr> <cc>".
 *
 * The operator's operation type is its own result type with colDataType
 * replaced by the constant's colDataType. The left operand is a new
 * SimpleColumn described by tcn, with its OID and column type looked up
 * through csc; the table name doubles as the table alias and tcn.toString()
 * is used as the column alias. cc is handed to the filter as its right
 * operand.
 *
 * @return a SimpleFilter allocated with new; presumably the caller takes
 *         ownership — confirm against call sites.
 */
SimpleFilter* createSimpleFilter
(
    CalpontSystemCatalog*& csc,
    const CalpontSystemCatalog::TableColName& tcn,
    const string& opstr,
    ConstantColumn* cc
)
{
    SimpleFilter* filter = new SimpleFilter();

    // --- operator -------------------------------------------------------
    Operator* oper = new Operator();
    oper->data(opstr);

    CalpontSystemCatalog::ColType operType;
    operType = oper->resultType();
    operType.colDataType = cc->resultType().colDataType;
    oper->operationType(operType);

    SOP operPtr(oper);
    filter->op(operPtr);

    // --- left operand: the catalog column -------------------------------
    CalpontSystemCatalog::OID columnOid = csc->lookupOID(tcn);
    CalpontSystemCatalog::ColType columnType = csc->colType(columnOid);

    SimpleColumn* column = new SimpleColumn();
    column->schemaName(tcn.schema);
    column->tableName(tcn.table);
    column->tableAlias(tcn.table);
    column->columnName(tcn.column);
    column->oid(columnOid);
    column->resultType(columnType);
    column->alias(tcn.toString());

    // --- assemble -------------------------------------------------------
    filter->lhs(column);
    filter->rhs(cc);

    return filter;
}
/** * Handle MySQL's plugin functions * This is mostly for handling the null related functions that MySQL adds to the execution plan */ void InSub::handleFunc(gp_walk_info* gwip, Item_func* func) { if (func->functype() == Item_func::TRIG_COND_FUNC || func->functype() == Item_func::COND_OR_FUNC) { // purpose: remove the isnull() function from the parsetree in ptWorkStack. // IDB handles the null semantics in the join operation // trigcond(or_cond) is the only form we recognize for now if (func->argument_count() > 2) { fGwip.fatalParseError = true; fGwip.parseErrorText = "Unsupported item in IN subquery"; return; } Item_cond* cond; if (func->functype() == Item_func::TRIG_COND_FUNC) { Item* item; if (func->arguments()[0]->type() == Item::REF_ITEM) item = (Item_ref*)(func->arguments()[0])->real_item(); else item = func->arguments()[0]; cond = (Item_cond*)(item); } else { cond = (Item_cond*)(func); } if (cond->functype() == Item_func::COND_OR_FUNC) { // (cache=item) case. do nothing. ignore trigcond()? if (cond->argument_list()->elements == 1) return; // (cache=item or isnull(item)) case. remove "or isnull()" if (cond->argument_list()->elements == 2) { // don't know how to deal with this. don't think it's a fatal error either. if (gwip->ptWorkStack.empty()) return; ParseTree* pt = gwip->ptWorkStack.top(); if (!pt->left() || !pt->right()) return; SimpleFilter* sf = dynamic_cast<SimpleFilter*>(pt->left()->data()); //assert (sf && sf->op()->op() == execplan::OP_ISNULL); if (!sf || sf->op()->op() != execplan::OP_ISNULL) return; delete sf; sf = dynamic_cast<SimpleFilter*>(pt->right()->data()); //idbassert(sf && sf->op()->op() == execplan::OP_EQ); if (!sf || sf->op()->op() != execplan::OP_EQ) return; // set NULLMATCH for both operand. It's really a setting for the join. // should only set NULLMATCH when the subtype is NOT_IN. 
for some IN subquery // with aggregation column, MySQL inefficiently convert to: // (cache=item or item is null) and item is not null, which is equivalent to // cache = item. Do not set NULLMATCH for this case. // Because we don't know IN or NOTIN yet, set candidate bit and switch to NULLMATCH // later in handleNot function. if (sf->lhs()->joinInfo() & JOIN_CORRELATED) sf->lhs()->joinInfo(sf->lhs()->joinInfo() | JOIN_NULLMATCH_CANDIDATE); if (sf->rhs()->joinInfo() & JOIN_CORRELATED) sf->rhs()->joinInfo(sf->rhs()->joinInfo() | JOIN_NULLMATCH_CANDIDATE); pt = pt->right(); gwip->ptWorkStack.pop(); gwip->ptWorkStack.push(pt); } } else if (cond->functype() == Item_func::EQ_FUNC) { // not in (select const ...) if (gwip->ptWorkStack.empty()) return; ParseTree* pt = gwip->ptWorkStack.top(); SimpleFilter* sf = dynamic_cast<SimpleFilter*>(pt->data()); if (!sf || sf->op()->op() != execplan::OP_EQ) return; if (sf->lhs()->joinInfo() & JOIN_CORRELATED) sf->lhs()->joinInfo(sf->lhs()->joinInfo() | JOIN_NULLMATCH_CANDIDATE); if (sf->rhs()->joinInfo() & JOIN_CORRELATED) sf->rhs()->joinInfo(sf->rhs()->joinInfo() | JOIN_NULLMATCH_CANDIDATE); } } }
/**
 * Copy constructor.
 *
 * Copies the operator, index flag and join flag from rhs, clones both
 * operands, and then rebuilds the simple / aggregate / window-function column
 * lists from the freshly cloned operands. Each operand is classified by the
 * first matching type in this order: SimpleColumn, AggregateColumn,
 * FunctionColumn, ArithmeticColumn, WindowFunctionColumn. Function and
 * arithmetic columns contribute the contents of their own three lists.
 *
 * NOTE(review): there is no base-class initializer in the member-init list,
 * so any base subobject is default-constructed rather than copied from rhs —
 * confirm that is intended.
 */
SimpleFilter::SimpleFilter(const SimpleFilter& rhs) :
    fOp(rhs.op()),
    fIndexFlag(rhs.indexFlag()),
    fJoinFlag(rhs.joinFlag())
{
    fLhs = rhs.lhs()->clone();
    fRhs = rhs.rhs()->clone();

    fSimpleColumnList.clear();
    fAggColumnList.clear();
    fWindowFunctionColumnList.clear();

    // ---- columns referenced by the left operand ------------------------
    if (SimpleColumn* simpleCol = dynamic_cast<SimpleColumn*>(fLhs))
    {
        fSimpleColumnList.push_back(simpleCol);
    }
    else if (AggregateColumn* aggCol = dynamic_cast<AggregateColumn*>(fLhs))
    {
        fAggColumnList.push_back(aggCol);
    }
    else if (FunctionColumn* funcCol = dynamic_cast<FunctionColumn*>(fLhs))
    {
        fSimpleColumnList.insert(fSimpleColumnList.end(),
                                 funcCol->simpleColumnList().begin(),
                                 funcCol->simpleColumnList().end());
        fAggColumnList.insert(fAggColumnList.end(),
                              funcCol->aggColumnList().begin(),
                              funcCol->aggColumnList().end());
        fWindowFunctionColumnList.insert(fWindowFunctionColumnList.end(),
                                         funcCol->windowfunctionColumnList().begin(),
                                         funcCol->windowfunctionColumnList().end());
    }
    else if (ArithmeticColumn* arithCol = dynamic_cast<ArithmeticColumn*>(fLhs))
    {
        fSimpleColumnList.insert(fSimpleColumnList.end(),
                                 arithCol->simpleColumnList().begin(),
                                 arithCol->simpleColumnList().end());
        fAggColumnList.insert(fAggColumnList.end(),
                              arithCol->aggColumnList().begin(),
                              arithCol->aggColumnList().end());
        fWindowFunctionColumnList.insert(fWindowFunctionColumnList.end(),
                                         arithCol->windowfunctionColumnList().begin(),
                                         arithCol->windowfunctionColumnList().end());
    }
    else if (WindowFunctionColumn* winCol = dynamic_cast<WindowFunctionColumn*>(fLhs))
    {
        fWindowFunctionColumnList.push_back(winCol);
    }

    // ---- columns referenced by the right operand -----------------------
    if (SimpleColumn* simpleCol = dynamic_cast<SimpleColumn*>(fRhs))
    {
        fSimpleColumnList.push_back(simpleCol);
    }
    else if (AggregateColumn* aggCol = dynamic_cast<AggregateColumn*>(fRhs))
    {
        fAggColumnList.push_back(aggCol);
    }
    else if (FunctionColumn* funcCol = dynamic_cast<FunctionColumn*>(fRhs))
    {
        fSimpleColumnList.insert(fSimpleColumnList.end(),
                                 funcCol->simpleColumnList().begin(),
                                 funcCol->simpleColumnList().end());
        fAggColumnList.insert(fAggColumnList.end(),
                              funcCol->aggColumnList().begin(),
                              funcCol->aggColumnList().end());
        fWindowFunctionColumnList.insert(fWindowFunctionColumnList.end(),
                                         funcCol->windowfunctionColumnList().begin(),
                                         funcCol->windowfunctionColumnList().end());
    }
    else if (ArithmeticColumn* arithCol = dynamic_cast<ArithmeticColumn*>(fRhs))
    {
        fSimpleColumnList.insert(fSimpleColumnList.end(),
                                 arithCol->simpleColumnList().begin(),
                                 arithCol->simpleColumnList().end());
        fAggColumnList.insert(fAggColumnList.end(),
                              arithCol->aggColumnList().begin(),
                              arithCol->aggColumnList().end());
        fWindowFunctionColumnList.insert(fWindowFunctionColumnList.end(),
                                         arithCol->windowfunctionColumnList().begin(),
                                         arithCol->windowfunctionColumnList().end());
    }
    else if (WindowFunctionColumn* winCol = dynamic_cast<WindowFunctionColumn*>(fRhs))
    {
        fWindowFunctionColumnList.push_back(winCol);
    }
}
void serializeCSEP() { /* * erydbSelectExecutionPlan * This is a large class; it makes more sense to write == operators * for everything than to write a giant equivalance test here. * For now this is mostly a regression test. */ erydbSelectExecutionPlan csep1, csep2; erydbSelectExecutionPlan::ReturnedColumnList colList; ParseTree* filterList; erydbExecutionPlan *cep; ByteStream b; cep = &csep2; CPPUNIT_ASSERT(csep1 == csep2); CPPUNIT_ASSERT(!(csep1 != csep2)); CPPUNIT_ASSERT(csep1 == cep); CPPUNIT_ASSERT(!(csep1 != cep)); // returned columns SimpleColumn *sc = new SimpleColumn("tpch.region.r_regionkey"); colList.push_back(sc); // filters erydbSelectExecutionPlan::Parser parser; std::vector<Token> tokens; Token t; SimpleFilter *sf = new SimpleFilter(); SimpleColumn *lhs = new SimpleColumn(*sc); SimpleColumn *rhs = new SimpleColumn("tpch.nation.n_regionkey"); Operator *op = new Operator("="); sf->op(op); sf->lhs(lhs); sf->rhs(rhs); t.value = sf; tokens.push_back(t); Operator *op1 = new Operator ("and"); t.value = op1; tokens.push_back(t); SimpleFilter *sf1 = new SimpleFilter(); SimpleColumn *lhs1 = new SimpleColumn (*rhs); ConstantColumn *constCol = new ConstantColumn("3", ConstantColumn::NUM); Operator *op2 = new Operator("!="); sf1->op(op2); sf1->lhs(lhs1); sf1->rhs(constCol); t.value = sf1; tokens.push_back(t); filterList = parser.parse(tokens.begin(), tokens.end()); // draw filterList tree filterList->drawTree("selectExecutionPlan_1.dot"); // erydb execution plan csep1.returnedCols (colList); csep1.filters (filterList); CPPUNIT_ASSERT(csep1 != csep2); CPPUNIT_ASSERT(!(csep1 == csep2)); CPPUNIT_ASSERT(csep1 != cep); CPPUNIT_ASSERT(!(csep1 == cep)); csep1.serialize(b); csep2.unserialize(b); CPPUNIT_ASSERT(b.length() == 0); CPPUNIT_ASSERT(csep1 == csep2); CPPUNIT_ASSERT(!(csep1 != csep2)); CPPUNIT_ASSERT(csep1 == cep); CPPUNIT_ASSERT(!(csep1 != cep)); erydbSelectExecutionPlan csep3, csep4; // subselect erydbSelectExecutionPlan *subselect = new 
erydbSelectExecutionPlan; subselect->location(erydbSelectExecutionPlan::WHERE); subselect->dependent (false); CPPUNIT_ASSERT (subselect->location() == erydbSelectExecutionPlan::WHERE); CPPUNIT_ASSERT (subselect->dependent() == false); erydbSelectExecutionPlan::SelectList selectList; selectList.push_back(subselect); csep3.subSelects(selectList); // exist filter erydbSelectExecutionPlan* cep1 = new erydbSelectExecutionPlan(); ExistsFilter *filter = new ExistsFilter(); delete filter; filter = new ExistsFilter(cep1); filter->exists(cep1); //erydbSelectExecutionPlan* cep2 = const_cast<erydbSelectExecutionPlan*>(filter->exists()); erydbSelectExecutionPlan::Parser parser1; std::vector<Token> tokens1; Token t1; t1.value = filter; tokens1.push_back(t1); csep3.filters(parser1.parse(tokens1.begin(), tokens1.end())); csep3.serialize(b); csep4.unserialize(b); CPPUNIT_ASSERT(csep3 == csep4); CPPUNIT_ASSERT(!(csep3 != csep4)); }
void selectExecutionPlan_1() { cout << "SQL: select region.r_regionkey from region, nation where nation.n_regionkey = region.r_regionkey and nation.n_regionkey != 3;" << endl; erydbSelectExecutionPlan csep; CPPUNIT_ASSERT (csep.location() == erydbSelectExecutionPlan::MAIN); CPPUNIT_ASSERT (csep.dependent() == false); CPPUNIT_ASSERT (csep.subSelects().size() == 0); // returned columns erydbSelectExecutionPlan::ReturnedColumnList colList; SimpleColumn *sc = new SimpleColumn("tpch.region.r_regionkey", 0); colList.push_back(sc); ArithmeticColumn *ac = new ArithmeticColumn("a+sum(r_regionkey)", 0); colList.push_back(ac); csep.returnedCols (colList); CPPUNIT_ASSERT(csep.returnedCols().size() == 2); // filters erydbSelectExecutionPlan::FilterTokenList filterTokenList; SimpleFilter *sf = new SimpleFilter(); SimpleColumn *lhs = new SimpleColumn(); *lhs = *sc; SimpleColumn *rhs = new SimpleColumn("tpch.nation.n_regionkey", 0); CPPUNIT_ASSERT (*lhs == *sc); CPPUNIT_ASSERT (*rhs != *lhs); Operator *op = new Operator("="); sf->op(op); sf->lhs(lhs); sf->rhs(rhs); filterTokenList.push_back (sf); filterTokenList.push_back( new Operator ("And") ); SimpleFilter *sf1 = new SimpleFilter (new Operator("="), sc->clone(), ac->clone()); filterTokenList.push_back (sf1); csep.filterTokenList (filterTokenList); ParseTree *filterList = const_cast<ParseTree*> (csep.filters()); // draw filterList tree filterList->drawTree("selectExecutionPlan_1.dot"); csep.filters (filterList); // Group by erydbSelectExecutionPlan::GroupByColumnList groupByList; groupByList.push_back(sc->clone()); csep.groupByCols (groupByList); CPPUNIT_ASSERT(csep.groupByCols().size() == 1); // Having erydbSelectExecutionPlan::FilterTokenList havingTokenList; SimpleFilter *having = new SimpleFilter( new Operator("="), new ArithmeticColumn("sum(volumn)", 0), new ConstantColumn(8)); havingTokenList.push_back (having); csep.havingTokenList (havingTokenList); CPPUNIT_ASSERT (*sf1 != *having); CPPUNIT_ASSERT 
(csep.havingTokenList().size() == 1); // Order by erydbSelectExecutionPlan::OrderByColumnList orderByList; ArithmeticColumn *o1 = new ArithmeticColumn(*ac); o1->asc(false); orderByList.push_back(o1); csep.orderByCols(orderByList); CPPUNIT_ASSERT(csep.orderByCols().size() == 1); // another csep erydbSelectExecutionPlan *newcsep = new erydbSelectExecutionPlan(erydbSelectExecutionPlan::FROM); erydbSelectExecutionPlan::ReturnedColumnList ncolList; SimpleColumn *newsc = new SimpleColumn("tpch.region.r_regionkey", 0); ncolList.push_back(newsc); newcsep->returnedCols (ncolList); erydbSelectExecutionPlan::FilterTokenList nfilterTokenList; SimpleFilter *newsf = new SimpleFilter ( new Operator (">"), sc->clone(), newsc->clone()); nfilterTokenList.push_back(newsf); newcsep->filterTokenList (nfilterTokenList); erydbSelectExecutionPlan::FilterTokenList nhavingTokenList; SimpleFilter *newhaving = new SimpleFilter ( new Operator (">"), sc->clone(), newsc->clone()); CPPUNIT_ASSERT (*newsf == *newhaving); nhavingTokenList.push_back(newhaving); newcsep->havingTokenList (nhavingTokenList); CPPUNIT_ASSERT (*newcsep != csep); CPPUNIT_ASSERT (*newcsep->filters() == *newcsep->having()); ByteStream b; csep.serialize (b); newcsep->unserialize (b); CPPUNIT_ASSERT (csep == *newcsep); erydbSelectExecutionPlan::SelectList selectList; selectList.push_back(newcsep); csep.subSelects(selectList); cout << "\nerydb Execution Plan:" << endl; cout << csep; cout << " --- end of test 1 ---" << endl; }