/**
 * Derives this filter's output dataflow from the dataflow of its child.
 *
 * The current implementation assumes that the AttributeComparators are
 * combined with the boolean operator "AND".
 *
 * Steps, in order:
 *   1. Fetch the child's dataflow.
 *   2. Build the comparator list if it is still empty.
 *   3. For a hash-partitioned input, mark every partition that can be pruned
 *      as filtered and rescale the cardinality of the remaining ones.
 *   4. Resolve column indexes and initialise every qualification expression
 *      against the schema built from the input attribute list.
 *
 * @return the (possibly adjusted) copy of the child's dataflow.
 */
Dataflow Filter::getDataflow() {
  Dataflow flow = child_->getDataflow();

  if (0 == comparator_list_.size()) {
    generateComparatorList(flow);
  }

  if (flow.isHashPartitioned()) {
    for (unsigned part = 0;
         part < flow.property_.partitioner.getNumberOfPartitions(); ++part) {
      if (couldHashPruned(part, flow.property_.partitioner)) {
        // The whole partition is filtered out.
        flow.property_.partitioner.getPartition(part)->setFiltered();
        continue;
      }

      /*
       * Estimate the volume of data that passes the filter.
       * TODO: a precise prediction is needed, based on statistics of the
       * input data, which may be maintained in the catalog module.
       */
      const unsigned rows_in =
          flow.property_.partitioner.getPartition(part)->getDataCardinality();
      // The product is stored in an unsigned, as in the original estimate.
      const unsigned rows_out = rows_in * predictSelectivity();
      flow.property_.partitioner.getPartition(part)
          ->setDataCardinality(rows_out);
    }
  }

  getcolindex(flow);
  Schema* input_schema = getSchema(flow.attribute_list_);
  // Every qualification is initialised as a boolean expression.
  for (unsigned q = 0; q < qual_.size(); ++q) {
    InitExprAtLogicalPlan(qual_[q], t_boolean, colindex_, input_schema);
  }
  return flow;
}
/**
 * Derives this filter's plan context from the plan context of its child.
 *
 * The current implementation assumes that the AttributeComparators are
 * combined with the boolean operator "AND".
 *
 * For a hash-partitioned input, each partition is either marked as filtered
 * (when it can be hash-pruned) or has its cardinality rescaled by
 * PredictSelectivity(). Afterwards the column ids are set and every condition
 * expression is initialised against the schema of the input attribute list.
 *
 * @return the (possibly adjusted) copy of the child's plan context.
 */
PlanContext LogicalFilter::GetPlanContext() {
  PlanContext context = child_->GetPlanContext();

  if (context.IsHashPartitioned()) {
    for (unsigned part = 0;
         part < context.plan_partitioner_.GetNumberOfPartitions(); ++part) {
      if (CanBeHashPruned(part, context.plan_partitioner_)) {
        // The whole partition is filtered out.
        context.plan_partitioner_.GetPartition(part)->set_filtered();
        continue;
      }

      /**
       * Estimate the volume of data that passes the filter by calling
       * PredictSelectivity().
       * TODO(wangli): A precise prediction is needed, based on statistics of
       *               the input data, which may be maintained in the catalog
       *               module.
       */
      const unsigned rows_in =
          context.plan_partitioner_.GetPartition(part)->get_cardinality();
      // The product is stored in an unsigned, as in the original estimate.
      const unsigned rows_out = rows_in * PredictSelectivity();
      context.plan_partitioner_.GetPartition(part)->set_cardinality(rows_out);
    }
  }

  set_column_id(context);
  Schema* input_schema = GetSchema(context.attribute_list_);
  // Initialise each condition expression of the logical execution plan as a
  // boolean expression.
  for (unsigned c = 0; c < condi_.size(); ++c) {
    InitExprAtLogicalPlan(condi_[c], t_boolean, column_id_, input_schema);
  }
  return context;
}
/**
 * Returns this filter's plan context, computing and caching it on first use.
 *
 * The current implementation assumes that the AttributeComparators are
 * combined with the boolean operator "AND".
 *
 * The body runs between lock_->acquire() and lock_->release(). When
 * plan_context_ is already set, it is returned unchanged. Otherwise the
 * child's plan context is fetched, adjusted for hash-partition pruning and
 * selectivity, the condition expressions are initialised, and the result is
 * stored in a newly allocated plan_context_.
 *
 * @return a copy of the cached plan context.
 */
PlanContext LogicalFilter::GetPlanContext() {
  lock_->acquire();

  if (NULL == plan_context_) {
    PlanContext context = child_->GetPlanContext();

    if (context.IsHashPartitioned()) {
      for (unsigned part = 0;
           part < context.plan_partitioner_.GetNumberOfPartitions(); ++part) {
        if (CanBeHashPruned(part, context.plan_partitioner_)) {
          // The whole partition is filtered out.
          context.plan_partitioner_.GetPartition(part)->set_filtered();
          continue;
        }

        /**
         * Estimate the volume of data that passes the filter by calling
         * PredictSelectivity().
         * TODO(wangli): A precise prediction is needed, based on statistics
         *               of the input data, which may be maintained in the
         *               catalog module.
         */
        const unsigned rows_in =
            context.plan_partitioner_.GetPartition(part)->get_cardinality();
        // The product is stored in an unsigned, as in the original estimate.
        const unsigned rows_out = rows_in * PredictSelectivity();
        context.plan_partitioner_.GetPartition(part)
            ->set_cardinality(rows_out);
      }
    }

    // NOTE(review): the NEWCONDI branch below uses column_to_id and
    // input_schema, whose declarations are currently commented out:
    //   std::map<std::string, int> column_to_id;
    //   GetColumnToId(plan_context.attribute_list_, column_to_id);
    //   Schema* input_schema = GetSchema(plan_context.attribute_list_);
#ifdef NEWCONDI
    for (int i = 0; i < condi_.size(); ++i) {
      // Initialize expression of logical execution plan.
      InitExprAtLogicalPlan(condi_[i], t_boolean, column_to_id, input_schema);
    }
#else
    LogicInitCnxt init_context;
    GetColumnToId(context.attribute_list_, init_context.column_id0_);
    init_context.schema0_ = context.GetSchema();
    for (unsigned c = 0; c < condition_.size(); ++c) {
      // The requested return type is re-set before every condition.
      init_context.return_type_ = t_boolean;
      condition_[c]->InitExprAtLogicalPlan(init_context);
    }
#endif

    // Cache the result: default-construct, assign, then copy the attribute
    // list explicitly (same sequence as before).
    plan_context_ = new PlanContext();
    *plan_context_ = context;
    plan_context_->attribute_list_.assign(context.attribute_list_.begin(),
                                          context.attribute_list_.end());
  }

  // Single exit for both the cached and the freshly computed case: the lock
  // is released first, then the cached context is copied out.
  lock_->release();
  return *plan_context_;
}
/* * the InitExprAtLogicalPlan() initialize the exprTree nodes at logical plan * set return type * get column reference id in schema * get isnull * get the storage length */ void InitExprAtLogicalPlan(QNode *node,data_type r_type,map<string,int>&colindex,Schema *schema) { if(node==NULL) return ; switch(node->type) { case t_qexpr_cal://binary calculation node { QExpr_binary * calnode=(QExpr_binary *)(node); calnode->return_type=r_type; InitExprAtLogicalPlan(calnode->lnext,calnode->actual_type,colindex,schema); InitExprAtLogicalPlan(calnode->rnext,calnode->actual_type,colindex,schema); calnode->isnull=(calnode->lnext->isnull||calnode->rnext->isnull); calnode->length=max(calnode->lnext->length,calnode->rnext->length); }break; case t_qexpr_cmp://binary comparison node { QExpr_binary * cmpnode=(QExpr_binary *)(node); cmpnode->return_type=r_type; // Li: I believe the return type for compare expression should be t_boolean InitExprAtLogicalPlan(cmpnode->lnext,cmpnode->actual_type,colindex,schema); InitExprAtLogicalPlan(cmpnode->rnext,cmpnode->actual_type,colindex,schema); cmpnode->length=max(cmpnode->lnext->length,cmpnode->rnext->length); cmpnode->isnull=(cmpnode->lnext->isnull||cmpnode->rnext->isnull); }break; case t_qexpr_unary: { QExpr_unary *unode=(QExpr_unary *)node; unode->return_type=r_type; InitExprAtLogicalPlan(unode->next,unode->actual_type,colindex,schema); unode->length=unode->next->length; unode->isnull=unode->next->isnull; }break; case t_qexpr_ternary://now for substring,not for all { QExpr_ternary *tnode=(QExpr_ternary *)node; tnode->return_type=r_type; InitExprAtLogicalPlan(tnode->next0,tnode->actual_type,colindex,schema); InitExprAtLogicalPlan(tnode->next1,tnode->next1->actual_type,colindex,schema);//parameter return type =actual type InitExprAtLogicalPlan(tnode->next2,tnode->next2->actual_type,colindex,schema);//parameter return type =actual type tnode->length=max(tnode->next0->length,max(tnode->next1->length,tnode->next2->length)); 
tnode->isnull=(tnode->next0->isnull||tnode->next1->isnull||tnode->next2->isnull); }break; case t_qexpr_case_when: { QExpr_case_when *cwnode=(QExpr_case_when *)node; cwnode->return_type=r_type; cwnode->length=BASE_SIZE; cwnode->isnull=true; for(int i=0;i<cwnode->qual.size();i++) { InitExprAtLogicalPlan(cwnode->qual[i],t_boolean,colindex,schema); } for(int i=0;i<cwnode->ans.size();i++) { InitExprAtLogicalPlan(cwnode->ans[i],cwnode->ans[i]->actual_type,colindex,schema); cwnode->length=max(cwnode->length,cwnode->ans[i]->length); cwnode->isnull=(cwnode->isnull||cwnode->ans[i]->isnull); } }break; case t_qexpr_in: { QExpr_in *innode=(QExpr_in *)node; innode->return_type=r_type; innode->length=BASE_SIZE; innode->isnull=true; for(int i=0;i<innode->cmpnode.size();i++) { InitExprAtLogicalPlan(innode->cmpnode[i],t_boolean,colindex,schema); } for(int i=0;i<innode->rnode.size();i++) { for(int j=0;j<innode->rnode[i].size();j++) { InitExprAtLogicalPlan(innode->rnode[i][j],innode->cmpnode[j]->actual_type,colindex,schema); } } }break; case t_qexpr_date_add_sub: { QExpr_date_add_sub * date_node=(QExpr_date_add_sub *)(node); date_node->return_type=r_type; InitExprAtLogicalPlan(date_node->lnext,date_node->actual_type,colindex,schema); InitExprAtLogicalPlan(date_node->rnext,date_node->rnext_type,colindex,schema);//the difference between t_qexpr_date_add_sub and t_qexpr_cal date_node->isnull=(date_node->lnext->isnull||date_node->rnext->isnull); date_node->length=max(date_node->lnext->length,date_node->rnext->length); }break; case t_qexpr_func: { // QExpr_binary *funcnode=(QExpr_binary *)(node); // funcnode->return_type=r_type; }break; case t_qcolcumns: { QColcumns * qcol=(QColcumns *)(node); qcol->id=colindex[qcol->col];//col=A.a or col= a? 
qcol->return_type=r_type; if(qcol->return_type ==t_string) qcol->length=max(schema->getcolumn(qcol->id).get_length(),(unsigned int)BASE_SIZE); else qcol->length=schema->getcolumn(qcol->id).size; qcol->isnull=false;//TODO }break; case t_qexpr: { QExpr *qexpr=(QExpr *)(node); qexpr->return_type=r_type; qexpr->length=max((int)(qexpr->const_value.size()),BASE_SIZE); qexpr->isnull=false; }break; default: { } } }