예제 #1
0
파일: Filter.cpp 프로젝트: wzzz/Claims
/**
 * Derives the dataflow produced by this filter from the dataflow of its child.
 *
 * Steps, in order:
 *  1. Fetch the child's dataflow; if comparator_list_ is still empty, fill it
 *     through generateComparatorList().
 *  2. If the dataflow is hash partitioned, visit every partition: partitions
 *     for which couldHashPruned() holds are marked as filtered, all others
 *     get their cardinality multiplied by predictSelectivity().
 *  3. Call getcolindex() and initialize every expression in qual_ as a
 *     boolean expression against the schema built from the attribute list.
 *
 * The current implementation assumes that the boolean operator between the
 * AttributeComparators is "AND".
 *
 * @return the child's dataflow with the partition information adjusted.
 */
Dataflow Filter::getDataflow(){
	Dataflow result=child_->getDataflow();
	if(0==comparator_list_.size()){
		generateComparatorList(result);
	}

	if(result.isHashPartitioned()){
		for(unsigned part=0;part<result.property_.partitioner.getNumberOfPartitions();part++){
			if(couldHashPruned(part,result.property_.partitioner)){
				// No tuple of this partition can satisfy the condition.
				result.property_.partitioner.getPartition(part)->setFiltered();
				continue;
			}
			/*
			 * The partition survives, so estimate how much data passes the filter.
			 * TODO: a precise prediction is needed, based on statistics of the
			 * input data, which may be maintained in the catalog module.
			 */
			const unsigned rows_in=result.property_.partitioner.getPartition(part)->getDataCardinality();
			// The product is converted back to unsigned before it is stored.
			const unsigned rows_out=rows_in*predictSelectivity();
			result.property_.partitioner.getPartition(part)->setDataCardinality(rows_out);
		}
	}

	getcolindex(result);
	// NOTE(review): the Schema obtained here is never released in this
	// function -- confirm who owns the object returned by getSchema().
	Schema *input_schema=getSchema(result.attribute_list_);
	for(int k=0;k<qual_.size();++k){
		InitExprAtLogicalPlan(qual_[k],t_boolean,colindex_,input_schema);
	}
	return result;
}
예제 #2
0
/**
 * Derives the plan context produced by this filter from the plan context of
 * its child.
 *
 * For a hash-partitioned input every partition is visited: partitions for
 * which CanBeHashPruned() holds are marked as filtered, all others get their
 * cardinality multiplied by PredictSelectivity(). Afterwards set_column_id()
 * is called and every expression in condi_ is initialized as a boolean
 * expression against the schema built from the attribute list.
 *
 * The current implementation assumes that the boolean operator between the
 * AttributeComparators is "AND".
 *
 * @return the child's plan context with the partition information adjusted.
 */
PlanContext LogicalFilter::GetPlanContext() {
  PlanContext context = child_->GetPlanContext();

  if (context.IsHashPartitioned()) {
    for (unsigned part = 0;
         part < context.plan_partitioner_.GetNumberOfPartitions(); ++part) {
      if (CanBeHashPruned(part, context.plan_partitioner_)) {
        // No tuple of this partition can satisfy the condition.
        context.plan_partitioner_.GetPartition(part)->set_filtered();
        continue;
      }
      /**
       * The partition survives, so estimate how much data passes the filter.
       * TODO(wangli): A precise prediction is needed, based on statistics of
       *               the input data, which may be maintained in the catalog
       *               module.
       */
      const unsigned rows_in =
          context.plan_partitioner_.GetPartition(part)->get_cardinality();
      // The product is converted back to unsigned before it is stored.
      const unsigned rows_out = rows_in * PredictSelectivity();
      context.plan_partitioner_.GetPartition(part)->set_cardinality(rows_out);
    }
  }

  set_column_id(context);
  // NOTE(review): the Schema obtained here is never released in this
  // function -- confirm who owns the object returned by GetSchema().
  Schema* input_schema = GetSchema(context.attribute_list_);
  for (int k = 0; k < condi_.size(); ++k) {
    // Prepare the k-th condition for the logical execution plan.
    InitExprAtLogicalPlan(condi_[k], t_boolean, column_id_, input_schema);
  }
  return context;
}
예제 #3
0
/**
 * Returns the plan context of this filter, computing it on the first call and
 * serving the stored copy (plan_context_) on later calls.
 *
 * The whole body runs between lock_->acquire() and lock_->release(); both
 * return paths release the lock before returning.
 * NOTE(review): the release is done by hand, so anything that leaves this
 * function by an exception would keep the lock held -- confirm that the
 * callees cannot throw.
 *
 * @return a copy of the plan context stored in plan_context_.
 */
PlanContext LogicalFilter::GetPlanContext() {
  /** In the current implementation, we assume that the boolean operator
   * between each AttributeComparator is "AND".
   */
  lock_->acquire();
  // Fast path: a previous call already built and stored the context.
  if (NULL != plan_context_) {
    lock_->release();
    return *plan_context_;
  }
  // Start from the child's context and adjust it below.
  PlanContext plan_context = child_->GetPlanContext();
  if (plan_context.IsHashPartitioned()) {
    for (unsigned i = 0;
         i < plan_context.plan_partitioner_.GetNumberOfPartitions(); ++i) {
      if (CanBeHashPruned(i, plan_context.plan_partitioner_)) {
        // The partition can be pruned: mark it as filtered.
        plan_context.plan_partitioner_.GetPartition(i)->set_filtered();
      } else {  // Not pruned: scale the cardinality by PredictSelectivity().
                /**
                 * Should predict the volume of data that passes the filter.
                 * TODO(wangli): A precise prediction is needed based on the
                 *               statistics of the input data, which may be
                 *               maintained in the catalog module.
                 */

        const unsigned before_filter_cardinality =
            plan_context.plan_partitioner_.GetPartition(i)->get_cardinality();
        // The product is converted back to unsigned before it is stored.
        const unsigned after_filter_cardinality =
            before_filter_cardinality * PredictSelectivity();
        plan_context.plan_partitioner_.GetPartition(i)
            ->set_cardinality(after_filter_cardinality);
      }
    }
  }
// NOTE(review): the three declarations below are commented out, yet the
// NEWCONDI branch still uses column_to_id and input_schema. Unless those
// names are declared elsewhere, building with NEWCONDI defined will fail.
//  std::map<std::string, int> column_to_id;
//  GetColumnToId(plan_context.attribute_list_, column_to_id);
//  Schema* input_schema = GetSchema(plan_context.attribute_list_);
#ifdef NEWCONDI
  for (int i = 0; i < condi_.size(); ++i) {
    // Initialize expression of logical execution plan.
    InitExprAtLogicalPlan(condi_[i], t_boolean, column_to_id, input_schema);
  }
#else
  // Build the initialization context: column-name-to-id mapping and schema
  // both come from the plan context computed above.
  LogicInitCnxt licnxt;
  GetColumnToId(plan_context.attribute_list_, licnxt.column_id0_);
  licnxt.schema0_ = plan_context.GetSchema();
  for (int i = 0; i < condition_.size(); ++i) {
    // return_type_ is set again before every condition; presumably
    // InitExprAtLogicalPlan() may overwrite it -- TODO confirm.
    licnxt.return_type_ = t_boolean;
    condition_[i]->InitExprAtLogicalPlan(licnxt);
  }
#endif
  // Store a heap-allocated copy so that later calls take the fast path.
  plan_context_ = new PlanContext();
  *plan_context_ = plan_context;
  // The attribute list is copied explicitly in addition to the assignment
  // above; presumably PlanContext's assignment does not (reliably) copy it --
  // TODO confirm, otherwise this call is redundant.
  plan_context_->attribute_list_.assign(plan_context.attribute_list_.begin(),
                                        plan_context.attribute_list_.end());
  lock_->release();
  return *plan_context_;
}
예제 #4
0
/*
 * InitExprAtLogicalPlan() initializes the nodes of an expression tree at the
 * logical-plan stage. It walks the tree recursively and, per node:
 *  - sets the return type,
 *  - resolves the column reference id in the schema (column nodes),
 *  - sets the isnull flag,
 *  - sets the storage length.
 *
 * node:     root of the (sub)tree to initialize; NULL is accepted and ignored.
 * r_type:   return type stored into this node.
 * colindex: mapping from column name to column id, used for column nodes.
 * schema:   schema from which the length of column nodes is read.
 *
 * NOTE(review): the recursive call tolerates a NULL child, but most cases
 * dereference their children right after the recursion (and the ternary case
 * even before it), so a NULL child would crash there -- confirm that children
 * of these node kinds are never NULL.
 */
void InitExprAtLogicalPlan(QNode *node,data_type r_type,map<string,int>&colindex,Schema *schema)
{
	if(node==NULL)
		return ;
	switch(node->type)
	{
		case t_qexpr_cal://binary calculation node
		{
			QExpr_binary * calnode=(QExpr_binary *)(node);
			calnode->return_type=r_type;
			// Both operands are initialized with the node's actual_type as their return type.
			InitExprAtLogicalPlan(calnode->lnext,calnode->actual_type,colindex,schema);
			InitExprAtLogicalPlan(calnode->rnext,calnode->actual_type,colindex,schema);
			// isnull if either operand is; length is the larger operand length.
			calnode->isnull=(calnode->lnext->isnull||calnode->rnext->isnull);
			calnode->length=max(calnode->lnext->length,calnode->rnext->length);
		}break;
		case t_qexpr_cmp://binary comparison node
		{
			QExpr_binary * cmpnode=(QExpr_binary *)(node);
			cmpnode->return_type=r_type; // Li: I believe the return type for compare expression should be t_boolean
			// Both operands are initialized with the node's actual_type as their return type.
			InitExprAtLogicalPlan(cmpnode->lnext,cmpnode->actual_type,colindex,schema);
			InitExprAtLogicalPlan(cmpnode->rnext,cmpnode->actual_type,colindex,schema);
			cmpnode->length=max(cmpnode->lnext->length,cmpnode->rnext->length);
			cmpnode->isnull=(cmpnode->lnext->isnull||cmpnode->rnext->isnull);
		}break;
		case t_qexpr_unary:// node with a single operand
		{
			QExpr_unary *unode=(QExpr_unary *)node;
			unode->return_type=r_type;
			InitExprAtLogicalPlan(unode->next,unode->actual_type,colindex,schema);
			// Length and isnull are taken over from the operand.
			unode->length=unode->next->length;
			unode->isnull=unode->next->isnull;
		}break;
		case t_qexpr_ternary://currently only for substring, not for all ternary functions
		{
			QExpr_ternary *tnode=(QExpr_ternary *)node;
			tnode->return_type=r_type;
			InitExprAtLogicalPlan(tnode->next0,tnode->actual_type,colindex,schema);
			// next1 and next2 are dereferenced here, before the NULL check inside the callee.
			InitExprAtLogicalPlan(tnode->next1,tnode->next1->actual_type,colindex,schema);//parameter return type = its own actual type
			InitExprAtLogicalPlan(tnode->next2,tnode->next2->actual_type,colindex,schema);//parameter return type = its own actual type
			tnode->length=max(tnode->next0->length,max(tnode->next1->length,tnode->next2->length));
			tnode->isnull=(tnode->next0->isnull||tnode->next1->isnull||tnode->next2->isnull);
		}break;
		case t_qexpr_case_when:
		{
			QExpr_case_when *cwnode=(QExpr_case_when *)node;
			cwnode->return_type=r_type;
			// Length starts at BASE_SIZE and grows to the longest answer below.
			cwnode->length=BASE_SIZE;
			// Starts as true, so the OR in the answer loop can never clear it.
			cwnode->isnull=true;
			// The conditions are initialized as boolean expressions.
			for(int i=0;i<cwnode->qual.size();i++)
			{
				InitExprAtLogicalPlan(cwnode->qual[i],t_boolean,colindex,schema);
			}
			// Each answer keeps its own actual_type as return type.
			for(int i=0;i<cwnode->ans.size();i++)
			{
				InitExprAtLogicalPlan(cwnode->ans[i],cwnode->ans[i]->actual_type,colindex,schema);
				cwnode->length=max(cwnode->length,cwnode->ans[i]->length);
				cwnode->isnull=(cwnode->isnull||cwnode->ans[i]->isnull);
			}
		}break;
		case t_qexpr_in:
		{
			QExpr_in *innode=(QExpr_in *)node;
			innode->return_type=r_type;
			// Length and isnull are fixed values for this node kind.
			innode->length=BASE_SIZE;
			innode->isnull=true;
			// The entries of cmpnode are initialized as boolean expressions.
			for(int i=0;i<innode->cmpnode.size();i++)
			{
				InitExprAtLogicalPlan(innode->cmpnode[i],t_boolean,colindex,schema);
			}
			// Entry j of every rnode row takes the actual_type of cmpnode[j].
			// NOTE(review): assumes rnode[i].size() <= cmpnode.size() -- confirm.
			for(int i=0;i<innode->rnode.size();i++)
			{
				for(int j=0;j<innode->rnode[i].size();j++)
				{
					InitExprAtLogicalPlan(innode->rnode[i][j],innode->cmpnode[j]->actual_type,colindex,schema);
				}
			}
		}break;
		case t_qexpr_date_add_sub:
		{
			QExpr_date_add_sub * date_node=(QExpr_date_add_sub *)(node);
			date_node->return_type=r_type;
			InitExprAtLogicalPlan(date_node->lnext,date_node->actual_type,colindex,schema);
			InitExprAtLogicalPlan(date_node->rnext,date_node->rnext_type,colindex,schema);//the difference from t_qexpr_cal: the right operand uses rnext_type
			date_node->isnull=(date_node->lnext->isnull||date_node->rnext->isnull);
			date_node->length=max(date_node->lnext->length,date_node->rnext->length);
		}break;
		case t_qexpr_func:
		{
			// Nothing is initialized for this node kind; the code below is disabled.
//			QExpr_binary *funcnode=(QExpr_binary *)(node);
//			funcnode->return_type=r_type;
		}break;
		case t_qcolcumns:// column reference node
		{
			QColcumns * qcol=(QColcumns *)(node);
			// NOTE(review): if `map` is std::map, operator[] inserts an entry with
			// id 0 for an unknown column name instead of failing -- confirm that
			// every referenced column is present in colindex.
			qcol->id=colindex[qcol->col];//col=A.a or col= a?
			qcol->return_type=r_type;
			// String results get at least BASE_SIZE; other types use the column's size.
			if(qcol->return_type ==t_string)
				qcol->length=max(schema->getcolumn(qcol->id).get_length(),(unsigned int)BASE_SIZE);
			else
				qcol->length=schema->getcolumn(qcol->id).size;
			qcol->isnull=false;//TODO
		}break;
		case t_qexpr:// node carrying a const_value (presumably a constant -- TODO confirm)
		{
			QExpr *qexpr=(QExpr *)(node);
			qexpr->return_type=r_type;
			// At least BASE_SIZE, or the size of const_value if that is larger.
			qexpr->length=max((int)(qexpr->const_value.size()),BASE_SIZE);
			qexpr->isnull=false;

		}break;
		default:
		{
			// Any other node type is left untouched.

		}


	}
}