Example #1
bool Filter::GetOptimalPhysicalPlan(Requirement requirement,PhysicalPlanDescriptor& physical_plan_descriptor, const unsigned & block_size){
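	/* Enumerate candidate physical plans for this filter: first ask the child for
	 * a plan with no requirement (adding an exchange above the filter if the data
	 * must be repartitioned), then ask the child for a plan that already satisfies
	 * the requirement, and finally return the cheapest candidate. */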
	PhysicalPlanDescriptor physical_plan;
	std::vector<PhysicalPlanDescriptor> candidate_physical_plans;

	/* no requirement is imposed on the child */
	if(child_->GetOptimalPhysicalPlan(Requirement(),physical_plan)){
		NetworkTransfer transfer=requirement.requireNetworkTransfer(physical_plan.dataflow);
		if(transfer==NONE){
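			/* The child's data distribution already satisfies the requirement, so the
			 * filter can be placed directly on top of the child plan. */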
			ExpandableBlockStreamFilter::State state;
			state.block_size_=block_size;
			state.child_=physical_plan.plan;
			state.qual_=qual_;
			state.colindex_=colindex_;
			state.comparator_list_=comparator_list_;
			state.v_ei_=exprArray_;
			Dataflow dataflow=getDataflow();
			state.schema_=getSchema(dataflow.attribute_list_);
			BlockStreamIteratorBase* filter=new ExpandableBlockStreamFilter(state);
			physical_plan.plan=filter;
			candidate_physical_plans.push_back(physical_plan);
		}
		else if((transfer==OneToOne)||(transfer==Shuffle)){
			/* The input data flow must be transferred over the network to meet
			 * the requirement.
			 * TODO: implement OneToOne Exchange */
			ExpandableBlockStreamFilter::State state_f;
			state_f.block_size_=block_size;
			state_f.child_=physical_plan.plan;
			state_f.v_ei_=exprArray_;
			state_f.qual_=qual_;
			state_f.colindex_=colindex_;
			state_f.comparator_list_=comparator_list_;
			Dataflow dataflow=getDataflow();
			state_f.schema_=getSchema(dataflow.attribute_list_);
			BlockStreamIteratorBase* filter=new ExpandableBlockStreamFilter(state_f);
			physical_plan.plan=filter;

			physical_plan.cost+=physical_plan.dataflow.getAggregatedDatasize();

			ExpandableBlockStreamExchangeEpoll::State state;
			state.block_size_=block_size;
			state.child_=physical_plan.plan;//child_iterator;
			state.exchange_id_=IDsGenerator::getInstance()->generateUniqueExchangeID();
			state.schema_=getSchema(physical_plan.dataflow.attribute_list_);

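			/* Determine the set of nodes that will consume the exchange output. */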
			std::vector<NodeID> upper_id_list;
			if(requirement.hasRequiredLocations()){
				upper_id_list=requirement.getRequiredLocations();
			}
			else{
				if(requirement.hasRequiredPartitionFunction()){
					/* partition function contains the number of partitions*/
					PartitionFunction* partition_function=requirement.getPartitionFunction();
					upper_id_list=std::vector<NodeID>(NodeTracker::getInstance()->getNodeIDList().begin(),NodeTracker::getInstance()->getNodeIDList().begin()+partition_function->getNumberOfPartitions()-1);
				}
				else{
					//TODO: decide the degree of parallelism
					upper_id_list=NodeTracker::getInstance()->getNodeIDList();
				}
			}
			state.upper_ip_list_=convertNodeIDListToNodeIPList(upper_id_list);

			assert(requirement.hasReuiredPartitionKey());

			state.partition_schema_=partition_schema::set_hash_partition(this->getIndexInAttributeList(physical_plan.dataflow.attribute_list_,requirement.getPartitionKey()));
			assert(state.partition_schema_.partition_key_index>=0);

			std::vector<NodeID> lower_id_list=getInvolvedNodeID(physical_plan.dataflow.property_.partitioner);

			state.lower_ip_list_=convertNodeIDListToNodeIPList(lower_id_list);


			BlockStreamIteratorBase* exchange=new ExpandableBlockStreamExchangeEpoll(state);

			physical_plan.plan=exchange;

		}
		candidate_physical_plans.push_back(physical_plan);
	}


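	/* Alternatively, ask the child for a plan that already satisfies the
	 * requirement, so that no exchange is needed above the filter. */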
	if(child_->GetOptimalPhysicalPlan(requirement,physical_plan)){
		ExpandableBlockStreamFilter::State state;
		state.block_size_=block_size;
		state.child_=physical_plan.plan;
		state.v_ei_=exprArray_;
		state.qual_=qual_;
		state.colindex_=colindex_;
		state.comparator_list_=comparator_list_;
		Dataflow dataflow=getDataflow();
		state.schema_=getSchema(dataflow.attribute_list_);
		BlockStreamIteratorBase* filter=new ExpandableBlockStreamFilter(state);
		physical_plan.plan=filter;
		candidate_physical_plans.push_back(physical_plan);
	}

	physical_plan_descriptor=getBestPhysicalPlanDescriptor(candidate_physical_plans);

	return requirement.passLimits(physical_plan_descriptor.cost);

}
Example #2
bool LogicalScan::GetOptimalPhysicalPlan(Requirement requirement,PhysicalPlanDescriptor& physical_plan_descriptor, const unsigned & block_size){
	Dataflow dataflow=getDataflow();
	NetworkTransfer transfer=requirement.requireNetworkTransfer(dataflow);

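	/* Build a projection scan over the target projection. If the scan's native
	 * partitioning does not satisfy the requirement, an exchange is added on top. */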
	ExpandableBlockStreamProjectionScan::State state;
	state.block_size_=block_size;
	state.projection_id_=target_projection_->getProjectionID();
	state.schema_=getSchema(dataflow_->attribute_list_);
	state.sample_rate_=sample_rate_;

	PhysicalPlan scan=new ExpandableBlockStreamProjectionScan(state);

	if(transfer==NONE){

		physical_plan_descriptor.plan=scan;
		physical_plan_descriptor.dataflow=dataflow;
		physical_plan_descriptor.cost+=0;
	}
	else{
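		/* The scan output must be repartitioned to satisfy the requirement: charge
		 * the network transfer cost and add an exchange above the scan. */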
		physical_plan_descriptor.cost+=dataflow.getAggregatedDatasize();

		ExpandableBlockStreamExchangeEpoll::State state;
		state.block_size_=block_size;
		state.child_=scan;//child_iterator;
		state.exchange_id_=IDsGenerator::getInstance()->generateUniqueExchangeID();
		state.schema_=getSchema(dataflow.attribute_list_);

		std::vector<NodeID> lower_id_list=getInvolvedNodeID(dataflow.property_.partitioner);
		state.lower_id_list_=lower_id_list;

		std::vector<NodeID> upper_id_list;
		if(requirement.hasRequiredLocations()){
			upper_id_list=requirement.getRequiredLocations();
		}
		else{
			if(requirement.hasRequiredPartitionFunction()){
				/* partition function contains the number of partitions*/
				PartitionFunction* partition_function=requirement.getPartitionFunction();
				upper_id_list=std::vector<NodeID>(NodeTracker::getInstance()->getNodeIDList().begin(),NodeTracker::getInstance()->getNodeIDList().begin()+partition_function->getNumberOfPartitions()-1);
			}
			else{
				//TODO: decide the degree of parallelism
				upper_id_list=NodeTracker::getInstance()->getNodeIDList();
			}
		}

		state.upper_id_list_=upper_id_list;

		state.partition_schema_=partition_schema::set_hash_partition(getIndexInAttributeList(dataflow.attribute_list_,requirement.getPartitionKey()));
		assert(state.partition_schema_.partition_key_index>=0);

		BlockStreamIteratorBase* exchange=new ExpandableBlockStreamExchangeEpoll(state);

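		/* Describe the dataflow produced by the exchange: the same attributes,
		 * hash-partitioned on the required key across the upper nodes. */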
		Dataflow new_dataflow;
		new_dataflow.attribute_list_=dataflow.attribute_list_;
		new_dataflow.property_.partitioner.setPartitionKey(requirement.getPartitionKey());
		new_dataflow.property_.partitioner.setPartitionFunction(PartitionFunctionFactory::createBoostHashFunction(state.upper_id_list_.size()));

		const unsigned total_size=dataflow.getAggregatedDatasize();
		const unsigned degree_of_parallelism=state.upper_id_list_.size();
		std::vector<DataflowPartition> dataflow_partition_list;
		for(unsigned i=0;i<degree_of_parallelism;i++){
			const NodeID location=upper_id_list[i];

			/* The per-partition data size cannot be predicted due to the absence of
			 * data statistics, so assume the data is evenly distributed across partitions. */
			const unsigned datasize=total_size/degree_of_parallelism;
			DataflowPartition dfp(i,datasize,location);
			dataflow_partition_list.push_back(dfp);
		}
		new_dataflow.property_.partitioner.setPartitionList(dataflow_partition_list);


		physical_plan_descriptor.plan=exchange;
		physical_plan_descriptor.dataflow=new_dataflow;
		physical_plan_descriptor.cost+=new_dataflow.getAggregatedDatasize();
	}

	return requirement.passLimits(physical_plan_descriptor.cost);

}
Example #3
bool LogicalFilter::GetOptimalPhysicalPlan(
    Requirement requirement, PhysicalPlanDescriptor& physical_plan_descriptor,
    const unsigned& block_size) {
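  // Enumerate candidate physical plans for this filter: first ask the child for
  // a plan with no requirement (adding an exchange above the filter if the data
  // must be repartitioned), then ask the child for a plan that already satisfies
  // the requirement, and finally return the cheapest candidate.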
  PhysicalPlanDescriptor physical_plan;
  std::vector<PhysicalPlanDescriptor> candidate_physical_plans;

  /* No requirement is imposed on the child. */
  if (child_->GetOptimalPhysicalPlan(Requirement(), physical_plan)) {
    NetworkTransfer transfer =
        requirement.requireNetworkTransfer(physical_plan.plan_context_);
    if (NONE == transfer) {
      PhysicalFilter::State state;
      state.block_size_ = block_size;
      state.child_ = physical_plan.plan;
      state.qual_ = condi_;
      state.column_id_ = column_id_;
      PlanContext plan_context = GetPlanContext();
      state.schema_ = GetSchema(plan_context.attribute_list_);
      PhysicalOperatorBase* filter = new PhysicalFilter(state);
      physical_plan.plan = filter;
      candidate_physical_plans.push_back(physical_plan);
    } else if ((OneToOne == transfer) || (Shuffle == transfer)) {
      /**
       * The input plan context should be transferred over the network to meet
       * the requirement.
       * TODO(wangli): Implement OneToOne Exchange
       */
      PhysicalFilter::State state_f;
      state_f.block_size_ = block_size;
      state_f.child_ = physical_plan.plan;
      state_f.qual_ = condi_;
      state_f.column_id_ = column_id_;
      PlanContext plan_context = GetPlanContext();
      state_f.schema_ = GetSchema(plan_context.attribute_list_);
      PhysicalOperatorBase* filter = new PhysicalFilter(state_f);
      physical_plan.plan = filter;

      physical_plan.cost += physical_plan.plan_context_.GetAggregatedDatasize();

      ExchangeMerger::State state;
      state.block_size_ = block_size;
      state.child_ = physical_plan.plan;  // child_iterator;
      state.exchange_id_ =
          IDsGenerator::getInstance()->generateUniqueExchangeID();
      state.schema_ = GetSchema(physical_plan.plan_context_.attribute_list_);

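      // Determine the set of nodes that will consume the exchange output.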
      std::vector<NodeID> upper_id_list;
      if (requirement.hasRequiredLocations()) {
        upper_id_list = requirement.getRequiredLocations();
      } else {
        if (requirement.hasRequiredPartitionFunction()) {
          // Partition function contains the number of partitions.
          PartitionFunction* partition_function =
              requirement.getPartitionFunction();
          upper_id_list = std::vector<NodeID>(
              NodeTracker::GetInstance()->GetNodeIDList().begin(),
              NodeTracker::GetInstance()->GetNodeIDList().begin() +
                  partition_function->getNumberOfPartitions() - 1);
        } else {
          // TODO(wangli): decide the degree of parallelism
          upper_id_list = NodeTracker::GetInstance()->GetNodeIDList();
        }
      }
      state.upper_id_list_ = upper_id_list;

      assert(requirement.hasReuiredPartitionKey());

      state.partition_schema_ =
          partition_schema::set_hash_partition(this->GetIdInAttributeList(
              physical_plan.plan_context_.attribute_list_,
              requirement.getPartitionKey()));
      assert(state.partition_schema_.partition_key_index >= 0);

      std::vector<NodeID> lower_id_list =
          GetInvolvedNodeID(physical_plan.plan_context_.plan_partitioner_);

      state.lower_id_list_ = lower_id_list;

      PhysicalOperatorBase* exchange = new ExchangeMerger(state);

      physical_plan.plan = exchange;
    }
    candidate_physical_plans.push_back(physical_plan);
  }

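  // Alternatively, ask the child for a plan that already satisfies the
  // requirement, so that no exchange is needed above the filter.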
  if (child_->GetOptimalPhysicalPlan(requirement, physical_plan)) {
    PhysicalFilter::State state;
    state.block_size_ = block_size;
    state.child_ = physical_plan.plan;
    state.qual_ = condi_;
    state.column_id_ = column_id_;
    PlanContext plan_context = GetPlanContext();
    state.schema_ = GetSchema(plan_context.attribute_list_);
    PhysicalOperatorBase* filter = new PhysicalFilter(state);
    physical_plan.plan = filter;
    candidate_physical_plans.push_back(physical_plan);
  }

  physical_plan_descriptor =
      GetBestPhysicalPlanDescriptor(candidate_physical_plans);

  return requirement.passLimits(physical_plan_descriptor.cost);
}