Exemplo n.º 1
0
/**
 * Assembles the physical sub-plan for this sort operator.
 *
 * The child's physical plan is wrapped in an Expander, that Expander feeds an
 * ExchangeMerger whose upper id list holds only node id 0, and a PhysicalSort
 * is placed on top of the exchange.
 *
 * @param blocksize block size copied into every operator state built here.
 * @return the PhysicalSort operator sitting on top of the sub-plan.
 */
PhysicalOperatorBase *LogicalSort::GetPhysicalPlan(const unsigned &blocksize) {
  PlanContext input_context = child_->GetPlanContext();

  // Stage 1: an Expander directly above the child's physical plan.
  Expander::State lower_expander;
  lower_expander.block_size_ = blocksize;
  lower_expander.block_count_in_buffer_ = EXPANDER_BUFFER_SIZE;
  lower_expander.init_thread_count_ = Config::initial_degree_of_parallelism;
  lower_expander.child_ = child_->GetPhysicalPlan(blocksize);
  lower_expander.schema_ = GetSchema(input_context.attribute_list_);
  PhysicalOperatorBase *expanded_child = new Expander(lower_expander);

  // Stage 2: an exchange that moves the expanded child output to the sorter.
  ExchangeMerger::State merger;
  merger.child_ = expanded_child;
  merger.block_size_ = blocksize;
  merger.exchange_id_ = IDsGenerator::getInstance()->generateUniqueExchangeID();
  merger.schema_ = GetSchema(input_context.attribute_list_);
  // Sending side: the nodes involved in the child's partitioner.
  merger.lower_id_list_ = GetInvolvedNodeID(input_context.plan_partitioner_);
  merger.partition_schema_ = partition_schema::set_hash_partition(0);
  // Receiving side: currently hard-wired to node id 0.
  // TODO(admin): compute the upper_ip_list to do reduce side sort
  vector<NodeID> receiving_nodes;
  receiving_nodes.push_back(0);
  merger.upper_id_list_ = receiving_nodes;
  PhysicalOperatorBase *merged_input = new ExchangeMerger(merger);

  // Stage 3: the sort itself, reading from the exchange.
  PhysicalSort::State sorter;
  sorter.child_ = merged_input;
  sorter.block_size_ = blocksize;
#ifdef NEWCONDI
  sorter.order_by_attrs_ = order_by_attrs_;
#else
  // Only the column number of each sort key is needed in the end.
  for (unsigned key = 0; key < order_by_attr_.size(); ++key) {
    sorter.order_by_key_.push_back(
        GetOrderByKey(order_by_attr_[key]->table_name_));
    sorter.direction_.push_back(order_by_attr_[key]->direction_);
  }
#endif
  sorter.input_schema_ = GetSchema(input_context.attribute_list_);

  return new PhysicalSort(sorter);
}
Exemplo n.º 2
0
/**
 * Builds the complete physical plan rooted at this operator.
 *
 * Steps:
 *  1. Fetch the child's physical plan. When the child is a LogicalLimit, the
 *     plan of the limit's own child is fetched instead and the limit is
 *     applied later, on top of the final Expander.
 *  2. Unless this operator's PlanContext has exactly one partition that is
 *     already located on collecter_node, add an Expander and an
 *     ExchangeMerger that sends the data to collecter_node.
 *  3. Put another Expander on top (single-threaded when an exchange was
 *     added).
 *  4. Wrap everything in one of three top operators, chosen by style_.
 *
 * @param block_size block size copied into every operator state built here.
 * @return the top physical operator, or NULL when style_ matches none of the
 *         handled styles.
 */
PhysicalOperatorBase* LogicalQueryPlanRoot::GetPhysicalPlan(
    const unsigned& block_size) {
  PlanContext child_plan_context = GetPlanContext();

  // A limit directly below the root is not planned here; its input is planned
  // and the limit operator is attached after the final Expander (see below).
  LogicalLimit* limit = NULL;
  PhysicalOperatorBase* child_iterator = NULL;
  if (child_->get_operator_type() == OperatorType::kLogicalLimit) {
    limit = reinterpret_cast<LogicalLimit*>(child_);
    child_iterator = limit->child_->GetPhysicalPlan(block_size);
  } else {
    child_iterator = child_->GetPhysicalPlan(block_size);
  }

  // The returned tracker is not used in this function.
  // NOTE(review): the call is kept in case GetInstance() has side effects
  // (e.g. lazy creation); drop it if that is confirmed not to matter.
  NodeTracker::GetInstance();

  bool is_exchange_need = false;
  /**
   * If the number of partitions in the PlanContext is 1 and the location is
   * right on the collector, then exchange is not necessary.
   */
  if (!(1 == child_plan_context.plan_partitioner_.GetNumberOfPartitions() &&
        child_plan_context.plan_partitioner_.get_partition_list()[0]
                .get_location() == collecter_node)) {
    is_exchange_need = true;

    // Add an Expander below the exchange.
    Expander::State expander_state_lower;
    expander_state_lower.block_count_in_buffer_ = 10;
    expander_state_lower.block_size_ = block_size;
    expander_state_lower.init_thread_count_ =
        Config::initial_degree_of_parallelism;
    expander_state_lower.child_ = child_iterator;
    expander_state_lower.schema_ =
        GetSchema(child_plan_context.attribute_list_);
    PhysicalOperatorBase* expander_lower = new Expander(expander_state_lower);

    // Add the exchange that ships the data to the collector node.
    ExchangeMerger::State state;
    state.block_size_ = block_size;
    state.child_ = expander_lower;
    state.exchange_id_ =
        IDsGenerator::getInstance()->generateUniqueExchangeID();
    state.schema_ = GetSchema(child_plan_context.attribute_list_);
    state.upper_id_list_.push_back(collecter_node);
    state.partition_schema_ = partition_schema::set_hash_partition(0);
    state.lower_id_list_ =
        GetInvolvedNodeID(child_plan_context.plan_partitioner_);
    child_iterator = new ExchangeMerger(state);
  }

  Expander::State expander_state;
  expander_state.block_count_in_buffer_ = 10;
  expander_state.block_size_ = block_size;
  if (is_exchange_need) {
    // If data exchange is used, only one expanded thread is enough.
    expander_state.init_thread_count_ = 1;
  } else {
    expander_state.init_thread_count_ = Config::initial_degree_of_parallelism;
  }
  expander_state.child_ = child_iterator;
  expander_state.schema_ = GetSchema(child_plan_context.attribute_list_);
  PhysicalOperatorBase* expander = new Expander(expander_state);

  // limit is non-NULL exactly when the child is a LogicalLimit.
  if (NULL != limit) {
    expander = limit->GetPhysicalPlan(block_size, expander);
  }

  // Initialised so that an unhandled style_ yields NULL rather than an
  // indeterminate pointer.
  PhysicalOperatorBase* ret = NULL;
  switch (style_) {
    case kPrint: {
      ResultPrinter::State print_state(
          GetSchema(child_plan_context.attribute_list_), expander, block_size,
          GetAttributeName(child_plan_context));
      ret = new ResultPrinter(print_state);
      break;
    }
    case kPerformance: {
      PerformanceMonitor::State performance_state(
          GetSchema(child_plan_context.attribute_list_), expander, block_size);
      ret = new PerformanceMonitor(performance_state);
      break;
    }
    case kResultCollector: {
      std::vector<std::string> column_header;
      GetColumnHeader(column_header, child_plan_context.attribute_list_);

      physical_operator::ResultCollector::State result_state(
          GetSchema(child_plan_context.attribute_list_), expander, block_size,
          column_header);
      ret = new physical_operator::ResultCollector(result_state);
      break;
    }
    default: {
      // Unknown style: no top operator is created, ret stays NULL.
      break;
    }
  }

  return ret;
}
Exemplo n.º 3
0
/**
 * Picks the cheapest physical plan for the sub-tree below the root and puts
 * the top operator selected by style_ on it.
 *
 * Two candidates are requested from the child: one without any requirement
 * (wrapped in an ExchangeMerger towards collecter_node when a network
 * transfer is needed) and one with the root's requirement pushed down. The
 * best of them, as chosen by GetBestPhysicalPlanDescriptor, is used.
 *
 * @param requirement requirement imposed by the caller; merged with the
 *        root's own "located on collecter_node" requirement when possible.
 * @param final_physical_plan_desc output: cost, plan context and plan.
 * @param block_size block size copied into every operator state built here.
 * @return the result of requirement.passLimits() on the final cost.
 */
bool LogicalQueryPlanRoot::GetOptimalPhysicalPlan(
    Requirement requirement, PhysicalPlanDescriptor& final_physical_plan_desc,
    const unsigned& block_size) {
  std::vector<PhysicalPlanDescriptor> candidate_physical_plan;
  Requirement current_req;
  current_req.setRequiredLocations(std::vector<NodeID>(1, collecter_node));

  Requirement merged_req;
  const bool requirement_merged =
      current_req.tryMerge(requirement, merged_req);
  if (requirement_merged) {
    current_req = merged_req;
  }

  PhysicalPlanDescriptor physical_plan;

  /** no requirement**/
  if (child_->GetOptimalPhysicalPlan(Requirement(), physical_plan,
                                     block_size)) {
    const NetworkTransfer transfer =
        current_req.requireNetworkTransfer(physical_plan.plan_context_);

    if (transfer == NONE) {
      candidate_physical_plan.push_back(physical_plan);
    } else if ((transfer == OneToOne) || (transfer == Shuffle)) {
      // Open question from the original author (Yu): why is transfer
      // compared with OneToOne, whose type is binding_mode?

      /* The input PlanContext has to be transferred over the network to meet
       * the requirement.
       * TODO: implement OneToOne Exchange
       * */
      physical_plan.cost += physical_plan.plan_context_.GetAggregatedDatasize();

      ExchangeMerger::State state;
      state.block_size_ = block_size;
      state.child_ = physical_plan.plan;
      state.exchange_id_ =
          IDsGenerator::getInstance()->generateUniqueExchangeID();
      state.schema_ = GetSchema(physical_plan.plan_context_.attribute_list_);
      state.upper_id_list_.push_back(collecter_node);
      state.partition_schema_ = partition_schema::set_hash_partition(0);
      state.lower_id_list_ =
          GetInvolvedNodeID(physical_plan.plan_context_.plan_partitioner_);
      physical_plan.plan = new ExchangeMerger(state);

      // Register the exchange-wrapped plan as a candidate. Previously it was
      // built and then overwritten by the next child call, so the exchange
      // was leaked and this alternative was never considered.
      candidate_physical_plan.push_back(physical_plan);
    }
  }

  /** with requirement**/
  if (child_->GetOptimalPhysicalPlan(current_req, physical_plan, block_size)) {
    candidate_physical_plan.push_back(physical_plan);
  }

  PhysicalPlanDescriptor best_plan =
      GetBestPhysicalPlanDescriptor(candidate_physical_plan);

  // Initialised so that a style_ without a matching case yields NULL instead
  // of an indeterminate value.
  PhysicalPlan final_plan = NULL;
  switch (style_) {
    case kPrint: {
      // Schema and column names both come from the chosen plan; physical_plan
      // may describe a different candidate at this point.
      ResultPrinter::State print_state(
          GetSchema(best_plan.plan_context_.attribute_list_), best_plan.plan,
          block_size, GetAttributeName(best_plan.plan_context_));
      final_plan = new ResultPrinter(print_state);
      break;
    }
    case kPerformance: {
      PerformanceMonitor::State performance_state(
          GetSchema(best_plan.plan_context_.attribute_list_), best_plan.plan,
          block_size);
      final_plan = new PerformanceMonitor(performance_state);
      break;
    }
    default: {
      // No top operator is defined here for other styles.
      break;
    }
  }

  if (requirement_merged) {
    final_physical_plan_desc.cost = best_plan.cost;
    final_physical_plan_desc.plan_context_ = best_plan.plan_context_;
    final_physical_plan_desc.plan = final_plan;
  } else {
    const NetworkTransfer transfer =
        current_req.requireNetworkTransfer(best_plan.plan_context_);

    if (transfer == NONE) {
      final_physical_plan_desc.cost = best_plan.cost;
      final_physical_plan_desc.plan_context_ = best_plan.plan_context_;
      final_physical_plan_desc.plan = final_plan;
    } else if ((transfer == OneToOne) || (transfer == Shuffle)) {
      /* The input PlanContext has to be transferred over the network to meet
       * the requirement.
       * TODO: implement OneToOne Exchange
       * */
      // NOTE(review): the exchange is built on best_plan.plan and returned as
      // the plan, so final_plan (the top operator created above) is not part
      // of the returned plan in this branch — confirm this is intended.

      ExchangeMerger::State state;
      state.block_size_ = block_size;
      state.child_ = best_plan.plan;
      state.exchange_id_ =
          IDsGenerator::getInstance()->generateUniqueExchangeID();
      state.schema_ = GetSchema(best_plan.plan_context_.attribute_list_);

      std::vector<NodeID> upper_id_list;
      if (requirement.hasRequiredLocations()) {
        upper_id_list = requirement.getRequiredLocations();
      } else if (requirement.hasRequiredPartitionFunction()) {
        /* The partition function contains the number of partitions. */
        PartitionFunction* partition_function =
            requirement.getPartitionFunction();
        // Take one snapshot of the node list so both iterators refer to the
        // same container, and use one node per partition, bounded by the
        // number of nodes that actually exist.
        const std::vector<NodeID> all_nodes =
            NodeTracker::GetInstance()->GetNodeIDList();
        std::vector<NodeID>::size_type node_count =
            static_cast<std::vector<NodeID>::size_type>(
                partition_function->getNumberOfPartitions());
        if (node_count > all_nodes.size()) {
          node_count = all_nodes.size();
        }
        upper_id_list.assign(all_nodes.begin(), all_nodes.begin() + node_count);
      } else {
        // TODO(wangli): decide the degree of parallelism
        upper_id_list = NodeTracker::GetInstance()->GetNodeIDList();
      }

      state.upper_id_list_ = upper_id_list;

      assert(requirement.hasReuiredPartitionKey());

      state.partition_schema_ = partition_schema::set_hash_partition(
          this->GetIdInAttributeList(best_plan.plan_context_.attribute_list_,
                                     requirement.getPartitionKey()));
      assert(state.partition_schema_.partition_key_index >= 0);

      state.lower_id_list_ =
          GetInvolvedNodeID(best_plan.plan_context_.plan_partitioner_);

      PhysicalOperatorBase* exchange = new ExchangeMerger(state);
      best_plan.plan = exchange;
      best_plan.cost += best_plan.plan_context_.GetAggregatedDatasize();

      final_physical_plan_desc.cost = best_plan.cost;
      final_physical_plan_desc.plan_context_ = best_plan.plan_context_;
      final_physical_plan_desc.plan = exchange;
    }
  }

  return requirement.passLimits(final_physical_plan_desc.cost);
}
Exemplo n.º 4
0
/**
 * Picks the cheapest physical plan for this filter.
 *
 * Two candidates are requested from the child:
 *  - without any requirement: the filter is put on the child's plan and, when
 *    the caller's requirement needs a network transfer, an ExchangeMerger is
 *    put on top of the filter;
 *  - with the caller's requirement pushed down: the filter is put on the
 *    child's plan.
 * The best candidate, as chosen by GetBestPhysicalPlanDescriptor, is returned
 * through physical_plan_descriptor.
 *
 * @param requirement requirement the produced plan has to meet.
 * @param physical_plan_descriptor output: the chosen plan descriptor.
 * @param block_size block size copied into every operator state built here
 *        and forwarded to the child.
 * @return the result of requirement.passLimits() on the chosen plan's cost.
 */
bool LogicalFilter::GetOptimalPhysicalPlan(
    Requirement requirement, PhysicalPlanDescriptor& physical_plan_descriptor,
    const unsigned& block_size) {
  PhysicalPlanDescriptor physical_plan;
  std::vector<PhysicalPlanDescriptor> candidate_physical_plans;

  /* no requirement to the child */
  if (child_->GetOptimalPhysicalPlan(Requirement(), physical_plan,
                                     block_size)) {
    const NetworkTransfer transfer =
        requirement.requireNetworkTransfer(physical_plan.plan_context_);
    const bool needs_exchange =
        (OneToOne == transfer) || (Shuffle == transfer);

    if ((NONE == transfer) || needs_exchange) {
      // The filter is built the same way whether or not an exchange follows.
      PhysicalFilter::State filter_state;
      filter_state.block_size_ = block_size;
      filter_state.child_ = physical_plan.plan;
      filter_state.qual_ = condi_;
      filter_state.column_id_ = column_id_;
      PlanContext plan_context = GetPlanContext();
      filter_state.schema_ = GetSchema(plan_context.attribute_list_);
      physical_plan.plan = new PhysicalFilter(filter_state);
    }

    if (needs_exchange) {
      /**
       * The input plan context has to be transferred over the network to
       * meet the requirement.
       * TODO(wangli): Implement OneToOne Exchange
       * */
      physical_plan.cost += physical_plan.plan_context_.GetAggregatedDatasize();

      ExchangeMerger::State state;
      state.block_size_ = block_size;
      state.child_ = physical_plan.plan;
      state.exchange_id_ =
          IDsGenerator::getInstance()->generateUniqueExchangeID();
      state.schema_ = GetSchema(physical_plan.plan_context_.attribute_list_);

      std::vector<NodeID> upper_id_list;
      if (requirement.hasRequiredLocations()) {
        upper_id_list = requirement.getRequiredLocations();
      } else if (requirement.hasRequiredPartitionFunction()) {
        // The partition function contains the number of partitions.
        PartitionFunction* partition_function =
            requirement.getPartitionFunction();
        // Take one snapshot of the node list so both iterators refer to the
        // same container, and use one node per partition, bounded by the
        // number of nodes that actually exist.
        const std::vector<NodeID> all_nodes =
            NodeTracker::GetInstance()->GetNodeIDList();
        std::vector<NodeID>::size_type node_count =
            static_cast<std::vector<NodeID>::size_type>(
                partition_function->getNumberOfPartitions());
        if (node_count > all_nodes.size()) {
          node_count = all_nodes.size();
        }
        upper_id_list.assign(all_nodes.begin(), all_nodes.begin() + node_count);
      } else {
        // TODO(wangli): decide the degree of parallelism
        upper_id_list = NodeTracker::GetInstance()->GetNodeIDList();
      }
      state.upper_id_list_ = upper_id_list;

      assert(requirement.hasReuiredPartitionKey());

      state.partition_schema_ =
          partition_schema::set_hash_partition(this->GetIdInAttributeList(
              physical_plan.plan_context_.attribute_list_,
              requirement.getPartitionKey()));
      assert(state.partition_schema_.partition_key_index >= 0);

      state.lower_id_list_ =
          GetInvolvedNodeID(physical_plan.plan_context_.plan_partitioner_);

      physical_plan.plan = new ExchangeMerger(state);
    }

    // Registered exactly once, whichever branch was taken above.
    candidate_physical_plans.push_back(physical_plan);
  }

  /* requirement pushed down to the child */
  if (child_->GetOptimalPhysicalPlan(requirement, physical_plan, block_size)) {
    PhysicalFilter::State state;
    state.block_size_ = block_size;
    state.child_ = physical_plan.plan;
    // The predicate must be set here as well, otherwise this candidate would
    // be a filter without a condition.
    state.qual_ = condi_;
    state.column_id_ = column_id_;
    PlanContext plan_context = GetPlanContext();
    state.schema_ = GetSchema(plan_context.attribute_list_);
    physical_plan.plan = new PhysicalFilter(state);
    candidate_physical_plans.push_back(physical_plan);
  }

  physical_plan_descriptor =
      GetBestPhysicalPlanDescriptor(candidate_physical_plans);

  return requirement.passLimits(physical_plan_descriptor.cost);
}
Exemplo n.º 5
0
/**
 * Builds the physical plan for this aggregation.
 *
 * Depending on aggregation_style_:
 *  - kLocalAgg: a single PhysicalAggregation on top of the child's plan;
 *  - kLocalAggReparGlobalAgg: a local PhysicalAggregation, an Expander, an
 *    ExchangeMerger that repartitions the partial results, and a global
 *    PhysicalAggregation on top;
 *  - kReparGlobalAgg: not supported here (asserts).
 *
 * Note: if group_by_attribute_list_ is empty, the partition key is
 * ATTRIBUTE_NULL
 *
 * @param block_size block size copied into every operator state built here.
 * @return the top physical operator, or NULL when no plan was built for the
 *         current aggregation_style_.
 */
PhysicalOperatorBase* LogicalAggregation::GetPhysicalPlan(
    const unsigned& block_size) {
  // Make sure plan_context_ is populated before it is dereferenced below.
  if (NULL == plan_context_) {
    GetPlanContext();
  }
  // Initialised so that an unsupported style never returns an indeterminate
  // pointer, even when assert() is compiled out.
  PhysicalOperatorBase* ret = NULL;
  const PlanContext child_plan_context = child_->GetPlanContext();

  // State shared by the stand-alone and the local (first-phase) aggregation.
  PhysicalAggregation::State local_agg_state;
  local_agg_state.group_by_attrs_ = group_by_attrs_;
  local_agg_state.aggregation_attrs_ = aggregation_attrs_;
  local_agg_state.block_size_ = block_size;
  local_agg_state.num_of_buckets_ =
      EstimateGroupByCardinality(child_plan_context);
  local_agg_state.bucket_size_ = 64;
  local_agg_state.input_schema_ = GetSchema(child_plan_context.attribute_list_);
  local_agg_state.output_schema_ = GetSchema(plan_context_->attribute_list_);
  local_agg_state.child_ = child_->GetPhysicalPlan(block_size);
  local_agg_state.avg_index_ = avg_id_in_agg_;
  local_agg_state.count_column_id_ = count_column_id_;
  local_agg_state.hash_schema_ =
      local_agg_state.output_schema_->duplicateSchema();

  switch (aggregation_style_) {
    case kLocalAgg: {
      local_agg_state.agg_node_type_ =
          PhysicalAggregation::State::kNotHybridAgg;
      ret = new PhysicalAggregation(local_agg_state);
      break;
    }
    case kLocalAggReparGlobalAgg: {
      // Phase 1: local aggregation on each lower node.
      local_agg_state.agg_node_type_ =
          PhysicalAggregation::State::kHybridAggLocal;
      PhysicalAggregation* local_aggregation =
          new PhysicalAggregation(local_agg_state);

      Expander::State expander_state;
      expander_state.block_count_in_buffer_ = EXPANDER_BUFFER_SIZE;
      expander_state.block_size_ = block_size;
      expander_state.init_thread_count_ = Config::initial_degree_of_parallelism;
      expander_state.child_ = local_aggregation;
      expander_state.schema_ = local_agg_state.hash_schema_->duplicateSchema();
      PhysicalOperatorBase* expander_lower = new Expander(expander_state);

      // Repartition the partial results from the child's nodes to the nodes
      // of this operator's partitioner.
      ExchangeMerger::State exchange_state;
      exchange_state.block_size_ = block_size;
      exchange_state.child_ = expander_lower;
      exchange_state.exchange_id_ =
          IDsGenerator::getInstance()->generateUniqueExchangeID();
      exchange_state.lower_id_list_ =
          GetInvolvedNodeID(child_->GetPlanContext().plan_partitioner_);
      exchange_state.upper_id_list_ =
          GetInvolvedNodeID(plan_context_->plan_partitioner_);
      exchange_state.partition_schema_ =
          partition_schema::set_hash_partition(0);
      exchange_state.schema_ = local_agg_state.hash_schema_->duplicateSchema();
      PhysicalOperatorBase* exchange = new ExchangeMerger(exchange_state);

      // Phase 2: global aggregation over the repartitioned partial results.
      PhysicalAggregation::State global_agg_state;
      global_agg_state.agg_node_type_ =
          PhysicalAggregation::State::kHybridAggGlobal;
      global_agg_state.input_schema_ =
          GetSchema(plan_context_->attribute_list_);
      global_agg_state.output_schema_ =
          GetSchema(plan_context_->attribute_list_);
      global_agg_state.hash_schema_ =
          global_agg_state.output_schema_->duplicateSchema();
      // Change each aggregation expression and group by expression to one
      // attribute.
      SetGroupbyAndAggAttrsForGlobalAgg(global_agg_state.group_by_attrs_,
                                        global_agg_state.aggregation_attrs_,
                                        global_agg_state.input_schema_);
      global_agg_state.block_size_ = block_size;
      global_agg_state.bucket_size_ = 64;
      global_agg_state.child_ = exchange;
      global_agg_state.num_of_buckets_ = local_agg_state.num_of_buckets_;
      global_agg_state.avg_index_ = avg_id_in_agg_;
      global_agg_state.count_column_id_ = count_column_id_;
      ret = new PhysicalAggregation(global_agg_state);
      break;
    }
    case kReparGlobalAgg: {
      // Not implemented; ret stays NULL when assertions are disabled.
      assert(false);
      break;
    }
    default: {
      // Unknown aggregation style: no plan is built, ret stays NULL.
      break;
    }
  }
  return ret;
}