Example #1
0
bool PhysicalSort::Open(const PartitionOffset &part_off) {
  /**
   * TODO(anyone): multi threads can be used to pipeline!!!
   */
  swap_num_ = 0;
  temp_cur_ = 0;
  /**
   *  first we can store all the data which will be bufferred
   * 1, buffer is the first phase. multi-threads will be applyed to the data
   *    in the buffer.
   * 2, sort the data in the buffer, we choose quicksort to sort the records
   *    by specifying the column to be sorted
   * 3, whether to register the buffer into the blockmanager.
   * */
  BlockStreamBase *block_for_asking;

  state_.partition_offset_ = part_off;

  state_.child_->Open(state_.partition_offset_);

  if (sema_open_.try_wait()) {
    block_buffer_iterator_ = block_buffer_.createIterator();
    open_finished_ = true;
  } else {
    while (!open_finished_) {
      usleep(1);
    }
  }

  if (CreateBlockStream(block_for_asking) == false) {
    LOG(ERROR) << "error in the create block stream!!!" << endl;
    return 0;
  }
  /**
   *  phase 1: store the data in the buffer!
   *          by using multi-threads to speed up
   */
  unsigned block_offset = 0;
  unsigned tuple_count_sum = 0;
  BlockStreamBase::BlockStreamTraverseIterator *iterator_for_scan;
  while (state_.child_->Next(block_for_asking)) {
    tuple_count_sum += block_for_asking->getTuplesInBlock();
    block_buffer_.atomicAppendNewBlock(block_for_asking);
    iterator_for_scan = block_buffer_.getBlock(block_offset)->createIterator();
    void *tuple_ptr = 0;
    while ((tuple_ptr = iterator_for_scan->nextTuple()) != 0) {
      tuple_vector_.push_back(tuple_ptr);
    }
    block_offset++;
    if (CreateBlockStream(block_for_asking) == false) {
      LOG(ERROR) << "error in the create block stream!!!" << endl;
      return 0;
    }
  }

  /**
   *  phase 2: sort the data in the buffer!
   *          by using multi-threads to speed up?
   * TODO(anyone): whether to store the sorted data into the blockmanager.
   */
  //    cout<<"check the memory usage!!!"<<endl;
  unsigned long long int time = curtick();
  //    order(state_.orderbyKey_,tuple_count_sum);
  Order();

  // cout<<"the tuple_count is: "<<tuple_count_sum<<"Total time:
  // "<<getSecond(time)<<" seconds, the swap num is: "<<swap_num<<endl;
  return true;
}
/**
 * @brief Opens the nest-loop join: fetches all results of the left child and
 * stores them in the local block buffer, then prepares the per-thread context
 * and opens the right child.
 *
 * The block buffer is a dynamic block buffer because all expanded threads
 * share the same buffer and append to it concurrently.
 *
 * @param exec_status       execution status of the segment; polled for
 *                          cancellation throughout the function.
 * @param partition_offset  partition offset forwarded to both children.
 * @return true when the function runs to the end, or when the calling
 *         expanded thread is called back to exit after draining the left
 *         child; false when cancellation is observed while draining the left
 *         child. RETURN_IF_CANCELLED may also return early (presumably with a
 *         failure value -- the macro is defined elsewhere; confirm).
 */
bool PhysicalNestLoopJoin::Open(SegmentExecStatus *const exec_status,
                                const PartitionOffset &partition_offset) {
  RETURN_IF_CANCELLED(exec_status);

  RegisterExpandedThreadToAllBarriers();
  // NOTE(review): `timer` and `winning_thread` are written below but never
  // read anywhere in this function.
  unsigned long long int timer;
  bool winning_thread = false;
  if (TryEntryIntoSerializedSection(0)) {  // only the first thread does this
    ExpanderTracker::getInstance()->addNewStageEndpoint(
        pthread_self(), LocalStageEndPoint(stage_desc, "nest loop", 0));
    winning_thread = true;
    timer = curtick();
    // Shared buffer that will hold every block produced by the left child.
    block_buffer_ = new DynamicBlockBuffer();
    // Pick the join routine depending on whether any join condition exists.
    if (state_.join_condi_.size() == 0) {
      join_condi_process_ = WithoutJoinCondi;
    } else {
      join_condi_process_ = WithJoinCondi;
    }
    LOG(INFO) << "[NestloopJoin]: [the first thread opens the nestloopJoin "
                 "physical operator]" << std::endl;
  }
  RETURN_IF_CANCELLED(exec_status);

  state_.child_left_->Open(exec_status, partition_offset);
  RETURN_IF_CANCELLED(exec_status);

  BarrierArrive(0);

  NestLoopJoinContext *jtc = CreateOrReuseContext(crm_numa_sensitive);
  // Create a new block to hold the results from the left child; filled blocks
  // are added to the dynamic buffer below.
  CreateBlockStream(jtc->block_for_asking_, state_.input_schema_left_);
  while (state_.child_left_->Next(exec_status, jtc->block_for_asking_)) {
    if (exec_status->is_cancelled()) {
      // Cancelled: the block just fetched is not appended to the buffer, so
      // free it here before bailing out.
      if (NULL != jtc->block_for_asking_) {
        delete jtc->block_for_asking_;
        jtc->block_for_asking_ = NULL;
      }
      return false;
    }
    block_buffer_->atomicAppendNewBlock(jtc->block_for_asking_);
    // The appended block now lives in the buffer; create a fresh block for the
    // next call to Next().
    CreateBlockStream(jtc->block_for_asking_, state_.input_schema_left_);
  }

  // The last block was created without storing any results from the left
  // child, so it is released here.
  if (NULL != jtc->block_for_asking_) {
    delete jtc->block_for_asking_;
    jtc->block_for_asking_ = NULL;
  }
  // When an expanded thread has finished its allocated work it can be called
  // back here. Note that the callback means the thread is about to exit, so it
  // unregisters from the barriers instead of arriving at the next one.
  if (ExpanderTracker::getInstance()->isExpandedThreadCallBack(
          pthread_self())) {
    UnregisterExpandedThreadToAllBarriers(1);
    LOG(INFO) << "[NestloopJoin]: [the" << pthread_self()
              << "the thread is called to exit]" << std::endl;
    return true;  // this thread exits without taking part in the join
  }
  // NOTE(review): the original author flagged this barrier with "??ERROR";
  // the concern is not explained anywhere in this function -- investigate.
  BarrierArrive(1);
  // Reuse the context's block pointer for the right side: create a block with
  // the right child's schema, mark it empty and create the iterators stored in
  // the thread context.
  CreateBlockStream(jtc->block_for_asking_, state_.input_schema_right_);
  jtc->block_for_asking_->setEmpty();
  jtc->block_stream_iterator_ = jtc->block_for_asking_->createIterator();
  jtc->buffer_iterator_ = block_buffer_->createIterator();

  // Known issue noted by the original author: buffer_iterator may be NULL, so
  // it would be necessary to let every thread's buffer iterator point to an
  // empty block. The disabled attempt was:
  //   jtc->buffer_stream_iterator_ =
  //       jtc->buffer_iterator_.nextBlock()->createIterator();

  // TODO: rename InitContext; here it means storing the thread context in the
  // operator context.
  InitContext(jtc);
  RETURN_IF_CANCELLED(exec_status);
  state_.child_right_->Open(exec_status, partition_offset);
  return true;
}