// TODO(Hanzhang): Choose the scan strategy according to
// AVOID_CONTENTION_IN_SCAN. Case (1) still needs to be finished.
bool PhysicalProjectionScan::Next(SegmentExecStatus* const exec_status,
                                  BlockStreamBase* block) {
  RETURN_IF_CANCELLED(exec_status);

  unsigned long long total_start = curtick();
  // Clear a stale reference flag left by a previous user of this block, so
  // the block can be reused safely.
  if (block->isIsReference()) {
    block->setIsReference(false);
  }
#ifdef AVOID_CONTENTION_IN_SCAN
  ScanThreadContext* stc = reinterpret_cast<ScanThreadContext*>(GetContext());
  if (NULL == stc) {
    stc = new ScanThreadContext();
    InitContext(stc);
  }
  if (ExpanderTracker::getInstance()->isExpandedThreadCallBack(
          pthread_self())) {
    input_dataset_.AtomicPut(stc->assigned_data_);
    delete stc;
    destorySelfContext();
    kPerfInfo->report_instance_performance_in_millibytes();
    return false;
  }

  if (!stc->assigned_data_.empty()) {
    ChunkReaderIterator::block_accessor* ba = stc->assigned_data_.front();
    stc->assigned_data_.pop_front();

    ba->GetBlock(block);

    // whether InMemeryBlockAccessor::target_block_start_address should be
    // deleted depends on whether ba->GetBlock(block) copies the data
    delete ba;
    kPerfInfo->processed_one_block();
    return true;
  } else {
    if (input_dataset_.AtomicGet(stc->assigned_data_, Config::scan_batch)) {
      // case(1)
      return Next(exec_status, block);
    } else {
      delete stc;
      destorySelfContext();
      return false;
    }
  }

#else

  if (ExpanderTracker::getInstance()->isExpandedThreadCallBack(
          pthread_self())) {
    return false;
  }
  //  perf_info_->processed_one_block();
  // case(2)
  RETURN_IF_CANCELLED(exec_status);
  return partition_reader_iterator_->NextBlock(block);

#endif
}
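// A minimal, self-contained sketch of the batched hand-off used above under
// AVOID_CONTENTION_IN_SCAN: instead of taking the shared lock once per block,
// each scan thread grabs a whole batch of pending block accessors in one
// critical section and then consumes its private batch without any locking.
// BatchedQueue is an illustrative stand-in, not this codebase's
// input_dataset_ type.
#include <cstddef>
#include <list>
#include <mutex>

template <typename T>
class BatchedQueue {
 public:
  // Return a thread's unconsumed leftovers to the shared queue.
  void AtomicPut(std::list<T>& items) {
    std::lock_guard<std::mutex> guard(mutex_);
    shared_.splice(shared_.end(), items);
  }
  // Move up to batch_size items into the caller's private list; returns
  // false when the shared queue is already exhausted.
  bool AtomicGet(std::list<T>& out, size_t batch_size) {
    std::lock_guard<std::mutex> guard(mutex_);
    if (shared_.empty()) return false;
    for (size_t i = 0; i < batch_size && !shared_.empty(); ++i) {
      out.push_back(shared_.front());
      shared_.pop_front();
    }
    return true;
  }

 private:
  std::mutex mutex_;
  std::list<T> shared_;
};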
bool PhysicalProjectionScan::Open(SegmentExecStatus* const exec_status,
                                  const PartitionOffset& kPartitionOffset) {
  RETURN_IF_CANCELLED(exec_status);

  RegisterExpandedThreadToAllBarriers();

  if (TryEntryIntoSerializedSection()) {
    /* this is the first expanded thread*/
    PartitionStorage* partition_handle_;
    if (NULL ==
        (partition_handle_ = BlockManager::getInstance()->GetPartitionHandle(
             PartitionID(state_.projection_id_, kPartitionOffset)))) {
      LOG(ERROR) << PartitionID(state_.projection_id_, kPartitionOffset)
                        .getName()
                        .c_str() << CStrError(rNoPartitionIdScan) << std::endl;
      SetReturnStatus(false);
    } else {
      partition_reader_iterator_ =
          partition_handle_->CreateAtomicReaderIterator();
      SetReturnStatus(true);
    }

#ifdef AVOID_CONTENTION_IN_SCAN
    unsigned long long start = curtick();

    ChunkReaderIterator* chunk_reader_it;
    ChunkReaderIterator::block_accessor* ba;
    while (NULL !=
           (chunk_reader_it = partition_reader_iterator_->NextChunk())) {
      while (chunk_reader_it->GetNextBlockAccessor(ba)) {
        ba->GetBlockSize();
        input_dataset_.input_data_blocks_.push_back(ba);
      }
    }
#endif
    ExpanderTracker::getInstance()->addNewStageEndpoint(
        pthread_self(), LocalStageEndPoint(stage_src, "Scan", 0));
    perf_info_ =
        ExpanderTracker::getInstance()->getPerformanceInfo(pthread_self());
    perf_info_->initialize();
  }
  BarrierArrive();
  return GetReturnStatus();
}
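// Sketch of the open protocol used above: exactly one thread wins entry into
// the serialized section and performs the shared initialization; every thread
// then waits at the barrier, so Next() never observes half-initialized state.
// OpenCoordinator is a hypothetical illustration of that idiom, not the
// actual barrier implementation in this codebase.
#include <atomic>
#include <condition_variable>
#include <mutex>

class OpenCoordinator {
 public:
  explicit OpenCoordinator(int expected) : waiting_(0), expected_(expected) {}
  // Returns true for exactly one caller, which must run the shared setup.
  bool TryEntryIntoSerializedSection() {
    bool expect = false;
    return entered_.compare_exchange_strong(expect, true);
  }
  // Every thread, including the winner, blocks until all have arrived.
  void BarrierArrive() {
    std::unique_lock<std::mutex> lock(mutex_);
    if (++waiting_ == expected_) {
      cv_.notify_all();
    } else {
      cv_.wait(lock, [this] { return waiting_ == expected_; });
    }
  }

 private:
  std::atomic<bool> entered_{false};
  int waiting_;
  const int expected_;
  std::mutex mutex_;
  std::condition_variable cv_;
};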
// only one thread is allowed to fetch the sorted result
bool PhysicalSort::Next(SegmentExecStatus *const exec_status,
                        BlockStreamBase *block) {
  RETURN_IF_CANCELLED(exec_status);

  lock_->acquire();
  if (thread_id_ == -1) {
    thread_id_ = pthread_self();
    lock_->release();
  } else {
    if (thread_id_ != pthread_self()) {
      lock_->release();
      return false;
    } else {
      lock_->release();
    }
  }

  unsigned tuple_size = state_.input_schema_->getTupleMaxSize();
  void *desc = NULL;
  int tmp_tuple = -1;
  while (true) {
    if (all_cur_ < all_tuples_.size()) {
      if (NULL != (desc = block->allocateTuple(tuple_size))) {
        tmp_tuple = all_cur_++;
        memcpy(desc, all_tuples_[tmp_tuple], tuple_size);
      } else {  // block is full
        return true;
      }
    } else {                  // all tuple are fetched
      if (tmp_tuple == -1) {  // but this block is empty
        return false;
      } else {  // get several tuples
        return true;
      }
    }
  }
  return false;
}
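// The lock-protected gate at the top of PhysicalSort::Next pins result
// emission to whichever thread arrives first; later threads see a foreign
// thread_id_ and return false immediately. A compact sketch of the same
// idiom with std::atomic (hypothetical, not the operator's actual code):
#include <atomic>
#include <thread>

inline bool IsEmitterThread(std::atomic<std::thread::id>& owner) {
  std::thread::id none;  // default-constructed id means "no owner yet"
  const std::thread::id self = std::this_thread::get_id();
  // The first caller installs itself; later callers must match the winner.
  return owner.compare_exchange_strong(none, self) || owner.load() == self;
}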
/**
 * Build a hash table first; it stores the tuples to be deleted so that the
 * probe phase can be accelerated.
 */
bool PhysicalDeleteFilter::Open(SegmentExecStatus* const exec_status,
                                const PartitionOffset& partition_offset) {
  int64_t timer;  // declared before the TIME block so startTimer can see it
#ifdef TIME
  startTimer(&timer);
#endif
  RETURN_IF_CANCELLED(exec_status);

  RegisterExpandedThreadToAllBarriers();
  int ret = rSuccess;
  bool winning_thread = false;
  if (TryEntryIntoSerializedSection(0)) {
    winning_thread = true;
    ExpanderTracker::getInstance()->addNewStageEndpoint(
        pthread_self(),
        LocalStageEndPoint(stage_desc, "delete filter build", 0));
    unsigned output_index = 0;
    for (unsigned i = 0; i < state_.filter_key_deleted_.size(); i++) {
      joinIndex_table_to_output_[i] = output_index;
      output_index++;
    }
    for (unsigned i = 0; i < state_.payload_base_.size(); i++) {
      payload_table_to_output_[i] = output_index;
      output_index++;
    }
    // start to create the hash table, including the used hash function, hash
    // table structure
    hash_ = PartitionFunctionFactory::createBoostHashFunction(
        state_.hashtable_bucket_num_);
    int64_t hash_table_build = curtick();
    hashtable_ = new BasicHashTable(
        state_.hashtable_bucket_num_, state_.hashtable_bucket_size_,
        state_.input_schema_left_->getTupleMaxSize());
    if (NULL == hashtable_) {
      LOG(ERROR) << "hashtable allocation failed"
                 << "[" << rMemoryAllocationFailed << "]" << endl;
      return false;
    }
#ifdef _DEBUG_
    consumed_tuples_from_left = 0;
#endif

    // start to create the join expression, based on which we can probe the
    // deleted tuples
    //    QNode* expr = createEqualJoinExpression(
    //        state_.hashtable_schema_, state_.input_schema_right_,
    //        state_.filter_key_deleted_, state_.filter_key_base_);
    //    if (NULL == expr) {
    //      ret = rSuccess;
    //      LOG(ERROR) << "The generation of the equal join expression for "
    //                    "delete filter failed" << endl;
    //    }
    //    ticks start = curtick();
    //
    //    // start to generate the dedicated probe function, using llvm and a
    //    // function pointer
    //    if (Config::enable_codegen) {
    //      eftt_ = getExprFuncTwoTuples(expr, state_.hashtable_schema_,
    //                                   state_.input_schema_right_);
    //      memcpy_ = getMemcpy(state_.hashtable_schema_->getTupleMaxSize());
    //      memcat_ = getMemcat(state_.hashtable_schema_->getTupleMaxSize(),
    //                          state_.input_schema_right_->getTupleMaxSize());
    //    }
    //    if (eftt_) {
    //      cff_ = PhysicalDeleteFilter::isMatchCodegen;
    //      printf("Codegen(delete filter) succeed(%4.3fms)!\n",
    //             getMilliSecond(start));
    //    } else {
    cff_ = PhysicalDeleteFilter::isMatch;
    //      printf("Codegen(delete filter) failed!\n");
    //    }
    //    delete expr;
  }

  /**
   * For performance reasons, the following line should be placed right after
   * "RegisterExpandedThreadToAllBarriers();" in order to shorten the open
   * response time.
   */
  LOG(INFO) << "delete filter operator begins to open the left child" << endl;
  state_.child_left_->Open(exec_status, partition_offset);
  LOG(INFO) << "delete filter operator finished opening left child" << endl;
  BarrierArrive(0);
  BasicHashTable::Iterator tmp_it = hashtable_->CreateIterator();

  void* cur;
  void* tuple_in_hashtable;
  unsigned bn;

  void* key_in_input;
  void* key_in_hashtable;
  void* value_in_input;
  void* value_in_hashtable;
  // create the context for the multi-thread to build the hash table
  DeleteFilterThreadContext* dftc = CreateOrReuseContext(crm_numa_sensitive);
  const Schema* input_schema = state_.input_schema_left_->duplicateSchema();
  //  we use filter_key_deleted_[0] here because the data is partitioned on
  //  the first column of the join index
  const Operate* op = input_schema->getcolumn(state_.filter_key_deleted_[0])
                          .operate->duplicateOperator();
  const unsigned buckets = state_.hashtable_bucket_num_;

  int64_t start = curtick();
  int64_t processed_tuple_count = 0;

  LOG(INFO) << "delete filter operator begin to call left child's next()"
            << endl;
  RETURN_IF_CANCELLED(exec_status);

  while (state_.child_left_->Next(exec_status, dftc->l_block_for_asking_)) {
    RETURN_IF_CANCELLED(exec_status);
    delete dftc->l_block_stream_iterator_;
    dftc->l_block_stream_iterator_ =
        dftc->l_block_for_asking_->createIterator();
    while (NULL != (cur = dftc->l_block_stream_iterator_->nextTuple())) {
#ifdef _DEBUG_
      processed_tuple_count++;
      lock_.acquire();
      consumed_tuples_from_left++;
      lock_.release();
#endif
      const void* key_addr =
          input_schema->getColumnAddess(state_.filter_key_deleted_[0], cur);
      bn = op->getPartitionValue(key_addr, buckets);
      tuple_in_hashtable = hashtable_->atomicAllocate(bn);
      if (memcpy_)
        memcpy_(tuple_in_hashtable, cur);
      else
        input_schema->copyTuple(cur, tuple_in_hashtable);
    }
    dftc->l_block_for_asking_->setEmpty();
  }
  //  printf("%d cycles per
  //  tuple!\n",(curtick()-start)/processed_tuple_count);
  unsigned tmp = 0;
#ifdef _DEBUG_
  tuples_in_hashtable = 0;

  produced_tuples = 0;
  consumed_tuples_from_right = 0;
#endif
  if (ExpanderTracker::getInstance()->isExpandedThreadCallBack(
          pthread_self())) {
    UnregisterExpandedThreadToAllBarriers(1);
    //    printf("<<<<<<<<<<<<<<<<<Join open detected call back
    //    signal!>>>>>>>>>>>>>>>>>\n");
    return true;
  }
  BarrierArrive(1);
  //  if(winning_thread){
  ////    hashtable->report_status();
  ////    printf("Hash Table Build time: %4.4f\n",getMilliSecond(timer));
  //  }

  //  hashtable->report_status();

  //  printf("join open consume %d tuples\n",consumed_tuples_from_left);
  RETURN_IF_CANCELLED(exec_status);

  state_.child_right_->Open(exec_status, partition_offset);
  RETURN_IF_CANCELLED(exec_status);

  LOG(INFO) << "delete filter operator finished opening right child" << endl;
  return true;
}
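// A condensed sketch of the build phase above, assuming a bucket-chained
// hash table: every thread hashes the partition key of a left-child tuple
// and appends a copy of the tuple to the matching bucket under a lock
// (standing in for hashtable_->atomicAllocate plus copyTuple). ToyHashTable
// and the int-key-at-offset layout are illustrative assumptions, not this
// codebase's BasicHashTable.
#include <cstring>
#include <mutex>
#include <vector>

struct ToyHashTable {
  ToyHashTable(size_t buckets, size_t tuple_size)
      : tuple_size(tuple_size), slots(buckets) {}
  // Thread-safe append of one fixed-size tuple into a bucket chain.
  void AtomicInsert(size_t bucket, const void* tuple) {
    std::lock_guard<std::mutex> guard(mutex);
    const char* src = static_cast<const char*>(tuple);
    slots[bucket].insert(slots[bucket].end(), src, src + tuple_size);
  }
  size_t tuple_size;
  std::vector<std::vector<char> > slots;  // one byte array per bucket
  std::mutex mutex;
};

// One thread's build step: read the key, pick the bucket, insert the tuple.
inline void BuildOne(ToyHashTable& ht, const void* tuple, size_t key_offset) {
  const int key = *reinterpret_cast<const int*>(
      static_cast<const char*>(tuple) + key_offset);
  ht.AtomicInsert(static_cast<size_t>(key) % ht.slots.size(), tuple);
}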
bool PhysicalDeleteFilter::Next(SegmentExecStatus* const exec_status,
                                BlockStreamBase* block) {
  void* result_tuple;
  void* tuple_from_right_child;
  void* tuple_in_hashtable;
  void* key_in_input;
  void* key_in_hashtable;
  void* column_in_joinedTuple;
  void* joinedTuple =
      memalign(cacheline_size, state_.output_schema_->getTupleMaxSize());
  bool key_exist;

  DeleteFilterThreadContext* dftc =
      reinterpret_cast<DeleteFilterThreadContext*>(GetContext());

  while (true) {
    RETURN_IF_CANCELLED(exec_status);

    while (NULL != (tuple_from_right_child =
                        dftc->r_block_stream_iterator_->currentTuple())) {
      unsigned bn =
          state_.input_schema_right_->getcolumn(state_.filter_key_base_[0])
              .operate->getPartitionValue(
                  state_.input_schema_right_->getColumnAddess(
                      state_.filter_key_base_[0], tuple_from_right_child),
                  state_.hashtable_bucket_num_);
      // hashtable_->placeIterator(dftc->hashtable_iterator_, bn);
      // if there is no tuple in the bn-th bucket of the hash table, the tuple
      // from the base table is output directly
      if (NULL ==
          (tuple_in_hashtable = dftc->hashtable_iterator_.readCurrent())) {
        if (NULL != (result_tuple = block->allocateTuple(
                         state_.output_schema_->getTupleMaxSize()))) {
          produced_tuples_++;
          if (memcat_) {
            memcat_(result_tuple, tuple_in_hashtable, tuple_from_right_child);
          } else {
            state_.input_schema_right_->copyTuple(
                tuple_from_right_child, reinterpret_cast<char*>(result_tuple));
          }
        } else {
          free(joinedTuple);
          return true;
        }
      } else {
        while (NULL != (tuple_in_hashtable =
                            dftc->hashtable_iterator_.readCurrent())) {
          cff_(tuple_in_hashtable, tuple_from_right_child, &key_exist,
               state_.filter_key_deleted_, state_.filter_key_base_,
               state_.hashtable_schema_, state_.input_schema_right_, eftt_);
          if (!key_exist) {
            if (NULL != (result_tuple = block->allocateTuple(
                             state_.output_schema_->getTupleMaxSize()))) {
              produced_tuples_++;
              if (memcat_) {
                memcat_(result_tuple, tuple_in_hashtable,
                        tuple_from_right_child);
              } else {
                state_.input_schema_right_->copyTuple(
                    tuple_from_right_child,
                    reinterpret_cast<char*>(result_tuple));
              }
            } else {
              free(joinedTuple);
              return true;
            }
          }
          dftc->hashtable_iterator_.increase_cur_();
        }
      }
      dftc->r_block_stream_iterator_->increase_cur_();
#ifdef _DEBUG_
      consumed_tuples_from_right++;
#endif
      if ((tuple_from_right_child =
               dftc->r_block_stream_iterator_->currentTuple())) {
        bn = state_.input_schema_right_->getcolumn(state_.filter_key_base_[0])
                 .operate->getPartitionValue(
                     state_.input_schema_right_->getColumnAddess(
                         state_.filter_key_base_[0], tuple_from_right_child),
                     state_.hashtable_bucket_num_);
        hashtable_->placeIterator(dftc->hashtable_iterator_, bn);
      }
    }
    dftc->r_block_for_asking_->setEmpty();
    dftc->hashtable_iterator_ = hashtable_->CreateIterator();
    if (state_.child_right_->Next(exec_status, dftc->r_block_for_asking_) ==
        false) {
      if (block->Empty() == true) {
        free(joinedTuple);
        return false;
      } else {
        free(joinedTuple);
        return true;
      }
    }
    delete dftc->r_block_stream_iterator_;
    dftc->r_block_stream_iterator_ =
        dftc->r_block_for_asking_->createIterator();
    if ((tuple_from_right_child =
             dftc->r_block_stream_iterator_->currentTuple())) {
      unsigned bn =
          state_.input_schema_right_->getcolumn(state_.filter_key_base_[0])
              .operate->getPartitionValue(
                  state_.input_schema_right_->getColumnAddess(
                      state_.filter_key_base_[0], tuple_from_right_child),
                  state_.hashtable_bucket_num_);
      hashtable_->placeIterator(dftc->hashtable_iterator_, bn);
    }
  }
}
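// The probe phase above is effectively an anti-join: a base-table tuple is
// emitted only if no deleted tuple with an equal key sits in its bucket. A
// minimal sketch of that decision, reusing the ToyHashTable from the build
// sketch (the int key at key_offset is again an illustrative assumption):
inline bool SurvivesDelete(const ToyHashTable& ht, const void* base_tuple,
                           size_t key_offset) {
  const int key = *reinterpret_cast<const int*>(
      static_cast<const char*>(base_tuple) + key_offset);
  const std::vector<char>& chain =
      ht.slots[static_cast<size_t>(key) % ht.slots.size()];
  // Walk the bucket chain tuple by tuple, comparing keys.
  for (size_t off = 0; off + ht.tuple_size <= chain.size();
       off += ht.tuple_size) {
    const int probe_key =
        *reinterpret_cast<const int*>(chain.data() + off + key_offset);
    if (probe_key == key) return false;  // key in the delete set: filter out
  }
  return true;  // no match in the bucket: the tuple is copied to the output
}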
/**
 * Pay attention to how the different block buffers cooperate; see the
 * comments next to each of them below.
 */
bool ExchangeSenderPipeline::Open(SegmentExecStatus* const exec_status,
                                  const PartitionOffset&) {
  RETURN_IF_CANCELLED(exec_status);
  state_.child_->Open(exec_status, state_.partition_offset_);
  RETURN_IF_CANCELLED(exec_status);

  upper_num_ = state_.upper_id_list_.size();
  partition_function_ =
      PartitionFunctionFactory::createBoostHashFunction(upper_num_);
  socket_fd_upper_list_ = new int[upper_num_];

  /**
   * initialize the block that is used to accumulate the block obtained
   * by calling child iterator's next()
   */
  block_for_asking_ =
      BlockStreamBase::createBlock(state_.schema_, state_.block_size_);

  /**
   * partitioned_data_buffer_ stores the tuples received from child iterator.
   * Note the tuples are partitioned and stored.
   */
  partitioned_data_buffer_ = new PartitionedBlockBuffer(
      upper_num_, block_for_asking_->getSerializedBlockSize());

  /**
   * the temporary block that is used to transfer a block from partitioned data
   * buffer into sending_buffer.
   */
  block_for_sending_buffer_ =
      new BlockContainer(block_for_asking_->getSerializedBlockSize());

  /**
   * Initialize the buffer that is used to hold the blocks being sent. There are
   * upper_num blocks, each corresponding to a merger.
   */
  sending_buffer_ = new PartitionedBlockContainer(
      upper_num_, block_for_asking_->getSerializedBlockSize());

  // Initialized the temporary block to hold the serialized block.

  block_for_serialization_ =
      new Block(block_for_asking_->getSerializedBlockSize());

  /**
   * Initialize the blocks that are used to accumulate the tuples from child so
   * that the insertion to the buffer
   * can be conducted at the granularity of blocks rather than tuples.
   */
  partitioned_block_stream_ = new BlockStreamBase* [upper_num_];
  for (unsigned i = 0; i < upper_num_; ++i) {
    partitioned_block_stream_[i] =
        BlockStreamBase::createBlock(state_.schema_, state_.block_size_);
  }
  RETURN_IF_CANCELLED(exec_status);

  /** connect to all the mergers **/
  for (unsigned upper_offset = 0; upper_offset < state_.upper_id_list_.size();
       ++upper_offset) {
    RETURN_IF_CANCELLED(exec_status);

    LOG(INFO) << "(exchane_id= " << state_.exchange_id_
              << " partition_offset= " << state_.partition_offset_
              << " ) try to connect to upper( " << upper_offset << " , "
              << state_.upper_id_list_[upper_offset] << " ) ";

    if (ConnectToUpper(ExchangeID(state_.exchange_id_, upper_offset),
                       state_.upper_id_list_[upper_offset],
                       socket_fd_upper_list_[upper_offset]) != true) {
      LOG(INFO) << "unsuccessfully !" << std::endl;
      return false;
    }
  }
  LOG(INFO) << "connect to all mereger successfully !" << std::endl;

  RETURN_IF_CANCELLED(exec_status);

  /** create the Sender thread **/
  int error = pthread_create(&sender_thread_id_, NULL, Sender, this);
  if (error != 0) {
    LOG(ERROR) << "(exchane_id= " << state_.exchange_id_
               << " partition_offset= " << state_.partition_offset_
               << " ) Failed to create the sender thread>>>>>>>>>>"
               << std::endl;
    return false;
  }
  return true;
}
/**
 * Note how a block travels from the child to the mergers through the
 * different buffers.
 * If state_.partition_schema_ is hash partitioned, every tuple of the block
 * obtained from the child is hash-repartitioned and copied into
 * partitioned_block_stream_; once a stream block is full, it is serialized
 * and inserted into the corresponding partition buffer.
 * If state_.partition_schema_ is broadcast, the block from the child is
 * inserted into every partition buffer directly.
 */
bool ExchangeSenderPipeline::Next(SegmentExecStatus* const exec_status,
                                  BlockStreamBase* no_block) {
  void* tuple_from_child;
  void* tuple_in_cur_block_stream;
  while (true) {
    RETURN_IF_CANCELLED(exec_status);

    block_for_asking_->setEmpty();
    if (state_.child_->Next(exec_status, block_for_asking_)) {
      RETURN_IF_CANCELLED(exec_status);

      /**
       * if a block is obtained from the child, we repartition its tuples into
       * the corresponding partitioned_block_stream_.
       */
      if (state_.partition_schema_.isHashPartition()) {
        BlockStreamBase::BlockStreamTraverseIterator* traverse_iterator =
            block_for_asking_->createIterator();
        while (NULL != (tuple_from_child = traverse_iterator->nextTuple())) {
          /**
           * for each tuple in the newly obtained block, insert the tuple to
           * one partitioned block according to the partition hash value
           */
          const unsigned partition_id = GetHashPartitionId(
              tuple_from_child, state_.schema_,
              state_.partition_schema_.partition_key_index, upper_num_);

          // calculate the tuple size for the current tuple
          const unsigned bytes =
              state_.schema_->getTupleActualSize(tuple_from_child);

          // insert the tuple into the corresponding partitioned block
          while (!(tuple_in_cur_block_stream =
                       partitioned_block_stream_[partition_id]->allocateTuple(
                           bytes))) {
            /**
             * if the destination block is full, it should be serialized and
             * inserted into the partitioned_data_buffer.
             */
            partitioned_block_stream_[partition_id]->serialize(
                *block_for_serialization_);
            partitioned_data_buffer_->insertBlockToPartitionedList(
                block_for_serialization_, partition_id);
            partitioned_block_stream_[partition_id]->setEmpty();
          }
          /**
           * a thread arriving here has successfully allocated space for the
           * tuple, so we copy it
           */
          state_.schema_->copyTuple(tuple_from_child,
                                    tuple_in_cur_block_stream);
        }
        DELETE_PTR(traverse_iterator);  // fix memory leak (by Han)
      } else if (state_.partition_schema_.isBroadcastPartition()) {
        /**
         * for the broadcast case, the block from the child is inserted into
         * every partition of partitioned_data_buffer_
         */
        block_for_asking_->serialize(*block_for_serialization_);
        for (unsigned i = 0; i < upper_num_; ++i) {
          partitioned_data_buffer_->insertBlockToPartitionedList(
              block_for_serialization_, i);
        }
      }
    } else {
      RETURN_IF_CANCELLED(exec_status);

      if (state_.partition_schema_.isHashPartition()) {
        /* the child iterator is exhausted. For the hash-partitioned case we
         * flush the last, possibly partial, block stream blocks into the
         * buffer.
         */
        for (unsigned i = 0; i < upper_num_; ++i) {
          partitioned_block_stream_[i]->serialize(*block_for_serialization_);
          partitioned_data_buffer_->insertBlockToPartitionedList(
              block_for_serialization_, i);
        }
        /* The following lines send an empty block to each upper, indicating
         * that all the data from the current sender has been transmitted.
         */
        for (unsigned i = 0; i < upper_num_; ++i) {
          if (!partitioned_block_stream_[i]->Empty()) {
            partitioned_block_stream_[i]->setEmpty();
            partitioned_block_stream_[i]->serialize(*block_for_serialization_);
            partitioned_data_buffer_->insertBlockToPartitionedList(
                block_for_serialization_, i);
          }
        }
      } else if (state_.partition_schema_.isBroadcastPartition()) {
        /* The following lines send an empty block to each upper, indicating
         * that all the data from the current sender has been transmitted.
         */
        block_for_asking_->setEmpty();
        block_for_asking_->serialize(*block_for_serialization_);
        for (unsigned i = 0; i < upper_num_; ++i) {
          partitioned_data_buffer_->insertBlockToPartitionedList(
              block_for_serialization_, i);
        }
      }

      /*
       * wait until every block in the buffer has been transmitted to the
       * uppers.
       */
      LOG(INFO) << "(exchane_id= " << state_.exchange_id_
                << " partition_offset= " << state_.partition_offset_
                << " ) Waiting until all the blocks in the buffer is sent!"
                << std::endl;
      RETURN_IF_CANCELLED(exec_status);

      while (!partitioned_data_buffer_->isEmpty()) {
        RETURN_IF_CANCELLED(exec_status);

        usleep(1);
      }

      /*
       * wait until all the uppers have sent the close notification, which
       * means the blocks in the uppers' socket buffers have all been
       * consumed.
       */
      LOG(INFO) << "(exchane_id= " << state_.exchange_id_
                << " partition_offset= " << state_.partition_offset_
                << " ) Waiting for close notification from all merger!"
                << std::endl;
      RETURN_IF_CANCELLED(exec_status);

      for (unsigned i = 0; i < upper_num_; i++) {
        RETURN_IF_CANCELLED(exec_status);

        WaitingForCloseNotification(socket_fd_upper_list_[i]);
      }
      LOG(INFO) << " received all close notification, closing.. " << endl;
      return false;
    }
  }
}
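// A compact sketch of the repartitioning loop above: each tuple is routed by
// its hash partition id into a per-partition staging block; when the staging
// block cannot fit the tuple, the block is flushed (here into a plain vector
// standing in for partitioned_data_buffer_) and reused. All names are
// illustrative; a tuple is assumed to be no larger than a block.
#include <cstring>
#include <vector>

struct StagingBlock {
  explicit StagingBlock(size_t capacity) : data(capacity), used(0) {}
  // Returns a write position, or NULL when the block cannot fit the tuple.
  char* Allocate(size_t bytes) {
    if (used + bytes > data.size()) return NULL;
    char* pos = data.data() + used;
    used += bytes;
    return pos;
  }
  void Reset() { used = 0; }
  std::vector<char> data;
  size_t used;
};

inline void Route(std::vector<StagingBlock>& partitions,
                  std::vector<std::vector<char> >& flushed,
                  const void* tuple, size_t bytes, size_t hash_value) {
  const size_t pid = hash_value % partitions.size();
  char* dst = partitions[pid].Allocate(bytes);
  if (dst == NULL) {  // staging block full: flush it, then retry
    flushed.push_back(std::vector<char>(
        partitions[pid].data.begin(),
        partitions[pid].data.begin() + partitions[pid].used));
    partitions[pid].Reset();
    dst = partitions[pid].Allocate(bytes);  // succeeds: bytes <= capacity
  }
  std::memcpy(dst, tuple, bytes);
}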
/**
 * 1. Buffer phase: store all the input data in a buffer; multiple threads
 *    append to it concurrently.
 * 2. Sort phase: sort the buffered data with stable_sort(), ordering the
 *    records by the specified columns.
 * 3. Decide whether to register the buffer into the block manager.
 */
bool PhysicalSort::Open(SegmentExecStatus *const exec_status,
                        const PartitionOffset &part_off) {
  RETURN_IF_CANCELLED(exec_status);

  RegisterExpandedThreadToAllBarriers();
  if (TryEntryIntoSerializedSection(0)) {
    all_cur_ = 0;
    thread_id_ = -1;
    all_tuples_.clear();
    block_buffer_ = new DynamicBlockBuffer();
  }
  BarrierArrive(0);
  BlockStreamBase *block_for_asking;
  if (CreateBlock(block_for_asking) == false) {
    LOG(ERROR) << "error in the create block stream!!!" << endl;
    return 0;
  }
  //  state_.partition_offset_ = part_off;
  state_.child_->Open(exec_status, part_off);
  RETURN_IF_CANCELLED(exec_status);

  /**
   * phase 1: store the data in the buffer, using multiple threads to speed
   * it up
   */
  vector<void *> thread_tuple;
  thread_tuple.clear();
  void *tuple_ptr = NULL;
  BlockStreamBase::BlockStreamTraverseIterator *block_it;

  while (state_.child_->Next(exec_status, block_for_asking)) {
    RETURN_IF_CANCELLED(exec_status);

    block_buffer_->atomicAppendNewBlock(block_for_asking);
    block_it = block_for_asking->createIterator();
    while (NULL != (tuple_ptr = block_it->nextTuple())) {
      thread_tuple.push_back(tuple_ptr);
    }
    if (NULL != block_it) {
      delete block_it;
      block_it = NULL;
    }
    if (CreateBlock(block_for_asking) == false) {
      LOG(ERROR) << "error in the create block stream!!!" << endl;
      return 0;
    }
  }

  if (NULL != block_for_asking) {
    delete block_for_asking;
    block_for_asking = NULL;
  }
  lock_->acquire();
  all_tuples_.insert(all_tuples_.end(), thread_tuple.begin(),
                     thread_tuple.end());
  lock_->release();
  thread_tuple.clear();

  // guarantee that block_buffer_ has received all the data blocks
  BarrierArrive(1);

  // phase 2: sort the data in the buffer; only one thread does this
  if (TryEntryIntoSerializedSection(1)) {
    // reverse the order of order_by_attrs to preserve the relative ordering
    // of equivalent elements across the successive stable sorts
    reverse(state_.order_by_attrs_.begin(), state_.order_by_attrs_.end());
    // evaluating one expression for two tuples overwrites the result, so copy
    // the expression and evaluate the two tuples with separate copies
    state_.order_by_attrs_copy_ = state_.order_by_attrs_;
    OperFuncInfoData oper_info;
    fcinfo = &oper_info;
    state_.compare_funcs_ =
        new DataTypeOperFunc[state_.order_by_attrs_.size()][2];
    for (size_t i = 0; i < state_.order_by_attrs_.size(); ++i) {
      state_.order_by_attrs_copy_[i].first =
          state_.order_by_attrs_[i].first->ExprCopy();  // deep copy
      state_.order_by_attrs_[i].first->InitExprAtPhysicalPlan();
      state_.order_by_attrs_copy_[i].first->InitExprAtPhysicalPlan();

      state_.compare_funcs_[i][0] = DataTypeOper::data_type_oper_func_
          [state_.order_by_attrs_[i].first->get_type_][OperType::oper_less];
      state_.compare_funcs_[i][1] = DataTypeOper::data_type_oper_func_
          [state_.order_by_attrs_[i].first->get_type_][OperType::oper_great];
    }
    //    int64_t time = curtick();
    state_.eecnxt_.schema[0] = state_.input_schema_;
    state_.eecnxt1_.schema[0] = state_.input_schema_;
    RETURN_IF_CANCELLED(exec_status);
    cmp_state_ = &state_;

    Order();
  }
  BarrierArrive(2);
  return true;
}
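// Why the key list is reversed in the serialized section above: running a
// stable sort per key, starting from the least significant key and ending
// with the most significant one, preserves the relative order of equal
// elements and therefore yields a correct multi-key ordering. A minimal
// demonstration with std::stable_sort on a two-key row (hypothetical types):
#include <algorithm>
#include <string>
#include <vector>

struct Row {
  int a;          // most significant sort key
  std::string b;  // least significant sort key
};

inline void MultiKeyStableSort(std::vector<Row>& rows) {
  // Least significant key first...
  std::stable_sort(rows.begin(), rows.end(),
                   [](const Row& l, const Row& r) { return l.b < r.b; });
  // ...then the most significant key; ties on `a` keep the order on `b`.
  std::stable_sort(rows.begin(), rows.end(),
                   [](const Row& l, const Row& r) { return l.a < r.a; });
}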
/**
 * @brief The open method fetches results from the left child and copies them
 * into a local block buffer. The buffer is a dynamic block buffer, since all
 * the expanded threads share it.
 */
bool PhysicalNestLoopJoin::Open(SegmentExecStatus *const exec_status,
                                const PartitionOffset &partition_offset) {
  RETURN_IF_CANCELLED(exec_status);

  RegisterExpandedThreadToAllBarriers();
  unsigned long long int timer;
  bool winning_thread = false;
  if (TryEntryIntoSerializedSection(0)) {  // only the first thread runs this
    ExpanderTracker::getInstance()->addNewStageEndpoint(
        pthread_self(), LocalStageEndPoint(stage_desc, "nest loop", 0));
    winning_thread = true;
    timer = curtick();
    block_buffer_ = new DynamicBlockBuffer();
    if (state_.join_condi_.size() == 0) {
      join_condi_process_ = WithoutJoinCondi;
    } else {
      join_condi_process_ = WithJoinCondi;
    }
    LOG(INFO) << "[NestloopJoin]: [the first thread opens the nestloopJoin "
                 "physical operator]" << std::endl;
  }
  RETURN_IF_CANCELLED(exec_status);

  state_.child_left_->Open(exec_status, partition_offset);
  RETURN_IF_CANCELLED(exec_status);

  BarrierArrive(0);

  NestLoopJoinContext *jtc = CreateOrReuseContext(crm_numa_sensitive);
  // create a new block to hold the results from the left child
  // and add the results to the dynamic buffer
  CreateBlockStream(jtc->block_for_asking_, state_.input_schema_left_);
  while (state_.child_left_->Next(exec_status, jtc->block_for_asking_)) {
    if (exec_status->is_cancelled()) {
      if (NULL != jtc->block_for_asking_) {
        delete jtc->block_for_asking_;
        jtc->block_for_asking_ = NULL;
      }
      return false;
    }
    //    cout << "after assgin start :" << jtc->block_for_asking_->getBlock()
    //         << " is reference : " << jtc->block_for_asking_->isIsReference()
    //         << endl;
    block_buffer_->atomicAppendNewBlock(jtc->block_for_asking_);
    //    if (!jtc->block_for_asking_->isIsReference()) {
    CreateBlockStream(jtc->block_for_asking_, state_.input_schema_left_);
    //    } else {
    //      //      cout << "temp after" << temp << endl;
    //      //      delete temp;
    //      CreateBlockStream(jtc->block_for_asking_,
    //      state_.input_schema_left_);
    //      jtc->block_for_asking_->setIsReference(false);
    //    }
    //    cout << "new start :" << jtc->block_for_asking_->getBlock()
    //         << " is reference : " << jtc->block_for_asking_->isIsReference()
    //         << endl;
  }
  //  cout << "buffer_size_ : " << block_buffer_->GetBufferSize() << endl;
  //  the last block is created without storing the results from the left
  // child

  if (NULL != jtc->block_for_asking_) {
    delete jtc->block_for_asking_;
    jtc->block_for_asking_ = NULL;
  }
  // when an expanded thread has finished its allocated work, it can be called
  // back here. Note that the callback means the thread is asked to exit.
  if (ExpanderTracker::getInstance()->isExpandedThreadCallBack(
          pthread_self())) {
    UnregisterExpandedThreadToAllBarriers(1);
    LOG(INFO) << "[NestloopJoin]: [the" << pthread_self()
              << "the thread is called to exit]" << std::endl;
    return true;  // the
  }
  BarrierArrive(1);
  CreateBlockStream(jtc->block_for_asking_, state_.input_schema_right_);
  jtc->block_for_asking_->setEmpty();
  jtc->block_stream_iterator_ = jtc->block_for_asking_->createIterator();
  jtc->buffer_iterator_ = block_buffer_->createIterator();

  // underlying bug: buffer_iterator_ may yield NULL here, so every thread's
  // buffer_stream_iterator_ should be made to point at an empty block:
  // jtc->buffer_stream_iterator_ =
  //    jtc->buffer_iterator_.nextBlock()->createIterator();

  InitContext(jtc);  // TODO: rename this function; it stores the thread
                     // context in the operator context
  RETURN_IF_CANCELLED(exec_status);
  state_.child_right_->Open(exec_status, partition_offset);
  return true;
}
bool PhysicalNestLoopJoin::Next(SegmentExecStatus *const exec_status,
                                BlockStreamBase *block) {
  /**
   * @brief Describes the flow of the nest-loop join. The intermediate result
   * of the left child was stored in the dynamic block buffer in the open
   * function. This next function fetches the intermediate result of the
   * right child one block at a time; within each block, it takes every tuple
   * and joins it with every tuple in the dynamic block buffer while
   * traversing it.
   */
  RETURN_IF_CANCELLED(exec_status);

  void *tuple_from_buffer_child = NULL;
  void *tuple_from_right_child = NULL;
  void *result_tuple = NULL;
  bool pass = false;
  BlockStreamBase *buffer_block = NULL;
  NestLoopJoinContext *jtc =
      reinterpret_cast<NestLoopJoinContext *>(GetContext());
  while (1) {
    RETURN_IF_CANCELLED(exec_status);

    while (NULL != (tuple_from_right_child =
                        jtc->block_stream_iterator_->currentTuple())) {
      while (1) {
        while (NULL != (tuple_from_buffer_child =
                            jtc->buffer_stream_iterator_->currentTuple())) {
          pass = join_condi_process_(tuple_from_buffer_child,
                                     tuple_from_right_child, jtc);
          if (pass) {
            if (NULL != (result_tuple = block->allocateTuple(
                             state_.output_schema_->getTupleMaxSize()))) {
              const unsigned copied_bytes =
                  state_.input_schema_left_->copyTuple(tuple_from_buffer_child,
                                                       result_tuple);
              state_.input_schema_right_->copyTuple(
                  tuple_from_right_child,
                  reinterpret_cast<char *>(result_tuple) + copied_bytes);
            } else {
              return true;
            }
          }
          jtc->buffer_stream_iterator_->increase_cur_();
        }

        if (jtc->buffer_stream_iterator_ != NULL) {
          delete jtc->buffer_stream_iterator_;
          jtc->buffer_stream_iterator_ = NULL;
        }
        if (NULL != (buffer_block = jtc->buffer_iterator_.nextBlock())) {
          jtc->buffer_stream_iterator_ = buffer_block->createIterator();
        } else {
          break;
        }
      }

      jtc->buffer_iterator_.ResetCur();
      if (NULL == (buffer_block = jtc->buffer_iterator_.nextBlock())) {
        LOG(ERROR) << "[NestloopJoin]: this block shouldn't be NULL in nest "
                      "loop join!";
        assert(
            false &&
            "[NestloopJoin]: this block shouldn't be NULL in nest loop join!");
      }
      if (jtc->buffer_stream_iterator_ != NULL) {
        delete jtc->buffer_stream_iterator_;
        jtc->buffer_stream_iterator_ = NULL;
      }
      jtc->buffer_stream_iterator_ = buffer_block->createIterator();
      jtc->block_stream_iterator_->increase_cur_();
    }

    // if buffer is empty, return false directly
    jtc->buffer_iterator_.ResetCur();
    if (NULL == (buffer_block = jtc->buffer_iterator_.nextBlock())) {
      LOG(WARNING) << "[NestloopJoin]: the buffer is empty in nest loop join!";
      // drain the right child's remaining data
      jtc->block_for_asking_->setEmpty();
      while (state_.child_right_->Next(exec_status, jtc->block_for_asking_)) {
        jtc->block_for_asking_->setEmpty();
      }
      return false;
    }
    if (jtc->buffer_stream_iterator_ != NULL) {
      delete jtc->buffer_stream_iterator_;
      jtc->buffer_stream_iterator_ = NULL;
    }
    jtc->buffer_stream_iterator_ = buffer_block->createIterator();

    // ask block from right child
    jtc->block_for_asking_->setEmpty();
    if (false ==
        state_.child_right_->Next(exec_status, jtc->block_for_asking_)) {
      if (true == block->Empty()) {
        LOG(WARNING) << "[NestloopJoin]: [no join result is stored in the "
                        "block after traverse the right child operator]"
                     << std::endl;
        return false;
      } else {
        LOG(INFO) << "[NestloopJoin]: get a new block from right child "
                  << std::endl;
        return true;
      }
    }
    if (jtc->block_stream_iterator_ != NULL) {
      delete jtc->block_stream_iterator_;
      jtc->block_stream_iterator_ = NULL;
    }
    jtc->block_stream_iterator_ = jtc->block_for_asking_->createIterator();
  }
}
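// The skeleton of the Next() above with the block and iterator plumbing
// stripped away: for every tuple of the right child, scan the entire
// buffered left side and emit the concatenated pairs that pass the join
// condition. A toy version over vectors (illustrative only):
#include <vector>

template <typename L, typename R, typename Pred, typename Emit>
void NestLoopJoin(const std::vector<L>& left_buffer,
                  const std::vector<R>& right_stream, Pred pass, Emit emit) {
  for (const R& r : right_stream) {   // one "block" of the right child
    for (const L& l : left_buffer) {  // full pass over the buffered left side
      if (pass(l, r)) emit(l, r);     // join condition, then concatenation
    }
  }
}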