bool BlockStreamNestLoopJoinIterator::open(const PartitionOffset& partition_offset)
{
	RegisterExpandedThreadToAllBarriers();
//	AtomicPushFreeHtBlockStream(BlockStreamBase::createBlock(state_.input_schema_left,state_.block_size_));
//	AtomicPushFreeBlockStream(BlockStreamBase::createBlock(state_.input_schema_right,state_.block_size_));
	unsigned long long int timer;
	bool winning_thread=false;
	if(tryEntryIntoSerializedSection(0))//the first thread of all need to do
	{
		ExpanderTracker::getInstance()->addNewStageEndpoint(pthread_self(),LocalStageEndPoint(stage_desc,"nest loop build",0));
		winning_thread=true;
		timer=curtick();
//		unsigned output_index=0;
//		for(unsigned i=0;i<state_.joinIndex_left.size();i++){
//			joinIndex_left_to_output[i]=output_index;
//			output_index++;
//		}
//		for(unsigned i=0;i<state_.payload_left.size();i++){
//			payload_left_to_output[i]=output_index;
//			output_index++;
//		}
//		for(unsigned i=0;i<state_.payload_right.size();i++){
//			payload_right_to_output[i]=output_index;
//			output_index++;
//		}
		blockbuffer=new DynamicBlockBuffer();

	}
	state_.child_left->open(partition_offset);
	barrierArrive(0);
	join_thread_context* jtc=new join_thread_context();
	createBlockStream(jtc->block_for_asking_);
	while(state_.child_left->next(jtc->block_for_asking_))
	{
		blockbuffer->atomicAppendNewBlock(jtc->block_for_asking_);
		createBlockStream(jtc->block_for_asking_);
	}

	delete jtc->block_for_asking_;
	if(ExpanderTracker::getInstance()->isExpandedThreadCallBack(pthread_self())){
		unregisterExpandedThreadToAllBarriers(1);
		return true;
	}
	barrierArrive(1);//??ERROR
//	join_thread_context* jtc=new join_thread_context();
	jtc->block_for_asking_=BlockStreamBase::createBlock(state_.input_schema_right,state_.block_size_);
	jtc->block_stream_iterator_=jtc->block_for_asking_->createIterator();
	initContext(jtc);
	state_.child_right->open(partition_offset);
	return true;
}
/**
 * Opens the projection scan for one partition.
 *
 * The first thread to enter the serialized section looks up the partition
 * handle, creates the atomic reader iterator, records the outcome through
 * SetReturnStatus() and registers the stage endpoint and performance info.
 * Every thread then arrives at the barrier and returns the recorded status.
 *
 * @param exec_status      checked for cancellation on entry.
 * @param kPartitionOffset partition of the projection to scan.
 * @return the status recorded by the first thread (false when the partition
 *         handle could not be found).
 */
bool PhysicalProjectionScan::Open(SegmentExecStatus* const exec_status,
                                  const PartitionOffset& kPartitionOffset) {
  RETURN_IF_CANCELLED(exec_status);

  RegisterExpandedThreadToAllBarriers();

  if (TryEntryIntoSerializedSection()) {
    /* this is the first expanded thread*/
    PartitionStorage* partition_handle =
        BlockManager::getInstance()->GetPartitionHandle(
            PartitionID(state_.projection_id_, kPartitionOffset));
    if (NULL == partition_handle) {
      LOG(ERROR) << PartitionID(state_.projection_id_, kPartitionOffset)
                        .getName()
                        .c_str() << CStrError(rNoPartitionIdScan) << std::endl;
      SetReturnStatus(false);
    } else {
      partition_reader_iterator_ =
          partition_handle->CreateAtomicReaderIterator();
      SetReturnStatus(true);

#ifdef AVOID_CONTENTION_IN_SCAN
      // Pre-load every block accessor. This lives in the success branch
      // because it dereferences partition_reader_iterator_, which is only
      // assigned when the partition handle was found.
      ChunkReaderIterator* chunk_reader_it = NULL;
      ChunkReaderIterator::block_accessor* ba = NULL;
      while (NULL !=
             (chunk_reader_it = partition_reader_iterator_->NextChunk())) {
        while (chunk_reader_it->GetNextBlockAccessor(ba)) {
          ba->GetBlockSize();
          input_dataset_.input_data_blocks_.push_back(ba);
        }
      }
#endif
    }

    ExpanderTracker::getInstance()->addNewStageEndpoint(
        pthread_self(), LocalStageEndPoint(stage_src, "Scan", 0));
    perf_info_ =
        ExpanderTracker::getInstance()->getPerformanceInfo(pthread_self());
    perf_info_->initialize();
  }
  // All threads wait here so that none proceeds before the set-up above ends.
  BarrierArrive();
  return GetReturnStatus();
}
bool ExpandableBlockStreamExchangeEpoll::open(const PartitionOffset& partition_offset)
{
	unsigned long long int start = curtick();

	RegisterExpandedThreadToAllBarriers();

	if (tryEntryIntoSerializedSection())
	{
		debug_winner_thread++;


		nexhausted_lowers=0;
		this->partition_offset=partition_offset;
		nlowers=state.lower_id_list_.size();

		for (unsigned i = 0; i < nlowers; i++)
		{
			debug_received_block[i] = 0;
		}

		socket_fd_lower_list = new int[nlowers];
		//init -1 ---Yu
		for (int i = 0; i < nlowers; ++i) {
			socket_fd_lower_list[i] = -1;
		}
		buffer=new BlockStreamBuffer(state.block_size_,BUFFER_SIZE_IN_EXCHANGE,state.schema_);
		ExpanderTracker::getInstance()->addNewStageEndpoint(pthread_self(),LocalStageEndPoint(stage_src,"Exchange",buffer));
		received_block_stream_=BlockStreamBase::createBlock(state.schema_,state.block_size_);

		block_for_socket_ = new BlockContainer*[nlowers];
		for (unsigned i = 0; i < nlowers; i++)
		{
			block_for_socket_[i] = new BlockContainer(received_block_stream_->getSerializedBlockSize());
		}

		if (PrepareTheSocket() == false)
			return false;

		if (SetSocketNonBlocking(sock_fd) == false)
		{
			return false;
		}

		logging_->log("[%ld,%d] Open: nexhausted lowers=%d, nlower=%d", state.exchange_id_, partition_offset, nexhausted_lowers, nlowers);

		if (RegisterExchange() == false)
		{
			logging_->elog("Register Exchange with ID=%d fails!", state.exchange_id_);
		}

		if(isMaster()){
			/*  According to a bug reported by dsc, the master exchangeupper should check whether other
			 *  uppers have registered to exchangeTracker. Otherwise, the lower may fail to connect to the
			 *  exchangeTracker of some uppers when the lower nodes receive the exchagnelower, as some uppers
			 *  have not register the exchange_id to the exchangeTracker.
			 */
			logging_->log("[%ld,%d] Synchronizing....", state.exchange_id_, partition_offset);
			checkOtherUpperRegistered();
			logging_->log("[%ld,%d] Synchronized!", state.exchange_id_, partition_offset);
			logging_->log("[%ld,%d] This exchange is the master one, serialize the iterator subtree to the children...", state.exchange_id_, partition_offset);

			if (SerializeAndSendToMulti() == false)
				return false;
		}

		if (CreateReceiverThread() == false)
		{
			return false;
		}

		createPerformanceInfo();

	}

	/* A synchronization barrier, in case of multiple expanded threads*/
	barrierArrive();
	return true;
}
/**
 * Builds a hash table over the tuples that are to be deleted (left child) so
 * that the probe phase can look them up quickly, then opens the right child.
 *
 * The first thread to enter serialized section 0 fills the output index maps,
 * creates the hash function and the hash table and selects the match
 * function. Every thread then opens the left child, waits at barrier 0 and
 * copies each tuple it receives from the left child into the hash bucket
 * chosen by the first deleted-key column. Threads that are not called back
 * wait at barrier 1 and open the right child.
 *
 * @param exec_status      checked for cancellation at several points.
 * @param partition_offset forwarded to both children's Open().
 * @return false if the hash table pointer is NULL after allocation; true
 *         otherwise (cancellation handling is whatever RETURN_IF_CANCELLED
 *         expands to).
 */
bool PhysicalDeleteFilter::Open(SegmentExecStatus* const exec_status,
                                const PartitionOffset& partition_offset) {
#ifdef TIME
  startTimer(&timer);
#endif
  RETURN_IF_CANCELLED(exec_status);

  RegisterExpandedThreadToAllBarriers();
  if (TryEntryIntoSerializedSection(0)) {
    ExpanderTracker::getInstance()->addNewStageEndpoint(
        pthread_self(),
        LocalStageEndPoint(stage_desc, "delete filter build", 0));

    // Key columns take the first output positions; payload columns follow.
    unsigned output_index = 0;
    for (unsigned i = 0; i < state_.filter_key_deleted_.size(); i++) {
      joinIndex_table_to_output_[i] = output_index;
      output_index++;
    }
    for (unsigned i = 0; i < state_.payload_base_.size(); i++) {
      payload_table_to_output_[i] = output_index;
      output_index++;
    }

    // Create the hash function and the hash table structure.
    hash_ = PartitionFunctionFactory::createBoostHashFunction(
        state_.hashtable_bucket_num_);
    hashtable_ = new BasicHashTable(
        state_.hashtable_bucket_num_, state_.hashtable_bucket_size_,
        state_.input_schema_left_->getTupleMaxSize());
    if (NULL == hashtable_) {
      // Log before returning, and report failure: the previous code returned
      // the non-zero error code, which converts to `true`, and never logged.
      // NOTE(review): this path leaves before BarrierArrive(0), as before.
      LOG(ERROR) << "hashtable allocation failed"
                 << "[" << rMemoryAllocationFailed << "]" << endl;
      return false;
    }
#ifdef _DEBUG_
    consumed_tuples_from_left = 0;
#endif

    // The code-generated match path is disabled in this version, so the
    // plain comparison function is always selected.
    cff_ = PhysicalDeleteFilter::isMatch;
  }

  /**
   * For performance concern, the following line should place just after
   * "RegisterNewThreadToAllBarriers();"
   * in order to accelerate the open response time.
   */
  LOG(INFO) << "delete filter operator begin to open left child" << endl;
  state_.child_left_->Open(exec_status, partition_offset);
  LOG(INFO) << "delete filter operator finished opening left child" << endl;
  BarrierArrive(0);

  // Not used afterwards in this function; kept because the effect of
  // CreateIterator() is not visible from here.
  BasicHashTable::Iterator tmp_it = hashtable_->CreateIterator();

  // Per-thread context used while building the hash table.
  DeleteFilterThreadContext* dftc = CreateOrReuseContext(crm_numa_sensitive);

  // NOTE(review): input_schema and op are duplicates that are never released
  // in this function - confirm who owns them.
  const Schema* input_schema = state_.input_schema_left_->duplicateSchema();
  // filter_key_deleted_[0] is used because the data is partitioned on the
  // first column of the join index.
  const Operate* op = input_schema->getcolumn(state_.filter_key_deleted_[0])
                          .operate->duplicateOperator();
  const unsigned buckets = state_.hashtable_bucket_num_;

  int64_t processed_tuple_count = 0;  // only updated in _DEBUG_ builds

  LOG(INFO) << "delete filter operator begin to call left child's next()"
            << endl;
  RETURN_IF_CANCELLED(exec_status);

  void* cur = NULL;
  while (state_.child_left_->Next(exec_status, dftc->l_block_for_asking_)) {
    RETURN_IF_CANCELLED(exec_status);
    // Replace the iterator of the previous block with one for the new block.
    delete dftc->l_block_stream_iterator_;
    dftc->l_block_stream_iterator_ =
        dftc->l_block_for_asking_->createIterator();
    while (NULL != (cur = dftc->l_block_stream_iterator_->nextTuple())) {
#ifdef _DEBUG_
      processed_tuple_count++;
      lock_.acquire();
      consumed_tuples_from_left++;
      lock_.release();
#endif
      const void* key_addr =
          input_schema->getColumnAddess(state_.filter_key_deleted_[0], cur);
      const unsigned bn = op->getPartitionValue(key_addr, buckets);
      void* tuple_in_hashtable = hashtable_->atomicAllocate(bn);
      // Use the specialised copy routine when one has been installed.
      if (memcpy_)
        memcpy_(tuple_in_hashtable, cur);
      else
        input_schema->copyTuple(cur, tuple_in_hashtable);
    }
    dftc->l_block_for_asking_->setEmpty();
  }
#ifdef _DEBUG_
  tuples_in_hashtable = 0;

  produced_tuples = 0;
  consumed_tuples_from_right = 0;
#endif
  // A called-back thread leaves barrier 1 and exits without opening the
  // right child.
  if (ExpanderTracker::getInstance()->isExpandedThreadCallBack(
          pthread_self())) {
    UnregisterExpandedThreadToAllBarriers(1);
    return true;
  }
  BarrierArrive(1);
  RETURN_IF_CANCELLED(exec_status);

  state_.child_right_->Open(exec_status, partition_offset);
  RETURN_IF_CANCELLED(exec_status);

  LOG(INFO) << "delete filter operator finished opening right child" << endl;
  return true;
}
// Example #5
// 0
/**
 * note the serialized block's size is different from others, it has tail info.
 * exchange merger is at the end of one segment of plan, so it's the "stage_src"
 * for this stage
 */
bool ExchangeMerger::Open(const PartitionOffset& partition_offset) {
  unsigned long long int start = curtick();
  RegisterExpandedThreadToAllBarriers();
  if (TryEntryIntoSerializedSection()) {  // first arrived thread dose
    exhausted_lowers = 0;
    this->partition_offset_ = partition_offset;
    lower_num_ = state_.lower_id_list_.size();
    socket_fd_lower_list_ = new int[lower_num_];
    for (int i = 0; i < lower_num_; ++i) {
      socket_fd_lower_list_[i] = -1;
    }
    // buffer all deserialized blocks come from every socket
    all_merged_block_buffer_ = new BlockStreamBuffer(
        state_.block_size_, BUFFER_SIZE_IN_EXCHANGE, state_.schema_);
    ExpanderTracker::getInstance()->addNewStageEndpoint(
        pthread_self(),
        LocalStageEndPoint(stage_src, "Exchange", all_merged_block_buffer_));

    // if one of block_for_socket is full, it will be deserialized into
    // block_for_deserialization and sended to all_merged_data_buffer
    block_for_deserialization =
        BlockStreamBase::createBlock(state_.schema_, state_.block_size_);

    // store block for each socket and the received block is serialized.
    block_for_socket_ = new BlockContainer* [lower_num_];
    for (unsigned i = 0; i < lower_num_; ++i) {
      block_for_socket_[i] = new BlockContainer(
          block_for_deserialization->getSerializedBlockSize());
    }
    if (PrepareSocket() == false) return false;
    if (SetSocketNonBlocking(sock_fd_) == false) {
      return false;
    }

    LOG(INFO) << "exchange_id = " << state_.exchange_id_
              << " partition_offset = " << partition_offset
              << " Open: exhausted lower senders num = " << exhausted_lowers
              << " lower sender num = " << lower_num_ << std::endl;

    if (RegisterExchange() == false) {
      LOG(ERROR) << "Register Exchange with ID = " << state_.exchange_id_
                 << " fails!" << std::endl;
    }

    if (IsMaster()) {
      /*  According to a bug reported by dsc, the master exchange upper should
       * check whether other uppers have registered to exchangeTracker.
       * Otherwise, the lower may fail to connect to the exchangeTracker of some
       * uppers when the lower nodes receive the exchange lower, as some uppers
       *  have not register the exchange_id to the exchangeTracker.
       */
      LOG(INFO) << " exchange_id = " << state_.exchange_id_
                << " partition_offset = " << partition_offset
                << "Synchronizing...." << std::endl;
      IsOtherMergersRegistered();
      LOG(INFO) << " exchange_id = " << state_.exchange_id_
                << " partition_offset = " << partition_offset
                << " Synchronized! Then serialize and send its next segment "
                   "plan to all its lower senders" << std::endl;
      if (SerializeAndSendPlan() == false) return false;
    }
    if (CreateReceiverThread() == false) {
      return false;
    }
    CreatePerformanceInfo();
  }
  /// A synchronization barrier, in case of multiple expanded threads
  BarrierArrive();
  return true;
}
/**
 * @brief Opens the nest-loop join: fetches all results of the left child and
 * stores them in the block buffer, then prepares the calling thread for the
 * right input. The block buffer is a dynamic block buffer because all
 * expanded threads share it.
 *
 * The first thread to enter serialized section 0 creates the block buffer and
 * selects the join-condition handler. Threads that are called back after the
 * left side is drained leave barrier 1 and return; the others wait at
 * barrier 1, build their right-side block and iterators, store the context
 * through InitContext() and open the right child.
 *
 * @param exec_status      checked for cancellation at several points.
 * @param partition_offset forwarded to both children's Open().
 * @return false if cancellation is seen while draining the left child; true
 *         otherwise (other cancellation points return whatever
 *         RETURN_IF_CANCELLED expands to).
 */
bool PhysicalNestLoopJoin::Open(SegmentExecStatus *const exec_status,
                                const PartitionOffset &partition_offset) {
  RETURN_IF_CANCELLED(exec_status);

  RegisterExpandedThreadToAllBarriers();
  if (TryEntryIntoSerializedSection(0)) {  // the first thread of all need to do
    ExpanderTracker::getInstance()->addNewStageEndpoint(
        pthread_self(), LocalStageEndPoint(stage_desc, "nest loop", 0));
    block_buffer_ = new DynamicBlockBuffer();
    if (state_.join_condi_.size() == 0) {
      join_condi_process_ = WithoutJoinCondi;
    } else {
      join_condi_process_ = WithJoinCondi;
    }
    LOG(INFO) << "[NestloopJoin]: [the first thread opens the nestloopJoin "
                 "physical operator]" << std::endl;
  }
  RETURN_IF_CANCELLED(exec_status);

  state_.child_left_->Open(exec_status, partition_offset);
  RETURN_IF_CANCELLED(exec_status);

  BarrierArrive(0);

  NestLoopJoinContext *jtc = CreateOrReuseContext(crm_numa_sensitive);
  // Create a block for the left child's results. Each filled block is
  // appended to the shared buffer and replaced by a fresh one.
  CreateBlockStream(jtc->block_for_asking_, state_.input_schema_left_);
  while (state_.child_left_->Next(exec_status, jtc->block_for_asking_)) {
    if (exec_status->is_cancelled()) {
      // NOTE(review): this path returns without arriving at or leaving
      // barrier 1 - confirm that is intended.
      if (NULL != jtc->block_for_asking_) {
        delete jtc->block_for_asking_;
        jtc->block_for_asking_ = NULL;
      }
      return false;
    }
    block_buffer_->atomicAppendNewBlock(jtc->block_for_asking_);
    CreateBlockStream(jtc->block_for_asking_, state_.input_schema_left_);
  }

  // The last block was created but never filled by the left child.
  if (NULL != jtc->block_for_asking_) {
    delete jtc->block_for_asking_;
    jtc->block_for_asking_ = NULL;
  }

  // An expanded thread that has finished its share of the work may be called
  // back here; being called back means the thread is about to exit.
  if (ExpanderTracker::getInstance()->isExpandedThreadCallBack(
          pthread_self())) {
    UnregisterExpandedThreadToAllBarriers(1);
    LOG(INFO) << "[NestloopJoin]: [thread " << pthread_self()
              << " is called back to exit]" << std::endl;
    return true;
  }

  // The original author tagged this barrier with "??ERROR"; the reason is not
  // recorded.
  BarrierArrive(1);

  // Prepare the block and iterators used for the right input.
  CreateBlockStream(jtc->block_for_asking_, state_.input_schema_right_);
  jtc->block_for_asking_->setEmpty();
  jtc->block_stream_iterator_ = jtc->block_for_asking_->createIterator();
  jtc->buffer_iterator_ = block_buffer_->createIterator();

  // Known issue noted by the original author: buffer_iterator_ may yield no
  // block, so each thread's buffer stream iterator should eventually be made
  // to point at an empty block.

  // Stores the thread context in the operator context.
  InitContext(jtc);
  RETURN_IF_CANCELLED(exec_status);
  state_.child_right_->Open(exec_status, partition_offset);
  return true;
}