void* ResultCollector::CollectResult(void* arg) {
  ResultCollector* Pthis = (ResultCollector*)arg;
  Pthis->state_.child_->Open(Pthis->state_.partition_offset_);
  BlockStreamBase* block_for_asking;
  if (false == Pthis->CreateBlockStream(block_for_asking)) {
    assert(false);
    return 0;
  }
  Pthis->block_buffer_->column_header_list_ = Pthis->state_.column_header_;

  unsigned long long start = 0;
  start = curtick();

  while (Pthis->state_.child_->Next(block_for_asking)) {
    Pthis->block_buffer_->atomicAppendNewBlock(block_for_asking);
    if (false == Pthis->CreateBlockStream(block_for_asking)) {
      assert(false);
      return 0;
    }
  }
  Pthis->sema_input_complete_.post();
  double eclipsed_seconds = getSecond(start);
  Pthis->block_buffer_->query_time_ = eclipsed_seconds;
  Pthis->block_buffer_->schema_ = Pthis->state_.input_->duplicateSchema();
  Pthis->finished_thread_count_++;

  return 0;
}
static void test_scan_filter_performance(int value)
{
	// Scans table "cj", filters attribute 3 to the range [10107, value),
	// executes the plan under the performance monitor and checks the
	// resulting tuple count against the expected 26820.
	unsigned long long int start=curtick();
	TableDescriptor* table=Catalog::getInstance()->getTable("cj");
	LogicalOperator* cj_scan=new LogicalScan(table->getProjectoin(0));

	Filter::Condition filter_condition_1;
	filter_condition_1.add(table->getAttribute(3),AttributeComparator::GEQ,std::string("10107"));
	filter_condition_1.add(table->getAttribute(3),AttributeComparator::L,(void*)&value);
	LogicalOperator* filter_1=new Filter(filter_condition_1,cj_scan);

	const NodeID collector_node_id=0;
	LogicalOperator* root=new LogicalQueryPlanRoot(collector_node_id,filter_1,LogicalQueryPlanRoot::PERFORMANCE);

	BlockStreamPerformanceMonitorTop* executable_query_plan=(BlockStreamPerformanceMonitorTop*)root->getIteratorTree(1024*64);
//	executable_query_plan->print();
	executable_query_plan->open();
	while(executable_query_plan->next(0));
	executable_query_plan->close();

//	ResultSet *result_set=executable_query_plan->getResultSet();

	const unsigned long int number_of_tuples=executable_query_plan->getNumberOfTuples();
	printf("execution time: %4.4f seconds.\n",getSecond(start));
	if(!print_test_name_result(number_of_tuples==26820,"Low selectivity filter")){
		// Fixed: %lu matches unsigned long (was %d, undefined behavior), and
		// the expected value in the message now agrees with the value the
		// check above actually tests (was 26695).
		printf("\tExpected:26820 actual: %lu\n",number_of_tuples);
	}
//	result_set->~ResultSet();
	executable_query_plan->~BlockStreamIteratorBase();
	root->~LogicalOperator();
}
// Example #3
static void query_select_sort() {
	/*
	 * Build and execute:  scan LINEITEM, compute L_ORDERKEY+1, project the
	 * computed column, sort on it, and print the result.
	 *
	 * NOTE(review): the original comment described a group-by aggregation
	 * ("select sum(a+1)+count(a),b from T group by b") which this plan does
	 * not implement -- confirm which query was intended.
	 */
	unsigned long long int start=curtick();
	TableDescriptor* table=Environment::getInstance()->getCatalog()->getTable("LINEITEM");
	//===========================scan===========================
	LogicalOperator* scan=new LogicalScan(table->getProjectoin(0));

	//=====================project (compute)====================
	vector< vector<ExpressionItem> >expr_list1;

	vector<ExpressionItem> expr1;
	ExpressionItem ei1_1;
	ExpressionItem ei1_2;
	ExpressionItem ei1_3;
	// Postfix expression: L_ORDERKEY 1 +  ==>  L_ORDERKEY + 1
	ei1_1.setVariable("LINEITEM.L_ORDERKEY");
	ei1_2.setIntValue("1");
	ei1_3.setOperator("+");

	expr1.push_back(ei1_1);
	expr1.push_back(ei1_2);
	expr1.push_back(ei1_3);

	expr_list1.push_back(expr1);

	LogicalOperator* project1=new LogicalProject(scan,expr_list1);

	//=====================project (narrow)=====================
	vector< vector<ExpressionItem> >expr_list2;

	ExpressionItem ei21_1;
	ei21_1.setVariable("LINEITEM.L_ORDERKEY+1");
	vector<ExpressionItem> expr21;
	expr21.push_back(ei21_1);
	expr_list2.push_back(expr21);
	LogicalOperator* project2=new LogicalProject(project1,expr_list2);

	//============================sort==========================
	vector<LogicalSort::OrderByAttr*> vo;

	LogicalSort::OrderByAttr tmp=LogicalSort::OrderByAttr("LINEITEM.L_ORDERKEY+1",0);
	vo.push_back(&tmp);
	// Fixed: the sort previously consumed project1, leaving project2 built
	// but unused (and leaked); the sort key is project2's output column.
	LogicalOperator* sort=new LogicalSort(project2,vo);

	//===========================root===========================
	LogicalOperator* root=new LogicalQueryPlanRoot(0,sort,LogicalQueryPlanRoot::PRINT);

	cout<<"performance is ok!"<<endl;
	BlockStreamIteratorBase* physical_iterator_tree=root->getIteratorTree(64*1024);
//	physical_iterator_tree->print();
	physical_iterator_tree->open();
	while(physical_iterator_tree->next(0));
	physical_iterator_tree->close();
	printf("Q1: execution time: %4.4f second.\n",getSecond(start));
}
unsigned TimeOutReceiver::TimeOutWait(unsigned expected_message_count,unsigned time_out_in_ms){
	// Consume messages until either the expected number has arrived or the
	// timeout (in milliseconds) elapses; returns how many were received.
	const unsigned long long int wait_begin = curtick();
	unsigned received = 0;
	for (;;) {
		if (received >= expected_message_count) break;
		if (getMilliSecond(wait_begin) >= time_out_in_ms) break;
		received += Consume(expected_message_count - received);
	}
	return received;
}
// TODO(Hanzhang): According to AVOID_CONTENTION_IN_SCAN, we choose the
// strategy. We need finish case(1).
// Produces the next block of the scan.  Returns false when the input is
// exhausted or this expanded thread is asked to retire.
bool PhysicalProjectionScan::Next(SegmentExecStatus* const exec_status,
                                  BlockStreamBase* block) {
  RETURN_IF_CANCELLED(exec_status);

  unsigned long long total_start = curtick();
  // NOTE(review): this only re-assigns `false` when the flag is already
  // false (a no-op); presumably the intent was to clear a stale reference
  // flag -- confirm against BlockStreamBase's ownership semantics.
  if (!block->isIsReference()) {
    block->setIsReference(false);
  }
#ifdef AVOID_CONTENTION_IN_SCAN
  ScanThreadContext* stc = reinterpret_cast<ScanThreadContext*>(GetContext());
  if (NULL == stc) {
    stc = new ScanThreadContext();
    InitContext(stc);
  }
  if (ExpanderTracker::getInstance()->isExpandedThreadCallBack(
          pthread_self())) {
    // Retiring thread: give unprocessed blocks back to the shared pool.
    input_dataset_.AtomicPut(stc->assigned_data_);
    delete stc;
    destorySelfContext();
    kPerfInfo->report_instance_performance_in_millibytes();
    return false;
  }

  if (!stc->assigned_data_.empty()) {
    ChunkReaderIterator::block_accessor* ba = stc->assigned_data_.front();
    stc->assigned_data_.pop_front();

    ba->GetBlock(block);

    // whether delete InMemeryBlockAccessor::target_block_start_address
    // is depend on whether use copy in ba->getBlock(block);
    delete ba;
    kPerfInfo->processed_one_block();
    return true;
  } else {
    if (input_dataset_.AtomicGet(stc->assigned_data_, Config::scan_batch)) {
      // case(1): a fresh batch was fetched; recurse once to serve from it.
      // Fixed: the recursive call must forward exec_status -- Next() has no
      // single-argument overload.
      return Next(exec_status, block);
    } else {
      delete stc;
      destorySelfContext();
      return false;
    }
  }

#else

  if (ExpanderTracker::getInstance()->isExpandedThreadCallBack(
          pthread_self())) {
    return false;
  }
  //  perf_info_->processed_one_block();
  // case(2)
  RETURN_IF_CANCELLED(exec_status);
  return partition_reader_iterator_->NextBlock(block);

#endif
}
bool BlockStreamPerformanceMonitorTop::open(const PartitionOffset& partition_offset){
	// Record the start timestamp, open the child iterator, allocate the
	// working block and spawn the background reporting thread.  Always
	// reports success even if the reporter thread could not be created.
	start_ = curtick();
	state_.child_->open(partition_offset);
	block_ = BlockStreamBase::createBlock(state_.schema_, state_.block_size_);
	tuplecount_ = 0;
	const int create_status = pthread_create(&report_tid_, NULL, report, this);
	if (create_status != 0) {
		std::cout<<"create threads error!"<<std::endl;
	}
	return true;
}
bool PerformanceIteratorTop::open(){
	// Open the child, allocate a cache-line aligned tuple buffer, spawn the
	// reporter thread and finally start the timer.  Always reports success
	// even if the reporter thread could not be created.
	state.child->open();
	tuple = memalign(cacheline_size, state.schema->getTupleMaxSize());
	tuplecount = 0;
	const int create_status = pthread_create(&report_tid, NULL, report, this);
	if (create_status != 0) {
		std::cout<<"create threads error!"<<std::endl;
	}
	start = curtick();
	return true;
}
bool BlockStreamNestLoopJoinIterator::open(const PartitionOffset& partition_offset)
{
	// Open phase of the nest-loop join.  The first (winning) thread creates
	// the shared block buffer; then every expanded thread drains the left
	// child into that buffer (build side); finally the surviving threads
	// re-arm their context and open the right child (probe side).
	RegisterExpandedThreadToAllBarriers();
//	AtomicPushFreeHtBlockStream(BlockStreamBase::createBlock(state_.input_schema_left,state_.block_size_));
//	AtomicPushFreeBlockStream(BlockStreamBase::createBlock(state_.input_schema_right,state_.block_size_));
	unsigned long long int timer;
	bool winning_thread=false;
	if(tryEntryIntoSerializedSection(0))//the first thread of all need to do
	{
		ExpanderTracker::getInstance()->addNewStageEndpoint(pthread_self(),LocalStageEndPoint(stage_desc,"nest loop build",0));
		winning_thread=true;
		timer=curtick();
//		unsigned output_index=0;
//		for(unsigned i=0;i<state_.joinIndex_left.size();i++){
//			joinIndex_left_to_output[i]=output_index;
//			output_index++;
//		}
//		for(unsigned i=0;i<state_.payload_left.size();i++){
//			payload_left_to_output[i]=output_index;
//			output_index++;
//		}
//		for(unsigned i=0;i<state_.payload_right.size();i++){
//			payload_right_to_output[i]=output_index;
//			output_index++;
//		}
		// Shared buffer holding the entire left input; it takes ownership of
		// every block appended below.
		blockbuffer=new DynamicBlockBuffer();

	}
	state_.child_left->open(partition_offset);
	// Barrier 0: no thread starts draining until blockbuffer exists.
	barrierArrive(0);
	join_thread_context* jtc=new join_thread_context();
	createBlockStream(jtc->block_for_asking_);
	while(state_.child_left->next(jtc->block_for_asking_))
	{
		blockbuffer->atomicAppendNewBlock(jtc->block_for_asking_);
		createBlockStream(jtc->block_for_asking_);
	}

	// The block created by the last createBlockStream() call was never
	// appended to the buffer, so it is released here.
	delete jtc->block_for_asking_;
	if(ExpanderTracker::getInstance()->isExpandedThreadCallBack(pthread_self())){
		// This expanded thread was asked to retire before the probe phase.
		unregisterExpandedThreadToAllBarriers(1);
		return true;
	}
	barrierArrive(1);//??ERROR
//	join_thread_context* jtc=new join_thread_context();
	// Re-arm the context with a block matching the right child's schema for
	// the probe phase.
	jtc->block_for_asking_=BlockStreamBase::createBlock(state_.input_schema_right,state_.block_size_);
	jtc->block_stream_iterator_=jtc->block_for_asking_->createIterator();
	initContext(jtc);
	state_.child_right->open(partition_offset);
	return true;
}
static void test_index_filter_performance(int value_high) {
  // Index-scans table "cj" on sec_code over [10107, value_high), executes
  // the plan under the performance monitor and verifies the tuple count.
  unsigned long long int start = curtick();
  vector<IndexScanIterator::query_range> q_range;
  q_range.clear();
  int value_low = 10107;
  //	int value_high = 600257;
  TableDescriptor* table = Catalog::getInstance()->getTable("cj");

  IndexScanIterator::query_range q;
  // The range bounds simply point at the two stack variables; the former
  // malloc() calls here were overwritten immediately and leaked.
  q.value_low = (void*)(&value_low);
  q.comp_low = GEQ;
  q.value_high = (void*)(&value_high);
  q.comp_high = L;
  q.c_type.type = t_int;
  q.c_type.operate = new OperateInt();
  q_range.push_back(q);

  LogicalOperator* index_scan =
      new LogicalIndexScan(table->getProjectoin(0)->getProjectionID(),
                           table->getAttribute("sec_code"), q_range);
  const NodeID collector_node_id = 0;
  LogicalOperator* root = new LogicalQueryPlanRoot(
      collector_node_id, index_scan, LogicalQueryPlanRoot::PERFORMANCE);
  //	root->print();

  PerformanceMonitor* executable_query_plan =
      (PerformanceMonitor*)root->GetPhysicalPlan(1024 * 64);
  executable_query_plan->Open();
  while (executable_query_plan->Next(0))
    ;
  executable_query_plan->Close();

  //	ResultSet* result_set = executable_query_plan->getResultSet();

  const unsigned long int number_of_tuples =
      executable_query_plan->GetNumberOfTuples();
  delete executable_query_plan;
  root->~LogicalOperator();
  //	cout << "Sec_code: " << value_low << "\t Result: " << number_of_tuples
  //<< endl;
  printf("execution time: %4.4f seconds.\n", getSecond(start));
  if (!print_test_name_result(number_of_tuples == 26820, "Index Scan")) {
    // Fixed: %lu matches the unsigned long tuple count (was %d).
    printf("\tIndex Scan sec_code = %d, Expected:%d actual: %lu\n", value_low,
           26820, number_of_tuples);
  }
}
int mainasdfaf234(int argc,const char** argv){
	// Interactive micro-benchmark: scan a single int column, filter rows
	// with value < argv[2], and report throughput until the user quits.

	// Fixed: argv[2] was dereferenced without checking argc.
	if(argc<3){
		printf("usage: %s <unused> <filter_value>\n",argv[0]);
		return -1;
	}

	std::vector<column_type> column_list,column_list_;
	column_list.push_back(column_type(t_int));

	Schema* input=new SchemaFix(column_list);

	BlockStreamSingleColumnScan::State bsscs_state("/home/claims/temp/Uniform_0_99.column",input);
	PhysicalOperatorBase* bsscs1=new BlockStreamSingleColumnScan(bsscs_state);
	// (A second, never-used scan operator was previously allocated here and
	// leaked; it has been removed.)

	int f=atoi(argv[2]);

	AttributeComparator fA(column_type(t_int),Comparator::L,0,&f);
	std::vector<AttributeComparator> ComparatorList;
	ComparatorList.push_back(fA);
	BlockStreamFilter::State bsf_state(input,bsscs1,ComparatorList,4096);
	PhysicalOperatorBase* bsf=new BlockStreamFilter(bsf_state);

	BlockStreamBase *block=new BlockStreamFix(4096,4);
	int choice=0;

	while(choice==0){
		bsf->Open();
		unsigned long long int start=curtick();

		while(bsf->Next(block)){
			block->setEmpty();
		}
		printf("Time=%f Throughput=%f.\n",getSecond(start),1024/getSecond(start));
		bsf->Close();
		printf("Continue(0) or Not(1) ?\n");

		scanf("%d",&choice);

	}
	// Fixed: the function is declared int but previously fell off the end,
	// which is undefined behavior for a non-main function.
	return 0;
}
bool PhysicalProjectionScan::Open(SegmentExecStatus* const exec_status,
                                  const PartitionOffset& kPartitionOffset) {
  // Open the projection scan.  The first expanded thread resolves the
  // partition handle and creates the shared atomic reader iterator; all
  // threads then meet at the barrier and return the shared status.
  RETURN_IF_CANCELLED(exec_status);

  RegisterExpandedThreadToAllBarriers();

  if (TryEntryIntoSerializedSection()) {
    /* this is the first expanded thread*/
    // NOTE(review): despite the trailing underscore this is a LOCAL
    // variable, not the member it resembles; only
    // partition_reader_iterator_ escapes this scope.
    PartitionStorage* partition_handle_;
    if (NULL ==
        (partition_handle_ = BlockManager::getInstance()->GetPartitionHandle(
             PartitionID(state_.projection_id_, kPartitionOffset)))) {
      // Partition not found: record failure; the barrier below still runs
      // so the other expanded threads are not left waiting.
      LOG(ERROR) << PartitionID(state_.projection_id_, kPartitionOffset)
                        .getName()
                        .c_str() << CStrError(rNoPartitionIdScan) << std::endl;
      SetReturnStatus(false);
    } else {
      partition_reader_iterator_ =
          partition_handle_->CreateAtomicReaderIterator();
      SetReturnStatus(true);
    }

#ifdef AVOID_CONTENTION_IN_SCAN
    unsigned long long start = curtick();

    ChunkReaderIterator* chunk_reader_it;
    ChunkReaderIterator::block_accessor* ba;
    // Pre-collect every block accessor of every chunk into input_dataset_
    // so worker threads can later pull batches without contending on the
    // reader.  (Assignment in the loop condition is intentional.)
    while (chunk_reader_it = partition_reader_iterator_->NextChunk()) {
      while (chunk_reader_it->GetNextBlockAccessor(ba)) {
        ba->GetBlockSize();
        input_dataset_.input_data_blocks_.push_back(ba);
      }
    }
#endif
    ExpanderTracker::getInstance()->addNewStageEndpoint(
        pthread_self(), LocalStageEndPoint(stage_src, "Scan", 0));
    perf_info_ =
        ExpanderTracker::getInstance()->getPerformanceInfo(pthread_self());
    perf_info_->initialize();
  }
  // Synchronize all expanded threads before reporting the shared status.
  BarrierArrive();
  return GetReturnStatus();
}
// Example #12
bool ResultCollector::Open(const PartitionOffset& part_offset) {
  // The first caller (winner of sema_open_) initializes the shared result
  // buffer; every caller then spawns a CollectResult worker and blocks
  // until the input has been fully drained.
  state_.partition_offset_ = part_offset;

  if (sema_open_.try_wait()) {
    block_buffer_ = new ResultSet();
    block_buffer_iterator_ = block_buffer_->createIterator();
    open_finished_ = true;
  } else {
    // Busy-wait until the winning thread has finished initialization.
    while (!open_finished_) {
      usleep(1);
    }
  }
  registered_thread_count_++;
  if (true == g_thread_pool_used) {
    Environment::getInstance()->getThreadPool()->add_task(CollectResult, this);
  } else {
    pthread_t tid;
    pthread_create(&tid, NULL, CollectResult, this);
    // Fixed: the thread is never joined, so detach it to let the system
    // reclaim its resources when CollectResult returns (previously the
    // thread handle leaked on every call).
    pthread_detach(tid);
  }
  unsigned long long int start = curtick();
  sema_input_complete_.wait();
  // NOTE(review): this overwrites the query_time_ that CollectResult
  // already stored -- confirm which measurement is intended.
  block_buffer_->query_time_ = getSecond(start);
  return true;
}
bool ExpandableBlockStreamExchangeEpoll::open(const PartitionOffset& partition_offset)
{
	// Open the exchange upper end: the first (winning) thread sets up the
	// receive buffer, the listening socket, the exchange registration and
	// the receiver thread; all expanded threads then meet at the barrier.
	unsigned long long int start = curtick();

	RegisterExpandedThreadToAllBarriers();

	if (tryEntryIntoSerializedSection())
	{
		debug_winner_thread++;


		nexhausted_lowers=0;
		this->partition_offset=partition_offset;
		nlowers=state.lower_id_list_.size();

		for (unsigned i = 0; i < nlowers; i++)
		{
			debug_received_block[i] = 0;
		}

		socket_fd_lower_list = new int[nlowers];
		//init -1 ---Yu
		// NOTE(review): `i` is int here while nlowers is unsigned (the loop
		// above uses unsigned) -- harmless for realistic counts, but
		// inconsistent.
		for (int i = 0; i < nlowers; ++i) {
			socket_fd_lower_list[i] = -1;
		}
		buffer=new BlockStreamBuffer(state.block_size_,BUFFER_SIZE_IN_EXCHANGE,state.schema_);
		ExpanderTracker::getInstance()->addNewStageEndpoint(pthread_self(),LocalStageEndPoint(stage_src,"Exchange",buffer));
		received_block_stream_=BlockStreamBase::createBlock(state.schema_,state.block_size_);

		// One serialized-block container per lower sender, sized for a full
		// serialized block.
		block_for_socket_ = new BlockContainer*[nlowers];
		for (unsigned i = 0; i < nlowers; i++)
		{
			block_for_socket_[i] = new BlockContainer(received_block_stream_->getSerializedBlockSize());
		}

		if (PrepareTheSocket() == false)
			return false;

		if (SetSocketNonBlocking(sock_fd) == false)
		{
			return false;
		}

		logging_->log("[%ld,%d] Open: nexhausted lowers=%d, nlower=%d", state.exchange_id_, partition_offset, nexhausted_lowers, nlowers);

		if (RegisterExchange() == false)
		{
			logging_->elog("Register Exchange with ID=%d fails!", state.exchange_id_);
		}

		if(isMaster()){
			/*  According to a bug reported by dsc, the master exchangeupper should check whether other
			 *  uppers have registered to exchangeTracker. Otherwise, the lower may fail to connect to the
			 *  exchangeTracker of some uppers when the lower nodes receive the exchagnelower, as some uppers
			 *  have not register the exchange_id to the exchangeTracker.
			 */
			logging_->log("[%ld,%d] Synchronizing....", state.exchange_id_, partition_offset);
			checkOtherUpperRegistered();
			logging_->log("[%ld,%d] Synchronized!", state.exchange_id_, partition_offset);
			logging_->log("[%ld,%d] This exchange is the master one, serialize the iterator subtree to the children...", state.exchange_id_, partition_offset);

			if (SerializeAndSendToMulti() == false)
				return false;
		}

		if (CreateReceiverThread() == false)
		{
			return false;
		}

		createPerformanceInfo();

	}

	/* A synchronization barrier, in case of multiple expanded threads*/
	barrierArrive();
	return true;
}
// receive each one block from all sender
// Receiver thread body: accepts connections from the lower senders, reads
// serialized blocks from them via epoll, and pushes deserialized blocks
// into the shared buffer.  `arg` is the owning exchange instance; runs
// until the process tears it down (the outer loop never exits normally).
void* ExpandableBlockStreamExchangeEpoll::receiver(void* arg){
	ExpandableBlockStreamExchangeEpoll* Pthis=(ExpandableBlockStreamExchangeEpoll*)arg;

	struct epoll_event event;
	struct epoll_event *events;

	int status;

	/** create epoll **/
	Pthis->epoll_fd_ = epoll_create1(0);
	if (Pthis->epoll_fd_ == -1)
	{
		Pthis->logging_->elog("epoll create error!\n");
		return 0;
	}

	event.data.fd = Pthis->sock_fd;
	event.events = EPOLLIN | EPOLLET;
	status = epoll_ctl(Pthis->epoll_fd_, EPOLL_CTL_ADD, Pthis->sock_fd, &event);
	if (status == -1)
	{
		Pthis->logging_->elog("epoll ctl error!\n");
		return 0;
	}


	events=(epoll_event*)calloc(Pthis->nlowers,sizeof(epoll_event));
	int fd_cur=0;
	ticks start=curtick();
	std::vector<int> finish_times;//in ms
	while(true){
		usleep(1);
		const int event_count = epoll_wait(Pthis->epoll_fd_, events, Pthis->nlowers, -1);
		for (int i = 0; i < event_count; i++)
		{
			if ((events[i].events & EPOLLERR) || (events[i].events & EPOLLHUP) || (!(events[i].events & EPOLLIN)))
			{
				if (errno == EINTR)
				{
					continue;
				}
				Pthis->logging_->elog("[%ld] epoll error,reason:%s\n", Pthis->state.exchange_id_, strerror(errno));
				FileClose(events[i].data.fd);
				std::cout << "in " << __FILE__ << ":" << __LINE__;
				printf("-----for debug:close fd %d.\n", events[i].data.fd);
				continue;
			}
			else if (Pthis->sock_fd == events[i].data.fd)
			{
				/* We have a notification on the listening socket, which means one or more incoming connections.*/
				while (true)
				{
					sockaddr in_addr;
					socklen_t in_len;
					int infd;
					char hbuf[NI_MAXHOST], sbuf[NI_MAXSERV];

					in_len = sizeof in_addr;
					infd = accept(Pthis->sock_fd, &in_addr, &in_len);
					if (infd == -1)
					{
						if ((errno == EAGAIN) || (errno == EWOULDBLOCK))
						{
							/* all the incoming connections are processed.*/
							break;
						}
						else
						{
							Pthis->logging_->elog("accept error!  ");
							break;
						}
					}
					status=getnameinfo(&in_addr,in_len,hbuf,sizeof(hbuf),sbuf,sizeof(sbuf),NI_NUMERICHOST|NI_NUMERICSERV);
					if(status==0){
						Pthis->logging_->log("[%ld] Accepted connection on descriptor %d (host=%s, port=%s),id=%d\n",Pthis->state.exchange_id_, infd, hbuf, sbuf,Pthis->state.exchange_id_);
						Pthis->lower_ip_array.push_back(hbuf);
						Pthis->lower_sock_fd_to_index[infd]=Pthis->lower_ip_array.size()-1;
						assert(Pthis->lower_ip_array.size()<=Pthis->state.lower_id_list_.size());
					}
					/*Make the incoming socket non-blocking and add it to the list of fds to monitor.*/
					if (!Pthis->SetSocketNonBlocking(infd))
					{
						return 0;
					}
					event.data.fd = infd;
					event.events = EPOLLIN | EPOLLET;
					status = epoll_ctl(Pthis->epoll_fd_, EPOLL_CTL_ADD, infd, &event);
					if (status == -1)
					{
						Pthis->logging_->elog("epoll_ctl");
						return 0;
					}
				}
				continue;
			}
			else
			{
				/* We have data on the fd waiting to be read.*/
				int done = 0;
				while (true)
				{
					int byte_received;

					int socket_fd_index=Pthis->lower_sock_fd_to_index[events[i].data.fd];

					byte_received=read(events[i].data.fd,
					                   (char*)Pthis->block_for_socket_[socket_fd_index]->getBlock()+Pthis->block_for_socket_[socket_fd_index]->GetCurSize(),
					                   Pthis->block_for_socket_[socket_fd_index]->GetRestSize());
					if(byte_received==-1){
						if(errno==EAGAIN){
							/*We have read all the data,so go back to the loop.*/
							break;
						}
						Pthis->logging_->elog("read error!\n");
						done = 1;
						/* Fixed: stop processing this fd immediately.  The
						 * code previously fell through and passed the failed
						 * read's -1 to IncreaseActualSize(), corrupting the
						 * block container's bookkeeping. */
						break;
					}
					else if (byte_received == 0)
					{
						/* End of file. The remote has closed the connection.*/
						done = 1;
						break;
					}

					/* The data is successfully read.*/

					Pthis->block_for_socket_[socket_fd_index]->IncreaseActualSize(byte_received);
					if (Pthis->block_for_socket_[socket_fd_index]->GetRestSize() > 0)
					{
						/** the current block is not read entirely from the sender, so continue the loop to read.**/
						continue;
					}

					/** a block is completely read. **/

					Pthis->logging_->log("[%ld] The %d-th block is received from Lower[%s]", Pthis->state.exchange_id_, Pthis->debug_received_block[socket_fd_index],
							Pthis->lower_ip_array[socket_fd_index].c_str());
					Pthis->debug_received_block[socket_fd_index]++;

					/** deserialize the data block from sender to the blockstreambase (received_block_stream_) **/
					Pthis->received_block_stream_->deserialize((Block*) Pthis->block_for_socket_[socket_fd_index]);

					/** mark block_for_socket_[socket_fd_index] to be empty so that it can accommodate the subsequent data **/
					Pthis->block_for_socket_[socket_fd_index]->reset();

					/** In the current implementation, a empty block stream means End-Of-File**/
					const bool eof=Pthis->received_block_stream_->Empty();
					if(!eof){
						/** the newly obtained data block is validate, so we insert it into the buffer and post
						 * sem_new_block_or_eof_ so that all the threads waiting for the semaphore continue. **/
						Pthis->buffer->insertBlock(Pthis->received_block_stream_);

						//??? why is all ,not 1
						// multiple threads will still compete with lock
						Pthis->sem_new_block_or_eof_.post(Pthis->number_of_registered_expanded_threads_);
					}
					else
					{
						/** The newly obtained data block is the end-of-file.  **/
						Pthis->logging_->log("[%ld] *****This block is the last one.", Pthis->state.exchange_id_);

						finish_times.push_back((int)getMilliSecond(start));

						/** update the exhausted senders count and post sem_new_block_or_eof_ so that all the
						 * threads waiting for the semaphore continue.
						 **/
						Pthis->nexhausted_lowers++;
						Pthis->sem_new_block_or_eof_.post(Pthis->number_of_registered_expanded_threads_);

						if (Pthis->nexhausted_lowers == Pthis->nlowers)
						{
							/*
							 * When all the exchange lowers are exhausted, notify the buffer
							 * that the input data is completely received.
							 */
							Pthis->buffer->setInputComplete();

							/* print the finish times */
							for(unsigned i=0;i<finish_times.size();i++){
								printf("%d\t",finish_times[i]);
							}
							printf("\t Var:%5.4f\n",get_stddev(finish_times));
						}


						Pthis->logging_->log(
                "[%ld] <<<<<<<<<<<<<<<<nexhausted_lowers=%d>>>>>>>>>>>>>>>>exchange=(%d,%d)",
                Pthis->state.exchange_id_, Pthis->nexhausted_lowers,
                Pthis->state.exchange_id_, Pthis->partition_offset);

						/** tell the sender that all the block are consumed so that the sender can close the socket**/
						Pthis->SendBlockAllConsumedNotification(events[i].data.fd);

						Pthis->logging_->log("[%ld] This notification (all the blocks in the socket buffer are consumed) is send to the lower[%s] exchange=(%d,%d).\n",
								Pthis->state.exchange_id_, Pthis->lower_ip_array[socket_fd_index].c_str(), Pthis->state.exchange_id_, Pthis->partition_offset);


					}
				}
				if (done)
				{
					Pthis->logging_->log("[%ld] Closed connection on descriptor %d[%s]\n", Pthis->state.exchange_id_, events[i].data.fd,
							Pthis->lower_ip_array[Pthis->lower_sock_fd_to_index[events[i].data.fd]].c_str());
					/* Closing the descriptor will make epoll remove it
					 from the set of descriptors which are monitored. */
					FileClose(events[i].data.fd);
				}
			}
		}
	}

}
/**
 * build a hash table first, which stores the tuple needed to be deleted in a
 *hash manner and accelerate the probe phase
 *
 */
bool PhysicalDeleteFilter::Open(SegmentExecStatus* const exec_status,
                                const PartitionOffset& partition_offset) {
#ifdef TIME
  startTimer(&timer);
#endif
  RETURN_IF_CANCELLED(exec_status);

  RegisterExpandedThreadToAllBarriers();
  int ret = rSuccess;
  int64_t timer;
  bool winning_thread = false;
  if (TryEntryIntoSerializedSection(0)) {
    // The first (winning) thread builds the shared structures: the output
    // column mappings, the hash function and the hash table itself.
    winning_thread = true;
    ExpanderTracker::getInstance()->addNewStageEndpoint(
        pthread_self(),
        LocalStageEndPoint(stage_desc, "delete filter build", 0));
    unsigned output_index = 0;
    for (unsigned i = 0; i < state_.filter_key_deleted_.size(); i++) {
      joinIndex_table_to_output_[i] = output_index;
      output_index++;
    }
    for (unsigned i = 0; i < state_.payload_base_.size(); i++) {
      payload_table_to_output_[i] = output_index;
      output_index++;
    }
    // start to create the hash table, including the used hash function, hash
    // table structure
    hash_ = PartitionFunctionFactory::createBoostHashFunction(
        state_.hashtable_bucket_num_);
    int64_t hash_table_build = curtick();
    hashtable_ = new BasicHashTable(
        state_.hashtable_bucket_num_, state_.hashtable_bucket_size_,
        state_.input_schema_left_->getTupleMaxSize());
    if (NULL == hashtable_) {
      // Fixed: the error was previously returned as
      // `return ret = rMemoryAllocationFailed;` -- a non-zero error code
      // converted to bool, i.e. this function reported SUCCESS on
      // allocation failure, and the LOG line after the return was
      // unreachable.
      ret = rMemoryAllocationFailed;
      LOG(ERROR) << "hashtable allocation failed"
                 << "[" << rMemoryAllocationFailed << "]" << endl;
      return false;
    }
#ifdef _DEBUG_
    consumed_tuples_from_left = 0;
#endif

    // start to create the join expression, based on which it is able to the
    // probe the deleted tuples
    //    QNode* expr = createEqualJoinExpression(
    //        state_.hashtable_schema_, state_.input_schema_right_,
    //        state_.filter_key_deleted_, state_.filter_key_base_);
    //    if (NULL == expr) {
    //      ret = rSuccess;
    //      LOG(ERROR) << "The generation of the enqual join expression for
    //      delete "
    //                    "filter is failed" << endl;
    //    }
    //    ticks start = curtick();
    //
    //    // start to generate the dedicated function, based on which the probe
    //    is
    //    // eventually acted, including using llvm and the function pointer
    //    if (Config::enable_codegen) {
    //      eftt_ = getExprFuncTwoTuples(expr, state_.hashtable_schema_,
    //                                   state_.input_schema_right_);
    //      memcpy_ = getMemcpy(state_.hashtable_schema_->getTupleMaxSize());
    //      memcat_ = getMemcat(state_.hashtable_schema_->getTupleMaxSize(),
    //                          state_.input_schema_right_->getTupleMaxSize());
    //    }
    //    if (eftt_) {
    //      cff_ = PhysicalDeleteFilter::isMatchCodegen;
    //      printf("Codegen(delete filter) succeed(%4.3fms)!\n",
    //             getMilliSecond(start));
    //    } else {
    cff_ = PhysicalDeleteFilter::isMatch;
    //      printf("Codegen(delete filter) failed!\n");
    //    }
    //    delete expr;
  }

  /**
   * For performance concern, the following line should place just after
   * "RegisterNewThreadToAllBarriers();"
   * in order to accelerate the open response time.
   */
  LOG(INFO) << "delete filter operator begin to open left child" << endl;
  state_.child_left_->Open(exec_status, partition_offset);
  LOG(INFO) << "delete filter operator finished opening left child" << endl;
  // Barrier 0: no thread starts building until the hash table exists.
  BarrierArrive(0);
  BasicHashTable::Iterator tmp_it = hashtable_->CreateIterator();

  void* cur;
  void* tuple_in_hashtable;
  unsigned bn;

  void* key_in_input;
  void* key_in_hashtable;
  void* value_in_input;
  void* value_in_hashtable;
  // create the context for the multi-thread to build the hash table
  DeleteFilterThreadContext* dftc = CreateOrReuseContext(crm_numa_sensitive);
  const Schema* input_schema = state_.input_schema_left_->duplicateSchema();
  //  we used the filter_key_deleted_[0] here, because the data is partitioned
  //  based on the first column in the join index
  const Operate* op = input_schema->getcolumn(state_.filter_key_deleted_[0])
                          .operate->duplicateOperator();
  const unsigned buckets = state_.hashtable_bucket_num_;

  int64_t start = curtick();
  int64_t processed_tuple_count = 0;

  LOG(INFO) << "delete filter operator begin to call left child's next()"
            << endl;
  RETURN_IF_CANCELLED(exec_status);

  // Build phase: hash every tuple of the left (deleted) input into the
  // shared hash table.  (Assignment in the inner while condition is
  // intentional.)
  while (state_.child_left_->Next(exec_status, dftc->l_block_for_asking_)) {
    RETURN_IF_CANCELLED(exec_status);
    delete dftc->l_block_stream_iterator_;
    dftc->l_block_stream_iterator_ =
        dftc->l_block_for_asking_->createIterator();
    while (cur = dftc->l_block_stream_iterator_->nextTuple()) {
#ifdef _DEBUG_
      processed_tuple_count++;
      lock_.acquire();
      consumed_tuples_from_left++;
      lock_.release();
#endif
      const void* key_addr =
          input_schema->getColumnAddess(state_.filter_key_deleted_[0], cur);
      bn = op->getPartitionValue(key_addr, buckets);
      tuple_in_hashtable = hashtable_->atomicAllocate(bn);
      if (memcpy_)
        memcpy_(tuple_in_hashtable, cur);
      else
        input_schema->copyTuple(cur, tuple_in_hashtable);
    }
    dftc->l_block_for_asking_->setEmpty();
  }
  //  printf("%d cycles per
  //  tuple!\n",(curtick()-start)/processed_tuple_count);
  unsigned tmp = 0;
#ifdef _DEBUG_
  tuples_in_hashtable = 0;

  produced_tuples = 0;
  consumed_tuples_from_right = 0;
#endif
  if (ExpanderTracker::getInstance()->isExpandedThreadCallBack(
          pthread_self())) {
    // This expanded thread was asked to retire before the probe phase.
    UnregisterExpandedThreadToAllBarriers(1);
    //    printf("<<<<<<<<<<<<<<<<<Join open detected call back
    //    signal!>>>>>>>>>>>>>>>>>\n");
    return true;
  }
  // Barrier 1: the hash table is complete before anyone opens the probe side.
  BarrierArrive(1);
  //  if(winning_thread){
  ////    hashtable->report_status();
  ////    printf("Hash Table Build time: %4.4f\n",getMilliSecond(timer));
  //  }

  //  hashtable->report_status();

  //  printf("join open consume %d tuples\n",consumed_tuples_from_left);
  RETURN_IF_CANCELLED(exec_status);

  state_.child_right_->Open(exec_status, partition_offset);
  RETURN_IF_CANCELLED(exec_status);

  LOG(INFO) << "delete filter operator finished opening right child" << endl;
  return true;
}
//int main_join_success(int argc, const char** argv){
	int main_join_success22(int argc, const char** argv){
	// Interactive demo of a distributed broadcast-exchange + join plan.
	// Answering 0 makes this node the master that builds and drives the plan;
	// any other answer starts a slave Environment that simply waits forever.
	int master;
	printf("Master(0) or Slave(others)?\n");
	scanf("%d",&master);
	if(master==0){
		Environment::getInstance(true);

		// Hard-coded demo parameters (originally read from argv).
		const unsigned block_size=4096;
		const unsigned thread_count=3;
		const unsigned expander_buffer=3;
		std::vector<std::string> upper_ip_list;
		const unsigned lowers_1=3;
		const unsigned lowers_2=3;

		upper_ip_list.push_back("10.11.1.208");

		std::vector<std::string> lower_ip_list;
		lower_ip_list.push_back("10.11.1.202");
		lower_ip_list.push_back("10.11.1.203");
		lower_ip_list.push_back("10.11.1.204");

		lower_ip_list.push_back("10.11.1.205");
		lower_ip_list.push_back("10.11.1.206");
		lower_ip_list.push_back("10.11.1.207");

		// First group: the first lowers_1 addresses.
		std::vector<std::string> used_lowers_1;
		for(unsigned i=0;i<lowers_1;i++){
			used_lowers_1.push_back(lower_ip_list[i]);
		}

		// Second group: the last lowers_2 addresses, in reverse order.
		// A signed index is used deliberately: the original unsigned loop
		// condition (i>=lower_ip_list.size()-lowers_2) would wrap around and
		// never terminate if lowers_2 equaled lower_ip_list.size().
		std::vector<std::string> used_lowers_2;
		const int total_lowers=static_cast<int>(lower_ip_list.size());
		for(int i=total_lowers-1;i>=total_lowers-static_cast<int>(lowers_2);i--){
			used_lowers_2.push_back(lower_ip_list[i]);
		}

		std::vector<column_type> column_list,column_list_;
		column_list.push_back(column_type(t_int));

		Schema* schema=new SchemaFix(column_list);

		ExpandableBlockStreamSingleColumnScan::State ebssc_state1("/home/imdb/temp/zhanglei/join_2",schema,block_size);
		BlockStreamIteratorBase* ebssc1=new ExpandableBlockStreamSingleColumnScan(ebssc_state1);

		ExpandableBlockStreamSingleColumnScan::State ebssc_state3("/home/imdb/temp/zhanglei/join_2_cp",schema,block_size);
		BlockStreamIteratorBase* ebssc3=new ExpandableBlockStreamSingleColumnScan(ebssc_state3);

		std::vector<Schema *> inputs;
		inputs.push_back(schema);
		inputs.push_back(schema);

		column_list_.push_back(column_type(t_int));
		column_list_.push_back(column_type(t_int));
		Schema* output=new SchemaFix(column_list_);

		std::vector<BlockStreamIteratorBase *> children_;
		children_.push_back(ebssc1);
		children_.push_back(ebssc3);

		//========================= combined =========================
		// (the original banner comment ended in a backslash, which
		// line-spliced the next source line into the comment)

		BlockStreamCombinedIterator::State bsci_state1(inputs,output,children_);
		BlockStreamCombinedIterator *bsc1=new BlockStreamCombinedIterator(bsci_state1);

		BlockStreamExpander::State bse_state(output,bsc1,thread_count,block_size,expander_buffer);
		BlockStreamIteratorBase* bse=new BlockStreamExpander(bse_state);

		const int exchange_id=1;
		ExpandableBlockStreamBroadcastExchange::State ebse_state(output,bse,block_size,used_lowers_2,used_lowers_1,exchange_id);
		BlockStreamIteratorBase* ebse=new ExpandableBlockStreamBroadcastExchange(ebse_state);

		BlockStreamCombinedIterator::State bsci_state2(inputs,output,children_);
		BlockStreamCombinedIterator *bsc2=new BlockStreamCombinedIterator(bsci_state2);

		std::vector<column_type> column_list_join;
		column_list_join.push_back(column_type(t_int));
		column_list_join.push_back(column_type(t_int));
		column_list_join.push_back(column_type(t_int));
		Schema* output_join=new SchemaFix(column_list_join);

		std::vector<unsigned> joinIndex_left;
		joinIndex_left.push_back(0);
		std::vector<unsigned> joinIndex_right;
		joinIndex_right.push_back(1);
		std::vector<unsigned> payload_left;
		payload_left.push_back(1);
		std::vector<unsigned> payload_right;
		payload_right.push_back(0);

		// NOTE(review): the join state is default-constructed; the commented
		// argument list suggests it was meant to be fully initialized.
		// Confirm before relying on the join output.
		BlockStreamJoinIterator::State bsji_state;//(ebse,bsc2,output,output,output_join,joinIndex_left,joinIndex_right,payload_left,payload_right,100,1024,4096);
		BlockStreamJoinIterator* bsji=new BlockStreamJoinIterator(bsji_state);

		BlockStreamExpander::State bse_state_(output_join,bsji,thread_count,block_size,expander_buffer);
		BlockStreamIteratorBase* bse_=new BlockStreamExpander(bse_state_);

		const int exchange_id_1=0;
		ExpandableBlockStreamBroadcastExchange::State ebse_state_(output,bse_,block_size,used_lowers_1,upper_ip_list,exchange_id_1);
		BlockStreamIteratorBase* ebse_=new ExpandableBlockStreamBroadcastExchange(ebse_state_);

		BlockStreamExpander::State bse_state_1(output_join,ebse_,thread_count,block_size,expander_buffer);
		BlockStreamIteratorBase* bse_1=new BlockStreamExpander(bse_state_1);

		// NOTE(review): bspfm is constructed but never opened below (the loop
		// drives ebse_ directly) -- confirm whether the performance-monitor
		// top was meant to be the plan root.
		BlockStreamPerformanceMonitorTop::State bspfm_state(output_join,bse_1,block_size,1000);
		BlockStreamIteratorBase* bspfm=new BlockStreamPerformanceMonitorTop(bspfm_state);

		BlockStreamBase *block=new BlockStreamFix(block_size,12);

		volatile int choice;

		printf("Continue(1) or Not(0) ?\n");
		scanf("%d",&choice);
		unsigned tuple_count=0;
		while(choice==1){
			ebse_->open();
			unsigned long long int start=curtick();
			while(ebse_->next(block)){
				BlockStreamBase::BlockStreamTraverseIterator *it=block->createIterator();
				void* tuple;
				while(tuple=it->nextTuple()){
					printf("tuple:%d \n",*(int*)tuple);
					tuple_count++;
				}
				block->setEmpty();
			}

			printf("Total tupls:%d\n",tuple_count);
			printf("Time=%f Throughput=%f.\n",getSecond(start),1024/getSecond(start));
			ebse_->close();

			// The loop repeats while the user enters 1, so the prompt must
			// advertise 1 as "continue" (the original prompt said the
			// opposite: "Continue(0) or Not(1)").
			printf("Continue(1) or Not(0) ?\n");
			scanf("%d",&choice);
			printf("you input %d\n",choice);
		}
	}
	else{
		Environment::getInstance(false);
	}

	// Keep the process alive so the exchange endpoints stay reachable.
	printf("Waiting~~~~~....\n");
	while(true){
		sleep(1);
	}
}
/**
 * first, block_for_socket_ for receive data from senders, then if one block is
 * enough, next serialize it and put it into all_merged_block_buffer.
 *  epoll is good at listening every coming block for different socket.
 *
 */
void* ExchangeMerger::Receiver(void* arg) {
  // Thread entry point: accepts connections from all lower senders, reads
  // serialized blocks off every socket via edge-triggered epoll, and feeds
  // deserialized blocks into all_merged_block_buffer_.
  ExchangeMerger* Pthis = reinterpret_cast<ExchangeMerger*>(arg);
  struct epoll_event event;
  struct epoll_event* events;
  int status;
  // create the epoll instance that multiplexes all sender sockets
  Pthis->epoll_fd_ = epoll_create1(0);
  if (Pthis->epoll_fd_ == -1) {
    LOG(ERROR) << " exchange_id = " << Pthis->state_.exchange_id_
               << " partition_offset = " << Pthis->partition_offset_
               << " merger fail to create epoll!" << std::endl;
    return NULL;
  }

  // register the listening socket, edge-triggered
  event.data.fd = Pthis->sock_fd_;
  event.events = EPOLLIN | EPOLLET;
  status = epoll_ctl(Pthis->epoll_fd_, EPOLL_CTL_ADD, Pthis->sock_fd_, &event);
  if (-1 == status) {
    LOG(ERROR) << " exchange_id = " << Pthis->state_.exchange_id_
               << " partition_offset = " << Pthis->partition_offset_
               << " merger fail to create epoll_ctl!" << std::endl;
    return NULL;
  }

  events = reinterpret_cast<epoll_event*>(
      calloc(Pthis->lower_num_, sizeof(epoll_event)));
  ticks start = curtick();
  std::vector<int> finish_times;  // per-sender completion times, in ms
  while (true) {
    usleep(1);
    const int event_count =
        epoll_wait(Pthis->epoll_fd_, events, Pthis->lower_num_, -1);
    for (int i = 0; i < event_count; i++) {
      if ((events[i].events & EPOLLERR) || (events[i].events & EPOLLHUP) ||
          (!(events[i].events & EPOLLIN))) {
        // NOTE(review): errno here reflects the last syscall, not the epoll
        // event itself -- the EINTR check is of questionable value; kept
        // as-is to preserve behavior.
        if (errno == EINTR) {
          continue;
        }
        LOG(WARNING) << " exchange_id = " << Pthis->state_.exchange_id_
                     << " partition_offset = " << Pthis->partition_offset_
                     << " epoll error,reason: " << strerror(errno)
                     << " close fd = " << events[i].data.fd << std::endl;
        FileClose(events[i].data.fd);
        continue;
      } else if (Pthis->sock_fd_ == events[i].data.fd) {
        /* We have a notification on the listening socket, which means one or
         * more incoming connections.
         */
        while (true) {
          sockaddr in_addr;
          socklen_t in_len;
          int infd;
          char hbuf[NI_MAXHOST], sbuf[NI_MAXSERV];

          in_len = sizeof in_addr;
          infd = accept(Pthis->sock_fd_, &in_addr, &in_len);
          if (infd == -1) {
            if ((errno == EAGAIN) || (errno == EWOULDBLOCK)) {
              /* all the incoming connections are processed.*/
              break;
            } else {
              LOG(WARNING) << " exchange_id = " << Pthis->state_.exchange_id_
                           << " partition_offset = " << Pthis->partition_offset_
                           << " epoll accept error, try again!" << std::endl;
              break;
            }
          }
          status = getnameinfo(&in_addr, in_len, hbuf, sizeof(hbuf), sbuf,
                               sizeof(sbuf), NI_NUMERICHOST | NI_NUMERICSERV);
          if (0 == status) {
            LOG(INFO) << " exchange_id = " << Pthis->state_.exchange_id_
                      << " partition_offset = " << Pthis->partition_offset_
                      << " Accepted connection on descriptor " << infd
                      << " host= " << hbuf << " port= " << sbuf << std::endl;
            Pthis->lower_ip_list_.push_back(hbuf);
            Pthis->lower_sock_fd_to_id_[infd] =
                Pthis->lower_ip_list_.size() - 1;
            assert(Pthis->lower_ip_list_.size() <=
                   Pthis->state_.lower_id_list_.size());
          }
          /*Make the incoming socket non-blocking and add it to the list of fds
           * to monitor.*/
          if (!Pthis->SetSocketNonBlocking(infd)) {
            free(events);  // avoid leaking the event array on early exit
            return 0;
          }
          event.data.fd = infd;
          event.events = EPOLLIN | EPOLLET;
          status = epoll_ctl(Pthis->epoll_fd_, EPOLL_CTL_ADD, infd, &event);
          if (-1 == status) {
            LOG(ERROR) << " exchange_id = " << Pthis->state_.exchange_id_
                       << " partition_offset = " << Pthis->partition_offset_
                       << " epoll_ctl error2" << std::endl;
            free(events);  // avoid leaking the event array on early exit
            return NULL;
          }
        }
        continue;
      } else {
        /* We have data on the fd waiting to be read.*/
        int done = 0;
        while (true) {
          int byte_received;
          int socket_fd_index = Pthis->lower_sock_fd_to_id_[events[i].data.fd];
          byte_received =
              read(events[i].data.fd,
                   (reinterpret_cast<char*>(
                       Pthis->block_for_socket_[socket_fd_index]->getBlock())) +
                       Pthis->block_for_socket_[socket_fd_index]->GetCurSize(),
                   Pthis->block_for_socket_[socket_fd_index]->GetRestSize());
          if (byte_received == -1) {
            if (errno == EAGAIN) {
              /*We have read all the data,so go back to the loop.*/
              break;
            }
            LOG(WARNING) << " exchange_id = " << Pthis->state_.exchange_id_
                         << " partition_offset = " << Pthis->partition_offset_
                         << " merger read error!" << std::endl;
            done = 1;
            /* bugfix: the original fell through here and called
             * IncreaseActualSize(-1), corrupting the buffer bookkeeping. */
            break;
          } else if (byte_received == 0) {
            /* End of file. The remote has closed the connection.*/
            done = 1;
            break;
          }

          /* The data is successfully read.*/

          Pthis->block_for_socket_[socket_fd_index]->IncreaseActualSize(
              byte_received);
          if (Pthis->block_for_socket_[socket_fd_index]->GetRestSize() > 0) {
            /** the current block is not read entirely from the Sender, so
             * continue the loop to read.**/
            continue;
          }

          /** a block is completely read. **/

          /** deserialize the data block from Sender to the blockstreambase
           * (block_for_deserialization) **/
          Pthis->block_for_deserialization->deserialize(
              reinterpret_cast<Block*>(
                  Pthis->block_for_socket_[socket_fd_index]));

          /** mark block_for_socket_[socket_fd_index] to be empty so that it can
           * accommodate the subsequent data **/
          Pthis->block_for_socket_[socket_fd_index]->reset();

          /**
           * In the current implementation, a empty block stream means
           * End-Of-File
           */
          const bool eof = Pthis->block_for_deserialization->Empty();
          if (!eof) {
            /** the newly obtained data block is validate, so we insert it into
             * the all_merged_block_buffer_ and post sem_new_block_or_eof_ so
             * that all the threads waiting for the semaphore continue.
             */
            Pthis->all_merged_block_buffer_->insertBlock(
                Pthis->block_for_deserialization);

            // NOTE(review): posting once per registered thread (instead of
            // once) is intentional per the original comment, but the threads
            // will still contend on the buffer lock -- confirm.
            Pthis->sem_new_block_or_eof_.post(
                Pthis->number_of_registered_expanded_threads_);
          } else {
            /** The newly obtained data block is the end-of-file.  **/
            LOG(INFO) << " exchange_id = " << Pthis->state_.exchange_id_
                      << " partition_offset = " << Pthis->partition_offset_
                      << " This block is the last one." << std::endl;

            finish_times.push_back(static_cast<int>(getMilliSecond(start)));

            /** update the exhausted senders count and post
             *sem_new_block_or_eof_ so that all the
             * threads waiting for the semaphore continue.
             **/
            Pthis->exhausted_lowers++;
            Pthis->sem_new_block_or_eof_.post(
                Pthis->number_of_registered_expanded_threads_);

            if (Pthis->exhausted_lowers == Pthis->lower_num_) {
              /*
               * When all the exchange lowers are exhausted, notify the
               * all_merged_block_buffer_
               * that the input data is completely received.
               */
              Pthis->all_merged_block_buffer_->setInputComplete();
            }

            LOG(INFO) << " exchange_id = " << Pthis->state_.exchange_id_
                      << " partition_offset = " << Pthis->partition_offset_
                      << " exhausted lowers = " << Pthis->exhausted_lowers
                      << " senders have exhausted" << std::endl;

            /** tell the Sender that all the block are consumed so that the
             * Sender can close the socket**/
            Pthis->ReplyAllBlocksConsumed(events[i].data.fd);

            LOG(INFO)
                << " exchange_id = " << Pthis->state_.exchange_id_
                << " partition_offset = " << Pthis->partition_offset_
                << " This notification (all the blocks in the socket buffer "
                   "are consumed) is replied to the lower "
                << Pthis->lower_ip_list_[socket_fd_index].c_str() << std::endl;
          }
        }
        if (done) {
          LOG(INFO)
              << " exchange_id = " << Pthis->state_.exchange_id_
              << " partition_offset = " << Pthis->partition_offset_
              << " Closed connection on descriptor " << events[i].data.fd << " "
              << Pthis->lower_ip_list_
                     [Pthis->lower_sock_fd_to_id_[events[i].data.fd]].c_str()
              << std::endl;
          /* Closing the descriptor will make epoll remove it
           from the set of descriptors which are monitored. */
          FileClose(events[i].data.fd);
        }
      }
    }
  }
}
/**
 * note the serialized block's size is different from others, it has tail info.
 * exchange merger is at the end of one segment of plan, so it's the "stage_src"
 * for this stage
 */
bool ExchangeMerger::Open(const PartitionOffset& partition_offset) {
  // Opens the merger side of the exchange: the first expanded thread performs
  // the one-off setup (buffers, sockets, receiver thread); every thread then
  // synchronizes on the barrier before returning.
  unsigned long long int start = curtick();  // timing anchor (not read here)
  RegisterExpandedThreadToAllBarriers();
  if (TryEntryIntoSerializedSection()) {  // first arrived thread does the
                                          // one-off initialization below
    exhausted_lowers = 0;
    this->partition_offset_ = partition_offset;
    lower_num_ = state_.lower_id_list_.size();
    socket_fd_lower_list_ = new int[lower_num_];
    // use the same unsigned index type as the allocation loop further down,
    // avoiding a signed/unsigned comparison against lower_num_
    for (unsigned i = 0; i < lower_num_; ++i) {
      socket_fd_lower_list_[i] = -1;
    }
    // buffer holding all deserialized blocks coming from every socket
    all_merged_block_buffer_ = new BlockStreamBuffer(
        state_.block_size_, BUFFER_SIZE_IN_EXCHANGE, state_.schema_);
    ExpanderTracker::getInstance()->addNewStageEndpoint(
        pthread_self(),
        LocalStageEndPoint(stage_src, "Exchange", all_merged_block_buffer_));

    // when one block_for_socket_ is full, it is deserialized into
    // block_for_deserialization and sent to all_merged_block_buffer_
    block_for_deserialization =
        BlockStreamBase::createBlock(state_.schema_, state_.block_size_);

    // one serialized-block container per lower sender socket
    block_for_socket_ = new BlockContainer* [lower_num_];
    for (unsigned i = 0; i < lower_num_; ++i) {
      block_for_socket_[i] = new BlockContainer(
          block_for_deserialization->getSerializedBlockSize());
    }
    if (PrepareSocket() == false) return false;
    if (SetSocketNonBlocking(sock_fd_) == false) {
      return false;
    }

    LOG(INFO) << "exchange_id = " << state_.exchange_id_
              << " partition_offset = " << partition_offset
              << " Open: exhausted lower senders num = " << exhausted_lowers
              << " lower sender num = " << lower_num_ << std::endl;

    if (RegisterExchange() == false) {
      LOG(ERROR) << "Register Exchange with ID = " << state_.exchange_id_
                 << " fails!" << std::endl;
    }

    if (IsMaster()) {
      /*  According to a bug reported by dsc, the master exchange upper should
       * check whether other uppers have registered to exchangeTracker.
       * Otherwise, the lower may fail to connect to the exchangeTracker of some
       * uppers when the lower nodes receive the exchange lower, as some uppers
       *  have not register the exchange_id to the exchangeTracker.
       */
      LOG(INFO) << " exchange_id = " << state_.exchange_id_
                << " partition_offset = " << partition_offset
                << "Synchronizing...." << std::endl;
      IsOtherMergersRegistered();
      LOG(INFO) << " exchange_id = " << state_.exchange_id_
                << " partition_offset = " << partition_offset
                << " Synchronized! Then serialize and send its next segment "
                   "plan to all its lower senders" << std::endl;
      if (SerializeAndSendPlan() == false) return false;
    }
    if (CreateReceiverThread() == false) {
      return false;
    }
    CreatePerformanceInfo();
  }
  /// A synchronization barrier, in case of multiple expanded threads
  BarrierArrive();
  return true;
}
//int main(int argc,const char** argv){
int main_combine(int argc,const char** argv){
	// Demo: build a combined-scan + expander plan, round-trip it through
	// boost text (de)serialization, then interactively execute the
	// deserialized plan and report throughput.
	const unsigned block_size=BLOCK_SIZE_CAO;
	const unsigned thread_count=4;
	const unsigned expander_buffer=4;
	std::vector<column_type> column_list,column_list_;
	column_list.push_back(column_type(t_int));

	Schema* schema=new SchemaFix(column_list);
	ExpandableBlockStreamSingleColumnScan::State ebssc_state1("/home/imdb/temp/Uniform_0_99.column",schema,block_size);
	BlockStreamIteratorBase* ebssc1=new ExpandableBlockStreamSingleColumnScan(ebssc_state1);
	ExpandableBlockStreamSingleColumnScan::State ebssc_state2("/home/imdb/temp/Uniform_0_99.column",schema,block_size);
	BlockStreamIteratorBase* ebssc2=new ExpandableBlockStreamSingleColumnScan(ebssc_state2);

	std::vector<Schema *> inputs;
	inputs.push_back(schema);
	inputs.push_back(schema);

	column_list_.push_back(column_type(t_int));
	column_list_.push_back(column_type(t_int));
	Schema* output=new SchemaFix(column_list_);

	std::vector<BlockStreamIteratorBase *> children_;
	children_.push_back(ebssc1);
	children_.push_back(ebssc2);

	BlockStreamCombinedIterator::State bsci_state(inputs,output,children_);
	BlockStreamCombinedIterator *bsc=new BlockStreamCombinedIterator(bsci_state);

	// NOTE(review): the expander is given the one-column `schema` while its
	// child produces the two-column `output` -- looks inconsistent with the
	// 8-byte block below (two ints); confirm the State's schema semantics.
	BlockStreamExpander::State bse_state(schema,bsc,thread_count,block_size,expander_buffer);
	BlockStreamIteratorBase* bse=new BlockStreamExpander(bse_state);

	BlockStreamBase *block=new BlockStreamFix(block_size,8);
	int choice=0;

	// Serialize the plan to text...
	std::ostringstream ostr;
	boost::archive::text_oarchive oa(ostr);
	oa.register_type(static_cast<BlockStreamCombinedIterator *>(NULL));
	oa.register_type(static_cast<BlockStreamExpander *>(NULL));
	oa.register_type(static_cast<ExpandableBlockStreamSingleColumnScan *>(NULL));
	Register_Schemas<boost::archive::text_oarchive>(oa);
	oa<<bse;

	std::cout<<"Serialization Result:"<<ostr.str()<<std::endl;

	// ...and deserialize it back into an executable iterator tree.
	std::istringstream istr(ostr.str());
	boost::archive::text_iarchive ia(istr);
	BlockStreamIteratorBase* des;

	ia.register_type(static_cast<BlockStreamCombinedIterator *>(NULL));
	ia.register_type(static_cast<BlockStreamExpander *>(NULL));
	ia.register_type(static_cast<ExpandableBlockStreamSingleColumnScan *>(NULL));
	Register_Schemas<boost::archive::text_iarchive>(ia);
	ia>>des;

	while(choice==0){
		des->open();
		cout<<"after open!"<<endl;
		unsigned long long int start=curtick();

		cout<<"ready for the next"<<endl;

		unsigned tuple_count=0;
		while(des->next(block)){
			BlockStreamBase::BlockStreamTraverseIterator* it=block->createIterator();
			while(it->nextTuple()){
				tuple_count++;
			}
			block->setEmpty();
		}
		printf("Time=%f Throughput=%f.\n tuple=%d",getSecond(start),1024/getSecond(start),tuple_count);
		des->close();
		printf("Continue(0) or Not(1) ?\n");

		scanf("%d",&choice);

	}

	// bugfix: the function is declared int but the original had no return
	// statement, which is undefined behavior for non-main functions.
	return 0;
}
// ----- Example #20 (score: 0) -----
// (extraction artifact from the example listing, kept as a comment)
static void query_select_aggregation_ing(){
	/*
	 * select sum(a+1)+count(a),b
	 * from T
	 * group by b
	 *
	 * notation: p a p s
	 * */
	unsigned long long int start=curtick();
	TableDescriptor* table=Environment::getInstance()->getCatalog()->getTable("LINEITEM");
	//===========================scan===========================
	LogicalOperator* scan=new LogicalScan(table->getProjectoin(0));

	//==========================project=========================
	vector< vector<ExpressionItem> >expr_list1;

	vector<ExpressionItem> expr0;
	vector<ExpressionItem> expr1;
	vector<ExpressionItem> expr2;
	vector<ExpressionItem> expr3;
	vector<ExpressionItem> expr4;
	vector<ExpressionItem> expr5;
	vector<ExpressionItem> expr6;
	vector<ExpressionItem> expr7;
	vector<ExpressionItem> expr8;
	vector<ExpressionItem> expr9;
	vector<ExpressionItem> expr10;
	vector<ExpressionItem> expr11;
	vector<ExpressionItem> expr12;
	vector<ExpressionItem> expr13;
	vector<ExpressionItem> expr14;
	vector<ExpressionItem> expr15;
	vector<ExpressionItem> expr16;
	vector<ExpressionItem> expr17;

	ExpressionItem ei1;
	ExpressionItem ei1_1;
	ExpressionItem ei1_2;
	ExpressionItem ei1_3;
	ExpressionItem ei1_4;
	ExpressionItem ei1_5;
	ExpressionItem ei1_6;
	ExpressionItem ei1_7;
	ExpressionItem ei1_8;
	ExpressionItem ei1_9;
	ExpressionItem ei2;
	ExpressionItem ei3;
	ExpressionItem ei4;
	ExpressionItem ei5;
	ExpressionItem ei6;
	ExpressionItem ei7;
	ExpressionItem ei8;
	ExpressionItem ei9;
	ExpressionItem ei10;
	ExpressionItem ei11;
	ExpressionItem ei12;
	ExpressionItem ei13;
	ExpressionItem ei14;
	ExpressionItem ei15;
	ExpressionItem ei16;
	ExpressionItem ei17;


	ei1_1.setVariable("LINEITEM.L_EXTENDEDPRICE");
	ei1_2.setIntValue("1");
	ei1_3.setVariable("LINEITEM.L_DISCOUNT");
	ei1_4.setOperator("-");
	ei1_5.setOperator("*");
	ei1_6.setIntValue("1");
	ei1_7.setVariable("LINEITEM.L_TEX");
	ei1_8.setOperator("+");
	ei1_9.setOperator("*");


	ei1.setVariable("LINEITEM.row_id");
	ei2.setVariable("LINEITEM.L_ORDERKEY");
	ei3.setVariable("LINEITEM.L_PARTKEY");
	ei4.setVariable("LINEITEM.L_SUPPKEY");
	ei5.setVariable("LINEITEM.L_LINENUMBER");
	ei6.setVariable("LINEITEM.L_QUANTITY");
	ei7.setVariable("LINEITEM.L_EXTENDEDPRICE");
	ei8.setVariable("LINEITEM.L_DISCOUNT");
	ei9.setVariable("LINEITEM.L_TEX");
	ei10.setVariable("LINEITEM.L_RETURNFLAG");
//	ei10.size=1;
	ei11.setVariable("LINEITEM.L_LINESTATUS");
//	ei11.size=1;


	ei13.setVariable("LINEITEM.L_COMMITDATE");
	ei14.setVariable("LINEITEM.L_RECEIPTDATE");
	ei15.setVariable("LINEITEM.L_SHIPINSTRUCT");
//	ei15.size=25;
	ei16.setVariable("LINEITEM.L_SHIPMODE");
//	ei16.size=10;
	ei17.setVariable("LINEITEM.L_COMMENT");
//	ei17.size=44;



	expr1.push_back(ei1_1);
	expr1.push_back(ei1_2);
	expr1.push_back(ei1_3);
	expr1.push_back(ei1_4);
	expr1.push_back(ei1_5);

//	expr1.push_back(ei1_6);
//	expr1.push_back(ei1_7);
//	expr1.push_back(ei1_8);
//	expr1.push_back(ei1_9);
//	expr1.push_back(ei1);

	expr2.push_back(ei1_1);
	expr2.push_back(ei1_2);
	expr2.push_back(ei1_3);
	expr2.push_back(ei1_4);
	expr2.push_back(ei1_5);
	expr2.push_back(ei1_6);
	expr2.push_back(ei1_7);
	expr2.push_back(ei1_8);
	expr2.push_back(ei1_9);

	expr3.push_back(ei1_2);
	expr3.push_back(ei1_3);
	expr3.push_back(ei1_4);

//	expr3.push_back(ei3);
	expr4.push_back(ei4);
	expr5.push_back(ei5);
	expr6.push_back(ei6);
	expr7.push_back(ei7);
	expr8.push_back(ei8);
	expr9.push_back(ei9);
	expr10.push_back(ei10);
	expr11.push_back(ei11);
	expr12.push_back(ei12);
	expr13.push_back(ei13);
	expr14.push_back(ei14);
	expr15.push_back(ei15);
	expr16.push_back(ei16);
	expr17.push_back(ei17);

	expr0.push_back(ei1);

//	expr_list1.push_back(expr0);
	expr_list1.push_back(expr10);
	expr_list1.push_back(expr11);
	expr_list1.push_back(expr6);
	expr_list1.push_back(expr7);
	expr_list1.push_back(expr1);
	expr_list1.push_back(expr2);
	expr_list1.push_back(expr8);
	expr_list1.push_back(expr3);




//	expr_list1.push_back(expr3);
//	expr_list1.push_back(expr4);
//	expr_list1.push_back(expr5);
//	expr_list1.push_back(expr8);
//	expr_list1.push_back(expr9);
//	expr_list1.push_back(expr12);
//	expr_list1.push_back(expr13);
//	expr_list1.push_back(expr14);
//	expr_list1.push_back(expr15);
//	expr_list1.push_back(expr16);
//	expr_list1.push_back(expr17);

	LogicalOperator* project1=new LogicalProject(scan,expr_list1);

//	//========================aggregation=======================
//	std::vector<Attribute> group_by_attributes;
//	group_by_attributes.push_back(table->getAttribute("L_RETURNFLAG"));
//	group_by_attributes.push_back(table->getAttribute("L_LINESTATUS"));
//	std::vector<Attribute> aggregation_attributes;
//	aggregation_attributes.push_back(table->getAttribute("L_QUANTITY"));
//	aggregation_attributes.push_back(table->getAttribute("L_EXTENDEDPRICE"));
//	aggregation_attributes.push_back(table->getAttribute("L_DISCOUNT"));
//	aggregation_attributes.push_back(Attribute(ATTRIBUTE_ANY));
//	std::vector<BlockStreamAggregationIterator::State::aggregation> aggregation_function;
//
//	aggregation_function.push_back(BlockStreamAggregationIterator::State::sum);
//	aggregation_function.push_back(BlockStreamAggregationIterator::State::sum);
//	aggregation_function.push_back(BlockStreamAggregationIterator::State::sum);
//	aggregation_function.push_back(BlockStreamAggregationIterator::State::count);
//	LogicalOperator* aggregation=new Aggregation(group_by_attributes,aggregation_attributes,aggregation_function,project1);
//
//	//==========================project=========================
//	vector< vector<ExpressionItem> >expr_list2;
//	LogicalOperator* project2=new LogicalProject(aggregation,expr_list2);
	//===========================root===========================
	LogicalOperator* root=new LogicalQueryPlanRoot(0,project1,LogicalQueryPlanRoot::PERFORMANCE);

	cout<<"performance is ok!"<<endl;
	BlockStreamIteratorBase* physical_iterator_tree=root->getIteratorTree(64*1024);
//	physical_iterator_tree->print();
	physical_iterator_tree->open();
	while(physical_iterator_tree->next(0));
	physical_iterator_tree->close();
	printf("Q1: execution time: %4.4f second.\n",getSecond(start));

}
// ----- Example #21 (score: 0) -----
// (extraction artifact from the example listing, kept as a comment)
static void query_select_fzh() {
	/*
	 * select sum(a+1)+count(a),b
	 * from T
	 * group by b
	 *
	 * notation: p a p s
	 * */
	unsigned long long int start=curtick();
	TableDescriptor* table=Environment::getInstance()->getCatalog()->getTable("LINEITEM");
	//===========================scan===========================
	LogicalOperator* scan=new LogicalScan(table->getProjectoin(0));

	//==========================project=========================
	vector< vector<ExpressionItem> >expr_list1;

	vector<ExpressionItem> expr1;
	vector<ExpressionItem> expr2;
	vector<ExpressionItem> expr3;
	vector<ExpressionItem> expr4;
	vector<ExpressionItem> expr5;
	vector<ExpressionItem> expr6;
	vector<ExpressionItem> expr7;
	vector<ExpressionItem> expr8;
	vector<ExpressionItem> expr9;
	vector<ExpressionItem> expr10;
	vector<ExpressionItem> expr11;
	vector<ExpressionItem> expr12;
	vector<ExpressionItem> expr13;
	vector<ExpressionItem> expr14;
	vector<ExpressionItem> expr15;
	vector<ExpressionItem> expr16;
	vector<ExpressionItem> expr17;

	ExpressionItem ei1;
	ExpressionItem ei1_1;
	ExpressionItem ei1_2;
	ExpressionItem ei1_3;
	ExpressionItem ei1_4;
	ExpressionItem ei1_5;
	ExpressionItem ei1_6;
	ExpressionItem ei1_7;
	ExpressionItem ei1_8;
	ExpressionItem ei1_9;
	ExpressionItem ei2;
	ExpressionItem ei3;
	ExpressionItem ei4;
	ExpressionItem ei5;
	ExpressionItem ei6;
	ExpressionItem ei7;
	ExpressionItem ei8;
	ExpressionItem ei9;
	ExpressionItem ei10;
	ExpressionItem ei11;
	ExpressionItem ei12;
	ExpressionItem ei13;
	ExpressionItem ei14;
	ExpressionItem ei15;
	ExpressionItem ei16;
	ExpressionItem ei17;


	ei1_1.setVariable("LINEITEM.row_id");
//	ei1_2.setVariable("LINEITEM.L_ORDERKEY");
	ei1_2.setIntValue("1");
	ei1_3.setOperator("+");

	expr1.push_back(ei1_1);
	expr1.push_back(ei1_2);
	expr1.push_back(ei1_3);

	expr_list1.push_back(expr1);

	LogicalOperator* project1=new LogicalProject(scan,expr_list1);

	//========================aggregation=======================
	std::vector<Attribute> group_by_attributes;
	group_by_attributes.push_back(table->getAttribute("L_RETURNFLAG"));
	group_by_attributes.push_back(table->getAttribute("L_LINESTATUS"));
	std::vector<Attribute> aggregation_attributes;
	aggregation_attributes.push_back(table->getAttribute("L_QUANTITY"));
	aggregation_attributes.push_back(table->getAttribute("L_EXTENDEDPRICE"));
	aggregation_attributes.push_back(table->getAttribute("L_DISCOUNT"));
	aggregation_attributes.push_back(Attribute(ATTRIBUTE_ANY));
	std::vector<BlockStreamAggregationIterator::State::aggregation> aggregation_function;

	aggregation_function.push_back(BlockStreamAggregationIterator::State::sum);
	aggregation_function.push_back(BlockStreamAggregationIterator::State::sum);
	aggregation_function.push_back(BlockStreamAggregationIterator::State::sum);
	aggregation_function.push_back(BlockStreamAggregationIterator::State::count);
	LogicalOperator* aggregation=new Aggregation(group_by_attributes,aggregation_attributes,aggregation_function,project1);

	//==========================project=========================
	vector< vector<ExpressionItem> >expr_list2;

	ExpressionItem ei21_1;
	ei21_1.setVariable("LINEITEM.row_id+1");
	vector<ExpressionItem> expr21;
	expr21.push_back(ei21_1);
	expr_list2.push_back(expr21);
	LogicalOperator* project2=new LogicalProject(project1,expr_list2);
	//===========================root===========================
	LogicalOperator* root=new LogicalQueryPlanRoot(0,project2,LogicalQueryPlanRoot::PRINT);

	cout<<"performance is ok!"<<endl;
	BlockStreamIteratorBase* physical_iterator_tree=root->getIteratorTree(64*1024);
//	physical_iterator_tree->print();
	physical_iterator_tree->open();
	while(physical_iterator_tree->next(0));
	physical_iterator_tree->close();
	printf("Q1: execution time: %4.4f second.\n",getSecond(start));
}
// ----- Example #22 (score: 0) -----
// (extraction artifact from the example listing, kept as a comment)
/**
 * Ad-hoc test query: scan LINEITEM, project L_RETURNFLAG and L_LINESTATUS,
 * build a sort on the two projected columns, and print the logical plans.
 * The physical execution is commented out, so this currently only builds
 * and prints the logical plan, then reports elapsed wall-clock time.
 */
static void query_select_sort_string() {
	/*
	 * select sum(a+1)+count(a),b
	 * from T
	 * group by b
	 *
	 * notation: p a p s
	 * */
	unsigned long long int start=curtick();
	TableDescriptor* table=Environment::getInstance()->getCatalog()->getTable("LINEITEM");
	//===========================scan===========================
	LogicalOperator* scan=new LogicalScan(table->getProjectoin(0));

	//==========================project=========================
	vector< vector<ExpressionItem> >expr_list1;

	vector<ExpressionItem> expr2;
	vector<ExpressionItem> expr3;
	ExpressionItem ei2;
	ExpressionItem ei3;
	ei2.setVariable("LINEITEM.L_RETURNFLAG");
	ei3.setVariable("LINEITEM.L_LINESTATUS");

	expr2.push_back(ei2);
	expr3.push_back(ei3);

	expr_list1.push_back(expr2);
	expr_list1.push_back(expr3);

	LogicalOperator* project1=new LogicalProject(scan,expr_list1);

	//============================sort==========================
	// NOTE(review): the OrderByAttr objects are stack-allocated and their
	// addresses are stored in `vo`; this is only safe because `sort1` is
	// built and printed before this function returns.
	vector<LogicalSort::OrderByAttr*>vo;
	LogicalSort::OrderByAttr tmp1("LINEITEM.L_LINESTATUS",1);
	LogicalSort::OrderByAttr tmp2("LINEITEM.L_RETURNFLAG",0);
	vo.push_back(&tmp1);
	vo.push_back(&tmp2);
	// Debug dump of the order-by attributes.
	// Fixed format specifiers: strlen() returns size_t (%zu), and pointers
	// must be printed with %p -- the original %x truncates on 64-bit builds
	// and is undefined behavior for pointer arguments.
	for(size_t i=0;i<vo.size();i++)
	{
		printf("vo[%zu]= %s len=%zu  aa=%p  sa=%p\n",i,(vo[i])->ta_,strlen(vo[i]->ta_),(void*)vo[i],(void*)vo[i]->ta_);
	}
	LogicalOperator* sort1=new LogicalSort(project1,vo);
	sort1->print();
	cout<<"~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"<<endl;

	//===========================root===========================
	LogicalOperator* root=new LogicalQueryPlanRoot(0,sort1,LogicalQueryPlanRoot::PRINT);
	root->print();
	cout<<"performance is ok!"<<endl;
	// Two getchar() calls pause the test so the printed plans can be read.
	getchar();
	getchar();
	// Physical execution intentionally disabled for this test:
//	BlockStreamIteratorBase* physical_iterator_tree=root->getIteratorTree(64*1024);
////	physical_iterator_tree->print();
//	physical_iterator_tree->open();
//	while(physical_iterator_tree->next(0));
//	physical_iterator_tree->close();
	printf("Q1: execution time: %4.4f second.\n",getSecond(start));
}
Exemple #23
0
bool PhysicalSort::Open(const PartitionOffset &part_off) {
  /**
   * TODO(anyone): multi threads can be used to pipeline!!!
   */
  swap_num_ = 0;
  temp_cur_ = 0;
  /**
   *  first we can store all the data which will be bufferred
   * 1, buffer is the first phase. multi-threads will be applyed to the data
   *    in the buffer.
   * 2, sort the data in the buffer, we choose quicksort to sort the records
   *    by specifying the column to be sorted
   * 3, whether to register the buffer into the blockmanager.
   * */
  BlockStreamBase *block_for_asking;

  state_.partition_offset_ = part_off;

  state_.child_->Open(state_.partition_offset_);

  if (sema_open_.try_wait()) {
    block_buffer_iterator_ = block_buffer_.createIterator();
    open_finished_ = true;
  } else {
    while (!open_finished_) {
      usleep(1);
    }
  }

  if (CreateBlockStream(block_for_asking) == false) {
    LOG(ERROR) << "error in the create block stream!!!" << endl;
    return 0;
  }
  /**
   *  phase 1: store the data in the buffer!
   *          by using multi-threads to speed up
   */
  unsigned block_offset = 0;
  unsigned tuple_count_sum = 0;
  BlockStreamBase::BlockStreamTraverseIterator *iterator_for_scan;
  while (state_.child_->Next(block_for_asking)) {
    tuple_count_sum += block_for_asking->getTuplesInBlock();
    block_buffer_.atomicAppendNewBlock(block_for_asking);
    iterator_for_scan = block_buffer_.getBlock(block_offset)->createIterator();
    void *tuple_ptr = 0;
    while ((tuple_ptr = iterator_for_scan->nextTuple()) != 0) {
      tuple_vector_.push_back(tuple_ptr);
    }
    block_offset++;
    if (CreateBlockStream(block_for_asking) == false) {
      LOG(ERROR) << "error in the create block stream!!!" << endl;
      return 0;
    }
  }

  /**
   *  phase 2: sort the data in the buffer!
   *          by using multi-threads to speed up?
   * TODO(anyone): whether to store the sorted data into the blockmanager.
   */
  //    cout<<"check the memory usage!!!"<<endl;
  unsigned long long int time = curtick();
  //    order(state_.orderbyKey_,tuple_count_sum);
  Order();

  // cout<<"the tuple_count is: "<<tuple_count_sum<<"Total time:
  // "<<getSecond(time)<<" seconds, the swap num is: "<<swap_num<<endl;
  return true;
}
/**
 * @brief  Method description : describe the open method which gets results from
 * the left child and copy them into its local buffer, say the block buffer. the
 * block buffer is a dynamic block buffer since all the expanded threads will
 * share the same block buffer.
 * @param exec_status cooperative-cancellation handle, checked between stages.
 * @param partition_offset partition this thread opens on both children.
 * @return true on success (or when this expanded thread is called back to
 *         exit early); false when execution has been cancelled.
 */
bool PhysicalNestLoopJoin::Open(SegmentExecStatus *const exec_status,
                                const PartitionOffset &partition_offset) {
  RETURN_IF_CANCELLED(exec_status);

  // Every expanded thread must register with the barriers before any
  // BarrierArrive() below, so the barrier counts stay consistent.
  RegisterExpandedThreadToAllBarriers();
  unsigned long long int timer;
  bool winning_thread = false;
  if (TryEntryIntoSerializedSection(0)) {  // the first thread of all need to do
    // Exactly one "winning" thread initializes the state shared by all
    // expanded threads: the dynamic block buffer and the join-condition mode.
    ExpanderTracker::getInstance()->addNewStageEndpoint(
        pthread_self(), LocalStageEndPoint(stage_desc, "nest loop", 0));
    winning_thread = true;
    timer = curtick();
    block_buffer_ = new DynamicBlockBuffer();
    if (state_.join_condi_.size() == 0) {
      join_condi_process_ = WithoutJoinCondi;
    } else {
      join_condi_process_ = WithJoinCondi;
    }
    LOG(INFO) << "[NestloopJoin]: [the first thread opens the nestloopJoin "
                 "physical operator]" << std::endl;
  }
  RETURN_IF_CANCELLED(exec_status);

  state_.child_left_->Open(exec_status, partition_offset);
  RETURN_IF_CANCELLED(exec_status);

  // NOTE(review): barrier 0 appears to ensure the winning thread's shared
  // initialization (block_buffer_ etc.) is visible before any thread starts
  // buffering left-child blocks -- confirm against BarrierArrive's contract.
  BarrierArrive(0);

  NestLoopJoinContext *jtc = CreateOrReuseContext(crm_numa_sensitive);
  // create a new block to hold the results from the left child
  // and add results to the dynamic buffer
  //  jtc->block_for_asking_ == BlockStreamBase::createBlock(
  //                                state_.input_schema_left_,
  //                                state_.block_size_);
  CreateBlockStream(jtc->block_for_asking_, state_.input_schema_left_);
  //  auto temp = jtc->block_for_asking_->getBlock();
  //  cout << "temp start" << temp << endl;
  //
  //  cout << "init block_for_asking_ : " << jtc->block_for_asking_->getBlock()
  //       << " is reference : " << jtc->block_for_asking_->isIsReference() <<
  //       endl;
  // Drain the left child: each filled block is handed to the shared buffer
  // and a fresh block is created for the next Next() call.
  while (state_.child_left_->Next(exec_status, jtc->block_for_asking_)) {
    if (exec_status->is_cancelled()) {
      // Cancelled mid-drain: release the in-flight block before bailing out.
      if (NULL != jtc->block_for_asking_) {
        delete jtc->block_for_asking_;
        jtc->block_for_asking_ = NULL;
      }
      return false;
    }
    //    cout << "after assgin start :" << jtc->block_for_asking_->getBlock()
    //         << " is reference : " << jtc->block_for_asking_->isIsReference()
    //         << endl;
    block_buffer_->atomicAppendNewBlock(jtc->block_for_asking_);
    //    if (!jtc->block_for_asking_->isIsReference()) {
    CreateBlockStream(jtc->block_for_asking_, state_.input_schema_left_);
    //    } else {
    //      //      cout << "temp after" << temp << endl;
    //      //      delete temp;
    //      CreateBlockStream(jtc->block_for_asking_,
    //      state_.input_schema_left_);
    //      jtc->block_for_asking_->setIsReference(false);
    //    }
    //    cout << "new start :" << jtc->block_for_asking_->getBlock()
    //         << " is reference : " << jtc->block_for_asking_->isIsReference()
    //         << endl;
  }
  //  cout << "buffer_size_ : " << block_buffer_->GetBufferSize() << endl;
  //  the last block is created without storing the results from the left
  // child

  // The spare block created after the final Next() was never appended to
  // the buffer, so it must be freed here to avoid a leak.
  if (NULL != jtc->block_for_asking_) {
    delete jtc->block_for_asking_;
    jtc->block_for_asking_ = NULL;
  }
  // when the finished expanded thread finished its allocated work, it can be
  // called back here. What should be noticed that the callback meas the to
  // exit on the of the thread
  if (ExpanderTracker::getInstance()->isExpandedThreadCallBack(
          pthread_self())) {
    UnregisterExpandedThreadToAllBarriers(1);
    LOG(INFO) << "[NestloopJoin]: [the" << pthread_self()
              << "the thread is called to exit]" << std::endl;
    return true;  // the
  }
  // NOTE(review): barrier 1 presumably ensures every thread has finished
  // buffering the left side before any thread opens the right child and
  // starts probing -- confirm. Also note `timer`/`winning_thread` are set
  // above but never read in this function as visible here.
  BarrierArrive(1);  // ??ERROR
                     //	join_thread_context* jtc=new join_thread_context();
                     //  jtc->block_for_asking_ == BlockStreamBase::createBlock(
  //                                state_.input_schema_right_,
  //                                state_.block_size_);
  // Repurpose the per-thread context for the probe phase: a scratch block
  // for right-child results plus iterators over it and the shared buffer.
  CreateBlockStream(jtc->block_for_asking_, state_.input_schema_right_);
  jtc->block_for_asking_->setEmpty();
  jtc->block_stream_iterator_ = jtc->block_for_asking_->createIterator();
  jtc->buffer_iterator_ = block_buffer_->createIterator();

  // underlying bug: as for buffer_iterator may be NULL, it's necessary to let
  // every buffer_iterator of each thread point to an empty block
  // jtc->buffer_stream_iterator_ =
  //    jtc->buffer_iterator_.nextBlock()->createIterator();

  InitContext(jtc);  // rename this function, here means to store the thread
                     // context in the operator context
  RETURN_IF_CANCELLED(exec_status);
  state_.child_right_->Open(exec_status, partition_offset);
  return true;
}