Ejemplo n.º 1
0
bool InOperator::Open(const PartitionOffset& partition_offset) {
  state_.child_set_->Open(partition_offset);
  state_.child_in_->Open(partition_offset);
  AtomicPushFreeHtBlockStream(BlockStreamBase::createBlock(
      state_.schema_child_set_, state_.block_size_));
  AtomicPushFreeBlockStream(BlockStreamBase::createBlock(
      state_.schema_child_in_, state_.block_size_));

  if (sema_open_.try_wait()) {
    // initialize hash table, use the child_set to build hash table
    hash_func_ =
        PartitionFunctionFactory::createBoostHashFunction(state_.ht_nbuckets_);
    vector<unsigned> ht_index;
    ht_index.push_back(state_.index_child_set_);
    hash_table_ = new BasicHashTable(
        state_.ht_nbuckets_, state_.ht_bucket_size_,
        (state_.schema_child_set_->getSubSchema(ht_index))->getTupleMaxSize());
    ht_index.clear();
    open_finished_ = true;
  } else {
    while (!open_finished_) usleep(1);
  }

  void* cur_tuple = NULL;
  void* tuple_in_hashtable = NULL;
  unsigned bn = 0;

  BlockStreamBase* bsb = AtomicPopFreeHtBlockStream();
  while (state_.child_set_->Next(bsb)) {
    BlockStreamBase::BlockStreamTraverseIterator* bsti = bsb->createIterator();
    bsti->reset();
    while (cur_tuple = bsti->nextTuple()) {
      bn = state_.schema_child_set_->getcolumn(state_.index_child_set_)
               .operate->GetPartitionValue(
                   state_.schema_child_set_->getColumnAddess(
                       state_.index_child_set_, cur_tuple),
                   state_.ht_nbuckets_);
      tuple_in_hashtable = hash_table_->atomicAllocate(bn);
      state_.schema_child_set_->getcolumn(state_.index_child_set_)
          .operate->Assign(state_.schema_child_set_->getColumnAddess(
                                   state_.index_child_set_, cur_tuple),
                               tuple_in_hashtable);
    }
    bsb->setEmpty();
  }
  barrier_->Arrive();
  printf("-----------In Iterator Open Successful!-----------\n");
  return true;
}
Ejemplo n.º 2
0
/**
 * Opens the join for one partition and runs the hash-table build phase.
 *
 * Steps, in order:
 *  1. Open the left child and register one free block for each input schema.
 *  2. The caller that acquires sema_open_ fills the column-to-output index
 *     maps, creates the hash function and the hash table, and sets
 *     open_finished_; every other caller polls open_finished_.
 *  3. Every caller drains the left child and copies each tuple into the hash
 *     table bucket selected by the first join-key column (joinIndex_left[0]).
 *  4. Per-run counters are reset, the caller arrives at barrier_, and only
 *     then the right child is opened.
 *
 * The original author noted that BasicHashTable::CreateIterator() only hands
 * out readable objects and therefore needs no locking; the iterators that
 * were created here were never used, so they are no longer created.
 *
 * @param partition_offset partition identifier forwarded to both children.
 * @return always true.
 */
bool BlockStreamJoinIterator::open(const PartitionOffset& partition_offset){
#ifdef TIME
	startTimer(&timer);
#endif
	state_.child_left->open(partition_offset);
	AtomicPushFreeHtBlockStream(BlockStreamBase::createBlock(state_.input_schema_left,state_.block_size_));
	AtomicPushFreeBlockStream(BlockStreamBase::createBlock(state_.input_schema_right,state_.block_size_));
	cout<<"AtomicPushFreeBlockStream\n\n"<<endl;
	cout<<"join open begin"<<endl;
	if(sema_open_.try_wait()){
		/* Output column order: left join keys, left payload, right payload. */
		unsigned output_index=0;
		for(unsigned i=0;i<state_.joinIndex_left.size();i++){
			joinIndex_left_to_output[i]=output_index;
			output_index++;
		}
		for(unsigned i=0;i<state_.payload_left.size();i++){
			payload_left_to_output[i]=output_index;
			output_index++;
		}
		for(unsigned i=0;i<state_.payload_right.size();i++){
			payload_right_to_output[i]=output_index;
			output_index++;
		}
		/* Currently, the block is 4096, and the table in build phase is left one*/
		hash=PartitionFunctionFactory::createBoostHashFunction(state_.ht_nbuckets);
		hashtable=new BasicHashTable(state_.ht_nbuckets,state_.ht_bucketsize,state_.input_schema_left->getTupleMaxSize());
		cout<<"in the open master "<<endl;
		open_finished_=true;
	}else{
		/* The semaphore was not acquired: poll until open_finished_ is set. */
		while (!open_finished_) {
			usleep(1);
		}
	}

	BlockStreamBase *bsb=AtomicPopFreeHtBlockStream();
	cout<<"in the hashtable build stage!"<<endl;
	while(state_.child_left->next(bsb)){
		// NOTE(review): the iterator is not released in this function -- confirm
		// who owns objects returned by createIterator().
		BlockStreamBase::BlockStreamTraverseIterator *bsti=bsb->createIterator();
		bsti->reset();

		void *cur=NULL;
		while((cur=bsti->nextTuple())!=NULL){
			// NOTE(review): member counter incremented with no synchronization
			// visible here -- confirm it is safe if open() runs on several threads.
			consumed_tuples_from_left++;

			/* The bucket is chosen from the first join-key column only. */
			const unsigned bn=state_.input_schema_left->getcolumn(state_.joinIndex_left[0]).operate->getPartitionValue(state_.input_schema_left->getColumnAddess(state_.joinIndex_left[0],cur),hash);

			/* Reserve a slot in that bucket and copy the whole left tuple into it. */
			void *tuple_in_hashtable=hashtable->atomicAllocate(bn);
			state_.input_schema_left->copyTuple(cur,tuple_in_hashtable);
		}
		/* Mark the block empty so the next next() call can refill it. */
		bsb->setEmpty();
	}

	tuples_in_hashtable=0;
	produced_tuples=0;
	consumed_tuples_from_right=0;

	/* The right child is opened only after this caller has passed the barrier. */
	barrier_->Arrive();
	cout<<"pass the arrive of barrier!!!"<<endl;
	state_.child_right->open(partition_offset);
	return true;
}