/**
 * Opens the random-disk-access scan for one worker thread.
 *
 * Every caller first contributes one block (built from state_.c_schema_) to the
 * free block list. Exactly one caller wins sema_open_.try_wait(); it opens the
 * data file and stores its length in file_length_. All other callers spin until
 * open_finished_ is raised.
 *
 * The winner raises open_finished_ on the failure paths as well, and the
 * waiters then inspect fd_, so a missing file makes every thread return false
 * instead of leaving the waiters spinning forever.
 *
 * @param part_off  partition offset; not used by this function.
 * @return the result of state_.child_->open() when the file was opened,
 *         false when the file could not be opened or its length not obtained.
 */
bool ExpandableBlockStreamRandomDiskAccess::open(const PartitionOffset& part_off) {
	AtomicPushFreeBlockStream(BlockStreamBase::createBlock(state_.c_schema_,state_.block_size_));
	printf("Free block stream list added!\n");

	if (sema_open_.try_wait()) {
		printf("RDA: Scan open!\n");

		/* the winning thread opens the file on behalf of all threads;
		 * note that fd_ holds a FILE* here, not a file descriptor */
		fd_ = fopen(state_.filename_.c_str(), "r+");
		if (NULL == fd_)
		{
			printf("Cannot open file %s!\n", state_.filename_.c_str());
			/* release the waiters; they observe fd_ == NULL and fail too */
			open_finished_ = true;
			return false;
		}

		/* the file length is the stream position after seeking to the end */
		long end_offset = -1;
		if (0 == fseek(fd_, 0, SEEK_END))
		{
			end_offset = ftell(fd_);
		}
		if (end_offset < 0)
		{
			printf("Cannot get the length of file %s!\n", state_.filename_.c_str());
			fclose(fd_);
			fd_ = NULL;
			open_finished_ = true;
			return false;
		}
		file_length_ = end_offset;

		printf("RDA: Open is successful!\n");
		open_finished_ = true;
		return state_.child_->open();
	}

	/* losing threads wait until the winner has finished (or given up) */
	while (!open_finished_) {
		usleep(1);
	}
	if (NULL == fd_)
	{
		return false;
	}
	return state_.child_->open();
}
示例#2
0
bool InOperator::Open(const PartitionOffset& partition_offset) {
  state_.child_set_->Open(partition_offset);
  state_.child_in_->Open(partition_offset);
  AtomicPushFreeHtBlockStream(BlockStreamBase::createBlock(
      state_.schema_child_set_, state_.block_size_));
  AtomicPushFreeBlockStream(BlockStreamBase::createBlock(
      state_.schema_child_in_, state_.block_size_));

  if (sema_open_.try_wait()) {
    // initialize hash table, use the child_set to build hash table
    hash_func_ =
        PartitionFunctionFactory::createBoostHashFunction(state_.ht_nbuckets_);
    vector<unsigned> ht_index;
    ht_index.push_back(state_.index_child_set_);
    hash_table_ = new BasicHashTable(
        state_.ht_nbuckets_, state_.ht_bucket_size_,
        (state_.schema_child_set_->getSubSchema(ht_index))->getTupleMaxSize());
    ht_index.clear();
    open_finished_ = true;
  } else {
    while (!open_finished_) usleep(1);
  }

  void* cur_tuple = NULL;
  void* tuple_in_hashtable = NULL;
  unsigned bn = 0;

  BlockStreamBase* bsb = AtomicPopFreeHtBlockStream();
  while (state_.child_set_->Next(bsb)) {
    BlockStreamBase::BlockStreamTraverseIterator* bsti = bsb->createIterator();
    bsti->reset();
    while (cur_tuple = bsti->nextTuple()) {
      bn = state_.schema_child_set_->getcolumn(state_.index_child_set_)
               .operate->GetPartitionValue(
                   state_.schema_child_set_->getColumnAddess(
                       state_.index_child_set_, cur_tuple),
                   state_.ht_nbuckets_);
      tuple_in_hashtable = hash_table_->atomicAllocate(bn);
      state_.schema_child_set_->getcolumn(state_.index_child_set_)
          .operate->Assign(state_.schema_child_set_->getColumnAddess(
                                   state_.index_child_set_, cur_tuple),
                               tuple_in_hashtable);
    }
    bsb->setEmpty();
  }
  barrier_->Arrive();
  printf("-----------In Iterator Open Successful!-----------\n");
  return true;
}
/**
 * Opens the random-memory-access scan for one worker thread.
 *
 * Every caller first contributes one block (built from state_.c_schema_) to the
 * free block list. Exactly one caller wins sema_open_.try_wait(); it opens the
 * data file read-only, measures it with lseek and maps it privately into
 * memory (base_ / data_). All other callers spin until open_finished_ is set.
 *
 * mmap reports failure with MAP_FAILED, not with a null pointer, so the result
 * is compared against MAP_FAILED. On any failure the winner sets base_ to NULL
 * and still raises open_finished_, so the waiting threads wake up, see the
 * NULL base_ and return false instead of spinning forever.
 *
 * @param part_off  partition offset; not used by this function.
 * @return the result of state_.child_->open() when the file was mapped,
 *         false when the file could not be opened or mapped.
 */
bool ExpandableBlockStreamRandomMemAccess::open(const PartitionOffset& part_off) {
	AtomicPushFreeBlockStream(BlockStreamBase::createBlock(state_.c_schema_,state_.block_size_));
	printf("Free block stream list added!\n");

	if (sema_open_.try_wait()) {
		printf("RMA: Scan open!\n");

		bool mapped = false;

		/* the winning thread opens and maps the file on behalf of all threads */
		fd_ = FileOpen(state_.filename_.c_str(), O_RDONLY);
		if (fd_ == -1) {
			printf("Cannot open file %s! Reason: %s\n",
					state_.filename_.c_str(), strerror(errno));
		} else {
			file_length_ = lseek(fd_, 0, SEEK_END);
			lseek(fd_,0,SEEK_SET);

			void* mapping = mmap(0, file_length_, PROT_READ, MAP_PRIVATE, fd_, 0);
			if (mapping == MAP_FAILED) {
				printf("mmap errors!\n");
				/* do not leak the descriptor; ::close because the enclosing
				 * class may declare its own close() member */
				::close(fd_);
				fd_ = -1;
			} else {
				base_ = (char*) mapping;
				data_ = base_;
				mapped = true;
			}
		}

		if (!mapped) {
			/* NULL base_ is the failure signal read by the waiting threads */
			base_ = NULL;
			data_ = base_;
			open_finished_ = true;
			return false;
		}

		printf("RMA: Open is successful!\n");
		open_finished_ = true;
		return state_.child_->open();
	}

	/* losing threads wait until the winner has finished (or given up) */
	while (!open_finished_) {
		usleep(1);
	}
	if (base_ == NULL) {
		return false;
	}
	return state_.child_->open();
}
示例#4
0
/**
 * Fills `block` with the tuples of child_in whose key column
 * (index_child_in_) has an equal entry in the hash table.
 *
 * Work is resumed from a previously parked RemainingBlock if one is available;
 * afterwards fresh blocks are pulled from child_in. For each input tuple the
 * bucket is derived from the key column and the bucket is scanned with
 * Equal(); a matching tuple is copied into `block`. When `block` has no room
 * for the next matching tuple, the current input block and its iterator are
 * parked with AtomicPushRemainingBlock and the call returns.
 *
 * Pointer results are tested against NULL rather than with `> 0`, because an
 * ordered comparison between a pointer and the literal 0 is not valid C++.
 *
 * @param block  output block to append matching tuples to.
 * @return true if `block` is full or holds at least one tuple when the child
 *         is exhausted; false if the child is exhausted and `block` is empty.
 */
bool InOperator::Next(BlockStreamBase* block) {
  unsigned bn;
  RemainingBlock rb;
  void* tuple_from_child_in = NULL;
  void* tuple_in_output_block = NULL;
  void* tuple_in_hashtable;
  void* key_in_input;
  bool passIn = false;
  BasicHashTable::Iterator hashtable_iterator = hash_table_->CreateIterator();

  // Phase 1: finish a block that an earlier call could not fully consume.
  if (AtomicPopRemainingBlock(rb)) {
    while ((tuple_from_child_in = rb.blockstream_iterator_->currentTuple()) !=
           NULL) {
      passIn = false;
      // The key address does not change while the bucket is scanned.
      key_in_input = state_.schema_child_in_->getColumnAddess(
          state_.index_child_in_, tuple_from_child_in);
      bn = state_.schema_child_in_->getcolumn(state_.index_child_in_)
               .operate->GetPartitionValue(key_in_input, state_.ht_nbuckets_);
      hash_table_->placeIterator(hashtable_iterator, bn);
      while ((tuple_in_hashtable = hashtable_iterator.readnext()) != NULL) {
        if (state_.schema_child_in_->getcolumn(state_.index_child_in_)
                .operate->Equal(tuple_in_hashtable, key_in_input)) {
          passIn = true;
          break;
        }
      }
      if (passIn) {
        const unsigned bytes = state_.schema_child_in_->getTupleMaxSize();
        if ((tuple_in_output_block = block->allocateTuple(bytes)) != NULL) {
          state_.schema_child_in_->copyTuple(tuple_from_child_in,
                                             tuple_in_output_block);
          rb.blockstream_iterator_->increase_cur_();
        } else {
          // Output is full: park the input block again, cursor still on the
          // tuple that did not fit.
          AtomicPushRemainingBlock(rb);
          return true;
        }
      } else {
        rb.blockstream_iterator_->increase_cur_();
      }
    }
    // NOTE(review): unlike the loop below, the iterator of an exhausted
    // remaining block is not destroyed here - confirm who owns it.
    AtomicPushFreeBlockStream(rb.bsb_in_);
  }

  // Phase 2: pull fresh blocks from the child until it is exhausted or the
  // output block fills up.
  BlockStreamBase* block_for_asking = AtomicPopFreeBlockStream();
  block_for_asking->setEmpty();
  while (state_.child_in_->Next(block_for_asking)) {
    BlockStreamBase::BlockStreamTraverseIterator* traverse_iterator =
        block_for_asking->createIterator();
    while ((tuple_from_child_in = traverse_iterator->currentTuple()) != NULL) {
      passIn = false;
      key_in_input = state_.schema_child_in_->getColumnAddess(
          state_.index_child_in_, tuple_from_child_in);
      bn = state_.schema_child_in_->getcolumn(state_.index_child_in_)
               .operate->GetPartitionValue(key_in_input, state_.ht_nbuckets_);
      hash_table_->placeIterator(hashtable_iterator, bn);
      while ((tuple_in_hashtable = hashtable_iterator.readCurrent()) != NULL) {
        if (state_.schema_child_in_->getcolumn(state_.index_child_in_)
                .operate->Equal(tuple_in_hashtable, key_in_input)) {
          passIn = true;
          break;
        }
        hashtable_iterator.increase_cur_();
      }
      if (passIn) {
        const unsigned bytes = state_.schema_child_in_->getTupleMaxSize();
        if ((tuple_in_output_block = block->allocateTuple(bytes)) != NULL) {
          state_.schema_child_in_->copyTuple(tuple_from_child_in,
                                             tuple_in_output_block);
          traverse_iterator->increase_cur_();
        } else {
          // Output is full: park the partially consumed input block.
          AtomicPushRemainingBlock(
              RemainingBlock(block_for_asking, traverse_iterator));
          return true;
        }
      } else {
        traverse_iterator->increase_cur_();
      }
    }
    // The input block is exhausted but the output block still has room.
    traverse_iterator->~BlockStreamTraverseIterator();
    block_for_asking->setEmpty();
  }

  // The child is exhausted; report whether anything was produced.
  AtomicPushFreeBlockStream(block_for_asking);
  return !block->Empty();
}
/**
 * Opens the join: opens the left child, builds the hash table from it, waits
 * at the barrier and then opens the right child.
 *
 * Each calling thread donates one block to the hash-build free list and one to
 * the probe free list. The single thread that wins sema_open_.try_wait() fills
 * the column-position maps, creates the hash function and allocates the hash
 * table; every other thread spins until open_finished_ is set. After that, all
 * threads drain the left child and copy every tuple into the bucket selected
 * by its first join column (joinIndex_left[0]).
 *
 * The unused partition function and hash-table iterators that used to be
 * created here on every call (and never released) have been removed.
 *
 * @param partition_offset  forwarded unchanged to both children.
 * @return always true.
 */
bool BlockStreamJoinIterator::open(const PartitionOffset& partition_offset){
#ifdef TIME
	startTimer(&timer);
#endif
	state_.child_left->open(partition_offset);
	AtomicPushFreeHtBlockStream(BlockStreamBase::createBlock(state_.input_schema_left,state_.block_size_));
	AtomicPushFreeBlockStream(BlockStreamBase::createBlock(state_.input_schema_right,state_.block_size_));
	cout<<"AtomicPushFreeBlockStream\n\n"<<endl;
	cout<<"join open begin"<<endl;
	if(sema_open_.try_wait()){
		/* output layout: left join columns, then left payload, then right payload */
		unsigned output_index=0;
		for(unsigned i=0;i<state_.joinIndex_left.size();i++){
			joinIndex_left_to_output[i]=output_index;
			output_index++;
		}
		for(unsigned i=0;i<state_.payload_left.size();i++){
			payload_left_to_output[i]=output_index;
			output_index++;
		}
		for(unsigned i=0;i<state_.payload_right.size();i++){
			payload_right_to_output[i]=output_index;
			output_index++;
		}
		/* the table used in the build phase is the left one */
		hash=PartitionFunctionFactory::createBoostHashFunction(state_.ht_nbuckets);
		hashtable=new BasicHashTable(state_.ht_nbuckets,state_.ht_bucketsize,state_.input_schema_left->getTupleMaxSize());
		cout<<"in the open master "<<endl;
		open_finished_=true;
	}else{
		/* wait until the winning thread has created the hash table */
		while (!open_finished_) {
			usleep(1);
		}
	}

	void *cur;
	void *tuple_in_hashtable;
	unsigned bn;

	BlockStreamBase *bsb=AtomicPopFreeHtBlockStream();
	cout<<"in the hashtable build stage!"<<endl;
	while(state_.child_left->next(bsb)){
		BlockStreamBase::BlockStreamTraverseIterator *bsti=bsb->createIterator();

		bsti->reset();
		while((cur=bsti->nextTuple())!=NULL){
			consumed_tuples_from_left++;

			/* the bucket is chosen from the first join column only */
			bn=state_.input_schema_left->getcolumn(state_.joinIndex_left[0]).operate->getPartitionValue(state_.input_schema_left->getColumnAddess(state_.joinIndex_left[0],cur),hash);

			/* the whole left tuple is stored in the hash table */
			tuple_in_hashtable=hashtable->atomicAllocate(bn);
			state_.input_schema_left->copyTuple(cur,tuple_in_hashtable);
		}
		bsb->setEmpty();
	}

	tuples_in_hashtable=0;
	produced_tuples=0;
	consumed_tuples_from_right=0;

	/* NOTE(review): the return values of both child open() calls are ignored,
	 * as in the original code - confirm whether failures should propagate. */
	barrier_->Arrive();
	cout<<"pass the arrive of barrier!!!"<<endl;
	state_.child_right->open(partition_offset);
	return true;
}
/**
 * Probe phase of the hash join: appends joined tuples to `block`.
 *
 * The loop alternates between two steps. If a remaining_block can be popped,
 * its right-side tuples are probed: for the current right tuple the bucket
 * iterator stored in the remaining_block is advanced, all join columns are
 * compared, and on a match the left tuple followed by the right tuple is
 * written into `block`. If `block` has no room, the remaining_block is pushed
 * back (bucket iterator still on the unmatched entry) and the call returns
 * true. Otherwise a fresh block is fetched from the right child, its first
 * tuple's bucket is located, and the block is pushed as a remaining_block to
 * be handled by the next loop iteration.
 *
 * Compared with the previous version, the per-call scratch buffer, the
 * per-call partition function and the unused iterator copy are gone: none of
 * them influenced the result, and the partition function was never released.
 *
 * @param block  output block to append joined tuples to.
 * @return true if `block` is full, or holds tuples when the right child is
 *         exhausted; false if the right child is exhausted and `block` is empty.
 */
bool BlockStreamJoinIterator::next(BlockStreamBase *block){
	unsigned bn;
	void *result_tuple;
	void *tuple_from_right_child;
	void *tuple_in_hashtable;
	void *key_in_input;
	void *key_in_hashtable;
	bool key_exit;

	remaining_block rb;

	while(true){
		if(atomicPopRemainingBlock(rb)){
			while((tuple_from_right_child=rb.blockstream_iterator->currentTuple())!=NULL){
				/* scan the rest of the bucket selected for this right tuple */
				while((tuple_in_hashtable=rb.hashtable_iterator_.readCurrent())!=NULL){
					key_exit=true;
					for(unsigned i=0;i<state_.joinIndex_right.size();i++){
						key_in_input=state_.input_schema_right->getColumnAddess(state_.joinIndex_right[i],tuple_from_right_child);
						key_in_hashtable=state_.ht_schema->getColumnAddess(state_.joinIndex_left[i],tuple_in_hashtable);
						if(!state_.input_schema_right->getcolumn(state_.joinIndex_right[i]).operate->equal(key_in_input,key_in_hashtable)){
							key_exit=false;
							break;
						}
					}
					if(key_exit){
						if((result_tuple=block->allocateTuple(state_.output_schema->getTupleMaxSize()))!=NULL){
							produced_tuples++;
							/* output tuple = left tuple immediately followed by right tuple */
							const unsigned copyed_bytes=state_.input_schema_left->copyTuple(tuple_in_hashtable,result_tuple);
							state_.input_schema_right->copyTuple(tuple_from_right_child,static_cast<char*>(result_tuple)+copyed_bytes);
						}
						else{
							/* output is full: park the state so the same match is retried */
							atomicPushRemainingBlock(rb);
							return true;
						}
					}
					rb.hashtable_iterator_.increase_cur_();
				}
				rb.blockstream_iterator->increase_cur_();
				consumed_tuples_from_right++;

				/* position the bucket iterator for the next right tuple, if any */
				if((tuple_from_right_child=rb.blockstream_iterator->currentTuple())!=NULL){
					bn=state_.input_schema_right->getcolumn(state_.joinIndex_right[0]).operate->getPartitionValue(state_.input_schema_right->getColumnAddess(state_.joinIndex_right[0],tuple_from_right_child),hash);
					hashtable->placeIterator(rb.hashtable_iterator_,bn);
				}
			}
			/* NOTE(review): rb.blockstream_iterator is not released here - confirm ownership */
			AtomicPushFreeBlockStream(rb.bsb_right_);
		}

		/* no remaining block (or it is exhausted): fetch a new one from the right child */
		rb.bsb_right_=AtomicPopFreeBlockStream();
		rb.bsb_right_->setEmpty();
		rb.hashtable_iterator_=hashtable->CreateIterator();
		if(state_.child_right->next(rb.bsb_right_)==false){
			if(block->Empty()==true){
				AtomicPushFreeBlockStream(rb.bsb_right_);
				printf("****join next produces %d tuples while consumed %d tuples from right child and %d tuples from left, hash table has %d tuples\n",produced_tuples,consumed_tuples_from_right,consumed_tuples_from_left,tuples_in_hashtable);
				return false;
			}
			else{
				AtomicPushFreeBlockStream(rb.bsb_right_);
				return true;
			}
		}
		rb.blockstream_iterator=rb.bsb_right_->createIterator();
		if((tuple_from_right_child=rb.blockstream_iterator->currentTuple())!=NULL){
			bn=state_.input_schema_right->getcolumn(state_.joinIndex_right[0]).operate->getPartitionValue(state_.input_schema_right->getColumnAddess(state_.joinIndex_right[0],tuple_from_right_child),hash);
			hashtable->placeIterator(rb.hashtable_iterator_,bn);
		}
		/* hand the new block to the probing step at the top of the loop */
		atomicPushRemainingBlock(rb);
	}
	/* not reached: the loop above only leaves through a return statement */
}
/**
 * Fills `block` with tuples fetched from the memory-mapped file.
 *
 * Each tuple delivered by the child starts with an int that is used as a
 * tuple index into the mapping: the tuple copied to the output is the one at
 * base_ + index * bytes, where bytes is the tuple size reported by
 * state_.f_schema_. A partially consumed child block is parked as a
 * remaining_block when `block` runs out of space and is resumed by the next
 * call.
 *
 * Fixes over the previous version:
 *  - the tuple size in the remaining-block path was computed from the
 *    uninitialized output pointer; it is now computed from the child tuple,
 *    exactly as in the fresh-block path;
 *  - the byte offset is computed in size_t so it does not wrap at 4 GiB;
 *  - pointer results are compared with NULL instead of `> 0`.
 *
 * @param block  output block to append tuples to.
 * @return true if `block` is full, or holds tuples when the child is
 *         exhausted; false if the child is exhausted and `block` is empty.
 */
bool ExpandableBlockStreamRandomMemAccess::next(BlockStreamBase* block) {

	remaining_block rb;
	void* tuple_from_child;
	void* tuple_in_block;

	if (atomicPopRemainingBlock(rb))
	{
		while ((tuple_from_child = rb.iterator->currentTuple()) != NULL)
		{
			const unsigned bytes = state_.f_schema_->getTupleActualSize(tuple_from_child);
			if ((tuple_in_block = block->allocateTuple(bytes)) != NULL)
			{
				/* the block has enough space to hold this tuple */
				const size_t offset = static_cast<size_t>(*(int*)tuple_from_child) * bytes;
				state_.f_schema_->copyTuple((void*)(base_ + offset), tuple_in_block);
				rb.iterator->increase_cur_();
			}
			else
			{
				/* the block is full; before returning, push the remaining block back */
				atomicPushRemainingBlock(rb);
				return true;
			}
		}
		/* NOTE(review): rb.iterator is not destroyed here, unlike the iterator
		 * in the loop below - confirm who owns it. */
		AtomicPushFreeBlockStream(rb.block);
	}

	/* When the program arrives here, there is no remaining block or the
	 * remaining block is exhausted, so we read a new block from the child.
	 */
	BlockStreamBase* block_for_asking = AtomicPopFreeBlockStream();
	block_for_asking->setEmpty();
	while (state_.child_->next(block_for_asking))
	{
		BlockStreamBase::BlockStreamTraverseIterator* traverse_iterator = block_for_asking->createIterator();
		while ((tuple_from_child = traverse_iterator->currentTuple()) != NULL)
		{
			const unsigned bytes = state_.f_schema_->getTupleActualSize(tuple_from_child);
			if ((tuple_in_block = block->allocateTuple(bytes)) != NULL)
			{
				/* the block has enough space to hold this tuple */
				const size_t offset = static_cast<size_t>(*(int*)tuple_from_child) * bytes;
				state_.f_schema_->copyTuple((void*)(base_ + offset), tuple_in_block);
				traverse_iterator->increase_cur_();
			}
			else
			{
				/* the block is full; before returning, park the partially consumed block */
				atomicPushRemainingBlock(remaining_block(block_for_asking, traverse_iterator));
				return true;
			}
		}
		/* the block_for_asking is exhausted, but the block is not full */
		traverse_iterator->~BlockStreamTraverseIterator();
		block_for_asking->setEmpty();
	}
	/* the child iterator is exhausted, but the block is not full */
	AtomicPushFreeBlockStream(block_for_asking);
	return !block->Empty();
}