bool ExpandableBlockStreamRandomDiskAccess::open(const PartitionOffset& part_off) { AtomicPushFreeBlockStream(BlockStreamBase::createBlock(state_.c_schema_,state_.block_size_)); printf("Free block stream list added!\n"); if (sema_open_.try_wait()) { printf("RDA: Scan open!\n"); /* the winning thread does the read job in the open function*/ fd_ = fopen(state_.filename_.c_str(), "r+"); if (NULL == fd_) { printf("Cannot open file %s!\n", state_.filename_.c_str()); return false; } fseek(fd_, 0, SEEK_END); file_length_ = ftell(fd_); printf("RDA: Open is successful!\n"); open_finished_ = true; return state_.child_->open(); } else { while (!open_finished_) { usleep(1); } return state_.child_->open(); } }
bool InOperator::Open(const PartitionOffset& partition_offset) { state_.child_set_->Open(partition_offset); state_.child_in_->Open(partition_offset); AtomicPushFreeHtBlockStream(BlockStreamBase::createBlock( state_.schema_child_set_, state_.block_size_)); AtomicPushFreeBlockStream(BlockStreamBase::createBlock( state_.schema_child_in_, state_.block_size_)); if (sema_open_.try_wait()) { // initialize hash table, use the child_set to build hash table hash_func_ = PartitionFunctionFactory::createBoostHashFunction(state_.ht_nbuckets_); vector<unsigned> ht_index; ht_index.push_back(state_.index_child_set_); hash_table_ = new BasicHashTable( state_.ht_nbuckets_, state_.ht_bucket_size_, (state_.schema_child_set_->getSubSchema(ht_index))->getTupleMaxSize()); ht_index.clear(); open_finished_ = true; } else { while (!open_finished_) usleep(1); } void* cur_tuple = NULL; void* tuple_in_hashtable = NULL; unsigned bn = 0; BlockStreamBase* bsb = AtomicPopFreeHtBlockStream(); while (state_.child_set_->Next(bsb)) { BlockStreamBase::BlockStreamTraverseIterator* bsti = bsb->createIterator(); bsti->reset(); while (cur_tuple = bsti->nextTuple()) { bn = state_.schema_child_set_->getcolumn(state_.index_child_set_) .operate->GetPartitionValue( state_.schema_child_set_->getColumnAddess( state_.index_child_set_, cur_tuple), state_.ht_nbuckets_); tuple_in_hashtable = hash_table_->atomicAllocate(bn); state_.schema_child_set_->getcolumn(state_.index_child_set_) .operate->Assign(state_.schema_child_set_->getColumnAddess( state_.index_child_set_, cur_tuple), tuple_in_hashtable); } bsb->setEmpty(); } barrier_->Arrive(); printf("-----------In Iterator Open Successful!-----------\n"); return true; }
bool ExpandableBlockStreamRandomMemAccess::open(const PartitionOffset& part_off) { AtomicPushFreeBlockStream(BlockStreamBase::createBlock(state_.c_schema_,state_.block_size_)); printf("Free block stream list added!\n"); if (sema_open_.try_wait()) { printf("RMA: Scan open!\n"); /* the winning thread does the read job in the open function*/ fd_ = FileOpen(state_.filename_.c_str(), O_RDONLY); if (fd_ == -1) { printf("Cannot open file %s! Reason: %s\n", state_.filename_.c_str(), strerror(errno)); return false; } file_length_ = lseek(fd_, 0, SEEK_END); lseek(fd_,0,SEEK_SET); base_ = (char*) mmap(0, file_length_, PROT_READ, MAP_PRIVATE, fd_, 0); if (base_ == 0) { printf("mmap errors!\n"); return false; } data_=base_; if (data_ != 0) { printf("RMA: Open is successful!\n"); open_finished_ = true; return state_.child_->open(); } else { return false; } } else { while (!open_finished_) { usleep(1); } return state_.child_->open(); } }
/**
 * Appends to `block` the child_in tuples whose column index_child_in_ has an
 * equal entry in hash_table_.
 *
 * A partially consumed input block left by an earlier call (if
 * AtomicPopRemainingBlock() yields one) is processed first; after that new
 * blocks are pulled from child_in. When `block` cannot hold another tuple the
 * current input position is saved with AtomicPushRemainingBlock() and the
 * function returns.
 *
 * @param block output block to fill.
 * @return true if `block` ran out of space or is non-empty once child_in is
 *         exhausted; false if child_in is exhausted and `block` is empty.
 */
bool InOperator::Next(BlockStreamBase* block) {
  RemainingBlock pending;
  void* probe_tuple = NULL;
  void* stored_key = NULL;
  BasicHashTable::Iterator bucket_it = hash_table_->CreateIterator();

  // Step 1: finish a block that a previous call could not fully consume.
  if (AtomicPopRemainingBlock(pending)) {
    while ((probe_tuple = pending.blockstream_iterator_->currentTuple()) !=
           NULL) {
      void* probe_key = state_.schema_child_in_->getColumnAddess(
          state_.index_child_in_, probe_tuple);
      const unsigned bucket =
          state_.schema_child_in_->getcolumn(state_.index_child_in_)
              .operate->GetPartitionValue(probe_key, state_.ht_nbuckets_);
      hash_table_->placeIterator(bucket_it, bucket);

      bool matched = false;
      while ((stored_key = bucket_it.readnext()) != NULL) {
        if (state_.schema_child_in_->getcolumn(state_.index_child_in_)
                .operate->Equal(stored_key, probe_key)) {
          matched = true;
          break;
        }
      }

      if (matched) {
        const unsigned tuple_bytes = state_.schema_child_in_->getTupleMaxSize();
        void* out_tuple = block->allocateTuple(tuple_bytes);
        if (out_tuple == NULL) {
          // Output block is full: remember where we stopped.
          AtomicPushRemainingBlock(pending);
          return true;
        }
        state_.schema_child_in_->copyTuple(probe_tuple, out_tuple);
      }
      pending.blockstream_iterator_->increase_cur_();
    }
    AtomicPushFreeBlockStream(pending.bsb_in_);
  }

  // Step 2: pull fresh blocks from child_in.
  BlockStreamBase* input_block = AtomicPopFreeBlockStream();
  input_block->setEmpty();
  while (state_.child_in_->Next(input_block)) {
    BlockStreamBase::BlockStreamTraverseIterator* input_it =
        input_block->createIterator();
    while ((probe_tuple = input_it->currentTuple()) != NULL) {
      void* probe_key = state_.schema_child_in_->getColumnAddess(
          state_.index_child_in_, probe_tuple);
      const unsigned bucket =
          state_.schema_child_in_->getcolumn(state_.index_child_in_)
              .operate->GetPartitionValue(probe_key, state_.ht_nbuckets_);
      hash_table_->placeIterator(bucket_it, bucket);

      bool matched = false;
      while ((stored_key = bucket_it.readCurrent()) != 0) {
        if (state_.schema_child_in_->getcolumn(state_.index_child_in_)
                .operate->Equal(stored_key, probe_key)) {
          matched = true;
          break;
        }
        bucket_it.increase_cur_();
      }

      if (matched) {
        const unsigned tuple_bytes = state_.schema_child_in_->getTupleMaxSize();
        void* out_tuple = block->allocateTuple(tuple_bytes);
        if (out_tuple == NULL) {
          // Output block is full: remember where we stopped.
          AtomicPushRemainingBlock(RemainingBlock(input_block, input_it));
          return true;
        }
        state_.schema_child_in_->copyTuple(probe_tuple, out_tuple);
      }
      input_it->increase_cur_();
    }
    input_it->~BlockStreamTraverseIterator();
    input_block->setEmpty();
  }

  // child_in is exhausted; report whether anything was produced.
  AtomicPushFreeBlockStream(input_block);
  return !block->Empty();
}
bool BlockStreamJoinIterator::open(const PartitionOffset& partition_offset){ #ifdef TIME startTimer(&timer); #endif state_.child_left->open(partition_offset); AtomicPushFreeHtBlockStream(BlockStreamBase::createBlock(state_.input_schema_left,state_.block_size_)); AtomicPushFreeBlockStream(BlockStreamBase::createBlock(state_.input_schema_right,state_.block_size_)); cout<<"AtomicPushFreeBlockStream\n\n"<<endl; cout<<"join open begin"<<endl; if(sema_open_.try_wait()){ unsigned output_index=0; for(unsigned i=0;i<state_.joinIndex_left.size();i++){ joinIndex_left_to_output[i]=output_index; output_index++; } for(unsigned i=0;i<state_.payload_left.size();i++){ payload_left_to_output[i]=output_index; output_index++; } for(unsigned i=0;i<state_.payload_right.size();i++){ payload_right_to_output[i]=output_index; output_index++; } /* Currently, the block is 4096, and the table in build phase is left one*/ hash=PartitionFunctionFactory::createBoostHashFunction(state_.ht_nbuckets); hashtable=new BasicHashTable(state_.ht_nbuckets,state_.ht_bucketsize,state_.input_schema_left->getTupleMaxSize()); cout<<"in the open master "<<endl; open_finished_=true; }else{ while (!open_finished_) { usleep(1); } } //hashtable createIterator的好处就是创建的都是可读的对象,不需要加锁 // lock_.acquire(); BasicHashTable::Iterator tmp_it=hashtable->CreateIterator(); // lock_.release(); void *cur; void *tuple_in_hashtable; unsigned bn; void *key_in_input; void *key_in_hashtable; void *value_in_input; void *value_in_hashtable; BlockStreamBase *bsb=AtomicPopFreeHtBlockStream(); PartitionFunction* hash_test=PartitionFunctionFactory::createBoostHashFunction(4); cout<<"in the hashtable build stage!"<<endl; // consumed_tuples_from_left=0; while(state_.child_left->next(bsb)){ BlockStreamBase::BlockStreamTraverseIterator *bsti=bsb->createIterator(); bsti->reset(); while(cur=bsti->nextTuple()){ consumed_tuples_from_left++; // // if(state_.ht_schema->getncolumns()>20) // state_.ht_schema->displayTuple(cur,"|B|"); ///for debug /* 
Currently, the join index is [0]-th column, so the hash table is based on the hash value of [0]-th column*/ // bn=hash->get_partition_value(*(unsigned long*)(state_.input_schema_left->getColumnAddess(state_.joinIndex_left[0],cur))); // bn=state_.input_schema_left->getcolumn(0).operate->getPartitionValue(state_.input_schema_left->getColumnAddess(state_.joinIndex_left[0],cur),hash); bn=state_.input_schema_left->getcolumn(state_.joinIndex_left[0]).operate->getPartitionValue(state_.input_schema_left->getColumnAddess(state_.joinIndex_left[0],cur),hash); // const unsigned test_bn=state_.input_schema_left->getcolumn(state_.joinIndex_left[0]).operate->getPartitionValue(state_.input_schema_left->getColumnAddess(state_.joinIndex_left[0],cur),hash_test); // if(rand()%10000<3){ // printf("key:%d\n",test_bn); // } // hashtable->placeIterator(tmp_it,bn); // lock_.acquire(); tuple_in_hashtable=hashtable->atomicAllocate(bn); /* copy join index columns*/ // for(unsigned i=0;i<state_.joinIndex_left.size();i++){ // key_in_input=state_.input_schema_left->getColumnAddess(state_.joinIndex_left[i],cur); // key_in_hashtable=state_.ht_schema->getColumnAddess(joinIndex_left_to_output[i],tuple_in_hashtable); // state_.input_schema_left->getcolumn(state_.joinIndex_left[i]).operate->assignment(key_in_input,key_in_hashtable); // // } // /* copy left payload columns*/ // for(unsigned i=0;i<state_.payload_left.size();i++){ // value_in_input=state_.input_schema_left->getColumnAddess(state_.payload_left[i],cur); // value_in_hashtable=state_.ht_schema->getColumnAddess(payload_left_to_output[i],tuple_in_hashtable); // state_.input_schema_left->getcolumn(state_.payload_left[i]).operate->assignment(value_in_input,value_in_hashtable); // } state_.input_schema_left->copyTuple(cur,tuple_in_hashtable); // lock_.release(); } bsb->setEmpty(); } // printf("<<<<<<<<<<<<<<<<Join Open consumes %d tuples\n",consumed_tuples_from_left); BasicHashTable::Iterator it=hashtable->CreateIterator(); unsigned tmp=0; 
tuples_in_hashtable=0; // PartitionFunction* hash_tmp=PartitionFunctionFactory::createGeneralModuloFunction(4); // while(hashtable->placeIterator(it,tmp++)){ // void* tuple; // while(tuple=it.readCurrent()){ //// printf("join key:%s\n",(state_.input_schema_left->getcolumn(state_.joinIndex_left[0]).operate->toString(state_.input_schema_left->getColumnAddess(state_.joinIndex_left[0],tuple)).c_str())); // tuples_in_hashtable++; // unsigned bn=state_.input_schema_left->getcolumn(state_.joinIndex_left[0]).operate->getPartitionValue(state_.input_schema_left->getColumnAddess(state_.joinIndex_left[0],tuple),hash_tmp); // if(rand()%1000<3) // printf("partition key of left tuple:%d\n",bn); // it.increase_cur_(); // } // } // cout<<"join open end"<<endl; produced_tuples=0; consumed_tuples_from_right=0; // water_mark=0; barrier_->Arrive(); cout<<"pass the arrive of barrier!!!"<<endl; state_.child_right->open(partition_offset); // cout<<"PartitionOffset:"<<partition_offset<<endl; // sleep(1); return true; }
/**
 * Probe phase of the join: appends joined tuples to `block`.
 *
 * Repeatedly takes a (right block, block iterator, hash-table iterator)
 * triple from the remaining-block list. For each right tuple it walks the
 * hash-table bucket the iterator is placed on, compares all join columns,
 * and for every match writes the left tuple followed by the right tuple into
 * a newly allocated output tuple. When `block` has no room, the triple is
 * pushed back unchanged so a later call resumes at the same position. When no
 * remaining block is available, a new block is fetched from the right child,
 * its iterator is placed on the bucket of its first tuple, and it is pushed
 * onto the remaining-block list.
 *
 * @param block output block to fill.
 * @return true if `block` ran out of space, or if the right child is
 *         exhausted and `block` is non-empty; false if the right child is
 *         exhausted and `block` is empty.
 */
bool BlockStreamJoinIterator::next(BlockStreamBase *block){
  unsigned bn;
  void *result_tuple;
  void *tuple_from_right_child;
  void *tuple_in_hashtable;
  void *key_in_input;
  void *key_in_hashtable;
  bool key_exit;
  remaining_block rb;

  while(true){
    if(atomicPopRemainingBlock(rb)){
      while((tuple_from_right_child=rb.blockstream_iterator->currentTuple())!=0){
        while((tuple_in_hashtable=rb.hashtable_iterator_.readCurrent())!=0){
          /* A candidate matches only if every join column is equal. */
          key_exit=true;
          for(unsigned i=0;i<state_.joinIndex_right.size();i++){
            key_in_input=state_.input_schema_right->getColumnAddess(state_.joinIndex_right[i],tuple_from_right_child);
            key_in_hashtable=state_.ht_schema->getColumnAddess(state_.joinIndex_left[i],tuple_in_hashtable);
            if(!state_.input_schema_right->getcolumn(state_.joinIndex_right[i]).operate->equal(key_in_input,key_in_hashtable)){
              key_exit=false;
              break;
            }
          }
          if(key_exit){
            if((result_tuple=block->allocateTuple(state_.output_schema->getTupleMaxSize()))!=0){
              produced_tuples++;
              const unsigned copyed_bytes=state_.input_schema_left->copyTuple(tuple_in_hashtable,result_tuple);
              /* The right tuple is placed directly after the left tuple. */
              state_.input_schema_right->copyTuple(tuple_from_right_child,static_cast<char*>(result_tuple)+copyed_bytes);
            }
            else{
              /* Output block is full: save the position (the hash-table
               * iterator still points at this match) and return. */
              atomicPushRemainingBlock(rb);
              return true;
            }
          }
          rb.hashtable_iterator_.increase_cur_();
        }
        rb.blockstream_iterator->increase_cur_();
        consumed_tuples_from_right++;
        /* Place the hash-table iterator on the bucket of the next right tuple. */
        if((tuple_from_right_child=rb.blockstream_iterator->currentTuple())!=0){
          bn=state_.input_schema_right->getcolumn(state_.joinIndex_right[0]).operate->getPartitionValue(state_.input_schema_right->getColumnAddess(state_.joinIndex_right[0],tuple_from_right_child),hash);
          hashtable->placeIterator(rb.hashtable_iterator_,bn);
        }
      }
      AtomicPushFreeBlockStream(rb.bsb_right_);
    }

    /* No remaining block (or it is exhausted): fetch a new one from the right child. */
    rb.bsb_right_=AtomicPopFreeBlockStream();
    rb.bsb_right_->setEmpty();
    rb.hashtable_iterator_=hashtable->CreateIterator();
    if(state_.child_right->next(rb.bsb_right_)==false){
      if(block->Empty()==true){
        AtomicPushFreeBlockStream(rb.bsb_right_);
        printf("****join next produces %d tuples while consumed %d tuples from right child and %d tuples from left, hash table has %d tuples\n",produced_tuples,consumed_tuples_from_right,consumed_tuples_from_left,tuples_in_hashtable);
        return false;
      }
      AtomicPushFreeBlockStream(rb.bsb_right_);
      return true;
    }
    rb.blockstream_iterator=rb.bsb_right_->createIterator();
    if((tuple_from_right_child=rb.blockstream_iterator->currentTuple())!=0){
      bn=state_.input_schema_right->getcolumn(state_.joinIndex_right[0]).operate->getPartitionValue(state_.input_schema_right->getColumnAddess(state_.joinIndex_right[0],tuple_from_right_child),hash);
      hashtable->placeIterator(rb.hashtable_iterator_,bn);
    }
    atomicPushRemainingBlock(rb);
  }
}
/**
 * Fills `block` with tuples read from the memory-mapped file.
 *
 * Each tuple delivered by the child is read as an int row number; the tuple
 * copied into `block` is the one found at base_ + row * bytes, where bytes is
 * the size reported by state_.f_schema_->getTupleActualSize(). A partially
 * consumed child block left by an earlier call is processed first, then new
 * blocks are pulled from the child.
 *
 * @param block output block to fill.
 * @return true if `block` ran out of space or is non-empty once the child is
 *         exhausted; false if the child is exhausted and `block` is empty.
 */
bool ExpandableBlockStreamRandomMemAccess::next(BlockStreamBase* block) {
  remaining_block rb;
  void* tuple_from_child;
  void* tuple_in_block;

  if (atomicPopRemainingBlock(rb)) {
    while ((tuple_from_child = rb.iterator->currentTuple()) != 0) {
      /* Size must be taken from the child tuple, as in the loop below;
       * tuple_in_block has not been assigned yet at this point. */
      const unsigned bytes =
          state_.f_schema_->getTupleActualSize(tuple_from_child);
      if ((tuple_in_block = block->allocateTuple(bytes)) != 0) {
        /* the block has enough space to hold this tuple */
        state_.f_schema_->copyTuple(
            (void*)(base_ + (*(int*)tuple_from_child) * bytes),
            tuple_in_block);
        rb.iterator->increase_cur_();
      } else {
        /* the block is full, before we return, we push the remaining block. */
        atomicPushRemainingBlock(rb);
        return true;
      }
    }
    AtomicPushFreeBlockStream(rb.block);
  }

  /* When the program arrives here, it means that there is no remaining block
   * or the remaining block is exhausted, so we read a new block from the
   * child */
  BlockStreamBase* block_for_asking = AtomicPopFreeBlockStream();
  block_for_asking->setEmpty();
  while (state_.child_->next(block_for_asking)) {
    BlockStreamBase::BlockStreamTraverseIterator* traverse_iterator =
        block_for_asking->createIterator();
    while ((tuple_from_child = traverse_iterator->currentTuple()) != 0) {
      const unsigned bytes =
          state_.f_schema_->getTupleActualSize(tuple_from_child);
      if ((tuple_in_block = block->allocateTuple(bytes)) != 0) {
        /* the block has enough space to hold this tuple */
        state_.f_schema_->copyTuple(
            (void*)(base_ + (*(int*)tuple_from_child) * bytes),
            tuple_in_block);
        traverse_iterator->increase_cur_();
      } else {
        /* the block is full, before we return, we push the remaining block. */
        atomicPushRemainingBlock(
            remaining_block(block_for_asking, traverse_iterator));
        return true;
      }
    }
    /* the block_for_asking is exhausted, but the block is not full */
    traverse_iterator->~BlockStreamTraverseIterator();
    block_for_asking->setEmpty();
  }

  /* the child iterator is exhausted, but the block is not full */
  AtomicPushFreeBlockStream(block_for_asking);
  return !block->Empty();
}