// TODO(Hanzhang): According to AVOID_CONTENTION_IN_SCAN, we choose the // strategy. We need finish case(1). bool PhysicalProjectionScan::Next(SegmentExecStatus* const exec_status, BlockStreamBase* block) { RETURN_IF_CANCELLED(exec_status); unsigned long long total_start = curtick(); if (!block->isIsReference()) { block->setIsReference(false); } #ifdef AVOID_CONTENTION_IN_SCAN ScanThreadContext* stc = reinterpret_cast<ScanThreadContext*>(GetContext()); if (NULL == stc) { stc = new ScanThreadContext(); InitContext(stc); } if (ExpanderTracker::getInstance()->isExpandedThreadCallBack( pthread_self())) { input_dataset_.AtomicPut(stc->assigned_data_); delete stc; destorySelfContext(); kPerfInfo->report_instance_performance_in_millibytes(); return false; } if (!stc->assigned_data_.empty()) { ChunkReaderIterator::block_accessor* ba = stc->assigned_data_.front(); stc->assigned_data_.pop_front(); ba->GetBlock(block); // whether delete InMemeryBlockAccessor::target_block_start_address // is depend on whether use copy in ba->getBlock(block); delete ba; kPerfInfo->processed_one_block(); return true; } else { if (input_dataset_.AtomicGet(stc->assigned_data_, Config::scan_batch)) { // case(1) return Next(block); } else { delete stc; destorySelfContext(); return false; } } #else if (ExpanderTracker::getInstance()->isExpandedThreadCallBack( pthread_self())) { return false; } // perf_info_->processed_one_block(); // case(2) RETURN_IF_CANCELLED(exec_status); return partition_reader_iterator_->NextBlock(block); #endif }
bool PhysicalProjectionScan::Open(SegmentExecStatus* const exec_status, const PartitionOffset& kPartitionOffset) { RETURN_IF_CANCELLED(exec_status); RegisterExpandedThreadToAllBarriers(); if (TryEntryIntoSerializedSection()) { /* this is the first expanded thread*/ PartitionStorage* partition_handle_; if (NULL == (partition_handle_ = BlockManager::getInstance()->GetPartitionHandle( PartitionID(state_.projection_id_, kPartitionOffset)))) { LOG(ERROR) << PartitionID(state_.projection_id_, kPartitionOffset) .getName() .c_str() << CStrError(rNoPartitionIdScan) << std::endl; SetReturnStatus(false); } else { partition_reader_iterator_ = partition_handle_->CreateAtomicReaderIterator(); SetReturnStatus(true); } #ifdef AVOID_CONTENTION_IN_SCAN unsigned long long start = curtick(); ChunkReaderIterator* chunk_reader_it; ChunkReaderIterator::block_accessor* ba; while (chunk_reader_it = partition_reader_iterator_->NextChunk()) { while (chunk_reader_it->GetNextBlockAccessor(ba)) { ba->GetBlockSize(); input_dataset_.input_data_blocks_.push_back(ba); } } #endif ExpanderTracker::getInstance()->addNewStageEndpoint( pthread_self(), LocalStageEndPoint(stage_src, "Scan", 0)); perf_info_ = ExpanderTracker::getInstance()->getPerformanceInfo(pthread_self()); perf_info_->initialize(); } BarrierArrive(); return GetReturnStatus(); }
// just only thread can fetch this result bool PhysicalSort::Next(SegmentExecStatus *const exec_status, BlockStreamBase *block) { RETURN_IF_CANCELLED(exec_status); lock_->acquire(); if (thread_id_ == -1) { thread_id_ = pthread_self(); lock_->release(); } else { if (thread_id_ != pthread_self()) { lock_->release(); return false; } else { lock_->release(); } } unsigned tuple_size = state_.input_schema_->getTupleMaxSize(); void *desc = NULL; int tmp_tuple = -1; while (true) { if (all_cur_ < all_tuples_.size()) { if (NULL != (desc = block->allocateTuple(tuple_size))) { tmp_tuple = all_cur_++; memcpy(desc, all_tuples_[tmp_tuple], tuple_size); } else { // block is full return true; } } else { // all tuple are fetched if (tmp_tuple == -1) { // but this block is empty return false; } else { // get several tuples return true; } } } return false; }
/** * build a hash table first, which stores the tuple needed to be deleted in a *hash manner and accelerate the probe phase * */ bool PhysicalDeleteFilter::Open(SegmentExecStatus* const exec_status, const PartitionOffset& partition_offset) { #ifdef TIME startTimer(&timer); #endif RETURN_IF_CANCELLED(exec_status); RegisterExpandedThreadToAllBarriers(); int ret = rSuccess; int64_t timer; bool winning_thread = false; if (TryEntryIntoSerializedSection(0)) { winning_thread = true; ExpanderTracker::getInstance()->addNewStageEndpoint( pthread_self(), LocalStageEndPoint(stage_desc, "delete filter build", 0)); unsigned output_index = 0; for (unsigned i = 0; i < state_.filter_key_deleted_.size(); i++) { joinIndex_table_to_output_[i] = output_index; output_index++; } for (unsigned i = 0; i < state_.payload_base_.size(); i++) { payload_table_to_output_[i] = output_index; output_index++; } // start to create the hash table, including the used hash function, hash // table structure hash_ = PartitionFunctionFactory::createBoostHashFunction( state_.hashtable_bucket_num_); int64_t hash_table_build = curtick(); hashtable_ = new BasicHashTable( state_.hashtable_bucket_num_, state_.hashtable_bucket_size_, state_.input_schema_left_->getTupleMaxSize()); if (NULL == hashtable_) { return ret = rMemoryAllocationFailed; LOG(ERROR) << "hashtable allocation failed" << "[" << rMemoryAllocationFailed << "]" << endl; } #ifdef _DEBUG_ consumed_tuples_from_left = 0; #endif // start to create the join expression, based on which it is able to the // probe the deleted tuples // QNode* expr = createEqualJoinExpression( // state_.hashtable_schema_, state_.input_schema_right_, // state_.filter_key_deleted_, state_.filter_key_base_); // if (NULL == expr) { // ret = rSuccess; // LOG(ERROR) << "The generation of the enqual join expression for // delete " // "filter is failed" << endl; // } // ticks start = curtick(); // // // start to generate the dedicated function, based on which the probe // is // // 
eventually acted, including using llvm and the function pointer // if (Config::enable_codegen) { // eftt_ = getExprFuncTwoTuples(expr, state_.hashtable_schema_, // state_.input_schema_right_); // memcpy_ = getMemcpy(state_.hashtable_schema_->getTupleMaxSize()); // memcat_ = getMemcat(state_.hashtable_schema_->getTupleMaxSize(), // state_.input_schema_right_->getTupleMaxSize()); // } // if (eftt_) { // cff_ = PhysicalDeleteFilter::isMatchCodegen; // printf("Codegen(delete filter) succeed(%4.3fms)!\n", // getMilliSecond(start)); // } else { cff_ = PhysicalDeleteFilter::isMatch; // printf("Codegen(delete filter) failed!\n"); // } // delete expr; } /** * For performance concern, the following line should place just after * "RegisterNewThreadToAllBarriers();" * in order to accelerate the open response time. */ LOG(INFO) << "delete filter operator begin to open left child" << endl; state_.child_left_->Open(exec_status, partition_offset); LOG(INFO) << "delete filter operator finished opening left child" << endl; BarrierArrive(0); BasicHashTable::Iterator tmp_it = hashtable_->CreateIterator(); void* cur; void* tuple_in_hashtable; unsigned bn; void* key_in_input; void* key_in_hashtable; void* value_in_input; void* value_in_hashtable; // create the context for the multi-thread to build the hash table DeleteFilterThreadContext* dftc = CreateOrReuseContext(crm_numa_sensitive); const Schema* input_schema = state_.input_schema_left_->duplicateSchema(); // we used the filter_key_deleted_[0] here, because the data is partitioned // based on the first column in the join index const Operate* op = input_schema->getcolumn(state_.filter_key_deleted_[0]) .operate->duplicateOperator(); const unsigned buckets = state_.hashtable_bucket_num_; int64_t start = curtick(); int64_t processed_tuple_count = 0; LOG(INFO) << "delete filter operator begin to call left child's next()" << endl; RETURN_IF_CANCELLED(exec_status); while (state_.child_left_->Next(exec_status, dftc->l_block_for_asking_)) { 
RETURN_IF_CANCELLED(exec_status); delete dftc->l_block_stream_iterator_; dftc->l_block_stream_iterator_ = dftc->l_block_for_asking_->createIterator(); while (cur = dftc->l_block_stream_iterator_->nextTuple()) { #ifdef _DEBUG_ processed_tuple_count++; lock_.acquire(); consumed_tuples_from_left++; lock_.release(); #endif const void* key_addr = input_schema->getColumnAddess(state_.filter_key_deleted_[0], cur); bn = op->getPartitionValue(key_addr, buckets); tuple_in_hashtable = hashtable_->atomicAllocate(bn); if (memcpy_) memcpy_(tuple_in_hashtable, cur); else input_schema->copyTuple(cur, tuple_in_hashtable); } dftc->l_block_for_asking_->setEmpty(); } // printf("%d cycles per // tuple!\n",(curtick()-start)/processed_tuple_count); unsigned tmp = 0; #ifdef _DEBUG_ tuples_in_hashtable = 0; produced_tuples = 0; consumed_tuples_from_right = 0; #endif if (ExpanderTracker::getInstance()->isExpandedThreadCallBack( pthread_self())) { UnregisterExpandedThreadToAllBarriers(1); // printf("<<<<<<<<<<<<<<<<<Join open detected call back // signal!>>>>>>>>>>>>>>>>>\n"); return true; } BarrierArrive(1); // if(winning_thread){ //// hashtable->report_status(); //// printf("Hash Table Build time: %4.4f\n",getMilliSecond(timer)); // } // hashtable->report_status(); // printf("join open consume %d tuples\n",consumed_tuples_from_left); RETURN_IF_CANCELLED(exec_status); state_.child_right_->Open(exec_status, partition_offset); RETURN_IF_CANCELLED(exec_status); LOG(INFO) << "delete filter operator finished opening right child" << endl; return true; }
bool PhysicalDeleteFilter::Next(SegmentExecStatus* const exec_status, BlockStreamBase* block) { void* result_tuple; void* tuple_from_right_child; void* tuple_in_hashtable; void* key_in_input; void* key_in_hashtable; void* column_in_joinedTuple; void* joinedTuple = memalign(cacheline_size, state_.output_schema_->getTupleMaxSize()); bool key_exist; DeleteFilterThreadContext* dftc = reinterpret_cast<DeleteFilterThreadContext*>(GetContext()); while (true) { RETURN_IF_CANCELLED(exec_status); while ((tuple_from_right_child = dftc->r_block_stream_iterator_->currentTuple()) > 0) { unsigned bn = state_.input_schema_right_->getcolumn(state_.filter_key_base_[0]) .operate->getPartitionValue( state_.input_schema_right_->getColumnAddess( state_.filter_key_base_[0], tuple_from_right_child), state_.hashtable_bucket_num_); // hashtable_->placeIterator(dftc->hashtable_iterator_, bn); // if there is no tuple in the bn bucket of the hash table, then the // tuple // in the base table will be output if (NULL == (tuple_in_hashtable = dftc->hashtable_iterator_.readCurrent())) { if ((result_tuple = block->allocateTuple( state_.output_schema_->getTupleMaxSize())) > 0) { produced_tuples_++; if (memcat_) { memcat_(result_tuple, tuple_in_hashtable, tuple_from_right_child); } else { state_.input_schema_right_->copyTuple( tuple_from_right_child, reinterpret_cast<char*>(result_tuple)); } } else { free(joinedTuple); return true; } } else { while ((tuple_in_hashtable = dftc->hashtable_iterator_.readCurrent()) > 0) { cff_(tuple_in_hashtable, tuple_from_right_child, &key_exist, state_.filter_key_deleted_, state_.filter_key_base_, state_.hashtable_schema_, state_.input_schema_right_, eftt_); if (!key_exist) { if ((result_tuple = block->allocateTuple( state_.output_schema_->getTupleMaxSize())) > 0) { produced_tuples_++; if (memcat_) { memcat_(result_tuple, tuple_in_hashtable, tuple_from_right_child); } else { state_.input_schema_right_->copyTuple( tuple_from_right_child, 
reinterpret_cast<char*>(result_tuple)); } } else { free(joinedTuple); return true; } } dftc->hashtable_iterator_.increase_cur_(); } } dftc->r_block_stream_iterator_->increase_cur_(); #ifdef _DEBUG_ consumed_tuples_from_right++; #endif if ((tuple_from_right_child = dftc->r_block_stream_iterator_->currentTuple())) { bn = state_.input_schema_right_->getcolumn(state_.filter_key_base_[0]) .operate->getPartitionValue( state_.input_schema_right_->getColumnAddess( state_.filter_key_base_[0], tuple_from_right_child), state_.hashtable_bucket_num_); hashtable_->placeIterator(dftc->hashtable_iterator_, bn); } } dftc->r_block_for_asking_->setEmpty(); dftc->hashtable_iterator_ = hashtable_->CreateIterator(); if (state_.child_right_->Next(exec_status, dftc->r_block_for_asking_) == false) { if (block->Empty() == true) { free(joinedTuple); return false; } else { free(joinedTuple); return true; } } delete dftc->r_block_stream_iterator_; dftc->r_block_stream_iterator_ = dftc->r_block_for_asking_->createIterator(); if ((tuple_from_right_child = dftc->r_block_stream_iterator_->currentTuple())) { unsigned bn = state_.input_schema_right_->getcolumn(state_.filter_key_base_[0]) .operate->getPartitionValue( state_.input_schema_right_->getColumnAddess( state_.filter_key_base_[0], tuple_from_right_child), state_.hashtable_bucket_num_); hashtable_->placeIterator(dftc->hashtable_iterator_, bn); } } RETURN_IF_CANCELLED(exec_status); return Next(exec_status, block); }
/**
 * Opens the child, allocates the block buffers used on the way from the
 * child to the mergers, connects to every upper (merger) and starts the
 * sender thread. Pay attention to the role of each buffer as described next
 * to its allocation.
 *
 * @param exec_status execution status inspected by RETURN_IF_CANCELLED.
 * @return true when all connections were made and the sender thread was
 *         created; false if a connection or the thread creation failed.
 */
bool ExchangeSenderPipeline::Open(SegmentExecStatus* const exec_status,
                                  const PartitionOffset&) {
  RETURN_IF_CANCELLED(exec_status);
  state_.child_->Open(exec_status, state_.partition_offset_);
  RETURN_IF_CANCELLED(exec_status);

  upper_num_ = state_.upper_id_list_.size();
  partition_function_ =
      PartitionFunctionFactory::createBoostHashFunction(upper_num_);
  socket_fd_upper_list_ = new int[upper_num_];

  // Block that receives what the child iterator's next() hands over.
  block_for_asking_ =
      BlockStreamBase::createBlock(state_.schema_, state_.block_size_);

  // Holds the tuples received from the child, stored per partition.
  partitioned_data_buffer_ = new PartitionedBlockBuffer(
      upper_num_, block_for_asking_->getSerializedBlockSize());

  // Temporary block used to move one block from the partitioned data buffer
  // into sending_buffer_.
  block_for_sending_buffer_ =
      new BlockContainer(block_for_asking_->getSerializedBlockSize());

  // Blocks currently being sent: upper_num_ of them, one per merger.
  sending_buffer_ = new PartitionedBlockContainer(
      upper_num_, block_for_asking_->getSerializedBlockSize());

  // Temporary block that holds a serialized block.
  block_for_serialization_ =
      new Block(block_for_asking_->getSerializedBlockSize());

  // Per-partition accumulation blocks, so that insertion into the buffer
  // happens at block granularity rather than tuple granularity.
  partitioned_block_stream_ = new BlockStreamBase*[upper_num_];
  unsigned stream_idx = 0;
  while (stream_idx < upper_num_) {
    partitioned_block_stream_[stream_idx] =
        BlockStreamBase::createBlock(state_.schema_, state_.block_size_);
    ++stream_idx;
  }
  RETURN_IF_CANCELLED(exec_status);

  // Connect to all the mergers.
  for (unsigned merger = 0; merger < state_.upper_id_list_.size(); ++merger) {
    RETURN_IF_CANCELLED(exec_status);
    LOG(INFO) << "(exchane_id= " << state_.exchange_id_
              << " partition_offset= " << state_.partition_offset_
              << " ) try to connect to upper( " << merger << " , "
              << state_.upper_id_list_[merger] << " ) ";
    const bool connected =
        (ConnectToUpper(ExchangeID(state_.exchange_id_, merger),
                        state_.upper_id_list_[merger],
                        socket_fd_upper_list_[merger]) == true);
    if (!connected) {
      LOG(INFO) << "unsuccessfully !" << std::endl;
      return false;
    }
  }
  LOG(INFO) << "connect to all mereger successfully !" << std::endl;
  RETURN_IF_CANCELLED(exec_status);

  // Create the sender thread.
  if (0 != pthread_create(&sender_thread_id_, NULL, Sender, this)) {
    LOG(ERROR) << "(exchane_id= " << state_.exchange_id_
               << " partition_offset= " << state_.partition_offset_
               << " ) Failed to create the sender thread>>>>>>>>>>"
               << std::endl;
    return false;
  }
  return true;
}
/** * Note the process from getting block of child to sending to mergers in * different buffer. * if the state_.partition_schema_ is hash partitioned, every tuple of the block * which get from child will be hash repartition and copied into * partitioned_block_stream_, if it is full, then * serialize it and insert into corresponding partition buffer. * else the state_.partition_schema_ is broadcast, straightly insert the block * from child into each partition buffer. */ bool ExchangeSenderPipeline::Next(SegmentExecStatus* const exec_status, BlockStreamBase* no_block) { void* tuple_from_child; void* tuple_in_cur_block_stream; while (true) { RETURN_IF_CANCELLED(exec_status); block_for_asking_->setEmpty(); if (state_.child_->Next(exec_status, block_for_asking_)) { RETURN_IF_CANCELLED(exec_status); /** * if a blocks is obtained from child, we repartition the tuples in the * block to corresponding partition_block_stream_. */ if (state_.partition_schema_.isHashPartition()) { BlockStreamBase::BlockStreamTraverseIterator* traverse_iterator = block_for_asking_->createIterator(); while ((tuple_from_child = traverse_iterator->nextTuple()) > 0) { /** * for each tuple in the newly obtained block, insert the tuple to * one partitioned block according to the partition hash value */ const unsigned partition_id = GetHashPartitionId( tuple_from_child, state_.schema_, state_.partition_schema_.partition_key_index, upper_num_); // calculate the tuple size for the current tuple const unsigned bytes = state_.schema_->getTupleActualSize(tuple_from_child); // insert the tuple into the corresponding partitioned block while (!(tuple_in_cur_block_stream = partitioned_block_stream_[partition_id]->allocateTuple( bytes))) { /** * if the destination block is full, it should be serialized and * inserted into the partitioned_data_buffer. 
*/ partitioned_block_stream_[partition_id]->serialize( *block_for_serialization_); partitioned_data_buffer_->insertBlockToPartitionedList( block_for_serialization_, partition_id); partitioned_block_stream_[partition_id]->setEmpty(); } /** * thread arriving here means that the space for the tuple is * successfully allocated, so we copy the tuple */ state_.schema_->copyTuple(tuple_from_child, tuple_in_cur_block_stream); } DELETE_PTR(traverse_iterator); // by hAN MEMORY LEAK } else if (state_.partition_schema_.isBroadcastPartition()) { /** * for boardcast case, all block from child should inserted into all * partitioned_data_buffer */ block_for_asking_->serialize(*block_for_serialization_); for (unsigned i = 0; i < upper_num_; ++i) { partitioned_data_buffer_->insertBlockToPartitionedList( block_for_serialization_, i); } } } else { RETURN_IF_CANCELLED(exec_status); if (state_.partition_schema_.isHashPartition()) { /* the child iterator is exhausted. We add the last block stream block * which would be not full into the buffer for hash partitioned case. */ for (unsigned i = 0; i < upper_num_; ++i) { partitioned_block_stream_[i]->serialize(*block_for_serialization_); partitioned_data_buffer_->insertBlockToPartitionedList( block_for_serialization_, i); } /* The following lines send an empty block to the upper, indicating that * all the data from current sent has been transmit to the uppers. */ for (unsigned i = 0; i < upper_num_; ++i) { if (!partitioned_block_stream_[i]->Empty()) { partitioned_block_stream_[i]->setEmpty(); partitioned_block_stream_[i]->serialize(*block_for_serialization_); partitioned_data_buffer_->insertBlockToPartitionedList( block_for_serialization_, i); } } } else if (state_.partition_schema_.isBroadcastPartition()) { /* The following lines send an empty block to the upper, indicating that * all the data from current sent has been transmit to the uppers. 
*/ block_for_asking_->setEmpty(); block_for_asking_->serialize(*block_for_serialization_); for (unsigned i = 0; i < upper_num_; ++i) { partitioned_data_buffer_->insertBlockToPartitionedList( block_for_serialization_, i); } } /* * waiting until all the block in the buffer has been * transformed to the uppers. */ LOG(INFO) << "(exchane_id= " << state_.exchange_id_ << " partition_offset= " << state_.partition_offset_ << " ) Waiting until all the blocks in the buffer is sent!" << std::endl; RETURN_IF_CANCELLED(exec_status); while (!partitioned_data_buffer_->isEmpty()) { RETURN_IF_CANCELLED(exec_status); usleep(1); } /* * waiting until all the uppers send the close notification which means * that * blocks in the uppers' socket buffer have all been * consumed. */ LOG(INFO) << "(exchane_id= " << state_.exchange_id_ << " partition_offset= " << state_.partition_offset_ << " ) Waiting for close notification from all merger!" << std::endl; RETURN_IF_CANCELLED(exec_status); for (unsigned i = 0; i < upper_num_; i++) { RETURN_IF_CANCELLED(exec_status); WaitingForCloseNotification(socket_fd_upper_list_[i]); } LOG(INFO) << " received all close notification, closing.. " << endl; return false; } } }
/** * first we can store all the data which will be bufferred * 1, buffer is the first phase. multi-threads will be applyed to the data * in the buffer. * 2, sort the data in the buffer, we choose stable_sort() to sort the records * by specifying the column to be sorted * 3, whether to register the buffer into the blockmanager. * */ bool PhysicalSort::Open(SegmentExecStatus *const exec_status, const PartitionOffset &part_off) { RETURN_IF_CANCELLED(exec_status); RegisterExpandedThreadToAllBarriers(); if (TryEntryIntoSerializedSection(0)) { all_cur_ = 0; thread_id_ = -1; all_tuples_.clear(); block_buffer_ = new DynamicBlockBuffer(); } BarrierArrive(0); BlockStreamBase *block_for_asking; if (CreateBlock(block_for_asking) == false) { LOG(ERROR) << "error in the create block stream!!!" << endl; return 0; } // state_.partition_offset_ = part_off; state_.child_->Open(exec_status, part_off); RETURN_IF_CANCELLED(exec_status); /** * phase 1: store the data in the buffer! * by using multi-threads to speed up */ vector<void *> thread_tuple; thread_tuple.clear(); void *tuple_ptr = NULL; BlockStreamBase::BlockStreamTraverseIterator *block_it; while (state_.child_->Next(exec_status, block_for_asking)) { RETURN_IF_CANCELLED(exec_status); block_buffer_->atomicAppendNewBlock(block_for_asking); block_it = block_for_asking->createIterator(); while (NULL != (tuple_ptr = block_it->nextTuple())) { thread_tuple.push_back(tuple_ptr); } if (NULL != block_it) { delete block_it; block_it = NULL; } if (CreateBlock(block_for_asking) == false) { LOG(ERROR) << "error in the create block stream!!!" << endl; return 0; } } if (NULL != block_for_asking) { delete block_for_asking; block_for_asking = NULL; } lock_->acquire(); all_tuples_.insert(all_tuples_.end(), thread_tuple.begin(), thread_tuple.end()); lock_->release(); thread_tuple.clear(); // guarantee the block_buffer get all data blocks completely BarrierArrive(1); // phase 2: sort the data in the buffer, only just one thread! 
if (TryEntryIntoSerializedSection(1)) { // reverse the order of order_by_attrs for preserve The relative ordering of // equivalent elements reverse(state_.order_by_attrs_.begin(), state_.order_by_attrs_.end()); // one expression for 2 tuples results in overwriting result, so copy the // expression for 2 different tuples calculating state_.order_by_attrs_copy_ = state_.order_by_attrs_; OperFuncInfoData oper_info; fcinfo = &oper_info; state_.compare_funcs_ = new DataTypeOperFunc[state_.order_by_attrs_.size()][2]; for (int i = 0; i < state_.order_by_attrs_.size(); ++i) { state_.order_by_attrs_copy_[i].first = state_.order_by_attrs_[i].first->ExprCopy(); // deep copy state_.order_by_attrs_[i].first->InitExprAtPhysicalPlan(); state_.order_by_attrs_copy_[i].first->InitExprAtPhysicalPlan(); state_.compare_funcs_[i][0] = DataTypeOper::data_type_oper_func_ [state_.order_by_attrs_[i].first->get_type_][OperType::oper_less]; state_.compare_funcs_[i][1] = DataTypeOper::data_type_oper_func_ [state_.order_by_attrs_[i].first->get_type_][OperType::oper_great]; } // int64_t time = curtick(); state_.eecnxt_.schema[0] = state_.input_schema_; state_.eecnxt1_.schema[0] = state_.input_schema_; RETURN_IF_CANCELLED(exec_status); cmp_state_ = &state_; Order(); } BarrierArrive(2); return true; }
/** * @brief Method description : describe the open method which gets results from * the left child and copy them into its local buffer, say the block buffer. the * block buffer is a dynamic block buffer since all the expanded threads will * share the same block buffer. */ bool PhysicalNestLoopJoin::Open(SegmentExecStatus *const exec_status, const PartitionOffset &partition_offset) { RETURN_IF_CANCELLED(exec_status); RegisterExpandedThreadToAllBarriers(); unsigned long long int timer; bool winning_thread = false; if (TryEntryIntoSerializedSection(0)) { // the first thread of all need to do ExpanderTracker::getInstance()->addNewStageEndpoint( pthread_self(), LocalStageEndPoint(stage_desc, "nest loop", 0)); winning_thread = true; timer = curtick(); block_buffer_ = new DynamicBlockBuffer(); if (state_.join_condi_.size() == 0) { join_condi_process_ = WithoutJoinCondi; } else { join_condi_process_ = WithJoinCondi; } LOG(INFO) << "[NestloopJoin]: [the first thread opens the nestloopJoin " "physical operator]" << std::endl; } RETURN_IF_CANCELLED(exec_status); state_.child_left_->Open(exec_status, partition_offset); RETURN_IF_CANCELLED(exec_status); BarrierArrive(0); NestLoopJoinContext *jtc = CreateOrReuseContext(crm_numa_sensitive); // create a new block to hold the results from the left child // and add results to the dynamic buffer // jtc->block_for_asking_ == BlockStreamBase::createBlock( // state_.input_schema_left_, // state_.block_size_); CreateBlockStream(jtc->block_for_asking_, state_.input_schema_left_); // auto temp = jtc->block_for_asking_->getBlock(); // cout << "temp start" << temp << endl; // // cout << "init block_for_asking_ : " << jtc->block_for_asking_->getBlock() // << " is reference : " << jtc->block_for_asking_->isIsReference() << // endl; while (state_.child_left_->Next(exec_status, jtc->block_for_asking_)) { if (exec_status->is_cancelled()) { if (NULL != jtc->block_for_asking_) { delete jtc->block_for_asking_; jtc->block_for_asking_ = NULL; } 
return false; } // cout << "after assgin start :" << jtc->block_for_asking_->getBlock() // << " is reference : " << jtc->block_for_asking_->isIsReference() // << endl; block_buffer_->atomicAppendNewBlock(jtc->block_for_asking_); // if (!jtc->block_for_asking_->isIsReference()) { CreateBlockStream(jtc->block_for_asking_, state_.input_schema_left_); // } else { // // cout << "temp after" << temp << endl; // // delete temp; // CreateBlockStream(jtc->block_for_asking_, // state_.input_schema_left_); // jtc->block_for_asking_->setIsReference(false); // } // cout << "new start :" << jtc->block_for_asking_->getBlock() // << " is reference : " << jtc->block_for_asking_->isIsReference() // << endl; } // cout << "buffer_size_ : " << block_buffer_->GetBufferSize() << endl; // the last block is created without storing the results from the left // child if (NULL != jtc->block_for_asking_) { delete jtc->block_for_asking_; jtc->block_for_asking_ = NULL; } // when the finished expanded thread finished its allocated work, it can be // called back here. 
What should be noticed that the callback meas the to // exit on the of the thread if (ExpanderTracker::getInstance()->isExpandedThreadCallBack( pthread_self())) { UnregisterExpandedThreadToAllBarriers(1); LOG(INFO) << "[NestloopJoin]: [the" << pthread_self() << "the thread is called to exit]" << std::endl; return true; // the } BarrierArrive(1); // ??ERROR // join_thread_context* jtc=new join_thread_context(); // jtc->block_for_asking_ == BlockStreamBase::createBlock( // state_.input_schema_right_, // state_.block_size_); CreateBlockStream(jtc->block_for_asking_, state_.input_schema_right_); jtc->block_for_asking_->setEmpty(); jtc->block_stream_iterator_ = jtc->block_for_asking_->createIterator(); jtc->buffer_iterator_ = block_buffer_->createIterator(); // underlying bug: as for buffer_iterator may be NULL, it's necessary to let // every buffer_iterator of each thread point to an empty block // jtc->buffer_stream_iterator_ = // jtc->buffer_iterator_.nextBlock()->createIterator(); InitContext(jtc); // rename this function, here means to store the thread // context in the operator context RETURN_IF_CANCELLED(exec_status); state_.child_right_->Open(exec_status, partition_offset); return true; }
bool PhysicalNestLoopJoin::Next(SegmentExecStatus *const exec_status, BlockStreamBase *block) { /** * @brief it describes the sequence of the nestloop join. As the intermediate * result of the left child has been stored in the dynamic block buffer in the * open function. in this next function, it get the intermediate result of the * right child operator, one block after one block. Within each block, it gets * each tuple in the block and joins with each tuple in the dynamic block * buffer * when traversing them. * Method description : * @param * @ return * @details (additional) */ RETURN_IF_CANCELLED(exec_status); void *tuple_from_buffer_child = NULL; void *tuple_from_right_child = NULL; void *result_tuple = NULL; bool pass = false; BlockStreamBase *buffer_block = NULL; NestLoopJoinContext *jtc = reinterpret_cast<NestLoopJoinContext *>(GetContext()); while (1) { RETURN_IF_CANCELLED(exec_status); while (NULL != (tuple_from_right_child = jtc->block_stream_iterator_->currentTuple())) { while (1) { while (NULL != (tuple_from_buffer_child = jtc->buffer_stream_iterator_->currentTuple())) { pass = join_condi_process_(tuple_from_buffer_child, tuple_from_right_child, jtc); if (pass) { if (NULL != (result_tuple = block->allocateTuple( state_.output_schema_->getTupleMaxSize()))) { const unsigned copyed_bytes = state_.input_schema_left_->copyTuple(tuple_from_buffer_child, result_tuple); state_.input_schema_right_->copyTuple( tuple_from_right_child, reinterpret_cast<char *>(result_tuple + copyed_bytes)); } else { // LOG(INFO) << "[NestloopJoin]: [a block of the // result // is full of " // "the nest loop join result ]" << // std::endl; return true; } } jtc->buffer_stream_iterator_->increase_cur_(); } // jtc->buffer_stream_iterator_->~BlockStreamTraverseIterator(); if (jtc->buffer_stream_iterator_ != NULL) { delete jtc->buffer_stream_iterator_; jtc->buffer_stream_iterator_ = NULL; } if (NULL != (buffer_block = jtc->buffer_iterator_.nextBlock())) { jtc->buffer_stream_iterator_ = 
buffer_block->createIterator(); } else { break; } } jtc->buffer_iterator_.ResetCur(); if (NULL == (buffer_block = jtc->buffer_iterator_.nextBlock())) { LOG(ERROR) << "[NestloopJoin]: this block shouldn't be NULL in nest " "loop join!"; assert( false && "[NestloopJoin]: this block shouldn't be NULL in nest loop join!"); } if (jtc->buffer_stream_iterator_ != NULL) { delete jtc->buffer_stream_iterator_; jtc->buffer_stream_iterator_ = NULL; } jtc->buffer_stream_iterator_ = buffer_block->createIterator(); jtc->block_stream_iterator_->increase_cur_(); } // if buffer is empty, return false directly jtc->buffer_iterator_.ResetCur(); if (NULL == (buffer_block = jtc->buffer_iterator_.nextBlock())) { LOG(WARNING) << "[NestloopJoin]: the buffer is empty in nest loop join!"; // for getting all right child's data jtc->block_for_asking_->setEmpty(); while (state_.child_right_->Next(exec_status, jtc->block_for_asking_)) { jtc->block_for_asking_->setEmpty(); } return false; } if (jtc->buffer_stream_iterator_ != NULL) { delete jtc->buffer_stream_iterator_; jtc->buffer_stream_iterator_ = NULL; } jtc->buffer_stream_iterator_ = buffer_block->createIterator(); // ask block from right child jtc->block_for_asking_->setEmpty(); if (false == state_.child_right_->Next(exec_status, jtc->block_for_asking_)) { if (true == block->Empty()) { LOG(WARNING) << "[NestloopJoin]: [no join result is stored in the " "block after traverse the right child operator]" << std::endl; return false; } else { LOG(INFO) << "[NestloopJoin]: get a new block from right child " << std::endl; return true; } } if (jtc->block_stream_iterator_ != NULL) { delete jtc->block_stream_iterator_; jtc->block_stream_iterator_ = NULL; } jtc->block_stream_iterator_ = jtc->block_for_asking_->createIterator(); } return Next(exec_status, block); }