bool BlockStreamNestLoopJoinIterator::open(
    const PartitionOffset& partition_offset) {
  RegisterExpandedThreadToAllBarriers();
  // AtomicPushFreeHtBlockStream(BlockStreamBase::createBlock(
  //     state_.input_schema_left, state_.block_size_));
  // AtomicPushFreeBlockStream(BlockStreamBase::createBlock(
  //     state_.input_schema_right, state_.block_size_));
  unsigned long long int timer;
  bool winning_thread = false;
  if (tryEntryIntoSerializedSection(0)) {
    // only the first of all the expanded threads performs the one-time setup
    ExpanderTracker::getInstance()->addNewStageEndpoint(
        pthread_self(), LocalStageEndPoint(stage_desc, "nest loop build", 0));
    winning_thread = true;
    timer = curtick();
    // unsigned output_index = 0;
    // for (unsigned i = 0; i < state_.joinIndex_left.size(); i++) {
    //   joinIndex_left_to_output[i] = output_index;
    //   output_index++;
    // }
    // for (unsigned i = 0; i < state_.payload_left.size(); i++) {
    //   payload_left_to_output[i] = output_index;
    //   output_index++;
    // }
    // for (unsigned i = 0; i < state_.payload_right.size(); i++) {
    //   payload_right_to_output[i] = output_index;
    //   output_index++;
    // }
    blockbuffer = new DynamicBlockBuffer();
  }
  state_.child_left->open(partition_offset);
  barrierArrive(0);
  join_thread_context* jtc = new join_thread_context();
  createBlockStream(jtc->block_for_asking_);
  // drain the left child and buffer every filled block
  while (state_.child_left->next(jtc->block_for_asking_)) {
    blockbuffer->atomicAppendNewBlock(jtc->block_for_asking_);
    createBlockStream(jtc->block_for_asking_);
  }
  // the last block was allocated but never filled, so release it
  delete jtc->block_for_asking_;
  if (ExpanderTracker::getInstance()->isExpandedThreadCallBack(
          pthread_self())) {
    unregisterExpandedThreadToAllBarriers(1);
    return true;
  }
  barrierArrive(1);  // ??ERROR
  // join_thread_context* jtc = new join_thread_context();
  jtc->block_for_asking_ = BlockStreamBase::createBlock(
      state_.input_schema_right, state_.block_size_);
  jtc->block_stream_iterator_ = jtc->block_for_asking_->createIterator();
  initContext(jtc);
  state_.child_right->open(partition_offset);
  return true;
}
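// --- Illustrative sketch, not part of the original operator code. ---
// The "serialized section" idiom above lets exactly one expanded thread run
// the one-time setup (e.g. allocating blockbuffer) while the other threads
// skip it and meet at the barrier. A minimal stand-alone version (the
// project's real tryEntryIntoSerializedSection() may be implemented
// differently) could look like this:
#include <atomic>

namespace sketch {
static std::atomic_flag g_open_section = ATOMIC_FLAG_INIT;

// Returns true for exactly one caller; every later caller gets false.
inline bool TryEntryIntoSerializedSectionSketch() {
  return !g_open_section.test_and_set(std::memory_order_acq_rel);
}
}  // namespace sketch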
bool PhysicalProjectionScan::Open(SegmentExecStatus* const exec_status,
                                  const PartitionOffset& kPartitionOffset) {
  RETURN_IF_CANCELLED(exec_status);
  RegisterExpandedThreadToAllBarriers();
  if (TryEntryIntoSerializedSection()) {
    /* this is the first expanded thread */
    PartitionStorage* partition_handle_;
    if (NULL ==
        (partition_handle_ = BlockManager::getInstance()->GetPartitionHandle(
             PartitionID(state_.projection_id_, kPartitionOffset)))) {
      LOG(ERROR) << PartitionID(state_.projection_id_, kPartitionOffset)
                        .getName()
                        .c_str()
                 << CStrError(rNoPartitionIdScan) << std::endl;
      SetReturnStatus(false);
    } else {
      partition_reader_iterator_ =
          partition_handle_->CreateAtomicReaderIterator();
      SetReturnStatus(true);
    }

#ifdef AVOID_CONTENTION_IN_SCAN
    // eagerly collect every block accessor up front, so that concurrent
    // Next() calls do not contend on the partition reader
    unsigned long long start = curtick();
    ChunkReaderIterator* chunk_reader_it;
    ChunkReaderIterator::block_accessor* ba;
    while (NULL !=
           (chunk_reader_it = partition_reader_iterator_->NextChunk())) {
      while (chunk_reader_it->GetNextBlockAccessor(ba)) {
        ba->GetBlockSize();
        input_dataset_.input_data_blocks_.push_back(ba);
      }
    }
#endif

    ExpanderTracker::getInstance()->addNewStageEndpoint(
        pthread_self(), LocalStageEndPoint(stage_src, "Scan", 0));
    perf_info_ =
        ExpanderTracker::getInstance()->getPerformanceInfo(pthread_self());
    perf_info_->initialize();
  }
  BarrierArrive();
  return GetReturnStatus();
}
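// --- Illustrative sketch, not part of the original operator code. ---
// Under AVOID_CONTENTION_IN_SCAN, Open() pre-collects all block accessors so
// that the scan threads later pop work items from a local list instead of
// contending on the shared partition reader. A hedged sketch of that
// hand-out structure (the project's input_dataset_ type is not shown here):
#include <mutex>
#include <vector>

namespace sketch {
template <typename Block>
class PrecollectedBlocks {
 public:
  // called single-threaded from Open() while collecting
  void Push(Block* b) { blocks_.push_back(b); }
  // called concurrently from Next(); returns nullptr when exhausted
  Block* Pop() {
    std::lock_guard<std::mutex> guard(mutex_);
    if (blocks_.empty()) return nullptr;
    Block* b = blocks_.back();
    blocks_.pop_back();
    return b;
  }

 private:
  std::mutex mutex_;
  std::vector<Block*> blocks_;
};
}  // namespace sketch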
bool ExpandableBlockStreamExchangeEpoll::open(
    const PartitionOffset& partition_offset) {
  unsigned long long int start = curtick();
  RegisterExpandedThreadToAllBarriers();
  if (tryEntryIntoSerializedSection()) {
    debug_winner_thread++;
    nexhausted_lowers = 0;
    this->partition_offset = partition_offset;
    nlowers = state.lower_id_list_.size();
    for (unsigned i = 0; i < nlowers; i++) {
      debug_received_block[i] = 0;
    }
    socket_fd_lower_list = new int[nlowers];
    // initialize all lower socket fds to -1 (not yet connected) ---Yu
    for (unsigned i = 0; i < nlowers; ++i) {
      socket_fd_lower_list[i] = -1;
    }
    buffer = new BlockStreamBuffer(state.block_size_, BUFFER_SIZE_IN_EXCHANGE,
                                   state.schema_);
    ExpanderTracker::getInstance()->addNewStageEndpoint(
        pthread_self(), LocalStageEndPoint(stage_src, "Exchange", buffer));
    received_block_stream_ =
        BlockStreamBase::createBlock(state.schema_, state.block_size_);
    block_for_socket_ = new BlockContainer*[nlowers];
    for (unsigned i = 0; i < nlowers; i++) {
      block_for_socket_[i] =
          new BlockContainer(received_block_stream_->getSerializedBlockSize());
    }
    if (PrepareTheSocket() == false) return false;
    if (SetSocketNonBlocking(sock_fd) == false) {
      return false;
    }
    logging_->log("[%ld,%d] Open: nexhausted lowers=%d, nlower=%d",
                  state.exchange_id_, partition_offset, nexhausted_lowers,
                  nlowers);
    if (RegisterExchange() == false) {
      logging_->elog("Register Exchange with ID=%d fails!",
                     state.exchange_id_);
    }
    if (isMaster()) {
      /* According to a bug reported by dsc, the master exchange upper should
       * check whether the other uppers have registered to the
       * exchangeTracker. Otherwise, a lower may fail to connect to the
       * exchangeTracker of some uppers when the lower nodes receive the
       * exchange lower, as some uppers have not yet registered the
       * exchange_id to the exchangeTracker.
       */
      logging_->log("[%ld,%d] Synchronizing....", state.exchange_id_,
                    partition_offset);
      checkOtherUpperRegistered();
      logging_->log("[%ld,%d] Synchronized!", state.exchange_id_,
                    partition_offset);
      logging_->log(
          "[%ld,%d] This exchange is the master one, serialize the iterator "
          "subtree to the children...",
          state.exchange_id_, partition_offset);
      if (SerializeAndSendToMulti() == false) return false;
    }
    if (CreateReceiverThread() == false) {
      return false;
    }
    createPerformanceInfo();
  }
  /* A synchronization barrier, in case of multiple expanded threads */
  barrierArrive();
  return true;
}
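// --- Illustrative sketch, not part of the original operator code. ---
// SetSocketNonBlocking() above must put the socket into non-blocking mode
// before it is handed to the epoll-based receiver thread. A typical POSIX
// implementation (the project's actual helper may differ):
#include <fcntl.h>

namespace sketch {
inline bool SetSocketNonBlockingSketch(int fd) {
  const int flags = fcntl(fd, F_GETFL, 0);
  if (flags < 0) return false;
  return fcntl(fd, F_SETFL, flags | O_NONBLOCK) == 0;
}
}  // namespace sketch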
/**
 * Build a hash table first; it stores the tuples to be deleted in a hashed
 * manner so that the probe phase is accelerated.
 */
bool PhysicalDeleteFilter::Open(SegmentExecStatus* const exec_status,
                                const PartitionOffset& partition_offset) {
#ifdef TIME
  startTimer(&timer);
#endif
  RETURN_IF_CANCELLED(exec_status);
  RegisterExpandedThreadToAllBarriers();
  int ret = rSuccess;
  int64_t timer;
  bool winning_thread = false;
  if (TryEntryIntoSerializedSection(0)) {
    winning_thread = true;
    ExpanderTracker::getInstance()->addNewStageEndpoint(
        pthread_self(),
        LocalStageEndPoint(stage_desc, "delete filter build", 0));
    // map the filter-key columns and the payload columns of the base table
    // to consecutive positions in the output schema
    unsigned output_index = 0;
    for (unsigned i = 0; i < state_.filter_key_deleted_.size(); i++) {
      joinIndex_table_to_output_[i] = output_index;
      output_index++;
    }
    for (unsigned i = 0; i < state_.payload_base_.size(); i++) {
      payload_table_to_output_[i] = output_index;
      output_index++;
    }
    // start to create the hash table, including the hash function used and
    // the hash table structure
    hash_ = PartitionFunctionFactory::createBoostHashFunction(
        state_.hashtable_bucket_num_);
    int64_t hash_table_build = curtick();
    hashtable_ = new BasicHashTable(
        state_.hashtable_bucket_num_, state_.hashtable_bucket_size_,
        state_.input_schema_left_->getTupleMaxSize());
    if (NULL == hashtable_) {
      ret = rMemoryAllocationFailed;
      LOG(ERROR) << "hashtable allocation failed"
                 << "[" << rMemoryAllocationFailed << "]" << endl;
      // report failure instead of implicitly converting the nonzero error
      // code to true, and log before returning
      return false;
    }
#ifdef _DEBUG_
    consumed_tuples_from_left = 0;
#endif
    // start to create the join expression, based on which the deleted
    // tuples can be probed
    // QNode* expr = createEqualJoinExpression(
    //     state_.hashtable_schema_, state_.input_schema_right_,
    //     state_.filter_key_deleted_, state_.filter_key_base_);
    // if (NULL == expr) {
    //   ret = rSuccess;
    //   LOG(ERROR) << "The generation of the equal join expression for "
    //                 "delete filter failed" << endl;
    // }
    // ticks start = curtick();
    //
    // // start to generate the dedicated function, based on which the probe
    // // is eventually executed, using llvm and the function pointer
    // if (Config::enable_codegen) {
    //   eftt_ = getExprFuncTwoTuples(expr, state_.hashtable_schema_,
    //                                state_.input_schema_right_);
    //   memcpy_ = getMemcpy(state_.hashtable_schema_->getTupleMaxSize());
    //   memcat_ = getMemcat(state_.hashtable_schema_->getTupleMaxSize(),
    //                       state_.input_schema_right_->getTupleMaxSize());
    // }
    // if (eftt_) {
    //   cff_ = PhysicalDeleteFilter::isMatchCodegen;
    //   printf("Codegen(delete filter) succeed(%4.3fms)!\n",
    //          getMilliSecond(start));
    // } else {
    cff_ = PhysicalDeleteFilter::isMatch;
    //   printf("Codegen(delete filter) failed!\n");
    // }
    // delete expr;
  }
  /**
   * For performance concerns, the following line should be placed just after
   * "RegisterNewThreadToAllBarriers();" in order to accelerate the open
   * response time.
   */
  LOG(INFO) << "delete filter operator begin to open left child" << endl;
  state_.child_left_->Open(exec_status, partition_offset);
  LOG(INFO) << "delete filter operator finished opening left child" << endl;
  BarrierArrive(0);
  BasicHashTable::Iterator tmp_it = hashtable_->CreateIterator();

  void* cur;
  void* tuple_in_hashtable;
  unsigned bn;

  void* key_in_input;
  void* key_in_hashtable;
  void* value_in_input;
  void* value_in_hashtable;
  // create the context for the multiple threads that build the hash table
  DeleteFilterThreadContext* dftc = CreateOrReuseContext(crm_numa_sensitive);
  const Schema* input_schema = state_.input_schema_left_->duplicateSchema();
  // we use filter_key_deleted_[0] here, because the data is partitioned on
  // the first column of the join index
  const Operate* op = input_schema->getcolumn(state_.filter_key_deleted_[0])
                          .operate->duplicateOperator();
  const unsigned buckets = state_.hashtable_bucket_num_;

  int64_t start = curtick();
  int64_t processed_tuple_count = 0;

  LOG(INFO) << "delete filter operator begin to call left child's next()"
            << endl;
  RETURN_IF_CANCELLED(exec_status);
  while (state_.child_left_->Next(exec_status, dftc->l_block_for_asking_)) {
    RETURN_IF_CANCELLED(exec_status);
    delete dftc->l_block_stream_iterator_;
    dftc->l_block_stream_iterator_ =
        dftc->l_block_for_asking_->createIterator();
    while (NULL != (cur = dftc->l_block_stream_iterator_->nextTuple())) {
#ifdef _DEBUG_
      processed_tuple_count++;
      lock_.acquire();
      consumed_tuples_from_left++;
      lock_.release();
#endif
      // locate the bucket for the current tuple's key and copy the tuple
      // into the slot allocated in the hash table
      const void* key_addr =
          input_schema->getColumnAddess(state_.filter_key_deleted_[0], cur);
      bn = op->getPartitionValue(key_addr, buckets);
      tuple_in_hashtable = hashtable_->atomicAllocate(bn);
      if (memcpy_)
        memcpy_(tuple_in_hashtable, cur);
      else
        input_schema->copyTuple(cur, tuple_in_hashtable);
    }
    dftc->l_block_for_asking_->setEmpty();
  }
  // printf("%d cycles per tuple!\n",
  //        (curtick() - start) / processed_tuple_count);
  unsigned tmp = 0;
#ifdef _DEBUG_
  tuples_in_hashtable = 0;
  produced_tuples = 0;
  consumed_tuples_from_right = 0;
#endif
  if (ExpanderTracker::getInstance()->isExpandedThreadCallBack(
          pthread_self())) {
    UnregisterExpandedThreadToAllBarriers(1);
    // printf("<<< Join open detected callback signal! >>>\n");
    return true;
  }
  BarrierArrive(1);
  // if (winning_thread) {
  //   hashtable->report_status();
  //   printf("Hash Table Build time: %4.4f\n", getMilliSecond(timer));
  // }
  // hashtable->report_status();
  // printf("join open consume %d tuples\n", consumed_tuples_from_left);
  RETURN_IF_CANCELLED(exec_status);
  state_.child_right_->Open(exec_status, partition_offset);
  RETURN_IF_CANCELLED(exec_status);
  LOG(INFO) << "delete filter operator finished opening right child" << endl;
  return true;
}
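// --- Illustrative sketch, not part of the original operator code. ---
// The build loop above maps each tuple's key to a bucket via
// Operate::getPartitionValue() and then copies the tuple into slot memory
// returned by atomicAllocate(). A hedged sketch of the bucket mapping for a
// plain 64-bit key (the real operator works on schema-generic column
// addresses, and its partition function may differ):
#include <cstdint>
#include <functional>

namespace sketch {
inline unsigned BucketOfKey(int64_t key, unsigned bucket_num) {
  // hash first, then reduce modulo the bucket count, so that skewed key
  // ranges still spread across buckets
  return static_cast<unsigned>(std::hash<int64_t>()(key) % bucket_num);
}
}  // namespace sketch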
/**
 * Note that a serialized block's size differs from the others': it carries
 * tail info. The exchange merger sits at the end of one segment of the plan,
 * so it is the "stage_src" for this stage.
 */
bool ExchangeMerger::Open(const PartitionOffset& partition_offset) {
  unsigned long long int start = curtick();
  RegisterExpandedThreadToAllBarriers();
  if (TryEntryIntoSerializedSection()) {  // the first thread to arrive does
                                          // the one-time setup
    exhausted_lowers = 0;
    this->partition_offset_ = partition_offset;
    lower_num_ = state_.lower_id_list_.size();
    socket_fd_lower_list_ = new int[lower_num_];
    for (int i = 0; i < lower_num_; ++i) {
      socket_fd_lower_list_[i] = -1;
    }
    // buffer all deserialized blocks coming from every socket
    all_merged_block_buffer_ = new BlockStreamBuffer(
        state_.block_size_, BUFFER_SIZE_IN_EXCHANGE, state_.schema_);
    ExpanderTracker::getInstance()->addNewStageEndpoint(
        pthread_self(),
        LocalStageEndPoint(stage_src, "Exchange", all_merged_block_buffer_));
    // when one of block_for_socket_ is full, it is deserialized into
    // block_for_deserialization and sent to all_merged_block_buffer_
    block_for_deserialization =
        BlockStreamBase::createBlock(state_.schema_, state_.block_size_);
    // store one block per socket; the received blocks are serialized
    block_for_socket_ = new BlockContainer*[lower_num_];
    for (unsigned i = 0; i < lower_num_; ++i) {
      block_for_socket_[i] = new BlockContainer(
          block_for_deserialization->getSerializedBlockSize());
    }
    if (PrepareSocket() == false) return false;
    if (SetSocketNonBlocking(sock_fd_) == false) {
      return false;
    }
    LOG(INFO) << "exchange_id = " << state_.exchange_id_
              << " partition_offset = " << partition_offset
              << " Open: exhausted lower senders num = " << exhausted_lowers
              << " lower sender num = " << lower_num_ << std::endl;
    if (RegisterExchange() == false) {
      LOG(ERROR) << "Register Exchange with ID = " << state_.exchange_id_
                 << " fails!" << std::endl;
    }
    if (IsMaster()) {
      /* According to a bug reported by dsc, the master exchange upper should
       * check whether the other uppers have registered to the
       * exchangeTracker. Otherwise, a lower may fail to connect to the
       * exchangeTracker of some uppers when the lower nodes receive the
       * exchange lower, as some uppers have not yet registered the
       * exchange_id to the exchangeTracker.
       */
      LOG(INFO) << " exchange_id = " << state_.exchange_id_
                << " partition_offset = " << partition_offset
                << " Synchronizing...." << std::endl;
      IsOtherMergersRegistered();
      LOG(INFO) << " exchange_id = " << state_.exchange_id_
                << " partition_offset = " << partition_offset
                << " Synchronized! Then serialize and send its next segment "
                   "plan to all its lower senders" << std::endl;
      if (SerializeAndSendPlan() == false) return false;
    }
    if (CreateReceiverThread() == false) {
      return false;
    }
    CreatePerformanceInfo();
  }
  /// A synchronization barrier, in case of multiple expanded threads
  BarrierArrive();
  return true;
}
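// --- Illustrative sketch, not part of the original operator code. ---
// Each lower sender streams serialized blocks of a fixed size
// (getSerializedBlockSize() above, payload plus tail info). The receiver
// keeps one partially filled container per socket and only deserializes once
// a container is full. A hedged sketch of that accumulate-until-full step on
// a non-blocking socket; the project's actual receive loop may differ:
#include <unistd.h>
#include <cerrno>

namespace sketch {
// Returns true once `expected` bytes have accumulated in buf. `received`
// carries the fill state across calls, since epoll may deliver partial reads.
inline bool ReadSerializedBlock(int fd, char* buf, size_t expected,
                                size_t* received) {
  while (*received < expected) {
    const ssize_t n = read(fd, buf + *received, expected - *received);
    if (n > 0) {
      *received += static_cast<size_t>(n);
    } else if (n < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) {
      return false;  // no more data for now; epoll will signal again
    } else {
      return false;  // EOF or error; the caller distinguishes separately
    }
  }
  *received = 0;  // container full; reset for the next block
  return true;
}
}  // namespace sketch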
/**
 * @brief Method description: The open method fetches results from the left
 * child and copies them into a local block buffer. The block buffer is a
 * DynamicBlockBuffer, since all the expanded threads share the same buffer.
 */
bool PhysicalNestLoopJoin::Open(SegmentExecStatus *const exec_status,
                                const PartitionOffset &partition_offset) {
  RETURN_IF_CANCELLED(exec_status);
  RegisterExpandedThreadToAllBarriers();
  unsigned long long int timer;
  bool winning_thread = false;
  if (TryEntryIntoSerializedSection(0)) {
    // only the first of all the expanded threads performs the one-time setup
    ExpanderTracker::getInstance()->addNewStageEndpoint(
        pthread_self(), LocalStageEndPoint(stage_desc, "nest loop", 0));
    winning_thread = true;
    timer = curtick();
    block_buffer_ = new DynamicBlockBuffer();
    if (state_.join_condi_.size() == 0) {
      join_condi_process_ = WithoutJoinCondi;
    } else {
      join_condi_process_ = WithJoinCondi;
    }
    LOG(INFO) << "[NestloopJoin]: [the first thread opens the nestloopJoin "
                 "physical operator]" << std::endl;
  }
  RETURN_IF_CANCELLED(exec_status);
  state_.child_left_->Open(exec_status, partition_offset);
  RETURN_IF_CANCELLED(exec_status);
  BarrierArrive(0);
  NestLoopJoinContext *jtc = CreateOrReuseContext(crm_numa_sensitive);
  // create a new block to hold the results from the left child and append
  // every filled block to the dynamic buffer
  // jtc->block_for_asking_ = BlockStreamBase::createBlock(
  //     state_.input_schema_left_, state_.block_size_);
  CreateBlockStream(jtc->block_for_asking_, state_.input_schema_left_);
  while (state_.child_left_->Next(exec_status, jtc->block_for_asking_)) {
    if (exec_status->is_cancelled()) {
      if (NULL != jtc->block_for_asking_) {
        delete jtc->block_for_asking_;
        jtc->block_for_asking_ = NULL;
      }
      return false;
    }
    block_buffer_->atomicAppendNewBlock(jtc->block_for_asking_);
    // if (!jtc->block_for_asking_->isIsReference()) {
    CreateBlockStream(jtc->block_for_asking_, state_.input_schema_left_);
    // } else {
    //   CreateBlockStream(jtc->block_for_asking_, state_.input_schema_left_);
    //   jtc->block_for_asking_->setIsReference(false);
    // }
  }
  // the last block was created without storing results from the left child,
  // so release it
  if (NULL != jtc->block_for_asking_) {
    delete jtc->block_for_asking_;
    jtc->block_for_asking_ = NULL;
  }
  // when an expanded thread has finished its allocated work, it may be
  // called back here; note that the callback means the thread must exit
  if (ExpanderTracker::getInstance()->isExpandedThreadCallBack(
          pthread_self())) {
    UnregisterExpandedThreadToAllBarriers(1);
    LOG(INFO) << "[NestloopJoin]: [thread " << pthread_self()
              << " is called to exit]" << std::endl;
    return true;
  }
  BarrierArrive(1);  // ??ERROR
  // join_thread_context* jtc = new join_thread_context();
  // jtc->block_for_asking_ = BlockStreamBase::createBlock(
  //     state_.input_schema_right_, state_.block_size_);
  CreateBlockStream(jtc->block_for_asking_, state_.input_schema_right_);
  jtc->block_for_asking_->setEmpty();
  jtc->block_stream_iterator_ = jtc->block_for_asking_->createIterator();
  jtc->buffer_iterator_ = block_buffer_->createIterator();
  // underlying bug: since buffer_iterator_ may be NULL, it is necessary to
  // let every thread's buffer_iterator_ point to an empty block
  // jtc->buffer_stream_iterator_ =
  //     jtc->buffer_iterator_.nextBlock()->createIterator();
  InitContext(jtc);  // TODO: rename this function; it stores the thread
                     // context in the operator context
  RETURN_IF_CANCELLED(exec_status);
  state_.child_right_->Open(exec_status, partition_offset);
  return true;
}
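// --- Illustrative sketch, not part of the original operator code. ---
// After Open(), every thread's context holds an iterator over the shared
// left-side block buffer and a block iterator for right-side tuples, and
// Next() walks the classic nested loop. A hedged, container-based sketch of
// that control flow (the real operator iterates blocks, not vectors, and
// applies join_condi_process_ as the match predicate):
#include <utility>
#include <vector>

namespace sketch {
template <typename L, typename R, typename Match>
void NestedLoopJoin(const std::vector<L> &left, const std::vector<R> &right,
                    Match match, std::vector<std::pair<L, R>> *out) {
  for (const R &r : right) {   // tuples arriving from the right child
    for (const L &l : left) {  // tuples buffered from the left child
      if (match(l, r)) out->emplace_back(l, r);
    }
  }
}
}  // namespace sketch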