bool PhysicalSort::Open(const PartitionOffset &part_off) { /** * TODO(anyone): multi threads can be used to pipeline!!! */ swap_num_ = 0; temp_cur_ = 0; /** * first we can store all the data which will be bufferred * 1, buffer is the first phase. multi-threads will be applyed to the data * in the buffer. * 2, sort the data in the buffer, we choose quicksort to sort the records * by specifying the column to be sorted * 3, whether to register the buffer into the blockmanager. * */ BlockStreamBase *block_for_asking; state_.partition_offset_ = part_off; state_.child_->Open(state_.partition_offset_); if (sema_open_.try_wait()) { block_buffer_iterator_ = block_buffer_.createIterator(); open_finished_ = true; } else { while (!open_finished_) { usleep(1); } } if (CreateBlockStream(block_for_asking) == false) { LOG(ERROR) << "error in the create block stream!!!" << endl; return 0; } /** * phase 1: store the data in the buffer! * by using multi-threads to speed up */ unsigned block_offset = 0; unsigned tuple_count_sum = 0; BlockStreamBase::BlockStreamTraverseIterator *iterator_for_scan; while (state_.child_->Next(block_for_asking)) { tuple_count_sum += block_for_asking->getTuplesInBlock(); block_buffer_.atomicAppendNewBlock(block_for_asking); iterator_for_scan = block_buffer_.getBlock(block_offset)->createIterator(); void *tuple_ptr = 0; while ((tuple_ptr = iterator_for_scan->nextTuple()) != 0) { tuple_vector_.push_back(tuple_ptr); } block_offset++; if (CreateBlockStream(block_for_asking) == false) { LOG(ERROR) << "error in the create block stream!!!" << endl; return 0; } } /** * phase 2: sort the data in the buffer! * by using multi-threads to speed up? * TODO(anyone): whether to store the sorted data into the blockmanager. 
*/ // cout<<"check the memory usage!!!"<<endl; unsigned long long int time = curtick(); // order(state_.orderbyKey_,tuple_count_sum); Order(); // cout<<"the tuple_count is: "<<tuple_count_sum<<"Total time: // "<<getSecond(time)<<" seconds, the swap num is: "<<swap_num<<endl; return true; }
/**
 * @brief Method description : describe the open method which gets results from
 * the left child and copy them into its local buffer, say the block buffer. the
 * block buffer is a dynamic block buffer since all the expanded threads will
 * share the same block buffer.
 * @param exec_status cancellation/status handle; checked via
 *        RETURN_IF_CANCELLED at each blocking step.
 * @param partition_offset partition to open on; forwarded to both children.
 * @return true on success (or on a cooperative thread exit), false when the
 *         segment was cancelled while draining the left child.
 */
bool PhysicalNestLoopJoin::Open(SegmentExecStatus *const exec_status,
                                const PartitionOffset &partition_offset) {
  RETURN_IF_CANCELLED(exec_status);
  RegisterExpandedThreadToAllBarriers();
  unsigned long long int timer;
  bool winning_thread = false;
  // Exactly one thread wins the serialized section and performs the
  // one-time setup: stage endpoint registration, the shared block buffer,
  // and the join-condition dispatch choice.
  if (TryEntryIntoSerializedSection(0)) {  // the first thread of all need to do
    ExpanderTracker::getInstance()->addNewStageEndpoint(
        pthread_self(), LocalStageEndPoint(stage_desc, "nest loop", 0));
    winning_thread = true;
    timer = curtick();
    block_buffer_ = new DynamicBlockBuffer();
    if (state_.join_condi_.size() == 0) {
      join_condi_process_ = WithoutJoinCondi;
    } else {
      join_condi_process_ = WithJoinCondi;
    }
    LOG(INFO) << "[NestloopJoin]: [the first thread opens the nestloopJoin "
                 "physical operator]"
              << std::endl;
  }
  RETURN_IF_CANCELLED(exec_status);
  state_.child_left_->Open(exec_status, partition_offset);
  RETURN_IF_CANCELLED(exec_status);
  // Wait until every expanded thread has opened the left child before any
  // thread starts consuming it.
  BarrierArrive(0);
  NestLoopJoinContext *jtc = CreateOrReuseContext(crm_numa_sensitive);
  // create a new block to hold the results from the left child
  // and add results to the dynamic buffer
  // jtc->block_for_asking_ == BlockStreamBase::createBlock(
  //     state_.input_schema_left_,
  //     state_.block_size_);
  CreateBlockStream(jtc->block_for_asking_, state_.input_schema_left_);
  // Drain the left child: each filled block is handed to the shared buffer,
  // then a fresh block is created for the next Next() call.
  while (state_.child_left_->Next(exec_status, jtc->block_for_asking_)) {
    if (exec_status->is_cancelled()) {
      // Cancelled mid-drain: the in-flight block was never appended to the
      // buffer, so this thread still owns it and must free it.
      if (NULL != jtc->block_for_asking_) {
        delete jtc->block_for_asking_;
        jtc->block_for_asking_ = NULL;
      }
      return false;
    }
    block_buffer_->atomicAppendNewBlock(jtc->block_for_asking_);
    CreateBlockStream(jtc->block_for_asking_, state_.input_schema_left_);
  }
  // the last block is created without storing the results from the left
  // child, so it was never appended to the buffer; release it here.
  if (NULL != jtc->block_for_asking_) {
    delete jtc->block_for_asking_;
    jtc->block_for_asking_ = NULL;
  }
  // When an expanded thread has finished its allocated work it can be called
  // back here; the callback means this thread should exit now instead of
  // proceeding to the probe phase.
  if (ExpanderTracker::getInstance()->isExpandedThreadCallBack(
          pthread_self())) {
    UnregisterExpandedThreadToAllBarriers(1);
    LOG(INFO) << "[NestloopJoin]: [the" << pthread_self()
              << "the thread is called to exit]" << std::endl;
    return true;  // cooperative exit: not an error
  }
  // All remaining threads rendezvous before touching the completed buffer.
  BarrierArrive(1);  // ??ERROR
  // join_thread_context* jtc=new join_thread_context();
  // jtc->block_for_asking_ == BlockStreamBase::createBlock(
  //     state_.input_schema_right_,
  //     state_.block_size_);
  // Re-create the per-thread block, now shaped for the RIGHT child's schema,
  // for use during the probe phase.
  CreateBlockStream(jtc->block_for_asking_, state_.input_schema_right_);
  jtc->block_for_asking_->setEmpty();
  jtc->block_stream_iterator_ = jtc->block_for_asking_->createIterator();
  jtc->buffer_iterator_ = block_buffer_->createIterator();
  // underlying bug: as for buffer_iterator may be NULL, it's necessary to let
  // every buffer_iterator of each thread point to an empty block
  // jtc->buffer_stream_iterator_ =
  //     jtc->buffer_iterator_.nextBlock()->createIterator();
  InitContext(jtc);  // rename this function, here means to store the thread
                     // context in the operator context
  RETURN_IF_CANCELLED(exec_status);
  state_.child_right_->Open(exec_status, partition_offset);
  return true;
}