bool PhysicalProjectionScan::Open(SegmentExecStatus* const exec_status,
                                  const PartitionOffset& kPartitionOffset) {
  RETURN_IF_CANCELLED(exec_status);

  RegisterExpandedThreadToAllBarriers();

  if (TryEntryIntoSerializedSection()) {
    /* this is the first expanded thread */
    PartitionStorage* partition_handle_;
    if (NULL ==
        (partition_handle_ = BlockManager::getInstance()->GetPartitionHandle(
             PartitionID(state_.projection_id_, kPartitionOffset)))) {
      LOG(ERROR) << PartitionID(state_.projection_id_, kPartitionOffset)
                        .getName()
                        .c_str()
                 << CStrError(rNoPartitionIdScan) << std::endl;
      SetReturnStatus(false);
    } else {
      partition_reader_iterator_ =
          partition_handle_->CreateAtomicReaderIterator();
      SetReturnStatus(true);
    }

#ifdef AVOID_CONTENTION_IN_SCAN
    unsigned long long start = curtick();
    ChunkReaderIterator* chunk_reader_it;
    ChunkReaderIterator::block_accessor* ba;
    while ((chunk_reader_it = partition_reader_iterator_->NextChunk())) {
      while (chunk_reader_it->GetNextBlockAccessor(ba)) {
        ba->GetBlockSize();
        input_dataset_.input_data_blocks_.push_back(ba);
      }
    }
#endif
    ExpanderTracker::getInstance()->addNewStageEndpoint(
        pthread_self(), LocalStageEndPoint(stage_src, "Scan", 0));
    perf_info_ =
        ExpanderTracker::getInstance()->getPerformanceInfo(pthread_self());
    perf_info_->initialize();
  }
  BarrierArrive();
  return GetReturnStatus();
}
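/*
 * The TryEntryIntoSerializedSection()/BarrierArrive() pair used above (and in
 * every Open() below) follows a common "first expanded thread does the
 * one-time setup, everyone waits at a barrier" pattern. The following is a
 * minimal, self-contained sketch of that idea using standard C++20 primitives
 * only; the names try_entry_into_serialized_section and worker_open are
 * illustrative and are not part of this code base.
 */
#include <atomic>
#include <barrier>
#include <iostream>
#include <thread>
#include <vector>

std::atomic<bool> serialized_entry{false};
std::barrier sync_point{4};  // 4 == number of expanded threads in this sketch

bool try_entry_into_serialized_section() {
  // Exactly one thread observes the transition false -> true.
  bool expected = false;
  return serialized_entry.compare_exchange_strong(expected, true);
}

void worker_open(int id) {
  if (try_entry_into_serialized_section()) {
    std::cout << "thread " << id << " does the one-time initialization\n";
  }
  sync_point.arrive_and_wait();  // analogous to BarrierArrive()
  std::cout << "thread " << id << " proceeds past Open()\n";
}

int main() {
  std::vector<std::thread> threads;
  for (int i = 0; i < 4; ++i) threads.emplace_back(worker_open, i);
  for (auto& t : threads) t.join();
}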
bool IndexScanIterator::Open(const PartitionOffset& partition_off) {
  AtomicPushBlockStream(
      BlockStreamBase::createBlockWithDesirableSerilaizedSize(
          state_.schema_, state_.block_size_));
  if (TryEntryIntoSerializedSection()) {
    /* this is the first expanded thread */
    csb_index_list_ =
        IndexManager::getInstance()->getAttrIndex(state_.index_id_);
    PartitionStorage* partition_handle_;
    if ((partition_handle_ = BlockManager::getInstance()->getPartitionHandle(
             PartitionID(state_.projection_id_, partition_off))) == 0) {
      printf("The partition[%s] does not exist!\n",
             PartitionID(state_.projection_id_, partition_off)
                 .getName()
                 .c_str());
      SetReturnStatus(false);
    } else {
      partition_reader_iterator_ =
          partition_handle_->createAtomicReaderIterator();
      // chunk_reader_iterator_ = partition_reader_iterator_->nextChunk();
      SetReturnStatus(true);
    }
  }
  BarrierArrive();
  return GetReturnStatus();
}
/**
 * Build a hash table first, which stores the tuples to be deleted in a hash
 * manner so as to accelerate the probe phase.
 */
bool PhysicalDeleteFilter::Open(SegmentExecStatus* const exec_status,
                                const PartitionOffset& partition_offset) {
#ifdef TIME
  startTimer(&timer);
#endif
  RETURN_IF_CANCELLED(exec_status);

  RegisterExpandedThreadToAllBarriers();
  int ret = rSuccess;
  int64_t timer;
  bool winning_thread = false;
  if (TryEntryIntoSerializedSection(0)) {
    winning_thread = true;
    ExpanderTracker::getInstance()->addNewStageEndpoint(
        pthread_self(),
        LocalStageEndPoint(stage_desc, "delete filter build", 0));
    unsigned output_index = 0;
    for (unsigned i = 0; i < state_.filter_key_deleted_.size(); i++) {
      joinIndex_table_to_output_[i] = output_index;
      output_index++;
    }
    for (unsigned i = 0; i < state_.payload_base_.size(); i++) {
      payload_table_to_output_[i] = output_index;
      output_index++;
    }
    // start to create the hash table, including the hash function to be used
    // and the hash table structure
    hash_ = PartitionFunctionFactory::createBoostHashFunction(
        state_.hashtable_bucket_num_);
    int64_t hash_table_build = curtick();
    hashtable_ = new BasicHashTable(
        state_.hashtable_bucket_num_, state_.hashtable_bucket_size_,
        state_.input_schema_left_->getTupleMaxSize());
    if (NULL == hashtable_) {
      LOG(ERROR) << "hashtable allocation failed"
                 << "[" << rMemoryAllocationFailed << "]" << endl;
      ret = rMemoryAllocationFailed;
      return false;
    }
#ifdef _DEBUG_
    consumed_tuples_from_left = 0;
#endif
    // start to create the join expression, based on which the deleted tuples
    // can be probed
    //    QNode* expr = createEqualJoinExpression(
    //        state_.hashtable_schema_, state_.input_schema_right_,
    //        state_.filter_key_deleted_, state_.filter_key_base_);
    //    if (NULL == expr) {
    //      ret = rSuccess;
    //      LOG(ERROR) << "The generation of the equal join expression for "
    //                    "delete filter failed" << endl;
    //    }
    //    ticks start = curtick();
    //
    //    // start to generate the dedicated function, based on which the probe
    //    // is eventually acted, including using llvm and the function pointer
    //    if (Config::enable_codegen) {
    //      eftt_ = getExprFuncTwoTuples(expr, state_.hashtable_schema_,
    //                                   state_.input_schema_right_);
    //      memcpy_ = getMemcpy(state_.hashtable_schema_->getTupleMaxSize());
    //      memcat_ = getMemcat(state_.hashtable_schema_->getTupleMaxSize(),
    //                          state_.input_schema_right_->getTupleMaxSize());
    //    }
    //    if (eftt_) {
    //      cff_ = PhysicalDeleteFilter::isMatchCodegen;
    //      printf("Codegen(delete filter) succeed(%4.3fms)!\n",
    //             getMilliSecond(start));
    //    } else {
    cff_ = PhysicalDeleteFilter::isMatch;
    //      printf("Codegen(delete filter) failed!\n");
    //    }
    //    delete expr;
  }

  /**
   * For performance concerns, the following line should be placed just after
   * "RegisterNewThreadToAllBarriers();" in order to accelerate the open
   * response time.
   */
  LOG(INFO) << "delete filter operator begin to open left child" << endl;
  state_.child_left_->Open(exec_status, partition_offset);
  LOG(INFO) << "delete filter operator finished opening left child" << endl;
  BarrierArrive(0);
  BasicHashTable::Iterator tmp_it = hashtable_->CreateIterator();

  void* cur;
  void* tuple_in_hashtable;
  unsigned bn;

  void* key_in_input;
  void* key_in_hashtable;
  void* value_in_input;
  void* value_in_hashtable;
  // create the context for the multiple threads that build the hash table
  DeleteFilterThreadContext* dftc = CreateOrReuseContext(crm_numa_sensitive);
  const Schema* input_schema = state_.input_schema_left_->duplicateSchema();
  // filter_key_deleted_[0] is used here because the data is partitioned
  // on the first column of the join index
  const Operate* op = input_schema->getcolumn(state_.filter_key_deleted_[0])
                          .operate->duplicateOperator();
  const unsigned buckets = state_.hashtable_bucket_num_;

  int64_t start = curtick();
  int64_t processed_tuple_count = 0;

  LOG(INFO) << "delete filter operator begin to call left child's next()"
            << endl;
  RETURN_IF_CANCELLED(exec_status);
  while (state_.child_left_->Next(exec_status, dftc->l_block_for_asking_)) {
    RETURN_IF_CANCELLED(exec_status);
    delete dftc->l_block_stream_iterator_;
    dftc->l_block_stream_iterator_ =
        dftc->l_block_for_asking_->createIterator();
    while ((cur = dftc->l_block_stream_iterator_->nextTuple())) {
#ifdef _DEBUG_
      processed_tuple_count++;
      lock_.acquire();
      consumed_tuples_from_left++;
      lock_.release();
#endif
      const void* key_addr =
          input_schema->getColumnAddess(state_.filter_key_deleted_[0], cur);
      bn = op->getPartitionValue(key_addr, buckets);
      tuple_in_hashtable = hashtable_->atomicAllocate(bn);
      if (memcpy_)
        memcpy_(tuple_in_hashtable, cur);
      else
        input_schema->copyTuple(cur, tuple_in_hashtable);
    }
    dftc->l_block_for_asking_->setEmpty();
  }
  //  printf("%d cycles per tuple!\n",
  //         (curtick() - start) / processed_tuple_count);
  unsigned tmp = 0;
#ifdef _DEBUG_
  tuples_in_hashtable = 0;
  produced_tuples = 0;
  consumed_tuples_from_right = 0;
#endif
  if (ExpanderTracker::getInstance()->isExpandedThreadCallBack(
          pthread_self())) {
    UnregisterExpandedThreadToAllBarriers(1);
    //    printf("<<<<<<<<<<<<<<<<<Join open detected call back "
    //           "signal!>>>>>>>>>>>>>>>>>\n");
    return true;
  }
  BarrierArrive(1);
  //  if (winning_thread) {
  //    hashtable->report_status();
  //    printf("Hash Table Build time: %4.4f\n", getMilliSecond(timer));
  //  }
  //  hashtable->report_status();
  //  printf("join open consume %d tuples\n", consumed_tuples_from_left);
  RETURN_IF_CANCELLED(exec_status);
  state_.child_right_->Open(exec_status, partition_offset);
  RETURN_IF_CANCELLED(exec_status);
  LOG(INFO) << "delete filter operator finished opening right child" << endl;
  return true;
}
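/*
 * The build/probe idea the delete filter's doc comment describes can be shown
 * with a much smaller sketch: hash the keys of the tuples to be deleted, then
 * keep only the base tuples whose key does not probe into that set. This is
 * illustrative only; the operator above uses its own BasicHashTable (storing
 * whole tuples per bucket), not std::unordered_set.
 */
#include <iostream>
#include <unordered_set>
#include <vector>

int main() {
  std::vector<int> keys_to_delete = {3, 7, 11};         // "left" input
  std::vector<int> base_keys = {1, 3, 5, 7, 9, 11, 13};  // "right" input

  // build phase: hash the keys of the tuples to be deleted
  std::unordered_set<int> deleted(keys_to_delete.begin(), keys_to_delete.end());

  // probe phase: a base tuple survives only if its key is not in the set
  for (int key : base_keys) {
    if (deleted.count(key) == 0) {
      std::cout << key << ' ';
    }
  }
  std::cout << '\n';  // prints: 1 5 9 13
}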
/**
 * Note that the serialized block's size differs from the others': it carries
 * tail info. The exchange merger sits at the end of one segment of the plan,
 * so it is the "stage_src" for this stage.
 */
bool ExchangeMerger::Open(const PartitionOffset& partition_offset) {
  unsigned long long int start = curtick();
  RegisterExpandedThreadToAllBarriers();
  if (TryEntryIntoSerializedSection()) {  // the first arrived thread does the
                                          // serialized initialization
    exhausted_lowers = 0;
    this->partition_offset_ = partition_offset;
    lower_num_ = state_.lower_id_list_.size();
    socket_fd_lower_list_ = new int[lower_num_];
    for (unsigned i = 0; i < lower_num_; ++i) {
      socket_fd_lower_list_[i] = -1;
    }
    // buffer all deserialized blocks coming from every socket
    all_merged_block_buffer_ = new BlockStreamBuffer(
        state_.block_size_, BUFFER_SIZE_IN_EXCHANGE, state_.schema_);
    ExpanderTracker::getInstance()->addNewStageEndpoint(
        pthread_self(),
        LocalStageEndPoint(stage_src, "Exchange", all_merged_block_buffer_));

    // if one of block_for_socket_ is full, it is deserialized into
    // block_for_deserialization and sent to all_merged_block_buffer_
    block_for_deserialization =
        BlockStreamBase::createBlock(state_.schema_, state_.block_size_);

    // store a block for each socket; the received block is serialized
    block_for_socket_ = new BlockContainer*[lower_num_];
    for (unsigned i = 0; i < lower_num_; ++i) {
      block_for_socket_[i] = new BlockContainer(
          block_for_deserialization->getSerializedBlockSize());
    }
    if (PrepareSocket() == false) return false;
    if (SetSocketNonBlocking(sock_fd_) == false) {
      return false;
    }
    LOG(INFO) << "exchange_id = " << state_.exchange_id_
              << " partition_offset = " << partition_offset
              << " Open: exhausted lower senders num = " << exhausted_lowers
              << " lower sender num = " << lower_num_ << std::endl;

    if (RegisterExchange() == false) {
      LOG(ERROR) << "Register Exchange with ID = " << state_.exchange_id_
                 << " fails!" << std::endl;
    }
    if (IsMaster()) {
      /* According to a bug reported by dsc, the master exchange upper should
       * check whether the other uppers have registered to the exchangeTracker.
       * Otherwise, the lower may fail to connect to the exchangeTracker of
       * some uppers when the lower nodes receive the exchange lower, as some
       * uppers have not registered the exchange_id to the exchangeTracker.
       */
      LOG(INFO) << " exchange_id = " << state_.exchange_id_
                << " partition_offset = " << partition_offset
                << " Synchronizing...." << std::endl;
      IsOtherMergersRegistered();
      LOG(INFO) << " exchange_id = " << state_.exchange_id_
                << " partition_offset = " << partition_offset
                << " Synchronized! Then serialize and send its next segment "
                   "plan to all its lower senders"
                << std::endl;
      if (SerializeAndSendPlan() == false) return false;
    }
    if (CreateReceiverThread() == false) {
      return false;
    }
    CreatePerformanceInfo();
  }
  /// A synchronization barrier, in case of multiple expanded threads
  BarrierArrive();
  return true;
}
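/*
 * SetSocketNonBlocking(sock_fd_) above is not shown in this excerpt. A typical
 * POSIX implementation of such a helper uses fcntl, as sketched below, so that
 * the receiver thread can poll many lower senders without blocking on a single
 * recv(). The function name set_socket_non_blocking is illustrative, not the
 * project's actual helper.
 */
#include <fcntl.h>

bool set_socket_non_blocking(int fd) {
  int flags = fcntl(fd, F_GETFL, 0);        // read the current descriptor flags
  if (flags == -1) return false;
  return fcntl(fd, F_SETFL, flags | O_NONBLOCK) != -1;  // add O_NONBLOCK
}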
/**
 * First, store all the data that will be buffered.
 * 1. Buffering is the first phase; multiple threads are applied to fill the
 *    buffer.
 * 2. Sort the data in the buffer; stable_sort() is chosen so the records are
 *    ordered by the specified columns while the relative order of equivalent
 *    records is preserved.
 * 3. Decide whether to register the buffer into the block manager.
 */
bool PhysicalSort::Open(SegmentExecStatus *const exec_status,
                        const PartitionOffset &part_off) {
  RETURN_IF_CANCELLED(exec_status);

  RegisterExpandedThreadToAllBarriers();
  if (TryEntryIntoSerializedSection(0)) {
    all_cur_ = 0;
    thread_id_ = -1;
    all_tuples_.clear();
    block_buffer_ = new DynamicBlockBuffer();
  }
  BarrierArrive(0);
  BlockStreamBase *block_for_asking;
  if (CreateBlock(block_for_asking) == false) {
    LOG(ERROR) << "error in creating the block stream!" << endl;
    return false;
  }
  //  state_.partition_offset_ = part_off;
  state_.child_->Open(exec_status, part_off);
  RETURN_IF_CANCELLED(exec_status);
  /**
   * phase 1: store the data in the buffer,
   * using multiple threads to speed it up
   */
  vector<void *> thread_tuple;
  thread_tuple.clear();
  void *tuple_ptr = NULL;
  BlockStreamBase::BlockStreamTraverseIterator *block_it;
  while (state_.child_->Next(exec_status, block_for_asking)) {
    RETURN_IF_CANCELLED(exec_status);
    block_buffer_->atomicAppendNewBlock(block_for_asking);
    block_it = block_for_asking->createIterator();
    while (NULL != (tuple_ptr = block_it->nextTuple())) {
      thread_tuple.push_back(tuple_ptr);
    }
    if (NULL != block_it) {
      delete block_it;
      block_it = NULL;
    }
    if (CreateBlock(block_for_asking) == false) {
      LOG(ERROR) << "error in creating the block stream!" << endl;
      return false;
    }
  }
  if (NULL != block_for_asking) {
    delete block_for_asking;
    block_for_asking = NULL;
  }
  lock_->acquire();
  all_tuples_.insert(all_tuples_.end(), thread_tuple.begin(),
                     thread_tuple.end());
  lock_->release();
  thread_tuple.clear();
  // guarantee that block_buffer_ has received all data blocks completely
  BarrierArrive(1);

  // phase 2: sort the data in the buffer, with just one thread
  if (TryEntryIntoSerializedSection(1)) {
    // reverse the order of order_by_attrs_ to preserve the relative ordering
    // of equivalent elements
    reverse(state_.order_by_attrs_.begin(), state_.order_by_attrs_.end());
    // one expression evaluated for 2 tuples would overwrite its result, so the
    // expression is copied and each of the 2 tuples is evaluated with its own
    state_.order_by_attrs_copy_ = state_.order_by_attrs_;
    OperFuncInfoData oper_info;
    fcinfo = &oper_info;
    state_.compare_funcs_ =
        new DataTypeOperFunc[state_.order_by_attrs_.size()][2];

    for (unsigned i = 0; i < state_.order_by_attrs_.size(); ++i) {
      state_.order_by_attrs_copy_[i].first =
          state_.order_by_attrs_[i].first->ExprCopy();  // deep copy
      state_.order_by_attrs_[i].first->InitExprAtPhysicalPlan();
      state_.order_by_attrs_copy_[i].first->InitExprAtPhysicalPlan();
      state_.compare_funcs_[i][0] =
          DataTypeOper::data_type_oper_func_[state_.order_by_attrs_[i]
                                                 .first->get_type_]
                                            [OperType::oper_less];
      state_.compare_funcs_[i][1] =
          DataTypeOper::data_type_oper_func_[state_.order_by_attrs_[i]
                                                 .first->get_type_]
                                            [OperType::oper_great];
    }
    //    int64_t time = curtick();
    state_.eecnxt_.schema[0] = state_.input_schema_;
    state_.eecnxt1_.schema[0] = state_.input_schema_;
    RETURN_IF_CANCELLED(exec_status);
    cmp_state_ = &state_;
    Order();
  }
  BarrierArrive(2);
  return true;
}
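/*
 * The reversal of order_by_attrs_ combined with a stable sort hints at the
 * classic multi-pass technique: stable-sorting by the least significant key
 * first and the most significant key last yields a lexicographic multi-key
 * order, because a stable sort preserves the relative order of equal keys.
 * The sketch below is only an illustration of that technique with
 * std::stable_sort; it is not the operator's Order() routine.
 */
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

struct Row {
  std::string name;
  int age;
};

int main() {
  std::vector<Row> rows = {{"bob", 31}, {"amy", 25}, {"cat", 31}, {"dan", 25}};

  // ORDER BY age, name  ->  stable-sort by name first (least significant),
  // then by age (most significant).
  std::stable_sort(rows.begin(), rows.end(),
                   [](const Row &a, const Row &b) { return a.name < b.name; });
  std::stable_sort(rows.begin(), rows.end(),
                   [](const Row &a, const Row &b) { return a.age < b.age; });

  for (const auto &r : rows) std::cout << r.age << ' ' << r.name << '\n';
  // 25 amy / 25 dan / 31 bob / 31 cat
}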
/**
 * @brief Method description: the open method gets results from the left child
 * and copies them into its local buffer, i.e. the block buffer. The block
 * buffer is a dynamic block buffer, since all the expanded threads share the
 * same block buffer.
 */
bool PhysicalNestLoopJoin::Open(SegmentExecStatus *const exec_status,
                                const PartitionOffset &partition_offset) {
  RETURN_IF_CANCELLED(exec_status);
  RegisterExpandedThreadToAllBarriers();
  unsigned long long int timer;
  bool winning_thread = false;
  if (TryEntryIntoSerializedSection(0)) {  // the first thread of all does this
    ExpanderTracker::getInstance()->addNewStageEndpoint(
        pthread_self(), LocalStageEndPoint(stage_desc, "nest loop", 0));
    winning_thread = true;
    timer = curtick();
    block_buffer_ = new DynamicBlockBuffer();
    if (state_.join_condi_.size() == 0) {
      join_condi_process_ = WithoutJoinCondi;
    } else {
      join_condi_process_ = WithJoinCondi;
    }
    LOG(INFO) << "[NestloopJoin]: [the first thread opens the nestloopJoin "
                 "physical operator]" << std::endl;
  }
  RETURN_IF_CANCELLED(exec_status);
  state_.child_left_->Open(exec_status, partition_offset);
  RETURN_IF_CANCELLED(exec_status);
  BarrierArrive(0);
  NestLoopJoinContext *jtc = CreateOrReuseContext(crm_numa_sensitive);
  // create a new block to hold the results from the left child
  // and add the results to the dynamic buffer
  //  jtc->block_for_asking_ == BlockStreamBase::createBlock(
  //      state_.input_schema_left_, state_.block_size_);
  CreateBlockStream(jtc->block_for_asking_, state_.input_schema_left_);
  //  auto temp = jtc->block_for_asking_->getBlock();
  //  cout << "temp start" << temp << endl;
  //  cout << "init block_for_asking_ : " << jtc->block_for_asking_->getBlock()
  //       << " is reference : " << jtc->block_for_asking_->isIsReference()
  //       << endl;
  while (state_.child_left_->Next(exec_status, jtc->block_for_asking_)) {
    if (exec_status->is_cancelled()) {
      if (NULL != jtc->block_for_asking_) {
        delete jtc->block_for_asking_;
        jtc->block_for_asking_ = NULL;
      }
      return false;
    }
    //    cout << "after assign start :" << jtc->block_for_asking_->getBlock()
    //         << " is reference : " << jtc->block_for_asking_->isIsReference()
    //         << endl;
    block_buffer_->atomicAppendNewBlock(jtc->block_for_asking_);
    //    if (!jtc->block_for_asking_->isIsReference()) {
    CreateBlockStream(jtc->block_for_asking_, state_.input_schema_left_);
    //    } else {
    //      // cout << "temp after" << temp << endl;
    //      // delete temp;
    //      CreateBlockStream(jtc->block_for_asking_,
    //                        state_.input_schema_left_);
    //      jtc->block_for_asking_->setIsReference(false);
    //    }
    //    cout << "new start :" << jtc->block_for_asking_->getBlock()
    //         << " is reference : " << jtc->block_for_asking_->isIsReference()
    //         << endl;
  }
  //  cout << "buffer_size_ : " << block_buffer_->GetBufferSize() << endl;
  // the last block was created without storing any results from the left
  // child, so it is released here
  if (NULL != jtc->block_for_asking_) {
    delete jtc->block_for_asking_;
    jtc->block_for_asking_ = NULL;
  }
  // When an expanded thread has finished its allocated work, it may be called
  // back here. Note that the callback means the thread should exit.
  if (ExpanderTracker::getInstance()->isExpandedThreadCallBack(
          pthread_self())) {
    UnregisterExpandedThreadToAllBarriers(1);
    LOG(INFO) << "[NestloopJoin]: [the " << pthread_self()
              << " thread is called to exit]" << std::endl;
    return true;
  }
  BarrierArrive(1);  // ??ERROR
  //  join_thread_context* jtc = new join_thread_context();
  //  jtc->block_for_asking_ == BlockStreamBase::createBlock(
  //      state_.input_schema_right_, state_.block_size_);
  CreateBlockStream(jtc->block_for_asking_, state_.input_schema_right_);
  jtc->block_for_asking_->setEmpty();
  jtc->block_stream_iterator_ = jtc->block_for_asking_->createIterator();
  jtc->buffer_iterator_ = block_buffer_->createIterator();
  // underlying bug: since buffer_iterator_ may be NULL, it is necessary to let
  // every thread's buffer_iterator_ point to an empty block
  //  jtc->buffer_stream_iterator_ =
  //      jtc->buffer_iterator_.nextBlock()->createIterator();
  InitContext(jtc);  // rename this function; here it means storing the thread
                     // context in the operator context
  RETURN_IF_CANCELLED(exec_status);
  state_.child_right_->Open(exec_status, partition_offset);
  return true;
}
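/*
 * Open() above only buffers the left (build) side; the probe itself happens in
 * Next(), which is not part of this excerpt. The sketch below shows only the
 * per-tuple probe loop that gives the nested-loop join its name: every right
 * tuple is compared against every buffered left tuple. It is an illustration
 * with plain int keys and an equality condition, not the operator's code.
 */
#include <iostream>
#include <vector>

int main() {
  std::vector<int> left_buffer = {1, 2, 3, 4};  // collected during Open()
  std::vector<int> right_stream = {2, 4, 6};    // consumed during Next()

  for (int r : right_stream) {
    for (int l : left_buffer) {
      if (l == r) {  // the join condition; equality here for simplicity
        std::cout << "match: " << l << '\n';
      }
    }
  }
}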