template <typename Dtype>
void BasePrefetchingDataLayer<Dtype>::InternalThreadEntry() {
#ifndef CPU_ONLY
  cudaStream_t stream;  // CUDA stream, created non-blocking
  if (Caffe::mode() == Caffe::GPU) {
    CUDA_CHECK(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
  }
#endif

  try {
    while (!must_stop()) {  // keep loading batches until interrupted
      Batch<Dtype>* batch = prefetch_free_.pop();  // take a free batch
      load_batch(batch);                           // fill it with data
#ifndef CPU_ONLY
      if (Caffe::mode() == Caffe::GPU) {
        batch->data_.data().get()->async_gpu_push(stream);
        if (this->output_labels_) {
          batch->label_.data().get()->async_gpu_push(stream);
        }
        CUDA_CHECK(cudaStreamSynchronize(stream));  // wait until the copy reaches the GPU
      }
#endif
      prefetch_full_.push(batch);  // hand the loaded batch to the consumer queue
    }
  } catch (boost::thread_interrupted&) {
    // Interrupted exception is expected on shutdown
  }
#ifndef CPU_ONLY
  if (Caffe::mode() == Caffe::GPU) {
    CUDA_CHECK(cudaStreamDestroy(stream));  // destroy the CUDA stream
  }
#endif
}
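For readers unfamiliar with the free/full hand-off above, here is a minimal, framework-free sketch of the same producer-consumer pattern. BlockingQueue, Batch, and the queue names below are illustrative stand-ins, not Caffe's actual classes; Caffe's own caffe::BlockingQueue works in the same spirit.

#include <condition_variable>
#include <cstdio>
#include <mutex>
#include <queue>
#include <thread>
#include <utility>

template <typename T>
class BlockingQueue {  // illustrative stand-in for caffe::BlockingQueue
 public:
  void push(T t) {
    { std::lock_guard<std::mutex> lock(mutex_); queue_.push(std::move(t)); }
    cond_.notify_one();
  }
  T pop() {  // blocks until an element is available
    std::unique_lock<std::mutex> lock(mutex_);
    cond_.wait(lock, [this] { return !queue_.empty(); });
    T t = std::move(queue_.front());
    queue_.pop();
    return t;
  }
 private:
  std::queue<T> queue_;
  std::mutex mutex_;
  std::condition_variable cond_;
};

struct Batch { int data; };  // placeholder for the real Batch<Dtype>

int main() {
  BlockingQueue<Batch*> prefetch_free, prefetch_full;
  Batch pool[3];
  for (auto& b : pool) prefetch_free.push(&b);   // pre-populate the free list

  std::thread producer([&] {                     // plays InternalThreadEntry()
    for (int i = 0; i < 10; ++i) {
      Batch* b = prefetch_free.pop();            // may block if all batches are in flight
      b->data = i;                               // "load_batch(batch)"
      prefetch_full.push(b);
    }
  });
  for (int i = 0; i < 10; ++i) {                 // plays Forward(): consume and recycle
    Batch* b = prefetch_full.pop();
    std::printf("consumed batch %d\n", b->data);
    prefetch_free.push(b);
  }
  producer.join();
}

Pre-populating the free queue is what bounds memory use: the prefetch thread can only run ahead of the consumer by as many batches as were allocated up front.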
template <typename Ftype, typename Btype>
void BasePrefetchingDataLayer<Ftype, Btype>::InternalThreadEntryN(size_t thread_id) {
#ifndef CPU_ONLY
  const bool use_gpu_transform = this->is_gpu_transform();
#endif
  static thread_local bool iter0 = this->phase_ == TRAIN;
  if (iter0 && this->net_inititialized_flag_ != nullptr) {
    this->net_inititialized_flag_->wait();
  } else {
    // nothing to wait -> initialize and start pumping
    std::lock_guard<std::mutex> lock(mutex_in_);
    InitializePrefetch();
    start_reading();
    iter0 = false;
  }
  try {
    while (!must_stop(thread_id)) {
      const size_t qid = this->queue_id(thread_id);  // queue this thread serves
#ifndef CPU_ONLY
      shared_ptr<Batch<Ftype>> batch = prefetches_free_[qid]->pop();
      CHECK_EQ((size_t) -1, batch->id());
      load_batch(batch.get(), thread_id, qid);
      if (Caffe::mode() == Caffe::GPU) {
        if (!use_gpu_transform) {
          batch->data_.async_gpu_push();
        }
        if (this->output_labels_) {
          batch->label_.async_gpu_push();
        }
        CUDA_CHECK(cudaStreamSynchronize(
            Caffe::th_stream_aux(Caffe::STREAM_ID_ASYNC_PUSH)));
      }
      prefetches_full_[qid]->push(batch);
#else
      shared_ptr<Batch<Ftype>> batch = prefetches_free_[qid]->pop();
      load_batch(batch.get(), thread_id, qid);
      prefetches_full_[qid]->push(batch);
#endif
      if (iter0) {
        if (this->net_iteration0_flag_ != nullptr) {
          this->net_iteration0_flag_->wait();
        }
        std::lock_guard<std::mutex> lock(mutex_out_);
        if (this->net_inititialized_flag_ != nullptr) {
          this->net_inititialized_flag_ = nullptr;  // no wait on the second round
          InitializePrefetch();
          start_reading();
        }
        if (this->auto_mode_) {
          break;
        }  // manual otherwise, thus keep rolling
        iter0 = false;
      }
    }
  } catch (boost::thread_interrupted&) {
  }
}
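The iter0 bookkeeping above is a one-shot gate: exactly one prefetch thread performs initialization while the others wait on a flag, and the gate dissolves after the first iteration. A minimal sketch of that gate, using std::call_once and a std::shared_future as stand-ins for NVCaffe's actual flag class (an assumption for illustration, not NVCaffe's implementation):

#include <cstdio>
#include <future>
#include <mutex>
#include <thread>
#include <vector>

std::promise<void> net_initialized;  // plays the role of net_inititialized_flag_
std::shared_future<void> init_done = net_initialized.get_future().share();
std::once_flag init_once;

void worker(int id, bool do_init) {
  if (do_init) {
    // First worker initializes prefetch state and unblocks everyone else.
    std::call_once(init_once, [] {
      std::printf("InitializePrefetch()\n");
      net_initialized.set_value();
    });
  } else {
    init_done.wait();  // analogous to net_inititialized_flag_->wait()
  }
  std::printf("worker %d pumping batches\n", id);
}

int main() {
  std::vector<std::thread> ts;
  for (int i = 0; i < 4; ++i) ts.emplace_back(worker, i, i == 0);
  for (auto& t : ts) t.join();
}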
template <typename Dtype>
void TransformingFastHDF5InputLayer<Dtype>::InternalThreadEntry() {
  try {
    while (!must_stop()) {
      Batch* batch = prefetch_free_.pop();  // same free/full hand-off, no GPU push
      load_batch(batch);
      prefetch_full_.push(batch);
    }
  } catch (boost::thread_interrupted&) {
    // Interrupted exception is expected on shutdown
  }
}
/**
 * Polling for events on an inner thread allows processing of management
 * messages like buffer connection immediately, even if the user is not
 * polling. Otherwise buffer constructors would block indefinitely.
 *
 * Deep learning workloads are about sending small numbers of large messages,
 * in which case this model works great. If the library were to be used to
 * exchange large numbers of short messages, it would be useful to split
 * management and data messages over two different queue pairs. User threads
 * could then wait or poll on the data queue pair directly.
 */
void RDMAAdapter::InternalThreadEntry() {
  while (!must_stop()) {
    ibv_cq* cq;
    void* cq_context;
    // Block until the completion channel signals an event, acknowledge it,
    // and re-arm notifications before draining the completion queue.
    CHECK(!ibv_get_cq_event(channel_, &cq, &cq_context));
    CHECK(cq == cq_);
    ibv_ack_cq_events(cq, 1);
    CHECK(!ibv_req_notify_cq(cq_, 0));

    int ne = ibv_poll_cq(cq_, MAX_CONCURRENT_WRITES * 2,
                         static_cast<ibv_wc*>(wc_));
    CHECK_GE(ne, 0);

    for (int i = 0; i < ne; ++i) {
      CHECK(wc_[i].status == IBV_WC_SUCCESS)
          << "Failed status \n"
          << ibv_wc_status_str(wc_[i].status) << " " << wc_[i].status << " "
          << static_cast<int>(wc_[i].wr_id) << " " << wc_[i].vendor_err;

      if (wc_[i].opcode == IBV_WC_RECV_RDMA_WITH_IMM) {
        // Data message, add it to user received queue
        RDMAChannel* channel = reinterpret_cast<RDMAChannel*>(wc_[i].wr_id);
        channel->recv();
        int id = wc_[i].imm_data;
        if (id >= CTRL_ID_OFFSET) {
          // ctrl signal
          ctrl_received_.push(channel->buffers_[id - CTRL_ID_OFFSET]);
        } else {
          // data
          received_.push(channel->buffers_[id]);
        }
      } else if (wc_[i].opcode & IBV_WC_RECV) {
        // Buffer connection message
        RDMAChannel* channel = reinterpret_cast<RDMAChannel*>(wc_[i].wr_id);
        int id = wc_[i].imm_data;
        channel->memory_regions_queue_.push(channel->memory_regions_[id]);
        CHECK(id == channel->memory_regions_received_++);
        CHECK(!ibv_dereg_mr(channel->region_regions_[id]));
      }
    }
  }
}
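The imm_data handling above overloads a single 32-bit immediate value as a buffer index, reserving indices at or above CTRL_ID_OFFSET for control signals so one receive path can serve both queues. A hedged sketch of just that convention; the offset value and names are assumptions for illustration, not taken from the real adapter:

#include <cstdint>
#include <cstdio>

const uint32_t CTRL_ID_OFFSET = 1u << 16;  // assumed split point, not the real constant

void dispatch(uint32_t imm_data) {
  if (imm_data >= CTRL_ID_OFFSET) {
    // high range: control signal -> ctrl_received_
    std::printf("ctrl buffer %u\n", imm_data - CTRL_ID_OFFSET);
  } else {
    // low range: payload -> received_
    std::printf("data buffer %u\n", imm_data);
  }
}

int main() {
  dispatch(3);                   // data message
  dispatch(CTRL_ID_OFFSET + 1);  // control signal
}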
void DataReader::interfaceKernel() {
  try {
    while (!must_stop()) {
      read_one();  // read one entry and hand it to the consumer queue
    }
  } catch (boost::thread_interrupted&) {
    // Interrupted exception is expected on shutdown
  }
}