Example #1
void BasePrefetchingDataLayer<Dtype>::InternalThreadEntry() {
#ifndef CPU_ONLY
  cudaStream_t stream;
  if (Caffe::mode() == Caffe::GPU) {
    CUDA_CHECK(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
  }
#endif

  try {
    while (!must_stop()) {
      Batch<Dtype>* batch = prefetch_free_.pop();
      load_batch(batch);
#ifndef CPU_ONLY
      if (Caffe::mode() == Caffe::GPU) {
        batch->data_.data().get()->async_gpu_push(stream);
        CUDA_CHECK(cudaStreamSynchronize(stream));
      }
#endif
      prefetch_full_.push(batch);
    }
  } catch (boost::thread_interrupted&) {
    // Interrupted exception is expected on shutdown
  }
#ifndef CPU_ONLY
  if (Caffe::mode() == Caffe::GPU) {
    CUDA_CHECK(cudaStreamDestroy(stream));
  }
#endif
}
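All of the entry points in these examples share the same producer/consumer structure: the prefetch thread pops an empty Batch from prefetch_free_, fills it, and pushes it onto prefetch_full_ for the network's Forward pass, which later recycles the buffer. The sketch below isolates that pattern; it is a minimal stand-in, assuming a simplified BlockingQueue, Batch, and fixed buffer pool rather than Caffe's actual caffe::BlockingQueue and caffe::Batch<Dtype>.

#include <condition_variable>
#include <iostream>
#include <mutex>
#include <queue>
#include <thread>
#include <vector>

// Minimal stand-in for a blocking queue: pop() waits until an item is available.
template <typename T>
class BlockingQueue {
 public:
  void push(T item) {
    { std::lock_guard<std::mutex> lock(mutex_); queue_.push(std::move(item)); }
    cond_.notify_one();
  }
  T pop() {
    std::unique_lock<std::mutex> lock(mutex_);
    cond_.wait(lock, [this] { return !queue_.empty(); });
    T item = std::move(queue_.front());
    queue_.pop();
    return item;
  }
 private:
  std::queue<T> queue_;
  std::mutex mutex_;
  std::condition_variable cond_;
};

struct Batch { std::vector<float> data_; };  // placeholder for caffe::Batch<Dtype>

int main() {
  BlockingQueue<Batch*> prefetch_free, prefetch_full;
  Batch buffers[3];                               // small fixed buffer pool, as in Caffe
  for (Batch& b : buffers) prefetch_free.push(&b);

  std::thread prefetcher([&] {                    // plays the role of InternalThreadEntry()
    for (int i = 0; i < 10; ++i) {
      Batch* batch = prefetch_free.pop();         // wait for a free buffer
      batch->data_.assign(4, static_cast<float>(i));  // "load_batch": fill it with data
      prefetch_full.push(batch);                  // hand it to the consumer
    }
  });

  for (int i = 0; i < 10; ++i) {                  // consumer: the network's Forward pass
    Batch* batch = prefetch_full.pop();
    std::cout << "batch " << i << ": " << batch->data_[0] << std::endl;
    prefetch_free.push(batch);                    // recycle the buffer
  }
  prefetcher.join();
  return 0;
}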
Example #2
void BasePrefetchingDataLayer<Dtype>::InternalThreadEntry() {
#ifndef CPU_ONLY
  cudaStream_t stream;  // CUDA stream, created below as non-blocking
  if (Caffe::mode() == Caffe::GPU) {
    CUDA_CHECK(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
  }
#endif

  try {
    while (!must_stop()) {  // keep loading batches until asked to stop
      Batch<Dtype>* batch = prefetch_free_.pop();  // take a free batch
      load_batch(batch);  // fill it with a batch of data
#ifndef CPU_ONLY
      if (Caffe::mode() == Caffe::GPU) {
        batch->data_.data().get()->async_gpu_push(stream);
        if (this->output_labels_) {
          batch->label_.data().get()->async_gpu_push(stream);
        }
        CUDA_CHECK(cudaStreamSynchronize(stream));  // wait for the copy to the GPU to finish
      }
#endif
      prefetch_full_.push(batch);  // push it onto the queue of loaded batches
    }
  } catch (boost::thread_interrupted&) {  // catch the interruption and leave the while loop
    // Interrupted exception is expected on shutdown
  }
#ifndef CPU_ONLY
  if (Caffe::mode() == Caffe::GPU) {
    CUDA_CHECK(cudaStreamDestroy(stream));  // destroy the CUDA stream
  }
#endif
}
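async_gpu_push() in Examples #1 and #2 starts an asynchronous host-to-device copy of the batch's pinned buffer on the given stream, and the cudaStreamSynchronize() that follows guarantees the data is resident on the GPU before the batch is queued for the consumer. A minimal sketch of that step using plain CUDA runtime calls, assuming illustrative buffer sizes and names instead of Caffe's SyncedMemory:

#include <cuda_runtime.h>
#include <cstdio>
#include <cstdlib>

#define CUDA_CHECK(call)                                                     \
  do {                                                                       \
    cudaError_t err = (call);                                                \
    if (err != cudaSuccess) {                                                \
      std::fprintf(stderr, "CUDA error: %s\n", cudaGetErrorString(err));     \
      std::exit(EXIT_FAILURE);                                               \
    }                                                                        \
  } while (0)

int main() {
  const size_t count = 1 << 20;   // illustrative batch size
  float* host_data = nullptr;
  float* gpu_data = nullptr;

  // Pinned (page-locked) host memory is required for truly asynchronous copies.
  CUDA_CHECK(cudaMallocHost(reinterpret_cast<void**>(&host_data), count * sizeof(float)));
  CUDA_CHECK(cudaMalloc(reinterpret_cast<void**>(&gpu_data), count * sizeof(float)));
  for (size_t i = 0; i < count; ++i) host_data[i] = 1.0f;   // "load_batch"

  // Non-blocking stream, as in the examples above.
  cudaStream_t stream;
  CUDA_CHECK(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));

  // The essence of async_gpu_push(stream): an asynchronous H2D copy on that stream...
  CUDA_CHECK(cudaMemcpyAsync(gpu_data, host_data, count * sizeof(float),
                             cudaMemcpyHostToDevice, stream));
  // ...followed by a synchronize so the batch is fully on the device before
  // it is handed over to the consumer.
  CUDA_CHECK(cudaStreamSynchronize(stream));

  CUDA_CHECK(cudaStreamDestroy(stream));
  CUDA_CHECK(cudaFree(gpu_data));
  CUDA_CHECK(cudaFreeHost(host_data));
  return 0;
}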
Example #3
void BasePrefetchingDataLayer<Ftype, Btype>::InternalThreadEntryN(size_t thread_id) {
#ifndef CPU_ONLY
  const bool use_gpu_transform = this->is_gpu_transform();
#endif
  static thread_local bool iter0 = this->phase_ == TRAIN;
  if (iter0 && this->net_inititialized_flag_ != nullptr) {
    this->net_inititialized_flag_->wait();
  } else {  // nothing to wait -> initialize and start pumping
    std::lock_guard<std::mutex> lock(mutex_in_);
    InitializePrefetch();
    start_reading();
    iter0 = false;
  }
  try {
    while (!must_stop(thread_id)) {
      const size_t qid = this->queue_id(thread_id);
#ifndef CPU_ONLY
      shared_ptr<Batch<Ftype>> batch = prefetches_free_[qid]->pop();

      CHECK_EQ((size_t) -1, batch->id());
      load_batch(batch.get(), thread_id, qid);
      if (Caffe::mode() == Caffe::GPU) {
        if (!use_gpu_transform) {
          batch->data_.async_gpu_push();
        }
        if (this->output_labels_) {
          batch->label_.async_gpu_push();
        }
        CUDA_CHECK(cudaStreamSynchronize(Caffe::th_stream_aux(Caffe::STREAM_ID_ASYNC_PUSH)));
      }

      prefetches_full_[qid]->push(batch);
#else
      shared_ptr<Batch<Ftype>> batch = prefetches_free_[qid]->pop();
      load_batch(batch.get(), thread_id, qid);
      prefetches_full_[qid]->push(batch);
#endif

      if (iter0) {
        if (this->net_iteration0_flag_ != nullptr) {
          this->net_iteration0_flag_->wait();
        }
        std::lock_guard<std::mutex> lock(mutex_out_);
        if (this->net_inititialized_flag_ != nullptr) {
          this->net_inititialized_flag_ = nullptr;  // no wait on the second round
          InitializePrefetch();
          start_reading();
        }
        if (this->auto_mode_) {
          break;
        }  // manual otherwise, thus keep rolling
        iter0 = false;
      }
    }
  } catch (boost::thread_interrupted&) {
  }
}
Example #4
void TransformingFastHDF5InputLayer<Dtype>::InternalThreadEntry() {
  try {
    while (!must_stop()) {
      Batch* batch = prefetch_free_.pop();
      load_batch(batch);
      prefetch_full_.push(batch);
    }
  } catch (boost::thread_interrupted&) {
    // Interrupted exception is expected on shutdown
  }
}
Example #5
/**
 * Polling for events on an inner thread allows management messages, such as
 * buffer connections, to be processed immediately, even if the user is not polling.
 * Otherwise buffer constructors would block indefinitely.
 *
 * Deep learning workloads are about sending small numbers of large messages,
 * in which case this model works well. If the library were to be used to
 * exchange large numbers of short messages, it would be useful to split
 * management and data messages over two different queue pairs. User threads
 * could then wait or poll on the data queue pair directly.
 */
void RDMAAdapter::InternalThreadEntry() {
  while (!must_stop()) {
    ibv_cq* cq;
    void* cq_context;
    CHECK(!ibv_get_cq_event(channel_, &cq, &cq_context));
    CHECK(cq == cq_);
    ibv_ack_cq_events(cq, 1);
    CHECK(!ibv_req_notify_cq(cq_, 0));

    int ne = ibv_poll_cq(cq_, MAX_CONCURRENT_WRITES * 2,
      static_cast<ibv_wc*>(wc_));
    CHECK_GE(ne, 0);

    for (int i = 0; i < ne; ++i) {
      CHECK(wc_[i].status == IBV_WC_SUCCESS) << "Failed status \n"
                                             << ibv_wc_status_str(wc_[i].status)
                                             << " " << wc_[i].status << " "
                                             << static_cast<int>(wc_[i].wr_id)
                                             << " "<< wc_[i].vendor_err;

      if (wc_[i].opcode == IBV_WC_RECV_RDMA_WITH_IMM) {
        // Data message, add it to user received queue
        RDMAChannel* channel = reinterpret_cast<RDMAChannel*>(wc_[i].wr_id);
        channel->recv();
        int id = wc_[i].imm_data;
        if (id >= CTRL_ID_OFFSET) {
          // ctrl signal
          ctrl_received_.push(channel->buffers_[id - CTRL_ID_OFFSET]);
        } else {
          // data
          received_.push(channel->buffers_[id]);
        }
      } else {
        if (wc_[i].opcode & IBV_WC_RECV) {
          // Buffer connection message
          RDMAChannel* channel = reinterpret_cast<RDMAChannel*>(wc_[i].wr_id);
          int id = wc_[i].imm_data;
          channel->memory_regions_queue_.push(channel->memory_regions_[id]);
          CHECK(id == channel->memory_regions_received_++);
          CHECK(!ibv_dereg_mr(channel->region_regions_[id]));
        }
      }
    }
  }
}
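Stripped of the channel bookkeeping, the loop above is the standard ibverbs event-driven completion pattern: block on the completion channel, acknowledge the event, re-arm notifications, then drain the completion queue with ibv_poll_cq. A minimal sketch of just that pattern, assuming device, CQ, and queue-pair setup have already happened elsewhere (PollCompletions, MAX_WC, and the stop flag are illustrative names):

#include <infiniband/verbs.h>
#include <cstdio>

// Drain completions from 'cq', which was created with completion channel 'channel'.
// Device/context/CQ/queue-pair setup is assumed to have been done by the caller.
void PollCompletions(ibv_comp_channel* channel, ibv_cq* cq, volatile bool* stop) {
  enum { MAX_WC = 64 };   // illustrative batch of work completions
  ibv_wc wc[MAX_WC];

  // Arm notifications before waiting so the first event is not missed.
  if (ibv_req_notify_cq(cq, 0)) return;

  while (!*stop) {
    ibv_cq* ev_cq = nullptr;
    void* ev_ctx = nullptr;
    // Block until the CQ raises a completion event.
    if (ibv_get_cq_event(channel, &ev_cq, &ev_ctx)) return;
    ibv_ack_cq_events(ev_cq, 1);            // events must be acknowledged
    if (ibv_req_notify_cq(cq, 0)) return;   // re-arm before draining

    int ne;
    do {                                    // drain everything currently queued
      ne = ibv_poll_cq(cq, MAX_WC, wc);
      for (int i = 0; i < ne; ++i) {
        if (wc[i].status != IBV_WC_SUCCESS) {
          std::fprintf(stderr, "work completion failed: %s\n",
                       ibv_wc_status_str(wc[i].status));
          continue;
        }
        // Dispatch on wc[i].opcode / wc[i].wr_id / wc[i].imm_data here,
        // as the adapter above does with its RDMAChannel objects.
      }
    } while (ne > 0);
  }
}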
Example #6
void DataReader::interfaceKernel() {
  try {
    while (!must_stop()) read_one();
  } catch (boost::thread_interrupted&) {
    // Interrupted exception is expected on shutdown
  }
}
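Each of these entry functions ends the same way because of how the owning object stops its worker: it calls interrupt() on the boost::thread, the next interruption point (typically the blocking pop on a condition variable) throws boost::thread_interrupted, and the entry function swallows the exception and returns. A compact sketch of that shutdown mechanism, using hypothetical Worker and do_work() names rather than Caffe's InternalThread API:

#include <boost/thread.hpp>
#include <iostream>
#include <memory>

class Worker {
 public:
  ~Worker() { Stop(); }
  void Start() { thread_.reset(new boost::thread(&Worker::Entry, this)); }
  void Stop() {
    if (thread_ && thread_->joinable()) {
      thread_->interrupt();   // request interruption...
      thread_->join();        // ...and wait for Entry() to return
    }
  }

 private:
  static bool must_stop() { return boost::this_thread::interruption_requested(); }

  void Entry() {              // plays the role of InternalThreadEntry()
    try {
      while (!must_stop()) {
        do_work();
      }
    } catch (boost::thread_interrupted&) {
      // Interrupted exception is expected on shutdown
    }
  }

  void do_work() {
    // sleep() is an interruption point, just like the blocking queue pops above.
    boost::this_thread::sleep(boost::posix_time::milliseconds(100));
    std::cout << "produced a batch" << std::endl;
  }

  std::unique_ptr<boost::thread> thread_;
};

int main() {
  Worker w;
  w.Start();
  boost::this_thread::sleep(boost::posix_time::seconds(1));
  w.Stop();   // the worker's loop exits via boost::thread_interrupted
  return 0;
}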