/**
 * \brief NCCL implementation of \ref gpucomm_new.
 */
static int comm_new(gpucomm **comm_ptr, gpucontext *ctx,
                    gpucommCliqueId comm_id, int ndev, int rank) {
  gpucomm *comm;
  ncclResult_t err;

  ASSERT_CTX(ctx);

  GA_CHECK(setup_lib(ctx->err));

  comm = calloc(1, sizeof(*comm));  // Allocate memory
  if (comm == NULL) {
    *comm_ptr = NULL;  // Set to NULL if failed
    return error_sys(ctx->err, "calloc");
  }
  comm->ctx = (cuda_context *)ctx;  // convert to underlying cuda context
  // So that context would not be destroyed before communicator
  comm->ctx->refcnt++;
  cuda_enter(comm->ctx);  // Use device
  err = ncclCommInitRank(&comm->c, ndev, *((ncclUniqueId *)&comm_id), rank);
  cuda_exit(comm->ctx);
  TAG_COMM(comm);
  if (err != ncclSuccess) {
    *comm_ptr = NULL;  // Set to NULL if failed
    comm_clear(comm);
    return error_nccl(ctx->err, "ncclCommInitRank", err);
  }
  *comm_ptr = comm;
  return GA_NO_ERROR;
}
Example #2
0
NCCL<Dtype>::NCCL(shared_ptr<Solver<Dtype> > solver, const string& uid)
  : GPUParams<Dtype>(solver, getDevice()),
    solver_(solver), barrier_() {
  this->Configure(solver.get());
  Caffe::set_multiprocess(true);
  ncclUniqueId nccl_uid;
  memcpy(&nccl_uid, &uid[0], NCCL_UNIQUE_ID_BYTES);  // NOLINT(caffe/alt_fn)
  NCCL_CHECK(ncclCommInitRank(&comm_,
                              Caffe::solver_count(),
                              nccl_uid,
                              Caffe::solver_rank()));
  Init();
}