/**
 * \brief NCCL implementation of \ref gpucomm_new.
 *
 * Allocates a zero-initialized communicator, takes an extra reference on the
 * context, and initializes the NCCL communicator with ncclCommInitRank while
 * the context is entered.
 *
 * \param comm_ptr [out] receives the new communicator on success and NULL on
 *                 every failure path
 * \param ctx      context the communicator is bound to; cast to the
 *                 underlying cuda_context
 * \param comm_id  clique id, reinterpreted as an ncclUniqueId
 * \param ndev     forwarded to ncclCommInitRank as the participant count
 * \param rank     forwarded to ncclCommInitRank as this participant's rank
 *
 * \return GA_NO_ERROR on success; otherwise the code produced by setup_lib,
 *         error_sys (allocation failure) or error_nccl (NCCL init failure).
 */
static int comm_new(gpucomm **comm_ptr, gpucontext *ctx,
                    gpucommCliqueId comm_id, int ndev, int rank) {
  gpucomm *comm;
  ncclResult_t err;

  ASSERT_CTX(ctx);

  // Publish the failure value before any fallible step, so that an early
  // exit (including one taken inside GA_CHECK below) never leaves the
  // caller's pointer uninitialized.
  *comm_ptr = NULL;

  // NOTE(review): GA_CHECK is presumed to return the error code from this
  // function when setup_lib fails -- confirm against its definition.
  GA_CHECK(setup_lib(ctx->err));

  comm = calloc(1, sizeof(*comm));  // Allocate zeroed memory
  if (comm == NULL)
    return error_sys(ctx->err, "calloc");

  comm->ctx = (cuda_context *)ctx;  // convert to underlying cuda context
  // So that context would not be destroyed before communicator
  comm->ctx->refcnt++;

  cuda_enter(comm->ctx);  // Use device
  err = ncclCommInitRank(&comm->c, ndev, *((ncclUniqueId *)&comm_id), rank);
  cuda_exit(comm->ctx);

  // Tag before the error check so that comm_clear receives a tagged object.
  TAG_COMM(comm);

  if (err != ncclSuccess) {
    // comm_clear (defined elsewhere) is relied on to dispose of the
    // half-built communicator; *comm_ptr is already NULL.
    comm_clear(comm);
    return error_nccl(ctx->err, "ncclCommInitRank", err);
  }

  *comm_ptr = comm;
  return GA_NO_ERROR;
}
// Builds the NCCL wrapper for one solver and joins the communicator
// identified by `uid`.
//
// solver: forwarded to the GPUParams base and to Configure(); also kept in
//         solver_.
// uid:    raw bytes of an ncclUniqueId. Must hold at least
//         NCCL_UNIQUE_ID_BYTES bytes, because that many bytes are copied out
//         of it below.
//
// The communicator is initialized with Caffe::solver_count() ranks and this
// process's Caffe::solver_rank(); a non-success NCCL result is routed
// through NCCL_CHECK.
template<typename Dtype>
NCCL<Dtype>::NCCL(shared_ptr<Solver<Dtype> > solver, const string& uid)
  : GPUParams<Dtype>(solver, getDevice()),
    solver_(solver), barrier_() {
  // Reject an undersized id before touching any global state: the memcpy
  // below would otherwise read past the end of the string's buffer.
  // NOTE(review): this assumes NCCL_CHECK aborts on a non-success result,
  // as it is used for ncclCommInitRank below -- confirm against the macro.
  if (uid.size() < static_cast<size_t>(NCCL_UNIQUE_ID_BYTES)) {
    NCCL_CHECK(ncclInvalidArgument);
  }

  this->Configure(solver.get());
  Caffe::set_multiprocess(true);

  // Rebuild the NCCL id from its serialized byte form.
  ncclUniqueId nccl_uid;
  memcpy(&nccl_uid, uid.data(), NCCL_UNIQUE_ID_BYTES);  // NOLINT(caffe/alt_fn)

  NCCL_CHECK(ncclCommInitRank(&comm_,
                              Caffe::solver_count(),
                              nccl_uid,
                              Caffe::solver_rank()));
  Init();
}