/**
 * @brief Initializes the communicators of every NCCL instance in @p nccls
 *        with a single ncclCommInitAll call.
 *
 * The device id of each instance is read from its solver parameters, the
 * ids are handed to ncclCommInitAll in list order, and the communicator
 * produced at index i is stored in the comm_ member of (*nccls)[i].
 *
 * @param nccls instances to initialize; each entry's comm_ is overwritten.
 *              Must not be null.
 */
void NCCL<Dtype>::InitSingleProcess(vector<NCCL<Dtype>*>* nccls) {
  const size_t count = nccls->size();

  // Both scratch buffers are owned by vectors so they are released on every
  // exit path. They were previously allocated with new[] and never freed.
  vector<ncclComm_t> comms(count);
  vector<int> gpu_list(count);

  // Collect the device id configured for each instance's solver.
  for (size_t i = 0; i < count; ++i) {
    gpu_list[i] = (*nccls)[i]->solver_->param().device_id();
  }

  NCCL_CHECK(ncclCommInitAll(comms.data(), static_cast<int>(count),
                             gpu_list.data()));

  // The handles are copied by value into each instance, so the scratch
  // array itself does not need to outlive this function.
  for (size_t i = 0; i < count; ++i) {
    (*nccls)[i]->comm_ = comms[i];
  }
}
/// Builds a communicator list covering every entry of `devices`.
///
/// Allocates an array of `devices.size()` ncclComm_t handles for `comms`,
/// stores the device count in `ndevices`, then calls ncclCommInitAll with
/// that array, the count, and the device list. The call's result is passed
/// to CHECK.
///
/// NOTE(review): `comms.get()` suggests `comms` is a smart-pointer-like
/// member that owns the array; the declaration is not visible here, confirm.
/// NOTE(review): CHECK is assumed to report any result other than success;
/// its definition is not visible here, verify against the project's helper.
///
/// @param devices device ids forwarded to ncclCommInitAll as the device list.
NcclCommList(const std::vector<int>& devices) : comms(new ncclComm_t[devices.size()]), ndevices(devices.size()) { CHECK(ncclCommInitAll(comms.get(), devices.size(), devices.data())); }