// Constructs the CPU socket synchronizer for the node whose index is `rank`.
//
// root_solver  Solver handed to the P2PSyncCPU base, together with a NULL
//              second argument and the solver's own param().
// peers        Channels indexed by peer rank; each of the four per-peer
//              buffer tables is sized to peers.size().
// rank         Index of this node within `peers`.
SocketSyncCPU<Dtype>::SocketSyncCPU(
    shared_ptr<Solver<Dtype> > root_solver,
    const vector<shared_ptr<SocketChannel> > & peers,
    int rank)
    : P2PSyncCPU<Dtype>(root_solver, NULL, root_solver->param()),
      peers_(peers),
      rank_(rank),
      data_send_(peers.size()),
      data_recv_(peers.size()),
      diff_send_(peers.size()),
      diff_recv_(peers.size()) {
  // chunk() is given our rank plus pointers to own_offs_ / own_size_.
  chunk(rank_, &own_offs_, &own_size_);

  const int num_peers = static_cast<int>(peers_.size());

  // The two loops are deliberately nested (not run one after the other) so
  // that buffers get created in the same order on every box.
  for (int chunk_owner = 0; chunk_owner < num_peers; ++chunk_owner) {
    if (chunk_owner != rank_) {
      // A chunk mastered by somebody else: connect to that master only.
      CreateWorkerBuffers(chunk_owner);
      continue;
    }

    // The chunk we are master of: connect it to every other peer.
    for (int worker = 0; worker < num_peers; ++worker) {
      if (worker == rank_) {
        continue;
      }
      CreateMasterBuffers(worker);
    }
  }
}
// Constructs the GPU socket synchronizer for the node whose index is `rank`.
//
// root_solver  Solver handed to the P2PSync base, together with a NULL
//              second argument and the solver's own param(). Its
//              param().device_id() is made the current CUDA device while
//              the buffers are being created.
// peers        Channels indexed by peer rank; each of the six per-peer
//              tables (data, diff and ctrl, send and recv) is sized to
//              peers.size().
// rank         Index of this node within `peers`.
//
// When built with CPU_ONLY the body reduces to the NO_GPU macro.
SocketSync<Dtype>::SocketSync(
    shared_ptr<Solver<Dtype> > root_solver,
    const vector<shared_ptr<SocketChannel> > & peers,
    int rank)
    : P2PSync<Dtype>(root_solver, NULL, root_solver->param()),
      peers_(peers),
      rank_(rank),
      data_send_(peers.size()),
      data_recv_(peers.size()),
      diff_send_(peers.size()),
      diff_recv_(peers.size()),
      ctrl_send_(peers.size()),
      ctrl_recv_(peers.size()) {
#ifndef CPU_ONLY
  // Remember the caller's current device so it can be put back at the end.
  int previous_device;
  CUDA_CHECK(cudaGetDevice(&previous_device));
  CUDA_CHECK(cudaSetDevice(root_solver->param().device_id()));

  // chunk() is given our rank plus pointers to own_offs_ / own_size_.
  chunk(rank_, &own_offs_, &own_size_);

  const int num_peers = static_cast<int>(peers_.size());

  // The two loops are deliberately nested (not run one after the other) so
  // that buffers get created in the same order on every box.
  for (int chunk_owner = 0; chunk_owner < num_peers; ++chunk_owner) {
    if (chunk_owner != rank_) {
      // A chunk mastered by somebody else: connect to that master only.
      CreateWorkerBuffers(chunk_owner);
      continue;
    }

    // The chunk we are master of: connect it to every other peer.
    for (int worker = 0; worker < num_peers; ++worker) {
      if (worker == rank_) {
        continue;
      }
      CreateMasterBuffers(worker);
    }
  }

  CUDA_CHECK(cudaSetDevice(previous_device));
#else
  NO_GPU;
#endif
}