void DataChannelMPI::allGather(std::vector<at::Tensor>& output,
                               at::Tensor& input, THDGroup group_id) {
  const auto& group_pair = _groups.at(group_id);
  const auto& comm = group_pair.first;
  if (comm == MPI_COMM_NULL)
    return;

  if (output.size() != group_pair.second.size())
    throw std::logic_error(
        "allGather: number of output tensors and group size does not match");

  for (const auto& out_tensor : output)
    assertSameSizeAndType(out_tensor, input, "allGather");

  // Gather into a single flat buffer, then copy the per-rank slices back
  // into the user-provided output tensors.
  auto recv_buffer = _newLikeFlat(output);
  auto contig_input = input.contiguous();

  MPI_Allgather(
      contig_input.data_ptr(), contig_input.numel(),
      mpi_datatype.at(contig_input.type().scalarType()),
      recv_buffer.data_ptr(), contig_input.numel(),
      mpi_datatype.at(recv_buffer.type().scalarType()),
      comm);

  for (size_t i = 0; i < output.size(); ++i)
    output[i].copy_(recv_buffer[i]);
}
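// A minimal standalone sketch of the same MPI_Allgather pattern using raw
// float buffers instead of at::Tensor, so the flatten-then-copy structure is
// visible without the THD plumbing. The buffer names, element count, and use
// of MPI_COMM_WORLD are illustrative assumptions, not part of DataChannelMPI.
#include <mpi.h>
#include <cstdio>
#include <vector>

int main(int argc, char** argv) {
  MPI_Init(&argc, &argv);
  int rank = 0, size = 0;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  const int count = 4;                            // elements contributed per rank
  std::vector<float> input(count, float(rank));   // each rank sends its rank id
  std::vector<float> recv(count * size);          // flat receive buffer

  // Every rank sends `count` floats and receives `count` floats from each
  // rank, concatenated in rank order -- the analogue of _newLikeFlat above.
  MPI_Allgather(input.data(), count, MPI_FLOAT,
                recv.data(), count, MPI_FLOAT, MPI_COMM_WORLD);

  // Slice the flat buffer back into per-rank views, mirroring the
  // output[i].copy_(recv_buffer[i]) loop.
  for (int i = 0; i < size; ++i)
    std::printf("rank %d: slice %d starts with %f\n", rank, i, recv[i * count]);

  MPI_Finalize();
  return 0;
}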
std::tuple<at::Tensor, at::Tensor, at::Tensor> mkldnn_convolution_backward(
    const at::Tensor& input, const at::Tensor& grad_output_t,
    const at::Tensor& weight, IntList padding, IntList stride,
    IntList dilation, std::array<bool, 3> output_mask) {
  auto grad_output = grad_output_t.contiguous();

  at::Tensor grad_input, grad_weight, grad_bias;
  if (output_mask[0]) {
    grad_input = at::mkldnn_convolution_backward_input(
        input.sizes(), grad_output, weight, padding, stride, dilation,
        output_mask[2]);
  }
  if (output_mask[1] || output_mask[2]) {
    // The weight and bias gradients are computed by one kernel; the last
    // mask entry tells it whether the bias gradient is actually needed.
    std::tie(grad_weight, grad_bias) = at::mkldnn_convolution_backward_weights(
        weight.sizes(), grad_output, input, padding, stride, dilation,
        output_mask[2]);
  }

  return std::tuple<at::Tensor, at::Tensor, at::Tensor>{
      grad_input, grad_weight, grad_bias};
}
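// A self-contained sketch of the output_mask convention used above: a
// backward routine returns a fixed-arity tuple, computes only the slots the
// caller requested, and leaves the rest default-constructed ("undefined").
// The Grad struct and compute_* helpers are hypothetical stand-ins for the
// real MKL-DNN kernels; only the masking pattern is taken from the code.
#include <array>
#include <cstdio>
#include <tuple>

struct Grad {
  bool defined = false;   // mirrors at::Tensor::defined()
  double value = 0.0;
};

static Grad compute_grad_input()  { return {true, 1.0}; }
static Grad compute_grad_weight() { return {true, 2.0}; }
static Grad compute_grad_bias()   { return {true, 3.0}; }

std::tuple<Grad, Grad, Grad> backward(std::array<bool, 3> output_mask) {
  Grad grad_input, grad_weight, grad_bias;
  if (output_mask[0])
    grad_input = compute_grad_input();
  // Weight and bias gradients share one entry point, as in the mkldnn path;
  // the bias flag decides whether the bias reduction runs at all.
  if (output_mask[1] || output_mask[2]) {
    grad_weight = compute_grad_weight();
    if (output_mask[2])
      grad_bias = compute_grad_bias();
  }
  return {grad_input, grad_weight, grad_bias};
}

int main() {
  auto [gi, gw, gb] = backward({true, true, false});
  std::printf("input:%d weight:%d bias:%d\n", gi.defined, gw.defined, gb.defined);
  return 0;
}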
void DataChannelMPI::gather(std::vector<at::Tensor>& output,
                            at::Tensor& input, rank_type dst_rank,
                            THDGroup group_id) {
  const auto& group_pair = _groups.at(group_id);
  const auto& comm = group_pair.first;
  if (comm == MPI_COMM_NULL)
    return;

  at::Tensor recv_buffer;
  void* recvbuf = nullptr;
  if (_rank != dst_rank) {
    // Non-root ranks contribute input only; they must not pass outputs.
    if (output.size() > 0)
      throw std::logic_error(
          "gather: number of output tensors should be 0 for non root");
  } else {
    if (output.size() != group_pair.second.size())
      throw std::logic_error(
          "gather: number of output tensors and group size does not match");
    for (const auto& out_tensor : output)
      assertSameSizeAndType(out_tensor, input, "gather");
    recv_buffer = _newLikeFlat(output);
    recvbuf = recv_buffer.data_ptr();
  }

  rank_type group_dst_rank = group_pair.second.mustGetGroupRank(dst_rank);
  auto contig_input = input.contiguous();

  MPI_Gather(
      contig_input.data_ptr(), contig_input.numel(),
      mpi_datatype.at(contig_input.type().scalarType()),
      recvbuf, contig_input.numel(),
      mpi_datatype.at(contig_input.type().scalarType()),
      group_dst_rank, comm);

  // NOTE: this loop is a no-op in all processes except dst_rank, where
  // `output` is non-empty and `recv_buffer` holds the gathered data.
  for (size_t i = 0; i < output.size(); ++i)
    output[i].copy_(recv_buffer[i]);
}
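// A standalone sketch of the root/non-root asymmetry that gather() handles
// above: only the root allocates a receive buffer, every other rank passes a
// null pointer, and MPI ignores the receive arguments on non-root ranks.
// The choice of root rank and message size are illustrative assumptions.
#include <mpi.h>
#include <cstdio>
#include <vector>

int main(int argc, char** argv) {
  MPI_Init(&argc, &argv);
  int rank = 0, size = 0;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  const int root = 0;
  const int count = 2;
  std::vector<float> input(count, float(rank));

  // Non-root ranks may legally pass nullptr for the receive buffer, which is
  // exactly what the `recvbuf = nullptr` default in gather() relies on.
  std::vector<float> recv;
  float* recvbuf = nullptr;
  if (rank == root) {
    recv.resize(count * size);
    recvbuf = recv.data();
  }

  MPI_Gather(input.data(), count, MPI_FLOAT,
             recvbuf, count, MPI_FLOAT, root, MPI_COMM_WORLD);

  if (rank == root)
    for (int i = 0; i < size; ++i)
      std::printf("from rank %d: %f\n", i, recv[i * count]);

  MPI_Finalize();
  return 0;
}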