Example #1
void DataChannelMPI::allGather(std::vector<at::Tensor>& output,
                               at::Tensor& input, THDGroup group_id) {
  const auto& group_pair = _groups.at(group_id);
  const auto& comm = group_pair.first;
  if (comm == MPI_COMM_NULL)
    return;

  if (output.size() != group_pair.second.size())
    throw std::logic_error("allGather: number of output tensors and group size does not match");

  // Every output tensor must match the input's size and type.
  for (const auto& out_tensor : output)
    assertSameSizeAndType(out_tensor, input, "allGather");

  // Gather into a single flat buffer shaped like all outputs stacked together,
  // then copy the slices back into the individual output tensors below.
  auto recv_buffer = _newLikeFlat(output);
  auto contig_input = input.contiguous();

  MPI_Allgather(
    contig_input.data_ptr(), contig_input.numel(), mpi_datatype.at(contig_input.type().scalarType()),
    recv_buffer.data_ptr(), contig_input.numel(), mpi_datatype.at(recv_buffer.type().scalarType()),
    comm
  );

  for (size_t i = 0; i < output.size(); ++i)
    output[i].copy_(recv_buffer[i]);
}
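
The MPI semantics the wrapper above relies on can be shown with a minimal standalone MPI_Allgather sketch (plain MPI, no ATen; the element count and float payload below are illustrative assumptions): every rank contributes count elements and receives the concatenation of all ranks' contributions, which is why allGather collects into one flat buffer sized like all outputs stacked together.

#include <mpi.h>
#include <cstdio>
#include <vector>

int main(int argc, char** argv) {
  MPI_Init(&argc, &argv);

  int rank = 0, world_size = 0;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &world_size);

  // Each rank contributes `count` floats; the receive buffer is the flat
  // concatenation of every rank's contribution, analogous to _newLikeFlat.
  const int count = 4;
  std::vector<float> send(count, static_cast<float>(rank));
  std::vector<float> recv(count * world_size);

  MPI_Allgather(send.data(), count, MPI_FLOAT,
                recv.data(), count, MPI_FLOAT,
                MPI_COMM_WORLD);

  if (rank == 0)
    std::printf("rank 0 received %zu elements\n", recv.size());

  MPI_Finalize();
  return 0;
}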
Example #2
std::tuple<at::Tensor,at::Tensor,at::Tensor> mkldnn_convolution_backward(
    const at::Tensor& input, const at::Tensor& grad_output_t, const at::Tensor& weight,
    IntList padding, IntList stride, IntList dilation, std::array<bool,3> output_mask)
{
  Tensor grad_output = grad_output_t.contiguous();

  Tensor grad_input, grad_weight, grad_bias;
  // output_mask selects which gradients the caller actually needs:
  // [0] -> grad_input, [1] -> grad_weight, [2] -> grad_bias.
  if (output_mask[0]) {
    grad_input = at::mkldnn_convolution_backward_input(
      input.sizes(), grad_output, weight, padding, stride, dilation, output_mask[2]);
  }
  if (output_mask[1] || output_mask[2]) {
    std::tie(grad_weight, grad_bias) = at::mkldnn_convolution_backward_weights(
      weight.sizes(), grad_output, input, padding, stride, dilation, output_mask[2]);
  }

  return std::tuple<Tensor, Tensor, Tensor>{grad_input, grad_weight, grad_bias};
}
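
The output_mask convention lets the caller skip gradients it does not need: slot 0 controls grad_input, slot 1 grad_weight, and slot 2 grad_bias. A minimal sketch of that pattern in plain C++ follows (the FakeTensor type and backward_sketch function are purely illustrative stand-ins, not part of ATen):

#include <array>
#include <iostream>
#include <tuple>

// Stand-in for a gradient payload; in ATen this would be at::Tensor.
struct FakeTensor { bool defined = false; };

std::tuple<FakeTensor, FakeTensor, FakeTensor>
backward_sketch(std::array<bool, 3> output_mask) {
  FakeTensor grad_input, grad_weight, grad_bias;
  // Compute only what the caller asked for, mirroring the structure of
  // mkldnn_convolution_backward.
  if (output_mask[0]) {
    grad_input.defined = true;              // stands in for backward_input
  }
  if (output_mask[1] || output_mask[2]) {   // weight and bias share one call
    grad_weight.defined = true;             // stands in for backward_weights
    grad_bias.defined = output_mask[2];
  }
  return {grad_input, grad_weight, grad_bias};
}

int main() {
  // E.g. the input does not require grad, but weight and bias do.
  auto [gi, gw, gb] = backward_sketch({false, true, true});
  std::cout << gi.defined << gw.defined << gb.defined << "\n";  // prints 011
}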
Example #3
void DataChannelMPI::gather(std::vector<at::Tensor>& output,
                            at::Tensor& input, rank_type dst_rank,
                            THDGroup group_id) {
  const auto& group_pair = _groups.at(group_id);
  const auto& comm = group_pair.first;
  if (comm == MPI_COMM_NULL)
    return;

  at::Tensor recv_buffer;
  void *recvbuf = nullptr;  // remains null on non-root ranks, where MPI ignores it
  if (_rank != dst_rank) {
    if (output.size() > 0)
      throw std::logic_error("gather: number of output tensors should be 0 for non-root");
  } else {
    if (output.size() != group_pair.second.size())
      throw std::logic_error("gather: number of output tensors and group size does not match");

    // On the root, every output tensor must match the input's size and type.
    for (const auto& out_tensor : output)
      assertSameSizeAndType(out_tensor, input, "gather");

    recv_buffer = _newLikeFlat(output);
    recvbuf = recv_buffer.data_ptr();
  }

  rank_type group_dst_rank = group_pair.second.mustGetGroupRank(dst_rank);
  auto contig_input = input.contiguous();

  MPI_Gather(
    contig_input.data_ptr(), contig_input.numel(), mpi_datatype.at(contig_input.type().scalarType()),
    recvbuf, contig_input.numel(), mpi_datatype.at(contig_input.type().scalarType()),
    group_dst_rank, comm
  );

  // NOTE: this is a no-op in all processes except dst_rank
  for (size_t i = 0; i < output.size(); ++i)
    output[i].copy_(recv_buffer[i]);
}
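
Here the key MPI detail is that MPI_Gather only reads the receive arguments on the root rank, which is why recvbuf stays nullptr everywhere else. A minimal standalone sketch (plain MPI, no ATen; the root rank, element count, and float payload are illustrative assumptions):

#include <mpi.h>
#include <cstdio>
#include <vector>

int main(int argc, char** argv) {
  MPI_Init(&argc, &argv);

  int rank = 0, world_size = 0;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &world_size);

  const int root = 0;
  const int count = 4;
  std::vector<float> send(count, static_cast<float>(rank));

  // Only the root allocates a receive buffer; everyone else passes nullptr,
  // matching the recvbuf handling in DataChannelMPI::gather.
  std::vector<float> recv;
  float* recvbuf = nullptr;
  if (rank == root) {
    recv.resize(count * world_size);
    recvbuf = recv.data();
  }

  MPI_Gather(send.data(), count, MPI_FLOAT,
             recvbuf, count, MPI_FLOAT,
             root, MPI_COMM_WORLD);

  if (rank == root)
    std::printf("root gathered %zu elements\n", recv.size());

  MPI_Finalize();
  return 0;
}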