Example #1
0
// Collects the extent of dimension `dim` from each tensor in `tensors`.
// The returned vector has one entry per tensor, in the same order as the
// input list (entry i is tensors[i].size(dim)).
std::vector<int64_t> to_arg_sizes(TensorList tensors, int64_t dim) {
  std::vector<int64_t> sizes;
  sizes.reserve(tensors.size());
  for (const auto& tensor : tensors) {
    sizes.push_back(tensor.size(dim));
  }
  return sizes;
}
Example #2
0
// Indexes `self` with the tensors in `indices`.
//
// Raises (via AT_ERROR) when more index tensors are supplied than `self`
// has dimensions. Otherwise makeLinearIndex(self, indices) produces a
// (source, linear index) pair and the result is source.take(linear index).
Tensor index(const Tensor & self, TensorList indices) {
  const bool too_many_indices = indices.size() > static_cast<size_t>(self.dim());
  if (too_many_indices) {
    AT_ERROR("too many indices for tensor of dimension ", self.dim(), " (got ", indices.size(), ")");
  }

  // First element is the tensor to read from, second is the flattened index.
  auto linearized = makeLinearIndex(self, indices);
  return std::get<0>(linearized).take(std::get<1>(linearized));
}
Example #3
0
// Validates a pair of input/output tensor lists.
//
// List-level requirements:
//   - `inputs` is non-empty;
//   - `inputs` and `outputs` have the same length.
//
// Per-index requirements, checked for i = 0 .. len-1 in this order:
//   - inputs[i] and outputs[i] are CUDA tensors and are not sparse;
//   - both have the same Tensor type as inputs[0];
//   - both are contiguous;
//   - inputs[i] is on a device no earlier input was on;
//   - outputs[i] is on the same device as inputs[i];
//   - inputs[i].numel() == inputs[0].numel();
//   - outputs[i].numel() * output_multiplier == inputs[0].numel() * input_multiplier.
//
// Throws std::runtime_error describing the first requirement that fails.
void _check_inputs(TensorList inputs, TensorList outputs, int input_multiplier, int output_multiplier) {
  const size_t len = inputs.size();

  // `len` is unsigned, so "empty" is exactly len == 0.
  if (len == 0) {
    throw std::runtime_error("input sequence can't be empty");
  }

  if (len != outputs.size()) {
    std::stringstream err;
    err << "inputs and outputs sequences have to be of the same length, but got input of length " << len << " and output of length " << outputs.size();
    throw std::runtime_error(err.str());
  }

  // The first input is the reference for element count and Tensor type.
  device_set devices;
  const int64_t numel = inputs[0].numel();
  auto& type = inputs[0].type();

  for (size_t i = 0; i < len; i++) {
    // Bind by reference: the tensors are only inspected here, so copying
    // the handles on every iteration is unnecessary.
    const auto& input = inputs[i];
    const auto& output = outputs[i];

    if (!(input.type().is_cuda() && !input.type().is_sparse()
        && output.type().is_cuda()  && !output.type().is_sparse())) {
      throw std::runtime_error("input and output elements have to be cuda dense Tensors");
    }

    if (!(type == input.type() && type == output.type())) {
      throw std::runtime_error("all inputs and outputs must be of the same Tensor type");
    }

    if (!input.is_contiguous() || !output.is_contiguous()) {
      throw std::runtime_error("all inputs and outputs have to be contiguous");
    }

    const auto input_device = input.get_device();
    // inputs must be on unique devices: `devices` records every device seen so far
    if (devices.test(input_device)) {
      throw std::runtime_error("inputs must be on unique devices");
    }
    devices.set(input_device);

    // each output must live on the same device as its matching input
    if (input_device != output.get_device()) {
      throw std::runtime_error("input and output must be on the same device");
    }

    // all inputs must have the same element count as the first input
    if (input.numel() != numel) {
      throw std::runtime_error("all inputs must have the same number of elements");
    }

    // output element count must match the input count scaled by the multipliers
    if (output.numel() * output_multiplier != numel * input_multiplier) {
      throw std::runtime_error("output must be of size input_size * size_multiplier");
    }
  }
}
Example #4
0
// Issues one ncclBcast (root 0) per tensor in `tensors`.
//
// `tensors` is first validated with _check_inputs(tensors, tensors, 1, 1).
// `streams` may be empty or contain null entries; in either case a null
// stream handle is passed for that tensor. `user_comms` may be empty, in
// which case communicators are obtained from _get_communicators(tensors).
//
// When built without WITH_NCCL this always throws std::runtime_error.
void broadcast(TensorList tensors, const stream_list& streams, const comm_list& user_comms) {
#ifdef WITH_NCCL
  using namespace torch::cuda::nccl::detail;
  _check_inputs(tensors, tensors, 1, 1);
  const ncclDataType_t nccl_type = _get_data_type(tensors[0].type());
  const int64_t count = tensors[0].numel();

  // Hold the mutex returned by THCCachingAllocator_getCudaFreeMutex() for
  // the remainder of the call, including communicator lookup and the
  // grouped NCCL calls below.
  std::lock_guard<std::mutex> allocator_lock(*(THCCachingAllocator_getCudaFreeMutex()));
  const auto comms = user_comms.empty() ? _get_communicators(tensors) : ArrayRef<ncclComm_t>(user_comms);
  AutoGPU device_guard;
  AutoNcclGroup group_guard;
  const size_t num_tensors = tensors.size();
  for (size_t idx = 0; idx < num_tensors; ++idx) {
    const auto& tensor = tensors[idx];
    device_guard.setDevice(tensor.get_device());
    // TODO: use current stream
    const bool no_stream_given = streams.empty() || !streams[idx];
    const auto stream = no_stream_given ? nullptr : streams[idx]->stream;
    CHECK(ncclBcast(tensor.data_ptr(), count, nccl_type, 0, comms[idx], stream));
  }
#else
  throw std::runtime_error("PyTorch built without NCCL support");
#endif
}