// Converts `self` to `type`, with `device` handed to an AutoGPU guard for the
// duration of the conversion.
//
//   self         - tensor to convert.
//   type         - destination type.
//   device       - forwarded unchanged to AutoGPU.
//   non_blocking - forwarded unchanged to type.copy() / self.toType().
//
// Returns the result of type.copy() when both `self` and `type` are CUDA and
// the device `self` lives on differs from at::current_device(); otherwise
// returns the result of self.toType().
static Tensor dispatch_type(const Tensor & self, const at::Type & type, int device, bool non_blocking) {
  // NOTE(review): the lazy CUDA init is deliberately kept ahead of AutoNoGIL;
  // presumably it must run while the GIL is still held -- confirm before
  // reordering.
  if (type.is_cuda()) {
    torch::utils::cuda_lazy_init();
  }

  // Guard order matters: destructors run in reverse, so the AutoGPU guard is
  // torn down before AutoNoGIL is.
  AutoNoGIL no_gil;
  AutoGPU auto_gpu(device);

  // -1 stands in for "not on a CUDA device".
  const bool src_is_cuda = self.is_cuda();
  const int64_t src_device = src_is_cuda ? self.get_device() : -1;

  // The current device is only queried when both ends are CUDA (short-circuit),
  // and only after the AutoGPU guard above has been constructed.
  const bool needs_cross_device_copy =
      src_is_cuda && type.is_cuda() && src_device != at::current_device();

  if (!needs_cross_device_copy) {
    return self.toType(type, non_blocking);
  }
  // Different devices: copy even if the types are the same.
  return type.copy(self, non_blocking);
}