Example #1
at::Tensor nms_cuda(const at::Tensor input,
                    float thresh)
{

    AT_CHECK(input.ndimension() == 3,
        "First argument should be a 3D Tensor, (batch_sz x n_boxes x 4)");
    // AT_CHECK(scores.ndimension() == 2,
        // "Second argument should be a 2D Tensor, (batch_sz x n_boxes)");
    // AT_CHECK(input.size(0) == scores.size(0),
        // "First and second arguments must have equal-sized first dimensions");
    // AT_CHECK(input.size(1) == scores.size(1),
        // "First and second arguments must have equal-sized second dimensions");
    AT_CHECK(input.size(2) == 4,
        "First argument dimension 2 must have size 4, and should be of the form [x, y, w, h]");
    AT_CHECK(input.is_contiguous(), "First argument must be a contiguous Tensor");
    // AT_CHECK(scores.is_contiguous(), "Second argument must be a contiguous Tensor");
    AT_CHECK(input.type().scalarType() == at::kFloat || input.type().scalarType() == at::kDouble,
        "First argument must be Float or Double Tensor");
    // AT_CHECK(scores.type().scalarType() == at::kFloat || scores.type().scalarType() == at::kDouble,
        // "Second argument must be Float or Double Tensor");

    return non_max_suppression_cuda(input, thresh);

}
Example #2
at::Tensor sigmoid_add(at::Tensor x, at::Tensor y) {
  AT_CHECK(x.type().is_cuda(), "x must be a CUDA tensor");
  AT_CHECK(y.type().is_cuda(), "y must be a CUDA tensor");
  auto output = at::zeros_like(x);
  sigmoid_add_cuda(
      x.data<float>(), y.data<float>(), output.data<float>(), output.numel());
  return output;
}
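A function like this is normally built as a PyTorch C++ extension and exposed to Python through pybind11. The binding below is a minimal sketch of that step; the docstring and the assumption that the file is compiled as an extension module are mine, not part of the original example.

#include <torch/extension.h>

PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  // Expose sigmoid_add to Python under the same name.
  m.def("sigmoid_add", &sigmoid_add, "element-wise sigmoid(x) + sigmoid(y) on CUDA tensors");
}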
Example #3
size_t ReplaceAll(std::string& s, const char* from, const char* to) {
  AT_CHECK(from && *from, "from must be a non-empty string");
  AT_CHECK(to, "to must not be null");

  size_t numReplaced = 0;
  std::string::size_type lenFrom = std::strlen(from);
  std::string::size_type lenTo = std::strlen(to);
  for (auto pos = s.find(from); pos != std::string::npos;
       pos = s.find(from, pos + lenTo)) {
    s.replace(pos, lenFrom, to);
    numReplaced++;
  }
  return numReplaced;
}
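A minimal usage sketch for ReplaceAll (the driver below is hypothetical and only illustrates the behavior): because the next search starts at pos + strlen(to), text inserted by a replacement is never re-matched, so a pattern such as "-" can safely be replaced with "--".

#include <iostream>
#include <string>

int main() {
  std::string s = "a-b-c";
  size_t n = ReplaceAll(s, "-", "--");
  std::cout << s << " (" << n << " replacements)\n";  // prints: a--b--c (2 replacements)
}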
Example #4
Tensor PerTensorAffineQuantizer::quantize(Tensor tensor) {
  IntArrayRef sizes = tensor.sizes();
  // Here we need a c10::intrusive_ptr<Quantizer>, but "this" is the quantizer
  // that can be reused, so intrusive_from_this() is used here.
  AT_CHECK(
      tensor.options().device() == kCPU,
      "quantize only works for CPU backend right now.");
  Tensor qv = new_qtensor_cpu(
      sizes,
      tensor.options().dtype(at::kQInt8),
      intrusive_from_this());

  tensor = tensor.contiguous();
  const float* svd = tensor.data<float>();

#ifdef USE_FBGEMM
  auto qvd = reinterpret_cast<uint8_t*>(qv.data<qint8>());
  fbgemm::TensorQuantizationParams qparams;
  qparams.scale = scale_;
  qparams.zero_point = zero_point_;
  qparams.precision = 8;
  fbgemm::Quantize<uint8_t>(/*src=*/svd,
                            /*dst=*/qvd,
                            /*len=*/tensor.numel(),
                            /*qparams=*/qparams);
#else
  auto qvd = qv.data<qint8>();
  for (int64_t i = 0; i < tensor.numel(); ++i) {
    qvd[i] = quantize_uint8(scale_, zero_point_, svd[i]);
  }
#endif
  return qv;
}
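The non-FBGEMM branch above relies on a scalar helper. For reference, here is a self-contained sketch of standard 8-bit affine quantization; the real quantize_uint8 lives in ATen, and this reimplementation is only illustrative.

#include <algorithm>
#include <cmath>
#include <cstdint>

// Affine scheme: q = clamp(round(value / scale) + zero_point, 0, 255).
inline uint8_t quantize_uint8_sketch(float scale, int32_t zero_point, float value) {
  int64_t q = static_cast<int64_t>(std::nearbyint(value / scale)) + zero_point;
  q = std::min<int64_t>(std::max<int64_t>(q, 0), 255);
  return static_cast<uint8_t>(q);
}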
Example #5
Tensor Type::copy(const Tensor & src, bool non_blocking) const {
  AT_CHECK(src.defined(), "attempt to copy an undefined tensor");
  if (is_sparse()) {
    auto indices = src._indices();
    auto values = src._values();
    auto & this_dense = toBackend(is_cuda() ? Backend::CUDA : Backend::CPU);
    auto & this_dense_idx = this_dense.toScalarType(ScalarType::Long);
    auto indices_copy = this_dense_idx.copy(indices, non_blocking);
    auto values_copy = this_dense.copy(values, non_blocking);
    return _sparse_coo_tensor_unsafe(indices_copy, values_copy, src.sizes());
  } else {
    Tensor r = this->tensor(src.sizes());
    r.copy_(src, non_blocking);
    return r;
  }
}
Example #6
inline Tensor new_qtensor_cpu(
    IntArrayRef sizes,
    const TensorOptions& options,
    QuantizerPtr quantizer) {
  AT_ASSERT(options.device().is_cpu());

  native::check_size_nonnegative(sizes);
  auto* allocator = at::getCPUAllocator();
  int64_t nelements = at::prod_intlist(sizes);
  auto dtype = options.dtype();
  AT_CHECK(isQIntType(typeMetaToScalarType(dtype)),
           "ScalarType is not supported in new_qtensor_cpu.");
  auto storage = c10::make_intrusive<StorageImpl>(
      dtype,
      nelements,
      allocator->allocate(nelements * dtype.itemsize()),
      allocator,
      /*resizable=*/true);
  auto tensor = detail::make_tensor<QTensorImpl>(
      storage, at::QuantizedCPUTensorId(), quantizer);
  get_qtensorimpl(tensor)->set_sizes_contiguous(sizes);
  return tensor;
}
void THNN_(SpatialMaxUnpooling_updateOutput)(
    THNNState *state,
    THTensor *input,
    THTensor *output,
    THIndexTensor *indices,
    int owidth, int oheight)
{
  int dimw = 2;
  int dimh = 1;
  int nbatch = 1;
  int nslices;
  int iheight;
  int iwidth;
  scalar_t *input_data;
  scalar_t *output_data;
  THIndex_t *indices_data;


  AT_CHECK(!input->is_empty() && (input->dim() == 3 || input->dim() == 4),
           "non-empty 3D or 4D (batch mode) tensor expected for input, but got sizes: ", input->sizes());
  THNN_CHECK_SHAPE_INDICES(input, indices);

  if (input->dim() == 4)
  {
    nbatch = input->size(0);
    dimw++;
    dimh++;
  }

  /* sizes */
  nslices = input->size(dimh-1);
  iheight = input->size(dimh);
  iwidth = input->size(dimw);

  /* get contiguous input and indices */
  input = THTensor_(newContiguous)(input);
  indices = THIndexTensor_(newContiguous)(indices);

  /* resize output */
  if (input->dim() == 3)
  {
    THTensor_(resize3d)(output, nslices, oheight, owidth);
    THTensor_(zero)(output);

    input_data = input->data<scalar_t>();
    output_data = output->data<scalar_t>();
    indices_data = THIndexTensor_(data)(indices);

    THNN_(SpatialMaxUnpooling_updateOutput_frame)(input_data, output_data,
                                              indices_data,
                                              nslices,
                                              iwidth, iheight,
                                              owidth, oheight);
  }
  else
  {
    int p;

    THTensor_(resize4d)(output, nbatch, nslices, oheight, owidth);
    THTensor_(zero)(output);

    input_data = input->data<scalar_t>();
    output_data = output->data<scalar_t>();
    indices_data = THIndexTensor_(data)(indices);

    for (p = 0; p < nbatch; p++)
    {
      THNN_(SpatialMaxUnpooling_updateOutput_frame)(
                                                    input_data+p*nslices*iwidth*iheight,
                                                    output_data+p*nslices*owidth*oheight,
                                                    indices_data+p*nslices*iwidth*iheight,
                                                    nslices,
                                                    iwidth, iheight,
                                                    owidth, oheight);
    }
  }

  /* cleanup */
  c10::raw::intrusive_ptr::decref(input);
  THIndexTensor_(free)(indices);
}