at::Tensor nms_cuda(const at::Tensor input, float thresh) {
  AT_CHECK(input.ndimension() == 3,
           "First argument should be a 3D Tensor, (batch_sz x n_boxes x 4)");
  // AT_CHECK(scores.ndimension() == 2,
  //          "Second argument should be a 2D Tensor, (batch_sz x n_boxes)");
  // AT_CHECK(input.size(0) == scores.size(0),
  //          "First and second arguments must have equal-sized first dimensions");
  // AT_CHECK(input.size(1) == scores.size(1),
  //          "First and second arguments must have equal-sized second dimensions");
  AT_CHECK(input.size(2) == 4,
           "First argument dimension 2 must have size 4, and should be of the form [x, y, w, h]");
  AT_CHECK(input.is_contiguous(), "First argument must be a contiguous Tensor");
  // AT_CHECK(scores.is_contiguous(), "Second argument must be a contiguous Tensor");
  AT_CHECK(input.type().scalarType() == at::kFloat || input.type().scalarType() == at::kDouble,
           "First argument must be Float or Double Tensor");
  // AT_CHECK(scores.type().scalarType() == at::kFloat || scores.type().scalarType() == at::kDouble,
  //          "Second argument must be Float or Double Tensor");

  return non_max_suppression_cuda(input, thresh);
}
at::Tensor sigmoid_add(at::Tensor x, at::Tensor y) {
  AT_CHECK(x.type().is_cuda(), "x must be a CUDA tensor");
  AT_CHECK(y.type().is_cuda(), "y must be a CUDA tensor");
  auto output = at::zeros_like(x);
  sigmoid_add_cuda(
      x.data<float>(), y.data<float>(), output.data<float>(), output.numel());
  return output;
}
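// For reference, a minimal sketch of the launcher that sigmoid_add forwards
// to, assuming an elementwise kernel computing sigmoid(x) + sigmoid(y). The
// kernel name and launch configuration here are illustrative assumptions,
// not necessarily the real implementation.
#include <cuda_runtime.h>

__global__ void sigmoid_add_kernel(
    const float* __restrict__ x,
    const float* __restrict__ y,
    float* __restrict__ output,
    const int size) {
  const int index = blockIdx.x * blockDim.x + threadIdx.x;
  if (index < size) {
    const float sigmoid_x = 1.0f / (1.0f + expf(-x[index]));
    const float sigmoid_y = 1.0f / (1.0f + expf(-y[index]));
    output[index] = sigmoid_x + sigmoid_y;  // elementwise sigmoid(x) + sigmoid(y)
  }
}

// Illustrative launcher matching the signature used by sigmoid_add above.
void sigmoid_add_cuda(const float* x, const float* y, float* output, int size) {
  const int threads = 1024;
  const int blocks = (size + threads - 1) / threads;
  sigmoid_add_kernel<<<blocks, threads>>>(x, y, output, size);
}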
size_t ReplaceAll(std::string& s, const char* from, const char* to) {
  AT_CHECK(from && *from, "");
  AT_CHECK(to, "");

  size_t numReplaced = 0;
  std::string::size_type lenFrom = std::strlen(from);
  std::string::size_type lenTo = std::strlen(to);
  for (auto pos = s.find(from); pos != std::string::npos;
       pos = s.find(from, pos + lenTo)) {
    s.replace(pos, lenFrom, to);
    numReplaced++;
  }
  return numReplaced;
}
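// Usage sketch for ReplaceAll (a hypothetical caller, not part of the original
// source): every occurrence of `from` is replaced in place and the count is
// returned.
#include <cstring>
#include <iostream>
#include <string>

int main() {
  std::string s = "foo bar foo baz";
  size_t n = ReplaceAll(s, "foo", "qux");              // s becomes "qux bar qux baz"
  std::cout << s << " (" << n << " replacements)\n";   // prints: qux bar qux baz (2 replacements)
  return 0;
}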
Tensor PerTensorAffineQuantizer::quantize(Tensor tensor) {
  IntArrayRef sizes = tensor.sizes();
  // Here we need a std::intrusive_ptr<Quantizer>.. but actually "this" is the
  // quantizer that can be reused, so I'm using intrusive_from_this here
  AT_CHECK(
      tensor.options().device() == kCPU,
      "quantize only works for CPU backend right now.");
  Tensor qv = new_qtensor_cpu(
      sizes,
      tensor.options().dtype(at::kQInt8),
      intrusive_from_this());

  tensor = tensor.contiguous();
  const float* svd = tensor.data<float>();

#ifdef USE_FBGEMM
  auto qvd = reinterpret_cast<uint8_t*>(qv.data<qint8>());
  fbgemm::TensorQuantizationParams qparams;
  qparams.scale = scale_;
  qparams.zero_point = zero_point_;
  qparams.precision = 8;
  fbgemm::Quantize<uint8_t>(/*src=*/svd,
                            /*dst=*/qvd,
                            /*len=*/tensor.numel(),
                            /*qparams=*/qparams);
#else
  auto qvd = qv.data<qint8>();
  for (int i = 0; i < tensor.numel(); ++i) {
    qvd[i] = quantize_uint8(scale_, zero_point_, svd[i]);
  }
#endif
  return qv;
}
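// Sketch of the non-FBGEMM fallback helper used above. The exact rounding
// behaviour of the real quantize_uint8 may differ; this only illustrates the
// standard affine mapping q = clamp(round(value / scale) + zero_point, 0, 255).
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>

inline uint8_t quantize_uint8_sketch(float scale, int32_t zero_point, float value) {
  constexpr int32_t qmin = std::numeric_limits<uint8_t>::min();  // 0
  constexpr int32_t qmax = std::numeric_limits<uint8_t>::max();  // 255
  const int32_t q =
      static_cast<int32_t>(std::nearbyint(value / scale)) + zero_point;
  return static_cast<uint8_t>(std::min(std::max(q, qmin), qmax));
}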
Tensor Type::copy(const Tensor & src, bool non_blocking) const {
  AT_CHECK(src.defined(), "attempt to copy an undefined tensor");
  if (is_sparse()) {
    auto indices = src._indices();
    auto values = src._values();
    auto & this_dense = toBackend(is_cuda() ? Backend::CUDA : Backend::CPU);
    auto & this_dense_idx = this_dense.toScalarType(ScalarType::Long);
    auto indices_copy = this_dense_idx.copy(indices, non_blocking);
    auto values_copy = this_dense.copy(values, non_blocking);
    return _sparse_coo_tensor_unsafe(indices_copy, values_copy, src.sizes());
  } else {
    Tensor r = this->tensor(src.sizes());
    r.copy_(src, non_blocking);
    return r;
  }
}
inline Tensor new_qtensor_cpu(
    IntArrayRef sizes,
    const TensorOptions& options,
    QuantizerPtr quantizer) {
  AT_ASSERT(options.device().is_cpu());

  native::check_size_nonnegative(sizes);
  auto* allocator = at::getCPUAllocator();
  int64_t nelements = at::prod_intlist(sizes);
  auto dtype = options.dtype();
  AT_CHECK(isQIntType(typeMetaToScalarType(dtype)),
           "ScalarType is not supported in new_qtensor_cpu.");
  auto storage = c10::make_intrusive<StorageImpl>(
      dtype,
      nelements,
      allocator->allocate(nelements * dtype.itemsize()),
      allocator,
      /*resizable=*/true);
  auto tensor = detail::make_tensor<QTensorImpl>(
      storage, at::QuantizedCPUTensorId(), quantizer);
  get_qtensorimpl(tensor)->set_sizes_contiguous(sizes);
  return tensor;
}
void THNN_(SpatialMaxUnpooling_updateOutput)(
    THNNState *state,
    THTensor *input,
    THTensor *output,
    THIndexTensor *indices,
    int owidth, int oheight)
{
  int dimw = 2;
  int dimh = 1;
  int nbatch = 1;
  int nslices;
  int iheight;
  int iwidth;
  scalar_t *input_data;
  scalar_t *output_data;
  THIndex_t *indices_data;

  AT_CHECK(!input->is_empty() && (input->dim() == 3 || input->dim() == 4),
           "non-empty 3D or 4D (batch mode) tensor expected for input, but got sizes: ",
           input->sizes());
  THNN_CHECK_SHAPE_INDICES(input, indices);

  if (input->dim() == 4)
  {
    nbatch = input->size(0);
    dimw++;
    dimh++;
  }

  /* sizes */
  nslices = input->size(dimh-1);
  iheight = input->size(dimh);
  iwidth = input->size(dimw);

  /* get contiguous input and indices */
  input = THTensor_(newContiguous)(input);
  indices = THIndexTensor_(newContiguous)(indices);

  /* resize output */
  if (input->dim() == 3)
  {
    THTensor_(resize3d)(output, nslices, oheight, owidth);
    THTensor_(zero)(output);

    input_data = input->data<scalar_t>();
    output_data = output->data<scalar_t>();
    indices_data = THIndexTensor_(data)(indices);

    THNN_(SpatialMaxUnpooling_updateOutput_frame)(input_data, output_data,
                                                  indices_data,
                                                  nslices,
                                                  iwidth, iheight,
                                                  owidth, oheight);
  }
  else
  {
    int p;

    THTensor_(resize4d)(output, nbatch, nslices, oheight, owidth);
    THTensor_(zero)(output);

    input_data = input->data<scalar_t>();
    output_data = output->data<scalar_t>();
    indices_data = THIndexTensor_(data)(indices);

    for (p = 0; p < nbatch; p++)
    {
      THNN_(SpatialMaxUnpooling_updateOutput_frame)(
          input_data + p*nslices*iwidth*iheight,
          output_data + p*nslices*owidth*oheight,
          indices_data + p*nslices*iwidth*iheight,
          nslices,
          iwidth, iheight,
          owidth, oheight);
    }
  }

  /* cleanup */
  c10::raw::intrusive_ptr::decref(input);
  THIndexTensor_(free)(indices);
}
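// A plain-C++ sketch of what the per-frame step above performs; the real
// THNN_(SpatialMaxUnpooling_updateOutput_frame) uses TH macros and OpenMP.
// For each slice, every input value is scattered to the output position
// recorded in `indices`. Names and error handling here are illustrative.
template <typename scalar_t, typename index_t>
void spatial_max_unpooling_frame_sketch(
    const scalar_t* input, scalar_t* output, const index_t* indices,
    int nslices, int iwidth, int iheight, int owidth, int oheight) {
  for (int k = 0; k < nslices; ++k) {
    const scalar_t* in_k = input + k * iwidth * iheight;
    scalar_t* out_k = output + k * owidth * oheight;
    const index_t* ind_k = indices + k * iwidth * iheight;
    for (int i = 0; i < iheight * iwidth; ++i) {
      const index_t maxp = ind_k[i];  // position of the max within the output plane
      if (maxp >= 0 && maxp < static_cast<index_t>(owidth) * oheight) {
        out_k[maxp] = in_k[i];        // scatter the pooled value back
      }
      // The real kernel reports an error for out-of-range indices rather than
      // silently skipping them.
    }
  }
}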