void THNN_(SpatialUpSamplingBilinear_updateOutput)(
    THNNState *state,
    THTensor *input,
    THTensor *output,
    int outputHeight,
    int outputWidth) {

  int nbatch = THTensor_(size)(input, 0);
  int channels = THTensor_(size)(input, 1);
  int inputHeight = THTensor_(size)(input, 2);
  int inputWidth = THTensor_(size)(input, 3);

  THNN_(SpatialUpSamplingBilinear_shapeCheck)
    (input, NULL,
     nbatch, channels,
     inputHeight, inputWidth,
     outputHeight, outputWidth);

  input = THTensor_(newContiguous)(input);
  THTensor_(resize4d)(output,
                      THTensor_(size)(input, 0),
                      THTensor_(size)(input, 1),
                      outputHeight, outputWidth);
  THTensor_(zero)(output);
  real *idata = THTensor_(data)(input);
  real *odata = THTensor_(data)(output);
  channels = nbatch * channels;
  THAssert(inputHeight > 0 && inputWidth > 0 && outputHeight > 0 && outputWidth > 0);
  // special case: just copy
  if (inputHeight == outputHeight && inputWidth == outputWidth) {
    for (int h2 = 0; h2 < outputHeight; ++h2) {
      const int h1 = h2;
      for (int w2 = 0; w2 < outputWidth; ++w2) {
        const int w1 = w2;
        const real* pos1 = &idata[h1 * inputWidth + w1];
        real* pos2 = &odata[h2 * outputWidth + w2];
        for (int c = 0; c < channels; ++c) {
          pos2[0] = pos1[0];
          // step to the same (h, w) location in the next channel plane
          pos1 += inputWidth * inputHeight;
          pos2 += outputWidth * outputHeight;
        }
      }
    }
    THTensor_(free)(input);
    return;
  }
  // Map each output coordinate back to a fractional input coordinate so that the
  // first and last input samples line up with the first and last output samples.
  const float rheight = (outputHeight > 1) ? (float)(inputHeight - 1) / (outputHeight - 1) : 0.f;
  const float rwidth = (outputWidth > 1) ? (float)(inputWidth - 1) / (outputWidth - 1) : 0.f;
  for (int h2 = 0; h2 < outputHeight; ++h2) {
    const float h1r = rheight * h2;
    const int h1 = (int)h1r;                         // top source row
    const int h1p = (h1 < inputHeight - 1) ? 1 : 0;  // offset to bottom row (0 at the border)
    const real h1lambda = h1r - h1;                  // vertical interpolation weights
    const real h0lambda = (real)1. - h1lambda;
    for (int w2 = 0; w2 < outputWidth; ++w2) {
      const float w1r = rwidth * w2;
      const int w1 = (int)w1r;                        // left source column
      const int w1p = (w1 < inputWidth - 1) ? 1 : 0;  // offset to right column (0 at the border)
      const real w1lambda = w1r - w1;                 // horizontal interpolation weights
      const real w0lambda = (real)1. - w1lambda;
      const real* pos1 = &idata[h1 * inputWidth + w1];
      real* pos2 = &odata[h2 * outputWidth + w2];
      for (int c = 0; c < channels; ++c) {
        // blend the four neighbouring input samples
        pos2[0] = h0lambda * (w0lambda * pos1[0] + w1lambda * pos1[w1p])
                  + h1lambda * (w0lambda * pos1[h1p * inputWidth]
                  + w1lambda * pos1[h1p * inputWidth + w1p]);
        // step to the same location in the next channel plane
        pos1 += inputWidth * inputHeight;
        pos2 += outputWidth * outputHeight;
      }
    }
  }
  THTensor_(free)(input);
}
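For reference, a minimal standalone sketch of the same coordinate mapping and blending on a single-channel float image; the function name and signature are illustrative only, not part of THNN.

#include <stddef.h>

/* Illustrative only: bilinear upsampling of one single-channel float image
 * using the same (inDim - 1) / (outDim - 1) coordinate mapping as above. */
static void bilinear_upsample_2d(const float *in, int inH, int inW,
                                 float *out, int outH, int outW) {
  const float rh = (outH > 1) ? (float)(inH - 1) / (outH - 1) : 0.f;
  const float rw = (outW > 1) ? (float)(inW - 1) / (outW - 1) : 0.f;
  for (int y = 0; y < outH; ++y) {
    const float yr = rh * y;
    const int y0 = (int)yr;
    const int y1 = (y0 < inH - 1) ? y0 + 1 : y0;   /* clamp at the border */
    const float wy1 = yr - y0, wy0 = 1.f - wy1;
    for (int x = 0; x < outW; ++x) {
      const float xr = rw * x;
      const int x0 = (int)xr;
      const int x1 = (x0 < inW - 1) ? x0 + 1 : x0;
      const float wx1 = xr - x0, wx0 = 1.f - wx1;
      out[y * outW + x] =
          wy0 * (wx0 * in[y0 * inW + x0] + wx1 * in[y0 * inW + x1]) +
          wy1 * (wx0 * in[y1 * inW + x0] + wx1 * in[y1 * inW + x1]);
    }
  }
}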
Example no. 2
void THNN_(ClassNLLCriterion_updateGradInput)(THNNState *state, THTensor *input,
                                              THIndexTensor *target,
                                              THTensor *gradInput,
                                              bool sizeAverage,
                                              THTensor *weights,
                                              THTensor *total_weight)
{
  int n_dims = THTensor_(nDimension)(input);
  int n_classes = THTensor_(size)(input, n_dims - 1);

  if (!THTensor_(isContiguous)(gradInput)) {
    THError("gradInput must be contiguous");
  }

  real *total_weight_data = THTensor_(data)(total_weight);

  // nothing to propagate if no target contributed any weight
  if (!(*total_weight_data > 0)) {
    return;
  }

  if (THIndexTensor_(nDimension)(target) > 1) {
    THError("multi-target not supported");
  }

  if (THTensor_(nDimension)(input) > 2) {
    THError("input tensor should be 1D or 2D");
  }

  target = THIndexTensor_(newContiguous)(target);
  weights = weights ? THTensor_(newContiguous)(weights) : NULL;

  THIndex_t *target_data = THIndexTensor_(data)(target);
  real *weights_data = weights ? THTensor_(data)(weights) : NULL;
  real *gradInput_data = THTensor_(data)(gradInput);

  if (THTensor_(nDimension)(input) == 1) {
    int cur_target = target_data[0] - 1;   // targets are 1-based
    THAssert(cur_target >= 0 && cur_target < n_classes);

    // with sizeAverage the single sample's weight cancels against total_weight
    gradInput_data[cur_target] =
      (!sizeAverage && weights) ? -weights_data[cur_target] : -1;

  } else if (THTensor_(nDimension)(input) == 2) {
    int batch_size = THTensor_(size)(input, 0);
    THAssert(THIndexTensor_(size)(target, 0) == batch_size);

    int n_target = THTensor_(size)(input, 1);

    int i;
    for (i = 0; i < batch_size; i++) {
      int cur_target = target_data[i] - 1;   // targets are 1-based

      THAssert(cur_target >= 0 && cur_target < n_classes);

      // gradient is -weight at the target class, zero elsewhere
      gradInput_data[i * n_target + cur_target] =
        -(weights ? weights_data[cur_target] : 1.0f);

      if (sizeAverage && *total_weight_data) {
        gradInput_data[i * n_target + cur_target] /= *total_weight_data;
      }
    }
  }

  THIndexTensor_(free)(target);
  if (weights) {
    THTensor_(free)(weights);
  }
}
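As a quick sanity check, here is a minimal sketch (plain C, hypothetical names, not part of THNN) of what the 2D branch computes for a toy 2-sample, 3-class batch with per-class weights and sizeAverage enabled.

#include <stdio.h>

/* Illustrative only: dense NLL gradient for a 2-sample, 3-class batch,
 * mirroring the 2D branch above (targets are 1-based, as in the TH code). */
int main(void) {
  const int batch = 2, classes = 3;
  const long target[2] = {2, 3};              /* 1-based class indices */
  const float weights[3] = {1.f, 2.f, 0.5f};
  const int sizeAverage = 1;
  const float total_weight = weights[target[0] - 1] + weights[target[1] - 1]; /* 2.5 */
  float gradInput[2 * 3] = {0};

  for (int i = 0; i < batch; ++i) {
    int t = (int)target[i] - 1;               /* convert to 0-based */
    gradInput[i * classes + t] = -weights[t];
    if (sizeAverage && total_weight > 0)
      gradInput[i * classes + t] /= total_weight;
  }
  for (int i = 0; i < batch * classes; ++i)
    printf("%.2f ", gradInput[i]);            /* 0.00 -0.80 0.00 0.00 0.00 -0.20 */
  return 0;
}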
Example no. 3
void THCStorage_resize(THCState *state, THCStorage *self, ptrdiff_t size)
{
  THArgCheck(size >= 0, 2, "invalid size");
  THAssert(self->allocator != NULL);
  int device;
  THCudaCheck(cudaGetDevice(&device));

  if(!(self->flag & TH_STORAGE_RESIZABLE))
    THError("Trying to resize storage that is not resizable");

  size_t elementSize = at::elementSize(self->scalar_type);

  if (self->allocator->realloc) {
    void * data_ptr = self->data_ptr;
    cudaError_t err = (*self->allocator->realloc)(
      self->allocatorContext,
      (void**)&(data_ptr),
      self->size * elementSize,
      size * elementSize, THCState_getCurrentStreamOnDevice(state, device));
    if (err != cudaSuccess) {
      THCudaCheck(err);
    }
    self->data_ptr = data_ptr;  // the realloc hook may have moved the allocation
    self->size = size;
    self->device = device;
    return;
  }

  if(size == 0)
  {
    // shrinking to zero: just release the old allocation if we own it
    if(self->flag & TH_STORAGE_FREEMEM) {
      THCudaCheck(
        (*self->allocator->free)(self->allocatorContext, self->data_ptr));
    }
    self->data_ptr = NULL;
    self->size = 0;
    self->device = device;
  }
  else
  {
    // allocate a new buffer, copy over the overlapping prefix, free the old one
    void *data = NULL;
    cudaError_t err =
      (*self->allocator->malloc)(self->allocatorContext,
                                 (void**)&(data),
                                 size * elementSize,
                                 THCState_getCurrentStreamOnDevice(state, device));
    THCudaCheck(err);

    if (self->data_ptr) {
      // Enable p2p access when the memcpy is across devices
      THCState_getPeerToPeerAccess(state, device, self->device);

      THCudaCheck(cudaMemcpyAsync(data,
                                  self->data_ptr,
                                  THMin(self->size, size) * elementSize,
                                  cudaMemcpyDeviceToDevice,
                                  THCState_getCurrentStream(state)));
      if(self->flag & TH_STORAGE_FREEMEM) {
        THCudaCheck(
          (*self->allocator->free)(self->allocatorContext, self->data_ptr));
      }
    }

    self->data_ptr = data;
    self->size = size;
    self->device = device;
  }
}
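The non-realloc path above is the standard allocate-copy-free resize pattern. A minimal host-side analogue (plain malloc/memcpy, no CUDA, illustrative names only) is sketched below.

#include <stdlib.h>
#include <string.h>

/* Illustrative only: host-side analogue of the malloc/memcpy/free fallback
 * above. Returns 0 on success, -1 on allocation failure. */
static int resize_buffer(void **data, size_t *cur_size, size_t new_size,
                         size_t element_size) {
  if (new_size == 0) {                 /* shrink to empty: free and reset */
    free(*data);
    *data = NULL;
    *cur_size = 0;
    return 0;
  }
  void *fresh = malloc(new_size * element_size);
  if (!fresh)
    return -1;
  if (*data) {                         /* preserve the overlapping prefix */
    size_t keep = (*cur_size < new_size ? *cur_size : new_size) * element_size;
    memcpy(fresh, *data, keep);
    free(*data);
  }
  *data = fresh;
  *cur_size = new_size;
  return 0;
}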