예제 #1
0
/*
 * ClassNLLCriterion forward pass (negative log-likelihood).
 *
 * input        - 1D (n_classes) or 2D (batch_size x n_classes) tensor of
 *                log-probabilities.
 * target       - class indices; 1-based in this version (note the "- 1").
 * output       - 1-element tensor receiving the summed (or averaged) loss.
 * sizeAverage  - if true, divide the accumulated loss by the total weight.
 * weights      - optional per-class rescaling weights (may be NULL).
 * total_weight - 1-element tensor receiving the sum of the contributing
 *                target weights (1.0 per sample when weights is NULL).
 */
void THNN_(ClassNLLCriterion_updateOutput)(THNNState *state, THTensor *input,
                                           THIndexTensor *target,
                                           THTensor *output, bool sizeAverage,
                                           THTensor *weights,
                                           THTensor *total_weight)
{
  int n_dims = THTensor_(nDimension)(input);
  int n_classes = THTensor_(size)(input, n_dims - 1);

  if (THIndexTensor_(nDimension)(target) > 1) {
    THError("multi-target not supported");
  }
  if (THTensor_(nDimension)(input) > 2) {
    THError("input tensor should be 1D or 2D");
  }

  /* Work on contiguous views (new refs; released at the end). */
  input = THTensor_(newContiguous)(input);
  target = THIndexTensor_(newContiguous)(target);
  weights = weights ? THTensor_(newContiguous)(weights) : NULL;

  real *input_data = THTensor_(data)(input);
  THIndex_t *target_data = THIndexTensor_(data)(target);
  real *weights_data = weights ? THTensor_(data)(weights) : NULL;
  real *output_data = THTensor_(data)(output);
  real *total_weight_data = THTensor_(data)(total_weight);

  /* NOTE(review): output and total_weight are not resized here -- assumes
     callers pass tensors holding at least one element; confirm. */
  output_data[0] = total_weight_data[0] = 0.0;

  if (THTensor_(nDimension)(input) == 1) {
    /* Single sample: loss = -input[target] * weight[target]. */
    int cur_target = target_data[0] - 1;
    THAssert(cur_target >= 0 && cur_target < n_classes);
    total_weight_data[0] = weights ? weights_data[cur_target] : 1.0f;
    output_data[0] = -input_data[cur_target] * total_weight_data[0];
  } else if (THTensor_(nDimension)(input) == 2) {
    int batch_size = THTensor_(size)(input, 0);
    THAssert(THIndexTensor_(size)(target, 0) == batch_size);

    int n_target = THTensor_(size)(input, 1);

    int i;
    for (i = 0; i < batch_size; i++) {
      int cur_target = target_data[i] - 1;  /* targets are 1-based */
      THAssert(cur_target >= 0 && cur_target < n_classes);

      real cur_weight = weights ? weights_data[cur_target] : 1.0f;
      total_weight_data[0] += cur_weight;
      output_data[0] -= input_data[i * n_target + cur_target] * cur_weight;
    }
  }

  /* Weighted mean rather than weighted sum when requested. */
  if (sizeAverage && total_weight_data[0]) {
    output_data[0] /= total_weight_data[0];
  }

  if (weights) {
    THTensor_(free)(weights);
  }
  THTensor_(free)(input);
  THIndexTensor_(free)(target);
}
예제 #2
0
File: LookupTable.c  Project: ACB1337/nn
/*
 * Renormalizes the rows of `weight` referenced by `idx` so that each row's
 * normType-norm does not exceed maxNorm (actual clamping is done by
 * LookupTable_renormRow).
 *
 * idx    - 1D tensor of 1-based row indices into weight.
 * weight - 2D contiguous weight matrix (rows are embeddings).
 *
 * NOTE(review): this sorts and deduplicates idx IN PLACE, i.e. it clobbers
 * the caller's index tensor contents -- confirm callers tolerate this.
 */
void THNN_(LookupTable_renorm)(
          THNNState *state,
          THIndexTensor *idx,
          THTensor *weight,
          real maxNorm,
          real normType)
{
  if (!THTensor_(isContiguous)(weight))
    THError("weight must be contiguous");
  if (!THIndexTensor_(isContiguous)(idx))
    THError("input must be contiguous");
  if (THIndexTensor_(nDimension)(idx) != 1)
    THError("idx must be a vector");
  if (normType <= 0)
    THError("non-positive-norm not supported");

  long i;
  THIndex_t *row_idx = THIndexTensor_(data)(idx);
  long numel = THIndexTensor_(nElement)(idx);

  long numw = THTensor_(size)(weight, 0);
  long stride = THTensor_(stride)(weight, 0);
  real *gw = THTensor_(data)(weight);
  /* Validate every index (1-based) before touching the weights. */
  for (i=0; i<numel; i++)
    if (row_idx[i] < 1 || row_idx[i] > numw)
      THError("input out of range");
  // get unique indices: sort then compact adjacent duplicates in place
  qsort(row_idx, numel, sizeof(THIndex_t), THNN_(compare_THIndex));
  long ptr = 0;
  for (i=0; i<numel; i++)
    if (i == 0 || row_idx[i] != row_idx[i-1])
      row_idx[ptr++] = row_idx[i];
  numel = ptr;

#ifdef _OPENMP
  if (numel > 1000)
  {
    // The strategy is to parallelize over the rows that appear in
    // row_idx, so that thread 1 handles the rows in row_idx[0..numel/nThreads].
    // This distributes the work evenly to each thread.
    #pragma omp parallel for private(i)
    for (i=0; i<numel; i++)
    {
      long k = row_idx[i] - 1;
      THNN_(LookupTable_renormRow)(gw + k*stride, stride, maxNorm, normType);
    }
    return;
  }
#endif
  /* Serial fallback (or small workloads). */
  for (i=0; i<numel; i++)
  {
    long k = row_idx[i] - 1;
    THNN_(LookupTable_renormRow)(gw + k*stride, stride, maxNorm, normType);
  }
}
예제 #3
0
/*
 * Spatial (2D) ClassNLLCriterion forward pass.
 *
 * input        - 4D tensor (batch x n_classes x H x W) of log-probabilities.
 * target       - per-pixel class indices (batch x H x W), offset by
 *                TH_INDEX_BASE.
 * output       - 1-element tensor receiving the summed (or averaged) loss.
 * weights      - optional per-class rescaling weights (may be NULL).
 * total_weight - 1-element tensor receiving the accumulated target weights.
 *
 * INITIAL_CHECK is a macro defined elsewhere in this file; presumably it
 * validates the input/target/weights shapes -- not visible here.
 */
void THNN_(SpatialClassNLLCriterion_updateOutput)(
          THNNState *state,
          THTensor *input,
          THIndexTensor *target,
          THTensor *output,
          bool sizeAverage,
          THTensor *weights,
          THTensor *total_weight)
{
  INITIAL_CHECK;

  input = THTensor_(newContiguous)(input);
  target = THIndexTensor_(newContiguous)(target);
  weights = weights ? THTensor_(newContiguous)(weights) : NULL;

  real *input_data = THTensor_(data)(input);
  THIndex_t *target_data = THIndexTensor_(data)(target);
  real *weights_data = weights ? THTensor_(data)(weights) : NULL;
  real *output_data = THTensor_(data)(output);
  real *total_weight_data = THTensor_(data)(total_weight);

  long batch_size = THTensor_(size)(input, 0);
  long n_classes = THTensor_(size)(input, 1);
  long map_size = THTensor_(size)(input, 2) * THTensor_(size)(input, 3);
  long sample_size = map_size * n_classes;

  /* Accumulate in locals, then store once -- avoids repeated writes
     through the output pointers inside the hot loop. */
  real total_weight_acc = 0;
  real output_acc = 0;
  for (int b = 0; b < batch_size; b++) {
    for (int elem = 0; elem < map_size; elem++) {
      int cur_target = target_data[b * map_size + elem] - TH_INDEX_BASE;
      THAssert(cur_target >= 0 && cur_target < n_classes);

      real cur_weight = weights ? weights_data[cur_target] : 1.0f;
      total_weight_acc += cur_weight;
      output_acc -= input_data[b * sample_size + cur_target * map_size + elem] * cur_weight;
    }
  }
  *total_weight_data = total_weight_acc;
  *output_data = output_acc;

  if (sizeAverage && *total_weight_data)
    *output_data /= *total_weight_data;

  THTensor_(free)(input);
  THIndexTensor_(free)(target);
  if (weights)
    THTensor_(free)(weights);
}
예제 #4
0
/*
 * Spatial (2D) ClassNLLCriterion backward pass.
 *
 * Writes -weight[target] / normalize into gradInput at each target pixel,
 * where normalize is the total weight when sizeAverage is set, else 1.
 *
 * NOTE(review): gradInput is only written at target positions; it is
 * presumably zeroed by the caller beforehand -- confirm.
 * If *total_weight_data <= 0 the function returns without touching
 * gradInput (forward pass saw no valid targets).
 */
void THNN_(SpatialClassNLLCriterion_updateGradInput)(
          THNNState *state,
          THTensor *input,
          THIndexTensor *target,
          THTensor *gradInput,
          bool sizeAverage,
          THTensor *weights,
          THTensor *total_weight)
{
  INITIAL_CHECK;
  THArgCheck(THTensor_(isContiguous)(gradInput), 4,
              "gradInput must be contiguous");

  real *total_weight_data = THTensor_(data)(total_weight);
  if (*total_weight_data <= 0)
    return;

  target = THIndexTensor_(newContiguous)(target);
  weights = weights ? THTensor_(newContiguous)(weights) : NULL;

  THIndex_t *target_data = THIndexTensor_(data)(target);
  real *weights_data = weights ? THTensor_(data)(weights) : NULL;
  real *gradInput_data = THTensor_(data)(gradInput);

  long batch_size = THTensor_(size)(input, 0);
  long n_classes = THTensor_(size)(input, 1);
  long map_size = THTensor_(size)(input, 2) * THTensor_(size)(input, 3);
  long sample_size = map_size * n_classes;

  real normalize = sizeAverage ? *total_weight_data : 1.0f;

  int b;
  /* Batches are independent; each iteration writes disjoint slices. */
  #pragma omp parallel for
  for (b = 0; b < batch_size; b++) {
    int elem;
    for (elem = 0; elem < map_size; elem++) {
      int cur_target = target_data[b * map_size + elem] - TH_INDEX_BASE;
      THAssert(cur_target >= 0 && cur_target < n_classes);

      gradInput_data[b * sample_size + cur_target * map_size + elem] =
        -(weights ? weights_data[cur_target] : 1.0f) / normalize;
    }
  }

  THIndexTensor_(free)(target);
  if (weights)
    THTensor_(free)(weights);
}
예제 #5
0
File: LookupTable.c  Project: noa/jhnn
/*
 * Resets and recounts, in count_data, how many times each row index occurs
 * in `input` (indices are 1-based, hence the "- 1").
 *
 * count_data - per-row counters; only the slots referenced by `input` are
 *              touched (first pass clears them, second pass tallies).
 * input      - index tensor whose elements select rows.
 *
 * Fix: the loop counter was `int` while nElement returns a `long`; for
 * tensors with more than INT_MAX elements the comparison/indexing would
 * truncate. Use `long` to match `numel`.
 */
static void JHNN_(LookupTable_resetCount)(
          THInteger_t *count_data,
          THIndexTensor *input)
{
  long i;
  THIndex_t *input_data = THIndexTensor_(data)(input);
  long numel = THIndexTensor_(nElement)(input);

  /* First pass: clear every counter that will be used. */
  for (i = 0; i<numel; i++) {
    long k = input_data[i] - 1;
    count_data[k] = 0;
  }
  /* Second pass: tally occurrences. */
  for (i = 0; i<numel; i++) {
    long k = input_data[i] - 1;
    count_data[k]++;
  }
}
예제 #6
0
/*
 * Resets and recounts, in count_data, how many times each row index occurs
 * in `input`. Indices are offset by TH_INDEX_BASE. Only counter slots
 * referenced by `input` are touched: a first pass zeroes them, a second
 * pass tallies occurrences.
 */
static void THNN_(LookupTable_resetCount)(
          THInteger_t *count_data,
          THIndexTensor *input)
{
  THIndex_t *indices = THIndexTensor_(data)(input);
  ptrdiff_t n = THIndexTensor_(nElement)(input);

  /* Pass 1: clear the slot for every row that appears. */
  for (ptrdiff_t pos = 0; pos < n; pos++) {
    count_data[indices[pos] - TH_INDEX_BASE] = 0;
  }

  /* Pass 2: count how often each row occurs. */
  for (ptrdiff_t pos = 0; pos < n; pos++) {
    count_data[indices[pos] - TH_INDEX_BASE]++;
  }
}
예제 #7
0
/*
 * SpatialMaxUnpooling forward pass: scatters each input value to the
 * location recorded in `indices` (produced by a prior max-pooling) inside
 * an owidth x oheight output map; all other output cells are zero.
 *
 * input   - 3D (C x H x W) or 4D (N x C x H x W) tensor.
 * indices - same shape as input; per-element target positions.
 * output  - resized here to match (owidth, oheight).
 */
void THNN_(SpatialMaxUnpooling_updateOutput)(
    THNNState *state,
    THTensor *input,
    THTensor *output,
    THIndexTensor *indices,
    int owidth, int oheight)
{
  /* Dimension offsets: shifted by one in batch mode. */
  int dimw = 2;
  int dimh = 1;
  int nbatch = 1;
  int nslices;
  int iheight;
  int iwidth;
  real *input_data;
  real *output_data;
  THIndex_t *indices_data;


  THNN_ARGCHECK(input->nDimension == 3 || input->nDimension == 4, 2, input,
		"3D or 4D (batch mode) tensor expected for input, but got: %s");
  THNN_CHECK_SHAPE_INDICES(input, indices);

  if (input->nDimension == 4)
  {
    nbatch = input->size[0];
    dimw++;
    dimh++;
  }

  /* sizes */
  nslices = input->size[dimh-1];
  iheight = input->size[dimh];
  iwidth = input->size[dimw];

  /* get contiguous input and indices */
  input = THTensor_(newContiguous)(input);
  indices = THIndexTensor_(newContiguous)(indices);

  /* resize output */
  if (input->nDimension == 3)
  {
    THTensor_(resize3d)(output, nslices, oheight, owidth);
    THTensor_(zero)(output);

    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);
    indices_data = THIndexTensor_(data)(indices);

    /* Single frame: delegate the per-plane scatter to the frame helper. */
    THNN_(SpatialMaxUnpooling_updateOutput_frame)(input_data, output_data,
                                              indices_data,
                                              nslices,
                                              iwidth, iheight,
                                              owidth, oheight);
  }
  else
  {
    long p;

    THTensor_(resize4d)(output, nbatch, nslices, oheight, owidth);
    THTensor_(zero)(output);

    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);
    indices_data = THIndexTensor_(data)(indices);

/* Batch items write to disjoint output slices, so this is race-free. */
#pragma omp parallel for private(p)
    for (p = 0; p < nbatch; p++)
    {
      THNN_(SpatialMaxUnpooling_updateOutput_frame)(input_data+p*nslices*iwidth*iheight, output_data+p*nslices*owidth*oheight,
                                                indices_data+p*nslices*iwidth*iheight,
                                                nslices,
                                                iwidth, iheight,
                                                owidth, oheight);
    }
  }

  /* cleanup */
  THTensor_(free)(input);
  THIndexTensor_(free)(indices);
}
예제 #8
0
/*
 * SpatialMaxUnpooling backward pass: gathers gradients from gradOutput at
 * the positions recorded in `indices`, writing them back into gradInput
 * (which is resized to input's shape and zeroed first).
 *
 * Errors out if gradOutput's spatial size does not match (owidth, oheight).
 */
void THNN_(SpatialMaxUnpooling_updateGradInput)(
    THNNState *state,
    THTensor *input,
    THTensor *gradOutput,
    THTensor *gradInput,
    THIndexTensor *indices,
    int owidth, int oheight)
{
  /* Dimension offsets: shifted by one in batch mode. */
  int dimw = 2;
  int dimh = 1;
  int nbatch = 1;
  int nslices;
  int iheight;
  int iwidth;
  real *gradInput_data;
  real *gradOutput_data;
  THIndex_t *indices_data;

  THNN_CHECK_SHAPE_INDICES(input, indices);

  /* get contiguous gradOutput and indices */
  gradOutput = THTensor_(newContiguous)(gradOutput);
  indices = THIndexTensor_(newContiguous)(indices);

  /* resize */
  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);

  if (input->nDimension == 4) {
    nbatch = input->size[0];
    dimw++;
    dimh++;
  }

  /* sizes */
  nslices = input->size[dimh-1];
  iheight = input->size[dimh];
  iwidth = input->size[dimw];

  if(owidth!=gradOutput->size[dimw] || oheight!=gradOutput->size[dimh]){
    THError("Inconsistent gradOutput size. oheight= %d, owidth= %d, gradOutput: %dx%d",
	    oheight, owidth,gradOutput->size[dimh],gradOutput->size[dimw]);
  }

  /* get raw pointers */
  gradInput_data = THTensor_(data)(gradInput);
  gradOutput_data = THTensor_(data)(gradOutput);
  indices_data = THIndexTensor_(data)(indices);

  /* backprop */
  if (input->nDimension == 3)
  {
    THNN_(SpatialMaxUnpooling_updateGradInput_frame)(gradInput_data, gradOutput_data,
                                                 indices_data,
                                                 nslices,
                                                 iwidth, iheight,
                                                 owidth, oheight);
  }
  else
  {
    long p;
/* Batch items touch disjoint gradInput slices, so this is race-free. */
#pragma omp parallel for private(p)
    for (p = 0; p < nbatch; p++)
    {
      THNN_(SpatialMaxUnpooling_updateGradInput_frame)(gradInput_data+p*nslices*iwidth*iheight, gradOutput_data+p*nslices*owidth*oheight,
                                                   indices_data+p*nslices*iwidth*iheight,
                                                   nslices,
                                                   iwidth, iheight,
                                                   owidth, oheight);
    }
  }

  /* cleanup */
  THTensor_(free)(gradOutput);
  THIndexTensor_(free)(indices);
}
// TODO: improve error messages (they do not say which size was expected)
/*
 * MultiLabelMarginCriterion forward pass.
 *
 * For each frame, targets are the leading entries of the target row until
 * the first entry whose (base-adjusted) value is negative acts as a
 * terminator. Loss per frame: sum over (non-target d, target t) of
 * max(0, 1 - input[t] + input[d]), divided by dim (and by nframe when
 * sizeAverage is set).
 *
 * isTarget is resized to target's shape and filled with 0/1 flags marking
 * which classes are targets (saved for the backward pass).
 */
void THNN_(MultiLabelMarginCriterion_updateOutput)(
          THNNState *state,
          THTensor *input,
          THIndexTensor *target,
          THTensor *output,
          THTensor *isTarget,
          bool sizeAverage)
{
  real *input_data, *isTarget_data;
  THIndex_t *target_data;
  long nframe, dim;
  long t, d, dt, ddt;
  real sum;

  THArgCheck((input->nDimension == 1) || (input->nDimension == 2), 2,
	     "vector or matrix expected");

  if (input->nDimension == 1)
  {
    nframe = 1;
    dim = input->size[0];
    THArgCheck((target->nDimension == 1) && (target->size[0] == dim), 3,
	       "inconsistent target size");
  }
  else
  {
    nframe = input->size[0];
    dim = input->size[1];
    THArgCheck((target->nDimension == 2) && (target->size[0] == nframe)
	       && (target->size[1] == dim), 3, "inconsistent target size");
  }

  THArgCheck(THIndexTensor_(minall)(target) >= 0, 3, "target out of range");
  THArgCheck(THIndexTensor_(maxall)(target) <= dim, 3, "target out of range");

  target = THIndexTensor_(newContiguous)(target);
  input = THTensor_(newContiguous)(input);
  input_data = THTensor_(data)(input);
  target_data = THIndexTensor_(data)(target);

  THNN_resizeAs_indices(isTarget, target);
  THTensor_(zero)(isTarget);
  isTarget_data = THTensor_(data)(isTarget);

  sum = 0;
  for (t = 0; t < nframe; t++)
  {
    /* Mark target classes for this frame; a negative (base-adjusted)
       entry terminates the target list. */
    for (ddt = 0; ddt < dim; ddt++)
    {
      THIndex_t target_idx = target_data[ddt] - TH_INDEX_BASE;
      if (target_idx < 0)
        break;
      isTarget_data[target_idx] = 1;
    }
    /* Accumulate the hinge loss against every non-target class. */
    for (dt = 0; dt < dim; dt++)
    {
      THIndex_t target_idx = target_data[dt] - TH_INDEX_BASE;
      real input_target;
      if (target_idx < 0)
        break;

      input_target = input_data[target_idx];
      for (d = 0; d < dim; d++)
      {
        if (!isTarget_data[d])
        {
          real z = 1 - input_target + input_data[d];
          if (z > 0)
            sum += z;
        }
      }
    }
    /* Advance all three base pointers to the next frame. */
    input_data += dim;
    target_data += dim;
    isTarget_data += dim;
  }

  sum /= dim;
  if (sizeAverage)
    sum /= nframe;

  THTensor_(set1d)(output, 0, sum);

  THTensor_(free)(input);
  THIndexTensor_(free)(target);
}
예제 #10
0
/*
 * SpatialMaxUnpooling backward pass (later ATen-era variant: scalar_t,
 * dim()/size() accessors, intrusive_ptr refcounting).
 *
 * Gathers gradients from gradOutput at the positions in `indices` into
 * gradInput, which is resized to input's shape and zeroed first. Errors
 * out if gradOutput's spatial size does not match (owidth, oheight).
 * Note the batch loop here is serial (no OpenMP pragma).
 */
void THNN_(SpatialMaxUnpooling_updateGradInput)(
    THNNState *state,
    THTensor *input,
    THTensor *gradOutput,
    THTensor *gradInput,
    THIndexTensor *indices,
    int owidth, int oheight)
{
  /* Dimension offsets: shifted by one in batch mode. */
  int dimw = 2;
  int dimh = 1;
  int nbatch = 1;
  int nslices;
  int iheight;
  int iwidth;
  scalar_t *gradInput_data;
  scalar_t *gradOutput_data;
  THIndex_t *indices_data;

  THNN_CHECK_SHAPE_INDICES(input, indices);

  /* get contiguous gradOutput and indices */
  gradOutput = THTensor_(newContiguous)(gradOutput);
  indices = THIndexTensor_(newContiguous)(indices);

  /* resize */
  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);

  if (input->dim() == 4) {
    nbatch = input->size(0);
    dimw++;
    dimh++;
  }

  /* sizes */
  nslices = input->size(dimh-1);
  iheight = input->size(dimh);
  iwidth = input->size(dimw);

  if(owidth!=gradOutput->size(dimw) || oheight!=gradOutput->size(dimh)){
    THError("Inconsistent gradOutput size. oheight= %d, owidth= %d, gradOutput: %dx%d",
            oheight, owidth, gradOutput->size(dimh), gradOutput->size(dimw));
  }

  /* get raw pointers */
  gradInput_data = gradInput->data<scalar_t>();
  gradOutput_data = gradOutput->data<scalar_t>();
  indices_data = THIndexTensor_(data)(indices);

  /* backprop */
  if (input->dim() == 3)
  {
    THNN_(SpatialMaxUnpooling_updateGradInput_frame)(gradInput_data, gradOutput_data,
                                                 indices_data,
                                                 nslices,
                                                 iwidth, iheight,
                                                 owidth, oheight);
  }
  else
  {
    int p;
    for (p = 0; p < nbatch; p++)
    {
      THNN_(SpatialMaxUnpooling_updateGradInput_frame)(gradInput_data+p*nslices*iwidth*iheight, gradOutput_data+p*nslices*owidth*oheight,
                                                   indices_data+p*nslices*iwidth*iheight,
                                                   nslices,
                                                   iwidth, iheight,
                                                   owidth, oheight);
    }
  }

  /* cleanup: drop the refs taken by newContiguous */
  c10::raw::intrusive_ptr::decref(gradOutput);
  THIndexTensor_(free)(indices);
}
예제 #11
0
/*
 * VolumetricAdaptiveMaxPooling backward pass: routes each gradOutput value
 * to the input position recorded in `indices` by the forward pass.
 *
 * input      - 4D (D x T x H x W) or 5D (N x D x T x H x W) tensor.
 * gradInput  - resized to input's shape and zeroed here.
 * indices    - max locations from the forward pass (gradOutput's shape).
 *
 * NOTE(review): indices is not made contiguous here, unlike gradOutput --
 * presumably the forward pass always produces it contiguous; confirm.
 */
void THNN_(VolumetricAdaptiveMaxPooling_updateGradInput)(
          THNNState *state,
          THTensor *input,
          THTensor *gradOutput,
          THTensor *gradInput,
          THIndexTensor *indices)
{
  /* Dimension offsets: shifted by one in batch (5D) mode. */
  int dimD = 0;
  int dimT = 1;
  int dimH = 2;
  int dimW = 3;
  int64_t sizeB = 1;
  int64_t sizeD;
  int64_t isizeT;
  int64_t isizeH;
  int64_t isizeW;
  int64_t osizeT;
  int64_t osizeH;
  int64_t osizeW;
  real *gradInput_data;
  real *gradOutput_data;
  THIndex_t *indices_data;

  /* get contiguous gradOutput */
  gradOutput = THTensor_(newContiguous)(gradOutput);

  /* resize */
  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);

  if (input->dim() == 5) {
    sizeB = input->size[0];
    dimD++;
    dimT++;
    dimH++;
    dimW++;
  }

  /* sizes */
  sizeD  = input->size[dimD];
  isizeT = input->size[dimT];
  isizeH = input->size[dimH];
  isizeW = input->size[dimW];
  osizeT = gradOutput->size[dimT];
  osizeH = gradOutput->size[dimH];
  osizeW = gradOutput->size[dimW];

  /* get raw pointers */
  gradInput_data = THTensor_(data)(gradInput);
  gradOutput_data = THTensor_(data)(gradOutput);
  indices_data = THIndexTensor_(data)(indices);

  /* backprop */
  if (input->dim() == 4)
  {
    THNN_(VolumetricAdaptiveMaxPooling_updateGradInput_frame)(gradInput_data, gradOutput_data,
                                                         indices_data,
                                                         sizeD,
                                                         isizeT, isizeH, isizeW,
                                                         osizeT, osizeH, osizeW);
  }
  else
  {
    int64_t b;
/* Batch items write disjoint gradInput slices, so this is race-free. */
#pragma omp parallel for private(b)
    for (b = 0; b < sizeB; b++)
    {
      THNN_(VolumetricAdaptiveMaxPooling_updateGradInput_frame)(gradInput_data+b*sizeD*isizeT*isizeH*isizeW, gradOutput_data+b*sizeD*osizeT*osizeH*osizeW,
                                                           indices_data+b*sizeD*osizeT*osizeH*osizeW,
                                                           sizeD,
                                                           isizeT, isizeH, isizeW,
                                                           osizeT, osizeH, osizeW);
    }
  }

  /* cleanup */
  THTensor_(free)(gradOutput);
}
예제 #12
0
/*
 * SpatialDilatedMaxPooling backward pass: each gradOutput value is routed
 * to the input location stored in `indices` by the forward pass (done in
 * the _frame helper).
 *
 * gradInput is resized to input's shape and zeroed before accumulation.
 * Shape/parameter validation is delegated to the shapeCheck helper.
 */
void THNN_(SpatialDilatedMaxPooling_updateGradInput)(
    THNNState *state,
    THTensor *input,
    THTensor *gradOutput,
    THTensor *gradInput,
    THIndexTensor *indices,
    int kW,
    int kH,
    int dW,
    int dH,
    int padW,
    int padH,
    int dilationW,
    int dilationH,
    bool ceil_mode)
{
    /* Dimension offsets: shifted by one in batch mode. */
    int dimw = 2;
    int dimh = 1;
    long nbatch = 1;
    int nInputPlane;
    int inputHeight;
    int inputWidth;
    int outputHeight;
    int outputWidth;
    real *gradInput_data;
    real *gradOutput_data;
    THIndex_t *indices_data;

    THNN_(SpatialDilatedMaxPooling_shapeCheck)
    (input, gradOutput, indices, kH, kW, dH, dW,
     padH, padW, dilationH, dilationW, ceil_mode);

    /* get contiguous gradOutput */
    gradOutput = THTensor_(newContiguous)(gradOutput);

    /* resize */
    THTensor_(resizeAs)(gradInput, input);
    THTensor_(zero)(gradInput);

    if (input->nDimension == 4) {
        nbatch = input->size[0];
        dimw++;
        dimh++;
    }

    /* sizes */
    nInputPlane = input->size[dimh-1];
    inputHeight = input->size[dimh];
    inputWidth = input->size[dimw];
    outputHeight = gradOutput->size[dimh];
    outputWidth = gradOutput->size[dimw];

    /* get raw pointers */
    gradInput_data = THTensor_(data)(gradInput);
    gradOutput_data = THTensor_(data)(gradOutput);
    indices_data = THIndexTensor_(data)(indices);

    /* backprop */
    if (input->nDimension == 3)
    {
        THNN_(SpatialDilatedMaxPooling_updateGradInput_frame)
        (gradInput_data, gradOutput_data,
         indices_data,
         nInputPlane,
         inputWidth, inputHeight,
         outputWidth, outputHeight,
         dW, dH);
    }
    else
    {
        long p;
        /* Batch items touch disjoint slices; safe to parallelize. */
        #pragma omp parallel for private(p)
        for (p = 0; p < nbatch; p++)
        {
            THNN_(SpatialDilatedMaxPooling_updateGradInput_frame)
            (gradInput_data+p*nInputPlane*inputWidth*inputHeight,
             gradOutput_data+p*nInputPlane*outputWidth*outputHeight,
             indices_data+p*nInputPlane*outputWidth*outputHeight,
             nInputPlane,
             inputWidth, inputHeight,
             outputWidth, outputHeight,
             dW, dH);
        }
    }

    /* cleanup */
    THTensor_(free)(gradOutput);
}
예제 #13
0
/*
 * SpatialDilatedMaxPooling forward pass.
 *
 * Computes the output spatial size from kernel/stride/padding/dilation
 * (ceil or floor per ceil_mode), resizes output and indices accordingly,
 * and delegates the per-plane pooling to the _frame helper. `indices`
 * records, for each output cell, the location of the max input element
 * (consumed by the backward pass).
 */
void THNN_(SpatialDilatedMaxPooling_updateOutput)(
    THNNState *state,
    THTensor *input,
    THTensor *output,
    THIndexTensor *indices,
    int kW,
    int kH,
    int dW,
    int dH,
    int padW,
    int padH,
    int dilationW,
    int dilationH,
    bool ceil_mode)
{

    /* Dimension offsets: shifted by one in batch mode. */
    int dimw = 2;
    int dimh = 1;
    long nbatch = 1;
    long nInputPlane;
    long inputHeight;
    long inputWidth;
    long outputHeight;
    long outputWidth;
    real *input_data;
    real *output_data;
    THIndex_t *indices_data;

    THNN_(SpatialDilatedMaxPooling_shapeCheck)
    (input, NULL, NULL, kH, kW, dH, dW,
     padH, padW, dilationH, dilationW, ceil_mode);

    if (input->nDimension == 4)
    {
        nbatch = input->size[0];
        dimw++;
        dimh++;
    }

    /* sizes */
    nInputPlane = input->size[dimh-1];
    inputHeight = input->size[dimh];
    inputWidth = input->size[dimw];
    /* Standard pooling output-size formula; the effective kernel extent is
       dilation * (k - 1) + 1. */
    if (ceil_mode)
    {
        outputHeight = (long)(ceil((float)(inputHeight - (dilationH * (kH - 1) + 1) + 2*padH) / dH)) + 1;
        outputWidth  = (long)(ceil((float)(inputWidth  - (dilationW * (kW - 1) + 1) + 2*padW) / dW)) + 1;
    }
    else
    {
        outputHeight = (long)(floor((float)(inputHeight - (dilationH * (kH - 1) + 1) + 2*padH) / dH)) + 1;
        outputWidth  = (long)(floor((float)(inputWidth  - (dilationW * (kW - 1) + 1) + 2*padW) / dW)) + 1;
    }

    if (padW || padH)
    {
        // ensure that the last pooling starts inside the image
        if ((outputHeight - 1)*dH >= inputHeight + padH)
            --outputHeight;
        if ((outputWidth  - 1)*dW >= inputWidth  + padW)
            --outputWidth;
    }

    /* get contiguous input */
    input = THTensor_(newContiguous)(input);

    /* resize output */
    if (input->nDimension == 3)
    {
        THTensor_(resize3d)(output, nInputPlane, outputHeight, outputWidth);
        /* indices will contain the locations for each output point */
        THIndexTensor_(resize3d)(indices,  nInputPlane, outputHeight, outputWidth);

        input_data = THTensor_(data)(input);
        output_data = THTensor_(data)(output);
        indices_data = THIndexTensor_(data)(indices);

        THNN_(SpatialDilatedMaxPooling_updateOutput_frame)
        (input_data, output_data,
         indices_data,
         nInputPlane,
         inputWidth, inputHeight,
         outputWidth, outputHeight,
         kW, kH, dW, dH,
         padW, padH,
         dilationW, dilationH
        );
    }
    else
    {
        long p;

        THTensor_(resize4d)(output, nbatch, nInputPlane, outputHeight, outputWidth);
        /* indices will contain the locations for each output point */
        THIndexTensor_(resize4d)(indices, nbatch, nInputPlane, outputHeight, outputWidth);

        input_data = THTensor_(data)(input);
        output_data = THTensor_(data)(output);
        indices_data = THIndexTensor_(data)(indices);

        /* Batch items write disjoint output slices; safe to parallelize. */
        #pragma omp parallel for private(p)
        for (p = 0; p < nbatch; p++)
        {
            THNN_(SpatialDilatedMaxPooling_updateOutput_frame)
            (input_data+p*nInputPlane*inputWidth*inputHeight,
             output_data+p*nInputPlane*outputWidth*outputHeight,
             indices_data+p*nInputPlane*outputWidth*outputHeight,
             nInputPlane,
             inputWidth, inputHeight,
             outputWidth, outputHeight,
             kW, kH, dW, dH,
             padW, padH,
             dilationW, dilationH
            );
        }
    }

    /* cleanup */
    THTensor_(free)(input);
}
/*
 * SpatialDilatedMaxPooling backward pass (ATen-era variant: scalar_t,
 * dim()/size() accessors, at::parallel_for instead of raw OpenMP).
 *
 * Routes each gradOutput value to the input location recorded in `indices`
 * by the forward pass; gradInput is resized to input's shape and zeroed
 * first. Validation is delegated to the shapeCheck helper.
 */
void THNN_(SpatialDilatedMaxPooling_updateGradInput)(
          THNNState *state,
          THTensor *input,
          THTensor *gradOutput,
          THTensor *gradInput,
          THIndexTensor *indices,
          int kW,
          int kH,
          int dW,
          int dH,
          int padW,
          int padH,
          int dilationW,
          int dilationH,
          bool ceil_mode)
{
  /* Dimension offsets: shifted by one in batch mode. */
  int dimw = 2;
  int dimh = 1;
  int64_t nbatch = 1;
  int nInputPlane;
  int inputHeight;
  int inputWidth;
  int outputHeight;
  int outputWidth;
  scalar_t *gradInput_data;
  scalar_t *gradOutput_data;
  THIndex_t *indices_data;

  THNN_(SpatialDilatedMaxPooling_shapeCheck)
    (input, gradOutput, indices, kH, kW, dH, dW,
     padH, padW, dilationH, dilationW, ceil_mode);

  /* get contiguous gradOutput */
  gradOutput = THTensor_(newContiguous)(gradOutput);

  /* resize */
  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);

  if (input->dim() == 4) {
    nbatch = input->size(0);
    dimw++;
    dimh++;
  }

  /* sizes */
  nInputPlane = input->size(dimh-1);
  inputHeight = input->size(dimh);
  inputWidth = input->size(dimw);
  outputHeight = gradOutput->size(dimh);
  outputWidth = gradOutput->size(dimw);

  /* get raw pointers */
  gradInput_data = gradInput->data<scalar_t>();
  gradOutput_data = gradOutput->data<scalar_t>();
  indices_data = THIndexTensor_(data)(indices);

  /* backprop */
  if (input->dim() == 3)
  {
    THNN_(SpatialDilatedMaxPooling_updateGradInput_frame)
      (gradInput_data, gradOutput_data,
       indices_data,
       nInputPlane,
       inputWidth, inputHeight,
       outputWidth, outputHeight,
       dW, dH);
  }
  else
  {
    /* Batch items touch disjoint slices; parallel_for partitions [0,nbatch). */
    at::parallel_for(0, nbatch, 0, [&](int64_t start, int64_t end) {
      for (auto p = start; p < end; p++)
      {
        THNN_(SpatialDilatedMaxPooling_updateGradInput_frame)
          (gradInput_data+p*nInputPlane*inputWidth*inputHeight,
           gradOutput_data+p*nInputPlane*outputWidth*outputHeight,
           indices_data+p*nInputPlane*outputWidth*outputHeight,
           nInputPlane,
           inputWidth, inputHeight,
           outputWidth, outputHeight,
           dW, dH);
      }
    });
  }

  /* cleanup: drop the ref taken by newContiguous */
  c10::raw::intrusive_ptr::decref(gradOutput);
}
/*
 * SpatialDilatedMaxPooling forward pass (ATen-era variant).
 *
 * Output spatial size is computed by the shared pooling_output_shape
 * helper (replacing the hand-written ceil/floor formulas of the older C
 * version). `indices` records the max-location for each output cell for
 * use by the backward pass.
 */
void THNN_(SpatialDilatedMaxPooling_updateOutput)(
          THNNState *state,
          THTensor *input,
          THTensor *output,
          THIndexTensor *indices,
          int kW,
          int kH,
          int dW,
          int dH,
          int padW,
          int padH,
          int dilationW,
          int dilationH,
          bool ceil_mode)
{

  /* Dimension offsets: shifted by one in batch mode. */
  int dimw = 2;
  int dimh = 1;
  int64_t nbatch = 1;
  int64_t nInputPlane;
  int64_t inputHeight;
  int64_t inputWidth;
  int64_t outputHeight;
  int64_t outputWidth;
  scalar_t *input_data;
  scalar_t *output_data;
  THIndex_t *indices_data;

  THNN_(SpatialDilatedMaxPooling_shapeCheck)
    (input, NULL, NULL, kH, kW, dH, dW,
     padH, padW, dilationH, dilationW, ceil_mode);

  if (input->dim() == 4)
  {
    nbatch = input->size(0);
    dimw++;
    dimh++;
  }

  /* sizes */
  nInputPlane = input->size(dimh-1);
  inputHeight = input->size(dimh);
  inputWidth = input->size(dimw);
  outputHeight = pooling_output_shape<int64_t>(inputHeight, kH, padH, dH, dilationH, ceil_mode);
  outputWidth = pooling_output_shape<int64_t>(inputWidth, kW, padW, dW, dilationW, ceil_mode);

  /* get contiguous input */
  input = THTensor_(newContiguous)(input);

  /* resize output */
  if (input->dim() == 3)
  {
    THTensor_(resize3d)(output, nInputPlane, outputHeight, outputWidth);
    /* indices will contain the locations for each output point */
    THIndexTensor_(resize3d)(indices,  nInputPlane, outputHeight, outputWidth);

    input_data = input->data<scalar_t>();
    output_data = output->data<scalar_t>();
    indices_data = THIndexTensor_(data)(indices);

    THNN_(SpatialDilatedMaxPooling_updateOutput_frame)
      (input_data, output_data,
       indices_data,
       nInputPlane,
       inputWidth, inputHeight,
       outputWidth, outputHeight,
       kW, kH, dW, dH,
       padW, padH,
       dilationW, dilationH
       );
  }
  else
  {
    THTensor_(resize4d)(output, nbatch, nInputPlane, outputHeight, outputWidth);
    /* indices will contain the locations for each output point */
    THIndexTensor_(resize4d)(indices, nbatch, nInputPlane, outputHeight, outputWidth);

    input_data = input->data<scalar_t>();
    output_data = output->data<scalar_t>();
    indices_data = THIndexTensor_(data)(indices);

    /* Batch items write disjoint output slices; safe to run in parallel. */
    at::parallel_for(0, nbatch, 0, [&](int64_t start, int64_t end) {
      for (auto p = start; p < end; p++)
      {
        THNN_(SpatialDilatedMaxPooling_updateOutput_frame)
          (input_data+p*nInputPlane*inputWidth*inputHeight,
           output_data+p*nInputPlane*outputWidth*outputHeight,
           indices_data+p*nInputPlane*outputWidth*outputHeight,
           nInputPlane,
           inputWidth, inputHeight,
           outputWidth, outputHeight,
           kW, kH, dW, dH,
           padW, padH,
           dilationW, dilationH
           );
      }
    });
  }

  /* cleanup: drop the ref taken by newContiguous */
  c10::raw::intrusive_ptr::decref(input);
}
예제 #16
0
/*
 * ClassNLLCriterion backward pass.
 *
 * Writes -weight[target] (divided by *total_weight when sizeAverage is
 * set) into gradInput at each target position. Targets are 1-based in
 * this version (note the "- 1").
 *
 * Returns early without touching gradInput when *total_weight_data <= 0
 * (the forward pass accumulated no valid targets).
 *
 * NOTE(review): gradInput is only written at target positions; it is
 * presumably zeroed by the caller beforehand -- confirm.
 */
void THNN_(ClassNLLCriterion_updateGradInput)(THNNState *state, THTensor *input, THIndexTensor *target, THTensor *gradInput, bool sizeAverage, THTensor *weights, THTensor *total_weight)
{
  int n_dims = THTensor_(nDimension)(input);
  int n_classes = THTensor_(size)(input, n_dims - 1);

  if (!THTensor_(isContiguous)(gradInput)) {
    THError("gradInput must be contiguous");
  }

  real *total_weight_data = THTensor_(data)(total_weight);

  if (!(*total_weight_data > 0)) {
    return;
  }

  if (THIndexTensor_(nDimension)(target) > 1) {
    THError("multi-target not supported");
  }

  if (THTensor_(nDimension)(input) > 2) {
    THError("input tensor should be 1D or 2D");
  }

  target = THIndexTensor_(newContiguous)(target);
  weights = weights ? THTensor_(newContiguous)(weights) : NULL;

  THIndex_t *target_data = THIndexTensor_(data)(target);
  real *weights_data = weights ? THTensor_(data)(weights) : NULL;
  real *gradInput_data = THTensor_(data)(gradInput);

  if (THTensor_(nDimension)(input) == 1) {
    int cur_target = target_data[0] - 1;  /* targets are 1-based */
    THAssert(cur_target >= 0 && cur_target < n_classes);

    /* NOTE(review): the weight is only applied when !sizeAverage here,
       unlike the batched branch below -- looks intentional (upstream
       behavior) but worth confirming. */
    gradInput_data[cur_target] =
      (!sizeAverage && weights) ? -weights_data[cur_target] : -1;

  } else if (THTensor_(nDimension)(input) == 2) {
    int batch_size = THTensor_(size)(input, 0);
    int n_target = THTensor_(size)(input, 1);

    int i;
    for (i = 0; i < batch_size; i++){
      int cur_target = target_data[i] - 1;

      THAssert(cur_target >= 0 && cur_target < n_classes);

      gradInput_data[i * n_target + cur_target] =
        -(weights ? weights_data[cur_target] : 1.0f);

      if (sizeAverage && *total_weight_data) {
        gradInput_data[i * n_target + cur_target] /= *total_weight_data;
      }
    }
  }

  THIndexTensor_(free)(target);
  if (weights) {
    THTensor_(free)(weights);
  }
}
예제 #17
0
/* Volumetric (3D) adaptive max pooling forward pass: maps a 4D
 * (D x T x H x W) or 5D (B x D x T x H x W) input onto a fixed output
 * size of osizeT x osizeH x osizeW per plane.  The input is read through
 * its explicit strides, so it need not be contiguous.  `output` and
 * `indices` are resized here; `indices` stores the argmax location of
 * every output element for use in the backward pass. */
void THNN_(VolumetricAdaptiveMaxPooling_updateOutput)(
          THNNState *state,
          THTensor *input,
          THTensor *output,
          THIndexTensor *indices,
          int osizeT,
          int osizeW,
          int osizeH)
{
  /* Dimension positions for the non-batched 4D layout; each is shifted
     by one below when a leading batch dimension is present. */
  int dimD = 0;
  int dimT = 1;
  int dimH = 2;
  int dimW = 3;
  int64_t sizeB = 1;
  int64_t sizeD = 0;
  int64_t isizeT = 0;
  int64_t isizeH = 0;
  int64_t isizeW = 0;

  int64_t istrideB = 0;
  int64_t istrideD = 0;
  int64_t istrideT = 0;
  int64_t istrideH = 0;
  int64_t istrideW = 0;

  real *input_data = nullptr;
  real *output_data = nullptr;
  THIndex_t *indices_data = nullptr;

  THNN_ARGCHECK(!input->is_empty() && (input->dim() == 4 || input->dim() == 5), 2, input,
    "non-empty 4D or 5D (batch mode) tensor expected for input, but got: %s");

  if (input->dim() == 5)
  {
    istrideB = input->stride[0];
    sizeB = input->size[0];
    dimD++;
    dimT++;
    dimH++;
    dimW++;
  }

  /* sizes */
  sizeD  = input->size[dimD];
  isizeT = input->size[dimT];
  isizeH = input->size[dimH];
  isizeW = input->size[dimW];
  /* strides */
  istrideD = input->stride[dimD];
  istrideT = input->stride[dimT];
  istrideH = input->stride[dimH];
  istrideW = input->stride[dimW];

  /* resize output */
  if (input->dim() == 4)
  {
    /* Non-batched: one frame covers the whole tensor. */
    THTensor_(resize4d)(output, sizeD, osizeT, osizeH, osizeW);
    /* indices will contain max input locations for each output point */
    THIndexTensor_(resize4d)(indices, sizeD, osizeT, osizeH, osizeW);

    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);
    indices_data = THIndexTensor_(data)(indices);

    THNN_(VolumetricAdaptiveMaxPooling_updateOutput_frame)(input_data, output_data,
                                                      indices_data,
                                                      sizeD,
                                                      isizeT, isizeH, isizeW,
                                                      osizeT, osizeH, osizeW,
                                                      istrideD, istrideT,
                                                      istrideH, istrideW);
  }
  else
  {
    int64_t b;

    THTensor_(resize5d)(output, sizeB, sizeD, osizeT, osizeH, osizeW);
    /* indices will contain max input locations for each output point */
    THIndexTensor_(resize5d)(indices, sizeB, sizeD, osizeT, osizeH, osizeW);

    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);
    indices_data = THIndexTensor_(data)(indices);

    /* Batched: one frame per batch element, parallelized over b.  Input
       advances by its batch stride; output/indices are freshly resized and
       therefore contiguous, so they advance by the dense frame size. */
#pragma omp parallel for private(b)
    for (b = 0; b < sizeB; b++)
    {
      THNN_(VolumetricAdaptiveMaxPooling_updateOutput_frame)(input_data+b*istrideB, output_data+b*sizeD*osizeT*osizeH*osizeW,
                                                        indices_data+b*sizeD*osizeT*osizeH*osizeW,
                                                        sizeD,
                                                        isizeT, isizeH, isizeW,
                                                        osizeT, osizeH, osizeW,
                                                        istrideD, istrideT,
                                                        istrideH, istrideW);
    }
  }
}
/* Volumetric (3D) dilated max pooling forward pass.
 *
 * input:          4D (C x T x H x W) or 5D (B x C x T x H x W) tensor.
 * kT/kW/kH:       kernel extents; dT/dW/dH: strides; pT/pW/pH: padding;
 * dilationT/W/H:  spacing between kernel taps.
 * ceilMode:       when true, output extents use ceil() instead of floor(),
 *                 which may add one extra (partially padded) step.
 * `output` and `indices` are resized here; `indices` records the argmax
 * positions consumed by updateGradInput. */
void THNN_(VolumetricDilatedMaxPooling_updateOutput)(
          THNNState *state,
          THTensor *input,
          THTensor *output,
          THIndexTensor *indices,
          int kT,
          int kW,
          int kH,
          int dT,
          int dW,
          int dH,
          int pT,
          int pW,
          int pH,
          int dilationT,
          int dilationW,
          int dilationH,
          bool ceilMode)
{
  int64_t nslices;
  int64_t itime;
  int64_t iheight;
  int64_t iwidth;
  int64_t otime;
  int64_t oheight;
  int64_t owidth;
  real *input_data;
  real *output_data;
  THIndex_t *indices_data;


  /* Dimension positions for the non-batched 4D layout; shifted by one
     below when a leading batch dimension is present. */
  int dimN = 0;
  int dimt = 1;
  int dimh = 2;
  int dimw = 3;

  if (input->nDimension == 5)
  {
    dimN++;
    dimt++;
    dimh++;
    dimw++;
  }

  THNN_(VolumetricDilatedMaxPooling_shapeCheck)(
        state, input, NULL, NULL,
        kT,  kW,  kH, dT,  dW,  dH,
        pT,  pW,  pH, dilationT,  dilationW,  dilationH,
        ceilMode);

  /* sizes */
  nslices = input->size[dimN];
  itime   = input->size[dimt];
  iheight = input->size[dimh];
  iwidth  = input->size[dimw];
  /* Output extent per axis: (isize - effective_kernel + 2*pad) / stride + 1,
     where effective_kernel = dilation * (k - 1) + 1. */
  if (ceilMode)
  {
    otime = (int)(ceil((float)(itime - (dilationT * (kT - 1) + 1) + 2*pT) / dT)) + 1;
    oheight = (int)(ceil((float)(iheight - (dilationH * (kH - 1) + 1) + 2*pH) / dH)) + 1;
    owidth  = (int)(ceil((float)(iwidth  - (dilationW * (kW - 1) + 1) + 2*pW) / dW)) + 1;
  }
  else
  {
    otime = (int)(floor((float)(itime - (dilationT * (kT - 1) + 1) + 2*pT) / dT)) + 1;
    oheight = (int)(floor((float)(iheight - (dilationH * (kH - 1) + 1) + 2*pH) / dH)) + 1;
    owidth  = (int)(floor((float)(iwidth  - (dilationW * (kW - 1) + 1) + 2*pW) / dW)) + 1;
  }

  if (pT || pW || pH)
  {
    // ensure that the last pooling starts inside the image
    if ((otime - 1)*dT >= itime + pT)
      --otime;
    if ((oheight - 1)*dH >= iheight + pH)
      --oheight;
    if ((owidth  - 1)*dW >= iwidth  + pW)
      --owidth;
  }

  /* get contiguous input */
  input = THTensor_(newContiguous)(input);

  if (input->nDimension == 4) /* non-batch mode */
  {
    /* resize output */
    THTensor_(resize4d)(output, nslices, otime, oheight, owidth);
    /* indices will contain ti,i,j uchar locations packed into float/double */
    THIndexTensor_(resize4d)(indices, nslices, otime, oheight, owidth);

    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);
    indices_data = THIndexTensor_(data)(indices);

    THNN_(VolumetricDilatedMaxPooling_updateOutput_frame)(
      input_data, output_data,
      indices_data,
      nslices,
      itime, iwidth, iheight,
      otime, owidth, oheight,
      kT, kW, kH,
      dT, dW, dH,
      pT, pW, pH,
      dilationT, dilationW, dilationH
    );
  }
  else /* batch mode */
  {
    int64_t p;
    int64_t nBatch = input->size[0];

    /* Dense per-batch-element strides; valid because input was made
       contiguous above and output/indices are freshly resized. */
    int64_t istride = nslices * itime * iwidth * iheight;
    int64_t ostride = nslices * otime * owidth * oheight;

    /* resize output */
    THTensor_(resize5d)(output, nBatch, nslices, otime, oheight, owidth);
    /* indices will contain ti,i,j locations for each output point */
    THIndexTensor_(resize5d)(indices, nBatch, nslices, otime, oheight, owidth);

    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);
    indices_data = THIndexTensor_(data)(indices);

#pragma omp parallel for private(p)
    for (p=0; p < nBatch; p++)
    {
      THNN_(VolumetricDilatedMaxPooling_updateOutput_frame)(
        input_data   + p * istride,
        output_data  + p * ostride,
        indices_data + p * ostride,
        nslices,
        itime, iwidth, iheight,
        otime, owidth, oheight,
        kT, kW, kH,
        dT, dW, dH,
        pT, pW, pH,
        dilationT, dilationW, dilationH
      );
    }
  }

  /* cleanup: release the contiguous copy made above */
  THTensor_(free)(input);
}
/* Example #19 — file: LookupTable.c, project: ACB1337/nn */
/* Accumulates embedding gradients: for every index in `input`, adds
 * scale * (the corresponding gradOutput row) into the matching row of
 * gradWeight via BLAS axpy.  Entries equal to `paddingValue` are skipped.
 * When `scaleGradByFreq` is set, each row's contribution is divided by
 * the number of occurrences of that index in `input` (tallied in `count`
 * by LookupTable_resetCount).
 *
 * Indices in `input` are TH_INDEX_BASE-based — made consistent with
 * THNN_(LookupTable_renorm) in this file, which already uses
 * TH_INDEX_BASE instead of a hard-coded 1 (identical behavior when
 * TH_INDEX_BASE == 1).
 *
 * `sorted` and `indices` are unused here; presumably kept for interface
 * parity with the GPU kernel — TODO confirm. */
void THNN_(LookupTable_accGradParameters)(
          THNNState *state,
          THIndexTensor *input,
          THTensor *gradOutput,
          THTensor *gradWeight,
          THIntegerTensor *count,
          THTensor *sorted,
          THTensor *indices,
          bool scaleGradByFreq,
          int paddingValue,
          real scale)
{
  long i;
  THInteger_t *count_data = NULL;

  if (scaleGradByFreq)
  {
    /* One counter per vocabulary row. */
    THIntegerTensor_(resize1d)(count, gradWeight->size[0]);
    count_data = THIntegerTensor_(data)(count);
  }

  if (!THTensor_(isContiguous)(gradWeight))
    THError("gradWeight must be contiguous");
  if (!THIndexTensor_(isContiguous)(input))
    THError("input must be contiguous");
  if (THIndexTensor_(nDimension)(input) != 1 && THIndexTensor_(nDimension)(input) != 2)
    THError("input must be a vector or matrix");

  THIndex_t *input_data = THIndexTensor_(data)(input);
  long numel = THIndexTensor_(nElement)(input);
  long numw = THTensor_(size)(gradWeight, 0);

  // check that inputs are all within range (TH_INDEX_BASE-based, matching
  // the convention used by THNN_(LookupTable_renorm))
  for (i=0; i<numel; i++)
    if (input_data[i] < TH_INDEX_BASE || input_data[i] >= numw + TH_INDEX_BASE)
      THError("input out of range");

  gradOutput = THTensor_(newContiguous)(gradOutput);

  real *gw = THTensor_(data)(gradWeight);
  real *go = THTensor_(data)(gradOutput);
  long stride = THTensor_(stride)(gradWeight, 0);

  if (count_data)
    THNN_(LookupTable_resetCount)(count_data, input);

#ifdef _OPENMP
  if (numel > 1000)
  {
    // The strategy is to parallelize over sections of the vocabulary, so that
    // thread 1 handles updates to gradWeight[0..nVocab/nThreads]. Every thread
    // has to traverse the entire input, but the dominating factor is the axpy
    // BLAS call.
    #pragma omp parallel private(i)
    {
      int tid = omp_get_thread_num();
      int nthreads = omp_get_num_threads();

      /* Each thread owns a half-open slice [start, end) of the vocabulary
         and only applies updates whose row falls inside its slice. */
      long start = tid * (numw/nthreads + 1);
      long end = start + (numw/nthreads + 1);
      for (i=0; i<numel; i++)
      {
        if (input_data[i] != paddingValue)
        {
            long k = input_data[i] - TH_INDEX_BASE;
            if (k >= start && k < end)
            {
                real scale_ = scale;
                /* Divide by occurrence count so frequent rows are not
                   over-weighted. */
                if (count_data) scale_ /= count_data[k];
                THBlas_(axpy)(stride, scale_, go + i*stride, 1, gw + k*stride, 1);
            }
        }
      }
    }

    THTensor_(free)(gradOutput);
    return;
  }
#endif

  /* Serial fallback (or small inputs). */
  for (i=0; i<numel; i++)
  {
    if (input_data[i] != paddingValue)
    {
        long k = input_data[i] - TH_INDEX_BASE;
        real scale_ = scale;
        if (count_data) scale_ /= count_data[k];
        THBlas_(axpy)(stride, scale_, go + i*stride, 1, gw + k*stride, 1);
    }
  }

  THTensor_(free)(gradOutput);
}
/* Backward pass of volumetric dilated max pooling: routes each gradOutput
 * value back to the input position recorded in `indices` during the
 * forward pass.  gradInput is resized to match input and zeroed here.
 * NOTE(review): gradOutput is made contiguous below, but `indices` is
 * used as-is — this assumes callers pass the contiguous tensor produced
 * by updateOutput; confirm. */
void THNN_(VolumetricDilatedMaxPooling_updateGradInput)(
          THNNState *state,
          THTensor *input,
          THTensor *gradOutput,
          THTensor *gradInput,
          THIndexTensor *indices,
          int kT,
          int kW,
          int kH,
          int dT,
          int dW,
          int dH,
          int pT,
          int pW,
          int pH,
          int dilationT,
          int dilationW,
          int dilationH,
          bool ceilMode)
{
  int nslices;
  int itime;
  int iheight;
  int iwidth;
  int otime;
  int oheight;
  int owidth;
  real *gradInput_data;
  real *gradOutput_data;
  THIndex_t *indices_data;

  /* Dimension positions for the non-batched 4D layout; shifted by one
     below when a leading batch dimension is present. */
  int dimN = 0;
  int dimt = 1;
  int dimh = 2;
  int dimw = 3;

  THNN_(VolumetricDilatedMaxPooling_shapeCheck)(
        state, input, gradOutput, indices,
        kT,  kW,  kH, dT,  dW,  dH,
        pT,  pW,  pH, dilationT,  dilationW,  dilationH,
        ceilMode);

  // TODO: gradOutput shape check
  /* get contiguous gradOutput */
  gradOutput = THTensor_(newContiguous)(gradOutput);

  /* resize and zero: positions that were never an argmax get zero grad */
  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);

  if (input->nDimension == 5)
  {
    dimN++;
    dimt++;
    dimh++;
    dimw++;
  }

  /* sizes */
  nslices = input->size[dimN];
  itime = input->size[dimt];
  iheight = input->size[dimh];
  iwidth = input->size[dimw];
  otime = gradOutput->size[dimt];
  oheight = gradOutput->size[dimh];
  owidth = gradOutput->size[dimw];

  /* get raw pointers */
  gradInput_data = THTensor_(data)(gradInput);
  gradOutput_data = THTensor_(data)(gradOutput);
  indices_data = THIndexTensor_(data)(indices);

  /* backprop */
  if (input->nDimension == 4) /* non-batch mode*/
  {
    THNN_(VolumetricDilatedMaxPooling_updateGradInput_frame)(
      gradInput_data, gradOutput_data,
      indices_data,
      nslices,
      itime, iwidth, iheight,
      otime, owidth, oheight,
      dT, dW, dH,
      pT, pW, pH,
      dilationT, dilationW, dilationH
    );
  }
  else /* batch mode */
  {
    int64_t p;
    int64_t nBatch = input->size[0];

    /* Dense per-batch-element strides (gradInput was just resized from
       input's sizes; gradOutput was made contiguous above). */
    int64_t istride = nslices * itime * iwidth * iheight;
    int64_t ostride = nslices * otime * owidth * oheight;

#pragma omp parallel for private(p)
    for (p = 0; p < nBatch; p++)
    {
      THNN_(VolumetricDilatedMaxPooling_updateGradInput_frame)(
        gradInput_data + p * istride,
        gradOutput_data + p * ostride,
        indices_data + p * ostride,
        nslices,
        itime, iwidth, iheight,
        otime, owidth, oheight,
        dT, dW, dH,
        pT, pW, pH,
        dilationT, dilationW, dilationH
      );
    }
  }

  /* cleanup: release the contiguous gradOutput copy */
  THTensor_(free)(gradOutput);
}
/* Example #21 */
/* Reverses SpatialMaxPooling: each input value is scattered into a zeroed
 * output plane at the position stored in `indices`.  Accepts a 3D
 * (C x H x W) or 4D (B x C x H x W) input; owidth/oheight give the
 * unpooled spatial size of the output. */
void THNN_(SpatialMaxUnpooling_updateOutput)(
    THNNState *state,
    THTensor *input,
    THTensor *output,
    THIndexTensor *indices,
    int owidth, int oheight)
{
  int widthDim = 2;
  int heightDim = 1;
  int batchSize = 1;

  AT_CHECK(!input->is_empty() && (input->dim() == 3 || input->dim() == 4),
           "non-empty 3D or 4D (batch mode) tensor expected for input, but got sizes: ", input->sizes());
  THNN_CHECK_SHAPE_INDICES(input, indices);

  const bool batched = (input->dim() == 4);
  if (batched)
  {
    batchSize = input->size(0);
    widthDim++;
    heightDim++;
  }

  /* input geometry */
  int numPlanes = input->size(heightDim - 1);
  int inHeight  = input->size(heightDim);
  int inWidth   = input->size(widthDim);

  /* operate on contiguous copies; released below */
  input = THTensor_(newContiguous)(input);
  indices = THIndexTensor_(newContiguous)(indices);

  if (!batched)
  {
    THTensor_(resize3d)(output, numPlanes, oheight, owidth);
    THTensor_(zero)(output);

    THNN_(SpatialMaxUnpooling_updateOutput_frame)(
        input->data<scalar_t>(),
        output->data<scalar_t>(),
        THIndexTensor_(data)(indices),
        numPlanes,
        inWidth, inHeight,
        owidth, oheight);
  }
  else
  {
    THTensor_(resize4d)(output, batchSize, numPlanes, oheight, owidth);
    THTensor_(zero)(output);

    scalar_t *inBase   = input->data<scalar_t>();
    scalar_t *outBase  = output->data<scalar_t>();
    THIndex_t *idxBase = THIndexTensor_(data)(indices);

    const int inPlaneSize  = numPlanes * inWidth * inHeight;
    const int outPlaneSize = numPlanes * owidth * oheight;

    for (int p = 0; p < batchSize; p++)
    {
      THNN_(SpatialMaxUnpooling_updateOutput_frame)(
          inBase  + p * inPlaneSize,
          outBase + p * outPlaneSize,
          idxBase + p * inPlaneSize,
          numPlanes,
          inWidth, inHeight,
          owidth, oheight);
    }
  }

  /* cleanup */
  c10::raw::intrusive_ptr::decref(input);
  THIndexTensor_(free)(indices);
}
/* Example #22 */
/* Renormalizes the rows of `weight` selected by `idx` so that each row's
 * normType-norm does not exceed maxNorm (via LookupTable_renormRow).
 * NOTE: `idx` is sorted and deduplicated IN PLACE below, so the caller's
 * index tensor is mutated by this call. */
void THNN_(LookupTable_renorm)(
          THNNState *state,
          THIndexTensor *idx,
          THTensor *weight,
          accreal maxNorm_,
          accreal normType_)
{
  real maxNorm = TH_CONVERT_ACCREAL_TO_REAL(maxNorm_);
  real normType = TH_CONVERT_ACCREAL_TO_REAL(normType_);
  if (!THTensor_(isContiguous)(weight))
    THError("weight must be contiguous");
  if (!THIndexTensor_(isContiguous)(idx))
    THError("input must be contiguous");
  if (THIndexTensor_(nDimension)(idx) != 1)
    THError("idx must be a vector");
  if (normType <= 0)
    THError("non-positive-norm not supported");

  ptrdiff_t i;
  THIndex_t *row_idx = THIndexTensor_(data)(idx);
  ptrdiff_t numel = THIndexTensor_(nElement)(idx);

  long numw = THTensor_(size)(weight, 0);
  long stride = THTensor_(stride)(weight, 0);
  real *gw = THTensor_(data)(weight);
  /* Validate every index before touching any row. */
  for (i=0; i<numel; i++) {
    if (row_idx[i] < TH_INDEX_BASE || row_idx[i] >= numw + TH_INDEX_BASE) {
      THError("input need to be in the range %ld <= input < %ld, "
	      "but got input of value: %ld", TH_INDEX_BASE, (numw + TH_INDEX_BASE),
	      row_idx[i]);
    }
  }
  // get unique indices: sort, then compact adjacent duplicates so each
  // row is renormalized exactly once (mutates idx's storage)
  qsort(row_idx, numel, sizeof(THIndex_t), THNN_(compare_THIndex));
  ptrdiff_t ptr = 0;
  for (i=0; i<numel; i++)
    if (i == 0 || row_idx[i] != row_idx[i-1])
      row_idx[ptr++] = row_idx[i];
  numel = ptr;

#ifdef _OPENMP
  if (numel > 1000)
  {
    // The strategy is to parallelize over the rows that appear in
    // row_idx, so that thread 1 handles the rows in row_idx[0..numel/nThreads].
    // This distributes the work evenly to each thread.
    #pragma omp parallel for private(i)
    for (i=0; i<numel; i++)
    {
      long k = row_idx[i] - TH_INDEX_BASE;
      THNN_(LookupTable_renormRow)(gw + k*stride, stride, maxNorm, normType);
    }
    return;
  }
#endif
  /* Serial fallback (or small index sets). */
  for (i=0; i<numel; i++)
  {
    long k = row_idx[i] - TH_INDEX_BASE;
    THNN_(LookupTable_renormRow)(gw + k*stride, stride, maxNorm, normType);
  }
}
/* Example #23 */
/* Backward pass of the multi-label margin criterion.
 *
 * For each frame and each valid target class t, every non-target class d
 * with margin violation (1 - x[t] + x[d] > 0) contributes -g to
 * gradInput[t] and +g to gradInput[d], where g is 1/dim (or 1/(nframe*dim)
 * when sizeAverage).  `target` rows are TH_INDEX_BASE-based class indices
 * terminated by a value below TH_INDEX_BASE (the `break` below);
 * `isTarget` is the 0/1 mask produced by updateOutput marking target
 * classes. */
void THNN_(MultiLabelMarginCriterion_updateGradInput)(
          THNNState *state,
          THTensor *input,
          THIndexTensor *target,
          THTensor *gradInput,
          THTensor *isTarget,
          bool sizeAverage)
{
  real *input_data;
  real *gradInput_data;
  THIndex_t *target_data;
  real *isTarget_data;
  long nframe, dim;
  long t, d, dt;
  real g;

  THArgCheck((input->nDimension == 1) || (input->nDimension == 2), 2,
	     "vector or matrix expected");

  if (input->nDimension == 1)
  {
    /* Single frame: treat the vector as a 1 x dim batch. */
    nframe = 1;
    dim = input->size[0];
    THArgCheck((target->nDimension == 1) && (target->size[0] == dim), 3,
	       "inconsistent target size");
    THArgCheck((isTarget->nDimension == 1) && (isTarget->size[0] == dim), 3,
	       "inconsistent isTarget size");
  }
  else
  {
    nframe = input->size[0];
    dim = input->size[1];
    THArgCheck((target->nDimension == 2) && (target->size[0] == nframe)
	       && (target->size[1] == dim), 3, "inconsistent target size");
    THArgCheck((isTarget->nDimension == 2) && (isTarget->size[0] == nframe)
	       && (isTarget->size[1] == dim), 3, "inconsistent isTarget size");
  }

  /* Range checks: 0 is allowed in target as the end-of-list sentinel. */
  THArgCheck(THIndexTensor_(minall)(target) >= 0, 3, "target out of range");
  THArgCheck(THIndexTensor_(maxall)(target) <= dim, 3, "target out of range");

  THArgCheck(THTensor_(minall)(isTarget) >= 0, 3, "isTarget out of range");
  THArgCheck(THTensor_(maxall)(isTarget) <= 1, 3, "isTarget out of range");

  /* Contiguous views; freed at the end. */
  target = THIndexTensor_(newContiguous)(target);
  input = THTensor_(newContiguous)(input);
  isTarget = THTensor_(newContiguous)(isTarget);
  input_data = THTensor_(data)(input);
  target_data = THIndexTensor_(data)(target);
  isTarget_data = THTensor_(data)(isTarget);

  g = sizeAverage ? ( 1./((real)(nframe*dim)) ) : ( 1./((real)dim) );

  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);
  gradInput_data = THTensor_(data)(gradInput);

  for (t = 0; t < nframe; t++)
  {
    for (dt = 0; dt < dim; dt++)
    {
      THIndex_t target_idx = target_data[dt] - TH_INDEX_BASE;
      real input_target;
      /* Sentinel (value below TH_INDEX_BASE) ends this frame's targets. */
      if (target_idx < 0)
        break;

      input_target = input_data[target_idx];
      for (d = 0; d < dim; d++)
      {
        if (!isTarget_data[d])
        {
          /* Margin violated: push the target score up, the rival down. */
          real z = 1 - input_target + input_data[d];
          if (z > 0)
          {
            gradInput_data[target_idx] -= g;
            gradInput_data[d] += g;
          }
        }
      }
    }
    /* Advance every per-frame pointer to the next row. */
    input_data += dim;
    target_data += dim;
    isTarget_data += dim;
    gradInput_data += dim;
  }

  THTensor_(free)(input);
  THIndexTensor_(free)(target);
  THTensor_(free)(isTarget);
}
/* Spatial (2D) adaptive max pooling forward pass: maps a 3D (D x H x W)
 * or 4D (B x D x H x W) input onto a fixed osizeH x osizeW output per
 * plane.  The input is read through its explicit strides, so it need not
 * be contiguous; `indices` is resized alongside `output` and records the
 * argmax location of every output element. */
void THNN_(SpatialAdaptiveMaxPooling_updateOutput)(
          THNNState *state,
          THTensor *input,
          THTensor *output,
          THIndexTensor *indices,
          int osizeW,
          int osizeH)
{
  THNN_ARGCHECK(input->nDimension == 3 || input->nDimension == 4, 2, input,
		"3D or 4D (batch mode) tensor expected for input, but got: %s");

  int wDim = 2;
  int hDim = 1;
  int64_t nBatch = 1;
  int64_t batchStride = 0;

  const bool batched = (input->nDimension == 4);
  if (batched)
  {
    batchStride = input->stride[0];
    nBatch = input->size[0];
    wDim++;
    hDim++;
  }

  /* plane count, spatial extents, and the matching input strides */
  int64_t nPlane = input->size[hDim-1];
  int64_t inH    = input->size[hDim];
  int64_t inW    = input->size[wDim];
  int64_t planeStride = input->stride[hDim-1];
  int64_t hStride     = input->stride[hDim];
  int64_t wStride     = input->stride[wDim];

  if (!batched)
  {
    THTensor_(resize3d)(output, nPlane, osizeH, osizeW);
    /* indices will contain i,j locations for each output point */
    THIndexTensor_(resize3d)(indices, nPlane, osizeH, osizeW);

    THNN_(SpatialAdaptiveMaxPooling_updateOutput_frame)(
        THTensor_(data)(input),
        THTensor_(data)(output),
        THIndexTensor_(data)(indices),
        nPlane,
        inH, inW,
        osizeH, osizeW,
        planeStride,
        hStride, wStride);
  }
  else
  {
    THTensor_(resize4d)(output, nBatch, nPlane, osizeH, osizeW);
    /* indices will contain i,j locations for each output point */
    THIndexTensor_(resize4d)(indices, nBatch, nPlane, osizeH, osizeW);

    real *inBase   = THTensor_(data)(input);
    real *outBase  = THTensor_(data)(output);
    THIndex_t *idxBase = THIndexTensor_(data)(indices);

    /* output/indices were freshly resized, hence densely packed */
    int64_t outFrameSize = nPlane * osizeH * osizeW;
    int64_t b;
#pragma omp parallel for private(b)
    for (b = 0; b < nBatch; b++)
    {
      THNN_(SpatialAdaptiveMaxPooling_updateOutput_frame)(
          inBase  + b * batchStride,
          outBase + b * outFrameSize,
          idxBase + b * outFrameSize,
          nPlane,
          inH, inW,
          osizeH, osizeW,
          planeStride,
          hStride, wStride);
    }
  }
}