Esempio n. 1
0
/*
 * Backward pass of SoftMax along an arbitrary dimension `dim`.
 *
 * With y = output and g = gradOutput, every 1-D slice taken along `dim`
 * receives
 *
 *     gradInput[d] = y[d] * (g[d] - sum_k g[k] * y[k])
 *
 * Parameters:
 *   state      - not referenced by this function.
 *   input      - not referenced by this function.
 *   gradOutput - gradient w.r.t. the softmax output; must pass
 *                THNN_CHECK_SHAPE against `output`.
 *   gradInput  - destination; resized to the shape of `output`.
 *   output     - softmax result saved from the forward pass.
 *   dim        - dimension the softmax was taken over; checked to lie in
 *                [0, output->nDimension).
 *
 * `gradOutput` and `output` are replaced by contiguous handles obtained from
 * newContiguous, which are released before returning.
 */
void THNN_(SoftMax_updateGradInput)(
          THNNState *state,
          THTensor *input,
          THTensor *gradOutput,
          THTensor *gradInput,
          THTensor *output,
          int64_t dim)
{
  THNN_CHECK_SHAPE(output, gradOutput);
  /* `dim` is int64_t: passing it for a "%d" conversion is a variadic type
   * mismatch (undefined behaviour), so it is printed as long long instead. */
  THArgCheck(dim >= 0 && dim < output->nDimension, 6,
             "dim out of range (got %lld, but input has %d dims)",
             (long long)dim, (int)output->nDimension);

  /* View the tensor as [outer_size, dim_size, inner_size]. */
  uint64_t outer_size = 1;
  uint64_t dim_size = output->size[dim];
  uint64_t inner_size = 1;
  for (int64_t i = 0; i < dim; ++i)
    outer_size *= output->size[i];
  for (int64_t i = dim + 1; i < output->nDimension; ++i)
    inner_size *= output->size[i];

  gradOutput = THTensor_(newContiguous)(gradOutput);
  output = THTensor_(newContiguous)(output);
  THTensor_(resizeAs)(gradInput, output);

  real *gradInput_data_base  = THTensor_(data)(gradInput);
  real *output_data_base     = THTensor_(data)(output);
  real *gradOutput_data_base = THTensor_(data)(gradOutput);

  /* Element strides of the [outer, dim, inner] view used for indexing. */
  uint64_t dim_stride = inner_size;
  uint64_t outer_stride = dim_size * dim_stride;

  /* NOTE(review): SOFTMAX_SIZE_TYPE / SOFTMAX_CAST_TYPE are defined outside
   * this view; presumably they pick the loop index type the OpenMP
   * implementation accepts - confirm against their definition. */
  SOFTMAX_SIZE_TYPE i, d;

  /* One iteration per (outer, inner) pair, i.e. per slice along `dim`. */
#pragma omp parallel for private(i, d)
  for (i = 0; i < SOFTMAX_CAST_TYPE (outer_size * inner_size); i++)
  {
    uint64_t outer_idx = i / inner_size;
    uint64_t inner_idx = i % inner_size;
    /* All three buffers are indexed with the same slice offset. */
    uint64_t slice_offset = outer_idx * outer_stride + inner_idx;
    real *gradInput_data  = gradInput_data_base  + slice_offset;
    real *output_data     = output_data_base     + slice_offset;
    real *gradOutput_data = gradOutput_data_base + slice_offset;

    /* Dot product <gradOutput, output> over the slice, in accreal. */
    accreal sum = 0;
    for (d = 0; d < SOFTMAX_CAST_TYPE dim_size; d++)
      sum += ((accreal)gradOutput_data[d * dim_stride]) * ((accreal)output_data[d * dim_stride]);

    for (d = 0; d < SOFTMAX_CAST_TYPE dim_size; d++)
      gradInput_data[d * dim_stride] = output_data[d * dim_stride] * (gradOutput_data[d * dim_stride] - sum);
  }

  /* Release the handles obtained from newContiguous above. */
  THTensor_(free)(gradOutput);
  THTensor_(free)(output);
}
Esempio n. 2
0
void THNN_(SmoothL1Criterion_updateOutput)(
          THNNState *state,
          THTensor *input,
          THTensor *target,
          THTensor *output,
          bool sizeAverage,
          bool reduce)
{
  THNN_CHECK_SHAPE(input, target);

  if (!reduce) {
    THTensor_(resizeAs)(output, input);
    TH_TENSOR_APPLY3(real, input, real, target, real, output,
      real z = fabs(*input_data - *target_data);
      *output_data = z < 1 ? 0.5 * z * z : z - 0.5;
    );
Esempio n. 3
0
/*
 * Backward pass of SoftMax for tensors of rank 1 to 4.
 *
 * The softmax dimension is fixed by the rank of `output`:
 *   1D: size[0], a single frame
 *   2D: size[1], one frame per size[0]
 *   3D: size[0], a single frame, inner extent size[1]*size[2]
 *   4D: size[1], one frame per size[0], inner extent size[2]*size[3]
 * Any other rank is reported through THError.
 *
 * For each slice along that dimension:
 *   gradInput[d] = output[d] * (gradOutput[d] - sum_k gradOutput[k]*output[k])
 *
 * `state` is not referenced; `input` is only used in the shape check against
 * `gradOutput`. `gradInput` is resized to the shape of `output`.
 */
void THNN_(SoftMax_updateGradInput)(
          THNNState *state,
          THTensor *input,
          THTensor *gradOutput,
          THTensor *gradInput,
          THTensor *output)
{
  THNN_CHECK_SHAPE(input, gradOutput);

  ptrdiff_t num_frames = 0;   /* independent leading frames             */
  ptrdiff_t num_classes = 0;  /* extent of the softmax dimension        */
  ptrdiff_t inner = 0;        /* element step between successive classes */

  switch (output->nDimension)
  {
    case 1:
      num_frames = 1;
      num_classes = output->size[0];
      inner = 1;
      break;
    case 2:
      num_frames = output->size[0];
      num_classes = output->size[1];
      inner = 1;
      break;
    case 3:
      num_frames = 1;
      num_classes = output->size[0];
      inner = output->size[1]*output->size[2];
      break;
    case 4:
      num_frames = output->size[0];
      num_classes = output->size[1];
      inner = output->size[2]*output->size[3];
      break;
    default:
      THError("1D, 2D, 3D or 4D tensor expected");
      break;
  }

  /* Contiguous handles; both are released at the end of the function. */
  THTensor *gradOutput_c = THTensor_(newContiguous)(gradOutput);
  THTensor *output_c = THTensor_(newContiguous)(output);

  THTensor_(resizeAs)(gradInput, output_c);

  real *gin_base = THTensor_(data)(gradInput);
  real *out_base = THTensor_(data)(output_c);
  real *gout_base = THTensor_(data)(gradOutput_c);

  /* One iteration per (frame, inner position) pair. */
  const ptrdiff_t num_slices = inner*num_frames;
  ptrdiff_t s;

#pragma omp parallel for private(s)
  for (s = 0; s < num_slices; s++)
  {
    /* Offset of the slice's first element, shared by all three buffers. */
    const ptrdiff_t base = (s/inner)*num_classes*inner + s % inner;
    real *gin = gin_base + base;
    real *out = out_base + base;
    real *gout = gout_base + base;

    ptrdiff_t c;
    accreal dot = 0;

    /* <gradOutput, output> over the slice, accumulated in accreal. */
    for (c = 0; c < num_classes; c++)
    {
      const ptrdiff_t off = c*inner;
      dot += (accreal)gout[off] * out[off];
    }

    for (c = 0; c < num_classes; c++)
    {
      const ptrdiff_t off = c*inner;
      gin[off] = out[off] * (gout[off] - dot);
    }
  }

  THTensor_(free)(gradOutput_c);
  THTensor_(free)(output_c);
}