Esempio n. 1
0
/*
 * SparseLinear forward pass (legacy 2 x N sparse layout).
 *
 * Lua stack: 1 = module table (fields "weight", "bias", "output"),
 *            2 = input tensor.
 * Row 0 of the input holds 1-based indices, row 1 the matching values.
 * The output is first overwritten with the bias; then, for every input
 * column, the indexed weight row is passed to THBlas axpy together with the
 * value so that it is accumulated into the output.
 *
 * Raises a Lua error ("index out of bound") as soon as an index falls
 * outside [1, weight->size[0]]. Returns 1.
 */
static int nn_(SparseLinear_updateOutput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
  THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id));
  THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_(Tensor_id));
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id));
  const long nRows = weight->size[0]; /* exclusive upper bound for 0-based indices */
  long col;

  /* start from the bias, the weighted rows are added on top of it */
  THTensor_(copy)(output, bias);

  for (col = 0; col < input->size[1]; col++)
  {
    const long row = (long)(THTensor_(get2d)(input, 0, col)) - 1;
    real coeff;

    /* luaL_error does not return, so this is a plain guard clause */
    if (row < 0 || row >= nRows)
      return luaL_error(L, "index out of bound");

    coeff = THTensor_(get2d)(input, 1, col);
    THBlas_(axpy)(output->size[0],
                  coeff,
                  THTensor_(data)(weight) + row * weight->stride[0],
                  weight->stride[1],
                  THTensor_(data)(output),
                  output->stride[0]);
  }

  return 1;
}
Esempio n. 2
0
/*
 * Min forward pass: reduces `input` along one dimension, storing the smallest
 * value of each slice in the module's "output" tensor and its 1-based
 * position within the slice in "indices".
 *
 * Lua stack: 1 = module table (fields "dimension", "indices", "output"),
 *            2 = input tensor.
 * "dimension" is decremented by one when read (1-based on the Lua side); a
 * value outside [0, input->nDimension) raises a Lua argument error.
 *
 * NOTE(review): this definition is cut off after the trailing `if` below;
 * the remainder of the body is not present in this file.
 */
static int nn_(Min_updateOutput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  int dimension = luaT_getfieldcheckint(L, 1, "dimension")-1;
  THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_Tensor);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);

  THLongStorage *dim;
  long i;

  luaL_argcheck(L, dimension >= 0 && dimension < input->nDimension, 2, "dimension out of range");

  /* output and indices get the input's sizes, with the reduced dimension set to 1 */
  dim = THLongStorage_newWithSize(input->nDimension);
  for(i = 0; i < input->nDimension; i++)
    dim->data[i] = input->size[i];
  dim->data[dimension] = 1;
  THTensor_(resize)(output, dim, NULL);
  THTensor_(resize)(indices, dim, NULL);
  THLongStorage_free(dim);

  /* linear scan of each slice; a strict '<' keeps the first occurrence on ties */
  TH_TENSOR_DIM_APPLY3(real, output, real, input, real, indices, dimension,
                       long theIndex = 0;
                       real theMin = input_data[0];
                       for(i = 1; i < input_size; i++)
                       {
                         if(input_data[i*input_stride] < theMin)
                         {
                           theIndex = i;
                           theMin = input_data[i*input_stride];
                         }
                       }
                       *indices_data = theIndex+1;
                       *output_data = theMin;)

  if(output->nDimension > 1)
/*
 * SparseLinear parameter update.
 *
 * Lua stack: 1 = module table (fields "weight", "bias", "gradBias",
 *                "gradWeight", "lastInput"),
 *            2 = learning rate.
 * The bias is updated through THTensor cadd with a factor of -learningRate
 * and gradBias. Then, for every column of "lastInput" (row 0 = 1-based
 * index), the indexed gradWeight row is passed to THBlas axpy with the same
 * factor to update the matching weight row.
 *
 * Raises a Lua error ("index out of bound") for an index outside
 * [1, weight->size[0]]. Returns 0.
 */
int nn_(SparseLinear_updateParameters)(lua_State *L)
{
  real learningRate = luaL_checknumber(L, 2);
  THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
  THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor);
  THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor);
  THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_Tensor);
  THTensor *lastInput = luaT_getfieldcheckudata(L, 1, "lastInput", torch_Tensor);
  const long nRows = weight->size[0]; /* exclusive upper bound for 0-based indices */
  long col;

  THTensor_(cadd)(bias, bias, -learningRate, gradBias);

  for (col = 0; col < lastInput->size[1]; col++)
  {
    const long row = (long)(THTensor_(get2d)(lastInput, 0, col)) - 1;

    /* luaL_error does not return, so this is a plain guard clause */
    if (row < 0 || row >= nRows)
      return luaL_error(L, "index out of bound");

    /* only the rows touched by the last input are updated */
    THBlas_(axpy)(bias->size[0],
                  -learningRate,
                  THTensor_(data)(gradWeight) + row * gradWeight->stride[0],
                  gradWeight->stride[1],
                  THTensor_(data)(weight) + row * weight->stride[0],
                  weight->stride[1]);
  }

  return 0;
}
Esempio n. 4
0
/*
 * SpatialConvolution backward pass with respect to the input.
 *
 * Lua stack: 1 = module table (fields "dW", "dH", "nOutputPlane", "weight",
 *                "gradInput"),
 *            2 = input, 3 = gradOutput.
 * The plane dimension of gradOutput (index 1 when the input is 4D, index 0
 * otherwise) must equal nOutputPlane. The result is written to "gradInput"
 * by convolving gradOutput with a transposed view of the weight: conv2Dmv
 * for 3D input, conv2Dmm for everything else. Returns 1.
 */
static int nn_(SpatialConvolution_updateGradInput)(lua_State *L)
{
    THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
    THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
    int dW = luaT_getfieldcheckint(L, 1, "dW");
    int dH = luaT_getfieldcheckint(L, 1, "dH");
    int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
    THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
    THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);
    const int planeDim = (input->nDimension == 4) ? 1 : 0;
    THTensor *weightT;

    THArgCheck(nOutputPlane == gradOutput->size[planeDim], 1, "Number of output features is not equal to nOutputPlane");

    /* swap the first two weight dimensions for the gradient convolution */
    weightT = THTensor_(newTranspose)(weight, 0, 1);

    if (input->nDimension != 3)
        THTensor_(conv2Dmm)(gradInput, 0.0, 1.0, gradOutput, weightT, dH, dW, "F", "C");
    else
        THTensor_(conv2Dmv)(gradInput, 0.0, 1.0, gradOutput, weightT, dH, dW, "F", "C");

    THTensor_(free)(weightT);
    return 1;
}
Esempio n. 5
0
/*
 * SpatialConvolutionLocal forward pass.
 *
 * Lua stack: 1 = module table (geometry fields "iW", "iH", "oW", "oH", "kW",
 *                "kH", "dW", "dH", "padW", "padH", "nInputPlane",
 *                "nOutputPlane"; tensors "finput", "weight", "bias",
 *                "output"),
 *            2 = input tensor.
 * A 3D input is handled as a single frame. Any other input is treated as a
 * batch along dimension 0 and each sample is handed to the per-frame helper,
 * in an OpenMP parallel loop. "finput" and "output" are resized beforehand.
 * Returns 1.
 */
static int nn_(SpatialConvolutionLocal_updateOutput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  long inputWidth = luaT_getfieldcheckint(L, 1, "iW");
  long inputHeight = luaT_getfieldcheckint(L, 1, "iH");
  long outputWidth = luaT_getfieldcheckint(L, 1, "oW");
  long outputHeight = luaT_getfieldcheckint(L, 1, "oH");
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int padW = luaT_getfieldcheckint(L, 1, "padW");
  int padH = luaT_getfieldcheckint(L, 1, "padH");
  long nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
  long nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");

  THTensor *finput = luaT_getfieldcheckudata(L, 1, "finput", torch_Tensor);
  THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
  THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);

  /* shape of one unfolded frame: (kernel area * input planes) x output locations */
  const long patchSize = kW*kH*nInputPlane;
  const long nLocations = outputHeight*outputWidth;
  long batchSize;
  long b;

  if (input->nDimension == 3)
  {
    /* single frame */
    THTensor_(resize2d)(finput, patchSize, nLocations);
    THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth);

    nn_(SpatialConvolutionLocal_updateOutput_frame)(input, output, weight, bias, finput,
                                                 kW, kH, dW, dH, padW, padH,
                                                 nInputPlane, inputWidth, inputHeight,
                                                 nOutputPlane, outputWidth, outputHeight);
    return 1;
  }

  /* batch mode: dimension 0 enumerates the samples */
  batchSize = input->size[0];

  THTensor_(resize3d)(finput, batchSize, patchSize, nLocations);
  THTensor_(resize4d)(output, batchSize, nOutputPlane, outputHeight, outputWidth);

#pragma omp parallel for private(b)
  for (b = 0; b < batchSize; b++)
  {
    THTensor *inputFrame = THTensor_(newSelect)(input, 0, b);
    THTensor *outputFrame = THTensor_(newSelect)(output, 0, b);
    THTensor *finputFrame = THTensor_(newSelect)(finput, 0, b);

    nn_(SpatialConvolutionLocal_updateOutput_frame)(inputFrame, outputFrame, weight, bias, finputFrame,
                                                 kW, kH, dW, dH, padW, padH,
                                                 nInputPlane, inputWidth, inputHeight,
                                                 nOutputPlane, outputWidth, outputHeight);

    THTensor_(free)(inputFrame);
    THTensor_(free)(outputFrame);
    THTensor_(free)(finputFrame);
  }

  return 1;
}
Esempio n. 6
0
/*
 * SpatialConvolutionLocal backward pass with respect to the input.
 *
 * Lua stack: 1 = module table (geometry fields "iW", "iH", "oW", "oH", "kW",
 *                "kH", "dW", "dH", "padW", "padH", "nInputPlane",
 *                "nOutputPlane"; tensors "finput", "fgradInput", "weight",
 *                "gradInput"),
 *            2 = input, 3 = gradOutput.
 * "gradInput" is resized like the input and "fgradInput" like "finput".
 * Dimensions 1 and 2 of the weight are swapped in place for the duration of
 * the computation and swapped back before returning. A 3D input is one
 * frame; anything else is processed sample by sample along dimension 0 in an
 * OpenMP parallel loop. Returns 1.
 */
static int nn_(SpatialConvolutionLocal_updateGradInput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  long inputWidth = luaT_getfieldcheckint(L, 1, "iW");
  long inputHeight = luaT_getfieldcheckint(L, 1, "iH");
  long outputWidth = luaT_getfieldcheckint(L, 1, "oW");
  long outputHeight = luaT_getfieldcheckint(L, 1, "oH");
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int padW = luaT_getfieldcheckint(L, 1, "padW");
  int padH = luaT_getfieldcheckint(L, 1, "padH");
  long nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
  long nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");

  THTensor *finput = luaT_getfieldcheckudata(L, 1, "finput", torch_Tensor);
  THTensor *fgradInput = luaT_getfieldcheckudata(L, 1, "fgradInput", torch_Tensor);
  THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);

  THTensor_(resizeAs)(gradInput, input);
  THTensor_(resizeAs)(fgradInput, finput);

  /* temporary in-place transpose, undone at the end of the function */
  THTensor_(transpose)(weight, weight, 1, 2);

  if (input->nDimension != 3)
  {
    /* batch mode: dimension 0 enumerates the samples */
    const long batchSize = input->size[0];
    long b;

#pragma omp parallel for private(b)
    for (b = 0; b < batchSize; b++)
    {
      THTensor *gradInputFrame = THTensor_(newSelect)(gradInput, 0, b);
      THTensor *gradOutputFrame = THTensor_(newSelect)(gradOutput, 0, b);
      THTensor *fgradInputFrame = THTensor_(newSelect)(fgradInput, 0, b);

      nn_(SpatialConvolutionLocal_updateGradInput_frame)(gradInputFrame, gradOutputFrame, weight, fgradInputFrame,
                                                         kW, kH, dW, dH, padW, padH,
                                                         nInputPlane, inputWidth, inputHeight,
                                                         nOutputPlane, outputWidth, outputHeight);

      THTensor_(free)(gradInputFrame);
      THTensor_(free)(gradOutputFrame);
      THTensor_(free)(fgradInputFrame);
    }
  }
  else
  {
    /* single frame */
    nn_(SpatialConvolutionLocal_updateGradInput_frame)(gradInput, gradOutput, weight, fgradInput,
                                                       kW, kH, dW, dH, padW, padH,
                                                       nInputPlane, inputWidth, inputHeight,
                                                       nOutputPlane, outputWidth, outputHeight);
  }

  /* restore the original weight layout */
  THTensor_(transpose)(weight, weight, 1, 2);

  return 1;
}
Esempio n. 7
0
/*
 * SpatialMaxPooling backward pass.
 *
 * Lua stack: 1 = module table (fields "dW", "dH", "indices", "gradInput"),
 *            2 = input, 3 = gradOutput.
 * "gradInput" is resized like the input and zeroed; every gradOutput element
 * is then added to the input position recorded in "indices" for its pooling
 * window. The indices buffer is read through its raw data pointer as two
 * consecutive groups of planes: row positions first, column positions after
 * `input->size[0]` planes. Stored positions are 1-based and relative to the
 * window origin (i*dH, j*dW).
 * NOTE(review): raw pointer walks assume "indices" and "gradInput" are laid
 * out contiguously; confirm against the forward pass.
 * Returns 1.
 */
static int nn_(SpatialMaxPooling_updateGradInput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_Tensor);
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);

  // work on a contiguous copy/view of gradOutput (released at the end)
  gradOutput = THTensor_(newContiguous)(gradOutput);

  // gradients are accumulated, so start from zero
  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);

  // geometry
  int ochannels = input->size[0];
  int iheight = input->size[1];
  int iwidth = input->size[2];
  int oheight = gradOutput->size[1];
  int owidth = gradOutput->size[2];

  // base pointers
  real *gradInBase = THTensor_(data)(gradInput);
  real *gradOutBase = THTensor_(data)(gradOutput);
  real *idxBase = THTensor_(data)(indices);

  long plane;
  for (plane = 0; plane < input->size[0]; plane++) {
    // per-plane slices
    real *gradOutPlane = gradOutBase + plane*owidth*oheight;
    real *gradInPlane = gradInBase + plane*iwidth*iheight;
    real *rowIdx = idxBase + plane*owidth*oheight;
    real *colIdx = idxBase + (plane+ochannels)*owidth*oheight;

    int oy, ox;
    for (oy = 0; oy < oheight; oy++) {
      for (ox = 0; ox < owidth; ox++) {
        // absolute input coordinates of the window's maximum
        long maxRow = rowIdx[oy*owidth + ox] - 1 + oy*dH;
        long maxCol = colIdx[oy*owidth + ox] - 1 + ox*dW;

        // route the gradient to that position
        gradInPlane[maxRow*iwidth + maxCol] += gradOutPlane[oy*owidth + ox];
      }
    }
  }

  // drop the reference taken by newContiguous
  THTensor_(free)(gradOutput);

  return 1;
}
Esempio n. 8
0
/*
 * SpatialMaxPooling backward pass (nnOmp variant, tensor-view based).
 *
 * Lua stack: 1 = module table (fields "kW", "kH", "dW", "dH", "nThread",
 *                "indices", "gradInput"),
 *            2 = input, 3 = gradOutput.
 * "gradInput" is resized like the input and zeroed. For every plane, the
 * gradInput plane is unfolded into kH x kW windows with steps dH / dW, and
 * each gradOutput element is added to the window cell whose 1-based
 * (row, column) position is stored in indices[0] / indices[1].
 * Returns 1.
 */
static int nnOmp_(SpatialMaxPooling_updateGradInputOmp)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id));
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  setompnthread(L,1,"nThread");

  THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_(Tensor_id));
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id));

  THTensor *outPlane, *inPlane, *windows, *window;
  int plane, row, col;

  /* gradients are accumulated, so start from zero */
  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);

  /* reusable view headers */
  inPlane = THTensor_(new)();
  outPlane = THTensor_(new)();
  window = THTensor_(new)();
  windows = THTensor_(new)();

  for (plane = 0; plane < input->size[0]; plane++)
  {
    /* current plane of both gradients */
    THTensor_(select)(outPlane, gradOutput, 0, plane);
    THTensor_(select)(inPlane, gradInput, 0, plane);

    /* expose every local window of the gradInput plane */
    THTensor_(unfold)(windows, inPlane, 0, kH, dH);
    THTensor_(unfold)(windows, NULL,    1, kW, dW);

    for (row = 0; row < outPlane->size[0]; row++) {
      for (col = 0; col < outPlane->size[1]; col++) {
        /* narrow down to window (row, col); the second select works in place */
        THTensor_(select)(window, windows, 0, row);
        THTensor_(select)(window, NULL,    0, col);

        /* stored positions are 1-based */
        long maxRow = THTensor_(get4d)(indices,0,plane,row,col)-1;
        long maxCol = THTensor_(get4d)(indices,1,plane,row,col)-1;
        double updated = THTensor_(get2d)(window,maxRow,maxCol)+THTensor_(get2d)(outPlane,row,col);
        THTensor_(set2d)(window,maxRow,maxCol,updated);
      }
    }
  }

  /* release the view headers */
  THTensor_(free)(inPlane);
  THTensor_(free)(outPlane);
  THTensor_(free)(windows);
  THTensor_(free)(window);

  return 1;
}
Esempio n. 9
0
/*
 * SparseLinear forward pass (nnz x 2 sparse layout, optional sharding).
 *
 * Lua stack: 1 = module table (fields "weight", "bias", "output" and,
 *                optionally, "shardBuffer"),
 *            2 = input tensor.
 * The input is validated by nn_(checkInput); column 0 is read as a 1-based
 * index and column 1 as the matching value. "output" and "bias" must pass
 * nn_(checkSize1D) against weight->size[0].
 *
 * When "shardBuffer" is set it must be 2D with weight->size[0] rows and at
 * least one column. Each loop iteration hands the indexed weight column and
 * the value to THBlas axpy, targeting the buffer column picked by shardId;
 * the buffer is then reduced with THTensor sum along dimension 1 into
 * `output` and combined with the bias through THTensor cadd.
 *
 * NOTE(review): this definition is cut off after the shardBuffer branch;
 * the non-sharded path and the closing brace are not present in this file.
 */
static int nn_(SparseLinear_updateOutput)(lua_State *L)
{
  long i;
  THTensor * input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor * weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
  THTensor * bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor);
  THTensor * output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);

  long outDim = weight->size[0];
  long inDim = weight->size[1];

  luaL_argcheck(L, nn_(checkInput)(input), 2, "input size must be nnz x 2");
  luaL_argcheck(L, nn_(checkSize1D)(output, outDim), 1, "output size wrong");
  luaL_argcheck(L, nn_(checkSize1D)(bias, outDim), 1, "bias size wrong");

  /* NOTE(review): this pushes a value that is still on the Lua stack when the
     branch below returns; confirm the stack layout is intended. */
  lua_getfield(L, 1, "shardBuffer");
  if (!lua_isnil(L, -1)) {
    THTensor *buffer =
      luaT_getfieldcheckudata(L, 1, "shardBuffer", torch_Tensor);
    long num_shards = buffer->size[1];
    luaL_argcheck(L,
                  buffer->nDimension == 2 && buffer->size[0] == outDim &&
                      num_shards > 0,
                  1,
                  "shardBuffer size wrong");

    /* one buffer column per thread, so threads never write to the same column */
    THTensor_(zero)(buffer);
    #pragma omp parallel for private(i) schedule(static) num_threads(num_shards)
    for (i = 0; i < input->size[0]; i++) {
#ifdef _OPENMP
      int shardId = omp_get_thread_num();
#else
      /* NOTE(review): without OpenMP every write goes to column 1, which does
         not exist when shardBuffer has a single column; 0 looks like the
         intended value. Confirm. */
      int shardId = 1;
#endif
      long offset = (long)(THTensor_(get2d)(input, i, 0)) - 1;

      if (offset >= 0 && offset < inDim) {
        THBlas_(axpy)(outDim,
                      THTensor_(get2d)(input, i, 1),
                      THTensor_(data)(weight) + offset * weight->stride[1],
                      weight->stride[0],
                      THTensor_(data)(buffer) + shardId * buffer->stride[1],
                      buffer->stride[0]);
      } else {
        /* NOTE(review): luaL_error is raised from inside the OpenMP loop;
           confirm that leaving the parallel region this way is safe. */
        luaL_error(L, "index out of bound. updateOutput: \
%ld not between 1 and %ld", offset + 1, inDim);
      }
    }

    /* collapse the per-shard columns, then add the bias */
    THTensor_(sum)(output, buffer, 1);
    THTensor_(cadd)(output, bias, 1.0, output);

    /* hand the module's "output" field back to Lua */
    lua_getfield(L, 1, "output");
    return 1;
  }
/*
 * SpatialConvolutionMM backward pass with respect to the input.
 *
 * Lua stack: 1 = module table (fields "kW", "kH", "nOutputPlane", "finput",
 *                "fgradInput", "weight", "gradInput"),
 *            2 = input, 3 = gradOutput.
 * The plane dimension of gradOutput (index 1 when the input is 4D, index 0
 * otherwise) must equal nOutputPlane. "gradInput" is resized like the input
 * and "fgradInput" like "finput". The weight is transposed in place
 * (dimensions 0 and 1) for the computation and restored before returning.
 *
 * A 3D input is one frame. Anything else is processed sample by sample along
 * dimension 0 in an OpenMP parallel loop; the TH_STORAGE_REFCOUNTED flag of
 * the three storages is cleared around that loop and set again afterwards.
 * Returns 1.
 */
static int nn_(SpatialConvolutionMM_updateGradInput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");

  THTensor *finput = luaT_getfieldcheckudata(L, 1, "finput", torch_Tensor);
  THTensor *fgradInput = luaT_getfieldcheckudata(L, 1, "fgradInput", torch_Tensor);
  THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);

  const int planeDim = (input->nDimension == 4) ? 1 : 0;

  THArgCheck(nOutputPlane == gradOutput->size[planeDim], 1, "Number of output features is not equal to nOutputPlane");

  THTensor_(resizeAs)(gradInput, input);
  THTensor_(resizeAs)(fgradInput, finput);

  /* temporary in-place transpose, undone at the end of the function */
  THTensor_(transpose)(weight, weight, 0, 1);

  if (input->nDimension != 3)
  {
    /* batch mode: dimension 0 enumerates the samples */
    const long batchSize = input->size[0];
    long b;

    /* per-sample views are created and released inside the parallel loop */
    THStorage_(clearFlag)(gradInput->storage, TH_STORAGE_REFCOUNTED);
    THStorage_(clearFlag)(gradOutput->storage, TH_STORAGE_REFCOUNTED);
    THStorage_(clearFlag)(fgradInput->storage, TH_STORAGE_REFCOUNTED);

#pragma omp parallel for private(b)
    for (b = 0; b < batchSize; b++)
    {
      THTensor *gradInputFrame = THTensor_(newSelect)(gradInput, 0, b);
      THTensor *gradOutputFrame = THTensor_(newSelect)(gradOutput, 0, b);
      THTensor *fgradInputFrame = THTensor_(newSelect)(fgradInput, 0, b);

      nn_(SpatialConvolutionMM_updateGradInput_frame)(gradInputFrame, gradOutputFrame, weight, fgradInputFrame, kW, kH);

      THTensor_(free)(gradInputFrame);
      THTensor_(free)(gradOutputFrame);
      THTensor_(free)(fgradInputFrame);
    }

    THStorage_(setFlag)(gradInput->storage, TH_STORAGE_REFCOUNTED);
    THStorage_(setFlag)(gradOutput->storage, TH_STORAGE_REFCOUNTED);
    THStorage_(setFlag)(fgradInput->storage, TH_STORAGE_REFCOUNTED);
  }
  else
  {
    /* single frame */
    nn_(SpatialConvolutionMM_updateGradInput_frame)(gradInput, gradOutput, weight, fgradInput, kW, kH);
  }

  /* restore the original weight layout */
  THTensor_(transpose)(weight, weight, 0, 1);

  return 1;
}
Esempio n. 11
0
/*
 * TemporalSubSampling forward pass.
 *
 * Lua stack: 1 = module table (fields "kW", "dW", "inputFrameSize",
 *                "weight", "bias", "output"),
 *            2 = input tensor.
 * The input must be 2D with size[0] == inputFrameSize and at least kW
 * entries along dimension 1; otherwise a Lua argument error is raised.
 * "output" is resized to inputFrameSize x ((nInputFrame - kW) / dW + 1).
 * Each output frame is the sum of the kW input frames of its window, passed
 * through THTensor_cmul with the weight and then THTensor_addTensor with the
 * bias. Returns 1.
 */
static int nn_TemporalSubSampling_forward(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor_id);
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int inputFrameSize = luaT_getfieldcheckint(L, 1, "inputFrameSize");

  THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor_id);
  THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor_id);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor_id);

  THTensor *windows, *window, *windowFrame, *outFrame;
  int nInputFrame, nOutputFrame;
  int out, tap;

  luaL_argcheck(L, input->nDimension == 2, 2, "2D tensor expected");
  luaL_argcheck(L, input->size[0] == inputFrameSize, 2, "invalid input frame size");
  luaL_argcheck(L, input->size[1] >= kW, 2, "input sequence smaller than kernel size");

  nInputFrame = input->size[1];
  nOutputFrame = (nInputFrame - kW) / dW + 1;

  THTensor_resize2d(output, inputFrameSize, nOutputFrame);

  /* reusable view headers */
  outFrame = THTensor_new();
  windows = THTensor_new();
  windowFrame = THTensor_new();
  window = THTensor_new();

  /* expose every window of kW frames, stepping by dW */
  THTensor_unfold(windows, input, 1, kW, dW);

  for (out = 0; out < nOutputFrame; out++)
  {
    THTensor_select(window, windows, 1, out);
    THTensor_select(outFrame, output, 1, out);

    /* sum the frames of the window */
    THTensor_zero(outFrame);
    for (tap = 0; tap < kW; tap++)
    {
      THTensor_select(windowFrame, window, 1, tap);
      THTensor_addTensor(outFrame, 1, windowFrame);
    }

    /* scale, then shift */
    THTensor_cmul(outFrame, weight);
    THTensor_addTensor(outFrame, 1, bias);
  }

  THTensor_free(outFrame);
  THTensor_free(windows);
  THTensor_free(windowFrame);
  THTensor_free(window);

  return 1;
}
// SpatialConvolutionGPU: accumulate the gradient with respect to the weights.
//
// Lua stack: 1 = module table (fields "gradWeight", "dW", "dH", "padding",
//                "partialSum" and, when partialSum is non-zero,
//                "gradWeightPartial"),
//            2 = input      (torch.GPUTensor, 4D; size[1..3] are read as
//                            height, width and batch size),
//            3 = gradOutput (torch.GPUTensor),
//            4 = optional scale factor, defaults to 1.
//
// Geometry is taken from gradWeight (size[0] = input planes, size[1] = kH,
// size[2] = kW, size[3] = output planes). Input, kernel and stride must all
// be square, and input, gradOutput and the gradient buffer must be
// contiguous; a violation raises a Lua argument error.
//
// When partialSum is non-zero the gradient is written to "gradWeightPartial",
// resized to (oH*oW/partialSum) x nInputPlane x (kH*kW) x nOutputPlane,
// instead of "gradWeight".
//
// Returns 0.
static int gpunn_SpatialConvolutionGPU_accGradParameters(lua_State *L)
{
  THGPUTensor *input = (THGPUTensor *)luaT_checkudata(L, 2, "torch.GPUTensor");
  THGPUTensor *gradOutput = (THGPUTensor *)luaT_checkudata(L, 3, "torch.GPUTensor");
  THGPUTensor *gradWeight = (THGPUTensor *)luaT_getfieldcheckudata(L, 1, "gradWeight", "torch.GPUTensor");
  
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int padding = luaT_getfieldcheckint(L, 1, "padding");
  int partialSum = luaT_getfieldcheckint(L, 1, "partialSum");
  float scale = luaL_optnumber(L, 4, 1);

  // input->size[3] is read below, so reject anything that is not a 4D batch
  // first (same check as the forward pass)
  luaL_argcheck(L, input->nDimension == 4, 2, "4D (batch mode) tensor is expected");

  long nOutputPlane = gradWeight->size[3];
  long nInputPlane  = gradWeight->size[0];
  long kH           = gradWeight->size[1];
  long kW           = gradWeight->size[2];
  long inputHeight  = input->size[1];
  long inputWidth   = input->size[2];
  long batchSize    = input->size[3];
  long outputHeight = (padding + inputHeight - kH) / dH + 1;
  long outputWidth  = (padding + inputWidth - kW) / dW + 1;

  // asserts
  luaL_argcheck(L, inputWidth == inputHeight, 1, "input must be square");
  luaL_argcheck(L, kH == kW, 1, "kH must be equal to kW");
  luaL_argcheck(L, dH == dW, 1, "dH must be equal to dW");

  if (partialSum)
  {
    // compute partial gradients for outputHeight*outputWidth/partialSum groups of filters separately
    gradWeight = (THGPUTensor *)luaT_getfieldcheckudata(L, 1, "gradWeightPartial", "torch.GPUTensor");
    THGPUTensor_resize4d(gradWeight, outputHeight * outputWidth / partialSum, nInputPlane, kH * kW,
                         nOutputPlane);
    // numModuleY*numModulesX/partialSum, numFilterColors, filterPixels, numFilters
  }

  // all the data must be contiguous; report the tensor that actually failed,
  // with the argument it came from (gradOutput is argument 3)
  luaL_argcheck(L, THGPUTensor_isContiguous(input), 2, "input must be contiguous");
  luaL_argcheck(L, THGPUTensor_isContiguous(gradWeight), 1, "gradWeight must be contiguous");
  luaL_argcheck(L, THGPUTensor_isContiguous(gradOutput), 3, "gradOutput must be contiguous");

  auto avInput = input->get_array_view();
  auto avGradOutput = gradOutput->get_array_view();
  auto avGradWeight = gradWeight->get_array_view();
  // convolutions
  spatialConv_accGradParameters(avInput, avGradOutput, avGradWeight, nInputPlane, inputHeight,
                               inputWidth, batchSize, nOutputPlane, outputHeight, outputWidth, kH, kW,
                               -floor((double)padding/2), dW, 0, scale, partialSum);

  return 0;
}
Esempio n. 13
0
/*
 * SpatialConvolutionLocal: accumulate parameter gradients.
 *
 * Lua stack: 1 = module table (geometry fields "iW", "iH", "oW", "oH", "kW",
 *                "kH", "dW", "dH", "padW", "padH", "nInputPlane",
 *                "nOutputPlane"; tensors "finput", "gradWeight",
 *                "gradBias"),
 *            2 = input, 3 = gradOutput, 4 = optional scale (default 1).
 * A 3D input is one frame. Anything else is walked sample by sample along
 * dimension 0, sequentially, calling the per-frame helper with the shared
 * gradWeight / gradBias tensors each time. Returns 0.
 */
static int nn_(SpatialConvolutionLocal_accGradParameters)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  real scale = luaL_optnumber(L, 4, 1);
  long inputWidth = luaT_getfieldcheckint(L, 1, "iW");
  long inputHeight = luaT_getfieldcheckint(L, 1, "iH");
  long outputWidth = luaT_getfieldcheckint(L, 1, "oW");
  long outputHeight = luaT_getfieldcheckint(L, 1, "oH");
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int padW = luaT_getfieldcheckint(L, 1, "padW");
  int padH = luaT_getfieldcheckint(L, 1, "padH");
  long nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
  long nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");

  THTensor *finput = luaT_getfieldcheckudata(L, 1, "finput", torch_Tensor);
  THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_Tensor);
  THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor);

  long batchSize;
  long b;

  if (input->nDimension == 3)
  {
    /* single frame */
    nn_(SpatialConvolutionLocal_accGradParameters_frame)(gradOutput, gradWeight, gradBias, finput, scale,
                                                         kW, kH, dW, dH, padW, padH,
                                                         nInputPlane, inputWidth, inputHeight,
                                                         nOutputPlane, outputWidth, outputHeight);
    return 0;
  }

  /* batch mode: every sample feeds the same gradient buffers */
  batchSize = input->size[0];
  for (b = 0; b < batchSize; b++)
  {
    THTensor *gradOutputFrame = THTensor_(newSelect)(gradOutput, 0, b);
    THTensor *finputFrame = THTensor_(newSelect)(finput, 0, b);

    nn_(SpatialConvolutionLocal_accGradParameters_frame)(gradOutputFrame, gradWeight, gradBias, finputFrame, scale,
                                                         kW, kH, dW, dH, padW, padH,
                                                         nInputPlane, inputWidth, inputHeight,
                                                         nOutputPlane, outputWidth, outputHeight);

    THTensor_(free)(gradOutputFrame);
    THTensor_(free)(finputFrame);
  }

  return 0;
}
Esempio n. 14
0
/*
 * LogSigmoid forward pass.
 *
 * Lua stack: 1 = module table (fields "buffer", "output"), 2 = input tensor.
 * Both "output" and "buffer" are resized to match the input. For each
 * element x the buffer receives exp(-x) and the output receives
 * -log(1 + exp(-x)). Returns 1.
 *
 * NOTE(review): the closing brace of this definition is not present in
 * this file.
 */
static int nn_(LogSigmoid_updateOutput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *buffer = luaT_getfieldcheckudata(L, 1, "buffer", torch_Tensor);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);

  THTensor_(resizeAs)(output, input);
  THTensor_(resizeAs)(buffer, input);

  /* z = exp(-x) is stored in the buffer as well as being used for the output */
  TH_TENSOR_APPLY3(real, output, real, input, real, buffer,    \
                   real z = exp(-*input_data);                 \
                   *buffer_data = z;                           \
                   *output_data = -log(1. + z);)

  return 1;
Esempio n. 15
0
// Abs backward pass on the GPU.
//
// Lua stack: 1 = module table (field "gradInput"),
//            2 = input      (torch.GPUTensor),
//            3 = gradOutput (torch.GPUTensor).
// Contiguous versions of input and gradOutput are obtained, "gradInput" is
// resized to match, and absupdateGradInput_functor is applied element-wise
// through bolt::amp::transform over nElement(input) entries, starting at
// each tensor's storage offset. The contiguous temporaries are released
// before returning. Returns 1.
static int gpunn_Abs_updateGradInput(lua_State *L)
{
  THGPUTensor *input = (THGPUTensor*)luaT_checkudata(L, 2, "torch.GPUTensor");
  THGPUTensor *gradOutput = (THGPUTensor*)luaT_checkudata(L, 3, "torch.GPUTensor");
  THGPUTensor *gradInput = (THGPUTensor*)luaT_getfieldcheckudata(L, 1, "gradInput", "torch.GPUTensor");
  const long count = THGPUTensor_nElement(input);

  // own references to contiguous data, released below
  THGPUTensor *inputC = THGPUTensor_newContiguous(input);
  THGPUTensor *gradOutputC = THGPUTensor_newContiguous(gradOutput);
  THGPUTensor_resizeAs(gradInput, inputC);

  auto inputVec = inputC->get_bolt_dev_vec();
  auto gradOutputVec = gradOutputC->get_bolt_dev_vec();
  auto gradInputVec = gradInput->get_bolt_dev_vec();

  // binary transform: (input, gradOutput) -> gradInput
  bolt::amp::transform(inputVec.begin() + inputC->storageOffset,
                       inputVec.begin() + inputC->storageOffset + count,
                       gradOutputVec.begin() + gradOutputC->storageOffset,
                       gradInputVec.begin() + gradInput->storageOffset,
                       absupdateGradInput_functor());

  THGPUTensor_free(gradOutputC);
  THGPUTensor_free(inputC);
  return 1;
}
Esempio n. 16
0
/*
 * SparseLinear: compute parameter gradients (legacy 2 x N sparse layout).
 *
 * Lua stack: 1 = module table (fields "weight", "output", "gradBias",
 *                "gradWeight", "lastInput", "weightDecay"),
 *            2 = input, 3 = gradOutput, 4 = optional scale (default 1).
 * For every input column (row 0 = 1-based index, row 1 = value) the indexed
 * gradWeight row is first scaled by 0 and then handed to THBlas axpy with
 * scale * value and gradOutput. gradOutput is afterwards combined into
 * gradBias through THTensor cadd; when weightDecay is non-zero the weight is
 * combined into gradWeight the same way with that factor. Finally the input
 * is copied into "lastInput".
 *
 * Raises a Lua error ("index out of bound") for an index outside
 * [1, gradWeight->size[0]]. Returns 0.
 */
static int nn_(SparseLinear_accGradParameters)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id));
  real scale = luaL_optnumber(L, 4, 1);
  THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id));
  /* not used below, but the lookup still validates that the field exists */
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id));
  THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_(Tensor_id));
  THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_(Tensor_id));
  THTensor *lastInput = luaT_getfieldcheckudata(L, 1, "lastInput", torch_(Tensor_id));
  real weightDecay = luaT_getfieldchecknumber(L, 1, "weightDecay");
  const long nRows = gradWeight->size[0]; /* exclusive upper bound for 0-based indices */
  long col;

  for (col = 0; col < input->size[1]; col++)
  {
    const long row = (long)(THTensor_(get2d)(input, 0, col)) - 1;
    real coeff;

    /* luaL_error does not return, so this is a plain guard clause */
    if (row < 0 || row >= nRows)
      return luaL_error(L, "index out of bound");

    coeff = scale*THTensor_(get2d)(input, 1, col);

    /* reset the row, then write the fresh gradient into it */
    THBlas_(scal)(gradOutput->size[0],
                  0,
                  THTensor_(data)(gradWeight) + row * gradWeight->stride[0],
                  gradWeight->stride[1]);

    THBlas_(axpy)(gradOutput->size[0],
                  coeff,
                  THTensor_(data)(gradOutput),
                  gradOutput->stride[0],
                  THTensor_(data)(gradWeight) + row * gradWeight->stride[0],
                  gradWeight->stride[1]);
  }

  THTensor_(cadd)(gradBias, gradBias, 1, gradOutput);

  if (weightDecay != 0)
    THTensor_(cadd)(gradWeight, gradWeight, weightDecay, weight);

  /* remember which rows were touched; "lastInput" is read again by the parameter update */
  THTensor_(resizeAs)(lastInput, input);
  THTensor_(copy)(lastInput, input);

  return 0;
}
Esempio n. 17
0
/*
 * Sqrt backward pass.
 *
 * Lua stack: 1 = module table (fields "output", "gradInput"),
 *            2 = input, 3 = gradOutput.
 * "gradInput" is resized like the input. In the branch shown here (taken
 * when "output" is 1D or any of output / gradOutput / gradInput is not
 * contiguous) each element becomes 0.5 * gradOutput / output, or 0 where
 * output is exactly 0.
 *
 * NOTE(review): this definition is cut off inside the `if` body; the
 * remaining branch, the return statement and the closing braces are not
 * present in this file.
 */
static int nn_(Sqrt_updateGradInput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);

  THTensor_(resizeAs)(gradInput, input);

  if (output->nDimension == 1 || 
      !THTensor_(isContiguous)(output) || 
      !THTensor_(isContiguous)(gradOutput) ||
      !THTensor_(isContiguous)(gradInput))
  {
    /* the zero test avoids dividing by a zero output */
    TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, output, \
                     *gradInput_data = ((*output_data == 0.0) ? 0.0 : \
                                        (0.5 * (*gradOutput_data / *output_data))););
// SpatialConvolutionGPU forward pass.
//
// Lua stack: 1 = module table (fields "dW", "dH", "padding", "weight",
//                "output"),
//            2 = input (torch.GPUTensor, must be 4D; size[1..3] are read as
//                       height, width and batch size).
// Geometry is taken from the weight (size[0] = input planes, size[1] = kH,
// size[2] = kW, size[3] = output planes). "output" is resized to
// nOutputPlane x outputHeight x outputWidth x batchSize. Input, kernel and
// stride must be square and all three tensors contiguous; a violation raises
// a Lua argument error. Returns 1.
static int gpunn_SpatialConvolutionGPU_updateOutput(lua_State *L)
{
  THGPUTensor *input = (THGPUTensor*)luaT_checkudata(L, 2, "torch.GPUTensor");
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int padding = luaT_getfieldcheckint(L, 1, "padding");

  THGPUTensor *weight = (THGPUTensor*)luaT_getfieldcheckudata(L, 1, "weight", "torch.GPUTensor");
  THGPUTensor *output = (THGPUTensor*)luaT_getfieldcheckudata(L, 1, "output", "torch.GPUTensor");

  luaL_argcheck(L, input->nDimension == 4, 2, "4D (batch mode) tensor is expected");

  // kernel geometry, from the weight tensor
  const long nInputPlane  = weight->size[0];
  const long kH           = weight->size[1];
  const long kW           = weight->size[2];
  const long nOutputPlane = weight->size[3];

  // input geometry
  const long inputHeight = input->size[1];
  const long inputWidth  = input->size[2];
  const long batchSize   = input->size[3];

  // resulting output geometry
  const long outputHeight = (padding + inputHeight - kH) / dH + 1;
  const long outputWidth  = (padding + inputWidth - kW) / dW + 1;

  // the output is resized before the shape constraints are verified
  THGPUTensor_resize4d(output, nOutputPlane, outputHeight, outputWidth, batchSize);

  // only square problems are supported
  luaL_argcheck(L, inputWidth == inputHeight, 1, "input must be square");
  luaL_argcheck(L, kH == kW, 1, "kH must be equal to kW");
  luaL_argcheck(L, dH == dW, 1, "dH must be equal to dW");

  // the kernel works on contiguous memory only
  luaL_argcheck(L, THGPUTensor_isContiguous(input), 2, "input must be contiguous");
  luaL_argcheck(L, THGPUTensor_isContiguous(weight), 1, "weight must be contiguous");
  luaL_argcheck(L, THGPUTensor_isContiguous(output), 1, "output must be contiguous");

  auto inputView = input->get_array_view();
  auto outputView = output->get_array_view();
  auto weightView = weight->get_array_view();

  spatialConv_updateOutput(inputView, weightView, outputView, nInputPlane, inputHeight, inputWidth,
                          batchSize, nOutputPlane, outputHeight, outputWidth, kH, kW,
                          -floor((double)padding/2), dW, 0, 1, true);

  return 1;
}
Esempio n. 19
0
/*
 * Square forward pass.
 *
 * Lua stack: 1 = module table (field "output"), 2 = input tensor.
 * "output" is resized like the input and every element is set to the square
 * of the matching input element.
 *
 * NOTE(review): this definition is cut off after the element-wise loop; the
 * return statement and closing brace are not present in this file.
 */
static int nn_(Square_updateOutput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id));
  
  THTensor_(resizeAs)(output, input);

  TH_TENSOR_APPLY2(real, output, real, input,	\
		   *output_data = *input_data * *input_data;);
Esempio n. 20
0
/*
 * TemporalMaxPooling backward pass.
 *
 * Lua stack: 1 = module table (fields "dW", "indices", "gradInput"),
 *            2 = input, 3 = gradOutput.
 * "gradInput" is resized like the input and zeroed. For output frame t and
 * feature y, the gradient is added to the input frame located
 * indices[t][y] frames after the window start (frame t*dW). The features of
 * one frame are processed in an OpenMP parallel loop.
 * NOTE(review): raw pointer walks assume "indices" and "gradInput" are laid
 * out contiguously as frames x features; confirm against the forward pass.
 * Returns 1.
 */
static int nn_(TemporalMaxPooling_updateGradInput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id));
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_(Tensor_id));
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id));

  // work on a contiguous copy/view of gradOutput (released at the end)
  gradOutput = THTensor_(newContiguous)(gradOutput);

  // gradients are accumulated, so start from zero
  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);

  // geometry of the pooled sequence
  int noframe = gradOutput->size[0];
  long framesize = gradOutput->size[1];

  // base pointers
  real *gradInBase = THTensor_(data)(gradInput);
  real *gradOutBase = THTensor_(data)(gradOutput);
  real *idxBase = THTensor_(data)(indices);

  long frame, feat;
  for (frame = 0; frame < noframe; frame++)
  {
    // first input frame of this window, and the matching output/index rows
    real *gradInWindow = gradInBase + frame*framesize*dW;
    real *gradOutFrame = gradOutBase + frame*framesize;
    real *idxFrame = idxBase + frame*framesize;
#pragma omp parallel for private(feat)
    for (feat = 0; feat < framesize; feat++)
    {
      // frame offset of the maximum inside the window
      long winner = (long)idxFrame[feat];
      gradInWindow[winner*framesize+feat] += gradOutFrame[feat];
    }
  }

  // drop the reference taken by newContiguous
  THTensor_(free)(gradOutput);

  return 1;
}
Esempio n. 21
0
/*
 * Forward pass of the Sqrt module: output = sqrt(input), element-wise.
 *
 * Lua stack: slot 1 is the module object (fields "eps" and "output" are
 * read), slot 2 is the input tensor.
 */
static int nn_(Sqrt_updateOutput)(lua_State *L)
{
  THTensor *src = luaT_checkudata(L, 2, torch_Tensor);
  /* "eps" is fetched but does not take part in the computation below; the
     lookup is kept because the checked accessor may itself raise. */
  real eps = luaT_getfieldchecknumber(L,1,"eps");
  THTensor *dst = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);

  (void)eps;

  THTensor_(resizeAs)(dst, src);
  THTensor_(sqrt)(dst, src);

  return 1;
}
Esempio n. 22
0
/*
 * Forward pass of ExampleModule: writes fabs() of every input element
 * into the module's "output" tensor.
 *
 * Lua stack: slot 1 is the module object, slot 2 is the input tensor.
 *
 * NOTE(review): the closing brace of this function is missing from the
 * listing.
 */
static int nn_(ExampleModule_updateOutput)(lua_State *L)
{
    THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
    THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);

    /* output takes the size of input before the element-wise pass */
    THTensor_(resizeAs)(output, input);

    /* element-wise absolute value */
    TH_TENSOR_APPLY2(real, output, real, input, \
                     *output_data = fabs(*input_data);)
    return 1;
Esempio n. 23
0
/*
 * Forward pass of the Sqrt module (variant that applies the offset):
 * output = sqrt(input + eps), element-wise.
 *
 * Lua stack: slot 1 is the module object (fields "eps" and "output" are
 * read), slot 2 is the input tensor.
 *
 * NOTE(review): this listing is cut off after the element-wise macro call;
 * the return statement and closing brace are not visible here.
 */
static int nn_(Sqrt_updateOutput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
  /* value of the module's "eps" field, added to every element before sqrt */
  real bias = luaT_getfieldchecknumber(L,1,"eps");
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id));

  /* output takes the size of input before the element-wise pass */
  THTensor_(resizeAs)(output, input);

  TH_TENSOR_APPLY2(real, output, real, input,		\
		   *output_data = sqrt(*input_data + bias););
Esempio n. 24
0
/*
 * Backward pass of LogSoftMax.
 *
 * Lua stack: slot 1 is the module object (fields "output" and "gradInput"
 * are read), slot 3 is gradOutput.
 *
 * For every frame (the whole tensor when "output" is 1-D, one row when it
 * is 2-D):
 *
 *   gradInput[d] = gradOutput[d] - exp(output[d]) * sum_k gradOutput[k]
 *
 * Raises a TH error ("vector or matrix expected") when "output" is neither
 * 1-D nor 2-D.
 *
 * gradOutput is supplied by the caller and may be a strided view, while the
 * loops below step through memory with dense strides. A contiguous handle is
 * therefore taken first and released before returning.
 *
 * NOTE(review): "output" and "gradInput" are still accessed through raw
 * pointers and are presumably dense module-owned buffers -- confirm.
 */
static int nn_(LogSoftMax_updateGradInput)(lua_State *L)
{
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);
  real *gradInput_data, *gradOutput_data, *output_data;
  long nframe = 0, dim = 0;
  long t, d;

  if(output->nDimension == 1)
  {
    nframe = 1;
    dim = output->size[0];
  }
  else if(output->nDimension == 2)
  {
    nframe = output->size[0];
    dim = output->size[1];
  }
  else
    THError("vector or matrix expected");

  /* taken only after the shape check so the error path holds no reference */
  gradOutput = THTensor_(newContiguous)(gradOutput);

  THTensor_(resizeAs)(gradInput, output);
  gradInput_data = THTensor_(data)(gradInput);
  output_data = THTensor_(data)(output);
  gradOutput_data = THTensor_(data)(gradOutput);
  for(t = 0; t < nframe; t++)
  {
    /* sum of the incoming gradient over the frame */
    accreal sum = 0;
    for(d = 0; d < dim; d++)
      sum += gradOutput_data[d];

    /* exp(output) recovers the softmax probabilities from the log values */
    for(d = 0; d < dim; d++)
      gradInput_data[d] = gradOutput_data[d] - exp(output_data[d])*sum;

    /* advance all three cursors to the next frame */
    gradInput_data += dim;
    output_data += dim;
    gradOutput_data += dim;
  }

  /* drop the reference obtained from newContiguous */
  THTensor_(free)(gradOutput);

  return 1;
}
Esempio n. 25
0
/*
 * Forward pass of ReLU: each output element is the input element when it is
 * greater than zero, and 0 otherwise.
 *
 * Lua stack: slot 1 is the module object (its "output" field is read),
 * slot 2 is the input tensor.
 *
 * NOTE(review): the closing brace of this function is missing from the
 * listing.
 */
static int nn_(ReLU_updateOutput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);
  
  /* output takes the size of input before the element-wise pass */
  THTensor_(resizeAs)(output, input);

  /* element-wise max(input, 0) */
  TH_TENSOR_APPLY2(real, output, real, input,         \
                   *output_data = *input_data > 0 ? *input_data : 0;)
    
  return 1;
Esempio n. 26
0
/*
 * Forward pass of Tanh, written against the non-generic (double-only) tensor
 * API: each output element becomes tanh() of the matching input element.
 *
 * Lua stack: slot 1 is the module object (its "output" field is read),
 * slot 2 is the input tensor.
 *
 * NOTE(review): the closing brace of this function is missing from the
 * listing.
 */
static int nn_Tanh_forward(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor_id);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor_id);

  /* output takes the size of input before the element-wise pass */
  THTensor_resizeAs(output, input);

  /* in this API flavour the per-element cursors are named <tensor>_p */
  TH_TENSOR_APPLY2(double, output, double, input, \
                   *output_p = tanh(*input_p);)

  return 1;
Esempio n. 27
0
/*
 * Forward pass of HardShrink with a fixed threshold of 0.5.
 *
 * Lua stack: slot 1 is the module object (its "output" field is read),
 * slot 2 is the input tensor.
 *
 * Per element:
 *   input >  0.5  ->  input - 0.5
 *   input < -0.5  ->  input + 0.5
 *   otherwise     ->  0
 *
 * The lower branch compares against -0.5. Comparing against +0.5 there would
 * send every value below 0.5 through the "+ 0.5" branch and leave the zero
 * branch reachable only for an input of exactly 0.5.
 *
 * NOTE(review): the outer branches shift the value by the threshold rather
 * than passing it through unchanged; that arithmetic is kept as found --
 * confirm it is the intended shrinkage function.
 *
 * NOTE(review): this listing is cut off after the element-wise macro call;
 * the return statement and closing brace are not visible here.
 */
static int nn_(HardShrink_updateOutput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id));

  /* output takes the size of input before the element-wise pass */
  THTensor_(resizeAs)(output, input);
  
  TH_TENSOR_APPLY2(real, output, real, input,                       \
                   if ((*input_data) > 0.5) *output_data = *input_data - 0.5;    \
                   else if ((*input_data) < -0.5) *output_data = *input_data + 0.5; \
                   else *output_data = 0;);
Esempio n. 28
0
/*
 * Forward pass of Tanh (generic-type version).
 *
 * Lua stack: slot 1 is the module object (its "output" field is read),
 * slot 2 is the input tensor.
 *
 * NOTE(review): this listing is cut off inside the first branch; whatever
 * handles the remaining case (contiguous, more than one dimension) is not
 * visible here.
 */
static int nn_(Tanh_updateOutput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);

  /* output takes the size of input before the element-wise pass */
  THTensor_(resizeAs)(output, input);

  /* 1-D input, or either tensor not contiguous: use the generic
     element-wise macro */
  if (input->nDimension == 1 || !THTensor_(isContiguous)(input) || !THTensor_(isContiguous)(output))
  {
    TH_TENSOR_APPLY2(real, output, real, input,   \
         *output_data = tanh(*input_data););
Esempio n. 29
0
/*
 * Hessian back-propagation step for the Abs module (double-only API).
 *
 * Lua stack: slot 1 is the module object (its "hessianInput" field is read),
 * slot 2 is the input tensor, slot 3 is hessianOutput.
 *
 * Per element: hessianInput = hessianOutput * squaredDerivate, where
 * squaredDerivate is the square of a factor that is +1 for z >= 0 and -1
 * otherwise. As written that square is 1 for every z, so hessianOutput is
 * carried over unchanged into the resized hessianInput.
 *
 * NOTE(review): the closing brace of this function is missing from the
 * listing.
 */
static int nn_AbsModuleHessian_backwardHessian(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor_id);
  THTensor *hessianOutput = luaT_checkudata(L, 3, torch_Tensor_id);
  THTensor *hessianInput = luaT_getfieldcheckudata(L, 1, "hessianInput", torch_Tensor_id);

  /* hessianInput takes the size of input before the element-wise pass */
  THTensor_resizeAs(hessianInput, input);
  TH_TENSOR_APPLY3(double, hessianInput, double, hessianOutput, double, input, \
                   double z = *input_p; \
                   double squaredDerivate = (z >= 0 ? 1 : -1)*(z >= 0 ? 1 : -1);
                   *hessianInput_p = *hessianOutput_p * squaredDerivate;)
  return 1;
Esempio n. 30
0
/*
 * Forward pass of LogSoftMax.
 *
 * Lua stack: slot 1 is the module object (its "output" field is read),
 * slot 2 is the input tensor.
 *
 * A 1-D input is handled as a single frame, a 2-D input as one frame per
 * row; any other dimensionality fails the argument check with
 * "vector or matrix expected". For each frame:
 *
 *   output[c] = input[c] - (max + log(sum_k e(max - input[k])))
 *
 * where e() is THExpMinusApprox and max is the largest element of the frame.
 */
static int nn_(LogSoftMax_updateOutput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);
  THTensor *src;
  real *srcRow, *dstRow;
  long rows = 0, cols = 0;
  long r, c;

  switch(input->nDimension)
  {
    case 1:
      rows = 1;
      cols = input->size[0];
      break;
    case 2:
      rows = input->size[0];
      cols = input->size[1];
      break;
    default:
      THArgCheck(0, 2, "vector or matrix expected");
      break;
  }

  /* dense handle on the input; released before returning */
  src = THTensor_(newContiguous)(input);
  THTensor_(resizeAs)(output, src);

  srcRow = THTensor_(data)(src);
  dstRow = THTensor_(data)(output);
  for(r = 0; r < rows; r++, srcRow += cols, dstRow += cols)
  {
    real peak = -THInf;
    accreal acc = 0;

    /* largest element of the frame */
    for(c = 0; c < cols; c++)
      peak = THMax(peak, srcRow[c]);

    /* accumulate the shifted exponentials, then turn acc into the
       log-normaliser of the frame */
    for(c = 0; c < cols; c++)
      acc += THExpMinusApprox(peak-srcRow[c]);
    acc = peak + log(acc);

    for(c = 0; c < cols; c++)
      dstRow[c] = srcRow[c] - acc;
  }

  THTensor_(free)(src);

  return 1;
}