示例#1
0
static int nn_(SpatialConvolution_updateGradInput)(lua_State *L)
{
  /* Backward (gradInput) pass of SpatialConvolution:
     gradInput = full 2D convolution of gradOutput with the transposed
     weights. Lua args: 1 = module, 2 = input, 3 = gradOutput.
     Returns 1 value (gradInput left on the module). */
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
  THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);

  /* the feature dimension is 1 for batched (4D) input, 0 otherwise */
  int featDim = (input->nDimension == 4) ? 1 : 0;
  THArgCheck( nOutputPlane == gradOutput->size[featDim], 1, "Number of output features is not equal to nOutputPlane" );

  /* gradient to input: swap in/out plane axes of the weights */
  THTensor *weightT = THTensor_(newTranspose)(weight, 0, 1);

  if (input->nDimension != 3)
  {
    /* batch of images */
    THTensor_(conv2Dmm)(gradInput, 0.0, 1.0, gradOutput, weightT, dH, dW, "F","C");
  }
  else
  {
    /* single image */
    THTensor_(conv2Dmv)(gradInput, 0.0, 1.0, gradOutput, weightT, dH, dW, "F","C");
  }

  THTensor_(free)(weightT);
  return 1;
}
示例#2
0
static int nn_(SpatialMaxPooling_updateGradInput)(lua_State *L)
{
  /* Backward pass of spatial max pooling: routes each output gradient
     back to the argmax position recorded in `indices` by the forward
     pass. Lua args: 1 = module, 2 = input, 3 = gradOutput.
     Returns 1 value (gradInput left on the module). */
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_Tensor);
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);

  /* work on a contiguous view of gradOutput */
  gradOutput = THTensor_(newContiguous)(gradOutput);

  /* gradInput matches the input shape and starts from zero */
  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);

  /* geometry */
  int ichannels = input->size[0];
  int iheight = input->size[1];
  int iwidth = input->size[2];
  int ochannels = ichannels;
  int oheight = gradOutput->size[1];
  int owidth = gradOutput->size[2];

  /* raw pointers */
  real *gradInput_data = THTensor_(data)(gradInput);
  real *gradOutput_data = THTensor_(data)(gradOutput);
  real *indices_data = THTensor_(data)(indices);

  long k;
  for (k = 0; k < input->size[0]; k++) {
    long oplane = k * owidth * oheight;
    real *gradOutput_p = gradOutput_data + oplane;
    real *gradInput_p = gradInput_data + k * iwidth * iheight;
    /* indices stores row offsets in plane k, column offsets in plane k+ochannels */
    real *indy_p = indices_data + oplane;
    real *indx_p = indices_data + (k + ochannels) * owidth * oheight;

    int i, j;
    for (i = 0; i < oheight; i++) {
      for (j = 0; j < owidth; j++) {
        long o = i * owidth + j;
        /* 1-based window-local offsets -> absolute input coordinates */
        long maxi = (long)indy_p[o] - 1 + i * dH;
        long maxj = (long)indx_p[o] - 1 + j * dW;

        /* accumulate the output gradient at the max location */
        gradInput_p[maxi * iwidth + maxj] += gradOutput_p[o];
      }
    }
  }

  /* release the contiguous copy */
  THTensor_(free)(gradOutput);

  return 1;
}
示例#3
0
static int nnOmp_(SpatialMaxPooling_updateGradInputOmp)(lua_State *L)
{
  /* Backward pass of spatial max pooling (OpenMP build variant):
     routes each output gradient back to the argmax location recorded
     in `indices` during the forward pass.
     Lua args: 1 = module, 2 = input, 3 = gradOutput.
     Returns 1 value (gradInput left on the module). */
  THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id));
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  setompnthread(L,1,"nThread"); /* configure OpenMP thread count from the module's nThread field */

  THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_(Tensor_id));
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id));

  THTensor *gradOutputPlane, *gradInputPlane, *unfoldedGradInputPlane, *gradLocalInput;
  int k,i,j;

  /* gradInput matches the input shape and starts from zero */
  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);

  gradInputPlane = THTensor_(new)();
  gradOutputPlane = THTensor_(new)();
  gradLocalInput = THTensor_(new)();
  unfoldedGradInputPlane = THTensor_(new)();

  for (k = 0; k < input->size[0]; k++)
  {
    /* get input and output plane */
    THTensor_(select)(gradOutputPlane, gradOutput, 0, k);
    THTensor_(select)(gradInputPlane, gradInput, 0, k);

    /* Unfold input to get each local window.
       NOTE: passing NULL as the source tensor makes the TH op apply
       in place on the first tensor. */
    THTensor_(unfold)(unfoldedGradInputPlane, gradInputPlane, 0, kH, dH);
    THTensor_(unfold)(unfoldedGradInputPlane, NULL,           1, kW, dW);

    /* Calculate max points */
    for(i = 0; i < gradOutputPlane->size[0]; i++) {
      for(j = 0; j < gradOutputPlane->size[1]; j++) {
	/* select the kH x kW window that produced output (i,j) */
	THTensor_(select)(gradLocalInput, unfoldedGradInputPlane,0,i);
	THTensor_(select)(gradLocalInput, NULL,                  0,j);
	/* indices holds the 1-based (row, col) of the max within the window */
	long maxi = THTensor_(get4d)(indices,0,k,i,j)-1;
	long maxj = THTensor_(get4d)(indices,1,k,i,j)-1;
	/* accumulate the output gradient at the max location */
	double gi = THTensor_(get2d)(gradLocalInput,maxi,maxj)+THTensor_(get2d)(gradOutputPlane,i,j);
	THTensor_(set2d)(gradLocalInput,maxi,maxj,gi);
      }
    }
  }

  /* Cleanup */
  THTensor_(free)(gradInputPlane);
  THTensor_(free)(gradOutputPlane);
  THTensor_(free)(unfoldedGradInputPlane);
  THTensor_(free)(gradLocalInput);

  return 1;
}
static int nn_(SpatialConvolutionMM_updateGradInput)(lua_State *L)
{
  /* Backward (gradInput) pass of the matrix-multiplication based
     spatial convolution. Handles 3D input (single image) and 4D input
     (batch, parallelized over frames with OpenMP).
     Lua args: 1 = module, 2 = input, 3 = gradOutput.
     Returns 1 value (gradInput left on the module). */
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");

  THTensor *finput = luaT_getfieldcheckudata(L, 1, "finput", torch_Tensor);
  THTensor *fgradInput = luaT_getfieldcheckudata(L, 1, "fgradInput", torch_Tensor);
  THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);

  THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 1, "Number of output features is not equal to nOutputPlane" );

  THTensor_(resizeAs)(gradInput, input);
  THTensor_(resizeAs)(fgradInput, finput);
  /* transpose the weights in place for the backward pass; restored below */
  THTensor_(transpose)(weight, weight, 0, 1);

  if(input->nDimension == 3)
  {   
    nn_(SpatialConvolutionMM_updateGradInput_frame)(gradInput, gradOutput, weight, fgradInput, kW, kH);
  }
  else
  {
    long T = input->size[0];
    long t;

    /* Temporarily mark the shared storages as non-refcounted so the
       newSelect/free pairs inside the parallel loop do not update a
       shared refcount concurrently. */
    THStorage_(clearFlag)(gradInput->storage, TH_STORAGE_REFCOUNTED);
    THStorage_(clearFlag)(gradOutput->storage, TH_STORAGE_REFCOUNTED);
    THStorage_(clearFlag)(fgradInput->storage, TH_STORAGE_REFCOUNTED);

#pragma omp parallel for private(t)
    for(t = 0; t < T; t++)
    {
      /* per-frame views; each iteration touches only frame t */
      THTensor *gradInput_t = THTensor_(newSelect)(gradInput, 0, t);
      THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, t);
      THTensor *fgradInput_t = THTensor_(newSelect)(fgradInput, 0, t);

      nn_(SpatialConvolutionMM_updateGradInput_frame)(gradInput_t, gradOutput_t, weight, fgradInput_t, kW, kH);
      
      THTensor_(free)(gradInput_t);
      THTensor_(free)(gradOutput_t);      
      THTensor_(free)(fgradInput_t);
    }

    /* restore normal refcounting */
    THStorage_(setFlag)(gradInput->storage, TH_STORAGE_REFCOUNTED);
    THStorage_(setFlag)(gradOutput->storage, TH_STORAGE_REFCOUNTED);
    THStorage_(setFlag)(fgradInput->storage, TH_STORAGE_REFCOUNTED);    
  }
    
  /* undo the in-place transpose so the weights keep their original layout */
  THTensor_(transpose)(weight, weight, 0, 1);

  return 1;
}
示例#5
0
static int nn_TemporalSubSampling_forward(lua_State *L)
{
  /* Forward pass of temporal sub-sampling: for each output frame, sums
     kW consecutive input frames (stride dW), then multiplies the sum
     elementwise by `weight` and adds `bias`.
     Lua args: 1 = module, 2 = input (inputFrameSize x nInputFrame).
     Returns 1 value (output left on the module). */
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor_id);  
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int inputFrameSize = luaT_getfieldcheckint(L, 1, "inputFrameSize");

  THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor_id);
  THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor_id);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor_id);

  THTensor *unfoldedInput, *unfoldedInputFrame, *unfoldedInputFrames;
  THTensor *outputFrame;
  int nInputFrame, nOutputFrame;
  int i, k;
  
  luaL_argcheck(L, input->nDimension == 2, 2, "2D tensor expected");
  luaL_argcheck(L, input->size[0] == inputFrameSize, 2, "invalid input frame size");
  luaL_argcheck(L, input->size[1] >= kW, 2, "input sequence smaller than kernel size");

  nInputFrame = input->size[1];
  /* number of kW-wide windows at stride dW */
  nOutputFrame = (nInputFrame - kW) / dW + 1;

  THTensor_resize2d(output,
                    inputFrameSize, 
                    nOutputFrame);

  outputFrame = THTensor_new();
  unfoldedInput = THTensor_new();
  unfoldedInputFrame = THTensor_new();
  unfoldedInputFrames = THTensor_new();

  /* view input as (inputFrameSize, nOutputFrame, kW) sliding windows */
  THTensor_unfold(unfoldedInput, input, 1, kW, dW);
  for(k = 0; k < nOutputFrame; k++)
  {
    THTensor_select(unfoldedInputFrames, unfoldedInput, 1, k);
    THTensor_select(outputFrame, output, 1, k);
    THTensor_zero(outputFrame);
    /* sum the kW input frames of window k */
    for(i = 0; i < kW; i++)
    {
      THTensor_select(unfoldedInputFrame, unfoldedInputFrames, 1, i);
      THTensor_addTensor(outputFrame, 1, unfoldedInputFrame);
    }
    /* elementwise scale and shift */
    THTensor_cmul(outputFrame, weight);
    THTensor_addTensor(outputFrame, 1, bias);
  }

  THTensor_free(outputFrame);
  THTensor_free(unfoldedInput);
  THTensor_free(unfoldedInputFrame);
  THTensor_free(unfoldedInputFrames);

  return 1;
}
static int gpunn_SpatialConvolutionGPU_accGradParameters(lua_State *L)
{
  /* Accumulates the weight gradient of the GPU spatial convolution.
     Shapes as read below:
       gradWeight: (nInputPlane, kH, kW, nOutputPlane)
       input:      (plane, height, width, batch)
     Lua args: 1 = module, 2 = input, 3 = gradOutput, 4 = optional scale.
     Returns 0 values. */
  THGPUTensor *input = (THGPUTensor *)luaT_checkudata(L, 2, "torch.GPUTensor");
  THGPUTensor *gradOutput = (THGPUTensor *)luaT_checkudata(L, 3, "torch.GPUTensor");
  THGPUTensor *gradWeight = (THGPUTensor *)luaT_getfieldcheckudata(L, 1, "gradWeight", "torch.GPUTensor");
  
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int padding = luaT_getfieldcheckint(L, 1, "padding");
  int partialSum = luaT_getfieldcheckint(L, 1, "partialSum");
  float scale = luaL_optnumber(L, 4, 1); /* optional gradient scaling, defaults to 1 */

  /* geometry read off the tensor shapes (see layout above) */
  long nOutputPlane = gradWeight->size[3];
  long nInputPlane  = gradWeight->size[0];
  long kH           = gradWeight->size[1];
  long kW           = gradWeight->size[2];
  long inputHeight  = input->size[1];
  long inputWidth   = input->size[2];
  long batchSize    = input->size[3];
  long outputHeight = (padding + inputHeight - kH) / dH + 1;
  long outputWidth  = (padding + inputWidth - kW) / dW + 1;

  // asserts: the GPU kernel only supports square, uniform-stride convolutions
  luaL_argcheck(L, inputWidth == inputHeight, 1, "input must be square");
  luaL_argcheck(L, kH == kW, 1, "kH must be equal to kW");
  luaL_argcheck(L, dH == dW, 1, "dH must be equal to dW");

  if (partialSum)
  {
    // compute partial gradients for outputHeight*outputWidth/partialSum groups of filters separately
    gradWeight = (THGPUTensor *)luaT_getfieldcheckudata(L, 1, "gradWeightPartial", "torch.GPUTensor");
    THGPUTensor_resize4d(gradWeight, outputHeight * outputWidth / partialSum, nInputPlane, kH * kW,
                         nOutputPlane);
    // numModuleY*numModulesX/partialSum, numFilterColors, filterPixels, numFilters
  }

  // all the data must be contiguous: 
  // NOTE(review): the last two checks report arg index 1 with messages
  // "weight"/"output" although they test gradWeight/gradOutput — verify intent.
  luaL_argcheck(L, THGPUTensor_isContiguous(input), 2, "input must be contiguous");
  luaL_argcheck(L, THGPUTensor_isContiguous(gradWeight), 1, "weight must be contiguous");
  luaL_argcheck(L, THGPUTensor_isContiguous(gradOutput), 1, "output must be contiguous");

  auto avInput = input->get_array_view();
  auto avGradOutput = gradOutput->get_array_view();
  auto avGradWeight = gradWeight->get_array_view();
  // convolutions
  spatialConv_accGradParameters(avInput, avGradOutput, avGradWeight, nInputPlane, inputHeight,
                               inputWidth, batchSize, nOutputPlane, outputHeight, outputWidth, kH, kW,
                               -floor((double)padding/2), dW, 0, scale, partialSum);

  return 0;
}
示例#7
0
File: Min.c  Project: CantabResearch/nn
static int nn_(Min_updateOutput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  int dimension = luaT_getfieldcheckint(L, 1, "dimension")-1;
  THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_Tensor);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);

  THLongStorage *dim;
  long i;

  luaL_argcheck(L, dimension >= 0 && dimension < input->nDimension, 2, "dimension out of range");

  dim = THLongStorage_newWithSize(input->nDimension);
  for(i = 0; i < input->nDimension; i++)
    dim->data[i] = input->size[i];
  dim->data[dimension] = 1;
  THTensor_(resize)(output, dim, NULL);
  THTensor_(resize)(indices, dim, NULL);
  THLongStorage_free(dim);

  TH_TENSOR_DIM_APPLY3(real, output, real, input, real, indices, dimension,
                       long theIndex = 0;
                       real theMin = input_data[0];
                       for(i = 1; i < input_size; i++)
                       {
                         if(input_data[i*input_stride] < theMin)
                         {
                           theIndex = i;
                           theMin = input_data[i*input_stride];
                         }
                       }
                       *indices_data = theIndex+1;
                       *output_data = theMin;)

  if(output->nDimension > 1)
示例#8
0
static int nn_(SpatialUpSampling_backward)(lua_State *L)
{
  /* Backward pass of nearest-neighbor upsampling: every output-gradient
     cell is accumulated into the input cell it was replicated from.
     Lua args: 1 = module, 2 = input, 3 = gradOutput.
     Returns 1 value (gradInput left on the module). */
  THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id));
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id));
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");

  /* geometry */
  int iheight = input->size[1];
  int iwidth = input->size[2];
  int ochannels = gradOutput->size[0];
  int oheight = gradOutput->size[1];
  int owidth = gradOutput->size[2];

  /* start from a clean gradient */
  THTensor_(zero)(gradInput);

  real *gradInput_data = THTensor_(data)(gradInput);
  real *gradOutput_data = THTensor_(data)(gradOutput);

  int k;
  for (k = 0; k < ochannels; k++) {
    /* plane base pointers */
    real *gi = gradInput_data + k * iwidth * iheight;
    real *go = gradOutput_data + k * owidth * oheight;

    int x, y;
    for (y = 0; y < oheight; y++) {
      int iy = y / dH; /* source row (floored) */
      for (x = 0; x < owidth; x++) {
        int ix = x / dW; /* source column (floored) */
        /* accumulate gradient back into the source cell */
        gi[iy * iwidth + ix] += go[y * owidth + x];
      }
    }
  }
  return 1;
}
static int gpunn_SpatialConvolutionGPU_updateOutput(lua_State *L)
{
  /* Forward pass of the GPU spatial convolution (batch mode only).
     Shapes as read below:
       weight: (nInputPlane, kH, kW, nOutputPlane)
       input:  (plane, height, width, batch)
       output: resized to (nOutputPlane, outputHeight, outputWidth, batchSize).
     Lua args: 1 = module, 2 = input. Returns 1 value (output). */
  THGPUTensor *input = (THGPUTensor*)luaT_checkudata(L, 2, "torch.GPUTensor");
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int padding = luaT_getfieldcheckint(L, 1, "padding");

  THGPUTensor *weight = (THGPUTensor*)luaT_getfieldcheckudata(L, 1, "weight", "torch.GPUTensor");
  THGPUTensor *output = (THGPUTensor*)luaT_getfieldcheckudata(L, 1, "output", "torch.GPUTensor");

  luaL_argcheck(L, input->nDimension == 4, 2, "4D (batch mode) tensor is expected");

  /* geometry read off the tensor shapes (see layout above) */
  long nOutputPlane = weight->size[3];
  long nInputPlane  = weight->size[0];
  long kH           = weight->size[1];
  long kW           = weight->size[2];
  long inputHeight  = input->size[1];
  long inputWidth   = input->size[2];
  long batchSize    = input->size[3];
  long outputHeight = (padding + inputHeight - kH) / dH + 1;
  long outputWidth  = (padding + inputWidth - kW) / dW + 1;

  // resize output
  THGPUTensor_resize4d(output, nOutputPlane, outputHeight, outputWidth, batchSize);

  // asserts: the GPU kernel only supports square, uniform-stride convolutions
  luaL_argcheck(L, inputWidth == inputHeight, 1, "input must be square");
  luaL_argcheck(L, kH == kW, 1, "kH must be equal to kW");
  luaL_argcheck(L, dH == dW, 1, "dH must be equal to dW");

  // all the data must be contiguous: 
  luaL_argcheck(L, THGPUTensor_isContiguous(input), 2, "input must be contiguous");
  luaL_argcheck(L, THGPUTensor_isContiguous(weight), 1, "weight must be contiguous");
  luaL_argcheck(L, THGPUTensor_isContiguous(output), 1, "output must be contiguous");

  auto avInput = input->get_array_view();
  auto avOutput = output->get_array_view();
  auto avWeight = weight->get_array_view();
  // convolutions
  spatialConv_updateOutput(avInput, avWeight, avOutput, nInputPlane, inputHeight, inputWidth,
                          batchSize, nOutputPlane, outputHeight, outputWidth, kH, kW,
                          -floor((double)padding/2), dW, 0, 1, true);

  return 1;
}
示例#10
0
static int nn_(SpatialUpSampling_forward)(lua_State *L)
{
  /* Forward pass of nearest-neighbor upsampling: each input pixel is
     replicated into a dW x dH block of the output.
     Lua args: 1 = module, 2 = input (channels x height x width).
     Returns 1 value (output left on the module).
     NOTE(review): output is assumed to be pre-sized by the caller —
     it is only written here; verify against the Lua wrapper. */
  THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id));

  /* geometry */
  int iwidth = input->size[2];
  int iheight = input->size[1];
  int ochannels = input->size[0];
  int owidth = iwidth * dW;
  int oheight = iheight * dH;

  real *input_data = THTensor_(data)(input);
  real *output_data = THTensor_(data)(output);

  int k;
  for (k = 0; k < ochannels; k++) {
    /* plane base pointers */
    real *in_p = input_data + k * iwidth * iheight;
    real *out_p = output_data + k * owidth * oheight;

    int x, y;
    for (y = 0; y < oheight; y++) {
      real *src_row = in_p + (y / dH) * iwidth; /* source row (floored) */
      real *dst_row = out_p + y * owidth;
      for (x = 0; x < owidth; x++) {
        /* nearest-neighbor copy from the source column */
        dst_row[x] = src_row[x / dW];
      }
    }
  }
  return 1;
}
示例#11
0
static int nn_(TemporalMaxPooling_updateGradInput)(lua_State *L)
{
  /* Backward pass of temporal max pooling: each output gradient is
     routed back to the input frame that produced the max in the
     forward pass (per-feature frame offsets are stored in `indices`).
     Lua args: 1 = module, 2 = input, 3 = gradOutput.
     Returns 1 value (gradInput left on the module). */
  THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id));
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_(Tensor_id));
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id));

  // get contiguous gradOutput
  gradOutput = THTensor_(newContiguous)(gradOutput);

  // resize and zero
  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);

  // sizes
  int noframe = gradOutput->size[0];
  long framesize = gradOutput->size[1];

  // get raw pointers
  // NOTE(review): indices is walked through its raw data pointer —
  // presumably contiguous; verify against the forward pass.
  real *gradInput_data = THTensor_(data)(gradInput);
  real *gradOutput_data = THTensor_(data)(gradOutput);
  real *indices_data = THTensor_(data)(indices);

  long t, y;
  for(t = 0; t < noframe; t++)
  {
    // window t starts dW input frames after window t-1
    real *gip = gradInput_data + t*framesize*dW;
    real *gop = gradOutput_data + t*framesize;
    real *xp = indices_data + t*framesize;
#pragma omp parallel for private(y)
    for(y = 0; y < framesize; y++)
    {
      // frame offset (within the pooling window) of the max for feature y
      long maxindex = (long)xp[y];
      gip[maxindex*framesize+y] += gop[y];
    }
  }

  // cleanup
  THTensor_(free)(gradOutput);

  return 1;
}
示例#12
0
static int nn_(SpatialConvolutionMM_accGradParameters)(lua_State *L)
{
  /* Accumulates gradWeight and gradBias of the MM-based spatial
     convolution, scaled by the optional 4th Lua argument (default 1).
     Lua args: 1 = module, 2 = input, 3 = gradOutput, 4 = optional scale.
     Returns 0 values. */
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  real scale = luaL_optnumber(L, 4, 1);
  int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
  THTensor *finput = luaT_getfieldcheckudata(L, 1, "finput", torch_Tensor);
  THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_Tensor);
  THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor);

  THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 1, "Number of output features is not equal to nOutputPlane" );

  if(input->nDimension != 3)
  {
    /* batch input: accumulate frame by frame */
    long nFrames = input->size[0];
    long f;
    for(f = 0; f < nFrames; f++)
    {
      THTensor *gradOutput_f = THTensor_(newSelect)(gradOutput, 0, f);
      THTensor *finput_f = THTensor_(newSelect)(finput, 0, f);

      nn_(SpatialConvolutionMM_accGradParameters_frame)(gradOutput_f, gradWeight, gradBias, finput_f, scale);

      THTensor_(free)(gradOutput_f);
      THTensor_(free)(finput_f);
    }
  }
  else
  {
    /* single image */
    nn_(SpatialConvolutionMM_accGradParameters_frame)(gradOutput, gradWeight, gradBias, finput, scale);
  }

  return 0;
}
示例#13
0
static int nn_(SpatialGraph_updateOutput)(lua_State *L)
{
  /* Forward pass of SpatialGraph: for each pixel, computes a
     dissimilarity with its right neighbor (written to output plane 0)
     and its bottom neighbor (output plane 1).
     dist == 0 -> Euclidean distance across channels (scaled by `normer`
     when normalize == 1); otherwise a cosine-based dissimilarity.
     Lua args: 1 = module, 2 = input (channels x height x width).
     Returns 1 value (output left on the module).
     NOTE(review): output is assumed to be pre-sized by the caller —
     it is only zeroed here; verify against the Lua wrapper. */
  // get all params
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  int connex = luaT_getfieldcheckint(L, 1, "connex");
  int dist = luaT_getfieldcheckint(L, 1, "dist");
  int norm = luaT_getfieldcheckint(L, 1, "normalize");
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);

  // dims
  int iwidth = input->size[2];
  int iheight = input->size[1];
  int ichannels = input->size[0];
  int owidth = iwidth;
  int oheight = iheight;
  int ochannels = connex / 2; /* one output plane per neighbor direction */

  // norm ?
  double normer = (norm == 1) ? 1/sqrt(ichannels) : 1;

  // zero output
  THTensor_(zero)(output);

  // Euclidean distance
  if (dist == 0) {
    // Sum[ (Xi - Xi+1)^2 ]  — accumulate squared channel differences
    int x,y,k;
    for (k=0; k<ichannels; k++) {
      for (y=0; y<oheight; y++) {
        for (x=0; x<owidth; x++) {
          if (x < owidth-1) {
            // horizontal edge (pixel vs right neighbor) -> plane 0
            double temp = square(THTensor_(get3d)(input, k, y, x) - THTensor_(get3d)(input, k, y, x+1));
            THTensor_(set3d)(output, 0, y, x, temp + THTensor_(get3d)(output, 0, y, x));
          }
          if (y < oheight-1) {
            // vertical edge (pixel vs bottom neighbor) -> plane 1
            double temp = square(THTensor_(get3d)(input, k, y, x) - THTensor_(get3d)(input, k, y+1, x));
            THTensor_(set3d)(output, 1, y, x, temp + THTensor_(get3d)(output, 1, y, x));
          }
        }
      }
    }

    // Sqrt[ Sum[ (Xi - Xi+1)^2 ] ]  — finish the Euclidean norm, apply normer
    for (k=0; k<ochannels; k++) {
      for (y=0; y<oheight; y++) {
        for (x=0; x<owidth; x++) {
          THTensor_(set3d)(output, k, y, x, sqrt(THTensor_(get3d)(output, k, y, x)) * normer);
        }
      }
    }

    // Cosine dissimilarity
  } else {
    // add epsilon to input (to get rid of 0s)
    THTensor *inputb = THTensor_(newWithSize3d)(input->size[0], input->size[1], input->size[2]);
    THTensor_(copy)(inputb, input);
    THTensor_(add)(inputb, inputb, 1e-12);

    // Sum[ (Xi * Xi+1) ]  — dot products with neighbors plus squared norms
    int x,y,k;
    for (y=0; y<oheight; y++) {
      for (x=0; x<owidth; x++) {
        double norm_A = 0; /* |pixel|^2 */
        double norm_B = 0; /* |right neighbor|^2 */
        double norm_C = 0; /* |bottom neighbor|^2 */
        for (k=0; k<ichannels; k++) {
          norm_A += square(THTensor_(get3d)(inputb, k, y, x));
          if (x < owidth-1) {
            double temp = THTensor_(get3d)(inputb, k, y, x) * THTensor_(get3d)(inputb, k, y, x+1);
            THTensor_(set3d)(output, 0, y, x, temp + THTensor_(get3d)(output, 0, y, x));
            norm_B += square(THTensor_(get3d)(inputb, k, y, x+1));
          }
          if (y < oheight-1) {
            double temp = THTensor_(get3d)(inputb, k, y, x) * THTensor_(get3d)(inputb, k, y+1, x);
            THTensor_(set3d)(output, 1, y, x, temp + THTensor_(get3d)(output, 1, y, x));
            norm_C += square(THTensor_(get3d)(inputb, k, y+1, x));
          }
        }
        // normalized: 1 - cosine similarity; otherwise ichannels - dot product
        if (x < owidth-1) {
          if (norm) {
            THTensor_(set3d)(output, 0, y, x, 1 - THTensor_(get3d)(output, 0, y, x) / (sqrt(norm_A) * sqrt(norm_B)));
          } else {
            THTensor_(set3d)(output, 0, y, x, ichannels - THTensor_(get3d)(output, 0, y, x));
          }
        }
        if (y < oheight-1) {
          if (norm) {
            THTensor_(set3d)(output, 1, y, x, 1 - THTensor_(get3d)(output, 1, y, x) / (sqrt(norm_A) * sqrt(norm_C)));
          } else {
            THTensor_(set3d)(output, 1, y, x, ichannels - THTensor_(get3d)(output, 1, y, x));
          }
        }
      }
    }

    // Cleanup
    THTensor_(free)(inputb);
  }

  return 1;
}
示例#14
0
static int nn_(SpatialGraph_updateGradInput)(lua_State *L)
{
  /* Backward pass of SpatialGraph: backpropagates the edge gradients in
     gradOutput (plane 0 = right-neighbor edges, plane 1 = bottom-neighbor
     edges) to per-pixel, per-channel input gradients.
     Lua args: 1 = module, 2 = input, 3 = gradOutput; returns 1 value
     (gradInput left on the module).
     dist == 0 selects the Euclidean branch, otherwise the cosine branch;
     `normalize` must match the setting used in updateOutput.
     BUGFIX: the cosine normalization terms previously used pow(x, 1/2)
     and pow(x, 3/2) — integer divisions evaluating to pow(x, 0) == 1 and
     pow(x, 1) == x — which silently produced wrong gradients. The
     exponents are now the intended 0.5 and 1.5 (matching the sqrt-based
     normalization of the forward pass). */
  // get all params
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  //int connex = luaT_getfieldcheckint(L, 1, "connex");
  int dist = luaT_getfieldcheckint(L, 1, "dist");
  int norm = luaT_getfieldcheckint(L, 1, "normalize");

  // dims
  //int iwidth = input->size[2];
  //int iheight = input->size[1];
  int ichannels = input->size[0];
  int owidth = gradOutput->size[2];
  int oheight = gradOutput->size[1];
  //int ochannels = gradOutput->size[0];

  // norm ?
  double normer = (norm == 1) ? 1/sqrt(ichannels)/sqrt(ichannels) : 1;

  // resize gradInput
  THTensor_(zero)(gradInput);

  // compute derivatives, and backpropagate output error to input
  if (dist == 0) {
    // Euclidean branch: d/dXi of sqrt(Sum (Xi - Xi+1)^2) = (Xi - Xi+1) / output
    int x,y,k;
    for (k=0; k<ichannels; k++) {
      for (y=0; y<oheight; y++) {
        for (x=0; x<owidth; x++) {
          if (x < owidth-1) {
            // horizontal edge: gradient flows to (x) and, negated, to (x+1)
            double partial_d = THTensor_(get3d)(input, k, y, x) - THTensor_(get3d)(input, k, y, x+1);
            if (partial_d != 0) partial_d /= THTensor_(get3d)(output, 0, y, x);
            partial_d *= THTensor_(get3d)(gradOutput, 0, y, x) * normer;
            THTensor_(set3d)(gradInput, k, y, x, partial_d + THTensor_(get3d)(gradInput, k, y, x));
            THTensor_(set3d)(gradInput, k, y, x+1, -partial_d + THTensor_(get3d)(gradInput, k, y, x+1));
          }
          if (y < oheight-1) {
            // vertical edge: gradient flows to (y) and, negated, to (y+1)
            double partial_d = THTensor_(get3d)(input, k, y, x) - THTensor_(get3d)(input, k, y+1, x);
            if (partial_d != 0) partial_d /= THTensor_(get3d)(output, 1, y, x);
            partial_d *= THTensor_(get3d)(gradOutput, 1, y, x) * normer;
            THTensor_(set3d)(gradInput, k, y, x, partial_d + THTensor_(get3d)(gradInput, k, y, x));
            THTensor_(set3d)(gradInput, k, y+1, x, -partial_d + THTensor_(get3d)(gradInput, k, y+1, x));
          }
        }
      }
    }

    // Cosine
  } else {
    int x,y,k;
    for (y=0; y<oheight; y++) {
      for (x=0; x<owidth; x++) {
        double sum_A = 0;  /* |A|^2: squared norm of pixel (y,x) */
        double sum_B = 0;  /* |B|^2: squared norm of right neighbor */
        double sum_C = 0;  /* |C|^2: squared norm of bottom neighbor */
        double sum_AB = 0; /* A . B */
        double sum_AC = 0; /* A . C */

        if (norm) {
          for (k=0; k<ichannels; k++) {
            sum_A += square(THTensor_(get3d)(input, k, y, x));
            if (x < owidth-1) {
              sum_B += square(THTensor_(get3d)(input, k, y, x+1));
              sum_AB += THTensor_(get3d)(input, k, y, x) * THTensor_(get3d)(input, k, y, x+1);
            }
            if (y < oheight-1) {
              sum_C += square(THTensor_(get3d)(input, k, y+1, x));
              sum_AC += THTensor_(get3d)(input, k, y, x) * THTensor_(get3d)(input, k, y+1, x);
            }
          }
        }

        // term1..term3 are only set and read when norm is true
        double term1, term2, term3, partial_d;
        double epsi = 1e-12;
        if (x < owidth-1) {
          if (norm) {
            /* d/dA of (1 - A.B / (|A||B|)):
               term1 = 1/(|A||B|), term2 = A.B/(|A|^3 |B|), term3 = A.B/(|B|^3 |A|).
               Exponents must be floating-point 0.5 / 1.5 (see BUGFIX above). */
            term1 = 1 / ( pow(sum_A, 0.5) * pow(sum_B, 0.5) + epsi );
            term2 = sum_AB / ( pow(sum_A, 1.5) * pow(sum_B, 0.5) + epsi );
            term3 = sum_AB / ( pow(sum_B, 1.5) * pow(sum_A, 0.5) + epsi );
          }
          for (k=0; k<ichannels; k++) {
            if (norm) {
              partial_d = term2 * THTensor_(get3d)(input, k, y, x)
                - term1 * THTensor_(get3d)(input, k, y, x+1);
            } else {
              partial_d = -THTensor_(get3d)(input, k, y, x+1);
            }
            partial_d *= THTensor_(get3d)(gradOutput, 0, y, x);
            THTensor_(set3d)(gradInput, k, y, x, partial_d + THTensor_(get3d)(gradInput, k, y, x));

            if (norm) {
              partial_d = term3 * THTensor_(get3d)(input, k, y, x+1)
                - term1 * THTensor_(get3d)(input, k, y, x);
            } else {
              partial_d = -THTensor_(get3d)(input, k, y, x);
            }
            partial_d *= THTensor_(get3d)(gradOutput, 0, y, x);
            THTensor_(set3d)(gradInput, k, y, x+1, partial_d + THTensor_(get3d)(gradInput, k, y, x+1));
          }
        }
        if (y < oheight-1) {
          if (norm) {
            /* same derivation for the vertical (bottom-neighbor) edge */
            term1 = 1 / ( pow(sum_A, 0.5) * pow(sum_C, 0.5) + epsi );
            term2 = sum_AC / ( pow(sum_A, 1.5) * pow(sum_C, 0.5) + epsi );
            term3 = sum_AC / ( pow(sum_C, 1.5) * pow(sum_A, 0.5) + epsi );
          }
          for (k=0; k<ichannels; k++) {
            if (norm) {
              partial_d = term2 * THTensor_(get3d)(input, k, y, x)
                - term1 * THTensor_(get3d)(input, k, y+1, x);
            } else {
              partial_d = -THTensor_(get3d)(input, k, y+1, x);
            }
            partial_d *= THTensor_(get3d)(gradOutput, 1, y, x);
            THTensor_(set3d)(gradInput, k, y, x, partial_d + THTensor_(get3d)(gradInput, k, y, x));

            if (norm) {
              partial_d = term3 * THTensor_(get3d)(input, k, y+1, x)
                - term1 * THTensor_(get3d)(input, k, y, x);
            } else {
              partial_d = -THTensor_(get3d)(input, k, y, x);
            }
            partial_d *= THTensor_(get3d)(gradOutput, 1, y, x);
            THTensor_(set3d)(gradInput, k, y+1, x, partial_d + THTensor_(get3d)(gradInput, k, y+1, x));
          }
        }
      }
    }
  }

  return 1;
}
示例#15
0
static int nn_SpatialConvolution_backward(lua_State *L)
{
  /* Full backward pass of the (old-API) SpatialConvolution:
     accumulates gradBias and gradWeight, and computes gradInput.
     Planes are stored along dimension 2 (see the select calls below).
     Lua args: 1 = module, 2 = input, 3 = gradOutput.
     Returns 1 value (gradInput left on the module). */
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor_id);  
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor_id);  
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
  int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");

  THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor_id);
  THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_Tensor_id);
  THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor_id);
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor_id);

  /* reusable view tensors (NULL as source below means "apply in place") */
  THTensor *gradInputPlane, *unfoldedInputPlane, *unfoldedGradInputPlane, *inputPlane;
  THTensor *gradOutputPlane;
  THTensor *weightPlane, *gradWeightPlane;
  int i, k;

  gradInputPlane = THTensor_new();
  unfoldedInputPlane = THTensor_new();
  unfoldedGradInputPlane = THTensor_new();
  inputPlane = THTensor_new();
  gradOutputPlane = THTensor_new();
  weightPlane = THTensor_new();
  gradWeightPlane = THTensor_new();
  
  /* Not necessary with partial backprop: */
  THTensor_resizeAs(gradInput, input);
  THTensor_zero(gradInput);

  for(k = 0; k < nOutputPlane; k++)
  {
    /* gradBias[k] accumulates the sum of output-plane-k gradients */
    THTensor_select(gradOutputPlane, gradOutput, 2, k);
    THTensor_set1d(gradBias, k, THTensor_get1d(gradBias, k) + THTensor_sum(gradOutputPlane));
      
    for(i = 0; i < nInputPlane; i++)
    {
      /* ------------------------- gradWeight ------------------------------------- */

      /* Get the input image, unfolded into kW x kH sliding windows */
      THTensor_select(inputPlane, input, 2, i);
      THTensor_unfold(unfoldedInputPlane, inputPlane, 0, kW, dW);
      THTensor_unfold(unfoldedInputPlane, NULL,       1, kH, dH);
      THTensor_transpose(unfoldedInputPlane,NULL,0,2);
      THTensor_transpose(unfoldedInputPlane,NULL,1,3);

      /* Get the good gradWeight for (k,i) (k out, i in) */
      THTensor_select(gradWeightPlane, gradWeight, 3, k);
      THTensor_select(gradWeightPlane, NULL, 2, i);

      /* accumulate window x gradOutput contraction into gradWeight(k,i) */
      THTensor_addT4dotT2(gradWeightPlane, 1, unfoldedInputPlane, gradOutputPlane);

      /* -------------------------- gradInput ------------------------------------- */

      /* Not necessary with partial backprop: */

      /* Get the gradInput image, unfolded the same way as the input */
      THTensor_select(gradInputPlane, gradInput, 2, i);
      THTensor_unfold(unfoldedGradInputPlane, gradInputPlane, 0, kW, dW);
      THTensor_unfold(unfoldedGradInputPlane, NULL          , 1, kH, dH);

      /* Get the good weight for (k,i) (k out, i in) */
      THTensor_select(weightPlane, weight, 3, k);
      THTensor_select(weightPlane, NULL, 2, i);

      /* scatter gradOutput (x) weight back into the unfolded gradInput view */
      THTensor_addT2outT2(unfoldedGradInputPlane, 1, gradOutputPlane, weightPlane);
    }
  }

  /* release all temporary views */
  THTensor_free(gradInputPlane);
  THTensor_free(unfoldedInputPlane);
  THTensor_free(unfoldedGradInputPlane);
  THTensor_free(inputPlane);
  THTensor_free(gradOutputPlane);
  THTensor_free(weightPlane);
  THTensor_free(gradWeightPlane);

  return 1;
}
示例#16
0
/* Forward pass of a spatial convolution over one 3D image laid out as
 * (width, height, plane) -- the plane dimension is LAST (dim 2) in this
 * old-style module, unlike the plane-first layouts elsewhere in this file.
 * Lua stack: arg 1 = module table (fields kW, kH, dW, dH, nInputPlane,
 * nOutputPlane, weight, bias, output), arg 2 = 3D input tensor.
 * Returns 1 Lua value (the "output" field pushed by luaT_getfieldcheckudata). */
static int nn_SpatialConvolution_forward(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor_id);  
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
  int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");

  THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor_id);
  THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor_id);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor_id);
  
  THTensor *outputPlane, *inputPlane, *weightPlane, *unfoldedInputPlane;
  int i, k;

  luaL_argcheck(L, input->nDimension == 3, 2, "3D tensor expected");
  luaL_argcheck(L, input->size[2] == nInputPlane, 2, "invalid number of input planes");
  luaL_argcheck(L, input->size[0] >= kW && input->size[1] >= kH, 2, "input image smaller than kernel size");

  /* valid-mode output size: (in - kernel)/stride + 1 per spatial dim */
  THTensor_resize3d(output,
                    (input->size[0] - kW) / dW + 1, 
                    (input->size[1] - kH) / dH + 1,
                    nOutputPlane);

  /* reusable view tensors; freed at the end */
  inputPlane = THTensor_new();
  weightPlane = THTensor_new();
  outputPlane = THTensor_new();
  unfoldedInputPlane = THTensor_new();
  
  for(k = 0; k < nOutputPlane; k++)
  {
    /* view of output plane k (planes live in the last dim) */
    THTensor_select(outputPlane, output, 2, k);
    
    /* Initialize to the bias */
    THTensor_fill(outputPlane, THTensor_get1d(bias, k));

    /* Go! */
    for(i = 0; i < nInputPlane; i++)
    {
      THTensor_select(inputPlane, input, 2, i);

      /* Get the good mask for (k,i) (k out, i in).
         NOTE(review): a NULL source appears to mean "select in place on
         the destination" -- the idiom used throughout this file; confirm
         against this THTensor version. */
      THTensor_select(weightPlane, weight, 3, k);
      THTensor_select(weightPlane, NULL, 2, i);

      /* Get the input image: unfold into kW x kH patches stepped by dW/dH */
      THTensor_unfold(unfoldedInputPlane, inputPlane,  0, kW, dW);
      THTensor_unfold(unfoldedInputPlane, NULL,        1, kH, dH);

      /* accumulate per-patch dot products with this kernel into the plane */
      THTensor_addT4dotT2(outputPlane, 1, unfoldedInputPlane, weightPlane);
    }
  }

  THTensor_free(inputPlane);
  THTensor_free(weightPlane);
  THTensor_free(outputPlane);
  THTensor_free(unfoldedInputPlane);

  return 1;
}
示例#17
0
/* SpatialFullConvolution forward (transposed convolution / "deconvolution"),
 * CPU path.  Per batch element:
 *   columns  = weight^T x input        (GEMM into the "finput" buffer)
 *   output   = col2im(columns)         (scatter-add of overlapping patches)
 *   output  += bias                    (GEMM against a ones buffer)
 * Output spatial size: (in - 1)*stride - 2*pad + kernel + adj.
 * NOTE: a 3D input is temporarily resized to 4D IN PLACE and restored to 3D
 * before returning.  Returns 1 Lua value (the "output" field). */
static int nn_(SpatialFullConvolution_updateOutput)(lua_State *L) {
  // Input
  THTensor *input = (THTensor*)luaT_checkudata(L, 2, torch_Tensor);

  // Params:
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
  int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
  int padW = luaT_getfieldcheckint(L, 1, "padW");
  int padH = luaT_getfieldcheckint(L, 1, "padH");
  int adjW = luaT_getfieldcheckint(L, 1, "adjW");
  int adjH = luaT_getfieldcheckint(L, 1, "adjH");

  THTensor *weight  = (THTensor*)luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
  THTensor *bias    = (THTensor*)luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor);
  THTensor *columns = (THTensor*)luaT_getfieldcheckudata(L, 1, "finput", torch_Tensor);
  THTensor *ones    = (THTensor*)luaT_getfieldcheckudata(L, 1, "fgradInput", torch_Tensor);
  THTensor *output  = (THTensor*)luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);

  luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D (batch mode) tensor is expected");

  /* batch==0 marks a 3D input promoted to 4D that must be restored below */
  int batch = 1;
  if (input->nDimension == 3) {
    luaL_argcheck(L, input->size[0] == nInputPlane, 2, "input channels and nInputPlane dont match");
    // Force batch
    batch = 0;
    THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
  } else {
    luaL_argcheck(L, input->size[1] == nInputPlane, 2, "input channels and nInputPlane dont match");
  }

  long inputWidth   = input->size[3];
  long inputHeight  = input->size[2];
  long outputWidth  = (inputWidth - 1) * dW - 2*padW + kW + adjW;
  long outputHeight = (inputHeight - 1) * dH - 2*padH + kH + adjH;

  // Batch size + input planes
  long batchSize = input->size[0];

  // Resize output
  THTensor_(resize4d)(output, batchSize, nOutputPlane, outputHeight, outputWidth);

  // Resize temporary columns
  THTensor_(resize2d)(columns, nOutputPlane*kW*kH, inputHeight*inputWidth);

  // Define a buffer of ones, for bias accumulation
  // Note: this buffer can be shared with other modules, it only ever gets increased,
  // and always contains ones.
  if (ones->nDimension != 2 || ones->size[0]*ones->size[1] < outputHeight*outputWidth) {
    // Resize plane and fill with ones...
    THTensor_(resize2d)(ones, outputHeight, outputWidth);
    THTensor_(fill)(ones, 1);
  }

  // Helpers: per-sample views into input/output
  THTensor *input_n = THTensor_(new)();
  THTensor *output_n = THTensor_(new)();

  int elt;
  // For each elt in batch, do:
  for (elt = 0; elt < batchSize; elt ++) {
    // Matrix mulitply per output:
    THTensor_(select)(input_n, input, 0, elt);
    THTensor_(select)(output_n, output, 0, elt);

    // M,N,K are dims of matrix A and B
    // (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
    long m = weight->size[1] * weight->size[2] * weight->size[3];
    long n = columns->size[1];
    long k = weight->size[0];

    // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
    // columns (m x n) = weight^T (m x k) * input_n (k x n)
    // NOTE(review): raw-pointer GEMM assumes input_n/weight/columns are
    // contiguous -- presumably guaranteed by the resizes/selects above; confirm.
    THBlas_(gemm)(
        'n', 't',
        n, m, k,
        1,
        THTensor_(data)(input_n), n,
        THTensor_(data)(weight), m,
        0,
        THTensor_(data)(columns), n
    );

    // Unpack columns back into input:
    // (col2im scatter-adds overlapping kH x kW patches into output_n)
    nn_(col2im)(
      THTensor_(data)(columns),
      nOutputPlane, outputHeight, outputWidth, kH, kW, padH, padW, dH, dW,
      THTensor_(data)(output_n)
    );

    // Do Bias after:
    // M,N,K are dims of matrix A and B
    // (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
    long m_ = nOutputPlane;
    long n_ = outputHeight * outputWidth;
    long k_ = 1;

    // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
    // output_n += bias * ones^T  (rank-1 update broadcasting bias over the plane)
    THBlas_(gemm)(
        't', 'n',
        n_, m_, k_,
        1,
        THTensor_(data)(ones), k_,
        THTensor_(data)(bias), k_,
        1,
        THTensor_(data)(output_n), n_
    );

  }

  // Free
  THTensor_(free)(input_n);
  THTensor_(free)(output_n);

  // Resize output
  if (batch == 0) {
    THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth);
    THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth);
  }

  // return output
  return 1;
}
示例#18
0
/* Jitter_updateOutput: crops a (batch, y, x, channel) input by (ycrop, xcrop)
 * starting at the 1-based offsets (ystart, xstart), optionally mirroring the
 * cropped window horizontally when the module's "randflip" field is 1.
 * Lua stack: arg 1 = module table, arg 2 = 4D input tensor.
 * Returns 1 Lua value (the "output" field).
 *
 * FIX vs. previous revision: the inner loop counters (y, x, ch) were
 * declared at function scope while the OpenMP pragmas made only the batch
 * index private, so the counters were SHARED across threads -- a data race
 * producing corrupted output under OpenMP.  They are now declared inside
 * the parallel region (per-thread).  The flip/no-flip loop bodies, which
 * differed only in the source column index, were merged. */
static int nxn_(Jitter_updateOutput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);
  int xstart = luaT_getfieldcheckint(L, 1, "xstart");
  int ystart = luaT_getfieldcheckint(L, 1, "ystart");
  int xcrop = luaT_getfieldcheckint(L, 1, "xcrop");
  int ycrop = luaT_getfieldcheckint(L, 1, "ycrop");
  int hflip = luaT_getfieldcheckint(L, 1, "randflip");
  
  int bs   = input->size[0];
  int outy = input->size[1] - ycrop;  /* cropped height */
  int outx = input->size[2] - xcrop;  /* cropped width  */
  int channels = input->size[3];
  
  THTensor_(resize4d)(output, bs, outy, outx, channels);

  real* idata = THTensor_(data)(input);
  real* odata = THTensor_(data)(output);

  /* raw strides; both tensors are addressed manually below */
  int istr0 = input->stride[0];
  int istr1 = input->stride[1];
  int istr2 = input->stride[2];
  int istr3 = input->stride[3];
  
  int ostr0 = output->stride[0];
  int ostr1 = output->stride[1];
  int ostr2 = output->stride[2];
  int ostr3 = output->stride[3];

  /* jittering (+ optional horizontal flip), parallel over batch elements */
  int batchidx;
  #pragma omp parallel for private(batchidx)
  for(batchidx = 0; batchidx < bs; batchidx++)
  {
     int y, x, ch;  /* per-thread counters (the old shared ones raced) */
     for (y = 0; y < outy; y++)
     {
        const int srcy = y + ystart - 1;  /* ystart is 1-based */
        for (x = 0; x < outx; x++)
        {
           /* with hflip, mirror the column inside the cropped window */
           const int srcx = (hflip == 1) ? (xstart - 1 + outx - 1 - x)
                                         : (x + xstart - 1);
           for (ch = 0; ch < channels; ch++)
           {
              odata[batchidx*ostr0 + y*ostr1 + x*ostr2 + ch*ostr3] =
                 idata[batchidx*istr0 + srcy*istr1 + srcx*istr2 + ch*istr3];
           }
        }
     }
  }

  return 1;
}
示例#19
0
/* SpatialConvolution forward (plane-first layout), dispatching on 3D
 * (single image) vs 4D (batch) input.  Kernel geometry is read from the
 * weight tensor (size[0] = nOutputPlane, size[2] = kH, size[3] = kW).
 * The bias is first broadcast into every output plane with raw pointer
 * loops (valid because output was just resized, so planes are dense),
 * then the convolution proper is delegated to conv2Dmv / conv2Dmm in
 * "valid" mode.  Returns 1 Lua value (the "output" field). */
static int nn_(SpatialConvolution_updateOutput)(lua_State *L)
{
    THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
    int dW = luaT_getfieldcheckint(L, 1, "dW");
    int dH = luaT_getfieldcheckint(L, 1, "dH");

    THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
    THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor);
    THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);

    /* spatial dims are 1/2 for a 3D input, 2/3 for a batched 4D input */
    int dimw = 2;
    int dimh = 1;

    luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D(batch mode) tensor expected");

    if (input->nDimension == 4) {
        dimw++;
        dimh++;
    }

    {
        long nOutputPlane = weight->size[0];
        long kW           = weight->size[3];
        long kH           = weight->size[2];
        long inputWidth   = input->size[dimw];
        long inputHeight  = input->size[dimh];
        long outputWidth  = (inputWidth - kW) / dW + 1;
        long outputHeight = (inputHeight - kH) / dH + 1;

        if (input->nDimension == 3)
        {
            long i;
            real* bias_data;
            real* output_data;

            THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth);
            /* add bias: one thread per output plane (disjoint memory) */
            bias_data = THTensor_(data)(bias);
            output_data = THTensor_(data)(output);

            #pragma omp parallel for private(i)
            for (i=0; i<bias->size[0]; i++)
            {
                /*THTensor_(select)(outn,output,0,i);*/
                /*TH_TENSOR_APPLY(real,outn, *outn_data = bias_data[i];);*/
                real *ptr_output = output_data + i*outputWidth*outputHeight;
                long j;
                for(j = 0; j < outputWidth*outputHeight; j++)
                    ptr_output[j] = bias_data[i];
            }
            /*THTensor_(free)(outn);*/

            /* do convolutions ("V" = valid mode, no padding) */
            THTensor_(conv2Dmv)(output, 1.0, 1.0, input, weight, dH, dW, "V","X");
        }
        else
        {
            real* bias_data;
            real* output_data;
            long p;

            THTensor_(resize4d)(output, input->size[0], nOutputPlane, outputHeight, outputWidth);

            bias_data = THTensor_(data)(bias);
            output_data = THTensor_(data)(output);

            /* broadcast bias, one thread per batch element */
            #pragma omp parallel for private(p)
            for (p=0; p<input->size[0]; p++)
            {
                /* BIAS */
                long i;
                for (i=0; i<bias->size[0]; i++)
                {
                    real *ptr_output = output_data + p*nOutputPlane*outputWidth*outputHeight + i*outputWidth*outputHeight;
                    long j;
                    for(j = 0; j < outputWidth*outputHeight; j++)
                        ptr_output[j] = bias_data[i];
                }
            }

            /* do convolutions (batched matrix form, valid mode) */
            THTensor_(conv2Dmm)(output, 1.0, 1.0, input, weight, dH, dW, "V","X");
        }
    }
    return 1;
}
示例#20
0
/* LcEncoder forward: locally-connected encoder over a single-plane image.
 * For each output location (x, y): take the winX x winY input window at
 * (floor(x*xStep), floor(y*yStep)), dot it with the weight slice indexed
 * by (x % woutX, y % woutY) -- i.e. weights repeat with period woutX/woutY
 * across the image -- add the matching bias, and store the scalar at
 * output[x][y][0].
 * Lua stack: arg 1 = module table, arg 2 = 3D input with size[2] == 1.
 * Returns 1 Lua value (the "output" field). */
static int nn_LcEncoder_forward(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor_id);  
  int winX = luaT_getfieldcheckint(L, 1, "winX");
  int winY = luaT_getfieldcheckint(L, 1, "winY");
  int woutX = luaT_getfieldcheckint(L, 1, "woutX");
  int woutY = luaT_getfieldcheckint(L, 1, "woutY");
  double xStep = luaT_getfieldchecknumber(L, 1, "xStep");
  double yStep = luaT_getfieldchecknumber(L, 1, "yStep");

  THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor_id);
  THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor_id);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor_id);

  luaL_argcheck(L, input->nDimension == 3, 2, "3D tensor expected");
  luaL_argcheck(L, input->size[2] == 1, 2, "invalid input 3rd dim size has to be 1");

  /* reusable view tensors (narrow/select create views, not copies) */
  THTensor *inputPlane, *inputNarrowedX, *inputNarrowedYX;
  THTensor *weightSelectedX, *weightSelectedYX;
  
  inputPlane = THTensor_new();
  inputNarrowedX = THTensor_new();
  inputNarrowedYX = THTensor_new();
  weightSelectedX = THTensor_new();
  weightSelectedYX = THTensor_new();

  // get output size from input
  // (number of window positions per axis; double division truncated by resize)
  THTensor_resize3d(output,
                    (input->size[0] - winX+1) / xStep, 
                    (input->size[1] - winY+1) / yStep,
                    1);
  
  THTensor_select(inputPlane, input, 2, 0);


  int y,x,iy,ix,wy,wx;
  for (y = 0; y<output->size[1]; y++)
    {
      iy = (int)floor(y*yStep);  /* window top edge */
      wy = y%woutY;              /* periodic weight row index */
      for (x = 0; x<output->size[0]; x++)
        {
          ix = (int)floor(x*xStep);  /* window left edge */
          wx = x%woutX;              /* periodic weight column index */
          THTensor_narrow(inputNarrowedX, inputPlane, 0, ix, winX);
          THTensor_narrow(inputNarrowedYX, inputNarrowedX, 1, iy, winY);
          THTensor_select(weightSelectedX, weight, 3, wy);
          THTensor_select(weightSelectedYX, weightSelectedX, 2, wx);
          double dot = THTensor_dot(inputNarrowedYX, weightSelectedYX);
          double biasSelect = THTensor_get2d(bias,wx,wy);
          THTensor_set3d(output,x,y,0,dot+biasSelect);
        }
    }

  THTensor_free(inputPlane);
  THTensor_free(inputNarrowedX);
  THTensor_free(inputNarrowedYX);
  THTensor_free(weightSelectedX);
  THTensor_free(weightSelectedYX);
  return 1;
}
示例#21
0
/* LcEncoder backward: accumulates gradWeight and gradBias, and computes
 * gradInput, for the locally-connected encoder (the (x,y) -> window /
 * weight-slice mapping matches nn_LcEncoder_forward in this file).
 * For each gradOutput element g at (x, y), with wx = x%woutX, wy = y%woutY:
 *   gradWeight[.,.,wx,wy] += g * (input window at ix, iy)
 *   gradBias[wx,wy]       += g
 *   gradInput window      += g * weight[.,.,wx,wy]
 * Returns 1 Lua value. */
static int nn_LcEncoder_backward(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor_id);  
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor_id);  
  int winX = luaT_getfieldcheckint(L, 1, "winX");
  int winY = luaT_getfieldcheckint(L, 1, "winY");
  int woutX = luaT_getfieldcheckint(L, 1, "woutX");
  int woutY = luaT_getfieldcheckint(L, 1, "woutY");
  double xStep = luaT_getfieldchecknumber(L, 1, "xStep");
  double yStep = luaT_getfieldchecknumber(L, 1, "yStep");

  luaL_argcheck(L, input->nDimension == 3, 2, "input 3D tensor expected");
  luaL_argcheck(L, input->size[2] == 1, 2, "invalid input 3rd dim size has to be 1");
  luaL_argcheck(L, gradOutput->nDimension == 3, 3, "gradOutput 3D tensor expected");
  luaL_argcheck(L, gradOutput->size[2] == 1, 3, "invalid gradOutput 3rd dim size has to be 1");

  THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor_id);
  THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_Tensor_id);
  THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor_id);
  THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor_id);
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor_id);


  /* ----------------------- gradWeight ----------------------- */
  /* NOTE: gradWeight is zeroed here (overwrite semantics), while gradBias
     below is accumulated into without clearing -- presumably cleared by
     the caller; confirm in the Lua wrapper. */
  THTensor_fill(gradWeight, 0);

  THTensor *inputPlane, *inputNarrowedX, *inputNarrowedYX;
  inputPlane = THTensor_new();
  inputNarrowedX = THTensor_new();
  inputNarrowedYX = THTensor_new();
  THTensor_select(inputPlane, input, 2, 0);

  THTensor *gradWeightSelectedX, *gradWeightSelectedYX;
  gradWeightSelectedX = THTensor_new();
  gradWeightSelectedYX = THTensor_new();

  /* ----------------------- gradInput ------------------------ */
  THTensor_resizeAs(gradInput, input);
  THTensor_fill(gradInput, 0);

  THTensor *gradInputPlane, *gradInputNarrowedX, *gradInputNarrowedYX;
  gradInputPlane = THTensor_new();
  gradInputNarrowedX = THTensor_new();
  gradInputNarrowedYX = THTensor_new();
  THTensor_select(gradInputPlane, gradInput, 2, 0);

  THTensor *weightSelectedX, *weightSelectedYX;
  weightSelectedX = THTensor_new();
  weightSelectedYX = THTensor_new();


  int y,x,iy,ix,wy,wx;
  for (y = 0; y<gradOutput->size[1]; y++)
    {
      iy = (int)floor(y*yStep);  /* window top edge */
      wy = y%woutY;              /* periodic weight row index */
      for (x = 0; x<gradOutput->size[0]; x++)
        {
          ix = (int)floor(x*xStep);  /* window left edge */
          wx = x%woutX;              /* periodic weight column index */
          double gradOutVal = THTensor_get3d(gradOutput,x,y,0);

          /* ----------------------- gradWeight ----------------------- */
          THTensor_narrow(inputNarrowedX, inputPlane, 0, ix, winX);
          THTensor_narrow(inputNarrowedYX, inputNarrowedX, 1, iy, winY);
          THTensor_select(gradWeightSelectedX, gradWeight, 3, wy);
          THTensor_select(gradWeightSelectedYX, gradWeightSelectedX, 2, wx);
          THTensor_addTensor(gradWeightSelectedYX, gradOutVal, inputNarrowedYX);
          /* ----------------------- gradBias ----------------------- */
          THTensor_set2d(gradBias,wx,wy, THTensor_get2d(gradBias,wx,wy) + gradOutVal);
          /* ----------------------- gradInput ------------------------ */
          THTensor_narrow(gradInputNarrowedX, gradInputPlane, 0, ix, winX);
          THTensor_narrow(gradInputNarrowedYX, gradInputNarrowedX, 1, iy, winY);
          THTensor_select(weightSelectedX, weight, 3, wy);
          THTensor_select(weightSelectedYX, weightSelectedX, 2, wx);
          THTensor_addTensor(gradInputNarrowedYX, gradOutVal, weightSelectedYX);
        }
    }

  /* free gradWeight  */
  THTensor_free(inputPlane);
  THTensor_free(inputNarrowedX);
  THTensor_free(inputNarrowedYX);
  THTensor_free(gradWeightSelectedX);
  THTensor_free(gradWeightSelectedYX);
  /* free gradInput  */
  THTensor_free(gradInputPlane);
  THTensor_free(gradInputNarrowedX);
  THTensor_free(gradInputNarrowedYX);
  THTensor_free(weightSelectedX);
  THTensor_free(weightSelectedYX);

  return 1;
}
示例#22
0
/* SpatialFullConvolution backward w.r.t. input, CPU path.  Because the
 * forward is a transposed convolution, the input gradient is an ordinary
 * convolution of gradOutput with the weights:
 *   gradColumns = im2col(gradOutput_n)
 *   gradInput_n = weight x gradColumns      (GEMM)
 * 3D inputs are temporarily promoted to 4D IN PLACE (input AND gradOutput)
 * and restored before returning.  Returns 1 Lua value (gradInput field). */
static int nn_(SpatialFullConvolution_updateGradInput)(lua_State *L) {
  // Inputs
  THTensor *input = (THTensor *)luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = (THTensor *)luaT_checkudata(L, 3, torch_Tensor);

  // Params
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
  int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
  int padW = luaT_getfieldcheckint(L, 1, "padW");
  int padH = luaT_getfieldcheckint(L, 1, "padH");
  int adjW = luaT_getfieldcheckint(L, 1, "adjW");
  int adjH = luaT_getfieldcheckint(L, 1, "adjH");

  THTensor *weight = (THTensor *)luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
  THTensor *gradColumns = (THTensor*)luaT_getfieldcheckudata(L, 1, "finput", torch_Tensor);
  THTensor *gradInput = (THTensor *)luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);

  luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D (batch mode) tensor is expected");

  /* batch==0 marks 3D tensors promoted to 4D that must be restored below */
  int batch = 1;
  if (input->nDimension == 3) {
    // Force batch
    batch = 0;
    THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
    THTensor_(resize4d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2]);
  }

  long inputWidth   = input->size[3];
  long inputHeight  = input->size[2];
  long outputWidth  = (inputWidth - 1) * dW - 2*padW + kW + adjW;
  long outputHeight = (inputHeight - 1) * dH - 2*padH + kH + adjH;

  // Batch size + input planes
  long batchSize = input->size[0];

  // Resize output
  THTensor_(resize4d)(gradInput, batchSize, nInputPlane, inputHeight, inputWidth);

  // Resize temporary columns
  THTensor_(resize2d)(gradColumns, nOutputPlane*kW*kH, inputHeight*inputWidth);

  // Helpers: per-sample views
  THTensor *gradInput_n = THTensor_(new)();
  THTensor *gradOutput_n = THTensor_(new)();

  int elt;
  // For each elt in batch, do:
  for (elt = 0; elt < batchSize; elt ++) {
    // Matrix mulitply per sample:
    THTensor_(select)(gradInput_n, gradInput, 0, elt);
    THTensor_(select)(gradOutput_n, gradOutput, 0, elt);

    // Extract columns:
    nn_(im2col)(
      THTensor_(data)(gradOutput_n),
      nOutputPlane, outputHeight, outputWidth, kH, kW, padH, padW, dH, dW,
      THTensor_(data)(gradColumns)
    );


    // M,N,K are dims of matrix A and B
    // (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
    long m = weight->size[0];
    long n = gradColumns->size[1];
    long k = weight->size[1] * weight->size[2] * weight->size[3];

    // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
    // gradInput_n (m x n) = weight (m x k) * gradColumns (k x n)
    THBlas_(gemm)(
        'n', 'n',
        n, m, k,
        1,
        THTensor_(data)(gradColumns), n,
        THTensor_(data)(weight), k,
        0,
        THTensor_(data)(gradInput_n), n
    );
  }


  // Free
  THTensor_(free)(gradInput_n);
  THTensor_(free)(gradOutput_n);

  // Resize output
  if (batch == 0) {
    THTensor_(resize3d)(gradOutput, nOutputPlane, outputHeight, outputWidth);
    THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth);
    THTensor_(resize3d)(gradInput, nInputPlane, inputHeight, inputWidth);
  }

  // Return gradInput
  return 1;
}
示例#23
0
/* HorizontalConvolution forward: depthwise 1 x kL convolution along the
 * width axis.  Output plane i = bias[i] + sum_k weight[i][k] * (input
 * plane i shifted right by k); valid mode, so width shrinks by kL - 1.
 * A 3D input is temporarily promoted to 4D IN PLACE and restored below.
 * NOTE(review): planes are paired i -> i, which only covers all output
 * planes when nInputPlane == nOutputPlane -- presumably a depthwise
 * module invariant; confirm in the Lua wrapper.
 * Returns 1 Lua value (the "output" field). */
static int nnconv1d_(HorizontalConvolution_updateOutput)(lua_State *L)
{
   THTensor *input = luaT_checkudata(L, 2, torch_Tensor);

   int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
   int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
   int kL = luaT_getfieldcheckint(L, 1, "kL");

   THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
   THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor);
   THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);

   luaL_argcheck(L, input->nDimension == 3 ||
                    input->nDimension == 4, 2, "3D or 4D (batch mode) tensor expected");

   // change to batch mode
   int batch = 1;
   if (input->nDimension == 3) {
      batch = 0;
      THTensor_(resize4d)(input, 1, nInputPlane, input->size[1], input->size[2]);
   }

   long batchSize    = input->size[0];
   long inputHeight  = input->size[2];
   long inputWidth   = input->size[3];
   long outputHeight = inputHeight;
   long outputWidth  = inputWidth - kL + 1;

   THTensor_(resize4d)(output, batchSize, nOutputPlane, outputHeight, outputWidth);

   int elt;
#pragma omp parallel for private(elt)
   for (elt = 0; elt < batchSize; elt++) {

      // select each batch
      THTensor *input_t  = THTensor_(newSelect)(input, 0, elt);
      THTensor *output_t = THTensor_(newSelect)(output, 0, elt);

      // fill biases
      // (raw-storage fill of a whole plane; output was just resized, so
      //  each plane is dense)
      int i, j, k;
      for (i = 0; i < nOutputPlane; i++) {
         THVector_(fill)(output_t->storage->data+output_t->storageOffset+output_t->stride[0]*i,
                         THTensor_(get1d)(bias, i), outputHeight*outputWidth);
      }

      // convolve horizontally
      // output row (i, j) += weight[i][k] * input row (i, j) offset by k
      for (i = 0; i < nInputPlane; i++) {
         for (j = 0; j < inputHeight; j++) {
            for (k = 0; k < kL; k++) {
               THVector_(add)(output_t->storage->data + output_t->storageOffset +
                              output_t->stride[0]*i + output_t->stride[1]*j,
                              input_t->storage->data + input_t->storageOffset +
                              input_t->stride[0]*i + input_t->stride[1]*j + k,
                              *(THTensor_(data)(weight)+i*kL+k), outputWidth);
            }
         }
      }

      // release temp tensors
      THTensor_(free)(input_t);
      THTensor_(free)(output_t);
   }

   // revert to single batch
   if (batch == 0) {
      THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth);
      THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth);
   }

   return 1;
}
示例#24
0
/* Accumulates gradWeight and gradBias for the depthwise horizontal
 * (1 x kL) convolution.
 *   gradWeight[i][k] += scale * sum_j dot(gradOutput plane i row j,
 *                                         input plane i row j shifted by k)
 *   gradBias         += scale * per-plane sums of gradOutput (via addmv
 *                       against a ones vector)
 * 3D inputs are temporarily promoted to 4D IN PLACE and restored at the
 * end.  Returns 0 Lua values.
 *
 * BUG FIX vs. previous revision: the 2D view used for the bias update was
 * built with gradOutput->storageOffset, i.e. it always aliased batch
 * element 0, so gradBias accumulated the first sample's gradient
 * batchSize times.  It now uses the per-sample view (gradOutput_t), whose
 * storageOffset distinguishes batch elements (newSelect shares storage).
 *
 * NOTE(review): the dot-product loops pair input plane i with gradOutput
 * plane i, assuming nInputPlane == nOutputPlane (depthwise module) --
 * confirm in the Lua wrapper. */
static int nnconv1d_(HorizontalConvolution_accGradParameters)(lua_State *L)
{
   THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
   THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
   real scale = luaL_optnumber(L, 4, 1);
   int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
   int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
   int kL = luaT_getfieldcheckint(L, 1, "kL");

   THTensor *ones = luaT_getfieldcheckudata(L, 1, "ones", torch_Tensor);
   THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_Tensor);
   THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor);

   THArgCheck(nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 1,
              "Number of output features is not equal to nOutputPlane" );

   // change to batch mode
   int batch = 1;
   if (input->nDimension == 3) {
      batch = 0;
      THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
      THTensor_(resize4d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2]);
   }

   long batchSize    = input->size[0];
   long inputHeight  = input->size[2];
   long inputWidth   = input->size[3];
   long outputHeight = inputHeight;
   long outputWidth  = inputWidth - kL + 1;

   // shared ones buffer for the bias reduction; only ever grows
   if (ones->nDimension != 1 || ones->size[0] < outputHeight*outputWidth) {
      THTensor_(resize1d)(ones, outputHeight*outputWidth);
      THTensor_(fill)(ones, 1);
   }

   int elt;
   for (elt = 0; elt < batchSize; elt++) {

      // select each batch in 2D
      THTensor *input_t      = THTensor_(newSelect)(input, 0, elt);
      THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, elt);
      // (nOutputPlane x outputHeight*outputWidth) view of THIS sample's
      // gradient -- must use the per-sample storageOffset (see header)
      THTensor *gradOutput2d = THTensor_(newWithStorage2d)(gradOutput_t->storage, gradOutput_t->storageOffset,
                                   nOutputPlane, -1, outputWidth*outputHeight, -1);

      // dot products
      int i, j, k;
      for (i = 0; i < nInputPlane; i++) {
         for (k = 0; k < kL; k++) {
             for (j = 0; j < outputHeight; j++) {
                *(gradWeight->storage->data + gradWeight->storageOffset + i*gradWeight->stride[0] + k) +=
                   scale*THBlas_(dot)
                      (outputWidth,
                       gradOutput_t->storage->data + gradOutput_t->storageOffset +
                       i*gradOutput_t->stride[0] + j*gradOutput_t->stride[1],
                       gradOutput_t->stride[2],
                       input_t->storage->data + input_t->storageOffset +
                       i*input_t->stride[0] + j*input_t->stride[1] + k,
                       input_t->stride[2]);
            }
         }
      }

      // fill biases: gradBias += scale * gradOutput2d * ones
      THTensor_(addmv)(gradBias, 1, gradBias, scale, gradOutput2d, ones);

      THTensor_(free)(gradOutput2d);
      THTensor_(free)(input_t);
      THTensor_(free)(gradOutput_t);
   }

   // revert to single batch
   if (batch == 0) {
      THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth);
      THTensor_(resize3d)(gradOutput, nOutputPlane, outputHeight, outputWidth);
   }

   return 0;
}
示例#25
0
/* SpatialAdaptiveMaxPooling forward: max-pools each input plane down to a
 * fixed W x H output regardless of input size (region boundaries are
 * computed by the _frame helper).  `indices` records, for each output
 * element, the argmax location as two slabs of size
 * (nslices x oheight x owidth); NOTE(review): which slab holds the
 * row vs column coordinate is decided inside the _frame helper -- confirm
 * there.  Supports strided (non-contiguous) input via explicit strides.
 * Returns 1 Lua value (the "output" field). */
static int nn_(SpatialAdaptiveMaxPooling_updateOutput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  long oheight = luaT_getfieldcheckint(L, 1, "H");
  long owidth = luaT_getfieldcheckint(L, 1, "W");
  THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_Tensor);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);
  int dimw = 2;
  int dimh = 1;
  long nbatch = 1;
  long nslices;
  long iheight;
  long iwidth;
  
  /* input strides for plane/row/column; istride_b is only assigned (and
     only used) in the 4D batch branch */
  long istride_d;
  long istride_h;
  long istride_w;
  long istride_b;

  real *input_data;
  real *output_data;
  real *indices_data;


  luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4 , 2, "3D or 4D (batch mode) tensor expected");

  if (input->nDimension == 4) 
  {
    istride_b = input->stride[0];
    nbatch = input->size[0];
    dimw++;
    dimh++;
  }

  /* sizes */
  nslices = input->size[dimh-1];
  iheight = input->size[dimh];
  iwidth = input->size[dimw];
  /* strides */
  istride_d = input->stride[dimh-1];
  istride_h = input->stride[dimh];
  istride_w = input->stride[dimw];

  /* resize output */
  if (input->nDimension == 3)
  {
    THTensor_(resize3d)(output, nslices, oheight, owidth);
    /* indices will contain i,j locations for each output point */
    THTensor_(resize4d)(indices, 2, nslices, oheight, owidth);

    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);
    indices_data = THTensor_(data)(indices);

    /* second slab of indices starts nslices*owidth*oheight in */
    nn_(SpatialAdaptiveMaxPooling_updateOutput_frame)(input_data, output_data,
                                                      indices_data+nslices*owidth*oheight, indices_data,
                                                      nslices,
                                                      iwidth, iheight,
                                                      owidth, oheight,
                                                      istride_w,istride_h,
                                                      istride_d);
  }
  else
  {
    long p;

    THTensor_(resize4d)(output, nbatch, nslices, oheight, owidth);
    /* indices will contain i,j locations for each output point */
    THTensor_(resize5d)(indices, 2, nbatch, nslices, oheight, owidth);

    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);
    indices_data = THTensor_(data)(indices);

/* one thread per batch element; output/indices offsets computed from sizes
   (both were just resized, hence dense) */
#pragma omp parallel for private(p)
    for (p = 0; p < nbatch; p++)
    {
      nn_(SpatialAdaptiveMaxPooling_updateOutput_frame)(input_data+p*istride_b, output_data+p*nslices*owidth*oheight,
                                                        indices_data+(p+nbatch)*nslices*owidth*oheight, indices_data+p*nslices*owidth*oheight,
                                                        nslices,
                                                        iwidth, iheight,
                                                        owidth, oheight,
                                                        istride_w,istride_h,
                                                        istride_d);
    }
  }

  return 1;
}
示例#26
0
/* VolumetricAveragePooling backward w.r.t. input: distributes each
 * gradOutput element over its kT x kW x kH pooling window (the actual
 * scatter is done by the _frame helper).  Handles 4D (single sample) and
 * 5D (batch) input; in batch mode samples are processed in parallel with
 * per-sample pointer offsets computed from sizes, which relies on
 * gradOutput being contiguous (forced below) and gradInput being freshly
 * resized from input (NOTE(review): presumably dense after resizeAs --
 * confirm for non-contiguous `input`).  Returns 1 Lua value. */
static int nn_(VolumetricAveragePooling_updateGradInput)(lua_State *L) {
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  int dT = luaT_getfieldcheckint(L, 1, "dT");
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int kT = luaT_getfieldcheckint(L, 1, "kT");
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput",
                                                torch_Tensor);
  int nslices;
  int itime;
  int iheight;
  int iwidth;
  int otime;
  int oheight;
  int owidth;
  real *gradInput_data;
  real *gradOutput_data;

  /* dims are (plane, t, h, w); all shift by one in 5D batch mode */
  int dimN = 0;
  int dimt = 1;
  int dimh = 2;
  int dimw = 3;

  /* get contiguous gradOutput */
  gradOutput = THTensor_(newContiguous)(gradOutput);

  /* resize */
  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);

  if (input->nDimension == 5) {
    dimN++;
    dimt++;
    dimh++;
    dimw++;
  }

  /* sizes */
  nslices = input->size[dimN];
  itime = input->size[dimt];
  iheight = input->size[dimh];
  iwidth = input->size[dimw];
  otime = gradOutput->size[dimt];
  oheight = gradOutput->size[dimh];
  owidth = gradOutput->size[dimw];

  /* get raw pointers */
  gradInput_data = THTensor_(data)(gradInput);
  gradOutput_data = THTensor_(data)(gradOutput);

  /* backprop */
  if (input->nDimension == 4) { /* non-batch mode*/
    nn_(VolumetricAveragePooling_updateGradInput_frame)(
      gradInput_data, gradOutput_data, nslices,
      itime, iwidth, iheight, otime, owidth, oheight,
      kT, kW, kH, dT, dW, dH);
  } else { /* batch mode */
    long p;
    long nBatch = input->size[0];

    /* per-sample offsets, computed from sizes (dense layout assumed) */
    long istride = nslices * itime * iwidth * iheight;
    long ostride = nslices * otime * owidth * oheight;

/* one thread per batch element; samples write disjoint gradInput regions */
#pragma omp parallel for private(p)
    for (p = 0; p < nBatch; p++) {
      nn_(VolumetricAveragePooling_updateGradInput_frame)(
        gradInput_data  + p * istride, gradOutput_data + p * ostride, nslices,
        itime, iwidth, iheight, otime, owidth, oheight,
        kT, kW, kH, dT, dW, dH);
    }
  }

  /* cleanup: release the contiguous copy made above */
  THTensor_(free)(gradOutput);
  return 1;
}
示例#27
0
static int nn_(SpatialConvolution_accGradParameters)(lua_State *L)
{
  /* Accumulate parameter gradients for SpatialConvolution:
     gradBias[k] += scale * sum(gradOutput plane k), and gradWeight is
     accumulated via the reverse-gradient convolution routines.
     Lua stack: 1 = module, 2 = input, 3 = gradOutput, 4 = optional scale. */
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  real scale = luaL_optnumber(L, 4, 1);
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");

  THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_Tensor);
  THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor);

  THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 1, "Number of output features is not equal to nOutputPlane" );

  /* Height/width axes shift by one when a batch dimension is present. */
  int dimh = (input->nDimension == 4) ? 2 : 1;
  int dimw = dimh + 1;

  real *gradBias_data = THTensor_(data)(gradBias);
  real *gradOutput_data = THTensor_(data)(gradOutput);
  /* Number of elements in one gradOutput feature plane. */
  long planeSize = gradOutput->size[dimh] * gradOutput->size[dimw];

  if (input->nDimension == 3)
  {
    /* Bias gradient: each thread owns exactly one output plane k, so the
       parallel accumulation into gradBias_data[k] is race-free. */
    long k;
    #pragma omp parallel for private(k)
    for (k = 0; k < nOutputPlane; k++)
    {
      real *plane = gradOutput_data + k * planeSize;
      long l;
      for (l = 0; l < planeSize; l++)
        gradBias_data[k] += scale * plane[l];
    }

    /* Weight gradient for a single sample. */
    THTensor_(conv2DRevger)(gradWeight, 1.0, scale, input, gradOutput, dH, dW);
  }
  else
  {
    /* Batch mode: sum the bias gradient over every sample p as well. */
    long k;
    #pragma omp parallel for private(k)
    for (k = 0; k < nOutputPlane; k++)
    {
      long p;
      for (p = 0; p < input->size[0]; p++)
      {
        real *plane = gradOutput_data + (p * nOutputPlane + k) * planeSize;
        long l;
        for (l = 0; l < planeSize; l++)
          gradBias_data[k] += scale * plane[l];
      }
    }

    /* Weight gradient summed over the batch. */
    THTensor_(conv2DRevgerm)(gradWeight, 1.0, scale, input, gradOutput, dH, dW);
  }
  return 0;
}
示例#28
0
static int nn_(VolumetricAveragePooling_updateOutput)(lua_State *L) {
  /* Forward pass of volumetric (3D) average pooling.
     Lua stack: 1 = module, 2 = input (4D CxTxHxW or 5D NxCxTxHxW).
     Resizes and fills self.output, returns it. */
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  int kT = luaT_getfieldcheckint(L, 1, "kT");
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int dT = luaT_getfieldcheckint(L, 1, "dT");
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);

  luaL_argcheck(L, input->nDimension == 4 || input->nDimension == 5, 2,
                "4D or 5D (batch-mode) tensor expected");

  /* A leading batch dimension shifts every axis index by one. */
  int shift = (input->nDimension == 5) ? 1 : 0;
  int dimt = shift + 1;
  int dimh = shift + 2;
  int dimw = shift + 3;

  luaL_argcheck(L, input->size[dimw] >= kW && input->size[dimh] >= kH &&
                input->size[dimt] >= kT, 2,
                "input image smaller than kernel size");

  long nslices = input->size[shift];
  long itime   = input->size[dimt];
  long iheight = input->size[dimh];
  long iwidth  = input->size[dimw];
  /* "valid" pooling: no padding, floor division. */
  long otime   = (itime   - kT) / dT + 1;
  long oheight = (iheight - kH) / dH + 1;
  long owidth  = (iwidth  - kW) / dW + 1;

  /* Raw pointer arithmetic below requires contiguous storage. */
  input = THTensor_(newContiguous)(input);

  real *input_data;
  real *output_data;

  if (input->nDimension == 4) { /* single sample */
    THTensor_(resize4d)(output, nslices, otime, oheight, owidth);

    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);

    nn_(VolumetricAveragePooling_updateOutput_frame)(input_data, output_data,
                                                     nslices,
                                                     itime, iwidth, iheight,
                                                     otime, owidth, oheight,
                                                     kT, kW, kH, dT, dW, dH);
  } else { /* batch mode: samples are independent, pool them in parallel */
    long nBatch  = input->size[0];
    long istride = nslices * itime * iwidth * iheight;
    long ostride = nslices * otime * owidth * oheight;
    long p;

    THTensor_(resize5d)(output, nBatch, nslices, otime, oheight, owidth);

    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);

#pragma omp parallel for private(p)
    for (p = 0; p < nBatch; p++) {
      nn_(VolumetricAveragePooling_updateOutput_frame)(
        input_data + p * istride, output_data + p * ostride,
        nslices, itime, iwidth, iheight, otime, owidth, oheight,
        kT, kW, kH, dT, dW, dH);
    }
  }

  /* Drop the reference created by newContiguous. */
  THTensor_(free)(input);
  return 1;
}
示例#29
0
static int nnconv1d_(HorizontalConvolution_updateGradInput)(lua_State *L)
{
   /* Backward (gradient w.r.t. input) for a per-plane horizontal 1-D
      convolution: the inner loops scatter each gradOutput row of plane i back
      over the kL taps of plane i's kernel -- no cross-plane mixing.
      Lua stack: 1 = module, 2 = input, 3 = gradOutput.
      Returns gradInput on the Lua stack. */
   THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
   THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);

   int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
   int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
   int kL = luaT_getfieldcheckint(L, 1, "kL");  /* kernel length along width */

   THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
   THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);

   THArgCheck(nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 1,
              "Number of output features is not equal to nOutputPlane" );

   // change to batch mode
   // Temporarily reshape 3D (CxHxW) tensors to 4D (1xCxHxW) so the loop below
   // can assume batch layout; reverted before returning.
   int batch = 1;
   if (input->nDimension == 3) {
      batch = 0;
      THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
      THTensor_(resize4d)(gradOutput, 1, nOutputPlane, gradOutput->size[1], gradOutput->size[2]);
   }

   long batchSize    = input->size[0];
   long inputHeight  = input->size[2];
   long inputWidth   = input->size[3];
   long outputHeight = inputHeight;
   long outputWidth  = inputWidth - kL + 1;  /* "valid" convolution width */

   THTensor_(resizeAs)(gradInput, input);
   THTensor_(zero)(gradInput);

   int elt;
   /* Batch elements are independent; each iteration writes only its own
      gradInput slice, so the parallel loop is race-free. */
#pragma omp parallel for private(elt)
   for (elt = 0; elt < batchSize; elt++) {

      // select each batch
      THTensor *gradInput_t  = THTensor_(newSelect)(gradInput, 0, elt);
      THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, elt);

      // convolve horizontally
      /* gradInput[i][j][x+k] += weight[i][k] * gradOutput[i][j][x] for all x.
         NOTE(review): the "+ k" element offset and THVector_(add)'s walk over
         outputWidth consecutive elements assume the innermost (width)
         dimension of both tensors has stride 1. That holds for the freshly
         resized/zeroed gradInput, but TODO confirm gradOutput is always
         contiguous when this is called. */
      int i, j, k;
      for (i = 0; i < nOutputPlane; i++) {
         for (j = 0; j < outputHeight; j++) {
            for (k = 0; k < kL; k++) {
               THVector_(add)(gradInput_t->storage->data + gradInput_t->storageOffset +
                              gradInput_t->stride[0]*i + gradInput_t->stride[1]*j + k,
                              gradOutput_t->storage->data + gradOutput_t->storageOffset +
                              gradOutput_t->stride[0]*i + gradOutput_t->stride[1]*j,
                              *(THTensor_(data)(weight)+i*kL+k), outputWidth);   // needs to change
            }
         }
      }

      // release temp tensors
      THTensor_(free)(gradInput_t);
      THTensor_(free)(gradOutput_t);
   }

   // revert to single batch
   // Undo the temporary 4D reshape so the caller-visible tensors keep their
   // original 3D shape.
   if (batch == 0) {
      THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth);
      THTensor_(resize3d)(gradInput, nInputPlane, inputHeight, inputWidth);
      THTensor_(resize3d)(gradOutput, nOutputPlane, outputHeight, outputWidth);
   }

   return 1;
}
示例#30
0
static int nn_(SpatialFullConvolution_accGradParameters)(lua_State *L) {
  /* Accumulate gradWeight and gradBias for SpatialFullConvolution
     (transposed convolution). For each batch element, gradOutput is unfolded
     into columns (im2col) and multiplied against the input via GEMM to
     accumulate into gradWeight; gradBias is accumulated by multiplying each
     gradOutput plane with an all-ones vector via GEMV.
     Lua stack: 1 = module, 2 = input, 3 = gradOutput, 4 = optional scale.
     Returns nothing. */
  // Inputs
  THTensor *input = (THTensor *)luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = (THTensor *)luaT_checkudata(L, 3, torch_Tensor);

  // Params
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
  int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
  int padW = luaT_getfieldcheckint(L, 1, "padW");
  int padH = luaT_getfieldcheckint(L, 1, "padH");
  int adjW = luaT_getfieldcheckint(L, 1, "adjW");
  int adjH = luaT_getfieldcheckint(L, 1, "adjH");
  /* NOTE(review): declared float while sibling functions use real for the
     scale argument -- verify this narrowing is intended. */
  float scale = luaL_optnumber(L, 4, 1);

  THTensor *gradWeight = (THTensor *)luaT_getfieldcheckudata(L, 1, "gradWeight", torch_Tensor);
  THTensor *gradBias = (THTensor *)luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor);
  THTensor *columns = (THTensor*)luaT_getfieldcheckudata(L, 1, "finput", torch_Tensor);
  THTensor *ones = (THTensor*)luaT_getfieldcheckudata(L, 1, "fgradInput", torch_Tensor);

  luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D (batch mode) tensor is expected");

  int batch = 1;
  if (input->nDimension == 3) {
    // Force batch
    // Temporarily view 3D tensors as a batch of one; reverted at the end.
    batch = 0;
    THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
    THTensor_(resize4d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2]);
  }

  /* Transposed-convolution output geometry (inverse of the conv formula). */
  long inputWidth   = input->size[3];
  long inputHeight  = input->size[2];
  long outputWidth  = (inputWidth - 1) * dW - 2*padW + kW + adjW;
  long outputHeight = (inputHeight - 1) * dH - 2*padH + kH + adjH;

  // Batch size + input planes
  long batchSize = input->size[0];

  // Define a buffer of ones, for bias accumulation
  if (ones->nDimension != 2 || ones->size[0]*ones->size[1] < outputHeight*outputWidth) {
    // Resize plane and fill with ones...
    THTensor_(resize2d)(ones, outputHeight, outputWidth);
    THTensor_(fill)(ones, 1);
  }

  // Resize temporary columns
  // One column per input pixel; each column holds an unfolded kHxkW patch of
  // every gradOutput plane.
  THTensor_(resize2d)(columns, nOutputPlane*kW*kH, inputHeight*inputWidth);

  // Helpers
  THTensor *input_n = THTensor_(new)();
  THTensor *gradOutput_n = THTensor_(new)();

  int elt;
  // For each elt in batch, do:
  for (elt = 0; elt < batchSize; elt ++) {
    // Matrix mulitply per output:
    THTensor_(select)(input_n, input, 0, elt);
    THTensor_(select)(gradOutput_n, gradOutput, 0, elt);

    // Extract columns:
    // Unfold gradOutput_n into `columns` (im2col).
    nn_(im2col)(
      THTensor_(data)(gradOutput_n),
      nOutputPlane, outputHeight, outputWidth, kH, kW, padH, padW, dH, dW,
      THTensor_(data)(columns)
    );

    // M,N,K are dims of matrix A and B
    // (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
    long n = columns->size[0];   // nOutputPlane * kh * kw
    long m = input_n->size[0];   // nInputPlane
    long k = columns->size[1];   // inputHeight * inputWidth

    // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
    // Accumulates scale * (unfolded gradOutput columns x input) into
    // gradWeight; beta = 1 so gradients sum over the batch.
    THBlas_(gemm)(
        't', 'n',
        n, m, k,
        scale,
        THTensor_(data)(columns), k,
        THTensor_(data)(input_n), k,
        1,
        THTensor_(data)(gradWeight), n
    );


    // Do Bias:
    // M,N,K are dims of matrix A and B
    // (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
    long m_ = nOutputPlane;
    long k_ = outputHeight * outputWidth;

    // Do GEMV (note: this is a bit confusing because gemv assumes column-major matrices)
    // gradBias[k] += scale * sum(gradOutput_n plane k), computed as a
    // matrix-vector product with the all-ones vector; beta = 1 accumulates.
    THBlas_(gemv)(
        't',
        k_, m_,
        scale,
        THTensor_(data)(gradOutput_n), k_,
        THTensor_(data)(ones), 1,
        1,
        THTensor_(data)(gradBias), 1
    );
  }

  // Free
  THTensor_(free)(input_n);
  THTensor_(free)(gradOutput_n);

  // Resize
  // Restore the caller-visible 3D shapes if we forced batch mode above.
  if (batch == 0) {
    THTensor_(resize3d)(gradOutput, nOutputPlane, outputHeight, outputWidth);
    THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth);
  }

  // Return nothing
  return 0;
}