Beispiel #1
0
/* Lua binding: pushes the `n` member of the SVQP2 object found at stack
 * index 1 and returns that single value. */
static int QPSolver_n(lua_State *L)
{
  SVQP2 *solver = (SVQP2*)luaT_checkudata(L, 1, QPSolver_id);

  lua_pushnumber(L, solver->n);

  return 1;
}
Beispiel #2
0
// Lua binding: fills the module's `output` tensor with a per-pixel
// dissimilarity between each input pixel and its right neighbour (written to
// output plane 0) and its bottom neighbour (written to output plane 1),
// accumulated over all input channels.
//   stack[1]: module table, read for `connex`, `dist`, `normalize`, `output`
//   stack[2]: input tensor, indexed here as (channel, y, x)
// dist == 0 selects the Euclidean branch; any other value selects the cosine
// branch.
// Returns 1 Lua value.
// NOTE(review): nothing is pushed explicitly before `return 1`; this presumably
// relies on the last luaT_getfieldcheckudata call leaving `output` on top of
// the Lua stack -- confirm against luaT before reordering the field reads.
// NOTE(review): `output` is zeroed but never resized in this function;
// presumably the Lua side sizes it beforehand -- confirm.
static int nn_(SpatialGraph_updateOutput)(lua_State *L)
{
  // get all params
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  int connex = luaT_getfieldcheckint(L, 1, "connex");
  int dist = luaT_getfieldcheckint(L, 1, "dist");
  int norm = luaT_getfieldcheckint(L, 1, "normalize");
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);

  // dims
  int iwidth = input->size[2];
  int iheight = input->size[1];
  int ichannels = input->size[0];
  int owidth = iwidth;
  int oheight = iheight;
  // number of output planes post-processed by the sqrt pass below
  int ochannels = connex / 2;

  // norm ?
  // scale factor used only by the Euclidean branch (applied after the sqrt)
  double normer = (norm == 1) ? 1/sqrt(ichannels) : 1;

  // zero output
  // both branches accumulate into output, so it has to start at zero
  THTensor_(zero)(output);

  // Euclidean distance
  if (dist == 0) {
    // Sum[ (Xi - Xi+1)^2 ]
    int x,y,k;
    for (k=0; k<ichannels; k++) {
      for (y=0; y<oheight; y++) {
        for (x=0; x<owidth; x++) {
          // horizontal edge: skipped on the last column (no right neighbour)
          if (x < owidth-1) {
            double temp = square(THTensor_(get3d)(input, k, y, x) - THTensor_(get3d)(input, k, y, x+1));
            THTensor_(set3d)(output, 0, y, x, temp + THTensor_(get3d)(output, 0, y, x));
          }
          // vertical edge: skipped on the last row (no bottom neighbour)
          if (y < oheight-1) {
            double temp = square(THTensor_(get3d)(input, k, y, x) - THTensor_(get3d)(input, k, y+1, x));
            THTensor_(set3d)(output, 1, y, x, temp + THTensor_(get3d)(output, 1, y, x));
          }
        }
      }
    }

    // Sqrt[ Sum[ (Xi - Xi+1)^2 ] ]
    // applied to the first `ochannels` planes, then scaled by normer
    for (k=0; k<ochannels; k++) {
      for (y=0; y<oheight; y++) {
        for (x=0; x<owidth; x++) {
          THTensor_(set3d)(output, k, y, x, sqrt(THTensor_(get3d)(output, k, y, x)) * normer);
        }
      }
    }

    // Cosine dissimilarity
  } else {
    // add epsilon to input (to get rid of 0s)
    // works on a private copy so the caller's input is left untouched
    THTensor *inputb = THTensor_(newWithSize3d)(input->size[0], input->size[1], input->size[2]);
    THTensor_(copy)(inputb, input);
    THTensor_(add)(inputb, inputb, 1e-12);

    // Sum[ (Xi * Xi+1) ]
    int x,y,k;
    for (y=0; y<oheight; y++) {
      for (x=0; x<owidth; x++) {
        // squared norms over channels of: this pixel (A), its right
        // neighbour (B) and its bottom neighbour (C)
        double norm_A = 0;
        double norm_B = 0;
        double norm_C = 0;
        for (k=0; k<ichannels; k++) {
          norm_A += square(THTensor_(get3d)(inputb, k, y, x));
          if (x < owidth-1) {
            // accumulate the dot product with the right neighbour in plane 0
            double temp = THTensor_(get3d)(inputb, k, y, x) * THTensor_(get3d)(inputb, k, y, x+1);
            THTensor_(set3d)(output, 0, y, x, temp + THTensor_(get3d)(output, 0, y, x));
            norm_B += square(THTensor_(get3d)(inputb, k, y, x+1));
          }
          if (y < oheight-1) {
            // accumulate the dot product with the bottom neighbour in plane 1
            double temp = THTensor_(get3d)(inputb, k, y, x) * THTensor_(get3d)(inputb, k, y+1, x);
            THTensor_(set3d)(output, 1, y, x, temp + THTensor_(get3d)(output, 1, y, x));
            norm_C += square(THTensor_(get3d)(inputb, k, y+1, x));
          }
        }
        // turn the accumulated dot products into dissimilarities:
        //   normalized:   1 - dot / (|A| * |neighbour|)
        //   unnormalized: ichannels - dot
        if (x < owidth-1) {
          if (norm) {
            THTensor_(set3d)(output, 0, y, x, 1 - THTensor_(get3d)(output, 0, y, x) / (sqrt(norm_A) * sqrt(norm_B)));
          } else {
            THTensor_(set3d)(output, 0, y, x, ichannels - THTensor_(get3d)(output, 0, y, x));
          }
        }
        if (y < oheight-1) {
          if (norm) {
            THTensor_(set3d)(output, 1, y, x, 1 - THTensor_(get3d)(output, 1, y, x) / (sqrt(norm_A) * sqrt(norm_C)));
          } else {
            THTensor_(set3d)(output, 1, y, x, ichannels - THTensor_(get3d)(output, 1, y, x));
          }
        }
      }
    }

    // Cleanup
    THTensor_(free)(inputb);
  }

  return 1;
}
Beispiel #3
0
/*
 * Lua binding: forward pass of adaptive max pooling. Resizes the module's
 * `output` and `indices` tensors to the requested H x W output size and
 * delegates the per-frame work to
 * nn_(SpatialAdaptiveMaxPooling_updateOutput_frame).
 *   stack[1]: module table, read for `H`, `W`, `indices`, `output`
 *   stack[2]: input tensor, 3D or 4D (4D = batch mode, batch in dimension 0)
 * Raises a Lua argument error for any other dimensionality.
 * Returns 1 Lua value.
 * NOTE(review): nothing is pushed explicitly before `return 1`; this
 * presumably relies on the last luaT_getfieldcheckudata call leaving `output`
 * on top of the Lua stack -- confirm before reordering the field reads.
 */
static int nn_(SpatialAdaptiveMaxPooling_updateOutput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  long oheight = luaT_getfieldcheckint(L, 1, "H");
  long owidth = luaT_getfieldcheckint(L, 1, "W");
  THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_Tensor);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);
  /* dimension numbers of width/height for the 3D case; shifted by one below
   * when a leading batch dimension is present */
  int dimw = 2;
  int dimh = 1;
  long nbatch = 1;
  long nslices;
  long iheight;
  long iwidth;
  
  long istride_d;
  long istride_h;
  long istride_w;
  /* only assigned, and only read, in the 4D (batch) case */
  long istride_b;

  real *input_data;
  real *output_data;
  real *indices_data;


  luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4 , 2, "3D or 4D (batch mode) tensor expected");

  if (input->nDimension == 4) 
  {
    istride_b = input->stride[0];
    nbatch = input->size[0];
    dimw++;
    dimh++;
  }

  /* sizes */
  nslices = input->size[dimh-1];
  iheight = input->size[dimh];
  iwidth = input->size[dimw];
  /* strides */
  /* the input is addressed through its strides rather than assumed dense */
  istride_d = input->stride[dimh-1];
  istride_h = input->stride[dimh];
  istride_w = input->stride[dimw];

  /* resize output */
  if (input->nDimension == 3)
  {
    THTensor_(resize3d)(output, nslices, oheight, owidth);
    /* indices will contain i,j locations for each output point */
    THTensor_(resize4d)(indices, 2, nslices, oheight, owidth);

    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);
    indices_data = THTensor_(data)(indices);

    /* the two index arguments point at the second and the first
     * nslices*oheight*owidth block of `indices`, in that order */
    nn_(SpatialAdaptiveMaxPooling_updateOutput_frame)(input_data, output_data,
                                                      indices_data+nslices*owidth*oheight, indices_data,
                                                      nslices,
                                                      iwidth, iheight,
                                                      owidth, oheight,
                                                      istride_w,istride_h,
                                                      istride_d);
  }
  else
  {
    long p;

    THTensor_(resize4d)(output, nbatch, nslices, oheight, owidth);
    /* indices will contain i,j locations for each output point */
    THTensor_(resize5d)(indices, 2, nbatch, nslices, oheight, owidth);

    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);
    indices_data = THTensor_(data)(indices);

    /* one frame call per batch element; each iteration addresses its own
     * slice of input, output and indices through the offsets computed from p */
#pragma omp parallel for private(p)
    for (p = 0; p < nbatch; p++)
    {
      /* input advances by its batch stride; output and both halves of
       * indices advance by one frame (nslices*owidth*oheight elements),
       * the second half of indices starting nbatch frames in */
      nn_(SpatialAdaptiveMaxPooling_updateOutput_frame)(input_data+p*istride_b, output_data+p*nslices*owidth*oheight,
                                                        indices_data+(p+nbatch)*nslices*owidth*oheight, indices_data+p*nslices*owidth*oheight,
                                                        nslices,
                                                        iwidth, iheight,
                                                        owidth, oheight,
                                                        istride_w,istride_h,
                                                        istride_d);
    }
  }

  return 1;
}
Beispiel #4
0
/* Lua binding: checks that stack index 1 holds a "torch.PipeFile" userdata
 * and hands it to THFile_free(). Returns no Lua values. */
static int torch_PipeFile_free(lua_State *L)
{
  THFile *pipe = luaT_checkudata(L, 1, "torch.PipeFile");

  THFile_free(pipe);

  return 0;
}
Beispiel #5
0
/*
 * Lua binding: backward pass of a spatial convolution. For every
 * (output plane k, input plane i) pair it accumulates into the module's
 * `gradWeight`, `gradBias` and `gradInput`.
 *   stack[1]: module table, read for kW, kH, dW, dH, nInputPlane,
 *             nOutputPlane, weight, gradWeight, gradBias, gradInput
 *   stack[2]: input tensor
 *   stack[3]: gradOutput tensor
 * Planes are selected along dimension 2 of input/gradOutput/gradInput, and
 * along dimensions 3 (output plane) then 2 (input plane) of the weights.
 * gradInput is resized like input and zeroed first; gradWeight and gradBias
 * are accumulated into without being reset here.
 * Returns 1 Lua value.
 * NOTE(review): nothing is pushed explicitly before `return 1`; this
 * presumably relies on the last luaT_getfieldcheckudata call leaving
 * `gradInput` on top of the Lua stack -- confirm before reordering.
 * NOTE(review): a NULL source argument to THTensor_select/unfold/transpose
 * presumably means "operate on the destination view in place" -- confirm
 * against the TH version in use.
 */
static int nn_SpatialConvolution_backward(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor_id);  
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor_id);  
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
  int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");

  THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor_id);
  THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_Tensor_id);
  THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor_id);
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor_id);

  THTensor *gradInputPlane, *unfoldedInputPlane, *unfoldedGradInputPlane, *inputPlane;
  THTensor *gradOutputPlane;
  THTensor *weightPlane, *gradWeightPlane;
  int i, k;

  /* scratch tensors reused as views on every loop iteration; freed at the end */
  gradInputPlane = THTensor_new();
  unfoldedInputPlane = THTensor_new();
  unfoldedGradInputPlane = THTensor_new();
  inputPlane = THTensor_new();
  gradOutputPlane = THTensor_new();
  weightPlane = THTensor_new();
  gradWeightPlane = THTensor_new();
  
  /* Not necessary with partial backprop: */
  THTensor_resizeAs(gradInput, input);
  THTensor_zero(gradInput);

  for(k = 0; k < nOutputPlane; k++)
  {
    /* bias gradient of plane k: add the sum of that gradOutput plane */
    THTensor_select(gradOutputPlane, gradOutput, 2, k);
    THTensor_set1d(gradBias, k, THTensor_get1d(gradBias, k) + THTensor_sum(gradOutputPlane));
      
    for(i = 0; i < nInputPlane; i++)
    {
      /* ------------------------- gradWeight ------------------------------------- */

      /* Get the input image */
      /* unfold plane i into kW x kH windows stepped by dW / dH, then swap
       * dimensions 0<->2 and 1<->3 of the unfolded view */
      THTensor_select(inputPlane, input, 2, i);
      THTensor_unfold(unfoldedInputPlane, inputPlane, 0, kW, dW);
      THTensor_unfold(unfoldedInputPlane, NULL,       1, kH, dH);
      THTensor_transpose(unfoldedInputPlane,NULL,0,2);
      THTensor_transpose(unfoldedInputPlane,NULL,1,3);

      /* Get the good gradWeight for (k,i) (k out, i in) */
      THTensor_select(gradWeightPlane, gradWeight, 3, k);
      THTensor_select(gradWeightPlane, NULL, 2, i);

      /* accumulate into the (k,i) slice of gradWeight with factor 1 */
      THTensor_addT4dotT2(gradWeightPlane, 1, unfoldedInputPlane, gradOutputPlane);

      /* -------------------------- gradInput ------------------------------------- */

      /* Not necessary with partial backprop: */

      /* Get the gradInput image */
      THTensor_select(gradInputPlane, gradInput, 2, i);
      THTensor_unfold(unfoldedGradInputPlane, gradInputPlane, 0, kW, dW);
      THTensor_unfold(unfoldedGradInputPlane, NULL          , 1, kH, dH);

      /* Get the good weight for (k,i) (k out, i in) */
      THTensor_select(weightPlane, weight, 3, k);
      THTensor_select(weightPlane, NULL, 2, i);

      /* accumulate into plane i of gradInput through its unfolded view */
      THTensor_addT2outT2(unfoldedGradInputPlane, 1, gradOutputPlane, weightPlane);
    }
  }

  THTensor_free(gradInputPlane);
  THTensor_free(unfoldedInputPlane);
  THTensor_free(unfoldedGradInputPlane);
  THTensor_free(inputPlane);
  THTensor_free(gradOutputPlane);
  THTensor_free(weightPlane);
  THTensor_free(gradWeightPlane);

  return 1;
}
Beispiel #6
0
/* Lua binding: pushes the storage offset of the tensor at stack index 1,
 * incremented by one, and returns that single value.
 * NOTE(review): the +1 presumably converts to Lua's 1-based convention. */
static int torch_Tensor_(storageOffset)(lua_State *L)
{
  THTensor *self = luaT_checkudata(L, 1, torch_Tensor);

  lua_pushnumber(L, self->storageOffset+1);

  return 1;
}
Beispiel #7
0
/*
 * Lua binding: decodes a JPEG image into a newly allocated tensor.
 *   stack[1]: integer flag; 1 = load from a file, anything else = load from
 *             memory
 *   stack[2]: file name (string) when loading from a file, otherwise a
 *             torch.ByteTensor holding the compressed bytes
 * On success pushes one tensor of size
 * (output_components, output_height, output_width) and returns 1.
 * Raises a Lua error when the file cannot be opened.
 * When libjpeg signals an error (assuming libjpeg_(Main_error) longjmps back
 * to jerr.setjmp_buffer), the decompressor, the input file and any partially
 * filled result tensor are released and no values are returned.
 */
static int libjpeg_(Main_load)(lua_State *L)
{
  /* This struct contains the JPEG decompression parameters and pointers to
   * working space (which is allocated as needed by the JPEG library).
   */
  struct jpeg_decompress_struct cinfo;
  /* We use our private extension JPEG error handler.
   * Note that this struct must live as long as the main JPEG parameter
   * struct, to avoid dangling-pointer problems.
   */
  struct my_error_mgr jerr;
  /* More stuff */
  FILE * infile;                        /* source file (if loading from file) */
  unsigned char * inmem = NULL;         /* source memory (if loading from memory) */
  unsigned long inmem_size = 0;         /* source memory size (bytes) */
  JSAMPARRAY buffer;                    /* Output row buffer */
  int i, k;

  /* The result tensor is assigned after setjmp() and read again in the error
   * path reached through longjmp(). C leaves a non-volatile automatic object
   * modified in between with an indeterminate value, hence the qualifier.
   */
  THTensor * volatile tensor = NULL;
  const int load_from_file = luaL_checkint(L, 1);

  if (load_from_file == 1) {
    const char *filename = luaL_checkstring(L, 2);

    /* Open the input file before doing anything else, so that the setjmp()
     * error recovery below can assume the file is open.
     * "b" is required on platforms that distinguish binary files.
     */
    if ((infile = fopen(filename, "rb")) == NULL)
    {
      luaL_error(L, "cannot open file <%s> for reading", filename);
    }
  } else {
    /* We're loading from a ByteTensor */
    THByteTensor *src = luaT_checkudata(L, 2, "torch.ByteTensor");
    inmem = THByteTensor_data(src);
    inmem_size = src->size[0];
    infile = NULL;
  }

  /* Step 1: allocate and initialize JPEG decompression object */

  /* We set up the normal JPEG error routines, then override error_exit. */
  cinfo.err = jpeg_std_error(&jerr.pub);
  jerr.pub.error_exit = libjpeg_(Main_error);
  /* Establish the setjmp return context for the error handler to use. */
  if (setjmp(jerr.setjmp_buffer)) {
    /* If we get here, the JPEG code has signaled an error.
     * Clean up the JPEG object, close the input file, release the result
     * tensor if it was already allocated, and return nothing.
     */
    jpeg_destroy_decompress(&cinfo);
    if (infile) {
      fclose(infile);
    }
    if (tensor) {
      /* previously leaked when decoding failed after allocation */
      THTensor_(free)(tensor);
    }
    return 0;
  }
  /* Now we can initialize the JPEG decompression object. */
  jpeg_create_decompress(&cinfo);

  /* Step 2: specify data source (eg, a file) */
  if (load_from_file == 1) {
    jpeg_stdio_src(&cinfo, infile);
  } else {
    jpeg_mem_src(&cinfo, inmem, inmem_size);
  }

  /* Step 3: read file parameters with jpeg_read_header() */

  (void) jpeg_read_header(&cinfo, TRUE);
  /* We can ignore the return value from jpeg_read_header since
   *   (a) suspension is not possible with the stdio data source, and
   *   (b) we passed TRUE to reject a tables-only JPEG file as an error.
   * See libjpeg.doc for more info.
   */

  /* Step 4: set parameters for decompression */

  /* The defaults set by jpeg_read_header() are used unchanged. */

  /* Step 5: Start decompressor */

  (void) jpeg_start_decompress(&cinfo);
  /* We can ignore the return value since suspension is not possible
   * with the stdio data source.
   */

  /* After jpeg_start_decompress() the output dimensions are available:
   * allocate the result tensor and a one-row-high sample array. The sample
   * array lives in the JPOOL_IMAGE pool and goes away when done with the
   * image.
   */
  tensor = THTensor_(newWithSize3d)(cinfo.output_components, cinfo.output_height, cinfo.output_width);
  buffer = (*cinfo.mem->alloc_sarray)
		((j_common_ptr) &cinfo, JPOOL_IMAGE, cinfo.output_width * cinfo.output_components, 1);

  /* Step 6: while (scan lines remain to be read) */
  /*           jpeg_read_scanlines(...); */

  /* Here we use the library's state variable cinfo.output_scanline as the
   * loop counter, so that we don't have to keep track ourselves.
   */
  while (cinfo.output_scanline < cinfo.output_height) {
    /* jpeg_read_scanlines expects an array of pointers to scanlines.
     * Here the array is only one element long.
     */
    (void) jpeg_read_scanlines(&cinfo, buffer, 1);

    /* De-interleave the row just read: output_scanline has already been
     * advanced, so the row index is output_scanline-1.
     */
    for(k = 0; k < cinfo.output_components; k++)
    {
      for(i = 0; i < cinfo.output_width; i++)
        THTensor_(set3d)(tensor, k, cinfo.output_scanline-1, i,
                         (real)buffer[0][cinfo.output_components*i+k]);
    }
  }

  /* Step 7: Finish decompression */

  (void) jpeg_finish_decompress(&cinfo);
  /* We can ignore the return value since suspension is not possible
   * with the stdio data source.
   */

  /* Step 8: Release JPEG decompression object */

  /* This is an important step since it will release a good deal of memory. */
  jpeg_destroy_decompress(&cinfo);

  /* After finish_decompress, we can close the input file.
   * Here we postpone it until after no more JPEG errors are possible,
   * so as to simplify the setjmp error logic above.
   */
  if (infile) {
    fclose(infile);
  }

  /* At this point you may want to check to see whether any corrupt-data
   * warnings occurred (test whether jerr.pub.num_warnings is nonzero).
   */

  /* And we're done! */
  luaT_pushudata(L, tensor, torch_Tensor);
  return 1;
}
Beispiel #8
0
// Lua binding: forward pass of the full (transposed) spatial convolution.
// For each sample it runs a GEMM on the sample's input and `weight` into the
// temporary `columns` buffer, folds the columns into the sample's output with
// nn_(col2im), then adds the bias through a second GEMM against a buffer of
// ones.
//   stack[1]: module table, read for dW, dH, kW, kH, nInputPlane,
//             nOutputPlane, padW, padH, adjW, adjH, weight, bias,
//             finput (used as `columns`), fgradInput (used as `ones`), output
//   stack[2]: input tensor, 3D or 4D (4D = batch mode)
// A 3D input is temporarily resized in place to a batch of one and restored
// before returning; output is likewise returned as 3D in that case.
// Raises a Lua argument error on a wrong dimensionality or channel count.
// Returns 1 Lua value.
// NOTE(review): nothing is pushed explicitly before `return 1`; this
// presumably relies on the last luaT_getfieldcheckudata call leaving `output`
// on top of the Lua stack -- confirm before reordering the field reads.
// NOTE(review): raw data pointers are handed to BLAS/col2im, which presumably
// assumes input and weight are contiguous -- confirm with callers.
static int nn_(SpatialFullConvolution_updateOutput)(lua_State *L) {
  // Input
  THTensor *input = (THTensor*)luaT_checkudata(L, 2, torch_Tensor);

  // Params:
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
  int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
  int padW = luaT_getfieldcheckint(L, 1, "padW");
  int padH = luaT_getfieldcheckint(L, 1, "padH");
  int adjW = luaT_getfieldcheckint(L, 1, "adjW");
  int adjH = luaT_getfieldcheckint(L, 1, "adjH");

  THTensor *weight  = (THTensor*)luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
  THTensor *bias    = (THTensor*)luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor);
  THTensor *columns = (THTensor*)luaT_getfieldcheckudata(L, 1, "finput", torch_Tensor);
  THTensor *ones    = (THTensor*)luaT_getfieldcheckudata(L, 1, "fgradInput", torch_Tensor);
  THTensor *output  = (THTensor*)luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);

  luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D (batch mode) tensor is expected");

  // batch == 0 records that the caller passed a single 3D sample, so the
  // tensors can be put back to 3D at the end
  int batch = 1;
  if (input->nDimension == 3) {
    luaL_argcheck(L, input->size[0] == nInputPlane, 2, "input channels and nInputPlane dont match");
    // Force batch
    batch = 0;
    THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
  } else {
    luaL_argcheck(L, input->size[1] == nInputPlane, 2, "input channels and nInputPlane dont match");
  }

  long inputWidth   = input->size[3];
  long inputHeight  = input->size[2];
  // output extent of the transposed convolution, including the extra
  // adjW / adjH terms
  long outputWidth  = (inputWidth - 1) * dW - 2*padW + kW + adjW;
  long outputHeight = (inputHeight - 1) * dH - 2*padH + kH + adjH;

  // Batch size + input planes
  long batchSize = input->size[0];

  // Resize output
  THTensor_(resize4d)(output, batchSize, nOutputPlane, outputHeight, outputWidth);

  // Resize temporary columns
  THTensor_(resize2d)(columns, nOutputPlane*kW*kH, inputHeight*inputWidth);

  // Define a buffer of ones, for bias accumulation
  // Note: this buffer can be shared with other modules, it only ever gets increased,
  // and always contains ones.
  if (ones->nDimension != 2 || ones->size[0]*ones->size[1] < outputHeight*outputWidth) {
    // Resize plane and fill with ones...
    THTensor_(resize2d)(ones, outputHeight, outputWidth);
    THTensor_(fill)(ones, 1);
  }

  // Helpers
  // views onto one sample of input / output, re-pointed on every iteration
  THTensor *input_n = THTensor_(new)();
  THTensor *output_n = THTensor_(new)();

  int elt;
  // For each elt in batch, do:
  for (elt = 0; elt < batchSize; elt ++) {
    // Matrix multiply per output:
    THTensor_(select)(input_n, input, 0, elt);
    THTensor_(select)(output_n, output, 0, elt);

    // M,N,K are dims of matrix A and B
    // (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
    long m = weight->size[1] * weight->size[2] * weight->size[3];
    long n = columns->size[1];
    long k = weight->size[0];

    // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
    // beta is 0 here, so `columns` is overwritten rather than accumulated into
    THBlas_(gemm)(
        'n', 't',
        n, m, k,
        1,
        THTensor_(data)(input_n), n,
        THTensor_(data)(weight), m,
        0,
        THTensor_(data)(columns), n
    );

    // Fold the columns into this sample's output:
    nn_(col2im)(
      THTensor_(data)(columns),
      nOutputPlane, outputHeight, outputWidth, kH, kW, padH, padW, dH, dW,
      THTensor_(data)(output_n)
    );

    // Do Bias after:
    // M,N,K are dims of matrix A and B
    // (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
    long m_ = nOutputPlane;
    long n_ = outputHeight * outputWidth;
    long k_ = 1;

    // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
    // beta is 1 here, so the ones x bias product is added on top of the
    // values col2im just wrote
    THBlas_(gemm)(
        't', 'n',
        n_, m_, k_,
        1,
        THTensor_(data)(ones), k_,
        THTensor_(data)(bias), k_,
        1,
        THTensor_(data)(output_n), n_
    );

  }

  // Free
  THTensor_(free)(input_n);
  THTensor_(free)(output_n);

  // Resize output
  // undo the temporary "batch of one" reshaping for a 3D caller
  if (batch == 0) {
    THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth);
    THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth);
  }

  // return output
  return 1;
}
// Lua binding: accumulates the parameter gradients of a horizontal (along the
// width) convolution into the module's `gradWeight` and `gradBias`.
//   stack[1]: module table, read for nInputPlane, nOutputPlane, kL, ones,
//             gradWeight, gradBias
//   stack[2]: input tensor, 3D or 4D (4D = batch mode)
//   stack[3]: gradOutput tensor, same dimensionality as input
//   stack[4]: optional scale applied to the accumulated gradients (default 1)
// 3D tensors are temporarily resized in place to a batch of one and restored
// before returning. Returns no Lua values.
static int nnconv1d_(HorizontalConvolution_accGradParameters)(lua_State *L)
{
   THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
   THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
   real scale = luaL_optnumber(L, 4, 1);
   int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
   int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
   int kL = luaT_getfieldcheckint(L, 1, "kL");

   THTensor *ones = luaT_getfieldcheckudata(L, 1, "ones", torch_Tensor);
   THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_Tensor);
   THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor);

   // the feature dimension of gradOutput is 1 in batch mode, 0 otherwise
   THArgCheck(nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 1,
              "Number of output features is not equal to nOutputPlane" );

   // change to batch mode
   int batch = 1;
   if (input->nDimension == 3) {
      batch = 0;
      THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
      THTensor_(resize4d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2]);
   }

   long batchSize    = input->size[0];
   long inputHeight  = input->size[2];
   long inputWidth   = input->size[3];
   long outputHeight = inputHeight;
   long outputWidth  = inputWidth - kL + 1;

   // buffer of ones used to sum gradOutput over its spatial extent; it is
   // only ever grown
   if (ones->nDimension != 1 || ones->size[0] < outputHeight*outputWidth) {
      THTensor_(resize1d)(ones, outputHeight*outputWidth);
      THTensor_(fill)(ones, 1);
   }

   int elt;
   for (elt = 0; elt < batchSize; elt++) {

      // select each batch in 2D
      THTensor *input_t      = THTensor_(newSelect)(input, 0, elt);
      THTensor *gradOutput_t = THTensor_(newSelect)(gradOutput, 0, elt);
      // View the CURRENT sample's gradOutput as (nOutputPlane, H*W).
      // Building the view from gradOutput_t (not from the whole batch tensor)
      // is what makes the bias gradient use sample `elt`; the batch tensor's
      // storage offset always points at sample 0.
      THTensor *gradOutput2d = THTensor_(newWithStorage2d)(gradOutput_t->storage, gradOutput_t->storageOffset,
                                   nOutputPlane, -1, outputWidth*outputHeight, -1);

      // dot products
      // gradWeight[i][k] += scale * sum over rows j of
      //    dot(gradOutput_t[i][j][:], input_t[i][j][k : k+outputWidth])
      int i, j, k;
      for (i = 0; i < nInputPlane; i++) {
         for (k = 0; k < kL; k++) {
            for (j = 0; j < outputHeight; j++) {
               *(gradWeight->storage->data + gradWeight->storageOffset + i*gradWeight->stride[0] + k) +=
                  scale*THBlas_(dot)
                     (outputWidth,
                      gradOutput_t->storage->data + gradOutput_t->storageOffset +
                      i*gradOutput_t->stride[0] + j*gradOutput_t->stride[1],
                      gradOutput_t->stride[2],
                      input_t->storage->data + input_t->storageOffset +
                      i*input_t->stride[0] + j*input_t->stride[1] + k,
                      input_t->stride[2]);
            }
         }
      }

      // fill biases
      // gradBias += scale * gradOutput2d * ones
      THTensor_(addmv)(gradBias, 1, gradBias, scale, gradOutput2d, ones);

      THTensor_(free)(gradOutput2d);
      THTensor_(free)(input_t);
      THTensor_(free)(gradOutput_t);
   }

   // revert to single batch
   if (batch == 0) {
      THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth);
      THTensor_(resize3d)(gradOutput, nOutputPlane, outputHeight, outputWidth);
   }

   return 0;
}
Beispiel #10
0
// Lua binding: backward pass of the full (transposed) spatial convolution
// with respect to its input. For each sample it unrolls gradOutput into the
// temporary column buffer with nn_(im2col) and runs a GEMM on those columns
// and `weight` into the sample's gradInput.
//   stack[1]: module table, read for dW, dH, kW, kH, nInputPlane,
//             nOutputPlane, padW, padH, adjW, adjH, weight,
//             finput (used as `gradColumns`), gradInput
//   stack[2]: input tensor, 3D or 4D (4D = batch mode)
//   stack[3]: gradOutput tensor, same dimensionality as input
// 3D input/gradOutput are temporarily resized in place to a batch of one and
// restored before returning; gradInput is likewise returned as 3D then.
// Raises a Lua argument error on a wrong input dimensionality.
// Returns 1 Lua value.
// NOTE(review): nothing is pushed explicitly before `return 1`; this
// presumably relies on the last luaT_getfieldcheckudata call leaving
// `gradInput` on top of the Lua stack -- confirm before reordering.
static int nn_(SpatialFullConvolution_updateGradInput)(lua_State *L) {
  // Inputs
  THTensor *input = (THTensor *)luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = (THTensor *)luaT_checkudata(L, 3, torch_Tensor);

  // Params
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
  int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
  int padW = luaT_getfieldcheckint(L, 1, "padW");
  int padH = luaT_getfieldcheckint(L, 1, "padH");
  int adjW = luaT_getfieldcheckint(L, 1, "adjW");
  int adjH = luaT_getfieldcheckint(L, 1, "adjH");

  THTensor *weight = (THTensor *)luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
  THTensor *gradColumns = (THTensor*)luaT_getfieldcheckudata(L, 1, "finput", torch_Tensor);
  THTensor *gradInput = (THTensor *)luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);

  luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D (batch mode) tensor is expected");

  // batch == 0 records that the caller passed single 3D samples, so the
  // tensors can be put back to 3D at the end
  int batch = 1;
  if (input->nDimension == 3) {
    // Force batch
    batch = 0;
    THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
    THTensor_(resize4d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2]);
  }

  long inputWidth   = input->size[3];
  long inputHeight  = input->size[2];
  // spatial extent of gradOutput, derived from the input size and the
  // module's stride / padding / kernel / adj parameters
  long outputWidth  = (inputWidth - 1) * dW - 2*padW + kW + adjW;
  long outputHeight = (inputHeight - 1) * dH - 2*padH + kH + adjH;

  // Batch size + input planes
  long batchSize = input->size[0];

  // Resize output
  THTensor_(resize4d)(gradInput, batchSize, nInputPlane, inputHeight, inputWidth);

  // Resize temporary columns
  THTensor_(resize2d)(gradColumns, nOutputPlane*kW*kH, inputHeight*inputWidth);

  // Helpers
  // views onto one sample of gradInput / gradOutput, re-pointed on every
  // iteration
  THTensor *gradInput_n = THTensor_(new)();
  THTensor *gradOutput_n = THTensor_(new)();

  int elt;
  // For each elt in batch, do:
  for (elt = 0; elt < batchSize; elt ++) {
    // Matrix multiply per sample:
    THTensor_(select)(gradInput_n, gradInput, 0, elt);
    THTensor_(select)(gradOutput_n, gradOutput, 0, elt);

    // Extract columns:
    nn_(im2col)(
      THTensor_(data)(gradOutput_n),
      nOutputPlane, outputHeight, outputWidth, kH, kW, padH, padW, dH, dW,
      THTensor_(data)(gradColumns)
    );


    // M,N,K are dims of matrix A and B
    // (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
    long m = weight->size[0];
    long n = gradColumns->size[1];
    long k = weight->size[1] * weight->size[2] * weight->size[3];

    // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
    // beta is 0 here, so this sample's gradInput is overwritten rather than
    // accumulated into
    THBlas_(gemm)(
        'n', 'n',
        n, m, k,
        1,
        THTensor_(data)(gradColumns), n,
        THTensor_(data)(weight), k,
        0,
        THTensor_(data)(gradInput_n), n
    );
  }


  // Free
  THTensor_(free)(gradInput_n);
  THTensor_(free)(gradOutput_n);

  // Resize output
  // undo the temporary "batch of one" reshaping for a 3D caller
  if (batch == 0) {
    THTensor_(resize3d)(gradOutput, nOutputPlane, outputHeight, outputWidth);
    THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth);
    THTensor_(resize3d)(gradInput, nInputPlane, inputHeight, inputWidth);
  }

  // Return gradInput
  return 1;
}
Beispiel #11
0
// Lua binding: accumulates the parameter gradients of the full (transposed)
// spatial convolution into the module's `gradWeight` and `gradBias`.
//   stack[1]: module table, read for dW, dH, kW, kH, nInputPlane,
//             nOutputPlane, padW, padH, adjW, adjH, gradWeight, gradBias,
//             finput (used as `columns`), fgradInput (used as `ones`)
//   stack[2]: input tensor, 3D or 4D (4D = batch mode)
//   stack[3]: gradOutput tensor, same dimensionality as input
//   stack[4]: optional scale applied to the accumulated gradients (default 1)
// 3D input/gradOutput are temporarily resized in place to a batch of one and
// restored before returning.
// Raises a Lua argument error on a wrong input dimensionality.
// Returns no Lua values.
static int nn_(SpatialFullConvolution_accGradParameters)(lua_State *L) {
  // Inputs
  THTensor *input = (THTensor *)luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = (THTensor *)luaT_checkudata(L, 3, torch_Tensor);

  // Params
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
  int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
  int padW = luaT_getfieldcheckint(L, 1, "padW");
  int padH = luaT_getfieldcheckint(L, 1, "padH");
  int adjW = luaT_getfieldcheckint(L, 1, "adjW");
  int adjH = luaT_getfieldcheckint(L, 1, "adjH");
  // Kept at the full precision Lua supplies: a `float` here would round the
  // value before it reaches the BLAS calls of a double-precision build. The
  // BLAS wrappers convert it to their own scalar type on the call.
  double scale = luaL_optnumber(L, 4, 1);

  THTensor *gradWeight = (THTensor *)luaT_getfieldcheckudata(L, 1, "gradWeight", torch_Tensor);
  THTensor *gradBias = (THTensor *)luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor);
  THTensor *columns = (THTensor*)luaT_getfieldcheckudata(L, 1, "finput", torch_Tensor);
  THTensor *ones = (THTensor*)luaT_getfieldcheckudata(L, 1, "fgradInput", torch_Tensor);

  luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D (batch mode) tensor is expected");

  // batch == 0 records that the caller passed single 3D samples, so the
  // tensors can be put back to 3D at the end
  int batch = 1;
  if (input->nDimension == 3) {
    // Force batch
    batch = 0;
    THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
    THTensor_(resize4d)(gradOutput, 1, gradOutput->size[0], gradOutput->size[1], gradOutput->size[2]);
  }

  long inputWidth   = input->size[3];
  long inputHeight  = input->size[2];
  // spatial extent of gradOutput, derived from the input size and the
  // module's stride / padding / kernel / adj parameters
  long outputWidth  = (inputWidth - 1) * dW - 2*padW + kW + adjW;
  long outputHeight = (inputHeight - 1) * dH - 2*padH + kH + adjH;

  // Batch size + input planes
  long batchSize = input->size[0];

  // Define a buffer of ones, for bias accumulation
  if (ones->nDimension != 2 || ones->size[0]*ones->size[1] < outputHeight*outputWidth) {
    // Resize plane and fill with ones...
    THTensor_(resize2d)(ones, outputHeight, outputWidth);
    THTensor_(fill)(ones, 1);
  }

  // Resize temporary columns
  THTensor_(resize2d)(columns, nOutputPlane*kW*kH, inputHeight*inputWidth);

  // Helpers: views onto one sample of input / gradOutput, re-pointed on
  // every iteration
  THTensor *input_n = THTensor_(new)();
  THTensor *gradOutput_n = THTensor_(new)();

  int elt;
  // For each elt in batch, do:
  for (elt = 0; elt < batchSize; elt ++) {
    // Matrix multiply per output:
    THTensor_(select)(input_n, input, 0, elt);
    THTensor_(select)(gradOutput_n, gradOutput, 0, elt);

    // Extract columns:
    nn_(im2col)(
      THTensor_(data)(gradOutput_n),
      nOutputPlane, outputHeight, outputWidth, kH, kW, padH, padW, dH, dW,
      THTensor_(data)(columns)
    );

    // M,N,K are dims of matrix A and B
    // (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
    long n = columns->size[0];   // nOutputPlane * kh * kw
    long m = input_n->size[0];   // nInputPlane
    long k = columns->size[1];   // inputHeight * inputWidth

    // Do GEMM (note: this is a bit confusing because gemm assumes column-major matrices)
    // beta is 1 here, so the scaled product is added onto gradWeight
    THBlas_(gemm)(
        't', 'n',
        n, m, k,
        scale,
        THTensor_(data)(columns), k,
        THTensor_(data)(input_n), k,
        1,
        THTensor_(data)(gradWeight), n
    );


    // Do Bias:
    // M,N,K are dims of matrix A and B
    // (see http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm)
    long m_ = nOutputPlane;
    long k_ = outputHeight * outputWidth;

    // Do GEMV (note: this is a bit confusing because gemv assumes column-major matrices)
    // beta is 1 here as well: gradBias += scale * (gradOutput_n summed over
    // its spatial extent via the ones buffer)
    THBlas_(gemv)(
        't',
        k_, m_,
        scale,
        THTensor_(data)(gradOutput_n), k_,
        THTensor_(data)(ones), 1,
        1,
        THTensor_(data)(gradBias), 1
    );
  }

  // Free
  THTensor_(free)(input_n);
  THTensor_(free)(gradOutput_n);

  // Resize: undo the temporary "batch of one" reshaping for a 3D caller
  if (batch == 0) {
    THTensor_(resize3d)(gradOutput, nOutputPlane, outputHeight, outputWidth);
    THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth);
  }

  // Return nothing
  return 0;
}
Beispiel #12
0
/*
 * Forward pass of the LcEncoder module.
 *
 * Lua arguments: 1 = module table, 2 = input tensor (3D, third dim == 1).
 * For each output position (x, y) the winX x winY input window starting at
 * (floor(x*xStep), floor(y*yStep)) is dotted with the weight slice selected
 * by (x % woutX, y % woutY), and the bias entry with the same indices is
 * added.
 *
 * NOTE(review): returns 1 without an explicit push; presumably the caller
 * receives whatever is on top of the Lua stack (the last field fetched is
 * "output") -- confirm against luaT_getfieldcheckudata.
 */
static int nn_LcEncoder_forward(lua_State *L)
{
  /* arguments and module fields (fetch order intentionally unchanged) */
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor_id);
  int winX = luaT_getfieldcheckint(L, 1, "winX");
  int winY = luaT_getfieldcheckint(L, 1, "winY");
  int woutX = luaT_getfieldcheckint(L, 1, "woutX");
  int woutY = luaT_getfieldcheckint(L, 1, "woutY");
  double xStep = luaT_getfieldchecknumber(L, 1, "xStep");
  double yStep = luaT_getfieldchecknumber(L, 1, "yStep");

  THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor_id);
  THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor_id);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor_id);

  luaL_argcheck(L, input->nDimension == 3, 2, "3D tensor expected");
  luaL_argcheck(L, input->size[2] == 1, 2, "invalid input 3rd dim size has to be 1");

  /* scratch tensors used as views while sliding over the input */
  THTensor *plane = THTensor_new();
  THTensor *windowX = THTensor_new();
  THTensor *window = THTensor_new();
  THTensor *kernelX = THTensor_new();
  THTensor *kernel = THTensor_new();

  /* output extent follows from the input extent, the window and the step */
  THTensor_resize3d(output,
                    (input->size[0] - winX+1) / xStep,
                    (input->size[1] - winY+1) / yStep,
                    1);

  /* drop the singleton third dimension of the input */
  THTensor_select(plane, input, 2, 0);

  int row, col;
  for (row = 0; row < output->size[1]; row++) {
    int inRow = (int)floor(row*yStep);
    int kRow = row%woutY;

    for (col = 0; col < output->size[0]; col++) {
      int inCol = (int)floor(col*xStep);
      int kCol = col%woutX;

      /* input window for this output position */
      THTensor_narrow(windowX, plane, 0, inCol, winX);
      THTensor_narrow(window, windowX, 1, inRow, winY);

      /* weight slice selected by (kCol, kRow) */
      THTensor_select(kernelX, weight, 3, kRow);
      THTensor_select(kernel, kernelX, 2, kCol);

      double response = THTensor_dot(window, kernel);
      double offset = THTensor_get2d(bias, kCol, kRow);
      THTensor_set3d(output, col, row, 0, response+offset);
    }
  }

  /* release the scratch tensors */
  THTensor_free(plane);
  THTensor_free(windowX);
  THTensor_free(window);
  THTensor_free(kernelX);
  THTensor_free(kernel);
  return 1;
}
Beispiel #13
0
/*
 * Backward pass of the LcEncoder module.
 *
 * Lua arguments: 1 = module table, 2 = input tensor, 3 = gradOutput tensor.
 * Both tensors must be 3D with a third dimension of size 1.
 *
 * For every gradOutput position (x, y) the scalar gradOutput(x, y, 0) is
 *   - combined with the matching input window into a slice of gradWeight,
 *   - added to gradBias(wx, wy),
 *   - combined with the matching weight slice into a window of gradInput
 * (THTensor_addTensor presumably performs dst += value * src -- confirm).
 *
 * NOTE(review): gradWeight and gradInput are zero-filled here but gradBias is
 * not, so gradBias keeps whatever it held on entry -- confirm this is intended.
 * NOTE(review): "bias" is fetched but never read in this function.
 * NOTE(review): returns 1 without an explicit push; presumably the caller
 * receives the value on top of the Lua stack (last field fetched: gradInput).
 */
static int nn_LcEncoder_backward(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor_id);  
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor_id);  
  int winX = luaT_getfieldcheckint(L, 1, "winX");
  int winY = luaT_getfieldcheckint(L, 1, "winY");
  int woutX = luaT_getfieldcheckint(L, 1, "woutX");
  int woutY = luaT_getfieldcheckint(L, 1, "woutY");
  double xStep = luaT_getfieldchecknumber(L, 1, "xStep");
  double yStep = luaT_getfieldchecknumber(L, 1, "yStep");

  /* shape validation for both tensor arguments */
  luaL_argcheck(L, input->nDimension == 3, 2, "input 3D tensor expected");
  luaL_argcheck(L, input->size[2] == 1, 2, "invalid input 3rd dim size has to be 1");
  luaL_argcheck(L, gradOutput->nDimension == 3, 3, "gradOutput 3D tensor expected");
  luaL_argcheck(L, gradOutput->size[2] == 1, 3, "invalid gradOutput 3rd dim size has to be 1");

  THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor_id);
  THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_Tensor_id);
  THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor_id);
  THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor_id);
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor_id);


  /* ----------------------- gradWeight ----------------------- */
  /* start from zero, then allocate the scratch views used on the input side */
  THTensor_fill(gradWeight, 0);

  THTensor *inputPlane, *inputNarrowedX, *inputNarrowedYX;
  inputPlane = THTensor_new();
  inputNarrowedX = THTensor_new();
  inputNarrowedYX = THTensor_new();
  /* drop the singleton third dimension of the input */
  THTensor_select(inputPlane, input, 2, 0);

  THTensor *gradWeightSelectedX, *gradWeightSelectedYX;
  gradWeightSelectedX = THTensor_new();
  gradWeightSelectedYX = THTensor_new();

  /* ----------------------- gradInput ------------------------ */
  /* gradInput takes the shape of input and starts from zero */
  THTensor_resizeAs(gradInput, input);
  THTensor_fill(gradInput, 0);

  THTensor *gradInputPlane, *gradInputNarrowedX, *gradInputNarrowedYX;
  gradInputPlane = THTensor_new();
  gradInputNarrowedX = THTensor_new();
  gradInputNarrowedYX = THTensor_new();
  /* drop the singleton third dimension of gradInput */
  THTensor_select(gradInputPlane, gradInput, 2, 0);

  THTensor *weightSelectedX, *weightSelectedYX;
  weightSelectedX = THTensor_new();
  weightSelectedYX = THTensor_new();


  /* (x, y): gradOutput position; (ix, iy): window origin; (wx, wy): weight/bias slice */
  int y,x,iy,ix,wy,wx;
  for (y = 0; y<gradOutput->size[1]; y++)
    {
      iy = (int)floor(y*yStep);
      wy = y%woutY;
      for (x = 0; x<gradOutput->size[0]; x++)
        {
          ix = (int)floor(x*xStep);
          wx = x%woutX;
          double gradOutVal = THTensor_get3d(gradOutput,x,y,0);

          /* ----------------------- gradWeight ----------------------- */
          /* input window at (ix, iy) goes into the gradWeight slice (wx, wy) */
          THTensor_narrow(inputNarrowedX, inputPlane, 0, ix, winX);
          THTensor_narrow(inputNarrowedYX, inputNarrowedX, 1, iy, winY);
          THTensor_select(gradWeightSelectedX, gradWeight, 3, wy);
          THTensor_select(gradWeightSelectedYX, gradWeightSelectedX, 2, wx);
          THTensor_addTensor(gradWeightSelectedYX, gradOutVal, inputNarrowedYX);
          /* ----------------------- gradBias ----------------------- */
          THTensor_set2d(gradBias,wx,wy, THTensor_get2d(gradBias,wx,wy) + gradOutVal);
          /* ----------------------- gradInput ------------------------ */
          /* weight slice (wx, wy) goes into the gradInput window at (ix, iy) */
          THTensor_narrow(gradInputNarrowedX, gradInputPlane, 0, ix, winX);
          THTensor_narrow(gradInputNarrowedYX, gradInputNarrowedX, 1, iy, winY);
          THTensor_select(weightSelectedX, weight, 3, wy);
          THTensor_select(weightSelectedYX, weightSelectedX, 2, wx);
          THTensor_addTensor(gradInputNarrowedYX, gradOutVal, weightSelectedYX);
        }
    }

  /* free gradWeight  */
  THTensor_free(inputPlane);
  THTensor_free(inputNarrowedX);
  THTensor_free(inputNarrowedYX);
  THTensor_free(gradWeightSelectedX);
  THTensor_free(gradWeightSelectedYX);
  /* free gradInput  */
  THTensor_free(gradInputPlane);
  THTensor_free(gradInputNarrowedX);
  THTensor_free(gradInputNarrowedYX);
  THTensor_free(weightSelectedX);
  THTensor_free(weightSelectedYX);

  return 1;
}
Beispiel #14
0
/* Lua setter: stores the boolean given as argument 2 into the sumflag field
   of the QP solver passed as argument 1. Pushes no results. */
static int QPSolver_sumflag(lua_State *L)
{
  SVQP2 *solver = (SVQP2*)luaT_checkudata(L, 1, QPSolver_id);

  solver->sumflag = luaT_checkboolean(L, 2);

  return 0;
}
Beispiel #15
0
/*
 * Forward pass of volumetric average pooling.
 *
 * Lua arguments: 1 = module table (fields kT, kW, kH, dT, dW, dH, output),
 * 2 = input tensor, either 4D or 5D (the 5D form carries a leading batch
 * dimension). The output is resized to the pooled extent and filled by
 * nn_(VolumetricAveragePooling_updateOutput_frame), once per batch entry in
 * batch mode.
 */
static int nn_(VolumetricAveragePooling_updateOutput)(lua_State *L) {
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  int kT = luaT_getfieldcheckint(L, 1, "kT");
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int dT = luaT_getfieldcheckint(L, 1, "dT");
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);

  luaL_argcheck(L, input->nDimension == 4 || input->nDimension == 5, 2,
                "4D or 5D (batch-mode) tensor expected");

  /* a leading batch dimension shifts every axis index by one */
  const int batched = (input->nDimension == 5);
  const int dimN = batched ? 1 : 0;
  const int dimt = dimN + 1;
  const int dimh = dimN + 2;
  const int dimw = dimN + 3;

  luaL_argcheck(L, input->size[dimw] >= kW && input->size[dimh] >= kH &&
                input->size[dimt] >= kT, 2,
                "input image smaller than kernel size");

  /* input extent and resulting pooled extent */
  const long nslices = input->size[dimN];
  const long itime   = input->size[dimt];
  const long iheight = input->size[dimh];
  const long iwidth  = input->size[dimw];
  const long otime   = (itime   - kT) / dT + 1;
  const long oheight = (iheight - kH) / dH + 1;
  const long owidth  = (iwidth  - kW) / dW + 1;

  /* the frame routine works on raw data, so use a contiguous input */
  input = THTensor_(newContiguous)(input);

  if (batched) {
    const long nBatch = input->size[0];

    /* element counts of one batch entry on the input and output side */
    const long istride = nslices * itime * iwidth * iheight;
    const long ostride = nslices * otime * owidth * oheight;

    THTensor_(resize5d)(output, nBatch, nslices, otime, oheight, owidth);

    real *input_data = THTensor_(data)(input);
    real *output_data = THTensor_(data)(output);

    long p;
#pragma omp parallel for private(p)
    for (p=0; p < nBatch; p++) {
      nn_(VolumetricAveragePooling_updateOutput_frame)(
        input_data + p * istride, output_data + p * ostride,
        nslices, itime, iwidth, iheight, otime, owidth, oheight,
        kT, kW, kH, dT, dW, dH);
    }
  } else {
    /* single sample */
    THTensor_(resize4d)(output, nslices, otime, oheight, owidth);

    real *input_data = THTensor_(data)(input);
    real *output_data = THTensor_(data)(output);

    nn_(VolumetricAveragePooling_updateOutput_frame)(input_data, output_data,
                                                     nslices,
                                                     itime, iwidth, iheight,
                                                     otime, owidth, oheight,
                                                     kT, kW, kH, dT, dW, dH);
  }

  /* drop the reference taken by newContiguous */
  THTensor_(free)(input);
  return 1;
}
// Forward pass of the horizontal (row-wise) convolution.
//
// Lua arguments: 1 = module table (nInputPlane, nOutputPlane, kL, weight,
// bias, output), 2 = input tensor, 3D or 4D (batch mode).
// Every output plane is first filled with its bias; then, for each plane i
// and each row j, the kL taps weight[i*kL + k] are applied to the input row
// shifted by k and accumulated into the output row.
// NOTE(review): the accumulation indexes output planes with the input plane
// index, so it presumably expects nInputPlane == nOutputPlane -- confirm.
static int nnconv1d_(HorizontalConvolution_updateOutput)(lua_State *L)
{
   THTensor *input = luaT_checkudata(L, 2, torch_Tensor);

   int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
   int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
   int kL = luaT_getfieldcheckint(L, 1, "kL");

   THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
   THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor);
   THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);

   luaL_argcheck(L, input->nDimension == 3 ||
                    input->nDimension == 4, 2, "3D or 4D (batch mode) tensor expected");

   // a single sample is temporarily viewed as a batch of one
   int batch = (input->nDimension == 3) ? 0 : 1;
   if (!batch) {
      THTensor_(resize4d)(input, 1, nInputPlane, input->size[1], input->size[2]);
   }

   long batchSize    = input->size[0];
   long inputHeight  = input->size[2];
   long inputWidth   = input->size[3];
   long outputHeight = inputHeight;
   long outputWidth  = inputWidth - kL + 1;

   THTensor_(resize4d)(output, batchSize, nOutputPlane, outputHeight, outputWidth);

   int elt;
#pragma omp parallel for private(elt)
   for (elt = 0; elt < batchSize; elt++) {

      // views on the current batch entry
      THTensor *in_b  = THTensor_(newSelect)(input, 0, elt);
      THTensor *out_b = THTensor_(newSelect)(output, 0, elt);

      int plane, row, tap;

      // initialise every output plane with its bias value
      for (plane = 0; plane < nOutputPlane; plane++) {
         long plane_off = out_b->storageOffset + out_b->stride[0]*plane;
         THVector_(fill)(out_b->storage->data + plane_off,
                         THTensor_(get1d)(bias, plane), outputHeight*outputWidth);
      }

      // accumulate the kL shifted, weighted copies of each input row
      for (plane = 0; plane < nInputPlane; plane++) {
         for (row = 0; row < inputHeight; row++) {
            long out_off = out_b->storageOffset +
                           out_b->stride[0]*plane + out_b->stride[1]*row;
            long in_off  = in_b->storageOffset +
                           in_b->stride[0]*plane + in_b->stride[1]*row;

            for (tap = 0; tap < kL; tap++) {
               THVector_(add)(out_b->storage->data + out_off,
                              in_b->storage->data + in_off + tap,
                              THTensor_(data)(weight)[plane*kL + tap], outputWidth);
            }
         }
      }

      // drop the per-entry views
      THTensor_(free)(in_b);
      THTensor_(free)(out_b);
   }

   // restore the 3D shapes when the caller passed a single sample
   if (!batch) {
      THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth);
      THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth);
   }

   return 1;
}
Beispiel #17
0
/* Lua method: pushes the number of dimensions of the tensor passed as
   argument 1 and returns that single value. */
static int torch_Tensor_(nDimension)(lua_State *L)
{
  THTensor *self = luaT_checkudata(L, 1, torch_Tensor);

  lua_pushnumber(L, self->nDimension);
  return 1;
}
// Backward pass (gradient w.r.t. the input) of the horizontal convolution.
//
// Lua arguments: 1 = module table (nInputPlane, nOutputPlane, kL, weight,
// gradInput), 2 = input tensor, 3 = gradOutput tensor; 3D or 4D (batch mode).
// gradInput is resized like input, zeroed, and then for each plane i, row j
// and tap k the gradOutput row scaled by weight[i*kL + k] is accumulated
// into the gradInput row shifted by k.
// NOTE(review): the original author marked the accumulation call with
// "needs to change"; the reason is not visible here.
static int nnconv1d_(HorizontalConvolution_updateGradInput)(lua_State *L)
{
   THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
   THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);

   int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
   int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
   int kL = luaT_getfieldcheckint(L, 1, "kL");

   THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
   THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);

   // the feature dimension is 1 in batch mode and 0 otherwise
   THArgCheck(nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 1,
              "Number of output features is not equal to nOutputPlane" );

   // a single sample is temporarily viewed as a batch of one
   int batch = (input->nDimension == 3) ? 0 : 1;
   if (!batch) {
      THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
      THTensor_(resize4d)(gradOutput, 1, nOutputPlane, gradOutput->size[1], gradOutput->size[2]);
   }

   long batchSize    = input->size[0];
   long inputHeight  = input->size[2];
   long inputWidth   = input->size[3];
   long outputHeight = inputHeight;
   long outputWidth  = inputWidth - kL + 1;

   THTensor_(resizeAs)(gradInput, input);
   THTensor_(zero)(gradInput);

   int elt;
#pragma omp parallel for private(elt)
   for (elt = 0; elt < batchSize; elt++) {

      // views on the current batch entry
      THTensor *gin_b  = THTensor_(newSelect)(gradInput, 0, elt);
      THTensor *gout_b = THTensor_(newSelect)(gradOutput, 0, elt);

      int plane, row, tap;

      // scatter every weighted gradOutput row back over the kL input shifts
      for (plane = 0; plane < nOutputPlane; plane++) {
         for (row = 0; row < outputHeight; row++) {
            long gin_off  = gin_b->storageOffset +
                            gin_b->stride[0]*plane + gin_b->stride[1]*row;
            long gout_off = gout_b->storageOffset +
                            gout_b->stride[0]*plane + gout_b->stride[1]*row;

            for (tap = 0; tap < kL; tap++) {
               THVector_(add)(gin_b->storage->data + gin_off + tap,
                              gout_b->storage->data + gout_off,
                              THTensor_(data)(weight)[plane*kL + tap], outputWidth);
            }
         }
      }

      // drop the per-entry views
      THTensor_(free)(gin_b);
      THTensor_(free)(gout_b);
   }

   // restore the 3D shapes when the caller passed a single sample
   if (!batch) {
      THTensor_(resize3d)(input, nInputPlane, inputHeight, inputWidth);
      THTensor_(resize3d)(gradInput, nInputPlane, inputHeight, inputWidth);
      THTensor_(resize3d)(gradOutput, nOutputPlane, outputHeight, outputWidth);
   }

   return 1;
}
Beispiel #19
0
Datei: jpeg.c Projekt: omry/image
/*
 * save function
 *
 * Lua arguments:
 *   1: filename (string)  - destination path, opened when save_to_file == 1
 *   2: tensor             - 2D (H x W) or 3D (C x H x W with C == 1 or 3)
 *   3: save_to_file (int) - 1 writes to filename, otherwise the image is
 *                           compressed into memory
 *   4: quality (int)      - JPEG quality, 0..100
 *   5: torch.ByteTensor   - receives the compressed bytes; only checked and
 *                           used when save_to_file == 0
 *
 * Raises a Lua error on invalid arguments, allocation failure, or when the
 * output file cannot be opened. All arguments are validated before anything
 * is allocated, and the later error paths release what they own first,
 * because luaL_error does not return.
 */
int libjpeg_(Main_save)(lua_State *L) {
  unsigned char *inmem = NULL;  /* destination memory (if saving to memory) */
  unsigned long inmem_size = 0;  /* destination memory size (bytes) */

  /* get args */
  const char *filename = luaL_checkstring(L, 1);
  THTensor *tensor = luaT_checkudata(L, 2, torch_Tensor);

  const int save_to_file = luaL_checkint(L, 3);

  THByteTensor* tensor_dest = NULL;
  if (save_to_file == 0) {
    tensor_dest = luaT_checkudata(L, 5, "torch.ByteTensor");
  }

  int quality = luaL_checkint(L, 4);
  if (quality < 0 || quality > 100) {
    return luaL_error(L, "quality should be between 0 and 100");
  }

  /* dimensions of the image we want to write; taken from the original tensor
     so that a bad shape is rejected before the contiguous copy exists */
  int width=0, height=0, bytes_per_pixel=0;
  int color_space=0;
  if (tensor->nDimension == 3) {
    bytes_per_pixel = tensor->size[0];
    height = tensor->size[1];
    width = tensor->size[2];
    if (bytes_per_pixel == 3) {
      color_space = JCS_RGB;
    } else if (bytes_per_pixel == 1) {
      color_space = JCS_GRAYSCALE;
    } else {
      return luaL_error(L, "tensor should have 1 or 3 channels (gray or RGB)");
    }
  } else if (tensor->nDimension == 2) {
    bytes_per_pixel = 1;
    height = tensor->size[0];
    width = tensor->size[1];
    color_space = JCS_GRAYSCALE;
  } else {
    return luaL_error(L, "supports only 2 or 3 dimension tensors");
  }

  /* contiguous view of the pixel data (released at the end) */
  THTensor *tensorc = THTensor_(newContiguous)(tensor);
  real *tensor_data = THTensor_(data)(tensorc);

  /* alloc raw image data; size computed in size_t to avoid int overflow */
  size_t raw_size = (size_t)width * (size_t)height * (size_t)bytes_per_pixel;
  unsigned char *raw_image = (unsigned char *)malloc(raw_size);
  if (raw_image == NULL && raw_size > 0) {
    THTensor_(free)(tensorc);
    return luaL_error(L, "not enough memory to save jpeg image");
  }

  /* convert tensor (planar, channel-major) to interleaved raw bytes */
  int x,y,k;
  for (k=0; k<bytes_per_pixel; k++) {
    for (y=0; y<height; y++) {
      for (x=0; x<width; x++) {
        raw_image[(y*width+x)*bytes_per_pixel+k] = *tensor_data++;
      }
    }
  }

  FILE *outfile = NULL;
  if (save_to_file == 1) {
    outfile = fopen( filename, "wb" );
    if ( !outfile ) {
      free(raw_image);
      THTensor_(free)(tensorc);
      return luaL_error(L, "Error opening output jpeg file %s\n!", filename );
    }
  }

  /* jpeg struct */
  struct jpeg_compress_struct cinfo;
  struct jpeg_error_mgr jerr;

  /* this is a pointer to one row of image data */
  JSAMPROW row_pointer[1];

  cinfo.err = jpeg_std_error( &jerr );
  jpeg_create_compress(&cinfo);

  /* specify data destination (a file, or a memory buffer) */
  if (save_to_file == 1) {
    jpeg_stdio_dest(&cinfo, outfile);
  } else {
    jpeg_mem_dest(&cinfo, &inmem, &inmem_size);
  }

  /* Setting the parameters of the output file here */
  cinfo.image_width = width;
  cinfo.image_height = height;
  cinfo.input_components = bytes_per_pixel;
  cinfo.in_color_space = color_space;

  /* default compression parameters, we shouldn't be worried about these */
  jpeg_set_defaults( &cinfo );
  jpeg_set_quality(&cinfo, quality, (boolean)0);

  /* Now do the compression .. */
  jpeg_start_compress( &cinfo, TRUE );

  /* like reading a file, this time write one row at a time */
  while( cinfo.next_scanline < cinfo.image_height ) {
    row_pointer[0] = &raw_image[ cinfo.next_scanline * cinfo.image_width *  cinfo.input_components];
    jpeg_write_scanlines( &cinfo, row_pointer, 1 );
  }

  /* similar to read file, clean up after we're done compressing */
  jpeg_finish_compress( &cinfo );
  jpeg_destroy_compress( &cinfo );

  if (outfile != NULL) {
    fclose( outfile );
  }

  if (save_to_file == 0) {
    THByteTensor_resize1d(tensor_dest, inmem_size);  /* will fail if it's not a Byte Tensor */
    unsigned char* tensor_dest_data = THByteTensor_data(tensor_dest);
    memcpy(tensor_dest_data, inmem, inmem_size);
  }
  /* the memory destination buffer is ours to free, whether or not it was
     copied out (free(NULL) is a no-op on the file path) */
  free(inmem);

  /* some cleanup */
  free(raw_image);
  THTensor_(free)(tensorc);

  /* success code is 1! */
  /* NOTE(review): nothing is pushed before this return, so Lua receives
     whatever is on top of the stack -- kept as-is for caller compatibility */
  return 1;
}
Beispiel #20
0
/* Type-checks Lua stack slot `arg` against torch_Tensor and returns it as a
   THTensor pointer. */
inline THTensor *libopencv_(checkTensor)(lua_State* L, int arg) {
  THTensor *checked = (THTensor*)luaT_checkudata(L, arg, torch_Tensor);
  return checked;
}
Beispiel #21
0
/*
 * save function
 *
 * Lua arguments:
 *   1: filename (string) - destination path
 *   2: tensor            - 2D (H x W) or 3D (C x H x W with C == 1 or 3)
 *
 * Raises a Lua error on an unsupported tensor shape, on allocation failure,
 * or when the output file cannot be opened. The shape is validated before
 * anything is allocated, and the later error paths release what they own
 * first, because luaL_error does not return.
 */
int libjpeg_(Main_save)(lua_State *L) {
  /* get args */
  const char *filename = luaL_checkstring(L, 1);
  THTensor *tensor = luaT_checkudata(L, 2, torch_Tensor);

  /* dimensions of the image we want to write; taken from the original tensor
     so that a bad shape is rejected before the contiguous copy exists */
  int width=0, height=0, bytes_per_pixel=0;
  int color_space=0;
  if (tensor->nDimension == 3) {
    bytes_per_pixel = tensor->size[0];
    height = tensor->size[1];
    width = tensor->size[2];
    if (bytes_per_pixel == 3) {
      color_space = JCS_RGB;
    } else if (bytes_per_pixel == 1) {
      color_space = JCS_GRAYSCALE;
    } else {
      return luaL_error(L, "tensor should have 1 or 3 channels (gray or RGB)");
    }
  } else if (tensor->nDimension == 2) {
    bytes_per_pixel = 1;
    height = tensor->size[0];
    width = tensor->size[1];
    color_space = JCS_GRAYSCALE;
  } else {
    return luaL_error(L, "supports only 2 or 3 dimension tensors");
  }

  /* contiguous view of the pixel data (released at the end) */
  THTensor *tensorc = THTensor_(newContiguous)(tensor);
  real *tensor_data = THTensor_(data)(tensorc);

  /* alloc raw image data; size computed in size_t to avoid int overflow */
  size_t raw_size = (size_t)width * (size_t)height * (size_t)bytes_per_pixel;
  unsigned char *raw_image = (unsigned char *)malloc(raw_size);
  if (raw_image == NULL && raw_size > 0) {
    THTensor_(free)(tensorc);
    return luaL_error(L, "not enough memory to save jpeg image");
  }

  /* convert tensor (planar, channel-major) to interleaved raw bytes */
  int x,y,k;
  for (k=0; k<bytes_per_pixel; k++) {
    for (y=0; y<height; y++) {
      for (x=0; x<width; x++) {
        raw_image[(y*width+x)*bytes_per_pixel+k] = *tensor_data++;
      }
    }
  }

  FILE *outfile = fopen( filename, "wb" );
  if ( !outfile ) {
    /* a lua_CFunction must not return a negative result count; report the
       failure as a Lua error after releasing what was allocated */
    free(raw_image);
    THTensor_(free)(tensorc);
    return luaL_error(L, "Error opening output jpeg file %s\n!", filename );
  }

  /* jpeg struct */
  struct jpeg_compress_struct cinfo;
  struct jpeg_error_mgr jerr;

  /* this is a pointer to one row of image data */
  JSAMPROW row_pointer[1];

  cinfo.err = jpeg_std_error( &jerr );
  jpeg_create_compress(&cinfo);
  jpeg_stdio_dest(&cinfo, outfile);

  /* Setting the parameters of the output file here */
  cinfo.image_width = width;
  cinfo.image_height = height;
  cinfo.input_components = bytes_per_pixel;
  cinfo.in_color_space = color_space;

  /* default compression parameters, we shouldn't be worried about these */
  jpeg_set_defaults( &cinfo );

  /* Now do the compression .. */
  jpeg_start_compress( &cinfo, TRUE );

  /* like reading a file, this time write one row at a time */
  while( cinfo.next_scanline < cinfo.image_height ) {
    row_pointer[0] = &raw_image[ cinfo.next_scanline * cinfo.image_width *  cinfo.input_components];
    jpeg_write_scanlines( &cinfo, row_pointer, 1 );
  }

  /* similar to read file, clean up after we're done compressing */
  jpeg_finish_compress( &cinfo );
  jpeg_destroy_compress( &cinfo );
  fclose( outfile );

  /* some cleanup */
  free(raw_image);
  THTensor_(free)(tensorc);

  /* success code is 1! */
  /* NOTE(review): nothing is pushed before this return, so Lua receives
     whatever is on top of the stack -- kept as-is for caller compatibility */
  return 1;
}
// Reads Lua stack slot i, type-checked against the id registered for
// "torch.FloatTensor", and wraps the underlying C tensor in THTensor<float>.
template<> inline THTensor<float> FromLuaStack<THTensor<float> >(lua_State* L, int i) {
  TH<float>::CTensor* raw =
      (TH<float>::CTensor*)luaT_checkudata(L, i, luaT_checktypename2id(L, "torch.FloatTensor"));
  return THTensor<float>(raw);
}
Beispiel #23
0
/*
 * Forward pass of the spatial convolution module.
 *
 * Lua arguments: 1 = module table (kW, kH, dW, dH, nInputPlane,
 * nOutputPlane, weight, bias, output), 2 = input tensor. The input must be
 * 3D with size[2] == nInputPlane and at least kW x kH in its first two
 * dimensions. Each output plane is filled with its bias and then receives
 * one contribution per input plane through THTensor_addT4dotT2.
 *
 * NOTE(review): returns 1 without an explicit push; presumably the caller
 * receives the value on top of the Lua stack (last field fetched: output).
 */
static int nn_SpatialConvolution_forward(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor_id);
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
  int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");

  THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor_id);
  THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor_id);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor_id);

  luaL_argcheck(L, input->nDimension == 3, 2, "3D tensor expected");
  luaL_argcheck(L, input->size[2] == nInputPlane, 2, "invalid number of input planes");
  luaL_argcheck(L, input->size[0] >= kW && input->size[1] >= kH, 2, "input image smaller than kernel size");

  /* output extent from input extent, kernel size and stride */
  THTensor_resize3d(output,
                    (input->size[0] - kW) / dW + 1,
                    (input->size[1] - kH) / dH + 1,
                    nOutputPlane);

  /* scratch tensors reused as views on every iteration */
  THTensor *inPlane = THTensor_new();
  THTensor *kernel = THTensor_new();
  THTensor *outPlane = THTensor_new();
  THTensor *patches = THTensor_new();

  int outIdx, inIdx;
  for (outIdx = 0; outIdx < nOutputPlane; outIdx++) {
    THTensor_select(outPlane, output, 2, outIdx);

    /* every output plane starts at its bias value */
    THTensor_fill(outPlane, THTensor_get1d(bias, outIdx));

    for (inIdx = 0; inIdx < nInputPlane; inIdx++) {
      THTensor_select(inPlane, input, 2, inIdx);

      /* kernel for the (outIdx, inIdx) pair; the NULL source presumably
         means "operate on the destination itself" -- confirm */
      THTensor_select(kernel, weight, 3, outIdx);
      THTensor_select(kernel, NULL, 2, inIdx);

      /* unfold the input plane along both spatial dimensions */
      THTensor_unfold(patches, inPlane, 0, kW, dW);
      THTensor_unfold(patches, NULL,    1, kH, dH);

      THTensor_addT4dotT2(outPlane, 1, patches, kernel);
    }
  }

  THTensor_free(inPlane);
  THTensor_free(kernel);
  THTensor_free(outPlane);
  THTensor_free(patches);

  return 1;
}
// Reads Lua stack slot i, type-checked against the id registered for
// "torch.DoubleTensor", and wraps the underlying C tensor in THTensor<double>.
template<> inline THTensor<double> FromLuaStack<THTensor<double> >(lua_State* L, int i) {
  TH<double>::CTensor* raw =
      (TH<double>::CTensor*)luaT_checkudata(L, i, luaT_checktypename2id(L, "torch.DoubleTensor"));
  return THTensor<double>(raw);
}
Beispiel #25
0
// Backward pass of the SpatialGraph module.
//
// Lua arguments: 1 = module table (gradInput, output, dist, normalize),
// 2 = input tensor, 3 = gradOutput tensor. All tensors are indexed as
// (channel, y, x); gradOutput channel 0 holds the edge to the right-hand
// neighbour (x+1), channel 1 the edge to the lower neighbour (y+1).
//
// dist == 0 : back-propagates through the Euclidean edge distance.
// otherwise : back-propagates through the cosine edge measure; with
//             "normalize" set, the terms are divided by the vector norms.
//
// gradInput is zeroed and accumulated into; it is not resized here, so it
// must already have the size of input.
static int nn_(SpatialGraph_updateGradInput)(lua_State *L)
{
  // get all params (fetch order intentionally unchanged)
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  int dist = luaT_getfieldcheckint(L, 1, "dist");
  int norm = luaT_getfieldcheckint(L, 1, "normalize");

  // dims
  int ichannels = input->size[0];
  int owidth = gradOutput->size[2];
  int oheight = gradOutput->size[1];

  // norm ?
  double normer = (norm == 1) ? 1/sqrt(ichannels)/sqrt(ichannels) : 1;

  // zero gradInput: every edge below accumulates into it
  THTensor_(zero)(gradInput);

  // compute derivatives, and backpropagate output error to input
  if (dist == 0) {
    int x,y,k;
    for (k=0; k<ichannels; k++) {
      for (y=0; y<oheight; y++) {
        for (x=0; x<owidth; x++) {
          if (x < owidth-1) {
            double partial_d = THTensor_(get3d)(input, k, y, x) - THTensor_(get3d)(input, k, y, x+1);
            // a zero difference contributes nothing; skipping the division avoids 0/0
            if (partial_d != 0) partial_d /= THTensor_(get3d)(output, 0, y, x);
            partial_d *= THTensor_(get3d)(gradOutput, 0, y, x) * normer;
            THTensor_(set3d)(gradInput, k, y, x, partial_d + THTensor_(get3d)(gradInput, k, y, x));
            THTensor_(set3d)(gradInput, k, y, x+1, -partial_d + THTensor_(get3d)(gradInput, k, y, x+1));
          }
          if (y < oheight-1) {
            double partial_d = THTensor_(get3d)(input, k, y, x) - THTensor_(get3d)(input, k, y+1, x);
            if (partial_d != 0) partial_d /= THTensor_(get3d)(output, 1, y, x);
            partial_d *= THTensor_(get3d)(gradOutput, 1, y, x) * normer;
            THTensor_(set3d)(gradInput, k, y, x, partial_d + THTensor_(get3d)(gradInput, k, y, x));
            THTensor_(set3d)(gradInput, k, y+1, x, -partial_d + THTensor_(get3d)(gradInput, k, y+1, x));
          }
        }
      }
    }

    // Cosine
  } else {
    int x,y,k;
    for (y=0; y<oheight; y++) {
      for (x=0; x<owidth; x++) {
        // squared norms of this pixel (A), its right (B) and lower (C)
        // neighbour, and the dot products A.B and A.C
        double sum_A = 0;
        double sum_B = 0;
        double sum_C = 0;
        double sum_AB = 0;
        double sum_AC = 0;

        if (norm) {
          for (k=0; k<ichannels; k++) {
            sum_A += square(THTensor_(get3d)(input, k, y, x));
            if (x < owidth-1) {
              sum_B += square(THTensor_(get3d)(input, k, y, x+1));
              sum_AB += THTensor_(get3d)(input, k, y, x) * THTensor_(get3d)(input, k, y, x+1);
            }
            if (y < oheight-1) {
              sum_C += square(THTensor_(get3d)(input, k, y+1, x));
              sum_AC += THTensor_(get3d)(input, k, y, x) * THTensor_(get3d)(input, k, y+1, x);
            }
          }
        }

        // term1 = 1/(|A||B|), term2 = A.B/(|A|^3 |B|), term3 = A.B/(|B|^3 |A|);
        // only read when norm is set
        double term1 = 0, term2 = 0, term3 = 0, partial_d;
        double epsi = 1e-12;
        if (x < owidth-1) {
          if (norm) {
            // exponents are written as floating-point literals: 1/2 and 3/2
            // are integer divisions (0 and 1) and would drop the square roots
            term1 = 1 / ( pow(sum_A, 0.5) * pow(sum_B, 0.5) + epsi );
            term2 = sum_AB / ( pow(sum_A, 1.5) * pow(sum_B, 0.5) + epsi );
            term3 = sum_AB / ( pow(sum_B, 1.5) * pow(sum_A, 0.5) + epsi );
          }
          for (k=0; k<ichannels; k++) {
            // gradient w.r.t. this pixel
            if (norm) {
              partial_d = term2 * THTensor_(get3d)(input, k, y, x)
                - term1 * THTensor_(get3d)(input, k, y, x+1);
            } else {
              partial_d = -THTensor_(get3d)(input, k, y, x+1);
            }
            partial_d *= THTensor_(get3d)(gradOutput, 0, y, x);
            THTensor_(set3d)(gradInput, k, y, x, partial_d + THTensor_(get3d)(gradInput, k, y, x));

            // gradient w.r.t. the right-hand neighbour
            if (norm) {
              partial_d = term3 * THTensor_(get3d)(input, k, y, x+1)
                - term1 * THTensor_(get3d)(input, k, y, x);
            } else {
              partial_d = -THTensor_(get3d)(input, k, y, x);
            }
            partial_d *= THTensor_(get3d)(gradOutput, 0, y, x);
            THTensor_(set3d)(gradInput, k, y, x+1, partial_d + THTensor_(get3d)(gradInput, k, y, x+1));
          }
        }
        if (y < oheight-1) {
          if (norm) {
            term1 = 1 / ( pow(sum_A, 0.5) * pow(sum_C, 0.5) + epsi );
            term2 = sum_AC / ( pow(sum_A, 1.5) * pow(sum_C, 0.5) + epsi );
            term3 = sum_AC / ( pow(sum_C, 1.5) * pow(sum_A, 0.5) + epsi );
          }
          for (k=0; k<ichannels; k++) {
            // gradient w.r.t. this pixel
            if (norm) {
              partial_d = term2 * THTensor_(get3d)(input, k, y, x)
                - term1 * THTensor_(get3d)(input, k, y+1, x);
            } else {
              partial_d = -THTensor_(get3d)(input, k, y+1, x);
            }
            partial_d *= THTensor_(get3d)(gradOutput, 1, y, x);
            THTensor_(set3d)(gradInput, k, y, x, partial_d + THTensor_(get3d)(gradInput, k, y, x));

            // gradient w.r.t. the lower neighbour
            if (norm) {
              partial_d = term3 * THTensor_(get3d)(input, k, y+1, x)
                - term1 * THTensor_(get3d)(input, k, y, x);
            } else {
              partial_d = -THTensor_(get3d)(input, k, y, x);
            }
            partial_d *= THTensor_(get3d)(gradOutput, 1, y, x);
            THTensor_(set3d)(gradInput, k, y+1, x, partial_d + THTensor_(get3d)(gradInput, k, y+1, x));
          }
        }
      }
    }
  }

  return 1;
}
Beispiel #26
0
/*
 * Forward pass of the Jitter module: crops (and optionally horizontally
 * flips) every image of a batch.
 *
 * Lua arguments: 1 = module table (output, xstart, ystart, xcrop, ycrop,
 * randflip), 2 = input tensor indexed as (batch, y, x, channel).
 * The output has size (batch, size[1]-ycrop, size[2]-xcrop, channels).
 * xstart / ystart are 1-based offsets of the crop window; with
 * randflip == 1 the window is read right-to-left.
 */
static int nxn_(Jitter_updateOutput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);
  int xstart = luaT_getfieldcheckint(L, 1, "xstart");
  int ystart = luaT_getfieldcheckint(L, 1, "ystart");
  int xcrop = luaT_getfieldcheckint(L, 1, "xcrop");
  int ycrop = luaT_getfieldcheckint(L, 1, "ycrop");
  int hflip = luaT_getfieldcheckint(L, 1, "randflip");

  int bs   = input->size[0];
  int outy = input->size[1] - ycrop;
  int outx = input->size[2] - xcrop;
  int channels = input->size[3];

  THTensor_(resize4d)(output, bs, outy, outx, channels);

  real* idata = THTensor_(data)(input);
  real* odata = THTensor_(data)(output);

  /* strides are kept as long so that offsets into large tensors do not
     overflow int arithmetic */
  long istr0 = input->stride[0];
  long istr1 = input->stride[1];
  long istr2 = input->stride[2];
  long istr3 = input->stride[3];

  long ostr0 = output->stride[0];
  long ostr1 = output->stride[1];
  long ostr2 = output->stride[2];
  long ostr3 = output->stride[3];

  int batchidx;
  #pragma omp parallel for private(batchidx)
  for(batchidx=0; batchidx<bs; batchidx++)
  {
     /* declared inside the parallel loop body: in C, OpenMP only privatises
        the counter of the associated loop, so inner counters declared
        outside would be shared between threads (a data race) */
     int y, x, ch;
     for (y = 0; y<outy; y++)
     {
        long srcy = y+ystart-1;
        for(x = 0; x<outx; x++)
        {
           /* jittering + hflip reads the crop window mirrored; plain
              jittering reads it left-to-right */
           long srcx = (hflip==1) ? (xstart-1+outx-1-x) : (x+xstart-1);
           for (ch = 0; ch < channels; ch++)
           {
              odata[batchidx*ostr0 + y*ostr1 + x*ostr2 + ch*ostr3] = idata[batchidx*istr0 + srcy*istr1 + srcx*istr2 + ch*istr3];
           }
        }
     }
  }

  return 1;
}
Beispiel #27
0
/*
 * Lua entry point: backward pass of SpatialAdaptiveMaxPooling.
 *
 * Lua stack on entry:
 *   1 = module table (fields "indices" and "gradInput" are read)
 *   2 = input tensor
 *   3 = gradOutput tensor
 *
 * gradInput is resized like input and zeroed, then every frame is passed to
 * the _frame helper together with two pointers into the indices buffer that
 * lie one full batch of output frames apart.
 *
 * Returns 1 result to Lua: whatever sits on top of the stack, presumably the
 * "gradInput" field fetched last -- confirm that luaT_getfieldcheckudata
 * leaves the field on the stack, and keep the lookup order if so.
 */
static int nn_(SpatialAdaptiveMaxPooling_updateGradInput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_Tensor);
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);

  /* from here on gradOutput is a contiguous handle owned by this function */
  gradOutput = THTensor_(newContiguous)(gradOutput);

  /* gradInput mirrors the shape of input and starts out as all zeros */
  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);

  /* a 4D input has a leading batch axis, which shifts height/width by one */
  const int has_batch = (input->nDimension == 4);
  const int dimh = has_batch ? 2 : 1;
  const int dimw = has_batch ? 3 : 2;
  const long nbatch = has_batch ? input->size[0] : 1;

  /* the slice axis is the one directly in front of the height axis */
  const int nslices = input->size[dimh-1];
  const int iheight = input->size[dimh];
  const int iwidth = input->size[dimw];
  const int oheight = gradOutput->size[dimh];
  const int owidth = gradOutput->size[dimw];

  /* raw storage pointers; taken after the resize above */
  real *gradInput_data = THTensor_(data)(gradInput);
  real *gradOutput_data = THTensor_(data)(gradOutput);
  real *indices_data = THTensor_(data)(indices);

  if (input->nDimension != 3)
  {
    /* batch mode: one independent frame per sample */
    const long in_frame = (long)nslices * iwidth * iheight;
    const long out_frame = (long)nslices * owidth * oheight;
    long b;
#pragma omp parallel for private(b)
    for (b = 0; b < nbatch; b++)
    {
      nn_(SpatialAdaptiveMaxPooling_updateGradInput_frame)(gradInput_data + b*in_frame,
                                                           gradOutput_data + b*out_frame,
                                                           indices_data + (b+nbatch)*out_frame,
                                                           indices_data + b*out_frame,
                                                           nslices,
                                                           iwidth, iheight,
                                                           owidth, oheight);
    }
  }
  else
  {
    /* single frame: the second index plane starts right after the first */
    nn_(SpatialAdaptiveMaxPooling_updateGradInput_frame)(gradInput_data, gradOutput_data,
                                                         indices_data+nslices*owidth*oheight, indices_data,
                                                         nslices,
                                                         iwidth, iheight,
                                                         owidth, oheight);
  }

  /* drop the contiguous handle acquired above */
  THTensor_(free)(gradOutput);

  return 1;
}
Beispiel #28
0
/*
 * Lua entry point: backward pass of VolumetricAveragePooling.
 *
 * Lua stack on entry:
 *   1 = module table (fields dT, dW, dH, kT, kW, kH and "gradInput" are read)
 *   2 = input tensor
 *   3 = gradOutput tensor
 *
 * gradInput is resized like input and zeroed; the actual gradient is
 * computed by the _frame helper, once for a 4D input or once per sample
 * otherwise.  kT/kW/kH and dT/dW/dH are forwarded to the helper unchanged
 * (presumably pooling window extents and strides -- confirm in the helper).
 *
 * Returns 1 result to Lua: whatever sits on top of the stack, presumably the
 * "gradInput" field fetched last -- confirm against luaT, and keep the
 * lookup order if so.
 */
static int nn_(VolumetricAveragePooling_updateGradInput)(lua_State *L) {
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  int dT = luaT_getfieldcheckint(L, 1, "dT");
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int kT = luaT_getfieldcheckint(L, 1, "kT");
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput",
                                                torch_Tensor);

  /* from here on gradOutput is a contiguous handle owned by this function */
  gradOutput = THTensor_(newContiguous)(gradOutput);

  /* gradInput mirrors the shape of input and starts out as all zeros */
  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);

  /* a 5D input has a leading batch axis, which shifts every other axis by one */
  const int lead = (input->nDimension == 5) ? 1 : 0;

  const int nslices = input->size[lead];
  const int itime = input->size[lead + 1];
  const int iheight = input->size[lead + 2];
  const int iwidth = input->size[lead + 3];
  const int otime = gradOutput->size[lead + 1];
  const int oheight = gradOutput->size[lead + 2];
  const int owidth = gradOutput->size[lead + 3];

  /* raw storage pointers; taken after the resize above */
  real *gradInput_data = THTensor_(data)(gradInput);
  real *gradOutput_data = THTensor_(data)(gradOutput);

  if (input->nDimension == 4) {
    /* non-batch mode: the whole tensor is a single frame */
    nn_(VolumetricAveragePooling_updateGradInput_frame)(
      gradInput_data, gradOutput_data, nslices,
      itime, iwidth, iheight, otime, owidth, oheight,
      kT, kW, kH, dT, dW, dH);
  } else {
    /* batch mode: step through both buffers one sample at a time */
    const long nBatch = input->size[0];
    const long istride = nslices * itime * iwidth * iheight;
    const long ostride = nslices * otime * owidth * oheight;
    long b;

#pragma omp parallel for private(b)
    for (b = 0; b < nBatch; b++) {
      real *gin = gradInput_data + b * istride;
      real *gout = gradOutput_data + b * ostride;

      nn_(VolumetricAveragePooling_updateGradInput_frame)(
        gin, gout, nslices,
        itime, iwidth, iheight, otime, owidth, oheight,
        kT, kW, kH, dT, dW, dH);
    }
  }

  /* drop the contiguous handle acquired above */
  THTensor_(free)(gradOutput);
  return 1;
}
Beispiel #29
0
/*
 * Lua entry point: accumulate the parameter gradients of SpatialConvolution.
 *
 * Lua stack on entry:
 *   1 = module table (fields dW, dH, nOutputPlane, gradWeight, gradBias)
 *   2 = input tensor
 *   3 = gradOutput tensor
 *   4 = optional scale factor (defaults to 1)
 *
 * Effects:
 *   gradBias[k] += scale * (sum over plane k of gradOutput), summed over
 *                  every sample when input is not 3D;
 *   gradWeight  is accumulated by conv2DRevger (3D input) or
 *                  conv2DRevgerm (any other input).
 *
 * Raises an argument error when the feature dimension of gradOutput does
 * not equal nOutputPlane.  Returns 0 (no values handed back to Lua).
 */
static int nn_(SpatialConvolution_accGradParameters)(lua_State *L)
{
    THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
    THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
    real scale = luaL_optnumber(L, 4, 1);
    int dW = luaT_getfieldcheckint(L, 1, "dW");
    int dH = luaT_getfieldcheckint(L, 1, "dH");
    int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");

    THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_Tensor);
    THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor);

    int dimw = 2;
    int dimh = 1;

    real *gradBias_data;
    real *gradOutput_data;
    long noutSlice;

    /* the feature axis is 1 for a batched (4D) input and 0 otherwise */
    THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 1, "Number of output features is not equal to nOutputPlane" );

    if (input->nDimension == 4)
    {
        dimw++;
        dimh++;
    }

    /*
     * The bias loops below walk gradOutput through a raw pointer and assume
     * a dense plane-after-plane layout, so work on a contiguous version.
     * Taken only after the argument check so that a failed check does not
     * leave the extra reference behind; released before returning.
     */
    gradOutput = THTensor_(newContiguous)(gradOutput);

    /* gradient to bias */
    /* NOTE(review): gradBias is indexed as a dense vector -- confirm it is
       always contiguous for this module. */
    gradBias_data = THTensor_(data)(gradBias);
    gradOutput_data = THTensor_(data)(gradOutput);
    noutSlice = gradOutput->size[dimh]*gradOutput->size[dimw];

    if (input->nDimension == 3)
    {
        long k;
        /* each iteration writes only gradBias_data[k], so planes are independent */
        #pragma omp parallel for private(k)
        for(k = 0; k < nOutputPlane; k++)
        {
            real *ptr_gradOutput = gradOutput_data + k*noutSlice;
            long l;
            for(l = 0; l < noutSlice; l++)
                gradBias_data[k] += scale*ptr_gradOutput[l];
        }

        /* gradient to kernels */
        THTensor_(conv2DRevger)(gradWeight, 1.0, scale, input, gradOutput, dH, dW);
    }
    else
    {
        long k;
        /* parallel over output planes; the sample loop stays inside so that
           each gradBias_data[k] is touched by a single thread */
        #pragma omp parallel for private(k)
        for(k = 0; k < nOutputPlane; k++)
        {
            long p;
            for(p = 0; p < input->size[0]; p++)
            {
                /* plane k of sample p */
                real *ptr_gradOutput = gradOutput_data + p*nOutputPlane*noutSlice + k*noutSlice;
                long l;
                for(l = 0; l < noutSlice; l++)
                    gradBias_data[k] += scale*ptr_gradOutput[l];
            }
        }
        /* gradient to kernels */
        THTensor_(conv2DRevgerm)(gradWeight, 1.0, scale, input, gradOutput, dH, dW);
    }

    /* drop the contiguous handle acquired above */
    THTensor_(free)(gradOutput);

    return 0;
}
Beispiel #30
0
/*
 * Lua entry point: destroy the SVQP2 solver held in the userdata at stack
 * index 1.  The argument is type-checked against QPSolver_id before the
 * object is deleted.  Returns 0 (no values handed back to Lua).
 */
static int QPSolver_free(lua_State *L)
{
  SVQP2 *solver = (SVQP2*)luaT_checkudata(L, 1, QPSolver_id);

  delete solver;
  return 0;
}