Example #1
static int nn_(Min_updateOutput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  int dimension = luaT_getfieldcheckint(L, 1, "dimension")-1;
  THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_Tensor);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);

  THLongStorage *dim;
  long i;

  luaL_argcheck(L, dimension >= 0 && dimension < input->nDimension, 2, "dimension out of range");

  dim = THLongStorage_newWithSize(input->nDimension);
  for(i = 0; i < input->nDimension; i++)
    dim->data[i] = input->size[i];
  dim->data[dimension] = 1;
  THTensor_(resize)(output, dim, NULL);
  THTensor_(resize)(indices, dim, NULL);
  THLongStorage_free(dim);

  TH_TENSOR_DIM_APPLY3(real, output, real, input, real, indices, dimension,
                       long theIndex = 0;
                       real theMin = input_data[0];
                       for(i = 1; i < input_size; i++)
                       {
                         if(input_data[i*input_stride] < theMin)
                         {
                           theIndex = i;
                           theMin = input_data[i*input_stride];
                         }
                       }
                       *indices_data = theIndex+1;
                       *output_data = theMin;)

  if(output->nDimension > 1)
  {
    /* squeeze out the reduced singleton dimension; the snippet was
       truncated here, so this continuation is a plausible reconstruction */
    THTensor_(select)(output, NULL, dimension, 0);
    THTensor_(select)(indices, NULL, dimension, 0);
  }

  return 1;
}
Example #2
void THLab_(max)(THTensor *values_, THLongTensor *indices_, THTensor *t, int dimension)
{
  THLongStorage *dim;
  long i;

  THArgCheck(dimension >= 0 && dimension < THTensor_(nDimension)(t), 2, "dimension out of range");

  dim = THTensor_(newSizeOf)(t);
  THLongStorage_set(dim, dimension, 1);
  THTensor_(resize)(values_, dim);
  THLongTensor_resize(indices_, dim);
  THLongStorage_free(dim);

  TH_TENSOR_DIM_APPLY3(real, Real, t, real, Real, values_, long, Long, indices_, dimension,
                       long theIndex = 0;
                       real theMax = t_data[0];
                       for(i = 1; i < t_size; i++)
                       {
                         if(t_data[i*t_stride] > theMax)
                         {
                           theIndex = i;
                           theMax = t_data[i*t_stride];
                         }
                       }
                       *indices__data = theIndex;
                       *values__data = theMax;);
}
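Note the indexing difference from Example #1: the Lua-facing nn binding stores theIndex+1 to match Lua's 1-based convention, whereas this C-level routine keeps the raw 0-based index.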
Example #3
/* helpful functions */
static void torch_(Tensor_c_readSize)(lua_State *L, int index, THLongStorage **size_)
{
  THLongStorage *size = NULL;
  long i;

  if( (size = luaT_toudata(L, index, torch_LongStorage_id)) )
  {
    THLongStorage_retain(size);
    *size_ = size;
  }
  else
  {
    /* otherwise read up to 4 numeric sizes directly from the Lua stack */
    size = THLongStorage_newWithSize(4);
    for(i = 0; i < 4; i++)
    {
      if(lua_isnone(L, index+i))
        THLongStorage_set(size, i, 0);
      else
      {
        if(lua_isnumber(L, index+i))
          THLongStorage_set(size, i, lua_tonumber(L, index+i));
        else
        {
          THLongStorage_free(size);
          luaL_error(L, "invalid argument %d: number expected", index+i);
        }
      }
    }
    *size_ = size;
  }
}
Example #4
THLongStorage* lab_checklongargs(lua_State *L, int index)
{
  THLongStorage *storage;
  int i;
  int narg = lua_gettop(L)-index+1; /* arguments from position index through the top of the stack */

  if(narg == 1 && luaT_toudata(L, index, torch_LongStorage_id))
  {
    THLongStorage *storagesrc = luaT_toudata(L, index, torch_LongStorage_id);
    storage = THLongStorage_newWithSize(storagesrc->size);
    THLongStorage_copy(storage, storagesrc);
  }
  else
  {
    storage = THLongStorage_newWithSize(narg);
    for(i = index; i < index+narg; i++)
    {
      if(!lua_isnumber(L, i))
      {
        THLongStorage_free(storage);
        luaL_argerror(L, i, "number expected");
      }
      storage->data[i-index] = lua_tonumber(L, i);
    }
  }
  return storage;
}
Example #5
void THNN_(GatedLinear_updateOutput)(
          THNNState *state,
          THTensor *input,
          THTensor *output,
          int dim)
{
  // size output to half of input
  dim = dim - TH_INDEX_BASE;
  const int64_t nIn = THTensor_(size)(input, dim);
  THArgCheck(nIn % 2 == 0, 2, "Halving dimension must be even. Dim %d is size %ld",
      dim + TH_INDEX_BASE, nIn);

  const int64_t inputSize = THTensor_(size)(input, dim) / 2;
  THLongStorage *newSizes = THTensor_(newSizeOf)(input);
  THLongStorage_set(newSizes, dim, inputSize);
  THTensor_(resize)(output, newSizes, NULL);

  // halve tensor
  THTensor *firstHalf = THTensor_(newNarrow)(input, dim, 0, inputSize);
  THTensor *secondHalf = THTensor_(newNarrow)(input, dim, inputSize, inputSize);

  // x = x1:cmul( sigmoid(x2) )
  THTensor_(sigmoid)(output, secondHalf);
  THTensor_(cmul)(output, output, firstHalf);

  THLongStorage_free(newSizes);
  THTensor_(free)(firstHalf);
  THTensor_(free)(secondHalf);
}
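Examples #1-#5 all follow the same descriptor lifecycle: allocate a THLongStorage describing the target shape, overwrite the entry for the dimension being changed, resize the destination tensor, and free the descriptor. Below is a minimal standalone sketch of that pattern, written against the concrete THFloatTensor type rather than the real/THTensor_() macros; the main() driver and the 4x6 shape are illustrative assumptions, not taken from any of the projects above.

#include <TH/TH.h>

int main(void)
{
  /* a 4x6 float tensor, to be reduced along dimension 1 */
  THFloatTensor *t   = THFloatTensor_newWithSize2d(4, 6);
  THFloatTensor *out = THFloatTensor_new();

  /* copy t's sizes, then collapse dimension 1 to a single slot */
  THLongStorage *dim = THFloatTensor_newSizeOf(t);
  THLongStorage_set(dim, 1, 1);

  /* shape out as 4x1; the tensor keeps its own copy of the sizes,
     so the descriptor can be freed right away */
  THFloatTensor_resize(out, dim, NULL);
  THLongStorage_free(dim);

  THFloatTensor_free(out);
  THFloatTensor_free(t);
  return 0;
}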
Example #6
static void tensorRandn(rpc::RPCMessage& raw_message) {
  thpp::Tensor *r = unpackRetrieveTensor(raw_message);
  thpp::Generator *_generator = unpackRetrieveGenerator(raw_message);
  THLongStorage *size = unpackTHLongStorage(raw_message);
  finalize(raw_message);
  r->randn(*_generator, size);
  THLongStorage_free(size);
}
Example #7
static int torch_(Tensor_resize)(lua_State *L)
{
  THTensor *tensor = luaT_checkudata(L, 1, torch_(Tensor_id));
  THLongStorage *size;

  torch_(Tensor_c_readSize)(L, 2, &size);

  THTensor_(resize)(tensor, size);
  THLongStorage_free(size);

  lua_settop(L, 1);
  return 1;
}
Example #8
static int torch_(Tensor_new)(lua_State *L)
{
  THTensor *tensor;
  THStorage *storage = NULL;
  long storageOffset = 0;
  THLongStorage *size = NULL;

  torch_(Tensor_c_readTensorStorageSize)(L, 1, 1, 1, 1,
                                               &storage, &storageOffset, &size);

  tensor = THTensor_(newWithStorage)(storage, storageOffset, size);
  THLongStorage_free(size);

  luaT_pushudata(L, tensor, torch_(Tensor_id));
  return 1;
}
Example #9
/*
 * Based on the implementation of the THTensor_(indexSelect) in torch7
 */
static void THCudaTensor_indexSelect(THCudaTensor *tensor, THCudaTensor *src, int dim, THLongTensor *index)
{
  long i, numel;
  THLongStorage *newSize;
  THCudaTensor *tSlice, *sSlice;
  long *index_data;

  THArgCheck(index->nDimension == 1, 3, "Index is supposed to be a vector");
  THArgCheck(dim < src->nDimension,4,"Indexing dim is out of bounds");
  THArgCheck(src->nDimension > 0,2,"Source tensor is empty");

  numel = THLongTensor_nElement(index);

  newSize = THLongStorage_newWithSize(src->nDimension);
  THLongStorage_rawCopy(newSize,src->size);
  newSize->data[dim] = numel;
  THCudaTensor_resize(tensor,newSize,NULL);
  THLongStorage_free(newSize);

  index = THLongTensor_newContiguous(index);
  index_data = THLongTensor_data(index);
  for (i=0; i<numel; i++)
  {
    if (src->nDimension > 1)
    {
      tSlice = THCudaTensor_new();
      sSlice = THCudaTensor_new();
      THCudaTensor_select(tSlice, tensor, dim, i);
      THCudaTensor_select(sSlice, src, dim, index_data[i]-1);
      THCudaTensor_copy(tSlice, sSlice);
      THCudaTensor_free(tSlice);
      THCudaTensor_free(sSlice);
    }
    else
    { // Copying a float between two device addresses is faster than
      // round-tripping it through host memory and back to the device
      THCudaCheck(cudaMemcpy(tensor->storage->data + tensor->storageOffset + i,
        src->storage->data + src->storageOffset + index_data[i]-1, sizeof(float), cudaMemcpyDeviceToDevice));
    }
  }
  THLongTensor_free(index);
}
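A concrete trace: for a 5x3 src with dim = 0 and the (1-based) index vector {2, 2, 4}, numel is 3, the result is resized to 3x3, and the loop copies rows 1, 1 and 3 of src (index_data[i]-1 converts Lua's 1-based indices to 0-based) slice by slice into the result.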
Example #10
void translate_rotate(THDoubleTensor *result,
                      THDoubleTensor *trans,
                      THDoubleTensor *quat,
                      THDoubleTensor *vect
                      )
{
  long outDimension      = quat->nDimension + vect->nDimension -1;
  THLongStorage *newSize = THLongStorage_newWithSize(outDimension);
  long *sd               = THLongStorage_data(newSize);

  long offset = 0;
  long quatStride   = quat->size[quat->nDimension-1];
  long transStride  = trans->size[trans->nDimension-1];
  long vectStride   = vect->size[vect->nDimension-1];
  long nElementQuat = THDoubleTensor_nElement(quat);
  long nElementVect = THDoubleTensor_nElement(vect);
  long nQuat        = nElementQuat / quatStride;
  long nTrans       = THDoubleTensor_nElement(trans) / transStride;

  long i,j;

  THArgCheck(nTrans == nQuat,                           2,
             "Different number of translations and rotations");
  THArgCheck(((transStride == 3) || (transStride == 4)),2,
             "translation vectors should be of length 3 or 4");
  THArgCheck(quatStride == 4,                           3,
             "quaternion is a vector of length 4");
  THArgCheck(((vectStride == 3) || (vectStride == 4)),  4,
             "point vectors should be of length 3 or 4");

  for (i = 0 ; i < quat->nDimension-1 ; i++){
    sd[offset] = quat->size[i];
    offset += 1;
  }

  for (i = 0 ; i < vect->nDimension-1 ; i++){
    sd[offset] = vect->size[i];
    offset += 1;
  }

  sd[offset] = vectStride;

  THDoubleTensor_resize(result, newSize, NULL);
  if (vectStride == 4) // in case homogeneous coordinates are requested
    THDoubleTensor_fill(result,1);
  THLongStorage_free(newSize);

  double *res = THDoubleTensor_data(result);
  double *q   = THDoubleTensor_data(quat);
  double *t   = THDoubleTensor_data(trans);
  double *v   = THDoubleTensor_data(vect);

  double x1, y1, z1;


  for (j = 0; j < nElementQuat; j += quatStride)
    {
#pragma omp parallel for private(i,x1,y1,z1)
      for (i = 0; i < nElementVect; i += vectStride)
        {
          res[i]   =   v[i] + t[0];
          res[i+1] = v[i+1] + t[1];
          res[i+2] = v[i+2] + t[2];

          x1 = q[1]*res[i+2] - q[2]*res[i+1];
          y1 = q[2]*res[i]   - q[0]*res[i+2];
          z1 = q[0]*res[i+1] - q[1]*res[i];

          res[i]   += 2 * (q[3]*x1 + q[1]*z1 - q[2]*y1);
          res[i+1] += 2 * (q[3]*y1 + q[2]*x1 - q[0]*z1);
          res[i+2] += 2 * (q[3]*z1 + q[0]*y1 - q[1]*x1);
        }
      q   += quatStride;
      t   += transStride;
      res += nElementVect;
    }
}
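The inner loop here (and in the next example) is the expanded cross-product form of quaternion rotation: with q stored as (x, y, z, w) in q[0..3] and u = (x, y, z), each point v is first translated by t and then rotated as v' = v + 2w(u × v) + 2u × (u × v), where x1, y1, z1 hold the intermediate cross product u × v. This avoids materializing the 3x3 rotation matrix.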
Example #11
void rotate_by_quat(THDoubleTensor *result,
                    THDoubleTensor *quat,
                    THDoubleTensor *vect
                    )
{
  long outDimension      = quat->nDimension + vect->nDimension -1;
  THLongStorage *newSize = THLongStorage_newWithSize(outDimension);
  long *sd               = THLongStorage_data(newSize);

  long offset         = 0;
  // TODO look at torch.min() or torch.max() to allow vector in any dimension.
  // which dimension contains quat or vect (default to NxD)
  char DHW            = 0;
  long quatDim        = quat->nDimension-1;
  long vectDim        = vect->nDimension-1;
  long quatSize       = quat->size[quatDim]; // == 4
  long vectSize       = vect->size[vectDim]; // == 3 or 4
  long nElementQuat   = THDoubleTensor_nElement(quat);
  long nElementVect   = THDoubleTensor_nElement(vect);
  // step to get to next dimension
  long quatDimStride  = 1;
  long vectDimStride  = 1;
  // step to get to next element
  long quatElemStride = quatSize;
  long vectElemStride = vectSize;
 
  long i,j;

  // check for DxN
  // quaternions and vectors are either Nx3,4 or 3,4 x N but must be consistent.
  if ((quatSize != 4) || ((vectSize != 3) && vectSize != 4)) {
    vectDim        = 0; // test DxN
    quatDim        = 0;
    quatSize       = quat->size[quatDim];
    vectSize       = vect->size[vectDim];
    quatElemStride = 1;
    vectElemStride = 1;
    quatDimStride  = quat->stride[quatDim];
    vectDimStride  = vect->stride[vectDim];
    DHW = 1;
  }

  THArgCheck(quatSize == 4,                         2,
             "quaternion is a vector of length 4");
  THArgCheck(((vectSize == 3) || (vectSize == 4)),3,
             "point vectors should be of length 3 or 4");

  long n_vect = nElementVect / vectSize;
  long n_quat = nElementQuat / quatSize;

  // get dimensions for the output
  long start    = 0;
  long quat_end = quat->nDimension-1;
  long vect_end = vect->nDimension-1;
  if (DHW > 0) {
    start++;
    quat_end++;
    vect_end++;
  }
  // quaternion dimensions
  for (i = start ; i < quat_end ; i++){
    sd[offset] = quat->size[i];
    offset += 1;
  }

  if (DHW > 0) {
    // output nquat x 3,4 x nvect
    sd[offset] = vectSize;
    offset += 1;
  }

  // vector dimensions
  for (i = start ; i < vect_end ; i++){
    sd[offset] = vect->size[i];
    offset += 1;
  }

  if (DHW==0) {
    // output nquat x nvect x 3
    sd[offset] = vectSize;
    offset += 1;
  }

  // resize the output
  THDoubleTensor_resize(result, newSize, NULL);

  if (vectSize == 4) // in case homogeneous coordinates are requested
    THDoubleTensor_fill(result,1);
  THLongStorage_free(newSize);

  double *res = THDoubleTensor_data(result);
  double *q   = THDoubleTensor_data(quat);
  double *v   = THDoubleTensor_data(vect);

  double x1, y1, z1;
  // how to step through the result
  long resDimStride  = result->stride[outDimension-1];
  long resElemStride = vectSize;
  long resQuatStride = 0;

  if (DHW>0) {
    resDimStride  = result->stride[quat->nDimension-1];
    resElemStride = result->stride[outDimension-1];
    
    if (n_quat > 1) {
      resQuatStride = result->stride[0] - resDimStride;
    }
  }
  double * qres = res;
  double * res0 = res;
  double * res1 = res0 + resDimStride;
  double * res2 = res1 + resDimStride;

  double * q0 = q;
  double * q1 = q0+quatDimStride;
  double * q2 = q1+quatDimStride;
  double * q3 = q2+quatDimStride;
  for (j = 0; j < n_quat; j++)
    {
      double * v0 = v;
      double * v1 = v0+vectDimStride;
      double * v2 = v1+vectDimStride;
#pragma omp parallel for private(i,x1,y1,z1)
      for (i = 0; i < n_vect; i++)
        {
          x1 = (*q1)*(*v2) - (*q2)*(*v1);
          y1 = (*q2)*(*v0) - (*q0)*(*v2);
          z1 = (*q0)*(*v1) - (*q1)*(*v0);

          (*res0) = (*v0) + 2 * ((*q3)*x1 + (*q1)*z1 - (*q2)*y1);
          (*res1) = (*v1) + 2 * ((*q3)*y1 + (*q2)*x1 - (*q0)*z1);
          (*res2) = (*v2) + 2 * ((*q3)*z1 + (*q0)*y1 - (*q1)*x1);

          v0+=vectElemStride;  v1+=vectElemStride;  v2+=vectElemStride;
          res0+=resElemStride; res1+=resElemStride; res2+=resElemStride;
      }
      q0+=quatElemStride; q1+=quatElemStride; q2+=quatElemStride; q3+=quatElemStride;
      // facilitate nquats x 3 x nvect output
      res0 = res0 + resQuatStride;
      res1 = res0 + resDimStride;
      res2 = res1 + resDimStride;
    }
}
Example #12
static int nn_(VolumetricMaxPooling_updateOutput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  int kT = luaT_getfieldcheckint(L, 1, "kT");
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int dT = luaT_getfieldcheckint(L, 1, "dT");
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_Tensor);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);
  long nslices;
  long itime;
  long iheight;
  long iwidth;
  long otime;
  long oheight;
  long owidth;
  real *input_data;
  real *output_data;
  real *indices_data;

  luaL_argcheck(L, input->nDimension == 4 || input->nDimension == 5, 2, "4D or 5D (batch-mode) tensor expected");

  int dimN = 0;
  int dimt = 1;
  int dimh = 2;
  int dimw = 3;

  if (input->nDimension == 5) {
    dimN++;
    dimt++;
    dimh++;
    dimw++;
  }

  luaL_argcheck(L, input->size[dimw] >= kW && input->size[dimh] >= kH && input->size[dimt] >= kT, 2, "input image smaller than kernel size");

  /* sizes */
  nslices = input->size[dimN];
  itime = input->size[dimt];
  iheight = input->size[dimh];
  iwidth = input->size[dimw];
  otime = (itime - kT) / dT + 1;
  oheight = (iheight - kH) / dH + 1;
  owidth = (iwidth - kW) / dW + 1;

  /* get contiguous input */
  input = THTensor_(newContiguous)(input);

  if (input->nDimension == 4) { /* non-batch mode */
    /* resize output */
    THTensor_(resize4d)(output, nslices, otime, oheight, owidth);
    /* indices will contain ti,i,j locations for each output point */
    THTensor_(resize5d)(indices, 3, nslices, otime, oheight, owidth);
    
    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);
    indices_data = THTensor_(data)(indices);
    
    nn_(VolumetricMaxPooling_updateOutput_frame)(input_data, output_data,
                                                 indices_data+nslices*otime*owidth*oheight*2,
                                                 indices_data+nslices*otime*owidth*oheight,
                                                 indices_data,
                                                 nslices,
                                                 itime, iwidth, iheight,
                                                 otime, owidth, oheight,
                                                 kT, kW, kH, dT, dW, dH);
  }
  else { /* batch mode */
    long p;
    long nBatch = input->size[0];

    long istride = nslices*itime*iwidth*iheight;
    long ostride = nslices*otime*owidth*oheight;

    /* resize output */
    THTensor_(resize5d)(output, nBatch, nslices, otime, oheight, owidth);
    /* indices will contain ti,i,j locations for each output point */

    THLongStorage* size = THLongStorage_newWithSize(6);
    size->data[0] = 3; size->data[1] = nBatch;
    size->data[2] = nslices; size->data[3] = otime; 
    size->data[4] = oheight; size->data[5] = owidth;
    THTensor_(resize)(indices, size, NULL); /* resize6d not available */
    //TODO: Replace with resize6d when available
    //THTensor_(resize6d)(indices, 3, nBatch, nslices, otime, oheight, owidth);

    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);
    indices_data = THTensor_(data)(indices);

#pragma omp parallel for private(p)
    for (p=0; p < nBatch; p++) 
    {
      nn_(VolumetricMaxPooling_updateOutput_frame)(
                    input_data+p*istride, 
                    output_data+p*ostride,
                    indices_data+(p+nBatch+nBatch)*ostride, 
                    indices_data+(p+nBatch)*ostride, 
                    indices_data+p*ostride,
                    nslices,
                    itime, iwidth, iheight,
                    otime, owidth, oheight,
                    kT, kW, kH, dT, dW, dH);
    }

    THLongStorage_free(size);
  }

  /* cleanup */
  THTensor_(free)(input);
  return 1;
}
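The output shape uses the usual valid-pooling arithmetic, o = (i - k)/d + 1 per dimension: for instance, a 16x32x32 input volume with kT = kW = kH = 2 and stride 2 pools down to 8x16x16.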