Beispiel #1
0
/*
 * Based on the implementation of the THTensor_(indexCopy) in torch7
 */
static void THCudaTensor_indexCopy(THCudaTensor *tensor, int dim, THLongTensor *index, THCudaTensor *src)
{
  long i, numel;
  THCudaTensor *tSlice, *sSlice;
  long *index_data;

  numel = THLongTensor_nElement(index);
  THArgCheck(index->nDimension == 1, 3, "Index is supposed to be a vector");
  THArgCheck(dim < src->nDimension,4,"Indexing dim is out of bounds");

  index = THLongTensor_newContiguous(index);
  index_data = THLongTensor_data(index);

  for (i=0; i<numel; i++)
  {
    if (tensor->nDimension > 1 )
    {
      tSlice = THCudaTensor_new();
      sSlice = THCudaTensor_new();
      THCudaTensor_select(tSlice, tensor, dim, index_data[i]-1);
      THCudaTensor_select(sSlice, src, dim, i);
      THCudaTensor_copy(tSlice, sSlice);
      THCudaTensor_free(tSlice);
      THCudaTensor_free(sSlice);
    }
    else
    {
      // It's faster to copy a float from an address in the device to another address in the device than 
      // retrieving it to the host memory and recopy it to the device memory
      THCudaCheck(cudaMemcpy(tensor->storage->data + tensor->storageOffset + index_data[i]-1,\
        src->storage->data + src->storageOffset + i, sizeof(float), cudaMemcpyDeviceToDevice));
    }
  }
  THLongTensor_free(index);
}
Beispiel #2
0
THCudaTensor *THCudaTensor_newClone(THCState *state, THCudaTensor *self)
{
  THCudaTensor *tensor = THCudaTensor_new(state);
  THCudaTensor_resizeAs(state, tensor, self);
  THCudaTensor_copy(state, tensor, self);
  return tensor;
}
Beispiel #3
0
/*
 * Based on the implementation of the THTensor_(indexCopy) in torch7
 */
static void THCudaTensor_indexFill(THCudaTensor *tensor, int dim, THLongTensor *index, float val)
{
  long i, numel;
  THCudaTensor *tSlice;
  long *index_data;

  numel = THLongTensor_nElement(index);
  THArgCheck(index->nDimension == 1, 3, "Index is supposed to be a vector");
  THArgCheck(dim < tensor->nDimension,4,"Indexing dim is out of bounds");

  index = THLongTensor_newContiguous(index);
  index_data = THLongTensor_data(index);
  
  for (i=0; i<numel; i++)
  {
    if (tensor->nDimension > 1 )
    {
      // create a new CudaTensor
      tSlice = THCudaTensor_new();
      // set its storage to point to the corresponding storage in tensor
      THCudaTensor_select(tSlice, tensor,dim,index_data[i]-1);
      THCudaTensor_fill(tSlice, val);
      THCudaTensor_free(tSlice);
    }
    else
    {
      THCudaTensor_set1d(tensor,index_data[i]-1,val);
    }
  }
  THLongTensor_free(index);
}
Beispiel #4
0
static int cutorch_CudaTensorOperator___sub__(lua_State *L)
{
  THCudaTensor *tensor1 = luaT_toudata(L, 1, "torch.CudaTensor");
  THCudaTensor *tensor2 = luaT_toudata(L, 2, "torch.CudaTensor");
  THCudaTensor *r;

  if(!tensor1 && !tensor2)
    luaL_error(L, "expecting two Tensors or one Tensor and one number");
  else
  {
    r = THCudaTensor_new();
    luaT_pushudata(L, r, "torch.CudaTensor");

    if(!tensor1 && tensor2)
    {
      THCudaTensor_resizeAs(r, tensor2);
      THCudaTensor_fill(r, luaL_checknumber(L, 1));
      THCudaTensor_cadd(r, r, -1, tensor2);
    }
    else if(tensor1 && !tensor2)
    {
      THCudaTensor_resizeAs(r, tensor1);
      THCudaTensor_copy(r, tensor1);
      THCudaTensor_add(r, r, -luaL_checknumber(L, 2));
    }
    else
    {
      THCudaTensor_resizeAs(r, tensor1);
      THCudaTensor_copy(r, tensor1);
      THCudaTensor_cadd(r, r, -1, tensor2);
    }
  }
  return 1;
}
Beispiel #5
0
static int cutorch_CudaTensorOperator___unm__(lua_State *L)
{
  THCudaTensor *tensor = luaT_checkudata(L, 1, "torch.CudaTensor");
  THCudaTensor *r;

  r = THCudaTensor_new();
  luaT_pushudata(L, r, "torch.CudaTensor");
  THCudaTensor_resizeAs(r, tensor);
  THCudaTensor_copy(r, tensor);
  THCudaTensor_mul(r, r, -1);

  return 1;
}
Beispiel #6
0
TensorWrapper::TensorWrapper(cuda::GpuMat & mat, THCState *state) {

    this->definedInLua = false;

    if (mat.empty()) {
        this->typeCode = CV_CUDA;
        this->tensorPtr = nullptr;
        return;
    }

    this->typeCode = CV_CUDA;

    THCudaTensor *outputPtr = THCudaTensor_new(state);

    // Build new storage on top of the Mat
    outputPtr->storage = THCudaStorage_newWithData(
            state,
            reinterpret_cast<float *>(mat.data),
            mat.step * mat.rows * mat.channels() / cv::getElemSize(mat.depth())
    );

    int sizeMultiplier;
    if (mat.channels() == 1) {
        outputPtr->nDimension = 2;
        sizeMultiplier = cv::getElemSize(mat.depth());
    } else {
        outputPtr->nDimension = 3;
        sizeMultiplier = mat.elemSize1();
    }

    outputPtr->size = static_cast<long *>(THAlloc(sizeof(long) * outputPtr->nDimension));
    outputPtr->stride = static_cast<long *>(THAlloc(sizeof(long) * outputPtr->nDimension));

    if (mat.channels() > 1) {
        outputPtr->size[2] = mat.channels();
        outputPtr->stride[2] = 1;
    }

    outputPtr->size[0] = mat.rows;
    outputPtr->size[1] = mat.cols;

    outputPtr->stride[0] = mat.step / sizeMultiplier;
    outputPtr->stride[1] = mat.channels();

    outputPtr->storageOffset = 0;

    // Make OpenCV treat underlying data as user-allocated
    mat.refcount = nullptr;

    this->tensorPtr = reinterpret_cast<THByteTensor *>(outputPtr);
}
Beispiel #7
0
static int cutorch_CudaTensorOperator___mul__(lua_State *L)
{
  THCudaTensor *tensor1 = luaT_toudata(L, 1, "torch.CudaTensor");
  THCudaTensor *tensor2 = luaT_toudata(L, 2, "torch.CudaTensor");
  THCudaTensor *r;

  if(!tensor1 && !tensor2)
    luaL_error(L, "expecting two Tensors or one Tensor and one number");
  else
  {
    r = THCudaTensor_new();
    luaT_pushudata(L, r, "torch.CudaTensor");

    if(!tensor1 && tensor2)
    {
      THCudaTensor_resizeAs(r, tensor2);
      THCudaTensor_copy(r, tensor2);
      THCudaTensor_mul(r, r, luaL_checknumber(L, 1));
    }
    else if(tensor1 && !tensor2)
    {
      THCudaTensor_resizeAs(r, tensor1);
      THCudaTensor_copy(r, tensor1);
      THCudaTensor_mul(r, r, luaL_checknumber(L, 2));
    }
    else
    {
      int dimt = tensor1->nDimension;
      int dims = tensor2->nDimension;

      if(dimt == 1 && dims == 1)
        lua_pushnumber(L, THCudaTensor_dot(tensor1, tensor2)); /* ok, we wasted r, but who cares */
      else if(dimt == 2 && dims == 1)
      {
        THCudaTensor_resize1d(r, tensor1->size[0]);
        THCudaTensor_zero(r);
        THCudaTensor_addmv(r, 1, r, 1, tensor1, tensor2);
      }
      else if(dimt == 2 && dims == 2)
      {
        THCudaTensor_resize2d(r, tensor1->size[0], tensor2->size[1]);
        THCudaTensor_zero(r);
        THCudaTensor_addmm(r, 1, r, 1, tensor1, tensor2);
      }
      else
        luaL_error(L, "multiplication between %dD and %dD tensors not yet supported", tensor1->nDimension, tensor2->nDimension);
    }
  }
  return 1;
}
Beispiel #8
0
static int cutorch_CudaTensorOperator___div__(lua_State *L)
{
  THCudaTensor *tensor = luaT_checkudata(L, 1, "torch.CudaTensor");
  THCudaTensor *r;

  luaL_argcheck(L, lua_isnumber(L,2), 2, "number expected");

  r = THCudaTensor_new();
  luaT_pushudata(L, r, "torch.CudaTensor");

  THCudaTensor_resizeAs(r, tensor);
  THCudaTensor_copy(r, tensor);
  THCudaTensor_mul(r, r, 1/lua_tonumber(L, 2));

  return 1;
}