Example #1
0
/*
 * Enqueue an asynchronous host-to-device copy of `src` into `self`.
 *
 * Preconditions (enforced with THArgCheck):
 *   - `self` and `src` hold the same number of elements;
 *   - both tensors are contiguous.
 * An empty tensor is a no-op.
 *
 * The copy is issued with cudaMemcpyAsync on the stream returned by
 * THCState_getStream() after switching to the device that owns `self`.
 * This function does not synchronize the stream itself. After the copy is
 * enqueued, the source storage pointer is handed to
 * THCCachingHostAllocator_recordEvent() together with that stream
 * (presumably so the host allocator does not recycle the buffer while the
 * transfer is pending -- NOTE(review): confirm against the allocator).
 *
 * The caller's current device is restored before any CUDA error from the
 * copy or the event recording is reported, so a failing transfer does not
 * leave the calling thread on `self`'s device.
 */
void THCTensor_(copyAsyncCPU)(THCState *state, THCTensor *self, struct THTensor *src)
{
  THArgCheck(THCTensor_(nElement)(state, self) == THTensor_(nElement)(src), 2, "sizes do not match");
  THArgCheck(THCTensor_(isContiguous)(state, self), 2, "Target tensor must be contiguous");
  THArgCheck(THTensor_(isContiguous)(src), 3, "Source tensor must be contiguous");

  if (THCTensor_(nElement)(state, self) == 0) return;

  // Perform the copy wrt the current stream on the CudaTensor's device.
  int tensorDevice = THCTensor_(getDevice)(state, self);
  int currentDevice;
  THCudaCheck(cudaGetDevice(&currentDevice));

  if (currentDevice != tensorDevice) {
    THCudaCheck(cudaSetDevice(tensorDevice));
  }

  THCStream *stream = THCState_getStream(state);

  // Collect the status instead of checking it immediately: THCudaCheck is
  // expected not to return on failure (NOTE(review): confirm), which would
  // skip the device restore below.
  cudaError_t err = cudaMemcpyAsync(THCTensor_(data)(state, self),
                                    THTensor_(data)(src),
                                    THTensor_(nElement)(src) * sizeof(real),
                                    cudaMemcpyHostToDevice,
                                    stream->stream);
  if (err == cudaSuccess) {
    err = THCCachingHostAllocator_recordEvent(THStorage_(data)(src->storage), stream);
  }

  if (currentDevice != tensorDevice) {
    cudaError_t restoreErr = cudaSetDevice(currentDevice);
    // Report the earliest failure; a restore error only matters if the
    // copy itself succeeded.
    if (err == cudaSuccess) {
      err = restoreErr;
    }
  }

  THCudaCheck(err);
}
/*
 * Enqueue an asynchronous device-to-host copy of `src` into `self`.
 *
 * Preconditions (enforced with THArgCheck):
 *   - `self` and `src` hold the same number of elements;
 *   - both tensors are contiguous.
 * An empty tensor is a no-op.
 *
 * The copy is issued with cudaMemcpyAsync after switching to the device
 * that owns `src`; this function does not synchronize the stream itself.
 *
 * To match the host-to-device counterpart THCTensor_(copyAsyncCPU), the
 * host storage pointer of `self` is passed to
 * THCCachingHostAllocator_recordEvent() with the stream used for the copy
 * (presumably so the host allocator does not recycle the destination
 * buffer while the transfer is pending -- NOTE(review): confirm).
 *
 * NOTE(review): the stream is now obtained with THCState_getStream() after
 * the device switch, as copyAsyncCPU does, instead of
 * THCState_getDeviceStream(state, tensorDevice, currentStreamIndex).
 * These are assumed to denote the same stream; verify against THCGeneral.
 *
 * The caller's current device is restored before any CUDA error from the
 * copy or the event recording is reported.
 */
void THTensor_(copyAsyncCuda)(THCState *state, THTensor *self, struct THCTensor *src)
{
  THArgCheck(THTensor_(nElement)(self) == THCTensor_(nElement)(state, src), 2, "sizes do not match");
  THArgCheck(THTensor_(isContiguous)(self), 2, "Target tensor must be contiguous");
  THArgCheck(THCTensor_(isContiguous)(state, src), 3, "Source tensor must be contiguous");

  if (THTensor_(nElement)(self) == 0) return;

  // Perform the copy wrt the current stream on the CudaTensor's device.
  int tensorDevice = THCTensor_(getDevice)(state, src);
  int currentDevice;
  THCudaCheck(cudaGetDevice(&currentDevice));

  if (currentDevice != tensorDevice) {
    THCudaCheck(cudaSetDevice(tensorDevice));
  }

  THCStream *stream = THCState_getStream(state);

  // Collect the status instead of checking it immediately: THCudaCheck is
  // expected not to return on failure (NOTE(review): confirm), which would
  // skip the device restore below.
  cudaError_t err = cudaMemcpyAsync(THTensor_(data)(self),
                                    THCTensor_(data)(state, src),
                                    THCTensor_(nElement)(state, src) * sizeof(real),
                                    cudaMemcpyDeviceToHost,
                                    stream->stream);
  if (err == cudaSuccess) {
    err = THCCachingHostAllocator_recordEvent(THStorage_(data)(self->storage), stream);
  }

  if (currentDevice != tensorDevice) {
    cudaError_t restoreErr = cudaSetDevice(currentDevice);
    // Report the earliest failure; a restore error only matters if the
    // copy itself succeeded.
    if (err == cudaSuccess) {
      err = restoreErr;
    }
  }

  THCudaCheck(err);
}
Example #3
0
/*
 * Return a tensor handle for the values of sparse tensor `self`.
 *
 * When `self->nnz` is non-zero the result is THCTensor_(newNarrow) applied
 * to `self->values` along dimension 0, starting at 0 with length `nnz`.
 * When `nnz` is zero the stored `values` tensor itself is retained and
 * returned.
 */
THCTensor *THCSTensor_(newValues)(THCState *state, const THCSTensor *self) {
  if (self->nnz != 0) {
    return THCTensor_(newNarrow)(state, self->values, 0, 0, self->nnz);
  }

  /* No entries: hand back the stored tensor after retaining it. */
  THCTensor *stored = self->values;
  THCTensor_(retain)(state, stored);
  return stored;
}
/*
 * Copy the host tensor `src` into the CUDA tensor `self` with a blocking
 * cudaMemcpy.
 *
 * Both tensors must hold the same number of elements (THArgCheck). The
 * transfer runs between the results of newContiguous() on each side; the
 * device-side result is then passed to THCTensor_(freeCopyTo) with `self`
 * as the target.
 */
void THCTensor_(copyCPU)(THCState *state, THCTensor *self, struct THTensor *src)
{
  THArgCheck(THCTensor_(nElement)(state, self) == THTensor_(nElement)(src), 2, "sizes do not match");

  /* Contiguous handles for both ends of the transfer. */
  THCTensor *deviceContig = THCTensor_(newContiguous)(state, self);
  struct THTensor *hostContig = THTensor_(newContiguous)(src);

  THCudaCheck(cudaMemcpy(THCTensor_(data)(state, deviceContig),
                         THTensor_(data)(hostContig),
                         THTensor_(nElement)(hostContig) * sizeof(real),
                         cudaMemcpyHostToDevice));

  /* Release the host handle, then free the device handle, copying into self. */
  THTensor_(free)(hostContig);
  THCTensor_(freeCopyTo)(state, deviceContig, self);
}
Example #5
0
/*
 * Lua binding: copy argument 2 into the CUDA tensor at argument 1.
 *
 * Argument 2 is tried first as the CUDA tensor type of this instantiation
 * (THCTensor_(copy) is used) and then as the matching host tensor type
 * (THCTensor_(copyAsyncCPU) is used). Any other type is reported through
 * luaL_typerror. The stack is trimmed to the destination tensor, which is
 * the single return value.
 */
static int cutorch_Tensor_(copyAsyncCPU)(lua_State *L)
{
#define STRINGIFY_TENSOR(x) TH_CONCAT_STRING_3(torch.,x,Tensor)
  THCState *state = cutorch_getstate(L);
  THCTensor *dst = luaT_checkudata(L, 1, STRINGIFY_TENSOR(CReal));

  void *deviceSrc = luaT_toudata(L, 2, STRINGIFY_TENSOR(CReal));
  if (deviceSrc) {
    THCTensor_(copy)(state, dst, deviceSrc);
  } else {
    void *hostSrc = luaT_toudata(L, 2, STRINGIFY_TENSOR(Real));
    if (hostSrc) {
      THCTensor_(copyAsyncCPU)(state, dst, hostSrc);
    } else {
      luaL_typerror(L, 2, STRINGIFY_TENSOR(Real) " or " STRINGIFY_TENSOR(CReal));
    }
  }

  /* Leave only the destination tensor on the stack and return it. */
  lua_settop(L, 1);
  return 1;
#undef STRINGIFY_TENSOR
}
Example #6
0
/*
 * Copy the host tensor `src` into the CUDA tensor `self`.
 *
 * Both tensors must hold the same number of elements (THArgCheck). The
 * transfer is enqueued with cudaMemcpyAsync on the stream returned by
 * THCState_getCurrentStream() and that stream is synchronized before the
 * temporaries are released, so the call returns only after the stream has
 * drained.
 */
void THCTensor_(copyCPU)(THCState *state, THCTensor *self, struct THTensor *src)
{
  THArgCheck(THCTensor_(nElement)(state, self) == THTensor_(nElement)(src), 2, "sizes do not match");

  /* Contiguous handles for both ends of the transfer. */
  THCTensor *deviceContig = THCTensor_(newContiguous)(state, self);
  struct THTensor *hostContig = THTensor_(newContiguous)(src);

  cudaStream_t copyStream = THCState_getCurrentStream(state);

  THCudaCheck(cudaMemcpyAsync(THCTensor_(data)(state, deviceContig),
                              THTensor_(data)(hostContig),
                              THTensor_(nElement)(hostContig) * sizeof(real),
                              cudaMemcpyHostToDevice,
                              copyStream));

  /* Wait for the enqueued copy before the host handle is released. */
  THCudaCheck(cudaStreamSynchronize(copyStream));

  THTensor_(free)(hostContig);
  THCTensor_(freeCopyTo)(state, deviceContig, self);
}
Example #7
0
// Internal method: install `indices` and `values` directly into `self`
// without cloning or retaining them. The tensors previously stored in
// `self` are passed to their free routines first.
//
// Validation (THArgCheck):
//   - if `values` has zero dimensions, `indices` must too;
//   - otherwise `indices` must be 2-D with first size == self->nDimensionI
//     and second size == values' first size, and `values` must have
//     self->nDimensionV + 1 dimensions.
//
// On return nnz is 0 for empty values, else values' first size, and the
// coalesced flag is cleared. Returns `self`.
THCSTensor* THCSTensor_(_move)(THCState *state, THCSTensor *self, THCIndexTensor *indices, THCTensor *values) {
  const int valuesEmpty = (THCTensor_(_nDimension)(state, values) == 0);

  if (valuesEmpty) {
    THArgCheck(THCIndexTensor_(_nDimension)(state, indices) == 0, 2,
        "if values is empty, indices must be empty too");
  } else {
    THArgCheck(THCIndexTensor_(_nDimension)(state, indices) == 2, 2,
        "indices must be nDim x nnz");
    THArgCheck(THCIndexTensor_(size)(state, indices, 1) == THCTensor_(size)(state, values, 0), 2,
        "indices and values must have same nnz");
    THArgCheck(THCIndexTensor_(size)(state, indices, 0) == self->nDimensionI, 2,
        "indices has incorrect first dimension, expected %d, got %d", self->nDimensionI, THCIndexTensor_(size)(state, indices, 0));
    THArgCheck(THCTensor_(_nDimension)(state, values) == self->nDimensionV + 1, 3,
        "values has incorrect number of dimensions, expected %d, got %d", self->nDimensionV + 1, THCTensor_(_nDimension)(state, values));
  }

  // Release what self held before taking over the new tensors.
  THCIndexTensor_(free)(state, self->indices);
  THCTensor_(free)(state, self->values);

  self->indices = indices;
  self->values = values;

  if (valuesEmpty) {
    self->nnz = 0;
  } else {
    self->nnz = THCTensor_(size)(state, values, 0);
  }
  self->coalesced = 0;

  return self;
}
/*
 * Copy the CUDA tensor `src` into the CUDA tensor `self`.
 * Forwards directly to THCTensor_(copy) with the same arguments.
 */
void THCTensor_(copyCuda)(THCState *state, THCTensor *self, THCTensor *src)
{
  THCTensor_(copy)(state, self, src);
}
Example #9
0
/*
 * Internal method (not the same as torch.set): store clones of `indices`
 * and `values` in `self`.
 *
 * Each input is duplicated with newClone() and the clones are handed to
 * THCSTensor_(_move), whose return value is returned unchanged.
 */
THCSTensor* THCSTensor_(_set)(THCState *state, THCSTensor *self, THCIndexTensor *indices, THCTensor *values) {
  THCIndexTensor *indicesClone = THCIndexTensor_(newClone)(state, indices);
  THCTensor *valuesClone = THCTensor_(newClone)(state, values);

  return THCSTensor_(_move)(state, self, indicesClone, valuesClone);
}
Example #10
0
/* now we overwrite some methods specific to CudaTensor */
/*
 * Lua binding: copy argument 2 into the CUDA tensor at argument 1.
 *
 * Argument 2 is probed against each supported tensor type name in turn
 * (CUDA tensor types first, then host tensor types) and the matching
 * THCTensor_(copy*) routine is invoked. If nothing matches, the mismatch
 * is reported through luaL_typerror. The stack is trimmed to the
 * destination tensor, which is the single return value.
 */
static int cutorch_Tensor_(copy)(lua_State *L)
{
  THCState *state = cutorch_getstate(L);
  THCTensor *dst = luaT_checkudata(L, 1, torch_Tensor);
  void *source;

  /* CUDA-resident sources. */
  if ((source = luaT_toudata(L, 2, "torch.CudaTensor"))) {
    THCTensor_(copyCudaFloat)(state, dst, source);
  } else if ((source = luaT_toudata(L, 2, "torch.CudaByteTensor"))) {
    THCTensor_(copyCudaByte)(state, dst, source);
  } else if ((source = luaT_toudata(L, 2, "torch.CudaCharTensor"))) {
    THCTensor_(copyCudaChar)(state, dst, source);
  } else if ((source = luaT_toudata(L, 2, "torch.CudaShortTensor"))) {
    THCTensor_(copyCudaShort)(state, dst, source);
  } else if ((source = luaT_toudata(L, 2, "torch.CudaIntTensor"))) {
    THCTensor_(copyCudaInt)(state, dst, source);
  } else if ((source = luaT_toudata(L, 2, "torch.CudaLongTensor"))) {
    THCTensor_(copyCudaLong)(state, dst, source);
  } else if ((source = luaT_toudata(L, 2, "torch.CudaDoubleTensor"))) {
    THCTensor_(copyCudaDouble)(state, dst, source);
  }
  /* Host-resident sources. */
  else if ((source = luaT_toudata(L, 2, "torch.ByteTensor"))) {
    THCTensor_(copyByte)(state, dst, source);
  } else if ((source = luaT_toudata(L, 2, "torch.CharTensor"))) {
    THCTensor_(copyChar)(state, dst, source);
  } else if ((source = luaT_toudata(L, 2, "torch.ShortTensor"))) {
    THCTensor_(copyShort)(state, dst, source);
  } else if ((source = luaT_toudata(L, 2, "torch.IntTensor"))) {
    THCTensor_(copyInt)(state, dst, source);
  } else if ((source = luaT_toudata(L, 2, "torch.LongTensor"))) {
    THCTensor_(copyLong)(state, dst, source);
  } else if ((source = luaT_toudata(L, 2, "torch.FloatTensor"))) {
    THCTensor_(copyFloat)(state, dst, source);
  } else if ((source = luaT_toudata(L, 2, "torch.DoubleTensor"))) {
    THCTensor_(copyDouble)(state, dst, source);
  } else {
    luaL_typerror(L, 2, "torch.*Tensor");
  }

  /* Leave only the destination tensor on the stack and return it. */
  lua_settop(L, 1);
  return 1;
}