/* Asynchronously copies a contiguous CPU tensor `src` into a contiguous CUDA
 * tensor `self`, using the current stream of the CUDA tensor's device.
 * The caller is responsible for `src` being in pinned (page-locked) host
 * memory — presumably allocated via the caching host allocator, since the
 * completion event is recorded with it below; TODO confirm at call sites.
 * Element counts must match; empty tensors are a no-op. */
void THCTensor_(copyAsyncCPU)(THCState *state, THCTensor *self, struct THTensor *src)
{
  THArgCheck(THCTensor_(nElement)(state, self) == THTensor_(nElement)(src), 2,
             "sizes do not match");
  THArgCheck(THCTensor_(isContiguous)(state, self), 2,
             "Target tensor must be contiguous");
  THArgCheck(THTensor_(isContiguous)(src), 3,
             "Source tensor must be contiguous");

  if (THCTensor_(nElement)(state, self) == 0) return;

  // Perform the copy wrt the current stream on the CudaTensor's device.
  int tensorDevice = THCTensor_(getDevice)(state, self);
  int currentDevice;
  /* BUG FIX: the argument had been mangled to "¤tDevice" (an HTML-entity
   * corruption of "&currentDevice"), which does not compile. */
  THCudaCheck(cudaGetDevice(&currentDevice));

  if (currentDevice != tensorDevice) {
    THCudaCheck(cudaSetDevice(tensorDevice));
  }

  THCStream *stream = THCState_getStream(state);
  THCudaCheck(cudaMemcpyAsync(THCTensor_(data)(state, self),
                              THTensor_(data)(src),
                              THTensor_(nElement)(src) * sizeof(real),
                              cudaMemcpyHostToDevice,
                              stream->stream));

  /* Record an event on the stream so the caching host allocator knows when
   * the pinned source buffer can safely be reused. */
  THCudaCheck(THCCachingHostAllocator_recordEvent(THStorage_(data)(src->storage), stream));

  /* Restore the caller's device selection. */
  if (currentDevice != tensorDevice) {
    THCudaCheck(cudaSetDevice(currentDevice));
  }
}
/* Asynchronously copies a contiguous CUDA tensor `src` into a contiguous CPU
 * tensor `self`, using the current stream on the CUDA tensor's device.
 * No completion event is recorded here, so the caller must synchronize the
 * stream before reading `self` — TODO confirm callers do so.
 * Element counts must match; empty tensors are a no-op. */
void THTensor_(copyAsyncCuda)(THCState *state, THTensor *self, struct THCTensor *src)
{
  THArgCheck(THTensor_(nElement)(self) == THCTensor_(nElement)(state, src), 2,
             "sizes do not match");
  THArgCheck(THTensor_(isContiguous)(self), 2,
             "Target tensor must be contiguous");
  THArgCheck(THCTensor_(isContiguous)(state, src), 3,
             "Source tensor must be contiguous");

  if (THTensor_(nElement)(self) == 0) return;

  // Perform the copy wrt the current stream on the CudaTensor's device.
  int tensorDevice = THCTensor_(getDevice)(state, src);
  int currentDevice;
  /* BUG FIX: the argument had been mangled to "¤tDevice" (an HTML-entity
   * corruption of "&currentDevice"), which does not compile. */
  THCudaCheck(cudaGetDevice(&currentDevice));

  if (currentDevice != tensorDevice) {
    THCudaCheck(cudaSetDevice(tensorDevice));
  }

  THCudaCheck(cudaMemcpyAsync(THTensor_(data)(self),
                              THCTensor_(data)(state, src),
                              THCTensor_(nElement)(state, src) * sizeof(real),
                              cudaMemcpyDeviceToHost,
                              THCState_getDeviceStream(state, tensorDevice,
                                                       THCState_getCurrentStreamIndex(state))));

  /* Restore the caller's device selection. */
  if (currentDevice != tensorDevice) {
    THCudaCheck(cudaSetDevice(currentDevice));
  }
}
/* Returns a new reference to the values of a sparse tensor, restricted to its
 * first `nnz` rows. For a tensor with non-zeros this is a narrowed view of
 * `self->values`; for an empty tensor the backing values tensor itself is
 * retained and returned. Either way the caller owns the returned reference
 * and must free it. */
THCTensor *THCSTensor_(newValues)(THCState *state, const THCSTensor *self)
{
  if (self->nnz != 0) {
    return THCTensor_(newNarrow)(state, self->values, 0, 0, self->nnz);
  }
  /* Empty sparse tensor: hand back the raw values tensor with a fresh ref. */
  THCTensor_(retain)(state, self->values);
  return self->values;
}
/* Copies a CPU tensor `src` into a CUDA tensor `self` with a blocking
 * host-to-device memcpy. Both tensors are made contiguous first (no-op
 * copies if already contiguous); element counts must match. */
void THCTensor_(copyCPU)(THCState *state, THCTensor *self, struct THTensor *src)
{
  THArgCheck(THCTensor_(nElement)(state, self) == THTensor_(nElement)(src), 2,
             "sizes do not match");
  {
    /* Work on contiguous (possibly temporary) versions of both tensors. */
    THCTensor *dstContig = THCTensor_(newContiguous)(state, self);
    struct THTensor *srcContig = THTensor_(newContiguous)(src);
    size_t nbytes = THTensor_(nElement)(srcContig) * sizeof(real);

    THCudaCheck(cudaMemcpy(THCTensor_(data)(state, dstContig),
                           THTensor_(data)(srcContig),
                           nbytes,
                           cudaMemcpyHostToDevice));

    THTensor_(free)(srcContig);
    /* Copies dstContig back into self if they differ, then releases it. */
    THCTensor_(freeCopyTo)(state, dstContig, self);
  }
}
/* Lua binding: tensor:copyAsync(src).
 * Arg 1 is a CUDA tensor of this generic's type; arg 2 is either another
 * CUDA tensor of the same type (plain copy) or the matching CPU tensor type
 * (asynchronous host-to-device copy). Any other type raises a Lua error.
 * Returns the destination tensor (arg 1 is left on the stack). */
static int cutorch_Tensor_(copyAsyncCPU)(lua_State *L)
{
#define STRINGIFY_TENSOR(x) TH_CONCAT_STRING_3(torch.,x,Tensor)
  THCState *state = cutorch_getstate(L);
  THCTensor *tensor = luaT_checkudata(L, 1, STRINGIFY_TENSOR(CReal));

  /* Probe the CUDA type first, then the CPU type. */
  void *src = luaT_toudata(L, 2, STRINGIFY_TENSOR(CReal));
  if (src != NULL) {
    THCTensor_(copy)(state, tensor, src);
  } else {
    src = luaT_toudata(L, 2, STRINGIFY_TENSOR(Real));
    if (src != NULL) {
      THCTensor_(copyAsyncCPU)(state, tensor, src);
    } else {
      luaL_typerror(L, 2, STRINGIFY_TENSOR(Real) " or " STRINGIFY_TENSOR(CReal));
    }
  }

  lua_settop(L, 1);
  return 1;
#undef STRINGIFY_TENSOR
}
/* Copies a CPU tensor `src` into a CUDA tensor `self`. The transfer is
 * issued asynchronously on the current stream and then synchronized, so the
 * call is blocking overall but ordered with respect to prior work on that
 * stream. Both tensors are made contiguous first; element counts must match.
 * NOTE(review): this duplicates the name of the plain-cudaMemcpy variant
 * elsewhere in this file — only one can be live in a given build; confirm
 * which version is current. */
void THCTensor_(copyCPU)(THCState *state, THCTensor *self, struct THTensor *src)
{
  THArgCheck(THCTensor_(nElement)(state, self) == THTensor_(nElement)(src), 2,
             "sizes do not match");
  {
    THCTensor *dstContig = THCTensor_(newContiguous)(state, self);
    struct THTensor *srcContig = THTensor_(newContiguous)(src);
    cudaStream_t stream = THCState_getCurrentStream(state);

    THCudaCheck(cudaMemcpyAsync(THCTensor_(data)(state, dstContig),
                                THTensor_(data)(srcContig),
                                THTensor_(nElement)(srcContig) * sizeof(real),
                                cudaMemcpyHostToDevice,
                                stream));
    /* Block until the transfer completes before freeing srcContig. */
    THCudaCheck(cudaStreamSynchronize(stream));

    THTensor_(free)(srcContig);
    THCTensor_(freeCopyTo)(state, dstContig, self);
  }
}
// directly assign without cloning or retaining (internal method) THCSTensor* THCSTensor_(_move)(THCState *state, THCSTensor *self, THCIndexTensor *indices, THCTensor *values) { int empty = THCTensor_(_nDimension)(state, values) == 0; if (!empty) { THArgCheck(THCIndexTensor_(_nDimension)(state, indices) == 2, 2, "indices must be nDim x nnz"); THArgCheck(THCIndexTensor_(size)(state, indices, 1) == THCTensor_(size)(state, values, 0), 2, "indices and values must have same nnz"); THArgCheck(THCIndexTensor_(size)(state, indices, 0) == self->nDimensionI, 2, "indices has incorrect first dimension, expected %d, got %d", self->nDimensionI, THCIndexTensor_(size)(state, indices, 0)); THArgCheck(THCTensor_(_nDimension)(state, values) == self->nDimensionV + 1, 3, "values has incorrect number of dimensions, expected %d, got %d", self->nDimensionV + 1, THCTensor_(_nDimension)(state, values)); } else { THArgCheck(THCIndexTensor_(_nDimension)(state, indices) == 0, 2, "if values is empty, indices must be empty too"); } THCIndexTensor_(free)(state, self->indices); THCTensor_(free)(state, self->values); self->indices = indices; self->values = values; self->nnz = empty ? 0 : THCTensor_(size)(state, values, 0); self->coalesced = 0; return self; }
/* Device-to-device copy between CUDA tensors of the same type; simply
 * forwards to the generic THCTensor_(copy). Kept as a separate entry point,
 * presumably for API symmetry with the cross-type copyCuda* functions —
 * confirm against the generated copy bindings. */
void THCTensor_(copyCuda)(THCState *state, THCTensor *self, THCTensor *src)
{
  THCTensor_(copy)(state, self, src);
}
/* Internal method — unlike torch.set, this deep-copies both inputs.
 * Clones `indices` and `values`, then hands ownership of the clones to
 * _move, which installs them on `self`. The caller keeps ownership of the
 * originals. Returns `self`. */
THCSTensor* THCSTensor_(_set)(THCState *state, THCSTensor *self, THCIndexTensor *indices, THCTensor *values)
{
  THCIndexTensor *indicesClone = THCIndexTensor_(newClone)(state, indices);
  THCTensor *valuesClone = THCTensor_(newClone)(state, values);
  return THCSTensor_(_move)(state, self, indicesClone, valuesClone);
}
/* now we overwrite some methods specific to CudaTensor */
/* Lua binding: tensor:copy(src).
 * Dispatches on the runtime type of arg 2, trying each known CUDA tensor
 * type first and then each CPU tensor type; raises a Lua error if arg 2 is
 * not a tensor. Note "torch.CudaTensor" routes to copyCudaFloat —
 * presumably CudaTensor is the float type in this binding; confirm against
 * the cutorch type registration. Returns the destination tensor. */
static int cutorch_Tensor_(copy)(lua_State *L)
{
  THCState *state = cutorch_getstate(L);
  THCTensor *tensor = luaT_checkudata(L, 1, torch_Tensor);
  void *src;
  /* CUDA source types */
  if( (src = luaT_toudata(L, 2, "torch.CudaTensor")) )
    THCTensor_(copyCudaFloat)(state, tensor, src);
  else if( (src = luaT_toudata(L, 2, "torch.CudaByteTensor")) )
    THCTensor_(copyCudaByte)(state, tensor, src);
  else if( (src = luaT_toudata(L, 2, "torch.CudaCharTensor")) )
    THCTensor_(copyCudaChar)(state, tensor, src);
  else if( (src = luaT_toudata(L, 2, "torch.CudaShortTensor")) )
    THCTensor_(copyCudaShort)(state, tensor, src);
  else if( (src = luaT_toudata(L, 2, "torch.CudaIntTensor")) )
    THCTensor_(copyCudaInt)(state, tensor, src);
  else if( (src = luaT_toudata(L, 2, "torch.CudaLongTensor")) )
    THCTensor_(copyCudaLong)(state, tensor, src);
  else if( (src = luaT_toudata(L, 2, "torch.CudaDoubleTensor")) )
    THCTensor_(copyCudaDouble)(state, tensor, src);
  /* CPU source types */
  else if( (src = luaT_toudata(L, 2, "torch.ByteTensor")) )
    THCTensor_(copyByte)(state, tensor, src);
  else if( (src = luaT_toudata(L, 2, "torch.CharTensor")) )
    THCTensor_(copyChar)(state, tensor, src);
  else if( (src = luaT_toudata(L, 2, "torch.ShortTensor")) )
    THCTensor_(copyShort)(state, tensor, src);
  else if( (src = luaT_toudata(L, 2, "torch.IntTensor")) )
    THCTensor_(copyInt)(state, tensor, src);
  else if( (src = luaT_toudata(L, 2, "torch.LongTensor")) )
    THCTensor_(copyLong)(state, tensor, src);
  else if( (src = luaT_toudata(L, 2, "torch.FloatTensor")) )
    THCTensor_(copyFloat)(state, tensor, src);
  else if( (src = luaT_toudata(L, 2, "torch.DoubleTensor")) )
    THCTensor_(copyDouble)(state, tensor, src);
  else
    luaL_typerror(L, 2, "torch.*Tensor");
  /* Leave the destination tensor as the sole return value. */
  lua_settop(L, 1);
  return 1;
}