/*
 * Asynchronously copies the contents of a CUDA tensor into a host tensor.
 *
 * state - THC state used to look up the current stream.
 * self  - destination host tensor; must be contiguous and hold the same
 *         number of elements as `src`.
 * src   - source CUDA tensor; must be contiguous.
 *
 * The copy is enqueued with cudaMemcpyAsync on the stream returned by
 * THCState_getStream while the source tensor's device is current. The
 * function does not synchronize the stream itself, so the host data is only
 * valid once the caller has synchronized. The device that was current on
 * entry is restored before a normal return. Empty tensors are a no-op.
 */
void THTensor_(copyAsyncCuda)(THCState *state, THTensor *self, struct THCTensor *src)
{
  THArgCheck(THTensor_(nElement)(self) == THCTensor_(nElement)(state, src), 2, "sizes do not match");
  THArgCheck(THTensor_(isContiguous)(self), 2, "Target tensor must be contiguous");
  THArgCheck(THCTensor_(isContiguous)(state, src), 3, "Source tensor must be contiguous");

  // Nothing to transfer.
  if (THTensor_(nElement)(self) == 0) return;

  // Perform the copy wrt the current stream on the CudaTensor's device.
  int tensorDevice = THCTensor_(getDevice)(state, src);
  int currentDevice;
  THCudaCheck(cudaGetDevice(&currentDevice));

  if (currentDevice != tensorDevice) {
    THCudaCheck(cudaSetDevice(tensorDevice));
  }

  THCStream *stream = THCState_getStream(state);
  THCudaCheck(cudaMemcpyAsync(THTensor_(data)(self),
                              THCTensor_(data)(state, src),
                              THCTensor_(nElement)(state, src) * sizeof(real),
                              cudaMemcpyDeviceToHost,
                              stream->stream));

  // Record the event against the HOST buffer (the copy destination). The
  // source storage is device memory, which the caching host allocator is not
  // expected to track, so recording against it would not protect the host
  // buffer from being reused while the copy is still in flight.
  // NOTE(review): assumes the allocator keys blocks by the host storage's
  // base pointer - confirm against THCCachingHostAllocator.
  THCudaCheck(THCCachingHostAllocator_recordEvent(THStorage_(data)(self->storage), stream));

  // Put back whichever device the caller had selected.
  if (currentDevice != tensorDevice) {
    THCudaCheck(cudaSetDevice(currentDevice));
  }
}
/*
 * Stores a single host value into element `index` of a CUDA storage.
 *
 * Raises an argument error ("index out of bounds") unless
 * 0 <= index < self->numel(). The one-element host-to-device copy is issued
 * on the state's current stream, and that stream is synchronized before
 * returning, so `value` (a by-value parameter) stays alive for the whole
 * transfer.
 */
void THCStorage_(set)(THCState *state, THCStorage *self, ptrdiff_t index, scalar_t value)
{
  const bool indexInRange = (index >= 0) && (index < self->numel());
  THArgCheck(indexInRange, 2, "index out of bounds");

  cudaStream_t currentStream = THCState_getCurrentStream(state);

  // Address of the target element inside the storage.
  auto *deviceSlot = THCStorage_(data)(state, self) + index;

  THCudaCheck(cudaMemcpyAsync(deviceSlot,
                              &value,
                              sizeof(scalar_t),
                              cudaMemcpyHostToDevice,
                              currentStream));
  THCudaCheck(cudaStreamSynchronize(currentStream));
}
/*
 * Reads element `index` of a CUDA storage back to the host and returns it.
 *
 * Raises an argument error ("index out of bounds") unless
 * 0 <= index < self->numel(). The one-element device-to-host copy is issued
 * on the state's current stream, which is synchronized before the value is
 * returned.
 */
scalar_t THCStorage_(get)(THCState *state, const THCStorage *self, ptrdiff_t index)
{
  const bool indexInRange = (index >= 0) && (index < self->numel());
  THArgCheck(indexInRange, 2, "index out of bounds");

  // Filled in by the copy below; only read after the stream has been synced.
  scalar_t hostValue;

  cudaStream_t currentStream = THCState_getCurrentStream(state);

  // Address of the requested element inside the storage.
  auto *deviceSlot = THCStorage_(data)(state, self) + index;

  THCudaCheck(cudaMemcpyAsync(&hostValue,
                              deviceSlot,
                              sizeof(scalar_t),
                              cudaMemcpyDeviceToHost,
                              currentStream));
  THCudaCheck(cudaStreamSynchronize(currentStream));

  return hostValue;
}
/*
 * Lua binding: pushes the storage backing the tensor at stack index 1,
 * or nil when the tensor has no storage. Always returns one Lua value.
 */
static int torch_Tensor_(storage)(lua_State *L)
{
  THCTensor *self = luaT_checkudata(L, 1, torch_Tensor);

  /* No storage attached: hand nil back to Lua. */
  if (!self->storage) {
    lua_pushnil(L);
    return 1;
  }

  /* Retain the storage before pushing it to Lua as a userdata. */
  THCStorage_(retain)(cutorch_getstate(L), self->storage);
  luaT_pushudata(L, self->storage, torch_Storage);
  return 1;
}
/*
 * tp_new implementation for the Python storage type.
 *
 * Accepted call forms (positional arguments):
 *   ()                              - empty storage
 *   (int size)                      - storage with `size` elements
 *   (Storage view_source[, int offset[, int size]])
 *                                   - view into an existing storage
 *   (Sequence data)                 - storage filled from a Python sequence
 *
 * Keyword arguments (internal use):
 *   allocator - integer holding a THAllocator*; removed from `kwargs` once read
 *   cdata     - integer holding a THStorage*; only honoured when it is the
 *               sole keyword and there are no positional arguments
 *
 * Returns a new reference to the constructed object, or NULL with a Python
 * error set. View and sequence construction are rejected when
 * THD_GENERIC_FILE is defined.
 */
static PyObject * THPStorage_(pynew)(PyTypeObject *type, PyObject *args, PyObject *kwargs)
{
  HANDLE_TH_ERRORS
  Py_ssize_t num_args = args ? PyTuple_Size(args) : 0;

  THPStoragePtr self((THPStorage *)type->tp_alloc(type, 0));
  THPUtils_assert(self, "failed to allocate a " THPStorageStr " object");
  THAllocator* allocator = NULL;

  // Internally we allow constructing with a keyword only argument cdata
  if (kwargs != NULL) {
    PyObject *allocator_ptr = PyDict_GetItemString(kwargs, "allocator");
    if (allocator_ptr) {
      THPUtils_assert(THPUtils_checkLong(allocator_ptr), "invalid allocator");
      allocator = (THAllocator*) PyLong_AsVoidPtr(allocator_ptr);
      // Drop the key so it does not count towards the keyword checks below.
      PyDict_DelItemString(kwargs, "allocator");
    }

    Py_ssize_t num_kwargs = PyDict_Size(kwargs);
    if (num_args == 0) {
      PyObject *cdata_ptr = PyDict_GetItemString(kwargs, "cdata");
      if (num_kwargs == 1 && cdata_ptr && THPUtils_checkLong(cdata_ptr)) {
        // Wrap the storage pointer that was passed in as an integer.
        THStorage *ptr = (THStorage*)PyLong_AsVoidPtr(cdata_ptr);
        self->cdata = ptr;
        return (PyObject*)self.release();
      }
    }
    THPUtils_assert(num_kwargs == 0, THPStorageStr "(): invalid keyword arguments");
  }

  // torch.Storage()
  if (num_args == 0) {
    if (allocator) {
      self->cdata = THPStorage_(newWithAllocator)(0, allocator);
    } else {
      self->cdata = THStorage_(new)(LIBRARY_STATE_NOARGS);
    }
    return (PyObject*)self.release();
  }

  PyObject *first_arg = PyTuple_GET_ITEM(args, 0);

  // torch.Storage(size)
  if (num_args == 1 && THPUtils_checkLong(first_arg)) {
    int64_t size = THPUtils_unpackLong(first_arg);
    if (allocator) {
      self->cdata = THPStorage_(newWithAllocator)(size, allocator);
    } else {
      self->cdata = THStorage_(newWithSize)(LIBRARY_STATE size);
    }
    return (PyObject*)self.release();
  }

  // torch.Storage(view_source, [offset, [size]])
  if (num_args < 4 && THPStorage_(Check)(first_arg)) {
#ifdef THD_GENERIC_FILE
    THPUtils_setError("distributed storages don't support storage views");
    return NULL;
#else
    THPStorage *storage_arg = (THPStorage *)first_arg;
    int64_t numel = storage_arg->cdata->size;
    int64_t offset = 0;

    if (num_args >= 2) {
      PyObject *second_arg = PyTuple_GET_ITEM(args, 1);
      if (!THPUtils_checkLong(second_arg))
        goto invalid_arguments;
      offset = THPUtils_unpackLong(second_arg);
    }

    // Default to everything after the offset unless a size is given.
    int64_t size = numel - offset;
    if (num_args >= 3) {
      PyObject *third_arg = PyTuple_GET_ITEM(args, 2);
      if (!THPUtils_checkLong(third_arg))
        goto invalid_arguments;
      size = THPUtils_unpackLong(third_arg);
    }

    THPUtils_assert(offset >= 0 && offset <= numel, "specified an offset of "
        "%" PRId64 ", but the viewed storage has only %" PRId64 " element(s)", offset, numel);
    THPUtils_assert(size >= 1 && size <= numel - offset, "specified a size of "
        "%" PRId64 ", but the viewed storage has only %" PRId64 " element(s) after offset %" PRId64,
        size, numel - offset, offset);

    real *data_ptr = THStorage_(data)(LIBRARY_STATE storage_arg->cdata) + offset;
    THStoragePtr storage(THStorage_(newWithData)(LIBRARY_STATE data_ptr, size));
    storage->flag = TH_STORAGE_REFCOUNTED | TH_STORAGE_VIEW;
    storage->view = storage_arg->cdata;
    // The view stores a pointer to the source storage, so retain it.
    THStorage_(retain)(LIBRARY_STATE storage_arg->cdata);
    self->cdata = storage.release();
    return (PyObject*)self.release();
#endif
  }

  // torch.Storage(sequence)
  if (num_args == 1 && PySequence_Check(first_arg)) {
#ifdef THD_GENERIC_FILE
    THPUtils_setError("distributed storages don't support construction from a sequence");
    // Return here (as the view branch does) so the specific error above is
    // not replaced by the generic invalid-arguments message further down.
    return NULL;
#else
    Py_ssize_t length = PySequence_Length(first_arg);
    THPUtils_assert(length >= 0, "couldn't obtain the length of %s",
        THPUtils_typename(first_arg));
    self->cdata = THStorage_(newWithSize)(LIBRARY_STATE length);
    THPObjectPtr item;
    try {
      for (Py_ssize_t i = 0; i < length; i++) {
        item = PySequence_GetItem(first_arg, i);
        // PySequence_GetItem returns NULL with an exception set on failure;
        // propagate that instead of unpacking a null object.
        if (!item.get())
          return NULL;
        real value = THPUtils_(unpackReal)(item.get());
#if !defined(THC_GENERIC_FILE)
        self->cdata->unsafe_data<real>()[i] = value;
#else
        // TODO: this might be slow - consider batched updates?
        THCStorage_(set)(LIBRARY_STATE self->cdata, i, value);
#endif
      }
    } catch (std::runtime_error &e) {
      THPUtils_setError("tried to construct a storage from a sequence (%s), "
          "but one of the items was of type %s instead of %s",
          THPUtils_typename(first_arg),
          THPUtils_typename(item.get()),
          THPUtils_typeTraits<real>::python_type_str);
      return NULL;
    }
    return (PyObject*)self.release();
#endif
  }

#ifndef THD_GENERIC_FILE
invalid_arguments:
#endif
  THPUtils_invalidArguments(args, kwargs, THPStorageStr " constructor", 6,
          "no arguments",
          "(int size)",
          "(Sequence data)",
          "(" THPStorageStr " view_source)",
          "(" THPStorageStr " view_source, int offset)",
          "(" THPStorageStr " view_source, int offset, int size)");
  return NULL;
  END_HANDLE_TH_ERRORS
}
/*
 * Lua binding: pushes the value returned by THCStorage_(elementSize) as a
 * Lua number. Always returns one Lua value.
 */
static int torch_Tensor_(elementSize)(lua_State *L)
{
  THCState *state = cutorch_getstate(L);

  lua_pushnumber(L, THCStorage_(elementSize)(state));
  return 1;
}
/*
 * Lua binding: copies the storage at stack index 2 into the CUDA storage at
 * stack index 1, dispatching on the source's Torch type name.
 *
 * CUDA source types are probed first, then host source types; the half
 * variant is only compiled in when CUDA_VERSION >= 7050. Any other argument
 * raises a Lua type error for argument 2. On success the stack is trimmed to
 * the destination storage, which is returned to Lua.
 */
static int cutorch_Storage_(copy)(lua_State *L)
{
  THCState *state = cutorch_getstate(L);
  THCStorage *dst = luaT_checkudata(L, 1, torch_Storage);
  void *from;

  /* CUDA sources. */
  if ((from = luaT_toudata(L, 2, "torch.CudaByteStorage")) != NULL) {
    THCStorage_(copyCudaByte)(state, dst, from);
  } else if ((from = luaT_toudata(L, 2, "torch.CudaCharStorage")) != NULL) {
    THCStorage_(copyCudaChar)(state, dst, from);
  } else if ((from = luaT_toudata(L, 2, "torch.CudaShortStorage")) != NULL) {
    THCStorage_(copyCudaShort)(state, dst, from);
  } else if ((from = luaT_toudata(L, 2, "torch.CudaIntStorage")) != NULL) {
    THCStorage_(copyCudaInt)(state, dst, from);
  } else if ((from = luaT_toudata(L, 2, "torch.CudaLongStorage")) != NULL) {
    THCStorage_(copyCudaLong)(state, dst, from);
  } else if ((from = luaT_toudata(L, 2, "torch.CudaStorage")) != NULL) {
    THCStorage_(copyCudaFloat)(state, dst, from);
  } else if ((from = luaT_toudata(L, 2, "torch.CudaDoubleStorage")) != NULL) {
    THCStorage_(copyCudaDouble)(state, dst, from);
#if CUDA_VERSION >= 7050
  } else if ((from = luaT_toudata(L, 2, "torch.CudaHalfStorage")) != NULL) {
    THCStorage_(copyCudaHalf)(state, dst, from);
#endif
  /* Host sources. */
  } else if ((from = luaT_toudata(L, 2, "torch.ByteStorage")) != NULL) {
    THCStorage_(copyByte)(state, dst, from);
  } else if ((from = luaT_toudata(L, 2, "torch.CharStorage")) != NULL) {
    THCStorage_(copyChar)(state, dst, from);
  } else if ((from = luaT_toudata(L, 2, "torch.ShortStorage")) != NULL) {
    THCStorage_(copyShort)(state, dst, from);
  } else if ((from = luaT_toudata(L, 2, "torch.IntStorage")) != NULL) {
    THCStorage_(copyInt)(state, dst, from);
  } else if ((from = luaT_toudata(L, 2, "torch.LongStorage")) != NULL) {
    THCStorage_(copyLong)(state, dst, from);
  } else if ((from = luaT_toudata(L, 2, "torch.FloatStorage")) != NULL) {
    THCStorage_(copyFloat)(state, dst, from);
  } else if ((from = luaT_toudata(L, 2, "torch.DoubleStorage")) != NULL) {
    THCStorage_(copyDouble)(state, dst, from);
  } else {
    luaL_typerror(L, 2, "torch.*Storage");
  }

  /* Leave only the destination storage on the stack and return it. */
  lua_settop(L, 1);
  return 1;
}