static int cutorch_CudaTensorOperator___sub__(lua_State *L)
{
  THCudaTensor *tensor1 = luaT_toudata(L, 1, "torch.CudaTensor");
  THCudaTensor *tensor2 = luaT_toudata(L, 2, "torch.CudaTensor");
  THCudaTensor *r;

  if(!tensor1 && !tensor2)
    luaL_error(L, "expecting two Tensors or one Tensor and one number");
  else
  {
    r = THCudaTensor_new();
    luaT_pushudata(L, r, "torch.CudaTensor");

    if(!tensor1 && tensor2)
    {
      /* number - tensor */
      THCudaTensor_resizeAs(r, tensor2);
      THCudaTensor_fill(r, luaL_checknumber(L, 1));
      THCudaTensor_cadd(r, r, -1, tensor2);
    }
    else if(tensor1 && !tensor2)
    {
      /* tensor - number */
      THCudaTensor_resizeAs(r, tensor1);
      THCudaTensor_copy(r, tensor1);
      THCudaTensor_add(r, r, -luaL_checknumber(L, 2));
    }
    else
    {
      /* tensor - tensor */
      THCudaTensor_resizeAs(r, tensor1);
      THCudaTensor_copy(r, tensor1);
      THCudaTensor_cadd(r, r, -1, tensor2);
    }
  }
  return 1;
}
static int cutorch_CudaTensorOperator___mul__(lua_State *L)
{
  THCudaTensor *tensor1 = luaT_toudata(L, 1, "torch.CudaTensor");
  THCudaTensor *tensor2 = luaT_toudata(L, 2, "torch.CudaTensor");
  THCudaTensor *r;

  if(!tensor1 && !tensor2)
    luaL_error(L, "expecting two Tensors or one Tensor and one number");
  else
  {
    r = THCudaTensor_new();
    luaT_pushudata(L, r, "torch.CudaTensor");

    if(!tensor1 && tensor2)
    {
      /* number * tensor */
      THCudaTensor_resizeAs(r, tensor2);
      THCudaTensor_copy(r, tensor2);
      THCudaTensor_mul(r, r, luaL_checknumber(L, 1));
    }
    else if(tensor1 && !tensor2)
    {
      /* tensor * number */
      THCudaTensor_resizeAs(r, tensor1);
      THCudaTensor_copy(r, tensor1);
      THCudaTensor_mul(r, r, luaL_checknumber(L, 2));
    }
    else
    {
      /* tensor * tensor: dispatch on dimensionality */
      int dimt = tensor1->nDimension;
      int dims = tensor2->nDimension;

      if(dimt == 1 && dims == 1)
        lua_pushnumber(L, THCudaTensor_dot(tensor1, tensor2)); /* ok, we wasted r, but who cares */
      else if(dimt == 2 && dims == 1)
      {
        /* matrix * vector */
        THCudaTensor_resize1d(r, tensor1->size[0]);
        THCudaTensor_zero(r);
        THCudaTensor_addmv(r, 1, r, 1, tensor1, tensor2);
      }
      else if(dimt == 2 && dims == 2)
      {
        /* matrix * matrix */
        THCudaTensor_resize2d(r, tensor1->size[0], tensor2->size[1]);
        THCudaTensor_zero(r);
        THCudaTensor_addmm(r, 1, r, 1, tensor1, tensor2);
      }
      else
        luaL_error(L, "multiplication between %dD and %dD tensors not yet supported",
                   tensor1->nDimension, tensor2->nDimension);
    }
  }
  return 1;
}
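/* Illustrative sketch, not part of the original source: the 2-D x 2-D branch
 * above forms r = A*B by zeroing r and calling addmm with beta = 1, alpha = 1,
 * i.e. r = 1*r + 1*(A x B). The helper below shows that call pattern directly,
 * assuming the state-free signatures used in this file; the function name is
 * hypothetical. */
static void example_matmul(void)
{
  THCudaTensor *A = THCudaTensor_newWithSize2d(3, 4);
  THCudaTensor *B = THCudaTensor_newWithSize2d(4, 2);
  THCudaTensor *C = THCudaTensor_newWithSize2d(3, 2);
  THCudaTensor_fill(A, 1);
  THCudaTensor_fill(B, 2);
  THCudaTensor_zero(C);                   /* start from C = 0        */
  THCudaTensor_addmm(C, 1, C, 1, A, B);   /* C = 1*C + 1*(A x B)     */
  THCudaTensor_free(A);
  THCudaTensor_free(B);
  THCudaTensor_free(C);
}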
void THCudaTensor_freeCopyTo(THCState *state, THCudaTensor *self, THCudaTensor *dst)
{
  if(self != dst)
    THCudaTensor_copy(state, dst, self);

  THCudaTensor_free(state, self);
}
THCudaTensor *THCudaTensor_newClone(THCState *state, THCudaTensor *self)
{
  THCudaTensor *tensor = THCudaTensor_new(state);
  THCudaTensor_resizeAs(state, tensor, self);
  THCudaTensor_copy(state, tensor, self);
  return tensor;
}
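/* Illustrative sketch, not part of the original source: the usual
 * clone -> modify -> freeCopyTo round trip for an out-of-place operation.
 * It assumes a state-passing THCudaTensor_add, following the THCState
 * convention of the two helpers above; the function name is hypothetical. */
static void addScalarOutOfPlace(THCState *state, THCudaTensor *dst,
                                THCudaTensor *src, float value)
{
  THCudaTensor *tmp = THCudaTensor_newClone(state, src); /* independent copy of src */
  THCudaTensor_add(state, tmp, tmp, value);               /* work on the clone       */
  THCudaTensor_freeCopyTo(state, tmp, dst);               /* copy into dst, free tmp */
}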
/*
 * Based on the implementation of THTensor_(indexCopy) in torch7
 */
static void THCudaTensor_indexCopy(THCudaTensor *tensor, int dim, THLongTensor *index, THCudaTensor *src)
{
  long i, numel;
  THCudaTensor *tSlice, *sSlice;
  long *index_data;

  numel = THLongTensor_nElement(index);
  THArgCheck(index->nDimension == 1, 3, "Index is supposed to be a vector");
  THArgCheck(dim < src->nDimension, 4, "Indexing dim is out of bounds");

  index = THLongTensor_newContiguous(index);
  index_data = THLongTensor_data(index);

  for (i = 0; i < numel; i++)
  {
    if (tensor->nDimension > 1)
    {
      tSlice = THCudaTensor_new();
      sSlice = THCudaTensor_new();
      THCudaTensor_select(tSlice, tensor, dim, index_data[i]-1);
      THCudaTensor_select(sSlice, src, dim, i);
      THCudaTensor_copy(tSlice, sSlice);
      THCudaTensor_free(tSlice);
      THCudaTensor_free(sSlice);
    }
    else
    {
      /* It is faster to copy a float from one device address to another than
         to read it back to host memory and copy it to the device again. */
      THCudaCheck(cudaMemcpy(tensor->storage->data + tensor->storageOffset + index_data[i] - 1,
                             src->storage->data + src->storageOffset + i,
                             sizeof(float), cudaMemcpyDeviceToDevice));
    }
  }

  THLongTensor_free(index);
}
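/* Illustrative sketch, not part of the original source: scattering the two
 * rows of `src` into rows 1 and 3 of `dst`. As in the function above, the
 * index values are 1-based while the dimension argument is 0-based. Assumes
 * the state-free signatures used in this file; the function name is
 * hypothetical. */
static void example_indexCopy(void)
{
  THCudaTensor *dst = THCudaTensor_newWithSize2d(4, 5);
  THCudaTensor *src = THCudaTensor_newWithSize2d(2, 5);
  THLongTensor *index = THLongTensor_newWithSize1d(2);
  THLongTensor_set1d(index, 0, 1);              /* first row of dst  */
  THLongTensor_set1d(index, 1, 3);              /* third row of dst  */
  THCudaTensor_fill(dst, 0);
  THCudaTensor_fill(src, 1);
  THCudaTensor_indexCopy(dst, 0, index, src);   /* dim 0: copy whole rows */
  THLongTensor_free(index);
  THCudaTensor_free(src);
  THCudaTensor_free(dst);
}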
/* now we overwrite some methods specific to CudaTensor */
static int cutorch_CudaTensor_copy(lua_State *L)
{
  THCState *state = cutorch_getstate(L);
  THCudaTensor *tensor = luaT_checkudata(L, 1, "torch.CudaTensor");
  void *src;
  if( (src = luaT_toudata(L, 2, "torch.CudaTensor")) )
    THCudaTensor_copy(state, tensor, src);
  else if( (src = luaT_toudata(L, 2, "torch.ByteTensor")) )
    THCudaTensor_copyByte(state, tensor, src);
  else if( (src = luaT_toudata(L, 2, "torch.CharTensor")) )
    THCudaTensor_copyChar(state, tensor, src);
  else if( (src = luaT_toudata(L, 2, "torch.ShortTensor")) )
    THCudaTensor_copyShort(state, tensor, src);
  else if( (src = luaT_toudata(L, 2, "torch.IntTensor")) )
    THCudaTensor_copyInt(state, tensor, src);
  else if( (src = luaT_toudata(L, 2, "torch.LongTensor")) )
    THCudaTensor_copyLong(state, tensor, src);
  else if( (src = luaT_toudata(L, 2, "torch.FloatTensor")) )
    THCudaTensor_copyFloat(state, tensor, src);
  else if( (src = luaT_toudata(L, 2, "torch.DoubleTensor")) )
    THCudaTensor_copyDouble(state, tensor, src);
  else if( (src = luaT_toudata(L, 2, "torch.CudaTensor")) ) /* unreachable: already handled by the first branch */
    THCudaTensor_copyCuda(state, tensor, src);
  else
    luaL_typerror(L, 2, "torch.*Tensor");

  lua_settop(L, 1);
  return 1;
}
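/* Illustrative sketch, not part of the original source: uploading a host
 * FloatTensor to the GPU through the copyFloat path dispatched above.
 * Signatures follow the state-passing convention used in
 * cutorch_CudaTensor_copy; the helper name is hypothetical. */
static void uploadFloat(THCState *state)
{
  THFloatTensor *host = THFloatTensor_newWithSize1d(16);
  THFloatTensor_fill(host, 3.0f);
  THCudaTensor *dev = THCudaTensor_newWithSize1d(state, 16);
  THCudaTensor_copyFloat(state, dev, host);   /* host -> device transfer */
  THCudaTensor_free(state, dev);
  THFloatTensor_free(host);
}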
static int cutorch_CudaTensorOperator___unm__(lua_State *L)
{
  THCudaTensor *tensor = luaT_checkudata(L, 1, "torch.CudaTensor");
  THCudaTensor *r;

  r = THCudaTensor_new();
  luaT_pushudata(L, r, "torch.CudaTensor");
  THCudaTensor_resizeAs(r, tensor);
  THCudaTensor_copy(r, tensor);
  THCudaTensor_mul(r, r, -1);

  return 1;
}
static int cutorch_CudaTensorOperator___div__(lua_State *L)
{
  THCudaTensor *tensor = luaT_checkudata(L, 1, "torch.CudaTensor");
  THCudaTensor *r;

  luaL_argcheck(L, lua_isnumber(L, 2), 2, "number expected");

  r = THCudaTensor_new();
  luaT_pushudata(L, r, "torch.CudaTensor");
  THCudaTensor_resizeAs(r, tensor);
  THCudaTensor_copy(r, tensor);
  THCudaTensor_mul(r, r, 1/lua_tonumber(L, 2));

  return 1;
}
void THCudaTensor_copyCuda(THCudaTensor *self, THCudaTensor *src)
{
  THCudaTensor_copy(self, src);
}