/* * Based on the implementation of the THTensor_(indexCopy) in torch7 */ static void THCudaTensor_indexCopy(THCudaTensor *tensor, int dim, THLongTensor *index, THCudaTensor *src) { long i, numel; THCudaTensor *tSlice, *sSlice; long *index_data; numel = THLongTensor_nElement(index); THArgCheck(index->nDimension == 1, 3, "Index is supposed to be a vector"); THArgCheck(dim < src->nDimension,4,"Indexing dim is out of bounds"); index = THLongTensor_newContiguous(index); index_data = THLongTensor_data(index); for (i=0; i<numel; i++) { if (tensor->nDimension > 1 ) { tSlice = THCudaTensor_new(); sSlice = THCudaTensor_new(); THCudaTensor_select(tSlice, tensor, dim, index_data[i]-1); THCudaTensor_select(sSlice, src, dim, i); THCudaTensor_copy(tSlice, sSlice); THCudaTensor_free(tSlice); THCudaTensor_free(sSlice); } else { // It's faster to copy a float from an address in the device to another address in the device than // retrieving it to the host memory and recopy it to the device memory THCudaCheck(cudaMemcpy(tensor->storage->data + tensor->storageOffset + index_data[i]-1,\ src->storage->data + src->storageOffset + i, sizeof(float), cudaMemcpyDeviceToDevice)); } } THLongTensor_free(index); }
/* Returns a freshly allocated deep copy of `self` (same sizes, contents
 * copied on-device). The caller owns the returned tensor. */
THCudaTensor *THCudaTensor_newClone(THCState *state, THCudaTensor *self)
{
  THCudaTensor *clone = THCudaTensor_new(state);
  THCudaTensor_resizeAs(state, clone, self);
  THCudaTensor_copy(state, clone, self);
  return clone;
}
/* * Based on the implementation of the THTensor_(indexCopy) in torch7
 *
 * Fills slice index[i] of `tensor` (along `dim`) with the scalar `val`.
 * `index` holds 1-based (Lua convention) positions along `dim`.
 */
static void THCudaTensor_indexFill(THCudaTensor *tensor, int dim, THLongTensor *index, float val)
{
  long i, numel;
  THCudaTensor *tSlice;
  long *index_data;

  numel = THLongTensor_nElement(index);
  THArgCheck(index->nDimension == 1, 3, "Index is supposed to be a vector");
  /* also reject negative dims, which the original check let through */
  THArgCheck(dim >= 0 && dim < tensor->nDimension, 4, "Indexing dim is out of bounds");

  index = THLongTensor_newContiguous(index);
  index_data = THLongTensor_data(index);

  if (tensor->nDimension > 1)
  {
    /* Hoisted out of the loop: one view header is enough — select() simply
     * re-points it at the corresponding storage region of `tensor`. */
    tSlice = THCudaTensor_new();
    for (i = 0; i < numel; i++)
    {
      THCudaTensor_select(tSlice, tensor, dim, index_data[i]-1);  /* 1-based -> 0-based */
      THCudaTensor_fill(tSlice, val);
    }
    THCudaTensor_free(tSlice);
  }
  else
  {
    /* 1-D case: fill element-by-element */
    for (i = 0; i < numel; i++)
      THCudaTensor_set1d(tensor, index_data[i]-1, val);
  }
  THLongTensor_free(index);
}
/* Lua metamethod `__sub__`: supports tensor-tensor, tensor-number and
 * number-tensor subtraction; pushes the result tensor (new) on the stack. */
static int cutorch_CudaTensorOperator___sub__(lua_State *L)
{
  THCudaTensor *lhs = luaT_toudata(L, 1, "torch.CudaTensor");
  THCudaTensor *rhs = luaT_toudata(L, 2, "torch.CudaTensor");

  if(!lhs && !rhs)
    luaL_error(L, "expecting two Tensors or one Tensor and one number");
  else
  {
    THCudaTensor *result = THCudaTensor_new();
    luaT_pushudata(L, result, "torch.CudaTensor");

    if(lhs && rhs)
    {
      /* tensor - tensor */
      THCudaTensor_resizeAs(result, lhs);
      THCudaTensor_copy(result, lhs);
      THCudaTensor_cadd(result, result, -1, rhs);
    }
    else if(lhs)
    {
      /* tensor - number */
      THCudaTensor_resizeAs(result, lhs);
      THCudaTensor_copy(result, lhs);
      THCudaTensor_add(result, result, -luaL_checknumber(L, 2));
    }
    else
    {
      /* number - tensor */
      THCudaTensor_resizeAs(result, rhs);
      THCudaTensor_fill(result, luaL_checknumber(L, 1));
      THCudaTensor_cadd(result, result, -1, rhs);
    }
  }
  return 1;
}
/* Lua metamethod `__unm__`: unary minus; pushes a new tensor equal to
 * the element-wise negation of the argument. */
static int cutorch_CudaTensorOperator___unm__(lua_State *L)
{
  THCudaTensor *input = luaT_checkudata(L, 1, "torch.CudaTensor");
  THCudaTensor *result = THCudaTensor_new();

  luaT_pushudata(L, result, "torch.CudaTensor");
  THCudaTensor_resizeAs(result, input);
  THCudaTensor_copy(result, input);
  THCudaTensor_mul(result, result, -1);

  return 1;
}
// Wraps an OpenCV GpuMat as a THCudaTensor *view* over the same device buffer
// (no data copy) and transfers ownership of that buffer from OpenCV to the
// newly created Torch storage.
TensorWrapper::TensorWrapper(cuda::GpuMat & mat, THCState *state) {

    this->definedInLua = false;

    // Empty mat: record the type but leave the tensor pointer null.
    if (mat.empty()) {
        this->typeCode = CV_CUDA;
        this->tensorPtr = nullptr;
        return;
    }

    this->typeCode = CV_CUDA;

    THCudaTensor *outputPtr = THCudaTensor_new(state);

    // Build new storage on top of the Mat
    // NOTE(review): element count = step(bytes/row) * rows * channels /
    // elemSize(depth). Since GpuMat's `step` already spans all channels of a
    // row, the extra *channels() factor looks like it may overcount the
    // storage length — confirm against the intended layout before relying on
    // the storage size.
    outputPtr->storage = THCudaStorage_newWithData(
            state,
            reinterpret_cast<float *>(mat.data),
            mat.step * mat.rows * mat.channels() / cv::getElemSize(mat.depth())
    );

    int sizeMultiplier;
    if (mat.channels() == 1) {
        // Single channel -> 2-D tensor (rows x cols)
        outputPtr->nDimension = 2;
        sizeMultiplier = cv::getElemSize(mat.depth());
    } else {
        // Multi-channel -> 3-D tensor (rows x cols x channels)
        outputPtr->nDimension = 3;
        sizeMultiplier = mat.elemSize1();
    }

    outputPtr->size = static_cast<long *>(THAlloc(sizeof(long) * outputPtr->nDimension));
    outputPtr->stride = static_cast<long *>(THAlloc(sizeof(long) * outputPtr->nDimension));

    if (mat.channels() > 1) {
        // Innermost (channel) dimension: interleaved, hence stride 1.
        outputPtr->size[2] = mat.channels();
        outputPtr->stride[2] = 1;
    }

    outputPtr->size[0] = mat.rows;
    outputPtr->size[1] = mat.cols;

    // Row stride comes from GpuMat's pitch (`step`, in bytes) converted to
    // element units; column stride skips over the interleaved channels.
    outputPtr->stride[0] = mat.step / sizeMultiplier;
    outputPtr->stride[1] = mat.channels();

    outputPtr->storageOffset = 0;

    // Make OpenCV treat underlying data as user-allocated
    mat.refcount = nullptr;

    // tensorPtr is stored type-erased; typeCode (CV_CUDA) records the real type.
    this->tensorPtr = reinterpret_cast<THByteTensor *>(outputPtr);
}
/* Lua metamethod `__mul__`: scalar scaling when one operand is a number;
 * otherwise dot product (1D*1D), matrix-vector (2D*1D) or matrix-matrix
 * (2D*2D) multiplication. Pushes the result on the Lua stack. */
static int cutorch_CudaTensorOperator___mul__(lua_State *L)
{
  THCudaTensor *lhs = luaT_toudata(L, 1, "torch.CudaTensor");
  THCudaTensor *rhs = luaT_toudata(L, 2, "torch.CudaTensor");

  if(!lhs && !rhs)
    luaL_error(L, "expecting two Tensors or one Tensor and one number");
  else
  {
    THCudaTensor *result = THCudaTensor_new();
    luaT_pushudata(L, result, "torch.CudaTensor");

    if(!lhs)
    {
      /* number * tensor */
      THCudaTensor_resizeAs(result, rhs);
      THCudaTensor_copy(result, rhs);
      THCudaTensor_mul(result, result, luaL_checknumber(L, 1));
    }
    else if(!rhs)
    {
      /* tensor * number */
      THCudaTensor_resizeAs(result, lhs);
      THCudaTensor_copy(result, lhs);
      THCudaTensor_mul(result, result, luaL_checknumber(L, 2));
    }
    else
    {
      int dimLhs = lhs->nDimension;
      int dimRhs = rhs->nDimension;

      if(dimLhs == 1 && dimRhs == 1)
      {
        /* dot product; the freshly pushed tensor is abandoned and the
           number on top of the stack is returned instead (ok, we wasted
           the allocation, but who cares) */
        lua_pushnumber(L, THCudaTensor_dot(lhs, rhs));
      }
      else if(dimLhs == 2 && dimRhs == 1)
      {
        /* matrix * vector */
        THCudaTensor_resize1d(result, lhs->size[0]);
        THCudaTensor_zero(result);
        THCudaTensor_addmv(result, 1, result, 1, lhs, rhs);
      }
      else if(dimLhs == 2 && dimRhs == 2)
      {
        /* matrix * matrix */
        THCudaTensor_resize2d(result, lhs->size[0], rhs->size[1]);
        THCudaTensor_zero(result);
        THCudaTensor_addmm(result, 1, result, 1, lhs, rhs);
      }
      else
        luaL_error(L, "multiplication between %dD and %dD tensors not yet supported",
                   lhs->nDimension, rhs->nDimension);
    }
  }
  return 1;
}
/* Lua metamethod `__div__`: tensor / number only. Implemented as a
 * multiplication by the reciprocal of the divisor. */
static int cutorch_CudaTensorOperator___div__(lua_State *L)
{
  THCudaTensor *input = luaT_checkudata(L, 1, "torch.CudaTensor");
  luaL_argcheck(L, lua_isnumber(L,2), 2, "number expected");

  THCudaTensor *result = THCudaTensor_new();
  luaT_pushudata(L, result, "torch.CudaTensor");
  THCudaTensor_resizeAs(result, input);
  THCudaTensor_copy(result, input);
  THCudaTensor_mul(result, result, 1/lua_tonumber(L, 2));

  return 1;
}