/* * Based on the implementation of the THTensor_(indexCopy) in torch7 */ static void THCudaTensor_indexFill(THCudaTensor *tensor, int dim, THLongTensor *index, float val) { long i, numel; THCudaTensor *tSlice; long *index_data; numel = THLongTensor_nElement(index); THArgCheck(index->nDimension == 1, 3, "Index is supposed to be a vector"); THArgCheck(dim < tensor->nDimension,4,"Indexing dim is out of bounds"); index = THLongTensor_newContiguous(index); index_data = THLongTensor_data(index); for (i=0; i<numel; i++) { if (tensor->nDimension > 1 ) { // create a new CudaTensor tSlice = THCudaTensor_new(); // set its storage to point to the corresponding storage in tensor THCudaTensor_select(tSlice, tensor,dim,index_data[i]-1); THCudaTensor_fill(tSlice, val); THCudaTensor_free(tSlice); } else { THCudaTensor_set1d(tensor,index_data[i]-1,val); } } THLongTensor_free(index); }
/* * Based on the implementation of the THTensor_(indexCopy) in torch7 */ static void THCudaTensor_indexCopy(THCudaTensor *tensor, int dim, THLongTensor *index, THCudaTensor *src) { long i, numel; THCudaTensor *tSlice, *sSlice; long *index_data; numel = THLongTensor_nElement(index); THArgCheck(index->nDimension == 1, 3, "Index is supposed to be a vector"); THArgCheck(dim < src->nDimension,4,"Indexing dim is out of bounds"); index = THLongTensor_newContiguous(index); index_data = THLongTensor_data(index); for (i=0; i<numel; i++) { if (tensor->nDimension > 1 ) { tSlice = THCudaTensor_new(); sSlice = THCudaTensor_new(); THCudaTensor_select(tSlice, tensor, dim, index_data[i]-1); THCudaTensor_select(sSlice, src, dim, i); THCudaTensor_copy(tSlice, sSlice); THCudaTensor_free(tSlice); THCudaTensor_free(sSlice); } else { // It's faster to copy a float from an address in the device to another address in the device than // retrieving it to the host memory and recopy it to the device memory THCudaCheck(cudaMemcpy(tensor->storage->data + tensor->storageOffset + index_data[i]-1,\ src->storage->data + src->storageOffset + i, sizeof(float), cudaMemcpyDeviceToDevice)); } } THLongTensor_free(index); }
static void load_array_to_lua(lua_State *L, cnpy::NpyArray& arr){ int ndims = arr.shape.size(); //based on code from mattorch with stride fix int k; THLongStorage *size = THLongStorage_newWithSize(ndims); THLongStorage *stride = THLongStorage_newWithSize(ndims); for (k=0; k<ndims; k++) { THLongStorage_set(size, k, arr.shape[k]); if (k > 0) THLongStorage_set(stride, ndims-k-1, arr.shape[ndims-k]*THLongStorage_get(stride,ndims-k)); else THLongStorage_set(stride, ndims-k-1, 1); } void * tensorDataPtr = NULL; size_t numBytes = 0; if ( arr.arrayType == 'f' ){ // float32/64 if ( arr.word_size == 4 ){ //float32 THFloatTensor *tensor = THFloatTensor_newWithSize(size, stride); tensorDataPtr = (void *)(THFloatTensor_data(tensor)); numBytes = THFloatTensor_nElement(tensor) * arr.word_size; luaT_pushudata(L, tensor, luaT_checktypename2id(L, "torch.FloatTensor")); }else if ( arr.word_size == 8){ //float 64 THDoubleTensor *tensor = THDoubleTensor_newWithSize(size, stride); tensorDataPtr = (void *)(THDoubleTensor_data(tensor)); numBytes = THDoubleTensor_nElement(tensor) * arr.word_size; luaT_pushudata(L, tensor, luaT_checktypename2id(L, "torch.DoubleTensor")); } }else if ( arr.arrayType == 'i' || arr.arrayType == 'u' ){ // does torch have unsigned types .. need to look if ( arr.word_size == 1 ){ //int8 THByteTensor *tensor = THByteTensor_newWithSize(size, stride); tensorDataPtr = (void *)(THByteTensor_data(tensor)); numBytes = THByteTensor_nElement(tensor) * arr.word_size; luaT_pushudata(L, tensor, luaT_checktypename2id(L, "torch.ByteTensor")); }else if ( arr.word_size == 2 ){ //int16 THShortTensor *tensor = THShortTensor_newWithSize(size, stride); tensorDataPtr = (void *)(THShortTensor_data(tensor)); numBytes = THShortTensor_nElement(tensor) * arr.word_size; luaT_pushudata(L, tensor, luaT_checktypename2id(L, "torch.ShortTensor")); }else if ( arr.word_size == 4 ){ //int32 THIntTensor *tensor = THIntTensor_newWithSize(size, stride); tensorDataPtr = (void *)(THIntTensor_data(tensor)); numBytes = THIntTensor_nElement(tensor) * arr.word_size; luaT_pushudata(L, tensor, luaT_checktypename2id(L, "torch.IntTensor")); }else if ( arr.word_size == 8){ //long 64 THLongTensor *tensor = THLongTensor_newWithSize(size, stride); tensorDataPtr = (void *)(THLongTensor_data(tensor)); numBytes = THLongTensor_nElement(tensor) * arr.word_size; luaT_pushudata(L, tensor, luaT_checktypename2id(L, "torch.LongTensor")); } }else{ printf("array type unsupported"); throw std::runtime_error("unsupported data type"); } // now copy the data assert(tensorDataPtr); memcpy(tensorDataPtr, (void *)(arr.data<void>()), numBytes); }