/* Create a mapped-allocator context describing file `filename` opened with
   `flags`.  The context stores the name, the (normalized) flags, the mapped
   size (0 until a mapping is made) and a platform file handle placeholder.
   The caller owns the returned context. */
THMapAllocatorContext *THMapAllocatorContext_new(const char *filename, int flags) {
  THMapAllocatorContext *ctx = THAlloc(sizeof(THMapAllocatorContext));

  /* NOCREATE is only meaningful together with one of the shared modes;
     strip it otherwise */
  if (!(flags & TH_ALLOCATOR_MAPPED_SHARED) && !(flags & TH_ALLOCATOR_MAPPED_SHAREDMEM))
    flags &= ~TH_ALLOCATOR_MAPPED_NOCREATE;

  /* flags consisting of EXCLUSIVE alone (no shared bit set) are rejected */
  if ((flags ^ TH_ALLOCATOR_MAPPED_EXCLUSIVE) == 0)
    THError("TH_ALLOCATOR_MAPPED_EXCLUSIVE flag requires opening the file "
        "in shared mode");

  if (filename) {
    /* private copy of the name, released with the context */
    ctx->filename = THAlloc(strlen(filename)+1);
    strcpy(ctx->filename, filename);
  } else {
    /* shared sentinel string -- no private copy is made */
    ctx->filename = unknown_filename;
  }
  ctx->flags = flags;
  ctx->size = 0;   /* filled in once the mapping is established */
#ifdef _WIN32
  ctx->handle = INVALID_HANDLE_VALUE;
#else
  ctx->fd = -1;
#endif

  return ctx;
}
// Wrap a cv::cuda::GpuMat as a CUDA Torch tensor WITHOUT copying the pixel
// data: the tensor storage aliases mat.data, and mat's refcount pointer is
// cleared so OpenCV treats the buffer as user-allocated (i.e. OpenCV will
// not free it).
TensorWrapper::TensorWrapper(cuda::GpuMat & mat, THCState *state) {

    this->definedInLua = false;

    if (mat.empty()) {
        // Empty input: keep the CUDA type tag but expose no tensor.
        this->typeCode = CV_CUDA;
        this->tensorPtr = nullptr;
        return;
    }

    this->typeCode = CV_CUDA;

    THCudaTensor *outputPtr = THCudaTensor_new(state);

    // Build new storage on top of the Mat
    // NOTE(review): storage length is derived from step (bytes per row)
    // scaled by channels / element size -- this presumes float-sized
    // elements on the device; confirm against callers.
    outputPtr->storage = THCudaStorage_newWithData(
            state,
            reinterpret_cast<float *>(mat.data),
            mat.step * mat.rows * mat.channels() / cv::getElemSize(mat.depth())
    );

    // Single-channel Mats map to 2-D tensors; multi-channel Mats get a
    // third (innermost) channel dimension.
    int sizeMultiplier;
    if (mat.channels() == 1) {
        outputPtr->nDimension = 2;
        sizeMultiplier = cv::getElemSize(mat.depth());
    } else {
        outputPtr->nDimension = 3;
        sizeMultiplier = mat.elemSize1();
    }

    outputPtr->size = static_cast<long *>(THAlloc(sizeof(long) * outputPtr->nDimension));
    outputPtr->stride = static_cast<long *>(THAlloc(sizeof(long) * outputPtr->nDimension));

    if (mat.channels() > 1) {
        // Interleaved channels: innermost dimension, stride 1.
        outputPtr->size[2] = mat.channels();
        outputPtr->stride[2] = 1;
    }

    outputPtr->size[0] = mat.rows;
    outputPtr->size[1] = mat.cols;

    // Convert the byte-based row step to an element count.
    outputPtr->stride[0] = mat.step / sizeMultiplier;
    outputPtr->stride[1] = mat.channels();

    outputPtr->storageOffset = 0;

    // Make OpenCV treat underlying data as user-allocated
    mat.refcount = nullptr;

    this->tensorPtr = reinterpret_cast<THByteTensor *>(outputPtr);
}
/* Reallocate `ptr` to `size` bytes, keeping the TH heap counter in sync.
   A NULL ptr behaves like THAlloc, size 0 like THFree (returns NULL).
   On allocation failure the GC hook, if registered, is invoked once and
   the realloc retried before giving up with THError. */
void* THRealloc(void *ptr, long size)
{
  if(!ptr)
    return THAlloc(size);

  if(size == 0)
  {
    THFree(ptr);
    return NULL;
  }

  if(size < 0)
    THError("$ Torch: invalid memory size -- maybe an overflow?");

  /* drop the old allocation from the heap counter before resizing */
  THHeapUpdate(-getAllocSize(ptr));
  void *newptr = realloc(ptr, size);

  if(!newptr && torchGCFunction) {
    torchGCFunction(torchGCData);
    newptr = realloc(ptr, size);
  }

  /* re-add whichever block survived (the old one if realloc failed) */
  THHeapUpdate(getAllocSize(newptr ? newptr : ptr));

  if(!newptr)
    /* fix: size is long -- %d is undefined behavior on LP64; use %ld */
    THError("$ Torch: not enough memory: you tried to reallocate %ldGB. Buy new RAM!", size/1073741824);

  return newptr;
}
/* Reallocate `ptr` to `size` bytes, updating the TH heap counter only
   after the reallocation has succeeded.  NULL ptr behaves like THAlloc,
   size 0 like THFree.  On pressure the GC hook is invoked once and the
   realloc retried before failing with THError. */
void* THRealloc(void *ptr, ptrdiff_t size)
{
  if(!ptr)
    return THAlloc(size);

  if(size == 0)
  {
    THFree(ptr);
    return NULL;
  }

  if(size < 0)
    THError("$ Torch: invalid memory size -- maybe an overflow?");

  ptrdiff_t oldSize = -getAllocSize(ptr);
  void *newptr = realloc(ptr, size);

  if(!newptr && torchGCFunction) {
    torchGCFunction(torchGCData);
    newptr = realloc(ptr, size);
  }

  if(!newptr)
    /* fix: %d with a ptrdiff_t argument is undefined behavior; cast + %ld */
    THError("$ Torch: not enough memory: you tried to reallocate %ldGB. Buy new RAM!", (long)(size/1073741824));

  // update heapSize only after successfully reallocated
  THHeapUpdate(oldSize + getAllocSize(newptr));

  return newptr;
}
// Wrap a cv::Mat as a Torch tensor of matching element type WITHOUT copying
// the pixel data: the tensor storage aliases mat.data, and mat.addref()
// keeps OpenCV from freeing the buffer while the tensor is alive.
TensorWrapper::TensorWrapper(cv::Mat & mat) {

    if (mat.empty()) {
        // Fix: initialize members on the empty path too -- the GpuMat
        // constructor sets them, but here typeCode and definedInLua were
        // previously left uninitialized.
        this->definedInLua = false;
        this->typeCode = static_cast<char>(mat.depth());
        this->tensorPtr = nullptr;
        return;
    }

    this->definedInLua = false;
    this->typeCode = static_cast<char>(mat.depth());

    THByteTensor *outputPtr = new THByteTensor;

    // Build new storage on top of the Mat
    outputPtr->storage = THByteStorage_newWithData(
            mat.data,
            mat.step[0] * mat.rows
    );

    // Multi-channel Mats get one extra (innermost) tensor dimension.
    int sizeMultiplier;
    if (mat.channels() == 1) {
        outputPtr->nDimension = mat.dims;
        sizeMultiplier = cv::getElemSize(mat.depth());
    } else {
        outputPtr->nDimension = mat.dims + 1;
        sizeMultiplier = mat.elemSize1();
    }

    outputPtr->size   = static_cast<long *>(THAlloc(sizeof(long) * outputPtr->nDimension));
    outputPtr->stride = static_cast<long *>(THAlloc(sizeof(long) * outputPtr->nDimension));

    if (mat.channels() > 1) {
        // Channel dimension: length = #channels, stride 1 (interleaved).
        outputPtr->size[outputPtr->nDimension - 1] = mat.channels();
        outputPtr->stride[outputPtr->nDimension - 1] = 1;
    }

    for (int i = 0; i < mat.dims; ++i) {
        outputPtr->size[i] = mat.size[i];
        // Mat steps are in bytes; tensor strides are in elements.
        outputPtr->stride[i] = mat.step[i] / sizeMultiplier;
    }

    // Prevent OpenCV from deallocating Mat data
    mat.addref();

    // refcount 0 marks the tensor as not owned by Lua/Torch refcounting.
    outputPtr->refcount = 0;

    this->tensorPtr = outputPtr;
}
/* Allocate an empty CUDA storage: no device buffer, zero length, a single
   reference, and the standard refcounted/resizable/owns-memory flags. */
THCudaStorage* THCudaStorage_new(void)
{
  THCudaStorage *self = (THCudaStorage*)THAlloc(sizeof(THCudaStorage));
  self->flag = TH_STORAGE_REFCOUNTED | TH_STORAGE_RESIZABLE | TH_STORAGE_FREEMEM;
  self->refcount = 1;
  self->size = 0;
  self->data = NULL;
  return self;
}
/* Wrap an existing device buffer in a new storage.  The FREEMEM flag is
   set, so the storage adopts `data` (size in float elements), starting
   with a single reference. */
THCudaStorage* THCudaStorage_newWithData(float *data, long size)
{
  THCudaStorage *self = (THCudaStorage*)THAlloc(sizeof(THCudaStorage));
  self->flag = TH_STORAGE_REFCOUNTED | TH_STORAGE_RESIZABLE | TH_STORAGE_FREEMEM;
  self->refcount = 1;
  self->size = size;
  self->data = data;
  return self;
}
/* Allocate a fresh 4-D tensor with the given extents; passing NULL strides
   to rawResize lets it choose the strides. */
THCudaTensor *THCudaTensor_newWithSize4d(THCState *state, long size0, long size1, long size2, long size3)
{
  long dims[4];
  dims[0] = size0;
  dims[1] = size1;
  dims[2] = size2;
  dims[3] = size3;

  THCudaTensor *self = (THCudaTensor*)THAlloc(sizeof(THCudaTensor));
  THCudaTensor_rawInit(state, self);
  THCudaTensor_rawResize(state, self, 4, dims, NULL);
  return self;
}
/* Unfold dimension `dimension` of `src` into `self`: an extra trailing
   dimension of length `size` is appended, and the unfolded dimension
   shrinks to the number of windows, (len - size)/step + 1.  If src is
   NULL the operation is performed in place on self. */
void THCudaTensor_unfold(THCState *state, THCudaTensor *self, THCudaTensor *src, int dimension, long size, long step)
{
  long *newSize;
  long *newStride;
  int d;

  if(!src)
    src = self;

  THArgCheck( (src->nDimension > 0), 1, "cannot unfold an empty tensor");
  THArgCheck(dimension < src->nDimension, 2, "out of range");
  THArgCheck(size <= src->size[dimension], 3, "out of range");
  THArgCheck(step > 0, 4, "invalid step");

  /* self becomes a view of src before its shape is rewritten below */
  THCudaTensor_set(state, self, src);

  newSize = (long*)THAlloc(sizeof(long)*(self->nDimension+1));
  newStride = (long*)THAlloc(sizeof(long)*(self->nDimension+1));

  /* trailing dimension walks inside a window, reusing the original stride */
  newSize[self->nDimension] = size;
  newStride[self->nDimension] = self->stride[dimension];

  for(d = 0; d < self->nDimension; d++)
  {
    if(d == dimension)
    {
      /* window count along the unfolded dimension; consecutive windows
         start `step` elements apart */
      newSize[d] = (self->size[d] - size) / step + 1;
      newStride[d] = step*self->stride[d];
    }
    else
    {
      newSize[d] = self->size[d];
      newStride[d] = self->stride[d];
    }
  }

  /* swap in the new shape arrays */
  THFree(self->size);
  THFree(self->stride);
  self->size = newSize;
  self->stride = newStride;
  self->nDimension++;
}
/* Pointer-copy init: a new tensor viewing the same storage, offset and
   shape as `tensor` (no data is copied). */
THGPUTensor *THGPUTensor_newWithTensor(THGPUTensor *tensor)
{
  THGPUTensor *copy = (THGPUTensor*)THAlloc(sizeof(THGPUTensor));
  THGPUTensor_rawInit(copy);
  THGPUTensor_rawSet(copy,
                     tensor->storage,
                     tensor->storageOffset,
                     tensor->nDimension,
                     tensor->size,
                     tensor->stride);
  return copy;
}
/*** Helper methods ***/

/* Initialize a freshly allocated sparse tensor in place: a single size
   entry of 0, empty indices/values tensors, zero dimensions, not
   coalesced, zero non-zeros, refcount 1. */
static void THCSTensor_(rawInit)(THCState *state, THCSTensor *self)
{
  self->size = static_cast<int64_t *>(THAlloc(sizeof(int64_t)));
  self->size[0] = 0;
  self->indices = THCIndexTensor_(new)(state);
  self->values = THCTensor_(new)(state);
  self->nDimensionI = 0;
  self->nDimensionV = 0;
  self->coalesced = 0;
  self->nnz = 0;
  // self->flag = TH_TENSOR_REFCOUNTED;
  /* the struct comes from raw allocation, so the atomic must be
     constructed in place rather than assigned */
  new (&self->refcount) std::atomic<int>(1);
}
/* Compute per-dimension helper arrays for `self`:
   *sz_ receives the contiguous ("row-major") strides implied by self->size,
   *st_ receives a copy of self->stride.
   Both arrays are THAlloc'd; the caller must THFree them.
   Improvement: the original allocated a third scratch buffer (szh) and
   memcpy'd it into sz -- the values are now computed directly into sz. */
static void THFloatTensor_computesz(THFloatTensor *self, long **sz_, long **st_)
{
  long *sz, *st;
  int i;

  sz = THAlloc(sizeof(long)*self->nDimension);
  st = THAlloc(sizeof(long)*self->nDimension);

  /* innermost dimension has stride 1; each outer stride is the product
     of the inner sizes */
  for(i = self->nDimension-1; i >= 0; i--)
  {
    if(i == self->nDimension-1)
      sz[i] = 1;
    else
      sz[i] = sz[i+1]*self->size[i+1];
  }
  memcpy(st, self->stride, self->nDimension * sizeof(long));

  *sz_ = sz;
  *st_ = st;
}
/* Build a 4-D tensor view on `storage` at `storageOffset` with explicit
   sizes and strides for every dimension. */
THCudaTensor *THCudaTensor_newWithStorage4d(THCState *state, THCudaStorage *storage, long storageOffset,
                                            long size0, long stride0,
                                            long size1, long stride1,
                                            long size2, long stride2,
                                            long size3, long stride3)
{
  long dims[4];
  long steps[4];
  dims[0] = size0;   steps[0] = stride0;
  dims[1] = size1;   steps[1] = stride1;
  dims[2] = size2;   steps[2] = stride2;
  dims[3] = size3;   steps[3] = stride3;

  THCudaTensor *self = (THCudaTensor*)THAlloc(sizeof(THCudaTensor));
  THCudaTensor_rawInit(state, self);
  THCudaTensor_rawSet(state, self, storage, storageOffset, 4, dims, steps);
  return self;
}
// Move the tensor wrapped by `srcWrapper` into `dst`.
// When the wrapper was created on the C++ side (definedInLua == false) the
// size/stride arrays are stolen from src and src itself is freed; when it
// came from Lua, the metadata is deep-copied so the Lua side keeps sole
// ownership of its tensor.
void transfer_tensor_CUDA(THCState *state, THCudaTensor *dst, struct TensorWrapper srcWrapper) {

    THCudaTensor *src = reinterpret_cast<THCudaTensor *>(srcWrapper.tensorPtr);

    dst->nDimension = src->nDimension;
    dst->refcount = src->refcount;
    dst->storage = src->storage;

    if (!srcWrapper.definedInLua) {
        // Don't let Torch deallocate size and stride arrays
        dst->size = src->size;
        dst->stride = src->stride;
        src->size = nullptr;
        src->stride = nullptr;
        // Bump the storage refcount so the THCudaTensor_free of src below
        // does not take the shared storage down with it.
        THAtomicIncrementRef(&src->storage->refcount);
        THCudaTensor_free(state, src);
    } else {
        // Lua owns src: copy the shape metadata instead of stealing it.
        dst->size = static_cast<long *>(THAlloc(sizeof(long) * dst->nDimension));
        dst->stride = static_cast<long *>(THAlloc(sizeof(long) * dst->nDimension));
        memcpy(dst->size, src->size, src->nDimension * sizeof(long));
        memcpy(dst->stride, src->stride, src->nDimension * sizeof(long));
    }
}
/* Storage init: build a tensor on top of `storage` at `storageOffset`.
   `size` and `stride` are optional descriptors (either may be NULL); when
   both are given their lengths must agree. */
THGPUTensor *THGPUTensor_newWithStorage(THGPUStorage *storage, long storageOffset, THLongStorage *size, THLongStorage *stride)
{
  THGPUTensor *self = (THGPUTensor*)THAlloc(sizeof(THGPUTensor));

  if (size && stride)
    THArgCheck(size->size == stride->size, 4, "inconsistent size");

  THGPUTensor_rawInit(self);

  long nDimension = size ? size->size : (stride ? stride->size : 0);
  long *sizeData = size ? size->data : NULL;
  long *strideData = stride ? stride->data : NULL;
  THGPUTensor_rawSet(self, storage, storageOffset, nDimension, sizeData, strideData);

  return self;
}
/* Build the `outputSize` window start offsets for fractional max pooling
   along one axis.  `sample` is a random offset (presumably in [0, 1) --
   confirm against the caller).  The last window is pinned to the end so
   the windows exactly cover the input.  The returned buffer is THAlloc'd;
   the caller is responsible for freeing it. */
static long* THNN_(SpatialFractionalMaxPooling_generateIntervals)(
  real sample,
  long inputSize,
  long outputSize,
  int poolSize) {
  /* NOTE(review): outputSize == 1 makes the divisor zero; with a floating
     `real` alpha becomes inf but the loop body never runs and only the
     pinned last entry is written -- confirm this is the intended path. */
  real alpha = (real) (inputSize - poolSize) / (real) (outputSize - 1);
  long* sequence = (long*) THAlloc(sizeof(long) * outputSize);

  long i;
  for (i = 0; i < outputSize - 1; ++i) {
    /* floor((i + sample) * alpha) - floor(sample * alpha) */
    sequence[i] = (long) ((i + sample) * alpha) - (long) (sample * alpha);
  }
  sequence[outputSize - 1] = inputSize - poolSize;

  return sequence;
}
THCudaStorage* THCudaStorage_newWithSize(long size) { THArgCheck(size >= 0, 2, "invalid size"); if(size > 0) { THCudaStorage *storage = (THCudaStorage*)THAlloc(sizeof(THCudaStorage)); THCudaCheck(cudaMalloc((void**)&(storage->data), size * sizeof(float))); storage->size = size; storage->refcount = 1; storage->flag = TH_STORAGE_REFCOUNTED | TH_STORAGE_RESIZABLE | TH_STORAGE_FREEMEM; return storage; } else { return THCudaStorage_new(); } }
/* Reallocate `ptr` to `size` bytes.  NULL ptr behaves like THAlloc,
   size 0 like THFree (returns NULL); failure raises THError. */
void* THRealloc(void *ptr, long size)
{
  if(!ptr)
    return THAlloc(size);

  if(size == 0)
  {
    THFree(ptr);
    return NULL;
  }

  if(size < 0)
    THError("$ Torch: invalid memory size -- maybe an overflow?");

  /* fix: keep the realloc result in a separate variable instead of
     overwriting `ptr` -- assigning realloc() to its own argument loses
     the original block on failure */
  void *newptr = realloc(ptr, size);
  if(!newptr)
    /* fix: size is long -- %d is undefined behavior on LP64; use %ld */
    THError("$ Torch: not enough memory: you tried to reallocate %ldGB. Buy new RAM!", size/1073741824);

  return newptr;
}
/* Create a CUDA storage of `size` elements of `scalar_type` on the current
   device, allocating through the supplied device allocator.  The storage
   starts with one reference and owns its memory (FREEMEM). */
THCStorage* THCStorage_newWithAllocator(THCState *state,
                                        at::ScalarType scalar_type,
                                        ptrdiff_t size,
                                        THCDeviceAllocator* allocator,
                                        void* allocatorContext)
{
  THArgCheck(size >= 0, 2, "invalid size");
  int device;
  THCudaCheck(cudaGetDevice(&device));

  THCStorage *storage = (THCStorage*)THAlloc(sizeof(THCStorage));
  memset(storage, 0, sizeof(THCStorage));
  /* storage comes from raw allocation: construct the atomic in place */
  new (&storage->refcount) std::atomic<int>(1);
  storage->scalar_type = scalar_type;
  storage->flag = TH_STORAGE_REFCOUNTED | TH_STORAGE_RESIZABLE | TH_STORAGE_FREEMEM;
  storage->allocator = allocator;
  storage->allocatorContext = allocatorContext;
  storage->size = size;
  storage->device = device;

  if(size > 0)
  {
    cudaError_t err =
      (*allocator->malloc)(allocatorContext,
                           (void**)&(storage->data_ptr),
                           size * at::elementSize(scalar_type),
                           THCState_getCurrentStreamOnDevice(state, device));
    if(err != cudaSuccess)
    {
      /* fix: release with THFree to match the THAlloc above -- the
         original called plain free() on THAlloc'd memory */
      THFree(storage);
    }
    THCudaCheck(err);
  } else {
    storage->data_ptr = NULL;
  }
  return storage;
}
/* Empty init: allocate a tensor and put it in the default raw-initialized
   state (no storage, no dimensions). */
THGPUTensor *THGPUTensor_new()
{
  THGPUTensor *tensor = (THGPUTensor*)THAlloc(sizeof(THGPUTensor));
  THGPUTensor_rawInit(tensor);
  return tensor;
}
  self->refcount = 1;

  if(nDimension > 0)
    THTensor_(resize_raw)(self, nDimension, size);
}

/* Initialize from an optional THLongStorage size descriptor (NULL means
   an empty, zero-dimensional tensor). */
void THTensor_(init)(THTensor *self, THStorage *storage, long storageOffset, THLongStorage *size)
{
  THTensor_(init_raw)(self, storage, storageOffset,
                      (size ? THLongStorage_size(size) : 0),
                      (size ? THLongStorage_data(size) : NULL));
}

/* Empty init */
THTensor *THTensor_(new)(void)
{
  THTensor* self = THAlloc(sizeof(THTensor));
  THTensor_(init_raw)(self, NULL, 0, 0, NULL);
  return self;
}

/* Pointer-copy init */
THTensor* THTensor_(newWithTensor)(THTensor *tensor)
{
  THTensor* self = THAlloc(sizeof(THTensor));
  /* shares storage and shape with `tensor`; no data copy */
  THTensor_(init_raw)(self, tensor->storage, tensor->storageOffset, tensor->nDimension, tensor->size);
  return self;
}

THTensor* THTensor_(newWithTensorNarrow)(THTensor *tensor, long firstIndex, long size)
{
  THTensor *self;
#define TENSOR_FUNC(NAME) TENSOR_FUNC_TN(CAP_TYPE, NAME)

/* For the default Tensor type, we simplify the naming */
#ifdef DEFAULT_TENSOR
#undef TENSOR
#undef TENSOR_FUNC
#define TENSOR THTensor
#define TENSOR_FUNC(NAME) TENSOR_FUNC_TN(, NAME)
#endif

/* forward declaration: shared (re)initialization helper defined below */
static void TENSOR_FUNC(reinit)(TENSOR *tensor, STORAGE *storage, long storageOffset, int nDimension, long *size, long *stride);

/* Empty init */
TENSOR *TENSOR_FUNC(new)(void)
{
  TENSOR *tensor = THAlloc(sizeof(TENSOR));
  tensor->size = NULL;
  tensor->stride = NULL;
  tensor->nDimension = 0;
  tensor->storage = NULL;
  tensor->storageOffset = 0;
  tensor->ownStorage = 0;
  tensor->refcount = 1;
  return tensor;
}

/* Pointer-copy init */
TENSOR *TENSOR_FUNC(newWithTensor)(TENSOR *src)
{
  TENSOR *tensor = TENSOR_FUNC(new)();
  /* shares src's storage and shape; no element copy */
  TENSOR_FUNC(reinit)(tensor, src->storage, src->storageOffset, src->nDimension, src->size, src->stride);
/* Empty init: allocate a tensor and put it in the default raw-initialized
   state (no storage, no dimensions). */
THCudaTensor *THCudaTensor_new(THCState *state)
{
  THCudaTensor *tensor = (THCudaTensor*)THAlloc(sizeof(THCudaTensor));
  THCudaTensor_rawInit(state, tensor);
  return tensor;
}
  THCudaCheck(cudaMemcpy(&value, self->data + index, sizeof(real), cudaMemcpyDeviceToHost));
  return realToHostreal(value);
#else
  /* half path: convert the element to float on the device, then copy the
     float back to the host */
  float *ret_d;
  float ret;
  THCudaCheck(THCudaMalloc(state, (void**)&ret_d, sizeof(float)));
  THCHalf2Float(state, ret_d, self->data + index, 1);
  THCudaCheck(cudaMemcpy(&ret, ret_d, sizeof(float), cudaMemcpyDeviceToHost));
  THCudaFree(state, ret_d);
  return ret;
#endif
}

/* Empty storage: no buffer, one reference, standard
   refcounted/resizable/owns-memory flags. */
THCStorage* THCStorage_(new)(THCState *state)
{
  THCStorage *storage = (THCStorage*)THAlloc(sizeof(THCStorage));
  storage->data = NULL;
  storage->size = 0;
  storage->refcount = 1;
  storage->flag = TH_STORAGE_REFCOUNTED | TH_STORAGE_RESIZABLE | TH_STORAGE_FREEMEM;
  return storage;
}

THCStorage* THCStorage_(newWithSize)(THCState *state, long size)
{
  THArgCheck(size >= 0, 2, "invalid size");

  if(size > 0)
  {
    THCStorage *storage = (THCStorage*)THAlloc(sizeof(THCStorage));
}

/* Clear a flag bit on the tensor (e.g. TH_TENSOR_*). */
void THCTensor_(clearFlag)(THCState *state, THCTensor *self, const char flag)
{
  self->flag &= ~flag;
}

/**** creation methods ****/

static void THCTensor_(rawInit)(THCState *state, THCTensor *self);

/* Empty init */
THCTensor *THCTensor_(new)(THCState *state)
{
  THCTensor *self = (THCTensor*)THAlloc(sizeof(THCTensor));
  THCTensor_(rawInit)(state, self);
  return self;
}

/* Pointer-copy init */
THCTensor *THCTensor_(newWithTensor)(THCState *state, THCTensor *tensor)
{
  THCTensor *self = (THCTensor*)THAlloc(sizeof(THCTensor));
  THCTensor_(rawInit)(state, self);
  /* shares tensor's storage and shape; no element copy */
  THCTensor_(setStorageNd)(state, self, tensor->storage, tensor->storageOffset, tensor->nDimension, tensor->size,
/* Default-allocator shim: forward allocation requests to THAlloc.
   The context argument is unused. */
static void *THDefaultAllocator_alloc(void* ctx, ptrdiff_t size)
{
  (void)ctx;
  return THAlloc(size);
}