/*
 * Chunked wrapper around fread().
 *
 * Reads up to `nitems` elements of `size` bytes each from `stream` into `ptr`,
 * splitting the work into several fread() calls so that no single call asks
 * for more than 2 GiB (2147483648 bytes) -- presumably to sidestep platform
 * problems with very large single reads.
 *
 * Returns the number of complete elements read. The result is smaller than
 * `nitems` when end-of-file or a stream error is hit first; callers can tell
 * the two apart with feof()/ferror() on `stream`.
 */
size_t fread__(void *ptr, size_t size, size_t nitems, FILE *stream)
{
  size_t items_per_call;
  size_t nread = 0;

  /* Nothing to transfer. This also keeps the division below well-defined. */
  if (size == 0 || nitems == 0)
    return 0;

  items_per_call = (size_t)2147483648UL / size;
  /* A single element larger than the cap would otherwise yield a request for
   * zero items, which never advances and never sets EOF. */
  if (items_per_call == 0)
    items_per_call = 1;

  while (nread < nitems && !feof(stream) && !ferror(stream)) {
    size_t left = nitems - nread;
    size_t want = (left < items_per_call) ? left : items_per_call;
    nread += fread((char*)ptr + nread * size, size, want, stream);
  }
  return nread;
}
void THPStorage_(writeFileRaw)(THStorage *self, io fd) { real *data; int64_t size = self->size; #ifndef THC_GENERIC_FILE data = self->data; #else std::unique_ptr<char[]> cpu_data(new char[size * sizeof(real)]); data = (real*)cpu_data.get(); THCudaCheck(cudaMemcpy(data, self->data, size * sizeof(real), cudaMemcpyDeviceToHost)); #endif ssize_t result = doWrite(fd, &size, sizeof(int64_t)); if (result != sizeof(int64_t)) throw std::system_error(result, std::system_category()); // fast track for bytes and little endian if (sizeof(real) == 1 || THP_nativeByteOrder() == THPByteOrder::THP_LITTLE_ENDIAN) { char *bytes = (char *) data; int64_t remaining = sizeof(real) * size; while (remaining > 0) { // we write and read in 1GB blocks to avoid bugs on some OSes ssize_t result = doWrite(fd, bytes, THMin(remaining, 1073741824)); if (result < 0) throw std::system_error(result, std::system_category()); bytes += result; remaining -= result; } if (remaining != 0) throw std::system_error(result, std::system_category()); } else { int64_t buffer_size = std::min(size, (int64_t)5000); std::unique_ptr<uint8_t[]> le_buffer(new uint8_t[buffer_size * sizeof(real)]); for (int64_t i = 0; i < size; i += buffer_size) { size_t to_convert = std::min(size - i, buffer_size); if (sizeof(real) == 2) { THP_encodeInt16Buffer((uint8_t*)le_buffer.get(), (const int16_t*)data + i, THPByteOrder::THP_LITTLE_ENDIAN, to_convert); } else if (sizeof(real) == 4) { THP_encodeInt32Buffer((uint8_t*)le_buffer.get(), (const int32_t*)data + i, THPByteOrder::THP_LITTLE_ENDIAN, to_convert); } else if (sizeof(real) == 8) { THP_encodeInt64Buffer((uint8_t*)le_buffer.get(), (const int64_t*)data + i, THPByteOrder::THP_LITTLE_ENDIAN, to_convert); } SYSCHECK(doWrite(fd, le_buffer.get(), to_convert * sizeof(real))); } } }
THStorage * THPStorage_(readFileRaw)(io file, THStorage *_storage) { real *data; int64_t size; ssize_t result = doRead(file, &size, sizeof(int64_t)); if (result == 0) throw std::runtime_error("unexpected EOF. The file might be corrupted."); if (result != sizeof(int64_t)) throw std::system_error(result, std::system_category()); THStoragePtr storage; if (_storage == nullptr) { storage = THStorage_(newWithSize)(LIBRARY_STATE size); } else { THPUtils_assert(_storage->size == size, "storage has wrong size: expected %ld got %ld", size, _storage->size); storage = _storage; } #ifndef THC_GENERIC_FILE data = storage->data; #else std::unique_ptr<char[]> cpu_data(new char[size * sizeof(real)]); data = (real*)cpu_data.get(); #endif // fast track for bytes and little endian if (sizeof(real) == 1 || THP_nativeByteOrder() == THPByteOrder::THP_LITTLE_ENDIAN) { char *bytes = (char *) data; int64_t remaining = sizeof(real) * storage->size; while (remaining > 0) { // we write and read in 1GB blocks to avoid bugs on some OSes ssize_t result = doRead(file, bytes, THMin(remaining, 1073741824)); if (result == 0) // 0 means EOF, which is also an error throw std::runtime_error("unexpected EOF. 
The file might be corrupted."); if (result < 0) throw std::system_error(result, std::system_category()); bytes += result; remaining -= result; } if (remaining != 0) throw std::system_error(result, std::system_category()); } else { int64_t buffer_size = std::min(size, (int64_t)5000); std::unique_ptr<uint8_t[]> le_buffer(new uint8_t[buffer_size * sizeof(real)]); for (int64_t i = 0; i < size; i += buffer_size) { size_t to_convert = std::min(size - i, buffer_size); SYSCHECK(doRead(file, le_buffer.get(), sizeof(real) * to_convert)); if (sizeof(real) == 2) { THP_decodeInt16Buffer((int16_t*)data + i, le_buffer.get(), THPByteOrder::THP_LITTLE_ENDIAN, to_convert); } else if (sizeof(real) == 4) { THP_decodeInt32Buffer((int32_t*)data + i, le_buffer.get(), THPByteOrder::THP_LITTLE_ENDIAN, to_convert); } else if (sizeof(real) == 8) { THP_decodeInt64Buffer((int64_t*)data + i, le_buffer.get(), THPByteOrder::THP_LITTLE_ENDIAN, to_convert); } } } #ifdef THC_GENERIC_FILE THCudaCheck(cudaMemcpy(storage->data, data, size * sizeof(real), cudaMemcpyHostToDevice)); #endif return storage.release(); }
void THCStorage_resize(THCState *state, THCStorage *self, ptrdiff_t size) { THArgCheck(size >= 0, 2, "invalid size"); THAssert(self->allocator != NULL); int device; THCudaCheck(cudaGetDevice(&device)); if(!(self->flag & TH_STORAGE_RESIZABLE)) THError("Trying to resize storage that is not resizable"); size_t elementSize = at::elementSize(self->scalar_type); if (self->allocator->realloc) { void * data_ptr = self->data_ptr; cudaError_t err = (*self->allocator->realloc)( self->allocatorContext, (void**)&(data_ptr), self->size * elementSize, size * elementSize, THCState_getCurrentStreamOnDevice(state, device)); if (err != cudaSuccess) { THCudaCheck(err); } self->size = size; self->device = device; return; } if(size == 0) { if(self->flag & TH_STORAGE_FREEMEM) { THCudaCheck( (*self->allocator->free)(self->allocatorContext, self->data_ptr)); } self->data_ptr = NULL; self->size = 0; self->device = device; } else { void *data = NULL; cudaError_t err = (*self->allocator->malloc)(self->allocatorContext, (void**)&(data), size * elementSize, THCState_getCurrentStreamOnDevice(state, device)); THCudaCheck(err); if (self->data_ptr) { // Enable p2p access when the memcpy is across devices THCState_getPeerToPeerAccess(state, device, self->device); THCudaCheck(cudaMemcpyAsync(data, self->data_ptr, THMin(self->size, size) * elementSize, cudaMemcpyDeviceToDevice, THCState_getCurrentStream(state))); if(self->flag & TH_STORAGE_FREEMEM) { THCudaCheck( (*self->allocator->free)(self->allocatorContext, self->data_ptr)); } } self->data_ptr = data; self->size = size; self->device = device; } }