void DeviceMatrix3D_copyFromDevice(const DeviceMatrix3D& self, float* dst) { if ((self.dim_x == 0) || (self.dim_y == 0) || (self.dim_t == 0)) { // Bail early if there is nothing to copy return; } if (self.pitch_t == self.dim_y * self.pitch_y) { // Shortcut if we're packed in the t direction const size_t widthInBytes = self.dim_x * sizeof(float); CUDA_SAFE_CALL_NO_SYNC (cudaMemcpy2D(dst, widthInBytes, self.data, self.pitch_y * sizeof(float), widthInBytes, self.dim_y * self.dim_t, cudaMemcpyDeviceToHost)); return; } // Do a series of copies to fill in the 3D array for (size_t t=0; t < self.dim_t; t++) { const size_t widthInBytes = self.dim_x * sizeof(float); float* host_start = dst + t * self.dim_y * self.dim_x; float* device_start = self.data + t * self.pitch_t; CUDA_SAFE_CALL_NO_SYNC (cudaMemcpy2D(host_start, widthInBytes, device_start, self.pitch_y * sizeof(float), widthInBytes, self.dim_y, cudaMemcpyDeviceToHost)); } }
void SimBufferCuda::Alloc(size_t size) { mSize = size; if (mAllocedSize > 0) { if (mAllocedSize == mSize) return; Free(); } cudaError_t result; switch (mBufferLocation) { case Host: result = mSimCudaAllocator->AllocateHost(&mPtr, mSize); break; case HostPinned: result = mSimCudaAllocator->AllocateHostPinned(&mPtr, mSize); break; case Device: result = mSimCudaAllocator->Allocate(&mPtr, mSize); break; } CUDA_SAFE_CALL_NO_SYNC(result) mAllocedSize = mSize; }
// Download a pitched 2D device matrix into a densely packed host buffer.
//
// self - device matrix descriptor; self.pitch is scaled by sizeof(float) to
//        form the device row pitch in bytes.
// dst  - host destination, written with a row pitch of exactly `width`
//        floats (no padding between rows).
//
// Does nothing unless both width and height are greater than zero.
void DeviceMatrix_copyFromDevice(const DeviceMatrix& self, float* dst)
{
    const bool hasData = (self.width > 0) && (self.height > 0);
    if (!hasData) {
        return;
    }

    const size_t hostRowBytes = self.width * sizeof(float);
    const size_t deviceRowPitchBytes = self.pitch * sizeof(float);

    CUDA_SAFE_CALL_NO_SYNC
        (cudaMemcpy2D(dst, hostRowBytes,
                      self.data, deviceRowPitchBytes,
                      hostRowBytes, self.height,
                      cudaMemcpyDeviceToHost));
}
// Upload a densely packed host buffer into a pitched 2D device matrix.
//
// self - destination device matrix; self.pitch is scaled by sizeof(float) to
//        form the device row pitch in bytes.
// data - host source, read with a row pitch of exactly `width` floats (no
//        padding between rows).
//
// Does nothing unless both width and height are greater than zero.
void DeviceMatrix_copyToDevice(DeviceMatrix& self, const float* data)
{
    const bool hasData = (self.width > 0) && (self.height > 0);
    if (!hasData) {
        return;
    }

    const size_t hostRowBytes = self.width * sizeof(float);
    const size_t deviceRowPitchBytes = self.pitch * sizeof(float);

    CUDA_SAFE_CALL_NO_SYNC
        (cudaMemcpy2D(self.data, deviceRowPitchBytes,
                      data, hostRowBytes,
                      hostRowBytes, self.height,
                      cudaMemcpyHostToDevice));
}
// Upload a densely packed host buffer into a pitched 3D device matrix.
//
// self - destination device matrix; pitch_y and pitch_t are treated as
//        element (float) strides, the same way
//        DeviceMatrix3D_copyFromDevice treats them.
// data - host source; rows are read back to back with a row pitch of dim_x
//        floats, and slice t starts at data + t * dim_y * dim_x.
//
// Does nothing unless all three dimensions are greater than zero.
//
// A single 2D copy of dim_y * dim_t rows is only valid when the slices are
// contiguous on the device (pitch_t == dim_y * pitch_y). When there is extra
// padding between slices, each slice is uploaded separately so that rows land
// at the correct device addresses.
void DeviceMatrix3D_copyToDevice(DeviceMatrix3D& self, const float* data)
{
    const bool hasData =
        (self.dim_x > 0) && (self.dim_y > 0) && (self.dim_t > 0);
    if (!hasData) {
        // Bail early if there is nothing to copy
        return;
    }

    const size_t widthInBytes = self.dim_x * sizeof(float);
    const size_t devicePitchInBytes = self.pitch_y * sizeof(float);

    if (self.pitch_t == self.dim_y * self.pitch_y) {
        // Shortcut if we're packed in the t direction
        CUDA_SAFE_CALL_NO_SYNC
            (cudaMemcpy2D(self.data, devicePitchInBytes,
                          data, widthInBytes,
                          widthInBytes, self.dim_y * self.dim_t,
                          cudaMemcpyHostToDevice));
        return;
    }

    // Do a series of copies to fill in the 3D array
    for (size_t t = 0; t < self.dim_t; t++) {
        const float* host_start = data + t * self.dim_y * self.dim_x;
        float* device_start = self.data + t * self.pitch_t;
        CUDA_SAFE_CALL_NO_SYNC
            (cudaMemcpy2D(device_start, devicePitchInBytes,
                          host_start, widthInBytes,
                          widthInBytes, self.dim_y,
                          cudaMemcpyHostToDevice));
    }
}
// Release the currently allocated buffer, if any.
//
// When mAllocedSize is greater than zero, the pointer is handed to the
// allocator's free routine matching mBufferLocation, the returned status is
// passed to CUDA_SAFE_CALL_NO_SYNC, and mAllocedSize is reset to 0. When
// nothing is allocated the call is a no-op.
//
// NOTE(review): how CUDA_SAFE_CALL_NO_SYNC reacts to a failure is defined
// elsewhere; confirm whether execution continues past it on error.
void SimBufferCuda::Free()
{
    if (mAllocedSize > 0) {
        // Start from an error status so that a buffer location not handled by
        // the switch below is reported as a failure instead of reading an
        // uninitialised variable.
        cudaError_t result = cudaErrorInvalidValue;
        switch (mBufferLocation) {
        case Host:
            result = mSimCudaAllocator->FreeHost(&mPtr);
            break;
        case HostPinned:
            result = mSimCudaAllocator->FreeHostPinned(&mPtr);
            break;
        case Device:
            result = mSimCudaAllocator->Free(&mPtr);
            break;
        default:
            // Unknown location: nothing was released, keep the error status.
            break;
        }

        CUDA_SAFE_CALL_NO_SYNC(result);
        mAllocedSize = 0;
    }
}