Ejemplo n.º 1
0
// Copies the contents of a DeviceMatrix3D from device memory into `dst`.
//
// self: source matrix. Its pitches (pitch_y, pitch_t) are expressed in float
//       elements: they are scaled by sizeof(float) or used in float* pointer
//       arithmetic below.
// dst:  destination host buffer, written as a tightly packed array of
//       dim_t slices, each holding dim_y rows of dim_x floats.
//
// Nothing is copied when any of the three dimensions is zero.
void DeviceMatrix3D_copyFromDevice(const DeviceMatrix3D& self, float* dst)
{
    const bool has_data =
        (self.dim_x != 0) && (self.dim_y != 0) && (self.dim_t != 0);
    if (!has_data) {
        return;
    }

    // Byte sizes shared by every copy issued below.
    const size_t row_bytes = self.dim_x * sizeof(float);
    const size_t device_pitch_bytes = self.pitch_y * sizeof(float);

    // When consecutive t-slices sit exactly dim_y rows apart on the device,
    // the whole volume is one tall 2D image and a single copy is enough.
    const bool packed_in_t = (self.pitch_t == self.dim_y * self.pitch_y);
    if (packed_in_t) {
        CUDA_SAFE_CALL_NO_SYNC
            (cudaMemcpy2D(dst, row_bytes,
                          self.data, device_pitch_bytes,
                          row_bytes, self.dim_y * self.dim_t,
                          cudaMemcpyDeviceToHost));
        return;
    }

    // Otherwise transfer one t-slice per call: slices are packed back to back
    // on the host but pitch_t floats apart on the device.
    for (size_t slice = 0; slice < self.dim_t; ++slice) {
        float* host_slice = dst + slice * self.dim_y * self.dim_x;
        float* device_slice = self.data + slice * self.pitch_t;
        CUDA_SAFE_CALL_NO_SYNC
            (cudaMemcpy2D(host_slice, row_bytes,
                          device_slice, device_pitch_bytes,
                          row_bytes, self.dim_y,
                          cudaMemcpyDeviceToHost));
    }
}
Ejemplo n.º 2
0
void SimBufferCuda::Alloc(size_t size)
{
    mSize = size;
    if (mAllocedSize > 0)
    {
        if (mAllocedSize == mSize)
            return;

        Free();
    }

    cudaError_t result;
    switch (mBufferLocation)
    {
    case Host:
        result = mSimCudaAllocator->AllocateHost(&mPtr, mSize);
        break;
    case HostPinned:
        result = mSimCudaAllocator->AllocateHostPinned(&mPtr, mSize);
        break;
    case Device:
        result = mSimCudaAllocator->Allocate(&mPtr, mSize);
        break;
    }

    CUDA_SAFE_CALL_NO_SYNC(result)
    mAllocedSize = mSize;
}
Ejemplo n.º 3
0
// Copies a DeviceMatrix from device memory into the host buffer `dst`.
//
// self: source matrix; self.pitch is the device row stride in float elements
//       (it is scaled by sizeof(float) below).
// dst:  destination host buffer, written as tightly packed rows of
//       self.width floats.
//
// Nothing is copied unless both width and height are greater than zero.
void DeviceMatrix_copyFromDevice(const DeviceMatrix& self, float* dst)
{
	const bool has_elements = (self.width > 0) && (self.height > 0);
	if (!has_elements) {
		return;
	}

	const size_t row_bytes = self.width * sizeof(float);
	const size_t device_pitch_bytes = self.pitch * sizeof(float);

	CUDA_SAFE_CALL_NO_SYNC
		(cudaMemcpy2D(dst, row_bytes,
		              self.data, device_pitch_bytes,
		              row_bytes, self.height,
		              cudaMemcpyDeviceToHost));
}
Ejemplo n.º 4
0
// Uploads a tightly packed host array into a DeviceMatrix.
//
// self: destination matrix; self.pitch is the device row stride in float
//       elements (it is scaled by sizeof(float) below).
// data: source host buffer, read as self.height rows of self.width floats
//       with no padding between rows.
//
// Nothing is copied unless both width and height are greater than zero.
void DeviceMatrix_copyToDevice(DeviceMatrix& self, const float* data)
{
	const bool has_elements = (self.width > 0) && (self.height > 0);
	if (!has_elements) {
		return;
	}

	const size_t row_bytes = self.width * sizeof(float);
	const size_t device_pitch_bytes = self.pitch * sizeof(float);

	CUDA_SAFE_CALL_NO_SYNC
		(cudaMemcpy2D(self.data, device_pitch_bytes,
		              data, row_bytes,
		              row_bytes, self.height,
		              cudaMemcpyHostToDevice));
}
Ejemplo n.º 5
0
// Uploads a tightly packed host array into a DeviceMatrix3D.
//
// self: destination matrix. Its pitches (pitch_y, pitch_t) are expressed in
//       float elements: they are scaled by sizeof(float) or used in float*
//       pointer arithmetic below.
// data: source host buffer, read as dim_t slices of dim_y rows of dim_x
//       floats with no padding.
//
// Nothing is copied unless all three dimensions are greater than zero.
//
// The layout handling mirrors DeviceMatrix3D_copyFromDevice: a single 2D copy
// is only valid when the t-slices are packed on the device; otherwise each
// slice has to be placed at its own pitch_t offset.
void DeviceMatrix3D_copyToDevice(DeviceMatrix3D& self, const float* data)
{
    if (!((self.dim_x > 0) && (self.dim_y > 0) && (self.dim_t > 0))) {
        // Bail early if there is nothing to copy
        return;
    }

    const size_t widthInBytes = self.dim_x * sizeof(float);
    const size_t devicePitchInBytes = self.pitch_y * sizeof(float);

    if (self.pitch_t == self.dim_y * self.pitch_y) {
        // Shortcut if we're packed in the t direction: the volume is one
        // tall 2D image of dim_y * dim_t rows.
        CUDA_SAFE_CALL_NO_SYNC
            (cudaMemcpy2D(self.data, devicePitchInBytes,
                          data, widthInBytes,
                          widthInBytes, self.dim_y * self.dim_t,
                          cudaMemcpyHostToDevice));
        return;
    }

    // Slices are not contiguous on the device: upload them one at a time so
    // each lands pitch_t floats after the previous one.
    for (size_t t = 0; t < self.dim_t; t++) {
        const float* host_start = data + t * self.dim_y * self.dim_x;
        float* device_start = self.data + t * self.pitch_t;
        CUDA_SAFE_CALL_NO_SYNC
            (cudaMemcpy2D(device_start, devicePitchInBytes,
                          host_start, widthInBytes,
                          widthInBytes, self.dim_y,
                          cudaMemcpyHostToDevice));
    }
}
Ejemplo n.º 6
0
// Releases the buffer through the allocator routine that matches
// mBufferLocation and marks it as unallocated (mAllocedSize = 0).
//
// Does nothing when no buffer is currently allocated (mAllocedSize == 0).
void SimBufferCuda::Free()
{
    if (mAllocedSize > 0)
    {
        // Start from an error code so that an unexpected mBufferLocation
        // value is reported below instead of reading an uninitialized
        // variable.
        cudaError_t result = cudaErrorInvalidValue;
        switch (mBufferLocation)
        {
        case Host:
            result = mSimCudaAllocator->FreeHost(&mPtr);
            break;
        case HostPinned:
            result = mSimCudaAllocator->FreeHostPinned(&mPtr);
            break;
        case Device:
            result = mSimCudaAllocator->Free(&mPtr);
            break;
        default:
            // Unknown location: nothing was released, keep the error code.
            break;
        }

        // NOTE(review): CUDA_SAFE_CALL_NO_SYNC is defined outside this file;
        // presumably it reports a non-success code - confirm.
        CUDA_SAFE_CALL_NO_SYNC(result);

        // Mark the buffer as released regardless of the outcome, so the same
        // pointer is not handed to the allocator a second time.
        mAllocedSize = 0;
    }
}