Ejemplo n.º 1
0
TransferFunction::~TransferFunction()
{
    if(compositeTex)
        CudaSafeCall(cudaDestroyTextureObject(compositeTex));

    CudaSafeCall(cudaFreeArray(array));
}
Ejemplo n.º 2
0
SingleParticle2dx::Methods::CUDAProjectionMethod::~CUDAProjectionMethod ()
{
	cudaDestroyTextureObject(m_texObj);
	cudaFreeArray(m_cuArray);
	cudaStreamDestroy(m_stream);
	delete[] m_matrix;
	delete m_t;
	
	free(res_data_h);
	cudaFree(res_data_d);
}
Ejemplo n.º 3
0
TEST(Malloc3DArray, Attributes) {
    struct cudaArray * ary;
    struct cudaChannelFormatDesc dsc;
    dsc.x = dsc.y = dsc.z = dsc.w = 8;
    dsc.f = cudaChannelFormatKindSigned;

    cudaError_t ret;

    ret = cudaMalloc3DArray(&ary, &dsc, make_cudaExtent(1, 1, 1), 0);
    ASSERT_EQ(cudaSuccess, ret);

    struct cudaPointerAttributes attr;
    ret = cudaPointerGetAttributes(&attr, ary);
    EXPECT_EQ(cudaErrorInvalidValue, ret);

    EXPECT_EQ(cudaSuccess, cudaFreeArray(ary));
}
Ejemplo n.º 4
0
void CudaImagePyramidHost::clear() 
{
    if (!isInitialized()) {
        return;
    }

    // Don't bother unbinding the texture if everything is getting destroyed,
    // because it's likely that CUDA has already destroyed the texture.
    if (!_in_destructor) {
        unbindTexture();
    }

    cudaFreeArray(_storage);
    checkCUDAError("Free error", _name);

    _storage = NULL;
    _baseWidth = 0; _baseHeight = 0; _baseWidth = 0; _baseHeight = 0;
}
Ejemplo n.º 5
0
void DeleteCudaLayers()
{
	if(tempHostData)
		free(tempHostData);

	if(tempHostDataNoCuda)
		free(tempHostDataNoCuda);

	if(grid8ValTick)
		free(grid8ValTick);

	if(cuTempData)
		cudaFree(cuTempData);

	if(cuRandArr)
		cudaFree(cuRandArr);

	if(gCudaFuncWavePack)
		cudaFreeArray(gCudaFuncWavePack);

	if(gCudaFuncSmooth)
		cudaFreeArray(gCudaFuncSmooth);

	if(gCudaVectArray)
		cudaFreeArray(gCudaVectArray);

	if(gCudaFlArray)
		cudaFreeArray(gCudaFlArray);

	if(gVectorLayer)
			cudaFree(gVectorLayer);

	if(gRedBlueField)
		cudaFree(gRedBlueField);

	for (int k = 0; k < MAX_LAYERS; k++)
	{
		if(gCudaLayer[k])
			cudaFreeArray(gCudaLayer[k]);

		if(gCudaFuncLayer[k])
			cudaFreeArray(gCudaFuncLayer[k]);

		if(gPhysLayer[k])
			cudaFree(gPhysLayer[k]);

		if(gStateLayer[k])
			cudaFree(gStateLayer[k]);
	}
}
TEST(PointerGetAttributes, Array) {
    struct cudaArray * ary;
    cudaError_t ret;

    struct cudaChannelFormatDesc dsc;
    dsc.x = dsc.y = dsc.z = dsc.w = 8;
    dsc.f = cudaChannelFormatKindSigned;

    int device;
    ret = cudaGetDevice(&device);
    ASSERT_EQ(cudaSuccess, ret);

    ret = cudaMallocArray(&ary, &dsc, 1, 1, 0);
    ASSERT_EQ(cudaSuccess, ret);

    struct cudaPointerAttributes attr;
    ret = cudaPointerGetAttributes(&attr, ary);
    EXPECT_EQ(cudaErrorInvalidValue, ret);

    ret = cudaFreeArray(ary);
    ASSERT_EQ(cudaSuccess, ret);
}
Ejemplo n.º 7
0
////////////////////////////////////////////////////////////////////////////////
// Main program
////////////////////////////////////////////////////////////////////////////////
int main(int argc, char **argv)
{
    float
    *h_Kernel,
    *h_Input,
    *h_Buffer,
    *h_OutputCPU,
    *h_OutputGPU;

    cudaArray
    *a_Src;

    cudaChannelFormatDesc floatTex = cudaCreateChannelDesc<float>();

    float
    *d_Output;

    float
    gpuTime;

    StopWatchInterface *hTimer = NULL;

    const int imageW = 3072;
    const int imageH = 3072 / 2;
    const unsigned int iterations = 10;

    printf("[%s] - Starting...\n", argv[0]);

    // use command-line specified CUDA device, otherwise use device with highest Gflops/s
    findCudaDevice(argc, (const char **)argv);

    sdkCreateTimer(&hTimer);

    printf("Initializing data...\n");
    h_Kernel    = (float *)malloc(KERNEL_LENGTH * sizeof(float));
    h_Input     = (float *)malloc(imageW * imageH * sizeof(float));
    h_Buffer    = (float *)malloc(imageW * imageH * sizeof(float));
    h_OutputCPU = (float *)malloc(imageW * imageH * sizeof(float));
    h_OutputGPU = (float *)malloc(imageW * imageH * sizeof(float));
    checkCudaErrors(cudaMallocArray(&a_Src, &floatTex, imageW, imageH));
    checkCudaErrors(cudaMalloc((void **)&d_Output, imageW * imageH * sizeof(float)));

    srand(2009);

    for (unsigned int i = 0; i < KERNEL_LENGTH; i++)
    {
        h_Kernel[i] = (float)(rand() % 16);
    }

    for (unsigned int i = 0; i < imageW * imageH; i++)
    {
        h_Input[i] = (float)(rand() % 16);
    }

    setConvolutionKernel(h_Kernel);
    checkCudaErrors(cudaMemcpyToArray(a_Src, 0, 0, h_Input, imageW * imageH * sizeof(float), cudaMemcpyHostToDevice));


    printf("Running GPU rows convolution (%u identical iterations)...\n", iterations);
    checkCudaErrors(cudaDeviceSynchronize());
    sdkResetTimer(&hTimer);
    sdkStartTimer(&hTimer);

    for (unsigned int i = 0; i < iterations; i++)
    {
        convolutionRowsGPU(
            d_Output,
            a_Src,
            imageW,
            imageH
        );
    }

    checkCudaErrors(cudaDeviceSynchronize());
    sdkStopTimer(&hTimer);
    gpuTime = sdkGetTimerValue(&hTimer) / (float)iterations;
    printf("Average convolutionRowsGPU() time: %f msecs; //%f Mpix/s\n", gpuTime, imageW * imageH * 1e-6 / (0.001 * gpuTime));

    //While CUDA kernels can't write to textures directly, this copy is inevitable
    printf("Copying convolutionRowGPU() output back to the texture...\n");
    checkCudaErrors(cudaDeviceSynchronize());
    sdkResetTimer(&hTimer);
    sdkStartTimer(&hTimer);
    checkCudaErrors(cudaMemcpyToArray(a_Src, 0, 0, d_Output, imageW * imageH * sizeof(float), cudaMemcpyDeviceToDevice));
    checkCudaErrors(cudaDeviceSynchronize());
    sdkStopTimer(&hTimer);
    gpuTime = sdkGetTimerValue(&hTimer);
    printf("cudaMemcpyToArray() time: %f msecs; //%f Mpix/s\n", gpuTime, imageW * imageH * 1e-6 / (0.001 * gpuTime));

    printf("Running GPU columns convolution (%i iterations)\n", iterations);
    checkCudaErrors(cudaDeviceSynchronize());
    sdkResetTimer(&hTimer);
    sdkStartTimer(&hTimer);

    for (int i = 0; i < iterations; i++)
    {
        convolutionColumnsGPU(
            d_Output,
            a_Src,
            imageW,
            imageH
        );
    }

    checkCudaErrors(cudaDeviceSynchronize());
    sdkStopTimer(&hTimer);
    gpuTime = sdkGetTimerValue(&hTimer) / (float)iterations;
    printf("Average convolutionColumnsGPU() time: %f msecs; //%f Mpix/s\n", gpuTime, imageW * imageH * 1e-6 / (0.001 * gpuTime));

    printf("Reading back GPU results...\n");
    checkCudaErrors(cudaMemcpy(h_OutputGPU, d_Output, imageW * imageH * sizeof(float), cudaMemcpyDeviceToHost));

    printf("Checking the results...\n");
    printf("...running convolutionRowsCPU()\n");
    convolutionRowsCPU(
        h_Buffer,
        h_Input,
        h_Kernel,
        imageW,
        imageH,
        KERNEL_RADIUS
    );

    printf("...running convolutionColumnsCPU()\n");
    convolutionColumnsCPU(
        h_OutputCPU,
        h_Buffer,
        h_Kernel,
        imageW,
        imageH,
        KERNEL_RADIUS
    );

    double delta = 0;
    double sum = 0;

    for (unsigned int i = 0; i < imageW * imageH; i++)
    {
        sum += h_OutputCPU[i] * h_OutputCPU[i];
        delta += (h_OutputGPU[i] - h_OutputCPU[i]) * (h_OutputGPU[i] - h_OutputCPU[i]);
    }

    double L2norm = sqrt(delta / sum);
    printf("Relative L2 norm: %E\n", L2norm);
    printf("Shutting down...\n");

    checkCudaErrors(cudaFree(d_Output));
    checkCudaErrors(cudaFreeArray(a_Src));
    free(h_OutputGPU);
    free(h_Buffer);
    free(h_Input);
    free(h_Kernel);

    sdkDeleteTimer(&hTimer);

    // cudaDeviceReset causes the driver to clean up all state. While
    // not mandatory in normal operation, it is good practice.  It is also
    // needed to ensure correct operation when the application is being
    // profiled. Calling cudaDeviceReset causes all profile data to be
    // flushed before the application exits
    cudaDeviceReset();

    if (L2norm > 1e-6)
    {
        printf("Test failed!\n");
        exit(EXIT_FAILURE);
    }

    printf("Test passed\n");
    exit(EXIT_SUCCESS);
}
Ejemplo n.º 8
0
 __host__
 inline ~TextureArray()
 {
   SHAKTI_SAFE_CUDA_CALL(cudaFreeArray(_array));
 }
Ejemplo n.º 9
0
TEST(Malloc3DArray, NullArguments) {
    struct cudaArray * ary;
    struct cudaChannelFormatDesc dsc;
    dsc.x = dsc.y = dsc.z = dsc.w = 8;
    dsc.f = cudaChannelFormatKindSigned;

    // Commented out cases segfault.

    cudaError_t ret;
    ret = cudaMalloc3DArray(NULL, NULL, make_cudaExtent(0, 0, 0), 0);
    EXPECT_EQ(cudaErrorInvalidValue, ret);

    ret = cudaMalloc3DArray(NULL, NULL, make_cudaExtent(0, 0, 8), 0);
    EXPECT_EQ(cudaErrorInvalidValue, ret);

    ret = cudaMalloc3DArray(NULL, NULL, make_cudaExtent(0, 8, 0), 0);
    EXPECT_EQ(cudaErrorInvalidValue, ret);

    ret = cudaMalloc3DArray(NULL, NULL, make_cudaExtent(0, 8, 8), 0);
    EXPECT_EQ(cudaErrorInvalidValue, ret);

    // ret = cudaMalloc3DArray(NULL, NULL, make_cudaExtent(8, 0, 0), 0);
    // EXPECT_EQ(cudaErrorInvalidValue, ret);
    ret = cudaMalloc3DArray(NULL, NULL, make_cudaExtent(8, 0, 8), 0);
    EXPECT_EQ(cudaErrorInvalidValue, ret);
    // ret = cudaMalloc3DArray(NULL, NULL, make_cudaExtent(8, 8, 0), 0);
    // EXPECT_EQ(cudaErrorInvalidValue, ret);
    // ret = cudaMalloc3DArray(NULL, NULL, make_cudaExtent(8, 8, 8), 0);
    // EXPECT_EQ(cudaErrorInvalidValue, ret);

    ret = cudaMalloc3DArray(&ary, NULL, make_cudaExtent(0, 0, 0), 0);
    EXPECT_EQ(cudaSuccess, ret);
    ret = cudaFreeArray(ary);
    EXPECT_EQ(cudaSuccess, ret);
    ret = cudaMalloc3DArray(&ary, NULL, make_cudaExtent(0, 0, 8), 0);
    EXPECT_EQ(cudaSuccess, ret);
    ret = cudaFreeArray(ary);
    EXPECT_EQ(cudaSuccess, ret);

    ret = cudaMalloc3DArray(&ary, NULL, make_cudaExtent(0, 8, 0), 0);
    EXPECT_EQ(cudaSuccess, ret);
    ret = cudaFreeArray(ary);
    EXPECT_EQ(cudaSuccess, ret);

    ret = cudaMalloc3DArray(&ary, NULL, make_cudaExtent(0, 8, 8), 0);
    EXPECT_EQ(cudaSuccess, ret);
    ret = cudaFreeArray(ary);
    EXPECT_EQ(cudaSuccess, ret);

    // ret = cudaMalloc3DArray(&ary, NULL, make_cudaExtent(8, 0, 0), 0);
    // EXPECT_EQ(cudaErrorInvalidValue, ret);

    /**
     * There's no reason why this should pass...

    ret = cudaMalloc3DArray(&ary, NULL, make_cudaExtent(8, 0, 8), 0);
    EXPECT_EQ(cudaSuccess, ret);
    ret = cudaFreeArray(ary);
    EXPECT_EQ(cudaSuccess, ret);
     */

    // ret = cudaMalloc3DArray(&ary, NULL, make_cudaExtent(8, 8, 0), 0);
    // EXPECT_EQ(cudaErrorInvalidValue, ret);
    // ret = cudaMalloc3DArray(&ary, NULL, make_cudaExtent(8, 8, 8), 0);
    // EXPECT_EQ(cudaErrorInvalidValue, ret);

    ret = cudaMalloc3DArray(NULL, &dsc, make_cudaExtent(0, 0, 0), 0);
    EXPECT_EQ(cudaErrorInvalidValue, ret);

    ret = cudaMalloc3DArray(NULL, &dsc, make_cudaExtent(0, 0, 8), 0);
    EXPECT_EQ(cudaErrorInvalidValue, ret);

    ret = cudaMalloc3DArray(NULL, &dsc, make_cudaExtent(0, 8, 0), 0);
    EXPECT_EQ(cudaErrorInvalidValue, ret);

    ret = cudaMalloc3DArray(NULL, &dsc, make_cudaExtent(0, 8, 8), 0);
    EXPECT_EQ(cudaErrorInvalidValue, ret);

    // ret = cudaMalloc3DArray(NULL, &dsc, make_cudaExtent(8, 0, 0), 0);
    // EXPECT_EQ(cudaErrorInvalidValue, ret);
    ret = cudaMalloc3DArray(NULL, &dsc, make_cudaExtent(8, 0, 8), 0);
    EXPECT_EQ(cudaErrorInvalidValue, ret);
    // ret = cudaMalloc3DArray(NULL, &dsc, make_cudaExtent(8, 8, 0), 0);
    // EXPECT_EQ(cudaErrorInvalidValue, ret);
    // ret = cudaMalloc3DArray(NULL, &dsc, make_cudaExtent(8, 8, 8), 0);
    // EXPECT_EQ(cudaErrorInvalidValue, ret);
}
Ejemplo n.º 10
0
TEST(Malloc3DArray, Limits) {
    struct cudaArray * ary;
    struct cudaChannelFormatDesc dsc;
    dsc.x = dsc.y = dsc.z = dsc.w = 8;
    dsc.f = cudaChannelFormatKindSigned;

    cudaError_t ret;

    ret = cudaMalloc3DArray(&ary, &dsc,
        make_cudaExtent(0,         0,          0),        0);
    EXPECT_EQ(cudaSuccess, ret);
    if (ret == cudaSuccess) {
        EXPECT_EQ(cudaSuccess, cudaFreeArray(ary));
    }

    int device;
    ret = cudaGetDevice(&device);
    ASSERT_EQ(cudaSuccess, ret);

    struct cudaDeviceProp prop;
    ret = cudaGetDeviceProperties(&prop, device);
    ASSERT_EQ(cudaSuccess, ret);

    /* Adapt to what's available by a safe margin */
    size_t targetable = prop.totalGlobalMem / 8;

    if ((size_t) prop.maxTexture1D < targetable) {
        ret = cudaMalloc3DArray(&ary, &dsc,
            make_cudaExtent(prop.maxTexture1D, 0, 0), 0);
        EXPECT_EQ(cudaSuccess, ret);
        if (ret == cudaSuccess) {
            EXPECT_EQ(cudaSuccess, cudaFreeArray(ary));
        }

        ret = cudaMalloc3DArray(&ary, &dsc,
            make_cudaExtent(prop.maxTexture1D + 1, 0, 0), 0);
        EXPECT_EQ(cudaErrorInvalidValue, ret);
        if (ret == cudaSuccess) {
            EXPECT_EQ(cudaSuccess, cudaFreeArray(ary));
        }
    }

    if ((size_t) prop.maxTexture2D[0] < targetable) {
        ret = cudaMalloc3DArray(&ary, &dsc,
            make_cudaExtent(prop.maxTexture2D[0],     1, 0), 0);
        EXPECT_EQ(cudaSuccess, ret);
        if (ret == cudaSuccess) {
            EXPECT_EQ(cudaSuccess, cudaFreeArray(ary));
        }

        ret = cudaMalloc3DArray(&ary, &dsc,
            make_cudaExtent(prop.maxTexture2D[0] + 1, 1, 0), 0);
        EXPECT_EQ(cudaErrorInvalidValue, ret);
        if (ret == cudaSuccess) {
            EXPECT_EQ(cudaSuccess, cudaFreeArray(ary));
        }
    }

    if ((size_t) prop.maxTexture2D[1] < targetable) {
        ret = cudaMalloc3DArray(&ary, &dsc,
            make_cudaExtent(1, prop.maxTexture2D[1],     0), 0);
        EXPECT_EQ(cudaSuccess, ret);
        if (ret == cudaSuccess) {
            EXPECT_EQ(cudaSuccess, cudaFreeArray(ary));
        }

        ret = cudaMalloc3DArray(&ary, &dsc,
            make_cudaExtent(1, prop.maxTexture2D[1] + 1, 0), 0);
        EXPECT_EQ(cudaErrorInvalidValue, ret);
        if (ret == cudaSuccess) {
            EXPECT_EQ(cudaSuccess, cudaFreeArray(ary));
        }
    }

    if ((size_t) prop.maxTexture2D[0] * prop.maxTexture2D[1] < targetable) {
        ret = cudaMalloc3DArray(&ary, &dsc,
            make_cudaExtent(prop.maxTexture2D[0],
                prop.maxTexture2D[1], 0), 0);
        EXPECT_EQ(cudaSuccess, ret);
        if (ret == cudaSuccess) {
            EXPECT_EQ(cudaSuccess, cudaFreeArray(ary));
        }

        ret = cudaMalloc3DArray(&ary, &dsc,
            make_cudaExtent(prop.maxTexture2D[0],
                prop.maxTexture2D[1] + 1, 0), 0);
        EXPECT_EQ(cudaErrorInvalidValue, ret);
        if (ret == cudaSuccess) {
            EXPECT_EQ(cudaSuccess, cudaFreeArray(ary));
        }

        ret = cudaMalloc3DArray(&ary, &dsc,
            make_cudaExtent(prop.maxTexture2D[0] + 1,
                prop.maxTexture2D[1], 0), 0);
        EXPECT_EQ(cudaErrorInvalidValue, ret);
        if (ret == cudaSuccess) {
            EXPECT_EQ(cudaSuccess, cudaFreeArray(ary));
        }

        ret = cudaMalloc3DArray(&ary, &dsc,
            make_cudaExtent(prop.maxTexture2D[0] + 1,
                prop.maxTexture2D[1] + 1, 0), 0);
        EXPECT_EQ(cudaErrorInvalidValue, ret);
        if (ret == cudaSuccess) {
            EXPECT_EQ(cudaSuccess, cudaFreeArray(ary));
        }
    } else if ((size_t) prop.maxTexture2D[0] * prop.maxTexture2D[1] >
            prop.totalGlobalMem) {
        ret = cudaMalloc3DArray(&ary, &dsc,
            make_cudaExtent(prop.maxTexture2D[0], prop.maxTexture2D[1], 0), 0);
        EXPECT_EQ(cudaErrorMemoryAllocation, ret);
    }

    ret = cudaMalloc3DArray(&ary, &dsc,
        make_cudaExtent(1,         1,         1),        0);
    EXPECT_EQ(cudaSuccess, ret);
    if (ret == cudaSuccess) {
        EXPECT_EQ(cudaSuccess, cudaFreeArray(ary));
    }

    ret = cudaMalloc3DArray(&ary, &dsc,
        make_cudaExtent(64,        64,        64),     0);
    EXPECT_EQ(cudaSuccess, ret);
    if (ret == cudaSuccess) {
        EXPECT_EQ(cudaSuccess, cudaFreeArray(ary));
    }

    /* TODO:  More 3D tests. */
}
Ejemplo n.º 11
0
 void deallocate(){
     cudaDestroyTextureObject(texObj);
     cudaFreeArray(cuArray);
     
 }
Ejemplo n.º 12
0
CudaFloatTexture1D::~CudaFloatTexture1D()
{
    CUDA_SAFE_CALL(cudaFreeArray(deviceArray));
    CUDA_SAFE_CALL(cudaFreeHost(hostMem));
}
Ejemplo n.º 13
0
 //! Destructor releases array
 ~CTfactory( void ) {
     if( this->dca_data != NULL ) {
         // Actually, shouldn't exist without an array...
         CUDA_SAFE_CALL( cudaFreeArray( this->dca_data ) );
     }
 }
Ejemplo n.º 14
0
cudaError_t WINAPI wine_cudaFreeArray( struct cudaArray* array ) {
    WINE_TRACE("\n");
    return cudaFreeArray( array );
}