コード例 #1
0
/**
 * Automated (non-interactive) test path with readback.
 *
 * Loads "portrait_noise.bmp", then, starting from the current value of the
 * global g_Kernel and continuing while it is <= 3, runs runImageFilters()
 * into a device buffer, copies the result back to the host, saves it as a
 * PPM and compares it against the matching reference image. Every failed
 * comparison increments g_TotalErrors. The final status is reported through
 * shrQAFinishExit().
 *
 * @param argc  argument count, forwarded to device selection and QA reporting
 * @param argv  argument vector; argv[0] is used to locate data files
 */
void runAutoTest(int argc, char **argv)
{
    printf("[%s] - (automated testing w/ readback)\n", sSDKsample);

    // The selected device ID is not needed afterwards, so it is not stored.
    cutilChooseCudaDevice(argc, argv);

    // First load the image, so we know what the size of the image (imageW and imageH)
    printf("Allocating host and CUDA memory and loading image file...\n");
    const char *image_path = cutFindFilePath("portrait_noise.bmp", argv[0]);
    if (image_path == NULL) {
        printf( "imageDenoisingGL was unable to find and load image file <portrait_noise.bmp>.\nExiting...\n");
        shrQAFinishExit(argc, (const char **)argv, QA_FAILED);
        // Presumably shrQAFinishExit() terminates the process; the explicit
        // return guarantees a NULL path is never handed to LoadBMPFile().
        return;
    }
    LoadBMPFile(&h_Src, &imageW, &imageH, image_path);
    printf("Data init done.\n");

    cutilSafeCall( CUDA_MallocArray(&h_Src, imageW, imageH) );

    g_CheckRender = new CheckBackBuffer(imageW, imageH, sizeof(TColor), false);
    g_CheckRender->setExecPath(argv[0]);

    // Size in bytes of one full output frame; shared by the allocation and the readback.
    const size_t frameBytes = imageW*imageH*sizeof(TColor);

    TColor *d_dst = NULL;
    cutilSafeCall( cudaMalloc( (void **)&d_dst, frameBytes ) );

    while (g_Kernel <= 3) {
        printf("[AutoTest]: %s <%s>\n", sSDKsample, filterMode[g_Kernel]);
        cutilSafeCall( CUDA_Bind2TextureArray() );
        runImageFilters(d_dst);
        cutilSafeCall( CUDA_UnbindTexture() );
        cutilSafeCall( cutilDeviceSynchronize() );

        // Check the readback as well: a failed copy would otherwise be
        // compared against the reference image without any diagnostic.
        cutilSafeCall( cudaMemcpy(g_CheckRender->imageData(), d_dst, frameBytes, cudaMemcpyDeviceToHost) );
        g_CheckRender->savePPM(sOriginal[g_Kernel], true, NULL);

        if (!g_CheckRender->PPMvsPPM(sOriginal[g_Kernel], sReference[g_Kernel], MAX_EPSILON_ERROR, 0.15f)) {
            g_TotalErrors++;
        }
        g_Kernel++;
    }

    cutilSafeCall( CUDA_FreeArray() );
    free(h_Src);

    cutilSafeCall( cudaFree( d_dst ) );
    delete g_CheckRender;

    printf("\n[%s] -> Test Results: %d errors\n", sSDKsample, g_TotalErrors);

    cutilDeviceReset();
    shrQAFinishExit(argc, (const char **)argv, (!g_TotalErrors ? QA_PASSED : QA_FAILED));
}
コード例 #2
0
////////////////////////////////////////////////////////////////////////////////
//! Run test
//!
//! Selects a CUDA device, decides whether the double-precision path may be
//! used (compute capability >= 1.3 and not in emulation mode), creates the
//! C2R FFT plan, allocates and initialises the spectrum buffers, sets up the
//! validation helper and finally calls runCudaTest().
//!
//! @param argc  argument count, forwarded to device selection
//! @param argv  argument vector; argv[0] is passed to the validation helper
////////////////////////////////////////////////////////////////////////////////
void runAutoTest(int argc, char** argv)
{
    printf("[%s]\n", sSDKsample);

    // Cuda init
    int dev = cutilChooseCudaDevice(argc, argv);

    cudaDeviceProp deviceProp;
    cutilSafeCall(cudaGetDeviceProperties(&deviceProp, dev));
    printf("Compute capability %d.%d\n", deviceProp.major, deviceProp.minor);
    int version = deviceProp.major*10 + deviceProp.minor;
    g_hasDouble = (version >= 13);
    if (inEmulationMode()) {
        // workaround since SM13 kernel doesn't produce correct output in emulation mode
        g_hasDouble = false;
    }

    // create FFT plan
    CUFFT_SAFE_CALL(cufftPlan2d(&fftPlan, meshW, meshH, CUFFT_C2R) );

    // allocate memory
    fftInputW = (meshW / 2)+1;
    fftInputH = meshH;
    fftInputSize = (fftInputW*fftInputH)*sizeof(float2);

    cutilSafeCall(cudaMalloc((void **)&d_h0, fftInputSize) );
    cutilSafeCall(cudaMalloc((void **)&d_ht, fftInputSize) );

    h_h0 = (float2 *) malloc(fftInputSize);
    if (h_h0 == NULL) {
        // generate_h0() and the upload below both use h_h0; fail loudly
        // instead of passing a NULL host buffer on.
        fprintf(stderr, "[%s] failed to allocate host memory for h0\n", sSDKsample);
        exit(EXIT_FAILURE);
    }
    generate_h0();
    cutilSafeCall(cudaMemcpy(d_h0, h_h0, fftInputSize, cudaMemcpyHostToDevice) );

    cutilSafeCall(cudaMalloc((void **)&d_slope, meshW*meshH*sizeof(float2)) );

    cutCreateTimer(&timer);
    cutStartTimer(timer);
    prevTime = cutGetTimerValue(timer);

    // Creating the Auto-Validation Code
    // NOTE(review): windowH is passed for both of the first two arguments;
    // confirm whether the first one was meant to be the window width.
    g_CheckRender = new CheckBackBuffer(windowH, windowH, 4, false);
    g_CheckRender->setPixelFormat(GL_RGBA);
    g_CheckRender->setExecPath(argv[0]);
    g_CheckRender->EnableQAReadback(true);

    runCudaTest(g_hasDouble);
    cudaThreadExit();
}
コード例 #3
0
ファイル: boxFilter.cpp プロジェクト: yyzreal/gpuocelot
////////////////////////////////////////////////////////////////////////////////
//! Run a simple benchmark test for CUDA
////////////////////////////////////////////////////////////////////////////////
void
runBenchmark( int argc, char **argv )
{
    int devID = 0;
    shrLog("[runBenchmark]: [%s]\n", sSDKsample);
    devID = cutilChooseCudaDevice(argc, argv);

    loadImageData(argc, argv);

    initCuda();

    g_CheckRender       = new CheckBackBuffer(width, height, 4, false);
    g_CheckRender->setExecPath(argv[0]);

    unsigned int *d_result;
    cutilSafeCall( cudaMalloc( (void **)&d_result, width*height*sizeof(unsigned int)) );

    // warm-up
    boxFilterRGBA(d_img, d_temp, d_temp, width, height, filter_radius, iterations, nthreads);
    cutilSafeCall( cutilDeviceSynchronize() );

    // Start round-trip timer and process iCycles loops on the GPU
    iterations = 1;     // standard 1-pass filtering
    const int iCycles = 150;
    double dProcessingTime = 0.0;
    shrLog("\nRunning BoxFilterGPU for %d cycles...\n\n", iCycles);
    shrDeltaT(2);
    for (int i = 0; i < iCycles; i++)
    {
        dProcessingTime += boxFilterRGBA(d_img, d_temp, d_img, width, height, filter_radius, iterations, nthreads);
    }

    // check if kernel execution generated an error and sync host
    cutilCheckMsg("Error: boxFilterRGBA Kernel execution FAILED");
    cutilSafeCall(cutilDeviceSynchronize());

    // Get average computation time
    dProcessingTime /= (double)iCycles;

    // log testname, throughput, timing and config info to sample and master logs
    shrLogEx(LOGBOTH | MASTER, 0, "boxFilter-texture, Throughput = %.4f M RGBA Pixels/s, Time = %.5f s, Size = %u RGBA Pixels, NumDevsUsed = %u, Workgroup = %u\n",
             (1.0e-6 * width * height)/dProcessingTime, dProcessingTime,
             (width * height), 1, nthreads);
    shrLog("\n");
}
コード例 #4
0
/**
 * Automated test path with readback for the bilateral filter.
 *
 * Runs bilateralFilterRGBA() four times. After each run the device result is
 * copied back to the host, saved as a PPM and compared against the matching
 * reference image; every failed comparison increments g_TotalErrors. Between
 * runs gaussian_delta is increased by 1.0, euclidean_delta is multiplied by
 * 1.25 and updateGaussian() is called with the new gaussian_delta.
 *
 * @param argc  argument count, forwarded to device selection and image loading
 * @param argv  argument vector; argv[0] is passed to the validation helper
 */
void runAutoTest(int argc, char **argv)
{
    shrLog("[runAutoTest]: [%s] (automated testing w/ readback)\n", sSDKsample);

    // The selected device ID is not needed afterwards, so it is not stored.
    cutilChooseCudaDevice(argc, argv);

    loadImageData(argc, argv);

    initCuda();

    g_CheckRender       = new CheckBackBuffer(width, height, 4, false);
    g_CheckRender->setExecPath(argv[0]);

    unsigned int *d_result;
    cutilSafeCall( cudaMalloc( (void **)&d_result, width*height*sizeof(unsigned int)) );

    for(int i = 0; i < 4; i++)
    {
        shrLog("[AutoTest]: %s (radius=%d)", sSDKsample, filter_radius );
        bilateralFilterRGBA(d_result, width, height, euclidean_delta, filter_radius, iterations, nthreads);

        // check if kernel execution generated an error
        cutilCheckMsg("Error: bilateralFilterRGBA Kernel execution FAILED");
        cutilSafeCall( cutilDeviceSynchronize() );

        // Check the readback as well: a failed copy would otherwise be
        // compared against the reference image without any diagnostic.
        cutilSafeCall( cudaMemcpy(g_CheckRender->imageData(), d_result, width*height*sizeof(unsigned int), cudaMemcpyDeviceToHost) );

        g_CheckRender->savePPM(sOriginal[i], false, NULL);

        if (!g_CheckRender->PPMvsPPM(sOriginal[i], sReference[i], MAX_EPSILON_ERROR, 0.15f)) {
            g_TotalErrors++;
        }

        // Change the filter parameters for the next pass.
        gaussian_delta += 1.0f;
        euclidean_delta *= 1.25f;

        updateGaussian(gaussian_delta, filter_radius);
    }

    cutilSafeCall( cudaFree( d_result ) );
    delete g_CheckRender;
}
コード例 #5
0
/**
 * Automated test path for the marching-cubes sample.
 *
 * Picks a CUDA device, initialises the marching-cubes buffers, configures the
 * validation helper and computes the isosurface once. When g_bQAReadback is
 * set, the position, normal and compacted-voxel arrays are dumped to .bin
 * files and each one is compared against its reference file; every mismatch
 * increments g_TotalErrors and a PASSED/FAILED line is printed.
 *
 * @param argc  argument count, forwarded to device selection and initMC()
 * @param argv  argument vector; argv[0] is passed to the validation helper
 */
void runAutoTest(int argc, char **argv)
{
    int selectedDevice = cutilChooseCudaDevice(argc, argv);

    // Set up the CUDA buffers used by Marching Cubes.
    initMC(argc, argv);

    g_CheckRender = new CheckBackBuffer(maxVerts*sizeof(float)*4, 1, 1, false);
    g_CheckRender->setPixelFormat(GL_RGBA);
    g_CheckRender->setExecPath(argv[0]);
    g_CheckRender->EnableQAReadback(true);

    computeIsosurface();

    if (g_bQAReadback)
    {
        // Write all three result arrays to disk before comparing any of them.
        dumpFile<float4>(d_pos,          maxVerts, sizeof(float4), "marchCube_posArray.bin");
        dumpFile<float4>(d_normal,       maxVerts, sizeof(float4), "marchCube_normalArray.bin");
        dumpFile<uint>(d_compVoxelArray, numVoxels, sizeof(uint),  "marchCube_compVoxelArray.bin");

        const bool positionsMatch =
            g_CheckRender->compareBin2BinFloat("marchCube_posArray.bin", "posArray.bin",
                                               maxVerts*sizeof(float)*4, EPSILON, THRESHOLD);
        if (!positionsMatch)
        {
            g_TotalErrors++;
        }

        const bool normalsMatch =
            g_CheckRender->compareBin2BinFloat("marchCube_normalArray.bin", "normalArray.bin",
                                               maxVerts*sizeof(float)*4, EPSILON, THRESHOLD);
        if (!normalsMatch)
        {
            g_TotalErrors++;
        }

        const bool voxelsMatch =
            g_CheckRender->compareBin2BinFloat("marchCube_compVoxelArray.bin", "compVoxelArray.bin",
                                               numVoxels*sizeof(uint), EPSILON, THRESHOLD);
        if (!voxelsMatch)
        {
            g_TotalErrors++;
        }

        // Report the overall verdict for this run.
        const char *verdict = "PASSED";
        if (g_TotalErrors > 0)
        {
            verdict = "FAILED";
        }
        printf("%s\n", verdict);
    }

    cleanup();
    cudaThreadExit();
}
コード例 #6
0
ファイル: main2.cpp プロジェクト: autumnm1981/Halide
// Test driver: selects a CUDA device using a fixed "-device=1" argument,
// loads the input image, copies its red channel into a float array and runs
// testBilateralFilter() with four parameter pairs, saving each result via
// saveArrayAsImage(). The cross-bilateral section is compiled out (#if 0).
//
// argc/argv are only forwarded to cutilExit() on the failure path; device
// selection uses the fixed argument vector built below instead.
int main( int argc, char* argv[] )
{
	// Back the synthetic arguments with writable arrays. Binding string
	// literals to non-const char* is ill-formed since C++11, and the callee
	// takes a mutable char**.
	char arg0[] = "";
	char arg1[] = "-device=1";
	int argc2 = 2;
	char* argv2[2] = { arg0, arg1 };

	cudaDeviceProp deviceProp;
	int devID = cutilChooseCudaDevice( argc2, argv2 );
	if( devID < 0 )
	{
		printf( "exiting...\n" );
		cutilExit( argc, argv );
		// NOTE(review): this failure path exits with status 0; kept as-is
		// so existing callers are unaffected — confirm whether that is intended.
		exit( 0 );
	}
	cutilSafeCall( cudaGetDeviceProperties( &deviceProp, devID ) );

	//Image4f im( "c:/tmp/tulip.png" );
	//Image4f im( "c:/tmp/tulip_1080.png" ); // Jiawen version
	Image4f im( "../../apps/bilateral_grid/input.png" );
	//Image4f im( "c:/tmp/church_panorama_5097x2889.pfm" );

	im = im.flipUD();

	Array2D< float > data( im.width(), im.height() );
	Array2D< float > output( im.width(), im.height() );

	for( int y = 0; y < im.height(); ++y )
	{
		for( int x = 0; x < im.width(); ++x )
		{
			Vector3f rgb = im.pixel( x, y ).xyz();
			// float lum = ColorUtils::rgb2luminance( rgb );
			// data( x, y ) = lum;
			// jrk: just use red
			data( x, y ) = rgb[0];
		}
	}

	testBilateralFilter( data, 8, 0.1f, output );
	saveArrayAsImage( output, "bf", 8, 0.1f );
	testBilateralFilter( data, 16, 0.1f, output );
	saveArrayAsImage( output, "bf", 16, 0.1f );
	testBilateralFilter( data, 32, 0.2f, output );
	saveArrayAsImage( output, "bf", 32, 0.2f );
	testBilateralFilter( data, 64, 0.4f, output );
	saveArrayAsImage( output, "bf", 64, 0.4f );

#if 0
	Image4f edgeImage( "/tmp/step.png" );
	// Keep the flipped result, matching how `im` is handled above; the
	// previous bare call discarded the value returned by flipUD().
	edgeImage = edgeImage.flipUD();

	Array2D< float > edge( im.width(), im.height() );
	for( int y = 0; y < im.height(); ++y )
	{
		for( int x = 0; x < im.width(); ++x )
		{
			edge( x, y ) = edgeImage.pixel( x, y ).x;
		}
	}

	testCrossBilateralFilter( data, edge, 16, 0.1f, output );
	saveArrayAsImage( output, "cbf", 16, 0.1f );
	testCrossBilateralFilter( data, edge, 32, 0.2f, output );
	saveArrayAsImage( output, "cbf", 32, 0.2f );
	testCrossBilateralFilter( data, edge, 64, 0.4f, output );
	saveArrayAsImage( output, "cbf", 64, 0.4f );
#endif

	return 0;
}