Example #1
0
void 
CheckRender::savePPM(  const char *zfilename, bool bInvert, void **ppReadBuf )
{
    if (zfilename != NULL) {
        if (bInvert) {
            unsigned char *readBuf;
            unsigned char *writeBuf= (unsigned char *)malloc(m_Width * m_Height * m_Bpp);

            for (unsigned int y=0; y < m_Height; y++) {
                if (ppReadBuf) {
                    readBuf = *(unsigned char **)ppReadBuf;
                } else {
                    readBuf = (unsigned char *)m_pImageData;
                }
                memcpy(&writeBuf[m_Width*m_Bpp*y], (readBuf+ m_Width*m_Bpp*(m_Height-1-y)), m_Width*m_Bpp);
            }
            // we copy the results back to original system buffer
            if (ppReadBuf) {
                memcpy(*ppReadBuf, writeBuf, m_Width*m_Height*m_Bpp);
            } else {
                memcpy(m_pImageData, writeBuf, m_Width*m_Height*m_Bpp);
            }
            free (writeBuf);
        }
        printf("> Saving PPM: <%s>\n", zfilename);
        if (ppReadBuf) {
		    sdkSavePPM4ub(zfilename, *(unsigned char **)ppReadBuf, m_Width, m_Height);
        } else {
		    sdkSavePPM4ub(zfilename, (unsigned char *)m_pImageData, m_Width, m_Height);
        }
    }
}
void runAutoTest(int argc, char **argv, const char *filename, int kernel_param)
{
    printf("[%s] - (automated testing w/ readback)\n", sSDKsample);

    int devID = findCudaDevice(argc, (const char **)argv);

    // First load the image, so we know what the size of the image (imageW and imageH)
    printf("Allocating host and CUDA memory and loading image file...\n");
    const char *image_path = sdkFindFilePath("portrait_noise.bmp", argv[0]);

    if (image_path == NULL)
    {
        printf("imageDenoisingGL was unable to find and load image file <portrait_noise.bmp>.\nExiting...\n");
        exit(EXIT_FAILURE);
    }

    LoadBMPFile(&h_Src, &imageW, &imageH, image_path);
    printf("Data init done.\n");

    checkCudaErrors(CUDA_MallocArray(&h_Src, imageW, imageH));

    TColor *d_dst = NULL;
    unsigned char *h_dst = NULL;
    checkCudaErrors(cudaMalloc((void **)&d_dst, imageW*imageH*sizeof(TColor)));
    h_dst = (unsigned char *)malloc(imageH*imageW*4);

    {
        g_Kernel = kernel_param;
        printf("[AutoTest]: %s <%s>\n", sSDKsample, filterMode[g_Kernel]);
        checkCudaErrors(CUDA_Bind2TextureArray());
        runImageFilters(d_dst);
        checkCudaErrors(CUDA_UnbindTexture());
        checkCudaErrors(cudaDeviceSynchronize());

        checkCudaErrors(cudaMemcpy(h_dst, d_dst, imageW*imageH*sizeof(TColor), cudaMemcpyDeviceToHost));
        sdkSavePPM4ub(filename, h_dst, imageW, imageH);
    }

    checkCudaErrors(CUDA_FreeArray());
    free(h_Src);

    checkCudaErrors(cudaFree(d_dst));
    free(h_dst);

    printf("\n[%s] -> Kernel %d, Saved: %s\n", sSDKsample, kernel_param, filename);

    // cudaDeviceReset causes the driver to clean up all state. While
    // not mandatory in normal operation, it is good practice.  It is also
    // needed to ensure correct operation when the application is being
    // profiled. Calling cudaDeviceReset causes all profile data to be
    // flushed before the application exits
    cudaDeviceReset();
    exit(g_TotalErrors == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}
Example #3
0
// This test specifies a single test (where you specify radius and/or iterations)
int runSingleTest(char *ref_file, char *exec_path)
{
    int nTotalErrors = 0;
    char dump_file[256];

    printf("[runSingleTest]: [%s]\n", sSDKsample);

    initCuda();

    unsigned int *dResult;
    unsigned int *hResult = (unsigned int *)malloc(width * height * sizeof(unsigned int));
    size_t pitch;
    checkCudaErrors(cudaMallocPitch((void **)&dResult, &pitch, width*sizeof(unsigned int), height));

    // run the sample radius
    {
        printf("%s (radius=%d) (passes=%d) ", sSDKsample, filter_radius, iterations);
        bilateralFilterRGBA(dResult, width, height, euclidean_delta, filter_radius, iterations, kernel_timer);

        // check if kernel execution generated an error
        getLastCudaError("Error: bilateralFilterRGBA Kernel execution FAILED");
        checkCudaErrors(cudaDeviceSynchronize());

        // readback the results to system memory
        cudaMemcpy2D(hResult, sizeof(unsigned int)*width, dResult, pitch,
                     sizeof(unsigned int)*width, height, cudaMemcpyDeviceToHost);

        sprintf(dump_file, "nature_%02d.ppm", filter_radius);

        sdkSavePPM4ub((const char *)dump_file, (unsigned char *)hResult, width, height);

        if (!sdkComparePPM(dump_file, sdkFindFilePath(ref_file, exec_path), MAX_EPSILON_ERROR, 0.15f, false))
        {
            printf("Image is Different ");
            nTotalErrors++;
        }
        else
        {
            printf("Image is Matching ");
        }

        printf(" <%s>\n", ref_file);
    }
    printf("\n");

    free(hResult);
    checkCudaErrors(cudaFree(dResult));

    return nTotalErrors;
}
Example #4
0
HRESULT CheckRenderD3D10::ResourceToPPM(ID3D10Device *pDevice, ID3D10Resource *pResource, const char *zFileName)
{
    D3D10_RESOURCE_DIMENSION rType;
    pResource->GetType(&rType);

    if (rType != D3D10_RESOURCE_DIMENSION_TEXTURE2D)
    {
        printf("SurfaceToPPM: pResource is not a 2D texture! Aborting...\n");
        return E_FAIL;
    }

    ID3D10Texture2D *pSourceTexture = (ID3D10Texture2D *)pResource;
    ID3D10Texture2D *pTargetTexture = NULL;

    D3D10_TEXTURE2D_DESC desc;
    pSourceTexture->GetDesc(&desc);
    desc.BindFlags = 0;
    desc.CPUAccessFlags = D3D10_CPU_ACCESS_READ;
    desc.Usage = D3D10_USAGE_STAGING;

    if (FAILED(pDevice->CreateTexture2D(&desc,NULL,&pTargetTexture)))
    {
        printf("SurfaceToPPM: Unable to create target Texture resoruce! Aborting... \n");
        return E_FAIL;
    }

    pDevice->CopyResource(pTargetTexture,pSourceTexture);

    D3D10_MAPPED_TEXTURE2D mappedTex2D;
    pTargetTexture->Map(0,D3D10_MAP_READ,0,&mappedTex2D);

    // Need to convert from dx pitch to pitch=width
    unsigned char *pPPMData = new unsigned char[desc.Width*desc.Height*4];

    for (unsigned int iHeight = 0; iHeight<desc.Height; iHeight++)
    {
        memcpy(&(pPPMData[iHeight*desc.Width*4]),(unsigned char *)(mappedTex2D.pData)+iHeight*mappedTex2D.RowPitch,desc.Width*4);
    }

    pTargetTexture->Unmap(0);

    // Prepends the PPM header info and bumps byte data afterwards
    sdkSavePPM4ub(zFileName, pPPMData, desc.Width, desc.Height);

    delete [] pPPMData;
    pTargetTexture->Release();

    return S_OK;
}
bool
runSingleTest(const char *ref_file, const char *exec_path)
{
    // allocate memory for result
    int nTotalErrors = 0;
    unsigned int *d_result;
    unsigned int size = width * height * sizeof(unsigned int);
    checkCudaErrors(cudaMalloc((void **) &d_result, size));

    // warm-up
    gaussianFilterRGBA(d_img, d_result, d_temp, width, height, sigma, order, nthreads);

    checkCudaErrors(cudaDeviceSynchronize());
    sdkStartTimer(&timer);

    gaussianFilterRGBA(d_img, d_result, d_temp, width, height, sigma, order, nthreads);
    checkCudaErrors(cudaDeviceSynchronize());
    getLastCudaError("Kernel execution failed");
    sdkStopTimer(&timer);

    unsigned char *h_result = (unsigned char *)malloc(width*height*4);
    checkCudaErrors(cudaMemcpy(h_result, d_result, width*height*4, cudaMemcpyDeviceToHost));

    char dump_file[1024];
    sprintf(dump_file, "lena_%02d.ppm", (int)sigma);
    sdkSavePPM4ub(dump_file, h_result, width, height);

    if (!sdkComparePPM(dump_file, sdkFindFilePath(ref_file, exec_path), MAX_EPSILON_ERROR, THRESHOLD, false))
    {
        nTotalErrors++;
    }

    printf("Processing time: %f (ms)\n", sdkGetTimerValue(&timer));
    printf("%.2f Mpixels/sec\n", (width*height / (sdkGetTimerValue(&timer) / 1000.0f)) / 1e6);

    checkCudaErrors(cudaFree(d_result));
    free(h_result);

    printf("Summary: %d errors!\n", nTotalErrors);

    printf(nTotalErrors == 0 ? "Test passed\n": "Test failed!\n");
    return (nTotalErrors == 0);
}
void runAutoTest(int argc, char **argv, const char *dump_filename, eFilterMode filter_mode)
{
    cudaDeviceProp deviceProps;

    int devID = findCudaDevice(argc, (const char **)argv);

    checkCudaErrors(cudaGetDeviceProperties(&deviceProps, devID));

    printf("[%s] (automated testing w/ readback)\n", sSDKsample);
    printf("CUDA device [%s] has %d Multi-Processors\n", deviceProps.name, deviceProps.multiProcessorCount);

    loadImageData(argc, argv);

    uchar4 *d_output;
    checkCudaErrors(cudaMalloc((void **)&d_output, imageWidth*imageHeight*4));
    unsigned int *h_result = (unsigned int *)malloc(width * height * sizeof(unsigned int));

    printf("AutoTest: %s Filter Mode: <%s>\n", sSDKsample, sFilterMode[g_FilterMode]);

    render(imageWidth, imageHeight,
           tx, ty, scale, cx, cy,
           blockSize, gridSize, filter_mode, d_output);

    // check if kernel execution generated an error
    getLastCudaError("Error: render (bicubicTexture) Kernel execution FAILED");
    checkCudaErrors(cudaDeviceSynchronize());

    cudaMemcpy(h_result, d_output, imageWidth*imageHeight*4, cudaMemcpyDeviceToHost);

    sdkSavePPM4ub(dump_filename, (unsigned char *)h_result, imageWidth, imageHeight);

    checkCudaErrors(cudaFree(d_output));
    free(h_result);

    // cudaDeviceReset causes the driver to clean up all state. While
    // not mandatory in normal operation, it is good practice.  It is also
    // needed to ensure correct operation when the application is being
    // profiled. Calling cudaDeviceReset causes all profile data to be
    // flushed before the application exits
    cudaDeviceReset();
}
//////////////////////////////////////////////////////////////////////////
// AUTOMATIC TESTING
void runSingleTest(const char *ref_file, const char *exec_path)
{
    uint *d_output;
    checkCudaErrors(cudaMalloc((void **)&d_output, width*height*sizeof(uint)));
    checkCudaErrors(cudaMemset(d_output, 0, width*height*sizeof(uint)));

    float modelView[16] =
    {
        1.0f, 0.0f, 0.0f, 0.0f,
        0.0f, 1.0f, 0.0f, 0.0f,
        0.0f, 0.0f, 1.0f, 0.0f,
        0.0f, 0.0f, 4.0f, 1.0f
    };

    invViewMatrix[0] = modelView[0];
    invViewMatrix[1] = modelView[4];
    invViewMatrix[2] = modelView[8];
    invViewMatrix[3] = modelView[12];
    invViewMatrix[4] = modelView[1];
    invViewMatrix[5] = modelView[5];
    invViewMatrix[6] = modelView[9];
    invViewMatrix[7] = modelView[13];
    invViewMatrix[8] = modelView[2];
    invViewMatrix[9] = modelView[6];
    invViewMatrix[10] = modelView[10];
    invViewMatrix[11] = modelView[14];

    // call CUDA kernel, writing results to PBO
    VolumeRender_copyInvViewMatrix(invViewMatrix, sizeof(float4)*3);
    filterAnimation = false;

    // Start timer 0 and process n loops on the GPU
    int nIter = 10;
    float scale = 2.0f/float(nIter-1);

    for (int i = -1; i < nIter; i++)
    {
        if (i == 0)
        {
            cudaDeviceSynchronize();
            sdkStartTimer(&timer);
        }

        filterFactor = (float(i) * scale) - 1.0f;
        filterFactor = -filterFactor;
        filter();
        VolumeRender_render(gridSize, blockSize, d_output, width, height, density, brightness, transferOffset, transferScale);
    }

    cudaDeviceSynchronize();
    sdkStopTimer(&timer);
    // Get elapsed time and throughput, then log to sample and master logs
    double dAvgTime = sdkGetTimerValue(&timer)/(nIter * 1000.0);
    printf("volumeFiltering, Throughput = %.4f MTexels/s, Time = %.5f s, Size = %u Texels, NumDevsUsed = %u, Workgroup = %u\n",
           (1.0e-6 * width * height)/dAvgTime, dAvgTime, (width * height), 1, blockSize.x * blockSize.y);


    getLastCudaError("Error: kernel execution FAILED");
    checkCudaErrors(cudaDeviceSynchronize());

    unsigned char *h_output = (unsigned char *)malloc(width*height*4);
    checkCudaErrors(cudaMemcpy(h_output, d_output, width*height*4, cudaMemcpyDeviceToHost));

    sdkSavePPM4ub("volumefilter.ppm", h_output, width, height);
    bool bTestResult = sdkComparePPM("volumefilter.ppm", sdkFindFilePath(ref_file, exec_path),
                                     MAX_EPSILON_ERROR, THRESHOLD, true);

    checkCudaErrors(cudaFree(d_output));
    free(h_output);
    cleanup();

    exit(bTestResult ? EXIT_SUCCESS : EXIT_FAILURE);
}