void CheckRender::savePPM( const char *zfilename, bool bInvert, void **ppReadBuf ) { if (zfilename != NULL) { if (bInvert) { unsigned char *readBuf; unsigned char *writeBuf= (unsigned char *)malloc(m_Width * m_Height * m_Bpp); for (unsigned int y=0; y < m_Height; y++) { if (ppReadBuf) { readBuf = *(unsigned char **)ppReadBuf; } else { readBuf = (unsigned char *)m_pImageData; } memcpy(&writeBuf[m_Width*m_Bpp*y], (readBuf+ m_Width*m_Bpp*(m_Height-1-y)), m_Width*m_Bpp); } // we copy the results back to original system buffer if (ppReadBuf) { memcpy(*ppReadBuf, writeBuf, m_Width*m_Height*m_Bpp); } else { memcpy(m_pImageData, writeBuf, m_Width*m_Height*m_Bpp); } free (writeBuf); } printf("> Saving PPM: <%s>\n", zfilename); if (ppReadBuf) { sdkSavePPM4ub(zfilename, *(unsigned char **)ppReadBuf, m_Width, m_Height); } else { sdkSavePPM4ub(zfilename, (unsigned char *)m_pImageData, m_Width, m_Height); } } }
void runAutoTest(int argc, char **argv, const char *filename, int kernel_param) { printf("[%s] - (automated testing w/ readback)\n", sSDKsample); int devID = findCudaDevice(argc, (const char **)argv); // First load the image, so we know what the size of the image (imageW and imageH) printf("Allocating host and CUDA memory and loading image file...\n"); const char *image_path = sdkFindFilePath("portrait_noise.bmp", argv[0]); if (image_path == NULL) { printf("imageDenoisingGL was unable to find and load image file <portrait_noise.bmp>.\nExiting...\n"); exit(EXIT_FAILURE); } LoadBMPFile(&h_Src, &imageW, &imageH, image_path); printf("Data init done.\n"); checkCudaErrors(CUDA_MallocArray(&h_Src, imageW, imageH)); TColor *d_dst = NULL; unsigned char *h_dst = NULL; checkCudaErrors(cudaMalloc((void **)&d_dst, imageW*imageH*sizeof(TColor))); h_dst = (unsigned char *)malloc(imageH*imageW*4); { g_Kernel = kernel_param; printf("[AutoTest]: %s <%s>\n", sSDKsample, filterMode[g_Kernel]); checkCudaErrors(CUDA_Bind2TextureArray()); runImageFilters(d_dst); checkCudaErrors(CUDA_UnbindTexture()); checkCudaErrors(cudaDeviceSynchronize()); checkCudaErrors(cudaMemcpy(h_dst, d_dst, imageW*imageH*sizeof(TColor), cudaMemcpyDeviceToHost)); sdkSavePPM4ub(filename, h_dst, imageW, imageH); } checkCudaErrors(CUDA_FreeArray()); free(h_Src); checkCudaErrors(cudaFree(d_dst)); free(h_dst); printf("\n[%s] -> Kernel %d, Saved: %s\n", sSDKsample, kernel_param, filename); // cudaDeviceReset causes the driver to clean up all state. While // not mandatory in normal operation, it is good practice. It is also // needed to ensure correct operation when the application is being // profiled. Calling cudaDeviceReset causes all profile data to be // flushed before the application exits cudaDeviceReset(); exit(g_TotalErrors == 0 ? EXIT_SUCCESS : EXIT_FAILURE); }
// This test specifies a single test (where you specify radius and/or iterations) int runSingleTest(char *ref_file, char *exec_path) { int nTotalErrors = 0; char dump_file[256]; printf("[runSingleTest]: [%s]\n", sSDKsample); initCuda(); unsigned int *dResult; unsigned int *hResult = (unsigned int *)malloc(width * height * sizeof(unsigned int)); size_t pitch; checkCudaErrors(cudaMallocPitch((void **)&dResult, &pitch, width*sizeof(unsigned int), height)); // run the sample radius { printf("%s (radius=%d) (passes=%d) ", sSDKsample, filter_radius, iterations); bilateralFilterRGBA(dResult, width, height, euclidean_delta, filter_radius, iterations, kernel_timer); // check if kernel execution generated an error getLastCudaError("Error: bilateralFilterRGBA Kernel execution FAILED"); checkCudaErrors(cudaDeviceSynchronize()); // readback the results to system memory cudaMemcpy2D(hResult, sizeof(unsigned int)*width, dResult, pitch, sizeof(unsigned int)*width, height, cudaMemcpyDeviceToHost); sprintf(dump_file, "nature_%02d.ppm", filter_radius); sdkSavePPM4ub((const char *)dump_file, (unsigned char *)hResult, width, height); if (!sdkComparePPM(dump_file, sdkFindFilePath(ref_file, exec_path), MAX_EPSILON_ERROR, 0.15f, false)) { printf("Image is Different "); nTotalErrors++; } else { printf("Image is Matching "); } printf(" <%s>\n", ref_file); } printf("\n"); free(hResult); checkCudaErrors(cudaFree(dResult)); return nTotalErrors; }
// Dumps a D3D10 2D texture resource to a PPM file.
//
// The source texture is copied into a newly created staging texture with CPU
// read access, that copy is mapped, its rows are repacked from the mapped row
// pitch into a tightly packed buffer, and the buffer is handed to
// sdkSavePPM4ub().
//
//   pDevice   - device used to create the staging texture and issue the copy.
//   pResource - resource to dump; must be a 2D texture.
//   zFileName - output PPM file name.
//
// Returns S_OK on success, or E_FAIL when the resource is not a 2D texture or
// the staging texture cannot be created or mapped.
//
// NOTE(review): every row is copied as desc.Width * 4 bytes, i.e. the code
// assumes a 4-byte-per-pixel texture format - confirm against callers.
HRESULT CheckRenderD3D10::ResourceToPPM(ID3D10Device *pDevice, ID3D10Resource *pResource, const char *zFileName)
{
    D3D10_RESOURCE_DIMENSION rType;
    pResource->GetType(&rType);

    if (rType != D3D10_RESOURCE_DIMENSION_TEXTURE2D)
    {
        printf("SurfaceToPPM: pResource is not a 2D texture! Aborting...\n");
        return E_FAIL;
    }

    ID3D10Texture2D *pSourceTexture = (ID3D10Texture2D *)pResource;
    ID3D10Texture2D *pTargetTexture = NULL;

    // Same description as the source, but as a CPU-readable staging texture.
    D3D10_TEXTURE2D_DESC desc;
    pSourceTexture->GetDesc(&desc);
    desc.BindFlags = 0;
    desc.CPUAccessFlags = D3D10_CPU_ACCESS_READ;
    desc.Usage = D3D10_USAGE_STAGING;

    if (FAILED(pDevice->CreateTexture2D(&desc,NULL,&pTargetTexture)))
    {
        printf("SurfaceToPPM: Unable to create target Texture resoruce! Aborting... \n");
        return E_FAIL;
    }

    pDevice->CopyResource(pTargetTexture,pSourceTexture);

    // Map can fail; without this check mappedTex2D would be read uninitialised.
    D3D10_MAPPED_TEXTURE2D mappedTex2D;

    if (FAILED(pTargetTexture->Map(0,D3D10_MAP_READ,0,&mappedTex2D)))
    {
        printf("SurfaceToPPM: Unable to map target Texture resource! Aborting... \n");
        pTargetTexture->Release();
        return E_FAIL;
    }

    // Need to convert from dx pitch to pitch=width
    const size_t packedRowBytes = (size_t)desc.Width * 4;
    unsigned char *pPPMData = new unsigned char[packedRowBytes * desc.Height];
    const unsigned char *pMapped = (const unsigned char *)mappedTex2D.pData;

    for (unsigned int iHeight = 0; iHeight<desc.Height; iHeight++)
    {
        memcpy(pPPMData + (size_t)iHeight * packedRowBytes,
               pMapped + (size_t)iHeight * mappedTex2D.RowPitch,
               packedRowBytes);
    }

    pTargetTexture->Unmap(0);

    // Prepends the PPM header info and bumps byte data afterwards
    sdkSavePPM4ub(zFileName, pPPMData, desc.Width, desc.Height);

    delete [] pPPMData;
    pTargetTexture->Release();

    return S_OK;
}
bool runSingleTest(const char *ref_file, const char *exec_path) { // allocate memory for result int nTotalErrors = 0; unsigned int *d_result; unsigned int size = width * height * sizeof(unsigned int); checkCudaErrors(cudaMalloc((void **) &d_result, size)); // warm-up gaussianFilterRGBA(d_img, d_result, d_temp, width, height, sigma, order, nthreads); checkCudaErrors(cudaDeviceSynchronize()); sdkStartTimer(&timer); gaussianFilterRGBA(d_img, d_result, d_temp, width, height, sigma, order, nthreads); checkCudaErrors(cudaDeviceSynchronize()); getLastCudaError("Kernel execution failed"); sdkStopTimer(&timer); unsigned char *h_result = (unsigned char *)malloc(width*height*4); checkCudaErrors(cudaMemcpy(h_result, d_result, width*height*4, cudaMemcpyDeviceToHost)); char dump_file[1024]; sprintf(dump_file, "lena_%02d.ppm", (int)sigma); sdkSavePPM4ub(dump_file, h_result, width, height); if (!sdkComparePPM(dump_file, sdkFindFilePath(ref_file, exec_path), MAX_EPSILON_ERROR, THRESHOLD, false)) { nTotalErrors++; } printf("Processing time: %f (ms)\n", sdkGetTimerValue(&timer)); printf("%.2f Mpixels/sec\n", (width*height / (sdkGetTimerValue(&timer) / 1000.0f)) / 1e6); checkCudaErrors(cudaFree(d_result)); free(h_result); printf("Summary: %d errors!\n", nTotalErrors); printf(nTotalErrors == 0 ? "Test passed\n": "Test failed!\n"); return (nTotalErrors == 0); }
void runAutoTest(int argc, char **argv, const char *dump_filename, eFilterMode filter_mode) { cudaDeviceProp deviceProps; int devID = findCudaDevice(argc, (const char **)argv); checkCudaErrors(cudaGetDeviceProperties(&deviceProps, devID)); printf("[%s] (automated testing w/ readback)\n", sSDKsample); printf("CUDA device [%s] has %d Multi-Processors\n", deviceProps.name, deviceProps.multiProcessorCount); loadImageData(argc, argv); uchar4 *d_output; checkCudaErrors(cudaMalloc((void **)&d_output, imageWidth*imageHeight*4)); unsigned int *h_result = (unsigned int *)malloc(width * height * sizeof(unsigned int)); printf("AutoTest: %s Filter Mode: <%s>\n", sSDKsample, sFilterMode[g_FilterMode]); render(imageWidth, imageHeight, tx, ty, scale, cx, cy, blockSize, gridSize, filter_mode, d_output); // check if kernel execution generated an error getLastCudaError("Error: render (bicubicTexture) Kernel execution FAILED"); checkCudaErrors(cudaDeviceSynchronize()); cudaMemcpy(h_result, d_output, imageWidth*imageHeight*4, cudaMemcpyDeviceToHost); sdkSavePPM4ub(dump_filename, (unsigned char *)h_result, imageWidth, imageHeight); checkCudaErrors(cudaFree(d_output)); free(h_result); // cudaDeviceReset causes the driver to clean up all state. While // not mandatory in normal operation, it is good practice. It is also // needed to ensure correct operation when the application is being // profiled. Calling cudaDeviceReset causes all profile data to be // flushed before the application exits cudaDeviceReset(); }
////////////////////////////////////////////////////////////////////////// // AUTOMATIC TESTING void runSingleTest(const char *ref_file, const char *exec_path) { uint *d_output; checkCudaErrors(cudaMalloc((void **)&d_output, width*height*sizeof(uint))); checkCudaErrors(cudaMemset(d_output, 0, width*height*sizeof(uint))); float modelView[16] = { 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 4.0f, 1.0f }; invViewMatrix[0] = modelView[0]; invViewMatrix[1] = modelView[4]; invViewMatrix[2] = modelView[8]; invViewMatrix[3] = modelView[12]; invViewMatrix[4] = modelView[1]; invViewMatrix[5] = modelView[5]; invViewMatrix[6] = modelView[9]; invViewMatrix[7] = modelView[13]; invViewMatrix[8] = modelView[2]; invViewMatrix[9] = modelView[6]; invViewMatrix[10] = modelView[10]; invViewMatrix[11] = modelView[14]; // call CUDA kernel, writing results to PBO VolumeRender_copyInvViewMatrix(invViewMatrix, sizeof(float4)*3); filterAnimation = false; // Start timer 0 and process n loops on the GPU int nIter = 10; float scale = 2.0f/float(nIter-1); for (int i = -1; i < nIter; i++) { if (i == 0) { cudaDeviceSynchronize(); sdkStartTimer(&timer); } filterFactor = (float(i) * scale) - 1.0f; filterFactor = -filterFactor; filter(); VolumeRender_render(gridSize, blockSize, d_output, width, height, density, brightness, transferOffset, transferScale); } cudaDeviceSynchronize(); sdkStopTimer(&timer); // Get elapsed time and throughput, then log to sample and master logs double dAvgTime = sdkGetTimerValue(&timer)/(nIter * 1000.0); printf("volumeFiltering, Throughput = %.4f MTexels/s, Time = %.5f s, Size = %u Texels, NumDevsUsed = %u, Workgroup = %u\n", (1.0e-6 * width * height)/dAvgTime, dAvgTime, (width * height), 1, blockSize.x * blockSize.y); getLastCudaError("Error: kernel execution FAILED"); checkCudaErrors(cudaDeviceSynchronize()); unsigned char *h_output = (unsigned char *)malloc(width*height*4); checkCudaErrors(cudaMemcpy(h_output, d_output, 
width*height*4, cudaMemcpyDeviceToHost)); sdkSavePPM4ub("volumefilter.ppm", h_output, width, height); bool bTestResult = sdkComparePPM("volumefilter.ppm", sdkFindFilePath(ref_file, exec_path), MAX_EPSILON_ERROR, THRESHOLD, true); checkCudaErrors(cudaFree(d_output)); free(h_output); cleanup(); exit(bTestResult ? EXIT_SUCCESS : EXIT_FAILURE); }