int main(int argc, char *argv[]) { printf("%s Starting...\n\n", argv[0]); try { std::string sFilename; char *filePath = sdkFindFilePath("person.txt", argv[0]); if (filePath) { sFilename = filePath; } else { printf("Error %s was unable to find person.txt\n", argv[0]); exit(EXIT_FAILURE); } cudaDeviceInit(argc, (const char **)argv); printfNPPinfo(argc, argv); if (g_bQATest == false && (g_nDevice == -1) && argc > 1) { sFilename = argv[1]; } // if we specify the filename at the command line, then we only test sFilename int file_errors = 0; std::ifstream infile(sFilename.data(), std::ifstream::in); if (infile.good()) { std::cout << "imageSegmentationNPP opened: <" << sFilename.data() << "> successfully!" << std::endl; file_errors = 0; infile.close(); } else { std::cout << "imageSegmentationNPP unable to open: <" << sFilename.data() << ">" << std::endl; file_errors++; infile.close(); } if (file_errors > 0) { exit(EXIT_FAILURE); } std::string sResultFilename = sFilename; std::string::size_type dot = sResultFilename.rfind('.'); if (dot != std::string::npos) { sResultFilename = sResultFilename.substr(0, dot); } sResultFilename += "_segmentation.pgm"; if (argc >= 3 && !g_bQATest) { sResultFilename = argv[2]; } // load MRF declaration int width, height, nLabels; int *hCue, *vCue, *dataCostArray; loadMiddleburyMRFData(sFilename, dataCostArray, hCue, vCue, width, height, nLabels); NPP_ASSERT(nLabels == 2); std::cout << "Dataset: " << sFilename << std::endl; std::cout << "Size: " << width << "x" << height << std::endl; NppiSize size; size.width = width; size.height = height; NppiRect roi; roi.x=0; roi.y=0; roi.width=width; roi.height=height; // Setup flow network int step, transposed_step; Npp32s *d_source, *d_sink, *d_terminals, *d_left_transposed, *d_right_transposed, *d_top, *d_bottom; // Setup terminal capacities d_source = nppiMalloc_32s_C1(width, height, &step); cudaMemcpy2D(d_source, step, dataCostArray, width * sizeof(int), width*sizeof(int), height, cudaMemcpyHostToDevice); d_sink = nppiMalloc_32s_C1(width, height, &step); cudaMemcpy2D(d_sink, step, &dataCostArray[width*height], width * sizeof(int), width*sizeof(int), height, cudaMemcpyHostToDevice); d_terminals = nppiMalloc_32s_C1(width, height, &step); nppiSub_32s_C1RSfs(d_sink, step, d_source, step, d_terminals, step, size, 0); // Setup edge capacities NppiSize edgeTranposedSize; edgeTranposedSize.width = height; edgeTranposedSize.height = width-1; NppiSize oneRowTranposedSize; oneRowTranposedSize.width = height; oneRowTranposedSize.height = 1; d_right_transposed = nppiMalloc_32s_C1(height, width, &transposed_step); cudaMemcpy2D(d_right_transposed, transposed_step, hCue, height * sizeof(int), height * sizeof(int), width, cudaMemcpyHostToDevice); d_left_transposed = nppiMalloc_32s_C1(height, width, &transposed_step); nppiSet_32s_C1R(0, d_left_transposed, transposed_step, oneRowTranposedSize); nppiCopy_32s_C1R(d_right_transposed, transposed_step, d_left_transposed + transposed_step/sizeof(int), transposed_step, edgeTranposedSize); NppiSize edgeSize; edgeSize.width = width; edgeSize.height = height-1; NppiSize oneRowSize; oneRowSize.width = width; oneRowSize.height = 1; d_bottom = nppiMalloc_32s_C1(width, height, &step); cudaMemcpy2D(d_bottom, step, vCue, width * sizeof(int), width*sizeof(int), height, cudaMemcpyHostToDevice); d_top = nppiMalloc_32s_C1(width, height, &step); nppiSet_32s_C1R(0, d_top, step, oneRowSize); nppiCopy_32s_C1R(d_bottom, step, d_top + step/sizeof(int), step, edgeSize); // Allocate temp storage for graphcut computation Npp8u *pBuffer; int bufferSize; nppiGraphcutGetSize(size, &bufferSize); cudaMalloc(&pBuffer, bufferSize); NppiGraphcutState *pGraphcutState; nppiGraphcutInitAlloc(size, &pGraphcutState, pBuffer); // Allocate label storage npp::ImageNPP_8u_C1 oDeviceDst(width, height); cudaEvent_t start, stop; cudaEventCreate(&start); cudaEventCreate(&stop); // Compute the graphcut, result is 0 / !=0 cudaEventRecord(start,0); nppiGraphcut_32s8u(d_terminals, d_left_transposed, d_right_transposed, d_top, d_bottom, step, transposed_step, size, oDeviceDst.data(), oDeviceDst.pitch(), pGraphcutState); cudaEventRecord(stop,0); cudaEventSynchronize(stop); float time; cudaEventElapsedTime(&time, start, stop); std::cout << "Elapsed Time: " << time << " ms" << std::endl; // declare a host image object for an 8-bit grayscale image npp::ImageCPU_8u_C1 oHostAlpha(width, height); // convert graphcut result to 0/255 alpha image using new nppiCompareC_8u_C1R primitive (CUDA 5.0) npp::ImageNPP_8u_C1 oDeviceAlpha(width, height); nppiCompareC_8u_C1R(oDeviceDst.data(), oDeviceDst.pitch(), 0, oDeviceAlpha.data(), oDeviceAlpha.pitch(), size, NPP_CMP_GREATER); // and copy the result to host oDeviceAlpha.copyTo(oHostAlpha.data(), oHostAlpha.pitch()); int E_d, E_s; std::cout << "Graphcut Cost: " << computeEnergy(E_d, E_s, oHostAlpha.data(), oHostAlpha.pitch(), hCue, vCue, dataCostArray, width, height) << std::endl; std::cout << "(E_d = " << E_d << ", E_s = " << E_s << ")" << std::endl; std::cout << "Saving segmentation result as " << sResultFilename << std::endl; saveImage(sResultFilename, oHostAlpha); nppiGraphcutFree(pGraphcutState); cudaFree(pBuffer); cudaFree(d_top); cudaFree(d_bottom); cudaFree(d_left_transposed); cudaFree(d_right_transposed); cudaFree(d_source); cudaFree(d_sink); cudaFree(d_terminals); exit(EXIT_SUCCESS); } catch (npp::Exception &rException) { std::cerr << "Program error! The following exception occurred: \n"; std::cerr << rException << std::endl; std::cerr << "Aborting." << std::endl; exit(EXIT_FAILURE); } catch (...) { std::cerr << "Program error! An unknow type of exception occurred. \n"; std::cerr << "Aborting." << std::endl; exit(EXIT_FAILURE); } return 0; }
int main(int argc, char* argv[]) { shrQAStart(argc, argv); try { std::string sFilename; char *filePath = findFilePath("Lena.pgm", argv[0]); if (filePath) { sFilename = filePath; } else { printf("Error unable to find Lena.pgm\n"); shrQAFinishExit(argc, (const char **)argv, QA_FAILED); } // Parse the command line arguments for proper configuration parseCommandLineArguments(argc, argv); printfNPPinfo(argc, argv); if (g_bQATest == false && (g_nDevice == -1) && argc > 1) { sFilename = argv[1]; } // if we specify the filename at the command line, then we only test sFilename[0]. int file_errors = 0; std::ifstream infile(sFilename.data(), std::ifstream::in); if (infile.good()) { std::cout << "boxFilterNPP opened: <" << sFilename.data() << "> successfully!" << std::endl; file_errors = 0; infile.close(); } else { std::cout << "boxFilterNPP unable to open: <" << sFilename.data() << ">" << std::endl; file_errors++; infile.close(); } if (file_errors > 0) { shrQAFinish(argc, (const char **)argv, QA_FAILED); exit(EXIT_FAILURE); } std::string sResultFilename = sFilename; std::string::size_type dot = sResultFilename.rfind('.'); if (dot != std::string::npos) sResultFilename = sResultFilename.substr(0, dot); sResultFilename += "_boxFilter.pgm"; if (argc >= 3 && !g_bQATest) sResultFilename = argv[2]; // declare a host image object for an 8-bit grayscale image npp::ImageCPU_8u_C1 oHostSrc; // load gray-scale image from disk npp::loadImage(sFilename, oHostSrc); // declare a device image and copy construct from the host image, // i.e. upload host to device npp::ImageNPP_8u_C1 oDeviceSrc(oHostSrc); // create struct with box-filter mask size NppiSize oMaskSize = {5, 5}; // create struct with ROI size given the current mask NppiSize oSizeROI = {oDeviceSrc.width() - oMaskSize.width + 1, oDeviceSrc.height() - oMaskSize.height + 1}; // allocate device image of appropriatedly reduced size npp::ImageNPP_8u_C1 oDeviceDst(oSizeROI.width, oSizeROI.height); // set anchor point inside the mask to (0, 0) NppiPoint oAnchor = {0, 0}; // run box filter NppStatus eStatusNPP; eStatusNPP = nppiFilterBox_8u_C1R(oDeviceSrc.data(), oDeviceSrc.pitch(), oDeviceDst.data(), oDeviceDst.pitch(), oSizeROI, oMaskSize, oAnchor); NPP_ASSERT(NPP_NO_ERROR == eStatusNPP); // declare a host image for the result npp::ImageCPU_8u_C1 oHostDst(oDeviceDst.size()); // and copy the device result data into it oDeviceDst.copyTo(oHostDst.data(), oHostDst.pitch()); saveImage(sResultFilename, oHostDst); std::cout << "Saved image: " << sResultFilename << std::endl; shrQAFinish(argc, (const char **)argv, QA_PASSED); exit(EXIT_SUCCESS); } catch (npp::Exception & rException) { std::cerr << "Program error! The following exception occurred: \n"; std::cerr << rException << std::endl; std::cerr << "Aborting." << std::endl; shrQAFinish(argc, (const char **)argv, QA_FAILED); exit(EXIT_FAILURE); } catch (...) { std::cerr << "Program error! An unknow type of exception occurred. \n"; std::cerr << "Aborting." << std::endl; shrQAFinish(argc, (const char **)argv, QA_FAILED); exit(EXIT_FAILURE); return -1; } return 0; }
int main(int argc, char* argv[]) { shrQAStart(argc, argv); try { std::string sFilename; char *filePath = findFilePath("Lena.pgm", argv[0]); if (filePath) { sFilename = filePath; } else { printf("Error unable to find Lena.pgm\n"); shrQAFinishExit(argc, (const char **)argv, QA_FAILED); } // Parse the command line arguments for proper configuration parseCommandLineArguments(argc, argv); printfNPPinfo(argc, argv); if (g_bQATest == false && (g_nDevice == -1) && argc > 1) { sFilename = argv[1]; } // if we specify the filename at the command line, then we only test sFilename. int file_errors = 0; std::ifstream infile(sFilename.data(), std::ifstream::in); if (infile.good()) { std::cout << "histEqualizationNPP opened: <" << sFilename.data() << "> successfully!" << std::endl; file_errors = 0; infile.close(); } else { std::cout << "histEqualizationNPP unable to open: <" << sFilename.data() << ">" << std::endl; file_errors++; infile.close(); } if (file_errors > 0) { shrQAFinishExit(argc, (const char **)argv, QA_FAILED); } std::string dstFileName = sFilename; std::string::size_type dot = dstFileName.rfind('.'); if (dot != std::string::npos) dstFileName = dstFileName.substr(0, dot); dstFileName += "_histEqualization.pgm"; if (argc >= 3 && !g_bQATest) dstFileName = argv[2]; npp::ImageCPU_8u_C1 oHostSrc; npp::loadImage(sFilename, oHostSrc); npp::ImageNPP_8u_C1 oDeviceSrc(oHostSrc); // // allocate arrays for histogram and levels // const int binCount = 256; const int levelCount = binCount + 1; // levels array has one more element Npp32s * histDevice = 0; Npp32s * levelsDevice = 0; NPP_CHECK_CUDA(cudaMalloc((void **)&histDevice, binCount * sizeof(Npp32s))); NPP_CHECK_CUDA(cudaMalloc((void **)&levelsDevice, levelCount * sizeof(Npp32s))); // // compute histogram // NppiSize oSizeROI = {oDeviceSrc.width(), oDeviceSrc.height()}; // full image // create device scratch buffer for nppiHistogram int nDeviceBufferSize; nppiHistogramEvenGetBufferSize_8u_C1R(oSizeROI, levelCount ,&nDeviceBufferSize); Npp8u * pDeviceBuffer; NPP_CHECK_CUDA(cudaMalloc((void **)&pDeviceBuffer, nDeviceBufferSize)); // compute levels values on host Npp32s levelsHost[levelCount]; NPP_CHECK_NPP(nppiEvenLevelsHost_32s(levelsHost, levelCount, 0, binCount)); // compute the histogram NPP_CHECK_NPP(nppiHistogramEven_8u_C1R(oDeviceSrc.data(), oDeviceSrc.pitch(), oSizeROI, histDevice, levelCount, 0, binCount, pDeviceBuffer)); // copy histogram and levels to host memory Npp32s histHost[binCount]; NPP_CHECK_CUDA(cudaMemcpy(histHost, histDevice, binCount * sizeof(Npp32s), cudaMemcpyDeviceToHost)); Npp32s lutHost[binCount + 1]; // fill LUT { Npp32s * pHostHistogram = histHost; Npp32s totalSum = 0; for (; pHostHistogram < histHost + binCount; ++pHostHistogram) totalSum += *pHostHistogram; NPP_ASSERT(totalSum == oSizeROI.width * oSizeROI.height); if (totalSum == 0) totalSum = 1; float multiplier = 1.0f / float(totalSum) * 0xFF; Npp32s runningSum = 0; Npp32s * pLookupTable = lutHost; for (pHostHistogram = histHost; pHostHistogram < histHost + binCount; ++pHostHistogram) { *pLookupTable = (Npp32s)(runningSum * multiplier + 0.5f); pLookupTable++; runningSum += *pHostHistogram; } lutHost[binCount] = 0xFF; // last element is always 1 } // // apply LUT transformation to the image // // Create a device image for the result. npp::ImageNPP_8u_C1 oDeviceDst(oDeviceSrc.size()); NPP_CHECK_NPP(nppiLUT_Linear_8u_C1R(oDeviceSrc.data(), oDeviceSrc.pitch(), oDeviceDst.data(), oDeviceDst.pitch(), oSizeROI, lutHost, // value and level arrays are in host memory levelsHost, binCount+1)); // copy the result image back into the storage that contained the // input image npp::ImageCPU_8u_C1 oHostDst(oDeviceDst.size()); oDeviceDst.copyTo(oHostDst.data(), oHostDst.pitch()); // save the result npp::saveImage(dstFileName.c_str(), oHostDst); std::cout << "Saved image file " << dstFileName << std::endl; shrQAFinishExit(argc, (const char **)argv, QA_PASSED); } catch (npp::Exception & rException) { std::cerr << "Program error! The following exception occurred: \n"; std::cerr << rException << std::endl; std::cerr << "Aborting." << std::endl; shrQAFinishExit(argc, (const char **)argv, QA_FAILED); } catch (...) { std::cerr << "Program error! An unknow type of exception occurred. \n"; std::cerr << "Aborting." << std::endl; shrQAFinishExit(argc, (const char **)argv, QA_FAILED); } return 0; }