void computeDifferenceImage(const FourierImageStack & rFourierImages, unsigned int nDelta, npp::ImageNPP_32f_C1 & rResult) { NPP_DEBUG_ASSERT(rResult.width() == rFourierImages.width()); NPP_DEBUG_ASSERT(rResult.height() == rFourierImages.height()); NppiSize oSizeROI = {rFourierImages.width(), rFourierImages.height()}; nppiSet_32f_C1R(0.0f, rResult.data(), rResult.pitch(), oSizeROI); npp::ImageNPP_32f_C1 oMagnitudeImage(rFourierImages.width(), rFourierImages.height()); NppiSize oFourierROI = {rFourierImages.width() * 2, rFourierImages.height() * 2}; npp::ImageNPP_32f_C1 oDifferenceImage(oFourierROI.width, oFourierROI.height); for (unsigned int iSlice = 0; iSlice + nDelta < rFourierImages.slices(); ++iSlice) { NPP_CHECK_NPP(nppiSub_32f_C1R(reinterpret_cast<const Npp32f *>(rFourierImages.data(iSlice + nDelta)), rFourierImages.pitch(), reinterpret_cast<const Npp32f *>(rFourierImages.data(iSlice)), rFourierImages.pitch(), oDifferenceImage.data(), oDifferenceImage.pitch(), oFourierROI)); NPP_CHECK_NPP(nppiMagnitudeSqr_32fc32f_C1R(reinterpret_cast<const Npp32fc *>(oDifferenceImage.data()), oDifferenceImage.pitch(), oMagnitudeImage.data(), oMagnitudeImage.pitch(), oSizeROI)); NPP_CHECK_NPP(nppiAdd_32f_C1R(oMagnitudeImage.data(), oMagnitudeImage.pitch(), rResult.data(), rResult.pitch(), rResult.data(), rResult.pitch(), oSizeROI)); } // scale the result image by a factor float scale_factor = 1.0f / (4 * rFourierImages.width() * rFourierImages.width() * (rFourierImages.slices()-nDelta)); NPP_CHECK_NPP(nppiMulC_32f_C1R(rResult.data(), rResult.pitch(), scale_factor, rResult.data(), rResult.pitch(), oSizeROI)); }
void transformStack(const FreeImageStack & rImageStack, FourierImageStack & rFourierStack) { unsigned int nMaxSlices = rImageStack.slices(); if (nMaxSlices > rFourierStack.slices()) nMaxSlices = rFourierStack.slices(); NppiSize oSizeROI = {rImageStack.width(), rImageStack.height()}; // create plan for the FFT cufftHandle oPlanCUFFT; NPP_CHECK_CUFFT(cufftPlan2d(&oPlanCUFFT, oSizeROI.width, oSizeROI.height, CUFFT_R2C)); // allocate 32-bit float intermediate image // for this image to work with cuFFT, we must have tightly packed pixels. npp::ImageNPP<Npp32f, 1, FrugalAllocator_32f_C1> oSource_32f_C1(oSizeROI.width, oSizeROI.height); NPP_DEBUG_ASSERT(oSource_32f_C1.width() * sizeof(Npp32f) == oSource_32f_C1.pitch()); // allocate 8-bit image npp::ImageNPP_8u_C1 oSource_8u_C1; for (unsigned int iSlice = 0; iSlice < nMaxSlices; ++iSlice) { // load slice rImageStack.loadImage(iSlice, oSource_8u_C1); // upconvert 8-bit image to 32-bit float image NPP_CHECK_NPP(nppiConvert_8u32f_C1R(oSource_8u_C1.data(), oSource_8u_C1.pitch(), oSource_32f_C1.data(), oSource_32f_C1.pitch(), oSizeROI)); NPP_CHECK_CUFFT(cufftExecR2C(oPlanCUFFT, oSource_32f_C1.data(), reinterpret_cast<cufftComplex *>(rFourierStack.data(iSlice)))); } }
int main(int argc, char *argv[]) { printf("%s Starting...\n\n", argv[0]); try { std::string sFilename; char *filePath = sdkFindFilePath("Lena.pgm", argv[0]); if (filePath) { sFilename = filePath; } else { printf("Error unable to find Lena.pgm\n"); exit(EXIT_FAILURE); } // set your own FreeImage error handler FreeImage_SetOutputMessage(FreeImageErrorHandler); cudaDeviceInit(argc, (const char **)argv); // Min spec is SM 1.0 devices if (printfNPPinfo(argc, argv, 1, 0) == false) { cudaDeviceReset(); exit(EXIT_SUCCESS); } if (argc > 1) { sFilename = argv[1]; } // if we specify the filename at the command line, then we only test sFilename // otherwise we will check both sFilename[0,1] int file_errors = 0; std::ifstream infile(sFilename.data(), std::ifstream::in); if (infile.good()) { std::cout << "freeImageInteropNPP opened: <" << sFilename.data() << "> successfully!" << std::endl; file_errors = 0; infile.close(); } else { std::cout << "freeImageInteropNPP unable to open: <" << sFilename.data() << ">" << std::endl; file_errors++; infile.close(); } if (file_errors > 0) { exit(EXIT_FAILURE); } std::string sResultFilename = sFilename; std::string::size_type dot = sResultFilename.rfind('.'); if (dot != std::string::npos) { sResultFilename = sResultFilename.substr(0, dot); } sResultFilename += "_boxFilterFII.pgm"; if (argc >= 3) { sResultFilename = argv[2]; } FREE_IMAGE_FORMAT eFormat = FreeImage_GetFileType(sFilename.c_str()); // no signature? try to guess the file format from the file extension if (eFormat == FIF_UNKNOWN) { eFormat = FreeImage_GetFIFFromFilename(sFilename.c_str()); } NPP_ASSERT(eFormat != FIF_UNKNOWN); // check that the plugin has reading capabilities ... FIBITMAP *pBitmap; if (FreeImage_FIFSupportsReading(eFormat)) { pBitmap = FreeImage_Load(eFormat, sFilename.c_str()); } NPP_ASSERT(pBitmap != 0); // Dump the bitmap information to the console std::cout << (*pBitmap) << std::endl; // make sure this is an 8-bit single channel image NPP_ASSERT(FreeImage_GetColorType(pBitmap) == FIC_MINISBLACK); NPP_ASSERT(FreeImage_GetBPP(pBitmap) == 8); unsigned int nImageWidth = FreeImage_GetWidth(pBitmap); unsigned int nImageHeight = FreeImage_GetHeight(pBitmap); unsigned int nSrcPitch = FreeImage_GetPitch(pBitmap); unsigned char *pSrcData = FreeImage_GetBits(pBitmap); int nSrcPitchCUDA; Npp8u *pSrcImageCUDA = nppiMalloc_8u_C1(nImageWidth, nImageHeight, &nSrcPitchCUDA); NPP_ASSERT_NOT_NULL(pSrcImageCUDA); // copy image loaded via FreeImage to into CUDA device memory, i.e. // transfer the image-data up to the GPU's video-memory NPP_CHECK_CUDA(cudaMemcpy2D(pSrcImageCUDA, nSrcPitchCUDA, pSrcData, nSrcPitch, nImageWidth, nImageHeight, cudaMemcpyHostToDevice)); // define size of the box filter const NppiSize oMaskSize = {7, 7}; const NppiPoint oMaskAchnor = {0, 0}; // compute maximal result image size const NppiSize oSizeROI = {nImageWidth - (oMaskSize.width - 1), nImageHeight - (oMaskSize.height - 1) }; // allocate result image memory int nDstPitchCUDA; Npp8u *pDstImageCUDA = nppiMalloc_8u_C1(oSizeROI.width, oSizeROI.height, &nDstPitchCUDA); NPP_ASSERT_NOT_NULL(pDstImageCUDA); NPP_CHECK_NPP(nppiFilterBox_8u_C1R(pSrcImageCUDA, nSrcPitchCUDA, pDstImageCUDA, nDstPitchCUDA, oSizeROI, oMaskSize, oMaskAchnor)); // create the result image storage using FreeImage so we can easily // save FIBITMAP *pResultBitmap = FreeImage_Allocate(oSizeROI.width, oSizeROI.height, 8 /* bits per pixel */); NPP_ASSERT_NOT_NULL(pResultBitmap); unsigned int nResultPitch = FreeImage_GetPitch(pResultBitmap); unsigned char *pResultData = FreeImage_GetBits(pResultBitmap); NPP_CHECK_CUDA(cudaMemcpy2D(pResultData, nResultPitch, pDstImageCUDA, nDstPitchCUDA, oSizeROI.width, oSizeROI.height, cudaMemcpyDeviceToHost)); // now save the result image bool bSuccess; bSuccess = FreeImage_Save(FIF_PGM, pResultBitmap, sResultFilename.c_str(), 0) == TRUE; NPP_ASSERT_MSG(bSuccess, "Failed to save result image."); //free nppiImage nppiFree(pSrcImageCUDA); nppiFree(pDstImageCUDA); cudaDeviceReset(); exit(EXIT_SUCCESS); } catch (npp::Exception &rException) { std::cerr << "Program error! The following exception occurred: \n"; std::cerr << rException << std::endl; std::cerr << "Aborting." << std::endl; exit(EXIT_FAILURE); } catch (...) { std::cerr << "Program error! An unknow type of exception occurred. \n"; std::cerr << "Aborting." << std::endl; exit(EXIT_FAILURE); } exit(EXIT_SUCCESS); }
int main(int argc, char* argv[]) { shrQAStart(argc, argv); try { std::string sFilename; char *filePath = findFilePath("Lena.pgm", argv[0]); if (filePath) { sFilename = filePath; } else { printf("Error unable to find Lena.pgm\n"); shrQAFinishExit(argc, (const char **)argv, QA_FAILED); } // Parse the command line arguments for proper configuration parseCommandLineArguments(argc, argv); printfNPPinfo(argc, argv); if (g_bQATest == false && (g_nDevice == -1) && argc > 1) { sFilename = argv[1]; } // if we specify the filename at the command line, then we only test sFilename. int file_errors = 0; std::ifstream infile(sFilename.data(), std::ifstream::in); if (infile.good()) { std::cout << "histEqualizationNPP opened: <" << sFilename.data() << "> successfully!" << std::endl; file_errors = 0; infile.close(); } else { std::cout << "histEqualizationNPP unable to open: <" << sFilename.data() << ">" << std::endl; file_errors++; infile.close(); } if (file_errors > 0) { shrQAFinishExit(argc, (const char **)argv, QA_FAILED); } std::string dstFileName = sFilename; std::string::size_type dot = dstFileName.rfind('.'); if (dot != std::string::npos) dstFileName = dstFileName.substr(0, dot); dstFileName += "_histEqualization.pgm"; if (argc >= 3 && !g_bQATest) dstFileName = argv[2]; npp::ImageCPU_8u_C1 oHostSrc; npp::loadImage(sFilename, oHostSrc); npp::ImageNPP_8u_C1 oDeviceSrc(oHostSrc); // // allocate arrays for histogram and levels // const int binCount = 256; const int levelCount = binCount + 1; // levels array has one more element Npp32s * histDevice = 0; Npp32s * levelsDevice = 0; NPP_CHECK_CUDA(cudaMalloc((void **)&histDevice, binCount * sizeof(Npp32s))); NPP_CHECK_CUDA(cudaMalloc((void **)&levelsDevice, levelCount * sizeof(Npp32s))); // // compute histogram // NppiSize oSizeROI = {oDeviceSrc.width(), oDeviceSrc.height()}; // full image // create device scratch buffer for nppiHistogram int nDeviceBufferSize; nppiHistogramEvenGetBufferSize_8u_C1R(oSizeROI, levelCount ,&nDeviceBufferSize); Npp8u * pDeviceBuffer; NPP_CHECK_CUDA(cudaMalloc((void **)&pDeviceBuffer, nDeviceBufferSize)); // compute levels values on host Npp32s levelsHost[levelCount]; NPP_CHECK_NPP(nppiEvenLevelsHost_32s(levelsHost, levelCount, 0, binCount)); // compute the histogram NPP_CHECK_NPP(nppiHistogramEven_8u_C1R(oDeviceSrc.data(), oDeviceSrc.pitch(), oSizeROI, histDevice, levelCount, 0, binCount, pDeviceBuffer)); // copy histogram and levels to host memory Npp32s histHost[binCount]; NPP_CHECK_CUDA(cudaMemcpy(histHost, histDevice, binCount * sizeof(Npp32s), cudaMemcpyDeviceToHost)); Npp32s lutHost[binCount + 1]; // fill LUT { Npp32s * pHostHistogram = histHost; Npp32s totalSum = 0; for (; pHostHistogram < histHost + binCount; ++pHostHistogram) totalSum += *pHostHistogram; NPP_ASSERT(totalSum == oSizeROI.width * oSizeROI.height); if (totalSum == 0) totalSum = 1; float multiplier = 1.0f / float(totalSum) * 0xFF; Npp32s runningSum = 0; Npp32s * pLookupTable = lutHost; for (pHostHistogram = histHost; pHostHistogram < histHost + binCount; ++pHostHistogram) { *pLookupTable = (Npp32s)(runningSum * multiplier + 0.5f); pLookupTable++; runningSum += *pHostHistogram; } lutHost[binCount] = 0xFF; // last element is always 1 } // // apply LUT transformation to the image // // Create a device image for the result. npp::ImageNPP_8u_C1 oDeviceDst(oDeviceSrc.size()); NPP_CHECK_NPP(nppiLUT_Linear_8u_C1R(oDeviceSrc.data(), oDeviceSrc.pitch(), oDeviceDst.data(), oDeviceDst.pitch(), oSizeROI, lutHost, // value and level arrays are in host memory levelsHost, binCount+1)); // copy the result image back into the storage that contained the // input image npp::ImageCPU_8u_C1 oHostDst(oDeviceDst.size()); oDeviceDst.copyTo(oHostDst.data(), oHostDst.pitch()); // save the result npp::saveImage(dstFileName.c_str(), oHostDst); std::cout << "Saved image file " << dstFileName << std::endl; shrQAFinishExit(argc, (const char **)argv, QA_PASSED); } catch (npp::Exception & rException) { std::cerr << "Program error! The following exception occurred: \n"; std::cerr << rException << std::endl; std::cerr << "Aborting." << std::endl; shrQAFinishExit(argc, (const char **)argv, QA_FAILED); } catch (...) { std::cerr << "Program error! An unknow type of exception occurred. \n"; std::cerr << "Aborting." << std::endl; shrQAFinishExit(argc, (const char **)argv, QA_FAILED); } return 0; }