void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& bottom, GpuMat& labels, GpuMat& buf, Stream& s) { #if (CUDA_VERSION < 5000) CV_Assert(terminals.type() == CV_32S); #else CV_Assert(terminals.type() == CV_32S || terminals.type() == CV_32F); #endif Size src_size = terminals.size(); CV_Assert(leftTransp.size() == Size(src_size.height, src_size.width)); CV_Assert(leftTransp.type() == terminals.type()); CV_Assert(rightTransp.size() == Size(src_size.height, src_size.width)); CV_Assert(rightTransp.type() == terminals.type()); CV_Assert(top.size() == src_size); CV_Assert(top.type() == terminals.type()); CV_Assert(bottom.size() == src_size); CV_Assert(bottom.type() == terminals.type()); labels.create(src_size, CV_8U); NppiSize sznpp; sznpp.width = src_size.width; sznpp.height = src_size.height; int bufsz; nppSafeCall( nppiGraphcutGetSize(sznpp, &bufsz) ); ensureSizeIsEnough(1, bufsz, CV_8U, buf); cudaStream_t stream = StreamAccessor::getStream(s); NppStreamHandler h(stream); NppiGraphcutStateHandler state(sznpp, buf.ptr<Npp8u>(), nppiGraphcutInitAlloc); #if (CUDA_VERSION < 5000) nppSafeCall( nppiGraphcut_32s8u(terminals.ptr<Npp32s>(), leftTransp.ptr<Npp32s>(), rightTransp.ptr<Npp32s>(), top.ptr<Npp32s>(), bottom.ptr<Npp32s>(), static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), state) ); #else if (terminals.type() == CV_32S) { nppSafeCall( nppiGraphcut_32s8u(terminals.ptr<Npp32s>(), leftTransp.ptr<Npp32s>(), rightTransp.ptr<Npp32s>(), top.ptr<Npp32s>(), bottom.ptr<Npp32s>(), static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), state) ); } else { nppSafeCall( nppiGraphcut_32f8u(terminals.ptr<Npp32f>(), leftTransp.ptr<Npp32f>(), rightTransp.ptr<Npp32f>(), top.ptr<Npp32f>(), bottom.ptr<Npp32f>(), static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), state) ); } #endif if (stream == 0) cudaSafeCall( cudaDeviceSynchronize() ); }
void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& bottom, GpuMat& labels, GpuMat& buf, Stream& s) { Size src_size = terminals.size(); CV_Assert(terminals.type() == CV_32S); CV_Assert(leftTransp.size() == Size(src_size.height, src_size.width)); CV_Assert(leftTransp.type() == CV_32S); CV_Assert(rightTransp.size() == Size(src_size.height, src_size.width)); CV_Assert(rightTransp.type() == CV_32S); CV_Assert(top.size() == src_size); CV_Assert(top.type() == CV_32S); CV_Assert(bottom.size() == src_size); CV_Assert(bottom.type() == CV_32S); labels.create(src_size, CV_8U); NppiSize sznpp; sznpp.width = src_size.width; sznpp.height = src_size.height; int bufsz; nppSafeCall( nppiGraphcutGetSize(sznpp, &bufsz) ); if ((size_t)bufsz > buf.cols * buf.rows * buf.elemSize()) buf.create(1, bufsz, CV_8U); cudaStream_t stream = StreamAccessor::getStream(s); NppStreamHandler h(stream); nppSafeCall( nppiGraphcut_32s8u(terminals.ptr<Npp32s>(), leftTransp.ptr<Npp32s>(), rightTransp.ptr<Npp32s>(), top.ptr<Npp32s>(), bottom.ptr<Npp32s>(), static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), buf.ptr<Npp8u>()) ); if (stream == 0) cudaSafeCall( cudaDeviceSynchronize() ); }
int main(int argc, char *argv[]) { printf("%s Starting...\n\n", argv[0]); try { std::string sFilename; char *filePath = sdkFindFilePath("person.txt", argv[0]); if (filePath) { sFilename = filePath; } else { printf("Error %s was unable to find person.txt\n", argv[0]); exit(EXIT_FAILURE); } cudaDeviceInit(argc, (const char **)argv); printfNPPinfo(argc, argv); if (g_bQATest == false && (g_nDevice == -1) && argc > 1) { sFilename = argv[1]; } // if we specify the filename at the command line, then we only test sFilename int file_errors = 0; std::ifstream infile(sFilename.data(), std::ifstream::in); if (infile.good()) { std::cout << "imageSegmentationNPP opened: <" << sFilename.data() << "> successfully!" << std::endl; file_errors = 0; infile.close(); } else { std::cout << "imageSegmentationNPP unable to open: <" << sFilename.data() << ">" << std::endl; file_errors++; infile.close(); } if (file_errors > 0) { exit(EXIT_FAILURE); } std::string sResultFilename = sFilename; std::string::size_type dot = sResultFilename.rfind('.'); if (dot != std::string::npos) { sResultFilename = sResultFilename.substr(0, dot); } sResultFilename += "_segmentation.pgm"; if (argc >= 3 && !g_bQATest) { sResultFilename = argv[2]; } // load MRF declaration int width, height, nLabels; int *hCue, *vCue, *dataCostArray; loadMiddleburyMRFData(sFilename, dataCostArray, hCue, vCue, width, height, nLabels); NPP_ASSERT(nLabels == 2); std::cout << "Dataset: " << sFilename << std::endl; std::cout << "Size: " << width << "x" << height << std::endl; NppiSize size; size.width = width; size.height = height; NppiRect roi; roi.x=0; roi.y=0; roi.width=width; roi.height=height; // Setup flow network int step, transposed_step; Npp32s *d_source, *d_sink, *d_terminals, *d_left_transposed, *d_right_transposed, *d_top, *d_bottom; // Setup terminal capacities d_source = nppiMalloc_32s_C1(width, height, &step); cudaMemcpy2D(d_source, step, dataCostArray, width * sizeof(int), width*sizeof(int), height, cudaMemcpyHostToDevice); d_sink = nppiMalloc_32s_C1(width, height, &step); cudaMemcpy2D(d_sink, step, &dataCostArray[width*height], width * sizeof(int), width*sizeof(int), height, cudaMemcpyHostToDevice); d_terminals = nppiMalloc_32s_C1(width, height, &step); nppiSub_32s_C1RSfs(d_sink, step, d_source, step, d_terminals, step, size, 0); // Setup edge capacities NppiSize edgeTranposedSize; edgeTranposedSize.width = height; edgeTranposedSize.height = width-1; NppiSize oneRowTranposedSize; oneRowTranposedSize.width = height; oneRowTranposedSize.height = 1; d_right_transposed = nppiMalloc_32s_C1(height, width, &transposed_step); cudaMemcpy2D(d_right_transposed, transposed_step, hCue, height * sizeof(int), height * sizeof(int), width, cudaMemcpyHostToDevice); d_left_transposed = nppiMalloc_32s_C1(height, width, &transposed_step); nppiSet_32s_C1R(0, d_left_transposed, transposed_step, oneRowTranposedSize); nppiCopy_32s_C1R(d_right_transposed, transposed_step, d_left_transposed + transposed_step/sizeof(int), transposed_step, edgeTranposedSize); NppiSize edgeSize; edgeSize.width = width; edgeSize.height = height-1; NppiSize oneRowSize; oneRowSize.width = width; oneRowSize.height = 1; d_bottom = nppiMalloc_32s_C1(width, height, &step); cudaMemcpy2D(d_bottom, step, vCue, width * sizeof(int), width*sizeof(int), height, cudaMemcpyHostToDevice); d_top = nppiMalloc_32s_C1(width, height, &step); nppiSet_32s_C1R(0, d_top, step, oneRowSize); nppiCopy_32s_C1R(d_bottom, step, d_top + step/sizeof(int), step, edgeSize); // Allocate temp storage for graphcut computation Npp8u *pBuffer; int bufferSize; nppiGraphcutGetSize(size, &bufferSize); cudaMalloc(&pBuffer, bufferSize); NppiGraphcutState *pGraphcutState; nppiGraphcutInitAlloc(size, &pGraphcutState, pBuffer); // Allocate label storage npp::ImageNPP_8u_C1 oDeviceDst(width, height); cudaEvent_t start, stop; cudaEventCreate(&start); cudaEventCreate(&stop); // Compute the graphcut, result is 0 / !=0 cudaEventRecord(start,0); nppiGraphcut_32s8u(d_terminals, d_left_transposed, d_right_transposed, d_top, d_bottom, step, transposed_step, size, oDeviceDst.data(), oDeviceDst.pitch(), pGraphcutState); cudaEventRecord(stop,0); cudaEventSynchronize(stop); float time; cudaEventElapsedTime(&time, start, stop); std::cout << "Elapsed Time: " << time << " ms" << std::endl; // declare a host image object for an 8-bit grayscale image npp::ImageCPU_8u_C1 oHostAlpha(width, height); // convert graphcut result to 0/255 alpha image using new nppiCompareC_8u_C1R primitive (CUDA 5.0) npp::ImageNPP_8u_C1 oDeviceAlpha(width, height); nppiCompareC_8u_C1R(oDeviceDst.data(), oDeviceDst.pitch(), 0, oDeviceAlpha.data(), oDeviceAlpha.pitch(), size, NPP_CMP_GREATER); // and copy the result to host oDeviceAlpha.copyTo(oHostAlpha.data(), oHostAlpha.pitch()); int E_d, E_s; std::cout << "Graphcut Cost: " << computeEnergy(E_d, E_s, oHostAlpha.data(), oHostAlpha.pitch(), hCue, vCue, dataCostArray, width, height) << std::endl; std::cout << "(E_d = " << E_d << ", E_s = " << E_s << ")" << std::endl; std::cout << "Saving segmentation result as " << sResultFilename << std::endl; saveImage(sResultFilename, oHostAlpha); nppiGraphcutFree(pGraphcutState); cudaFree(pBuffer); cudaFree(d_top); cudaFree(d_bottom); cudaFree(d_left_transposed); cudaFree(d_right_transposed); cudaFree(d_source); cudaFree(d_sink); cudaFree(d_terminals); exit(EXIT_SUCCESS); } catch (npp::Exception &rException) { std::cerr << "Program error! The following exception occurred: \n"; std::cerr << rException << std::endl; std::cerr << "Aborting." << std::endl; exit(EXIT_FAILURE); } catch (...) { std::cerr << "Program error! An unknow type of exception occurred. \n"; std::cerr << "Aborting." << std::endl; exit(EXIT_FAILURE); } return 0; }