Пример #1
0
void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& bottom, GpuMat& labels, GpuMat& buf, Stream& s)
{
#if (CUDA_VERSION < 5000)
    CV_Assert(terminals.type() == CV_32S);
#else
    CV_Assert(terminals.type() == CV_32S || terminals.type() == CV_32F);
#endif

    Size src_size = terminals.size();

    CV_Assert(leftTransp.size() == Size(src_size.height, src_size.width));
    CV_Assert(leftTransp.type() == terminals.type());

    CV_Assert(rightTransp.size() == Size(src_size.height, src_size.width));
    CV_Assert(rightTransp.type() == terminals.type());

    CV_Assert(top.size() == src_size);
    CV_Assert(top.type() == terminals.type());

    CV_Assert(bottom.size() == src_size);
    CV_Assert(bottom.type() == terminals.type());

    labels.create(src_size, CV_8U);

    NppiSize sznpp;
    sznpp.width = src_size.width;
    sznpp.height = src_size.height;

    int bufsz;
    nppSafeCall( nppiGraphcutGetSize(sznpp, &bufsz) );

    ensureSizeIsEnough(1, bufsz, CV_8U, buf);

    cudaStream_t stream = StreamAccessor::getStream(s);

    NppStreamHandler h(stream);

    NppiGraphcutStateHandler state(sznpp, buf.ptr<Npp8u>(), nppiGraphcutInitAlloc);

#if (CUDA_VERSION < 5000)
    nppSafeCall( nppiGraphcut_32s8u(terminals.ptr<Npp32s>(), leftTransp.ptr<Npp32s>(), rightTransp.ptr<Npp32s>(), top.ptr<Npp32s>(), bottom.ptr<Npp32s>(),
        static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), state) );
#else
    if (terminals.type() == CV_32S)
    {
        nppSafeCall( nppiGraphcut_32s8u(terminals.ptr<Npp32s>(), leftTransp.ptr<Npp32s>(), rightTransp.ptr<Npp32s>(), top.ptr<Npp32s>(), bottom.ptr<Npp32s>(),
            static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), state) );
    }
    else
    {
        nppSafeCall( nppiGraphcut_32f8u(terminals.ptr<Npp32f>(), leftTransp.ptr<Npp32f>(), rightTransp.ptr<Npp32f>(), top.ptr<Npp32f>(), bottom.ptr<Npp32f>(),
            static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), state) );
    }
#endif

    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
}
Пример #2
0
void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& bottom, GpuMat& labels, GpuMat& buf, Stream& s)
{
    Size src_size = terminals.size();
    CV_Assert(terminals.type() == CV_32S);
    CV_Assert(leftTransp.size() == Size(src_size.height, src_size.width));
    CV_Assert(leftTransp.type() == CV_32S);
    CV_Assert(rightTransp.size() == Size(src_size.height, src_size.width));
    CV_Assert(rightTransp.type() == CV_32S);
    CV_Assert(top.size() == src_size);
    CV_Assert(top.type() == CV_32S);
    CV_Assert(bottom.size() == src_size);
    CV_Assert(bottom.type() == CV_32S);

    labels.create(src_size, CV_8U);

    NppiSize sznpp;
    sznpp.width = src_size.width;
    sznpp.height = src_size.height;

    int bufsz;
    nppSafeCall( nppiGraphcutGetSize(sznpp, &bufsz) );

    if ((size_t)bufsz > buf.cols * buf.rows * buf.elemSize())
        buf.create(1, bufsz, CV_8U);

    cudaStream_t stream = StreamAccessor::getStream(s);

    NppStreamHandler h(stream);

    nppSafeCall( nppiGraphcut_32s8u(terminals.ptr<Npp32s>(), leftTransp.ptr<Npp32s>(), rightTransp.ptr<Npp32s>(), top.ptr<Npp32s>(), bottom.ptr<Npp32s>(),
        static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), buf.ptr<Npp8u>()) );

    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
}
Пример #3
0
int main(int argc, char *argv[])
{
    printf("%s Starting...\n\n", argv[0]);

    try
    {
        std::string sFilename;
        char *filePath = sdkFindFilePath("person.txt", argv[0]);

        if (filePath)
        {
            sFilename = filePath;
        }
        else
        {
            printf("Error %s was unable to find person.txt\n", argv[0]);
            exit(EXIT_FAILURE);
        }

        cudaDeviceInit(argc, (const char **)argv);

        printfNPPinfo(argc, argv);

        if (g_bQATest == false && (g_nDevice == -1) && argc > 1)
        {
            sFilename = argv[1];
        }

        // if we specify the filename at the command line, then we only test sFilename
        int file_errors = 0;

        std::ifstream infile(sFilename.data(), std::ifstream::in);

        if (infile.good())
        {
            std::cout << "imageSegmentationNPP opened: <" << sFilename.data() << "> successfully!" << std::endl;
            file_errors = 0;
            infile.close();
        }
        else
        {
            std::cout << "imageSegmentationNPP unable to open: <" << sFilename.data() << ">" << std::endl;
            file_errors++;
            infile.close();
        }

        if (file_errors > 0)
        {
            exit(EXIT_FAILURE);
        }

        std::string sResultFilename = sFilename;

        std::string::size_type dot = sResultFilename.rfind('.');

        if (dot != std::string::npos)
        {
            sResultFilename = sResultFilename.substr(0, dot);
        }

        sResultFilename += "_segmentation.pgm";

        if (argc >= 3 && !g_bQATest)
        {
            sResultFilename = argv[2];
        }

        // load MRF declaration
        int width, height, nLabels;
        int *hCue, *vCue, *dataCostArray;

        loadMiddleburyMRFData(sFilename, dataCostArray, hCue, vCue, width, height, nLabels);
        NPP_ASSERT(nLabels == 2);

        std::cout << "Dataset: " << sFilename << std::endl;
        std::cout << "Size: " << width << "x" << height << std::endl;

        NppiSize size;
        size.width = width;
        size.height = height;

        NppiRect roi;
        roi.x=0;
        roi.y=0;
        roi.width=width;
        roi.height=height;

        // Setup flow network
        int step, transposed_step;
        Npp32s *d_source, *d_sink, *d_terminals, *d_left_transposed, *d_right_transposed, *d_top, *d_bottom;

        // Setup terminal capacities
        d_source = nppiMalloc_32s_C1(width, height, &step);
        cudaMemcpy2D(d_source, step, dataCostArray, width * sizeof(int), width*sizeof(int), height, cudaMemcpyHostToDevice);
        d_sink = nppiMalloc_32s_C1(width, height, &step);
        cudaMemcpy2D(d_sink, step, &dataCostArray[width*height], width * sizeof(int), width*sizeof(int), height, cudaMemcpyHostToDevice);

        d_terminals = nppiMalloc_32s_C1(width, height, &step);

        nppiSub_32s_C1RSfs(d_sink, step, d_source, step, d_terminals, step, size, 0);

        // Setup edge capacities
        NppiSize edgeTranposedSize;
        edgeTranposedSize.width = height;
        edgeTranposedSize.height = width-1;

        NppiSize oneRowTranposedSize;
        oneRowTranposedSize.width = height;
        oneRowTranposedSize.height = 1;

        d_right_transposed = nppiMalloc_32s_C1(height, width, &transposed_step);
        cudaMemcpy2D(d_right_transposed, transposed_step, hCue, height * sizeof(int), height * sizeof(int), width, cudaMemcpyHostToDevice);

        d_left_transposed = nppiMalloc_32s_C1(height, width, &transposed_step);
        nppiSet_32s_C1R(0, d_left_transposed, transposed_step, oneRowTranposedSize);
        nppiCopy_32s_C1R(d_right_transposed, transposed_step, d_left_transposed + transposed_step/sizeof(int), transposed_step, edgeTranposedSize);

        NppiSize edgeSize;
        edgeSize.width = width;
        edgeSize.height = height-1;

        NppiSize oneRowSize;
        oneRowSize.width = width;
        oneRowSize.height = 1;

        d_bottom = nppiMalloc_32s_C1(width, height, &step);
        cudaMemcpy2D(d_bottom, step, vCue, width * sizeof(int), width*sizeof(int), height, cudaMemcpyHostToDevice);

        d_top = nppiMalloc_32s_C1(width, height, &step);
        nppiSet_32s_C1R(0, d_top, step, oneRowSize);
        nppiCopy_32s_C1R(d_bottom, step, d_top + step/sizeof(int), step, edgeSize);

        // Allocate temp storage for graphcut computation
        Npp8u *pBuffer;
        int bufferSize;
        nppiGraphcutGetSize(size, &bufferSize);
        cudaMalloc(&pBuffer, bufferSize);

        NppiGraphcutState *pGraphcutState;
        nppiGraphcutInitAlloc(size, &pGraphcutState, pBuffer);

        // Allocate label storage
        npp::ImageNPP_8u_C1 oDeviceDst(width, height);

        cudaEvent_t start, stop;
        cudaEventCreate(&start);
        cudaEventCreate(&stop);

        // Compute the graphcut, result is 0 / !=0
        cudaEventRecord(start,0);

        nppiGraphcut_32s8u(d_terminals, d_left_transposed, d_right_transposed,
                           d_top, d_bottom, step, transposed_step,
                           size, oDeviceDst.data(), oDeviceDst.pitch(), pGraphcutState);

        cudaEventRecord(stop,0);
        cudaEventSynchronize(stop);

        float time;
        cudaEventElapsedTime(&time, start, stop);
        std::cout << "Elapsed Time: " << time << " ms" << std::endl;

        // declare a host image object for an 8-bit grayscale image
        npp::ImageCPU_8u_C1 oHostAlpha(width, height);

        // convert graphcut result to 0/255 alpha image using new nppiCompareC_8u_C1R primitive (CUDA 5.0)
        npp::ImageNPP_8u_C1 oDeviceAlpha(width, height);
        nppiCompareC_8u_C1R(oDeviceDst.data(), oDeviceDst.pitch(), 0, oDeviceAlpha.data(), oDeviceAlpha.pitch(), size,
                            NPP_CMP_GREATER);

        // and copy the result to host
        oDeviceAlpha.copyTo(oHostAlpha.data(), oHostAlpha.pitch());

        int E_d, E_s;
        std::cout << "Graphcut Cost: " << computeEnergy(E_d, E_s, oHostAlpha.data(), oHostAlpha.pitch(), hCue, vCue, dataCostArray, width, height) << std::endl;
        std::cout << "(E_d = " << E_d << ", E_s = " << E_s << ")" << std::endl;

        std::cout << "Saving segmentation result as " << sResultFilename << std::endl;
        saveImage(sResultFilename, oHostAlpha);

        nppiGraphcutFree(pGraphcutState);
        cudaFree(pBuffer);
        cudaFree(d_top);
        cudaFree(d_bottom);
        cudaFree(d_left_transposed);
        cudaFree(d_right_transposed);
        cudaFree(d_source);
        cudaFree(d_sink);
        cudaFree(d_terminals);

        exit(EXIT_SUCCESS);
    }
    catch (npp::Exception &rException)
    {
        std::cerr << "Program error! The following exception occurred: \n";
        std::cerr << rException << std::endl;
        std::cerr << "Aborting." << std::endl;
        exit(EXIT_FAILURE);
    }
    catch (...)
    {
        std::cerr << "Program error! An unknow type of exception occurred. \n";
        std::cerr << "Aborting." << std::endl;
        exit(EXIT_FAILURE);
    }

    return 0;
}