Example #1
void cv::gpu::transpose(const GpuMat& src, GpuMat& dst, Stream& s)
{
    CV_Assert( src.elemSize() == 1 || src.elemSize() == 4 || src.elemSize() == 8 );

    dst.create( src.cols, src.rows, src.type() );

    cudaStream_t stream = StreamAccessor::getStream(s);

    if (src.elemSize() == 1)
    {
        NppStreamHandler h(stream);

        NppiSize sz;
        sz.width  = src.cols;
        sz.height = src.rows;

        nppSafeCall( nppiTranspose_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step),
            dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz) );

        if (stream == 0)
            cudaSafeCall( cudaDeviceSynchronize() );
    }
    else if (src.elemSize() == 4)
    {
        arithm::transpose<int>(src, dst, stream);
    }
    else // if (src.elemSize() == 8)
    {
        if (!deviceSupports(NATIVE_DOUBLE))
            CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double");

        arithm::transpose<double>(src, dst, stream);
    }
}
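For context, a minimal calling sketch for the wrapper above; this is a hypothetical usage under the 2.4-era cv::gpu API (the file name and function name are illustrative, not from the original source):

#include <opencv2/gpu/gpu.hpp>
#include <opencv2/highgui/highgui.hpp>

void transposeOnGpu()
{
    cv::Mat host = cv::imread("input.png", cv::IMREAD_GRAYSCALE); // CV_8UC1 -> elemSize() == 1
    cv::gpu::GpuMat d_src, d_dst;
    d_src.upload(host);

    cv::gpu::Stream stream;
    cv::gpu::transpose(d_src, d_dst, stream); // runs asynchronously on 'stream'
    stream.waitForCompletion();               // block before reading the result

    cv::Mat result;
    d_dst.download(result);
}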
Example #2
void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar s)
{
    CV_Assert((src.depth() != CV_64F) ||
              (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));

    if (s[0] == 0.0 && s[1] == 0.0 && s[2] == 0.0 && s[3] == 0.0)
    {
        cudaSafeCall( cudaMemset2DAsync(src.data, src.step, 0, src.cols * src.elemSize(), src.rows, impl->stream) );
        return;
    }
    if (src.depth() == CV_8U)
    {
        int cn = src.channels();

        // cudaMemset2D writes a single repeated byte, so this fast path is only
        // valid when every channel receives the same 8-bit value
        if (cn == 1 || (cn == 2 && s[0] == s[1]) || (cn == 3 && s[0] == s[1] && s[0] == s[2]) || (cn == 4 && s[0] == s[1] && s[0] == s[2] && s[0] == s[3]))
        {
            int val = saturate_cast<uchar>(s[0]);
            cudaSafeCall( cudaMemset2DAsync(src.data, src.step, val, src.cols * src.elemSize(), src.rows, impl->stream) );
            return;
        }
    }

    typedef void (*set_caller_t)(GpuMat& src, const Scalar& s, cudaStream_t stream);
    static const set_caller_t set_callers[] =
    {
        kernelSet<uchar>, kernelSet<schar>, kernelSet<ushort>, kernelSet<short>,
        kernelSet<int>, kernelSet<float>, kernelSet<double>
    };
    set_callers[src.depth()](src, s, impl->stream);
}
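A short hypothetical usage of both paths, again assuming the 2.4-era Stream API:

#include <opencv2/gpu/gpu.hpp>

void fillOnStream()
{
    cv::gpu::GpuMat img(480, 640, CV_8UC3);
    cv::gpu::Stream stream;
    stream.enqueueMemSet(img, cv::Scalar::all(0));     // zero fill: cudaMemset2DAsync fast path
    stream.enqueueMemSet(img, cv::Scalar(10, 20, 30)); // distinct channels: kernelSet<uchar> path
    stream.waitForCompletion();
}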
Example #3
void cv::gpu::transpose(const GpuMat& src, GpuMat& dst, Stream& s)
{
    CV_Assert(src.elemSize() == 1 || src.elemSize() == 4 || src.elemSize() == 8);

    dst.create( src.cols, src.rows, src.type() );

    cudaStream_t stream = StreamAccessor::getStream(s);

    if (src.elemSize() == 1)
    {
        NppStreamHandler h(stream);

        NppiSize sz;
        sz.width  = src.cols;
        sz.height = src.rows;

        nppSafeCall( nppiTranspose_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step),
            dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz) );
    }
    else if (src.elemSize() == 4)
    {
        NppStStreamHandler h(stream);

        NcvSize32u sz;
        sz.width  = src.cols;
        sz.height = src.rows;

        ncvSafeCall( nppiStTranspose_32u_C1R(const_cast<Ncv32u*>(src.ptr<Ncv32u>()), static_cast<int>(src.step),
            dst.ptr<Ncv32u>(), static_cast<int>(dst.step), sz) );
    }
    else // if (src.elemSize() == 8)
    {
        if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
            CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");

        NppStStreamHandler h(stream);

        NcvSize32u sz;
        sz.width  = src.cols;
        sz.height = src.rows;

        ncvSafeCall( nppiStTranspose_64u_C1R(const_cast<Ncv64u*>(src.ptr<Ncv64u>()), static_cast<int>(src.step),
            dst.ptr<Ncv64u>(), static_cast<int>(dst.step), sz) );
    }

    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
}
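NppStreamHandler and NppStStreamHandler appear throughout these examples as scope guards. A minimal sketch of what such a guard might look like, assuming NPP's classic global-stream calls nppGetStream/nppSetStream; NppStreamGuard is a hypothetical stand-in, not the actual OpenCV class:

#include <npp.h>

// Hypothetical RAII guard: binds NPP's global stream for the current scope.
class NppStreamGuard
{
public:
    explicit NppStreamGuard(cudaStream_t s) : old_(nppGetStream())
    {
        nppSetStream(s);    // subsequent nppi* calls run on 's'
    }
    ~NppStreamGuard()
    {
        nppSetStream(old_); // restore the previous binding on scope exit
    }
private:
    cudaStream_t old_;
};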
Example #4
void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc,
                        const GpuMat& mask, GpuMat& valBuf, GpuMat& locBuf)
{
    typedef void (*func_t)(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
    static const func_t funcs[] =
    {
        ::minMaxLoc::run<uchar>,
        ::minMaxLoc::run<schar>,
        ::minMaxLoc::run<ushort>,
        ::minMaxLoc::run<short>,
        ::minMaxLoc::run<int>,
        ::minMaxLoc::run<float>,
        ::minMaxLoc::run<double>
    };

    CV_Assert( src.channels() == 1 );
    CV_Assert( mask.empty() || (mask.size() == src.size() && mask.type() == CV_8U) );

    if (src.depth() == CV_64F)
    {
        if (!deviceSupports(NATIVE_DOUBLE))
            CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double");
    }

    Size valbuf_size, locbuf_size;
    ::minMaxLoc::getBufSize(src.cols, src.rows, src.elemSize(), valbuf_size.width, valbuf_size.height, locbuf_size.width, locbuf_size.height);
    ensureSizeIsEnough(valbuf_size, CV_8U, valBuf);
    ensureSizeIsEnough(locbuf_size, CV_8U, locBuf);

    const func_t func = funcs[src.depth()];

    // Outputs the caller didn't request are redirected to locals; Point's
    // contiguous x,y members stand in for the int* pairs expected by func.
    double temp1, temp2;
    Point temp3, temp4;
    func(src, mask, minVal ? minVal : &temp1, maxVal ? maxVal : &temp2, minLoc ? &minLoc->x : &temp3.x, maxLoc ? &maxLoc->x : &temp4.x, valBuf, locBuf);
}
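A hypothetical call site for the buffered overload above; d_src is assumed to be a pre-filled single-channel GpuMat, and an empty GpuMat serves as "no mask":

#include <opencv2/gpu/gpu.hpp>

void reduceExample(const cv::gpu::GpuMat& d_src)
{
    double minVal = 0, maxVal = 0;
    cv::Point minLoc, maxLoc;
    cv::gpu::GpuMat valBuf, locBuf; // scratch buffers, grown on demand and reusable
    cv::gpu::minMaxLoc(d_src, &minVal, &maxVal, &minLoc, &maxLoc,
                       cv::gpu::GpuMat(), valBuf, locBuf); // empty mask
}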
Example #5
void cv::gpu::graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& bottom, GpuMat& labels, GpuMat& buf, Stream& s)
{
    Size src_size = terminals.size();
    CV_Assert(terminals.type() == CV_32S);
    CV_Assert(leftTransp.size() == Size(src_size.height, src_size.width));
    CV_Assert(leftTransp.type() == CV_32S);
    CV_Assert(rightTransp.size() == Size(src_size.height, src_size.width));
    CV_Assert(rightTransp.type() == CV_32S);
    CV_Assert(top.size() == src_size);
    CV_Assert(top.type() == CV_32S);
    CV_Assert(bottom.size() == src_size);
    CV_Assert(bottom.type() == CV_32S);

    labels.create(src_size, CV_8U);

    NppiSize sznpp;
    sznpp.width = src_size.width;
    sznpp.height = src_size.height;

    int bufsz;
    nppSafeCall( nppiGraphcutGetSize(sznpp, &bufsz) );

    // reallocate the scratch buffer only if the existing one is too small
    if ((size_t)bufsz > buf.cols * buf.rows * buf.elemSize())
        buf.create(1, bufsz, CV_8U);

    cudaStream_t stream = StreamAccessor::getStream(s);

    NppStreamHandler h(stream);

    nppSafeCall( nppiGraphcut_32s8u(terminals.ptr<Npp32s>(), leftTransp.ptr<Npp32s>(), rightTransp.ptr<Npp32s>(), top.ptr<Npp32s>(), bottom.ptr<Npp32s>(),
        static_cast<int>(terminals.step), static_cast<int>(leftTransp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), buf.ptr<Npp8u>()) );

    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
}
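A shape-only sketch of driving this wrapper; the capacities are left unfilled, so this only illustrates the expected matrix layouts (note the left/right edge matrices are passed transposed, as the asserts above require):

#include <opencv2/gpu/gpu.hpp>

void segment(int rows, int cols)
{
    cv::gpu::GpuMat terminals(rows, cols, CV_32S);
    cv::gpu::GpuMat top(rows, cols, CV_32S), bottom(rows, cols, CV_32S);
    cv::gpu::GpuMat leftTransp(cols, rows, CV_32S), rightTransp(cols, rows, CV_32S);
    // ... upload real edge capacities here ...

    cv::gpu::GpuMat labels, buf;
    cv::gpu::Stream stream;
    cv::gpu::graphcut(terminals, leftTransp, rightTransp, top, bottom, labels, buf, stream);
    stream.waitForCompletion(); // labels (CV_8U) now holds the per-pixel segment assignment
}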
Example #6
void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc,
                        const GpuMat& mask, GpuMat& valbuf, GpuMat& locbuf)
{
    using namespace mathfunc::minmaxloc;

    typedef void (*Caller)(const DevMem2D, double*, double*, int[2], int[2], PtrStep, PtrStep);
    typedef void (*MaskedCaller)(const DevMem2D, const PtrStep, double*, double*, int[2], int[2], PtrStep, PtrStep);

    static const Caller callers[2][7] = 
    { { min_max_loc_multipass_caller<unsigned char>, min_max_loc_multipass_caller<char>, 
        min_max_loc_multipass_caller<unsigned short>, min_max_loc_multipass_caller<short>, 
        min_max_loc_multipass_caller<int>, min_max_loc_multipass_caller<float>, 0 },
      { min_max_loc_caller<unsigned char>, min_max_loc_caller<char>, 
        min_max_loc_caller<unsigned short>, min_max_loc_caller<short>, 
        min_max_loc_caller<int>, min_max_loc_caller<float>, min_max_loc_caller<double> } };

    static const MaskedCaller masked_callers[2][7] = 
    { { min_max_loc_mask_multipass_caller<unsigned char>, min_max_loc_mask_multipass_caller<char>, 
        min_max_loc_mask_multipass_caller<unsigned short>, min_max_loc_mask_multipass_caller<short>, 
        min_max_loc_mask_multipass_caller<int>, min_max_loc_mask_multipass_caller<float>, 0 },
      { min_max_loc_mask_caller<unsigned char>, min_max_loc_mask_caller<char>, 
        min_max_loc_mask_caller<unsigned short>, min_max_loc_mask_caller<short>, 
        min_max_loc_mask_caller<int>, min_max_loc_mask_caller<float>, min_max_loc_mask_caller<double> } };

    CV_Assert(src.channels() == 1);
    CV_Assert(mask.empty() || (mask.type() == CV_8U && src.size() == mask.size()));
    CV_Assert(src.type() != CV_64F || hasNativeDoubleSupport(getDevice()));

    double minVal_; if (!minVal) minVal = &minVal_;
    double maxVal_; if (!maxVal) maxVal = &maxVal_;
    int minLoc_[2];
    int maxLoc_[2];

    Size valbuf_size, locbuf_size;
    get_buf_size_required(src.cols, src.rows, src.elemSize(), valbuf_size.width, 
                          valbuf_size.height, locbuf_size.width, locbuf_size.height);
    valbuf.create(valbuf_size, CV_8U);
    locbuf.create(locbuf_size, CV_8U);

    if (mask.empty())
    {
        // channels() == 1 was asserted above, so src.type() equals src.depth()
        // and indexes the 7-entry depth tables directly
        Caller caller = callers[hasAtomicsSupport(getDevice())][src.type()];
        if (!caller) CV_Error(CV_StsBadArg, "minMaxLoc: unsupported type");
        caller(src, minVal, maxVal, minLoc_, maxLoc_, valbuf, locbuf);
    }
    else
    {
        MaskedCaller caller = masked_callers[hasAtomicsSupport(getDevice())][src.type()];
        if (!caller) CV_Error(CV_StsBadArg, "minMaxLoc: unsupported type");
        caller(src, mask, minVal, maxVal, minLoc_, maxLoc_, valbuf, locbuf);
    }

    if (minLoc) { minLoc->x = minLoc_[0]; minLoc->y = minLoc_[1]; }
    if (maxLoc) { maxLoc->x = maxLoc_[0]; maxLoc->y = maxLoc_[1]; }
}
Example #7
void cv::ogl::Buffer::copyFrom(InputArray arr, Target target, bool autoRelease)
{
#ifndef HAVE_OPENGL
    (void) arr;
    (void) target;
    (void) autoRelease;
    throw_nogl();
#else
    const int kind = arr.kind();

    if (kind == _InputArray::OPENGL_TEXTURE)
    {
        ogl::Texture2D tex = arr.getOGlTexture2D();
        tex.copyTo(*this);
        setAutoRelease(autoRelease);
        return;
    }

    const Size asize = arr.size();
    const int atype = arr.type();
    create(asize, atype, target, autoRelease);

    switch (kind)
    {
    case _InputArray::OPENGL_BUFFER:
        {
            ogl::Buffer buf = arr.getOGlBuffer();
            impl_->copyFrom(buf.bufId(), asize.area() * CV_ELEM_SIZE(atype));
            break;
        }

    case _InputArray::GPU_MAT:
        {
            #if !defined HAVE_CUDA || defined(CUDA_DISABLER)
                throw_nocuda();
            #else
                GpuMat dmat = arr.getGpuMat();
                impl_->copyFrom(dmat.data, dmat.step, dmat.cols * dmat.elemSize(), dmat.rows);
            #endif

            break;
        }

    default:
        {
            Mat mat = arr.getMat();
            CV_Assert( mat.isContinuous() );
            impl_->copyFrom(asize.area() * CV_ELEM_SIZE(atype), mat.data);
        }
    }
#endif
}
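A minimal hypothetical upload from host memory; it assumes a current OpenGL context and an OpenCV build with HAVE_OPENGL, and the pixel-unpack target is just one plausible choice:

#include <opencv2/core/opengl.hpp>

void uploadToGl()
{
    cv::Mat host(480, 640, CV_8UC3, cv::Scalar::all(128));
    cv::ogl::Buffer buf;
    // the default branch above requires a continuous Mat
    buf.copyFrom(host, cv::ogl::Buffer::PIXEL_UNPACK_BUFFER, /*autoRelease=*/false);
}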
Example #8
void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const GpuMat& mask, GpuMat& buf)
{
    using namespace mathfunc::minmax;

    typedef void (*Caller)(const DevMem2D, double*, double*, PtrStep);
    typedef void (*MaskedCaller)(const DevMem2D, const PtrStep, double*, double*, PtrStep);

    static const Caller callers[2][7] = 
    { { min_max_multipass_caller<unsigned char>, min_max_multipass_caller<char>, 
        min_max_multipass_caller<unsigned short>, min_max_multipass_caller<short>, 
        min_max_multipass_caller<int>, min_max_multipass_caller<float>, 0 },
      { min_max_caller<unsigned char>, min_max_caller<char>, 
        min_max_caller<unsigned short>, min_max_caller<short>, 
        min_max_caller<int>, min_max_caller<float>, min_max_caller<double> } };

    static const MaskedCaller masked_callers[2][7] = 
    { { min_max_mask_multipass_caller<unsigned char>, min_max_mask_multipass_caller<char>, 
        min_max_mask_multipass_caller<unsigned short>, min_max_mask_multipass_caller<short>, 
        min_max_mask_multipass_caller<int>, min_max_mask_multipass_caller<float>, 0 },
      { min_max_mask_caller<unsigned char>, min_max_mask_caller<char>, 
        min_max_mask_caller<unsigned short>, min_max_mask_caller<short>, 
        min_max_mask_caller<int>, min_max_mask_caller<float>, 
        min_max_mask_caller<double> } };


    CV_Assert(src.channels() == 1);
    CV_Assert(mask.empty() || (mask.type() == CV_8U && src.size() == mask.size()));
    CV_Assert(src.type() != CV_64F || hasNativeDoubleSupport(getDevice()));

    double minVal_; if (!minVal) minVal = &minVal_;
    double maxVal_; if (!maxVal) maxVal = &maxVal_;
    
    Size bufSize;
    get_buf_size_required(src.cols, src.rows, src.elemSize(), bufSize.width, bufSize.height);
    buf.create(bufSize, CV_8U);

    if (mask.empty())
    {
        Caller caller = callers[hasAtomicsSupport(getDevice())][src.type()];
        if (!caller) CV_Error(CV_StsBadArg, "minMax: unsupported type");
        caller(src, minVal, maxVal, buf);
    }
    else
    {
        MaskedCaller caller = masked_callers[hasAtomicsSupport(getDevice())][src.type()];
        if (!caller) CV_Error(CV_StsBadArg, "minMax: unsupported type");
        caller(src, mask, minVal, maxVal, buf);
    }
}
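A hypothetical call that requests only the maximum, exercising the temp-variable fallback above; d_src and mask are assumed to be pre-filled:

#include <opencv2/gpu/gpu.hpp>

void maxOnly(const cv::gpu::GpuMat& d_src, const cv::gpu::GpuMat& mask)
{
    double maxVal = 0;
    cv::gpu::GpuMat buf; // scratch buffer, reusable across calls
    cv::gpu::minMax(d_src, 0, &maxVal, mask, buf); // null minVal: only the maximum is computed
}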
Example #9
void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar s)
{
    CV_Assert((src.depth() != CV_64F) || 
        (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));

    if (s[0] == 0.0 && s[1] == 0.0 && s[2] == 0.0 && s[3] == 0.0)
    {
        cudaSafeCall( cudaMemset2DAsync(src.data, src.step, 0, src.cols * src.elemSize(), src.rows, Impl::getStream(impl)) );
        return;
    }
    if (src.depth() == CV_8U)
    {
        int cn = src.channels();

        if (cn == 1 || (cn == 2 && s[0] == s[1]) || (cn == 3 && s[0] == s[1] && s[0] == s[2]) || (cn == 4 && s[0] == s[1] && s[0] == s[2] && s[0] == s[3]))
        {
            int val = saturate_cast<uchar>(s[0]);
            cudaSafeCall( cudaMemset2DAsync(src.data, src.step, val, src.cols * src.elemSize(), src.rows, Impl::getStream(impl)) );
            return;
        }
    }

    setTo(src, s, Impl::getStream(impl));
}
Example #10
void cv::ogl::Buffer::copyTo(OutputArray arr, cuda::Stream& stream) const
{
#ifndef HAVE_OPENGL
    (void) arr;
    (void) stream;
    throw_no_ogl();
#else
    #ifndef HAVE_CUDA
        (void) arr;
        (void) stream;
        throw_no_cuda();
    #else
        arr.create(rows_, cols_, type_);
        GpuMat dmat = arr.getGpuMat();
        impl_->copyTo(dmat.data, dmat.step, dmat.cols * dmat.elemSize(), dmat.rows, cuda::StreamAccessor::getStream(stream));
    #endif
#endif
}
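A download sketch for the CUDA branch, assuming an OpenCV 3.x-style build with OpenGL and CUDA interop available (header names per 3.x):

#include <opencv2/core/opengl.hpp>
#include <opencv2/core/cuda.hpp>

void downloadToGpuMat(const cv::ogl::Buffer& buf)
{
    cv::cuda::GpuMat dmat;
    cv::cuda::Stream stream;
    buf.copyTo(dmat, stream);   // asynchronous device-to-device copy
    stream.waitForCompletion(); // dmat is valid only after the stream syncs
}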
Example #11
void cv::ogl::Buffer::copyFrom(InputArray arr, Target target, bool autoRelease)
{
#ifndef HAVE_OPENGL
    (void) arr;
    (void) target;
    (void) autoRelease;
    throw_no_ogl();
#else
    const int kind = arr.kind();

    const Size asize = arr.size();
    const int atype = arr.type();
    create(asize, atype, target, autoRelease);

    switch (kind)
    {
    case _InputArray::OPENGL_BUFFER:
        {
            ogl::Buffer buf = arr.getOGlBuffer();
            impl_->copyFrom(buf.bufId(), asize.area() * CV_ELEM_SIZE(atype));
            break;
        }

    case _InputArray::CUDA_GPU_MAT:
        {
            #ifndef HAVE_CUDA
                throw_no_cuda();
            #else
                GpuMat dmat = arr.getGpuMat();
                impl_->copyFrom(dmat.data, dmat.step, dmat.cols * dmat.elemSize(), dmat.rows);
            #endif

            break;
        }

    default:
        {
            Mat mat = arr.getMat();
            CV_Assert( mat.isContinuous() );
            impl_->copyFrom(asize.area() * CV_ELEM_SIZE(atype), mat.data);
        }
    }
#endif
}
Example #12
void cv::ogl::Buffer::copyFrom(InputArray arr, cuda::Stream& stream, Target target, bool autoRelease)
{
#ifndef HAVE_OPENGL
    (void) arr;
    (void) stream;
    (void) target;
    (void) autoRelease;
    throw_no_ogl();
#else
    #ifndef HAVE_CUDA
        (void) arr;
        (void) stream;
        (void) target;
        (void) autoRelease;
        throw_no_cuda();
    #else
        GpuMat dmat = arr.getGpuMat();

        create(dmat.size(), dmat.type(), target, autoRelease);

        impl_->copyFrom(dmat.data, dmat.step, dmat.cols * dmat.elemSize(), dmat.rows, cuda::StreamAccessor::getStream(stream));
    #endif
#endif
}
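And the matching hypothetical asynchronous upload under the same interop assumptions:

#include <opencv2/core/opengl.hpp>
#include <opencv2/core/cuda.hpp>

void uploadFromGpuMat(const cv::cuda::GpuMat& dmat, cv::ogl::Buffer& buf)
{
    cv::cuda::Stream stream;
    buf.copyFrom(dmat, stream, cv::ogl::Buffer::ARRAY_BUFFER);
    stream.waitForCompletion(); // the copy has finished only after this returns
}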
Example #13
void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc,
                        const GpuMat& mask, GpuMat& valBuf, GpuMat& locBuf)
{
    using namespace ::cv::gpu::device::matrix_reductions::minmaxloc;

    typedef void (*Caller)(const PtrStepSzb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
    typedef void (*MaskedCaller)(const PtrStepSzb, const PtrStepb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);

    static Caller multipass_callers[] =
    {
        minMaxLocMultipassCaller<unsigned char>, minMaxLocMultipassCaller<char>,
        minMaxLocMultipassCaller<unsigned short>, minMaxLocMultipassCaller<short>,
        minMaxLocMultipassCaller<int>, minMaxLocMultipassCaller<float>, 0
    };

    static Caller singlepass_callers[] =
    {
        minMaxLocCaller<unsigned char>, minMaxLocCaller<char>,
        minMaxLocCaller<unsigned short>, minMaxLocCaller<short>,
        minMaxLocCaller<int>, minMaxLocCaller<float>, minMaxLocCaller<double>
    };

    static MaskedCaller masked_multipass_callers[] =
    {
        minMaxLocMaskMultipassCaller<unsigned char>, minMaxLocMaskMultipassCaller<char>,
        minMaxLocMaskMultipassCaller<unsigned short>, minMaxLocMaskMultipassCaller<short>,
        minMaxLocMaskMultipassCaller<int>, minMaxLocMaskMultipassCaller<float>, 0
    };

    static MaskedCaller masked_singlepass_callers[] =
    {
        minMaxLocMaskCaller<unsigned char>, minMaxLocMaskCaller<char>,
        minMaxLocMaskCaller<unsigned short>, minMaxLocMaskCaller<short>,
        minMaxLocMaskCaller<int>, minMaxLocMaskCaller<float>, minMaxLocMaskCaller<double>
    };

    CV_Assert(src.depth() <= CV_64F);
    CV_Assert(src.channels() == 1);
    CV_Assert(mask.empty() || (mask.type() == CV_8U && src.size() == mask.size()));

    if (src.depth() == CV_64F)
    {
        if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
            CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
    }

    double minVal_;
    if (!minVal) minVal = &minVal_;
    double maxVal_;
    if (!maxVal) maxVal = &maxVal_;
    int minLoc_[2];
    int maxLoc_[2];

    Size valbuf_size, locbuf_size;
    getBufSizeRequired(src.cols, src.rows, static_cast<int>(src.elemSize()), valbuf_size.width,
                       valbuf_size.height, locbuf_size.width, locbuf_size.height);
    ensureSizeIsEnough(valbuf_size, CV_8U, valBuf);
    ensureSizeIsEnough(locbuf_size, CV_8U, locBuf);

    if (mask.empty())
    {
        Caller* callers = multipass_callers;
        if (TargetArchs::builtWith(GLOBAL_ATOMICS) && DeviceInfo().supports(GLOBAL_ATOMICS))
            callers = singlepass_callers;

        Caller caller = callers[src.type()];
        CV_Assert(caller != 0);
        caller(src, minVal, maxVal, minLoc_, maxLoc_, valBuf, locBuf);
    }
    else
    {
        MaskedCaller* callers = masked_multipass_callers;
        if (TargetArchs::builtWith(GLOBAL_ATOMICS) && DeviceInfo().supports(GLOBAL_ATOMICS))
            callers = masked_singlepass_callers;

        MaskedCaller caller = callers[src.type()];
        CV_Assert(caller != 0);
        caller(src, mask, minVal, maxVal, minLoc_, maxLoc_, valBuf, locBuf);
    }

    if (minLoc) {
        minLoc->x = minLoc_[0];
        minLoc->y = minLoc_[1];
    }
    if (maxLoc) {
        maxLoc->x = maxLoc_[0];
        maxLoc->y = maxLoc_[1];
    }
}
Example #14
void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const GpuMat& mask, GpuMat& buf)
{
    using namespace mathfunc::minmax;

    typedef void (*Caller)(const DevMem2D, double*, double*, PtrStep);
    typedef void (*MaskedCaller)(const DevMem2D, const PtrStep, double*, double*, PtrStep);

    static Caller multipass_callers[7] = { 
            minMaxMultipassCaller<unsigned char>, minMaxMultipassCaller<char>, 
            minMaxMultipassCaller<unsigned short>, minMaxMultipassCaller<short>, 
            minMaxMultipassCaller<int>, minMaxMultipassCaller<float>, 0 };

    static Caller singlepass_callers[7] = { 
            minMaxCaller<unsigned char>, minMaxCaller<char>, 
            minMaxCaller<unsigned short>, minMaxCaller<short>, 
            minMaxCaller<int>, minMaxCaller<float>, minMaxCaller<double> };

    static MaskedCaller masked_multipass_callers[7] = { 
            minMaxMaskMultipassCaller<unsigned char>, minMaxMaskMultipassCaller<char>, 
            minMaxMaskMultipassCaller<unsigned short>, minMaxMaskMultipassCaller<short>,
            minMaxMaskMultipassCaller<int>, minMaxMaskMultipassCaller<float>, 0 };

    static MaskedCaller masked_singlepass_callers[7] = { 
            minMaxMaskCaller<unsigned char>, minMaxMaskCaller<char>, 
            minMaxMaskCaller<unsigned short>, minMaxMaskCaller<short>, 
            minMaxMaskCaller<int>, minMaxMaskCaller<float>, 
            minMaxMaskCaller<double> };

    CV_Assert(src.channels() == 1);

    CV_Assert(mask.empty() || (mask.type() == CV_8U && src.size() == mask.size()));

    CV_Assert(src.type() != CV_64F || (TargetArchs::builtWith(NATIVE_DOUBLE) && 
                                       DeviceInfo().supports(NATIVE_DOUBLE)));

    double minVal_; if (!minVal) minVal = &minVal_;
    double maxVal_; if (!maxVal) maxVal = &maxVal_;
    
    Size buf_size;
    getBufSizeRequired(src.cols, src.rows, static_cast<int>(src.elemSize()), buf_size.width, buf_size.height);
    ensureSizeIsEnough(buf_size, CV_8U, buf);

    if (mask.empty())
    {
        Caller* callers = multipass_callers;
        if (TargetArchs::builtWith(GLOBAL_ATOMICS) && DeviceInfo().supports(GLOBAL_ATOMICS))
            callers = singlepass_callers;

        Caller caller = callers[src.type()];
        if (!caller) CV_Error(CV_StsBadArg, "minMax: unsupported type");
        caller(src, minVal, maxVal, buf);
    }
    else
    {
        MaskedCaller* callers = masked_multipass_callers;
        if (TargetArchs::builtWith(GLOBAL_ATOMICS) && DeviceInfo().supports(GLOBAL_ATOMICS))
            callers = masked_singlepass_callers;

        MaskedCaller caller = callers[src.type()];
        if (!caller) CV_Error(CV_StsBadArg, "minMax: unsupported type");
        caller(src, mask, minVal, maxVal, buf);
    }
}
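All of these wrappers share one dispatch idiom: a static table of function pointers indexed by the depth code (CV_8U = 0 through CV_64F = 6), with a null slot marking an unsupported combination. A self-contained sketch of the pattern, with hypothetical names:

#include <cassert>

// typed worker; a real version would launch a kernel specialized for T
template <typename T> void kernel(const void* /*data*/, int /*n*/) {}

typedef void (*caller_t)(const void* data, int n);

void dispatch(int depth, const void* data, int n)
{
    static const caller_t callers[] =
    {
        kernel<unsigned char>, kernel<signed char>,  // CV_8U,  CV_8S
        kernel<unsigned short>, kernel<short>,       // CV_16U, CV_16S
        kernel<int>, kernel<float>, kernel<double>   // CV_32S, CV_32F, CV_64F
    };
    assert(depth >= 0 && depth < 7 && callers[depth] != 0);
    callers[depth](data, n); // one indirect call replaces a switch over depth
}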