void AdaptiveManifoldFilterN::computeEigenVector(const vector<Mat>& X, const Mat1b& mask, Mat1f& vecDst, int num_pca_iterations, const Mat1f& vecRand)
{
    int cnNum = (int)X.size();
    int height = X[0].rows;
    int width = X[0].cols;

    vecDst.create(1, cnNum);
    CV_Assert(vecRand.size() == Size(cnNum, 1) && vecDst.size() == Size(cnNum, 1));
    CV_Assert(mask.rows == height && mask.cols == width);
    
    const float *pVecRand = vecRand.ptr<float>();
    Mat1d vecDstd(1, cnNum, 0.0);
    double *pVecDst = vecDstd.ptr<double>();
    Mat1f Xw(height, width);

    for (int iter = 0; iter < num_pca_iterations; iter++)
    {
        for (int  i = 0; i < height; i++)
        {
            const uchar *maskRow = mask.ptr<uchar>(i);
            float *mulRow = Xw.ptr<float>(i);

            //first multiplication 
            for (int cn = 0; cn < cnNum; cn++)
            {
                const float *srcRow = X[cn].ptr<float>(i);
                const float cnVal = pVecRand[cn];

                if (cn == 0)
                {
                    for (int j = 0; j < width; j++)
                        mulRow[j] = cnVal*srcRow[j];
                }
                else
                {
                    for (int j = 0; j < width; j++)
                        mulRow[j] += cnVal*srcRow[j];
                }
            }

            for (int j = 0; j < width; j++)
                if (!maskRow[j]) mulRow[j] = 0.0f;

            //second multiplication
            for (int cn = 0; cn < cnNum; cn++)
            {
                float curCnSum = 0.0f;
                const float *srcRow = X[cn].ptr<float>(i);

                for (int j = 0; j < width; j++)
                    curCnSum += mulRow[j]*srcRow[j];

                //TODO: parallel reduce 
                pVecDst[cn] += curCnSum;
            }
        }
    }

    divide(vecDstd, norm(vecDstd), vecDst);
}
void computeEigenVector(const Mat1f& X, const Mat1b& mask, Mat1f& dst, int num_pca_iterations, const Mat1f& rand_vec)
{
    CV_DbgAssert( X.cols == rand_vec.cols );
    CV_DbgAssert( X.rows == mask.size().area() );
    CV_DbgAssert( rand_vec.rows == 1 );

    dst.create(rand_vec.size());
    rand_vec.copyTo(dst);

    Mat1f t(X.size());

    float* dst_row = dst[0];

    for (int i = 0; i < num_pca_iterations; ++i)
    {
        t.setTo(Scalar::all(0));

        for (int y = 0, ind = 0; y < mask.rows; ++y)
        {
            const uchar* mask_row = mask[y];

            for (int x = 0; x < mask.cols; ++x, ++ind)
            {
                if (mask_row[x])
                {
                    const float* X_row = X[ind];
                    float* t_row = t[ind];

                    float dots = 0.0;
                    for (int c = 0; c < X.cols; ++c)
                        dots += dst_row[c] * X_row[c];

                    for (int c = 0; c < X.cols; ++c)
                        t_row[c] = dots * X_row[c];
                }
            }
        }

        dst.setTo(0.0);
        for (int k = 0; k < X.rows; ++k)
        {
            const float* t_row = t[k];

            for (int c = 0; c < X.cols; ++c)
            {
                dst_row[c] += t_row[c];
            }
        }
    }

    double n = norm(dst);
    divide(dst, n, dst);
}
void AdaptiveManifoldFilterN::h_filter(const Mat1f& src, Mat& dst, float sigma)
{
    CV_DbgAssert(src.depth() == CV_32F);

    const float a = exp(-sqrt(2.0f) / sigma);

    dst.create(src.size(), CV_32FC1);

    for (int y = 0; y < src.rows; ++y)
    {
        const float* src_row = src[y];
        float* dst_row = dst.ptr<float>(y);

        dst_row[0] = src_row[0];
        for (int x = 1; x < src.cols; ++x)
        {
            dst_row[x] = src_row[x] + a * (dst_row[x - 1] - src_row[x]);
        }
        for (int x = src.cols - 2; x >= 0; --x)
        {
            dst_row[x] = dst_row[x] + a * (dst_row[x + 1] - dst_row[x]);
        }
    }

    for (int y = 1; y < src.rows; ++y)
    {
        float* dst_cur_row = dst.ptr<float>(y);
        float* dst_prev_row = dst.ptr<float>(y-1);

        rf_vert_row_pass(dst_cur_row, dst_prev_row, a, src.cols);
    }
    for (int y = src.rows - 2; y >= 0; --y)
    {
        float* dst_cur_row = dst.ptr<float>(y);
        float* dst_prev_row = dst.ptr<float>(y+1);

        rf_vert_row_pass(dst_cur_row, dst_prev_row, a, src.cols);
    }
}
void AdaptiveManifoldFilterN::computeClusters(Mat1b& cluster, Mat1b& cluster_minus, Mat1b& cluster_plus)
{
    Mat difEtaSrc;
    {
        vector<Mat> eta_difCn(jointCnNum);
        for (int i = 0; i < jointCnNum; i++)
            subtract(jointCn[i], etaFull[i], eta_difCn[i]);

        merge(eta_difCn, difEtaSrc);
        difEtaSrc = difEtaSrc.reshape(1, (int)difEtaSrc.total());
        CV_DbgAssert(difEtaSrc.cols == jointCnNum);
    }

    Mat1f initVec(1, jointCnNum);
    if (useRNG)
    {
        rnd.fill(initVec, RNG::UNIFORM, -0.5, 0.5);
    }
    else
    {
        for (int i = 0; i < (int)initVec.total(); i++)
            initVec(0, i) = (i % 2 == 0) ? 0.5f : -0.5f;
    }

    Mat1f eigenVec(1, jointCnNum);
    computeEigenVector(difEtaSrc, cluster, eigenVec, num_pca_iterations_, initVec);

    Mat1f difOreientation;
    gemm(difEtaSrc, eigenVec, 1, noArray(), 0, difOreientation, GEMM_2_T);
    difOreientation = difOreientation.reshape(1, srcSize.height);
    CV_DbgAssert(difOreientation.size() == srcSize);

    compare(difOreientation, 0, cluster_minus, CMP_LT);
    bitwise_and(cluster_minus, cluster, cluster_minus);

    compare(difOreientation, 0, cluster_plus, CMP_GE);
    bitwise_and(cluster_plus, cluster, cluster_plus);
}
void AdaptiveManifoldFilterN::computeClusters(Mat1b& cluster, Mat1b& cluster_minus, Mat1b& cluster_plus)
{
    
    Mat1f difOreientation;
    if (jointCnNum > 1)
    {
        Mat1f initVec(1, jointCnNum);
        if (useRNG)
        {
            rnd.fill(initVec, RNG::UNIFORM, -0.5, 0.5);
        }
        else
        {
            for (int i = 0; i < (int)initVec.total(); i++)
                initVec(0, i) = (i % 2 == 0) ? 0.5f : -0.5f;
        }

        vector<Mat> difEtaSrc(jointCnNum);
        for (int i = 0; i < jointCnNum; i++)
            subtract(jointCn[i], etaFull[i], difEtaSrc[i]);

        Mat1f eigenVec(1, jointCnNum);
        computeEigenVector(difEtaSrc, cluster, eigenVec, num_pca_iterations_, initVec);

        computeOrientation(difEtaSrc, eigenVec, difOreientation);
        CV_DbgAssert(difOreientation.size() == srcSize);
    }
    else
    {
        subtract(jointCn[0], etaFull[0], difOreientation);
    }

    compare(difOreientation, 0, cluster_minus, CMP_LT);
    bitwise_and(cluster_minus, cluster, cluster_minus);

    compare(difOreientation, 0, cluster_plus, CMP_GE);
    bitwise_and(cluster_plus, cluster, cluster_plus);
}