void sort(const hoNDArray<T>& x, hoNDArray<T>& r, bool isascending)
    {
        if ( &r != &x )
        {
            if ( r.get_number_of_elements()!=x.get_number_of_elements())
            {
                r = x;
            }
            else
            {
                memcpy(r.begin(), x.begin(), x.get_number_of_bytes());
            }
        }

        sort(x.get_number_of_elements(), x.begin(), r.begin(), isascending);
    }
void grappa2d_calib(const hoNDArray<T>& acsSrc, const hoNDArray<T>& acsDst, double thres, size_t kRO, const std::vector<int>& kE1, const std::vector<int>& oE1, size_t startRO, size_t endRO, size_t startE1, size_t endE1, hoNDArray<T>& ker)
{
    try
    {
        GADGET_CHECK_THROW(acsSrc.get_size(0)==acsDst.get_size(0));
        GADGET_CHECK_THROW(acsSrc.get_size(1)==acsDst.get_size(1));
        GADGET_CHECK_THROW(acsSrc.get_size(2)>=acsDst.get_size(2));

        size_t RO = acsSrc.get_size(0);
        size_t E1 = acsSrc.get_size(1);
        size_t srcCHA = acsSrc.get_size(2);
        size_t dstCHA = acsDst.get_size(2);

        const T* pSrc = acsSrc.begin();
        const T* pDst = acsDst.begin();

        long long kROhalf = kRO/2;
        if ( 2*kROhalf == kRO )
        {
            GWARN_STREAM("grappa<T>::calib(...) - 2*kROhalf == kRO " << kRO);
        }
        kRO = 2*kROhalf + 1;

        size_t kNE1 = kE1.size();
        size_t oNE1 = oE1.size();

        /// allocate kernel
        ker.create(kRO, kNE1, srcCHA, dstCHA, oNE1);

        /// loop over the calibration region and assemble the equation
        /// Ax = b

        size_t sRO = startRO + kROhalf;
        size_t eRO = endRO - kROhalf;
        size_t sE1 = std::abs(kE1[0]) + startE1;
        size_t eE1 = endE1 - kE1[kNE1-1];

        size_t lenRO = eRO - sRO + 1;

        size_t rowA = (eE1-sE1+1)*lenRO;
        size_t colA = kRO*kNE1*srcCHA;
        size_t colB = dstCHA*oNE1;

        hoMatrix<T> A;
        hoMatrix<T> B;
        hoMatrix<T> x( colA, colB );

        hoNDArray<T> A_mem(rowA, colA);
        A.createMatrix( rowA, colA, A_mem.begin() );
        T* pA = A.begin();

        hoNDArray<T> B_mem(rowA, colB);
        B.createMatrix( A.rows(), colB, B_mem.begin() );
        T* pB = B.begin();

        long long e1;
        for ( e1=(long long)sE1; e1<=(long long)eE1; e1++ )
        {
            for ( long long ro=sRO; ro<=(long long)eRO; ro++ )
            {
                long long rInd = (e1-sE1)*lenRO+ro-kROhalf;

                size_t src, dst, ke1, oe1;
                long long kro;

                /// fill matrix A
                size_t col = 0;
                size_t offset = 0;
                for ( src=0; src<srcCHA; src++ )
                {
                    for ( ke1=0; ke1<kNE1; ke1++ )
                    {
                        offset = src*RO*E1 + (e1+kE1[ke1])*RO;
                        for ( kro=-kROhalf; kro<=kROhalf; kro++ )
                        {
                            /// A(rInd, col++) = acsSrc(ro+kro, e1+kE1[ke1], src);
                            pA[rInd + col*rowA] = pSrc[ro+kro+offset];
                            col++;
                        }
                    }
                }

                /// fill matrix B
                col = 0;
                for ( oe1=0; oe1<oNE1; oe1++ )
                {
                    for ( dst=0; dst<dstCHA; dst++ )
                    {
                        B(rInd, col++) = acsDst(ro, e1+oE1[oe1], dst);
                    }
                }
            }
        }

        SolveLinearSystem_Tikhonov(A, B, x, thres);
        memcpy(ker.begin(), x.begin(), ker.get_number_of_bytes());
    }
    catch(...)
    {
        GADGET_THROW("Errors in grappa2d_calib(...) ... ");
    }

    return;
}