/**
 * Build an SNR map by dividing the reconstructed image by the g-factor map, element by element.
 *
 * snr_map is first assigned from recon_obj.recon_res_.data_ (so it takes over that array's
 * dimensions) and is then overwritten with image / g-factor. The g-factor array must match the
 * image in dimensions 0-3 and 6. Along dimensions 4 (N) and 5 (S) it may be shorter, in which
 * case its last entry is reused for the remaining image indices.
 *
 * NOTE(review): g-factor entries equal to zero are not special-cased; the division then yields
 * non-finite values.
 *
 * @param recon_obj  supplies recon_res_.data_ (image) and gfactor_ (g-factor map)
 * @param snr_map    output array; its previous content is discarded
 * @throws whatever GADGET_CHECK_THROW raises when the g-factor dimensions do not fit the image
 */
void GenericReconCartesianGrappaGadget::compute_snr_map(ReconObjType &recon_obj,
                                                            hoNDArray<std::complex<float> > &snr_map) {

        typedef std::complex<float> T;

        // copying the image gives snr_map the right dimensions; the values are replaced below
        snr_map = recon_obj.recon_res_.data_;

        const size_t RO = recon_obj.recon_res_.data_.get_size(0);
        const size_t E1 = recon_obj.recon_res_.data_.get_size(1);
        const size_t E2 = recon_obj.recon_res_.data_.get_size(2);
        const size_t CHA = recon_obj.recon_res_.data_.get_size(3);
        const size_t N = recon_obj.recon_res_.data_.get_size(4);
        const size_t S = recon_obj.recon_res_.data_.get_size(5);
        const size_t SLC = recon_obj.recon_res_.data_.get_size(6);

        const size_t gN = recon_obj.gfactor_.get_size(4);
        const size_t gS = recon_obj.gfactor_.get_size(5);

        GADGET_CHECK_THROW(recon_obj.gfactor_.get_size(0) == RO);
        GADGET_CHECK_THROW(recon_obj.gfactor_.get_size(1) == E1);
        GADGET_CHECK_THROW(recon_obj.gfactor_.get_size(2) == E2);
        GADGET_CHECK_THROW(recon_obj.gfactor_.get_size(3) == CHA);
        GADGET_CHECK_THROW(recon_obj.gfactor_.get_size(6) == SLC);

        // number of elements in one [RO E1 E2 CHA] volume; loop-invariant, so computed once
        const size_t num_per_volume = RO * E1 * E2 * CHA;

        // an empty image leaves nothing to divide
        if (num_per_volume == 0 || N == 0 || S == 0 || SLC == 0) return;

        // the clamps "gN - 1" / "gS - 1" below would wrap around for a zero-sized g-factor dimension
        GADGET_CHECK_THROW(gN > 0);
        GADGET_CHECK_THROW(gS > 0);

        for (size_t slc = 0; slc < SLC; slc++) {
            for (size_t s = 0; s < S; s++) {
                // reuse the last g-factor S entry when the image has more S entries
                const size_t usedS = (s < gS) ? s : gS - 1;

                for (size_t n = 0; n < N; n++) {
                    // reuse the last g-factor N entry when the image has more N entries
                    const size_t usedN = (n < gN) ? n : gN - 1;

                    const float *pG = &(recon_obj.gfactor_(0, 0, 0, 0, usedN, usedS, slc));
                    const T *pIm = &(recon_obj.recon_res_.data_(0, 0, 0, 0, n, s, slc));
                    T *pSNR = &(snr_map(0, 0, 0, 0, n, s, slc));

                    for (size_t ii = 0; ii < num_per_volume; ii++) {
                        pSNR[ii] = pIm[ii] / pG[ii];
                    }
                }
            }
        }

    }
    /**
     * Non-linear SPIRIT unwrapping of one encoding space.
     *
     * Steps, in order:
     *   1. allocate recon_res_.data_ as [RO E1 E2 1 N S SLC] and zero it; make full_kspace_ a
     *      zeroed array with the dimensions of the incoming k-space
     *   2. scale the incoming k-space by the SNR scaling ratio when the effective acceleration
     *      factor is greater than 1
     *   3. without acceleration along E1 and E2, copy the incoming k-space to full_kspace_;
     *      otherwise call perform_nonlinear_spirit_unwrapping() once per (s, slc) pair on views
     *      into the k-space, kernel, calibration and coil-map arrays
     *   4. call perform_spirit_coil_combine()
     *
     * Intermediate arrays are exported through gt_exporter_ when debug_folder_full_path_ is not empty.
     *
     * @param recon_bit  incoming data; recon_bit.data_.data_ is scaled in place in step 2
     * @param recon_obj  holds the calibration inputs (ref_calib_, kernelIm2D_, coil_map_) and
     *                   receives the outputs (full_kspace_, recon_res_)
     * @param e          encoding space index
     * @throws via GADGET_THROW; any exception raised inside is caught and replaced by a generic error
     */
    void GenericReconCartesianNonLinearSpirit2DTGadget::perform_unwrapping(IsmrmrdReconBit& recon_bit, ReconObjType& recon_obj, size_t e)
    {
        try
        {
            size_t RO = recon_bit.data_.data_.get_size(0);
            size_t E1 = recon_bit.data_.data_.get_size(1);
            size_t E2 = recon_bit.data_.data_.get_size(2);
            size_t dstCHA = recon_bit.data_.data_.get_size(3);
            size_t N = recon_bit.data_.data_.get_size(4);
            size_t S = recon_bit.data_.data_.get_size(5);
            size_t SLC = recon_bit.data_.data_.get_size(6);

            hoNDArray< std::complex<float> >& ref = recon_obj.ref_calib_;

            size_t srcCHA = ref.get_size(3);
            size_t ref_N = ref.get_size(4);
            size_t ref_S = ref.get_size(5);

            recon_obj.recon_res_.data_.create(RO, E1, E2, 1, N, S, SLC);
            Gadgetron::clear(recon_obj.recon_res_.data_);

            // copy-then-clear: full_kspace_ takes over the k-space dimensions and starts out zeroed
            recon_obj.full_kspace_ = recon_bit.data_.data_;
            Gadgetron::clear(recon_obj.full_kspace_);

            std::stringstream os;
            os << "encoding_" << e;
            std::string suffix = os.str();

            if (!debug_folder_full_path_.empty()) { gt_exporter_.export_array_complex(recon_bit.data_.data_, debug_folder_full_path_ + "data_src_" + suffix); }

            // ------------------------------------------------------------------
            // compute effective acceleration factor
            // ------------------------------------------------------------------
            float effective_acce_factor(1), snr_scaling_ratio(1);
            this->compute_snr_scaling_factor(recon_bit, effective_acce_factor, snr_scaling_ratio);
            if (effective_acce_factor > 1)
            {
                Gadgetron::scal(snr_scaling_ratio, recon_bit.data_.data_);
            }

            Gadgetron::GadgetronTimer timer(false);

            // ------------------------------------------------------------------
            // compute the reconstruction
            // ------------------------------------------------------------------
            if(this->acceFactorE1_[e]<=1 && this->acceFactorE2_[e]<=1)
            {
                // no undersampling: the (scaled) acquired k-space is already complete
                recon_obj.full_kspace_ = recon_bit.data_.data_;
            }
            else
            {
                hoNDArray< std::complex<float> >& kspace = recon_bit.data_.data_;
                hoNDArray< std::complex<float> >& res = recon_obj.full_kspace_;

                GDEBUG_CONDITION_STREAM(this->verbose.value(), "spirit_parallel_imaging_lamda             : " << this->spirit_parallel_imaging_lamda.value());
                GDEBUG_CONDITION_STREAM(this->verbose.value(), "spirit_image_reg_lamda                    : " << this->spirit_image_reg_lamda.value());
                GDEBUG_CONDITION_STREAM(this->verbose.value(), "spirit_data_fidelity_lamda                : " << this->spirit_data_fidelity_lamda.value());
                GDEBUG_CONDITION_STREAM(this->verbose.value(), "spirit_nl_iter_max                        : " << this->spirit_nl_iter_max.value());
                GDEBUG_CONDITION_STREAM(this->verbose.value(), "spirit_nl_iter_thres                      : " << this->spirit_nl_iter_thres.value());
                GDEBUG_CONDITION_STREAM(this->verbose.value(), "spirit_reg_name                           : " << this->spirit_reg_name.value());
                GDEBUG_CONDITION_STREAM(this->verbose.value(), "spirit_reg_level                          : " << this->spirit_reg_level.value());
                GDEBUG_CONDITION_STREAM(this->verbose.value(), "spirit_reg_keep_approx_coeff              : " << this->spirit_reg_keep_approx_coeff.value());
                GDEBUG_CONDITION_STREAM(this->verbose.value(), "spirit_reg_keep_redundant_dimension_coeff : " << this->spirit_reg_keep_redundant_dimension_coeff.value());
                GDEBUG_CONDITION_STREAM(this->verbose.value(), "spirit_reg_proximity_across_cha           : " << this->spirit_reg_proximity_across_cha.value());
                GDEBUG_CONDITION_STREAM(this->verbose.value(), "spirit_reg_use_coil_sen_map               : " << this->spirit_reg_use_coil_sen_map.value());
                GDEBUG_CONDITION_STREAM(this->verbose.value(), "spirit_reg_RO_weighting_ratio             : " << this->spirit_reg_RO_weighting_ratio.value());
                GDEBUG_CONDITION_STREAM(this->verbose.value(), "spirit_reg_E1_weighting_ratio             : " << this->spirit_reg_E1_weighting_ratio.value());
                GDEBUG_CONDITION_STREAM(this->verbose.value(), "spirit_reg_N_weighting_ratio              : " << this->spirit_reg_N_weighting_ratio.value());

                for (size_t slc = 0; slc < SLC; slc++)
                {
                    for (size_t s = 0; s < S; s++)
                    {
                        // tag identifying this (s, slc) pair in debug file names and timing output
                        std::stringstream os_2DT;
                        os_2DT << "encoding_" << e << "_s" << s << "_slc" << slc;
                        std::string suffix_2DT = os_2DT.str();

                        // ------------------------------
                        // view on the acquired k-space of this (s, slc) pair; no data is copied

                        std::complex<float>* pKspace = &kspace(0, 0, 0, 0, 0, s, slc);
                        hoNDArray< std::complex<float> > kspace2DT(RO, E1, E2, dstCHA, N, 1, 1, pKspace);

                        // ------------------------------
                        // kernel view; the last calibration S entry is reused when s exceeds ref_S

                        long long kernelS = s;
                        if (kernelS >= (long long)ref_S) kernelS = (long long)ref_S - 1;

                        std::complex<float>* pKIm = &recon_obj.kernelIm2D_(0, 0, 0, 0, 0, kernelS, slc);
                        hoNDArray< std::complex<float> > kIm2DT(RO, E1, srcCHA, dstCHA, ref_N, 1, 1, pKIm);

                        // ------------------------------
                        // calibration data view
                        // NOTE(review): the channel extent passed here is dstCHA although ref_calib_
                        // holds srcCHA channels; the two are only equivalent when they match - confirm

                        std::complex<float>* pRef = &ref(0, 0, 0, 0, 0, kernelS, slc);
                        hoNDArray< std::complex<float> > ref2DT(ref.get_size(0), ref.get_size(1), ref.get_size(2), dstCHA, ref_N, 1, 1, pRef);

                        // ------------------------------
                        // coil map view; left empty when the coil map does not have SLC slices

                        hoNDArray< std::complex<float> > coilMap2DT;
                        if (recon_obj.coil_map_.get_size(6) == SLC)
                        {
                            size_t coil_S = recon_obj.coil_map_.get_size(5);
                            std::complex<float>* pCoilMap = &recon_obj.coil_map_(0, 0, 0, 0, 0, ((s>=coil_S) ? coil_S-1 : s), slc);
                            coilMap2DT.create(RO, E1, E2, dstCHA, ref_N, 1, 1, pCoilMap);
                        }

                        // ------------------------------
                        // output view into full_kspace_

                        std::complex<float>* pRes = &res(0, 0, 0, 0, 0, s, slc);
                        hoNDArray< std::complex<float> > res2DT(RO, E1, E2, dstCHA, N, 1, 1, pRes);

                        // ------------------------------

                        if (!debug_folder_full_path_.empty())
                        {
                            gt_exporter_.export_array_complex(kspace2DT, debug_folder_full_path_ + "kspace2DT_nl_spirit_" + suffix_2DT);
                            gt_exporter_.export_array_complex(kIm2DT, debug_folder_full_path_ + "kIm2DT_nl_spirit_" + suffix_2DT);
                            gt_exporter_.export_array_complex(ref2DT, debug_folder_full_path_ + "ref2DT_nl_spirit_" + suffix_2DT);
                        }

                        // ------------------------------

                        std::string timing_str = "SPIRIT, Non-linear unwrapping, 2DT_" + suffix_2DT;
                        if (this->perform_timing.value()) timer.start(timing_str.c_str());
                        this->perform_nonlinear_spirit_unwrapping(kspace2DT, kIm2DT, ref2DT, coilMap2DT, res2DT, e);
                        if (this->perform_timing.value()) timer.stop();

                        if (!debug_folder_full_path_.empty()) { gt_exporter_.export_array_complex(res2DT, debug_folder_full_path_ + "res_nl_spirit_2DT_" + suffix_2DT); }
                    }
                }
            }

            // ---------------------------------------------------------------------
            // compute coil combined images
            // ---------------------------------------------------------------------
            if (this->perform_timing.value()) timer.start("SPIRIT Non linear, coil combination ... ");
            this->perform_spirit_coil_combine(recon_obj);
            if (this->perform_timing.value()) timer.stop();

            if (!debug_folder_full_path_.empty()) { gt_exporter_.export_array_complex(recon_obj.recon_res_.data_, debug_folder_full_path_ + "unwrappedIm_" + suffix); }
        }
        catch (...)
        {
            GADGET_THROW("Errors happened in GenericReconCartesianNonLinearSpirit2DTGadget::perform_unwrapping(...) ... ");
        }
    }
    /**
     * GRAPPA unwrapping of one encoding space.
     *
     * The incoming k-space is inverse Fourier transformed into complex_im_recon_buf_ (3D transform
     * when E2 > 1, 2D otherwise), optionally rescaled, and then combined with the unmixing
     * coefficients from recon_obj.unmixing_coeff_ to give one image per (n, s, slc) in
     * recon_obj.recon_res_.data_, which is allocated as [RO E1 E2 1 N S SLC].
     *
     * When the data has more N or S entries than the calibration data, the last calibration
     * entry is reused.
     *
     * Intermediate arrays are exported through gt_exporter_ when debug_folder_full_path_ is not empty.
     *
     * @param recon_bit  incoming k-space (recon_bit.data_.data_)
     * @param recon_obj  supplies ref_calib_ and unmixing_coeff_, receives recon_res_.data_
     * @param e          encoding space index, used for the acceleration factors and debug file names
     */
    void GenericReconCartesianGrappaGadget::perform_unwrapping(IsmrmrdReconBit &recon_bit, ReconObjType &recon_obj,
                                                               size_t e) {

        typedef std::complex<float> T;

        size_t RO = recon_bit.data_.data_.get_size(0);
        size_t E1 = recon_bit.data_.data_.get_size(1);
        size_t E2 = recon_bit.data_.data_.get_size(2);
        size_t dstCHA = recon_bit.data_.data_.get_size(3);
        size_t N = recon_bit.data_.data_.get_size(4);
        size_t S = recon_bit.data_.data_.get_size(5);
        size_t SLC = recon_bit.data_.data_.get_size(6);

        // only the CHA / N / S extents of the calibration data are needed here
        size_t srcCHA = recon_obj.ref_calib_.get_size(3);
        size_t ref_N = recon_obj.ref_calib_.get_size(4);
        size_t ref_S = recon_obj.ref_calib_.get_size(5);

        size_t unmixingCoeff_CHA = recon_obj.unmixing_coeff_.get_size(3);

        recon_obj.recon_res_.data_.create(RO, E1, E2, 1, N, S, SLC);

        // all debug exports of this call share one file-name suffix
        bool export_debug = !debug_folder_full_path_.empty();
        std::string suffix;
        if (export_debug) {
            std::stringstream os;
            os << "encoding_" << e;
            suffix = os.str();
        }

        if (export_debug) {
            gt_exporter_.export_array_complex(recon_bit.data_.data_, debug_folder_full_path_ + "data_src_" + suffix);
        }

        // compute aliased images
        data_recon_buf_.create(RO, E1, E2, dstCHA, N, S, SLC);

        if (E2 > 1) {
            Gadgetron::hoNDFFT<float>::instance()->ifft3c(recon_bit.data_.data_, complex_im_recon_buf_,
                                                          data_recon_buf_);
        } else {
            Gadgetron::hoNDFFT<float>::instance()->ifft2c(recon_bit.data_.data_, complex_im_recon_buf_,
                                                          data_recon_buf_);
        }

        // SNR unit scaling
        float effective_acce_factor(1), snr_scaling_ratio(1);
        this->compute_snr_scaling_factor(recon_bit, effective_acce_factor, snr_scaling_ratio);
        if (effective_acce_factor > 1) {
            // since the grappa in gadgetron is doing signal preserving scaling, to preserve noise level, we need this compensation factor
            double grappaKernelCompensationFactor = 1.0 / (acceFactorE1_[e] * acceFactorE2_[e]);
            Gadgetron::scal((float) (grappaKernelCompensationFactor * snr_scaling_ratio), complex_im_recon_buf_);

            if (this->verbose.value()) GDEBUG_STREAM(
                    "GenericReconCartesianGrappaGadget, grappaKernelCompensationFactor*snr_scaling_ratio : "
                            << grappaKernelCompensationFactor * snr_scaling_ratio);
        }

        if (export_debug) {
            gt_exporter_.export_array_complex(complex_im_recon_buf_, debug_folder_full_path_ + "aliasedIm_" + suffix);
        }

        // unwrapping: one independent task per (n, s, slc)

        long long num = N * S * SLC;

        long long ii;

        // locals used inside the region stay non-const and are listed explicitly because of default(none)
#pragma omp parallel default(none) private(ii) shared(num, N, S, RO, E1, E2, srcCHA, ref_N, ref_S, recon_obj, unmixingCoeff_CHA) if(num>1)
        {
#pragma omp for
            for (ii = 0; ii < num; ii++) {
                // unravel the flat index, n varying fastest
                size_t slc = ii / (N * S);
                size_t s = (ii - slc * N * S) / N;
                size_t n = ii - slc * N * S - s * N;

                // aliased multi-channel image of this (n, s, slc)
                T *pIm = &(complex_im_recon_buf_(0, 0, 0, 0, n, s, slc));

                // reuse the last calibration entry when the data has more N / S entries
                size_t usedN = n;
                if (n >= ref_N) usedN = ref_N - 1;

                size_t usedS = s;
                if (s >= ref_S) usedS = ref_S - 1;

                T *pUnmix = &(recon_obj.unmixing_coeff_(0, 0, 0, 0, usedN, usedS, slc));

                // the arrays below are views on existing memory, nothing is copied
                T *pRes = &(recon_obj.recon_res_.data_(0, 0, 0, 0, n, s, slc));
                hoNDArray<std::complex<float> > res(RO, E1, E2, 1, pRes);

                hoNDArray<std::complex<float> > unmixing(RO, E1, E2, unmixingCoeff_CHA, pUnmix);

                // the channel extent of the aliased view is capped at srcCHA
                hoNDArray<std::complex<float> > aliasedIm(RO, E1, E2,
                                                          ((unmixingCoeff_CHA <= srcCHA) ? unmixingCoeff_CHA : srcCHA),
                                                          1, pIm);
                Gadgetron::apply_unmix_coeff_aliased_image_3D(aliasedIm, unmixing, res);
            }
        }

        if (export_debug) {
            gt_exporter_.export_array_complex(recon_obj.recon_res_.data_,
                                              debug_folder_full_path_ + "unwrappedIm_" + suffix);
        }

    }
    /**
     * GRAPPA calibration for encoding space e: fills recon_obj.unmixing_coeff_ and
     * recon_obj.gfactor_ and, with acceleration, recon_obj.kernel_ (plus recon_obj.kernelIm_ in
     * the 2D case).
     *
     * Without acceleration along E1 and E2 only Gadgetron::conjugate(coil_map_, unmixing_coeff_)
     * is called and gfactor_ stays zeroed. Otherwise one kernel is calibrated per (n, s, slc)
     * entry of ref_calib_, with the 3D routines when E2 > 1 and the 2D routines otherwise.
     *
     * The downstream_coil_compression property selects the calibration target: when set, the
     * kernel is fitted from ref_calib_ onto ref_calib_dst_; when unset, from ref_calib_ onto itself.
     *
     * @param recon_bit  only the RO / E1 / E2 extents of recon_bit.data_.data_ are read
     * @param recon_obj  holds the inputs (ref_calib_, ref_calib_dst_, coil_map_) and receives the outputs
     * @param e          encoding space index, used to look up the acceleration factors
     */
    void
    GenericReconCartesianGrappaGadget::perform_calib(IsmrmrdReconBit &recon_bit, ReconObjType &recon_obj, size_t e) {

        hoNDArray<std::complex<float> > &src = recon_obj.ref_calib_;
        hoNDArray<std::complex<float> > &dst = recon_obj.ref_calib_dst_;

        // extents of the acquired data grid
        size_t RO = recon_bit.data_.data_.get_size(0);
        size_t E1 = recon_bit.data_.data_.get_size(1);
        size_t E2 = recon_bit.data_.data_.get_size(2);

        // extents of the calibration data
        size_t ref_RO = src.get_size(0);
        size_t ref_E1 = src.get_size(1);
        size_t ref_E2 = src.get_size(2);
        size_t srcCHA = src.get_size(3);
        size_t ref_N = src.get_size(4);
        size_t ref_S = src.get_size(5);
        size_t ref_SLC = src.get_size(6);

        size_t dstCHA = dst.get_size(3);

        // both outputs are allocated and zeroed up front
        recon_obj.unmixing_coeff_.create(RO, E1, E2, srcCHA, ref_N, ref_S, ref_SLC);
        recon_obj.gfactor_.create(RO, E1, E2, 1, ref_N, ref_S, ref_SLC);

        Gadgetron::clear(recon_obj.unmixing_coeff_);
        Gadgetron::clear(recon_obj.gfactor_);

        // no acceleration: no kernel is calibrated
        if (acceFactorE1_[e] <= 1 && acceFactorE2_[e] <= 1) {
            Gadgetron::conjugate(recon_obj.coil_map_, recon_obj.unmixing_coeff_);
            return;
        }

        // kernel sizes requested through the gadget properties
        size_t kRO = grappa_kSize_RO.value();
        size_t kNE1 = grappa_kSize_E1.value();
        size_t kNE2 = grappa_kSize_E2.value();

        // convolution kernel extents, filled in by the kerPattern calls
        size_t convKRO(1), convKE1(1), convKE2(1);

        bool fitItself = this->downstream_coil_compression.value();

        if (E2 > 1) {
            std::vector<int> kE1, oE1;
            std::vector<int> kE2, oE2;
            grappa3d_kerPattern(kE1, oE1, kE2, oE2, convKRO, convKE1, convKE2, (size_t) acceFactorE1_[e],
                                (size_t) acceFactorE2_[e], kRO, kNE1, kNE2, fitItself);
        } else {
            std::vector<int> kE1, oE1;
            Gadgetron::grappa2d_kerPattern(kE1, oE1, convKRO, convKE1, (size_t) acceFactorE1_[e], kRO, kNE1,
                                           fitItself);
            // the image-domain kernel buffer is only allocated for the 2D case
            recon_obj.kernelIm_.create(RO, E1, 1, srcCHA, dstCHA, ref_N, ref_S, ref_SLC);
        }

        recon_obj.kernel_.create(convKRO, convKE1, convKE2, srcCHA, dstCHA, ref_N, ref_S, ref_SLC);

        Gadgetron::clear(recon_obj.kernel_);
        Gadgetron::clear(recon_obj.kernelIm_);

        long long num = ref_N * ref_S * ref_SLC;

        // the loop runs in parallel only when there is more than one (n, s, slc) entry
#pragma omp parallel for default(none) shared(src, dst, recon_obj, e, num, ref_N, ref_S, ref_RO, ref_E1, ref_E2, RO, E1, E2, dstCHA, srcCHA, convKRO, convKE1, convKE2, kRO, kNE1, kNE2, fitItself) if(num>1)
        for (long long ii = 0; ii < num; ii++) {
            // unravel the flat index into (n, s, slc), n varying fastest
            size_t per_slc = ref_N * ref_S;
            size_t slc = ii / per_slc;
            size_t within_slc = ii % per_slc;
            size_t s = within_slc / ref_N;
            size_t n = within_slc % ref_N;

            // tag for the debug exports below, which are currently commented out
            std::stringstream os;
            os << "n" << n << "_s" << s << "_slc" << slc << "_encoding_" << e;
            std::string suffix = os.str();

            // views on the calibration data of this (n, s, slc); nothing is copied
            hoNDArray<std::complex<float> > ref_src(ref_RO, ref_E1, ref_E2, srcCHA, &(src(0, 0, 0, 0, n, s, slc)));
            hoNDArray<std::complex<float> > ref_dst(ref_RO, ref_E1, ref_E2, dstCHA, &(dst(0, 0, 0, 0, n, s, slc)));

            size_t accelE1 = (size_t) acceFactorE1_[e];

            if (E2 > 1) {
                // ---------------- 3D calibration ----------------
                size_t accelE2 = (size_t) acceFactorE2_[e];

                hoNDArray<std::complex<float> > ker(convKRO, convKE1, convKE2, srcCHA, dstCHA,
                                                    &(recon_obj.kernel_(0, 0, 0, 0, 0, n, s, slc)));

                // fit onto the destination calibration data, or onto the source itself
                hoNDArray<std::complex<float> > &fit_target = fitItself ? ref_dst : ref_src;
                Gadgetron::grappa3d_calib_convolution_kernel(ref_src, fit_target, accelE1, accelE2,
                                                             grappa_reg_lamda.value(),
                                                             grappa_calib_over_determine_ratio.value(),
                                                             kRO, kNE1, kNE2, ker);

                //if (!debug_folder_full_path_.empty())
                //{
                //    gt_exporter_.export_array_complex(ker, debug_folder_full_path_ + "convKer3D_" + suffix);
                //}

                hoNDArray<std::complex<float> > coilMap(RO, E1, E2, dstCHA,
                                                        &(recon_obj.coil_map_(0, 0, 0, 0, n, s, slc)));
                hoNDArray<std::complex<float> > unmixC(RO, E1, E2, srcCHA,
                                                       &(recon_obj.unmixing_coeff_(0, 0, 0, 0, n, s, slc)));
                // gFactor is a view, so the result lands directly in recon_obj.gfactor_
                hoNDArray<float> gFactor(RO, E1, E2, 1, &(recon_obj.gfactor_(0, 0, 0, 0, n, s, slc)));
                Gadgetron::grappa3d_unmixing_coeff(ker, coilMap, accelE1, accelE2, unmixC, gFactor);

                //if (!debug_folder_full_path_.empty())
                //{
                //    gt_exporter_.export_array_complex(unmixC, debug_folder_full_path_ + "unmixC_3D_" + suffix);
                //}

                //if (!debug_folder_full_path_.empty())
                //{
                //    gt_exporter_.export_array(gFactor, debug_folder_full_path_ + "gFactor_3D_" + suffix);
                //}
            } else {
                // ---------------- 2D calibration ----------------
                // 3D views of the same calibration memory, without the E2 dimension
                hoNDArray<std::complex<float> > acsSrc(ref_RO, ref_E1, srcCHA,
                                                       const_cast< std::complex<float> *>(ref_src.begin()));
                hoNDArray<std::complex<float> > acsDst(ref_RO, ref_E1, dstCHA,
                                                       const_cast< std::complex<float> *>(ref_dst.begin()));

                hoNDArray<std::complex<float> > convKer(convKRO, convKE1, srcCHA, dstCHA,
                                                        &(recon_obj.kernel_(0, 0, 0, 0, 0, n, s, slc)));
                hoNDArray<std::complex<float> > kIm(RO, E1, srcCHA, dstCHA,
                                                    &(recon_obj.kernelIm_(0, 0, 0, 0, 0, n, s, slc)));

                // fit onto the destination calibration data, or onto the source itself
                hoNDArray<std::complex<float> > &fit_target = fitItself ? acsDst : acsSrc;
                Gadgetron::grappa2d_calib_convolution_kernel(acsSrc, fit_target, accelE1,
                                                             grappa_reg_lamda.value(), kRO, kNE1, convKer);
                Gadgetron::grappa2d_image_domain_kernel(convKer, RO, E1, kIm);

                /*if (!debug_folder_full_path_.empty())
                {
                    gt_exporter_.export_array_complex(convKer, debug_folder_full_path_ + "convKer_" + suffix);
                }

                if (!debug_folder_full_path_.empty())
                {
                    gt_exporter_.export_array_complex(kIm, debug_folder_full_path_ + "kIm_" + suffix);
                }*/

                hoNDArray<std::complex<float> > coilMap(RO, E1, dstCHA,
                                                        &(recon_obj.coil_map_(0, 0, 0, 0, n, s, slc)));
                hoNDArray<std::complex<float> > unmixC(RO, E1, srcCHA,
                                                       &(recon_obj.unmixing_coeff_(0, 0, 0, 0, n, s, slc)));

                // here gFactor owns its own storage and is copied into recon_obj.gfactor_ afterwards
                hoNDArray<float> gFactor;
                Gadgetron::grappa2d_unmixing_coeff(kIm, coilMap, accelE1, unmixC, gFactor);
                memcpy(&(recon_obj.gfactor_(0, 0, 0, 0, n, s, slc)), gFactor.begin(),
                       gFactor.get_number_of_bytes());

                /*if (!debug_folder_full_path_.empty())
                {
                    gt_exporter_.export_array_complex(unmixC, debug_folder_full_path_ + "unmixC_" + suffix);
                }

                if (!debug_folder_full_path_.empty())
                {
                    gt_exporter_.export_array(gFactor, debug_folder_full_path_ + "gFactor_" + suffix);
                }*/
            }
        }

    }
    /**
     * GRAPPA unwrapping of one encoding space, keeping dstCHA channels in the result.
     *
     * The incoming k-space is inverse Fourier transformed into complex_im_recon_buf_ (3D transform
     * when E2 > 1, 2D otherwise), scaled by 1/sqrt(acceFactorE1 * acceFactorE2) when that product
     * is greater than 1, and then passed together with recon_obj.unmixing_coeff_ to
     * apply_unmix_coeff_aliased_image_3D() once per (n, s, slc). The result is written to
     * recon_obj.recon_res_.data_, which is allocated as [RO E1 E2 dstCHA N S SLC].
     *
     * When the data has more N or S entries than the calibration data, the last calibration
     * entry is reused.
     *
     * @param recon_bit  incoming k-space (recon_bit.data_.data_)
     * @param recon_obj  supplies ref_calib_ and unmixing_coeff_, receives recon_res_.data_
     * @param e          encoding space index, used to look up the acceleration factors
     * @throws via GADGET_THROW; any exception raised inside is caught and replaced by a generic error
     */
    void MultiChannelCartesianGrappaReconGadget::perform_unwrapping(IsmrmrdReconBit& recon_bit, ReconObjType& recon_obj, size_t e)
    {
        try
        {
            typedef std::complex<float> T;

            size_t RO = recon_bit.data_.data_.get_size(0);
            size_t E1 = recon_bit.data_.data_.get_size(1);
            size_t E2 = recon_bit.data_.data_.get_size(2);
            size_t dstCHA = recon_bit.data_.data_.get_size(3);
            size_t N = recon_bit.data_.data_.get_size(4);
            size_t S = recon_bit.data_.data_.get_size(5);
            size_t SLC = recon_bit.data_.data_.get_size(6);

            // only the CHA / N / S extents of the calibration data are needed here
            size_t srcCHA = recon_obj.ref_calib_.get_size(3);
            size_t ref_N = recon_obj.ref_calib_.get_size(4);
            size_t ref_S = recon_obj.ref_calib_.get_size(5);

            recon_obj.recon_res_.data_.create(RO, E1, E2, dstCHA, N, S, SLC);

            // compute aliased images
            data_recon_buf_.create(RO, E1, E2, dstCHA, N, S, SLC);

            if (E2>1)
            {
                Gadgetron::hoNDFFT<float>::instance()->ifft3c(recon_bit.data_.data_, complex_im_recon_buf_, data_recon_buf_);
            }
            else
            {
                Gadgetron::hoNDFFT<float>::instance()->ifft2c(recon_bit.data_.data_, complex_im_recon_buf_, data_recon_buf_);
            }

            // SNR unit scaling
            float effectiveAcceFactor = acceFactorE1_[e] * acceFactorE2_[e];
            if (effectiveAcceFactor > 1)
            {
                float fftCompensationRatio = (float)(1.0 / std::sqrt(effectiveAcceFactor));
                Gadgetron::scal(fftCompensationRatio, complex_im_recon_buf_);
            }

            // unwrapping: one independent task per (n, s, slc)

            long long num = N*S*SLC;

            long long ii;

            // locals used inside the region stay non-const and are listed explicitly because of default(none)
#pragma omp parallel default(none) private(ii) shared(num, N, S, RO, E1, E2, srcCHA, ref_N, ref_S, recon_obj, dstCHA) if(num>1)
            {
#pragma omp for 
                for (ii = 0; ii < num; ii++)
                {
                    // unravel the flat index, n varying fastest
                    size_t slc = ii / (N*S);
                    size_t s = (ii - slc*N*S) / N;
                    size_t n = ii - slc*N*S - s*N;

                    // aliased multi-channel image of this (n, s, slc); a view, nothing is copied
                    T* pIm = &(complex_im_recon_buf_(0, 0, 0, 0, n, s, slc));
                    hoNDArray< std::complex<float> > aliasedIm(RO, E1, E2, srcCHA, 1, pIm);

                    // reuse the last calibration entry when the data has more N / S entries
                    size_t usedN = n;
                    if (n >= ref_N) usedN = ref_N - 1;

                    size_t usedS = s;
                    if (s >= ref_S) usedS = ref_S - 1;

                    T* pUnmix = &(recon_obj.unmixing_coeff_(0, 0, 0, 0, usedN, usedS, slc));
                    hoNDArray< std::complex<float> > unmixing(RO, E1, E2, srcCHA, pUnmix);

                    // NOTE(review): the result view has dstCHA channels while the unmixing view has a
                    // single srcCHA channel dimension - confirm this is what the callee expects
                    T* pRes = &(recon_obj.recon_res_.data_(0, 0, 0, 0, n, s, slc));
                    hoNDArray< std::complex<float> > res(RO, E1, E2, dstCHA, pRes);

                    Gadgetron::apply_unmix_coeff_aliased_image_3D(aliasedIm, unmixing, res);
                }
            }
        }
        catch (...)
        {
            GADGET_THROW("Errors happened in MultiChannelCartesianGrappaReconGadget::perform_unwrapping(...) ... ");
        }
    }
    /**
     * GRAPPA calibration for encoding space e: fills recon_obj.unmixing_coeff_ and
     * recon_obj.gfactor_ and, with acceleration, recon_obj.kernel_ (plus recon_obj.kernelIm_ in
     * the 2D case).
     *
     * Source and destination calibration data both alias recon_obj.ref_calib_, so the kernel is
     * always fitted from the calibration data onto itself.
     *
     * Without acceleration along E1 and E2 only Gadgetron::conjugate(coil_map_, unmixing_coeff_)
     * is called and gfactor_ stays zeroed. Otherwise one kernel is calibrated per (n, s, slc)
     * entry of ref_calib_, with the 3D routines when E2 > 1 and the 2D routines otherwise.
     *
     * @param recon_bit  only the RO / E1 / E2 extents of recon_bit.data_.data_ are read
     * @param recon_obj  holds the inputs (ref_calib_, coil_map_) and receives the outputs
     * @param e          encoding space index, used to look up the acceleration factors
     * @throws via GADGET_THROW; any exception raised inside is caught and replaced by a generic error
     */
    void MultiChannelCartesianGrappaReconGadget::perform_calib(IsmrmrdReconBit& recon_bit, ReconObjType& recon_obj, size_t e)
    {
        try
        {
            size_t RO = recon_bit.data_.data_.get_size(0);
            size_t E1 = recon_bit.data_.data_.get_size(1);
            size_t E2 = recon_bit.data_.data_.get_size(2);

            hoNDArray< std::complex<float> >& src = recon_obj.ref_calib_;
            hoNDArray< std::complex<float> >& dst = recon_obj.ref_calib_;

            size_t ref_RO = src.get_size(0);
            size_t ref_E1 = src.get_size(1);
            size_t ref_E2 = src.get_size(2);
            size_t srcCHA = src.get_size(3);
            size_t ref_N = src.get_size(4);
            size_t ref_S = src.get_size(5);
            size_t ref_SLC = src.get_size(6);

            size_t dstCHA = dst.get_size(3);

            // both outputs are allocated and zeroed up front
            recon_obj.unmixing_coeff_.create(RO, E1, E2, srcCHA, ref_N, ref_S, ref_SLC);
            recon_obj.gfactor_.create(RO, E1, E2, 1, ref_N, ref_S, ref_SLC);

            Gadgetron::clear(recon_obj.unmixing_coeff_);
            Gadgetron::clear(recon_obj.gfactor_);

            if (acceFactorE1_[e] <= 1 && acceFactorE2_[e] <= 1)
            {
                // no acceleration: no kernel is calibrated
                Gadgetron::conjugate(recon_obj.coil_map_, recon_obj.unmixing_coeff_);
            }
            else
            {
                // kernel sizes requested through the gadget properties
                size_t kRO = grappa_kSize_RO.value();
                size_t kNE1 = grappa_kSize_E1.value();
                size_t kNE2 = grappa_kSize_E2.value();

                // convolution kernel extents, filled in by the kerPattern calls
                size_t convKRO(1), convKE1(1), convKE2(1);

                // the kernel pattern is requested with fitItself enabled in both the 2D and 3D case
                bool fitItself = true;

                if (E2 > 1)
                {
                    std::vector<int> kE1, oE1;
                    std::vector<int> kE2, oE2;
                    grappa3d_kerPattern(kE1, oE1, kE2, oE2, convKRO, convKE1, convKE2, (size_t)acceFactorE1_[e], (size_t)acceFactorE2_[e], kRO, kNE1, kNE2, fitItself);
                }
                else
                {
                    std::vector<int> kE1, oE1;
                    Gadgetron::grappa2d_kerPattern(kE1, oE1, convKRO, convKE1, (size_t)acceFactorE1_[e], kRO, kNE1, fitItself);
                    // the image-domain kernel buffer is only allocated for the 2D case
                    recon_obj.kernelIm_.create(RO, E1, 1, srcCHA, dstCHA, ref_N, ref_S, ref_SLC);
                }

                recon_obj.kernel_.create(convKRO, convKE1, convKE2, srcCHA, dstCHA, ref_N, ref_S, ref_SLC);
                Gadgetron::clear(recon_obj.kernel_);
                Gadgetron::clear(recon_obj.kernelIm_);

                long long num = ref_N*ref_S*ref_SLC;

                long long ii;

                // the loop runs in parallel only when there is more than one (n, s, slc) entry
#pragma omp parallel for default(none) private(ii) shared(src, dst, recon_obj, e, num, ref_N, ref_S, ref_RO, ref_E1, ref_E2, RO, E1, E2, dstCHA, srcCHA, convKRO, convKE1, convKE2, kRO, kNE1, kNE2) if(num>1)
                for (ii = 0; ii < num; ii++)
                {
                    // unravel the flat index into (n, s, slc), n varying fastest
                    size_t slc = ii / (ref_N*ref_S);
                    size_t s = (ii - slc*ref_N*ref_S) / (ref_N);
                    size_t n = ii - slc*ref_N*ref_S - s*ref_N;

                    // views on the calibration data of this (n, s, slc); nothing is copied
                    std::complex<float>* pSrc = &(src(0, 0, 0, 0, n, s, slc));
                    hoNDArray< std::complex<float> > ref_src(ref_RO, ref_E1, ref_E2, srcCHA, pSrc);

                    std::complex<float>* pDst = &(dst(0, 0, 0, 0, n, s, slc));
                    hoNDArray< std::complex<float> > ref_dst(ref_RO, ref_E1, ref_E2, dstCHA, pDst);

                    // -----------------------------------

                    if (E2 > 1)
                    {
                        hoNDArray< std::complex<float> > ker(convKRO, convKE1, convKE2, srcCHA, dstCHA, &(recon_obj.kernel_(0, 0, 0, 0, 0, n, s, slc)));
                        Gadgetron::grappa3d_calib_convolution_kernel(ref_src, ref_dst, (size_t)acceFactorE1_[e], (size_t)acceFactorE2_[e], grappa_reg_lamda.value(), grappa_calib_over_determine_ratio.value(), kRO, kNE1, kNE2, ker);

                        hoNDArray< std::complex<float> > coilMap(RO, E1, E2, dstCHA, &(recon_obj.coil_map_(0, 0, 0, 0, n, s, slc)));
                        hoNDArray< std::complex<float> > unmixC(RO, E1, E2, srcCHA, &(recon_obj.unmixing_coeff_(0, 0, 0, 0, n, s, slc)));
                        // gFactor is a view, so the result lands directly in recon_obj.gfactor_
                        hoNDArray<float> gFactor(RO, E1, E2, 1, &(recon_obj.gfactor_(0, 0, 0, 0, n, s, slc)));
                        Gadgetron::grappa3d_unmixing_coeff(ker, coilMap, (size_t)acceFactorE1_[e], (size_t)acceFactorE2_[e], unmixC, gFactor);

                    }
                    else
                    {
                        // 3D views of the same calibration memory, without the E2 dimension
                        hoNDArray< std::complex<float> > acsSrc(ref_RO, ref_E1, srcCHA, const_cast< std::complex<float>*>(ref_src.begin()));
                        hoNDArray< std::complex<float> > acsDst(ref_RO, ref_E1, dstCHA, const_cast< std::complex<float>*>(ref_dst.begin()));

                        hoNDArray< std::complex<float> > convKer(convKRO, convKE1, srcCHA, dstCHA, &(recon_obj.kernel_(0, 0, 0, 0, 0, n, s, slc)));
                        hoNDArray< std::complex<float> > kIm(RO, E1, srcCHA, dstCHA, &(recon_obj.kernelIm_(0, 0, 0, 0, 0, n, s, slc)));

                        Gadgetron::grappa2d_calib_convolution_kernel(acsSrc, acsDst, (size_t)acceFactorE1_[e], grappa_reg_lamda.value(), kRO, kNE1, convKer);
                        Gadgetron::grappa2d_image_domain_kernel(convKer, RO, E1, kIm);

                        hoNDArray< std::complex<float> > coilMap(RO, E1, dstCHA, &(recon_obj.coil_map_(0, 0, 0, 0, n, s, slc)));
                        hoNDArray< std::complex<float> > unmixC(RO, E1, srcCHA, &(recon_obj.unmixing_coeff_(0, 0, 0, 0, n, s, slc)));

                        // here gFactor owns its own storage and is copied into recon_obj.gfactor_ afterwards
                        hoNDArray<float> gFactor;

                        Gadgetron::grappa2d_unmixing_coeff(kIm, coilMap, (size_t)acceFactorE1_[e], unmixC, gFactor);
                        memcpy(&(recon_obj.gfactor_(0, 0, 0, 0, n, s, slc)), gFactor.begin(), gFactor.get_number_of_bytes());

                    }

                    // -----------------------------------
                }
            }
        }
        catch (...)
        {
            GADGET_THROW("Errors happened in MultiChannelCartesianGrappaReconGadget::perform_calib(...) ... ");
        }
    }