void
    GenericReconCartesianGrappaGadget::perform_calib(IsmrmrdReconBit &recon_bit, ReconObjType &recon_obj, size_t e) {

        size_t RO = recon_bit.data_.data_.get_size(0);
        size_t E1 = recon_bit.data_.data_.get_size(1);
        size_t E2 = recon_bit.data_.data_.get_size(2);

        hoNDArray<std::complex<float> > &src = recon_obj.ref_calib_;
        hoNDArray<std::complex<float> > &dst = recon_obj.ref_calib_dst_;

        size_t ref_RO = src.get_size(0);
        size_t ref_E1 = src.get_size(1);
        size_t ref_E2 = src.get_size(2);
        size_t srcCHA = src.get_size(3);
        size_t ref_N = src.get_size(4);
        size_t ref_S = src.get_size(5);
        size_t ref_SLC = src.get_size(6);

        size_t dstCHA = dst.get_size(3);

        recon_obj.unmixing_coeff_.create(RO, E1, E2, srcCHA, ref_N, ref_S, ref_SLC);
        recon_obj.gfactor_.create(RO, E1, E2, 1, ref_N, ref_S, ref_SLC);

        Gadgetron::clear(recon_obj.unmixing_coeff_);
        Gadgetron::clear(recon_obj.gfactor_);

        if (acceFactorE1_[e] <= 1 && acceFactorE2_[e] <= 1) {
            Gadgetron::conjugate(recon_obj.coil_map_, recon_obj.unmixing_coeff_);
        } else {
            // allocate buffer for kernels
            size_t kRO = grappa_kSize_RO.value();
            size_t kNE1 = grappa_kSize_E1.value();
            size_t kNE2 = grappa_kSize_E2.value();

            size_t convKRO(1), convKE1(1), convKE2(1);

            bool fitItself = this->downstream_coil_compression.value();

            if (E2 > 1) {
                std::vector<int> kE1, oE1;
                std::vector<int> kE2, oE2;
                grappa3d_kerPattern(kE1, oE1, kE2, oE2, convKRO, convKE1, convKE2, (size_t) acceFactorE1_[e],
                                    (size_t) acceFactorE2_[e], kRO, kNE1, kNE2, fitItself);
            } else {
                std::vector<int> kE1, oE1;
                Gadgetron::grappa2d_kerPattern(kE1, oE1, convKRO, convKE1, (size_t) acceFactorE1_[e], kRO, kNE1,
                                               fitItself);
                recon_obj.kernelIm_.create(RO, E1, 1, srcCHA, dstCHA, ref_N, ref_S, ref_SLC);
            }

            recon_obj.kernel_.create(convKRO, convKE1, convKE2, srcCHA, dstCHA, ref_N, ref_S, ref_SLC);

            Gadgetron::clear(recon_obj.kernel_);
            Gadgetron::clear(recon_obj.kernelIm_);

            long long num = ref_N * ref_S * ref_SLC;

            long long ii;

            // only allow this for loop openmp if num>1 and 2D recon
#pragma omp parallel for default(none) private(ii) shared(src, dst, recon_obj, e, num, ref_N, ref_S, ref_RO, ref_E1, ref_E2, RO, E1, E2, dstCHA, srcCHA, convKRO, convKE1, convKE2, kRO, kNE1, kNE2, fitItself) if(num>1)
            for (ii = 0; ii < num; ii++) {
                size_t slc = ii / (ref_N * ref_S);
                size_t s = (ii - slc * ref_N * ref_S) / (ref_N);
                size_t n = ii - slc * ref_N * ref_S - s * ref_N;

                std::stringstream os;
                os << "n" << n << "_s" << s << "_slc" << slc << "_encoding_" << e;
                std::string suffix = os.str();

                std::complex<float> *pSrc = &(src(0, 0, 0, 0, n, s, slc));
                hoNDArray<std::complex<float> > ref_src(ref_RO, ref_E1, ref_E2, srcCHA, pSrc);

                std::complex<float> *pDst = &(dst(0, 0, 0, 0, n, s, slc));
                hoNDArray<std::complex<float> > ref_dst(ref_RO, ref_E1, ref_E2, dstCHA, pDst);

                // -----------------------------------

                if (E2 > 1) {
                    hoNDArray<std::complex<float> > ker(convKRO, convKE1, convKE2, srcCHA, dstCHA,
                                                        &(recon_obj.kernel_(0, 0, 0, 0, 0, n, s, slc)));

                    if (fitItself)
                    {
                        Gadgetron::grappa3d_calib_convolution_kernel(ref_src, ref_dst, (size_t)acceFactorE1_[e],
                            (size_t)acceFactorE2_[e], grappa_reg_lamda.value(),
                            grappa_calib_over_determine_ratio.value(), kRO, kNE1,
                            kNE2, ker);
                    }
                    else
                    {
                        Gadgetron::grappa3d_calib_convolution_kernel(ref_src, ref_src, (size_t)acceFactorE1_[e],
                            (size_t)acceFactorE2_[e], grappa_reg_lamda.value(),
                            grappa_calib_over_determine_ratio.value(), kRO, kNE1,
                            kNE2, ker);
                    }

                    //if (!debug_folder_full_path_.empty())
                    //{
                    //    gt_exporter_.export_array_complex(ker, debug_folder_full_path_ + "convKer3D_" + suffix);
                    //}

                    hoNDArray<std::complex<float> > coilMap(RO, E1, E2, dstCHA,
                                                            &(recon_obj.coil_map_(0, 0, 0, 0, n, s, slc)));
                    hoNDArray<std::complex<float> > unmixC(RO, E1, E2, srcCHA,
                                                           &(recon_obj.unmixing_coeff_(0, 0, 0, 0, n, s, slc)));
                    hoNDArray<float> gFactor(RO, E1, E2, 1, &(recon_obj.gfactor_(0, 0, 0, 0, n, s, slc)));
                    Gadgetron::grappa3d_unmixing_coeff(ker, coilMap, (size_t) acceFactorE1_[e],
                                                       (size_t) acceFactorE2_[e], unmixC, gFactor);

                    //if (!debug_folder_full_path_.empty())
                    //{
                    //    gt_exporter_.export_array_complex(unmixC, debug_folder_full_path_ + "unmixC_3D_" + suffix);
                    //}

                    //if (!debug_folder_full_path_.empty())
                    //{
                    //    gt_exporter_.export_array(gFactor, debug_folder_full_path_ + "gFactor_3D_" + suffix);
                    //}
                } else {
                    hoNDArray<std::complex<float> > acsSrc(ref_RO, ref_E1, srcCHA,
                                                           const_cast< std::complex<float> *>(ref_src.begin()));
                    hoNDArray<std::complex<float> > acsDst(ref_RO, ref_E1, dstCHA,
                                                           const_cast< std::complex<float> *>(ref_dst.begin()));

                    hoNDArray<std::complex<float> > convKer(convKRO, convKE1, srcCHA, dstCHA,
                                                            &(recon_obj.kernel_(0, 0, 0, 0, 0, n, s, slc)));
                    hoNDArray<std::complex<float> > kIm(RO, E1, srcCHA, dstCHA,
                                                        &(recon_obj.kernelIm_(0, 0, 0, 0, 0, n, s, slc)));

                    if (fitItself)
                    {
                        Gadgetron::grappa2d_calib_convolution_kernel(acsSrc, acsDst, (size_t)acceFactorE1_[e],
                            grappa_reg_lamda.value(), kRO, kNE1, convKer);
                    }
                    else
                    {
                        Gadgetron::grappa2d_calib_convolution_kernel(acsSrc, acsSrc, (size_t)acceFactorE1_[e],
                            grappa_reg_lamda.value(), kRO, kNE1, convKer);
                    }
                    Gadgetron::grappa2d_image_domain_kernel(convKer, RO, E1, kIm);

                    /*if (!debug_folder_full_path_.empty())
                    {
                        gt_exporter_.export_array_complex(convKer, debug_folder_full_path_ + "convKer_" + suffix);
                    }

                    if (!debug_folder_full_path_.empty())
                    {
                        gt_exporter_.export_array_complex(kIm, debug_folder_full_path_ + "kIm_" + suffix);
                    }*/

                    hoNDArray<std::complex<float> > coilMap(RO, E1, dstCHA,
                                                            &(recon_obj.coil_map_(0, 0, 0, 0, n, s, slc)));
                    hoNDArray<std::complex<float> > unmixC(RO, E1, srcCHA,
                                                           &(recon_obj.unmixing_coeff_(0, 0, 0, 0, n, s, slc)));
                    hoNDArray<float> gFactor;

                    Gadgetron::grappa2d_unmixing_coeff(kIm, coilMap, (size_t) acceFactorE1_[e], unmixC, gFactor);
                    memcpy(&(recon_obj.gfactor_(0, 0, 0, 0, n, s, slc)), gFactor.begin(),
                           gFactor.get_number_of_bytes());

                    /*if (!debug_folder_full_path_.empty())
                    {
                        gt_exporter_.export_array_complex(unmixC, debug_folder_full_path_ + "unmixC_" + suffix);
                    }

                    if (!debug_folder_full_path_.empty())
                    {
                        gt_exporter_.export_array(gFactor, debug_folder_full_path_ + "gFactor_" + suffix);
                    }*/
                }

                // -----------------------------------
            }
        }

    }
    void GenericReconCartesianNonLinearSpirit2DTGadget::perform_nonlinear_spirit_unwrapping(hoNDArray< std::complex<float> >& kspace, 
        hoNDArray< std::complex<float> >& kerIm, hoNDArray< std::complex<float> >& ref2DT, hoNDArray< std::complex<float> >& coilMap2DT, hoNDArray< std::complex<float> >& res, size_t e)
    {
        try
        {
            bool print_iter = this->spirit_print_iter.value();

            size_t RO = kspace.get_size(0);
            size_t E1 = kspace.get_size(1);
            size_t E2 = kspace.get_size(2);
            size_t CHA = kspace.get_size(3);
            size_t N = kspace.get_size(4);
            size_t S = kspace.get_size(5);
            size_t SLC = kspace.get_size(6);

            size_t ref_N = kerIm.get_size(4);
            size_t ref_S = kerIm.get_size(5);

            hoNDArray< std::complex<float> > kspaceLinear(kspace);
            res = kspace;

            // detect whether random sampling is used
            bool use_random_sampling = false;
            std::vector<long long> sampled_step_size;
            long long n, e1;
            for (n=0; n<(long long)N; n++)
            {
                long long prev_sampled_line = -1;
                for (e1=0; e1<(long long)E1; e1++)
                {
                    if(std::abs(kspace(RO/2, e1, 0, 0, 0, 0, 0))>0 && std::abs(kspace(RO/2, e1, 0, CHA-1, 0, 0, 0))>0)
                    {
                        if(prev_sampled_line>0)
                        {
                            sampled_step_size.push_back(e1 - prev_sampled_line);
                        }

                        prev_sampled_line = e1;
                    }
                }
            }

            if(sampled_step_size.size()>4)
            {
                size_t s;
                for (s=2; s<sampled_step_size.size()-1; s++)
                {
                    if(sampled_step_size[s]!=sampled_step_size[s-1])
                    {
                        use_random_sampling = true;
                        break;
                    }
                }
            }

            if(use_random_sampling)
            {
                GDEBUG_STREAM("SPIRIT Non linear, random sampling is detected ... ");
            }

            Gadgetron::GadgetronTimer timer(false);

            boost::shared_ptr< hoNDArray< std::complex<float> > > coilMap;

            bool hasCoilMap = false;
            if (coilMap2DT.get_size(0) == RO && coilMap2DT.get_size(1) == E1 && coilMap2DT.get_size(3)==CHA)
            {
                if (ref_N < N)
                {
                    coilMap = boost::shared_ptr< hoNDArray< std::complex<float> > >(new hoNDArray< std::complex<float> >(RO, E1, CHA, coilMap2DT.begin()));
                }
                else
                {
                    coilMap = boost::shared_ptr< hoNDArray< std::complex<float> > >(new hoNDArray< std::complex<float> >(RO, E1, CHA, ref_N, coilMap2DT.begin()));
                }

                hasCoilMap = true;
            }

            hoNDArray<float> gFactor;
            float gfactorMedian = 0;

            float smallest_eigen_value(0);

            // -----------------------------------------------------
            // estimate gfactor
            // -----------------------------------------------------

            // mean over N
            hoNDArray< std::complex<float> > meanKSpace;

            if(calib_mode_[e]==ISMRMRD_interleaved)
            {
                Gadgetron::compute_averaged_data_N_S(kspace, true, true, true, meanKSpace);
            }
            else
            {
                Gadgetron::compute_averaged_data_N_S(ref2DT, true, true, true, meanKSpace);
            }

            if (!debug_folder_full_path_.empty()) { gt_exporter_.export_array_complex(meanKSpace, debug_folder_full_path_ + "spirit_nl_2DT_meanKSpace"); }

            hoNDArray< std::complex<float> > acsSrc(meanKSpace.get_size(0), meanKSpace.get_size(1), CHA, meanKSpace.begin());
            hoNDArray< std::complex<float> > acsDst(meanKSpace.get_size(0), meanKSpace.get_size(1), CHA, meanKSpace.begin());

            double grappa_reg_lamda = 0.0005;
            size_t kRO = 5;
            size_t kE1 = 4;

            hoNDArray< std::complex<float> > convKer;
            hoNDArray< std::complex<float> > kIm(RO, E1, CHA, CHA);

            Gadgetron::grappa2d_calib_convolution_kernel(acsSrc, acsDst, (size_t)this->acceFactorE1_[e], grappa_reg_lamda, kRO, kE1, convKer);
            Gadgetron::grappa2d_image_domain_kernel(convKer, RO, E1, kIm);

            hoNDArray< std::complex<float> > unmixC;

            if(hasCoilMap)
            {
                Gadgetron::grappa2d_unmixing_coeff(kIm, *coilMap, (size_t)acceFactorE1_[e], unmixC, gFactor);

                if (!debug_folder_full_path_.empty()) gt_exporter_.export_array(gFactor, debug_folder_full_path_ + "spirit_nl_2DT_gFactor");

                hoNDArray<float> gfactorSorted(gFactor);
                std::sort(gfactorSorted.begin(), gfactorSorted.begin()+RO*E1);
                gfactorMedian = gFactor((RO*E1 / 2));

                GDEBUG_STREAM("SPIRIT Non linear, the median gfactor is found to be : " << gfactorMedian);
            }

            if (!debug_folder_full_path_.empty()) gt_exporter_.export_array_complex(kIm, debug_folder_full_path_ + "spirit_nl_2DT_kIm");

            hoNDArray< std::complex<float> > complexIm;

            // compute linear solution as the initialization
            if(use_random_sampling)
            {
                if (this->perform_timing.value()) timer.start("SPIRIT Non linear, perform linear spirit recon ... ");
                this->perform_spirit_unwrapping(kspace, kerIm, kspaceLinear);
                if (this->perform_timing.value()) timer.stop();
            }
            else
            {
                if (this->perform_timing.value()) timer.start("SPIRIT Non linear, perform linear recon ... ");

                //size_t ref2DT_RO = ref2DT.get_size(0);
                //size_t ref2DT_E1 = ref2DT.get_size(1);

                //// mean over N
                //hoNDArray< std::complex<float> > meanKSpace;
                //Gadgetron::sum_over_dimension(ref2DT, meanKSpace, 4);

                //if (!debug_folder_full_path_.empty()) { gt_exporter_.export_array_complex(meanKSpace, debug_folder_full_path_ + "spirit_nl_2DT_meanKSpace"); }

                //hoNDArray< std::complex<float> > acsSrc(ref2DT_RO, ref2DT_E1, CHA, meanKSpace.begin());
                //hoNDArray< std::complex<float> > acsDst(ref2DT_RO, ref2DT_E1, CHA, meanKSpace.begin());

                //double grappa_reg_lamda = 0.0005;
                //size_t kRO = 5;
                //size_t kE1 = 4;

                //hoNDArray< std::complex<float> > convKer;
                //hoNDArray< std::complex<float> > kIm(RO, E1, CHA, CHA);

                //Gadgetron::grappa2d_calib_convolution_kernel(acsSrc, acsDst, (size_t)this->acceFactorE1_[e], grappa_reg_lamda, kRO, kE1, convKer);
                //Gadgetron::grappa2d_image_domain_kernel(convKer, RO, E1, kIm);

                //if (!debug_folder_full_path_.empty()) gt_exporter_.export_array_complex(kIm, debug_folder_full_path_ + "spirit_nl_2DT_kIm");

                Gadgetron::hoNDFFT<float>::instance()->ifft2c(kspace, complex_im_recon_buf_);
                if (!debug_folder_full_path_.empty()) gt_exporter_.export_array_complex(complex_im_recon_buf_, debug_folder_full_path_ + "spirit_nl_2DT_aliasedImage");

                hoNDArray< std::complex<float> > resKSpace(RO, E1, CHA, N);
                hoNDArray< std::complex<float> > aliasedImage(RO, E1, CHA, N, complex_im_recon_buf_.begin());
                Gadgetron::grappa2d_image_domain_unwrapping_aliased_image(aliasedImage, kIm, resKSpace);

                if (!debug_folder_full_path_.empty()) gt_exporter_.export_array_complex(resKSpace, debug_folder_full_path_ + "spirit_nl_2DT_linearImage");

                Gadgetron::hoNDFFT<float>::instance()->fft2c(resKSpace);

                memcpy(kspaceLinear.begin(), resKSpace.begin(), resKSpace.get_number_of_bytes());

                Gadgetron::apply_unmix_coeff_aliased_image(aliasedImage, unmixC, complexIm);

                if (this->perform_timing.value()) timer.stop();
            }

            if (!debug_folder_full_path_.empty()) gt_exporter_.export_array_complex(kspaceLinear, debug_folder_full_path_ + "spirit_nl_2DT_kspaceLinear");

            if(hasCoilMap)
            {
                if(N>=spirit_reg_minimal_num_images_for_noise_floor.value())
                {
                    // estimate the noise level

                    if(use_random_sampling)
                    {
                        Gadgetron::hoNDFFT<float>::instance()->ifft2c(kspaceLinear, complex_im_recon_buf_);

                        hoNDArray< std::complex<float> > complexLinearImage(RO, E1, CHA, N, complex_im_recon_buf_.begin());

                        Gadgetron::coil_combine(complexLinearImage, *coilMap, 2, complexIm);
                    }

                    if (!debug_folder_full_path_.empty()) gt_exporter_.export_array_complex(complexIm, debug_folder_full_path_ + "spirit_nl_2DT_linearImage_complexIm");

                    // if N is sufficiently large, we can estimate the noise floor by the smallest eigen value
                    hoMatrix< std::complex<float> > data;
                    data.createMatrix(RO*E1, N, complexIm.begin(), false);

                    hoNDArray< std::complex<float> > eigenVectors, eigenValues, eigenVectorsPruned;

                    // compute eigen
                    hoNDKLT< std::complex<float> > klt;
                    klt.prepare(data, (size_t)1, (size_t)0);
                    klt.eigen_value(eigenValues);

                    if (this->verbose.value())
                    {
                        GDEBUG_STREAM("SPIRIT Non linear, computes eigen values for all 2D kspaces ... ");
                        eigenValues.print(std::cout);

                        for (size_t i = 0; i<eigenValues.get_size(0); i++)
                        {
                            GDEBUG_STREAM(i << " = " << eigenValues(i));
                        }
                    }

                    smallest_eigen_value = std::sqrt( std::abs(eigenValues(N - 1).real()) / (RO*E1) );
                    GDEBUG_STREAM("SPIRIT Non linear, the smallest eigen value is : " << smallest_eigen_value);
                }
            }

            // perform nonlinear reconstruction
            {
                boost::shared_ptr<hoNDArray< std::complex<float> > > ker(new hoNDArray< std::complex<float> >(RO, E1, CHA, CHA, ref_N, kerIm.begin()));
                boost::shared_ptr<hoNDArray< std::complex<float> > > acq(new hoNDArray< std::complex<float> >(RO, E1, CHA, N, kspace.begin()));
                hoNDArray< std::complex<float> > kspaceInitial(RO, E1, CHA, N, kspaceLinear.begin());
                hoNDArray< std::complex<float> > res2DT(RO, E1, CHA, N, res.begin());

                if (this->spirit_data_fidelity_lamda.value() > 0)
                {
                    GDEBUG_STREAM("Start the NL SPIRIT data fidelity iteration - regularization strength : " << this->spirit_image_reg_lamda.value()
                                    << " - number of iteration : "                      << this->spirit_nl_iter_max.value()
                                    << " - proximity across cha : "                     << this->spirit_reg_proximity_across_cha.value()
                                    << " - redundant dimension weighting ratio : "      << this->spirit_reg_N_weighting_ratio.value()
                                    << " - using coil sen map : "                       << this->spirit_reg_use_coil_sen_map.value()
                                    << " - iter thres : "                               << this->spirit_nl_iter_thres.value()
                                    << " - wavelet name : "                             << this->spirit_reg_name.value()
                                    );

                    typedef hoGdSolver< hoNDArray< std::complex<float> >, hoWavelet2DTOperator< std::complex<float> > > SolverType;
                    SolverType solver;
                    solver.iterations_ = this->spirit_nl_iter_max.value();
                    solver.set_output_mode(this->spirit_print_iter.value() ? SolverType::OUTPUT_VERBOSE : SolverType::OUTPUT_SILENT);
                    solver.grad_thres_ = this->spirit_nl_iter_thres.value();

                    if(spirit_reg_estimate_noise_floor.value() && std::abs(smallest_eigen_value)>0)
                    {
                        solver.scale_factor_ = smallest_eigen_value;
                        solver.proximal_strength_ratio_ = this->spirit_image_reg_lamda.value() * gfactorMedian;

                        GDEBUG_STREAM("SPIRIT Non linear, eigen value is used to derive the regularization strength : " << solver.proximal_strength_ratio_ << " - smallest eigen value : " << solver.scale_factor_);
                    }
                    else
                    {
                        solver.proximal_strength_ratio_ = this->spirit_image_reg_lamda.value();
                    }

                    boost::shared_ptr< hoNDArray< std::complex<float> > > x0 = boost::make_shared< hoNDArray< std::complex<float> > >(kspaceInitial);
                    solver.set_x0(x0);

                    // parallel imaging term
                    std::vector<size_t> dims;
                    acq->get_dimensions(dims);
                    hoSPIRIT2DTDataFidelityOperator< std::complex<float> > spirit(&dims);
                    spirit.set_forward_kernel(*ker, false);
                    spirit.set_acquired_points(*acq);

                    // image reg term
                    hoWavelet2DTOperator< std::complex<float> > wav3DOperator(&dims);
                    wav3DOperator.set_acquired_points(*acq);
                    wav3DOperator.scale_factor_first_dimension_ = this->spirit_reg_RO_weighting_ratio.value();
                    wav3DOperator.scale_factor_second_dimension_ = this->spirit_reg_E1_weighting_ratio.value();
                    wav3DOperator.scale_factor_third_dimension_ = this->spirit_reg_N_weighting_ratio.value();
                    wav3DOperator.with_approx_coeff_ = !this->spirit_reg_keep_approx_coeff.value();
                    wav3DOperator.change_coeffcients_third_dimension_boundary_ = !this->spirit_reg_keep_redundant_dimension_coeff.value();
                    wav3DOperator.proximity_across_cha_ = this->spirit_reg_proximity_across_cha.value();
                    wav3DOperator.no_null_space_ = true;
                    wav3DOperator.input_in_kspace_ = true;
                    wav3DOperator.select_wavelet(this->spirit_reg_name.value());

                    if (this->spirit_reg_use_coil_sen_map.value() && hasCoilMap)
                    {
                        wav3DOperator.coil_map_ = *coilMap;
                    }

                    // set operators

                    solver.oper_system_ = &spirit;
                    solver.oper_reg_ = &wav3DOperator;

                    if (this->perform_timing.value()) timer.start("NonLinear SPIRIT solver for 2DT with data fidelity ... ");
                    solver.solve(*acq, res2DT);
                    if (this->perform_timing.value()) timer.stop();

                    if (!debug_folder_full_path_.empty()) gt_exporter_.export_array_complex(res2DT, debug_folder_full_path_ + "spirit_nl_2DT_data_fidelity_res");
                }
                else
                {
                    GDEBUG_STREAM("Start the NL SPIRIT iteration with regularization strength : "<< this->spirit_image_reg_lamda.value()
                                    << " - number of iteration : " << this->spirit_nl_iter_max.value()
                                    << " - proximity across cha : " << this->spirit_reg_proximity_across_cha.value()
                                    << " - redundant dimension weighting ratio : " << this->spirit_reg_N_weighting_ratio.value()
                                    << " - using coil sen map : " << this->spirit_reg_use_coil_sen_map.value()
                                    << " - iter thres : " << this->spirit_nl_iter_thres.value()
                                    << " - wavelet name : " << this->spirit_reg_name.value()
                                    );

                    typedef hoGdSolver< hoNDArray< std::complex<float> >, hoWavelet2DTOperator< std::complex<float> > > SolverType;
                    SolverType solver;
                    solver.iterations_ = this->spirit_nl_iter_max.value();
                    solver.set_output_mode(this->spirit_print_iter.value() ? SolverType::OUTPUT_VERBOSE : SolverType::OUTPUT_SILENT);
                    solver.grad_thres_ = this->spirit_nl_iter_thres.value();

                    if(spirit_reg_estimate_noise_floor.value() && std::abs(smallest_eigen_value)>0)
                    {
                        solver.scale_factor_ = smallest_eigen_value;
                        solver.proximal_strength_ratio_ = this->spirit_image_reg_lamda.value() * gfactorMedian;

                        GDEBUG_STREAM("SPIRIT Non linear, eigen value is used to derive the regularization strength : " << solver.proximal_strength_ratio_ << " - smallest eigen value : " << solver.scale_factor_);
                    }
                    else
                    {
                        solver.proximal_strength_ratio_ = this->spirit_image_reg_lamda.value();
                    }

                    boost::shared_ptr< hoNDArray< std::complex<float> > > x0 = boost::make_shared< hoNDArray< std::complex<float> > >(kspaceInitial);
                    solver.set_x0(x0);

                    // parallel imaging term
                    std::vector<size_t> dims;
                    acq->get_dimensions(dims);

                    hoSPIRIT2DTOperator< std::complex<float> > spirit(&dims);
                    spirit.set_forward_kernel(*ker, false);
                    spirit.set_acquired_points(*acq);
                    spirit.no_null_space_ = true;
                    spirit.use_non_centered_fft_ = false;

                    // image reg term
                    std::vector<size_t> dim;
                    acq->get_dimensions(dim);

                    hoWavelet2DTOperator< std::complex<float> > wav3DOperator(&dim);
                    wav3DOperator.set_acquired_points(*acq);
                    wav3DOperator.scale_factor_first_dimension_ = this->spirit_reg_RO_weighting_ratio.value();
                    wav3DOperator.scale_factor_second_dimension_ = this->spirit_reg_E1_weighting_ratio.value();
                    wav3DOperator.scale_factor_third_dimension_ = this->spirit_reg_N_weighting_ratio.value();
                    wav3DOperator.with_approx_coeff_ = !this->spirit_reg_keep_approx_coeff.value();
                    wav3DOperator.change_coeffcients_third_dimension_boundary_ = !this->spirit_reg_keep_redundant_dimension_coeff.value();
                    wav3DOperator.proximity_across_cha_ = this->spirit_reg_proximity_across_cha.value();
                    wav3DOperator.no_null_space_ = true;
                    wav3DOperator.input_in_kspace_ = true;
                    wav3DOperator.select_wavelet(this->spirit_reg_name.value());

                    if (this->spirit_reg_use_coil_sen_map.value() && hasCoilMap)
                    {
                        wav3DOperator.coil_map_ = *coilMap;
                    }

                    // set operators
                    solver.oper_system_ = &spirit;
                    solver.oper_reg_ = &wav3DOperator;

                    // set call back
                    solverCallBack cb;
                    cb.solver_ = &solver;
                    solver.call_back_ = &cb;

                    hoNDArray< std::complex<float> > b(kspaceInitial);
                    Gadgetron::clear(b);

                    if (this->perform_timing.value()) timer.start("NonLinear SPIRIT solver for 2DT ... ");
                    solver.solve(b, res2DT);
                    if (this->perform_timing.value()) timer.stop();

                    if (!debug_folder_full_path_.empty()) gt_exporter_.export_array_complex(res2DT, debug_folder_full_path_ + "spirit_nl_2DT_res");

                    spirit.restore_acquired_kspace(kspace, res2DT);

                    if (!debug_folder_full_path_.empty()) gt_exporter_.export_array_complex(res2DT, debug_folder_full_path_ + "spirit_nl_2DT_res_restored");
                }
            }
        }
        catch (...)
        {
            GADGET_THROW("Errors happened in GenericReconCartesianNonLinearSpirit2DTGadget::perform_nonlinear_spirit_unwrapping(...) ... ");
        }
    }
    void MultiChannelCartesianGrappaReconGadget::perform_calib(IsmrmrdReconBit& recon_bit, ReconObjType& recon_obj, size_t e)
    {
        try
        {
            size_t RO = recon_bit.data_.data_.get_size(0);
            size_t E1 = recon_bit.data_.data_.get_size(1);
            size_t E2 = recon_bit.data_.data_.get_size(2);

            hoNDArray< std::complex<float> >& src = recon_obj.ref_calib_;
            hoNDArray< std::complex<float> >& dst = recon_obj.ref_calib_;

            size_t ref_RO = src.get_size(0);
            size_t ref_E1 = src.get_size(1);
            size_t ref_E2 = src.get_size(2);
            size_t srcCHA = src.get_size(3);
            size_t ref_N = src.get_size(4);
            size_t ref_S = src.get_size(5);
            size_t ref_SLC = src.get_size(6);

            size_t dstCHA = dst.get_size(3);

            recon_obj.unmixing_coeff_.create(RO, E1, E2, srcCHA, ref_N, ref_S, ref_SLC);
            recon_obj.gfactor_.create(RO, E1, E2, 1, ref_N, ref_S, ref_SLC);

            Gadgetron::clear(recon_obj.unmixing_coeff_);
            Gadgetron::clear(recon_obj.gfactor_);

            if (acceFactorE1_[e] <= 1 && acceFactorE2_[e] <= 1)
            {
                Gadgetron::conjugate(recon_obj.coil_map_, recon_obj.unmixing_coeff_);
            }
            else
            {
                // allocate buffer for kernels
                size_t kRO = grappa_kSize_RO.value();
                size_t kNE1 = grappa_kSize_E1.value();
                size_t kNE2 = grappa_kSize_E2.value();

                size_t convKRO(1), convKE1(1), convKE2(1);

                if (E2 > 1)
                {
                    std::vector<int> kE1, oE1;
                    std::vector<int> kE2, oE2;
                    bool fitItself = true;
                    grappa3d_kerPattern(kE1, oE1, kE2, oE2, convKRO, convKE1, convKE2, (size_t)acceFactorE1_[e], (size_t)acceFactorE2_[e], kRO, kNE1, kNE2, fitItself);
                }
                else
                {
                    std::vector<int> kE1, oE1;
                    bool fitItself = true;

                    Gadgetron::grappa2d_kerPattern(kE1, oE1, convKRO, convKE1, (size_t)acceFactorE1_[e], kRO, kNE1, fitItself);
                    recon_obj.kernelIm_.create(RO, E1, 1, srcCHA, dstCHA, ref_N, ref_S, ref_SLC);
                }

                recon_obj.kernel_.create(convKRO, convKE1, convKE2, srcCHA, dstCHA, ref_N, ref_S, ref_SLC);
                Gadgetron::clear(recon_obj.kernel_);
                Gadgetron::clear(recon_obj.kernelIm_);

                long long num = ref_N*ref_S*ref_SLC;

                long long ii;

#pragma omp parallel for default(none) private(ii) shared(src, dst, recon_obj, e, num, ref_N, ref_S, ref_RO, ref_E1, ref_E2, RO, E1, E2, dstCHA, srcCHA, convKRO, convKE1, convKE2, kRO, kNE1, kNE2) if(num>1)
                for (ii = 0; ii < num; ii++)
                {
                    size_t slc = ii / (ref_N*ref_S);
                    size_t s = (ii - slc*ref_N*ref_S) / (ref_N);
                    size_t n = ii - slc*ref_N*ref_S - s*ref_N;

                    std::stringstream os;
                    os << "n" << n << "_s" << s << "_slc" << slc << "_encoding_" << e;
                    std::string suffix = os.str();

                    std::complex<float>* pSrc = &(src(0, 0, 0, 0, n, s, slc));
                    hoNDArray< std::complex<float> > ref_src(ref_RO, ref_E1, ref_E2, srcCHA, pSrc);

                    std::complex<float>* pDst = &(dst(0, 0, 0, 0, n, s, slc));
                    hoNDArray< std::complex<float> > ref_dst(ref_RO, ref_E1, ref_E2, dstCHA, pDst);

                    // -----------------------------------

                    if (E2 > 1)
                    {
                        hoNDArray< std::complex<float> > ker(convKRO, convKE1, convKE2, srcCHA, dstCHA, &(recon_obj.kernel_(0, 0, 0, 0, 0, n, s, slc)));
                        Gadgetron::grappa3d_calib_convolution_kernel(ref_src, ref_dst, (size_t)acceFactorE1_[e], (size_t)acceFactorE2_[e], grappa_reg_lamda.value(), grappa_calib_over_determine_ratio.value(), kRO, kNE1, kNE2, ker);

                        hoNDArray< std::complex<float> > coilMap(RO, E1, E2, dstCHA, &(recon_obj.coil_map_(0, 0, 0, 0, n, s, slc)));
                        hoNDArray< std::complex<float> > unmixC(RO, E1, E2, srcCHA, &(recon_obj.unmixing_coeff_(0, 0, 0, 0, n, s, slc)));
                        hoNDArray<float> gFactor(RO, E1, E2, 1, &(recon_obj.gfactor_(0, 0, 0, 0, n, s, slc)));
                        Gadgetron::grappa3d_unmixing_coeff(ker, coilMap, (size_t)acceFactorE1_[e], (size_t)acceFactorE2_[e], unmixC, gFactor);

                    }
                    else
                    {
                        hoNDArray< std::complex<float> > acsSrc(ref_RO, ref_E1, srcCHA, const_cast< std::complex<float>*>(ref_src.begin()));
                        hoNDArray< std::complex<float> > acsDst(ref_RO, ref_E1, dstCHA, const_cast< std::complex<float>*>(ref_dst.begin()));

                        hoNDArray< std::complex<float> > convKer(convKRO, convKE1, srcCHA, dstCHA, &(recon_obj.kernel_(0, 0, 0, 0, 0, n, s, slc)));
                        hoNDArray< std::complex<float> > kIm(RO, E1, srcCHA, dstCHA, &(recon_obj.kernelIm_(0, 0, 0, 0, 0, n, s, slc)));

                        Gadgetron::grappa2d_calib_convolution_kernel(acsSrc, acsDst, (size_t)acceFactorE1_[e], grappa_reg_lamda.value(), kRO, kNE1, convKer);
                        Gadgetron::grappa2d_image_domain_kernel(convKer, RO, E1, kIm);

                        hoNDArray< std::complex<float> > coilMap(RO, E1, dstCHA, &(recon_obj.coil_map_(0, 0, 0, 0, n, s, slc)));
                        hoNDArray< std::complex<float> > unmixC(RO, E1, srcCHA, &(recon_obj.unmixing_coeff_(0, 0, 0, 0, n, s, slc)));
                        hoNDArray<float> gFactor;

                        Gadgetron::grappa2d_unmixing_coeff(kIm, coilMap, (size_t)acceFactorE1_[e], unmixC, gFactor);
                        memcpy(&(recon_obj.gfactor_(0, 0, 0, 0, n, s, slc)), gFactor.begin(), gFactor.get_number_of_bytes());

                    }

                    // -----------------------------------
                }
            }
        }
        catch (...)
        {
            GADGET_THROW("Errors happened in MultiChannelCartesianGrappaReconGadget::perform_calib(...) ... ");
        }
    }