/**
 * Reconstructs a coil-resolved complex image set from non-Cartesian samples.
 *
 * If density compensation weights are supplied, a single backwards NC2C NFFT
 * (gridding) is performed. Otherwise the data is handed to a conjugate
 * gradient solver that uses a cuNFFTOperator as its encoding operator.
 *
 * @param data   Sample data; passed to the NFFT plan / solver unchanged.
 * @param traj   Sampling trajectory; used through a one-dimensional array
 *               spanning all of its elements.
 * @param dcw    Density compensation weights, or a null pointer to select the
 *               iterative path.
 * @param ncoils Number of coils; appended as the last output dimension.
 * @return Array with dimensions image_dims_recon_ followed by ncoils
 *         (gridding path), or whatever the solver returns (iterative path).
 */
boost::shared_ptr<cuNDArray<float_complext> > gpuBufferSensePrepGadget::reconstruct_regularization(
		cuNDArray<float_complext>* data, cuNDArray<floatd2>* traj, cuNDArray<float>* dcw, size_t ncoils ) {

	// Output/domain layout: reconstruction matrix dimensions followed by the coil dimension.
	std::vector<size_t> csm_dims = image_dims_recon_;
	csm_dims.push_back(ncoils);

	// One-dimensional array over every trajectory element.
	// NOTE(review): presumably this wraps traj's buffer without copying or
	// taking ownership - confirm against the cuNDArray(dims, pointer) constructor.
	std::vector<size_t> flat_dims = {traj->get_number_of_elements()};
	cuNDArray<floatd2> flat_traj(flat_dims,traj->get_data_ptr());

	if (dcw) { //We have density compensation, so we can get away with gridding

		cuNFFT_plan<float,2> plan(from_std_vector<size_t,2>(image_dims_recon_),image_dims_recon_os_,kernel_width_);

		// Owned by a shared_ptr from the start so that an exception thrown by
		// preprocess()/compute() cannot leak the allocation.
		auto result = boost::make_shared<cuNDArray<float_complext>>(csm_dims);

		// size_t values require %zu; %i is a mismatched printf specifier.
		GDEBUG("Coils %zu \n\n",ncoils);
		GDEBUG("traj: %zu data %zu\n",traj->get_number_of_elements(),data->get_number_of_elements());

		GDEBUG("Preprocessing\n\n");
		plan.preprocess(&flat_traj,cuNFFT_plan<float,2>::NFFT_PREP_NC2C);
		GDEBUG("Computing\n\n");
		plan.compute(data,result.get(),dcw,cuNFFT_plan<float,2>::NFFT_BACKWARDS_NC2C);

		return result;
	}

	//No density compensation, we have to do iterative reconstruction.
	auto E = boost::make_shared<cuNFFTOperator<float,2>>();
	E->setup(from_std_vector<size_t,2>(image_dims_recon_),image_dims_recon_os_,kernel_width_);
	E->set_domain_dimensions(&csm_dims);

	cuCgSolver<float_complext> solver;
	// NOTE(review): the iteration limit is 0 here; kept as in the original -
	// confirm that this is intentional for the no-dcw path.
	solver.set_max_iterations(0);
	solver.set_encoding_operator(E);
	solver.set_output_mode(cuCgSolver<float_complext>::OUTPUT_VERBOSE);

	E->set_codomain_dimensions(data->get_dimensions().get());
	E->preprocess(&flat_traj);

	return solver.solve(data);
}
/**
 * Estimates coil sensitivity maps from spiral data.
 *
 * Coil images are first reconstructed from the leading
 * data->get_size(0) x data->get_size(1) block of the data: by a backwards
 * NC2C NFFT when density compensation weights are given, otherwise by a
 * conjugate gradient solve (20 iterations max). The coil images are then
 * passed to estimate_b1_map<float,2>.
 *
 * @param data Sample data; only the first get_size(0)*get_size(1) elements are used.
 * @param traj Sampling trajectory; only the first data->get_size(0) elements are used.
 * @param dcw  Density compensation weights (first data->get_size(0) elements
 *             used), or a null pointer to select the iterative path.
 * @return The result of estimate_b1_map<float,2> on the reconstructed coil images.
 */
boost::shared_ptr<cuNDArray<float_complext> > gpuCSICoilEstimationGadget::calculate_CSM(
		cuNDArray<float_complext>* data, cuNDArray<floatd2>* traj, cuNDArray<float>* dcw ) {

	// Coil image layout: image dimensions followed by the coil dimension.
	std::vector<size_t> csm_dims = img_size;
	csm_dims.push_back(coils);

	// Arrays over the start of the data and trajectory buffers (zero offset).
	// NOTE(review): presumably these wrap the existing buffers without copying -
	// confirm against the cuNDArray(dims, pointer) constructor.
	std::vector<size_t> spiral_dims{data->get_size(0),data->get_size(1)}; //Trajectories, coils
	cuNDArray<complext<float>> spiral_data(spiral_dims,data->get_data_ptr());
	std::vector<size_t> spiral_traj_dims{spiral_dims[0]};
	cuNDArray<floatd2> spiral_traj(spiral_traj_dims,traj->get_data_ptr());

	if (dcw) { //We have density compensation, so we can get away with gridding

		// The oversampled grid is twice the image size.
		cuNFFT_plan<float,2> plan(from_std_vector<size_t,2>(img_size),from_std_vector<size_t,2>(img_size)*size_t(2),kernel_width_);
		cuNDArray<float_complext> coil_images(csm_dims);

		// get_size() yields size_t, which requires %zu rather than %i.
		GDEBUG("Coils %zu \n\n",coil_images.get_size(2));

		cuNDArray<float> spiral_dcw(spiral_traj_dims,dcw->get_data_ptr());

		GDEBUG("Preprocessing\n\n");
		plan.preprocess(&spiral_traj,cuNFFT_plan<float,2>::NFFT_PREP_NC2C);
		GDEBUG("Computing\n\n");
		plan.compute(&spiral_data,&coil_images,&spiral_dcw,cuNFFT_plan<float,2>::NFFT_BACKWARDS_NC2C);

		return estimate_b1_map<float,2>(&coil_images);
	}

	//No density compensation, we have to do iterative reconstruction.
	auto E = boost::make_shared<cuNFFTOperator<float,2>>();
	E->setup(from_std_vector<size_t,2>(img_size),from_std_vector<size_t,2>(img_size)*size_t(2),kernel_width_);
	E->set_domain_dimensions(&csm_dims);

	cuCgSolver<float_complext> solver;
	solver.set_max_iterations(20);
	solver.set_encoding_operator(E);

	E->set_codomain_dimensions(&spiral_dims);
	E->preprocess(&spiral_traj);

	auto coil_images = solver.solve(&spiral_data);
	return estimate_b1_map<float,2>(coil_images.get());
}