int gpuBufferSensePrepGadget::process(
		GadgetContainerMessage<IsmrmrdReconData>* m1) {

	IsmrmrdReconData* recondata= m1->getObjectPtr();

	if (recondata->rbit_.size() != 1){
		throw std::runtime_error("gpuBufferSensePrepGadget only support a single encoding space");
	}

	IsmrmrdReconBit& reconbit = recondata->rbit_[0];

	GenericReconJob job;

	IsmrmrdDataBuffered* buffer = &reconbit.data_;

	//Use reference data if available.
	if (reconbit.ref_){
		GDEBUG("Using Reference data for CSM estimation\n");
		buffer = reconbit.ref_.get_ptr();
	}

	size_t ncoils = buffer->headers_[0].active_channels;


	std::vector<size_t> new_order = {0,1,2,4,5,6,3};

	boost::shared_ptr<cuNDArray<float>> dcw;
	boost::shared_ptr<cuNDArray<floatd2>> traj;
	if (buffer->trajectory_){
		auto & trajectory = *buffer->trajectory_;

		if (buffer->headers_[0].trajectory_dimensions == 3){
			auto traj_dcw = separate_traj_and_dcw(&trajectory);
			dcw = boost::make_shared<cuNDArray<float>>(std::get<1>(traj_dcw).get());
			traj = boost::make_shared<cuNDArray<floatd2>>(std::get<0>(traj_dcw).get());
		} else if (buffer->headers_[0].trajectory_dimensions == 2){
			auto old_traj_dims = *trajectory.get_dimensions();
			std::vector<size_t> traj_dims (old_traj_dims.begin()+1,old_traj_dims.end()); //Remove first element
			hoNDArray<floatd2> tmp_traj(traj_dims,(floatd2*)trajectory.get_data_ptr());
			traj = boost::make_shared<cuNDArray<floatd2>>(tmp_traj);
		} else {
			throw std::runtime_error("Unsupported number of trajectory dimensions");
		}
	}
	{
		auto tmpdim = *buffer->data_.get_dimensions();
		for (auto dim : tmpdim)
			std::cout << dim << " ";
		std::cout << std::endl;
		auto permuted = permute((hoNDArray<float_complext>*)&buffer->data_,&new_order);
		cuNDArray<float_complext> data(*permuted);
		if (dcw){
			float scale_factor = float(prod(image_dims_recon_os_))/asum(dcw.get());
			*dcw *= scale_factor;
		}

		auto reg_images = reconstruct_regularization(&data,traj.get(),dcw.get(),ncoils);
		//reg_images->squeeze();

		auto csm = estimate_b1_map<float,2>(reg_images.get());
		*reg_images *= *csm;
		auto combined = sum(reg_images.get(),reg_images->get_number_of_dimensions()-1);

		auto tmp_combined = abs(reg_images.get());
		auto tmpcsm = abs(csm.get());
		job.csm_host_ = csm->to_host();
		job.reg_host_ = combined->to_host();
	}


	IsmrmrdDataBuffered* mainbuffer = &reconbit.data_;

	//Permute as Sensegadgets expect last dimension to be coils. *Sigh*
	job.dat_host_ =permute((hoNDArray<float_complext>*)&mainbuffer->data_,&new_order);

	if (mainbuffer->trajectory_){
		auto & trajectory = *mainbuffer->trajectory_;
		if (mainbuffer->headers_[0].trajectory_dimensions >2 ){
			auto traj_dcw = separate_traj_and_dcw(&trajectory);
			job.tra_host_ = std::get<0>(traj_dcw);
			job.dcw_host_ = std::get<1>(traj_dcw);
		} else if (mainbuffer->headers_[0].trajectory_dimensions == 2){
			auto old_traj_dims = *trajectory.get_dimensions();
			std::vector<size_t> traj_dims (old_traj_dims.begin()+1,old_traj_dims.end()); //Remove first element
			hoNDArray<floatd2> tmp_traj(traj_dims,(floatd2*)trajectory.get_data_ptr());
			job.tra_host_ = boost::make_shared<hoNDArray<floatd2>>(tmp_traj);
			auto host_dcw = boost::make_shared<hoNDArray<float>>(traj_dims);
			fill(host_dcw.get(),1.0f);
			job.dcw_host_ = host_dcw;

		} else {
			throw std::runtime_error("Unsupported number of trajectory dimensions");
		}
	}
	{
		float scale_factor = float(prod(image_dims_recon_os_))/asum(job.dcw_host_.get());
		*job.dcw_host_  *= scale_factor;
	}

	auto data_dims = *job.dat_host_->get_dimensions();
	//Sense gadgets expect only 1 dimension for encoding, so collapse the first
	size_t elements = std::accumulate(data_dims.begin(),data_dims.end()-1,1,std::multiplies<size_t>());
	std::vector<size_t> new_data_dims = {elements,data_dims.back()};
	job.dat_host_->reshape(&new_data_dims);

	size_t traj_elements = job.tra_host_->get_number_of_elements();
	auto traj_dims = *job.tra_host_->get_dimensions();

	size_t kpoints_per_frame = traj_dims[0]*profiles_per_frame_;
	if (traj_elements%kpoints_per_frame){
		std::stringstream ss;
		ss << "Profiles per frame (" << profiles_per_frame_ << ") must be a divisor of total number of profiles (" << traj_elements/traj_dims[0] << ")";
		throw std::runtime_error(ss.str());
	}
	std::vector<size_t> new_traj_dims ={kpoints_per_frame,traj_elements/kpoints_per_frame};

	job.tra_host_->reshape(&new_traj_dims);
	job.dcw_host_->reshape(&new_traj_dims);


	//Let's invent some image headers!
	size_t total_frames = profiles_per_frame_ > 0 ? mainbuffer->headers_.get_number_of_elements()/profiles_per_frame_ : 1 ;
	job.image_headers_ = boost::shared_array<ISMRMRD::ImageHeader>(new ISMRMRD::ImageHeader[total_frames]);
	for (size_t i = 0; i < total_frames; i++){
		job.image_headers_[i] = create_image_header(mainbuffer->headers_[i*profiles_per_frame_],mainbuffer->sampling_,i,total_frames);
	}


	m1->release(); //We be done with everything now.

	auto header_message = new GadgetContainerMessage<ISMRMRD::ImageHeader>(job.image_headers_[0]);

	auto job_message = new GadgetContainerMessage<GenericReconJob>(job);

	header_message->cont(job_message);

	if (!this->next()->putq(header_message)){
		GDEBUG("Failed to put message on que");
		return GADGET_FAIL;
	} else
		return GADGET_OK;



	//cuNDArray<float_complext> reg_images = reconstruct_regularization(reconbit.data_);
}
int gpuCSICoilEstimationGadget::process(
		GadgetContainerMessage<IsmrmrdAcquisitionBucket>* m1) {
	IsmrmrdAcquisitionBucket* bucket = m1->getObjectPtr();

	auto cm1 = new GadgetContainerMessage<cuSenseData>();
	auto senseData = cm1->getObjectPtr();

	coils = bucket->data_.front().head_->getObjectPtr()->active_channels;
	GDEBUG("Active channels %i \n",coils);


	{

		hoNDArray<std::complex<float>> * ho_data;
		hoNDArray<float>* ho_traj;

		std::tie(ho_data,ho_traj) = combine_data(bucket->data_);

		if (skip_lines_ > 0){
			auto cal_dims = *ho_data->get_dimensions();
			cal_dims.back() = skip_lines_;
			auto data_dims = *ho_data->get_dimensions();
			data_dims.back() -= skip_lines_;


			hoNDArray<float_complext> cal_view(cal_dims,(float_complext*) ho_data->get_data_ptr());
			senseData->freq_calibration = boost::make_shared<cuNDArray<float_complext>>(cal_view);
			senseData->freq_calibration->squeeze();
			hoNDArray<float_complext> data_view(data_dims,(float_complext*)ho_data->get_data_ptr()+cal_view.get_number_of_elements());
			senseData->data = boost::make_shared<cuNDArray<float_complext>>(data_view);
		} else {

			senseData->data = boost::make_shared<cuNDArray<float_complext>>(reinterpret_cast<hoNDArray<float_complext>*>(ho_data));
		}


		if (ho_traj->get_size(0) > 2){ //We have dcw
			auto traj_dcw = separate_traj_and_dcw(ho_traj);
			senseData->traj = boost::make_shared<cuNDArray<floatd2>>(*std::get<0>(traj_dcw));
			senseData->dcw = boost::make_shared<cuNDArray<float>>(*std::get<1>(traj_dcw));
		} else {
			std::vector<size_t> tdims = *ho_traj->get_dimensions();
			std::vector<size_t> tmp_dim(tdims.begin()+1,tdims.end());
			hoNDArray<floatd2> tmp(tmp_dim,reinterpret_cast<floatd2*>(ho_traj->get_data_ptr()));
			senseData->traj = boost::make_shared<cuNDArray<floatd2>>(tmp);
		}

		delete ho_data;
		delete ho_traj;
	}


	//Remove Initial Spirals

	boost::shared_ptr< cuNDArray<float_complext> >  ref_data;
	boost::shared_ptr< cuNDArray<floatd2> > ref_traj;
	boost::shared_ptr<cuNDArray<float> > ref_dcw;


	if (bucket->ref_.empty()){
		ref_data = senseData->data;
		ref_traj = senseData->traj;
		ref_dcw = senseData->dcw;
	} else {

		hoNDArray<std::complex<float>> * ho_data;
		hoNDArray<float>* ho_traj;
		std::tie(ho_data,ho_traj) = combine_data(bucket->ref_);

		ref_data = boost::make_shared<cuNDArray<float_complext>>(reinterpret_cast<hoNDArray<float_complext>*>(ho_data));
		if (ho_traj->get_size(0) > 2){
			auto traj_dcw = separate_traj_and_dcw(ho_traj);
			ref_traj =boost::make_shared<cuNDArray<floatd2>>(*std::get<0>(traj_dcw));
			ref_dcw = boost::make_shared<cuNDArray<float>>(*std::get<1>(traj_dcw));
		} else {
			std::vector<size_t> tdims = *ho_traj->get_dimensions();
			std::vector<size_t> tmp_dim(tdims.begin()+1,tdims.end());
			hoNDArray<floatd2> tmp(tmp_dim,reinterpret_cast<floatd2*>(ho_traj->get_data_ptr()));
			ref_traj = boost::make_shared<cuNDArray<floatd2>>(tmp);
		}
		delete ho_data;
		delete ho_traj;


	}

	senseData->csm = calculate_CSM(ref_data.get(),ref_traj.get(),ref_dcw.get());


	if (this->next()->putq(cm1) == GADGET_FAIL){
		GERROR("Failed to put message on que\n");
		return GADGET_FAIL;
	}

	return GADGET_OK;


}