예제 #1
0
/**
 * Runs a single GPU NFFT "mult_MH" pass on host data and returns the result on the host.
 *
 * The input samples and the trajectory are copied to the device, a
 * cuNFFTOperator<float,N> is set up with an oversampled matrix of twice
 * matrix_size, and the operator's mult_MH is applied to the samples.
 *
 * @param input_data   Host array of complex samples; must not be null.
 * @param trajectory   Host array of N-dimensional sample coordinates; must not be null
 *                     and must contain at least one element (its element count is used
 *                     as a divisor when computing the batch dimension).
 * @param matrix_size  Extent of the output along each of the N dimensions.
 * @param W            Forwarded as the third argument of cuNFFTOperator::setup
 *                     (presumably the convolution kernel width -- confirm against cuNFFT).
 * @param dcw          Optional weights. When given, their square root is both handed to
 *                     the operator via set_dcw and multiplied onto the input samples.
 * @return Host array with dimensions [matrix_size..., samples / trajectory elements].
 */
template<unsigned int N> static boost::shared_ptr<hoNDArray<float_complext> > gadgetronNFFT_instance(hoNDArray<float_complext> * input_data, hoNDArray<vector_td<float,N> >* trajectory,
		vector_td<uint64_t,N> matrix_size, float W, hoNDArray<float>* dcw = nullptr){

	// Upload samples and trajectory to the device
	cuNDArray<float_complext> cuInput(*input_data);
	cuNDArray<vector_td<float,N> > cu_traj(*trajectory);

	auto op = boost::make_shared<cuNFFTOperator<float,N>>();
	op->setup(matrix_size,matrix_size*size_t(2),W);
	op->preprocess(&cu_traj);

	if (dcw){
		// The square-rooted weights are applied twice: once here on the samples
		// and once through the operator (set_dcw).
		auto cu_dcw = boost::make_shared<cuNDArray<float>>(*dcw);
		sqrt_inplace(cu_dcw.get());
		op->set_dcw(cu_dcw);

		cuInput *= *cu_dcw;
	}

	// Output dimensions: the N matrix extents followed by the batch count.
	// Copy element by element; forming an end iterator through matrix_size[N]
	// would index one past the last valid component.
	std::vector<size_t> out_dims;
	out_dims.reserve(N + 1);
	for (unsigned int d = 0; d < N; d++)
		out_dims.push_back(matrix_size[d]);
	out_dims.push_back(cuInput.get_number_of_elements()/cu_traj.get_number_of_elements());

/*
	Disabled alternative: iterative conjugate-gradient solve instead of a single mult_MH.

	op->set_domain_dimensions(&out_dims);
	op->set_codomain_dimensions(cuInput.get_dimensions().get());
	cuCgSolver<float_complext> cg;
	cg.set_max_iterations(10);
	cg.set_tc_tolerance(1e-8);
	cg.set_encoding_operator(op);
	auto output = cg.solve(&cuInput);
*/
	cuNDArray<float_complext> output(out_dims);
	op->mult_MH(&cuInput,&output);
	return output.to_host();
}
/**
 * Reconstructs every recon bit of the incoming message with a plain inverse FFT
 * followed by a root-sum-of-squares coil combination, and passes one image array
 * per recon bit to the next gadget.
 *
 * Note that the inverse FFT and the d*conj(d) step are done in place, so the
 * buffered k-space data in m1 is overwritten.
 *
 * @param m1 Message holding the buffered recon data. It is released here, both on
 *           success and when forwarding an image array fails.
 * @return GADGET_OK on success, GADGET_FAIL if an image array could not be queued.
 */
int SimpleReconGadget::process( GadgetContainerMessage<IsmrmrdReconData>* m1)
{

    //Iterate over all the recon bits
    for(std::vector<IsmrmrdReconBit>::iterator it = m1->getObjectPtr()->rbit_.begin();
        it != m1->getObjectPtr()->rbit_.end(); ++it)
    {
        //Grab a reference to the buffer containing the imaging data
        //We are ignoring the reference data
        IsmrmrdDataBuffered & dbuff = it->data_;

        //Data 7D, fixed order [E0, E1, E2, CHA, N, S, LOC]
        uint16_t E0 = dbuff.data_.get_size(0);
        uint16_t E1 = dbuff.data_.get_size(1);
        uint16_t E2 = dbuff.data_.get_size(2);
        uint16_t CHA = dbuff.data_.get_size(3);
        uint16_t N = dbuff.data_.get_size(4);
        uint16_t S = dbuff.data_.get_size(5);
        uint16_t LOC = dbuff.data_.get_size(6);

        //Create an image array message
        GadgetContainerMessage<IsmrmrdImageArray>* cm1 =
                new GadgetContainerMessage<IsmrmrdImageArray>();

        //Grab a reference to the image array data and headers
        IsmrmrdImageArray & imarray = *cm1->getObjectPtr();

        //The image array data will be [E0,E1,E2,1,N,S,LOC] big
        //(the coil dimension is collapsed by the sum of squares below)
        std::vector<size_t> data_dims(7);
        data_dims[0] = E0;
        data_dims[1] = E1;
        data_dims[2] = E2;
        data_dims[3] = 1;
        data_dims[4] = N;
        data_dims[5] = S;
        data_dims[6] = LOC;
        imarray.data_.create(&data_dims);

        //ImageHeaders will be [N, S, LOC]
        std::vector<size_t> header_dims(3);
        header_dims[0] = N;
        header_dims[1] = S;
        header_dims[2] = LOC;
        imarray.headers_.create(&header_dims);

        //We will not add any meta data
        //so skip the meta_ part

        //The per-image view shapes do not change inside the loops below
        //Each k-space chunk is [E0,E1,E2,CHA], each image is [E0,E1,E2]
        std::vector<size_t> chunk_dims(4);
        chunk_dims[0] = E0;
        chunk_dims[1] = E1;
        chunk_dims[2] = E2;
        chunk_dims[3] = CHA;

        std::vector<size_t> img_dims(3);
        img_dims[0] = E0;
        img_dims[1] = E1;
        img_dims[2] = E2;

        //Loop over S and N and LOC
        for (uint16_t loc=0; loc < LOC; loc++) {
            for (uint16_t s=0; s < S; s++) {
                for (uint16_t n=0; n < N; n++) {

                    //Set some information into the image header
                    //Use the middle acquisition header for some info
                    //[E1, E2, N, S, LOC]
                    ISMRMRD::AcquisitionHeader & acqhdr = dbuff.headers_(dbuff.sampling_.sampling_limits_[1].center_,
                                                                         dbuff.sampling_.sampling_limits_[2].center_,
                                                                         n, s, loc);

                    //Look the image header up once instead of once per field
                    ISMRMRD::ImageHeader & imghdr = imarray.headers_(n,s,loc);

                    imghdr.matrix_size[0] = E0;
                    imghdr.matrix_size[1] = E1;
                    imghdr.matrix_size[2] = E2;
                    imghdr.channels       = 1;

                    imghdr.average    = acqhdr.idx.average;
                    imghdr.slice      = acqhdr.idx.slice;
                    imghdr.contrast   = acqhdr.idx.contrast;
                    imghdr.phase      = acqhdr.idx.phase;
                    imghdr.repetition = acqhdr.idx.repetition;
                    imghdr.set        = acqhdr.idx.set;
                    imghdr.acquisition_time_stamp = acqhdr.acquisition_time_stamp;

                    //Copy the three-component geometry fields
                    for (int d = 0; d < 3; d++) {
                        imghdr.field_of_view[d]          = dbuff.sampling_.recon_FOV_[d];
                        imghdr.position[d]               = acqhdr.position[d];
                        imghdr.read_dir[d]               = acqhdr.read_dir[d];
                        imghdr.phase_dir[d]              = acqhdr.phase_dir[d];
                        imghdr.slice_dir[d]              = acqhdr.slice_dir[d];
                        imghdr.patient_table_position[d] = acqhdr.patient_table_position[d];
                    }

                    imghdr.data_type   = ISMRMRD::ISMRMRD_CXFLOAT;
                    imghdr.image_index = ++image_counter_;

                    //Wrap the relevant chunk of data [E0,E1,E2,CHA] for this loc, n, and s.
                    //Direct-initialised so the object is the wrapper itself and never a
                    //copy of a temporary wrapper.
                    hoNDArray<std::complex<float> > chunk(chunk_dims, &dbuff.data_(0,0,0,0,n,s,loc));

                    //Do the FFTs in place
                    hoNDFFT<float>::instance()->ifft(&chunk,0);
                    hoNDFFT<float>::instance()->ifft(&chunk,1);
                    if (E2>1) {
                        hoNDFFT<float>::instance()->ifft(&chunk,2);
                    }

                    //Square root of the sum of squares
                    //Wrap the destination image [E0,E1,E2] inside the image array
                    hoNDArray<std::complex<float> > output(img_dims, &imarray.data_(0,0,0,0,n,s,loc));
                    //Zero out the output
                    clear(output);

                    //Compute d* d in place
                    multiplyConj(chunk,chunk,chunk);
                    //Add up the per-coil images
                    for (size_t c = 0; c < CHA; c++) {
                        output += hoNDArray<std::complex<float> >(img_dims, &chunk(0,0,0,c));
                    }
                    //Take the square root in place
                    sqrt_inplace(&output);
               }
            }
        }

        //Pass the image array down the chain
        if (this->next()->putq(cm1) < 0) {
            //The queue did not take ownership, so the new message must be
            //released here or it is leaked.
            cm1->release();
            m1->release();
            return GADGET_FAIL;
        }

    }

    m1->release();
    return GADGET_OK;

}
  /**
   * Reconstructs one Spirit job on the GPU with the conjugate gradient solver,
   * combines the coil images by root-sum-of-squares and queues the resulting frames.
   *
   * Messages whose set/slice do not match this gadget are forwarded unchanged.
   *
   * @param m1 Image header message; used to select the set/slice handled by this
   *           gadget. Released on success.
   * @param m2 Reconstruction job carrying csm, data, trajectory, density
   *           compensation weights and regularization image on the host.
   * @return GADGET_OK on success, GADGET_FAIL on invalid input or solver failure,
   *         or the result of putq when the message is only passed downstream.
   */
  int gpuCgSpiritGadget::process(GadgetContainerMessage<ISMRMRD::ImageHeader> *m1, GadgetContainerMessage<GenericReconJob> *m2)
  {
    // Is this data for this gadget's set/slice?
    //

    if( m1->getObjectPtr()->set != set_number_ || m1->getObjectPtr()->slice != slice_number_ ) {
      // No, pass it downstream...
      return this->next()->putq(m1);
    }

    //GDEBUG("gpuCgSpiritGadget::process\n");

    boost::shared_ptr<GPUTimer> process_timer;
    if( output_timing_ )
      process_timer = boost::shared_ptr<GPUTimer>( new GPUTimer("gpuCgSpiritGadget::process()") );

    if (!is_configured_) {
      GDEBUG("Data received before configuration was completed\n");
      return GADGET_FAIL;
    }

    GenericReconJob* j = m2->getObjectPtr();

    // Some basic validation of the incoming Spirit job
    if (!j->csm_host_.get() || !j->dat_host_.get() || !j->tra_host_.get() || !j->dcw_host_.get() || !j->reg_host_.get()) {
      GDEBUG("Received an incomplete Spirit job\n");
      return GADGET_FAIL;
    }

    const size_t traj_elements = j->tra_host_->get_number_of_elements();
    unsigned int samples = j->dat_host_->get_size(0);
    unsigned int channels = j->dat_host_->get_size(1);

    // Validate before dividing: an empty trajectory would otherwise be a
    // division (and modulo) by zero.
    if( traj_elements == 0 || samples%traj_elements ) {
      GDEBUG("Mismatch between number of samples (%d) and number of k-space coordinates (%d).\nThe first should be a multiplum of the latter.\n",
                    static_cast<int>(samples), static_cast<int>(traj_elements));
      return GADGET_FAIL;
    }

    unsigned int rotations = static_cast<unsigned int>(samples / traj_elements);
    unsigned int frames = static_cast<unsigned int>(j->tra_host_->get_size(1)*rotations);

    // Upload the job to the device
    boost::shared_ptr< cuNDArray<floatd2> > traj(new cuNDArray<floatd2> (j->tra_host_.get()));
    boost::shared_ptr< cuNDArray<float> > dcw(new cuNDArray<float> (j->dcw_host_.get()));
    sqrt_inplace(dcw.get()); //Take square root to use for weighting
    boost::shared_ptr< cuNDArray<float_complext> > csm(new cuNDArray<float_complext> (j->csm_host_.get()));
    boost::shared_ptr< cuNDArray<float_complext> > device_samples(new cuNDArray<float_complext> (j->dat_host_.get()));

    cudaDeviceProp deviceProp;
    if( cudaGetDeviceProperties( &deviceProp, device_number_ ) != cudaSuccess) {
      GDEBUG( "Error: unable to query device properties.\n" );
      return GADGET_FAIL;
    }

    unsigned int warp_size = deviceProp.warpSize;

    // The reconstruction matrix size is taken from the regularization image
    matrix_size_ = uint64d2( j->reg_host_->get_size(0), j->reg_host_->get_size(1) );

    // Oversampled matrix size, rounded up to a multiple of the warp size
    matrix_size_os_ =
      uint64d2(((static_cast<unsigned int>(std::ceil(matrix_size_[0]*oversampling_factor_))+warp_size-1)/warp_size)*warp_size,
               ((static_cast<unsigned int>(std::ceil(matrix_size_[1]*oversampling_factor_))+warp_size-1)/warp_size)*warp_size);

    if( !matrix_size_reported_ ) {
      // The sizes are 64-bit; cast so the arguments match the %d conversions
      GDEBUG("Matrix size    : [%d,%d] \n", static_cast<int>(matrix_size_[0]), static_cast<int>(matrix_size_[1]));
      GDEBUG("Matrix size OS : [%d,%d] \n", static_cast<int>(matrix_size_os_[0]), static_cast<int>(matrix_size_os_[1]));
      matrix_size_reported_ = true;
    }

    // Image dimensions: [x, y, frames, channels]
    std::vector<size_t> image_dims = to_std_vector(matrix_size_);

    image_dims.push_back(frames);
    image_dims.push_back(channels);
    GDEBUG("Number of coils: %d %d \n", static_cast<int>(channels), static_cast<int>(image_dims.size()));

    E_->set_domain_dimensions(&image_dims);
    E_->set_codomain_dimensions(device_samples->get_dimensions().get());
    E_->set_dcw(dcw);
    E_->setup( matrix_size_, matrix_size_os_, static_cast<float>(kernel_width_) );
    E_->preprocess(traj.get());

    // csm already lives on the device; hand it over directly instead of
    // making a second device copy.
    S_->set_calibration_kernels(csm);
    S_->set_domain_dimensions(&image_dims);
    S_->set_codomain_dimensions(&image_dims);

    /*
    boost::shared_ptr< cuNDArray<float_complext> > reg_image(new cuNDArray<float_complext> (j->reg_host_.get()));
    R_->compute(reg_image.get());

    // Define preconditioning weights
    boost::shared_ptr< cuNDArray<float> > _precon_weights = sum(abs_square(csm.get()).get(), 2);
    boost::shared_ptr<cuNDArray<float> > R_diag = R_->get();
    *R_diag *= float(kappa_);
    *_precon_weights += *R_diag;
    R_diag.reset();
    reciprocal_sqrt_inplace(_precon_weights.get());	
    boost::shared_ptr< cuNDArray<float_complext> > precon_weights = real_to_complex<float_complext>( _precon_weights.get() );
    _precon_weights.reset();
    D_->set_weights( precon_weights );
    */

    /*{
      static int counter = 0;
      char filename[256];
      sprintf((char*)filename, "_traj_%d.real", counter);
      write_nd_array<floatd2>( traj->to_host().get(), filename );
      sprintf((char*)filename, "_dcw_%d.real", counter);
      write_nd_array<float>( dcw->to_host().get(), filename );
      sprintf((char*)filename, "_csm_%d.cplx", counter);
      write_nd_array<float_complext>( csm->to_host().get(), filename );
      sprintf((char*)filename, "_samples_%d.cplx", counter);
      write_nd_array<float_complext>( device_samples->to_host().get(), filename );
      sprintf((char*)filename, "_reg_%d.cplx", counter);
      write_nd_array<float_complext>( reg_image->to_host().get(), filename );
      counter++; 
      }*/

    // Invoke solver
    // 

    boost::shared_ptr< cuNDArray<float_complext> > cgresult;

    {
      // The timer object (if any) lives exactly as long as the solve
      boost::shared_ptr<GPUTimer> solve_timer;
      if( output_timing_ )
        solve_timer = boost::shared_ptr<GPUTimer>( new GPUTimer("gpuCgSpiritGadget::solve()") );

      cgresult = cg_.solve(device_samples.get());

      if( output_timing_ )
        solve_timer.reset();
    }

    if (!cgresult.get()) {
      GDEBUG("Iterative_spirit_compute failed\n");
      return GADGET_FAIL;
    }

    /*
      static int counter = 0;
      char filename[256];
      sprintf((char*)filename, "recon_%d.real", counter);
      write_nd_array<float>( abs(cgresult.get())->to_host().get(), filename );
      counter++; 
    */

    // If the recon matrix size exceeds the sequence matrix size then crop
    if( matrix_size_seq_ != matrix_size_ )
      cgresult = crop<float_complext,2>( (matrix_size_-matrix_size_seq_)>>1, matrix_size_seq_, cgresult.get() );

    // Combine coil images
    //

    // Root-sum-of-squares over dimension 3 (the channel dimension of image_dims)
    cgresult = real_to_complex<float_complext>(sqrt(sum(abs_square(cgresult.get()).get(), 3).get()).get()); // RSS
    //cgresult = sum(cgresult.get(), 2);

    // Pass on the reconstructed images
    //

    put_frames_on_que(frames,rotations,j,cgresult.get());
    frame_counter_ += frames;

    if( output_timing_ )
      process_timer.reset();

    m1->release();
    return GADGET_OK;
  }
예제 #4
0
/**
 * Reconstructs one Sense job on the GPU with solver_ (using the job's
 * regularization image, expanded to all frames, as prior) and queues the
 * resulting frames.
 *
 * Messages whose set/slice do not match this gadget are forwarded unchanged.
 *
 * @param m1 Image header message; used to select the set/slice handled by this
 *           gadget. Released on success.
 * @param m2 Reconstruction job carrying csm, data, trajectory, density
 *           compensation weights and regularization image on the host.
 * @return GADGET_OK on success, GADGET_FAIL on invalid input or solver failure,
 *         or the result of putq when the message is only passed downstream.
 */
int gpuOsSenseGadget::process(GadgetContainerMessage<ISMRMRD::ImageHeader> *m1, GadgetContainerMessage<GenericReconJob> *m2)
{
	// Is this data for this gadget's set/slice?
	//
	GDEBUG("Starting gpuOsSenseGadget\n");

	if( m1->getObjectPtr()->set != set_number_ || m1->getObjectPtr()->slice != slice_number_ ) {
		// No, pass it downstream...
		return this->next()->putq(m1);
	}

	//GDEBUG("gpuOsSenseGadget::process\n");
	//GPUTimer timer("gpuOsSenseGadget::process");

	if (!is_configured_) {
		GDEBUG("\nData received before configuration complete\n");
		return GADGET_FAIL;
	}

	GenericReconJob* j = m2->getObjectPtr();

	// Let's first check that this job has the required data...
	// reg_host_ is included because it is dereferenced below for the matrix
	// size and for the prior image.
	if (!j->csm_host_.get() || !j->dat_host_.get() || !j->tra_host_.get() || !j->dcw_host_.get() || !j->reg_host_.get()) {
		GDEBUG("Received an incomplete Sense job\n");
		return GADGET_FAIL;
	}

	const size_t traj_elements = j->tra_host_->get_number_of_elements();
	unsigned int samples = j->dat_host_->get_size(0);
	unsigned int channels = j->dat_host_->get_size(1);

	// Validate before dividing: an empty trajectory would otherwise be a
	// division (and modulo) by zero.
	if( traj_elements == 0 || samples%traj_elements ) {
		GDEBUG("Mismatch between number of samples (%d) and number of k-space coordinates (%d).\nThe first should be a multiplum of the latter.\n",
				static_cast<int>(samples), static_cast<int>(traj_elements));
		return GADGET_FAIL;
	}

	unsigned int rotations = static_cast<unsigned int>(samples / traj_elements);
	unsigned int frames = static_cast<unsigned int>(j->tra_host_->get_size(1)*rotations);

	// Upload the job to the device.
	// The density compensation weights are not uploaded: both places that
	// would consume them (E_->set_dcw and the sample weighting) are disabled.
	boost::shared_ptr< cuNDArray<floatd2> > traj(new cuNDArray<floatd2> (j->tra_host_.get()));
	boost::shared_ptr< cuNDArray<float_complext> > csm(new cuNDArray<float_complext> (j->csm_host_.get()));
	boost::shared_ptr< cuNDArray<float_complext> > device_samples(new cuNDArray<float_complext> (j->dat_host_.get()));


	// Take the reconstruction matrix size from the regularization image.
	// It could be oversampled from the sequence specified size...

	matrix_size_ = uint64d2( j->reg_host_->get_size(0), j->reg_host_->get_size(1) );

	cudaDeviceProp deviceProp;
	if( cudaGetDeviceProperties( &deviceProp, device_number_ ) != cudaSuccess) {
		GDEBUG( "\nError: unable to query device properties.\n" );
		return GADGET_FAIL;
	}

	unsigned int warp_size = deviceProp.warpSize;

	// Oversampled matrix size, rounded up to a multiple of the warp size
	matrix_size_os_ =
			uint64d2(((static_cast<unsigned int>(std::ceil(matrix_size_[0]*oversampling_factor_))+warp_size-1)/warp_size)*warp_size,
					((static_cast<unsigned int>(std::ceil(matrix_size_[1]*oversampling_factor_))+warp_size-1)/warp_size)*warp_size);

	// The sizes are 64-bit; cast so the arguments match the %d conversions
	GDEBUG("Matrix size    : [%d,%d] \n", static_cast<int>(matrix_size_[0]), static_cast<int>(matrix_size_[1]));
	GDEBUG("Matrix size OS : [%d,%d] \n", static_cast<int>(matrix_size_os_[0]), static_cast<int>(matrix_size_os_[1]));

	// Image dimensions: [x, y, frames]
	std::vector<size_t> image_dims = to_std_vector(matrix_size_);
	image_dims.push_back(frames);

	E_->set_domain_dimensions(&image_dims);
	E_->set_codomain_dimensions(device_samples->get_dimensions().get());
	E_->set_csm(csm);
	E_->setup( matrix_size_, matrix_size_os_, kernel_width_ );
	E_->preprocess(traj.get());

	// Preconditioning is disabled. To re-enable, build an image of size
	// image_dims filled with float_complext(1.0f) and pass it on:
	//solver_.set_preconditioning_image(precon);

	reg_image_ = boost::shared_ptr< cuNDArray<float_complext> >(new cuNDArray<float_complext>(&image_dims));

	// These operators need their domain/codomain set before being added to the solver
	//

	// Disabled (would need sqrt(dcw) uploaded to the device first):
	//E_->set_dcw(dcw);
	GDEBUG("Prepared\n");

	// Expand the average image to the number of frames
	//

	{
		cuNDArray<float_complext> tmp(*j->reg_host_);
		*reg_image_ = expand( tmp, frames );
	}
	PICS_->set_prior(reg_image_);

	// Define preconditioning weights
	//

	//Apply weights
	//*device_samples *= *dcw;

	// Invoke solver
	//

	boost::shared_ptr< cuNDArray<float_complext> > result;
	{
		GDEBUG("Running NLCG solver\n");
		GPUTimer timer("Running NLCG solver");

		// Optionally, allow exclusive (per device) access to the solver
		// This may not matter much in terms of speed, but it can in terms of memory consumption
		//

		if( exclusive_access_ )
			_mutex[device_number_].lock();

		try {
			result = solver_.solve(device_samples.get());
		} catch (...) {
			// Do not leave the per-device mutex locked if the solver throws
			if( exclusive_access_ )
				_mutex[device_number_].unlock();
			throw;
		}

		if( exclusive_access_ )
			_mutex[device_number_].unlock();
	}

	// Check the result before anything dereferences it
	if (!result.get()) {
		GDEBUG("\nNon-linear conjugate gradient solver failed\n");
		return GADGET_FAIL;
	}

	// Provide some info about the scaling between the regularization and reconstruction.
	// If it is not close to one, PICCS does not work optimally...
	//

	if( alpha_ > 0.0 ){
		cuNDArray<float_complext> gpureg(j->reg_host_.get());
		// Mean of the reconstruction over dimension 2 (frames)
		boost::shared_ptr< cuNDArray<float_complext> > gpurec = sum(result.get(),2);
		*gpurec /= float(result->get_size(2));
		float scale = abs(dot(gpurec.get(), gpurec.get())/dot(gpurec.get(),&gpureg));
		GDEBUG("Scaling factor between regularization and reconstruction is %f.\n", scale);
	}

	/*
      static int counter = 0;
      char filename[256];
      sprintf((char*)filename, "recon_sb_%d.cplx", counter);
      write_nd_array<float_complext>( sbresult->to_host().get(), filename );
      counter++; */

	// If the recon matrix size exceeds the sequence matrix size then crop
	if( matrix_size_seq_ != matrix_size_ )
		*result = crop<float_complext,2>( (matrix_size_-matrix_size_seq_)>>1, matrix_size_seq_, *result );


	// Now pass on the reconstructed images
	//
	this->put_frames_on_que(frames,rotations,j,result.get(),channels);

	frame_counter_ += frames;
	m1->release();
	return GADGET_OK;
}