// Gridding (adjoint NFFT) reconstruction of non-Cartesian k-space data on the GPU.
//
// Uploads the data and trajectory to the device, optionally applies density
// compensation, backprojects via the adjoint NFFT and returns the host image.
//
// @param input_data  non-Cartesian samples, trailing batch (e.g. coil) dimension allowed
// @param trajectory  k-space coordinates, one vector_td<float,N> per sample
// @param matrix_size reconstruction matrix size
// @param W           convolution kernel width
// @param dcw         optional density compensation weights (may be nullptr)
// @return            reconstructed image volume(s) on the host
template<unsigned int N> static boost::shared_ptr<hoNDArray<float_complext> > gadgetronNFFT_instance(hoNDArray<float_complext> * input_data, hoNDArray<vector_td<float,N> >* trajectory, vector_td<uint64_t,N> matrix_size, float W, hoNDArray<float>* dcw = nullptr)
{
  // Move the samples and trajectory to the device
  cuNDArray<float_complext> device_data(*input_data);
  cuNDArray<vector_td<float,N> > device_traj(*trajectory);

  // NFFT operator with 2x oversampled grid and kernel width W
  auto nfft_op = boost::make_shared<cuNFFTOperator<float,N>>();
  nfft_op->setup(matrix_size, matrix_size*size_t(2), W);
  nfft_op->preprocess(&device_traj);

  // Density compensation: sqrt of the weights is applied to the data here and
  // also handed to the operator — presumably the operator applies them again
  // in mult_MH so the full weights act once overall (confirm vs cuNFFTOperator).
  if (dcw) {
    auto device_dcw = boost::make_shared<cuNDArray<float>>(*dcw);
    sqrt_inplace(device_dcw.get());
    nfft_op->set_dcw(device_dcw);
    device_data *= *device_dcw;
  }

  // Image dimensions: the recon matrix plus a trailing batch (e.g. coil) dimension
  std::vector<size_t> image_dims(&matrix_size[0], &matrix_size[N]);
  image_dims.push_back(device_data.get_number_of_elements()/device_traj.get_number_of_elements());

  /* Disabled iterative (CG) alternative to the plain gridding below:
  nfft_op->set_domain_dimensions(&image_dims);
  nfft_op->set_codomain_dimensions(device_data.get_dimensions().get());
  cuCgSolver<float_complext> cg;
  cg.set_max_iterations(10);
  cg.set_tc_tolerance(1e-8);
  cg.set_encoding_operator(nfft_op);
  auto image = cg.solve(&device_data);
  */

  // Plain gridding: one application of the adjoint operator
  cuNDArray<float_complext> image(image_dims);
  nfft_op->mult_MH(&device_data, &image);
  return image.to_host();
}
// Performs a basic FFT reconstruction of buffered k-space data and forwards
// the result as an IsmrmrdImageArray: per-channel inverse FFTs followed by a
// root-sum-of-squares coil combination.
//
// @param m1 buffered recon data (released by this gadget on all paths)
// @return GADGET_OK on success, GADGET_FAIL if the downstream queue rejects the result
int SimpleReconGadget::process( GadgetContainerMessage<IsmrmrdReconData>* m1)
{
  //Iterate over all the recon bits
  for(std::vector<IsmrmrdReconBit>::iterator it = m1->getObjectPtr()->rbit_.begin();
      it != m1->getObjectPtr()->rbit_.end(); ++it)
    {
      //Grab a reference to the buffer containing the imaging data
      //We are ignoring the reference data
      IsmrmrdDataBuffered & dbuff = it->data_;

      //Data 7D, fixed order [E0, E1, E2, CHA, N, S, LOC]
      uint16_t E0 = dbuff.data_.get_size(0);
      uint16_t E1 = dbuff.data_.get_size(1);
      uint16_t E2 = dbuff.data_.get_size(2);
      uint16_t CHA = dbuff.data_.get_size(3);
      uint16_t N = dbuff.data_.get_size(4);
      uint16_t S = dbuff.data_.get_size(5);
      uint16_t LOC = dbuff.data_.get_size(6);

      //Create an image array message
      GadgetContainerMessage<IsmrmrdImageArray>* cm1 =
          new GadgetContainerMessage<IsmrmrdImageArray>();

      //Grab references to the image array data and headers
      IsmrmrdImageArray & imarray = *cm1->getObjectPtr();

      //The image array data will be [E0,E1,E2,1,N,S,LOC] big
      //Will collapse across coils at the end
      std::vector<size_t> data_dims(7);
      data_dims[0] = E0;
      data_dims[1] = E1;
      data_dims[2] = E2;
      data_dims[3] = 1;
      data_dims[4] = N;
      data_dims[5] = S;
      data_dims[6] = LOC;
      imarray.data_.create(&data_dims);

      //ImageHeaders will be [N, S, LOC]
      std::vector<size_t> header_dims(3);
      header_dims[0] = N;
      header_dims[1] = S;
      header_dims[2] = LOC;
      imarray.headers_.create(&header_dims);

      //We will not add any meta data
      //so skip the meta_ part

      //Loop over S and N and LOC
      for (uint16_t loc=0; loc < LOC; loc++) {
        for (uint16_t s=0; s < S; s++) {
          for (uint16_t n=0; n < N; n++) {

            //Set some information into the image header
            //Use the middle acquisition header for some info
            //[E1, E2, N, S, LOC]
            ISMRMRD::AcquisitionHeader & acqhdr =
                dbuff.headers_(dbuff.sampling_.sampling_limits_[1].center_,
                               dbuff.sampling_.sampling_limits_[2].center_,
                               n, s, loc);
            imarray.headers_(n,s,loc).matrix_size[0] = E0;
            imarray.headers_(n,s,loc).matrix_size[1] = E1;
            imarray.headers_(n,s,loc).matrix_size[2] = E2;
            imarray.headers_(n,s,loc).field_of_view[0] = dbuff.sampling_.recon_FOV_[0];
            imarray.headers_(n,s,loc).field_of_view[1] = dbuff.sampling_.recon_FOV_[1];
            imarray.headers_(n,s,loc).field_of_view[2] = dbuff.sampling_.recon_FOV_[2];
            imarray.headers_(n,s,loc).channels = 1; // coils collapsed by RSS below
            imarray.headers_(n,s,loc).average = acqhdr.idx.average;
            imarray.headers_(n,s,loc).slice = acqhdr.idx.slice;
            imarray.headers_(n,s,loc).contrast = acqhdr.idx.contrast;
            imarray.headers_(n,s,loc).phase = acqhdr.idx.phase;
            imarray.headers_(n,s,loc).repetition = acqhdr.idx.repetition;
            imarray.headers_(n,s,loc).set = acqhdr.idx.set;
            imarray.headers_(n,s,loc).acquisition_time_stamp = acqhdr.acquisition_time_stamp;
            imarray.headers_(n,s,loc).position[0] = acqhdr.position[0];
            imarray.headers_(n,s,loc).position[1] = acqhdr.position[1];
            imarray.headers_(n,s,loc).position[2] = acqhdr.position[2];
            imarray.headers_(n,s,loc).read_dir[0] = acqhdr.read_dir[0];
            imarray.headers_(n,s,loc).read_dir[1] = acqhdr.read_dir[1];
            imarray.headers_(n,s,loc).read_dir[2] = acqhdr.read_dir[2];
            imarray.headers_(n,s,loc).phase_dir[0] = acqhdr.phase_dir[0];
            imarray.headers_(n,s,loc).phase_dir[1] = acqhdr.phase_dir[1];
            imarray.headers_(n,s,loc).phase_dir[2] = acqhdr.phase_dir[2];
            imarray.headers_(n,s,loc).slice_dir[0] = acqhdr.slice_dir[0];
            imarray.headers_(n,s,loc).slice_dir[1] = acqhdr.slice_dir[1];
            imarray.headers_(n,s,loc).slice_dir[2] = acqhdr.slice_dir[2];
            imarray.headers_(n,s,loc).patient_table_position[0] = acqhdr.patient_table_position[0];
            imarray.headers_(n,s,loc).patient_table_position[1] = acqhdr.patient_table_position[1];
            imarray.headers_(n,s,loc).patient_table_position[2] = acqhdr.patient_table_position[2];
            imarray.headers_(n,s,loc).data_type = ISMRMRD::ISMRMRD_CXFLOAT;
            imarray.headers_(n,s,loc).image_index = ++image_counter_;

            //Grab a wrapper around the relevant chunk of data [E0,E1,E2,CHA] for this loc, n, and s
            //Each chunk will be [E0,E1,E2,CHA] big
            std::vector<size_t> chunk_dims(4);
            chunk_dims[0] = E0;
            chunk_dims[1] = E1;
            chunk_dims[2] = E2;
            chunk_dims[3] = CHA;
            hoNDArray<std::complex<float> > chunk(chunk_dims, &dbuff.data_(0,0,0,0,n,s,loc));

            //Do the FFTs in place
            hoNDFFT<float>::instance()->ifft(&chunk,0);
            hoNDFFT<float>::instance()->ifft(&chunk,1);
            if (E2>1) { // only transform the slice dimension for true 3D data
              hoNDFFT<float>::instance()->ifft(&chunk,2);
            }

            //Square root of the sum of squares
            //Each image will be [E0,E1,E2,1] big
            std::vector<size_t> img_dims(3);
            img_dims[0] = E0;
            img_dims[1] = E1;
            img_dims[2] = E2;
            hoNDArray<std::complex<float> > output(img_dims, &imarray.data_(0,0,0,0,n,s,loc));

            //Zero out the output
            clear(output);

            //Compute d* d in place
            multiplyConj(chunk,chunk,chunk);

            //Add up over the coil dimension
            for (size_t c = 0; c < CHA; c++) {
              output += hoNDArray<std::complex<float> >(img_dims, &chunk(0,0,0,c));
            }

            //Take the square root in place
            sqrt_inplace(&output);
          }
        }
      }

      //Pass the image array down the chain
      if (this->next()->putq(cm1) < 0) {
        // putq() failed, so the queue never took ownership of cm1; release it
        // here as well to avoid leaking the freshly allocated image array.
        cm1->release();
        m1->release();
        return GADGET_FAIL;
      }
    }

  m1->release();
  return GADGET_OK;
}
// Runs the conjugate-gradient SPIRiT reconstruction for one GenericReconJob:
// uploads the job data to the GPU, configures the encoding (E_) and SPIRiT
// calibration (S_) operators, invokes the CG solver, crops and RSS-combines
// the coil images, and passes the frames downstream.
int gpuCgSpiritGadget::process(GadgetContainerMessage<ISMRMRD::ImageHeader> *m1, GadgetContainerMessage<GenericReconJob> *m2)
{
  // Is this data for this gadget's set/slice?
  //
  if( m1->getObjectPtr()->set != set_number_ || m1->getObjectPtr()->slice != slice_number_ ) {
    // No, pass it downstream...
    return this->next()->putq(m1);
  }

  //GDEBUG("gpuCgSpiritGadget::process\n");

  // Optional wall-clock timing of the whole process() call
  boost::shared_ptr<GPUTimer> process_timer;
  if( output_timing_ )
    process_timer = boost::shared_ptr<GPUTimer>( new GPUTimer("gpuCgSpiritGadget::process()") );

  if (!is_configured_) {
    GDEBUG("Data received before configuration was completed\n");
    return GADGET_FAIL;
  }

  GenericReconJob* j = m2->getObjectPtr();

  // Some basic validation of the incoming Spirit job
  if (!j->csm_host_.get() || !j->dat_host_.get() || !j->tra_host_.get() || !j->dcw_host_.get() || !j->reg_host_.get()) {
    GDEBUG("Received an incomplete Spirit job\n");
    return GADGET_FAIL;
  }

  // Job geometry: samples per readout, coil channels, and how many full
  // trajectory rotations (and thus frames) are contained in the data.
  unsigned int samples = j->dat_host_->get_size(0);
  unsigned int channels = j->dat_host_->get_size(1);
  unsigned int rotations = samples / j->tra_host_->get_number_of_elements();
  unsigned int frames = j->tra_host_->get_size(1)*rotations;

  // NOTE(review): %d with a size_t argument below is a printf format
  // mismatch on LP64 targets — consider casting or %zu.
  if( samples%j->tra_host_->get_number_of_elements() ) {
    GDEBUG("Mismatch between number of samples (%d) and number of k-space coordinates (%d).\nThe first should be a multiplum of the latter.\n", samples, j->tra_host_->get_number_of_elements());
    return GADGET_FAIL;
  }

  // Upload trajectory, density compensation weights, coil sensitivity maps
  // and the k-space samples to the device.
  boost::shared_ptr< cuNDArray<floatd2> > traj(new cuNDArray<floatd2> (j->tra_host_.get()));
  boost::shared_ptr< cuNDArray<float> > dcw(new cuNDArray<float> (j->dcw_host_.get()));
  sqrt_inplace(dcw.get()); //Take square root to use for weighting
  boost::shared_ptr< cuNDArray<float_complext> > csm(new cuNDArray<float_complext> (j->csm_host_.get()));
  boost::shared_ptr< cuNDArray<float_complext> > device_samples(new cuNDArray<float_complext> (j->dat_host_.get()));

  cudaDeviceProp deviceProp;
  if( cudaGetDeviceProperties( &deviceProp, device_number_ ) != cudaSuccess) {
    GDEBUG( "Error: unable to query device properties.\n" );
    return GADGET_FAIL;
  }

  unsigned int warp_size = deviceProp.warpSize;

  // Recon matrix size is taken from the regularization image; the oversampled
  // size is rounded up to a multiple of the device warp size.
  matrix_size_ = uint64d2( j->reg_host_->get_size(0), j->reg_host_->get_size(1) );

  matrix_size_os_ = uint64d2(((static_cast<unsigned int>(std::ceil(matrix_size_[0]*oversampling_factor_))+warp_size-1)/warp_size)*warp_size, ((static_cast<unsigned int>(std::ceil(matrix_size_[1]*oversampling_factor_))+warp_size-1)/warp_size)*warp_size);

  if( !matrix_size_reported_ ) {
    GDEBUG("Matrix size : [%d,%d] \n", matrix_size_[0], matrix_size_[1]);
    GDEBUG("Matrix size OS : [%d,%d] \n", matrix_size_os_[0], matrix_size_os_[1]);
    matrix_size_reported_ = true;
  }

  // Image domain is [x, y, frames, channels]; codomain matches the samples.
  std::vector<size_t> image_dims = to_std_vector(matrix_size_);
  image_dims.push_back(frames);
  image_dims.push_back(channels);
  GDEBUG("Number of coils: %d %d \n",channels,image_dims.size());

  // Configure the encoding operator (NFFT-based, with sqrt(dcw) weighting)
  E_->set_domain_dimensions(&image_dims);
  E_->set_codomain_dimensions(device_samples->get_dimensions().get());
  E_->set_dcw(dcw);
  E_->setup( matrix_size_, matrix_size_os_, static_cast<float>(kernel_width_) );
  E_->preprocess(traj.get());

  // Configure the SPIRiT calibration operator with a device copy of the csm
  boost::shared_ptr< cuNDArray<float_complext> > csm_device( new cuNDArray<float_complext>( csm.get() ));
  S_->set_calibration_kernels(csm_device);
  S_->set_domain_dimensions(&image_dims);
  S_->set_codomain_dimensions(&image_dims);

  // Disabled: regularization image and preconditioning-weight setup
  /*
  boost::shared_ptr< cuNDArray<float_complext> > reg_image(new cuNDArray<float_complext> (j->reg_host_.get()));
  R_->compute(reg_image.get());

  // Define preconditioning weights
  boost::shared_ptr< cuNDArray<float> > _precon_weights = sum(abs_square(csm.get()).get(), 2);
  boost::shared_ptr<cuNDArray<float> > R_diag = R_->get();
  *R_diag *= float(kappa_);
  *_precon_weights += *R_diag;
  R_diag.reset();
  reciprocal_sqrt_inplace(_precon_weights.get());
  boost::shared_ptr< cuNDArray<float_complext> > precon_weights = real_to_complex<float_complext>( _precon_weights.get() );
  _precon_weights.reset();
  D_->set_weights( precon_weights );
  */

  // Disabled: dump of intermediate arrays for debugging
  /*{
  static int counter = 0;
  char filename[256];
  sprintf((char*)filename, "_traj_%d.real", counter);
  write_nd_array<floatd2>( traj->to_host().get(), filename );
  sprintf((char*)filename, "_dcw_%d.real", counter);
  write_nd_array<float>( dcw->to_host().get(), filename );
  sprintf((char*)filename, "_csm_%d.cplx", counter);
  write_nd_array<float_complext>( csm->to_host().get(), filename );
  sprintf((char*)filename, "_samples_%d.cplx", counter);
  write_nd_array<float_complext>( device_samples->to_host().get(), filename );
  sprintf((char*)filename, "_reg_%d.cplx", counter);
  write_nd_array<float_complext>( reg_image->to_host().get(), filename );
  counter++;
  }*/

  // Invoke solver
  //
  boost::shared_ptr< cuNDArray<float_complext> > cgresult;

  {
    // Optional timing of just the solve phase
    boost::shared_ptr<GPUTimer> solve_timer;
    if( output_timing_ )
      solve_timer = boost::shared_ptr<GPUTimer>( new GPUTimer("gpuCgSpiritGadget::solve()") );

    cgresult = cg_.solve(device_samples.get());

    if( output_timing_ )
      solve_timer.reset();
  }

  if (!cgresult.get()) {
    GDEBUG("Iterative_spirit_compute failed\n");
    return GADGET_FAIL;
  }

  // Disabled: dump of the reconstructed magnitude image
  /*
  static int counter = 0;
  char filename[256];
  sprintf((char*)filename, "recon_%d.real", counter);
  write_nd_array<float>( abs(cgresult.get())->to_host().get(), filename );
  counter++;
  */

  // If the recon matrix size exceeds the sequence matrix size then crop
  if( matrix_size_seq_ != matrix_size_ )
    cgresult = crop<float_complext,2>( (matrix_size_-matrix_size_seq_)>>1, matrix_size_seq_, cgresult.get() );

  // Combine coil images
  //
  // Root-sum-of-squares over the channel dimension (dim 3 of [x,y,frames,channels])
  cgresult = real_to_complex<float_complext>(sqrt(sum(abs_square(cgresult.get()).get(), 3).get()).get()); // RSS
  //cgresult = sum(cgresult.get(), 2);

  // Pass on the reconstructed images
  //
  put_frames_on_que(frames,rotations,j,cgresult.get());

  frame_counter_ += frames;

  if( output_timing_ )
    process_timer.reset();

  m1->release();
  return GADGET_OK;
}
int gpuOsSenseGadget::process(GadgetContainerMessage<ISMRMRD::ImageHeader> *m1, GadgetContainerMessage<GenericReconJob> *m2) { // Is this data for this gadget's set/slice? // GDEBUG("Starting gpuOsSenseGadget\n"); if( m1->getObjectPtr()->set != set_number_ || m1->getObjectPtr()->slice != slice_number_ ) { // No, pass it downstream... return this->next()->putq(m1); } //GDEBUG("gpuOsSenseGadget::process\n"); //GPUTimer timer("gpuOsSenseGadget::process"); if (!is_configured_) { GDEBUG("\nData received before configuration complete\n"); return GADGET_FAIL; } GenericReconJob* j = m2->getObjectPtr(); // Let's first check that this job has the required data... if (!j->csm_host_.get() || !j->dat_host_.get() || !j->tra_host_.get() || !j->dcw_host_.get()) { GDEBUG("Received an incomplete Sense job\n"); return GADGET_FAIL; } unsigned int samples = j->dat_host_->get_size(0); unsigned int channels = j->dat_host_->get_size(1); unsigned int rotations = samples / j->tra_host_->get_number_of_elements(); unsigned int frames = j->tra_host_->get_size(1)*rotations; if( samples%j->tra_host_->get_number_of_elements() ) { GDEBUG("Mismatch between number of samples (%d) and number of k-space coordinates (%d).\nThe first should be a multiplum of the latter.\n", samples, j->tra_host_->get_number_of_elements()); return GADGET_FAIL; } boost::shared_ptr< cuNDArray<floatd2> > traj(new cuNDArray<floatd2> (j->tra_host_.get())); boost::shared_ptr< cuNDArray<float> > dcw(new cuNDArray<float> (j->dcw_host_.get())); sqrt_inplace(dcw.get()); boost::shared_ptr< cuNDArray<float_complext> > csm(new cuNDArray<float_complext> (j->csm_host_.get())); boost::shared_ptr< cuNDArray<float_complext> > device_samples(new cuNDArray<float_complext> (j->dat_host_.get())); // Take the reconstruction matrix size from the regulariaztion image. // It could be oversampled from the sequence specified size... 
matrix_size_ = uint64d2( j->reg_host_->get_size(0), j->reg_host_->get_size(1) ); cudaDeviceProp deviceProp; if( cudaGetDeviceProperties( &deviceProp, device_number_ ) != cudaSuccess) { GDEBUG( "\nError: unable to query device properties.\n" ); return GADGET_FAIL; } unsigned int warp_size = deviceProp.warpSize; matrix_size_os_ = uint64d2(((static_cast<unsigned int>(std::ceil(matrix_size_[0]*oversampling_factor_))+warp_size-1)/warp_size)*warp_size, ((static_cast<unsigned int>(std::ceil(matrix_size_[1]*oversampling_factor_))+warp_size-1)/warp_size)*warp_size); GDEBUG("Matrix size : [%d,%d] \n", matrix_size_[0], matrix_size_[1]); GDEBUG("Matrix size OS : [%d,%d] \n", matrix_size_os_[0], matrix_size_os_[1]); std::vector<size_t> image_dims = to_std_vector(matrix_size_); image_dims.push_back(frames); E_->set_domain_dimensions(&image_dims); E_->set_codomain_dimensions(device_samples->get_dimensions().get()); E_->set_csm(csm); E_->setup( matrix_size_, matrix_size_os_, kernel_width_ ); E_->preprocess(traj.get()); { auto precon = boost::make_shared<cuNDArray<float_complext>>(image_dims); fill(precon.get(),float_complext(1.0f)); //solver_.set_preconditioning_image(precon); } reg_image_ = boost::shared_ptr< cuNDArray<float_complext> >(new cuNDArray<float_complext>(&image_dims)); // These operators need their domain/codomain set before being added to the solver // //E_->set_dcw(dcw); GDEBUG("Prepared\n"); // Expand the average image to the number of frames // { cuNDArray<float_complext> tmp(*j->reg_host_); *reg_image_ = expand( tmp, frames ); } PICS_->set_prior(reg_image_); // Define preconditioning weights // //Apply weights //*device_samples *= *dcw; // Invoke solver // boost::shared_ptr< cuNDArray<float_complext> > result; { GDEBUG("Running NLCG solver\n"); GPUTimer timer("Running NLCG solver"); // Optionally, allow exclusive (per device) access to the solver // This may not matter much in terms of speed, but it can in terms of memory consumption // if( exclusive_access_ ) 
_mutex[device_number_].lock(); result = solver_.solve(device_samples.get()); if( exclusive_access_ ) _mutex[device_number_].unlock(); } // Provide some info about the scaling between the regularization and reconstruction. // If it is not close to one, PICCS does not work optimally... // if( alpha_ > 0.0 ){ cuNDArray<float_complext> gpureg(j->reg_host_.get()); boost::shared_ptr< cuNDArray<float_complext> > gpurec = sum(result.get(),2); *gpurec /= float(result->get_size(2)); float scale = abs(dot(gpurec.get(), gpurec.get())/dot(gpurec.get(),&gpureg)); GDEBUG("Scaling factor between regularization and reconstruction is %f.\n", scale); } if (!result.get()) { GDEBUG("\nNon-linear conjugate gradient solver failed\n"); return GADGET_FAIL; } /* static int counter = 0; char filename[256]; sprintf((char*)filename, "recon_sb_%d.cplx", counter); write_nd_array<float_complext>( sbresult->to_host().get(), filename ); counter++; */ // If the recon matrix size exceeds the sequence matrix size then crop if( matrix_size_seq_ != matrix_size_ ) *result = crop<float_complext,2>( (matrix_size_-matrix_size_seq_)>>1, matrix_size_seq_, *result ); // Now pass on the reconstructed images // this->put_frames_on_que(frames,rotations,j,result.get(),channels); frame_counter_ += frames; m1->release(); return GADGET_OK; }