int gpuCgSpiritGadget::process(GadgetContainerMessage<ISMRMRD::ImageHeader> *m1, GadgetContainerMessage<GenericReconJob> *m2) { // Is this data for this gadget's set/slice? // if( m1->getObjectPtr()->set != set_number_ || m1->getObjectPtr()->slice != slice_number_ ) { // No, pass it downstream... return this->next()->putq(m1); } //GDEBUG("gpuCgSpiritGadget::process\n"); boost::shared_ptr<GPUTimer> process_timer; if( output_timing_ ) process_timer = boost::shared_ptr<GPUTimer>( new GPUTimer("gpuCgSpiritGadget::process()") ); if (!is_configured_) { GDEBUG("Data received before configuration was completed\n"); return GADGET_FAIL; } GenericReconJob* j = m2->getObjectPtr(); // Some basic validation of the incoming Spirit job if (!j->csm_host_.get() || !j->dat_host_.get() || !j->tra_host_.get() || !j->dcw_host_.get() || !j->reg_host_.get()) { GDEBUG("Received an incomplete Spirit job\n"); return GADGET_FAIL; } unsigned int samples = j->dat_host_->get_size(0); unsigned int channels = j->dat_host_->get_size(1); unsigned int rotations = samples / j->tra_host_->get_number_of_elements(); unsigned int frames = j->tra_host_->get_size(1)*rotations; if( samples%j->tra_host_->get_number_of_elements() ) { GDEBUG("Mismatch between number of samples (%d) and number of k-space coordinates (%d).\nThe first should be a multiplum of the latter.\n", samples, j->tra_host_->get_number_of_elements()); return GADGET_FAIL; } boost::shared_ptr< cuNDArray<floatd2> > traj(new cuNDArray<floatd2> (j->tra_host_.get())); boost::shared_ptr< cuNDArray<float> > dcw(new cuNDArray<float> (j->dcw_host_.get())); sqrt_inplace(dcw.get()); //Take square root to use for weighting boost::shared_ptr< cuNDArray<float_complext> > csm(new cuNDArray<float_complext> (j->csm_host_.get())); boost::shared_ptr< cuNDArray<float_complext> > device_samples(new cuNDArray<float_complext> (j->dat_host_.get())); cudaDeviceProp deviceProp; if( cudaGetDeviceProperties( &deviceProp, device_number_ ) != cudaSuccess) { GDEBUG( "Error: unable to query device properties.\n" ); return GADGET_FAIL; } unsigned int warp_size = deviceProp.warpSize; matrix_size_ = uint64d2( j->reg_host_->get_size(0), j->reg_host_->get_size(1) ); matrix_size_os_ = uint64d2(((static_cast<unsigned int>(std::ceil(matrix_size_[0]*oversampling_factor_))+warp_size-1)/warp_size)*warp_size, ((static_cast<unsigned int>(std::ceil(matrix_size_[1]*oversampling_factor_))+warp_size-1)/warp_size)*warp_size); if( !matrix_size_reported_ ) { GDEBUG("Matrix size : [%d,%d] \n", matrix_size_[0], matrix_size_[1]); GDEBUG("Matrix size OS : [%d,%d] \n", matrix_size_os_[0], matrix_size_os_[1]); matrix_size_reported_ = true; } std::vector<size_t> image_dims = to_std_vector(matrix_size_); image_dims.push_back(frames); image_dims.push_back(channels); GDEBUG("Number of coils: %d %d \n",channels,image_dims.size()); E_->set_domain_dimensions(&image_dims); E_->set_codomain_dimensions(device_samples->get_dimensions().get()); E_->set_dcw(dcw); E_->setup( matrix_size_, matrix_size_os_, static_cast<float>(kernel_width_) ); E_->preprocess(traj.get()); boost::shared_ptr< cuNDArray<float_complext> > csm_device( new cuNDArray<float_complext>( csm.get() )); S_->set_calibration_kernels(csm_device); S_->set_domain_dimensions(&image_dims); S_->set_codomain_dimensions(&image_dims); /* boost::shared_ptr< cuNDArray<float_complext> > reg_image(new cuNDArray<float_complext> (j->reg_host_.get())); R_->compute(reg_image.get()); // Define preconditioning weights boost::shared_ptr< cuNDArray<float> > _precon_weights = sum(abs_square(csm.get()).get(), 2); boost::shared_ptr<cuNDArray<float> > R_diag = R_->get(); *R_diag *= float(kappa_); *_precon_weights += *R_diag; R_diag.reset(); reciprocal_sqrt_inplace(_precon_weights.get()); boost::shared_ptr< cuNDArray<float_complext> > precon_weights = real_to_complex<float_complext>( _precon_weights.get() ); _precon_weights.reset(); D_->set_weights( precon_weights ); */ /*{ static int counter = 0; char filename[256]; sprintf((char*)filename, "_traj_%d.real", counter); write_nd_array<floatd2>( traj->to_host().get(), filename ); sprintf((char*)filename, "_dcw_%d.real", counter); write_nd_array<float>( dcw->to_host().get(), filename ); sprintf((char*)filename, "_csm_%d.cplx", counter); write_nd_array<float_complext>( csm->to_host().get(), filename ); sprintf((char*)filename, "_samples_%d.cplx", counter); write_nd_array<float_complext>( device_samples->to_host().get(), filename ); sprintf((char*)filename, "_reg_%d.cplx", counter); write_nd_array<float_complext>( reg_image->to_host().get(), filename ); counter++; }*/ // Invoke solver // boost::shared_ptr< cuNDArray<float_complext> > cgresult; { boost::shared_ptr<GPUTimer> solve_timer; if( output_timing_ ) solve_timer = boost::shared_ptr<GPUTimer>( new GPUTimer("gpuCgSpiritGadget::solve()") ); cgresult = cg_.solve(device_samples.get()); if( output_timing_ ) solve_timer.reset(); } if (!cgresult.get()) { GDEBUG("Iterative_spirit_compute failed\n"); return GADGET_FAIL; } /* static int counter = 0; char filename[256]; sprintf((char*)filename, "recon_%d.real", counter); write_nd_array<float>( abs(cgresult.get())->to_host().get(), filename ); counter++; */ // If the recon matrix size exceeds the sequence matrix size then crop if( matrix_size_seq_ != matrix_size_ ) cgresult = crop<float_complext,2>( (matrix_size_-matrix_size_seq_)>>1, matrix_size_seq_, cgresult.get() ); // Combine coil images // cgresult = real_to_complex<float_complext>(sqrt(sum(abs_square(cgresult.get()).get(), 3).get()).get()); // RSS //cgresult = sum(cgresult.get(), 2); // Pass on the reconstructed images // put_frames_on_que(frames,rotations,j,cgresult.get()); frame_counter_ += frames; if( output_timing_ ) process_timer.reset(); m1->release(); return GADGET_OK; }
int gpuOsSenseGadget::process(GadgetContainerMessage<ISMRMRD::ImageHeader> *m1, GadgetContainerMessage<GenericReconJob> *m2) { // Is this data for this gadget's set/slice? // GDEBUG("Starting gpuOsSenseGadget\n"); if( m1->getObjectPtr()->set != set_number_ || m1->getObjectPtr()->slice != slice_number_ ) { // No, pass it downstream... return this->next()->putq(m1); } //GDEBUG("gpuOsSenseGadget::process\n"); //GPUTimer timer("gpuOsSenseGadget::process"); if (!is_configured_) { GDEBUG("\nData received before configuration complete\n"); return GADGET_FAIL; } GenericReconJob* j = m2->getObjectPtr(); // Let's first check that this job has the required data... if (!j->csm_host_.get() || !j->dat_host_.get() || !j->tra_host_.get() || !j->dcw_host_.get()) { GDEBUG("Received an incomplete Sense job\n"); return GADGET_FAIL; } unsigned int samples = j->dat_host_->get_size(0); unsigned int channels = j->dat_host_->get_size(1); unsigned int rotations = samples / j->tra_host_->get_number_of_elements(); unsigned int frames = j->tra_host_->get_size(1)*rotations; if( samples%j->tra_host_->get_number_of_elements() ) { GDEBUG("Mismatch between number of samples (%d) and number of k-space coordinates (%d).\nThe first should be a multiplum of the latter.\n", samples, j->tra_host_->get_number_of_elements()); return GADGET_FAIL; } boost::shared_ptr< cuNDArray<floatd2> > traj(new cuNDArray<floatd2> (j->tra_host_.get())); boost::shared_ptr< cuNDArray<float> > dcw(new cuNDArray<float> (j->dcw_host_.get())); sqrt_inplace(dcw.get()); boost::shared_ptr< cuNDArray<float_complext> > csm(new cuNDArray<float_complext> (j->csm_host_.get())); boost::shared_ptr< cuNDArray<float_complext> > device_samples(new cuNDArray<float_complext> (j->dat_host_.get())); // Take the reconstruction matrix size from the regulariaztion image. // It could be oversampled from the sequence specified size... matrix_size_ = uint64d2( j->reg_host_->get_size(0), j->reg_host_->get_size(1) ); cudaDeviceProp deviceProp; if( cudaGetDeviceProperties( &deviceProp, device_number_ ) != cudaSuccess) { GDEBUG( "\nError: unable to query device properties.\n" ); return GADGET_FAIL; } unsigned int warp_size = deviceProp.warpSize; matrix_size_os_ = uint64d2(((static_cast<unsigned int>(std::ceil(matrix_size_[0]*oversampling_factor_))+warp_size-1)/warp_size)*warp_size, ((static_cast<unsigned int>(std::ceil(matrix_size_[1]*oversampling_factor_))+warp_size-1)/warp_size)*warp_size); GDEBUG("Matrix size : [%d,%d] \n", matrix_size_[0], matrix_size_[1]); GDEBUG("Matrix size OS : [%d,%d] \n", matrix_size_os_[0], matrix_size_os_[1]); std::vector<size_t> image_dims = to_std_vector(matrix_size_); image_dims.push_back(frames); E_->set_domain_dimensions(&image_dims); E_->set_codomain_dimensions(device_samples->get_dimensions().get()); E_->set_csm(csm); E_->setup( matrix_size_, matrix_size_os_, kernel_width_ ); E_->preprocess(traj.get()); { auto precon = boost::make_shared<cuNDArray<float_complext>>(image_dims); fill(precon.get(),float_complext(1.0f)); //solver_.set_preconditioning_image(precon); } reg_image_ = boost::shared_ptr< cuNDArray<float_complext> >(new cuNDArray<float_complext>(&image_dims)); // These operators need their domain/codomain set before being added to the solver // //E_->set_dcw(dcw); GDEBUG("Prepared\n"); // Expand the average image to the number of frames // { cuNDArray<float_complext> tmp(*j->reg_host_); *reg_image_ = expand( tmp, frames ); } PICS_->set_prior(reg_image_); // Define preconditioning weights // //Apply weights //*device_samples *= *dcw; // Invoke solver // boost::shared_ptr< cuNDArray<float_complext> > result; { GDEBUG("Running NLCG solver\n"); GPUTimer timer("Running NLCG solver"); // Optionally, allow exclusive (per device) access to the solver // This may not matter much in terms of speed, but it can in terms of memory consumption // if( exclusive_access_ ) _mutex[device_number_].lock(); result = solver_.solve(device_samples.get()); if( exclusive_access_ ) _mutex[device_number_].unlock(); } // Provide some info about the scaling between the regularization and reconstruction. // If it is not close to one, PICCS does not work optimally... // if( alpha_ > 0.0 ){ cuNDArray<float_complext> gpureg(j->reg_host_.get()); boost::shared_ptr< cuNDArray<float_complext> > gpurec = sum(result.get(),2); *gpurec /= float(result->get_size(2)); float scale = abs(dot(gpurec.get(), gpurec.get())/dot(gpurec.get(),&gpureg)); GDEBUG("Scaling factor between regularization and reconstruction is %f.\n", scale); } if (!result.get()) { GDEBUG("\nNon-linear conjugate gradient solver failed\n"); return GADGET_FAIL; } /* static int counter = 0; char filename[256]; sprintf((char*)filename, "recon_sb_%d.cplx", counter); write_nd_array<float_complext>( sbresult->to_host().get(), filename ); counter++; */ // If the recon matrix size exceeds the sequence matrix size then crop if( matrix_size_seq_ != matrix_size_ ) *result = crop<float_complext,2>( (matrix_size_-matrix_size_seq_)>>1, matrix_size_seq_, *result ); // Now pass on the reconstructed images // this->put_frames_on_que(frames,rotations,j,result.get(),channels); frame_counter_ += frames; m1->release(); return GADGET_OK; }