Esempio n. 1
0
  /**
   * Callback for a paired color and depth image.
   *
   * Converts both messages to OpenCV matrices, displays them, replaces NaN
   * depth readings with zero, optionally writes both frames to disk (when
   * SAVE_IMAGES is defined) and finally computes and displays the saliency map
   * produced by csm.
   *
   * The call blocks in cv::waitKey() until a key is pressed.
   *
   * @param img_msg   Color image message; converted to the BGR8 encoding.
   * @param depth_msg Depth image message; copied in its own encoding and
   *                  required to be single-channel 32-bit float.
   */
  void callback(const sensor_msgs::ImageConstPtr& img_msg,
                const sensor_msgs::ImageConstPtr& depth_msg)
  {
    // Convert images to OpenCV format. toCvCopy throws on an unsupported or
    // inconsistent encoding; drop the frame pair instead of letting the
    // exception escape the callback.
    cv_bridge::CvImagePtr input_cv_ptr;
    cv_bridge::CvImagePtr depth_cv_ptr;
    try
    {
      input_cv_ptr = cv_bridge::toCvCopy(img_msg, sensor_msgs::image_encodings::BGR8);
      depth_cv_ptr = cv_bridge::toCvCopy(depth_msg);
    }
    catch (const cv_bridge::Exception& e)
    {
      ROS_ERROR_STREAM("cv_bridge exception: " << e.what());
      return;
    }
    cv::Mat input_frame = input_cv_ptr->image;
    cv::Mat depth_frame = depth_cv_ptr->image;
    ROS_INFO_STREAM("input_frame.type()" << input_frame.type());
    cv::imshow("input_frame", input_frame);
    cv::imshow("depth_frame", depth_frame);
    // NOTE(review): the frame was requested as BGR8 above, so this swap leaves
    // it in RGB channel order -- confirm that is what csm and imwrite expect.
    cv::cvtColor(input_frame, input_frame, CV_RGB2BGR);

    // Everything below treats the depth image as 32-bit float; reading any
    // other element type as float would access memory outside the matrix.
    if (depth_frame.type() != CV_32FC1)
    {
      ROS_ERROR_STREAM("Expected a CV_32FC1 depth image, got type "
                       << depth_frame.type());
      return;
    }

    // Replace invalid (NaN) depth readings with zero.
    cv::patchNaNs(depth_frame, 0.0);
    ROS_INFO_STREAM("Removed NaNs");

#ifdef SAVE_IMAGES
    // Save frames to disk as uncompressed PNGs.
    std::vector<int> params;
    params.push_back(CV_IMWRITE_PNG_COMPRESSION);
    params.push_back(0);
    std::stringstream intensity_name;
    intensity_name << "/home/thermans/Desktop/intensity" << img_count_
                   << ".png";
    std::stringstream depth_name;
    depth_name << "/home/thermans/Desktop/depth" << img_count_++ << ".png";
    // Depth is scaled by 512 and stored as 16-bit unsigned.
    cv::Mat depth_to_save;
    depth_frame.convertTo(depth_to_save, CV_16UC1, 512.0);
    cv::imwrite(intensity_name.str(), input_frame, params);
    cv::imwrite(depth_name.str(), depth_to_save, params);
    ROS_INFO_STREAM("Saved images");
#endif // SAVE_IMAGES

    ROS_INFO_STREAM("Scaling image");
    cv::Mat scaled_depth_frame = csm.scaleMap(depth_frame);
    cv::imshow("scaled_frame", scaled_depth_frame);
    // convertTo allocates the destination itself, so no pre-sized copy of the
    // depth frame is needed.
    cv::Mat scaled_depth;
    scaled_depth_frame.convertTo(scaled_depth, CV_32FC1);
    ROS_INFO_STREAM("Running saliency");
    cv::Mat saliency_map = csm(input_frame, scaled_depth);
    // cv::Mat saliency_map = csm(input_frame, depth_frame);
    double min_val = 0;
    double max_val = 0;
    cv::minMaxLoc(saliency_map, &min_val, &max_val);
    ROS_INFO_STREAM("Minimum saliency unscaled: " << min_val << "\tmax: "
                    << max_val);
    cv::imshow("unsaceled saliency", saliency_map);
    cv::waitKey();
  }
Esempio n. 2
0
void test2()
{
    ses::EntityManager::Ptr sem = std::shared_ptr<ses::EntityManager>(new ServerEntityManager());
    ses::SystemManager ssm(sem);
    ssm.addSystem<RenderSystem>();

    ses::EntityManager::Ptr cem = std::shared_ptr<ses::EntityManager>(new ClientEntityManager());
    ses::SystemManager csm(cem);
    csm.addSystem<RenderSystem>();
    csm.addSystem<InputSystem>();

    sf::RenderWindow wS(sf::VideoMode(500,500),"Server");
    wS.setPosition({100,100});
    sf::RenderWindow wC(sf::VideoMode(500,500),"Client");
    wC.setPosition({700,100});

    sem->usePrefab("Sprite");

    sf::Clock clock;
    bool run = true;
    while (run)
    {
        sf::Event e;
        while (wS.pollEvent(e))
        {
            if (e.type == sf::Event::Closed)
            {
                run = false;
            }
        }
        while (wC.pollEvent(e))
        {
            if (e.type == sf::Event::Closed)
            {
                run = false;
            }
        }

        csm.getSystem<InputSystem>().update(clock.restart());

        wS.clear();
        ssm.getSystem<RenderSystem>().render(wS);
        wS.display();

        wC.clear();
        csm.getSystem<RenderSystem>().render(wC);
        wC.display();
    }
    wC.close();
    wS.close();
}
void
StringCaseTest::TestCasingImpl(const UnicodeString &input,
                               const UnicodeString &output,
                               int32_t whichCase,
                               void *iter, const char *localeID, uint32_t options) {
    // UnicodeString
    UnicodeString result;
    const char *name;
    Locale locale(localeID);

    result=input;
    switch(whichCase) {
    case TEST_LOWER:
        name="toLower";
        result.toLower(locale);
        break;
    case TEST_UPPER:
        name="toUpper";
        result.toUpper(locale);
        break;
#if !UCONFIG_NO_BREAK_ITERATION
    case TEST_TITLE:
        name="toTitle";
        result.toTitle((BreakIterator *)iter, locale, options);
        break;
#endif
    case TEST_FOLD:
        name="foldCase";
        result.foldCase(options);
        break;
    default:
        name="";
        break; // won't happen
    }
    if(result!=output) {
        dataerrln("error: UnicodeString.%s() got a wrong result for a test case from casing.res", name);
    }
#if !UCONFIG_NO_BREAK_ITERATION
    if(whichCase==TEST_TITLE && options==0) {
        result=input;
        result.toTitle((BreakIterator *)iter, locale);
        if(result!=output) {
            dataerrln("error: UnicodeString.toTitle(options=0) got a wrong result for a test case from casing.res");
        }
    }
#endif

    // UTF-8
    char utf8In[100], utf8Out[100];
    int32_t utf8InLength, utf8OutLength, resultLength;
    UChar *buffer;

    IcuTestErrorCode errorCode(*this, "TestCasingImpl");
    LocalUCaseMapPointer csm(ucasemap_open(localeID, options, errorCode));
#if !UCONFIG_NO_BREAK_ITERATION
    if(iter!=NULL) {
        // Clone the break iterator so that the UCaseMap can safely adopt it.
        UBreakIterator *clone=ubrk_safeClone((UBreakIterator *)iter, NULL, NULL, errorCode);
        ucasemap_setBreakIterator(csm.getAlias(), clone, errorCode);
    }
#endif

    u_strToUTF8(utf8In, (int32_t)sizeof(utf8In), &utf8InLength, input.getBuffer(), input.length(), errorCode);
    switch(whichCase) {
    case TEST_LOWER:
        name="ucasemap_utf8ToLower";
        utf8OutLength=ucasemap_utf8ToLower(csm.getAlias(),
                    utf8Out, (int32_t)sizeof(utf8Out),
                    utf8In, utf8InLength, errorCode);
        break;
    case TEST_UPPER:
        name="ucasemap_utf8ToUpper";
        utf8OutLength=ucasemap_utf8ToUpper(csm.getAlias(),
                    utf8Out, (int32_t)sizeof(utf8Out),
                    utf8In, utf8InLength, errorCode);
        break;
#if !UCONFIG_NO_BREAK_ITERATION
    case TEST_TITLE:
        name="ucasemap_utf8ToTitle";
        utf8OutLength=ucasemap_utf8ToTitle(csm.getAlias(),
                    utf8Out, (int32_t)sizeof(utf8Out),
                    utf8In, utf8InLength, errorCode);
        break;
#endif
    case TEST_FOLD:
        name="ucasemap_utf8FoldCase";
        utf8OutLength=ucasemap_utf8FoldCase(csm.getAlias(),
                    utf8Out, (int32_t)sizeof(utf8Out),
                    utf8In, utf8InLength, errorCode);
        break;
    default:
        name="";
        utf8OutLength=0;
        break; // won't happen
    }
    buffer=result.getBuffer(utf8OutLength);
    u_strFromUTF8(buffer, result.getCapacity(), &resultLength, utf8Out, utf8OutLength, errorCode);
    result.releaseBuffer(errorCode.isSuccess() ? resultLength : 0);

    if(errorCode.isFailure()) {
        errcheckln(errorCode, "error: %s() got an error for a test case from casing.res - %s", name, u_errorName(errorCode));
        errorCode.reset();
    } else if(result!=output) {
        errln("error: %s() got a wrong result for a test case from casing.res", name);
        errln("expected \"" + output + "\" got \"" + result + "\"" );
    }
}
  /**
   * Reconstructs one Spirit job with the conjugate gradient solver.
   *
   * Messages whose set/slice do not match this gadget are forwarded downstream
   * untouched. Otherwise the job's host arrays are uploaded to the device, the
   * encoding operator E_ and the Spirit operator S_ are configured for the
   * current matrix size, cg_ is run on the samples, the result is cropped to
   * the sequence matrix size if needed, the coil images are combined by root
   * sum of squares and the frames are passed on via put_frames_on_que().
   *
   * @param m1 Image header message; released on success.
   * @param m2 Message carrying the GenericReconJob to reconstruct.
   * @return GADGET_OK on success, GADGET_FAIL on an unconfigured gadget, an
   *         incomplete or inconsistent job, a device query failure or a solver
   *         failure; for a foreign set/slice the result of the downstream
   *         putq().
   */
  int gpuCgSpiritGadget::process(GadgetContainerMessage<ISMRMRD::ImageHeader> *m1, GadgetContainerMessage<GenericReconJob> *m2)
  {
    // Is this data for this gadget's set/slice?
    //

    if( m1->getObjectPtr()->set != set_number_ || m1->getObjectPtr()->slice != slice_number_ ) {
      // No, pass it downstream...
      return this->next()->putq(m1);
    }

    //GDEBUG("gpuCgSpiritGadget::process\n");

    // The timer object reports when it is destroyed (reset below).
    boost::shared_ptr<GPUTimer> process_timer;
    if( output_timing_ )
      process_timer = boost::shared_ptr<GPUTimer>( new GPUTimer("gpuCgSpiritGadget::process()") );

    if (!is_configured_) {
      GDEBUG("Data received before configuration was completed\n");
      return GADGET_FAIL;
    }

    GenericReconJob* j = m2->getObjectPtr();

    // Some basic validation of the incoming Spirit job
    if (!j->csm_host_.get() || !j->dat_host_.get() || !j->tra_host_.get() || !j->dcw_host_.get() || !j->reg_host_.get()) {
      GDEBUG("Received an incomplete Spirit job\n");
      return GADGET_FAIL;
    }

    // An empty trajectory would make the division/modulo below divide by zero.
    const unsigned long long num_coords = j->tra_host_->get_number_of_elements();
    if( num_coords == 0 ) {
      GDEBUG("Received a Spirit job with an empty trajectory\n");
      return GADGET_FAIL;
    }

    unsigned int samples = j->dat_host_->get_size(0);
    unsigned int channels = j->dat_host_->get_size(1);

    if( samples%num_coords ) {
      GDEBUG("Mismatch between number of samples (%u) and number of k-space coordinates (%llu).\nThe first should be a multiplum of the latter.\n",
                    samples, num_coords);
      return GADGET_FAIL;
    }

    unsigned int rotations = static_cast<unsigned int>(samples / num_coords);
    unsigned int frames = j->tra_host_->get_size(1)*rotations;

    // Upload the job's host arrays to the device.
    boost::shared_ptr< cuNDArray<floatd2> > traj(new cuNDArray<floatd2> (j->tra_host_.get()));
    boost::shared_ptr< cuNDArray<float> > dcw(new cuNDArray<float> (j->dcw_host_.get()));
    sqrt_inplace(dcw.get()); //Take square root to use for weighting
    boost::shared_ptr< cuNDArray<float_complext> > csm(new cuNDArray<float_complext> (j->csm_host_.get()));
    boost::shared_ptr< cuNDArray<float_complext> > device_samples(new cuNDArray<float_complext> (j->dat_host_.get()));

    cudaDeviceProp deviceProp;
    if( cudaGetDeviceProperties( &deviceProp, device_number_ ) != cudaSuccess) {
      GDEBUG( "Error: unable to query device properties.\n" );
      return GADGET_FAIL;
    }

    unsigned int warp_size = deviceProp.warpSize;

    // The reconstruction matrix size is taken from the regularization image.
    matrix_size_ = uint64d2( j->reg_host_->get_size(0), j->reg_host_->get_size(1) );

    // Oversampled matrix size, rounded up to a multiple of the warp size.
    matrix_size_os_ =
      uint64d2(((static_cast<unsigned int>(std::ceil(matrix_size_[0]*oversampling_factor_))+warp_size-1)/warp_size)*warp_size,
               ((static_cast<unsigned int>(std::ceil(matrix_size_[1]*oversampling_factor_))+warp_size-1)/warp_size)*warp_size);

    if( !matrix_size_reported_ ) {
      // The elements are 64-bit; cast explicitly so the format specifier matches.
      GDEBUG("Matrix size    : [%llu,%llu] \n",
             static_cast<unsigned long long>(matrix_size_[0]),
             static_cast<unsigned long long>(matrix_size_[1]));
      GDEBUG("Matrix size OS : [%llu,%llu] \n",
             static_cast<unsigned long long>(matrix_size_os_[0]),
             static_cast<unsigned long long>(matrix_size_os_[1]));
      matrix_size_reported_ = true;
    }

    // Image dimensions: matrix size, then frames, then channels.
    std::vector<size_t> image_dims = to_std_vector(matrix_size_);

    image_dims.push_back(frames);
    image_dims.push_back(channels);
    GDEBUG("Number of coils: %u %llu \n", channels, static_cast<unsigned long long>(image_dims.size()));

    E_->set_domain_dimensions(&image_dims);
    E_->set_codomain_dimensions(device_samples->get_dimensions().get());
    E_->set_dcw(dcw);
    E_->setup( matrix_size_, matrix_size_os_, static_cast<float>(kernel_width_) );
    E_->preprocess(traj.get());

    // NOTE(review): this makes a second device copy of csm; passing csm itself
    // may be sufficient -- confirm S_ does not need a private copy.
    boost::shared_ptr< cuNDArray<float_complext> > csm_device( new cuNDArray<float_complext>( csm.get() ));
    S_->set_calibration_kernels(csm_device);
    S_->set_domain_dimensions(&image_dims);
    S_->set_codomain_dimensions(&image_dims);

    /*
    boost::shared_ptr< cuNDArray<float_complext> > reg_image(new cuNDArray<float_complext> (j->reg_host_.get()));
    R_->compute(reg_image.get());

    // Define preconditioning weights
    boost::shared_ptr< cuNDArray<float> > _precon_weights = sum(abs_square(csm.get()).get(), 2);
    boost::shared_ptr<cuNDArray<float> > R_diag = R_->get();
    *R_diag *= float(kappa_);
    *_precon_weights += *R_diag;
    R_diag.reset();
    reciprocal_sqrt_inplace(_precon_weights.get());	
    boost::shared_ptr< cuNDArray<float_complext> > precon_weights = real_to_complex<float_complext>( _precon_weights.get() );
    _precon_weights.reset();
    D_->set_weights( precon_weights );
    */

    /*{
      static int counter = 0;
      char filename[256];
      sprintf((char*)filename, "_traj_%d.real", counter);
      write_nd_array<floatd2>( traj->to_host().get(), filename );
      sprintf((char*)filename, "_dcw_%d.real", counter);
      write_nd_array<float>( dcw->to_host().get(), filename );
      sprintf((char*)filename, "_csm_%d.cplx", counter);
      write_nd_array<float_complext>( csm->to_host().get(), filename );
      sprintf((char*)filename, "_samples_%d.cplx", counter);
      write_nd_array<float_complext>( device_samples->to_host().get(), filename );
      sprintf((char*)filename, "_reg_%d.cplx", counter);
      write_nd_array<float_complext>( reg_image->to_host().get(), filename );
      counter++; 
      }*/

    // Invoke solver
    //

    boost::shared_ptr< cuNDArray<float_complext> > cgresult;

    {
      boost::shared_ptr<GPUTimer> solve_timer;
      if( output_timing_ )
        solve_timer = boost::shared_ptr<GPUTimer>( new GPUTimer("gpuCgSpiritGadget::solve()") );

      cgresult = cg_.solve(device_samples.get());

      if( output_timing_ )
        solve_timer.reset();
    }

    if (!cgresult.get()) {
      GDEBUG("Iterative_spirit_compute failed\n");
      return GADGET_FAIL;
    }

    /*
      static int counter = 0;
      char filename[256];
      sprintf((char*)filename, "recon_%d.real", counter);
      write_nd_array<float>( abs(cgresult.get())->to_host().get(), filename );
      counter++; 
    */

    // If the recon matrix size exceeds the sequence matrix size then crop
    if( matrix_size_seq_ != matrix_size_ )
      cgresult = crop<float_complext,2>( (matrix_size_-matrix_size_seq_)>>1, matrix_size_seq_, cgresult.get() );

    // Combine coil images
    //

    cgresult = real_to_complex<float_complext>(sqrt(sum(abs_square(cgresult.get()).get(), 3).get()).get()); // RSS
    //cgresult = sum(cgresult.get(), 2);

    // Pass on the reconstructed images
    //

    put_frames_on_que(frames,rotations,j,cgresult.get());
    frame_counter_ += frames;

    if( output_timing_ )
      process_timer.reset();

    m1->release();
    return GADGET_OK;
  }
Esempio n. 5
0
/* Analyses function; done when cuc command is entered in (sim) prompt.

   Loads "<module_name>.bin", detects and builds basic blocks, computes
   register dependencies, optimizes, and records the base timings.  Then, for
   every basic block that jumps to itself, tries combinations of prerolling
   and unrolling and stores the surviving timing options in that block.

   Returns the analysed function, or NULL when memory for it cannot be
   allocated or the module cannot be loaded.  */
cuc_func *
analyse_function (char *module_name, long orig_time,
                  unsigned long start_addr, unsigned long end_addr,
                  int memory_order, int num_runs)
{
  cuc_timings timings;
  cuc_func *func = (cuc_func *) malloc (sizeof (cuc_func));
  cuc_func *saved;
  int b, i, j;
  char tmp1[256];
  char tmp2[256];

  if (!func)
    return NULL;

  func->orig_time = orig_time;
  func->start_addr = start_addr;
  func->end_addr = end_addr;
  func->memory_order = memory_order;
  func->nfdeps = 0;
  func->fdeps = NULL;
  func->num_runs = num_runs;

  /* snprintf: module_name comes from the caller and must not be able to
     overflow the fixed-size name buffers.  */
  snprintf (tmp1, sizeof (tmp1), "%s.bin", module_name);
  cucdebug (2, "Loading %s.bin\n", module_name);
  if (cuc_load (tmp1))
    {
      free (func);
      return NULL;
    }

  log ("Detecting basic blocks\n");
  detect_bb (func);
  if (cuc_debug >= 2)
    print_cuc_insns ("WITH_BB_LIMITS", 0);

  //sprintf (tmp1, "%s.bin.mp", module_name);
  snprintf (tmp2, sizeof (tmp2), "%s.bin.bb", module_name);
  generate_bb_seq (func, config.sim.mprof_fn, tmp2);
  log ("Assuming %i clk cycle load (%i cyc burst)\n", runtime.cuc.mdelay[0],
       runtime.cuc.mdelay[2]);
  log ("Assuming %i clk cycle store (%i cyc burst)\n", runtime.cuc.mdelay[1],
       runtime.cuc.mdelay[3]);

  build_bb (func);
  if (cuc_debug >= 5)
    print_cuc_bb (func, "AFTER_BUILD_BB");
  reg_dep (func);

  log ("Detecting dependencies\n");
  if (cuc_debug >= 2)
    print_cuc_bb (func, "AFTER_REG_DEP");
  cuc_optimize (func);

#if 0
  csm (func);
#endif
  /* Keep the assignment outside assert (): with NDEBUG the whole assert
     expression is removed and saved would stay uninitialized.  */
  saved = dup_func (func);
  assert (saved);

  timings.preroll = timings.unroll = 1;
  timings.nshared = 0;

  add_latches (func);
  if (cuc_debug >= 1)
    print_cuc_bb (func, "AFTER_LATCHES");
  analyse_timings (func, &timings);

  free_func (func);
  log ("Base option: pre%i,un%i,sha%i: %icyc %.1f\n",
       timings.preroll, timings.unroll, timings.nshared, timings.new_time,
       timings.size);
  saved->timings = timings;

#if 1
  /* detect and unroll simple loops */
  for (b = 0; b < saved->num_bb; b++)
    {
      cuc_timings t[MAX_UNROLL * MAX_PREROLL];
      const int max_t = MAX_UNROLL * MAX_PREROLL;
      cuc_timings *ut;
      cuc_timings *cut = &t[0];
      int nt = 1;
      double csize;
      saved->bb[b].selected_tim = -1;

      /* Is it a loop? */
      if (saved->bb[b].next[0] != b && saved->bb[b].next[1] != b)
        continue;
      log ("Found loop at BB%x.  Trying to unroll.\n", b);
      t[0] = timings;
      t[0].b = b;
      t[0].preroll = 1;
      t[0].unroll = 1;
      t[0].nshared = 0;

      snprintf (tmp1, sizeof (tmp1), "%s.bin.bb", module_name);

      /* Try increasing unroll factors; for each one try increasing preroll
         factors for as long as the timing keeps improving.  Every attempt
         takes one slot of t[], so the search also stops once t[] is full
         instead of writing past its end.  */
      i = 1;
      do
        {
          cuc_timings *pt;
          cuc_timings *cpt = cut;
          j = 1;

          if (nt >= max_t)
            break;
          do
            {
              pt = cpt;
              cpt = preunroll_bb (tmp1, saved, &t[nt++], b, ++j, i);
            }
          while (j <= MAX_PREROLL && pt->new_time > cpt->new_time
                 && nt < max_t);
          if (nt >= max_t)
            break;
          i++;
          ut = cut;
          cut = preunroll_bb (tmp1, saved, &t[nt++], b, 1, i);
        }
      while (i <= MAX_UNROLL && ut->new_time > cut->new_time);

      /* Sort the timings */
#if 0
      if (cuc_debug >= 3)
        for (i = 0; i < nt; i++)
          PRINTF ("%i:%i,%i: %icyc\n",
                  t[i].b, t[i].preroll, t[i].unroll, t[i].new_time);
#endif

#if HAVE___COMPAR_FN_T
      qsort (t, nt, sizeof (cuc_timings),
             (__compar_fn_t) tim_comp);
#else
      qsort (t, nt, sizeof (cuc_timings),
             (int (*) (const void *, const void *)) tim_comp);
#endif

      /* Delete timings, that have worst time and bigger size than other */
      j = 1;
      csize = t[0].size;
      for (i = 1; i < nt; i++)
        if (t[i].size < csize)
          t[j++] = t[i];
      nt = j;

      cucdebug (1, "Available options\n");
      for (i = 0; i < nt; i++)
        cucdebug (1, "%i:%i,%i: %icyc %.1f\n",
                  t[i].b, t[i].preroll, t[i].unroll, t[i].new_time,
                  t[i].size);
      /* Add results from CSM: combine every kept option with every timing
         already stored in the block.  Capacity is checked before each write
         so a full t[] is never written past its end.  */
      j = nt;
      for (i = 0; i < saved->bb[b].ntim; i++)
        {
          int i1;
          for (i1 = 0; i1 < nt; i1++)
            {
              if (j >= max_t)
                goto full;
              t[j] = t[i1];
              t[j].size += saved->bb[b].tim[i].size - timings.size;
              t[j].new_time +=
                saved->bb[b].tim[i].new_time - timings.new_time;
              t[j].nshared = saved->bb[b].tim[i].nshared;
              t[j].shared = saved->bb[b].tim[i].shared;
              j++;
            }
        }

    full:
      nt = j;

      cucdebug (1, "Available options:\n");
      for (i = 0; i < nt; i++)
        cucdebug (1, "%i:%i,%i: %icyc %.1f\n",
                  t[i].b, t[i].preroll, t[i].unroll, t[i].new_time,
                  t[i].size);

      /* Sort again with new timings added */
#if HAVE___COMPAR_FN_T
      qsort (t, nt, sizeof (cuc_timings),
             (__compar_fn_t) tim_comp);
#else
      qsort (t, nt, sizeof (cuc_timings),
             (int (*)(const void *, const void *)) tim_comp);
#endif

      /* Delete timings, that have worst time and bigger size than other */
      j = 1;
      csize = t[0].size;
      for (i = 1; i < nt; i++)
        if (t[i].size < csize)
          t[j++] = t[i];
      nt = j;

      cucdebug (1, "Available options:\n");
      for (i = 0; i < nt; i++)
        cucdebug (1, "%i:%i,%i: %icyc %.1f\n",
                  t[i].b, t[i].preroll, t[i].unroll, t[i].new_time,
                  t[i].size);

      if (saved->bb[b].ntim)
        free (saved->bb[b].tim);
      saved->bb[b].ntim = nt;
      /* Allocate outside assert () so the allocation survives NDEBUG.  */
      saved->bb[b].tim = (cuc_timings *) malloc (sizeof (cuc_timings) * nt);
      assert (saved->bb[b].tim);

      /* Copy options in reverse order -- smallest first */
      for (i = 0; i < nt; i++)
        saved->bb[b].tim[i] = t[nt - 1 - i];

      log ("Available options:\n");
      for (i = 0; i < saved->bb[b].ntim; i++)
        {
          log ("%i:pre%i,un%i,sha%i: %icyc %.1f\n",
               saved->bb[b].tim[i].b, saved->bb[b].tim[i].preroll,
               saved->bb[b].tim[i].unroll, saved->bb[b].tim[i].nshared,
               saved->bb[b].tim[i].new_time, saved->bb[b].tim[i].size);
        }
    }
#endif
  return saved;
}
Esempio n. 6
0
/**
 * Reconstructs one Sense job with the non-linear conjugate gradient solver.
 *
 * Messages whose set/slice do not match this gadget are forwarded downstream
 * untouched. Otherwise the job's host arrays are uploaded to the device, the
 * encoding operator E_ is configured, the regularization image is expanded to
 * the number of frames and set as the prior of PICS_, solver_ is run on the
 * samples, the result is cropped to the sequence matrix size if needed and the
 * frames are passed on via put_frames_on_que().
 *
 * @param m1 Image header message; released on success.
 * @param m2 Message carrying the GenericReconJob to reconstruct.
 * @return GADGET_OK on success, GADGET_FAIL on an unconfigured gadget, an
 *         incomplete or inconsistent job, a device query failure or a solver
 *         failure; for a foreign set/slice the result of the downstream
 *         putq().
 */
int gpuOsSenseGadget::process(GadgetContainerMessage<ISMRMRD::ImageHeader> *m1, GadgetContainerMessage<GenericReconJob> *m2)
{
	// Is this data for this gadget's set/slice?
	//
	GDEBUG("Starting gpuOsSenseGadget\n");

	if( m1->getObjectPtr()->set != set_number_ || m1->getObjectPtr()->slice != slice_number_ ) {
		// No, pass it downstream...
		return this->next()->putq(m1);
	}

	//GDEBUG("gpuOsSenseGadget::process\n");
	//GPUTimer timer("gpuOsSenseGadget::process");

	if (!is_configured_) {
		GDEBUG("\nData received before configuration complete\n");
		return GADGET_FAIL;
	}

	GenericReconJob* j = m2->getObjectPtr();

	// Let's first check that this job has the required data...
	// reg_host_ is included because it is dereferenced below for the matrix
	// size and the prior image.
	if (!j->csm_host_.get() || !j->dat_host_.get() || !j->tra_host_.get() || !j->dcw_host_.get() || !j->reg_host_.get()) {
		GDEBUG("Received an incomplete Sense job\n");
		return GADGET_FAIL;
	}

	// An empty trajectory would make the division/modulo below divide by zero.
	const unsigned long long num_coords = j->tra_host_->get_number_of_elements();
	if( num_coords == 0 ) {
		GDEBUG("Received a Sense job with an empty trajectory\n");
		return GADGET_FAIL;
	}

	unsigned int samples = j->dat_host_->get_size(0);
	unsigned int channels = j->dat_host_->get_size(1);

	if( samples%num_coords ) {
		GDEBUG("Mismatch between number of samples (%u) and number of k-space coordinates (%llu).\nThe first should be a multiplum of the latter.\n",
				samples, num_coords);
		return GADGET_FAIL;
	}

	unsigned int rotations = static_cast<unsigned int>(samples / num_coords);
	unsigned int frames = j->tra_host_->get_size(1)*rotations;

	// Upload the job's host arrays to the device.
	boost::shared_ptr< cuNDArray<floatd2> > traj(new cuNDArray<floatd2> (j->tra_host_.get()));
	// NOTE(review): dcw is only referenced by commented-out code below.
	boost::shared_ptr< cuNDArray<float> > dcw(new cuNDArray<float> (j->dcw_host_.get()));
	sqrt_inplace(dcw.get());
	boost::shared_ptr< cuNDArray<float_complext> > csm(new cuNDArray<float_complext> (j->csm_host_.get()));
	boost::shared_ptr< cuNDArray<float_complext> > device_samples(new cuNDArray<float_complext> (j->dat_host_.get()));


	// Take the reconstruction matrix size from the regularization image.
	// It could be oversampled from the sequence specified size...

	matrix_size_ = uint64d2( j->reg_host_->get_size(0), j->reg_host_->get_size(1) );

	cudaDeviceProp deviceProp;
	if( cudaGetDeviceProperties( &deviceProp, device_number_ ) != cudaSuccess) {
		GDEBUG( "\nError: unable to query device properties.\n" );
		return GADGET_FAIL;
	}

	unsigned int warp_size = deviceProp.warpSize;

	// Oversampled matrix size, rounded up to a multiple of the warp size.
	matrix_size_os_ =
			uint64d2(((static_cast<unsigned int>(std::ceil(matrix_size_[0]*oversampling_factor_))+warp_size-1)/warp_size)*warp_size,
					((static_cast<unsigned int>(std::ceil(matrix_size_[1]*oversampling_factor_))+warp_size-1)/warp_size)*warp_size);

	// The elements are 64-bit; cast explicitly so the format specifier matches.
	GDEBUG("Matrix size    : [%llu,%llu] \n",
			static_cast<unsigned long long>(matrix_size_[0]),
			static_cast<unsigned long long>(matrix_size_[1]));
	GDEBUG("Matrix size OS : [%llu,%llu] \n",
			static_cast<unsigned long long>(matrix_size_os_[0]),
			static_cast<unsigned long long>(matrix_size_os_[1]));

	// Image dimensions: matrix size, then frames.
	std::vector<size_t> image_dims = to_std_vector(matrix_size_);
	image_dims.push_back(frames);

	E_->set_domain_dimensions(&image_dims);
	E_->set_codomain_dimensions(device_samples->get_dimensions().get());
	E_->set_csm(csm);
	E_->setup( matrix_size_, matrix_size_os_, kernel_width_ );
	E_->preprocess(traj.get());

	{
		// NOTE(review): precon is filled but never handed to the solver while
		// the call below stays commented out.
		auto precon = boost::make_shared<cuNDArray<float_complext>>(image_dims);
		fill(precon.get(),float_complext(1.0f));
		//solver_.set_preconditioning_image(precon);
	}
	reg_image_ = boost::shared_ptr< cuNDArray<float_complext> >(new cuNDArray<float_complext>(&image_dims));

	// These operators need their domain/codomain set before being added to the solver
	//

	//E_->set_dcw(dcw);
	GDEBUG("Prepared\n");

	// Expand the average image to the number of frames
	//

	{
		cuNDArray<float_complext> tmp(*j->reg_host_);
		*reg_image_ = expand( tmp, frames );
	}
	PICS_->set_prior(reg_image_);

	// Define preconditioning weights
	//

	//Apply weights
	//*device_samples *= *dcw;

	// Invoke solver
	//

	boost::shared_ptr< cuNDArray<float_complext> > result;
	{
		GDEBUG("Running NLCG solver\n");
		GPUTimer timer("Running NLCG solver");

		// Optionally, allow exclusive (per device) access to the solver
		// This may not matter much in terms of speed, but it can in terms of memory consumption
		//

		if( exclusive_access_ )
			_mutex[device_number_].lock();

		// Release the device mutex even when the solver throws, so that other
		// users of the same device are not blocked forever.
		try {
			result = solver_.solve(device_samples.get());
		}
		catch (...) {
			if( exclusive_access_ )
				_mutex[device_number_].unlock();
			throw;
		}

		if( exclusive_access_ )
			_mutex[device_number_].unlock();
	}

	// Check the result before anything below dereferences it.
	if (!result.get()) {
		GDEBUG("\nNon-linear conjugate gradient solver failed\n");
		return GADGET_FAIL;
	}

	// Provide some info about the scaling between the regularization and reconstruction.
	// If it is not close to one, PICCS does not work optimally...
	//

	if( alpha_ > 0.0 ){
		cuNDArray<float_complext> gpureg(j->reg_host_.get());
		// Average of the reconstruction over dimension 2 (the frames).
		boost::shared_ptr< cuNDArray<float_complext> > gpurec = sum(result.get(),2);
		*gpurec /= float(result->get_size(2));
		float scale = abs(dot(gpurec.get(), gpurec.get())/dot(gpurec.get(),&gpureg));
		GDEBUG("Scaling factor between regularization and reconstruction is %f.\n", scale);
	}

	/*
      static int counter = 0;
      char filename[256];
      sprintf((char*)filename, "recon_sb_%d.cplx", counter);
      write_nd_array<float_complext>( sbresult->to_host().get(), filename );
      counter++; */

	// If the recon matrix size exceeds the sequence matrix size then crop
	if( matrix_size_seq_ != matrix_size_ )
		*result = crop<float_complext,2>( (matrix_size_-matrix_size_seq_)>>1, matrix_size_seq_, *result );


	// Now pass on the reconstructed images
	//
	this->put_frames_on_que(frames,rotations,j,result.get(),channels);

	frame_counter_ += frames;
	m1->release();
	return GADGET_OK;
}