Example #1
0
static int
bicgilu_cl_qop(quark_invert_control *qic, Real clov,
	       MYREAL *kappas[], int nkappa[], 
	       wilson_vector *milc_srcs[], 
	       wilson_vector **milc_sols[],
	       int nsrc, int *final_restart,
               Real* final_rsq_ptr, int milc_parity )
{
  int isrc, ikappa;
  QOP_FermionLinksWilson *qop_links;
  QOP_DiracFermion **qop_sol[MAXSRC], *qop_src[MAXSRC];
  int iterations_used = 0;
  QOP_invert_arg_t qop_invert_arg;
  QOP_resid_arg_t  ***qop_resid_arg;
  double remaptime;
  int i;
  site *s;

  if(nsrc > MAXSRC){
    printf("bicgilu_cl_qop: too many sources\n");
    terminate(1);
  }

  /* Initialize QOP */
  if(initialize_qop() != QOP_SUCCESS){
    printf("bicbilu_cl_qop: Error initializing QOP\n");
    terminate(1);
  }

  /* Create QOP links object */

  qop_links = create_qop_wilson_fermion_links( clov );

  /* Set qop_invert_arg */
  set_qop_invert_arg( & qop_invert_arg, qic, milc_parity );
  
  /* Pointers for residual errors */
  qop_resid_arg = create_qop_resid_arg( nsrc, nkappa, (qic->resid)*(qic->resid));

  remaptime = -dclock(); 

  /* Pointers for solution vectors */
  for(isrc = 0; isrc < nsrc; isrc++){
    qop_sol[isrc] = 
      (QOP_DiracFermion **)malloc(sizeof(QOP_DiracFermion *)*nkappa[isrc]);
    if(qop_sol[isrc] == NULL){
      printf("bicgilu_cl_qop: Can't allocate qop_sol\n");
      terminate(1);
    }
  }

  /* Map MILC source and sink to QOP fields */
  for(isrc = 0; isrc < nsrc; isrc++){
    gamma5_flip(milc_srcs[isrc], milc_parity);  /* compensate for QOP gamma */
    qop_src[isrc] = create_D_from_field( milc_srcs[isrc], milc_parity);
    gamma5_flip(milc_srcs[isrc], milc_parity);  /* restore the source */
    for(ikappa = 0; ikappa < nkappa[isrc]; ikappa++){
      /* Adjust normalization for MILC conventions */
      gamma5_flip(milc_sols[isrc][ikappa], milc_parity);  /* compensate for QOP gamma */
      FORALLSITES(i,s){
	scalar_mult_wvec( milc_sols[isrc][ikappa]+i, 2.*kappas[isrc][ikappa],
			  milc_sols[isrc][ikappa]+i);
      }
      qop_sol[isrc][ikappa] = 
	create_D_from_field( milc_sols[isrc][ikappa], milc_parity);
    }
  }
Example #2
0
/*
 * Double-precision solve driven by repeated single-precision inversions.
 *
 * Each pass computes the double-precision residual r = src - A sol for
 * every source, tests it against the tolerances in qic, and, if not yet
 * converged, hands the normalized residual to the single-precision
 * inverter.  The single-precision result is then added (rescaled by the
 * residual norm) to the double-precision solution.
 *
 * Parameters
 *   prop_type      forwarded unchanged to compute_qdp_residuals() and
 *                  bicgilu_cl_qop_generic_F()
 *   qop_links      double-precision links; a single-precision copy is
 *                  created from them and destroyed before returning
 *   qic            reads nrestart, resid and relresid; final_rsq and
 *                  final_relrsq are overwritten on every pass
 *   milc_parity    converted to the QDP subset on which norms are taken
 *   dmps, kappas   forwarded to the residual and inverter helpers
 *   nkappa         kappas per source; any entry > 1 terminates the run.
 *                  Only index [0] of each qop_sol[i] is used.
 *   qop_sol        in/out: the first residual is computed from the values
 *                  found here, and the refined solution is left here
 *   qop_src        sources; converted to QDP form for the duration of the
 *                  call and converted back before returning
 *   nsrc           number of sources; must not exceed MAXSRC
 *   final_restart  out: number of single-precision solves performed
 *   final_rsq_ptr  out: largest |r|^2/|src|^2 over the sources
 *
 * Returns the total iteration count summed over all single-precision
 * solves.  Calls terminate(1) on invalid arguments or allocation failure.
 */
int 
bicgilu_cl_qop_single_for_double( int prop_type,
				  QOP_FermionLinksWilson *qop_links, 
				  quark_invert_control *qic, int milc_parity,
				  void *dmps[],
				  float *kappas[], int nkappa[], 
				  QOP_DiracFermion **qop_sol[], 
				  QOP_DiracFermion *qop_src[], 
				  int nsrc,		    
				  int *final_restart,
				  Real *final_rsq_ptr )
{
  int i, iters, iters_F = 0;
  int converged;
  int nrestart;
  int max_restarts = qic->nrestart;
  int isrc, ikappa;
  Real final_rsq_F, final_relrsq_F;
  Real resid_F = 3e-7;   /* The limits of a single precision inversion */
  Real rel_F = 0;   /* Relative-residual target handed to the single precision solver */
  QOP_invert_arg_t qop_invert_arg;
  QOP_resid_arg_t  ***qop_resid_arg_F;
  QOP_info_t info_F = {0., 0., 0, 0, 0}, info = {0., 0., 0, 0, 0};
  QDP_Subset subset = milc2qdp_subset(milc_parity);
  QOP_F3_FermionLinksWilson *qop_links_F;
  QOP_F3_DiracFermion **qop_sol_F[MAXSRC], *qop_rhs_F[MAXSRC];
  QDP_F3_DiracFermion *qdp_rhs_F[MAXSRC];
  QDP_D3_DiracFermion *qdp_src[MAXSRC], *qdp_resid[MAXSRC];
  QDP_D3_DiracFermion *qdp_sol;
  Real relresid2[MAXSRC];
  Real resid2[MAXSRC];
  QLA_D_Real norm2_src[MAXSRC], norm2_resid[MAXSRC], norm_resid[MAXSRC], scale_resid;
  char myname[] = "bicgilu_cl_qop_single_for_double";

  /* All per-source work arrays above are sized MAXSRC */
  if(nsrc > MAXSRC){
    printf("%s: too many sources\n",myname);
    terminate(1);
  }
  
  /* Only one kappa allowed per source for this algorithm */
  for(i = 0; i < nsrc; i++){
    if(nkappa[i] > 1){
      printf("%s: nkappa[%d] = %d != 1\n",myname,i,nkappa[i]);
      terminate(1);
    }
  }
  
  /* Set qop_invert_arg */
  /* We don't do restarts for the single precision step */
  /* We interpret "qic->nrestart" to mean the max number of calls to
     the single-precision inverter */
  set_qop_invert_arg_norestart( & qop_invert_arg, qic, milc_parity );
  
  /* Pointers for residual errors */
  /* For now we set the residual to something sensible for single precision */
  qop_resid_arg_F = create_qop_resid_arg( nsrc, nkappa, resid_F*resid_F, rel_F*rel_F);

  /* Create a single precision copy of the links object */
  qop_links_F = QOP_FD3_wilson_create_L_from_L( qop_links );

  /* Take norm of source and create temporaries */

  for(i = 0; i < nsrc; i++){
    qdp_src[i] = QOP_D3_convert_D_to_qdp( qop_src[i] );
    QDP_D3_r_eq_norm2_D( norm2_src+i, qdp_src[i], subset );
    qdp_resid[i] = QDP_D3_create_D();
    qdp_rhs_F[i] = QDP_F3_create_D();
    /* One slot per source, since nkappa[i] <= 1 */
    qop_sol_F[i] = (QOP_F3_DiracFermion **)malloc(sizeof(QOP_F3_DiracFermion *));
    if(qop_sol_F[i] == NULL){
      printf("%s: Can't allocate qop_sol_F\n",myname);
      terminate(1);
    }
  }


  /* Main loop */

  nrestart = 0;
  converged = 0;
  iters = 0;

  info.final_sec = -dclock();
  info.final_flop = 0;
  info.status = QOP_SUCCESS;

  while(1){
    /* Create new residual vectors from the result */
    /* r = src - A sol */
    compute_qdp_residuals( prop_type, qdp_resid, qdp_src, 
			   qop_links, qop_sol, dmps, kappas, 
			   nkappa, nsrc, milc_parity );

    /* Compute two different norms */
    qic->final_rsq = 0;
    qic->final_relrsq = 0;
    for(i = 0; i < nsrc; i++){
      /* The solution is borrowed in QDP form for the relative residue
	 and handed straight back to QOP form */
      qdp_sol = QOP_convert_D_to_qdp( qop_sol[i][0] );
      relresid2[i] = qdp_relative_residue( qdp_resid[i], qdp_sol, subset );
      qop_sol[i][0] = QOP_convert_D_from_qdp( qdp_sol );
      qic->final_relrsq = (relresid2[i] > qic->final_relrsq) ? relresid2[i] : qic->final_relrsq;

      QDP_D3_r_eq_norm2_D( norm2_resid+i, qdp_resid[i], subset );
      /* NOTE(review): a source with zero norm on this subset makes this
	 ratio inf or NaN -- confirm callers never pass one */
      resid2[i] = norm2_resid[i]/norm2_src[i];
      qic->final_rsq = (resid2[i] > qic->final_rsq) ? resid2[i] : qic->final_rsq;
#ifdef CG_DEBUG
      node0_printf("%s: double precision restart %d resid2 = %.2e vs %.2e relresid2 = %.2e vs %.2e\n",
		   myname, nrestart, resid2[i], qic->resid * qic->resid, relresid2[i], 
		   qic->relresid * qic->relresid );
#endif
    }
    *final_rsq_ptr = qic->final_rsq;  /* Use Cartesian norm for now */
    *final_restart = nrestart;

    /* Stop when converged.  A tolerance of zero disables that test. */
    converged = 1;
    for(i = 0; i < nsrc; i++){
      if((qic->resid > 0 && resid2[i] > qic->resid * qic->resid) || 
	 (qic->relresid > 0 && relresid2[i] > qic->relresid * qic->relresid)){
	converged = 0;
	break;
      }
    }

    /* Count a restart only when another single-precision solve is
       actually going to run, so that nrestart always agrees with the
       value reported through *final_restart */
    if(converged || nrestart >= max_restarts)break;
    nrestart++;

    for(i = 0; i < nsrc; i++){
      /* Scale the RHS to avoid underflow */
      norm_resid[i] = sqrt(norm2_resid[i]);
      /* A residual that is already exactly zero must stay zero:
	 multiplying it by 1/0 would fill the RHS with NaN */
      scale_resid = (norm_resid[i] > 0) ? 1./norm_resid[i] : 0.;
      QDP_D3_D_eq_r_times_D(qdp_resid[i], &scale_resid, qdp_resid[i], subset);
      /* Scaled residual becomes the new source */
      QDP_FD3_D_eq_D( qdp_rhs_F[i], qdp_resid[i],  subset);
      qop_rhs_F[i] = QOP_F3_convert_D_from_qdp( qdp_rhs_F[i]);
      /* Prepare to solve in single precision by creating a single
	 precision copy of the source.  Set the trial solution to zero. */
      qop_sol_F[i][0] = create_qop_DiracFermion_F();
    }


    /* Solve in single precision */
    double dtime = -dclock();
    info_F.final_flop = 0.;
    bicgilu_cl_qop_generic_F( prop_type, &info_F, qop_links_F, 
	  &qop_invert_arg, qop_resid_arg_F, dmps, nkappa, qop_sol_F, 
  	  qop_rhs_F, nsrc);
    dtime += dclock();

    /* Report performance statistics */
    
    /* For now we return the largest value and total iterations */
    final_rsq_F = 0;
    final_relrsq_F = 0;
    iters_F = 0;
    for(isrc = 0; isrc < nsrc; isrc++)
      for(ikappa = 0; ikappa < nkappa[isrc]; ikappa++){
	/* QOP routines return the ratios of the squared norms */
	final_rsq_F =    MAX(final_rsq_F, qop_resid_arg_F[isrc][ikappa]->final_rsq);
	final_relrsq_F = MAX(final_relrsq_F, qop_resid_arg_F[isrc][ikappa]->final_rel);
	iters_F += qop_resid_arg_F[isrc][ikappa]->final_iter;
	if(nsrc > 1 || nkappa[isrc] > 1)
	  node0_printf("BICG(src %d,kappa %d): iters = %d resid = %e relresid = %e\n",
		       isrc, ikappa,
		       qop_resid_arg_F[isrc][ikappa]->final_iter,
		       sqrt(qop_resid_arg_F[isrc][ikappa]->final_rsq),
		       sqrt(qop_resid_arg_F[isrc][ikappa]->final_rel));
      }
    
#ifdef CGTIME
    /* Both "wanted" values are squared to match the squared norms
       printed beside them */
    node0_printf("%s: single precision iters = %d status %d final_rsq %.2e wanted %.2e final_rel %.2e wanted %.2e\n",
		 myname, iters_F, info_F.status, final_rsq_F, resid_F * resid_F, final_relrsq_F, rel_F * rel_F);
    node0_printf("time = %g flops = %e mflops = %g\n", dtime, info_F.final_flop, 
		 info_F.final_flop/(1.0e6*dtime) );
    fflush(stdout);
#endif

    /* Add single-precision result to double precision solution (with rescaling) */
    update_qop_solution( qop_sol, norm_resid, qop_sol_F, nsrc, subset );

    for(i = 0; i < nsrc; i++){
      QOP_F3_destroy_D(qop_sol_F[i][0]);
      /* Convert back so the QDP field can be refilled on the next pass
	 and destroyed during cleanup */
      qdp_rhs_F[i] = QOP_F3_convert_D_to_qdp(qop_rhs_F[i]);
    }

    info.final_flop += info_F.final_flop;
    iters += iters_F;
  }

  /* Clean up */

  for(i = 0; i < nsrc; i++){
    QDP_F3_destroy_D( qdp_rhs_F[i] );
    QDP_D3_destroy_D( qdp_resid[i] );
    /* Must restore qop_src in case the caller reuses it */
    qop_src[i] = QOP_D3_convert_D_from_qdp( qdp_src[i] );
    free(qop_sol_F[i]);
  }

  QOP_F3_wilson_destroy_L( qop_links_F );
  destroy_qop_resid_arg(qop_resid_arg_F, nsrc, nkappa);
  qop_resid_arg_F = NULL;

  if(!converged){
    node0_printf("%s: NOT Converged after %d iters and %d restarts\n",
		 myname, iters, nrestart);
  }

  info.final_sec += dclock();
#ifdef CGTIME
  node0_printf("CGTIME: time = %e (wilson_qop FD) ", info.final_sec);
  for(isrc = 0; isrc < nsrc; isrc++)
    node0_printf("nkappa[%d] = %d tot_iters = %d ",
		 isrc,nkappa[isrc],iters);
  node0_printf("mflops = %e\n", info.final_flop/(1.0e6*info.final_sec) );
  fflush(stdout);
#endif

  return iters;
}