/*--------------------------------------------------------------------*/ void clear_v_field(su3_vector *v){ int i; site *s; FORALLSITES(i,s){ clearvec( &v[i] ); }
/* Thin the random source.
 *
 * Visits every site on this node and clears the corresponding entry of
 * src unless all four site coordinates, reduced modulo `thinning`, equal
 * the requested offsets (ex, ey, ez, et).  Entries that match are left
 * untouched.
 *
 *   src       field of vectors indexed by site number; modified in place
 *   thinning  modulus applied to each coordinate (must be nonzero, since
 *             it is used as the right operand of %)
 *   ex,ey,ez,et  remainders a site must have in x, y, z, t to be kept
 */
static void thin_source(su3_vector *src, int thinning,
			int ex, int ey, int ez, int et){
  int idx;
  site *st;

  FORALLSITES(idx,st) {
    /* A site is kept only when every coordinate matches its offset */
    int keep = (st->x % thinning == ex)
            && (st->y % thinning == ey)
            && (st->z % thinning == ez)
            && (st->t % thinning == et);

    if(!keep){
      clearvec(&src[idx]);
    }
  }
}
void accum_gauge_hit(int gauge_dir,int parity) { /* Accumulates sums and differences of link matrices for determining optimum */ /* hit for gauge fixing */ /* Differences are kept in diffmat and the diagonal elements of the sums */ /* in sumvec */ register int j; register su3_matrix *m1,*m2; register int dir,i; register site *s; /* Clear sumvec and diffmat */ FORSOMEPARITY(i,s,parity) { clear_su3mat(&diffmatp[i]); clearvec(&sumvecp[i]); }
int quark_renorm( void ) { register int i, dir; register site *s; int j, cgn; Real mass_x2, finalrsq; Real pix, piy, piz, pit; Real sin_pmu, q_mu, prop_a, prop_b, z_fac, m_func, ftmp = 0; Real r1, r2, r3; int pmu, px, py, pz, pt; int pxn, pyn, pzn, ptn; int currentnode; int j1, jm2, k, dirs[4]; msg_tag *mtag[2]; int j_mass; su3_vector **psim = NULL; int xi, j2, j3, j4, parity; int multiflag; FILE *fp_mom_ks[MAX_NUM_MASS]; /* for writing mom propagator files */ char filename[50]; int prec = PRECISION; /* Make internal precision for CG the same as the prevailing precision */ pix = 2.*PI / (Real)nx; piy = 2.*PI / (Real)ny; piz = 2.*PI / (Real)nz; pit = 2.*PI / (Real)nt; cgn = 0; if( num_mass == 1){ multiflag = 0; } else{ multiflag = 1; psim = (su3_vector **)malloc(num_mass*sizeof(su3_vector *)); for(j=0; j<num_mass; j++){ psim[j] = (su3_vector *)malloc(sites_on_node*sizeof(su3_vector)); } } /* Open the momentum propagator files */ if(this_node == 0){ for(j=0; j<num_mass; j++){ sprintf(filename,"mom_pt_prop.m_%d",j); fp_mom_ks[j] = fopen(filename, "ab"); if(fp_mom_ks[j] == NULL){ printf("quark_renorm: Node %d can't open file %s, error %d\n", this_node,filename,errno);fflush(stdout); terminate(1); } } } rephase( ON ); /* Turn staggered phases on */ /* Create fat and long links */ load_ferm_links(&fn_links, &ks_act_paths); /* Loop over the 16 source points */ for(xi=0; xi<16; xi++){ /* Initialize color trace of the propagator */ FORALLSITES(i,s){ for(j=0; j<num_mass; j++){ s->trace_prop[j].real = 0.0; s->trace_prop[j].imag = 0.0; } } j1 = xi%2; k = xi/2; j2 = k%2; k /= 2; j3 = k%2; k /= 2; j4 = k%2; parity = (j1+j2+j3+j4)%2; /* dirs[XUP] = j1; dirs[YUP] = j2; dirs[ZUP] = j3; dirs[TUP] = j4; */ /* Loop over colors of source vector */ for(j=0; j<3; j++){ /* initialize the source in phi */ FORALLSITES(i,s){ clearvec( &(s->phi)); } /* Point source at site xi in the hypercube at origin */ if( node_number(j1,j2,j3,j4) == this_node ){ i=node_index(j1,j2,j3,j4); 
lattice[i].phi.c[j].real = -1.0; } if( multiflag == 0){ for(j_mass=0; j_mass<num_mass; j_mass++){ FORALLSITES(i,s){ clearvec( &(s->xxx1)); } if(parity == 0){ /* do a C.G. (source in phi, result in xxx1) */ cgn += ks_congrad( F_OFFSET(phi), F_OFFSET(xxx1), mass[j_mass], niter, nrestart, rsqprop, PRECISION, EVEN, &finalrsq, &fn_links); /* Multiply by -Madjoint */ dslash_site( F_OFFSET(xxx1), F_OFFSET(ttt), ODD, &fn_links); mass_x2 = 2.*mass[j_mass]; FOREVENSITES(i,s){ scalar_mult_su3_vector( &(s->xxx1), -mass_x2, &(s->ttt)); } } else{
// ----------------------------------------------------------------- // Matrix--vector operation // Applies either the operator (sign = 1) or its adjoint (sign = -1) // Adjoint is simply overall negative sign... void fermion_op(vector *src, vector *dest, int sign) { register int i; register site *s; int dir, a, b, c, d, par, L[NDIMS] = {nx, ny, nz, nt}; Real tr, halfG = 0.5 * G, m_ov_G, vev[DIMF][DIMF]; vector tvec, tvec_dir, tvec_opp; msg_tag *tag[2 * NDIMS]; // Quick sanity check if (sign != 1 && sign != -1) { node0_printf("Error: incorrect sign in fermion_op: %d\n", sign); terminate(1); } // Ignore site_mass if G = 0 to avoid dividing by zero // Could be made more robust, but unlikely to matter if (G == 0.0) m_ov_G = 0.0; else m_ov_G = 2.0 * site_mass / G; for (a = 0; a < DIMF; a++) { for (b = 0; b < DIMF; b++) vev[a][b] = 0.0; } vev[0][1] = m_ov_G; vev[2][3] = m_ov_G; vev[1][0] = -m_ov_G; vev[3][2] = -m_ov_G; // Start gathers for kinetic term FORALLUPDIR(dir) { if (L[dir] <= 1) // Will be skipped below continue; tag[dir] = start_gather_field(src, sizeof(vector), dir, EVENANDODD, gen_pt[dir]); tag[OPP_DIR(dir)] = start_gather_field(src, sizeof(vector), OPP_DIR(dir), EVENANDODD, gen_pt[OPP_DIR(dir)]); } // Compute scalar term as gathers run // Initialize dest = 0.5G * (sigma + 2m / G) * src // Add SO(4)-breaking 'site mass' term with same structure as sigma FORALLSITES(i, s) { clearvec(&(dest[i])); if (stagger == -1 || lattice[i].parity == EVEN) par = 1; else // Both stagger == 1 and lattice[i].parity == ODD par = -1; for (a = 0; a < DIMF; a++) { for (b = a + 1; b < DIMF; b++) { tr = s->sigma.e[as_index[a][b]] + par * vev[a][b]; for (c = 0; c < DIMF; c++) { for (d = c + 1; d < DIMF; d++) { tr += perm[a][b][c][d] * (s->sigma.e[as_index[c][d]] + par * vev[c][d]); } } // No half since not double-counting dest[i].c[a] += tr * src[i].c[b]; dest[i].c[b] -= tr * src[i].c[a]; } } scalar_mult_vec(&(dest[i]), halfG, &(dest[i])); }
static void ks_multicg_reverse_field( /* Return value is number of iterations taken */ su3_vector *src, /* source vector (type su3_vector) */ su3_vector **psim, /* solution vectors */ ks_param *ksp, /* KS parametes, including the offsets */ int num_offsets, /* number of offsets */ quark_invert_control *qic, imp_ferm_links_t *fn /* Storage for fermion links */ ) { char myname[] = "ks_multicg_reverse_field"; /* Site su3_vector's resid, cg_p and ttt are used as temporaies */ register int i; register site *s; int iteration; /* counter for iterations */ int num_offsets_now; /* number of offsets still being worked on */ double c1, c2, rsq, oldrsq, pkp; /* pkp = cg_p.K.cg_p */ double source_norm; /* squared magnitude of source vector */ double rsqstop; /* stopping residual normalized by source norm */ int l_parity=0; /* parity we are currently doing */ int l_otherparity=0; /* the other parity */ #ifdef FN msg_tag *tags1[16], *tags2[16]; /* tags for gathers to parity and opposite */ #endif int special_started; /* 1 if dslash_special has been called */ int j, j_low; Real *shifts, mass_low, msq_xm4; double *zeta_i, *zeta_im1, *zeta_ip1; double *beta_i, *beta_im1, *alpha; // su3_vector **pm; /* vectors not involved in gathers */ // Switch indices su3_vector **psim_rev; su3_vector *psim_space; su3_vector **pm_rev; su3_vector *pm_space; /* Unpack structure */ /* We don't restart this algorithm, so we adopt the convention of taking the product here */ int niter = qic->max*qic->nrestart; Real rsqmin = qic->resid * qic->resid; /* desired squared residual - normalized as sqrt(r*r)/sqrt(src_e*src_e) */ int parity = qic->parity; /* EVEN, ODD */ /* Timing */ #ifdef CGTIME double dtimec; #endif double nflop; qic->final_iters = 0; qic->final_restart = 0; //#if FERM_ACTION == HISQ // fn->hl.current_X_set = 0; // restore_fn_links(fn); //#endif if( num_offsets==0 )return; if(fn == NULL){ printf("%s(%d): Called with NULL fn\n", myname, this_node); terminate(1); } // Switch indices psim_rev 
= (su3_vector **)malloc( sizeof(su3_vector *)*sites_on_node ); psim_space = (su3_vector *)malloc( sizeof(su3_vector)*sites_on_node*num_offsets ); pm_rev = (su3_vector **)malloc( sizeof(su3_vector *)*sites_on_node ); pm_space = (su3_vector *)malloc( sizeof(su3_vector)*sites_on_node*num_offsets ); if( psim_space == NULL || pm_space == NULL){printf("%s: NO ROOM!\n",myname); exit(0); } for( i=0; i<sites_on_node; i++ ){ psim_rev[i] = &(psim_space[num_offsets*i]); pm_rev[i] = &(pm_space[num_offsets*i]); for( j=0; j<num_offsets; j++){ psim_rev[i][j] = psim[j][i]; } } /* debug */ #ifdef CGTIME dtimec = -dclock(); #endif nflop = 1205 + 15*num_offsets; if(parity==EVENANDODD)nflop *=2; special_started = 0; /* if we want both parities, we will do even first. */ switch(parity){ case(EVEN): l_parity=EVEN; l_otherparity=ODD; break; case(ODD): l_parity=ODD; l_otherparity=EVEN; break; case(EVENANDODD): l_parity=EVEN; l_otherparity=ODD; break; } shifts = (Real *)malloc(num_offsets*sizeof(Real)); zeta_i = (double *)malloc(num_offsets*sizeof(double)); zeta_im1 = (double *)malloc(num_offsets*sizeof(double)); zeta_ip1 = (double *)malloc(num_offsets*sizeof(double)); beta_i = (double *)malloc(num_offsets*sizeof(double)); beta_im1 = (double *)malloc(num_offsets*sizeof(double)); alpha = (double *)malloc(num_offsets*sizeof(double)); //pm = (su3_vector **)malloc(num_offsets*sizeof(su3_vector *)); mass_low = 1.0e+20; j_low = -1; for(j=0;j<num_offsets;j++){ shifts[j] = ksp[j].offset; if (ksp[j].offset < mass_low){ mass_low = ksp[j].offset; j_low = j; } } for(j=0;j<num_offsets;j++) if(j!=j_low){ //pm[j] = (su3_vector *)malloc(sites_on_node*sizeof(su3_vector)); shifts[j] -= shifts[j_low]; } msq_xm4 = -shifts[j_low]; iteration = 0; #define PAD 0 /* now we can allocate temporary variables and copy then */ /* PAD may be used to avoid cache thrashing */ if(first_multicongrad) { ttt = (su3_vector *) malloc((sites_on_node+PAD)*sizeof(su3_vector)); cg_p = (su3_vector *) 
malloc((sites_on_node+PAD)*sizeof(su3_vector)); resid = (su3_vector *) malloc((sites_on_node+PAD)*sizeof(su3_vector)); first_multicongrad = 0; } #ifdef CGTIME dtimec = -dclock(); #endif /* initialization process */ start: #ifdef FN if(special_started==1) { /* clean up gathers */ cleanup_gathers(tags1, tags2); special_started = 0; } #endif num_offsets_now = num_offsets; source_norm = 0.0; FORSOMEPARITY(i,s,l_parity){ source_norm += (double) magsq_su3vec( src+i ); su3vec_copy( src+i, &(resid[i])); su3vec_copy(&(resid[i]), &(cg_p[i])); clearvec(&(psim_rev[i][j_low])); for(j=0;j<num_offsets;j++) if(j!=j_low){ clearvec(&(psim_rev[i][j])); su3vec_copy(&(resid[i]), &(pm_rev[i][j])); } } END_LOOP;