/* Overwrite the field ftd->data, in place, with the result of gathering it
 * along the gather direction/path "index".
 *
 *   index  gather index handed to start_gather_field()
 *   ftd    descriptor of the field; this routine reads ftd->data (the
 *          storage) and ftd->size (bytes per site)
 *
 * The gathered values are staged in a scratch buffer before being written
 * back, so that the field is not modified while gen_pt[0] is still being
 * read -- presumably because gen_pt[0][i] can point into the field itself;
 * TODO confirm against the gather implementation.
 *
 * Calls terminate(1) if the scratch buffer cannot be allocated.
 * The elapsed time is reported under the label "REMAP FFTW remap".
 *
 * NOTE(review): the byte count sites_on_node*ftd->size is evaluated in the
 * operands' own type; if both are int it could overflow for very large
 * local volumes -- confirm the declared types.
 */
static void remap_data(int index, ft_data *ftd){
  double dtime = start_timing();
  msg_tag *gather_tag;
  char *scratch;
  char *slot;
  int site_idx;

  scratch = (char *)malloc(sites_on_node*ftd->size);
  if(scratch == NULL){
    printf("remap_data: No room\n");
    terminate(1);
  }

  gather_tag = start_gather_field(ftd->data, ftd->size, index, EVENANDODD,
				  gen_pt[0]);
  wait_gather(gather_tag);

  /* Stage the gathered sites contiguously, one ftd->size chunk per site */
  slot = scratch;
  for(site_idx = 0; site_idx < sites_on_node; site_idx++){
    memcpy(slot, gen_pt[0][site_idx], ftd->size);
    slot += ftd->size;
  }

  /* gen_pt[0] is no longer needed once everything is staged */
  cleanup_gather(gather_tag);

  /* Write the remapped data back over the original field */
  memcpy((char *)ftd->data, scratch, sites_on_node*ftd->size);

  free(scratch);

  print_timing(dtime, "REMAP FFTW remap");
}
void dslash_fn_dir(su3_vector *src, su3_vector *dest, int parity, fn_links_t *fn, int dir, int fb, Real wtfat, Real wtlong) { register int i ; site *s; msg_tag *tag[2]; su3_matrix *fat = get_fatlinks(fn); su3_matrix *lng = get_lnglinks(fn); su3_vector tmp; int do_long = (lng != NULL) && (wtlong != 0.); char myname[] = "fn_shift"; if(fat == NULL) { printf("%s(%d): fat or lng member is null\n", myname, this_node); terminate(1); } if(fb > 0){ /* Shift from forward direction */ tag[0] = start_gather_field( src, sizeof(su3_vector), dir, parity, gen_pt[0] ); if(do_long) tag[1] = start_gather_field( src, sizeof(su3_vector), DIR3(dir), parity, gen_pt[1] ); wait_gather(tag[0]); if(do_long) wait_gather(tag[1]); FORSOMEPARITYDOMAIN(i,s,parity) { mult_su3_mat_vec( fat+4*i+dir, (su3_vector *)gen_pt[0][i], &tmp ); scalar_mult_add_su3_vector( dest+i, &tmp, wtfat, dest+i ) ; if(do_long){ mult_su3_mat_vec( lng+4*i+dir, (su3_vector *)gen_pt[1][i], &tmp ); scalar_mult_add_su3_vector( dest+i, &tmp, wtlong, dest+i ) ; } } END_LOOP
/* Apply the symmetric shift operator in direction "dir" * * This is the explicit version * * Covariant shifts are used * * The KS phases MUST BE in the links */ static void sym_shift_field(int dir, su3_vector *src, su3_vector *dest) { register int i ; register site *s ; msg_tag *tag[2]; su3_vector *tvec = create_v_field(); tag[0] = start_gather_field( src, sizeof(su3_vector), dir, EVENANDODD, gen_pt[0] ); /* With ONE_SIDED_SHIFT defined, the shift is asymmetric */ #ifndef ONE_SIDED_SHIFT FORALLSITES(i,s) { mult_adj_su3_mat_vec( &(s->link[dir]), src+i, tvec+i ) ; }
// then gathered to x FORALLSITES(i, s) mult_su3_an((su3_matrix*)F_PT(s,lnk2), (su3_matrix*)F_PT(s,lnk1), tempmat1 + i); wait_gather(tag0); wait_gather(tag1); // Finish lower staple FORALLSITES(i, s) { mult_su3_nn(tempmat1 + i, (su3_matrix *)gen_pt[0][i], &tmat1); su3mat_copy(&tmat1, tempmat1 + i); } // Gather staple from direction -dir2 to "home" site tag2 = start_gather_field(tempmat1, sizeof(su3_matrix), OPP_DIR(dir2), EVENANDODD, gen_pt[2]); // Calculate upper staple, add it FORALLSITES(i, s) { mult_su3_nn((su3_matrix*)F_PT(s,lnk2), (su3_matrix *)gen_pt[1][i], &tmat1); mult_su3_na(&tmat1, (su3_matrix *)gen_pt[0][i], &tmat2); add_su3_matrix(stp + i, &tmat2, stp + i); } // Finally add the lower staple wait_gather(tag2); FORALLSITES(i, s) add_su3_matrix(stp+i, (su3_matrix *)gen_pt[2][i], stp+i); cleanup_gather(tag0); cleanup_gather(tag1);
void hvy_pot(int do_det) { register int i; register site *s; int t_dist, x_dist; double wloop; complex tc; matrix tmat, tmat2; msg_tag *mtag = NULL; node0_printf("hvy_pot: MAX_T = %d, MAX_X = %d\n", MAX_T, MAX_X); // Use staple to hold product of t_dist links at each point for (t_dist = 1; t_dist <= MAX_T; t_dist++) { if (t_dist == 1) { FORALLSITES(i, s) mat_copy(&(s->link[TUP]), &(staple[i])); } else { mtag = start_gather_field(staple, sizeof(matrix), goffset[TUP], EVENANDODD, gen_pt[0]); // Be careful about overwriting staple; // gen_pt may just point to it for on-node "gathers" wait_gather(mtag); FORALLSITES(i, s) mult_nn(&(s->link[TUP]), (matrix *)gen_pt[0][i], &(tempmat2[i])); cleanup_gather(mtag); FORALLSITES(i, s) mat_copy(&(tempmat2[i]), &(staple[i])); } // Copy staple to tempmat // Will shoft at end of loop FORALLSITES(i, s) mat_copy(&(staple[i]), &(tempmat[i])); for (x_dist = 0; x_dist <= MAX_X; x_dist++) { // Evaluate potential at this separation wloop = 0.0; FORALLSITES(i, s) { // Compute the actual Coulomb gauge Wilson loop product mult_na(&(staple[i]), &(tempmat[i]), &tmat); if (do_det == 1) det_project(&tmat, &tmat2); else mat_copy(&tmat, &tmat2); tc = trace(&tmat2); wloop += tc.real; } g_doublesum(&wloop); if (do_det == 1) { // Braces fix compiler error node0_printf("D_LOOP "); } else node0_printf("POT_LOOP "); node0_printf("%d %d %.6g\n", x_dist, t_dist, wloop / volume); // As we increment x, shift in x direction shiftmat(tempmat, tempmat2, goffset[XUP]); } // x_dist } // t_dist
// ----------------------------------------------------------------- // Matrix--vector operation // Applies either the operator (sign = 1) or its adjoint (sign = -1) // Adjoint is simply overall negative sign... void fermion_op(vector *src, vector *dest, int sign) { register int i; register site *s; int dir, a, b, c, d, par, L[NDIMS] = {nx, ny, nz, nt}; Real tr, halfG = 0.5 * G, m_ov_G, vev[DIMF][DIMF]; vector tvec, tvec_dir, tvec_opp; msg_tag *tag[2 * NDIMS]; // Quick sanity check if (sign != 1 && sign != -1) { node0_printf("Error: incorrect sign in fermion_op: %d\n", sign); terminate(1); } // Ignore site_mass if G = 0 to avoid dividing by zero // Could be made more robust, but unlikely to matter if (G == 0.0) m_ov_G = 0.0; else m_ov_G = 2.0 * site_mass / G; for (a = 0; a < DIMF; a++) { for (b = 0; b < DIMF; b++) vev[a][b] = 0.0; } vev[0][1] = m_ov_G; vev[2][3] = m_ov_G; vev[1][0] = -m_ov_G; vev[3][2] = -m_ov_G; // Start gathers for kinetic term FORALLUPDIR(dir) { if (L[dir] <= 1) // Will be skipped below continue; tag[dir] = start_gather_field(src, sizeof(vector), dir, EVENANDODD, gen_pt[dir]); tag[OPP_DIR(dir)] = start_gather_field(src, sizeof(vector), OPP_DIR(dir), EVENANDODD, gen_pt[OPP_DIR(dir)]); } // Compute scalar term as gathers run // Initialize dest = 0.5G * (sigma + 2m / G) * src // Add SO(4)-breaking 'site mass' term with same structure as sigma FORALLSITES(i, s) { clearvec(&(dest[i])); if (stagger == -1 || lattice[i].parity == EVEN) par = 1; else // Both stagger == 1 and lattice[i].parity == ODD par = -1; for (a = 0; a < DIMF; a++) { for (b = a + 1; b < DIMF; b++) { tr = s->sigma.e[as_index[a][b]] + par * vev[a][b]; for (c = 0; c < DIMF; c++) { for (d = c + 1; d < DIMF; d++) { tr += perm[a][b][c][d] * (s->sigma.e[as_index[c][d]] + par * vev[c][d]); } } // No half since not double-counting dest[i].c[a] += tr * src[i].c[b]; dest[i].c[b] -= tr * src[i].c[a]; } } scalar_mult_vec(&(dest[i]), halfG, &(dest[i])); }
/* Special dslash for use by congrad.  Uses restart_gather_field() when
   possible.

   Arguments
     src     input vector field
     dest    output vector field; written on sites of "parity"
     parity  EVEN, ODD or EVENANDODD: the sites on which dest is computed
     tag     array of message tags, indexed by direction code (dir,
             DIR3(dir), OPP_DIR(dir), OPP_3_DIR(dir)).  It is filled in
             when start==1 and reused otherwise, so the caller must pass
             the same array (large enough for all those indices) on
             subsequent calls.
     start   if 1, use start_gather_field(); otherwise use
             restart_gather_field() on the tags already stored in tag[]
     fn      link structure providing the fat and long links; must not
             be NULL

   The calling program must clean up the gathers and temps!

   On first use nine work vectors temp[0..8] are allocated and kept
   (temp_not_allocated is cleared).  The routine calls terminate(1) if
   fn is NULL or if any of those allocations fails.

   NOTE(review): the NO_LONG_LINKS build looks incomplete -- with that
   macro defined, long4 is still named in the OMP private clauses and the
   3-link gathers and temp[4..8] are still used unconditionally below.
   Left unchanged here; confirm whether that configuration is supported.
*/
void dslash_fn_field_special(su3_vector *src, su3_vector *dest,
			     int parity, msg_tag **tag, int start,
			     fn_links_t *fn){
  register int i;
  register site *s;
  register int dir,otherparity=0;
  register su3_matrix *fat4;
  su3_matrix *t_fatlink;
#ifndef NO_LONG_LINKS
  register su3_matrix *long4;
  su3_matrix *t_longlink;
#endif

  /* allocate temporary work space only if not already allocated */
  if(temp_not_allocated)
    {
      for( dir=XUP; dir<=TUP; dir++ ){
	temp[dir]  =(su3_vector *)malloc(sites_on_node*sizeof(su3_vector));
	temp[dir+4]=(su3_vector *)malloc(sites_on_node*sizeof(su3_vector));
	/* Fail loudly now rather than dereference NULL in the site loops */
	if(temp[dir] == NULL || temp[dir+4] == NULL){
	  printf("dslash_fn_field_special: No room for temporaries\n");
	  terminate(1);
	}
      }
      temp[8]=(su3_vector *)malloc(sites_on_node*sizeof(su3_vector));
      if(temp[8] == NULL){
	printf("dslash_fn_field_special: No room for temporaries\n");
	terminate(1);
      }
      temp_not_allocated = 0 ;
    }

  /* load fatlinks and longlinks */
  if(fn == NULL){
    printf("dslash_fn_field_special: invalid fn links!\n");
    terminate(1);
  }
#ifndef NO_LONG_LINKS
  t_longlink = get_lnglinks(fn);
#endif
  t_fatlink = get_fatlinks(fn);

  /* Sites of "otherparity" are the ones whose products get gathered.
     NOTE(review): any other parity value leaves otherparity at 0. */
  switch(parity)
    {
    case EVEN:       otherparity=ODD; break;
    case ODD:        otherparity=EVEN; break;
    case EVENANDODD: otherparity=EVENANDODD; break;
    }

  /* Start gathers from positive directions */
  /* And start the 3-step gather too */
  for( dir=XUP; dir<=TUP; dir++ ){
    if(start==1)
      {
	tag[dir] = start_gather_field( src, sizeof(su3_vector),
				       dir, parity,gen_pt[dir] );
#ifndef NO_LONG_LINKS
	tag[DIR3(dir)] = start_gather_field(src, sizeof(su3_vector),
					    DIR3(dir),parity,
					    gen_pt[DIR3(dir)] );
#endif
      }
    else
      {
	restart_gather_field( src, sizeof(su3_vector),
			      dir, parity,gen_pt[dir], tag[dir]);
#ifndef NO_LONG_LINKS
	restart_gather_field(src, sizeof(su3_vector), DIR3(dir), parity,
			     gen_pt[DIR3(dir)], tag[DIR3(dir)]);
#endif
      }
  }

  /* Multiply by adjoint matrix at other sites */
  /* Use fat link for single link transport */
  /* Results go to temp[0..3] (fat links) and temp[4..7] (long links),
     which are the sources of the negative-direction gathers below */
  FORSOMEPARITYDOMAIN_OMP( i, s, otherparity, private(fat4,long4) ){
    //NOPRE if( i < loopend-FETCH_UP ){
    //NOPRE   fat4 = &(t_fatlink[4*(i+FETCH_UP)]);
    //NOPRE   prefetch_V(&(src[i+FETCH_UP]));
    //NOPRE   prefetch_4MVVVV(
    //NOPRE 	      fat4,
    //NOPRE 	      &(temp[0][i+FETCH_UP]),
    //NOPRE 	      &(temp[1][i+FETCH_UP]),
    //NOPRE 	      &(temp[2][i+FETCH_UP]),
    //NOPRE 	      &(temp[3][i+FETCH_UP]) );
#ifndef NO_LONG_LINKS
    //NOPRE   long4 = &(t_longlink[4*(i+FETCH_UP)]);
    //NOPRE   prefetch_4MVVVV(
    //NOPRE 	      long4,
    //NOPRE 	      &(temp[4][i+FETCH_UP]),
    //NOPRE 	      &(temp[5][i+FETCH_UP]),
    //NOPRE 	      &(temp[6][i+FETCH_UP]),
    //NOPRE 	      &(temp[7][i+FETCH_UP]) );
#endif
    //NOPRE }

    fat4 = &(t_fatlink[4*i]);
    mult_adj_su3_mat_4vec( fat4, &(src[i]), &(temp[0][i]),
			   &(temp[1][i]), &(temp[2][i]), &(temp[3][i]) );
#ifndef NO_LONG_LINKS
    /* multiply by 3-link matrices too */
    long4 = &(t_longlink[4*i]);
    mult_adj_su3_mat_4vec( long4, &(src[i]),&(temp[4][i]),
			   &(temp[5][i]), &(temp[6][i]), &(temp[7][i]) );
#endif
  } END_LOOP_OMP

  /* Start gathers from negative directions */
  for( dir=XUP; dir <= TUP; dir++){
    if (start==1)
      tag[OPP_DIR(dir)] = start_gather_field( temp[dir],
					      sizeof(su3_vector),
					      OPP_DIR( dir), parity,
					      gen_pt[OPP_DIR(dir)] );
    else
      restart_gather_field( temp[dir], sizeof(su3_vector),
			    OPP_DIR( dir), parity,
			    gen_pt[OPP_DIR(dir)], tag[OPP_DIR(dir)] );
  }

  /* Start 3-neighbour gathers from negative directions */
  for( dir=X3UP; dir <= T3UP; dir++){
    if (start==1)
      tag[OPP_3_DIR(dir)]=start_gather_field( temp[INDEX_3RD(dir)+4],
					      sizeof(su3_vector),
					      OPP_3_DIR( dir), parity,
					      gen_pt[OPP_3_DIR(dir)] );
    else
      restart_gather_field(temp[INDEX_3RD(dir)+4], sizeof(su3_vector),
			   OPP_3_DIR( dir),parity,
			   gen_pt[OPP_3_DIR(dir)], tag[OPP_3_DIR(dir)] );
  }

  /* Wait gathers from positive directions, multiply by matrix and
     accumulate */
  /* wait for the 3-neighbours from positive directions, multiply */
  for(dir=XUP; dir<=TUP; dir++){
    wait_gather(tag[dir]);
#ifndef NO_LONG_LINKS
    wait_gather(tag[DIR3(dir)]);
#endif
  }

  /* Fat-link contribution goes straight into dest, the long-link
     contribution into temp[8]; the two are combined in the last loop */
  FORSOMEPARITYDOMAIN_OMP(i,s,parity, private(fat4,long4) ){
    //NOPRE if( i < loopend-FETCH_UP ){
    //NOPRE   fat4 = &(t_fatlink[4*(i+FETCH_UP)]);
    //NOPRE   prefetch_4MVVVV(
    //NOPRE 	      fat4,
    //NOPRE 	      (su3_vector *)gen_pt[XUP][i+FETCH_UP],
    //NOPRE 	      (su3_vector *)gen_pt[YUP][i+FETCH_UP],
    //NOPRE 	      (su3_vector *)gen_pt[ZUP][i+FETCH_UP],
    //NOPRE 	      (su3_vector *)gen_pt[TUP][i+FETCH_UP] );
    //NOPRE   prefetch_VVVV(
    //NOPRE 	      (su3_vector *)gen_pt[XDOWN][i+FETCH_UP],
    //NOPRE 	      (su3_vector *)gen_pt[YDOWN][i+FETCH_UP],
    //NOPRE 	      (su3_vector *)gen_pt[ZDOWN][i+FETCH_UP],
    //NOPRE 	      (su3_vector *)gen_pt[TDOWN][i+FETCH_UP] );
#ifndef NO_LONG_LINKS
    //NOPRE   long4 = &(t_longlink[4*(i+FETCH_UP)]);
    //NOPRE   prefetch_4MVVVV(
    //NOPRE 	      long4,
    //NOPRE 	      (su3_vector *)gen_pt[X3UP][i+FETCH_UP],
    //NOPRE 	      (su3_vector *)gen_pt[Y3UP][i+FETCH_UP],
    //NOPRE 	      (su3_vector *)gen_pt[Z3UP][i+FETCH_UP],
    //NOPRE 	      (su3_vector *)gen_pt[T3UP][i+FETCH_UP] );
    //NOPRE   prefetch_VVVV(
    //NOPRE 	      (su3_vector *)gen_pt[X3DOWN][i+FETCH_UP],
    //NOPRE 	      (su3_vector *)gen_pt[Y3DOWN][i+FETCH_UP],
    //NOPRE 	      (su3_vector *)gen_pt[Z3DOWN][i+FETCH_UP],
    //NOPRE 	      (su3_vector *)gen_pt[T3DOWN][i+FETCH_UP] );
#endif
    //NOPRE }

    fat4 = &(t_fatlink[4*i]);
    mult_su3_mat_vec_sum_4dir( fat4,
			       (su3_vector *)gen_pt[XUP][i],
			       (su3_vector *)gen_pt[YUP][i],
			       (su3_vector *)gen_pt[ZUP][i],
			       (su3_vector *)gen_pt[TUP][i],
			       &(dest[i]) );
#ifndef NO_LONG_LINKS
    long4 = &(t_longlink[4*i]);
    mult_su3_mat_vec_sum_4dir( long4,
			       (su3_vector *)gen_pt[X3UP][i],
			       (su3_vector *)gen_pt[Y3UP][i],
			       (su3_vector *)gen_pt[Z3UP][i],
			       (su3_vector *)gen_pt[T3UP][i],
			       &(temp[8][i]));
#endif
  } END_LOOP_OMP

  /* Wait gathers from negative directions, accumulate (negative) */
  /* and the same for the negative 3-rd neighbours */
  for(dir=XUP; dir<=TUP; dir++){
    wait_gather(tag[OPP_DIR(dir)]);
  }
  for(dir=X3UP; dir<=T3UP; dir++){
    wait_gather(tag[OPP_3_DIR(dir)]);
  }

  FORSOMEPARITYDOMAIN_OMP(i,s,parity, ){
    //NOPRE if( i < loopend-FETCH_UP ){
    //NOPRE   prefetch_VVVVV(
    //NOPRE 	      &(dest[i+FETCH_UP]),
    //NOPRE 	      (su3_vector *)gen_pt[XDOWN][i+FETCH_UP],
    //NOPRE 	      (su3_vector *)gen_pt[YDOWN][i+FETCH_UP],
    //NOPRE 	      (su3_vector *)gen_pt[ZDOWN][i+FETCH_UP],
    //NOPRE 	      (su3_vector *)gen_pt[TDOWN][i+FETCH_UP] );
    //NOPRE   prefetch_VVVVV(
    //NOPRE 	      &(temp[8][i+FETCH_UP]),
    //NOPRE 	      (su3_vector *)gen_pt[X3DOWN][i+FETCH_UP],
    //NOPRE 	      (su3_vector *)gen_pt[Y3DOWN][i+FETCH_UP],
    //NOPRE 	      (su3_vector *)gen_pt[Z3DOWN][i+FETCH_UP],
    //NOPRE 	      (su3_vector *)gen_pt[T3DOWN][i+FETCH_UP] );
    //NOPRE }

    sub_four_su3_vecs( &(dest[i]),
		       (su3_vector *)(gen_pt[XDOWN][i]),
		       (su3_vector *)(gen_pt[YDOWN][i]),
		       (su3_vector *)(gen_pt[ZDOWN][i]),
		       (su3_vector *)(gen_pt[TDOWN][i]) );
    sub_four_su3_vecs( &(temp[8][i]),
		       (su3_vector *)(gen_pt[X3DOWN][i]),
		       (su3_vector *)(gen_pt[Y3DOWN][i]),
		       (su3_vector *)(gen_pt[Z3DOWN][i]),
		       (su3_vector *)(gen_pt[T3DOWN][i]) );
    /* Now need to add these things together */
    add_su3_vector(&(dest[i]), &(temp[8][i]),&(dest[i]));
  } END_LOOP_OMP

}
c_scalar_mult_sum_mat(&(Lambda[j]), &(in[iter]), &(plaq_src[5][i])); iter++; // 0, 3 --> 2 c_scalar_mult_sum_mat(&(Lambda[j]), &(in[iter]), &(src[i].Fplaq[2])); iter++; // 1, 2 --> 4 c_scalar_mult_sum_mat(&(Lambda[j]), &(in[iter]), &(plaq_src[4][i])); iter++; } } // Gather plaq_src[7] (2, 3) from x - 0 - 1 (gather path 23), // plaq_src[5] (1, 3) from x - 0 - 2 (gather path 31) // and plaq_src[4] (1, 2) from x - 0 - 3 (gather path 35) mtag0 = start_gather_field(plaq_src[7], sizeof(matrix), 23, EVENANDODD, gen_pt[0]); mtag1 = start_gather_field(plaq_src[5], sizeof(matrix), 31, EVENANDODD, gen_pt[1]); mtag2 = start_gather_field(plaq_src[4], sizeof(matrix), 35, EVENANDODD, gen_pt[2]); wait_gather(mtag0); wait_gather(mtag1); wait_gather(mtag2); FORALLSITES(i, s) { mat_copy((matrix *)(gen_pt[0][i]), &(src[i].Fplaq[7])); // 2, 3 mat_copy((matrix *)(gen_pt[1][i]), &(src[i].Fplaq[5])); // 1, 3 mat_copy((matrix *)(gen_pt[2][i]), &(src[i].Fplaq[4])); // 1, 2 } cleanup_gather(mtag0); cleanup_gather(mtag1);
void d_plaquette_field_hist(su3_matrix **U_field, int Npowers, int *Nhist, double **hist, double **hist_bounds, double *ss_plaq, double *st_plaq) { /* su3mat is scratch space of size su3_matrix */ su3_matrix *su3mat; register int i,dir1,dir2; register int ipower,ihist; register site *s; register su3_matrix *m1,*m4; double *plaq_power, *step_hist; su3_matrix mtmp; double ss_sum,st_sum; double rtrace, rtrace3; msg_tag *mtag0,*mtag1; ss_sum = st_sum = 0.0; #ifdef HISQ_DUMP_PLAQ_INTO_FILE FILE *fp; char plaq_file_name[300]; #endif /* HISQ_DUMP_PLAQ_INTO_FILE */ su3mat = (su3_matrix *)malloc(sizeof(su3_matrix)*sites_on_node); if(su3mat == NULL) { printf("plaquette: can't malloc su3mat\n"); fflush(stdout); terminate(1); } /* zero out the histogram */ for(ipower=0;ipower<Npowers;ipower++) { for(ihist=0;ihist<Nhist[ipower];ihist++) { hist[ipower][ihist]=0.0; } } /* array with powers of (3-plaquette) */ plaq_power=(double*)malloc(sizeof(double)*Npowers); /* array with step sizes */ step_hist=(double*)malloc(sizeof(double)*Npowers); for(ipower=0;ipower<Npowers;ipower++) { step_hist[ipower]= (hist_bounds[ipower][1]-hist_bounds[ipower][0])/Nhist[ipower]; } #ifdef HISQ_DUMP_PLAQ_INTO_FILE sprintf( plaq_file_name, "plaq_W_node%04d.dat", this_node ); fp = fopen( plaq_file_name, "wt" ); #endif /* HISQ_DUMP_PLAQ_INTO_FILE */ for(dir1=YUP;dir1<=TUP;dir1++){ for(dir2=XUP;dir2<dir1;dir2++){ mtag0 = start_gather_field( U_field[dir2], sizeof(su3_matrix), dir1, EVENANDODD, gen_pt[0] ); mtag1 = start_gather_field( U_field[dir1], sizeof(su3_matrix), dir2, EVENANDODD, gen_pt[1] ); FORALLSITES(i,s){ m1 = &(U_field[dir1][i]); m4 = &(U_field[dir2][i]); mult_su3_an(m4,m1,&su3mat[i]); } wait_gather(mtag0); wait_gather(mtag1); FORALLSITES(i,s){ mult_su3_nn( &su3mat[i], (su3_matrix *)(gen_pt[0][i]), &mtmp); if(dir1==TUP ) { rtrace = (double) realtrace_su3((su3_matrix *)(gen_pt[1][i]),&mtmp); st_sum += rtrace; } else { rtrace = (double) realtrace_su3((su3_matrix *)(gen_pt[1][i]),&mtmp); ss_sum += 
rtrace; } // printf("Plaq i=%d, dir1=%d, dir2=%d: %f %f\n", // i,dir1,dir2,rtrace_s,rtrace_t); #ifdef HISQ_DUMP_PLAQ_INTO_FILE fprintf( fp, "%18.12g\n", rtrace ); #endif /* HISQ_DUMP_PLAQ_INTO_FILE */ /* powers of (3-plaquette) */ rtrace3=3.0-rtrace; plaq_power[0]=rtrace3; for(ipower=1;ipower<Npowers;ipower++) { plaq_power[ipower]=plaq_power[ipower-1]*rtrace3; } /* find histogram entry */ for(ipower=0;ipower<Npowers;ipower++) { if( (plaq_power[ipower]>hist_bounds[ipower][0]) && (plaq_power[ipower]<hist_bounds[ipower][1]) ) { ihist=(int)( (plaq_power[ipower]-hist_bounds[ipower][0])/ step_hist[ipower] ); hist[ipower][ihist]+=1.0; } } } cleanup_gather(mtag0); cleanup_gather(mtag1); }
void sym_shift(int dir, field_offset src,field_offset dest) { register int i ; register site *s ; msg_tag *tag[2]; su3_vector *tvec; tvec = (su3_vector *)malloc( sites_on_node*sizeof(su3_vector) ); tag[0] = start_gather_site( src, sizeof(su3_vector), dir, EVENANDODD ,gen_pt[0] ); FORALLSITES(i,s) { mult_adj_su3_mat_vec( &(s->link[dir]), (su3_vector *)F_PT(s,src), &(tvec[i]) ) ; } tag[1] = start_gather_field(tvec, sizeof(su3_vector), OPP_DIR(dir), EVENANDODD ,gen_pt[1] ); wait_gather(tag[0]); FORALLSITES(i,s) { mult_su3_mat_vec( &(s->link[dir]), (su3_vector *)gen_pt[0][i], (su3_vector *)F_PT(s,dest) ) ; } wait_gather(tag[1]); FORALLSITES(i,s) { add_su3_vector( (su3_vector *)F_PT(s,dest), (su3_vector *)gen_pt[1][i], (su3_vector *)F_PT(s,dest) ) ; } /* Now devide by 2 eq. (4.2b) of Golderman's Meson paper*/ FORALLSITES(i,s) {
} /**cleanup_gather(mtag0); Use same gather in next plaquette**/ cleanup_gather(mtag1); /* Plaquette in -dir0 +dir1 direction */ /**mtag0 = start_gather_site( LINK_OFFSET(dir0), sizeof(su3_matrix), dir1, EVENANDODD, gen_pt[0] ); wait_gather(mtag0); Already gathered above**/ FORALLSITES(i,s){ mult_su3_an( &LINK(dir1), &LINK(dir0), &tmat1 ); mult_su3_an( (su3_matrix *)(gen_pt[0][i]), &tmat1, &temp1[i] ); } mtag1 = start_gather_field( temp1, sizeof(su3_matrix), OPP_DIR(dir0), EVENANDODD, gen_pt[1] ); wait_gather(mtag1); FORALLSITES(i,s){ mult_su3_nn( &LINK(dir1), (su3_matrix *)(gen_pt[1][i]), &tmat1 ); su3_adjoint( &tmat1, &tmat2 ); add_su3_matrix( &FIELD_STRENGTH(component), &tmat1, &FIELD_STRENGTH(component) ); sub_su3_matrix( &FIELD_STRENGTH(component), &tmat2, &FIELD_STRENGTH(component) ); } cleanup_gather(mtag0); cleanup_gather(mtag1); /* Plaquette in -dir0 -dir1 direction */ mtag0 = start_gather_site( LINK_OFFSET(dir0), sizeof(su3_matrix),