void load_config_from_file(su3 **in, char * filename) { int x, mu; su3 ** temp_su3; su3 * tsu3; temp_su3 = (su3**)calloc(VOLUME, sizeof(su3*)); tsu3 = (su3*)calloc(4*VOLUME, sizeof(su3)); temp_su3[0] = tsu3; for(x = 0; x < VOLUME; x++) { temp_su3[x] = temp_su3[x]+4; } for(x = 0; x < VOLUME; x++) { for(mu = 0; mu < 4; mu++) { _su3_assign(temp_su3[x][mu], g_gauge_field[x][mu]); } } read_gauge_field(filename); for(x = 0; x < VOLUME; x++) { for(mu = 0; mu < 4; mu++) { _su3_assign((in[x][mu]), g_gauge_field[x][mu]); _su3_assign(g_gauge_field[x][mu], temp_su3[x][mu]); } } free(temp_su3); free(tsu3); }
/* THINK OF PARALLELIZATION (RAND!!!)*/ void copy_gauge_field (su3 ** to, su3 ** from) { for (int ix = 0; ix < VOLUME; ix++) { _su3_assign(to[ix][0], from[ix][0]); _su3_assign(to[ix][1], from[ix][1]); _su3_assign(to[ix][2], from[ix][2]); _su3_assign(to[ix][3], from[ix][3]); } }
void update_backward_gauge() { int ix=0, kb=0, iy=0; for(ix = 0; ix < VOLUME/2; ix++) { iy = (VOLUME+RAND)/2+ix; kb = g_idn[ g_eo2lexic[iy] ][0]; _su3_assign(g_gauge_field_copy[0][ix][0], g_gauge_field[kb][0]); kb = g_idn[ g_eo2lexic[iy] ][1]; _su3_assign(g_gauge_field_copy[0][ix][1], g_gauge_field[kb][1]); kb = g_idn[ g_eo2lexic[iy] ][2]; _su3_assign(g_gauge_field_copy[0][ix][2], g_gauge_field[kb][2]); kb = g_idn[ g_eo2lexic[iy] ][3]; _su3_assign(g_gauge_field_copy[0][ix][3], g_gauge_field[kb][3]); } for(ix = 0; ix < VOLUME/2; ix++) { kb = g_idn[ g_eo2lexic[ix] ][0]; _su3_assign(g_gauge_field_copy[1][ix][0], g_gauge_field[kb][0]); kb = g_idn[ g_eo2lexic[ix] ][1]; _su3_assign(g_gauge_field_copy[1][ix][1], g_gauge_field[kb][1]); kb = g_idn[ g_eo2lexic[ix] ][2]; _su3_assign(g_gauge_field_copy[1][ix][2], g_gauge_field[kb][2]); kb = g_idn[ g_eo2lexic[ix] ][3]; _su3_assign(g_gauge_field_copy[1][ix][3], g_gauge_field[kb][3]); } g_update_gauge_copy = 0; return; }
int ape_smear(su3_tuple *m_field_out, struct ape_parameters const *params, su3_tuple *m_field_in) { static int initialized = 0; static su3_tuple *buffer; static su3 tmp; double const rho_p = 1 - params->rho; double const rho_s = params->rho / 6.0; if (!initialized) { /* Allocate consecutive memory for both of the buffers upon first instantiation */ buffer = (su3_tuple*)malloc(sizeof(su3_tuple) * VOLUMEPLUSRAND + 1); #if (defined SSE || defined SSE2 || defined SSE3) buffer = (su3_tuple*)(((unsigned long int)(buffer) + ALIGN_BASE) & ~ALIGN_BASE); #endif if (buffer == (su3_tuple*)NULL) return -1; initialized = 1; } /* start of the the stout smearing **/ for(int iter = 0; iter < params->iterations; ++iter) { for (int x = 0; x < VOLUME; ++x) for (int mu = 0; mu < 4; ++mu) { generic_staples(&tmp, x, mu, m_field_in); _real_times_su3_plus_real_times_su3(buffer[x][mu], rho_p, m_field_in[x][mu], rho_s, tmp) reunitarize(&buffer[x][mu]); } for(int x = 0; x < VOLUME; ++x) for(int mu = 0 ; mu < 4; ++mu) { _su3_assign(m_field_out[x][mu], buffer[x][mu]); } generic_exchange(m_field_out, sizeof(su3_tuple)); m_field_in = m_field_out; /* Prepare for next iteration */ } return(0); }
/* Method based on Givens' rotations, as used by Urs Wenger */ void reunitarize(su3 *omega) { static su3 w, rot, tmp; static double trace_old, trace_new; static _Complex double s0, s1; static double scale; _su3_one(w); trace_old = omega->c00 + omega->c11 + omega->c22; for (int iter = 0; iter < 200; ++iter) { /* Givens' rotation 01 */ s0 = omega->c00 + conj(omega->c11); s1 = omega->c01 - conj(omega->c10); scale = 1.0 / sqrt(conj(s0) * s0 + conj(s1) * s1); s0 *= scale; s1 *= scale; /* Projecting */ _su3_one(rot); rot.c00 = s0; rot.c11 = conj(s0); rot.c01 = s1; rot.c10 = -conj(s1); _su3_times_su3(tmp, rot, w); _su3_assign(w, tmp); _su3_times_su3d(tmp, *omega, rot); _su3_assign(*omega, tmp); /* Givens' rotation 12 */ s0 = omega->c11 + conj(omega->c22); s1 = omega->c12 - conj(omega->c21); scale = 1.0 / sqrt(conj(s0) * s0 + conj(s1) * s1); s0 *= scale; s1 *= scale; /* Projecting */ _su3_one(rot); rot.c11 = s0; rot.c22 = conj(s0); rot.c12 = s1; rot.c21 = -conj(s1); _su3_times_su3(tmp, rot, w); _su3_assign(w, tmp); _su3_times_su3d(tmp, *omega, rot); _su3_assign(*omega, tmp); /* Givens' rotation 20 */ s0 = omega->c22 + conj(omega->c00); s1 = omega->c20 - conj(omega->c02); scale = 1.0 / sqrt(conj(s0) * s0 + conj(s1) * s1); s0 *= scale; s1 *= scale; /* Projecting */ _su3_one(rot); rot.c22 = s0; rot.c00 = conj(s0); rot.c20 = s1; rot.c02 = -conj(s1); _su3_times_su3(tmp, rot, w); _su3_assign(w, tmp); _su3_times_su3d(tmp, *omega, rot); _su3_assign(*omega, tmp); trace_new = omega->c00 + omega->c11 + omega->c22; if (trace_new - trace_old < 1e-15) break; trace_old = trace_new; } _su3_assign(*omega, w); }
void update_gauge(double step) { int i,mu; static su3 v,w; su3 *z; static su3adj deriv; su3adj *xm; #ifdef _KOJAK_INST #pragma pomp inst begin(updategauge) #endif if (bc_flag == 0) { /* if PBC */ for(i = 0; i < VOLUME; i++) { for(mu = 0; mu < 4; mu++){ /* moment[i][mu] = h_{i,mu}^{alpha} */ xm=&moment[i][mu]; z=&g_gauge_field[i][mu]; _assign_const_times_mom(deriv, step, *xm); v=restoresu3( exposu3(deriv) ); _su3_times_su3(w, v, *z); _su3_assign(*z, w); } } } else if (bc_flag == 1) { /* if SF bc */ for(i = 0; i < VOLUME; i++) { for(mu = 0; mu < 4; mu++){ if (g_t[i] == 0 && (mu==1 || mu==2 || mu==3)) { /* do not update spatial links at zero boundary */ } else if (g_t[i] == g_Tbsf) { /* do not update all the links at T boundary */ } else { /* update all links in the bulk and the temporal link at zero */ xm=&moment[i][mu]; z=&g_gauge_field[i][mu]; _assign_const_times_mom(deriv, step, *xm); v=restoresu3( exposu3(deriv) ); _su3_times_su3(w, v, *z); _su3_assign(*z, w); } } } } #ifdef MPI /* for parallelization */ xchange_gauge(); #endif /* * The backward copy of the gauge field * is not updated here! */ g_update_gauge_copy = 1; g_update_gauge_energy = 1; g_update_rectangle_energy = 1; return; #ifdef _KOJAK_INST #pragma pomp inst end(updategauge) #endif }
void sw_term(su3 ** const gf, const double kappa, const double c_sw) { int k,l; int x,xpk,xpl,xmk,xml,xpkml,xplmk,xmkml; su3 *w1,*w2,*w3,*w4; double ka_csw_8 = kappa*c_sw/8.; static su3 v1,v2,plaq; static su3 fkl[4][4]; static su3 magnetic[4],electric[4]; static su3 aux; /* compute the clover-leave */ /* l __ __ | | | | |__| |__| __ __ | | | | |__| |__| k */ for(x = 0; x < VOLUME; x++) { for(k = 0; k < 4; k++) { for(l = k+1; l < 4; l++) { xpk=g_iup[x][k]; xpl=g_iup[x][l]; xmk=g_idn[x][k]; xml=g_idn[x][l]; xpkml=g_idn[xpk][l]; xplmk=g_idn[xpl][k]; xmkml=g_idn[xml][k]; w1=&gf[x][k]; w2=&gf[xpk][l]; w3=&gf[xpl][k]; w4=&gf[x][l]; _su3_times_su3(v1,*w1,*w2); _su3_times_su3(v2,*w4,*w3); _su3_times_su3d(plaq,v1,v2); w1=&gf[x][l]; w2=&gf[xplmk][k]; w3=&gf[xmk][l]; w4=&gf[xmk][k]; _su3_times_su3d(v1,*w1,*w2); _su3d_times_su3(v2,*w3,*w4); _su3_times_su3_acc(plaq,v1,v2); w1=&gf[xmk][k]; w2=&gf[xmkml][l]; w3=&gf[xmkml][k]; w4=&gf[xml][l]; _su3_times_su3(v1,*w2,*w1); _su3_times_su3(v2,*w3,*w4); _su3d_times_su3_acc(plaq,v1,v2); w1=&gf[xml][l]; w2=&gf[xml][k]; w3=&gf[xpkml][l]; w4=&gf[x][k]; _su3d_times_su3(v1,*w1,*w2); _su3_times_su3d(v2,*w3,*w4); _su3_times_su3_acc(plaq,v1,v2); _su3_dagger(v2,plaq); _su3_minus_su3(fkl[k][l],plaq,v2); } } // this is the one in flavour and colour space // twisted mass term is treated in clover, sw_inv and // clover_gamma5 _su3_one(sw[x][0][0]); _su3_one(sw[x][2][0]); _su3_one(sw[x][0][1]); _su3_one(sw[x][2][1]); for(k = 1; k < 4; k++) { _su3_assign(electric[k], fkl[0][k]); } _su3_assign(magnetic[1], fkl[2][3]); _su3_minus_assign(magnetic[2], fkl[1][3]); _su3_assign(magnetic[3], fkl[1][2]); /* upper left block 6x6 matrix */ _itimes_su3_minus_su3(aux,electric[3],magnetic[3]); _su3_refac_acc(sw[x][0][0],ka_csw_8,aux); _itimes_su3_minus_su3(aux,electric[1],magnetic[1]); _su3_minus_su3(v2,electric[2],magnetic[2]); _su3_acc(aux,v2); _real_times_su3(sw[x][1][0],ka_csw_8,aux); _itimes_su3_minus_su3(aux,magnetic[3],electric[3]); _su3_refac_acc(sw[x][2][0],ka_csw_8,aux); /* lower right block 6x6 matrix */ _itimes_su3_plus_su3(aux,electric[3],magnetic[3]); _su3_refac_acc(sw[x][0][1],(-ka_csw_8),aux); _itimes_su3_plus_su3(aux,electric[1],magnetic[1]); _su3_plus_su3(v2,electric[2],magnetic[2]); _su3_acc(aux,v2); _real_times_su3(sw[x][1][1],(-ka_csw_8),aux); _itimes_su3_plus_su3(aux,magnetic[3],electric[3]); _su3_refac_acc(sw[x][2][1],ka_csw_8,aux); } return; }
int update_tm(double *plaquette_energy, double *rectangle_energy, char * filename, const int return_check, const int acctest, const int traj_counter) { su3 *v, *w; static int ini_g_tmp = 0; int accept, i=0, j=0, iostatus=0; double yy[1]; double dh, expmdh, ret_dh=0., ret_gauge_diff=0., tmp; double atime=0., etime=0.; double ks = 0., kc = 0., ds, tr, ts, tt; char tmp_filename[50]; /* Energy corresponding to the Gauge part */ double new_plaquette_energy=0., new_rectangle_energy = 0.; /* Energy corresponding to the Momenta part */ double enep=0., enepx=0., ret_enep = 0.; /* Energy corresponding to the pseudo fermion part(s) */ FILE * datafile=NULL, * ret_check_file=NULL; hamiltonian_field_t hf; paramsXlfInfo *xlfInfo; hf.gaugefield = g_gauge_field; hf.momenta = moment; hf.derivative = df0; hf.update_gauge_copy = g_update_gauge_copy; hf.update_gauge_energy = g_update_gauge_energy; hf.update_rectangle_energy = g_update_rectangle_energy; hf.traj_counter = traj_counter; integrator_set_fields(&hf); strcpy(tmp_filename, ".conf.tmp"); if(ini_g_tmp == 0) { ini_g_tmp = init_gauge_tmp(VOLUME); if(ini_g_tmp != 0) { exit(-1); } ini_g_tmp = 1; } atime = gettime(); /* * here the momentum and spinor fields are initialized * and their respective actions are calculated */ /* * copy the gauge field to gauge_tmp */ #ifdef OMP #pragma omp parallel for private(w,v) #endif for(int ix=0;ix<VOLUME;ix++) { for(int mu=0;mu<4;mu++) { v=&hf.gaugefield[ix][mu]; w=&gauge_tmp[ix][mu]; _su3_assign(*w,*v); } } /* heatbath for all monomials */ for(i = 0; i < Integrator.no_timescales; i++) { for(j = 0; j < Integrator.no_mnls_per_ts[i]; j++) { monomial_list[ Integrator.mnls_per_ts[i][j] ].hbfunction(Integrator.mnls_per_ts[i][j], &hf); } } if(Integrator.monitor_forces) monitor_forces(&hf); /* initialize the momenta */ enep = random_su3adj_field(reproduce_randomnumber_flag, hf.momenta); g_sloppy_precision = 1; /* run the trajectory */ if(Integrator.n_int[Integrator.no_timescales-1] > 0) { Integrator.integrate[Integrator.no_timescales-1](Integrator.tau, Integrator.no_timescales-1, 1); } g_sloppy_precision = 0; /* compute the final energy contributions for all monomials */ dh = 0.; for(i = 0; i < Integrator.no_timescales; i++) { for(j = 0; j < Integrator.no_mnls_per_ts[i]; j++) { dh += monomial_list[ Integrator.mnls_per_ts[i][j] ].accfunction(Integrator.mnls_per_ts[i][j], &hf); } } enepx = moment_energy(hf.momenta); if (!bc_flag) { /* if PBC */ new_plaquette_energy = measure_gauge_action( (const su3**) hf.gaugefield); if(g_rgi_C1 > 0. || g_rgi_C1 < 0.) { new_rectangle_energy = measure_rectangles( (const su3**) hf.gaugefield); } } if(g_proc_id == 0 && g_debug_level > 3) printf("called moment_energy: dh = %1.10e\n", (enepx - enep)); /* Compute the energy difference */ dh = dh + (enepx - enep); if(g_proc_id == 0 && g_debug_level > 3) { printf("called momenta_acc dH = %e\n", (enepx - enep)); } expmdh = exp(-dh); /* the random number is only taken at node zero and then distributed to the other sites */ ranlxd(yy,1); if(g_proc_id==0) { #ifdef MPI for(i = 1; i < g_nproc; i++) { MPI_Send(&yy[0], 1, MPI_DOUBLE, i, 31, MPI_COMM_WORLD); } #endif } #ifdef MPI else{ MPI_Recv(&yy[0], 1, MPI_DOUBLE, 0, 31, MPI_COMM_WORLD, &status); } #endif accept = (!acctest | (expmdh > yy[0])); if(g_proc_id == 0) { fprintf(stdout, "# Trajectory is %saccepted.\n", (accept ? "" : "not ")); } /* Here a reversibility test is performed */ /* The trajectory is integrated back */ if(return_check) { if(g_proc_id == 0) { fprintf(stdout, "# Performing reversibility check.\n"); } if(accept) { /* save gauge file to disk before performing reversibility check */ xlfInfo = construct_paramsXlfInfo((*plaquette_energy)/(6.*VOLUME*g_nproc), -1); // Should write this to temporary file first, and then check if(g_proc_id == 0 && g_debug_level > 0) { fprintf(stdout, "# Writing gauge field to file %s.\n", tmp_filename); } if((iostatus = write_gauge_field( tmp_filename, 64, xlfInfo) != 0 )) { /* Writing failed directly */ fprintf(stderr, "Error %d while writing gauge field to %s\nAborting...\n", iostatus, tmp_filename); exit(-2); } /* There is double writing of the gauge field, also in hmc_tm.c in this case */ /* No reading back check needed here, as reading back is done further down */ if(g_proc_id == 0 && g_debug_level > 0) { fprintf(stdout, "# Writing done.\n"); } free(xlfInfo); } g_sloppy_precision = 1; /* run the trajectory back */ Integrator.integrate[Integrator.no_timescales-1](-Integrator.tau, Integrator.no_timescales-1, 1); g_sloppy_precision = 0; /* compute the energy contributions from the pseudo-fermions */ ret_dh = 0.; for(i = 0; i < Integrator.no_timescales; i++) { for(j = 0; j < Integrator.no_mnls_per_ts[i]; j++) { ret_dh += monomial_list[ Integrator.mnls_per_ts[i][j] ].accfunction(Integrator.mnls_per_ts[i][j], &hf); } } ret_enep = moment_energy(hf.momenta); /* Compute the energy difference */ ret_dh += ret_enep - enep ; /* Compute Differences in the fields */ ks = 0.; kc = 0.; #ifdef OMP #pragma omp parallel private(w,v,tt,tr,ts,ds,ks,kc) { int thread_num = omp_get_thread_num(); #endif su3 ALIGN v0; #ifdef OMP #pragma omp for #endif for(int ix = 0; ix < VOLUME; ++ix) { for(int mu = 0; mu < 4; ++mu) { v=&hf.gaugefield[ix][mu]; w=&gauge_tmp[ix][mu]; _su3_minus_su3(v0, *v, *w); _su3_square_norm(ds, v0); tr = sqrt(ds) + kc; ts = tr + ks; tt = ts-ks; ks = ts; kc = tr-tt; } } kc=ks+kc; #ifdef OMP g_omp_acc_re[thread_num] = kc; } /* OpenMP parallel section closing brace */ /* sum up contributions from thread-local kahan summations */ for(int k = 0; k < omp_num_threads; ++k) ret_gauge_diff += g_omp_acc_re[k]; #else ret_gauge_diff = kc; #endif #ifdef MPI tmp = ret_gauge_diff; MPI_Reduce(&tmp, &ret_gauge_diff, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); #endif /* compute the total H */ tmp = enep; for(i = 0; i < Integrator.no_timescales; i++) { for(j = 0; j < Integrator.no_mnls_per_ts[i]; j++) { tmp += monomial_list[ Integrator.mnls_per_ts[i][j] ].energy0; } } /* Output */ if(g_proc_id == 0) { ret_check_file = fopen("return_check.data","a"); fprintf(ret_check_file,"ddh = %1.4e ddU= %1.4e ddh/H = %1.4e\n", ret_dh, ret_gauge_diff/4./((double)(VOLUME*g_nproc))/3., ret_dh/tmp); fclose(ret_check_file); } if(accept) { /* Read back gauge field */ if(g_proc_id == 0 && g_debug_level > 0) { fprintf(stdout, "# Trying to read gauge field from file %s.\n", tmp_filename); } if((iostatus = read_gauge_field(tmp_filename) != 0)) { fprintf(stderr, "Error %d while reading gauge field from %s\nAborting...\n", iostatus, tmp_filename); exit(-2); } if(g_proc_id == 0 && g_debug_level > 0) { fprintf(stdout, "# Reading done.\n"); } } if(g_proc_id == 0) { fprintf(stdout, "# Reversibility check done.\n"); } } /* end of reversibility check */ if(accept) { *plaquette_energy = new_plaquette_energy; *rectangle_energy = new_rectangle_energy; /* put the links back to SU(3) group */ if (!bc_flag) { /* periodic boundary conditions */ #ifdef OMP #pragma omp parallel for private(v) #endif for(int ix=0;ix<VOLUME;ix++) { for(int mu=0;mu<4;mu++) { v=&hf.gaugefield[ix][mu]; restoresu3_in_place(v); } } } } else { /* reject: copy gauge_tmp to hf.gaugefield */ #ifdef OMP #pragma omp parallel for private(w) private(v) #endif for(int ix=0;ix<VOLUME;ix++) { for(int mu=0;mu<4;mu++){ v=&hf.gaugefield[ix][mu]; w=&gauge_tmp[ix][mu]; _su3_assign(*v,*w); } } } hf.update_gauge_copy = 1; g_update_gauge_copy = 1; hf.update_gauge_energy = 1; g_update_gauge_energy = 1; hf.update_rectangle_energy = 1; g_update_rectangle_energy = 1; #ifdef MPI xchange_gauge(hf.gaugefield); #endif etime=gettime(); /* printing data in the .data file */ if(g_proc_id==0) { datafile = fopen(filename, "a"); if (!bc_flag) { /* if Periodic Boundary Conditions */ fprintf(datafile, "%.8d %14.12f %14.12f %e ", traj_counter, (*plaquette_energy)/(6.*VOLUME*g_nproc), dh, expmdh); } for(i = 0; i < Integrator.no_timescales; i++) { for(j = 0; j < Integrator.no_mnls_per_ts[i]; j++) { if(monomial_list[ Integrator.mnls_per_ts[i][j] ].type != GAUGE && monomial_list[ Integrator.mnls_per_ts[i][j] ].type != SFGAUGE && monomial_list[ Integrator.mnls_per_ts[i][j] ].type != NDPOLY && monomial_list[ Integrator.mnls_per_ts[i][j] ].type != NDCLOVER && monomial_list[ Integrator.mnls_per_ts[i][j] ].type != CLOVERNDTRLOG && monomial_list[ Integrator.mnls_per_ts[i][j] ].type != CLOVERTRLOG ) { fprintf(datafile,"%d %d ", monomial_list[ Integrator.mnls_per_ts[i][j] ].iter0, monomial_list[ Integrator.mnls_per_ts[i][j] ].iter1); } } } fprintf(datafile, "%d %e", accept, etime-atime); if(g_rgi_C1 > 0. || g_rgi_C1 < 0) { fprintf(datafile, " %e", (*rectangle_energy)/(12*VOLUME*g_nproc)); } fprintf(datafile, "\n"); fflush(datafile); fclose(datafile); } return(accept); }
void polyakov_loop(complex * pl_, const int mu) { static int i0, i1, i2, i3, L0, L1, L2, L3, ixyzt, ixyzt_up; static double vol; static su3 tmp, tmp2; su3 *v = NULL , *w = NULL; static complex pl; /* For the Kahan summation:*/ #ifdef MPI static complex pls; #endif static complex ks, kc, tr, ts, tt; kc.re=0.0; ks.re=0.0; kc.im=0.0; ks.im=0.0; /* For the moment only the Polyakov loop in y- and z-direction are implemented, since they are not affected by parallelisation: */ if(mu == 0 || mu == 1 || mu > 3) { fprintf(stderr, "Wrong parameter for Polyakov loop calculation in polyakov_loop.c:\n"); fprintf(stderr, "Only direction %d and %d are allowed.\n",2,3); fprintf(stderr, "Actual value is %d! Aborting...\n",mu); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 10); MPI_Finalize(); #endif exit(0); } L0=T; L1=LX; if(mu==2) { L2=LZ; L3=LY; } else { L2=LY; L3=LZ; } /* loop over the spatial sites: */ for (i0=0; i0 < L0; i0++) { for (i1=0; i1 < L1; i1++) { for (i2=0; i2 < L2; i2++) { /* at each spatial site multiply the links in temporal direction: */ i3 = 0; /* get the site index: */ if(mu==2) { ixyzt = g_ipt[i0][i1][i3][i2]; } else { ixyzt = g_ipt[i0][i1][i2][i3]; } /* and its neigbour in direction mu: */ ixyzt_up = g_iup[ixyzt][mu]; /* Get the links and multiply them: ixyzt --> ixyzt_up --> */ v = &g_gauge_field[ixyzt][mu]; w = &g_gauge_field[ixyzt_up][mu]; _su3_times_su3(tmp, *v, *w); /* now start the loop over indices in mu-direction: */ for (i3=1; i3 < L3-2; i3++) { /* store the current result in v:*/ _su3_assign(tmp2,tmp); /* get the next site index: */ ixyzt_up = g_iup[ixyzt_up][mu]; /* and the corresponding link matrix: */ w = &g_gauge_field[ixyzt_up][mu]; /* and multiply them: */ _su3_times_su3(tmp, tmp2, *w); } /* for the last link we directly take the complex trace: */ ixyzt_up = g_iup[ixyzt_up][mu]; w = &g_gauge_field[ixyzt_up][mu]; _trace_su3_times_su3(pl,tmp,*w); /* printf("i0=%d, i1=%d, i2=%d, pl=(%e,%e)\n",i0,i1,i2,pl.re,pl.im);*/ /* Kahan summation for real and imaginary part: */ tr.re=pl.re+kc.re; ts.re=tr.re+ks.re; tt.re=ts.re-ks.re; ks.re=ts.re; kc.im=tr.im-tt.im; tr.im=pl.im+kc.im; ts.im=tr.im+ks.im; tt.im=ts.im-ks.im; ks.im=ts.im; kc.im=tr.im-tt.im; } } } /* Finish Kahan summation: */ /* (Division by 3 is for normalising the colour trace.) */ pl.re=(kc.re+ks.re)/3.0; pl.im=(kc.im+ks.im)/3.0; /* printf("Polyakov loop before normalisation, pl.re=%e, pl.im=%e\n",pl.re,pl.im);*/ /* Collect the results and return:*/ #ifdef MPI MPI_Allreduce(&pl, &pls, 2, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); pl=pls; #endif /* Normalise, i.e. divide by the number of loops: */ vol = (double) L0*L1*L2*g_nproc_t*g_nproc_x; /* printf("L0*L1*L2=%d, vol=%e\n",L0*L1*L2,vol); */ _div_real(pl,pl,vol); /* printf("Polyakov loop after normalisation, pl.re=%e, pl.im=%e\n",pl.re,pl.im) */; /* return pl; */ (*pl_).re = pl.re; (*pl_).im = pl.im; }
int polyakov_loop_0(const int nstore, complex *pl) { int i0, i1, i2, i3, ixyz, ixyzt, ixyzt_up, VOL3, VOLUME3; int L0, L1, L2, L3; double retime, ratime; complex pl_tmp, tr, ts, tt, kc, ks; su3 *tmp_loc = NULL, tmp, tmp2; su3 *v = NULL, *w = NULL; FILE *ofs = NULL; #ifdef MPI int iproc; MPI_Status status; su3 *tmp_nnb = NULL; #endif L0 = LX; /* enable transparent comparison with existing Polyakov routines */ L1 = LY; /* in spatial directions */ L2 = LZ; L3 = T; /************** * local part * **************/ #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock()/(double)(CLOCKS_PER_SEC); #endif VOL3 = L0*L1*L2; tmp_loc = (su3 *)calloc(VOL3, sizeof(su3)); for(i0 = 0; i0 < LX; i0++) { for(i1 = 0; i1 < LY; i1++) { for(i2 = 0; i2 < LZ; i2++) { ixyz = (i2 * L1 + i1) * L0 + i0; i3 = 0; ixyzt = g_ipt[i3][i0][i1][i2]; ixyzt_up = g_iup[ixyzt][0]; v = &g_gauge_field[ixyzt][0]; w = &g_gauge_field[ixyzt_up][0]; _su3_times_su3(tmp, *v, *w); for(i3 = 1; i3 < L3-1; i3++) { _su3_assign(tmp2,tmp); ixyzt_up = g_iup[ixyzt_up][0]; w = &g_gauge_field[ixyzt_up][0]; _su3_times_su3(tmp, tmp2, *w); } _su3_assign(tmp_loc[ixyz],tmp); } } } #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock()/(double)(CLOCKS_PER_SEC); #endif if(g_debug_level>0) { fprintf(stdout, "[polyakov_loop_0 | %3d] time for calculating local part = %e seconds\n", g_cart_id, retime-ratime); } /********************************************************************************/ #ifdef MPI /*************** * global part * ***************/ ratime = MPI_Wtime(); /* (1) collect contributions from different time slices to nodes with t-coord. 0 */ tmp_nnb = (su3*)calloc(VOL3, sizeof(su3)); /* contains the next-neighbour-part*/ /* note: in the following loop t is taken as the time coordinate of nodes */ for(iproc = g_nproc_t-1; iproc > 0; iproc--) { if(g_proc_coords[0] == iproc) /* node is in the {t=iproc}-hyperplane */ { MPI_Send(tmp_loc, VOL3, mpi_su3, g_nb_t_dn, 100+g_cart_id, g_cart_grid); /* send tmp_loc from {t=iproc}-hyperplane to {t=iproc-1}-hyperplane */ } if(g_proc_coords[0] == iproc-1) { /* so the node is right below the sending one in time(= 0)-direction */ MPI_Recv(tmp_nnb, VOL3, mpi_su3, g_nb_t_up, 100+g_nb_t_up, g_cart_grid, &status); /* receive tmp_loc from the tmp_loc from the {t=my_own_t_index+1}-hyperplane */ for(ixyz=0; ixyz<VOL3; ixyz++) { /* multiply all matrices in tmp_nbb to my own in tmp_loc from the right */ v = tmp_loc+ixyz; w = tmp_nnb+ixyz; _su3_assign(tmp2, *v); _su3_times_su3(*v, tmp2, *w); } } /* if iproc==0 then the node with g_proc_coords[0]=0 will finally contain the product of all contributions from all {t=const.}-planes */ } retime = MPI_Wtime(); if(g_proc_id==0 && g_debug_level>0) { fprintf(stdout, "[polyakov_loop_0 | %3d] time for calculating global part = %e seconds\n", g_cart_id, retime-ratime); } /* (2) nodes with time coordinate 0 sum traces over local spatial points */ #endif _complex_zero(pl_tmp); /* pl_tmp.re = 0.0; pl_tmp.im = 0.0; */ if(g_proc_coords[0] == 0) { kc.re = 0.0; kc.im = 0.0; ks.re = 0.0; ks.im = 0.0; for(ixyz = 0; ixyz < VOL3; ixyz++) /* Kahan-summation of traces */ { pl_tmp.re = (tmp_loc[ixyz]).c00.re + (tmp_loc[ixyz]).c11.re + (tmp_loc[ixyz]).c22.re; pl_tmp.im = (tmp_loc[ixyz]).c00.im + (tmp_loc[ixyz]).c11.im + (tmp_loc[ixyz]).c22.im; tr.re=pl_tmp.re+kc.re; ts.re=tr.re+ks.re; tt.re=ts.re-ks.re; ks.re=ts.re; kc.re=tr.re-tt.re; tr.im=pl_tmp.im+kc.im; ts.im=tr.im+ks.im; tt.im=ts.im-ks.im; ks.im=ts.im; kc.im=tr.im-tt.im; } pl_tmp.re = ks.re + kc.re; pl_tmp.im = ks.im + kc.im; } #ifdef MPI /* (3) sum over all contributions from all nodes (also nodes with pl_tmp=0; apparently the easiest way) */ MPI_Reduce(&pl_tmp, pl, 1, MPI_DOUBLE_COMPLEX, MPI_SUM, 0, g_cart_grid); /* MPI_Reduce(&(pl_tmp.re), &((*pl).re), 1, MPI_DOUBLE, MPI_SUM, 0, g_cart_grid); */ /* MPI_Reduce(&(pl_tmp.im), &((*pl).im), 1, MPI_DOUBLE, MPI_SUM, 0, g_cart_grid); */ #else (*pl).re = pl_tmp.re; (*pl).im = pl_tmp.im; #endif /* normalization */ VOLUME3 = VOL3; if(g_proc_id == 0) { VOLUME3 = VOLUME3 * g_nproc_x*g_nproc_y*g_nproc_z; (*pl).re /= 3*VOLUME3; (*pl).im /= 3*VOLUME3; } /* write result to file */ if (g_proc_id == 0) { if (nstore == 0) { ofs = fopen("polyakov_loop_0.dat","w"); } else { ofs = fopen("polyakov_loop_0.dat","a"); } fprintf(ofs, "%25.16e\t%25.16e\n", (*pl).re, (*pl).im); fclose(ofs); } #ifdef MPI free(tmp_nnb); #endif free(tmp_loc); return(0); }
void flip_subgroup(int ix, int mu, su3 vv, int i){ static double vv0,vv1,vv2,vv3,aa0,aa1,aa2,aa3; static double aux,norm_vv_sq; static su3 a,w,v; su3 *z; _su3_assign(v,vv); _su3_one(a); z=&g_gauge_field[ix][mu]; _su3_times_su3d(w,*z,v); /* According to Peter's notes ``A Cabibbo-Marinari SU(3)....", eqs. (A.14-A.17) we have */ if(i==1) { vv0 = creal(w.c00) + creal(w.c11); vv3 = -cimag(w.c00) + cimag(w.c11); vv1 = -cimag(w.c01) - cimag(w.c10); vv2 = -creal(w.c01) + creal(w.c10); } else if(i==2) { vv0 = creal(w.c00) + creal(w.c22); vv3 = -cimag(w.c00) + cimag(w.c22); vv1 = -cimag(w.c02) - cimag(w.c20); vv2 = -creal(w.c02) + creal(w.c20); } else { vv0 = creal(w.c11) + creal(w.c22); vv3 = -cimag(w.c11) + cimag(w.c22); vv1 = -cimag(w.c12) - cimag(w.c21); vv2 = -creal(w.c12) + creal(w.c21); } norm_vv_sq= vv0 * vv0 + vv1 * vv1 + vv2 * vv2 + vv3 * vv3; aux= 2.0 * vv0 / norm_vv_sq; aa0 = aux * vv0-1.0; aa1 = aux * vv1; aa2 = aux * vv2; aa3 = aux * vv3; /* aa is embedded in the SU(3) matrix (a) which can be multiplied on the link variable using the su3_type operator * . */ if(i==1) { a.c00 = aa0 + aa3 * I; a.c11 = conj(a.c00); a.c01 = aa2 + aa1 * I; a.c10 = -conj(a.c01); } else if(i==2) { a.c00 = aa0 + aa3 * I; a.c22 = conj(a.c00); a.c02 = aa2 + aa1 * I; a.c20 = -conj(a.c02); } else { a.c11 = aa0 + aa3 * I; a.c22 = conj(a.c11); a.c12 = aa2 + aa1 * I; a.c21 = -conj(a.c12); } _su3_times_su3(w,a,*z); *z=w; }