double calc_rect_plaq( void ) { int x; int x_p_mu; int x_p_nu; int x_p_nu_m_mu; int x_m_mu; int mu; int nu; su3* u_mu_x; su3* u_nu_x_p_mu; su3* u_mu_x_p_nu; su3* u_mu_x_p_nu_m_mu; su3* u_nu_x_m_mu; su3* u_mu_x_m_mu; su3 tmp1; su3 tmp2; su3 tmp3; double tr; double sum = 0; for( x=0; x<VOLUME; x++ ) for( mu=0; mu<4; mu++ ) for( nu=0; nu<4; nu++ ) if( mu != nu ) { x_p_mu = g_iup[ x ][ mu ]; x_p_nu = g_iup[ x ][ nu ]; x_p_nu_m_mu = g_idn[ x_p_nu ][ mu ]; x_m_mu = g_idn[ x ][ mu ]; u_mu_x = &g_gauge_field[ x ][ mu ]; u_nu_x_p_mu = &g_gauge_field[ x_p_mu ][ nu ]; u_mu_x_p_nu = &g_gauge_field[ x_p_nu ][ mu ]; u_mu_x_p_nu_m_mu = &g_gauge_field[ x_p_nu_m_mu ][ mu ]; u_nu_x_m_mu = &g_gauge_field[ x_m_mu ][ nu ]; u_mu_x_m_mu = &g_gauge_field[ x_m_mu ][ mu ]; _su3_times_su3( tmp1, *u_mu_x_p_nu_m_mu, *u_mu_x_p_nu ); _su3_times_su3( tmp2, *u_nu_x_m_mu, tmp1 ); _su3_times_su3( tmp1, *u_mu_x, *u_nu_x_p_mu ); _su3_times_su3( tmp3, *u_mu_x_m_mu, tmp1 ); _trace_su3_times_su3d( tr, tmp2, tmp3 ); sum += tr; } sum /= ((double)3); return sum; }
/*
 * Truncated power series of the matrix exponential.
 *
 * Starts from the identity (_su3_one), adds `in`, and then for every
 * order i with 2 <= i < nterm adds in^i / i!.  The power is obtained by
 * repeated left-multiplication with `in`, the factorial is accumulated in
 * a double.  For nterm <= 2 the result is identity + in.
 *
 *   in     matrix to exponentiate (passed by value)
 *   nterm  exclusive upper bound on the order of the terms that are summed
 *
 * Returns the partial sum by value.
 */
su3 slow_expon(su3 in, int nterm)
{
  su3 partial, partial_next, power, power_prev, term;
  double factorial = 1;
  int order = 2;

  /* zeroth and first order: 1 + in */
  _su3_one(partial);
  power_prev = in;
  _su3_plus_su3(partial_next, partial, in);
  partial = partial_next;

  while (order < nterm) {
    /* in^order = in * in^(order-1) */
    _su3_times_su3(power, in, power_prev);
    power_prev = power;

    /* scale a copy by 1/order! and add it to the running sum */
    term = power;
    factorial *= order;
    scale_su3(&term, 1.0/factorial);
    _su3_plus_su3(partial_next, partial, term);
    partial = partial_next;

    ++order;
  }

  return(partial);
}
double calc_bulk_sq_plaq( void ) { int x; int x_p_mu; int x_p_nu; int mu; int nu; su3* u_mu_x; su3* u_nu_x_p_mu; su3* u_mu_x_p_nu; su3* u_nu_x; su3 tmp1; su3 tmp2; double tr; double sum = 0; /* None of the forward plaquettes for t=T-1 contribute. */ /* None of the forward plaquettes for t=0 contribute. */ /* Also the space-time square for t=T-2 does not contribute. */ for( x=0; x<VOLUME; x++ ) for( mu=0; mu<4; mu++ ) for( nu=0; nu<mu; nu++ ) if( ( g_t[ x ] != (T-1) ) & ( ( g_t[ x ] != (T-2) ) || ( ( mu != 3 ) && ( nu != 3 ) ) ) & ( g_t[ x ] != 0 ) ) { x_p_mu = g_iup[ x ][ mu ]; x_p_nu = g_iup[ x ][ nu ]; u_mu_x = &g_gauge_field[ x ][ mu ]; u_nu_x_p_mu = &g_gauge_field[ x_p_mu ][ nu ]; u_mu_x_p_nu = &g_gauge_field[ x_p_nu ][ mu ]; u_nu_x = &g_gauge_field[ x ][ nu ]; _su3_times_su3( tmp1, *u_nu_x, *u_mu_x_p_nu ); _su3_times_su3( tmp2, *u_mu_x, *u_nu_x_p_mu ); _trace_su3_times_su3d( tr, tmp1, tmp2 ); sum += tr; } sum /= ((double)3); return sum; }
/*
 * Applies the inverse gauge transformation to every link of gfield in place:
 *
 *   U_mu(x) <- g^{+}(x) * U_mu(x) * g(x+mu)
 *
 * where g(x) = trafofield[x] and x+mu is looked up through g_iup.
 *
 *   gfield      gauge field indexed as gfield[site][mu]; overwritten
 *   trafofield  transformation matrices, one per site; only read
 *
 * Process 0 prints a progress message.  g_update_gauge_copy is set to 1 so
 * that gauge copy fields are refreshed in the next call to HoppingMatrix.
 */
void apply_inv_gtrafo (su3 ** gfield, su3 * trafofield)
{
  int t, x, y, z;
  int dir;
  int site;
  su3 link_times_g, g_dagger;

  if (g_proc_id == 0) {
    printf("Applying INVERSE gauge transformation...");
  }

  for (t = 0; t < T; t++) {
    for (x = 0; x < LX; x++) {
      for (y = 0; y < LY; y++) {
        for (z = 0; z < LZ; z++) {
          site = g_ipt[t][x][y][z];

          for (dir = 0; dir < 4; dir++) {
            /* g^{+}(x) */
            _su3_dagger(g_dagger, trafofield[site]);
            /* U_mu(x) g(x+mu) */
            _su3_times_su3(link_times_g, gfield[site][dir], trafofield[ g_iup[site][dir] ]);
            /* U_mu(x) <- g^{+}(x) U_mu(x) g(x+mu) */
            _su3_times_su3(gfield[site][dir], g_dagger, link_times_g);
          }
        }
      }
    }
  }

  if (g_proc_id == 0) {
    printf("done\n");
  }

  /* update gauge copy fields in the next call to HoppingMatrix */
  g_update_gauge_copy = 1;
}
double calc_boundary_space_time_sq_plaq( void ) { int x; int x_p_mu; int x_p_nu; int mu; int nu; su3* u_mu_x; su3* u_nu_x_p_mu; su3* u_mu_x_p_nu; su3* u_nu_x; su3 tmp1; su3 tmp2; double tr; double sum = 0; /* The space-time square for t=T-2 contributes. */ /* The space-time square for t=0 contributes. */ for( x=0; x<VOLUME; x++ ) for( mu=0; mu<4; mu++ ) for( nu=0; nu<mu; nu++ ) if( ( ( g_t[ x ] == (T-2) ) & ( ( mu == 3 ) || ( nu == 3 ) ) ) || ( ( g_t[ x ] == 0 ) & ( ( mu == 3 ) || ( nu == 3 ) ) ) ) { x_p_mu = g_iup[ x ][ mu ]; x_p_nu = g_iup[ x ][ nu ]; u_mu_x = &g_gauge_field[ x ][ mu ]; u_nu_x_p_mu = &g_gauge_field[ x_p_mu ][ nu ]; u_mu_x_p_nu = &g_gauge_field[ x_p_nu ][ mu ]; u_nu_x = &g_gauge_field[ x ][ nu ]; _su3_times_su3( tmp1, *u_nu_x, *u_mu_x_p_nu ); _su3_times_su3( tmp2, *u_mu_x, *u_nu_x_p_mu ); _trace_su3_times_su3d( tr, tmp1, tmp2 ); sum += tr; } sum /= ((double)3); return sum; }
double calc_boundary_space_space_sq_plaq( void ) { int x; int x_p_mu; int x_p_nu; int mu; int nu; su3* u_mu_x; su3* u_nu_x_p_mu; su3* u_mu_x_p_nu; su3* u_nu_x; su3 tmp1; su3 tmp2; double tr; double sum = 0; /* We need the space-space plaquettes for t=T-1 and t=0. */ for( x=0; x<VOLUME; x++ ) for( mu=0; mu<4; mu++ ) for( nu=0; nu<mu; nu++ ) if( ( ( g_t[ x ] == (T-1) ) & ( mu != 3 ) & ( nu != 3 ) ) || ( ( g_t[ x ] == 0 ) & ( mu != 3 ) & ( nu != 3 ) ) ) { x_p_mu = g_iup[ x ][ mu ]; x_p_nu = g_iup[ x ][ nu ]; u_mu_x = &g_gauge_field[ x ][ mu ]; u_nu_x_p_mu = &g_gauge_field[ x_p_mu ][ nu ]; u_mu_x_p_nu = &g_gauge_field[ x_p_nu ][ mu ]; u_nu_x = &g_gauge_field[ x ][ nu ]; _su3_times_su3( tmp1, *u_nu_x, *u_mu_x_p_nu ); _su3_times_su3( tmp2, *u_mu_x, *u_nu_x_p_mu ); _trace_su3_times_su3d( tr, tmp1, tmp2 ); sum += tr; } sum /= ((double)3); return sum; }
/*
 * Builds *out from *in and three complex coefficients.
 *
 * Going by the macro names, the four steps evaluate the polynomial in
 * Horner form:
 *
 *   tmp  = f2 * in
 *   tmp += f1 * identity
 *   out  = tmp * in
 *   out += f0 * identity
 *
 * i.e. out = f0 + f1*in + f2*in^2 -- presumably the coefficients come from
 * a Cayley-Hamilton style exponentiation; confirm against the caller.
 *
 *   out         receives the result; written only after *in has been read
 *               into tmp, but the third step reads *in again while writing
 *               *out -- NOTE(review): out == in is therefore probably not
 *               safe, confirm against the macro definition.
 *   f0, f1, f2  complex polynomial coefficients
 *   in          input matrix, not modified
 *
 * NOTE(review): the `static` storage class is wrapped in a preprocessor
 * conditional whose opening #if/#ifdef lies outside the visible chunk.
 */
static
#endif
void exponent_from_coefficients(su3 *out, _Complex double f0, _Complex double f1, _Complex double f2, su3 const *in)
{
  su3 ALIGN tmp;

  /* tmp = f2 * in + f1 */
  _complex_times_su3(tmp, f2, *in);
  _su3_add_equals_complex_identity(tmp, f1);

  /* out = tmp * in + f0 */
  _su3_times_su3(*out, tmp, *in);
  _su3_add_equals_complex_identity(*out, f0);
}
/*
 * Plaquette sum of the gauge field gf, with caching.
 *
 * When g_update_gauge_energy is non-zero the sum over all sites and all
 * direction pairs mu1 < mu2 of
 *   _trace_su3_times_su3d( U_mu1(x) U_mu2(x+mu1), U_mu2(x) U_mu1(x+mu2) )
 * is recomputed with Kahan (compensated) summation, divided by 3, reduced
 * over all MPI ranks when MPI is defined, stored in
 * GaugeInfo.plaquetteEnergy, and the flag is cleared.  Otherwise the value
 * from the previous evaluation (kept in a static variable) is returned.
 */
double measure_gauge_action(su3 ** const gf)
{
  int site, site_up_a, site_up_b, dir_a, dir_b;
  static su3 path_a_first, path_b_first;
  static double energy, plaq;
#ifdef MPI
  static double energy_all_ranks;
#endif
  /* Kahan state: running sum, compensation, and three scratch values */
  static double run_sum, comp, corrected, new_sum, added;

  if(!g_update_gauge_energy) {
    return energy;
  }

  comp = 0.0;
  run_sum = 0.0;

  for (site = 0; site < VOLUME; site++) {
    for (dir_a = 0; dir_a < 3; dir_a++) {
      site_up_a = g_iup[site][dir_a];

      for (dir_b = dir_a + 1; dir_b < 4; dir_b++) {
        site_up_b = g_iup[site][dir_b];

        _su3_times_su3(path_a_first, gf[site][dir_a], gf[site_up_a][dir_b]);
        _su3_times_su3(path_b_first, gf[site][dir_b], gf[site_up_b][dir_a]);
        _trace_su3_times_su3d(plaq, path_a_first, path_b_first);

        /* Kahan summation step */
        corrected = plaq + comp;
        new_sum   = corrected + run_sum;
        added     = new_sum - run_sum;
        run_sum   = new_sum;
        comp      = corrected - added;
      }
    }
  }

  energy = (comp + run_sum)/3.0;

#ifdef MPI
  MPI_Allreduce(&energy, &energy_all_ranks, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  energy = energy_all_ranks;
#endif

  GaugeInfo.plaquetteEnergy = energy;
  g_update_gauge_energy = 0;

  return energy;
}
double calc_wrapped_sq_plaq( void ) { int x; int x_p_mu; int x_p_nu; int mu; int nu; su3* u_mu_x; su3* u_nu_x_p_mu; su3* u_mu_x_p_nu; su3* u_nu_x; su3 tmp1; su3 tmp2; double tr; double sum = 0; for( x=0; x<VOLUME; x++ ) for( mu=0; mu<4; mu++ ) for( nu=0; nu<mu; nu++ ) if( ( g_t[ x ] == (T-1) ) & ( ( mu == 3 ) || ( nu == 3 ) ) ) { x_p_mu = g_iup[ x ][ mu ]; x_p_nu = g_iup[ x ][ nu ]; u_mu_x = &g_gauge_field[ x ][ mu ]; u_nu_x_p_mu = &g_gauge_field[ x_p_mu ][ nu ]; u_mu_x_p_nu = &g_gauge_field[ x_p_nu ][ mu ]; u_nu_x = &g_gauge_field[ x ][ nu ]; _su3_times_su3( tmp1, *u_nu_x, *u_mu_x_p_nu ); _su3_times_su3( tmp2, *u_mu_x, *u_nu_x_p_mu ); _trace_su3_times_su3d( tr, tmp1, tmp2 ); sum += tr; } sum /= ((double)3); return sum; }
/*
 * Applies a random gauge transformation and stores the result in gf:
 *
 *   gf[x][mu] = g(x) * U_mu(x) * g^{+}(x+mu)
 *
 * where g is a freshly generated random field (random_gauge_field) and
 * U is read from g_gauge_field.
 *
 *   repro  forwarded unchanged to random_gauge_field
 *   gf     destination field, indexed gf[site][mu]
 *
 * A temporary buffer of VOLUMEPLUSRAND+1 matrices is allocated; the extra
 * element leaves room to round the start address up to the ALIGN_BASE
 * boundary.  The process exits if the allocation fails.
 *
 * NOTE(review): the links are read from the global g_gauge_field while the
 * result is written to gf.  If callers ever pass a field other than
 * g_gauge_field, confirm that reading the global is intended.
 */
void rnd_gauge_trafo(const int repro, su3 ** const gf){
  int ix,iy,mu;
  static su3 u,v,w,x,y;
  su3 * _gauge_trafo = NULL;  /* raw allocation, this is what gets freed */
  su3 * gauge_trafo = NULL;   /* aligned view into _gauge_trafo          */

  if((_gauge_trafo = calloc(VOLUMEPLUSRAND+1, sizeof(su3))) == NULL) {
    fprintf(stderr, "Could not allocate memory in rnd_gauge_trafo. Exiting!\n");
    exit(0);
  }
  /* Align the *allocated* pointer.  The previous code aligned gauge_trafo
   * itself, which was still NULL at this point, so every later access went
   * through a null (or near-null) pointer. */
  gauge_trafo = (su3*)(((unsigned long int)(_gauge_trafo)+ALIGN_BASE)&~ALIGN_BASE);

  random_gauge_field(repro, gauge_trafo);

#ifdef TM_USE_MPI
  xchange_gauge(gauge_trafo);
#endif

  for (ix=0;ix<VOLUME;ix++){
    /* g(x) */
    u=gauge_trafo[ix];

    for (mu=0;mu<4;mu++){
      iy=g_iup[ix][mu];

      /* v = g^{+}(x+mu) */
      w=gauge_trafo[iy];
      _su3_dagger(v,w);

      /* y = g(x) * ( U_mu(x) * g^{+}(x+mu) ) */
      w=g_gauge_field[ix][mu];
      _su3_times_su3(x,w,v);
      _su3_times_su3(y,u,x);

      gf[ix][mu]=y;
    }
  }

  free(_gauge_trafo);
}
/*
 * Sets up the transformation field g_trafo for a temporal gauge and makes
 * a backup copy of the global gauge field.
 *
 * g(t=0) is the identity; the other g's are determined recursively from
 * the time-like links of gfield,
 *
 *   g(x+0) = g(x) * U_0(x),
 *
 * so that the transformed time-like links become the identity everywhere
 * except on the last time slice (t = T-1).
 *
 * Without TM_USE_MPI the recursion runs over the local time extent only.
 * With TM_USE_MPI the process with g_cart_id == 0 starts the recursion and
 * sends its last time slice to g_nb_t_up; every other process first
 * receives the last slice of g_nb_t_dn into `left`, continues the
 * recursion, and forwards its own last slice unless
 * g_cart_id == g_nproc-1.  Finally every process sends its first slice to
 * g_nb_t_dn and receives the first slice of g_nb_t_up into `right`.
 *
 * After that g_tempgauge_field (V row pointers into a block of 4*V+1
 * matrices, aligned when SSE/SSE2/SSE3 is defined) is allocated and filled
 * with a copy of g_gauge_field via copy_gauge_field.
 *
 *   V       number of sites for which g_trafo and the backup are allocated
 *   gfield  gauge field indexed gfield[site][mu]; only direction 0 is read
 *
 * Returns 0 on success, 2 if g_trafo or tempgauge_field cannot be
 * allocated, 1 if g_tempgauge_field cannot be allocated, and -1 (MPI
 * build only) if `left` or `right` cannot be allocated.
 *
 * g_trafo, left, right, g_tempgauge_field and tempgauge_field are not
 * declared in this function; they are assigned here and not freed here.
 *
 * NOTE(review): on the error returns, buffers allocated earlier in the
 * same call are not released.
 * NOTE(review): the MPI branch indexes `left`/`right`-sized buffers
 * (LX*LY*LZ elements) with g_ipt[0][ix][iy][iz] and addresses the last
 * slice as g_trafo+(T-1)*LX*LY*LZ; this assumes a time-major site
 * ordering -- confirm against the geometry setup.
 */
int init_temporalgauge_trafo (const int V, su3** gfield) {

#ifndef TM_USE_MPI

  int it, iz, iy, ix;
  int pos;

  if ((void *)(g_trafo = (su3 *) calloc(V, sizeof(su3))) == NULL ) {
    printf("malloc error in 'init_temporalgauge_trafo'\n");
    return(2);
  }

  /* initialize first timeslice (t=0) with unit matrices */
  for (ix = 0; ix < LX; ix++) {
    for (iy = 0; iy < LY; iy++) {
      for (iz = 0; iz < LZ; iz++) {
        g_trafo[g_ipt[0][ix][iy][iz]] = unit_su3();
      }
    }
  }

  /* U^{'}_0(x) = g(x) U_0(x) g^{+}(x+0) = ID   =>   g(x+0) = g(x) U_0(x) */
  for (it = 1; it < T; it++) {
    for (ix = 0; ix < LX; ix++) {
      for (iy = 0; iy < LY; iy++) {
        for (iz = 0; iz < LZ; iz++) {
          pos = g_ipt[it][ix][iy][iz];
          /* the time-like link of the previous slice is reached through
             g_idn[pos][0] instead of g_ipt[it-1][ix][iy][iz] */
          _su3_times_su3( g_trafo[ g_ipt[it ][ix][iy][iz] ] ,
                          g_trafo[ g_ipt[it-1][ix][iy][iz] ] ,
                          gfield [ g_idn[pos][0] ] [0] );
        }
      }
    }
  }

#else // MPI

  int it, iz, iy, ix;
  int pos;
  MPI_Status status;

  /* one time slice of su3 matrices received from the lower neighbour */
  if ((void *)(left = (su3 *) calloc(LX*LY*LZ, sizeof(su3))) == NULL ) {
    printf("malloc error in 'init_temporalgauge_trafo_mpi'\n");
    return(-1);
  }

  /* one time slice of su3 matrices received from the upper neighbour */
  if ((void *)(right = (su3 *) calloc(LX*LY*LZ, sizeof(su3))) == NULL ) {
    printf("malloc error in 'init_temporalgauge_trafo_mpi'\n");
    return(-1);
  }

  /* V su3 matrices for the transformation field itself */
  if ((void *)(g_trafo = (su3 *) calloc(V, sizeof(su3))) == NULL ) {
    printf("malloc error in 'init_temporalgauge_trafo'\n");
    return(2);
  }

  /*
   * initializing the transformation matrices
   */

  /* first process in t-direction */
  if (g_cart_id == 0) {

    /* initialize first timeslice (t=0) with unit matrices */
    for (ix = 0; ix < LX; ix++) {
      for (iy = 0; iy < LY; iy++) {
        for (iz = 0; iz < LZ; iz++) {
          /* g_trafo[0-th time slice] = ID */
          g_trafo[g_ipt[0][ix][iy][iz]] = unit_su3();
        }
      }
    }

    /* U^{'}_0(x) = g(x) U_0(x) g^{+}(x+0) = ID   =>   g(x+0) = g(x) U_0(x) */
    for (it = 1; it < T; it++) {
      for (ix = 0; ix < LX; ix++) {
        for (iy = 0; iy < LY; iy++) {
          for (iz = 0; iz < LZ; iz++) {
            /* g_trafo[next t-slice] = g_trafo[old t-slice] * gfield[old t-slice][t-dir.] */
            _su3_times_su3( g_trafo[ g_ipt[it ][ix][iy][iz] ] ,
                            g_trafo[ g_ipt[it-1][ix][iy][iz] ] ,
                            gfield [ g_ipt[it-1][ix][iy][iz] ] [0] );
          }
        }
      }
    }

    /* sending: pass the last time slice on to the upper neighbour */
    MPI_Send((void *)(g_trafo+(T-1)*LX*LY*LZ), LX*LY*LZ, mpi_su3, g_nb_t_up, 0, g_cart_grid);

    printf("g_cart_id = %i has send a message to %i\n", g_cart_id, g_nb_t_up);

  } // first process

  /* following processes */
  else {

    /* receiving: last time slice of the lower neighbour */
    MPI_Recv((void *)left, LX*LY*LZ, mpi_su3, g_nb_t_dn, 0, g_cart_grid, &status);

    printf("g_cart_id = %i has received a message from %i\n", g_cart_id, g_nb_t_dn);

    it = 0;
    for (ix = 0; ix < LX; ix++) {
      for (iy = 0; iy < LY; iy++) {
        for (iz = 0; iz < LZ; iz++) {
          pos = g_ipt[it][ix][iy][iz];
          /* g_trafo[0-th time slice] = left[exchanged t-slice] * gfield[...];
             notice: this accesses the RAND (boundary) region of the gauge
             field through g_idn[pos][0] */
          _su3_times_su3( g_trafo[ g_ipt[it ][ix][iy][iz] ] ,
                          left [ g_ipt[it ][ix][iy][iz] ] ,
                          gfield [ g_idn[pos ][0] ] [0] );
        }
      }
    }

    for (it = 1; it < T; it++) {
      for (ix = 0; ix < LX; ix++) {
        for (iy = 0; iy < LY; iy++) {
          for (iz = 0; iz < LZ; iz++) {
            _su3_times_su3( g_trafo[ g_ipt[it ][ix][iy][iz] ] ,
                            g_trafo[ g_ipt[it-1][ix][iy][iz] ] ,
                            gfield [ g_ipt[it-1][ix][iy][iz] ] [0] );
          }
        }
      }
    }

    /* sending: every process except the one with the highest id forwards
       its last time slice */
    if (g_cart_id != g_nproc-1) {
      MPI_Send((void *)(g_trafo+(T-1)*LX*LY*LZ), LX*LY*LZ, mpi_su3, g_nb_t_up, 0, g_cart_grid);

      printf("g_cart_id = %i has send a message to %i\n", g_cart_id, g_nb_t_up);
    }

  } // following processes

  /*
   * exchanging the transformation matrices: first slice goes down,
   * the upper neighbour's first slice arrives in right[]
   */
  MPI_Sendrecv((void *)(g_trafo), LX*LY*LZ, mpi_su3, g_nb_t_dn, 1,
               (void *)(right ), LX*LY*LZ, mpi_su3, g_nb_t_up, 1,
               g_cart_grid, &status);

  printf("g_cart_id = %i has send to %i and received from %i\n", g_cart_id, g_nb_t_dn, g_nb_t_up);

#endif // MPI

  /*
    allocate and initialize g_tempgauge_field which holds a copy of the
    global gauge field g_gauge_field which is copied back after the
    inversion when the temporal gauge is undone again
  */

  int i = 0;

  /* row pointers, one per site */
  if ((void *)(g_tempgauge_field = (su3 **) calloc(V, sizeof(su3*))) == NULL ) {
    printf ("malloc error in 'init_temporalgauge_trafo'\n");
    return(1);
  }

  /* storage for 4 links per site, plus one spare element for alignment */
  if ((void *)(tempgauge_field = (su3 *) calloc(4*V+1, sizeof(su3))) == NULL ) {
    printf ("malloc error in 'init_temporalgauge_trafo'\n");
    return(2);
  }

#if (defined SSE || defined SSE2 || defined SSE3)
  g_tempgauge_field[0] = (su3*)(((unsigned long int)(tempgauge_field)+ALIGN_BASE)&~ALIGN_BASE);
#else
  g_tempgauge_field[0] = tempgauge_field;
#endif

  /* each site owns 4 consecutive matrices */
  for(i = 1; i < V; i++){
    g_tempgauge_field[i] = g_tempgauge_field[i-1]+4;
  }

  /* copy the original field */
  copy_gauge_field(g_tempgauge_field, g_gauge_field);

  return(0);
}
/*
 * Applies the gauge transformation to every link of gfield in place:
 *
 *   U_mu(x) <- g(x) * U_mu(x) * g^{+}(x+mu)
 *
 * with g(x) = trafofield[x].  For the time-like link (mu == 0) on the last
 * local time slice (it == T-1) the right-hand matrix is not looked up via
 * g_iup:
 *   - with TM_USE_MPI it is taken from right[ g_ipt[0][ix][iy][iz] ], the
 *     slice received from the upper neighbour (global temporal gauge);
 *   - otherwise it is trafofield[ g_ipt[0][ix][iy][iz] ], the (periodic)
 *     first slice, which is initialised to the identity.
 *
 *   gfield      gauge field indexed gfield[site][mu]; overwritten
 *   trafofield  transformation matrices, one per site; only read
 *
 * Process 0 prints a progress message.  g_update_gauge_copy is set to 1 so
 * that gauge copy fields are refreshed in the next call to HoppingMatrix.
 */
void apply_gtrafo (su3 ** gfield, su3 * trafofield)
{
  int t, x, y, z;
  int dir;
  int site;
  su3 g_times_link;

  if (g_proc_id == 0) {
    printf("Applying gauge transformation...");
  }

  for (t = 0; t < T; t++) {
    for (x = 0; x < LX; x++) {
      for (y = 0; y < LY; y++) {
        for (z = 0; z < LZ; z++) {
          site = g_ipt[t][x][y][z];

          for (dir = 0; dir < 4; dir++) {
            /* g(x) U_mu(x), common to all cases */
            _su3_times_su3( g_times_link, trafofield[site], gfield[site][dir] );

            if ((t == T-1) && (dir == 0)) {
              /* time-like link leaving the last local slice */
#ifdef TM_USE_MPI
              _su3_times_su3d( gfield[site][dir], g_times_link, right[ g_ipt[0][x][y][z] ]);
#else
              _su3_times_su3d( gfield[site][dir], g_times_link, trafofield[ g_ipt[0][x][y][z] ]);
#endif
            }
            else {
              /* U_mu(x) <- g(x) U_mu(x) g^{+}(x+mu) */
              _su3_times_su3d( gfield[site][dir], g_times_link, trafofield[ g_iup[site][dir] ]);
            }
          }
        }
      }
    }
  }

  if (g_proc_id == 0) {
    printf("done\n");
  }

  /* update gauge copy fields in the next call to HoppingMatrix */
  g_update_gauge_copy = 1;
}
double measure_rectangles(const su3 ** const gf) { static double res; #ifdef TM_USE_MPI double ALIGN mres; #endif #ifdef TM_USE_OMP #pragma omp parallel { int thread_num = omp_get_thread_num(); #endif int i, j, k, mu, nu; su3 ALIGN pr1, pr2, tmp; const su3 *v = NULL , *w = NULL; double ALIGN ac, ks, kc, tr, ts, tt; kc = 0.0; ks = 0.0; #ifdef TM_USE_OMP #pragma omp for #endif for (i = 0; i < VOLUME; i++) { for (mu = 0; mu < 4; mu++) { for (nu = 0; nu < 4; nu++) { if(nu != mu) { /* ^ | ^ | -> */ j = g_iup[i][mu]; k = g_iup[j][nu]; v = &gf[i][mu]; w = &gf[j][nu]; _su3_times_su3(tmp, *v, *w); v = &gf[k][nu]; _su3_times_su3(pr1, tmp, *v); /* -> ^ | ^ | */ j = g_iup[i][nu]; k = g_iup[j][nu]; v = &gf[i][nu]; w = &gf[j][nu]; _su3_times_su3(tmp, *v, *w); v = &gf[k][mu]; _su3_times_su3(pr2, tmp, *v); /* Trace it */ _trace_su3_times_su3d(ac,pr1,pr2); /* printf("i mu nu: %d %d %d, ac = %e\n", i, mu, nu, ac); */ /* Kahan summation */ tr=ac+kc; ts=tr+ks; tt=ts-ks; ks=ts; kc=tr-tt; } } } } kc=(kc+ks)/3.0; #ifdef TM_USE_OMP g_omp_acc_re[thread_num] = kc; #else res = kc; #endif #ifdef TM_USE_OMP } /* OpenMP parallel closing brace */ res = 0.0; for(int i = 0; i < omp_num_threads; ++i) res += g_omp_acc_re[i]; #else #endif #ifdef TM_USE_MPI MPI_Allreduce(&res, &mres, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); res = mres; #endif return res; }
double measure_gauge_action(const su3 ** const gf) { static double res; #ifdef MPI double ALIGN mres; #endif #ifdef OMP #pragma omp parallel { int thread_num = omp_get_thread_num(); #endif int ix,ix1,ix2,mu1,mu2; su3 ALIGN pr1,pr2; const su3 *v,*w; double ALIGN ac,ks,kc,tr,ts,tt; if(g_update_gauge_energy) { kc=0.0; ks=0.0; #ifdef OMP #pragma omp for #endif for (ix=0;ix<VOLUME;ix++){ for (mu1=0;mu1<3;mu1++){ ix1=g_iup[ix][mu1]; for (mu2=mu1+1;mu2<4;mu2++){ ix2=g_iup[ix][mu2]; v=&gf[ix][mu1]; w=&gf[ix1][mu2]; _su3_times_su3(pr1,*v,*w); v=&gf[ix][mu2]; w=&gf[ix2][mu1]; _su3_times_su3(pr2,*v,*w); _trace_su3_times_su3d(ac,pr1,pr2); tr=ac+kc; ts=tr+ks; tt=ts-ks; ks=ts; kc=tr-tt; } } } kc=(kc+ks)/3.0; #ifdef OMP g_omp_acc_re[thread_num] = kc; #else res = kc; #endif } #ifdef OMP } /* OpenMP parallel closing brace */ if(g_update_gauge_energy) { res = 0.0; for(int i=0; i < omp_num_threads; ++i) res += g_omp_acc_re[i]; #else if(g_update_gauge_energy) { #endif #ifdef MPI MPI_Allreduce(&res, &mres, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); res = mres; #endif GaugeInfo.plaquetteEnergy = res; g_update_gauge_energy = 0; } return res; }
double measure_rectangles() { int i, j, k, mu, nu; static su3 pr1, pr2, tmp; su3 *v = NULL , *w = NULL; static double ga, ac; #ifdef MPI static double gas; #endif static double ks, kc, tr, ts, tt; kc=0.0; ks=0.0; if(g_update_rectangle_energy) { for (i = 0; i < VOLUME; i++) { for (mu = 0; mu < 4; mu++) { for (nu = 0; nu < 4; nu++) { if(nu != mu) { /* ^ | ^ | -> */ j = g_iup[i][mu]; k = g_iup[j][nu]; v = &g_gauge_field[i][mu]; w = &g_gauge_field[j][nu]; _su3_times_su3(tmp, *v, *w); v = &g_gauge_field[k][nu]; _su3_times_su3(pr1, tmp, *v); /* -> ^ | ^ | */ j = g_iup[i][nu]; k = g_iup[j][nu]; v = &g_gauge_field[i][nu]; w = &g_gauge_field[j][nu]; _su3_times_su3(tmp, *v, *w); v = &g_gauge_field[k][mu]; _su3_times_su3(pr2, tmp, *v); /* Trace it */ _trace_su3_times_su3d(ac,pr1,pr2); /* printf("i mu nu: %d %d %d, ac = %e\n", i, mu, nu, ac); */ /* Kahan summation */ tr=ac+kc; ts=tr+ks; tt=ts-ks; ks=ts; kc=tr-tt; } } } } ga=(kc+ks)/3.0; #ifdef MPI MPI_Allreduce(&ga, &gas, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); ga = gas; #endif g_update_rectangle_energy = 0; } return ga; }
double measure_rectangles() { int i, j, k, mu, nu; int x, y, z, t; static su3 pr1, pr2, tmp; su3 *v = NULL , *w = NULL; static double ga, ac, gas; static double ks, kc, tr, ts, tt; kc=0.0; ks=0.0; double d = 0.; FILE * debugfile; char filename[100]; sprintf(filename,"debug_mr.s"); #ifdef PARALLELT sprintf(filename,"debug_mr.pt.%d", g_proc_id); #endif #ifdef PARALLELXT sprintf(filename,"debug_mr.pxt.%d", g_proc_id); #endif debugfile = fopen(filename,"w"); for(x = 0; x < LX; x++) { for(y = 0; y < LY; y++) { for(z = 0; z < LZ; z++) { for(t = 0; t < T; t++) { i = g_ipt[t][x][y][z]; for (mu = 0; mu < 4; mu++) { d = 0.; for (nu = 0; nu < 4; nu++) { if(nu != mu) { /* ^ | ^ | -> */ j = g_iup[i][mu]; k = g_iup[j][nu]; v = &g_gauge_field[i][mu]; w = &g_gauge_field[j][nu]; _su3_times_su3(tmp, *v, *w); v = &g_gauge_field[k][nu]; _su3_times_su3(pr1, tmp, *v); /* -> ^ | ^ | */ j = g_iup[i][nu]; k = g_iup[j][nu]; v = &g_gauge_field[i][nu]; w = &g_gauge_field[j][nu]; _su3_times_su3(tmp, *v, *w); v = &g_gauge_field[k][mu]; _su3_times_su3(pr2, tmp, *v); /* Trace it */ _trace_su3_times_su3d(ac,pr1,pr2); d += ac; /* printf("i mu nu: %d %d %d, ac = %e\n", i, mu, nu, ac); */ /* Kahan summation */ tr=ac+kc; ts=tr+ks; tt=ts-ks; ks=ts; kc=tr-tt; } } fprintf(debugfile,"%d %d %d %d %d %e\n", g_proc_coords[0]*T+t, g_proc_coords[1]*LX+x, y, z, mu, d); } } } } } /* fprintf(debugfile,"###\n"); */ fclose(debugfile); ga=(kc+ks)/3.0; #ifdef TM_USE_MPI MPI_Allreduce(&ga, &gas, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); return gas; #else return ga; #endif }
void update_gauge(double step) { int i,mu; static su3 v,w; su3 *z; static su3adj deriv; su3adj *xm; #ifdef _KOJAK_INST #pragma pomp inst begin(updategauge) #endif if (bc_flag == 0) { /* if PBC */ for(i = 0; i < VOLUME; i++) { for(mu = 0; mu < 4; mu++){ /* moment[i][mu] = h_{i,mu}^{alpha} */ xm=&moment[i][mu]; z=&g_gauge_field[i][mu]; _assign_const_times_mom(deriv, step, *xm); v=restoresu3( exposu3(deriv) ); _su3_times_su3(w, v, *z); _su3_assign(*z, w); } } } else if (bc_flag == 1) { /* if SF bc */ for(i = 0; i < VOLUME; i++) { for(mu = 0; mu < 4; mu++){ if (g_t[i] == 0 && (mu==1 || mu==2 || mu==3)) { /* do not update spatial links at zero boundary */ } else if (g_t[i] == g_Tbsf) { /* do not update all the links at T boundary */ } else { /* update all links in the bulk and the temporal link at zero */ xm=&moment[i][mu]; z=&g_gauge_field[i][mu]; _assign_const_times_mom(deriv, step, *xm); v=restoresu3( exposu3(deriv) ); _su3_times_su3(w, v, *z); _su3_assign(*z, w); } } } } #ifdef MPI /* for parallelization */ xchange_gauge(); #endif /* * The backward copy of the gauge field * is not updated here! */ g_update_gauge_copy = 1; g_update_gauge_energy = 1; g_update_rectangle_energy = 1; return; #ifdef _KOJAK_INST #pragma pomp inst end(updategauge) #endif }
/*
 * Fills the array sw[x][..][..] for every site x from the gauge field gf.
 *
 * For each site and each direction pair k < l the four plaquettes that
 * meet at x in the (k,l) plane (the "clover leaf") are accumulated in
 * `plaq`, and fkl[k][l] is set from plaq and its dagger via
 * _su3_minus_su3 (presumably plaq - plaq^dagger, confirm against the
 * macro).  fkl[0][k] are copied to electric[k]; magnetic[1..3] are taken
 * from fkl[2][3], fkl[1][3] (through _su3_minus_assign) and fkl[1][2].
 *
 * sw[x][0][0], sw[x][2][0], sw[x][0][1], sw[x][2][1] are first set with
 * _su3_one and then receive a contribution scaled by +-kappa*c_sw/8;
 * sw[x][1][0] and sw[x][1][1] are assigned directly.  Second index 0 is
 * labelled "upper left block", second index 1 "lower right block" by the
 * original comments.
 *
 *   gf     gauge field indexed gf[site][mu]; only read
 *   kappa  hopping parameter, enters only through kappa*c_sw/8
 *   c_sw   clover coefficient, enters only through kappa*c_sw/8
 *
 * `sw` is not declared in this function.  All matrix temporaries are
 * static, so the routine is not reentrant.
 */
void sw_term(su3 ** const gf, const double kappa, const double c_sw) {
  int k,l;
  int x,xpk,xpl,xmk,xml,xpkml,xplmk,xmkml;
  su3 *w1,*w2,*w3,*w4;
  double ka_csw_8 = kappa*c_sw/8.;   /* common prefactor of all contributions */
  static su3 v1,v2,plaq;
  static su3 fkl[4][4];
  static su3 magnetic[4],electric[4];
  static su3 aux;

  /* compute the clover leaf: the four plaquettes in the (k,l) plane that
     share the site x, i.e. the ones in the (+k,+l), (-k,+l), (-k,-l) and
     (+k,-l) quadrants */
  for(x = 0; x < VOLUME; x++) {
    for(k = 0; k < 4; k++) {
      for(l = k+1; l < 4; l++) {
        /* neighbours of x needed for the four leaves */
        xpk=g_iup[x][k];
        xpl=g_iup[x][l];
        xmk=g_idn[x][k];
        xml=g_idn[x][l];
        xpkml=g_idn[xpk][l];
        xplmk=g_idn[xpl][k];
        xmkml=g_idn[xml][k];

        /* leaf built from the links at x, x+k, x+l */
        w1=&gf[x][k];
        w2=&gf[xpk][l];
        w3=&gf[xpl][k];
        w4=&gf[x][l];
        _su3_times_su3(v1,*w1,*w2);
        _su3_times_su3(v2,*w4,*w3);
        _su3_times_su3d(plaq,v1,v2);

        /* leaf built from the links at x, x+l-k, x-k */
        w1=&gf[x][l];
        w2=&gf[xplmk][k];
        w3=&gf[xmk][l];
        w4=&gf[xmk][k];
        _su3_times_su3d(v1,*w1,*w2);
        _su3d_times_su3(v2,*w3,*w4);
        _su3_times_su3_acc(plaq,v1,v2);

        /* leaf built from the links at x-k, x-k-l, x-l */
        w1=&gf[xmk][k];
        w2=&gf[xmkml][l];
        w3=&gf[xmkml][k];
        w4=&gf[xml][l];
        _su3_times_su3(v1,*w2,*w1);
        _su3_times_su3(v2,*w3,*w4);
        _su3d_times_su3_acc(plaq,v1,v2);

        /* leaf built from the links at x-l, x+k-l, x */
        w1=&gf[xml][l];
        w2=&gf[xml][k];
        w3=&gf[xpkml][l];
        w4=&gf[x][k];
        _su3d_times_su3(v1,*w1,*w2);
        _su3_times_su3d(v2,*w3,*w4);
        _su3_times_su3_acc(plaq,v1,v2);

        /* fkl[k][l] from the accumulated leaves and their dagger */
        _su3_dagger(v2,plaq);
        _su3_minus_su3(fkl[k][l],plaq,v2);
      }
    }

    /* this is the one in flavour and colour space;
       the twisted mass term is treated in clover, sw_inv and
       clover_gamma5 */
    _su3_one(sw[x][0][0]);
    _su3_one(sw[x][2][0]);
    _su3_one(sw[x][0][1]);
    _su3_one(sw[x][2][1]);

    /* split fkl into "electric" (0,k) and "magnetic" (spatial) parts */
    for(k = 1; k < 4; k++) {
      _su3_assign(electric[k], fkl[0][k]);
    }
    _su3_assign(magnetic[1], fkl[2][3]);
    _su3_minus_assign(magnetic[2], fkl[1][3]);
    _su3_assign(magnetic[3], fkl[1][2]);

    /* upper left block 6x6 matrix */
    _itimes_su3_minus_su3(aux,electric[3],magnetic[3]);
    _su3_refac_acc(sw[x][0][0],ka_csw_8,aux);

    _itimes_su3_minus_su3(aux,electric[1],magnetic[1]);
    _su3_minus_su3(v2,electric[2],magnetic[2]);
    _su3_acc(aux,v2);
    _real_times_su3(sw[x][1][0],ka_csw_8,aux);

    _itimes_su3_minus_su3(aux,magnetic[3],electric[3]);
    _su3_refac_acc(sw[x][2][0],ka_csw_8,aux);

    /* lower right block 6x6 matrix */
    _itimes_su3_plus_su3(aux,electric[3],magnetic[3]);
    _su3_refac_acc(sw[x][0][1],(-ka_csw_8),aux);

    _itimes_su3_plus_su3(aux,electric[1],magnetic[1]);
    _su3_plus_su3(v2,electric[2],magnetic[2]);
    _su3_acc(aux,v2);
    _real_times_su3(sw[x][1][1],(-ka_csw_8),aux);

    _itimes_su3_plus_su3(aux,magnetic[3],electric[3]);
    _su3_refac_acc(sw[x][2][1],ka_csw_8,aux);
  }
  return;
}
/*
 * Adds the contribution of the clover term to the derivative field
 * hf->derivative.
 *
 * For every site x six matrices vis[k][l] (k < l) are formed from
 * swm[x][..] and swp[x][..] and then replaced by vis - vis^dagger.  For
 * every direction pair k < l the four plaquettes of the clover leaf at x
 * are visited; for each of them the contribution to every link of that
 * plaquette is accumulated into hf->derivative[site][dir] through
 * _trace_lambda_mul_add_assign with the factor -2*kappa*c_sw/8.
 *
 *   hf     hamiltonian field; hf->gaugefield is read, hf->derivative is
 *          accumulated into (not reset here)
 *   kappa  hopping parameter, enters only through kappa*c_sw/8
 *   c_sw   clover coefficient, enters only through kappa*c_sw/8
 *
 * swm and swp are not declared in this function -- presumably they are
 * filled by an earlier routine; verify against the caller.  All matrix
 * temporaries are static, so the routine is not reentrant.  vv1 and vv2
 * are deliberately reused from one statement to the next; the order of
 * the statements inside each plaquette section must not be changed.
 */
void sw_all(hamiltonian_field_t * const hf, const double kappa, const double c_sw) {
  int k,l;
  int x,xpk,xpl,xmk,xml,xpkml,xplmk,xmkml;
  su3 *w1,*w2,*w3,*w4;
  double ka_csw_8 = kappa*c_sw/8.;   /* common prefactor */
  static su3 v1,v2,vv1,vv2,plaq;
  static su3 vis[4][4];

  for(x = 0; x < VOLUME; x++) {
    /* insertion matrices for the six planes, built from swm / swp */
    _minus_itimes_su3_plus_su3(vis[0][1],swm[x][1],swm[x][3]);
    _su3_minus_su3(vis[0][2],swm[x][1],swm[x][3]);
    _itimes_su3_minus_su3(vis[0][3],swm[x][2],swm[x][0]);

    _minus_itimes_su3_plus_su3(vis[2][3],swp[x][1],swp[x][3]);
    _su3_minus_su3(vis[1][3],swp[x][3],swp[x][1]);
    _itimes_su3_minus_su3(vis[1][2],swp[x][2],swp[x][0]);

    /* project to the anti-hermitian part: vis <- vis - vis^dagger */
    _su3_dagger(v1,vis[0][1]);
    _su3_minus_su3(vis[0][1],vis[0][1],v1);
    _su3_dagger(v1,vis[0][2]);
    _su3_minus_su3(vis[0][2],vis[0][2],v1);
    _su3_dagger(v1,vis[0][3]);
    _su3_minus_su3(vis[0][3],vis[0][3],v1);
    _su3_dagger(v1,vis[2][3]);
    _su3_minus_su3(vis[2][3],vis[2][3],v1);
    _su3_dagger(v1,vis[1][3]);
    _su3_minus_su3(vis[1][3],vis[1][3],v1);
    _su3_dagger(v1,vis[1][2]);
    _su3_minus_su3(vis[1][2],vis[1][2],v1);

    for(k = 0; k < 4; k++) {
      for(l = k+1; l < 4; l++) {
        /* neighbours of x needed for the four leaves */
        xpk=g_iup[x][k];
        xpl=g_iup[x][l];
        xmk=g_idn[x][k];
        xml=g_idn[x][l];
        xpkml=g_idn[xpk][l];
        xplmk=g_idn[xpl][k];
        xmkml=g_idn[xml][k];

        /* ---- leaf with links at x, x+k, x+l ----
           (links marked "dag" enter the plaquette daggered) */
        w1=&hf->gaugefield[x][k];
        w2=&hf->gaugefield[xpk][l];
        w3=&hf->gaugefield[xpl][k];   /*dag*/
        w4=&hf->gaugefield[x][l];     /*dag*/

        _su3_times_su3(v1,*w1,*w2);
        _su3_times_su3(v2,*w4,*w3);
        _su3_times_su3d(plaq,v1,v2);

        _su3_times_su3(vv1,plaq,vis[k][l]);
        _trace_lambda_mul_add_assign(hf->derivative[x][k], -2.*ka_csw_8, vv1);

        /* transport the insertion to the next link of the plaquette */
        _su3d_times_su3(vv2,*w1,vv1);
        _su3_times_su3(vv1,vv2,*w1);
        _trace_lambda_mul_add_assign(hf->derivative[xpk][l], -2.*ka_csw_8, vv1);

        _su3_times_su3(vv2,vis[k][l],plaq);
        _su3_dagger(vv1,vv2);
        _trace_lambda_mul_add_assign(hf->derivative[x][l], -2.*ka_csw_8, vv1);

        _su3d_times_su3(vv2,*w4,vv1);
        _su3_times_su3(vv1,vv2,*w4);
        _trace_lambda_mul_add_assign(hf->derivative[xpl][k], -2.*ka_csw_8, vv1);

        /* ---- leaf with links at x, x+l-k, x-k ---- */
        w1=&hf->gaugefield[x][l];
        w2=&hf->gaugefield[xplmk][k];   /*dag*/
        w3=&hf->gaugefield[xmk][l];     /*dag*/
        w4=&hf->gaugefield[xmk][k];
        _su3_times_su3d(v1,*w1,*w2);
        _su3d_times_su3(v2,*w3,*w4);
        _su3_times_su3(plaq,v1,v2);

        _su3_times_su3(vv1,plaq,vis[k][l]);
        _trace_lambda_mul_add_assign(hf->derivative[x][l], -2.*ka_csw_8, vv1);

        _su3_dagger(vv1,v1);
        _su3_times_su3d(vv2,vv1,vis[k][l]);
        _su3_times_su3d(vv1,vv2,v2);
        _trace_lambda_mul_add_assign(hf->derivative[xplmk][k], -2.*ka_csw_8, vv1);

        _su3_times_su3(vv2,*w3,vv1);
        _su3_times_su3d(vv1,vv2,*w3);
        _trace_lambda_mul_add_assign(hf->derivative[xmk][l], -2.*ka_csw_8, vv1);

        _su3_dagger(vv2,vv1);
        _trace_lambda_mul_add_assign(hf->derivative[xmk][k], -2.*ka_csw_8, vv2);

        /* ---- leaf with links at x-k, x-k-l, x-l ---- */
        w1=&hf->gaugefield[xmk][k];     /*dag*/
        w2=&hf->gaugefield[xmkml][l];   /*dag*/
        w3=&hf->gaugefield[xmkml][k];
        w4=&hf->gaugefield[xml][l];
        _su3_times_su3(v1,*w2,*w1);
        _su3_times_su3(v2,*w3,*w4);

        _su3_times_su3d(vv1,*w1,vis[k][l]);
        _su3_times_su3d(vv2,vv1,v2);
        _su3_times_su3(vv1,vv2,*w2);
        _trace_lambda_mul_add_assign(hf->derivative[xmk][k], -2.*ka_csw_8, vv1);

        _su3_times_su3(vv2,*w2,vv1);
        _su3_times_su3d(vv1,vv2,*w2);
        _trace_lambda_mul_add_assign(hf->derivative[xmkml][l], -2.*ka_csw_8, vv1);

        _su3_dagger(vv2,vv1);
        _trace_lambda_mul_add_assign(hf->derivative[xmkml][k], -2.*ka_csw_8, vv2);

        _su3d_times_su3(vv1,*w3,vv2);
        _su3_times_su3(vv2,vv1,*w3);
        _trace_lambda_mul_add_assign(hf->derivative[xml][l], -2.*ka_csw_8, vv2);

        /* ---- leaf with links at x-l, x+k-l, x ---- */
        w1=&hf->gaugefield[xml][l];     /*dag*/
        w2=&hf->gaugefield[xml][k];
        w3=&hf->gaugefield[xpkml][l];
        w4=&hf->gaugefield[x][k];       /*dag*/
        _su3d_times_su3(v1,*w1,*w2);
        _su3_times_su3d(v2,*w3,*w4);

        _su3_times_su3d(vv1,*w1,vis[k][l]);
        _su3_times_su3d(vv2,vv1,v2);
        _su3_times_su3d(vv1,vv2,*w2);
        _trace_lambda_mul_add_assign(hf->derivative[xml][l], -2.*ka_csw_8, vv1);

        _su3_dagger(vv2,vv1);
        _trace_lambda_mul_add_assign(hf->derivative[xml][k], -2.*ka_csw_8, vv2);

        _su3d_times_su3(vv1,*w2,vv2);
        _su3_times_su3(vv2,vv1,*w2);
        _trace_lambda_mul_add_assign(hf->derivative[xpkml][l], -2.*ka_csw_8, vv2);

        _su3_dagger(vv2,v2);
        _su3_times_su3d(vv1,vv2,v1);
        _su3_times_su3d(vv2,vv1,vis[k][l]);
        _trace_lambda_mul_add_assign(hf->derivative[x][k], -2.*ka_csw_8, vv2);
      }
    }
  }
  return;
}
int stout_smear_gauge_field(const double rho , const int no_iters) { const int dim=4 ; int iter , mu , x; su3 *gauge_wk[4] ; su3 wk_staple ; su3 omega , Exp_p ; su3adj p; su3 *gauge_local ; su3 new_gauge_local ; /*printf("Entering stout_smear_gauge_field\n");*/ if(g_proc_id == 0 && g_debug_level > 3) { printf("DUMP OF g_gauge_field in STOUT\n"); print_config_to_screen(g_gauge_field); printf("STOUT smearing the gauge fields\n") ; printf("rho = %g number of iterations = %d\n",rho,no_iters) ; } /* reserve memory */ for(mu = 0 ; mu < dim ; ++mu) { gauge_wk[mu] = calloc(VOLUME, sizeof(su3)); if(errno == ENOMEM) { return(1); } } /* start of the the stout smearing **/ for(iter = 0 ; iter < no_iters ; ++iter) { for(mu = 0 ; mu < dim ; ++mu) { for(x= 0 ; x < VOLUME ; x++) { /* * we need to save all intermediate gauge configurations * because they are needed for the force back iteration in * "stout_smear_force.c" */ /*_su3_assign(g_gauge_field_smear_iterations[iter][x][mu], g_gauge_field[x][mu]);*/ /* get staples */ wk_staple = get_staples(x, mu, g_gauge_field) ; scale_su3(&wk_staple, rho) ; /* omega = staple * u^dagger */ gauge_local = &g_gauge_field[x][mu]; _su3_times_su3d(omega,wk_staple,*gauge_local); /* project out anti-hermitian traceless part */ project_anti_herm(&omega) ; /* exponentiate */ _trace_lambda(p,omega) ; /* -2.0 to get su3 to su3adjoint consistency ****/ p.d1 /= -2.0 ; p.d2 /= -2.0 ; p.d3 /= -2.0 ; p.d4 /= -2.0 ; p.d5 /= -2.0 ; p.d6 /= -2.0 ; p.d7 /= -2.0 ; p.d8 /= -2.0 ; Exp_p = exposu3(p); /* new_gauge_local = Exp_p * gauge_local */ _su3_times_su3(new_gauge_local,Exp_p,*gauge_local); gauge_wk[mu][x] = new_gauge_local ; } /* end the loop over space-time */ } /** update gauge field on this node **/ for(mu = 0 ; mu < dim ; ++mu) { for(x= 0 ; x < VOLUME ; ++x) { g_gauge_field[x][mu] = gauge_wk[mu][x] ; } } if(g_debug_level > 3 && g_proc_id == 0) { printf("DUMP OF g_gauge_field in STOUT\n"); print_config_to_screen(g_gauge_field); } #ifdef MPI /** update 
boundaries for parallel stuff **/ xchange_gauge(); #endif g_update_gauge_copy = 1; g_update_gauge_energy = 1; g_update_rectangle_energy = 1; /* * here we save the intermediate smeares gauge fields a large array */ } /* end loop over stout smearing iterations */ /* free up memory */ for(mu=0 ; mu < dim ; ++mu) { free(gauge_wk[mu]); } if(g_debug_level > 3 && g_proc_id == 0) { printf("Leaving stout_smear_gauge_field\n"); } return(0); }
/*
 * Compute the Polyakov loop in direction mu and store it in *pl_.
 *
 * Only mu == 2 and mu == 3 are supported; any other value prints an error
 * and terminates the program. For every site of the hyperplane orthogonal
 * to mu the links g_gauge_field[.][mu] are multiplied along the line,
 * starting at coordinate 0 and following g_iup, and the colour trace of the
 * product is accumulated with Kahan (compensated) summation, separately for
 * the real and the imaginary part.
 *
 * The sum is divided by 3 (colour trace), summed over all processes when
 * MPI is defined, and divided by L0*L1*L2*g_nproc_t*g_nproc_x.
 */
void polyakov_loop(complex * pl_, const int mu) {

  int i0, i1, i2, i3, L0, L1, L2, L3, ixyzt, ixyzt_up;
  double vol;
  su3 tmp, tmp2;
  su3 *v = NULL , *w = NULL;
  complex pl;
#ifdef MPI
  complex pls;
#endif
  /* Kahan summation state: ks is the running sum, kc the compensation */
  complex ks, kc, tr, ts, tt;
  kc.re=0.0; ks.re=0.0;
  kc.im=0.0; ks.im=0.0;

  /* For the moment only the Polyakov loop in y- and z-direction
     are implemented, since they are not affected by parallelisation.
     Anything else (including negative values, which would index the
     neighbour tables out of bounds) is rejected. */
  if(mu != 2 && mu != 3) {
    fprintf(stderr, "Wrong parameter for Polyakov loop calculation in polyakov_loop.c:\n");
    fprintf(stderr, "Only direction %d and %d are allowed.\n",2,3);
    fprintf(stderr, "Actual value is %d! Aborting...\n",mu);
#ifdef MPI
    MPI_Abort(MPI_COMM_WORLD, 10);
    MPI_Finalize();
#endif
    exit(0);
  }

  L0=T;
  L1=LX;
  if(mu==2) {
    L2=LZ;
    L3=LY;
  }
  else {
    L2=LY;
    L3=LZ;
  }

  /* loop over the sites of the hyperplane orthogonal to mu: */
  for (i0=0; i0 < L0; i0++) {
    for (i1=0; i1 < L1; i1++) {
      for (i2=0; i2 < L2; i2++) {
        /* at each site multiply the links in direction mu: */
        i3 = 0;
        /* get the site index: */
        if(mu==2) {
          ixyzt = g_ipt[i0][i1][i3][i2];
        }
        else {
          ixyzt = g_ipt[i0][i1][i2][i3];
        }
        /* and its neighbour in direction mu: */
        ixyzt_up = g_iup[ixyzt][mu];

        /* Get the first two links and multiply them: */
        v = &g_gauge_field[ixyzt][mu];
        w = &g_gauge_field[ixyzt_up][mu];
        _su3_times_su3(tmp, *v, *w);

        /* now multiply in the links at coordinates 2 .. L3-2: */
        for (i3=1; i3 < L3-2; i3++) {
          /* keep the current product in tmp2: */
          _su3_assign(tmp2,tmp);
          /* get the next site index: */
          ixyzt_up = g_iup[ixyzt_up][mu];
          /* and the corresponding link matrix: */
          w = &g_gauge_field[ixyzt_up][mu];
          /* and multiply them: */
          _su3_times_su3(tmp, tmp2, *w);
        }

        /* for the last link we directly take the complex trace: */
        ixyzt_up = g_iup[ixyzt_up][mu];
        w = &g_gauge_field[ixyzt_up][mu];
        _trace_su3_times_su3(pl,tmp,*w);

        /* Kahan summation, real part: */
        tr.re=pl.re+kc.re;
        ts.re=tr.re+ks.re;
        tt.re=ts.re-ks.re;
        ks.re=ts.re;
        kc.re=tr.re-tt.re;
        /* Kahan summation, imaginary part: */
        tr.im=pl.im+kc.im;
        ts.im=tr.im+ks.im;
        tt.im=ts.im-ks.im;
        ks.im=ts.im;
        kc.im=tr.im-tt.im;
      }
    }
  }

  /* Finish Kahan summation: */
  /* (Division by 3 is for normalising the colour trace.) */
  pl.re=(kc.re+ks.re)/3.0;
  pl.im=(kc.im+ks.im)/3.0;

  /* Collect the results: */
#ifdef MPI
  MPI_Allreduce(&pl, &pls, 2, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
  pl=pls;
#endif

  /* Normalise, i.e. divide by the number of loops: */
  vol = (double) L0*L1*L2*g_nproc_t*g_nproc_x;
  _div_real(pl,pl,vol);

  (*pl_).re = pl.re;
  (*pl_).im = pl.im;
}
int polyakov_loop_dir( const int nstore /* in */, const int dir /* in */) { int ixyz, ixyzt, ixyzt_up, VOL3, VOLUME3, ix, iy, iz, it; complex pl_tmp, tr, ts, tt, kc, ks, pl; su3 *tmp_loc, tmp, tmp2; su3 *u, *v, *w; double ratime, retime; char filename[50]; FILE *ofs; #ifdef MPI int rank_slice, rank_ray; MPI_Comm slice, ray; su3 *tmp_ray; #endif if(dir!=0 && dir!=3 && g_proc_id==0) { fprintf(stderr, "Wrong direction; must be 0 (t) or 3 (z)\n"); return(-1); } pl.re = 0.; pl.im = 0.; /********************************************************************************/ /************** * local part * **************/ #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock()/(double)(CLOCKS_PER_SEC); #endif if(dir==0) { VOL3 = LX*LY*LZ; tmp_loc = (su3 *)calloc(VOL3, sizeof(su3)); if((void*)tmp_loc == NULL) { fprintf(stderr, "[%2d] Could not allocate memory for tmp_loc\n", g_proc_id); return(-1); } for(ix=0; ix<LX; ix++) { for(iy=0; iy<LY; iy++) { for(iz=0; iz<LZ; iz++) { /* ixyz = ix*LY*LZ + iy*LZ + iz */ ixyz = (ix * LY + iy) * LZ + iz; it = 0; ixyzt = g_ipt[it][ix][iy][iz]; ixyzt_up = g_iup[ixyzt][0]; v = &g_gauge_field[ixyzt][0]; w = &g_gauge_field[ixyzt_up][0]; u = &tmp; _su3_times_su3(*u, *v, *w); v = &tmp2; for(it=1; it<T-2; it++) { /* swap u and v via w */ w = u; u = v; v = w; ixyzt_up = g_iup[ixyzt_up][0]; w = &g_gauge_field[ixyzt_up][0]; _su3_times_su3(*u, *v, *w); } /* last multiplication for it=T-1 */ ixyzt_up = g_iup[ixyzt_up][0]; w = &g_gauge_field[ixyzt_up][0]; _su3_times_su3(tmp_loc[ixyz],*u, *w); } } } } else { /* z-direction <=> dir==3 */ VOL3 = T*LX*LY; tmp_loc = (su3 *)calloc(VOL3, sizeof(su3)); if((void*)tmp_loc == NULL) { /* Abort */ } for(it=0; it<T; it++) { for(ix=0; ix<LX; ix++) { for(iy=0; iy<LY; iy++) { /* ixyz = it*LX*LY + ix*LY + iy */ ixyz = (it * LX + ix) * LY + iy; iz = 0; ixyzt = g_ipt[it][ix][iy][iz]; ixyzt_up = g_iup[ixyzt][3]; v = &g_gauge_field[ixyzt][3]; w = &g_gauge_field[ixyzt_up][3]; u = &tmp; _su3_times_su3(*u, *v, *w); v = 
&tmp2; for(iz=1; iz<LZ-2; iz++) { /* swap u and v via w */ w = u; u = v; v = w; ixyzt_up = g_iup[ixyzt_up][3]; w = &g_gauge_field[ixyzt_up][3]; _su3_times_su3(*u, *v, *w); } ixyzt_up = g_iup[ixyzt_up][3]; w = &g_gauge_field[ixyzt_up][3]; _su3_times_su3(tmp_loc[ixyz], *u, *w); } } } } #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock()/(double)(CLOCKS_PER_SEC); #endif if(g_debug_level > 0 && g_proc_id == 0) { fprintf(stdout, "# [pl02 dir%1d proc%.2d] time for calculating local part"\ " = %e seconds\n", dir, g_cart_id, retime-ratime); } /********************************************************************************/ #ifdef MPI /*************** * global part * ***************/ /* choose the slice and ray communicators according to direction */ if(dir==0) { slice = g_mpi_time_slices; ray = g_mpi_SV_slices; rank_slice = g_mpi_time_rank; rank_ray = g_mpi_SV_rank; } else { slice = g_mpi_z_slices; ray = g_mpi_ST_slices; rank_slice = g_mpi_z_rank; rank_ray = g_mpi_ST_rank; } ratime = MPI_Wtime(); /* (1) collect contributions from different time/z slices to nodes with rank=0 in spatial volume/space-time slices */ # ifndef PARALLELXYZT if(dir==0) { # endif tmp_ray = (su3*)calloc(VOL3, sizeof(su3)); /* */ if((void*)tmp_ray== NULL) { fprintf(stderr, "[%2d] Could not allocate memory for tmp_ray\n", g_proc_id); return(-1); } MPI_Reduce(tmp_loc, tmp_ray, VOL3, mpi_su3, mpi_reduce_su3_ray, 0, ray); # ifndef PARALLELXYZT } # endif retime = MPI_Wtime(); if(g_proc_id==0 && g_debug_level>0) { fprintf(stdout, "# [pl02 dir%1d proc%.2d] time for calculating global part"\ " = %e seconds\n", dir, g_cart_id, retime-ratime); } if(rank_ray == 0) { #endif _complex_zero(pl_tmp); _complex_zero(kc); _complex_zero(ks); #ifdef MPI # ifdef PARALLELXYZT u = tmp_ray; # else if(dir==0) { u = tmp_ray; } else { u = tmp_loc; } # endif #else u = tmp_loc; #endif for(ixyz=0; ixyz<VOL3; ixyz++) /* Kahan-summation of traces */ { pl_tmp.re = (u[ixyz]).c00.re + (u[ixyz]).c11.re + (u[ixyz]).c22.re; 
pl_tmp.im = (u[ixyz]).c00.im + (u[ixyz]).c11.im + (u[ixyz]).c22.im; tr.re=pl_tmp.re+kc.re; ts.re=tr.re+ks.re; tt.re=ts.re-ks.re; ks.re=ts.re; kc.re=tr.re-tt.re; tr.im=pl_tmp.im+kc.im; ts.im=tr.im+ks.im; tt.im=ts.im-ks.im; ks.im=ts.im; kc.im=tr.im-tt.im; } pl_tmp.re = ks.re + kc.re; pl_tmp.im = ks.im + kc.im; #ifdef MPI MPI_Reduce(&pl_tmp, &pl, 1, MPI_DOUBLE_COMPLEX, MPI_SUM, 0, slice); } # ifndef PARALLELXYZT if(dir==0) { # endif free(tmp_ray); # ifndef PARALLELXYZT } # endif #else pl.re = pl_tmp.re; pl.im = pl_tmp.im; #endif /* normalization pl |-> pl / ( 3 * 3-dim. volume)*/ VOLUME3 = VOL3; #ifdef MPI if(rank_slice==0 && rank_ray==0) { /* this process has the sum of the Polyakov loop values */ if(dir==0) { VOLUME3 = VOLUME3 * g_nproc_x*g_nproc_y*g_nproc_z; } else { VOLUME3 = VOLUME3 * g_nproc_t*g_nproc_x*g_nproc_y; } #endif pl.re /= 3. * (double)VOLUME3; pl.im /= 3. * (double)VOLUME3; /* write result to file */ sprintf(filename, "polyakovloop_dir%1d", dir); if (nstore == 0) { ofs = fopen(filename,"w"); } else { ofs = fopen(filename,"a"); } if((void*)ofs == NULL) { fprintf(stderr, "Could not open file %s for writing\n", filename); return(-1); } fprintf(ofs, "%4d\t%2d\t%25.16e\t%25.16e\n", nstore, dir, pl.re, pl.im); fclose(ofs); #if defined MPI } #endif free(tmp_loc); return(0); }
/*
 * Measure the Polyakov loop in direction 0 and return it through *pl.
 *
 * Local part: for every local spatial site the links g_gauge_field[.][0]
 * are multiplied along the local extent L3 = T and stored in tmp_loc.
 * Global part (MPI only): starting from the highest t-coordinate of the
 * process grid, each hyperplane of processes sends its products to its
 * lower neighbour in direction 0, which multiplies them onto its own from
 * the right; afterwards the processes with g_proc_coords[0] == 0 hold the
 * complete products.
 * Those processes Kahan-sum the colour traces; the sums are added up on
 * process 0 (MPI_Reduce), which normalises by 3 * (global spatial volume)
 * and writes "Re Im" to "polyakov_loop_0.dat" (truncated if nstore == 0,
 * appended to otherwise).
 *
 * Returns 0 on success and -1 if a buffer could not be allocated or the
 * output file could not be opened.
 */
int polyakov_loop_0(const int nstore, complex *pl) {

  int i0, i1, i2, i3, ixyz, ixyzt, ixyzt_up, VOL3, VOLUME3;
  int L0, L1, L2, L3;
  int rc = 0;
  double retime, ratime;
  complex pl_tmp, tr, ts, tt, kc, ks;
  su3 *tmp_loc = NULL, tmp, tmp2;
  su3 *v = NULL, *w = NULL;

  FILE *ofs = NULL;

#ifdef MPI
  int iproc;
  MPI_Status status;
  su3 *tmp_nnb = NULL;
#endif

  L0 = LX; /* enable transparent comparison with existing Polyakov routines */
  L1 = LY; /* in spatial directions */
  L2 = LZ;
  L3 = T;

  /**************
   * local part *
   **************/
#ifdef MPI
  ratime = MPI_Wtime();
#else
  ratime = (double)clock()/(double)(CLOCKS_PER_SEC);
#endif

  VOL3 = L0*L1*L2;
  tmp_loc = (su3 *)calloc(VOL3, sizeof(su3));
  if(tmp_loc == NULL) {
    fprintf(stderr, "[%2d] Could not allocate memory for tmp_loc\n", g_proc_id);
    return(-1);
  }

  for(i0 = 0; i0 < LX; i0++) {
    for(i1 = 0; i1 < LY; i1++) {
      for(i2 = 0; i2 < LZ; i2++) {
        ixyz = (i2 * L1 + i1) * L0 + i0;
        i3 = 0;
        ixyzt = g_ipt[i3][i0][i1][i2];
        ixyzt_up = g_iup[ixyzt][0];
        v = &g_gauge_field[ixyzt][0];
        w = &g_gauge_field[ixyzt_up][0];
        _su3_times_su3(tmp, *v, *w);

        /* multiply in the remaining links of this line */
        for(i3 = 1; i3 < L3-1; i3++) {
          _su3_assign(tmp2,tmp);
          ixyzt_up = g_iup[ixyzt_up][0];
          w = &g_gauge_field[ixyzt_up][0];
          _su3_times_su3(tmp, tmp2, *w);
        }
        _su3_assign(tmp_loc[ixyz],tmp);
      }
    }
  }

#ifdef MPI
  retime = MPI_Wtime();
#else
  retime = (double)clock()/(double)(CLOCKS_PER_SEC);
#endif
  if(g_debug_level>0) {
    fprintf(stdout, "[polyakov_loop_0 | %3d] time for calculating local part = %e seconds\n", g_cart_id, retime-ratime);
  }

#ifdef MPI
  /***************
   * global part *
   ***************/
  ratime = MPI_Wtime();

  /* (1) collect contributions from different time slices to nodes with t-coord. 0 */
  tmp_nnb = (su3*)calloc(VOL3, sizeof(su3)); /* contains the next-neighbour-part*/
  if(tmp_nnb == NULL) {
    fprintf(stderr, "[%2d] Could not allocate memory for tmp_nnb\n", g_proc_id);
    free(tmp_loc);
    return(-1);
  }

  /* note: in the following loop t is taken as the time coordinate of nodes */
  for(iproc = g_nproc_t-1; iproc > 0; iproc--) {
    if(g_proc_coords[0] == iproc) { /* node is in the {t=iproc}-hyperplane */
      /* send tmp_loc from {t=iproc}-hyperplane to {t=iproc-1}-hyperplane */
      MPI_Send(tmp_loc, VOL3, mpi_su3, g_nb_t_dn, 100+g_cart_id, g_cart_grid);
    }
    if(g_proc_coords[0] == iproc-1) {
      /* so the node is right below the sending one in time(= 0)-direction;
         receive the tmp_loc of the {t=my_own_t_index+1}-hyperplane */
      MPI_Recv(tmp_nnb, VOL3, mpi_su3, g_nb_t_up, 100+g_nb_t_up, g_cart_grid, &status);
      for(ixyz=0; ixyz<VOL3; ixyz++) {
        /* multiply all matrices in tmp_nnb to my own in tmp_loc from the right */
        v = tmp_loc+ixyz;
        w = tmp_nnb+ixyz;
        _su3_assign(tmp2, *v);
        _su3_times_su3(*v, tmp2, *w);
      }
    }
    /* after the last pass the nodes with g_proc_coords[0]=0 contain the
       product of all contributions from all {t=const.}-planes */
  }
  retime = MPI_Wtime();
  if(g_proc_id==0 && g_debug_level>0) {
    fprintf(stdout, "[polyakov_loop_0 | %3d] time for calculating global part = %e seconds\n", g_cart_id, retime-ratime);
  }

  /* (2) nodes with time coordinate 0 sum traces over local spatial points */
#endif
  _complex_zero(pl_tmp);
  if(g_proc_coords[0] == 0) {
    kc.re = 0.0; kc.im = 0.0;
    ks.re = 0.0; ks.im = 0.0;
    for(ixyz = 0; ixyz < VOL3; ixyz++) { /* Kahan-summation of traces */
      pl_tmp.re = (tmp_loc[ixyz]).c00.re + (tmp_loc[ixyz]).c11.re + (tmp_loc[ixyz]).c22.re;
      pl_tmp.im = (tmp_loc[ixyz]).c00.im + (tmp_loc[ixyz]).c11.im + (tmp_loc[ixyz]).c22.im;
      tr.re=pl_tmp.re+kc.re;
      ts.re=tr.re+ks.re;
      tt.re=ts.re-ks.re;
      ks.re=ts.re;
      kc.re=tr.re-tt.re;
      tr.im=pl_tmp.im+kc.im;
      ts.im=tr.im+ks.im;
      tt.im=ts.im-ks.im;
      ks.im=ts.im;
      kc.im=tr.im-tt.im;
    }
    pl_tmp.re = ks.re + kc.re;
    pl_tmp.im = ks.im + kc.im;
  }

#ifdef MPI
  /* (3) sum over all contributions from all nodes (also nodes with pl_tmp=0;
         apparently the easiest way) */
  MPI_Reduce(&pl_tmp, pl, 1, MPI_DOUBLE_COMPLEX, MPI_SUM, 0, g_cart_grid);
#else
  (*pl).re = pl_tmp.re;
  (*pl).im = pl_tmp.im;
#endif

  /* normalization */
  VOLUME3 = VOL3;
  if(g_proc_id == 0) {
    VOLUME3 = VOLUME3 * g_nproc_x*g_nproc_y*g_nproc_z;
    (*pl).re /= 3*VOLUME3;
    (*pl).im /= 3*VOLUME3;
  }

  /* write result to file */
  if (g_proc_id == 0) {
    if (nstore == 0) {
      ofs = fopen("polyakov_loop_0.dat","w");
    }
    else {
      ofs = fopen("polyakov_loop_0.dat","a");
    }
    if(ofs == NULL) {
      fprintf(stderr, "Could not open file %s for writing\n", "polyakov_loop_0.dat");
      rc = -1;
    }
    else {
      fprintf(ofs, "%25.16e\t%25.16e\n", (*pl).re, (*pl).im);
      fclose(ofs);
    }
  }

#ifdef MPI
  free(tmp_nnb);
#endif
  free(tmp_loc);
  return(rc);
}
/*
 * Method based on Givens' rotations, as used by Urs Wenger.
 *
 * Repeatedly applies three 2x2 rotations (index pairs 01, 12 and 20) to
 * *omega. Each rotation is built from the current entries of *omega,
 * multiplied from the left onto an accumulator that starts as the unit
 * matrix, and its adjoint is multiplied from the right onto *omega.
 * The sweeps stop as soon as the trace of *omega (stored in a double)
 * grows by less than 1e-15 in one sweep, or after 200 sweeps.
 * On return *omega holds the accumulated product of rotations.
 */
void reunitarize(su3 *omega)
{
  su3 accum, givens, scratch;
  double tr_prev, tr_cur;
  _Complex double diag, offd;
  double inv_norm;
  int sweep = 0;

  _su3_one(accum);
  tr_prev = omega->c00 + omega->c11 + omega->c22;

  while (sweep < 200)
  {
    /* --- rotation in the 01 plane --- */
    diag = omega->c00 + conj(omega->c11);
    offd = omega->c01 - conj(omega->c10);
    inv_norm = 1.0 / sqrt(conj(diag) * diag + conj(offd) * offd);
    diag *= inv_norm;
    offd *= inv_norm;

    _su3_one(givens);
    givens.c00 = diag;
    givens.c11 = conj(diag);
    givens.c01 = offd;
    givens.c10 = -conj(offd);

    /* accum <- givens * accum ; omega <- omega * givens^dagger */
    _su3_times_su3(scratch, givens, accum);
    _su3_assign(accum, scratch);
    _su3_times_su3d(scratch, *omega, givens);
    _su3_assign(*omega, scratch);

    /* --- rotation in the 12 plane --- */
    diag = omega->c11 + conj(omega->c22);
    offd = omega->c12 - conj(omega->c21);
    inv_norm = 1.0 / sqrt(conj(diag) * diag + conj(offd) * offd);
    diag *= inv_norm;
    offd *= inv_norm;

    _su3_one(givens);
    givens.c11 = diag;
    givens.c22 = conj(diag);
    givens.c12 = offd;
    givens.c21 = -conj(offd);

    _su3_times_su3(scratch, givens, accum);
    _su3_assign(accum, scratch);
    _su3_times_su3d(scratch, *omega, givens);
    _su3_assign(*omega, scratch);

    /* --- rotation in the 20 plane --- */
    diag = omega->c22 + conj(omega->c00);
    offd = omega->c20 - conj(omega->c02);
    inv_norm = 1.0 / sqrt(conj(diag) * diag + conj(offd) * offd);
    diag *= inv_norm;
    offd *= inv_norm;

    _su3_one(givens);
    givens.c22 = diag;
    givens.c00 = conj(diag);
    givens.c20 = offd;
    givens.c02 = -conj(offd);

    _su3_times_su3(scratch, givens, accum);
    _su3_assign(accum, scratch);
    _su3_times_su3d(scratch, *omega, givens);
    _su3_assign(*omega, scratch);

    /* convergence check on the trace */
    tr_cur = omega->c00 + omega->c11 + omega->c22;
    if (tr_cur - tr_prev < 1e-15)
    {
      break;
    }
    tr_prev = tr_cur;
    ++sweep;
  }

  _su3_assign(*omega, accum);
}
/*
 * Update the link g_gauge_field[ix][mu] in one 2x2 subgroup.
 *
 * The product w = U * vv^dagger (U being the current link) is reduced to
 * four real numbers taken from the 2x2 sub-block selected by i
 * (i == 1: rows/columns 0,1; i == 2: rows/columns 0,2; any other value:
 * rows/columns 1,2). From them the coefficients
 *   aa0 = 2*vv0*vv0/|vv|^2 - 1,  aa_k = 2*vv0*vv_k/|vv|^2  (k = 1,2,3)
 * are formed, embedded into a matrix that is the unit matrix outside the
 * selected sub-block, and this matrix is multiplied from the left onto
 * the link, which is overwritten with the result.
 */
void flip_subgroup(int ix, int mu, su3 vv, int i){
  double q0, q1, q2, q3;
  double r0, r1, r2, r3;
  double factor, q_norm_sq;
  su3 embed, prod, staple;
  su3 *link = &g_gauge_field[ix][mu];

  _su3_assign(staple, vv);
  _su3_one(embed);
  _su3_times_su3d(prod, *link, staple);

  /*
    According to Peter's notes ``A Cabibbo-Marinari SU(3)....", eqs. (A.14-A.17)
    we have */
  switch (i) {
  case 1:
    q0 =  creal(prod.c00) + creal(prod.c11);
    q3 = -cimag(prod.c00) + cimag(prod.c11);
    q1 = -cimag(prod.c01) - cimag(prod.c10);
    q2 = -creal(prod.c01) + creal(prod.c10);
    break;
  case 2:
    q0 =  creal(prod.c00) + creal(prod.c22);
    q3 = -cimag(prod.c00) + cimag(prod.c22);
    q1 = -cimag(prod.c02) - cimag(prod.c20);
    q2 = -creal(prod.c02) + creal(prod.c20);
    break;
  default:
    q0 =  creal(prod.c11) + creal(prod.c22);
    q3 = -cimag(prod.c11) + cimag(prod.c22);
    q1 = -cimag(prod.c12) - cimag(prod.c21);
    q2 = -creal(prod.c12) + creal(prod.c21);
    break;
  }

  q_norm_sq = q0 * q0 + q1 * q1 + q2 * q2 + q3 * q3;

  factor = 2.0 * q0 / q_norm_sq;
  r0 = factor * q0-1.0;
  r1 = factor * q1;
  r2 = factor * q2;
  r3 = factor * q3;

  /* (r0..r3) is embedded in the 3x3 matrix (embed), which can then be
     multiplied onto the link variable. */
  switch (i) {
  case 1:
    embed.c00 = r0 + r3 * I;
    embed.c11 = conj(embed.c00);
    embed.c01 = r2 + r1 * I;
    embed.c10 = -conj(embed.c01);
    break;
  case 2:
    embed.c00 = r0 + r3 * I;
    embed.c22 = conj(embed.c00);
    embed.c02 = r2 + r1 * I;
    embed.c20 = -conj(embed.c02);
    break;
  default:
    embed.c11 = r0 + r3 * I;
    embed.c22 = conj(embed.c11);
    embed.c12 = r2 + r1 * I;
    embed.c21 = -conj(embed.c12);
    break;
  }

  _su3_times_su3(prod, embed, *link);
  *link = prod;
}