/*
 * Fill r[0..n-1] with normally distributed random numbers (mean 0,
 * standard deviation 1) using the polar form of the Box-Muller transform,
 * which needs no trigonometric functions.
 *
 * r: output buffer holding at least n doubles
 * n: number of values to produce; nothing is drawn or written when n <= 0
 *
 * Uniform deviates are taken from ranlxd(). Every accepted pair of uniforms
 * yields two normal deviates. When n is odd the second deviate of the last
 * pair is dropped rather than stored past the end of r.
 */
void rnormal(double * r, const int n) {
  double u[2], s, l;
  int i;

  for(i = 0; i < n; i += 2) {
    /* map both uniforms u -> 2u-1 and retry until the point is not the
       origin and its squared radius does not exceed 1 */
    do {
      ranlxd(u, 2);
      u[0] = 2.*u[0] - 1.;
      u[1] = 2.*u[1] - 1.;
      s = u[0]*u[0]+u[1]*u[1];
    } while(s == 0. || s > 1.);

    l = sqrt(-2.*log(s)/s);
    r[i] = u[0]*l;
    /* the partner value only fits when the buffer has room for it */
    if(i+1 < n) {
      r[i+1] = u[1]*l;
    }
  }
  return;
}
/*
 * One hit-or-miss Monte Carlo estimate of pi.
 *
 * x, y: scratch buffers of at least N doubles each; both are overwritten
 *       with output of ranlxd() (y is filled first, then x).
 *
 * Returns 4 times the fraction of the N points (x[j], y[j]) for which
 * x^2 + y^2 < 1.
 */
double singlePi (double * x, double * y){
  double inside = 0;
  int idx;

  ranlxd(y,N);
  ranlxd(x,N);

  for (idx = 0; idx < N; idx++){
    const double r2 = x[idx]*x[idx] + y[idx]*y[idx];
    inside += (r2 < 1) ? 1 : 0;
  }

  inside /= N;
  return (4*inside);
}
int accept(const double exphdiff) { int acc=0, i; double r[1]; // the acceptance step if(exphdiff>=1) { acc = 1; R += 1; } else { ranlxd(r,1); if(r[0]<exphdiff) { acc = 1; R += 1; } else { // get the old values for phi, cause the configuration was not accepted for (i=0; i<GRIDPOINTS; i++) { gauge1[i]=gauge1_old[i]; gauge2[i]=gauge2_old[i]; }; calculatelinkvars(); s_g = s_g_old; } } return acc; }
/*
 * Return a random su3_vector of unit length.
 *
 * Six numbers are drawn with ranlxd(), each is scaled by 6.2831853071796
 * and the resulting 6-component real vector is divided by its Euclidean
 * norm. The draw is repeated while the norm is so small that adding it to
 * 1.0 does not change 1.0.
 */
su3_vector unif_su3_vector(void)
{
   double comp[6],len,scale;
   su3_vector out;
   int c;

   do
   {
      ranlxd(comp,6);
      len=0.0;

      for (c=0;c<6;c++)
      {
         comp[c] *= 6.2831853071796;
         len+=comp[c]*comp[c];
      }

      len=sqrt(len);
   }
   while (1.0==(1.0+len));

   scale=1.0/len;

   out.c0.re=comp[0]*scale;
   out.c0.im=comp[1]*scale;
   out.c1.re=comp[2]*scale;
   out.c1.im=comp[3]*scale;
   out.c2.re=comp[4]*scale;
   out.c2.im=comp[5]*scale;

   return(out);
}
/*
 * Fill v[0..n-1] with Gaussian random numbers generated pairwise from two
 * ranlxd() outputs: radius = sqrt(-log(1-u0)), angle = 6.2831853071796*u1,
 * values radius*sin(angle) and radius*cos(angle).
 *
 * v: output buffer of at least n doubles
 * n: number of values wanted
 *
 * One pair is always drawn, even for n <= 0 (nothing is stored then). For
 * odd n the cosine value of the last pair is discarded.
 * NOTE(review): if ranlxd() is uniform on [0,1) each component has
 * variance 1/2, not 1 - confirm this is what callers expect.
 */
void gauss_vector(double v[],int n)
{
   double u[2];
   double radius,angle;
   int k=0;

   do
   {
      ranlxd(u,2);
      radius=sqrt(-log(1.0-u[0]));
      angle=u[1]*6.2831853071796;

      if (k<n)
         v[k]=radius*sin(angle);
      if ((k+1)<n)
         v[k+1]=radius*cos(angle);

      k+=2;
   }
   while (k<n);
}
void test() { int i; complex double detr; double rA[2]; //det2 = hard_inverse(Minv1); for(i = 0; i < 60; i++) { //At[i] += 1.8; ranlxd(rA, 2); At[i] += rA[0]; //Ax[i] += rA[0]; //Ay[i] += rA[1]; calculatelinkvars(); //det1 = det2; //det2 = hard_inverse(Minv2); detr = det_ratio_t(i, Minv1); update_inverse(i, Minv1); printf("%.12f+ I*%.12f, %.12f\n", creal(detr), cimag(detr), cabs(detr)); //printf("%.12f+ I*%.12f, %.12f\n\n", creal(det2/det1), cimag(det2/det1), cabs(det2/det1)); //printf("%g\n", matrix_diff(Minv1, Minv2)); } }
/*
 * Fill the local spinor field k with Gaussian random numbers, visiting the
 * sites of the global lattice in lexicographic (t, x, y, z) order.
 *
 * With MPI, the generator state of process 0 is broadcast first and
 * installed on every other process. Each process then walks over all global
 * sites: for sites it owns (g_cart_id equals the rank returned by
 * MPI_Cart_rank for the site's process coordinates) it stores 24 values
 * from gauss_vector(); for all other sites it draws 24 numbers with
 * ranlxd() and throws them away.
 * NOTE(review): the discarded draws presumably keep all generators in step
 * so the field does not depend on the process layout - confirm that
 * gauss_vector(v, 24) consumes exactly 24 ranlxd numbers.
 */
void random_spinor_field_lexic(spinor * const k) {
  int t_glb, x_glb, y_glb, z_glb;   /* global lattice coordinates */
  int t_loc, x_loc, y_loc, z_loc;   /* same site relative to this process */
  int owner = 0;                    /* rank owning the current site */
  int proc_coords[4];
  double v[24];
#ifdef MPI
  int rlxd_state[105];

  if(g_proc_id == 0) {
    rlxd_get(rlxd_state);
  }
  MPI_Bcast(rlxd_state, 105, MPI_INT, 0, MPI_COMM_WORLD);
  if(g_proc_id != 0) {
    rlxd_reset(rlxd_state);
  }
#endif

  for(t_glb = 0; t_glb < g_nproc_t*T; t_glb++) {
    t_loc = t_glb - g_proc_coords[0]*T;
    proc_coords[0] = t_glb / T;

    for(x_glb = 0; x_glb < g_nproc_x*LX; x_glb++) {
      x_loc = x_glb - g_proc_coords[1]*LX;
      proc_coords[1] = x_glb / LX;

      for(y_glb = 0; y_glb < g_nproc_y*LY; y_glb++) {
        y_loc = y_glb - g_proc_coords[2]*LY;
        proc_coords[2] = y_glb / LY;

        for(z_glb = 0; z_glb < g_nproc_z*LZ; z_glb++) {
          z_loc = z_glb - g_proc_coords[3]*LZ;
          proc_coords[3] = z_glb / LZ;
#ifdef MPI
          MPI_Cart_rank(g_cart_grid, proc_coords, &owner);
#endif
          if(g_cart_id != owner) {
            /* foreign site: draw and discard */
            ranlxd(v,24);
            continue;
          }
          gauss_vector(v, 24);
          memcpy(k + g_ipt[t_loc][x_loc][y_loc][z_loc], v, 24*sizeof(double));
        }
      }
    }
  }
  return;
}
/*
 * Fill the first N spinors of k with Z2 noise.
 *
 * For every spinor 24 numbers are drawn with ranlxd(); each of the 24 real
 * components (real and imaginary parts of s0..s3, colours c0..c2) is set to
 * +1/sqrt(2) when the matching number is below 0.5 and to -1/sqrt(2)
 * otherwise.
 */
void z2_random_spinor_field(spinor * const k, const int N) {
  const double amp = 1/sqrt(2);
  double u[24];
  double noise[24];
  spinor *dst = k;
  int site, c;

  for (site = 0; site < N; site++, dst++) {
    ranlxd(u,24);
    for (c = 0; c < 24; c++) {
      noise[c] = (u[c] < 0.5) ? amp : -amp;
    }

    dst->s0.c0.re=noise[0];
    dst->s0.c0.im=noise[1];
    dst->s0.c1.re=noise[2];
    dst->s0.c1.im=noise[3];
    dst->s0.c2.re=noise[4];
    dst->s0.c2.im=noise[5];

    dst->s1.c0.re=noise[6];
    dst->s1.c0.im=noise[7];
    dst->s1.c1.re=noise[8];
    dst->s1.c1.im=noise[9];
    dst->s1.c2.re=noise[10];
    dst->s1.c2.im=noise[11];

    dst->s2.c0.re=noise[12];
    dst->s2.c0.im=noise[13];
    dst->s2.c1.re=noise[14];
    dst->s2.c1.im=noise[15];
    dst->s2.c2.re=noise[16];
    dst->s2.c2.im=noise[17];

    dst->s3.c0.re=noise[18];
    dst->s3.c0.im=noise[19];
    dst->s3.c1.re=noise[20];
    dst->s3.c1.im=noise[21];
    dst->s3.c2.re=noise[22];
    dst->s3.c2.im=noise[23];
  }
  return;
}
/*
 * "Hit or miss" Monte Carlo estimate of pi.
 *
 * N: number of points to draw; must be positive (N == 0 yields NaN because
 *    of the final 0/0 division).
 *
 * Each point consists of two numbers from ranlxd(); a point counts as a hit
 * when the sum of their squares is below 1. Returns 4 * hits / N.
 *
 * The two-element sample buffer lives on the stack: the previous heap
 * allocation was never checked for failure and bought nothing for a buffer
 * of fixed, tiny size.
 */
double Pi_trivial (int N) {
  double v[2];
  double rho;
  int S = 0;
  int i;

  for(i=0; i<N; i++) {
    ranlxd(v,2);
    rho = v[0]*v[0] + v[1]*v[1];
    if(rho < 1) {
      S++;
    }
  }

  return 4.0*(double)S/(double)N;
}
/*
 * Estimate pi with Buffon's needle method.
 *
 * N:       number of needle throws; must be positive
 * L:       needle length used in the crossing test and in the final ratio
 * pigreco: value of pi used to turn the first random number into an angle
 *
 * Per throw two numbers are drawn with ranlxd(): u[0]*pigreco is the needle
 * angle and u[1] is compared against L*sin(angle)/2 and 1 - L*sin(angle)/2
 * to decide whether the needle crosses a line. S accumulates the crossing
 * frequency in steps of 1/N and the function returns 2*L/S (infinite when
 * no crossing was recorded).
 *
 * The two-element sample buffer lives on the stack: the previous heap
 * allocation was never checked for failure.
 */
double Pi_buffon (int N, double L, double pigreco) {
  double u[2];
  double half_proj;   /* L*sin(theta)/2 for the current throw */
  double S = 0;
  int i;

  for(i=0; i<N; i++) {
    ranlxd(u,2);
    half_proj = L*sin(u[0]*pigreco)/2.0;
    if((half_proj > u[1])||(u[1] > 1-half_proj)) {
      S += 1.0/(double)N;
    }
  }

  return 2*L/S;
}
/*
 * Fill vol consecutive spinor_dble objects starting at pk with two-valued
 * random noise.
 *
 * Each spinor is written through a double pointer as 24 consecutive
 * doubles. For every double one number is drawn with ranlxd(): below 0.5
 * the entry becomes -1/sqrt(2)/norm, otherwise +1/sqrt(2)/norm, where
 * norm = sqrt(L1*NPROC1*L2*NPROC2*L3*NPROC3).
 * NOTE(review): this relies on spinor_dble being exactly 24 packed doubles
 * - confirm against the type definition.
 */
extern void random_Z4(int vol, spinor_dble *pk) {
  const double norm = sqrt(L1 * NPROC1 * L2 * NPROC2 * L3 * NPROC3);
  const double neg = -1.0 / sqrt(2) / norm;
  const double pos = 1.0 / sqrt(2) / norm;
  double u[24];
  int site, c;

  for (site = 0; site < vol; site++) {
    double *comp = (double*) (pk + site);

    ranlxd(u, 24);
    for (c = 0; c < 24; c++) {
      comp[c] = (u[c] < 0.5) ? neg : pos;
    }
  }
}
/*
 * Fill vf[0..n-1] with random numbers of the form g*g + e, where g comes
 * from gauss() and e from expo() (one value each per output element).
 *
 * One number is also drawn with ranlxd() per element; its value is not
 * used, but the draw advances the generator and is therefore kept.
 */
void rootexpo (float vf[], int n) {
  float e_smp[1];
  float g_smp[1];
  double discard[1];
  double g, e;
  int idx;

  for(idx=0; idx<n; idx++) {
    ranlxd(discard, 1);
    gauss(g_smp,1);
    expo(e_smp,1);

    g = (double)g_smp[0];
    e = (double)e_smp[0];
    vf[idx] = (float)(pow(g,2) + e);
  }
}
/*
 * One Metropolis sweep over all entries of state.
 *
 * state:     configuration, updated in place
 * state_dim: number of entries in state
 *
 * For every entry two numbers are drawn with ranlxd(): the second one
 * proposes state[i] + DELTA*(2u-1), the first one is the acceptance
 * threshold. The proposal is accepted when exp(-deltaS(...)) is not smaller
 * than the threshold.
 *
 * Returns the sum of the deltaS values of all accepted proposals.
 */
double HOmetropolis (double* state, int state_dim) {
  double rnd[2];
  double total = 0;
  int site;

  for(site=0; site<state_dim; site++) {
    double proposal, dS;

    ranlxd(rnd,2);
    proposal = state[site] + DELTA*(2*rnd[1] - 1);
    dS = deltaS(state,state_dim,proposal,site);

    /* keep the old value unless the acceptance test succeeds */
    if(!(exp(-dS) >= rnd[0])) {
      continue;
    }

    state[site] = proposal;
    total += dS;
  }

  return total;
}
rtn_int_var campionamentoImportanza ( double a, double b , int N ,double (*f) (double)){ int nRandom=1000; int j=0; double *rd; int i = 0; double integral_true = partition(a,b,Nbintrap,1,f); rtn_int_var rtn ; double tmp = 0.0; rd = malloc(nRandom*sizeof(double)); init_int_var(rtn); rtn.Npnt = N; /* * 1 -> Flat * 2 -> Gauss * 3 -> sqrt(x) e^(-x) */ /* Il doppio ciclo serve a non usare troppa memoria. In questo modo, infatti, * il vettore di numeri random ha lunghezza 100, qualsiasi sia il numero * dei punti utilizzati dal montecarlo. * ranlxd viene chiamata con un numero maggiore di 32, in quanto consigliato dalle librerie. * Ho scelto il valore 50 per nRandom per non avere problemi con la divisione intera * visto che i punti utlilizzati dal monte carlo sono multipli di 50. */ for(j=0;j< N/nRandom;j++){ ranlxd(rd,nRandom); for (i = 0; i< nRandom ; i++){ rd[i] = a + (b-a)*rd[i]; tmp = ( f( rd[i] ) / flatPdf( a,b ,1 )); rtn.var_flat += (integral_true - tmp)*(integral_true-tmp)/((double)(N-1)*(double)N); rtn.int_flat += tmp/(double)N; } } /* * GAUSSSSSS */ for(j=0;j<N/nRandom;j++){ gauss_dble(rd,nRandom); for ( i = 0; i< nRandom; i++){ tmp = ( f( rd[i] ) / gaussPdf( 0.5,0,rd[i])); rtn.int_gauss += tmp ; rtn.var_gauss += (integral_true - tmp)*(integral_true-tmp)/((double )N*(double)(N-1)); } } rtn.int_gauss /= (double) N ; /* * *ROOT */ for(j=0;j<N/nRandom;j++){ root_exp_dble(rd,nRandom); for ( i = 0; i< nRandom ; i++){ tmp= f( rd[i] ) / root_exp_pdf(rd[i]); rtn.var_root +=(integral_true - tmp)*(integral_true-tmp)/((double )N* (double)(N-1)); rtn.int_root += tmp /(double) N; } } /* Viene raddoppiato perchè la pdf estrae solo tra 0 e +inf. * Utilizzabile solo per funzioni pari */ rtn.int_root*=2; free(rd); return (rtn); }
/* cluster update for odd time-slices */
/*
 * Overview of what the code below does:
 *  1. recomputes the flag entries ising[VOL2..VOL-1] (2 = reference
 *     configuration, 0 = none) from the four neighbours neigh[1..4];
 *  2. initialises the local work array cflag: for sites below VOL2 it is
 *     1 when itc[p]%2 == 1 and 0 when itc[p]%2 == 0; for flag sites it is 1;
 *  3. for every flag site p (skipping those with itc[p]%2 == 1) whose
 *     neighbour neigh[0][p] is still marked, grows a cluster starting from
 *     that neighbour. The global array list[] is used as the work queue
 *     (m = read position, i = last filled position). Bonds are activated
 *     either deterministically or with probabilities p1 / p2 using one
 *     ranlxd() number each;
 *  4. per finished cluster: increments nclusodd, adds refB*(size) to mB and
 *     size^2 to nclusodsq, and flips all cluster spins with probability 1/2.
 *
 * Takes no arguments and returns nothing; all results are stored in the
 * globals ising, list, refB, mB, nclusodd and nclusodsq. Diagnostics are
 * only printed, never acted upon.
 */
void clustodd(){
  extern void measureflux(int);  /* only referenced by the disabled code near the end */
  int i,p,d,m,imf6,imf7;
  int im,imf0,imf1,imf2,imf3,imf4,imf5,fwd,bwd;
  int r1,r2,r3,l1,l2,l3,u1,u2,u3,d1,d2,d3;
  int A,B,C,D,aux;
  int cflag[VOL];
  int bondflag;
  int chargeflag;
  int sx,sy,val;
  double ran[1];

  /* note that the variables ising[VOL2+i] with i in [0,VOL2-1] are the */
  /* ones that carry the flag for the reference configurations */
  /* reference configuration flags are 2 */
  /* if there is no ref config, then ising[i] is set to zero */
  for(i=VOL2;i<VOL;i++){
    if((ising[neigh[2][i]]==ising[neigh[3][i]])&&
       (ising[neigh[4][i]]==ising[neigh[1][i]])&&
       (ising[neigh[1][i]]!=ising[neigh[2][i]])) ising[i]=2;
    else ising[i]=0;
  }

  /* mark spins on odd time slices for growing clusters */
  /* spins on even-time slices are marked as 0; so that it will never join to a cluster */
  for(p=0;p<VOL2;p++) {
    if((itc[p]%2)==0) cflag[p]=0;
    if((itc[p]%2)==1) cflag[p]=1;
  }
  /* serve as flags for joining spins on the same time-slice */
  for(p=VOL2;p<VOL;p++) cflag[p]=1;

  /* grow clusters on the odd time slices */
  for(p=VOL2;p<VOL;p++){
    /* first check if the tracking is done on even slice */
    if(itc[p]%2==1) continue;
    /* check for any inconsistency: if any of the flag variables is -1 */
    /* then the cluster building is flawed */
    if((ising[p]==-1) ||(ising[p]==1)) printf("Wrong cluster grown\n");
    /* skip if the site already belongs to a cluster */
    if(cflag[neigh[0][p]]==0) continue;
    /* initialize the charge-flag */
    chargeflag=0;
    /* otherwise, start building a new cluster */
    m=0; i=0;
    list[i]=neigh[0][p];
    cflag[neigh[0][p]]=0;
    nclusodd++;
    do{
      im=list[m]; /* m is the new or the starting site */
      if(chptr[im]==1) chargeflag=1; /* mark the charge-carrying cluster */
      /* first check the spin on time-slice t+1 wants to bind */
      /* remember that you are on odd time-slice t-1*/
      imf0=neigh[5][im];
      imf1=neigh[5][imf0];
      if(cflag[imf0]==1) {
        bondflag=0;
        /* reference configuration with equal spins: bond with probability p1 */
        if((ising[imf0]==2)&&(ising[im]==ising[imf1])) {
          ranlxd(ran,1);
          if(ran[0] < p1) bondflag=1;
        }
        else if(ising[imf0]==0) bondflag=1;
        if((bondflag)&&(cflag[imf1]==1)){
          i++; list[i]=imf1; /* increase list*/
          if(chptr[imf1]==1) chargeflag=1; /* mark the charge-carrying cluster */
          cflag[imf1]=0; /* unmark spins belonging to cluster */
        }
        cflag[imf0]=0; /* unmark the interaction */
      }
      /* ============================================== */
      /* Also check if the spin in time-slice t-3 wants to bind */
      imf6=neigh[0][im];
      imf7=neigh[0][imf6];
      if(cflag[imf6]==1){
        bondflag=0;
        if((ising[imf6]==2)&&(ising[im]==ising[imf7])) {
          ranlxd(ran,1);
          if(ran[0] < p1) bondflag=1;
        }
        else if(ising[imf6]==0) bondflag=1;
        if((bondflag)&&(cflag[imf7]==1)){
          i++; list[i]=imf7; /* increase list*/
          if(chptr[imf7]==1) chargeflag=1; /* mark the charge-carrying cluster */
          cflag[imf7]=0; /* unmark spins belonging to cluster */
        }
        cflag[imf6]=0; /* unmark the interaction */
      }
      /* ============================================== */
      /* Next check if other spins in the time-slice t-1 want to bind */
      /* To see if the spins to the right-side of neigh[0] want to bind */
      imf2=neigh[1][im];
      if(cflag[imf2]==1){
        fwd=neigh[0][imf2]; bwd=neigh[5][imf2];
        bondflag=0;
        /* differing spins at fwd/bwd always bond; otherwise a reference */
        /* configuration bonds with probability p2 */
        if(ising[fwd]!=ising[bwd]) bondflag=1;
        else { if(ising[imf2]==2) { ranlxd(ran,1); if(ran[0] < p2) bondflag=1;}}
        r1=neigh[1][imf2];  /*   x    r2   x   */
        r2=neigh[2][imf2];  /*   im  imf2  r1  */
        r3=neigh[4][imf2];  /*   x    r3   x   */
        if((bondflag)&&(cflag[r1]==1)){
          i++; list[i]=r1; /* increase list*/
          if(chptr[r1]==1) chargeflag=1; /* mark the charge-carrying cluster */
          cflag[r1]=0; /* unmark spins belonging to cluster */
        }
        if((bondflag)&&(cflag[r2]==1)){
          i++; list[i]=r2; /* increase list*/
          if(chptr[r2]==1) chargeflag=1; /* mark the charge-carrying cluster */
          cflag[r2]=0; /* unmark spins belonging to cluster */
        }
        if((bondflag)&&(cflag[r3]==1)){
          i++; list[i]=r3; /* increase list*/
          if(chptr[r3]==1) chargeflag=1; /* mark the charge-carrying cluster */
          cflag[r3]=0; /* unmark spins belonging to cluster */
        }
        cflag[imf2]=0; /* unmark the interaction */
      }
      /* ============================================== */
      /* To see if the spins to the left-side of neigh[0] want to bind */
      imf3=neigh[3][im];
      if(cflag[imf3]==1){
        fwd=neigh[0][imf3]; bwd=neigh[5][imf3];
        bondflag=0;
        if(ising[fwd]!=ising[bwd]) bondflag=1;
        else { if(ising[imf3]==2) { ranlxd(ran,1); if(ran[0] < p2) bondflag=1;}}
        l1=neigh[2][imf3];  /*   x    l1   x   */
        l2=neigh[3][imf3];  /*   l2  imf3  im  */
        l3=neigh[4][imf3];  /*   x    l3   x   */
        if((bondflag)&&(cflag[l1]==1)){
          i++; list[i]=l1; /* increase list*/
          if(chptr[l1]==1) chargeflag=1; /* mark the charge-carrying cluster */
          cflag[l1]=0; /* unmark spins belonging to cluster */
        }
        if((bondflag)&&(cflag[l2]==1)){
          i++; list[i]=l2; /* increase list*/
          if(chptr[l2]==1) chargeflag=1; /* mark the charge-carrying cluster */
          cflag[l2]=0; /* unmark spins belonging to cluster */
        }
        if((bondflag)&&(cflag[l3]==1)){
          i++; list[i]=l3; /* increase list*/
          if(chptr[l3]==1) chargeflag=1; /* mark the charge-carrying cluster */
          cflag[l3]=0; /* unmark spins belonging to cluster */
        }
        cflag[imf3]=0; /* unmark the interaction */
      }
      /* ============================================== */
      /* To see if the spins to the top of neigh[0] want to bind */
      imf4=neigh[2][im];
      if(cflag[imf4]==1){
        fwd=neigh[0][imf4]; bwd=neigh[5][imf4];
        bondflag=0;
        if(ising[fwd]!=ising[bwd]) bondflag=1;
        else { if(ising[imf4]==2) { ranlxd(ran,1); if(ran[0] < p2) bondflag=1;}}
        u1=neigh[1][imf4];  /*   x    u2   x   */
        u2=neigh[2][imf4];  /*   u3  imf4  u1  */
        u3=neigh[3][imf4];  /*   x    im   x   */
        if((bondflag)&&(cflag[u1]==1)){
          i++; list[i]=u1; /* increase list*/
          if(chptr[u1]==1) chargeflag=1; /* mark the charge-carrying cluster */
          cflag[u1]=0; /* unmark spins belonging to cluster */
        }
        if((bondflag)&&(cflag[u2]==1)){
          i++; list[i]=u2; /* increase list*/
          if(chptr[u2]==1) chargeflag=1; /* mark the charge-carrying cluster */
          cflag[u2]=0; /* unmark spins belonging to cluster */
        }
        if((bondflag)&&(cflag[u3]==1)){
          i++; list[i]=u3; /* increase list*/
          if(chptr[u3]==1) chargeflag=1; /* mark the charge-carrying cluster */
          cflag[u3]=0; /* unmark spins belonging to cluster */
        }
        cflag[imf4]=0; /* unmark the interaction */
      }
      /* ============================================== */
      /* To see if the spins to the down of neigh[0] want to bind */
      imf5=neigh[4][im];
      if(cflag[imf5]==1){
        fwd=neigh[0][imf5]; bwd=neigh[5][imf5];
        bondflag=0;
        if(ising[fwd]!=ising[bwd]) bondflag=1;
        else { if(ising[imf5]==2) { ranlxd(ran,1); if(ran[0] < p2) bondflag=1;}}
        d1=neigh[1][imf5];  /*   x    im   x   */
        d2=neigh[3][imf5];  /*   d2  imf5  d1  */
        d3=neigh[4][imf5];  /*   x    d3   x   */
        if((bondflag)&&(cflag[d1]==1)){
          i++; list[i]=d1; /* increase list*/
          if(chptr[d1]==1) chargeflag=1; /* mark the charge-carrying cluster */
          cflag[d1]=0; /* unmark spins belonging to cluster */
        }
        if((bondflag)&&(cflag[d2]==1)){
          i++; list[i]=d2; /* increase list*/
          if(chptr[d2]==1) chargeflag=1; /* mark the charge-carrying cluster */
          cflag[d2]=0; /* unmark spins belonging to cluster */
        }
        if((bondflag)&&(cflag[d3]==1)){
          i++; list[i]=d3; /* increase list*/
          if(chptr[d3]==1) chargeflag=1; /* mark the charge-carrying cluster */
          cflag[d3]=0; /* unmark spins belonging to cluster */
        }
        cflag[imf5]=0; /* unmark the interaction */
      }
      /* ============================================== */
      /* Implement the Gauss law if this is the last time slice */
      if(itc[im]==(LT-1)){
        /* This involves checking of 4-spins in the same time-slice */
        /* if they want to get added in the cluster                 */
        /*   o-----x-----o       1-----A-----2   */
        /*   |     |     |       |     |     |   */
        /*   x-----o-----x       B-----0-----C   */
        /*   |     |     |       |     |     |   */
        /*   o-----x-----o       3-----D-----4   */
        /* The central o decides to connect the diagonal o depending*/
        /* the configuration at the crosses. For example, 0 will    */
        /* decide to connect with 1 depending on whether AB are in  */
        /* the reference configuration                              */
        A=neigh[0][neigh[2][im]]; B=neigh[0][neigh[3][im]];
        C=neigh[0][neigh[1][im]]; D=neigh[0][neigh[4][im]];
        /* NOTE(review): these checks use '>' whereas the list check after */
        /* the loop uses '>=' VOL2 - confirm which bound is intended.      */
        if(A > VOL2) printf("Error. Gauss Law counter\n");
        if(B > VOL2) printf("Error. Gauss Law counter\n");
        if(C > VOL2) printf("Error. Gauss Law counter\n");
        if(D > VOL2) printf("Error. Gauss Law counter\n");
        /* decide whether to join spin 1 to the cluster */
        if(ising[A] != ising[B]) {
          aux=neigh[7][im];
          if(cflag[aux]==1) {
            i++; list[i]=aux; /* increase list */
            cflag[aux]=0; /* unmark spin belonging to cluster */
          }
          /* check for the presence of the charge */
          if((chptr[A]==1)&&(chptr[B]==1)&&(chptr[im]==1)&&(chptr[aux]==1)){
            if(ising[im] != ising[aux]) printf("Error for charge\n");
          }
        }
        /* decide whether to join spin 2 to the cluster */
        if(ising[A] == ising[C]) {
          aux=neigh[6][im];
          if(cflag[aux]==1) {
            i++; list[i]=aux; /* increase list */
            cflag[aux]=0; /* unmark spin belonging to cluster */
          }
          /* check for the presence of the charge */
          if((chptr[A]==1)&&(chptr[C]==1)&&(chptr[im]==1)&&(chptr[aux]==1)){
            if(ising[im] == ising[aux]) printf("Error for charge\n");
          }
        }
        /* decide whether to join spin 3 to the cluster */
        if(ising[B] == ising[D]) {
          aux=neigh[8][im];
          if(cflag[aux]==1) {
            i++; list[i]=aux; /* increase list */
            cflag[aux]=0; /* unmark spin belonging to cluster */
          }
          /* check for the presence of the charge */
          if((chptr[B]==1)&&(chptr[D]==1)&&(chptr[im]==1)&&(chptr[aux]==1)){
            if(ising[im] == ising[aux]) printf("Error for charge\n");
          }
        }
        /* decide whether to join spin 4 to the cluster */
        if(ising[C] != ising[D]) {
          aux=neigh[9][im];
          if(cflag[aux]==1) {
            i++; list[i]=aux; /* increase list */
            cflag[aux]=0; /* unmark spin belonging to cluster */
          }
          /* check for the presence of the charge */
          if((chptr[C]==1)&&(chptr[D]==1)&&(chptr[im]==1)&&(chptr[aux]==1)){
            if(ising[im] != ising[aux]) printf("Error for charge\n");
          }
        }
      }
      m++;
    } while(m<=i);  /* stop once every queued site has been processed */

    /* check if the list only contains genuine spins */
    for(d=0;d<=i;d++) if(list[d]>=VOL2) printf("Cluster grown in Flag.\n");

    /* check if the cluster touches the charge, and calculate the profile */
    /* (currently disabled; chargeflag is therefore computed but not used) */
    //if(thermflag==0){
    //if(chargeflag==1) measureflux(i+1);}

    /* decide orientation wrt to the reference config */
    sx=ixc[list[0]]; sy=iyc[list[0]];
    val=(sx-sy)%4;
    if((val==-1)||(val==3)) refB=-1;
    else refB=1;
    if(ising[list[0]]==refB) refB=1;
    else refB=-1;
    mB = mB + refB*(i+1); /* size */
    nclusodsq += (i+1)*(i+1);

    /* flip the cluster with a 50% probability */
    ranlxd(ran,1);
    if(ran[0]<0.5){
      for(d=0;d<=i;d++) ising[list[d]] = -ising[list[d]];
    }
  }
}
/*
 * Self-test for the ranlxs / ranlxd generators.
 *
 * 1. Both generators are initialised (rlxs_init(0,32767), rlxd_init(1,32767))
 *    and called 20 times for NXS / NXD numbers. The last batch is compared,
 *    after scaling by 2^24 (single) and 2^48 (double), against the reference
 *    tables below starting at offsets 60 and 39 respectively.
 *    Assumes NXS >= 156 and NXD >= 87 so that these offsets stay in range.
 * 2. The generator states are saved, 10 batches are drawn, the states are
 *    restored and 10 batches are drawn again; the final batches must agree.
 *
 * A message is printed for every failed check and a summary for the
 * generators that passed. The exit status is 0 in all of these cases; it is
 * 1 only when the state buffers cannot be allocated (the allocations were
 * previously used unchecked and never released).
 */
int main(void)
{
   /* expected ranlxs output, multiplied by 2^24 */
   static const float xsn[96]=
   {
      13257445.0f, 15738482.0f, 5448599.0f, 9610459.0f,
      1046025.0f, 2811360.0f, 14923726.0f, 2287739.0f,
      16133204.0f, 16328320.0f, 12980218.0f, 9256959.0f,
      5633754.0f, 7422961.0f, 6032411.0f, 14970828.0f,
      10717272.0f, 2520878.0f, 8906135.0f, 8507426.0f,
      11925022.0f, 12042827.0f, 12263021.0f, 4828801.0f,
      5300508.0f, 13346776.0f, 10869790.0f, 8520207.0f,
      11213953.0f, 14439320.0f, 5716476.0f, 13600448.0f,
      12545579.0f, 3466523.0f, 113906.0f, 10407879.0f,
      12058596.0f, 4390921.0f, 1634350.0f, 9823280.0f,
      12569690.0f, 8267856.0f, 5869501.0f, 7210219.0f,
      1362361.0f, 2956909.0f, 504465.0f, 6664636.0f,
      6048963.0f, 1098525.0f, 1261330.0f, 2401071.0f,
      8087317.0f, 1293933.0f, 555494.0f, 14872475.0f,
      11261534.0f, 166813.0f, 13424516.0f, 15280818.0f,
      4644497.0f, 6333595.0f, 10012569.0f, 6878028.0f,
      9176136.0f, 8379433.0f, 11073957.0f, 2465529.0f,
      13633550.0f, 12721649.0f, 569725.0f, 6375015.0f,
      2164250.0f, 6725885.0f, 7223108.0f, 4890858.0f,
      11298261.0f, 12086020.0f, 4447706.0f, 1164782.0f,
      1904399.0f, 16669839.0f, 2586766.0f, 3605708.0f,
      15761082.0f, 14937769.0f, 13965017.0f, 2175021.0f,
      16668997.0f, 13996602.0f, 6313099.0f, 15646036.0f,
      9746447.0f, 9596781.0f, 9244169.0f, 4731726.0f
   };
   /* expected ranlxd output, multiplied by 2^48 */
   static const double xdn[48]=
   {
      135665102723086.0, 259840970195871.0, 110726726657103.0,
      53972500363809.0, 199301297412157.0, 63744794353870.0,
      178745978725904.0, 243549380863176.0, 244796821836177.0,
      223788809121855.0, 113720856430443.0, 124607822268499.0,
      25705458431399.0, 155476863764950.0, 195602097736933.0,
      183038707238950.0, 62268883953527.0, 157047615112119.0,
      58134973897037.0, 26908869337679.0, 259927185454290.0,
      130534606773507.0, 205295065526788.0, 40201323262686.0,
      193822255723177.0, 239720285097881.0, 54433631586673.0,
      31313178820772.0, 152904879618865.0, 256187025780734.0,
      110292144635528.0, 26555117184469.0, 228913371644996.0,
      126837665590799.0, 141069100232139.0, 96171028602910.0,
      259271018918511.0, 65257892816619.0, 14254344610711.0,
      137794868158301.0, 269703238916504.0, 35782602710520.0,
      51447305327263.0, 247852246697199.0, 65072958134912.0,
      273325640150591.0, 2768714666444.0, 173907458721736.0
   };
   int k,test1,test2;
   int *state1,*state2;
   float sbase;
   float xs[NXS],ys[NXS];
   double base;
   double xd[NXD],yd[NXD];

   sbase=(float)(ldexp(1.0,24));
   base=ldexp(1.0,48);

   state1=malloc(rlxs_size()*sizeof(int));
   state2=malloc(rlxd_size()*sizeof(int));

   if ((state1==NULL)||(state2==NULL))
   {
      fprintf(stderr,"Unable to allocate the generator state buffers\n");
      free(state1);
      free(state2);
      exit(1);
   }

   rlxs_init(0,32767);
   rlxd_init(1,32767);

/*******************************************************************************
*
* Check that the correct sequences of random numbers are obtained
*
*******************************************************************************/

   for (k=0;k<20;k++)
   {
      ranlxs(xs,NXS);
      ranlxd(xd,NXD);
   }

   test1=0;
   test2=0;

   for (k=0;k<96;k++)
   {
      if (xsn[k]!=(xs[k+60]*sbase))
         test1=1;
   }

   for (k=0;k<48;k++)
   {
      if (xdn[k]!=(xd[k+39]*base))
         test2=1;
   }

   if (test1==1)
   {
      printf("\n");
      printf("Test failed: ranlxs gives incorrect results\n");
      printf("=> do not use ranlxs on this machine\n");
      printf("\n");
   }

   if (test2==1)
   {
      printf("\n");
      printf("Test failed: ranlxd gives incorrect results\n");
      printf("=> do not use ranlxd on this machine\n");
      printf("\n");
   }

/*******************************************************************************
*
* Check of the I/O routines
*
*******************************************************************************/

   rlxs_get(state1);
   rlxd_get(state2);

   for (k=0;k<10;k++)
   {
      ranlxs(xs,NXS);
      ranlxd(xd,NXD);
   }

   rlxs_reset(state1);
   rlxd_reset(state2);

   for (k=0;k<10;k++)
   {
      ranlxs(ys,NXS);
      ranlxd(yd,NXD);
   }

   for (k=0;k<NXS;k++)
   {
      if (xs[k]!=ys[k])
         test1=2;
   }

   for (k=0;k<NXD;k++)
   {
      if (xd[k]!=yd[k])
         test2=2;
   }

   if (test1==2)
   {
      printf("\n");
      printf("Test failed: I/O routines for ranlxs do not work properly\n");
      printf("=> do not use ranlxs on this machine\n");
      printf("\n");
   }

   if (test2==2)
   {
      printf("\n");
      printf("Test failed: I/O routines for ranlxd do not work properly\n");
      printf("=> do not use ranlxd on this machine\n");
      printf("\n");
   }

/*******************************************************************************
*
* Success messages
*
*******************************************************************************/

   if ((test1==0)&&(test2==0))
   {
      printf("\n");
      printf("All tests passed\n");
      printf("=> ranlxs and ranlxd work correctly on this machine\n");
      printf("\n");
   }
   else if (test1==0)
   {
      printf("\n");
      printf("All tests on ranlxs passed\n");
      printf("=> ranlxs works correctly on this machine\n");
      printf("\n");
   }
   else if (test2==0)
   {
      printf("\n");
      printf("All tests on ranlxd passed\n");
      printf("=> ranlxd works correctly on this machine\n");
      printf("\n");
   }

   free(state1);
   free(state2);
   exit(0);
}
int update_tm(double *plaquette_energy, double *rectangle_energy, char * filename, const int return_check, const int acctest, const int traj_counter) { su3 *v, *w; static int ini_g_tmp = 0; int accept, i=0, j=0, iostatus=0; double yy[1]; double dh, expmdh, ret_dh=0., ret_gauge_diff=0., tmp; double atime=0., etime=0.; double ks = 0., kc = 0., ds, tr, ts, tt; char tmp_filename[50]; /* Energy corresponding to the Gauge part */ double new_plaquette_energy=0., new_rectangle_energy = 0.; /* Energy corresponding to the Momenta part */ double enep=0., enepx=0., ret_enep = 0.; /* Energy corresponding to the pseudo fermion part(s) */ FILE * datafile=NULL, * ret_check_file=NULL; hamiltonian_field_t hf; paramsXlfInfo *xlfInfo; hf.gaugefield = g_gauge_field; hf.momenta = moment; hf.derivative = df0; hf.update_gauge_copy = g_update_gauge_copy; hf.update_gauge_energy = g_update_gauge_energy; hf.update_rectangle_energy = g_update_rectangle_energy; hf.traj_counter = traj_counter; integrator_set_fields(&hf); strcpy(tmp_filename, ".conf.tmp"); if(ini_g_tmp == 0) { ini_g_tmp = init_gauge_tmp(VOLUME); if(ini_g_tmp != 0) { exit(-1); } ini_g_tmp = 1; } atime = gettime(); /* * here the momentum and spinor fields are initialized * and their respective actions are calculated */ /* * copy the gauge field to gauge_tmp */ #ifdef OMP #pragma omp parallel for private(w,v) #endif for(int ix=0;ix<VOLUME;ix++) { for(int mu=0;mu<4;mu++) { v=&hf.gaugefield[ix][mu]; w=&gauge_tmp[ix][mu]; _su3_assign(*w,*v); } } /* heatbath for all monomials */ for(i = 0; i < Integrator.no_timescales; i++) { for(j = 0; j < Integrator.no_mnls_per_ts[i]; j++) { monomial_list[ Integrator.mnls_per_ts[i][j] ].hbfunction(Integrator.mnls_per_ts[i][j], &hf); } } if(Integrator.monitor_forces) monitor_forces(&hf); /* initialize the momenta */ enep = random_su3adj_field(reproduce_randomnumber_flag, hf.momenta); g_sloppy_precision = 1; /* run the trajectory */ if(Integrator.n_int[Integrator.no_timescales-1] > 0) { 
Integrator.integrate[Integrator.no_timescales-1](Integrator.tau, Integrator.no_timescales-1, 1); } g_sloppy_precision = 0; /* compute the final energy contributions for all monomials */ dh = 0.; for(i = 0; i < Integrator.no_timescales; i++) { for(j = 0; j < Integrator.no_mnls_per_ts[i]; j++) { dh += monomial_list[ Integrator.mnls_per_ts[i][j] ].accfunction(Integrator.mnls_per_ts[i][j], &hf); } } enepx = moment_energy(hf.momenta); if (!bc_flag) { /* if PBC */ new_plaquette_energy = measure_gauge_action( (const su3**) hf.gaugefield); if(g_rgi_C1 > 0. || g_rgi_C1 < 0.) { new_rectangle_energy = measure_rectangles( (const su3**) hf.gaugefield); } } if(g_proc_id == 0 && g_debug_level > 3) printf("called moment_energy: dh = %1.10e\n", (enepx - enep)); /* Compute the energy difference */ dh = dh + (enepx - enep); if(g_proc_id == 0 && g_debug_level > 3) { printf("called momenta_acc dH = %e\n", (enepx - enep)); } expmdh = exp(-dh); /* the random number is only taken at node zero and then distributed to the other sites */ ranlxd(yy,1); if(g_proc_id==0) { #ifdef MPI for(i = 1; i < g_nproc; i++) { MPI_Send(&yy[0], 1, MPI_DOUBLE, i, 31, MPI_COMM_WORLD); } #endif } #ifdef MPI else{ MPI_Recv(&yy[0], 1, MPI_DOUBLE, 0, 31, MPI_COMM_WORLD, &status); } #endif accept = (!acctest | (expmdh > yy[0])); if(g_proc_id == 0) { fprintf(stdout, "# Trajectory is %saccepted.\n", (accept ? 
"" : "not ")); } /* Here a reversibility test is performed */ /* The trajectory is integrated back */ if(return_check) { if(g_proc_id == 0) { fprintf(stdout, "# Performing reversibility check.\n"); } if(accept) { /* save gauge file to disk before performing reversibility check */ xlfInfo = construct_paramsXlfInfo((*plaquette_energy)/(6.*VOLUME*g_nproc), -1); // Should write this to temporary file first, and then check if(g_proc_id == 0 && g_debug_level > 0) { fprintf(stdout, "# Writing gauge field to file %s.\n", tmp_filename); } if((iostatus = write_gauge_field( tmp_filename, 64, xlfInfo) != 0 )) { /* Writing failed directly */ fprintf(stderr, "Error %d while writing gauge field to %s\nAborting...\n", iostatus, tmp_filename); exit(-2); } /* There is double writing of the gauge field, also in hmc_tm.c in this case */ /* No reading back check needed here, as reading back is done further down */ if(g_proc_id == 0 && g_debug_level > 0) { fprintf(stdout, "# Writing done.\n"); } free(xlfInfo); } g_sloppy_precision = 1; /* run the trajectory back */ Integrator.integrate[Integrator.no_timescales-1](-Integrator.tau, Integrator.no_timescales-1, 1); g_sloppy_precision = 0; /* compute the energy contributions from the pseudo-fermions */ ret_dh = 0.; for(i = 0; i < Integrator.no_timescales; i++) { for(j = 0; j < Integrator.no_mnls_per_ts[i]; j++) { ret_dh += monomial_list[ Integrator.mnls_per_ts[i][j] ].accfunction(Integrator.mnls_per_ts[i][j], &hf); } } ret_enep = moment_energy(hf.momenta); /* Compute the energy difference */ ret_dh += ret_enep - enep ; /* Compute Differences in the fields */ ks = 0.; kc = 0.; #ifdef OMP #pragma omp parallel private(w,v,tt,tr,ts,ds,ks,kc) { int thread_num = omp_get_thread_num(); #endif su3 ALIGN v0; #ifdef OMP #pragma omp for #endif for(int ix = 0; ix < VOLUME; ++ix) { for(int mu = 0; mu < 4; ++mu) { v=&hf.gaugefield[ix][mu]; w=&gauge_tmp[ix][mu]; _su3_minus_su3(v0, *v, *w); _su3_square_norm(ds, v0); tr = sqrt(ds) + kc; ts = tr + ks; tt = 
ts-ks; ks = ts; kc = tr-tt; } } kc=ks+kc; #ifdef OMP g_omp_acc_re[thread_num] = kc; } /* OpenMP parallel section closing brace */ /* sum up contributions from thread-local kahan summations */ for(int k = 0; k < omp_num_threads; ++k) ret_gauge_diff += g_omp_acc_re[k]; #else ret_gauge_diff = kc; #endif #ifdef MPI tmp = ret_gauge_diff; MPI_Reduce(&tmp, &ret_gauge_diff, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); #endif /* compute the total H */ tmp = enep; for(i = 0; i < Integrator.no_timescales; i++) { for(j = 0; j < Integrator.no_mnls_per_ts[i]; j++) { tmp += monomial_list[ Integrator.mnls_per_ts[i][j] ].energy0; } } /* Output */ if(g_proc_id == 0) { ret_check_file = fopen("return_check.data","a"); fprintf(ret_check_file,"ddh = %1.4e ddU= %1.4e ddh/H = %1.4e\n", ret_dh, ret_gauge_diff/4./((double)(VOLUME*g_nproc))/3., ret_dh/tmp); fclose(ret_check_file); } if(accept) { /* Read back gauge field */ if(g_proc_id == 0 && g_debug_level > 0) { fprintf(stdout, "# Trying to read gauge field from file %s.\n", tmp_filename); } if((iostatus = read_gauge_field(tmp_filename) != 0)) { fprintf(stderr, "Error %d while reading gauge field from %s\nAborting...\n", iostatus, tmp_filename); exit(-2); } if(g_proc_id == 0 && g_debug_level > 0) { fprintf(stdout, "# Reading done.\n"); } } if(g_proc_id == 0) { fprintf(stdout, "# Reversibility check done.\n"); } } /* end of reversibility check */ if(accept) { *plaquette_energy = new_plaquette_energy; *rectangle_energy = new_rectangle_energy; /* put the links back to SU(3) group */ if (!bc_flag) { /* periodic boundary conditions */ #ifdef OMP #pragma omp parallel for private(v) #endif for(int ix=0;ix<VOLUME;ix++) { for(int mu=0;mu<4;mu++) { v=&hf.gaugefield[ix][mu]; restoresu3_in_place(v); } } } } else { /* reject: copy gauge_tmp to hf.gaugefield */ #ifdef OMP #pragma omp parallel for private(w) private(v) #endif for(int ix=0;ix<VOLUME;ix++) { for(int mu=0;mu<4;mu++){ v=&hf.gaugefield[ix][mu]; w=&gauge_tmp[ix][mu]; _su3_assign(*v,*w); } } } 
hf.update_gauge_copy = 1; g_update_gauge_copy = 1; hf.update_gauge_energy = 1; g_update_gauge_energy = 1; hf.update_rectangle_energy = 1; g_update_rectangle_energy = 1; #ifdef MPI xchange_gauge(hf.gaugefield); #endif etime=gettime(); /* printing data in the .data file */ if(g_proc_id==0) { datafile = fopen(filename, "a"); if (!bc_flag) { /* if Periodic Boundary Conditions */ fprintf(datafile, "%.8d %14.12f %14.12f %e ", traj_counter, (*plaquette_energy)/(6.*VOLUME*g_nproc), dh, expmdh); } for(i = 0; i < Integrator.no_timescales; i++) { for(j = 0; j < Integrator.no_mnls_per_ts[i]; j++) { if(monomial_list[ Integrator.mnls_per_ts[i][j] ].type != GAUGE && monomial_list[ Integrator.mnls_per_ts[i][j] ].type != SFGAUGE && monomial_list[ Integrator.mnls_per_ts[i][j] ].type != NDPOLY && monomial_list[ Integrator.mnls_per_ts[i][j] ].type != NDCLOVER && monomial_list[ Integrator.mnls_per_ts[i][j] ].type != CLOVERNDTRLOG && monomial_list[ Integrator.mnls_per_ts[i][j] ].type != CLOVERTRLOG ) { fprintf(datafile,"%d %d ", monomial_list[ Integrator.mnls_per_ts[i][j] ].iter0, monomial_list[ Integrator.mnls_per_ts[i][j] ].iter1); } } } fprintf(datafile, "%d %e", accept, etime-atime); if(g_rgi_C1 > 0. || g_rgi_C1 < 0) { fprintf(datafile, " %e", (*rectangle_energy)/(12*VOLUME*g_nproc)); } fprintf(datafile, "\n"); fflush(datafile); fclose(datafile); } return(accept); }
int main(int argc,char *argv[]) { FILE *parameterfile = NULL; char datafilename[206]; char parameterfilename[206]; char conf_filename[50]; char scalar_filename[50]; char * input_filename = NULL; char * filename = NULL; double plaquette_energy; #ifdef _USE_HALFSPINOR #undef _USE_HALFSPINOR printf("# WARNING: USE_HALFSPINOR will be ignored (not supported here).\n"); #endif if(even_odd_flag) { even_odd_flag=0; printf("# WARNING: even_odd_flag will be ignored (not supported here).\n"); } int j,j_max,k,k_max = 2; _Complex double * drvsc; #ifdef HAVE_LIBLEMON paramsXlfInfo *xlfInfo; #endif int status = 0; static double t1,t2,dt,sdt,dts,qdt,sqdt; double antioptaway=0.0; #ifdef MPI static double dt2; DUM_DERI = 6; DUM_SOLVER = DUM_DERI+2; DUM_MATRIX = DUM_SOLVER+6; NO_OF_SPINORFIELDS = DUM_MATRIX+2; #ifdef OMP int mpi_thread_provided; MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &mpi_thread_provided); #else MPI_Init(&argc, &argv); #endif MPI_Comm_rank(MPI_COMM_WORLD, &g_proc_id); #else g_proc_id = 0; #endif g_rgi_C1 = 1.; process_args(argc,argv,&input_filename,&filename); set_default_filenames(&input_filename, &filename); /* Read the input file */ if( (j = read_input(input_filename)) != 0) { fprintf(stderr, "Could not find input file: %s\nAborting...\n", input_filename); exit(-1); } if(g_proc_id==0) { printf("parameter rho_BSM set to %f\n", rho_BSM); printf("parameter eta_BSM set to %f\n", eta_BSM); printf("parameter m0_BSM set to %f\n", m0_BSM); } #ifdef OMP init_openmp(); #endif tmlqcd_mpi_init(argc, argv); if(g_proc_id==0) { #ifdef SSE printf("# The code was compiled with SSE instructions\n"); #endif #ifdef SSE2 printf("# The code was compiled with SSE2 instructions\n"); #endif #ifdef SSE3 printf("# The code was compiled with SSE3 instructions\n"); #endif #ifdef P4 printf("# The code was compiled for Pentium4\n"); #endif #ifdef OPTERON printf("# The code was compiled for AMD Opteron\n"); #endif #ifdef _GAUGE_COPY printf("# The code was compiled with 
-D_GAUGE_COPY\n"); #endif #ifdef BGL printf("# The code was compiled for Blue Gene/L\n"); #endif #ifdef BGP printf("# The code was compiled for Blue Gene/P\n"); #endif #ifdef _USE_HALFSPINOR printf("# The code was compiled with -D_USE_HALFSPINOR\n"); #endif #ifdef _USE_SHMEM printf("# The code was compiled with -D_USE_SHMEM\n"); #ifdef _PERSISTENT printf("# The code was compiled for persistent MPI calls (halfspinor only)\n"); #endif #endif #ifdef MPI #ifdef _NON_BLOCKING printf("# The code was compiled for non-blocking MPI calls (spinor and gauge)\n"); #endif #endif printf("\n"); fflush(stdout); } #ifdef _GAUGE_COPY init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 1); #else init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 0); #endif init_geometry_indices(VOLUMEPLUSRAND + g_dbw2rand); j = init_bispinor_field(VOLUMEPLUSRAND, 12); if ( j!= 0) { fprintf(stderr, "Not enough memory for bispinor fields! Aborting...\n"); exit(0); } j = init_spinor_field(VOLUMEPLUSRAND, 12); if ( j!= 0) { fprintf(stderr, "Not enough memory for spinor fields! Aborting...\n"); exit(0); } int numbScalarFields = 4; j = init_scalar_field(VOLUMEPLUSRAND, numbScalarFields); if ( j!= 0) { fprintf(stderr, "Not enough memory for scalar fields! Aborting...\n"); exit(0); } drvsc = malloc(18*VOLUMEPLUSRAND*sizeof(_Complex double)); if(g_proc_id == 0) { fprintf(stdout,"# The number of processes is %d \n",g_nproc); printf("# The lattice size is %d x %d x %d x %d\n", (int)(T*g_nproc_t), (int)(LX*g_nproc_x), (int)(LY*g_nproc_y), (int)(g_nproc_z*LZ)); printf("# The local lattice size is %d x %d x %d x %d\n", (int)(T), (int)(LX), (int)(LY),(int) LZ); fflush(stdout); } /* define the geometry */ geometry(); j = init_bsm_2hop_lookup(VOLUME); if ( j!= 0) { // this should not be reached since the init function calls fatal_error anyway fprintf(stderr, "Not enough memory for BSM2b 2hop lookup table! 
Aborting...\n"); exit(0); } /* define the boundary conditions for the fermion fields */ /* for the actual inversion, this is done in invert.c as the operators are iterated through */ // // For the BSM operator we don't use kappa normalisation, // as a result, when twisted boundary conditions are applied this needs to be unity. // In addition, unlike in the Wilson case, the hopping term comes with a plus sign. // However, in boundary(), the minus sign for the Wilson case is implicitly included. // We therefore use -1.0 here. boundary(-1.0); status = check_geometry(); if (status != 0) { fprintf(stderr, "Checking of geometry failed. Unable to proceed.\nAborting....\n"); exit(1); } #if (defined MPI && !(defined _USE_SHMEM)) // fails, we're not using spinor fields // check_xchange(); #endif start_ranlux(1, 123456); // read gauge field if( strcmp(gauge_input_filename, "create_random_gaugefield") == 0 ) { random_gauge_field(reproduce_randomnumber_flag, g_gauge_field); } else { sprintf(conf_filename, "%s.%.4d", gauge_input_filename, nstore); if (g_cart_id == 0) { printf("#\n# Trying to read gauge field from file %s in %s precision.\n", conf_filename, (gauge_precision_read_flag == 32 ? "single" : "double")); fflush(stdout); } int i; if( (i = read_gauge_field(conf_filename,g_gauge_field)) !=0) { fprintf(stderr, "Error %d while reading gauge field from %s\n Aborting...\n", i, conf_filename); exit(-2); } if (g_cart_id == 0) { printf("# Finished reading gauge field.\n"); fflush(stdout); } } // read scalar field if( strcmp(scalar_input_filename, "create_random_scalarfield") == 0 ) { for( int s=0; s<numbScalarFields; s++ ) ranlxd(g_scalar_field[s], VOLUME); } else { sprintf(scalar_filename, "%s.%d", scalar_input_filename, nscalar); if (g_cart_id == 0) { printf("#\n# Trying to read scalar field from file %s in %s precision.\n", scalar_filename, (scalar_precision_read_flag == 32 ? 
"single" : "double")); fflush(stdout); } int i; if( (i = read_scalar_field(scalar_filename,g_scalar_field)) !=0) { fprintf(stderr, "Error %d while reading scalar field from %s\n Aborting...\n", i, scalar_filename); exit(-2); } if (g_cart_id == 0) { printf("# Finished reading scalar field.\n"); fflush(stdout); } } #ifdef MPI xchange_gauge(g_gauge_field); #endif /*compute the energy of the gauge field*/ plaquette_energy = measure_plaquette( (const su3**) g_gauge_field); if (g_cart_id == 0) { printf("# The computed plaquette value is %e.\n", plaquette_energy / (6.*VOLUME*g_nproc)); fflush(stdout); } #ifdef MPI for( int s=0; s<numbScalarFields; s++ ) generic_exchange(g_scalar_field[s], sizeof(scalar)); #endif /*initialize the bispinor fields*/ j_max=1; sdt=0.; // w random_spinor_field_lexic( (spinor*)(g_bispinor_field[4]), reproduce_randomnumber_flag, RN_GAUSS); random_spinor_field_lexic( (spinor*)(g_bispinor_field[4])+VOLUME, reproduce_randomnumber_flag, RN_GAUSS); // for the D^\dagger test: // v random_spinor_field_lexic( (spinor*)(g_bispinor_field[5]), reproduce_randomnumber_flag, RN_GAUSS); random_spinor_field_lexic( (spinor*)(g_bispinor_field[5])+VOLUME, reproduce_randomnumber_flag, RN_GAUSS); #if defined MPI generic_exchange(g_bispinor_field[4], sizeof(bispinor)); #endif // print L2-norm of source: double squarenorm = square_norm((spinor*)g_bispinor_field[4], 2*VOLUME, 1); if(g_proc_id==0) { printf("\n# square norm of the source: ||w||^2 = %e\n\n", squarenorm); fflush(stdout); } double t_MG, t_BK; /* inversion needs to be done first because it uses loads of the g_bispinor_fields internally */ #if TEST_INVERSION if(g_proc_id==1) printf("Testing inversion\n"); // Bartek's operator t1 = gettime(); cg_her_bi(g_bispinor_field[9], g_bispinor_field[4], 25000, 1.0e-14, 0, VOLUME, &Q2_psi_BSM2b); t_BK = gettime() - t1; // Marco's operator t1 = gettime(); cg_her_bi(g_bispinor_field[8], g_bispinor_field[4], 25000, 1.0e-14, 0, VOLUME, &Q2_psi_BSM2m); t_MG = gettime() - t1; 
if(g_proc_id==0) printf("Operator inversion time: t_MG = %f sec \t t_BK = %f sec\n\n", t_MG, t_BK); #endif /* now apply the operators to the same bispinor field and do various comparisons */ // Marco's operator #ifdef MPI MPI_Barrier(MPI_COMM_WORLD); #endif t_MG = 0.0; t1 = gettime(); D_psi_BSM2m(g_bispinor_field[0], g_bispinor_field[4]); t1 = gettime() - t1; #ifdef MPI MPI_Allreduce (&t1, &t_MG, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); #else t_MG = t1; #endif // Bartek's operator #ifdef MPI MPI_Barrier(MPI_COMM_WORLD); #endif t_BK = 0.0; t1 = gettime(); D_psi_BSM2b(g_bispinor_field[1], g_bispinor_field[4]); t1 = gettime() - t1; #ifdef MPI MPI_Allreduce (&t1, &t_BK, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); #else t_BK = t1; #endif if(g_proc_id==0) printf("Operator application time: t_MG = %f sec \t t_BK = %f sec\n\n", t_MG, t_BK); squarenorm = square_norm((spinor*)g_bispinor_field[0], 2*VOLUME, 1); if(g_proc_id==0) { printf("# || D_MG w ||^2 = %.16e\n", squarenorm); fflush(stdout); } squarenorm = square_norm((spinor*)g_bispinor_field[1], 2*VOLUME, 1); if(g_proc_id==0) { printf("# || D_BK w ||^2 = %.16e\n\n\n", squarenorm); fflush(stdout); } diff( (spinor*)g_bispinor_field[3], (spinor*)g_bispinor_field[0], (spinor*)g_bispinor_field[1], 2*VOLUME); printf("element-wise difference between (D_BK w) and (D_MG w)\n"); printf("( D_MG w - M_BK w )->sp_up.s0.c0= %.16e + I*(%.16e)\n\n", creal(g_bispinor_field[3][0].sp_up.s0.c0), cimag(g_bispinor_field[3][0].sp_up.s0.c0) ); double diffnorm = square_norm( (spinor*) g_bispinor_field[3], 2*VOLUME, 1 ); if(g_proc_id==0){ printf("Square norm of the difference\n"); printf("|| D_MG w - D_BK w ||^2 = %.16e \n\n\n", diffnorm); } // < D w, v > printf("Check consistency of D and D^dagger\n"); _Complex double prod1_MG = scalar_prod( (spinor*)g_bispinor_field[0], (spinor*)g_bispinor_field[5], 2*VOLUME, 1 ); if(g_proc_id==0) printf("< D_MG w, v > = %.16e + I*(%.16e)\n", creal(prod1_MG), cimag(prod1_MG)); _Complex double prod1_BK = 
scalar_prod( (spinor*)g_bispinor_field[1], (spinor*)g_bispinor_field[5], 2*VOLUME, 1 ); if(g_proc_id==0) printf("< D_BK w, v > = %.16e + I*(%.16e)\n\n", creal(prod1_BK), cimag(prod1_BK)); // < w, D^\dagger v > t_MG = gettime(); D_psi_dagger_BSM2m(g_bispinor_field[6], g_bispinor_field[5]); t_MG = gettime()-t_MG; t_BK = gettime(); D_psi_dagger_BSM2b(g_bispinor_field[7], g_bispinor_field[5]); t_BK = gettime() - t_BK; if(g_proc_id==0) printf("Operator dagger application time: t_MG = %f sec \t t_BK = %f sec\n\n", t_MG, t_BK); _Complex double prod2_MG = scalar_prod((spinor*)g_bispinor_field[4], (spinor*)g_bispinor_field[6], 2*VOLUME, 1); _Complex double prod2_BK = scalar_prod((spinor*)g_bispinor_field[4], (spinor*)g_bispinor_field[7], 2*VOLUME, 1); if( g_proc_id == 0 ){ printf("< w, D_MG^dagger v > = %.16e + I*(%.16e)\n", creal(prod2_MG), cimag(prod2_MG)); printf("< w, D_BK^dagger v > = %.16e + I*(%.16e)\n", creal(prod2_BK), cimag(prod2_BK)); printf("\n| < D_MG w, v > - < w, D_MG^dagger v > | = %.16e\n",cabs(prod2_MG-prod1_MG)); printf("| < D_BK w, v > - < w, D_BK^dagger v > | = %.16e\n\n",cabs(prod2_BK-prod1_BK)); } #if TEST_INVERSION // check result of inversion Q2_psi_BSM2m(g_bispinor_field[10], g_bispinor_field[8]); Q2_psi_BSM2b(g_bispinor_field[11], g_bispinor_field[8]); assign_diff_mul((spinor*)g_bispinor_field[10], (spinor*)g_bispinor_field[4], 1.0, 2*VOLUME); assign_diff_mul((spinor*)g_bispinor_field[11], (spinor*)g_bispinor_field[4], 1.0, 2*VOLUME); double squarenorm_MGMG = square_norm((spinor*)g_bispinor_field[10], 2*VOLUME, 1); double squarenorm_BKMG = square_norm((spinor*)g_bispinor_field[11], 2*VOLUME, 1); if(g_proc_id==0) { printf("# ||Q2_MG*(Q2_MG)^-1*(b)-b||^2 = %.16e\n\n", squarenorm_MGMG); printf("# ||Q2_BK*(Q2_MG)^-1*(b)-b||^2 = %.16e\n\n", squarenorm_BKMG); fflush(stdout); } Q2_psi_BSM2b(g_bispinor_field[10], g_bispinor_field[9]); Q2_psi_BSM2m(g_bispinor_field[11], g_bispinor_field[9]); assign_diff_mul((spinor*)g_bispinor_field[10], 
(spinor*)g_bispinor_field[4], 1.0, 2*VOLUME); assign_diff_mul((spinor*)g_bispinor_field[11], (spinor*)g_bispinor_field[4], 1.0, 2*VOLUME); double squarenorm_BKBK = square_norm((spinor*)g_bispinor_field[10], 2*VOLUME, 1); double squarenorm_MGBK = square_norm((spinor*)g_bispinor_field[11], 2*VOLUME, 1); if(g_proc_id==0) { printf("# ||Q2_BK*(Q2_BK)^-1*(b)-b||^2 = %.16e\n\n", squarenorm_BKBK); printf("# ||Q2_MG*(Q2_BK)^-1*(b)-b||^2 = %.16e\n\n", squarenorm_MGBK); fflush(stdout); } #endif #ifdef OMP free_omp_accumulators(); #endif free_gauge_field(); free_geometry_indices(); free_bispinor_field(); free_scalar_field(); #ifdef MPI MPI_Barrier(MPI_COMM_WORLD); MPI_Finalize(); #endif return(0); }
/* Florian Burger 4.11.2009 */
/*
 * Fills P and Q with a random source living on the slice of fixed global
 * z coordinate.
 *
 * Both fields are zeroed first. For every global (t, x, y) on the slice and
 * every spin (4) / colour (3) component one random number is drawn and turned
 * into one of the four phases (+-1 +- i)/sqrt(2). The value is stored only by
 * the process that owns the site: in P when the sum of the global coordinates
 * is even, in Q otherwise.
 *
 * P, Q    output fields, VOLUME/2 sites each
 * z       global z coordinate of the source slice
 * sample  sample number, enters the seed
 * nstore  configuration number, enters the seed
 *
 * The generator is re-seeded for the duration of the call; if it had been
 * initialised before (ranlxd_init == 1) its state is saved and restored.
 */
void source_generation_pion_zdir(spinor * const P, spinor * const Q,
                                 const int z,
                                 const int sample, const int nstore) {
  const double inv_sqrt2 = 1./sqrt(2.);
  int saved_state[105];
  int restore = 0;
  int owner = 0;
  int coords[4];
  double rnd;
  double re_part = 0., im_part = 0.;

  zero_spinor_field(P,VOLUME/2);
  zero_spinor_field(Q,VOLUME/2);

  /* remember the current generator state so it can be put back afterwards */
  if(ranlxd_init == 1) {
    rlxd_get(saved_state);
    restore = 1;
  }

  /* the seed depends on sample, slice, configuration and process id */
  rlxd_init(1, (int) abs(1 + sample + z*10*97 + nstore*100*53 + g_cart_id*13));

  const int lz = z - g_proc_coords[3]*LZ;
  coords[3] = z / LZ;

  for(int t = 0; t < T*g_nproc_t; t++) {
    const int lt = t - g_proc_coords[0]*T;
    coords[0] = t / T;

    for(int x = 0; x < LX*g_nproc_x; x++) {
      const int lx = x - g_proc_coords[1]*LX;
      coords[1] = x / LX;

      for(int y = 0; y < LY*g_nproc_y; y++) {
        const int ly = y - g_proc_coords[2]*LY;
        coords[2] = y / LY;

#ifdef MPI
        MPI_Cart_rank(g_cart_grid, coords, &owner);
#endif
        for(int comp = 0; comp < 4*3; comp++) {
          /* comp enumerates 3*is+ic, spin index outermost; the random
           * number is consumed on every process, owner or not */
          ranlxd(&rnd, 1);
          if(g_cart_id != owner) {
            continue;
          }

          switch((int)floor(4.*rnd)) {
          case 0:
            im_part = inv_sqrt2;
            re_part = inv_sqrt2;
            break;
          case 1:
            im_part = -inv_sqrt2;
            re_part = inv_sqrt2;
            break;
          case 2:
            im_part = inv_sqrt2;
            re_part = -inv_sqrt2;
            break;
          default:
            im_part = -inv_sqrt2;
            re_part = -inv_sqrt2;
            break;
          }

          const int eo_index = g_lexic2eosub[ g_ipt[lt][lx][ly][lz] ];
          /* local coordinate + process offset is the global coordinate,
           * so the parity is that of t+x+y+z */
          complex * const site =
            ((t + x + y + z)%2 == 0) ? (complex*)(P + eo_index)
                                     : (complex*)(Q + eo_index);
          site[comp].re = re_part;
          site[comp].im = im_part;
        }
      }
    }
  }

  /* put the generator back into its previous state */
  if(restore) {
    rlxd_reset(saved_state);
  }
}
/*
 * Fills P and Q with a random source in one spin/colour component on a
 * sub-lattice of sites.
 *
 * Both fields are zeroed first. The visited global sites are t, t+nt, ... in
 * time and 0, nx, 2*nx, ... in each of the three spatial directions. One
 * random number is drawn per visited site; the process owning the site
 * stores the resulting phase in component (is, ic), in P when the sum of the
 * global coordinates is even and in Q otherwise.
 *
 * P, Q    output fields, VOLUME/2 sites each
 * is, ic  spin and colour index of the component that is set
 * t, nt   first global time slice and stride in time
 * nx      stride used for x, y and z
 * sample  sample number, enters the seed
 * nstore  configuration number, enters the seed
 * meson   non-zero: phase is one of (+-1 +- i)/sqrt(2);
 *         zero: phase is one of the three cube roots of unity
 *
 * The generator is re-seeded for the duration of the call; if it had been
 * initialised before (ranlxd_init == 1) its state is saved and restored.
 */
void source_generation_nucleon(spinor * const P, spinor * const Q,
                               const int is, const int ic,
                               const int t, const int nt, const int nx,
                               const int sample, const int nstore,
                               const int meson) {
  /* (cos, sin) of the angles 0, 2*pi/3 and 4*pi/3 */
  const double c0=1.;
  const double s0=0.;
  const double c1=cos(2.*M_PI/3.);
  const double s1=sin(2.*M_PI/3.);
  const double c2=cos(4.*M_PI/3.);
  const double s2=sin(4.*M_PI/3.);
  const double inv_sqrt2 = 1./sqrt(2.);

  int saved_state[105];
  int restore = 0;
  int owner = 0;
  int coords[4];
  double rnd;
  double re_part = 0., im_part = 0.;

  zero_spinor_field(P,VOLUME/2);
  zero_spinor_field(Q,VOLUME/2);

  /* remember the current generator state so it can be put back afterwards */
  if(ranlxd_init == 1) {
    rlxd_get(saved_state);
    restore = 1;
  }

  /* the seed depends on sample, start time slice and configuration */
  rlxd_init(1, (int) abs(1 + sample + t*10*97 + nstore*100*53));

  for(int gt = t; gt < T*g_nproc_t; gt += nt) {
    const int lt = gt - g_proc_coords[0]*T;
    coords[0] = gt / T;

    for(int gx = 0; gx < LX*g_nproc_x; gx += nx) {
      const int lx = gx - g_proc_coords[1]*LX;
      coords[1] = gx / LX;

      for(int gy = 0; gy < LY*g_nproc_y; gy += nx) {
        const int ly = gy - g_proc_coords[2]*LY;
        coords[2] = gy / LY;

        for(int gz = 0; gz < LZ*g_nproc_z; gz += nx) {
          const int lz = gz - g_proc_coords[3]*LZ;
          coords[3] = gz / LZ;

#ifdef MPI
          MPI_Cart_rank(g_cart_grid, coords, &owner);
#endif
          /* the random number is consumed on every process, owner or not */
          ranlxd(&rnd, 1);
          if(g_cart_id != owner) {
            continue;
          }

          if(meson) {
            switch((int)floor(4.*rnd)) {
            case 0:
              im_part = inv_sqrt2;
              re_part = inv_sqrt2;
              break;
            case 1:
              im_part = -inv_sqrt2;
              re_part = inv_sqrt2;
              break;
            case 2:
              im_part = inv_sqrt2;
              re_part = -inv_sqrt2;
              break;
            default:
              im_part = -inv_sqrt2;
              re_part = -inv_sqrt2;
              break;
            }
          }
          else {
            switch((int)floor(3.*rnd)) {
            case 0:
              im_part = s0;
              re_part = c0;
              break;
            case 1:
              im_part = s1;
              re_part = c1;
              break;
            default:
              im_part = s2;
              re_part = c2;
              break;
            }
          }

          const int eo_index = g_lexic2eosub[ g_ipt[lt][lx][ly][lz] ];
          /* local coordinate + process offset is the global coordinate,
           * so the parity is that of gt+gx+gy+gz */
          complex * const site =
            ((gt + gx + gy + gz)%2 == 0) ? (complex*)(P + eo_index)
                                         : (complex*)(Q + eo_index);
          site[3*is+ic].re = re_part;
          site[3*is+ic].im = im_part;
        }
      }
    }
  }

  /* put the generator back into its previous state */
  if(restore) {
    rlxd_reset(saved_state);
  }
}
/*
 * Thin wrapper around ranlxd() that receives the vector length by reference.
 * Fills vec[0 .. *lvec-1] by forwarding to ranlxd(vec, *lvec).
 * NOTE(review): the trailing underscore and by-reference length suggest a
 * Fortran-callable entry point -- confirm against the callers.
 */
void ranlxdf_(double vec[],int *lvec)
{
  ranlxd(vec, *lvec);
}
/*
 * Event-driven simulation of hard spheres in two dimensions.
 *
 * Interactive flow:
 *   1. read the number of spheres and the packing fraction, derive the
 *      sphere diameter and place the spheres with BCCdisp();
 *   2. stop with a warning if any two spheres overlap;
 *   3. draw random velocities in [-1, 1] and shift them so that the total
 *      momentum vanishes;
 *   4. fill the collision-time matrix, run the requested number of
 *      thermalisation collisions, print kinetic energy and temperature;
 *   5. sample all velocities once every nsfere collisions and write the
 *      histograms of v_x and v_y to "fv_x.txt" and "fv_y.txt".
 *
 * Returns 0 on success and when the overlap warning is issued, EXIT_FAILURE
 * on invalid input, allocation failure or output-file failure. All buffers
 * and files are released on every path.
 */
int main(void)
{
    int status = EXIT_FAILURE;
    int i = 0, j = 0;
    int nsfere = 0, nurti = 0, nbin = 0;
    size_t n = 0, n_samples = 0;
    double diametro = 0.0, frimp = 0.0, tempo = 0.0;
    double encin = 0.0, temperatura = 0.0, auxilium = 0.0;
    double vmedia[2] = { 0.0, 0.0 };
    double *x = NULL, *y = NULL, *vx = NULL, *vy = NULL;
    double *dativx = NULL, *dativy = NULL;
    double **matricetempi = NULL;
    FILE *filefreqvx = NULL, *filefreqvy = NULL;

    /* Title */
    printf("\n\n__________SFERE RIGIDE IN 2 DIMENSIONI__________\n\n");

    /* Ask for the number of spheres */
    printf("Numero sfere (usare 2, 8, 18, 32, 50, 72, 98, 128, 162, 200, ...): ");
    if (scanf("%d", &nsfere) != 1 || nsfere <= 0) {
        fprintf(stderr, "\nErrore: numero di sfere non valido\n");
        goto cleanup;
    }
    n = (size_t)nsfere;

    /* Ask for the packing fraction */
    printf("\n\nFrazione di impacchettamento: ");
    if (scanf("%lf", &frimp) != 1 || !(frimp > 0.0)) {
        fprintf(stderr, "\nErrore: frazione di impacchettamento non valida\n");
        goto cleanup;
    }

    /* Diameter from the packing fraction */
    diametro = 2.0*sqrt(frimp/((double)(nsfere)*PI));
    printf("\n\nDiametro = %lf\n", diametro);

    /* Positions and velocities */
    x = malloc(n * sizeof *x);
    y = malloc(n * sizeof *y);
    vx = malloc(n * sizeof *vx);
    vy = malloc(n * sizeof *vy);
    if (x == NULL || y == NULL || vx == NULL || vy == NULL) {
        fprintf(stderr, "\nErrore: memoria insufficiente\n");
        goto cleanup;
    }
    for (i = 0; i < nsfere; i++) {
        x[i] = 0.0;
        y[i] = 0.0;
        vx[i] = 0.0;
        vy[i] = 0.0;
    }

    /* Create the bcc arrangement */
    BCCdisp( x, y, nsfere, diametro );

    /* Make sure no two spheres overlap because the packing fraction is too
     * large. This is reported as a warning and ends the program with
     * status 0. */
    for (i = 0; i < nsfere - 1; i++) {
        for (j = i + 1; j < nsfere; j++) {
            if (distanza(x, y, i, j) <= diametro) {
                printf("\n\nAttenzione: la frazione di impacchettamento è troppo alta e le sfere %d e %d entrano una nell'altra\n\n", i, j);
                printf("\ndistanza = %lf\n", distanza(x, y, i, j));
                status = 0;
                goto cleanup;
            }
        }
    }

    /* Initialise the random number generator */
    rlxd_init(1, 1);

    /* Random velocities: the numbers are drawn in [0, 1], so double them and
     * shift by -1 to obtain velocities in [-1, 1]. */
    ranlxd( vx, nsfere );
    ranlxd( vy, nsfere );
    for (i = 0; i < nsfere; i++) {
        vx[i] = 2.0*vx[i] - 1.0;
        vy[i] = 2.0*vy[i] - 1.0;
    }

    /* Components of the mean velocity */
    for (i = 0; i < nsfere; i++) {
        vmedia[0] = vmedia[0] + vx[i];
        vmedia[1] = vmedia[1] + vy[i];
    }
    vmedia[0] = vmedia[0]/(double)(nsfere);
    vmedia[1] = vmedia[1]/(double)(nsfere);

    /* Shift the velocities so that the total momentum is zero */
    for (i = 0; i < nsfere; i++) {
        vx[i] = vx[i] - vmedia[0];
        vy[i] = vy[i] - vmedia[1];
    }

    /* Collision-time matrix; calloc leaves every row pointer NULL so that a
     * partially built matrix can be released safely. */
    matricetempi = calloc(n, sizeof *matricetempi);
    if (matricetempi == NULL) {
        fprintf(stderr, "\nErrore: memoria insufficiente\n");
        goto cleanup;
    }
    for (i = 0; i < nsfere; i++) {
        matricetempi[i] = malloc(n * sizeof **matricetempi);
        if (matricetempi[i] == NULL) {
            fprintf(stderr, "\nErrore: memoria insufficiente\n");
            goto cleanup;
        }
        for (j = 0; j < nsfere; j++) {
            matricetempi[i][j] = 0.0;
        }
    }

    /* Fill half of the matrix with the collision times of all spheres; the
     * other half is symmetric and is never read. */
    for (i = 0; i < nsfere; i++) {
        for (j = 0; j <= i; j++) {
            matricetempi[i][j] = tempocollisione( x, y, vx, vy, nsfere, diametro, i, j );
        }
    }

    printf("\nTempo iniziale = %lf\n", tempo);

    printf("\n\nNumero urti per la termalizzazione: ");
    if (scanf("%d", &nurti) != 1) {
        fprintf(stderr, "\nErrore: numero di urti non valido\n");
        goto cleanup;
    }
    printf("\n\n");

    /* Perform nurti collisions */
    for (i = 0; i < nurti; i++) {
        tempo = tempo + urto( x, y, vx, vy, matricetempi, nsfere, diametro, &auxilium, &auxilium );
        printf("Tempo = %lf\n", tempo);
    }

    /* Total kinetic energy */
    for (i = 0; i < nsfere; i++) {
        encin = encin + vx[i]*vx[i] + vy[i]*vy[i];
    }
    encin = encin/2.0;

    /* Temperature from the equipartition theorem */
    temperatura = encin/(double)(nsfere);

    printf("\nEnergia cinetica totale = %lf\nTemperatura*kb = %lf\n", encin, temperatura );

    /* Number of velocity samplings for the histogram; the number of data
     * points will be nsfere*nurti. */
    printf("\n\nNumero campionamenti per l'istogramma delle velocità: ");
    if (scanf("%d", &nurti) != 1) {
        nurti = 0;
    }

    /* Number of bins (must be even) */
    printf("\nNumero di bin (pari): ");
    if (scanf("%d", &nbin) != 1) {
        nbin = 0;
    }
    printf("\n");

    if (nurti <= 0 || nbin <= 0) {
        fprintf(stderr, "Errore: numero di campionamenti o di bin non valido\n");
        goto cleanup;
    }
    n_samples = (size_t)nurti;

    /* Buffers holding every sampled velocity; the element count is computed
     * in size_t and checked so the byte size cannot wrap around. */
    if (n_samples > ((size_t)-1 / sizeof(double)) / n) {
        fprintf(stderr, "Errore: memoria insufficiente\n");
        goto cleanup;
    }
    dativx = malloc(n * n_samples * sizeof *dativx);
    dativy = malloc(n * n_samples * sizeof *dativy);
    if (dativx == NULL || dativy == NULL) {
        fprintf(stderr, "Errore: memoria insufficiente\n");
        goto cleanup;
    }

    /* One velocity sampling every nsfere collisions */
    for (i = 0; i < nurti; i++) {
        for (j = 0; j < nsfere; j++) {
            tempo = tempo + urto( x, y, vx, vy, matricetempi, nsfere, diametro, &auxilium, &auxilium );
        }
        printf("Tempo = %lf\n", tempo);
        for (j = 0; j < nsfere; j++) {
            dativx[(size_t)i*n + (size_t)j] = vx[j];
            dativy[(size_t)i*n + (size_t)j] = vy[j];
        }
    }

    filefreqvx = fopen("fv_x.txt", "w");
    filefreqvy = fopen("fv_y.txt", "w");
    if (filefreqvx == NULL || filefreqvy == NULL) {
        fprintf(stderr, "Errore: impossibile aprire i file di output\n");
        goto cleanup;
    }

    hist(dativx,nsfere,nbin,nurti,filefreqvx);
    hist(dativy,nsfere,nbin,nurti,filefreqvy);

    status = 0;

cleanup:
    /* free(NULL) is a no-op, so partially completed setups are handled too */
    free( x );
    free( y );
    free( vx );
    free( vy );
    free( dativx );
    free( dativy );
    if (matricetempi != NULL) {
        for (i = 0; i < nsfere; i++) {
            free( matricetempi[i] );
        }
        free( matricetempi );
    }
    /* fclose flushes buffered output; a failure means the histogram file
     * may be incomplete. fclose(NULL) is not allowed, hence the guards. */
    if (filefreqvx != NULL && fclose( filefreqvx ) != 0) {
        status = EXIT_FAILURE;
    }
    if (filefreqvy != NULL && fclose( filefreqvy ) != 0) {
        status = EXIT_FAILURE;
    }

    return status;
}