void LX_Fetch (void) { int c; pr_tokenclass = TK_NONE; pr_token[0] = 0; if (!pr_file_p) { pr_token_type = tt_eof; return; } LexWhitespace(); c = *pr_file_p; switch (ASCIIToChrCode[c]) { case CHR_LETTER: LexName(); return; case CHR_NUMBER: pr_token_type = tt_immediate; pr_immediate_type = &type_float; pr_immediate._float = LexNumber(); return; case CHR_DQUOTE: LexString(); return; case CHR_SQUOTE: LexVector(); return; case CHR_DOLLARSIGN: LexGrab(); return; case CHR_EOF: pr_token_type = tt_eof; return; case CHR_SPECIAL: default: LexPunctuation(); return; } }
extern "C" void stag_dirac_init(const void * gauge_u ) { gauge_field_addr = ( IFloat * ) gauge_u; int i,j,m,n; int blklen[NUM_DIR/2]; int numblk[NUM_DIR/2]; int stride[NUM_DIR/2]; int local_count[2]; int non_local_count[2]; int x[NUM_DIR/2]; char *cname = ""; char *fname = "stag_dirac_init(const void *gauge)"; if (initted !=0) { Fprintf(stderr,"stag_dirac_init already initted\n"); return; } VRB.Func(cname,fname); initted = 1; //------------------------------------------------------------------- // sg is a lexical index for (t,z,y,x) where x runs fastest. This is // the gauge field order produced by convert for staggered fermions. // // sg = x + L_x * ( y + L_y * ( z + L_z * t ) ) // // sc is a lexical index for (t,x,y,z) where t runs fastest. The // even and odd staggered color vectors are stored with indices // running in this order, except that even sites come before odd // sites. // // sc = t + L_t * ( x + L_x * ( y + L_y * z ) ) // // Similarly the color vectors are indexed by sc/2 for both even // and odd blocks. Even and odd blocks have a different base // address. //------------------------------------------------------------------- int sg, sc; //----------------------------------------------------------- // If t + x + y + z is odd, odd = 1. Otherwise it is 0. //----------------------------------------------------------- int odd; //----------------------------------------------------------- // The physics system storage order has vector indices as // 0-3, x,y,z,t. Our vector indices run 0-3 as t,x,y,z. // nn is used to hold physics system values for our index, // given by n. //----------------------------------------------------------- size[0] = GJP.TnodeSites(); size[1] = GJP.XnodeSites(); size[2] = GJP.YnodeSites(); size[3] = GJP.ZnodeSites(); vol = size[0] * size[1] * size[2] * size[3]; VRB.Result(cname,fname,"vol=%d\n",vol); non_local_chi = 2*(size[0]*size[1]*size[2] + size[1]*size[2]*size[3]+ size[2]*size[3]*size[0] + size[3]*size[0]*size[1]); local_chi = NUM_DIR*vol - non_local_chi; //------------------------------------------------------------- // flush_cache_spinor() function will flush 192 bytes * nflush //------------------------------------------------------------- nflush = vol/8; #if 0 if (vol>16000) tmpfrm = (IFloat *) smalloc ( 8 * vol/2 * VECT_LEN * sizeof(IFloat), cname,fname, "tmpfrm"); else tmpfrm = (IFloat *) fmalloc ( 8 * vol/2 * VECT_LEN * sizeof(IFloat), cname,fname, "tmpfrm"); #endif //----------------------------------------------------------------- // Allocate 8 receive buffers for off-node vectors //----------------------------------------------------------------- for ( i = 0; i < NUM_DIR; i++ ){ #if 1 chi_off_node[i] = ( IFloat * ) fmalloc(cname,fname,"chi_off_node[i]", VECT_LEN * vol * sizeof( IFloat ) / ( 2 * size[ i % 4 ] ) ); if(chi_off_node[i] == 0) ERR.Pointer(cname,fname, "chi_off_node[i]"); #else if( (vol/size[i%4])*VECT_LEN/2 >MAX_TBUF_LEN ){ ERR.General(cname,fname,"chi_off_node size overflow\n"); } #endif } //----------------------------------------------------------------- // Space for storage of pointers to chi's. 2 pointers per site, // but split into even and odd groups for the first part of the // computation (parallel transport of spinors). 9 pointers per site // to obtain the result of the application of the dirac operator //----------------------------------------------------------------- for ( i = 0; i < 2; i++ ){ VRB.Result(cname,fname,"local_chi=%d sizeof(IFloat)=%d\n",local_chi, sizeof(IFloat)); chi[i] = (IFloat **) fmalloc(9 * vol/2 * sizeof(IFloat *), cname,fname, "chi[i]"); chi_l[i] = ( IFloat ** ) fmalloc(2*(local_chi/2)*sizeof(IFloat *), cname,fname, "chi_l[i]"); chi_nl[i] = (IFloat ** ) fmalloc(2*(non_local_chi/2)*sizeof(IFloat *), cname,fname, "chi_nl[i]"); } for ( i = 0; i < 2; i++){ local_count[i] = 0; non_local_count[i] = 0; } //----------------------------------------------------------------- // Assembly written for double precision only, check sizeof(IFloat) //----------------------------------------------------------------- if ( sizeof(IFloat) != sizeof(double)){ ERR.General(cname, fname, "Assembly functions implemented only for double precision!"); } //----------------------------------------------------------------- // Loop over all directions //----------------------------------------------------------------- for ( n = 0; n < NUM_DIR; n++ ) { //----------------------------------------------------------------- // Loop over all sites //----------------------------------------------------------------- for (x[3] = 0; x[3] < size[3]; x[3]++){ for (x[2] = 0; x[2] < size[2]; x[2]++){ for (x[1] = 0; x[1] < size[1]; x[1]++){ for (x[0] = 0; x[0] < size[0]; x[0]++){ for (i = 0; i < 4 ; i++) coord[i] = x[i]; odd = ( coord[0] + coord[1] + coord[2] + coord[3] ) % 2; sg = coord[1] + size[1] * ( coord[2] + size[2] * ( coord[3] + size[3] * coord[0] )); m = (NUM_DIR + 1) * (sg/2); if ( CoordNN( n ) ) { // off-node //---------------------------------------------------------- // Assembly written for double precision only, multiplication // by sizeof(double) done to avoid a bitshift inside the // high performance code //---------------------------------------------------------- //pointer to source field (offset in the receive buffer) *( chi_nl[ odd ] + 2 * non_local_count[ odd ] ) = chi_off_node[n] + VECT_LEN * ( LexSurface( coord_nn, n%4 ) / 2 ); // pointer to temporary field where U*chi is stored *( chi_nl[ odd ] + 2 * non_local_count[ odd ] + 1) = ( IFloat *) ( VECT_LEN * (NUM_DIR * int(sg/2) + n ) * sizeof(IFloat)); // pointer to the above temporary field *( chi[ odd ] + m + n + 1) = ( IFloat *) ( VECT_LEN * (NUM_DIR * int(sg/2) + n) * sizeof(IFloat)); // Pointer to solution field *( chi[ odd ] + m ) = ( IFloat * ) ( VECT_LEN * (LexVector( coord ) / 2 ) * sizeof(IFloat)); non_local_count[odd]++; } else{//on node //pointer to source field *( chi_l[ odd ] + 2 * local_count[ odd ] ) = ( IFloat * ) ( VECT_LEN * ( LexVector( coord_nn ) / 2 ) * sizeof(IFloat)); // pointer to temporary field where U*chi is stored *( chi_l[ odd ] + 2 * local_count[ odd ] + 1) = ( IFloat * ) ( VECT_LEN * (NUM_DIR * int(sg/2) + n) * sizeof(IFloat)); // pointer to the above temporary field *( chi[ odd ] + m + n + 1) = ( IFloat *) ( VECT_LEN * (NUM_DIR * int(sg/2) + n) * sizeof(IFloat)); // pointer to solution field *( chi[ odd ] + m ) = ( IFloat * ) ( VECT_LEN * (LexVector( coord ) / 2 ) * sizeof(IFloat)); local_count[odd]++; } } } } } } #if 0 char buf[200]; sprintf(buf,"chi.h"); int fd = open(buf,O_CREAT|O_TRUNC|O_RDWR,00644); for(j=0;j<2;j++){ sprintf(buf,"IFloat * chi%d[] LOCATE(\"edramtransient\") = {\n",j); write(fd,buf,strlen(buf)); sprintf(buf," (IFloat *) %d",*(chi[j])); write(fd,buf,strlen(buf)); for(i=1;i< 9*vol/2;i++){ sprintf(buf,",\n (IFloat *) %d",*(chi[j]+i)); write(fd,buf,strlen(buf)); } sprintf(buf,"\n};\n"); write(fd,buf,strlen(buf)); } close(fd); #endif #if 0 char filename[200]; sprintf(filename,"%s_%d%d%d%d%d%d", chi_l_filename,CoorX(), CoorY(), CoorZ(), CoorT(), CoorS(), CoorW()); FILE *fp = Fopen(filename,"w"); for(j=0;j<2;j++){ Fprintf(fp,"IFloat * chi_l%d[] LOCATE(\"edramtransient\") = {\n",j); Fprintf(fp," (IFloat *) %d",*(chi_l[j])); for(i=1;i< 2*(local_chi/2);i++){ Fprintf(fp,",\n (IFloat *) %d",*(chi_l[j]+i)); } Fprintf(fp,"\n};\n"); } Fclose(fp); #endif #if 0 char filename[200]; sprintf(filename,"%s_%d%d%d%d%d%d", chi_nl_filename, CoorX(), CoorY(), CoorZ(), CoorT(), CoorS(), CoorW()); FILE *fp = Fopen(filename,"w"); for(j=0;j<2;j++){ Fprintf(fp,"IFloat * chi_nl%d[] LOCATE(\"edramtransient\") = {\n",j); Fprintf(fp," (IFloat *) 0x%x",*(chi_nl[j])); for(i=1;i< 2*(non_local_chi/2);i++){ Fprintf(fp,",\n (IFloat *) 0x%x",*(chi_nl[j]+i)); } Fprintf(fp,"\n};\n"); } Fclose(fp); #endif //------------------------------------------------------------------- // Set up SCU buffer parameters. T direction is special, since // the block-strided move will not work here. //------------------------------------------------------------------- blklen[0] = VECT_LEN * sizeof(IFloat) * size[1] * size[2] * size[3] / 2; blklen[1] = VECT_LEN * sizeof(IFloat) * size[0] / 2; blklen[2] = VECT_LEN * sizeof(IFloat) * size[0] * size[1] / 2; blklen[3] = VECT_LEN * sizeof(IFloat) * size[0] * size[1] * size[2] / 2; numblk[0] = 1; numblk[1] = size[2] * size[3]; numblk[2] = size[3]; numblk[3] = 1; stride[0] = 0; stride[1] = (VECT_LEN * size[0] * ( size[1] - 1 ) / 2)*sizeof(IFloat); stride[2] = (VECT_LEN * size[0] * size[1] * ( size[2] - 1 ) / 2)* sizeof(IFloat) ; stride[3] = 0; //------------------------------------------------------------------- // Calculate offsets for T transfers done one word at a time. // We have plus (P) transfers for both the even and odd // checkerboards. Same for minus (M) transfers. //------------------------------------------------------------------- for ( i = 0; i < 2; i++ ) { #if 1 Tbuffer[i] = (IFloat *) qalloc (QFAST|QNONCACHE, size[1] * size[2] * size[3] * VECT_LEN * sizeof( IFloat ) / 2); if(!Tbuffer) ERR.Pointer(cname, fname, "Tbuffer"); #else if( size[1]*size[2]*size[3]*VECT_LEN/2 >MAX_TBUF_LEN ){ ERR.General(cname,fname,"Tbuffer size overflow\n"); } #endif ToffsetP[i] = ( int * ) fmalloc ( size[1] * size[2] * size[3] * sizeof( int ) / 2 ); ToffsetM[i] = ( int * ) fmalloc ( size[1] * size[2] * size[3] * sizeof( int ) / 2 ); countP[i] = 0; countM[i] = 0; } // printf("dirac_init: Set up SCU parameters\n"); for ( sg = 0; sg < vol; sg++ ) { odd = SetCoord( sg ); sc = LexVector( coord ); if ( coord[0] == 0 ) { *( ToffsetM[ odd ] + countM[ odd ] ) = VECT_LEN * ( sc / 2 ); countM[ odd ]++; } if ( coord[0] == size[0] - 1 ) { *( ToffsetP[ odd ] + countP[ odd ] ) = VECT_LEN * ( sc / 2 ); countP[ odd ]++; } } // printf("dirac_init: Set up SCU parameters\n"); //------------------------------------------------------------------- // Index i says data has been received from TP, XP, YP, ZP, TM, XM, // YM, ZM //------------------------------------------------------------------- // for(i=0;i<4;i++) // printf("blklen numblk stride [%d]= %d %d %d\n",i, blklen[i],numblk[i],stride[i]); for ( i = 0; i < NUM_DIR; i++ ) { j = i % (NUM_DIR/2); // SCUarg[i + 8] = new SCUDirArgIR; // printf("%d: %p %d\n",i+8,chi_off_node[i],blklen[j]*numblk[j]); SCUarg[i + 8] = new SCUDirArgIR(chi_off_node[i], scudir[i], SCU_REC, blklen[j]*numblk[j], 1, 0, IR_5); // VECT_LEN * sizeof(IFloat) * vol / ( 2 * size[j] ), // 1, 0, IR_5); // buffer_flush[i] = VECT_LEN * sizeof(IFloat) * vol/ (384 * size[j]); //send arguments // SCUarg[i+8]->Print(); if ((i == 0) || ( i == 4)){ SCUarg[i] = new SCUDirArgIR(Tbuffer[(4 - i)/4], scudir[i], SCU_SEND, blklen[j], numblk[j], stride[j], IR_5 ); } else{ SCUarg[i] = new SCUDirArgIR(Tbuffer[0], scudir[i], SCU_SEND, blklen[j], numblk[j], stride[j], IR_5 ); } // SCUarg[i]->Print(); // printf("SCUarg[%d] done\n",i); } // for(i = 0;i<2*NUM_DIR;i++) SCUarg[i]->Print(); SCUmulti = new SCUDirArgMulti(); SCUmulti->Init(SCUarg, 16); // for(i = 0;i<2*NUM_DIR;i++) SCUarg[i]->Print(); //------------------------------------------------------------------- // Need send offsets for various transfers. The index for // sends is TM, XM, YM, ZM, TP, XP, YP, ZP, since the // transfers are indexed by the node data is received from. //------------------------------------------------------------------- Xoffset[0] = 0; Xoffset[1] = VECT_LEN * size[0] * (size[1] - 1) / 2; Xoffset[2] = VECT_LEN * size[0] * size[1] * (size[2] - 1) / 2; Xoffset[3] = VECT_LEN * size[0] * size[1] * size[2] * (size[3]-1) / 2; Xoffset[4] = 0; Xoffset[5] = 0; Xoffset[6] = 0; Xoffset[7] = 0; // print("dirac_init: Done\n"); }
void PT::set_hop_pointer() { char *fname = "set_hop_pointer()"; // VRB.Func("PT",fname); //Actual memory usage of vectors int vlen = VECT_LEN*sizeof(IFloat); int vlen2 =VECT_LEN_OUT*sizeof(IFloat); int x[NDIM], nei[NDIM]; //Counts how many parallel transports of given length and direction are local //and non-local, respectively int hp_local_count[MAX_HOP][2*NDIM]; int hp_non_local_count[MAX_HOP][2*NDIM]; int hop, i; //Initialize local and non-local hop counters. for (hop=0; hop<MAX_HOP; hop++) { for (i=0; i<2*NDIM; i++) { hp_non_local_count[hop][i] = 0; hp_local_count[hop][i] = 0; } } //For a given length of the parallel transport for (hop = 1; hop <= MAX_HOP; hop++) { hop_pointer **h_l = hp_l[hop-1]; hop_pointer **h_nl = hp_nl[hop-1]; //Local and non-local counts for given length of the hop int *local_count = hp_local_count[hop-1]; int *non_local_count = hp_non_local_count[hop-1]; //Loop over all directions for (i=0; i<NDIM; i++) { //Total number of sites that require non-local communication int non_local_check = hop*non_local_chi[i*2]; //Total number of sites where parallel transport can be done locally int local_check = vol - non_local_check; //Loop through all the sites on the lattice //nei represents the coordinates of the neighboring site. for(x[3]=0,nei[3]=0;x[3]<size[3];x[3]++,nei[3]++) for(x[2]=0,nei[2]=0;x[2]<size[2];x[2]++,nei[2]++) for(x[1]=0,nei[1]=0;x[1]<size[1];x[1]++,nei[1]++) for(x[0]=0,nei[0]=0;x[0]<size[0];x[0]++,nei[0]++){ //This is the parallel transport of the field in the //negative direction to another node //"Positive hop" because the link variable points in the //positive direction, even though the resulting field is //"transported" in the negative direction // positive direction if((x[i] < hop) && (!local[i])){ //This calculates the neighbor coordinate nei[i] = size[i]-hop+x[i]; //Sets the index for source and destination (h_nl[2*i]+non_local_count[2*i])->src = non_local_count[2*i]*vlen; (h_nl[2*i]+non_local_count[2*i])->dest = LexVector(nei)*vlen2; //Increments the non-local count non_local_count[i*2]++; //Make sure we haven't gone over the non non-local check if (non_local_count[i*2]>non_local_check) fprintf(stderr, "%s:non_local_count[%d](%d)>non_local_check[%d](%d)\n", fname,2*i,non_local_count[2*i],2*i,non_local_check); //The rest of the parallel transports in the local volume can //be handled locally } else { //Calculate the new coordinate nei[i] = (size[i]+x[i]-hop)%size[i]; //if ( size[i] >2){ //Calculate the index for the source and the destination (h_l[2*i]+local_count[2*i])->src = LexVector(x)*vlen; (h_l[2*i]+local_count[2*i])->dest = LexVector(nei)*vlen2; //} //Increment the local count local_count[i*2]++; //Make sure we haven't exceeded the number of local sites if (local_count[i*2]>local_check) fprintf(stderr,"%s:local_count[%d](%d)>local_check[%d](%d)\n", fname,2*i,local_count[2*i],2*i,local_check); } //Consider hopping in the negative direction, which is parallel //transport in the positive direction // negative direction if( (x[i] >= (size[i]-hop)) && (!local[i])){ //Calculate the non-local coordinate for this hop nei[i] = (x[i]+hop)%size[i]; //Calculate source and destination indices (h_nl[2*i+1]+non_local_count[2*i+1])->src = non_local_count[2*i+1]*vlen; (h_nl[2*i+1]+non_local_count[2*i+1])->dest = LexVector(nei)*vlen2; //Increment the non-local count, check that bounds have not //been exceeded non_local_count[i*2+1]++; if (non_local_count[i*2]>non_local_check) fprintf(stderr,"%s:non_local_count[%d](%d)>non_local_check[%d](%d)\n", fname,2*i,non_local_count[2*i],2*i,non_local_check); } else { //Calculate the local coordinate for this hop nei[i] = (x[i]+hop)%size[i]; //Calculate source and destination indices //if ( size[i] >2){ (h_l[2*i+1]+local_count[2*i+1])->src = LexVector(x)*vlen; (h_l[2*i+1]+local_count[2*i+1])->dest = LexVector(nei)*vlen2; //} //Increment local count, check that bounds not exceeded local_count[i*2+1]++; if (local_count[i*2]>local_check) fprintf(stderr,"%s:local_count[%d](%d)>local_check[%d](%d)\n", fname,2*i,local_count[2*i],2*i,local_check); } // Need to reset the neighbour pointer nei[i] = x[i]; } } } // VRB.Func("PT",fname); // exit(44); }