Ejemplo n.º 1
0
/*
 * LX_Fetch
 *
 * Fetches the next token from the input at pr_file_p.
 *
 * Resets pr_tokenclass and pr_token, reports tt_eof when there is no input
 * pointer, otherwise calls LexWhitespace() and dispatches on the character
 * class (ASCIIToChrCode) of the current character to the matching Lex*
 * routine.  Numeric literals are stored as a float immediate.
 */
void LX_Fetch (void)
{
	int		c;

	pr_tokenclass = TK_NONE;

	pr_token[0] = 0;

	if (!pr_file_p)
	{
		pr_token_type = tt_eof;
		return;
	}

	LexWhitespace();

	/*
	 * Index the class table with the byte value (0..255).  If the input
	 * buffer is plain char and char is signed, bytes >= 0x80 would
	 * otherwise produce a negative index and read before the table.
	 * NOTE(review): assumes ASCIIToChrCode covers all 256 byte values --
	 * confirm against its definition.
	 */
	c = (unsigned char)*pr_file_p;

	switch (ASCIIToChrCode[c])
	{
	case CHR_LETTER:
		LexName();
		return;
	case CHR_NUMBER:
		pr_token_type = tt_immediate;
		pr_immediate_type = &type_float;
		pr_immediate._float = LexNumber();
		return;
	case CHR_DQUOTE:
		LexString();
		return;
	case CHR_SQUOTE:
		LexVector();
		return;
	case CHR_DOLLARSIGN:
		LexGrab();
		return;
	case CHR_EOF:
		pr_token_type = tt_eof;
		return;
	case CHR_SPECIAL:
	default:
		/* Anything not classified above is handled as punctuation. */
		LexPunctuation();
		return;
	}
}
Ejemplo n.º 2
0
//---------------------------------------------------------------------
//  stag_dirac_init
//
//  One-time setup for the staggered Dirac operator on this node.
//
//  gauge_u : address of the gauge field; stored in gauge_field_addr.
//
//  The routine
//    - refuses to run twice (prints a message to stderr and returns),
//    - reads the local lattice extents (t,x,y,z) into size[] and derives
//      vol, non_local_chi, local_chi and nflush,
//    - allocates the off-node receive buffers chi_off_node[] and the
//      pointer tables chi[], chi_l[], chi_nl[],
//    - fills those tables with byte offsets (stored as IFloat *) for
//      every direction and site, split by site parity,
//    - allocates Tbuffer[], ToffsetP[], ToffsetM[] and fills the
//      T-boundary offset lists,
//    - creates the SCU send/receive descriptors and the Xoffset[] table.
//
//  Nothing allocated here is released by this function.
//---------------------------------------------------------------------
extern "C" void stag_dirac_init(const void * gauge_u )
{
  gauge_field_addr = ( IFloat * ) gauge_u;
  int i,j,m,n;
  int blklen[NUM_DIR/2];
  int numblk[NUM_DIR/2];
  int stride[NUM_DIR/2];
  int local_count[2];
  int non_local_count[2];

  int x[NUM_DIR/2];
  char *cname = "";
  char *fname = "stag_dirac_init(const void *gauge)";
  if (initted !=0) {
    Fprintf(stderr,"stag_dirac_init already initted\n");
    return;
  }
  VRB.Func(cname,fname);
  initted = 1;

  //-------------------------------------------------------------------
  //  sg is a lexical index for (t,z,y,x) where x runs fastest. This is
  //  the gauge field order produced by convert for staggered fermions.
  //
  //    sg = x + L_x * ( y + L_y * ( z + L_z * t ) )
  //
  //  sc is a lexical index for (t,x,y,z) where t runs fastest.  The
  //  even and odd staggered color vectors are stored with indices
  //  running in this order, except that even sites come before odd
  //  sites.
  //  
  //    sc = t + L_t * ( x + L_x * ( y + L_y * z ) )
  //
  //  Similarly the color vectors are indexed by sc/2 for both even
  //  and odd blocks.  Even and odd blocks have a different base
  //  address.
  //-------------------------------------------------------------------

  int sg, sc;

  //-----------------------------------------------------------
  //  If t + x + y + z is odd, odd = 1.  Otherwise it is 0.
  //-----------------------------------------------------------

  int odd;

  //-----------------------------------------------------------
  //  The physics system storage order has vector indices as
  //  0-3, x,y,z,t.  Our vector indices run 0-3 as t,x,y,z.
  //-----------------------------------------------------------

  size[0] = GJP.TnodeSites();
  size[1] = GJP.XnodeSites();
  size[2] = GJP.YnodeSites();
  size[3] = GJP.ZnodeSites();


  vol = size[0] * size[1] * size[2] * size[3];
  VRB.Result(cname,fname,"vol=%d\n",vol);
  // Twice the sum of the four 3-dimensional face volumes.
  non_local_chi = 2*(size[0]*size[1]*size[2] + size[1]*size[2]*size[3]+
    size[2]*size[3]*size[0] + size[3]*size[0]*size[1]);
  local_chi = NUM_DIR*vol - non_local_chi;

  //-------------------------------------------------------------
  // flush_cache_spinor() function will flush 192 bytes * nflush 
  //-------------------------------------------------------------
  nflush = vol/8;

#if 0
  if (vol>16000)
  tmpfrm = (IFloat *) smalloc ( 8 * vol/2 * VECT_LEN * sizeof(IFloat),
				cname,fname, "tmpfrm");
  else
  tmpfrm = (IFloat *) fmalloc ( 8 * vol/2 * VECT_LEN * sizeof(IFloat),
				cname,fname, "tmpfrm");
#endif

  


  //-----------------------------------------------------------------
  //  Allocate 8 receive buffers for off-node vectors
  //-----------------------------------------------------------------
  
  for ( i = 0; i < NUM_DIR; i++ ){
#if 1
    chi_off_node[i] = ( IFloat * ) fmalloc(cname,fname,"chi_off_node[i]",
 VECT_LEN * vol * sizeof( IFloat ) / ( 2 * size[ i % 4 ] ) );    
    if(chi_off_node[i] == 0)
      ERR.Pointer(cname,fname, "chi_off_node[i]");
#else
    if( (vol/size[i%4])*VECT_LEN/2 >MAX_TBUF_LEN ){
	ERR.General(cname,fname,"chi_off_node size overflow\n");
    }
#endif
  }

  //-----------------------------------------------------------------
  //  Space for storage of pointers to chi's.  2 pointers per site,
  //  but split into even and odd groups for the first part of the 
  //  computation (parallel transport of spinors). 9 pointers per site
  //  to obtain the result of the application of the dirac operator
  //-----------------------------------------------------------------
  

  for ( i = 0; i < 2; i++ ){
      // sizeof yields size_t; cast so the argument matches the %d conversion.
      VRB.Result(cname,fname,"local_chi=%d sizeof(IFloat)=%d\n",local_chi,
		 (int)sizeof(IFloat));
      chi[i] = (IFloat **) fmalloc(9 * vol/2 * sizeof(IFloat *),
				   cname,fname, "chi[i]");
      chi_l[i] = ( IFloat ** ) fmalloc(2*(local_chi/2)*sizeof(IFloat *),
				       cname,fname, "chi_l[i]");
      chi_nl[i] = (IFloat ** ) fmalloc(2*(non_local_chi/2)*sizeof(IFloat *),
				       cname,fname, "chi_nl[i]");
  }
  
  for ( i = 0; i < 2; i++){
    local_count[i] = 0;
    non_local_count[i] = 0;
  }
  //-----------------------------------------------------------------
  // Assembly written for double precision only, check sizeof(IFloat)
  //-----------------------------------------------------------------
  if ( sizeof(IFloat) != sizeof(double)){
     ERR.General(cname, fname, 
		 "Assembly functions implemented only for double precision!");
  }

  //-----------------------------------------------------------------
  //  Loop over all directions
  //-----------------------------------------------------------------
  for ( n = 0; n < NUM_DIR; n++ ) {
    //-----------------------------------------------------------------
    //  Loop over all sites
    //-----------------------------------------------------------------
    
    for (x[3] = 0; x[3] < size[3]; x[3]++){
      for (x[2] = 0; x[2] < size[2]; x[2]++){
	for (x[1] = 0; x[1] < size[1]; x[1]++){
	  for (x[0] = 0; x[0] < size[0]; x[0]++){  

	    for (i = 0; i < 4 ; i++) coord[i] = x[i];
	    odd = ( coord[0] + coord[1] + coord[2] + coord[3] ) % 2;
	    sg = coord[1] + size[1] * ( coord[2] + size[2] * ( coord[3] +
			       			       size[3] * coord[0] ));
	    // Each checkerboard site owns NUM_DIR + 1 consecutive slots in
	    // chi[odd]: slot 0 is the solution, slots 1..NUM_DIR the
	    // per-direction temporaries.
	    m = (NUM_DIR + 1) * (sg/2);
	    if ( CoordNN( n ) ) {		//  off-node
	      //----------------------------------------------------------
	      // Assembly written for double precision only, multiplication
	      // by sizeof(double) done to avoid a bitshift inside the
	      // high performance code
	      //----------------------------------------------------------
	      //pointer to source field (offset in the receive buffer)
	      *( chi_nl[ odd ]  +  2 * non_local_count[ odd ] )
	    = chi_off_node[n] + VECT_LEN * ( LexSurface( coord_nn, n%4 ) / 2 );
	      // pointer to temporary field where U*chi is stored
		*( chi_nl[ odd ] + 2 * non_local_count[ odd ] + 1) = 
		( IFloat *) ( VECT_LEN * (NUM_DIR * int(sg/2) + n )
		              * sizeof(IFloat));
	      // pointer to the above temporary field 
	      *( chi[ odd ] + m + n + 1) = 
		( IFloat *) ( VECT_LEN * (NUM_DIR * int(sg/2) + n) 
		              * sizeof(IFloat));
	      // Pointer to solution field
	      *( chi[ odd ]  +  m ) = 
		( IFloat * ) ( VECT_LEN * (LexVector( coord ) / 2 ) 
			       * sizeof(IFloat));  
	      non_local_count[odd]++; 
	    }
	    else{//on node
	      //pointer to source field
	      *( chi_l[ odd ]  +  2 * local_count[ odd ] )
		= ( IFloat * ) ( VECT_LEN * ( LexVector( coord_nn ) / 2 )
		                 * sizeof(IFloat));
	      // pointer to temporary field where U*chi is stored
	      *( chi_l[ odd ]  +  2 * local_count[ odd ] + 1) = 
		( IFloat * ) ( VECT_LEN * (NUM_DIR * int(sg/2) + n)
		               * sizeof(IFloat));
	      // pointer to the above temporary field
	      *( chi[ odd ] + m + n + 1) = 
		( IFloat *) ( VECT_LEN * (NUM_DIR * int(sg/2) + n)
		              * sizeof(IFloat));
	      // pointer to solution field
	      *( chi[ odd ]  +  m ) = 
		( IFloat * ) ( VECT_LEN * (LexVector( coord ) / 2 ) 
		               * sizeof(IFloat));
	      local_count[odd]++; 
	    }
	  }
	}
      }
    }
  }

#if 0
  char buf[200];

  sprintf(buf,"chi.h");
  int fd = open(buf,O_CREAT|O_TRUNC|O_RDWR,00644);

  for(j=0;j<2;j++){
    sprintf(buf,"IFloat * chi%d[] LOCATE(\"edramtransient\") = {\n",j); 
    write(fd,buf,strlen(buf));
    sprintf(buf," (IFloat *) %d",*(chi[j])); 
    write(fd,buf,strlen(buf));
    for(i=1;i< 9*vol/2;i++){
      sprintf(buf,",\n (IFloat *) %d",*(chi[j]+i)); 
      write(fd,buf,strlen(buf));
    }
    sprintf(buf,"\n};\n"); 
    write(fd,buf,strlen(buf));
  }
  close(fd);
#endif

#if 0
  char filename[200];
  sprintf(filename,"%s_%d%d%d%d%d%d",
  chi_l_filename,CoorX(), CoorY(), CoorZ(), CoorT(), CoorS(), CoorW());
  FILE *fp = Fopen(filename,"w");
  for(j=0;j<2;j++){
    Fprintf(fp,"IFloat * chi_l%d[] LOCATE(\"edramtransient\") = {\n",j); 
    Fprintf(fp," (IFloat *) %d",*(chi_l[j])); 
    for(i=1;i< 2*(local_chi/2);i++){
      Fprintf(fp,",\n (IFloat *) %d",*(chi_l[j]+i)); 
    }
    Fprintf(fp,"\n};\n"); 
  }
  Fclose(fp);
#endif

#if 0
  char filename[200];
  sprintf(filename,"%s_%d%d%d%d%d%d",
  chi_nl_filename, CoorX(), CoorY(), CoorZ(), CoorT(), CoorS(), CoorW());
  FILE *fp = Fopen(filename,"w");
  for(j=0;j<2;j++){
    Fprintf(fp,"IFloat * chi_nl%d[] LOCATE(\"edramtransient\") = {\n",j); 
    Fprintf(fp," (IFloat *) 0x%x",*(chi_nl[j])); 
    for(i=1;i< 2*(non_local_chi/2);i++){
      Fprintf(fp,",\n (IFloat *) 0x%x",*(chi_nl[j]+i)); 
    }
    Fprintf(fp,"\n};\n"); 
  }

  Fclose(fp);

#endif 

  //-------------------------------------------------------------------
  //  Set up SCU buffer parameters.  T direction is special, since
  //  the block-strided move will not work here.
  //-------------------------------------------------------------------

  blklen[0] = VECT_LEN * sizeof(IFloat) * size[1] * size[2] * size[3] / 2;
  blklen[1] = VECT_LEN * sizeof(IFloat) * size[0] / 2;
  blklen[2] = VECT_LEN * sizeof(IFloat) * size[0] * size[1] / 2;
  blklen[3] = VECT_LEN * sizeof(IFloat) * size[0] * size[1] * size[2] / 2;
  

  numblk[0] = 1; 
  numblk[1] = size[2] * size[3];
  numblk[2] = size[3];
  numblk[3] = 1;
  
  stride[0] = 0;
  stride[1] = (VECT_LEN * size[0] * ( size[1] - 1 ) / 2)*sizeof(IFloat);
  stride[2] = (VECT_LEN * size[0] * size[1] * ( size[2] - 1 ) / 2)*
    sizeof(IFloat) ;
  stride[3] = 0;
  //-------------------------------------------------------------------
  //  Calculate offsets for T transfers done one word at a time.
  //  We have plus (P) transfers for both the even and odd
  //  checkerboards.  Same for minus (M) transfers.
  //-------------------------------------------------------------------


  for ( i = 0; i < 2; i++ ) {
#if 1
    Tbuffer[i] = (IFloat *) qalloc (QFAST|QNONCACHE, size[1] * size[2] * size[3] *
				    VECT_LEN * sizeof( IFloat ) / 2);
    // Check the buffer that was just allocated; testing the array name
    // itself would never detect a failed allocation.
    if(!Tbuffer[i]) ERR.Pointer(cname, fname, "Tbuffer");
#else
    if( size[1]*size[2]*size[3]*VECT_LEN/2 >MAX_TBUF_LEN ){
	ERR.General(cname,fname,"Tbuffer size overflow\n");
    }
#endif
    ToffsetP[i] = ( int * ) fmalloc ( size[1] * size[2] * size[3] *
      sizeof( int ) / 2 );

    ToffsetM[i] = ( int * ) fmalloc ( size[1] * size[2] * size[3] *
      sizeof( int ) / 2 );

    countP[i] = 0;
    countM[i] = 0;
  }

//  printf("dirac_init: Set up SCU parameters\n");
  for ( sg = 0; sg < vol; sg++ ) { 

    odd = SetCoord( sg );
    sc = LexVector( coord );

    // Sites on the t == 0 face feed the minus-T offset list.
    if ( coord[0] == 0 ) {
      *( ToffsetM[ odd ] + countM[ odd ] ) = VECT_LEN * ( sc / 2 );
      countM[ odd ]++;

    }

    // Sites on the t == L_t - 1 face feed the plus-T offset list.
    if ( coord[0] == size[0] - 1 ) {
      *( ToffsetP[ odd ] + countP[ odd ] ) = VECT_LEN * ( sc / 2 );
      countP[ odd ]++;
    }
  }
//  printf("dirac_init: Set up SCU parameters\n");

  //-------------------------------------------------------------------
  //  Index i says data has been received from TP, XP, YP, ZP, TM, XM,
  //  YM, ZM
  //-------------------------------------------------------------------

//  for(i=0;i<4;i++)
//   printf("blklen numblk stride [%d]= %d %d %d\n",i, blklen[i],numblk[i],stride[i]);

  for ( i = 0; i < NUM_DIR; i++ ) {
    j = i % (NUM_DIR/2);
//      SCUarg[i + 8] = new SCUDirArgIR;
//      printf("%d: %p %d\n",i+8,chi_off_node[i],blklen[j]*numblk[j]);
      // Receive descriptors occupy slots 8..15, one contiguous block each.
      SCUarg[i + 8]  = new SCUDirArgIR(chi_off_node[i], scudir[i], SCU_REC, 
		    blklen[j]*numblk[j], 1, 0, IR_5);
//		    VECT_LEN * sizeof(IFloat) * vol / ( 2 * size[j] ), 
//			       1, 0, IR_5);
//      buffer_flush[i] = VECT_LEN * sizeof(IFloat) * vol/ (384 * size[j]);
//send arguments
//   SCUarg[i+8]->Print();
    // Send descriptors occupy slots 0..7.  i == 0 uses Tbuffer[1] and
    // i == 4 uses Tbuffer[0]; every other direction is set up on Tbuffer[0].
    if ((i == 0) || ( i == 4)){
      SCUarg[i] = new SCUDirArgIR(Tbuffer[(4 - i)/4], scudir[i], SCU_SEND, 
		       blklen[j], numblk[j], stride[j], IR_5 );
    }
    else{ 
      SCUarg[i] = new SCUDirArgIR(Tbuffer[0], scudir[i], SCU_SEND, 
		       blklen[j], numblk[j], stride[j], IR_5 );
    }
//   SCUarg[i]->Print();
//    printf("SCUarg[%d] done\n",i);
  }
//  for(i = 0;i<2*NUM_DIR;i++) SCUarg[i]->Print();
  SCUmulti = new SCUDirArgMulti();
  SCUmulti->Init(SCUarg, 16);
//  for(i = 0;i<2*NUM_DIR;i++) SCUarg[i]->Print();
  //-------------------------------------------------------------------
  //  Need send offsets for various transfers.  The index for
  //  sends is TM, XM, YM, ZM, TP, XP, YP, ZP, since the
  //  transfers are indexed by the node data is received from.
  //-------------------------------------------------------------------

  Xoffset[0] = 0;
  Xoffset[1] = VECT_LEN * size[0] * (size[1] - 1) / 2;
  Xoffset[2] = VECT_LEN * size[0] * size[1] * (size[2] - 1) / 2;
  Xoffset[3] = VECT_LEN * size[0] * size[1] * size[2] * (size[3]-1) / 2;
  Xoffset[4] = 0;
  Xoffset[5] = 0;
  Xoffset[6] = 0;
  Xoffset[7] = 0;

//  print("dirac_init: Done\n");

}
Ejemplo n.º 3
0
//Fills the hop-pointer tables hp_l (on-node) and hp_nl (off-node) for every
//hop length 1..MAX_HOP and every direction.
//
//For each lattice site and direction i, two entries are written: one in
//slot 2*i and one in slot 2*i+1.  Each entry records a source offset (src)
//and a destination offset (dest), both in bytes (site index times
//vlen / vlen2).  A site goes into the non-local table when the hop would
//cross the node boundary and direction i is not flagged local[i]; otherwise
//it goes into the local table.
//
//Overrunning the expected number of entries is reported on stderr; the
//function does not abort in that case.
void PT::set_hop_pointer() {

  const char *fname = "set_hop_pointer()";

//  VRB.Func("PT",fname);
  //Actual memory usage of vectors
  int vlen = VECT_LEN*sizeof(IFloat);
  int vlen2 =VECT_LEN_OUT*sizeof(IFloat);

  int x[NDIM], nei[NDIM];
  
  //Counts how many parallel transports of given length and direction are local
  //and non-local, respectively
  int hp_local_count[MAX_HOP][2*NDIM];
  int hp_non_local_count[MAX_HOP][2*NDIM];
  int hop, i;


  //Initialize local and non-local hop counters.
  for (hop=0; hop<MAX_HOP; hop++) {
    for (i=0; i<2*NDIM; i++) {
      hp_non_local_count[hop][i] = 0;
      hp_local_count[hop][i] = 0;
    }
  }
  
  //For a given length of the parallel transport
  for (hop = 1; hop <= MAX_HOP; hop++) {
    hop_pointer **h_l = hp_l[hop-1];
    hop_pointer **h_nl = hp_nl[hop-1];

    //Local and non-local counts for given length of the hop
    int *local_count = hp_local_count[hop-1];
    int *non_local_count = hp_non_local_count[hop-1];

    //Loop over all directions
    for (i=0; i<NDIM; i++) {

      //Total number of sites that require non-local communication
      //NOTE(review): the same limit (taken from non_local_chi[2*i]) is used
      //for both the 2*i and 2*i+1 tables -- confirm that
      //non_local_chi[2*i+1] is always equal.
      int non_local_check = hop*non_local_chi[i*2];
      //Total number of sites where parallel transport can be done locally
      int local_check = vol - non_local_check;

      //Loop through all the sites on the lattice
      //nei represents the coordinates of the neighboring site.
      for(x[3]=0,nei[3]=0;x[3]<size[3];x[3]++,nei[3]++)
	for(x[2]=0,nei[2]=0;x[2]<size[2];x[2]++,nei[2]++)
	  for(x[1]=0,nei[1]=0;x[1]<size[1];x[1]++,nei[1]++)
	    for(x[0]=0,nei[0]=0;x[0]<size[0];x[0]++,nei[0]++){

	      //This is the parallel transport of the field in the 
	      //negative direction to another node
	      //"Positive hop" because the link variable points in the 
	      //positive direction, even though the resulting field is 
	      //"transported" in the negative direction
	      // positive direction

	      if((x[i] < hop) && (!local[i])){
		//This calculates the neighbor coordinate
		nei[i] = size[i]-hop+x[i];  

		//Sets the index for source and destination
		(h_nl[2*i]+non_local_count[2*i])->src = non_local_count[2*i]*vlen;
		(h_nl[2*i]+non_local_count[2*i])->dest = LexVector(nei)*vlen2;

		//Increments the non-local count
		non_local_count[i*2]++;

		//Make sure we haven't gone over the non-local check
		if (non_local_count[i*2]>non_local_check)
		  fprintf(stderr,
			"%s:non_local_count[%d](%d)>non_local_check[%d](%d)\n",
			 fname,2*i,non_local_count[2*i],2*i,non_local_check);
		//The rest of the parallel transports in the local volume can 
		//be handled locally
	      } else {
		//Calculate the new coordinate
		nei[i] = (size[i]+x[i]-hop)%size[i];

		//if ( size[i] >2){
		//Calculate the index for the source and the destination
		(h_l[2*i]+local_count[2*i])->src = LexVector(x)*vlen;
		(h_l[2*i]+local_count[2*i])->dest = LexVector(nei)*vlen2;
                //}
		
		//Increment the local count
		local_count[i*2]++;
		//Make sure we haven't exceeded the number of local sites
		if (local_count[i*2]>local_check)
		  fprintf(stderr,"%s:local_count[%d](%d)>local_check[%d](%d)\n",
			      fname,2*i,local_count[2*i],2*i,local_check);
	      }
	      
	      //Consider hopping in the negative direction, which is parallel 
	      //transport in the positive direction
	      // negative direction
	      if( (x[i] >= (size[i]-hop)) && (!local[i])){
		//Calculate the non-local coordinate for this hop
		nei[i] = (x[i]+hop)%size[i];
		//Calculate source and destination indices
		(h_nl[2*i+1]+non_local_count[2*i+1])->src = non_local_count[2*i+1]*vlen;
		(h_nl[2*i+1]+non_local_count[2*i+1])->dest = LexVector(nei)*vlen2;

		//Increment the non-local count, check that bounds have not 
		//been exceeded.  The check must look at the counter that was
		//just incremented (slot 2*i+1), not the positive-hop one.
		non_local_count[i*2+1]++;
		if (non_local_count[i*2+1]>non_local_check)
		  fprintf(stderr,"%s:non_local_count[%d](%d)>non_local_check[%d](%d)\n",
			      fname,2*i+1,non_local_count[2*i+1],2*i+1,non_local_check);
	      } else {
		//Calculate the local coordinate for this hop
		nei[i] = (x[i]+hop)%size[i];
		//Calculate source and destination indices
		//if ( size[i] >2){
		(h_l[2*i+1]+local_count[2*i+1])->src = LexVector(x)*vlen;
		(h_l[2*i+1]+local_count[2*i+1])->dest = LexVector(nei)*vlen2;
		//}
		//Increment local count, check that bounds not exceeded
		//(again on slot 2*i+1, the counter that was just advanced)
		local_count[i*2+1]++;
		if (local_count[i*2+1]>local_check)
		  fprintf(stderr,"%s:local_count[%d](%d)>local_check[%d](%d)\n",
			      fname,2*i+1,local_count[2*i+1],2*i+1,local_check);
	      }
	      // Need to reset the neighbour pointer
	      nei[i] = x[i];
	    }
    }
  }
//  VRB.Func("PT",fname);
//  exit(44);
}