Exemple #1
0
void initQuda(int dev)
{
  static int initialized = 0;
  if (initialized) {
    return;
  }
  initialized = 1;

#if (CUDA_VERSION >= 4000) && defined(MULTI_GPU)
  //check if CUDA_NIC_INTEROP is set to 1 in the enviroment
  char* cni_str = getenv("CUDA_NIC_INTEROP");
  if(cni_str == NULL){
    errorQuda("Environment variable CUDA_NIC_INTEROP is not set\n");
  }
  int cni_int = atoi(cni_str);
  if (cni_int != 1){
    errorQuda("Environment variable CUDA_NIC_INTEROP is not set to 1\n");    
  }
#endif

  int deviceCount;
  cudaGetDeviceCount(&deviceCount);
  if (deviceCount == 0) {
    errorQuda("No devices supporting CUDA");
  }

  for(int i=0; i<deviceCount; i++) {
    cudaDeviceProp deviceProp;
    cudaGetDeviceProperties(&deviceProp, i);
    printfQuda("QUDA: Found device %d: %s\n", i, deviceProp.name);
  }

#ifdef QMP_COMMS
  int ndim;
  const int *dim;

  if ( QMP_is_initialized() != QMP_TRUE ) {
    errorQuda("QMP is not initialized");
  }
  num_QMP=QMP_get_number_of_nodes();
  rank_QMP=QMP_get_node_number();
  
  dev += rank_QMP % deviceCount;
  ndim = QMP_get_logical_number_of_dimensions();
  dim = QMP_get_logical_dimensions();

#elif defined(MPI_COMMS)

  comm_init();
  dev=comm_gpuid();

#else
  if (dev < 0) dev = deviceCount - 1;
#endif
  
  // Used for applying the gauge field boundary condition
  if( commCoords(3) == 0 ) qudaPt0=true;
  else qudaPt0=false;

  if( commCoords(3) == commDim(3)-1 ) qudaPtNm1=true;
  else qudaPtNm1=false;

  cudaDeviceProp deviceProp;
  cudaGetDeviceProperties(&deviceProp, dev);
  if (deviceProp.major < 1) {
    errorQuda("Device %d does not support CUDA", dev);
  }

  
  printfQuda("QUDA: Using device %d: %s\n", dev, deviceProp.name);

  cudaSetDevice(dev);
#ifdef HAVE_NUMA
  if(numa_config_set){
    if(gpu_affinity[dev] >=0){
      printfQuda("Numa setting to cpu node %d\n", gpu_affinity[dev]);
      if(numa_run_on_node(gpu_affinity[dev]) != 0){
        printfQuda("Warning: Setting numa to cpu node %d failed\n", gpu_affinity[dev]);
      }
    }

  }
#endif

  initCache();
  quda::initBlas();
}
Exemple #2
0
void packGhost(Float **gauge, Float **ghost, const int nFace, const int *X, 
	       const int volumeCB, const int *surfaceCB) {
  // For each of the four directions, copy the last nFace slices of gauge[dir]
  // (the sites with X[dir]-nFace <= d < X[dir]) into ghost[dir], split by the
  // parity of (a+b+c+d).  Each parity fills exactly nFace*surfaceCB[dir] sites.

  // Number of Float values copied per site.
  // NOTE(review): presumably equal to gaugeSiteSize -- confirm.
  const int siteLen = 18;

  const int strideXY  = X[0]*X[1];
  const int strideXYZ = X[0]*X[1]*X[2];

  // tripCount[dir] = {outer, middle, inner} loop bounds over the three
  // dimensions other than dir.
  const int tripCount[4][3] = {
    {X[3], X[2], X[1]},   // dir 0
    {X[3], X[2], X[0]},   // dir 1
    {X[3], X[1], X[0]},   // dir 2
    {X[2], X[1], X[0]}    // dir 3
  };

  // stride[dir] = multipliers applied to (outer, middle, inner, d) to form the
  // full-lattice index in the source array.
  const int stride[4][4] = {
    {strideXYZ, strideXY, X[0],         1},
    {strideXYZ, strideXY,    1,      X[0]},
    {strideXYZ,     X[0],    1,  strideXY},
    { strideXY,     X[0],    1, strideXYZ}
  };

  for (int dir = 0; dir < 4; ++dir) {
    const int *n = tripCount[dir];
    const int *s = stride[dir];

    // Source halves, indexed by parity: [0] even, [1] odd.
    Float *const src[2] = { gauge[dir], gauge[dir] + volumeCB*gaugeSiteSize };

    // The ghost buffer holds two equally sized halves.
    Float *const lowHalf  = ghost[dir];
    Float *const highHalf = ghost[dir] + nFace*surfaceCB[dir]*gaugeSiteSize;

    // Even goes first unless X[dir] is odd and the grid has more than one
    // node in this direction, in which case the two halves swap places.
    const bool evenFirst = (X[dir] % 2 == 0) || (commDim(dir) == 1);
    Float *const dst[2] = { evenFirst ? lowHalf  : highHalf,
                            evenFirst ? highHalf : lowHalf };

    // Sites written so far, per parity.
    int filled[2] = {0, 0};

    for (int d = X[dir] - nFace; d < X[dir]; ++d) {
      for (int a = 0; a < n[0]; ++a) {
        for (int b = 0; b < n[1]; ++b) {
          for (int c = 0; c < n[2]; ++c) {
            // Checkerboard index: full-lattice index halved.
            const int site   = (a*s[0] + b*s[1] + c*s[2] + d*s[3]) >> 1;
            const int parity = ((a + b + c + d) % 2 == 0) ? 0 : 1;

            const Float *from = src[parity] + siteLen*site;
            Float *to         = dst[parity] + siteLen*filled[parity];
            for (int i = 0; i < siteLen; ++i) {
              to[i] = from[i];
            }
            ++filled[parity];
          }
        }
      }
    }

    assert( filled[0] == nFace*surfaceCB[dir]);
    assert( filled[1] == nFace*surfaceCB[dir]);
  }

}
Exemple #3
0
/*
 * Sanity-check one site link stored as 18 Float values.
 *
 * The first 12 values (a = link[0..5], b = link[6..11]) are combined through
 * accumulateConjugateProduct() into a 6-value reference, which is scaled by a
 * +/-1 phase derived from the site coordinates and `dir`, and then compared
 * element-wise against the last 6 values (c = link[12..17]).
 * NOTE(review): a, b, c are presumably the three rows of a 3x3 complex
 * matrix -- confirm.
 *
 * link        the 18 values of the link being checked
 * dir         link direction (compared against XUP / YUP / ZUP / TUP)
 * ga_idx      checkerboard site index, expanded with fullLatticeIndex()
 * gaugeParam  supplies the local lattice dimensions X[0..3]
 * oddBit      parity passed to fullLatticeIndex()
 *
 * Returns 0 when every element agrees within 1e-4, and -1 (after printing
 * diagnostics) otherwise.  A NaN in the link or in the reference is treated
 * as a failure.
 */
int site_link_sanity_check_internal_12(Float* link, int dir, int ga_idx, QudaGaugeParam* gaugeParam, int oddBit)
{
    Float refc_buf[6];
    Float* refc = &refc_buf[0];

    // The accumulate calls below add into refc, so it has to start at zero.
    memset((void*)refc, 0, sizeof(refc_buf));

    Float* a = link;
    Float* b = link + 6;
    Float* c = link + 12;
    
    accumulateConjugateProduct(refc + 0*2, a + 1*2, b + 2*2, +1);
    accumulateConjugateProduct(refc + 0*2, a + 2*2, b + 1*2, -1);
    accumulateConjugateProduct(refc + 1*2, a + 2*2, b + 0*2, +1);
    accumulateConjugateProduct(refc + 1*2, a + 0*2, b + 2*2, -1);
    accumulateConjugateProduct(refc + 2*2, a + 0*2, b + 1*2, +1);
    accumulateConjugateProduct(refc + 2*2, a + 1*2, b + 0*2, -1);


    int X1h=gaugeParam->X[0]/2;   // only reported in the diagnostic output
    int X1 =gaugeParam->X[0];    
    int X2 =gaugeParam->X[1];
    int X3 =gaugeParam->X[2];
    int X4 =gaugeParam->X[3];

#if 1        
    // Sign applied to the reference before the comparison.
    double coeff= 1.0;
   
   {
       // Split the full-lattice index into coordinates, i1 running fastest.
       int index = fullLatticeIndex(ga_idx, oddBit);
       int i4 = index /(X3*X2*X1);
       int i3 = (index - i4*(X3*X2*X1))/(X2*X1);
       int i2 = (index - i4*(X3*X2*X1) - i3*(X2*X1))/X1;
       int i1 = index - i4*(X3*X2*X1) - i3*(X2*X1) - i2*X1;
       
       if (dir == XUP) {
           if (i4 % 2 == 1){
               coeff *= -1;
           }
       }

       if (dir == YUP){
           if ((i1+i4) % 2 == 1){
               coeff *= -1;
           }
       }
       if (dir == ZUP){
           if ( (i4+i1+i2) % 2 == 1){
               coeff *= -1;
           }
       }
       if (dir == TUP){
	 // Extra sign flip on the last time slice of the last node in dimension 3.
	 if ((commCoords(3) == commDim(3) -1) && i4 == (X4-1) ){
	   coeff *= -1;
	 } 
       }       
   }
 
   
   for (int k = 0; k < 6; k++){
       refc[k] *= coeff;
   }
#endif
   
    
    const double delta = 0.0001;
    for (int i = 0; i < 6; i++){
	double diff =  refc[i] -  c[i];
	double absdiff = diff > 0? diff: (-diff);
	// Written as a negated "<=" so that a NaN difference also fails;
	// "absdiff > delta" is false for NaN and would let it through.
	if (!(absdiff <= delta)){
	    printf("ERROR: sanity check failed for site link\n");
	    display_link_internal(link);
	    printf("refc = (%.10f,%.10f) (%.10f,%.10f) (%.10f,%.10f)\n", 
		   refc[0], refc[1], refc[2], refc[3], refc[4], refc[5]);
	    printf("X=%d %d %d %d, X1h=%d\n", gaugeParam->X[0], X2, X3, X4, X1h);
	    return -1;
	}
	
    }
    

    return 0;
}
Exemple #4
0
  void packGhostAllLinks(Float **cpuLink, Float **cpuGhostBack,Float**cpuGhostFwd, int dir, int nFace, int* X) {
    // For the single direction `dir`, copy nFace boundary slices of all four
    // link arrays into the back ghost buffer (slices 0 .. nFace-1) and the
    // forward ghost buffer (slices X[dir]-nFace .. X[dir]-1), split by the
    // parity of (a+b+c+d).

    // Number of Float values copied per site.
    // NOTE(review): presumably equal to gaugeSiteSize -- confirm.
    const int siteLen = 18;

    const int strideXY  = X[0]*X[1];
    const int strideXYZ = X[0]*X[1]*X[2];

    const int volumeCB = X[0]*X[1]*X[2]*X[3]/2;
    const int faceVolumeCB[4] = {
      X[1]*X[2]*X[3]/2,
      X[0]*X[2]*X[3]/2,
      X[0]*X[1]*X[3]/2,
      X[0]*X[1]*X[2]/2
    };

    // tripCount[dir] = {outer, middle, inner} loop bounds over the three
    // dimensions other than dir.
    const int tripCount[4][3] = {
      {X[3], X[2], X[1]},   // dir 0
      {X[3], X[2], X[0]},   // dir 1
      {X[3], X[1], X[0]},   // dir 2
      {X[2], X[1], X[0]}    // dir 3
    };

    // stride[dir] = multipliers applied to (outer, middle, inner, d) to form
    // the full-lattice index in the source array.
    const int stride[4][4] = {
      {strideXYZ, strideXY, X[0],         1},
      {strideXYZ, strideXY,    1,      X[0]},
      {strideXYZ,     X[0],    1,  strideXY},
      { strideXY,     X[0],    1, strideXYZ}
    };

    const int *n = tripCount[dir];
    const int *s = stride[dir];

    // side 0 packs the back face, side 1 the forward face.
    for (int side = 0; side < 2; ++side) {
      Float **dst = (side == 0) ? cpuGhostBack : cpuGhostFwd;

      const int firstSlice = (side == 0) ? 0     : X[dir] - nFace;
      const int lastSlice  = (side == 0) ? nFace : X[dir];

      // All four links at each boundary site are copied.
      for (int linkdir = 0; linkdir < 4; ++linkdir) {
	// Source halves, indexed by parity: [0] even, [1] odd.
	Float *const src[2] = { cpuLink[linkdir],
				cpuLink[linkdir] + volumeCB*gaugeSiteSize };

	// Each link direction owns two consecutive equally sized halves of dst[dir].
	Float *const firstHalf  = dst[dir] + 2*linkdir* nFace *faceVolumeCB[dir]*gaugeSiteSize;
	Float *const secondHalf = firstHalf + nFace*faceVolumeCB[dir]*gaugeSiteSize;

	// Even goes first unless X[dir] is odd and the grid has more than one
	// node in this direction, in which case the two halves swap places.
	const bool evenFirst = (X[dir] % 2 == 0) || (commDim(dir) == 1);
	Float *const out[2] = { evenFirst ? firstHalf  : secondHalf,
				evenFirst ? secondHalf : firstHalf };

	// Sites written so far, per parity.
	int filled[2] = {0, 0};

	for (int d = firstSlice; d < lastSlice; ++d) {
	  for (int a = 0; a < n[0]; ++a) {
	    for (int b = 0; b < n[1]; ++b) {
	      for (int c = 0; c < n[2]; ++c) {
		// Checkerboard index: full-lattice index halved.
		const int site   = (a*s[0] + b*s[1] + c*s[2] + d*s[3]) >> 1;
		const int parity = ((a + b + c + d) % 2 == 0) ? 0 : 1;

		const Float *from = src[parity] + siteLen*site;
		Float *to         = out[parity] + siteLen*filled[parity];
		for (int i = 0; i < siteLen; ++i) {
		  to[i] = from[i];
		}
		++filled[parity];
	      }
	    }
	  }
	}

	assert( filled[0] == nFace*faceVolumeCB[dir]);
	assert( filled[1] == nFace*faceVolumeCB[dir]);
      }
    }
  }