void initQuda(int dev) { static int initialized = 0; if (initialized) { return; } initialized = 1; #if (CUDA_VERSION >= 4000) && defined(MULTI_GPU) //check if CUDA_NIC_INTEROP is set to 1 in the enviroment char* cni_str = getenv("CUDA_NIC_INTEROP"); if(cni_str == NULL){ errorQuda("Environment variable CUDA_NIC_INTEROP is not set\n"); } int cni_int = atoi(cni_str); if (cni_int != 1){ errorQuda("Environment variable CUDA_NIC_INTEROP is not set to 1\n"); } #endif int deviceCount; cudaGetDeviceCount(&deviceCount); if (deviceCount == 0) { errorQuda("No devices supporting CUDA"); } for(int i=0; i<deviceCount; i++) { cudaDeviceProp deviceProp; cudaGetDeviceProperties(&deviceProp, i); printfQuda("QUDA: Found device %d: %s\n", i, deviceProp.name); } #ifdef QMP_COMMS int ndim; const int *dim; if ( QMP_is_initialized() != QMP_TRUE ) { errorQuda("QMP is not initialized"); } num_QMP=QMP_get_number_of_nodes(); rank_QMP=QMP_get_node_number(); dev += rank_QMP % deviceCount; ndim = QMP_get_logical_number_of_dimensions(); dim = QMP_get_logical_dimensions(); #elif defined(MPI_COMMS) comm_init(); dev=comm_gpuid(); #else if (dev < 0) dev = deviceCount - 1; #endif // Used for applying the gauge field boundary condition if( commCoords(3) == 0 ) qudaPt0=true; else qudaPt0=false; if( commCoords(3) == commDim(3)-1 ) qudaPtNm1=true; else qudaPtNm1=false; cudaDeviceProp deviceProp; cudaGetDeviceProperties(&deviceProp, dev); if (deviceProp.major < 1) { errorQuda("Device %d does not support CUDA", dev); } printfQuda("QUDA: Using device %d: %s\n", dev, deviceProp.name); cudaSetDevice(dev); #ifdef HAVE_NUMA if(numa_config_set){ if(gpu_affinity[dev] >=0){ printfQuda("Numa setting to cpu node %d\n", gpu_affinity[dev]); if(numa_run_on_node(gpu_affinity[dev]) != 0){ printfQuda("Warning: Setting numa to cpu node %d failed\n", gpu_affinity[dev]); } } } #endif initCache(); quda::initBlas(); }
void packGhost(Float **gauge, Float **ghost, const int nFace, const int *X, const int volumeCB, const int *surfaceCB) { int XY=X[0]*X[1]; int XYZ=X[0]*X[1]*X[2]; //loop variables: a, b, c with a the most signifcant and c the least significant //A, B, C the maximum value //we need to loop in d as well, d's vlaue dims[dir]-3, dims[dir]-2, dims[dir]-1 int A[4], B[4], C[4]; //X dimension A[0] = X[3]; B[0] = X[2]; C[0] = X[1]; //Y dimension A[1] = X[3]; B[1] = X[2]; C[1] = X[0]; //Z dimension A[2] = X[3]; B[2] = X[1]; C[2] = X[0]; //T dimension A[3] = X[2]; B[3] = X[1]; C[3] = X[0]; //multiplication factor to compute index in original cpu memory int f[4][4]={ {XYZ, XY, X[0], 1}, {XYZ, XY, 1, X[0]}, {XYZ, X[0], 1, XY}, { XY, X[0], 1, XYZ} }; for(int dir =0; dir < 4; dir++) { Float* even_src = gauge[dir]; Float* odd_src = gauge[dir] + volumeCB*gaugeSiteSize; Float* even_dst; Float* odd_dst; //switching odd and even ghost gauge when that dimension size is odd //only switch if X[dir] is odd and the gridsize in that dimension is greater than 1 if((X[dir] % 2 ==0) || (commDim(dir) == 1)){ even_dst = ghost[dir]; odd_dst = ghost[dir] + nFace*surfaceCB[dir]*gaugeSiteSize; }else{ even_dst = ghost[dir] + nFace*surfaceCB[dir]*gaugeSiteSize; odd_dst = ghost[dir]; } int even_dst_index = 0; int odd_dst_index = 0; int d; int a,b,c; for(d = X[dir]- nFace; d < X[dir]; d++){ for(a = 0; a < A[dir]; a++){ for(b = 0; b < B[dir]; b++){ for(c = 0; c < C[dir]; c++){ int index = ( a*f[dir][0] + b*f[dir][1]+ c*f[dir][2] + d*f[dir][3])>> 1; int oddness = (a+b+c+d)%2; if (oddness == 0){ //even for(int i=0;i < 18;i++){ even_dst[18*even_dst_index+i] = even_src[18*index + i]; } even_dst_index++; }else{ //odd for(int i=0;i < 18;i++){ odd_dst[18*odd_dst_index+i] = odd_src[18*index + i]; } odd_dst_index++; } }//c }//b }//a }//d assert( even_dst_index == nFace*surfaceCB[dir]); assert( odd_dst_index == nFace*surfaceCB[dir]); } }
int site_link_sanity_check_internal_12(Float* link, int dir, int ga_idx, QudaGaugeParam* gaugeParam, int oddBit) { int ret =0; Float refc_buf[6]; Float* refc = &refc_buf[0]; memset((void*)refc, 0, sizeof(refc_buf)); Float* a = link; Float* b = link + 6; Float* c = link + 12; accumulateConjugateProduct(refc + 0*2, a + 1*2, b + 2*2, +1); accumulateConjugateProduct(refc + 0*2, a + 2*2, b + 1*2, -1); accumulateConjugateProduct(refc + 1*2, a + 2*2, b + 0*2, +1); accumulateConjugateProduct(refc + 1*2, a + 0*2, b + 2*2, -1); accumulateConjugateProduct(refc + 2*2, a + 0*2, b + 1*2, +1); accumulateConjugateProduct(refc + 2*2, a + 1*2, b + 0*2, -1); int X1h=gaugeParam->X[0]/2; int X1 =gaugeParam->X[0]; int X2 =gaugeParam->X[1]; int X3 =gaugeParam->X[2]; int X4 =gaugeParam->X[3]; #if 1 double coeff= 1.0; { int index = fullLatticeIndex(ga_idx, oddBit); int i4 = index /(X3*X2*X1); int i3 = (index - i4*(X3*X2*X1))/(X2*X1); int i2 = (index - i4*(X3*X2*X1) - i3*(X2*X1))/X1; int i1 = index - i4*(X3*X2*X1) - i3*(X2*X1) - i2*X1; if (dir == XUP) { if (i4 % 2 == 1){ coeff *= -1; } } if (dir == YUP){ if ((i1+i4) % 2 == 1){ coeff *= -1; } } if (dir == ZUP){ if ( (i4+i1+i2) % 2 == 1){ coeff *= -1; } } if (dir == TUP){ if ((commCoords(3) == commDim(3) -1) && i4 == (X4-1) ){ coeff *= -1; } } } refc[0]*=coeff; refc[1]*=coeff; refc[2]*=coeff; refc[3]*=coeff; refc[4]*=coeff; refc[5]*=coeff; #endif double delta = 0.0001; int i; for (i =0;i < 6; i++){ double diff = refc[i] - c[i]; double absdiff = diff > 0? diff: (-diff); if (absdiff > delta){ printf("ERROR: sanity check failed for site link\n"); display_link_internal(link); printf("refc = (%.10f,%.10f) (%.10f,%.10f) (%.10f,%.10f)\n", refc[0], refc[1], refc[2], refc[3], refc[4], refc[5]); printf("X=%d %d %d %d, X1h=%d\n", gaugeParam->X[0], X2, X3, X4, X1h); return -1; } } return ret; }
void packGhostAllLinks(Float **cpuLink, Float **cpuGhostBack,Float**cpuGhostFwd, int dir, int nFace, int* X) { int XY=X[0]*X[1]; int XYZ=X[0]*X[1]*X[2]; int volumeCB = X[0]*X[1]*X[2]*X[3]/2; int faceVolumeCB[4]={ X[1]*X[2]*X[3]/2, X[0]*X[2]*X[3]/2, X[0]*X[1]*X[3]/2, X[0]*X[1]*X[2]/2 }; //loop variables: a, b, c with a the most signifcant and c the least significant //A, B, C the maximum value //we need to loop in d as well, d's vlaue dims[dir]-3, dims[dir]-2, dims[dir]-1 int A[4], B[4], C[4]; //X dimension A[0] = X[3]; B[0] = X[2]; C[0] = X[1]; //Y dimension A[1] = X[3]; B[1] = X[2]; C[1] = X[0]; //Z dimension A[2] = X[3]; B[2] = X[1]; C[2] = X[0]; //T dimension A[3] = X[2]; B[3] = X[1]; C[3] = X[0]; //multiplication factor to compute index in original cpu memory int f[4][4]={ {XYZ, XY, X[0], 1}, {XYZ, XY, 1, X[0]}, {XYZ, X[0], 1, XY}, { XY, X[0], 1, XYZ} }; for(int ite = 0; ite < 2; ite++){ //ite == 0: back //ite == 1: fwd Float** dst; if (ite == 0){ dst = cpuGhostBack; }else{ dst = cpuGhostFwd; } //collect back ghost gauge field //for(int dir =0; dir < 4; dir++){ int d; int a,b,c; //we need copy all 4 links in the same location for(int linkdir=0; linkdir < 4; linkdir ++){ Float* even_src = cpuLink[linkdir]; Float* odd_src = cpuLink[linkdir] + volumeCB*gaugeSiteSize; Float* even_dst; Float* odd_dst; //switching odd and even ghost cpuLink when that dimension size is odd //only switch if X[dir] is odd and the gridsize in that dimension is greater than 1 if((X[dir] % 2 ==0) || (commDim(dir) == 1)){ even_dst = dst[dir] + 2*linkdir* nFace *faceVolumeCB[dir]*gaugeSiteSize; odd_dst = even_dst + nFace*faceVolumeCB[dir]*gaugeSiteSize; }else{ odd_dst = dst[dir] + 2*linkdir* nFace *faceVolumeCB[dir]*gaugeSiteSize; even_dst = odd_dst + nFace*faceVolumeCB[dir]*gaugeSiteSize; } int even_dst_index = 0; int odd_dst_index = 0; int startd; int endd; if(ite == 0){ //back startd = 0; endd= nFace; }else{//fwd startd = X[dir] - nFace; endd =X[dir]; } for(d = startd; d < endd; d++){ for(a = 0; a < A[dir]; a++){ for(b = 0; b < B[dir]; b++){ for(c = 0; c < C[dir]; c++){ int index = ( a*f[dir][0] + b*f[dir][1]+ c*f[dir][2] + d*f[dir][3])>> 1; int oddness = (a+b+c+d)%2; if (oddness == 0){ //even for(int i=0;i < 18;i++){ even_dst[18*even_dst_index+i] = even_src[18*index + i]; } even_dst_index++; }else{ //odd for(int i=0;i < 18;i++){ odd_dst[18*odd_dst_index+i] = odd_src[18*index + i]; } odd_dst_index++; } }//c }//b }//a }//d assert( even_dst_index == nFace*faceVolumeCB[dir]); assert( odd_dst_index == nFace*faceVolumeCB[dir]); }//linkdir //}//dir }//ite }