void initQuda(int dev) { static int initialized = 0; if (initialized) { return; } initialized = 1; #if (CUDA_VERSION >= 4000) && defined(MULTI_GPU) //check if CUDA_NIC_INTEROP is set to 1 in the enviroment char* cni_str = getenv("CUDA_NIC_INTEROP"); if(cni_str == NULL){ errorQuda("Environment variable CUDA_NIC_INTEROP is not set\n"); } int cni_int = atoi(cni_str); if (cni_int != 1){ errorQuda("Environment variable CUDA_NIC_INTEROP is not set to 1\n"); } #endif int deviceCount; cudaGetDeviceCount(&deviceCount); if (deviceCount == 0) { errorQuda("No devices supporting CUDA"); } for(int i=0; i<deviceCount; i++) { cudaDeviceProp deviceProp; cudaGetDeviceProperties(&deviceProp, i); printfQuda("QUDA: Found device %d: %s\n", i, deviceProp.name); } #ifdef QMP_COMMS int ndim; const int *dim; if ( QMP_is_initialized() != QMP_TRUE ) { errorQuda("QMP is not initialized"); } num_QMP=QMP_get_number_of_nodes(); rank_QMP=QMP_get_node_number(); dev += rank_QMP % deviceCount; ndim = QMP_get_logical_number_of_dimensions(); dim = QMP_get_logical_dimensions(); #elif defined(MPI_COMMS) comm_init(); dev=comm_gpuid(); #else if (dev < 0) dev = deviceCount - 1; #endif // Used for applying the gauge field boundary condition if( commCoords(3) == 0 ) qudaPt0=true; else qudaPt0=false; if( commCoords(3) == commDim(3)-1 ) qudaPtNm1=true; else qudaPtNm1=false; cudaDeviceProp deviceProp; cudaGetDeviceProperties(&deviceProp, dev); if (deviceProp.major < 1) { errorQuda("Device %d does not support CUDA", dev); } printfQuda("QUDA: Using device %d: %s\n", dev, deviceProp.name); cudaSetDevice(dev); #ifdef HAVE_NUMA if(numa_config_set){ if(gpu_affinity[dev] >=0){ printfQuda("Numa setting to cpu node %d\n", gpu_affinity[dev]); if(numa_run_on_node(gpu_affinity[dev]) != 0){ printfQuda("Warning: Setting numa to cpu node %d failed\n", gpu_affinity[dev]); } } } #endif initCache(); quda::initBlas(); }
int site_link_sanity_check_internal_12(Float* link, int dir, int ga_idx, QudaGaugeParam* gaugeParam, int oddBit) { int ret =0; Float refc_buf[6]; Float* refc = &refc_buf[0]; memset((void*)refc, 0, sizeof(refc_buf)); Float* a = link; Float* b = link + 6; Float* c = link + 12; accumulateConjugateProduct(refc + 0*2, a + 1*2, b + 2*2, +1); accumulateConjugateProduct(refc + 0*2, a + 2*2, b + 1*2, -1); accumulateConjugateProduct(refc + 1*2, a + 2*2, b + 0*2, +1); accumulateConjugateProduct(refc + 1*2, a + 0*2, b + 2*2, -1); accumulateConjugateProduct(refc + 2*2, a + 0*2, b + 1*2, +1); accumulateConjugateProduct(refc + 2*2, a + 1*2, b + 0*2, -1); int X1h=gaugeParam->X[0]/2; int X1 =gaugeParam->X[0]; int X2 =gaugeParam->X[1]; int X3 =gaugeParam->X[2]; int X4 =gaugeParam->X[3]; #if 1 double coeff= 1.0; { int index = fullLatticeIndex(ga_idx, oddBit); int i4 = index /(X3*X2*X1); int i3 = (index - i4*(X3*X2*X1))/(X2*X1); int i2 = (index - i4*(X3*X2*X1) - i3*(X2*X1))/X1; int i1 = index - i4*(X3*X2*X1) - i3*(X2*X1) - i2*X1; if (dir == XUP) { if (i4 % 2 == 1){ coeff *= -1; } } if (dir == YUP){ if ((i1+i4) % 2 == 1){ coeff *= -1; } } if (dir == ZUP){ if ( (i4+i1+i2) % 2 == 1){ coeff *= -1; } } if (dir == TUP){ if ((commCoords(3) == commDim(3) -1) && i4 == (X4-1) ){ coeff *= -1; } } } refc[0]*=coeff; refc[1]*=coeff; refc[2]*=coeff; refc[3]*=coeff; refc[4]*=coeff; refc[5]*=coeff; #endif double delta = 0.0001; int i; for (i =0;i < 6; i++){ double diff = refc[i] - c[i]; double absdiff = diff > 0? diff: (-diff); if (absdiff > delta){ printf("ERROR: sanity check failed for site link\n"); display_link_internal(link); printf("refc = (%.10f,%.10f) (%.10f,%.10f) (%.10f,%.10f)\n", refc[0], refc[1], refc[2], refc[3], refc[4], refc[5]); printf("X=%d %d %d %d, X1h=%d\n", gaugeParam->X[0], X2, X3, X4, X1h); return -1; } } return ret; }