void setup_layout(void) { int c[4]; int i,n_mach; int d[4]; #ifdef FIX_NODE_GEOM int *geom = node_geometry; #else int *geom = NULL; #endif if(mynode()==0){ printf("LAYOUT = Hypercubes, options = "); printf("QDP"); printf("\n"); } /* Is there already a grid? This could be a grid architecture with a preset dimension, or a geometry could have been set by the -qmp-geom command line arg. In either case we have a nonzero allocated number of dimensions. */ if(QMP_get_allocated_number_of_dimensions() == 0) /* Set the geometry if requested */ set_qmp_layout_grid(geom, 4); c[0] = nx; c[1] = ny; c[2] = nz; c[3] = nt; QDP_set_latsize(4, c); QDP_create_layout(); sites_on_node = QDP_sites_on_node; even_sites_on_node = QDP_subset_len(QDP_even); odd_sites_on_node = QDP_subset_len(QDP_odd); n_mach = QMP_get_logical_number_of_dimensions(); dim_mach = QMP_get_logical_dimensions(); /* Initialize I/O node function */ #ifdef FIX_IONODE_GEOM init_io_node(); #endif /* Report sublattice dimensions */ for(i = 0; i < 4; i++){ /* Any extra machine dimensions are assumed to be 1 */ if(i < n_mach)d[i] = c[i]/dim_mach[i]; else d[i] = c[i]; } if( mynode()==0) printf("ON EACH NODE %d x %d x %d x %d\n",d[0],d[1],d[2],d[3]); #if 0 mpi_whoami(); /* Debug */ #endif }
/*--------------------------------------------------------------------*/ static void setup_qmp_grid(){ int ndim = 4; int len[4]; int ndim2, i; const int *nsquares2; len[0] = nx; len[1] = ny; len[2] = nz; len[3] = nt; if(mynode()==0){ printf("qmp_grid,"); printf("\n"); } ndim2 = QMP_get_allocated_number_of_dimensions(); nsquares2 = QMP_get_allocated_dimensions(); /* If the dimensions are not already allocated, use the node_geometry request. Otherwise a hardware or command line specification trumps the parameter input. */ #ifdef FIX_NODE_GEOM if(ndim2 == 0){ ndim2 = 4; nsquares2 = node_geometry; } else{ node0_printf("setup_qmp_grid: Preallocated machine geometry overrides request\n"); } #endif if(mynode()==0){ printf("Using machine geometry: "); for(i=0; i<ndim; i++){ printf("%d ",nsquares2[i]); if(i < ndim-1)printf("X "); } printf("\n"); } /* In principle, we could now rotate coordinate axes */ /* Save this for a future upgrade */ set_qmp_layout_grid(nsquares2, ndim2); ndim2 = QMP_get_logical_number_of_dimensions(); nsquares2 = QMP_get_logical_dimensions(); for(i=0; i<ndim; i++) { if(i<ndim2) nsquares[i] = nsquares2[i]; else nsquares[i] = 1; } for(i=0; i<ndim; i++) { if(len[i]%nsquares[i] != 0) { node0_printf("LATTICE SIZE DOESN'T FIT GRID\n"); QMP_abort(0); } squaresize[i] = len[i]/nsquares[i]; } }
/* Total problem size */
/*
 * Returns a pointer to a function-local static array holding, for each
 * direction below getNumDim(), the subgrid extent multiplied by the QMP
 * logical machine extent.  The array is filled on the first call only;
 * later calls return the cached values.
 */
static int* getLattSize()
{
  static int need_init = 1;
  static int lattice_extent[4];
  const int *machine_extent;
  int dir;

  /* Already computed: hand back the cached result */
  if (need_init != 1) {
    return lattice_extent;
  }

  machine_extent = QMP_get_logical_dimensions();

  dir = 0;
  while (dir < getNumDim()) {
    const int *subgrid_extent = getSubgridSize();
    lattice_extent[dir] = subgrid_extent[dir] * machine_extent[dir];
    ++dir;
  }

  need_init = 0;
  return lattice_extent;
}
void setup_layout(void) { int c[4]; int i,n_mach; int d[4]; if(mynode()==0){ printf("LAYOUT = Hypercubes, options = "); printf("QDP"); printf("\n"); } c[0] = nx; c[1] = ny; c[2] = nz; c[3] = nt; QDP_set_latsize(4, c); QDP_create_layout(); sites_on_node = QDP_sites_on_node; even_sites_on_node = QDP_subset_len(QDP_even); odd_sites_on_node = QDP_subset_len(QDP_odd); n_mach = QMP_get_logical_number_of_dimensions(); dim_mach = QMP_get_logical_dimensions(); #ifdef FIX_IONODE_GEOM /* Initialize I/O node function */ init_io_node(); #endif /* Report sublattice dimensions */ for(i = 0; i < 4; i++){ /* Any extra machine dimensions are assumed to be 1 */ if(i < n_mach)d[i] = c[i]/dim_mach[i]; else d[i] = c[i]; } if( mynode()==0) printf("ON EACH NODE %d x %d x %d x %d\n",d[0],d[1],d[2],d[3]); }
void initQuda(int dev) { static int initialized = 0; if (initialized) { return; } initialized = 1; #if (CUDA_VERSION >= 4000) && defined(MULTI_GPU) //check if CUDA_NIC_INTEROP is set to 1 in the enviroment char* cni_str = getenv("CUDA_NIC_INTEROP"); if(cni_str == NULL){ errorQuda("Environment variable CUDA_NIC_INTEROP is not set\n"); } int cni_int = atoi(cni_str); if (cni_int != 1){ errorQuda("Environment variable CUDA_NIC_INTEROP is not set to 1\n"); } #endif int deviceCount; cudaGetDeviceCount(&deviceCount); if (deviceCount == 0) { errorQuda("No devices supporting CUDA"); } for(int i=0; i<deviceCount; i++) { cudaDeviceProp deviceProp; cudaGetDeviceProperties(&deviceProp, i); printfQuda("QUDA: Found device %d: %s\n", i, deviceProp.name); } #ifdef QMP_COMMS int ndim; const int *dim; if ( QMP_is_initialized() != QMP_TRUE ) { errorQuda("QMP is not initialized"); } num_QMP=QMP_get_number_of_nodes(); rank_QMP=QMP_get_node_number(); dev += rank_QMP % deviceCount; ndim = QMP_get_logical_number_of_dimensions(); dim = QMP_get_logical_dimensions(); #elif defined(MPI_COMMS) comm_init(); dev=comm_gpuid(); #else if (dev < 0) dev = deviceCount - 1; #endif // Used for applying the gauge field boundary condition if( commCoords(3) == 0 ) qudaPt0=true; else qudaPt0=false; if( commCoords(3) == commDim(3)-1 ) qudaPtNm1=true; else qudaPtNm1=false; cudaDeviceProp deviceProp; cudaGetDeviceProperties(&deviceProp, dev); if (deviceProp.major < 1) { errorQuda("Device %d does not support CUDA", dev); } printfQuda("QUDA: Using device %d: %s\n", dev, deviceProp.name); cudaSetDevice(dev); #ifdef HAVE_NUMA if(numa_config_set){ if(gpu_affinity[dev] >=0){ printfQuda("Numa setting to cpu node %d\n", gpu_affinity[dev]); if(numa_run_on_node(gpu_affinity[dev]) != 0){ printfQuda("Warning: Setting numa to cpu node %d failed\n", gpu_affinity[dev]); } } } #endif initCache(); quda::initBlas(); }
int main(int argc, char *argv[]) { const char *msg; int status = 1; int mu, i; struct QOP_CLOVER_State *clover_state; QDP_Int *I_seed; int i_seed; QDP_RandomState *state; QLA_Real plaq; QLA_Real n[NELEMS(F)]; struct QOP_CLOVER_Gauge *c_g; struct QOP_CLOVER_Fermion *c_f[NELEMS(F)]; double kappa; double c_sw; double in_eps; int in_iter; int log_flag; double out_eps; int out_iter; int cg_status; double run_time; long long flops, sent, received; /* start QDP */ QDP_initialize(&argc, &argv); if (argc != 1 + NDIM + 6) { printf0("ERROR: usage: %s Lx ... seed kappa c_sw iter eps log?\n", argv[0]); goto end; } for (mu = 0; mu < NDIM; mu++) { lattice[mu] = atoi(argv[1 + mu]); } i_seed = atoi(argv[1 + NDIM]); kappa = atof(argv[2 + NDIM]); c_sw = atof(argv[3 + NDIM]); in_iter = atoi(argv[4 + NDIM]); in_eps = atof(argv[5 + NDIM]); log_flag = atoi(argv[6 + NDIM]) == 0? 0: QOP_CLOVER_LOG_EVERYTHING; /* set lattice size and create layout */ QDP_set_latsize(NDIM, lattice); QDP_create_layout(); primary = QMP_is_primary_node(); self = QMP_get_node_number(); get_vector(network, 1, QMP_get_logical_number_of_dimensions(), QMP_get_logical_dimensions()); get_vector(node, 0, QMP_get_logical_number_of_dimensions(), QMP_get_logical_coordinates()); printf0("network: "); for (i = 0; i < NDIM; i++) printf0(" %d", network[i]); printf0("\n"); printf0("node: "); for (i = 0; i < NDIM; i++) printf0(" %d", node[i]); printf0("\n"); printf0("kappa: %20.15f\n", kappa); printf0("c_sw: %20.15f\n", c_sw); printf0("in_iter: %d\n", in_iter); printf0("in_eps: %15.2e\n", in_eps); /* allocate the gauge field */ create_Mvector(U, NELEMS(U)); create_Mvector(C, NELEMS(C)); create_Dvector(F, NELEMS(F)); I_seed = QDP_create_I(); QDP_I_eq_funci(I_seed, icoord, QDP_all); state = QDP_create_S(); QDP_S_eq_seed_i_I(state, i_seed, I_seed, QDP_all); for (mu = 0; mu < NELEMS(U); mu++) { QDP_M_eq_gaussian_S(U[mu], state, QDP_all); } for (i = 0; i < NELEMS(F); i++) { QDP_D_eq_gaussian_S(F[i], state, QDP_all); } /* build the 
clovers */ clover(C, U); /* initialize CLOVER */ if (QOP_CLOVER_init(&clover_state, lattice, network, node, primary, sublattice, NULL)) { printf0("CLOVER_init() failed\n"); goto end; } if (QOP_CLOVER_import_fermion(&c_f[0], clover_state, f_reader, F[0])) { printf0("CLOVER_import_fermion(0) failed\n"); goto end; } if (QOP_CLOVER_allocate_fermion(&c_f[1], clover_state)) { printf0("CLOVER_allocate_fermion(1) failed\n"); goto end; } if (QOP_CLOVER_allocate_fermion(&c_f[2], clover_state)) { printf0("CLOVER_allocate_fermion(2) failed\n"); goto end; } if (QOP_CLOVER_allocate_fermion(&c_f[3], clover_state)) { printf0("CLOVER_allocate_fermion(3) failed\n"); goto end; } if (QOP_CLOVER_import_gauge(&c_g, clover_state, kappa, c_sw, u_reader, c_reader, NULL)) { printf("CLOVER_import_gauge() failed\n"); goto end; } QOP_CLOVER_D_operator(c_f[2], c_g, c_f[0]); cg_status = QOP_CLOVER_D_CG(c_f[3], &out_iter, &out_eps, c_f[2], c_g, c_f[2], in_iter, in_eps, log_flag); msg = QOP_CLOVER_error(clover_state); QOP_CLOVER_performance(&run_time, &flops, &sent, &received, clover_state); QOP_CLOVER_export_fermion(f_writer, F[3], c_f[3]); printf0("CG status: %d\n", cg_status); printf0("CG error message: %s\n", msg? 
msg: "<NONE>"); printf0("CG iter: %d\n", out_iter); printf0("CG eps: %20.10e\n", out_eps); printf0("CG performance: runtime %e sec\n", run_time); printf0("CG performance: flops %.3e MFlop/s (%lld)\n", flops * 1e-6 / run_time, flops); printf0("CG performance: snd %.3e MB/s (%lld)\n", sent * 1e-6 / run_time, sent); printf0("CG performance: rcv %.3e MB (%lld)/s\n", received * 1e-6 / run_time, received); /* free CLOVER */ QOP_CLOVER_free_gauge(&c_g); for (i = 0; i < NELEMS(c_f); i++) QOP_CLOVER_free_fermion(&c_f[i]); QOP_CLOVER_fini(&clover_state); /* Compute plaquette */ plaq = plaquette(U); /* field norms */ for (i = 0; i < NELEMS(F); i++) QDP_r_eq_norm2_D(&n[i], F[i], QDP_all); /* Display the values */ printf0("plaquette = %g\n", plaq / (QDP_volume() * QDP_Nc * NDIM * (NDIM - 1) / 2 )); for (i = 0; i < NELEMS(F); i++) printf0(" |f|^2 [%d] = %20.10e\n", i, (double)(n[i])); /* Compute and display <f[1] f[0]> */ show_dot("1|orig", F[1], F[0]); /* Compute and display <f[1] f[3]> */ show_dot("1|solv", F[1], F[3]); QDP_destroy_S(state); QDP_destroy_I(I_seed); destroy_Mvector(U, NELEMS(U)); destroy_Mvector(C, NELEMS(C)); destroy_Dvector(F, NELEMS(F)); status = 0; end: /* shutdown QDP */ printf0("end\n"); QDP_finalize(); return status; }
void setup_layout( void ) { const int *p_machine_dimensions = NULL; int number_machine_dimensions = -1; int i; number_machine_dimensions = QMP_get_logical_number_of_dimensions(); printf( "number of QMP machine dimensions = %i\n", number_machine_dimensions ); p_machine_dimensions = QMP_get_logical_dimensions(); if( p_machine_dimensions == NULL ) { printf( "p_machines_dimensions is NULL\n" ); terminate( 0 ); } for( i = 0; i < number_machine_dimensions; i++ ) { printf( "QMP machine dimension ( %i ) = %i\n", i, p_machine_dimensions[ i ] ); } machine_nx = p_machine_dimensions[ 0 ]; machine_ny = p_machine_dimensions[ 1 ]; machine_nz = p_machine_dimensions[ 2 ]; machine_nt = p_machine_dimensions[ 3 ]; p_machine_dimensions = NULL; machine_dimensions[ XUP ] = machine_nx; machine_dimensions[ YUP ] = machine_ny; machine_dimensions[ ZUP ] = machine_nz; machine_dimensions[ TUP ] = machine_nt; printf( "machine_nx = %i\n", machine_nx ); printf( "machine_ny = %i\n", machine_ny ); printf( "machine_nz = %i\n", machine_nz ); printf( "machine_nt = %i\n", machine_nt ); /* Each lattice dimension must be a mutliple of the corresponding machine dimension. 
*/ if( ( nx % machine_nx ) != 0 ) { printf( "nx = %i is not a multiple of machine_nx = %i\n", nx, machine_nx ); terminate( 0 ); } if( ( ny % machine_ny ) != 0 ) { printf( "ny = %i is not a multiple of machine_ny = %i\n", ny, machine_ny ); terminate( 0 ); } if( ( nz % machine_nz ) != 0 ) { printf( "nz = %i is not a multiple of machine_nz = %i\n", nz, machine_nz ); terminate( 0 ); } if( ( nt % machine_nt ) != 0 ) { printf( "nt = %i is not a multiple of machine_nt = %i\n", nt, machine_nt ); terminate( 0 ); } sub_lattice_nx = nx / machine_nx; sub_lattice_ny = ny / machine_ny; sub_lattice_nz = nz / machine_nz; sub_lattice_nt = nt / machine_nt; sub_lattice_dimensions[ XUP ] = sub_lattice_nx; sub_lattice_dimensions[ YUP ] = sub_lattice_ny; sub_lattice_dimensions[ ZUP ] = sub_lattice_nz; sub_lattice_dimensions[ TUP ] = sub_lattice_nt; printf( "sub_lattice_nx = %i\n", sub_lattice_nx ); printf( "sub_lattice_ny = %i\n", sub_lattice_ny ); printf( "sub_lattice_nz = %i\n", sub_lattice_nz ); printf( "sub_lattice_nt = %i\n", sub_lattice_nt ); sites_on_node = sub_lattice_nx * sub_lattice_ny * sub_lattice_nz * sub_lattice_nt; sub_lattice_volume = sites_on_node; /* The number of sites per node must be even. */ if( mynode() == 0 ) { if( sites_on_node % 2 != 0) { printf( "sites_on_node is not even\n" ); terminate(0); } } even_sites_on_node = sites_on_node / 2; odd_sites_on_node = sites_on_node / 2; if( mynode()==0) printf("ON EACH NODE %d x %d x %d x %d\n",sub_lattice_nx,sub_lattice_ny, sub_lattice_nz,sub_lattice_nt); if( mynode()==0 && sites_on_node%2 != 0) printf("WATCH OUT FOR EVEN/ODD SITES ON NODE BUG!!!\n"); }
int main(int argc, char *argv[]) { int status = 1; int mu, i; struct QOP_CLOVER_State *clover_state; QDP_Int *I_seed; int i_seed; QDP_RandomState *state; QLA_Real plaq; QLA_Real n[NELEMS(F)]; struct QOP_CLOVER_Gauge *c_g; struct QOP_CLOVER_Fermion *c_f[NELEMS(F)]; double kappa; double c_sw; /* start QDP */ QDP_initialize(&argc, &argv); if (argc != 1 + NDIM + 3) { printf0("ERROR: usage: %s Lx ... seed kappa c_sw\n", argv[0]); goto end; } for (mu = 0; mu < NDIM; mu++) { lattice[mu] = atoi(argv[1 + mu]); } i_seed = atoi(argv[1 + NDIM]); kappa = atof(argv[2 + NDIM]); c_sw = atof(argv[3 + NDIM]); /* set lattice size and create layout */ QDP_set_latsize(NDIM, lattice); QDP_create_layout(); primary = QMP_is_primary_node(); self = QMP_get_node_number(); get_vector(network, 1, QMP_get_logical_number_of_dimensions(), QMP_get_logical_dimensions()); get_vector(node, 0, QMP_get_logical_number_of_dimensions(), QMP_get_logical_coordinates()); printf0("network: "); for (i = 0; i < NDIM; i++) printf0(" %d", network[i]); printf0("\n"); printf0("node: "); for (i = 0; i < NDIM; i++) printf0(" %d", node[i]); printf0("\n"); printf0("kappa: %20.15f\n", kappa); printf0("c_sw: %20.15f\n", c_sw); /* allocate the gauge field */ create_Mvector(U, NELEMS(U)); create_Mvector(C, NELEMS(C)); create_Dvector(F, NELEMS(F)); I_seed = QDP_create_I(); QDP_I_eq_funci(I_seed, icoord, QDP_all); state = QDP_create_S(); QDP_S_eq_seed_i_I(state, i_seed, I_seed, QDP_all); for (mu = 0; mu < NELEMS(U); mu++) { QDP_M_eq_gaussian_S(U[mu], state, QDP_all); } for (i = 0; i < NELEMS(F); i++) { QDP_D_eq_gaussian_S(F[i], state, QDP_all); } /* build the clovers */ clover(C, U); /* initialize CLOVER */ if (QOP_CLOVER_init(&clover_state, lattice, network, node, primary, sublattice, NULL)) { printf0("CLOVER_init() failed\n"); goto end; } if (QOP_CLOVER_import_fermion(&c_f[0], clover_state, f_reader, F[0])) { printf0("CLOVER_import_fermion(0) failed\n"); goto end; } if (QOP_CLOVER_import_fermion(&c_f[1], clover_state, 
f_reader, F[1])) { printf0("CLOVER_import_fermion(1) failed\n"); goto end; } if (QOP_CLOVER_allocate_fermion(&c_f[2], clover_state)) { printf0("CLOVER_allocate_fermion(2) failed\n"); goto end; } if (QOP_CLOVER_allocate_fermion(&c_f[3], clover_state)) { printf0("CLOVER_allocate_fermion(3) failed\n"); goto end; } if (QOP_CLOVER_import_gauge(&c_g, clover_state, kappa, c_sw, u_reader, c_reader, NULL)) { printf("CLOVER_import_gauge() failed\n"); goto end; } QOP_CLOVER_D_operator(c_f[2], c_g, c_f[0]); QOP_CLOVER_export_fermion(f_writer, F[2], c_f[2]); QOP_CLOVER_D_operator_conjugated(c_f[3], c_g, c_f[1]); QOP_CLOVER_export_fermion(f_writer, F[3], c_f[3]); /* free CLOVER */ QOP_CLOVER_free_gauge(&c_g); for (i = 0; i < NELEMS(c_f); i++) QOP_CLOVER_free_fermion(&c_f[i]); QOP_CLOVER_fini(&clover_state); /* Compute plaquette */ plaq = plaquette(U); /* field norms */ for (i = 0; i < NELEMS(F); i++) QDP_r_eq_norm2_D(&n[i], F[i], QDP_all); /* Display the values */ printf0("plaquette = %g\n", plaq / (QDP_volume() * QDP_Nc * NDIM * (NDIM - 1) / 2 )); for (i = 0; i < NELEMS(F); i++) printf0(" |f|^2 [%d] = %20.10e\n", i, (double)(n[i])); /* Compute and display <f[1] f[2]> */ show_dot("1|D0", F[1], F[2]); /* Compute and display <f[3] f[0]> */ show_dot("X1|0", F[3], F[0]); QDP_destroy_S(state); QDP_destroy_I(I_seed); destroy_Mvector(U, NELEMS(U)); destroy_Mvector(C, NELEMS(C)); destroy_Dvector(F, NELEMS(F)); status = 0; end: /* shutdown QDP */ printf0("end\n"); QDP_finalize(); return status; }