int main(int argc, char **argv) { int c, i, mu, status; int ispin, icol, isc; int n_c = 3; int n_s = 4; int count = 0; int filename_set = 0; int dims[4] = {0,0,0,0}; int grid_size[4]; int l_LX_at, l_LXstart_at; int x0, x1, x2, x3, ix, iix, iy, is, it, i3; int sl0, sl1, sl2, sl3, have_source_flag=0; int source_proc_coords[4], lsl0, lsl1, lsl2, lsl3; int check_residuum = 0; unsigned int VOL3, V5; int do_gt = 0; int full_orbit = 0; int smear_source = 0; char filename[200], source_filename[200], source_filename_write[200]; double ratime, retime; double plaq_r=0., plaq_m=0., norm, norm2; double spinor1[24]; double *gauge_qdp[4], *gauge_field_timeslice=NULL, *gauge_field_smeared=NULL; double _1_2_kappa, _2_kappa, phase; FILE *ofs; int mu_trans[4] = {3, 0, 1, 2}; int threadid, nthreads; int timeslice, source_timeslice; char rng_file_in[100], rng_file_out[100]; int *source_momentum=NULL; int source_momentum_class = -1; int source_momentum_no = 0; int source_momentum_runs = 1; int imom; int num_gpu_on_node=0, rank; int source_location_5d_iseven; int convert_sign=0; #ifdef HAVE_QUDA int rotate_gamma_basis = 1; #else int rotate_gamma_basis = 0; #endif omp_lock_t *lck = NULL, gen_lck[1]; int key = 0; /****************************************************************************/ /* for smearing parallel to inversion */ double *smearing_spinor_field[] = {NULL,NULL}; int dummy_flag = 0; /****************************************************************************/ /****************************************************************************/ #if (defined HAVE_QUDA) && (defined MULTI_GPU) int x_face_size, y_face_size, z_face_size, t_face_size, pad_size; #endif /****************************************************************************/ /************************************************/ int qlatt_nclass; int *qlatt_id=NULL, *qlatt_count=NULL, **qlatt_rep=NULL, **qlatt_map=NULL; double **qlatt_list=NULL; /************************************************/ 
/************************************************/ double boundary_condition_factor; int boundary_condition_factor_set = 0; /************************************************/ //#ifdef MPI // kernelPackT = true; //#endif /*********************************************** * QUDA parameters ***********************************************/ #ifdef HAVE_QUDA QudaPrecision cpu_prec = QUDA_DOUBLE_PRECISION; QudaPrecision cuda_prec = QUDA_DOUBLE_PRECISION; QudaPrecision cuda_prec_sloppy = QUDA_SINGLE_PRECISION; QudaGaugeParam gauge_param = newQudaGaugeParam(); QudaInvertParam inv_param = newQudaInvertParam(); #endif while ((c = getopt(argc, argv, "soch?vgf:p:b:S:R:")) != -1) { switch (c) { case 'v': g_verbose = 1; break; case 'g': do_gt = 1; break; case 'f': strcpy(filename, optarg); filename_set=1; break; case 'c': check_residuum = 1; fprintf(stdout, "# [invert_dw_quda] will check residuum again\n"); break; case 'p': n_c = atoi(optarg); fprintf(stdout, "# [invert_dw_quda] will use number of colors = %d\n", n_c); break; case 'o': full_orbit = 1; fprintf(stdout, "# [invert_dw_quda] will invert for full orbit, if source momentum set\n"); case 's': smear_source = 1; fprintf(stdout, "# [invert_dw_quda] will smear the sources if they are read from file\n"); break; case 'b': boundary_condition_factor = atof(optarg); boundary_condition_factor_set = 1; fprintf(stdout, "# [invert_dw_quda] const. 
boundary condition factor set to %e\n", boundary_condition_factor); break; case 'S': convert_sign = atoi(optarg); fprintf(stdout, "# [invert_dw_quda] using convert sign %d\n", convert_sign); break; case 'R': rotate_gamma_basis = atoi(optarg); fprintf(stdout, "# [invert_dw_quda] rotate gamma basis %d\n", rotate_gamma_basis); break; case 'h': case '?': default: usage(); break; } } // get the time stamp g_the_time = time(NULL); /************************************** * set the default values, read input **************************************/ if(filename_set==0) strcpy(filename, "cvc.input"); if(g_proc_id==0) fprintf(stdout, "# Reading input from file %s\n", filename); read_input_parser(filename); #ifdef MPI #ifdef HAVE_QUDA grid_size[0] = g_nproc_x; grid_size[1] = g_nproc_y; grid_size[2] = g_nproc_z; grid_size[3] = g_nproc_t; fprintf(stdout, "# [] g_nproc = (%d,%d,%d,%d)\n", g_nproc_x, g_nproc_y, g_nproc_z, g_nproc_t); initCommsQuda(argc, argv, grid_size, 4); #else MPI_Init(&argc, &argv); #endif #endif #if (defined PARALLELTX) || (defined PARALLELTXY) EXIT_WITH_MSG(1, "[] Error, 2-dim./3-dim. MPI-Version not yet implemented"); #endif // some checks on the input data if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { if(g_proc_id==0) fprintf(stderr, "[invert_dw_quda] Error, T and L's must be set\n"); usage(); } // set number of openmp threads // initialize MPI parameters mpi_init(argc, argv); // the volume of a timeslice VOL3 = LX*LY*LZ; V5 = T*LX*LY*LZ*L5; g_kappa5d = 0.5 / (5. 
+ g_m5); if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] kappa5d = %e\n", g_kappa5d); fprintf(stdout, "# [%2d] parameters:\n"\ "# [%2d] T = %3d\n"\ "# [%2d] Tstart = %3d\n"\ "# [%2d] L5 = %3d\n",\ g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, L5); #ifdef MPI if(T==0) { fprintf(stderr, "[%2d] local T is zero; exit\n", g_cart_id); MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); exit(2); } #endif if(init_geometry() != 0) { fprintf(stderr, "[invert_dw_quda] Error from init_geometry\n"); EXIT(1); } geometry(); if( init_geometry_5d() != 0 ) { fprintf(stderr, "[invert_dw_quda] Error from init_geometry_5d\n"); EXIT(2); } geometry_5d(); /************************************** * initialize the QUDA library **************************************/ if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] initializing quda\n"); #ifdef HAVE_QUDA // cudaGetDeviceCount(&num_gpu_on_node); if(g_gpu_per_node<0) { if(g_cart_id==0) fprintf(stderr, "[] Error, number of GPUs per node not set\n"); EXIT(106); } else { num_gpu_on_node = g_gpu_per_node; } #ifdef MPI rank = comm_rank(); #else rank = 0; #endif g_gpu_device_number = rank % num_gpu_on_node; fprintf(stdout, "# [] process %d/%d uses device %d\n", rank, g_cart_id, g_gpu_device_number); initQuda(g_gpu_device_number); #endif /************************************** * prepare the gauge field **************************************/ // read the gauge field from file alloc_gauge_field(&g_gauge_field, VOLUMEPLUSRAND); if(strcmp( gaugefilename_prefix, "identity")==0 ) { if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] Setting up unit gauge field\n"); for(ix=0;ix<VOLUME; ix++) { for(mu=0;mu<4;mu++) { _cm_eq_id(g_gauge_field+_GGI(ix,mu)); } } } else if(strcmp( gaugefilename_prefix, "random")==0 ) { if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] Setting up random gauge field with seed = %d\n", g_seed); init_rng_state(g_seed, &g_rng_state); random_gauge_field(g_gauge_field, 1.); plaquette(&plaq_m); sprintf(filename, "%s.%.4d", 
gaugefilename_prefix, Nconf); check_error(write_lime_gauge_field(filename, plaq_m, Nconf, 64), "write_lime_gauge_field", NULL, 12); } else { if(g_gauge_file_format == 0) { // ILDG sprintf(filename, "%s.%.4d", gaugefilename_prefix, Nconf); if(g_cart_id==0) fprintf(stdout, "# Reading gauge field from file %s\n", filename); status = read_lime_gauge_field_doubleprec(filename); } else if(g_gauge_file_format == 1) { // NERSC sprintf(filename, "%s.%.5d", gaugefilename_prefix, Nconf); if(g_cart_id==0) fprintf(stdout, "# Reading gauge field from file %s\n", filename); status = read_nersc_gauge_field(g_gauge_field, filename, &plaq_r); //status = read_nersc_gauge_field_3x3(g_gauge_field, filename, &plaq_r); } if(status != 0) { fprintf(stderr, "[invert_dw_quda] Error, could not read gauge field"); EXIT(12); } } #ifdef MPI xchange_gauge(); #endif // measure the plaquette plaquette(&plaq_m); if(g_cart_id==0) fprintf(stdout, "# Measured plaquette value: %25.16e\n", plaq_m); if(g_cart_id==0) fprintf(stdout, "# Read plaquette value : %25.16e\n", plaq_r); #ifndef HAVE_QUDA if(N_Jacobi>0) { #endif // allocate the smeared / qdp ordered gauge field alloc_gauge_field(&gauge_field_smeared, VOLUMEPLUSRAND); for(i=0;i<4;i++) { gauge_qdp[i] = gauge_field_smeared + i*18*VOLUME; } #ifndef HAVE_QUDA } #endif #ifdef HAVE_QUDA // transcribe the gauge field omp_set_num_threads(g_num_threads); #pragma omp parallel for private(ix,iy,mu) for(ix=0;ix<VOLUME;ix++) { iy = g_lexic2eot[ix]; for(mu=0;mu<4;mu++) { _cm_eq_cm(gauge_qdp[mu_trans[mu]]+18*iy, g_gauge_field+_GGI(ix,mu)); } } // multiply timeslice T-1 with factor of -1 (antiperiodic boundary condition) if(g_proc_coords[0]==g_nproc_t-1) { if(!boundary_condition_factor_set) boundary_condition_factor = -1.; fprintf(stdout, "# [] process %d multiplies gauge-field timeslice T_global-1 with boundary condition factor %e\n", g_cart_id, boundary_condition_factor); omp_set_num_threads(g_num_threads); #pragma omp parallel for private(ix,iy) 
for(ix=0;ix<VOL3;ix++) { iix = (T-1)*VOL3 + ix; iy = g_lexic2eot[iix]; _cm_ti_eq_re(gauge_qdp[mu_trans[0]]+18*iy, -1.); } } // QUDA precision parameters switch(g_cpu_prec) { case 0: cpu_prec = QUDA_HALF_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] CPU prec = half\n"); break; case 1: cpu_prec = QUDA_SINGLE_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] CPU prec = single\n"); break; case 2: cpu_prec = QUDA_DOUBLE_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] CPU prec = double\n"); break; default: cpu_prec = QUDA_DOUBLE_PRECISION; break; } switch(g_gpu_prec) { case 0: cuda_prec = QUDA_HALF_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] GPU prec = half\n"); break; case 1: cuda_prec = QUDA_SINGLE_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] GPU prec = single\n"); break; case 2: cuda_prec = QUDA_DOUBLE_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] GPU prec = double\n"); break; default: cuda_prec = QUDA_DOUBLE_PRECISION; break; } switch(g_gpu_prec_sloppy) { case 0: cuda_prec_sloppy = QUDA_HALF_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] GPU sloppy prec = half\n"); break; case 1: cuda_prec_sloppy = QUDA_SINGLE_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] GPU sloppy prec = single\n"); break; case 2: cuda_prec_sloppy = QUDA_DOUBLE_PRECISION; if(g_cart_id==0) fprintf(stdout, "# [] GPU sloppy prec = double\n"); break; default: cuda_prec_sloppy = QUDA_SINGLE_PRECISION; break; } // QUDA gauge parameters gauge_param.X[0] = LX; gauge_param.X[1] = LY; gauge_param.X[2] = LZ; gauge_param.X[3] = T; inv_param.Ls = L5; gauge_param.anisotropy = 1.0; gauge_param.type = QUDA_WILSON_LINKS; gauge_param.gauge_order = QUDA_QDP_GAUGE_ORDER; gauge_param.t_boundary = QUDA_ANTI_PERIODIC_T; gauge_param.cpu_prec = cpu_prec; gauge_param.cuda_prec = cuda_prec; gauge_param.reconstruct = QUDA_RECONSTRUCT_12; gauge_param.cuda_prec_sloppy = cuda_prec_sloppy; gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_12; gauge_param.gauge_fix = QUDA_GAUGE_FIXED_NO; 
gauge_param.ga_pad = 0; inv_param.sp_pad = 0; inv_param.cl_pad = 0; // For multi-GPU, ga_pad must be large enough to store a time-slice #ifdef MULTI_GPU x_face_size = inv_param.Ls * gauge_param.X[1]*gauge_param.X[2]*gauge_param.X[3]/2; y_face_size = inv_param.Ls * gauge_param.X[0]*gauge_param.X[2]*gauge_param.X[3]/2; z_face_size = inv_param.Ls * gauge_param.X[0]*gauge_param.X[1]*gauge_param.X[3]/2; t_face_size = inv_param.Ls * gauge_param.X[0]*gauge_param.X[1]*gauge_param.X[2]/2; pad_size = _MAX(x_face_size, y_face_size); pad_size = _MAX(pad_size, z_face_size); pad_size = _MAX(pad_size, t_face_size); gauge_param.ga_pad = pad_size; if(g_cart_id==0) printf("# [invert_dw_quda] pad_size = %d\n", pad_size); #endif // load the gauge field if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] loading gauge field\n"); loadGaugeQuda((void*)gauge_qdp, &gauge_param); gauge_qdp[0] = NULL; gauge_qdp[1] = NULL; gauge_qdp[2] = NULL; gauge_qdp[3] = NULL; #endif /********************************************* * APE smear the gauge field *********************************************/ if(N_Jacobi>0) { memcpy(gauge_field_smeared, g_gauge_field, 72*VOLUMEPLUSRAND*sizeof(double)); fprintf(stdout, "# [invert_dw_quda] APE smearing gauge field with paramters N_APE=%d, alpha_APE=%e\n", N_ape, alpha_ape); APE_Smearing_Step_threads(gauge_field_smeared, N_ape, alpha_ape); xchange_gauge_field(gauge_field_smeared); } // allocate memory for the spinor fields #ifdef HAVE_QUDA no_fields = 3+2; #else no_fields = 6+2; #endif g_spinor_field = (double**)calloc(no_fields, sizeof(double*)); for(i=0; i<no_fields; i++) alloc_spinor_field(&g_spinor_field[i], VOLUMEPLUSRAND*L5); smearing_spinor_field[0] = g_spinor_field[no_fields-2]; smearing_spinor_field[1] = g_spinor_field[no_fields-1]; switch(g_source_type) { case 0: case 5: // the source locaton sl0 = g_source_location / (LX_global*LY_global*LZ); sl1 = ( g_source_location % (LX_global*LY_global*LZ) ) / ( LY_global*LZ); sl2 = ( g_source_location % ( 
LY_global*LZ) ) / ( LZ); sl3 = g_source_location % LZ; if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] global sl = (%d, %d, %d, %d)\n", sl0, sl1, sl2, sl3); source_proc_coords[0] = sl0 / T; source_proc_coords[1] = sl1 / LX; source_proc_coords[2] = sl2 / LY; source_proc_coords[3] = sl3 / LZ; #ifdef MPI MPI_Cart_rank(g_cart_grid, source_proc_coords, &g_source_proc_id); #else g_source_proc_id = 0; #endif have_source_flag = g_source_proc_id == g_cart_id; lsl0 = sl0 % T; lsl1 = sl1 % LX; lsl2 = sl2 % LY; lsl3 = sl3 % LZ; if(have_source_flag) { fprintf(stdout, "# [invert_dw_quda] process %d has the source at (%d, %d, %d, %d)\n", g_cart_id, lsl0, lsl1, lsl2, lsl3); } break; case 2: case 3: case 4: // the source timeslice #ifdef MPI source_proc_coords[0] = g_source_timeslice / T; source_proc_coords[1] = 0; source_proc_coords[2] = 0; source_proc_coords[3] = 0; MPI_Cart_rank(g_cart_grid, source_proc_coords, &g_source_proc_id); have_source_flag = ( g_source_proc_id == g_cart_id ); source_timeslice = have_source_flag ? 
g_source_timeslice % T : -1; #else g_source_proc_id = 0; have_source_flag = 1; source_timeslice = g_source_timeslice; #endif break; } #ifdef HAVE_QUDA /************************************************************* * QUDA inverter parameters *************************************************************/ inv_param.dslash_type = QUDA_DOMAIN_WALL_DSLASH; if(strcmp(g_inverter_type_name, "cg") == 0) { inv_param.inv_type = QUDA_CG_INVERTER; if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] using cg inverter\n"); } else if(strcmp(g_inverter_type_name, "bicgstab") == 0) { inv_param.inv_type = QUDA_BICGSTAB_INVERTER; if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] using bicgstab inverter\n"); #ifdef MULTI_GPU } else if(strcmp(g_inverter_type_name, "gcr") == 0) { inv_param.inv_type = QUDA_GCR_INVERTER; if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] using gcr inverter\n"); #endif } else { if(g_cart_id==0) fprintf(stderr, "[invert_dw_quda] Error, unrecognized inverter type %s\n", g_inverter_type_name); EXIT(123); } if(inv_param.inv_type == QUDA_CG_INVERTER) { inv_param.solution_type = QUDA_MAT_SOLUTION; inv_param.solve_type = QUDA_NORMEQ_PC_SOLVE; } else if(inv_param.inv_type == QUDA_BICGSTAB_INVERTER) { inv_param.solution_type = QUDA_MAT_SOLUTION; inv_param.solve_type = QUDA_DIRECT_PC_SOLVE; } else { inv_param.solution_type = QUDA_MATPC_SOLUTION; inv_param.solve_type = QUDA_DIRECT_PC_SOLVE; } inv_param.m5 = g_m5; inv_param.kappa = 0.5 / (5. 
+ inv_param.m5); inv_param.mass = g_m0; inv_param.tol = solver_precision; inv_param.maxiter = niter_max; inv_param.reliable_delta = reliable_delta; #ifdef MPI // domain decomposition preconditioner parameters if(inv_param.inv_type == QUDA_GCR_INVERTER) { if(g_cart_id == 0) printf("# [] settup DD parameters\n"); inv_param.gcrNkrylov = 15; inv_param.inv_type_precondition = QUDA_MR_INVERTER; inv_param.tol_precondition = 1e-6; inv_param.maxiter_precondition = 200; inv_param.verbosity_precondition = QUDA_VERBOSE; inv_param.prec_precondition = cuda_prec_sloppy; inv_param.omega = 0.7; } #endif inv_param.matpc_type = QUDA_MATPC_EVEN_EVEN; inv_param.dagger = QUDA_DAG_NO; inv_param.mass_normalization = QUDA_KAPPA_NORMALIZATION; //;QUDA_MASS_NORMALIZATION; inv_param.cpu_prec = cpu_prec; inv_param.cuda_prec = cuda_prec; inv_param.cuda_prec_sloppy = cuda_prec_sloppy; inv_param.verbosity = QUDA_VERBOSE; inv_param.preserve_source = QUDA_PRESERVE_SOURCE_NO; inv_param.dirac_order = QUDA_DIRAC_ORDER; #ifdef MPI inv_param.preserve_dirac = QUDA_PRESERVE_DIRAC_YES; inv_param.prec_precondition = cuda_prec_sloppy; inv_param.gamma_basis = QUDA_DEGRAND_ROSSI_GAMMA_BASIS; inv_param.dirac_tune = QUDA_TUNE_NO; #endif #endif /******************************************* * write initial rng state to file *******************************************/ if( g_source_type==2 && g_coherent_source==2 ) { sprintf(rng_file_out, "%s.0", g_rng_filename); status = init_rng_stat_file (g_seed, rng_file_out); if( status != 0 ) { fprintf(stderr, "[invert_dw_quda] Error, could not write rng status\n"); EXIT(210); } } else if( (g_source_type==2 /*&& g_coherent_source==1*/) || g_source_type==3 || g_source_type==4) { if( init_rng_state(g_seed, &g_rng_state) != 0 ) { fprintf(stderr, "[invert_dw_quda] Error, could initialize rng state\n"); EXIT(211); } } /******************************************* * prepare locks for openmp *******************************************/ nthreads = g_num_threads - 1; lck = 
(omp_lock_t*)malloc(nthreads * sizeof(omp_lock_t)); if(lck == NULL) { EXIT_WITH_MSG(97, "[invert_dw_quda] Error, could not allocate lck\n"); } // init locks for(i=0;i<nthreads;i++) { omp_init_lock(lck+i); } omp_init_lock(gen_lck); // check the source momenta if(g_source_momentum_set) { source_momentum = (int*)malloc(3*sizeof(int)); if(g_source_momentum[0]<0) g_source_momentum[0] += LX_global; if(g_source_momentum[1]<0) g_source_momentum[1] += LY_global; if(g_source_momentum[2]<0) g_source_momentum[2] += LZ_global; fprintf(stdout, "# [invert_dw_quda] using final source momentum ( %d, %d, %d )\n", g_source_momentum[0], g_source_momentum[1], g_source_momentum[2]); if(full_orbit) { status = make_qcont_orbits_3d_parity_avg( &qlatt_id, &qlatt_count, &qlatt_list, &qlatt_nclass, &qlatt_rep, &qlatt_map); if(status != 0) { if(g_cart_id==0) fprintf(stderr, "\n[invert_dw_quda] Error while creating O_3-lists\n"); EXIT(4); } source_momentum_class = qlatt_id[g_ipt[0][g_source_momentum[0]][g_source_momentum[1]][g_source_momentum[2]]]; source_momentum_no = qlatt_count[source_momentum_class]; source_momentum_runs = source_momentum_class==0 ? 
1 : source_momentum_no + 1; if(g_cart_id==0) fprintf(stdout, "# [] source momentum belongs to class %d with %d members, which means %d runs\n", source_momentum_class, source_momentum_no, source_momentum_runs); } } if(g_source_type == 5) { if(g_seq_source_momentum_set) { if(g_seq_source_momentum[0]<0) g_seq_source_momentum[0] += LX_global; if(g_seq_source_momentum[1]<0) g_seq_source_momentum[1] += LY_global; if(g_seq_source_momentum[2]<0) g_seq_source_momentum[2] += LZ_global; } else if(g_source_momentum_set) { g_seq_source_momentum[0] = g_source_momentum[0]; g_seq_source_momentum[1] = g_source_momentum[1]; g_seq_source_momentum[2] = g_source_momentum[2]; } fprintf(stdout, "# [invert_dw_quda] using final sequential source momentum ( %d, %d, %d )\n", g_seq_source_momentum[0], g_seq_source_momentum[1], g_seq_source_momentum[2]); } /*********************************************** * loop on spin-color-index ***********************************************/ for(isc=g_source_index[0]; isc<=g_source_index[1]; isc++) // for(isc=g_source_index[0]; isc<=g_source_index[0]; isc++) { ispin = isc / n_c; icol = isc % n_c; for(imom=0; imom<source_momentum_runs; imom++) { /*********************************************** * set source momentum ***********************************************/ if(g_source_momentum_set) { if(imom == 0) { if(full_orbit) { source_momentum[0] = 0; source_momentum[1] = 0; source_momentum[2] = 0; } else { source_momentum[0] = g_source_momentum[0]; source_momentum[1] = g_source_momentum[1]; source_momentum[2] = g_source_momentum[2]; } } else { source_momentum[0] = qlatt_map[source_momentum_class][imom-1] / (LY_global*LZ_global); source_momentum[1] = ( qlatt_map[source_momentum_class][imom-1] % (LY_global*LZ_global) ) / LZ_global; source_momentum[2] = qlatt_map[source_momentum_class][imom-1] % LZ_global; } if(g_cart_id==0) fprintf(stdout, "# [] run no. 
%d, source momentum (%d, %d, %d)\n", imom, source_momentum[0], source_momentum[1], source_momentum[2]); } /*********************************************** * prepare the souce ***********************************************/ if(g_read_source == 0) { // create source switch(g_source_type) { case 0: // point source if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] Creating point source\n"); for(ix=0;ix<L5*VOLUME;ix++) { _fv_eq_zero(g_spinor_field[0]+ix); } if(have_source_flag) { if(g_source_momentum_set) { phase = 2*M_PI*( source_momentum[0]*sl1/(double)LX_global + source_momentum[1]*sl2/(double)LY_global + source_momentum[2]*sl3/(double)LZ_global ); g_spinor_field[0][_GSI(g_ipt[lsl0][lsl1][lsl2][lsl3]) + 2*(n_c*ispin+icol) ] = cos(phase); g_spinor_field[0][_GSI(g_ipt[lsl0][lsl1][lsl2][lsl3]) + 2*(n_c*ispin+icol)+1] = sin(phase); } else { g_spinor_field[0][_GSI(g_ipt[lsl0][lsl1][lsl2][lsl3]) + 2*(n_c*ispin+icol) ] = 1.; } } if(g_source_momentum_set) { sprintf(source_filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d.qx%.2dqy%.2dqz%.2d", filename_prefix, Nconf, sl0, sl1, sl2, sl3, n_c*ispin+icol, source_momentum[0], source_momentum[1], source_momentum[2]); } else { sprintf(source_filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d", filename_prefix, Nconf, sl0, sl1, sl2, sl3, n_c*ispin+icol); } #ifdef HAVE_QUDA // set matpc_tpye source_location_5d_iseven = ( (g_iseven[g_ipt[lsl0][lsl1][lsl2][lsl3]] && ispin<n_s/2) || (!g_iseven[g_ipt[lsl0][lsl1][lsl2][lsl3]] && ispin>=n_s/2) ) ? 
1 : 0; if(source_location_5d_iseven) { inv_param.matpc_type = QUDA_MATPC_EVEN_EVEN; if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] matpc type is MATPC_EVEN_EVEN\n"); } else { inv_param.matpc_type = QUDA_MATPC_ODD_ODD; if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] matpc type is MATPC_ODD_ODD\n"); } #endif break; case 2: // timeslice source if(g_coherent_source==1) { if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] Creating coherent timeslice source\n"); status = prepare_coherent_timeslice_source(g_spinor_field[0], gauge_field_smeared, g_coherent_source_base, g_coherent_source_delta, VOLUME, g_rng_state, 1); if(status != 0) { fprintf(stderr, "[invert_dw_quda] Error from prepare source, status was %d\n", status); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 123); MPI_Finalize(); #endif exit(123); } check_error(prepare_coherent_timeslice_source(g_spinor_field[0], gauge_field_smeared, g_coherent_source_base, g_coherent_source_delta, VOLUME, g_rng_state, 1), "prepare_coherent_timeslice_source", NULL, 123); timeslice = g_coherent_source_base; } else { if(g_coherent_source==2) { timeslice = (g_coherent_source_base+isc*g_coherent_source_delta)%T_global; fprintf(stdout, "# [invert_dw_quda] Creating timeslice source\n"); check_error(prepare_timeslice_source(g_spinor_field[0], gauge_field_smeared, timeslice, VOLUME, g_rng_state, 1), "prepare_timeslice_source", NULL, 123); } else { if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] Creating timeslice source\n"); check_error(prepare_timeslice_source(g_spinor_field[0], gauge_field_smeared, g_source_timeslice, VOLUME, g_rng_state, 1), "prepare_timeslice_source", NULL, 124); timeslice = g_source_timeslice; } } if(g_source_momentum_set) { sprintf(source_filename, "%s.%.4d.%.2d.%.5d.qx%.2dqy%.2dqz%.2d", filename_prefix, Nconf, timeslice, isc, source_momentum[0], source_momentum[1], source_momentum[2]); } else { sprintf(source_filename, "%s.%.4d.%.2d.%.5d", filename_prefix, Nconf, timeslice, isc); } break; case 3: // 
timeslice sources for one-end trick (spin dilution) fprintf(stdout, "# [invert_dw_quda] Creating timeslice source for one-end-trick\n"); check_error( prepare_timeslice_source_one_end(g_spinor_field[0], gauge_field_smeared, source_timeslice, source_momentum, isc%n_s, g_rng_state, \ ( isc%n_s==(n_s-1) && imom==source_momentum_runs-1 )), "prepare_timeslice_source_one_end", NULL, 125 ); c = N_Jacobi > 0 ? isc%n_s + n_s : isc%n_s; if(g_source_momentum_set) { sprintf(source_filename, "%s.%.4d.%.2d.%.2d.qx%.2dqy%.2dqz%.2d", filename_prefix, Nconf, g_source_timeslice, c, source_momentum[0], source_momentum[1], source_momentum[2]); } else { sprintf(source_filename, "%s.%.4d.%.2d.%.2d", filename_prefix, Nconf, g_source_timeslice, c); } break; case 4: // timeslice sources for one-end trick (spin and color dilution ) fprintf(stdout, "# [invert_dw_quda] Creating timeslice source for one-end-trick\n"); check_error(prepare_timeslice_source_one_end_color(g_spinor_field[0], gauge_field_smeared, source_timeslice, source_momentum,\ isc%(n_s*n_c), g_rng_state, ( isc%(n_s*n_c)==(n_s*n_c-1) && imom==source_momentum_runs-1 )), "prepare_timeslice_source_one_end_color", NULL, 126); c = N_Jacobi > 0 ? 
isc%(n_s*n_c) + (n_s*n_c) : isc%(n_s*n_c); if(g_source_momentum_set) { sprintf(source_filename, "%s.%.4d.%.2d.%.2d.qx%.2dqy%.2dqz%.2d", filename_prefix, Nconf, g_source_timeslice, c, source_momentum[0], source_momentum[1], source_momentum[2]); } else { sprintf(source_filename, "%s.%.4d.%.2d.%.2d", filename_prefix, Nconf, g_source_timeslice, c); } break; case 5: if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] preparing sequential point source\n"); check_error( prepare_sequential_point_source (g_spinor_field[0], isc, sl0, g_seq_source_momentum, smear_source, g_spinor_field[1], gauge_field_smeared), "prepare_sequential_point_source", NULL, 33); sprintf(source_filename, "%s.%.4d.t%.2dx%.2d.y%.2d.z%.2d.%.2d.qx%.2dqy%.2dqz%.2d", filename_prefix2, Nconf, sl0, sl1, sl2, sl3, isc, g_source_momentum[0], g_source_momentum[1], g_source_momentum[2]); break; default: fprintf(stderr, "\nError, unrecognized source type\n"); exit(32); break; } } else { // read source switch(g_source_type) { case 0: // point source if(g_source_momentum_set) { sprintf(source_filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d.qx%.2dqy%.2dqz%.2d", \ filename_prefix2, Nconf, sl0, sl1, sl2, sl3, isc, source_momentum[0], source_momentum[1], source_momentum[2]); } else { sprintf(source_filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d", filename_prefix2, Nconf, sl0, sl1, sl2, sl3, isc); } fprintf(stdout, "# [invert_dw_quda] reading source from file %s\n", source_filename); check_error(read_lime_spinor(g_spinor_field[0], source_filename, 0), "read_lime_spinor", NULL, 115); break; case 2: // timeslice source if(g_source_momentum_set) { sprintf(source_filename, "%s.%.4d.%.2d.%.5d.qx%.2dqy%.2dqz%.2d", filename_prefix2, Nconf, g_source_timeslice, isc, source_momentum[0], source_momentum[1], source_momentum[2]); } else { sprintf(source_filename, "%s.%.4d.%.2d.%.5d", filename_prefix2, Nconf, g_source_timeslice, isc); } fprintf(stdout, "# [invert_dw_quda] reading source from file %s\n", source_filename); 
check_error(read_lime_spinor(g_spinor_field[0], source_filename, 0), "read_lime_spinor", NULL, 115); break; default: check_error(1, "source type", NULL, 104); break; case -1: // timeslice source sprintf(source_filename, "%s", filename_prefix2); fprintf(stdout, "# [invert_dw_quda] reading source from file %s\n", source_filename); check_error(read_lime_spinor(g_spinor_field[0], source_filename, 0), "read_lime_spinor", NULL, 115); break; } } // of if g_read_source if(g_write_source) { check_error(write_propagator(g_spinor_field[0], source_filename, 0, g_propagator_precision), "write_propagator", NULL, 27); } /*********************************************************************************************** * here threads split: ***********************************************************************************************/ if(dummy_flag==0) strcpy(source_filename_write, source_filename); memcpy((void*)(smearing_spinor_field[0]), (void*)(g_spinor_field[0]), 24*VOLUME*sizeof(double)); if(dummy_flag>0) { // copy only if smearing has been done; otherwise do not copy, do not invert if(g_cart_id==0) fprintf(stdout, "# [] copy smearing field -> g field\n"); memcpy((void*)(g_spinor_field[0]), (void*)(smearing_spinor_field[1]), 24*VOLUME*sizeof(double)); } omp_set_num_threads(g_num_threads); #pragma omp parallel private(threadid, _2_kappa, is, ix, iy, iix, ratime, retime) shared(key,g_read_source, smear_source, N_Jacobi, kappa_Jacobi, smearing_spinor_field, g_spinor_field, nthreads, convert_sign, VOLUME, VOL3, T, L5, isc, rotate_gamma_basis, g_cart_id) firstprivate(inv_param, gauge_param, ofs) { threadid = omp_get_thread_num(); if(threadid < nthreads) { fprintf(stdout, "# [] proc%.2d thread%.2d starting source preparation\n", g_cart_id, threadid); // smearing if( ( !g_read_source || (g_read_source && smear_source ) ) && N_Jacobi > 0 ) { if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] smearing source with N_Jacobi=%d, kappa_Jacobi=%e\n", N_Jacobi, kappa_Jacobi); 
Jacobi_Smearing_threaded(gauge_field_smeared, smearing_spinor_field[0], smearing_spinor_field[1], kappa_Jacobi, N_Jacobi, threadid, nthreads); } /*********************************************** * create the 5-dim. source field ***********************************************/ if(convert_sign == 0) { spinor_4d_to_5d_threaded(smearing_spinor_field[0], smearing_spinor_field[0], threadid, nthreads); } else if(convert_sign == 1 || convert_sign == -1) { spinor_4d_to_5d_sign_threaded(smearing_spinor_field[0], smearing_spinor_field[0], convert_sign, threadid, nthreads); } for(is=0; is<L5; is++) { for(it=threadid; it<T; it+=nthreads) { memcpy((void*)(g_spinor_field[0]+_GSI(g_ipt_5d[is][it][0][0][0])), (void*)(smearing_spinor_field[0]+_GSI(g_ipt_5d[is][it][0][0][0])), VOL3*24*sizeof(double)); } } // reorder, multiply with g2 for(is=0; is<L5; is++) { for(it=threadid; it<T; it+=nthreads) { for(i3=0; i3<VOL3; i3++) { ix = (is*T+it)*VOL3 + i3; _fv_eq_zero(smearing_spinor_field[1]+_GSI(ix)); }}} if(rotate_gamma_basis) { for(it=threadid; it<T; it+=nthreads) { for(i3=0; i3<VOL3; i3++) { ix = it * VOL3 + i3; iy = lexic2eot_5d(0, ix); _fv_eq_gamma_ti_fv(smearing_spinor_field[1]+_GSI(iy), 2, smearing_spinor_field[0]+_GSI(ix)); }} for(it=threadid; it<T; it+=nthreads) { for(i3=0; i3<VOL3; i3++) { ix = it * VOL3 + i3; iy = lexic2eot_5d(L5-1, ix); _fv_eq_gamma_ti_fv(smearing_spinor_field[1]+_GSI(iy), 2, smearing_spinor_field[0]+_GSI(ix+(L5-1)*VOLUME)); }} } else { for(it=threadid; it<T; it+=nthreads) { for(i3=0; i3<VOL3; i3++) { ix = it * VOL3 + i3; iy = lexic2eot_5d(0, ix); _fv_eq_fv(smearing_spinor_field[1]+_GSI(iy), smearing_spinor_field[0]+_GSI(ix)); }} for(it=threadid; it<T; it+=nthreads) { for(i3=0; i3<VOL3; i3++) { ix = it * VOL3 + i3; iy = lexic2eot_5d(L5-1, ix); _fv_eq_fv(smearing_spinor_field[1]+_GSI(iy), smearing_spinor_field[0]+_GSI(ix+(L5-1)*VOLUME)); }} } fprintf(stdout, "# [] proc%.2d thread%.2d finished source preparation\n", g_cart_id, threadid); } else if(threadid == 
g_num_threads-1 && dummy_flag > 0) { // else branch on threadid fprintf(stdout, "# [] proc%.2d thread%.2d starting inversion for dummy_flag = %d\n", g_cart_id, threadid, dummy_flag); /*********************************************** * perform the inversion ***********************************************/ if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] starting inversion\n"); xchange_field_5d(g_spinor_field[0]); memset(g_spinor_field[1], 0, (VOLUME+RAND)*L5*24*sizeof(double)); ratime = CLOCK; #ifdef MPI if(inv_param.inv_type == QUDA_BICGSTAB_INVERTER || inv_param.inv_type == QUDA_GCR_INVERTER) { if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] calling invertQuda\n"); invertQuda(g_spinor_field[1], g_spinor_field[0], &inv_param); } else if(inv_param.inv_type == QUDA_CG_INVERTER) { if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] calling testCG\n"); testCG(g_spinor_field[1], g_spinor_field[0], &inv_param); } else { if(g_cart_id==0) fprintf(stderr, "# [invert_dw_quda] unrecognized inverter\n"); } #else invertQuda(g_spinor_field[1], g_spinor_field[0], &inv_param); #endif retime = CLOCK; if(g_cart_id==0) { fprintf(stdout, "# [invert_dw_quda] QUDA time: %e seconds\n", inv_param.secs); fprintf(stdout, "# [invert_dw_quda] QUDA Gflops: %e\n", inv_param.gflops/inv_param.secs); fprintf(stdout, "# [invert_dw_quda] wall time: %e seconds\n", retime-ratime); fprintf(stdout, "# [invert_dw_quda] Device memory used:\n\tSpinor: %f GiB\n\tGauge: %f GiB\n", inv_param.spinorGiB, gauge_param.gaugeGiB); } } // of if threadid // wait till all threads are here #pragma omp barrier if(inv_param.mass_normalization == QUDA_KAPPA_NORMALIZATION) { _2_kappa = 2. 
* g_kappa5d; for(ix=threadid; ix<VOLUME*L5;ix+=g_num_threads) { _fv_ti_eq_re(g_spinor_field[1]+_GSI(ix), _2_kappa ); } } #pragma omp barrier // reorder, multiply with g2 for(is=0;is<L5;is++) { for(ix=threadid; ix<VOLUME; ix+=g_num_threads) { iy = lexic2eot_5d(is, ix); iix = is*VOLUME + ix; _fv_eq_fv(g_spinor_field[0]+_GSI(iix), g_spinor_field[1]+_GSI(iy)); }} #pragma omp barrier if(rotate_gamma_basis) { for(ix=threadid; ix<VOLUME*L5; ix+=g_num_threads) { _fv_eq_gamma_ti_fv(g_spinor_field[1]+_GSI(ix), 2, g_spinor_field[0]+_GSI(ix)); } } else { for(ix=threadid; ix<VOLUME*L5;ix+=g_num_threads) { _fv_eq_fv(g_spinor_field[1]+_GSI(ix), g_spinor_field[0]+_GSI(ix)); } } if(g_cart_id==0 && threadid==g_num_threads-1) fprintf(stdout, "# [invert_dw_quda] inversion done in %e seconds\n", retime-ratime); #pragma omp single { #ifdef MPI xchange_field_5d(g_spinor_field[1]); #endif /*********************************************** * check residuum ***********************************************/ if(check_residuum && dummy_flag>0) { // apply the Wilson Dirac operator in the gamma-basis defined in cvc_linalg, // which uses the tmLQCD conventions (same as in contractions) // without explicit boundary conditions #ifdef MPI xchange_field_5d(g_spinor_field[2]); xchange_field_5d(g_spinor_field[1]); #endif memset(g_spinor_field[0], 0, 24*(VOLUME+RAND)*L5*sizeof(double)); //sprintf(filename, "%s.inverted.ascii.%.2d", source_filename, g_cart_id); //ofs = fopen(filename, "w"); //printf_spinor_field_5d(g_spinor_field[1], ofs); //fclose(ofs); Q_DW_Wilson_phi(g_spinor_field[0], g_spinor_field[1]); for(ix=0;ix<VOLUME*L5;ix++) { _fv_mi_eq_fv(g_spinor_field[0]+_GSI(ix), g_spinor_field[2]+_GSI(ix)); } spinor_scalar_product_re(&norm2, g_spinor_field[2], g_spinor_field[2], VOLUME*L5); spinor_scalar_product_re(&norm, g_spinor_field[0], g_spinor_field[0], VOLUME*L5); if(g_cart_id==0) fprintf(stdout, "\n# [invert_dw_quda] absolut residuum squared: %e; relative residuum %e\n", norm, sqrt(norm/norm2) ); } 
if(dummy_flag>0) { /*********************************************** * create 4-dim. propagator ***********************************************/ if(convert_sign == 0) { spinor_5d_to_4d(g_spinor_field[1], g_spinor_field[1]); } else if(convert_sign == -1 || convert_sign == +1) { spinor_5d_to_4d_sign(g_spinor_field[1], g_spinor_field[1], convert_sign); } /*********************************************** * write the solution ***********************************************/ sprintf(filename, "%s.inverted", source_filename_write); if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] writing propagator to file %s\n", filename); check_error(write_propagator(g_spinor_field[1], filename, 0, g_propagator_precision), "write_propagator", NULL, 22); //sprintf(filename, "prop.ascii.4d.%.2d.%.2d.%.2d", isc, g_nproc, g_cart_id); //ofs = fopen(filename, "w"); //printf_spinor_field(g_spinor_field[1], ofs); //fclose(ofs); } if(check_residuum) memcpy(g_spinor_field[2], smearing_spinor_field[0], 24*VOLUME*L5*sizeof(double)); } // of omp single } // of omp parallel region if(dummy_flag > 0) strcpy(source_filename_write, source_filename); dummy_flag++; } // of loop on momenta } // of isc #if 0 // last inversion { memcpy(g_spinor_field[0], smearing_spinor_field[1], 24*VOLUME*L5*sizeof(double)); if(g_cart_id==0) fprintf(stdout, "# [] proc%.2d starting last inversion\n", g_cart_id); /*********************************************** * perform the inversion ***********************************************/ if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] starting inversion\n"); xchange_field_5d(g_spinor_field[0]); memset(g_spinor_field[1], 0, (VOLUME+RAND)*L5*24*sizeof(double)); ratime = CLOCK; #ifdef MPI if(inv_param.inv_type == QUDA_BICGSTAB_INVERTER || inv_param.inv_type == QUDA_GCR_INVERTER) { if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] calling invertQuda\n"); invertQuda(g_spinor_field[1], g_spinor_field[0], &inv_param); } else if(inv_param.inv_type == QUDA_CG_INVERTER) { 
if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] calling testCG\n"); testCG(g_spinor_field[1], g_spinor_field[0], &inv_param); } else { if(g_cart_id==0) fprintf(stderr, "# [invert_dw_quda] unrecognized inverter\n"); } #else invertQuda(g_spinor_field[1], g_spinor_field[0], &inv_param); #endif retime = CLOCK; if(g_cart_id==0) { fprintf(stdout, "# [invert_dw_quda] QUDA time: %e seconds\n", inv_param.secs); fprintf(stdout, "# [invert_dw_quda] QUDA Gflops: %e\n", inv_param.gflops/inv_param.secs); fprintf(stdout, "# [invert_dw_quda] wall time: %e seconds\n", retime-ratime); fprintf(stdout, "# [invert_dw_quda] Device memory used:\n\tSpinor: %f GiB\n\tGauge: %f GiB\n", inv_param.spinorGiB, gauge_param.gaugeGiB); } omp_set_num_threads(g_num_threads); #pragma omp parallel private(threadid,_2_kappa,is,ix,iy,iix) shared(VOLUME,L5,g_kappa,g_spinor_field,g_num_threads) { threadid = omp_get_thread_num(); if(inv_param.mass_normalization == QUDA_KAPPA_NORMALIZATION) { _2_kappa = 2. * g_kappa5d; for(ix=threadid; ix<VOLUME*L5;ix+=g_num_threads) { _fv_ti_eq_re(g_spinor_field[1]+_GSI(ix), _2_kappa ); } } #pragma omp barrier // reorder, multiply with g2 for(is=0;is<L5;is++) { for(ix=threadid; ix<VOLUME; ix+=g_num_threads) { iy = lexic2eot_5d(is, ix); iix = is*VOLUME + ix; _fv_eq_fv(g_spinor_field[0]+_GSI(iix), g_spinor_field[1]+_GSI(iy)); }} #pragma omp barrier if(rotate_gamma_basis) { for(ix=threadid; ix<VOLUME*L5; ix+=g_num_threads) { _fv_eq_gamma_ti_fv(g_spinor_field[1]+_GSI(ix), 2, g_spinor_field[0]+_GSI(ix)); } } else { for(ix=threadid; ix<VOLUME*L5;ix+=g_num_threads) { _fv_eq_fv(g_spinor_field[1]+_GSI(ix), g_spinor_field[0]+_GSI(ix)); } } } // end of parallel region if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] inversion done in %e seconds\n", retime-ratime); #ifdef MPI xchange_field_5d(g_spinor_field[1]); #endif /*********************************************** * check residuum ***********************************************/ if(check_residuum && dummy_flag>0) { // 
apply the Wilson Dirac operator in the gamma-basis defined in cvc_linalg, // which uses the tmLQCD conventions (same as in contractions) // without explicit boundary conditions #ifdef MPI xchange_field_5d(g_spinor_field[2]); #endif memset(g_spinor_field[0], 0, 24*(VOLUME+RAND)*L5*sizeof(double)); //sprintf(filename, "%s.inverted.ascii.%.2d", source_filename, g_cart_id); //ofs = fopen(filename, "w"); //printf_spinor_field_5d(g_spinor_field[1], ofs); //fclose(ofs); Q_DW_Wilson_phi(g_spinor_field[0], g_spinor_field[1]); for(ix=0;ix<VOLUME*L5;ix++) { _fv_mi_eq_fv(g_spinor_field[0]+_GSI(ix), g_spinor_field[2]+_GSI(ix)); } spinor_scalar_product_re(&norm, g_spinor_field[0], g_spinor_field[0], VOLUME*L5); spinor_scalar_product_re(&norm2, g_spinor_field[2], g_spinor_field[2], VOLUME*L5); if(g_cart_id==0) fprintf(stdout, "\n# [invert_dw_quda] absolut residuum squared: %e; relative residuum %e\n", norm, sqrt(norm/norm2) ); } /*********************************************** * create 4-dim. propagator ***********************************************/ if(convert_sign == 0) { spinor_5d_to_4d(g_spinor_field[1], g_spinor_field[1]); } else if(convert_sign == -1 || convert_sign == +1) { spinor_5d_to_4d_sign(g_spinor_field[1], g_spinor_field[1], convert_sign); } /*********************************************** * write the solution ***********************************************/ sprintf(filename, "%s.inverted", source_filename_write); if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] writing propagator to file %s\n", filename); check_error(write_propagator(g_spinor_field[1], filename, 0, g_propagator_precision), "write_propagator", NULL, 22); //sprintf(filename, "prop.ascii.4d.%.2d.%.2d.%.2d", isc, g_nproc, g_cart_id); //ofs = fopen(filename, "w"); //printf_spinor_field(g_spinor_field[1], ofs); //fclose(ofs); } // of last inversion #endif // of if 0 /*********************************************** * free the allocated memory, finalize ***********************************************/ 
#ifdef HAVE_QUDA // finalize the QUDA library if(g_cart_id==0) fprintf(stdout, "# [invert_dw_quda] finalizing quda\n"); #ifdef MPI freeGaugeQuda(); #endif endQuda(); #endif if(g_gauge_field != NULL) free(g_gauge_field); if(gauge_field_smeared != NULL) free(gauge_field_smeared); if(no_fields>0) { if(g_spinor_field!=NULL) { for(i=0; i<no_fields; i++) if(g_spinor_field[i]!=NULL) free(g_spinor_field[i]); free(g_spinor_field); } } free_geometry(); if(g_source_momentum_set && full_orbit) { finalize_q_orbits(&qlatt_id, &qlatt_count, &qlatt_list, &qlatt_rep); if(qlatt_map != NULL) { free(qlatt_map[0]); free(qlatt_map); } } if(source_momentum != NULL) free(source_momentum); if(lck != NULL) free(lck); #ifdef MPI #ifdef HAVE_QUDA endCommsQuda(); #else MPI_Finalize(); #endif #endif if(g_cart_id==0) { g_the_time = time(NULL); fprintf(stdout, "\n# [invert_dw_quda] %s# [invert_dw_quda] end of run\n", ctime(&g_the_time)); fprintf(stderr, "\n# [invert_dw_quda] %s# [invert_dw_quda] end of run\n", ctime(&g_the_time)); } return(0); }
/*
 * Fill the two half-volume spinor fields P and Q with a random noise source
 * on the global time slice t.
 *
 * Both fields are zeroed over VOLUME/2 sites first.  Every colour/spin
 * component of every site in the slice is then set to
 * (+-1/sqrt(2)) + (+-1/sqrt(2)) * I, the sign pair being selected by one
 * ranlxd draw.  Sites with even global coordinate sum go to P, the others
 * to Q.
 *
 * P, Q     output fields (indexed through g_lexic2eosub)
 * t        global time coordinate of the source slice
 * sample   sample number, enters the seed
 * nstore   configuration number, enters the seed
 * _seed    base seed
 *
 * The generator is re-initialised with a seed built from the arguments; if
 * ranlxd_init == 1 on entry, its previous state is saved and restored on exit.
 */
void source_generation_pion_only(spinor * const P, spinor * const Q,
                                 const int t, const int sample, const int nstore,
                                 const unsigned int _seed) {
  const double inv_sqrt2 = 1./sqrt(2.);
  int saved_state[105];
  int restore = 0;
  int owner_coords[4];
  int owner_rank = 0;
  int gx, gy, gz, spin, col;
  int loc_t, loc_x, loc_y, loc_z;
  int seed, quadrant, eo_index, is_mine;
  double draw, re = 0., im = 0.;
  _Complex double * dest = NULL;

  zero_spinor_field(P,VOLUME/2);
  zero_spinor_field(Q,VOLUME/2);

  /* remember the current generator state so it can be put back at the end */
  if(ranlxd_init == 1) {
    rlxd_get(saved_state);
    restore = 1;
  }

  /* seed derived from base seed, sample, time slice and configuration number */
  seed =(int) abs(_seed + sample + t*10*97 + nstore*100*53);
  rlxd_init(2, seed);

  loc_t = t - g_proc_coords[0]*T;
  owner_coords[0] = t / T;

  /* walk over the global spatial volume; every rank visits every site */
  for(gx = 0; gx < LX*g_nproc_x; gx++) {
    loc_x = gx - g_proc_coords[1]*LX;
    owner_coords[1] = gx / LX;

    for(gy = 0; gy < LY*g_nproc_y; gy++) {
      loc_y = gy - g_proc_coords[2]*LY;
      owner_coords[2] = gy / LY;

      for(gz = 0; gz < LZ*g_nproc_z; gz++) {
        loc_z = gz - g_proc_coords[3]*LZ;
        owner_coords[3] = gz / LZ;

#ifdef TM_USE_MPI
        MPI_Cart_rank(g_cart_grid, owner_coords, &owner_rank);
#endif
        is_mine = (g_cart_id == owner_rank);

        if(is_mine) {
          /* local site: choose the target field from the global coordinate
           * parity (local coordinate + process offset == global coordinate) */
          eo_index = g_lexic2eosub[ g_ipt[loc_t][loc_x][loc_y][loc_z] ];
          if((t + gx + gy + gz)%2 == 0) {
            dest = (_Complex double*)(P + eo_index);
          }
          else {
            dest = (_Complex double*)(Q + eo_index);
          }
        }

        for(spin = 0; spin < 4; spin++) {
          for(col = 0; col < 3; col++) {
            /* the draw happens on every rank, whether or not it owns the site */
            ranlxd(&draw, 1);
            if(!is_mine) {
              continue;
            }

            quadrant = (int)floor(4.*draw);
            switch(quadrant) {
              case 0:
                im =  inv_sqrt2; re =  inv_sqrt2;
                break;
              case 1:
                im = -inv_sqrt2; re =  inv_sqrt2;
                break;
              case 2:
                im =  inv_sqrt2; re = -inv_sqrt2;
                break;
              default:
                im = -inv_sqrt2; re = -inv_sqrt2;
                break;
            }
            dest[3*spin + col] = re + im * I;
          }
        }
      }
    }
  }

  /* put the generator back into the state found on entry */
  if(restore) {
    rlxd_reset(saved_state);
  }
  return;
}
void mpi_manager_2D::determin_OtherRanks() { // Find neighbouring ranks: MPI_Cart_shift(comm2d, 0, 1, &left , &right); MPI_Cart_shift(comm2d, 1, 1, &front, &back); // Determine ranks of neighbour processes: int shiftcoord[DIM]; int lbound[DIM],ubound[DIM]; for(int dim=0;dim<DIM;dim++){ lbound[dim]=-nproc[dim]; ubound[dim]= nproc[dim]; } Neighbours.resize(lbound,ubound); Neighbours.clear(); for(int dim0=-nproc[0]; dim0<=nproc[0]; dim0++){ shiftcoord[0] = (coords[0]+dim0); if(shiftcoord[0] < 0) shiftcoord[0]+=nproc[0]; for(int dim1=-nproc[1]; dim1<=nproc[1]; dim1++){ shiftcoord[1] = (coords[1]+dim1); if(shiftcoord[1] < 0) shiftcoord[1]+=nproc[1]; if(shiftcoord[0]>=0 && shiftcoord[0]<nproc[0] && shiftcoord[1]>=0 && shiftcoord[1]<nproc[1]) { // Now determine rank at relative shifted position // std::cout << " Cart "; // std::cout << shiftcoord[0] << " "; // std::cout << shiftcoord[1] << " "; // std::cout << rank << " "; // std::cout << nproc[0] << " "; // std::cout << nproc[1] << " "; // std::cout << std::endl; MPI_Cart_rank(comm2d, shiftcoord, &Neighbours(dim0,dim1)); } else { // If outside domain set to error value Neighbours(dim0, dim1) = MPI_PROC_NULL; } } } NeighboursCyclic.resize(lbound,ubound); NeighboursCyclic.clear(); for(int dim0=-nproc[0]; dim0<=nproc[0]; dim0++){ shiftcoord[0] = (coords[0]+dim0)%nproc[0]; if(shiftcoord[0] < 0) shiftcoord[0]+=nproc[0]; for(int dim1=-nproc[1]; dim1<=nproc[1]; dim1++){ shiftcoord[1] = (coords[1]+dim1)%nproc[1]; if(shiftcoord[1] < 0) shiftcoord[1]+=nproc[1]; // Now determine rank at relative shifted position MPI_Cart_rank(comm2d, shiftcoord, &NeighboursCyclic(dim0,dim1)); } } // Now determine absolute position of ranks AllRanks.resize(Index::set(0,0), Index::set(nproc[0]-1,nproc[1]-1)); for(int dim1=0; dim1<nproc[1]; ++dim1) { for(int dim0=0; dim0<nproc[0]; ++dim0) { int coord[2] = {dim0, dim1}; MPI_Cart_rank(comm2d, coord, &AllRanks(dim0, dim1)); } } }
int main(int argc, char **argv) { int c, i, mu, status; int ispin, icol, isc; int n_c = 3; int n_s = 4; int count = 0; int filename_set = 0; int dims[4] = {0,0,0,0}; int l_LX_at, l_LXstart_at; int x0, x1, x2, x3, ix, iix, iy; int sl0, sl1, sl2, sl3, have_source_flag=0; int source_proc_coords[4], lsl0, lsl1, lsl2, lsl3, source_proc_id; int check_residuum = 0; unsigned int VOL3; int do_gt = 0; int full_orbit = 0; char filename[200], source_filename[200]; double ratime, retime; double plaq_r=0., plaq_m=0., norm, norm2; // double spinor1[24], spinor2[24]; double *gauge_qdp[4], *gauge_field_timeslice=NULL, *gauge_field_smeared=NULL; double _1_2_kappa, _2_kappa, phase; FILE *ofs; int mu_trans[4] = {3, 0, 1, 2}; int threadid, nthreads; int timeslice; char rng_file_in[100], rng_file_out[100]; int *source_momentum=NULL; int source_momentum_class = -1; int source_momentum_no = 0; int source_momentum_runs = 1; int imom; /************************************************/ int qlatt_nclass; int *qlatt_id=NULL, *qlatt_count=NULL, **qlatt_rep=NULL, **qlatt_map=NULL; double **qlatt_list=NULL; /************************************************/ /*********************************************** * QUDA parameters ***********************************************/ QudaPrecision cpu_prec = QUDA_DOUBLE_PRECISION; QudaPrecision cuda_prec = QUDA_DOUBLE_PRECISION; QudaPrecision cuda_prec_sloppy = QUDA_DOUBLE_PRECISION; QudaGaugeParam gauge_param = newQudaGaugeParam(); QudaInvertParam inv_param = newQudaInvertParam(); #ifdef MPI MPI_Init(&argc, &argv); #endif while ((c = getopt(argc, argv, "och?vgf:p:")) != -1) { switch (c) { case 'v': g_verbose = 1; break; case 'g': do_gt = 1; break; case 'f': strcpy(filename, optarg); filename_set=1; break; case 'c': check_residuum = 1; fprintf(stdout, "# [invert_quda] will check residuum again\n"); break; case 'p': n_c = atoi(optarg); fprintf(stdout, "# [invert_quda] will use number of colors = %d\n", n_c); break; case 'o': full_orbit = 1; fprintf(stdout, "# 
[invert_quda] will invert for full orbit, if source momentum set\n"); break; case 'h': case '?': default: usage(); break; } } // get the time stamp g_the_time = time(NULL); /************************************** * set the default values, read input **************************************/ if(filename_set==0) strcpy(filename, "cvc.input"); if(g_proc_id==0) fprintf(stdout, "# Reading input from file %s\n", filename); read_input_parser(filename); /* some checks on the input data */ if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { if(g_proc_id==0) fprintf(stderr, "[invert_quda] Error, T and L's must be set\n"); usage(); } if(g_kappa == 0.) { if(g_proc_id==0) fprintf(stderr, "[invert_quda] Error, kappa should be > 0.n"); usage(); } // set number of openmp threads #ifdef OPENMP omp_set_num_threads(g_num_threads); #else fprintf(stdout, "[invert_quda_cg] Warning, resetting global number of threads to 1\n"); g_num_threads = 1; #endif /* initialize MPI parameters */ mpi_init(argc, argv); // the volume of a timeslice VOL3 = LX*LY*LZ; fprintf(stdout, "# [%2d] parameters:\n"\ "# [%2d] T = %3d\n"\ "# [%2d] Tstart = %3d\n",\ g_cart_id, g_cart_id, T, g_cart_id, Tstart); #ifdef MPI if(T==0) { fprintf(stderr, "[%2d] local T is zero; exit\n", g_cart_id); MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); exit(2); } #endif if(init_geometry() != 0) { fprintf(stderr, "ERROR from init_geometry\n"); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(1); } geometry(); /************************************** * initialize the QUDA library **************************************/ fprintf(stdout, "# [invert_quda] initializing quda\n"); initQuda(g_gpu_device_number); /************************************** * prepare the gauge field **************************************/ // read the gauge field from file alloc_gauge_field(&g_gauge_field, VOLUMEPLUSRAND); if(strcmp( gaugefilename_prefix, "identity")==0 ) { if(g_cart_id==0) fprintf(stdout, "# [invert_quda] Setting up unit gauge 
field\n"); for(ix=0;ix<VOLUME; ix++) { for(mu=0;mu<4;mu++) { _cm_eq_id(g_gauge_field+_GGI(ix,mu)); } } } else { if(g_gauge_file_format == 0) { // ILDG sprintf(filename, "%s.%.4d", gaugefilename_prefix, Nconf); if(g_cart_id==0) fprintf(stdout, "# Reading gauge field from file %s\n", filename); status = read_lime_gauge_field_doubleprec(filename); } else if(g_gauge_file_format == 1) { // NERSC sprintf(filename, "%s.%.5d", gaugefilename_prefix, Nconf); if(g_cart_id==0) fprintf(stdout, "# Reading gauge field from file %s\n", filename); status = read_nersc_gauge_field(g_gauge_field, filename, &plaq_r); } if(status != 0) { fprintf(stderr, "[invert_quda] Error, could not read gauge field"); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 12); MPI_Finalize(); #endif exit(12); } } #ifdef MPI xchange_gauge(); #endif // measure the plaquette plaquette(&plaq_m); if(g_cart_id==0) fprintf(stdout, "# Measured plaquette value: %25.16e\n", plaq_m); if(g_cart_id==0) fprintf(stdout, "# Read plaquette value : %25.16e\n", plaq_r); // allocate the smeared / qdp ordered gauge field alloc_gauge_field(&gauge_field_smeared, VOLUME); for(i=0;i<4;i++) { gauge_qdp[i] = gauge_field_smeared + i*18*VOLUME; } // transcribe the gauge field #ifdef OPENMP omp_set_num_threads(g_num_threads); #pragma omp parallel for private(ix,iy,mu) #endif for(ix=0;ix<VOLUME;ix++) { iy = g_lexic2eot[ix]; for(mu=0;mu<4;mu++) { _cm_eq_cm(gauge_qdp[mu_trans[mu]]+18*iy, g_gauge_field+_GGI(ix,mu)); } } // multiply timeslice T-1 with factor of -1 (antiperiodic boundary condition) #ifdef OPENMP omp_set_num_threads(g_num_threads); #pragma omp parallel for private(ix,iy) #endif for(ix=0;ix<VOL3;ix++) { iix = (T-1)*VOL3 + ix; iy = g_lexic2eot[iix]; _cm_ti_eq_re(gauge_qdp[mu_trans[0]]+18*iy, -1.); } // QUDA gauge parameters gauge_param.X[0] = LX_global; gauge_param.X[1] = LY_global; gauge_param.X[2] = LZ_global; gauge_param.X[3] = T_global; gauge_param.anisotropy = 1.0; gauge_param.type = QUDA_WILSON_LINKS; gauge_param.gauge_order = 
QUDA_QDP_GAUGE_ORDER; gauge_param.t_boundary = QUDA_ANTI_PERIODIC_T; gauge_param.cpu_prec = cpu_prec; gauge_param.cuda_prec = cuda_prec; gauge_param.reconstruct = QUDA_RECONSTRUCT_12; gauge_param.cuda_prec_sloppy = cuda_prec_sloppy; gauge_param.reconstruct_sloppy = QUDA_RECONSTRUCT_12; gauge_param.gauge_fix = QUDA_GAUGE_FIXED_NO; gauge_param.ga_pad = 0; // load the gauge field fprintf(stdout, "# [invert_quda] loading gauge field\n"); loadGaugeQuda((void*)gauge_qdp, &gauge_param); gauge_qdp[0] = NULL; gauge_qdp[1] = NULL; gauge_qdp[2] = NULL; gauge_qdp[3] = NULL; /********************************************* * APE smear the gauge field *********************************************/ memcpy(gauge_field_smeared, g_gauge_field, 72*VOLUME*sizeof(double)); if(N_ape>0) { fprintf(stdout, "# [invert_quda] APE smearing gauge field with paramters N_APE=%d, alpha_APE=%e\n", N_ape, alpha_ape); #ifdef OPENMP APE_Smearing_Step_threads(gauge_field_smeared, N_ape, alpha_ape); #else for(i=0; i<N_ape; i++) { APE_Smearing_Step(gauge_field_smeared, alpha_ape); } #endif } /* allocate memory for the spinor fields */ no_fields = 3; g_spinor_field = (double**)calloc(no_fields, sizeof(double*)); for(i=0; i<no_fields; i++) alloc_spinor_field(&g_spinor_field[i], VOLUMEPLUSRAND); /* the source locaton */ sl0 = g_source_location / (LX_global*LY_global*LZ); sl1 = ( g_source_location % (LX_global*LY_global*LZ) ) / ( LY_global*LZ); sl2 = ( g_source_location % ( LY_global*LZ) ) / ( LZ); sl3 = g_source_location % LZ; if(g_cart_id==0) fprintf(stdout, "# [invert_quda] global sl = (%d, %d, %d, %d)\n", sl0, sl1, sl2, sl3); source_proc_coords[0] = sl0 / T; source_proc_coords[1] = sl1 / LX; source_proc_coords[2] = sl2 / LY; source_proc_coords[3] = sl3 / LZ; #ifdef MPI MPI_Cart_rank(g_cart_grid, source_proc_coords, &source_proc_id); #else source_proc_id = 0; #endif have_source_flag = source_proc_id == g_cart_id; lsl0 = sl0 % T; lsl1 = sl1 % LX; lsl2 = sl2 % LY; lsl3 = sl3 % LZ; if(have_source_flag) { 
fprintf(stdout, "# [invert_quda] process %d has the source at (%d, %d, %d, %d)\n", g_cart_id, lsl0, lsl1, lsl2, lsl3); } // QUDA inverter parameters inv_param.dslash_type = QUDA_WILSON_DSLASH; // inv_param.inv_type = QUDA_BICGSTAB_INVERTER; inv_param.inv_type = QUDA_CG_INVERTER; inv_param.kappa = g_kappa; inv_param.tol = solver_precision; inv_param.maxiter = niter_max; inv_param.reliable_delta = reliable_delta; inv_param.solution_type = QUDA_MAT_SOLUTION; // inv_param.solve_type = QUDA_DIRECT_PC_SOLVE; inv_param.solve_type = QUDA_NORMEQ_PC_SOLVE; inv_param.matpc_type = QUDA_MATPC_EVEN_EVEN; // QUDA_MATPC_EVEN_EVEN; inv_param.dagger = QUDA_DAG_NO; inv_param.mass_normalization = QUDA_KAPPA_NORMALIZATION; //;QUDA_MASS_NORMALIZATION; inv_param.cpu_prec = cpu_prec; inv_param.cuda_prec = cuda_prec; inv_param.cuda_prec_sloppy = cuda_prec_sloppy; inv_param.preserve_source = QUDA_PRESERVE_SOURCE_NO; inv_param.dirac_order = QUDA_DIRAC_ORDER; inv_param.sp_pad = 0; inv_param.cl_pad = 0; inv_param.verbosity = QUDA_VERBOSE; // write initial rng state to file if(g_source_type==2 && g_coherent_source==2) { sprintf(rng_file_out, "%s.0", g_rng_filename); if( init_rng_stat_file (g_seed, rng_file_out) != 0 ) { fprintf(stderr, "[invert_quda] Error, could not write rng status\n"); exit(210); } } else if(g_source_type==3 || g_source_type==4) { if( init_rng_state(g_seed, &g_rng_state) != 0 ) { fprintf(stderr, "[invert_quda] Error, could initialize rng state\n"); exit(211); } } // check the source momenta if(g_source_momentum_set) { source_momentum = (int*)malloc(3*sizeof(int)); if(g_source_momentum[0]<0) g_source_momentum[0] += LX; if(g_source_momentum[1]<0) g_source_momentum[1] += LY; if(g_source_momentum[2]<0) g_source_momentum[2] += LZ; fprintf(stdout, "# [invert_quda] using final source momentum ( %d, %d, %d )\n", g_source_momentum[0], g_source_momentum[1], g_source_momentum[2]); if(full_orbit) { status = make_qcont_orbits_3d_parity_avg( &qlatt_id, &qlatt_count, &qlatt_list, 
&qlatt_nclass, &qlatt_rep, &qlatt_map); if(status != 0) { fprintf(stderr, "\n[invert_quda] Error while creating O_3-lists\n"); exit(4); } source_momentum_class = qlatt_id[g_ipt[0][g_source_momentum[0]][g_source_momentum[1]][g_source_momentum[2]]]; source_momentum_no = qlatt_count[source_momentum_class]; source_momentum_runs = source_momentum_class==0 ? 1 : source_momentum_no + 1; fprintf(stdout, "# [] source momentum belongs to class %d with %d members, which means %d runs\n", source_momentum_class, source_momentum_no, source_momentum_runs); } } /*********************************************** * loop on spin-color-index ***********************************************/ for(isc=g_source_index[0]; isc<=g_source_index[1]; isc++) { ispin = isc / n_c; icol = isc % n_c; for(imom=0; imom<source_momentum_runs; imom++) { /*********************************************** * set source momentum ***********************************************/ if(g_source_momentum_set) { if(imom == 0) { if(full_orbit) { source_momentum[0] = 0; source_momentum[1] = 0; source_momentum[2] = 0; } else { source_momentum[0] = g_source_momentum[0]; source_momentum[1] = g_source_momentum[1]; source_momentum[2] = g_source_momentum[2]; } } else { source_momentum[0] = qlatt_map[source_momentum_class][imom-1] / (LY*LZ); source_momentum[1] = ( qlatt_map[source_momentum_class][imom-1] % (LY*LZ) ) / LZ; source_momentum[2] = qlatt_map[source_momentum_class][imom-1] % LZ; } fprintf(stdout, "# [] run no. 
%d, source momentum (%d, %d, %d)\n", imom, source_momentum[0], source_momentum[1], source_momentum[2]); } /*********************************************** * prepare the souce ***********************************************/ if(g_read_source == 0) { // create source switch(g_source_type) { case 0: // point source fprintf(stdout, "# [invert_quda] Creating point source\n"); for(ix=0;ix<24*VOLUME;ix++) g_spinor_field[0][ix] = 0.; if(have_source_flag) { if(g_source_momentum_set) { phase = 2*M_PI*( source_momentum[0]*lsl1/(double)LX + source_momentum[1]*lsl2/(double)LY + source_momentum[2]*lsl3/(double)LZ ); g_spinor_field[0][_GSI(g_source_location) + 2*(n_c*ispin+icol) ] = cos(phase); g_spinor_field[0][_GSI(g_source_location) + 2*(n_c*ispin+icol)+1] = sin(phase); } else { g_spinor_field[0][_GSI(g_source_location) + 2*(n_c*ispin+icol) ] = 1.; } } if(g_source_momentum_set) { sprintf(source_filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d.qx%.2dqy%.2dqz%.2d", filename_prefix, Nconf, sl0, sl1, sl2, sl3, n_c*ispin+icol, source_momentum[0], source_momentum[1], source_momentum[2]); } else { sprintf(source_filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d", filename_prefix, Nconf, sl0, sl1, sl2, sl3, n_c*ispin+icol); } break; case 2: // timeslice source if(g_coherent_source==1) { fprintf(stdout, "# [invert_quda] Creating coherent timeslice source\n"); status = prepare_coherent_timeslice_source(g_spinor_field[0], gauge_field_smeared, g_coherent_source_base, g_coherent_source_delta, VOLUME, g_rng_filename, NULL); if(status != 0) { fprintf(stderr, "[invert_quda] Error from prepare source, status was %d\n", status); exit(123); } timeslice = g_coherent_source_base; } else { if(g_coherent_source==2) { strcpy(rng_file_in, rng_file_out); if(isc == g_source_index[1]) { strcpy(rng_file_out, g_rng_filename); } else { sprintf(rng_file_out, "%s.%d", g_rng_filename, isc+1); } timeslice = (g_coherent_source_base+isc*g_coherent_source_delta)%T_global; fprintf(stdout, "# [invert_quda] Creating timeslice 
source\n"); status = prepare_timeslice_source(g_spinor_field[0], gauge_field_smeared, timeslice, VOLUME, rng_file_in, rng_file_out); if(status != 0) { fprintf(stderr, "[invert_quda] Error from prepare source, status was %d\n", status); exit(123); } } else { fprintf(stdout, "# [invert_quda] Creating timeslice source\n"); status = prepare_timeslice_source(g_spinor_field[0], gauge_field_smeared, g_source_timeslice, VOLUME, g_rng_filename, g_rng_filename); if(status != 0) { fprintf(stderr, "[invert_quda] Error from prepare source, status was %d\n", status); exit(124); } timeslice = g_source_timeslice; } } if(g_source_momentum_set) { sprintf(source_filename, "%s.%.4d.%.2d.%.5d.qx%.2dqy%.2dqz%.2d", filename_prefix, Nconf, timeslice, isc, source_momentum[0], source_momentum[1], source_momentum[2]); } else { sprintf(source_filename, "%s.%.4d.%.2d.%.5d", filename_prefix, Nconf, timeslice, isc); } break; case 3: // timeslice sources for one-end trick (spin dilution) fprintf(stdout, "# [invert_quda] Creating timeslice source for one-end-trick\n"); status = prepare_timeslice_source_one_end(g_spinor_field[0], gauge_field_smeared, g_source_timeslice, source_momentum, isc%n_s, g_rng_state, \ ( isc%n_s==(n_s-1) && imom==source_momentum_runs-1 ) ); if(status != 0) { fprintf(stderr, "[invert_quda] Error from prepare source, status was %d\n", status); exit(125); } c = N_Jacobi > 0 ? 
isc%n_s + n_s : isc%n_s; if(g_source_momentum_set) { sprintf(source_filename, "%s.%.4d.%.2d.%.2d.qx%.2dqy%.2dqz%.2d", filename_prefix, Nconf, g_source_timeslice, c, source_momentum[0], source_momentum[1], source_momentum[2]); } else { sprintf(source_filename, "%s.%.4d.%.2d.%.2d", filename_prefix, Nconf, g_source_timeslice, c); } break; case 4: // timeslice sources for one-end trick (spin and color dilution ) fprintf(stdout, "# [invert_quda] Creating timeslice source for one-end-trick\n"); status = prepare_timeslice_source_one_end_color(g_spinor_field[0], gauge_field_smeared, g_source_timeslice, source_momentum,\ isc%(n_s*n_c), g_rng_state, ( isc%(n_s*n_c)==(n_s*n_c-1) && imom==source_momentum_runs-1 ) ); if(status != 0) { fprintf(stderr, "[invert_quda] Error from prepare source, status was %d\n", status); exit(126); } c = N_Jacobi > 0 ? isc%(n_s*n_c) + (n_s*n_c) : isc%(n_s*n_c); if(g_source_momentum_set) { sprintf(source_filename, "%s.%.4d.%.2d.%.2d.qx%.2dqy%.2dqz%.2d", filename_prefix, Nconf, g_source_timeslice, c, source_momentum[0], source_momentum[1], source_momentum[2]); } else { sprintf(source_filename, "%s.%.4d.%.2d.%.2d", filename_prefix, Nconf, g_source_timeslice, c); } break; default: fprintf(stderr, "\nError, unrecognized source type\n"); exit(32); break; } } else { // read source switch(g_source_type) { case 0: // point source if(g_source_momentum_set) { sprintf(source_filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d.qx%.2dqy%.2dqz%.2d", \ filename_prefix2, Nconf, sl0, sl1, sl2, sl3, isc, source_momentum[0], source_momentum[1], source_momentum[2]); } else { sprintf(source_filename, "%s.%.4d.t%.2dx%.2dy%.2dz%.2d.%.2d", filename_prefix2, Nconf, sl0, sl1, sl2, sl3, isc); } fprintf(stdout, "# [invert_quda] reading source from file %s\n", source_filename); status = read_lime_spinor(g_spinor_field[0], source_filename, 0); if(status != 0) { fprintf(stderr, "# [invert_quda] Errro, could not read source from file %s\n", source_filename); exit(115); } break; case 2: 
// timeslice source if(g_source_momentum_set) { sprintf(source_filename, "%s.%.4d.%.2d.%.5d.qx%.2dqy%.2dqz%.2d", filename_prefix2, Nconf, g_source_timeslice, isc, source_momentum[0], source_momentum[1], source_momentum[2]); } else { sprintf(source_filename, "%s.%.4d.%.2d.%.5d", filename_prefix2, Nconf, g_source_timeslice, isc); } fprintf(stdout, "# [invert_quda] reading source from file %s\n", source_filename); status = read_lime_spinor(g_spinor_field[0], source_filename, 0); if(status != 0) { fprintf(stderr, "# [invert_quda] Errro, could not read source from file %s\n", source_filename); exit(115); } break; default: fprintf(stderr, "[] Error, unrecognized source type for reading\n"); exit(104); break; } } // of if g_read_source //sprintf(filename, "%s.ascii", source_filename); //ofs = fopen(filename, "w"); //printf_spinor_field(g_spinor_field[0], ofs); //fclose(ofs); if(g_write_source) { status = write_propagator(g_spinor_field[0], source_filename, 0, g_propagator_precision); if(status != 0) { fprintf(stderr, "Error from write_propagator, status was %d\n", status); exit(27); } } // smearing if(N_Jacobi > 0) { #ifdef OPENMP Jacobi_Smearing_Step_one_threads(gauge_field_smeared, g_spinor_field[0], g_spinor_field[1], N_Jacobi, kappa_Jacobi); #else for(c=0; c<N_Jacobi; c++) { Jacobi_Smearing_Step_one(gauge_field_smeared, g_spinor_field[0], g_spinor_field[1], kappa_Jacobi); } #endif } // multiply with g2 for(ix=0;ix<VOLUME;ix++) { _fv_eq_gamma_ti_fv(g_spinor_field[1]+_GSI(ix), 2, g_spinor_field[0]+_GSI(ix)); } // transcribe the spinor field to even-odd ordering with coordinates (x,y,z,t) for(ix=0;ix<VOLUME;ix++) { iy = g_lexic2eot[ix]; _fv_eq_fv(g_spinor_field[2]+_GSI(iy), g_spinor_field[1]+_GSI(ix)); } /*********************************************** * perform the inversion ***********************************************/ fprintf(stdout, "# [invert_quda] starting inversion\n"); ratime = (double)clock() / CLOCKS_PER_SEC; for(ix=0;ix<VOLUME;ix++) { 
_fv_eq_zero(g_spinor_field[1]+_GSI(ix) ); } invertQuda(g_spinor_field[1], g_spinor_field[2], &inv_param); retime = (double)clock() / CLOCKS_PER_SEC; fprintf(stdout, "# [invert_quda] inversion done in %e seconds\n", retime-ratime); fprintf(stdout, "# [invert_quda] Device memory used:\n\tSpinor: %f GiB\n\tGauge: %f GiB\n", inv_param.spinorGiB, gauge_param.gaugeGiB); if(inv_param.mass_normalization == QUDA_KAPPA_NORMALIZATION) { _2_kappa = 2. * g_kappa; for(ix=0;ix<VOLUME;ix++) { _fv_ti_eq_re(g_spinor_field[1]+_GSI(ix), _2_kappa ); } } // transcribe the spinor field to lexicographical order with (t,x,y,z) for(ix=0;ix<VOLUME;ix++) { iy = g_lexic2eot[ix]; _fv_eq_fv(g_spinor_field[2]+_GSI(ix), g_spinor_field[1]+_GSI(iy)); } // multiply with g2 for(ix=0;ix<VOLUME;ix++) { _fv_eq_gamma_ti_fv(g_spinor_field[1]+_GSI(ix), 2, g_spinor_field[2]+_GSI(ix)); } /*********************************************** * check residuum ***********************************************/ if(check_residuum) { // apply the Wilson Dirac operator in the gamma-basis defined in cvc_linalg, // which uses the tmLQCD conventions (same as in contractions) // without explicit boundary conditions Q_Wilson_phi(g_spinor_field[2], g_spinor_field[1]); for(ix=0;ix<VOLUME;ix++) { _fv_mi_eq_fv(g_spinor_field[2]+_GSI(ix), g_spinor_field[0]+_GSI(ix)); } spinor_scalar_product_re(&norm, g_spinor_field[2], g_spinor_field[2], VOLUME); spinor_scalar_product_re(&norm2, g_spinor_field[0], g_spinor_field[0], VOLUME); fprintf(stdout, "\n# [invert_quda] absolut residuum squared: %e; relative residuum %e\n", norm, sqrt(norm/norm2) ); } /*********************************************** * write the solution ***********************************************/ sprintf(filename, "%s.inverted", source_filename); fprintf(stdout, "# [invert_quda] writing propagator to file %s\n", filename); status = write_propagator(g_spinor_field[1], filename, 0, g_propagator_precision); if(status != 0) { fprintf(stderr, "Error from write_propagator, 
status was %d\n", status); exit(22); } } // of loop on momenta } // of isc /*********************************************** * free the allocated memory, finalize ***********************************************/ // finalize the QUDA library fprintf(stdout, "# [invert_quda] finalizing quda\n"); endQuda(); free(g_gauge_field); free(gauge_field_smeared); for(i=0; i<no_fields; i++) free(g_spinor_field[i]); free(g_spinor_field); free_geometry(); if(g_source_momentum_set && full_orbit) { finalize_q_orbits(&qlatt_id, &qlatt_count, &qlatt_list, &qlatt_rep); if(qlatt_map != NULL) { free(qlatt_map[0]); free(qlatt_map); } } if(source_momentum != NULL) free(source_momentum); #ifdef MPI MPI_Finalize(); #endif if(g_cart_id==0) { g_the_time = time(NULL); fprintf(stdout, "\n# [invert_quda] %s# [invert_quda] end of run\n", ctime(&g_the_time)); fprintf(stderr, "\n# [invert_quda] %s# [invert_quda] end of run\n", ctime(&g_the_time)); } return(0); }
/**
 * Build the 3D Cartesian process topology managed by this object.
 *
 * Steps, in order:
 *   1. store the requested process grid and query rank / size on
 *      MPI_COMM_WORLD,
 *   2. validate the decomposition (process count matches the grid, no
 *      direction has fewer cells than processes, every grid extent is of the
 *      form 2^n); Finalise() is called on failure,
 *   3. create the non-periodic Cartesian communicator comm3d, overwrite
 *      `rank` with the rank inside comm3d and determine `coords`,
 *   4. determine the six face neighbours, the 3x3x3 `Neighbour` table and
 *      the `AllRanks` coordinate -> rank lookup,
 *   5. create the xy / xz / yz plane communicators with MPI_Cart_sub and
 *      store the rank of this process in each of them.
 *
 * @param nproc number of processes per direction
 * @param mx    grid extent per direction (checked against nproc and 2^n)
 *
 * NOTE(review): the code keeps running after Finalise(); presumably
 * Finalise() does not return -- confirm.
 */
void mpi_manager_3D::setup(NumArray<int> &nproc, NumArray<int> &mx) {

    // Save number of processors in each dimension
    for(int dir=0; dir<DIM; ++dir) {
        this->nproc[dir] = nproc[dir];
    }

    // Determine the rank of the current task
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    // Get number of ranks from MPI
    int ntasks;
    MPI_Comm_size(MPI_COMM_WORLD, &ntasks);
    this->ntasks = ntasks;

    // Set the distribution of processes:
    if(ntasks != nproc[0]*nproc[1]*nproc[2]){
        std::cerr << " Wrong number of processes " << std::endl;
        std::cout << ntasks << " " << nproc[0]*nproc[1]*nproc[2] << std::endl;
        Finalise();
    }

    if(rank==0) {
        std::cout << " Number of tasks: " << ntasks << std::endl;
    }

    // Check if grid can be subdivided as desired
    for(int dir = 0; dir < DIM; ++dir) {
        if(mx[dir] < nproc[dir] && nproc[dir] > 1) {
            if(rank == 0) {
                std::cerr << " Wrong grid topology for dimension ";
                std::cerr << dir << std::endl;
                std::cerr << "  mx[" << dir << "]:" << mx[dir] << std::endl;
                std::cerr << "  nproc[" << dir << "]:" << nproc[dir] << std::endl;
            }
            Finalise();
        }
    }

    // Check if grid is a power of 2:
    // eps absorbs rounding errors of the logarithm before truncation
    double eps = 1.e-12;
    for(int dir = 0; dir < DIM; ++dir) {
        double exponent = log(mx[dir])/log(2.);
        int i_exponent = static_cast<int>(exponent+eps);

        if(exponent - i_exponent > 2.*eps) {
            if(rank == 0) {
                std::cerr << " Error: grid must be of the form mx = 2^n ";
                std::cerr << std::endl;
                std::cerr << " Exiting " << std::endl;
            }
            Finalise();
        }
    }

    // Grid is not periodic
    int periods[3] = {false, false, false};
    int reorder = false;
    // If all is okay: Create new communicator "comm3d"
    MPI_Cart_create(MPI_COMM_WORLD, DIM, nproc, periods, reorder, &comm3d);

    // Retrieve the cartesian topology
    if (rank == 0) {
        int TopoType;
        std::cout << " Cart topology:  ";
        MPI_Topo_test(comm3d, &TopoType);
        switch (TopoType) {
        case MPI_UNDEFINED :
            std::cout << " MPI_UNDEFINED " << std::endl;
            break;
        case MPI_GRAPH :
            std::cout << "MPI_GRAPH" << std::endl;
            break;
        case MPI_CART :
            std::cout << "MPI_CART" << std::endl;
            break;
        }
    }

    // Determine rank again for cartesian communicator -> overwrite rank
    MPI_Comm_rank(comm3d, &rank);

    // std::cout << " my rank: " << rank << std::endl;

    // Translate rank to coordinates
    MPI_Cart_coords(comm3d, rank, DIM, coords);

    // // Backwards translation
    // int TranslateRank;
    // MPI_Cart_rank(comm3d, coords, &TranslateRank);

    // Find neighbouring ranks
    // Syntax: comm3d, shift direction, displacement, source, destination
    MPI_Cart_shift(comm3d, 0, 1, &left , &right);
    MPI_Cart_shift(comm3d, 1, 1, &front, &back);
    MPI_Cart_shift(comm3d, 2, 1, &bottom, &top);

    // std::cout << " My rank " << rank << " " << left << " " << right << " " << front << " " << back << " " << bottom << " " << top << std::endl;
    if(rank==0) {
        std::cout << " nearby " << right << " " << back << " " << top << std::endl;
    }

    // Determine ranks of neighbour processes:
    // The shifted coordinates are wrapped into [0, nproc), so at the domain
    // boundary the table refers to the process on the opposite side although
    // comm3d itself is created non-periodic.
    int shiftcoord[DIM];
    int lbound[DIM],ubound[DIM];
    for(int dim=0;dim<DIM;dim++){
        lbound[dim]=-1;
        ubound[dim]= 1;
    }
    Neighbour.resize(lbound,ubound);
    Neighbour.clear();
    for(int dim0=-1; dim0<=1; dim0++){
        shiftcoord[0] = (coords[0]+dim0)%nproc[0];
        if(shiftcoord[0] < 0) shiftcoord[0]+=nproc[0];
        for(int dim1=-1; dim1<=1; dim1++){
            shiftcoord[1] = (coords[1]+dim1)%nproc[1];
            if(shiftcoord[1] < 0) shiftcoord[1]+=nproc[1];
            for(int dim2=-1; dim2<=1; dim2++){
                shiftcoord[2] = (coords[2]+dim2)%nproc[2];
                if(shiftcoord[2] < 0) shiftcoord[2]+=nproc[2];
                MPI_Cart_rank(comm3d, shiftcoord,&Neighbour(dim0,dim1,dim2));
            }
        }
    }

    // if(rank==1) {
    //     for(int dim0=-1; dim0<=1; dim0++){
    //         for(int dim1=-1; dim1<=1; dim1++){
    //             for(int dim2=-1; dim2<=1; dim2++){
    //                 std::cout << " neighbour " << dim0 << " " << dim1 << " ";
    //                 std::cout << dim2 << " " << Neighbour(dim0, dim1, dim2);
    //                 std::cout << std::endl;
    //             }
    //         }
    //     }
    // }

    // Determine absolute position of any rank:
    AllRanks.resize(Index::set(0,0,0),
                    Index::set(nproc[0]-1,nproc[1]-1,nproc[2]-1));
    for(int dim0=0; dim0<nproc[0]; ++dim0) {
        for(int dim1=0; dim1<nproc[1]; ++dim1) {
            for(int dim2=0; dim2<nproc[2]; ++dim2) {
                int coord[3] = {dim0, dim1, dim2};
                MPI_Cart_rank(comm3d, coord, &AllRanks(dim0, dim1, dim2));
            }
        }
    }

    // if(rank==2) {
    //     std::cout << " Neigh: " << rank << " "<<Neighbour(0,0,0) << " " << AllRanks(2,0,0) << std::endl;
    // }

    // Now make additional mpi groups relating to planes:
    int count(0);
    int num_xy = nproc[0]*nproc[1];
    int num_xz = nproc[0]*nproc[2];
    int num_yz = nproc[1]*nproc[2];

    // One rank list per plane position along each axis
    NumMatrix<int,1> x_ranks[nproc[0]];
    NumMatrix<int,1> y_ranks[nproc[1]];
    NumMatrix<int,1> z_ranks[nproc[2]];

    // Walk through z-axis -- xy plane
    for(int irz=0; irz<nproc[2]; irz++) {
        count = 0;
        z_ranks[irz].resize(Index::set(0), Index::set(num_xy));
        for(int irx=0; irx<nproc[0]; irx++) {
            for(int iry=0; iry<nproc[1]; iry++) {
                z_ranks[irz](count) = AllRanks(irx,iry,irz);
                count++;
            }
        }
    }

    // Walk through y-axis -- xz plane
    for(int iry=0; iry<nproc[1]; iry++) {
        count = 0;
        y_ranks[iry].resize(Index::set(0), Index::set(num_xz));
        for(int irx=0; irx<nproc[0]; irx++) {
            for(int irz=0; irz<nproc[2]; irz++) {
                y_ranks[iry](count) = AllRanks(irx,iry,irz);
                count++;
            }
        }
    }

    // Walk through x-axis -- yz plane
    for(int irx=0; irx<nproc[0]; irx++) {
        count = 0;
        x_ranks[irx].resize(Index::set(0), Index::set(num_yz));
        for(int iry=0; iry<nproc[1]; iry++) {
            for(int irz=0; irz<nproc[2]; irz++) {
                x_ranks[irx](count) = AllRanks(irx,iry,irz);
                count++;
            }
        }
    }

    // Build local groups:
    MPI_Group group_all, group_constz, group_consty, group_constx;

    // Get standard group handle:
    MPI_Comm_group(comm3d, &group_all);

    // Divide tasks into groups based on z-position
    MPI_Group_incl(group_all, num_xy, z_ranks[coords[2]], &group_constz);
    // Divide tasks into groups based on y-position
    MPI_Group_incl(group_all, num_xz, y_ranks[coords[1]], &group_consty);
    // Divide tasks into groups based on x-position
    MPI_Group_incl(group_all, num_yz, x_ranks[coords[0]], &group_constx);

    // // Make corresponding communicators:
    // MPI_Comm_create(comm3d, group_constz, &comm_plane_xy); // const z
    // MPI_Comm_create(comm3d, group_consty, &comm_plane_xz); // const y
    // MPI_Comm_create(comm3d, group_constx, &comm_plane_yz); // const x

    // // Get corresponding rank
    // MPI_Group_rank (group_constz, &rank_plane_xy);
    // MPI_Group_rank (group_consty, &rank_plane_xz);
    // MPI_Group_rank (group_constx, &rank_plane_yz);

    // The plane communicators are created with MPI_Cart_sub below, so the
    // group handles are not needed any further: release them instead of
    // leaking one set of handles per call.
    MPI_Group_free(&group_constz);
    MPI_Group_free(&group_consty);
    MPI_Group_free(&group_constx);
    MPI_Group_free(&group_all);

    // remain_dims[d] != 0 keeps direction d in the sub-communicator
    int remain_dims[3];

    // x-y plane:
    remain_dims[0] = 1;
    remain_dims[1] = 1;
    remain_dims[2] = 0;
    MPI_Cart_sub(comm3d, remain_dims, &comm_plane_xy);
    MPI_Comm_rank(comm_plane_xy, &rank_plane_xy);

    // x-z plane
    remain_dims[0] = 1;
    remain_dims[1] = 0;
    remain_dims[2] = 1;
    MPI_Cart_sub(comm3d, remain_dims, &comm_plane_xz);
    MPI_Comm_rank(comm_plane_xz, &rank_plane_xz);

    // y-z plane
    remain_dims[0] = 0;
    remain_dims[1] = 1;
    remain_dims[2] = 1;
    MPI_Cart_sub(comm3d, remain_dims, &comm_plane_yz);
    MPI_Comm_rank(comm_plane_yz, &rank_plane_yz);

}
/**
 * Gathers the local spinor fields of a two-dimensional slab of the process
 * grid on one "collector" process per slab and runs a 2d FFT on the result.
 *
 * The slab is spanned by the process-grid directions which[] = {0,1} of
 * dims[] = {g_nproc_t, g_nproc_x, g_nproc_y, g_nproc_z}. The collector of a
 * slab is the process whose coordinates in these two directions are both 0;
 * every other process sends its local field to the collector that shares its
 * remaining two coordinates.
 *
 * @param localSpinorField  local spinor field of this process (VOLUME sites)
 * @param collectionRank    out: TRUE on collector processes, FALSE otherwise
 * @param field_collection  out: on collectors the array of gathered fields
 *                          (entry 0 is the local field, entry
 *                          c0*dims[which[1]]+c1 the field of the process at
 *                          slab coordinates (c0,c1)); NULL otherwise
 * @param membuff           out: on collectors the memory buffer backing
 *                          field_collection; NULL otherwise
 *
 * Without PARALLELXYZT, MPI and HAVE_FFTW only an error message is printed
 * (by process 0) and none of the output arguments is written.
 */
void spinor_fft_reduce_2d(spinor *localSpinorField,int *collectionRank,spinor*** field_collection,spinor **membuff){
  /* this implementation is intended for four dimensional parallelisation */
#if (defined PARALLELXYZT && defined MPI && defined HAVE_FFTW)

  int sendRecvCoord[4];
  int i;
  int dims[]={g_nproc_t,g_nproc_x,g_nproc_y,g_nproc_z};

  /* logfile variables */
  const char *logFilePrefix="Process";
  char logFileName[512];
  FILE *logFile;

  const int MSG_LOCALDATA = 457;
  MPI_Datatype mpi_local_spinor;
  const int which[]={0,1};

  (*field_collection)=NULL;
  (*membuff)=NULL;

  sprintf(logFileName,"./%s_%02d.log",logFilePrefix,g_cart_id);
  /* logging is best effort: logFile may be NULL and is checked before use */
  logFile=fopen(logFileName,"a");

  /* one element of this type describes a complete local spinor field */
  MPI_Type_contiguous(VOLUME, field_point, &mpi_local_spinor);
  MPI_Type_commit(&mpi_local_spinor);

  for(i=0;i<4;i++) sendRecvCoord[i]=g_proc_coords[i];

  if( g_proc_coords[which[0]] == 0 && g_proc_coords[which[1]] == 0 ){

    /* i am one of the nodes where data is accumulated */
    spinor **accu_field;
    spinor **fft_field;
    spinor *memory_buffer_accu_field;
    spinor *memory_buffer_fft_field;
    int REDUCTIONVOLUME=1;
    int recvRank;
    MPI_Request *requests;
    MPI_Status *status;
    int request_count=0;
    int num_requests;
    fftw_plan local_2d_fft_forward;

    *collectionRank=TRUE;

    /* calculate the number of reduced 2d volume accumulated in this node */
    /* number of spinor fields in local units */
    REDUCTIONVOLUME*=dims[which[0]]*dims[which[1]];

    /* number of receive messages */
    num_requests=REDUCTIONVOLUME-1;

    /* reserve space for receive messages */
    requests=(MPI_Request*)malloc(sizeof(MPI_Request)*num_requests);
    status=(MPI_Status*)malloc(sizeof(MPI_Status)*num_requests);

    /* malloc(0) may legitimately return NULL, so only a failed non-empty
     * allocation is an error */
    if( num_requests > 0 && ( requests == NULL || status == NULL ) ){
      fprintf(stderr,"Error: could not allocate memory for %d receive requests in spinor_fft_reduce_2d\n",num_requests);
      MPI_Abort(g_cart_grid,1);
    }

    if(logFile!=NULL) fprintf(logFile,"reduction volume = %d\n",REDUCTIONVOLUME);

    /* allocate space for spinor field collection */
    allocate_spinor_field_array(&accu_field,&memory_buffer_accu_field,VOLUME,REDUCTIONVOLUME);
    allocate_spinor_field_array(&fft_field,&memory_buffer_fft_field,VOLUME,REDUCTIONVOLUME);

    /* receive from certain nodes pieces of the spinor field */
    for(sendRecvCoord[which[0]] = 0 ; sendRecvCoord[which[0]]< dims[which[0]] ; sendRecvCoord[which[0]]++){
      for(sendRecvCoord[which[1]] = 0 ; sendRecvCoord[which[1]]< dims[which[1]] ; sendRecvCoord[which[1]]++){
        /* the collector itself does not send, its field is copied below */
        if( sendRecvCoord[which[0]] != 0 || sendRecvCoord[which[1]] != 0){

          MPI_Cart_rank(g_cart_grid,sendRecvCoord,&recvRank);

          MPI_Irecv(accu_field[sendRecvCoord[which[0]]*dims[which[1]]+sendRecvCoord[which[1]] ] /* buffer */,
                    1,                 /* how many */
                    mpi_local_spinor,  /* mpi data type */
                    recvRank,          /* from whom i get it */
                    MSG_LOCALDATA,     /* msg id */
                    g_cart_grid,       /* communicator */
                    requests+request_count);
          ++request_count;
        }
      }
    }

    /* wait until all request finished */
    MPI_Waitall(num_requests, requests, status);

    /* the local piece goes to slot 0 */
    assign(accu_field[0],localSpinorField,VOLUME);

    /* transpose in xp-t space */
    spinor_fft_transpose_xp_t(fft_field[0],accu_field[0],dims[0],dims[1],TRUE,1.);

    /* create fftw plan: input fft_field, output accu_field */
    local_2d_fft_forward=spinor_fftw_plan2d(fft_field[0],accu_field[0],T*dims[0],LX*dims[1],LY*LZ,1,FFTW_ESTIMATE);
    fftw_execute(local_2d_fft_forward);
    fftw_destroy_plan(local_2d_fft_forward);

    /* assign(accu_field[0],fft_field[0],VOLUME*REDUCTIONVOLUME); */

    /* NOTE(review): only the data buffer of the fft fields is released here;
     * whether the pointer array fft_field needs a separate free depends on
     * allocate_spinor_field_array -- confirm */
    free_spinor_field_array(&memory_buffer_fft_field); memory_buffer_fft_field=NULL;
    /* free_spinor_field_array(&memory_buffer_accu_field); memory_buffer_accu_field=NULL; */

    /* hand the accumulated fields over to the caller */
    (*field_collection)=accu_field;
    (*membuff)=memory_buffer_accu_field;

    free(requests); requests = NULL;
    free(status); status=NULL;

  } else {
    int sendRank;
    MPI_Request request;
    MPI_Status status;

    *collectionRank=FALSE;

    /* coordinates of the "root" */
    sendRecvCoord[which[0]]=0;
    sendRecvCoord[which[1]]=0;

    MPI_Cart_rank(g_cart_grid,sendRecvCoord,&sendRank);

    MPI_Isend(localSpinorField,1,mpi_local_spinor,sendRank,MSG_LOCALDATA,g_cart_grid,&request);

    MPI_Wait(&request,&status);
  }

  MPI_Type_free(&mpi_local_spinor);

  if(logFile!=NULL) fclose(logFile);

#else
  if(g_proc_id==0)
    fprintf(stderr,"Error: Please choose FOUR dimensional parallelization!!!\n");
#endif
}
int main( int argc, char **argv ) { int rank, size, i; int errors=0; int dims[NUM_DIMS]; int periods[NUM_DIMS]; int coords[NUM_DIMS]; int new_coords[NUM_DIMS]; int reorder = 0; MPI_Comm comm_temp, comm_cart, new_comm; int topo_status; int ndims; int new_rank; int remain_dims[NUM_DIMS]; int newnewrank; MPI_Init( &argc, &argv ); MPI_Comm_rank( MPI_COMM_WORLD, &rank ); MPI_Comm_size( MPI_COMM_WORLD, &size ); /* Clear dims array and get dims for topology */ for(i=0;i<NUM_DIMS;i++) { dims[i] = 0; periods[i] = 0; } MPI_Dims_create ( size, NUM_DIMS, dims ); /* Make a new communicator with a topology */ MPI_Cart_create ( MPI_COMM_WORLD, 2, dims, periods, reorder, &comm_temp ); MPI_Comm_dup ( comm_temp, &comm_cart ); /* Determine the status of the new communicator */ MPI_Topo_test ( comm_cart, &topo_status ); if (topo_status != MPI_CART) errors++; /* How many dims do we have? */ MPI_Cartdim_get( comm_cart, &ndims ); if ( ndims != NUM_DIMS ) errors++; /* Get the topology, does it agree with what we put in? */ for(i=0;i<NUM_DIMS;i++) { dims[i] = 0; periods[i] = 0; } MPI_Cart_get ( comm_cart, NUM_DIMS, dims, periods, coords ); /* Check that the coordinates are correct */ #if NUM_DIMS == 2 if (rank != coords[1] + coords[0] * dims[1]) { errors++; fprintf( stderr, "Did not get expected coordinate (row major required by MPI standard 6.2)\n" ); } #endif /* Does the mapping from coords to rank work? */ MPI_Cart_rank ( comm_cart, coords, &new_rank ); if ( new_rank != rank ) errors++; /* Does the mapping from rank to coords work */ MPI_Cart_coords ( comm_cart, rank, NUM_DIMS, new_coords ); for (i=0;i<NUM_DIMS;i++) if ( coords[i] != new_coords[i] ) errors++; /* Let's shift in each dimension and see how it works! */ /* Because it's late and I'm tired, I'm not making this */ /* automatically test itself. 
*/ for (i=0;i<NUM_DIMS;i++) { int source, dest; MPI_Cart_shift(comm_cart, i, 1, &source, &dest); #ifdef VERBOSE printf ("[%d] Shifting %d in the %d dimension\n",rank,1,i); printf ("[%d] source = %d dest = %d\n",rank,source,dest); #endif } /* Subdivide */ remain_dims[0] = 0; for (i=1; i<NUM_DIMS; i++) remain_dims[i] = 1; MPI_Cart_sub ( comm_cart, remain_dims, &new_comm ); /* Determine the status of the new communicator */ MPI_Topo_test ( new_comm, &topo_status ); if (topo_status != MPI_CART) errors++; /* How many dims do we have? */ MPI_Cartdim_get( new_comm, &ndims ); if ( ndims != NUM_DIMS-1 ) errors++; /* Get the topology, does it agree with what we put in? */ for(i=0;i<NUM_DIMS-1;i++) { dims[i] = 0; periods[i] = 0; } MPI_Cart_get ( new_comm, ndims, dims, periods, coords ); /* Does the mapping from coords to rank work? */ MPI_Comm_rank ( new_comm, &newnewrank ); MPI_Cart_rank ( new_comm, coords, &new_rank ); if ( new_rank != newnewrank ) errors++; /* Does the mapping from rank to coords work */ MPI_Cart_coords ( new_comm, new_rank, NUM_DIMS -1, new_coords ); for (i=0;i<NUM_DIMS-1;i++) if ( coords[i] != new_coords[i] ) errors++; /* We're at the end */ MPI_Comm_free( &new_comm ); MPI_Comm_free( &comm_temp ); MPI_Comm_free( &comm_cart ); Test_Waitforall( ); if (errors) printf( "[%d] done with %d ERRORS!\n", rank,errors ); MPI_Finalize(); return 0; }
/* Check that the MPI implementation properly handles zero-dimensional Cartesian communicators - the original standard implies that these should be consistent with higher dimensional topologies and thus these should work with any MPI implementation. MPI 2.1 made this requirement explicit. */ int main(int argc, char *argv[]) { int errs = 0; int size, rank, ndims; MPI_Comm comm, newcomm; MTest_Init(&argc, &argv); /* Create a new cartesian communicator in a subset of the processes */ MPI_Comm_size(MPI_COMM_WORLD, &size); MPI_Comm_rank(MPI_COMM_WORLD, &rank); if (size < 2) { fprintf(stderr, "This test needs at least 2 processes\n"); MPI_Abort(MPI_COMM_WORLD, 1); } MPI_Cart_create(MPI_COMM_WORLD, 0, NULL, NULL, 0, &comm); if (comm != MPI_COMM_NULL) { int csize; MPI_Comm_size(comm, &csize); if (csize != 1) { errs++; fprintf(stderr, "Sizes is wrong in cart communicator. Is %d, should be 1\n", csize); } /* This function is not meaningful, but should not fail */ MPI_Dims_create(1, 0, NULL); ndims = -1; MPI_Cartdim_get(comm, &ndims); if (ndims != 0) { errs++; fprintf(stderr, "MPI_Cartdim_get: ndims is %d, should be 0\n", ndims); } /* this function should not fail */ MPI_Cart_get(comm, 0, NULL, NULL, NULL); MPI_Cart_rank(comm, NULL, &rank); if (rank != 0) { errs++; fprintf(stderr, "MPI_Cart_rank: rank is %d, should be 0\n", rank); } /* this function should not fail */ MPI_Cart_coords(comm, 0, 0, NULL); MPI_Cart_sub(comm, NULL, &newcomm); ndims = -1; MPI_Cartdim_get(newcomm, &ndims); if (ndims != 0) { errs++; fprintf(stderr, "MPI_Cart_sub did not return zero-dimensional communicator\n"); } MPI_Barrier(comm); MPI_Comm_free(&comm); MPI_Comm_free(&newcomm); } else if (rank == 0) { errs++; fprintf(stderr, "Communicator returned is null!"); } MTest_Finalize(errs); return MTestReturnValue(errs); }
int main(int argc, char **argv) { const int n_c = 3; // number of colors int c, i, j, mu, nu, ir, is, ia, imunu; int filename_set = 0; int dims[4] = {0,0,0,0}; int l_LX_at, l_LXstart_at; int source_location, have_source_flag = 0; int x0, x1, x2, x3, ix; int sx0, sx1, sx2, sx3; int isimag[4]; int gperm[5][4], gperm2[4][4]; int check_position_space_WI=0; int num_threads = 1, nthreads=-1, threadid=-1; int exitstatus; int write_ascii=0; int mms = 0, mass_id = -1; int outfile_prefix_set = 0; int source_proc_coords[4], source_proc_id = -1; int ud_single_file = 0; double gperm_sign[5][4], gperm2_sign[4][4]; double *conn = NULL; double *conn2 = NULL; double contact_term[8]; double *work=NULL; int verbose = 0; int do_gt = 0, status; char filename[100], contype[400], outfile_prefix[400]; double ratime, retime; double plaq; double spinor1[24], spinor2[24], U_[18]; double *gauge_trafo=(double*)NULL; double *phi=NULL, *chi=NULL; complex w; double Usourcebuff[72], *Usource[4]; FILE *ofs; #ifdef MPI int *status; #endif #ifdef MPI MPI_Init(&argc, &argv); #endif while ((c = getopt(argc, argv, "swah?vgf:t:m:o:")) != -1) { switch (c) { case 'v': verbose = 1; break; case 'g': do_gt = 1; break; case 'f': strcpy(filename, optarg); filename_set=1; break; case 'w': check_position_space_WI = 1; fprintf(stdout, "\n# [avc_exact2_lowmem_xspace] will check Ward identity in position space\n"); break; case 't': num_threads = atoi(optarg); fprintf(stdout, "\n# [avc_exact2_lowmem_xspace] will use %d threads in spacetime loops\n", num_threads); break; case 'a': write_ascii = 1; fprintf(stdout, "\n# [avc_exact2_lowmem_xspace] will write data in ASCII format too\n"); break; case 'm': mms = 1; mass_id = atoi(optarg); fprintf(stdout, "\n# [avc_exact2_lowmem_xspace] will read propagators in MMS format with mass id %d\n", mass_id); break; case 'o': strcpy(outfile_prefix, optarg); fprintf(stdout, "\n# [avc_exact2_lowmem_xspace] will use prefix %s for output filenames\n", outfile_prefix); 
outfile_prefix_set = 1; break; case 's': ud_single_file = 1; fprintf(stdout, "\n# [avc_exact2_lowmem_xspace] will read up and down propagator from same file\n"); break; case 'h': case '?': default: usage(); break; } } if(g_cart_id==0) { g_the_time = time(NULL); fprintf(stdout, "\n# [avc_exact2_lowmem_xspace] using global time stamp %s", ctime(&g_the_time)); } /********************************* * set number of openmp threads *********************************/ #ifdef OPENMP omp_set_num_threads(num_threads); #endif /* set the default values */ if(filename_set==0) strcpy(filename, "cvc.input"); fprintf(stdout, "# Reading input from file %s\n", filename); read_input_parser(filename); /* some checks on the input data */ if((T_global == 0) || (LX==0) || (LY==0) || (LZ==0)) { if(g_proc_id==0) fprintf(stderr, "\n[avc_exact2_lowmem_xspace] T and L's must be set\n"); usage(); } if(g_kappa == 0.) { if(g_proc_id==0) fprintf(stderr, "\n[avc_exact2_lowmem_xspace] kappa should be > 0.n"); usage(); } /* initialize MPI parameters */ mpi_init(argc, argv); #ifdef MPI if((status = (int*)calloc(g_nproc, sizeof(int))) == (int*)NULL) { MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); exit(7); } #endif dims[0]=T_global; dims[1]=LX; dims[2]=LY; dims[3]=LZ; #ifndef MPI T = T_global; Tstart = 0; l_LX_at = LX; l_LXstart_at = 0; #endif fprintf(stdout, "# [%2d] parameters:\n"\ "# [%2d] T = %3d\n"\ "# [%2d] Tstart = %3d\n"\ "# [%2d] l_LX_at = %3d\n"\ "# [%2d] l_LXstart_at = %3d\n", g_cart_id, g_cart_id, T, g_cart_id, Tstart, g_cart_id, l_LX_at, g_cart_id, l_LXstart_at); #ifdef MPI if(T==0) { fprintf(stderr, "[%2d] local T is zero; exit\n", g_cart_id); MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); exit(2); } #endif if(init_geometry() != 0) { fprintf(stderr, "ERROR from init_geometry\n"); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 1); MPI_Finalize(); #endif exit(1); } geometry(); alloc_gauge_field(&g_gauge_field, VOLUMEPLUSRAND); if(!(strcmp(gaugefilename_prefix,"identity")==0)) { /* read the gauge field */ 
sprintf(filename, "%s.%.4d", gaugefilename_prefix, Nconf); if(g_cart_id==0) fprintf(stdout, "reading gauge field from file %s\n", filename); read_lime_gauge_field_doubleprec(filename); } else { /* initialize unit matrices */ if(g_cart_id==0) fprintf(stdout, "\n# [avc_exact] initializing unit matrices\n"); for(ix=0;ix<VOLUME;ix++) { _cm_eq_id( g_gauge_field + _GGI(ix, 0) ); _cm_eq_id( g_gauge_field + _GGI(ix, 1) ); _cm_eq_id( g_gauge_field + _GGI(ix, 2) ); _cm_eq_id( g_gauge_field + _GGI(ix, 3) ); } } #ifdef MPI xchange_gauge(); #endif /* measure the plaquette */ plaquette(&plaq); if(g_cart_id==0) fprintf(stdout, "measured plaquette value: %25.16e\n", plaq); /* sprintf(filename, "gauge.%.2d", g_cart_id); ofs = fopen(filename, "w"); for(x0=0;x0<T;x0++) { for(x1=0;x1<LX;x1++) { for(x2=0;x2<LY;x2++) { for(x3=0;x3<LZ;x3++) { ix = g_ipt[x0][x1][x2][x3]; for(mu=0;mu<4;mu++) { for(i=0;i<9;i++) { fprintf(ofs, "%8d%3d%3d%3d%3d%3d%3d%25.16e%25.16e\n", ix, x0+Tstart, x1+LXstart, x2+LYstart, x3, mu, i, g_gauge_field[_GGI(ix,mu)+2*i], g_gauge_field[_GGI(ix,mu)+2*i+1]); } } }}}} fclose(ofs); if(g_cart_id==0) fprintf(stdout, "\nWarning: forced exit\n"); fflush(stdout); fflush(stderr); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 255); MPI_Finalize(); #endif exit(255); */ /* allocate memory for the spinor fields */ no_fields = 2; if(mms) no_fields++; g_spinor_field = (double**)calloc(no_fields, sizeof(double*)); for(i=0; i<no_fields; i++) alloc_spinor_field(&g_spinor_field[i], VOLUMEPLUSRAND); if(mms) { work = g_spinor_field[no_fields-1]; } /* allocate memory for the contractions */ conn = (double*)calloc(2 * 16 * VOLUME, sizeof(double)); if( conn==(double*)NULL ) { fprintf(stderr, "could not allocate memory for contr. 
fields\n"); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 3); MPI_Finalize(); #endif exit(3); } #ifdef OPENMP #pragma omp parallel for #endif for(ix=0; ix<32*VOLUME; ix++) conn[ix] = 0.; conn2 = (double*)calloc(2 * 16 * VOLUME, sizeof(double)); if( conn2 == NULL ) { fprintf(stderr, "could not allocate memory for contr. fields\n"); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 3); MPI_Finalize(); #endif exit(3); } #ifdef OPENMP #pragma omp parallel for #endif for(ix=0; ix<32*VOLUME; ix++) conn2[ix] = 0.; /*********************************************************** * determine source coordinates, find out, if source_location is in this process ***********************************************************/ #if (defined PARALLELTX) || (defined PARALLELTXY) sx0 = g_source_location / (LX_global*LY_global*LZ); sx1 = (g_source_location%(LX_global*LY_global*LZ)) / (LY_global*LZ); sx2 = (g_source_location%(LY_global*LZ)) / LZ; sx3 = (g_source_location%LZ); source_proc_coords[0] = sx0 / T; source_proc_coords[1] = sx1 / LX; source_proc_coords[2] = sx2 / LY; source_proc_coords[3] = 0; MPI_Cart_rank(g_cart_grid, source_proc_coords, &source_proc_id); have_source_flag = (int)(g_cart_id == source_proc_id); if(have_source_flag==1) { fprintf(stdout, "\n# process %2d has source location\n", source_proc_id); fprintf(stdout, "\n# global source coordinates: (%3d,%3d,%3d,%3d)\n", sx0, sx1, sx2, sx3); fprintf(stdout, "\n# source proc coordinates: (%3d,%3d,%3d,%3d)\n", source_proc_coords[0], source_proc_coords[1], source_proc_coords[2], source_proc_coords[3]); } sx0 = sx0 % T; sx1 = sx1 % LX; sx2 = sx2 % LY; sx3 = sx3 % LZ; # else have_source_flag = (int)(g_source_location/(LX*LY*LZ)>=Tstart && g_source_location/(LX*LY*LZ)<(Tstart+T)); if(have_source_flag==1) fprintf(stdout, "process %2d has source location\n", g_cart_id); sx0 = g_source_location/(LX*LY*LZ)-Tstart; sx1 = (g_source_location%(LX*LY*LZ)) / (LY*LZ); sx2 = (g_source_location%(LY*LZ)) / LZ; sx3 = (g_source_location%LZ); #endif 
if(have_source_flag==1) { fprintf(stdout, "local source coordinates: (%3d,%3d,%3d,%3d)\n", sx0, sx1, sx2, sx3); source_location = g_ipt[sx0][sx1][sx2][sx3]; } #ifdef MPI # if (defined PARALLELTX) || (defined PARALLELTXY) have_source_flag = source_proc_id; MPI_Bcast(Usourcebuff, 72, MPI_DOUBLE, have_source_flag, g_cart_grid); # else MPI_Gather(&have_source_flag, 1, MPI_INT, status, 1, MPI_INT, 0, g_cart_grid); if(g_cart_id==0) { for(mu=0; mu<g_nproc; mu++) fprintf(stdout, "status[%1d]=%d\n", mu,status[mu]); } if(g_cart_id==0) { for(have_source_flag=0; status[have_source_flag]!=1; have_source_flag++); fprintf(stdout, "have_source_flag= %d\n", have_source_flag); } MPI_Bcast(&have_source_flag, 1, MPI_INT, 0, g_cart_grid); # endif fprintf(stdout, "[%2d] have_source_flag = %d\n", g_cart_id, have_source_flag); #else have_source_flag = 0; #endif /* if(g_cart_id==0) fprintf(stdout, "\nWarning: forced exit\n"); fflush(stdout); fflush(stderr); #ifdef MPI MPI_Abort(MPI_COMM_WORLD, 255); MPI_Finalize(); #endif exit(255); */ #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif /*********************************************************** * initialize the Gamma matrices ***********************************************************/ // gamma_5: gperm[4][0] = gamma_permutation[5][ 0] / 6; gperm[4][1] = gamma_permutation[5][ 6] / 6; gperm[4][2] = gamma_permutation[5][12] / 6; gperm[4][3] = gamma_permutation[5][18] / 6; gperm_sign[4][0] = gamma_sign[5][ 0]; gperm_sign[4][1] = gamma_sign[5][ 6]; gperm_sign[4][2] = gamma_sign[5][12]; gperm_sign[4][3] = gamma_sign[5][18]; // gamma_nu gamma_5 for(nu=0;nu<4;nu++) { // permutation gperm[nu][0] = gamma_permutation[6+nu][ 0] / 6; gperm[nu][1] = gamma_permutation[6+nu][ 6] / 6; gperm[nu][2] = gamma_permutation[6+nu][12] / 6; gperm[nu][3] = gamma_permutation[6+nu][18] / 6; // is imaginary ? 
isimag[nu] = gamma_permutation[6+nu][0] % 2; // (overall) sign gperm_sign[nu][0] = gamma_sign[6+nu][ 0]; gperm_sign[nu][1] = gamma_sign[6+nu][ 6]; gperm_sign[nu][2] = gamma_sign[6+nu][12]; gperm_sign[nu][3] = gamma_sign[6+nu][18]; // write to stdout if(g_cart_id == 0) { fprintf(stdout, "# gamma_%d5 = (%f %d, %f %d, %f %d, %f %d)\n", nu, gperm_sign[nu][0], gperm[nu][0], gperm_sign[nu][1], gperm[nu][1], gperm_sign[nu][2], gperm[nu][2], gperm_sign[nu][3], gperm[nu][3]); } } // gamma_nu for(nu=0;nu<4;nu++) { // permutation gperm2[nu][0] = gamma_permutation[nu][ 0] / 6; gperm2[nu][1] = gamma_permutation[nu][ 6] / 6; gperm2[nu][2] = gamma_permutation[nu][12] / 6; gperm2[nu][3] = gamma_permutation[nu][18] / 6; // (overall) sign gperm2_sign[nu][0] = gamma_sign[nu][ 0]; gperm2_sign[nu][1] = gamma_sign[nu][ 6]; gperm2_sign[nu][2] = gamma_sign[nu][12]; gperm2_sign[nu][3] = gamma_sign[nu][18]; // write to stdout if(g_cart_id == 0) { fprintf(stdout, "# gamma_%d = (%f %d, %f %d, %f %d, %f %d)\n", nu, gperm2_sign[nu][0], gperm2[nu][0], gperm2_sign[nu][1], gperm2[nu][1], gperm2_sign[nu][2], gperm2[nu][2], gperm2_sign[nu][3], gperm2[nu][3]); } } /********************************************************** ********************************************************** ** ** first contribution ** ********************************************************** **********************************************************/ /********************************************** * loop on the Lorentz index nu at source **********************************************/ for(ia=0; ia<n_c; ia++) { for(nu=0; nu<4; nu++) //for(nu=0; nu<4; nu++) { // fprintf(stdout, "\n# [avc_exact2_lowmem_xspace] 1st part, processing nu = %d ...\n", nu); for(ir=0; ir<4; ir++) { // read 1 up-type propagator color components for spinor index ir if(!mms) { get_filename(filename, 0, 3*ir+ia, 1); exitstatus = read_lime_spinor(g_spinor_field[0], filename, 0); if(exitstatus != 0) { fprintf(stderr, "\n# [avc_exact2_lowmem_xspace] Error 
from read_lime_spinor\n"); exit(111); } xchange_field(g_spinor_field[0]); } else { sprintf(filename, "%s.%.4d.00.%.2d.cgmms.%.2d.inverted", filename_prefix, Nconf, 3*ir+ia, mass_id); exitstatus = read_lime_spinor(work, filename, 0); if(exitstatus != 0) { fprintf(stderr, "\n# [avc_exact2_lowmem_xspace] Error from read_lime_spinor\n"); exit(111); } xchange_field(work); Qf5(g_spinor_field[0], work, -g_mu); xchange_field(g_spinor_field[0]); } // read 1 dn-type propagator color components for spinor index gamma_perm ( ir ) if(!mms) { if(ud_single_file) { get_filename(filename, 0, 3*gperm[nu][ir]+ia, 1); exitstatus = read_lime_spinor(g_spinor_field[1], filename, 1); } else { get_filename(filename, 0, 3*gperm[nu][ir]+ia, -1); exitstatus = read_lime_spinor(g_spinor_field[1], filename, 0); } if(exitstatus != 0) { fprintf(stderr, "\n# [avc_exact2_lowmem_xspace] Error from read_lime_spinor\n"); exit(111); } xchange_field(g_spinor_field[1]); } else { sprintf(filename, "%s.%.4d.%.2d.%.2d.cgmms.%.2d.inverted", filename_prefix, Nconf, 4, 3*gperm[nu][ir]+ia, mass_id); exitstatus = read_lime_spinor(work, filename, 0); if(exitstatus != 0) { fprintf(stderr, "\n# [avc_exact2_lowmem_xspace] Error from read_lime_spinor\n"); exit(111); } xchange_field(work); Qf5(g_spinor_field[1], work, g_mu); xchange_field(g_spinor_field[1]); } phi = g_spinor_field[0]; chi = g_spinor_field[1]; //fprintf(stdout, "\n# [nu5] spin index pair (%d, %d); col index %d\n", ir, gperm[nu][ir], ia); // 1) gamma_nu gamma_5 x U for(mu=0; mu<4; mu++) //for(mu=0; mu<1; mu++) { imunu = 4*mu+nu; #ifdef OPENMP #pragma omp parallel for private(ix, spinor1, spinor2, U_, w) shared(imunu, ia, nu, mu) #endif for(ix=0; ix<VOLUME; ix++) { /* threadid = omp_get_thread_num(); nthreads = omp_get_num_threads(); fprintf(stdout, "[thread%d] number of threads = %d\n", threadid, nthreads); */ _cm_eq_cm_ti_co(U_, &g_gauge_field[_GGI(ix,mu)], &co_phase_up[mu]); _fv_eq_cm_ti_fv(spinor1, U_, phi+_GSI(g_iup[ix][mu])); 
_fv_eq_gamma_ti_fv(spinor2, mu, spinor1); _fv_mi_eq_fv(spinor2, spinor1); _fv_eq_gamma_ti_fv(spinor1, 5, spinor2); _co_eq_fv_dag_ti_fv(&w, chi+_GSI(ix), spinor1); if(!isimag[nu]) { conn[_GWI(imunu,ix,VOLUME) ] += gperm_sign[nu][ir] * w.re; conn[_GWI(imunu,ix,VOLUME)+1] += gperm_sign[nu][ir] * w.im; } else { conn[_GWI(imunu,ix,VOLUME) ] += gperm_sign[nu][ir] * w.im; conn[_GWI(imunu,ix,VOLUME)+1] -= gperm_sign[nu][ir] * w.re; } } // of ix #ifdef OPENMP #pragma omp parallel for private(ix, spinor1, spinor2, U_, w) shared(imunu, ia, nu, mu) #endif for(ix=0; ix<VOLUME; ix++) { _cm_eq_cm_ti_co(U_, &g_gauge_field[_GGI(ix,mu)], &co_phase_up[mu]); _fv_eq_cm_dag_ti_fv(spinor1, U_, phi+_GSI(ix)); _fv_eq_gamma_ti_fv(spinor2, mu, spinor1); _fv_pl_eq_fv(spinor2, spinor1); _fv_eq_gamma_ti_fv(spinor1, 5, spinor2); _co_eq_fv_dag_ti_fv(&w, chi+_GSI(g_iup[ix][mu]), spinor1); if(!isimag[nu]) { conn[_GWI(imunu,ix,VOLUME) ] += gperm_sign[nu][ir] * w.re; conn[_GWI(imunu,ix,VOLUME)+1] += gperm_sign[nu][ir] * w.im; } else { conn[_GWI(imunu,ix,VOLUME) ] += gperm_sign[nu][ir] * w.im; conn[_GWI(imunu,ix,VOLUME)+1] -= gperm_sign[nu][ir] * w.re; } } // of ix // contribution to local-local correlator #ifdef OPENMP #pragma omp parallel for private(ix, spinor1, spinor2, U_, w) shared(imunu, ia, nu, mu) #endif for(ix=0; ix<VOLUME; ix++) { _fv_eq_gamma_ti_fv(spinor2, mu, phi+_GSI(ix) ); _fv_eq_gamma_ti_fv(spinor1, 5, spinor2); _co_eq_fv_dag_ti_fv(&w, chi+_GSI(ix), spinor1); if(!isimag[nu]) { conn2[_GWI(imunu,ix,VOLUME) ] += gperm_sign[nu][ir] * w.re; conn2[_GWI(imunu,ix,VOLUME)+1] += gperm_sign[nu][ir] * w.im; } else { conn2[_GWI(imunu,ix,VOLUME) ] += gperm_sign[nu][ir] * w.im; conn2[_GWI(imunu,ix,VOLUME)+1] -= gperm_sign[nu][ir] * w.re; } } // of ix } // of mu } // of ir } // of nu } // of ia loop on colors // normalisation of contractions #ifdef OPENMP #pragma omp parallel for #endif for(ix=0; ix<32*VOLUME; ix++) conn[ix] *= -0.5; #ifdef OPENMP #pragma omp parallel for #endif for(ix=0; 
ix<32*VOLUME; ix++) conn2[ix] *= -1.; #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id==0) fprintf(stdout, "contractions in %e seconds\n", retime-ratime); // save results #ifdef MPI ratime = MPI_Wtime(); #else ratime = (double)clock() / CLOCKS_PER_SEC; #endif if(outfile_prefix_set) { sprintf(filename, "%s/cvc_lvc_x.%.4d.t%.2dx%.2dy%.2dz%.2d", outfile_prefix, Nconf, sx0, sx1, sx2, sx3); } else { sprintf(filename, "cvc_lvc_x.%.4d.t%.2dx%.2dy%.2dz%.2d", Nconf, sx0, sx1, sx2, sx3); } sprintf(contype, "cvc - lvc in position space, all 16 components"); status = write_lime_contraction(conn, filename, 64, 16, contype, Nconf, 0); if(status != 0) { fprintf(stderr, "[] Error from write_lime_contractions, status was %d\n", status); exit(16); } if(outfile_prefix_set) { sprintf(filename, "%s/lvc_lvc_x.%.4d.t%.2dx%.2dy%.2dz%.2d", outfile_prefix, Nconf, sx0, sx1, sx2, sx3); } else { sprintf(filename, "lvc_lvc_x.%.4d.t%.2dx%.2dy%.2dz%.2d", Nconf, sx0, sx1, sx2, sx3); } sprintf(contype, "lvc - lvc in position space, all 16 components"); status = write_lime_contraction(conn2, filename, 64, 16, contype, Nconf, 0); if(status != 0) { fprintf(stderr, "[] Error from write_lime_contractions, status was %d\n", status); exit(17); } #ifndef MPI if(write_ascii) { if(outfile_prefix_set) { sprintf(filename, "%s/cvc_lvc_x.%.4d.ascii", outfile_prefix, Nconf); } else { sprintf(filename, "cvc_lvc_x.%.4d.ascii", Nconf); } write_contraction(conn, NULL, filename, 16, 2, 0); if(outfile_prefix_set) { sprintf(filename, "%s/lvc_lvc_x.%.4d.ascii", outfile_prefix, Nconf); } else { sprintf(filename, "lvc_lvc_x.%.4d.ascii", Nconf); } write_contraction(conn2, NULL, filename, 16, 2, 0); } #endif #ifdef MPI retime = MPI_Wtime(); #else retime = (double)clock() / CLOCKS_PER_SEC; #endif if(g_cart_id==0) fprintf(stdout, "saved position space results in %e seconds\n", retime-ratime); #ifndef MPI // check the Ward identity in position space 
if(check_position_space_WI) { sprintf(filename, "WI_X.%.4d", Nconf); ofs = fopen(filename,"w"); fprintf(stdout, "\n# [avc_exact2_lowmem_xspace] checking Ward identity in position space ...\n"); for(x0=0; x0<T; x0++) { for(x1=0; x1<LX; x1++) { for(x2=0; x2<LY; x2++) { for(x3=0; x3<LZ; x3++) { fprintf(ofs, "# t=%2d x=%2d y=%2d z=%2d\n", x0, x1, x2, x3); ix=g_ipt[x0][x1][x2][x3]; for(nu=0; nu<4; nu++) { w.re = conn[_GWI(4*0+nu,ix,VOLUME)] + conn[_GWI(4*1+nu,ix,VOLUME)] + conn[_GWI(4*2+nu,ix,VOLUME)] + conn[_GWI(4*3+nu,ix,VOLUME)] - conn[_GWI(4*0+nu,g_idn[ix][0],VOLUME)] - conn[_GWI(4*1+nu,g_idn[ix][1],VOLUME)] - conn[_GWI(4*2+nu,g_idn[ix][2],VOLUME)] - conn[_GWI(4*3+nu,g_idn[ix][3],VOLUME)]; w.im = conn[_GWI(4*0+nu,ix,VOLUME)+1] + conn[_GWI(4*1+nu,ix,VOLUME)+1] + conn[_GWI(4*2+nu,ix,VOLUME)+1] + conn[_GWI(4*3+nu,ix,VOLUME)+1] - conn[_GWI(4*0+nu,g_idn[ix][0],VOLUME)+1] - conn[_GWI(4*1+nu,g_idn[ix][1],VOLUME)+1] - conn[_GWI(4*2+nu,g_idn[ix][2],VOLUME)+1] - conn[_GWI(4*3+nu,g_idn[ix][3],VOLUME)+1]; fprintf(ofs, "\t%3d%25.16e%25.16e\n", nu, w.re, w.im); } }}}} fclose(ofs); } #endif /**************************************** * free the allocated memory, finalize ****************************************/ free(g_gauge_field); for(i=0; i<no_fields; i++) free(g_spinor_field[i]); free(g_spinor_field); free_geometry(); if(conn != NULL) free(conn); if(conn2 != NULL) free(conn2); #ifdef MPI free(status); MPI_Finalize(); #endif if(g_cart_id==0) { g_the_time = time(NULL); fprintf(stdout, "\n# [cvc_lvc_exact2_lowmem_xspace] %s# [cvc_lvc_exact2_lowmem_xspace] end of run\n", ctime(&g_the_time)); fprintf(stderr, "\n# [cvc_lvc_exact2_lowmem_xspace] %s# [cvc_lvc_exact2_lowmem_xspace] end of run\n", ctime(&g_the_time)); } return(0); }