/*
 * Benchmark the Wilson inverter on a fixed source.
 *
 * QOP_wilson_invert() is called nit+1 times with the same arguments.  The
 * first pass (i == 0) is printed but excluded from the averages; the
 * remaining nit passes are accumulated.
 *
 *   info, inv_arg, res_arg  passed straight to QOP_wilson_invert(); on
 *                           return res_arg->final_iter, info->final_sec and
 *                           info->final_flop are overwritten with averages
 *   out                     zeroed on entry, then wrapped as the QOP result
 *   in                      wrapped as the QOP source
 *
 * Returns info->final_flop / (1e6 * info->final_sec) computed from the
 * averaged values.
 *
 * Uses the file-scope nit, flw and kappa.  nit must be non-zero: the
 * averaging divides by it.
 */
double
bench_inv(QOP_info_t *info, QOP_invert_arg_t *inv_arg,
          QOP_resid_arg_t *res_arg, QDP_DiracFermion *out,
          QDP_DiracFermion *in)
{
  /* Norm recorded on the first call; later calls are compared against it. */
  static QLA_Real r2s = -1;
  QLA_Real r2;
  double sec = 0, flop = 0, mf;
  int i, iter = 0;
  QOP_DiracFermion *qopout, *qopin;

  QDP_D_eq_zero(out, QDP_all);
  qopout = QOP_create_D_from_qdp(out);
  qopin = QOP_create_D_from_qdp(in);

  for (i = 0; i <= nit; i++) {
    QMP_barrier();
    QOP_wilson_invert(info, flw, inv_arg, res_arg, kappa, qopout, qopin);
    QMP_barrier();
    /*
     * The flop count is printed as a floating-point value with no
     * fractional digits: converting it to int is undefined once it
     * exceeds INT_MAX.
     */
    printf("%i\t%i\t%g\t%.0f\n", i, res_arg->final_iter, info->final_sec,
           (double)info->final_flop);
    if (i > 0) {
      iter += res_arg->final_iter;
      sec += info->final_sec;
      flop += info->final_flop;
    }
  }

  QOP_destroy_D(qopout);
  QOP_destroy_D(qopin);

  /* Sanity check: the even-subset norm of the result should be reproducible. */
  QDP_r_eq_norm2_D(&r2, out, QDP_even);
  if (r2s < 0) {
    r2s = r2;
  }
  if (fabs(1 - r2/r2s) > 1e-3) {
    printf0("first norm = %g  this norm = %g\n", r2s, r2);
  }

  /* mf = 1 summed by QMP_sum_double -- presumably the number of nodes. */
  mf = 1;
  QMP_sum_double(&mf);
  QMP_sum_double(&sec);
  QMP_sum_double(&flop);

  res_arg->final_iter = iter/nit;
  info->final_sec = sec/(mf*nit);
  info->final_flop = flop/(mf*nit);
  mf = info->final_flop/(1e6*info->final_sec);
  return mf;
}
/*
 * Apply QOP_wilson_dslash_qdp() to fields held in packed form.
 *
 *   l     layout handle, forwarded to unpackD/unpackM/packD
 *   x     packed destination; it is unpacked into the QDP destination
 *         before the operator runs and overwritten with the result after
 *   u     gauge links; only u[0], u[2], u[4] and u[6] are read, and each is
 *         multiplied by 2 before being handed to QOP
 *   mass  converted to kappa = 0.5/(4+mass)
 *   sign  forwarded unchanged to QOP_wilson_dslash_qdp()
 *   y     packed source field
 *   sub   subset selector, first character only: 'e' selects even output
 *         from odd input, 'o' selects odd output from even input, anything
 *         else selects QOP_EVENODD for both
 *
 * Prints the squared norm of the result (over QDP_all) via printf0.
 * All QDP and QOP objects created here are released before returning.
 */
void
qopWilsonDslash(Layout *l, real *x, real *u[8], real mass, int sign,
                real *y, char *sub)
{
  QDP_ColorMatrix *qu[4];
  QDP_DiracFermion *out, *in;

  in = QDP_create_D();
  out = QDP_create_D();
  unpackD(l, in, y);
  /* Preload the destination so sites outside the output subset keep x. */
  unpackD(l, out, x);

  for(int i=0; i<4; i++) {
    qu[i] = QDP_create_M();
    unpackM(l, qu[i], u[2*i]);
    QLA_Real two = 2;
    QDP_M_eq_r_times_M(qu[i], &two, qu[i], QDP_all);
  }

  QOP_FermionLinksWilson *fla;
  fla = QOP_wilson_create_L_from_qdp(qu, NULL);

  QOP_evenodd_t eoOut=QOP_EVENODD, eoIn=QOP_EVENODD;
  if(sub[0]=='e') {
    eoOut = QOP_EVEN;
    eoIn = QOP_ODD;
  }
  if(sub[0]=='o') {
    eoOut = QOP_ODD;
    eoIn = QOP_EVEN;
  }

  real kappa = 0.5/(4+mass);
  QOP_wilson_dslash_qdp(NULL, fla, kappa, sign, out, in, eoOut, eoIn);

  QLA_Real n2;
  QDP_r_eq_norm2_D(&n2, out, QDP_all);
  printf0("out2: %g\n", n2);

  packD(l, x, out);

  /* Release the QOP links object; it was previously leaked on every call. */
  QOP_wilson_destroy_L(fla);

  QDP_destroy_D(in);
  QDP_destroy_D(out);
  for(int i=0; i<4; i++) {
    QDP_destroy_M(qu[i]);
  }
}
int main(int argc, char *argv[]) { const char *msg; int status = 1; int mu, i; struct QOP_CLOVER_State *clover_state; QDP_Int *I_seed; int i_seed; QDP_RandomState *state; QLA_Real plaq; QLA_Real n[NELEMS(F)]; struct QOP_CLOVER_Gauge *c_g; struct QOP_CLOVER_Fermion *c_f[NELEMS(F)]; double kappa; double c_sw; double in_eps; int in_iter; int log_flag; double out_eps; int out_iter; int cg_status; double run_time; long long flops, sent, received; /* start QDP */ QDP_initialize(&argc, &argv); if (argc != 1 + NDIM + 6) { printf0("ERROR: usage: %s Lx ... seed kappa c_sw iter eps log?\n", argv[0]); goto end; } for (mu = 0; mu < NDIM; mu++) { lattice[mu] = atoi(argv[1 + mu]); } i_seed = atoi(argv[1 + NDIM]); kappa = atof(argv[2 + NDIM]); c_sw = atof(argv[3 + NDIM]); in_iter = atoi(argv[4 + NDIM]); in_eps = atof(argv[5 + NDIM]); log_flag = atoi(argv[6 + NDIM]) == 0? 0: QOP_CLOVER_LOG_EVERYTHING; /* set lattice size and create layout */ QDP_set_latsize(NDIM, lattice); QDP_create_layout(); primary = QMP_is_primary_node(); self = QMP_get_node_number(); get_vector(network, 1, QMP_get_logical_number_of_dimensions(), QMP_get_logical_dimensions()); get_vector(node, 0, QMP_get_logical_number_of_dimensions(), QMP_get_logical_coordinates()); printf0("network: "); for (i = 0; i < NDIM; i++) printf0(" %d", network[i]); printf0("\n"); printf0("node: "); for (i = 0; i < NDIM; i++) printf0(" %d", node[i]); printf0("\n"); printf0("kappa: %20.15f\n", kappa); printf0("c_sw: %20.15f\n", c_sw); printf0("in_iter: %d\n", in_iter); printf0("in_eps: %15.2e\n", in_eps); /* allocate the gauge field */ create_Mvector(U, NELEMS(U)); create_Mvector(C, NELEMS(C)); create_Dvector(F, NELEMS(F)); I_seed = QDP_create_I(); QDP_I_eq_funci(I_seed, icoord, QDP_all); state = QDP_create_S(); QDP_S_eq_seed_i_I(state, i_seed, I_seed, QDP_all); for (mu = 0; mu < NELEMS(U); mu++) { QDP_M_eq_gaussian_S(U[mu], state, QDP_all); } for (i = 0; i < NELEMS(F); i++) { QDP_D_eq_gaussian_S(F[i], state, QDP_all); } /* build the 
clovers */ clover(C, U); /* initialize CLOVER */ if (QOP_CLOVER_init(&clover_state, lattice, network, node, primary, sublattice, NULL)) { printf0("CLOVER_init() failed\n"); goto end; } if (QOP_CLOVER_import_fermion(&c_f[0], clover_state, f_reader, F[0])) { printf0("CLOVER_import_fermion(0) failed\n"); goto end; } if (QOP_CLOVER_allocate_fermion(&c_f[1], clover_state)) { printf0("CLOVER_allocate_fermion(1) failed\n"); goto end; } if (QOP_CLOVER_allocate_fermion(&c_f[2], clover_state)) { printf0("CLOVER_allocate_fermion(2) failed\n"); goto end; } if (QOP_CLOVER_allocate_fermion(&c_f[3], clover_state)) { printf0("CLOVER_allocate_fermion(3) failed\n"); goto end; } if (QOP_CLOVER_import_gauge(&c_g, clover_state, kappa, c_sw, u_reader, c_reader, NULL)) { printf("CLOVER_import_gauge() failed\n"); goto end; } QOP_CLOVER_D_operator(c_f[2], c_g, c_f[0]); cg_status = QOP_CLOVER_D_CG(c_f[3], &out_iter, &out_eps, c_f[2], c_g, c_f[2], in_iter, in_eps, log_flag); msg = QOP_CLOVER_error(clover_state); QOP_CLOVER_performance(&run_time, &flops, &sent, &received, clover_state); QOP_CLOVER_export_fermion(f_writer, F[3], c_f[3]); printf0("CG status: %d\n", cg_status); printf0("CG error message: %s\n", msg? 
msg: "<NONE>"); printf0("CG iter: %d\n", out_iter); printf0("CG eps: %20.10e\n", out_eps); printf0("CG performance: runtime %e sec\n", run_time); printf0("CG performance: flops %.3e MFlop/s (%lld)\n", flops * 1e-6 / run_time, flops); printf0("CG performance: snd %.3e MB/s (%lld)\n", sent * 1e-6 / run_time, sent); printf0("CG performance: rcv %.3e MB (%lld)/s\n", received * 1e-6 / run_time, received); /* free CLOVER */ QOP_CLOVER_free_gauge(&c_g); for (i = 0; i < NELEMS(c_f); i++) QOP_CLOVER_free_fermion(&c_f[i]); QOP_CLOVER_fini(&clover_state); /* Compute plaquette */ plaq = plaquette(U); /* field norms */ for (i = 0; i < NELEMS(F); i++) QDP_r_eq_norm2_D(&n[i], F[i], QDP_all); /* Display the values */ printf0("plaquette = %g\n", plaq / (QDP_volume() * QDP_Nc * NDIM * (NDIM - 1) / 2 )); for (i = 0; i < NELEMS(F); i++) printf0(" |f|^2 [%d] = %20.10e\n", i, (double)(n[i])); /* Compute and display <f[1] f[0]> */ show_dot("1|orig", F[1], F[0]); /* Compute and display <f[1] f[3]> */ show_dot("1|solv", F[1], F[3]); QDP_destroy_S(state); QDP_destroy_I(I_seed); destroy_Mvector(U, NELEMS(U)); destroy_Mvector(C, NELEMS(C)); destroy_Dvector(F, NELEMS(F)); status = 0; end: /* shutdown QDP */ printf0("end\n"); QDP_finalize(); return status; }
int main(int argc, char *argv[]) { int status = 1; int mu, i; struct QOP_CLOVER_State *clover_state; QDP_Int *I_seed; int i_seed; QDP_RandomState *state; QLA_Real plaq; QLA_Real n[NELEMS(F)]; struct QOP_CLOVER_Gauge *c_g; struct QOP_CLOVER_Fermion *c_f[NELEMS(F)]; double kappa; double c_sw; /* start QDP */ QDP_initialize(&argc, &argv); if (argc != 1 + NDIM + 3) { printf0("ERROR: usage: %s Lx ... seed kappa c_sw\n", argv[0]); goto end; } for (mu = 0; mu < NDIM; mu++) { lattice[mu] = atoi(argv[1 + mu]); } i_seed = atoi(argv[1 + NDIM]); kappa = atof(argv[2 + NDIM]); c_sw = atof(argv[3 + NDIM]); /* set lattice size and create layout */ QDP_set_latsize(NDIM, lattice); QDP_create_layout(); primary = QMP_is_primary_node(); self = QMP_get_node_number(); get_vector(network, 1, QMP_get_logical_number_of_dimensions(), QMP_get_logical_dimensions()); get_vector(node, 0, QMP_get_logical_number_of_dimensions(), QMP_get_logical_coordinates()); printf0("network: "); for (i = 0; i < NDIM; i++) printf0(" %d", network[i]); printf0("\n"); printf0("node: "); for (i = 0; i < NDIM; i++) printf0(" %d", node[i]); printf0("\n"); printf0("kappa: %20.15f\n", kappa); printf0("c_sw: %20.15f\n", c_sw); /* allocate the gauge field */ create_Mvector(U, NELEMS(U)); create_Mvector(C, NELEMS(C)); create_Dvector(F, NELEMS(F)); I_seed = QDP_create_I(); QDP_I_eq_funci(I_seed, icoord, QDP_all); state = QDP_create_S(); QDP_S_eq_seed_i_I(state, i_seed, I_seed, QDP_all); for (mu = 0; mu < NELEMS(U); mu++) { QDP_M_eq_gaussian_S(U[mu], state, QDP_all); } for (i = 0; i < NELEMS(F); i++) { QDP_D_eq_gaussian_S(F[i], state, QDP_all); } /* build the clovers */ clover(C, U); /* initialize CLOVER */ if (QOP_CLOVER_init(&clover_state, lattice, network, node, primary, sublattice, NULL)) { printf0("CLOVER_init() failed\n"); goto end; } if (QOP_CLOVER_import_fermion(&c_f[0], clover_state, f_reader, F[0])) { printf0("CLOVER_import_fermion(0) failed\n"); goto end; } if (QOP_CLOVER_import_fermion(&c_f[1], clover_state, 
f_reader, F[1])) { printf0("CLOVER_import_fermion(1) failed\n"); goto end; } if (QOP_CLOVER_allocate_fermion(&c_f[2], clover_state)) { printf0("CLOVER_allocate_fermion(2) failed\n"); goto end; } if (QOP_CLOVER_allocate_fermion(&c_f[3], clover_state)) { printf0("CLOVER_allocate_fermion(3) failed\n"); goto end; } if (QOP_CLOVER_import_gauge(&c_g, clover_state, kappa, c_sw, u_reader, c_reader, NULL)) { printf("CLOVER_import_gauge() failed\n"); goto end; } QOP_CLOVER_D_operator(c_f[2], c_g, c_f[0]); QOP_CLOVER_export_fermion(f_writer, F[2], c_f[2]); QOP_CLOVER_D_operator_conjugated(c_f[3], c_g, c_f[1]); QOP_CLOVER_export_fermion(f_writer, F[3], c_f[3]); /* free CLOVER */ QOP_CLOVER_free_gauge(&c_g); for (i = 0; i < NELEMS(c_f); i++) QOP_CLOVER_free_fermion(&c_f[i]); QOP_CLOVER_fini(&clover_state); /* Compute plaquette */ plaq = plaquette(U); /* field norms */ for (i = 0; i < NELEMS(F); i++) QDP_r_eq_norm2_D(&n[i], F[i], QDP_all); /* Display the values */ printf0("plaquette = %g\n", plaq / (QDP_volume() * QDP_Nc * NDIM * (NDIM - 1) / 2 )); for (i = 0; i < NELEMS(F); i++) printf0(" |f|^2 [%d] = %20.10e\n", i, (double)(n[i])); /* Compute and display <f[1] f[2]> */ show_dot("1|D0", F[1], F[2]); /* Compute and display <f[3] f[0]> */ show_dot("X1|0", F[3], F[0]); QDP_destroy_S(state); QDP_destroy_I(I_seed); destroy_Mvector(U, NELEMS(U)); destroy_Mvector(C, NELEMS(C)); destroy_Dvector(F, NELEMS(F)); status = 0; end: /* shutdown QDP */ printf0("end\n"); QDP_finalize(); return status; }
/*
 * Conjugate-gradient solve of (M_adjoint * M) psi = chi using QDP fields.
 *
 *   niter          iterations between restarts; must be non-zero (it is
 *                  used as a modulus).  The solve gives up once the
 *                  iteration count reaches 3*niter.
 *   rsqmin         stopping criterion: converge when
 *                  |r|^2 <= rsqmin * |chi|^2
 *   final_rsq_ptr  receives the last computed |r|^2
 *
 * Returns the number of iterations performed, counting each multiplication
 * by M_adjoint*M (including the one used to form the residual at every
 * start/restart).
 *
 * The gauge links, psi and chi are loaded from the site structure on entry;
 * psi is written back to the site structure when the inner loop converges
 * and at the end of every restart cycle.
 *
 * Build options visible here:
 *   LU             even-site preconditioned operator (mkappa = -kappa^2);
 *                  otherwise the full-lattice operator (mkappa = -kappa)
 *   PRESHIFT_LINKS fills gaugelink[4..7] with the adjoint of the
 *                  backward-shifted links
 *   CGTIME         times the solve and prints a performance line on
 *                  convergence inside the main loop
 */
int
congrad_w(int niter, Real rsqmin, Real *final_rsq_ptr)
{
  int i;
  int iteration;             /* counter for iterations */
  double source_norm;
  double rsqstop;
  QLA_Real a, b;
  double rsq, oldrsq, pkp;   /* Sugar's a,b,resid**2,previous resid*2 */
                             /* pkp = cg_p.K.cg_p */
  QLA_Real mkappa;
  QLA_Real sum;
#ifdef CGTIME
  double dtime;
#endif

#ifdef LU
  mkappa = -kappa*kappa;
#else
  mkappa = -kappa;
#endif

  setup_cg();

  for(i=0; i<4; i++) {
    set_M_from_site(gaugelink[i], F_OFFSET(link[i]), EVENANDODD);
  }
  set_D_from_site(psi, F_OFFSET(psi), EVENANDODD);
  set_D_from_site(chi, F_OFFSET(chi), EVENANDODD);

#ifdef PRESHIFT_LINKS
  {
    QDP_ColorMatrix *tcm;
    tcm = QDP_create_M();
    for(i=0; i<4; i++) {
      QDP_M_eq_sM(tcm, gaugelink[i], QDP_neighbor[i], QDP_backward, QDP_all);
      QDP_M_eq_Ma(gaugelink[i+4], tcm, QDP_all);
    }
    QDP_destroy_M(tcm);
  }
#endif

#ifdef CGTIME
  dtime = -dclock();
#endif

  iteration = 0;

 start:
  /* mp <-  M_adjoint*M*psi
     r,p <- chi - mp
     rsq = |r|^2
     source_norm = |chi|^2
  */
  rsq = source_norm = 0.0;
#ifdef LU
  QDP_D_eq_D(cgp, psi, QDP_even);
  dslash_special_qdp(tt1, cgp, 1, QDP_odd, temp1);
  dslash_special_qdp(ttt, tt1, 1, QDP_even, temp2);
  QDP_D_eq_r_times_D_plus_D(ttt, &mkappa, ttt, cgp, QDP_even);

  dslash_special_qdp(tt2, ttt, -1, QDP_odd, temp3);
  dslash_special_qdp(mp, tt2, -1, QDP_even, temp4);
  QDP_D_eq_r_times_D_plus_D(mp, &mkappa, mp, ttt, QDP_even);

  QDP_D_eq_D_minus_D(cgr, chi, mp, QDP_even);
  QDP_D_eq_D(cgp, cgr, QDP_even);

  QDP_r_eq_norm2_D(&sum, chi, QDP_even);
  source_norm = sum;
  QDP_r_eq_norm2_D(&sum, cgr, QDP_even);
  rsq = sum;
#else
  /*
   * Copy psi on the whole lattice: everything below in this branch reads
   * cgp on QDP_all, so an even-only copy would leave stale odd-site data
   * in the operand used to form the initial residual.
   */
  QDP_D_eq_D(cgp, psi, QDP_all);
  dslash_special_qdp(ttt, cgp, 1, QDP_all, temp1);
  QDP_D_eq_r_times_D_plus_D(ttt, &mkappa, ttt, cgp, QDP_all);

  dslash_special_qdp(mp, ttt, -1, QDP_all, temp1);
  QDP_D_eq_r_times_D_plus_D(mp, &mkappa, mp, ttt, QDP_all);

  QDP_D_eq_D_minus_D(cgr, chi, mp, QDP_all);
  QDP_D_eq_D(cgp, cgr, QDP_all);

  QDP_r_eq_norm2_D(&sum, chi, QDP_all);
  source_norm = sum;
  QDP_r_eq_norm2_D(&sum, cgr, QDP_all);
  rsq = sum;
#endif

  iteration++;    /* iteration counts number of multiplications
                     by M_adjoint*M */
  total_iters++;

  rsqstop = rsqmin * source_norm;
  if( rsq <= rsqstop ) {
    /* psi already satisfies the criterion; nothing new to write back */
    *final_rsq_ptr = (Real)rsq;
    return (iteration);
  }

  /* main loop - do until convergence or time to restart */
  /*
     oldrsq <- rsq
     mp <- M_adjoint*M*p
     pkp <- p.M_adjoint*M.p
     a <- rsq/pkp
     psi <- psi + a*p
     r <- r - a*mp
     rsq <- |r|^2
     b <- rsq/oldrsq
     p <- r + b*p
  */
  do {
    oldrsq = rsq;
#ifdef LU
    dslash_special_qdp(tt1, cgp, 1, QDP_odd, temp1);
    dslash_special_qdp(ttt, tt1, 1, QDP_even, temp2);
    QDP_D_eq_r_times_D_plus_D(ttt, &mkappa, ttt, cgp, QDP_even);

    dslash_special_qdp(tt2, ttt, -1, QDP_odd, temp3);
    dslash_special_qdp(mp, tt2, -1, QDP_even, temp4);
    QDP_D_eq_r_times_D_plus_D(mp, &mkappa, mp, ttt, QDP_even);

    QDP_r_eq_re_D_dot_D(&sum, cgp, mp, QDP_even);
    pkp = sum;
#else
    dslash_special_qdp(ttt, cgp, 1, QDP_all, temp1);
    QDP_D_eq_r_times_D_plus_D(ttt, &mkappa, ttt, cgp, QDP_all);

    dslash_special_qdp(mp, ttt, -1, QDP_all, temp1);
    QDP_D_eq_r_times_D_plus_D(mp, &mkappa, mp, ttt, QDP_all);

    QDP_r_eq_re_D_dot_D(&sum, cgp, mp, QDP_all);
    pkp = sum;
#endif
    iteration++;
    total_iters++;

    a = rsq / pkp;
    QDP_D_peq_r_times_D(psi, &a, cgp, MYSUBSET);
    QDP_D_meq_r_times_D(cgr, &a, mp, MYSUBSET);
    QDP_r_eq_norm2_D(&sum, cgr, MYSUBSET);
    rsq = sum;

    if( rsq <= rsqstop ) {
      *final_rsq_ptr = (Real)rsq;
#ifdef CGTIME
      dtime += dclock();
      /* 6480 is the per-site, per-iteration flop figure used for the rate */
      if(this_node==0)
        printf("CONGRAD2: time = %.2e size_r= %.2e iters= %d MF = %.1f\n",
               dtime, rsq, iteration,
               (double)6480*iteration*even_sites_on_node/(dtime*1e6));
#endif
      set_site_from_D(F_OFFSET(psi), psi, EVENANDODD);
      return (iteration);
    }

    b = rsq / oldrsq;
    QDP_D_eq_r_times_D_plus_D(cgp, &b, cgp, cgr, MYSUBSET);
  } while( iteration%niter != 0 );

  /* Restart: save the current solution and recompute the true residual. */
  set_site_from_D(F_OFFSET(psi), psi, EVENANDODD);

  if( iteration < 3*niter ) goto start;

  /* Not converged within the iteration budget. */
  *final_rsq_ptr = (Real)rsq;
  return (iteration);
}