示例#1
0
double
bench_inv(QOP_info_t *info, QOP_invert_arg_t *inv_arg,
	  QOP_resid_arg_t *res_arg, QDP_DiracFermion *out,
	  QDP_DiracFermion *in)
{
  static QLA_Real r2s=-1, r2;
  double sec=0, flop=0, mf=0;
  int i, iter=0;
  QOP_DiracFermion *qopout, *qopin;

  QDP_D_eq_zero(out, QDP_all);
  qopout = QOP_create_D_from_qdp(out);
  qopin = QOP_create_D_from_qdp(in);
  for(i=0; i<=nit; i++) {
    QMP_barrier();
    QOP_wilson_invert(info, flw, inv_arg, res_arg, kappa, qopout, qopin);
    QMP_barrier();
    printf("%i\t%i\t%g\t%i\n", i, res_arg->final_iter, info->final_sec, (int)info->final_flop);
    if(i>0) {
      iter += res_arg->final_iter;
      sec += info->final_sec;
      flop += info->final_flop;
      //mf += info->final_flop/(1e6*info->final_sec);
    }
  }
  QOP_destroy_D(qopout);
  QOP_destroy_D(qopin);
  QDP_r_eq_norm2_D(&r2, out, QDP_even);
  if(r2s<0) r2s = r2;
  if(fabs(1-r2/r2s)>1e-3) {
    printf0("first norm = %g  this norn = %g\n", r2s, r2);
  }
  mf = 1;
  QMP_sum_double(&mf);
  QMP_sum_double(&sec);
  QMP_sum_double(&flop);
  res_arg->final_iter = iter/nit;
  info->final_sec = sec/(mf*nit);
  info->final_flop = flop/(mf*nit);
  mf = info->final_flop/(1e6*info->final_sec);
  return mf;
}
示例#2
0
文件: qopwilson.c 项目: jcosborn/qll
void
qopWilsonDslash(Layout *l, real *x, real *u[8], real mass, int sign,
		real *y, char *sub)
{
  QDP_ColorMatrix *qu[4];
  QDP_DiracFermion *out, *in;
  in = QDP_create_D();
  out = QDP_create_D();
  unpackD(l, in, y);
  unpackD(l, out, x);
  for(int i=0; i<4; i++) {
    qu[i] = QDP_create_M();
    unpackM(l, qu[i], u[2*i]);
    QLA_Real two = 2;
    QDP_M_eq_r_times_M(qu[i], &two, qu[i], QDP_all);
  }
  QOP_FermionLinksWilson *fla;
  fla = QOP_wilson_create_L_from_qdp(qu, NULL);
  QOP_evenodd_t eoOut=QOP_EVENODD, eoIn=QOP_EVENODD;
  if(sub[0]=='e') {
    eoOut = QOP_EVEN;
    eoIn = QOP_ODD;
  }
  if(sub[0]=='o') {
    eoOut = QOP_ODD;
    eoIn = QOP_EVEN;
  }
  real kappa = 0.5/(4+mass);
  QOP_wilson_dslash_qdp(NULL, fla, kappa, sign, out, in, eoOut, eoIn);
  QLA_Real n2;
  QDP_r_eq_norm2_D(&n2, out, QDP_all);
  printf0("out2: %g\n", n2);
  packD(l, x, out);
  QDP_destroy_D(in);
  QDP_destroy_D(out);
  for(int i=0; i<4; i++) {
    QDP_destroy_M(qu[i]);
  }
}
示例#3
0
int
main(int argc, char *argv[])
{
    const char *msg;
    int status = 1;
    int mu, i;
    struct QOP_CLOVER_State *clover_state;
    QDP_Int *I_seed;
    int i_seed;
    QDP_RandomState *state;
    QLA_Real plaq;
    QLA_Real n[NELEMS(F)];
    struct QOP_CLOVER_Gauge *c_g;
    struct QOP_CLOVER_Fermion *c_f[NELEMS(F)];
    double kappa;
    double c_sw;
    double in_eps;
    int in_iter;
    int log_flag;
    double out_eps;
    int out_iter;
    int cg_status;
    double run_time;
    long long flops, sent, received;
    
    /* start QDP */
    QDP_initialize(&argc, &argv);

    if (argc != 1 + NDIM + 6) {
        printf0("ERROR: usage: %s Lx ... seed kappa c_sw iter eps log?\n",
                argv[0]);
        goto end;
    }

    for (mu = 0; mu < NDIM; mu++) {
        lattice[mu] = atoi(argv[1 + mu]);
    }
    i_seed = atoi(argv[1 + NDIM]);
    kappa = atof(argv[2 + NDIM]);
    c_sw = atof(argv[3 + NDIM]);
    in_iter = atoi(argv[4 + NDIM]);
    in_eps = atof(argv[5 + NDIM]);
    
    log_flag = atoi(argv[6 + NDIM]) == 0? 0: QOP_CLOVER_LOG_EVERYTHING;

    /* set lattice size and create layout */
    QDP_set_latsize(NDIM, lattice);
    QDP_create_layout();

    primary = QMP_is_primary_node();
    self = QMP_get_node_number();
    get_vector(network, 1, QMP_get_logical_number_of_dimensions(),
               QMP_get_logical_dimensions());
    get_vector(node, 0, QMP_get_logical_number_of_dimensions(),
               QMP_get_logical_coordinates());
        
    printf0("network: ");
    for (i = 0; i < NDIM; i++)
        printf0(" %d", network[i]);
    printf0("\n");

    printf0("node: ");
    for (i = 0; i < NDIM; i++)
        printf0(" %d", node[i]);
    printf0("\n");

    printf0("kappa: %20.15f\n", kappa);
    printf0("c_sw:  %20.15f\n", c_sw);

    printf0("in_iter: %d\n", in_iter);
    printf0("in_eps: %15.2e\n", in_eps);

    /* allocate the gauge field */
    create_Mvector(U, NELEMS(U));
    create_Mvector(C, NELEMS(C));
    create_Dvector(F, NELEMS(F));
    I_seed = QDP_create_I();
    QDP_I_eq_funci(I_seed, icoord, QDP_all);
    state = QDP_create_S();
    QDP_S_eq_seed_i_I(state, i_seed, I_seed, QDP_all);
    
    for (mu = 0; mu < NELEMS(U); mu++) {
        QDP_M_eq_gaussian_S(U[mu], state, QDP_all);
    }
    
    for (i = 0; i < NELEMS(F); i++) {
        QDP_D_eq_gaussian_S(F[i], state, QDP_all);
    }

    /* build the clovers */
    clover(C, U);

    /* initialize CLOVER */
    if (QOP_CLOVER_init(&clover_state, lattice, network, node, primary,
                        sublattice, NULL)) {
        printf0("CLOVER_init() failed\n");
        goto end;
    }

    if (QOP_CLOVER_import_fermion(&c_f[0], clover_state, f_reader, F[0])) {
        printf0("CLOVER_import_fermion(0) failed\n");
        goto end;
    }

    if (QOP_CLOVER_allocate_fermion(&c_f[1], clover_state)) {
        printf0("CLOVER_allocate_fermion(1) failed\n");
        goto end;
    }

    if (QOP_CLOVER_allocate_fermion(&c_f[2], clover_state)) {
        printf0("CLOVER_allocate_fermion(2) failed\n");
        goto end;
    }

    if (QOP_CLOVER_allocate_fermion(&c_f[3], clover_state)) {
        printf0("CLOVER_allocate_fermion(3) failed\n");
        goto end;
    }

    if (QOP_CLOVER_import_gauge(&c_g, clover_state, kappa, c_sw,
                                u_reader, c_reader, NULL)) {
        printf("CLOVER_import_gauge() failed\n");
        goto end;
    }

    QOP_CLOVER_D_operator(c_f[2], c_g, c_f[0]);
    cg_status = QOP_CLOVER_D_CG(c_f[3], &out_iter, &out_eps,
                                c_f[2], c_g, c_f[2], in_iter, in_eps,
                                log_flag);

    msg = QOP_CLOVER_error(clover_state);

    QOP_CLOVER_performance(&run_time, &flops, &sent, &received, clover_state);

    QOP_CLOVER_export_fermion(f_writer, F[3], c_f[3]);

    printf0("CG status: %d\n", cg_status);
    printf0("CG error message: %s\n", msg? msg: "<NONE>");
    printf0("CG iter: %d\n", out_iter);
    printf0("CG eps: %20.10e\n", out_eps);
    printf0("CG performance: runtime %e sec\n", run_time);
    printf0("CG performance: flops  %.3e MFlop/s (%lld)\n",
            flops * 1e-6 / run_time, flops);
    printf0("CG performance: snd    %.3e MB/s (%lld)\n",
            sent * 1e-6 / run_time, sent);
    printf0("CG performance: rcv    %.3e MB (%lld)/s\n",
            received * 1e-6 / run_time, received);

    /* free CLOVER */
    QOP_CLOVER_free_gauge(&c_g);
    for (i = 0; i < NELEMS(c_f); i++)
        QOP_CLOVER_free_fermion(&c_f[i]);

    QOP_CLOVER_fini(&clover_state);

    /* Compute plaquette */
    plaq = plaquette(U);

    /* field norms */
    for (i = 0; i < NELEMS(F); i++)
        QDP_r_eq_norm2_D(&n[i], F[i], QDP_all);
        
    /* Display the values */
    printf0("plaquette = %g\n",
            plaq / (QDP_volume() * QDP_Nc * NDIM * (NDIM - 1) / 2 ));
    for (i = 0; i < NELEMS(F); i++)
        printf0(" |f|^2 [%d] = %20.10e\n", i, (double)(n[i]));

    /* Compute and display <f[1] f[0]> */
    show_dot("1|orig", F[1], F[0]);
    /* Compute and display <f[1] f[3]> */
    show_dot("1|solv", F[1], F[3]);

    QDP_destroy_S(state);
    QDP_destroy_I(I_seed);
    destroy_Mvector(U, NELEMS(U));
    destroy_Mvector(C, NELEMS(C));
    destroy_Dvector(F, NELEMS(F));

    status = 0;
end:
    /* shutdown QDP */
    printf0("end\n");
    QDP_finalize();
        
    return status;
}
示例#4
0
int
main(int argc, char *argv[])
{
    int status = 1;
    int mu, i;
    struct QOP_CLOVER_State *clover_state;
    QDP_Int *I_seed;
    int i_seed;
    QDP_RandomState *state;
    QLA_Real plaq;
    QLA_Real n[NELEMS(F)];
    struct QOP_CLOVER_Gauge *c_g;
    struct QOP_CLOVER_Fermion *c_f[NELEMS(F)];
    double kappa;
    double c_sw;

    /* start QDP */
    QDP_initialize(&argc, &argv);

    if (argc != 1 + NDIM + 3) {
        printf0("ERROR: usage: %s Lx ... seed kappa c_sw\n", argv[0]);
        goto end;
    }

    for (mu = 0; mu < NDIM; mu++) {
        lattice[mu] = atoi(argv[1 + mu]);
    }
    i_seed = atoi(argv[1 + NDIM]);
    kappa = atof(argv[2 + NDIM]);
    c_sw = atof(argv[3 + NDIM]);
    
    /* set lattice size and create layout */
    QDP_set_latsize(NDIM, lattice);
    QDP_create_layout();

    primary = QMP_is_primary_node();
    self = QMP_get_node_number();
    get_vector(network, 1, QMP_get_logical_number_of_dimensions(),
               QMP_get_logical_dimensions());
    get_vector(node, 0, QMP_get_logical_number_of_dimensions(),
               QMP_get_logical_coordinates());
        
    printf0("network: ");
    for (i = 0; i < NDIM; i++)
        printf0(" %d", network[i]);
    printf0("\n");

    printf0("node: ");
    for (i = 0; i < NDIM; i++)
        printf0(" %d", node[i]);
    printf0("\n");

    printf0("kappa: %20.15f\n", kappa);
    printf0("c_sw:  %20.15f\n", c_sw);

    /* allocate the gauge field */
    create_Mvector(U, NELEMS(U));
    create_Mvector(C, NELEMS(C));
    create_Dvector(F, NELEMS(F));
    I_seed = QDP_create_I();
    QDP_I_eq_funci(I_seed, icoord, QDP_all);
    state = QDP_create_S();
    QDP_S_eq_seed_i_I(state, i_seed, I_seed, QDP_all);
    
    for (mu = 0; mu < NELEMS(U); mu++) {
        QDP_M_eq_gaussian_S(U[mu], state, QDP_all);
    }
    
    for (i = 0; i < NELEMS(F); i++) {
        QDP_D_eq_gaussian_S(F[i], state, QDP_all);
    }

    /* build the clovers */
    clover(C, U);

    /* initialize CLOVER */
    if (QOP_CLOVER_init(&clover_state, lattice, network, node, primary,
                        sublattice, NULL)) {
        printf0("CLOVER_init() failed\n");
        goto end;
    }

    if (QOP_CLOVER_import_fermion(&c_f[0], clover_state, f_reader, F[0])) {
        printf0("CLOVER_import_fermion(0) failed\n");
        goto end;
    }

    if (QOP_CLOVER_import_fermion(&c_f[1], clover_state, f_reader, F[1])) {
        printf0("CLOVER_import_fermion(1) failed\n");
        goto end;
    }

    if (QOP_CLOVER_allocate_fermion(&c_f[2], clover_state)) {
        printf0("CLOVER_allocate_fermion(2) failed\n");
        goto end;
    }

    if (QOP_CLOVER_allocate_fermion(&c_f[3], clover_state)) {
        printf0("CLOVER_allocate_fermion(3) failed\n");
        goto end;
    }

    if (QOP_CLOVER_import_gauge(&c_g, clover_state, kappa, c_sw,
                                u_reader, c_reader, NULL)) {
        printf("CLOVER_import_gauge() failed\n");
        goto end;
    }

    QOP_CLOVER_D_operator(c_f[2], c_g, c_f[0]);
    QOP_CLOVER_export_fermion(f_writer, F[2], c_f[2]);

    QOP_CLOVER_D_operator_conjugated(c_f[3], c_g, c_f[1]);
    QOP_CLOVER_export_fermion(f_writer, F[3], c_f[3]);
    
    /* free CLOVER */
    QOP_CLOVER_free_gauge(&c_g);
    for (i = 0; i < NELEMS(c_f); i++)
        QOP_CLOVER_free_fermion(&c_f[i]);

    QOP_CLOVER_fini(&clover_state);

    /* Compute plaquette */
    plaq = plaquette(U);

    /* field norms */
    for (i = 0; i < NELEMS(F); i++)
        QDP_r_eq_norm2_D(&n[i], F[i], QDP_all);
        


    /* Display the values */
    printf0("plaquette = %g\n",
            plaq / (QDP_volume() * QDP_Nc * NDIM * (NDIM - 1) / 2 ));
    for (i = 0; i < NELEMS(F); i++)
        printf0(" |f|^2 [%d] = %20.10e\n", i, (double)(n[i]));

    /* Compute and display <f[1] f[2]> */
    show_dot("1|D0", F[1], F[2]);
    /* Compute and display <f[3] f[0]> */
    show_dot("X1|0", F[3], F[0]);

    QDP_destroy_S(state);
    QDP_destroy_I(I_seed);
    destroy_Mvector(U, NELEMS(U));
    destroy_Mvector(C, NELEMS(C));
    destroy_Dvector(F, NELEMS(F));

    status = 0;
end:
    /* shutdown QDP */
    printf0("end\n");
    QDP_finalize();
        
    return status;
}
示例#5
0
int
congrad_w(int niter, Real rsqmin, Real *final_rsq_ptr) 
{
  int i;
  int iteration;	/* counter for iterations */
  double source_norm;
  double rsqstop;
  QLA_Real a, b;
  double rsq,oldrsq,pkp;	/* Sugar's a,b,resid**2,previous resid*2 */
				/* pkp = cg_p.K.cg_p */
  QLA_Real mkappa;
  QLA_Real sum;
#ifdef CGTIME
  double dtime;
#endif
#ifdef LU
  mkappa = -kappa*kappa;
#else
  mkappa = -kappa;
#endif

  setup_cg();

  for(i=0; i<4; i++) {
    set_M_from_site(gaugelink[i], F_OFFSET(link[i]),EVENANDODD);
  }
  set_D_from_site(psi, F_OFFSET(psi),EVENANDODD);
  set_D_from_site(chi, F_OFFSET(chi),EVENANDODD);

#ifdef PRESHIFT_LINKS
  {
    QDP_ColorMatrix *tcm;
    tcm = QDP_create_M();
    for(i=0; i<4; i++) {
      QDP_M_eq_sM(tcm, gaugelink[i], QDP_neighbor[i], QDP_backward, QDP_all);
      QDP_M_eq_Ma(gaugelink[i+4], tcm, QDP_all);
    }
    QDP_destroy_M(tcm);
  }
#endif

#ifdef CGTIME
  dtime = -dclock();
#endif

  iteration=0;
 start:
  /* mp <-  M_adjoint*M*psi
     r,p <- chi - mp
     rsq = |r|^2
     source_norm = |chi|^2
  */
  rsq = source_norm = 0.0;

#ifdef LU

  QDP_D_eq_D(cgp, psi, QDP_even);
  dslash_special_qdp(tt1, cgp, 1, QDP_odd, temp1);
  dslash_special_qdp(ttt, tt1, 1, QDP_even, temp2);
  QDP_D_eq_r_times_D_plus_D(ttt, &mkappa, ttt, cgp, QDP_even);

  dslash_special_qdp(tt2, ttt, -1, QDP_odd, temp3);
  dslash_special_qdp(mp, tt2, -1, QDP_even, temp4);
  QDP_D_eq_r_times_D_plus_D(mp, &mkappa, mp, ttt, QDP_even);
  QDP_D_eq_D_minus_D(cgr, chi, mp, QDP_even);
  QDP_D_eq_D(cgp, cgr, QDP_even);

  QDP_r_eq_norm2_D(&sum, chi, QDP_even);
  source_norm = sum;
  QDP_r_eq_norm2_D(&sum, cgr, QDP_even);
  rsq = sum;

#else

  QDP_D_eq_D(cgp, psi, QDP_even);
  dslash_special_qdp(ttt, cgp, 1, QDP_all, temp1);
  QDP_D_eq_r_times_D_plus_D(ttt, &mkappa, ttt, cgp, QDP_all);

  dslash_special_qdp(mp, ttt, -1, QDP_all, temp1);
  QDP_D_eq_r_times_D_plus_D(mp, &mkappa, mp, ttt, QDP_all);

  QDP_D_eq_D_minus_D(cgr, chi, mp, QDP_all);
  QDP_D_eq_D(cgp, cgr, QDP_all);

  QDP_r_eq_norm2_D(&sum, chi, QDP_all);
  source_norm = sum;
  QDP_r_eq_norm2_D(&sum, cgr, QDP_all);
  rsq = sum;

#endif

  iteration++ ;	/* iteration counts number of multiplications
		   by M_adjoint*M */
  total_iters++;
  /**if(this_node==0)printf("congrad2: source_norm = %e\n",source_norm);
     if(this_node==0)printf("congrad2: iter %d, rsq %e, pkp %e, a %e\n",
     iteration,(double)rsq,(double)pkp,(double)a );**/
  rsqstop = rsqmin * source_norm;
  if( rsq <= rsqstop ){
    *final_rsq_ptr= (Real)rsq;
    return (iteration);
  }

  /* main loop - do until convergence or time to restart */
  /* 
     oldrsq <- rsq
     mp <- M_adjoint*M*p
     pkp <- p.M_adjoint*M.p
     a <- rsq/pkp
     psi <- psi + a*p
     r <- r - a*mp
     rsq <- |r|^2
     b <- rsq/oldrsq
     p <- r + b*p
  */
  do {
    oldrsq = rsq;
#ifdef LU
    dslash_special_qdp(tt1, cgp, 1, QDP_odd, temp1);
    dslash_special_qdp(ttt, tt1, 1, QDP_even, temp2);
    QDP_D_eq_r_times_D_plus_D(ttt, &mkappa, ttt, cgp, QDP_even);

    dslash_special_qdp(tt2, ttt, -1, QDP_odd, temp3);
    dslash_special_qdp(mp, tt2, -1, QDP_even, temp4);
    QDP_D_eq_r_times_D_plus_D(mp, &mkappa, mp, ttt, QDP_even);

    QDP_r_eq_re_D_dot_D(&sum, cgp, mp, QDP_even);
    pkp = sum;
#else
    dslash_special_qdp(ttt, cgp, 1, QDP_all, temp1);
    QDP_D_eq_r_times_D_plus_D(ttt, &mkappa, ttt, cgp, QDP_all);

    dslash_special_qdp(mp, ttt, -1, QDP_all, temp1);
    QDP_D_eq_r_times_D_plus_D(mp, &mkappa, mp, ttt, QDP_all);

    QDP_r_eq_re_D_dot_D(&sum, cgp, mp, QDP_all);
    pkp = sum;
#endif
    iteration++;
    total_iters++;

    a = rsq / pkp;
    QDP_D_peq_r_times_D(psi, &a, cgp, MYSUBSET);
    QDP_D_meq_r_times_D(cgr, &a, mp, MYSUBSET);
    QDP_r_eq_norm2_D(&sum, cgr, MYSUBSET);
    rsq = sum;

    /**if(this_node==0)printf("congrad2: iter %d, rsq %e, pkp %e, a %e\n",
       iteration,(double)rsq,(double)pkp,(double)a );**/
    if( rsq <= rsqstop ){
      *final_rsq_ptr= (Real)rsq;
#ifdef CGTIME
      dtime += dclock();
      if(this_node==0)
	printf("CONGRAD2: time = %.2e size_r= %.2e iters= %d MF = %.1f\n",
	       dtime,rsq,iteration,
	       (double)6480*iteration*even_sites_on_node/(dtime*1e6));
      //(double)5616*iteration*even_sites_on_node/(dtime*1e6));
#endif
      set_site_from_D(F_OFFSET(psi), psi,EVENANDODD);
      return (iteration);
    }

    b = rsq / oldrsq;
    QDP_D_eq_r_times_D_plus_D(cgp, &b, cgp, cgr, MYSUBSET);

  } while( iteration%niter != 0);

  set_site_from_D(F_OFFSET(psi), psi,EVENANDODD);

  if( iteration < 3*niter ) goto start;
  *final_rsq_ptr= (Real)rsq;
  return(iteration);
}