Пример #1
0
/* Build the fat and long links with the MILC link-fattening routines and
   then hand them over to QOP.

   both - not referenced by this routine; kept in the signature so that
          existing callers are unaffected
   fn   - link structure passed to the MILC loaders and afterwards to the
          MILC-to-QOP mapping
   ap   - action path description passed through to the MILC loaders

   Calls terminate(1) when phases_in != 1 or when initialize_qop() does
   not return QOP_SUCCESS. */
static void
create_asqtad_links(int both, ferm_links_t *fn, ks_action_paths *ap) {
  char myname[] = "create_asqtad_links";

  /* Parameter is intentionally unused here */
  (void)both;

  /* This routine only works when phases_in is set */
  if( phases_in != 1){
    node0_printf("create_asqtad_links: BOTCH: needs phases in\n");
    terminate(1);
  }

  /* Initialize QOP */
  if(initialize_qop() != QOP_SUCCESS){
    printf("%s(%d): Error initializing QOP\n",myname,this_node);
    terminate(1);
  }

  /* Use MILC link fattening routines */
  load_fatlinks(fn, ap);
  load_longlinks(fn, ap);

  /* Map the MILC fat and long links to QOP, including a possible change
     of precision */
  create_qop_links_from_milc_fn(fn);
}
Пример #2
0
/* Load the fat and long links into fn, unless fn->valid is already 1.
   With DBLSTORE_FN defined the backward fat and long links are loaded
   as well.  On completion fn->valid is set to 1, so a repeated call
   returns without doing any work. */
void load_ferm_links(ferm_links_t *fn){
  if(fn->valid != 1){
    /* Forward links first */
    load_fatlinks(fn);
    load_longlinks(fn);

#ifdef DBLSTORE_FN
    /* Backward links are only loaded in the DBLSTORE_FN build */
    load_fatbacklinks(fn);
    load_longbacklinks(fn);
#endif

    /* Mark the links as valid */
    fn->valid = 1;
  }
}
Пример #3
0
/* Load the fat and long links into fn using the action paths ap, unless
   fn->valid is already 1.  The ap pointer is stored in fn->ap.  With
   DBLSTORE_FN defined the backward fat and long links are loaded as
   well.  On completion fn->valid is set to 1, so a repeated call
   returns without doing any work. */
void load_ferm_links(ferm_links_t *fn, ks_action_paths *ap){
  if(fn->valid != 1){
    /* Forward links first */
    load_fatlinks(fn, ap);
    load_longlinks(fn, ap);

    /* Record which action paths these links were loaded with */
    fn->ap = ap;

#ifdef DBLSTORE_FN
    /* Backward links are only loaded in the DBLSTORE_FN build */
    load_fatbacklinks(fn);
    load_longbacklinks(fn);
#endif

    /* Mark the links as valid */
    fn->valid = 1;
  }
}
Пример #4
0
int ks_congrad( field_offset src, field_offset dest, float mass,
    int niter, float rsqmin, int parity, float *final_rsq_ptr ){
    register int i;
    register site *s;
    int iteration;	/* counter for iterations */
    float a,b;			/* Sugar's a,b,resid**2,last resid*2 */
    double rsq,oldrsq,pkp;		/* pkp = cg_p.K.cg_p */
    float msq_x4;	/* 4*mass*mass */
    double source_norm;	/* squared magnitude of source vector */
    double rsqstop;	/* stopping residual normalized by source norm */
    int l_parity;	/* parity we are currently doing */
    int l_otherparity;	/* the other parity */
    msg_tag * tags1[16], *tags2[16];	/* tags for gathers to parity and opposite */
    int special_started;	/* 1 if dslash_special has been called */

/* Timing */

#ifdef CGTIME
double dtimed,dtimec;
#endif
double nflop;

/* debug */
#ifdef CGTIME
 dtimec = -dclock(); 
#endif

nflop = 1187;
if(parity==EVENANDODD)nflop *=2;
	
	special_started=0;
	/* if we want both parities, we will do even first. */
	switch(parity){
	    case(EVEN): l_parity=EVEN; l_otherparity=ODD; break;
	    case(ODD):  l_parity=ODD; l_otherparity=EVEN; break;
	    case(EVENANDODD):  l_parity=EVEN; l_otherparity=ODD; break;
	}
	msq_x4 = 4.0*mass*mass;
        iteration = 0;

        if (!valid_longlinks) load_longlinks();
        if (!valid_fatlinks) load_fatlinks();
#ifdef CONGRAD_TMP_VECTORS
	/* now we can allocate temporary variables and copy them */
	/* PAD may be used to avoid cache thrashing */
#define PAD 0

 	if(first_congrad) {
	  ttt = (su3_vector *) malloc((sites_on_node+PAD)*sizeof(su3_vector));
	  cg_p = (su3_vector *) malloc((sites_on_node+PAD)*sizeof(su3_vector));
	  resid = (su3_vector *) malloc((sites_on_node+PAD)*sizeof(su3_vector));
	  t_dest = (su3_vector *) malloc((sites_on_node+PAD)*sizeof(su3_vector));
	  first_congrad = 0;
 	}
#endif

#ifdef CGTIME
 dtimec = -dclock(); 
#endif

#ifdef CONGRAD_TMP_VECTORS
	/* now we copy dest to temporaries */
  FORALLSITES(i,s) {
    t_dest[i] = *(su3_vector *)F_PT(s,dest);
  }
Пример #5
0
/* D_slash routine - sets dest. on each site equal to sum of
   sources parallel transported to site, with minus sign for transport
   from negative directions.  Use "fatlinks" for one link transport,
   "longlinks" for three link transport. */
void dslash_fn( field_offset src, field_offset dest, int parity ) {
   register int i;
   register site *s;
   register int dir,otherparity;
   register su3_matrix *fat4, *long4;
   msg_tag *tag[16];

    if(!valid_longlinks)load_longlinks();
    if(!valid_fatlinks)load_fatlinks();
    switch(parity){
	case EVEN:	otherparity=ODD; break;
	case ODD:	otherparity=EVEN; break;
	case EVENANDODD:	otherparity=EVENANDODD; break;
    }

    /* Start gathers from positive directions */
    /* And start the 3-step gather too */
    for( dir=XUP; dir<=TUP; dir++ ){
	tag[dir] = start_gather( src, sizeof(su3_vector), dir, parity,
	    gen_pt[dir] );
	tag[DIR3(dir)] = start_gather( src, sizeof(su3_vector), DIR3(dir),
	    parity, gen_pt[DIR3(dir)] );
    }

    /* Multiply by adjoint matrix at other sites */
    /* Use fat link for single link transport */
    FORSOMEPARITY( i, s, otherparity ){
      if( i < loopend-FETCH_UP ){
#ifdef DSLASH_TMP_LINKS
	fat4 = &(t_fatlink[4*(i+FETCH_UP)]);
	long4 = &(t_longlink[4*(i+FETCH_UP)]);
#else
	fat4 = (s+FETCH_UP)->fatlink;
	long4 = (s+FETCH_UP)->longlink;
#endif
	prefetch_4MV4V( 
		       fat4,
		       (su3_vector *)F_PT(s+FETCH_UP,src),
		       (s+FETCH_UP)->tempvec );
	prefetch_4MV4V(
		       long4,
		       (su3_vector *)F_PT(s+FETCH_UP,src),
		       (s+FETCH_UP)->templongvec );
      }

#ifdef DSLASH_TMP_LINKS
      fat4 = &(t_fatlink[4*i]);
      long4 = &(t_longlink[4*i]);
#else
      fat4 = s->fatlink;
      long4 = s->longlink;
#endif
	mult_adj_su3_mat_vec_4dir( fat4,
	    (su3_vector *)F_PT(s,src), s->tempvec );
	/* multiply by 3-link matrices too */
	mult_adj_su3_mat_vec_4dir( long4,
	    (su3_vector *)F_PT(s,src), s->templongvec );
    } END_LOOP

    /* Start gathers from negative directions */
    for( dir=XUP; dir <= TUP; dir++){
	tag[OPP_DIR(dir)] = start_gather( F_OFFSET(tempvec[dir]),
	    sizeof(su3_vector), OPP_DIR( dir), parity,
	    gen_pt[OPP_DIR(dir)] );
    }

    /* Start 3-neighbour gathers from negative directions */
    for( dir=X3UP; dir <= T3UP; dir++){
	tag[OPP_3_DIR(dir)] 
           = start_gather( F_OFFSET(templongvec[INDEX_3RD(dir)]),
			   sizeof(su3_vector), OPP_3_DIR( dir), parity,
			   gen_pt[OPP_3_DIR(dir)] );
    }

    /* Wait gathers from positive directions, multiply by matrix and
	accumulate */
    /* wait for the 3-neighbours from positive directions, multiply */
    for(dir=XUP; dir<=TUP; dir++){
	wait_gather(tag[dir]);
	wait_gather(tag[DIR3(dir)]);
    }
    /* Wait gathers from negative directions, accumulate (negative) */
    /* and the same for the negative 3-rd neighbours */
    for(dir=XUP; dir<=TUP; dir++){
	wait_gather(tag[OPP_DIR(dir)]);
    }
    for(dir=X3UP; dir<=T3UP; dir++){
	wait_gather(tag[OPP_3_DIR(dir)]);
    }


    FORSOMEPARITY(i,s,parity){
#ifdef DSLASH_TMP_LINKS
      fat4 = &(t_fatlink[4*i]);
      long4 = &(t_longlink[4*i]);
#else
      fat4 = s->fatlink;
      long4 = s->longlink;
#endif
      mult_su3_mat_vec_sum_4dir( fat4,
	    (su3_vector *)gen_pt[XUP][i], (su3_vector *)gen_pt[YUP][i],
	    (su3_vector *)gen_pt[ZUP][i], (su3_vector *)gen_pt[TUP][i],
	    (su3_vector *)F_PT(s,dest));

      mult_su3_mat_vec_sum_4dir( long4,
	    (su3_vector *)gen_pt[X3UP][i], (su3_vector *)gen_pt[Y3UP][i],
	    (su3_vector *)gen_pt[Z3UP][i], (su3_vector *)gen_pt[T3UP][i],
	    (su3_vector *) &(s->templongv1));

      if( i < loopend-FETCH_UP ){
#ifdef DSLASH_TMP_LINKS
	fat4 = &(t_fatlink[4*(i+FETCH_UP)]);
	long4 = &(t_longlink[4*(i+FETCH_UP)]);
#else
	fat4 = (s+FETCH_UP)->fatlink;
	long4 = (s+FETCH_UP)->longlink;
#endif
	prefetch_4MVVVV( 
              fat4,
	      (su3_vector *)gen_pt[XUP][i+FETCH_UP],
              (su3_vector *)gen_pt[YUP][i+FETCH_UP],
              (su3_vector *)gen_pt[ZUP][i+FETCH_UP],
              (su3_vector *)gen_pt[TUP][i+FETCH_UP] );
	prefetch_4MVVVV( 
              long4,
              (su3_vector *)gen_pt[X3UP][i+FETCH_UP],
              (su3_vector *)gen_pt[Y3UP][i+FETCH_UP],
              (su3_vector *)gen_pt[Z3UP][i+FETCH_UP],
              (su3_vector *)gen_pt[T3UP][i+FETCH_UP] );
	prefetch_VVVV( 
              (su3_vector *)gen_pt[XDOWN][i+FETCH_UP],
              (su3_vector *)gen_pt[YDOWN][i+FETCH_UP],
              (su3_vector *)gen_pt[ZDOWN][i+FETCH_UP],
              (su3_vector *)gen_pt[TDOWN][i+FETCH_UP] );
	prefetch_VVVV( 
              (su3_vector *)gen_pt[X3DOWN][i+FETCH_UP],
              (su3_vector *)gen_pt[Y3DOWN][i+FETCH_UP],
              (su3_vector *)gen_pt[Z3DOWN][i+FETCH_UP],
              (su3_vector *)gen_pt[T3DOWN][i+FETCH_UP] );
        }

        sub_four_su3_vecs( (su3_vector *)F_PT(s,dest),
	    (su3_vector *)(gen_pt[XDOWN][i]),
	    (su3_vector *)(gen_pt[YDOWN][i]),
	    (su3_vector *)(gen_pt[ZDOWN][i]),
	    (su3_vector *)(gen_pt[TDOWN][i]) );
        sub_four_su3_vecs( &(s->templongv1), 
	    (su3_vector *)(gen_pt[X3DOWN][i]),
	    (su3_vector *)(gen_pt[Y3DOWN][i]),
	    (su3_vector *)(gen_pt[Z3DOWN][i]),
	    (su3_vector *)(gen_pt[T3DOWN][i]) );
        /* Now need to add these things together */
        add_su3_vector((su3_vector *)F_PT(s,dest), & (s->templongv1),
			           (su3_vector *)F_PT(s,dest));
    } END_LOOP