/*
 * Build the fat and long links with the MILC fattening routines and map
 * them to QOP.
 *
 *   both - not referenced in this routine; kept so the signature is unchanged
 *   fn   - link structure handed to load_fatlinks(), load_longlinks() and
 *          create_qop_links_from_milc_fn()
 *   ap   - action path description handed to the two MILC load routines
 *
 * Calls terminate(1) when phases_in != 1 or when initialize_qop() does not
 * return QOP_SUCCESS.
 */
static void
create_asqtad_links(int both, ferm_links_t *fn, ks_action_paths *ap)
{
  char myname[] = "create_asqtad_links";

  /* This routine refuses to run unless phases_in is exactly 1 */
  if (phases_in != 1) {
    node0_printf("create_asqtad_links: BOTCH: needs phases in\n");
    terminate(1);
  }

  /* Initialize QOP */
  if (initialize_qop() != QOP_SUCCESS) {
    printf("%s(%d): Error initializing QOP\n", myname, this_node);
    terminate(1);
  }

  /* Use MILC link fattening routines */
  load_fatlinks(fn, ap);
  load_longlinks(fn, ap);

  /* Map the MILC fat and long links to QOP, including a possible change
     of precision */
  create_qop_links_from_milc_fn(fn);
}
/*
 * Load the fat and long links for fn unless fn->valid is already 1.
 * When DBLSTORE_FN is defined the fat and long backward links are loaded
 * as well.  fn->valid is set to 1 once the loads have been issued.
 */
void
load_ferm_links(ferm_links_t *fn)
{
  if (fn->valid != 1) {
    load_fatlinks(fn);
    load_longlinks(fn);

#ifdef DBLSTORE_FN
    load_fatbacklinks(fn);
    load_longbacklinks(fn);
#endif

    fn->valid = 1;
  }
}
/*
 * Load the fat and long links for fn using the action paths in ap, unless
 * fn->valid is already 1.
 *
 *   fn - link structure to fill; on a fresh load fn->ap is set to ap and
 *        fn->valid is set to 1
 *   ap - action path description handed to load_fatlinks() and
 *        load_longlinks()
 *
 * When DBLSTORE_FN is defined the fat and long backward links are loaded
 * as well.
 */
void
load_ferm_links(ferm_links_t *fn, ks_action_paths *ap)
{
  const int already_loaded = (fn->valid == 1);

  if (!already_loaded) {
    load_fatlinks(fn, ap);
    load_longlinks(fn, ap);

    /* Record the action paths after the forward links are loaded */
    fn->ap = ap;

#ifdef DBLSTORE_FN
    load_fatbacklinks(fn);
    load_longbacklinks(fn);
#endif

    fn->valid = 1;
  }
}
int ks_congrad( field_offset src, field_offset dest, float mass, int niter, float rsqmin, int parity, float *final_rsq_ptr ){ register int i; register site *s; int iteration; /* counter for iterations */ float a,b; /* Sugar's a,b,resid**2,last resid*2 */ double rsq,oldrsq,pkp; /* pkp = cg_p.K.cg_p */ float msq_x4; /* 4*mass*mass */ double source_norm; /* squared magnitude of source vector */ double rsqstop; /* stopping residual normalized by source norm */ int l_parity; /* parity we are currently doing */ int l_otherparity; /* the other parity */ msg_tag * tags1[16], *tags2[16]; /* tags for gathers to parity and opposite */ int special_started; /* 1 if dslash_special has been called */ /* Timing */ #ifdef CGTIME double dtimed,dtimec; #endif double nflop; /* debug */ #ifdef CGTIME dtimec = -dclock(); #endif nflop = 1187; if(parity==EVENANDODD)nflop *=2; special_started=0; /* if we want both parities, we will do even first. */ switch(parity){ case(EVEN): l_parity=EVEN; l_otherparity=ODD; break; case(ODD): l_parity=ODD; l_otherparity=EVEN; break; case(EVENANDODD): l_parity=EVEN; l_otherparity=ODD; break; } msq_x4 = 4.0*mass*mass; iteration = 0; if (!valid_longlinks) load_longlinks(); if (!valid_fatlinks) load_fatlinks(); #ifdef CONGRAD_TMP_VECTORS /* now we can allocate temporary variables and copy then */ /* PAD may be used to avoid cache trashing */ #define PAD 0 if(first_congrad) { ttt = (su3_vector *) malloc((sites_on_node+PAD)*sizeof(su3_vector)); cg_p = (su3_vector *) malloc((sites_on_node+PAD)*sizeof(su3_vector)); resid = (su3_vector *) malloc((sites_on_node+PAD)*sizeof(su3_vector)); t_dest = (su3_vector *) malloc((sites_on_node+PAD)*sizeof(su3_vector)); first_congrad = 0; } #endif #ifdef CGTIME dtimec = -dclock(); #endif #ifdef CONGRAD_TMP_VECTORS /* now we copy dest to temporaries */ FORALLSITES(i,s) { t_dest[i] = *(su3_vector *)F_PT(s,dest); }
/* D_slash routine - sets dest. on each site equal to sum of sources parallel transported to site, with minus sign for transport from negative directions. Use "fatlinks" for one link transport, "longlinks" for three link transport. */ void dslash_fn( field_offset src, field_offset dest, int parity ) { register int i; register site *s; register int dir,otherparity; register su3_matrix *fat4, *long4; msg_tag *tag[16]; if(!valid_longlinks)load_longlinks(); if(!valid_fatlinks)load_fatlinks(); switch(parity){ case EVEN: otherparity=ODD; break; case ODD: otherparity=EVEN; break; case EVENANDODD: otherparity=EVENANDODD; break; } /* Start gathers from positive directions */ /* And start the 3-step gather too */ for( dir=XUP; dir<=TUP; dir++ ){ tag[dir] = start_gather( src, sizeof(su3_vector), dir, parity, gen_pt[dir] ); tag[DIR3(dir)] = start_gather( src, sizeof(su3_vector), DIR3(dir), parity, gen_pt[DIR3(dir)] ); } /* Multiply by adjoint matrix at other sites */ /* Use fat link for single link transport */ FORSOMEPARITY( i, s, otherparity ){ if( i < loopend-FETCH_UP ){ #ifdef DSLASH_TMP_LINKS fat4 = &(t_fatlink[4*(i+FETCH_UP)]); long4 = &(t_longlink[4*(i+FETCH_UP)]); #else fat4 = (s+FETCH_UP)->fatlink; long4 = (s+FETCH_UP)->longlink; #endif prefetch_4MV4V( fat4, (su3_vector *)F_PT(s+FETCH_UP,src), (s+FETCH_UP)->tempvec ); prefetch_4MV4V( long4, (su3_vector *)F_PT(s+FETCH_UP,src), (s+FETCH_UP)->templongvec ); } #ifdef DSLASH_TMP_LINKS fat4 = &(t_fatlink[4*i]); long4 = &(t_longlink[4*i]); #else fat4 = s->fatlink; long4 = s->longlink; #endif mult_adj_su3_mat_vec_4dir( fat4, (su3_vector *)F_PT(s,src), s->tempvec ); /* multiply by 3-link matrices too */ mult_adj_su3_mat_vec_4dir( long4, (su3_vector *)F_PT(s,src), s->templongvec ); } END_LOOP /* Start gathers from negative directions */ for( dir=XUP; dir <= TUP; dir++){ tag[OPP_DIR(dir)] = start_gather( F_OFFSET(tempvec[dir]), sizeof(su3_vector), OPP_DIR( dir), parity, gen_pt[OPP_DIR(dir)] ); } /* Start 3-neighbour gathers from 
negative directions */ for( dir=X3UP; dir <= T3UP; dir++){ tag[OPP_3_DIR(dir)] = start_gather( F_OFFSET(templongvec[INDEX_3RD(dir)]), sizeof(su3_vector), OPP_3_DIR( dir), parity, gen_pt[OPP_3_DIR(dir)] ); } /* Wait gathers from positive directions, multiply by matrix and accumulate */ /* wait for the 3-neighbours from positive directions, multiply */ for(dir=XUP; dir<=TUP; dir++){ wait_gather(tag[dir]); wait_gather(tag[DIR3(dir)]); } /* Wait gathers from negative directions, accumulate (negative) */ /* and the same for the negative 3-rd neighbours */ for(dir=XUP; dir<=TUP; dir++){ wait_gather(tag[OPP_DIR(dir)]); } for(dir=X3UP; dir<=T3UP; dir++){ wait_gather(tag[OPP_3_DIR(dir)]); } FORSOMEPARITY(i,s,parity){ #ifdef DSLASH_TMP_LINKS fat4 = &(t_fatlink[4*i]); long4 = &(t_longlink[4*i]); #else fat4 = s->fatlink; long4 = s->longlink; #endif mult_su3_mat_vec_sum_4dir( fat4, (su3_vector *)gen_pt[XUP][i], (su3_vector *)gen_pt[YUP][i], (su3_vector *)gen_pt[ZUP][i], (su3_vector *)gen_pt[TUP][i], (su3_vector *)F_PT(s,dest)); mult_su3_mat_vec_sum_4dir( long4, (su3_vector *)gen_pt[X3UP][i], (su3_vector *)gen_pt[Y3UP][i], (su3_vector *)gen_pt[Z3UP][i], (su3_vector *)gen_pt[T3UP][i], (su3_vector *) &(s->templongv1)); if( i < loopend-FETCH_UP ){ #ifdef DSLASH_TMP_LINKS fat4 = &(t_fatlink[4*(i+FETCH_UP)]); long4 = &(t_longlink[4*(i+FETCH_UP)]); #else fat4 = (s+FETCH_UP)->fatlink; long4 = (s+FETCH_UP)->longlink; #endif prefetch_4MVVVV( fat4, (su3_vector *)gen_pt[XUP][i+FETCH_UP], (su3_vector *)gen_pt[YUP][i+FETCH_UP], (su3_vector *)gen_pt[ZUP][i+FETCH_UP], (su3_vector *)gen_pt[TUP][i+FETCH_UP] ); prefetch_4MVVVV( long4, (su3_vector *)gen_pt[X3UP][i+FETCH_UP], (su3_vector *)gen_pt[Y3UP][i+FETCH_UP], (su3_vector *)gen_pt[Z3UP][i+FETCH_UP], (su3_vector *)gen_pt[T3UP][i+FETCH_UP] ); prefetch_VVVV( (su3_vector *)gen_pt[XDOWN][i+FETCH_UP], (su3_vector *)gen_pt[YDOWN][i+FETCH_UP], (su3_vector *)gen_pt[ZDOWN][i+FETCH_UP], (su3_vector *)gen_pt[TDOWN][i+FETCH_UP] ); prefetch_VVVV( 
(su3_vector *)gen_pt[X3DOWN][i+FETCH_UP], (su3_vector *)gen_pt[Y3DOWN][i+FETCH_UP], (su3_vector *)gen_pt[Z3DOWN][i+FETCH_UP], (su3_vector *)gen_pt[T3DOWN][i+FETCH_UP] ); } sub_four_su3_vecs( (su3_vector *)F_PT(s,dest), (su3_vector *)(gen_pt[XDOWN][i]), (su3_vector *)(gen_pt[YDOWN][i]), (su3_vector *)(gen_pt[ZDOWN][i]), (su3_vector *)(gen_pt[TDOWN][i]) ); sub_four_su3_vecs( &(s->templongv1), (su3_vector *)(gen_pt[X3DOWN][i]), (su3_vector *)(gen_pt[Y3DOWN][i]), (su3_vector *)(gen_pt[Z3DOWN][i]), (su3_vector *)(gen_pt[T3DOWN][i]) ); /* Now need to add these things together */ add_su3_vector((su3_vector *)F_PT(s,dest), & (s->templongv1), (su3_vector *)F_PT(s,dest)); } END_LOOP