/* prepare field as source field */ void QDP_prepare_src(QDP_data_common_t *dc) { ENTER; if(dc->discarded) { fprintf(stderr,"error: attempt to use discarded data\n"); QDP_abort(1); } if(dc->exposed) { fprintf(stderr,"error: attempt to use exposed field\n"); QDP_abort(1); } dc->srcprep = 1; QDP_finish_shifts(dc); LEAVE; }
/* creates data array if necessary and destroys pointers */ void QDP_switch_ptr_to_data(QDP_data_common_t *dc) { ENTER; if(*(dc->data)==NULL) { //*(dc->data) = (char *) malloc(QDP_sites_on_node*dc->size); dc->qmpmem = QMP_allocate_aligned_memory( QDP_sites_on_node_L(dc->lat)*dc->size, QDP_mem_align, QDP_mem_flags ); if(!dc->qmpmem) { QMP_error("QDP error: can't allocate memory\n"); QDP_abort(1); } *(dc->data) = QMP_get_memory_pointer(dc->qmpmem); } else { QDP_clear_valid_shift_dest(dc); } if(*(dc->ptr)!=NULL) { QDP_finish_shifts(dc); if(!dc->discarded) QDP_copy_ptr_to_data(dc); QDP_clear_shift_src(dc); free((void*)*(dc->ptr)); *(dc->ptr) = NULL; } LEAVE; }
void qlua_assert(int status, const char *msg) { if (status != 0) return; fprintf(stderr, "QLUA ASSERT failed: %s\n", msg); fflush(stdout); fflush(stderr); QDP_abort(1); exit(1); }
void fromQDP_F(real *yy, Layout *l, real *xx, QDP_Lattice *lat, int nelem, int swap) { int nd = l->nDim; int nl = l->nSitesInner; int ysize = nelem * nl; //double n2=0; int nsites = l->nSites; if(nsites!=QDP_sites_on_node_L(lat)) { printf("%s: nsites(%i) != QDP_sites_on_node_L(lat)(%i)\n", __func__, nsites, QDP_sites_on_node_L(lat)); QDP_abort(-1); } for(int j=0; j<nsites; j++) { // qll sites int x[nd]; LayoutIndex li; li.rank = myrank; li.index = j; layoutCoord(l, x, &li); int r = QDP_node_number_L(lat, x); if(r==QDP_this_node) { int i = QDP_index_L(lat, x); int oi = j / l->nSitesInner; int ii = j % l->nSitesInner; real *yi = yy + (ysize*oi + ii); for(int e=0; e<nelem; e++) { int e2 = e/2; int ei = e2 % swap; int eo = e2 / swap; int es = ei*(nelem/swap) + eo*2 + (e&1); yi[nl*e] = xx[i*nelem+es]; //n2 += er*er + ei*ei; } } else { printf("unpack: site on wrong node!\n"); QDP_abort(-1); } } //printf("unpack2: %g\n", n2); }
/* prepare data array of field for writing */ void QDP_prepare_dest(QDP_data_common_t *dc) { ENTER; if(dc->exposed) { fprintf(stderr,"error: attempt to use exposed field\n"); QDP_abort(1); } dc->discarded = 0; dc->srcprep = 1; dc->destprep = 1; QDP_switch_ptr_to_data(dc); LEAVE; }
void toQDP_F(real *xx, QDP_Lattice *lat, real *yy, Layout *l, int nelem, int swap) { int nd = l->nDim; int nl = l->nSitesInner; int ysize = nelem * nl; //double n2=0; int nsites = l->nSites; if(nsites!=QDP_sites_on_node_L(lat)) { printf("%s: nsites(%i) != QDP_sites_on_node_L(lat)(%i)\n", __func__, nsites, QDP_sites_on_node_L(lat)); QDP_abort(-1); } for(int i=0; i<nsites; i++) { // QDP sites int x[nd]; LayoutIndex li; QDP_get_coords_L(lat, x, QDP_this_node, i); layoutIndex(l, &li, x); if(li.rank==l->myrank) { int oi = li.index / l->nSitesInner; int ii = li.index % l->nSitesInner; real *yi = yy + (ysize*oi + ii); for(int e=0; e<nelem; e++) { int e2 = e/2; int ei = e2 % swap; int eo = e2 / swap; int es = ei*(nelem/swap) + eo*2 + (e&1); xx[i*nelem+es] = yi[nl*e]; //n2 += er*er + ei*ei; } } else { printf("unpack: site on wrong node!\n"); QDP_abort(-1); } } //printf("unpack2: %g\n", n2); }
void QOP_asqtad_force_multi_asvec_qdp(QOP_info_t *info, QDP_ColorMatrix *links[], QDP_ColorMatrix *force[], QOP_asqtad_coeffs_t *coef, REAL eps[], QDP_ColorVector *xin[], int nsrc) { #define NC QDP_get_nc(xin[0]) REAL coeff[nsrc]; REAL OneLink[nsrc], Lepage[nsrc], Naik[nsrc], FiveSt[nsrc], ThreeSt[nsrc], SevenSt[nsrc]; REAL mNaik[nsrc], mLepage[nsrc], mFiveSt[nsrc], mThreeSt[nsrc], mSevenSt[nsrc]; QDP_ColorVector *P3[8][nsrc]; QDP_ColorVector *P5[8][nsrc]; QDP_ColorVector *P5tmp[8][8][nsrc]; QDP_ColorVector *P5s[4][nsrc]; QDP_ColorVector *P5tmps[4][8][nsrc]; //QDP_ColorVector *xin[nsrc]; QDP_ColorVector *xintmp[8][nsrc]; QDP_ColorVector *Pmu[nsrc]; QDP_ColorVector *Pmutmp[8][nsrc]; QDP_ColorVector *Pnumu[nsrc]; QDP_ColorVector *Pnumutmp[8][nsrc]; QDP_ColorVector *Prhonumu[nsrc]; QDP_ColorVector *Prhonumutmp[8][nsrc]; QDP_ColorVector *P7[nsrc]; QDP_ColorVector *P7tmp[8][nsrc]; QDP_ColorVector *P7rho[nsrc]; QDP_ColorVector *ttv[nsrc]; int i, dir; int mu, nu, rho, sig; double nflop1 = 253935; double nflop2 = 433968; double nflop = nflop1 + (nflop2-nflop1)*(nsrc-1); double dtime; dtime = -QOP_time(); ASQTAD_FORCE_BEGIN; QOP_trace("test 1\n"); /* setup parallel transport */ QDP_ColorMatrix *tmpmat = QDP_create_M(); for(i=0; i<QOP_common.ndim; i++) { fbshift[i] = QDP_neighbor[i]; fbshiftdir[i] = QDP_forward; fblink[i] = links[i]; fbshift[OPP_DIR(i)] = QDP_neighbor[i]; fbshiftdir[OPP_DIR(i)] = QDP_backward; fblink[OPP_DIR(i)] = QDP_create_M(); QDP_M_eq_sM(tmpmat, fblink[i], QDP_neighbor[i], QDP_backward, QDP_all); QDP_M_eq_Ma(fblink[OPP_DIR(i)], tmpmat, QDP_all); } tv = ttv; for(i=0; i<nsrc; i++) { tv[i] = QDP_create_V(); } QOP_trace("test 2\n"); /* Allocate temporary vectors */ for(i=0; i<nsrc; i++) { Pmu[i] = QDP_create_V(); Pnumu[i] = QDP_create_V(); Prhonumu[i] = QDP_create_V(); P7[i] = QDP_create_V(); P7rho[i] = QDP_create_V(); for(dir=0; dir<8; dir++) { xintmp[dir][i] = QDP_create_V(); Pmutmp[dir][i] = QDP_create_V(); Pnumutmp[dir][i] = QDP_create_V(); Prhonumutmp[dir][i] = QDP_create_V(); P7tmp[dir][i] = QDP_create_V(); } #if 1 for(mu=0; mu<4; mu++) { P5s[mu][i] = QDP_create_V(); for(dir=0; dir<8; dir++) { P5tmps[mu][dir][i] = QDP_create_V(); } } #else for(mu=0; mu<8; mu++) { P5[mu][i] = QDP_create_V(); for(dir=0; dir<8; dir++) { P5tmp[mu][dir][i] = QDP_create_V(); //printf("%p %p\n", P5tmp[mu][dir][i], &(P5tmp[mu][dir][i])); fflush(stdout); if(P5tmp[mu][dir][i]==NULL) { fprintf(stderr, "error: can't create V\n"); QDP_abort(); } } } #endif } //printf("%p\n", P5tmp[0][4][0]); fflush(stdout); for(mu=0; mu<8; mu++) { for(i=0; i<nsrc; i++) { P3[mu][i] = QDP_create_V(); //P5[mu][i] = QDP_create_V(); } } for(mu=0; mu<4; mu++) { tempmom_qdp[mu] = force[mu]; QDP_M_eqm_M(tempmom_qdp[mu], tempmom_qdp[mu], QDP_odd); } /* Path coefficients times fermion epsilon */ /* Load path coefficients from table */ for(i=0; i<nsrc; i++) { OneLink[i] = coef->one_link * eps[i]; Naik[i] = coef->naik * eps[i]; mNaik[i] = -Naik[i]; ThreeSt[i] = coef->three_staple * eps[i]; mThreeSt[i] = -ThreeSt[i]; FiveSt[i] = coef->five_staple * eps[i]; mFiveSt[i] = -FiveSt[i]; SevenSt[i] = coef->seven_staple * eps[i]; mSevenSt[i] = -SevenSt[i]; Lepage[i] = coef->lepage * eps[i]; mLepage[i] = -Lepage[i]; } #if 0 printf("nsrc = %i\n", nsrc); printf("coeffs = %g %g %g %g %g %g\n", OneLink[0], ThreeSt[0], FiveSt[0], SevenSt[0], Lepage[0], Naik[0]); #endif /* *************************************** */ QOP_trace("start force loop\n"); for(mu=0; mu<8; mu++) { //u_shift_hw_fermion(temp_x_qdp, Pmu, OPP_DIR(mu), temp_hw[OPP_DIR(mu)]); u_shift_color_vecs(xin, Pmu, OPP_DIR(mu), nsrc, xintmp[OPP_DIR(mu)]); for(sig=0; sig<8; sig++) if( (sig!=mu)&&(sig!=OPP_DIR(mu)) ) { //u_shift_hw_fermion(Pmu, P3[sig], sig, temp_hw[sig]); u_shift_color_vecs(Pmu, P3[sig], sig, nsrc, Pmutmp[sig]); if(GOES_FORWARDS(sig)) { /* Add the force F_sig[x+mu]: x--+ * * | | * * o o * * the 1 link in the path: - (numbering starts form 0) */ add_forces_to_mom(P3[sig], Pmu, sig, mThreeSt, nsrc); } } for(nu=0; nu<8; nu++) if( (nu!=mu)&&(nu!=OPP_DIR(mu)) ) { int nP5 = 0; //Pnumu = hw_qdp[OPP_DIR(nu)]; //u_shift_hw_fermion(Pmu, Pnumu, OPP_DIR(nu), temp_hw[OPP_DIR(nu)]); u_shift_color_vecs(Pmu, Pnumu, OPP_DIR(nu), nsrc, Pmutmp[OPP_DIR(nu)]); //QDP_V_veq_V(Pnumu, P3[OPP_DIR(nu)], QDP_all, nsrc); for(sig=0; sig<8; sig++) if( (sig!=mu)&&(sig!=OPP_DIR(mu)) && (sig!=nu)&&(sig!=OPP_DIR(nu)) ) { #if 1 for(i=0; i<nsrc; i++) { P5[sig][i] = P5s[nP5][i]; for(dir=0; dir<8; dir++) P5tmp[sig][dir][i] = P5tmps[nP5][dir][i]; } #endif nP5++; //u_shift_hw_fermion(Pnumu, P5[sig], sig, temp_hw[sig]); u_shift_color_vecs(Pnumu, P5[sig], sig, nsrc, Pnumutmp[sig]); if(GOES_FORWARDS(sig)) { /* Add the force F_sig[x+mu+nu]: x--+ * * | | * * o o * * the 2 link in the path: + (numbering starts form 0) */ add_forces_to_mom(P5[sig], Pnumu, sig, FiveSt, nsrc); } } QOP_trace("test 4\n"); for(rho=0; rho<8; rho++) if( (rho!=mu)&&(rho!=OPP_DIR(mu)) && (rho!=nu)&&(rho!=OPP_DIR(nu)) ) { //Prhonumu = hw_qdp[OPP_DIR(rho)]; //u_shift_hw_fermion(Pnumu, Prhonumu, OPP_DIR(rho), // temp_hw[OPP_DIR(rho)] ); u_shift_color_vecs(Pnumu, Prhonumu, OPP_DIR(rho), nsrc, Pnumutmp[OPP_DIR(rho)]); //QDP_V_veq_V(Prhonumu, P5[OPP_DIR(rho)], QDP_all, nsrc); for(sig=0; sig<8; sig++) if( (sig!=mu )&&(sig!=OPP_DIR(mu )) && (sig!=nu )&&(sig!=OPP_DIR(nu )) && (sig!=rho)&&(sig!=OPP_DIR(rho)) ) { /* Length 7 paths */ //P7 = hw_qdp[sig]; //u_shift_hw_fermion(Prhonumu, P7, sig, temp_hw[sig] ); QOP_trace("test 43\n"); u_shift_color_vecs(Prhonumu, P7, sig, nsrc, Prhonumutmp[sig]); QOP_trace("test 44\n"); //QDP_V_eq_r_times_V(P7[0], &SevenSt[0], P7[0], QDP_all); //QDP_V_eq_r_times_V(P7[1], &SevenSt[1], P7[1], QDP_all); if(GOES_FORWARDS(sig)) { /* Add the force F_sig[x+mu+nu+rho]: x--+ * * | | * * o o * * the 3 link in the path: - (numbering starts form 0) */ QOP_trace("test 45\n"); add_forces_to_mom(P7, Prhonumu, sig, mSevenSt, nsrc); QOP_trace("test 46\n"); //mom_meq_force(P7, Prhonumu, sig); } /* Add the force F_rho the 2(4) link in the path: + */ //P7rho = hw_qdp[rho]; //u_shift_hw_fermion(P7, P7rho, rho, temp_hw[rho]); QOP_trace("test 47\n"); u_shift_color_vecs(P7, P7rho, rho, nsrc, P7tmp[rho]); QOP_trace("test 48\n"); side_link_forces(rho,sig,SevenSt,Pnumu,P7,Prhonumu,P7rho, nsrc); QOP_trace("test 49\n"); //side_link_3f_force2(rho,sig,Pnumu,P7,Prhonumu,P7rho); /* Add the P7rho vector to P5 */ for(i=0; i<nsrc; i++) { if(FiveSt[i]!=0) coeff[i] = SevenSt[i]/FiveSt[i]; else coeff[i] = 0; QOP_trace("test 410\n"); QDP_V_peq_r_times_V(P5[sig][i], &coeff[i], P7rho[i], QDP_all); QOP_trace("test 411\n"); } } /* sig */ } /* rho */ QOP_trace("test 5\n"); #define P5nu P7 for(sig=0; sig<8; sig++) if( (sig!=mu)&&(sig!=OPP_DIR(mu)) && (sig!=nu)&&(sig!=OPP_DIR(nu)) ) { /* Length 5 paths */ /* Add the force F_nu the 1(3) link in the path: - */ //P5nu = hw_qdp[nu]; //u_shift_hw_fermion(P5[sig], P5nu, nu, temp_hw[nu]); u_shift_color_vecs(P5[sig], P5nu, nu, nsrc, P5tmp[sig][nu]); side_link_forces(nu, sig, mFiveSt, Pmu, P5[sig], Pnumu, P5nu, nsrc); /* Add the P5nu vector to P3 */ for(i=0; i<nsrc; i++) { if(ThreeSt[i]!=0) coeff[i] = FiveSt[i]/ThreeSt[i]; else coeff[i] = 0; QDP_V_peq_r_times_V(P3[sig][i], &coeff[i], P5nu[i], QDP_all); } } /* sig */ } /* nu */ #define Pmumu Pnumu #define Pmumutmp Pnumutmp #define P5sig Prhonumu #define P5sigtmp Prhonumutmp #define P3mu P7 #define Popmu P7 #define Pmumumu P7 /* Now the Lepage term... It is the same as 5-link paths with nu=mu and FiveSt=Lepage. */ //u_shift_hw_fermion(Pmu, Pmumu, OPP_DIR(mu), temp_hw[OPP_DIR(mu)] ); u_shift_color_vecs(Pmu, Pmumu, OPP_DIR(mu), nsrc, Pmutmp[OPP_DIR(mu)]); for(sig=0; sig<8; sig++) if( (sig!=mu)&&(sig!=OPP_DIR(mu)) ) { //P5sig = hw_qdp[sig]; //u_shift_hw_fermion(Pmumu, P5sig, sig, temp_hw[sig]); u_shift_color_vecs(Pmumu, P5sig, sig, nsrc, Pmumutmp[sig]); if(GOES_FORWARDS(sig)) { /* Add the force F_sig[x+mu+nu]: x--+ * * | | * * o o * * the 2 link in the path: + (numbering starts form 0) */ add_forces_to_mom(P5sig, Pmumu, sig, Lepage, nsrc); } /* Add the force F_nu the 1(3) link in the path: - */ //P5nu = hw_qdp[mu]; //u_shift_hw_fermion(P5sig, P5nu, mu, temp_hw[mu]); u_shift_color_vecs(P5sig, P5nu, mu, nsrc, P5sigtmp[mu]); side_link_forces(mu, sig, mLepage, Pmu, P5sig, Pmumu, P5nu, nsrc); /* Add the P5nu vector to P3 */ for(i=0; i<nsrc; i++) { if(ThreeSt[i]!=0) coeff[i] = Lepage[i]/ThreeSt[i]; else coeff[i] = 0; QDP_V_peq_r_times_V(P3[sig][i], &coeff[i], P5nu[i], QDP_all); } /* Length 3 paths (Not the Naik term) */ /* Add the force F_mu the 0(2) link in the path: + */ if(GOES_FORWARDS(mu)) { //P3mu = hw_qdp[mu]; /* OK to clobber P5nu */ //u_shift_hw_fermion(P3[sig], P3mu, mu, temp_hw[mu]); //u_shift_color_vecs(P3[sig], P3mu, mu, 2, temp_hw[mu]); for(i=0; i<nsrc; i++) { QDP_V_eq_V(P5sig[i], P3[sig][i], QDP_all); } u_shift_color_vecs(P5sig, P3mu, mu, nsrc, P5sigtmp[mu]); } /* The above shift is not needed if mu is backwards */ side_link_forces(mu, sig, ThreeSt, xin, P3[sig], Pmu, P3mu, nsrc); } /* Finally the OneLink and the Naik term */ if(GOES_BACKWARDS(mu)) { /* Do only the forward terms in the Dslash */ /* Because I have shifted with OPP_DIR(mu) Pmu is a forward * * shift. */ /* The one link */ add_forces_to_mom(Pmu, xin, OPP_DIR(mu), OneLink, nsrc); /* For the same reason Pmumu is the forward double link */ /* Popmu is a backward shift */ //Popmu = hw_qdp[mu]; /* OK to clobber P3mu */ //u_shift_hw_fermion(xin, Popmu, mu, temp_hw[mu]); u_shift_color_vecs(xin, Popmu, mu, nsrc, xintmp[mu]); /* The Naik */ /* link no 1: - */ add_forces_to_mom(Pmumu, Popmu, OPP_DIR(mu), mNaik, nsrc); /* Pmumumu can overwrite Popmu which is no longer needed */ //Pmumumu = hw_qdp[OPP_DIR(mu)]; //u_shift_hw_fermion(Pmumu, Pmumumu, OPP_DIR(mu), temp_hw[OPP_DIR(mu)]); u_shift_color_vecs(Pmumu, Pmumumu, OPP_DIR(mu), nsrc, Pmumutmp[OPP_DIR(mu)]); /* link no 0: + */ add_forces_to_mom(Pmumumu, xin, OPP_DIR(mu), Naik, nsrc); } else { /* The rest of the Naik terms */ //Popmu = hw_qdp[mu]; /* OK to clobber P3mu */ //u_shift_hw_fermion(xin, Popmu, mu, temp_hw[mu]); u_shift_color_vecs(xin, Popmu, mu, nsrc, xintmp[mu]); /* link no 2: + */ /* Pmumu is double backward shift */ add_forces_to_mom(Popmu, Pmumu, mu, Naik, nsrc); } /* Here we have to do together the Naik term and the one link term */ }/* mu */ QOP_trace("test 6\n"); QOP_trace("test 7\n"); for(mu=0; mu<4; mu++) { QDP_M_eq_M(tmpmat, tempmom_qdp[mu], QDP_even); QDP_M_eqm_M(tmpmat, tempmom_qdp[mu], QDP_odd); QDP_M_eq_antiherm_M(tempmom_qdp[mu], tmpmat, QDP_all); } QDP_destroy_M(tmpmat); //printf("%p\n", P5tmp[0][4][0]); fflush(stdout); //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); } /* Free temporary vectors */ for(i=0; i<nsrc; i++) { QDP_destroy_V(Pmu[i]); QDP_destroy_V(Pnumu[i]); QDP_destroy_V(Prhonumu[i]); QDP_destroy_V(P7[i]); QDP_destroy_V(P7rho[i]); //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); } for(dir=0; dir<8; dir++) { QDP_destroy_V(xintmp[dir][i]); QDP_destroy_V(Pmutmp[dir][i]); QDP_destroy_V(Pnumutmp[dir][i]); QDP_destroy_V(Prhonumutmp[dir][i]); QDP_destroy_V(P7tmp[dir][i]); } //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); } for(mu=0; mu<4; mu++) { //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); } QDP_destroy_V(P5s[mu][i]); //QDP_destroy_V(P5[mu][i]); //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); } for(dir=0; dir<8; dir++) { //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); } QDP_destroy_V(P5tmps[mu][dir][i]); //printf("%p\n", P5tmp[mu][dir][i]); fflush(stdout); //QDP_destroy_V(P5tmp[mu][dir][i]); //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); } } //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); } } //if(QDP_this_node==0) { printf("line %i\n",__LINE__); fflush(stdout); } } //if(QDP_this_node==0) { printf("here3\n"); fflush(stdout); } for(mu=0; mu<8; mu++) { for(i=0; i<nsrc; i++) { QDP_destroy_V(P3[mu][i]); } //QDP_destroy_V(P5[mu][0]); //QDP_destroy_V(P5[mu][1]); } for(i=0; i<nsrc; i++) { QDP_destroy_V(tv[i]); } //if(QDP_this_node==0) { printf("here4\n"); fflush(stdout); } for(i=4; i<8; i++) { QDP_destroy_M(fblink[i]); } dtime += QOP_time(); info->final_sec = dtime; info->final_flop = nflop*QDP_sites_on_node; info->status = QOP_SUCCESS; ASQTAD_FORCE_END; #undef NC }
!GAUGEMULT1 #include "qdp_$lib_internal.h" #include "com_common.h" #include "com_common_internal.h" #define fvdp QLA$PC_$ABBR3_v$EQOP_$ABBR1$ADJ1_times_p$ABBR2$ADJ2 #define fxdp QLA$PC_$ABBR3_x$EQOP_$ABBR1$ADJ1_times_p$ABBR2$ADJ2 #define fvpp QLA$PC_$ABBR3_v$EQOP_p$ABBR1$ADJ1_times_p$ABBR2$ADJ2 #define fxpp QLA$PC_$ABBR3_x$EQOP_p$ABBR1$ADJ1_times_p$ABBR2$ADJ2 void QDP$PC_$ABBR3_$EQOP_$ABBR1$ADJ1_times_s$ABBR2$ADJ2( $QDPPCTYPE3 *dest, $QDPPCTYPE1 *src1, $QDPPCTYPE2 *src2, QDP_Shift shift, QDP_ShiftDir fb, QDP_Subset subset) { char **temp2 = NULL; QDP_msg_tag *mtag = NULL; TGET; ONE { temp2 = (char **)malloc(QDP_sites_on_node_L(get_lat(dest))*sizeof(char *)); if((fb!=QDP_forward)&&(fb!=QDP_backward)) { fprintf(stderr,"QDP: error: bad fb in QDP$PC_$ABBR_eq_s$ABBR\n"); QDP_abort(1); } /* prepare shift source */ if(src2->ptr==NULL) { if(src2->data==NULL) { fprintf(stderr,"error: shifting from uninitialized source\n"); QDP_abort(1); } } else { QDP_switch_ptr_to_data(&src2->dc); } mtag = QDP_declare_shift( temp2, (char *)src2->data, src2->dc.size, shift, fb, subset ); QDP_do_gather(mtag); QDP_prepare_dest(&dest->dc); QDP_prepare_src(&src1->dc); QDP_wait_gather(mtag); } TBARRIER; #define SRC2O(o) ((void *)(((void **)(temp2))+(o))) #define N -1 #if ($C+0) == -1 int nc = QDP_get_nc(dest); #endif int toff, toff1; TSPLIT(toff, toff1, subset->len); int tlen = toff1-toff; toff += subset->offset; if(src1->ptr==NULL) { if(subset->indexed==0) { fvdp($NCVAR QDP_offset_data(dest,toff), QDP_offset_data(src1,toff), SRC2O(toff), tlen ); } else { fxdp($NCVAR QDP_offset_data(dest,0), QDP_offset_data(src1,0), SRC2O(0), subset->index, tlen ); } } else { if(subset->indexed==0) { fvpp($NCVAR QDP_offset_data(dest,toff), QDP_offset_ptr(src1,toff), SRC2O(toff), tlen ); } else { fxpp($NCVAR QDP_offset_data(dest,0), QDP_offset_ptr(src1,0), SRC2O(0), subset->index, tlen ); } } ONE { QDP_cleanup_gather(mtag); free((void*)temp2); } }
/* return value tells whether restart is possible */ int QDP_prepare_shift(QDP_data_common_t *dest_dc, QDP_data_common_t *src_dc, QDP_Shift shift, QDP_ShiftDir fb, QDP_Subset subset) { QDP_shift_src_t **pss, *ss; int restart=0; ENTER; TRACE; if(src_dc->discarded) { fprintf(stderr,"error: attempt to use discarded data\n"); QDP_abort(1); } TRACE; if(src_dc->exposed) { fprintf(stderr,"error: attempt to use exposed field\n"); QDP_abort(1); } TRACE; if(dest_dc->exposed) { fprintf(stderr,"error: attempt to use exposed field\n"); QDP_abort(1); } TRACE; /* prepare shift source */ if(*(src_dc->ptr)==NULL) { if(*(src_dc->data)==NULL) { fprintf(stderr,"error: shifting from uninitialized source\n"); QDP_abort(1); } } else { QDP_switch_ptr_to_data(src_dc); } TRACE; /* check if this shift has been done before */ pss = &dest_dc->shift_src; while(1) { if(*pss==NULL) { ss = QDP_alloc_shift_src_t(src_dc, shift, fb, subset); ss->next = dest_dc->shift_src; dest_dc->shift_src = ss; src_dc->shift_dest = QDP_alloc_shift_dest_t(dest_dc, src_dc->shift_dest); break; } ss = *pss; if( (ss->dc==src_dc) && (ss->shiftId==shift->id) && (ss->fb==fb) && (ss->subsetId==subset->id) ) { if(ss->st->shift_pending) { ss->st->shift_pending = 0; QDP_wait_gather(ss->st->msgtag); } if(ss==dest_dc->shift_src) { restart = 1; } else { *pss = ss->next; //QDP_clear_shift_src(dest_dc); // don't save old shifts ss->next = dest_dc->shift_src; dest_dc->shift_src = ss; QDP_remove_shift_tag_reference(ss->st); } break; } #if 0 if(ss->subset==subset) { if(ss==dest_dc->shift_src) { if(ss->st->shift_pending) { QDP_wait_gather(ss->st->msgtag); } QDP_remove_shift_tag_reference(ss->st); } } #endif pss = &(ss->next); } dest_dc->discarded = 0; { QDP_shift_list_t *sl; if(sl_free_list==NULL) { sl = (QDP_shift_list_t *) malloc(sizeof(QDP_shift_list_t)); } else { sl = sl_free_list; sl_free_list = sl->next; } //printf("alloc sl\n"); sl->next = shift_list; sl->prev = NULL; if(shift_list) shift_list->prev = sl; shift_list = sl; sl->ss = ss; ss->sl = sl; } /* prepare shift destination */ if(*(dest_dc->ptr)==NULL) { *(dest_dc->ptr) = (char **)malloc(QDP_sites_on_node_L(dest_dc->lat)*sizeof(char *)); if(*(dest_dc->data)!=NULL) { char *data, **ptr; int i; data = *(dest_dc->data); ptr = *(dest_dc->ptr); for(i=0; i<QDP_sites_on_node_L(dest_dc->lat); ++i) { ptr[i] = data + i*dest_dc->size; } } else { char **ptr; int i; ptr = *(dest_dc->ptr); for(i=0; i<QDP_sites_on_node_L(dest_dc->lat); ++i) { ptr[i] = NULL; } } } LEAVE; return restart; }
/* the driver */ int main(int argc, char *argv[]) { int status = 1; int i; lua_State *L = NULL; if (QDP_initialize(&argc, &argv)) { fprintf(stderr, "QDP initialization failed\n"); return 1; } QDP_profcontrol(0); double node = QDP_this_node; QMP_min_double(&node); qlua_master_node = node; L = lua_open(); if (L == NULL) { message("can not create Lua state"); goto end; } qlua_init(L, argc, argv); /* open libraries */ if (argc < 2) { message("QLUA component versions:\n"); for (i = 0; versions[i].name; i++) message(" %10s: %s\n", versions[i].name, versions[i].value); } else { for (i = 1; i < argc; i++) { char *source; if(strcmp(argv[i],"-e")==0) { // process command const char *chunk = argv[i] + 2; if (*chunk == '\0') { if (++i >= argc) { message("missing argument to -e"); goto end; } chunk = argv[i]; } QLUA_ASSERT(chunk != NULL); status = dostring(L, chunk); source = "=(command line)"; } else { status = dofile(L, argv[i]); source = argv[i]; } report(L, source, status); if (status) { fflush(stdout); fflush(stderr); QDP_abort(1); break; } } } qlua_fini(L); lua_close(L); end: QDP_finalize(); return status; }