/* creates data array if necessary and destroys pointers */ void QDP_switch_ptr_to_data(QDP_data_common_t *dc) { ENTER; if(*(dc->data)==NULL) { //*(dc->data) = (char *) malloc(QDP_sites_on_node*dc->size); dc->qmpmem = QMP_allocate_aligned_memory( QDP_sites_on_node_L(dc->lat)*dc->size, QDP_mem_align, QDP_mem_flags ); if(!dc->qmpmem) { QMP_error("QDP error: can't allocate memory\n"); QDP_abort(1); } *(dc->data) = QMP_get_memory_pointer(dc->qmpmem); } else { QDP_clear_valid_shift_dest(dc); } if(*(dc->ptr)!=NULL) { QDP_finish_shifts(dc); if(!dc->discarded) QDP_copy_ptr_to_data(dc); QDP_clear_shift_src(dc); free((void*)*(dc->ptr)); *(dc->ptr) = NULL; } LEAVE; }
static int q_I_sum(lua_State *L) { mLatInt *a = qlua_checkLatInt(L, 1, NULL); int argc = lua_gettop(L); mLattice *S = qlua_ObjLattice(L, 1); int Sidx = lua_gettop(L); switch (argc) { case 1: { QLA_Real sum; CALL_QDP(L); if (S->lss.mask) { mLatInt *b = qlua_newZeroLatInt(L, Sidx); QDP_I_eq_I_mask_I(b->ptr, a->ptr, S->lss.mask, *S->qss); QDP_r_eq_sum_I(&sum, b->ptr, *S->qss); lua_pop(L, 1); } else { QDP_r_eq_sum_I(&sum, a->ptr, *S->qss); } lua_pushnumber(L, sum); return 1; } case 2: { /* NB: does not use the subset mask */ mLatMulti *m = qlua_checkLatMulti(L, 2, S); int size = m->size; QLA_Int *ii = m->idx; mVecReal *r = qlua_newVecReal(L, size); int sites = QDP_sites_on_node_L(S->lat); int k; QLA_Int *xx; for (k = 0; k < size; k++) r->val[k] = 0; CALL_QDP(L); xx = QDP_expose_I(a->ptr); for (k = 0; k < sites; k++, xx++, ii++) { int t = *ii; if ((t < 0) || (t >= size)) continue; r->val[t] += *xx; } QDP_reset_I(a->ptr); QMP_sum_double_array(r->val, size); return 1; } } return luaL_error(L, "bad arguments for Int:sum()"); }
void fromQDP_F(real *yy, Layout *l, real *xx, QDP_Lattice *lat, int nelem, int swap) { int nd = l->nDim; int nl = l->nSitesInner; int ysize = nelem * nl; //double n2=0; int nsites = l->nSites; if(nsites!=QDP_sites_on_node_L(lat)) { printf("%s: nsites(%i) != QDP_sites_on_node_L(lat)(%i)\n", __func__, nsites, QDP_sites_on_node_L(lat)); QDP_abort(-1); } for(int j=0; j<nsites; j++) { // qll sites int x[nd]; LayoutIndex li; li.rank = myrank; li.index = j; layoutCoord(l, x, &li); int r = QDP_node_number_L(lat, x); if(r==QDP_this_node) { int i = QDP_index_L(lat, x); int oi = j / l->nSitesInner; int ii = j % l->nSitesInner; real *yi = yy + (ysize*oi + ii); for(int e=0; e<nelem; e++) { int e2 = e/2; int ei = e2 % swap; int eo = e2 / swap; int es = ei*(nelem/swap) + eo*2 + (e&1); yi[nl*e] = xx[i*nelem+es]; //n2 += er*er + ei*ei; } } else { printf("unpack: site on wrong node!\n"); QDP_abort(-1); } } //printf("unpack2: %g\n", n2); }
void toQDP_F(real *xx, QDP_Lattice *lat, real *yy, Layout *l, int nelem, int swap) { int nd = l->nDim; int nl = l->nSitesInner; int ysize = nelem * nl; //double n2=0; int nsites = l->nSites; if(nsites!=QDP_sites_on_node_L(lat)) { printf("%s: nsites(%i) != QDP_sites_on_node_L(lat)(%i)\n", __func__, nsites, QDP_sites_on_node_L(lat)); QDP_abort(-1); } for(int i=0; i<nsites; i++) { // QDP sites int x[nd]; LayoutIndex li; QDP_get_coords_L(lat, x, QDP_this_node, i); layoutIndex(l, &li, x); if(li.rank==l->myrank) { int oi = li.index / l->nSitesInner; int ii = li.index % l->nSitesInner; real *yi = yy + (ysize*oi + ii); for(int e=0; e<nelem; e++) { int e2 = e/2; int ei = e2 % swap; int eo = e2 / swap; int es = ei*(nelem/swap) + eo*2 + (e&1); xx[i*nelem+es] = yi[nl*e]; //n2 += er*er + ei*ei; } } else { printf("unpack: site on wrong node!\n"); QDP_abort(-1); } } //printf("unpack2: %g\n", n2); }
/* assumes data array exists */ static void QDP_copy_ptr_to_data(QDP_data_common_t *dc) { char *data, **ptr; ENTER; data = *(dc->data); ptr = *(dc->ptr); if(ptr!=NULL) { int i; for(i=0; i<QDP_sites_on_node_L(dc->lat); i++) { if(ptr[i]) memcpy(&data[i*dc->size], ptr[i], dc->size); } } LEAVE; }
/* optimized version of building blocks compute tr(B^+ \Gamma_n F) [n=0..15] and projects on n_qext momenta select time interval [tsrc: tsnk] and does time reversal if time_rev==1 save results to aff_w[aff_kpath . 'g%d/qx%d_qy%d_qz%d'] Parameters: csrc = { xsrc, ysrc, zsrc, tsrc } tsnk qext[4 * i_qext + dir] ext.mom components time_rev ==0 for proton_3, ==1 for proton_negpar_3 bc_baryon_t =+/-1 boundary condition for baryon 2pt[sic!] function; =bc_quark^3 */ const char * save_bb(lua_State *L, mLattice *S, mAffWriter *aff_w, const char *aff_kpath, QDP_D3_DiracPropagator *F, QDP_D3_DiracPropagator *B, const int *csrc, /* [qRank] */ int tsnk, int n_mom, const int *mom, /* [n_mom][qRank] */ int time_rev, /* 1 to reverse, 0 to not */ int t_axis, /* 0-based */ double bc_baryon_t) { /* gamma matrix parameterization for left multiplication: Gamma_n [i,j] = gamma_coeff[n][i] * \delta_{i,gamma_ind[n][i]} v[0] a[0]*v[I[0]] Gamma * v[1] = a[1]*v[I[1]] v[2] a[2]*v[I[2]] v[3] a[3]*v[I[3]] or (Gamma * X)_{ik} = a[i] * X[I[i],k] */ double complex gamma_left_coeff[16][4] = { { 1, 1, 1, 1 }, /* G0 = 1 */ { I, I,-I,-I }, /* G1 = g1 */ {-1, 1, 1,-1 }, /* G2 = g2 */ {-I, I,-I, I }, /* G3 = g1 g2 */ { I,-I,-I, I }, /* G4 = g3 */ {-1, 1,-1, 1 }, /* G5 = g1 g3 */ {-I,-I,-I,-I }, /* G6 = g2 g3 */ { 1, 1,-1,-1 }, /* G7 = g1 g2 g3 */ { 1, 1, 1, 1 }, /* G8 = g4 */ { I, I,-I,-I }, /* G9 = g1 g4 */ {-1, 1, 1,-1 }, /* G10= g2 g4 */ {-I, I,-I, I }, /* G11= g1 g2 g4 */ { I,-I,-I, I }, /* G12= g3 g4 */ {-1, 1,-1, 1 }, /* G13= g1 g3 g4 */ {-I,-I,-I,-I }, /* G14= g2 g3 g4 */ { 1, 1,-1,-1 }, /* G15= g1 g2 g3 g4 */ }; int gamma_left_ind[16][4] = { { 0, 1, 2, 3 }, /* G0 = 1 */ { 3, 2, 1, 0 }, /* G1 = g1 */ { 3, 2, 1, 0 }, /* G2 = g2 */ { 0, 1, 2, 3 }, /* G3 = g1 g2 */ { 2, 3, 0, 1 }, /* G4 = g3 */ { 1, 0, 3, 2 }, /* G5 = g1 g3 */ { 1, 0, 3, 2 }, /* G6 = g2 g3 */ { 2, 3, 0, 1 }, /* G7 = g1 g2 g3 */ { 2, 3, 0, 1 }, /* G8 = g4 */ { 1, 0, 3, 2 }, /* G9 = g1 g4 */ { 1, 0, 3, 2 }, /* G10= g2 g4 */ { 2, 3, 0, 1 }, /* G11= g1 g2 g4 */ { 0, 1, 2, 3 }, /* G12= g3 g4 */ { 3, 2, 1, 0 }, /* G13= g1 g3 g4 */ { 3, 2, 1, 0 }, /* G14= g2 g3 g4 */ { 0, 1, 2, 3 }, /* G15= g1 g2 g3 g4 */ }; #define get_mom(mom_list, i_mom) ((mom_list) + 4*(i_mom)) if (4 != S->rank || 4 != QDP_Ns || 3 != t_axis) { return "not implemented for this dim, spin, color, or t-axis"; } int latsize[4]; QDP_latsize_L(S->lat, latsize); if (NULL == aff_w || NULL == aff_kpath || NULL == mom || n_mom < 0) { return "incorrect pointer parameters"; } int i; for (i = 0 ; i < S->rank; i++) { if (csrc[i] < 0 || latsize[i] <= csrc[i]) { return "incorrect source coordinates"; } } if (tsnk < 0 || latsize[t_axis] <= tsnk) { return "incorrect sink t-coordinate"; } if (n_mom <= 0) return NULL; /* relax */ int src_snk_dt = -1; int lt = latsize[t_axis]; if (!time_rev) { src_snk_dt = (lt + tsnk - csrc[t_axis]) % lt; } else { src_snk_dt = (lt + csrc[t_axis] - tsnk) % lt; } int bb_arr_size = 16 * n_mom * (src_snk_dt + 1) * 2 * sizeof(double); double *bb_arr = qlua_malloc(L, bb_arr_size); memset(bb_arr, 0, bb_arr_size); #define bb_real(i_gamma, i_mom) ((bb_arr) + (src_snk_dt + 1) * (0 + 2 * ((i_mom) + n_mom * (i_gamma)))) #define bb_imag(i_gamma, i_mom) ((bb_arr) + (src_snk_dt + 1) * (1 + 2 * ((i_mom) + n_mom * (i_gamma)))) double complex *exp_iphase = qlua_malloc(L, n_mom * sizeof(double complex)); int coord[4]; double complex trc_FBd[4][4]; QLA_D3_DiracPropagator *F_exp = QDP_D3_expose_P(F); QLA_D3_DiracPropagator *B_exp = QDP_D3_expose_P(B); int i_site; int sites = QDP_sites_on_node_L(S->lat); for (i_site = 0; i_site < sites; i_site++) { QDP_get_coords_L(S->lat, coord, QDP_this_node, i_site); int t = -1; if (!time_rev) { t = (lt + coord[t_axis] - csrc[t_axis]) % lt; } else { t = (lt + csrc[t_axis] - coord[t_axis]) % lt; } if (src_snk_dt < t) continue; /* precalc phases for inner contraction loop */ int i_mom; for (i_mom = 0 ; i_mom < n_mom ; i_mom++) { exp_iphase[i_mom] = calc_exp_iphase(coord, csrc, latsize, get_mom(mom, i_mom)); // printf("%e+I*%e\n", creal(exp_iphase[i_mom]), cimag(exp_iphase[i_mom])); } /* compute trace_{color} [ F * B^\dag] [is,js] = sum_{ic,jc,ks} F[ic,is; jc,ks] * (B[ic,js; jc,ks])^* is,js,ks - spin, ic,jc - color */ int is, js, ks, ic, jc; for (is = 0; is < 4; is++) { for (js = 0; js < 4; js++) { QLA_D_Complex sum; QLA_c_eq_r(sum, 0); for (ks = 0; ks < 4; ks++) { for (ic = 0; ic < 3 ; ic++) for (jc = 0; jc < 3 ; jc++) QLA_c_peq_c_times_ca(sum, QLA_elem_P(F_exp[i_site], ic,is, jc,ks), QLA_elem_P(B_exp[i_site], ic,js, jc,ks)); } trc_FBd[is][js] = QLA_real(sum) + I*QLA_imag(sum); } } /* cycle over Gamma */ int gn; for (gn = 0; gn < 16 ; gn++) { double complex sum = 0.; /* compute contractions Gamma(n) */ for (is = 0; is < 4; is++) sum += gamma_left_coeff[gn][is] * trc_FBd[gamma_left_ind[gn][is]][is]; /* mult. by phase and add to timeslice sum */ for (i_mom = 0; i_mom < n_mom; i_mom++) { double complex aux = exp_iphase[i_mom] * sum; bb_real(gn, i_mom)[t] += creal(aux); bb_imag(gn, i_mom)[t] += cimag(aux); } } } qlua_free(L, exp_iphase); /* global sum */ if (QMP_sum_double_array(bb_arr, bb_arr_size / sizeof(double))) { qlua_free(L, bb_arr); return "QMP_sum_double_array error"; } /* save to AFF */ if (aff_w->master) { struct AffNode_s *aff_top = NULL; aff_top = aff_writer_mkpath(aff_w->ptr, aff_w->dir, aff_kpath); if (NULL == aff_top) { qlua_free(L, bb_arr); return aff_writer_errstr(aff_w->ptr); } double complex *cplx_buf = qlua_malloc(L, (src_snk_dt + 1) * sizeof(double complex)); char buf[200]; int gn, i_mom, t; for (gn = 0; gn < 16; gn++) for (i_mom = 0; i_mom < n_mom; i_mom++) { /* copy & mult by bc, if necessary */ const double *bb_re_cur = bb_real(gn, i_mom), *bb_im_cur = bb_imag(gn, i_mom); if (!time_rev) { /* no bc */ for (t = 0 ; t <= src_snk_dt; t++) cplx_buf[t] = bb_re_cur[t] + I*bb_im_cur[t]; } else { if (gn < 8) { for (t = 0 ; t <= src_snk_dt; t++) cplx_buf[t] = bc_baryon_t * (bb_re_cur[t] + I*bb_im_cur[t]); } else { for (t = 0 ; t <= src_snk_dt; t++) cplx_buf[t] = -bc_baryon_t * (bb_re_cur[t] + I*bb_im_cur[t]); } } /* write to AFF */ snprintf(buf, sizeof(buf), "g%d/qx%d_qy%d_qz%d", gn, get_mom(mom, i_mom)[0], get_mom(mom, i_mom)[1], get_mom(mom, i_mom)[2]); struct AffNode_s *node = aff_writer_mkpath(aff_w->ptr, aff_top, buf); if (NULL == node) { qlua_free(L, bb_arr); qlua_free(L, cplx_buf); return aff_writer_errstr(aff_w->ptr); } if (aff_node_put_complex(aff_w->ptr, node, cplx_buf, src_snk_dt + 1)) { qlua_free(L, bb_arr); qlua_free(L, cplx_buf); return aff_writer_errstr(aff_w->ptr); } } qlua_free(L, cplx_buf); } #undef bb_real #undef bb_imag #undef get_mom qlua_free(L, bb_arr); QDP_D3_reset_P(F); QDP_D3_reset_P(B); return 0; }
!GAUGEMULT1 #include "qdp_$lib_internal.h" #include "com_common.h" #include "com_common_internal.h" #define fvdp QLA$PC_$ABBR3_v$EQOP_$ABBR1$ADJ1_times_p$ABBR2$ADJ2 #define fxdp QLA$PC_$ABBR3_x$EQOP_$ABBR1$ADJ1_times_p$ABBR2$ADJ2 #define fvpp QLA$PC_$ABBR3_v$EQOP_p$ABBR1$ADJ1_times_p$ABBR2$ADJ2 #define fxpp QLA$PC_$ABBR3_x$EQOP_p$ABBR1$ADJ1_times_p$ABBR2$ADJ2 void QDP$PC_$ABBR3_$EQOP_$ABBR1$ADJ1_times_s$ABBR2$ADJ2( $QDPPCTYPE3 *dest, $QDPPCTYPE1 *src1, $QDPPCTYPE2 *src2, QDP_Shift shift, QDP_ShiftDir fb, QDP_Subset subset) { char **temp2 = NULL; QDP_msg_tag *mtag = NULL; TGET; ONE { temp2 = (char **)malloc(QDP_sites_on_node_L(get_lat(dest))*sizeof(char *)); if((fb!=QDP_forward)&&(fb!=QDP_backward)) { fprintf(stderr,"QDP: error: bad fb in QDP$PC_$ABBR_eq_s$ABBR\n"); QDP_abort(1); } /* prepare shift source */ if(src2->ptr==NULL) { if(src2->data==NULL) { fprintf(stderr,"error: shifting from uninitialized source\n"); QDP_abort(1); } } else { QDP_switch_ptr_to_data(&src2->dc); } mtag = QDP_declare_shift( temp2, (char *)src2->data, src2->dc.size, shift, fb, subset ); QDP_do_gather(mtag); QDP_prepare_dest(&dest->dc); QDP_prepare_src(&src1->dc); QDP_wait_gather(mtag); } TBARRIER; #define SRC2O(o) ((void *)(((void **)(temp2))+(o))) #define N -1 #if ($C+0) == -1 int nc = QDP_get_nc(dest); #endif int toff, toff1; TSPLIT(toff, toff1, subset->len); int tlen = toff1-toff; toff += subset->offset; if(src1->ptr==NULL) { if(subset->indexed==0) { fvdp($NCVAR QDP_offset_data(dest,toff), QDP_offset_data(src1,toff), SRC2O(toff), tlen ); } else { fxdp($NCVAR QDP_offset_data(dest,0), QDP_offset_data(src1,0), SRC2O(0), subset->index, tlen ); } } else { if(subset->indexed==0) { fvpp($NCVAR QDP_offset_data(dest,toff), QDP_offset_ptr(src1,toff), SRC2O(toff), tlen ); } else { fxpp($NCVAR QDP_offset_data(dest,0), QDP_offset_ptr(src1,0), SRC2O(0), subset->index, tlen ); } } ONE { QDP_cleanup_gather(mtag); free((void*)temp2); } }
int sign, #if QNc == 'N' QLA_DN_DiracPropagator(nc, (*f)) #else Qx(QLA_D,_DiracPropagator) *f #endif ) { #if QNc == 'N' typedef QLA_DN_DiracFermion(nc, Vtype); typedef QLA_DN_HalfFermion(nc, Htype); #else typedef Qx(QLA_D,_DiracFermion) Vtype; typedef Qx(QLA_D,_HalfFermion) Htype; #endif int count = QDP_sites_on_node_L(S->lat); int k, ic, is; for (k = 0; k < count; k++) { for (ic = 0; ic < nc; ic++) { for (is = 0; is < QDP_Ns; is++) { int jc, js; Vtype fk; Htype hk; for (jc = 0; jc < nc; jc++) { for (js = 0; js < QDP_Ns; js++) { QLA_c_eq_c(QLA_elem_D(fk, jc, js), QLA_elem_P(f[0], jc, js, ic, is)); } } #if QNc == 'N'
/* return value tells whether restart is possible */ int QDP_prepare_shift(QDP_data_common_t *dest_dc, QDP_data_common_t *src_dc, QDP_Shift shift, QDP_ShiftDir fb, QDP_Subset subset) { QDP_shift_src_t **pss, *ss; int restart=0; ENTER; TRACE; if(src_dc->discarded) { fprintf(stderr,"error: attempt to use discarded data\n"); QDP_abort(1); } TRACE; if(src_dc->exposed) { fprintf(stderr,"error: attempt to use exposed field\n"); QDP_abort(1); } TRACE; if(dest_dc->exposed) { fprintf(stderr,"error: attempt to use exposed field\n"); QDP_abort(1); } TRACE; /* prepare shift source */ if(*(src_dc->ptr)==NULL) { if(*(src_dc->data)==NULL) { fprintf(stderr,"error: shifting from uninitialized source\n"); QDP_abort(1); } } else { QDP_switch_ptr_to_data(src_dc); } TRACE; /* check if this shift has been done before */ pss = &dest_dc->shift_src; while(1) { if(*pss==NULL) { ss = QDP_alloc_shift_src_t(src_dc, shift, fb, subset); ss->next = dest_dc->shift_src; dest_dc->shift_src = ss; src_dc->shift_dest = QDP_alloc_shift_dest_t(dest_dc, src_dc->shift_dest); break; } ss = *pss; if( (ss->dc==src_dc) && (ss->shiftId==shift->id) && (ss->fb==fb) && (ss->subsetId==subset->id) ) { if(ss->st->shift_pending) { ss->st->shift_pending = 0; QDP_wait_gather(ss->st->msgtag); } if(ss==dest_dc->shift_src) { restart = 1; } else { *pss = ss->next; //QDP_clear_shift_src(dest_dc); // don't save old shifts ss->next = dest_dc->shift_src; dest_dc->shift_src = ss; QDP_remove_shift_tag_reference(ss->st); } break; } #if 0 if(ss->subset==subset) { if(ss==dest_dc->shift_src) { if(ss->st->shift_pending) { QDP_wait_gather(ss->st->msgtag); } QDP_remove_shift_tag_reference(ss->st); } } #endif pss = &(ss->next); } dest_dc->discarded = 0; { QDP_shift_list_t *sl; if(sl_free_list==NULL) { sl = (QDP_shift_list_t *) malloc(sizeof(QDP_shift_list_t)); } else { sl = sl_free_list; sl_free_list = sl->next; } //printf("alloc sl\n"); sl->next = shift_list; sl->prev = NULL; if(shift_list) shift_list->prev = sl; shift_list = sl; sl->ss = ss; ss->sl = sl; } /* prepare shift destination */ if(*(dest_dc->ptr)==NULL) { *(dest_dc->ptr) = (char **)malloc(QDP_sites_on_node_L(dest_dc->lat)*sizeof(char *)); if(*(dest_dc->data)!=NULL) { char *data, **ptr; int i; data = *(dest_dc->data); ptr = *(dest_dc->ptr); for(i=0; i<QDP_sites_on_node_L(dest_dc->lat); ++i) { ptr[i] = data + i*dest_dc->size; } } else { char **ptr; int i; ptr = *(dest_dc->ptr); for(i=0; i<QDP_sites_on_node_L(dest_dc->lat); ++i) { ptr[i] = NULL; } } } LEAVE; return restart; }