/*
 * Hopping_Matrix (SSE2-optimised variant) — applies the hopping (nearest
 * neighbour) part of the Wilson-Dirac operator between the two checkerboard
 * sublattices: the result for VOLUME/2 sites is written to l, the source is
 * read from k.  ieo selects which sublattice the source lives on (it picks
 * the matching gauge-field copy and neighbour-pointer table).
 *
 * The routine runs in two phases that communicate through the halfspinor
 * buffers addressed via NBPointer:
 *   phase 1 ("spray"): for every source site, project the spinor onto the
 *     eight (1 +- gamma_mu) half-spinors; the forward (+mu) projections are
 *     multiplied by the local link U and the hopping parameter ka_mu before
 *     being scattered (non-temporal stores) to the neighbour slots, the
 *     backward (-mu) projections are stored bare — their gauge multiply
 *     happens on the receiving side in phase 2;
 *   phase 2 ("collect"): after the MPI boundary exchange, gather the eight
 *     half-spinors of each result site, multiply the -mu contributions by
 *     U^dagger (conjugated ka_mu), and reconstruct the full spinor in rs
 *     before streaming it out to l.
 *
 * NOTE(review): the _sse_* macros pass data through implicit xmm register
 * state; the exact statement order inside each direction section is part of
 * the algorithm and must not be reordered.
 * NOTE(review): rs is static and the routine reads/writes shared globals
 * (g_gauge_field_copy, NBPointer, ka0..ka3) — presumably not thread-safe;
 * confirm against the build configuration before threading.
 */
void Hopping_Matrix(const int ieo, spinor * const l, spinor * const k){
  int ix, i;
  su3 * restrict U ALIGN;      /* walks linearly through the gauge-field copy */
  static spinor rs;            /* phase-2 per-site accumulator */
  spinor * restrict s ALIGN;   /* walks the source (phase 1) / result (phase 2) field */
  halfspinor ** phi ALIGN;     /* per-site table of 8 halfspinor slots */
#if defined OPTERON
  const int predist=2;         /* su3 prefetch distance, tuned per CPU */
#else
  const int predist=1;
#endif
#ifdef _KOJAK_INST
#pragma pomp inst begin(hoppingmatrix)
#endif
#ifdef _GAUGE_COPY
  /* refresh the linearised gauge copy if the gauge field changed */
  if(g_update_gauge_copy) {
    update_backward_gauge();
  }
#endif
  /* We will run through the source vector now */
  /* instead of the solution vector */
  s = k;
  _prefetch_spinor(s);
  /* pick the gauge copy matching the source sublattice */
  if(ieo == 0) {
    U = g_gauge_field_copy[0][0];
  }
  else {
    U = g_gauge_field_copy[1][0];
  }
  phi = NBPointer[ieo];
  _prefetch_su3(U);
  /**************** loop over all lattice sites ******************/
  ix=0;
  for(i = 0; i < (VOLUME)/2; i++){
    /*********************** direction +0 ************************/
    /* (1+gamma_0) projection: s0+s2, s1+s3; multiply by U and ka0 */
    _prefetch_su3(U+predist);
    _sse_load((*s).s0);
    _sse_load_up((*s).s2);
    _sse_vector_add();
    _sse_su3_multiply((*U));
    _sse_vector_cmplx_mul(ka0);
    _sse_store_nt_up((*phi[ix]).s0);
    _sse_load((*s).s1);
    _sse_load_up((*s).s3);
    _sse_vector_add();
    _sse_su3_multiply((*U));
    _sse_vector_cmplx_mul(ka0);
    _sse_store_nt_up((*phi[ix]).s1);
    U++;
    ix++;
    /*********************** direction -0 ************************/
    /* (1-gamma_0) projection: stored bare, gauge multiply done in phase 2 */
    _sse_load((*s).s0);
    _sse_load_up((*s).s2);
    _sse_vector_sub();
    _sse_store_nt((*phi[ix]).s0);
    _sse_load((*s).s1);
    _sse_load_up((*s).s3);
    _sse_vector_sub();
    _sse_store_nt((*phi[ix]).s1);
    ix++;
    /*********************** direction +1 ************************/
    /* (1+gamma_1): s0+i*s3, s1+i*s2 */
    _prefetch_su3(U+predist);
    _sse_load((*s).s0);
    /* next load may be redundant? kept from original */
    _sse_load_up((*s).s3);
    _sse_vector_i_mul();
    _sse_vector_add();
    _sse_su3_multiply((*U));
    _sse_vector_cmplx_mul(ka1);
    _sse_store_nt_up((*phi[ix]).s0);
    _sse_load((*s).s1);
    _sse_load_up((*s).s2);
    _sse_vector_i_mul();
    _sse_vector_add();
    _sse_su3_multiply((*U));
    _sse_vector_cmplx_mul(ka1);
    _sse_store_nt_up((*phi[ix]).s1);
    ix++;
    U++;
    /*********************** direction -1 ************************/
    /* (1-gamma_1): s0-i*s3, s1-i*s2 */
    _sse_load((*s).s0);
    _sse_load_up((*s).s3);
    _sse_vector_i_mul();
    _sse_vector_sub();
    _sse_store_nt((*phi[ix]).s0);
    _sse_load((*s).s1);
    _sse_load_up((*s).s2);
    _sse_vector_i_mul();
    _sse_vector_sub();
    _sse_store_nt((*phi[ix]).s1);
    ix++;
    /*********************** direction +2 ************************/
    /* (1+gamma_2): s0+s3, s1-s2 */
    _prefetch_su3(U+predist);
    _sse_load((*s).s0);
    _sse_load_up((*s).s3);
    _sse_vector_add();
    _sse_su3_multiply((*U));
    _sse_vector_cmplx_mul(ka2);
    _sse_store_nt_up((*phi[ix]).s0);
    _sse_load((*s).s1);
    _sse_load_up((*s).s2);
    _sse_vector_sub();
    _sse_su3_multiply((*U));
    _sse_vector_cmplx_mul(ka2);
    _sse_store_nt_up((*phi[ix]).s1);
    ix++;
    U++;
    /*********************** direction -2 ************************/
    /* (1-gamma_2): s0-s3, s1+s2 */
    _sse_load((*s).s0);
    _sse_load_up((*s).s3);
    _sse_vector_sub();
    _sse_store_nt((*phi[ix]).s0);
    _sse_load((*s).s1);
    _sse_load_up((*s).s2);
    _sse_vector_add();
    _sse_store_nt((*phi[ix]).s1);
    ix++;
    /*********************** direction +3 ************************/
    /* (1+gamma_3): s0+i*s2, s1-i*s3; also prefetch the next site's spinor */
    _prefetch_su3(U+predist);
    _prefetch_spinor(s+1);
    _sse_load((*s).s0);
    _sse_load_up((*s).s2);
    _sse_vector_i_mul();
    _sse_vector_add();
    _sse_su3_multiply((*U));
    _sse_vector_cmplx_mul(ka3);
    _sse_store_nt_up((*phi[ix]).s0);
    _sse_load((*s).s1);
    _sse_load_up((*s).s3);
    _sse_vector_i_mul();
    _sse_vector_sub();
    _sse_su3_multiply((*U));
    _sse_vector_cmplx_mul(ka3);
    _sse_store_nt_up((*phi[ix]).s1);
    ix++;
    U++;
    /*********************** direction -3 ************************/
    /* (1-gamma_3): s0-i*s2, s1+i*s3 */
    _sse_load((*s).s0);
    _sse_load_up((*s).s2);
    _sse_vector_i_mul();
    _sse_vector_sub();
    _sse_store_nt((*phi[ix]).s0);
    _sse_load((*s).s1);
    _sse_load_up((*s).s3);
    _sse_vector_i_mul();
    _sse_vector_add();
    _sse_store_nt((*phi[ix]).s1);
    ix++;
    s++;
  }
  /* exchange the boundary half-spinors between MPI ranks */
# if (defined MPI && !defined _NO_COMM)
  xchange_halffield();
# endif
  /* phase 2: s now walks the result field */
  s = l;
  phi = NBPointer[2 + ieo];
  /* the opposite sublattice's gauge copy is used for the U^dagger multiplies */
  if(ieo == 0) {
    U = g_gauge_field_copy[1][0];
  }
  else {
    U = g_gauge_field_copy[0][0];
  }
  _prefetch_su3(U);
  /* Now we sum up and expand to a full spinor */
  ix = 0;
  for(i = 0; i < (VOLUME)/2; i++){
    /*********************** direction +0 ************************/
    /* initialise rs with the +0 contribution (upper = lower for (1+gamma_0)) */
    _vector_assign(rs.s0, (*phi[ix]).s0);
    _vector_assign(rs.s2, (*phi[ix]).s0);
    _vector_assign(rs.s1, (*phi[ix]).s1);
    _vector_assign(rs.s3, (*phi[ix]).s1);
    ix++;
    /*********************** direction -0 ************************/
    /* U^dagger * halfspinor, conjugated ka0; accumulate into rs */
    _prefetch_su3(U+predist);
    _sse_load((*phi[ix]).s0);
    _sse_su3_inverse_multiply((*U));
    _sse_vector_cmplxcg_mul(ka0);
    _sse_load(rs.s0);
    _sse_vector_add();
    _sse_store(rs.s0);
    _sse_load(rs.s2);
    _sse_vector_sub();
    _sse_store(rs.s2);
    _sse_load((*phi[ix]).s1);
    _sse_su3_inverse_multiply((*U));
    _sse_vector_cmplxcg_mul(ka0);
    _sse_load(rs.s1);
    _sse_vector_add();
    _sse_store(rs.s1);
    _sse_load(rs.s3);
    _sse_vector_sub();
    _sse_store(rs.s3);
    ix++;
    U++;
    /*********************** direction +1 ************************/
    /* already gauge-multiplied in phase 1: just accumulate with gamma_1 signs */
    _sse_load_up((*phi[ix]).s0);
    _sse_load(rs.s0);
    _sse_vector_add();
    _sse_store(rs.s0);
    _sse_load(rs.s3);
    _sse_vector_i_mul();
    _sse_vector_sub();
    _sse_store(rs.s3);
    _sse_load_up((*phi[ix]).s1);
    _sse_load(rs.s1);
    _sse_vector_add();
    _sse_store(rs.s1);
    _sse_load(rs.s2);
    _sse_vector_i_mul();
    _sse_vector_sub();
    _sse_store(rs.s2);
    ix++;
    /*********************** direction -1 ************************/
    _prefetch_su3(U+predist);
    _sse_load((*phi[ix]).s0);
    _sse_su3_inverse_multiply((*U));
    _sse_vector_cmplxcg_mul(ka1);
    _sse_load(rs.s0);
    _sse_vector_add();
    _sse_store(rs.s0);
    _sse_load(rs.s3);
    _sse_vector_i_mul();
    _sse_vector_add();
    _sse_store(rs.s3);
    _sse_load((*phi[ix]).s1);
    _sse_su3_inverse_multiply((*U));
    _sse_vector_cmplxcg_mul(ka1);
    _sse_load(rs.s1);
    _sse_vector_add();
    _sse_store(rs.s1);
    _sse_load(rs.s2);
    _sse_vector_i_mul();
    _sse_vector_add();
    _sse_store(rs.s2);
    ix++;
    U++;
    /*********************** direction +2 ************************/
    _sse_load_up((*phi[ix]).s0);
    _sse_load(rs.s0);
    _sse_vector_add();
    _sse_store(rs.s0);
    _sse_load(rs.s3);
    _sse_vector_add();
    _sse_store(rs.s3);
    _sse_load_up((*phi[ix]).s1);
    _sse_load(rs.s1);
    _sse_vector_add();
    _sse_store(rs.s1);
    _sse_load(rs.s2);
    _sse_vector_sub();
    _sse_store(rs.s2);
    ix++;
    /*********************** direction -2 ************************/
    _prefetch_su3(U+predist);
    _sse_load((*phi[ix]).s0);
    _sse_su3_inverse_multiply((*U));
    _sse_vector_cmplxcg_mul(ka2);
    _sse_load(rs.s0);
    _sse_vector_add();
    _sse_store(rs.s0);
    _sse_load(rs.s3);
    _sse_vector_sub();
    _sse_store(rs.s3);
    _sse_load((*phi[ix]).s1);
    _sse_su3_inverse_multiply((*U));
    _sse_vector_cmplxcg_mul(ka2);
    _sse_load(rs.s1);
    _sse_vector_add();
    _sse_store(rs.s1);
    _sse_load(rs.s2);
    _sse_vector_add();
    _sse_store(rs.s2);
    ix++;
    U++;
    /*********************** direction +3 ************************/
    _sse_load_up((*phi[ix]).s0);
    _sse_load(rs.s0);
    _sse_vector_add();
    _sse_store(rs.s0);
    _sse_load(rs.s2);
    _sse_vector_i_mul();
    _sse_vector_sub();
    _sse_store(rs.s2);
    _sse_load_up((*phi[ix]).s1);
    _sse_load(rs.s1);
    _sse_vector_add();
    _sse_store(rs.s1);
    _sse_load(rs.s3);
    _sse_vector_i_mul();
    _sse_vector_add();
    _sse_store(rs.s3);
    ix++;
    /*********************** direction -3 ************************/
    /* last contribution: stream the completed spinor straight to l */
    _prefetch_su3(U+predist);
    _prefetch_spinor(s+1);
    _sse_load((*phi[ix]).s0);
    _sse_su3_inverse_multiply((*U));
    _sse_vector_cmplxcg_mul(ka3);
    _sse_load(rs.s0);
    _sse_vector_add();
    _sse_store_nt((*s).s0);
    _sse_load(rs.s2);
    _sse_vector_i_mul();
    _sse_vector_add();
    _sse_store_nt((*s).s2);
    _sse_load((*phi[ix]).s1);
    _sse_su3_inverse_multiply((*U));
    _sse_vector_cmplxcg_mul(ka3);
    _sse_load(rs.s1);
    _sse_vector_add();
    _sse_store_nt((*s).s1);
    _sse_load(rs.s3);
    _sse_vector_i_mul();
    _sse_vector_sub();
    _sse_store_nt((*s).s3);
    ix++;
    U++;
    s++;
  }
#ifdef _KOJAK_INST
#pragma pomp inst end(hoppingmatrix)
#endif
}
/*
 * Hopping_Matrix (half-spinor macro variant) — same two-phase algorithm as
 * the other implementations: phase 1 projects every source site onto eight
 * half-spinors and scatters them via the neighbour-pointer tables, then the
 * boundary is exchanged, and phase 2 gathers and reconstructs the result
 * spinor in l.  All per-direction arithmetic lives in the _hop_{t,x,y,z}_
 * {p,m}_{pre,post}[32] macros, which communicate through the registers set
 * up by _declare_hregs() and through the cursor variables s, U, ix declared
 * here — so the macro-call order and the placement of the s++/U++/ix++
 * increments between them are part of the algorithm and must not change.
 *
 * When both g_sloppy_precision and g_sloppy_precision_flag are set, the
 * intermediate half-spinors are stored in 32-bit precision (halfspinor32,
 * NBPointer32, xchange_halffield32), halving communication volume;
 * otherwise the full-precision path is taken.
 *
 * NOTE(review): relies on global state (g_gauge_field_copy, NBPointer,
 * HalfSpinor buffers) — presumably not thread-safe; confirm before use in
 * threaded code.
 */
void Hopping_Matrix(const int ieo, spinor * const l, spinor * const k){
  int ix;
  su3 * restrict ALIGN U;                  /* walks the linearised gauge copy */
  spinor * restrict ALIGN s;               /* walks the source / result field */
  halfspinor * restrict * phi ALIGN;       /* 64-bit halfspinor slot table */
  halfspinor32 * restrict * phi32 ALIGN;   /* 32-bit (sloppy) slot table */
  /* We have 32 registers available */
  _declare_hregs();
#ifdef _KOJAK_INST
#pragma pomp inst begin(hoppingmatrix)
#endif
#pragma disjoint(*s, *U)
#ifdef _GAUGE_COPY
  if(g_update_gauge_copy) {
    update_backward_gauge(g_gauge_field);
  }
#endif
  __alignx(16, l);
  __alignx(16, k);
  if(g_sloppy_precision == 1 && g_sloppy_precision_flag == 1) {
    /* ---- 32-bit (sloppy precision) path ---- */
    __alignx(16, HalfSpinor32);
    /* We will run through the source vector now */
    /* instead of the solution vector */
    s = k;
    _prefetch_spinor(s);
    /* s contains the source vector */
    if(ieo == 0) {
      U = g_gauge_field_copy[0][0];
    }
    else {
      U = g_gauge_field_copy[1][0];
    }
    phi32 = NBPointer32[ieo];
    _prefetch_su3(U);
    /**************** loop over all lattice sites ******************/
    ix=0;
    for(int i = 0; i < (VOLUME)/2; i++){
      /*********************** direction +0 ************************/
      _hop_t_p_pre32();
      s++;           /* +t macro is the last user of the current site's spinor */
      U++;
      ix++;
      /*********************** direction -0 ************************/
      _hop_t_m_pre32();
      ix++;
      /*********************** direction +1 ************************/
      _hop_x_p_pre32();
      ix++;
      U++;
      /*********************** direction -1 ************************/
      _hop_x_m_pre32();
      ix++;
      /*********************** direction +2 ************************/
      _hop_y_p_pre32();
      ix++;
      U++;
      /*********************** direction -2 ************************/
      _hop_y_m_pre32();
      ix++;
      /*********************** direction +3 ************************/
      _hop_z_p_pre32();
      ix++;
      U++;
      /*********************** direction -3 ************************/
      _hop_z_m_pre32();
      ix++;
      /************************ end of loop ************************/
    }
    /* exchange boundary half-spinors (32-bit) between MPI ranks */
# if (defined TM_USE_MPI && !defined _NO_COMM)
    xchange_halffield32();
# endif
    /* phase 2: reconstruct the result field */
    s = l;
    phi32 = NBPointer32[2 + ieo];
    /* opposite sublattice's gauge copy for the U^dagger multiplies */
    if(ieo == 0) {
      U = g_gauge_field_copy[1][0];
    }
    else {
      U = g_gauge_field_copy[0][0];
    }
    //_prefetch_halfspinor(phi32[0]);
    _prefetch_su3(U);
    /* Now we sum up and expand to a full spinor */
    ix = 0;
    /* _prefetch_spinor_for_store(s); */
    for(int i = 0; i < (VOLUME)/2; i++){
      /* This causes a lot of trouble, do we understand this? */
      /* _prefetch_spinor_for_store(s); */
      //_prefetch_halfspinor(phi32[ix+1]);
      /*********************** direction +0 ************************/
      _hop_t_p_post32();
      ix++;
      /*********************** direction -0 ************************/
      _hop_t_m_post32();
      U++;
      ix++;
      /*********************** direction +1 ************************/
      _hop_x_p_post32();
      ix++;
      /*********************** direction -1 ************************/
      _hop_x_m_post32();
      U++;
      ix++;
      /*********************** direction +2 ************************/
      _hop_y_p_post32();
      ix++;
      /*********************** direction -2 ************************/
      _hop_y_m_post32();
      U++;
      ix++;
      /*********************** direction +3 ************************/
      _hop_z_p_post32();
      ix++;
      /*********************** direction -3 ************************/
      _hop_z_m_post32();
      U++;
      ix++;
      s++;
    }
  }
  else {
    /* ---- full-precision path (same structure, 64-bit halfspinors) ---- */
    __alignx(16, HalfSpinor);
    /* We will run through the source vector now */
    /* instead of the solution vector */
    s = k;
    _prefetch_spinor(s);
    /* s contains the source vector */
    if(ieo == 0) {
      U = g_gauge_field_copy[0][0];
    }
    else {
      U = g_gauge_field_copy[1][0];
    }
    phi = NBPointer[ieo];
    _prefetch_su3(U);
    /**************** loop over all lattice sites ******************/
    ix=0;
    for(int i = 0; i < (VOLUME)/2; i++){
      /*********************** direction +0 ************************/
      _hop_t_p_pre();
      s++;
      U++;
      ix++;
      /*********************** direction -0 ************************/
      _hop_t_m_pre();
      ix++;
      /*********************** direction +1 ************************/
      _hop_x_p_pre();
      ix++;
      U++;
      /*********************** direction -1 ************************/
      _hop_x_m_pre();
      ix++;
      /*********************** direction +2 ************************/
      _hop_y_p_pre();
      ix++;
      U++;
      /*********************** direction -2 ************************/
      _hop_y_m_pre();
      ix++;
      /*********************** direction +3 ************************/
      _hop_z_p_pre();
      ix++;
      U++;
      /*********************** direction -3 ************************/
      _hop_z_m_pre();
      ix++;
      /************************ end of loop ************************/
    }
    /* exchange boundary half-spinors between MPI ranks */
# if (defined TM_USE_MPI && !defined _NO_COMM)
    xchange_halffield();
# endif
    s = l;
    phi = NBPointer[2 + ieo];
    //_prefetch_halfspinor(phi[0]);
    if(ieo == 0) {
      U = g_gauge_field_copy[1][0];
    }
    else {
      U = g_gauge_field_copy[0][0];
    }
    _prefetch_su3(U);
    /* Now we sum up and expand to a full spinor */
    ix = 0;
    /* _prefetch_spinor_for_store(s); */
    for(int i = 0; i < (VOLUME)/2; i++){
      /* This causes a lot of trouble, do we understand this? */
      /* _prefetch_spinor_for_store(s); */
      //_prefetch_halfspinor(phi[ix+1]);
      /*********************** direction +0 ************************/
      _hop_t_p_post();
      ix++;
      /*********************** direction -0 ************************/
      _hop_t_m_post();
      U++;
      ix++;
      /*********************** direction +1 ************************/
      _hop_x_p_post();
      ix++;
      /*********************** direction -1 ************************/
      _hop_x_m_post();
      U++;
      ix++;
      /*********************** direction +2 ************************/
      _hop_y_p_post();
      ix++;
      /*********************** direction -2 ************************/
      _hop_y_m_post();
      U++;
      ix++;
      /*********************** direction +3 ************************/
      _hop_z_p_post();
      ix++;
      /*********************** direction -3 ************************/
      _hop_z_m_post();
      U++;
      ix++;
      s++;
    }
  }
#ifdef _KOJAK_INST
#pragma pomp inst end(hoppingmatrix)
#endif
}
/* for ieo=0, k resides on odd sites and l on even sites */
/*
 * Hopping_Matrix (portable C variant) — applies the hopping part of the
 * Wilson-Dirac operator from the sublattice selected by ieo to the other:
 * the source is read from k and the result for VOLUME/2 sites is written
 * to l.  Aborts if k == l (the two-phase scheme reads the source while the
 * result buffer is still being assembled).
 *
 * Phase 1: each source spinor is projected onto the eight (1 +- gamma_mu)
 * half-spinors; the +mu projections are multiplied by the local link U and
 * hopping parameter ka_mu before being scattered through the neighbour
 * pointer table, the -mu projections are stored bare.  After the MPI
 * boundary exchange, phase 2 gathers each site's eight half-spinors,
 * applies U^dagger and the conjugated ka_mu to the -mu contributions, and
 * reconstructs the full spinor.  The sloppy-precision branch stores the
 * intermediate half-spinors in 32-bit precision (halfspinor32/NBPointer32).
 *
 * NOTE(review): psi/chi/psi2/chi2 are static temporaries and the routine
 * reads global state (g_gauge_field_copy, NBPointer, ka0..ka3) — presumably
 * not thread-safe / not reentrant; confirm before threading.
 */
void Hopping_Matrix(const int ieo, spinor * const l, spinor * const k){
  int i,ix;
  su3 * restrict U ALIGN;      /* walks the linearised gauge copy */
  spinor * restrict s ALIGN;   /* walks the source (phase 1) / result (phase 2) field */
  spinor rs;                   /* per-site work spinor */
  static su3_vector psi, chi, psi2, chi2;  /* scratch vectors for project/multiply */
  halfspinor * restrict * phi ALIGN;
  halfspinor32 * restrict * phi32 ALIGN;
#ifdef _KOJAK_INST
#pragma pomp inst begin(hoppingmatrix)
#endif
#ifdef XLC
#pragma disjoint(*l, *k, *U, *s)
#endif
#ifdef _GAUGE_COPY
  if(g_update_gauge_copy) {
    update_backward_gauge();
  }
#endif
  /* in-place application is not supported */
  if(k == l){
    printf("Error in H_psi (simple.c):\n");
    printf("Arguments k and l must be different\n");
    printf("Program aborted\n");
    exit(1);
  }
  s = k;
  if(ieo == 0) {
    U = g_gauge_field_copy[0][0];
  }
  else {
    U = g_gauge_field_copy[1][0];
  }
  if(g_sloppy_precision == 1 && g_sloppy_precision_flag == 1) {
    /* ---- 32-bit (sloppy precision) path ---- */
    phi32 = NBPointer32[ieo];
    /**************** loop over all lattice sites ****************/
    ix=0;
    for(i = 0; i < (VOLUME)/2; i++){
      _vector_assign(rs.s0, (*s).s0);
      _vector_assign(rs.s1, (*s).s1);
      _vector_assign(rs.s2, (*s).s2);
      _vector_assign(rs.s3, (*s).s3);
      s++;
      /*********************** direction +0 ************************/
      /* (1+gamma_0): s0+s2, s1+s3; multiply by U and ka0 */
      _vector_add(psi, rs.s0, rs.s2);
      _su3_multiply(chi,(*U),psi);
      _complex_times_vector((*phi32[ix]).s0, ka0, chi);
      _vector_add(psi, rs.s1, rs.s3);
      _su3_multiply(chi,(*U),psi);
      _complex_times_vector((*phi32[ix]).s1, ka0, chi);
      U++;
      ix++;
      /*********************** direction -0 ************************/
      /* bare projection; gauge multiply happens on the receiving side */
      _vector_sub((*phi32[ix]).s0, rs.s0, rs.s2);
      _vector_sub((*phi32[ix]).s1, rs.s1, rs.s3);
      ix++;
      /*********************** direction +1 ************************/
      _vector_i_add(psi, rs.s0, rs.s3);
      _su3_multiply(chi, (*U), psi);
      _complex_times_vector((*phi32[ix]).s0, ka1, chi);
      _vector_i_add(psi, rs.s1, rs.s2);
      _su3_multiply(chi, (*U), psi);
      _complex_times_vector((*phi32[ix]).s1, ka1, chi);
      U++;
      ix++;
      /*********************** direction -1 ************************/
      _vector_i_sub((*phi32[ix]).s0, rs.s0, rs.s3);
      _vector_i_sub((*phi32[ix]).s1, rs.s1, rs.s2);
      ix++;
      /*********************** direction +2 ************************/
      _vector_add(psi, rs.s0, rs.s3);
      _su3_multiply(chi,(*U),psi);
      _complex_times_vector((*phi32[ix]).s0, ka2, chi);
      _vector_sub(psi, rs.s1, rs.s2);
      _su3_multiply(chi,(*U),psi);
      _complex_times_vector((*phi32[ix]).s1, ka2, chi);
      U++;
      ix++;
      /*********************** direction -2 ************************/
      _vector_sub((*phi32[ix]).s0, rs.s0, rs.s3);
      _vector_add((*phi32[ix]).s1, rs.s1, rs.s2);
      ix++;
      /*********************** direction +3 ************************/
      _vector_i_add(psi, rs.s0, rs.s2);
      _su3_multiply(chi, (*U), psi);
      _complex_times_vector((*phi32[ix]).s0, ka3, chi);
      _vector_i_sub(psi, rs.s1, rs.s3);
      _su3_multiply(chi,(*U),psi);
      _complex_times_vector((*phi32[ix]).s1, ka3, chi);
      U++;
      ix++;
      /*********************** direction -3 ************************/
      _vector_i_sub((*phi32[ix]).s0, rs.s0, rs.s2);
      _vector_i_add((*phi32[ix]).s1, rs.s1, rs.s3);
      ix++;
      /************************ end of loop ************************/
    }
    /* exchange boundary half-spinors (32-bit) between MPI ranks */
# if (defined MPI && !defined _NO_COMM)
    xchange_halffield32();
# endif
    /* phase 2: reconstruct the result field */
    s = l;
    phi32 = NBPointer32[2 + ieo];
    /* opposite sublattice's gauge copy for the U^dagger multiplies */
    if(ieo == 0) {
      U = g_gauge_field_copy[1][0];
    }
    else {
      U = g_gauge_field_copy[0][0];
    }
    ix = 0;
    for(i = 0; i < (VOLUME)/2; i++){
      /*********************** direction +0 ************************/
      /* initialise rs with the +0 contribution (upper = lower) */
      _vector_assign(rs.s0, (*phi32[ix]).s0);
      _vector_assign(rs.s2, (*phi32[ix]).s0);
      _vector_assign(rs.s1, (*phi32[ix]).s1);
      _vector_assign(rs.s3, (*phi32[ix]).s1);
      ix++;
      /*********************** direction -0 ************************/
      /* U^dagger * halfspinor with conjugated ka0, accumulate into rs */
      _vector_assign(psi, (*phi32[ix]).s0);
      _su3_inverse_multiply(chi,(*U), psi);
      _complexcjg_times_vector(psi,ka0,chi);
      _vector_add_assign(rs.s0, psi);
      _vector_sub_assign(rs.s2, psi);
      _vector_assign(psi, (*phi32[ix]).s1);
      _su3_inverse_multiply(chi,(*U), psi);
      _complexcjg_times_vector(psi,ka0,chi);
      _vector_add_assign(rs.s1, psi);
      _vector_sub_assign(rs.s3, psi);
      ix++;
      U++;
      /*********************** direction +1 ************************/
      /* already gauge-multiplied in phase 1: accumulate with gamma_1 signs */
      _vector_add_assign(rs.s0, (*phi32[ix]).s0);
      _vector_i_sub_assign(rs.s3, (*phi32[ix]).s0);
      _vector_add_assign(rs.s1, (*phi32[ix]).s1);
      _vector_i_sub_assign(rs.s2, (*phi32[ix]).s1);
      ix++;
      /*********************** direction -1 ************************/
      _vector_assign(psi, (*phi32[ix]).s0);
      _su3_inverse_multiply(chi,(*U), psi);
      _complexcjg_times_vector(psi,ka1,chi);
      _vector_add_assign(rs.s0, psi);
      _vector_i_add_assign(rs.s3, psi);
      _vector_assign(psi, (*phi32[ix]).s1);
      _su3_inverse_multiply(chi,(*U), psi);
      _complexcjg_times_vector(psi,ka1,chi);
      _vector_add_assign(rs.s1, psi);
      _vector_i_add_assign(rs.s2, psi);
      U++;
      ix++;
      /*********************** direction +2 ************************/
      _vector_add_assign(rs.s0, (*phi32[ix]).s0);
      _vector_add_assign(rs.s3, (*phi32[ix]).s0);
      _vector_add_assign(rs.s1, (*phi32[ix]).s1);
      _vector_sub_assign(rs.s2, (*phi32[ix]).s1);
      ix++;
      /*********************** direction -2 ************************/
      _vector_assign(psi, (*phi32[ix]).s0);
      _su3_inverse_multiply(chi,(*U), psi);
      _complexcjg_times_vector(psi,ka2,chi);
      _vector_add_assign(rs.s0, psi);
      _vector_sub_assign(rs.s3, psi);
      _vector_assign(psi, (*phi32[ix]).s1);
      _su3_inverse_multiply(chi, (*U), psi);
      _complexcjg_times_vector(psi,ka2,chi);
      _vector_add_assign(rs.s1, psi);
      _vector_add_assign(rs.s2, psi);
      U++;
      ix++;
      /*********************** direction +3 ************************/
      _vector_add_assign(rs.s0, (*phi32[ix]).s0);
      _vector_i_sub_assign(rs.s2, (*phi32[ix]).s0);
      _vector_add_assign(rs.s1, (*phi32[ix]).s1);
      _vector_i_add_assign(rs.s3, (*phi32[ix]).s1);
      ix++;
      /*********************** direction -3 ************************/
      /* last contribution: write the completed spinor directly to l */
      _vector_assign(psi, (*phi32[ix]).s0);
      _su3_inverse_multiply(chi,(*U), psi);
      _complexcjg_times_vector(psi,ka3,chi);
      _vector_add((*s).s0, rs.s0, psi);
      _vector_i_add((*s).s2, rs.s2, psi);
      _vector_assign(psi, (*phi32[ix]).s1);
      _su3_inverse_multiply(chi,(*U), psi);
      _complexcjg_times_vector(psi,ka3,chi);
      _vector_add((*s).s1, rs.s1, psi);
      _vector_i_sub((*s).s3, rs.s3, psi);
      U++;
      ix++;
      s++;
    }
  }
  else {
    /* ---- full-precision path (uses psi2/chi2 to pair the two halves) ---- */
    phi = NBPointer[ieo];
    /**************** loop over all lattice sites ****************/
    ix=0;
    /* #pragma ivdep*/
    for(i = 0; i < (VOLUME)/2; i++){
      _vector_assign(rs.s0, (*s).s0);
      _vector_assign(rs.s1, (*s).s1);
      _vector_assign(rs.s2, (*s).s2);
      _vector_assign(rs.s3, (*s).s3);
      s++;
      /*********************** direction +0 ************************/
      _vector_add(psi, rs.s0, rs.s2);
      _vector_add(psi2, rs.s1, rs.s3);
      _su3_multiply(chi,(*U),psi);
      _su3_multiply(chi2,(*U),psi2);
      _complex_times_vector((*phi[ix]).s0, ka0, chi);
      _complex_times_vector((*phi[ix]).s1, ka0, chi2);
      U++;
      ix++;
      /*********************** direction -0 ************************/
      _vector_sub((*phi[ix]).s0, rs.s0, rs.s2);
      _vector_sub((*phi[ix]).s1, rs.s1, rs.s3);
      ix++;
      /*********************** direction +1 ************************/
      _vector_i_add(psi, rs.s0, rs.s3);
      _vector_i_add(psi2, rs.s1, rs.s2);
      _su3_multiply(chi, (*U), psi);
      _su3_multiply(chi2, (*U), psi2);
      _complex_times_vector((*phi[ix]).s0, ka1, chi);
      _complex_times_vector((*phi[ix]).s1, ka1, chi2);
      U++;
      ix++;
      /*********************** direction -1 ************************/
      _vector_i_sub((*phi[ix]).s0, rs.s0, rs.s3);
      _vector_i_sub((*phi[ix]).s1, rs.s1, rs.s2);
      ix++;
      /*********************** direction +2 ************************/
      _vector_add(psi, rs.s0, rs.s3);
      _vector_sub(psi2, rs.s1, rs.s2);
      _su3_multiply(chi,(*U),psi);
      _su3_multiply(chi2,(*U),psi2);
      _complex_times_vector((*phi[ix]).s0, ka2, chi);
      _complex_times_vector((*phi[ix]).s1, ka2, chi2);
      U++;
      ix++;
      /*********************** direction -2 ************************/
      _vector_sub((*phi[ix]).s0, rs.s0, rs.s3);
      _vector_add((*phi[ix]).s1, rs.s1, rs.s2);
      ix++;
      /*********************** direction +3 ************************/
      _vector_i_add(psi, rs.s0, rs.s2);
      _vector_i_sub(psi2, rs.s1, rs.s3);
      _su3_multiply(chi, (*U), psi);
      _su3_multiply(chi2,(*U),psi2);
      _complex_times_vector((*phi[ix]).s0, ka3, chi);
      _complex_times_vector((*phi[ix]).s1, ka3, chi2);
      U++;
      ix++;
      /*********************** direction -3 ************************/
      _vector_i_sub((*phi[ix]).s0, rs.s0, rs.s2);
      _vector_i_add((*phi[ix]).s1, rs.s1, rs.s3);
      ix++;
      /************************ end of loop ************************/
    }
    /* exchange boundary half-spinors between MPI ranks */
# if (defined MPI && !defined _NO_COMM)
    xchange_halffield();
# endif
    s = l;
    phi = NBPointer[2 + ieo];
    if(ieo == 0) {
      U = g_gauge_field_copy[1][0];
    }
    else {
      U = g_gauge_field_copy[0][0];
    }
    ix = 0;
    /* #pragma ivdep */
    for(i = 0; i < (VOLUME)/2; i++){
      /*********************** direction +0 ************************/
      _vector_assign(rs.s0, (*phi[ix]).s0);
      _vector_assign(rs.s2, (*phi[ix]).s0);
      _vector_assign(rs.s1, (*phi[ix]).s1);
      _vector_assign(rs.s3, (*phi[ix]).s1);
      ix++;
      /*********************** direction -0 ************************/
      _su3_inverse_multiply(chi,(*U),(*phi[ix]).s0);
      _su3_inverse_multiply(chi2,(*U),(*phi[ix]).s1);
      _complexcjg_times_vector(psi,ka0,chi);
      _complexcjg_times_vector(psi2,ka0,chi2);
      _vector_add_assign(rs.s0, psi);
      _vector_sub_assign(rs.s2, psi);
      _vector_add_assign(rs.s1, psi2);
      _vector_sub_assign(rs.s3, psi2);
      ix++;
      U++;
      /*********************** direction +1 ************************/
      _vector_add_assign(rs.s0, (*phi[ix]).s0);
      _vector_i_sub_assign(rs.s3, (*phi[ix]).s0);
      _vector_add_assign(rs.s1, (*phi[ix]).s1);
      _vector_i_sub_assign(rs.s2, (*phi[ix]).s1);
      ix++;
      /*********************** direction -1 ************************/
      _su3_inverse_multiply(chi,(*U), (*phi[ix]).s0);
      _su3_inverse_multiply(chi2, (*U), (*phi[ix]).s1);
      _complexcjg_times_vector(psi,ka1,chi);
      _complexcjg_times_vector(psi2,ka1,chi2);
      _vector_add_assign(rs.s0, psi);
      _vector_i_add_assign(rs.s3, psi);
      _vector_add_assign(rs.s1, psi2);
      _vector_i_add_assign(rs.s2, psi2);
      U++;
      ix++;
      /*********************** direction +2 ************************/
      _vector_add_assign(rs.s0, (*phi[ix]).s0);
      _vector_add_assign(rs.s3, (*phi[ix]).s0);
      _vector_add_assign(rs.s1, (*phi[ix]).s1);
      _vector_sub_assign(rs.s2, (*phi[ix]).s1);
      ix++;
      /*********************** direction -2 ************************/
      _su3_inverse_multiply(chi,(*U), (*phi[ix]).s0);
      _su3_inverse_multiply(chi2, (*U), (*phi[ix]).s1);
      _complexcjg_times_vector(psi,ka2,chi);
      _complexcjg_times_vector(psi2,ka2,chi2);
      _vector_add_assign(rs.s0, psi);
      _vector_sub_assign(rs.s3, psi);
      _vector_add_assign(rs.s1, psi2);
      _vector_add_assign(rs.s2, psi2);
      U++;
      ix++;
      /*********************** direction +3 ************************/
      _vector_add_assign(rs.s0, (*phi[ix]).s0);
      _vector_i_sub_assign(rs.s2, (*phi[ix]).s0);
      _vector_add_assign(rs.s1, (*phi[ix]).s1);
      _vector_i_add_assign(rs.s3, (*phi[ix]).s1);
      ix++;
      /*********************** direction -3 ************************/
      /* last contribution: write the completed spinor directly to l */
      _su3_inverse_multiply(chi,(*U), (*phi[ix]).s0);
      _su3_inverse_multiply(chi2, (*U), (*phi[ix]).s1);
      _complexcjg_times_vector(psi,ka3,chi);
      _complexcjg_times_vector(psi2,ka3,chi2);
      _vector_add((*s).s0, rs.s0, psi);
      _vector_i_add((*s).s2, rs.s2, psi);
      _vector_add((*s).s1, rs.s1, psi2);
      _vector_i_sub((*s).s3, rs.s3, psi2);
      U++;
      ix++;
      s++;
    }
  }
#ifdef _KOJAK_INST
#pragma pomp inst end(hoppingmatrix)
#endif
}