Example #1
0
/*
 * jacobi_source - iteratively smear the spinor field sd in place on the
 * time slice selected by src.pos[0] and print the norm of the result.
 *
 * Parameters
 *   sd      spinor field; entries 0..VOLUME-1 are read and overwritten
 *   src     source definition; this function uses src.kappa (hopping weight),
 *           src.n (number of iterations) and src.pos[0..3] (pos[0] selects
 *           the time slice, all four entries are printed)
 *   idirac  not used in this function
 *   icolor  not used in this function
 *
 * Work space: three rows (wsd0, wsd1, wsd2) of NSPIN spinor pointers that
 * point into one zero-initialised block of 3*NSPIN spinors.
 *   wsd0 - input of the current iteration (filled lazily from sd when n == 0,
 *          copied from wsd2 for n > 0)
 *   wsd1 - scratch: per-site hop term (phi) and spinors received from a
 *          neighbouring process
 *   wsd2 - result of the current iteration
 * NOTE(review): the rows are indexed with site indices up to VOLUME-1, so
 * NSPIN has to be at least VOLUME - confirm against the definition of NSPIN.
 *
 * One iteration, for every local site ix with coords[ix].t == src.pos[0]:
 *   psi  = current value at ix
 *   psi += sum over mu = 1..3 of
 *            _su3_inverse_multiply(link at the down neighbour, chi(down))
 *          + _su3_multiply(pud_sm1[ix][mu], chi(up))
 *   psi *= src.kappa
 *   psi += current value at ix
 *   psi *= 1 / (1 + 6 * src.kappa)
 *
 * Neighbours whose index lies in [VOLUME, VOLUME + BNDRY) are obtained with
 * blocking MPI_Sendrecv calls (tag 37) to the ranks npr[2*mu] / npr[2*mu+1].
 * NOTE(review): this presumes the partner ranks issue the matching calls in
 * the same order - confirm for sources that are not on every rank.
 *
 * After the last iteration the result is copied back to sd, scaled by
 * 1/sqrt(src.n * VOLUME * NPROC), and its norm is printed. SourceRadius() is
 * called on the final field in every case, also when src.n <= 0 and no
 * iteration ran.
 */
extern void jacobi_source(spinor_dble *sd, srcdef src, int idirac, int icolor) {

	int i = 0, ix = 0, iy = 0, n = 0, mu = 0, ip1 = 0, ip2 = 0, iw = 0;
	su3_dble *up, *um;
	spinor_dble *chi, *psi, *phi, **wsd0, **wsd1, **wsd2, *p, *(*p0)[NSPIN];
	const spinor_dble sd0 = { { { 0.0 } } };
	double zw[18] = { 0 }, d;
	double norm = 1. / (1. + 6. * src.kappa);
	/* Built from doubles so that the product cannot overflow int arithmetic
	 * on large lattices; identical to the integer product whenever that fits. */
	double rescale = 1. / sqrt((double) src.n * (double) VOLUME * (double) NPROC);

	MPI_Status status1;

	p0 = amalloc(3 * sizeof(*p0), 3);
	p = amalloc(3 * NSPIN * sizeof(spinor_dble), ALIGN);

	/* Wire up the pointer table. p is kept at the start of the spinor block
	 * so that it is still the address to hand back to afree() at the end. */
	for (i = 0; i < 3; i++) {
		for (ix = 0; ix < NSPIN; ix++) {
			p0[i][ix] = p + (i * NSPIN + ix);
			*p0[i][ix] = sd0;
		}
	}

	wsd0 = &(p0[0][0]);
	wsd1 = &(p0[1][0]);
	wsd2 = &(p0[2][0]);

	message("sourceposition = (%d, %d, %d, %d)\n", src.pos[0], src.pos[1],
			src.pos[2], src.pos[3]);

	for (n = 0; n < src.n; n++) {
		/* From the second iteration on, the previous result becomes the input. */
		if (n > 0) {

			for (ix = 0; ix < VOLUME; ix++)
				*wsd0[ix] = *wsd2[ix];

		}

		for (ix = 0; ix < VOLUME; ix++) {
			if (coords[ix].t == src.pos[0]) {

				phi = wsd1[ix];
				if (n == 0)
					*wsd2[ix] = sd[ix];
				psi = wsd2[ix];
				for (mu = 1; mu < 4; mu++) {

					/* ---- neighbour in direction -mu ---- */
					iy = idn[ix][mu];
					if ((iy >= VOLUME) && (iy < (VOLUME + BNDRY))) {

						/* Neighbour is a boundary point: swap link and spinor
						 * with the adjacent processes. */
						ip1 = npr[2 * mu];
						ip2 = npr[2 * mu + 1];
						iw = map[iy - VOLUME];

						if (n == 0)
							*wsd0[iw] = sd[iw];
						*phi = *wsd0[iw];
						chi = wsd1[iw];

						um = pud_sm1[iw][mu];

						/* 18 doubles = one su3_dble link. */
						MPI_Sendrecv((double*) (um), 18, MPI_DOUBLE, ip2, 37,
								zw, 18, MPI_DOUBLE, ip1, 37, MPI_COMM_WORLD,
								&status1);

						um = (su3_dble*) zw;
						/* 24 doubles = one spinor_dble. */
						MPI_Sendrecv((double*) (phi), 24, MPI_DOUBLE, ip2, 37,
								(double*) (chi), 24, MPI_DOUBLE, ip1, 37,
								MPI_COMM_WORLD, &status1);

					} else {
						um = pud_sm1[iy][mu];
						if (n == 0)
							*wsd0[iy] = sd[iy];
						chi = wsd0[iy];
					}
					_su3_inverse_multiply((*phi).c1, (*um), (*chi).c1);
					_su3_inverse_multiply((*phi).c2, (*um), (*chi).c2);
					_su3_inverse_multiply((*phi).c3, (*um), (*chi).c3);
					_su3_inverse_multiply((*phi).c4, (*um), (*chi).c4);
					_vector_add_assign((*psi).c1, (*phi).c1);
					_vector_add_assign((*psi).c2, (*phi).c2);
					_vector_add_assign((*psi).c3, (*phi).c3);
					_vector_add_assign((*psi).c4, (*phi).c4);

					/* ---- neighbour in direction +mu ---- */
					iy = iup[ix][mu];

					if ((iy >= VOLUME) && (iy < (VOLUME + BNDRY))) {
						ip1 = npr[2 * mu];
						ip2 = npr[2 * mu + 1];
						iw = map[iy - VOLUME];

						if (n == 0)
							*wsd0[iw] = sd[iw];
						*phi = *wsd0[iw];
						chi = wsd1[iw];

						/* Only the spinor travels here; the link used below
						 * is the local pud_sm1[ix][mu]. */
						MPI_Sendrecv((double*) (phi), 24, MPI_DOUBLE, ip1, 37,
								(double*) (chi), 24, MPI_DOUBLE, ip2, 37,
								MPI_COMM_WORLD, &status1);
					} else {
						if (n == 0)
							*wsd0[iy] = sd[iy];
						chi = wsd0[iy];
					}

					up = pud_sm1[ix][mu];

					_su3_multiply((*phi).c1, (*up), (*chi).c1);
					_su3_multiply((*phi).c2, (*up), (*chi).c2);
					_su3_multiply((*phi).c3, (*up), (*chi).c3);
					_su3_multiply((*phi).c4, (*up), (*chi).c4);
					_vector_add_assign((*psi).c1, (*phi).c1);
					_vector_add_assign((*psi).c2, (*phi).c2);
					_vector_add_assign((*psi).c3, (*phi).c3);
					_vector_add_assign((*psi).c4, (*phi).c4);
				}

				/* Weight the accumulated value with kappa ... */
				_vector_mul((*psi).c1, src.kappa, (*psi).c1);
				_vector_mul((*psi).c2, src.kappa, (*psi).c2);
				_vector_mul((*psi).c3, src.kappa, (*psi).c3);
				_vector_mul((*psi).c4, src.kappa, (*psi).c4);

				/* ... add the input value of this site once more ... */
				if (n == 0)
					*wsd1[ix] = sd[ix];
				else
					*wsd1[ix] = *wsd0[ix];
				phi = wsd1[ix];
				_vector_add_assign((*psi).c1, (*phi).c1);
				_vector_add_assign((*psi).c2, (*phi).c2);
				_vector_add_assign((*psi).c3, (*phi).c3);
				_vector_add_assign((*psi).c4, (*phi).c4);

				/* ... and apply the 1/(1 + 6 kappa) factor. */
				_spinor_mul(*(psi),norm, (*psi));

			}
		}
		/* Last iteration: write the result back to sd and report its norm. */
		if(n == (src.n - 1)) {
			for(ix = 0; ix < VOLUME; ix++) {
				 sd[ix] = *wsd2[ix];
				 _spinor_mul(sd[ix],rescale,sd[ix]);
			}
			d = norm_square_dble(VOLUME, 1, sd);
			message("norm of the jacobi-smeared vector %.2e\n", sqrt(d));
		}
	}

	SourceRadius(src, sd);
	/* SourceMom4(src, sd) was already disabled in the original code. */

	/* Release exactly the two blocks obtained from amalloc(), each once. */
	afree(p);
	afree(p0);
	p0 = NULL;
}
Example #2
0
/*
 * Hopping_Matrix - read neighbouring spinors from k, combine them with the
 * gauge links and write one result spinor per site into l.
 *
 * For ieo=0, k resides on odd sites and l on even sites.
 *
 * Parameters
 *   ieo  selects the site offset: 0 -> ioff = 0, otherwise
 *        ioff = (VOLUME+RAND)/2
 *   l    output field; entry l[icx-ioff] is written for each of the VOLUME/2
 *        sites icx in [ioff, ioff + VOLUME/2)
 *   k    input field; indexed with g_lexic2eosub[] of the neighbour sites.
 *        Must not be the same pointer as l (the program exits otherwise).
 *
 * For every site the eight directions +0,-0,+1,-1,+2,-2,+3,-3 are handled in
 * that order. Each direction does the same four steps twice (once per pair of
 * spin components):
 *   1. combine two spin components of the neighbour spinor into psi
 *   2. multiply by the link: _su3_multiply for "+" directions,
 *      _su3_inverse_multiply for "-" directions -> chi
 *   3. scale by ka0..ka3: _complex_times_vector for "+",
 *      _complexcjg_times_vector for "-" -> psi
 *   4. accumulate psi into two components of temp
 * The last direction (-3) writes temp plus its own contribution straight to
 * the output spinor *r.
 *
 * psi, chi and ka0..ka3 are not declared in this function; they come from an
 * enclosing scope that is not visible here.
 */
void Hopping_Matrix(int ieo, spinor * const l, spinor * const k){
  int ix,iy;
  int ioff,ioff2,icx,icy;
  su3 * restrict up, * restrict um;
  spinor * restrict r, * restrict sp, * restrict sm;
  spinor temp;

  /* Refresh the copied gauge field first if it has been flagged as stale. */
#ifdef _GAUGE_COPY
  if(g_update_gauge_copy) {
    update_backward_gauge();
  }
#endif

  /* for parallelization */
#    if (defined MPI && !(defined _NO_COMM))
  xchange_field(k, ieo);
#    endif

  /* In-place application is not supported: the loop reads k while writing l. */
  if(k == l){
    printf("Error in H_psi (simple.c):\n");
    printf("Arguments k and l must be different\n");
    printf("Program aborted\n");
    exit(1);
  }
  if(ieo == 0){
    ioff = 0;
  } 
  else{
    ioff = (VOLUME+RAND)/2;
  } 
  /* NOTE(review): ioff2 is computed here but never read in this function. */
  ioff2 = (VOLUME+RAND)/2-ioff;
  /**************** loop over all lattice sites ****************/

  for (icx = ioff; icx < (VOLUME/2 + ioff); icx++){
    /* ix: index of the current site as used by g_iup/g_idn/g_gauge_field */
    ix=g_eo2lexic[icx];

    /* r: output spinor for this site */
    r=l+(icx-ioff);

    /*********************** direction +0 ************************/
    iy=g_iup[ix][0]; icy=g_lexic2eosub[iy];


    sp=k+icy;
    /* With _GAUGE_COPY the eight links of a site are read from consecutive
       su3 entries starting here (up, um=up+1, up=um+1, ...); without it the
       forward links come from g_gauge_field[ix][mu] and the backward links
       from g_gauge_field[iy][mu] of the neighbour. */
#    if ((defined _GAUGE_COPY))
    up=&g_gauge_field_copy[icx][0];
#    else
    up=&g_gauge_field[ix][0];
#    endif
      
    _vector_add(psi,(*sp).s0,(*sp).s2);

    _su3_multiply(chi,(*up),psi);
    _complex_times_vector(psi,ka0,chi);
      
    /* First direction initialises temp instead of accumulating into it. */
    _vector_assign(temp.s0,psi);
    _vector_assign(temp.s2,psi);

    _vector_add(psi,(*sp).s1,(*sp).s3);

    _su3_multiply(chi,(*up),psi);
    _complex_times_vector(psi,ka0,chi);
            
    _vector_assign(temp.s1,psi);
    _vector_assign(temp.s3,psi);

    /*********************** direction -0 ************************/

    iy=g_idn[ix][0]; icy=g_lexic2eosub[iy];

    sm=k+icy;
#    if ((defined _GAUGE_COPY))
    um = up+1;
#    else
    um=&g_gauge_field[iy][0];
#    endif

    _vector_sub(psi,(*sm).s0,(*sm).s2);

    _su3_inverse_multiply(chi,(*um),psi);
    _complexcjg_times_vector(psi,ka0,chi);

    _vector_add_assign(temp.s0,psi);
    _vector_sub_assign(temp.s2,psi);

    _vector_sub(psi,(*sm).s1,(*sm).s3);

    _su3_inverse_multiply(chi,(*um),psi);
    _complexcjg_times_vector(psi,ka0,chi);
      
    _vector_add_assign(temp.s1,psi);
    _vector_sub_assign(temp.s3,psi);

    /*********************** direction +1 ************************/

    iy=g_iup[ix][1]; icy=g_lexic2eosub[iy];

    sp=k+icy;

#    if ((defined _GAUGE_COPY))
    up=um+1;
#    else
    up+=1;
#    endif
      
    _vector_i_add(psi,(*sp).s0,(*sp).s3);

    _su3_multiply(chi,(*up),psi);
    _complex_times_vector(psi,ka1,chi);

    _vector_add_assign(temp.s0,psi);
    _vector_i_sub_assign(temp.s3,psi);

    _vector_i_add(psi,(*sp).s1,(*sp).s2);

    _su3_multiply(chi,(*up),psi);
    _complex_times_vector(psi,ka1,chi);

    _vector_add_assign(temp.s1,psi);
    _vector_i_sub_assign(temp.s2,psi);

    /*********************** direction -1 ************************/

    iy=g_idn[ix][1]; icy=g_lexic2eosub[iy];

    sm=k+icy;
#    ifndef _GAUGE_COPY
    um=&g_gauge_field[iy][1];
#    else
    um=up+1;
#    endif

    _vector_i_sub(psi,(*sm).s0,(*sm).s3);

    _su3_inverse_multiply(chi,(*um),psi);
    _complexcjg_times_vector(psi,ka1,chi);

    _vector_add_assign(temp.s0,psi);
    _vector_i_add_assign(temp.s3,psi);

    _vector_i_sub(psi,(*sm).s1,(*sm).s2);

    _su3_inverse_multiply(chi,(*um),psi);
    _complexcjg_times_vector(psi,ka1,chi);

    _vector_add_assign(temp.s1,psi);
    _vector_i_add_assign(temp.s2,psi);

    /*********************** direction +2 ************************/

    iy=g_iup[ix][2]; icy=g_lexic2eosub[iy];

    sp=k+icy;
#    if ((defined _GAUGE_COPY))
    up=um+1;
#    else
    up+=1;
#    endif 
    _vector_add(psi,(*sp).s0,(*sp).s3);

    _su3_multiply(chi,(*up),psi);
    _complex_times_vector(psi,ka2,chi);

    _vector_add_assign(temp.s0,psi);
    _vector_add_assign(temp.s3,psi);

    _vector_sub(psi,(*sp).s1,(*sp).s2);

    _su3_multiply(chi,(*up),psi);
    _complex_times_vector(psi,ka2,chi);
      
    _vector_add_assign(temp.s1,psi);
    _vector_sub_assign(temp.s2,psi);


    /*********************** direction -2 ************************/

    iy=g_idn[ix][2]; icy=g_lexic2eosub[iy];

    sm=k+icy;
#    ifndef _GAUGE_COPY
    um = &g_gauge_field[iy][2];
#    else
    um = up +1;
#    endif

    _vector_sub(psi,(*sm).s0,(*sm).s3);

    _su3_inverse_multiply(chi,(*um),psi);
    _complexcjg_times_vector(psi,ka2,chi);

    _vector_add_assign(temp.s0,psi);
    _vector_sub_assign(temp.s3,psi);

    _vector_add(psi,(*sm).s1,(*sm).s2);

    _su3_inverse_multiply(chi,(*um),psi);
    _complexcjg_times_vector(psi,ka2,chi);
      
    _vector_add_assign(temp.s1,psi);
    _vector_add_assign(temp.s2,psi);

    /*********************** direction +3 ************************/

    iy=g_iup[ix][3]; icy=g_lexic2eosub[iy];

    sp=k+icy;
#    if ((defined _GAUGE_COPY))
    up=um+1;
#    else
    up+=1;
#    endif 
    _vector_i_add(psi,(*sp).s0,(*sp).s2);
      
    _su3_multiply(chi,(*up),psi);
    _complex_times_vector(psi,ka3,chi);

    _vector_add_assign(temp.s0,psi);
    _vector_i_sub_assign(temp.s2,psi);

    _vector_i_sub(psi,(*sp).s1,(*sp).s3);

    _su3_multiply(chi,(*up),psi);
    _complex_times_vector(psi,ka3,chi);

    _vector_add_assign(temp.s1,psi);
    _vector_i_add_assign(temp.s3,psi);

    /*********************** direction -3 ************************/

    iy=g_idn[ix][3]; icy=g_lexic2eosub[iy];

    sm=k+icy;
#    ifndef _GAUGE_COPY
    um = &g_gauge_field[iy][3];
#    else
    um = up+1;
#    endif

    _vector_i_sub(psi,(*sm).s0,(*sm).s2);

    _su3_inverse_multiply(chi,(*um),psi);
    _complexcjg_times_vector(psi,ka3,chi);
      
    /* Final direction: combine with temp and store directly in the output. */
    _vector_add((*r).s0, temp.s0, psi);
    _vector_i_add((*r).s2, temp.s2, psi);

    _vector_i_add(psi,(*sm).s1,(*sm).s3);

    _su3_inverse_multiply(chi,(*um),psi);
    _complexcjg_times_vector(psi,ka3,chi);

    _vector_add((*r).s1, temp.s1, psi);
    _vector_i_sub((*r).s3, temp.s3, psi);
    /************************ end of loop ************************/
  }
}
Example #3
0
/*
 * Hopping_Matrix - two-pass variant that goes through half-spinor buffers.
 *
 * For ieo=0, k resides on odd sites and l on even sites.
 *
 * Parameters
 *   ieo  selects which gauge-field copy and which pointer tables are used
 *        (see below)
 *   l    output field; VOLUME/2 spinors are written in sequence
 *   k    input field; VOLUME/2 spinors are read in sequence.
 *        Must not be the same pointer as l (the program exits otherwise).
 *
 * Pass 1 (scatter): every input spinor is copied to rs and turned into eight
 * half-spinors, one per direction, stored through the pointer table
 * NBPointer[ieo] (NBPointer32[ieo] in the reduced-precision branch). For the
 * "+" directions the half-spinor is multiplied by the link *U and by ka_mu
 * before it is stored; for the "-" directions the bare spin combination is
 * stored. U advances once per "+" direction (4 links per site), ix advances
 * once per direction (8 entries per site).
 *
 * Between the passes xchange_halffield() / xchange_halffield32() is called
 * when MPI is defined and _NO_COMM is not.
 *
 * Pass 2 (gather): the half-spinors are read back through
 * NBPointer[2 + ieo] (NBPointer32[2 + ieo]) and summed into rs. Entries of
 * "+" directions are added as they are; entries of "-" directions are first
 * passed through _su3_inverse_multiply with *U and
 * _complexcjg_times_vector with ka_mu. The -3 direction writes the final sum
 * to the output spinor *s. The gauge copy used here is the opposite one from
 * pass 1 (index 1 for ieo == 0, index 0 otherwise).
 *
 * The reduced-precision branch is taken when both g_sloppy_precision and
 * g_sloppy_precision_flag equal 1; it works one half at a time with psi/chi.
 * The other branch handles both halves side by side using psi2/chi2 as well.
 *
 * NOTE(review): g_gauge_field_copy is used unconditionally here, while the
 * update_backward_gauge() call is guarded by _GAUGE_COPY - presumably this
 * variant is only compiled with _GAUGE_COPY defined; confirm.
 * ka0..ka3 are not declared in this function.
 */
void Hopping_Matrix(const int ieo, spinor * const l, spinor * const k){
  int i,ix;
  /* U: running cursor over the gauge links; s: running cursor over k (pass 1)
     and l (pass 2). ALIGN is a macro defined elsewhere. */
  su3 * restrict U ALIGN;
  spinor * restrict s ALIGN;
  /* rs: per-site working copy (pass 1) / accumulator (pass 2) */
  spinor rs;
  /* Scratch vectors with static storage, i.e. shared by all calls.
     NOTE(review): concurrent calls would share them - confirm this function
     is never run from several threads at once. */
  static su3_vector psi, chi, psi2, chi2;
  halfspinor * restrict * phi ALIGN;
  halfspinor32 * restrict * phi32 ALIGN;
#ifdef _KOJAK_INST
#pragma pomp inst begin(hoppingmatrix)
#endif
#ifdef XLC
#pragma disjoint(*l, *k, *U, *s)
#endif

  /* Refresh the copied gauge field first if it has been flagged as stale. */
#ifdef _GAUGE_COPY
  if(g_update_gauge_copy) {
    update_backward_gauge();
  }
#endif

  /* In-place application is not supported. */
  if(k == l){
    printf("Error in H_psi (simple.c):\n");
    printf("Arguments k and l must be different\n");
    printf("Program aborted\n");
    exit(1);
  }
  s = k;

  /* Gauge copy for pass 1: index 0 for ieo == 0, index 1 otherwise. */
  if(ieo == 0) {
    U = g_gauge_field_copy[0][0];
  }
  else {
    U = g_gauge_field_copy[1][0];
  }
  if(g_sloppy_precision == 1 && g_sloppy_precision_flag == 1) {
    /* ---------------- reduced-precision branch (halfspinor32) ---------------- */
    phi32 = NBPointer32[ieo];
      
    /**************** loop over all lattice sites ****************/
    /* Pass 1: scatter half-spinors into the phi32 buffers. */
    ix=0;
    for(i = 0; i < (VOLUME)/2; i++){
      _vector_assign(rs.s0, (*s).s0);
      _vector_assign(rs.s1, (*s).s1);
      _vector_assign(rs.s2, (*s).s2);
      _vector_assign(rs.s3, (*s).s3);
      s++;
      /*********************** direction +0 ************************/
      
      _vector_add(psi, rs.s0, rs.s2);

      _su3_multiply(chi,(*U),psi);
      _complex_times_vector((*phi32[ix]).s0, ka0, chi);
      
      _vector_add(psi, rs.s1, rs.s3);

      _su3_multiply(chi,(*U),psi);
      _complex_times_vector((*phi32[ix]).s1, ka0, chi);
            
      U++;
      ix++;
    
      /*********************** direction -0 ************************/

      /* "-" directions store the bare combination; the link is applied in pass 2. */
      _vector_sub((*phi32[ix]).s0, rs.s0, rs.s2);
      _vector_sub((*phi32[ix]).s1, rs.s1, rs.s3);

      ix++;

      /*********************** direction +1 ************************/

      _vector_i_add(psi, rs.s0, rs.s3);

      _su3_multiply(chi, (*U), psi);
      _complex_times_vector((*phi32[ix]).s0, ka1, chi);

      _vector_i_add(psi, rs.s1, rs.s2);

      _su3_multiply(chi, (*U), psi);
      _complex_times_vector((*phi32[ix]).s1, ka1, chi);

      U++;
      ix++;

      /*********************** direction -1 ************************/

      _vector_i_sub((*phi32[ix]).s0, rs.s0, rs.s3);
      _vector_i_sub((*phi32[ix]).s1, rs.s1, rs.s2);

      ix++;
      /*********************** direction +2 ************************/

      _vector_add(psi, rs.s0, rs.s3);

      _su3_multiply(chi,(*U),psi);
      _complex_times_vector((*phi32[ix]).s0, ka2, chi);

      _vector_sub(psi, rs.s1, rs.s2);

      _su3_multiply(chi,(*U),psi);
      _complex_times_vector((*phi32[ix]).s1, ka2, chi);
      
      U++;
      ix++;

      /*********************** direction -2 ************************/

      _vector_sub((*phi32[ix]).s0, rs.s0, rs.s3);
      _vector_add((*phi32[ix]).s1, rs.s1, rs.s2);
      ix++;

      /*********************** direction +3 ************************/

      _vector_i_add(psi, rs.s0, rs.s2);
      
      _su3_multiply(chi, (*U), psi);
      _complex_times_vector((*phi32[ix]).s0, ka3, chi);


      _vector_i_sub(psi, rs.s1, rs.s3);

      _su3_multiply(chi,(*U),psi);
      _complex_times_vector((*phi32[ix]).s1, ka3, chi);

      U++;
      ix++;
      /*********************** direction -3 ************************/

      _vector_i_sub((*phi32[ix]).s0, rs.s0, rs.s2);
      _vector_i_add((*phi32[ix]).s1, rs.s1, rs.s3);

      ix++;
      /************************ end of loop ************************/
    }
#    if (defined MPI && !defined _NO_COMM)
    xchange_halffield32(); 
#    endif
    /* Pass 2: gather from the second pointer table into the output field,
       using the gauge copy of the other index. */
    s = l;
    phi32 = NBPointer32[2 + ieo];
    if(ieo == 0) {
      U = g_gauge_field_copy[1][0];
    }
    else {
      U = g_gauge_field_copy[0][0];
    }

    ix = 0;
    for(i = 0; i < (VOLUME)/2; i++){
      /*********************** direction +0 ************************/
      /* First entry initialises the accumulator rs. */
      _vector_assign(rs.s0, (*phi32[ix]).s0);
      _vector_assign(rs.s2, (*phi32[ix]).s0);
      _vector_assign(rs.s1, (*phi32[ix]).s1);
      _vector_assign(rs.s3, (*phi32[ix]).s1);
      ix++;
      /*********************** direction -0 ************************/
      _vector_assign(psi, (*phi32[ix]).s0);
      _su3_inverse_multiply(chi,(*U), psi);
      _complexcjg_times_vector(psi,ka0,chi);

      _vector_add_assign(rs.s0, psi);
      _vector_sub_assign(rs.s2, psi);

      _vector_assign(psi, (*phi32[ix]).s1);
      _su3_inverse_multiply(chi,(*U), psi);
      _complexcjg_times_vector(psi,ka0,chi);
      
      _vector_add_assign(rs.s1, psi);
      _vector_sub_assign(rs.s3, psi);
      ix++;
      U++;
      /*********************** direction +1 ************************/

      _vector_add_assign(rs.s0, (*phi32[ix]).s0);
      _vector_i_sub_assign(rs.s3, (*phi32[ix]).s0);

      _vector_add_assign(rs.s1, (*phi32[ix]).s1);
      _vector_i_sub_assign(rs.s2, (*phi32[ix]).s1);
    
      ix++;
      /*********************** direction -1 ************************/
      _vector_assign(psi, (*phi32[ix]).s0);
      _su3_inverse_multiply(chi,(*U), psi);
      _complexcjg_times_vector(psi,ka1,chi);

      _vector_add_assign(rs.s0, psi);
      _vector_i_add_assign(rs.s3, psi);

      _vector_assign(psi, (*phi32[ix]).s1);
      _su3_inverse_multiply(chi,(*U), psi);
      _complexcjg_times_vector(psi,ka1,chi);

      _vector_add_assign(rs.s1, psi);
      _vector_i_add_assign(rs.s2, psi);

      U++;
      ix++;

      /*********************** direction +2 ************************/

      _vector_add_assign(rs.s0, (*phi32[ix]).s0);
      _vector_add_assign(rs.s3, (*phi32[ix]).s0);

      _vector_add_assign(rs.s1, (*phi32[ix]).s1);
      _vector_sub_assign(rs.s2, (*phi32[ix]).s1);
    
      ix++;
      /*********************** direction -2 ************************/

      _vector_assign(psi, (*phi32[ix]).s0);
      _su3_inverse_multiply(chi,(*U), psi);
      _complexcjg_times_vector(psi,ka2,chi);

      _vector_add_assign(rs.s0, psi);
      _vector_sub_assign(rs.s3, psi);

      _vector_assign(psi, (*phi32[ix]).s1);
      _su3_inverse_multiply(chi, (*U), psi);
      _complexcjg_times_vector(psi,ka2,chi);
      
      _vector_add_assign(rs.s1, psi);
      _vector_add_assign(rs.s2, psi);

      U++;
      ix++;
      /*********************** direction +3 ************************/

      _vector_add_assign(rs.s0, (*phi32[ix]).s0);
      _vector_i_sub_assign(rs.s2, (*phi32[ix]).s0);

      _vector_add_assign(rs.s1, (*phi32[ix]).s1);
      _vector_i_add_assign(rs.s3, (*phi32[ix]).s1);

      ix++;

      /*********************** direction -3 ************************/

      _vector_assign(psi, (*phi32[ix]).s0);
      _su3_inverse_multiply(chi,(*U), psi);
      _complexcjg_times_vector(psi,ka3,chi);
      
      /* Final direction: write accumulator plus last term to the output. */
      _vector_add((*s).s0, rs.s0, psi);
      _vector_i_add((*s).s2, rs.s2, psi);

      _vector_assign(psi, (*phi32[ix]).s1);
      _su3_inverse_multiply(chi,(*U), psi);
      _complexcjg_times_vector(psi,ka3,chi);

      _vector_add((*s).s1, rs.s1, psi);
      _vector_i_sub((*s).s3, rs.s3, psi);

      U++;
      ix++;
      s++;
    }
  }
  else {
    /* ---------------- default branch (halfspinor) ---------------- */
    phi = NBPointer[ieo];
      
    /**************** loop over all lattice sites ****************/
    /* Pass 1: scatter half-spinors into the phi buffers; both halves of a
       direction are processed together via psi/chi and psi2/chi2. */
    ix=0;
    /* #pragma ivdep*/
    for(i = 0; i < (VOLUME)/2; i++){
      _vector_assign(rs.s0, (*s).s0);
      _vector_assign(rs.s1, (*s).s1);
      _vector_assign(rs.s2, (*s).s2);
      _vector_assign(rs.s3, (*s).s3);
      s++;
      /*********************** direction +0 ************************/
      
      _vector_add(psi, rs.s0, rs.s2);
      _vector_add(psi2, rs.s1, rs.s3);
      _su3_multiply(chi,(*U),psi);
      _su3_multiply(chi2,(*U),psi2);
      _complex_times_vector((*phi[ix]).s0, ka0, chi);
      _complex_times_vector((*phi[ix]).s1, ka0, chi2);
            
      U++;
      ix++;
    
      /*********************** direction -0 ************************/

      /* "-" directions store the bare combination; the link is applied in pass 2. */
      _vector_sub((*phi[ix]).s0, rs.s0, rs.s2);
      _vector_sub((*phi[ix]).s1, rs.s1, rs.s3);

      ix++;

      /*********************** direction +1 ************************/

      _vector_i_add(psi, rs.s0, rs.s3);
      _vector_i_add(psi2, rs.s1, rs.s2);
      _su3_multiply(chi, (*U), psi);
      _su3_multiply(chi2, (*U), psi2);
      _complex_times_vector((*phi[ix]).s0, ka1, chi);
      _complex_times_vector((*phi[ix]).s1, ka1, chi2);

      U++;
      ix++;

      /*********************** direction -1 ************************/

      _vector_i_sub((*phi[ix]).s0, rs.s0, rs.s3);
      _vector_i_sub((*phi[ix]).s1, rs.s1, rs.s2);

      ix++;
      /*********************** direction +2 ************************/

      _vector_add(psi, rs.s0, rs.s3);
      _vector_sub(psi2, rs.s1, rs.s2);
      _su3_multiply(chi,(*U),psi);
      _su3_multiply(chi2,(*U),psi2);
      _complex_times_vector((*phi[ix]).s0, ka2, chi);
      _complex_times_vector((*phi[ix]).s1, ka2, chi2);
      
      U++;
      ix++;

      /*********************** direction -2 ************************/

      _vector_sub((*phi[ix]).s0, rs.s0, rs.s3);
      _vector_add((*phi[ix]).s1, rs.s1, rs.s2);
      ix++;

      /*********************** direction +3 ************************/

      _vector_i_add(psi, rs.s0, rs.s2);
      _vector_i_sub(psi2, rs.s1, rs.s3);      
      _su3_multiply(chi, (*U), psi);
      _su3_multiply(chi2,(*U),psi2);
      _complex_times_vector((*phi[ix]).s0, ka3, chi);
      _complex_times_vector((*phi[ix]).s1, ka3, chi2);

      U++;
      ix++;
      /*********************** direction -3 ************************/

      _vector_i_sub((*phi[ix]).s0, rs.s0, rs.s2);
      _vector_i_add((*phi[ix]).s1, rs.s1, rs.s3);

      ix++;
      /************************ end of loop ************************/
    }
#    if (defined MPI && !defined _NO_COMM)
    xchange_halffield(); 
#    endif
    /* Pass 2: gather from the second pointer table into the output field,
       using the gauge copy of the other index. */
    s = l;
    phi = NBPointer[2 + ieo];
    if(ieo == 0) {
      U = g_gauge_field_copy[1][0];
    }
    else {
      U = g_gauge_field_copy[0][0];
    }

    ix = 0;
    /* #pragma ivdep */
    for(i = 0; i < (VOLUME)/2; i++){
      /*********************** direction +0 ************************/
      /* First entry initialises the accumulator rs. */
      _vector_assign(rs.s0, (*phi[ix]).s0);
      _vector_assign(rs.s2, (*phi[ix]).s0);
      _vector_assign(rs.s1, (*phi[ix]).s1);
      _vector_assign(rs.s3, (*phi[ix]).s1);
      ix++;
      /*********************** direction -0 ************************/
      _su3_inverse_multiply(chi,(*U),(*phi[ix]).s0);
      _su3_inverse_multiply(chi2,(*U),(*phi[ix]).s1);
      _complexcjg_times_vector(psi,ka0,chi);
      _complexcjg_times_vector(psi2,ka0,chi2);
      _vector_add_assign(rs.s0, psi);
      _vector_sub_assign(rs.s2, psi);
      _vector_add_assign(rs.s1, psi2);
      _vector_sub_assign(rs.s3, psi2);
      ix++;
      U++;
      /*********************** direction +1 ************************/

      _vector_add_assign(rs.s0, (*phi[ix]).s0);
      _vector_i_sub_assign(rs.s3, (*phi[ix]).s0);

      _vector_add_assign(rs.s1, (*phi[ix]).s1);
      _vector_i_sub_assign(rs.s2, (*phi[ix]).s1);
    
      ix++;
      /*********************** direction -1 ************************/

      _su3_inverse_multiply(chi,(*U), (*phi[ix]).s0);
      _su3_inverse_multiply(chi2, (*U), (*phi[ix]).s1);
      _complexcjg_times_vector(psi,ka1,chi);
      _complexcjg_times_vector(psi2,ka1,chi2);
      _vector_add_assign(rs.s0, psi);
      _vector_i_add_assign(rs.s3, psi);
      _vector_add_assign(rs.s1, psi2);
      _vector_i_add_assign(rs.s2, psi2);

      U++;
      ix++;

      /*********************** direction +2 ************************/

      _vector_add_assign(rs.s0, (*phi[ix]).s0);
      _vector_add_assign(rs.s3, (*phi[ix]).s0);

      _vector_add_assign(rs.s1, (*phi[ix]).s1);
      _vector_sub_assign(rs.s2, (*phi[ix]).s1);
    
      ix++;
      /*********************** direction -2 ************************/

      _su3_inverse_multiply(chi,(*U), (*phi[ix]).s0);
      _su3_inverse_multiply(chi2, (*U), (*phi[ix]).s1);
      _complexcjg_times_vector(psi,ka2,chi);
      _complexcjg_times_vector(psi2,ka2,chi2);
      _vector_add_assign(rs.s0, psi);
      _vector_sub_assign(rs.s3, psi);
      _vector_add_assign(rs.s1, psi2);
      _vector_add_assign(rs.s2, psi2);

      U++;
      ix++;
      /*********************** direction +3 ************************/

      _vector_add_assign(rs.s0, (*phi[ix]).s0);
      _vector_i_sub_assign(rs.s2, (*phi[ix]).s0);

      _vector_add_assign(rs.s1, (*phi[ix]).s1);
      _vector_i_add_assign(rs.s3, (*phi[ix]).s1);

      ix++;

      /*********************** direction -3 ************************/

      _su3_inverse_multiply(chi,(*U), (*phi[ix]).s0);
      _su3_inverse_multiply(chi2, (*U), (*phi[ix]).s1);
      _complexcjg_times_vector(psi,ka3,chi);
      _complexcjg_times_vector(psi2,ka3,chi2);      
      /* Final direction: write accumulator plus last term to the output. */
      _vector_add((*s).s0, rs.s0, psi);
      _vector_i_add((*s).s2, rs.s2, psi);
      _vector_add((*s).s1, rs.s1, psi2);
      _vector_i_sub((*s).s3, rs.s3, psi2);

      U++;
      ix++;
      s++;
    }
  }
#ifdef _KOJAK_INST
#pragma pomp inst end(hoppingmatrix)
#endif
}