Exemplo n.º 1
0
void mul_one_pm_imu_inv(spinor * const l, const double _sign, const int N){
#ifdef OMP
#pragma omp parallel
  {
#endif
  _Complex double ALIGN z,w;
  int ix;
  double sign=-1.; 
  spinor *r;

  su3_vector ALIGN phi1;

  double ALIGN nrm = 1./(1.+g_mu*g_mu);

  if(_sign < 0.){
    sign = 1.; 
  }

  z = nrm + (sign * nrm * g_mu) * I;
  w = conj(z);
  /************ loop over all lattice sites ************/
#ifdef OMP
#pragma omp for
#endif
  for(ix = 0; ix < N; ix++){
    r=l + ix;
    /* Multiply the spinorfield with the inverse of 1+imu\gamma_5 */
#if ( defined SSE2 || defined SSE3 )
    _prefetch_spinor((r+predist)); 
    _sse_load_up(r->s0);
    _sse_vector_cmplx_mul(z);
    _sse_store_nt_up(r->s0);
    _sse_load_up(r->s1);
    _sse_vector_cmplx_mul_two();
    _sse_store_nt_up(r->s1);
    _sse_load_up(r->s2);
    _sse_vector_cmplx_mul(w);
    _sse_store_nt_up(r->s2);
    _sse_load_up(r->s3);
    _sse_vector_cmplx_mul_two();
    _sse_store_nt_up(r->s3);
#else
    _complex_times_vector(phi1, z, r->s0);
    _vector_assign(r->s0, phi1);
    _complex_times_vector(phi1, z, r->s1);
    _vector_assign(r->s1, phi1);
    _complex_times_vector(phi1, w, r->s2);
    _vector_assign(r->s2, phi1);
    _complex_times_vector(phi1, w, r->s3);
    _vector_assign(r->s3, phi1);
#endif
  }

#ifdef OMP
  } /* OpenMP closing brace */
#endif

}
Exemplo n.º 2
0
void apply_gtrafo_spinor_even (spinor * spin, su3 * trafofield) {

  int it, iz, iy, ix;
  int pos;
  int evenpos;
  
  spinor temp;
  
  if (g_proc_id == 0) {
    printf("Applying  gauge transformation to even spinor...");
  }
  
  for (it = 0; it < T; it++) {
    for (ix = 0; ix < LX; ix++) { 
      for (iy = 0; iy < LY; iy++) {
        for (iz = 0; iz < LZ; iz++) {
         
          if ((it + ix + iy + iz) % 2 == 0) {
          
            /* even positions */
            pos = g_ipt[it][ix][iy][iz];
            evenpos = g_lexic2eosub[ pos  ];

            _su3_multiply(temp.s0, trafofield[pos], spin[evenpos].s0);
            _su3_multiply(temp.s1, trafofield[pos], spin[evenpos].s1);
            _su3_multiply(temp.s2, trafofield[pos], spin[evenpos].s2);
            _su3_multiply(temp.s3, trafofield[pos], spin[evenpos].s3);
            
            _vector_assign(spin[evenpos].s0, temp.s0);
            _vector_assign(spin[evenpos].s1, temp.s1);
            _vector_assign(spin[evenpos].s2, temp.s2);
            _vector_assign(spin[evenpos].s3, temp.s3);
            
          }
        }
      }
    } 
  }
  
  if (g_proc_id == 0) {
    printf("done\n");
  }
  
}
Exemplo n.º 3
0
/* direction +t */
void boundary_D_0(spinor * const r, spinor * const s, su3 * const u) {
  static su3_vector chi, psi;

  _vector_add(psi,s->s0,s->s2);

  _su3_multiply(chi,(*u),psi);

  _complex_times_vector(r->s0, phase_0, chi);
  _vector_assign(r->s2,r->s0);

  _vector_add(psi,s->s1,s->s3);

  _su3_multiply(chi,(*u),psi);

  _complex_times_vector(r->s1, phase_0, chi);
  _vector_assign(r->s3, r->s1);

  return;
}
Exemplo n.º 4
0
void mul_one_pm_imu(spinor * const l, const double _sign){
#ifdef OMP
#pragma omp parallel
  {
#endif
  _Complex double z,w;
  int ix;
  double sign = 1.; 
  spinor *r;

  su3_vector ALIGN phi1;

  if(_sign < 0.){
    sign = -1.; 
  }

  z = 1. + (sign * g_mu) * I;
  w = conj(z);

  /************ loop over all lattice sites ************/
#ifdef OMP
#pragma omp for
#endif
  for(ix = 0; ix < (VOLUME/2); ix++){
    r=l+ix;
    /* Multiply the spinorfield with 1+imu\gamma_5 */
    _complex_times_vector(phi1, z, r->s0);
    _vector_assign(r->s0, phi1);
    _complex_times_vector(phi1, z, r->s1);
    _vector_assign(r->s1, phi1);
    _complex_times_vector(phi1, w, r->s2);
    _vector_assign(r->s2, phi1);
    _complex_times_vector(phi1, w, r->s3);
    _vector_assign(r->s3, phi1);
  }

#ifdef OMP
  } /* OpenMP closing brace */
#endif

}
Exemplo n.º 5
0
void apply_inv_gtrafo_spinor (spinor * spin, su3 * trafofield) {

 int it, iz, iy, ix;
 int pos;
  
 spinor temp;
 
 if(g_proc_id == 0) {
   printf("Applying INVERSE gauge transformation to spinor...");
 }
 
 for (it = 0; it < T; it++) {
  for (ix = 0; ix < LX; ix++) {
    for (iy = 0; iy < LY; iy++) {
      for (iz = 0; iz < LZ; iz++) {
      
        pos = g_ipt[it][ix][iy][iz];
        
        _su3_inverse_multiply(temp.s0, trafofield[pos], spin[pos].s0);
        _su3_inverse_multiply(temp.s1, trafofield[pos], spin[pos].s1);
        _su3_inverse_multiply(temp.s2, trafofield[pos], spin[pos].s2);
        _su3_inverse_multiply(temp.s3, trafofield[pos], spin[pos].s3);
        
        _vector_assign(spin[pos].s0,temp.s0);
        _vector_assign(spin[pos].s1,temp.s1);
        _vector_assign(spin[pos].s2,temp.s2);
        _vector_assign(spin[pos].s3,temp.s3);
      
        }
      }
    } 
  }
  
  if (g_proc_id == 0) {
    printf("done\n");
  }
  
}
Exemplo n.º 6
0
int vector_assign(Vector* vector, size_t index, void* element) {
    assert(vector != NULL);
    assert(element != NULL);
    assert(index < vector->size);

    if (vector == NULL) return VECTOR_ERROR;
    if (element == NULL) return VECTOR_ERROR;
    if (vector->element_size == 0) return VECTOR_ERROR;
    if (index >= vector->size) return VECTOR_ERROR;

    _vector_assign(vector, index, element);

    return VECTOR_SUCCESS;
}
Exemplo n.º 7
0
/* Insertion */
int vector_push_back(Vector* vector, void* element) {
    assert(vector != NULL);
    assert(element != NULL);

    if (_vector_should_grow(vector)) {
        if (_vector_adjust_capacity(vector) == VECTOR_ERROR) {
            return VECTOR_ERROR;
        }
    }

    _vector_assign(vector, vector->size, element);

    ++vector->size;

    return VECTOR_SUCCESS;
}
Exemplo n.º 8
0
/* for ieo=0, k resides on  odd sites and l on even sites */
void Hopping_Matrix(int ieo, spinor * const l, spinor * const k){
  int ix,iy;
  int ioff,ioff2,icx,icy;
  su3 * restrict up, * restrict um;
  spinor * restrict r, * restrict sp, * restrict sm;
  spinor temp;

#ifdef _GAUGE_COPY
  if(g_update_gauge_copy) {
    update_backward_gauge();
  }
#endif

  /* for parallelization */
#    if (defined MPI && !(defined _NO_COMM))
  xchange_field(k, ieo);
#    endif

  if(k == l){
    printf("Error in H_psi (simple.c):\n");
    printf("Arguments k and l must be different\n");
    printf("Program aborted\n");
    exit(1);
  }
  if(ieo == 0){
    ioff = 0;
  } 
  else{
    ioff = (VOLUME+RAND)/2;
  } 
  ioff2 = (VOLUME+RAND)/2-ioff;
  /**************** loop over all lattice sites ****************/

  for (icx = ioff; icx < (VOLUME/2 + ioff); icx++){
    ix=g_eo2lexic[icx];

    r=l+(icx-ioff);

    /*********************** direction +0 ************************/
    iy=g_iup[ix][0]; icy=g_lexic2eosub[iy];


    sp=k+icy;
#    if ((defined _GAUGE_COPY))
    up=&g_gauge_field_copy[icx][0];
#    else
    up=&g_gauge_field[ix][0];
#    endif
      
    _vector_add(psi,(*sp).s0,(*sp).s2);

    _su3_multiply(chi,(*up),psi);
    _complex_times_vector(psi,ka0,chi);
      
    _vector_assign(temp.s0,psi);
    _vector_assign(temp.s2,psi);

    _vector_add(psi,(*sp).s1,(*sp).s3);

    _su3_multiply(chi,(*up),psi);
    _complex_times_vector(psi,ka0,chi);
            
    _vector_assign(temp.s1,psi);
    _vector_assign(temp.s3,psi);

    /*********************** direction -0 ************************/

    iy=g_idn[ix][0]; icy=g_lexic2eosub[iy];

    sm=k+icy;
#    if ((defined _GAUGE_COPY))
    um = up+1;
#    else
    um=&g_gauge_field[iy][0];
#    endif

    _vector_sub(psi,(*sm).s0,(*sm).s2);

    _su3_inverse_multiply(chi,(*um),psi);
    _complexcjg_times_vector(psi,ka0,chi);

    _vector_add_assign(temp.s0,psi);
    _vector_sub_assign(temp.s2,psi);

    _vector_sub(psi,(*sm).s1,(*sm).s3);

    _su3_inverse_multiply(chi,(*um),psi);
    _complexcjg_times_vector(psi,ka0,chi);
      
    _vector_add_assign(temp.s1,psi);
    _vector_sub_assign(temp.s3,psi);

    /*********************** direction +1 ************************/

    iy=g_iup[ix][1]; icy=g_lexic2eosub[iy];

    sp=k+icy;

#    if ((defined _GAUGE_COPY))
    up=um+1;
#    else
    up+=1;
#    endif
      
    _vector_i_add(psi,(*sp).s0,(*sp).s3);

    _su3_multiply(chi,(*up),psi);
    _complex_times_vector(psi,ka1,chi);

    _vector_add_assign(temp.s0,psi);
    _vector_i_sub_assign(temp.s3,psi);

    _vector_i_add(psi,(*sp).s1,(*sp).s2);

    _su3_multiply(chi,(*up),psi);
    _complex_times_vector(psi,ka1,chi);

    _vector_add_assign(temp.s1,psi);
    _vector_i_sub_assign(temp.s2,psi);

    /*********************** direction -1 ************************/

    iy=g_idn[ix][1]; icy=g_lexic2eosub[iy];

    sm=k+icy;
#    ifndef _GAUGE_COPY
    um=&g_gauge_field[iy][1];
#    else
    um=up+1;
#    endif

    _vector_i_sub(psi,(*sm).s0,(*sm).s3);

    _su3_inverse_multiply(chi,(*um),psi);
    _complexcjg_times_vector(psi,ka1,chi);

    _vector_add_assign(temp.s0,psi);
    _vector_i_add_assign(temp.s3,psi);

    _vector_i_sub(psi,(*sm).s1,(*sm).s2);

    _su3_inverse_multiply(chi,(*um),psi);
    _complexcjg_times_vector(psi,ka1,chi);

    _vector_add_assign(temp.s1,psi);
    _vector_i_add_assign(temp.s2,psi);

    /*********************** direction +2 ************************/

    iy=g_iup[ix][2]; icy=g_lexic2eosub[iy];

    sp=k+icy;
#    if ((defined _GAUGE_COPY))
    up=um+1;
#    else
    up+=1;
#    endif 
    _vector_add(psi,(*sp).s0,(*sp).s3);

    _su3_multiply(chi,(*up),psi);
    _complex_times_vector(psi,ka2,chi);

    _vector_add_assign(temp.s0,psi);
    _vector_add_assign(temp.s3,psi);

    _vector_sub(psi,(*sp).s1,(*sp).s2);

    _su3_multiply(chi,(*up),psi);
    _complex_times_vector(psi,ka2,chi);
      
    _vector_add_assign(temp.s1,psi);
    _vector_sub_assign(temp.s2,psi);


    /*********************** direction -2 ************************/

    iy=g_idn[ix][2]; icy=g_lexic2eosub[iy];

    sm=k+icy;
#    ifndef _GAUGE_COPY
    um = &g_gauge_field[iy][2];
#    else
    um = up +1;
#    endif

    _vector_sub(psi,(*sm).s0,(*sm).s3);

    _su3_inverse_multiply(chi,(*um),psi);
    _complexcjg_times_vector(psi,ka2,chi);

    _vector_add_assign(temp.s0,psi);
    _vector_sub_assign(temp.s3,psi);

    _vector_add(psi,(*sm).s1,(*sm).s2);

    _su3_inverse_multiply(chi,(*um),psi);
    _complexcjg_times_vector(psi,ka2,chi);
      
    _vector_add_assign(temp.s1,psi);
    _vector_add_assign(temp.s2,psi);

    /*********************** direction +3 ************************/

    iy=g_iup[ix][3]; icy=g_lexic2eosub[iy];

    sp=k+icy;
#    if ((defined _GAUGE_COPY))
    up=um+1;
#    else
    up+=1;
#    endif 
    _vector_i_add(psi,(*sp).s0,(*sp).s2);
      
    _su3_multiply(chi,(*up),psi);
    _complex_times_vector(psi,ka3,chi);

    _vector_add_assign(temp.s0,psi);
    _vector_i_sub_assign(temp.s2,psi);

    _vector_i_sub(psi,(*sp).s1,(*sp).s3);

    _su3_multiply(chi,(*up),psi);
    _complex_times_vector(psi,ka3,chi);

    _vector_add_assign(temp.s1,psi);
    _vector_i_add_assign(temp.s3,psi);

    /*********************** direction -3 ************************/

    iy=g_idn[ix][3]; icy=g_lexic2eosub[iy];

    sm=k+icy;
#    ifndef _GAUGE_COPY
    um = &g_gauge_field[iy][3];
#    else
    um = up+1;
#    endif

    _vector_i_sub(psi,(*sm).s0,(*sm).s2);

    _su3_inverse_multiply(chi,(*um),psi);
    _complexcjg_times_vector(psi,ka3,chi);
      
    _vector_add((*r).s0, temp.s0, psi);
    _vector_i_add((*r).s2, temp.s2, psi);

    _vector_i_add(psi,(*sm).s1,(*sm).s3);

    _su3_inverse_multiply(chi,(*um),psi);
    _complexcjg_times_vector(psi,ka3,chi);

    _vector_add((*r).s1, temp.s1, psi);
    _vector_i_sub((*r).s3, temp.s3, psi);
    /************************ end of loop ************************/
  }
}
Exemplo n.º 9
0
void Hopping_Matrix(const int ieo, spinor * const l, spinor * const k){
  int ix, i;
  su3 * restrict U ALIGN;
  static spinor rs;
  spinor * restrict s ALIGN;
  halfspinor ** phi ALIGN;
#if defined OPTERON
  const int predist=2;
#else
  const int predist=1;
#endif
#ifdef _KOJAK_INST
#pragma pomp inst begin(hoppingmatrix)
#endif

#ifdef _GAUGE_COPY
  if(g_update_gauge_copy) {
    update_backward_gauge();
  }
#endif
  /* We will run through the source vector now */
  /* instead of the solution vector            */
  s = k;
  _prefetch_spinor(s);

  if(ieo == 0) {
    U = g_gauge_field_copy[0][0];
  }
  else {
    U = g_gauge_field_copy[1][0];
  }
  phi = NBPointer[ieo];

  _prefetch_su3(U);
  /**************** loop over all lattice sites ******************/
  ix=0;
  for(i = 0; i < (VOLUME)/2; i++){

    /*********************** direction +0 ************************/
    _prefetch_su3(U+predist);

    _sse_load((*s).s0);
    _sse_load_up((*s).s2);
    _sse_vector_add();

    _sse_su3_multiply((*U));
    _sse_vector_cmplx_mul(ka0);
    _sse_store_nt_up((*phi[ix]).s0);

    _sse_load((*s).s1);
    _sse_load_up((*s).s3);
    _sse_vector_add();
      
    _sse_su3_multiply((*U));
    _sse_vector_cmplx_mul(ka0);
    _sse_store_nt_up((*phi[ix]).s1);
    U++;
    ix++;
    /*********************** direction -0 ************************/
    _sse_load((*s).s0);
    _sse_load_up((*s).s2);
    _sse_vector_sub();
    _sse_store_nt((*phi[ix]).s0);

    _sse_load((*s).s1);
    _sse_load_up((*s).s3);
    _sse_vector_sub();
    _sse_store_nt((*phi[ix]).s1);
    ix++;

    /*********************** direction +1 ************************/
    _prefetch_su3(U+predist);

    _sse_load((*s).s0);
    /*next not needed?*/
    _sse_load_up((*s).s3);
    _sse_vector_i_mul();
    _sse_vector_add();

    _sse_su3_multiply((*U));
    _sse_vector_cmplx_mul(ka1);
    _sse_store_nt_up((*phi[ix]).s0);

    _sse_load((*s).s1);
    _sse_load_up((*s).s2);
    _sse_vector_i_mul();
    _sse_vector_add();

    _sse_su3_multiply((*U));
    _sse_vector_cmplx_mul(ka1);
    _sse_store_nt_up((*phi[ix]).s1);
    ix++;
    U++;

    /*********************** direction -1 ************************/
    _sse_load((*s).s0);
    _sse_load_up((*s).s3);
    _sse_vector_i_mul();
    _sse_vector_sub();
    _sse_store_nt((*phi[ix]).s0);

    _sse_load((*s).s1);
    _sse_load_up((*s).s2);
    _sse_vector_i_mul();
    _sse_vector_sub();
    _sse_store_nt((*phi[ix]).s1);
    ix++;

    /*********************** direction +2 ************************/
    _prefetch_su3(U+predist);

    _sse_load((*s).s0);
    _sse_load_up((*s).s3);
    _sse_vector_add();

    _sse_su3_multiply((*U));
    _sse_vector_cmplx_mul(ka2);
    _sse_store_nt_up((*phi[ix]).s0);

    _sse_load((*s).s1);
    _sse_load_up((*s).s2);
    _sse_vector_sub();

    _sse_su3_multiply((*U));
    _sse_vector_cmplx_mul(ka2);
    _sse_store_nt_up((*phi[ix]).s1);
    ix++;
    U++;
    /*********************** direction -2 ************************/
    _sse_load((*s).s0);
    _sse_load_up((*s).s3);
    _sse_vector_sub();
    _sse_store_nt((*phi[ix]).s0);

    _sse_load((*s).s1);
    _sse_load_up((*s).s2);
    _sse_vector_add();
    _sse_store_nt((*phi[ix]).s1);
    ix++;

    /*********************** direction +3 ************************/
    _prefetch_su3(U+predist);
    _prefetch_spinor(s+1);

    _sse_load((*s).s0);
    _sse_load_up((*s).s2);
    _sse_vector_i_mul();
    _sse_vector_add();

    _sse_su3_multiply((*U));
    _sse_vector_cmplx_mul(ka3);
    _sse_store_nt_up((*phi[ix]).s0);

    _sse_load((*s).s1);
    _sse_load_up((*s).s3);
    _sse_vector_i_mul();
    _sse_vector_sub();

    _sse_su3_multiply((*U));
    _sse_vector_cmplx_mul(ka3);
    _sse_store_nt_up((*phi[ix]).s1);
    ix++;
    U++;

    /*********************** direction -3 ************************/
    _sse_load((*s).s0);
    _sse_load_up((*s).s2);
    _sse_vector_i_mul();
    _sse_vector_sub();
    _sse_store_nt((*phi[ix]).s0);

    _sse_load((*s).s1);
    _sse_load_up((*s).s3);
    _sse_vector_i_mul();
    _sse_vector_add();
    _sse_store_nt((*phi[ix]).s1);
    ix++;
    s++;
  }

#    if (defined MPI && !defined _NO_COMM)
  xchange_halffield(); 
#    endif
  s = l;
  phi = NBPointer[2 + ieo];
  if(ieo == 0) {
    U = g_gauge_field_copy[1][0];
  }
  else {
    U = g_gauge_field_copy[0][0];
  }
  _prefetch_su3(U);
  
  /* Now we sum up and expand to a full spinor */
  ix = 0;
  for(i = 0; i < (VOLUME)/2; i++){
    /*********************** direction +0 ************************/
    _vector_assign(rs.s0, (*phi[ix]).s0);
    _vector_assign(rs.s2, (*phi[ix]).s0);
    _vector_assign(rs.s1, (*phi[ix]).s1);
    _vector_assign(rs.s3, (*phi[ix]).s1);
    ix++;

    /*********************** direction -0 ************************/
    _prefetch_su3(U+predist);
      
    _sse_load((*phi[ix]).s0);
    _sse_su3_inverse_multiply((*U));
    _sse_vector_cmplxcg_mul(ka0);
    _sse_load(rs.s0);
    _sse_vector_add();
    _sse_store(rs.s0);

    _sse_load(rs.s2);
    _sse_vector_sub();
    _sse_store(rs.s2);

    _sse_load((*phi[ix]).s1);
    _sse_su3_inverse_multiply((*U));
    _sse_vector_cmplxcg_mul(ka0);

    _sse_load(rs.s1);
    _sse_vector_add();
    _sse_store(rs.s1);

    _sse_load(rs.s3);
    _sse_vector_sub();
    _sse_store(rs.s3);

    ix++;
    U++;
    /*********************** direction +1 ************************/
    _sse_load_up((*phi[ix]).s0);
    _sse_load(rs.s0);
    _sse_vector_add();
    _sse_store(rs.s0);

    _sse_load(rs.s3);
    _sse_vector_i_mul();      
    _sse_vector_sub();
    _sse_store(rs.s3); 

    _sse_load_up((*phi[ix]).s1);
    _sse_load(rs.s1);
    _sse_vector_add();
    _sse_store(rs.s1);

    _sse_load(rs.s2);
    _sse_vector_i_mul();      
    _sse_vector_sub();
    _sse_store(rs.s2);       
    ix++;

    /*********************** direction -1 ************************/

    _prefetch_su3(U+predist);

    _sse_load((*phi[ix]).s0);
    _sse_su3_inverse_multiply((*U));
    _sse_vector_cmplxcg_mul(ka1);

    _sse_load(rs.s0);
    _sse_vector_add();
    _sse_store(rs.s0);

    _sse_load(rs.s3);
    _sse_vector_i_mul();      
    _sse_vector_add();
    _sse_store(rs.s3);

    _sse_load((*phi[ix]).s1);
      
    _sse_su3_inverse_multiply((*U));
    _sse_vector_cmplxcg_mul(ka1);

    _sse_load(rs.s1);
    _sse_vector_add();
    _sse_store(rs.s1);

    _sse_load(rs.s2);
    _sse_vector_i_mul();      
    _sse_vector_add();
    _sse_store(rs.s2);
    ix++;
    U++;

    /*********************** direction +2 ************************/
    _sse_load_up((*phi[ix]).s0);
    _sse_load(rs.s0);
    _sse_vector_add();
    _sse_store(rs.s0);

    _sse_load(rs.s3);
    _sse_vector_add();
    _sse_store(rs.s3);

    _sse_load_up((*phi[ix]).s1);
    _sse_load(rs.s1);
    _sse_vector_add();
    _sse_store(rs.s1);

    _sse_load(rs.s2);
    _sse_vector_sub();
    _sse_store(rs.s2);      
    ix++;

    /*********************** direction -2 ************************/

    _prefetch_su3(U+predist);

    _sse_load((*phi[ix]).s0);
    _sse_su3_inverse_multiply((*U));
    _sse_vector_cmplxcg_mul(ka2);

    _sse_load(rs.s0);
    _sse_vector_add();
    _sse_store(rs.s0);

    _sse_load(rs.s3);
    _sse_vector_sub();
    _sse_store(rs.s3);

    _sse_load((*phi[ix]).s1);
      
    _sse_su3_inverse_multiply((*U));
    _sse_vector_cmplxcg_mul(ka2);

    _sse_load(rs.s1);
    _sse_vector_add();
    _sse_store(rs.s1);

    _sse_load(rs.s2);
    _sse_vector_add();
    _sse_store(rs.s2);      
    ix++;
    U++;
    /*********************** direction +3 ************************/
    _sse_load_up((*phi[ix]).s0);

    _sse_load(rs.s0);
    _sse_vector_add();
    _sse_store(rs.s0);

    _sse_load(rs.s2);
    _sse_vector_i_mul();      
    _sse_vector_sub();
    _sse_store(rs.s2);

    _sse_load_up((*phi[ix]).s1);

    _sse_load(rs.s1);
    _sse_vector_add();
    _sse_store(rs.s1);

    _sse_load(rs.s3);
    _sse_vector_i_mul();      
    _sse_vector_add();
    _sse_store(rs.s3);

    ix++;
    /*********************** direction -3 ************************/

    _prefetch_su3(U+predist); 
    _prefetch_spinor(s+1);

    _sse_load((*phi[ix]).s0);
      
    _sse_su3_inverse_multiply((*U));
    _sse_vector_cmplxcg_mul(ka3);

    _sse_load(rs.s0);
    _sse_vector_add();
    _sse_store_nt((*s).s0);

    _sse_load(rs.s2);
    _sse_vector_i_mul();      
    _sse_vector_add();
    _sse_store_nt((*s).s2);

    _sse_load((*phi[ix]).s1);
      
    _sse_su3_inverse_multiply((*U));
    _sse_vector_cmplxcg_mul(ka3);

    _sse_load(rs.s1);
    _sse_vector_add();
    _sse_store_nt((*s).s1);

    _sse_load(rs.s3);
    _sse_vector_i_mul();      
    _sse_vector_sub();
    _sse_store_nt((*s).s3);
    ix++;
    U++;
    s++;
  }
#ifdef _KOJAK_INST
#pragma pomp inst end(hoppingmatrix)
#endif
}
Exemplo n.º 10
0
/* for ieo=0, k resides on  odd sites and l on even sites */
void Hopping_Matrix(const int ieo, spinor * const l, spinor * const k){
  int i,ix;
  su3 * restrict U ALIGN;
  spinor * restrict s ALIGN;
  spinor rs;
  static su3_vector psi, chi, psi2, chi2;
  halfspinor * restrict * phi ALIGN;
  halfspinor32 * restrict * phi32 ALIGN;
#ifdef _KOJAK_INST
#pragma pomp inst begin(hoppingmatrix)
#endif
#ifdef XLC
#pragma disjoint(*l, *k, *U, *s)
#endif

#ifdef _GAUGE_COPY
  if(g_update_gauge_copy) {
    update_backward_gauge();
  }
#endif

  if(k == l){
    printf("Error in H_psi (simple.c):\n");
    printf("Arguments k and l must be different\n");
    printf("Program aborted\n");
    exit(1);
  }
  s = k;

  if(ieo == 0) {
    U = g_gauge_field_copy[0][0];
  }
  else {
    U = g_gauge_field_copy[1][0];
  }
  if(g_sloppy_precision == 1 && g_sloppy_precision_flag == 1) {
    phi32 = NBPointer32[ieo];
      
    /**************** loop over all lattice sites ****************/
    ix=0;
    for(i = 0; i < (VOLUME)/2; i++){
      _vector_assign(rs.s0, (*s).s0);
      _vector_assign(rs.s1, (*s).s1);
      _vector_assign(rs.s2, (*s).s2);
      _vector_assign(rs.s3, (*s).s3);
      s++;
      /*********************** direction +0 ************************/
      
      _vector_add(psi, rs.s0, rs.s2);

      _su3_multiply(chi,(*U),psi);
      _complex_times_vector((*phi32[ix]).s0, ka0, chi);
      
      _vector_add(psi, rs.s1, rs.s3);

      _su3_multiply(chi,(*U),psi);
      _complex_times_vector((*phi32[ix]).s1, ka0, chi);
            
      U++;
      ix++;
    
      /*********************** direction -0 ************************/

      _vector_sub((*phi32[ix]).s0, rs.s0, rs.s2);
      _vector_sub((*phi32[ix]).s1, rs.s1, rs.s3);

      ix++;

      /*********************** direction +1 ************************/

      _vector_i_add(psi, rs.s0, rs.s3);

      _su3_multiply(chi, (*U), psi);
      _complex_times_vector((*phi32[ix]).s0, ka1, chi);

      _vector_i_add(psi, rs.s1, rs.s2);

      _su3_multiply(chi, (*U), psi);
      _complex_times_vector((*phi32[ix]).s1, ka1, chi);

      U++;
      ix++;

      /*********************** direction -1 ************************/

      _vector_i_sub((*phi32[ix]).s0, rs.s0, rs.s3);
      _vector_i_sub((*phi32[ix]).s1, rs.s1, rs.s2);

      ix++;
      /*********************** direction +2 ************************/

      _vector_add(psi, rs.s0, rs.s3);

      _su3_multiply(chi,(*U),psi);
      _complex_times_vector((*phi32[ix]).s0, ka2, chi);

      _vector_sub(psi, rs.s1, rs.s2);

      _su3_multiply(chi,(*U),psi);
      _complex_times_vector((*phi32[ix]).s1, ka2, chi);
      
      U++;
      ix++;

      /*********************** direction -2 ************************/

      _vector_sub((*phi32[ix]).s0, rs.s0, rs.s3);
      _vector_add((*phi32[ix]).s1, rs.s1, rs.s2);
      ix++;

      /*********************** direction +3 ************************/

      _vector_i_add(psi, rs.s0, rs.s2);
      
      _su3_multiply(chi, (*U), psi);
      _complex_times_vector((*phi32[ix]).s0, ka3, chi);


      _vector_i_sub(psi, rs.s1, rs.s3);

      _su3_multiply(chi,(*U),psi);
      _complex_times_vector((*phi32[ix]).s1, ka3, chi);

      U++;
      ix++;
      /*********************** direction -3 ************************/

      _vector_i_sub((*phi32[ix]).s0, rs.s0, rs.s2);
      _vector_i_add((*phi32[ix]).s1, rs.s1, rs.s3);

      ix++;
      /************************ end of loop ************************/
    }
#    if (defined MPI && !defined _NO_COMM)
    xchange_halffield32(); 
#    endif
    s = l;
    phi32 = NBPointer32[2 + ieo];
    if(ieo == 0) {
      U = g_gauge_field_copy[1][0];
    }
    else {
      U = g_gauge_field_copy[0][0];
    }

    ix = 0;
    for(i = 0; i < (VOLUME)/2; i++){
      /*********************** direction +0 ************************/
      _vector_assign(rs.s0, (*phi32[ix]).s0);
      _vector_assign(rs.s2, (*phi32[ix]).s0);
      _vector_assign(rs.s1, (*phi32[ix]).s1);
      _vector_assign(rs.s3, (*phi32[ix]).s1);
      ix++;
      /*********************** direction -0 ************************/
      _vector_assign(psi, (*phi32[ix]).s0);
      _su3_inverse_multiply(chi,(*U), psi);
      _complexcjg_times_vector(psi,ka0,chi);

      _vector_add_assign(rs.s0, psi);
      _vector_sub_assign(rs.s2, psi);

      _vector_assign(psi, (*phi32[ix]).s1);
      _su3_inverse_multiply(chi,(*U), psi);
      _complexcjg_times_vector(psi,ka0,chi);
      
      _vector_add_assign(rs.s1, psi);
      _vector_sub_assign(rs.s3, psi);
      ix++;
      U++;
      /*********************** direction +1 ************************/

      _vector_add_assign(rs.s0, (*phi32[ix]).s0);
      _vector_i_sub_assign(rs.s3, (*phi32[ix]).s0);

      _vector_add_assign(rs.s1, (*phi32[ix]).s1);
      _vector_i_sub_assign(rs.s2, (*phi32[ix]).s1);
    
      ix++;
      /*********************** direction -1 ************************/
      _vector_assign(psi, (*phi32[ix]).s0);
      _su3_inverse_multiply(chi,(*U), psi);
      _complexcjg_times_vector(psi,ka1,chi);

      _vector_add_assign(rs.s0, psi);
      _vector_i_add_assign(rs.s3, psi);

      _vector_assign(psi, (*phi32[ix]).s1);
      _su3_inverse_multiply(chi,(*U), psi);
      _complexcjg_times_vector(psi,ka1,chi);

      _vector_add_assign(rs.s1, psi);
      _vector_i_add_assign(rs.s2, psi);

      U++;
      ix++;

      /*********************** direction +2 ************************/

      _vector_add_assign(rs.s0, (*phi32[ix]).s0);
      _vector_add_assign(rs.s3, (*phi32[ix]).s0);

      _vector_add_assign(rs.s1, (*phi32[ix]).s1);
      _vector_sub_assign(rs.s2, (*phi32[ix]).s1);
    
      ix++;
      /*********************** direction -2 ************************/

      _vector_assign(psi, (*phi32[ix]).s0);
      _su3_inverse_multiply(chi,(*U), psi);
      _complexcjg_times_vector(psi,ka2,chi);

      _vector_add_assign(rs.s0, psi);
      _vector_sub_assign(rs.s3, psi);

      _vector_assign(psi, (*phi32[ix]).s1);
      _su3_inverse_multiply(chi, (*U), psi);
      _complexcjg_times_vector(psi,ka2,chi);
      
      _vector_add_assign(rs.s1, psi);
      _vector_add_assign(rs.s2, psi);

      U++;
      ix++;
      /*********************** direction +3 ************************/

      _vector_add_assign(rs.s0, (*phi32[ix]).s0);
      _vector_i_sub_assign(rs.s2, (*phi32[ix]).s0);

      _vector_add_assign(rs.s1, (*phi32[ix]).s1);
      _vector_i_add_assign(rs.s3, (*phi32[ix]).s1);

      ix++;

      /*********************** direction -3 ************************/

      _vector_assign(psi, (*phi32[ix]).s0);
      _su3_inverse_multiply(chi,(*U), psi);
      _complexcjg_times_vector(psi,ka3,chi);
      
      _vector_add((*s).s0, rs.s0, psi);
      _vector_i_add((*s).s2, rs.s2, psi);

      _vector_assign(psi, (*phi32[ix]).s1);
      _su3_inverse_multiply(chi,(*U), psi);
      _complexcjg_times_vector(psi,ka3,chi);

      _vector_add((*s).s1, rs.s1, psi);
      _vector_i_sub((*s).s3, rs.s3, psi);

      U++;
      ix++;
      s++;
    }
  }
  else {
    phi = NBPointer[ieo];
      
    /**************** loop over all lattice sites ****************/
    ix=0;
    /* #pragma ivdep*/
    for(i = 0; i < (VOLUME)/2; i++){
      _vector_assign(rs.s0, (*s).s0);
      _vector_assign(rs.s1, (*s).s1);
      _vector_assign(rs.s2, (*s).s2);
      _vector_assign(rs.s3, (*s).s3);
      s++;
      /*********************** direction +0 ************************/
      
      _vector_add(psi, rs.s0, rs.s2);
      _vector_add(psi2, rs.s1, rs.s3);
      _su3_multiply(chi,(*U),psi);
      _su3_multiply(chi2,(*U),psi2);
      _complex_times_vector((*phi[ix]).s0, ka0, chi);
      _complex_times_vector((*phi[ix]).s1, ka0, chi2);
            
      U++;
      ix++;
    
      /*********************** direction -0 ************************/

      _vector_sub((*phi[ix]).s0, rs.s0, rs.s2);
      _vector_sub((*phi[ix]).s1, rs.s1, rs.s3);

      ix++;

      /*********************** direction +1 ************************/

      _vector_i_add(psi, rs.s0, rs.s3);
      _vector_i_add(psi2, rs.s1, rs.s2);
      _su3_multiply(chi, (*U), psi);
      _su3_multiply(chi2, (*U), psi2);
      _complex_times_vector((*phi[ix]).s0, ka1, chi);
      _complex_times_vector((*phi[ix]).s1, ka1, chi2);

      U++;
      ix++;

      /*********************** direction -1 ************************/

      _vector_i_sub((*phi[ix]).s0, rs.s0, rs.s3);
      _vector_i_sub((*phi[ix]).s1, rs.s1, rs.s2);

      ix++;
      /*********************** direction +2 ************************/

      _vector_add(psi, rs.s0, rs.s3);
      _vector_sub(psi2, rs.s1, rs.s2);
      _su3_multiply(chi,(*U),psi);
      _su3_multiply(chi2,(*U),psi2);
      _complex_times_vector((*phi[ix]).s0, ka2, chi);
      _complex_times_vector((*phi[ix]).s1, ka2, chi2);
      
      U++;
      ix++;

      /*********************** direction -2 ************************/

      _vector_sub((*phi[ix]).s0, rs.s0, rs.s3);
      _vector_add((*phi[ix]).s1, rs.s1, rs.s2);
      ix++;

      /*********************** direction +3 ************************/

      _vector_i_add(psi, rs.s0, rs.s2);
      _vector_i_sub(psi2, rs.s1, rs.s3);      
      _su3_multiply(chi, (*U), psi);
      _su3_multiply(chi2,(*U),psi2);
      _complex_times_vector((*phi[ix]).s0, ka3, chi);
      _complex_times_vector((*phi[ix]).s1, ka3, chi2);

      U++;
      ix++;
      /*********************** direction -3 ************************/

      _vector_i_sub((*phi[ix]).s0, rs.s0, rs.s2);
      _vector_i_add((*phi[ix]).s1, rs.s1, rs.s3);

      ix++;
      /************************ end of loop ************************/
    }
#    if (defined MPI && !defined _NO_COMM)
    xchange_halffield(); 
#    endif
    s = l;
    phi = NBPointer[2 + ieo];
    if(ieo == 0) {
      U = g_gauge_field_copy[1][0];
    }
    else {
      U = g_gauge_field_copy[0][0];
    }

    ix = 0;
    /* #pragma ivdep */
    for(i = 0; i < (VOLUME)/2; i++){
      /*********************** direction +0 ************************/
      _vector_assign(rs.s0, (*phi[ix]).s0);
      _vector_assign(rs.s2, (*phi[ix]).s0);
      _vector_assign(rs.s1, (*phi[ix]).s1);
      _vector_assign(rs.s3, (*phi[ix]).s1);
      ix++;
      /*********************** direction -0 ************************/
      _su3_inverse_multiply(chi,(*U),(*phi[ix]).s0);
      _su3_inverse_multiply(chi2,(*U),(*phi[ix]).s1);
      _complexcjg_times_vector(psi,ka0,chi);
      _complexcjg_times_vector(psi2,ka0,chi2);
      _vector_add_assign(rs.s0, psi);
      _vector_sub_assign(rs.s2, psi);
      _vector_add_assign(rs.s1, psi2);
      _vector_sub_assign(rs.s3, psi2);
      ix++;
      U++;
      /*********************** direction +1 ************************/

      _vector_add_assign(rs.s0, (*phi[ix]).s0);
      _vector_i_sub_assign(rs.s3, (*phi[ix]).s0);

      _vector_add_assign(rs.s1, (*phi[ix]).s1);
      _vector_i_sub_assign(rs.s2, (*phi[ix]).s1);
    
      ix++;
      /*********************** direction -1 ************************/

      _su3_inverse_multiply(chi,(*U), (*phi[ix]).s0);
      _su3_inverse_multiply(chi2, (*U), (*phi[ix]).s1);
      _complexcjg_times_vector(psi,ka1,chi);
      _complexcjg_times_vector(psi2,ka1,chi2);
      _vector_add_assign(rs.s0, psi);
      _vector_i_add_assign(rs.s3, psi);
      _vector_add_assign(rs.s1, psi2);
      _vector_i_add_assign(rs.s2, psi2);

      U++;
      ix++;

      /*********************** direction +2 ************************/

      _vector_add_assign(rs.s0, (*phi[ix]).s0);
      _vector_add_assign(rs.s3, (*phi[ix]).s0);

      _vector_add_assign(rs.s1, (*phi[ix]).s1);
      _vector_sub_assign(rs.s2, (*phi[ix]).s1);
    
      ix++;
      /*********************** direction -2 ************************/

      _su3_inverse_multiply(chi,(*U), (*phi[ix]).s0);
      _su3_inverse_multiply(chi2, (*U), (*phi[ix]).s1);
      _complexcjg_times_vector(psi,ka2,chi);
      _complexcjg_times_vector(psi2,ka2,chi2);
      _vector_add_assign(rs.s0, psi);
      _vector_sub_assign(rs.s3, psi);
      _vector_add_assign(rs.s1, psi2);
      _vector_add_assign(rs.s2, psi2);

      U++;
      ix++;
      /*********************** direction +3 ************************/

      _vector_add_assign(rs.s0, (*phi[ix]).s0);
      _vector_i_sub_assign(rs.s2, (*phi[ix]).s0);

      _vector_add_assign(rs.s1, (*phi[ix]).s1);
      _vector_i_add_assign(rs.s3, (*phi[ix]).s1);

      ix++;

      /*********************** direction -3 ************************/

      _su3_inverse_multiply(chi,(*U), (*phi[ix]).s0);
      _su3_inverse_multiply(chi2, (*U), (*phi[ix]).s1);
      _complexcjg_times_vector(psi,ka3,chi);
      _complexcjg_times_vector(psi2,ka3,chi2);      
      _vector_add((*s).s0, rs.s0, psi);
      _vector_i_add((*s).s2, rs.s2, psi);
      _vector_add((*s).s1, rs.s1, psi2);
      _vector_i_sub((*s).s3, rs.s3, psi2);

      U++;
      ix++;
      s++;
    }
  }
#ifdef _KOJAK_INST
#pragma pomp inst end(hoppingmatrix)
#endif
}