void deriv_Sb_D_psi(spinor * const l, spinor * const k, 
		    hamiltonian_field_t * const hf, const double factor) {

  int ix,iy, iz;
  int ioff,ioff2,icx,icy, icz;
  su3 * restrict up ALIGN;
  su3 * restrict um ALIGN;
  su3adj * restrict ddd;
  static su3adj der;
  static su3 v1,v2;
  static su3_vector psia,psib,phia,phib;
  static spinor rr;
  spinor * restrict r ALIGN;
  spinor * restrict sp ALIGN;
  spinor * restrict sm ALIGN;

  /* We have 32 registers available */
  double _Complex reg00, reg01, reg02, reg03, reg04, reg05;
  double _Complex reg10, reg11, reg12, reg13, reg14, reg15;
  /* For su3 matrix, use reg00 for missing register */
  double _Complex v00, v01, v02, v10, v11, v12, v20, v21;
  /* The following contains the left spinor (12 regs) and the final  */
  /* su3 matrix to trace over */
  double _Complex r00, r01, r02, r10, r11, r12, r20, r21, r22, 
    r30, r31, r32;

#ifdef _KOJAK_INST
# pragma pomp inst begin(derivSb)
#endif

#pragma disjoint(*r, *sp, *sm, *up, *um, *ddd)
  __alignx(16, l);
  __alignx(16, k);

  if(ieo==0) {
    ioff=0;
  }
  else {
    ioff=(VOLUME+RAND)/2;
  } 
  ioff2=(VOLUME+RAND)/2-ioff;

  /* for parallelization */
#ifdef MPI
  xchange_field(k, ieo);
  xchange_field(l, (ieo+1)%2);
#endif
  /************** loop over all lattice sites ****************/

  ix=ioff;
  iy=g_iup[ix][0]; icy=iy;
  sp = k + icy;
  _prefetch_spinor(sp);
  up=&hf->gaugefield[ix][0];
  _prefetch_su3(up);

  for(icx = ioff; icx < (VOLUME+ioff); icx++){

    /* load left vector r and */
    /* multiply with gamma5   */
    r = l + (icx-ioff);
    ix=icx;

    /*********************** direction +0 ********************/

    ddd = &hf->derivative[ix][0];
    _bgl_load_r0((*r).s0);
    _bgl_load_r1((*r).s1);
    _bgl_load_minus_r2((*r).s2);
    _bgl_load_minus_r3((*r).s3);

    _bgl_load_reg0((*sp).s0);
    _bgl_load_reg0_up((*sp).s1);
    _bgl_load_reg1((*sp).s2);
    _bgl_load_reg1_up((*sp).s3);
    
    _bgl_add_to_reg0_reg1();
    _bgl_add_to_reg0_up_reg1_up();
      
    _bgl_add_r0_to_r2_reg1();
    _bgl_add_r1_to_r3_reg1_up();

    iy=g_idn[ix][0]; icy=iy;
    sm = k + icy;
    _prefetch_spinor(sm);
    um=&hf->gaugefield[iy][0];
    _prefetch_su3(um);

    _bgl_tensor_product_and_add();
    /* result in v now */
    _bgl_su3_times_v_dagger(*up);
    /* result in r now */
    _bgl_complex_times_r(ka0);
    _bgl_trace_lambda_mul_add_assign((*ddd), 2.*factor);

    /************** direction -0 ****************************/

    ddd = &hf->derivative[iy][0];
    _bgl_load_r0((*r).s0);
    _bgl_load_r1((*r).s1);
    _bgl_load_minus_r2((*r).s2);
    _bgl_load_minus_r3((*r).s3);

    _bgl_load_reg0((*sm).s0);
    _bgl_load_reg0_up((*sm).s1);
    _bgl_load_reg1((*sm).s2);
    _bgl_load_reg1_up((*sm).s3);

    _bgl_sub_from_reg0_reg1();
    _bgl_sub_from_reg0_up_reg1_up();

    _bgl_sub_from_r0_r2_reg1();
    _bgl_sub_from_r1_r3_reg1_up();

    iy=g_iup[ix][1]; icy=[iy];

    sp = k + icy;
    _prefetch_spinor(sp);
    up=&hf->gaugefield[ix][1];      
    _prefetch_su3(up);

    _bgl_tensor_product_and_add_d();
    /* result in v now */
    _bgl_su3_times_v_dagger(*um);
    /* result in r now */
    _bgl_complex_times_r(ka0);
    _bgl_trace_lambda_mul_add_assign((*ddd), 2.*factor);

    /*************** direction +1 **************************/

    ddd = &hf->derivative[ix][1];
    _bgl_load_r0((*r).s0);
    _bgl_load_r1((*r).s1);
    _bgl_load_minus_r2((*r).s2);
    _bgl_load_minus_r3((*r).s3);

    _bgl_load_reg0((*sp).s0);
    _bgl_load_reg0_up((*sp).s1);
    _bgl_load_reg1((*sp).s2);
    _bgl_load_reg1_up((*sp).s3);
    
    _bgl_i_mul_add_to_reg0_reg1_up();
    _bgl_i_mul_add_to_reg0_up_reg1();
      
    _bgl_i_mul_add_r0_to_r3_reg1();
    _bgl_i_mul_add_r1_to_r2_reg1_up();

    iy=g_idn[ix][1]; icy=iy;

    sm = k + icy;
    _prefetch_spinor(sm);
    um=&hf->gaugefield[iy][1];
    _prefetch_su3(um);

    _bgl_tensor_product_and_add();
    /* result in v now */
    _bgl_su3_times_v_dagger(*up);
    /* result in r now */
    _bgl_complex_times_r(ka1);
    _bgl_trace_lambda_mul_add_assign((*ddd), 2.*factor);

    /**************** direction -1 *************************/

    ddd = &hf->derivative[iy][1];
    _bgl_load_r0((*r).s0);
    _bgl_load_r1((*r).s1);
    _bgl_load_minus_r2((*r).s2);
    _bgl_load_minus_r3((*r).s3);

    _bgl_load_reg0((*sp).s0);
    _bgl_load_reg0_up((*sp).s1);
    _bgl_load_reg1((*sp).s2);
    _bgl_load_reg1_up((*sp).s3);
    
    _bgl_i_mul_sub_from_reg0_reg1_up();
    _bgl_i_mul_sub_from_reg0_up_reg1();
      
    _bgl_i_mul_sub_from_r0_r3_reg1();
    _bgl_i_mul_sub_from_r1_r2_reg1_up();

    iy=g_iup[ix][2]; icy=iy;

    sp = k + icy;
    _prefetch_spinor(sp);
    up=&hf->gaugefield[ix][2];
    _prefetch_su3(up);

    _bgl_tensor_product_and_add_d();
    /* result in v now */
    _bgl_su3_times_v_dagger(*um);
    /* result in r now */
    _bgl_complex_times_r(ka1);
    _bgl_trace_lambda_mul_add_assign((*ddd), 2.*factor);

    /*************** direction +2 **************************/

    ddd = &hf->derivative[ix][2];
    _bgl_load_r0((*r).s0);
    _bgl_load_r1((*r).s1);
    _bgl_load_minus_r2((*r).s2);
    _bgl_load_minus_r3((*r).s3);

    _bgl_load_reg0((*sp).s0);
    _bgl_load_reg0_up((*sp).s1);
    _bgl_load_reg1((*sp).s2);
    _bgl_load_reg1_up((*sp).s3);
    
    _bgl_add_to_reg0_reg1_up();
    _bgl_sub_from_reg0_up_reg1();
      
    _bgl_add_r0_to_r3_reg1();
    _bgl_sub_from_r1_r2_reg1_up();

    iy=g_idn[ix][2]; icy=iy;

    sm = k + icy;
    _prefetch_spinor(sm);
    um=&hf->gaugefield[iy][2];
    _prefetch_su3(um);

    _bgl_tensor_product_and_add();
    /* result in v now */
    _bgl_su3_times_v_dagger(*up);
    /* result in r now */
    _bgl_complex_times_r(ka2);
    _bgl_trace_lambda_mul_add_assign((*ddd), 2.*factor);
      
    /***************** direction -2 ************************/

    ddd = &hf->derivative[iy][2];
    _bgl_load_r0((*r).s0);
    _bgl_load_r1((*r).s1);
    _bgl_load_minus_r2((*r).s2);
    _bgl_load_minus_r3((*r).s3);

    _bgl_load_reg0((*sp).s0);
    _bgl_load_reg0_up((*sp).s1);
    _bgl_load_reg1((*sp).s2);
    _bgl_load_reg1_up((*sp).s3);
    
    _bgl_sub_from_reg0_reg1_up();
    _bgl_add_to_reg0_up_reg1();
      
    _bgl_sub_from_r0_r3_reg1();
    _bgl_add_r1_to_r2_reg1_up();

    iy=g_iup[ix][3]; icy=iy;

    sp = k + icy;
    _prefetch_spinor(sp);
    up=&hf->gaugefield[ix][3];
    _prefetch_su3(up);

    _bgl_tensor_product_and_add_d();
    /* result in v now */
    _bgl_su3_times_v_dagger(*um);
    /* result in r now */
    _bgl_complex_times_r(ka1);
    _bgl_trace_lambda_mul_add_assign(*ddd, 2.*factor);

    /****************** direction +3 ***********************/

    ddd = &hf->derivative[ix][3];
    _bgl_load_r0((*r).s0);
    _bgl_load_r1((*r).s1);
    _bgl_load_minus_r2((*r).s2);
    _bgl_load_minus_r3((*r).s3);

    _bgl_load_reg0((*sp).s0);
    _bgl_load_reg0_up((*sp).s1);
    _bgl_load_reg1((*sp).s2);
    _bgl_load_reg1_up((*sp).s3);
    
    _bgl_i_mul_add_to_reg0_reg1();
    _bgl_i_mul_sub_from_reg0_up_reg1_up();
      
    _bgl_i_mul_add_r0_to_r2_reg1();
    _bgl_i_mul_sub_from_r1_r3_reg1_up();

    iy=g_idn[ix][3]; icy=iy;

    sm = k + icy;
    _prefetch_spinor(sm);
    um=&hf->gaugefield[iy][3];
    _prefetch_su3(um);

    _bgl_tensor_product_and_add();
    /* result in v now */
    _bgl_su3_times_v_dagger(*up);
    /* result in r now */
    _bgl_complex_times_r(ka3);
    _bgl_trace_lambda_mul_add_assign((*ddd), 2.*factor);

    /***************** direction -3 ************************/

    ddd = &hf->derivative[iy][3];
    _bgl_load_r0((*r).s0);
    _bgl_load_r1((*r).s1);
    _bgl_load_minus_r2((*r).s2);
    _bgl_load_minus_r3((*r).s3);

    _bgl_load_reg0((*sp).s0);
    _bgl_load_reg0_up((*sp).s1);
    _bgl_load_reg1((*sp).s2);
    _bgl_load_reg1_up((*sp).s3);
    
    _bgl_i_mul_sub_from_reg0_reg1();
    _bgl_i_mul_add_to_reg0_up_reg1_up();
      
    _bgl_i_mul_sub_from_r0_r2_reg1();
    _bgl_i_mul_add_r1_to_r3_reg1_up();

    /* something wrong here...*/
    icz=icx+1;
    if(icz==((VOLUME+RAND)/2+ioff)) icz=ioff;
    iz=icz;
    iy=g_iup[iz][0]; icy=iy;

    sp = k + icy;
    _prefetch_spinor(sp);
    up=&hf->gaugefield[iz][0];
    _prefetch_su3(up);

    _bgl_tensor_product_and_add_d();
    /* result in v now */
    _bgl_su3_times_v_dagger(*um);
    /* result in r now */
    _bgl_complex_times_r(ka3);
    _bgl_trace_lambda_mul_add_assign((*ddd), 2.*factor);

    /****************** end of loop ************************/
  }
#ifdef _KOJAK_INST
#pragma pomp inst end(derivSb)
#endif
}
Exemple #2
0
/* this is the hopping part only */
void local_H(spinor * const rr, spinor * const s, su3 * u, int * _idx) {

  int * idx = _idx;
  su3 * restrict up ALIGN;
  su3 * restrict um ALIGN;
  spinor * restrict sp ALIGN;
  spinor * restrict sm ALIGN;

#pragma disjoint(*s, *sp, *sm, *rr, *up, *um)

  __alignx(16,rr);
  __alignx(16,s);

  /*********************** direction +0 ************************/
  up = u;
  sp = (spinor *) s + (*idx);
  idx++;

  um = up+1;
  _prefetch_su3(um); 
  sm = (spinor *) s + (*idx);
  _prefetch_spinor(sm); 
  idx++;

  _bgl_load_reg0(sp->s0);
  _bgl_load_reg1(sp->s1);
  _bgl_load_reg0_up(sp->s2);
  _bgl_load_reg1_up(sp->s3);
  _bgl_vector_add_reg0();
  _bgl_vector_add_reg1();
  /* result is now in regx0, regx1, regx2 x = 0,1 */
  
  _bgl_su3_multiply_double((*up));
  _bgl_vector_cmplx_mul_double(phase_0);
  _bgl_add_to_rs0_reg0();
  _bgl_add_to_rs2_reg0();
  _bgl_add_to_rs1_reg1();
  _bgl_add_to_rs3_reg1();

  /*********************** direction -0 ************************/
  up = um+1;
  _prefetch_su3(up); 
  sp = (spinor*) s + (*idx);
  _prefetch_spinor(sp); 
  idx++;

  _bgl_load_reg0(sm->s0);
  _bgl_load_reg1(sm->s1);
  _bgl_load_reg0_up(sm->s2);
  _bgl_load_reg1_up(sm->s3);
  _bgl_vector_sub_reg0();
  _bgl_vector_sub_reg1();
  
  _bgl_su3_inverse_multiply_double((*um));
  _bgl_vector_cmplxcg_mul_double(phase_0);
  
  _bgl_add_to_rs0_reg0();
  _bgl_sub_from_rs2_reg0();
  _bgl_add_to_rs1_reg1();
  _bgl_sub_from_rs3_reg1();
  
  /*********************** direction +1 ************************/
  
  um = up+1;
  _prefetch_su3(um); 
  sm = (spinor*) s + (*idx);
  _prefetch_spinor(sm); 
  idx++;

  _bgl_load_reg0(sp->s0);
  _bgl_load_reg1(sp->s1);
  _bgl_load_reg0_up(sp->s3);
  _bgl_load_reg1_up(sp->s2);
  _bgl_vector_i_mul_add_reg0();
  _bgl_vector_i_mul_add_reg1();
  
  _bgl_su3_multiply_double((*up));
  _bgl_vector_cmplx_mul_double(phase_1);
  
  _bgl_add_to_rs0_reg0();
  _bgl_i_mul_sub_from_rs3_reg0();
  _bgl_add_to_rs1_reg1();
  _bgl_i_mul_sub_from_rs2_reg1();
  
  /*********************** direction -1 ************************/

  up = um+1;
  _prefetch_su3(up); 
  sp = (spinor*) s + (*idx);
  _prefetch_spinor(sp); 
  idx++;

  _bgl_load_reg0(sm->s0);
  _bgl_load_reg1(sm->s1);
  _bgl_load_reg0_up(sm->s3);
  _bgl_load_reg1_up(sm->s2);
  _bgl_vector_i_mul_sub_reg0();
  _bgl_vector_i_mul_sub_reg1();
  
  _bgl_su3_inverse_multiply_double((*um));
  _bgl_vector_cmplxcg_mul_double(phase_1);
  
  _bgl_add_to_rs0_reg0();
  _bgl_add_to_rs1_reg1();
  _bgl_i_mul_add_to_rs3_reg0();
  _bgl_i_mul_add_to_rs2_reg1();      
  
  /*********************** direction +2 ************************/
  
  um = up+1;
  _prefetch_su3(um); 
  sm = (spinor*) s + (*idx);
  _prefetch_spinor(sm); 
  idx++;

  _bgl_load_reg0(sp->s0);
  _bgl_load_reg1(sp->s1);
  _bgl_load_reg1_up(sp->s2);
  _bgl_load_reg0_up(sp->s3);
  _bgl_vector_add_reg0();
  _bgl_vector_sub_reg1();
  
  _bgl_su3_multiply_double((*up));
  _bgl_vector_cmplx_mul_double(phase_2);
  
  _bgl_add_to_rs0_reg0();
  _bgl_add_to_rs1_reg1();
  _bgl_sub_from_rs2_reg1();
  _bgl_add_to_rs3_reg0();
  

  /*********************** direction -2 ************************/
  up = um+1;
  _prefetch_su3(up); 
  sp = (spinor*) s + (*idx);
  _prefetch_spinor(sp); 
  idx++;

  _bgl_load_reg0(sm->s0);
  _bgl_load_reg1(sm->s1);
  _bgl_load_reg1_up(sm->s2);
  _bgl_load_reg0_up(sm->s3);
  _bgl_vector_sub_reg0();
  _bgl_vector_add_reg1();
  
  _bgl_su3_inverse_multiply_double((*um));
  _bgl_vector_cmplxcg_mul_double(phase_2);
  
  _bgl_add_to_rs0_reg0();
  _bgl_add_to_rs1_reg1();
  _bgl_add_to_rs2_reg1();
  _bgl_sub_from_rs3_reg0();
  
  /*********************** direction +3 ************************/
  um = up+1;
  _prefetch_su3(um); 
  sm = (spinor*) s + (*idx);
  _prefetch_spinor(sm); 

  _bgl_load_reg0(sp->s0);
  _bgl_load_reg1(sp->s1);
  _bgl_load_reg0_up(sp->s2);
  _bgl_load_reg1_up(sp->s3);
  _bgl_vector_i_mul_add_reg0();
  _bgl_vector_i_mul_sub_reg1();
  
  _bgl_su3_multiply_double((*up));
  _bgl_vector_cmplx_mul_double(phase_3);
  
  _bgl_add_to_rs0_reg0();
  _bgl_add_to_rs1_reg1();
  _bgl_i_mul_sub_from_rs2_reg0();
  _bgl_i_mul_add_to_rs3_reg1();
  
  /*********************** direction -3 ************************/

  _bgl_load_reg0(sm->s0);
  _bgl_load_reg1(sm->s1);
  _bgl_load_reg0_up(sm->s2);
  _bgl_load_reg1_up(sm->s3);
  _bgl_vector_i_mul_sub_reg0();
  _bgl_vector_i_mul_add_reg1();
  
  _bgl_su3_inverse_multiply_double((*um));
  _bgl_vector_cmplxcg_mul_double(phase_3);
  
  _bgl_add_to_rs0_reg0();
  _bgl_store_rs0(rr->s0);
  _bgl_i_mul_add_to_rs2_reg0();
  _bgl_store_rs2(rr->s2);
  
  _bgl_add_to_rs1_reg1();
  _bgl_store_rs1(rr->s1);
  _bgl_i_mul_sub_from_rs3_reg1();
  _bgl_store_rs3(rr->s3);

}