Example #1
0
	/* ttt  <- ttt - msq_x4*src	(msq = mass squared) */
	FORSOMEPARITY(i,s,l_parity){
#ifdef CONGRAD_TMP_VECTORS
	  if( i < loopend-FETCH_UP ){
	    prefetch_VVVV( &ttt[i+FETCH_UP], 
			   &t_dest[i+FETCH_UP],
			   (su3_vector *)F_PT(s+FETCH_UP,src),
			   &resid[i+FETCH_UP]);
	  }
	  scalar_mult_add_su3_vector( &ttt[i], &t_dest[i],
				      -msq_x4, &ttt[i] );
	    /* note that we go back to the site structure for src */
	  add_su3_vector( (su3_vector *)F_PT(s,src),
			  &ttt[i], &resid[i] );
	  /* remember ttt contains -M_adjoint*M*src */
	  cg_p[i] = resid[i];
	  /* note that we go back to the site structure for src */
	  source_norm += (double)magsq_su3vec( (su3_vector *)F_PT(s,src) );
	  rsq += (double)magsq_su3vec( &resid[i] );
#else
	  if( i < loopend-FETCH_UP ){
	    prefetch_VVVV( &((s+FETCH_UP)->ttt), 
			   (su3_vector *)F_PT(s+FETCH_UP,dest),
			   (su3_vector *)F_PT(s+FETCH_UP,src),
			   &((s+FETCH_UP)->resid));
	  }
	  scalar_mult_add_su3_vector( &(s->ttt), (su3_vector *)F_PT(s,dest),
				      -msq_x4, &(s->ttt) );
	  add_su3_vector( (su3_vector *)F_PT(s,src),
			  &(s->ttt), &(s->resid) );
	  s->cg_p = s->resid;
	  source_norm += (double) magsq_su3vec( (su3_vector *)F_PT(s,src) );
	  rsq += (double) magsq_su3vec( &(s->resid) );
#endif
	} END_LOOP
Example #2
0
void accum_delta_prop(int i,int c[3])
     /* Load a determinant for the calculation of the delta propagator */
     /* Each column of the determinant is the symmetrically shifted   */
     /* "q" propagator.  Each column has a differenct shift direction */
     /* Each column also corresponds to a different source wall color */
     /* as specified by the color vector c                            */
     /* Result is put in tempmat1 */
{
  int j,dir;
  su3_matrix *af, *ab;
  
  
  FORALLUPDIRBUT(TUP,dir)
    {
      /* For timeward propagation, this step is pedantic */
      /* since we could simply take j = dir */
      /* but for spacelike propagation, it is necessary */
      switch (dir)
	{
	case XUP: j = 0; break;
	case YUP: j = 1; break;
	case ZUP: j = 2;
	}

      /* gen_pt points to the "q" propagator on the next neighbor    */
      af = (su3_matrix *)gen_pt[dir][i];
      ab = (su3_matrix *)gen_pt[dir+4][i];
      su3vec_copy( (su3_vector *)(af->e[c[j]]),
		   (su3_vector *)(lattice[i].tempmat1.e[j]) );
      add_su3_vector( (su3_vector *)(ab->e[c[j]]),
		     (su3_vector *)(lattice[i].tempmat1.e[j]),
		     (su3_vector *)(lattice[i].tempmat1.e[j]) );

    }
}
Example #3
0
	FORSOMEPARITY(i,s,l_parity) {
		scalar_mult_add_su3_vector( &temp[i],&init_guess[i],msq_xm4,&temp[i] );
		add_su3_vector((su3_vector *)F_PT(s,src1),&temp[i],&common_source[i] );
	        source_norm += (double)magsq_su3vec( &common_source[i] );	
		source_norm1 += (double)magsq_su3vec( (su3_vector *)F_PT(s,src1) );  
		/*pm_strange[i] = cg_p[i] = resid[i] = common_source[i];*/
		su3vec_copy( &common_source[i],&(resid[i]) );
		su3vec_copy(&(resid[i]),&(cg_p[i]) );
                su3vec_copy(&(resid[i]), &pm_strange[i]);
                su3vec_copy(&init_guess[i],&destvec1[i] );
                su3vec_copy(&init_guess[i],&destvec2[i] );
	} END_LOOP
Example #4
0
int main(int argc, char **argv)
{
  su3_vector a, b, c, d;
  int i,j,iter=1;
  struct sched_param param={sched_priority:20};
  volatile unsigned long long timeA, timeB, timeMILC, timeSSE;
  unsigned int seed=1;
  int randomfd;
  
  if ((randomfd=open("/dev/urandom", O_RDONLY)) < 0)
    perror("Attempt to open /dev/urandom");
  if (read(randomfd, &seed, sizeof(seed)) < sizeof(seed))
    perror("Attempt to read /dev/urandom");
  close(randomfd);
  //  srand(seed);

  if (sched_setscheduler(0, SCHED_FIFO, &param) < 0)
    perror("Attempt to put in real time queue");

  if (argc > 1) sscanf(argv[1],"%d",&iter);

  for (i=0; i<3; i++) {
    a.c[i].real = (Real)(rand() - RAND_MAX/2)/(Real)(RAND_MAX/2)*2.0;
    a.c[i].imag = (Real)(rand() - RAND_MAX/2)/(Real)(RAND_MAX/2)*2.0;
    b.c[i].real = (Real)(rand() - RAND_MAX/2)/(Real)(RAND_MAX/2)*2.0;
    b.c[i].imag = (Real)(rand() - RAND_MAX/2)/(Real)(RAND_MAX/2)*2.0;
  }

  rdtscll(timeA);
  for (i=0; i<iter; i++) {
    add_su3_vector(&a, &b, &c);
  }
  rdtscll(timeMILC);
  timeMILC -= timeA;

  rdtscll(timeB);
  for (i=0; i<iter; i++) {
    _inline_sse_add_su3_vector(&a, &b, &d);
  }
  rdtscll(timeSSE);
  timeSSE -= timeB;

  for (i=0; i<3; i++) {
    printf("%4.1f%+4.1fi    %4.1f%+4.1fi\n",
           c.c[i].real, c.c[i].imag, d.c[i].real, d.c[i].imag);
  }

  printf("Time per iteration:\n  MILC: %Lu\n  SSE:  %Lu\n", timeMILC/(unsigned long long)iter,
	 timeSSE/(unsigned long long)iter);

  exit(0);
}
Example #5
0
int nl_spectrum( Real vmass, field_offset temp1, field_offset temp2 ) { 
  /* return the C.G. iteration number */
  double *piprop,*pi2prop,*rhoprop,*rho2prop,*barprop;
  double *nlpiprop,*nlpi2prop,*ckpiprop,*ckpi2prop;
  double *delprop,*ckbarprop;
  Real vmass_x2;
  site* s;
  register complex cc;
  Real finalrsq;
  register int i,x,y,z,t,icol,cgn;
  register int t_source,t_off;
  int dir,isrc;

  msg_tag *mtag[16];

  piprop = (double *)malloc( nt*sizeof(double) );
  pi2prop = (double *)malloc( nt*sizeof(double) );
  rhoprop = (double *)malloc( nt*sizeof(double) );
  rho2prop = (double *)malloc( nt*sizeof(double) );
  barprop = (double *)malloc( nt*sizeof(double) );
  nlpiprop = (double *)malloc( nt*sizeof(double) );
  nlpi2prop = (double *)malloc( nt*sizeof(double) );
  ckpiprop = (double *)malloc( nt*sizeof(double) );
  ckpi2prop = (double *)malloc( nt*sizeof(double) );
  delprop = (double *)malloc( nt*sizeof(double) );
  ckbarprop = (double *)malloc( nt*sizeof(double) );

  for( t=0; t<nt; t++ ){
    piprop[t]=0.0; pi2prop[t]=0.0; rhoprop[t]=0.0; rho2prop[t]=0.0;
    nlpiprop[t]=0.0; nlpi2prop[t]=0.0;
    ckpiprop[t]=0.0; ckpi2prop[t]=0.0;
    barprop[t]=0.0; delprop[t]=0.0; ckbarprop[t]=0.0;
  }

  vmass_x2 = 2.*vmass;
  cgn=0;

  /* Fix TUP Coulomb gauge - gauge links only*/
  rephase( OFF );
  gaugefix(TUP,(Real)1.8,500,(Real)GAUGE_FIX_TOL);
  rephase( ON );
#ifdef FN
  invalidate_all_ferm_links(&fn_links);
#endif

  /* Unlike spectrum.c, here we calculate only with wall sources */
  for(t_source=source_start, isrc=0; t_source<2*nt && isrc < n_sources;
        ++isrc, t_source += source_inc ) {
      
      /* Only work for even source slices */
      if( t_source%2 != 0 ){
	printf("DUMMY:  Use even time slices for nl_spectrum()\n");
	terminate(0);
      }

      /* Compute propagator from even wall sites */
      /* Sources are normalized to 1/8 to make them comparable to */
      /* propagators from a wall with ones on the cube origin. */
      /* Put result in propmat */
      
      for(icol=0; icol<3; icol++) {
	  
	  /* initialize temp1 and temp2 */
	  clear_latvec( temp1, EVEN);
	  clear_latvec( temp2, EVEN);
	  
	  for(x=0;x<nx;x++)for(y=0;y<ny;y++)for(z=0;z<nz;z++) {
	      if((x+y+z) % 2 == 0) {
		  if( node_number(x,y,z,t_source) != mynode() )continue;
		  i=node_index(x,y,z,t_source);
		  ((su3_vector *)(F_PT(&lattice[i],temp1)))->c[icol].real = 
		    -0.25;
		}
	    }
	  
	  /* do a C.G. */
	  load_ferm_links(&fn_links);
	  cgn += congrad(niter,rsqprop,EVEN,&finalrsq, &fn_links);
	  /* Multiply by -Madjoint */
	  dslash_site( temp2, temp2, ODD, &fn_links);
	  scalar_mult_latvec( temp2, -vmass_x2, temp2, EVEN);
	  
	  /* fill the hadron matrix */
	  copy_latvec( temp2, F_OFFSET(propmat[icol]), EVENANDODD);
	} /* end loop on icol */
      
      
      /* Compute propagator from odd wall sites */
      /* Put result in propmat2 */
      
      for(icol=0; icol<3; icol++) {
	  
	  /* initialize temp1 and temp2 */
	  clear_latvec( temp1, ODD);
	  clear_latvec( temp2, ODD);
	  for(x=0;x<nx;x++)for(y=0;y<ny;y++)for(z=0;z<nz;z++) {
	      if((x+y+z) % 2 == 1) {
		  if( node_number(x,y,z,t_source) != mynode() )continue;
		  i=node_index(x,y,z,t_source);
		  ((su3_vector *)(F_PT(&lattice[i],temp1)))->c[icol].real = 
		    -0.25;
		}
	    }
	  
	  /* do a C.G. */
	  load_ferm_links(&fn_links);
	  cgn += congrad(niter,rsqprop,ODD,&finalrsq,&fn_links);
	  /* Multiply by -Madjoint */
	  dslash_site( temp2, temp2, EVEN, &fn_links);
	  scalar_mult_latvec( temp2, -vmass_x2, temp2, ODD);
	  
	  /* fill the hadron matrix */
	  copy_latvec( temp2, F_OFFSET(propmat2[icol]), EVENANDODD);
	} /* end loop on icol */
      
      /* cgn now gives the sum for both inversions */
      
      
      /* measure the meson propagator for the E wall source */
      for(t=0; t<nt; t++) {
	  /* define the time value offset t from t_source */
	  t_off = (t+t_source)%nt;
	  
	  for(x=0;x<nx;x++)for(y=0;y<ny;y++)for(z=0;z<nz;z++)
	    for(icol=0;icol<3;icol++) {
		if( node_number(x,y,z,t_off) != mynode() )continue;
		i=node_index(x,y,z,t_off);
		cc = su3_dot( &lattice[i].propmat[icol],
			     &lattice[i].propmat[icol] );
		
		piprop[t] += cc.real;
		/* (rhoprop and rho2prop are not generated by this source) */
		
		if( (x+y+z)%2==0)pi2prop[t] += cc.real;
		else	     pi2prop[t] -= cc.real;
		
	      }
	  
	} /* nt-loop */
      
      /* measure the baryon propagator for the E wall source */
      for(t=0; t<nt; t++) {
	  /* define the time value offset t from t_source */
	  t_off = (t+t_source)%nt;
	  
	  for(x=0;x<nx;x+=2)for(y=0;y<ny;y+=2)for(z=0;z<nz;z+=2) {
	      if( node_number(x,y,z,t_off) != mynode() )continue;
	      i=node_index(x,y,z,t_off);
	      cc = det_su3( (su3_matrix *)(lattice[i].propmat) );
	      barprop[t] += cc.real;
	  }
	  
	  /* must get sign right.  This looks to see if we have
	     wrapped around the lattice.  "t" is the distance
	     from the source to the measurement, so we are
	     trying to find out if t_source+t is greater than
	     or equal to nt. */
	  if( (((t+t_source)/nt-t_source/nt)%2) == 1 )barprop[t] *= -1.0;
	  /* change sign because antiperiodic b.c.  sink point
	     should really be in a copy of the lattice */
	} /* nt-loop */
      
      
      /* Measure nonlocal (and some local for checking) propagators    */
      /* These propagators include the delta and some nonlocal mesons  */
      /* The method for the delta is described in M.F.L. Golterman and */
      /* J. Smit, Nucl. Phys. B 255, 328 (1985)                        */
      /* Equation (6.3) defines the sink operator for the delta        */
      /* The method for the mesons is described in M.F.L. Golterman    */
      /* Nucl. Phys. B 273, 663 (1986)                                 */
      /* The treatment of the source wall is described in Gupta,       */
      /* Guralnik, Kilcup, and Sharpe, (GGKS) NSF-ITP-90-172 (1990)    */
      /* To get the delta propagator, we take the "q" propagator       */
      /* matrices for each of the wall colors and antisymmetrize over  */
      /* wall color as well as s                    */
      
      /* First construct the "q" and "o" propagators                   */
      /* Put q = E + O in propmat and o = E - O in propmat2 */
      
      FORALLSITES(i,s) {
	  for(icol=0; icol<3; icol++) {
	      add_su3_vector (&(s->propmat[icol]), &(s->propmat2[icol]), 
			      (su3_vector *)(s->tempmat1.e[icol]) );
	      sub_su3_vector (&(s->propmat[icol]), &(s->propmat2[icol]), 
			      &(s->propmat2[icol]) );
	      su3vec_copy( (su3_vector *)(s->tempmat1.e[icol]),
		&(s->propmat[icol]) );
	    }
	}
      
      
      
      /* Next gather the propagators in preparation for calculating   */
      /* shifted propagators Dq and Do                                */
      
      FORALLUPDIRBUT(TUP,dir) {
	  /* Start bringing "q" = propmat from forward sites    */
	  
	  mtag[dir] = start_gather_site(F_OFFSET(propmat[0]), 
		   sizeof(su3_matrix), dir, EVENANDODD, gen_pt[dir]);
	  
	  /* Start bringing "q" from backward neighbors       */
	  
	  mtag[dir+4] = start_gather_site(F_OFFSET(propmat[0]), 
		   sizeof(su3_matrix), OPP_DIR(dir), EVENANDODD,
		   gen_pt[dir+4]);
	  wait_gather(mtag[dir]);
	  wait_gather(mtag[dir+4]);
	  
	  /* Start bringing "o" = propmat2 from forward sites   */
	  
	  mtag[8+dir] = start_gather_site(F_OFFSET(propmat2[0]), 
		   sizeof(su3_matrix), dir, EVENANDODD, gen_pt[8+dir]);
	  
	      /* Start bringing "o" from backward neighbors       */
	  
	  mtag[8+dir+4] = start_gather_site(F_OFFSET(propmat2[0]), 
		    sizeof(su3_matrix), OPP_DIR(dir), EVENANDODD,
		    gen_pt[8+dir+4]);
	  wait_gather(mtag[8+dir]);
	  wait_gather(mtag[8+dir+4]);
	  
	}
      
      
      /* Calculate and dump delta propagator */
      for(t=0; t<nt; t++) {
	  /* define the time value offset t from t_source */
	  t_off = (t+t_source)%nt;
	  
	  /* Calculate contribution for each permutation of source color */
	  delta_prop (0,1,2, 1, t_off, &delprop[t]);
	  delta_prop (1,2,0, 1, t_off, &delprop[t]);
	  delta_prop (2,0,1, 1, t_off, &delprop[t]);
	  delta_prop (1,0,2,-1, t_off, &delprop[t]);
	  delta_prop (0,2,1,-1, t_off, &delprop[t]);
	  delta_prop (2,1,0,-1, t_off, &delprop[t]);
	  
	  if( (((t+t_source)/nt-t_source/nt)%2) == 1 ) delprop[t] *= -1;
	} /* nt-loop */
      
      /* Calculate the "q" source nucleon as a check */
      
      /* Calculate and dump nucleon check propagator */
      for(t=0; t<nt; t++) {
	  /* define the time value offset t from t_source */
	  t_off = (t+t_source)%nt;
	  
	  for(x=0;x<nx;x+=2)for(y=0;y<ny;y+=2)for(z=0;z<nz;z+=2) {
	      if( node_number(x,y,z,t_off) != mynode() )continue;
	      i=node_index(x,y,z,t_off);
	      /* The q propagator is in propmat */
	      cc = det_su3( (su3_matrix *)(lattice[i].propmat) );
	      ckbarprop[t] += cc.real;
	    }
	  
	  if( (((t+t_source)/nt-t_source/nt)%2) == 1 )ckbarprop[t]*= -1.0;
	  /* change sign because antiperiodic b.c.  sink point
	     should really be in a copy of the lattice */
	} /* nt-loop */
      
      /* Calculate nonlocal meson propagators and local check */
      for(t=0; t<nt; t++) {
	  /* Calculate two nonlocal pion propagators */
	  /* These are pi_1 and pi_1 tilde of Gupta et al */
	  /* Also calculate two local propagators as a check */
	  
	  /* define the time value offset t from t_source */
	  t_off = (t+t_source)%nt;
	  
	  nl_meson_prop(t_off,&nlpiprop[t],&nlpi2prop[t],&ckpiprop[t],
	     &ckpi2prop[t]);
	  
	} /* nt-loop */
      
      /* Clean up gathers */
      FORALLUPDIRBUT(TUP,dir) {
	  cleanup_gather(mtag[dir]);
	  cleanup_gather(mtag[dir+4]);
	  cleanup_gather(mtag[8+dir]);
	  cleanup_gather(mtag[8+dir+4]);
	}
Example #6
0
/* Special dslash_site for use by congrad.  Uses restart_gather_site() when
  possible. Third to last argument is an array of message tags, to be set
  if this is the first use, otherwise reused. If start=1,use
  start_gather_site, otherwise use restart_gather_site. 
  The calling program must clean up the gathers! */
void dslash_fn_site_special( field_offset src, field_offset dest,
			     int parity, msg_tag **tag, int start,
			     fn_links_t *fn){
  register int i;
  register site *s;
  register int dir,otherparity=0;
  register su3_matrix *fat4;
  su3_matrix *t_fatlink;
#ifndef NO_LONG_LINKS
  register su3_matrix *long4;
  su3_matrix *t_longlink;
  su3_vector *templongvec, *templongv1;
#endif
  su3_vector *tempvec;
  char myname[] = "dslash_fn_site_special";
  
  if(fn == NULL){
    printf("dslash_fn_site_special: invalid fn links!\n");
    terminate(1);
  }
#ifndef NO_LONG_LINKS
  t_longlink = get_lnglinks(fn);
#endif
  t_fatlink = get_fatlinks(fn);

  tempvec = (su3_vector *) malloc(sizeof(su3_vector)*4*sites_on_node);
  if(tempvec == NULL){
    printf("%s(%d)No room for temporary\n",myname, this_node);
    terminate(1);
  }
  
#ifndef NO_LONG_LINKS
  templongvec = (su3_vector *) malloc(sizeof(su3_vector)*4*sites_on_node);
  if(templongvec == NULL){
    printf("%s(%d)No room for temporary\n",myname, this_node);
    terminate(1);
  }
  
  templongv1 = create_v_field();
#endif
  
  switch(parity){
  case EVEN:	otherparity=ODD; break;
  case ODD:	otherparity=EVEN; break;
  case EVENANDODD:	otherparity=EVENANDODD; break;
  }
  
  /* Start gathers from positive directions */
  for(dir=XUP; dir<=TUP; dir++){
    if(start==1) tag[dir] = start_gather_site( src, sizeof(su3_vector),
					       dir, parity, gen_pt[dir] );
    else restart_gather_site( src, sizeof(su3_vector),
			      dir, parity, gen_pt[dir] , tag[dir] );
  }
  
  /* and start the 3rd neighbor gather */
  for(dir=X3UP; dir<=T3UP; dir++){
    if(start==1) tag[dir] = start_gather_site( src, sizeof(su3_vector),
					       dir, parity, gen_pt[dir] );
    else restart_gather_site( src, sizeof(su3_vector),
			      dir, parity, gen_pt[dir] , tag[dir] ); 
  }
  
  /* Multiply by adjoint matrix at other sites */
  FORSOMEPARITYDOMAIN_OMP(i,s,otherparity,private(fat4,long4)){
    if( i < loopend-FETCH_UP ){
      fat4 = &(t_fatlink[4*(i+FETCH_UP)]);
      prefetch_4MV4V( 
		     fat4,
		     (su3_vector *)F_PT(s+FETCH_UP,src),
		     tempvec+4*i+FETCH_UP );
#ifndef NO_LONG_LINKS
      long4 = &(t_longlink[4*(i+FETCH_UP)]);
      prefetch_4MV4V(
		     long4,
		     (su3_vector *)F_PT(s+FETCH_UP,src),
		     templongvec+4*i+FETCH_UP );
#endif
    }
    
    fat4 = &(t_fatlink[4*i]);
#ifndef NO_LONG_LINKS
    long4 = &(t_longlink[4*i]);
#endif
    mult_adj_su3_mat_vec_4dir( fat4,
			       (su3_vector *)F_PT(s,src), (tempvec+4*i) );
    /* multiply by 3-link matrices too */
#ifndef NO_LONG_LINKS
    mult_adj_su3_mat_vec_4dir( long4,
			       (su3_vector *)F_PT(s,src), (templongvec+4*i) );
#endif
  } END_LOOP_OMP
      
  /* Start gathers from negative directions */
  for( dir=XUP; dir <= TUP; dir++){
    if (start==1){
      /* We need the strided gather so we can pick off one of a
	 group of four vectors in tempvec */
      tag[OPP_DIR(dir)] = 
	declare_strided_gather( (char *)(tempvec+dir), 4*sizeof(su3_vector), 
				sizeof(su3_vector), OPP_DIR( dir), parity, 
				gen_pt[OPP_DIR(dir)] );
      prepare_gather(tag[OPP_DIR(dir)]);
      do_gather(tag[OPP_DIR(dir)]);
    }	else {
      do_gather(tag[OPP_DIR(dir)]);
    }
  }
  
#ifndef NO_LONG_LINKS
  /* and 3rd neighbours */
  for( dir=X3UP; dir <= T3UP; dir++){
    /**printf("dslash_fn_site_special: down gathers, start=%d\n",start);**/
    if (start==1){
      tag[OPP_3_DIR(dir)] = 
	declare_strided_gather( (char *)(templongvec+INDEX_3RD(dir)), 
				4*sizeof(su3_vector), 
				sizeof(su3_vector), OPP_3_DIR(dir), 
				parity, gen_pt[OPP_3_DIR(dir)] );
      prepare_gather(tag[OPP_3_DIR(dir)]);
      do_gather(tag[OPP_3_DIR(dir)]);
    }	else {
      do_gather(tag[OPP_3_DIR(dir)]);
    }
  }
#endif
  
  /* Wait gathers from positive directions, multiply by matrix and
     accumulate */
  for(dir=XUP; dir<=TUP; dir++){
    wait_gather(tag[dir]);
  }
  
  /* wait for the 3-neighbours from positive directions, multiply */
  for(dir=X3UP; dir<=T3UP; dir++){
    wait_gather(tag[dir]);
  }
  FORSOMEPARITYDOMAIN_OMP(i,s,parity, private(fat4,long4) ){
    if( i < loopend-FETCH_UP ){
      fat4 = &(t_fatlink[4*(i+FETCH_UP)]);
      prefetch_4MVVVV( 
		      fat4,
		      (su3_vector *)gen_pt[XUP][i+FETCH_UP],
		      (su3_vector *)gen_pt[YUP][i+FETCH_UP],
		      (su3_vector *)gen_pt[ZUP][i+FETCH_UP],
		      (su3_vector *)gen_pt[TUP][i+FETCH_UP] );
#ifndef NO_LONG_LINKS
      long4 = &(t_longlink[4*(i+FETCH_UP)]);
      prefetch_VV(
		  (su3_vector *)F_PT(s+FETCH_UP,dest),
		  templongv1+i+FETCH_UP);
      prefetch_4MVVVV( 
		      long4,
		      (su3_vector *)gen_pt[X3UP][i+FETCH_UP],
		      (su3_vector *)gen_pt[Y3UP][i+FETCH_UP],
		      (su3_vector *)gen_pt[Z3UP][i+FETCH_UP],
		      (su3_vector *)gen_pt[T3UP][i+FETCH_UP] );
#endif
    }
    fat4 = &(t_fatlink[4*i]);
    mult_su3_mat_vec_sum_4dir( fat4,
	       (su3_vector *)gen_pt[XUP][i], (su3_vector *)gen_pt[YUP][i],
	       (su3_vector *)gen_pt[ZUP][i], (su3_vector *)gen_pt[TUP][i],
	       (su3_vector *)F_PT(s,dest));
#ifndef NO_LONG_LINKS
    long4 = &(t_longlink[4*i]);
    mult_su3_mat_vec_sum_4dir( long4,
	       (su3_vector *)gen_pt[X3UP][i], (su3_vector *)gen_pt[Y3UP][i],
	       (su3_vector *)gen_pt[Z3UP][i], (su3_vector *)gen_pt[T3UP][i],
	       templongv1+i);
#endif
  } END_LOOP_OMP
      
  /* Wait gathers from negative directions, accumulate (negative) */
  for(dir=XUP; dir<=TUP; dir++){
    wait_gather(tag[OPP_DIR(dir)]);
  } 
  
  /* and the same for the negative 3-rd neighbours */
  
  for(dir=X3UP; dir<=T3UP; dir++){
    wait_gather(tag[OPP_3_DIR(dir)]);
  }
  
  FORSOMEPARITYDOMAIN_OMP(i,s,parity, ){
    if( i < loopend-FETCH_UP ){
#ifndef NO_LONG_LINKS
      prefetch_VV(
		  (su3_vector *)F_PT(s+FETCH_UP,dest),
		  templongv1+i+FETCH_UP);
#endif
      prefetch_VVVV( 
		    (su3_vector *)gen_pt[XDOWN][i+FETCH_UP],
		    (su3_vector *)gen_pt[YDOWN][i+FETCH_UP],
		    (su3_vector *)gen_pt[ZDOWN][i+FETCH_UP],
		    (su3_vector *)gen_pt[TDOWN][i+FETCH_UP] );
      prefetch_VVVV( 
		    (su3_vector *)gen_pt[X3DOWN][i+FETCH_UP],
		    (su3_vector *)gen_pt[Y3DOWN][i+FETCH_UP],
		    (su3_vector *)gen_pt[Z3DOWN][i+FETCH_UP],
		    (su3_vector *)gen_pt[T3DOWN][i+FETCH_UP] );
    }
    sub_four_su3_vecs( (su3_vector *)F_PT(s,dest),
		       (su3_vector *)(gen_pt[XDOWN][i]),
		       (su3_vector *)(gen_pt[YDOWN][i]),
		       (su3_vector *)(gen_pt[ZDOWN][i]),
		       (su3_vector *)(gen_pt[TDOWN][i]) );
#ifndef NO_LONG_LINKS
    sub_four_su3_vecs( templongv1+i,
		       (su3_vector *)(gen_pt[X3DOWN][i]),
		       (su3_vector *)(gen_pt[Y3DOWN][i]),
		       (su3_vector *)(gen_pt[Z3DOWN][i]),
		       (su3_vector *)(gen_pt[T3DOWN][i]) );
    /*** Now need to add these things together ***/
    add_su3_vector((su3_vector *)F_PT(s,dest), templongv1+i,
		   (su3_vector *)F_PT(s,dest));
#endif
  } END_LOOP_OMP
      
}
Example #7
0
/* Special dslash for use by congrad.  Uses restart_gather_field() when
  possible. Next to last argument is an array of message tags, to be set
  if this is the first use, otherwise reused. If start=1,use
  start_gather_field, otherwise use restart_gather_field. 
  The calling program must clean up the gathers and temps! */
void dslash_fn_field_special(su3_vector *src, su3_vector *dest,
			     int parity, msg_tag **tag, int start,
			     fn_links_t *fn){
  register int i;
  register site *s;
  register int dir,otherparity=0;
  register su3_matrix *fat4;
  su3_matrix *t_fatlink;
#ifndef NO_LONG_LINKS
  register su3_matrix *long4;
  su3_matrix *t_longlink;
#endif
  
  /* allocate temporary work space only if not already allocated */
  if(temp_not_allocated)
    {
      for( dir=XUP; dir<=TUP; dir++ ){
	temp[dir]  =(su3_vector *)malloc(sites_on_node*sizeof(su3_vector));
	temp[dir+4]=(su3_vector *)malloc(sites_on_node*sizeof(su3_vector));
      }
      temp[8]=(su3_vector *)malloc(sites_on_node*sizeof(su3_vector));
      temp_not_allocated = 0 ;
    }
  
  /* load fatlinks and longlinks */
  if(fn == NULL){
    printf("dslash_fn_field_special: invalid fn links!\n");
    terminate(1);
  }
#ifndef NO_LONG_LINKS
  t_longlink = get_lnglinks(fn);
#endif
  t_fatlink = get_fatlinks(fn);

  switch(parity)
    {
    case EVEN:	otherparity=ODD; break;
    case ODD:	otherparity=EVEN; break;
    case EVENANDODD:	otherparity=EVENANDODD; break;
    }
  
  /* Start gathers from positive directions */
  /* And start the 3-step gather too */
  for( dir=XUP; dir<=TUP; dir++ ){
    if(start==1)
      {
	tag[dir] = start_gather_field( src, sizeof(su3_vector), 
					   dir, parity,gen_pt[dir] );
#ifndef NO_LONG_LINKS
	tag[DIR3(dir)] = start_gather_field(src, sizeof(su3_vector),
						DIR3(dir),parity, 
						gen_pt[DIR3(dir)] );
#endif
      }
    else
      {
	restart_gather_field( src, sizeof(su3_vector), 
				  dir, parity,gen_pt[dir], tag[dir]);
#ifndef NO_LONG_LINKS
	restart_gather_field(src, sizeof(su3_vector), DIR3(dir), parity, 
				 gen_pt[DIR3(dir)], tag[DIR3(dir)]);
#endif
      }
  }
  
  /* Multiply by adjoint matrix at other sites */
  /* Use fat link for single link transport */
  FORSOMEPARITYDOMAIN_OMP( i, s, otherparity, private(fat4,long4) ){
    //NOPRE if( i < loopend-FETCH_UP ){
       //NOPRE fat4 = &(t_fatlink[4*(i+FETCH_UP)]);
       //NOPRE prefetch_V(&(src[i+FETCH_UP]));
       //NOPRE prefetch_4MVVVV( 
		       //NOPRE fat4,
		       //NOPRE &(temp[0][i+FETCH_UP]),
		       //NOPRE &(temp[1][i+FETCH_UP]),
		       //NOPRE &(temp[2][i+FETCH_UP]),
		       //NOPRE &(temp[3][i+FETCH_UP]) );
#ifndef NO_LONG_LINKS
       //NOPRE long4 = &(t_longlink[4*(i+FETCH_UP)]);
       //NOPRE prefetch_4MVVVV( 
		       //NOPRE long4,
		       //NOPRE &(temp[4][i+FETCH_UP]),
		       //NOPRE &(temp[5][i+FETCH_UP]),
		       //NOPRE &(temp[6][i+FETCH_UP]),
		       //NOPRE &(temp[7][i+FETCH_UP]) );
#endif
    //NOPRE }

    fat4 = &(t_fatlink[4*i]);
    mult_adj_su3_mat_4vec( fat4, &(src[i]), &(temp[0][i]),
			   &(temp[1][i]), &(temp[2][i]), &(temp[3][i]) );
#ifndef NO_LONG_LINKS
    /* multiply by 3-link matrices too */
    long4 = &(t_longlink[4*i]);
    mult_adj_su3_mat_4vec( long4, &(src[i]),&(temp[4][i]),
			   &(temp[5][i]), &(temp[6][i]), &(temp[7][i]) );
#endif
  } END_LOOP_OMP
      
  /* Start gathers from negative directions */
  for( dir=XUP; dir <= TUP; dir++){
    if (start==1) tag[OPP_DIR(dir)] = start_gather_field( temp[dir],
	  sizeof(su3_vector), OPP_DIR( dir), parity, gen_pt[OPP_DIR(dir)] );
    else restart_gather_field( temp[dir], sizeof(su3_vector), 
	   OPP_DIR( dir), parity, gen_pt[OPP_DIR(dir)], tag[OPP_DIR(dir)] );
  }

  /* Start 3-neighbour gathers from negative directions */
  for( dir=X3UP; dir <= T3UP; dir++){
    if (start==1) tag[OPP_3_DIR(dir)]=start_gather_field(
		 temp[INDEX_3RD(dir)+4], sizeof(su3_vector), 
		 OPP_3_DIR( dir), parity, gen_pt[OPP_3_DIR(dir)] );
    else restart_gather_field(temp[INDEX_3RD(dir)+4], 
	      sizeof(su3_vector), OPP_3_DIR( dir),parity, 
	      gen_pt[OPP_3_DIR(dir)], tag[OPP_3_DIR(dir)] );
  }

  /* Wait gathers from positive directions, multiply by matrix and
     accumulate */
  /* wait for the 3-neighbours from positive directions, multiply */
  for(dir=XUP; dir<=TUP; dir++){
    wait_gather(tag[dir]);
#ifndef NO_LONG_LINKS
    wait_gather(tag[DIR3(dir)]);
#endif
  }
  
  FORSOMEPARITYDOMAIN_OMP(i,s,parity, private(fat4,long4) ){
    //NOPRE if( i < loopend-FETCH_UP ){
      //NOPRE fat4 = &(t_fatlink[4*(i+FETCH_UP)]);
      //NOPRE prefetch_4MVVVV( 
		      //NOPRE fat4,
		      //NOPRE (su3_vector *)gen_pt[XUP][i+FETCH_UP],
		      //NOPRE (su3_vector *)gen_pt[YUP][i+FETCH_UP],
		      //NOPRE (su3_vector *)gen_pt[ZUP][i+FETCH_UP],
		      //NOPRE (su3_vector *)gen_pt[TUP][i+FETCH_UP] );
      //NOPRE prefetch_VVVV( 
		    //NOPRE (su3_vector *)gen_pt[XDOWN][i+FETCH_UP],
		    //NOPRE (su3_vector *)gen_pt[YDOWN][i+FETCH_UP],
		    //NOPRE (su3_vector *)gen_pt[ZDOWN][i+FETCH_UP],
		    //NOPRE (su3_vector *)gen_pt[TDOWN][i+FETCH_UP] );
#ifndef NO_LONG_LINKS
      //NOPRE long4 = &(t_longlink[4*(i+FETCH_UP)]);
      //NOPRE prefetch_4MVVVV( 
		      //NOPRE long4,
		      //NOPRE (su3_vector *)gen_pt[X3UP][i+FETCH_UP],
		      //NOPRE (su3_vector *)gen_pt[Y3UP][i+FETCH_UP],
		      //NOPRE (su3_vector *)gen_pt[Z3UP][i+FETCH_UP],
		      //NOPRE (su3_vector *)gen_pt[T3UP][i+FETCH_UP] );
      //NOPRE prefetch_VVVV( 
		    //NOPRE (su3_vector *)gen_pt[X3DOWN][i+FETCH_UP],
		    //NOPRE (su3_vector *)gen_pt[Y3DOWN][i+FETCH_UP],
		    //NOPRE (su3_vector *)gen_pt[Z3DOWN][i+FETCH_UP],
		    //NOPRE (su3_vector *)gen_pt[T3DOWN][i+FETCH_UP] );
#endif
    //NOPRE }
    
    fat4 = &(t_fatlink[4*i]);
    mult_su3_mat_vec_sum_4dir( fat4,
	       (su3_vector *)gen_pt[XUP][i], (su3_vector *)gen_pt[YUP][i],
	       (su3_vector *)gen_pt[ZUP][i], (su3_vector *)gen_pt[TUP][i],
	       &(dest[i]) );
    
#ifndef NO_LONG_LINKS
    long4 = &(t_longlink[4*i]);
    mult_su3_mat_vec_sum_4dir( long4,
	    (su3_vector *)gen_pt[X3UP][i], (su3_vector *)gen_pt[Y3UP][i],
	    (su3_vector *)gen_pt[Z3UP][i], (su3_vector *)gen_pt[T3UP][i],
	    &(temp[8][i]));
#endif
  } END_LOOP_OMP
   
  /* Wait gathers from negative directions, accumulate (negative) */
  /* and the same for the negative 3-rd neighbours */
  for(dir=XUP; dir<=TUP; dir++){
    wait_gather(tag[OPP_DIR(dir)]);
  }
  for(dir=X3UP; dir<=T3UP; dir++){
    wait_gather(tag[OPP_3_DIR(dir)]);
  }
  
  FORSOMEPARITYDOMAIN_OMP(i,s,parity, ){
    //NOPRE if( i < loopend-FETCH_UP ){
      //NOPRE prefetch_VVVVV( 
		     //NOPRE &(dest[i+FETCH_UP]),
		     //NOPRE (su3_vector *)gen_pt[XDOWN][i+FETCH_UP],
		     //NOPRE (su3_vector *)gen_pt[YDOWN][i+FETCH_UP],
		     //NOPRE (su3_vector *)gen_pt[ZDOWN][i+FETCH_UP],
		     //NOPRE (su3_vector *)gen_pt[TDOWN][i+FETCH_UP] );
      //NOPRE prefetch_VVVVV( 
		     //NOPRE &(temp[8][i+FETCH_UP]), 
		     //NOPRE (su3_vector *)gen_pt[X3DOWN][i+FETCH_UP],
		     //NOPRE (su3_vector *)gen_pt[Y3DOWN][i+FETCH_UP],
		     //NOPRE (su3_vector *)gen_pt[Z3DOWN][i+FETCH_UP],
		     //NOPRE (su3_vector *)gen_pt[T3DOWN][i+FETCH_UP] );
    //NOPRE }
    
    sub_four_su3_vecs( &(dest[i]),
		       (su3_vector *)(gen_pt[XDOWN][i]),
		       (su3_vector *)(gen_pt[YDOWN][i]),
		       (su3_vector *)(gen_pt[ZDOWN][i]),
		       (su3_vector *)(gen_pt[TDOWN][i]) );
    sub_four_su3_vecs( &(temp[8][i]), 
		       (su3_vector *)(gen_pt[X3DOWN][i]),
		       (su3_vector *)(gen_pt[Y3DOWN][i]),
		       (su3_vector *)(gen_pt[Z3DOWN][i]),
		       (su3_vector *)(gen_pt[T3DOWN][i]) );
    /* Now need to add these things together */
    add_su3_vector(&(dest[i]), &(temp[8][i]),&(dest[i]));
  } END_LOOP_OMP
      
}
Example #8
0
/* D_slash routine - sets dest. on each site equal to sum of
   sources parallel transported to site, with minus sign for transport
   from negative directions.  Use "fatlinks" for one link transport,
   "longlinks" for three link transport. */
void dslash_fn( field_offset src, field_offset dest, int parity ) {
   register int i;
   register site *s;
   register int dir,otherparity;
   register su3_matrix *fat4, *long4;
   msg_tag *tag[16];

    if(!valid_longlinks)load_longlinks();
    if(!valid_fatlinks)load_fatlinks();
    switch(parity){
	case EVEN:	otherparity=ODD; break;
	case ODD:	otherparity=EVEN; break;
	case EVENANDODD:	otherparity=EVENANDODD; break;
    }

    /* Start gathers from positive directions */
    /* And start the 3-step gather too */
    for( dir=XUP; dir<=TUP; dir++ ){
	tag[dir] = start_gather( src, sizeof(su3_vector), dir, parity,
	    gen_pt[dir] );
	tag[DIR3(dir)] = start_gather( src, sizeof(su3_vector), DIR3(dir),
	    parity, gen_pt[DIR3(dir)] );
    }

    /* Multiply by adjoint matrix at other sites */
    /* Use fat link for single link transport */
    FORSOMEPARITY( i, s, otherparity ){
      if( i < loopend-FETCH_UP ){
#ifdef DSLASH_TMP_LINKS
	fat4 = &(t_fatlink[4*(i+FETCH_UP)]);
	long4 = &(t_longlink[4*(i+FETCH_UP)]);
#else
	fat4 = (s+FETCH_UP)->fatlink;
	long4 = (s+FETCH_UP)->longlink;
#endif
	prefetch_4MV4V( 
		       fat4,
		       (su3_vector *)F_PT(s+FETCH_UP,src),
		       (s+FETCH_UP)->tempvec );
	prefetch_4MV4V(
		       long4,
		       (su3_vector *)F_PT(s+FETCH_UP,src),
		       (s+FETCH_UP)->templongvec );
      }

#ifdef DSLASH_TMP_LINKS
      fat4 = &(t_fatlink[4*i]);
      long4 = &(t_longlink[4*i]);
#else
      fat4 = s->fatlink;
      long4 = s->longlink;
#endif
	mult_adj_su3_mat_vec_4dir( fat4,
	    (su3_vector *)F_PT(s,src), s->tempvec );
	/* multiply by 3-link matrices too */
	mult_adj_su3_mat_vec_4dir( long4,
	    (su3_vector *)F_PT(s,src), s->templongvec );
    } END_LOOP

    /* Start gathers from negative directions */
    for( dir=XUP; dir <= TUP; dir++){
	tag[OPP_DIR(dir)] = start_gather( F_OFFSET(tempvec[dir]),
	    sizeof(su3_vector), OPP_DIR( dir), parity,
	    gen_pt[OPP_DIR(dir)] );
    }

    /* Start 3-neighbour gathers from negative directions */
    for( dir=X3UP; dir <= T3UP; dir++){
	tag[OPP_3_DIR(dir)] 
           = start_gather( F_OFFSET(templongvec[INDEX_3RD(dir)]),
			   sizeof(su3_vector), OPP_3_DIR( dir), parity,
			   gen_pt[OPP_3_DIR(dir)] );
    }

    /* Wait gathers from positive directions, multiply by matrix and
	accumulate */
    /* wait for the 3-neighbours from positive directions, multiply */
    for(dir=XUP; dir<=TUP; dir++){
	wait_gather(tag[dir]);
	wait_gather(tag[DIR3(dir)]);
    }
    /* Wait gathers from negative directions, accumulate (negative) */
    /* and the same for the negative 3-rd neighbours */
    for(dir=XUP; dir<=TUP; dir++){
	wait_gather(tag[OPP_DIR(dir)]);
    }
    for(dir=X3UP; dir<=T3UP; dir++){
	wait_gather(tag[OPP_3_DIR(dir)]);
    }


    FORSOMEPARITY(i,s,parity){
#ifdef DSLASH_TMP_LINKS
      fat4 = &(t_fatlink[4*i]);
      long4 = &(t_longlink[4*i]);
#else
      fat4 = s->fatlink;
      long4 = s->longlink;
#endif
      mult_su3_mat_vec_sum_4dir( fat4,
	    (su3_vector *)gen_pt[XUP][i], (su3_vector *)gen_pt[YUP][i],
	    (su3_vector *)gen_pt[ZUP][i], (su3_vector *)gen_pt[TUP][i],
	    (su3_vector *)F_PT(s,dest));

      mult_su3_mat_vec_sum_4dir( long4,
	    (su3_vector *)gen_pt[X3UP][i], (su3_vector *)gen_pt[Y3UP][i],
	    (su3_vector *)gen_pt[Z3UP][i], (su3_vector *)gen_pt[T3UP][i],
	    (su3_vector *) &(s->templongv1));

      if( i < loopend-FETCH_UP ){
#ifdef DSLASH_TMP_LINKS
	fat4 = &(t_fatlink[4*(i+FETCH_UP)]);
	long4 = &(t_longlink[4*(i+FETCH_UP)]);
#else
	fat4 = (s+FETCH_UP)->fatlink;
	long4 = (s+FETCH_UP)->longlink;
#endif
	prefetch_4MVVVV( 
              fat4,
	      (su3_vector *)gen_pt[XUP][i+FETCH_UP],
              (su3_vector *)gen_pt[YUP][i+FETCH_UP],
              (su3_vector *)gen_pt[ZUP][i+FETCH_UP],
              (su3_vector *)gen_pt[TUP][i+FETCH_UP] );
	prefetch_4MVVVV( 
              long4,
              (su3_vector *)gen_pt[X3UP][i+FETCH_UP],
              (su3_vector *)gen_pt[Y3UP][i+FETCH_UP],
              (su3_vector *)gen_pt[Z3UP][i+FETCH_UP],
              (su3_vector *)gen_pt[T3UP][i+FETCH_UP] );
	prefetch_VVVV( 
              (su3_vector *)gen_pt[XDOWN][i+FETCH_UP],
              (su3_vector *)gen_pt[YDOWN][i+FETCH_UP],
              (su3_vector *)gen_pt[ZDOWN][i+FETCH_UP],
              (su3_vector *)gen_pt[TDOWN][i+FETCH_UP] );
	prefetch_VVVV( 
              (su3_vector *)gen_pt[X3DOWN][i+FETCH_UP],
              (su3_vector *)gen_pt[Y3DOWN][i+FETCH_UP],
              (su3_vector *)gen_pt[Z3DOWN][i+FETCH_UP],
              (su3_vector *)gen_pt[T3DOWN][i+FETCH_UP] );
        }

        sub_four_su3_vecs( (su3_vector *)F_PT(s,dest),
	    (su3_vector *)(gen_pt[XDOWN][i]),
	    (su3_vector *)(gen_pt[YDOWN][i]),
	    (su3_vector *)(gen_pt[ZDOWN][i]),
	    (su3_vector *)(gen_pt[TDOWN][i]) );
        sub_four_su3_vecs( &(s->templongv1), 
	    (su3_vector *)(gen_pt[X3DOWN][i]),
	    (su3_vector *)(gen_pt[Y3DOWN][i]),
	    (su3_vector *)(gen_pt[Z3DOWN][i]),
	    (su3_vector *)(gen_pt[T3DOWN][i]) );
        /* Now need to add these things together */
        add_su3_vector((su3_vector *)F_PT(s,dest), & (s->templongv1),
			           (su3_vector *)F_PT(s,dest));
    } END_LOOP
Example #9
0
/* dest <- dest + sum_i eigVec[i]*H^{-1}*eigVec[i].resid, 
   resid = src - (-Dslash^2 + 4*mass^2)*dest
*/
static void initCG(su3_vector *src, su3_vector *dest, int Nvecs_curr, int Nvecs_max,
		   su3_vector **eigVec, double_complex *H, Real mass, int parity,
		   imp_ferm_links_t *fn){

  /* Constants */
  int ione = 1;
  int otherparity = (parity == EVEN) ? ODD : EVEN;
  Real msq_x4 = 4.0*mass*mass;
  double dzero = (double)0.0;
  double_complex zzero = dcmplx(dzero, dzero);

  register int i;
  int j, info;
  double_complex cc;
  double_complex *c, *H2;
  su3_vector *resid;

  c = (double_complex *)malloc(Nvecs_curr*sizeof(double_complex));
  resid = (su3_vector *)malloc(sites_on_node*sizeof(su3_vector));

  /* resid <- src - (-Dslash^2 + 4*mass^2)*dest */
  dslash_fn_field(dest, resid, otherparity, fn);
  dslash_fn_field(resid, resid, parity, fn);
  FORSOMEFIELDPARITY_OMP(i, parity, default(shared)){
    scalar_mult_sum_su3_vector(resid+i, dest+i, -msq_x4);
    add_su3_vector(resid+i, src+i, resid+i);
  } END_LOOP_OMP

  /* c[i] = eigVec[i].resid */
  for(j = 0; j < Nvecs_curr; j++){
//    c[j] = zzero;
//    FORSOMEFIELDPARITY_OMP(i, parity, private(cc) reduction(+:c[j])){
//      cc = su3_dot(eigVec[j]+i, resid+i);
//      CSUM(c[j], cc);
//    } END_LOOP_OMP

    double cctotr=0., cctoti=0.;
    FORSOMEFIELDPARITY_OMP(i, parity, private(cc) reduction(+:cctotr,cctoti)){
      cc = su3_dot(eigVec[j]+i, resid+i);
      cctotr += cc.real;
      cctoti += cc.imag;
    } END_LOOP_OMP;
    c[j].real = cctotr;
    c[j].imag = cctoti;

  }
  g_vecdcomplexsum(c, Nvecs_curr);

  free(resid);

  H2 = (double_complex *)malloc(Nvecs_curr*Nvecs_curr*sizeof(double_complex));

  /* H2 = H + 4*mass^2*I */
  for(j = 0; j < Nvecs_curr; j++){
    zcopy_(&Nvecs_curr, H+Nvecs_max*j, &ione, H2+Nvecs_curr*j, &ione);
    CSUM(H2[(Nvecs_curr+1)*j], dcmplx(msq_x4, dzero));
  }

  /* Compute H^{-1}*c = H^{-1}*eigVec.resid with Cholesky decomposition */
  zpotrf_("U", &Nvecs_curr, H2, &Nvecs_curr, &info);
  zpotrs_("U", &Nvecs_curr, &ione, H2, &Nvecs_curr, c, &Nvecs_curr, &info);

  free(H2);

  /* dest <- dest + sum_j c[i]*eigVec[i] = dest + sum_i eigVec[i]*H^{-1}*eigVec[i].resid */ 
  for(j = 0; j < Nvecs_curr; j++){
    FORSOMEFIELDPARITY_OMP(i, parity, default(shared)){
      c_scalar_mult_add_su3vec(dest+i, c+j, eigVec[j]+i);
    } END_LOOP_OMP
  }

  free(c);
}
Example #10
0
  FORALLSITES(i,s)
    {
      add_su3_vector( (su3_vector *)F_PT(s,dest), (su3_vector *)gen_pt[1][i], 
		      (su3_vector *)F_PT(s,dest) ) ;    
    }
Example #11
0
 FORALLSITES(i,s)
   {
     add_su3_vector( dest+i, (su3_vector *)gen_pt[1][i], dest+i ) ;    
   }