Exemplo n.º 1
0
/*
 * Multi-offset conjugate-gradient setup that works on "reversed" (site-major)
 * copies of the solution and auxiliary vectors: psim_rev[site][offset]
 * instead of the caller's psim[offset][site].
 *
 * Only the beginning of this routine is visible here (argument unpacking,
 * workspace allocation, shift bookkeeping and the first initialization loop);
 * the solver loop and any cleanup are outside this view.
 *
 * NOTE(review): the function is declared `void`, so it returns nothing even
 * though the original comment spoke of a return value.  The visible code only
 * resets qic->final_iters and qic->final_restart to 0; presumably the
 * iteration count is reported through qic later -- confirm.
 */
static void ks_multicg_reverse_field(	/* declared void: no return value (see note above) */
    su3_vector *src,	/* source vector (type su3_vector) */
    su3_vector **psim,	/* solution vectors, indexed psim[offset][site] */
    ks_param *ksp,	/* KS parameters, including the offsets */
    int num_offsets,	/* number of offsets */
    quark_invert_control *qic,	/* inversion control: max, nrestart, resid, parity are read here */
    imp_ferm_links_t *fn      /* Storage for fermion links */
    )
{
    char myname[] = "ks_multicg_reverse_field";
    /* Site su3_vector's resid, cg_p and ttt are used as temporaries */
    register int i;
    register site *s;
    int iteration;	/* counter for iterations */
    int num_offsets_now; /* number of offsets still being worked on */
    double c1, c2, rsq, oldrsq, pkp;		/* pkp = cg_p.K.cg_p */
    double source_norm;	/* squared magnitude of source vector */
    double rsqstop;	/* stopping residual normalized by source norm */
    int l_parity=0;	/* parity we are currently doing */
    int l_otherparity=0; /* the other parity */
#ifdef FN
    msg_tag *tags1[16], *tags2[16];	/* tags for gathers to parity and opposite */
#endif
    int special_started;	/* 1 if dslash_special has been called */
    int j, j_low;	/* j_low = index of the smallest offset */
    Real *shifts, mass_low, msq_xm4;
    double *zeta_i, *zeta_im1, *zeta_ip1;
    double *beta_i, *beta_im1, *alpha;
    // su3_vector **pm;	/* vectors not involved in gathers */

    /* Site-major ("index-switched") views: X_rev[i] points at the
       num_offsets consecutive vectors for site i inside X_space. */
    su3_vector **psim_rev; su3_vector *psim_space;
    su3_vector **pm_rev; su3_vector *pm_space;

    /* Unpack structure */
    /* We don't restart this algorithm, so we adopt the convention of
       taking the product here */
    int niter        = qic->max*qic->nrestart;
    Real rsqmin      = qic->resid * qic->resid;    /* desired squared residual - 
					 normalized as sqrt(r*r)/sqrt(src_e*src_e) */
    int parity       = qic->parity;   /* EVEN, ODD */
    
/* Timing */
#ifdef CGTIME
    double dtimec;
#endif
    double nflop;

    qic->final_iters = 0;
    qic->final_restart = 0;

    //#if FERM_ACTION == HISQ
    //    fn->hl.current_X_set = 0;
    //    restore_fn_links(fn);
    //#endif
    /* Nothing to solve; this return happens before any allocation below. */
    if( num_offsets==0 )return;

    if(fn == NULL){
      printf("%s(%d): Called with NULL fn\n", myname, this_node);
      terminate(1);
    }

    /* Build the site-major copies: psim_rev[i][j] = psim[j][i]. */
    /* NOTE(review): only psim_space and pm_space are checked for NULL;
       psim_rev and pm_rev are dereferenced in the loop below without a
       check.  The failure path calls exit(0), i.e. a success status,
       whereas the NULL-fn path above uses terminate(1) -- confirm intent. */
    psim_rev = (su3_vector **)malloc( sizeof(su3_vector *)*sites_on_node );
    psim_space = (su3_vector *)malloc( sizeof(su3_vector)*sites_on_node*num_offsets );
    pm_rev = (su3_vector **)malloc( sizeof(su3_vector *)*sites_on_node );
    pm_space = (su3_vector *)malloc( sizeof(su3_vector)*sites_on_node*num_offsets );
    if( psim_space == NULL || pm_space == NULL){printf("%s: NO ROOM!\n",myname); exit(0); }
    for( i=0; i<sites_on_node; i++ ){
	psim_rev[i] = &(psim_space[num_offsets*i]);
	pm_rev[i] = &(pm_space[num_offsets*i]);
	for( j=0; j<num_offsets; j++){
	    psim_rev[i][j] = psim[j][i];
	}
    }

/* debug */
#ifdef CGTIME
    /* This value is overwritten by the second dtimec assignment further
       down, so the work in between is not included in the timing. */
    dtimec = -dclock(); 
#endif

    /* Flop estimate used for reporting; doubled when both parities are done. */
    nflop = 1205 + 15*num_offsets;
    if(parity==EVENANDODD)nflop *=2;
	
    special_started = 0;
    /* if we want both parities, we will do even first. */
    /* There is no default case: for any other parity value l_parity and
       l_otherparity keep their initial value 0. */
    switch(parity){
	case(EVEN): l_parity=EVEN; l_otherparity=ODD; break;
	case(ODD):  l_parity=ODD; l_otherparity=EVEN; break;
	case(EVENANDODD):  l_parity=EVEN; l_otherparity=ODD; break;
    }

    /* Per-offset coefficient arrays.
       NOTE(review): none of these allocations is checked for NULL. */
    shifts = (Real *)malloc(num_offsets*sizeof(Real));
    zeta_i = (double *)malloc(num_offsets*sizeof(double));
    zeta_im1 = (double *)malloc(num_offsets*sizeof(double));
    zeta_ip1 = (double *)malloc(num_offsets*sizeof(double));
    beta_i = (double *)malloc(num_offsets*sizeof(double));
    beta_im1 = (double *)malloc(num_offsets*sizeof(double));
    alpha = (double *)malloc(num_offsets*sizeof(double));

    //pm = (su3_vector **)malloc(num_offsets*sizeof(su3_vector *));
    /* Find the smallest offset (index j_low) and express every other
       offset as a shift relative to it. */
    /* NOTE(review): if no offset is below 1.0e+20, j_low stays -1 and
       shifts[j_low] below is an out-of-bounds access. */
    mass_low = 1.0e+20;
    j_low = -1;
    for(j=0;j<num_offsets;j++){
	shifts[j] = ksp[j].offset;
	if (ksp[j].offset < mass_low){
	    mass_low = ksp[j].offset;
	    j_low = j;
	}
    }
    for(j=0;j<num_offsets;j++) if(j!=j_low){
	//pm[j] = (su3_vector *)malloc(sites_on_node*sizeof(su3_vector));
	shifts[j] -= shifts[j_low];
    }
    /* shifts[j_low] still holds the unshifted smallest offset here. */
    msq_xm4 = -shifts[j_low];


    iteration = 0;

#define PAD 0
    /* now we can allocate temporary variables and copy them */
    /* PAD may be used to avoid cache thrashing */
    /* ttt, cg_p, resid and first_multicongrad are not declared in this
       function; presumably they are file-scope variables so the buffers
       are allocated once and reused across calls -- confirm.
       NOTE(review): these allocations are not checked for NULL. */
    if(first_multicongrad) {
      ttt = (su3_vector *) malloc((sites_on_node+PAD)*sizeof(su3_vector));
      cg_p = (su3_vector *) malloc((sites_on_node+PAD)*sizeof(su3_vector));
      resid = (su3_vector *) malloc((sites_on_node+PAD)*sizeof(su3_vector));
      first_multicongrad = 0;
    }

#ifdef CGTIME
    /* Restarts the timer, discarding the earlier start value. */
    dtimec = -dclock(); 
#endif



    /* initialization process */
    /* The goto that targets this label is outside the visible part. */
    start:
#ifdef FN
	if(special_started==1) {        /* clean up gathers */
	    cleanup_gathers(tags1, tags2);
	    special_started = 0;
	}
#endif
	num_offsets_now = num_offsets;
	source_norm = 0.0;
	/* For every site of the current parity: accumulate |src|^2, set
	   resid = cg_p = src, zero all solution vectors, and seed pm_rev
	   with the residual for every offset except j_low (pm_rev[i][j_low]
	   is not written in the visible code). */
	FORSOMEPARITY(i,s,l_parity){
	    source_norm += (double) magsq_su3vec( src+i );
	    su3vec_copy( src+i, &(resid[i]));
	    su3vec_copy(&(resid[i]), &(cg_p[i]));
	    clearvec(&(psim_rev[i][j_low]));
	    for(j=0;j<num_offsets;j++) if(j!=j_low){
		clearvec(&(psim_rev[i][j]));
		su3vec_copy(&(resid[i]), &(pm_rev[i][j]));
	    }
	} END_LOOP;
Exemplo n.º 2
0
Until we have EO versions of dslash_field, we require FN
#endif

#include "generic_ks_includes.h"	/* definitions files and prototypes */
#include "../include/dslash_ks_redefine.h"   /* Actually not used, yet */
#include "../include/loopend.h"

/*#define CGTIME*/

/*
 * Conjugate-gradient setup for two sources (src1, src2) and two masses
 * (mass1, mass2), writing to dest1 and dest2.
 *
 * Only the beginning of this routine is visible here (declarations,
 * workspace allocation and the loop that builds init_guess); the solver
 * loop, the cleanup and the return statement are outside this view.
 */
int ks_congrad_two_src(	/* Return value is number of iterations taken */
    field_offset src1,    /* source vector (type su3_vector) */
    field_offset src2,
    field_offset dest1,	/* solution vectors */
    field_offset dest2,
    Real mass1,
    Real mass2,
    int niter,		/* maximal number of CG iterations */
    int nrestart,       /* maximal number of CG restarts */
    Real rsqmin,	/* desired residue squared */
    int prec,           /* internal precision for the inversion (ignored) */
    int parity,		/* parity to be worked on */
    Real  *final_rsq_ptr, /* final residue squared */
    ferm_links_t *fn       /* Storage for fermion links */
    )
{
    /* Site su3_vector's resid, cg_p and ttt are used as temporaries */
    register int i;
    register site *s;
    int iteration;       /* counter for iterations */
    double c1, c2, rsq, oldrsq, pkp;            /* pkp = cg_p.K.cg_p */
    double source_norm,source_norm1;	/* squared magnitude of source vector */
    double rsqstop;	/* stopping residual normalized by source norm */
    int l_parity;	/* parity we are currently doing */
    int l_otherparity;	/* the other parity */
    /* Declared unconditionally here, but only used under #ifdef FN in the
       visible part of the function. */
    msg_tag *tags1[16], *tags2[16];	/* tags for gathers to parity and opposite */
    int special_started;	/* 1 if dslash_special has been called */
    int j, jud, jstrange;	/* jud = 0 and jstrange = 1 are set below; presumably
				   indices into the two-element coefficient arrays -- confirm */
    double shift, msq_xm4;
    double zeta_i[2], zeta_im1[2], zeta_ip1[2];
    double beta_i[2], beta_im1[2], alpha[2];
    int first;                                                 
    /* Per-call work vectors, each sized sites_on_node (allocated below). */
    su3_vector *temp;
    su3_vector *destvec1;
    su3_vector *destvec2;
    su3_vector *pm_strange;
    su3_vector *init_guess;
    su3_vector *common_source;
    su3_vector *ttt;
    su3_vector *cg_p;
    su3_vector *resid;

/* Timing */
#ifdef CGTIME
    double dtimed,dtimec;
#endif
    double nflop;

/* debug */
#ifdef CGTIME
    /* This value is overwritten by the second dtimec assignment further
       down, so the allocations in between are not timed. */
    dtimec = -dclock();   
#endif
    first = 0;
    nflop = 1187;	/* THIS LOOKS WRONG - DT */
    if(parity==EVENANDODD)nflop *=2;
	
    special_started = 0;
    /* if we want both parities, we will do even first. */
    /* NOTE(review): there is no default case and l_parity / l_otherparity
       have no initializer, so any other parity value leaves them
       uninitialized when they are used below. */
    switch(parity){
	case(EVEN): l_parity=EVEN; l_otherparity=ODD; break;
	case(ODD):  l_parity=ODD; l_otherparity=EVEN; break;
	case(EVENANDODD):  l_parity=EVEN; l_otherparity=ODD; break;
     }
    jud = 0;
    jstrange = 1;
    /* NOTE(review): none of the nine allocations below is checked for
       NULL before use. */
    temp=(su3_vector *)malloc(sites_on_node*sizeof(su3_vector));
    destvec1=(su3_vector *)malloc(sites_on_node*sizeof(su3_vector));
    destvec2=(su3_vector *)malloc(sites_on_node*sizeof(su3_vector));
    ttt=(su3_vector *)malloc(sites_on_node*sizeof(su3_vector));
    cg_p=(su3_vector *)malloc(sites_on_node*sizeof(su3_vector));
    resid=(su3_vector *)malloc(sites_on_node*sizeof(su3_vector));
    pm_strange = (su3_vector *)malloc(sites_on_node*sizeof(su3_vector));
    init_guess = (su3_vector *)malloc(sites_on_node*sizeof(su3_vector) );
    common_source = (su3_vector *)malloc(sites_on_node*sizeof(su3_vector) );
    /* Difference of the two 4*m^2 terms.
       NOTE(review): this is zero when mass1*mass1 == mass2*mass2, and
       1.0/shift is evaluated in the loop below. */
    shift = 4.0*( mass2*mass2 - mass1*mass1);
    
    msq_xm4 = -4.0*mass1*mass1;                              
    iteration = 0;

#ifdef CGTIME
    /* Restarts the timer, discarding the earlier start value. */
    dtimec = -dclock();                   
#endif
    /* initialization process */
    /* The goto that targets this label is outside the visible part. */
    start:
#ifdef FN
	if(special_started==1) {        /* clean up gathers */
	    cleanup_gathers(tags1, tags2);
	    special_started = 0;
	}
#endif
	/* This loop calculates init_guess = (phi2 - phi1)/shift = X0, which
	 * is used to equalize the two sources.  Here phi1 and phi2 are the
	 * site fields at src1 and src2. */
	
	FORSOMEPARITY(i,s,l_parity){
	    sub_su3_vector( (su3_vector *)F_PT(s,src2),(su3_vector *)F_PT(s,src1),&temp[i] );      
	    scalar_mult_su3_vector(&temp[i],1.0/shift,&init_guess[i] );
       }END_LOOP