C++ (Cpp) update_momenta示例

编程语言: C++ (Cpp)

方法/功能: update_momenta

hotexamples.com的示例: 9

C++ (Cpp) update_momenta - 已找到9个示例。这些是从开源项目中提取的最受好评的update_momenta现实C++ (Cpp)示例。您可以评价示例，以帮助我们提高示例质量。

示例#1

显示文件

文件： leapfrog.c 项目： MrMage/schwinger

/*
 * Leapfrog integration with step size dtau.
 *
 * Sequence: one momentum update of dtau/2, then (nsteps-1) pairs of
 * "field update, momentum update" with the full dtau, one more field update
 * (the fields get one more full step than the momenta), and finally a
 * closing momentum update of dtau/2.
 */
void leapfrog(const int nsteps, const double dtau) {
  const double half_dtau = 0.5*dtau;
  int pairs_left = nsteps-1;

  /* opening half step for the momenta */
  update_momenta(half_dtau);

  /* interior: full field step followed by full momentum step */
  while(pairs_left > 0) {
    update_gauge(dtau);
    update_momenta(dtau);
    pairs_left--;
  }

  /* the extra full step for the fields */
  update_gauge(dtau);

  /* closing half step for the momenta */
  update_momenta(half_dtau);
}

示例#2

显示文件

文件： integrator.c 项目： bknippsch/tmLQCD

/*
 * Issues one momentum update on every timescale, walking from level S down
 * to level 0.
 *
 * The step starts as tau / n_int[S]. On each level the momentum update is
 * weighted by the integrator type of that level (0.5 for LEAPFROG,
 * lambda[level] for MN2, omf4_vartheta for OMF4), and the step is then
 * rescaled for the next finer level. A level whose type matches none of the
 * three is left untouched and does not rescale the step.
 */
void dohalfstep(const double tau, const int S) {
    integrator * const ig = &Integrator;
    double step = tau/((double)ig->n_int[S]);
    int lvl = S;

    /* levels S .. 1: update the momenta, then shrink the step for level lvl-1 */
    while(lvl > 0) {
        if(ig->type[lvl] == LEAPFROG) {
            update_momenta(ig->mnls_per_ts[lvl], 0.5*step, ig->no_mnls_per_ts[lvl], &ig->hf);
            step /= ((double)ig->n_int[lvl-1]);
        }
        else if(ig->type[lvl] == MN2) {
            update_momenta(ig->mnls_per_ts[lvl], ig->lambda[lvl]*step, ig->no_mnls_per_ts[lvl], &ig->hf);
            step /= ((double)ig->n_int[lvl-1])*2;
        }
        else if(ig->type[lvl] == OMF4) {
            update_momenta(ig->mnls_per_ts[lvl], omf4_vartheta*step, ig->no_mnls_per_ts[lvl], &ig->hf);
            step /= ((double)ig->n_int[lvl-1])/omf4_rho;
        }
        lvl--;
    }

    /* level 0: same weighting, but there is no finer level to rescale for */
    if(ig->type[0] == LEAPFROG) {
        update_momenta(ig->mnls_per_ts[0], 0.5*step, ig->no_mnls_per_ts[0], &ig->hf);
    }
    else if(ig->type[0] == MN2) {
        update_momenta(ig->mnls_per_ts[0], ig->lambda[0]*step, ig->no_mnls_per_ts[0], &ig->hf);
    }
    else if(ig->type[0] == OMF4) {
        update_momenta(ig->mnls_per_ts[0], omf4_vartheta*step, ig->no_mnls_per_ts[0], &ig->hf);
    }
}

示例#3

显示文件

文件： integrator.c 项目： bknippsch/tmLQCD

/*
 * Recursive integrator for timescale S over a trajectory of length tau
 * (the "2mnp" scheme of this file).
 *
 * With h = tau / n_int[S], each of the n_int[S] steps has the shape
 *     A(lambda*h)  P(h/2)  A((1-2*lambda)*h)  P(h/2)  A(lambda*h)
 * where P is a momentum update on level S. On level 0, A is a gauge update
 * and the trailing and leading A(lambda*h) of consecutive steps are merged
 * into a single A(2*lambda*h). On higher levels, A is a recursive call on
 * level S-1. halfstep is only forwarded to the recursive calls.
 */
void integrate_2mnp(const double tau, const int S, const int halfstep) {
    integrator * const ig = &Integrator;
    const double h = tau/((double)ig->n_int[S]);
    const double mid_weight = (1.-2.*ig->lambda[S]);
    int k;

    if(S != 0) {
        /* coarser level: the "A" pieces are integrations of level S-1 */
        for(k = 0; k < ig->n_int[S]; k++) {
            integrate_2mnp(ig->lambda[S]*h, S-1, halfstep);
            update_momenta(ig->mnls_per_ts[S], 0.5*h, ig->no_mnls_per_ts[S], &ig->hf);

            integrate_2mnp(mid_weight*h, S-1, halfstep);
            update_momenta(ig->mnls_per_ts[S], 0.5*h, ig->no_mnls_per_ts[S], &ig->hf);

            integrate_2mnp(ig->lambda[S]*h, S-1, halfstep);
        }
        return;
    }

    /* finest level: opening gauge update */
    update_gauge(ig->lambda[0]*h, &ig->hf);

    /* first n_int[0]-1 steps, each ending with the merged 2*lambda gauge update */
    k = 1;
    while(k < ig->n_int[0]) {
        update_momenta(ig->mnls_per_ts[0], 0.5*h, ig->no_mnls_per_ts[0], &ig->hf);
        update_gauge(mid_weight*h, &ig->hf);
        update_momenta(ig->mnls_per_ts[0], 0.5*h, ig->no_mnls_per_ts[0], &ig->hf);
        update_gauge(2*ig->lambda[0]*h, &ig->hf);
        k++;
    }

    /* last step, closed by a single lambda gauge update */
    update_momenta(ig->mnls_per_ts[0], 0.5*h, ig->no_mnls_per_ts[0], &ig->hf);
    update_gauge(mid_weight*h, &ig->hf);
    update_momenta(ig->mnls_per_ts[0], 0.5*h, ig->no_mnls_per_ts[0], &ig->hf);
    update_gauge(ig->lambda[0]*h, &ig->hf);
}

示例#4

显示文件

文件： integrator.c 项目： bknippsch/tmLQCD

/*
 * Leapfrog integrator for timescale S over a trajectory of length tau.
 *
 * On the outermost timescale (S == no_timescales-1) the whole integration is
 * bracketed by two dohalfstep(tau, S) calls. In between, with
 * h = tau / n_int[S]:
 *   - level 0 alternates gauge updates and momentum updates of size h;
 *   - higher levels alternate integrations of level S-1 (via the
 *     itgr->integrate table) and momentum updates of size h.
 * The final momentum update of the sequence is skipped when halfstep == 1
 * and, on higher levels, also when S is the outermost timescale.
 */
void integrate_leap_frog(const double tau, const int S, const int halfstep) {
    integrator * const ig = &Integrator;
    double h;
    int k;

    if(S == ig->no_timescales-1) {
        dohalfstep(tau, S);
    }

    h = tau/((double)ig->n_int[S]);
    if(S == 0) {
        k = 1;
        while(k < ig->n_int[0]) {
            update_gauge(h, &ig->hf);
            update_momenta(ig->mnls_per_ts[0], h, ig->no_mnls_per_ts[0], &ig->hf);
            k++;
        }
        update_gauge(h, &ig->hf);
        if(halfstep != 1) {
            update_momenta(ig->mnls_per_ts[0], h, ig->no_mnls_per_ts[0], &ig->hf);
        }
    }
    else {
        k = 1;
        while(k < ig->n_int[S]) {
            ig->integrate[S-1](h, S-1, 0);
            update_momenta(ig->mnls_per_ts[S], h, ig->no_mnls_per_ts[S], &ig->hf);
            k++;
        }
        /* the last sub-integration gets halfstep=1 on the outermost timescale,
           otherwise the caller's halfstep is passed through */
        ig->integrate[S-1](h, S-1, (S == ig->no_timescales-1) ? 1 : halfstep);
        if(!(halfstep == 1 || S == ig->no_timescales-1)) {
            update_momenta(ig->mnls_per_ts[S], h, ig->no_mnls_per_ts[S], &ig->hf);
        }
    }

    if(S == ig->no_timescales-1) {
        dohalfstep(tau, S);
    }
}

示例#5

显示文件

文件： integrator.c 项目： bknippsch/tmLQCD

void integrate_2mn(const double tau, const int S, const int halfstep) {
    int i,j=0;
    integrator * itgr = &Integrator;
    double eps,
           oneminus2lambda = (1.-2.*itgr->lambda[S]);

    if(S == itgr->no_timescales-1) {
        dohalfstep(tau, S);
    }

    eps = tau/((double)itgr->n_int[S]);
    if(S == 0) {

        for(j = 1; j < itgr->n_int[0]; j++) {
            update_gauge(0.5*eps, &itgr->hf);
            update_momenta(itgr->mnls_per_ts[0], oneminus2lambda*eps, itgr->no_mnls_per_ts[0], &itgr->hf);
            update_gauge(0.5*eps, &itgr->hf);
            update_momenta(itgr->mnls_per_ts[0], 2.*itgr->lambda[0]*eps, itgr->no_mnls_per_ts[0], &itgr->hf);
        }
        update_gauge(0.5*eps, &itgr->hf);
        update_momenta(itgr->mnls_per_ts[0], oneminus2lambda*eps, itgr->no_mnls_per_ts[0], &itgr->hf);
        update_gauge(0.5*eps, &itgr->hf);
        if(halfstep != 1) {
            update_momenta(itgr->mnls_per_ts[0], 2*itgr->lambda[0]*eps, itgr->no_mnls_per_ts[0], &itgr->hf);
        }
    }
    else {
        for(i = 1; i < itgr->n_int[S]; i++) {
            itgr->integrate[S-1](eps/2., S-1, 0);
            update_momenta(itgr->mnls_per_ts[S], oneminus2lambda*eps, itgr->no_mnls_per_ts[S], &itgr->hf);
            itgr->integrate[S-1](eps/2., S-1, 0);
            update_momenta(itgr->mnls_per_ts[S], 2*itgr->lambda[S]*eps, itgr->no_mnls_per_ts[S], &itgr->hf);
        }
        itgr->integrate[S-1](eps/2., S-1, 0);
        update_momenta(itgr->mnls_per_ts[S], oneminus2lambda*eps, itgr->no_mnls_per_ts[S], &itgr->hf);
        if(S == itgr->no_timescales-1) {
            itgr->integrate[S-1](eps/2., S-1, 1);
        }
        else itgr->integrate[S-1](eps/2., S-1, halfstep);
        if(halfstep != 1 && S != itgr->no_timescales-1) {
            update_momenta(itgr->mnls_per_ts[S], 2*itgr->lambda[S]*eps, itgr->no_mnls_per_ts[S], &itgr->hf);
        }
    }

    if(S == itgr->no_timescales-1) {
        dohalfstep(tau, S);
    }
}

示例#6

显示文件

文件： test_atomic.c 项目： kostrzewa/openmp_tests

/*
 * OpenMP test driver: prints the eight fields d1..d8 of `momenta`, runs a
 * work-shared loop of MAX iterations in which every iteration calls
 * waste_cycles(&in) followed by update_momenta(momenta) seven times, and then
 * prints the fields of `momenta` again.
 *
 * NOTE(review): `momenta`, MAX, waste_cycles and update_momenta are defined
 * outside this block; whether update_momenta is a function or a macro, and
 * whether its update is atomic, cannot be told from here -- confirm against
 * their definitions.
 */
int main(void) {
  /* state of momenta before the parallel region */
  printf("%lf %lf %lf %lf %lf %lf %lf %lf\n",momenta.d1,momenta.d2,momenta.d3,momenta.d4,momenta.d5,momenta.d6,momenta.d7,momenta.d8);
 
  #pragma omp parallel
  {
    
  /* only thread 0 reports the size of the thread team */
  if( omp_get_thread_num() == 0 ) {
    printf("%d OpenMP threads!\n",omp_get_num_threads());
  }
     
  /* declared inside the parallel region, so every thread has its own copy */
  double in = 33;
  /* out, res and a1..a5 are not referenced by name anywhere else in this
     function; NOTE(review): they may be used by update_momenta/waste_cycles
     if those are macros -- confirm before removing them */
  double out = 0;
  double res = 0;

  complex double a1 = 1.0 + I*2.0; // 1.0 + 2.0i
  complex double a2 = 3.0 + I*4.0; // 3.0 + 4.0i
  complex double a3 = 5.0 - I*6.0; // 5.0 - 6.0i (note the minus sign, unlike the others)
  complex double a4 = 7.0 + I*8.0; // 7.0 + 8.0i
  complex double a5 = 9.0 + I*10.0;// 9.0 + 10.0i
  
  /* the MAX iterations are divided among the threads of the team; each
     iteration performs seven waste_cycles/update_momenta pairs */
  #pragma omp for
  for(int i = 0; i < MAX; ++i) {
    waste_cycles(&in);
    update_momenta(momenta);
    waste_cycles(&in);
    update_momenta(momenta);
    waste_cycles(&in);
    update_momenta(momenta);
    waste_cycles(&in);
    update_momenta(momenta);
    waste_cycles(&in);
    update_momenta(momenta);
    waste_cycles(&in);
    update_momenta(momenta);
    waste_cycles(&in);
    update_momenta(momenta);
  }

  /* every thread prints its own final value of in */
  printf("thread %d %lf \n",omp_get_thread_num(),in);

  } /* OpenMP closing brace */

  /* state of momenta after the parallel region */
  printf("%lf %lf %lf %lf %lf %lf %lf %lf\n",momenta.d1,momenta.d2,momenta.d3,momenta.d4,momenta.d5,momenta.d6,momenta.d7,momenta.d8);
  return(0); 
}

示例#7

显示文件

文件： main.c 项目： ericaddison/Binary-SMBH-Encounter-Simulation

/*
 * Entry point of the BEMRI simulator.
 *
 * Parses the command line with args(), then runs N parameter sets
 * (outer loop) times ipsval initial binary phases (inner loop). Each run
 * integrates the three-body system until one of the stopping conditions
 * fires and appends one comma-separated line to the results file
 * ("BEMRI.dat" unless -fname is given). Unless -c or -N is used, positions
 * and quadrupole data are also written to "pos.dat" and "QP.dat".
 *
 * Many identifiers used here (eb, ab, eh, m1..m4, Pb, Ph, Eb, Eh, G, c, C,
 * rr, vv, X1.., theta_h, ...) are not declared in this function; they are
 * presumably macros or globals from the project headers.
 *
 * Returns 0 on normal completion and 1 if argument parsing, memory
 * allocation or opening an output file fails.
 *
 * Status codes written per run:
 *   -4: angular momentum conservation violation
 *   -3: energy conservation violation
 *   -2: run manually canceled
 *   -1: simulation halted due to BEMRI proximity
 *    0: good simulation, BEMRI disrupted
 *    1: good simulation, BEMRI survived but bound to SMBH
 *    2: good simulation, BEMRI survived and remained unbound
 *    3: lifetime for given parameters was too short
 */
int main(int argc, char *argv[])
{

/* Clear Screen */
	system("clear");
	printf("\nBEMRI simulator!\n");

/* Variable Declarations */
	BEMRI *b;
	flags f;
	iParams iPars;
	double dt1, dt, tmax, e[3], t, t0, r_min, t_RK4 = 0.0,tau, t_node, sim_time, E0, PbN, dt_test;
	double r_bemri, r_current, r_node, r_1, r_2, theta_N, ecc_N, theta0h, r_tid, r0;
	double E1h,E2h,l1h,l2h,e1h,e2h,dAdt, dt_temp, dt_min, fmax[2], alpha = 1e-5, chi, node_phase, E_last, a1h, a2h;
	double Ph_init,eh_init, Q[3][3], Qtt[3][3], T_pm[4];
	double R[3], m[3], timer=1e300;
	double test_rad, test_angle, del_ia, del_aop, frac = 1.0, max_hard_count, eh0;
	int xed, a = 0, N=1, single, minned, status, entered_node, passed_node, times_around, max_orbits, num_params;
	int m1NegY=0, hard_count, agent, amax, astep, nvar, zeroed, angle_counter = 0, inner_runs, ipsval = 1, past;
	double t_broken, Pb0, Esys0,Lsys0, eb0, Hrat = 3, ri, r_cm, percentage_as_double, gam_now, eb_prev, ab_prev, k1, k2;
	double e1h_prev, e2h_prev, timer0;
	long seed, iseed;
	double y[22],yscal[22],dydx[22], dscale, mconv, tconv;	//these are the vectors used for BS integration, 22 = 7*N+1
	double lan0, ia0, aop0, ab0;
	char fname[80];

/* for BS ODE integrator setup */
	for(int i=0;i<22;i++)
		yscal[i] = 1;

/* Vector from observer to system center of mass */
	R[0] = 0; R[1] = 0; R[2] = 2.45027686e20;	//distance to Sgr A*

/* timing variables */
	time_t start;
	time_t stop;
	struct timeval t1;

/* command line argument check */
	//possible flags:
	//	-c				run to completion
	//	-s [value]		use value for seed
	//	-t [value]		run to tmax = value*Ph
	//	-nd				do not stop sim when BEMRI is disrupted
	//	-N [value]		run simulation N times, do not output position or quadrupole data
	//	-so				suppress any screen output
	//	-PbN [value]	set Pb = value*tnode
	//	-th0 [value]	set theta0 = value
	//	-RK4			use RK4 integrator instead of BS2
	//	-fname [string]	use string as file name instead of BEMRI.dat
	//	-fpars [string] use string as filename to find input parameters
	//	-beta [value]	use beta = value
	//	-gam [value]	use gamma = value
	//	-ips [value]	run [value] runs per set of parameters, sampling initial phases
	//	-eh [value]		set BH orbit eccentricity
	//	-Htest			Test Heggie values
	//	-Hrat [value]	Use [value] for rp/a ratio in Heggie test
	//	-geo			Use geometricized units -- G = c = 1
	//	-ang			Randomize binary orientation angles

	b = malloc( sizeof *b );
	if(b == NULL)
	{
		fprintf(stderr,"Could not allocate BEMRI structure\n");
		return 1;
	}
	if(args(argc,argv,&iPars,&f,&N,&frac,&seed,&PbN,fname,&eh,&ipsval,&Hrat))
	{
		free(b);			// do not leak b when argument parsing fails
		return 1;
	}

/* initialize BEMRI parameters */
	init_params(b,f);

/* file pointer declarations */
	FILE *results_fp, *Q_fp = NULL, *pos_fp = NULL;
	if(f.fnameflag == 0)
		results_fp = fopen("BEMRI.dat","w");
	else
		results_fp = fopen(fname,"w");
	if(results_fp == NULL)
	{
		perror("Could not open results file");
		free(b);
		return 1;
	}

	if(f.cflag != 1 && f.Nflag != 1)
	{
		Q_fp = fopen("QP.dat","w");
		pos_fp = fopen("pos.dat","w");
		if(Q_fp == NULL || pos_fp == NULL)
		{
			fprintf(stderr,"Could not open QP.dat and/or pos.dat for writing\n");
			if(Q_fp != NULL) fclose(Q_fp);
			if(pos_fp != NULL) fclose(pos_fp);
			fclose(results_fp);
			free(b);
			return 1;
		}
	}

/* Chain declaration */
	C = malloc( sizeof *C );
	if(C == NULL)
	{
		fprintf(stderr,"Could not allocate CHAIN structure\n");
		if(f.cflag != 1 && f.Nflag != 1)
		{
			fclose(pos_fp);
			fclose(Q_fp);
		}
		fclose(results_fp);
		free(b);
		return 1;
	}

/* set seed if not given on command line */
	if(f.sflag == 0)
	{gettimeofday(&t1,NULL); seed = t1.tv_usec;}
	iseed = seed;		//save initial seed to output

/* master loop, changes parameters */
/* this is the total number of unique (gamma,inc,lan) values that will be run */
	for(int ii=0;ii<N;ii++)
	{

		/* randomly assign angles if -ang flag is used */
		if(f.angflag)							// angle variation
		{
			iPars.lanA = 2*PI*ran2(&seed);
			iPars.incA = acos(2*ran2(&seed)-1);
		}
		/* if not using random angles OR a parameter file, then assign angles of zero */
		else if(!f.fParflag)
			iPars.lanA = iPars.incA = 0.0;

		/* assign random gamma value unless certain flags are used */
		if(!f.fParflag && !f.betaflag && !f.gamflag){
			iPars.gamma = (0.35 + 4.65*ran2(&seed));  // randomly assigning beta from being uniform over gamma
			iPars.beta = 1.0/iPars.gamma;
		}

/* starting the loop over all theta0 initial binary phase values */
		for(angle_counter = 0; angle_counter<ipsval; angle_counter++)
		{

/* set G and c, which will be reset if -geo is used */
			c = 299792458e0;
			G = 6.673e-11;

/* reset timer and m1NegY */
			timer = 1e300;
			timer0 = 0.0;		// reported as 0 if the timer is never started in this run
			m1NegY = 0;

/* parameter setup */
			init_params(b,f);	// initialize BEMRI parameters
			eb = 0.0;
			aop = 0.0;			// aop is redundant for circular orbits, always set to 0
			lan = iPars.lanA;	// set dynamic value lan
			ia = iPars.incA;	// set dynamic value ia
			if(!f.th0flag && !f.fParflag && f.ipsflag)	// assign appropriate value of th0 if using -ips flag
				iPars.th0 = 2*PI * (float)angle_counter/ipsval;		//only set theta0 if th0flag has NOT been set

/* Setup for testing results from Heggie and Rasio paper */
			if(f.Htflag == 1)
			{
				G = c = 1;
				ia = 0;
				lan = 0;
				aop = 0;
				m1 = m2 = 1;
				m3 = 1;
				ab = 1;
				eb = 0.0;
				eh = 1.0;
				rph = Hrat*ab;
				r0 = 100*rph;
				theta0h = -acos(2*rph/r0 - 1);
				Pb = 2*PI*sqrt(ab*ab*ab/(G*(m1+m2)));
			}

/* setup for elliptical BEMRI */
			else if(eh < 1)
				theta0h = PI;

/* Setup for parabolic orbit evolution */
			else if(eh >= 1)
			{
				if(f.geoflag)								// if using geometricized units
				{
					mconv = G/(c*c);						// conversion factor for masses
					tconv = c;
					G = c = 1.0;							// set constants to 1
					m1 *= mconv;							// convert mass values to meters
					m2 *= mconv;
					m3 *= mconv;
					Pb *= tconv;							// recalculate the binary period with new ab and masses
				}

				r_tid = ab*pow(m3/(m1+m2),1.0/3.0);			// calculate tidal radius r_tid
				rph = r_tid/iPars.beta;						// calculate pericenter distance for BEMRI orbit
				r0 = 200.0*rph;								// calculate r0 = initial separation
				theta0h = -acos(2.0*rph/r0 - 1.0);			// calculate corresponding true anomaly
				if(isnan(theta0h) || fabs(theta0h) < 2.0)	// see if the anomaly was in acceptable range
					theta0h = -2.0;							// if not, just set to -2.0 radians
			}

/* recalculate BEMRI parameters using new values */
			recalc_params(b);

/* initial conditions and node passage time */
			lh = sqrt(rph*(1+eh)*G*(m3*m3*m4*m4)/(m3+m4));		//initial angular momentum of SMBH orbit
			advance_orbit(&(b->binary_h),lh,theta0h);			//begin cm-SMBH orbit at theta0h

/* timestep tau and max simulation time tmax */
			if(f.Htflag == 1)
			{
				tau = Pb/pow(2,7);
				tmax = 1e100;
			}
			else if(eh<1)
			{
				tau = Pb/pow(2,7);
				tmax = 1*Pb;
			}
			else
			{
				tau = Pb/pow(2,7);
				tmax = 1e100;
			}

			recalc_params(b);
// update little binary positions and velocities for orbit around hole
			lb = sqrt(rpb*(1+eb)*G*(m1*m1*m2*m2)/(m1+m2));		//initial angular momentum of BEMRI
			advance_orbit(&(b->binary_b),lb,iPars.th0);			//begin BEMRI at theta0
			BEMRI_CM_update(b);
			load_vectors(rr,vv,m,b);		//load initial values into working vectors
			dt = tau;
			dt1 = dt;

/* Chain setup and testing */
			setup_chain(C,m,3);
			nvar = 6*(C->N-1);

/* BEMRI lifetime check */
			/* NOTE(review): status and times_around set here are overwritten by the
			   unconditional resets in "other setup" below, so this check currently
			   never aborts a run. Left as in the original; confirm the intended
			   behaviour (and the value of Ph for eh >= 1) before enabling it. */
			if ( peters_lifetime(eb,ab,m1,m2) < 100*Ph )			//BEMRI lifetime too short, abort current run
			{status = 3;times_around = -1;}

/* other setup */
			eh0 = eh;
			t0 = 0;
			zeroed = 0;
			t_broken = 0.0;
			xed = 0;
			minned = 0;
			t_RK4 = 0.0;
			status = 0;
			entered_node = 0;
			passed_node = 0;
			times_around = 0;
			max_orbits = 1;
			a = -1;
			r_node = 0.0;		// defined value until it is computed for eh < 1; no separation is below 0, so the node logic stays inert
			if(eh < 1)
				amax = (int)(fabs((tmax-t0)/dt1));	// fabs, not abs: the argument is a double
			else
				amax = 1e6;
			astep = 1;		//output management
			start = time(NULL);
			max_hard_count = 20;
			hard_count = 0;
			Pb0 = Pb;
			ri = sqrt( pow(X4-X3,2) + pow(Y4-Y3,2) + pow(Z4-Z3,2) );	// initial distance from binary cm to SMBH
			k1 = G*m3*m1;
			k2 = G*m3*m2;

/* initial energy setup */
			calc_energies(b,&E1h,&E2h);
			E0 = Eb;
			eb0 = eb;
			ab0 = ab;
			E_last = E0;			//initialize E_last
			calc_total_energy(b);					// compute total system energy
			calc_total_angular_momentum(b);			// compute total system ang. mom.
			Esys0 = b->energy;
			Lsys0 = b->L;

/* print out some diagnostic info */
			if(!f.Nflag)
			{
				printf("\nbeta = %.3f",iPars.beta);
				printf("\ngamma = %.3f",iPars.gamma);
				printf("\nt_max = %.3e",tmax);
				printf("\ntheta0h = %.3e",theta0h);
				printf("\ntheta0 = %.10e",iPars.th0);
				printf("\nab = %.4e",ab);
				printf("\nPb = %.4e",Pb);
				printf("\nr_cm0 = %.4e",ri);
				printf("\nG = %.3e\nc = %.3e",G,c);
				printf("\nia = %.3f\tlan = %.3f\n",ia*180/PI,lan*180/PI);

				if(angle_counter==0) anykey();
			}

/* main simulation loop */
			while( f.cflag*times_around < 100 && times_around >= 0 && t_RK4 <= tmax )	//something should happen before this, but if not...
			{
			/* this while loop will run under one of two conditions:
				if the -c flag is used, then completion = 1 and tmax = HUGE, so it will run until
				times_around < 100. If the -t or no flag is used, then completion = 0 and the sim
				will run until t_RK4 = tmax. */

				gam_now = ah*(1-eh)/(ab*pow(m3/(m1+m2),1.0/3.0));
				if( !f.Nflag )
				{
					printf("t_RK4 = %.3e\r",t_RK4);
					fflush(stdout);
				}

			/* upkeep */
				a += 1;
				E_last = Eb;		//store old BEMRI energy
				eb_prev = eb;
				e1h_prev = e1h;
				e2h_prev = e2h;
				ab_prev = ab;

			/* actual integration call */
				if(!f.bs2flag)
				{
					pack_y_vector_C(C,y);
					leap_derivs2(t_RK4,y,dydx);
					for(int i=0;i<nvar;i++)
						yscal[i]=FMAX(fabs(y[i])+fabs(dydx[i]*dt)+TINY,1);
					bsstep(y,nvar,&t_RK4,&dt1,1e-12,1e-9,yscal,1,leap_derivs2);
					unpack_y_vector_C(C,y);
					if(a%10 == 0)
						check_chain(C);
					update_momenta(C);
					update_positions(C);
				}
				else{
					pack_y_vector(rr,vv,m,y);
					RK4A_const_step(y,22,&t_RK4,dt1,&dt,1e-9,1e-6,Nbody_derivs1);
					unpack_y_vector(rr,vv,m,y);
				}

				update_binaries(b,rr,vv);							//copies values from v and r into the structures
				CM_values(b);										//calculate values for the center of mass
				if(dt > dt1)										//keeps integration step size at maximum of dt1
					dt = dt1;
				timer -= dt1;										// update timer value

			/* calculate desired values */
				calc_angles(&(b->binary_b));			// calculate current orbital angles
				past = true_anomaly(&(b->binary_h));	// calculate current true anomaly of BH orbit
				calc_energies(b,&E1h,&E2h);				// compute current binding energies
				calc_angular_momenta(b,&l1h,&l2h);		// compute current pairwise angular momenta
				calc_ecc(b,&e1h,&e2h,E1h,E2h,l1h,l2h);	// compute current pairwise eccentricities
				calc_total_energy(b);					// compute total system energy
				calc_total_angular_momentum(b);			// compute total system ang. mom.
				r_bemri = sqrt( pow(X1-X2,2) + pow(Y1-Y2,2) + pow(Z1-Z2,2));	//BEMRI separation
				r_1 = sqrt( pow(X1-X3,2) + pow(Y1-Y3,2) + pow(Z1-Z3,2) );		// m1-SMBH separation
				r_2 = sqrt( pow(X2-X3,2) + pow(Y2-Y3,2) + pow(Z2-Z3,2) );		// m2-SMBH separation
				r_current = min(r_1,r_2);										//current distance from SMBH to closest BEMRI component
				r_cm = sqrt( pow(X4-X3,2) + pow(Y4-Y3,2) + pow(Z4-Z3,2) );
				ab = E_to_a(Eb,m1,m2); 					//update semi-major axis of the BEMRI
				ah = E_to_a(Eh,m3,m1+m2);  				//update semi-major axis of the BH orbit
				Pb = 2*PI*sqrt(pow(ab,3)/(G*(m1+m2)));	//current orbital periods
				Ph = 2*PI*sqrt(pow(ah,3)/(G*(m1+m2+m3)));
				b->binary_b.rp = ab*(1-eb);				// compute new binary periapse
				b->binary_h.rp = ah*(1-eh);				// compute new BEMRI periapse
				point_mass_QP(m,rr,3,R,Q,Qtt);			// compute GW output
				if(eh<1) r_node = ah*(1-eh*eh);			//SMBH-BEMRI separation when BEMRI is at the node

			/* stopping conditions */
				if(eh0 < 1 || f.ipsflag==0)				// for elliptical BEMRIs or single runs
				{
					if( passed_node && theta_h >= PI )	//if BEMRI has passed the node AND passed apoapse
					{
						passed_node = 0;				//reset passed_node
						times_around += 1;				//increment times_around
						if( (Eb/E_last) > 1 )			//compare current Eb to last orbit's initial Eb
							hard_count++;				//the BEMRI has hardened, increment hard_count
						else
							hard_count = 0;				//the BEMRI has softened, reset hard_count
						if( hard_count == max_hard_count)
						{
							status = 2;
							break;
						}
						E_last = Eb;
					}
					if(r_current < r_node && entered_node == 0)	//updates values for overall while loop condition, don't want to update while BEMRI is in the node.
					{
						entered_node = 1;	//then the node has been entered
						theta_N = theta_b;		//save the phase when BEMRI entered the node
						ecc_N = eb;
					}
					if(r_current > r_node && entered_node == 1)	//if BEMRI has just left the node
					{
						passed_node = 1;						//set passed_node
						entered_node = 0;						//reset entered_node
					}
				}

			/* if m1 has gone into negative y territory, start the clock */
				if(Y1<=0 && !m1NegY){
					m1NegY = 1;
					timer0 = t_RK4;
					timer = t_RK4;
					printf("\nTimer started!\n");
				}

			/* if timer has gone off, then break */
				if(timer<=0)
				{
					if(fabs(eb_prev-eb)/eb <1e-9 && k1/r_1 > Eb && k2/r_2 > Eb)
					{
						status = 0;
						printf("No change condition met\n");
						break;
					}
					else if(fabs(e1h_prev-e1h)/e1h < 1e-9 || fabs(e2h_prev-e2h)/e2h < 1e-9)
					{
						status = 0;
						printf("No change condition met\n");
						break;
					}
				}

				if(f.Htflag == 1 && r_cm > ri )	// Heggie test: stop once the binary cm is farther out than it started
				{
					status = 0;
					break;
				}
				if(r_bemri < 2*R_NS && G < 1)
				{
					status = -1;
					break;
				}

				//Energy conservation check
				if( fabs(Esys0 - b->energy) / fabs(Esys0) > 1e-6 )
				{
					status = -3;
					printf("\nenergy violation\nEsys0 = %.10e\nEsys(t) = %.10e\n",Esys0,b->energy);
					break;
				}

				// angular momentum conservation check
				if( fabs(Lsys0 - b->L) / fabs(Lsys0) > 1e-6 )
				{
					status = -4;
					printf("\nAng. mom violation\nLsys0 = %.10e\nLsys(t) = %.10e\n",Lsys0,b->L);
					break;
				}

			/* print progress */
				if(f.cflag == 1 && f.soflag==0)
				{
					printf("Current Theta: %.3f\tOrbits Completed: %d\r",theta_h,times_around);
					fflush(stdout);
				}
				else if(f.soflag==0 && eh0 < 1)
				{
					if(print_percentage(a,ii,t_RK4,tmax,ipsval))
					{
						xed = 1;
						status = -2;
						fflush(stdout);
						break;
					}
				}

			/* Full Data Output */
				if(a%astep == 0 && f.cflag != 1 && f.Nflag != 1)
				{
					output_time(t_RK4+dt1,pos_fp);
					output_positions(rr,b,pos_fp);
					fprintf(pos_fp,"%.10e,%.10e,%.10e,%.10e,%.10e,",Eb,Eh,b->energy,b->L,eb);
					fprintf(pos_fp,"\n");
					output_QP(Qtt, Q_fp);
				}

			} // END OF CURRENT SINGLE RUN

/* initial clean up */
			stop = time(NULL);
			sim_time = difftime(stop,start)/60.0;

			a1h = E_to_a(E1h,m1,m3);
			a2h = E_to_a(E2h,m2,m3);

/* print percentage if eh0 >= 1 */
			if(f.soflag==0 && eh0 >= 1)
			{
				print_percentage(0,ii,angle_counter,ipsval,N);
				printf("\nde = %.10e\n",eb - eb0);
			}

/* assign proper status if came out with a zero */
			if(status==0)
			{
				if(Eb>0)	// binary disrupted
					status=0;
				else if(Eh < 0)	// binary survived, bound to SMBH
					status=1;
				else
					status=2;
			}

/* final state output */
			if(Eb < 0)
			{ e1h = e2h = -1; }
			fprintf(results_fp,"%d,%ld,",status,iseed);
			fprintf(results_fp,"%.4e,%.4e,",t_RK4,timer0);		// trailing comma keeps timer0 separate from eb0
			fprintf(results_fp,"%.10e,%.10e,%.10e,%.10e,%.10e,",eb0,eb,eh,e1h,e2h);
			fprintf(results_fp,"%.10e,%.10e,%.10e,%.10e,%.10e,",ab0,ab,ah,a1h,a2h);
			fprintf(results_fp,"%.10e,%.10e,%.10e,%.10e,%.10e,",Eh,E1h,E2h,E0,Esys0);
			fprintf(results_fp,"%.10e,%.10e,%.10e,%.10e,",lh,l1h,l2h,Lsys0);
			fprintf(results_fp,"%.10e,%.10e,%.10e,%.10e",iPars.incA,iPars.lanA,iPars.th0,iPars.gamma);
			fprintf(results_fp,"\n");

		}	// END OF CURRENT ANGLE_COUNTER LOOP

/* 100% print */
		if(xed == 0 && f.cflag == 0 && f.soflag==0)
		{
			fflush(stdout);
			printf("Running %d of %d... (100%%)\n\r",ii+1,N);
			printf("\n\r");
		}

	}	// END OF OVERALL N LOOP

/* file clean up */
	fclose(results_fp);
	if(f.cflag != 1 && f.Nflag != 1)
	{
		fclose(pos_fp);
		fclose(Q_fp);
	}
	printf("\nfcount = %d\n",fcount);
	free(b);
	free(C);
	return 0;
}

示例#8

显示文件

文件： p4_Fforce.C 项目： DeanHowarth/QUDA-CPS

ForceArg Fp4::EvolveMomFforce(Matrix *mom, Vector *frm, Float mass, Float dt){
  char *fname = "EvolveMomFforce(M*,V*,F,F,F)";
  ERR.NotImplemented(cname,fname);
  ForceArg  Fdt;

#if 0
  VRB.Func(cname,fname);

#ifdef PROFILE
  Float dtime;
  ParTrans::PTflops=0;
  ForceFlops=0;
#endif
  size_t size;
  //	int nflops=0;
  static int vax_len = 0;
  if (vax_len == 0)
    vax_len = GJP.VolNodeSites()*VECT_LEN/VAXPY_UNROLL;

  size = GJP.VolNodeSites()/2*FsiteSize()*sizeof(Float);
  Vector *X = (Vector *)smalloc(2*size);
  //    printf("X=%p\n",X);
  Vector *X_e = X;                             // even sites
  Vector *X_o = X+GJP.VolNodeSites()/2;  // odd sites

  // The argument frm should have the CG solution.
  // The FstagTypes protected pointer f_tmp should contain Dslash frm

  moveMem(X_e, frm, size);
#ifdef DEBUGGING
  f_tmp = frm+GJP.VolNodeSites()/2; // debugging only
#endif
  moveMem(X_o, f_tmp, size);
  Fconvert(X, CANONICAL, STAG);

  Convert(STAG);  // Puts staggered phases into gauge field.
    
    
  int N; // N can be 1, 2 or 4.
  N = 4;
  if (GJP.VolNodeSites()>256)
    N = 2;  
  else if (GJP.VolNodeSites()>512)
    N = 1;
  VRB.Flow(cname,fname,"N=%d\n",N);

  enum{plus=0, minus=1, n_sign=2};

  // Array in which to accumulate the force term:
  // this must be initialised to zero 
#if 0
  Matrix **force = (Matrix**)amalloc(sizeof(Matrix), 2, 4, GJP.VolNodeSites());
  if(!force) ERR.Pointer(cname, fname, "force");
#else
  size = GJP.VolNodeSites()*sizeof(Matrix);
  Matrix *force[4];
  for(int i = 0;i<4;i++)
    force[i] = (Matrix *)v_alloc("force[i]",size);
#endif
  for(int i=0; i<4; i++)
    for(int s=0; s<GJP.VolNodeSites(); s++) force[i][s].ZeroMatrix();
  ParTransAsqtad parallel_transport(*this);


  // Vector arrays for which we must allocate memory

#if 0
  Vector ***Pnu = (Vector***)amalloc(sizeof(Vector), 3, n_sign, N, GJP.VolNodeSites());
  if(!Pnu) ERR.Pointer(cname, fname, "Pnu");
    
  Vector ****P3 = (Vector****)amalloc(sizeof(Vector), 4, n_sign, n_sign, N, GJP.VolNodeSites());
  if(!P3) ERR.Pointer(cname, fname, "P3");
  Vector ****Prhonu = (Vector****)amalloc(sizeof(Vector), 4, n_sign, n_sign, N, GJP.VolNodeSites());
  if(!Prhonu) ERR.Pointer(cname, fname, "Prhonu");
  Vector *****P5 = (Vector*****)amalloc(sizeof(Vector), 5, n_sign, n_sign, n_sign, N, GJP.VolNodeSites());
  if(!P5) ERR.Pointer(cname, fname, "P5");
  Vector ******P7 = (Vector******)amalloc(sizeof(Vector), 6, n_sign, n_sign, n_sign, n_sign, N, GJP.VolNodeSites());
  if(!P7) ERR.Pointer(cname, fname, "P7");
  Vector ******Psigma7 = (Vector******)amalloc(sizeof(Vector), 6, n_sign, n_sign, n_sign, n_sign, N, GJP.VolNodeSites());
  if(!Psigma7) ERR.Pointer(cname, fname, "Psigma7");

    
  // These vectors can be overlapped with previously allocated memory
    
  Vector **Pnununu = Prhonu[0][0];
  Vector ***Pnunu = Psigma7[0][0][0];;
  Vector ****Pnu5 = P7[0][0];
  Vector ****Pnu3 = P7[0][0];
  Vector *****Prho5 = Psigma7[0];
  Vector *****Psigmarhonu = Psigma7[0];
#else
  size = GJP.VolNodeSites()*sizeof(Vector);
  Vector *Pnu[n_sign][N];
  Vector *P3[n_sign][n_sign][N];
  Vector *Prhonu[n_sign][n_sign][N];
  Vector *P5[n_sign][n_sign][n_sign][N];
  Vector *P7[n_sign][n_sign][n_sign][n_sign][N];
  Vector *Psigma7[n_sign][n_sign][n_sign][n_sign][N];
  Vector *Pnununu[N];
  Vector *Pnunu[n_sign][N];
  Vector *Pnu5[n_sign][n_sign][N];
  Vector *Pnu3[n_sign][n_sign][N];
  Vector *Prho5[n_sign][n_sign][n_sign][N];
  Vector *Psigmarhonu[n_sign][n_sign][n_sign][N];
  //printf("Pnu=%p Psigmarhonu=%p\n",Pnu,Psigmarhonu);

  for(int w = 0;w<N;w++){
    for(int i = 0;i<n_sign;i++){
      Pnu[i][w]= (Vector *)v_alloc("Pnu",size);
      for(int j = 0;j<n_sign;j++){
	P3[i][j][w]= (Vector *)v_alloc("P3",size);
	Prhonu[i][j][w]= (Vector *)v_alloc("Prhonu",size);
	for(int k = 0;k<n_sign;k++){
	  P5[i][j][k][w]= (Vector *)v_alloc("P5",size);
	  for(int l = 0;l<n_sign;l++){
	    P7[i][j][k][l][w]= (Vector *)v_alloc("P7",size);
	    Psigma7[i][j][k][l][w]= (Vector *)v_alloc("Psigma7",size);
	  }
	  Prho5[i][j][k][w] = Psigma7[0][i][j][k][w];
	  Psigmarhonu[i][j][k][w] = Psigma7[0][i][j][k][w];
	}
	Pnu5[i][j][w]=P7[0][0][i][j][w];
	Pnu3[i][j][w]=P7[0][0][i][j][w];
      }
      Pnunu[i][w]=Psigma7[0][0][0][i][w];
    }
    Pnununu[w]=Prhonu[0][0][w];
  }

#endif
    


  // input/output arrays for the parallel transport routines
  Vector *vin[n_sign*N], *vout[n_sign*N];
  int dir[n_sign*N];
	
  int mu[N], nu[N], rho[N], sigma[N];   // Sets of directions
  int w;                                // The direction index 0...N-1
  int ms, ns, rs, ss;                   // Sign of direction
  bool done[4] = {false,false,false,false};  // Flags to tell us which 
  // nu directions we have done.
	    
#ifdef PROFILE
  dtime = -dclock();
#endif
  for (int m=0; m<4; m+=N){                     	    // Loop over mu
    for(w=0; w<N; w++) mu[w] = (m+w)%4; 

    for (int n=m+1; n<m+4; n++){                        // Loop over nu
      for(w=0; w<N; w++) nu[w] = (n+w)%4;

      // Pnu = U_nu X

      for(int i=0; i<N; i++){
	vin[i] = vin[i+N] = X;
	dir[n_sign*i] = n_sign*nu[i]+plus;        // nu_i
	dir[n_sign*i+1] = n_sign*nu[i]+minus;    // -nu_i
	vout[n_sign*i] = Pnu[minus][i];
	vout[n_sign*i+1] = Pnu[plus][i];
      }
      parallel_transport.run(n_sign*N, vout, vin, dir);

      // P3 = U_mu Pnu
      // ms is the nu sign index, ms is the mu sign index,
      // w is the direction index
      for(int i=0; i<N; i++){
	dir[n_sign*i] = n_sign*mu[i]+plus;        // mu_i
	dir[n_sign*i+1] = n_sign*mu[i]+minus;    // -mu_i
      }
      for(ns=0; ns<n_sign; ns++){               // ns is the sign of nu
	for(int i=0; i<N; i++){
	  vin[n_sign*i] = vin[n_sign*i+1] = Pnu[ns][i];
	  vout[n_sign*i] = P3[plus][ns][i];
	  vout[n_sign*i+1] = P3[minus][ns][i];
	}
	parallel_transport.run(n_sign*N, vout, vin, dir);
      }
	    
      for(w=0; w<N; w++)
	for(ns=0; ns<n_sign; ns++){
	  force_product_sum(P3[plus][ns][w], Pnu[ns][w],
			    GJP.staple3_coeff(),
			    force[mu[w]]);
	}

      for(int r=n+1; r<n+4; r++){                     // Loop over rho
	bool nextr = false;
	for(w=0; w<N; w++){
	  rho[w] = (r+w)%4;		
	  if(rho[w]==mu[w]){
	    nextr = true;
	    break;
	  }
	}
	if(nextr) continue;

	for(w=0; w<N; w++){                         // sigma
	  for(int s=rho[w]+1; s<rho[w]+4; s++){
	    sigma[w] = s%4;
	    if(sigma[w]!=mu[w] && sigma[w]!=nu[w]) break;
	  }
	}

	// Prhonu = U_rho Pnu 

	for(int i=0; i<N; i++){
	  dir[n_sign*i] = n_sign*rho[i]+plus;        
	  dir[n_sign*i+1] = n_sign*rho[i]+minus;    
	}
	for(ns=0; ns<n_sign; ns++){
	  for(int i=0; i<N; i++){
	    vin[n_sign*i] = vin[n_sign*i+1] = Pnu[ns][i];
	    vout[n_sign*i] = Prhonu[ns][minus][i];
	    vout[n_sign*i+1] = Prhonu[ns][plus][i];
	  }
	  parallel_transport.run(n_sign*N, vout, vin, dir);
	}

	// P5 = U_mu Prhonu

	for(int i=0; i<N; i++){
	  dir[n_sign*i] = n_sign*mu[i]+plus;        
	  dir[n_sign*i+1] = n_sign*mu[i]+minus;    
	}
	for(ns=0; ns<n_sign; ns++) for(rs=0; rs<n_sign; rs++) {
	  for(int i=0; i<N; i++){
	    vin[n_sign*i] = vin[n_sign*i+1] = Prhonu[ns][rs][i];
	    vout[n_sign*i] = P5[plus][ns][rs][i];
	    vout[n_sign*i+1] = P5[minus][ns][rs][i];
	  }
	  parallel_transport.run(n_sign*N, vout, vin, dir);
	}

	// F_mu += P5 Prhonu^dagger
		      
	for(w=0; w<N; w++)
	  for(ns=0; ns<n_sign; ns++) for(rs=0; rs<n_sign; rs++)
	    force_product_sum(P5[plus][ns][rs][w],
			      Prhonu[ns][rs][w],
			      GJP.staple5_coeff(),
			      force[mu[w]]);

	// Psigmarhonu = U_sigma P_rhonu
		
	for(int i=0; i<N; i++){
	  dir[n_sign*i] = (n_sign*sigma[i]);        
	  dir[n_sign*i+1] = (n_sign*sigma[i]+1);    
	}
	for(ns=0; ns<n_sign; ns++) for(rs=0; rs<n_sign; rs++){
	  for(int i=0; i<N; i++){
	    vin[n_sign*i] = vin[n_sign*i+1] = Prhonu[ns][rs][i];
	    vout[n_sign*i] = Psigmarhonu[ns][rs][minus][i];
	    vout[n_sign*i+1] = Psigmarhonu[ns][rs][plus][i];
	  }
	  parallel_transport.run(n_sign*N, vout, vin, dir);
	}

	// P7 = U_mu P_sigmarhonu
	for(int i=0; i<N; i++){
	  dir[n_sign*i] = n_sign*mu[i]+plus;        
	  dir[n_sign*i+1] = n_sign*mu[i]+minus;    
	}
	for(ns=0; ns<n_sign; ns++) for(rs=0; rs<n_sign; rs++) for(ss=0; ss<n_sign; ss++){
	  for(int i=0; i<N; i++){
	    vin[n_sign*i] = vin[n_sign*i+1] = Psigmarhonu[ns][rs][ss][i];
	    vout[n_sign*i] = P7[plus][ns][rs][ss][i];
	    vout[n_sign*i+1] = P7[minus][ns][rs][ss][i];
	  }
	  parallel_transport.run(n_sign*N, vout, vin, dir);
	}

	// F_mu -= P7 Psigmarhonu^\dagger
		
	for(w=0; w<N; w++)
	  for(ns=0; ns<n_sign; ns++) for(rs=0; rs<n_sign; rs++) for(ss=0; ss<n_sign; ss++)
	    force_product_sum(P7[plus][ns][rs][ss][w],
			      Psigmarhonu[ns][rs][ss][w],
			      GJP.staple7_coeff(),
			      force[mu[w]]);

	// F_sigma += P7 Psigmarhonu^\dagger
	// N.B. this is the same as one of the previous products.
		
	for(w=0; w<N; w++)
	  for(ns=0; ns<n_sign; ns++) for(rs=0; rs<n_sign; rs++) 
	    force_product_sum(P7[plus][ns][rs][minus][w],
			      Psigmarhonu[ns][rs][minus][w],
			      -GJP.staple7_coeff(),
			      force[sigma[w]]);

	// F_sigma += Psigmarhonu P7^\dagger
		
	for(w=0; w<N; w++)
	  for(ns=0; ns<n_sign; ns++) for(rs=0; rs<n_sign; rs++) 
	    force_product_sum(Psigmarhonu[ns][rs][minus][w],
			      P7[minus][ns][rs][minus][w],
			      -GJP.staple7_coeff(),
			      force[sigma[w]]);

	// Psigma7 = U_sigma P7 
	for(int i=0; i<N; i++){
	  dir[n_sign*i] = (n_sign*sigma[i]);        
	  dir[n_sign*i+1] = (n_sign*sigma[i]+1);    
	}
	for(ms=0; ms<n_sign; ms++) for(ns=0; ns<n_sign; ns++) for(rs=0; rs<n_sign; rs++){
	  for(int i=0; i<N; i++){
	    vin[n_sign*i] = P7[ms][ns][rs][plus][i];
	    vin[n_sign*i+1] = P7[ms][ns][rs][minus][i];
	    vout[n_sign*i] = Psigma7[ms][ns][rs][plus][i];
	    vout[n_sign*i+1] = Psigma7[ms][ns][rs][minus][i];
	  }
	  parallel_transport.run(n_sign*N, vout, vin, dir);
	}

	// F_sigma += Fsigma7 Frhonu^\dagger

	for(w=0; w<N; w++)
	  for(ns=0; ns<n_sign; ns++) for(rs=0; rs<n_sign; rs++) 
	    force_product_sum(Psigma7[plus][ns][rs][plus][w],
			      Prhonu[ns][rs][w],
			      -GJP.staple7_coeff(),
			      force[sigma[w]]);

	// F_sigma += Frhonu Fsigma7^\dagger

	for(w=0; w<N; w++)
	  for(ns=0; ns<n_sign; ns++) for(rs=0; rs<n_sign; rs++) 
	    force_product_sum(Prhonu[ns][rs][w],
			      Psigma7[minus][ns][rs][plus][w],
			      -GJP.staple7_coeff(),
			      force[sigma[w]]);

	// P5 += c_7/c_5 Psigma7

	if(GJP.staple5_coeff()!=0.0){
	  Float c75 = -GJP.staple7_coeff()/GJP.staple5_coeff();
	  for(ms=0; ms<n_sign; ms++) 
	    for(ns=0; ns<n_sign; ns++) 
	      for(rs=0; rs<n_sign; rs++) 
		for(ss=0; ss<n_sign; ss++) 
		  for(w=0; w<N; w++)
		    vaxpy3(P5[ms][ns][rs][w],&c75, Psigma7[ms][ns][rs][ss][w], P5[ms][ns][rs][w], vax_len);
	  //			P5[ms][ns][rs][w]->FTimesV1PlusV2(-GJP.staple7_coeff()/GJP.staple5_coeff(), Psigma7[ms][ns][rs][ss][w], P5[ms][ns][rs][w], GJP.VolNodeSites()*VECT_LEN);
	  ForceFlops += 2*GJP.VolNodeSites()*VECT_LEN*N*n_sign*n_sign*n_sign*n_sign;
	}

	// F_rho -= P5 Prhonu^\dagger
	for(w=0; w<N; w++)
	  for(ns=0; ns<n_sign; ns++)
	    force_product_sum(P5[plus][ns][minus][w],
			      Prhonu[ns][minus][w],
			      -GJP.staple5_coeff(),
			      force[rho[w]]);

	// F_rho -= Prhonu P5^\dagger
		    
	for(w=0; w<N; w++)
	  for(ns=0; ns<n_sign; ns++)
	    force_product_sum(Prhonu[ns][minus][w],
			      P5[minus][ns][minus][w],
			      -GJP.staple5_coeff(),
			      force[rho[w]]);

	// Prho5 = U_rho P5

	for(int i=0; i<N; i++){
	  dir[n_sign*i] = n_sign*rho[i]+plus;        
	  dir[n_sign*i+1] = n_sign*rho[i]+minus;    
	}
	for(ms=0; ms<n_sign; ms++) for(ns=0; ns<n_sign; ns++){
	  for(int i=0; i<N; i++){
	    vin[n_sign*i] = P5[ms][ns][plus][i];
	    vin[n_sign*i+1] = P5[ms][ns][minus][i];
	    vout[n_sign*i] = Prho5[ms][ns][plus][i];
	    vout[n_sign*i+1] = Prho5[ms][ns][minus][i];
	  }
	  parallel_transport.run(n_sign*N, vout, vin, dir);
	}

	// F_rho -= Prho5 Pnu^\dagger
		
	for(w=0; w<N; w++)
	  for(ns=0; ns<n_sign; ns++)
	    force_product_sum(Prho5[plus][ns][plus][w],
			      Pnu[ns][w],
			      -GJP.staple5_coeff(),
			      force[rho[w]]);

	// F_rho -= Pnu Prho5^\dagger
		
	for(w=0; w<N; w++)
	  for(ns=0; ns<n_sign; ns++)
	    force_product_sum(Pnu[ns][w],
			      Prho5[minus][ns][plus][w],
			      -GJP.staple5_coeff(),
			      force[rho[w]]);
		
	// P3 += c_5/c_3 Prho5

	if(GJP.staple3_coeff()!=0.0){		
	  Float c53 = -GJP.staple5_coeff()/GJP.staple3_coeff();
	  for(ms=0; ms<n_sign; ms++) 
	    for(ns=0; ns<n_sign; ns++) 
	      for(rs=0; rs<n_sign; rs++) 
		for(w=0; w<N; w++)
		  vaxpy3(P3[ms][ns][w],&c53,Prho5[ms][ns][rs][w], P3[ms][ns][w], vax_len);
	  //			P3[ms][ns][w]->FTimesV1PlusV2(-GJP.staple5_coeff()/GJP.staple3_coeff(), Prho5[ms][ns][rs][w], P3[ms][ns][w], GJP.VolNodeSites()*VECT_LEN);
	  ForceFlops += 2*GJP.VolNodeSites()*VECT_LEN*N*n_sign*n_sign*n_sign;
	}

      } // rho+sigma loop

      // Pnunu = U_nu Pnu

      for(int i=0; i<N; i++){
	dir[n_sign*i] = n_sign*nu[i]+plus;        
	dir[n_sign*i+1] = n_sign*nu[i]+minus;    
      }
      for(int i=0; i<N; i++){
	vin[n_sign*i] = Pnu[minus][i];
	vin[n_sign*i+1] = Pnu[plus][i];
	vout[n_sign*i] = Pnunu[minus][i];
	vout[n_sign*i+1] = Pnunu[plus][i];
      }
      parallel_transport.run(n_sign*N, vout, vin, dir);

      // P5 = U_mu Pnunu

      for(int i=0; i<N; i++){
	dir[n_sign*i] = n_sign*mu[i]+plus;        
	dir[n_sign*i+1] = n_sign*mu[i]+minus;    
      }
      for(ns=0; ns<n_sign; ns++){
	for(int i=0; i<N; i++){
	  vin[n_sign*i] = Pnunu[ns][i];
	  vin[n_sign*i+1] = Pnunu[ns][i];
	  vout[n_sign*i] = P5[plus][ns][0][i];
	  vout[n_sign*i+1] = P5[minus][ns][0][i];
	}
	parallel_transport.run(n_sign*N, vout, vin, dir);
      }

      // F_mu += P5 Pnunu^\dagger

      for(w=0; w<N; w++)
	for(ns=0; ns<n_sign; ns++)
	  force_product_sum(P5[plus][ns][0][w],
			    Pnunu[ns][w],
			    GJP.Lepage_coeff(),
			    force[mu[w]]);

      // F_nu -= P5 Pnunu^\dagger
      // N.B. this is the same as one of the previous products
	    
      for(w=0; w<N; w++)
	force_product_sum(P5[plus][minus][0][w],
			  Pnunu[minus][w],
			  -GJP.Lepage_coeff(),
			  force[nu[w]]);
	    
      // F_nu -= Pnunu P5^\dagger
	    
      for(w=0; w<N; w++)
	force_product_sum(Pnunu[minus][w],
			  P5[minus][minus][0][w],
			  -GJP.Lepage_coeff(),
			  force[nu[w]]);

      // Pnu5 = U_nu P5

      for(int i=0; i<N; i++){
	dir[n_sign*i] = n_sign*nu[i]+plus;        
	dir[n_sign*i+1] = n_sign*nu[i]+minus;    
      }
      for(ms=0; ms<n_sign; ms++){
	for(int i=0; i<N; i++){
	  vin[n_sign*i] =   P5[ms][plus][0][i]; 
	  vin[n_sign*i+1] = P5[ms][minus][0][i];
	  vout[n_sign*i] =   Pnu5[ms][plus][i];
	  vout[n_sign*i+1] = Pnu5[ms][minus][i];
	}
	parallel_transport.run(n_sign*N, vout, vin, dir);
      }

      // F_nu -= Pnu5 Pnu^\dagger

      for(w=0; w<N; w++)
	force_product_sum(Pnu5[plus][plus][w],
			  Pnu[plus][w],
			  -GJP.Lepage_coeff(),
			  force[nu[w]]);

      // F_nu -= Pnu Pnu5^\dagger

      for(w=0; w<N; w++)
	force_product_sum(Pnu[plus][w],
			  Pnu5[minus][plus][w],
			  -GJP.Lepage_coeff(),
			  force[nu[w]]);

      // P3 += c_L/c_3 Pnu5

      if(GJP.staple3_coeff()!=0.0){
	Float cl3 = -GJP.Lepage_coeff()/GJP.staple3_coeff();
	for(ms=0; ms<n_sign; ms++) 
	  for(ns=0; ns<n_sign; ns++) 
	    for(w=0; w<N; w++)
	      vaxpy3(P3[ms][ns][w],&cl3,Pnu5[ms][ns][w],P3[ms][ns][w], vax_len);
	//		   		P3[ms][ns][w]->FTimesV1PlusV2(-GJP.Lepage_coeff()/GJP.staple3_coeff(), Pnu5[ms][ns][w], P3[ms][ns][w], GJP.VolNodeSites()*VECT_LEN);
	ForceFlops += 2*GJP.VolNodeSites()*VECT_LEN*N*n_sign*n_sign;
      }

      // F_nu += P3 Pnu^\dagger

      for(w=0; w<N; w++)
	force_product_sum(P3[plus][minus][w],
			  Pnu[minus][w],
			  -GJP.staple3_coeff(),
			  force[nu[w]]);

      // F_nu +=  Pnu P3^\dagger

      for(w=0; w<N; w++)
	force_product_sum(Pnu[minus][w],
			  P3[minus][minus][w],
			  -GJP.staple3_coeff(),
			  force[nu[w]]);
	    
      // Pnu3 = U_nu P3

      for(int i=0; i<N; i++)
	dir[i] = n_sign*nu[i]+plus;        
      for(ms=0; ms<n_sign; ms++){
	for(int i=0; i<N; i++){
	  vin[i] = P3[ms][plus][i]; 
	  vout[i] = Pnu3[ms][plus][i];
	}
	parallel_transport.run(N, vout, vin, dir);
      }

      // F_nu += Pnu3 X^\dagger

      for(w=0; w<N; w++)
	force_product_sum(Pnu3[plus][plus][w], X,
			  -GJP.staple3_coeff(),
			  force[nu[w]]);

      // F_nu += X Pnu3^\dagger

      for(w=0; w<N; w++)
	force_product_sum(X, Pnu3[minus][plus][w], 
			  -GJP.staple3_coeff(),
			  force[nu[w]]);

      // This stuff is to be done once only for each value of nu[w].
      // Look for  N nu's that haven't been done before.

      bool nextn = false;
      for(w=0; w<N; w++)
	if(done[nu[w]]){
	  nextn = true;
	  break;
	}
      if(nextn) continue;
      for(w=0; w<N; w++) done[nu[w]] = true;

      // Got N new nu's, so do some stuff...
	    
      // F_nu += Pnu X^\dagger

      for(w=0; w<N; w++)
	force_product_sum(Pnu[minus][w], X,
			  GJP.KS_coeff(),
			  force[nu[w]]);

      // F_nu += Pnunu Pnu^\dagger

      for(w=0; w<N; w++)
	force_product_sum(Pnunu[minus][w], Pnu[plus][w],
			  -GJP.Naik_coeff(),
			  force[nu[w]]);

      // F_nu += Pnu Pnunu^\dagger
	    
      for(w=0; w<N; w++)
	force_product_sum(Pnu[minus][w], Pnunu[plus][w],
			  GJP.Naik_coeff(),
			  force[nu[w]]);

      // Pnununu = U_nu Pnunu

      for(int i=0; i<N; i++){
	dir[i] = n_sign*nu[i]+plus;        
	vin[i] = Pnunu[minus][i]; 
	vout[i] = Pnununu[i];
      }
      parallel_transport.run(N, vout, vin, dir);
	    
      // F_nu += Pnununu X^\dagger
		
      for(w=0; w<N; w++)
	force_product_sum(Pnununu[w], X,
			  GJP.Naik_coeff(),
			  force[nu[w]]);
		
	    

    } // nu loop
  } // mu loop


    // Now that we have computed the force, we can update the momenta

  //	nflops +=ParTrans::PTflops + ForceFlops;

#ifdef PROFILE
  dtime += dclock();
  int nflops = ParTrans::PTflops + ForceFlops;
  printf("%s:%s:",cname,fname);
  print_flops(nflops,dtime);
#endif

  Fdt = update_momenta(force, dt, mom);


  // Tidy up
    
#if 0
  sfree(Pnu);
  sfree(P3);
  sfree(Prhonu);
  sfree(P5);
  sfree(P7);
  sfree(Psigma7);
#else
  for(int w = 0;w<N;w++){
    for(int i = 0;i<n_sign;i++){
      v_free(Pnu[i][w]);
      for(int j = 0;j<n_sign;j++){
	v_free(P3[i][j][w]);
	v_free(Prhonu[i][j][w]);
	for(int k = 0;k<n_sign;k++){
	  v_free(P5[i][j][k][w]);
	  for(int l = 0;l<n_sign;l++){
	    v_free(P7[i][j][k][l][w]);
	    v_free(Psigma7[i][j][k][l][w]);
	  }
	}
      }
    }
  }
#endif

  for(int i = 0;i<4;i++) v_free(force[i]);    
  sfree(X);

  Convert(CANONICAL);
#endif   

  return Fdt;
}

示例#9

显示文件

文件： integrator.c 项目： bknippsch/tmLQCD

void integrate_omf4(const double tau, const int S, const int halfstep) {
    int i,j=0;
    integrator * itgr = &Integrator;
    double eps;

    if(S == itgr->no_timescales-1) {
        dohalfstep(tau, S);
    }
    eps = tau/((double)itgr->n_int[S]);

    if(S == 0) {

        for(j = 1; j < itgr->n_int[0]; j++) {
            update_gauge(omf4_rho*eps, &itgr->hf);
            update_momenta(itgr->mnls_per_ts[0], omf4_lamb*eps, itgr->no_mnls_per_ts[0], &itgr->hf);
            update_gauge(omf4_theta*eps, &itgr->hf);
            update_momenta(itgr->mnls_per_ts[0], 0.5*(1-2.*(omf4_lamb+omf4_vartheta))*eps, itgr->no_mnls_per_ts[0], &itgr->hf);
            update_gauge((1-2.*(omf4_theta+omf4_rho))*eps, &itgr->hf);
            update_momenta(itgr->mnls_per_ts[0], 0.5*(1-2.*(omf4_lamb+omf4_vartheta))*eps, itgr->no_mnls_per_ts[0], &itgr->hf);
            update_gauge(omf4_theta*eps, &itgr->hf);
            update_momenta(itgr->mnls_per_ts[0], omf4_lamb*eps, itgr->no_mnls_per_ts[0], &itgr->hf);
            update_gauge(omf4_rho*eps, &itgr->hf);
            update_momenta(itgr->mnls_per_ts[0], 2*omf4_vartheta*eps, itgr->no_mnls_per_ts[0], &itgr->hf);
        }
        update_gauge(omf4_rho*eps, &itgr->hf);
        update_momenta(itgr->mnls_per_ts[0], omf4_lamb*eps, itgr->no_mnls_per_ts[0], &itgr->hf);
        update_gauge(omf4_theta*eps, &itgr->hf);
        update_momenta(itgr->mnls_per_ts[0], 0.5*(1-2.*(omf4_lamb+omf4_vartheta))*eps, itgr->no_mnls_per_ts[0], &itgr->hf);
        update_gauge((1-2.*(omf4_theta+omf4_rho))*eps, &itgr->hf);
        update_momenta(itgr->mnls_per_ts[0], 0.5*(1-2.*(omf4_lamb+omf4_vartheta))*eps, itgr->no_mnls_per_ts[0], &itgr->hf);
        update_gauge(omf4_theta*eps, &itgr->hf);
        update_momenta(itgr->mnls_per_ts[0], omf4_lamb*eps, itgr->no_mnls_per_ts[0], &itgr->hf);
        update_gauge(omf4_rho*eps, &itgr->hf);
        if(halfstep != 1) {
            update_momenta(itgr->mnls_per_ts[0], 2*omf4_vartheta*eps, itgr->no_mnls_per_ts[0], &itgr->hf);
        }
    }
    else {
        for(i = 1; i < itgr->n_int[S]; i++) {
            itgr->integrate[S-1](omf4_rho*eps, S-1, 0);
            update_momenta(itgr->mnls_per_ts[S], omf4_lamb*eps, itgr->no_mnls_per_ts[S], &itgr->hf);
            itgr->integrate[S-1](omf4_theta*eps, S-1, 0);
            update_momenta(itgr->mnls_per_ts[S], 0.5*(1-2.*(omf4_lamb+omf4_vartheta))*eps, itgr->no_mnls_per_ts[S], &itgr->hf);
            itgr->integrate[S-1]((1-2.*(omf4_theta+omf4_rho))*eps, S-1, 0);
            update_momenta(itgr->mnls_per_ts[S], 0.5*(1-2.*(omf4_lamb+omf4_vartheta))*eps, itgr->no_mnls_per_ts[S], &itgr->hf);
            itgr->integrate[S-1](omf4_theta*eps, S-1, 0);
            update_momenta(itgr->mnls_per_ts[S], omf4_lamb*eps, itgr->no_mnls_per_ts[S], &itgr->hf);
            itgr->integrate[S-1](omf4_rho*eps, S-1, 0);
            update_momenta(itgr->mnls_per_ts[S], 2*omf4_vartheta*eps, itgr->no_mnls_per_ts[S], &itgr->hf);
        }
        itgr->integrate[S-1](omf4_rho*eps, S-1, 0);
        update_momenta(itgr->mnls_per_ts[S], omf4_lamb*eps, itgr->no_mnls_per_ts[S], &itgr->hf);
        itgr->integrate[S-1](omf4_theta*eps, S-1, 0);
        update_momenta(itgr->mnls_per_ts[S], 0.5*(1-2.*(omf4_lamb+omf4_vartheta))*eps, itgr->no_mnls_per_ts[S], &itgr->hf);
        itgr->integrate[S-1]((1-2.*(omf4_theta+omf4_rho))*eps, S-1, 0);
        update_momenta(itgr->mnls_per_ts[S], 0.5*(1-2.*(omf4_lamb+omf4_vartheta))*eps, itgr->no_mnls_per_ts[S], &itgr->hf);
        itgr->integrate[S-1](omf4_theta*eps, S-1, 0);
        update_momenta(itgr->mnls_per_ts[S], omf4_lamb*eps, itgr->no_mnls_per_ts[S], &itgr->hf);
        if(S == itgr->no_timescales-1) {
            itgr->integrate[S-1](omf4_rho*eps, S-1, 1);
        }
        else itgr->integrate[S-1](omf4_rho*eps, S-1, halfstep);
        if(halfstep != 1 && S != itgr->no_timescales-1) {
            update_momenta(itgr->mnls_per_ts[S], 2*omf4_vartheta*eps, itgr->no_mnls_per_ts[S], &itgr->hf);
        }
    }

    if(S == itgr->no_timescales-1) {
        dohalfstep(tau, S);
    }
    return;
}