/*
 * Leap-frog integration.
 *
 * Call sequence produced (P = update_momenta, U = update_gauge):
 *   P(dtau/2), then (nsteps-1) times [U(dtau), P(dtau)], then U(dtau), P(dtau/2).
 *
 * nsteps: number of gauge-field updates; for nsteps <= 1 exactly one
 *         U(dtau) is still performed, bracketed by the two half steps.
 * dtau:   step size handed to the update routines.
 */
void leapfrog(const int nsteps, const double dtau)
{
  const double half_dtau = 0.5*dtau;
  int pairs_left = nsteps-1;

  /* opening half step for the momenta */
  update_momenta(half_dtau);

  /* interior full steps: fields first, then momenta */
  while(pairs_left > 0) {
    update_gauge(dtau);
    update_momenta(dtau);
    pairs_left--;
  }

  /* the fields get one more update than the momenta get full steps */
  update_gauge(dtau);

  /* closing half step for the momenta */
  update_momenta(half_dtau);
}
void dohalfstep(const double tau, const int S) { integrator * itgr = &Integrator; double eps = tau/((double)itgr->n_int[S]); for(int i = S; i > 0; i--) { if(itgr->type[i] == LEAPFROG) { update_momenta(itgr->mnls_per_ts[i], 0.5*eps, itgr->no_mnls_per_ts[i], &itgr->hf); eps /= ((double)itgr->n_int[i-1]); } else if(itgr->type[i] == MN2) { update_momenta(itgr->mnls_per_ts[i], itgr->lambda[i]*eps, itgr->no_mnls_per_ts[i], &itgr->hf); eps /= ((double)itgr->n_int[i-1])*2; } else if(itgr->type[i] == OMF4) { update_momenta(itgr->mnls_per_ts[i], omf4_vartheta*eps, itgr->no_mnls_per_ts[i], &itgr->hf); eps /= ((double)itgr->n_int[i-1])/omf4_rho; } } if(itgr->type[0] == LEAPFROG) { update_momenta(itgr->mnls_per_ts[0], 0.5*eps, itgr->no_mnls_per_ts[0], &itgr->hf); } else if(itgr->type[0] == MN2) { update_momenta(itgr->mnls_per_ts[0], itgr->lambda[0]*eps, itgr->no_mnls_per_ts[0], &itgr->hf); } else if(itgr->type[0] == OMF4) { update_momenta(itgr->mnls_per_ts[0], omf4_vartheta*eps, itgr->no_mnls_per_ts[0], &itgr->hf); } return; }
void integrate_2mnp(const double tau, const int S, const int halfstep) { int i; integrator * itgr = &Integrator; double eps = tau/((double)itgr->n_int[S]); double oneminus2lambda = (1.-2.*itgr->lambda[S]); if(S == 0) { update_gauge(itgr->lambda[0]*eps, &itgr->hf); for(i = 1; i < itgr->n_int[0]; i++) { update_momenta(itgr->mnls_per_ts[0], 0.5*eps, itgr->no_mnls_per_ts[0], &itgr->hf); update_gauge(oneminus2lambda*eps, &itgr->hf); update_momenta(itgr->mnls_per_ts[0], 0.5*eps, itgr->no_mnls_per_ts[0], &itgr->hf); update_gauge(2*itgr->lambda[0]*eps, &itgr->hf); } update_momenta(itgr->mnls_per_ts[0], 0.5*eps, itgr->no_mnls_per_ts[0], &itgr->hf); update_gauge(oneminus2lambda*eps, &itgr->hf); update_momenta(itgr->mnls_per_ts[0], 0.5*eps, itgr->no_mnls_per_ts[0], &itgr->hf); update_gauge(itgr->lambda[0]*eps, &itgr->hf); } else { for(i = 0; i < itgr->n_int[S]; i++) { integrate_2mnp(itgr->lambda[S]*eps, S-1, halfstep); update_momenta(itgr->mnls_per_ts[S], 0.5*eps, itgr->no_mnls_per_ts[S], &itgr->hf); integrate_2mnp(oneminus2lambda*eps, S-1, halfstep); update_momenta(itgr->mnls_per_ts[S], 0.5*eps, itgr->no_mnls_per_ts[S], &itgr->hf); integrate_2mnp(itgr->lambda[S]*eps, S-1, halfstep); } } }
void integrate_leap_frog(const double tau, const int S, const int halfstep) { int i; integrator * itgr = &Integrator; double eps, eps0; if(S == itgr->no_timescales-1) { dohalfstep(tau, S); } eps = tau/((double)itgr->n_int[S]); if(S == 0) { eps0 = tau/((double)itgr->n_int[0]); for(i = 1; i < itgr->n_int[0]; i++) { update_gauge(eps0, &itgr->hf); update_momenta(itgr->mnls_per_ts[0], eps0, itgr->no_mnls_per_ts[0], &itgr->hf); } update_gauge(eps0, &itgr->hf); if(halfstep != 1) { update_momenta(itgr->mnls_per_ts[0], eps0, itgr->no_mnls_per_ts[0], &itgr->hf); } } else { for(i = 1; i < itgr->n_int[S]; i++) { itgr->integrate[S-1](eps, S-1, 0); update_momenta(itgr->mnls_per_ts[S], eps, itgr->no_mnls_per_ts[S], &itgr->hf); } if(S == itgr->no_timescales-1) { itgr->integrate[S-1](eps, S-1, 1); } else itgr->integrate[S-1](eps, S-1, halfstep); if(halfstep != 1 && S != itgr->no_timescales-1) { update_momenta(itgr->mnls_per_ts[S], eps, itgr->no_mnls_per_ts[S], &itgr->hf); } } if(S == itgr->no_timescales-1) { dohalfstep(tau, S); } }
void integrate_2mn(const double tau, const int S, const int halfstep) { int i,j=0; integrator * itgr = &Integrator; double eps, oneminus2lambda = (1.-2.*itgr->lambda[S]); if(S == itgr->no_timescales-1) { dohalfstep(tau, S); } eps = tau/((double)itgr->n_int[S]); if(S == 0) { for(j = 1; j < itgr->n_int[0]; j++) { update_gauge(0.5*eps, &itgr->hf); update_momenta(itgr->mnls_per_ts[0], oneminus2lambda*eps, itgr->no_mnls_per_ts[0], &itgr->hf); update_gauge(0.5*eps, &itgr->hf); update_momenta(itgr->mnls_per_ts[0], 2.*itgr->lambda[0]*eps, itgr->no_mnls_per_ts[0], &itgr->hf); } update_gauge(0.5*eps, &itgr->hf); update_momenta(itgr->mnls_per_ts[0], oneminus2lambda*eps, itgr->no_mnls_per_ts[0], &itgr->hf); update_gauge(0.5*eps, &itgr->hf); if(halfstep != 1) { update_momenta(itgr->mnls_per_ts[0], 2*itgr->lambda[0]*eps, itgr->no_mnls_per_ts[0], &itgr->hf); } } else { for(i = 1; i < itgr->n_int[S]; i++) { itgr->integrate[S-1](eps/2., S-1, 0); update_momenta(itgr->mnls_per_ts[S], oneminus2lambda*eps, itgr->no_mnls_per_ts[S], &itgr->hf); itgr->integrate[S-1](eps/2., S-1, 0); update_momenta(itgr->mnls_per_ts[S], 2*itgr->lambda[S]*eps, itgr->no_mnls_per_ts[S], &itgr->hf); } itgr->integrate[S-1](eps/2., S-1, 0); update_momenta(itgr->mnls_per_ts[S], oneminus2lambda*eps, itgr->no_mnls_per_ts[S], &itgr->hf); if(S == itgr->no_timescales-1) { itgr->integrate[S-1](eps/2., S-1, 1); } else itgr->integrate[S-1](eps/2., S-1, halfstep); if(halfstep != 1 && S != itgr->no_timescales-1) { update_momenta(itgr->mnls_per_ts[S], 2*itgr->lambda[S]*eps, itgr->no_mnls_per_ts[S], &itgr->hf); } } if(S == itgr->no_timescales-1) { dohalfstep(tau, S); } }
int main(void) { printf("%lf %lf %lf %lf %lf %lf %lf %lf\n",momenta.d1,momenta.d2,momenta.d3,momenta.d4,momenta.d5,momenta.d6,momenta.d7,momenta.d8); #pragma omp parallel { if( omp_get_thread_num() == 0 ) { printf("%d OpenMP threads!\n",omp_get_num_threads()); } double in = 33; double out = 0; double res = 0; complex double a1 = 1.0 + I*2.0; // {1.0,2.0}; complex double a2 = 3.0 + I*4.0; // {3.0,4.0}; complex double a3 = 5.0 - I*6.0; // {5.0,6.0}; complex double a4 = 7.0 + I*8.0; //{7.0;8.0}; complex double a5 = 9.0 + I*10.0;// ; {9.0,10.0}; #pragma omp for for(int i = 0; i < MAX; ++i) { waste_cycles(&in); update_momenta(momenta); waste_cycles(&in); update_momenta(momenta); waste_cycles(&in); update_momenta(momenta); waste_cycles(&in); update_momenta(momenta); waste_cycles(&in); update_momenta(momenta); waste_cycles(&in); update_momenta(momenta); waste_cycles(&in); update_momenta(momenta); } printf("thread %d %lf \n",omp_get_thread_num(),in); } /* OpenMP closing brace */ printf("%lf %lf %lf %lf %lf %lf %lf %lf\n",momenta.d1,momenta.d2,momenta.d3,momenta.d4,momenta.d5,momenta.d6,momenta.d7,momenta.d8); return(0); }
int main(int argc, char *argv[]) { /* Clear Screen */ system("clear"); printf("\nBEMRI simulator!\n"); /* Variable Declarations */ BEMRI *b; flags f; iParams iPars; double dt1, dt, tmax, e[3], t, t0, r_min, t_RK4 = 0.0,tau, t_node, sim_time, E0, PbN, dt_test; double r_bemri, r_current, r_node, r_1, r_2, theta_N, ecc_N, theta0h, r_tid, r0; double E1h,E2h,l1h,l2h,e1h,e2h,dAdt, dt_temp, dt_min, fmax[2], alpha = 1e-5, chi, node_phase, E_last, a1h, a2h; double Ph_init,eh_init, Q[3][3], Qtt[3][3], T_pm[4]; double R[3], m[3], timer=1e300; double test_rad, test_angle, del_ia, del_aop, frac = 1.0, max_hard_count, eh0; int xed, a = 0, N=1, single, minned, status, entered_node, passed_node, times_around, max_orbits, num_params; int m1NegY=0, hard_count, agent, amax, astep, nvar, zeroed, angle_counter = 0, inner_runs, ipsval = 1, past; double t_broken, Pb0, Esys0,Lsys0, eb0, Hrat = 3, ri, r_cm, percentage_as_double, gam_now, eb_prev, ab_prev, k1, k2; double e1h_prev, e2h_prev, timer0; long seed, iseed; double y[22],yscal[22],dydx[22], dscale, mconv, tconv; //these are the vectors used for BS integration, 22 = 7*N+1 double lan0, ia0, aop0, ab0; char fname[80]; /* for BS ODE integrator setup */ for(int i=0;i<22;i++) yscal[i] = 1; /* Vector from observer to system center of mass */ R[0] = 0; R[1] = 0; R[2] = 2.45027686e20; //distance to Sgr A* /* timing variables */ time_t start; time_t stop; struct timeval t1; /* command line argument check */ //possible flags: // -c run to completion // -s [value] use value for seed // -t [value] run to tmax = value*Ph // -nd do not stop sim when BEMRI is disrupted // -N [value] run simulation N times, do not output position or quadrupole data // -so suppress any screen output // -PbN [value] set Pb = value*tnode // -th0 [value] set theta0 = value // -RK4 use RK4 integrator instead of BS2 // -fname [string] use string as file name instead of BEMRI.dat // -fpars [string] use string as filename to find input parameters // -beta [value] use beta = 
value // -gam [value] use gamma = value // -ips [value] run [value] runs per set of parameters, sampling initial phases // -eh [value] set BH orbit eccentricity // -Htest Test Heggie values // -Hrat [value] Use [value] for rp/a ratio in Heggie test // -geo Use geometricized units -- G = c = 1 // -ang Randomize binary orientation angles b = (BEMRI *) malloc( sizeof(BEMRI) ); if(args(argc,argv,&iPars,&f,&N,&frac,&seed,&PbN,fname,&eh,&ipsval,&Hrat)) return 1; /* initialize BEMRI parameters */ init_params(b,f); /* file pointer declarations */ FILE *results_fp, *Q_fp, *pos_fp; if(f.fnameflag == 0) results_fp = fopen("BEMRI.dat","w"); else results_fp = fopen(fname,"w"); if(f.cflag != 1 && f.Nflag != 1) { Q_fp = fopen("QP.dat","w"); pos_fp = fopen("pos.dat","w"); } /* Chain declaration */ C = (CHAIN *) malloc( sizeof(CHAIN) ); /* set seed if not given on command line */ if(f.sflag == 0) {gettimeofday(&t1,NULL); seed = t1.tv_usec;} iseed = seed; //save initial seed to output /* master loop, changes parameters */ /* this is the total number of unique (gamma,inc,lan) values that will be run */ for(int ii=0;ii<N;ii++) { /* randomly assign angles if -ang flag is used */ if(f.angflag) // angle variation { iPars.lanA = 2*PI*ran2(&seed); iPars.incA = acos(2*ran2(&seed)-1); } /* if not using random angles OR a parameter file, then assign angles of zero */ else if(!f.fParflag) iPars.lanA = iPars.incA = 0.0; /* assign random gamma value unless certain flags are used */ if(!f.fParflag && !f.betaflag && !f.gamflag){ iPars.gamma = (0.35 + 4.65*ran2(&seed)); // randomly assigning beta from being uniform over gamma iPars.beta = 1.0/iPars.gamma; } /* starting the loop over all theta0 initial binary phase values */ for(angle_counter = 0; angle_counter<ipsval; angle_counter++) { /* set G and c, which will be reset if -geo is used */ c = 299792458e0; G = 6.673e-11; /* reset timer and m1NegY */ timer = 1e300; m1NegY = 0; /* parameter setup */ init_params(b,f); // initialize BEMRI parameters 
eb = 0.0; aop = 0.0; // aop is redundant for circular orbits, always set to 0 lan = iPars.lanA; // set dynamic value lan ia = iPars.incA; // set dynamic value ia if(!f.th0flag && !f.fParflag && f.ipsflag) // assign appropriate value of th0 if using -ips flag iPars.th0 = 2*PI * (float)angle_counter/ipsval; //only set theta0 if th0flag has NOT been set /* Setup for testing results from Heggie and Rasio paper */ if(f.Htflag == 1) { G = c = 1; ia = 0; lan = 0; aop = 0; m1 = m2 = 1; m3 = 1; ab = 1; eb = 0.0; eh = 1.0; rph = Hrat*ab; r0 = 100*rph; theta0h = -acos(2*rph/r0 - 1); Pb = 2*PI*sqrt(ab*ab*ab/(G*(m1+m2))); } /* setup for elliptical BEMRI */ else if(eh < 1) theta0h = PI; /* Setup for parabolic orbit evolution */ else if(eh >= 1) { if(f.geoflag) // if using geometricized units { mconv = G/(c*c); // conversion factor for masses tconv = c; G = c = 1.0; // set constants to 1 //ab /= dscale; m1 *= mconv; // convert mass values to meters m2 *= mconv; m3 *= mconv; Pb *= tconv; // recalculate the binary period with new ab and masses } r_tid = ab*pow(m3/(m1+m2),1.0/3.0); // calculate tidal radius r_tid rph = r_tid/iPars.beta; // calculate pericenter distance for BEMRI orbit r0 = 200.0*rph; // calculate r0 = initial separation theta0h = -acos(2.0*rph/r0 - 1.0); // calculate corresponding true anomaly if(isnan(theta0h) || fabs(theta0h) < 2.0) // see if the anomaly was in acceptable range theta0h = -2.0; // if not, just set to -2.0 radians //theta0h = -3.0; } /* recalculate BEMRI parameters using new values */ recalc_params(b); /* initial conditions and node passage time */ lh = sqrt(rph*(1+eh)*G*(m3*m3*m4*m4)/(m3+m4)); //initial angular momentum of SMBH orbit advance_orbit(&(b->binary_h),lh,theta0h); //begin cm-SMBH orbit at theta0h /* timestep tau and max simulation time tmax */ if(f.Htflag == 1) { tau = Pb/pow(2,7); tmax = 1e100; } else if(eh<1) { tau = Pb/pow(2,7); tmax = 1*Pb; } else { tau = Pb/pow(2,7); tmax = 1e100; } recalc_params(b); // update little binary 
positions and velocities for orbit around hole lb = sqrt(rpb*(1+eb)*G*(m1*m1*m2*m2)/(m1+m2)); //initial angular momentum of BEMRI advance_orbit(&(b->binary_b),lb,iPars.th0); //begin BEMRI at theta0 BEMRI_CM_update(b); load_vectors(rr,vv,m,b); //load initial values into working vectors dt = tau; dt1 = dt; /* Chain setup and testing */ setup_chain(C,m,3); nvar = 6*(C->N-1); /* BEMRI lifetime check */ if ( peters_lifetime(eb,ab,m1,m2) < 100*Ph ) //BEMRI lifetime too short, abort current run {status = 3;times_around = -1;} /* other setup */ eh0 = eh; t0 = 0; zeroed = 0; t_broken = 0.0; xed = 0; minned = 0; t_RK4 = 0.0; status = 0; entered_node = 0; passed_node = 0; times_around = 0; max_orbits = 1; a = -1; if(eh < 1) amax = (int)(abs((tmax-t0)/dt1)); else amax = 1e6; astep = 1;//(int)ceil((amax/200000.0)); //output management start = time(NULL); max_hard_count = 20; hard_count = 0; Pb0 = Pb; ri = sqrt( pow(X4-X3,2) + pow(Y4-Y3,2) + pow(Z4-Z3,2) ); // initial distance from binary cm to SMBH k1 = G*m3*m1; k2 = G*m3*m2; /* initial energy setup */ calc_energies(b,&E1h,&E2h); E0 = Eb; eb0 = eb; ab0 = ab; E_last = E0; //initialize E_last calc_total_energy(b); // compute total system energy calc_total_angular_momentum(b); // compute total system ang. mom. Esys0 = b->energy; Lsys0 = b->L; /* print out some diagnostic info */ if(!f.Nflag) { printf("\nbeta = %.3f",iPars.beta); printf("\ngamma = %.3f",iPars.gamma); printf("\nt_max = %.3e",tmax); printf("\ntheta0h = %.3e",theta0h); printf("\ntheta0 = %.10e",iPars.th0); printf("\nab = %.4e",ab); printf("\nPb = %.4e",Pb); printf("\nr_cm0 = %.4e",ri); printf("\nG = %.3e\nc = %.3e",G,c); printf("\nia = %.3f\tlan = %.3f\n",ia*180/PI,lan*180/PI); if(angle_counter==0) anykey(); } /* main simulation loop */ while( f.cflag*times_around < 100 && times_around >= 0 && t_RK4 <= tmax ) //something should happen before this, but if not... 
{ /* this while loop will run under one of two conditions: if the -c flag is used, then completion = 1 and tmax = HUGE, so it will run until times_around < 100. If the -t or no flag is used, then completion = 0 and the sim will run until t_RK4 = tmax. */ gam_now = ah*(1-eh)/(ab*pow(m3/(m1+m2),1.0/3.0)); if( !f.Nflag ) { printf("t_RK4 = %.3e\r",t_RK4); //printf("Y1 = %.3e\r",Y1); fflush(stdout); } /* upkeep */ a += 1; E_last = Eb; //store old BEMRI energy eb_prev = eb; e1h_prev = e1h; e2h_prev = e2h; ab_prev = ab; /* actual integration call */ if(!f.bs2flag) { pack_y_vector_C(C,y); leap_derivs2(t_RK4,y,dydx); for(int i=0;i<nvar;i++) yscal[i]=FMAX(fabs(y[i])+fabs(dydx[i]*dt)+TINY,1); //bs_const_step(y,nvar,&t_RK4,dt1,&dt,1e-12,1e-6,yscal,1,leap_derivs2); bsstep(y,nvar,&t_RK4,&dt1,1e-12,1e-9,yscal,1,leap_derivs2); unpack_y_vector_C(C,y); if(a%10 == 0) check_chain(C); update_momenta(C); update_positions(C); } else{ //t_RK4 += N_body_main(rr,vv,m,dt1,3,&dt,1e-8,&minned); //evolves orbit from time t to t+dt1 with initial step size dt pack_y_vector(rr,vv,m,y); RK4A_const_step(y,22,&t_RK4,dt1,&dt,1e-9,1e-6,Nbody_derivs1); unpack_y_vector(rr,vv,m,y); } update_binaries(b,rr,vv); //copies values from v and r into the structures CM_values(b); //calculate values for the center of mass if(dt > dt1) //keeps integration step size at maximum of dt1 dt = dt1; timer -= dt1; // update timer value /* calculate desired values */ calc_angles(&(b->binary_b)); // calculate current orbital angles past = true_anomaly(&(b->binary_h)); // calculate current true anomaly of BH orbit calc_energies(b,&E1h,&E2h); // compute current binding energies calc_angular_momenta(b,&l1h,&l2h); // compute current pairwise angular momenta calc_ecc(b,&e1h,&e2h,E1h,E2h,l1h,l2h); // compute current pairwise eccentricities calc_total_energy(b); // compute total system energy calc_total_angular_momentum(b); // compute total system ang. mom. 
r_bemri = sqrt( pow(X1-X2,2) + pow(Y1-Y2,2) + pow(Z1-Z2,2)); //BEMRI separation r_1 = sqrt( pow(X1-X3,2) + pow(Y1-Y3,2) + pow(Z1-Z3,2) ); // m1-SMBH separation r_2 = sqrt( pow(X2-X3,2) + pow(Y2-Y3,2) + pow(Z2-Z3,2) ); // m2-SMBH separation r_current = min(r_1,r_2); //current distance from SMBH to closest BEMRI component r_cm = sqrt( pow(X4-X3,2) + pow(Y4-Y3,2) + pow(Z4-Z3,2) ); ab = E_to_a(Eb,m1,m2); //update semi-major axis of the BEMRI ah = E_to_a(Eh,m3,m1+m2); //update semi-major axis of the BH orbit Pb = 2*PI*sqrt(pow(ab,3)/(G*(m1+m2))); //current orbital periods Ph = 2*PI*sqrt(pow(ah,3)/(G*(m1+m2+m3))); b->binary_b.rp = ab*(1-eb); // compute new binary periapse b->binary_h.rp = ah*(1-eh); // compute new BEMRI periapse point_mass_QP(m,rr,3,R,Q,Qtt); // compute GW output if(eh<1) r_node = ah*(1-eh*eh); //SMBH-BEMRI separation when BEMRI is at the node /* stopping conditions */ if(eh0 < 1 || f.ipsflag==0) // for elliptical BEMRIs or single runs { if( passed_node && theta_h >= PI ) //if BEMRI has passed the node AND passed apoapse { passed_node = 0; //reset passed_node times_around += 1; //increment times_around if( (Eb/E_last) > 1 ) //compare current Eb to last orbit's initial Eb hard_count++; //the BEMRI has hardened, increment hard_count else hard_count = 0; //the BEMRI has softened, reset hard_count if( hard_count == max_hard_count) { status = 2; break; } E_last = Eb; } if(r_current < r_node && entered_node == 0) //updates values for overall while loop condition, don't want to update while BEMRI is in the node. 
{ entered_node = 1; //then the node has been entered theta_N = theta_b; //save the phase when BEMRI entered the node ecc_N = eb; } if(r_current > r_node && entered_node == 1) //if BEMRI has just left the node { passed_node = 1; //set passed_node entered_node = 0; //reset entered_node } } /* if m1 has gone into negative y territory, start the clock */ if(Y1<=0 && !m1NegY){ m1NegY = 1; timer0 = t_RK4; timer = t_RK4; printf("\nTimer started!\n"); } /* if timer has gone off, then break */ if(timer<=0) { // printf("TIMER! eb=%.3e | e1h = %.3e | e2h = %.2e\n",eb,e1h,e2h); if(fabs(eb_prev-eb)/eb <1e-9 && k1/r_1 > Eb && k2/r_2 > Eb) { status = 0; printf("No change condition met\n"); break; } else if(fabs(e1h_prev-e1h)/e1h < 1e-9 || fabs(e2h_prev-e2h)/e2h < 1e-9) { status = 0; printf("No change condition met\n"); break; } } if(f.Htflag == 1 && r_cm > ri ) // if { status = 0; break; } if(r_bemri < 2*R_NS && G < 1) { status = -1; break; } //Energy conservation check if( fabs(Esys0 - b->energy) / fabs(Esys0) > 1e-6 ) { status = -3; printf("\nenergy violation\nEsys0 = %.10e\nEsys(t) = %.10e\n",Esys0,b->energy); break; } // angular momentum conservation check if( fabs(Lsys0 - b->L) / fabs(Lsys0) > 1e-6 ) { status = -4; printf("\nAng. 
mom violation\nLsys0 = %.10e\nLsys(t) = %.10e\n",Lsys0,b->L); break; } /* print progress */ if(f.cflag == 1 && f.soflag==0) { printf("Current Theta: %.3f\tOrbits Completed: %d\r",theta_h,times_around); fflush(stdout); } else if(f.soflag==0 && eh0 < 1) { if(print_percentage(a,ii,t_RK4,tmax,ipsval)) { xed = 1; status = -2; fflush(stdout); break; } } /* Full Data Output */ if(a%astep == 0 && f.cflag != 1 && f.Nflag != 1) { output_time(t_RK4+dt1,pos_fp); output_positions(rr,b,pos_fp); fprintf(pos_fp,"%.10e,%.10e,%.10e,%.10e,%.10e,",Eb,Eh,b->energy,b->L,eb); fprintf(pos_fp,"\n"); output_QP(Qtt, Q_fp); } } // END OF CURRENT SINGLE RUN /* initial clean up */ stop = time(NULL); sim_time = difftime(stop,start)/60.0; a1h = E_to_a(E1h,m1,m3); a2h = E_to_a(E2h,m2,m3); /* print percentage if eh0 >= 1 */ if(f.soflag==0 && eh0 >= 1) { print_percentage(0,ii,angle_counter,ipsval,N); printf("\nde = %.10e\n",eb - eb0); } /* assign proper status if came out with a zero */ if(status==0) { if(Eb>0) // binary disrupted status=0; else if(Eh < 0) // binary survived, bound to SMBH status=1; else status=2; } /* final state output */ if(Eb < 0) { e1h = e2h = -1; } fprintf(results_fp,"%d,%ld,",status,iseed); fprintf(results_fp,"%.4e,%.4e",t_RK4,timer0); fprintf(results_fp,"%.10e,%.10e,%.10e,%.10e,%.10e,",eb0,eb,eh,e1h,e2h); fprintf(results_fp,"%.10e,%.10e,%.10e,%.10e,%.10e,",ab0,ab,ah,a1h,a2h); fprintf(results_fp,"%.10e,%.10e,%.10e,%.10e,%.10e,",Eh,E1h,E2h,E0,Esys0); fprintf(results_fp,"%.10e,%.10e,%.10e,%.10e,",lh,l1h,l2h,Lsys0); fprintf(results_fp,"%.10e,%.10e,%.10e,%.10e",iPars.incA,iPars.lanA,iPars.th0,iPars.gamma); fprintf(results_fp,"\n"); /* status codes: */ //-4: ang. mom. 
conservation violation //-3: energy conservation violation //-2: run manually canceled //-1: simulation halted due to BEMRI proximity //0 : good simulation, BEMRI disrupted //1 : good simulation, BEMRI survived but bound to SMBH //2 : good simulation, BEMRI survived and remained unbound //3 : lifetime for given parameters was too short } // END OF CURRENT ANGLE_COUNTER LOOP /* 100% print */ if(xed == 0 && f.cflag == 0 && f.soflag==0) { fflush(stdout); printf("Running %d of %d... (100%%)\n\r",ii+1,N); printf("\n\r"); } } // END OF OVERALL N LOOP /* file clean up */ fclose(results_fp); if(f.cflag != 1 && f.Nflag != 1) { fclose(pos_fp); fclose(Q_fp); } printf("\nfcount = %d\n",fcount); free(b); free(C); return 0; }
ForceArg Fp4::EvolveMomFforce(Matrix *mom, Vector *frm, Float mass, Float dt){ char *fname = "EvolveMomFforce(M*,V*,F,F,F)"; ERR.NotImplemented(cname,fname); ForceArg Fdt; #if 0 VRB.Func(cname,fname); #ifdef PROFILE Float dtime; ParTrans::PTflops=0; ForceFlops=0; #endif size_t size; // int nflops=0; static int vax_len = 0; if (vax_len == 0) vax_len = GJP.VolNodeSites()*VECT_LEN/VAXPY_UNROLL; size = GJP.VolNodeSites()/2*FsiteSize()*sizeof(Float); Vector *X = (Vector *)smalloc(2*size); // printf("X=%p\n",X); Vector *X_e = X; // even sites Vector *X_o = X+GJP.VolNodeSites()/2; // odd sites // The argument frm should have the CG solution. // The FstagTypes protected pointer f_tmp should contain Dslash frm moveMem(X_e, frm, size); #ifdef DEBUGGING f_tmp = frm+GJP.VolNodeSites()/2; // debugging only #endif moveMem(X_o, f_tmp, size); Fconvert(X, CANONICAL, STAG); Convert(STAG); // Puts staggered phases into gauge field. int N; // N can be 1, 2 or 4. N = 4; if (GJP.VolNodeSites()>256) N = 2; else if (GJP.VolNodeSites()>512) N = 1; VRB.Flow(cname,fname,"N=%d\n",N); enum{plus=0, minus=1, n_sign=2}; // Array in which to accumulate the force term: // this must be initialised to zero #if 0 Matrix **force = (Matrix**)amalloc(sizeof(Matrix), 2, 4, GJP.VolNodeSites()); if(!force) ERR.Pointer(cname, fname, "force"); #else size = GJP.VolNodeSites()*sizeof(Matrix); Matrix *force[4]; for(int i = 0;i<4;i++) force[i] = (Matrix *)v_alloc("force[i]",size); #endif for(int i=0; i<4; i++) for(int s=0; s<GJP.VolNodeSites(); s++) force[i][s].ZeroMatrix(); ParTransAsqtad parallel_transport(*this); // Vector arrays for which we must allocate memory #if 0 Vector ***Pnu = (Vector***)amalloc(sizeof(Vector), 3, n_sign, N, GJP.VolNodeSites()); if(!Pnu) ERR.Pointer(cname, fname, "Pnu"); Vector ****P3 = (Vector****)amalloc(sizeof(Vector), 4, n_sign, n_sign, N, GJP.VolNodeSites()); if(!P3) ERR.Pointer(cname, fname, "P3"); Vector ****Prhonu = (Vector****)amalloc(sizeof(Vector), 4, n_sign, n_sign, N, 
GJP.VolNodeSites()); if(!Prhonu) ERR.Pointer(cname, fname, "Prhonu"); Vector *****P5 = (Vector*****)amalloc(sizeof(Vector), 5, n_sign, n_sign, n_sign, N, GJP.VolNodeSites()); if(!P5) ERR.Pointer(cname, fname, "P5"); Vector ******P7 = (Vector******)amalloc(sizeof(Vector), 6, n_sign, n_sign, n_sign, n_sign, N, GJP.VolNodeSites()); if(!P7) ERR.Pointer(cname, fname, "P7"); Vector ******Psigma7 = (Vector******)amalloc(sizeof(Vector), 6, n_sign, n_sign, n_sign, n_sign, N, GJP.VolNodeSites()); if(!Psigma7) ERR.Pointer(cname, fname, "Psigma7"); // These vectors can be overlapped with previously allocated memory Vector **Pnununu = Prhonu[0][0]; Vector ***Pnunu = Psigma7[0][0][0];; Vector ****Pnu5 = P7[0][0]; Vector ****Pnu3 = P7[0][0]; Vector *****Prho5 = Psigma7[0]; Vector *****Psigmarhonu = Psigma7[0]; #else size = GJP.VolNodeSites()*sizeof(Vector); Vector *Pnu[n_sign][N]; Vector *P3[n_sign][n_sign][N]; Vector *Prhonu[n_sign][n_sign][N]; Vector *P5[n_sign][n_sign][n_sign][N]; Vector *P7[n_sign][n_sign][n_sign][n_sign][N]; Vector *Psigma7[n_sign][n_sign][n_sign][n_sign][N]; Vector *Pnununu[N]; Vector *Pnunu[n_sign][N]; Vector *Pnu5[n_sign][n_sign][N]; Vector *Pnu3[n_sign][n_sign][N]; Vector *Prho5[n_sign][n_sign][n_sign][N]; Vector *Psigmarhonu[n_sign][n_sign][n_sign][N]; //printf("Pnu=%p Psigmarhonu=%p\n",Pnu,Psigmarhonu); for(int w = 0;w<N;w++){ for(int i = 0;i<n_sign;i++){ Pnu[i][w]= (Vector *)v_alloc("Pnu",size); for(int j = 0;j<n_sign;j++){ P3[i][j][w]= (Vector *)v_alloc("P3",size); Prhonu[i][j][w]= (Vector *)v_alloc("Prhonu",size); for(int k = 0;k<n_sign;k++){ P5[i][j][k][w]= (Vector *)v_alloc("P5",size); for(int l = 0;l<n_sign;l++){ P7[i][j][k][l][w]= (Vector *)v_alloc("P7",size); Psigma7[i][j][k][l][w]= (Vector *)v_alloc("Psigma7",size); } Prho5[i][j][k][w] = Psigma7[0][i][j][k][w]; Psigmarhonu[i][j][k][w] = Psigma7[0][i][j][k][w]; } Pnu5[i][j][w]=P7[0][0][i][j][w]; Pnu3[i][j][w]=P7[0][0][i][j][w]; } Pnunu[i][w]=Psigma7[0][0][0][i][w]; } Pnununu[w]=Prhonu[0][0][w]; 
} #endif // input/output arrays for the parallel transport routines Vector *vin[n_sign*N], *vout[n_sign*N]; int dir[n_sign*N]; int mu[N], nu[N], rho[N], sigma[N]; // Sets of directions int w; // The direction index 0...N-1 int ms, ns, rs, ss; // Sign of direction bool done[4] = {false,false,false,false}; // Flags to tell us which // nu directions we have done. #ifdef PROFILE dtime = -dclock(); #endif for (int m=0; m<4; m+=N){ // Loop over mu for(w=0; w<N; w++) mu[w] = (m+w)%4; for (int n=m+1; n<m+4; n++){ // Loop over nu for(w=0; w<N; w++) nu[w] = (n+w)%4; // Pnu = U_nu X for(int i=0; i<N; i++){ vin[i] = vin[i+N] = X; dir[n_sign*i] = n_sign*nu[i]+plus; // nu_i dir[n_sign*i+1] = n_sign*nu[i]+minus; // -nu_i vout[n_sign*i] = Pnu[minus][i]; vout[n_sign*i+1] = Pnu[plus][i]; } parallel_transport.run(n_sign*N, vout, vin, dir); // P3 = U_mu Pnu // ms is the nu sign index, ms is the mu sign index, // w is the direction index for(int i=0; i<N; i++){ dir[n_sign*i] = n_sign*mu[i]+plus; // mu_i dir[n_sign*i+1] = n_sign*mu[i]+minus; // -mu_i } for(ns=0; ns<n_sign; ns++){ // ns is the sign of nu for(int i=0; i<N; i++){ vin[n_sign*i] = vin[n_sign*i+1] = Pnu[ns][i]; vout[n_sign*i] = P3[plus][ns][i]; vout[n_sign*i+1] = P3[minus][ns][i]; } parallel_transport.run(n_sign*N, vout, vin, dir); } for(w=0; w<N; w++) for(ns=0; ns<n_sign; ns++){ force_product_sum(P3[plus][ns][w], Pnu[ns][w], GJP.staple3_coeff(), force[mu[w]]); } for(int r=n+1; r<n+4; r++){ // Loop over rho bool nextr = false; for(w=0; w<N; w++){ rho[w] = (r+w)%4; if(rho[w]==mu[w]){ nextr = true; break; } } if(nextr) continue; for(w=0; w<N; w++){ // sigma for(int s=rho[w]+1; s<rho[w]+4; s++){ sigma[w] = s%4; if(sigma[w]!=mu[w] && sigma[w]!=nu[w]) break; } } // Prhonu = U_rho Pnu for(int i=0; i<N; i++){ dir[n_sign*i] = n_sign*rho[i]+plus; dir[n_sign*i+1] = n_sign*rho[i]+minus; } for(ns=0; ns<n_sign; ns++){ for(int i=0; i<N; i++){ vin[n_sign*i] = vin[n_sign*i+1] = Pnu[ns][i]; vout[n_sign*i] = Prhonu[ns][minus][i]; 
vout[n_sign*i+1] = Prhonu[ns][plus][i]; } parallel_transport.run(n_sign*N, vout, vin, dir); } // P5 = U_mu Prhonu for(int i=0; i<N; i++){ dir[n_sign*i] = n_sign*mu[i]+plus; dir[n_sign*i+1] = n_sign*mu[i]+minus; } for(ns=0; ns<n_sign; ns++) for(rs=0; rs<n_sign; rs++) { for(int i=0; i<N; i++){ vin[n_sign*i] = vin[n_sign*i+1] = Prhonu[ns][rs][i]; vout[n_sign*i] = P5[plus][ns][rs][i]; vout[n_sign*i+1] = P5[minus][ns][rs][i]; } parallel_transport.run(n_sign*N, vout, vin, dir); } // F_mu += P5 Prhonu^dagger for(w=0; w<N; w++) for(ns=0; ns<n_sign; ns++) for(rs=0; rs<n_sign; rs++) force_product_sum(P5[plus][ns][rs][w], Prhonu[ns][rs][w], GJP.staple5_coeff(), force[mu[w]]); // Psigmarhonu = U_sigma P_rhonu for(int i=0; i<N; i++){ dir[n_sign*i] = (n_sign*sigma[i]); dir[n_sign*i+1] = (n_sign*sigma[i]+1); } for(ns=0; ns<n_sign; ns++) for(rs=0; rs<n_sign; rs++){ for(int i=0; i<N; i++){ vin[n_sign*i] = vin[n_sign*i+1] = Prhonu[ns][rs][i]; vout[n_sign*i] = Psigmarhonu[ns][rs][minus][i]; vout[n_sign*i+1] = Psigmarhonu[ns][rs][plus][i]; } parallel_transport.run(n_sign*N, vout, vin, dir); } // P7 = U_mu P_sigmarhonu for(int i=0; i<N; i++){ dir[n_sign*i] = n_sign*mu[i]+plus; dir[n_sign*i+1] = n_sign*mu[i]+minus; } for(ns=0; ns<n_sign; ns++) for(rs=0; rs<n_sign; rs++) for(ss=0; ss<n_sign; ss++){ for(int i=0; i<N; i++){ vin[n_sign*i] = vin[n_sign*i+1] = Psigmarhonu[ns][rs][ss][i]; vout[n_sign*i] = P7[plus][ns][rs][ss][i]; vout[n_sign*i+1] = P7[minus][ns][rs][ss][i]; } parallel_transport.run(n_sign*N, vout, vin, dir); } // F_mu -= P7 Psigmarhonu^\dagger for(w=0; w<N; w++) for(ns=0; ns<n_sign; ns++) for(rs=0; rs<n_sign; rs++) for(ss=0; ss<n_sign; ss++) force_product_sum(P7[plus][ns][rs][ss][w], Psigmarhonu[ns][rs][ss][w], GJP.staple7_coeff(), force[mu[w]]); // F_sigma += P7 Psigmarhonu^\dagger // N.B. this is the same as one of the previous products. 
for(w=0; w<N; w++) for(ns=0; ns<n_sign; ns++) for(rs=0; rs<n_sign; rs++) force_product_sum(P7[plus][ns][rs][minus][w], Psigmarhonu[ns][rs][minus][w], -GJP.staple7_coeff(), force[sigma[w]]); // F_sigma += Psigmarhonu P7^\dagger for(w=0; w<N; w++) for(ns=0; ns<n_sign; ns++) for(rs=0; rs<n_sign; rs++) force_product_sum(Psigmarhonu[ns][rs][minus][w], P7[minus][ns][rs][minus][w], -GJP.staple7_coeff(), force[sigma[w]]); // Psigma7 = U_sigma P7 for(int i=0; i<N; i++){ dir[n_sign*i] = (n_sign*sigma[i]); dir[n_sign*i+1] = (n_sign*sigma[i]+1); } for(ms=0; ms<n_sign; ms++) for(ns=0; ns<n_sign; ns++) for(rs=0; rs<n_sign; rs++){ for(int i=0; i<N; i++){ vin[n_sign*i] = P7[ms][ns][rs][plus][i]; vin[n_sign*i+1] = P7[ms][ns][rs][minus][i]; vout[n_sign*i] = Psigma7[ms][ns][rs][plus][i]; vout[n_sign*i+1] = Psigma7[ms][ns][rs][minus][i]; } parallel_transport.run(n_sign*N, vout, vin, dir); } // F_sigma += Fsigma7 Frhonu^\dagger for(w=0; w<N; w++) for(ns=0; ns<n_sign; ns++) for(rs=0; rs<n_sign; rs++) force_product_sum(Psigma7[plus][ns][rs][plus][w], Prhonu[ns][rs][w], -GJP.staple7_coeff(), force[sigma[w]]); // F_sigma += Frhonu Fsigma7^\dagger for(w=0; w<N; w++) for(ns=0; ns<n_sign; ns++) for(rs=0; rs<n_sign; rs++) force_product_sum(Prhonu[ns][rs][w], Psigma7[minus][ns][rs][plus][w], -GJP.staple7_coeff(), force[sigma[w]]); // P5 += c_7/c_5 Psigma7 if(GJP.staple5_coeff()!=0.0){ Float c75 = -GJP.staple7_coeff()/GJP.staple5_coeff(); for(ms=0; ms<n_sign; ms++) for(ns=0; ns<n_sign; ns++) for(rs=0; rs<n_sign; rs++) for(ss=0; ss<n_sign; ss++) for(w=0; w<N; w++) vaxpy3(P5[ms][ns][rs][w],&c75, Psigma7[ms][ns][rs][ss][w], P5[ms][ns][rs][w], vax_len); // P5[ms][ns][rs][w]->FTimesV1PlusV2(-GJP.staple7_coeff()/GJP.staple5_coeff(), Psigma7[ms][ns][rs][ss][w], P5[ms][ns][rs][w], GJP.VolNodeSites()*VECT_LEN); ForceFlops += 2*GJP.VolNodeSites()*VECT_LEN*N*n_sign*n_sign*n_sign*n_sign; } // F_rho -= P5 Prhonu^\dagger for(w=0; w<N; w++) for(ns=0; ns<n_sign; ns++) force_product_sum(P5[plus][ns][minus][w], 
Prhonu[ns][minus][w], -GJP.staple5_coeff(), force[rho[w]]); // F_rho -= Prhonu P5^\dagger for(w=0; w<N; w++) for(ns=0; ns<n_sign; ns++) force_product_sum(Prhonu[ns][minus][w], P5[minus][ns][minus][w], -GJP.staple5_coeff(), force[rho[w]]); // Prho5 = U_rho P5 for(int i=0; i<N; i++){ dir[n_sign*i] = n_sign*rho[i]+plus; dir[n_sign*i+1] = n_sign*rho[i]+minus; } for(ms=0; ms<n_sign; ms++) for(ns=0; ns<n_sign; ns++){ for(int i=0; i<N; i++){ vin[n_sign*i] = P5[ms][ns][plus][i]; vin[n_sign*i+1] = P5[ms][ns][minus][i]; vout[n_sign*i] = Prho5[ms][ns][plus][i]; vout[n_sign*i+1] = Prho5[ms][ns][minus][i]; } parallel_transport.run(n_sign*N, vout, vin, dir); } // F_rho -= Prho5 Pnu^\dagger for(w=0; w<N; w++) for(ns=0; ns<n_sign; ns++) force_product_sum(Prho5[plus][ns][plus][w], Pnu[ns][w], -GJP.staple5_coeff(), force[rho[w]]); // F_rho -= Pnu Prho5^\dagger for(w=0; w<N; w++) for(ns=0; ns<n_sign; ns++) force_product_sum(Pnu[ns][w], Prho5[minus][ns][plus][w], -GJP.staple5_coeff(), force[rho[w]]); // P3 += c_5/c_3 Prho5 if(GJP.staple3_coeff()!=0.0){ Float c53 = -GJP.staple5_coeff()/GJP.staple3_coeff(); for(ms=0; ms<n_sign; ms++) for(ns=0; ns<n_sign; ns++) for(rs=0; rs<n_sign; rs++) for(w=0; w<N; w++) vaxpy3(P3[ms][ns][w],&c53,Prho5[ms][ns][rs][w], P3[ms][ns][w], vax_len); // P3[ms][ns][w]->FTimesV1PlusV2(-GJP.staple5_coeff()/GJP.staple3_coeff(), Prho5[ms][ns][rs][w], P3[ms][ns][w], GJP.VolNodeSites()*VECT_LEN); ForceFlops += 2*GJP.VolNodeSites()*VECT_LEN*N*n_sign*n_sign*n_sign; } } // rho+sigma loop // Pnunu = U_nu Pnu for(int i=0; i<N; i++){ dir[n_sign*i] = n_sign*nu[i]+plus; dir[n_sign*i+1] = n_sign*nu[i]+minus; } for(int i=0; i<N; i++){ vin[n_sign*i] = Pnu[minus][i]; vin[n_sign*i+1] = Pnu[plus][i]; vout[n_sign*i] = Pnunu[minus][i]; vout[n_sign*i+1] = Pnunu[plus][i]; } parallel_transport.run(n_sign*N, vout, vin, dir); // P5 = U_mu Pnunu for(int i=0; i<N; i++){ dir[n_sign*i] = n_sign*mu[i]+plus; dir[n_sign*i+1] = n_sign*mu[i]+minus; } for(ns=0; ns<n_sign; ns++){ for(int i=0; i<N; 
i++){ vin[n_sign*i] = Pnunu[ns][i]; vin[n_sign*i+1] = Pnunu[ns][i]; vout[n_sign*i] = P5[plus][ns][0][i]; vout[n_sign*i+1] = P5[minus][ns][0][i]; } parallel_transport.run(n_sign*N, vout, vin, dir); } // F_mu += P5 Pnunu^\dagger for(w=0; w<N; w++) for(ns=0; ns<n_sign; ns++) force_product_sum(P5[plus][ns][0][w], Pnunu[ns][w], GJP.Lepage_coeff(), force[mu[w]]); // F_nu -= P5 Pnunu^\dagger // N.B. this is the same as one of the previous products for(w=0; w<N; w++) force_product_sum(P5[plus][minus][0][w], Pnunu[minus][w], -GJP.Lepage_coeff(), force[nu[w]]); // F_nu -= Pnunu P5^\dagger for(w=0; w<N; w++) force_product_sum(Pnunu[minus][w], P5[minus][minus][0][w], -GJP.Lepage_coeff(), force[nu[w]]); // Pnu5 = U_nu P5 for(int i=0; i<N; i++){ dir[n_sign*i] = n_sign*nu[i]+plus; dir[n_sign*i+1] = n_sign*nu[i]+minus; } for(ms=0; ms<n_sign; ms++){ for(int i=0; i<N; i++){ vin[n_sign*i] = P5[ms][plus][0][i]; vin[n_sign*i+1] = P5[ms][minus][0][i]; vout[n_sign*i] = Pnu5[ms][plus][i]; vout[n_sign*i+1] = Pnu5[ms][minus][i]; } parallel_transport.run(n_sign*N, vout, vin, dir); } // F_nu -= Pnu5 Pnu^\dagger for(w=0; w<N; w++) force_product_sum(Pnu5[plus][plus][w], Pnu[plus][w], -GJP.Lepage_coeff(), force[nu[w]]); // F_nu -= Pnu Pnu5^\dagger for(w=0; w<N; w++) force_product_sum(Pnu[plus][w], Pnu5[minus][plus][w], -GJP.Lepage_coeff(), force[nu[w]]); // P3 += c_L/c_3 Pnu5 if(GJP.staple3_coeff()!=0.0){ Float cl3 = -GJP.Lepage_coeff()/GJP.staple3_coeff(); for(ms=0; ms<n_sign; ms++) for(ns=0; ns<n_sign; ns++) for(w=0; w<N; w++) vaxpy3(P3[ms][ns][w],&cl3,Pnu5[ms][ns][w],P3[ms][ns][w], vax_len); // P3[ms][ns][w]->FTimesV1PlusV2(-GJP.Lepage_coeff()/GJP.staple3_coeff(), Pnu5[ms][ns][w], P3[ms][ns][w], GJP.VolNodeSites()*VECT_LEN); ForceFlops += 2*GJP.VolNodeSites()*VECT_LEN*N*n_sign*n_sign; } // F_nu += P3 Pnu^\dagger for(w=0; w<N; w++) force_product_sum(P3[plus][minus][w], Pnu[minus][w], -GJP.staple3_coeff(), force[nu[w]]); // F_nu += Pnu P3^\dagger for(w=0; w<N; w++) 
force_product_sum(Pnu[minus][w], P3[minus][minus][w], -GJP.staple3_coeff(), force[nu[w]]); // Pnu3 = U_nu P3 for(int i=0; i<N; i++) dir[i] = n_sign*nu[i]+plus; for(ms=0; ms<n_sign; ms++){ for(int i=0; i<N; i++){ vin[i] = P3[ms][plus][i]; vout[i] = Pnu3[ms][plus][i]; } parallel_transport.run(N, vout, vin, dir); } // F_nu += Pnu3 X^\dagger for(w=0; w<N; w++) force_product_sum(Pnu3[plus][plus][w], X, -GJP.staple3_coeff(), force[nu[w]]); // F_nu += X Pnu3^\dagger for(w=0; w<N; w++) force_product_sum(X, Pnu3[minus][plus][w], -GJP.staple3_coeff(), force[nu[w]]); // This stuff is to be done once only for each value of nu[w]. // Look for N nu's that haven't been done before. bool nextn = false; for(w=0; w<N; w++) if(done[nu[w]]){ nextn = true; break; } if(nextn) continue; for(w=0; w<N; w++) done[nu[w]] = true; // Got N new nu's, so do some stuff... // F_nu += Pnu X^\dagger for(w=0; w<N; w++) force_product_sum(Pnu[minus][w], X, GJP.KS_coeff(), force[nu[w]]); // F_nu += Pnunu Pnu^\dagger for(w=0; w<N; w++) force_product_sum(Pnunu[minus][w], Pnu[plus][w], -GJP.Naik_coeff(), force[nu[w]]); // F_nu += Pnu Pnunu^\dagger for(w=0; w<N; w++) force_product_sum(Pnu[minus][w], Pnunu[plus][w], GJP.Naik_coeff(), force[nu[w]]); // Pnununu = U_nu Pnunu for(int i=0; i<N; i++){ dir[i] = n_sign*nu[i]+plus; vin[i] = Pnunu[minus][i]; vout[i] = Pnununu[i]; } parallel_transport.run(N, vout, vin, dir); // F_nu += Pnununu X^\dagger for(w=0; w<N; w++) force_product_sum(Pnununu[w], X, GJP.Naik_coeff(), force[nu[w]]); } // nu loop } // mu loop // Now that we have computed the force, we can update the momenta // nflops +=ParTrans::PTflops + ForceFlops; #ifdef PROFILE dtime += dclock(); int nflops = ParTrans::PTflops + ForceFlops; printf("%s:%s:",cname,fname); print_flops(nflops,dtime); #endif Fdt = update_momenta(force, dt, mom); // Tidy up #if 0 sfree(Pnu); sfree(P3); sfree(Prhonu); sfree(P5); sfree(P7); sfree(Psigma7); #else for(int w = 0;w<N;w++){ for(int i = 0;i<n_sign;i++){ v_free(Pnu[i][w]); 
for(int j = 0;j<n_sign;j++){ v_free(P3[i][j][w]); v_free(Prhonu[i][j][w]); for(int k = 0;k<n_sign;k++){ v_free(P5[i][j][k][w]); for(int l = 0;l<n_sign;l++){ v_free(P7[i][j][k][l][w]); v_free(Psigma7[i][j][k][l][w]); } } } } } #endif for(int i = 0;i<4;i++) v_free(force[i]); sfree(X); Convert(CANONICAL); #endif return Fdt; }
void integrate_omf4(const double tau, const int S, const int halfstep) { int i,j=0; integrator * itgr = &Integrator; double eps; if(S == itgr->no_timescales-1) { dohalfstep(tau, S); } eps = tau/((double)itgr->n_int[S]); if(S == 0) { for(j = 1; j < itgr->n_int[0]; j++) { update_gauge(omf4_rho*eps, &itgr->hf); update_momenta(itgr->mnls_per_ts[0], omf4_lamb*eps, itgr->no_mnls_per_ts[0], &itgr->hf); update_gauge(omf4_theta*eps, &itgr->hf); update_momenta(itgr->mnls_per_ts[0], 0.5*(1-2.*(omf4_lamb+omf4_vartheta))*eps, itgr->no_mnls_per_ts[0], &itgr->hf); update_gauge((1-2.*(omf4_theta+omf4_rho))*eps, &itgr->hf); update_momenta(itgr->mnls_per_ts[0], 0.5*(1-2.*(omf4_lamb+omf4_vartheta))*eps, itgr->no_mnls_per_ts[0], &itgr->hf); update_gauge(omf4_theta*eps, &itgr->hf); update_momenta(itgr->mnls_per_ts[0], omf4_lamb*eps, itgr->no_mnls_per_ts[0], &itgr->hf); update_gauge(omf4_rho*eps, &itgr->hf); update_momenta(itgr->mnls_per_ts[0], 2*omf4_vartheta*eps, itgr->no_mnls_per_ts[0], &itgr->hf); } update_gauge(omf4_rho*eps, &itgr->hf); update_momenta(itgr->mnls_per_ts[0], omf4_lamb*eps, itgr->no_mnls_per_ts[0], &itgr->hf); update_gauge(omf4_theta*eps, &itgr->hf); update_momenta(itgr->mnls_per_ts[0], 0.5*(1-2.*(omf4_lamb+omf4_vartheta))*eps, itgr->no_mnls_per_ts[0], &itgr->hf); update_gauge((1-2.*(omf4_theta+omf4_rho))*eps, &itgr->hf); update_momenta(itgr->mnls_per_ts[0], 0.5*(1-2.*(omf4_lamb+omf4_vartheta))*eps, itgr->no_mnls_per_ts[0], &itgr->hf); update_gauge(omf4_theta*eps, &itgr->hf); update_momenta(itgr->mnls_per_ts[0], omf4_lamb*eps, itgr->no_mnls_per_ts[0], &itgr->hf); update_gauge(omf4_rho*eps, &itgr->hf); if(halfstep != 1) { update_momenta(itgr->mnls_per_ts[0], 2*omf4_vartheta*eps, itgr->no_mnls_per_ts[0], &itgr->hf); } } else { for(i = 1; i < itgr->n_int[S]; i++) { itgr->integrate[S-1](omf4_rho*eps, S-1, 0); update_momenta(itgr->mnls_per_ts[S], omf4_lamb*eps, itgr->no_mnls_per_ts[S], &itgr->hf); itgr->integrate[S-1](omf4_theta*eps, S-1, 0); 
update_momenta(itgr->mnls_per_ts[S], 0.5*(1-2.*(omf4_lamb+omf4_vartheta))*eps, itgr->no_mnls_per_ts[S], &itgr->hf); itgr->integrate[S-1]((1-2.*(omf4_theta+omf4_rho))*eps, S-1, 0); update_momenta(itgr->mnls_per_ts[S], 0.5*(1-2.*(omf4_lamb+omf4_vartheta))*eps, itgr->no_mnls_per_ts[S], &itgr->hf); itgr->integrate[S-1](omf4_theta*eps, S-1, 0); update_momenta(itgr->mnls_per_ts[S], omf4_lamb*eps, itgr->no_mnls_per_ts[S], &itgr->hf); itgr->integrate[S-1](omf4_rho*eps, S-1, 0); update_momenta(itgr->mnls_per_ts[S], 2*omf4_vartheta*eps, itgr->no_mnls_per_ts[S], &itgr->hf); } itgr->integrate[S-1](omf4_rho*eps, S-1, 0); update_momenta(itgr->mnls_per_ts[S], omf4_lamb*eps, itgr->no_mnls_per_ts[S], &itgr->hf); itgr->integrate[S-1](omf4_theta*eps, S-1, 0); update_momenta(itgr->mnls_per_ts[S], 0.5*(1-2.*(omf4_lamb+omf4_vartheta))*eps, itgr->no_mnls_per_ts[S], &itgr->hf); itgr->integrate[S-1]((1-2.*(omf4_theta+omf4_rho))*eps, S-1, 0); update_momenta(itgr->mnls_per_ts[S], 0.5*(1-2.*(omf4_lamb+omf4_vartheta))*eps, itgr->no_mnls_per_ts[S], &itgr->hf); itgr->integrate[S-1](omf4_theta*eps, S-1, 0); update_momenta(itgr->mnls_per_ts[S], omf4_lamb*eps, itgr->no_mnls_per_ts[S], &itgr->hf); if(S == itgr->no_timescales-1) { itgr->integrate[S-1](omf4_rho*eps, S-1, 1); } else itgr->integrate[S-1](omf4_rho*eps, S-1, halfstep); if(halfstep != 1 && S != itgr->no_timescales-1) { update_momenta(itgr->mnls_per_ts[S], 2*omf4_vartheta*eps, itgr->no_mnls_per_ts[S], &itgr->hf); } } if(S == itgr->no_timescales-1) { dohalfstep(tau, S); } return; }