static void bench_record(const SkPicture& src, const double timerOverhead, const char* name, SkBBHFactory* bbhFactory) { // Rerecord once to warm up any caches. Otherwise the first sample can be very noisy. rerecord(src, bbhFactory); // Rerecord once to see how many times we should loop to make timer overhead insignificant. Timer timer; do { timer.start(timescale()); rerecord(src, bbhFactory); timer.end(); } while (timer.fWall < timerOverhead); // Loop just in case something bizarre happens. // We want (timer overhead / measurement) to be less than FLAGS_overheadGoal. // So in each sample, we'll loop enough times to have made that true for our first measurement. const int loops = (int)ceil(timerOverhead / timer.fWall / FLAGS_overheadGoal); SkAutoTMalloc<double> samples(FLAGS_samples); for (int i = 0; i < FLAGS_samples; i++) { timer.start(timescale()); for (int j = 0; j < loops; j++) { rerecord(src, bbhFactory); } timer.end(); samples[i] = timer.fWall / loops; } Stats stats(samples.get(), FLAGS_samples); if (FLAGS_verbose == 0) { printf("%g\t%s\n", stats.min, name); } else if (FLAGS_verbose == 1) { // Get a rough idea of how noisy the measurements were. const double noisePercent = 100 * sqrt(stats.var) / stats.mean; printf("%g\t%g\t%g\t±%.0f%%\t%s\n", stats.min, stats.mean, stats.max, noisePercent, name); } else if (FLAGS_verbose == 2) { printf("%s", name); for (int i = 0; i < FLAGS_samples; i++) { printf("\t%g", samples[i]); } printf("\n"); } }
bool velocityConstraint(rw::math::Q &dq, rw::models::Device::Ptr &device, double ×tep, double &tau){ if(device->getDOF() != dq.size()){ rw::common::Log::log().error() << "ERROR: Dimensions of the input of dq and device dof must agree in velocityConstraint.\n"; rw::common::Log::log().error() << " - dq: " << dq.size() << ", dof: " << device->getDOF() << "\n"; } if(!(timestep > 0)){ rw::common::Log::log().error() << "ERROR: Timestep must be greater than 0.\n"; rw::common::Log::log().error() << " - dt: " << timestep << "\n"; } bool ret = false; rw::math::Q vC = device->getVelocityLimits(); // rw::common::Log::log().info() << " dq:\n" << dq << "\n"; // rw::common::Log::log().info() << " dq_act:\n" << dq/timestep << "\n"; // rw::common::Log::log().info() << " dq_vec:\n" << vC << "\n"; // find how much to fast it's moving rw::math::Q timescale(vC.size()); double maxscale = 0; for(unsigned int i = 0; i < timescale.size(); i++){ timescale(i) = fabs((dq(i) / timestep) / vC(i)); if(timescale(i) > maxscale){ maxscale = timescale(i); } } // apply timescaling to make it go within the bounds if(maxscale > 1){ tau = timestep * maxscale; ret = true; } else{ tau = timestep; } return ret; }
int tool_main(int argc, char** argv) { SkCommandLineFlags::Parse(argc, argv); SkAutoGraphics autoGraphics; if (FLAGS_bbh.count() > 1) { SkDebugf("Multiple bbh arguments supplied.\n"); return 1; } SkAutoTDelete<SkBBHFactory> bbhFactory(parse_FLAGS_bbh()); // Each run will use this timer overhead estimate to guess how many times it should run. static const int kOverheadLoops = 10000000; WallTimer timer; double overheadEstimate = 0.0; const double scale = timescale(); for (int i = 0; i < kOverheadLoops; i++) { timer.start(); timer.end(); overheadEstimate += timer.fWall * scale; } overheadEstimate /= kOverheadLoops; SkOSFile::Iter it(FLAGS_skps[0], ".skp"); SkString filename; bool failed = false; while (it.next(&filename)) { if (SkCommandLineFlags::ShouldSkip(FLAGS_match, filename.c_str())) { continue; } const SkString path = SkOSPath::Join(FLAGS_skps[0], filename.c_str()); SkAutoTUnref<SkStream> stream(SkStream::NewFromFile(path.c_str())); if (!stream) { SkDebugf("Could not read %s.\n", path.c_str()); failed = true; continue; } SkAutoTUnref<SkPicture> src( SkPicture::CreateFromStream(stream, sk_tools::LazyDecodeBitmap)); if (!src) { SkDebugf("Could not read %s as an SkPicture.\n", path.c_str()); failed = true; continue; } bench_record(*src, overheadEstimate, filename.c_str(), bbhFactory.get()); } return failed ? 1 : 0; }
// Times playback of `src` and prints the result labelled with `name`.
//
// `src` is re-recorded twice (rerecord_with_tilegrid and rerecord_with_skr) and
// both results are passed to draw() together with a raster canvas that writes
// into `scratch`.  The canvas is clipped to a FLAGS_tile x FLAGS_tile rectangle.
// FLAGS_samples timed draws are taken after one untimed warm-up draw.
//
// FLAGS_verbose selects the report: 0 prints the minimum, 1 prints
// min/mean/max plus a noise percentage, 2 prints every sample.
static void bench(SkPMColor* scratch, SkPicture& src, const char* name) {
    SkAutoTUnref<SkPicture> picture(rerecord_with_tilegrid(src));
    SkAutoTDelete<EXPERIMENTAL::SkPlayback> record(rerecord_with_skr(src));

    SkAutoTDelete<SkCanvas> canvas(
            SkCanvas::NewRasterDirectN32(src.width(),
                                         src.height(),
                                         scratch,
                                         src.width() * sizeof(SkPMColor)));
    const SkRect tileBounds = SkRect::MakeWH(SkIntToScalar(FLAGS_tile),
                                             SkIntToScalar(FLAGS_tile));
    canvas->clipRect(tileBounds);

    // One throw-away draw so that cold caches do not pollute the first sample.
    draw(*record, *picture, canvas.get());

    WallTimer stopwatch;
    const double toOutputUnits = timescale();
    SkAutoTMalloc<double> samples(FLAGS_samples);
    int sampleIndex = 0;
    while (sampleIndex < FLAGS_samples) {
        // Timer overhead (typically ~30ns) is assumed to be negligible next to
        // a draw (at least ~100us, usually several ms), so it is not corrected for.
        stopwatch.start();
        draw(*record, *picture, canvas.get());
        stopwatch.end();
        samples[sampleIndex] = stopwatch.fWall * toOutputUnits;
        sampleIndex++;
    }

    Stats stats(samples.get(), FLAGS_samples);

    if (FLAGS_verbose == 2) {
        // Raw dump: the label followed by each individual sample.
        printf("%s", name);
        for (int s = 0; s < FLAGS_samples; s++) {
            printf("\t%g", samples[s]);
        }
        printf("\n");
        return;
    }

    if (FLAGS_verbose == 1) {
        // Standard deviation relative to the mean gives a rough noise figure.
        const double noisePercent = 100 * sqrt(stats.var) / stats.mean;
        printf("%g\t%g\t%g\t±%.0f%%\t%s\n", stats.min, stats.mean, stats.max, noisePercent, name);
        return;
    }

    if (FLAGS_verbose == 0) {
        printf("%g\t%s\n", stats.min, name);
    }
}
void timestep(void) { FTYPE dtother; int i,j,k,l ; FTYPE idt2[NUMDTCHECKS+1]; int ks[NUMDTCHECKS+1], js[NUMDTCHECKS+1], is[NUMDTCHECKS+1]; FTYPE dt2inv_max[NUMDTCHECKS+1] ; int didfail,didfail_full; int bigger; FTYPE finaln; static int firsttime=1; static FTYPE ttimestep=0,ttimescale=0; static FTYPE dtlast; char tempc1[50]; // for slow idtcreate change FTYPE bxa,bya,bza,dv,dvdx,delv; FTYPE l2_ten; FTYPE rho,u ; FTYPE odx1,odx2,odx3,ods,odl; FTYPE valphen,velfastm,valphen2,cs2 ; int reall,viscl,nonvl; FTYPE vel1,vel2,vel3; FTYPE ftemp; FTYPE dvx,dvy,dvz,ftemp1,ftemp2,ftemp3; static FTYPE dtrecv; int nstepmin,nstepmax; int gosub,gosup; FTYPE dt2invl[3]; static FTYPE dtotherlowest; static int laststep; if(visc_real==1){ nu_compute(); } if(RESMEM&&(res_real==1)){ if(rreal==2) current_compute(123); nu_res_compute(); } for(l=2;l<=NUMDTCHECKS;l++){ dt2inv_max[l]=0.; ks[l]=js[l]=is[l]=0; } dtlast = dt ; didfail=0; didfail_full=0; if(firsttime==1){ ttimestep=t-1.E-12; ttimescale=t-1.E-12; laststep=0; } LOOPTIMESTEP{ #if(BOUNDTYPE==3) if(bzmask[k][j][i]!=0) continue; #endif #if(TS0CHECK) if(s[2][k][j][i] < 0) { // actually detects nan too sprintf(tempc1,"%3f",s[2][k][j][i]); if(tempc1[0]=='n'){ fprintf(fail_file,"nan internal energy density error: k: %3d j: %3d i: %3d u: %15.10g \n",k,j,i,s[2][k][j][i]) ; } else if(s[2][k][j][i]<0) fprintf(fail_file,"negative internal energy density error: k: %3d j: %3d i: %3d en: %15.10g \n",k,j,i,s[2][k][j][i]) ; didfail=1; } if(s[1][k][j][i] < 0) { sprintf(tempc1,"%3f",s[1][k][j][i]); if(tempc1[0]=='n'){ fprintf(fail_file,"nan mass density error: k: %3d j: %3d i: %3d rho: %15.10g \n",k,j,i,s[1][k][j][i]) ; } else if(s[1][k][j][i]<0) fprintf(fail_file,"negative mass density error: k: %3d j: %3d i: %3d rho: %15.10g \n",k,j,i,s[1][k][j][i]) ; didfail=1; } #endif // not inlining this function for some reason, so slow in loop, so make .h file // idtcreate(idt2,k,j,i); #include "timestep.h" for(l=2;l<=NUMDTCHECKS;l++){ ftemp=idt2[l]; if(ftemp > 
dt2inv_max[l]){ dt2inv_max[l] = ftemp ; ks[l]=k; js[l]=j; is[l]=i; } if(CHECKDTLOW==1){ if(ftemp>SQIDTLOWEST){ timecheck(-l,idt2,k,j,i,0); didfail=1; fflush(fail_file); } } } }// end loop over domain if(CHECKDTLOW==1){ // check if any cpu has failure if(numprocs>1){ #if(USEMPI) MPI_Allreduce(&didfail, &didfail_full, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); #endif } else{ didfail_full=didfail; } if(didfail_full){ if(myid<=0){ fprintf(log_file,"timestep failure\n"); } if(DOGENDIAG){ diag(2); } if(DOAVGDIAG){ diagavg(2); } #if(USEMPI) MPI_Barrier(MPI_COMM_WORLD); // allow to finish diags #endif myexit(5); } } // find lowest constrainer on dt reall=2; for(l=3;l<=NUMDTCHECKS;l++){ if(dt2inv_max[l]>dt2inv_max[reall]){ reall=l; } } if(DODTDIAG){ // do check up on dominates of timestep for each type if((t>ttimestep)||(dt<DTLOWEST)){ // per cpu pure dt data for(l=2;l<=NUMDTCHECKS;l++){ timecheck(l,idt2,ks[l],js[l],is[l],reall); } fflush(logdt_file); ttimestep=t+DTtimestep; } } if(DOTSTEPDIAG){ if(t>ttimescale){ if(numprocs==1){ timescale(); // SUPERMARK -- need to fix timescale to be correct in new cpu setup ttimescale=t+DTtimescale; } } } // find lowest constrainer on dt due to visc if(dt2inv_max[8]>dt2inv_max[9]){ viscl=8; } else viscl=9; // find lowest constrainer on dt of non-viscosity type (next highest dt^2) nonvl=2; for(l=3;l<=NUMDTCHECKS;l++){ if(l==8) l=10; // skip viscosity if(dt2inv_max[l]>dt2inv_max[nonvl]){ nonvl=l; } } // communicate the lowest dt values to all cpus if(numprocs>1){ #if(USEMPI) MPI_Allreduce(&(dt2inv_max[reall]), &dt2invl[0], 1, MPI_FTYPE, MPI_MAX, MPI_COMM_WORLD); MPI_Allreduce(&(dt2inv_max[viscl]), &dt2invl[1], 1, MPI_FTYPE, MPI_MAX, MPI_COMM_WORLD); MPI_Allreduce(&(dt2inv_max[nonvl]), &dt2invl[2], 1, MPI_FTYPE, MPI_MAX, MPI_COMM_WORLD); #endif } else{ dt2invl[0]=dt2inv_max[reall]; dt2invl[1]=dt2inv_max[viscl]; dt2invl[2]=dt2inv_max[nonvl]; } if(TRYSUBCYCLE){ finaln=sqrt(dt2invl[1]/dt2invl[2]); // fraction of other dt to viscosity dt 
if(subcyclen<=1){ dtotherlowest=1.E+9; // used to check on subcycling below gosub=0; // assume by default won't be able to subcycle // if visc limits entire comp grid by factor of 2 in dt or more on other dts, then do subcycle if((!laststep)&&(finaln>=2.0) ){ // check if stable enough to subcycle // if(fabs( (dt2invl[1]-dtlast)/dtlast)<5.0){ // so stable, now find next lowest dt dt=1.0/sqrt(dt2invl[1]); // dt to be used on viscosity // number of subcycles over viscosity allowed given current estimates of dt subcyclen=(int)(floor(finaln)); // floor to be conservative on fraction // check if subcycle possible if(subcyclen>=2){// should be true! // setup subcycle tscycleto=t+dt*(FTYPE)(subcyclen); // time to cycle to if stable cycle tscyclefrom=t; // time starting subcycle dtlastscycle=1.0/sqrt(dt2invl[2]); // need to make sure not cycling past newest other dts nthsubcycle=1; // first subcycle is now gosub=1; } else{ fprintf(fail_file,"Unexpected failure in subcycle code: finaln: %15.10g subcyclen: %d\n",finaln,subcyclen); myexit(1); } //}// end if stable to subcycle }// end if viscosity limit and want to try to subcycle on it if(gosub==0){ // if no subcycling possible dt=1.0/sqrt(dt2invl[0]); // normal case of no subcycling subcyclen=1; nthsubcycle=0; } }// endif not subcycling else{ // if currently subcycling gosup=0; // assume no need to supercycle yet nthsubcycle++; // check to see if done with subcycling or need to quit // check to see if visc no longer limit and so supercycle(do all but visc up to viscs time) if(finaln<=2.0){ gosup=1; } else{// visc still limit dt=1.0/sqrt(dt2invl[0]); // trial dt assuming still going to subcycle(should be same as [1]) dtother=1.0/sqrt(dt2invl[2]); if(dtother<dtotherlowest){ dtotherlowest=dtother; } // check if prospective timestep for viscosity still keeps other terms t0+dt further down t to avoid overstepping the other limits based on current data if( (t+dt)>(tscyclefrom+dtotherlowest) ){ gosup=1; } } if(gosup){ // general 
setup for supercycle dt=(t-tscyclefrom); subcyclen=-1; tscycleto=t; t=tscyclefrom; nthsubcycle=0; } }// endif was/still are subcycling } else{ dt=1.0/sqrt(dt2invl[0]); // normal case of no subcycling } if(analoutput==6){ // for checking visc code dt=pow(invcour2*alpha_real/(dx[2][1][0]*dx[2][1][0]),-1.0); ftemp=pow(invcour2*alpha_real/(x[2][1][0]*x[2][1][0]*dx[2][2][N2/2]*dx[2][2][N2/2]),-1.0); if(ftemp<dt) ftemp=dt; } #if(USEMPI) #if(DEBUGMPI>0) // first check if correct place in code(debug) MPI_Allreduce(&nstep, &nstepmin, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD); MPI_Allreduce(&nstep, &nstepmax, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); if( (nstep!=nstepmin)||(nstep!=nstepmax)){ fprintf(fail_file,"out of synch!\n"); fprintf(fail_file,"proc: %d dt nstep: %d\n",myid,nstep); fflush(fail_file); } #endif // don't need anymore // MPI_Allreduce(&dt, &dtrecv, 1, MPI_FTYPE, MPI_MIN, MPI_COMM_WORLD); //MPI_Barrier(MPI_COMM_WORLD); //dt=dtrecv; #endif // because first time step is bad if e.g. viscosity on and no v at first if(firsttime==1){ // tweak for given problem so starts out good if(dt>1.E-5) dt=1.E-5; firsttime=0; } // don't increase timestep by too much on subcycle or normal cycle. // don't check if supercycle since need to force non-visc back to visc time. if(subcyclen>=0){ if(dt > 1.3*dtlast) dt = 1.3*dtlast ; } /* don't step beyond end of run */ if(t + dt >= tf){ // last timestep laststep=1; if(subcyclen==1){ dt = tf - t ; reallaststep=1; } if(subcyclen==-1){ fprintf(fail_file,"shouldn't be here at end of run at super cycle\n"); myexit(1); } if(subcyclen>=2){ // just end subcycle and let next timestep() figure final dt, never subcycling again dt=(t-tscyclefrom); subcyclen=-1; tscycleto=t; t=tscyclefrom; nthsubcycle=0; reallaststep=0; } // make sure don't get pathological case of dt=0 on last step if(dt<SSMALL){ reallaststep=1; laststep=1; dt=SSMALL; } } }
void timestep(void) { FTYPE dtother; int i, j, k, l; FTYPE idt2[NUMDTCHECKS + 1]; int ks[NUMDTCHECKS + 1], js[NUMDTCHECKS + 1], is[NUMDTCHECKS + 1]; FTYPE dt2inv_max[NUMDTCHECKS + 1]; int didfail, didfail_full; FTYPE finaln; static int firsttime = 1; static FTYPE ttimestep = 0; static FTYPE dtlast; // for slow idtcreate change FTYPE bxa, bya, dv; FTYPE rho, u; FTYPE odx1, odx2, ods, odl; FTYPE valphen, valphen2, cs2; int reall, viscl, nonvl; FTYPE ftemp; FTYPE vel1, vel2; int gosub, gosup; FTYPE dt2invl[3]; static FTYPE dtotherlowest; static int laststep; if (visc_real == 1) { nu_compute(); } for (l = 2; l <= NUMDTCHECKS; l++) { dt2inv_max[l] = 0.; ks[l] = js[l] = is[l] = 0; } dtlast = dt; didfail = 0; didfail_full = 0; if (firsttime == 1) { ttimestep = t - 1.E-12; laststep = 0; } LOOP { #if(TS0CHECK) if (s[2][k][j][i] < 0) { // actually detects nan too sprintf(tempc1, "%3f", s[2][k][j][i]); if (tempc1[0] == 'n') { fprintf(fail_file, "nan internal energy density error: k: %3d j: %3d i: %3d u: %15.10g \n", k, j, i, s[2][k][j][i]); } else if (s[2][k][j][i] < 0) fprintf(fail_file, "negative internal energy density error: k: %3d j: %3d i: %3d en: %15.10g \n", k, j, i, s[2][k][j][i]); didfail = 1; } if (s[1][k][j][i] < 0) { sprintf(tempc1, "%3f", s[1][k][j][i]); if (tempc1[0] == 'n') { fprintf(fail_file, "nan mass density error: k: %3d j: %3d i: %3d rho: %15.10g \n", k, j, i, s[1][k][j][i]); } else if (s[1][k][j][i] < 0) fprintf(fail_file, "negative mass density error: k: %3d j: %3d i: %3d rho: %15.10g \n", k, j, i, s[1][k][j][i]); didfail = 1; } #endif // not inlining this function for some reason, so slow in loop, #include "timestep1.h" for (l = 2; l <= NUMDTCHECKS; l++) { ftemp = idt2[l]; if (ftemp > dt2inv_max[l]) { dt2inv_max[l] = ftemp; ks[l] = k; js[l] = j; is[l] = i; } #if(CHECKDTLOW==1) if (ftemp > SQIDTLOWEST) { timecheck(-l, idt2, k, j, i, 0); didfail = 1; fflush(fail_file); } #endif } } // end loop over domain #if(CHECKDTLOW==1) // check if any cpu has 
failure if (numprocs > 1) { } else { didfail_full = didfail; } if (didfail_full) { if (myid <= 0) { fprintf(log_file, "timestep failure\n"); } if (DOGENDIAG) { diag(2); } myexit(5); } #endif // find lowest constrainer on dt reall = 2; for (l = 3; l <= NUMDTCHECKS; l++) { if (dt2inv_max[l] > dt2inv_max[reall]) { reall = l; } } #if(DODTDIAG) // do check up on dominates of timestep for each type if (t > ttimestep) { // per cpu pure dt data for (l = 2; l <= NUMDTCHECKS; l++) { timecheck(l, idt2, ks[l], js[l], is[l], reall); } fflush(logdt_file); } #endif #if(DOTSTEPDIAG) if (t > ttimestep) { // output timescales (create own DTtimescale later) timescale(); } #endif #if((DODTDIAG)||(DOTSTEPDIAG)) ttimestep = t + DTtimestep; #endif // find lowest constrainer on dt due to visc if (dt2inv_max[8] > dt2inv_max[9]) { viscl = 8; } else viscl = 9; // find lowest constrainer on dt of non-viscosity type (next // highest // dt^2) nonvl = 2; for (l = 3; l <= NUMDTCHECKS; l++) { if (l == 8) l = 10; // skip viscosity if (dt2inv_max[l] > dt2inv_max[nonvl]) { nonvl = l; } } // communicate the lowest dt values to all cpus if (numprocs > 1) { } else { dt2invl[0] = dt2inv_max[reall]; dt2invl[1] = dt2inv_max[viscl]; dt2invl[2] = dt2inv_max[nonvl]; } dt = 1.0 / sqrt(dt2invl[0]); // normal case of no subcycling if (analoutput == 6) { // for checking visc code dt = pow(invcour2 * alpha_real / (dx[2][1][0] * dx[2][1][0]), -1.0); ftemp = pow(invcour2 * alpha_real / (x[2][1][0] * x[2][1][0] * dx[2][2][N2 / 2] * dx[2][2][N2 / 2]), -1.0); if (ftemp < dt) ftemp = dt; } // because first time step is bad if e.g. viscosity on and no v at // first if (firsttime == 1) { // tweak for given problem so starts // out // good if (dt > 1.E-5) dt = 1.E-5; firsttime = 0; } // don't increase timestep by too much on subcycle or normal cycle. // don't check if supercycle since need to force non-visc back to // visc // time. 
if (subcyclen >= 0) { if (dt > 1.3 * dtlast) dt = 1.3 * dtlast; } /* don't step beyond end of run */ if (t + dt >= tf) { // last timestep laststep = 1; if (subcyclen == 1) { dt = tf - t; reallaststep = 1; } if (subcyclen == -1) { fprintf(fail_file, "shouldn't be here at end of run at super cycle\n"); myexit(1); } if (subcyclen >= 2) { // just end subcycle and let next timestep() figure final // dt, // never subcycling again dt = (t - tscyclefrom); subcyclen = -1; tscycleto = t; t = tscyclefrom; nthsubcycle = 0; reallaststep = 0; } // make sure don't get pathological case of dt=0 on last step if (dt < SSMALL) { reallaststep = 1; laststep = 1; dt = SSMALL; } } }
/*
 * Driver: integrates one particle with leapfrog() using a variable time step.
 *
 * Reads from standard input, in this order:
 *   dtcoef tmax dtout      - step-size coefficient, end time, output interval
 *   x.x x.y v.x v.y        - initial position and velocity
 *
 * Each iteration starts from dt = timescale(x, v) * dtcoef and refines it with
 * the scheme selected at compile time (SIMPLE_SYMMETRIC, CORRECTED_BLOCKSTEP,
 * SIMPLE_BLOCKSTEP or MIN_BLOCKSTEP).  newx/newv are only assigned inside
 * those #ifdef sections, so one of the macros has to be defined for the state
 * update at the bottom of the loop to be meaningful.
 *
 * Position, velocity and energy are printed whenever time reaches the next
 * multiple of dtout.  Returns 0 on success and 1 if the input cannot be read.
 */
int main(void)
{
    double dt, dtcoef;
    double tmax;
    double dtout;
    double time, tout;
    VECT x, v;
    VECT newx, newv;

    /* Refuse to run on partial input: every value read here is used below. */
    if (scanf("%le%le%le", &dtcoef, &tmax, &dtout) != 3) {
        fprintf(stderr, "error: could not read dtcoef, tmax, dtout\n");
        return 1;
    }
    if (scanf("%le%le%le%le", &(x.x), &(x.y), &(v.x), &(v.y)) != 4) {
        fprintf(stderr, "error: could not read x, v\n");
        return 1;
    }
    printf("dtcoef, tmax, dtout = %e %e %e\n", dtcoef, tmax, dtout);
    printf("x, v = %e %e %e %e\n", (x.x), (x.y), (v.x), (v.y));

    time=0;
    tout = dtout;
    printenergy(x, v, time);

    while (time < tmax){
        double newdt, dt0;
        int i;

        /* first guess from the state at the start of the step */
        dt = timescale(x, v)*dtcoef;
        dt0=dt;

#ifdef SIMPLE_SYMMETRIC
        /* iterate towards the mean of the step sizes at both ends of the step */
        for (i=0;i<5; i++){
            leapfrog(x,v, &newx, &newv, dt);
            newdt=timescale(newx, newv)*dtcoef;
            dt = 0.5*(dt0+newdt);
        }
#endif
#ifdef CORRECTED_BLOCKSTEP
        dt=force2(dt0);
        /* try the doubled step when time is an exact multiple of it */
        if (fmod(time, dt*2)== 0.0) dt*=2;
        for (i=0;i<5; i++){
            leapfrog(x,v, &newx, &newv, dt);
            newdt=timescale(newx, newv)*dtcoef;
            if (dt > 0.5*(dt0+newdt)) dt *= 0.5;
        }
#endif
#ifdef SIMPLE_BLOCKSTEP
#define NSYM 5
        dt = force2(dt);
        for (i=0;i<NSYM; i++){
            leapfrog(x,v, &newx, &newv, dt);
            newdt=timescale(newx, newv)*dtcoef;
            /* keep the dt of the final leapfrog() so newx/newv match it */
            if (i < NSYM-1){
                dt = force2(0.5*(dt0+newdt));
            }
        }
#endif
#ifdef MIN_BLOCKSTEP
#define NSYM 5
        {
            double steps[NSYM];
            dt = force2(dt);
            for (i=0;i<NSYM; i++){
                leapfrog(x,v, &newx, &newv, dt);
                newdt=timescale(newx, newv)*dtcoef;
                if (i < NSYM-1){
                    steps[i] = force2(0.5*(dt0+newdt));
                    if (i < NSYM-2) {
                        dt = steps[i];
                    }else{
                        /* last refinement: take the smaller of the two most recent candidates */
                        dt = (steps[i]>steps[i-1])? steps[i-1]:steps[i];
                    }
                }
            }
        }
#endif

        /* accept the step */
        time += dt;
        x=newx;
        v=newv;
        if (time >= tout){
            printv(x, "x");
            printv(v, "v");
            printenergy(x, v, time);
            tout += dtout;
        }
    }
    return 0;
}