void ClientCursor::idleTimeReport(unsigned millis) { bool foundSomeToTimeout = false; // two passes so that we don't need to readlock unless we really do some timeouts // we assume here that incrementing _idleAgeMillis outside readlock is ok. { recursive_scoped_lock lock(ccmutex); { unsigned sz = clientCursorsById.size(); static time_t last; if( sz >= 100000 ) { if( time(0) - last > 300 ) { last = time(0); log() << "warning number of open cursors is very large: " << sz << endl; } } } for ( CCById::iterator i = clientCursorsById.begin(); i != clientCursorsById.end(); ) { CCById::iterator j = i; i++; if( j->second->shouldTimeout( millis ) ) { foundSomeToTimeout = true; } } } if( foundSomeToTimeout ) { Lock::GlobalRead lk; recursive_scoped_lock cclock(ccmutex); CCById::const_iterator it = clientCursorsById.begin(); while (it != clientCursorsById.end()) { ClientCursor* cc = it->second; if( cc->shouldTimeout(0) ) { numberTimedOut++; LOG(1) << "killing old cursor " << cc->_cursorid << ' ' << cc->_ns << " idle:" << cc->idleTime() << "ms\n"; ClientCursor* toDelete = it->second; CursorId id = toDelete->cursorid(); // This is what winds up removing it from the map. delete toDelete; it = clientCursorsById.upper_bound(id); } else { ++it; } } } }
void SoftwareRendererImp::rasterize_triangle( float x0, float y0, float x1, float y1, float x2, float y2, Color color ) { //modify task 2: scale up the point location: /* x0 = 100.0 + 000.0; y0 = 000.0; x1 = 100.0 + 000.0; y1 = 100.0; x2 = 100.0 + 100.0; y2 = 100.0; */ //printf("Color: (%f, %f, %f, %f)\n", color.r, color.g, color.b, color.a); scale_point(x0, y0, sample_rate); scale_point(x1, y1, sample_rate); scale_point(x2, y2, sample_rate); // Task 2: // Implement triangle rasterization // Method 1: use bounding box, does not consider "edge rule" if (!cclock(x0, y0, x1, y1, x2, y2)) { //printf("before swap: (%f, %f, %f, %f)", x0, y0, x1, y1); swapPoints(x1, y1, x2, y2); //printf("after swap: (%f, %f, %f, %f)", x0, y0, x1, y1); } float lowX, highX, lowY, highY; lowX = (floor(min(min(x0, x1), x2))) + 0.5; lowY = (floor(min(min(y0, y1), y2))) + 0.5; highX = (ceil(max(max(x0, x1), x2))) - 0.5; highY = (ceil(max(max(y0, y1), y2))) - 0.5; for ( float x = lowX; x <= highX; x += 1 ) { for (float y = lowY; y <= highY; y += 1 ) { if (lineSide(x, y, x0, y0, x1, y1) && lineSide(x, y, x1, y1, x2, y2) && lineSide(x, y, x2, y2, x0, y0)) { rasterize_point(x, y, color); } } } }
void ClientCursor::invalidate(const StringData& ns) { Lock::assertWriteLocked(ns); size_t dot = ns.find( '.' ); verify( dot != string::npos ); // first (and only) dot is the last char bool isDB = dot == ns.size() - 1; Database *db = cc().database(); verify(db); verify(ns.startsWith(db->name())); recursive_scoped_lock cclock(ccmutex); // Look at all active non-cached Runners. These are the runners that are in auto-yield mode // that are not attached to the the client cursor. For example, all internal runners don't // need to be cached -- there will be no getMore. for (set<Runner*>::iterator it = nonCachedRunners.begin(); it != nonCachedRunners.end(); ++it) { Runner* runner = *it; const string& runnerNS = runner->ns(); if ( ( isDB && StringData(runnerNS).startsWith(ns) ) || ns == runnerNS ) { runner->kill(); } } // Look at all cached ClientCursor(s). The CC may have a Runner, a Cursor, or nothing (see // sharding_block.h). CCById::const_iterator it = clientCursorsById.begin(); while (it != clientCursorsById.end()) { ClientCursor* cc = it->second; // We're only interested in cursors over one db. if (cc->_db != db) { ++it; continue; } // Note that a valid ClientCursor state is "no cursor no runner." This is because // the set of active cursor IDs in ClientCursor is used as representation of query // state. See sharding_block.h. TODO(greg,hk): Move this out. if (NULL == cc->c() && NULL == cc->_runner.get()) { ++it; continue; } bool shouldDelete = false; // We will only delete CCs with runners that are not actively in use. The runners that // are actively in use are instead kill()-ed. if (NULL != cc->_runner.get()) { verify(NULL == cc->c()); if (isDB || cc->_runner->ns() == ns) { // If there is a pinValue >= 100, somebody is actively using the CC and we do // not delete it. Instead we notify the holder that we killed it. The holder // will then delete the CC. if (cc->_pinValue >= 100) { cc->_runner->kill(); } else { // pinvalue is <100, so there is nobody actively holding the CC. We can // safely delete it as nobody is holding the CC. shouldDelete = true; } } } // Begin cursor-only DEPRECATED else if (cc->c()->shouldDestroyOnNSDeletion()) { verify(NULL == cc->_runner.get()); if (isDB) { // already checked that db matched above dassert( StringData(cc->_ns).startsWith( ns ) ); shouldDelete = true; } else { if ( ns == cc->_ns ) { shouldDelete = true; } } } // End cursor-only DEPRECATED if (shouldDelete) { ClientCursor* toDelete = it->second; CursorId id = toDelete->cursorid(); delete toDelete; // We're not following the usual paradigm of saving it, ++it, and deleting the saved // 'it' because deleting 'it' might invalidate the next thing in clientCursorsById. // TODO: Why? it = clientCursorsById.upper_bound(id); } else { ++it; } } }
int main(int argc, char **argv) { double dt = 0; long nvtk = 0; char outnum[240]; long time_output = 0; long flops = 0; // double output_time = 0.0; double next_output_time = 0; double start_time = 0, end_time = 0; double start_iter = 0, end_iter = 0; double elaps = 0; double avgMcps = 0; long nAvgMcps = 0; #ifdef MPI MPI_Init(&argc, &argv); DeviceSet(); #endif if (H.mype == 1) fprintf(stdout, "Hydro starts.\n"); process_args(argc, argv, &H); hydro_init(&H, &Hv); // PRINTUOLD(H, &Hv); cuAllocOnDevice(H); // Allocate work space for 1D sweeps allocate_work_space(H, &Hw, &Hvw); // vtkfile(nvtk, H, &Hv); if (H.dtoutput > 0) { // outputs are in physical time not in time steps time_output = 1; next_output_time = next_output_time + H.dtoutput; } if (H.dtoutput > 0 || H.noutput > 0) vtkfile(++nvtk, H, &Hv); if (H.mype == 0) fprintf(stdout, "Hydro starts main loop.\n"); cuPutUoldOnDevice(H, &Hv); start_time = cclock(); // fprintf(stdout, "%lg %lg %d %d \n", H.t, H.tend, H.nstep, H.nstepmax); while ((H.t < H.tend) && (H.nstep < H.nstepmax)) { double iter_time = 0; flopsAri = flopsSqr = flopsMin = flopsTra = 0; start_iter = cclock(); outnum[0] = 0; flops = 0; if ((H.nstep % 2) == 0) { cuComputeDeltat(&dt, H, &Hw, &Hv, &Hvw); // fprintf(stdout, "dt=%lg\n", dt); if (H.nstep == 0) { dt = dt / 2.0; } if (H.nproc > 1) { #ifdef MPI double dtmin; int uno = 1; MPI_Allreduce(&dt, &dtmin, uno, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); dt = dtmin; #endif } } if ((H.nstep % 2) == 0) { cuHydroGodunov(1, dt, H, &Hv, &Hw, &Hvw); } else { cuHydroGodunov(2, dt, H, &Hv, &Hw, &Hvw); } end_iter = cclock(); iter_time = (double) (end_iter - start_iter); H.nstep++; H.t += dt; { double iter_time = (double) (end_iter - start_iter); #ifdef MPI long flopsAri_t, flopsSqr_t, flopsMin_t, flopsTra_t; MPI_Allreduce(&flopsAri, &flopsAri_t, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD); MPI_Allreduce(&flopsSqr, &flopsSqr_t, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD); MPI_Allreduce(&flopsMin, &flopsMin_t, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD); MPI_Allreduce(&flopsTra, &flopsTra_t, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD); // if (H.mype == 1) // printf("%ld %ld %ld %ld %ld %ld %ld %ld \n", flopsAri, flopsSqr, flopsMin, flopsTra, flopsAri_t, flopsSqr_t, flopsMin_t, flopsTra_t); flops = flopsAri_t * FLOPSARI + flopsSqr_t * FLOPSSQR + flopsMin_t * FLOPSMIN + flopsTra_t * FLOPSTRA; #else flops = flopsAri * FLOPSARI + flopsSqr * FLOPSSQR + flopsMin * FLOPSMIN + flopsTra * FLOPSTRA; #endif nbFLOPS++; if (flops > 0) { if (iter_time > 1.e-9) { double mflops = (double) flops / (double) 1.e+6 / iter_time; MflopsSUM += mflops; sprintf(outnum, "%s {%.2f Mflops %ld Ops} (%.3fs)", outnum, mflops, flops, iter_time); } } else { sprintf(outnum, "%s (%.3fs)", outnum, iter_time); } } if (iter_time > 1.e-9) { double mcps = ((double) H.globnx * (double) H.globny) / iter_time / 1e6l; if (H.nstep > 5) { sprintf(outnum, "%s (%.1lf MC/s)", outnum, mcps); nAvgMcps++; avgMcps += mcps; } } if (time_output == 0 && H.noutput > 0) { if ((H.nstep % H.noutput) == 0) { cuGetUoldFromDevice(H, &Hv); vtkfile(++nvtk, H, &Hv); sprintf(outnum, "%s [%04ld]", outnum, nvtk); } } else { if (time_output == 1 && H.t >= next_output_time) { cuGetUoldFromDevice(H, &Hv); vtkfile(++nvtk, H, &Hv); next_output_time = next_output_time + H.dtoutput; sprintf(outnum, "%s [%04ld]", outnum, nvtk); } } if (H.mype == 0) { fprintf(stdout, "--> step=%-4ld %12.5e, %10.5e %s\n", H.nstep, H.t, dt, outnum); fflush(stdout); } } end_time = cclock(); hydro_finish(H, &Hv); cuFreeOnDevice(); // Deallocate work space deallocate_work_space(H, &Hw, &Hvw); elaps = (double) (end_time - start_time); timeToString(outnum, elaps); if (H.mype == 0) fprintf(stdout, "Hydro ends in %ss (%.3lf) <%.2lf MFlops>.\n", outnum, elaps, (float) (MflopsSUM / nbFLOPS)); if (H.mype == 0) { avgMcps /= nAvgMcps; fprintf(stdout, "Average MC/s: %.1lf\n", avgMcps); } #ifdef MPI MPI_Finalize(); #endif return 0; }
int main(int argc, char **argv) { int nb_th=1; double dt = 0; long nvtk = 0; char outnum[80]; long time_output = 0; // double output_time = 0.0; double next_output_time = 0; double start_time = 0, end_time = 0; double start_iter = 0, end_iter = 0; double elaps = 0; start_time = cclock(); process_args(argc, argv, &H); hydro_init(&H, &Hv); PRINTUOLD(H, &Hv); printf("Hydro starts - sequential version \n"); // vtkfile(nvtk, H, &Hv); if (H.dtoutput > 0) { // outputs are in physical time not in time steps time_output = 1; next_output_time = next_output_time + H.dtoutput; } while ((H.t < H.tend) && (H.nstep < H.nstepmax)) { start_iter = cclock(); outnum[0] = 0; flops = 0; if ((H.nstep % 2) == 0) { compute_deltat(&dt, H, &Hw, &Hv, &Hvw); if (H.nstep == 0) { dt = dt / 2.0; } } if ((H.nstep % 2) == 0) { hydro_godunov(1, dt, H, &Hv, &Hw, &Hvw); hydro_godunov(2, dt, H, &Hv, &Hw, &Hvw); } else { hydro_godunov(2, dt, H, &Hv, &Hw, &Hvw); hydro_godunov(1, dt, H, &Hv, &Hw, &Hvw); } end_iter = cclock(); H.nstep++; H.t += dt; if (flops > 0) { double iter_time = (double) (end_iter - start_iter); if (iter_time > 1.e-9) { double mflops = (double) flops / (double) 1.e+6 / iter_time; sprintf(outnum, "%s {%.3f Mflops} (%.3fs)", outnum, mflops, iter_time); } } else { double iter_time = (double) (end_iter - start_iter); sprintf(outnum, "%s (%.3fs)", outnum, iter_time); } if (time_output == 0) { if ((H.nstep % H.noutput) == 0) { vtkfile(++nvtk, H, &Hv); sprintf(outnum, "%s [%04ld]", outnum, nvtk); } } else { if (H.t >= next_output_time) { vtkfile(++nvtk, H, &Hv); next_output_time = next_output_time + H.dtoutput; sprintf(outnum, "%s [%04ld]", outnum, nvtk); } } fprintf(stdout, "--> step=%-4ld %12.5e, %10.5e %s\n", H.nstep, H.t, dt, outnum); } // end while loop hydro_finish(H, &Hv); end_time = cclock(); elaps = (double) (end_time - start_time); timeToString(outnum, elaps); fprintf(stdout, "Hydro ends in %ss (%.3lf).\n", outnum, elaps); return 0; }
int main(int argc, char **argv) { real_t dt = 0; int nvtk = 0; char outnum[80]; int time_output = 0; long flops = 0; // real_t output_time = 0.0; real_t next_output_time = 0; double start_time = 0, end_time = 0; double start_iter = 0, end_iter = 0; double elaps = 0; struct timespec start, end; // array of timers to profile the code memset(functim, 0, TIM_END * sizeof(functim[0])); #ifdef MPI MPI_Init(&argc, &argv); #endif process_args(argc, argv, &H); hydro_init(&H, &Hv); if (H.mype == 0) fprintf(stdout, "Hydro starts in %s precision.\n", ((sizeof(real_t) == sizeof(double))? "double": "single")); #ifdef _OPENMP if (H.mype == 0) { fprintf(stdout, "Hydro: OpenMP mode ON\n"); fprintf(stdout, "Hydro: OpenMP %d max threads\n", omp_get_max_threads()); fprintf(stdout, "Hydro: OpenMP %d num threads\n", omp_get_num_threads()); fprintf(stdout, "Hydro: OpenMP %d num procs\n", omp_get_num_procs()); } #endif #ifdef MPI if (H.mype == 0) { fprintf(stdout, "Hydro: MPI run with %d procs\n", H.nproc); } #else fprintf(stdout, "Hydro: standard build\n"); #endif // PRINTUOLD(H, &Hv); #ifdef MPI if (H.nproc > 1) MPI_Barrier(MPI_COMM_WORLD); #endif if (H.dtoutput > 0) { // outputs are in physical time not in time steps time_output = 1; next_output_time = next_output_time + H.dtoutput; } if (H.dtoutput > 0 || H.noutput > 0) vtkfile(++nvtk, H, &Hv); if (H.mype == 0) fprintf(stdout, "Hydro starts main loop.\n"); //pre-allocate memory before entering in loop //For godunov scheme start = cclock(); start = cclock(); allocate_work_space(H.nxyt, H, &Hw_godunov, &Hvw_godunov); compute_deltat_init_mem(H, &Hw_deltat, &Hvw_deltat); end = cclock(); if (H.mype == 0) fprintf(stdout, "Hydro: init mem %lfs\n", ccelaps(start, end)); // we start timings here to avoid the cost of initial memory allocation start_time = dcclock(); while ((H.t < H.tend) && (H.nstep < H.nstepmax)) { // reset perf counter for this iteration flopsAri = flopsSqr = flopsMin = flopsTra = 0; start_iter = dcclock(); outnum[0] = 0; if ((H.nstep % 2) == 0) { dt = 0; // if (H.mype == 0) fprintf(stdout, "Hydro computes deltat.\n"); start = cclock(); compute_deltat(&dt, H, &Hw_deltat, &Hv, &Hvw_deltat); end = cclock(); functim[TIM_COMPDT] += ccelaps(start, end); if (H.nstep == 0) { dt = dt / 2.0; } #ifdef MPI if (H.nproc > 1) { real_t dtmin; // printf("pe=%4d\tdt=%lg\n",H.mype, dt); if (sizeof(real_t) == sizeof(double)) { MPI_Allreduce(&dt, &dtmin, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); } else { MPI_Allreduce(&dt, &dtmin, 1, MPI_FLOAT, MPI_MIN, MPI_COMM_WORLD); } dt = dtmin; } #endif } // if (H.mype == 1) fprintf(stdout, "Hydro starts godunov.\n"); if ((H.nstep % 2) == 0) { hydro_godunov(1, dt, H, &Hv, &Hw_godunov, &Hvw_godunov); // hydro_godunov(2, dt, H, &Hv, &Hw, &Hvw); } else { hydro_godunov(2, dt, H, &Hv, &Hw_godunov, &Hvw_godunov); // hydro_godunov(1, dt, H, &Hv, &Hw, &Hvw); } end_iter = dcclock(); H.nstep++; H.t += dt; { real_t iter_time = (real_t) (end_iter - start_iter); #ifdef MPI long flopsAri_t, flopsSqr_t, flopsMin_t, flopsTra_t; start = cclock(); MPI_Allreduce(&flopsAri, &flopsAri_t, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD); MPI_Allreduce(&flopsSqr, &flopsSqr_t, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD); MPI_Allreduce(&flopsMin, &flopsMin_t, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD); MPI_Allreduce(&flopsTra, &flopsTra_t, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD); // if (H.mype == 1) // printf("%ld %ld %ld %ld %ld %ld %ld %ld \n", flopsAri, flopsSqr, flopsMin, flopsTra, flopsAri_t, flopsSqr_t, flopsMin_t, flopsTra_t); flops = flopsAri_t * FLOPSARI + flopsSqr_t * FLOPSSQR + flopsMin_t * FLOPSMIN + flopsTra_t * FLOPSTRA; end = cclock(); functim[TIM_ALLRED] += ccelaps(start, end); #else flops = flopsAri * FLOPSARI + flopsSqr * FLOPSSQR + flopsMin * FLOPSMIN + flopsTra * FLOPSTRA; #endif nbFLOPS++; if (flops > 0) { if (iter_time > 1.e-9) { double mflops = (double) flops / (double) 1.e+6 / iter_time; MflopsSUM += mflops; sprintf(outnum, "%s {%.2f Mflops %ld Ops} (%.3fs)", outnum, mflops, flops, iter_time); } } else { sprintf(outnum, "%s (%.3fs)", outnum, iter_time); } } if (time_output == 0 && H.noutput > 0) { if ((H.nstep % H.noutput) == 0) { vtkfile(++nvtk, H, &Hv); sprintf(outnum, "%s [%04d]", outnum, nvtk); } } else { if (time_output == 1 && H.t >= next_output_time) { vtkfile(++nvtk, H, &Hv); next_output_time = next_output_time + H.dtoutput; sprintf(outnum, "%s [%04d]", outnum, nvtk); } } if (H.mype == 0) { fprintf(stdout, "--> step=%4d, %12.5e, %10.5e %s\n", H.nstep, H.t, dt, outnum); fflush(stdout); } } // while end_time = dcclock(); // Deallocate work spaces deallocate_work_space(H.nxyt, H, &Hw_godunov, &Hvw_godunov); compute_deltat_clean_mem(H, &Hw_deltat, &Hvw_deltat); hydro_finish(H, &Hv); elaps = (double) (end_time - start_time); timeToString(outnum, elaps); if (H.mype == 0) { fprintf(stdout, "Hydro ends in %ss (%.3lf) <%.2lf MFlops>.\n", outnum, elaps, (float) (MflopsSUM / nbFLOPS)); fprintf(stdout, " "); } if (H.nproc == 1) { int sizeFmt = sizeLabel(functim, TIM_END); printTimingsLabel(TIM_END, sizeFmt); fprintf(stdout, "\n"); fprintf(stdout, "PE0 "); printTimings(functim, TIM_END, sizeFmt); fprintf(stdout, "\n"); fprintf(stdout, "%% "); percentTimings(functim, TIM_END); printTimings(functim, TIM_END, sizeFmt); fprintf(stdout, "\n"); } #ifdef MPI if (H.nproc > 1) { double timMAX[TIM_END]; double timMIN[TIM_END]; double timSUM[TIM_END]; MPI_Allreduce(functim, timMAX, TIM_END, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); MPI_Allreduce(functim, timMIN, TIM_END, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); MPI_Allreduce(functim, timSUM, TIM_END, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); if (H.mype == 0) { int sizeFmt = sizeLabel(timMAX, TIM_END); printTimingsLabel(TIM_END, sizeFmt); fprintf(stdout, "\n"); fprintf(stdout, "MIN "); printTimings(timMIN, TIM_END, sizeFmt); fprintf(stdout, "\n"); fprintf(stdout, "MAX "); printTimings(timMAX, TIM_END, sizeFmt); fprintf(stdout, "\n"); fprintf(stdout, "AVG "); avgTimings(timSUM, TIM_END, H.nproc); printTimings(timSUM, TIM_END, sizeFmt); fprintf(stdout, "\n"); } } #endif #ifdef MPI MPI_Finalize(); #endif return 0; }
void oclHydroGodunov(long idimStart, real_t dt, const hydroparam_t H, hydrovar_t * Hv, hydrowork_t * Hw, hydrovarwork_t * Hvw) { cl_int status; // Local variables struct timespec start, end; int i, j, idim, idimIndex; int Hmin, Hmax, Hstep, Hnxystep; int Hdimsize; int Hndim_1; int slices, iend; real_t dtdx; size_t lVarSz = H.arVarSz * H.nxystep * sizeof(real_t); long Hnxyt = H.nxyt; int clear = 0; static FILE *fic = NULL; if (fic == NULL && H.prt) { char logname[256]; sprintf(logname, "TRACE.%04d_%04d.txt", H.nproc, H.mype); fic = fopen(logname, "w"); } WHERE("hydro_godunov"); if (H.prt) fprintf(fic, "loop dt=%lg\n", dt); for (idimIndex = 0; idimIndex < 2; idimIndex++) { idim = (idimStart - 1 + idimIndex) % 2 + 1; // constant // constant dtdx = dt / H.dx; // Update boundary conditions if (H.prt) { fprintf(fic, "godunov %d\n", idim); PRINTUOLD(fic, H, Hv); } #define GETUOLD oclGetUoldFromDevice(H, Hv) start = cclock(); oclMakeBoundary(idim, H, Hv, uoldDEV); end = cclock(); functim[TIM_MAKBOU] += ccelaps(start, end); if (H.prt) {fprintf(fic, "MakeBoundary\n");} if (H.prt) {GETUOLD; PRINTUOLD(fic, H, Hv);} if (idim == 1) { Hmin = H.jmin + ExtraLayer; Hmax = H.jmax - ExtraLayer; Hdimsize = H.nxt; Hndim_1 = H.nx + 1; Hstep = H.nxystep; } else { Hmin = H.imin + ExtraLayer; Hmax = H.imax - ExtraLayer; Hdimsize = H.nyt; Hndim_1 = H.ny + 1; Hstep = H.nxystep; } Hnxystep = Hstep; for (i = Hmin; i < Hmax; i += Hstep) { long offsetIP = IHVWS(0, 0, IP); long offsetID = IHVWS(0, 0, ID); int Hnxyt = H.nxyt; iend = i + Hstep; if (iend >= Hmax) iend = Hmax; slices = iend - i; if (clear) oclMemset(uDEV, 0, lVarSz); start = cclock(); oclGatherConservativeVars(idim, i, H.imin, H.imax, H.jmin, H.jmax, H.nvar, H.nxt, H.nyt, H.nxyt, slices, Hnxystep, uoldDEV, uDEV); end = cclock(); functim[TIM_GATCON] += ccelaps(start, end); if (H.prt) {fprintf(fic, "ConservativeVars %ld %ld %ld %ld %d %d\n", H.nvar, H.nxt, H.nyt, H.nxyt, slices, Hstep);} if (H.prt) { GETARRV(uDEV, Hvw->u); } PRINTARRAYV2(fic, Hvw->u, Hdimsize, "u", H); // Convert to primitive variables start = cclock(); oclConstoprim(Hdimsize, H.nxyt, H.nvar, H.smallr, slices, Hnxystep, uDEV, qDEV, eDEV); end = cclock(); functim[TIM_CONPRI] += ccelaps(start, end); if (H.prt) { GETARR (eDEV, Hw->e); } if (H.prt) { GETARRV(qDEV, Hvw->q); } PRINTARRAY(fic, Hw->e, Hdimsize, "e", H); PRINTARRAYV2(fic, Hvw->q, Hdimsize, "q", H); start = cclock(); oclEquationOfState(offsetIP, offsetID, 0, Hdimsize, H.smallc, H.gamma, slices, H.nxyt, qDEV, eDEV, cDEV); end = cclock(); functim[TIM_EOS] += ccelaps(start, end); if (H.prt) { GETARR (cDEV, Hw->c); } PRINTARRAY(fic, Hw->c, Hdimsize, "c", H); if (H.prt) { GETARRV (qDEV, Hvw->q); } PRINTARRAYV2(fic, Hvw->q, Hdimsize, "q", H); if (clear) oclMemset(dqDEV, 0, H.arVarSz * H.nxystep); // Characteristic tracing if (H.iorder != 1) { if (clear) oclMemset(dqDEV, 0, H.arVarSz); start = cclock(); oclSlope(Hdimsize, H.nvar, H.nxyt, H.slope_type, slices, Hstep, qDEV, dqDEV); end = cclock(); functim[TIM_SLOPE] += ccelaps(start, end); if (H.prt) { GETARRV(dqDEV, Hvw->dq); } PRINTARRAYV2(fic, Hvw->dq, Hdimsize, "dq", H); } start = cclock(); oclTrace(dtdx, Hdimsize, H.scheme, H.nvar, H.nxyt, slices, Hstep, qDEV, dqDEV, cDEV, qxmDEV, qxpDEV); end = cclock(); functim[TIM_TRACE] += ccelaps(start, end); if (H.prt) { GETARRV(qxmDEV, Hvw->qxm); } if (H.prt) { GETARRV(qxpDEV, Hvw->qxp); } PRINTARRAYV2(fic, Hvw->qxm, Hdimsize, "qxm", H); PRINTARRAYV2(fic, Hvw->qxp, Hdimsize, "qxp", H); start = cclock(); oclQleftright(idim, H.nx, H.ny, H.nxyt, H.nvar, slices, Hstep, qxmDEV, qxpDEV, qleftDEV, qrightDEV); end = cclock(); functim[TIM_QLEFTR] += ccelaps(start, end); if (H.prt) { GETARRV(qleftDEV, Hvw->qleft); } if (H.prt) { GETARRV(qrightDEV, Hvw->qright); } PRINTARRAYV2(fic, Hvw->qleft, Hdimsize, "qleft", H); PRINTARRAYV2(fic, Hvw->qright, Hdimsize, "qright", H); // Solve Riemann problem at interfaces start = cclock(); oclRiemann(Hndim_1, H.smallr, H.smallc, H.gamma, H.niter_riemann, H.nvar, H.nxyt, slices, Hstep, qleftDEV, qrightDEV, qgdnvDEV,sgnmDEV); end = cclock(); functim[TIM_RIEMAN] += ccelaps(start, end); if (H.prt) { GETARRV(qgdnvDEV, Hvw->qgdnv); } PRINTARRAYV2(fic, Hvw->qgdnv, Hdimsize, "qgdnv", H); // Compute fluxes if (clear) oclMemset(fluxDEV, 0, H.arVarSz); start = cclock(); oclCmpflx(Hdimsize, H.nxyt, H.nvar, H.gamma, slices, Hnxystep, qgdnvDEV, fluxDEV); end = cclock(); functim[TIM_CMPFLX] += ccelaps(start, end); if (H.prt) { GETARRV(fluxDEV, Hvw->flux); } PRINTARRAYV2(fic, Hvw->flux, Hdimsize, "flux", H); if (H.prt) { GETARRV(uDEV, Hvw->u); } PRINTARRAYV2(fic, Hvw->u, Hdimsize, "u", H); // if (H.prt) { // GETUOLD; PRINTUOLD(fic, H, Hv); // } if (H.prt) fprintf(fic, "dxdt=%lg\n", dtdx); start = cclock(); oclUpdateConservativeVars(idim, i, dtdx, H.imin, H.imax, H.jmin, H.jmax, H.nvar, H.nxt, H.nyt, H.nxyt, slices, Hnxystep, uoldDEV, uDEV, fluxDEV); end = cclock(); functim[TIM_UPDCON] += ccelaps(start, end); if (H.prt) { GETUOLD; PRINTUOLD(fic, H, Hv); } } // for j if (H.prt) { // printf("After pass %d\n", idim); PRINTUOLD(fic, H, Hv); } } } // hydro_godunov
// variables auxiliaires pour mettre en place le mode resident de HMPP void hydro_godunov(int idimStart, real_t dt, const hydroparam_t H, hydrovar_t * Hv, hydrowork_t * Hw, hydrovarwork_t * Hvw) { // Local variables struct timespec start, end; int j; real_t dtdx; int clear=0; real_t (*e)[H.nxyt]; real_t (*flux)[H.nxystep][H.nxyt]; real_t (*qleft)[H.nxystep][H.nxyt]; real_t (*qright)[H.nxystep][H.nxyt]; real_t (*c)[H.nxyt]; real_t *uold; int (*sgnm)[H.nxyt]; real_t (*qgdnv)[H.nxystep][H.nxyt]; real_t (*u)[H.nxystep][H.nxyt]; real_t (*qxm)[H.nxystep][H.nxyt]; real_t (*qxp)[H.nxystep][H.nxyt]; real_t (*q)[H.nxystep][H.nxyt]; real_t (*dq)[H.nxystep][H.nxyt]; static FILE *fic = NULL; if (fic == NULL && H.prt == 1) { char logname[256]; sprintf(logname, "TRACE.%04d_%04d.txt", H.nproc, H.mype); fic = fopen(logname, "w"); } WHERE("hydro_godunov"); // int hmppGuard = 1; int idimIndex = 0; for (idimIndex = 0; idimIndex < 2; idimIndex++) { int idim = (idimStart - 1 + idimIndex) % 2 + 1; // constant dtdx = dt / H.dx; // Update boundary conditions if (H.prt) { fprintf(fic, "godunov %d\n", idim); PRINTUOLD(fic, H, Hv); } // if (H.mype == 1) fprintf(fic, "Hydro makes boundary.\n"); start = cclock(); make_boundary(idim, H, Hv); end = cclock(); functim[TIM_MAKBOU] += ccelaps(start, end); if (H.prt) {fprintf(fic, "MakeBoundary\n");} PRINTUOLD(fic, H, Hv); uold = Hv->uold; qgdnv = (real_t (*)[H.nxystep][H.nxyt]) Hvw->qgdnv; flux = (real_t (*)[H.nxystep][H.nxyt]) Hvw->flux; c = (real_t (*)[H.nxyt]) Hw->c; e = (real_t (*)[H.nxyt]) Hw->e; qleft = (real_t (*)[H.nxystep][H.nxyt]) Hvw->qleft; qright = (real_t (*)[H.nxystep][H.nxyt]) Hvw->qright; sgnm = (int (*)[H.nxyt]) Hw->sgnm; q = (real_t (*)[H.nxystep][H.nxyt]) Hvw->q; dq = (real_t (*)[H.nxystep][H.nxyt]) Hvw->dq; u = (real_t (*)[H.nxystep][H.nxyt]) Hvw->u; qxm = (real_t (*)[H.nxystep][H.nxyt]) Hvw->qxm; qxp = (real_t (*)[H.nxystep][H.nxyt]) Hvw->qxp; int Hmin, Hmax, Hstep; int Hdimsize; int Hndim_1; if (idim == 1) { Hmin = H.jmin + ExtraLayer; Hmax = H.jmax - ExtraLayer; Hdimsize = H.nxt; Hndim_1 = H.nx + 1; Hstep = H.nxystep; } else { Hmin = H.imin + ExtraLayer; Hmax = H.imax - ExtraLayer; Hdimsize = H.nyt; Hndim_1 = H.ny + 1; Hstep = H.nxystep; } if (!H.nstep && idim == 1) { /* LM -- HERE a more secure implementation should be used: a new parameter ? */ } // if (H.mype == 1) fprintf(fic, "Hydro computes slices.\n"); for (j = Hmin; j < Hmax; j += Hstep) { // we try to compute many slices each pass int jend = j + Hstep; if (jend >= Hmax) jend = Hmax; int slices = jend - j; // numbre of slices to compute // fprintf(stderr, "Godunov idim=%d, j=%d %d \n", idim, j, slices); if (clear) Dmemset((H.nxyt) * H.nxystep * H.nvar, (real_t *) dq, 0); start = cclock(); gatherConservativeVars(idim, j, H.imin, H.imax, H.jmin, H.jmax, H.nvar, H.nxt, H.nyt, H.nxyt, slices, Hstep, uold, u); end = cclock(); functim[TIM_GATCON] += ccelaps(start, end); if (H.prt) {fprintf(fic, "ConservativeVars %d %d %d %d %d %d\n", H.nvar, H.nxt, H.nyt, H.nxyt, slices, Hstep);} PRINTARRAYV2(fic, u, Hdimsize, "u", H); if (clear) Dmemset((H.nxyt) * H.nxystep * H.nvar, (real_t *) dq, 0); // Convert to primitive variables start = cclock(); constoprim(Hdimsize, H.nxyt, H.nvar, H.smallr, slices, Hstep, u, q, e); end = cclock(); functim[TIM_CONPRI] += ccelaps(start, end); PRINTARRAY(fic, e, Hdimsize, "e", H); PRINTARRAYV2(fic, q, Hdimsize, "q", H); start = cclock(); equation_of_state(0, Hdimsize, H.nxyt, H.nvar, H.smallc, H.gamma, slices, Hstep, e, q, c); end = cclock(); functim[TIM_EOS] += ccelaps(start, end); PRINTARRAY(fic, c, Hdimsize, "c", H); PRINTARRAYV2(fic, q, Hdimsize, "q", H); // Characteristic tracing if (H.iorder != 1) { start = cclock(); slope(Hdimsize, H.nvar, H.nxyt, H.slope_type, slices, Hstep, q, dq); end = cclock(); functim[TIM_SLOPE] += ccelaps(start, end); PRINTARRAYV2(fic, dq, Hdimsize, "dq", H); } if (clear) Dmemset(H.nxyt * H.nxystep * H.nvar, (real_t *) qxm, 0); if (clear) Dmemset(H.nxyt * H.nxystep * H.nvar, (real_t *) qxp, 0); if (clear) Dmemset(H.nxyt * H.nxystep * H.nvar, (real_t *) qleft, 0); if (clear) Dmemset(H.nxyt * H.nxystep * H.nvar, (real_t *) qright, 0); if (clear) Dmemset(H.nxyt * H.nxystep * H.nvar, (real_t *) flux, 0); if (clear) Dmemset(H.nxyt * H.nxystep * H.nvar, (real_t *) qgdnv, 0); start = cclock(); trace(dtdx, Hdimsize, H.scheme, H.nvar, H.nxyt, slices, Hstep, q, dq, c, qxm, qxp); end = cclock(); functim[TIM_TRACE] += ccelaps(start, end); PRINTARRAYV2(fic, qxm, Hdimsize, "qxm", H); PRINTARRAYV2(fic, qxp, Hdimsize, "qxp", H); start = cclock(); qleftright(idim, H.nx, H.ny, H.nxyt, H.nvar, slices, Hstep, qxm, qxp, qleft, qright); end = cclock(); functim[TIM_QLEFTR] += ccelaps(start, end); PRINTARRAYV2(fic, qleft, Hdimsize, "qleft", H); PRINTARRAYV2(fic, qright, Hdimsize, "qright", H); start = cclock(); riemann(Hndim_1, H.smallr, H.smallc, H.gamma, H.niter_riemann, H.nvar, H.nxyt, slices, Hstep, qleft, qright, qgdnv, sgnm, Hw); end = cclock(); functim[TIM_RIEMAN] += ccelaps(start, end); PRINTARRAYV2(fic, qgdnv, Hdimsize, "qgdnv", H); start = cclock(); cmpflx(Hdimsize, H.nxyt, H.nvar, H.gamma, slices, Hstep, qgdnv, flux); end = cclock(); functim[TIM_CMPFLX] += ccelaps(start, end); PRINTARRAYV2(fic, flux, Hdimsize, "flux", H); PRINTARRAYV2(fic, u, Hdimsize, "u", H); start = cclock(); updateConservativeVars(idim, j, dtdx, H.imin, H.imax, H.jmin, H.jmax, H.nvar, H.nxt, H.nyt, H.nxyt, slices, Hstep, uold, u, flux); end = cclock(); functim[TIM_UPDCON] += ccelaps(start, end); PRINTUOLD(fic, H, Hv); } // for j if (H.prt) { // printf("[%d] After pass %d\n", H.mype, idim); PRINTUOLD(fic, H, Hv); } } if ((H.t + dt >= H.tend) || (H.nstep + 1 >= H.nstepmax)) { /* LM -- HERE a more secure implementation should be used: a new parameter ? */ } } // hydro_godunov