Exemple #1
0
    void ClientCursor::idleTimeReport(unsigned millis) {
        bool foundSomeToTimeout = false;

        // two passes so that we don't need to readlock unless we really do some timeouts
        // we assume here that incrementing _idleAgeMillis outside readlock is ok.
        {
            recursive_scoped_lock lock(ccmutex);
            {
                unsigned sz = clientCursorsById.size();
                static time_t last;
                if( sz >= 100000 ) { 
                    if( time(0) - last > 300 ) {
                        last = time(0);
                        log() << "warning number of open cursors is very large: " << sz << endl;
                    }
                }
            }
            for ( CCById::iterator i = clientCursorsById.begin(); i != clientCursorsById.end();  ) {
                CCById::iterator j = i;
                i++;
                if( j->second->shouldTimeout( millis ) ) {
                    foundSomeToTimeout = true;
                }
            }
        }

        if( foundSomeToTimeout ) {
            Lock::GlobalRead lk;

            recursive_scoped_lock cclock(ccmutex);
            CCById::const_iterator it = clientCursorsById.begin();
            while (it != clientCursorsById.end()) {
                ClientCursor* cc = it->second;
                if( cc->shouldTimeout(0) ) {
                    numberTimedOut++;
                    LOG(1) << "killing old cursor " << cc->_cursorid << ' ' << cc->_ns
                           << " idle:" << cc->idleTime() << "ms\n";
                    ClientCursor* toDelete = it->second;
                    CursorId id = toDelete->cursorid();
                    // This is what winds up removing it from the map.
                    delete toDelete;
                    it = clientCursorsById.upper_bound(id);
                }
                else {
                    ++it;
                }
            }
        }
    }
// Rasterizes a filled triangle with the bounding-box method: every sample
// centre inside the triangle's bounding box is tested against the three edges
// and drawn in `color` when all three tests pass.  No special "edge rule" is
// applied to samples lying exactly on an edge.
//
// (x0,y0), (x1,y1), (x2,y2) are the triangle vertices; they are scaled by
// sample_rate before rasterization.
void SoftwareRendererImp::rasterize_triangle( float x0, float y0,
                                              float x1, float y1,
                                              float x2, float y2,
                                              Color color ) {

  // Scale the vertex locations by the current sample rate.
  scale_point(x0, y0, sample_rate);
  scale_point(x1, y1, sample_rate);
  scale_point(x2, y2, sample_rate);

  // Normalise the vertex order: when the orientation test fails, exchange the
  // second and third vertices so the edge tests below see a consistent order.
  const bool orientationOk = cclock(x0, y0, x1, y1, x2, y2);
  if (!orientationOk) {
    swapPoints(x1, y1, x2, y2);
  }

  // Sample centres (integer + 0.5) bounding the triangle.
  const float firstX = (floor(min(min(x0, x1), x2))) + 0.5;
  const float firstY = (floor(min(min(y0, y1), y2))) + 0.5;
  const float lastX = (ceil(max(max(x0, x1), x2))) - 0.5;
  const float lastY = (ceil(max(max(y0, y1), y2))) - 0.5;

  float sx = firstX;
  while (sx <= lastX) {
    float sy = firstY;
    while (sy <= lastY) {
      // A sample is covered only if it passes the side test for all three edges.
      const bool covered = lineSide(sx, sy, x0, y0, x1, y1) &&
                           lineSide(sx, sy, x1, y1, x2, y2) &&
                           lineSide(sx, sy, x2, y2, x0, y0);
      if (covered) {
        rasterize_point(sx, sy, color);
      }
      sy += 1;
    }
    sx += 1;
  }
}
Exemple #3
0
    void ClientCursor::invalidate(const StringData& ns) {
        Lock::assertWriteLocked(ns);

        size_t dot = ns.find( '.' );
        verify( dot != string::npos );

        // first (and only) dot is the last char
        bool isDB = dot == ns.size() - 1;

        Database *db = cc().database();
        verify(db);
        verify(ns.startsWith(db->name()));

        recursive_scoped_lock cclock(ccmutex);
        // Look at all active non-cached Runners.  These are the runners that are in auto-yield mode
        // that are not attached to the the client cursor. For example, all internal runners don't
        // need to be cached -- there will be no getMore.
        for (set<Runner*>::iterator it = nonCachedRunners.begin(); it != nonCachedRunners.end();
             ++it) {

            Runner* runner = *it;
            const string& runnerNS = runner->ns();
            if ( ( isDB && StringData(runnerNS).startsWith(ns) ) || ns == runnerNS ) {
                runner->kill();
            }
        }

        // Look at all cached ClientCursor(s).  The CC may have a Runner, a Cursor, or nothing (see
        // sharding_block.h).
        CCById::const_iterator it = clientCursorsById.begin();
        while (it != clientCursorsById.end()) {
            ClientCursor* cc = it->second;

            // We're only interested in cursors over one db.
            if (cc->_db != db) {
                ++it;
                continue;
            }

            // Note that a valid ClientCursor state is "no cursor no runner."  This is because
            // the set of active cursor IDs in ClientCursor is used as representation of query
            // state.  See sharding_block.h.  TODO(greg,hk): Move this out.
            if (NULL == cc->c() && NULL == cc->_runner.get()) {
                ++it;
                continue;
            }

            bool shouldDelete = false;

            // We will only delete CCs with runners that are not actively in use.  The runners that
            // are actively in use are instead kill()-ed.
            if (NULL != cc->_runner.get()) {
                verify(NULL == cc->c());

                if (isDB || cc->_runner->ns() == ns) {
                    // If there is a pinValue >= 100, somebody is actively using the CC and we do
                    // not delete it.  Instead we notify the holder that we killed it.  The holder
                    // will then delete the CC.
                    if (cc->_pinValue >= 100) {
                        cc->_runner->kill();
                    }
                    else {
                        // pinvalue is <100, so there is nobody actively holding the CC.  We can
                        // safely delete it as nobody is holding the CC.
                        shouldDelete = true;
                    }
                }
            }
            // Begin cursor-only DEPRECATED
            else if (cc->c()->shouldDestroyOnNSDeletion()) {
                verify(NULL == cc->_runner.get());

                if (isDB) {
                    // already checked that db matched above
                    dassert( StringData(cc->_ns).startsWith( ns ) );
                    shouldDelete = true;
                }
                else {
                    if ( ns == cc->_ns ) {
                        shouldDelete = true;
                    }
                }
            }
            // End cursor-only DEPRECATED

            if (shouldDelete) {
                ClientCursor* toDelete = it->second;
                CursorId id = toDelete->cursorid();
                delete toDelete;
                // We're not following the usual paradigm of saving it, ++it, and deleting the saved
                // 'it' because deleting 'it' might invalidate the next thing in clientCursorsById.
                // TODO: Why?
                it = clientCursorsById.upper_bound(id);
            }
            else {
                ++it;
            }
        }
    }
Exemple #4
0
// Appends printf-style formatted text to the per-step status line held in the
// local array `outnum`, tracking its current length in the local `outnum_len`.
// The write is bounded by sizeof(outnum); on truncation the length is clamped
// so that later appends stay inside the buffer.  This replaces the former
// sprintf(outnum, "%s ...", outnum, ...) idiom, whose source and destination
// overlap (undefined behaviour).
#define HYDRO_OUTNUM_APPEND(...)                                               \
  do {                                                                         \
    int appended_ = snprintf(outnum + outnum_len,                              \
                             sizeof(outnum) - outnum_len, __VA_ARGS__);        \
    if (appended_ > 0) {                                                       \
      outnum_len += (size_t) appended_;                                        \
      if (outnum_len >= sizeof(outnum))                                        \
        outnum_len = sizeof(outnum) - 1;                                       \
    }                                                                          \
  } while (0)

// Program entry point of the device-accelerated hydro solver.
//
// Parses the command line, initialises the simulation state, copies it to the
// device, then advances the simulation until H.tend or H.nstepmax is reached.
// A new time step dt is computed on every even step (halved on step 0, and
// reduced to the global minimum across ranks when built with MPI).  Each step
// prints a status line on rank 0 and, depending on H.noutput / H.dtoutput,
// writes an output file through vtkfile().
//
// Returns 0.
int
main(int argc, char **argv)
{
  double dt = 0;
  long nvtk = 0;
  char outnum[240];             // per-step status text, later reused for the elapsed-time string
  size_t outnum_len = 0;        // current length of the text in outnum
  long time_output = 0;         // 1 when outputs are driven by physical time
  long flops = 0;

  double next_output_time = 0;
  double start_time = 0, end_time = 0;
  double start_iter = 0, end_iter = 0;
  double elaps = 0;
  double avgMcps = 0;
  long nAvgMcps = 0;

#ifdef MPI
  MPI_Init(&argc, &argv);

  DeviceSet();
#endif

  // NOTE(review): H.mype is tested here before process_args() has run, and
  // against rank 1 rather than 0 -- confirm this is intended.
  if (H.mype == 1) fprintf(stdout, "Hydro starts.\n");

  process_args(argc, argv, &H);
  hydro_init(&H, &Hv);
  // PRINTUOLD(H, &Hv);

  cuAllocOnDevice(H);
  // Allocate work space for 1D sweeps
  allocate_work_space(H, &Hw, &Hvw);

  // vtkfile(nvtk, H, &Hv);
  if (H.dtoutput > 0) {
    // outputs are in physical time not in time steps
    time_output = 1;
    next_output_time = next_output_time + H.dtoutput;
  }
  if (H.dtoutput > 0 || H.noutput > 0)
    vtkfile(++nvtk, H, &Hv);
  if (H.mype == 0)
    fprintf(stdout, "Hydro starts main loop.\n");

  cuPutUoldOnDevice(H, &Hv);
  start_time = cclock();

  // fprintf(stdout, "%lg %lg %d %d \n", H.t, H.tend, H.nstep, H.nstepmax);

  while ((H.t < H.tend) && (H.nstep < H.nstepmax)) {
    double iter_time = 0;

    // reset the per-iteration counters and the status line
    flopsAri = flopsSqr = flopsMin = flopsTra = 0;
    start_iter = cclock();
    outnum[0] = 0;
    outnum_len = 0;
    flops = 0;

    if ((H.nstep % 2) == 0) {
      cuComputeDeltat(&dt, H, &Hw, &Hv, &Hvw);
      // fprintf(stdout, "dt=%lg\n", dt);
      if (H.nstep == 0) {
        dt = dt / 2.0;
      }
      if (H.nproc > 1) {
#ifdef MPI
        // every rank must advance with the same (smallest) time step
        double dtmin;
        int uno = 1;
        MPI_Allreduce(&dt, &dtmin, uno, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
        dt = dtmin;
#endif
      }
    }

    // alternate the starting direction between even and odd steps
    if ((H.nstep % 2) == 0) {
      cuHydroGodunov(1, dt, H, &Hv, &Hw, &Hvw);
    } else {
      cuHydroGodunov(2, dt, H, &Hv, &Hw, &Hvw);
    }
    end_iter = cclock();
    iter_time = (double) (end_iter - start_iter);
    H.nstep++;
    H.t += dt;
    {
#ifdef MPI
      long flopsAri_t, flopsSqr_t, flopsMin_t, flopsTra_t;
      MPI_Allreduce(&flopsAri, &flopsAri_t, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD);
      MPI_Allreduce(&flopsSqr, &flopsSqr_t, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD);
      MPI_Allreduce(&flopsMin, &flopsMin_t, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD);
      MPI_Allreduce(&flopsTra, &flopsTra_t, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD);
      //       if (H.mype == 1)
      //        printf("%ld %ld %ld %ld %ld %ld %ld %ld \n", flopsAri, flopsSqr, flopsMin, flopsTra, flopsAri_t, flopsSqr_t, flopsMin_t, flopsTra_t);
      flops = flopsAri_t * FLOPSARI + flopsSqr_t * FLOPSSQR + flopsMin_t * FLOPSMIN + flopsTra_t * FLOPSTRA;
#else
      flops = flopsAri * FLOPSARI + flopsSqr * FLOPSSQR + flopsMin * FLOPSMIN + flopsTra * FLOPSTRA;
#endif
      nbFLOPS++;

      if (flops > 0) {
        // iterations too short to time reliably are left out of the statistics
        if (iter_time > 1.e-9) {
          double mflops = (double) flops / (double) 1.e+6 / iter_time;
          MflopsSUM += mflops;
          HYDRO_OUTNUM_APPEND(" {%.2f Mflops %ld Ops} (%.3fs)", mflops, flops, iter_time);
        }
      } else {
        HYDRO_OUTNUM_APPEND(" (%.3fs)", iter_time);
      }
    }
    if (iter_time > 1.e-9) {
      double mcps = ((double) H.globnx * (double) H.globny) / iter_time / 1e6l;
      // the first steps are left out of the average
      if (H.nstep > 5) {
        HYDRO_OUTNUM_APPEND(" (%.1lf MC/s)", mcps);
        nAvgMcps++;
        avgMcps += mcps;
      }
    }
    if (time_output == 0 && H.noutput > 0) {
      // output every H.noutput steps
      if ((H.nstep % H.noutput) == 0) {
        cuGetUoldFromDevice(H, &Hv);
        vtkfile(++nvtk, H, &Hv);
        HYDRO_OUTNUM_APPEND(" [%04ld]", nvtk);
      }
    } else {
      // output every H.dtoutput units of physical time
      if (time_output == 1 && H.t >= next_output_time) {
        cuGetUoldFromDevice(H, &Hv);
        vtkfile(++nvtk, H, &Hv);
        next_output_time = next_output_time + H.dtoutput;
        HYDRO_OUTNUM_APPEND(" [%04ld]", nvtk);
      }
    }
    if (H.mype == 0) {
      fprintf(stdout, "--> step=%-4ld %12.5e, %10.5e %s\n", H.nstep, H.t, dt, outnum);
      fflush(stdout);
    }
  }
  end_time = cclock();

  hydro_finish(H, &Hv);
  cuFreeOnDevice();
  // Deallocate work space
  deallocate_work_space(H, &Hw, &Hvw);

  elaps = (double) (end_time - start_time);
  timeToString(outnum, elaps);
  if (H.mype == 0) {
    // report 0 instead of dividing by zero when no iteration was executed
    float avgMflops = (nbFLOPS > 0) ? (float) (MflopsSUM / nbFLOPS) : 0.0f;
    fprintf(stdout, "Hydro ends in %ss (%.3lf) <%.2lf MFlops>.\n", outnum, elaps, avgMflops);
  }
  if (H.mype == 0) {
    // no sample is collected when the run stops within the first steps
    if (nAvgMcps > 0)
      avgMcps /= nAvgMcps;
    fprintf(stdout, "Average MC/s: %.1lf\n", avgMcps);
  }
#ifdef MPI
  MPI_Finalize();
#endif
  return 0;
}

#undef HYDRO_OUTNUM_APPEND
Exemple #5
0
// Appends printf-style formatted text to the per-step status line held in the
// local array `outnum`, tracking its current length in the local `outnum_len`.
// The write is bounded by sizeof(outnum); on truncation the length is clamped
// so that later appends stay inside the buffer.  This replaces the former
// sprintf(outnum, "%s ...", outnum, ...) idiom, whose source and destination
// overlap (undefined behaviour).
#define HYDRO_OUTNUM_APPEND(...)                                               \
  do {                                                                         \
    int appended_ = snprintf(outnum + outnum_len,                              \
                             sizeof(outnum) - outnum_len, __VA_ARGS__);        \
    if (appended_ > 0) {                                                       \
      outnum_len += (size_t) appended_;                                        \
      if (outnum_len >= sizeof(outnum))                                        \
        outnum_len = sizeof(outnum) - 1;                                       \
    }                                                                          \
  } while (0)

// Program entry point of the sequential hydro solver.
//
// Parses the command line, initialises the simulation state, then advances
// the simulation until H.tend or H.nstepmax is reached.  A new time step dt is
// computed on every even step (halved on step 0); both directional passes are
// run on every step, in alternating order.  Each step prints a status line
// and, depending on H.noutput / H.dtoutput, writes an output file through
// vtkfile().
//
// Returns 0.
int
main(int argc, char **argv)
{
  int nb_th = 1;
  double dt = 0;
  long nvtk = 0;
  char outnum[80];              // per-step status text, later reused for the elapsed-time string
  size_t outnum_len = 0;        // current length of the text in outnum
  long time_output = 0;         // 1 when outputs are driven by physical time

  double next_output_time = 0;
  double start_time = 0, end_time = 0;
  double start_iter = 0, end_iter = 0;
  double elaps = 0;

  start_time = cclock();
  process_args(argc, argv, &H);
  hydro_init(&H, &Hv);
  PRINTUOLD(H, &Hv);

  printf("Hydro starts - sequential version \n");

  // vtkfile(nvtk, H, &Hv);
  if (H.dtoutput > 0) {
    // outputs are in physical time not in time steps
    time_output = 1;
    next_output_time = next_output_time + H.dtoutput;
  }

  while ((H.t < H.tend) && (H.nstep < H.nstepmax)) {
    double iter_time;

    start_iter = cclock();
    outnum[0] = 0;
    outnum_len = 0;
    flops = 0;

    if ((H.nstep % 2) == 0) {
      compute_deltat(&dt, H, &Hw, &Hv, &Hvw);
      if (H.nstep == 0) {
        dt = dt / 2.0;
      }
    }

    // both directions are swept on every step; only their order alternates
    if ((H.nstep % 2) == 0) {
      hydro_godunov(1, dt, H, &Hv, &Hw, &Hvw);
      hydro_godunov(2, dt, H, &Hv, &Hw, &Hvw);
    } else {
      hydro_godunov(2, dt, H, &Hv, &Hw, &Hvw);
      hydro_godunov(1, dt, H, &Hv, &Hw, &Hvw);
    }

    end_iter = cclock();
    H.nstep++;
    H.t += dt;

    iter_time = (double) (end_iter - start_iter);
    if (flops > 0) {
      // iterations too short to time reliably are not reported
      if (iter_time > 1.e-9) {
        double mflops = (double) flops / (double) 1.e+6 / iter_time;
        HYDRO_OUTNUM_APPEND(" {%.3f Mflops} (%.3fs)", mflops, iter_time);
      }
    } else {
      HYDRO_OUTNUM_APPEND(" (%.3fs)", iter_time);
    }

    if (time_output == 0) {
      // output every H.noutput steps; a non-positive H.noutput disables
      // step-driven output (and must not reach the modulo, which would
      // divide by zero for H.noutput == 0)
      if (H.noutput > 0 && (H.nstep % H.noutput) == 0) {
        vtkfile(++nvtk, H, &Hv);
        HYDRO_OUTNUM_APPEND(" [%04ld]", nvtk);
      }
    } else {
      // output every H.dtoutput units of physical time
      if (H.t >= next_output_time) {
        vtkfile(++nvtk, H, &Hv);
        next_output_time = next_output_time + H.dtoutput;
        HYDRO_OUTNUM_APPEND(" [%04ld]", nvtk);
      }
    }
    fprintf(stdout, "--> step=%-4ld %12.5e, %10.5e %s\n", H.nstep, H.t, dt, outnum);
  }                             // end while loop
  hydro_finish(H, &Hv);
  end_time = cclock();
  elaps = (double) (end_time - start_time);
  timeToString(outnum, elaps);

  fprintf(stdout, "Hydro ends in %ss (%.3lf).\n", outnum, elaps);

  return 0;
}

#undef HYDRO_OUTNUM_APPEND
Exemple #6
0
// Appends printf-style formatted text to the per-step status line held in the
// local array `outnum`, tracking its current length in the local `outnum_len`.
// The write is bounded by sizeof(outnum); on truncation the length is clamped
// so that later appends stay inside the buffer.  This replaces the former
// sprintf(outnum, "%s ...", outnum, ...) idiom, whose source and destination
// overlap (undefined behaviour).
#define HYDRO_OUTNUM_APPEND(...)                                               \
  do {                                                                         \
    int appended_ = snprintf(outnum + outnum_len,                              \
                             sizeof(outnum) - outnum_len, __VA_ARGS__);        \
    if (appended_ > 0) {                                                       \
      outnum_len += (size_t) appended_;                                        \
      if (outnum_len >= sizeof(outnum))                                        \
        outnum_len = sizeof(outnum) - 1;                                       \
    }                                                                          \
  } while (0)

// Program entry point of the profiled hydro solver.
//
// Parses the command line, initialises the simulation state, pre-allocates the
// work spaces, then advances the simulation until H.tend or H.nstepmax is
// reached.  A new time step dt is computed on every even step (halved on
// step 0, and reduced to the global minimum across ranks when built with MPI).
// Each step prints a status line on rank 0 and, depending on H.noutput /
// H.dtoutput, writes an output file through vtkfile().  Per-function timings
// accumulated in functim[] are reported at the end (min/max/average across
// ranks for multi-process runs).
//
// Returns 0.
int
main(int argc, char **argv) {
  real_t dt = 0;
  int nvtk = 0;
  char outnum[80];              // per-step status text, later reused for the elapsed-time string
  size_t outnum_len = 0;        // current length of the text in outnum
  int time_output = 0;          // 1 when outputs are driven by physical time
  long flops = 0;

  real_t next_output_time = 0;
  double start_time = 0, end_time = 0;
  double start_iter = 0, end_iter = 0;
  double elaps = 0;
  struct timespec start, end;

  // array of timers to profile the code
  memset(functim, 0, TIM_END * sizeof(functim[0]));

#ifdef MPI
  MPI_Init(&argc, &argv);
#endif

  process_args(argc, argv, &H);
  hydro_init(&H, &Hv);

  if (H.mype == 0)
    fprintf(stdout, "Hydro starts in %s precision.\n", ((sizeof(real_t) == sizeof(double))? "double": "single"));

#ifdef _OPENMP
  if (H.mype == 0) {
    fprintf(stdout, "Hydro:    OpenMP mode ON\n");
    fprintf(stdout, "Hydro: OpenMP %d max threads\n", omp_get_max_threads());
    fprintf(stdout, "Hydro: OpenMP %d num threads\n", omp_get_num_threads());
    fprintf(stdout, "Hydro: OpenMP %d num procs\n", omp_get_num_procs());
  }
#endif
#ifdef MPI
  if (H.mype == 0) {
    fprintf(stdout, "Hydro: MPI run with %d procs\n", H.nproc);
  }
#else
  fprintf(stdout, "Hydro: standard build\n");
#endif


  // PRINTUOLD(H, &Hv);
#ifdef MPI
  if (H.nproc > 1)
    MPI_Barrier(MPI_COMM_WORLD);
#endif

  if (H.dtoutput > 0) {
    // outputs are in physical time not in time steps
    time_output = 1;
    next_output_time = next_output_time + H.dtoutput;
  }

  if (H.dtoutput > 0 || H.noutput > 0)
    vtkfile(++nvtk, H, &Hv);

  if (H.mype == 0)
    fprintf(stdout, "Hydro starts main loop.\n");

  // pre-allocate memory before entering the loop (godunov scheme and deltat)
  start = cclock();
  allocate_work_space(H.nxyt, H, &Hw_godunov, &Hvw_godunov);
  compute_deltat_init_mem(H, &Hw_deltat, &Hvw_deltat);
  end = cclock();
  if (H.mype == 0) fprintf(stdout, "Hydro: init mem %lfs\n", ccelaps(start, end));
  // we start timings here to avoid the cost of initial memory allocation
  start_time = dcclock();

  while ((H.t < H.tend) && (H.nstep < H.nstepmax)) {
    // reset perf counters and the status line for this iteration
    flopsAri = flopsSqr = flopsMin = flopsTra = 0;
    start_iter = dcclock();
    outnum[0] = 0;
    outnum_len = 0;
    if ((H.nstep % 2) == 0) {
      dt = 0;
      // if (H.mype == 0) fprintf(stdout, "Hydro computes deltat.\n");
      start = cclock();
      compute_deltat(&dt, H, &Hw_deltat, &Hv, &Hvw_deltat);
      end = cclock();
      functim[TIM_COMPDT] += ccelaps(start, end);
      if (H.nstep == 0) {
        dt = dt / 2.0;
      }
#ifdef MPI
      if (H.nproc > 1) {
        // every rank must advance with the same (smallest) time step
        real_t dtmin;
        // printf("pe=%4d\tdt=%lg\n",H.mype, dt);
        if (sizeof(real_t) == sizeof(double)) {
          MPI_Allreduce(&dt, &dtmin, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
        } else {
          MPI_Allreduce(&dt, &dtmin, 1, MPI_FLOAT, MPI_MIN, MPI_COMM_WORLD);
        }
        dt = dtmin;
      }
#endif
    }
    // if (H.mype == 1) fprintf(stdout, "Hydro starts godunov.\n");
    // alternate the starting direction between even and odd steps
    if ((H.nstep % 2) == 0) {
      hydro_godunov(1, dt, H, &Hv, &Hw_godunov, &Hvw_godunov);
      //            hydro_godunov(2, dt, H, &Hv, &Hw, &Hvw);
    } else {
      hydro_godunov(2, dt, H, &Hv, &Hw_godunov, &Hvw_godunov);
      //            hydro_godunov(1, dt, H, &Hv, &Hw, &Hvw);
    }
    end_iter = dcclock();
    H.nstep++;
    H.t += dt;
    {
      real_t iter_time = (real_t) (end_iter - start_iter);
#ifdef MPI
      long flopsAri_t, flopsSqr_t, flopsMin_t, flopsTra_t;
      start = cclock();
      MPI_Allreduce(&flopsAri, &flopsAri_t, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD);
      MPI_Allreduce(&flopsSqr, &flopsSqr_t, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD);
      MPI_Allreduce(&flopsMin, &flopsMin_t, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD);
      MPI_Allreduce(&flopsTra, &flopsTra_t, 1, MPI_LONG, MPI_SUM, MPI_COMM_WORLD);
      //       if (H.mype == 1)
      //        printf("%ld %ld %ld %ld %ld %ld %ld %ld \n", flopsAri, flopsSqr, flopsMin, flopsTra, flopsAri_t, flopsSqr_t, flopsMin_t, flopsTra_t);
      flops = flopsAri_t * FLOPSARI + flopsSqr_t * FLOPSSQR + flopsMin_t * FLOPSMIN + flopsTra_t * FLOPSTRA;
      end = cclock();
      functim[TIM_ALLRED] += ccelaps(start, end);
#else
      flops = flopsAri * FLOPSARI + flopsSqr * FLOPSSQR + flopsMin * FLOPSMIN + flopsTra * FLOPSTRA;
#endif
      nbFLOPS++;

      if (flops > 0) {
        // iterations too short to time reliably are left out of the statistics
        if (iter_time > 1.e-9) {
          double mflops = (double) flops / (double) 1.e+6 / iter_time;
          MflopsSUM += mflops;
          HYDRO_OUTNUM_APPEND(" {%.2f Mflops %ld Ops} (%.3fs)", mflops, flops, iter_time);
        }
      } else {
        HYDRO_OUTNUM_APPEND(" (%.3fs)", iter_time);
      }
    }
    if (time_output == 0 && H.noutput > 0) {
      // output every H.noutput steps
      if ((H.nstep % H.noutput) == 0) {
        vtkfile(++nvtk, H, &Hv);
        HYDRO_OUTNUM_APPEND(" [%04d]", nvtk);
      }
    } else {
      // output every H.dtoutput units of physical time
      if (time_output == 1 && H.t >= next_output_time) {
        vtkfile(++nvtk, H, &Hv);
        next_output_time = next_output_time + H.dtoutput;
        HYDRO_OUTNUM_APPEND(" [%04d]", nvtk);
      }
    }
    if (H.mype == 0) {
      fprintf(stdout, "--> step=%4d, %12.5e, %10.5e %s\n", H.nstep, H.t, dt, outnum);
      fflush(stdout);
    }
  } // while
  end_time = dcclock();

  // Deallocate work spaces
  deallocate_work_space(H.nxyt, H, &Hw_godunov, &Hvw_godunov);
  compute_deltat_clean_mem(H, &Hw_deltat, &Hvw_deltat);

  hydro_finish(H, &Hv);
  elaps = (double) (end_time - start_time);
  timeToString(outnum, elaps);
  if (H.mype == 0) {
    // report 0 instead of dividing by zero when no iteration was executed
    float avgMflops = (nbFLOPS > 0) ? (float) (MflopsSUM / nbFLOPS) : 0.0f;
    fprintf(stdout, "Hydro ends in %ss (%.3lf) <%.2lf MFlops>.\n", outnum, elaps, avgMflops);
    fprintf(stdout, "    ");
  }
  if (H.nproc == 1) {
    // single process: absolute timings, then the same table as percentages
    int sizeFmt = sizeLabel(functim, TIM_END);
    printTimingsLabel(TIM_END, sizeFmt);
    fprintf(stdout, "\n");
    fprintf(stdout, "PE0 ");
    printTimings(functim, TIM_END, sizeFmt);
    fprintf(stdout, "\n");
    fprintf(stdout, "%%   ");
    percentTimings(functim, TIM_END);
    printTimings(functim, TIM_END, sizeFmt);
    fprintf(stdout, "\n");
  }
#ifdef MPI
  if (H.nproc > 1) {
    // several processes: min / max / average of each timer across all ranks
    double timMAX[TIM_END];
    double timMIN[TIM_END];
    double timSUM[TIM_END];
    MPI_Allreduce(functim, timMAX, TIM_END, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
    MPI_Allreduce(functim, timMIN, TIM_END, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
    MPI_Allreduce(functim, timSUM, TIM_END, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
    if (H.mype == 0) {
      int sizeFmt = sizeLabel(timMAX, TIM_END);
      printTimingsLabel(TIM_END, sizeFmt);
      fprintf(stdout, "\n");
      fprintf(stdout, "MIN ");
      printTimings(timMIN, TIM_END, sizeFmt);
      fprintf(stdout, "\n");
      fprintf(stdout, "MAX ");
      printTimings(timMAX, TIM_END, sizeFmt);
      fprintf(stdout, "\n");
      fprintf(stdout, "AVG ");
      avgTimings(timSUM, TIM_END, H.nproc);
      printTimings(timSUM, TIM_END, sizeFmt);
      fprintf(stdout, "\n");
    }
  }
#endif

#ifdef MPI
  MPI_Finalize();
#endif
  return 0;
}

#undef HYDRO_OUTNUM_APPEND
Exemple #7
0
// Runs the two directional Godunov passes of one time step using the ocl*
// device routines.
//
// The passes are executed in the order idimStart, then the other direction
// (idim takes the values 1 and 2).  For each pass the boundary is updated,
// then the domain is processed in bands of up to H.nxystep slices: gather
// conservative variables, convert to primitive variables, equation of state,
// slopes (skipped when H.iorder == 1), trace, left/right states, Riemann
// solver, fluxes, and finally the update of the conservative variables.
// The time spent in each stage is accumulated into functim[].
//
// When H.prt is set, intermediate arrays are fetched into the host work
// structures and dumped to a per-process "TRACE.<nproc>_<mype>.txt" file that
// is opened on first use and kept open in a function-local static.
//
// Parameters:
//   idimStart  direction of the first pass (1 or 2 expected -- TODO confirm
//              against callers)
//   dt         time step; only used through dtdx = dt / H.dx
//   H          run parameters (passed by value)
//   Hv         host-side variables, used when tracing is enabled
//   Hw, Hvw    host-side work arrays receiving the device arrays for tracing
void
oclHydroGodunov(long idimStart, real_t dt, const hydroparam_t H, hydrovar_t * Hv, hydrowork_t * Hw, hydrovarwork_t * Hvw)
{
  cl_int status;
  // Local variables
  struct timespec start, end;
  int i, j, idim, idimIndex;
  int Hmin, Hmax, Hstep, Hnxystep;
  int Hdimsize;
  int Hndim_1;
  int slices, iend;
  real_t dtdx;
  // size in bytes of one device work array holding all variables
  size_t lVarSz = H.arVarSz * H.nxystep * sizeof(real_t);
  // NOTE(review): Hnxyt / Hnxystep are not referenced directly below; presumably
  // they are picked up by indexing macros such as IHVWS -- confirm before renaming.
  long Hnxyt = H.nxyt;
  // debugging switch: when non-zero the work arrays are zeroed before use;
  // it is initialised to 0 and not reassigned in this function
  int clear = 0;
  static FILE *fic = NULL;

  // open the trace file once, on the first call that has tracing enabled
  if (fic == NULL && H.prt) {
    char logname[256];
    sprintf(logname, "TRACE.%04d_%04d.txt", H.nproc, H.mype);
    fic = fopen(logname, "w");
  }

  WHERE("hydro_godunov");
  // NOTE(review): fic is not checked for NULL after fopen(); the traced
  // fprintf calls below rely on the open having succeeded.
  if (H.prt) fprintf(fic, "loop dt=%lg\n", dt);

  for (idimIndex = 0; idimIndex < 2; idimIndex++) {
    // idimStart == 1 gives the order 1, 2; idimStart == 2 gives 2, 1
    idim = (idimStart - 1 + idimIndex) % 2 + 1;
    // constant for the whole pass
    dtdx = dt / H.dx;

    // Update boundary conditions
    if (H.prt) {
      fprintf(fic, "godunov %d\n", idim);
      PRINTUOLD(fic, H, Hv);
    }
    // shorthand used while tracing to refresh the host copy of uold
#define GETUOLD oclGetUoldFromDevice(H, Hv)
    start = cclock();
    oclMakeBoundary(idim, H, Hv, uoldDEV);
    end = cclock();
    functim[TIM_MAKBOU] += ccelaps(start, end);
    if (H.prt) {fprintf(fic, "MakeBoundary\n");}
    if (H.prt) {GETUOLD; PRINTUOLD(fic, H, Hv);}

    // select the index range and sizes matching the current direction
    if (idim == 1) {
      Hmin = H.jmin + ExtraLayer;
      Hmax = H.jmax - ExtraLayer;
      Hdimsize = H.nxt;
      Hndim_1 = H.nx + 1;
      Hstep = H.nxystep;
    } else {
      Hmin = H.imin + ExtraLayer;
      Hmax = H.imax - ExtraLayer;
      Hdimsize = H.nyt;
      Hndim_1 = H.ny + 1;
      Hstep = H.nxystep;
    }
    Hnxystep = Hstep;
    // process the domain in bands of at most Hstep slices
    for (i = Hmin; i < Hmax; i += Hstep) {
      long offsetIP = IHVWS(0, 0, IP);
      long offsetID = IHVWS(0, 0, ID);
      int Hnxyt = H.nxyt;
      // the last band may be shorter than Hstep
      iend = i + Hstep;
      if (iend >= Hmax) iend = Hmax;
      slices = iend - i;

      if (clear) oclMemset(uDEV, 0, lVarSz);
      start = cclock();
      oclGatherConservativeVars(idim, i, H.imin, H.imax, H.jmin, H.jmax, H.nvar, H.nxt, H.nyt, H.nxyt, slices, Hnxystep, uoldDEV, uDEV);
      end = cclock();
      functim[TIM_GATCON] += ccelaps(start, end);
      if (H.prt) {fprintf(fic, "ConservativeVars %ld %ld %ld %ld %d %d\n", H.nvar, H.nxt, H.nyt, H.nxyt, slices, Hstep);}
      if (H.prt) { GETARRV(uDEV, Hvw->u); }
      PRINTARRAYV2(fic, Hvw->u, Hdimsize, "u", H);

      // Convert to primitive variables
      start = cclock();
      oclConstoprim(Hdimsize, H.nxyt, H.nvar, H.smallr, slices, Hnxystep, uDEV, qDEV, eDEV);
      end = cclock();
      functim[TIM_CONPRI] += ccelaps(start, end);
      if (H.prt) { GETARR (eDEV, Hw->e); }
      if (H.prt) { GETARRV(qDEV, Hvw->q); }
      PRINTARRAY(fic, Hw->e, Hdimsize, "e", H);
      PRINTARRAYV2(fic, Hvw->q, Hdimsize, "q", H);

      // Equation of state
      start = cclock();
      oclEquationOfState(offsetIP, offsetID, 0, Hdimsize, H.smallc, H.gamma, slices, H.nxyt, qDEV, eDEV, cDEV);
      end = cclock();
      functim[TIM_EOS] += ccelaps(start, end);
      if (H.prt) { GETARR (cDEV, Hw->c); }
      PRINTARRAY(fic, Hw->c, Hdimsize, "c", H);
      if (H.prt) { GETARRV (qDEV, Hvw->q); }
      PRINTARRAYV2(fic, Hvw->q, Hdimsize, "q", H);

      // NOTE(review): unlike lVarSz, the sizes passed to oclMemset for dqDEV
      // (here and just below) and for fluxDEV are not multiplied by
      // sizeof(real_t) -- confirm the expected unit.  Only reached when clear != 0.
      if (clear) oclMemset(dqDEV, 0, H.arVarSz * H.nxystep);
      // Characteristic tracing
      if (H.iorder != 1) {
	if (clear) oclMemset(dqDEV, 0, H.arVarSz);
	start = cclock();
        oclSlope(Hdimsize, H.nvar, H.nxyt, H.slope_type, slices, Hstep, qDEV, dqDEV);
	end = cclock();
	functim[TIM_SLOPE] += ccelaps(start, end);
	if (H.prt) { GETARRV(dqDEV, Hvw->dq); }
	PRINTARRAYV2(fic, Hvw->dq, Hdimsize, "dq", H);
      }
      start = cclock();
      oclTrace(dtdx, Hdimsize, H.scheme, H.nvar, H.nxyt, slices, Hstep, qDEV, dqDEV, cDEV, qxmDEV, qxpDEV);
      end = cclock();
      functim[TIM_TRACE] += ccelaps(start, end);
      if (H.prt) { GETARRV(qxmDEV, Hvw->qxm); }
      if (H.prt) { GETARRV(qxpDEV, Hvw->qxp); }
      PRINTARRAYV2(fic, Hvw->qxm, Hdimsize, "qxm", H);
      PRINTARRAYV2(fic, Hvw->qxp, Hdimsize, "qxp", H);
      // Left and right states at the interfaces
      start = cclock();
      oclQleftright(idim, H.nx, H.ny, H.nxyt, H.nvar, slices, Hstep, qxmDEV, qxpDEV, qleftDEV, qrightDEV);
      end = cclock();
      functim[TIM_QLEFTR] += ccelaps(start, end);
      if (H.prt) { GETARRV(qleftDEV, Hvw->qleft); }
      if (H.prt) { GETARRV(qrightDEV, Hvw->qright); }
      PRINTARRAYV2(fic, Hvw->qleft, Hdimsize, "qleft", H);
      PRINTARRAYV2(fic, Hvw->qright, Hdimsize, "qright", H);

      // Solve Riemann problem at interfaces
      start = cclock();
      oclRiemann(Hndim_1, H.smallr, H.smallc, H.gamma, H.niter_riemann, H.nvar, H.nxyt, slices, Hstep,
		 qleftDEV, qrightDEV, qgdnvDEV,sgnmDEV);
      end = cclock();
      functim[TIM_RIEMAN] += ccelaps(start, end);
      if (H.prt) { GETARRV(qgdnvDEV, Hvw->qgdnv); }
      PRINTARRAYV2(fic, Hvw->qgdnv, Hdimsize, "qgdnv", H);
      // Compute fluxes
      if (clear) oclMemset(fluxDEV, 0, H.arVarSz);
      start = cclock();
      oclCmpflx(Hdimsize, H.nxyt, H.nvar, H.gamma, slices, Hnxystep, qgdnvDEV, fluxDEV);
      end = cclock();
      functim[TIM_CMPFLX] += ccelaps(start, end);
      if (H.prt) { GETARRV(fluxDEV, Hvw->flux); }
      PRINTARRAYV2(fic, Hvw->flux, Hdimsize, "flux", H);
      if (H.prt) { GETARRV(uDEV, Hvw->u); }
      PRINTARRAYV2(fic, Hvw->u, Hdimsize, "u", H);
      // if (H.prt) {
      // 	GETUOLD; PRINTUOLD(fic, H, Hv);
      // }
      if (H.prt) fprintf(fic, "dxdt=%lg\n", dtdx);
      // Update the conservative variables of this band from the fluxes
      start = cclock();
      oclUpdateConservativeVars(idim, i, dtdx, H.imin, H.imax, H.jmin, H.jmax, H.nvar, H.nxt, H.nyt, H.nxyt, slices, Hnxystep, 
				uoldDEV, uDEV, fluxDEV);
      end = cclock();
      functim[TIM_UPDCON] += ccelaps(start, end);
      if (H.prt) {
	GETUOLD; PRINTUOLD(fic, H, Hv);
      }
    }                           // for i

    if (H.prt) {
      // printf("After pass %d\n", idim);
      PRINTUOLD(fic, H, Hv);
    }
  } 
}                               // hydro_godunov
Exemple #8
0
// auxiliary variables used to set up the HMPP resident mode

// Runs the two directional Godunov passes of one time step on the host.
//
// The passes are executed in the order idimStart, then the other direction
// (idim takes the values 1 and 2).  For each pass the boundary is updated,
// then the domain is processed in bands of up to H.nxystep slices: gather
// conservative variables, convert to primitive variables, equation of state,
// slopes (skipped when H.iorder == 1), trace, left/right states, Riemann
// solver, fluxes, and finally the update of the conservative variables.
// The time spent in each stage is accumulated into functim[].
//
// When H.prt == 1 a per-process "TRACE.<nproc>_<mype>.txt" file is opened on
// first use and kept open in a function-local static; it receives the dumps
// produced by the PRINT* macros.
//
// Parameters:
//   idimStart  direction of the first pass (1 or 2 expected -- TODO confirm
//              against callers)
//   dt         time step; only used through dtdx = dt / H.dx
//   H          run parameters (passed by value)
//   Hv         simulation variables; Hv->uold is read and updated
//   Hw, Hvw    pre-allocated work arrays, viewed below as 2D / 3D arrays
void
hydro_godunov(int idimStart, real_t dt, const hydroparam_t H, hydrovar_t * Hv, hydrowork_t * Hw, hydrovarwork_t * Hvw) {
  // Local variables
  struct timespec start, end;
  int j;
  real_t dtdx;
  // debugging switch: when non-zero the work arrays are zeroed before use;
  // it is initialised to 0 and not reassigned in this function
  int clear=0;

  // Typed views of the flat work buffers: [H.nxystep][H.nxyt] per variable
  // for the multi-variable arrays, [H.nxyt] per slice for the others.
  real_t (*e)[H.nxyt];
  real_t (*flux)[H.nxystep][H.nxyt];
  real_t (*qleft)[H.nxystep][H.nxyt];
  real_t (*qright)[H.nxystep][H.nxyt];
  real_t (*c)[H.nxyt];
  real_t *uold;
  int (*sgnm)[H.nxyt];
  real_t (*qgdnv)[H.nxystep][H.nxyt];
  real_t (*u)[H.nxystep][H.nxyt];
  real_t (*qxm)[H.nxystep][H.nxyt];
  real_t (*qxp)[H.nxystep][H.nxyt];
  real_t (*q)[H.nxystep][H.nxyt];
  real_t (*dq)[H.nxystep][H.nxyt];

  static FILE *fic = NULL;

  // open the trace file once, on the first call that has tracing enabled
  // NOTE(review): the file is opened only when H.prt == 1, while the traced
  // fprintf calls below run for any non-zero H.prt, and fic is not checked
  // for NULL after fopen() -- confirm H.prt is only ever 0 or 1.
  if (fic == NULL && H.prt == 1) {
    char logname[256];
    sprintf(logname, "TRACE.%04d_%04d.txt", H.nproc, H.mype);
    fic = fopen(logname, "w");
  }

  WHERE("hydro_godunov");

  // int hmppGuard = 1;
  int idimIndex = 0;

  for (idimIndex = 0; idimIndex < 2; idimIndex++) {
    // idimStart == 1 gives the order 1, 2; idimStart == 2 gives 2, 1
    int idim = (idimStart - 1 + idimIndex) % 2 + 1;
    // constant for the whole pass
    dtdx = dt / H.dx;

    // Update boundary conditions
    if (H.prt) {
      fprintf(fic, "godunov %d\n", idim);
      PRINTUOLD(fic, H, Hv);
    }
    // if (H.mype == 1) fprintf(fic, "Hydro makes boundary.\n");
    start = cclock();
    make_boundary(idim, H, Hv);
    end = cclock();
    functim[TIM_MAKBOU] += ccelaps(start, end);

    if (H.prt) {fprintf(fic, "MakeBoundary\n");}
    // NOTE(review): this dump is not wrapped in an H.prt test here;
    // presumably the macro checks H.prt itself -- confirm.
    PRINTUOLD(fic, H, Hv);

    // bind the typed views to the work buffers
    uold = Hv->uold;
    qgdnv = (real_t (*)[H.nxystep][H.nxyt]) Hvw->qgdnv;
    flux = (real_t (*)[H.nxystep][H.nxyt]) Hvw->flux;
    c = (real_t (*)[H.nxyt]) Hw->c;
    e = (real_t (*)[H.nxyt]) Hw->e;
    qleft = (real_t (*)[H.nxystep][H.nxyt]) Hvw->qleft;
    qright = (real_t (*)[H.nxystep][H.nxyt]) Hvw->qright;
    sgnm = (int (*)[H.nxyt]) Hw->sgnm;
    q = (real_t (*)[H.nxystep][H.nxyt]) Hvw->q;
    dq = (real_t (*)[H.nxystep][H.nxyt]) Hvw->dq;
    u = (real_t (*)[H.nxystep][H.nxyt]) Hvw->u;
    qxm = (real_t (*)[H.nxystep][H.nxyt]) Hvw->qxm;
    qxp = (real_t (*)[H.nxystep][H.nxyt]) Hvw->qxp;

    int Hmin, Hmax, Hstep;
    int Hdimsize;
    int Hndim_1;

    // select the index range and sizes matching the current direction
    if (idim == 1) {
      Hmin = H.jmin + ExtraLayer;
      Hmax = H.jmax - ExtraLayer;
      Hdimsize = H.nxt;
      Hndim_1 = H.nx + 1;
      Hstep = H.nxystep;
    } else {
      Hmin = H.imin + ExtraLayer;
      Hmax = H.imax - ExtraLayer;
      Hdimsize = H.nyt;
      Hndim_1 = H.ny + 1;
      Hstep = H.nxystep;
    }

    // intentionally empty placeholder (first pass of the first step)
    if (!H.nstep && idim == 1) {
      /* LM -- HERE a more secure implementation should be used: a new parameter ? */
    }
    // if (H.mype == 1) fprintf(fic, "Hydro computes slices.\n");
    for (j = Hmin; j < Hmax; j += Hstep) {
      // we try to compute many slices each pass
      // the last band may be shorter than Hstep
      int jend = j + Hstep;
      if (jend >= Hmax)
        jend = Hmax;
      int slices = jend - j;    // number of slices to compute
      // fprintf(stderr, "Godunov idim=%d, j=%d %d \n", idim, j, slices);

      // NOTE(review): both this reset and the one before constoprim target dq.
      if (clear) Dmemset((H.nxyt) * H.nxystep * H.nvar, (real_t *) dq, 0);
      start = cclock();
      gatherConservativeVars(idim, j, H.imin, H.imax, H.jmin, H.jmax, H.nvar, H.nxt, H.nyt, H.nxyt, slices, Hstep, uold,
                             u);
      end = cclock();
      functim[TIM_GATCON] += ccelaps(start, end);
      if (H.prt) {fprintf(fic, "ConservativeVars %d %d %d %d %d %d\n", H.nvar, H.nxt, H.nyt, H.nxyt, slices, Hstep);}
      PRINTARRAYV2(fic, u, Hdimsize, "u", H);

      if (clear) Dmemset((H.nxyt) * H.nxystep * H.nvar, (real_t *) dq, 0);

      // Convert to primitive variables
      start = cclock();
      constoprim(Hdimsize, H.nxyt, H.nvar, H.smallr, slices, Hstep, u, q, e);
      end = cclock();
      functim[TIM_CONPRI] += ccelaps(start, end);
      PRINTARRAY(fic, e, Hdimsize, "e", H);
      PRINTARRAYV2(fic, q, Hdimsize, "q", H);

      // Equation of state
      start = cclock();
      equation_of_state(0, Hdimsize, H.nxyt, H.nvar, H.smallc, H.gamma, slices, Hstep, e, q, c);
      end = cclock();
      functim[TIM_EOS] += ccelaps(start, end);
      PRINTARRAY(fic, c, Hdimsize, "c", H);
      PRINTARRAYV2(fic, q, Hdimsize, "q", H);

      // Characteristic tracing
      if (H.iorder != 1) {
	start = cclock();
        slope(Hdimsize, H.nvar, H.nxyt, H.slope_type, slices, Hstep, q, dq);
	end = cclock();
	functim[TIM_SLOPE] += ccelaps(start, end);
        PRINTARRAYV2(fic, dq, Hdimsize, "dq", H);
      }

      if (clear) Dmemset(H.nxyt * H.nxystep * H.nvar, (real_t *) qxm, 0);
      if (clear) Dmemset(H.nxyt * H.nxystep * H.nvar, (real_t *) qxp, 0);
      if (clear) Dmemset(H.nxyt * H.nxystep * H.nvar, (real_t *) qleft, 0);
      if (clear) Dmemset(H.nxyt * H.nxystep * H.nvar, (real_t *) qright, 0);
      if (clear) Dmemset(H.nxyt * H.nxystep * H.nvar, (real_t *) flux, 0);
      if (clear) Dmemset(H.nxyt * H.nxystep * H.nvar, (real_t *) qgdnv, 0);
      start = cclock();
      trace(dtdx, Hdimsize, H.scheme, H.nvar, H.nxyt, slices, Hstep, q, dq, c, qxm, qxp);
      end = cclock();
      functim[TIM_TRACE] += ccelaps(start, end);
      PRINTARRAYV2(fic, qxm, Hdimsize, "qxm", H);
      PRINTARRAYV2(fic, qxp, Hdimsize, "qxp", H);

      // Left and right states at the interfaces
      start = cclock();
      qleftright(idim, H.nx, H.ny, H.nxyt, H.nvar, slices, Hstep, qxm, qxp, qleft, qright);
      end = cclock();
      functim[TIM_QLEFTR] += ccelaps(start, end);
      PRINTARRAYV2(fic, qleft, Hdimsize, "qleft", H);
      PRINTARRAYV2(fic, qright, Hdimsize, "qright", H);

      // Solve Riemann problem at interfaces
      start = cclock();
      riemann(Hndim_1, H.smallr, H.smallc, H.gamma, H.niter_riemann, H.nvar, H.nxyt, slices, Hstep, qleft, qright, qgdnv, sgnm, Hw);
      end = cclock();
      functim[TIM_RIEMAN] += ccelaps(start, end);
      PRINTARRAYV2(fic, qgdnv, Hdimsize, "qgdnv", H);

      // Compute fluxes
      start = cclock();
      cmpflx(Hdimsize, H.nxyt, H.nvar, H.gamma, slices, Hstep, qgdnv, flux);
      end = cclock();
      functim[TIM_CMPFLX] += ccelaps(start, end);
      PRINTARRAYV2(fic, flux, Hdimsize, "flux", H);
      PRINTARRAYV2(fic, u, Hdimsize, "u", H);

      // Update the conservative variables of this band from the fluxes
      start = cclock();
      updateConservativeVars(idim, j, dtdx, H.imin, H.imax, H.jmin, H.jmax, H.nvar, H.nxt, H.nyt, H.nxyt, slices, Hstep,
                             uold, u, flux);
      end = cclock();
      functim[TIM_UPDCON] += ccelaps(start, end);
      PRINTUOLD(fic, H, Hv);
    }                           // for j

    if (H.prt) {
      // printf("[%d] After pass %d\n", H.mype, idim);
      PRINTUOLD(fic, H, Hv);
    }
  }

  // intentionally empty placeholder (last step of the run)
  if ((H.t + dt >= H.tend) || (H.nstep + 1 >= H.nstepmax)) {
    /* LM -- HERE a more secure implementation should be used: a new parameter ? */
  }

}                               // hydro_godunov