Пример #1
0
/*
 * Benchmark driver: alternates computation units, communication units and
 * per-step output, timing each phase.
 *
 * Flow:
 *   1. Initialise MPI, duplicate MPI_COMM_WORLD and parse the command line.
 *   2. Rank 0 prints the setup parameters.
 *   3. Allocate the local nx x ny block of a 2D array that is decomposed
 *      in 1D along x (global size is nproc*nx x ny).
 *   4. Run one untimed warm-up step.
 *   5. Run nsteps timed steps; each step runs niterations iterations of
 *      ncomp computation units followed by ncomm communication units, then
 *      dumps the data through output_dump().
 *   6. Report timings and the maximum main-loop runtime over all ranks.
 *
 * Returns 1 if processArgs() reports a failure, 0 otherwise.
 */
int main (int argc, char ** argv ) 
{
    MPI_Init (&argc, &argv);
    MPI_Comm_dup (MPI_COMM_WORLD, &comm);
    MPI_Comm_rank (comm, &rank);
    MPI_Comm_size (comm, &nproc);
    comp_comm_init(comm);

    if (processArgs(argc, argv)) {
        /* Shut MPI down before leaving instead of exiting with it still
         * initialised.
         * NOTE(review): assumes every rank rejects the arguments in the same
         * way, so that all of them reach MPI_Finalize -- confirm. */
        MPI_Finalize ();
        return 1;
    }

    /* Size of the local block in bytes, computed in 64-bit arithmetic. */
    const uint64_t local_bytes = sizeof(double) * nx * (uint64_t) ny;

    if (!rank) {
        printf ("Setup parameters:\n");
        printf ("  # of steps (outputs):     %d\n", nsteps);
        printf ("  # of iterations per step: %d\n", niterations);
        printf ("  # of computation units in each iteration:   %d\n", ncomp);
        printf ("  # of communication units in each iteration: %d\n", ncomm);
        /* The byte counts are unsigned 64-bit values; cast them to the exact
         * type that %llu expects so format and argument agree. */
        printf ("  output size per process: %d x %d doubles = %llu bytes\n", 
                nx, ny, (unsigned long long) local_bytes);
        printf ("  output size per step: %llu bytes\n", 
                (unsigned long long) (nproc * local_bytes));
    }

    //2D array with 1D decomposition
    offs_x = rank * nx;
    offs_y = 0;
    gnx = nproc * nx;
    gny = ny;

    data = (double*) malloc (sizeof(double) * nx * (size_t) ny);
    if (data == NULL) {
        /* Stop here rather than hand a null buffer to the compute and
         * output routines below. */
        fprintf (stderr, "rank %d: failed to allocate %llu bytes for the local array\n",
                 rank, (unsigned long long) local_bytes);
        abort ();
    }
    timing_alloc(nsteps);

    /* Widen before multiplying: nx*ny evaluated as int could overflow. */
    int bufsizeMB = (int) (sizeof(double) * nx * (size_t) ny / 1048576 + 1);
    output_init(comm, bufsizeMB);
    output_define(nx, ny, gnx, gny, offs_x, offs_y);

    int it, step, icomp, icomm;

    /* Warm up a bit */
    if (!rank) printf ("Warm up for 1 steps, %d iterations per step...\n", niterations);
    for (step=0; step < 1; step++) {
        for (it=0; it < niterations; it++) {
            for (icomp=0; icomp < ncomp; icomp++) {
                do_calc_unit (data, nx, ny);
            }
            for (icomm=0; icomm < ncomm; icomm++) {
                do_comm_unit (comm);
            }
        }
    }


    /* Do the steps with output now */
    data_init();
    if (!rank) printf ("Start running with I/O and measurements...\n");
    double Tcalc_it, Tcomm_it;
    double Truntime; //to print total time for the loop below (for overhead calculation)
    char filename[256];

    /* Start the clock on all ranks together. */
    MPI_Barrier (comm);
    Truntime = MPI_Wtime();

    for (step=0; step < nsteps; step++) {
        if (!rank) printf ("Start step %d\n", step);
        Tcalc[step] = 0;
        Tcomm[step] = 0;
        for (it=0; it < niterations; it++) {
            // spend some time with computation
            Tcalc_it = MPI_Wtime();
            for (icomp=0; icomp < ncomp; icomp++) {
                do_calc_unit (data, nx, ny);
            }
            Tcalc_it = MPI_Wtime() - Tcalc_it;
            Tcalc[step] += Tcalc_it;

            // spend some time with communication
            Tcomm_it = MPI_Wtime();
            for (icomm=0; icomm < ncomm; icomm++) {
                do_comm_unit (comm);
            }
            Tcomm_it = MPI_Wtime() - Tcomm_it;
            Tcomm[step] += Tcomm_it;
        }
        // output per step
        snprintf (filename, sizeof(filename), "data%6.6d", step);
        if (!rank) printf ("Output to %s\n", filename);
        MPI_Barrier (comm);
        output_dump(filename, step, data);
    }

    MPI_Barrier (comm);
    Truntime = MPI_Wtime() - Truntime;

    if (!rank) printf ("Finalize...\n");
    MPI_Barrier (comm);
    output_finalize (rank);

    timing_report(nsteps, comm);
    /* Only rank 0 (the reduction root) receives a meaningful maximum. */
    double Truntime_max = 0.0;
    MPI_Reduce (&Truntime, &Truntime_max, 1, MPI_DOUBLE, MPI_MAX, 0, comm);
    /* Report the slowest rank's time, i.e. the result of the reduction,
     * not just rank 0's own measurement. */
    if (!rank) printf ("Total runtime of main loop: %9.3f\n", Truntime_max);
    free (data);
    timing_free();

    MPI_Barrier (comm);
    MPI_Finalize ();
    return 0;
}
Пример #2
0
/*
 * Write the job's setup section (job->status->setup) to STREAM by
 * forwarding it to output_dump().
 *
 * Original author's notes: the setup section holds the setup code read
 * from the various prologue (.pro and .ps) files.  The difficulty is to
 * avoid dumping too many font definitions, which could exhaust the
 * memory of very old PostScript level 1 printers.  Whether that
 * precaution is really needed (it is certainly useful) was still an
 * open question for the author.
 */
void dump_setup(FILE *stream, a2ps_job *job)
{
  output_dump(job->status->setup, stream);
}