/**
 * Verifies the block decomposition of a launch configuration for one domain / boundary setup.
 *
 * A LaunchConfigurationImpl is initialized with an (iDomain x jDomain) calculation domain and the
 * given IJ boundary. Every point (i, j) of a scan window that extends one block plus one point
 * beyond the region on each side is then checked:
 *  - points with iMinusBoundary <= i < iDomain + iPlusBoundary and
 *    jMinusBoundary <= j < jDomain + jPlusBoundary must be reported exactly once,
 *  - all other points of the scan window must not be reported at all.
 *
 * count_block_occurrences (defined elsewhere) is presumed to return the number of block
 * configurations containing the point (i, j).
 *
 * @param iDomain        size of the calculation domain in i direction
 * @param iMinusBoundary boundary offset at the lower i end
 * @param iPlusBoundary  boundary offset at the upper i end
 * @param jDomain        size of the calculation domain in j direction
 * @param jMinusBoundary boundary offset at the lower j end
 * @param jPlusBoundary  boundary offset at the upper j end
 */
void configuration_test_round(int iDomain, int iMinusBoundary, int iPlusBoundary, int jDomain, int jMinusBoundary, int jPlusBoundary)
{
    IJKSize calculationDomain;
    IJBoundary boundary;

    calculationDomain.Init(iDomain, jDomain, 0);
    boundary.Init(iMinusBoundary, iPlusBoundary, jMinusBoundary, jPlusBoundary);

    LaunchConfigurationImpl launchConfig;
    launchConfig.Init(calculationDomain, boundary);

    // check the whole domain with a boundary, plus a margin of one block and one point per side
    for(int i = iMinusBoundary - iBlockSize - 1; i < iDomain + iPlusBoundary + iBlockSize + 1; ++i)
    {
        // the j range has to be derived from jDomain (not iDomain), otherwise part of the
        // covered region is never scanned whenever jDomain > iDomain
        for(int j = jMinusBoundary - jBlockSize - 1; j < jDomain + jPlusBoundary + jBlockSize + 1; ++j)
        {
            // check inside is covered exactly once
            if( i >= iMinusBoundary && i < iDomain + iPlusBoundary &&
                j >= jMinusBoundary && j < jDomain + jPlusBoundary    )
            {
                ASSERT_EQ(1, count_block_occurrences(launchConfig.blockConfigurations(), i, j));
            }
            // check everything outside is not covered
            else
            {
                ASSERT_EQ(0, count_block_occurrences(launchConfig.blockConfigurations(), i, j));
            }
        }
    }
}
// test that assigning a launch configuration copies the boundary and every block configuration
TEST(LaunchConfigurationUnittest, Copy)
{
    IJKSize domainSize;
    IJBoundary ijBoundary;

    domainSize.Init(5, 7, 0);
    ijBoundary.Init(-1, 3, -7, 1);

    // only the source is initialized, the target receives its state through the assignment
    LaunchConfigurationImpl source, target;
    source.Init(domainSize, ijBoundary);

    target = source;

    // the boundary offsets have to agree
    ASSERT_EQ(source.boundary().iMinusOffset(), target.boundary().iMinusOffset());
    ASSERT_EQ(source.boundary().iPlusOffset(), target.boundary().iPlusOffset());
    ASSERT_EQ(source.boundary().jMinusOffset(), target.boundary().jMinusOffset());
    ASSERT_EQ(source.boundary().jPlusOffset(), target.boundary().jPlusOffset());

    // the number of blocks has to agree before the blocks are compared one by one
    ASSERT_EQ(source.blockConfigurations().size(), target.blockConfigurations().size());

    const int numberOfBlocks = static_cast<int>(source.blockConfigurations().size());
    for(int block = 0; block < numberOfBlocks; ++block)
    {
        // i direction
        ASSERT_EQ(source.blockConfigurations()[block].iBlockIndex, target.blockConfigurations()[block].iBlockIndex);
        ASSERT_EQ(source.blockConfigurations()[block].iStart, target.blockConfigurations()[block].iStart);
        ASSERT_EQ(source.blockConfigurations()[block].iEnd, target.blockConfigurations()[block].iEnd);

        // j direction
        ASSERT_EQ(source.blockConfigurations()[block].jBlockIndex, target.blockConfigurations()[block].jBlockIndex);
        ASSERT_EQ(source.blockConfigurations()[block].jStart, target.blockConfigurations()[block].jStart);
        ASSERT_EQ(source.blockConfigurations()[block].jEnd, target.blockConfigurations()[block].jEnd);
    }
}
void UnittestEnvironment::SetUp()
{
        // make sure the repository is null
        assert(!pRepository_);
        pRepository_ = new HoriDiffRepository();

        // prepare the repository
        calculationDomain_.Init(
                Options::getInstance().domain_.iSize(),
                Options::getInstance().domain_.jSize(),
                Options::getInstance().domain_.kSize()
        );
        pRepository_->Init(calculationDomain_);
        pRepository_->AllocateDataFields();
        pRepository_->SetInitalValues();

        IJKSize globalDomainSize;
        globalDomainSize.Init(calculationDomain_.iSize() + cNumBoundaryLines*2,
            calculationDomain_.jSize() + cNumBoundaryLines*2,
            calculationDomain_.kSize());

        int subdomainPosition_[4];
        subdomainPosition_[0] = cNumBoundaryLines+1;
        subdomainPosition_[1] = cNumBoundaryLines+1;
        subdomainPosition_[2] = cNumBoundaryLines+calculationDomain_.iSize();
        subdomainPosition_[3] = cNumBoundaryLines+calculationDomain_.jSize();
        // Initialize the halo update configuration
        
        communicationConfiguration_.Init(true, true, false, false, false, false,
            globalDomainSize, 1, subdomainPosition_);
}
    // Fixture set up: defines a 12 x 14 x 60 calculation domain with a (-1, 2)
    // k boundary and initializes the 3D, 2D and 1D storages with both.
    virtual void SetUp()
    {
        // the k boundary and the domain size are independent of each other
        kBoundary_.Init(-1, 2);
        calculationDomain_.Init(12, 14, 60);

        // all storages share the same domain and k boundary
        storage3D_.Init(calculationDomain_, kBoundary_);
        storage2D_.Init(calculationDomain_, kBoundary_);
        storage1D_.Init(calculationDomain_, kBoundary_);
    }
Esempio n. 5
0
// Entry point of the parareal convection driver.
//
// Every MPI rank owns one time slice [timeStart, timeEnd) whose length is
// conf.timeSliceSize(). Depending on conf.mode() the program
//  - ModeCompare : times the fine (RK4) and coarse (Euler) propagators, runs the serial and the
//                  parallel solver, reports run times, speedup and error and writes result.mat
//  - ModeSerial  : runs the serial solver and reports run time and energy figures
//  - ModeParallel: runs the serial solver as reference, then the parallel solver, and reports
//                  run time, energy figures and the error with respect to the reference
//  - ModeTiming  : runs the timed parallel solver and prints the timings of all ranks on root
//
// Results are printed by the last rank (isLast), except in ModeTiming where root prints.
// Always returns 0.
int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    int commsize, commrank;
    MPI_Comm_size(MPI_COMM_WORLD, &commsize);
    MPI_Comm_rank(MPI_COMM_WORLD, &commrank);
    // root prints the configuration and the timing table, the last rank prints the results
    const bool isRoot = commrank == 0;
    const bool isLast = commrank == commsize - 1;

    if (isRoot)
        std::cout << "Initialization...\n" << std::endl;

    RuntimeConfiguration conf(argc, argv);

    // Compute my start and end time (one time slice per rank)
    const double timeStart = conf.timeSliceSize() * commrank;
    const double timeEnd = conf.timeSliceSize() * (commrank + 1);

    // Print the run time configuration once (the whole stream expression belongs to the if)
    if (isRoot)
    std::cout << "Running with:\n"
        << " - initial diffusion coefficient: " << conf.nu0() << "\n"
        << " - frequence of diffusion coefficient: " << conf.nufreq() << "\n"
        << " - advection velocity in x: " << conf.cx() << "\n"
        << " - advection velocity in y: " << conf.cy() << "\n"
        << " - advection velocity in z: " << conf.cz() << "\n"
        << " - spatial discretization step: " << conf.dx() << "\n"
        << " - endtime: " << conf.endTime() << "\n"
        << " - number of time slices: " << conf.timeSlices() << "\n"
        << " - time slice size: " << conf.timeSliceSize() << "\n"
        << " - CFL fine: " << conf.cflFine() << "\n"
        << " - CFL coarse: " << conf.cflCoarse() << "\n"
        << " - timestep size fine: " << conf.dtFine() << "\n"
        << " - timestep size coarse: " << conf.dtCoarse() << "\n"
        << " - timesteps per slice fine propagator: " << conf.timeStepsFinePerTimeSlice() << "\n"
        << " - timesteps per slice coarse propagator: " << conf.timeStepsCoarsePerTimeSlice() << "\n"
        << " - parareal iterations: " << conf.kmax() << "\n"
        << " - asynchronous communications: " << (conf.async() ? "Enabled" : "Disabled") << "\n"
        << " - intermediate fields in mat files: " << (conf.mat() ? "Yes" : "No") << "\n"
        << std::endl;

    // Calculation domain (cubic, gridSize points per direction) and k boundaries
    IJKSize domain; domain.Init(conf.gridSize(), conf.gridSize(), conf.gridSize());
    KBoundary kboundary; kboundary.Init(-convectionBoundaryLines, convectionBoundaryLines);

    // Initialize fields and the convection operator
    ConvectionField q, qinitial;
    q.Init("q", domain, kboundary);
    qinitial.Init("qinitial", domain, kboundary);
    Convection convection(conf.gridSize(), conf.gridSize(), conf.gridSize(), conf.dx(), conf.nu0(), conf.nufreq(), conf.cx(), conf.cy(), conf.cz());

    // Initialize parareal
    Parareal<Convection, ConvectionField> parareal(convection, qinitial, q, timeStart, conf, MPI_COMM_WORLD);

    if (conf.mode() == ModeCompare)
    {
        // Measure time required by convection: both propagators are run for tauSamples
        // time slices; qinitial is only filled with the initial solution afterwards
        const int tauSamples = 4;
        double tauF = MPI_Wtime();
        convection.DoRK4(qinitial, qinitial, 0., conf.dtFine(), tauSamples*conf.timeStepsFinePerTimeSlice());
        SynchronizeCUDA();
        tauF = MPI_Wtime() - tauF;
        double tauG = MPI_Wtime();
        convection.DoEuler(qinitial, qinitial, 0., conf.dtCoarse(), tauSamples*conf.timeStepsCoarsePerTimeSlice());
        SynchronizeCUDA();
        tauG = MPI_Wtime() - tauG;

        // Speedup estimate from the coarse/fine cost ratio and the iterations per rank
        const double tauRatio = tauG / tauF;
        const double Nit_Np = static_cast<double>(conf.kmax()) / commsize;
        const double maxSpeedup = 1. / (tauRatio * (1. + Nit_Np) + Nit_Np);

        // Fill initial solution
        SynchronizeHost(qinitial);
        fillQ(qinitial, conf.nu0(), conf.nufreq(), conf.cx(), conf.cy(), conf.cz(), 0., 0., 1., 0., 1., 0., 1.);
        SynchronizeDevice(qinitial);

        // Run serial
        MPI_Barrier(MPI_COMM_WORLD);
        double eserial = MPI_Wtime();
        parareal.DoSerial();
        eserial = MPI_Wtime() - eserial;

        // Save reference
        ConvectionField qreference = q;
        SynchronizeHost(qreference);

        // Fill initial solution again for the parallel run
        SynchronizeHost(qinitial);
        fillQ(qinitial, conf.nu0(), conf.nufreq(), conf.cx(), conf.cy(), conf.cz(), 0., 0., 1., 0., 1., 0., 1.);
        SynchronizeDevice(qinitial);

        // Run parallel
        MPI_Barrier(MPI_COMM_WORLD);
        double eparallel = MPI_Wtime();
        parareal.DoParallel();
        eparallel = MPI_Wtime() - eparallel;

        // Output (last rank only): run times, speedup, error and the fields in result.mat
        MPI_Barrier(MPI_COMM_WORLD);
        if (isLast)
        {
            double e = computeErrorReference(q, qreference);
            std::cout << "\n"
                << "Serial run time: " << eserial << "\n"
                << "Parallel run time: " << eparallel << "\n"
                << "Speedup: " << eserial / eparallel << "\n"
                << "Maximal speedup: " << maxSpeedup << "\n"
                << "Error at end: " << e << "\n"
                << std::endl;

            MatFile matfile("result.mat");
            matfile.addField("q", q);
            matfile.addField("qreference", qreference);
        }
    }
    else if (conf.mode() == ModeSerial)
    {
        // Fill initial solution
        SynchronizeHost(qinitial);
        fillQ(qinitial, conf.nu0(), conf.nufreq(), conf.cx(), conf.cy(), conf.cz(), 0., 0., 1., 0., 1., 0., 1.);
        SynchronizeDevice(qinitial);

        // Run serial, sampling wall time and the energy counters before and after
        double e = MPI_Wtime();
        double energyStart = energy();
        double deviceEnergyStart = deviceEnergy();

        parareal.DoSerial();

        e = MPI_Wtime() - e;
        double energyEnd = energy();
        double deviceEnergyEnd = deviceEnergy();

        // The device share is subtracted from the node figure; network and blower are
        // computed as run time times powerNetwork / powerBlower (defined elsewhere)
        const double totDevice = totalEnergy(deviceEnergyStart, deviceEnergyEnd);
        const double totNode = totalEnergy(energyStart, energyEnd) - totDevice;
        const double totNetwork = e * powerNetwork;
        const double totBlower = e * powerBlower;
        const double totEnergy = totNode + totDevice + totNetwork + totBlower;

        // Output (last rank only); the bracketed value is the energy divided by the run time
        MPI_Barrier(MPI_COMM_WORLD);
        if (isLast)
        {
            std::cout << "\n" << "Serial run time: " << e << "\n";
            std::printf("Node energy   : %8f J  (%8.3e W/node)\n", totNode   , totNode/e);
            std::printf("Device energy : %8f J  (%8.3e W/node)\n", totDevice , totDevice/e);
            std::printf("Network energy: %8f J  (%8.3e W/node)\n", totNetwork, totNetwork/e);
            std::printf("Blower energy : %8f J  (%8.3e W/node)\n", totBlower , totBlower/e);
            std::printf("Total energy  : %8f J  (%8.3e W/node)\n", totEnergy , totEnergy/e);
            std::cout << std::endl;
        }
    }
    else if (conf.mode() == ModeParallel)
    {
        // Fill initial solution
        SynchronizeHost(qinitial);
        fillQ(qinitial, conf.nu0(), conf.nufreq(), conf.cx(), conf.cy(), conf.cz(), 0., 0., 1., 0., 1., 0., 1.);
        SynchronizeDevice(qinitial);

        // Run serial to obtain the reference solution (this message is printed by every rank)
        parareal.DoSerial();
        std::cout << " -- The serial computation is done\n";
        ConvectionField qreference = q;

        // Run parallel, sampling wall time and the energy counters before and after
        MPI_Barrier(MPI_COMM_WORLD);
        double e = MPI_Wtime();
        double energyStart = energy();
        double deviceEnergyStart = deviceEnergy();

        parareal.DoParallel();

        MPI_Barrier(MPI_COMM_WORLD);
        e = MPI_Wtime() - e;

        std::cout << " -- The parallel computation is done\n";
        double energyEnd = energy();
        double deviceEnergyEnd = deviceEnergy();

        // Here totalEnergy additionally receives the communicator, and the network and
        // blower figures are multiplied by the number of ranks
        const double totDevice = totalEnergy(deviceEnergyStart, deviceEnergyEnd, MPI_COMM_WORLD);
        const double totNode = totalEnergy(energyStart, energyEnd, MPI_COMM_WORLD) - totDevice;
        const double totNetwork = e * powerNetwork * commsize;
        const double totBlower = e * powerBlower * commsize;
        const double totEnergy = totNode + totDevice + totNetwork + totBlower;

        // Compute error
        double error = computeErrorReference(q, qreference);

        // Output (last rank only)
        MPI_Barrier(MPI_COMM_WORLD);
        if (isLast)
        {
            // converts a total energy into an average power per rank
            const double fac = 1./e/commsize;

            std::cout << std::endl;
            std::printf("Parallel run time: %f s\n", e);
            std::printf("Node energy   : %8f J  (%8.3e W/node)\n", totNode   , fac*totNode);
            std::printf("Device energy : %8f J  (%8.3e W/node)\n", totDevice , fac*totDevice);
            std::printf("Network energy: %8f J  (%8.3e W/node)\n", totNetwork, fac*totNetwork);
            std::printf("Blower energy : %8f J  (%8.3e W/node)\n", totBlower , fac*totBlower);
            std::printf("Total energy  : %8f J  (%8.3e W/node)\n", totEnergy , fac*totEnergy);
            std::printf("Error of parareal: %.4e\n", error);
            std::cout << std::endl;
        }
    }
    else if (conf.mode() == ModeTiming)
    {
        // Fill initial solution
        SynchronizeHost(qinitial);
        fillQ(qinitial, conf.nu0(), conf.nufreq(), conf.cx(), conf.cy(), conf.cz(), 0., 0., 1., 0., 1., 0., 1.);
        SynchronizeDevice(qinitial);

        // Run the timed parallel solver, which fills the vector of timings
        std::vector<double> times;
        MPI_Barrier(MPI_COMM_WORLD);
        parareal.DoTimedParallel(times);

        // Gather on root; the timings of rank p are stored at [p*s, (p+1)*s)
        // NOTE(review): &times[0] presumes DoTimedParallel produced at least one entry,
        // and the same number of entries on every rank - confirm
        const int s = times.size();
        std::vector<double> timesGlobal;
        timesGlobal.resize(s * commsize);
        MPI_Gather(&times[0], s, MPI_DOUBLE, &timesGlobal[0], s, MPI_DOUBLE, 0, MPI_COMM_WORLD);

        // Output: one row per timing entry, one column per rank
        if (isRoot)
        {
            std::cout << "\nTimes:\n";
            for(int i = 0; i < s; ++i)
            {
                for(int p = 0; p < commsize; ++p)
                {
                    std::cout << std::scientific << std::setprecision(6) << timesGlobal[p*s + i] << "   ";
                }
                std::cout << "\n";
            }
        }
    }

    // Finalize
    MPI_Finalize();

    return 0;
}