// test copy
TEST(LaunchConfigurationUnittest, Copy)
{
    IJKSize calculationDomain;
    IJBoundary boundary;
    calculationDomain.Init(5, 7, 0);
    boundary.Init(-1, 3, -7, 1);

    LaunchConfigurationImpl launchConfig1, launchConfig2;
    launchConfig1.Init(calculationDomain, boundary);
    launchConfig2 = launchConfig1;

    // check boundary
    ASSERT_EQ(launchConfig1.boundary().iMinusOffset(), launchConfig2.boundary().iMinusOffset());
    ASSERT_EQ(launchConfig1.boundary().iPlusOffset(), launchConfig2.boundary().iPlusOffset());
    ASSERT_EQ(launchConfig1.boundary().jMinusOffset(), launchConfig2.boundary().jMinusOffset());
    ASSERT_EQ(launchConfig1.boundary().jPlusOffset(), launchConfig2.boundary().jPlusOffset());

    // check blocks
    ASSERT_EQ(launchConfig1.blockConfigurations().size(), launchConfig2.blockConfigurations().size());
    for(int i = 0; i < static_cast<int>(launchConfig1.blockConfigurations().size()); ++i)
    {
        ASSERT_EQ(launchConfig1.blockConfigurations()[i].iBlockIndex, launchConfig2.blockConfigurations()[i].iBlockIndex);
        ASSERT_EQ(launchConfig1.blockConfigurations()[i].iStart, launchConfig2.blockConfigurations()[i].iStart);
        ASSERT_EQ(launchConfig1.blockConfigurations()[i].iEnd, launchConfig2.blockConfigurations()[i].iEnd);
        ASSERT_EQ(launchConfig1.blockConfigurations()[i].jBlockIndex, launchConfig2.blockConfigurations()[i].jBlockIndex);
        ASSERT_EQ(launchConfig1.blockConfigurations()[i].jStart, launchConfig2.blockConfigurations()[i].jStart);
        ASSERT_EQ(launchConfig1.blockConfigurations()[i].jEnd, launchConfig2.blockConfigurations()[i].jEnd);
    }
}
void configuration_test_round(int iDomain, int iMinusBoundary, int iPlusBoundary,
    int jDomain, int jMinusBoundary, int jPlusBoundary)
{
    IJKSize calculationDomain;
    IJBoundary boundary;
    calculationDomain.Init(iDomain, jDomain, 0);
    boundary.Init(iMinusBoundary, iPlusBoundary, jMinusBoundary, jPlusBoundary);

    LaunchConfigurationImpl launchConfig;
    launchConfig.Init(calculationDomain, boundary);

    // check the whole domain with a boundary, plus a margin of one block around it
    for(int i = iMinusBoundary - iBlockSize - 1; i < iDomain + iPlusBoundary + iBlockSize + 1; ++i)
    {
        for(int j = jMinusBoundary - jBlockSize - 1; j < jDomain + jPlusBoundary + jBlockSize + 1; ++j)
        {
            // check the inside is covered exactly once
            if(i >= iMinusBoundary && i < iDomain + iPlusBoundary &&
               j >= jMinusBoundary && j < jDomain + jPlusBoundary)
            {
                ASSERT_EQ(1, count_block_occurrences(launchConfig.blockConfigurations(), i, j));
            }
            // check the outside is not covered
            else
            {
                ASSERT_EQ(0, count_block_occurrences(launchConfig.blockConfigurations(), i, j));
            }
        }
    }
}
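The tests above rely on a count_block_occurrences helper that is not shown here. The following is a minimal sketch of what it might look like: it counts how many block configurations cover the global grid point (i, j). The mapping of (iBlockIndex, iStart, iEnd) to a half-open global interval via the iBlockSize/jBlockSize constants is an assumption; the library's actual convention may differ (e.g. inclusive end offsets).

// Hypothetical helper assumed by the tests above; the half-open interval
// convention and the block-index-to-global-coordinate mapping are assumptions.
template<typename TBlockConfigurations>
int count_block_occurrences(const TBlockConfigurations& blocks, int i, int j)
{
    int count = 0;
    for(int b = 0; b < static_cast<int>(blocks.size()); ++b)
    {
        const int iFirst = blocks[b].iBlockIndex * iBlockSize + blocks[b].iStart;
        const int iLast  = blocks[b].iBlockIndex * iBlockSize + blocks[b].iEnd;
        const int jFirst = blocks[b].jBlockIndex * jBlockSize + blocks[b].jStart;
        const int jLast  = blocks[b].jBlockIndex * jBlockSize + blocks[b].jEnd;
        if(i >= iFirst && i < iLast && j >= jFirst && j < jLast)
            ++count;
    }
    return count;
}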
TEST_F(DataFieldOpenMPStorageUnittest, ExternalStorage)
{
    // setup external memory
    IJKSize paddedSize = storage3D_.paddedSize();
    int size = paddedSize.iSize() * paddedSize.jSize() * paddedSize.kSize();
    std::vector<DataType3D> memory1, memory2;
    memory1.resize(size, 11.0);
    memory2.resize(size, 22.0);

    // define external storages
    ExternalStorage<DataType3D> externalStorage1, externalStorage2;
    externalStorage1.Init(&memory1[0], paddedSize);
    externalStorage2.Init(&memory2[0], paddedSize);

    // storage backed by the first external storage
    DataFieldOpenMPStorage<DataType3D, IJKStorageFormat> storage;
    storage.Init(externalStorage1, calculationDomain_, kBoundary_);

    // check the memory is ok
    ASSERT_EQ(&memory1[0], storage.pStorageBase());
    ASSERT_EQ(11.0, *storage.pStorageBase());

    // set the second external storage
    storage.SetExternalStorage(externalStorage2);

    // check the memory is ok
    ASSERT_EQ(&memory2[0], storage.pStorageBase());
    ASSERT_EQ(22.0, *storage.pStorageBase());
}
void UnittestEnvironment::SetUp()
{
    // make sure the repository is null
    assert(!pRepository_);
    pRepository_ = new HoriDiffRepository();

    // prepare the repository
    calculationDomain_.Init(
        Options::getInstance().domain_.iSize(),
        Options::getInstance().domain_.jSize(),
        Options::getInstance().domain_.kSize()
    );
    pRepository_->Init(calculationDomain_);
    pRepository_->AllocateDataFields();
    pRepository_->SetInitalValues();

    IJKSize globalDomainSize;
    globalDomainSize.Init(
        calculationDomain_.iSize() + cNumBoundaryLines*2,
        calculationDomain_.jSize() + cNumBoundaryLines*2,
        calculationDomain_.kSize()
    );

    int subdomainPosition[4];
    subdomainPosition[0] = cNumBoundaryLines + 1;
    subdomainPosition[1] = cNumBoundaryLines + 1;
    subdomainPosition[2] = cNumBoundaryLines + calculationDomain_.iSize();
    subdomainPosition[3] = cNumBoundaryLines + calculationDomain_.jSize();

    // initialize the halo update configuration
    communicationConfiguration_.Init(true, true, false, false, false, false, globalDomainSize, 1, subdomainPosition);
}
virtual void SetUp()
{
    calculationDomain_.Init(12, 14, 60);
    kBoundary_.Init(-1, 2);
    storage3D_.Init(calculationDomain_, kBoundary_);
    storage2D_.Init(calculationDomain_, kBoundary_);
    storage1D_.Init(calculationDomain_, kBoundary_);
}
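For context, a plausible fixture declaration matching this SetUp is sketched below. Only the 3D member type is confirmed by the ExternalStorage test above; the DataType2D/DataType1D element types and the 2D/1D storage formats are assumptions.

// Hypothetical fixture declaration; the 2D and 1D member types are
// assumptions, only the 3D member type appears in the tests above.
class DataFieldOpenMPStorageUnittest : public ::testing::Test
{
protected:
    virtual void SetUp(); // body as shown above

    IJKSize calculationDomain_;
    KBoundary kBoundary_;
    DataFieldOpenMPStorage<DataType3D, IJKStorageFormat> storage3D_;
    DataFieldOpenMPStorage<DataType2D, IJStorageFormat> storage2D_;
    DataFieldOpenMPStorage<DataType1D, IStorageFormat> storage1D_;
};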
template<typename TStorage>
void randomStorageInit(TStorage& storage)
{
    IJKSize allocSize = storage.allocatedSize();

    // move the iterator from the origin back to the base of the allocation
    typename TStorage::StorageIteratorType iter = storage.originIterator();
    iter.Advance(
        -storage.originOffset().iIndex(),
        -storage.originOffset().jIndex(),
        -storage.originOffset().kIndex()
    );

    // fill every allocated element with a random value
    for(int i = 0; i < allocSize.iSize(); ++i)
    {
        for(int j = 0; j < allocSize.jSize(); ++j)
        {
            for(int k = 0; k < allocSize.kSize(); ++k)
            {
                iter.At(i, j, k) = static_cast<typename TStorage::ValueType>(rand());
            }
        }
    }
}
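A hypothetical call site, assuming the fixture sketched earlier; the test name, the fixed seed, and the spot-check assertion are illustrations, not part of the original suite.

// Hypothetical usage: fill the fixture storage with random values and
// spot-check that the origin element was written. rand() is almost surely
// nonzero, so this is a sanity check rather than a strict guarantee.
TEST_F(DataFieldOpenMPStorageUnittest, RandomFill)
{
    srand(42); // fixed seed for reproducibility
    randomStorageInit(storage3D_);
    ASSERT_NE(0.0, storage3D_.originIterator().At(0, 0, 0));
}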
int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    int commsize, commrank;
    MPI_Comm_size(MPI_COMM_WORLD, &commsize);
    MPI_Comm_rank(MPI_COMM_WORLD, &commrank);
    const bool isRoot = commrank == 0;
    const bool isLast = commrank == commsize - 1;

    if (isRoot)
        std::cout << "Initialization...\n" << std::endl;

    RuntimeConfiguration conf(argc, argv);

    // Compute my start and end time
    const double timeStart = conf.timeSliceSize() * commrank;
    const double timeEnd = conf.timeSliceSize() * (commrank + 1);

    if (isRoot)
        std::cout << "Running with:\n"
            << " - initial diffusion coefficient: " << conf.nu0() << "\n"
            << " - frequency of diffusion coefficient: " << conf.nufreq() << "\n"
            << " - advection velocity in x: " << conf.cx() << "\n"
            << " - advection velocity in y: " << conf.cy() << "\n"
            << " - advection velocity in z: " << conf.cz() << "\n"
            << " - spatial discretization step: " << conf.dx() << "\n"
            << " - endtime: " << conf.endTime() << "\n"
            << " - number of time slices: " << conf.timeSlices() << "\n"
            << " - time slice size: " << conf.timeSliceSize() << "\n"
            << " - CFL fine: " << conf.cflFine() << "\n"
            << " - CFL coarse: " << conf.cflCoarse() << "\n"
            << " - timestep size fine: " << conf.dtFine() << "\n"
            << " - timestep size coarse: " << conf.dtCoarse() << "\n"
            << " - timesteps per slice fine propagator: " << conf.timeStepsFinePerTimeSlice() << "\n"
            << " - timesteps per slice coarse propagator: " << conf.timeStepsCoarsePerTimeSlice() << "\n"
            << " - parareal iterations: " << conf.kmax() << "\n"
            << " - asynchronous communications: " << (conf.async() ? "Enabled" : "Disabled") << "\n"
            << " - intermediate fields in mat files: " << (conf.mat() ? "Yes" : "No") << "\n"
            << std::endl;

    // Calculation domain and boundaries
    IJKSize domain;
    domain.Init(conf.gridSize(), conf.gridSize(), conf.gridSize());
    KBoundary kboundary;
    kboundary.Init(-convectionBoundaryLines, convectionBoundaryLines);

    // Initialize fields
    ConvectionField q, qinitial;
    q.Init("q", domain, kboundary);
    qinitial.Init("qinitial", domain, kboundary);

    Convection convection(conf.gridSize(), conf.gridSize(), conf.gridSize(),
        conf.dx(), conf.nu0(), conf.nufreq(), conf.cx(), conf.cy(), conf.cz());

    // Initialize parareal
    Parareal<Convection, ConvectionField> parareal(convection, qinitial, q, timeStart, conf, MPI_COMM_WORLD);

    if (conf.mode() == ModeCompare)
    {
        // Measure time required by the fine and coarse propagators
        const int tauSamples = 4;
        double tauF = MPI_Wtime();
        convection.DoRK4(qinitial, qinitial, 0., conf.dtFine(), tauSamples*conf.timeStepsFinePerTimeSlice());
        SynchronizeCUDA();
        tauF = MPI_Wtime() - tauF;

        double tauG = MPI_Wtime();
        convection.DoEuler(qinitial, qinitial, 0., conf.dtCoarse(), tauSamples*conf.timeStepsCoarsePerTimeSlice());
        SynchronizeCUDA();
        tauG = MPI_Wtime() - tauG;

        const double tauRatio = tauG / tauF;
        const double Nit_Np = static_cast<double>(conf.kmax()) / commsize;
        const double maxSpeedup = 1. / (tauRatio * (1. + Nit_Np) + Nit_Np);

        // Fill initial solution
        SynchronizeHost(qinitial);
        fillQ(qinitial, conf.nu0(), conf.nufreq(), conf.cx(), conf.cy(), conf.cz(), 0., 0., 1., 0., 1., 0., 1.);
        SynchronizeDevice(qinitial);

        // Run serial
        MPI_Barrier(MPI_COMM_WORLD);
        double eserial = MPI_Wtime();
        parareal.DoSerial();
        eserial = MPI_Wtime() - eserial;

        // Save reference
        ConvectionField qreference = q;
        SynchronizeHost(qreference);

        // Fill initial solution again
        SynchronizeHost(qinitial);
        fillQ(qinitial, conf.nu0(), conf.nufreq(), conf.cx(), conf.cy(), conf.cz(), 0., 0., 1., 0., 1., 0., 1.);
        SynchronizeDevice(qinitial);

        // Run parallel
        MPI_Barrier(MPI_COMM_WORLD);
        double eparallel = MPI_Wtime();
        parareal.DoParallel();
        eparallel = MPI_Wtime() - eparallel;

        // Output
        MPI_Barrier(MPI_COMM_WORLD);
        if (isLast)
        {
            double e = computeErrorReference(q, qreference);
            std::cout << "\n"
                << "Serial run time: " << eserial << "\n"
                << "Parallel run time: " << eparallel << "\n"
                << "Speedup: " << eserial / eparallel << "\n"
                << "Maximal speedup: " << maxSpeedup << "\n"
                << "Error at end: " << e << "\n"
                << std::endl;

            MatFile matfile("result.mat");
            matfile.addField("q", q);
            matfile.addField("qreference", qreference);
        }
    }
    else if (conf.mode() == ModeSerial)
    {
        // Fill initial solution
        SynchronizeHost(qinitial);
        fillQ(qinitial, conf.nu0(), conf.nufreq(), conf.cx(), conf.cy(), conf.cz(), 0., 0., 1., 0., 1., 0., 1.);
        SynchronizeDevice(qinitial);

        // Run serial
        double e = MPI_Wtime();
        double energyStart = energy();
        double deviceEnergyStart = deviceEnergy();
        parareal.DoSerial();
        e = MPI_Wtime() - e;
        double energyEnd = energy();
        double deviceEnergyEnd = deviceEnergy();

        const double totDevice = totalEnergy(deviceEnergyStart, deviceEnergyEnd);
        const double totNode = totalEnergy(energyStart, energyEnd) - totDevice;
        const double totNetwork = e * powerNetwork;
        const double totBlower = e * powerBlower;
        const double totEnergy = totNode + totDevice + totNetwork + totBlower;

        // Output
        MPI_Barrier(MPI_COMM_WORLD);
        if (isLast)
        {
            std::cout << "\n" << "Serial run time: " << e << "\n";
            std::printf("Node energy   : %8f J (%8.3e W/node)\n", totNode   , totNode/e);
            std::printf("Device energy : %8f J (%8.3e W/node)\n", totDevice , totDevice/e);
            std::printf("Network energy: %8f J (%8.3e W/node)\n", totNetwork, totNetwork/e);
            std::printf("Blower energy : %8f J (%8.3e W/node)\n", totBlower , totBlower/e);
            std::printf("Total energy  : %8f J (%8.3e W/node)\n", totEnergy , totEnergy/e);
            std::cout << std::endl;
        }
    }
    else if (conf.mode() == ModeParallel)
    {
        // Fill initial solution
        SynchronizeHost(qinitial);
        fillQ(qinitial, conf.nu0(), conf.nufreq(), conf.cx(), conf.cy(), conf.cz(), 0., 0., 1., 0., 1., 0., 1.);
        SynchronizeDevice(qinitial);

        // Run serial to obtain the reference solution
        parareal.DoSerial();
        std::cout << " -- The serial computation is done\n";
        ConvectionField qreference = q;

        // Run parallel
        MPI_Barrier(MPI_COMM_WORLD);
        double e = MPI_Wtime();
        double energyStart = energy();
        double deviceEnergyStart = deviceEnergy();
        parareal.DoParallel();
        MPI_Barrier(MPI_COMM_WORLD);
        e = MPI_Wtime() - e;
        std::cout << " -- The parallel computation is done\n";
        double energyEnd = energy();
        double deviceEnergyEnd = deviceEnergy();

        const double totDevice = totalEnergy(deviceEnergyStart, deviceEnergyEnd, MPI_COMM_WORLD);
        const double totNode = totalEnergy(energyStart, energyEnd, MPI_COMM_WORLD) - totDevice;
        const double totNetwork = e * powerNetwork * commsize;
        const double totBlower = e * powerBlower * commsize;
        const double totEnergy = totNode + totDevice + totNetwork + totBlower;

        // Compute error
        double error = computeErrorReference(q, qreference);

        // Output
        MPI_Barrier(MPI_COMM_WORLD);
        if (isLast)
        {
            const double fac = 1./e/commsize;
            std::cout << std::endl;
            std::printf("Parallel run time: %f s\n", e);
            std::printf("Node energy   : %8f J (%8.3e W/node)\n", totNode   , fac*totNode);
            std::printf("Device energy : %8f J (%8.3e W/node)\n", totDevice , fac*totDevice);
            std::printf("Network energy: %8f J (%8.3e W/node)\n", totNetwork, fac*totNetwork);
            std::printf("Blower energy : %8f J (%8.3e W/node)\n", totBlower , fac*totBlower);
            std::printf("Total energy  : %8f J (%8.3e W/node)\n", totEnergy , fac*totEnergy);
            std::printf("Error of parareal: %.4e\n", error);
            std::cout << std::endl;
        }
    }
    else if (conf.mode() == ModeTiming)
    {
        // Fill initial solution
        SynchronizeHost(qinitial);
        fillQ(qinitial, conf.nu0(), conf.nufreq(), conf.cx(), conf.cy(), conf.cz(), 0., 0., 1., 0., 1., 0., 1.);
        SynchronizeDevice(qinitial);

        // Run timed parallel
        std::vector<double> times;
        MPI_Barrier(MPI_COMM_WORLD);
        parareal.DoTimedParallel(times);

        // Gather on root
        const int s = times.size();
        std::vector<double> timesGlobal;
        timesGlobal.resize(s * commsize);
        MPI_Gather(&times[0], s, MPI_DOUBLE, &timesGlobal[0], s, MPI_DOUBLE, 0, MPI_COMM_WORLD);

        // Output
        if (isRoot)
        {
            std::cout << "\nTimes:\n";
            for(int i = 0; i < s; ++i)
            {
                for(int p = 0; p < commsize; ++p)
                {
                    std::cout << std::scientific << std::setprecision(6) << timesGlobal[p*s + i] << " ";
                }
                std::cout << "\n";
            }
        }
    }

    // Finalize
    MPI_Finalize();
    return 0;
}
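For reference, the maximal speedup evaluated in ModeCompare corresponds to the following expression, written out from the code: with $\tau_G/\tau_F$ the measured coarse-to-fine propagator runtime ratio (tauRatio), $K$ the number of parareal iterations (conf.kmax()) and $N_p$ the number of time slices (commsize),

$$ S_{\max} = \left[ \frac{\tau_G}{\tau_F}\left(1 + \frac{K}{N_p}\right) + \frac{K}{N_p} \right]^{-1}, $$

which is the form of the usual parareal speedup bound: the coarse propagator cost and the $K/N_p$ iteration overhead both limit the achievable speedup.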