/* ------------------------------------------------------------ */

/*
 * Times repeated summa_mult() calls on a synthetic problem and hands
 * the per-trial averages to summarize__().
 *
 * Returns immediately when checkEnvEnabled__ ("BENCH", 1) is false.
 *
 *   m, n, k       Problem dimensions; local storage is obtained from
 *                 summa_alloc() for the (m, k), (k, n) and (m, n)
 *                 operands.
 *   P_row, P_col  Process-grid shape handed to summa_createTopology().
 *   s             Forwarded unchanged to summa_mult() and summarize__().
 *   type          Forwarded unchanged to summa_mult() and summarize__().
 */
void
benchmark__ (int m, int n, int k, int P_row, int P_col, int s, int type)
{
  /* Slots of the timing array passed to summarize__(). */
  enum { TOTAL = 0, COMP = 1, COMM = 2, NUM_TIMERS = 3 };
  enum { MAX_TRIALS = 10 };

  if (!checkEnvEnabled__ ("BENCH", 1))
    return;

  MPI_Comm grid = summa_createTopology (MPI_COMM_WORLD, P_row, P_col);
  const int is_root = (mpih_getRank (grid) == 0);

  if (is_root)
    mpih_debugmsg (grid, "Beginning benchmark...\n");

  /* Build the synthetic operands; C is reset before every trial. */
  double* A_blk = summa_alloc (m, k, grid);
  double* B_blk = summa_alloc (k, n, grid);
  double* C_blk = summa_alloc (m, n, grid);
  summa_randomize (m, k, A_blk, grid);
  summa_randomize (k, n, B_blk, grid);

  double elapsed[NUM_TIMERS] = { 0.0 };

  if (is_root)
    mpih_debugmsg (grid, "Multiplying [%d trials]...\n", MAX_TRIALS);

  for (int rep = 0; rep < MAX_TRIALS; ++rep)
    {
      summa_setZero (m, n, C_blk, grid);

      /* Only the multiply itself is inside the timed region. */
      const double t_begin = MPI_Wtime ();
      summa_mult (m, n, k, s, A_blk, B_blk, C_blk, grid,
                  &elapsed[COMP], &elapsed[COMM],type);
      const double t_end = MPI_Wtime ();
      elapsed[TOTAL] += t_end - t_begin;
    }

  /* Convert every slot into a per-trial average.
     NOTE(review): this presumes summa_mult() adds to the COMP/COMM
     slots on each call rather than overwriting them -- confirm. */
  for (int slot = 0; slot < NUM_TIMERS; ++slot)
    elapsed[slot] /= MAX_TRIALS;

  if (is_root)
    mpih_debugmsg (grid, "Done!\n");

  summarize__ (m, n, k, s, elapsed, NUM_TIMERS, grid, 0,type);

  summa_free (A_blk, grid);
  summa_free (B_blk, grid);
  summa_free (C_blk, grid);
  summa_freeTopology (grid);
}
/*
 * Checks the result of summa_mult() against a sequential multiply of
 * the same problem.
 *
 * Returns immediately when checkEnvEnabled__ ("VERIFY", 1) is false.
 * Otherwise rank 0 of the 2-D topology builds the problem with
 * setupSeqProblem__(), computes the trusted product with
 * mat_multiply() and fills C_bound with mat_multiplyErrorbound().
 * The operands, the trusted product and the bound are handed to
 * summa_distribute() with root 0, and every rank then compares its
 * local entries.  mpih_assert() is invoked on every entry with the
 * condition |untrusted - trusted| <= bound * 3 * k * DBL_EPSILON.
 *
 *   m, n, k       Problem dimensions (A is passed as (m, k), B as
 *                 (k, n), C as (m, n)).
 *   P_row, P_col  Process-grid shape handed to summa_createTopology().
 *   s             Forwarded unchanged to summa_mult().
 */
static void
verify__ (int m, int n, int k, int P_row, int P_col, int s)
{
  if (!checkEnvEnabled__ ("VERIFY", 1))
    return;

  MPI_Comm comm2d = summa_createTopology (MPI_COMM_WORLD, P_row, P_col);
  int rank = mpih_getRank (comm2d);

  /* These stay NULL on every rank except 0. */
  double* A = NULL;
  double* B = NULL;
  double* C_soln = NULL;
  double* C_bound = NULL;

  /* Whoever has rank == 0 will create the test problem. */
  if (rank == 0)
    {
      setupSeqProblem__ (m, n, k, &A, &B, &C_soln, &C_bound);

      /* Measure time for the sequential problem. */
      mat_setZero (m, n, C_soln);
      double t_start = MPI_Wtime ();
      mat_multiply (m, n, k, A, m, B, k, C_soln, m);
      double dt_seq = MPI_Wtime () - t_start;
      mpih_debugmsg (MPI_COMM_WORLD, "t_seq = %g s\n", dt_seq);

      /* Recompute, to get the error bound this time */
      mpih_debugmsg (MPI_COMM_WORLD, "Estimating error bound...\n");
      mat_multiplyErrorbound (m, n, k, A, m, B, k, C_soln, m, C_bound, m);
    }

  /* Next, run the (untrusted) SUMMA algorithm */
  if (rank == 0)
    mpih_debugmsg (comm2d, "Distributing A, B, and C...\n");
  double* A_local = summa_distribute (m, k, A, 0, comm2d);
  double* B_local = summa_distribute (k, n, B, 0, comm2d);
  double* C_local = summa_alloc (m, n, comm2d);
  summa_setZero (m, n, C_local, comm2d);

  /* Do multiply */
  if (rank == 0)
    mpih_debugmsg (comm2d, "Computing C <- C + A*B...\n");
  /* NOTE(review): benchmark__ passes an additional trailing `type`
     argument to summa_mult(); this call does not.  Confirm against
     the summa_mult() prototype which form is intended here. */
  summa_mult (m, n, k, s, A_local, B_local, C_local, comm2d, NULL, NULL);

  /* Compare the two answers (in parallel) */
  if (rank == 0)
    mpih_debugmsg (comm2d, "Verifying...\n");
  int rank_row, rank_col;
  summa_getProcCoords (comm2d, &rank_row, &rank_col);
  double* C_soln_local = summa_distribute (m, n, C_soln, 0, comm2d);
  double* C_bound_local = summa_distribute (m, n, C_bound, 0, comm2d);
  int m_local = mm1d_getBlockLength (m, P_row, rank_row);
  int n_local = mm1d_getBlockLength (n, P_col, rank_col);

  for (int i = 0; i < m_local; ++i)
    {
      for (int j = 0; j < n_local; ++j)
        {
          /* Local blocks are indexed with leading dimension m_local. */
          const int idx = i + j*m_local;
          const double errbound
            = C_bound_local[idx] * 3.0 * k * DBL_EPSILON;
          const double c_trusted = C_soln_local[idx];
          const double c_untrusted = C_local[idx];
          double delta = fabs (c_untrusted - c_trusted);

          /* Written as !(a <= b) rather than (a > b) so that a NaN
             delta also produces the diagnostic before the assertion
             below fires.  The arguments follow the order of the
             conversions in the format string: the two (rank-local)
             indices first, then the four doubles. */
          if (!(delta <= errbound))
            mpih_debugmsg (comm2d,
                           "*** Entry (%d, %d) --- Error bound violated ***\n ==> |%g - %g| == %g > %g\n",
                           i, j, c_untrusted, c_trusted, delta, errbound);
          mpih_assert (delta <= errbound);
        }
    }

  if (rank == 0)
    mpih_debugmsg (comm2d, "Passed!\n");

  /* Clean-up */
  summa_free (A_local, comm2d);
  summa_free (B_local, comm2d);
  summa_free (C_local, comm2d);
  summa_free (C_soln_local, comm2d);
  summa_free (C_bound_local, comm2d);
  if (rank == 0)
    {
      free (A);
      free (B);
      free (C_soln);
      free (C_bound);
    }
  summa_freeTopology (comm2d);
}