예제 #1
0
/* ------------------------------------------------------------ */
/**
 * Time the distributed multiply on a synthetic problem and report the
 * per-trial averages.
 *
 * Returns immediately unless checkEnvEnabled__ ("BENCH", 1) is true.
 * Otherwise it builds a P_row x P_col topology over MPI_COMM_WORLD,
 * allocates A (m x k), B (k x n) and C (m x n), randomizes A and B, runs
 * summa_mult MAX_TRIALS times (zeroing C before every run), divides the
 * three timers by MAX_TRIALS and hands them to summarize__.
 *
 * m, n, k       problem dimensions as passed to summa_alloc / summa_mult
 * P_row, P_col  grid dimensions passed to summa_createTopology
 * s             forwarded unchanged to summa_mult and summarize__
 *               (NOTE(review): presumably the panel width -- confirm)
 * type          forwarded unchanged to summa_mult and summarize__
 *               (NOTE(review): presumably selects a variant -- confirm)
 */
void
benchmark__ (int m, int n, int k, int P_row, int P_col, int s, int type)
{
  /* Slots of the timer array, and how many timed repetitions to run. */
  enum { TOTAL = 0, COMP = 1, COMM = 2, NUM_TIMERS = 3 };
  enum { MAX_TRIALS = 10 };

  if (!checkEnvEnabled__ ("BENCH", 1)) {
    return;
  }

  MPI_Comm comm2d = summa_createTopology (MPI_COMM_WORLD, P_row, P_col);
  const int rank = mpih_getRank (comm2d);

  if (rank == 0) {
    mpih_debugmsg (comm2d, "Beginning benchmark...\n");
  }

  /* Create a synthetic problem to benchmark. */
  double* A_local = summa_alloc (m, k, comm2d);
  double* B_local = summa_alloc (k, n, comm2d);
  double* C_local = summa_alloc (m, n, comm2d);

  summa_randomize (m, k, A_local, comm2d);
  summa_randomize (k, n, B_local, comm2d);

  /* Standard zero-initialization; replaces the legacy bzero() call. */
  double t[NUM_TIMERS] = { 0.0 };

  if (rank == 0) {
    mpih_debugmsg (comm2d, "Multiplying [%d trials]...\n", MAX_TRIALS);
  }

  for (int trial = 0; trial < MAX_TRIALS; ++trial) {
    /* C is cleared outside the timed region so only the multiply is
       measured. */
    summa_setZero (m, n, C_local, comm2d);

    double t_start = MPI_Wtime ();
    /* t[COMP] and t[COMM] are updated by summa_mult itself; they are
       presumably accumulated across calls, since they are averaged
       below -- confirm against summa_mult. */
    summa_mult (m, n, k, s, A_local, B_local, C_local, comm2d,
                &t[COMP], &t[COMM], type);
    t[TOTAL] += MPI_Wtime () - t_start;
  }

  /* Convert the accumulated times into per-trial averages. */
  t[TOTAL] = t[TOTAL] / MAX_TRIALS;
  t[COMP] = t[COMP] / MAX_TRIALS;
  t[COMM] = t[COMM] / MAX_TRIALS;

  if (rank == 0) {
    mpih_debugmsg (comm2d, "Done!\n");
  }
  summarize__ (m, n, k, s, t, NUM_TIMERS, comm2d, 0, type);

  summa_free (A_local, comm2d);
  summa_free (B_local, comm2d);
  summa_free (C_local, comm2d);
  summa_freeTopology (comm2d);
}
예제 #2
0
/**
 * Check the distributed multiply against a sequential reference.
 *
 * Returns immediately unless checkEnvEnabled__ ("VERIFY", 1) is true.
 * Rank 0 builds the test problem with setupSeqProblem__, times a
 * sequential mat_multiply, and computes a per-entry bound with
 * mat_multiplyErrorbound.  A and B are then distributed from rank 0,
 * multiplied with summa_mult, and every locally held entry of the result
 * is compared with the matching entry of the distributed reference.  An
 * entry whose absolute difference exceeds
 * C_bound * 3 * k * DBL_EPSILON is reported and then trips mpih_assert.
 *
 * m, n, k       problem dimensions as passed to the mat_* / summa_* calls
 * P_row, P_col  grid dimensions passed to summa_createTopology
 * s             forwarded unchanged to summa_mult
 *               (NOTE(review): presumably the panel width -- confirm)
 */
static
void
verify__ (int m, int n, int k, int P_row, int P_col, int s)
{
  if (!checkEnvEnabled__ ("VERIFY", 1)) {
    return;
  }

  MPI_Comm comm2d = summa_createTopology (MPI_COMM_WORLD, P_row, P_col);
  int rank = mpih_getRank (comm2d);

  /* Full (undistributed) matrices; they stay NULL on every rank but 0. */
  double* A = NULL;
  double* B = NULL;
  double* C_soln = NULL;
  double* C_bound = NULL;

  /* Whoever has rank == 0 will create the test problem. */
  if (rank == 0) {
    setupSeqProblem__ (m, n, k, &A, &B, &C_soln, &C_bound);

    /* Measure time for the sequential problem. */
    mat_setZero (m, n, C_soln);
    double t_start = MPI_Wtime ();
    mat_multiply (m, n, k, A, m, B, k, C_soln, m);
    double dt_seq = MPI_Wtime () - t_start;
    mpih_debugmsg (MPI_COMM_WORLD, "t_seq = %g s\n", dt_seq);

    /* Recompute, to get the error bound this time */
    mpih_debugmsg (MPI_COMM_WORLD, "Estimating error bound...\n");
    mat_multiplyErrorbound (m, n, k, A, m, B, k, C_soln, m, C_bound, m);
  }

  /* Next, run the (untrusted) SUMMA algorithm */
  if (rank == 0) {
    mpih_debugmsg (comm2d, "Distributing A, B, and C...\n");
  }
  double* A_local = summa_distribute (m, k, A, 0, comm2d);
  double* B_local = summa_distribute (k, n, B, 0, comm2d);
  double* C_local = summa_alloc (m, n, comm2d);
  summa_setZero (m, n, C_local, comm2d);

  /* Do multiply */
  if (rank == 0) {
    mpih_debugmsg (comm2d, "Computing C <- C + A*B...\n");
  }
  /* NOTE(review): another call site in this file passes an additional
     trailing `type` argument to summa_mult -- confirm that this call
     matches the current prototype. */
  summa_mult (m, n, k, s, A_local, B_local, C_local, comm2d, NULL, NULL);

  /* Compare the two answers (in parallel) */
  if (rank == 0) {
    mpih_debugmsg (comm2d, "Verifying...\n");
  }
  int rank_row, rank_col;
  summa_getProcCoords (comm2d, &rank_row, &rank_col);
  double* C_soln_local = summa_distribute (m, n, C_soln, 0, comm2d);
  double* C_bound_local = summa_distribute (m, n, C_bound, 0, comm2d);
  int m_local = mm1d_getBlockLength (m, P_row, rank_row);
  int n_local = mm1d_getBlockLength (n, P_col, rank_col);

  /* Local blocks are indexed as i + j*m_local, i.e. with leading
     dimension m_local; (i, j) below are indices into the local block. */
  for (int i = 0; i < m_local; ++i) {
    for (int j = 0; j < n_local; ++j) {
      const int idx = i + j*m_local;
      const double errbound = C_bound_local[idx] * 3.0 * k * DBL_EPSILON;
      const double c_trusted = C_soln_local[idx];
      const double c_untrusted = C_local[idx];
      double delta = fabs (c_untrusted - c_trusted);
      if (delta > errbound) {
        /* The arguments must follow the order of the conversions: the two
           %d indices first, then the four %g values.  Passing them in any
           other order mismatches types in the variadic call. */
        mpih_debugmsg (comm2d,
                       "*** Entry (%d, %d) --- Error bound violated ***\n    ==> |%g - %g| == %g > %g\n",
                       i, j, c_untrusted, c_trusted, delta, errbound);
      }
      mpih_assert (delta <= errbound);
    }
  }
  if (rank == 0) {
    mpih_debugmsg (comm2d, "Passed!\n");
  }

  /* Clean-up */
  summa_free (A_local, comm2d);
  summa_free (B_local, comm2d);
  summa_free (C_local, comm2d);
  summa_free (C_soln_local, comm2d);
  summa_free (C_bound_local, comm2d);
  if (rank == 0) {
    free (A);
    free (B);
    free (C_soln);
    free (C_bound);
  }
  summa_freeTopology (comm2d);
}