static bool newton_euler_step(void* context, real_t max_dt, real_t* t, real_t* x)
{
  START_FUNCTION_TIMER();
  euler_ode_t* integ = context;
  integ->dt = max_dt;
  int N_local = integ->num_local_values;
  int num_iters = 0;
  memcpy(integ->x_old, x, sizeof(real_t) * N_local);
  memcpy(integ->x_new, x, sizeof(real_t) * N_local);
  
  // Make sure the preconditioner computes P = I - dt * J.
  newton_pc_t* precond = newton_solver_preconditioner(integ->newton);
  newton_pc_fix_coefficients(precond, 1.0, -integ->dt, 0.0);

  // Now solve the thing, storing the solution in integ->x_new.
  bool solved = newton_solver_solve(integ->newton, *t + integ->dt, integ->x_new, &num_iters);
  if (solved)
  {
    // Increment the time and the solution.
    *t += integ->dt;
    memcpy(x, integ->x_new, sizeof(real_t) * N_local);
  }
  STOP_FUNCTION_TIMER();
  return solved;
}
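// In equation form, the step above solves the backward Euler relation
//
//   x_new = x_old + dt * f(t + dt, x_new),
//
// which is the residual form used by evaluate_residual() further down:
//
//   R(x_new) = x_new - x_old - dt * f(t + dt, x_new) = 0,
//
// so the preconditioner P = I - dt*J (with J = df/dx) approximates dR/dx.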
// Here's the finite difference approximation of dF/d(xdot) * v. (A sketch of
// the analogous dF/dx * v calculation follows this function.)
static void cpr_finite_diff_dFdxdot_v(void* context, 
                                      int (*F)(void* context, real_t t, real_t* x, real_t* xdot, real_t* Fval), 
                                      real_t t, 
                                      real_t* x, 
                                      real_t* xdot, 
                                      int num_local_rows,
                                      int num_remote_rows,
                                      real_t* v, 
                                      real_t** work, 
                                      real_t* dFdxdot_v)
{
  START_FUNCTION_TIMER();
  real_t eps = sqrt(UNIT_ROUNDOFF);
  real_t eps_inv = 1.0 / eps;

  // work[0] == v
  // work[1] contains F(t, x, xdot).
  // work[2] == xdot + eps*v
  // work[3] == F(t, x, xdot + eps*v)

  // xdot + eps*v -> work[2].
  for (int i = 0; i < num_local_rows + num_remote_rows; ++i)
    work[2][i] = xdot[i] + eps*v[i];

  // F(t, x, xdot + eps*v) -> work[3].
  F(context, t, x, work[2], work[3]);

  // (F(t, x, xdot + eps*v) - F(t, x, xdot)) / eps -> dF/d(xdot) * v
  for (int i = 0; i < num_local_rows; ++i)
    dFdxdot_v[i] = (work[3][i] - work[1][i]) * eps_inv;
  STOP_FUNCTION_TIMER();
}
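// For orientation, here's a minimal sketch of the companion directional 
// difference dF/dx * v that cpr_differencer_compute() below calls as 
// cpr_finite_diff_dFdx_v. The name and body here are a reconstruction from 
// the dF/d(xdot) version above (assuming the same work-array layout), not 
// the actual implementation.
static void sketch_finite_diff_dFdx_v(void* context, 
                                      int (*F)(void* context, real_t t, real_t* x, real_t* xdot, real_t* Fval), 
                                      real_t t, 
                                      real_t* x, 
                                      real_t* xdot, 
                                      int num_local_rows,
                                      int num_remote_rows,
                                      real_t* v, 
                                      real_t** work, 
                                      real_t* dFdx_v)
{
  real_t eps = sqrt(UNIT_ROUNDOFF);

  // x + eps*v -> work[2].
  for (int i = 0; i < num_local_rows + num_remote_rows; ++i)
    work[2][i] = x[i] + eps*v[i];

  // F(t, x + eps*v, xdot) -> work[3].
  F(context, t, work[2], xdot, work[3]);

  // (F(t, x + eps*v, xdot) - F(t, x, xdot)) / eps -> dF/dx * v, reusing the 
  // unperturbed F(t, x, xdot) already stored in work[1].
  for (int i = 0; i < num_local_rows; ++i)
    dFdx_v[i] = (work[3][i] - work[1][i]) / eps;
}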
void exchanger_exchange(exchanger_t* ex, void* data, int stride, int tag, MPI_Datatype type)
{
  START_FUNCTION_TIMER();
  int token = exchanger_start_exchange(ex, data, stride, tag, type);
  exchanger_finish_exchange(ex, token);
  STOP_FUNCTION_TIMER();
}
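// Usage sketch (the field pointer, stride, and tag here are hypothetical): 
// the blocking call above is just this start/finish pair, so a caller that 
// wants to overlap communication with local work can split it explicitly.
static void sketch_overlapped_exchange(exchanger_t* ex, real_t* field, int stride, int tag)
{
  int token = exchanger_start_exchange(ex, field, stride, tag, MPI_REAL_T);
  // ... do local work here that doesn't touch the exchanged (remote) values ...
  exchanger_finish_exchange(ex, token);
}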
void gmls_functional_compute(gmls_functional_t* functional,
                             int i,
                             real_t t,
                             multicomp_poly_basis_t* poly_basis,
                             real_t* solution,
                             real_t* lambdas)
{
  START_FUNCTION_TIMER();
  if (functional->surface_quad_rule != NULL)
  {
    surface_integral_set_domain(functional->surface_quad_rule, i);
    int num_quad_points = surface_integral_num_points(functional->surface_quad_rule);
    point_t quad_points[num_quad_points];
    real_t quad_weights[num_quad_points];
    vector_t quad_normals[num_quad_points];
    surface_integral_get_quadrature(functional->surface_quad_rule, quad_points, quad_weights, quad_normals);
    compute_integral(functional, t, poly_basis, solution,
                     quad_points, quad_weights, quad_normals, num_quad_points, 
                     lambdas);
  }
  else
  {
    volume_integral_set_domain(functional->volume_quad_rule, i);
    int num_quad_points = volume_integral_num_points(functional->volume_quad_rule);
    point_t quad_points[num_quad_points];
    real_t quad_weights[num_quad_points];
    volume_integral_get_quadrature(functional->volume_quad_rule, quad_points, quad_weights);
    compute_integral(functional, t, poly_basis, solution, 
                     quad_points, quad_weights, NULL, num_quad_points, 
                     lambdas);
  }
  STOP_FUNCTION_TIMER();
}
void exchanger_finish_metadata_transfer(exchanger_t* ex,
                                        int token)
{
#if POLYMEC_HAVE_MPI
  START_FUNCTION_TIMER();
  ASSERT(token >= 0);
  ASSERT(token < ex->num_pending_msgs);
  ASSERT(ex->transfer_counts[token] == NULL); // We can't finish a data transfer as a metadata transfer!
  ASSERT(ex->orig_buffers[token] == NULL); // This is a metadata transfer, right?

  // Retrieve the message for the given token.
  mpi_message_t* msg = ex->pending_msgs[token];
  int err = exchanger_waitall(ex, msg);
  if (err != MPI_SUCCESS) return;

  // Detach the send/receive buffers--they belong to the caller, so 
  // mpi_message_free() below must not free them.
  msg->send_buffers = NULL;
  msg->receive_buffers = NULL;

  // Pull the message out of our list of pending messages and delete it.
  ex->pending_msgs[token] = NULL;
  mpi_message_free(msg);
  STOP_FUNCTION_TIMER();
#endif
}
// This helper constructs and returns a point cloud from the points with the
// given indices in the given point cloud.
static point_cloud_t* create_subcloud(MPI_Comm comm,
                                      point_cloud_t* cloud,
                                      int* indices, int num_indices)
{
  START_FUNCTION_TIMER();
  // This is super easy--just pick out the points we want!
  point_cloud_t* subcloud = point_cloud_new(comm, num_indices);
  for (int i = 0; i < num_indices; ++i)
    subcloud->points[i] = cloud->points[indices[i]];

  // Now copy properties using their serializers.
  int pos = 0;
  char* prop_name;
  void* prop_data;
  serializer_t* prop_ser;
  while (point_cloud_next_property(cloud, &pos, &prop_name, &prop_data, &prop_ser))
  {
    if (prop_ser != NULL)
    {
      void* prop_data_copy = serializer_clone_object(prop_ser, prop_data);
      point_cloud_set_property(subcloud, prop_name, prop_data_copy, prop_ser);
    }
    else
      log_debug("create_subcloud: property '%s' has no serializer.", prop_name);
  }

  STOP_FUNCTION_TIMER();
  return subcloud;
}
void exchanger_verify(exchanger_t* ex, void (*handler)(const char* format, ...))
{
#if POLYMEC_HAVE_MPI
  START_FUNCTION_TIMER();
  // An exchanger is valid/consistent iff the number of elements exchanged 
  // between any two processes is agreed upon by both of them. So we send our 
  // expected numbers of exchanged elements to our neighbors, receive their 
  // numbers, and compare.
  int num_send_procs = ex->send_map->size;
  int num_receive_procs = ex->receive_map->size;
  int num_sent_elements[num_send_procs], 
      num_elements_expected_by_neighbors[num_send_procs],
      send_procs[num_send_procs];
  MPI_Request requests[num_send_procs + num_receive_procs];
  int pos = 0, proc, *indices, num_indices;

  // Post a receive from each of our send neighbors for the number of 
  // elements it expects from us.
  int p = 0;
  while (exchanger_next_send(ex, &pos, &proc, &indices, &num_indices))
  {
    num_sent_elements[p] = num_indices;
    send_procs[p] = proc;
    int err = MPI_Irecv(&num_elements_expected_by_neighbors[p], 1, 
                        MPI_INT, proc, 0, ex->comm, &requests[p]);
    if (err != MPI_SUCCESS)
      polymec_error("exchanger_verify: Could not receive sent element data from %d", proc);
    ++p;
  }

  // Tell each of our receive neighbors how many elements we expect to receive 
  // from it. Symmetrically, our send neighbors tell us how many elements they 
  // expect from us, which should match the numbers we're actually sending.
  pos = p = 0;
  while (exchanger_next_receive(ex, &pos, &proc, &indices, &num_indices))
  {
    int err = MPI_Isend(&num_indices, 1, MPI_INT, proc, 0, ex->comm, &requests[num_send_procs + p]);
    if (err != MPI_SUCCESS)
      polymec_error("exchanger_verify: Could not send number of received elements to %d", proc);
    ++p;
  }

  // Wait for messages to fly.
  MPI_Status statuses[num_send_procs + num_receive_procs];
  MPI_Waitall(num_send_procs + num_receive_procs, requests, statuses);

  // Now verify that the numbers are the same.
  for (p = 0; p < num_send_procs; ++p)
  {
    if (num_sent_elements[p] != num_elements_expected_by_neighbors[p])
    {
      handler("exchanger_verify: Sending %d elements to proc %d, but %d elements are expected.", 
              num_sent_elements[p], send_procs[p],
              num_elements_expected_by_neighbors[p]);
    }
  }
  STOP_FUNCTION_TIMER();
#endif
}
cpr_differencer_t* cpr_differencer_new(MPI_Comm comm,
                                       void* F_context,
                                       int (*F)(void* context, real_t t, real_t* x, real_t* Fval),
                                       int (*F_dae)(void* context, real_t t, real_t* x, real_t* xdot, real_t* Fval),
                                       void (*F_dtor)(void* context),
                                       adj_graph_t* sparsity,
                                       int num_local_rows,
                                       int num_remote_rows)
{
  ASSERT(num_local_rows > 0);
  ASSERT(num_remote_rows >= 0);

  // Exactly one of F and F_dae must be given.
  ASSERT((F != NULL) || (F_dae != NULL));
  ASSERT((F == NULL) || (F_dae == NULL));

  START_FUNCTION_TIMER();

  cpr_differencer_t* diff = polymec_malloc(sizeof(cpr_differencer_t));
  diff->comm = comm;
  diff->F_context = F_context;
  diff->F = F;
  diff->F_dae = F_dae;
  diff->F_dtor = F_dtor;

  // Steal the graph.
  diff->sparsity = sparsity;

  // The number of local rows is known at this point.
  diff->num_local_rows = num_local_rows;

  // We can't know the actual number of remote rows without knowing the 
  // specific communication pattern, but it's safe to take the number we're 
  // given at face value, since only the underlying RHS function will 
  // actually use the remote data.
  diff->num_remote_rows = num_remote_rows;

  // Assemble a graph coloring for any matrix we treat.
  diff->coloring = adj_graph_coloring_new(diff->sparsity, SMALLEST_LAST);

  // Get the maximum number of colors on all MPI processes so that we can 
  // compute in lockstep.
  int num_colors = adj_graph_coloring_num_colors(diff->coloring);
  MPI_Allreduce(&num_colors, &diff->max_colors, 1, MPI_INT, MPI_MAX, diff->comm);
  log_debug("cpr_differencer: graph coloring produced %d colors.", diff->max_colors);

  // Make work vectors.
  diff->num_work_vectors = 4;
  diff->work = polymec_malloc(sizeof(real_t*) * diff->num_work_vectors);
  int N = diff->num_local_rows + diff->num_remote_rows;
  for (int i = 0; i < diff->num_work_vectors; ++i)
    diff->work[i] = polymec_malloc(sizeof(real_t) * N);
  diff->Jv = polymec_malloc(sizeof(real_t) * N);

  STOP_FUNCTION_TIMER();
  return diff;
}
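// Hypothetical usage sketch (sys, my_rhs, and the sizes are assumptions, not 
// taken from the code above): an ODE-style differencer supplies F and leaves 
// the DAE form NULL, so cpr_differencer_compute() must then be called with 
// gamma == 0 and xdot == NULL (see the ASSERTs there), e.g. to approximate 
// I - dt * dF/dx:
//
//   cpr_differencer_t* diff = cpr_differencer_new(comm, sys, my_rhs, NULL, NULL,
//                                                 sparsity, num_local_rows, num_remote_rows);
//   cpr_differencer_compute(diff, 1.0, -dt, 0.0, t, x, NULL, matrix);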
void gmls_functional_eval_integrands(gmls_functional_t* functional,
                                     real_t t,
                                     multicomp_poly_basis_t* poly_basis,
                                     point_t* x, vector_t* n,
                                     real_t* solution,
                                     real_t* integrands)
{
  START_FUNCTION_TIMER();
  functional->vtable.eval_integrands(functional->context, t, poly_basis, 
                                     x, n, solution, integrands);
  STOP_FUNCTION_TIMER();
}
static int evaluate_residual(void* context, real_t t, real_t* x, real_t* R)
{
  START_FUNCTION_TIMER();
  euler_ode_t* integ = context;
  int status = eval_rhs(integ, t, x, R);
  if (status == 0)
  {
    for (int i = 0; i < integ->num_local_values; ++i)
      R[i] = x[i] - integ->x_old[i] - integ->dt * R[i];
  }
  STOP_FUNCTION_TIMER();
  return status;
}
int exchanger_start_exchange(exchanger_t* ex, void* data, int stride, int tag, MPI_Datatype type)
{
#if POLYMEC_HAVE_MPI
  START_FUNCTION_TIMER();
  // Create a message for this array.
  mpi_message_t* msg = mpi_message_new(type, stride, tag);
  mpi_message_pack(msg, data, ex->send_offset, ex->send_map, ex->receive_map);
  
  // Begin the transmission and allocate a token for it.
  int token = exchanger_send_message(ex, data, msg);
  STOP_FUNCTION_TIMER();
  return token;
#else
  return 0;
#endif
}
static void compute_integral(gmls_functional_t* functional,
                             real_t t,
                             multicomp_poly_basis_t* poly_basis,
                             real_t* solution, 
                             point_t* quad_points,
                             real_t* quad_weights,
                             vector_t* quad_normals,
                             int num_quad_points,
                             real_t* lambdas)
{
  START_FUNCTION_TIMER();
  bool on_boundary = (quad_normals != NULL);

  // Loop through the points and compute the lambda matrix of functional 
  // approximants.
  int num_comp = functional->num_comp;
  int basis_dim = multicomp_poly_basis_dim(poly_basis);
  memset(lambdas, 0, sizeof(real_t) * num_comp * basis_dim * num_comp);
  DECLARE_3D_ARRAY(real_t, lam, lambdas, num_comp, basis_dim, num_comp);
  for (int q = 0; q < num_quad_points; ++q)
  {
    point_t* xq = &quad_points[q];
    real_t wq = quad_weights[q];
    vector_t* nq = (on_boundary) ? &quad_normals[q] : NULL;

    // Now compute the (multi-component) integrands for the functional at 
    // this point.
    real_t integrands[num_comp*basis_dim*num_comp];
    functional->vtable.eval_integrands(functional->context, t, poly_basis, 
                                       xq, nq, solution, integrands);

    // Integrate.
    DECLARE_3D_ARRAY(real_t, I, integrands, num_comp, num_comp, basis_dim);
    for (int i = 0; i < num_comp; ++i)
      for (int j = 0; j < basis_dim; ++j)
        for (int k = 0; k < num_comp; ++k)
          lam[i][j][k] += wq * I[i][k][j];
  }
  STOP_FUNCTION_TIMER();
}
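// In index form, the quadrature loop above computes
//
//   lam[i][j][k] = sum_q wq * I[i][k][j](xq),
//
// i.e. the integrand array filled in by eval_integrands is laid out as 
// (component, component, basis term) while lambdas is laid out as 
// (component, basis term, component).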
void exchanger_finish_exchange(exchanger_t* ex, int token) 
{
#if POLYMEC_HAVE_MPI
  START_FUNCTION_TIMER();
  ASSERT(token >= 0);
  ASSERT(token < ex->num_pending_msgs);
  ASSERT(ex->transfer_counts[token] == NULL); // We can't finish a transfer as an exchange!

  // Retrieve the message for the given token.
  mpi_message_t* msg = ex->pending_msgs[token];
  int err = exchanger_waitall(ex, msg);
  if (err != MPI_SUCCESS) return;

  // Copy the received data into the original array.
  void* orig_buffer = ex->orig_buffers[token];
  mpi_message_unpack(msg, orig_buffer, ex->receive_offset, ex->receive_map);

  // Pull the message out of our list of pending messages and delete it.
  ex->pending_msgs[token] = NULL;
  ex->orig_buffers[token] = NULL;
  mpi_message_free(msg);
  STOP_FUNCTION_TIMER();
#endif
}
void cpr_differencer_compute(cpr_differencer_t* diff, 
                             real_t alpha, real_t beta, real_t gamma,  
                             real_t t, real_t* x, real_t* xdot,
                             local_matrix_t* matrix)
{
  START_FUNCTION_TIMER();
  adj_graph_coloring_t* coloring = diff->coloring;
  real_t** work = diff->work;

  // Settle on the function F we're differencing: the DAE form if we have 
  // one, otherwise the ODE form wrapped by F_adaptor.
  int (*F)(void* context, real_t t, real_t* x, real_t* xdot, real_t* Fval);
  void* F_context;
  if (diff->F_dae != NULL)
  {
    ASSERT(xdot != NULL);
    F = diff->F_dae;
    F_context = diff->F_context;
  }
  else
  {
    ASSERT(gamma == 0.0);
    ASSERT(xdot == NULL);
    F = F_adaptor;
    F_context = diff;
  }

  // First, zero the matrix.
  local_matrix_zero(matrix);

  // If all the coefficients are zero, we're finished!
  if ((alpha == 0.0) && (beta == 0.0) && (gamma == 0.0))
  {
    STOP_FUNCTION_TIMER();
    return;
  }

  // Then set up an identity matrix.
  local_matrix_add_identity(matrix, alpha);

  // If beta and gamma are zero, we're finished!
  if ((beta == 0.0) && (gamma == 0.0))
  {
    STOP_FUNCTION_TIMER();
    return;
  }

  // Keep track of the number of function evaluations.
  int num_F_evals = 0;

  // We compute the system Jacobian using the method described by 
  // Curtis, Powell, and Reid.
  if ((alpha != 0.0) && (beta != 0.0) && (gamma != 0.0))
    log_debug("cpr_differencer: approximating J = %g * I + %g * dF/dx + %g * dF/d(xdot)...", alpha, beta, gamma);
  else if ((alpha == 0.0) && (beta != 0.0) && (gamma != 0.0))
    log_debug("cpr_differencer: approximating J = %g * dF/dx + %g * dF/d(xdot)...", beta, gamma);
  else if ((alpha == 0.0) && (beta == 0.0) && (gamma != 0.0))
    log_debug("cpr_differencer: approximating J = %g * dF/d(xdot)...", gamma);
  else if ((alpha != 0.0) && (beta != 0.0))
    log_debug("cpr_differencer: approximating J = %g * I + %g * dF/dx...", alpha, beta);
  else if ((alpha == 0.0) && (beta != 0.0))
    log_debug("cpr_differencer: approximating J = %g * dF/dx...", beta);

  // Now iterate over all of the colors in our coloring. 
  int num_colors = adj_graph_coloring_num_colors(coloring);
  for (int c = 0; c < num_colors; ++c)
  {
    // We construct d, the binary vector corresponding to this color, in work[0].
    memset(work[0], 0, sizeof(real_t) * (diff->num_local_rows + diff->num_remote_rows));
    int pos = 0, i;
    while (adj_graph_coloring_next_vertex(coloring, c, &pos, &i))
      work[0][i] = 1.0;

    // We evaluate F(t, x, xdot) and place it into work[1].
    F(F_context, t, x, xdot, work[1]);
    ++num_F_evals;

    // Evaluate dF/dx * d.
    memset(diff->Jv, 0, sizeof(real_t) * diff->num_local_rows);
    cpr_finite_diff_dFdx_v(F_context, F, t, x, xdot, diff->num_local_rows, 
                           diff->num_remote_rows, work[0], work, diff->Jv);
    ++num_F_evals;

    // Add the column vector J*v into our matrix.
    pos = 0;
    while (adj_graph_coloring_next_vertex(coloring, c, &pos, &i))
      local_matrix_add_column_vector(matrix, beta, i, diff->Jv);

    if ((gamma != 0.0) && (xdot != NULL))
    {
      // Now evaluate dF/d(xdot) * d.
      memset(diff->Jv, 0, sizeof(real_t) * diff->num_local_rows);
      cpr_finite_diff_dFdxdot_v(F_context, F, t, x, xdot, diff->num_local_rows, 
                                diff->num_remote_rows, work[0], work, diff->Jv);
      ++num_F_evals;

      // Add in the column vector.
      pos = 0;
      while (adj_graph_coloring_next_vertex(coloring, c, &pos, &i))
        local_matrix_add_column_vector(matrix, gamma, i, diff->Jv);
    }
  }

  // Now call the RHS function for the colors we don't have, up through the 
  // maximum number of colors on any process, so that our neighboring 
  // processes can still get exchanged data from us if they need it.
  int num_neighbor_colors = diff->max_colors - num_colors;
  for (int c = 0; c < num_neighbor_colors; ++c)
  {
    F(F_context, t, x, xdot, work[1]);
    ++num_F_evals;
    cpr_finite_diff_dFdx_v(F_context, F, t, x, xdot, diff->num_local_rows, 
                           diff->num_remote_rows, work[0], work, diff->Jv);
    ++num_F_evals;
    if ((gamma != 0.0) && (xdot != NULL))
    {
      cpr_finite_diff_dFdxdot_v(F_context, F, t, x, xdot, diff->num_local_rows, 
                                diff->num_remote_rows, work[0], work, diff->Jv);
      ++num_F_evals;
    }
  }

  log_debug("cpr_differencer: Evaluated F %d times.", num_F_evals);
  STOP_FUNCTION_TIMER();
}
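// A note on the coloring trick above: columns that share a color are 
// structurally orthogonal (no row of the sparsity pattern touches two of 
// them), so with d the indicator vector of color c, each entry of J*d comes 
// from at most one column in that color. One finite-difference product 
// therefore recovers every column of the color at once, and 
// local_matrix_add_column_vector() scatters the recovered entries into place.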
void exchanger_finish_transfer(exchanger_t* ex, int token)
{
#if POLYMEC_HAVE_MPI
  START_FUNCTION_TIMER();
  ASSERT(token >= 0);
  ASSERT(token < ex->num_pending_msgs);
  ASSERT(ex->transfer_counts[token] != NULL); // We can't finish an exchange as a transfer!

  // Retrieve the message for the given token.
  mpi_message_t* msg = ex->pending_msgs[token];
  int* count = ex->transfer_counts[token];
  int err = exchanger_waitall(ex, msg);
  if (err != MPI_SUCCESS) return;

  // Copy the received data into the original array.
  void* orig_buffer = ex->orig_buffers[token];
  mpi_message_unpack(msg, orig_buffer, ex->receive_offset, ex->receive_map);

  // Now cull the sent elements from the array.
  int num_sent = 0, pos = 0, proc;
  exchanger_channel_t* c;
  while (exchanger_map_next(ex->send_map, &pos, &proc, &c))
  {
    // Move the sent elements to the back.
    int stride = msg->stride;
    if (msg->type == MPI_REAL_T)
    {
      real_t* array = (real_t*)orig_buffer;
      for (int i = 0; i < c->num_indices; ++i)
      {
        for (int s = 0; s < stride; ++s)
        {
          array[stride*c->indices[i]+s] = array[*count-1-num_sent];
          ++num_sent;
        }
      }
    }
    else if (msg->type == MPI_DOUBLE)
    {
      double* array = (double*)orig_buffer;
      for (int i = 0; i < c->num_indices; ++i)
      {
        for (int s = 0; s < stride; ++s)
        {
          array[stride*c->indices[i]+s] = array[*count-1-num_sent];
          ++num_sent;
        }
      }
    }
    else 
    {
      // FIXME: What about other data types???
      polymec_error("exchanger_finish_transfer: unimplemented data type.");
    }
  }

  // Convey the change in count of the transferred data.
  *count -= num_sent;
  ASSERT(*count >= 0);

  // Pull the message out of our list of pending messages and delete it.
  ex->pending_msgs[token] = NULL;
  ex->orig_buffers[token] = NULL;
  ex->transfer_counts[token] = NULL;
  mpi_message_free(msg);
  STOP_FUNCTION_TIMER();
#endif
}
static int exchanger_send_message(exchanger_t* ex, void* data, mpi_message_t* msg)
{
  START_FUNCTION_TIMER();
  // If we are expecting data, post asynchronous receives. 
  int j = 0;
  for (int i = 0; i < msg->num_receives; ++i)
  {
    ASSERT(ex->rank != msg->source_procs[i]);
    int err = MPI_Irecv(msg->receive_buffers[i], 
                        msg->stride * msg->receive_buffer_sizes[i],
                        msg->type, msg->source_procs[i], msg->tag, ex->comm, 
                        &(msg->requests[j++]));
    if (err != MPI_SUCCESS)
    {
      int resultlen;
      char str[MPI_MAX_ERROR_STRING];
      MPI_Error_string(err, str, &resultlen);
      char err_msg[1024];
      snprintf(err_msg, 1024, "%d: MPI Error posting receive from %d: %d\n(%s)\n", 
          ex->rank, msg->source_procs[i], err, str);
      polymec_error(err_msg);
    }
  }

  // Send the data asynchronously. 
  for (int i = 0; i < msg->num_sends; ++i)
  {
    int err = MPI_Isend(msg->send_buffers[i], 
                        msg->stride * msg->send_buffer_sizes[i], 
                        msg->type, msg->dest_procs[i], msg->tag, ex->comm, 
                        &(msg->requests[j++])); 
    if (err != MPI_SUCCESS)
    {
      int resultlen;
      char str[MPI_MAX_ERROR_STRING];
      MPI_Error_string(err, str, &resultlen);
      char err_msg[1024];
      snprintf(err_msg, 1024, "%d: MPI Error sending to %d: %d\n(%s)\n", 
          ex->rank, msg->dest_procs[i], err, str);
      polymec_error(err_msg);
    }
  }
  msg->num_requests = j;

  // Allocate a token.
  int token = 0;
  while ((token < ex->num_pending_msgs) && (ex->pending_msgs[token] != NULL))
    ++token;
  if (token == ex->num_pending_msgs) ++ex->num_pending_msgs;
  if (ex->num_pending_msgs == ex->pending_msg_cap)
  {
    ex->pending_msg_cap *= 2;
    ex->pending_msgs = polymec_realloc(ex->pending_msgs, ex->pending_msg_cap*sizeof(mpi_message_t*));
    ex->orig_buffers = polymec_realloc(ex->orig_buffers, ex->pending_msg_cap*sizeof(void*));
    ex->transfer_counts = polymec_realloc(ex->transfer_counts, ex->pending_msg_cap*sizeof(int*));
  }
  ex->pending_msgs[token] = msg;
  ex->orig_buffers[token] = data;
  STOP_FUNCTION_TIMER();
  return token;
}
static bool euler_step(void* context, real_t max_dt, real_t* t, real_t* x)
{
  START_FUNCTION_TIMER();
  euler_ode_t* integ = context;
  integ->dt = max_dt;
  real_t theta = integ->theta;
  int N_local = integ->num_local_values;

  if (theta == 0.0)
  {
    // No implicitness here! Just compute the RHS and mash it into 
    // our solution.

    // Copy the solution into place.
    memcpy(integ->x_new, x, sizeof(real_t) * N_local);

    int status = eval_rhs(integ, *t, integ->x_new, integ->f1);
    if (status != 0)
    {
      log_debug("euler_ode_integrator: explicit call to RHS failed.");
      STOP_FUNCTION_TIMER();
      return false;
    }
    for (int i = 0; i < N_local; ++i)
      x[i] += max_dt * integ->f1[i];
    *t += max_dt;
    STOP_FUNCTION_TIMER();
    return true;
  }
  else
  {
    real_t dt = max_dt;

    // Copy the solution into place.
    memcpy(integ->x_new, x, sizeof(real_t) * N_local);

    // Compute the right-hand function at t.
    real_t t1 = *t;
    int status = eval_rhs(integ, t1, integ->x_new, integ->f1);
    if (status != 0) 
    {
      log_debug("euler_ode_integrator: implicit call to RHS at t failed.");
      STOP_FUNCTION_TIMER();
      return false;
    }

    // Okay, let's do this iteration thing.
    bool converged = false;
    for (int i = 0; i < integ->max_iters; ++i)
    {
      // Copy our latest iterate into our previous one.
      memcpy(integ->x_old, integ->x_new, sizeof(real_t) * N_local);

      // Compute the right-hand function at t + dt.
      real_t t2 = t1 + dt;
      status = eval_rhs(integ, t2, integ->x_new, integ->f2);
      if (status != 0)
      {
        log_debug("euler_ode_integrator: implicit call to RHS at t + dt failed.");
        break; // Error, not converged.
      }

      // Update x_new: x_new <- x + dt * ((1 - theta) * f1 + theta * f2).
      for (int j = 0; j < N_local; ++j)
        integ->x_new[j] = x[j] + dt * ((1.0 - theta) * integ->f1[j] + theta * integ->f2[j]);

      // Compute the relative and absolute norms of the change to the solution.
      real_t rel_norm = 0.0, abs_norm = 0.0;
      integ->compute_norms(integ->comm, integ->x_old, integ->x_new, integ->num_local_values,
                           &rel_norm, &abs_norm);
       
      log_debug("euler_ode_integrator: iteration %d/%d", i+1, integ->max_iters);
      log_debug("  rel_norm = %g, abs_norm = %g", rel_norm, abs_norm);

      // Check for convergence.
      if ((rel_norm < integ->rel_tol) && (abs_norm < integ->abs_tol))
      {
        memcpy(x, integ->x_new, sizeof(real_t) * N_local);
        converged = true;
        *t += dt;
        break;
      }
    }

    STOP_FUNCTION_TIMER();
    return converged;
  }
}
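// In equation form, the implicit branch above fixed-point iterates the 
// theta scheme
//
//   x_new = x + dt * ((1 - theta) * f(t, x) + theta * f(t + dt, x_new)),
//
// with theta = 0.5 giving the trapezoidal (Crank-Nicolson) rule and 
// theta = 1 giving backward Euler.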
// This helper creates and returns an exchanger that distributes data from the 
// root process (0) according to the global partition vector. The partition 
// vector is defined on rank 0 and NULL on all other ranks.
exchanger_t* create_distributor(MPI_Comm comm, 
                                int64_t* global_partition,
                                int num_global_vertices)
{
  START_FUNCTION_TIMER();
  int nprocs, rank;
  MPI_Comm_size(comm, &nprocs);
  MPI_Comm_rank(comm, &rank);
  ASSERT((rank == 0) || (global_partition == NULL));
  ASSERT((rank == 0) || (num_global_vertices == 0));

  exchanger_t* distributor = exchanger_new(comm);
  
  if (rank == 0)
  {
    // Get the number of vertices we're going to send to each other process.
    int num_vertices_to_send[nprocs];
    memset(num_vertices_to_send, 0, sizeof(int) * nprocs);
    for (int v = 0; v < num_global_vertices; ++v)
      num_vertices_to_send[global_partition[v]]++;

    // Send this number.
    int n;
    MPI_Scatter(num_vertices_to_send, 1, MPI_INT, &n, 1, MPI_INT, 0, comm);

    // Now send the vertices to each process and register these sends
    // with the distributor.
    for (int p = 1; p < nprocs; ++p)
    {
      ASSERT(num_vertices_to_send[p] > 0);
      int vertices[num_vertices_to_send[p]], k = 0, p_tag = p;
      for (int i = 0; i < num_global_vertices; ++i)
      {
        if (global_partition[i] == p)
          vertices[k++] = i;
      }
      MPI_Send(vertices, num_vertices_to_send[p], MPI_INT, p, p_tag, comm);
      exchanger_set_send(distributor, p, vertices, num_vertices_to_send[p], true);
    }

    // Figure out the local vertices for rank 0.
    int num_local_vertices = num_vertices_to_send[0];
    int local_vertices[num_local_vertices], k = 0;
    for (int i = 0; i < num_global_vertices; ++i)
    {
      if (global_partition[i] == 0)
        local_vertices[k++] = i;
    }
  }
  else
  {
    // Get the number of vertices we will receive from rank 0.
    int num_local_vertices;
    MPI_Scatter(NULL, 1, MPI_INT, &num_local_vertices, 1, MPI_INT, 0, comm);

    // Now get the vertices.
    int local_vertices[num_local_vertices], p_tag = rank;
    MPI_Status status;
    MPI_Recv(local_vertices, num_local_vertices, MPI_INT, 0, p_tag, comm, &status);

    // Now register all the vertices we're receiving with the migrator.
    ASSERT(num_local_vertices > 0);
    exchanger_set_receive(distributor, 0, local_vertices, num_local_vertices, true);
  }

  STOP_FUNCTION_TIMER();
  return distributor;
}
// This helper creates and returns an exchanger that migrates data from the 
// current process according to the local partition vector.
exchanger_t* create_migrator(MPI_Comm comm,
                             int64_t* local_partition,
                             int num_vertices)
{
  START_FUNCTION_TIMER();
  exchanger_t* migrator = exchanger_new(comm);
  
  // Tally up the number of vertices we're going to send to every other process, 
  // including those that are staying on our own process.
  int nprocs, rank;
  MPI_Comm_size(comm, &nprocs);
  MPI_Comm_rank(comm, &rank);
  int num_vertices_to_send[nprocs];
  memset(num_vertices_to_send, 0, sizeof(int) * nprocs);
  for (int v = 0; v < num_vertices; ++v)
    num_vertices_to_send[local_partition[v]]++;

  // Get the number of vertices we're going to receive from every other process.
  int num_vertices_to_receive[nprocs];
  MPI_Alltoall(num_vertices_to_send, 1, MPI_INT, 
               num_vertices_to_receive, 1, MPI_INT, comm);

  // Send and receive the actual vertices.
  int** receive_vertices = polymec_malloc(sizeof(int*) * nprocs);
  memset(receive_vertices, 0, sizeof(int*) * nprocs);
  int num_requests = 0;
  for (int p = 0; p < nprocs; ++p)
  {
    if ((rank != p) && (num_vertices_to_receive[p] > 0)) ++num_requests;
    if ((rank != p) && (num_vertices_to_send[p] > 0)) ++num_requests;
  }
  if (num_requests > 0)
  {
    MPI_Request requests[num_requests];
    int r = 0;
    for (int p = 0; p < nprocs; ++p)
    {
      if (num_vertices_to_receive[p] > 0)
      {
        receive_vertices[p] = polymec_malloc(sizeof(int) * num_vertices_to_receive[p]);
        if (p != rank)
        {
          // Note that we use this rank as our tag.
          MPI_Irecv(receive_vertices[p], num_vertices_to_receive[p], MPI_INT, p, rank, comm, &requests[r++]);
        }
      }
      if (num_vertices_to_send[p] > 0)
      {
        int send_vertices[num_vertices_to_send[p]], s = 0;
        for (int v = 0; v < num_vertices; ++v)
        {
          if (local_partition[v] == p)
            send_vertices[s++] = v;
        }
        if (p != rank)
        {
          // Note that we use the destination rank p as our tag.
          exchanger_set_send(migrator, p, send_vertices, num_vertices_to_send[p], true);
          MPI_Isend(send_vertices, num_vertices_to_send[p], MPI_INT, p, p, comm, &requests[r++]);
        }
        else
          memcpy(receive_vertices[rank], send_vertices, sizeof(int) * num_vertices_to_receive[rank]);
      }
    }
    ASSERT(r == num_requests);

    // Wait for exchanges to finish. We can't use MPI_Waitall here because 
    // not all processes necessarily participate.
    MPI_Status statuses[num_requests];
    int finished[num_requests];
    memset(finished, 0, num_requests*sizeof(int));
    while (true)
    {
      bool all_finished = true;
      for (int i = 0; i < num_requests; ++i)
      {
        if (!finished[i])
        {
          if (MPI_Test(&requests[i], &finished[i], &statuses[i]) != MPI_SUCCESS)
            polymec_error("create_migrator: Internal error.");
          if (!finished[i]) all_finished = false;
        }
      }

      // If all the transmissions have finished at this point, we 
      // can break out of the loop. 
      if (all_finished) break;
    } 

    // Now register all the vertices we're receiving with the migrator.
    for (int p = 0; p < nprocs; ++p)
    {
      if (num_vertices_to_receive[p] > 0)
      {
        ASSERT(receive_vertices[p] != NULL);
        if (rank != p)
          exchanger_set_receive(migrator, p, receive_vertices[p], num_vertices_to_receive[p], true);
        polymec_free(receive_vertices[p]);
      }
    }
    polymec_free(receive_vertices);
  }

  STOP_FUNCTION_TIMER();
  return migrator;
}
int exchanger_start_metadata_transfer(exchanger_t* ex,
                                      void** send_arrays,
                                      void** receive_arrays,
                                      int stride,
                                      int tag,
                                      MPI_Datatype type,
                                      exchanger_metadata_dir direction)
{
#if POLYMEC_HAVE_MPI
  START_FUNCTION_TIMER();

  // Create a message using these metadata arrays, and set it up manually.
  mpi_message_t* msg = mpi_message_new(type, stride, tag);

  int num_sends = exchanger_num_sends(ex);
  int* dest_procs = polymec_malloc(sizeof(int) * num_sends);
  int* send_buffer_sizes = polymec_malloc(sizeof(int) * num_sends);
  int pos = 0, proc, *indices, num_indices, i = 0;
  while (exchanger_next_send(ex, &pos, &proc, &indices, &num_indices))
  {
    dest_procs[i] = proc;
    send_buffer_sizes[i++] = num_indices;
  }

  int num_receives = exchanger_num_receives(ex);
  int* source_procs = polymec_malloc(sizeof(int) * num_receives);
  int* receive_buffer_sizes = polymec_malloc(sizeof(int) * num_receives);
  pos = 0, i = 0;
  while (exchanger_next_receive(ex, &pos, &proc, &indices, &num_indices))
  {
    source_procs[i] = proc;
    receive_buffer_sizes[i++] = num_indices;
  }

  // Set things up based on the direction.
  if (direction == EX_METADATA_FORWARD)
  {
    msg->num_sends = num_sends;
    msg->dest_procs = dest_procs;
    msg->send_buffer_sizes = send_buffer_sizes;
    msg->send_buffers = send_arrays;
    msg->num_receives = num_receives; // exchanger_send_message() posts this many receives.
    msg->source_procs = source_procs;
    msg->receive_buffer_sizes = receive_buffer_sizes;
    msg->receive_buffers = receive_arrays;
  }
  else
  {
    // Reverse!
    msg->num_sends = num_receives;
    msg->dest_procs = source_procs;
    msg->send_buffer_sizes = receive_buffer_sizes;
    msg->send_buffers = receive_arrays;
    msg->num_receives = num_sends;
    msg->source_procs = dest_procs;
    msg->receive_buffer_sizes = send_buffer_sizes;
    msg->receive_buffers = send_arrays;
  }

  // Allocate requests.
  msg->requests = polymec_malloc((msg->num_sends+msg->num_receives)*sizeof(MPI_Request));

  int token = exchanger_send_message(ex, NULL, msg);
  STOP_FUNCTION_TIMER();
  return token;
#else
  return 0;
#endif
}
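// Usage sketch (the send_md/recv_md arrays are hypothetical): a forward 
// metadata transfer ships one buffer per neighbor (stride values per send or 
// receive index) and is completed with exchanger_finish_metadata_transfer(), 
// shown earlier in this listing.
//
//   int token = exchanger_start_metadata_transfer(ex, (void**)send_md, (void**)recv_md,
//                                                 1, 0, MPI_INT, EX_METADATA_FORWARD);
//   exchanger_finish_metadata_transfer(ex, token);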