void exchanger_transfer(exchanger_t* ex, void* data, int* count, int stride, int tag, MPI_Datatype type)
{
  START_FUNCTION_TIMER();
  int token = exchanger_start_transfer(ex, data, count, stride, tag, type);
  exchanger_finish_transfer(ex, token);
  STOP_FUNCTION_TIMER();
}
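// A minimal usage sketch (hypothetical array and size): a blocking transfer
// of a real-valued array with stride 1 and tag 0. After the call returns, the
// count has been reduced by the number of elements sent away (see
// exchanger_finish_transfer below).
//
//   int count = num_values;  // hypothetical size of the values array
//   exchanger_transfer(ex, values, &count, 1, 0, MPI_REAL_T);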
// This helper constructs and returns a point cloud from the points with the
// given indices in the given point cloud.
static point_cloud_t* create_subcloud(MPI_Comm comm, point_cloud_t* cloud, int* indices, int num_indices)
{
  START_FUNCTION_TIMER();
  // This is super easy--just pick out the points we want!
  point_cloud_t* subcloud = point_cloud_new(comm, num_indices);
  for (int i = 0; i < num_indices; ++i)
    subcloud->points[i] = cloud->points[indices[i]];

  // Now copy properties using their serializers.
  int pos = 0;
  char* prop_name;
  void* prop_data;
  serializer_t* prop_ser;
  while (point_cloud_next_property(cloud, &pos, &prop_name, &prop_data, &prop_ser))
  {
    if (prop_ser != NULL)
    {
      void* prop_data_copy = serializer_clone_object(prop_ser, prop_data);
      point_cloud_set_property(subcloud, prop_name, prop_data_copy, prop_ser);
    }
    else
      log_debug("create_subcloud: property '%s' has no serializer.", prop_name);
  }
  STOP_FUNCTION_TIMER();
  return subcloud;
}
void exchanger_finish_metadata_transfer(exchanger_t* ex, int token)
{
#if POLYMEC_HAVE_MPI
  START_FUNCTION_TIMER();
  ASSERT(token >= 0);
  ASSERT(token < ex->num_pending_msgs);
  ASSERT(ex->transfer_counts[token] == NULL); // We can't finish a data transfer as a metadata transfer!
  ASSERT(ex->orig_buffers[token] == NULL); // This is a metadata transfer, right?

  // Retrieve the message for the given token.
  mpi_message_t* msg = ex->pending_msgs[token];
  int err = exchanger_waitall(ex, msg);
  if (err != MPI_SUCCESS)
    return;

  // Release the send/receive buffers.
  msg->send_buffers = NULL;
  msg->receive_buffers = NULL;

  // Pull the message out of our list of pending messages and delete it.
  ex->pending_msgs[token] = NULL;
  mpi_message_free(msg);
  STOP_FUNCTION_TIMER();
#endif
}
static bool newton_euler_step(void* context, real_t max_dt, real_t* t, real_t* x)
{
  START_FUNCTION_TIMER();
  euler_ode_t* integ = context;
  integ->dt = max_dt;
  int N_local = integ->num_local_values;
  int num_iters = 0;
  memcpy(integ->x_old, x, sizeof(real_t) * N_local);
  memcpy(integ->x_new, x, sizeof(real_t) * N_local);

  // Make sure the preconditioner computes P = I - dt * J.
  newton_pc_t* precond = newton_solver_preconditioner(integ->newton);
  newton_pc_fix_coefficients(precond, 1.0, -integ->dt, 0.0);

  // Now solve the thing, storing the solution in integ->x_new.
  bool solved = newton_solver_solve(integ->newton, *t + integ->dt, integ->x_new, &num_iters);
  if (solved)
  {
    // Increment the time and the solution.
    *t += integ->dt;
    memcpy(x, integ->x_new, sizeof(real_t) * integ->num_local_values);
  }
  STOP_FUNCTION_TIMER();
  return solved;
}
// Here's the same finite difference calculation for dF/d(xdot).
static void cpr_finite_diff_dFdxdot_v(void* context,
                                      int (*F)(void* context, real_t t, real_t* x, real_t* xdot, real_t* F),
                                      real_t t,
                                      real_t* x,
                                      real_t* xdot,
                                      int num_local_rows,
                                      int num_remote_rows,
                                      real_t* v,
                                      real_t** work,
                                      real_t* dFdxdot_v)
{
  START_FUNCTION_TIMER();
  real_t eps = sqrt(UNIT_ROUNDOFF);
  real_t eps_inv = 1.0 / eps;

  // work[0] == v
  // work[1] contains F(t, x, xdot).
  // work[2] == xdot + eps*v
  // work[3] == F(t, x, xdot + eps*v)

  // xdot + eps*v -> work[2].
  for (int i = 0; i < num_local_rows + num_remote_rows; ++i)
    work[2][i] = xdot[i] + eps*v[i];

  // F(t, x, xdot + eps*v) -> work[3].
  F(context, t, x, work[2], work[3]);

  // (F(t, x, xdot + eps*v) - F(t, x, xdot)) / eps -> dF/d(xdot) * v
  for (int i = 0; i < num_local_rows; ++i)
    dFdxdot_v[i] = (work[3][i] - work[1][i]) * eps_inv;
  STOP_FUNCTION_TIMER();
}
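// Note: eps = sqrt(UNIT_ROUNDOFF) is the standard perturbation size for a
// first-order forward difference like the one above: it roughly balances the
// truncation error of the difference (which shrinks with eps) against the
// floating-point roundoff error in evaluating F (which grows as eps shrinks).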
void gmls_functional_compute(gmls_functional_t* functional,
                             int i,
                             real_t t,
                             multicomp_poly_basis_t* poly_basis,
                             real_t* solution,
                             real_t* lambdas)
{
  START_FUNCTION_TIMER();
  if (functional->surface_quad_rule != NULL)
  {
    surface_integral_set_domain(functional->surface_quad_rule, i);
    int num_quad_points = surface_integral_num_points(functional->surface_quad_rule);
    point_t quad_points[num_quad_points];
    real_t quad_weights[num_quad_points];
    vector_t quad_normals[num_quad_points];
    surface_integral_get_quadrature(functional->surface_quad_rule, quad_points, quad_weights, quad_normals);
    compute_integral(functional, t, poly_basis, solution,
                     quad_points, quad_weights, quad_normals, num_quad_points,
                     lambdas);
  }
  else
  {
    volume_integral_set_domain(functional->volume_quad_rule, i);
    int num_quad_points = volume_integral_num_points(functional->volume_quad_rule);
    point_t quad_points[num_quad_points];
    real_t quad_weights[num_quad_points];
    volume_integral_get_quadrature(functional->volume_quad_rule, quad_points, quad_weights);
    compute_integral(functional, t, poly_basis, solution,
                     quad_points, quad_weights, NULL, num_quad_points,
                     lambdas);
  }
  STOP_FUNCTION_TIMER();
}
void exchanger_verify(exchanger_t* ex, void (*handler)(const char* format, ...))
{
#if POLYMEC_HAVE_MPI
  START_FUNCTION_TIMER();
  // An exchanger is valid/consistent iff the number of elements exchanged
  // between any two processes is agreed upon by both of them. So we send our
  // expected number of exchanged elements to our neighbors, receive their
  // numbers, and then compare.
  int num_send_procs = ex->send_map->size;
  int num_receive_procs = ex->receive_map->size;
  int num_sent_elements[num_send_procs],
      num_elements_expected_by_neighbors[num_send_procs],
      send_procs[num_send_procs];
  MPI_Request requests[num_send_procs + num_receive_procs];
  int pos = 0, proc, *indices, num_indices;

  // Post receives for numbers of elements we send to others.
  int p = 0;
  while (exchanger_next_send(ex, &pos, &proc, &indices, &num_indices))
  {
    num_sent_elements[p] = num_indices;
    send_procs[p] = proc;
    int err = MPI_Irecv(&num_elements_expected_by_neighbors[p], 1, MPI_INT,
                        proc, 0, ex->comm, &requests[p]);
    if (err != MPI_SUCCESS)
      polymec_error("exchanger_verify: Could not receive sent element data from %d", proc);
    ++p;
  }

  // Send our receive neighbors the number of elements we expect to receive
  // from them. In turn, we receive the numbers our send neighbors expect,
  // which should match the numbers we're sending.
  pos = p = 0;
  while (exchanger_next_receive(ex, &pos, &proc, &indices, &num_indices))
  {
    int err = MPI_Isend(&num_indices, 1, MPI_INT, proc, 0, ex->comm,
                        &requests[num_send_procs + p]);
    if (err != MPI_SUCCESS)
      polymec_error("exchanger_verify: Could not send number of received elements to %d", proc);
    ++p;
  }

  // Wait for messages to fly.
  MPI_Status statuses[num_send_procs + num_receive_procs];
  MPI_Waitall(num_send_procs + num_receive_procs, requests, statuses);

  // Now verify that the numbers are the same.
  for (p = 0; p < num_send_procs; ++p)
  {
    if (num_sent_elements[p] != num_elements_expected_by_neighbors[p])
    {
      handler("exchanger_verify: Sending %d elements to proc %d, but %d elements are expected.",
              num_sent_elements[p], send_procs[p], num_elements_expected_by_neighbors[p]);
    }
  }
  STOP_FUNCTION_TIMER();
#endif
}
cpr_differencer_t* cpr_differencer_new(MPI_Comm comm,
                                       void* F_context,
                                       int (*F)(void* context, real_t, real_t* x, real_t* Fval),
                                       int (*F_dae)(void* context, real_t, real_t* x, real_t* xdot, real_t* Fval),
                                       void (*F_dtor)(void* context),
                                       adj_graph_t* sparsity,
                                       int num_local_rows,
                                       int num_remote_rows)
{
  ASSERT(num_local_rows > 0);
  ASSERT(num_remote_rows >= 0);

  // Exactly one of F and F_dae must be given.
  ASSERT((F != NULL) || (F_dae != NULL));
  ASSERT((F == NULL) || (F_dae == NULL));

  START_FUNCTION_TIMER();
  cpr_differencer_t* diff = polymec_malloc(sizeof(cpr_differencer_t));
  diff->comm = comm;
  diff->F_context = F_context;
  diff->F = F;
  diff->F_dae = F_dae;
  diff->F_dtor = F_dtor;

  // Steal the graph.
  diff->sparsity = sparsity;

  // The number of local rows is known at this point.
  diff->num_local_rows = num_local_rows;

  // We can't know the actual number of remote block rows without knowing the
  // specific communication pattern! It is safe, though, to just multiply the
  // given number of remote block rows by the maximum block size, since only
  // the underlying RHS function will actually use the data.
  diff->num_remote_rows = num_remote_rows;

  // Assemble a graph coloring for any matrix we treat.
  diff->coloring = adj_graph_coloring_new(diff->sparsity, SMALLEST_LAST);

  // Get the maximum number of colors on all MPI processes so that we can
  // compute in lockstep.
  int num_colors = adj_graph_coloring_num_colors(diff->coloring);
  MPI_Allreduce(&num_colors, &diff->max_colors, 1, MPI_INT, MPI_MAX, diff->comm);
  log_debug("cpr_differencer: graph coloring produced %d colors.", diff->max_colors);

  // Make work vectors.
  diff->num_work_vectors = 4;
  diff->work = polymec_malloc(sizeof(real_t*) * diff->num_work_vectors);
  int N = diff->num_local_rows + diff->num_remote_rows;
  for (int i = 0; i < diff->num_work_vectors; ++i)
    diff->work[i] = polymec_malloc(sizeof(real_t) * N);
  diff->Jv = polymec_malloc(sizeof(real_t) * (diff->num_local_rows + diff->num_remote_rows));
  STOP_FUNCTION_TIMER();
  return diff;
}
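// A construction sketch (my_context, my_F, sparsity, and the row counts are
// hypothetical): for a plain ODE system we pass the function F and leave
// F_dae NULL; for a DAE system we do the opposite, since exactly one of the
// two must be non-NULL.
//
//   cpr_differencer_t* diff =
//     cpr_differencer_new(comm, my_context, my_F, NULL, NULL,
//                         sparsity, num_local_rows, num_remote_rows);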
void gmls_functional_eval_integrands(gmls_functional_t* functional,
                                     real_t t,
                                     multicomp_poly_basis_t* poly_basis,
                                     point_t* x,
                                     vector_t* n,
                                     real_t* solution,
                                     real_t* integrands)
{
  START_FUNCTION_TIMER();
  functional->vtable.eval_integrands(functional->context, t, poly_basis, x, n, solution, integrands);
  STOP_FUNCTION_TIMER();
}
static int evaluate_residual(void* context, real_t t, real_t* x, real_t* R)
{
  START_FUNCTION_TIMER();
  euler_ode_t* integ = context;
  int status = eval_rhs(integ, t, x, R);
  if (status == 0)
  {
    for (int i = 0; i < integ->num_local_values; ++i)
      R[i] = x[i] - integ->x_old[i] - integ->dt * R[i];
  }
  STOP_FUNCTION_TIMER();
  return status;
}
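// In other words, the residual whose root the Newton solver seeks is the
// backward (implicit) Euler residual
//
//   R_i(x) = x_i - x_old_i - dt * f_i(t, x),
//
// where f is the right-hand side evaluated by eval_rhs and x_old is the
// solution at the start of the step.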
int exchanger_start_exchange(exchanger_t* ex, void* data, int stride, int tag, MPI_Datatype type)
{
#if POLYMEC_HAVE_MPI
  START_FUNCTION_TIMER();
  // Create a message for this array.
  mpi_message_t* msg = mpi_message_new(type, stride, tag);
  mpi_message_pack(msg, data, ex->send_offset, ex->send_map, ex->receive_map);

  // Begin the transmission and allocate a token for it.
  int token = exchanger_send_message(ex, data, msg);
  STOP_FUNCTION_TIMER();
  return token;
#else
  return 0;
#endif
}
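// A usage sketch (hypothetical array): a non-blocking exchange is started
// here and completed later with exchanger_finish_exchange(), which unpacks
// the received data back into the same array.
//
//   int token = exchanger_start_exchange(ex, values, 1, 0, MPI_REAL_T);
//   /* ... overlap local computation here ... */
//   exchanger_finish_exchange(ex, token);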
static void compute_integral(gmls_functional_t* functional,
                             real_t t,
                             multicomp_poly_basis_t* poly_basis,
                             real_t* solution,
                             point_t* quad_points,
                             real_t* quad_weights,
                             vector_t* quad_normals,
                             int num_quad_points,
                             real_t* lambdas)
{
  START_FUNCTION_TIMER();
  bool on_boundary = (quad_normals != NULL);

  // Loop through the points and compute the lambda matrix of functional
  // approximants.
  int num_comp = functional->num_comp;
  int basis_dim = multicomp_poly_basis_dim(poly_basis);
  memset(lambdas, 0, sizeof(real_t) * num_comp * basis_dim * num_comp);
  DECLARE_3D_ARRAY(real_t, lam, lambdas, num_comp, basis_dim, num_comp);
  for (int q = 0; q < num_quad_points; ++q)
  {
    point_t* xq = &quad_points[q];
    real_t wq = quad_weights[q];
    vector_t* nq = (on_boundary) ? &quad_normals[q] : NULL;

    // Now compute the (multi-component) integrands for the functional at
    // this point.
    real_t integrands[num_comp*basis_dim*num_comp];
    functional->vtable.eval_integrands(functional->context, t, poly_basis,
                                       xq, nq, solution, integrands);

    // Integrate.
    DECLARE_3D_ARRAY(real_t, I, integrands, num_comp, num_comp, basis_dim);
    for (int i = 0; i < num_comp; ++i)
      for (int j = 0; j < basis_dim; ++j)
        for (int k = 0; k < num_comp; ++k)
          lam[i][j][k] += wq * I[i][k][j];
  }
  STOP_FUNCTION_TIMER();
}
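// Written out, each quadrature point q accumulates
//
//   lam[i][j][k] += w_q * I[i][k][j](x_q),
//
// i.e. the functional lambdas are the quadrature-weighted sums of the
// per-point integrands, with the basis index j and component index k
// transposed between the integrand layout and the lambda layout.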
void exchanger_finish_exchange(exchanger_t* ex, int token)
{
#if POLYMEC_HAVE_MPI
  START_FUNCTION_TIMER();
  ASSERT(token >= 0);
  ASSERT(token < ex->num_pending_msgs);
  ASSERT(ex->transfer_counts[token] == NULL); // We can't finish a transfer as an exchange!

  // Retrieve the message for the given token.
  mpi_message_t* msg = ex->pending_msgs[token];
  int err = exchanger_waitall(ex, msg);
  if (err != MPI_SUCCESS)
    return;

  // Copy the received data into the original array.
  void* orig_buffer = ex->orig_buffers[token];
  mpi_message_unpack(msg, orig_buffer, ex->receive_offset, ex->receive_map);

  // Pull the message out of our list of pending messages and delete it.
  ex->pending_msgs[token] = NULL;
  ex->orig_buffers[token] = NULL;
  mpi_message_free(msg);
  STOP_FUNCTION_TIMER();
#endif
}
static int exchanger_send_message(exchanger_t* ex, void* data, mpi_message_t* msg)
{
  START_FUNCTION_TIMER();
  // If we are expecting data, post asynchronous receives.
  int j = 0;
  for (int i = 0; i < msg->num_receives; ++i)
  {
    ASSERT(ex->rank != msg->source_procs[i]);
    int err = MPI_Irecv(msg->receive_buffers[i],
                        msg->stride * msg->receive_buffer_sizes[i],
                        msg->type, msg->source_procs[i], msg->tag, ex->comm,
                        &(msg->requests[j++]));
    if (err != MPI_SUCCESS)
    {
      int resultlen;
      char str[MPI_MAX_ERROR_STRING];
      MPI_Error_string(err, str, &resultlen);
      char err_msg[1024];
      snprintf(err_msg, 1024, "%d: MPI Error posting receive from %d: %d\n(%s)\n",
               ex->rank, msg->source_procs[i], err, str);
      polymec_error(err_msg);
    }
  }

  // Send the data asynchronously.
  for (int i = 0; i < msg->num_sends; ++i)
  {
    int err = MPI_Isend(msg->send_buffers[i],
                        msg->stride * msg->send_buffer_sizes[i],
                        msg->type, msg->dest_procs[i], msg->tag, ex->comm,
                        &(msg->requests[j++]));
    if (err != MPI_SUCCESS)
    {
      int resultlen;
      char str[MPI_MAX_ERROR_STRING];
      MPI_Error_string(err, str, &resultlen);
      char err_msg[1024];
      snprintf(err_msg, 1024, "%d: MPI Error sending to %d: %d\n(%s)\n",
               ex->rank, msg->dest_procs[i], err, str);
      polymec_error(err_msg);
    }
  }
  msg->num_requests = j;

  // Allocate a token.
  int token = 0;
  while ((token < ex->num_pending_msgs) && (ex->pending_msgs[token] != NULL))
    ++token;
  if (token == ex->num_pending_msgs)
    ++ex->num_pending_msgs;
  if (ex->num_pending_msgs == ex->pending_msg_cap)
  {
    ex->pending_msg_cap *= 2;
    ex->pending_msgs = polymec_realloc(ex->pending_msgs, ex->pending_msg_cap*sizeof(mpi_message_t));
    ex->orig_buffers = polymec_realloc(ex->orig_buffers, ex->pending_msg_cap*sizeof(void*));
    ex->transfer_counts = polymec_realloc(ex->transfer_counts, ex->pending_msg_cap*sizeof(int*));
  }
  ex->pending_msgs[token] = msg;
  ex->orig_buffers[token] = data;
  STOP_FUNCTION_TIMER();
  return token;
}
int exchanger_start_metadata_transfer(exchanger_t* ex,
                                      void** send_arrays,
                                      void** receive_arrays,
                                      int stride,
                                      int tag,
                                      MPI_Datatype type,
                                      exchanger_metadata_dir direction)
{
#if POLYMEC_HAVE_MPI
  START_FUNCTION_TIMER();
  // Create a message using these metadata arrays, and set it up manually.
  mpi_message_t* msg = mpi_message_new(type, stride, tag);
  int num_sends = exchanger_num_sends(ex);
  int* dest_procs = polymec_malloc(sizeof(int) * num_sends);
  int* send_buffer_sizes = polymec_malloc(sizeof(int) * num_sends);
  int pos = 0, proc, *indices, num_indices, i = 0;
  while (exchanger_next_send(ex, &pos, &proc, &indices, &num_indices))
  {
    dest_procs[i] = proc;
    send_buffer_sizes[i++] = num_indices;
  }
  int num_receives = exchanger_num_receives(ex);
  int* source_procs = polymec_malloc(sizeof(int) * num_receives);
  int* receive_buffer_sizes = polymec_malloc(sizeof(int) * num_receives);
  pos = 0, i = 0;
  while (exchanger_next_receive(ex, &pos, &proc, &indices, &num_indices))
  {
    source_procs[i] = proc;
    receive_buffer_sizes[i++] = num_indices;
  }

  // Set things up based on the direction.
  if (direction == EX_METADATA_FORWARD)
  {
    msg->num_sends = num_sends;
    msg->dest_procs = dest_procs;
    msg->send_buffer_sizes = send_buffer_sizes;
    msg->send_buffers = send_arrays;
    msg->num_receives = num_receives;
    msg->source_procs = source_procs;
    msg->receive_buffer_sizes = receive_buffer_sizes;
    msg->receive_buffers = receive_arrays;
  }
  else
  {
    // Reverse!
    msg->num_sends = num_receives;
    msg->dest_procs = source_procs;
    msg->send_buffer_sizes = receive_buffer_sizes;
    msg->send_buffers = receive_arrays;
    msg->num_receives = num_sends;
    msg->source_procs = dest_procs;
    msg->receive_buffer_sizes = send_buffer_sizes;
    msg->receive_buffers = send_arrays;
  }

  // Allocate requests.
  msg->requests = polymec_malloc((msg->num_sends+msg->num_receives)*sizeof(MPI_Request));

  int token = exchanger_send_message(ex, NULL, msg);
  STOP_FUNCTION_TIMER();
  return token;
#else
  return 0;
#endif
}
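// A usage sketch (send_md and recv_md are hypothetical per-neighbor metadata
// buffers, ordered the same way the exchanger's send and receive maps are
// traversed above): the returned token is passed to
// exchanger_finish_metadata_transfer() to complete the transfer.
//
//   int token = exchanger_start_metadata_transfer(ex, send_md, recv_md, 1, 0,
//                                                 MPI_INT, EX_METADATA_FORWARD);
//   exchanger_finish_metadata_transfer(ex, token);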
// This helper sets up the exchanger ex so that it can migrate data from the
// current process according to the local partition vector.
exchanger_t* create_migrator(MPI_Comm comm, int64_t* local_partition, int num_vertices)
{
  START_FUNCTION_TIMER();
  exchanger_t* migrator = exchanger_new(comm);

  // Tally up the number of vertices we're going to send to every other
  // process, including those that are staying on our own process.
  int nprocs, rank;
  MPI_Comm_size(comm, &nprocs);
  MPI_Comm_rank(comm, &rank);
  int num_vertices_to_send[nprocs];
  memset(num_vertices_to_send, 0, sizeof(int) * nprocs);
  for (int v = 0; v < num_vertices; ++v)
    num_vertices_to_send[local_partition[v]]++;

  // Get the number of vertices we're going to receive from every other process.
  int num_vertices_to_receive[nprocs];
  MPI_Alltoall(num_vertices_to_send, 1, MPI_INT,
               num_vertices_to_receive, 1, MPI_INT, comm);

  // Send and receive the actual vertices.
  int** receive_vertices = polymec_malloc(sizeof(int*) * nprocs);
  memset(receive_vertices, 0, sizeof(int*) * nprocs);
  int num_requests = 0;
  for (int p = 0; p < nprocs; ++p)
  {
    if ((rank != p) && (num_vertices_to_receive[p] > 0)) ++num_requests;
    if ((rank != p) && (num_vertices_to_send[p] > 0)) ++num_requests;
  }
  if (num_requests > 0)
  {
    MPI_Request requests[num_requests];
    int r = 0;
    for (int p = 0; p < nprocs; ++p)
    {
      if (num_vertices_to_receive[p] > 0)
      {
        receive_vertices[p] = polymec_malloc(sizeof(int) * num_vertices_to_receive[p]);
        if (p != rank)
        {
          // Note that we use this rank as our tag.
          MPI_Irecv(receive_vertices[p], num_vertices_to_receive[p], MPI_INT,
                    p, rank, comm, &requests[r++]);
        }
      }
      if (num_vertices_to_send[p] > 0)
      {
        int send_vertices[num_vertices_to_send[p]], s = 0;
        for (int v = 0; v < num_vertices; ++v)
        {
          if (local_partition[v] == p)
            send_vertices[s++] = v;
        }
        if (p != rank)
        {
          // Note that we use the destination rank p as our tag.
          exchanger_set_send(migrator, p, send_vertices, num_vertices_to_send[p], true);
          MPI_Isend(send_vertices, num_vertices_to_send[p], MPI_INT,
                    p, p, comm, &requests[r++]);
        }
        else
          memcpy(receive_vertices[rank], send_vertices, sizeof(int) * num_vertices_to_receive[rank]);
      }
    }
    ASSERT(r == num_requests);

    // Wait for exchanges to finish. We can't use MPI_Waitall here because
    // not all processes necessarily participate.
    MPI_Status statuses[num_requests];
    int finished[num_requests];
    memset(finished, 0, num_requests*sizeof(int));
    while (true)
    {
      bool all_finished = true;
      for (int i = 0; i < num_requests; ++i)
      {
        if (!finished[i])
        {
          if (MPI_Test(&requests[i], &finished[i], &statuses[i]) != MPI_SUCCESS)
            polymec_error("create_migrator: Internal error.");
          if (!finished[i])
            all_finished = false;
        }
      }

      // If the transmissions have finished at this point, we
      // can break out of the loop.
      if (all_finished)
        break;
    }

    // Now register all the vertices we're receiving with the migrator.
    for (int p = 0; p < nprocs; ++p)
    {
      if (num_vertices_to_receive[p] > 0)
      {
        ASSERT(receive_vertices[p] != NULL);
        if (rank != p)
          exchanger_set_receive(migrator, p, receive_vertices[p], num_vertices_to_receive[p], true);
        polymec_free(receive_vertices[p]);
      }
    }
    polymec_free(receive_vertices);
  }
  STOP_FUNCTION_TIMER();
  return migrator;
}
// This helper sets up the exchanger ex so that it can distribute data from the
// root process (0) according to the global partition vector. The partition
// vector is defined on rank 0 and is NULL on all nonzero ranks.
exchanger_t* create_distributor(MPI_Comm comm, int64_t* global_partition, int num_global_vertices)
{
  START_FUNCTION_TIMER();
  int nprocs, rank;
  MPI_Comm_size(comm, &nprocs);
  MPI_Comm_rank(comm, &rank);
  ASSERT((rank == 0) || (global_partition == NULL));
  ASSERT((rank == 0) || (num_global_vertices == 0));

  exchanger_t* distributor = exchanger_new(comm);
  if (rank == 0)
  {
    // Get the number of vertices we're going to send to each other process.
    int num_vertices_to_send[nprocs];
    memset(num_vertices_to_send, 0, sizeof(int) * nprocs);
    for (int v = 0; v < num_global_vertices; ++v)
      num_vertices_to_send[global_partition[v]]++;

    // Send this number.
    int n;
    MPI_Scatter(num_vertices_to_send, 1, MPI_INT, &n, 1, MPI_INT, 0, comm);

    // Now send the vertices to each process and register these sends
    // with the distributor.
    for (int p = 1; p < nprocs; ++p)
    {
      ASSERT(num_vertices_to_send[p] > 0);
      int vertices[num_vertices_to_send[p]], k = 0, p_tag = p;
      for (int i = 0; i < num_global_vertices; ++i)
      {
        if (global_partition[i] == p)
          vertices[k++] = i;
      }
      MPI_Send(vertices, num_vertices_to_send[p], MPI_INT, p, p_tag, comm);
      exchanger_set_send(distributor, p, vertices, num_vertices_to_send[p], true);
    }

    // Figure out the local vertices for rank 0. (These stay here, so they
    // don't need to be registered as a receive with the distributor.)
    int num_local_vertices = num_vertices_to_send[0];
    int local_vertices[num_local_vertices], k = 0;
    for (int i = 0; i < num_global_vertices; ++i)
    {
      if (global_partition[i] == 0)
        local_vertices[k++] = i;
    }
  }
  else
  {
    // Get the number of vertices we will receive from rank 0.
    int num_local_vertices;
    MPI_Scatter(NULL, 1, MPI_INT, &num_local_vertices, 1, MPI_INT, 0, comm);

    // Now get the vertices.
    int local_vertices[num_local_vertices], p_tag = rank;
    MPI_Status status;
    MPI_Recv(local_vertices, num_local_vertices, MPI_INT, 0, p_tag, comm, &status);

    // Now register all the vertices we're receiving with the distributor.
    ASSERT(num_local_vertices > 0);
    exchanger_set_receive(distributor, 0, local_vertices, num_local_vertices, true);
  }
  STOP_FUNCTION_TIMER();
  return distributor;
}
static bool euler_step(void* context, real_t max_dt, real_t* t, real_t* x)
{
  START_FUNCTION_TIMER();
  euler_ode_t* integ = context;
  integ->dt = max_dt;
  real_t theta = integ->theta;
  int N_local = integ->num_local_values;

  if (theta == 0.0)
  {
    // No implicitness here! Just compute the RHS and mash it into
    // our solution.

    // Copy the solution into place.
    memcpy(integ->x_new, x, sizeof(real_t) * N_local);

    int status = eval_rhs(integ, *t, integ->x_new, integ->f1);
    if (status != 0)
    {
      log_debug("euler_ode_integrator: explicit call to RHS failed.");
      STOP_FUNCTION_TIMER();
      return false;
    }
    for (int i = 0; i < N_local; ++i)
      x[i] += max_dt * integ->f1[i];
    *t += max_dt;
    STOP_FUNCTION_TIMER();
    return true;
  }
  else
  {
    real_t dt = max_dt;

    // Copy the solution into place.
    memcpy(integ->x_new, x, sizeof(real_t) * N_local);

    // Compute the right-hand function at t.
    real_t t1 = *t;
    int status = eval_rhs(integ, t1, integ->x_new, integ->f1);
    if (status != 0)
    {
      log_debug("euler_ode_integrator: implicit call to RHS at t failed.");
      STOP_FUNCTION_TIMER();
      return false;
    }

    // Okay, let's do this iteration thing.
    bool converged = false;
    for (int i = 0; i < integ->max_iters; ++i)
    {
      // Copy our latest iterate into our previous one.
      memcpy(integ->x_old, integ->x_new, sizeof(real_t) * N_local);

      // Compute the right-hand function at t + dt.
      real_t t2 = t1 + dt;
      status = eval_rhs(integ, t2, integ->x_new, integ->f2);
      if (status != 0)
      {
        log_debug("euler_ode_integrator: implicit call to RHS at t + dt failed.");
        break; // Error, not converged.
      }

      // Update x_new: x_new <- x_orig + dt * RHS.
      for (int j = 0; j < N_local; ++j)
        integ->x_new[j] = x[j] + dt * ((1.0 - theta) * integ->f1[j] + theta * integ->f2[j]);

      // Compute the relative and absolute norms of the change to the solution.
      real_t rel_norm = 0.0, abs_norm = 0.0;
      integ->compute_norms(integ->comm, integ->x_old, integ->x_new,
                           integ->num_local_values, &rel_norm, &abs_norm);

      log_debug("euler_ode_integrator: iteration %d/%d", i+1, integ->max_iters);
      log_debug(" rel_norm = %g, abs_norm = %g", rel_norm, abs_norm);

      // Check for convergence.
      if ((rel_norm < integ->rel_tol) && (abs_norm < integ->abs_tol))
      {
        memcpy(x, integ->x_new, sizeof(real_t) * N_local);
        converged = true;
        *t += dt;
        break;
      }
    }
    STOP_FUNCTION_TIMER();
    return converged;
  }
}
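// The fixed-point iteration above implements the theta method
//
//   x_new = x + dt * ((1 - theta) * f(t, x) + theta * f(t + dt, x_new)),
//
// so theta = 0 recovers the explicit (forward) Euler branch, theta = 1 gives
// backward Euler, and theta = 1/2 gives the trapezoidal (Crank-Nicolson) rule.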
void exchanger_finish_transfer(exchanger_t* ex, int token)
{
#if POLYMEC_HAVE_MPI
  START_FUNCTION_TIMER();
  ASSERT(token >= 0);
  ASSERT(token < ex->num_pending_msgs);
  ASSERT(ex->transfer_counts[token] != NULL); // We can't finish an exchange as a transfer!

  // Retrieve the message for the given token.
  mpi_message_t* msg = ex->pending_msgs[token];
  int* count = ex->transfer_counts[token];
  int err = exchanger_waitall(ex, msg);
  if (err != MPI_SUCCESS)
    return;

  // Copy the received data into the original array.
  void* orig_buffer = ex->orig_buffers[token];
  mpi_message_unpack(msg, orig_buffer, ex->receive_offset, ex->receive_map);

  // Now cull the sent elements from the array.
  int num_sent = 0, pos = 0, proc;
  exchanger_channel_t* c;
  while (exchanger_map_next(ex->send_map, &pos, &proc, &c))
  {
    // Move the sent elements to the back.
    int stride = msg->stride;
    if (msg->type == MPI_REAL_T)
    {
      real_t* array = (real_t*)orig_buffer;
      for (int i = 0; i < c->num_indices; ++i)
      {
        for (int s = 0; s < stride; ++s)
        {
          array[stride*c->indices[i]+s] = array[*count-1-num_sent];
          ++num_sent;
        }
      }
    }
    else if (msg->type == MPI_DOUBLE)
    {
      double* array = (double*)orig_buffer;
      for (int i = 0; i < c->num_indices; ++i)
      {
        for (int s = 0; s < stride; ++s)
        {
          array[stride*c->indices[i]+s] = array[*count-1-num_sent];
          ++num_sent;
        }
      }
    }
    else
    {
      // FIXME: What about other data types???
      polymec_error("exchanger_finish_transfer: unimplemented data type.");
    }
  }

  // Convey the change in count of the transferred data.
  *count -= num_sent;
  ASSERT(*count >= 0);

  // Pull the message out of our list of pending messages and delete it.
  ex->pending_msgs[token] = NULL;
  ex->orig_buffers[token] = NULL;
  ex->transfer_counts[token] = NULL;
  mpi_message_free(msg);
  STOP_FUNCTION_TIMER();
#endif
}
void cpr_differencer_compute(cpr_differencer_t* diff,
                             real_t alpha, real_t beta, real_t gamma,
                             real_t t, real_t* x, real_t* xdot,
                             local_matrix_t* matrix)
{
  START_FUNCTION_TIMER();
  adj_graph_coloring_t* coloring = diff->coloring;
  real_t** work = diff->work;

  // Normalize F.
  int (*F)(void* context, real_t t, real_t* x, real_t* xdot, real_t* Fval);
  void* F_context;
  if (diff->F_dae != NULL)
  {
    ASSERT(xdot != NULL);
    F = diff->F_dae;
    F_context = diff->F_context;
  }
  else
  {
    ASSERT(gamma == 0.0);
    ASSERT(xdot == NULL);
    F = F_adaptor;
    F_context = diff;
  }

  // First, zero the matrix.
  local_matrix_zero(matrix);

  // If all the coefficients are zero, we're finished!
  if ((alpha == 0.0) && (beta == 0.0) && (gamma == 0.0))
  {
    STOP_FUNCTION_TIMER();
    return;
  }

  // Then set up an identity matrix.
  local_matrix_add_identity(matrix, alpha);

  // If beta and gamma are zero, we're finished!
  if ((beta == 0.0) && (gamma == 0.0))
  {
    STOP_FUNCTION_TIMER();
    return;
  }

  // Keep track of the number of function evaluations.
  int num_F_evals = 0;

  // We compute the system Jacobian using the method described in
  // Curtis, Powell, and Reid.
  if ((alpha != 0.0) && (beta != 0.0) && (gamma != 0.0))
    log_debug("cpr_differencer: approximating J = %g * I + %g * dF/dx + %g * dF/d(xdot)...", alpha, beta, gamma);
  else if ((alpha == 0.0) && (beta != 0.0) && (gamma != 0.0))
    log_debug("cpr_differencer: approximating J = %g * dF/dx + %g * dF/d(xdot)...", beta, gamma);
  else if ((alpha == 0.0) && (beta == 0.0) && (gamma != 0.0))
    log_debug("cpr_differencer: approximating J = %g * dF/d(xdot)...", gamma);
  else if ((alpha != 0.0) && (beta != 0.0))
    log_debug("cpr_differencer: approximating J = %g * I + %g * dF/dx...", alpha, beta);
  else if ((alpha == 0.0) && (beta != 0.0))
    log_debug("cpr_differencer: approximating J = %g * dF/dx...", beta);

  // Now iterate over all of the colors in our coloring.
  int num_colors = adj_graph_coloring_num_colors(coloring);
  for (int c = 0; c < num_colors; ++c)
  {
    // We construct d, the binary vector corresponding to this color, in work[0].
    memset(work[0], 0, sizeof(real_t) * (diff->num_local_rows + diff->num_remote_rows));
    int pos = 0, i;
    while (adj_graph_coloring_next_vertex(coloring, c, &pos, &i))
      work[0][i] = 1.0;

    // We evaluate F(t, x, xdot) and place it into work[1].
    F(F_context, t, x, xdot, work[1]);
    ++num_F_evals;

    // Evaluate dF/dx * d.
    memset(diff->Jv, 0, sizeof(real_t) * diff->num_local_rows);
    cpr_finite_diff_dFdx_v(F_context, F, t, x, xdot, diff->num_local_rows,
                           diff->num_remote_rows, work[0], work, diff->Jv);
    ++num_F_evals;

    // Add the column vector J*v into our matrix.
    pos = 0;
    while (adj_graph_coloring_next_vertex(coloring, c, &pos, &i))
      local_matrix_add_column_vector(matrix, beta, i, diff->Jv);

    if ((gamma != 0.0) && (xdot != NULL))
    {
      // Now evaluate dF/d(xdot) * d.
      memset(diff->Jv, 0, sizeof(real_t) * diff->num_local_rows);
      cpr_finite_diff_dFdxdot_v(F_context, F, t, x, xdot, diff->num_local_rows,
                                diff->num_remote_rows, work[0], work, diff->Jv);
      ++num_F_evals;

      // Add in the column vector.
      pos = 0;
      while (adj_graph_coloring_next_vertex(coloring, c, &pos, &i))
        local_matrix_add_column_vector(matrix, gamma, i, diff->Jv);
    }
  }

  // Now call the RHS functions in the same way as we would for all the colors
  // we don't have, up through the maximum number, so our neighboring
  // processes can get exchanged data from us if they need it.
  int num_neighbor_colors = diff->max_colors - num_colors;
  for (int c = 0; c < num_neighbor_colors; ++c)
  {
    F(F_context, t, x, xdot, work[1]);
    ++num_F_evals;
    cpr_finite_diff_dFdx_v(F_context, F, t, x, xdot, diff->num_local_rows,
                           diff->num_remote_rows, work[0], work, diff->Jv);
    ++num_F_evals;
    if ((gamma != 0.0) && (xdot != NULL))
    {
      cpr_finite_diff_dFdxdot_v(F_context, F, t, x, xdot, diff->num_local_rows,
                                diff->num_remote_rows, work[0], work, diff->Jv);
      ++num_F_evals;
    }
  }

  log_debug("cpr_differencer: Evaluated F %d times.", num_F_evals);
  STOP_FUNCTION_TIMER();
}
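// A usage sketch (hypothetical solution vector x and local matrix J): to
// approximate the backward Euler operator I - dt * dF/dx for an ODE system
// (no xdot), one would call
//
//   cpr_differencer_compute(diff, 1.0, -dt, 0.0, t, x, NULL, J);
//
// using the same coefficient triple (1.0, -dt, 0.0) that newton_euler_step()
// fixes via newton_pc_fix_coefficients() above.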