/** Reserve storage for one additional corner in the mesh corner arrays.
 *
 * Increments mesh->local_num_corners, appends the new end offset to
 * mesh->corner_offset and grows both mesh->corner_quad and
 * mesh->corner_corner by \a clen entries.
 *
 * \param [in,out] mesh     Mesh whose corner arrays are extended.
 * \param [in] clen         Number of entries to reserve; asserted positive.
 * \param [out] pcquad      Receives the address of the first of the
 *                          \a clen new entries in mesh->corner_quad.
 * \param [out] pccorner    Receives the address of the first of the
 *                          \a clen new entries in mesh->corner_corner.
 * \return                  Index of the new corner, which is the value of
 *                          mesh->local_num_corners before this call.
 */
static p4est_locidx_t
mesh_corner_allocate (p4est_mesh_t * mesh, p4est_locidx_t clen,
                      p4est_locidx_t ** pcquad, int8_t ** pccorner)
{
  p4est_locidx_t      new_id, begin, end;
  p4est_locidx_t     *tail;

  P4EST_ASSERT (clen > 0);
  P4EST_ASSERT (mesh->corner_offset->elem_count ==
                (size_t) (mesh->local_num_corners + 1));

  /* claim the next corner index */
  new_id = mesh->local_num_corners;
  mesh->local_num_corners = new_id + 1;

  /* the start offset must be read before the offset array is grown */
  begin = *(p4est_locidx_t *) sc_array_index (mesh->corner_offset, new_id);
  end = begin + clen;
  tail = (p4est_locidx_t *) sc_array_push (mesh->corner_offset);
  *tail = end;
  P4EST_ASSERT (mesh->corner_offset->elem_count ==
                (size_t) (mesh->local_num_corners + 1));

  /* grow the quadrant index array */
  P4EST_ASSERT (mesh->corner_quad->elem_count == (size_t) begin);
  *pcquad = (p4est_locidx_t *) sc_array_push_count (mesh->corner_quad, clen);
  P4EST_ASSERT (mesh->corner_quad->elem_count == (size_t) end);

  /* grow the corner number array in lockstep */
  P4EST_ASSERT (mesh->corner_corner->elem_count == (size_t) begin);
  *pccorner = (int8_t *) sc_array_push_count (mesh->corner_corner, clen);
  P4EST_ASSERT (mesh->corner_corner->elem_count == (size_t) end);

  return new_id;
}
/** One sweep over a column profile that limits level jumps.
 *
 * The levels in \a read are visited from the last entry to the first.
 * The output starts with a copy of the last entry.  For every further
 * entry, the level written is the larger of the entry itself and one less
 * than the level written most recently; when the entry had to be raised,
 * additional levels are emitted that step down towards (entry + 1).
 * The output therefore appears in the reverse order of the input.
 *
 * \param [in] read     Array of int8_t levels; must not be empty.
 * \param [out] write   Array of int8_t that owns its memory; it is
 *                      truncated and refilled.
 */
static void
p6est_profile_balance_self_one_pass (sc_array_t * read, sc_array_t * write)
{
  const size_t        nread = read->elem_count;
  size_t              remaining;
  int                 extra, k;
  int8_t              level, below, filled, last;
  int8_t             *out;

  P4EST_ASSERT (SC_ARRAY_IS_OWNER (write));
  P4EST_ASSERT (read->elem_size == sizeof (int8_t));
  P4EST_ASSERT (write->elem_size == sizeof (int8_t));

  sc_array_truncate (write);

  /* seed the output with the final input entry */
  out = (int8_t *) sc_array_push (write);
  level = *((int8_t *) sc_array_index (read, nread - 1));
  last = level;
  *out = last;

  /* visit indices nread - 2, ..., 0 */
  for (remaining = nread; remaining > 1; remaining--) {
    level = *((int8_t *) sc_array_index (read, remaining - 2));
    below = last - 1;
    filled = SC_MAX (below, level);
    extra = filled - level;

    out = (int8_t *) sc_array_push_count (write, 1 + extra);
    out[0] = filled;
    /* extra entries: filled, filled - 1, ..., level + 1 */
    for (k = 1; k <= extra; k++) {
      out[k] = (int8_t) (filled - (k - 1));
    }
    last = out[extra];
  }
}
/** One sweep over a column profile that may merge pairs and limits jumps.
 *
 * The levels in \a read are consumed from the last entry to the first.
 * A nonzero level whose P4EST_QUADRANT_LEN bit is clear in the running
 * height \a readh is compared to the next entry; if both are equal and the
 * level is at least one less than the larger of the two surrounding
 * entries (missing neighbors count as -1), the pair is consumed together
 * and treated as a single entry of level - 1.  The running height is then
 * advanced by P4EST_QUADRANT_LEN of the resulting level, and the result is
 * written with the same jump limiting as the self pass: the written level
 * is the larger of the entry and one less than the previously written
 * level, followed by step-down levels when the entry was raised.
 *
 * \param [in] read     Array of int8_t levels.
 * \param [out] write   Array of int8_t that owns its memory; it is
 *                      truncated and refilled.
 * \param [in] readh    Initial value of the running height.
 */
static void
p6est_profile_balance_full_one_pass (sc_array_t * read, sc_array_t * write,
                                     p4est_qcoord_t readh)
{
  size_t              nread, top, pos;
  int                 extra, k;
  int8_t              level, twin, before, after, neighbor;
  int8_t              below, filled, last;
  int8_t             *out;

  P4EST_ASSERT (SC_ARRAY_IS_OWNER (write));
  P4EST_ASSERT (read->elem_size == sizeof (int8_t));
  P4EST_ASSERT (write->elem_size == sizeof (int8_t));

  nread = read->elem_count;
  top = nread - 1;              /* only dereferenced inside the loop */
  sc_array_truncate (write);

  last = 0;
  pos = 0;
  while (pos < nread) {
    level = *((int8_t *) sc_array_index (read, top - pos));
    pos++;

    if (level && !(readh & P4EST_QUADRANT_LEN (level))) {
      P4EST_ASSERT (pos < nread);
      twin = *((int8_t *) sc_array_index (read, top - pos));
      if (level == twin) {
        /* entry consumed just before the current one, if any */
        before = -1;
        if (pos > 1) {
          before = *((int8_t *) sc_array_index (read, top - (pos - 2)));
        }
        /* entry following the twin, if any */
        after = -1;
        if (pos < top) {
          after = *((int8_t *) sc_array_index (read, top - (pos + 1)));
        }
        neighbor = SC_MAX (after, before);
        if (level >= neighbor - 1) {
          /* consume the twin as well and use the coarser level */
          pos++;
          level--;
        }
      }
    }
    readh += P4EST_QUADRANT_LEN (level);

    below = last - 1;
    filled = SC_MAX (below, level);
    extra = filled - level;

    out = (int8_t *) sc_array_push_count (write, 1 + extra);
    out[0] = filled;
    /* extra entries: filled, filled - 1, ..., level + 1 */
    for (k = 1; k <= extra; k++) {
      out[k] = (int8_t) (filled - (k - 1));
    }
    last = out[extra];
  }
}
/** Exchange the column profiles of shared nodes and merge what is received.
 *
 * The (offset, count) pairs in profile->lnode_ranges are first shared with
 * all sharers through p4est_lnodes_share_all_begin/end, which yields the
 * number of profile bytes to send to and receive from each sharer.  The
 * profile bytes themselves are then exchanged with nonblocking
 * point-to-point messages.  Each received profile is combined with the
 * local one using p6est_profile_union when profile->ptype is
 * P6EST_PROFILE_UNION and p6est_profile_intersection otherwise.
 *
 * In the union case a profile that grew is appended to
 * profile->lnode_columns, its range is redirected, and
 * profile->lnode_changed[profile->evenodd] is set for that node.
 * In the intersection case a profile that shrank is overwritten in place.
 * Finally p6est_profile_compress is called on the profile.
 *
 * \param [in,out] profile  Profile to synchronize.
 * \return                  Logical OR over all ranks of the communicator of
 *                          whether a profile grew in the union case; the
 *                          intersection case never sets the local flag.
 */
int
p6est_profile_sync (p6est_profile_t * profile)
{
  p4est_lnodes_t     *lnodes = profile->lnodes;
  sc_array_t         *sharers = lnodes->sharers;
  sc_array_t         *columns = profile->lnode_columns;
  p4est_locidx_t      (*ranges)[2] =
    (p4est_locidx_t (*)[2]) profile->lnode_ranges;
  const p4est_locidx_t num_nodes = lnodes->num_local_nodes;
  const size_t        nsharers = sharers->elem_count;
  const int           evenodd = profile->evenodd;
  p4est_lnodes_buffer_t *countbuf;
  sc_array_t          ranges_view;
  sc_array_t         *scratch;
  sc_MPI_Request     *recv_request, *send_request;
  p4est_locidx_t     *recv_offsets, *send_offsets;
  p4est_locidx_t      recv_total, send_total, running;
  int8_t             *recv, *send;
  int                *done_indices;
  int                 pending;
  int                 changed_here = 0;
  int                 changed_anywhere;
  int                 mpiret, mpirank;
  size_t              zz;

  mpiret = sc_MPI_Comm_rank (lnodes->mpicomm, &mpirank);
  SC_CHECK_MPI (mpiret);

  /* share the (offset, count) pair of every node with all sharers */
  sc_array_init_data (&ranges_view, ranges, 2 * sizeof (p4est_locidx_t),
                      num_nodes);
  countbuf = p4est_lnodes_share_all_begin (&ranges_view, lnodes);

  /* the send buffers hold our counts: prefix-sum them per sharer */
  send_offsets = P4EST_ALLOC (p4est_locidx_t, nsharers + 1);
  running = 0;
  for (zz = 0; zz < nsharers; zz++) {
    p4est_lnodes_rank_t *sharer;

    send_offsets[zz] = running;
    sharer = p4est_lnodes_rank_array_index (sharers, zz);
    if (sharer->rank != mpirank) {
      sc_array_t         *send_buf =
        (sc_array_t *) sc_array_index (countbuf->send_buffers, zz);
      const size_t        nnodes = sharer->shared_nodes.elem_count;
      size_t              zy;

      P4EST_ASSERT (nnodes == send_buf->elem_count);
      P4EST_ASSERT (send_buf->elem_size == 2 * sizeof (p4est_locidx_t));
      for (zy = 0; zy < nnodes; zy++) {
        p4est_locidx_t     *pair =
          (p4est_locidx_t *) sc_array_index (send_buf, zy);

        P4EST_ASSERT (pair[0] >= 0);
        P4EST_ASSERT (pair[1] >= 0);
        running += pair[1];
      }
    }
  }
  send_offsets[nsharers] = running;
  send_total = running;

  p4est_lnodes_share_all_end (countbuf);

  /* the receive buffers hold the sharers' counts: prefix-sum them too */
  recv_offsets = P4EST_ALLOC (p4est_locidx_t, nsharers + 1);
  running = 0;
  for (zz = 0; zz < nsharers; zz++) {
    p4est_lnodes_rank_t *sharer;

    recv_offsets[zz] = running;
    sharer = p4est_lnodes_rank_array_index (sharers, zz);
    if (sharer->rank != mpirank) {
      sc_array_t         *recv_buf =
        (sc_array_t *) sc_array_index (countbuf->recv_buffers, zz);
      const size_t        nnodes = sharer->shared_nodes.elem_count;
      size_t              zy;

      P4EST_ASSERT (nnodes == recv_buf->elem_count);
      P4EST_ASSERT (recv_buf->elem_size == 2 * sizeof (p4est_locidx_t));
      for (zy = 0; zy < nnodes; zy++) {
        p4est_locidx_t     *pair =
          (p4est_locidx_t *) sc_array_index (recv_buf, zy);

        P4EST_ASSERT (pair[0] >= 0);
        P4EST_ASSERT (pair[1] >= 0);
        running += pair[1];
      }
    }
  }
  recv_offsets[nsharers] = running;
  recv_total = running;

  recv = P4EST_ALLOC (int8_t, recv_total);
  recv_request = P4EST_ALLOC (sc_MPI_Request, nsharers);
  send = P4EST_ALLOC (int8_t, send_total);
  send_request = P4EST_ALLOC (sc_MPI_Request, nsharers);

  /* post receives */
  pending = 0;
  for (zz = 0; zz < nsharers; zz++) {
    p4est_lnodes_rank_t *sharer =
      p4est_lnodes_rank_array_index (sharers, zz);
    int                 icount = recv_offsets[zz + 1] - recv_offsets[zz];

    if (sharer->rank == mpirank || !icount) {
      /* nothing arrives from ourselves or from an empty message */
      recv_request[zz] = sc_MPI_REQUEST_NULL;
    }
    else {
      mpiret = sc_MPI_Irecv (recv + recv_offsets[zz],
                             icount * sizeof (int8_t), sc_MPI_BYTE,
                             sharer->rank, P6EST_COMM_BALANCE,
                             lnodes->mpicomm, recv_request + zz);
      SC_CHECK_MPI (mpiret);
      pending++;
    }
  }

  /* post sends */
  for (zz = 0; zz < nsharers; zz++) {
    p4est_lnodes_rank_t *sharer =
      p4est_lnodes_rank_array_index (sharers, zz);
    sc_array_t         *shared_nodes;
    size_t              zy, nnodes;
    int                 icount;

    if (sharer->rank == mpirank) {
      send_request[zz] = sc_MPI_REQUEST_NULL;
      continue;
    }

    /* pack the profiles of all nodes shared with this rank */
    shared_nodes = &sharer->shared_nodes;
    nnodes = shared_nodes->elem_count;
    icount = 0;
    for (zy = 0; zy < nnodes; zy++) {
      p4est_locidx_t      nidx =
        *((p4est_locidx_t *) sc_array_index (shared_nodes, zy));

      if (ranges[nidx][1]) {
        int8_t             *src =
          (int8_t *) sc_array_index (columns, ranges[nidx][0]);

        memcpy (send + send_offsets[zz] + icount, src,
                ranges[nidx][1] * sizeof (int8_t));
        icount += ranges[nidx][1];
      }
      else {
        P4EST_ASSERT (!ranges[nidx][0]);
      }
    }
    P4EST_ASSERT (icount == send_offsets[zz + 1] - send_offsets[zz]);

    if (icount) {
      mpiret = sc_MPI_Isend (send + send_offsets[zz],
                             icount * sizeof (int8_t), sc_MPI_BYTE,
                             sharer->rank, P6EST_COMM_BALANCE,
                             lnodes->mpicomm, send_request + zz);
      SC_CHECK_MPI (mpiret);
    }
    else {
      send_request[zz] = sc_MPI_REQUEST_NULL;
    }
  }

  /* merge incoming profiles as their messages complete */
  scratch = sc_array_new (sizeof (int8_t));
  done_indices = P4EST_ALLOC (int, nsharers);
  while (pending) {
    int                 outcount;
    int                 i;

    mpiret = sc_MPI_Waitsome (nsharers, recv_request, &outcount,
                              done_indices, sc_MPI_STATUSES_IGNORE);
    SC_CHECK_MPI (mpiret);

    for (i = 0; i < outcount; i++) {
      p4est_lnodes_rank_t *sharer;
      sc_array_t         *shared_nodes;
      sc_array_t         *recv_buf;
      size_t              zy, nnode;

      zz = done_indices[i];
      sharer = p4est_lnodes_rank_array_index (sharers, zz);
      shared_nodes = &sharer->shared_nodes;
      recv_buf = (sc_array_t *) sc_array_index (countbuf->recv_buffers, zz);
      nnode = shared_nodes->elem_count;
      P4EST_ASSERT (nnode == recv_buf->elem_count);

      running = recv_offsets[zz];
      for (zy = 0; zy < nnode; zy++) {
        sc_array_t          oldview, newview;
        p4est_locidx_t      nidx =
          *((p4est_locidx_t *) sc_array_index (shared_nodes, zy));
        p4est_locidx_t     *pair =
          (p4est_locidx_t *) sc_array_index (recv_buf, zy);

        sc_array_init_view (&oldview, columns, ranges[nidx][0],
                            ranges[nidx][1]);
        sc_array_init_data (&newview, recv + running, sizeof (int8_t),
                            pair[1]);

        if (profile->ptype == P6EST_PROFILE_UNION) {
          p6est_profile_union (&oldview, &newview, scratch);
          if (scratch->elem_count > oldview.elem_count) {
            int8_t             *dest;

            /* the merged profile grew: append it and redirect the range */
            changed_here = 1;
            ranges[nidx][0] = columns->elem_count;
            ranges[nidx][1] = scratch->elem_count;
            profile->lnode_changed[evenodd][nidx] = 1;

            dest = (int8_t *) sc_array_push_count (columns,
                                                   scratch->elem_count);
            memcpy (dest, scratch->array,
                    scratch->elem_count * scratch->elem_size);
          }
        }
        else {
          p6est_profile_intersection (&oldview, &newview, scratch);
          P4EST_ASSERT (scratch->elem_count <= oldview.elem_count);
          if (scratch->elem_count < oldview.elem_count) {
            /* the merged profile shrank: overwrite it in place */
            ranges[nidx][1] = scratch->elem_count;
            memcpy (oldview.array, scratch->array,
                    scratch->elem_count * scratch->elem_size);
          }
        }
        running += pair[1];
      }
      P4EST_ASSERT (running == recv_offsets[zz + 1]);
    }

    pending -= outcount;
    P4EST_ASSERT (pending >= 0);
  }
  P4EST_FREE (done_indices);
  sc_array_destroy (scratch);

  p6est_profile_compress (profile);
  p4est_lnodes_buffer_destroy (countbuf);

  P4EST_FREE (recv_request);
  P4EST_FREE (recv_offsets);
  P4EST_FREE (recv);

  /* the send buffer may only be released once all sends have completed */
  mpiret = sc_MPI_Waitall (nsharers, send_request, sc_MPI_STATUSES_IGNORE);
  SC_CHECK_MPI (mpiret);
  P4EST_FREE (send_request);
  P4EST_FREE (send_offsets);
  P4EST_FREE (send);

  changed_anywhere = changed_here;
  mpiret = sc_MPI_Allreduce (&changed_here, &changed_anywhere, 1,
                             sc_MPI_INT, sc_MPI_LOR, lnodes->mpicomm);
  SC_CHECK_MPI (mpiret);

  return changed_anywhere;
}
/** Balance the column profiles of all local elements against each other.
 *
 * The function repeats sweeps over all local elements until a sweep
 * changes no neighboring node.  In each sweep, the profile of an element's
 * center node is united (p6est_profile_union) with the profiles of those
 * of its 3x3 nodes that are flagged in profile->lnode_changed[evenodd] and
 * have grown since this element last recorded their size in
 * profile->enode_counts.  If the center profile grew, face and corner
 * profiles are derived from it with p6est_profile_balance_face/full
 * according to profile->btype and united into the surrounding nodes.
 * Nodes changed this way are flagged in lnode_changed[evenodd ^ 1], which
 * becomes the read side of the next sweep.  p6est_profile_compress is
 * called after every sweep.
 *
 * On return profile->evenodd holds the parity after the last sweep.
 *
 * \param [in,out] profile  Profile to balance; its lnodes must have
 *                          degree 2.
 */
void
p6est_profile_balance_local (p6est_profile_t * profile)
{
  p4est_lnodes_t     *lnodes = profile->lnodes;
  p4est_locidx_t      nln, nle;
  p4est_locidx_t     *en, (*lr)[2];
  sc_array_t         *lc;
  int                 i, j;
  p4est_locidx_t      nidx, enidx, eidx;
  p8est_connect_type_t btype = profile->btype;
  p4est_connect_type_t hbtype;
  int8_t             *c;
  sc_array_t         *thisprof;
  sc_array_t         *selfprof;
  sc_array_t         *faceprof;
  sc_array_t         *cornerprof;
  sc_array_t         *work;
  sc_array_t          oldprof;
  sc_array_t          testprof;
  int                 any_prof_change;
  int                 any_local_change;
  int                 evenodd = profile->evenodd;
  p4est_qcoord_t      diff = profile->diff;

  P4EST_ASSERT (profile->lnodes->degree == 2);

  if (btype == P8EST_CONNECT_FACE) {
    hbtype = P4EST_CONNECT_FACE;
  }
  else {
    hbtype = P4EST_CONNECT_FULL;
  }

  en = lnodes->element_nodes;
  nln = lnodes->num_local_nodes;
  nle = lnodes->num_local_elements;
  lr = (p4est_locidx_t (*)[2]) profile->lnode_ranges;
  lc = profile->lnode_columns;

  selfprof = sc_array_new (sizeof (int8_t));
  work = sc_array_new (sizeof (int8_t));
  faceprof = sc_array_new (sizeof (int8_t));
  cornerprof = sc_array_new (sizeof (int8_t));

  do {
    /* We read from evenodd and write to evenodd ^ 1.
     * The size is taken from the array element itself so that it always
     * matches the element type the array was allocated with. */
    memset (profile->lnode_changed[evenodd ^ 1], 0,
            sizeof (profile->lnode_changed[0][0]) * nln);
    P4EST_GLOBAL_VERBOSE ("p6est_balance local loop\n");

    any_local_change = 0;
    for (eidx = 0, enidx = 0; eidx < nle; eidx++) {
      p4est_locidx_t      start_enidx = enidx;

      /* start from the profile of the element's center node */
      nidx = en[start_enidx + P4EST_INSUL / 2];
      P4EST_ASSERT (lr[nidx][1]);
      sc_array_init_view (&oldprof, lc, lr[nidx][0], lr[nidx][1]);
      thisprof = &oldprof;
      any_prof_change = 0;

      /* gather: unite the center profile with changed neighbors */
      for (j = 0; j < 3; j++) {
        for (i = 0; i < 3; i++, enidx++) {
          nidx = en[enidx];
          if (!profile->lnode_changed[evenodd][nidx]) {
            /* if the profile hasn't changed since I wrote to it, there's no
             * need to balance against it */
            continue;
          }
          if (i != 1 && j != 1) {
            if (hbtype == P4EST_CONNECT_FACE) {
              /* skip corners if we don't need to balance them */
              P4EST_ASSERT (!lr[nidx][0]);
              P4EST_ASSERT (!lr[nidx][1]);
              continue;
            }
          }
          if (i == 1 && j == 1) {
            /* no need to further balance against oneself */
            continue;
          }
          P4EST_ASSERT (lr[nidx][1]);
          P4EST_ASSERT (profile->enode_counts[enidx] <= lr[nidx][1]);
          if (profile->enode_counts[enidx] == lr[nidx][1]) {
            /* if the profile hasn't changed since I wrote to it, there's no
             * need to balance against it */
            continue;
          }
          sc_array_init_view (&testprof, lc, lr[nidx][0], lr[nidx][1]);
          p6est_profile_union (thisprof, &testprof, work);
          if (work->elem_count > thisprof->elem_count) {
            P4EST_ASSERT (profile->lnode_changed[evenodd][nidx]);
            any_prof_change = 1;
            sc_array_copy (selfprof, work);
            thisprof = selfprof;
          }
        }
      }

      if (any_prof_change) {
        P4EST_ASSERT (thisprof == selfprof);
        P4EST_ASSERT (selfprof->elem_count > oldprof.elem_count);
        /* update */
        if (btype == P8EST_CONNECT_FACE) {
          p6est_profile_balance_face (selfprof, faceprof, work, diff);
        }
        else {
          p6est_profile_balance_full (selfprof, faceprof, work, diff);
        }
        if (btype == P8EST_CONNECT_EDGE) {
          p6est_profile_balance_face (selfprof, cornerprof, work, diff);
        }
        else if (btype == P8EST_CONNECT_FULL) {
          p6est_profile_balance_full (selfprof, cornerprof, work, diff);
        }

        /* scatter: revisit the same 3x3 nodes and push the new profiles */
        enidx = start_enidx;
        for (j = 0; j < 3; j++) {
          for (i = 0; i < 3; i++, enidx++) {
            thisprof = NULL;
            nidx = en[enidx];
            if (i != 1 && j != 1) {
              if (hbtype == P4EST_CONNECT_FACE) {
                /* skip corners if we don't need to balance them */
                P4EST_ASSERT (!lr[nidx][0]);
                P4EST_ASSERT (!lr[nidx][1]);
                continue;
              }
              else {
                thisprof = cornerprof;
              }
            }
            else if (i == 1 && j == 1) {
              thisprof = selfprof;
            }
            else {
              thisprof = faceprof;
            }
            P4EST_ASSERT (lr[nidx][1]);
            /* if this node has been initialized, combine the two profiles,
             * taking the finer layers from each */
            sc_array_init_view (&oldprof, lc, lr[nidx][0], lr[nidx][1]);
            if (i == 1 && j == 1) {
              sc_array_copy (work, thisprof);
            }
            else {
              p6est_profile_union (thisprof, &oldprof, work);
            }
            if (work->elem_count > oldprof.elem_count) {
              if (!(i == 1 && j == 1)) {
                /* we don't count changing self */
                profile->lnode_changed[evenodd ^ 1][nidx] = 1;
                any_local_change = 1;
              }
              /* append the grown profile and redirect the node's range */
              lr[nidx][0] = lc->elem_count;
              lr[nidx][1] = work->elem_count;
              c = (int8_t *) sc_array_push_count (lc, work->elem_count);
              memcpy (c, work->array, work->elem_count * work->elem_size);
            }
            /* remember the size this element has seen for this node */
            profile->enode_counts[enidx] = lr[nidx][1];
          }
        }
      }
    }
    p6est_profile_compress (profile);
    evenodd ^= 1;
  } while (any_local_change);

  profile->evenodd = evenodd;
  sc_array_destroy (selfprof);
  sc_array_destroy (faceprof);
  sc_array_destroy (cornerprof);
  sc_array_destroy (work);
}
p6est_profile_t * p6est_profile_new_local (p6est_t * p6est, p6est_ghost_t * ghost, p6est_profile_type_t ptype, p8est_connect_type_t btype, int degree) { p6est_profile_t *profile = P4EST_ALLOC (p6est_profile_t, 1); p4est_lnodes_t *lnodes; p4est_locidx_t nln, nle; p4est_topidx_t jt; p4est_t *columns = p6est->columns; p4est_tree_t *tree; sc_array_t *tquadrants; p4est_quadrant_t *col; p4est_qcoord_t diff = P4EST_ROOT_LEN - p6est->root_len; size_t first, last, count, zz, zy; p4est_locidx_t *en, (*lr)[2]; sc_array_t *lc; int i, j; p2est_quadrant_t *layer; sc_array_t *layers = p6est->layers; p4est_locidx_t nidx, enidx; p4est_connect_type_t hbtype; int8_t *c; sc_array_t *thisprof; sc_array_t *selfprof; sc_array_t *faceprof; sc_array_t *cornerprof; sc_array_t *work; sc_array_t oldprof; const int Nrp = degree + 1; P4EST_ASSERT (degree > 1); profile->ptype = ptype; profile->btype = btype; profile->lnode_changed[0] = NULL; profile->lnode_changed[1] = NULL; profile->enode_counts = NULL; profile->diff = diff; if (btype == P8EST_CONNECT_FACE) { hbtype = P4EST_CONNECT_FACE; } else { hbtype = P4EST_CONNECT_FULL; } if (ghost == NULL) { profile->cghost = p4est_ghost_new (p6est->columns, P4EST_CONNECT_FULL); profile->ghost_owned = 1; } else { P4EST_ASSERT (ghost->column_ghost->btype == P4EST_CONNECT_FULL); profile->cghost = ghost->column_ghost; profile->ghost_owned = 0; } if (ptype == P6EST_PROFILE_UNION) { P4EST_ASSERT (degree == 2); } profile->lnodes = lnodes = p4est_lnodes_new (p6est->columns, profile->cghost, degree); en = lnodes->element_nodes; nln = lnodes->num_local_nodes; nle = lnodes->num_local_elements; profile->lnode_ranges = P4EST_ALLOC_ZERO (p4est_locidx_t, 2 * nln); lr = (p4est_locidx_t (*)[2]) profile->lnode_ranges; profile->lnode_columns = lc = sc_array_new (sizeof (int8_t)); selfprof = sc_array_new (sizeof (int8_t)); work = sc_array_new (sizeof (int8_t)); faceprof = sc_array_new (sizeof (int8_t)); cornerprof = sc_array_new (sizeof (int8_t)); if (ptype == 
P6EST_PROFILE_UNION) { profile->lnode_changed[0] = P4EST_ALLOC (p4est_locidx_t, nln); profile->lnode_changed[1] = P4EST_ALLOC (p4est_locidx_t, nln); profile->enode_counts = P4EST_ALLOC (p4est_locidx_t, P4EST_INSUL * nle); profile->evenodd = 0; memset (profile->lnode_changed[0], -1, nln * sizeof (int)); } /* create the profiles for each node: layers are reduced to just their level * */ for (enidx = 0, jt = columns->first_local_tree; jt <= columns->last_local_tree; ++jt) { tree = p4est_tree_array_index (columns->trees, jt); tquadrants = &tree->quadrants; for (zz = 0; zz < tquadrants->elem_count; ++zz) { col = p4est_quadrant_array_index (tquadrants, zz); P6EST_COLUMN_GET_RANGE (col, &first, &last); count = last - first; sc_array_truncate (selfprof); c = (int8_t *) sc_array_push_count (selfprof, count); for (zy = first; zy < last; zy++) { layer = p2est_quadrant_array_index (layers, zy); *(c++) = layer->level; } if (ptype == P6EST_PROFILE_UNION) { p6est_profile_balance_self (selfprof, work); if (btype == P8EST_CONNECT_FACE) { p6est_profile_balance_face (selfprof, faceprof, work, diff); } else { p6est_profile_balance_full (selfprof, faceprof, work, diff); } if (btype == P8EST_CONNECT_EDGE) { p6est_profile_balance_face (selfprof, cornerprof, work, diff); } else if (btype == P8EST_CONNECT_FULL) { p6est_profile_balance_full (selfprof, cornerprof, work, diff); } } for (j = 0; j < Nrp; j++) { for (i = 0; i < Nrp; i++, enidx++) { nidx = en[enidx]; if (ptype == P6EST_PROFILE_UNION) { thisprof = NULL; if (!(i % degree) && !(j % degree)) { if (hbtype == P4EST_CONNECT_FACE) { /* skip corners if we don't need to balance them */ P4EST_ASSERT (!lr[nidx][0]); P4EST_ASSERT (!lr[nidx][1]); continue; } else { thisprof = cornerprof; } } else if ((i % degree) && (j % degree)) { thisprof = selfprof; } else { thisprof = faceprof; } count = thisprof->elem_count; profile->enode_counts[enidx] = count; if (!lr[nidx][1]) { /* if this node has not yet been initialized, initialize it */ lr[nidx][0] 
= lc->elem_count; lr[nidx][1] = count; c = (int8_t *) sc_array_push_count (lc, count); memcpy (c, thisprof->array, count * sizeof (int8_t)); } else { /* if this node has been initialized, combine the two profiles, * taking the finer layers from each */ sc_array_init_view (&oldprof, lc, lr[nidx][0], lr[nidx][1]); p6est_profile_union (thisprof, &oldprof, work); if (work->elem_count > oldprof.elem_count) { lr[nidx][0] = lc->elem_count; lr[nidx][1] = work->elem_count; c = (int8_t *) sc_array_push_count (lc, work->elem_count); memcpy (c, work->array, work->elem_count * work->elem_size); } } } else { count = selfprof->elem_count; if (!lr[nidx][1]) { /* if this node has not yet been initialized, initialize it */ lr[nidx][0] = lc->elem_count; lr[nidx][1] = count; c = (int8_t *) sc_array_push_count (lc, count); memcpy (c, selfprof->array, count * sizeof (int8_t)); } else { /* if this node has been initialized, combine the two profiles, * taking the coarser layers from each */ sc_array_init_view (&oldprof, lc, lr[nidx][0], lr[nidx][1]); p6est_profile_intersection (selfprof, &oldprof, work); P4EST_ASSERT (work->elem_count <= oldprof.elem_count); if (work->elem_count < oldprof.elem_count) { lr[nidx][1] = work->elem_count; memcpy (oldprof.array, work->array, work->elem_count * work->elem_size); } } } } } } } p6est_profile_compress (profile); sc_array_destroy (selfprof); sc_array_destroy (faceprof); sc_array_destroy (cornerprof); sc_array_destroy (work); return profile; }
/** Refine the layers of every local column to match a profile.
 *
 * For each local column the profile of its center node is looked up.  If
 * that profile has more entries than the column has layers, the layers are
 * split in two repeatedly until every resulting layer has the level that
 * the profile prescribes.  The new layers are appended to p6est->layers
 * and the column's range is redirected to them.  Afterwards
 * p6est_compress_columns and p6est_update_offsets are called.
 *
 * \param [in,out] p6est    Forest whose layers are refined.
 * \param [in] profile      Profile to refine to; its lnodes must have
 *                          degree 2.
 * \param [in] init_fn      Passed to p6est_layer_init_data for each child
 *                          layer created.
 * \param [in] replace_fn   If not NULL, called for every split with the
 *                          parent layer and its two children.
 */
void
p6est_refine_to_profile (p6est_t * p6est, p6est_profile_t * profile,
                         p6est_init_t init_fn, p6est_replace_t replace_fn)
{
  sc_array_t         *layers = p6est->layers;
  sc_array_t         *levels = profile->lnode_columns;
  p4est_locidx_t     *elem_nodes = profile->lnodes->element_nodes;
  p4est_locidx_t      (*ranges)[2];
  sc_array_t         *refined;
  p4est_topidx_t      jt;
  p4est_locidx_t      eidx;
  size_t              zz;

  P4EST_ASSERT (profile->lnodes->degree == 2);
  ranges = (p4est_locidx_t (*)[2]) profile->lnode_ranges;

  refined = sc_array_new (sizeof (p2est_quadrant_t));
  eidx = 0;
  for (jt = p6est->columns->first_local_tree;
       jt <= p6est->columns->last_local_tree; ++jt) {
    p4est_tree_t       *tree =
      p4est_tree_array_index (p6est->columns->trees, jt);
    sc_array_t         *tquadrants = &tree->quadrants;

    for (zz = 0; zz < tquadrants->elem_count; ++zz, eidx++) {
      p2est_quadrant_t    pending[P4EST_QMAXLEVEL];
      p2est_quadrant_t    lower, parent;
      p2est_quadrant_t   *cur, *dest;
      p4est_quadrant_t   *col;
      p4est_locidx_t      center, pidx, pbegin, pend;
      size_t              first, last, next_layer;
      int                 npending;

      col = p4est_quadrant_array_index (tquadrants, zz);
      P6EST_COLUMN_GET_RANGE (col, &first, &last);

      /* the center node of the element carries the column's profile */
      center = elem_nodes[P4EST_INSUL * eidx + P4EST_INSUL / 2];
      P4EST_ASSERT ((size_t) ranges[center][1] >= last - first);
      pbegin = ranges[center][0];
      pend = pbegin + ranges[center][1];

      if (!((size_t) ranges[center][1] > last - first)) {
        /* the column already has as many layers as the profile */
        continue;
      }

      sc_array_truncate (refined);
      npending = 0;
      next_layer = first;
      for (pidx = pbegin; pidx < pend; pidx++) {
        int8_t              want;

        P4EST_ASSERT (npending || next_layer < last);

        want = *((int8_t *) sc_array_index (levels, pidx));

        /* upper halves of earlier splits are used up first */
        if (npending) {
          cur = &(pending[--npending]);
        }
        else {
          cur = p2est_quadrant_array_index (layers, next_layer++);
        }

        P4EST_ASSERT (cur->level <= want);
        while (cur->level < want) {
          p2est_quadrant_t   *child[2];
          p2est_quadrant_t   *parent_ptr;

          /* copy the layer before its slot on the stack is reused */
          parent = *cur;
          lower = *cur;
          lower.level++;

          /* the upper child waits on the stack, shifted by its length */
          pending[npending] = lower;
          pending[npending].z += P4EST_QUADRANT_LEN (lower.level);
          child[0] = &lower;
          child[1] = &pending[npending++];

          p6est_layer_init_data (p6est, jt, col, child[0], init_fn);
          p6est_layer_init_data (p6est, jt, col, child[1], init_fn);
          parent_ptr = &parent;
          if (replace_fn) {
            replace_fn (p6est, jt, 1, 1, &col, &parent_ptr, 1, 2, &col,
                        child);
          }
          p6est_layer_free_data (p6est, &parent);

          /* keep refining the lower child */
          cur = &lower;
        }
        dest = p2est_quadrant_array_push (refined);
        *dest = *cur;
      }
      P4EST_ASSERT (refined->elem_count == (size_t) ranges[center][1]);

      /* append the refined layers and point the column at them */
      first = layers->elem_count;
      last = first + refined->elem_count;
      P6EST_COLUMN_SET_RANGE (col, first, last);
      dest = (p2est_quadrant_t *) sc_array_push_count (layers,
                                                       refined->elem_count);
      memcpy (dest, refined->array,
              refined->elem_count * refined->elem_size);
    }
  }
  sc_array_destroy (refined);

  p6est_compress_columns (p6est);
  p6est_update_offsets (p6est);
}
/** Create the node numbering of a p6est for a given polynomial degree.
 *
 * For degree 1 the numbering is obtained by building the degree 2
 * numbering and keeping only the nodes that sit at positions with all
 * three indices different from 1 in some element's 3x3x3 node block; the
 * element-to-node table, ownership counts, sharer lists and nonlocal node
 * numbers are renumbered accordingly.
 *
 * For other degrees an intersection profile of the columns is created and
 * synchronized.  Each node column with nlayers layers carries
 * nlayers * degree + 1 nodes; the node columns are laid out consecutively
 * in the order of the column lnodes, and sharer information and global
 * numbers are derived from the column lnodes.
 *
 * \param [in] p6est    Forest to number.
 * \param [in] ghost    Ghost layer passed on to p6est_profile_new_local.
 * \param [in] degree   Polynomial degree; asserted >= 1.
 * \return              The newly allocated lnodes structure.
 */
p6est_lnodes_t *
p6est_lnodes_new (p6est_t * p6est, p6est_ghost_t * ghost, int degree)
{
  p6est_lnodes_t     *lnodes;
  p6est_profile_t    *profile;
  p4est_lnodes_t     *clnodes;
  int                 nperelem = (degree + 1) * (degree + 1) * (degree + 1);
  p4est_locidx_t      ncid, cid, enid, *en;
  p4est_locidx_t      nnodecols;
  p4est_locidx_t      nelemcols;
  p4est_locidx_t      nll;
  p4est_locidx_t      nlayers;
  p4est_locidx_t     *layernodecount;
  p4est_locidx_t     *layernodeoffsets;
  p4est_locidx_t      (*lr)[2];
  p4est_locidx_t      ncolnodes;
  p4est_locidx_t     *global_owned_count;
  p4est_locidx_t      num_owned, num_local;
  p4est_gloidx_t      gnum_owned, offset;
  p4est_gloidx_t     *owned_offsets;
  int                 i, j, k;
  int                 mpisize = p6est->mpisize;
  int                 mpiret;
  sc_array_t          lnoview;
  size_t              zz, nsharers;
  int                 Nrp = degree + 1;

  if (degree == 1) {
    p4est_locidx_t      eid, nid, enid2, nid2;
    p4est_locidx_t     *newnum, newlocal, newowned;

    P4EST_GLOBAL_PRODUCTION ("Into adapt p6est_lnodes_new for degree = 1\n");
    p4est_log_indent_push ();
    /* adapt 2 to 1 */

    lnodes = p6est_lnodes_new (p6est, ghost, 2);
    nll = p6est->layers->elem_count;
    num_local = lnodes->num_local_nodes;
    num_owned = lnodes->owned_count;

    en = lnodes->element_nodes;
    newnum = P4EST_ALLOC (p4est_locidx_t, P8EST_INSUL * nll);
    memset (newnum, -1, P8EST_INSUL * nll * sizeof (p4est_locidx_t));

    /* mark every degree 2 node that sits at an element corner position */
    for (enid = 0, eid = 0; eid < nll; eid++) {
      for (k = 0; k < 3; k++) {
        for (j = 0; j < 3; j++) {
          for (i = 0; i < 3; i++, enid++) {
            if (k != 1 && j != 1 && i != 1) {
              newnum[en[enid]] = 0;
            }
          }
        }
      }
    }

    /* number the marked nodes consecutively, counting the owned ones */
    newlocal = 0;
    newowned = 0;
    for (nid = 0; nid < num_local; nid++) {
      if (newnum[nid] >= 0) {
        newnum[nid] = newlocal++;
        if (nid < num_owned) {
          newowned++;
        }
      }
    }

    /* compress en */
    enid2 = 0;
    for (enid = 0, eid = 0; eid < nll; eid++) {
      for (k = 0; k < 3; k++) {
        for (j = 0; j < 3; j++) {
          for (i = 0; i < 3; i++, enid++) {
            if (k != 1 && j != 1 && i != 1) {
              en[enid2++] = newnum[en[enid]];
            }
          }
        }
      }
    }
    P4EST_ASSERT (enid2 == P8EST_CHILDREN * nll);
    lnodes->element_nodes = P4EST_REALLOC (en, p4est_locidx_t,
                                           P8EST_CHILDREN * nll);

    /* gather the new owned counts of all ranks and prefix-sum them */
    owned_offsets = P4EST_ALLOC (p4est_gloidx_t, mpisize + 1);

    mpiret = sc_MPI_Allgather (&newowned, 1, P4EST_MPI_LOCIDX,
                               lnodes->global_owned_count, 1,
                               P4EST_MPI_LOCIDX, p6est->mpicomm);
    /* a failed gather would leave the counts read below undefined */
    SC_CHECK_MPI (mpiret);

    owned_offsets[0] = 0;
    for (i = 0; i < mpisize; i++) {
      owned_offsets[i + 1] = owned_offsets[i] + lnodes->global_owned_count[i];
    }
    lnodes->global_offset = owned_offsets[p6est->mpirank];
    lnodes->num_local_nodes = newlocal;
    lnodes->owned_count = newowned;
    lnodes->degree = 1;
    lnodes->vnodes = P8EST_CHILDREN;

    lnodes->nonlocal_nodes =
      P4EST_REALLOC (lnodes->nonlocal_nodes, p4est_gloidx_t,
                     newlocal - newowned);

    /* renumber the ownership ranges and shared node lists of all sharers */
    nsharers = lnodes->sharers->elem_count;
    for (zz = 0; zz < nsharers; zz++) {
      size_t              nshared, zy, zw;
      p6est_lnodes_rank_t *rank =
        p6est_lnodes_rank_array_index (lnodes->sharers, zz);

      if (rank->owned_count) {
        if (rank->rank != p6est->mpirank) {
          p4est_locidx_t      newrankowned = 0;
          p4est_locidx_t      newrankoffset = -1;

          for (nid = rank->owned_offset;
               nid < rank->owned_offset + rank->owned_count; nid++) {
            if (newnum[nid] >= 0) {
              /* store the owner's global offset; the owner's local
               * number is added after the exchange below */
              lnodes->nonlocal_nodes[newnum[nid] - newowned] =
                owned_offsets[rank->rank];
              newrankowned++;
              if (newrankoffset < 0) {
                newrankoffset = newnum[nid];
              }
            }
          }
          rank->owned_offset = newrankoffset;
          rank->owned_count = newrankowned;
        }
        else {
          rank->owned_offset = 0;
          rank->owned_count = newowned;
        }
      }
      rank->shared_mine_count = 0;
      rank->shared_mine_offset = -1;
      zw = 0;
      nshared = rank->shared_nodes.elem_count;
      for (zy = 0; zy < nshared; zy++) {

        nid = *((p4est_locidx_t *) sc_array_index (&rank->shared_nodes, zy));
        if (newnum[nid] >= 0) {
          p4est_locidx_t     *lp;

          /* compact the list in place: zw never runs ahead of zy */
          lp = (p4est_locidx_t *) sc_array_index (&rank->shared_nodes, zw++);
          *lp = newnum[nid];
          if (newnum[nid] < newowned) {
            rank->shared_mine_count++;
            if (rank->shared_mine_offset == -1) {
              rank->shared_mine_offset = zw - 1;
            }
          }
        }
      }
      sc_array_resize (&rank->shared_nodes, zw);
    }

    /* send local numbers to others */
    {
      sc_array_t          view;

      sc_array_init_data (&view, newnum, sizeof (p4est_locidx_t), newlocal);

      p6est_lnodes_share_owned (&view, lnodes);
    }

    nid2 = 0;
    for (nid = num_owned; nid < num_local; nid++) {
      if (newnum[nid] >= 0) {
        lnodes->nonlocal_nodes[nid2++] += (p4est_gloidx_t) newnum[nid];
      }
    }
    P4EST_ASSERT (nid2 == newlocal - newowned);

    P4EST_FREE (owned_offsets);
    P4EST_FREE (newnum);

    p4est_log_indent_pop ();
    P4EST_GLOBAL_PRODUCTION ("Done adapt p6est_lnodes_new for degree = 1\n");

    return lnodes;
  }

  P4EST_GLOBAL_PRODUCTION ("Into p6est_lnodes_new\n");
  p4est_log_indent_push ();

  P4EST_ASSERT (degree >= 1);

  lnodes = P4EST_ALLOC (p6est_lnodes_t, 1);

  /* first get the profile */
  profile = p6est_profile_new_local (p6est, ghost, P6EST_PROFILE_INTERSECTION,
                                     P8EST_CONNECT_FULL, degree);
  p6est_profile_sync (profile);

  lr = (p4est_locidx_t (*)[2]) profile->lnode_ranges;

  clnodes = profile->lnodes;

  nnodecols = clnodes->num_local_nodes;
  nelemcols = clnodes->num_local_elements;
  en = clnodes->element_nodes;
  layernodecount = P4EST_ALLOC_ZERO (p4est_locidx_t, nnodecols);
  layernodeoffsets = P4EST_ALLOC_ZERO (p4est_locidx_t, nnodecols + 1);

  /* count the nodes in every node column referenced by a local element */
  for (cid = 0, enid = 0; cid < nelemcols; cid++) {
    for (j = 0; j < Nrp; j++) {
      for (i = 0; i < Nrp; i++, enid++) {
        ncid = en[enid];
        nlayers = lr[ncid][1];
        P4EST_ASSERT (nlayers);
        ncolnodes = nlayers * degree + 1;
        layernodecount[ncid] = ncolnodes;
      }
    }
  }

  /* node columns below clnodes->owned_count contribute to the owned total */
  num_owned = 0;
  num_local = 0;
  for (ncid = 0; ncid < nnodecols; ncid++) {
    num_local += layernodecount[ncid];
    if (ncid < clnodes->owned_count) {
      num_owned += layernodecount[ncid];
    }
  }
  P4EST_VERBOSEF ("p6est_lnodes: %d owned %d local\n", num_owned, num_local);

  /* prefix-sum the counts: first node number of every node column */
  if (nnodecols) {
    layernodeoffsets[0] = 0;
    for (ncid = 0; ncid < nnodecols; ncid++) {
      layernodeoffsets[ncid + 1] = layernodeoffsets[ncid] +
        layernodecount[ncid];
    }
  }

  gnum_owned = num_owned;

  owned_offsets = P4EST_ALLOC (p4est_gloidx_t, mpisize + 1);
  global_owned_count = P4EST_ALLOC (p4est_locidx_t, mpisize);

  mpiret = sc_MPI_Allgather (&gnum_owned, 1, P4EST_MPI_GLOIDX,
                             owned_offsets, 1, P4EST_MPI_GLOIDX,
                             p6est->mpicomm);
  SC_CHECK_MPI (mpiret);

  /* turn the gathered counts into offsets in place, keeping the counts */
  offset = 0;
  for (i = 0; i < mpisize; i++) {
    global_owned_count[i] = (p4est_locidx_t) owned_offsets[i];
    gnum_owned = owned_offsets[i];
    owned_offsets[i] = offset;
    offset += gnum_owned;
  }
  owned_offsets[mpisize] = offset;

  nll = p6est->layers->elem_count;
  nsharers = clnodes->sharers->elem_count;

  lnodes->mpicomm = p6est->mpicomm;
  lnodes->num_local_nodes = num_local;
  lnodes->owned_count = num_owned;
  lnodes->global_offset = owned_offsets[p6est->mpirank];
  lnodes->nonlocal_nodes = P4EST_ALLOC (p4est_gloidx_t, num_local - num_owned);
  lnodes->sharers = sc_array_new_size (sizeof (p6est_lnodes_rank_t),
                                       nsharers);
  lnodes->global_owned_count = global_owned_count;

  lnodes->degree = degree;
  lnodes->vnodes = nperelem;
  lnodes->num_local_elements = nll;
  lnodes->face_code = P4EST_ALLOC (p6est_lnodes_code_t, nll);
  lnodes->element_nodes = P4EST_ALLOC (p4est_locidx_t, nperelem * nll);

  p6est_profile_element_to_node (p6est, profile, layernodeoffsets,
                                 lnodes->element_nodes, lnodes->face_code);

  /* expand every column sharer into a sharer of individual nodes */
  for (zz = 0; zz < nsharers; zz++) {
    p4est_lnodes_rank_t *crank =
      p4est_lnodes_rank_array_index (clnodes->sharers, zz);
    p6est_lnodes_rank_t *rank =
      p6est_lnodes_rank_array_index (lnodes->sharers, zz);
    size_t              zy;
    size_t              nshared;

    rank->rank = crank->rank;
    sc_array_init (&rank->shared_nodes, sizeof (p4est_locidx_t));
    nshared = crank->shared_nodes.elem_count;

    rank->owned_offset = -1;
    rank->owned_count = 0;
    rank->shared_mine_count = 0;
    rank->shared_mine_offset = -1;
    for (zy = 0; zy < nshared; zy++) {
      p4est_locidx_t      cnid =
        *((p4est_locidx_t *) sc_array_index (&crank->shared_nodes, zy));
      p4est_locidx_t     *lp;
      p4est_locidx_t      nthis, il;
      p4est_locidx_t      old_count = rank->shared_nodes.elem_count;

      nthis = layernodecount[cnid];
      lp =
        (p4est_locidx_t *) sc_array_push_count (&rank->shared_nodes, nthis);

      for (il = 0; il < nthis; il++) {
        lp[il] = layernodeoffsets[cnid] + il;
        /* nodes of a column inside the column sharer's shared_mine window
         * count as shared_mine here as well */
        if (zy >= (size_t) crank->shared_mine_offset
            && (p4est_locidx_t) zy - crank->shared_mine_offset <
            crank->shared_mine_count) {
          rank->shared_mine_count++;
          if (rank->shared_mine_offset == -1) {
            rank->shared_mine_offset = old_count + il;
          }
        }
        /* likewise for columns inside the column sharer's owned range */
        if (cnid >= crank->owned_offset
            && cnid - crank->owned_offset < crank->owned_count) {
          rank->owned_count++;
          if (rank->owned_offset == -1) {
            rank->owned_offset = lp[il];
          }
        }
      }
    }
    if (rank->rank == p6est->mpirank) {
      rank->owned_offset = 0;
      rank->owned_count = num_owned;
    }
  }

  /* share the first node number of every owned node column; layernodecount
   * is reused as the exchange buffer */
  memcpy (layernodecount, layernodeoffsets,
          nnodecols * sizeof (p4est_locidx_t));

  sc_array_init_data (&lnoview, layernodecount, sizeof (p4est_locidx_t),
                      (size_t) nnodecols);

  p4est_lnodes_share_owned (&lnoview, clnodes);

  /* global number = owner's global offset + shared value + position in
   * the column */
  for (zz = 0; zz < nsharers; zz++) {
    p4est_lnodes_rank_t *crank =
      p4est_lnodes_rank_array_index (clnodes->sharers, zz);

    if (crank->rank == p6est->mpirank) {
      continue;
    }

    for (ncid = crank->owned_offset;
         ncid < crank->owned_offset + crank->owned_count; ncid++) {
      p4est_gloidx_t      owners_offset;
      p4est_locidx_t      nid;

      P4EST_ASSERT (ncid >= clnodes->owned_count);
      owners_offset = owned_offsets[crank->rank] + layernodecount[ncid];
      for (nid = layernodeoffsets[ncid]; nid < layernodeoffsets[ncid + 1];
           nid++) {
        P4EST_ASSERT (nid >= num_owned);
        P4EST_ASSERT (nid < num_local);
        lnodes->nonlocal_nodes[nid - num_owned] = owners_offset++;
      }
    }
  }

  p6est_profile_destroy (profile);

  P4EST_FREE (owned_offsets);
  P4EST_FREE (layernodecount);
  P4EST_FREE (layernodeoffsets);

  p4est_log_indent_pop ();
  P4EST_GLOBAL_PRODUCTION ("Done p6est_lnodes_new\n");

  return lnodes;
}