static int progress_callback(void) { int ret, count = 0; ptl_event_t ev; ompi_osc_portals4_request_t *req; int32_t ops; while (true) { ret = PtlEQGet(mca_osc_portals4_component.matching_eq_h, &ev); if (PTL_OK == ret) { goto process; } else if (PTL_EQ_DROPPED == ret) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d: PtlEQGet reported dropped event", __FILE__, __LINE__); goto process; } else if (PTL_EQ_EMPTY == ret) { return 0; } else { opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d: PtlEQGet failed: %d\n", __FILE__, __LINE__, ret); return 0; } process: if (ev.ni_fail_type != PTL_OK) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d: event failure: %d %d", __FILE__, __LINE__, ev.type, ev.ni_fail_type); return 0; } count++; if (NULL != ev.user_ptr) { /* be sure that we receive the PTL_EVENT_LINK */ if (ev.type == PTL_EVENT_LINK) { *(int *)ev.user_ptr = *(int *)ev.user_ptr + 1; opal_condition_broadcast(&mca_osc_portals4_component.cond); continue; } req = (ompi_osc_portals4_request_t*) ev.user_ptr; opal_atomic_add_size_t(&req->super.req_status._ucount, ev.mlength); ops = opal_atomic_add_32(&req->ops_committed, 1); if (ops == req->ops_expected) { OPAL_THREAD_LOCK(&ompi_request_lock); ompi_request_complete(&req->super, true); OPAL_THREAD_UNLOCK(&ompi_request_lock); } } } return count; }
/*
 * Blocking barrier over an intra-communicator using Portals4 triggered
 * operations arranged as a tree derived from the communicator's hypercube
 * dimension.
 *
 * A counting event (CT) and a zero-length match entry (ME) are created for
 * this barrier instance.  Each rank then queues:
 *   - (non-root only) a triggered put to its parent that fires once all of
 *     its children have reported in, and
 *   - a triggered put to each child that fires once the parent (or, on the
 *     root, all children) has reported in,
 * and finally waits on the CT for the full number of expected messages.
 *
 * comm    communicator to synchronize
 * module  collective module; cast to mca_coll_portals4_module_t
 *
 * Returns OMPI_SUCCESS on success, OMPI_ERR_TEMP_OUT_OF_RESOURCE when the
 * counting event cannot be allocated, and OMPI_ERROR on any other Portals
 * failure.
 *
 * On failure after allocation the ME is unlinked and the CT freed, so the
 * error paths no longer leak them.  The CT is intentionally kept if the ME
 * could not be unlinked, because that ME still refers to it.
 */
int
ompi_coll_portals4_barrier_intra(struct ompi_communicator_t *comm,
                                 mca_coll_base_module_t *module)
{
    mca_coll_portals4_module_t *portals4_module = (mca_coll_portals4_module_t*) module;
    int ret, i, dim, hibit, mask, num_msgs;
    int size = ompi_comm_size(comm);
    int rank = ompi_comm_rank(comm);
    ptl_ct_event_t ct;
    ptl_handle_ct_t ct_h;
    ptl_handle_me_t me_h;
    ptl_me_t me;
    size_t count;
    ptl_match_bits_t match_bits;
    ptl_handle_md_t md_h;
    void *base;

    ompi_coll_portals4_get_md(0, &md_h, &base);

    /* per-module sequence number, folded into the match bits below */
    count = opal_atomic_add_size_t(&portals4_module->barrier_count, 1);

    ret = PtlCTAlloc(mca_coll_portals4_component.ni_h, &ct_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                            "%s:%d: PtlCTAlloc failed: %d\n",
                            __FILE__, __LINE__, ret);
        return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
    }

    COLL_PORTALS4_SET_BITS(match_bits, ompi_comm_get_cid(comm),
                           0, COLL_PORTALS4_BARRIER, count);

    /* Build "tree" out of hypercube */
    dim = comm->c_cube_dim;
    hibit = opal_hibit(rank, dim);
    --dim;

    /* receive space */
    me.start = NULL;
    me.length = 0;
    me.ct_handle = ct_h;
    me.min_free = 0;
    me.uid = mca_coll_portals4_component.uid;
    me.options = PTL_ME_OP_PUT | PTL_ME_EVENT_SUCCESS_DISABLE |
        PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE |
        PTL_ME_EVENT_CT_COMM | PTL_ME_EVENT_CT_OVERFLOW;
    me.match_id.phys.nid = PTL_NID_ANY;
    me.match_id.phys.pid = PTL_PID_ANY;
    me.match_bits = match_bits;
    me.ignore_bits = 0;
    ret = PtlMEAppend(mca_coll_portals4_component.ni_h,
                      mca_coll_portals4_component.pt_idx,
                      &me,
                      PTL_PRIORITY_LIST,
                      NULL,
                      &me_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                            "%s:%d: PtlMEAppend failed: %d\n",
                            __FILE__, __LINE__, ret);
        /* no ME exists yet, only the counter needs releasing */
        goto err_free_ct;
    }

    /* calculate number of children to receive from */
    num_msgs = ompi_coll_portals4_get_nchildren(dim + 1, hibit, rank, size);

    /* send to parent when children have sent to us */
    if (rank > 0) {
        int parent = rank & ~(1 << hibit);
        ret = PtlTriggeredPut(md_h,
                              0,
                              0,
                              PTL_NO_ACK_REQ,
                              ompi_coll_portals4_get_peer(comm, parent),
                              mca_coll_portals4_component.pt_idx,
                              match_bits,
                              0,
                              NULL,
                              0,
                              ct_h,
                              num_msgs);
        if (PTL_OK != ret) {
            opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                                "%s:%d: PtlTriggeredPut failed: %d\n",
                                __FILE__, __LINE__, ret);
            goto err_unlink_me;
        }

        /* we'll need to wait for the parent response before the next set of comms */
        num_msgs++;
    }

    /* send to children when parent (or all children if root) has sent to us */
    for (i = hibit + 1, mask = 1 << i; i <= dim; ++i, mask <<= 1) {
        int peer = rank | mask;
        if (peer < size) {
            ret = PtlTriggeredPut(md_h,
                                  0,
                                  0,
                                  PTL_NO_ACK_REQ,
                                  ompi_coll_portals4_get_peer(comm, peer),
                                  mca_coll_portals4_component.pt_idx,
                                  match_bits,
                                  0,
                                  NULL,
                                  0,
                                  ct_h,
                                  num_msgs);
            if (PTL_OK != ret) {
                opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                                    "%s:%d: PtlTriggeredPut failed: %d\n",
                                    __FILE__, __LINE__, ret);
                goto err_unlink_me;
            }
        }
    }

    /* Wait for all incoming messages */
    ret = PtlCTWait(ct_h, num_msgs, &ct);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                            "%s:%d: PtlCTWait failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto err_unlink_me;
    }

    /* cleanup */
    ret = PtlMEUnlink(me_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                            "%s:%d: PtlMEUnlink failed: %d\n",
                            __FILE__, __LINE__, ret);
        /* the ME is still linked and refers to ct_h, so keep the counter */
        return OMPI_ERROR;
    }

    ret = PtlCTFree(ct_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                            "%s:%d: PtlCTFree failed: %d\n",
                            __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    return OMPI_SUCCESS;

err_unlink_me:
    /* best-effort release; the original failure has already been logged */
    ret = PtlMEUnlink(me_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                            "%s:%d: PtlMEUnlink failed: %d\n",
                            __FILE__, __LINE__, ret);
        /* the ME is still linked and refers to ct_h, so keep the counter */
        return OMPI_ERROR;
    }

err_free_ct:
    /* NOTE(review): triggered puts still queued on ct_h are presumably
     * discarded when the counter is freed -- confirm against the Portals 4
     * specification of PtlCTFree. */
    ret = PtlCTFree(ct_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                            "%s:%d: PtlCTFree failed: %d\n",
                            __FILE__, __LINE__, ret);
    }
    return OMPI_ERROR;
}
/*
 * First half ("top") of the linear Portals4 scatter.
 *
 * Fills in the scatter portion of the request, sets up buffers and Portals
 * handles through the setup_* helpers, and queues the triggered operations
 * that move the data:
 *   - root: chains two increments of the scatter counter to the sync
 *     counter (one once all RTRs have arrived, one once all RTRs and
 *     Recv-ACKs have arrived) and queues one triggered put per non-root
 *     rank, released by the first chained increment;
 *   - non-root: queues a triggered Recv-ACK to the root that fires once the
 *     root's put has landed, then sends its RTR to the root.
 * When request->u.scatter.is_sync is set the function waits on the scatter
 * counter for all expected operations; otherwise it queues a triggered put
 * to the finish portal table entry that carries the request pointer.
 *
 * sbuf/scount/sdtype  send buffer description (recorded as the pack source)
 * rbuf/rcount/rdtype  receive buffer description; ignored on the root when
 *                     rbuf is MPI_IN_PLACE
 * root                rank of the scatter root
 * comm                communicator
 * request             request to initialize; is_sync is read from it
 * module              collective module; cast to mca_coll_portals4_module_t
 *
 * Returns OMPI_SUCCESS, or the failing helper's return code / OMPI_ERROR.
 * On error the scatter buffer is freed and its pointer cleared.
 * NOTE(review): Portals handles created by the setup helpers are not
 * released on the error path here -- confirm whether a caller does that.
 */
static int
ompi_coll_portals4_scatter_intra_linear_top(const void *sbuf, int scount, struct ompi_datatype_t *sdtype,
                                            void *rbuf, int rcount, struct ompi_datatype_t *rdtype,
                                            int root,
                                            struct ompi_communicator_t *comm,
                                            ompi_coll_portals4_request_t *request,
                                            mca_coll_base_module_t *module)
{
    mca_coll_portals4_module_t *portals4_module = (mca_coll_portals4_module_t*) module;
    int ret, line;
    ptl_ct_event_t ct;

    ptl_ct_event_t sync_incr_event;

    int8_t i_am_root;

    int32_t expected_rtrs = 0;
    int32_t expected_puts = 0;
    int32_t expected_acks = 0;
    int32_t expected_ops  = 0;

    int32_t expected_chained_rtrs = 0;
    int32_t expected_chained_acks = 0;

    /* record the rank first: the entry trace below (and every later log
     * line) reads it from the request */
    request->u.scatter.my_rank = ompi_comm_rank(comm);

    OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
                 "coll:portals4:scatter_intra_linear_top enter rank %d", request->u.scatter.my_rank));

    request->type                   = OMPI_COLL_PORTALS4_TYPE_SCATTER;
    request->u.scatter.scatter_buf  = NULL;
    request->u.scatter.scatter_mdh  = PTL_INVALID_HANDLE;
    request->u.scatter.scatter_cth  = PTL_INVALID_HANDLE;
    request->u.scatter.scatter_meh  = PTL_INVALID_HANDLE;
    request->u.scatter.sync_mdh     = PTL_INVALID_HANDLE;
    request->u.scatter.sync_cth     = PTL_INVALID_HANDLE;
    request->u.scatter.sync_meh     = PTL_INVALID_HANDLE;

    request->u.scatter.size         = ompi_comm_size(comm);
    request->u.scatter.root_rank    = root;
    request->u.scatter.sbuf         = sbuf;
    request->u.scatter.rbuf         = rbuf;

    request->u.scatter.pack_src_buf    = sbuf;
    request->u.scatter.pack_src_count  = scount;
    request->u.scatter.pack_src_dtype  = sdtype;
    ompi_datatype_get_extent(request->u.scatter.pack_src_dtype,
                             &request->u.scatter.pack_src_lb,
                             &request->u.scatter.pack_src_extent);
    ompi_datatype_get_true_extent(request->u.scatter.pack_src_dtype,
                                  &request->u.scatter.pack_src_true_lb,
                                  &request->u.scatter.pack_src_true_extent);

    /* set unconditionally: the value is logged below on every rank,
     * including an in-place root */
    request->u.scatter.unpack_dst_offset = 0;

    if ((root == request->u.scatter.my_rank) && (rbuf == MPI_IN_PLACE)) {
        request->u.scatter.unpack_dst_buf   = NULL;
        request->u.scatter.unpack_dst_count = 0;
        request->u.scatter.unpack_dst_dtype = MPI_DATATYPE_NULL;
    } else {
        request->u.scatter.unpack_dst_buf   = rbuf;
        request->u.scatter.unpack_dst_count = rcount;
        request->u.scatter.unpack_dst_dtype = rdtype;
        ompi_datatype_get_extent(request->u.scatter.unpack_dst_dtype,
                                 &request->u.scatter.unpack_dst_lb,
                                 &request->u.scatter.unpack_dst_extent);
        ompi_datatype_get_true_extent(request->u.scatter.unpack_dst_dtype,
                                      &request->u.scatter.unpack_dst_true_lb,
                                      &request->u.scatter.unpack_dst_true_extent);
    }

    opal_output_verbose(30, ompi_coll_base_framework.framework_output,
                        "%s:%d:rank(%d): request->u.scatter.unpack_dst_offset(%lu)",
                        __FILE__, __LINE__, request->u.scatter.my_rank,
                        request->u.scatter.unpack_dst_offset);

    /**********************************/
    /* Setup Common Parameters        */
    /**********************************/

    i_am_root = (request->u.scatter.my_rank == request->u.scatter.root_rank);

    request->u.scatter.coll_count = opal_atomic_add_size_t(&portals4_module->coll_count, 1);

    ret = setup_scatter_buffers_linear(comm, request, portals4_module);
    if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; }

    ret = setup_scatter_handles(comm, request, portals4_module);
    if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; }

    ret = setup_sync_handles(comm, request, portals4_module);
    if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; }

    /**********************************/
    /* do the scatter                 */
    /**********************************/
    if (i_am_root) {
        /* operations on the sync counter */
        expected_rtrs = request->u.scatter.size - 1;  /* expect RTRs from non-root ranks */
        expected_acks = request->u.scatter.size - 1;  /* expect Recv-ACKs from non-root ranks */

        /* operations on the scatter counter */
        expected_puts         = 0;
        expected_chained_rtrs = 1;
        expected_chained_acks = 1;

        /* Chain the RTR and Recv-ACK to the Scatter CT */
        sync_incr_event.success = 1;
        sync_incr_event.failure = 0;

        ret = PtlTriggeredCTInc(request->u.scatter.scatter_cth,
                                sync_incr_event,
                                request->u.scatter.sync_cth,
                                expected_rtrs);
        if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; }

        ret = PtlTriggeredCTInc(request->u.scatter.scatter_cth,
                                sync_incr_event,
                                request->u.scatter.sync_cth,
                                expected_rtrs + expected_acks);
        if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; }

        /* root, so put packed bytes to other ranks */
        for (int32_t i = 0; i < request->u.scatter.size; i++) {
            /* do not put to my scatter_buf.  my data gets unpacked into my out buffer in linear_bottom(). */
            if (i == request->u.scatter.my_rank) {
                continue;
            }

            /* each rank's slice sits at rank * packed_size in the buffer */
            ptl_size_t offset = request->u.scatter.packed_size * i;

            opal_output_verbose(30, ompi_coll_base_framework.framework_output,
                                "%s:%d:rank(%d): offset(%lu)=rank(%d) * packed_size(%ld)",
                                __FILE__, __LINE__, request->u.scatter.my_rank,
                                offset, i, request->u.scatter.packed_size);

            ret = PtlTriggeredPut(request->u.scatter.scatter_mdh,
                                  (ptl_size_t)request->u.scatter.scatter_buf + offset,
                                  request->u.scatter.packed_size,
                                  PTL_NO_ACK_REQ,
                                  ompi_coll_portals4_get_peer(comm, i),
                                  mca_coll_portals4_component.pt_idx,
                                  request->u.scatter.scatter_match_bits,
                                  0,
                                  NULL,
                                  0,
                                  request->u.scatter.scatter_cth,
                                  expected_chained_rtrs);
            if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; }
        }
    } else {
        /* non-root, so do nothing */

        /* operations on the sync counter */
        expected_rtrs = 0;
        expected_acks = 0;

        /* operations on the scatter counter */
        expected_puts         = 1;  /* scatter put from root */
        expected_chained_rtrs = 0;
        expected_chained_acks = 0;
    }

    expected_ops = expected_chained_rtrs + expected_puts;

    /**********************************************/
    /* only non-root ranks are PUT to, so only    */
    /* non-root ranks must PUT a Recv-ACK to root */
    /**********************************************/
    if (!i_am_root) {
        ret = PtlTriggeredPut(request->u.scatter.sync_mdh,
                              0,
                              0,
                              PTL_NO_ACK_REQ,
                              ompi_coll_portals4_get_peer(comm, request->u.scatter.root_rank),
                              mca_coll_portals4_component.pt_idx,
                              request->u.scatter.sync_match_bits,
                              0,
                              NULL,
                              0,
                              request->u.scatter.scatter_cth,
                              expected_ops);
        if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; }
    }

    expected_ops += expected_chained_acks;

    if (!request->u.scatter.is_sync) {
        /******************************************/
        /* put to finish pt when all ops complete */
        /******************************************/
        ret = PtlTriggeredPut(mca_coll_portals4_component.zero_md_h,
                              0,
                              0,
                              PTL_NO_ACK_REQ,
                              ompi_coll_portals4_get_peer(comm, request->u.scatter.my_rank),
                              mca_coll_portals4_component.finish_pt_idx,
                              0,
                              0,
                              NULL,
                              (uintptr_t) request,
                              request->u.scatter.scatter_cth,
                              expected_ops);
        if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; }
    }

    /**************************************/
    /* all non-root ranks put RTR to root */
    /**************************************/
    if (!i_am_root) {
        ret = PtlPut(request->u.scatter.sync_mdh,
                     0,
                     0,
                     PTL_NO_ACK_REQ,
                     ompi_coll_portals4_get_peer(comm, request->u.scatter.root_rank),
                     mca_coll_portals4_component.pt_idx,
                     request->u.scatter.sync_match_bits,
                     0,
                     NULL,
                     0);
        if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; }
    }

    if (request->u.scatter.is_sync) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                            "calling CTWait(expected_ops=%d)\n", expected_ops);

        /********************************/
        /* Wait for all ops to complete */
        /********************************/
        ret = PtlCTWait(request->u.scatter.scatter_cth, expected_ops, &ct);
        if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; }

        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                            "completed CTWait(expected_ops=%d)\n", expected_ops);
    }

    OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
                 "coll:portals4:scatter_intra_linear_top exit rank %d", request->u.scatter.my_rank));

    return OMPI_SUCCESS;

err_hdlr:
    /* free(NULL) is a no-op; clear the pointer so the request never holds
     * a dangling buffer after a failed start */
    free(request->u.scatter.scatter_buf);
    request->u.scatter.scatter_buf = NULL;

    opal_output(ompi_coll_base_framework.framework_output,
                "%s:%4d:%4d\tError occurred ret=%d, rank %2d",
                __FILE__, __LINE__, line, ret, request->u.scatter.my_rank);

    return ret;
}