int ompi_mtl_portals4_progress(void) { int count = 0, ret; ptl_event_t ev; ompi_mtl_portals4_base_request_t *ptl_request; while (true) { ret = PtlEQGet(ompi_mtl_portals4.eq_h, &ev); if (PTL_OK == ret) { OPAL_OUTPUT_VERBOSE((60, ompi_mtl_base_output, "Found event of type %d\n", ev.type)); switch (ev.type) { case PTL_EVENT_GET: case PTL_EVENT_PUT: case PTL_EVENT_PUT_OVERFLOW: case PTL_EVENT_ATOMIC: case PTL_EVENT_ATOMIC_OVERFLOW: case PTL_EVENT_REPLY: case PTL_EVENT_SEND: case PTL_EVENT_ACK: case PTL_EVENT_AUTO_FREE: case PTL_EVENT_SEARCH: if (NULL != ev.user_ptr) { ptl_request = ev.user_ptr; ret = ptl_request->event_callback(&ev, ptl_request); if (OMPI_SUCCESS != ret) { opal_output(ompi_mtl_base_output, "Error returned from target event callback: %d", ret); abort(); } } break; case PTL_EVENT_PT_DISABLED: /* BWB: FIX ME: do stuff - flow control */ opal_output(ompi_mtl_base_output, "Unhandled send flow control event."); abort(); break; case PTL_EVENT_AUTO_UNLINK: opal_output_verbose(1, ompi_mtl_base_output, "Unexpected auto unlink event"); break; case PTL_EVENT_GET_OVERFLOW: case PTL_EVENT_FETCH_ATOMIC: case PTL_EVENT_FETCH_ATOMIC_OVERFLOW: opal_output_verbose(1, ompi_mtl_base_output, "Unexpected event of type %d", ev.type); } } else if (PTL_EQ_EMPTY == ret) { break; } else { opal_output(ompi_mtl_base_output, "Error returned from PtlEQGet: %d", ret); abort(); } } return count; }
static int progress_callback(void) { int ret, count = 0; ptl_event_t ev; ompi_osc_portals4_request_t *req; int32_t ops; while (true) { ret = PtlEQGet(mca_osc_portals4_component.matching_eq_h, &ev); if (PTL_OK == ret) { goto process; } else if (PTL_EQ_DROPPED == ret) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d: PtlEQGet reported dropped event", __FILE__, __LINE__); goto process; } else if (PTL_EQ_EMPTY == ret) { return 0; } else { opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d: PtlEQGet failed: %d\n", __FILE__, __LINE__, ret); return 0; } process: if (ev.ni_fail_type != PTL_OK) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d: event failure: %d %d", __FILE__, __LINE__, ev.type, ev.ni_fail_type); return 0; } count++; if (NULL != ev.user_ptr) { /* be sure that we receive the PTL_EVENT_LINK */ if (ev.type == PTL_EVENT_LINK) { *(int *)ev.user_ptr = *(int *)ev.user_ptr + 1; opal_condition_broadcast(&mca_osc_portals4_component.cond); continue; } req = (ompi_osc_portals4_request_t*) ev.user_ptr; opal_atomic_add_size_t(&req->super.req_status._ucount, ev.mlength); ops = opal_atomic_add_32(&req->ops_committed, 1); if (ops == req->ops_expected) { OPAL_THREAD_LOCK(&ompi_request_lock); ompi_request_complete(&req->super, true); OPAL_THREAD_UNLOCK(&ompi_request_lock); } } } return count; }
/**
 * Scan the unexpected-receive event queue for a message matching the given
 * match/ignore bits.
 *
 * Events are pulled with PtlEQGet() until the queue is empty:
 *  - PTL_EVENT_PUT_START: the event is copied into an item taken from
 *    ompi_mtl_portals.event_fl.  For short messages the pending counter of
 *    the block referenced by ev.md.user_ptr is incremented.  If the event
 *    matches, the function waits for the corresponding put-end and returns
 *    the item; otherwise the item is appended to the unexpected-messages
 *    list.
 *  - PTL_EVENT_PUT_END: matched up with an earlier put-start via ev.link.
 *  - anything else, or a PtlEQGet() error: logged, then abort().
 *
 * @param match_bits   bits the incoming message must match
 * @param ignore_bits  bits excluded from the comparison
 * @return the matching event item, or NULL if the queue drained without a
 *         match.
 */
static ompi_mtl_portals_event_t*
ompi_mtl_portals_search_unex_events(ptl_match_bits_t match_bits,
                                    ptl_match_bits_t ignore_bits)
{
    ptl_event_t ev;
    int ret;

    /* check to see if there are any events in the unexpected event queue */
    while (true) {
        ret = PtlEQGet(ompi_mtl_portals.ptl_unexpected_recv_eq_h,&ev);
        if (PTL_OK == ret) {
            if (PTL_EVENT_PUT_START == ev.type) {
                ompi_free_list_item_t *item = NULL;
                ompi_mtl_portals_event_t *recv_event;

                /* NOTE: the macro receives 'ret' as its status argument, so
                   the PtlEQGet result is no longer available afterwards. */
                OMPI_FREE_LIST_GET(&ompi_mtl_portals.event_fl, item, ret);
                if (NULL == item) {
                    /* Without an item the event cannot be recorded; treat it
                       like the other unrecoverable conditions here instead
                       of dereferencing a NULL pointer. */
                    opal_output(fileno(stderr)," Unable to get event item in ompi_mtl_portals_search_unex_events : %d \n",ret);
                    abort();
                }
                recv_event = (ompi_mtl_portals_event_t*) item;
                recv_event->ev = ev;
                recv_event->is_complete = false;

                if (PTL_IS_SHORT_MSG(recv_event->ev.match_bits)) {
                    ompi_mtl_portals_recv_short_block_t *block =
                        recv_event->ev.md.user_ptr;
                    OPAL_THREAD_ADD32(&block->pending, 1);
                }

                if (CHECK_MATCH(recv_event->ev.match_bits, match_bits, ignore_bits)) {
                    /* the one we want */
                    ompi_mtl_portals_wait_for_put_end(recv_event->ev.link);
                    return recv_event;
                } else {
                    /* not the one we want, so add it to the unex list */
                    opal_list_append(&(ompi_mtl_portals.unexpected_messages),
                                     (opal_list_item_t*) recv_event);
                }
            } else if (PTL_EVENT_PUT_END == ev.type) {
                /* can't be the one we want */
                ompi_mtl_portals_match_up_put_end(ev.link);
            } else {
                opal_output(fileno(stderr)," Unrecognised event type - %d - ompi_mtl_portals_search_unex_events : %d \n",ev.type,ret);
                abort();
            }
        } else if (PTL_EQ_EMPTY == ret) {
            break;
        } else {
            /* The failing call is PtlEQGet, so name it correctly. */
            opal_output(fileno(stderr)," Error returned in ompi_mtl_portals_search_unex_events from PtlEQGet : %d \n",ret);
            abort();
        }
    }

    return NULL;
}
/* Target EQ */ static int portals4_progress(void) { int count = 0, ret; ptl_event_t ev; ompi_coll_portals4_request_t *ptl_request; while (true) { ret = PtlEQGet(mca_coll_portals4_component.eq_h, &ev); if (PTL_OK == ret) { OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, "event type=%s\n", evname[ev.type])); count++; switch (ev.type) { case PTL_EVENT_PUT: /* Non-Blocking / request */ if (PTL_OK == ev.ni_fail_type) { OPAL_OUTPUT_VERBOSE((50, ompi_coll_base_framework.framework_output, "hdr_data %p, matchbits 0x%lx", (void*) ev.hdr_data, ev.match_bits)); assert(0 != ev.hdr_data); ptl_request = (ompi_coll_portals4_request_t*) ev.hdr_data; assert(NULL != ptl_request); switch (ptl_request->type) { case OMPI_COLL_PORTALS4_TYPE_BARRIER: ompi_coll_portals4_ibarrier_intra_fini(ptl_request); break; case OMPI_COLL_PORTALS4_TYPE_BCAST: ompi_coll_portals4_ibcast_intra_fini(ptl_request); break; case OMPI_COLL_PORTALS4_TYPE_REDUCE: ompi_coll_portals4_ireduce_intra_fini(ptl_request); break; case OMPI_COLL_PORTALS4_TYPE_ALLREDUCE: ompi_coll_portals4_iallreduce_intra_fini(ptl_request); break; case OMPI_COLL_PORTALS4_TYPE_SCATTER: case OMPI_COLL_PORTALS4_TYPE_GATHER: opal_output(ompi_coll_base_framework.framework_output, "allreduce is not supported yet\n"); break; } } if (PTL_OK != ev.ni_fail_type) { OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, "ni_fail_type=%s\n", failtype[ev.ni_fail_type])); } break; default: opal_output(ompi_coll_base_framework.framework_output, "Unexpected event of type %d", ev.type); break; } } else if (PTL_EQ_EMPTY == ret) { break; } else if (PTL_EQ_DROPPED == ret) { opal_output(ompi_coll_base_framework.framework_output, "Flow control situation without recovery (EQ_DROPPED)\n"); abort(); } else { opal_output(ompi_coll_base_framework.framework_output, "Error returned from PtlEQGet: %d", ret); break; } } return count; }
int main(int argc, char *argv[]) { ptl_handle_ni_t ni_handle; ptl_process_t *procs; int rank; ptl_pt_index_t pt_index, signal_pt_index; HANDLE_T signal_e_handle; HANDLE_T signal_e2_handle; int num_procs; ptl_handle_eq_t eq_handle; ptl_handle_ct_t ct_handle; ptl_handle_md_t md_handle; ptl_ni_limits_t limits_reqd, limits_actual; ENTRY_T value_e; limits_reqd.max_entries = 1024; limits_reqd.max_unexpected_headers = ITERS*2; limits_reqd.max_mds = 1024; limits_reqd.max_eqs = 1024; limits_reqd.max_cts = 1024; limits_reqd.max_pt_index = 64; limits_reqd.max_iovecs = 1024; limits_reqd.max_list_size = 1024; limits_reqd.max_triggered_ops = 1024; limits_reqd.max_msg_size = 1048576; limits_reqd.max_atomic_size = 1048576; limits_reqd.max_fetch_atomic_size = 1048576; limits_reqd.max_waw_ordered_size = 1048576; limits_reqd.max_war_ordered_size = 1048576; limits_reqd.max_volatile_size = 1048576; limits_reqd.features = 0; CHECK_RETURNVAL(PtlInit()); CHECK_RETURNVAL(libtest_init()); rank = libtest_get_rank(); num_procs = libtest_get_size(); if (num_procs < 2) { fprintf(stderr, "test_flowctl_noeq requires at least two processes\n"); return 77; } int iters; if (num_procs < ITERS) iters = ITERS*2+1; else iters = ITERS; CHECK_RETURNVAL(PtlNIInit(PTL_IFACE_DEFAULT, NI_TYPE | PTL_NI_LOGICAL, PTL_PID_ANY, &limits_reqd, &limits_actual, &ni_handle)); procs = libtest_get_mapping(ni_handle); CHECK_RETURNVAL(PtlSetMap(ni_handle, num_procs, procs)); if (0 == rank) { /* create data PT space */ CHECK_RETURNVAL(PtlEQAlloc(ni_handle, (num_procs - 1) * iters + 64, &eq_handle)); CHECK_RETURNVAL(PtlPTAlloc(ni_handle, PTL_PT_FLOWCTRL, eq_handle, 5, &pt_index)); /* create signal ME */ CHECK_RETURNVAL(PtlCTAlloc(ni_handle, &ct_handle)); CHECK_RETURNVAL(PtlPTAlloc(ni_handle, 1, eq_handle, 6, &signal_pt_index)); value_e.start = NULL; value_e.length = 0; value_e.ct_handle = ct_handle; value_e.uid = PTL_UID_ANY; value_e.options = OPTIONS | PTL_LE_EVENT_CT_COMM; #if INTERFACE == 1 value_e.match_id.rank = 
PTL_RANK_ANY; value_e.match_bits = 0; value_e.ignore_bits = 0; #endif CHECK_RETURNVAL(APPEND(ni_handle, 5, &value_e, PTL_OVERFLOW_LIST, NULL, &signal_e_handle)); } else { ptl_md_t md; /* 16 extra just in case... */ CHECK_RETURNVAL(PtlEQAlloc(ni_handle, iters*2 + 16, &eq_handle)); md.start = NULL; md.length = 0; md.options = 0; md.eq_handle = eq_handle; md.ct_handle = PTL_CT_NONE; CHECK_RETURNVAL(PtlMDBind(ni_handle, &md, &md_handle)); } fprintf(stderr,"at barrier \n"); libtest_barrier(); if (0 == rank) { ptl_ct_event_t ct; ptl_event_t ev; int ret, count = 0, saw_flowctl = 0; fprintf(stderr,"begin ctwait \n"); /* wait for signal counts */ CHECK_RETURNVAL(PtlCTWait(ct_handle, iters / 2 , &ct)); if (ct.success != iters / 2 || ct.failure != 0) { return 1; } fprintf(stderr,"done CT wait \n"); /* wait for event entries */ while (1) { ret = PtlEQGet(eq_handle, &ev); if (PTL_OK == ret) { count++; fprintf(stderr, "found EQ value \n"); } else if (ret == PTL_EQ_EMPTY) { continue; } else { fprintf(stderr, "0: Unexpected return code from EQGet: %d\n", ret); return 1; } if (ev.type == PTL_EVENT_PT_DISABLED) { saw_flowctl++; break; } } fprintf(stderr, "0: Saw %d flowctl\n", saw_flowctl); if (saw_flowctl == 0) { return 1; } /* Now clear out all of the unexpected messages so we can clean up everything */ CHECK_RETURNVAL(APPEND(ni_handle, 5, &value_e, PTL_PRIORITY_LIST, NULL, &signal_e2_handle)); ret = PTL_OK; while (ret != PTL_EQ_EMPTY) ret = PtlEQGet(eq_handle, &ev); } else { ptl_process_t target; ptl_event_t ev; int ret, count = 0, fails = 0; int i; target.rank = 0; printf("beginning puts \n"); for (i = 0 ; i < iters ; ++i) { CHECK_RETURNVAL(PtlPut(md_handle, 0, 0, PTL_ACK_REQ, target, 5, 0, 0, NULL, 0)); usleep(100); } while (count < iters) { ret = PtlEQGet(eq_handle, &ev); if (PTL_EQ_EMPTY == ret) { continue; } else if (PTL_OK != ret) { fprintf(stderr, "%d: PtlEQGet returned %d\n", rank, ret); return 1; } if (ev.ni_fail_type == PTL_NI_OK) { if (ev.type == PTL_EVENT_SEND) { 
continue; } else if (ev.type == PTL_EVENT_ACK) { count++; } else { fprintf(stderr, "%d: Unexpected event type %d\n", rank, ev.type); } } else if (ev.ni_fail_type == PTL_NI_PT_DISABLED) { count++; fails++; } else if (ev.ni_fail_type == PTL_EQ_EMPTY) { continue; } else if (ev.ni_fail_type == PTL_EQ_DROPPED) { continue; } else { fprintf(stderr, "%d: Unexpected fail type: %d\n", rank, ev.ni_fail_type); return 1; } } fprintf(stderr, "%d: Saw %d of %d ACKs as fails\n", rank, fails, count); } fprintf(stderr,"at final barrier \n"); libtest_barrier(); if (0 == rank) { CHECK_RETURNVAL(UNLINK(signal_e_handle)); CHECK_RETURNVAL(UNLINK(signal_e2_handle)); CHECK_RETURNVAL(PtlPTFree(ni_handle, signal_pt_index)); CHECK_RETURNVAL(PtlCTFree(ct_handle)); CHECK_RETURNVAL(PtlPTFree(ni_handle, pt_index)); CHECK_RETURNVAL(PtlEQFree(eq_handle)); } else { CHECK_RETURNVAL(PtlMDRelease(md_handle)); CHECK_RETURNVAL(PtlEQFree(eq_handle)); } fprintf(stderr,"final cleanup \n"); CHECK_RETURNVAL(PtlNIFini(ni_handle)); CHECK_RETURNVAL(libtest_fini()); PtlFini(); return 0; }
int main(int argc, char *argv[]) { ptl_handle_ni_t ni_handle; ptl_process_t *procs; int rank; ptl_pt_index_t pt_index, signal_pt_index; HANDLE_T value_e_handle, signal_e_handle; int num_procs; ptl_handle_eq_t eq_handle; ptl_handle_ct_t ct_handle; ptl_handle_md_t md_handle; CHECK_RETURNVAL(PtlInit()); CHECK_RETURNVAL(libtest_init()); rank = libtest_get_rank(); num_procs = libtest_get_size(); if (num_procs < 2) { fprintf(stderr, "test_flowctl_noeq requires at least two processes\n"); return 77; } CHECK_RETURNVAL(PtlNIInit(PTL_IFACE_DEFAULT, NI_TYPE | PTL_NI_LOGICAL, PTL_PID_ANY, NULL, NULL, &ni_handle)); procs = libtest_get_mapping(ni_handle); CHECK_RETURNVAL(PtlSetMap(ni_handle, num_procs, procs)); if (0 == rank) { ENTRY_T value_e; /* create data ME */ CHECK_RETURNVAL(PtlEQAlloc(ni_handle, (num_procs - 1) * ITERS / 2, &eq_handle)); CHECK_RETURNVAL(PtlPTAlloc(ni_handle, PTL_PT_FLOWCTRL, eq_handle, 5, &pt_index)); value_e.start = NULL; value_e.length = 0; value_e.ct_handle = PTL_CT_NONE; value_e.uid = PTL_UID_ANY; value_e.options = OPTIONS; #if INTERFACE == 1 value_e.match_id.rank = PTL_RANK_ANY; value_e.match_bits = 0; value_e.ignore_bits = 0; #endif CHECK_RETURNVAL(APPEND(ni_handle, 5, &value_e, PTL_PRIORITY_LIST, NULL, &value_e_handle)); /* create signal ME */ CHECK_RETURNVAL(PtlCTAlloc(ni_handle, &ct_handle)); CHECK_RETURNVAL(PtlPTAlloc(ni_handle, 0, PTL_EQ_NONE, 6, &signal_pt_index)); value_e.start = NULL; value_e.length = 0; value_e.ct_handle = ct_handle; value_e.uid = PTL_UID_ANY; value_e.options = OPTIONS | PTL_LE_EVENT_SUCCESS_DISABLE | PTL_LE_EVENT_CT_COMM; #if INTERFACE == 1 value_e.match_id.rank = PTL_RANK_ANY; value_e.match_bits = 0; value_e.ignore_bits = 0; #endif CHECK_RETURNVAL(APPEND(ni_handle, 6, &value_e, PTL_PRIORITY_LIST, NULL, &signal_e_handle)); } else { ptl_md_t md; /* 16 extra just in case... 
*/ CHECK_RETURNVAL(PtlEQAlloc(ni_handle, ITERS * 2 + 16, &eq_handle)); md.start = NULL; md.length = 0; md.options = 0; md.eq_handle = eq_handle; md.ct_handle = PTL_CT_NONE; CHECK_RETURNVAL(PtlMDBind(ni_handle, &md, &md_handle)); } libtest_barrier(); if (0 == rank) { ptl_ct_event_t ct; ptl_event_t ev; int ret, count = 0, saw_dropped = 0, saw_flowctl = 0; /* wait for signal counts */ CHECK_RETURNVAL(PtlCTWait(ct_handle, num_procs - 1, &ct)); if (ct.success != num_procs - 1 || ct.failure != 0) { return 1; } /* wait for event entries */ while (count < ITERS * (num_procs - 1)) { ret = PtlEQWait(eq_handle, &ev); if (PTL_OK == ret) { ; } else if (PTL_EQ_DROPPED == ret) { saw_dropped++; if (ev.type == PTL_EVENT_PT_DISABLED){ saw_flowctl++; CHECK_RETURNVAL(PtlPTEnable(ni_handle, pt_index)); } break; } else { fprintf(stderr, "0: Unexpected return code from EQWait: %d\n", ret); return 1; } if (ev.type == PTL_EVENT_PT_DISABLED) { CHECK_RETURNVAL(PtlPTEnable(ni_handle, pt_index)); saw_flowctl++; } else { count++; } } fprintf(stderr, "0: Saw %d dropped, %d flowctl\n", saw_dropped, saw_flowctl); if (saw_flowctl == 0) { return 1; } } else { ptl_process_t target; ptl_event_t ev; int ret, count = 0, fails = 0; int i; int *fail_seen; fail_seen = malloc(sizeof(int) * ITERS); if (NULL == fail_seen) { fprintf(stderr, "%d: malloc failed\n", rank); return 1; } memset(fail_seen, 0, sizeof(int) * ITERS); target.rank = 0; for (i = 0 ; i < ITERS ; ++i) { CHECK_RETURNVAL(PtlPut(md_handle, 0, 0, PTL_ACK_REQ, target, 5, 0, 0, (void*)(size_t)i, 0)); usleep(100); } while (count < ITERS) { ret = PtlEQGet(eq_handle, &ev); if (PTL_EQ_EMPTY == ret) { continue; } else if (PTL_OK != ret) { fprintf(stderr, "%d: PtlEQGet returned %d\n", rank, ret); return 1; } if (ev.ni_fail_type == PTL_NI_OK) { if (ev.type == PTL_EVENT_SEND) { continue; } else if (ev.type == PTL_EVENT_ACK) { count++; } else { fprintf(stderr, "%d: Unexpected event type %d\n", rank, ev.type); } } else if (ev.ni_fail_type == 
PTL_NI_PT_DISABLED) { int iter = (size_t) ev.user_ptr; if (fail_seen[iter]++ > 0) { fprintf(stderr, "%d: Double report of PT_DISABLED for " "iteration %d\n", rank, iter); return 1; } count++; fails++; } else { fprintf(stderr, "%d: Unexpected fail type: %d\n", rank, ev.ni_fail_type); return 1; } } fprintf(stderr, "%d: Saw %d of %d events as fails\n", rank, fails, count); CHECK_RETURNVAL(PtlPut(md_handle, 0, 0, PTL_NO_ACK_REQ, target, 6, 0, 0, NULL, 0)); /* wait for the send event on the last put */ CHECK_RETURNVAL(PtlEQWait(eq_handle, &ev)); while (fails > 0) { CHECK_RETURNVAL(PtlPut(md_handle, 0, 0, PTL_ACK_REQ, target, 5, 0, 0, NULL, 0)); while (1) { ret = PtlEQWait(eq_handle, &ev); if (PTL_OK != ret) { fprintf(stderr, "%d: PtlEQWait returned %d\n", rank, ret); return 1; } if (ev.ni_fail_type == PTL_NI_OK) { if (ev.type == PTL_EVENT_SEND) { continue; } else if (ev.type == PTL_EVENT_ACK) { fails--; break; } else { fprintf(stderr, "%d: Unexpected event type %d\n", rank, ev.type); } } else if (ev.ni_fail_type == PTL_NI_PT_DISABLED) { break; } else { fprintf(stderr, "%d: Unexpected fail type: %d\n", rank, ev.ni_fail_type); return 1; } } } } libtest_barrier(); if (0 == rank) { CHECK_RETURNVAL(UNLINK(signal_e_handle)); CHECK_RETURNVAL(PtlPTFree(ni_handle, signal_pt_index)); CHECK_RETURNVAL(PtlCTFree(ct_handle)); CHECK_RETURNVAL(UNLINK(value_e_handle)); CHECK_RETURNVAL(PtlPTFree(ni_handle, pt_index)); CHECK_RETURNVAL(PtlEQFree(eq_handle)); } else { CHECK_RETURNVAL(PtlMDRelease(md_handle)); CHECK_RETURNVAL(PtlEQFree(eq_handle)); } CHECK_RETURNVAL(PtlNIFini(ni_handle)); CHECK_RETURNVAL(libtest_fini()); PtlFini(); return 0; }