/*
 * Callback that ends a flow-control episode on this process.
 *
 * Steps, in order:
 *   1. clear the flowctl_active flag;
 *   2. re-enable the receive portal table entry (recv_idx);
 *   3. compare the current time with the timestamp stored by the previous
 *      invocation and either back off (increment backoff_count and sleep
 *      that many microseconds) or reset backoff_count to zero;
 *   4. store the current time and progress the pending list.
 *
 * Neither `ev` nor `ptl_base_request` is read by this function.
 *
 * Returns the PtlPTEnable() return code if re-enabling fails (steps 3 and 4
 * are skipped in that case), OMPI_SUCCESS otherwise.
 */
static int
flowctl_fanout_callback(ptl_event_t *ev,
                        ompi_mtl_portals4_base_request_t *ptl_base_request)
{
    struct timeval now;
    int rc;

    ompi_mtl_portals4.flowctl.flowctl_active = false;

    rc = PtlPTEnable(ompi_mtl_portals4.ni_h, ompi_mtl_portals4.recv_idx);
    if (OPAL_UNLIKELY(PTL_OK != rc)) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlPTEnabled failed: %d\n",
                            __FILE__, __LINE__, rc);
        return rc;
    }

    gettimeofday(&now, NULL);

    /*
     * Elapsed microseconds since the previous invocation, measured against
     * a window of one second per accumulated back-off step.
     *
     * NOTE(review): when backoff_count is 0 the window is 0, so the back-off
     * branch below is only reachable if the elapsed time is negative --
     * confirm that backoff_count is seeded elsewhere, or that this is the
     * intended behaviour.
     */
    if (((now.tv_sec * 1000000 + now.tv_usec) -
         (ompi_mtl_portals4.flowctl.tv.tv_sec * 1000000 +
          ompi_mtl_portals4.flowctl.tv.tv_usec))
        >= 1000000 * ompi_mtl_portals4.flowctl.backoff_count) {
        /* outside the window: forget any accumulated back-off */
        ompi_mtl_portals4.flowctl.backoff_count = 0;
    } else {
        /* inside the window: one more back-off step, then sleep that long */
        ompi_mtl_portals4.flowctl.backoff_count++;
        usleep(ompi_mtl_portals4.flowctl.backoff_count);
    }
    ompi_mtl_portals4.flowctl.tv = now;

    ompi_mtl_portals4_pending_list_progress();

    OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                         "Exiting flowctl_fanout_callback %ld",
                         ompi_mtl_portals4.flowctl.epoch_counter));

    return OMPI_SUCCESS;
}
static int poke_progress(void) { int ret = PTL_OK; struct rptl_target *target; struct rptl_op *op; struct rptl *rptl; int i; int mpi_errno = MPI_SUCCESS; ptl_process_t id; ptl_pt_index_t data_pt, control_pt; MPIDI_STATE_DECL(MPID_STATE_POKE_PROGRESS); MPIDI_FUNC_ENTER(MPID_STATE_POKE_PROGRESS); /* make progress on local RPTLs */ for (rptl = rptl_info.rptl_list; rptl; rptl = rptl->next) { /* if the local state is active, there's nothing to do */ if (rptl->local_state == RPTL_LOCAL_STATE_ACTIVE) continue; /* if we are in a local AWAITING PAUSE ACKS state, see if we * can send out the unpause message */ if (rptl->local_state == RPTL_LOCAL_STATE_AWAITING_PAUSE_ACKS && rptl->pause_ack_counter == rptl_info.world_size - 1) { /* if we are over the max count limit, do not send an * unpause message yet */ if (rptl->data.ob_curr_count > rptl->data.ob_max_count) continue; ret = PtlPTEnable(rptl->ni, rptl->data.pt); RPTLU_ERR_POP(ret, "Error returned while reenabling PT\n"); rptl->local_state = RPTL_LOCAL_STATE_ACTIVE; for (i = 0; i < rptl_info.world_size; i++) { if (i == MPIDI_Process.my_pg_rank) continue; mpi_errno = rptl_info.get_target_info(i, &id, rptl->data.pt, &data_pt, &control_pt); if (mpi_errno) { ret = PTL_FAIL; RPTLU_ERR_POP(ret, "Error getting target info\n"); } /* make sure the user setup a control portal */ assert(control_pt != PTL_PT_ANY); ret = rptl_put(rptl->md, 0, 0, PTL_NO_ACK_REQ, id, control_pt, 0, 0, NULL, RPTL_CONTROL_MSG_UNPAUSE, RPTL_PT_CONTROL); RPTLU_ERR_POP(ret, "Error sending unpause message\n"); } } } /* make progress on targets */ for (target = rptl_info.target_list; target; target = target->next) { if (target->state == RPTL_TARGET_STATE_RECEIVED_PAUSE) { for (op = target->data_op_list; op; op = op->next) if (op->state == RPTL_OP_STATE_ISSUED) break; if (op) continue; /* send a pause ack message */ assert(target->rptl); for (i = 0; i < rptl_info.world_size; i++) { if (i == MPIDI_Process.my_pg_rank) continue; /* find the target that has this 
target id and get the * control portal information for it */ mpi_errno = rptl_info.get_target_info(i, &id, target->rptl->data.pt, &data_pt, &control_pt); if (mpi_errno) { ret = PTL_FAIL; RPTLU_ERR_POP(ret, "Error getting target info\n"); } if (IDS_ARE_EQUAL(id, target->id)) break; } /* make sure the user setup a control portal */ assert(control_pt != PTL_PT_ANY); target->state = RPTL_TARGET_STATE_PAUSE_ACKED; ret = rptl_put(target->rptl->md, 0, 0, PTL_NO_ACK_REQ, id, control_pt, 0, 0, NULL, RPTL_CONTROL_MSG_PAUSE_ACK, RPTL_PT_CONTROL); RPTLU_ERR_POP(ret, "Error sending pause ack message\n"); continue; } /* issue out all the control messages first */ for (op = target->control_op_list; op; op = op->next) { assert(op->op_type == RPTL_OP_PUT); /* skip all the issued ops */ if (op->state == RPTL_OP_STATE_ISSUED) continue; /* we should not get any NACKs on the control portal */ assert(op->state != RPTL_OP_STATE_NACKED); if (rptl_info.origin_events_left < 2 || target->issued_data_ops > PER_TARGET_THRESHOLD) { /* too few origin events left. we can't issue this op * or any following op to this target in order to * maintain ordering */ break; } rptl_info.origin_events_left -= 2; target->issued_data_ops++; /* force request for an ACK even if the user didn't ask * for it. replace the user pointer with the OP id. 
*/ ret = PtlPut(op->u.put.md_handle, op->u.put.local_offset, op->u.put.length, PTL_ACK_REQ, op->u.put.target_id, op->u.put.pt_index, op->u.put.match_bits, op->u.put.remote_offset, op, op->u.put.hdr_data); RPTLU_ERR_POP(ret, "Error issuing PUT\n"); op->state = RPTL_OP_STATE_ISSUED; } if (target->state == RPTL_TARGET_STATE_DISABLED || target->state == RPTL_TARGET_STATE_PAUSE_ACKED) continue; /* then issue out all the data messages */ for (op = target->data_op_list; op; op = op->next) { if (op->op_type == RPTL_OP_PUT) { /* skip all the issued ops */ if (op->state == RPTL_OP_STATE_ISSUED) continue; /* if an op has been nacked, don't issue anything else * to this target */ if (op->state == RPTL_OP_STATE_NACKED) break; if (rptl_info.origin_events_left < 2 || target->issued_data_ops > PER_TARGET_THRESHOLD) { /* too few origin events left. we can't issue * this op or any following op to this target in * order to maintain ordering */ break; } rptl_info.origin_events_left -= 2; target->issued_data_ops++; /* force request for an ACK even if the user didn't * ask for it. replace the user pointer with the OP * id. */ ret = PtlPut(op->u.put.md_handle, op->u.put.local_offset, op->u.put.length, PTL_ACK_REQ, op->u.put.target_id, op->u.put.pt_index, op->u.put.match_bits, op->u.put.remote_offset, op, op->u.put.hdr_data); RPTLU_ERR_POP(ret, "Error issuing PUT\n"); } else if (op->op_type == RPTL_OP_GET) { /* skip all the issued ops */ if (op->state == RPTL_OP_STATE_ISSUED) continue; /* if an op has been nacked, don't issue anything else * to this target */ if (op->state == RPTL_OP_STATE_NACKED) break; if (rptl_info.origin_events_left < 1 || target->issued_data_ops > PER_TARGET_THRESHOLD) { /* too few origin events left. 
we can't issue * this op or any following op to this target in * order to maintain ordering */ break; } rptl_info.origin_events_left--; target->issued_data_ops++; ret = PtlGet(op->u.get.md_handle, op->u.get.local_offset, op->u.get.length, op->u.get.target_id, op->u.get.pt_index, op->u.get.match_bits, op->u.get.remote_offset, op); RPTLU_ERR_POP(ret, "Error issuing GET\n"); } op->state = RPTL_OP_STATE_ISSUED; } } fn_exit: MPIDI_FUNC_EXIT(MPID_STATE_POKE_PROGRESS); return ret; fn_fail: goto fn_exit; }
int main(int argc, char *argv[]) { ptl_handle_ni_t ni_handle; ptl_process_t *procs; int rank; ptl_pt_index_t pt_index, signal_pt_index; HANDLE_T value_e_handle, signal_e_handle; int num_procs; ptl_handle_eq_t eq_handle; ptl_handle_ct_t ct_handle; ptl_handle_md_t md_handle; CHECK_RETURNVAL(PtlInit()); CHECK_RETURNVAL(libtest_init()); rank = libtest_get_rank(); num_procs = libtest_get_size(); if (num_procs < 2) { fprintf(stderr, "test_flowctl_noeq requires at least two processes\n"); return 77; } CHECK_RETURNVAL(PtlNIInit(PTL_IFACE_DEFAULT, NI_TYPE | PTL_NI_LOGICAL, PTL_PID_ANY, NULL, NULL, &ni_handle)); procs = libtest_get_mapping(ni_handle); CHECK_RETURNVAL(PtlSetMap(ni_handle, num_procs, procs)); if (0 == rank) { ENTRY_T value_e; /* create data ME */ CHECK_RETURNVAL(PtlEQAlloc(ni_handle, (num_procs - 1) * ITERS / 2, &eq_handle)); CHECK_RETURNVAL(PtlPTAlloc(ni_handle, PTL_PT_FLOWCTRL, eq_handle, 5, &pt_index)); value_e.start = NULL; value_e.length = 0; value_e.ct_handle = PTL_CT_NONE; value_e.uid = PTL_UID_ANY; value_e.options = OPTIONS; #if INTERFACE == 1 value_e.match_id.rank = PTL_RANK_ANY; value_e.match_bits = 0; value_e.ignore_bits = 0; #endif CHECK_RETURNVAL(APPEND(ni_handle, 5, &value_e, PTL_PRIORITY_LIST, NULL, &value_e_handle)); /* create signal ME */ CHECK_RETURNVAL(PtlCTAlloc(ni_handle, &ct_handle)); CHECK_RETURNVAL(PtlPTAlloc(ni_handle, 0, PTL_EQ_NONE, 6, &signal_pt_index)); value_e.start = NULL; value_e.length = 0; value_e.ct_handle = ct_handle; value_e.uid = PTL_UID_ANY; value_e.options = OPTIONS | PTL_LE_EVENT_SUCCESS_DISABLE | PTL_LE_EVENT_CT_COMM; #if INTERFACE == 1 value_e.match_id.rank = PTL_RANK_ANY; value_e.match_bits = 0; value_e.ignore_bits = 0; #endif CHECK_RETURNVAL(APPEND(ni_handle, 6, &value_e, PTL_PRIORITY_LIST, NULL, &signal_e_handle)); } else { ptl_md_t md; /* 16 extra just in case... 
*/ CHECK_RETURNVAL(PtlEQAlloc(ni_handle, ITERS * 2 + 16, &eq_handle)); md.start = NULL; md.length = 0; md.options = 0; md.eq_handle = eq_handle; md.ct_handle = PTL_CT_NONE; CHECK_RETURNVAL(PtlMDBind(ni_handle, &md, &md_handle)); } libtest_barrier(); if (0 == rank) { ptl_ct_event_t ct; ptl_event_t ev; int ret, count = 0, saw_dropped = 0, saw_flowctl = 0; /* wait for signal counts */ CHECK_RETURNVAL(PtlCTWait(ct_handle, num_procs - 1, &ct)); if (ct.success != num_procs - 1 || ct.failure != 0) { return 1; } /* wait for event entries */ while (count < ITERS * (num_procs - 1)) { ret = PtlEQWait(eq_handle, &ev); if (PTL_OK == ret) { ; } else if (PTL_EQ_DROPPED == ret) { saw_dropped++; if (ev.type == PTL_EVENT_PT_DISABLED){ saw_flowctl++; CHECK_RETURNVAL(PtlPTEnable(ni_handle, pt_index)); } break; } else { fprintf(stderr, "0: Unexpected return code from EQWait: %d\n", ret); return 1; } if (ev.type == PTL_EVENT_PT_DISABLED) { CHECK_RETURNVAL(PtlPTEnable(ni_handle, pt_index)); saw_flowctl++; } else { count++; } } fprintf(stderr, "0: Saw %d dropped, %d flowctl\n", saw_dropped, saw_flowctl); if (saw_flowctl == 0) { return 1; } } else { ptl_process_t target; ptl_event_t ev; int ret, count = 0, fails = 0; int i; int *fail_seen; fail_seen = malloc(sizeof(int) * ITERS); if (NULL == fail_seen) { fprintf(stderr, "%d: malloc failed\n", rank); return 1; } memset(fail_seen, 0, sizeof(int) * ITERS); target.rank = 0; for (i = 0 ; i < ITERS ; ++i) { CHECK_RETURNVAL(PtlPut(md_handle, 0, 0, PTL_ACK_REQ, target, 5, 0, 0, (void*)(size_t)i, 0)); usleep(100); } while (count < ITERS) { ret = PtlEQGet(eq_handle, &ev); if (PTL_EQ_EMPTY == ret) { continue; } else if (PTL_OK != ret) { fprintf(stderr, "%d: PtlEQGet returned %d\n", rank, ret); return 1; } if (ev.ni_fail_type == PTL_NI_OK) { if (ev.type == PTL_EVENT_SEND) { continue; } else if (ev.type == PTL_EVENT_ACK) { count++; } else { fprintf(stderr, "%d: Unexpected event type %d\n", rank, ev.type); } } else if (ev.ni_fail_type == 
PTL_NI_PT_DISABLED) { int iter = (size_t) ev.user_ptr; if (fail_seen[iter]++ > 0) { fprintf(stderr, "%d: Double report of PT_DISABLED for " "iteration %d\n", rank, iter); return 1; } count++; fails++; } else { fprintf(stderr, "%d: Unexpected fail type: %d\n", rank, ev.ni_fail_type); return 1; } } fprintf(stderr, "%d: Saw %d of %d events as fails\n", rank, fails, count); CHECK_RETURNVAL(PtlPut(md_handle, 0, 0, PTL_NO_ACK_REQ, target, 6, 0, 0, NULL, 0)); /* wait for the send event on the last put */ CHECK_RETURNVAL(PtlEQWait(eq_handle, &ev)); while (fails > 0) { CHECK_RETURNVAL(PtlPut(md_handle, 0, 0, PTL_ACK_REQ, target, 5, 0, 0, NULL, 0)); while (1) { ret = PtlEQWait(eq_handle, &ev); if (PTL_OK != ret) { fprintf(stderr, "%d: PtlEQWait returned %d\n", rank, ret); return 1; } if (ev.ni_fail_type == PTL_NI_OK) { if (ev.type == PTL_EVENT_SEND) { continue; } else if (ev.type == PTL_EVENT_ACK) { fails--; break; } else { fprintf(stderr, "%d: Unexpected event type %d\n", rank, ev.type); } } else if (ev.ni_fail_type == PTL_NI_PT_DISABLED) { break; } else { fprintf(stderr, "%d: Unexpected fail type: %d\n", rank, ev.ni_fail_type); return 1; } } } } libtest_barrier(); if (0 == rank) { CHECK_RETURNVAL(UNLINK(signal_e_handle)); CHECK_RETURNVAL(PtlPTFree(ni_handle, signal_pt_index)); CHECK_RETURNVAL(PtlCTFree(ct_handle)); CHECK_RETURNVAL(UNLINK(value_e_handle)); CHECK_RETURNVAL(PtlPTFree(ni_handle, pt_index)); CHECK_RETURNVAL(PtlEQFree(eq_handle)); } else { CHECK_RETURNVAL(PtlMDRelease(md_handle)); CHECK_RETURNVAL(PtlEQFree(eq_handle)); } CHECK_RETURNVAL(PtlNIFini(ni_handle)); CHECK_RETURNVAL(libtest_fini()); PtlFini(); return 0; }