/*
 * Eviction handler for a single hotel room.
 *
 * arg points at an opal_hotel_room_eviction_callback_arg_t naming the
 * hotel and the room number concerned.  The room's occupant is read
 * first, the room is then checked out, and finally the hotel's
 * evict_callback_fn is invoked with the hotel, the room number and the
 * former occupant.  fd and flags are not used.
 */
static void local_eviction_callback(int fd, short flags, void *arg)
{
    opal_hotel_room_eviction_callback_arg_t *evict =
        (opal_hotel_room_eviction_callback_arg_t *) arg;
    void *evictee;

    /* Remember who was in the room before it is emptied */
    evictee = evict->hotel->rooms[evict->room_num].occupant;

    /* Vacate the room, then tell the user who was evicted */
    opal_hotel_checkout(evict->hotel, evict->room_num);
    (evict->hotel->evict_callback_fn)(evict->hotel, evict->room_num, evictee);
}
static void _mdxresp(int sd, short args, void *cbdata) { pmix_server_req_t *req = (pmix_server_req_t*)cbdata; int rc; opal_buffer_t *reply; /* check us out of the hotel */ opal_hotel_checkout(&orte_pmix_server_globals.reqs, req->room_num); reply = OBJ_NEW(opal_buffer_t); /* return the status */ if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &req->status, 1, OPAL_INT))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(reply); goto done; } /* pack the id of the requested proc */ if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &req->target, 1, OPAL_NAME))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(reply); goto done; } /* pack the remote daemon's request room number */ if (OPAL_SUCCESS != (rc = opal_dss.pack(reply, &req->remote_room_num, 1, OPAL_INT))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(reply); goto done; } /* return any provided data */ opal_dss.copy_payload(reply, &req->msg); /* send the response */ orte_rml.send_buffer_nb(&req->proxy, reply, ORTE_RML_TAG_DIRECT_MODEX_RESP, orte_rml_send_callback, NULL); done: /* if they asked for a release, give it to them */ if (NULL != req->rlcbfunc) { req->rlcbfunc(req->cbdata); } OBJ_RELEASE(req); return; }
static void execute(int sd, short args, void *cbdata) { pmix_server_req_t *req = (pmix_server_req_t*)cbdata; int rc; opal_buffer_t *xfer; /* add this request to our tracker hotel */ if (OPAL_SUCCESS != (rc = opal_hotel_checkin(&orte_pmix_server_globals.reqs, req, &req->room_num))) { ORTE_ERROR_LOG(rc); goto callback; } /* setup the xfer */ xfer = OBJ_NEW(opal_buffer_t); /* pack the room number */ if (OPAL_SUCCESS != (rc = opal_dss.pack(xfer, &req->room_num, 1, OPAL_INT))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(xfer); goto callback; } opal_dss.copy_payload(xfer, &req->msg); /* send the request to the target */ rc = orte_rml.send_buffer_nb(&req->target, xfer, ORTE_RML_TAG_DATA_SERVER, orte_rml_send_callback, NULL); if (ORTE_SUCCESS == rc) { return; } callback: /* execute the callback to avoid having the client hang */ if (NULL != req->opcbfunc) { req->opcbfunc(rc, req->cbdata); } else if (NULL != req->lkcbfunc) { req->lkcbfunc(rc, NULL, req->cbdata); } opal_hotel_checkout(&orte_pmix_server_globals.reqs, req->room_num); OBJ_RELEASE(req); }
/*
 * Force a retrans of a segment
 *
 * Looks at the sent-segment window slot for sequence number
 * ack_seq + 1.  If that slot holds a segment that currently has a hotel
 * room, the segment is checked out of the endpoint's hotel, appended to
 * the module's pending-resend list, and the fast-retransmit counter is
 * bumped.  If the slot is empty, or the segment has no hotel room
 * (ss_hotel_room == -1), nothing is done.
 */
static void
opal_btl_usnic_force_retrans(
    opal_btl_usnic_endpoint_t *endpoint,
    opal_btl_usnic_seq_t ack_seq)
{
    opal_btl_usnic_send_segment_t *seg;
    int slot;

    slot = WINDOW_SIZE_MOD(ack_seq+1);
    seg = endpoint->endpoint_sent_segs[slot];

    if (NULL != seg && -1 != seg->ss_hotel_room) {
        /* checking out of the hotel cancels the retrans timer */
        opal_hotel_checkout(&endpoint->endpoint_hotel, seg->ss_hotel_room);
        seg->ss_hotel_room = -1;

        /* put the segment on the list of segments to be resent */
        opal_list_append(&(endpoint->endpoint_module->pending_resend_segs),
                         &(seg->ss_base.us_list.super));

        ++endpoint->endpoint_module->stats.num_fast_retrans;
    }
}
/*
 * We have received an ACK for a given sequence number (either standalone
 * or via piggy-back on a regular send)
 *
 * endpoint: the endpoint the ACK applies to
 * ack_seq:  the acknowledged sequence number
 *
 * Three cases are distinguished by comparing ack_seq with the last ACK
 * recorded on the endpoint (endpoint_ack_seq_rcvd):
 *
 *  - older than the recorded ACK: counted as an old duplicate and
 *    otherwise ignored;
 *  - equal to the recorded ACK: counted as a duplicate and the segment
 *    following it is forced to be retransmitted;
 *  - newer: every sequence number from endpoint_ack_seq_rcvd + 1 up to
 *    and including ack_seq is processed as acknowledged (see the loop
 *    below), the recorded ACK is advanced to ack_seq, and the endpoint
 *    is re-checked for ready-to-send.
 */
void
opal_btl_usnic_handle_ack(
    opal_btl_usnic_endpoint_t *endpoint,
    opal_btl_usnic_seq_t ack_seq)
{
    opal_btl_usnic_seq_t is;
    opal_btl_usnic_send_segment_t *sseg;
    opal_btl_usnic_send_frag_t *frag;
    opal_btl_usnic_module_t *module;
    uint32_t bytes_acked;

    module = endpoint->endpoint_module;

    /* ignore if this is an old ACK */
    if (SEQ_LT(ack_seq, endpoint->endpoint_ack_seq_rcvd)) {
#if MSGDEBUG1
        opal_output(0, "Got OLD DUP ACK seq %"UDSEQ" < %"UDSEQ"\n",
                ack_seq, endpoint->endpoint_ack_seq_rcvd);
#endif
        ++module->stats.num_old_dup_acks;
        return;

    /* A duplicate ACK means next seg was lost */
    } else if (ack_seq == endpoint->endpoint_ack_seq_rcvd) {
        ++module->stats.num_dup_acks;

        opal_btl_usnic_force_retrans(endpoint, ack_seq);
        return;
    }

    /* Does this ACK have a new sequence number that we haven't
       seen before?  Walk every newly-acknowledged sequence number; each
       one is expected to still have its segment in the sent-segment
       window (enforced by the asserts below). */
    for (is = endpoint->endpoint_ack_seq_rcvd + 1; SEQ_LE(is, ack_seq); ++is) {
        sseg = endpoint->endpoint_sent_segs[WINDOW_SIZE_MOD(is)];

#if MSGDEBUG1
        opal_output(0, " Checking ACK/sent_segs window %p, index %lu, seq %lu, occupied=%p, seg_room=%d",
            (void*) endpoint->endpoint_sent_segs,
            WINDOW_SIZE_MOD(is), is, (void*)sseg, (sseg?sseg->ss_hotel_room:-2));
#endif

        assert(sseg != NULL);
        assert(sseg->ss_base.us_btl_header->pkt_seq == is);
#if MSGDEBUG1
        if (sseg->ss_hotel_room == -1) {
            opal_output(0, "=== ACKed frag in sent_frags array is not in hotel/enqueued, module %p, endpoint %p, seg %p, seq %" UDSEQ ", slot %lu",
                        (void*) module, (void*) endpoint,
                        (void*) sseg, is, WINDOW_SIZE_MOD(is));
        }
#endif

        /* Check the sending segment out from the hotel.  NOTE: The
           segment might not actually be in a hotel room if it has
           already been evicted and queued for resend.
           If it's not in the hotel, don't check it out! */
        if (OPAL_LIKELY(sseg->ss_hotel_room != -1)) {

            opal_hotel_checkout(&endpoint->endpoint_hotel, sseg->ss_hotel_room);
            sseg->ss_hotel_room = -1;

        /* hotel_room == -1 means queued for resend, remove it */
        } else {
            opal_list_remove_item((&module->pending_resend_segs),
                    &sseg->ss_base.us_list.super);
        }

        /* update the owning fragment.  Both values are read from the
           segment now, before the segment can be released below. */
        bytes_acked = sseg->ss_base.us_btl_header->payload_len;
        frag = sseg->ss_parent_frag;

#if MSGDEBUG1
        opal_output(0, " ACKED seg %p frag %p ack_bytes=%"PRIu32" left=%zd dst_seg[0].seg_addr=%p des_flags=0x%x\n",
                (void*)sseg, (void*)frag, bytes_acked,
                frag->sf_ack_bytes_left - bytes_acked,
                frag->sf_base.uf_local_seg[0].seg_addr.pval,
                frag->sf_base.uf_base.des_flags);
#endif

        /* If all ACKs received, and this is a put or a regular send
         * that needs a callback, perform the callback now
         *
         * NOTE on sf_ack_bytes_left - here we check for
         *      sf_ack_bytes_left == bytes_acked
         * as opposed to adjusting sf_ack_bytes_left and checking for 0 because
         * if we don't, the callback function may call usnic_free() and free
         * the fragment out from under us which we do not want.  If the
         * fragment really needs to be freed, we'll take care of it in a few
         * lines below.
         */
        if (frag->sf_ack_bytes_left == bytes_acked &&
            ((frag->sf_base.uf_remote_seg[0].seg_addr.pval != NULL) ||
             (frag->sf_base.uf_base.des_flags &
              MCA_BTL_DES_SEND_ALWAYS_CALLBACK))) {
            OPAL_BTL_USNIC_DO_SEND_FRAG_CB(module, frag, "send completion");
        }

        /* free this segment - but only once no send of it is still
           posted (ss_send_posted == 0) */
        sseg->ss_ack_pending = false;
        if (sseg->ss_send_posted == 0) {
            opal_btl_usnic_release_send_segment(module, frag, sseg);
        }

        /* when no bytes left to ACK, fragment send is truly done */
        /* see note above on why this is done here as opposed to earlier */
        frag->sf_ack_bytes_left -= bytes_acked;

        /* OK to return this fragment? */
        opal_btl_usnic_send_frag_return_cond(module, frag);

        /* indicate this segment has been ACKed */
        endpoint->endpoint_sent_segs[WINDOW_SIZE_MOD(is)] = NULL;
    }

    /* update ACK received */
    endpoint->endpoint_ack_seq_rcvd = ack_seq;

    /* send window may have opened, possibly make endpoint ready-to-send */
    opal_btl_usnic_check_rts(endpoint);
}
/*
 * RML receive handler for a direct-modex response.
 *
 * The buffer is expected to hold, in order: an integer status, the name
 * of the proc whose data was requested, the local tracking room number
 * of the request, and the returned data as the remainder of the buffer.
 *
 * The data is handed to the modex callback of the request tracked in
 * that room (if any), and then to the modex callback of every other
 * tracked request that is waiting on the same target; each such request
 * is checked out of the hotel and released.  The data is carried in a
 * reference-counted datacaddy_t: one reference is taken per callback
 * invocation (relcbfunc is passed along with it), and the reference
 * held by this function is dropped at the end.
 *
 * status, tg and cbdata are not used; sender is used only for the
 * verbose log message.
 */
static void pmix_server_dmdx_resp(int status, orte_process_name_t* sender,
                                  opal_buffer_t *buffer,
                                  orte_rml_tag_t tg, void *cbdata)
{
    int rc, ret, room_num, rnum;
    int32_t cnt;
    opal_process_name_t target;
    pmix_server_req_t *req = NULL;
    datacaddy_t *d;

    opal_output_verbose(2, orte_pmix_server_globals.output,
                        "%s dmdx:recv response from proc %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(sender));

    /* unpack the status */
    cnt = 1;
    if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &ret, &cnt, OPAL_INT))) {
        ORTE_ERROR_LOG(rc);
        return;
    }

    /* unpack the id of the target whose info we just received */
    cnt = 1;
    if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &target, &cnt, OPAL_NAME))) {
        ORTE_ERROR_LOG(rc);
        return;
    }

    /* unpack our tracking room number */
    cnt = 1;
    if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &room_num, &cnt, OPAL_INT))) {
        ORTE_ERROR_LOG(rc);
        return;
    }

    /* unload the remainder of the buffer */
    d = OBJ_NEW(datacaddy_t);
    if (OPAL_SUCCESS != (rc = opal_dss.unload(buffer, (void**)&d->data, &d->ndata))) {
        ORTE_ERROR_LOG(rc);
        /* don't leak the caddy we just created */
        OBJ_RELEASE(d);
        return;
    }

    /* check the request out of the tracking hotel.  The room number was
     * read off the wire, so only use it if it names one of our rooms;
     * otherwise req stays NULL and no direct requestor is served.
     * NOTE(review): the hotel API appears to index its room array with
     * this value without a release-build range check - confirm */
    if (0 <= room_num && room_num < orte_pmix_server_globals.reqs.num_rooms) {
        opal_hotel_checkout_and_return_occupant(&orte_pmix_server_globals.reqs,
                                                room_num, (void**)&req);
    }

    /* return the returned data to the requestor */
    if (NULL != req) {
        if (NULL != req->mdxcbfunc) {
            /* the callback gets its own reference, handed back through
             * relcbfunc */
            OBJ_RETAIN(d);
            req->mdxcbfunc(ret, d->data, d->ndata, req->cbdata, relcbfunc, d);
        }
        OBJ_RELEASE(req);
    }

    /* now see if anyone else was waiting for data from this target */
    for (rnum=0; rnum < orte_pmix_server_globals.reqs.num_rooms; rnum++) {
        opal_hotel_knock(&orte_pmix_server_globals.reqs, rnum, (void**)&req);
        if (NULL == req) {
            continue;
        }
        if (req->target.jobid == target.jobid &&
            req->target.vpid == target.vpid) {
            if (NULL != req->mdxcbfunc) {
                OBJ_RETAIN(d);
                req->mdxcbfunc(ret, d->data, d->ndata, req->cbdata, relcbfunc, d);
            }
            opal_hotel_checkout(&orte_pmix_server_globals.reqs, rnum);
            OBJ_RELEASE(req);
        }
    }

    /* drop our own reference to maintain accounting */
    OBJ_RELEASE(d);
}
static void pmix_server_dmdx_recv(int status, orte_process_name_t* sender, opal_buffer_t *buffer, orte_rml_tag_t tg, void *cbdata) { int rc, room_num; int32_t cnt; opal_process_name_t idreq; orte_process_name_t name; orte_job_t *jdata; orte_proc_t *proc; pmix_server_req_t *req; /* unpack the id of the proc whose data is being requested */ cnt = 1; if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &idreq, &cnt, OPAL_NAME))) { ORTE_ERROR_LOG(rc); return; } opal_output_verbose(2, orte_pmix_server_globals.output, "%s dmdx:recv request from proc %s for proc %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(sender), ORTE_NAME_PRINT(&idreq)); /* and the remote daemon's tracking room number */ cnt = 1; if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &room_num, &cnt, OPAL_INT))) { ORTE_ERROR_LOG(rc); return; } /* is this proc one of mine? */ memcpy((char*)&name, (char*)&idreq, sizeof(orte_process_name_t)); if (NULL == (jdata = orte_get_job_data_object(name.jobid))) { /* not having the jdata means that we haven't unpacked the * the launch message for this job yet - this is a race * condition, so just log the request and we will fill * it later */ req = OBJ_NEW(pmix_server_req_t); req->proxy = *sender; req->target = idreq; req->remote_room_num = room_num; if (OPAL_SUCCESS != (rc = opal_hotel_checkin(&orte_pmix_server_globals.reqs, req, &req->room_num))) { OBJ_RELEASE(req); send_error(rc, &idreq, sender); } return; } if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, name.vpid))) { /* this is truly an error, so notify the sender */ send_error(ORTE_ERR_NOT_FOUND, &idreq, sender); return; } if (!ORTE_FLAG_TEST(proc, ORTE_PROC_FLAG_LOCAL)) { /* send back an error - they obviously have made a mistake */ send_error(ORTE_ERR_NOT_FOUND, &idreq, sender); return; } /* track the request since the call down to the PMIx server * is asynchronous */ req = OBJ_NEW(pmix_server_req_t); req->proxy = *sender; req->target = idreq; req->remote_room_num = room_num; 
if (OPAL_SUCCESS != (rc = opal_hotel_checkin(&orte_pmix_server_globals.reqs, req, &req->room_num))) { OBJ_RELEASE(req); send_error(rc, &idreq, sender); return; } /* ask our local pmix server for the data */ if (OPAL_SUCCESS != (rc = opal_pmix.server_dmodex_request(&idreq, modex_resp, req))) { ORTE_ERROR_LOG(rc); opal_hotel_checkout(&orte_pmix_server_globals.reqs, req->room_num); OBJ_RELEASE(req); send_error(rc, &idreq, sender); return; } return; }