static void eviction_cbfunc(struct opal_hotel_t *hotel, int room_num, void *occupant) { pmix_server_req_t *req = (pmix_server_req_t*)occupant; bool timeout = false; int rc; /* decrement the request timeout */ req->timeout -= orte_pmix_server_globals.timeout; if (req->timeout > 0) { req->timeout -= orte_pmix_server_globals.timeout; if (0 >= req->timeout) { timeout = true; } } if (!timeout) { /* not done yet - check us back in */ if (OPAL_SUCCESS == (rc = opal_hotel_checkin(&orte_pmix_server_globals.reqs, req, &req->room_num))) { return; } ORTE_ERROR_LOG(rc); /* fall thru and return an error so the caller doesn't hang */ } /* don't let the caller hang */ if (NULL != req->opcbfunc) { req->opcbfunc(OPAL_ERR_TIMEOUT, req->cbdata); } else if (NULL != req->mdxcbfunc) { req->mdxcbfunc(OPAL_ERR_TIMEOUT, NULL, 0, req->cbdata, NULL, NULL); } else if (NULL != req->spcbfunc) { req->spcbfunc(OPAL_ERR_TIMEOUT, ORTE_JOBID_INVALID, req->cbdata); } else if (NULL != req->lkcbfunc) { req->lkcbfunc(OPAL_ERR_TIMEOUT, NULL, req->cbdata); } OBJ_RELEASE(req); }
/*
 * Assign the next outgoing sequence number on an ack channel to msg and
 * track msg in the channel's outstanding-message hotel.
 *
 * qos_channel  the channel, passed as an opaque pointer and used as an
 *              orte_qos_ack_channel_t
 * msg          the message being sent; its seq_num field is written
 *
 * Returns ORTE_SUCCESS when the message was checked into the hotel, or
 * ORTE_ERR_QOS_ACK_WINDOW_FULL when the hotel check-in failed.  Note that
 * the channel's sequence/window state has already been advanced when the
 * error is returned.
 */
static int ack_send(void *qos_channel, orte_rml_send_t *msg)
{
    orte_qos_ack_channel_t *ack_chan = (orte_qos_ack_channel_t*) (qos_channel);
    int32_t room_num;

    if (ack_chan->out_msg_seq_num == ack_chan->window_first_seq_num -1 ) {
        /* first message of a new window */
        ack_chan->out_msg_seq_num = ack_chan->window_first_seq_num;
        OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
                             "%s ack_send msg = %p to peer = %s\n begining window at seq_num = %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             (void*)msg,
                             ORTE_NAME_PRINT(&msg->dst),
                             ack_chan->out_msg_seq_num));
        ack_chan->state = orte_qos_ack_channel_state_filling_window;
    } else {
        ack_chan->out_msg_seq_num++;
    }

    if (ack_chan->out_msg_seq_num - ack_chan->window_first_seq_num == ack_chan->window - 1) {
        /* this message closes the window: record that and set the
         * start of the next window */
        ack_chan->state = orte_qos_ack_channel_state_window_completed;
        ack_chan->window_first_seq_num = ack_chan->out_msg_seq_num + 1;
    }

    msg->seq_num = ack_chan->out_msg_seq_num;

    /* check msg into hotel */
    if (OPAL_SUCCESS != (opal_hotel_checkin(&ack_chan->outstanding_msgs, msg, &room_num ))) {
        OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
                             "%s ack_send msg = %p to peer = %s returned with error %d\n",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             (void*)msg,
                             ORTE_NAME_PRINT(&msg->dst),
                             ORTE_ERR_QOS_ACK_WINDOW_FULL));
        return ORTE_ERR_QOS_ACK_WINDOW_FULL;
    }

    /* remember which room holds this sequence number */
    orte_qos_ack_channel_set_msg_room(ack_chan, msg->seq_num, room_num);

    OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
                         "%s ack_send msg = %p to peer = %s\n",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         (void*)msg,
                         ORTE_NAME_PRINT(&msg->dst)));
    return ORTE_SUCCESS;
}
static void execute(int sd, short args, void *cbdata) { pmix_server_req_t *req = (pmix_server_req_t*)cbdata; int rc; opal_buffer_t *xfer; /* add this request to our tracker hotel */ if (OPAL_SUCCESS != (rc = opal_hotel_checkin(&orte_pmix_server_globals.reqs, req, &req->room_num))) { ORTE_ERROR_LOG(rc); goto callback; } /* setup the xfer */ xfer = OBJ_NEW(opal_buffer_t); /* pack the room number */ if (OPAL_SUCCESS != (rc = opal_dss.pack(xfer, &req->room_num, 1, OPAL_INT))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(xfer); goto callback; } opal_dss.copy_payload(xfer, &req->msg); /* send the request to the target */ rc = orte_rml.send_buffer_nb(&req->target, xfer, ORTE_RML_TAG_DATA_SERVER, orte_rml_send_callback, NULL); if (ORTE_SUCCESS == rc) { return; } callback: /* execute the callback to avoid having the client hang */ if (NULL != req->opcbfunc) { req->opcbfunc(rc, req->cbdata); } else if (NULL != req->lkcbfunc) { req->lkcbfunc(rc, NULL, req->cbdata); } opal_hotel_checkout(&orte_pmix_server_globals.reqs, req->room_num); OBJ_RELEASE(req); }
void orte_qos_ack_channel_process_ack (int status, orte_process_name_t* sender, opal_buffer_t *buffer, orte_rml_tag_t tag, void *cbdata) { /* process ack received for the msg */ uint32_t num_msgs_acked, channel_num, i; int32_t num_values, room_num; orte_rml_send_t *msg, *missed_msg; void *occupant = NULL; orte_rml_channel_t *channel; orte_qos_ack_channel_t *ack_chan; uint32_t *seq_num_array; uint32_t ack_type; uint32_t missed_msg_seq_num = 0; num_values = 1; /* unpack channel number first */ opal_dss.unpack(buffer, (void*) &channel_num, &num_values, OPAL_UINT32); OPAL_OUTPUT_VERBOSE((5, orte_qos_base_framework.framework_output, "orte_qos_ack_channel_process_ack recieved ack on channel = %d", channel_num)); channel = orte_rml_base_get_channel (channel_num); if ((NULL != channel) || (NULL != channel->qos_channel_ptr)) { ack_chan = (orte_qos_ack_channel_t *) (channel->qos_channel_ptr); seq_num_array = malloc (sizeof(uint32_t) * ack_chan->window); num_values = 1; /* unpack ack type */ opal_dss.unpack(buffer, (void*) &ack_type, &num_values, OPAL_UINT32); num_values = 1; /* unpack num messages acked */ opal_dss.unpack(buffer, (void*) &num_msgs_acked, &num_values, OPAL_UINT32); OPAL_OUTPUT_VERBOSE((5, orte_qos_base_framework.framework_output, "orte_qos_ack_channel_process_ack recieved ack type %d for %d msgs on channel = %d", ack_type, num_msgs_acked, channel_num)); if (ACK_OUT_OF_ORDER != ack_type) { //handle normal ACK for (i = 0; i < num_msgs_acked; i++) { opal_dss.unpack(buffer, (void*) &seq_num_array[i], &num_values, OPAL_UINT32); room_num = orte_qos_ack_channel_get_msg_room (ack_chan, seq_num_array[i]); opal_hotel_checkout_and_return_occupant(&ack_chan->outstanding_msgs, room_num, &occupant); orte_qos_ack_channel_set_msg_room(ack_chan, seq_num_array[i], -1); if((occupant != NULL) && (room_num != -1)) { msg = (orte_rml_send_t*) occupant; OPAL_OUTPUT_VERBOSE((10, orte_rml_base_framework.framework_output, "Releasing sent message with tag %d and seq_num %d after 
receiving Ack from dest ", msg->tag, msg->seq_num )); msg->status = ORTE_SUCCESS; ORTE_RML_SEND_COMPLETE(msg); } else { OPAL_OUTPUT_VERBOSE((10, orte_rml_base_framework.framework_output, "OOPS received an ACK for already completed seq_num =%d ", seq_num_array[i] )); } } } else { // handle out of order ACK - complete msgs received in order, retry the lost msg. for (i = 0; i < num_msgs_acked; i++) { opal_dss.unpack(buffer, (void*) &seq_num_array[i], &num_values, OPAL_UINT32); room_num = orte_qos_ack_channel_get_msg_room (ack_chan, seq_num_array[i]); opal_hotel_checkout_and_return_occupant(&ack_chan->outstanding_msgs, room_num, &occupant); orte_qos_ack_channel_set_msg_room(ack_chan, seq_num_array[i], -1); if ((NULL != occupant) && ((i == 0 )|| (seq_num_array[i] == seq_num_array[i-1] +1 ))) { msg = (orte_rml_send_t*) occupant; msg->status = ORTE_SUCCESS; ORTE_RML_SEND_COMPLETE(msg); } else { if (NULL != occupant) { // num_missed_msgs = (seq_num_array[i] - seq_num_array [i-1] - 1); assert( i == num_msgs_acked -1); /* recheck the ith msg */ opal_hotel_checkin(&ack_chan->outstanding_msgs, (void*)occupant, &room_num); orte_qos_ack_channel_set_msg_room (ack_chan, seq_num_array[i], room_num); /* resend and recheck all the missed msgs*/ missed_msg_seq_num = seq_num_array[i-1] + 1; for (; missed_msg_seq_num < seq_num_array[i]; missed_msg_seq_num++) { room_num = orte_qos_ack_channel_get_msg_room (ack_chan, missed_msg_seq_num); opal_hotel_checkout_and_return_occupant (&ack_chan->outstanding_msgs, room_num, &occupant); assert ( NULL != occupant); missed_msg = (orte_rml_send_t*) occupant; missed_msg->status = ORTE_ERR_LOST_MSG_IN_WINDOW; opal_hotel_checkin(&ack_chan->outstanding_msgs, (void*)missed_msg, &room_num); orte_qos_ack_channel_set_msg_room (ack_chan, missed_msg_seq_num, room_num); /* send this out on wire directly */ ORTE_OOB_SEND (missed_msg); } //end for } else { OPAL_OUTPUT_VERBOSE((10, orte_rml_base_framework.framework_output, "OOPS received an ACK for already 
completed seq_num =%d ", seq_num_array[i] )); }//end if (NULL != occupant) } //end else } // end for }//end out of order ack processing free(seq_num_array); }else { OPAL_OUTPUT_VERBOSE((5, orte_qos_base_framework.framework_output, "orte_qos_ack_channel_msg_ack_recv_callback recieved ack on non existent channel = %d", channel_num)); } }
/*
 * Handle a message received on an ack channel out of sequence order.
 *
 * Returns ORTE_ERR_DUPLICATE_MSG when a room is already recorded for
 * msg->seq_num, the hotel's error code when msg cannot be checked in,
 * and ORTE_ERR_OUT_OF_ORDER_MSG otherwise.
 *
 * In the non-duplicate case msg is parked in the channel's hotel, then:
 *  - if ack_msg_seq_num <= in_msg_seq_num, an ACK_OUT_OF_ORDER ack is
 *    sent for this message and the window ack timer is deleted;
 *  - else, if msg->seq_num lies below ack_msg_seq_num and every sequence
 *    number from in_msg_seq_num + 1 up to ack_msg_seq_num is now parked,
 *    the parked messages are completed in sequence order until a
 *    sequence number with no room is reached, and an
 *    ACK_RECV_MISSED_MSG ack is sent for the last completed one.
 */
static inline int process_out_of_order_msg ( orte_qos_ack_channel_t *ack_chan, orte_rml_recv_t *msg)
{
    int32_t rc, room_num, first_lost_msg_seq_num, num_lost_msgs, i;
    orte_rml_recv_t *out_msg;
    void *occupant = NULL;

    OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
                         "%s process_out_of_order_msg msg %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), msg->seq_num));

    /* if this msg is a duplicate - then do nothing */
    if ((orte_qos_ack_channel_get_msg_room(ack_chan, msg->seq_num)) != -1) {
        OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
                             "%s process_out_of_order_msg msg %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), msg->seq_num));
        return ORTE_ERR_DUPLICATE_MSG;
    }

    if (OPAL_SUCCESS != (rc = opal_hotel_checkin(&ack_chan->outstanding_msgs, (void*)msg, &room_num))) {
        return rc;
    }
    OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
                         "process_out_of_order_msg checked in msg %d in room %d\n",
                         msg->seq_num, room_num));
    orte_qos_ack_channel_set_msg_room (ack_chan, msg->seq_num, room_num);
    rc = ORTE_ERR_OUT_OF_ORDER_MSG;

    /* check if we need to send an ACK */
    if (ack_chan->ack_msg_seq_num <= ack_chan->in_msg_seq_num) {
        OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
                             "%s process_out_of_order_msg sending ack last seq_num = %d\n",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), msg->seq_num));
        /* send ACK. */
        send_ack (ack_chan, msg->channel_num, ACK_OUT_OF_ORDER, msg->seq_num);
        /* stop window ack timer */
        opal_event_evtimer_del (&ack_chan->msg_ack_timer_event);
        return rc;
    }

    /* only a lost msg - a seq num between in_msg_seq_num and
     * ack_msg_seq_num - can complete the gap */
    if (ack_chan->ack_msg_seq_num <= msg->seq_num) {
        return rc;
    }

    /* check if we have got all lost msgs */
    first_lost_msg_seq_num = ack_chan->in_msg_seq_num + 1;
    num_lost_msgs = ack_chan->ack_msg_seq_num - ack_chan->in_msg_seq_num;
    OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
                         "%s process_out_of_order_msg msg %d first_lost_msg =%d num_lost_msgs =%d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), msg->seq_num,
                         first_lost_msg_seq_num, num_lost_msgs));
    for (i = 0; i < num_lost_msgs; i++) {
        if ((orte_qos_ack_channel_get_msg_room(ack_chan, first_lost_msg_seq_num + i)) == -1) {
            break;
        }
    }
    if (i != num_lost_msgs) {
        /* at least one lost msg is still missing */
        return rc;
    }

    /* we got all the lost msgs so we can complete all the msgs in the
     * hotel now */
    /* reset ack_seq_num */
    ack_chan->ack_msg_seq_num = first_lost_msg_seq_num - 1;
    room_num = 0;
    for (i = 0; room_num != -1; i++) {
        OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
                             "%s process_out_of_order_msg got all lost msgs completing outstanding msgs %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (first_lost_msg_seq_num + i)));
        /* evict msg and complete it */
        room_num = orte_qos_ack_channel_get_msg_room (ack_chan, first_lost_msg_seq_num + i);
        if (-1 != room_num) {
            opal_hotel_checkout_and_return_occupant(&ack_chan->outstanding_msgs, room_num, &occupant);
        } else {
            /* -1 is this loop's terminator and must not reach the hotel.
             * NOTE(review): the checkout is believed to index the room
             * array with room_num directly - confirm in opal_hotel.h */
            occupant = NULL;
        }
        orte_qos_ack_channel_set_msg_room(ack_chan, first_lost_msg_seq_num + i, -1);
        out_msg = (orte_rml_recv_t *) occupant;
        if ((NULL != out_msg) && (room_num != -1)) {
            /* set in seq num */
            ack_chan->in_msg_seq_num = out_msg->seq_num;
            /* completing recv msg to rml */
            orte_rml_base_complete_recv_msg(&out_msg);
            OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
                                 "process_out_of_order_msg completed recv msg %d",
                                 (first_lost_msg_seq_num + i)));
        } else {
            OPAL_OUTPUT_VERBOSE((1, orte_qos_base_framework.framework_output,
                                 "%s process_out_of_order_msg lost msg %d not in hotel",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (first_lost_msg_seq_num + i)));
        }
    }

    /* send ACK */
    send_ack (ack_chan, ack_chan->channel_num, ACK_RECV_MISSED_MSG, ack_chan->in_msg_seq_num);

    return rc;
}
/*
 * Receive a direct-modex request for one process's data.
 *
 * The buffer carries the name of the process whose data is wanted
 * followed by the requesting daemon's tracking room number.
 *
 *  - If the job of the requested proc is not known yet, the request is
 *    parked in orte_pmix_server_globals.reqs to be filled later.
 *  - If the proc is unknown or not flagged ORTE_PROC_FLAG_LOCAL,
 *    ORTE_ERR_NOT_FOUND is sent back to the sender.
 *  - Otherwise the request is tracked in the hotel and the local PMIx
 *    server is asked for the data, with modex_resp as the callback.
 *
 * Failures after both fields were unpacked are reported to the sender
 * via send_error(); unpack failures are only logged.
 *
 * status, tg and cbdata are not used by this function.
 */
static void pmix_server_dmdx_recv(int status, orte_process_name_t* sender,
                                  opal_buffer_t *buffer,
                                  orte_rml_tag_t tg, void *cbdata)
{
    opal_process_name_t idreq;
    orte_process_name_t name;
    pmix_server_req_t *req;
    orte_job_t *jdata;
    orte_proc_t *proc;
    int32_t cnt;
    int room_num;
    int rc;

    /* unpack the id of the proc whose data is being requested */
    cnt = 1;
    rc = opal_dss.unpack(buffer, &idreq, &cnt, OPAL_NAME);
    if (OPAL_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        return;
    }
    opal_output_verbose(2, orte_pmix_server_globals.output,
                        "%s dmdx:recv request from proc %s for proc %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(sender),
                        ORTE_NAME_PRINT(&idreq));

    /* and the remote daemon's tracking room number */
    cnt = 1;
    rc = opal_dss.unpack(buffer, &room_num, &cnt, OPAL_INT);
    if (OPAL_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        return;
    }

    /* is this proc one of mine? */
    memcpy((char*)&name, (char*)&idreq, sizeof(orte_process_name_t));
    jdata = orte_get_job_data_object(name.jobid);
    if (NULL == jdata) {
        /* no jdata means the launch message for this job has not been
         * unpacked yet - a race condition, so just log the request and
         * we will fill it later */
        req = OBJ_NEW(pmix_server_req_t);
        req->proxy = *sender;
        req->target = idreq;
        req->remote_room_num = room_num;
        rc = opal_hotel_checkin(&orte_pmix_server_globals.reqs, req, &req->room_num);
        if (OPAL_SUCCESS != rc) {
            OBJ_RELEASE(req);
            send_error(rc, &idreq, sender);
        }
        return;
    }

    /* an unknown proc is truly an error, and a non-local proc means the
     * requester made a mistake - both get the same answer */
    proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, name.vpid);
    if (NULL == proc || !ORTE_FLAG_TEST(proc, ORTE_PROC_FLAG_LOCAL)) {
        send_error(ORTE_ERR_NOT_FOUND, &idreq, sender);
        return;
    }

    /* track the request since the call down to the PMIx server
     * is asynchronous */
    req = OBJ_NEW(pmix_server_req_t);
    req->proxy = *sender;
    req->target = idreq;
    req->remote_room_num = room_num;
    rc = opal_hotel_checkin(&orte_pmix_server_globals.reqs, req, &req->room_num);
    if (OPAL_SUCCESS != rc) {
        OBJ_RELEASE(req);
        send_error(rc, &idreq, sender);
        return;
    }

    /* ask our local pmix server for the data */
    rc = opal_pmix.server_dmodex_request(&idreq, modex_resp, req);
    if (OPAL_SUCCESS == rc) {
        return;
    }

    /* the request never went out: stop tracking it and tell the sender */
    ORTE_ERROR_LOG(rc);
    opal_hotel_checkout(&orte_pmix_server_globals.reqs, req->room_num);
    OBJ_RELEASE(req);
    send_error(rc, &idreq, sender);
}