/*
 * Blocking receive of a buffer from a multicast channel.
 * Posts a one-shot recv, waits for the matching message, then copies
 * the payload into the caller's buffer and (optionally) reports the
 * sender's name and the message sequence number.
 */
static int udp_recv_buffer(orte_process_name_t *name,
                           orte_rmcast_channel_t channel,
                           orte_rmcast_tag_t tag,
                           orte_rmcast_seq_t *seq_num,
                           opal_buffer_t *buf)
{
    rmcast_base_recv_t *recv = NULL;
    orte_rmcast_channel_t actual;
    int rc;

    ORTE_ACQUIRE_THREAD(&ctl);

    /* refuse to post a recv once comm has been disabled */
    if (!comm_enabled) {
        ORTE_RELEASE_THREAD(&ctl);
        return ORTE_ERR_COMM_DISABLED;
    }

    OPAL_OUTPUT_VERBOSE((2, orte_rmcast_base.rmcast_output,
                         "%s rmcast:udp: recv_buffer called on multicast channel %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), channel));

    /* map the generic group channels onto the real ones */
    if (ORTE_RMCAST_GROUP_INPUT_CHANNEL == channel) {
        actual = orte_rmcast_base.my_input_channel->channel;
    } else if (ORTE_RMCAST_GROUP_OUTPUT_CHANNEL == channel) {
        actual = orte_rmcast_base.my_output_channel->channel;
    } else {
        actual = channel;
    }

    /* post a non-persistent recv for this channel/tag */
    if (ORTE_SUCCESS != (rc = orte_rmcast_base_queue_recv(&recv, actual, tag,
                                                          ORTE_RMCAST_NON_PERSISTENT,
                                                          NULL, NULL, NULL, true))) {
        ORTE_ERROR_LOG(rc);
        ORTE_RELEASE_THREAD(&ctl);
        return rc;
    }
    ORTE_RELEASE_THREAD(&ctl);

    /* block here until the message arrives */
    recv->ctl.active = true;
    ORTE_ACQUIRE_THREAD(&recv->ctl);

    /* hand back the sender's identity if the caller asked for it */
    if (NULL != name) {
        name->jobid = recv->name.jobid;
        name->vpid = recv->name.vpid;
        ORTE_EPOCH_SET(name->epoch, recv->name.epoch);
    }
    *seq_num = recv->seq_num;

    /* copy the payload across to the caller's buffer */
    if (ORTE_SUCCESS != (rc = opal_dss.copy_payload(buf, recv->buf))) {
        ORTE_ERROR_LOG(rc);
    }

    /* release the data */
    OBJ_RELEASE(recv);
    return rc;
}
/*
 * Blocking receive of an iovec array from a multicast channel.
 * Same protocol as udp_recv_buffer, but ownership of the iovec array
 * transfers to the caller instead of copying a payload.
 */
static int udp_recv(orte_process_name_t *name,
                    orte_rmcast_channel_t channel,
                    orte_rmcast_tag_t tag,
                    orte_rmcast_seq_t *seq_num,
                    struct iovec **msg, int *count)
{
    rmcast_base_recv_t *recv = NULL;
    orte_rmcast_channel_t actual;
    int rc;

    ORTE_ACQUIRE_THREAD(&ctl);

    /* refuse to post a recv once comm has been disabled */
    if (!comm_enabled) {
        ORTE_RELEASE_THREAD(&ctl);
        return ORTE_ERR_COMM_DISABLED;
    }

    /* map the generic group channels onto the real ones */
    if (ORTE_RMCAST_GROUP_INPUT_CHANNEL == channel) {
        actual = orte_rmcast_base.my_input_channel->channel;
    } else if (ORTE_RMCAST_GROUP_OUTPUT_CHANNEL == channel) {
        actual = orte_rmcast_base.my_output_channel->channel;
    } else {
        actual = channel;
    }

    /* post a non-persistent recv for this channel/tag */
    if (ORTE_SUCCESS != (rc = orte_rmcast_base_queue_recv(&recv, actual, tag,
                                                          ORTE_RMCAST_NON_PERSISTENT,
                                                          NULL, NULL, NULL, true))) {
        ORTE_ERROR_LOG(rc);
        ORTE_RELEASE_THREAD(&ctl);
        return rc;
    }
    ORTE_RELEASE_THREAD(&ctl);

    /* block here until the message arrives */
    recv->ctl.active = true;
    ORTE_ACQUIRE_THREAD(&recv->ctl);

    /* hand back the sender's identity if the caller asked for it */
    if (NULL != name) {
        name->jobid = recv->name.jobid;
        name->vpid = recv->name.vpid;
        ORTE_EPOCH_SET(name->epoch, recv->name.epoch);
    }
    *seq_num = recv->seq_num;

    /* transfer ownership of the iovecs to the caller, then detach
     * them from the recv object so its destructor won't free them */
    *msg = recv->iovec_array;
    *count = recv->iovec_count;
    recv->iovec_array = NULL;
    recv->iovec_count = 0;
    OBJ_RELEASE(recv);

    return ORTE_SUCCESS;
}
/*
 * Enable multicast communication for this module.
 * The progress threads are started BEFORE the comm_enabled flag is
 * raised so that no send/recv can be accepted while the threads are
 * not yet running; the statement order is deliberate.
 */
static void enable_comm(void)
{
    ORTE_ACQUIRE_THREAD(&ctl);
    orte_rmcast_base_start_threads();
    comm_enabled = true;
    ORTE_RELEASE_THREAD(&ctl);
}
/*
 * Disable multicast communication for this module.
 * The comm_enabled flag is lowered BEFORE the progress threads are
 * stopped so new operations are rejected while shutdown proceeds;
 * the statement order is deliberate (mirror image of enable_comm).
 */
static void disable_comm(void)
{
    ORTE_ACQUIRE_THREAD(&ctl);
    comm_enabled = false;
    orte_rmcast_base_stop_threads();
    ORTE_RELEASE_THREAD(&ctl);
}
/*
 * Send an iovec array on the given multicast channel.
 * The send descriptor is stack-constructed; the caller retains
 * ownership of the iovecs, so they are detached before destruct.
 */
static int udp_send(orte_rmcast_channel_t channel,
                    orte_rmcast_tag_t tag,
                    struct iovec *msg, int count)
{
    rmcast_base_send_t send_req;
    int rc;

    ORTE_ACQUIRE_THREAD(&ctl);
    if (!comm_enabled) {
        ORTE_RELEASE_THREAD(&ctl);
        return ORTE_ERR_COMM_DISABLED;
    }

    /* queue it to be sent - preserves order! */
    OBJ_CONSTRUCT(&send_req, rmcast_base_send_t);
    send_req.iovec_array = msg;
    send_req.iovec_count = count;
    send_req.tag = tag;

    if (ORTE_SUCCESS != (rc = send_data(&send_req, channel))) {
        ORTE_ERROR_LOG(rc);
    }

    /* the iovecs belong to the caller - detach them so the
     * destructor doesn't free them */
    send_req.iovec_array = NULL;
    send_req.iovec_count = 0;
    OBJ_DESTRUCT(&send_req);

    ORTE_RELEASE_THREAD(&ctl);
    return rc;
}
/*
 * Non-blocking send of a buffer on the given multicast channel.
 * The caller's buffer and callback are attached to a stack-constructed
 * send descriptor; the buffer is detached again before destruct since
 * the caller retains ownership until the callback fires.
 */
static int udp_send_buffer_nb(orte_rmcast_channel_t channel,
                              orte_rmcast_tag_t tag,
                              opal_buffer_t *buf,
                              orte_rmcast_callback_buffer_fn_t cbfunc,
                              void *cbdata)
{
    rmcast_base_send_t send_req;
    int rc;

    ORTE_ACQUIRE_THREAD(&ctl);
    if (!comm_enabled) {
        ORTE_RELEASE_THREAD(&ctl);
        return ORTE_ERR_COMM_DISABLED;
    }

    /* queue it to be sent - preserves order! */
    OBJ_CONSTRUCT(&send_req, rmcast_base_send_t);
    send_req.buf = buf;
    send_req.tag = tag;
    send_req.cbfunc_buffer = cbfunc;
    send_req.cbdata = cbdata;

    if (ORTE_SUCCESS != (rc = send_data(&send_req, channel))) {
        ORTE_ERROR_LOG(rc);
    }

    /* the buffer belongs to the caller - detach it so the
     * destructor doesn't release it */
    send_req.buf = NULL;
    OBJ_DESTRUCT(&send_req);

    ORTE_RELEASE_THREAD(&ctl);
    return rc;
}
/*
 * Register a non-blocking buffer recv (callback-driven) on the given
 * multicast channel. An already-registered matching recv is treated
 * as success, not an error.
 */
static int udp_recv_buffer_nb(orte_rmcast_channel_t channel,
                              orte_rmcast_tag_t tag,
                              orte_rmcast_flag_t flags,
                              orte_rmcast_callback_buffer_fn_t cbfunc,
                              void *cbdata)
{
    orte_rmcast_channel_t actual;
    int rc;

    OPAL_OUTPUT_VERBOSE((2, orte_rmcast_base.rmcast_output,
                         "%s rmcast:udp: recv_buffer_nb called on multicast channel %d tag %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), channel, tag));

    ORTE_ACQUIRE_THREAD(&ctl);

    /* map the generic group channels onto the real ones */
    if (ORTE_RMCAST_GROUP_INPUT_CHANNEL == channel) {
        actual = orte_rmcast_base.my_input_channel->channel;
    } else if (ORTE_RMCAST_GROUP_OUTPUT_CHANNEL == channel) {
        actual = orte_rmcast_base.my_output_channel->channel;
    } else {
        actual = channel;
    }

    /* queue the recv; a pre-existing matching recv is fine */
    rc = orte_rmcast_base_queue_recv(NULL, actual, tag, flags,
                                     NULL, cbfunc, cbdata, false);
    if (ORTE_EXISTS == rc) {
        rc = ORTE_SUCCESS;
    } else if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
    }

    ORTE_RELEASE_THREAD(&ctl);
    return rc;
}
/*
 * Stop the recv processing thread and wait for it to exit.
 * Shutdown protocol: write a NULL message pointer down the recv pipe
 * (the thread interprets NULL as "stop"), then join the thread. The
 * ctl lock must be dropped around the write/join because the thread
 * itself takes the same lock while shutting down.
 */
void orte_rmcast_base_stop_threads(void)
{
    opal_buffer_t *msg=NULL;  /* NULL is the sentinel that tells the thread to exit */

    OPAL_OUTPUT_VERBOSE((5, orte_rmcast_base.rmcast_output,
                         "%s rmcast:base: stopping recv processing thread",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    ORTE_ACQUIRE_THREAD(&orte_rmcast_base.recv_process_ctl);
    if (orte_rmcast_base.recv_process_ctl.running) {
        /* release the lock so the thread can take it while exiting */
        ORTE_RELEASE_THREAD(&orte_rmcast_base.recv_process_ctl);
        opal_fd_write(orte_rmcast_base.recv_pipe[1], sizeof(opal_buffer_t*), &msg);
        opal_thread_join(&orte_rmcast_base.recv_process, NULL);
        ORTE_ACQUIRE_THREAD(&orte_rmcast_base.recv_process_ctl);
    }
    ORTE_RELEASE_THREAD(&orte_rmcast_base.recv_process_ctl);

    OPAL_OUTPUT_VERBOSE((5, orte_rmcast_base.rmcast_output,
                         "%s rmcast:base: all threads stopped",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
}
/*
 * Body of the recv processing thread.
 * Loops forever reading message pointers off the recv pipe and handing
 * them to the rmcast module's process_msg. Exits when a NULL pointer
 * arrives (sent by orte_rmcast_base_stop_threads) or when the pipe
 * read fails. The running flag is maintained under the ctl lock so
 * the main thread can tell whether a join is needed.
 */
static void* rcv_processing_thread(opal_object_t *obj)
{
    orte_rmcast_msg_t *msg;
    int rc;
    struct timespec tp={0, 10};  /* tiny delay used only on the error-exit path */

    OPAL_OUTPUT_VERBOSE((5, orte_rmcast_base.rmcast_output,
                         "%s rmcast:base: recv processing thread operational",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* advertise that we are running */
    ORTE_ACQUIRE_THREAD(&orte_rmcast_base.recv_process_ctl);
    orte_rmcast_base.recv_process_ctl.running = true;
    ORTE_RELEASE_THREAD(&orte_rmcast_base.recv_process_ctl);

    while (1) {
        /* block here until a trigger arrives */
        if (0 > (rc = opal_fd_read(orte_rmcast_base.recv_pipe[0],
                                   sizeof(orte_rmcast_msg_t*), &msg))) {
            /* if something bad happened, punt */
            opal_output(0, "%s PUNTING THREAD", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            ORTE_ACQUIRE_THREAD(&orte_rmcast_base.recv_process_ctl);
            orte_rmcast_base.recv_process_ctl.running = false;
            ORTE_RELEASE_THREAD(&orte_rmcast_base.recv_process_ctl);
            /* give a little delay to ensure the main thread gets into
             * opal_thread_join before we exit */
            nanosleep(&tp, NULL);
            return OPAL_THREAD_CANCELLED;
        }
        /* a NULL message pointer is the shutdown sentinel */
        if (NULL == msg) {
            ORTE_ACQUIRE_THREAD(&orte_rmcast_base.recv_process_ctl);
            orte_rmcast_base.recv_process_ctl.running = false;
            ORTE_RELEASE_THREAD(&orte_rmcast_base.recv_process_ctl);
            return OPAL_THREAD_CANCELLED;
        }
        /* process it - processing function releases the msg */
        orte_rmcast.process_msg(msg);
    }
}
/*
 * Open the input channel(s) for a triplet and arrange for cbfunc to be
 * invoked once the channel is available. Three cases by jobid:
 *   - ORTE_JOBID_WILDCARD: do the callback for EVERY known group;
 *   - ORTE_JOBID_INVALID:  do the callback for the FIRST group whose
 *                          input channel can be opened, then clear all
 *                          remaining cbfunc entries so it fires once;
 *   - specific jobid:      do the callback for that group only, adding
 *                          the group if it isn't known yet.
 * NOTE(review): orcm_get_triplet() apparently returns with triplet->ctl
 * held - it is released below without a visible acquire. Confirm.
 */
static int open_channel(const char *app, const char *version, const char *release,
                        orte_jobid_t jobid, orcm_pnp_open_channel_cbfunc_t cbfunc)
{
    orcm_triplet_t *triplet;
    orcm_triplet_group_t *grp;
    orcm_pnp_request_t *request;
    opal_list_item_t *item;
    int i, rc;
    bool done;

    if (NULL == cbfunc) {
        /* makes no sense */
        ORTE_ERROR_LOG(ORCM_ERR_BAD_PARAM);
        return ORCM_ERR_BAD_PARAM;
    }

    OPAL_OUTPUT_VERBOSE((2, orcm_pnp_base.output,
                         "%s pnp:default:open_channel for %s:%s:%s job %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         (NULL == app) ? "NULL" : app,
                         (NULL == version) ? "NULL" : version,
                         (NULL == release) ? "NULL": release,
                         ORTE_JOBID_PRINT(jobid)));

    /* protect the global arrays */
    ORTE_ACQUIRE_THREAD(&local_thread);

    /* see if we already know this triplet - automatically
     * creates it if not */
    triplet = orcm_get_triplet(app, version, release, true);

    /* record the policy */
    triplet->pnp_cb_policy = jobid;
    triplet->pnp_cbfunc = cbfunc;

    /* if the jobid is wildcard, we execute the callback for every group */
    if (ORTE_JOBID_WILDCARD == jobid) {
        /* cycle thru this triplet's known groups */
        for (i=0; i < triplet->groups.size; i++) {
            if (NULL == (grp = (orcm_triplet_group_t*)opal_pointer_array_get_item(&triplet->groups, i))) {
                continue;
            }
            grp->pnp_cbfunc = cbfunc;
            if (ORCM_PNP_INVALID_CHANNEL != grp->input) {
                /* if the user requested a callback, they probably intend to send
                 * something to this triplet - so ensure the channel to its input is open.
                 * No need to release threads first as this call cannot result in callbacks */
                if (ORTE_SUCCESS != (rc = orte_rmcast.open_channel(grp->input, triplet->string_id, NULL, -1, NULL, ORTE_RMCAST_XMIT))) {
                    ORTE_ERROR_LOG(rc);
                    /* NOTE(review): this release looks suspect - the loop keeps
                     * running and triplet->ctl is released again after the loop,
                     * unlike the other two jobid cases which just continue.
                     * Confirm against ORTE_RELEASE_THREAD semantics. */
                    ORTE_RELEASE_THREAD(&triplet->ctl);
                    continue;
                }
                /* release the threads before doing the callback in
                 * case the caller sends messages */
                ORTE_RELEASE_THREAD(&triplet->ctl);
                ORTE_RELEASE_THREAD(&local_thread);
                cbfunc(app, version, release, grp->input);
                /* reacquire the threads */
                ORTE_ACQUIRE_THREAD(&local_thread);
                ORTE_ACQUIRE_THREAD(&triplet->ctl);
                /* flag that this group has executed its callback */
                grp->pnp_cbfunc = NULL;
            }
        }
        /* release the threads */
        ORTE_RELEASE_THREAD(&triplet->ctl);
        ORTE_RELEASE_THREAD(&local_thread);
        return ORCM_SUCCESS;
    }

    if (ORTE_JOBID_INVALID == jobid) {
        /* see if we have know about any group with this triplet */
        done = false;
        for (i=0; i < triplet->groups.size; i++) {
            if (NULL == (grp = (orcm_triplet_group_t*)opal_pointer_array_get_item(&triplet->groups, i))) {
                continue;
            }
            grp->pnp_cbfunc = cbfunc;
            if (ORCM_PNP_INVALID_CHANNEL != grp->input) {
                /* if the user requested a callback, they probably intend to send
                 * something to this triplet - so ensure the channel to its input is open.
                 * No need to release threads first as this call cannot result in callbacks */
                if (ORTE_SUCCESS != (rc = orte_rmcast.open_channel(grp->input, triplet->string_id, NULL, -1, NULL, ORTE_RMCAST_XMIT))) {
                    ORTE_ERROR_LOG(rc);
                    continue;
                }
                /* flag that we already did the callback so we don't do it again */
                done = true;
                ORTE_RELEASE_THREAD(&triplet->ctl);
                ORTE_RELEASE_THREAD(&local_thread);
                cbfunc(app, version, release, grp->input);
                /* reacquire the threads */
                ORTE_ACQUIRE_THREAD(&local_thread);
                ORTE_ACQUIRE_THREAD(&triplet->ctl);
                break;
            }
        }
        /* if we did the callback, remove any remaining cbfunc entries to ensure
         * that we only do this once for the triplet */
        if (done) {
            for (i=0; i < triplet->groups.size; i++) {
                if (NULL == (grp = (orcm_triplet_group_t*)opal_pointer_array_get_item(&triplet->groups, i))) {
                    continue;
                }
                grp->pnp_cbfunc = NULL;
            }
        }
        /* release the threads */
        ORTE_RELEASE_THREAD(&triplet->ctl);
        ORTE_RELEASE_THREAD(&local_thread);
        return ORCM_SUCCESS;
    }

    /* left with the case of a specific jobid - record the policy */
    done = false;
    for (i=0; i < triplet->groups.size; i++) {
        if (NULL == (grp = (orcm_triplet_group_t*)opal_pointer_array_get_item(&triplet->groups, i))) {
            continue;
        }
        if (grp->jobid == jobid) {
            /* found the group */
            grp->pnp_cbfunc = cbfunc;
            done = true;  /* flag that we found the group */
            if (ORCM_PNP_INVALID_CHANNEL != grp->input) {
                /* if the user requested a callback, they probably intend to send
                 * something to this triplet - so ensure the channel to its input is open.
                 * No need to release threads first as this call cannot result in callbacks */
                if (ORTE_SUCCESS != (rc = orte_rmcast.open_channel(grp->input, triplet->string_id, NULL, -1, NULL, ORTE_RMCAST_XMIT))) {
                    ORTE_ERROR_LOG(rc);
                    continue;
                }
                /* release the threads before doing the callback in
                 * case the caller sends messages */
                ORTE_RELEASE_THREAD(&triplet->ctl);
                ORTE_RELEASE_THREAD(&local_thread);
                cbfunc(app, version, release, grp->input);
                /* reacquire the threads */
                ORTE_ACQUIRE_THREAD(&local_thread);
                ORTE_ACQUIRE_THREAD(&triplet->ctl);
                /* flag that this group has executed its callback */
                grp->pnp_cbfunc = NULL;
                break;
            }
        }
    }

    /* if we didn't find the group, then we have to add it */
    if (!done) {
        grp = OBJ_NEW(orcm_triplet_group_t);
        grp->jobid = jobid;
        grp->pnp_cbfunc = cbfunc;
        opal_pointer_array_add(&triplet->groups, grp);
    }

    ORTE_RELEASE_THREAD(&triplet->ctl);
    ORTE_RELEASE_THREAD(&local_thread);
    return ORCM_SUCCESS;
}
/*
 * Cancel receives registered for the given triplet/channel/tag.
 * Channel semantics:
 *   - WILDCARD: scan every known channel, removing recvs whose
 *     string_id matches the triplet (or everything if the triplet is
 *     itself the wildcard id) and whose tag matches;
 *   - GROUP_INPUT / GROUP_OUTPUT: remove the triplet-stored recvs and
 *     the recvs on each group's default input/output channel;
 *   - specific channel: remove matching recvs from that channel only.
 * FIX: the wildcard branch allocated string_id via ORCM_CREATE_STRING_ID
 * but never freed it before jumping to cleanup - memory leak.
 */
static int cancel_receive(const char *app,
                          const char *version,
                          const char *release,
                          orcm_pnp_channel_t channel,
                          orcm_pnp_tag_t tag)
{
    orcm_pnp_channel_obj_t *chan;
    orcm_pnp_request_t *req;
    orcm_triplet_t *triplet;
    orcm_triplet_group_t *grp;
    opal_list_item_t *item, *next;
    char *string_id;
    int ret=ORCM_SUCCESS;
    int i;

    OPAL_OUTPUT_VERBOSE((2, orcm_pnp_base.output,
                         "%s pnp:default:cancel_recv app %s version %s release %s channel %s tag %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         (NULL == app) ? "NULL" : app,
                         (NULL == version) ? "NULL" : version,
                         (NULL == release) ? "NULL" : release,
                         orcm_pnp_print_channel(channel),
                         orcm_pnp_print_tag(tag)));

    /* since we are modifying global lists, lock
     * the thread */
    ORTE_ACQUIRE_THREAD(&local_thread);

    /* if this is the wildcard channel, loop across all channels */
    if (ORCM_PNP_WILDCARD_CHANNEL == channel) {
        /* get the string id for this triplet */
        ORCM_CREATE_STRING_ID(&string_id, app, version, release);
        for (i=0; i < orcm_pnp_base.channels.size; i++) {
            if (NULL == (chan = (orcm_pnp_channel_obj_t*)opal_pointer_array_get_item(&orcm_pnp_base.channels, i))) {
                continue;
            }
            item = opal_list_get_first(&chan->recvs);
            while (item != opal_list_get_end(&chan->recvs)) {
                next = opal_list_get_next(item);
                req = (orcm_pnp_request_t*)item;
                if (0 == strcasecmp(string_id, ORCM_WILDCARD_STRING_ID) ||
                    0 == strcasecmp(string_id, req->string_id)) {
                    if (ORCM_PNP_TAG_WILDCARD == tag || tag == req->tag) {
                        opal_list_remove_item(&chan->recvs, item);
                        OBJ_RELEASE(item);
                    }
                }
                item = next;
            }
        }
        /* FIX: string_id was leaked here */
        free(string_id);
        goto cleanup;
    }

    /* are we looking at the group input channel? */
    if (ORCM_PNP_GROUP_INPUT_CHANNEL == channel) {
        triplet = orcm_get_triplet(app, version, release, false);
        if (NULL != triplet) {
            /* remove the triplet-stored recvs */
            item = opal_list_get_first(&triplet->input_recvs);
            while (item != opal_list_get_end(&triplet->input_recvs)) {
                next = opal_list_get_next(item);
                req = (orcm_pnp_request_t*)item;
                if (ORCM_PNP_TAG_WILDCARD == tag || tag == req->tag) {
                    opal_list_remove_item(&triplet->input_recvs, item);
                    OBJ_RELEASE(item);
                }
                item = next;
            }
            for (i=0; i < triplet->groups.size; i++) {
                if (NULL == (grp = (orcm_triplet_group_t*)opal_pointer_array_get_item(&triplet->groups, i))) {
                    continue;
                }
                if (ORCM_PNP_INVALID_CHANNEL != grp->input) {
                    /* just look thru the default group input channel */
                    chan = (orcm_pnp_channel_obj_t*)opal_pointer_array_get_item(&orcm_pnp_base.channels, grp->input);
                    if (NULL == chan) {
                        /* nothing to do */
                        ORTE_RELEASE_THREAD(&triplet->ctl);
                        goto cleanup;
                    }
                    item = opal_list_get_first(&chan->recvs);
                    while (item != opal_list_get_end(&chan->recvs)) {
                        next = opal_list_get_next(item);
                        req = (orcm_pnp_request_t*)item;
                        if (ORCM_PNP_TAG_WILDCARD == tag || tag == req->tag) {
                            opal_list_remove_item(&chan->recvs, item);
                            OBJ_RELEASE(item);
                        }
                        item = next;
                    }
                }
            }
            /* release the triplet */
            ORTE_RELEASE_THREAD(&triplet->ctl);
        }
        goto cleanup;
    }

    /* are we looking at the group output channel? */
    if (ORCM_PNP_GROUP_OUTPUT_CHANNEL == channel) {
        triplet = orcm_get_triplet(app, version, release, false);
        if (NULL != triplet) {
            /* remove the triplet-stored recvs */
            item = opal_list_get_first(&triplet->output_recvs);
            while (item != opal_list_get_end(&triplet->output_recvs)) {
                next = opal_list_get_next(item);
                req = (orcm_pnp_request_t*)item;
                if (ORCM_PNP_TAG_WILDCARD == tag || tag == req->tag) {
                    opal_list_remove_item(&triplet->output_recvs, item);
                    OBJ_RELEASE(item);
                }
                item = next;
            }
            for (i=0; i < triplet->groups.size; i++) {
                if (NULL == (grp = (orcm_triplet_group_t*)opal_pointer_array_get_item(&triplet->groups, i))) {
                    continue;
                }
                if (ORCM_PNP_INVALID_CHANNEL != grp->output) {
                    /* just look thru the default group output channel */
                    chan = (orcm_pnp_channel_obj_t*)opal_pointer_array_get_item(&orcm_pnp_base.channels, grp->output);
                    if (NULL == chan) {
                        /* nothing to do */
                        ORTE_RELEASE_THREAD(&triplet->ctl);
                        goto cleanup;
                    }
                    item = opal_list_get_first(&chan->recvs);
                    while (item != opal_list_get_end(&chan->recvs)) {
                        next = opal_list_get_next(item);
                        req = (orcm_pnp_request_t*)item;
                        if (ORCM_PNP_TAG_WILDCARD == tag || tag == req->tag) {
                            opal_list_remove_item(&chan->recvs, item);
                            OBJ_RELEASE(item);
                        }
                        item = next;
                    }
                }
            }
            /* release the triplet */
            ORTE_RELEASE_THREAD(&triplet->ctl);
        }
        goto cleanup;
    }

    /* if this isn't either input or output channel, then get the channel object */
    if (NULL != (chan = (orcm_pnp_channel_obj_t*)opal_pointer_array_get_item(&orcm_pnp_base.channels, channel))) {
        ORCM_CREATE_STRING_ID(&string_id, app, version, release);
        item = opal_list_get_first(&chan->recvs);
        while (item != opal_list_get_end(&chan->recvs)) {
            next = opal_list_get_next(item);
            req = (orcm_pnp_request_t*)item;
            if (0 == strcasecmp(string_id, req->string_id)) {
                if (ORCM_PNP_TAG_WILDCARD == tag || tag == req->tag) {
                    opal_list_remove_item(&chan->recvs, item);
                    OBJ_RELEASE(item);
                }
            }
            item = next;
        }
        free(string_id);
    }

 cleanup:
    /* clear the thread */
    ORTE_RELEASE_THREAD(&local_thread);
    return ret;
}
/*
 * Register a receive callback for the given triplet/channel/tag.
 * Wildcard triplets (string_id containing '@') are stored on a
 * separate list and propagated to every matching known triplet;
 * non-wildcard triplets are simply recorded via
 * orcm_pnp_base_record_recv.
 *
 * FIX: in the specific-channel path the new request was populated with
 * `req->string_id = strdup(req->string_id)` (strdup of the fresh,
 * unset field) and `req->tag = req->tag` (self-assignment). Corrected
 * to copy triplet->string_id and the caller's tag, matching the
 * identical construction in the group-channel path above.
 */
static int register_receive(const char *app,
                            const char *version,
                            const char *release,
                            orcm_pnp_channel_t channel,
                            orcm_pnp_tag_t tag,
                            orcm_pnp_callback_fn_t cbfunc,
                            void *cbdata)
{
    orcm_triplet_t *triplet, *trp;
    int i;
    int ret=ORCM_SUCCESS;
    orcm_pnp_channel_obj_t *recvr;
    orcm_pnp_request_t *req;

    OPAL_OUTPUT_VERBOSE((2, orcm_pnp_base.output,
                         "%s pnp:default:register_recv app %s version %s release %s channel %s tag %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         (NULL == app) ? "NULL" : app,
                         (NULL == version) ? "NULL" : version,
                         (NULL == release) ? "NULL" : release,
                         orcm_pnp_print_channel(channel),
                         orcm_pnp_print_tag(tag)));

    /* bozo check - can't receive on an invalid channel */
    if (ORCM_PNP_INVALID_CHANNEL == channel) {
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return ORTE_ERR_BAD_PARAM;
    }

    /* since we are modifying global lists, lock
     * the thread */
    ORTE_ACQUIRE_THREAD(&local_thread);

    /* get a triplet object for this triplet - creates
     * it if one doesn't already exist */
    triplet = orcm_get_triplet(app, version, release, true);

    /* if the triplet involves wildcards, we treat it separately. Such
     * recvs are maintained on a separate list so they can be properly
     * applied to any subsequent triplets covered by the wildcard */
    if (NULL != strchr(triplet->string_id, '@')) {
        /* if we were given an INPUT or OUTPUT channel, then we
         * have to record the recv so we can apply it to triplets
         * as they become known since we don't know the channel */
        if (ORCM_PNP_GROUP_INPUT_CHANNEL == channel ||
            ORCM_PNP_GROUP_OUTPUT_CHANNEL == channel) {
            /* store this recv on this wildcard triplet so we retain a record of it,
             * ensuring no duplicates */
            if (ORCM_PNP_GROUP_INPUT_CHANNEL == channel) {
                if (NULL == orcm_pnp_base_find_request(&triplet->input_recvs, triplet->string_id, tag)) {
                    /* create it */
                    req = OBJ_NEW(orcm_pnp_request_t);
                    req->string_id = strdup(triplet->string_id);
                    req->tag = tag;
                    req->cbfunc = cbfunc;
                    req->cbdata = cbdata;
                    opal_list_append(&triplet->input_recvs, &req->super);
                }
            } else {
                if (NULL == orcm_pnp_base_find_request(&triplet->output_recvs, triplet->string_id, tag)) {
                    /* create it */
                    req = OBJ_NEW(orcm_pnp_request_t);
                    req->string_id = strdup(triplet->string_id);
                    req->tag = tag;
                    req->cbfunc = cbfunc;
                    req->cbdata = cbdata;
                    opal_list_append(&triplet->output_recvs, &req->super);
                }
            }
            /* lock the global triplet arrays for our use */
            ORTE_ACQUIRE_THREAD(&orcm_triplets->ctl);
            /* check all known triplets to find those that match */
            for (i=0; i < orcm_triplets->array.size; i++) {
                if (NULL == (trp = (orcm_triplet_t*)opal_pointer_array_get_item(&orcm_triplets->array, i))) {
                    continue;
                }
                if (trp == triplet) {
                    /* don't copy from ourselves */
                    continue;
                }
                /* lock the triplet thread */
                ORTE_ACQUIRE_THREAD(&trp->ctl);
                if (orcm_triplet_cmp(trp->string_id, triplet->string_id)) {
                    /* triplet matches - transfer the recv */
                    if (ORCM_SUCCESS != (ret = orcm_pnp_base_record_recv(trp, channel, tag, cbfunc, cbdata))) {
                        ORTE_ERROR_LOG(ret);
                    }
                }
                /* release this triplet */
                ORTE_RELEASE_THREAD(&trp->ctl);
            }
            /* release the global arrays */
            ORTE_RELEASE_THREAD(&orcm_triplets->ctl);
        } else {
            /* if we were given a specific channel, then we can add this
             * recv to it */
            if (NULL == (recvr = (orcm_pnp_channel_obj_t*)opal_pointer_array_get_item(&orcm_pnp_base.channels, channel))) {
                recvr = OBJ_NEW(orcm_pnp_channel_obj_t);
                recvr->channel = channel;
                opal_pointer_array_set_item(&orcm_pnp_base.channels, recvr->channel, recvr);
            }
            if (NULL == (req = orcm_pnp_base_find_request(&recvr->recvs, triplet->string_id, tag))) {
                /* not already present - create it.
                 * FIX: was strdup(req->string_id) and req->tag = req->tag */
                req = OBJ_NEW(orcm_pnp_request_t);
                req->string_id = strdup(triplet->string_id);
                req->tag = tag;
                req->cbfunc = cbfunc;
                req->cbdata = cbdata;
                opal_list_append(&recvr->recvs, &req->super);
            }
            if (channel < ORCM_PNP_SYS_CHANNEL) {
                /* can't register rmcast recvs on group_input, group_output, and wildcard channels */
                goto cleanup;
            }
            /* open this channel - will just return if already open */
            if (ORCM_SUCCESS != (ret = orte_rmcast.open_channel(channel, triplet->string_id, NULL, -1, NULL, ORTE_RMCAST_RECV))) {
                if (ORTE_EXISTS != ret) {
                    ORTE_ERROR_LOG(ret);
                    goto cleanup;
                }
            }
            /* setup to listen to it - will just return if we already are */
            if (ORTE_SUCCESS != (ret = orte_rmcast.recv_buffer_nb(channel, ORTE_RMCAST_TAG_WILDCARD,
                                                                  ORTE_RMCAST_PERSISTENT,
                                                                  orcm_pnp_base_recv_input_buffers, NULL))) {
                if (ORTE_EXISTS == ret) {
                    ret = ORTE_SUCCESS;
                    goto cleanup;
                }
                ORTE_ERROR_LOG(ret);
            }
        }
    } else {
        /* we are dealing with a non-wildcard triplet - record the request */
        if (ORCM_SUCCESS != (ret = orcm_pnp_base_record_recv(triplet, channel, tag, cbfunc, cbdata))) {
            ORTE_ERROR_LOG(ret);
        }
    }

 cleanup:
    /* clear the threads */
    ORTE_RELEASE_THREAD(&triplet->ctl);
    ORTE_RELEASE_THREAD(&local_thread);
    return ret;
}
static void activate(void) { int rc; DIR *dirp; /* take control */ ORTE_ACQUIRE_THREAD(&orcm_cfgi_base.ctl); if (enabled) { /* we get reentered when daemons reappear so that * any pending jobs can be started */ check_installed(true); /* release control */ ORTE_RELEASE_THREAD(&orcm_cfgi_base.ctl); return; } enabled = true; /* check for existence of the directory. If it doesn't yet * exist, then we have to use the timer until it shows up */ if (NULL == (dirp = opendir(mca_orcm_cfgi_file_component.dir))) { if (0 < opal_output_get_verbosity(orcm_cfgi_base.output)) { orte_show_help("help-cfgi-file.txt", "no-dir", true, mca_orcm_cfgi_file_component.dir); } timer_in_use = true; goto fallback; } #ifdef HAVE_SYS_INOTIFY_H /* setup to watch the config dir - CREATE always is followed by * a MODIFY event, so don't need both */ if (0 > (watch = inotify_add_watch(notifier, mca_orcm_cfgi_file_component.dir, IN_DELETE | IN_MODIFY | IN_MOVE))) { /* error */ close(notifier); goto fallback; } /* start the watcher event */ probe_ev = (opal_event_t*)malloc(sizeof(opal_event_t)); opal_event_set(opal_event_base, probe_ev, notifier, OPAL_EV_READ|OPAL_EV_PERSIST, inotify_handler, NULL); timer_in_use = false; ORTE_RELEASE_THREAD(&orcm_cfgi_base.ctl); /* process it the first time */ check_config(0, 0, NULL); return; #endif fallback: /* setup the probe timer */ if (0 < mca_orcm_cfgi_file_component.rate) { probe_time.tv_sec = mca_orcm_cfgi_file_component.rate; probe_time.tv_usec = 0; probe_ev = (opal_event_t*)malloc(sizeof(opal_event_t)); opal_event_evtimer_set(opal_event_base, probe_ev, check_config, NULL); timer_in_use = true; /* process it the first time */ ORTE_RELEASE_THREAD(&orcm_cfgi_base.ctl); check_config(0, 0, NULL); return; } opal_output(0, "%s CANNOT ACTIVATE INSTALL CONFIG MONITORING", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); enabled = false; ORTE_RELEASE_THREAD(&orcm_cfgi_base.ctl); }
/*
 * Non-blocking output of a message (iovecs or buffer) to either all
 * listeners on a channel (multicast) or a specific recipient (p2p via
 * RML). The send tracker and constructed message buffer are released
 * by the rmcast/rml callbacks on successful hand-off.
 *
 * FIXES:
 *  - `send` leaked when orcm_pnp_base_construct_msg() failed;
 *  - the constructed `buf` leaked in the single-wildcard-field
 *    ORTE_ERR_NOT_IMPLEMENTED branch (only `send` was released).
 */
static int default_output_nb(orcm_pnp_channel_t channel,
                             orte_process_name_t *recipient,
                             orcm_pnp_tag_t tag,
                             struct iovec *msg, int count,
                             opal_buffer_t *buffer,
                             orcm_pnp_callback_fn_t cbfunc,
                             void *cbdata)
{
    int ret;
    orcm_pnp_send_t *send;
    opal_buffer_t *buf;
    orcm_pnp_channel_t chan;

    /* if we have not announced, ignore this message */
    if (NULL == orcm_pnp_base.my_string_id) {
        return ORCM_ERR_NOT_AVAILABLE;
    }
    if (!orcm_pnp_base.comm_enabled) {
        return ORCM_ERR_COMM_DISABLED;
    }

    /* protect against threading */
    ORTE_ACQUIRE_THREAD(&local_thread);

    send = OBJ_NEW(orcm_pnp_send_t);
    send->tag = tag;
    send->msg = msg;
    send->count = count;
    send->buffer = buffer;
    send->cbfunc = cbfunc;
    send->cbdata = cbdata;

    /* setup the message for xmission */
    if (ORTE_SUCCESS != (ret = orcm_pnp_base_construct_msg(&buf, buffer, tag, msg, count))) {
        ORTE_ERROR_LOG(ret);
        OBJ_RELEASE(send);  /* FIX: was leaked on this path */
        ORTE_RELEASE_THREAD(&local_thread);
        return ret;
    }

    /* if this is intended for everyone who might be listening to my output,
     * multicast it */
    if (NULL == recipient ||
        (ORTE_JOBID_WILDCARD == recipient->jobid &&
         ORTE_VPID_WILDCARD == recipient->vpid)) {
        /* if this is going on the group channel, then substitute that channel here */
        if (ORCM_PNP_GROUP_OUTPUT_CHANNEL == channel) {
            chan = orcm_pnp_base.my_output_channel->channel;
        } else if (ORCM_PNP_GROUP_INPUT_CHANNEL == channel) {
            chan = orcm_pnp_base.my_input_channel->channel;
        } else {
            chan = channel;
        }
        OPAL_OUTPUT_VERBOSE((2, orcm_pnp_base.output,
                             "%s pnp:default:sending_nb multicast of %d %s to channel %s tag %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             (NULL == msg) ? (int)buffer->bytes_used : count,
                             (NULL == msg) ? "bytes" : "iovecs",
                             orcm_pnp_print_channel(channel),
                             orcm_pnp_print_tag(tag)));
        /* release thread prior to send */
        ORTE_RELEASE_THREAD(&local_thread);
        /* send the data to the channel */
        if (ORCM_SUCCESS != (ret = orte_rmcast.send_buffer_nb(chan, tag, buf, rmcast_callback, send))) {
            ORTE_ERROR_LOG(ret);
        }
        return ret;
    }

    /* if only one name field is WILDCARD, I don't know how to send
     * it - at least, not right now */
    if (ORTE_JOBID_WILDCARD == recipient->jobid ||
        ORTE_VPID_WILDCARD == recipient->vpid) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_IMPLEMENTED);
        OBJ_RELEASE(buf);   /* FIX: constructed buffer was leaked here */
        OBJ_RELEASE(send);
        ORTE_RELEASE_THREAD(&local_thread);
        return ORTE_ERR_NOT_IMPLEMENTED;
    }

    /* intended for a specific recipient, send it over p2p */
    OPAL_OUTPUT_VERBOSE((2, orcm_pnp_base.output,
                         "%s pnp:default:sending_nb p2p message of %d %s to %s tag %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         (NULL == msg) ? (int)buffer->bytes_used : count,
                         (NULL == msg) ? "bytes" : "iovecs",
                         ORTE_NAME_PRINT(recipient),
                         orcm_pnp_print_tag(tag)));
    /* release thread prior to send */
    ORTE_RELEASE_THREAD(&local_thread);
    /* send the msg */
    if (0 > (ret = orte_rml.send_buffer_nb(recipient, buf, ORTE_RML_TAG_MULTICAST_DIRECT, 0, rml_callback, send))) {
        ORTE_ERROR_LOG(ret);
    } else {
        ret = ORCM_SUCCESS;
    }
    return ret;
}
/*
 * RML callback: a peer has asked us to resend multicast messages it
 * missed, starting after the given sequence number. If the request is
 * older than our cache can cover, we tell the peer (channel set to
 * UINT32_MAX) that recovery is impossible; a UINT32_MAX channel in an
 * incoming request conversely means WE are too far behind and abort.
 *
 * FIX: the `recover` buffer leaked when opal_dss.pack() failed in the
 * too-far-behind reply path.
 */
static void resend_data(int status, orte_process_name_t* sender,
                        opal_buffer_t* buffer, orte_rml_tag_t tag,
                        void* cbdata)
{
    int n, rc;
    orte_rmcast_channel_t channel;
    orte_rmcast_seq_t start;
    rmcast_base_channel_t *ch;
    rmcast_send_log_t *log;
    opal_buffer_t *recover;

    /* block any further ops until we complete the missing
     * message repair */
    ORTE_ACQUIRE_THREAD(&ctl);

    n=1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &channel, &n, ORTE_RMCAST_CHANNEL_T))) {
        ORTE_ERROR_LOG(rc);
        goto release;
    }

    /* if the channel is UINT32_MAX, then we know that this is a
     * a response from a sender telling us that our request for
     * missing messages is too far behind, so we should just
     * abort */
    if (UINT32_MAX == channel) {
        opal_output(0, "%s CANNOT RECOVER FROM LOST MESSAGE - TOO FAR BEHIND - ABORTING",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        orte_errmgr.abort(1, NULL);
        goto release;
    }

    n=1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &start, &n, ORTE_RMCAST_SEQ_T))) {
        ORTE_ERROR_LOG(rc);
        goto release;
    }

    opal_output(0, "%s request resend data from %s for channel %d start %d",
                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                ORTE_NAME_PRINT(sender), channel, start);

    /* get the referenced channel object */
    if (NULL == (ch = orte_rmcast_base_get_channel(channel))) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        goto release;
    }

    /* see if we can bring the proc up to date - if it is too
     * far behind, then there is no hope of recovery */
    log = (rmcast_send_log_t*)opal_ring_buffer_poke(&ch->cache, 0);
    if (NULL == log || start < log->seq_num) {
        /* no hope - tell them */
        channel = UINT32_MAX;
        recover = OBJ_NEW(opal_buffer_t);
        if (ORTE_SUCCESS != (rc = opal_dss.pack(recover, &channel, 1, ORTE_RMCAST_CHANNEL_T))) {
            ORTE_ERROR_LOG(rc);
            OBJ_RELEASE(recover);  /* FIX: was leaked on pack failure */
            goto release;
        }
        if (0 > (rc = orte_rml.send_buffer_nb(sender, recover, ORTE_RML_TAG_MULTICAST, 0, cbfunc, NULL))) {
            ORTE_ERROR_LOG(rc);
            OBJ_RELEASE(recover);
        }
        goto release;
    }

    /* search its ring buffer for the starting message - function
     * automatically starts at the oldest message and works up
     * from there */
    for (n=0; n < ch->cache.size; n++) {
        log = (rmcast_send_log_t*)opal_ring_buffer_poke(&ch->cache, n);
        if (NULL == log || log->seq_num <= start) {
            continue;
        }
        OPAL_OUTPUT_VERBOSE((0, orte_rmcast_base.rmcast_output,
                             "%s resending msg %d to %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             log->seq_num, ORTE_NAME_PRINT(sender)));
        recover = OBJ_NEW(opal_buffer_t);
        opal_dss.copy_payload(recover, log->buf);
        if (0 > (rc = orte_rml.send_buffer_nb(sender, recover, ORTE_RML_TAG_MULTICAST, 0, cbfunc, NULL))) {
            OBJ_RELEASE(recover);
            ORTE_ERROR_LOG(rc);
            goto release;
        }
    }

 release:
    ORTE_RELEASE_THREAD(&ctl);
}
void orte_rmcast_base_process_msg(orte_rmcast_msg_t *msg) { orte_rmcast_channel_t channel; rmcast_base_recv_t *ptr, *recv=NULL; orte_process_name_t name; orte_rmcast_tag_t tag; int8_t flag; struct iovec *iovec_array=NULL; int32_t iovec_count=0, i, n, isz; int rc=ORTE_SUCCESS; orte_rmcast_seq_t recvd_seq_num; opal_list_item_t *item; rmcast_seq_tracker_t *trkr, *tptr; rmcast_recv_log_t *log, *logptr; bool restart; opal_buffer_t alert; /* extract the header */ if (ORTE_SUCCESS != (rc = extract_hdr(msg->buf, &name, &channel, &tag, &restart, &recvd_seq_num))) { ORTE_ERROR_LOG(rc); goto cleanup; } /* if this message is from myself, ignore it */ if (name.jobid == ORTE_PROC_MY_NAME->jobid && name.vpid == ORTE_PROC_MY_NAME->vpid) { OPAL_OUTPUT_VERBOSE((10, orte_rmcast_base.rmcast_output, "%s rmcast:base:process_recv sent from myself: %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&name))); goto cleanup; } /* if this is a heartbeat and I am not a daemon, then ignore it * to avoid swamping tools */ if (!ORTE_PROC_IS_DAEMON && ORTE_RMCAST_TAG_HEARTBEAT == tag) { OPAL_OUTPUT_VERBOSE((10, orte_rmcast_base.rmcast_output, "%s rmcast:base:process_recv ignoring heartbeat", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); goto cleanup; } /* if this message is from a different job family, ignore it unless * it is on the system channel. We ignore these messages to avoid * confusion between different jobs since we all may be sharing * multicast channels. The system channel is left open to support * cross-job communications for detecting multiple conflicting DVMs. 
*/ if (ORTE_JOB_FAMILY(name.jobid) != ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid) && (ORTE_RMCAST_SYS_CHANNEL != channel)) { /* if we are not the HNP or a daemon, then we ignore this */ if (ORTE_PROC_IS_HNP || ORTE_PROC_IS_DAEMON) { OPAL_OUTPUT_VERBOSE((10, orte_rmcast_base.rmcast_output, "%s rmcast:base:process_recv from a different job family: %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&name))); } else { goto cleanup; } } if (orte_rmcast_base.unreliable_xport) { /* if the message is not on a system-specified channel, then check to see if we * are missing any messages and need a resend */ if (ORTE_RMCAST_DYNAMIC_CHANNELS <= channel) { log = NULL; for (item = opal_list_get_first(&orte_rmcast_base.msg_logs); item != opal_list_get_end(&orte_rmcast_base.msg_logs); item = opal_list_get_next(item)) { logptr = (rmcast_recv_log_t*)item; /* look for this source */ if (name.jobid == logptr->name.jobid && name.vpid == logptr->name.vpid) { log = logptr; break; } } if (NULL == log) { /* new source */ log = OBJ_NEW(rmcast_recv_log_t); log->name.jobid = name.jobid; log->name.vpid = name.vpid; opal_list_append(&orte_rmcast_base.msg_logs, &log->super); } /* look for the channel */ trkr = NULL; for (item = opal_list_get_first(&log->last_msg); item != opal_list_get_end(&log->last_msg); item = opal_list_get_next(item)) { tptr = (rmcast_seq_tracker_t*)item; if (channel == tptr->channel) { trkr = tptr; break; } } if (NULL == trkr) { /* new channel */ trkr = OBJ_NEW(rmcast_seq_tracker_t); trkr->channel = channel; opal_list_append(&log->last_msg, &trkr->super); OPAL_OUTPUT_VERBOSE((10, orte_rmcast_base.rmcast_output, "%s NEW CHANNEL: %d SENDER: %s SEQ %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), channel, ORTE_NAME_PRINT(&log->name), recvd_seq_num)); } else if (ORTE_RMCAST_SEQ_INVALID != trkr->seq_num && !restart) { /* if this is a repeat msg, ignore it */ if (recvd_seq_num <= trkr->seq_num) { OPAL_OUTPUT_VERBOSE((1, orte_rmcast_base.rmcast_output, "%s Repeat msg %d on channel %d 
from source %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), recvd_seq_num, channel, ORTE_NAME_PRINT(&name))); } if (1 != (recvd_seq_num - trkr->seq_num) || (ORTE_RMCAST_SEQ_MAX == trkr->seq_num && 0 != recvd_seq_num)) { /* missing a message - request it */ OPAL_OUTPUT_VERBOSE((1, orte_rmcast_base.rmcast_output, "%s Missing msg %d (%d) on channel %d from source %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), recvd_seq_num, trkr->seq_num, channel, ORTE_NAME_PRINT(&name))); OBJ_CONSTRUCT(&alert, opal_buffer_t); if (ORTE_SUCCESS != (rc = opal_dss.pack(&alert, &channel, 1, ORTE_RMCAST_CHANNEL_T))) { ORTE_ERROR_LOG(rc); exit(1); } if (ORTE_SUCCESS != (rc = opal_dss.pack(&alert, &trkr->seq_num, 1, ORTE_RMCAST_SEQ_T))) { ORTE_ERROR_LOG(rc); exit(1); } if (0 > (rc = orte_rml.send_buffer(&name, &alert, ORTE_RML_TAG_MISSED_MSG, 0))) { ORTE_ERROR_LOG(rc); exit(1); } OBJ_DESTRUCT(&alert); goto cleanup; } OPAL_OUTPUT_VERBOSE((10, orte_rmcast_base.rmcast_output, "%s CHANNEL: %d SENDER: %s SEQ: %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), channel, ORTE_NAME_PRINT(&log->name), recvd_seq_num)); } trkr->seq_num = recvd_seq_num; } } /* unpack the iovec vs buf flag */ n=1; if (ORTE_SUCCESS != (rc = opal_dss.unpack(msg->buf, &flag, &n, OPAL_INT8))) { ORTE_ERROR_LOG(rc); goto cleanup; } OPAL_OUTPUT_VERBOSE((5, orte_rmcast_base.rmcast_output, "%s rmcast:base:process_recv sender: %s channel: %d tag: %d %s seq_num: %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&name), channel, (int)tag, (0 == flag) ? 
"iovecs" : "buffer", recvd_seq_num)); /* find the recv for this channel, tag, and type */ ORTE_ACQUIRE_THREAD(&orte_rmcast_base.main_ctl); for (item = opal_list_get_first(&orte_rmcast_base.recvs); item != opal_list_get_end(&orte_rmcast_base.recvs); item = opal_list_get_next(item)) { ptr = (rmcast_base_recv_t*)item; OPAL_OUTPUT_VERBOSE((2, orte_rmcast_base.rmcast_output, "%s rmcast:base:process_recv checking channel %d tag %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)ptr->channel, (int)ptr->tag)); if (channel != ptr->channel) { continue; } if (tag != ptr->tag && ORTE_RMCAST_TAG_WILDCARD != ptr->tag) { continue; } ptr->seq_num = recvd_seq_num; recv = ptr; break; } if (NULL == recv) { /* recv not found - dump msg */ ORTE_RELEASE_THREAD(&orte_rmcast_base.main_ctl); goto cleanup; } if (!(ORTE_RMCAST_PERSISTENT & recv->flags)) { OPAL_OUTPUT_VERBOSE((2, orte_rmcast_base.rmcast_output, "%s rmcast:base:process_recv removing non-persistent recv", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); opal_list_remove_item(&orte_rmcast_base.recvs, &recv->item); } ORTE_RELEASE_THREAD(&orte_rmcast_base.main_ctl); OPAL_OUTPUT_VERBOSE((2, orte_rmcast_base.rmcast_output, "%s rmcast:base:process_recv delivering message to channel %d tag %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), recv->channel, (int)tag)); /* we have a matching recv - unpack the data */ if (0 == flag) { /* get the number of iovecs in the buffer */ n=1; if (ORTE_SUCCESS != (rc = opal_dss.unpack(msg->buf, &iovec_count, &n, OPAL_INT32))) { ORTE_ERROR_LOG(rc); goto cleanup; } /* malloc the required space */ iovec_array = (struct iovec *)malloc(iovec_count * sizeof(struct iovec)); /* unpack the iovecs */ for (i=0; i < iovec_count; i++) { /* unpack the number of bytes in this iovec */ n=1; if (ORTE_SUCCESS != (rc = opal_dss.unpack(msg->buf, &isz, &n, OPAL_INT32))) { ORTE_ERROR_LOG(rc); goto cleanup; } iovec_array[i].iov_base = NULL; iovec_array[i].iov_len = isz; if (0 < isz) { /* allocate the space */ iovec_array[i].iov_base = 
(IOVBASE_TYPE*)malloc(isz); /* unpack the data */ if (ORTE_SUCCESS != (rc = opal_dss.unpack(msg->buf, iovec_array[i].iov_base, &isz, OPAL_UINT8))) { ORTE_ERROR_LOG(rc); goto cleanup; } } } if (NULL != recv->cbfunc_iovec) { OPAL_OUTPUT_VERBOSE((2, orte_rmcast_base.rmcast_output, "%s rmcast:base:process_recv delivering iovecs to channel %d tag %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), recv->channel, (int)tag)); recv->cbfunc_iovec(ORTE_SUCCESS, recv->channel, recv->seq_num, tag, &name, iovec_array, iovec_count, recv->cbdata); } else { /* if something is already present, then we have a problem */ if (NULL != recv->iovec_array) { OPAL_OUTPUT_VERBOSE((2, orte_rmcast_base.rmcast_output, "%s rmcast:base:process_recv blocking recv already fulfilled", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); goto cleanup; } /* copy over the iovec array since it will be released by * the blocking recv */ recv->iovec_array = (struct iovec *)malloc(iovec_count * sizeof(struct iovec)); recv->iovec_count = iovec_count; for (i=0; i < iovec_count; i++) { recv->iovec_array[i].iov_base = (IOVBASE_TYPE*)malloc(iovec_array[i].iov_len); recv->iovec_array[i].iov_len = iovec_array[i].iov_len; memcpy(recv->iovec_array[i].iov_base, iovec_array[i].iov_base, iovec_array[i].iov_len); } /* release blocking recv */ ORTE_WAKEUP_THREAD(&recv->ctl); } } else { if (NULL != recv->cbfunc_buffer) { OPAL_OUTPUT_VERBOSE((2, orte_rmcast_base.rmcast_output, "%s rmcast:base:process_recv delivering buffer to channel %d tag %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), recv->channel, (int)tag)); recv->cbfunc_buffer(ORTE_SUCCESS, recv->channel, recv->seq_num, tag, &name, msg->buf, recv->cbdata); } else { /* if something is already present, then we have a problem */ if (NULL != recv->buf) { OPAL_OUTPUT_VERBOSE((2, orte_rmcast_base.rmcast_output, "%s rmcast:base:process_recv blocking recv already fulfilled", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); goto cleanup; } OPAL_OUTPUT_VERBOSE((2, orte_rmcast_base.rmcast_output, "%s 
rmcast:base:process_recv copying buffer for blocking recv", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); /* copy the buffer across since it will be released * by the blocking recv */ recv->buf = OBJ_NEW(opal_buffer_t); if (ORTE_SUCCESS != (rc = opal_dss.copy_payload(recv->buf, msg->buf))) { ORTE_ERROR_LOG(rc); goto cleanup; } /* release blocking recv */ ORTE_WAKEUP_THREAD(&recv->ctl); } } cleanup: if (NULL != iovec_array) { for (i=0; i < iovec_count; i++) { free(iovec_array[i].iov_base); } free(iovec_array); iovec_array = NULL; iovec_count = 0; } if (NULL != msg) { OBJ_RELEASE(msg); } if (NULL != recv && !(ORTE_RMCAST_PERSISTENT & recv->flags)) { OBJ_RELEASE(recv); } return; }
/* Announce this process to the PnP system.  Records our app/version/release
 * triplet (which enables communication), registers the caller's announcement
 * callback, then multicasts an announcement message on the appropriate
 * channel.  Idempotent: a second call returns ORCM_SUCCESS without resending.
 * Returns ORCM_SUCCESS or an ORCM error code. */
static int announce(const char *app, const char *version, const char *release,
                    orcm_pnp_announce_fn_t cbfunc)
{
    int rc;
    opal_buffer_t ann_buf;
    orcm_pnp_channel_t target;

    /* refuse obviously bad input */
    if (NULL == app || NULL == version || NULL == release) {
        ORTE_ERROR_LOG(ORCM_ERR_BAD_PARAM);
        return ORCM_ERR_BAD_PARAM;
    }

    /* nothing to do if comm is shut off.
     * NOTE(review): this flag is read before local_thread is acquired -
     * presumably benign, but confirm there is no race with whoever clears
     * comm_enabled during teardown */
    if (!orcm_pnp_base.comm_enabled) {
        return ORCM_ERR_COMM_DISABLED;
    }

    /* serialize access to the pnp base */
    ORTE_ACQUIRE_THREAD(&local_thread);

    /* a non-NULL string id means announce already ran - nothing more to do */
    if (NULL != orcm_pnp_base.my_string_id) {
        OPAL_OUTPUT_VERBOSE((2, orcm_pnp_base.output,
                             "%s pnp:default:announce called before",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        ORTE_RELEASE_THREAD(&local_thread);
        return ORCM_SUCCESS;
    }

    OPAL_OUTPUT_VERBOSE((2, orcm_pnp_base.output,
                         "%s pnp:default:announce app %s version %s release %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         app, version, release));

    /* retain a local record of my info - setting my_string_id != NULL is
     * what enables communication from this point forward */
    ORCM_CREATE_STRING_ID(&orcm_pnp_base.my_string_id, app, version, release);

    /* remember who to call when announcements arrive */
    orcm_pnp_base.my_announce_cbfunc = cbfunc;

    /* look up (creating if necessary) my triplet, and my group within it */
    orcm_pnp_base.my_triplet = orcm_get_triplet(app, version, release, true);
    orcm_pnp_base.my_group = orcm_get_triplet_group(orcm_pnp_base.my_triplet,
                                                    ORTE_PROC_MY_NAME->jobid,
                                                    true);
    orcm_pnp_base.my_group->uid = orcm_pnp_base.my_uid;
    orcm_pnp_base.my_group->input = orcm_pnp_base.my_input_channel->channel;
    orcm_pnp_base.my_group->output = orcm_pnp_base.my_output_channel->channel;

    /* copy any recvs that were pre-posted on the triplet over to the
     * channel array */
    orcm_pnp_base_check_pending_recvs(orcm_pnp_base.my_triplet,
                                      orcm_pnp_base.my_group);

    /* done with the triplet and with the global lock */
    ORTE_RELEASE_THREAD(&orcm_pnp_base.my_triplet->ctl);
    ORTE_RELEASE_THREAD(&local_thread);

    /* assemble the announcement message */
    OBJ_CONSTRUCT(&ann_buf, opal_buffer_t);
    if (ORCM_SUCCESS != (rc = orcm_pnp_base_pack_announcement(&ann_buf,
                                                              ORTE_NAME_INVALID))) {
        ORTE_ERROR_LOG(rc);
        OBJ_DESTRUCT(&ann_buf);
        return rc;
    }

    /* apps announce on the public app channel; everything else uses the
     * system channel */
    target = ORCM_PROC_IS_APP ? ORTE_RMCAST_APP_PUBLIC_CHANNEL
                              : ORTE_RMCAST_SYS_CHANNEL;

    /* send it */
    if (ORCM_SUCCESS != (rc = default_output(target, NULL, ORCM_PNP_TAG_ANNOUNCE,
                                             NULL, 0, &ann_buf))) {
        ORTE_ERROR_LOG(rc);
    }

    /* cleanup */
    OBJ_DESTRUCT(&ann_buf);
    return rc;
}
/* Timer/inotify callback: rescan the configuration directory, diff the parsed
 * contents against the currently-installed application definitions, and push
 * kill/launch commands down the launch pipe for anything that changed.
 * Re-arms either the inotify watch or the probe timer before returning.
 * fd/args/cbdata are the standard libevent callback arguments; they are not
 * used by this function. */
static void check_config(int fd, short args, void *cbdata)
{
    DIR *dirp = NULL;
    struct dirent * dir_entry;
    struct stat buf;                  /* unused */
    int i, rc, n, j, k, m;
    char *fullpath;
    orcm_cfgi_app_t *app, *app2, *aptr;
    orcm_cfgi_run_t *run;
    orcm_cfgi_exec_t *exec, *exec2, *eptr;
    orcm_cfgi_version_t *vers, *vers2, *vptr;
    orcm_cfgi_bin_t *bin;
    orte_job_t *jdat, *jptr;          /* unused */
    orte_app_context_t *ax;           /* unused */
    opal_pointer_array_t found_apps;
    bool found, dir_found;            /* 'found' unused */
    orcm_cfgi_caddy_t *caddy;

    /* take control - held for the entire scan/diff cycle */
    ORTE_ACQUIRE_THREAD(&orcm_cfgi_base.ctl);

    OPAL_OUTPUT_VERBOSE((2, orcm_cfgi_base.output,
                         "%s CHECKING CONFIG DIRECTORY %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         mca_orcm_cfgi_file_component.dir));

    /* Open the directory so we can get a listing; a missing directory is
     * not an error - we just re-arm the timer and keep looking for it */
    if (NULL == (dirp = opendir(mca_orcm_cfgi_file_component.dir))) {
        if (0 < opal_output_get_verbosity(orcm_cfgi_base.output)) {
            orte_show_help("help-cfgi-file.txt", "no-dir", true,
                           mca_orcm_cfgi_file_component.dir);
        }
        dir_found = false;
        goto restart;
    }
    dir_found = true;

    /* setup the array that will hold the apps parsed from the files */
    OBJ_CONSTRUCT(&found_apps, opal_pointer_array_t);
    opal_pointer_array_init(&found_apps, 16, INT_MAX, 16);

    /* cycle thru the directory */
    while (NULL != (dir_entry = readdir(dirp))) {
        /* Skip the obvious.  NOTE(review): strncmp with strlen(".") == 1
         * also skips every dotfile, not just "." - presumably intended */
        if (0 == strncmp(dir_entry->d_name, ".", strlen(".")) ||
            0 == strncmp(dir_entry->d_name, "..", strlen(".."))) {
            continue;
        }
        /* Skip editor-related files (vim swap files, backups) */
        if (NULL != strstr(dir_entry->d_name, ".swp") ||
            NULL != strstr(dir_entry->d_name, ".swx") ||
            NULL != strchr(dir_entry->d_name, '~')) {
            continue;
        }
        if ('#' == dir_entry->d_name[0]) {
            continue;
        }
        /* parse the file, adding all found apps to the array; a bad file is
         * logged and skipped, not fatal */
        fullpath = opal_os_path(false, mca_orcm_cfgi_file_component.dir,
                                dir_entry->d_name, NULL);
        if (ORCM_SUCCESS != (rc = parse_file(fullpath, &found_apps))) {
            OPAL_OUTPUT_VERBOSE((1, orcm_cfgi_base.output,
                                 "%s CANNOT PARSE FILE %s: %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 dir_entry->d_name, ORTE_ERROR_NAME(rc)));
        }
        free(fullpath);
    }
    closedir(dirp);

    /* cycle thru the installed apps, diffing each against what we just
     * parsed from disk */
    for (i=0; i < orcm_cfgi_base.installed_apps.size; i++) {
        if (NULL == (app = (orcm_cfgi_app_t*)opal_pointer_array_get_item(&orcm_cfgi_base.installed_apps, i))) {
            continue;
        }
        app->modified = false;
        /* is this app present in the found apps? */
        app2 = NULL;
        for (j=0; j < found_apps.size; j++) {
            if (NULL == (aptr = (orcm_cfgi_app_t*)opal_pointer_array_get_item(&found_apps, j))) {
                continue;
            }
            if (0 == strcmp(app->application, aptr->application)) {
                app2 = aptr;
                /* remove it from the found_apps array as we will now process it */
                opal_pointer_array_set_item(&found_apps, j, NULL);
                break;
            }
        }
        if (NULL == app2) {
            OPAL_OUTPUT_VERBOSE((2, orcm_cfgi_base.output,
                                 "%s APP %s IS NO LONGER INSTALLED",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 app->application));
            /* no longer present - remove this object from the installed array */
            opal_pointer_array_set_item(&orcm_cfgi_base.installed_apps, app->idx, NULL);
            /* find all instances and kill each one */
            for (j=0; j < app->instances.size; j++) {
                if (NULL == (run = (orcm_cfgi_run_t*)opal_pointer_array_get_item(&app->instances, j))) {
                    continue;
                }
                OPAL_OUTPUT_VERBOSE((2, orcm_cfgi_base.output,
                                     "%s APP %s IS NO LONGER INSTALLED - KILLING INSTANCE %s",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                     app->application, run->instance));
                /* remove it from the array */
                opal_pointer_array_set_item(&app->instances, j, NULL);
                run->app = NULL;
                run->app_idx = -1;
                /* delink all the binaries from the (now removed) install info */
                for (k=0; k < run->binaries.size; k++) {
                    if (NULL == (bin = (orcm_cfgi_bin_t*)opal_pointer_array_get_item(&run->binaries, k))) {
                        continue;
                    }
                    bin->vers = NULL;
                    bin->exec = NULL;
                }
                /* kill the associated executing job, if any */
                caddy = OBJ_NEW(orcm_cfgi_caddy_t);
                caddy->cmd = ORCM_CFGI_KILL_JOB;
                /* retain the run object as it has -not- been removed from
                 * the running config */
                OBJ_RETAIN(run);
                caddy->run = run;
                /* send it off to be processed by the launch thread */
                opal_fd_write(orcm_cfgi_base.launch_pipe[1], sizeof(orcm_cfgi_caddy_t*), &caddy);
            }
            /* release it */
            OBJ_RELEASE(app);
            continue;
        }
        /* app was present - did we modify it */
        if (app->max_instances != app2->max_instances) {
            app->max_instances = app2->max_instances;
            app->modified = true;
        }
        /* did we remove any executables? */
        for (j=0; j < app->executables.size; j++) {
            if (NULL == (exec = (orcm_cfgi_exec_t*)opal_pointer_array_get_item(&app->executables, j))) {
                continue;
            }
            /* is it present in the found apps */
            exec2 = NULL;
            for (k=0; k < app2->executables.size; k++) {
                if (NULL == (eptr = (orcm_cfgi_exec_t*)opal_pointer_array_get_item(&app2->executables, k))) {
                    continue;
                }
                if (0 == strcmp(exec->appname, eptr->appname)) {
                    exec2 = eptr;
                    break;
                }
            }
            if (NULL == exec2) {
                OPAL_OUTPUT_VERBOSE((2, orcm_cfgi_base.output,
                                     "%s APP %s EXECUTABLE %s IS NO LONGER INSTALLED",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                     app->application, exec->appname));
                /* this executable has been removed */
                opal_pointer_array_set_item(&app->executables, j, NULL);
                /* find all instances * that use this executable and kill
                 * associated binaries */
                for (k=0; k < app->instances.size; k++) {
                    if (NULL == (run = (orcm_cfgi_run_t*)opal_pointer_array_get_item(&app->instances, k))) {
                        continue;
                    }
                    /* search the binaries to see if they include this executable */
                    for (n=0; n < run->binaries.size; n++) {
                        if (NULL == (bin = (orcm_cfgi_bin_t*)opal_pointer_array_get_item(&run->binaries, n))) {
                            continue;
                        }
                        if (0 == strcmp(bin->appname, exec->appname)) {
                            OPAL_OUTPUT_VERBOSE((2, orcm_cfgi_base.output,
                                                 "%s APP %s EXECUTABLE %s IS NO LONGER INSTALLED - KILLING BINARY %s",
                                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                                 app->application, exec->appname, bin->binary));
                            exec->total_procs -= bin->num_procs;
                            /* ensure we know it is no longer pointing to an
                             * installed exec/version */
                            bin->vers = NULL;
                            bin->exec = NULL;
                            /* kill the associated executing exec, if any */
                            caddy = OBJ_NEW(orcm_cfgi_caddy_t);
                            caddy->cmd = ORCM_CFGI_KILL_EXE;
                            /* retain the run object as it has -not- been removed from
                             * the running config */
                            OBJ_RETAIN(run);
                            caddy->run = run;
                            /* send it off to be processed */
                            opal_fd_write(orcm_cfgi_base.launch_pipe[1], sizeof(orcm_cfgi_caddy_t*), &caddy);
                            break;
                        }
                    }
                }
                OBJ_RELEASE(exec);
                continue;
            }
            /* kept the exec, but was it modified */
            if (exec->process_limit != exec2->process_limit) {
                exec->process_limit = exec2->process_limit;
                app->modified = true;
            }
            /* did we remove any versions */
            for (k=0; k < exec->versions.size; k++) {
                if (NULL == (vers = (orcm_cfgi_version_t*)opal_pointer_array_get_item(&exec->versions, k))) {
                    continue;
                }
                /* is it present in the found app/exec */
                vers2 = NULL;
                for (n=0; n < exec2->versions.size; n++) {
                    if (NULL == (vptr = (orcm_cfgi_version_t*)opal_pointer_array_get_item(&exec2->versions, n))) {
                        continue;
                    }
                    if (0 == strcmp(vptr->version, vers->version)) {
                        vers2 = vptr;
                        /* since we have this version, we can remove it from
                         * the found app */
                        opal_pointer_array_set_item(&exec2->versions, n, NULL);
                        break;
                    }
                }
                if (NULL != vers2) {
                    /* already installed - drop the freshly-parsed copy */
                    OBJ_RELEASE(vers2);
                    continue;
                }
                OPAL_OUTPUT_VERBOSE((2, orcm_cfgi_base.output,
                                     "%s APP %s EXEC %s VERSION %s IS NO LONGER INSTALLED",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                     app->application, exec->appname, vers->version));
                /* nope - been removed, so take it out of the array */
                opal_pointer_array_set_item(&exec->versions, k, NULL);
                /* find all instances and kill this version */
                for (m=0; m < app->instances.size; m++) {
                    if (NULL == (run = (orcm_cfgi_run_t*)opal_pointer_array_get_item(&app->instances, m))) {
                        continue;
                    }
                    /* search the binaries to see if they include this version */
                    for (n=0; n < run->binaries.size; n++) {
                        if (NULL == (bin = (orcm_cfgi_bin_t*)opal_pointer_array_get_item(&run->binaries, n))) {
                            continue;
                        }
                        if (0 == strcmp(bin->appname, exec->appname) &&
                            0 == strcmp(bin->version, vers->version)) {
                            OPAL_OUTPUT_VERBOSE((2, orcm_cfgi_base.output,
                                                 "%s APP %s EXECUTABLE %s VERSION %s IS NO LONGER INSTALLED - KILLING BINARY %s",
                                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                                 app->application, exec->appname, vers->version, bin->binary));
                            exec->total_procs -= bin->num_procs;
                            /* ensure we know it is no longer pointing to an
                             * installed version */
                            bin->vers = NULL;
                            /* kill the associated executing exec, if any */
                            caddy = OBJ_NEW(orcm_cfgi_caddy_t);
                            caddy->cmd = ORCM_CFGI_KILL_EXE;
                            /* retain the run object as it has -not- been removed from
                             * the running config */
                            OBJ_RETAIN(run);
                            caddy->run = run;
                            /* send it off to be processed */
                            opal_fd_write(orcm_cfgi_base.launch_pipe[1], sizeof(orcm_cfgi_caddy_t*), &caddy);
                            break;
                        }
                    }
                }
                /* cleanup */
                OBJ_RELEASE(vers);
            }
        }
        /* did we add any executables or versions */
        for (k=0; k < app2->executables.size; k++) {
            if (NULL == (exec2 = (orcm_cfgi_exec_t*)opal_pointer_array_get_item(&app2->executables, k))) {
                continue;
            }
            exec = NULL;
            for (j=0; j < app->executables.size; j++) {
                if (NULL == (eptr = (orcm_cfgi_exec_t*)opal_pointer_array_get_item(&app->executables, j))) {
                    continue;
                }
                if (0 == strcmp(eptr->appname, exec2->appname)) {
                    exec = eptr;
                    break;
                }
            }
            if (NULL == exec) {
                OPAL_OUTPUT_VERBOSE((2, orcm_cfgi_base.output,
                                     "%s APP %s ADDING EXECUTABLE %s",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                     app->application, exec2->appname));
                /* added this exec - just move it across (ownership transfers
                 * from the found array to the installed app) */
                opal_pointer_array_set_item(&app2->executables, k, NULL);
                exec2->idx = opal_pointer_array_add(&app->executables, exec2);
                app->modified = true;
                continue;
            }
            /* exec already present, and we dealt with mods above - so
             * see if any versions were added. */
            for (j=0; j < exec2->versions.size; j++) {
                if (NULL == (vers = (orcm_cfgi_version_t*)opal_pointer_array_get_item(&exec2->versions, j))) {
                    continue;
                }
                /* if already present, ignore */
                vers2 = NULL;
                for (n=0; n < exec->versions.size; n++) {
                    if (NULL == (vptr = (orcm_cfgi_version_t*)opal_pointer_array_get_item(&exec->versions, n))) {
                        continue;
                    }
                    if (0 == strcmp(vptr->version, vers->version)) {
                        vers2 = vptr;
                        break;
                    }
                }
                if (NULL == vers2) {
                    OPAL_OUTPUT_VERBOSE((2, orcm_cfgi_base.output,
                                         "%s APP %s ADDING EXECUTABLE %s VERSION %s",
                                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                         app->application, exec2->appname, vers->version));
                    /* move the version across to the installed exec */
                    opal_pointer_array_set_item(&exec2->versions, j, NULL);
                    vers->exec = exec;
                    vers->idx = opal_pointer_array_add(&exec->versions, vers);
                    app->modified = true;
                } else {
                    /* NOTE(review): this releases the *installed* version
                     * object (vers2 points into exec->versions) while it is
                     * still in the array - verify the refcounting here is
                     * intentional */
                    OBJ_RELEASE(vers2);
                }
            }
        }
        /* done with this entry */
        OBJ_RELEASE(app2);
    }
    /* any added applications get handled now - anything still in found_apps
     * would have been added */
    for (j=0; j < found_apps.size; j++) {
        if (NULL == (aptr = (orcm_cfgi_app_t*)opal_pointer_array_get_item(&found_apps, j))) {
            continue;
        }
        OPAL_OUTPUT_VERBOSE((2, orcm_cfgi_base.output,
                             "%s ADDING APP %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), aptr->application));
        /* just shift the entry to the installed_apps array */
        aptr->idx = opal_pointer_array_add(&orcm_cfgi_base.installed_apps, aptr);
        /* mark it as modified so it will be handled below */
        aptr->modified = true;
    }
    OBJ_DESTRUCT(&found_apps);
    /* check installed vs configd for anything needing starting,
     * but only check modified apps */
    check_installed(false);

 restart:
#ifdef HAVE_SYS_INOTIFY_H
    if (dir_found) {
        if (timer_in_use) {
            /* redefine the event to use inotify now
             * that the dir has been found; on watch failure fall back to
             * the polling timer */
            if (0 > (watch = inotify_add_watch(notifier, mca_orcm_cfgi_file_component.dir,
                                               IN_DELETE | IN_MODIFY | IN_MOVE))) {
                close(notifier);
                opal_event_evtimer_add(probe_ev, &probe_time);
            } else {
                opal_event_del(probe_ev);
                opal_event_set(opal_event_base, probe_ev, notifier,
                               OPAL_EV_READ|OPAL_EV_PERSIST, inotify_handler, NULL);
                opal_event_add(probe_ev, 0);
                timer_in_use = false;
            }
        } else {
            /* reset the (inotify read) event */
            opal_event_add(probe_ev, 0);
        }
    } else {
        /* restart the timer so we keep looking for it */
        opal_event_evtimer_add(probe_ev, &probe_time);
    }
#else
    /* restart the timer */
    opal_event_evtimer_add(probe_ev, &probe_time);
#endif
    /* release control */
    ORTE_RELEASE_THREAD(&orcm_cfgi_base.ctl);
}
static int update_state(orte_jobid_t job, orte_job_state_t jobstate, orte_process_name_t *proc, orte_proc_state_t state, pid_t pid, orte_exit_code_t exit_code) { int rc=ORTE_SUCCESS, i; orte_app_context_t *app; orte_node_t *node; orte_proc_t *pptr, *daemon, *pptr2; opal_buffer_t *notify; orcm_triplet_t *trp; orcm_source_t *src; bool procs_recovered; orte_job_t *jdt; uint16_t jfam; bool send_msg; OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base.output, "%s errmgr:update_state for job %s proc %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(job), (NULL == proc) ? "NULL" : ORTE_NAME_PRINT(proc))); /* protect against threads */ ORTE_ACQUIRE_THREAD(&ctl); /* * if orte is trying to shutdown, just let it */ if (orte_finalizing) { ORTE_RELEASE_THREAD(&ctl); return ORTE_SUCCESS; } /*** UPDATE COMMAND FOR A JOB ***/ if (NULL == proc) { /* should only get this if a daemon restarted and we need * to check for procs waiting to migrate */ if (ORTE_JOB_STATE_PROCS_MIGRATING != jobstate) { /* we should never get this situation */ opal_output(0, "%s UNKNOWN JOB ERROR ", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), orte_job_state_to_str(jobstate)); ORTE_RELEASE_THREAD(&ctl); return ORTE_ERROR; } /* cycle thru all known jobs looking for those with procs * awaiting resources to migrate */ for (i=0; i < orte_job_data->size; i++) { if (NULL == (jdt = (orte_job_t*)opal_pointer_array_get_item(orte_job_data, i))) { continue; } if (ORTE_JOB_STATE_PROCS_MIGRATING != jdt->state) { continue; } /* reset the job */ orte_plm_base_reset_job(jdt); /* map the job again */ if (ORTE_SUCCESS != (rc = orte_rmaps.map_job(jdt))) { ORTE_ERROR_LOG(rc); continue; } /* launch any procs that could be mapped - note that not * all procs that were waiting for migration may have * been successfully mapped, so this could in fact * result in no action by the daemons */ notify = OBJ_NEW(opal_buffer_t); /* indicate the target DVM */ jfam = ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid); opal_dss.pack(notify, &jfam, 1, 
OPAL_UINT16); /* get the launch data */ if (ORTE_SUCCESS != (rc = orte_odls.get_add_procs_data(notify, jdt->jobid))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(notify); ORTE_RELEASE_THREAD(&ctl); return ORTE_SUCCESS; } /* send it to the daemons */ if (ORCM_SUCCESS != (rc = orcm_pnp.output_nb(ORCM_PNP_SYS_CHANNEL, NULL, ORCM_PNP_TAG_COMMAND, NULL, 0, notify, cbfunc, NULL))) { ORTE_ERROR_LOG(rc); } } ORTE_RELEASE_THREAD(&ctl); return ORTE_SUCCESS; } /**** DEAL WITH INDIVIDUAL PROCS ****/ OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base.output, "%s errmgr:sched got state %s for proc %s pid %d exit_code %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), orte_proc_state_to_str(state), ORTE_NAME_PRINT(proc), pid, exit_code)); /* if this was a failed comm or heartbeat */ if (ORTE_PROC_STATE_COMM_FAILED == state) { /* ignore this */ ORTE_RELEASE_THREAD(&ctl); return ORTE_SUCCESS; } if (ORTE_PROC_STATE_HEARTBEAT_FAILED == state) { /* get the proc object for this daemon */ if (NULL == (daemon = (orte_proc_t*)opal_pointer_array_get_item(daemon_job->procs, proc->vpid))) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); ORTE_RELEASE_THREAD(&ctl); return ORTE_ERR_NOT_FOUND; } /* ensure that the heartbeat system knows to ignore this proc * from this point forward */ daemon->beat = 0; /* if we have already heard about this proc, ignore repeats */ if (ORTE_PROC_STATE_HEARTBEAT_FAILED == daemon->state) { /* already heard */ ORTE_RELEASE_THREAD(&ctl); return ORTE_SUCCESS; } #if 0 /* delete the route */ orte_routed.delete_route(proc); /* purge the oob */ orte_rml.purge(proc); #endif /* get the triplet/source and mark this source as "dead" */ if (NULL == (trp = orcm_get_triplet_stringid("orcmd:0.1:alpha"))) { opal_output(0, "%s CANNOT FIND DAEMON TRIPLET", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); ORTE_RELEASE_THREAD(&ctl); return ORTE_ERR_NOT_FOUND; } if (NULL == (src = orcm_get_source(trp, proc, false))) { opal_output(0, "%s DAEMON %s IS UNKNOWN SOURCE", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(proc)); 
ORTE_RELEASE_THREAD(&trp->ctl); ORTE_RELEASE_THREAD(&ctl); return ORTE_ERR_NOT_FOUND; } src->alive = false; ORTE_RELEASE_THREAD(&src->ctl); ORTE_RELEASE_THREAD(&trp->ctl); /* notify all apps immediately */ if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, proc->vpid))) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); ORTE_RELEASE_THREAD(&ctl); return ORTE_ERR_NOT_FOUND; } notify = OBJ_NEW(opal_buffer_t); send_msg = false; for (i=0; i < node->procs->size; i++) { if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(node->procs, i))) { continue; } if (ORTE_SUCCESS != (rc = opal_dss.pack(notify, &pptr->name, 1, ORTE_NAME))) { ORTE_ERROR_LOG(rc); ORTE_RELEASE_THREAD(&ctl); return rc; } /* reset the proc stats */ OBJ_DESTRUCT(&pptr->stats); OBJ_CONSTRUCT(&pptr->stats, opal_pstats_t); /* since we added something, need to send msg */ send_msg = true; } if (send_msg) { /* send it to all apps */ if (ORCM_SUCCESS != (rc = orcm_pnp.output_nb(ORCM_PNP_ERROR_CHANNEL, NULL, ORCM_PNP_TAG_ERRMGR, NULL, 0, notify, cbfunc, NULL))) { ORTE_ERROR_LOG(rc); } } else { OBJ_RELEASE(notify); } /* reset the node stats */ OBJ_DESTRUCT(&node->stats); OBJ_CONSTRUCT(&node->stats, opal_node_stats_t); /* record that the daemon died */ daemon->state = state; daemon->exit_code = exit_code; daemon->pid = 0; /* reset the daemon stats */ OBJ_DESTRUCT(&daemon->stats); OBJ_CONSTRUCT(&daemon->stats, opal_pstats_t); node = daemon->node; if (NULL == node) { opal_output(0, "%s Detected failure of daemon %s on unknown node", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(proc)); /* can't do anything further */ ORTE_RELEASE_THREAD(&ctl); return ORTE_SUCCESS; } else { opal_output(0, "%s Detected failure of daemon %s on node %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(proc), (NULL == node->name) ? 
"UNKNOWN" : node->name); } /* see if any usable daemons are left alive */ procs_recovered = false; for (i=2; i < daemon_job->procs->size; i++) { if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(daemon_job->procs, i))) { continue; } if (ORTE_PROC_STATE_UNTERMINATED < pptr->state) { continue; } /* at least one alive! recover procs from the failed one */ recover_procs(proc); procs_recovered = true; break; } if (!procs_recovered) { daemon->node = NULL; node->state = ORTE_NODE_STATE_DOWN; node->daemon = NULL; /* mark all procs on this node as having terminated */ for (i=0; i < node->procs->size; i++) { if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(node->procs, i))) { continue; } /* get the job data object for this process */ if (NULL == (jdt = orte_get_job_data_object(pptr->name.jobid))) { /* major problem */ opal_output(0, "%s COULD NOT GET JOB OBJECT FOR PROC %s(%d): state %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&pptr->name), i, orte_proc_state_to_str(pptr->state)); continue; } if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdt->apps, pptr->app_idx))) { continue; } OPAL_OUTPUT_VERBOSE((3, orte_errmgr_base.output, "%s REMOVING PROC %s FROM NODE %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&pptr->name), node->name)); app->num_procs--; opal_pointer_array_set_item(jdt->procs, pptr->name.vpid, NULL); OBJ_RELEASE(pptr); /* clean it off the node */ opal_pointer_array_set_item(node->procs, i, NULL); node->num_procs--; /* maintain acctg */ OBJ_RELEASE(pptr); /* see if job is empty */ jdt->num_terminated++; if (jdt->num_procs <= jdt->num_terminated) { OPAL_OUTPUT_VERBOSE((3, orte_errmgr_base.output, "%s REMOVING JOB %s FROM ACTIVE ARRAY", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(jdt->jobid))); opal_pointer_array_set_item(orte_job_data, ORTE_LOCAL_JOBID(jdt->jobid), NULL); OBJ_RELEASE(jdt); } } } ORTE_RELEASE_THREAD(&ctl); return ORTE_SUCCESS; } if (ORTE_PROC_STATE_RESTARTED == state) { 
OPAL_OUTPUT_VERBOSE((3, orte_errmgr_base.output, "%s RESTART OF DAEMON %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(proc))); /* get the proc object for this daemon */ if (NULL == (daemon = (orte_proc_t*)opal_pointer_array_get_item(daemon_job->procs, proc->vpid))) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); ORTE_RELEASE_THREAD(&ctl); return ORTE_ERR_NOT_FOUND; } /* if apps were on that node, notify all apps immediately that * those procs have failed */ if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, proc->vpid))) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); ORTE_RELEASE_THREAD(&ctl); return ORTE_ERR_NOT_FOUND; } notify = OBJ_NEW(opal_buffer_t); send_msg = false; for (i=0; i < node->procs->size; i++) { if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(node->procs, i))) { continue; } if (ORTE_SUCCESS != (rc = opal_dss.pack(notify, &pptr->name, 1, ORTE_NAME))) { ORTE_ERROR_LOG(rc); ORTE_RELEASE_THREAD(&ctl); return rc; } /* since we added something, we need to send msg */ send_msg = true; /* remove the proc from the app so that it will get * restarted when we re-activate the config */ if (NULL == (jdt = orte_get_job_data_object(pptr->name.jobid))) { continue; } if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdt->apps, pptr->app_idx))) { continue; } OPAL_OUTPUT_VERBOSE((3, orte_errmgr_base.output, "%s REMOVING PROC %s FROM NODE %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&pptr->name), node->name)); app->num_procs--; opal_pointer_array_set_item(jdt->procs, pptr->name.vpid, NULL); OBJ_RELEASE(pptr); /* clean it off the node */ opal_pointer_array_set_item(node->procs, i, NULL); node->num_procs--; /* maintain acctg */ OBJ_RELEASE(pptr); /* see if job is empty */ jdt->num_terminated++; if (jdt->num_procs <= jdt->num_terminated) { OPAL_OUTPUT_VERBOSE((3, orte_errmgr_base.output, "%s REMOVING JOB %s FROM ACTIVE ARRAY", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(jdt->jobid))); 
opal_pointer_array_set_item(orte_job_data, ORTE_LOCAL_JOBID(jdt->jobid), NULL); OBJ_RELEASE(jdt); } } if (send_msg) { /* send it to all apps */ if (ORCM_SUCCESS != (rc = orcm_pnp.output_nb(ORCM_PNP_ERROR_CHANNEL, NULL, ORCM_PNP_TAG_ERRMGR, NULL, 0, notify, cbfunc, NULL))) { ORTE_ERROR_LOG(rc); } } else { OBJ_RELEASE(notify); } /* reset the node stats */ OBJ_DESTRUCT(&node->stats); OBJ_CONSTRUCT(&node->stats, opal_node_stats_t); /* reset the daemon stats */ OBJ_DESTRUCT(&daemon->stats); OBJ_CONSTRUCT(&daemon->stats, opal_pstats_t); /* don't restart procs - we'll do that later after * we allow time for multiple daemons to restart */ ORTE_RELEASE_THREAD(&ctl); return ORTE_SUCCESS; } /* to arrive here is an error */ opal_output(0, "%s GOT UNRECOGNIZED STATE %s FOR PROC %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), orte_proc_state_to_str(state), ORTE_NAME_PRINT(proc)); return ORTE_ERROR; }
/*
 * Open the specified multicast channel, reusing an existing channel that
 * matches either the channel number or the name.
 *
 * channel   - channel number to open
 * name      - string name for the channel (used for reuse lookup; assumed
 *             non-NULL - strdup/strcasecmp would fault otherwise. TODO confirm
 *             callers guarantee this)
 * network   - optional "addr/mask" tuple; default is the configured
 *             xmit_network when NULL
 * port      - port to use; if < 0, one is computed from the configured
 *             port ranges based on the channel number
 * interface - optional interface tuple; default is the configured interface
 * direction - ORTE_RMCAST_MY_INPUT / MY_OUTPUT / RECV / XMIT flags
 *
 * Returns ORTE_SUCCESS, ORTE_ERR_SILENT on a bad network/interface spec
 * (help message already shown), ORTE_ERROR if no port can be assigned,
 * or the error code from setup_channel().
 */
static int open_channel(orte_rmcast_channel_t channel, char *name,
                        char *network, int port, char *interface,
                        uint8_t direction)
{
    opal_list_item_t *item;
    rmcast_base_channel_t *nchan, *chan;
    uint32_t netaddr=0, netmask=0, intr=0;
    int rc;
    unsigned int i, n, start, end, range;
    bool port_assigned;

    OPAL_OUTPUT_VERBOSE((2, orte_rmcast_base.rmcast_output,
                         "%s opening channel %d for %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         channel, name));

    /* parse the network, if provided */
    if (NULL != network) {
        if (ORTE_SUCCESS != (rc = opal_iftupletoaddr(network, &netaddr, &netmask))) {
            orte_show_help("help-rmcast-base.txt", "invalid-net-mask", true,
                           network, ORTE_ERROR_NAME(rc));
            return ORTE_ERR_SILENT;
        }
    }

    /* parse the interface, if provided */
    if (NULL != interface) {
        if (ORTE_SUCCESS != (rc = opal_iftupletoaddr(interface, &intr, NULL))) {
            orte_show_help("help-rmcast-base.txt", "invalid-net-mask", true,
                           interface, ORTE_ERROR_NAME(rc));
            return ORTE_ERR_SILENT;
        }
    }

    /* see if this name has already been assigned a channel on the specified network */
    OPAL_OUTPUT_VERBOSE((7, orte_rmcast_base.rmcast_output,
                         "%s open_channel: searching for %s:%d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), name, channel));

    chan = NULL;
    ORTE_ACQUIRE_THREAD(&orte_rmcast_base.main_ctl);
    for (item = opal_list_get_first(&orte_rmcast_base.channels);
         item != opal_list_get_end(&orte_rmcast_base.channels);
         item = opal_list_get_next(item)) {
        nchan = (rmcast_base_channel_t*)item;

        OPAL_OUTPUT_VERBOSE((7, orte_rmcast_base.rmcast_output,
                             "%s open_channel: channel %s:%d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             nchan->name, channel));
        /* a match on either the number or the name means we reuse it */
        if (nchan->channel == channel ||
            0 == strcasecmp(nchan->name, name)) {
            chan = nchan;
            break;
        }
    }

    if (NULL != chan) {
        /* already exists - check that the requested
         * sockets are setup
         */
        OPAL_OUTPUT_VERBOSE((2, orte_rmcast_base.rmcast_output,
                             "%s rmcast:udp using existing channel %s:%d network %03d.%03d.%03d.%03d port %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             chan->name, chan->channel,
                             OPAL_IF_FORMAT_ADDR(chan->network),
                             (int)chan->port));
        if (ORTE_SUCCESS != (rc = setup_channel(chan, direction))) {
            ORTE_ERROR_LOG(rc);
            ORTE_RELEASE_THREAD(&orte_rmcast_base.main_ctl);
            return rc;
        }
        ORTE_RELEASE_THREAD(&orte_rmcast_base.main_ctl);
        return ORTE_SUCCESS;
    }

    /* we didn't find an existing match, so create a new channel */
    OPAL_OUTPUT_VERBOSE((2, orte_rmcast_base.rmcast_output,
                         "%s creating new channel %s for %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         orte_rmcast_base_print_channel(channel), name));

    chan = OBJ_NEW(rmcast_base_channel_t);
    chan->name = strdup(name);
    chan->channel = channel;
    /* if we were not given a network, use the default */
    if (NULL == network) {
        chan->network = orte_rmcast_base.xmit_network;
    } else {
        chan->network = netaddr;
    }
    /* if we were not given an interface, use the default */
    if (NULL == interface) {
        chan->interface = orte_rmcast_base.interface;
    } else {
        chan->interface = intr;
    }
    /* if we were not given a port, use a default one */
    if (port < 0) {
        /* cycle thru the port ranges until we find the
         * port corresponding to this channel number
         */
        n = 0;
        port_assigned = false;
        for (i=0; NULL != orte_rmcast_base.ports.start[i]; i++) {
            /* how many ports are in this range? */
            start = strtol(orte_rmcast_base.ports.start[i], NULL, 10);
            end = strtol(orte_rmcast_base.ports.end[i], NULL, 10);
            range = end - start + 1;
            if (chan->channel < (n + range)) {
                /* take the corresponding port */
                chan->port = start + (chan->channel - n);
                port_assigned = true;
                break;
            }
            n += range;
        }
        if (!port_assigned) {
            opal_output(0, "%s CANNOT ASSIGN PORT TO CHANNEL %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        orte_rmcast_base_print_channel(chan->channel));
            /* BUGFIX: the original returned here while still holding the
             * main_ctl thread-lock (acquired before the search loop) and
             * leaked the newly-created channel object, which has not yet
             * been appended to the channel list. Release both before
             * returning so the rmcast framework isn't deadlocked.
             */
            OBJ_RELEASE(chan);
            ORTE_RELEASE_THREAD(&orte_rmcast_base.main_ctl);
            return ORTE_ERROR;
        }
    } else {
        chan->port = port;
    }
    opal_list_append(&orte_rmcast_base.channels, &chan->item);
    ORTE_RELEASE_THREAD(&orte_rmcast_base.main_ctl);

    /* if this is my input, set that value */
    if (ORTE_RMCAST_MY_INPUT & direction) {
        orte_rmcast_base.my_input_channel = chan;
    }
    /* if this is my output, set that value */
    if (ORTE_RMCAST_MY_OUTPUT & direction) {
        orte_rmcast_base.my_output_channel = chan;
    }

    OPAL_OUTPUT_VERBOSE((2, orte_rmcast_base.rmcast_output,
                         "%s rmcast:udp opening new channel %s:%s network %03d.%03d.%03d.%03d port %d for%s%s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         chan->name, orte_rmcast_base_print_channel(chan->channel),
                         OPAL_IF_FORMAT_ADDR(chan->network),
                         (int)chan->port,
                         (ORTE_RMCAST_RECV & direction) ? " RECV" : " ",
                         (ORTE_RMCAST_XMIT & direction) ? " XMIT" : " "));

    if (ORTE_SUCCESS != (rc = setup_channel(chan, direction))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    return ORTE_SUCCESS;
}
/*
 * Handle a command message from an orcm tool.
 *
 * Supported commands: ORCM_TOOL_START_CMD (validate the packed job and
 * hand it to the launch pipe for spawning) and ORCM_TOOL_STOP_CMD
 * (locate the named/instanced job in orte_job_data and hand it to the
 * launch pipe for termination). A response buffer carrying the echoed
 * cmd flag and the int32 result code is ALWAYS sent back to the sender
 * so the tool never hangs waiting on us.
 *
 * NOTE(review): ownership of the job object passed via the caddy is
 * assumed to transfer to the base launch functions (per the comment in
 * the START path) - confirm against orcm_cfgi_base.
 */
static void tool_messages(int status, orte_process_name_t *sender,
                          orcm_pnp_tag_t tag,
                          struct iovec *msg, int count,
                          opal_buffer_t *buffer, void *cbdata)
{
    int32_t rc=ORCM_SUCCESS, n, j;
    orte_job_t *jdata, *jdt, *jdt2;
    uint16_t jfam;
    orcm_tool_cmd_t flag=ORCM_TOOL_ILLEGAL_CMD;
    opal_buffer_t *response;
    orcm_cfgi_caddy_t *caddy;

    /* wait for any existing action to complete */
    ORTE_ACQUIRE_THREAD(&orcm_cfgi_base.ctl);

    OPAL_OUTPUT_VERBOSE((2, orcm_cfgi_base.output,
                         "%s cfgi:tool released to process cmd",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* setup the response - we send it regardless so the tool won't hang */
    response = OBJ_NEW(opal_buffer_t);

    /* unpack the job family of the sending tool so we can check
     * that the cmd is meant for our DVM */
    n=1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &jfam, &n, OPAL_UINT16))) {
        ORTE_ERROR_LOG(rc);
        /* flag is still ORCM_TOOL_ILLEGAL_CMD here */
        opal_dss.pack(response, &flag, 1, ORCM_TOOL_CMD_T);
        goto cleanup;
    }

    /* unpack the cmd */
    n=1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &flag, &n, ORCM_TOOL_CMD_T))) {
        ORTE_ERROR_LOG(rc);
        opal_dss.pack(response, &flag, 1, ORCM_TOOL_CMD_T);
        goto cleanup;
    }
    /* return the cmd flag */
    opal_dss.pack(response, &flag, 1, ORCM_TOOL_CMD_T);

    /* if this isn't intended for my DVM, ignore it */
    if (jfam != ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
        opal_output(0, "%s cfgi:tool CMD NOT FOR ME!",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        rc = ORTE_ERROR;
        goto cleanup;
    }

    if (ORCM_TOOL_START_CMD == flag) {
        OPAL_OUTPUT_VERBOSE((2, orcm_cfgi_base.output,
                             "%s spawn cmd from %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(sender)));
        /* unpack the job object */
        n=1;
        if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &jdata, &n, ORTE_JOB))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }
        /* check it */
        if (ORCM_SUCCESS != (rc = orcm_cfgi_base_check_job(jdata))) {
            ORTE_ERROR_LOG(rc);
            /* BUGFIX: release the unpacked job object on validation
             * failure - the original leaked it here (the STOP path
             * below releases its unpacked job on every error exit)
             */
            OBJ_RELEASE(jdata);
            goto cleanup;
        }
        /* launch the job */
        caddy = OBJ_NEW(orcm_cfgi_caddy_t);
        caddy->cmd = ORCM_CFGI_SPAWN;
        /* don't retain the jdata - the base functions will either
         * keep it or not
         */
        caddy->jdata = jdata;
        opal_fd_write(orcm_cfgi_base.launch_pipe[1],
                      sizeof(orcm_cfgi_caddy_t*), &caddy);
    } else if (ORCM_TOOL_STOP_CMD == flag) {
        /* unpack the job object */
        n=1;
        if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &jdt, &n, ORTE_JOB))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }
        /* check for correctness */
        if (NULL == jdt->instance && NULL == jdt->name) {
            /* had to specify one of them */
            rc = ORTE_ERR_BAD_PARAM;
            OBJ_RELEASE(jdt);
            goto cleanup;
        }
        /* search for the specified job by instance first, then name */
        jdata = NULL;
        for (j=0; j < orte_job_data->size; j++) {
            if (NULL == (jdt2 = (orte_job_t*)opal_pointer_array_get_item(orte_job_data, j))) {
                continue;
            }
            if (NULL != jdt2->instance && NULL != jdt->instance) {
                if (0 == strcmp(jdt2->instance, jdt->instance)) {
                    jdata = jdt2;
                    break;
                }
                continue;
            }
            if (NULL != jdt2->name && NULL != jdt->name) {
                if (0 == strcmp(jdt2->name, jdt->name)) {
                    jdata = jdt2;
                    break;
                }
                continue;
            }
        }
        if (NULL == jdata) {
            /* couldn't find the job */
            rc = ORTE_ERR_BAD_PARAM;
            OBJ_RELEASE(jdt);
            goto cleanup;
        }
        /* order the termination */
        caddy = OBJ_NEW(orcm_cfgi_caddy_t);
        caddy->cmd = ORCM_CFGI_KILL_JOB;
        caddy->jdata = jdata;
        opal_fd_write(orcm_cfgi_base.launch_pipe[1],
                      sizeof(orcm_cfgi_caddy_t*), &caddy);
        /* cleanup the unpacked search template - the found job object
         * belongs to orte_job_data, not to us */
        OBJ_RELEASE(jdt);
    } else {
        opal_output(0, "%s: UNKNOWN TOOL CMD FLAG %d",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)flag);
    }

 cleanup:
    /* return the result of the cmd */
    opal_dss.pack(response, &rc, 1, OPAL_INT);

    /* release the thread */
    ORTE_RELEASE_THREAD(&orcm_cfgi_base.ctl);

    if (ORCM_SUCCESS != (rc = orcm_pnp.output_nb(ORCM_PNP_SYS_CHANNEL, sender,
                                                 ORCM_PNP_TAG_TOOL,
                                                 NULL, 0, response, cbfunc, NULL))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(response);
    }
}