/**
 * Record a set of subscriptions in the proxy's local tracking array.
 *
 * For each of the cnt entries a new orte_gpr_proxy_subscriber_t is created,
 * filled in from the entry's name (duplicated), cbfunc and user_tag, and
 * added to orte_gpr_proxy_globals.subscriptions. The id given to the tracker
 * is the current value of orte_gpr_proxy_globals.num_subs; it is written back
 * to subscriptions[i]->id and the counter is then advanced.
 *
 * @param cnt            Number of entries in subscriptions.
 * @param subscriptions  Array of subscription pointers; each ->id is set.
 *
 * @return ORTE_SUCCESS, or ORTE_ERR_OUT_OF_RESOURCE if an object, a name
 *         copy, or an array slot could not be obtained. On failure the
 *         trackers entered by earlier iterations remain in place, but the
 *         tracker being built is released rather than leaked.
 */
int orte_gpr_proxy_enter_subscription(orte_std_cntr_t cnt, orte_gpr_subscription_t **subscriptions)
{
    orte_gpr_proxy_subscriber_t *sub;
    orte_std_cntr_t i;

    OPAL_TRACE(2);

    for (i=0; i < cnt; i++) {
        sub = OBJ_NEW(orte_gpr_proxy_subscriber_t);
        if (NULL == sub) {
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            return ORTE_ERR_OUT_OF_RESOURCE;
        }

        if (NULL != subscriptions[i]->name) {
            sub->name = strdup(subscriptions[i]->name);
            if (NULL == sub->name) {
                /* could not copy the name - do not register a nameless tracker */
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                OBJ_RELEASE(sub);
                return ORTE_ERR_OUT_OF_RESOURCE;
            }
        }
        sub->callback = subscriptions[i]->cbfunc;
        sub->user_tag = subscriptions[i]->user_tag;

        if (0 > orte_pointer_array_add(&sub->index, orte_gpr_proxy_globals.subscriptions, sub)) {
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            /* the tracker never made it into the array, so nothing else
             * references it - release it here to avoid a leak */
            OBJ_RELEASE(sub);
            return ORTE_ERR_OUT_OF_RESOURCE;
        }

        sub->id = orte_gpr_proxy_globals.num_subs;
        subscriptions[i]->id = sub->id;
        (orte_gpr_proxy_globals.num_subs)++;
    }

    return ORTE_SUCCESS;
}
/**
 * Terminate the orteds for a given job.
 *
 * Builds the list of active daemons for jobid, asks them to exit, and then
 * empties and destroys the list regardless of the outcome.
 *
 * @return the result of the last step attempted (ORTE_SUCCESS if both the
 *         lookup and the exit order succeeded).
 */
int orte_pls_rsh_terminate_orteds(orte_jobid_t jobid, struct timeval *timeout, opal_list_t *attrs)
{
    opal_list_t daemons;
    opal_list_item_t *item;
    int rc;

    OPAL_TRACE(1);

    OBJ_CONSTRUCT(&daemons, opal_list_t);

    /* collect the active daemons; only if that works do we order them to exit */
    rc = orte_pls_base_get_active_daemons(&daemons, jobid, attrs);
    if (ORTE_SUCCESS == rc) {
        rc = orte_pls_base_orted_exit(&daemons, timeout);
    }
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
    }

    /* release every entry on the list before destructing it */
    while (NULL != (item = opal_list_remove_first(&daemons))) {
        OBJ_RELEASE(item);
    }
    OBJ_DESTRUCT(&daemons);

    return rc;
}
/**
 * Ask the daemons of a job to deliver a signal to their local processes.
 *
 * @return the error from the daemon lookup if it fails (the list is then
 *         destructed without being drained), otherwise the result of the
 *         signal order.
 */
int orte_pls_rsh_signal_job(orte_jobid_t jobid, int32_t signal, opal_list_t *attrs)
{
    opal_list_t daemons;
    opal_list_item_t *item;
    int rc;

    OPAL_TRACE(1);

    OBJ_CONSTRUCT(&daemons, opal_list_t);

    /* look up the active daemons of this job */
    rc = orte_pls_base_get_active_daemons(&daemons, jobid, attrs);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        OBJ_DESTRUCT(&daemons);
        return rc;
    }

    /* have each daemon pass the signal on to its local procs */
    rc = orte_pls_base_orted_signal_local_procs(&daemons, signal);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
    }

    /* empty the list, releasing each entry, then tear it down */
    while (NULL != (item = opal_list_remove_first(&daemons))) {
        OBJ_RELEASE(item);
    }
    OBJ_DESTRUCT(&daemons);

    return rc;
}
/**
 * Create a container, give it a copy of the supplied itag list, and add it
 * to the segment's container array.
 *
 * @param cptr       Receives the new container.
 * @param seg        Segment whose containers array and num_containers
 *                   counter are updated.
 * @param num_itags  Number of entries in itags.
 * @param itags      Itag list copied into the container.
 *
 * @return ORTE_SUCCESS; ORTE_ERR_OUT_OF_RESOURCE if the object or an array
 *         slot could not be obtained; or the error returned by
 *         orte_gpr_replica_copy_itag_list. On every failure after the
 *         object was created, the container is released so it is not leaked.
 */
int orte_gpr_replica_create_container(orte_gpr_replica_container_t **cptr, orte_gpr_replica_segment_t *seg, orte_std_cntr_t num_itags, orte_gpr_replica_itag_t *itags)
{
    int rc;
    orte_std_cntr_t index;

    OPAL_TRACE(3);

    *cptr = OBJ_NEW(orte_gpr_replica_container_t);
    if (NULL == *cptr) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    if (ORTE_SUCCESS != (rc = orte_gpr_replica_copy_itag_list(&((*cptr)->itags), itags, num_itags))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(*cptr);
        return rc;
    }
    (*cptr)->num_itags = num_itags;

    if (0 > orte_pointer_array_add(&index, seg->containers, (void*)(*cptr))) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        /* the container was not stored in the segment, so release it here,
         * matching the cleanup done when the itag copy fails */
        OBJ_RELEASE(*cptr);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    (seg->num_containers)++;
    (*cptr)->index = index;

    return ORTE_SUCCESS;
}
/**
 * Collect the containers of a segment whose itag lists satisfy taglist
 * under addr_mode.
 *
 * Matches are placed in orte_gpr_replica_globals.srch_cptr and counted in
 * orte_gpr_replica_globals.num_srch_cptr; both are reset on entry.
 *
 * @return ORTE_SUCCESS, or ORTE_ERR_OUT_OF_RESOURCE if a match could not be
 *         added to the search array (the array is cleared in that case).
 */
int orte_gpr_replica_find_containers(orte_gpr_replica_segment_t *seg, orte_gpr_replica_addr_mode_t addr_mode, orte_gpr_replica_itag_t *taglist, orte_std_cntr_t num_tags)
{
    orte_gpr_replica_container_t **containers;
    orte_std_cntr_t slot, seen, index;

    OPAL_TRACE(3);

    /* start from an empty result set */
    orte_pointer_array_clear(orte_gpr_replica_globals.srch_cptr);
    orte_gpr_replica_globals.num_srch_cptr = 0;

    containers = (orte_gpr_replica_container_t**)((seg->containers)->addr);

    /* stop once every live container has been visited or the array ends */
    seen = 0;
    for (slot = 0; seen < seg->num_containers && slot < (seg->containers)->size; slot++) {
        if (NULL == containers[slot]) {
            continue;
        }
        seen++;

        if (!orte_gpr_replica_check_itag_list(addr_mode, num_tags, taglist,
                                              containers[slot]->num_itags,
                                              containers[slot]->itags)) {
            continue;
        }

        if (0 > orte_pointer_array_add(&index, orte_gpr_replica_globals.srch_cptr, containers[slot])) {
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            orte_pointer_array_clear(orte_gpr_replica_globals.srch_cptr);
            return ORTE_ERR_OUT_OF_RESOURCE;
        }
        (orte_gpr_replica_globals.num_srch_cptr)++;
    }

    return ORTE_SUCCESS;
}
/**
 * Reserve a contiguous range of vpids for a job.
 *
 * Runs under orte_ns_replica.mutex. On success *start receives the job's
 * current next_vpid and next_vpid is advanced by range.
 *
 * @param job    Job whose record is looked up with orte_ns_replica_find_job.
 * @param range  Number of vpids requested.
 * @param start  Receives the first vpid of the reserved range.
 *
 * @return ORTE_SUCCESS; ORTE_ERR_NOT_FOUND if the job has no record;
 *         ORTE_ERR_OUT_OF_RESOURCE if the range does not fit below
 *         ORTE_VPID_MAX.
 */
int orte_ns_replica_reserve_range(orte_jobid_t job, orte_vpid_t range, orte_vpid_t *start)
{
    orte_ns_replica_jobitem_t *ptr;

    OPAL_TRACE(1);

    OPAL_THREAD_LOCK(&orte_ns_replica.mutex);

    /* find the job's record */
    if (NULL == (ptr = orte_ns_replica_find_job(job))) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex);
        return ORTE_ERR_NOT_FOUND;
    }

    /* The request fits when next_vpid + range stays strictly below
     * ORTE_VPID_MAX. Comparing range against the remaining headroom avoids
     * the chained subtraction "MAX - range - next", which wraps around to a
     * large positive value if orte_vpid_t is an unsigned type and would then
     * accept almost any request.
     */
    if (range < (ORTE_VPID_MAX - ptr->next_vpid)) {
        *start = ptr->next_vpid;
        ptr->next_vpid += range;
        OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex);
        return ORTE_SUCCESS;
    }

    /* get here if the range isn't available */
    ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
    OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex);
    return ORTE_ERR_OUT_OF_RESOURCE;
}
/**
 * Handle a "dump triggers" command.
 *
 * Packs the command flag into answer, unpacks the starting trigger id from
 * input_buffer, and passes both to orte_gpr_replica_dump_triggers_fn.
 *
 * @return ORTE_SUCCESS or the first error encountered.
 */
int orte_gpr_replica_recv_dump_triggers_cmd(orte_buffer_t *input_buffer, orte_buffer_t *answer)
{
    orte_gpr_cmd_flag_t command = ORTE_GPR_DUMP_TRIGGERS_CMD;
    orte_gpr_trigger_id_t start;
    orte_std_cntr_t n;
    int rc;

    OPAL_TRACE(3);

    /* the reply begins with the command it answers */
    rc = orte_dss.pack(answer, &command, 1, ORTE_GPR_CMD);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* exactly one trigger id is expected in the request */
    n = 1;
    rc = orte_dss.unpack(input_buffer, &start, &n, ORTE_GPR_TRIGGER_ID);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    rc = orte_gpr_replica_dump_triggers_fn(answer, start);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
    }

    return rc;
}
/**
 * Handle a "dump segments" command.
 *
 * Packs the command flag into answer, unpacks the segment name from
 * input_buffer, and passes both to orte_gpr_replica_dump_segments_fn.
 *
 * The unpacked name is freed before returning.
 * NOTE(review): this assumes the ORTE_STRING unpack hands back a
 * heap-allocated string owned by the caller and that
 * orte_gpr_replica_dump_segments_fn does not keep the pointer - confirm.
 *
 * @return ORTE_SUCCESS or the first error encountered.
 */
int orte_gpr_replica_recv_dump_segments_cmd(orte_buffer_t *input_buffer, orte_buffer_t *answer)
{
    orte_gpr_cmd_flag_t command=ORTE_GPR_DUMP_SEGMENTS_CMD;
    char *segment = NULL;
    orte_std_cntr_t n;
    int rc;

    OPAL_TRACE(3);

    if (ORTE_SUCCESS != (rc = orte_dss.pack(answer, &command, 1, ORTE_GPR_CMD))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    n=1;
    if (ORTE_SUCCESS != (rc = orte_dss.unpack(input_buffer, &segment, &n, ORTE_STRING))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    rc = orte_gpr_replica_dump_segments_fn(answer, segment);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
    }

    /* done with the name; free(NULL) is a no-op if nothing was unpacked */
    free(segment);

    return rc;
}
/**
 * Pre-size the container array of a segment.
 *
 * Runs under orte_gpr_replica_globals.mutex, which is released on every
 * return path. The segment is looked up with orte_gpr_replica_find_seg
 * (called with its second argument set to true) and its containers array
 * is initialized with num_slots entries.
 *
 * @param name       Name of the segment.
 * @param num_slots  Initial size passed to orte_pointer_array_init.
 *
 * @return the error from orte_gpr_replica_find_seg if the lookup fails;
 *         ORTE_ERR_BAD_PARAM if the segment's containers array already has
 *         a non-zero size; otherwise the result of orte_pointer_array_init.
 */
int orte_gpr_replica_preallocate_segment(char *name, orte_std_cntr_t num_slots)
{
    int rc;
    orte_gpr_replica_segment_t *seg=NULL;

    OPAL_TRACE(1);

    OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex);

    /* find the segment */
    if (ORTE_SUCCESS != (rc = orte_gpr_replica_find_seg(&seg, true, name))) {
        OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex);
        return rc;
    }

    if (0 < (seg->containers)->size) {  /* segment already exists! */
        /* must drop the lock here too, or every later GPR call would block */
        OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex);
        return ORTE_ERR_BAD_PARAM;
    }

    rc = orte_pointer_array_init(&(seg->containers), num_slots,
                                 (orte_std_cntr_t)orte_gpr_array_max_size,
                                 (orte_std_cntr_t)orte_gpr_array_block_size);

    OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex);
    return rc;
}
int orte_errmgr_base_close(void) { OPAL_TRACE(5); /* If we have a selected component and module, then finalize it */ if (orte_errmgr_base_selected) { orte_errmgr_base_selected_component.errmgr_finalize(); } /* Close all remaining available components (may be one if this is a OMPI RTE program, or [possibly] multiple if this is ompi_info) */ mca_base_components_close(orte_errmgr_base_output, &orte_errmgr_base_components_available, NULL); orte_errmgr_initialized = false; /* set the module back to the default so that error logging can continue */ orte_errmgr = orte_errmgr_default; /* All done */ return ORTE_SUCCESS; }
int orte_pls_proxy_cancel_operation(void) { orte_buffer_t* cmd; orte_buffer_t* answer; orte_pls_cmd_flag_t command, ret_cmd; orte_std_cntr_t count; int rc; OPAL_TRACE(1); command = ORTE_PLS_CANCEL_OPERATION_CMD; cmd = OBJ_NEW(orte_buffer_t); if (cmd == NULL) { ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); return ORTE_ERR_OUT_OF_RESOURCE; } if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_PLS_CMD))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(cmd); return rc; } if (0 > orte_rml.send_buffer(orte_pls_proxy_replica, cmd, ORTE_RML_TAG_PLS, 0)) { ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); OBJ_RELEASE(cmd); return ORTE_ERR_COMM_FAILURE; } OBJ_RELEASE(cmd); answer = OBJ_NEW(orte_buffer_t); if(answer == NULL) { ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); return ORTE_ERR_OUT_OF_RESOURCE; } if (0 > orte_rml.recv_buffer(orte_pls_proxy_replica, answer, ORTE_RML_TAG_PLS)) { ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); OBJ_RELEASE(answer); return ORTE_ERR_COMM_FAILURE; } count = 1; if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &ret_cmd, &count, ORTE_PLS_CMD))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(answer); return rc; } if (ret_cmd != command) { ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); OBJ_RELEASE(answer); return ORTE_ERR_COMM_FAILURE; } OBJ_RELEASE(answer); return ORTE_SUCCESS; }
/**
 * Ask the GPR replica to clean up a job.
 *
 * In compound command mode the request is only packed into
 * orte_gpr_proxy_globals.compound_cmd. Otherwise it is sent to
 * orte_process_info.gpr_replica on ORTE_RML_TAG_GPR and the reply is
 * awaited.
 *
 * @return in compound mode, the result of packing; otherwise the status
 *         unpacked from the reply, or ORTE_ERR_OUT_OF_RESOURCE /
 *         ORTE_ERR_COMM_FAILURE / the pack or unpack error on failure.
 */
int orte_gpr_proxy_cleanup_job(orte_jobid_t jobid)
{
    orte_buffer_t *cmd, *answer;
    int rc, ret;

    OPAL_TRACE(1);

    /* compound mode: just queue the request */
    if (orte_gpr_proxy_globals.compound_cmd_mode) {
        rc = orte_gpr_base_pack_cleanup_job(orte_gpr_proxy_globals.compound_cmd, jobid);
        if (ORTE_SUCCESS != rc) {
            ORTE_ERROR_LOG(rc);
        }
        return rc;
    }

    /* --- request --- */
    cmd = OBJ_NEW(orte_buffer_t);
    if (NULL == cmd) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    rc = orte_gpr_base_pack_cleanup_job(cmd, jobid);
    if (ORTE_SUCCESS == rc &&
        0 > orte_rml.send_buffer(orte_process_info.gpr_replica, cmd, ORTE_RML_TAG_GPR, 0)) {
        rc = ORTE_ERR_COMM_FAILURE;
    }
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(cmd);
        return rc;
    }
    OBJ_RELEASE(cmd);

    /* --- reply --- */
    answer = OBJ_NEW(orte_buffer_t);
    if (NULL == answer) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR)) {
        rc = ORTE_ERR_COMM_FAILURE;
    } else {
        rc = orte_gpr_base_unpack_cleanup_job(answer, &ret);
    }
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(answer);
        return rc;
    }

    OBJ_RELEASE(answer);

    /* hand back the status carried in the reply */
    return ret;
}
/**
 * Cancel an operation involving comm to an orted.
 *
 * Delegates to orte_pls_base_orted_cancel_operation and logs any error.
 *
 * @return the result of orte_pls_base_orted_cancel_operation.
 */
int orte_pls_rsh_cancel_operation(void)
{
    int rc;

    OPAL_TRACE(1);

    rc = orte_pls_base_orted_cancel_operation();
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
    }

    return rc;
}
/**
 * Report an error code together with the source location that raised it.
 *
 * Nothing is printed when error_code is ORTE_ERR_SILENT.
 *
 * @param error_code  Error being reported.
 * @param filename    Source file name included in the message.
 * @param line        Source line included in the message.
 */
void orte_errmgr_base_log(int error_code, char *filename, int line)
{
    OPAL_TRACE(1);

    /* silent errors produce no output at all */
    if (ORTE_ERR_SILENT != error_code) {
        opal_output(0, "%s ORTE_ERROR_LOG: %s in file %s at line %d",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                    ORTE_ERROR_NAME(error_code),
                    filename, line);
    }
}
/*
 * NOTIFY ACTION
 *
 * Unpack notify-action values by forwarding to orte_dss_unpack_buffer with
 * the ORTE_GPR_NOTIFY_ACTION_T type. The type argument is not used.
 * Returns the result of that call, logging it on failure.
 */
int orte_gpr_base_unpack_notify_action(orte_buffer_t *buffer, void *dest, orte_std_cntr_t *num_vals, orte_data_type_t type)
{
    int rc;

    OPAL_TRACE(4);

    rc = orte_dss_unpack_buffer(buffer, dest, num_vals, ORTE_GPR_NOTIFY_ACTION_T);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
    }

    return rc;
}
/*
 * TRIGGER ID
 *
 * Unpack trigger ids by forwarding to orte_dss_unpack_buffer with the
 * ORTE_GPR_TRIGGER_ID_T type. The type argument is not used.
 * Returns the result of that call, logging it on failure.
 */
int orte_gpr_base_unpack_trigger_id(orte_buffer_t *buffer, void *dest, orte_std_cntr_t *num_vals, orte_data_type_t type)
{
    int rc;

    OPAL_TRACE(4);

    rc = orte_dss_unpack_buffer(buffer, dest, num_vals, ORTE_GPR_TRIGGER_ID_T);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
    }

    return rc;
}
/*
 * ADDR MODE
 *
 * Unpack addressing-mode values by forwarding to orte_dss_unpack_buffer
 * with the ORTE_GPR_ADDR_MODE_T type. The type argument is not used.
 * Returns the result of that call, logging it on failure.
 */
int orte_gpr_base_unpack_addr_mode(orte_buffer_t *buffer, void *dest, orte_std_cntr_t *num_vals, orte_data_type_t type)
{
    int rc;

    OPAL_TRACE(4);

    rc = orte_dss_unpack_buffer(buffer, dest, num_vals, ORTE_GPR_ADDR_MODE_T);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
    }

    return rc;
}
/**
 * Release a segment and clear its slot in orte_gpr_replica.segments.
 *
 * The slot index is taken from the segment's itag before the segment is
 * released.
 *
 * @param seg  Address of the segment pointer to release.
 *
 * @return ORTE_SUCCESS, or the negative value returned by
 *         orte_pointer_array_set_item (the segment count is then left
 *         unchanged).
 */
int orte_gpr_replica_release_segment(orte_gpr_replica_segment_t **seg)
{
    orte_std_cntr_t slot;
    int rc;

    OPAL_TRACE(3);

    /* remember where the segment lives - it is gone after the release */
    slot = (*seg)->itag;
    OBJ_RELEASE(*seg);

    rc = orte_pointer_array_set_item(orte_gpr_replica.segments, slot, NULL);
    if (0 > rc) {
        return rc;
    }

    (orte_gpr_replica.num_segs)--;
    return ORTE_SUCCESS;
}
/*
 * Public interfaces
 */

/**
 * Report an error code together with the source location that raised it.
 *
 * The printable name is obtained from ORTE_ERROR_NAME; when that yields
 * NULL the error is treated as silent and nothing is printed.
 *
 * @param error_code  Error being reported.
 * @param filename    Source file name included in the message.
 * @param line        Source line included in the message.
 */
void orte_errmgr_base_log(int error_code, char *filename, int line)
{
    char *errstring = NULL;

    OPAL_TRACE(1);

    errstring = (char*)ORTE_ERROR_NAME(error_code);

    /* no name means the error is silent - say nothing */
    if (NULL != errstring) {
        opal_output(0, "%s ORTE_ERROR_LOG: %s in file %s at line %d",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                    errstring, filename, line);
    }
}
/**
 * Delete one itagval from a container.
 *
 * Steps, in order: record the deletion, drop the value's itag from the
 * container's itaglist, release the value, and clear its slot in the
 * container's itagvals array.
 *
 * @return ORTE_SUCCESS; the error from orte_gpr_replica_record_action; or
 *         ORTE_ERR_NOT_FOUND if the value's itag is not in the itaglist
 *         (the value is then left untouched apart from the recorded action).
 */
int orte_gpr_replica_delete_itagval(orte_gpr_replica_segment_t *seg, orte_gpr_replica_container_t *cptr, orte_gpr_replica_itagval_t *iptr)
{
    orte_std_cntr_t pos;
    int rc;
    int found = 0;

    OPAL_TRACE(3);

    /* The action has to be recorded BEFORE the release below. Per the
     * original note, record_action retains the object so that it is not
     * actually freed until subscriptions have been checked for anyone who
     * wanted to be notified of the deletion.
     */
    rc = orte_gpr_replica_record_action(seg, cptr, iptr, ORTE_GPR_REPLICA_ENTRY_DELETED);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* take the first matching itag out of the container's list */
    for (pos = 0; pos < orte_value_array_get_size(&(cptr->itaglist)); pos++) {
        if (iptr->itag == ORTE_VALUE_ARRAY_GET_ITEM(&(cptr->itaglist), orte_gpr_replica_itag_t, pos)) {
            orte_value_array_remove_item(&(cptr->itaglist), pos);
            found = 1;
            break;
        }
    }
    if (!found) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        return ORTE_ERR_NOT_FOUND;
    }

    /* save the slot index first: iptr must not be read after the release */
    pos = iptr->index;
    OBJ_RELEASE(iptr);

    /* empty the slot in the container's itagval array */
    orte_pointer_array_set_item(cptr->itagvals, pos, NULL);
    (cptr->num_itagvals)--;

    /* NOTE: an empty container is deliberately NOT removed here - doing so
     * would recurse improperly when called from
     * orte_gpr_replica_release_container
     */
    return ORTE_SUCCESS;
}
/**
 * Purge an itag from a segment.
 *
 * Currently a stub: neither argument is used and ORTE_SUCCESS is always
 * returned.
 */
int orte_gpr_replica_purge_itag(orte_gpr_replica_segment_t *seg, orte_gpr_replica_itag_t itag)
{
    OPAL_TRACE(3);

    /*
     * Intended algorithm (not implemented):
     *  - walk the segment's containers and check their descriptions first;
     *    if removing this name leaves a description list empty, remove the
     *    container.
     *  - otherwise walk the container's keyvalue pairs and remove every
     *    pair whose "key" matches; if that removes all pairs, remove the
     *    container.
     */

    return ORTE_SUCCESS;
}
/**
 * Release a local trigger tracker and clear its slot in
 * orte_gpr_proxy_globals.triggers.
 *
 * @return ORTE_SUCCESS, or ORTE_ERR_BAD_PARAM if trig is NULL.
 */
int orte_gpr_proxy_remove_trigger(orte_gpr_proxy_trigger_t *trig)
{
    orte_std_cntr_t slot;

    OPAL_TRACE(2);

    if (NULL == trig) {
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return ORTE_ERR_BAD_PARAM;
    }

    /* grab the slot number before the tracker is released */
    slot = trig->index;
    OBJ_RELEASE(trig);

    orte_pointer_array_set_item(orte_gpr_proxy_globals.triggers, slot, NULL);

    return ORTE_SUCCESS;
}
/**
 * Release a local subscription tracker and clear its slot in
 * orte_gpr_proxy_globals.subscriptions.
 *
 * @return ORTE_SUCCESS, or ORTE_ERR_BAD_PARAM if sub is NULL.
 */
int orte_gpr_proxy_remove_subscription(orte_gpr_proxy_subscriber_t *sub)
{
    orte_std_cntr_t slot;

    OPAL_TRACE(2);

    if (NULL == sub) {
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return ORTE_ERR_BAD_PARAM;
    }

    /* grab the slot number before the tracker is released */
    slot = sub->index;
    OBJ_RELEASE(sub);

    orte_pointer_array_set_item(orte_gpr_proxy_globals.subscriptions, slot, NULL);

    return ORTE_SUCCESS;
}
/**
 * Handle a "dump all" command.
 *
 * Packs the command flag into answer and then lets
 * orte_gpr_replica_dump_all_fn append its output.
 *
 * @return ORTE_SUCCESS or the first error encountered.
 */
int orte_gpr_replica_recv_dump_all_cmd(orte_buffer_t *answer)
{
    orte_gpr_cmd_flag_t command = ORTE_GPR_DUMP_ALL_CMD;
    int rc;

    OPAL_TRACE(3);

    /* the reply begins with the command it answers */
    rc = orte_dss.pack(answer, &command, 1, ORTE_GPR_CMD);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    rc = orte_gpr_replica_dump_all_fn(answer);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
    }

    return rc;
}
/**
 * Collect the itagvals of a container that match a set of itags.
 *
 * Results go into orte_gpr_replica_globals.srch_ival and are counted in
 * orte_gpr_replica_globals.num_srch_ival; both are reset on entry. The
 * container's own itaglist must first satisfy the itags under addr_mode;
 * individual values are then tested with ORTE_GPR_REPLICA_OR.
 *
 * @return ORTE_SUCCESS (also when nothing matches), or
 *         ORTE_ERR_OUT_OF_RESOURCE if a match could not be added to the
 *         search array (the array is cleared in that case).
 */
int orte_gpr_replica_search_container(orte_gpr_replica_addr_mode_t addr_mode, orte_gpr_replica_itag_t *itags, orte_std_cntr_t num_itags, orte_gpr_replica_container_t *cptr)
{
    orte_gpr_replica_itagval_t **vals;
    orte_std_cntr_t slot, seen, index;

    OPAL_TRACE(3);

    /* start from an empty result set */
    orte_pointer_array_clear(orte_gpr_replica_globals.srch_ival);
    orte_gpr_replica_globals.num_srch_ival = 0;

    /* the container as a whole has to pass the addr_mode test first */
    if (!orte_gpr_replica_check_itag_list(addr_mode, num_itags, itags,
                orte_value_array_get_size(&(cptr->itaglist)),
                ORTE_VALUE_ARRAY_GET_BASE(&(cptr->itaglist), orte_gpr_replica_itag_t))) {
        return ORTE_SUCCESS;
    }

    /* it does - gather the individual values that match */
    vals = (orte_gpr_replica_itagval_t**)((cptr->itagvals)->addr);
    seen = 0;
    for (slot = 0; seen < cptr->num_itagvals && slot < (cptr->itagvals)->size; slot++) {
        if (NULL == vals[slot]) {
            continue;
        }
        seen++;

        if (!orte_gpr_replica_check_itag_list(ORTE_GPR_REPLICA_OR, num_itags, itags,
                                              1, &(vals[slot]->itag))) {
            continue;
        }

        if (0 > orte_pointer_array_add(&index, orte_gpr_replica_globals.srch_ival, vals[slot])) {
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            orte_pointer_array_clear(orte_gpr_replica_globals.srch_ival);
            return ORTE_ERR_OUT_OF_RESOURCE;
        }
        (orte_gpr_replica_globals.num_srch_ival)++;
    }

    return ORTE_SUCCESS;
}
/**
 * Shut down the GPR framework.
 *
 * Finalizes the selected component (if any) and closes the remaining
 * available components.
 *
 * @return ORTE_SUCCESS always.
 */
int orte_gpr_base_close(void)
{
    OPAL_TRACE(5);

    /* finalize the active component, but only if one was selected */
    if (orte_gpr_base_selected) {
        orte_gpr_base_selected_component.gpr_finalize();
    }

    /* Close whatever is still on the available list (may be one component
       if this is a OMPI RTE program, or possibly several if this is
       ompi_info) */
    mca_base_components_close(orte_gpr_base_output,
                              &orte_gpr_base_components_available,
                              NULL);

    return ORTE_SUCCESS;
}
/*
 * KEYVAL
 *
 * Unpack *num_vals keyval objects from buffer. dest is treated as an array
 * of orte_gpr_keyval_t pointers; each entry receives a newly created object
 * whose key (ORTE_STRING) and value (ORTE_DATA_VALUE) are unpacked in that
 * order. The type argument is not used.
 *
 * Returns ORTE_SUCCESS, ORTE_ERR_OUT_OF_RESOURCE if an object cannot be
 * created, or the unpack error.
 *
 * NOTE(review): on failure the objects created so far (including the one
 * being filled) are left in dest and are not released here - confirm that
 * callers clean them up.
 */
int orte_gpr_base_unpack_keyval(orte_buffer_t *buffer, void *dest, orte_std_cntr_t *num_vals, orte_data_type_t type)
{
    orte_gpr_keyval_t **keyval = (orte_gpr_keyval_t**) dest;
    orte_gpr_keyval_t *kv;
    orte_std_cntr_t i, max_n;
    int rc;

    OPAL_TRACE(4);

    for (i = 0; i < *num_vals; i++) {
        /* create the object and store it in the caller's array right away */
        kv = OBJ_NEW(orte_gpr_keyval_t);
        keyval[i] = kv;
        if (NULL == kv) {
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            return ORTE_ERR_OUT_OF_RESOURCE;
        }

        /* key: a single string */
        max_n = 1;
        rc = orte_dss_unpack_buffer(buffer, &(kv->key), &max_n, ORTE_STRING);
        if (ORTE_SUCCESS != rc) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }

        /* value: a single data value */
        max_n = 1;
        rc = orte_dss_unpack_buffer(buffer, &(kv->value), &max_n, ORTE_DATA_VALUE);
        if (ORTE_SUCCESS != rc) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }
    }

    return ORTE_SUCCESS;
}
/**
 * Remove a container from its segment.
 *
 * Every itagval still held by the container is deleted with
 * orte_gpr_replica_delete_itagval, the container is released and its slot
 * in the segment cleared. When the segment's container count reaches zero,
 * the segment itself is released through orte_gpr_replica_release_segment.
 *
 * @return ORTE_SUCCESS or the first error from the helpers.
 */
int orte_gpr_replica_release_container(orte_gpr_replica_segment_t *seg, orte_gpr_replica_container_t *cptr)
{
    orte_gpr_replica_itagval_t **vals;
    orte_std_cntr_t slot;
    int rc;

    OPAL_TRACE(3);

    /* empty the container first */
    vals = (orte_gpr_replica_itagval_t**)((cptr->itagvals)->addr);
    for (slot = 0; slot < (cptr->itagvals)->size; slot++) {
        if (NULL == vals[slot]) {
            continue;
        }
        rc = orte_gpr_replica_delete_itagval(seg, cptr, vals[slot]);
        if (ORTE_SUCCESS != rc) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }
    }

    /* save the container's slot before releasing it, then clear the slot */
    slot = cptr->index;
    OBJ_RELEASE(cptr);
    orte_pointer_array_set_item(seg->containers, slot, NULL);
    (seg->num_containers)--;

    /* a segment left without containers is released as well */
    if (0 == seg->num_containers) {
        rc = orte_gpr_replica_release_segment(&seg);
        if (ORTE_SUCCESS != rc) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }
    }

    return ORTE_SUCCESS;
}
/*
 * Called by the PLM when an orted reports that a job failed to start.
 * Components are free to handle this however they like; this one simply
 * kills the job: it marks termination as ordered, asks the PLM to terminate
 * all jobs, records the exit status and fires the exit trigger.
 */
void orte_errmgr_default_incomplete_start(orte_jobid_t job, int exit_code)
{
    int rc;

    OPAL_TRACE(1);

    /* If an abort is already in progress, ignore this call.
     * NOTE(review): the early return is taken when opal_atomic_trylock
     * yields zero; the original comment said "returns 1 if already locked",
     * which does not obviously agree with the negated test - confirm the
     * return convention of opal_atomic_trylock.
     */
    if (!opal_atomic_trylock(&orte_abort_inprogress_lock)) {
        OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_output,
                             "%s errmgr:default: abort in progress, ignoring incomplete start on job %s with status %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_JOBID_PRINT(job), exit_code));
        return;
    }

    OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_output,
                         "%s errmgr:default: job %s reported incomplete start with status %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(job), exit_code));

    orte_job_term_ordered = true;

    /* have the plm take down every job */
    rc = orte_plm.terminate_job(ORTE_JOBID_WILDCARD);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
    }

    /* Record the exit status in case the caller did not. Per the original
     * note it can only be set once, so an earlier value is not overwritten.
     */
    ORTE_UPDATE_EXIT_STATUS(exit_code);

    /* wake up orterun so we can exit */
    orte_trigger_event(&orte_exit);
}
/**
 * Shut down the ODLS framework.
 *
 * Does nothing when no components are available. Otherwise finalizes the
 * selected component (if any) and closes the available components.
 *
 * @return ORTE_SUCCESS always.
 */
int orte_odls_base_close(void)
{
    OPAL_TRACE(5);

    /* nothing was opened, so there is nothing to close */
    if (!orte_odls_base.components_available) {
        return ORTE_SUCCESS;
    }

    /* finalize the active component, but only if one was selected */
    if (orte_odls_base.selected) {
        orte_odls_base.selected_component.finalize();
    }

    /* Close all available components (only one in this case) */
    mca_base_components_close(orte_odls_globals.output,
                              &orte_odls_base.available_components,
                              NULL);

    return ORTE_SUCCESS;
}