예제 #1
0
파일: attribute.c 프로젝트: ICLDisco/ompi
/*
 * Front-end function to delete all the attributes on an MPI object
 */
int ompi_attr_delete_all(ompi_attribute_type_t type, void *object,
                         opal_hash_table_t *attr_hash)
{
    int ret, i, num_attrs;
    uint32_t key;
    void *node, *in_node, *attr;
    attribute_value_t **attrs;

    /* Ensure that the table is not empty */

    if (NULL == attr_hash) {
        return MPI_SUCCESS;
    }

    OPAL_THREAD_LOCK(&attribute_lock);

    /* Make an array that contains all attributes in local object's hash */
    num_attrs = opal_hash_table_get_size(attr_hash);
    if (0 == num_attrs) {
        OPAL_THREAD_UNLOCK(&attribute_lock);
        return MPI_SUCCESS;
    }

    attrs = malloc(sizeof(attribute_value_t *) * num_attrs);
    if (NULL == attrs) {
        OPAL_THREAD_UNLOCK(&attribute_lock);
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    ret = opal_hash_table_get_first_key_uint32(attr_hash, &key, &attr, &node);
    for (i = 0; OMPI_SUCCESS == ret; i++) {
        attrs[i] = attr;
        in_node = node;
        ret = opal_hash_table_get_next_key_uint32(attr_hash, &key, &attr,
                                                  in_node, &node);
    }

    /* Sort attributes in the order that they were set */
    qsort(attrs, num_attrs, sizeof(attribute_value_t *), compare_attr_sequence);

    /* Delete attributes in the reverse order that they were set.
       Actually this ordering is required only for MPI_COMM_SELF, as
       specified in MPI-2.2: 8.7.1 Allowing User Functions at Process
       Termination, but we do it for everything -- what the heck.
       :-) */
    for (i = num_attrs - 1; i >= 0; i--) {
        ret = ompi_attr_delete_impl(type, object, attr_hash,
                                    attrs[i]->av_key, true);
        if (OMPI_SUCCESS != ret) {
            break;
        }
    }

    /* All done */

    free(attrs);
    opal_atomic_wmb();
    OPAL_THREAD_UNLOCK(&attribute_lock);
    return ret;
}
예제 #2
0
int ompi_osc_rdma_flush_all (struct ompi_win_t *win)
{
    ompi_osc_rdma_module_t *module = GET_MODULE(win);
    ompi_osc_rdma_sync_t *lock;
    int ret = OMPI_SUCCESS;
    uint32_t key;
    void *node;

    /* flush is only allowed from within a passive target epoch */
    if (!ompi_osc_rdma_in_passive_epoch (module)) {
        return OMPI_ERR_RMA_SYNC;
    }

    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "flush_all: %s", win->w_name);

    /* globally complete all outstanding rdma requests */
    if (OMPI_OSC_RDMA_SYNC_TYPE_LOCK == module->all_sync.type) {
        ompi_osc_rdma_sync_rdma_complete (&module->all_sync);
    }

    /* flush all locks */
    ret = opal_hash_table_get_first_key_uint32 (&module->outstanding_locks, &key, (void **) &lock, &node);
    while (OPAL_SUCCESS == ret) {
        OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "flushing lock %p", (void *) lock);
        ompi_osc_rdma_sync_rdma_complete (lock);
        ret = opal_hash_table_get_next_key_uint32 (&module->outstanding_locks, &key, (void **) &lock,
                                                   node, &node);
    }

    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "flush_all complete");

    return OPAL_SUCCESS;
}
예제 #3
0
파일: attribute.c 프로젝트: bringhurst/ompi
/*
 * Delete all the attributes on an MPI object
 */
int ompi_attr_delete_all(ompi_attribute_type_t type, void *object, 
                         opal_hash_table_t *attr_hash)
{
    int key_ret, del_ret;
    uint32_t key, oldkey;
    void *node, *in_node, *old_attr;

    /* Ensure that the table is not empty */

    if (NULL == attr_hash) {
        return MPI_SUCCESS;
    }
        
    /* Lock this whole sequence of events -- don't let any other
       thread modify the structure of the attribute hash or bitmap
       while we're traversing it */

    OPAL_THREAD_LOCK(&attr_hash_lock);
    /* Get the first key in local object's hash  */
    key_ret = opal_hash_table_get_first_key_uint32(attr_hash,
                                               &key, &old_attr,
                                               &node);
    OPAL_THREAD_UNLOCK(&attr_hash_lock);

    del_ret = OMPI_SUCCESS;
    while (OMPI_SUCCESS == key_ret && OMPI_SUCCESS == del_ret) {

        /* Save this node info for deletion, before we move onto the
           next node */

        in_node = node;
        oldkey = key;
        
        /* Move to the next node */

	OPAL_THREAD_LOCK(&attr_hash_lock);
        key_ret = opal_hash_table_get_next_key_uint32(attr_hash,
                                                      &key, &old_attr, 
                                                      in_node, &node);
	OPAL_THREAD_UNLOCK(&attr_hash_lock);

        /* Now delete this attribute */

        del_ret = ompi_attr_delete(type, object, attr_hash, oldkey, true);
    }

    /* All done */

    return del_ret;
}
예제 #4
0
/** 
 * Function to pack all the entries in the SOS table and send it
 * over to the HNP.
 *
 * @return OPAL_SUCCESS Upon success
 * @return OPAL_FAILURE Upon failure
 *
 * ADK: Presently, we simply rely on orte_show_help to do the aggregation on
 * a per-error basis.
 */
static int opal_sos_send_table(void)
{
    opal_sos_error_t *opal_error;
    opal_buffer_t *buf;
    uint32_t key;
    int rc;
    size_t table_size;
    void *prev_error, *next_error;
    next_error = NULL;

    buf = OBJ_NEW(opal_buffer_t);
    if (NULL == buf) {
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    OPAL_THREAD_LOCK(&opal_sos_table_lock);
    table_size = opal_hash_table_get_size(&opal_sos_table);

    /* Pack the size of the SOS error table */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &table_size, 1, OPAL_SIZE))) {
        ORTE_ERROR_LOG(rc);
        goto error;
    }

    if (OPAL_SUCCESS != opal_hash_table_get_first_key_uint32(&opal_sos_table,
                                                             &key, (void**)&opal_error,
                                                             &prev_error)) {
        rc = ORTE_ERROR;
        goto error;
    }

    /* Pack the sos error object */
    if (ORTE_SUCCESS != (rc = opal_dss_pack_sos_error(buf, opal_error))) {
        ORTE_ERROR_LOG(rc);
        goto error;
    }

    while (OPAL_SUCCESS == opal_hash_table_get_next_key_uint32(&opal_sos_table,
                                                               &key, (void**)&opal_error,
                                                               &prev_error, &next_error))
    {
        if (ORTE_SUCCESS != (rc = opal_dss_pack_sos_error(buf, opal_error))) {
            ORTE_ERROR_LOG(rc);
            goto error;
        }
    }
    OPAL_THREAD_UNLOCK(&opal_sos_table_lock);

    /* Now send the buffer (rc = number of bytes sent) */
    rc = orte_rml.send_buffer(ORTE_PROC_MY_HNP, buf,
                              ORTE_RML_TAG_NOTIFIER_HNP, 0);
    if (rc <= 0) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(buf);
        return rc;
    }

    return ORTE_SUCCESS;

error:
    OPAL_THREAD_UNLOCK(&opal_sos_table_lock);
    OBJ_RELEASE(buf);
    return rc;
}
예제 #5
0
파일: attribute.c 프로젝트: ICLDisco/ompi
/*
 * Copy all the attributes from one MPI object to another.  Called
 * when MPI objects are copied (e.g., back-end actions to
 * MPI_COMM_DUP).
 */
int ompi_attr_copy_all(ompi_attribute_type_t type, void *old_object,
                       void *new_object, opal_hash_table_t *oldattr_hash,
                       opal_hash_table_t *newattr_hash)
{
    int ret;
    int err;
    uint32_t key;
    int flag;
    void *node, *in_node;
    attribute_value_t *old_attr, *new_attr;
    ompi_attribute_keyval_t *hash_value;

    /* If there's nothing to do, just return */
    if (NULL == oldattr_hash) {
        return MPI_SUCCESS;
    }

    OPAL_THREAD_LOCK(&attribute_lock);

    /* Get the first attribute in the object's hash */
    ret = opal_hash_table_get_first_key_uint32(oldattr_hash, &key,
                                               (void **) &old_attr,
                                               &node);

    /* While we still have some attribute in the object's key hash */
    while (OMPI_SUCCESS == ret) {
        in_node = node;

        /* Get the keyval in the main keyval hash - so that we know
           what the copy_attr_fn is */
        err = opal_hash_table_get_value_uint32(keyval_hash, key,
                                               (void **) &hash_value);
        if (OMPI_SUCCESS != err) {
            /* This should not happen! */
            ret = MPI_ERR_INTERN;
            goto out;
        }

        err = 0;
        new_attr = OBJ_NEW(attribute_value_t);
        switch (type) {
        case COMM_ATTR:
            /* Now call the copy_attr_fn */
            COPY_ATTR_CALLBACKS(communicator, old_object, hash_value,
                                old_attr, new_object, new_attr, err);
            break;

        case TYPE_ATTR:
            /* Now call the copy_attr_fn */
            COPY_ATTR_CALLBACKS(datatype, old_object, hash_value,
                                old_attr, new_object, new_attr, err);
            break;

        case WIN_ATTR:
            /* Now call the copy_attr_fn */
            COPY_ATTR_CALLBACKS(win, old_object, hash_value,
                                old_attr, new_object, new_attr, err);
            break;

        default:
            /* This should not happen */
            assert(0);
            break;
        }
        /* Did the callback return non-MPI_SUCCESS? */
        if (0 != err) {
            ret = err;
            goto out;
        }

        /* Hang this off the object's hash */

        /* The COPY_ATTR_CALLBACKS macro will have converted the
           _flag_ callback output value from Fortran's .TRUE. value to
           0/1 (if necessary).  So we only need to check for 0/1 here
           -- not .TRUE. */
        if (1 == flag) {
            if (0 != (hash_value->attr_flag & OMPI_KEYVAL_F77)) {
                if (0 != (hash_value->attr_flag & OMPI_KEYVAL_F77_INT)) {
                    new_attr->av_set_from = OMPI_ATTRIBUTE_FINT;
                } else {
                    new_attr->av_set_from = OMPI_ATTRIBUTE_AINT;
                }
            } else {
                new_attr->av_set_from = OMPI_ATTRIBUTE_C;
            }
            ret = set_value(type, new_object, &newattr_hash, key,
                            new_attr, true);
            if (MPI_SUCCESS != ret) {
                goto out;
            }
        } else {
            OBJ_RELEASE(new_attr);
        }

        ret = opal_hash_table_get_next_key_uint32(oldattr_hash, &key,
                                                  (void **) &old_attr,
                                                  in_node, &node);
    }
    ret = MPI_SUCCESS;

 out:
    /* All done */
    opal_atomic_wmb();
    OPAL_THREAD_UNLOCK(&attribute_lock);
    return ret;
}
예제 #6
0
파일: attribute.c 프로젝트: bringhurst/ompi
/*
 * Copy all the attributes from one MPI object to another
 */
int ompi_attr_copy_all(ompi_attribute_type_t type, void *old_object, 
                       void *new_object, opal_hash_table_t *oldattr_hash,
                       opal_hash_table_t *newattr_hash)
{
    int ret;
    int err;
    uint32_t key;
    int flag;
    void *node, *in_node;
    attribute_value_t *old_attr, *new_attr;
    ompi_attribute_keyval_t *hash_value;

    /* If there's nothing to do, just return */

    if (NULL == oldattr_hash) {
        return MPI_SUCCESS;
    }

    /* Lock this whole sequence of events -- don't let any other
       thread modify the structure of the attrbitue hash or bitmap
       while we're traversing it */

    OPAL_THREAD_LOCK(&attr_hash_lock);
    /* Get the first attribute in the object's hash */
    ret = opal_hash_table_get_first_key_uint32(oldattr_hash, &key, 
                                               (void **) &old_attr,
                                               &node);
    OPAL_THREAD_UNLOCK(&attr_hash_lock);

    /* While we still have some attribute in the object's key hash */
    while (OMPI_SUCCESS == ret) {
        in_node = node;

        /* Get the keyval in the main keyval hash - so that we know
           what the copy_attr_fn is */

	OPAL_THREAD_LOCK(&keyval_hash_lock);
        err = opal_hash_table_get_value_uint32(keyval_hash, key, 
                                               (void **) &hash_value);
	OPAL_THREAD_UNLOCK(&keyval_hash_lock);

        new_attr = OBJ_NEW(attribute_value_t);
        switch (type) {
        case UNUSED_ATTR:  /* keep the compiler happy */
            assert(0);
            break;
        case COMM_ATTR:
            /* Now call the copy_attr_fn */
            COPY_ATTR_CALLBACKS(communicator, old_object, hash_value, 
                                old_attr, new_object, new_attr);
            break;
            
        case TYPE_ATTR:
            /* Now call the copy_attr_fn */
            COPY_ATTR_CALLBACKS(datatype, old_object, hash_value, 
                                old_attr, new_object, new_attr);
            break;

        case WIN_ATTR:
            /* Now call the copy_attr_fn */
            COPY_ATTR_CALLBACKS(win, old_object, hash_value, 
                                old_attr, new_object, new_attr);
            break;
        }

        /* Hang this off the object's hash */
            
        /* The "predefined" parameter to ompi_attr_set() is set to 1,
           so that no comparison is done for prdefined at all and it
           just falls off the error checking loop in attr_set  */

        if (1 == flag) {
            if (0 != (hash_value->attr_flag & OMPI_KEYVAL_F77)) {
                if (0 != (hash_value->attr_flag & OMPI_KEYVAL_F77_MPI1)) {
                    new_attr->av_set_from = OMPI_ATTRIBUTE_FORTRAN_MPI1;
                } else {
                    new_attr->av_set_from = OMPI_ATTRIBUTE_FORTRAN_MPI2;
                }
            } else {
                new_attr->av_set_from = OMPI_ATTRIBUTE_C;
            }
            set_value(type, new_object, &newattr_hash, key, 
                      new_attr, true);

        } else {
            OBJ_RELEASE(new_attr);
        }

	OPAL_THREAD_LOCK(&attr_hash_lock);
        ret = opal_hash_table_get_next_key_uint32(oldattr_hash, &key, 
                                                  (void **) &old_attr, 
                                                  in_node, &node);
	OPAL_THREAD_UNLOCK(&attr_hash_lock);
    }

    /* All done */

    return MPI_SUCCESS;
}
예제 #7
0
int ompi_osc_pt2pt_free(ompi_win_t *win)
{
    int ret = OMPI_SUCCESS;
    ompi_osc_pt2pt_module_t *module = GET_MODULE(win);
    ompi_osc_pt2pt_peer_t *peer;
    uint32_t key;
    void *node;

    if (NULL == module) {
        return OMPI_SUCCESS;
    }

    if (NULL != module->comm) {
        opal_output_verbose(1, ompi_osc_base_framework.framework_output,
                            "pt2pt component destroying window with id %d",
                            ompi_comm_get_cid(module->comm));

        /* finish with a barrier */
        if (ompi_group_size(win->w_group) > 1) {
            (void) module->comm->c_coll.coll_barrier (module->comm,
                                                      module->comm->c_coll.coll_barrier_module);
        }

        /* remove from component information */
        OPAL_THREAD_SCOPED_LOCK(&mca_osc_pt2pt_component.lock,
                                opal_hash_table_remove_value_uint32(&mca_osc_pt2pt_component.modules,
                                                                    ompi_comm_get_cid(module->comm)));
    }

    win->w_osc_module = NULL;

    OBJ_DESTRUCT(&module->outstanding_locks);
    OBJ_DESTRUCT(&module->locks_pending);
    OBJ_DESTRUCT(&module->locks_pending_lock);
    OBJ_DESTRUCT(&module->cond);
    OBJ_DESTRUCT(&module->lock);
    OBJ_DESTRUCT(&module->all_sync);

    /* it is erroneous to close a window with active operations on it so we should
     * probably produce an error here instead of cleaning up */
    OPAL_LIST_DESTRUCT(&module->pending_acc);

    osc_pt2pt_gc_clean (module);
    OPAL_LIST_DESTRUCT(&module->request_gc);
    OPAL_LIST_DESTRUCT(&module->buffer_gc);
    OBJ_DESTRUCT(&module->gc_lock);

    ret = opal_hash_table_get_first_key_uint32 (&module->peer_hash, &key, (void **) &peer, &node);
    while (OPAL_SUCCESS == ret) {
        OBJ_RELEASE(peer);
        ret = opal_hash_table_get_next_key_uint32 (&module->peer_hash, &key, (void **) &peer, node,
                                                   &node);
    }

    OBJ_DESTRUCT(&module->peer_hash);
    OBJ_DESTRUCT(&module->peer_lock);

    if (NULL != module->epoch_outgoing_frag_count) free(module->epoch_outgoing_frag_count);

    if (NULL != module->frag_request && MPI_REQUEST_NULL != module->frag_request) {
        module->frag_request->req_complete_cb = NULL;
        ompi_request_cancel (module->frag_request);
        ompi_request_free (&module->frag_request);
    }

    if (NULL != module->comm) {
        ompi_comm_free(&module->comm);
    }
    if (NULL != module->incoming_buffer) free (module->incoming_buffer);
    if (NULL != module->free_after) free(module->free_after);

    free (module);

    return OMPI_SUCCESS;
}
예제 #8
0
void orte_state_base_check_all_complete(int fd, short args, void *cbdata)
{
    orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
    orte_job_t *jdata = caddy->jdata;

    orte_proc_t *proc;
    int i;
    orte_std_cntr_t j;
    orte_job_t *job;
    orte_node_t *node;
    orte_job_map_t *map;
    orte_std_cntr_t index;
    bool one_still_alive;
    orte_vpid_t lowest=0;
    int32_t i32, *i32ptr;
    uint32_t u32;
    void *nptr;
    char *rtmod;

    opal_output_verbose(2, orte_state_base_framework.framework_output,
                        "%s state:base:check_job_complete on job %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        (NULL == jdata) ? "NULL" : ORTE_JOBID_PRINT(jdata->jobid));

    /* get our "lifeline" routed module */
    rtmod = orte_rml.get_routed(orte_mgmt_conduit);


    if (NULL == jdata || jdata->jobid == ORTE_PROC_MY_NAME->jobid) {
        /* just check to see if the daemons are complete */
        OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
                             "%s state:base:check_job_complete - received NULL job, checking daemons",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        goto CHECK_DAEMONS;
    } else {
        /* mark the job as terminated, but don't override any
         * abnormal termination flags
         */
        if (jdata->state < ORTE_JOB_STATE_UNTERMINATED) {
            jdata->state = ORTE_JOB_STATE_TERMINATED;
        }
    }

    /* tell the IOF that the job is complete */
    if (NULL != orte_iof.complete) {
        orte_iof.complete(jdata);
    }

    /* tell the PMIx server to release its data */
    if (NULL != opal_pmix.server_deregister_nspace) {
        opal_pmix.server_deregister_nspace(jdata->jobid, NULL, NULL);
    }

    i32ptr = &i32;
    if (orte_get_attribute(&jdata->attributes, ORTE_JOB_NUM_NONZERO_EXIT, (void**)&i32ptr, OPAL_INT32) && !orte_abort_non_zero_exit) {
        if (!orte_report_child_jobs_separately || 1 == ORTE_LOCAL_JOBID(jdata->jobid)) {
            /* update the exit code */
            ORTE_UPDATE_EXIT_STATUS(lowest);
        }

        /* warn user */
        opal_output(orte_clean_output,
                    "-------------------------------------------------------\n"
                    "While %s job %s terminated normally, %d %s. Further examination may be required.\n"
                    "-------------------------------------------------------",
                    (1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? "the primary" : "child",
                    (1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? "" : ORTE_LOCAL_JOBID_PRINT(jdata->jobid),
                    i32, (1 == i32) ? "process returned\na non-zero exit code." :
                    "processes returned\nnon-zero exit codes.");
    }

    OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
                         "%s state:base:check_job_completed declared job %s terminated with state %s - checking all jobs",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(jdata->jobid),
                         orte_job_state_to_str(jdata->state)));

    /* if this job is a continuously operating one, then don't do
     * anything further - just return here
     */
    if (NULL != jdata &&
            (orte_get_attribute(&jdata->attributes, ORTE_JOB_CONTINUOUS_OP, NULL, OPAL_BOOL) ||
             ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_RECOVERABLE))) {
        goto CHECK_ALIVE;
    }

    /* if the job that is being checked is the HNP, then we are
     * trying to terminate the orteds. In that situation, we
     * do -not- check all jobs - we simply notify the HNP
     * that the orteds are complete. Also check special case
     * if jdata is NULL - we want
     * to definitely declare the job done if the orteds
     * have completed, no matter what else may be happening.
     * This can happen if a ctrl-c hits in the "wrong" place
     * while launching
     */
CHECK_DAEMONS:
    if (jdata == NULL || jdata->jobid == ORTE_PROC_MY_NAME->jobid) {
        if (0 == orte_routed.num_routes(rtmod)) {
            /* orteds are done! */
            OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
                                 "%s orteds complete - exiting",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
            if (NULL == jdata) {
                jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
            }
            ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_DAEMONS_TERMINATED);
            OBJ_RELEASE(caddy);
            return;
        }
        OBJ_RELEASE(caddy);
        return;
    }

    /* Release the resources used by this job. Since some errmgrs may want
     * to continue using resources allocated to the job as part of their
     * fault recovery procedure, we only do this once the job is "complete".
     * Note that an aborted/killed job -is- flagged as complete and will
     * therefore have its resources released. We need to do this after
     * we call the errmgr so that any attempt to restart the job will
     * avoid doing so in the exact same place as the current job
     */
    if (NULL != jdata->map && jdata->state == ORTE_JOB_STATE_TERMINATED) {
        map = jdata->map;
        for (index = 0; index < map->nodes->size; index++) {
            if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, index))) {
                continue;
            }
            OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
                                 "%s releasing procs for job %s from node %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_JOBID_PRINT(jdata->jobid), node->name));
            for (i = 0; i < node->procs->size; i++) {
                if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, i))) {
                    continue;
                }
                if (proc->name.jobid != jdata->jobid) {
                    /* skip procs from another job */
                    continue;
                }
                node->slots_inuse--;
                node->num_procs--;
                OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
                                     "%s releasing proc %s from node %s",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                     ORTE_NAME_PRINT(&proc->name), node->name));
                /* set the entry in the node array to NULL */
                opal_pointer_array_set_item(node->procs, i, NULL);
                /* release the proc once for the map entry */
                OBJ_RELEASE(proc);
            }
            /* set the node location to NULL */
            opal_pointer_array_set_item(map->nodes, index, NULL);
            /* maintain accounting */
            OBJ_RELEASE(node);
            /* flag that the node is no longer in a map */
            ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED);
        }
        OBJ_RELEASE(map);
        jdata->map = NULL;
    }

CHECK_ALIVE:
    /* now check to see if all jobs are done - trigger notification of this jdata
     * object when we find it
     */
    one_still_alive = false;
    j = opal_hash_table_get_first_key_uint32(orte_job_data, &u32, (void **)&job, &nptr);
    while (OPAL_SUCCESS == j) {
        /* skip the daemon job */
        if (job->jobid == ORTE_PROC_MY_NAME->jobid) {
            goto next;
        }
        /* if this is the job we are checking AND it normally terminated,
         * then activate the "notify_completed" state - this will release
         * the job state, but is provided so that the HNP main code can
         * take alternative actions if desired. If the state is killed_by_cmd,
         * then go ahead and release it. We cannot release it if it
         * abnormally terminated as mpirun needs the info so it can
         * report appropriately to the user
         *
         * NOTE: do not release the primary job (j=1) so we
         * can pretty-print completion message
         */
        if (NULL != jdata && job->jobid == jdata->jobid) {
            if (jdata->state == ORTE_JOB_STATE_TERMINATED) {
                OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
                                     "%s state:base:check_job_completed state is terminated - activating notify",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
                ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_NOTIFY_COMPLETED);
                one_still_alive = true;
            } else if (jdata->state == ORTE_JOB_STATE_KILLED_BY_CMD ||
                       jdata->state == ORTE_JOB_STATE_NOTIFIED) {
                OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
                                     "%s state:base:check_job_completed state is killed or notified - cleaning up",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
                /* release this object, ensuring that the
                 * pointer array internal accounting
                 * is maintained!
                 */
                if (1 < j) {
                    if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_DEBUGGER_DAEMON)) {
                        /* this was a debugger daemon. notify that a debugger has detached */
                        ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_DEBUGGER_DETACH);
                    }
                    OBJ_RELEASE(jdata);
                }
            }
            goto next;
        }
        /* if the job is flagged to not be monitored, skip it */
        if (ORTE_FLAG_TEST(job, ORTE_JOB_FLAG_DO_NOT_MONITOR)) {
            goto next;
        }
        /* when checking for job termination, we must be sure to NOT check
         * our own job as it - rather obviously - has NOT terminated!
         */
        if (ORTE_JOB_STATE_NOTIFIED != job->state) {
            /* we have at least one job that is not done yet - we cannot
             * just return, though, as we need to ensure we cleanout the
             * job data for the job that just completed
             */
            OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
                                 "%s state:base:check_job_completed job %s is not terminated (%d:%d)",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_JOBID_PRINT(job->jobid),
                                 job->num_terminated, job->num_procs));
            one_still_alive = true;
        }
        else {
            OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
                                 "%s state:base:check_job_completed job %s is terminated (%d vs %d [%s])",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_JOBID_PRINT(job->jobid),
                                 job->num_terminated, job->num_procs,
                                 (NULL == jdata) ? "UNKNOWN" : orte_job_state_to_str(jdata->state) ));
        }
next:
        j = opal_hash_table_get_next_key_uint32(orte_job_data, &u32, (void **)&job, nptr, &nptr);
    }

    /* if a job is still alive, we just return */
    if (one_still_alive) {
        OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
                             "%s state:base:check_job_completed at least one job is not terminated",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        OBJ_RELEASE(caddy);
        return;
    }
    /* if we get here, then all jobs are done, so terminate */
    OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
                         "%s state:base:check_job_completed all jobs terminated",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* stop the job timeout event, if set */
    if (NULL != orte_mpiexec_timeout) {
        OBJ_RELEASE(orte_mpiexec_timeout);
        orte_mpiexec_timeout = NULL;
    }

    /* set the exit status to 0 - this will only happen if it
     * wasn't already set by an error condition
     */
    ORTE_UPDATE_EXIT_STATUS(0);

    /* order daemon termination - this tells us to cleanup
     * our local procs as well as telling remote daemons
     * to die
     */
    orte_plm.terminate_orteds();

    OBJ_RELEASE(caddy);
}