Example #1
0
int orte_util_add_hostfile_nodes(opal_list_t *nodes,
                                 bool *override_oversubscribed,
                                 char *hostfile)
{
    opal_list_t exclude;
    opal_list_item_t *item, *itm;
    int rc;

    
    OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
                         "%s hostfile: checking hostfile %s for nodes",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hostfile));

    OBJ_CONSTRUCT(&exclude, opal_list_t);
    
    /* parse the hostfile and add the contents to the list */
    if (ORTE_SUCCESS != (rc = hostfile_parse(hostfile, nodes, &exclude, false))) {
        goto cleanup;
    }
    
    /* parse the nodes to check for any relative node directives */
    for (item = opal_list_get_first(nodes);
         item != opal_list_get_end(nodes);
         item = opal_list_get_next(item)) {
        orte_node_t *node=(orte_node_t*)item;
        
        if ('+' == node->name[0]) {
            orte_show_help("help-hostfile.txt", "hostfile:relative-syntax",
                           true, node->name);
            rc = ORTE_ERR_SILENT;
            goto cleanup;
        }
    }
    
    /* remove from the list of nodes those that are in the exclude list */
    while(NULL != (item = opal_list_remove_first(&exclude))) {
        orte_node_t *exnode = (orte_node_t*)item;
        /* check for matches on nodes */
        for (itm = opal_list_get_first(nodes);
             itm != opal_list_get_end(nodes);
             itm = opal_list_get_next(itm)) {
            orte_node_t *node=(orte_node_t*)itm;
            if (0 == strcmp(exnode->name, node->name)) {
                /* match - remove it */
                opal_list_remove_item(nodes, itm);
                OBJ_RELEASE(itm);
                break;
            }
        }
        OBJ_RELEASE(item);
    }
    
    /* indicate that ORTE should override any oversubscribed conditions
     * based on local hardware limits since the user (a) might not have
     * provided us any info on the #slots for a node, and (b) the user
     * might have been wrong! If we don't check the number of local physical
     * processors, then we could be too aggressive on our sched_yield setting
     * and cause performance problems.
     */
    *override_oversubscribed = true;

cleanup:
    OBJ_DESTRUCT(&exclude);

    return rc;
}
Example #2
0
int orte_rml_oob_send_nb(struct orte_rml_base_module_t *mod,
                         orte_process_name_t* peer,
                         struct iovec* iov,
                         int count,
                         orte_rml_tag_t tag,
                         orte_rml_callback_fn_t cbfunc,
                         void* cbdata)
{
    orte_rml_recv_t *rcv;
    orte_rml_send_t *snd;
    int bytes;
    orte_self_send_xfer_t *xfer;
    int i;
    char* ptr;

    OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
                         "%s rml_send to peer %s at tag %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(peer), tag));

    if (ORTE_RML_TAG_INVALID == tag) {
        /* cannot send to an invalid tag */
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return ORTE_ERR_BAD_PARAM;
    }
    if (NULL == peer ||
        OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_NAME_INVALID, peer)) {
        /* cannot send to an invalid peer */
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return ORTE_ERR_BAD_PARAM;
    }

    OPAL_TIMING_EVENT((&tm_rml, "to %s", ORTE_NAME_PRINT(peer)));

    /* if this is a message to myself, then just post the message
     * for receipt - no need to dive into the oob
     */
    if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, peer, ORTE_PROC_MY_NAME)) {  /* local delivery */
        OPAL_OUTPUT_VERBOSE((1, orte_rml_base_framework.framework_output,
                             "%s rml_send_iovec_to_self at tag %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tag));
        /* send to self is a tad tricky - we really don't want
         * to track the send callback function throughout the recv
         * process and execute it upon receipt as this would provide
         * very different timing from a non-self message. Specifically,
         * if we just retain a pointer to the incoming data
         * and then execute the send callback prior to the receive,
         * then the caller will think we are done with the data and
         * can release it. So we have to copy the data in order to
         * execute the send callback prior to receiving the message.
         *
         * In truth, this really is a better mimic of the non-self
         * message behavior. If we actually pushed the message out
         * on the wire and had it loop back, then we would receive
         * a new block of data anyway.
         */

        /* setup the send callback */
        xfer = OBJ_NEW(orte_self_send_xfer_t);
        xfer->iov = iov;
        xfer->count = count;
        xfer->cbfunc.iov = cbfunc;
        xfer->tag = tag;
        xfer->cbdata = cbdata;
        /* setup the event for the send callback */
        opal_event_set(orte_event_base, &xfer->ev, -1, OPAL_EV_WRITE, send_self_exe, xfer);
        opal_event_set_priority(&xfer->ev, ORTE_MSG_PRI);
        opal_event_active(&xfer->ev, OPAL_EV_WRITE, 1);

        /* copy the message for the recv */
        rcv = OBJ_NEW(orte_rml_recv_t);
        rcv->sender = *peer;
        rcv->tag = tag;
        /* get the total number of bytes in the iovec array */
        bytes = 0;
        for (i = 0 ; i < count ; ++i) {
            bytes += iov[i].iov_len;
        }
        /* get the required memory allocation */
        if (0 < bytes) {
            rcv->iov.iov_base = (IOVBASE_TYPE*)malloc(bytes);
            rcv->iov.iov_len = bytes;
            /* transfer the bytes */
            ptr =  (char*)rcv->iov.iov_base;
            for (i = 0 ; i < count ; ++i) {
                memcpy(ptr, iov[i].iov_base, iov[i].iov_len);
                ptr += iov[i].iov_len;
            }
        }
        /* post the message for receipt - since the send callback was posted
         * first and has the same priority, it will execute first
         */
        ORTE_RML_ACTIVATE_MESSAGE(rcv);
        return ORTE_SUCCESS;
    }

    snd = OBJ_NEW(orte_rml_send_t);
    snd->dst = *peer;
    snd->origin = *ORTE_PROC_MY_NAME;
    snd->tag = tag;
    snd->iov = iov;
    snd->count = count;
    snd->cbfunc.iov = cbfunc;
    snd->cbdata = cbdata;
    snd->routed = strdup(mod->routed);

    /* activate the OOB send state */
    ORTE_OOB_SEND(snd);

    return ORTE_SUCCESS;
}
Example #3
0
int
ompi_osc_portals4_rget(void *origin_addr,
                       int origin_count,
                       struct ompi_datatype_t *origin_dt,
                       int target,
                       OPAL_PTRDIFF_TYPE target_disp,
                       int target_count,
                       struct ompi_datatype_t *target_dt,
                       struct ompi_win_t *win,
                       struct ompi_request_t **ompi_req)
{
    int ret;
    ompi_osc_portals4_request_t *request;
    ompi_osc_portals4_module_t *module =
        (ompi_osc_portals4_module_t*) win->w_osc_module;
    ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
    size_t length;
    size_t offset;
    ptl_handle_md_t md_h;
    void *md_base;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "rget: 0x%lx, %d, %s, %d, %d, %d, %s, 0x%lx",
                         (unsigned long) origin_addr, origin_count,
                         origin_dt->name, target, (int) target_disp,
                         target_count, target_dt->name,
                         (unsigned long) win));

    OMPI_OSC_PORTALS4_REQUEST_ALLOC(win, request);
    if (NULL == request) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
    *ompi_req = &request->super;

    offset = get_displacement(module, target) * target_disp;

    if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count) ||
        !ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
        OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
        opal_output(ompi_osc_base_framework.framework_output,
                    "MPI_Rget: transfer of non-contiguous memory is not currently supported.\n");
        return OMPI_ERR_NOT_SUPPORTED;
    } else {
        (void)opal_atomic_add_64(&module->opcount, 1);
        request->ops_expected = 1;
        ret = ompi_datatype_type_size(origin_dt, &length);
        if (OMPI_SUCCESS != ret) {
            OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
            return ret;
        }
        length *= origin_count;
        ompi_osc_portals4_get_md(origin_addr, module->req_md_h, &md_h, &md_base);
        ret = PtlGet(md_h,
                     (ptl_size_t) ((char*) origin_addr - (char*) md_base),
                     length,
                     peer,
                     module->pt_idx,
                     module->match_bits,
                     offset,
                     request);
        if (OMPI_SUCCESS != ret) {
            OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
            return ret;
        }
    }

    return OMPI_SUCCESS;
}
Example #4
0
/* Target EQ */
static int
portals4_progress(void)
{
    int count = 0, ret;
    ptl_event_t ev;
    ompi_coll_portals4_request_t *ptl_request;

    while (true) {
        ret = PtlEQGet(mca_coll_portals4_component.eq_h, &ev);
        if (PTL_OK == ret) {

            OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, "event type=%s\n", evname[ev.type]));
            count++;

            switch (ev.type) {
            case PTL_EVENT_PUT:
                /* Non-Blocking / request */
                if (PTL_OK == ev.ni_fail_type) {
                    OPAL_OUTPUT_VERBOSE((50, ompi_coll_base_framework.framework_output,
                            "hdr_data %p, matchbits 0x%lx",
                            (void*) ev.hdr_data, ev.match_bits));
                    assert(0 != ev.hdr_data);
                    ptl_request = (ompi_coll_portals4_request_t*) ev.hdr_data;
                    assert(NULL != ptl_request);

                    switch (ptl_request->type) {
                    case OMPI_COLL_PORTALS4_TYPE_BARRIER:
                        ompi_coll_portals4_ibarrier_intra_fini(ptl_request);
                        break;
                    case OMPI_COLL_PORTALS4_TYPE_BCAST:
                        ompi_coll_portals4_ibcast_intra_fini(ptl_request);
                        break;
                    case OMPI_COLL_PORTALS4_TYPE_REDUCE:
                        ompi_coll_portals4_ireduce_intra_fini(ptl_request);
                        break;
                    case OMPI_COLL_PORTALS4_TYPE_ALLREDUCE:
                        ompi_coll_portals4_iallreduce_intra_fini(ptl_request);
                        break;
                    case OMPI_COLL_PORTALS4_TYPE_SCATTER:
                        ompi_coll_portals4_iscatter_intra_fini(ptl_request);
                        break;
                    case OMPI_COLL_PORTALS4_TYPE_GATHER:
                        ompi_coll_portals4_igather_intra_fini(ptl_request);
                        break;
                    }
                }

                if (PTL_OK != ev.ni_fail_type) {
                    OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, "ni_fail_type=%s\n", failtype[ev.ni_fail_type]));
                }
                break;
            default:
                opal_output(ompi_coll_base_framework.framework_output,
                        "Unexpected event of type %d", ev.type);
                break;
            }

        }
        else if (PTL_EQ_EMPTY == ret) {
            break;
        }
        else if (PTL_EQ_DROPPED == ret) {
            opal_output(ompi_coll_base_framework.framework_output, "Flow control situation without recovery (EQ_DROPPED)\n");
            abort();
        }
        else {
            opal_output(ompi_coll_base_framework.framework_output, "Error returned from PtlEQGet: %d", ret);
            break;
        }
    }
    return count;
}
/* ////////////////////////////////////////////////////////////////////////// */
static int
segment_create(opal_shmem_ds_t *ds_buf,
               const char *file_name,
               size_t size)
{
    int rc = OPAL_SUCCESS;
    char *real_file_name = NULL;
    pid_t my_pid = getpid();
    bool space_available = false;
    uint64_t amount_space_avail = 0;

    /* the real size of the shared memory segment.  this includes enough space
     * to store our segment header.
     */
    size_t real_size = size + sizeof(opal_shmem_seg_hdr_t);
    opal_shmem_seg_hdr_t *seg_hdrp = MAP_FAILED;

    /* init the contents of opal_shmem_ds_t */
    shmem_ds_reset(ds_buf);

    /* change the path of shmem mmap's backing store? */
    if (0 != opal_shmem_mmap_relocate_backing_file) {
        int err;
        if (path_usable(opal_shmem_mmap_backing_file_base_dir, &err)) {
            if (NULL ==
                (real_file_name =
                     get_uniq_file_name(opal_shmem_mmap_backing_file_base_dir,
                                        file_name))) {
                /* out of resources */
                return OPAL_ERROR;
            }
        }
        /* a relocated backing store was requested, but the path specified
         * cannot be used :-(. if the flag is negative, then warn and continue
         * with the default path.  otherwise, fail.
         */
        else if (opal_shmem_mmap_relocate_backing_file < 0) {
            opal_output(0, "shmem: mmap: WARNING: could not relocate "
                        "backing store to \"%s\" (%s).  Continuing with "
                        "default path.\n",
                        opal_shmem_mmap_backing_file_base_dir, strerror(err));
        }
        /* must be positive, so fail */
        else {
            opal_output(0, "shmem: mmap: WARNING: could not relocate "
                        "backing store to \"%s\" (%s).  Cannot continue with "
                        "shmem mmap.\n", opal_shmem_mmap_backing_file_base_dir,
                        strerror(err));
            return OPAL_ERROR;
        }
    }
    /* are we using the default path? */
    if (NULL == real_file_name) {
        /* use the path specified by the caller of this function */
        if (NULL == (real_file_name = strdup(file_name))) {
            /* out of resources */
            return OPAL_ERROR;
        }
    }

    OPAL_OUTPUT_VERBOSE(
        (70, opal_shmem_base_framework.framework_output,
         "%s: %s: backing store base directory: %s\n",
         mca_shmem_mmap_component.super.base_version.mca_type_name,
         mca_shmem_mmap_component.super.base_version.mca_component_name,
         real_file_name)
    );

    /* determine whether the specified filename is on a network file system.
     * this is an important check because if the backing store is located on
     * a network filesystem, the user may see a shared memory performance hit.
     */
    if (opal_shmem_mmap_nfs_warning && opal_path_nfs(real_file_name)) {
        char hn[MAXHOSTNAMELEN];
        gethostname(hn, MAXHOSTNAMELEN - 1);
        hn[MAXHOSTNAMELEN - 1] = '\0';
        opal_show_help("help-opal-shmem-mmap.txt", "mmap on nfs", 1, hn,
                       real_file_name);
    }
    /* let's make sure we have enough space for the backing file */
    if (OPAL_SUCCESS != (rc = enough_space(real_file_name,
                                           real_size,
                                           &amount_space_avail,
                                           &space_available))) {
        opal_output(0, "shmem: mmap: an error occurred while determining "
                    "whether or not %s could be created.", real_file_name);
        /* rc is set */
        goto out;
    }
    if (!space_available) {
        char hn[MAXHOSTNAMELEN];
        gethostname(hn, MAXHOSTNAMELEN - 1);
        hn[MAXHOSTNAMELEN - 1] = '\0';
        rc = OPAL_ERR_OUT_OF_RESOURCE;
        opal_show_help("help-opal-shmem-mmap.txt", "target full", 1,
                       real_file_name, hn, (unsigned long)real_size,
                       (unsigned long long)amount_space_avail);
        goto out;
    }
    /* enough space is available, so create the segment */
    if (-1 == (ds_buf->seg_id = open(real_file_name, O_CREAT | O_RDWR, 0600))) {
        int err = errno;
        char hn[MAXHOSTNAMELEN];
        gethostname(hn, MAXHOSTNAMELEN - 1);
        hn[MAXHOSTNAMELEN - 1] = '\0';
        opal_show_help("help-opal-shmem-mmap.txt", "sys call fail", 1, hn,
                       "open(2)", "", strerror(err), err);
        rc = OPAL_ERROR;
        goto out;
    }
    /* size backing file - note the use of real_size here */
    if (0 != ftruncate(ds_buf->seg_id, real_size)) {
        int err = errno;
        char hn[MAXHOSTNAMELEN];
        gethostname(hn, MAXHOSTNAMELEN - 1);
        hn[MAXHOSTNAMELEN - 1] = '\0';
        opal_show_help("help-opal-shmem-mmap.txt", "sys call fail", 1, hn,
                       "ftruncate(2)", "", strerror(err), err);
        rc = OPAL_ERROR;
        goto out;
    }
    if (MAP_FAILED == (seg_hdrp = (opal_shmem_seg_hdr_t *)
                                  mmap(NULL, real_size,
                                       PROT_READ | PROT_WRITE, MAP_SHARED,
                                       ds_buf->seg_id, 0))) {
        int err = errno;
        char hn[MAXHOSTNAMELEN];
        gethostname(hn, MAXHOSTNAMELEN - 1);
        hn[MAXHOSTNAMELEN - 1] = '\0';
        opal_show_help("help-opal-shmem-mmap.txt", "sys call fail", 1, hn,
                       "mmap(2)", "", strerror(err), err);
        rc = OPAL_ERROR;
        goto out;
    }
    /* all is well */
    else {
        /* -- initialize the shared memory segment -- */
        opal_atomic_rmb();

        /* init segment lock */
        opal_atomic_init(&seg_hdrp->lock, OPAL_ATOMIC_UNLOCKED);
        /* i was the creator of this segment, so note that fact */
        seg_hdrp->cpid = my_pid;

        opal_atomic_wmb();

        /* -- initialize the contents of opal_shmem_ds_t -- */
        ds_buf->seg_cpid = my_pid;
        ds_buf->seg_size = real_size;
        ds_buf->seg_base_addr = (unsigned char *)seg_hdrp;
        (void)strncpy(ds_buf->seg_name, real_file_name, OPAL_PATH_MAX - 1);

        /* set "valid" bit because setment creation was successful */
        OPAL_SHMEM_DS_SET_VALID(ds_buf);

        OPAL_OUTPUT_VERBOSE(
            (70, opal_shmem_base_framework.framework_output,
             "%s: %s: create successful "
             "(id: %d, size: %lu, name: %s)\n",
             mca_shmem_mmap_component.super.base_version.mca_type_name,
             mca_shmem_mmap_component.super.base_version.mca_component_name,
             ds_buf->seg_id, (unsigned long)ds_buf->seg_size, ds_buf->seg_name)
        );
    }

out:
    /* in this component, the id is the file descriptor returned by open.  this
     * check is here to see if it is safe to call close on the file descriptor.
     * that is, we are making sure that our call to open was successful and
     * we are not not in an error path.
     */
    if (-1 != ds_buf->seg_id) {
        if (0 != close(ds_buf->seg_id)) {
            int err = errno;
            char hn[MAXHOSTNAMELEN];
            gethostname(hn, MAXHOSTNAMELEN - 1);
            hn[MAXHOSTNAMELEN - 1] = '\0';
            opal_show_help("help-opal-shmem-mmap.txt", "sys call fail", 1, hn,
                           "close(2)", "", strerror(err), err);
            rc = OPAL_ERROR;
         }
     }
    /* an error occured, so invalidate the shmem object and munmap if needed */
    if (OPAL_SUCCESS != rc) {
        if (MAP_FAILED != seg_hdrp) {
            munmap((void *)seg_hdrp, real_size);
        }
        shmem_ds_reset(ds_buf);
    }
    /* safe to free now because its contents have already been copied */
    if (NULL != real_file_name) {
        free(real_file_name);
    }
    return rc;
}
int orte_grpcomm_base_comm_start(void)
{
    int rc;

    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
                         "%s grpcomm:base:receive start comm",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    
    if (!recv_issued) {
        if (ORTE_PROC_IS_HNP || ORTE_PROC_IS_DAEMON) {
            if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
                                                              ORTE_RML_TAG_COLLECTIVE,
                                                              ORTE_RML_PERSISTENT,
                                                              daemon_local_recv, NULL))) {
                ORTE_ERROR_LOG(rc);
                recv_issued = false;
                return rc;
            }
            if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
                                                              ORTE_RML_TAG_XCAST,
                                                              ORTE_RML_PERSISTENT,
                                                              orte_grpcomm_base_xcast_recv, NULL))) {
                ORTE_ERROR_LOG(rc);
                recv_issued = false;
                return rc;
            }
            if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
                                                              ORTE_RML_TAG_DAEMON_COLL,
                                                              ORTE_RML_PERSISTENT,
                                                              daemon_coll_recv, NULL))) {
                ORTE_ERROR_LOG(rc);
                recv_issued = false;
                return rc;
            }
            if (ORTE_PROC_IS_DAEMON) {
                if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
                                                                  ORTE_RML_TAG_ROLLUP,
                                                                  ORTE_RML_PERSISTENT,
                                                                  orte_grpcomm_base_rollup_recv, NULL))) {
                    ORTE_ERROR_LOG(rc);
                    recv_issued = false;
                    return rc;
                }
            }
            if (ORTE_PROC_IS_HNP) {
                if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
                                                                  ORTE_RML_TAG_COLL_ID_REQ,
                                                                  ORTE_RML_PERSISTENT,
                                                                  coll_id_req, NULL))) {
                    ORTE_ERROR_LOG(rc);
                    recv_issued = false;
                    return rc;
                }
            }
            recv_issued = true;
        } else if (ORTE_PROC_IS_APP) {
            if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
                                                              ORTE_RML_TAG_COLLECTIVE,
                                                              ORTE_RML_PERSISTENT,
                                                              app_recv, NULL))) {
                ORTE_ERROR_LOG(rc);
                recv_issued = false;
                return rc;
            }
            recv_issued = true;
        }
    }

    return ORTE_SUCCESS;
}
int
mca_btl_portals4_get(struct mca_btl_base_module_t* btl_base,
                    struct mca_btl_base_endpoint_t* btl_peer,
                    void *local_address,
                    uint64_t remote_address,
                    struct mca_btl_base_registration_handle_t *local_handle,
                    struct mca_btl_base_registration_handle_t *remote_handle,
                    size_t size,
                    int flags,
                    int order,
                    mca_btl_base_rdma_completion_fn_t cbfunc,
                    void *cbcontext,
                    void *cbdata)
{
    mca_btl_portals4_module_t *portals4_btl = (mca_btl_portals4_module_t *) btl_base;
    mca_btl_portals4_frag_t   *frag         = NULL;
    ptl_md_t md;
    int ret;

    /* reserve space in the event queue for rdma operations immediately */
    while (OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, 1) >
           portals4_btl->portals_max_outstanding_ops) {
        OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
        OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "Call to mca_btl_portals4_component_progress (1)\n"));
        mca_btl_portals4_component_progress();
    }

    OPAL_BTL_PORTALS4_FRAG_ALLOC_USER(portals4_btl, frag);
    if (NULL == frag){
        OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
        return OPAL_ERROR;
    }
    OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
        "mca_btl_portals4_get: Incrementing portals_outstanding_ops=%d frag=%p",
        portals4_btl->portals_outstanding_ops, (void *)frag));

    frag->rdma_cb.func         = cbfunc;
    frag->rdma_cb.context      = cbcontext;
    frag->rdma_cb.data         = cbdata;
    frag->rdma_cb.local_handle = local_handle;

    frag->endpoint = btl_peer;
    frag->hdr.tag = MCA_BTL_TAG_MAX;

    if (OPAL_UNLIKELY(PTL_OK != ret)) {
        opal_output_verbose(1, opal_btl_base_framework.framework_output,
                            "%s:%d: PtlMDBind failed: %d",
                            __FILE__, __LINE__, ret);
        return OPAL_ERROR;
    }

    frag->match_bits = remote_handle->key;
    frag->addr = local_address;
    frag->length = size;
    frag->peer_proc = btl_peer->ptl_proc;

    OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PtlGet start=%p length=%ld nid=%x pid=%x match_bits=%lx\n",
        md.start, md.length, btl_peer->ptl_proc.phys.nid, btl_peer->ptl_proc.phys.pid, frag->match_bits));

    ret = PtlGet(portals4_btl->send_md_h,
                 (ptl_size_t) local_address,
                 size,
                 btl_peer->ptl_proc,
                 portals4_btl->recv_idx,
                 frag->match_bits, /* match bits */
                 0,
                 frag);
    if (OPAL_UNLIKELY(PTL_OK != ret)) {
        opal_output_verbose(1, opal_btl_base_framework.framework_output,
                            "%s:%d: PtlGet failed: %d",
                            __FILE__, __LINE__, ret);
        return OPAL_ERROR;
    }
    OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "SUCCESS: PtlGet start=%p length=%ld nid=%x pid=%x match_bits=%lx\n",
        md.start, md.length, btl_peer->ptl_proc.phys.nid, btl_peer->ptl_proc.phys.pid, frag->match_bits));

    return OPAL_SUCCESS;
}
Example #8
0
void orte_state_base_track_procs(int fd, short argc, void *cbdata)
{
    orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
    orte_process_name_t *proc = &caddy->name;
    orte_proc_state_t state = caddy->proc_state;
    orte_job_t *jdata;
    orte_proc_t *pdata;
    int i;

    opal_output_verbose(5, orte_state_base_framework.framework_output,
                        "%s state:base:track_procs called for proc %s state %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(proc),
                        orte_proc_state_to_str(state));

    /* get the job object for this proc */
    if (NULL == (jdata = orte_get_job_data_object(proc->jobid))) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        goto cleanup;
    }
    pdata = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->vpid);

    if (ORTE_PROC_STATE_RUNNING == state) {
        /* update the proc state */
        pdata->state = state;
        jdata->num_launched++;
        if (jdata->num_launched == jdata->num_procs) {
            if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_DEBUGGER_DAEMON)) {
                ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_READY_FOR_DEBUGGERS);
            } else {
                ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_RUNNING);
            }
        }
    } else if (ORTE_PROC_STATE_REGISTERED == state) {
        /* update the proc state */
        pdata->state = state;
        jdata->num_reported++;
        if (jdata->num_reported == jdata->num_procs) {
            ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_REGISTERED);
        }
    } else if (ORTE_PROC_STATE_IOF_COMPLETE == state) {
        /* update the proc state */
        pdata->state = state;
        /* Release only the stdin IOF file descriptor for this child, if one
         * was defined. File descriptors for the other IOF channels - stdout,
         * stderr, and stddiag - were released when their associated pipes
         * were cleared and closed due to termination of the process
         */
        if (NULL != orte_iof.close) {
            orte_iof.close(proc, ORTE_IOF_STDIN);
        }
        ORTE_FLAG_SET(pdata, ORTE_PROC_FLAG_IOF_COMPLETE);
        if (ORTE_FLAG_TEST(pdata, ORTE_PROC_FLAG_WAITPID)) {
            ORTE_ACTIVATE_PROC_STATE(proc, ORTE_PROC_STATE_TERMINATED);
        }
    } else if (ORTE_PROC_STATE_WAITPID_FIRED == state) {
        /* update the proc state */
        pdata->state = state;
        ORTE_FLAG_SET(pdata, ORTE_PROC_FLAG_WAITPID);
        if (ORTE_FLAG_TEST(pdata, ORTE_PROC_FLAG_IOF_COMPLETE)) {
            ORTE_ACTIVATE_PROC_STATE(proc, ORTE_PROC_STATE_TERMINATED);
        }
    } else if (ORTE_PROC_STATE_TERMINATED == state) {
        /* update the proc state */
        ORTE_FLAG_UNSET(pdata, ORTE_PROC_FLAG_ALIVE);
        pdata->state = state;
	if (ORTE_FLAG_TEST(pdata, ORTE_PROC_FLAG_LOCAL)) {
            /* Clean up the session directory as if we were the process
             * itself.  This covers the case where the process died abnormally
             * and didn't cleanup its own session directory.
             */
            orte_session_dir_finalize(proc);
	}
        /* if we are trying to terminate and our routes are
         * gone, then terminate ourselves IF no local procs
         * remain (might be some from another job)
         */
        if (orte_orteds_term_ordered &&
            0 == orte_routed.num_routes()) {
            for (i=0; i < orte_local_children->size; i++) {
                if (NULL != (pdata = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i)) &&
                    ORTE_FLAG_TEST(pdata, ORTE_PROC_FLAG_ALIVE)) {
                    /* at least one is still alive */
                    goto cleanup;
                }
            }
            /* call our appropriate exit procedure */
            OPAL_OUTPUT_VERBOSE((5, orte_state_base_framework.framework_output,
                                 "%s state:base all routes and children gone - exiting",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
            ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_DAEMONS_TERMINATED);
            goto cleanup;
        }
        /* return the allocated slot for reuse */
        cleanup_node(pdata);
	/* track job status */
	jdata->num_terminated++;
	if (jdata->num_terminated == jdata->num_procs) {
            ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_TERMINATED);
	}
    }

 cleanup:
    OBJ_RELEASE(caddy);
}
Example #9
0
void orte_state_base_check_all_complete(int fd, short args, void *cbdata)
{
    orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
    orte_job_t *jdata = caddy->jdata;

    orte_proc_t *proc;
    int i;
    orte_std_cntr_t j;
    orte_job_t *job;
    orte_node_t *node;
    orte_job_map_t *map;
    orte_std_cntr_t index;
    bool one_still_alive;
    orte_vpid_t lowest=0;
    int32_t i32, *i32ptr;

    opal_output_verbose(2, orte_state_base_framework.framework_output,
                        "%s state:base:check_job_complete on job %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        (NULL == jdata) ? "NULL" : ORTE_JOBID_PRINT(jdata->jobid));

    if (NULL == jdata || jdata->jobid == ORTE_PROC_MY_NAME->jobid) {
        /* just check to see if the daemons are complete */
        OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
                             "%s state:base:check_job_complete - received NULL job, checking daemons",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        goto CHECK_DAEMONS;
    } else {
        /* mark the job as terminated, but don't override any
         * abnormal termination flags
         */
        if (jdata->state < ORTE_JOB_STATE_UNTERMINATED) {
            jdata->state = ORTE_JOB_STATE_TERMINATED;
        }
    }

    /* tell the IOF that the job is complete */
    if (NULL != orte_iof.complete) {
        orte_iof.complete(jdata);
    }

    i32ptr = &i32;
    if (orte_get_attribute(&jdata->attributes, ORTE_JOB_NUM_NONZERO_EXIT, (void**)&i32ptr, OPAL_INT32) && !orte_abort_non_zero_exit) {
        if (!orte_report_child_jobs_separately || 1 == ORTE_LOCAL_JOBID(jdata->jobid)) {
            /* update the exit code */
            ORTE_UPDATE_EXIT_STATUS(lowest);
        }

        /* warn user */
        opal_output(orte_clean_output,
                    "-------------------------------------------------------\n"
                    "While %s job %s terminated normally, %d %s. Further examination may be required.\n"
                    "-------------------------------------------------------",
                    (1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? "the primary" : "child",
                    (1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? "" : ORTE_LOCAL_JOBID_PRINT(jdata->jobid),
                    i32, (1 == i32) ? "process returned\na non-zero exit code." :
                    "processes returned\nnon-zero exit codes.");
    }

    OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
                         "%s state:base:check_job_completed declared job %s terminated with state %s - checking all jobs",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(jdata->jobid),
                         orte_job_state_to_str(jdata->state)));
    
    /* if this job is a continuously operating one, then don't do
     * anything further - just return here
     */
    if (NULL != jdata &&
        (orte_get_attribute(&jdata->attributes, ORTE_JOB_CONTINUOUS_OP, NULL, OPAL_BOOL) ||
         ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_RECOVERABLE))) {
        goto CHECK_ALIVE;
    }
    
    /* if the job that is being checked is the HNP, then we are
     * trying to terminate the orteds. In that situation, we
     * do -not- check all jobs - we simply notify the HNP
     * that the orteds are complete. Also check special case
     * if jdata is NULL - we want
     * to definitely declare the job done if the orteds
     * have completed, no matter what else may be happening.
     * This can happen if a ctrl-c hits in the "wrong" place
     * while launching
     */
 CHECK_DAEMONS:
    if (jdata == NULL || jdata->jobid == ORTE_PROC_MY_NAME->jobid) {
        if (0 == orte_routed.num_routes()) {
            /* orteds are done! */
            OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
                                 "%s orteds complete - exiting",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
            if (NULL == jdata) {
                jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
            }
            ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_DAEMONS_TERMINATED);
            OBJ_RELEASE(caddy);
            return;
        }
        OBJ_RELEASE(caddy);
        return;
    }
    
    /* Release the resources used by this job. Since some errmgrs may want
     * to continue using resources allocated to the job as part of their
     * fault recovery procedure, we only do this once the job is "complete".
     * Note that an aborted/killed job -is- flagged as complete and will
     * therefore have its resources released. We need to do this after
     * we call the errmgr so that any attempt to restart the job will
     * avoid doing so in the exact same place as the current job
     */
    if (NULL != jdata->map  && jdata->state == ORTE_JOB_STATE_TERMINATED) {
        map = jdata->map;
        for (index = 0; index < map->nodes->size; index++) {
            if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, index))) {
                continue;
            }
            OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
                                 "%s releasing procs from node %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 node->name));
            for (i = 0; i < node->procs->size; i++) {
                if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, i))) {
                    continue;
                }
                if (proc->name.jobid != jdata->jobid) {
                    /* skip procs from another job */
                    continue;
                }
                node->slots_inuse--;
                node->num_procs--;
                OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
                                     "%s releasing proc %s from node %s",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                     ORTE_NAME_PRINT(&proc->name), node->name));
                /* set the entry in the node array to NULL */
                opal_pointer_array_set_item(node->procs, i, NULL);
                /* release the proc once for the map entry */
                OBJ_RELEASE(proc);
            }
            /* set the node location to NULL */
            opal_pointer_array_set_item(map->nodes, index, NULL);
            /* maintain accounting */
            OBJ_RELEASE(node);
            /* flag that the node is no longer in a map */
            ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED);
        }
        OBJ_RELEASE(map);
        jdata->map = NULL;
    }
    
 CHECK_ALIVE:
    /* now check to see if all jobs are done - trigger notification of this jdata
     * object when we find it
     */
    one_still_alive = false;
    for (j=1; j < orte_job_data->size; j++) {
        if (NULL == (job = (orte_job_t*)opal_pointer_array_get_item(orte_job_data, j))) {
            /* since we are releasing jdata objects as we
             * go, we can no longer assume that the job_data
             * array is left justified
             */
            continue;
        }
        /* if this is the job we are checking AND it normally terminated,
         * then activate the "notify_completed" state - this will release
         * the job state, but is provided so that the HNP main code can
         * take alternative actions if desired. If the state is killed_by_cmd,
         * then go ahead and release it. We cannot release it if it
         * abnormally terminated as mpirun needs the info so it can
         * report appropriately to the user
         *
         * NOTE: do not release the primary job (j=1) so we
         * can pretty-print completion message
         */
        if (NULL != jdata && job->jobid == jdata->jobid) {
            if (jdata->state == ORTE_JOB_STATE_TERMINATED) {
                OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
                                     "%s state:base:check_job_completed state is terminated - activating notify",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
                ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_NOTIFY_COMPLETED);
                one_still_alive = true;
            } else if (jdata->state == ORTE_JOB_STATE_KILLED_BY_CMD ||
                       jdata->state == ORTE_JOB_STATE_NOTIFIED) {
                OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
                                     "%s state:base:check_job_completed state is killed or notified - cleaning up",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
                /* release this object, ensuring that the
                 * pointer array internal accounting
                 * is maintained!
                 */
                if (1 < j) {
		    if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_DEBUGGER_DAEMON)) {
			/* this was a debugger daemon. notify that a debugger has detached */
			ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_DEBUGGER_DETACH);
		    }
                    opal_pointer_array_set_item(orte_job_data, j, NULL);  /* ensure the array has a NULL */
                    OBJ_RELEASE(jdata);
                }
            }
            continue;
        }
        /* if the job is flagged to not be monitored, skip it */
        if (ORTE_FLAG_TEST(job, ORTE_JOB_FLAG_DO_NOT_MONITOR)) {
            continue;
        }
        /* when checking for job termination, we must be sure to NOT check
         * our own job as it - rather obviously - has NOT terminated!
         */
        if (job->num_terminated < job->num_procs) {
            /* we have at least one job that is not done yet - we cannot
             * just return, though, as we need to ensure we cleanout the
             * job data for the job that just completed
             */
            OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
                                 "%s state:base:check_job_completed job %s is not terminated (%d:%d)",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_JOBID_PRINT(job->jobid),
                                 job->num_terminated, job->num_procs));
            one_still_alive = true;
        }
        else {
            OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
                                 "%s state:base:check_job_completed job %s is terminated (%d vs %d [%s])",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_JOBID_PRINT(job->jobid),
                                 job->num_terminated, job->num_procs,
                                 (NULL == jdata) ? "UNKNOWN" : orte_job_state_to_str(jdata->state) ));
        }
    }
    /* if a job is still alive, we just return */
    if (one_still_alive) {
        OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
                             "%s state:base:check_job_completed at least one job is not terminated",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        OBJ_RELEASE(caddy);
        return;
    }
    /* if we get here, then all jobs are done, so terminate */
    OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
                         "%s state:base:check_job_completed all jobs terminated",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* stop the job timeout event, if set */
    if (NULL != orte_mpiexec_timeout) {
        OBJ_RELEASE(orte_mpiexec_timeout);
        orte_mpiexec_timeout = NULL;
    }

    /* set the exit status to 0 - this will only happen if it
     * wasn't already set by an error condition
     */
    ORTE_UPDATE_EXIT_STATUS(0);

    /* order daemon termination - this tells us to cleanup
     * our local procs as well as telling remote daemons
     * to die
     */
    orte_plm.terminate_orteds();

    OBJ_RELEASE(caddy);
}
Example #10
0
int orte_regex_extract_node_names(char *regexp, char ***names)
{
    int i, j, len, ret;
    char *base;
    char *orig;
    bool found_range = false;
    bool more_to_come = false;
    
    if (NULL == regexp) {
        *names = NULL;
        return ORTE_SUCCESS;
    }
    
    orig = base = strdup(regexp);
    if (NULL == base) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }
    
    OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
                         "%s regex:extract:nodenames: checking nodelist: %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         regexp));
    
    do {
        /* Find the base */
        len = strlen(base);
        for (i = 0; i <= len; ++i) {
            if (base[i] == '[') {
                /* we found a range. this gets dealt with below */
                base[i] = '\0';
                found_range = true;
                break;
            }
            if (base[i] == ',') {
                /* we found a singleton node, and there are more to come */
                base[i] = '\0';
                found_range = false;
                more_to_come = true;
                break;
            }
            if (base[i] == '\0') {
                /* we found a singleton node */
                found_range = false;
                more_to_come = false;
                break;
            }
        }
        if(i == 0) {
            /* we found a special character at the beginning of the string */
            orte_show_help("help-regex.txt", "regex:special-char", true, regexp);
            free(orig);
            return ORTE_ERR_BAD_PARAM;
        }
        
        if (found_range) {
            /* If we found a range, now find the end of the range */
            for (j = i; j < len; ++j) {
                if (base[j] == ']') {
                    base[j] = '\0';
                    break;
                }
            }
            if (j >= len) {
                /* we didn't find the end of the range */
                orte_show_help("help-regex.txt", "regex:end-range-missing", true, regexp);
                free(orig);
                return ORTE_ERR_BAD_PARAM;
            }
            
            ret = regex_parse_node_ranges(base, base + i + 1, names);
            if(ORTE_SUCCESS != ret) {
                orte_show_help("help-regex.txt", "regex:bad-value", true, regexp);
                free(orig);
                return ret;
            }    
            if(base[j + 1] == ',') {
                more_to_come = true;
                base = &base[j + 2];
            } else {
                more_to_come = false;
            }
        } else {
            /* If we didn't find a range, just add the node */
            
            OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
                                 "%s regex:extract:nodenames: found node: %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), base));
            
            if(ORTE_SUCCESS != (ret = opal_argv_append_nosize(names, base))) {
                ORTE_ERROR_LOG(ret);
                free(orig);
                return ret;
            }
            /* set base equal to the (possible) next base to look at */
            base = &base[i + 1];
        }
    } while(more_to_come);
    
    free(orig);
    
    /* All done */
    return ret;
}
Example #11
0
void orte_state_base_activate_job_state(orte_job_t *jdata,
                                        orte_job_state_t state)
{
    opal_list_item_t *itm, *any=NULL, *error=NULL;
    orte_state_t *s;
    orte_state_caddy_t *caddy;

    for (itm = opal_list_get_first(&orte_job_states);
         itm != opal_list_get_end(&orte_job_states);
         itm = opal_list_get_next(itm)) {
        s = (orte_state_t*)itm;
        if (s->job_state == ORTE_JOB_STATE_ANY) {
            /* save this place */
            any = itm;
        }
        if (s->job_state == ORTE_JOB_STATE_ERROR) {
            error = itm;
        }
        if (s->job_state == state) {
            OPAL_OUTPUT_VERBOSE((1, orte_state_base_framework.framework_output,
                                 "%s ACTIVATING JOB %s STATE %s PRI %d",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 (NULL == jdata) ? "NULL" : ORTE_JOBID_PRINT(jdata->jobid),
                                 orte_job_state_to_str(state), s->priority));
            if (NULL == s->cbfunc) {
                OPAL_OUTPUT_VERBOSE((1, orte_state_base_framework.framework_output,
                                     "%s NULL CBFUNC FOR JOB %s STATE %s",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                     (NULL == jdata) ? "ALL" : ORTE_JOBID_PRINT(jdata->jobid),
                                     orte_job_state_to_str(state)));
                return;
            }
            caddy = OBJ_NEW(orte_state_caddy_t);
            if (NULL != jdata) {
                caddy->jdata = jdata;
                caddy->job_state = state;
                OBJ_RETAIN(jdata);
            }
            opal_event_set(orte_event_base, &caddy->ev, -1, OPAL_EV_WRITE, s->cbfunc, caddy);
            opal_event_set_priority(&caddy->ev, s->priority);
            opal_event_active(&caddy->ev, OPAL_EV_WRITE, 1);
            return;
        }
    }
    /* if we get here, then the state wasn't found, so execute
     * the default handler if it is defined
     */
    if (ORTE_JOB_STATE_ERROR < state && NULL != error) {
        s = (orte_state_t*)error;
    } else if (NULL != any) {
        s = (orte_state_t*)any;
    } else {
        OPAL_OUTPUT_VERBOSE((1, orte_state_base_framework.framework_output,
                             "ACTIVATE: ANY STATE NOT FOUND"));
        return;
    }
    if (NULL == s->cbfunc) {
        OPAL_OUTPUT_VERBOSE((1, orte_state_base_framework.framework_output,
                             "ACTIVATE: ANY STATE HANDLER NOT DEFINED"));
        return;
    }
    caddy = OBJ_NEW(orte_state_caddy_t);
    if (NULL != jdata) {
        caddy->jdata = jdata;
        caddy->job_state = state;
        OBJ_RETAIN(jdata);
    }
            OPAL_OUTPUT_VERBOSE((1, orte_state_base_framework.framework_output,
                                 "%s ACTIVATING JOB %s STATE %s PRI %d",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 (NULL == jdata) ? "NULL" : ORTE_JOBID_PRINT(jdata->jobid),
                                 orte_job_state_to_str(state), s->priority));
    opal_event_set(orte_event_base, &caddy->ev, -1, OPAL_EV_WRITE, s->cbfunc, caddy);
    opal_event_set_priority(&caddy->ev, s->priority);
    opal_event_active(&caddy->ev, OPAL_EV_WRITE, 1);
}
Example #12
0
/*
 * Sequentially map the ranks according to the placement in the
 * specified hostfile
 */
static int orte_rmaps_seq_map(orte_job_t *jdata)
{
    orte_job_map_t *map;
    orte_app_context_t *app;
    int i, n;
    orte_std_cntr_t j;
    opal_list_item_t *item;
    orte_node_t *node, *nd;
    seq_node_t *sq, *save=NULL, *seq;;
    orte_vpid_t vpid;
    orte_std_cntr_t num_nodes;
    int rc;
    opal_list_t default_seq_list;
    opal_list_t node_list, *seq_list, sq_list;
    orte_proc_t *proc;
    mca_base_component_t *c = &mca_rmaps_seq_component.base_version;
    char *hosts, *hstname, *sep, *eptr;
    FILE *fp;
#if OPAL_HAVE_HWLOC
    opal_hwloc_resource_type_t rtype;
#endif

    OPAL_OUTPUT_VERBOSE((1, orte_rmaps_base_framework.framework_output,
                         "%s rmaps:seq called on job %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(jdata->jobid)));

    /* this mapper can only handle initial launch
     * when seq mapping is desired - allow
     * restarting of failed apps
     */
    if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_RESTART)) {
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:seq: job %s is being restarted - seq cannot map",
                            ORTE_JOBID_PRINT(jdata->jobid));
        return ORTE_ERR_TAKE_NEXT_OPTION;
    }
    if (NULL != jdata->map->req_mapper) {
        if (0 != strcasecmp(jdata->map->req_mapper, c->mca_component_name)) {
            /* a mapper has been specified, and it isn't me */
            opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                "mca:rmaps:seq: job %s not using sequential mapper",
                                ORTE_JOBID_PRINT(jdata->jobid));
            return ORTE_ERR_TAKE_NEXT_OPTION;
        }
        /* we need to process it */
        goto process;
    }
    if (ORTE_MAPPING_SEQ != ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
        /* I don't know how to do these - defer */
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:seq: job %s not using seq mapper",
                            ORTE_JOBID_PRINT(jdata->jobid));
        return ORTE_ERR_TAKE_NEXT_OPTION;
    }

 process:
    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                        "mca:rmaps:seq: mapping job %s",
                        ORTE_JOBID_PRINT(jdata->jobid));

    /* flag that I did the mapping */
    if (NULL != jdata->map->last_mapper) {
        free(jdata->map->last_mapper);
    }
    jdata->map->last_mapper = strdup(c->mca_component_name);

    /* convenience def */
    map = jdata->map;

    /* if there is a default hostfile, go and get its ordered list of nodes */
    OBJ_CONSTRUCT(&default_seq_list, opal_list_t);
    if (NULL != orte_default_hostfile) {
        /* open the file */
        fp = fopen(orte_default_hostfile, "r");
        if (NULL == fp) {
            ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
            rc = ORTE_ERR_NOT_FOUND;
            goto error;
        }
        while (NULL != (hstname = orte_getline(fp))) {
            if (0 == strlen(hstname)) {
                /* blank line - ignore */
                continue;
            }
            sq = OBJ_NEW(seq_node_t);
            if (NULL != (sep = strchr(hstname, ' '))) {
                *sep = '\0';
                sep++;
                /* remove any trailing space */
                eptr = sep + strlen(sep) - 1;
                while (eptr > sep && isspace(*eptr)) {
                    eptr--;
                }
                *(eptr+1) = 0;
                sq->cpuset = strdup(sep);
            }
            sq->hostname = strdup(hstname);
            opal_list_append(&default_seq_list, &sq->super);
        }
        fclose(fp);
    }
    
    /* start at the beginning... */
    vpid = 0;
    jdata->num_procs = 0;
    if (0 < opal_list_get_size(&default_seq_list)) {
        save = (seq_node_t*)opal_list_get_first(&default_seq_list);
    }
    
#if OPAL_HAVE_HWLOC
    /* default to LOGICAL processors */
    if (orte_get_attribute(&jdata->attributes, ORTE_JOB_PHYSICAL_CPUIDS, NULL, OPAL_BOOL)) {
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:seq: using PHYSICAL processors");
        rtype = OPAL_HWLOC_PHYSICAL;
    } else {
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:seq: using LOGICAL processors");
        rtype = OPAL_HWLOC_LOGICAL;
    }
#endif

    /* initialize all the nodes as not included in this job map */
    for (j=0; j < orte_node_pool->size; j++) {
        if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, j))) {
            ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED);
        } 
    }
    
    /* cycle through the app_contexts, mapping them sequentially */
    for(i=0; i < jdata->apps->size; i++) {
        if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
            continue;
        }
    
        /* dash-host trumps hostfile */
        if (orte_get_attribute(&app->attributes, ORTE_APP_DASH_HOST, (void**)&hosts, OPAL_STRING)) {
            opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                "mca:rmaps:seq: using dash-host nodes on app %s", app->app);
            OBJ_CONSTRUCT(&node_list, opal_list_t);
            /* dash host entries cannot specify cpusets, so used the std function to retrieve the list */
            if (ORTE_SUCCESS != (rc = orte_util_get_ordered_dash_host_list(&node_list, hosts))) {
                ORTE_ERROR_LOG(rc);
                free(hosts);
                goto error;
            }
            free(hosts);
            /* transfer the list to a seq_node_t list */
            OBJ_CONSTRUCT(&sq_list, opal_list_t);
            while (NULL != (nd = (orte_node_t*)opal_list_remove_first(&node_list))) {
                sq = OBJ_NEW(seq_node_t);
                sq->hostname = strdup(nd->name);
                opal_list_append(&sq_list, &sq->super);
                OBJ_RELEASE(nd);
            }
            OBJ_DESTRUCT(&node_list);
            seq_list = &sq_list;
        } else if (orte_get_attribute(&app->attributes, ORTE_APP_HOSTFILE, (void**)&hosts, OPAL_STRING)) {
            opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                "mca:rmaps:seq: using hostfile %s nodes on app %s", hosts, app->app);
            OBJ_CONSTRUCT(&sq_list, opal_list_t);
            /* open the file */
            fp = fopen(hosts, "r");
            if (NULL == fp) {
                ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
                rc = ORTE_ERR_NOT_FOUND;
                OBJ_DESTRUCT(&sq_list);
                goto error;
            }
            while (NULL != (hstname = orte_getline(fp))) {
                sq = OBJ_NEW(seq_node_t);
                if (NULL != (sep = strchr(hstname, ' '))) {
                    *sep = '\0';
                    sep++;
                    /* remove any trailing space */
                    eptr = sep + strlen(sep) - 1;
                    while (eptr > sep && isspace(*eptr)) {
                        eptr--;
                    }
                    *(eptr+1) = 0;
                    sq->cpuset = strdup(sep);
                }
                sq->hostname = hstname;
                opal_list_append(&sq_list, &sq->super);
            }
            fclose(fp);
            free(hosts);
            seq_list = &sq_list;
        } else if (0 < opal_list_get_size(&default_seq_list)) {
            opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                "mca:rmaps:seq: using default hostfile nodes on app %s", app->app);
            seq_list = &default_seq_list;
        } else {
            /* can't do anything - no nodes available! */
            orte_show_help("help-orte-rmaps-base.txt",
                           "orte-rmaps-base:no-available-resources",
                           true);
            return ORTE_ERR_SILENT;
        }
        
        /* check for nolocal and remove the head node, if required */
        if (map->mapping & ORTE_MAPPING_NO_USE_LOCAL) {
            for (item  = opal_list_get_first(seq_list);
                 item != opal_list_get_end(seq_list);
                 item  = opal_list_get_next(item) ) {
                seq = (seq_node_t*)item;
                /* need to check ifislocal because the name in the
                 * hostfile may not have been FQDN, while name returned
                 * by gethostname may have been (or vice versa)
                 */
                if (opal_ifislocal(seq->hostname)) {
                    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                        "mca:rmaps:seq: removing head node %s", seq->hostname);
                    opal_list_remove_item(seq_list, item);
                    OBJ_RELEASE(item);  /* "un-retain" it */
                }
            }
        }
            
        if (NULL == seq_list || 0 == (num_nodes = (orte_std_cntr_t)opal_list_get_size(seq_list))) {
            orte_show_help("help-orte-rmaps-base.txt",
                           "orte-rmaps-base:no-available-resources",
                           true);
            return ORTE_ERR_SILENT;
        }

        /* if num_procs wasn't specified, set it now */
        if (0 == app->num_procs) {
            app->num_procs = num_nodes;
            opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                "mca:rmaps:seq: setting num procs to %s for app %s",
                                ORTE_VPID_PRINT(app->num_procs), app->app);
        } else if (num_nodes < app->num_procs) {
            orte_show_help("help-orte-rmaps-base.txt", "seq:not-enough-resources", true,
                           app->num_procs, num_nodes);
            return ORTE_ERR_SILENT;
        }

        if (seq_list == &default_seq_list) {
            sq = save;
        } else {
            sq = (seq_node_t*)opal_list_get_first(seq_list);
        }
        for (n=0; n < app->num_procs; n++) {
            /* find this node on the global array - this is necessary so
             * that our mapping gets saved on that array as the objects
             * returned by the hostfile function are -not- on the array
             */
            node = NULL;
            for (j=0; j < orte_node_pool->size; j++) {
                if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, j))) {
                    continue;
                } 
                if (0 == strcmp(sq->hostname, node->name)) {
                    break;
                }
            }
            if (NULL == node) {
                /* wasn't found - that is an error */
                orte_show_help("help-orte-rmaps-seq.txt",
                               "orte-rmaps-seq:resource-not-found",
                               true, sq->hostname);
                rc = ORTE_ERR_SILENT;
                goto error;
            }
            /* ensure the node is in the map */
            if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) {
                OBJ_RETAIN(node);
                opal_pointer_array_add(map->nodes, node);
                jdata->map->num_nodes++;
                ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED);
            }
            proc = orte_rmaps_base_setup_proc(jdata, node, i);
            if ((node->slots < (int)node->num_procs) ||
                (0 < node->slots_max && node->slots_max < (int)node->num_procs)) {
                if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
                    orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
                                   true, node->num_procs, app->app);
                    rc = ORTE_ERR_SILENT;
                    goto error;
                }
                /* flag the node as oversubscribed so that sched-yield gets
                 * properly set
                 */
                ORTE_FLAG_SET(node, ORTE_NODE_FLAG_OVERSUBSCRIBED);
            }
            /* assign the vpid */
            proc->name.vpid = vpid++;
            opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                "mca:rmaps:seq: assign proc %s to node %s for app %s",
                                ORTE_VPID_PRINT(proc->name.vpid), sq->hostname, app->app);

#if OPAL_HAVE_HWLOC
            {
                /* record the cpuset, if given */
                if (NULL != sq->cpuset) {
                    hwloc_cpuset_t bitmap;
                    char *cpu_bitmap;
                    if (NULL == node->topology) {
                        /* not allowed - for sequential cpusets, we must have
                         * the topology info
                         */
                        orte_show_help("help-orte-rmaps-base.txt", "rmaps:no-topology", true, node->name);
                        rc = ORTE_ERR_SILENT;
                        goto error;
                    }
                    /* if we are using hwthreads as cpus and binding to hwthreads, then
                     * we can just copy the cpuset across as it already specifies things
                     * at that level */
                    if (opal_hwloc_use_hwthreads_as_cpus &&
                        OPAL_BIND_TO_HWTHREAD == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
                        cpu_bitmap = strdup(sq->cpuset);
                    } else {
                        /* setup the bitmap */
                        bitmap = hwloc_bitmap_alloc();
                        /* parse the slot_list to find the socket and core */
                        if (ORTE_SUCCESS != (rc = opal_hwloc_base_slot_list_parse(sq->cpuset, node->topology, rtype, bitmap))) {
                            ORTE_ERROR_LOG(rc);
                            hwloc_bitmap_free(bitmap);
                            goto error;
                        }
                        /* note that we cannot set the proc locale to any specific object
                         * as the slot list may have assigned it to more than one - so
                         * leave that field NULL
                         */
                        /* set the proc to the specified map */
                        hwloc_bitmap_list_asprintf(&cpu_bitmap, bitmap);
                        hwloc_bitmap_free(bitmap);
                    }
                    orte_set_attribute(&proc->attributes, ORTE_PROC_CPU_BITMAP, ORTE_ATTR_GLOBAL, cpu_bitmap, OPAL_STRING);
                    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                        "mca:rmaps:seq: binding proc %s to cpuset %s bitmap %s",
                                        ORTE_VPID_PRINT(proc->name.vpid), sq->cpuset, cpu_bitmap);
                    /* we are going to bind to cpuset since the user is specifying the cpus */
                    OPAL_SET_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_CPUSET);
                    /* note that the user specified the mapping */
                    ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYUSER);
                    ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_GIVEN);
                    /* cleanup */
                    free(cpu_bitmap);
                } else {
                    hwloc_obj_t locale;

                    /* assign the locale - okay for the topo to be null as
                     * it just means it wasn't returned
                     */
                    if (NULL != node->topology) {
                        locale = hwloc_get_root_obj(node->topology);
                        orte_set_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE,
                                           ORTE_ATTR_LOCAL, locale, OPAL_PTR);
                    }
                }
            }
#endif

            /* add to the jdata proc array */
            if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) {
                ORTE_ERROR_LOG(rc);
                goto error;
            }
            /* move to next node */
            sq = (seq_node_t*)opal_list_get_next(&sq->super);
        }

        /** track the total number of processes we mapped */
        jdata->num_procs += app->num_procs;
        
        /* cleanup the node list if it came from this app_context */
        if (seq_list != &default_seq_list) {
            OPAL_LIST_DESTRUCT(seq_list);
        } else {
            save = sq;
        }
    }

    return ORTE_SUCCESS;

 error:
    OPAL_LIST_DESTRUCT(&default_seq_list);
    return rc;
}
Example #13
0
int orte_util_get_ordered_host_list(opal_list_t *nodes,
                                    char *hostfile)
{
    opal_list_t exclude;
    opal_list_item_t *item, *itm, *item2, *item1;
    char *cptr;
    int num_empty, i, nodeidx, startempty=0;
    bool want_all_empty=false;
    orte_node_t *node_from_pool, *newnode;
    int rc;
    
    OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
                         "%s hostfile: creating ordered list of hosts from hostfile %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hostfile));
    
    OBJ_CONSTRUCT(&exclude, opal_list_t);
    
    /* parse the hostfile and add the contents to the list, keeping duplicates */
    if (ORTE_SUCCESS != (rc = hostfile_parse(hostfile, nodes, &exclude, true))) {
        goto cleanup;
    }
    
    /* parse the nodes to process any relative node directives */
    item2 = opal_list_get_first(nodes);
    while (item2 != opal_list_get_end(nodes)) {
        orte_node_t *node=(orte_node_t*)item2;
        
        /* save the next location in case this one gets removed */
        item1 = opal_list_get_next(item2);
        
        if ('+' != node->name[0]) {
            item2 = item1;
            continue;
        }
        
        /* see if we specified empty nodes */
        if ('e' == node->name[1] ||
            'E' == node->name[1]) {
            /* request for empty nodes - do they want
             * all of them?
             */
            if (NULL != (cptr = strchr(node->name, ':'))) {
                /* the colon indicates a specific # are requested */
                cptr++; /* step past : */
                num_empty = strtol(cptr, NULL, 10);
            } else {
                /* want them all - set num_empty to max */
                num_empty = INT_MAX;
                want_all_empty = true;
            }
            /* insert empty nodes into newnodes list in place of the current item.
             * since item1 is the next item, we insert in front of it
             */
            if (!orte_hnp_is_allocated && 0 == startempty) {
               startempty = 1;
            }
            for (i=startempty; 0 < num_empty && i < orte_node_pool->size; i++) {
                if (NULL == (node_from_pool = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
                    continue;
                }
                if (0 == node_from_pool->slots_inuse) {
                    newnode = OBJ_NEW(orte_node_t);
                    newnode->name = strdup(node_from_pool->name);
                    /* if the slot count here is less than the
                     * total slots avail on this node, set it
                     * to the specified count - this allows people
                     * to subdivide an allocation
                     */
                    if (node->slots < node_from_pool->slots) {
                        newnode->slots_alloc = node->slots;
                    } else {
                        newnode->slots_alloc = node_from_pool->slots;
                    }
                    opal_list_insert_pos(nodes, item1, &newnode->super);
                    /* track number added */
                    --num_empty;
                }
            }
            /* bookmark where we stopped in case they ask for more */
            startempty = i;
            /* did they get everything they wanted? */
            if (!want_all_empty && 0 < num_empty) {
                orte_show_help("help-hostfile.txt", "hostfile:not-enough-empty",
                               true, num_empty);
                rc = ORTE_ERR_SILENT;
                goto cleanup;
            }            
            /* since we have expanded the provided node, remove
             * it from list
             */
            opal_list_remove_item(nodes, item2);
            OBJ_RELEASE(item2);
        } else if ('n' == node->name[1] ||
                   'N' == node->name[1]) {
            /* they want a specific relative node #, so
             * look it up on global pool
             */
            nodeidx = strtol(&node->name[2], NULL, 10);
            /* if the HNP is not allocated, then we need to
             * adjust the index as the node pool is offset
             * by one
             */
            if (!orte_hnp_is_allocated) {
                nodeidx++;
            }
            /* see if that location is filled */
            if (NULL == (node_from_pool = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, nodeidx))) {
                /* this is an error */
                orte_show_help("help-hostfile.txt", "hostfile:relative-node-not-found",
                               true, nodeidx, node->name);
                rc = ORTE_ERR_SILENT;
                goto cleanup;
            }
            /* create the node object */
            newnode = OBJ_NEW(orte_node_t);
            newnode->name = strdup(node_from_pool->name);
            /* if the slot count here is less than the
             * total slots avail on this node, set it
             * to the specified count - this allows people
             * to subdivide an allocation
             */
            if (node->slots < node_from_pool->slots) {
                newnode->slots_alloc = node->slots;
            } else {
                newnode->slots_alloc = node_from_pool->slots;
            }
            /* insert it before item1 */
            opal_list_insert_pos(nodes, item1, &newnode->super);
            /* since we have expanded the provided node, remove
             * it from list
             */
            opal_list_remove_item(nodes, item2);
            OBJ_RELEASE(item2);
        } else {
            /* invalid relative node syntax */
            orte_show_help("help-hostfile.txt", "hostfile:invalid-relative-node-syntax",
                           true, node->name);
            rc = ORTE_ERR_SILENT;
            goto cleanup;
        }
        
        /* move to next */
        item2 = item1;
    }

    /* remove from the list of nodes those that are in the exclude list */
    while(NULL != (item = opal_list_remove_first(&exclude))) {
        orte_node_t *exnode = (orte_node_t*)item;
        /* check for matches on nodes */
        for (itm = opal_list_get_first(nodes);
             itm != opal_list_get_end(nodes);
             itm = opal_list_get_next(itm)) {
            orte_node_t *node=(orte_node_t*)itm;
            if (0 == strcmp(exnode->name, node->name)) {
                /* match - remove it */
                opal_list_remove_item(nodes, itm);
                OBJ_RELEASE(itm);
                /* have to cycle through the entire list as we could
                 * have duplicates
                 */
            }
        }
        OBJ_RELEASE(item);
    }
    
cleanup:
    OBJ_DESTRUCT(&exclude);
    
    return rc;
}
Example #14
0
/* Parse the provided hostfile and filter the nodes that are
 * on the input list, removing those that
 * are not found in the hostfile
 */
int orte_util_filter_hostfile_nodes(opal_list_t *nodes,
                                    char *hostfile)
{
    opal_list_t newnodes, exclude;
    opal_list_item_t *item1, *item2, *next, *item3;
    orte_node_t *node_from_list, *node_from_file, *node_from_pool, *node3;
    int rc = ORTE_SUCCESS;
    char *cptr;
    int num_empty, nodeidx;
    bool want_all_empty = false;
    opal_list_t keep;
    
    OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
                        "%s hostfile: filtering nodes through hostfile %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hostfile));

    /* parse the hostfile and create local list of findings */
    OBJ_CONSTRUCT(&newnodes, opal_list_t);
    OBJ_CONSTRUCT(&exclude, opal_list_t);
    if (ORTE_SUCCESS != (rc = hostfile_parse(hostfile, &newnodes, &exclude, false))) {
        OBJ_DESTRUCT(&newnodes);
        return rc;
    }
    
    /* remove from the list of newnodes those that are in the exclude list
     * since we could have added duplicate names above due to the */
    while (NULL != (item1 = opal_list_remove_first(&exclude))) {
        node_from_file = (orte_node_t*)item1;
        /* check for matches on nodes */
        for (item2 = opal_list_get_first(&newnodes);
             item2 != opal_list_get_end(&newnodes);
             item2 = opal_list_get_next(item2)) {
            orte_node_t *node = (orte_node_t*)item2;
            if (0 == strcmp(node_from_file->name, node->name)) {
                /* match - remove it */
                opal_list_remove_item(&newnodes, item2);
                OBJ_RELEASE(item2);
                break;
            }
        }
        OBJ_RELEASE(item1);
    }
    
    /* now check our nodes and keep those that match. We can
     * destruct our hostfile list as we go since this won't be needed
     */
    OBJ_CONSTRUCT(&keep, opal_list_t);
    while (NULL != (item2 = opal_list_remove_first(&newnodes))) {
        node_from_file = (orte_node_t*)item2;
        
        next = opal_list_get_next(item2);
        
        /* see if this is a relative node syntax */
        if ('+' == node_from_file->name[0]) {
            /* see if we specified empty nodes */
            if ('e' == node_from_file->name[1] ||
                'E' == node_from_file->name[1]) {
                /* request for empty nodes - do they want
                 * all of them?
                 */
                if (NULL != (cptr = strchr(node_from_file->name, ':'))) {
                    /* the colon indicates a specific # are requested */
                    cptr++; /* step past : */
                    num_empty = strtol(cptr, NULL, 10);
                } else {
                    /* want them all - set num_empty to max */
                    num_empty = INT_MAX;
                    want_all_empty = true;
                }
                /* search the list of nodes provided to us and find those
                 * that are empty
                 */
                item1 = opal_list_get_first(nodes);
                while (0 < num_empty && item1 != opal_list_get_end(nodes)) {
                    node_from_list = (orte_node_t*)item1;
                    next = opal_list_get_next(item1);  /* keep our place */
                    if (0 == node_from_list->slots_inuse) {
                        /* check to see if this node is explicitly called
                         * out later - if so, don't use it here
                         */
                        for (item3 = opal_list_get_first(&newnodes);
                             item3 != opal_list_get_end(&newnodes);
                             item3 = opal_list_get_next(item3)) {
                            node3 = (orte_node_t*)item3;
                            if (0 == strcmp(node3->name, node_from_list->name)) {
                                /* match - don't use it */
                                goto skipnode;
                            }
                        }
                        /* remove item from list */
                        opal_list_remove_item(nodes, item1);
                        /* xfer to keep list */
                        opal_list_append(&keep, item1);
                        --num_empty;
                    }
                skipnode:
                    item1 = next;
                }
                /* did they get everything they wanted? */
                if (!want_all_empty && 0 < num_empty) {
                    orte_show_help("help-hostfile.txt", "hostfile:not-enough-empty",
                                   true, num_empty);
                    rc = ORTE_ERR_SILENT;
                    goto cleanup;
                }            
            } else if ('n' == node_from_file->name[1] ||
                       'N' == node_from_file->name[1]) {
                /* they want a specific relative node #, so
                 * look it up on global pool
                 */
                nodeidx = strtol(&node_from_file->name[2], NULL, 10);
                if (NULL == (node_from_pool = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, nodeidx))) {
                    /* this is an error */
                    orte_show_help("help-hostfile.txt", "hostfile:relative-node-not-found",
                                   true, nodeidx, node_from_file->name);
                    rc = ORTE_ERR_SILENT;
                    goto cleanup;
                }
                /* search the list of nodes provided to us and find it */
                for (item1 = opal_list_get_first(nodes);
                     item1 != opal_list_get_end(nodes);
                     item1 = opal_list_get_next(nodes)) {
                    node_from_list = (orte_node_t*)item1;
                    if (0 == strcmp(node_from_list->name, node_from_pool->name)) {
                        /* match - remove item from list */
                        opal_list_remove_item(nodes, item1);
                        /* xfer to keep list */
                        opal_list_append(&keep, item1);
                        break;
                    }
                }
            } else {
                /* invalid relative node syntax */
                orte_show_help("help-hostfile.txt", "hostfile:invalid-relative-node-syntax",
                               true, node_from_file->name);
                rc = ORTE_ERR_SILENT;
                goto cleanup;
            }
        } else {
            /* we are looking for a specific node on the list
             * search the provided list of nodes to see if this
             * one is found
             */
            for (item1 = opal_list_get_first(nodes);
                 item1 != opal_list_get_end(nodes);
                 item1 = opal_list_get_next(item1)) {
                node_from_list = (orte_node_t*)item1;
                /* since the name in the hostfile might not match
                 * our local name, and yet still be intended to match,
                 * we have to check for local interfaces
                 */
                if (0 == strcmp(node_from_file->name, node_from_list->name) ||
                    (opal_ifislocal(node_from_list->name) &&
                     opal_ifislocal(node_from_file->name))) {
                    /* if the slot count here is less than the
                     * total slots avail on this node, set it
                     * to the specified count - this allows people
                     * to subdivide an allocation
                     */
                    if (node_from_file->slots < node_from_list->slots) {
                        node_from_list->slots_alloc = node_from_file->slots;
                    }
                    /* remove the node from the list */
                    opal_list_remove_item(nodes, item1);
                    /* xfer it to keep list */
                    opal_list_append(&keep, item1);
                    break;
                }
            }
        }
        /* cleanup the newnode list */
        OBJ_RELEASE(item2);
    }
    
    /* if we still have entries on our hostfile list, then
     * there were requested hosts that were not in our allocation.
     * This is an error - report it to the user and return an error
     */
    if (0 != opal_list_get_size(&newnodes)) {
        orte_show_help("help-hostfile.txt", "not-all-mapped-alloc",
                       true, hostfile);
        while (NULL != (item1 = opal_list_remove_first(&newnodes))) {
            OBJ_RELEASE(item1);
        }
        OBJ_DESTRUCT(&newnodes);
        return ORTE_ERR_SILENT;
    }

    /* clear the rest of the nodes list */
    while (NULL != (item1 = opal_list_remove_first(nodes))) {
        OBJ_RELEASE(item1);
    }
    
    /* the nodes list has been cleared - rebuild it in order */
    while (NULL != (item1 = opal_list_remove_first(&keep))) {
        opal_list_append(nodes, item1);
    }
    
cleanup:
    OBJ_DESTRUCT(&newnodes);

    return rc;
}
void orte_grpcomm_base_progress_collectives(void)
{
    opal_list_item_t *item;
    orte_grpcomm_collective_t *coll;
    orte_namelist_t *nm;
    orte_job_t *jdata;
    opal_buffer_t *relay;
    int rc;

    /* cycle thru all known collectives - any collective on the list
     * must have come from either a local proc or receiving a global
     * collective. Either way, the number of required recipients
     * is the number of local procs for that job
     */
    item = opal_list_get_first(&orte_grpcomm_base.active_colls);
    while (item != opal_list_get_end(&orte_grpcomm_base.active_colls)) {
        coll = (orte_grpcomm_collective_t*)item;
        OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
                             "%s PROGRESSING COLL id %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             coll->id));
        /* if this collective is already locally complete, then ignore it */
        if (coll->locally_complete) {
            OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
                                 "%s COLL %d IS LOCALLY COMPLETE",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 coll->id));
            goto next_coll;
        }
        /* get the jobid of the participants in this collective */
        if (NULL == (nm = (orte_namelist_t*)opal_list_get_first(&coll->participants))) {
            opal_output(0, "NO PARTICIPANTS");
            goto next_coll;
        }
        /* get the job object for this participant */
        if (NULL == (jdata = orte_get_job_data_object(nm->name.jobid))) {
            /* if the job object isn't found, then we can't progress
             * this collective
             */
            OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
                                 "%s COLL %d JOBID %s NOT FOUND",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 coll->id, ORTE_JOBID_PRINT(nm->name.jobid)));
            goto next_coll;
        }
        /* all local procs from this job are required to participate */
        OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
                             "%s ALL LOCAL PROCS FOR JOB %s CONTRIBUTE %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_JOBID_PRINT(jdata->jobid),
                             (int)jdata->num_local_procs));
        /* see if all reqd participants are done */
        if (jdata->num_local_procs == coll->num_local_recvd) {
            OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
                                 "%s COLLECTIVE %d LOCALLY COMPLETE - SENDING TO GLOBAL COLLECTIVE",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), coll->id));
            /* mark it as locally complete */
            coll->locally_complete = true;
            /* pack the collective */
            relay = OBJ_NEW(opal_buffer_t);
            orte_grpcomm_base_pack_collective(relay, jdata->jobid,
                                              coll, ORTE_GRPCOMM_INTERNAL_STG_LOCAL);
            /* send it to our global collective handler */
            if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_NAME, relay,
                                                  ORTE_RML_TAG_DAEMON_COLL, 0,
                                                  orte_rml_send_callback, NULL))) {
                ORTE_ERROR_LOG(rc);
                OBJ_RELEASE(relay);
            }
        }

    next_coll:
        item = opal_list_get_next(item);
    }
}
Example #16
0
static void track_procs(int fd, short argc, void *cbdata)
{
    orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
    orte_process_name_t *proc = &caddy->name;
    orte_proc_state_t state = caddy->proc_state;
    orte_job_t *jdata;
    orte_proc_t *pdata, *pptr;
    opal_buffer_t *alert;
    int rc, i;
    orte_plm_cmd_flag_t cmd;
    int8_t flag;

    OPAL_OUTPUT_VERBOSE((5, orte_state_base_framework.framework_output,
                         "%s state:orcm:track_procs called for proc %s state %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(proc),
                         orte_proc_state_to_str(state)));

    /* get the job object for this proc */
    if (NULL == (jdata = orte_get_job_data_object(proc->jobid))) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        goto cleanup;
    }
    pdata = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->vpid);
    if (NULL == pdata) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        goto cleanup;
    }

    if (ORTE_PROC_STATE_RUNNING == state) {
        /* update the proc state */
        pdata->state = state;
        jdata->num_launched++;
        /* don't update until we are told that all are done */
    } else if (ORTE_PROC_STATE_REGISTERED == state) {
        /* update the proc state */
        pdata->state = state;
        jdata->num_reported++;
        if (jdata->num_reported == jdata->num_local_procs) {
            /* once everyone registers, send their contact info to
             * the HNP so it is available to debuggers and anyone
             * else that needs it
             */

            OPAL_OUTPUT_VERBOSE((5, orte_state_base_framework.framework_output,
                                 "%s state:orcm: notifying HNP all local registered",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

            alert = OBJ_NEW(opal_buffer_t);
            /* pack registered command */
            cmd = ORTE_PLM_REGISTERED_CMD;
            if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &cmd, 1, ORTE_PLM_CMD))) {
                ORTE_ERROR_LOG(rc);
                goto cleanup;
            }
            /* pack the jobid */
            if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &proc->jobid, 1, ORTE_JOBID))) {
                ORTE_ERROR_LOG(rc);
                goto cleanup;
            }
            /* pack all the local child vpids */
            for (i=0; i < orte_local_children->size; i++) {
                if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i))) {
                    continue;
                }
                if (pptr->name.jobid == proc->jobid) {
                    if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &pptr->name.vpid, 1, ORTE_VPID))) {
                        ORTE_ERROR_LOG(rc);
                        goto cleanup;
                    }
                    if (ORTE_FLAG_TEST(pptr, ORTE_PROC_FLAG_AS_MPI)) {
                        flag = 1;
                    } else {
                        flag = 0;
                    }
                    if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &flag, 1, OPAL_INT8))) {
                        ORTE_ERROR_LOG(rc);
                        goto cleanup;
                    }
                }
            }
            /* send it */
            if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, alert,
                                                              ORTE_RML_TAG_PLM,
                                                              orte_rml_send_callback, NULL))) {
                ORTE_ERROR_LOG(rc);
            } else {
                rc = ORTE_SUCCESS;
            }
        }
    } else if (ORTE_PROC_STATE_IOF_COMPLETE == state) {
        /* do NOT update the proc state as this can hit
         * while we are still trying to notify the HNP of
         * successful launch for short-lived procs
         */
        ORTE_FLAG_SET(pdata, ORTE_PROC_FLAG_IOF_COMPLETE);
        if (ORTE_FLAG_TEST(pdata, ORTE_PROC_FLAG_WAITPID) &&
            !ORTE_FLAG_TEST(pdata, ORTE_PROC_FLAG_RECORDED)) {
            ORTE_ACTIVATE_PROC_STATE(proc, ORTE_PROC_STATE_TERMINATED);
        }
    } else if (ORTE_PROC_STATE_WAITPID_FIRED == state) {
        /* do NOT update the proc state as this can hit
         * while we are still trying to notify the HNP of
         * successful launch for short-lived procs
         */
        ORTE_FLAG_SET(pdata, ORTE_PROC_FLAG_WAITPID);
        if (ORTE_FLAG_TEST(pdata, ORTE_PROC_FLAG_IOF_COMPLETE) &&
            !ORTE_FLAG_TEST(pdata, ORTE_PROC_FLAG_RECORDED)) {
            ORTE_ACTIVATE_PROC_STATE(proc, ORTE_PROC_STATE_TERMINATED);
        }
    } else if (ORTE_PROC_STATE_TERMINATED == state) {
        /* if this proc has not already recorded as terminated, then
         * update the accounting here */
        if (!ORTE_FLAG_TEST(pdata, ORTE_PROC_FLAG_RECORDED)) {
            jdata->num_terminated++;
        }
        /* update the proc state */
        ORTE_FLAG_SET(pdata, ORTE_PROC_FLAG_RECORDED);
        ORTE_FLAG_UNSET(pdata, ORTE_PROC_FLAG_ALIVE);
        pdata->state = state;
        /* Clean up the session directory as if we were the process
         * itself.  This covers the case where the process died abnormally
         * and didn't cleanup its own session directory.
         */
        orte_session_dir_finalize(proc);
        /* track job status */
        if (jdata->num_terminated == jdata->num_local_procs &&
            !orte_get_attribute(&jdata->attributes, ORTE_JOB_TERM_NOTIFIED, NULL, OPAL_BOOL)) {
            /* pack update state command */
            cmd = ORTE_PLM_UPDATE_PROC_STATE;
            alert = OBJ_NEW(opal_buffer_t);
            if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &cmd, 1, ORTE_PLM_CMD))) {
                ORTE_ERROR_LOG(rc);
                goto cleanup;
            }
            /* pack the job info */
            if (ORTE_SUCCESS != (rc = pack_state_update(alert, jdata))) {
                ORTE_ERROR_LOG(rc);
            }
            /* send it */
            OPAL_OUTPUT_VERBOSE((5, orte_state_base_framework.framework_output,
                                 "%s state:orcm: SENDING JOB LOCAL TERMINATION UPDATE FOR JOB %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_JOBID_PRINT(jdata->jobid)));
            if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, alert,
                                                  ORTE_RML_TAG_PLM,
                                                  orte_rml_send_callback, NULL))) {
                ORTE_ERROR_LOG(rc);
            }
            /* mark that we sent it so we ensure we don't do it again */
            orte_set_attribute(&jdata->attributes, ORTE_JOB_TERM_NOTIFIED, ORTE_ATTR_LOCAL, NULL, OPAL_BOOL);
        }
    }

 cleanup:
    OBJ_RELEASE(caddy);
}
static void daemon_coll_recv(int status, orte_process_name_t* sender,
                             opal_buffer_t* data, orte_rml_tag_t tag,
                             void* cbdata)
{
    orte_job_t *jdata;
    orte_std_cntr_t n;
    opal_list_item_t *item;
    orte_vpid_t np;
    int rc;
    orte_grpcomm_collective_t *coll;
    orte_namelist_t *nm;
    orte_grpcomm_coll_id_t id;
    bool do_progress;
    opal_buffer_t *relay;
    orte_jobid_t jobid;

    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
                         "%s grpcomm:base:daemon_coll: daemon collective recvd from %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(sender)));
    
    /* get the collective id */
    n = 1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &id, &n, ORTE_GRPCOMM_COLL_ID_T))) {
        ORTE_ERROR_LOG(rc);
        return;
    }

    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
                         "%s grpcomm:base:daemon_coll: WORKING COLLECTIVE %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), id));

    /* setup the collective for this id - if it's already present,
     * then this will just return the existing structure
     */
    coll = orte_grpcomm_base_setup_collective(id);

    /* record that we received a bucket */
    coll->num_peer_buckets++;

    /* unpack the jobid */
    n = 1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &jobid, &n, ORTE_JOBID))) {
        ORTE_ERROR_LOG(rc);
        return;
    }

    /*  find this job */
    do_progress = true;
    if (NULL == (jdata = orte_get_job_data_object(jobid))) {
        /* if we can't find it, then we haven't processed the
         * launch msg for this job yet - can't happen with
         * our own local procs, but this could involve a proc
         * running remotely that we don't know about yet
         */
        do_progress = false;
    }
    if (do_progress && 0 == jdata->num_local_procs) {
        coll->locally_complete = true;
    }

    /* unpack the number of contributors involved in the incoming data */
    n = 1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(data, &np, &n, ORTE_VPID))) {
        ORTE_ERROR_LOG(rc);
        return;
    }
    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
                         "%s grpcomm:base:daemon_coll: NUM CONTRIBS: %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_VPID_PRINT(np)));
    /* add it to the number of global recvd */
    coll->num_global_recvd += np;

    /* transfer the data */
    opal_dss.copy_payload(&coll->buffer, data);

    /* are we done? */
    if (!do_progress || !coll->locally_complete) {
        /* can't continue - missing at least one launch msg
         * or not locally complete
         */
        OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
                             "%s grpcomm:base:daemon_coll: CANNOT PROGRESS",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        return;
    }

    /* determine how many buckets we should receive from others
     * involved in this collective - need to know the number
     * of total contributors from all buckets being relayed
     * thru us
     */
    orte_routed.get_routing_list(ORTE_GRPCOMM_COLL_PEERS, coll);
    np = 1;  /* account for our own bucket */
    while (NULL != (item = opal_list_remove_first(&coll->targets))) {
        nm = (orte_namelist_t*)item;
        if (ORTE_VPID_WILDCARD == nm->name.vpid) {
            /* wait for input from all daemons */
            np = orte_process_info.num_procs;
            break;
        } else {
            np++;
        }
    }
    /* clear the list for reuse */
    while (NULL != (nm = (orte_namelist_t*)opal_list_remove_first(&coll->targets))) {
        OBJ_RELEASE(nm);
    }

    /* relay the data, if required */
    if (np == coll->num_peer_buckets) {
        orte_routed.get_routing_list(ORTE_GRPCOMM_COLL_RELAY, coll);

        while (NULL != (nm = (orte_namelist_t*)opal_list_remove_first(&coll->targets))) {
            OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
                                 "%s grpcomm:base:daemon_coll: RELAYING COLLECTIVE TO %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_NAME_PRINT(&nm->name)));        
            relay = OBJ_NEW(opal_buffer_t);
            orte_grpcomm_base_pack_collective(relay, jobid,
                                              coll, ORTE_GRPCOMM_INTERNAL_STG_GLOBAL);
            if (ORTE_VPID_WILDCARD == nm->name.vpid) {
                /* this is going to everyone in this job, so use xcast */
                orte_grpcomm.xcast(nm->name.jobid, relay, ORTE_RML_TAG_DAEMON_COLL);
                OBJ_RELEASE(relay);
            }
            /* otherwise, send to each member, but don't send it back to the
             * sender as that can create an infinite loop
             */
            if (nm->name.vpid == sender->vpid) {
                OBJ_RELEASE(relay);
            } else {
                if (0 > orte_rml.send_buffer_nb(&nm->name, relay, ORTE_RML_TAG_DAEMON_COLL, 0,
                                                orte_rml_send_callback, NULL)) {
                    ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
                    OBJ_RELEASE(relay);
                }
            }
            OBJ_RELEASE(nm);
        }
    }
    /* clear the list for reuse */
    while (NULL != (nm = (orte_namelist_t*)opal_list_remove_first(&coll->targets))) {
        OBJ_RELEASE(nm);
    }

    /* determine how many contributors we need to recv - we know
     * that all job objects were found, so we can skip that test
     * while counting
     */
    np = 0;
    for (item = opal_list_get_first(&coll->participants);
         item != opal_list_get_end(&coll->participants);
         item = opal_list_get_next(item)) {
        nm = (orte_namelist_t*)item;
        /* get the job object for this participant */
        jdata = orte_get_job_data_object(nm->name.jobid);
        if (ORTE_VPID_WILDCARD == nm->name.vpid) {
            /* all procs from this job are required to participate */
            np += jdata->num_procs;
        } else {
            np++;
        }
    }

    /* are we done? */
    if (np != coll->num_global_recvd) {
        OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
                             "%s grpcomm:base:daemon_coll: MISSING CONTRIBUTORS: np %s ngr %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_VPID_PRINT(np),
                             ORTE_VPID_PRINT(coll->num_global_recvd)));
        return;
    }

    /* since we discovered that the collective is complete, we
     * need to send it to all the participants
     */
    for (item = opal_list_get_first(&coll->participants);
         item != opal_list_get_end(&coll->participants);
         item = opal_list_get_next(item)) {
        nm = (orte_namelist_t*)item;
        relay = OBJ_NEW(opal_buffer_t);
        opal_dss.pack(relay, &coll->id, 1, ORTE_GRPCOMM_COLL_ID_T);
        opal_dss.copy_payload(relay, &coll->buffer);
        /* if the vpid is wildcard, then this goes to
         * all daemons for relay
         */
        if (ORTE_VPID_WILDCARD == nm->name.vpid) {
            orte_grpcomm.xcast(nm->name.jobid, relay, ORTE_RML_TAG_COLLECTIVE);
            OBJ_RELEASE(relay);
        } else {
            /* send it to this proc */
            if (0 > orte_rml.send_buffer_nb(&nm->name, relay, ORTE_RML_TAG_COLLECTIVE, 0,
                                            orte_rml_send_callback, NULL)) {
                ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
                OBJ_RELEASE(relay);
            }
        }
    }

    /* remove this collective */
    opal_list_remove_item(&orte_grpcomm_base.active_colls, &coll->super);
    OBJ_RELEASE(coll);
}
Example #18
0
void orte_iof_orted_read_handler(int fd, short event, void *cbdata)
{
    orte_iof_read_event_t *rev = (orte_iof_read_event_t*)cbdata;
    unsigned char data[ORTE_IOF_BASE_MSG_MAX];
    opal_buffer_t *buf=NULL;
    int rc;
    int32_t numbytes;
    opal_list_item_t *item;
    orte_iof_proc_t *proct;
    orte_ns_cmp_bitmask_t mask;

    /* read up to the fragment size */
#if !defined(__WINDOWS__)
    numbytes = read(fd, data, sizeof(data));
#else
    {
        DWORD readed;
        HANDLE handle = (HANDLE)_get_osfhandle(fd);
        ReadFile(handle, data, sizeof(data), &readed, NULL);
        numbytes = (int)readed;
    }
#endif  /* !defined(__WINDOWS__) */

    OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
                         "%s iof:orted:read handler read %d bytes from %s, fd %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         numbytes, ORTE_NAME_PRINT(&rev->name), fd));

    if (numbytes <= 0) {
        if (0 > numbytes) {
            /* either we have a connection error or it was a non-blocking read */
            if (EAGAIN == errno || EINTR == errno) {
                /* non-blocking, retry */
                opal_event_add(rev->ev, 0);
                return;
            }

            OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
                                 "%s iof:orted:read handler %s Error on connection:%d",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_NAME_PRINT(&rev->name), fd));
        }
        /* numbytes must have been zero, so go down and close the fd etc */
        goto CLEAN_RETURN;
    }

    /* see if the user wanted the output directed to files */
    if (NULL != orte_output_filename) {
        /* find the sink for this rank */
        for (item = opal_list_get_first(&mca_iof_orted_component.sinks);
             item != opal_list_get_end(&mca_iof_orted_component.sinks);
             item = opal_list_get_next(item)) {
            orte_iof_sink_t *sink = (orte_iof_sink_t*)item;
            /* if the target is set, then this sink is for another purpose - ignore it */
            if (ORTE_JOBID_INVALID != sink->daemon.jobid) {
                continue;
            }
            /* if this sink isn't for output, ignore it */
            if (ORTE_IOF_STDIN & sink->tag) {
                continue;
            }

            mask = ORTE_NS_CMP_ALL;

            /* is this the desired proc? */
            if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &sink->name, &rev->name)) {
                /* output to the corresponding file */
                orte_iof_base_write_output(&rev->name, rev->tag, data, numbytes, sink->wev);
                /* done */
                break;
            }
        }
        goto RESTART;
    }

    /* prep the buffer */
    buf = OBJ_NEW(opal_buffer_t);

    /* pack the stream first - we do this so that flow control messages can
     * consist solely of the tag
     */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &rev->tag, 1, ORTE_IOF_TAG))) {
        ORTE_ERROR_LOG(rc);
        goto CLEAN_RETURN;
    }

    /* pack name of process that gave us this data */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &rev->name, 1, ORTE_NAME))) {
        ORTE_ERROR_LOG(rc);
        goto CLEAN_RETURN;
    }

    /* pack the data - only pack the #bytes we read! */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &data, numbytes, OPAL_BYTE))) {
        ORTE_ERROR_LOG(rc);
        goto CLEAN_RETURN;
    }

    /* start non-blocking RML call to forward received data */
    OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
                         "%s iof:orted:read handler sending %d bytes to HNP",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes));

    orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_IOF_HNP,
                            send_cb, NULL);

 RESTART:
    /* re-add the event */
    opal_event_add(rev->ev, 0);

    return;

 CLEAN_RETURN:
    /* must be an error, or zero bytes were read indicating that the
     * proc terminated this IOF channel - either way, find this proc
     * on our list and clean up
     */
    for (item = opal_list_get_first(&mca_iof_orted_component.procs);
         item != opal_list_get_end(&mca_iof_orted_component.procs);
         item = opal_list_get_next(item)) {
        proct = (orte_iof_proc_t*)item;
        mask = ORTE_NS_CMP_ALL;
        if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &proct->name, &rev->name)) {
            /* found it - release corresponding event. This deletes
             * the read event and closes the file descriptor
             */
            if (rev->tag & ORTE_IOF_STDOUT) {
                if( NULL != proct->revstdout ) {
                    OBJ_RELEASE(proct->revstdout);
                }
            } else if (rev->tag & ORTE_IOF_STDERR) {
                if( NULL != proct->revstderr ) {
                    OBJ_RELEASE(proct->revstderr);
                }
            } else if (rev->tag & ORTE_IOF_STDDIAG) {
                if( NULL != proct->revstddiag ) {
                    OBJ_RELEASE(proct->revstddiag);
                }
            }
            /* check to see if they are all done */
            if (NULL == proct->revstdout &&
                NULL == proct->revstderr &&
                NULL == proct->revstddiag) {
                /* this proc's iof is complete */
                opal_list_remove_item(&mca_iof_orted_component.procs, item);
                ORTE_ACTIVATE_PROC_STATE(&proct->name, ORTE_PROC_STATE_IOF_COMPLETE);
                OBJ_RELEASE(proct);
            }
            break;
        }
    }
    if (NULL != buf) {
        OBJ_RELEASE(buf);
    }
    return;
}
Example #19
0
/**
 * this routine performs a test that indicates whether or not posix shared
 * memory can safely be used during this run.
 * note: that we want to run this test as few times as possible.
 *
 * @return OPAL_SUCCESS when posix can safely be used.
 */
static int
posix_runtime_query(mca_base_module_t **module,
                    int *priority,
                    const char *hint)
{
    char tmp_buff[OPAL_SHMEM_POSIX_FILE_LEN_MAX];
    int fd = -1;

    *priority = 0;
    *module = NULL;

    /* if hint isn't null, then someone else already figured out who is the
     * best runnable component is AND the caller is relaying that info so we
     * don't have to perform a run-time query.
     */
    if (NULL != hint) {
        OPAL_OUTPUT_VERBOSE(
            (70, opal_shmem_base_output,
             "shmem: posix: runtime_query: "
             "attempting to use runtime hint (%s)\n", hint)
        );
        /* was i selected? if so, then we are done.
         * otherwise, disqualify myself.
         */
        if (0 == strcasecmp(hint,
            mca_shmem_posix_component.super.base_version.mca_component_name)) {
            *priority = mca_shmem_posix_component.priority;
            *module = (mca_base_module_t *)&opal_shmem_posix_module.super;
            return OPAL_SUCCESS;
        }
        else {
            *priority = 0;
            *module = NULL;
            return OPAL_SUCCESS;
        }
    }
    /* if we are here, then perform a run-time query because we didn't get a
     * hint.  it's either up to us to figure it out, or the caller wants us to
     * re-run the runtime query.
     */
    OPAL_OUTPUT_VERBOSE(
        (70, opal_shmem_base_output,
         "shmem: posix: runtime_query: NO HINT PROVIDED:"
         "starting run-time test...\n")
    );
    /* shmem_posix_shm_open successfully shm_opened - we can use posix sm! */
    if (-1 != (fd = shmem_posix_shm_open(tmp_buff,
                                         OPAL_SHMEM_POSIX_FILE_LEN_MAX -1))) {
        /* free up allocated resources before we return */
        if (0 != shm_unlink(tmp_buff)) {
            int err = errno;
            char hn[MAXHOSTNAMELEN];
            gethostname(hn, MAXHOSTNAMELEN - 1);
            hn[MAXHOSTNAMELEN - 1] = '\0';
            opal_show_help("help-opal-shmem-posix.txt", "sys call fail", 1,
                           hn, "shm_unlink(2)", "", strerror(err), err);
            /* something strange happened, so consider this a run-time test
             * failure even though shmem_posix_shm_open was successful */
        }
        /* all is well */
        else {
            *priority = mca_shmem_posix_component.priority;
            *module = (mca_base_module_t *)&opal_shmem_posix_module.super;
            rt_successful = true;
        }
    }

    return OPAL_SUCCESS;
}
Example #20
0
int orte_ess_base_app_setup(bool db_restrict_local)
{
    int ret;
    char *error = NULL;
    opal_value_t kv;

    /*
     * stdout/stderr buffering
     * If the user requested to override the default setting then do
     * as they wish.
     */
    if( orte_ess_base_std_buffering > -1 ) {
        if( 0 == orte_ess_base_std_buffering ) {
            setvbuf(stdout, NULL, _IONBF, 0);
            setvbuf(stderr, NULL, _IONBF, 0);
        }
        else if( 1 == orte_ess_base_std_buffering ) {
            setvbuf(stdout, NULL, _IOLBF, 0);
            setvbuf(stderr, NULL, _IOLBF, 0);
        }
        else if( 2 == orte_ess_base_std_buffering ) {
            setvbuf(stdout, NULL, _IOFBF, 0);
            setvbuf(stderr, NULL, _IOFBF, 0);
        }
    }

    /* if I am an MPI app, we will let the MPI layer define and
     * control the opal_proc_t structure. Otherwise, we need to
     * do so here */
    if (ORTE_PROC_NON_MPI) {
        orte_process_info.super.proc_name = *(opal_process_name_t*)ORTE_PROC_MY_NAME;
        orte_process_info.super.proc_hostname = strdup(orte_process_info.nodename);
        orte_process_info.super.proc_flags = OPAL_PROC_ALL_LOCAL;
        orte_process_info.super.proc_arch = opal_local_arch;
        opal_proc_local_set(&orte_process_info.super);
    }

    /* get an async event base - we use the opal_async one so
     * we don't startup extra threads if not needed */
    orte_event_base = opal_start_progress_thread("opal_async", true);
    progress_thread_running = true;
    /* open and setup the state machine */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_state_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_state_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_state_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_state_base_select";
        goto error;
    }

    /* open the errmgr */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_errmgr_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_errmgr_base_open";
        goto error;
    }

    /* setup my session directory */
    if (orte_create_session_dirs) {
        OPAL_OUTPUT_VERBOSE((2, orte_ess_base_framework.framework_output,
                             "%s setting up session dir with\n\ttmpdir: %s\n\thost %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             (NULL == orte_process_info.tmpdir_base) ? "UNDEF" : orte_process_info.tmpdir_base,
                             orte_process_info.nodename));
        if (ORTE_SUCCESS != (ret = orte_session_dir(true,
                                                    orte_process_info.tmpdir_base,
                                                    orte_process_info.nodename, NULL,
                                                    ORTE_PROC_MY_NAME))) {
            ORTE_ERROR_LOG(ret);
            error = "orte_session_dir";
            goto error;
        }
        /* Once the session directory location has been established, set
           the opal_output env file location to be in the
           proc-specific session directory. */
        opal_output_set_output_file_info(orte_process_info.proc_session_dir,
                                         "output-", NULL, NULL);
        /* store the session directory location in the database */
        OBJ_CONSTRUCT(&kv, opal_value_t);
        kv.key = strdup(OPAL_DSTORE_JOB_SDIR);
        kv.type = OPAL_STRING;
        kv.data.string = strdup(orte_process_info.job_session_dir);
        if (OPAL_SUCCESS != (ret = opal_dstore.store(opal_dstore_internal,
                                                     ORTE_PROC_MY_NAME,
                                                     &kv))) {
            ORTE_ERROR_LOG(ret);
            OBJ_DESTRUCT(&kv);
            error = "opal dstore store";
            goto error;
        }
        OBJ_DESTRUCT(&kv);
        OBJ_CONSTRUCT(&kv, opal_value_t);
        kv.key = strdup(OPAL_DSTORE_MY_SDIR);
        kv.type = OPAL_STRING;
        kv.data.string = strdup(orte_process_info.proc_session_dir);
        if (OPAL_SUCCESS != (ret = opal_dstore.store(opal_dstore_internal,
                                                     ORTE_PROC_MY_NAME,
                                                     &kv))) {
            ORTE_ERROR_LOG(ret);
            OBJ_DESTRUCT(&kv);
            error = "opal dstore store";
            goto error;
        }
        OBJ_DESTRUCT(&kv);
    }
    /* Setup the communication infrastructure */
    /*
     * OOB Layer
     */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_oob_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_oob_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_oob_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_oob_base_select";
        goto error;
    }
    /* Runtime Messaging Layer */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_rml_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_rml_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_rml_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_rml_base_select";
        goto error;
    }
    /* Messaging QoS Layer */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_qos_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_qos_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_qos_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_qos_base_select";
        goto error;
    }
    /* setup the errmgr */
    if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_errmgr_base_select";
        goto error;
    }
    /* Routed system */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_routed_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_routed_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_routed_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_routed_base_select";
        goto error;
    }
    /*
     * Group communications
     */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_grpcomm_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_grpcomm_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_grpcomm_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_grpcomm_base_select";
        goto error;
    }
    /* non-daemon/HNP apps can only have the default proxy PLM
     * module open - provide a chance for it to initialize
     */
    if (ORTE_SUCCESS != (ret = orte_plm.init())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_plm_init";
        goto error;
    }
    /* enable communication via the rml */
    if (ORTE_SUCCESS != (ret = orte_rml.enable_comm())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_rml.enable_comm";
        goto error;
    }
    /* setup the routed info  */
    if (ORTE_SUCCESS != (ret = orte_routed.init_routes(ORTE_PROC_MY_NAME->jobid, NULL))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_routed.init_routes";
        goto error;
    }
#if OPAL_ENABLE_FT_CR == 1
    /*
     * Setup the SnapC
     */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_snapc_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_snapc_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_sstore_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_sstore_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_snapc_base_select(ORTE_PROC_IS_HNP, ORTE_PROC_IS_APP))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_snapc_base_select";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_sstore_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_sstore_base_select";
        goto error;
    }
    /* apps need the OPAL CR stuff */
    opal_cr_set_enabled(true);
#else
    opal_cr_set_enabled(false);
#endif
    /* Initalize the CR setup
     * Note: Always do this, even in non-FT builds.
     * If we don't some user level tools may hang.
     */
    if (ORTE_SUCCESS != (ret = orte_cr_init())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_cr_init";
        goto error;
    }
    /* open the distributed file system */
    if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_dfs_base_framework, 0))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_dfs_base_open";
        goto error;
    }
    if (ORTE_SUCCESS != (ret = orte_dfs_base_select())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_dfs_base_select";
        goto error;
    }
    return ORTE_SUCCESS;
 error:
    if (!progress_thread_running) {
        /* can't send the help message, so ensure it
         * comes out locally
         */
        orte_show_help_finalize();
    }
    orte_show_help("help-orte-runtime.txt",
                   "orte_init:startup:internal-failure",
                   true, error, ORTE_ERROR_NAME(ret), ret);
    return ret;
}
Example #21
0
/*
    /!\ Called for each processes /!\
 */
static int
portals4_init_query(bool enable_progress_threads,
        bool enable_mpi_threads)
{
    int ret;
    ptl_md_t md;
    ptl_me_t me;

    /* Initialize Portals and create a physical, matching interface */
    ret = PtlInit();
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlInit failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    ret = PtlNIInit(PTL_IFACE_DEFAULT,
            PTL_NI_PHYSICAL | PTL_NI_MATCHING,
            PTL_PID_ANY,
            NULL,
            &mca_coll_portals4_component.ni_limits,
            &mca_coll_portals4_component.ni_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlNIInit failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }
    opal_output_verbose(10, ompi_coll_base_framework.framework_output,
        "ni_limits.max_atomic_size=%ld", mca_coll_portals4_component.ni_limits.max_atomic_size);

    if (mca_coll_portals4_component.portals_max_msg_size < mca_coll_portals4_component.ni_limits.max_msg_size)
        mca_coll_portals4_component.ni_limits.max_msg_size = mca_coll_portals4_component.portals_max_msg_size;
    opal_output_verbose(10, ompi_coll_base_framework.framework_output,
        "ni_limits.max_msg_size=%lu", mca_coll_portals4_component.ni_limits.max_msg_size);

    ret = PtlGetId(mca_coll_portals4_component.ni_h, &mca_coll_portals4_component.id);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlGetid failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }
    /* FIX ME: Need to make sure our ID matches with the MTL... */
    ret = PtlGetUid(mca_coll_portals4_component.ni_h, &mca_coll_portals4_component.uid);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlGetUid failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    ret = PtlEQAlloc(mca_coll_portals4_component.ni_h,
            MCA_COLL_PORTALS4_EQ_SIZE,
            &mca_coll_portals4_component.eq_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlEQAlloc failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    ret = PtlPTAlloc(mca_coll_portals4_component.ni_h,
            0,
            mca_coll_portals4_component.eq_h,
            REQ_COLL_TABLE_ID,
            &mca_coll_portals4_component.pt_idx);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlPTAlloc failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    if (mca_coll_portals4_component.pt_idx != REQ_COLL_TABLE_ID) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlPTAlloc return wrong pt_idx: %d\n",
                __FILE__, __LINE__,
                mca_coll_portals4_component.finish_pt_idx);
        return OMPI_ERROR;
    }

    ret = PtlPTAlloc(mca_coll_portals4_component.ni_h,
            0,
            mca_coll_portals4_component.eq_h,
            REQ_COLL_FINISH_TABLE_ID,
            &mca_coll_portals4_component.finish_pt_idx);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlPTAlloc failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    if (mca_coll_portals4_component.finish_pt_idx != REQ_COLL_FINISH_TABLE_ID) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlPTAlloc return wrong pt_idx: %d\n",
                __FILE__, __LINE__,
                mca_coll_portals4_component.finish_pt_idx);
        return OMPI_ERROR;
    }

    /* Bind MD/MDs across all memory.  We prefer (for obvious reasons)
       to have a single MD across all of memory */
    memset(&md, 0, sizeof(ptl_md_t));
    md.start = 0;
    md.length = 0;
    md.options = 0;
    md.eq_handle = PTL_EQ_NONE;
    md.ct_handle = PTL_CT_NONE;

    ret = PtlMDBind(mca_coll_portals4_component.ni_h,
            &md,
            &mca_coll_portals4_component.zero_md_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlMDBind failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    md.start = 0;
    md.length = PTL_SIZE_MAX;
    md.options = 0;
    md.eq_handle = PTL_EQ_NONE;
    md.ct_handle = PTL_CT_NONE;

    ret = PtlMDBind(mca_coll_portals4_component.ni_h,
            &md,
            &mca_coll_portals4_component.data_md_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlMDBind failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }
    OPAL_OUTPUT_VERBOSE((90, ompi_coll_base_framework.framework_output, "PtlMDBind start=%p length=%lx\n", md.start, md.length));

    /* setup finish ack ME */
    me.start = NULL;
    me.length = 0;
    me.ct_handle = PTL_CT_NONE;
    me.min_free = 0;
    me.uid = mca_coll_portals4_component.uid;
    me.options = PTL_ME_OP_PUT |
        PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE;
    me.match_id.phys.nid = PTL_NID_ANY;
    me.match_id.phys.pid = PTL_PID_ANY;
    me.match_bits = 0;
    me.ignore_bits = 0;

    ret = PtlMEAppend(mca_coll_portals4_component.ni_h,
            mca_coll_portals4_component.finish_pt_idx,
            &me,
            PTL_PRIORITY_LIST,
            NULL,
            &mca_coll_portals4_component.finish_me_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlMEAppend of barrier unexpected failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    /* This ME is used for RTR exchange only */
    me.start = NULL;
    me.length = 0;
    me.ct_handle = PTL_CT_NONE;
    me.min_free = 0;
    me.uid = mca_coll_portals4_component.uid;
    me.options = PTL_ME_OP_PUT |
            PTL_ME_EVENT_SUCCESS_DISABLE | PTL_ME_EVENT_OVER_DISABLE |
            PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE;
    me.match_id.phys.nid = PTL_NID_ANY;
    me.match_id.phys.pid = PTL_PID_ANY;

    /* Note : the RTR bit must be set to match this ME,
     * this allows to discriminate the RTR from data flow
     * (especially for the Barrier operations)
     */
    COLL_PORTALS4_SET_BITS(me.match_bits, 0, 0, 1, 0, 0, 0);
    me.ignore_bits = ~COLL_PORTALS4_RTR_MASK;

    ret = PtlMEAppend(mca_coll_portals4_component.ni_h,
            mca_coll_portals4_component.pt_idx,
            &me,
            PTL_OVERFLOW_LIST,
            NULL,
            &mca_coll_portals4_component.unex_me_h);
    if (PTL_OK != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: PtlMEAppend of barrier unexpected failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;
    }

    /* activate progress callback */
    ret = opal_progress_register(portals4_progress);
    if (OMPI_SUCCESS != ret) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: opal_progress_register failed: %d\n",
                __FILE__, __LINE__, ret);
        return OMPI_ERROR;

    }
    return OMPI_SUCCESS;

}
Example #22
0
void orte_data_server(int status, orte_process_name_t* sender,
                      opal_buffer_t* buffer, orte_rml_tag_t tag,
                      void* cbdata)
{
    orte_data_server_cmd_t command;
    orte_std_cntr_t count;
    char *service_name, *port_name;
    orte_data_object_t *data;
    opal_buffer_t *answer;
    int rc, ret;
    bool unique;

    OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
                         "%s data server got message from %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(sender)));

    count = 1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &command, &count, ORTE_DATA_SERVER_CMD))) {
        ORTE_ERROR_LOG(rc);
        return;
    }

    answer = OBJ_NEW(opal_buffer_t);

    switch(command) {
    case ORTE_DATA_SERVER_PUBLISH:
        /* unpack the service name */
        count = 1;
        if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &service_name, &count, OPAL_STRING))) {
            ORTE_ERROR_LOG(rc);
            goto SEND_ERROR;
        }

        /* unpack the port name */
        count = 1;
        if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &port_name, &count, OPAL_STRING))) {
            ORTE_ERROR_LOG(rc);
            goto SEND_ERROR;
        }

        /* unpack uniqueness flag */
        count = 1;
        if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &unique, &count, OPAL_BOOL))) {
            ORTE_ERROR_LOG(rc);
            goto SEND_ERROR;
        }

        OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
                             "%s data server: publishing service %s port %s %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             service_name, port_name,
                             unique ? "UNIQUE" : "OVERWRITE"));

        /* check the current data store to see if this service name has already
         * been published
         */
        if (NULL != (data = lookup(service_name))) {
            /* already exists - see if overwrite allowed */
            if (unique) {
                /* return ORTE_EXISTS error code */

                OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
                                     "%s data server: publishing service %s port %s already exists",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                     service_name, port_name));

                ret = ORTE_EXISTS;
            } else {
                OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
                                     "%s data server: overwriting service %s with port %s",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                     service_name, port_name));
                if (NULL != data->port) {
                    free(data->port);
                }
                data->port = port_name;
                data->owner.jobid = sender->jobid;
                data->owner.vpid = sender->vpid;
                ret = ORTE_SUCCESS;
            }
            if (ORTE_SUCCESS != (rc = opal_dss.pack(answer, &ret, 1, OPAL_INT))) {
                ORTE_ERROR_LOG(rc);
                /* if we can't pack it, we probably can't pack the
                 * rc value either, so just send whatever is there
                 */
            }
            goto SEND_ANSWER;

        }

        /* create a new data object */
        data = OBJ_NEW(orte_data_object_t);

        /* pass over the data values - these were malloc'd when unpacked,
         * so we don't need to strdup them here
         */
        data->service = service_name;
        data->port = port_name;
        data->owner.jobid = sender->jobid;
        data->owner.vpid = sender->vpid;

        /* store the data */
        data->index = opal_pointer_array_add(orte_data_server_store, data);

        OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
                             "%s data server: successfully published service %s port %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             service_name, port_name));

        /* tell the user it was wonderful... */
        ret = ORTE_SUCCESS;
        if (ORTE_SUCCESS != (rc = opal_dss.pack(answer, &ret, 1, OPAL_INT))) {
            ORTE_ERROR_LOG(rc);
            /* if we can't pack it, we probably can't pack the
             * rc value either, so just send whatever is there
             */
        }
        goto SEND_ANSWER;
        break;

    case ORTE_DATA_SERVER_LOOKUP:
        /* unpack the service name */
        count = 1;
        if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &service_name, &count, OPAL_STRING))) {
            ORTE_ERROR_LOG(rc);
            goto SEND_ERROR;
        }

        OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
                             "%s data server: lookup on service %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             service_name));

        /* locate this record in the data store */
        if (NULL == (data = lookup(service_name))) {

            OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
                                 "%s data server: service %s not found",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 service_name));

            /* return ORTE_ERR_NOT_FOUND error code */
            ret = ORTE_ERR_NOT_FOUND;
            if (ORTE_SUCCESS != (rc = opal_dss.pack(answer, &ret, 1, OPAL_INT))) {
                ORTE_ERROR_LOG(rc);
                /* if we can't pack it, we probably can't pack the
                 * rc value either, so just send whatever is there
                 */
            }
            goto SEND_ANSWER;
        }

        OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
                             "%s data server: successful lookup on service %s port %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             service_name, data->port));

        /* pack success so the unpack on the other end can
         * always unpack an int first
         */
        ret = ORTE_SUCCESS;
        if (ORTE_SUCCESS != (rc = opal_dss.pack(answer, &ret, 1, OPAL_INT))) {
            ORTE_ERROR_LOG(rc);
            /* if we can't pack it, we probably can't pack the
             * rc value either, so just send whatever is there
             */
            goto SEND_ANSWER;
        }

        /* pack the returned port */
        if (ORTE_SUCCESS != (rc = opal_dss.pack(answer, &data->port, 1, OPAL_STRING))) {
            ORTE_ERROR_LOG(rc);
            /* if we can't pack it, we probably can't pack the
             * rc value either, so just send whatever is there
             */
        }
        goto SEND_ANSWER;
        break;

    case ORTE_DATA_SERVER_UNPUBLISH:
        /* unpack the service name */
        count = 1;
        if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &service_name, &count, OPAL_STRING))) {
            ORTE_ERROR_LOG(rc);
            goto SEND_ERROR;
        }

        OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
                             "%s data server: unpublish on service %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             service_name));

        /* locate this record in the data store */
        if (NULL == (data = lookup(service_name))) {

            OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
                                 "%s data server: service %s not found",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 service_name));

            /* return ORTE_ERR_NOT_FOUND error code */
            ret = ORTE_ERR_NOT_FOUND;
            if (ORTE_SUCCESS != (rc = opal_dss.pack(answer, &ret, 1, OPAL_INT))) {
                ORTE_ERROR_LOG(rc);
                /* if we can't pack it, we probably can't pack the
                 * rc value either, so just send whatever is there
                 */
            }
            goto SEND_ANSWER;
        }

        /* check to see if the sender owns it - must be exact match */
        if (OPAL_EQUAL != orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
                                                        &data->owner, sender)) {

            OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
                                 "%s data server: service %s not owned by sender %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 service_name, ORTE_NAME_PRINT(sender)));

            /* nope - return ORTE_ERR_PERM error code */
            ret = ORTE_ERR_PERM;
            if (ORTE_SUCCESS != (rc = opal_dss.pack(answer, &ret, 1, OPAL_INT))) {
                ORTE_ERROR_LOG(rc);
                /* if we can't pack it, we probably can't pack the
                 * rc value either, so just send whatever is there
                 */
            }
            goto SEND_ANSWER;
        }

        /* delete the object from the data store */
        opal_pointer_array_set_item(orte_data_server_store, data->index, NULL);
        OBJ_RELEASE(data);

        OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
                             "%s data server: service %s unpublished",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             service_name));

        /* tell the sender this succeeded */
        ret = ORTE_SUCCESS;
        if (ORTE_SUCCESS != (rc = opal_dss.pack(answer, &ret, 1, OPAL_INT))) {
            ORTE_ERROR_LOG(rc);
            /* if we can't pack it, we probably can't pack the
             * rc value either, so just send whatever is there
             */
        }
        goto SEND_ANSWER;
        break;

    default:
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        rc = ORTE_ERR_BAD_PARAM;
        break;
    }

 SEND_ERROR:
    /* pack the error code */
    if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, &rc, 1, OPAL_INT))) {
        ORTE_ERROR_LOG(ret);
    }

 SEND_ANSWER:
    if (0 > (rc = orte_rml.send_buffer_nb(sender, answer, ORTE_RML_TAG_DATA_CLIENT, rml_cbfunc, NULL))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(answer);
    }
}
/***   MODEX SECTION ***/
static int modex(orte_grpcomm_collective_t *coll)
{
    char *cptr, **fields;
    orte_vpid_t v;
    orte_process_name_t name;
    int rc;
    opal_hwloc_locality_t locality;
    orte_local_rank_t local_rank;
    orte_node_rank_t node_rank;
    bool bound;

     OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_framework.framework_output,
                         "%s grpcomm:pmi: modex entered",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

     /* our RTE data was constructed and pushed in the ESS pmi component */

     /* commit our modex info */
     opal_db.commit((opal_identifier_t *)ORTE_PROC_MY_NAME);

    /* cycle thru all my peers and collect their RTE info */
    name.jobid = ORTE_PROC_MY_NAME->jobid;
    fields = NULL;
    for (v=0; v < orte_process_info.num_procs; v++) {
        if (v == ORTE_PROC_MY_NAME->vpid) {
            continue;
        }
        name.vpid = v;
        /* fetch the RTE data for this proc */
	if (ORTE_SUCCESS != (rc = opal_db.fetch((opal_identifier_t*)&name, "RTE", (void **)&cptr, OPAL_STRING))) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }
        /* split on commas */
        fields = opal_argv_split(cptr, ',');
        free(cptr);
        /* sanity check */
        if (4 > opal_argv_count(fields)) {
            ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
            opal_argv_free(fields);
            return ORTE_ERR_BAD_PARAM;
        }
        
        /* store the composite parts */
        /* first field is the URI */
        if (ORTE_SUCCESS != (rc = opal_db.store((opal_identifier_t*)&name, OPAL_DB_INTERNAL, ORTE_DB_RMLURI, fields[0], OPAL_STRING))) {
            ORTE_ERROR_LOG(rc);
            opal_argv_free(fields);
            return rc;
        }
        OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_framework.framework_output,
                             "%s grpcomm:pmi: proc %s oob endpoint %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(&name), fields[0]));
        /* set the contact info into the hash table */
        if (ORTE_SUCCESS != (rc = orte_rml.set_contact_info(fields[0]))) {
            opal_argv_free(fields);
            return rc;
        }
        /* next is the hostname */
        if (ORTE_SUCCESS != (rc = opal_db.store((opal_identifier_t*)&name, OPAL_DB_INTERNAL, ORTE_DB_HOSTNAME, fields[1], OPAL_STRING))) {
            ORTE_ERROR_LOG(rc);
            opal_argv_free(fields);
            return rc;
        }
        /* local rank */
        local_rank = strtoul(fields[2], NULL, 10);
        if (ORTE_SUCCESS != (rc = opal_db.store((opal_identifier_t*)&name, OPAL_DB_INTERNAL, ORTE_DB_LOCALRANK, &local_rank, ORTE_LOCAL_RANK))) {
            ORTE_ERROR_LOG(rc);
            opal_argv_free(fields);
            return rc;
        }
        /* node rank */
        node_rank = strtoul(fields[3], NULL, 10);
        if (ORTE_SUCCESS != (rc = opal_db.store((opal_identifier_t*)&name, OPAL_DB_INTERNAL, ORTE_DB_NODERANK, &node_rank, ORTE_NODE_RANK))) {
            ORTE_ERROR_LOG(rc);
            opal_argv_free(fields);
            return rc;
        }
        /* if the process was bound, then there will be another field
         * that contains its cpuset
         */
        if (5 == opal_argv_count(fields)) {
            if (ORTE_SUCCESS != (rc = opal_db.store((opal_identifier_t*)&name, OPAL_DB_INTERNAL, ORTE_DB_CPUSET, fields[4], OPAL_STRING))) {
                ORTE_ERROR_LOG(rc);
                opal_argv_free(fields);
                return rc;
            }
            bound = true;
        } else {
            /* store a placeholder so we know that this value was retrieved,
             * but the proc wasn't bound
             */
            if (ORTE_SUCCESS != (rc = opal_db.store((opal_identifier_t*)&name, OPAL_DB_INTERNAL, ORTE_DB_CPUSET, NULL, OPAL_STRING))) {
                ORTE_ERROR_LOG(rc);
                opal_argv_free(fields);
                return rc;
            }
            bound = false;
        }

        /* compute and store the locality as it isn't something that gets pushed to PMI */
        if (0 != strcmp(fields[1], orte_process_info.nodename)) {
            /* this is on a different node, then mark as non-local */
            locality = OPAL_PROC_NON_LOCAL;
        } else if (!bound) {
            /* if we share a node, but we don't know anything more, then
             * mark us as on the node as this is all we know
             */
            locality = OPAL_PROC_ON_NODE;
        } else {
            /* determine relative location on our node */
            locality = opal_hwloc_base_get_relative_locality(opal_hwloc_topology,
                                                             orte_process_info.cpuset,
                                                             fields[4]);
        }
        if (ORTE_SUCCESS != (rc = opal_db.store((opal_identifier_t*)&name, OPAL_DB_INTERNAL, ORTE_DB_LOCALITY, &locality, OPAL_HWLOC_LOCALITY_T))) {
            ORTE_ERROR_LOG(rc);
            opal_argv_free(fields);
            return rc;
        }

        /* cleanup */
        opal_argv_free(fields);
        fields = NULL;
    }

    /* execute the callback */
    coll->active = false;
    if (NULL != coll->cbfunc) {
        coll->cbfunc(NULL, coll->cbdata);
    }
    return rc;
}
Example #24
0
int orte_sstore_central_global_sync(orte_sstore_base_handle_t handle)
{
    int ret, exit_status = ORTE_SUCCESS;
    orte_sstore_central_global_snapshot_info_t *handle_info = NULL;

    OPAL_OUTPUT_VERBOSE((10, mca_sstore_central_component.super.output_handle,
                         "sstore:central:(global): sync()"));

    /*
     * Lookup the handle
     */
    handle_info = find_handle_info(handle);
    if( SSTORE_GLOBAL_SYNCING != handle_info->state ) {
        handle_info->state = SSTORE_GLOBAL_SYNCING;
        if( ORTE_SNAPC_LOCAL_COORD_TYPE == (orte_snapc_coord_type & ORTE_SNAPC_LOCAL_COORD_TYPE) ) {
            return orte_sstore_central_local_sync(handle);
        }
    }

    /*
     * Synchronize all of the files
     */
    while(handle_info->num_procs_synced < handle_info->num_procs_total) {
        opal_progress();
    }

    /*
     * Finalize and close the metadata
     */
    if( ORTE_SUCCESS != (ret = metadata_write_timestamp(handle_info)) ) {
        ORTE_ERROR_LOG(ret);
        exit_status = ret;
        goto cleanup;
    }

    if( handle_info->migrating ) {
        if( ORTE_SUCCESS != (ret = metadata_write_int(handle_info,
                                                      SSTORE_METADATA_INTERNAL_DONE_MIG_SEQ_STR,
                                                      handle_info->seq_num)) ) {
            ORTE_ERROR_LOG(ret);
            exit_status = ret;
            goto cleanup;
        }
    } else {
        if( ORTE_SUCCESS != (ret = metadata_write_int(handle_info,
                                                      SSTORE_METADATA_INTERNAL_DONE_SEQ_STR,
                                                      handle_info->seq_num)) ) {
            ORTE_ERROR_LOG(ret);
            exit_status = ret;
            goto cleanup;
        }
    }

    if( ORTE_SUCCESS != (ret = metadata_close(handle_info)) ) {
        ORTE_ERROR_LOG(ret);
        exit_status = ret;
        goto cleanup;
    }

    /* JJH: We should lock this var! */
    if( !handle_info->migrating ) {
        orte_sstore_base_is_checkpoint_available = true;
        orte_sstore_handle_last_stable = orte_sstore_handle_current;
    }

 cleanup:
    return exit_status;
}
/**
 * segment_attach can only be called after a successful call to segment_create
 */
static void *
segment_attach(opal_shmem_ds_t *ds_buf)
{
    pid_t my_pid = getpid();

    if (my_pid != ds_buf->seg_cpid) {
        if (-1 == (ds_buf->seg_id = open(ds_buf->seg_name, O_CREAT | O_RDWR,
                                         0600))) {
            int err = errno;
            char hn[MAXHOSTNAMELEN];
            gethostname(hn, MAXHOSTNAMELEN - 1);
            hn[MAXHOSTNAMELEN - 1] = '\0';
            opal_show_help("help-opal-shmem-mmap.txt", "sys call fail", 1, hn,
                           "open(2)", "", strerror(err), err);
            return NULL;
        }
        if (MAP_FAILED == (ds_buf->seg_base_addr = (unsigned char *)
                              mmap(NULL, ds_buf->seg_size,
                                   PROT_READ | PROT_WRITE, MAP_SHARED,
                                   ds_buf->seg_id, 0))) {
            int err = errno;
            char hn[MAXHOSTNAMELEN];
            gethostname(hn, MAXHOSTNAMELEN - 1);
            hn[MAXHOSTNAMELEN - 1] = '\0';
            opal_show_help("help-opal-shmem-mmap.txt", "sys call fail", 1, hn,
                           "mmap(2)", "", strerror(err), err);
            /* mmap failed, so close the file and return NULL - no error check
             * here because we are already in an error path...
             */
            close(ds_buf->seg_id);
            return NULL;
        }
        /* all is well */
        /* if close fails here, that's okay.  just let the user know and
         * continue.  if we got this far, open and mmap were successful...
         */
        if (0 != close(ds_buf->seg_id)) {
            int err = errno;
            char hn[MAXHOSTNAMELEN];
            gethostname(hn, MAXHOSTNAMELEN - 1);
            hn[MAXHOSTNAMELEN - 1] = '\0';
            opal_show_help("help-opal-shmem-mmap.txt", "sys call fail", 1,
                           hn, "close(2)", "", strerror(err), err);
        }
    }
    /* else i was the segment creator.  nothing to do here because all the hard
     * work was done in segment_create :-).
     */

    OPAL_OUTPUT_VERBOSE(
        (70, opal_shmem_base_framework.framework_output,
         "%s: %s: attach successful "
         "(id: %d, size: %lu, name: %s)\n",
         mca_shmem_mmap_component.super.base_version.mca_type_name,
         mca_shmem_mmap_component.super.base_version.mca_component_name,
         ds_buf->seg_id, (unsigned long)ds_buf->seg_size, ds_buf->seg_name)
    );

    /* update returned base pointer with an offset that hides our stuff */
    return (ds_buf->seg_base_addr + sizeof(opal_shmem_seg_hdr_t));
}
Example #26
0
static void sstore_central_global_recv(int status,
                                       orte_process_name_t* sender,
                                       opal_buffer_t* buffer,
                                       orte_rml_tag_t tag,
                                       void* cbdata)
{
    int ret;
    orte_sstore_central_cmd_flag_t command;
    orte_std_cntr_t count;
    orte_sstore_base_handle_t loc_id;
    orte_sstore_central_global_snapshot_info_t *handle_info = NULL;

    if( ORTE_RML_TAG_SSTORE_INTERNAL != tag ) {
        return;
    }

    /*
     * If this was an application process contacting us, then act like an orted
     * instead of an HNP
     */
    if(OPAL_EQUAL != orte_util_compare_name_fields(ORTE_NS_CMP_JOBID,
                                                   ORTE_PROC_MY_NAME,
                                                   sender)) {
        orte_sstore_central_local_recv(status, sender, buffer, tag, cbdata);
        return;
    }


    OPAL_OUTPUT_VERBOSE((10, mca_sstore_central_component.super.output_handle,
                         "sstore:central:(global): process_cmd(%s)",
                         ORTE_NAME_PRINT(sender)));

    count = 1;
    if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &command, &count, ORTE_SSTORE_CENTRAL_CMD))) {
        ORTE_ERROR_LOG(ret);
        goto cleanup;
    }

    count = 1;
    if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &loc_id, &count, ORTE_SSTORE_HANDLE )) ) {
        ORTE_ERROR_LOG(ret);
        goto cleanup;
    }

    /*
     * Find the referenced handle
     */
    if(NULL == (handle_info = find_handle_info(loc_id)) ) {
        ; /* JJH big problem */
    }

    /*
     * Process the command
     */
    if( ORTE_SSTORE_CENTRAL_PULL == command ) {
        process_local_pull(sender, buffer, handle_info);
    }
    else if( ORTE_SSTORE_CENTRAL_PUSH == command ) {
        process_local_push(sender, buffer, handle_info);
    }

 cleanup:
    return;
}
Example #27
0
int
ompi_osc_portals4_fetch_and_op(void *origin_addr,
                               void *result_addr,
                               struct ompi_datatype_t *dt,
                               int target,
                               OPAL_PTRDIFF_TYPE target_disp,
                               struct ompi_op_t *op,
                               struct ompi_win_t *win)
{
    int ret;
    ompi_osc_portals4_module_t *module =
        (ompi_osc_portals4_module_t*) win->w_osc_module;
    ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
    size_t length;
    size_t offset;
    ptl_op_t ptl_op;
    ptl_datatype_t ptl_dt;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "fetch_and_op: 0x%lx, 0x%lx, %s, %d, %d, %s, 0x%lx",
                         (unsigned long) origin_addr, 
                         (unsigned long) result_addr,
                         dt->name, target, (int) target_disp,
                         op->o_name,
                         (unsigned long) win));

    ret = ompi_osc_portals4_get_dt(dt, &ptl_dt);
    if (OMPI_SUCCESS != ret) return ret;

    offset = get_displacement(module, target) * target_disp;

    ret = ompi_datatype_type_size(dt, &length);
    if (OMPI_SUCCESS != ret) return ret;

    assert(length < module->fetch_atomic_max);

    (void)opal_atomic_add_64(&module->opcount, 1);

    if (MPI_REPLACE == op) {
        ptl_handle_md_t result_md_h, origin_md_h;
        void *result_md_base, *origin_md_base;
        ptl_size_t result_md_offset, origin_md_offset;

        ompi_osc_portals4_get_md(result_addr, module->md_h, &result_md_h, &result_md_base);
        result_md_offset = ((char*) result_addr - (char*) result_md_base);
        ompi_osc_portals4_get_md(origin_addr, module->md_h, &origin_md_h, &origin_md_base);
        origin_md_offset = ((char*) origin_addr - (char*) origin_md_base);

        ret = PtlSwap(result_md_h,
                      result_md_offset,
                      origin_md_h,
                      origin_md_offset,
                      length,
                      peer,
                      module->pt_idx,
                      module->match_bits,
                      offset,
                      NULL,
                      0,
                      NULL,
                      PTL_SWAP,
                      ptl_dt);
    } else if (MPI_NO_OP == op) {
        ptl_handle_md_t md_h;
        void *md_base;
        ptl_size_t md_offset;

        ompi_osc_portals4_get_md(result_addr, module->md_h, &md_h, &md_base);
        md_offset = ((char*) result_addr - (char*) md_base);

        ret = PtlGet(md_h,
                     md_offset,
                     length,
                     peer,
                     module->pt_idx,
                     module->match_bits,
                     offset,
                     NULL);
    } else {
        ptl_handle_md_t result_md_h, origin_md_h;
        void *result_md_base, *origin_md_base;
        ptl_size_t result_md_offset, origin_md_offset; 

        ret = ompi_osc_portals4_get_op(op, &ptl_op);
        if (OMPI_SUCCESS != ret) return ret;

        ompi_osc_portals4_get_md(result_addr, module->md_h, &result_md_h, &result_md_base);
        result_md_offset = ((char*) result_addr - (char*) result_md_base);
        ompi_osc_portals4_get_md(origin_addr, module->md_h, &origin_md_h, &origin_md_base);
        origin_md_offset = ((char*) origin_addr - (char*) origin_md_base);

        ret = PtlFetchAtomic(result_md_h,
                             result_md_offset,
                             origin_md_h,
                             origin_md_offset,
                             length,
                             peer,
                             module->pt_idx,
                             module->match_bits,
                             offset,
                             NULL,
                             0,
                             ptl_op,
                             ptl_dt);
    }
    if (OMPI_SUCCESS != ret) {
        return ret;
    }

    return OMPI_SUCCESS;
}
/* process incoming coll returns */
static void app_recv(int status, orte_process_name_t* sender,
                     opal_buffer_t* buffer, orte_rml_tag_t tag,
                     void* cbdata)
{
    orte_grpcomm_collective_t *coll, *cptr;
    opal_list_item_t *item;
    int n, rc;
    orte_grpcomm_coll_id_t id;
    orte_namelist_t *nm;

    /* get the collective id */
    n = 1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &id, &n, ORTE_GRPCOMM_COLL_ID_T))) {
        ORTE_ERROR_LOG(rc);
        return;
    }
    
    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
                         "%s grpcomm:base:receive processing collective return for id %d recvd from %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), id, ORTE_NAME_PRINT(sender)));

    /* if the sender is my daemon, then this collective is
     * a global one and is complete
     */
    if (ORTE_PROC_MY_DAEMON->jobid == sender->jobid &&
        ORTE_PROC_MY_DAEMON->vpid == sender->vpid) {
        /* search my list of active collectives */
        for (item = opal_list_get_first(&orte_grpcomm_base.active_colls);
             item != opal_list_get_end(&orte_grpcomm_base.active_colls);
             item = opal_list_get_next(item)) {
            coll = (orte_grpcomm_collective_t*)item;
            OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
                                 "%s CHECKING COLL id %d",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 coll->id));
            
            if (id == coll->id) {
                /* see if the collective needs another step */
                if (NULL != coll->next_cb) {
                    /* have to go here next */
                    coll->next_cb(buffer, coll->next_cbdata);
                    break;
                }
                /* flag the collective as complete */
                coll->active = false;
                /* cleanup */
                opal_list_remove_item(&orte_grpcomm_base.active_colls, item);
                /* callback the specified function */
                if (NULL != coll->cbfunc) {
                    OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
                                         "%s grpcomm:base:receive executing callback",
                                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
                    
                    coll->cbfunc(buffer, coll->cbdata);
                }
                /* do NOT release the collective - it is the responsibility
                 * of whomever passed it down to us
                 */
                break;
            }
        }
        return;
    }

    /* this came from another application process, so it
     * belongs to a non-global collective taking place
     * only between procs. Since there is a race condition
     * between when we might create our own collective and
     * when someone might send it to us, we may not have
     * the collective on our list - see if we do
     */
    coll = NULL;
    for (item = opal_list_get_first(&orte_grpcomm_base.active_colls);
         item != opal_list_get_end(&orte_grpcomm_base.active_colls);
         item = opal_list_get_next(item)) {
        cptr = (orte_grpcomm_collective_t*)item;
        OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
                             "%s CHECKING COLL id %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             cptr->id));
        
        if (id == cptr->id) {
            /* aha - we do have it */
            coll = cptr;
            break;
        }
    }
    if (NULL == coll) {
        /* nope - add it */
        coll = OBJ_NEW(orte_grpcomm_collective_t);
        coll->id = id;
        opal_list_append(&orte_grpcomm_base.active_colls, &coll->super);
   }
    /* append the sender to the list of targets so
     * we know we already have their contribution
     */
    nm = OBJ_NEW(orte_namelist_t);
    nm->name.jobid = sender->jobid;
    nm->name.vpid = sender->vpid;
    opal_list_append(&coll->targets, &nm->super);

    /* transfer the rest of the incoming data to the collection bucket.
     * Note that we don't transfer it to the collective's buffer
     * as the modex itself uses that
     */
    opal_dss.copy_payload(&coll->local_bucket, buffer);

    /* if the length of the participant list equals the
     * length of the target list, then the collective is
     * complete
     */
    if (opal_list_get_size(&coll->participants) ==  opal_list_get_size(&coll->targets)) {
        /* replace whatever is in the collective's buffer
         * field with what we collected
         */
        OBJ_DESTRUCT(&coll->buffer);
        OBJ_CONSTRUCT(&coll->buffer, opal_buffer_t);
        opal_dss.copy_payload(&coll->buffer, &coll->local_bucket);
        /* see if the collective needs another step */
        if (NULL != coll->next_cb) {
            /* have to go here next */
            coll->next_cb(&coll->buffer, coll->next_cbdata);
            return;
        }
        /* flag the collective as complete */
        coll->active = false;
        /* cleanup */
        opal_list_remove_item(&orte_grpcomm_base.active_colls, item);
        /* callback the specified function */
        if (NULL != coll->cbfunc) {
            OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
                                 "%s grpcomm:base:receive executing callback",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
            
            coll->cbfunc(&coll->buffer, coll->cbdata);
        }
        /* do NOT release the collective - it is the responsibility
         * of whomever passed it down to us
         */
    }
}
Example #29
0
int
ompi_osc_portals4_raccumulate(void *origin_addr,
                              int origin_count,
                              struct ompi_datatype_t *origin_dt,
                              int target,
                              OPAL_PTRDIFF_TYPE target_disp,
                              int target_count,
                              struct ompi_datatype_t *target_dt,
                              struct ompi_op_t *op,
                              struct ompi_win_t *win,
                              struct ompi_request_t **ompi_req)
{
    int ret;
    ompi_osc_portals4_request_t *request;
    ompi_osc_portals4_module_t *module =
        (ompi_osc_portals4_module_t*) win->w_osc_module;
    ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
    size_t length, sent;
    size_t offset;
    ptl_op_t ptl_op;
    ptl_datatype_t ptl_dt;
    ptl_handle_md_t md_h;
    void *md_base;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "raccumulate: 0x%lx, %d, %s, %d, %d, %d, %s, %s 0x%lx",
                         (unsigned long) origin_addr, origin_count,
                         origin_dt->name, target, (int) target_disp,
                         target_count, target_dt->name,
                         op->o_name,
                         (unsigned long) win));

    OMPI_OSC_PORTALS4_REQUEST_ALLOC(win, request);
    if (NULL == request) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
    *ompi_req = &request->super;

    offset = get_displacement(module, target) * target_disp;

    if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count) ||
        !ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
        OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
        opal_output(ompi_osc_base_framework.framework_output,
                    "MPI_Raccumulate: transfer of non-contiguous memory is not currently supported.\n");
        return OMPI_ERR_NOT_SUPPORTED;
    } else {
        ptl_size_t md_offset;

        ret = ompi_datatype_type_size(origin_dt, &length);
        if (OMPI_SUCCESS != ret) {
            OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
            return ret;
        }
        length *= origin_count;
        sent = 0;

        ompi_osc_portals4_get_md(origin_addr, module->req_md_h, &md_h, &md_base);
        md_offset = ((char*) origin_addr - (char*) md_base);

        do {
            size_t msg_length = MIN(module->atomic_max, length - sent);
            (void)opal_atomic_add_64(&module->opcount, 1);
            request->ops_expected++;

            if (MPI_REPLACE == op) {
                ret = PtlPut(md_h,
                             md_offset + sent,
                             msg_length,
                             PTL_ACK_REQ,
                             peer,
                             module->pt_idx,
                             module->match_bits,
                             offset + sent,
                             request,
                             0);
            } else {
                ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
                if (OMPI_SUCCESS != ret) return ret;

                ret = ompi_osc_portals4_get_op(op, &ptl_op);
                if (OMPI_SUCCESS != ret) return ret;

                ret = PtlAtomic(md_h,
                                offset + sent,
                                msg_length,
                                PTL_ACK_REQ,
                                peer,
                                module->pt_idx,
                                module->match_bits,
                                offset + sent,
                                request,
                                0,
                                ptl_op,
                                ptl_dt);
            }
            if (OMPI_SUCCESS != ret) {
                OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
                return ret;
            }
            sent += msg_length;
        } while (sent < length);
    }

    return OMPI_SUCCESS;
}
Example #30
0
static int hostfile_parse_line(int token, opal_list_t* updates, opal_list_t* exclude, bool keep_all)
{
    int rc;
    orte_node_t* node;
    bool got_count = false;
    bool got_max = false;
    char* value;
    char** argv;
    char* node_name = NULL;
    char* node_alias = NULL;
    char* username = NULL;
    int cnt;
    int number_of_slots = 0;
    char buff[64];

    if (ORTE_HOSTFILE_STRING == token ||
        ORTE_HOSTFILE_HOSTNAME == token ||
        ORTE_HOSTFILE_INT == token ||
        ORTE_HOSTFILE_IPV4 == token ||
        ORTE_HOSTFILE_IPV6 == token) {

        if(ORTE_HOSTFILE_INT == token) {
            snprintf(buff, 64, "%d", orte_util_hostfile_value.ival);
            value = buff;
        } else {
            value = orte_util_hostfile_value.sval;
        }
        argv = opal_argv_split (value, '@');
        
        cnt = opal_argv_count (argv);
        if (1 == cnt) {
            node_name = strdup(argv[0]);
        } else if (2 == cnt) {
            username = strdup(argv[0]);
            node_name = strdup(argv[1]);
        } else {
            opal_output(0, "WARNING: Unhandled user@host-combination\n"); /* XXX */
        }
        opal_argv_free (argv);

        /* if the first letter of the name is '^', then this is a node
         * to be excluded. Remove the ^ character so the nodename is
         * usable, and put it on the exclude list
         */
        if ('^' == node_name[0]) {
            int i, len;
            len = strlen(node_name);
            for (i=1; i < len; i++) {
                node_name[i-1] = node_name[i];
            }
            node_name[len-1] = '\0';  /* truncate */
            
            OPAL_OUTPUT_VERBOSE((3, orte_debug_output,
                                 "%s hostfile: node %s is being excluded",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node_name));
            
            /* convert this into something globally unique */
            if (strcmp(node_name, "localhost") == 0 || opal_ifislocal(node_name)) {
                /* Nodename has been allocated, that is for sure */
                if (orte_show_resolved_nodenames &&
                    0 != strcmp(node_name, orte_process_info.nodename)) {
                    node_alias = strdup(node_name);
                }
                free (node_name);
                node_name = strdup(orte_process_info.nodename);
            }
            
            /* Do we need to make a new node object?  First check to see
             if it's already in the exclude list */
            if (NULL == (node = hostfile_lookup(exclude, node_name))) {
                node = OBJ_NEW(orte_node_t);
                node->name = node_name;
                if (NULL != username) {
                    node->username = strdup(username);
                }
            }
            /* Note that we need to add this back to the exclude list.
             If it was found, we just removed it (in hostfile_lookup()),
             so this puts it back. If it was not found, then we have to
             add it to the exclude list anyway. */
            opal_list_append(exclude, &node->super);
            return ORTE_SUCCESS;
        }
        
        /* this is not a node to be excluded, so we need to process it and
         * add it to the "include" list. See if this host is actually us.
         */
        if (strcmp(node_name, "localhost") == 0 || opal_ifislocal(node_name)) {
            /* Nodename has been allocated, that is for sure */
            if (orte_show_resolved_nodenames &&
                0 != strcmp(node_name, orte_process_info.nodename)) {
                node_alias = strdup(node_name);
            }
            free (node_name);
            node_name = strdup(orte_process_info.nodename);
        }

        OPAL_OUTPUT_VERBOSE((3, orte_debug_output,
                             "%s hostfile: node %s is being included - keep all is %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node_name,
                             keep_all ? "TRUE" : "FALSE"));

        /* Do we need to make a new node object?  First check to see
         * if we are keeping everything or if it's already in the updates
         * list. Because we check keep_all first, if that is set we will
         * not do the hostfile_lookup call, and thus won't remove the
         * pre-existing node from the updates list
         */
        if (keep_all || NULL == (node = hostfile_lookup(updates, node_name))) {
            node = OBJ_NEW(orte_node_t);
            node->name = node_name;
            if (NULL != username) {
                node->username = strdup(username);
            }
        }
        /* do we need to record an alias for this node? */
        if (NULL != node_alias) {
            /* add to list of aliases for this node - only add if unique */
            opal_argv_append_unique_nosize(&node->alias, node_alias, false);
            free(node_alias);
        }
    } else if (ORTE_HOSTFILE_RELATIVE == token) {
        /* store this for later processing */
        node = OBJ_NEW(orte_node_t);
        node->name = strdup(orte_util_hostfile_value.sval);
    } else if (ORTE_HOSTFILE_RANK == token) {
        /* we can ignore the rank, but we need to extract the node name. we
         * first need to shift over to the other side of the equal sign as
         * this is where the node name will be
         */
        while (!orte_util_hostfile_done &&
               ORTE_HOSTFILE_EQUAL != token) {
            token = orte_util_hostfile_lex();
        }
        if (orte_util_hostfile_done) {
            /* bad syntax somewhere */
            return ORTE_ERROR;
        }
        /* next position should be the node name */
        token = orte_util_hostfile_lex();
        if(ORTE_HOSTFILE_INT == token) {
            snprintf(buff, 64, "%d", orte_util_hostfile_value.ival);
            value = buff;
        } else {
            value = orte_util_hostfile_value.sval;
        }
        
        argv = opal_argv_split (value, '@');
        
        cnt = opal_argv_count (argv);
        if (1 == cnt) {
            node_name = strdup(argv[0]);
        } else if (2 == cnt) {
            username = strdup(argv[0]);
            node_name = strdup(argv[1]);
        } else {
            opal_output(0, "WARNING: Unhandled user@host-combination\n"); /* XXX */
        }
        opal_argv_free (argv);
        /* Do we need to make a new node object?  First check to see
         * if we are keeping everything or if it's already in the updates
         * list. Because we check keep_all first, if that is set we will
         * not do the hostfile_lookup call, and thus won't remove the
         * pre-existing node from the updates list
         */
        if (keep_all || NULL == (node = hostfile_lookup(updates, node_name))) {
            node = OBJ_NEW(orte_node_t);
            node->name = node_name;
            if (NULL != username) {
                node->username = strdup(username);
            }
        }
        /* add a slot */
        node->slots++;
        /* do we need to record an alias for this node? */
        if (NULL != node_alias) {
            /* add to list of aliases for this node - only add if unique */
            opal_argv_append_unique_nosize(&node->alias, node_alias, false);
            free(node_alias);
        }
        /* skip to end of line */
        while (!orte_util_hostfile_done &&
               ORTE_HOSTFILE_NEWLINE != token) {
            token = orte_util_hostfile_lex();
        }
        opal_list_append(updates, &node->super);
        return ORTE_SUCCESS;
    } else {
        hostfile_parse_error(token);
        return ORTE_ERROR;
    }
    
    got_count = false;
    while (!orte_util_hostfile_done) {
        token = orte_util_hostfile_lex();
        
        switch (token) {            
        case ORTE_HOSTFILE_DONE:
            goto done;

        case ORTE_HOSTFILE_NEWLINE:
            goto done;

        case ORTE_HOSTFILE_USERNAME:
            node->username = hostfile_parse_string();
            break;

        case ORTE_HOSTFILE_BOARDS:
            rc = hostfile_parse_int();
            if (rc < 0) {
                orte_show_help("help-hostfile.txt", "boards",
                               true,
                               cur_hostfile_name, rc);
                OBJ_RELEASE(node);
                return ORTE_ERROR;
            }
            node->boards = rc;
            break;

        case ORTE_HOSTFILE_SOCKETS_PER_BOARD:
            rc = hostfile_parse_int();
            if (rc < 0) {
                orte_show_help("help-hostfile.txt", "sockets",
                               true,
                               cur_hostfile_name, rc);
                OBJ_RELEASE(node);
                return ORTE_ERROR;
            }
            node->sockets_per_board = rc;
            break;

        case ORTE_HOSTFILE_CORES_PER_SOCKET:
            rc = hostfile_parse_int();
            if (rc < 0) {
                orte_show_help("help-hostfile.txt", "cores",
                               true,
                               cur_hostfile_name, rc);
                OBJ_RELEASE(node);
                return ORTE_ERROR;
            }
            node->cores_per_socket = rc;
            break;

        case ORTE_HOSTFILE_CPU_SET:
            if (NULL != node->cpu_set) {
                free(node->cpu_set);
            }
            node->cpu_set = hostfile_parse_string();
            break;
                
        case ORTE_HOSTFILE_COUNT:
        case ORTE_HOSTFILE_CPU:
        case ORTE_HOSTFILE_SLOTS:
            rc = hostfile_parse_int();
            if (rc < 0) {
                orte_show_help("help-hostfile.txt", "slots",
                               true,
                               cur_hostfile_name, rc);
                OBJ_RELEASE(node);
                return ORTE_ERROR;
            }
            node->slots += rc;
            got_count = true;

            /* Ensure that slots_max >= slots */
            if (node->slots_max != 0 && node->slots_max < node->slots) {
                node->slots_max = node->slots;
            }
            break;

        case ORTE_HOSTFILE_SLOTS_MAX:
            rc = hostfile_parse_int();
            if (rc < 0) {
                orte_show_help("help-hostfile.txt", "max_slots",
                               true,
                               cur_hostfile_name, ((size_t) rc));
                OBJ_RELEASE(node);
                return ORTE_ERROR;
            }
            /* Only take this update if it puts us >= node_slots */
            if (rc >= node->slots) {
                if (node->slots_max != rc) {
                    node->slots_max = rc;
                    got_max = true;
                }
            } else {
                orte_show_help("help-hostfile.txt", "max_slots_lt",
                               true,
                               cur_hostfile_name, node->slots, rc);
                ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
                OBJ_RELEASE(node);
                return ORTE_ERROR;
            }
            break;

        default:
            hostfile_parse_error(token);
            OBJ_RELEASE(node);
            return ORTE_ERROR;
        }
        if (number_of_slots > node->slots) {
            ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
            OBJ_RELEASE(node);
            return ORTE_ERROR;
        }
    }

done:
    if (!got_count) {
        if (got_max) {
            node->slots = node->slots_max;
        } else {
            ++node->slots;
        }
    }
    opal_list_append(updates, &node->super);

    return ORTE_SUCCESS;
}