Example #1
0
/* ////////////////////////////////////////////////////////////////////////// */
static mca_common_sm_module_t *
attach_and_init(opal_shmem_ds_t *shmem_bufp,
                size_t size,
                size_t size_ctl_structure,
                size_t data_seg_alignment,
                bool first_call)
{
    mca_common_sm_module_t *map = NULL;
    mca_common_sm_seg_header_t *seg = NULL;
    unsigned char *addr = NULL;

    /* attach to the specified segment. note that at this point, the contents of
     * *shmem_bufp have already been initialized via opal_shmem_segment_create.
     */
    if (NULL == (seg = (mca_common_sm_seg_header_t *)
                       opal_shmem_segment_attach(shmem_bufp))) {
        return NULL;
    }
    opal_atomic_rmb();

    if (NULL == (map = OBJ_NEW(mca_common_sm_module_t))) {
        OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE);
        (void)opal_shmem_segment_detach(shmem_bufp);
        return NULL;
    }

    /* copy meta information into common sm module
     *                                     from ====> to                */
    if (OPAL_SUCCESS != opal_shmem_ds_copy(shmem_bufp, &map->shmem_ds)) {
        (void)opal_shmem_segment_detach(shmem_bufp);
        free(map);
        return NULL;
    }

    /* the first entry in the file is the control structure. the first
     * entry in the control structure is an mca_common_sm_seg_header_t
     * element.
     */
    map->module_seg = seg;

    addr = ((unsigned char *)seg) + size_ctl_structure;
    /* if we have a data segment (i.e., if 0 != data_seg_alignment),
     * then make it the first aligned address after the control
     * structure.  IF THIS HAPPENS, THIS IS A PROGRAMMING ERROR IN
     * OPEN MPI!
     */
    if (0 != data_seg_alignment) {
        addr = OPAL_ALIGN_PTR(addr, data_seg_alignment, unsigned char *);
        /* is addr past end of the shared memory segment? */
        if ((unsigned char *)seg + shmem_bufp->seg_size < addr) {
            opal_show_help("help-mpi-common-sm.txt", "mmap too small", 1,
                           opal_proc_local_get()->proc_hostname,
                           (unsigned long)shmem_bufp->seg_size,
                           (unsigned long)size_ctl_structure,
                           (unsigned long)data_seg_alignment);
            (void)opal_shmem_segment_detach(shmem_bufp);
            free(map);
            return NULL;
        }
    }
Example #2
0
/**
 * Hand a shared-memory segment descriptor from the root process to the other
 * local processes using blocking RML send/recv.
 *
 * This routine assumes that procs is in the following state:
 *     o all the local procs at the beginning.
 *     o procs[0] is the lowest named process (the one that sends).
 *
 * Every message carries two items: a fixed-size, zero-padded copy of
 * msg_id_str followed by the raw opal_shmem_ds_t.  Identifiers that do not
 * fit in OPAL_PATH_MAX - 1 characters are truncated on the wire, so all
 * identifier comparisons below are limited to that length.
 *
 * @param ds_buf            root: descriptor to send.  non-root: buffer that is
 *                          filled in with the received descriptor.
 * @param procs             process list laid out as described above.
 * @param num_procs         number of leading entries of procs that take part;
 *                          the root sends to procs[1] .. procs[num_procs - 1].
 * @param tag               RML tag used for both send and recv.
 * @param bcast_root        true when the caller is the sender.
 * @param msg_id_str        NUL-terminated identifier that tells receivers
 *                          which message is theirs.
 * @param pending_rml_msgs  list of messages that were received earlier but
 *                          carried a different identifier.  a non-root caller
 *                          searches it first and appends to it whenever it
 *                          receives a message meant for somebody else.
 *
 * @return OMPI_SUCCESS on success, OMPI_ERROR on any send, receive,
 *         allocation or descriptor-copy failure.
 */
int
mca_common_sm_rml_info_bcast(opal_shmem_ds_t *ds_buf,
                             ompi_proc_t **procs,
                             size_t num_procs,
                             int tag,
                             bool bcast_root,
                             char *msg_id_str,
                             opal_list_t *pending_rml_msgs)
{
    int rc = OMPI_SUCCESS;
    struct iovec iov[MCA_COMMON_SM_RML_MSG_LEN];
    int iovrc;
    size_t p;
    char msg_id_str_to_tx[OPAL_PATH_MAX];
    /* longest identifier that survives the trip through msg_id_str_to_tx */
    const size_t id_max = sizeof(msg_id_str_to_tx) - 1;

    /* the whole buffer is transmitted, so strncpy's zero padding is wanted
     * here (no stale stack bytes on the wire).  strncpy does not write the
     * last byte, though, so terminate explicitly.
     */
    strncpy(msg_id_str_to_tx, msg_id_str, id_max);
    msg_id_str_to_tx[id_max] = '\0';

    /* let the first item be the queueing id name */
    iov[0].iov_base = (ompi_iov_base_ptr_t)msg_id_str_to_tx;
    iov[0].iov_len = sizeof(msg_id_str_to_tx);
    iov[1].iov_base = (ompi_iov_base_ptr_t)ds_buf;
    iov[1].iov_len = sizeof(opal_shmem_ds_t);

    /* figure out if i am the root proc in the group.
     * if i am, bcast the message to the rest of the local procs.
     */
    if (bcast_root) {
        opal_progress_event_users_increment();
        /* first num_procs items should be local procs; procs[0] is me */
        for (p = 1; p < num_procs; ++p) {
            iovrc = orte_rml.send(&(procs[p]->proc_name), iov,
                                  MCA_COMMON_SM_RML_MSG_LEN, tag, 0);
            /* anything short of both items counts as a failed send */
            if ((ssize_t)(iov[0].iov_len + iov[1].iov_len) > iovrc) {
                ORTE_ERROR_LOG(OMPI_ERR_COMM_FAILURE);
                opal_progress_event_users_decrement();
                rc = OMPI_ERROR;
                goto out;
            }
        }
        opal_progress_event_users_decrement();
    }
    else { /* i am NOT the root ("lowest") proc */
        opal_list_item_t *item;
        mca_common_sm_rml_pending_rml_msg_types_t *rml_msg;
        /* tracked explicitly so that nothing has to look at item once it
         * has been handed to OBJ_RELEASE.
         */
        bool found = false;

        /* because a component query can be performed simultaneously in multiple
         * threads, the RML messages may arrive in any order.  so first check to
         * see if we previously received a message for me.
         */
        for (item = opal_list_get_first(pending_rml_msgs);
             opal_list_get_end(pending_rml_msgs) != item;
             item = opal_list_get_next(item)) {
            rml_msg = (mca_common_sm_rml_pending_rml_msg_types_t *)item;
            /* was the message for me? */
            if (0 == strncmp(rml_msg->msg_id_str, msg_id_str, id_max)) {
                opal_list_remove_item(pending_rml_msgs, item);
                /* NOTE(review): opal_shmem_ds_copy results are compared with
                 * OMPI_SUCCESS, which assumes OMPI_SUCCESS == OPAL_SUCCESS --
                 * confirm against the project's constants.
                 */
                /*                                  from ==============> to */
                if (OMPI_SUCCESS != opal_shmem_ds_copy(&rml_msg->shmem_ds,
                                                       ds_buf)) {
                    ORTE_ERROR_LOG(OMPI_ERROR);
                    rc = OMPI_ERROR;
                }
                OBJ_RELEASE(item);
                found = true;
                break;
            }
        }
        /* if we didn't find a message already waiting, block on receiving from
         * the RML.
         */
        if (!found) {
            do {
                /* bump up the libevent polling frequency while we're in this
                 * RML recv, just to ensure we're checking libevent frequently.
                 */
                opal_progress_event_users_increment();
                iovrc = orte_rml.recv(&(procs[0]->proc_name), iov,
                                      MCA_COMMON_SM_RML_MSG_LEN, tag, 0);
                opal_progress_event_users_decrement();
                if (iovrc < 0) {
                    ORTE_ERROR_LOG(OMPI_ERR_RECV_LESS_THAN_POSTED);
                    rc = OMPI_ERROR;
                    goto out;
                }
                /* the id buffer was just overwritten with received bytes;
                 * make sure it is a terminated string before using it.
                 */
                msg_id_str_to_tx[id_max] = '\0';
                /* was the message for me?  if so, we're done */
                if (0 == strncmp(msg_id_str_to_tx, msg_id_str, id_max)) {
                    break;
                }
                /* if not, put it on the pending list and try again */
                rml_msg = OBJ_NEW(mca_common_sm_rml_pending_rml_msg_types_t);
                if (NULL == rml_msg) {
                    ORTE_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE);
                    rc = OMPI_ERROR;
                    goto out;
                }
                /* not for me, so place on list */
                /*                                  from ========> to */
                if (OMPI_SUCCESS != opal_shmem_ds_copy(ds_buf,
                                                       &rml_msg->shmem_ds)) {
                    ORTE_ERROR_LOG(OMPI_ERROR);
                    /* not on the list yet, so it is still ours to drop */
                    OBJ_RELEASE(rml_msg);
                    rc = OMPI_ERROR;
                    goto out;
                }
                memcpy(rml_msg->msg_id_str, msg_id_str_to_tx, OPAL_PATH_MAX);
                opal_list_append(pending_rml_msgs, &(rml_msg->super));
            } while (1);
        }
    }

out:
    return rc;
}