/* ////////////////////////////////////////////////////////////////////////// */

/**
 * Attach to an already-created shared-memory segment and wrap it in a newly
 * allocated mca_common_sm_module_t.
 *
 * @param shmem_bufp          descriptor of the segment to attach to; its
 *                            contents were initialized earlier via
 *                            opal_shmem_segment_create (see comment below).
 * @param size                total requested size (not referenced in the
 *                            visible portion of this function).
 * @param size_ctl_structure  byte size of the control structure that occupies
 *                            the start of the segment.
 * @param data_seg_alignment  required alignment for the data segment; 0 means
 *                            "no data segment".
 * @param first_call          presumably selects first-attacher initialization
 *                            later in this function — TODO confirm (unused in
 *                            the visible portion).
 *
 * @return a new module on success, NULL on any failure (attach failure,
 *         allocation failure, metadata copy failure, or segment too small).
 */
static mca_common_sm_module_t *
attach_and_init(opal_shmem_ds_t *shmem_bufp,
                size_t size,
                size_t size_ctl_structure,
                size_t data_seg_alignment,
                bool first_call)
{
    mca_common_sm_module_t *map = NULL;
    mca_common_sm_seg_header_t *seg = NULL;
    unsigned char *addr = NULL;

    /* attach to the specified segment. note that at this point, the contents of
     * *shmem_bufp have already been initialized via opal_shmem_segment_create. */
    if (NULL == (seg = (mca_common_sm_seg_header_t *)
                       opal_shmem_segment_attach(shmem_bufp))) {
        return NULL;
    }
    /* read barrier: make sure we observe the segment contents written by the
     * creating process before we dereference them. */
    opal_atomic_rmb();

    if (NULL == (map = OBJ_NEW(mca_common_sm_module_t))) {
        OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE);
        /* best-effort detach; the attach itself already failed us */
        (void)opal_shmem_segment_detach(shmem_bufp);
        return NULL;
    }

    /* copy meta information into common sm module
     *                     from ====> to */
    if (OPAL_SUCCESS != opal_shmem_ds_copy(shmem_bufp, &map->shmem_ds)) {
        (void)opal_shmem_segment_detach(shmem_bufp);
        /* NOTE(review): map came from OBJ_NEW; OPAL convention is to dispose
         * of such objects with OBJ_RELEASE, not free() — confirm whether
         * free() here (and below) skips the class destructor. */
        free(map);
        return NULL;
    }

    /* the first entry in the file is the control structure. the first
     * entry in the control structure is an mca_common_sm_seg_header_t
     * element. */
    map->module_seg = seg;

    /* the data segment (if any) starts right after the control structure */
    addr = ((unsigned char *)seg) + size_ctl_structure;
    /* if we have a data segment (i.e., if 0 != data_seg_alignment),
     * then make it the first aligned address after the control
     * structure.  IF THIS HAPPENS, THIS IS A PROGRAMMING ERROR IN
     * OPEN MPI! */
    if (0 != data_seg_alignment) {
        addr = OPAL_ALIGN_PTR(addr, data_seg_alignment, unsigned char *);
        /* is addr past end of the shared memory segment? */
        if ((unsigned char *)seg + shmem_bufp->seg_size < addr) {
            opal_show_help("help-mpi-common-sm.txt", "mmap too small", 1,
                           opal_proc_local_get()->proc_hostname,
                           (unsigned long)shmem_bufp->seg_size,
                           (unsigned long)size_ctl_structure,
                           (unsigned long)data_seg_alignment);
            (void)opal_shmem_segment_detach(shmem_bufp);
            /* NOTE(review): see free-vs-OBJ_RELEASE note above */
            free(map);
            return NULL;
        }
    }
/**
 * Broadcast (or receive) a shared-memory segment descriptor over the RML.
 *
 * this routine assumes that sorted_procs is in the following state:
 *     o all the local procs at the beginning.
 *     o sorted_procs[0] is the lowest named process.
 *
 * @param ds_buf            [in on root, out on non-root] segment descriptor
 *                          being distributed.
 * @param procs             local procs; procs[0] is the root ("lowest") proc.
 * @param num_procs         number of local procs in procs.
 * @param tag               RML tag to send/receive on.
 * @param bcast_root        true iff the caller is the root of the bcast.
 * @param msg_id_str        id string that pairs sends with receives; copied
 *                          (truncated if necessary) into a fixed-size buffer.
 * @param pending_rml_msgs  holding list for messages that arrived for a
 *                          different concurrent query on the same tag.
 *
 * @return OMPI_SUCCESS on success, OMPI_ERROR on send/recv/allocation failure.
 */
int
mca_common_sm_rml_info_bcast(opal_shmem_ds_t *ds_buf,
                             ompi_proc_t **procs,
                             size_t num_procs,
                             int tag,
                             bool bcast_root,
                             char *msg_id_str,
                             opal_list_t *pending_rml_msgs)
{
    int rc = OMPI_SUCCESS;
    struct iovec iov[MCA_COMMON_SM_RML_MSG_LEN];
    int iovrc;
    size_t p;
    char msg_id_str_to_tx[OPAL_PATH_MAX];

    /* copy the id into a fixed-size transmit buffer.  strncpy does NOT
     * NUL-terminate when the source is >= the bound, and the tail of this
     * stack buffer is otherwise uninitialized (it is later strcmp'd and
     * memcpy'd in full), so terminate explicitly. */
    strncpy(msg_id_str_to_tx, msg_id_str, sizeof(msg_id_str_to_tx) - 1);
    msg_id_str_to_tx[sizeof(msg_id_str_to_tx) - 1] = '\0';

    /* let the first item be the queueing id name */
    iov[0].iov_base = (ompi_iov_base_ptr_t)msg_id_str_to_tx;
    iov[0].iov_len = sizeof(msg_id_str_to_tx);
    iov[1].iov_base = (ompi_iov_base_ptr_t)ds_buf;
    iov[1].iov_len = sizeof(opal_shmem_ds_t);

    /* figure out if i am the root proc in the group.
     * if i am, bcast the message the rest of the local procs. */
    if (bcast_root) {
        opal_progress_event_users_increment();
        /* first num_procs items should be local procs */
        for (p = 1; p < num_procs; ++p) {
            iovrc = orte_rml.send(&(procs[p]->proc_name), iov,
                                  MCA_COMMON_SM_RML_MSG_LEN, tag, 0);
            /* a short (or negative) send is a communication failure */
            if ((ssize_t)(iov[0].iov_len + iov[1].iov_len) > iovrc) {
                ORTE_ERROR_LOG(OMPI_ERR_COMM_FAILURE);
                opal_progress_event_users_decrement();
                rc = OMPI_ERROR;
                goto out;
            }
        }
        opal_progress_event_users_decrement();
    } else {
        /* i am NOT the root ("lowest") proc */
        opal_list_item_t *item;
        mca_common_sm_rml_pending_rml_msg_types_t *rml_msg;

        /* because a component query can be performed simultaneously in multiple
         * threads, the RML messages may arrive in any order.  so first check to
         * see if we previously received a message for me. */
        for (item = opal_list_get_first(pending_rml_msgs);
             opal_list_get_end(pending_rml_msgs) != item;
             item = opal_list_get_next(item)) {
            rml_msg = (mca_common_sm_rml_pending_rml_msg_types_t *)item;
            /* was the message for me? */
            if (0 == strcmp(rml_msg->msg_id_str, msg_id_str)) {
                opal_list_remove_item(pending_rml_msgs, item);
                /*                 from ==============> to */
                opal_shmem_ds_copy(&rml_msg->shmem_ds, ds_buf);
                OBJ_RELEASE(item);
                break;
            }
        }
        /* if we didn't find a message already waiting, block on receiving from
         * the RML. */
        if (opal_list_get_end(pending_rml_msgs) == item) {
            do {
                /* bump up the libevent polling frequency while we're in this
                 * RML recv, just to ensure we're checking libevent frequently. */
                opal_progress_event_users_increment();
                /* the recv overwrites iov[0] (msg_id_str_to_tx) and iov[1]
                 * (ds_buf) in place */
                iovrc = orte_rml.recv(&(procs[0]->proc_name), iov,
                                      MCA_COMMON_SM_RML_MSG_LEN, tag, 0);
                opal_progress_event_users_decrement();
                if (iovrc < 0) {
                    ORTE_ERROR_LOG(OMPI_ERR_RECV_LESS_THAN_POSTED);
                    rc = OMPI_ERROR;
                    goto out;
                }
                /* was the message for me? if so, we're done */
                if (0 == strcmp(msg_id_str_to_tx, msg_id_str)) {
                    break;
                }
                /* if not, put it on the pending list and try again */
                if (NULL == (rml_msg =
                             OBJ_NEW(mca_common_sm_rml_pending_rml_msg_types_t))) {
                    ORTE_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE);
                    rc = OMPI_ERROR;
                    goto out;
                }
                /* not for me, so place on list */
                /*                 from ========> to */
                opal_shmem_ds_copy(ds_buf, &rml_msg->shmem_ds);
                memcpy(rml_msg->msg_id_str, msg_id_str_to_tx, OPAL_PATH_MAX);
                opal_list_append(pending_rml_msgs, &(rml_msg->super));
            } while (1);
        }
    }

out:
    return rc;
}