Exemplo n.º 1
0
static int send_ep_address(void)
{
    mxm_error_t error;
    void *address;
    size_t addrlen;
    int rc;

    addrlen = 0;
    error = mxm_ep_get_address(ompi_pml_yalla.mxm_ep, NULL, &addrlen);
    PML_YALLA_ASSERT(error == MXM_ERR_BUFFER_TOO_SMALL);

    address = alloca(addrlen);
    error = mxm_ep_get_address(ompi_pml_yalla.mxm_ep, address, &addrlen);
    if (MXM_OK != error) {
        PML_YALLA_ERROR("Failed to get EP address");
        return OMPI_ERROR;
    }

    OPAL_MODEX_SEND(rc, PMIX_SYNC_REQD, PMIX_GLOBAL,
                    &mca_pml_yalla_component.pmlm_version, address, addrlen);
    if (OMPI_SUCCESS != rc) {
        PML_YALLA_ERROR("Open MPI couldn't distribute EP connection details");
        return OMPI_ERROR;
    }

    return OMPI_SUCCESS;
}
Exemplo n.º 2
0
/**************************
 * usNIC BTL-specific functions to hide differences between master and
 * v1.8
 **************************/
/*
 * Publish usNIC modex data by forwarding the arguments to OPAL_MODEX_SEND
 * with the scope fixed to OPAL_PMIX_REMOTE.
 *
 * rc        - out: receives the status produced by OPAL_MODEX_SEND
 * component - component passed through to OPAL_MODEX_SEND
 *             (presumably identifies the entry -- confirm against the macro)
 * modexes   - data to publish
 * size      - size passed along with modexes (presumably a byte count --
 *             TODO confirm against callers)
 */
void usnic_compat_modex_send(int *rc,
                             mca_base_component_t *component,
                             opal_btl_usnic_modex_t *modexes,
                             size_t size)
{
    OPAL_MODEX_SEND(*rc, OPAL_PMIX_REMOTE, component,
                    modexes, size);
}
Exemplo n.º 3
0
/*
 * Publish this process's SCIF port id through the modex.
 *
 * Returns whatever status OPAL_MODEX_SEND stores in its result argument.
 */
static int mca_btl_scif_modex_send (void)
{
    mca_btl_scif_modex_t local_modex;
    int send_status;

    /* Zero every byte of the record first, then fill in the one field
     * that is published. */
    memset(&local_modex, 0, sizeof(local_modex));
    local_modex.port_id = mca_btl_scif_module.port_id;

    OPAL_MODEX_SEND(send_status, PMIX_SYNC_REQD, PMIX_LOCAL,
                    &mca_btl_scif_component.super.btl_version,
                    &local_modex, sizeof(local_modex));

    return send_status;
}
Exemplo n.º 4
0
/*
 * Send local device information and other information
 * required for setup.
 *
 * One opal_common_ugni_modex_t record is packed per entry of
 * opal_common_ugni_module.devices (device_count entries) into a single
 * heap buffer, which is published with OPAL_MODEX_SEND and then freed.
 *
 * my_cdm_id - value stored in the id field of every packed record
 *
 * Returns OPAL_ERR_OUT_OF_RESOURCE if the buffer cannot be allocated,
 * otherwise the status produced by OPAL_MODEX_SEND.
 */
static int opal_common_ugni_send_modex (int my_cdm_id)
{
    uint32_t modex_size, total_msg_size, msg_offset;
    struct opal_common_ugni_modex_t modex;
    char *modex_msg;
    int rc, i;

    modex_size = sizeof (struct opal_common_ugni_modex_t);
    total_msg_size = opal_common_ugni_module.device_count * modex_size;

    modex_msg = (char *) malloc (total_msg_size);
    if (NULL == modex_msg) {
        OPAL_OUTPUT((-1, "Error allocating memory for modex @ %s:%d",
                     __FILE__, __LINE__));
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    /*
     * The record is copied out byte-for-byte below, so clear it once up
     * front: any byte not covered by the addr/id assignments (padding or
     * other members) would otherwise carry indeterminate stack contents
     * into the published message.
     */
    memset (&modex, 0, sizeof (modex));

    /* pack modex for all available devices */
    for (i = 0, msg_offset = 0; i < opal_common_ugni_module.device_count ; ++i) {
        opal_common_ugni_device_t *dev = opal_common_ugni_module.devices + i;

        modex.addr = dev->dev_addr;
        modex.id   = my_cdm_id;

        memcpy ((void *)((uintptr_t) modex_msg + msg_offset),
                (void *)&modex, modex_size);

        msg_offset += modex_size;
    }

    /*
     * need global for edge cases like MPI_Comm_spawn support with
     * new ranks started on the same nodes as the spawnee ranks, etc.
     */

    OPAL_MODEX_SEND(rc, OPAL_PMIX_GLOBAL,
                    &opal_common_ugni_component,
                    modex_msg, total_msg_size);

    /* The original code frees right after the send as well, so the macro
     * is relied upon not to keep a reference to modex_msg. */
    free(modex_msg);

    return rc;
}
Exemplo n.º 5
0
/*
 * Send local device information and other information
 * required for setup.
 *
 * One opal_common_ugni_modex_t record is packed per entry of
 * opal_common_ugni_module.devices (device_count entries) into a single
 * heap buffer, which is published with OPAL_MODEX_SEND and then freed.
 *
 * my_cdm_id - value stored in the id field of every packed record
 *
 * Returns OPAL_ERR_OUT_OF_RESOURCE if the buffer cannot be allocated,
 * otherwise the status produced by OPAL_MODEX_SEND.
 */
static int opal_common_ugni_send_modex (int my_cdm_id)
{
    uint32_t modex_size, total_msg_size, msg_offset;
    struct opal_common_ugni_modex_t modex;
    char *modex_msg;
    int rc, i;

    modex_size = sizeof (struct opal_common_ugni_modex_t);
    total_msg_size = opal_common_ugni_module.device_count * modex_size;

    modex_msg = (char *) malloc (total_msg_size);
    if (NULL == modex_msg) {
        OPAL_OUTPUT((-1, "Error allocating memory for modex @ %s:%d",
                     __FILE__, __LINE__));
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    /*
     * The record is copied out byte-for-byte below, so clear it once up
     * front: any byte not covered by the addr/id assignments (padding or
     * other members) would otherwise carry indeterminate stack contents
     * into the published message.
     */
    memset (&modex, 0, sizeof (modex));

    /* pack modex for all available devices */
    for (i = 0, msg_offset = 0; i < opal_common_ugni_module.device_count ; ++i) {
        opal_common_ugni_device_t *dev = opal_common_ugni_module.devices + i;

        modex.addr = dev->dev_addr;
        modex.id   = my_cdm_id;

        memcpy ((void *)((uintptr_t) modex_msg + msg_offset),
                (void *)&modex, modex_size);

        msg_offset += modex_size;
    }

    OPAL_MODEX_SEND(rc, PMIX_ASYNC_RDY, PMIX_REMOTE,
                    &opal_common_ugni_component,
                    modex_msg, total_msg_size);

    /* The original code frees right after the send as well, so the macro
     * is relied upon not to keep a reference to modex_msg. */
    free(modex_msg);

    return rc;
}
Exemplo n.º 6
0
/*
 * Publish the local UCP worker address through the modex.
 *
 * The address is obtained with ucp_worker_get_address(), handed to
 * OPAL_MODEX_SEND under this component's pmlm_version, and then given
 * back with ucp_worker_release_address() whether or not the send
 * succeeded.
 *
 * Returns OMPI_SUCCESS, or OMPI_ERROR if the address cannot be obtained
 * or the modex send reports anything other than OMPI_SUCCESS.
 */
static int mca_pml_ucx_send_worker_address(void)
{
    ucp_address_t *address;
    ucs_status_t status;
    size_t addrlen;
    int rc;

    status = ucp_worker_get_address(ompi_pml_ucx.ucp_worker, &address, &addrlen);
    if (UCS_OK != status) {
        PML_UCX_ERROR("Failed to get worker address");
        return OMPI_ERROR;
    }

    OPAL_MODEX_SEND(rc, OPAL_PMIX_GLOBAL,
                    &mca_pml_ucx_component.pmlm_version, (void*)address, addrlen);

    /* Release before looking at rc: previously the failure branch returned
     * first and leaked the address. */
    ucp_worker_release_address(ompi_pml_ucx.ucp_worker, address);

    if (OMPI_SUCCESS != rc) {
        PML_UCX_ERROR("Open MPI couldn't distribute EP connection details");
        return OMPI_ERROR;
    }

    return OMPI_SUCCESS;
}
Exemplo n.º 7
0
/*
 * Open the PSM2 endpoint and matched queue for this process, record them
 * in ompi_mtl_psm2, publish the endpoint id through the modex and register
 * the PSM2 progress function.
 *
 * local_rank      - written (as decimal text) to MPI_LOCALRANKID
 * num_local_procs - written (as decimal text) to MPI_LOCALNRANKS
 *
 * The job key is read from the OMPI_MCA_orte_precondition_transports
 * environment variable, which must be exactly 33 characters and parse as
 * two 16-digit hex words separated by '-'.
 *
 * Returns OMPI_SUCCESS, or OMPI_ERROR if the key is missing/unparsable,
 * the endpoint or matched queue cannot be created, or the modex send
 * reports anything other than OMPI_SUCCESS.
 */
int ompi_mtl_psm2_module_init(int local_rank, int num_local_procs) {
    psm2_error_t err;
    psm2_ep_t	ep; /* endpoint handle */
    psm2_mq_t	mq;
    psm2_epid_t	epid; /* unique lid+port identifier */
    psm2_uuid_t  unique_job_key;
    struct psm2_ep_open_opts ep_opt;
    /* Parse target for the two hex words of the key.  sscanf() used to
     * store straight into unique_job_key through an unsigned long long
     * pointer cast; that relies on the key's alignment and breaks the
     * effective-type rules, so parse here and copy the bytes over. */
    unsigned long long key_words[2] = { 0, 0 };
    size_t key_len;
    char *generated_key;
    char env_string[256];
    int rc;

    generated_key = getenv("OMPI_MCA_orte_precondition_transports");
    memset(unique_job_key, 0, sizeof(psm2_uuid_t));

    if (!generated_key || (strlen(generated_key) != 33) ||
        sscanf(generated_key, "%016llx-%016llx", &key_words[0], &key_words[1]) != 2)
    {
      opal_show_help("help-mtl-psm2.txt",
		     "no uuid present", true,
		     generated_key ? "could not be parsed from" :
		     "not present in", ompi_process_info.nodename);
      return OMPI_ERROR;

    }

    /* Same byte image the pointer-cast store produced, bounded by both
     * objects' sizes. */
    key_len = sizeof(key_words) < sizeof(psm2_uuid_t) ?
              sizeof(key_words) : sizeof(psm2_uuid_t);
    memcpy(unique_job_key, key_words, key_len);

    /* Handle our own errors for opening endpoints */
    /* NOTE(review): ompi_mtl_psm2.ep is not assigned in this function
     * until after psm2_mq_init() below, so this (and the later
     * re-registration) uses whatever value it held on entry -- confirm
     * that is intended. */
    psm2_error_register_handler(ompi_mtl_psm2.ep, ompi_mtl_psm2_errhandler);

    /* Setup MPI_LOCALRANKID and MPI_LOCALNRANKS so PSM2 can allocate hardware
     * contexts correctly.  The final 0 means values already present in the
     * environment are left untouched.
     */
    snprintf(env_string, sizeof(env_string), "%d", local_rank);
    setenv("MPI_LOCALRANKID", env_string, 0);
    snprintf(env_string, sizeof(env_string), "%d", num_local_procs);
    setenv("MPI_LOCALNRANKS", env_string, 0);

    /* Setup the endpoint options. */
    psm2_ep_open_opts_get_defaults(&ep_opt);
    /* presumably seconds -> nanoseconds; confirm units of connect_timeout */
    ep_opt.timeout = ompi_mtl_psm2.connect_timeout * 1e9;
    ep_opt.affinity = PSM2_EP_OPEN_AFFINITY_SKIP; /* do not let PSM2 set affinity */

    /* Open PSM2 endpoint */
    err = psm2_ep_open(unique_job_key, &ep_opt, &ep, &epid);
    if (err) {
      opal_show_help("help-mtl-psm2.txt",
		     "unable to open endpoint", true,
		     psm2_error_get_string(err));
      return OMPI_ERROR;
    }

    /* Future errors are handled by the default error handler */
    psm2_error_register_handler(ompi_mtl_psm2.ep, PSM2_ERRHANDLER_DEFAULT);

    err = psm2_mq_init(ep,
		      0xffff000000000000ULL,
		      NULL,
		      0,
		      &mq);
    if (err) {
      /* NOTE(review): the endpoint opened above is neither closed nor
       * recorded on this path -- looks like a leak; confirm whether a
       * close call belongs here. */
      opal_show_help("help-mtl-psm2.txt",
		     "psm2 init", true,
		     psm2_error_get_string(err));
      return OMPI_ERROR;
    }

    ompi_mtl_psm2.ep   = ep;
    ompi_mtl_psm2.epid = epid;
    ompi_mtl_psm2.mq   = mq;

    OPAL_MODEX_SEND(rc, OPAL_PMIX_GLOBAL,
                    &mca_mtl_psm2_component.super.mtl_version,
                    &ompi_mtl_psm2.epid,
                    sizeof(psm2_epid_t));

    if (OMPI_SUCCESS != rc) {
	opal_output(0, "Open MPI couldn't send PSM2 epid to head node process");
	return OMPI_ERROR;
    }


    /* register the psm2 progress function */
    opal_progress_register(ompi_mtl_psm2_progress);

    return OMPI_SUCCESS;
}