Example #1
int mca_btl_self_add_procs( struct mca_btl_base_module_t* btl, 
                            size_t nprocs, 
                            struct opal_proc_t **procs, 
                            struct mca_btl_base_endpoint_t **peers,
                            opal_bitmap_t* reachability )
{
    int i;

    for( i = 0; i < (int)nprocs; i++ ) {
        if( 0 == opal_compare_proc(procs[i]->proc_name, OPAL_PROC_MY_NAME) ) {
            opal_bitmap_set_bit( reachability, i );
            break;  /* there will always be only one ... */
        }
    }
    return OPAL_SUCCESS;
}
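
The loop above relies only on the fact that opal_compare_proc() returns 0 when two process names refer to the same process. As an illustrative sketch (the real implementation lives elsewhere in OPAL and is not part of these examples), such a comparison typically orders names by job identifier first and by rank within the job second; the struct and function names below are hypothetical.

#include <stdint.h>

/* Hypothetical stand-in for an OPAL process name: jobid first, vpid second
 * (assumed layout, for illustration only). */
typedef struct {
    uint32_t jobid;   /* job the process belongs to */
    uint32_t vpid;    /* rank of the process within that job */
} example_process_name_t;

/* Returns <0, 0 or >0; the add_procs loops above only care about the
 * "0 means same process" property. */
static int example_compare_proc(example_process_name_t a, example_process_name_t b)
{
    if (a.jobid != b.jobid) {
        return (a.jobid < b.jobid) ? -1 : 1;
    }
    if (a.vpid != b.vpid) {
        return (a.vpid < b.vpid) ? -1 : 1;
    }
    return 0;
}
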
Example #2
/**
 * PML->BTL notification of a change in the process list.
 *
 * @param btl (IN)           BTL module
 * @param nprocs (IN)        number of processes
 * @param procs (IN)         array of process descriptors
 * @param peers (OUT)        array of per-peer endpoint handles
 * @param reachability (OUT) bitmap marking which processes are reachable
 * @return     OPAL_SUCCESS or error status on failure.
 */
static int mca_btl_self_add_procs (struct mca_btl_base_module_t *btl, size_t nprocs,
                                   struct opal_proc_t **procs,
                                   struct mca_btl_base_endpoint_t **peers,
                                   opal_bitmap_t* reachability)
{
    for (int i = 0; i < (int)nprocs; i++ ) {
        if( 0 == opal_compare_proc(procs[i]->proc_name, OPAL_PROC_MY_NAME) ) {
            opal_bitmap_set_bit( reachability, i );
            /* need to return something to keep the bml from ignoring us */
            peers[i] = (struct mca_btl_base_endpoint_t *) 1;
            break;  /* there will always be only one ... */
        }
    }

    return OPAL_SUCCESS;
}
Example #3
/**
 * This PML monitors only the processes in MPI_COMM_WORLD. As OMPI now lazily
 * adds peers on the first call to add_procs, we need to check how many processes
 * are in MPI_COMM_WORLD in order to create storage of the right size.
 */
int mca_pml_monitoring_add_procs(struct ompi_proc_t **procs,
                                 size_t nprocs)
{
    opal_process_name_t tmp, wp_name;
    size_t i, peer_rank, nprocs_world;
    uint64_t key;

    if(NULL == translation_ht) {
        translation_ht = OBJ_NEW(opal_hash_table_t);
        opal_hash_table_init(translation_ht, 2048);
        /* get my rank in the MPI_COMM_WORLD */
        my_rank = ompi_comm_rank((ompi_communicator_t*)&ompi_mpi_comm_world);
    }

    nprocs_world = ompi_comm_size((ompi_communicator_t*)&ompi_mpi_comm_world);
    /* For all procs in the same MPI_COMM_WORLD we need to add them to the hash table */
    for( i = 0; i < nprocs; i++ ) {

        /* Extract the peer procname from the procs array */
        if( ompi_proc_is_sentinel(procs[i]) ) {
            tmp = ompi_proc_sentinel_to_name((uintptr_t)procs[i]);
        } else {
            tmp = procs[i]->super.proc_name;
        }
        if( tmp.jobid != ompi_proc_local_proc->super.proc_name.jobid )
            continue;

        for( peer_rank = 0; peer_rank < nprocs_world; peer_rank++ ) {
            wp_name = ompi_group_get_proc_name(((ompi_communicator_t*)&ompi_mpi_comm_world)->c_remote_group, peer_rank);
            if( 0 != opal_compare_proc( tmp, wp_name) )
                continue;

            /* Find the rank of the peer in MPI_COMM_WORLD */
            key = *((uint64_t*)&tmp);
            /* store the rank (in MPI_COMM_WORLD) of the process,
               keyed by its name (a unique OPAL ID), in the hash table */
            if( OPAL_SUCCESS != opal_hash_table_set_value_uint64(translation_ht,
                                                                 key, (void*)(uintptr_t)peer_rank) ) {
                return OMPI_ERR_OUT_OF_RESOURCE;  /* failed to allocate memory or to grow the hash table */
            }
            break;
        }
    }
    return pml_selected_module.pml_add_procs(procs, nprocs);
}
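
The function above fills translation_ht with one entry per MPI_COMM_WORLD peer, using the 64-bit reinterpretation of the process name as the key and the peer's world rank as the value. A minimal sketch of the lookup side is shown below, assuming the standard opal_hash_table_get_value_uint64() accessor; the helper name itself is hypothetical and not part of the monitoring PML shown above.

/* Hypothetical helper: translate a process name back into its MPI_COMM_WORLD
 * rank using the translation_ht filled by mca_pml_monitoring_add_procs(). */
static int example_world_rank_of(opal_process_name_t name, int *world_rank)
{
    uint64_t key = *((uint64_t*)&name);   /* same 64-bit key as in add_procs */
    void *value = NULL;

    if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(translation_ht, key, &value)) {
        return OPAL_ERR_NOT_FOUND;        /* peer is not part of MPI_COMM_WORLD */
    }
    *world_rank = (int)(uintptr_t)value;  /* rank was stored as a pointer-sized integer */
    return OPAL_SUCCESS;
}
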
Example #4
/*
 *  Receive the peer's globally unique process identification from a newly
 *  connected socket and verify the expected response. If it matches, move the
 *  socket to a connected state.
 */
static int usock_recv_connect_ack(void)
{
    char *msg;
    char *version;
    int rc;
    char *cred;
    size_t credsize;
    pmix_usock_hdr_t hdr;

    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s RECV CONNECT ACK FROM SERVER ON SOCKET %d",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
                        mca_pmix_native_component.sd);

    /* ensure all is zero'd */
    memset(&hdr, 0, sizeof(pmix_usock_hdr_t));

    if (usock_recv_blocking((char*)&hdr, sizeof(pmix_usock_hdr_t))) {
        /* If the state is CONNECT_ACK, then we were waiting for
         * the connection to be ack'd
         */
        if (mca_pmix_native_component.state != PMIX_USOCK_CONNECT_ACK) {
            /* handshake broke down - abort this connection */
            opal_output(0, "%s RECV CONNECT BAD HANDSHAKE FROM SERVER ON SOCKET %d",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
                        mca_pmix_native_component.sd);
            mca_pmix_native_component.state = PMIX_USOCK_FAILED;
            CLOSE_THE_SOCKET(mca_pmix_native_component.sd);
            return OPAL_ERR_UNREACH;
        }
    } else {
        /* unable to complete the recv */
        opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                            "%s unable to complete recv of connect-ack from server ON SOCKET %d",
                            OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
                            mca_pmix_native_component.sd);
        return OPAL_ERR_UNREACH;
    }

    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s connect-ack recvd from server",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));

    /* compare the server's name to the expected value */
    if (0 != opal_compare_proc(hdr.id, mca_pmix_native_component.server)) {
        opal_output(0, "usock_peer_recv_connect_ack: "
                    "%s received unexpected process identifier (%s) from server: expected (%s)",
                    OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
                    OPAL_NAME_PRINT(hdr.id),
                    OPAL_NAME_PRINT(mca_pmix_native_component.server));
        mca_pmix_native_component.state = PMIX_USOCK_FAILED;
        CLOSE_THE_SOCKET(mca_pmix_native_component.sd);
        return OPAL_ERR_UNREACH;
    }

    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s connect-ack header from server is okay",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));

    /* get the authentication and version payload */
    if (NULL == (msg = (char*)malloc(hdr.nbytes))) {
        mca_pmix_native_component.state = PMIX_USOCK_FAILED;
        CLOSE_THE_SOCKET(mca_pmix_native_component.sd);
        return OPAL_ERR_OUT_OF_RESOURCE;
    }
    if (!usock_recv_blocking(msg, hdr.nbytes)) {
        /* unable to complete the recv */
        opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                            "%s unable to complete recv of connect-ack from server ON SOCKET %d",
                            OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
                            mca_pmix_native_component.sd);
        free(msg);
        return OPAL_ERR_UNREACH;
    }

    /* check that this is from a matching version */
    version = (char*)(msg);
    if (0 != strcmp(version, opal_version_string)) {
        opal_output(0, "usock_peer_recv_connect_ack: "
                    "%s received different version from server: %s instead of %s",
                    OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
                    version, opal_version_string);
        mca_pmix_native_component.state = PMIX_USOCK_FAILED;
        CLOSE_THE_SOCKET(mca_pmix_native_component.sd);
        free(msg);
        return OPAL_ERR_UNREACH;
    }

    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s connect-ack version from server matches ours",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));

    /* check security token */
    cred = (char*)(msg + strlen(version) + 1);
    credsize = hdr.nbytes - strlen(version) - 1;
    if (OPAL_SUCCESS != (rc = opal_sec.authenticate(cred, credsize, NULL))) {
        OPAL_ERROR_LOG(rc);
        mca_pmix_native_component.state = PMIX_USOCK_FAILED;
        CLOSE_THE_SOCKET(mca_pmix_native_component.sd);
        free(msg);
        return OPAL_ERR_UNREACH;
    }
    free(msg);

    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s connect-ack from server authenticated",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));

    /* connected */
    mca_pmix_native_component.state = PMIX_USOCK_CONNECTED;
    /* initiate send of first message on queue */
    if (NULL == mca_pmix_native_component.send_msg) {
        mca_pmix_native_component.send_msg = (pmix_usock_send_t*)
            opal_list_remove_first(&mca_pmix_native_component.send_queue);
    }
    if (NULL != mca_pmix_native_component.send_msg && !mca_pmix_native_component.send_ev_active) {
        opal_event_add(&mca_pmix_native_component.send_event, 0);
        mca_pmix_native_component.send_ev_active = true;
    }
    if (2 <= opal_output_get_verbosity(opal_pmix_base_framework.framework_output)) {
        pmix_usock_dump("connected");
    }
    return OPAL_SUCCESS;
}
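
The payload parsed above is laid out as a NUL-terminated version string followed immediately by the raw credential, which is why the receiver computes cred = msg + strlen(version) + 1 and credsize = hdr.nbytes - strlen(version) - 1. A sketch of the corresponding packing on the sending side could look like the following; the server-side code is not part of these examples and the helper below is hypothetical.

#include <stdlib.h>
#include <string.h>

/* Hypothetical sketch: pack a connect-ack payload as a NUL-terminated version
 * string followed by the raw credential bytes; the total size is what would be
 * carried in hdr.nbytes. */
static char *example_pack_connect_ack(const char *version,
                                      const void *cred, size_t credsize,
                                      size_t *nbytes)
{
    size_t vlen = strlen(version) + 1;       /* keep the terminating NUL */
    char *payload = (char*) malloc(vlen + credsize);

    if (NULL == payload) {
        return NULL;
    }
    memcpy(payload, version, vlen);          /* version string first ...      */
    memcpy(payload + vlen, cred, credsize);  /* ... credential right after it */
    *nbytes = vlen + credsize;
    return payload;
}
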
Example #5
/*
 * Invoked when a new communicator has been created.
 * Look at the communicator and decide which set of functions and
 * which priority we want to return.
 */
mca_scoll_base_module_t *
mca_scoll_mpi_comm_query(oshmem_group_t *osh_group, int *priority)
{
    mca_scoll_base_module_t *module;
    mca_scoll_mpi_module_t *mpi_module;
    int err, i;
    int tag;
    ompi_group_t* parent_group, *new_group;
    ompi_communicator_t* newcomm = NULL;
    *priority = 0;
    mca_scoll_mpi_component_t *cm;
    cm = &mca_scoll_mpi_component;
    int* ranks;
    if (!cm->mpi_enable){
        return NULL;
    }
    if ((osh_group->proc_count < 2) || (osh_group->proc_count < cm->mpi_np)) {
        return NULL;
    }
    /* Create an OMPI_Comm object and store a pointer to it in the group object */
    if (NULL == oshmem_group_all) {
        osh_group->ompi_comm = &(ompi_mpi_comm_world.comm);
    } else {
        int my_rank = MPI_UNDEFINED;

        err = ompi_comm_group(&(ompi_mpi_comm_world.comm), &parent_group);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != err)) {
            return NULL;
        }
        ranks = (int*) malloc(osh_group->proc_count * sizeof(int));
        if (OPAL_UNLIKELY(NULL == ranks)) {
            return NULL;
        }
        tag = 1;

        for (i = 0; i < osh_group->proc_count; i++) {
            ompi_proc_t* ompi_proc;
            for( int j = 0; j < ompi_group_size(parent_group); j++ ) {
                ompi_proc = ompi_group_peer_lookup(parent_group, j);
                if( 0 == opal_compare_proc(ompi_proc->super.proc_name, osh_group->proc_array[i]->super.proc_name)) {
                    ranks[i] = j;
                    break;
                }
            }
            /* NTH: keep track of my rank in the new group for the workaround below */
            if (ranks[i] == ompi_comm_rank (&ompi_mpi_comm_world.comm)) {
                my_rank = i;
            }
        }

        err = ompi_group_incl(parent_group, osh_group->proc_count, ranks, &new_group);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != err)) {
            free(ranks);
            return NULL;
        }

        /* NTH: XXX -- WORKAROUND -- The oshmem code overwrites ompi_proc_local_proc with its
         * own proc but does not update the proc list in comm world or comm self. This causes
         * the code in ompi_group_incl that updates grp_my_rank to fail. This will cause failures
         * here and when an application attempts to mix oshmem and mpi so it will really need to
         * be fixed in oshmem/proc and not here. For now we need to work around a new jenkins
         * failure so set my group ranking so we do not crash when running ompi_comm_create_group. */
        new_group->grp_my_rank = my_rank;

        err = ompi_comm_create_group(&(ompi_mpi_comm_world.comm), new_group, tag, &newcomm);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != err)) {
            free(ranks);
            return NULL;
        }
        err = ompi_group_free(&new_group);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != err)) {
            free(ranks);
            return NULL;
        }

        free(ranks);
        osh_group->ompi_comm = newcomm;
    }
    mpi_module = OBJ_NEW(mca_scoll_mpi_module_t);
    if (!mpi_module){
        return NULL;
    }
    mpi_module->comm = osh_group->ompi_comm;

    mpi_module->super.scoll_module_enable = mca_scoll_mpi_module_enable;
    mpi_module->super.scoll_barrier = mca_scoll_mpi_barrier;
    mpi_module->super.scoll_broadcast = mca_scoll_mpi_broadcast;
    mpi_module->super.scoll_reduce = mca_scoll_mpi_reduce;
    mpi_module->super.scoll_collect = mca_scoll_mpi_collect;

    *priority = cm->mpi_priority;
    module = &mpi_module->super;

    return module;
}
Example #6
/*
 * Invoked when a new communicator has been created.
 * Look at the communicator and decide which set of functions and
 * which priority we want to return.
 */
mca_scoll_base_module_t *
mca_scoll_mpi_comm_query(oshmem_group_t *osh_group, int *priority)
{
    mca_scoll_base_module_t *module;
    mca_scoll_mpi_module_t *mpi_module;
    int err, i;
    int tag;
    ompi_group_t* parent_group, *new_group;
    ompi_communicator_t* newcomm = NULL;
    *priority = 0;
    mca_scoll_mpi_component_t *cm;
    cm = &mca_scoll_mpi_component;
    int* ranks;
    if (!cm->mpi_enable){
        return NULL;
    }
    if ((osh_group->proc_count < 2) || (osh_group->proc_count < cm->mpi_np)) {
        return NULL;
    }
    /* Create an OMPI_Comm object and store a pointer to it in the group object */
    if (NULL == oshmem_group_all) {
        osh_group->ompi_comm = &(ompi_mpi_comm_world.comm);
    } else {
        err = ompi_comm_group(&(ompi_mpi_comm_world.comm), &parent_group);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != err)) {
            return NULL;
        }
        ranks = (int*) malloc(osh_group->proc_count * sizeof(int));
        if (OPAL_UNLIKELY(NULL == ranks)) {
            return NULL;
        }
        tag = 1;

        for (i = 0; i < osh_group->proc_count; i++) {
            ompi_proc_t* ompi_proc;
            for( int j = 0; j < ompi_group_size(parent_group); j++ ) {
                ompi_proc = ompi_group_peer_lookup(parent_group, j);
                if( 0 == opal_compare_proc(ompi_proc->super.proc_name, osh_group->proc_array[i]->super.proc_name)) {
                    ranks[i] = j;
                    break;
                }
            }
        }

        err = ompi_group_incl(parent_group, osh_group->proc_count, ranks, &new_group);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != err)) {
            free(ranks);
            return NULL;
        }
        err = ompi_comm_create_group(&(ompi_mpi_comm_world.comm), new_group, tag, &newcomm);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != err)) {
            free(ranks);
            return NULL;
        }
        err = ompi_group_free(&new_group);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != err)) {
            free(ranks);
            return NULL;
        }

        free(ranks);
        osh_group->ompi_comm = newcomm;
    }
    mpi_module = OBJ_NEW(mca_scoll_mpi_module_t);
    if (!mpi_module){
        return NULL;
    }
    mpi_module->comm = osh_group->ompi_comm;

    mpi_module->super.scoll_module_enable = mca_scoll_mpi_module_enable;
    mpi_module->super.scoll_barrier = mca_scoll_mpi_barrier;
    mpi_module->super.scoll_broadcast = mca_scoll_mpi_broadcast;
    mpi_module->super.scoll_reduce = mca_scoll_mpi_reduce;
    mpi_module->super.scoll_collect = mca_scoll_mpi_collect;
    mpi_module->super.scoll_alltoall = NULL;

    *priority = cm->mpi_priority;
    module = &mpi_module->super;

    return module;
}