Example #1
int ompi_comm_connect_accept ( ompi_communicator_t *comm, int root,
                               orte_process_name_t *port, int send_first,
                               ompi_communicator_t **newcomm, orte_rml_tag_t tag )
{
    int size, rsize, rank, rc;
    orte_std_cntr_t num_vals;
    orte_std_cntr_t rnamebuflen = 0;
    int rnamebuflen_int = 0;
    void *rnamebuf=NULL;

    ompi_communicator_t *newcomp=MPI_COMM_NULL;
    ompi_proc_t **rprocs=NULL;
    ompi_group_t *group=comm->c_local_group;
    orte_process_name_t *rport=NULL, tmp_port_name;
    orte_buffer_t *nbuf=NULL, *nrbuf=NULL;

    size = ompi_comm_size ( comm );
    rank = ompi_comm_rank ( comm );

    /* tell the progress engine to tick the event library more
       often, to make sure that the OOB messages get sent */
    opal_progress_event_increment();

    if ( rank == root ) {
        /* The process receiving first does not yet have the contact
           information of the remote process, so we have to exchange
           that information first.
        */
        if ( OMPI_COMM_JOIN_TAG != (int)tag ) {
            rc = ompi_comm_get_rport(port,send_first,
                                     group->grp_proc_pointers[rank], tag,
                                     &tmp_port_name);
            if (OMPI_SUCCESS != rc) {
                /* jump to exit so the progress-event tick rate is restored */
                goto exit;
            }
            rport = &tmp_port_name;
        } else {
            rport = port;
        }

        /* Generate the message buffer containing the number of processes and the list of
           participating processes */
        nbuf = OBJ_NEW(orte_buffer_t);
        if (NULL == nbuf) {
            rc = OMPI_ERR_OUT_OF_RESOURCE;
            goto exit;
        }

        if (ORTE_SUCCESS != (rc = orte_dss.pack(nbuf, &size, 1, ORTE_INT))) {
            ORTE_ERROR_LOG(rc);
            goto exit;
        }
        if (OMPI_SUCCESS != (rc = ompi_proc_pack(group->grp_proc_pointers, size, nbuf))) {
            goto exit;
        }

        nrbuf = OBJ_NEW(orte_buffer_t);
        if (NULL == nrbuf ) {
            rc = OMPI_ERR_OUT_OF_RESOURCE;
            goto exit;
        }

        /* Exchange the number and the list of processes in the groups */
        if ( send_first ) {
            rc = orte_rml.send_buffer(rport, nbuf, tag, 0);
            rc = orte_rml.recv_buffer(rport, nrbuf, tag);
        } else {
            rc = orte_rml.recv_buffer(rport, nrbuf, tag);
            rc = orte_rml.send_buffer(rport, nbuf, tag, 0);
        }

        if (ORTE_SUCCESS != (rc = orte_dss.unload(nrbuf, &rnamebuf, &rnamebuflen))) {
            ORTE_ERROR_LOG(rc);
            goto exit;
        }
        /* the payload now belongs to rnamebuf; release the emptied buffer
           before it is re-allocated for all ranks below */
        OBJ_RELEASE(nrbuf);
        nrbuf = NULL;
    }

    /* Convert the length to an int so it can be broadcast as MPI_INT below;
     * broadcasting the raw counter type could otherwise cause trouble between
     * big- and little-endian hosts.  (Strictly speaking this is no longer
     * required, since the length is now an orte_std_cntr_t, which corresponds
     * to an int32.)
     */
    rnamebuflen_int = (int)rnamebuflen;

    /* bcast the buffer-length to all processes in the local comm */
    rc = comm->c_coll.coll_bcast (&rnamebuflen_int, 1, MPI_INT, root, comm );
    if ( OMPI_SUCCESS != rc ) {
        goto exit;
    }
    rnamebuflen = rnamebuflen_int;

    if ( rank != root ) {
        /* non-root processes need to allocate the receive buffer themselves */
        rnamebuf = (char *) malloc(rnamebuflen);
        if ( NULL == rnamebuf ) {
            rc = OMPI_ERR_OUT_OF_RESOURCE;
            goto exit;
        }
    }

    /* bcast the packed list of processes to all procs in the local group
       and reconstruct the data.  Note that ompi_proc_unpack below adds any
       processes that were not yet known to our local process pool.
    */
    rc = comm->c_coll.coll_bcast (rnamebuf, rnamebuflen_int, MPI_BYTE, root, comm );
    if ( OMPI_SUCCESS != rc ) {
        goto exit;
    }

    nrbuf = OBJ_NEW(orte_buffer_t);
    if (NULL == nrbuf) {
        rc = OMPI_ERR_OUT_OF_RESOURCE;
        goto exit;
    }
    if ( ORTE_SUCCESS != ( rc = orte_dss.load(nrbuf, rnamebuf, rnamebuflen))) {
        ORTE_ERROR_LOG(rc);
        goto exit;
    }

    num_vals = 1;
    if (ORTE_SUCCESS != (rc = orte_dss.unpack(nrbuf, &rsize, &num_vals, ORTE_INT))) {
        ORTE_ERROR_LOG(rc);
        goto exit;
    }

    rc = ompi_proc_unpack(nrbuf, rsize, &rprocs);
    if ( OMPI_SUCCESS != rc ) {
        goto exit;
    }

    OBJ_RELEASE(nrbuf);
    if ( rank == root ) {
        OBJ_RELEASE(nbuf);
    }

    /* allocate comm-structure */
    newcomp = ompi_comm_allocate ( size, rsize );
    if ( NULL == newcomp ) {
        rc = OMPI_ERR_OUT_OF_RESOURCE;
        goto exit;
    }

    /* allocate comm_cid */
    rc = ompi_comm_nextcid ( newcomp,                 /* new communicator */
                             comm,                    /* old communicator */
                             NULL,                    /* bridge comm */
                             &root,                   /* local leader */
                             rport,                   /* remote leader */
                             OMPI_COMM_CID_INTRA_OOB, /* mode */
                             send_first );            /* send or recv first */
    if ( OMPI_SUCCESS != rc ) {
        goto exit;
    }

    /* set up communicator structure */
    rc = ompi_comm_set ( newcomp,                  /* new comm */
                         comm,                     /* old comm */
                         group->grp_proc_count,    /* local_size */
                         group->grp_proc_pointers, /* local_procs*/
                         rsize,                    /* remote_size */
                         rprocs,                   /* remote_procs */
                         NULL,                     /* attrs */
                         comm->error_handler,      /* error handler */
                         NULL                      /* topo component */
                         );
    if ( OMPI_SUCCESS != rc ) {
        goto exit;
    }

    /* activate comm and init coll-component */
    rc = ompi_comm_activate ( newcomp,                 /* new communicator */
                              comm,                    /* old communicator */
                              NULL,                    /* bridge comm */
                              &root,                   /* local leader */
                              rport,                   /* remote leader */
                              OMPI_COMM_CID_INTRA_OOB, /* mode */
                              send_first,              /* send or recv first */
                              NULL );                  /* coll component */
    if ( OMPI_SUCCESS != rc ) {
        goto exit;
    }

    /* Open question: do we have to re-initialize any low-level transports
       to enable the use of fast communication devices between the two
       (previously separate) worlds?
    */


 exit:
    /* done with OOB and such - slow our tick rate again */
    opal_progress();
    opal_progress_event_decrement();


    if ( NULL != rprocs ) {
        free ( rprocs );
    }
    if ( OMPI_SUCCESS != rc ) {
        if ( MPI_COMM_NULL != newcomp ) {
            /* release the partially constructed communicator */
            OBJ_RELEASE(newcomp);
            newcomp = MPI_COMM_NULL;
        }
    }

    *newcomm = newcomp;
    return rc;
}
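
The non-root part of the function above follows a common two-step pattern: broadcast the payload length first, let every non-root rank allocate a buffer of that size, then broadcast the payload itself. The sketch below only illustrates that pattern with plain MPI, outside the ORTE buffer machinery; the helper name bcast_blob and the payload contents are made up for the example.

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Hypothetical helper illustrating the two-step broadcast used above:
 * first the length, then the payload, so non-root ranks can size their
 * receive buffer before the second bcast. */
static void bcast_blob(void **buf, int *len, int root, MPI_Comm comm)
{
    int rank;
    MPI_Comm_rank(comm, &rank);

    /* step 1: everyone learns the payload size from the root */
    MPI_Bcast(len, 1, MPI_INT, root, comm);

    /* step 2: non-root ranks allocate, then everyone receives the bytes */
    if (rank != root) {
        *buf = malloc((size_t)*len);
        if (NULL == *buf) {
            MPI_Abort(comm, 1);   /* keep the sketch simple on failure */
        }
    }
    MPI_Bcast(*buf, *len, MPI_BYTE, root, comm);
}

int main(int argc, char **argv)
{
    int rank, len = 0;
    void *blob = NULL;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    if (0 == rank) {              /* root prepares some payload */
        blob = strdup("remote process list would go here");
        len = (int)strlen(blob) + 1;
    }
    bcast_blob(&blob, &len, 0, MPI_COMM_WORLD);
    printf("rank %d received %d bytes\n", rank, len);

    free(blob);
    MPI_Finalize();
    return 0;
}
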
Example #2
int mca_topo_base_cart_create(mca_topo_base_module_t *topo,
                              ompi_communicator_t* old_comm,
                              int ndims,
                              const int *dims,
                              const int *periods,
                              bool reorder,
                              ompi_communicator_t** comm_topo)
{
    int nprocs = 1, i, new_rank, num_procs, ret;
    ompi_communicator_t *new_comm;
    ompi_proc_t **topo_procs = NULL;
    mca_topo_base_comm_cart_2_2_0_t* cart;

    num_procs = old_comm->c_local_group->grp_proc_count;
    new_rank = old_comm->c_local_group->grp_my_rank;
    assert(topo->type == OMPI_COMM_CART);

    /* Calculate the number of processes in this grid */
    for (i = 0; i < ndims; ++i) {
        if(dims[i] <= 0) {
            return OMPI_ERROR;
        }
        nprocs *= dims[i];
    }

    /* the requested grid cannot need more processes than the communicator provides */
    if (num_procs < nprocs) {
        return MPI_ERR_DIMS;
    }

    /* check if we have to trim the list of processes */
    if (nprocs < num_procs) {
        num_procs = nprocs;
    }

    if (new_rank > (nprocs-1)) {
        ndims = 0;
        new_rank = MPI_UNDEFINED;
        num_procs = 0;
    }

    cart = OBJ_NEW(mca_topo_base_comm_cart_2_2_0_t);
    if( NULL == cart ) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }
    cart->ndims = ndims;

    /* MPI-2.1 allows 0-dimension cartesian communicators, so prevent
       a 0-byte malloc -- leave dims as NULL */
    if( ndims > 0 ) {
        cart->dims = (int*)malloc(sizeof(int) * ndims);
        if (NULL == cart->dims) {
            OBJ_RELEASE(cart);
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
        memcpy(cart->dims, dims, ndims * sizeof(int));

        /* Cartesian communicator: copy the periods array into the topology data */
        cart->periods = (int*)malloc(sizeof(int) * ndims);
        if (NULL == cart->periods) {
            OBJ_RELEASE(cart);
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
        memcpy(cart->periods, periods, ndims * sizeof(int));

        cart->coords = (int*)malloc(sizeof(int) * ndims);
        if (NULL == cart->coords) {
            OBJ_RELEASE(cart);
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
        {  /* setup the cartesian topology */
            int nprocs = num_procs, rank = new_rank;

            for (i = 0; i < ndims; ++i) {
                nprocs /= cart->dims[i];
                cart->coords[i] = rank / nprocs;
                rank %= nprocs;
            }
        }
    }

    /* JMS: This should really be refactored to use
       comm_create_group(), because ompi_comm_allocate() still
       complains about 0-byte mallocs in debug builds for 0-member
       groups. */
    if (num_procs > 0) {
        /* Copy the proc structure from the previous communicator over to
           the new one.  The topology module is then able to work on this
           copy and rearrange it as it deems fit. */
        topo_procs = (ompi_proc_t**)malloc(num_procs * sizeof(ompi_proc_t *));
        if (NULL == topo_procs) {
            OBJ_RELEASE(cart);
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
        if(OMPI_GROUP_IS_DENSE(old_comm->c_local_group)) {
            memcpy(topo_procs,
                   old_comm->c_local_group->grp_proc_pointers,
                   num_procs * sizeof(ompi_proc_t *));
        } else {
            for(i = 0 ; i < num_procs; i++) {
                topo_procs[i] = ompi_group_peer_lookup(old_comm->c_local_group,i);
            }
        }
    }

    /* allocate a new communicator */
    new_comm = ompi_comm_allocate(num_procs, 0);
    if (NULL == new_comm) {
        free(topo_procs);
        OBJ_RELEASE(cart);
        return MPI_ERR_INTERN;
    }

    assert(NULL == new_comm->c_topo);
    assert(!(new_comm->c_flags & OMPI_COMM_CART));
    new_comm->c_topo           = topo;
    new_comm->c_topo->mtc.cart = cart;
    new_comm->c_topo->reorder  = reorder;
    new_comm->c_flags         |= OMPI_COMM_CART;
    ret = ompi_comm_enable(old_comm, new_comm,
                           new_rank, num_procs, topo_procs);
    if (OMPI_SUCCESS != ret) {
        /* something went wrong while setting up the communicator */
        free(topo_procs);
        OBJ_RELEASE(cart);
        if (MPI_COMM_NULL != new_comm) {
            new_comm->c_topo = NULL;
            new_comm->c_flags &= ~OMPI_COMM_CART;
            ompi_comm_free (&new_comm);
        }
        return ret;
    }

    *comm_topo = new_comm;

    if( MPI_UNDEFINED == new_rank ) {
        ompi_comm_free(&new_comm);
        *comm_topo = MPI_COMM_NULL;
    }

    /* end here */
    return OMPI_SUCCESS;
}
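
The block that fills cart->coords above decomposes the caller's rank into row-major Cartesian coordinates, the same arithmetic that MPI_Cart_coords exposes. The standalone sketch below repeats that arithmetic on its own; the helper name cart_rank_to_coords is invented for illustration.

#include <stdio.h>

/* Decompose a linear rank into row-major Cartesian coordinates,
 * mirroring the coordinate loop in mca_topo_base_cart_create above. */
static void cart_rank_to_coords(int rank, int ndims, const int dims[], int coords[])
{
    int i, nprocs = 1;

    for (i = 0; i < ndims; ++i) {
        nprocs *= dims[i];          /* total number of grid slots */
    }
    for (i = 0; i < ndims; ++i) {
        nprocs /= dims[i];          /* size of one slab along dimension i */
        coords[i] = rank / nprocs;
        rank %= nprocs;
    }
}

int main(void)
{
    int dims[2] = { 3, 4 };         /* a 3x4 grid, ranks 0..11 */
    int coords[2];

    cart_rank_to_coords(7, 2, dims, coords);
    printf("rank 7 -> (%d, %d)\n", coords[0], coords[1]);   /* prints (1, 3) */
    return 0;
}
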
int mca_topo_base_graph_create(mca_topo_base_module_t *topo,
                               ompi_communicator_t* old_comm,
                               int nnodes,
                               int *index,
                               int *edges,
                               bool reorder,
                               ompi_communicator_t** comm_topo)
{
    ompi_communicator_t *new_comm;
    int new_rank, num_procs, ret, i;
    ompi_proc_t **topo_procs = NULL;
    mca_topo_base_comm_graph_2_2_0_t* graph;

    num_procs = old_comm->c_local_group->grp_proc_count;
    new_rank = old_comm->c_local_group->grp_my_rank;
    assert(topo->type == OMPI_COMM_GRAPH);

    if( num_procs < nnodes ) {
        return MPI_ERR_DIMS;
    }
    if( num_procs > nnodes ) {
        num_procs = nnodes;
    }
    if( new_rank > (nnodes - 1) ) {
        new_rank = MPI_UNDEFINED;
        num_procs = 0;
        nnodes = 0;
    }

    graph = OBJ_NEW(mca_topo_base_comm_graph_2_2_0_t);
    if( NULL == graph ) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }
    graph->nnodes = nnodes;

    /* Don't do any of the other initialization if we're not supposed
       to be part of the new communicator (because nnodes has been
       reset to 0, making things like index[nnodes-1] be junk).

       JMS: This should really be refactored to use
       comm_create_group(), because ompi_comm_allocate() still
       complains about 0-byte mallocs in debug builds for 0-member
       groups. */
    if (MPI_UNDEFINED != new_rank) {
        graph->index = (int*)malloc(sizeof(int) * nnodes);
        if (NULL == graph->index) {
            OBJ_RELEASE(graph);
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
        memcpy(graph->index, index, nnodes * sizeof(int));

        /* Graph communicator: copy the edges array into the topology data */
        graph->edges = (int*)malloc(sizeof(int) * index[nnodes-1]);
        if (NULL == graph->edges) {
            OBJ_RELEASE(graph);
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
        memcpy(graph->edges, edges, index[nnodes-1] * sizeof(int));

        topo_procs = (ompi_proc_t**)malloc(num_procs * sizeof(ompi_proc_t *));
        if (NULL == topo_procs) {
           OBJ_RELEASE(graph);
           return OMPI_ERR_OUT_OF_RESOURCE;
        }
        if(OMPI_GROUP_IS_DENSE(old_comm->c_local_group)) {
            memcpy(topo_procs, 
                   old_comm->c_local_group->grp_proc_pointers,
                   num_procs * sizeof(ompi_proc_t *));
        } else {
            for(i = 0 ; i < num_procs; i++) {
                topo_procs[i] = ompi_group_peer_lookup(old_comm->c_local_group,i);
            }
        }
    }

    /* allocate a new communicator */
    new_comm = ompi_comm_allocate(nnodes, 0);
    if (NULL == new_comm) {
        free(topo_procs);
        OBJ_RELEASE(graph);
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    ret = ompi_comm_enable(old_comm, new_comm,
                           new_rank, num_procs, topo_procs);
    if (OMPI_SUCCESS != ret) {
        free(topo_procs);
        OBJ_RELEASE(graph);
        ompi_comm_free (&new_comm);
        return ret;
    }
    
    new_comm->c_topo            = topo;
    new_comm->c_topo->mtc.graph = graph;
    new_comm->c_flags          |= OMPI_COMM_GRAPH;
    new_comm->c_topo->reorder   = reorder;
    *comm_topo = new_comm;

    if( MPI_UNDEFINED == new_rank ) {
        ompi_comm_free(&new_comm);
        *comm_topo = MPI_COMM_NULL;
    }

    return OMPI_SUCCESS;
}
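
The index and edges arrays handled above use the standard MPI graph-topology encoding: index[i] is the cumulative neighbor count of nodes 0..i and edges concatenates the neighbor lists, which is why exactly index[nnodes-1] integers are copied. Below is a small self-contained illustration of that encoding; the example graph is made up.

#include <stdio.h>

int main(void)
{
    /* 4-node graph with edges 0-1, 0-3, 1-2, 2-3 (each edge listed from both ends) */
    int nnodes  = 4;
    int index[] = { 2, 4, 6, 8 };                 /* cumulative neighbor counts */
    int edges[] = { 1, 3,  0, 2,  1, 3,  0, 2 };  /* concatenated neighbor lists */
    int node, j;

    for (node = 0; node < nnodes; ++node) {
        int first = (node == 0) ? 0 : index[node - 1];
        printf("node %d neighbors:", node);
        for (j = first; j < index[node]; ++j) {
            printf(" %d", edges[j]);
        }
        printf("\n");
    }
    /* index[nnodes-1] == 8 is the total number of edge entries, matching
       the memcpy of index[nnodes-1] ints in mca_topo_base_graph_create */
    return 0;
}
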