Beispiel #1
0
/**
 * Activate a freshly created communicator.
 *
 * This routine serves two purposes:
 * - the allreduce acts as a kind of barrier, which prevents incoming
 *   fragments on the new communicator before everybody has set up
 *   the comm structure.
 * - some components (e.g. the collective MagPIe component) might want
 *   to generate new communicators and communicate using the new comm.
 *   Thus, the coll selection is only performed after the 'barrier'.
 *
 * The reason that this routine is in comm_cid and not in comm.c is
 * that this file contains the allreduce implementations which are
 * required, and thus we avoid having duplicate code.
 *
 * @param newcomm       in/out: the communicator to activate.  On any
 *                      failure reported through the error path it is
 *                      released and *newcomm is set to MPI_COMM_NULL.
 * @param comm          parent communicator, passed to the allreduce.
 * @param bridgecomm    passed through to the allreduce.
 * @param local_leader  passed through to the allreduce.
 * @param remote_leader passed through to the allreduce.
 * @param mode          one of the OMPI_COMM_CID_* values; selects which
 *                      allreduce implementation is used.
 * @param send_first    passed through to the allreduce.
 *
 * @return OMPI_SUCCESS on success; MPI_UNDEFINED if mode is not one of
 *         the handled values (*newcomm is left untouched in that case);
 *         otherwise the error code returned by the PML add_comm call,
 *         the allreduce, or the coll module selection.
 */
int ompi_comm_activate ( ompi_communicator_t** newcomm,
                         ompi_communicator_t* comm,
                         ompi_communicator_t* bridgecomm,
                         void* local_leader,
                         void* remote_leader,
                         int mode,
                         int send_first )
{
    int ret = 0;

    int ok=0, gok=0;
    ompi_comm_cid_allredfct* allredfnct;

    /* Step 1: the barrier, after which it is allowed to
     * send messages over the new communicator
     */
    switch (mode)
    {
        case OMPI_COMM_CID_INTRA:
            allredfnct=(ompi_comm_cid_allredfct*)ompi_comm_allreduce_intra;
            break;
        case OMPI_COMM_CID_INTER:
            allredfnct=(ompi_comm_cid_allredfct*)ompi_comm_allreduce_inter;
            break;
        case OMPI_COMM_CID_INTRA_BRIDGE:
            allredfnct=(ompi_comm_cid_allredfct*)ompi_comm_allreduce_intra_bridge;
            break;
        case OMPI_COMM_CID_INTRA_OOB:
            allredfnct=(ompi_comm_cid_allredfct*)ompi_comm_allreduce_intra_oob;
            break;
        default:
            return MPI_UNDEFINED;
    }

    if (MPI_UNDEFINED != (*newcomm)->c_local_group->grp_my_rank) {

        /* Initialize the PML stuff in the newcomm  */
        if ( OMPI_SUCCESS != (ret = MCA_PML_CALL(add_comm(*newcomm))) ) {
            goto bail_on_error;
        }
        OMPI_COMM_SET_PML_ADDED(*newcomm);
    }

    /* The allreduce is the synchronisation point.  Its status must not
     * be dropped: if it failed we cannot assume that the other
     * processes have set up the communicator, so we must not go on
     * and activate it.
     */
    ret = (allredfnct)(&ok, &gok, 1, MPI_MIN, comm, bridgecomm,
                       local_leader, remote_leader, send_first );
    if ( OMPI_SUCCESS != ret ) {
        goto bail_on_error;
    }

    /**
     * Check to see if this process is in the new communicator.
     *
     * Specifically, this function is invoked by all processes in the
     * old communicator, regardless of whether they are in the new
     * communicator or not.  This is because it is far simpler to use
     * MPI collective functions on the old communicator to determine
     * some data for the new communicator (e.g., remote_leader) than
     * to kludge up our own pseudo-collective routines over just the
     * processes in the new communicator.  Hence, *all* processes in
     * the old communicator need to invoke this function.
     *
     * That being said, only processes in the new communicator need to
     * select a coll module for the new communicator.  More
     * specifically, processes who are not in the new communicator
     * should *not* select a coll module -- for example,
     * ompi_comm_rank(newcomm) returns MPI_UNDEFINED for processes who
     * are not in the new communicator.  This can cause errors in the
     * selection / initialization of a coll module.  Plus, it's
     * wasteful -- processes that are not in the new communicator will
     * end up freeing the new communicator anyway, so we might as well
     * leave the coll selection as NULL (the coll base comm unselect
     * code handles that case properly).
     */
    if (MPI_UNDEFINED == (*newcomm)->c_local_group->grp_my_rank) {
        return OMPI_SUCCESS;
    }

    /* Let the collectives components fight over who will do
       collective on this new comm.  */
    if (OMPI_SUCCESS != (ret = mca_coll_base_comm_select(*newcomm))) {
        goto bail_on_error;
    }

    /* For an inter communicator, we have to deal with the potential
     * problem of what happens if the communicator that we created
     * has a lower CID than the parent comm. This is not a problem
     * as long as the user calls MPI_Comm_free on the inter communicator.
     * However, if the communicators are not freed by the user but released
     * by Open MPI in MPI_Finalize, we walk through the list of still available
     * communicators and free them one by one. Thus, the communicator with
     * the lower CID is freed before the one that still refers to it, and
     * the stale pointer will lead to a segmentation violation. In order to
     * prevent that from happening, we mark *newcomm as 'extra retain' and
     * increase its reference counter by one if its CID is lower than that
     * of the parent. We cannot increase the reference counter if the CID is
     * larger, since a regular MPI_Comm_free would in that case leave the
     * communicator hanging around and thus we would not recycle CIDs
     * properly, which was the reason and the cause for this trouble.
     */
    if ( OMPI_COMM_IS_INTER(*newcomm)) {
        if ( OMPI_COMM_CID_IS_LOWER(*newcomm, comm)) {
            OMPI_COMM_SET_EXTRA_RETAIN (*newcomm);
            OBJ_RETAIN (*newcomm);
        }
    }


    return OMPI_SUCCESS;

 bail_on_error:
    /* Drop our reference and hand MPI_COMM_NULL back to the caller. */
    OBJ_RELEASE(*newcomm);
    *newcomm = MPI_COMM_NULL;
    return ret;
}
Beispiel #2
0
/**
 * Return the local-leader communicator to be used for a given root,
 * creating and caching it on the module if it does not exist yet.
 *
 * The offset of root in the color array is determined first.  The
 * module's hier_llead pointer array is then searched for an entry with
 * that offset.  If none is found, a new entry is allocated, filled by
 * mca_coll_hierarch_get_all_lleaders(), a new communicator is split off
 * hier_comm (color 1 for local leaders, MPI_UNDEFINED otherwise, key =
 * root) and the entry is appended to hier_llead.
 *
 * @param root             rank of the root in hier_comm.
 * @param hierarch_module  module holding hier_comm, the color array and
 *                         the hier_llead cache.
 * @param llroot           out: rank of root translated into the group of
 *                         the returned communicator, or MPI_UNDEFINED if
 *                         the returned communicator is MPI_COMM_NULL.
 * @param lroot            out: the my_lleader value of the selected
 *                         llead entry.
 *
 * @return the local-leader communicator stored in the selected entry,
 *         or NULL if allocating the entry, splitting the communicator
 *         or translating the rank failed.  On allocation or split
 *         failure *llroot and *lroot are not written.
 */
struct ompi_communicator_t*  mca_coll_hierarch_get_llcomm (int root,
                                                           mca_coll_hierarch_module_t *hierarch_module,
                                                           int* llroot,
                                                           int* lroot)
{
    struct ompi_communicator_t *llcomm=NULL;
    struct ompi_group_t *llgroup=NULL;
    struct ompi_group_t *group=NULL;
    struct mca_coll_hierarch_llead_t *llead=NULL;
    int found, i, rc, num_llead, offset;
    int rank = ompi_comm_rank (hierarch_module->hier_comm);
    int color;

    /* determine what our offset of root is in the colorarr */
    offset = mca_coll_hierarch_get_offset ( root,
                                            hierarch_module->hier_num_colorarr,
                                            hierarch_module->hier_colorarr );

    /* Look for an already existing llead entry with the same offset.
     * Possible future refinement: if the offset of root equals the
     * maximum offset of its color and the offset stored on an llead
     * entry is larger than the offset of root, that entry could be
     * reused as well.
     */
    num_llead = opal_pointer_array_get_size ( &(hierarch_module->hier_llead) );
    for ( found=0, i=0; i < num_llead; i++ ) {
        llead = (struct mca_coll_hierarch_llead_t *) opal_pointer_array_get_item (
                                                                                  &(hierarch_module->hier_llead), i );
        if ( NULL == llead ) {
            continue;
        }

        if (llead->offset == offset ) {
            found = 1;
            break;
        }
    }

    if ( !found ) {
        /* allocate a new llead element */
        llead = (struct mca_coll_hierarch_llead_t *) malloc (
                                                             sizeof(struct mca_coll_hierarch_llead_t));
        if ( NULL == llead ) {
            return NULL;
        }

        /* generate the list of lleaders with this offset */
        mca_coll_hierarch_get_all_lleaders ( rank, hierarch_module, llead, offset );
        color = MPI_UNDEFINED;
        if ( llead->am_lleader ) {
            color = 1;
        }

        /* create new lleader subcommunicator */
        rc = ompi_comm_split ( hierarch_module->hier_comm, color, root, &llcomm, 0);
        if ( OMPI_SUCCESS != rc ) {
            /* The element has not been stored anywhere yet, so it has
             * to be released here or it is lost.
             * NOTE(review): if mca_coll_hierarch_get_all_lleaders()
             * allocates memory owned by llead, that needs to be
             * released here as well -- confirm against its definition.
             */
            free ( llead );
            return NULL;
        }
        if ( OMPI_COMM_CID_IS_LOWER ( llcomm, hierarch_module->hier_comm ) ) {
            /* Mark the communicator as 'extra retain' and increase the
               reference count by one more. See ompi_comm_activate
               for detailed explanation. */
            OMPI_COMM_SET_EXTRA_RETAIN (llcomm);
            OBJ_RETAIN(llcomm);
        }


        llead->llcomm = llcomm;

        /* Store the new element on the hierarch_module struct */
        opal_pointer_array_add ( &(hierarch_module->hier_llead), llead);
    }

    llcomm = llead->llcomm;
    *lroot  = llead->my_lleader;
    *llroot = MPI_UNDEFINED;

    /* Only processes that hold a real lleader communicator can express
     * root as a rank in that communicator. */
    if ( MPI_COMM_NULL != llcomm ) {
        group   = hierarch_module->hier_comm->c_local_group;
        llgroup = llcomm->c_local_group;

        rc = ompi_group_translate_ranks ( group, 1, &root, llgroup, llroot);
        if ( OMPI_SUCCESS != rc ) {
            return NULL;
        }
    }

    return llcomm;
}
Beispiel #3
0
/**
 * Final step of the non-blocking communicator activation: select the
 * coll module for the new communicator stored in the request context
 * and apply the 'extra retain' handling for inter-communicators.
 *
 * @param request  request whose context field points to the
 *                 ompi_comm_cid_context_t of the activation.
 *
 * @return OMPI_SUCCESS, or the error code of the coll module selection.
 *         In the error case the new communicator is released and the
 *         caller-visible handle (*newcommp) is set to MPI_COMM_NULL.
 */
static int ompi_comm_activate_nb_complete (ompi_comm_request_t *request)
{
    ompi_comm_cid_context_t *ctx = (ompi_comm_cid_context_t *) request->context;
    int rc;

    /*
     * This function is invoked by all processes of the old
     * communicator, whether or not they are part of the new one: it is
     * far simpler to use MPI collective functions on the old
     * communicator to determine data for the new communicator (e.g.,
     * remote_leader) than to build pseudo-collective routines over just
     * the members of the new communicator.
     *
     * Only members of the new communicator need a coll module, though.
     * Non-members must *not* select one: ompi_comm_rank(newcomm)
     * returns MPI_UNDEFINED for them, which can cause errors in the
     * selection / initialization of a coll module.  It would also be
     * wasteful, since they will end up freeing the new communicator
     * anyway; the coll selection simply stays NULL (the coll base comm
     * unselect code handles that case properly).
     */
    if (MPI_UNDEFINED != (ctx->newcomm)->c_local_group->grp_my_rank) {

        /* Let the collectives components fight over who will do
           collective on this new comm.  */
        rc = mca_coll_base_comm_select(ctx->newcomm);
        if (OMPI_SUCCESS != rc) {
            OBJ_RELEASE(ctx->newcomm);
            *ctx->newcommp = MPI_COMM_NULL;
            return rc;
        }

        /*
         * Inter-communicator whose CID is lower than the CID of the
         * parent: as long as the user calls MPI_Comm_free this is not
         * a problem.  If the communicators are instead released by
         * Open MPI in MPI_Finalize, they are freed one by one while
         * walking the list of still available communicators, so the
         * one with the lower CID goes first and a pointer that still
         * refers to it becomes stale, leading to a segmentation
         * violation.  To prevent that, the communicator is marked as
         * 'extra retain' and its reference count is increased by one.
         * This must not be done when the CID is larger, because a
         * regular MPI_Comm_free would then leave the communicator
         * hanging around and CIDs would not be recycled properly.
         */
        if ((OMPI_COMM_IS_INTER(ctx->newcomm)) &&
            (OMPI_COMM_CID_IS_LOWER(ctx->newcomm, ctx->comm))) {
            OMPI_COMM_SET_EXTRA_RETAIN (ctx->newcomm);
            OBJ_RETAIN (ctx->newcomm);
        }
    }

    /* done */
    return OMPI_SUCCESS;
}
Beispiel #4
0
/**
 * Init module on the communicator.
 *
 * Splits comm into a sub-communicator per color (taken from the
 * module's hier_colorarr, indexed by the calling rank), allocates the
 * request array (2 * size entries), determines the local leaders and
 * splits off a second communicator containing the local leaders.  The
 * results are stored on the module: hier_comm, hier_lcomm,
 * hier_num_reqs, hier_reqs and the hier_llead pointer array, which is
 * constructed here and receives the first llead element.
 *
 * @param module  the hierarch module (a mca_coll_hierarch_module_t).
 * @param comm    communicator the module is enabled on.
 *
 * @return OMPI_SUCCESS on success, OMPI_ERROR if a communicator split
 *         or a memory allocation failed.  On failure everything that
 *         was allocated here is released again and the hier_lcomm /
 *         hier_reqs fields are reset to NULL if they had been set.
 */
int mca_coll_hierarch_module_enable (mca_coll_base_module_t *module,
                                     struct ompi_communicator_t *comm)
{
    int color;
    int size, rank, ret=OMPI_SUCCESS;
    int published=0;   /* set once hier_lcomm / hier_reqs were written */

    struct ompi_communicator_t *lcomm=NULL;
    struct ompi_communicator_t *llcomm=NULL;
    struct mca_coll_hierarch_llead_t *llead=NULL;
    ompi_request_t **reqs=NULL;
    mca_coll_hierarch_module_t *hierarch_module = (mca_coll_hierarch_module_t *) module;

    rank = ompi_comm_rank(comm);
    size = ompi_comm_size(comm);

    color = hierarch_module->hier_colorarr[rank];

    /* Generate the subcommunicator based on the color returned by
       the previous function. */
    ret = ompi_comm_split ( comm, color, rank, &lcomm, 0 );
    if ( OMPI_SUCCESS != ret ) {
        goto exit;
    }
    if ( OMPI_COMM_CID_IS_LOWER ( lcomm, comm ) ) {
        /* Mark the communicator as 'extra retain' and increase the
           reference count by one more. See ompi_comm_activate
           for detailed comments
        */
        OMPI_COMM_SET_EXTRA_RETAIN (lcomm);
        OBJ_RETAIN(lcomm);
    }

    /* hier_reqs is an array of request *pointers*, hence the element
       size is that of a pointer, not of the request object itself. */
    reqs = (ompi_request_t **) malloc (sizeof(ompi_request_t *)*size*2);

    hierarch_module->hier_comm     = comm;
    hierarch_module->hier_lcomm    = lcomm;
    hierarch_module->hier_num_reqs = 2 * size;
    hierarch_module->hier_reqs     = reqs;
    published = 1;
    if ( NULL == reqs ) {
        /* ret is still OMPI_SUCCESS from the split; flag the failure
           explicitly so that it is not reported as success. */
        ret = OMPI_ERROR;
        goto exit;
    }

    /* allocate a certain number of the hierarch_llead structures, which store
       information about local leader and the according subcommunicators
    */
    llead = (struct mca_coll_hierarch_llead_t * ) malloc (
                                                          sizeof(struct mca_coll_hierarch_llead_t));
    if ( NULL == llead ) {
        ret = OMPI_ERROR;
        goto exit;
    }

    /* These two routines set all relevant entries in the mca_coll_base_comm_t
     * structure. The first one makes all entries which are independent of the
     * offset (and have to be done only once per module). The second one is
     * depending on the offset, and has to be called therefore every time we need
     * a new llcomm
     */
    mca_coll_hierarch_get_llr ( hierarch_module );
    mca_coll_hierarch_get_all_lleaders ( rank, hierarch_module, llead, 1 );

    /* Generate the lleader communicator assuming that all lleaders are the first
       process in the list of processes with the same color. A function generating
       other lleader-comms will follow soon. */
    color = MPI_UNDEFINED;
    if ( llead->am_lleader ) {
        color = 1;
    }
    ret = ompi_comm_split ( comm, color, rank, &llcomm, 0);
    if ( OMPI_SUCCESS != ret ) {
        goto exit;
    }
    if ( OMPI_COMM_CID_IS_LOWER ( llcomm, comm ) ) {
        /* Mark the communicator as 'extra retain' and increase the
           reference count by one more. See ompi_comm_activate
           for detailed explanation.
        */
        OMPI_COMM_SET_EXTRA_RETAIN (llcomm);
        OBJ_RETAIN(llcomm);
    }


    llead->llcomm = llcomm;

    /* Store it now on the data structure */
    OBJ_CONSTRUCT(&(hierarch_module->hier_llead), opal_pointer_array_t);
    opal_pointer_array_add ( &(hierarch_module->hier_llead), llead);

    if ( mca_coll_hierarch_verbose_param ) {
        mca_coll_hierarch_dump_struct (hierarch_module);
    }

 exit:
    if ( OMPI_SUCCESS != ret ) {
        /* Undo everything acquired so far; free(NULL) is a no-op. */
        free(llead);
        free(reqs);
        if ( NULL != lcomm ) {
            /* lcomm is still NULL if the very first split failed */
            ompi_comm_free ( &lcomm );
        }
        if ( published ) {
            /* do not leave pointers to released objects on the module */
            hierarch_module->hier_lcomm = NULL;
            hierarch_module->hier_reqs  = NULL;
        }
        return OMPI_ERROR;
    }

    return OMPI_SUCCESS;
}