Ejemplo n.º 1
0
/*@
MPI_Allreduce - Combines values from all processes and distributes the result
                back to all processes

Input Parameters:
+ sendbuf - starting address of send buffer (choice) 
. count - number of elements in send buffer (integer) 
. datatype - data type of elements of send buffer (handle) 
. op - operation (handle) 
- comm - communicator (handle) 

Output Parameter:
. recvbuf - starting address of receive buffer (choice) 

.N ThreadSafe

.N Fortran

.N collops

.N Errors
.N MPI_ERR_BUFFER
.N MPI_ERR_COUNT
.N MPI_ERR_TYPE
.N MPI_ERR_OP
.N MPI_ERR_COMM
@*/
int MPI_Allreduce ( void *sendbuf, void *recvbuf, int count, 
		    MPI_Datatype datatype, MPI_Op op, MPI_Comm comm )
{
    static const char FCNAME[] = "MPI_Allreduce";
    int mpi_errno = MPI_SUCCESS;
    MPID_Comm *comm_ptr = NULL;
    int errflag = FALSE;
    MPID_MPI_STATE_DECL(MPID_STATE_MPI_ALLREDUCE);

    MPIR_ERRTEST_INITIALIZED_ORDIE();
    
    MPIU_THREAD_CS_ENTER(ALLFUNC,);
    MPID_MPI_COLL_FUNC_ENTER(MPID_STATE_MPI_ALLREDUCE);

    /* Validate parameters, especially handles needing to be converted */
#   ifdef HAVE_ERROR_CHECKING
    {
        MPID_BEGIN_ERROR_CHECKS;
        {
	    MPIR_ERRTEST_COMM(comm, mpi_errno);
            if (mpi_errno != MPI_SUCCESS) goto fn_fail;
	}
        MPID_END_ERROR_CHECKS;
    }
#   endif /* HAVE_ERROR_CHECKING */

    /* Convert MPI object handles to object pointers */
    MPID_Comm_get_ptr( comm, comm_ptr );

    /* Validate parameters and objects (post conversion) */
#   ifdef HAVE_ERROR_CHECKING
    {
        MPID_BEGIN_ERROR_CHECKS;
        {
            MPID_Datatype *datatype_ptr = NULL;
            MPID_Op *op_ptr = NULL;

            MPID_Comm_valid_ptr( comm_ptr, mpi_errno );
            if (mpi_errno != MPI_SUCCESS) goto fn_fail;
	    MPIR_ERRTEST_COUNT(count, mpi_errno);
	    MPIR_ERRTEST_DATATYPE(datatype, "datatype", mpi_errno);
	    MPIR_ERRTEST_OP(op, mpi_errno);
	    
            if (HANDLE_GET_KIND(datatype) != HANDLE_KIND_BUILTIN) {
                MPID_Datatype_get_ptr(datatype, datatype_ptr);
                MPID_Datatype_valid_ptr( datatype_ptr, mpi_errno );
                MPID_Datatype_committed_ptr( datatype_ptr, mpi_errno );
            }

	    if (comm_ptr->comm_kind == MPID_INTERCOMM)
                MPIR_ERRTEST_SENDBUF_INPLACE(sendbuf, count, mpi_errno);
            
            if (sendbuf != MPI_IN_PLACE) 
                MPIR_ERRTEST_USERBUFFER(sendbuf,count,datatype,mpi_errno);

            MPIR_ERRTEST_RECVBUF_INPLACE(recvbuf, count, mpi_errno);
	    MPIR_ERRTEST_USERBUFFER(recvbuf,count,datatype,mpi_errno);

	    if (mpi_errno != MPI_SUCCESS) goto fn_fail;

            if (HANDLE_GET_KIND(op) != HANDLE_KIND_BUILTIN) {
                MPID_Op_get_ptr(op, op_ptr);
                MPID_Op_valid_ptr( op_ptr, mpi_errno );
            }
            if (HANDLE_GET_KIND(op) == HANDLE_KIND_BUILTIN) {
                mpi_errno = 
                    ( * MPIR_Op_check_dtype_table[op%16 - 1] )(datatype); 
            }
	    if (count != 0) {
		MPIR_ERRTEST_ALIAS_COLL(sendbuf, recvbuf, mpi_errno);
	    }
	    if (mpi_errno != MPI_SUCCESS) goto fn_fail;
	}
        MPID_END_ERROR_CHECKS;
    }
#   endif /* HAVE_ERROR_CHECKING */

    /* ... body of routine ...  */

    mpi_errno = MPIR_Allreduce_impl(sendbuf, recvbuf, count, datatype, op, comm_ptr, &errflag);
    if (mpi_errno) goto fn_fail;

    /* ... end of body of routine ... */
    
  fn_exit:
    MPID_MPI_COLL_FUNC_EXIT(MPID_STATE_MPI_ALLREDUCE);
    MPIU_THREAD_CS_EXIT(ALLFUNC,);
    return mpi_errno;

  fn_fail:
    /* --BEGIN ERROR HANDLING-- */
#   ifdef HAVE_ERROR_CHECKING
    {
	mpi_errno = MPIR_Err_create_code(
	    mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, "**mpi_allreduce",
	    "**mpi_allreduce %p %p %d %D %O %C", sendbuf, recvbuf, count, datatype, op, comm);
    }
#   endif
    mpi_errno = MPIR_Err_return_comm( comm_ptr, FCNAME, mpi_errno );
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
Ejemplo n.º 2
0
/*@
MPI_Intercomm_merge - Creates an intracommuncator from an intercommunicator

Input Parameters:
+ intercomm - Intercommunicator (handle)
- high - Used to order the groups within comm (logical)
  when creating the new communicator.  This is a boolean value; the group
  that sets high true has its processes ordered `after` the group that sets 
  this value to false.  If all processes in the intercommunicator provide
  the same value, the choice of which group is ordered first is arbitrary.

Output Parameters:
. newintracomm - Created intracommunicator (handle)

Notes:
 While all processes may provide the same value for the 'high' parameter,
 this requires the MPI implementation to determine which group of 
 processes should be ranked first. 

.N ThreadSafe

.N Fortran

Algorithm:
.Eb
.i Allocate contexts 
.i Local and remote group leaders swap high values
.i Determine the high value.
.i Merge the two groups and make the intra-communicator
.Ee

.N Errors
.N MPI_SUCCESS
.N MPI_ERR_COMM
.N MPI_ERR_EXHAUSTED

.seealso: MPI_Intercomm_create, MPI_Comm_free
@*/
int MPI_Intercomm_merge(MPI_Comm intercomm, int high, MPI_Comm *newintracomm)
{
    int mpi_errno = MPI_SUCCESS;
    MPID_Comm *comm_ptr = NULL;
    MPID_Comm *new_intracomm_ptr;
    MPID_MPI_STATE_DECL(MPID_STATE_MPI_INTERCOMM_MERGE);

    MPIR_ERRTEST_INITIALIZED_ORDIE();
    
    MPID_THREAD_CS_ENTER(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);  
    MPID_MPI_FUNC_ENTER(MPID_STATE_MPI_INTERCOMM_MERGE);

    /* Validate parameters, especially handles needing to be converted */
#   ifdef HAVE_ERROR_CHECKING
    {
        MPID_BEGIN_ERROR_CHECKS;
        {
	    MPIR_ERRTEST_COMM(intercomm, mpi_errno);
	}
        MPID_END_ERROR_CHECKS;
    }
#   endif /* HAVE_ERROR_CHECKING */

    /* Convert MPI object handles to object pointers */
    MPID_Comm_get_ptr( intercomm, comm_ptr );
    
    /* Validate parameters and objects (post conversion) */
#   ifdef HAVE_ERROR_CHECKING
    {
        MPID_BEGIN_ERROR_CHECKS;
        {
            /* Validate comm_ptr */
            MPID_Comm_valid_ptr( comm_ptr, mpi_errno, FALSE );
	    /* If comm_ptr is not valid, it will be reset to null */
	    if (comm_ptr && comm_ptr->comm_kind != MPID_INTERCOMM) {
		mpi_errno = MPIR_Err_create_code( MPI_SUCCESS, 
		    MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_COMM,
						  "**commnotinter", 0 );
	    }
            if (mpi_errno) goto fn_fail;
        }
        MPID_END_ERROR_CHECKS;
    }
#   endif /* HAVE_ERROR_CHECKING */

    /* Make sure that we have a local intercommunicator */
    if (!comm_ptr->local_comm) {
	/* Manufacture the local communicator */
	MPIR_Setup_intercomm_localcomm( comm_ptr );
    }

#   ifdef HAVE_ERROR_CHECKING
    {
        MPID_BEGIN_ERROR_CHECKS;
        {
	    int acthigh;
            MPIR_Errflag_t errflag = MPIR_ERR_NONE;
	    /* Check for consistent valus of high in each local group.
               The Intel test suite checks for this; it is also an easy
               error to make */
	    acthigh = high ? 1 : 0;   /* Clamp high into 1 or 0 */
	    mpi_errno = MPIR_Allreduce_impl( MPI_IN_PLACE, &acthigh, 1, MPI_INT,
                                             MPI_SUM, comm_ptr->local_comm, &errflag );
	    if (mpi_errno) MPIR_ERR_POP(mpi_errno);
            MPIR_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
	    /* acthigh must either == 0 or the size of the local comm */
	    if (acthigh != 0 && acthigh != comm_ptr->local_size) {
		mpi_errno = MPIR_Err_create_code( MPI_SUCCESS, 
                                                  MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_ARG, 
						  "**notsame",
						  "**notsame %s %s", "high", 
						  "MPI_Intercomm_merge" );
		goto fn_fail;
	    }
        }
        MPID_END_ERROR_CHECKS;
    }
#   endif /* HAVE_ERROR_CHECKING */

    /* ... body of routine ... */

    mpi_errno = MPIR_Intercomm_merge_impl(comm_ptr, high, &new_intracomm_ptr);
    if (mpi_errno) goto fn_fail;
    
    MPIR_OBJ_PUBLISH_HANDLE(*newintracomm, new_intracomm_ptr->handle);

    /* ... end of body of routine ... */
    
  fn_exit:
    MPID_MPI_FUNC_EXIT(MPID_STATE_MPI_INTERCOMM_MERGE);
    MPID_THREAD_CS_EXIT(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
    return mpi_errno;

  fn_fail:
    /* --BEGIN ERROR HANDLING-- */
#   ifdef HAVE_ERROR_CHECKING
    {
	mpi_errno = MPIR_Err_create_code(
	    mpi_errno, MPIR_ERR_RECOVERABLE, FCNAME, __LINE__, MPI_ERR_OTHER, 
	    "**mpi_intercomm_merge",
	    "**mpi_intercomm_merge %C %d %p", intercomm, high, newintracomm);
    }
#   endif
    mpi_errno = MPIR_Err_return_comm( comm_ptr, FCNAME, mpi_errno );
    goto fn_exit;
    /* --END ERROR HANDLING-- */
}
Ejemplo n.º 3
0
int create_2level_comm (MPI_Comm comm, int size, int my_rank)
{
    static const char FCNAME[] = "create_2level_comm";
    int mpi_errno = MPI_SUCCESS;
    MPID_Comm* comm_ptr;
    MPID_Comm* comm_world_ptr;
    MPI_Group subgroup1, comm_group;
    MPID_Group *group_ptr=NULL;
    int leader_comm_size, my_local_size, my_local_id, input_flag =0, output_flag=0;
    int errflag = FALSE;
    int leader_group_size=0;
  
    MPIU_THREADPRIV_DECL;
    MPIU_THREADPRIV_GET;
    MPID_Comm_get_ptr( comm, comm_ptr );
    MPID_Comm_get_ptr( MPI_COMM_WORLD, comm_world_ptr );

    int* shmem_group = MPIU_Malloc(sizeof(int) * size);
    if (NULL == shmem_group){
        printf("Couldn't malloc shmem_group\n");
        ibv_error_abort (GEN_EXIT_ERR, "create_2level_com");
    }

    /* Creating local shmem group */
    int i = 0;
    int local_rank = 0;
    int grp_index = 0;
    comm_ptr->ch.leader_comm=MPI_COMM_NULL;
    comm_ptr->ch.shmem_comm=MPI_COMM_NULL;

    MPIDI_VC_t* vc = NULL;
    for (; i < size ; ++i){
       MPIDI_Comm_get_vc(comm_ptr, i, &vc);
       if (my_rank == i || vc->smp.local_rank >= 0){
           shmem_group[grp_index] = i;
           if (my_rank == i){
               local_rank = grp_index;
           }
           ++grp_index;
       }  
    } 

    /* Creating leader group */
    int leader = 0;
    leader = shmem_group[0];


    /* Gives the mapping to any process's leader in comm */
    comm_ptr->ch.leader_map = MPIU_Malloc(sizeof(int) * size);
    if (NULL == comm_ptr->ch.leader_map){
        printf("Couldn't malloc group\n");
        ibv_error_abort (GEN_EXIT_ERR, "create_2level_com");
    }
    
    mpi_errno = MPIR_Allgather_impl (&leader, 1, MPI_INT , comm_ptr->ch.leader_map, 1, MPI_INT, comm_ptr, &errflag);
    if(mpi_errno) {
       MPIU_ERR_POP(mpi_errno);
    }


    int* leader_group = MPIU_Malloc(sizeof(int) * size);
    if (NULL == leader_group){
        printf("Couldn't malloc leader_group\n");
        ibv_error_abort (GEN_EXIT_ERR, "create_2level_com");
    }

    /* Gives the mapping from leader's rank in comm to 
     * leader's rank in leader_comm */
    comm_ptr->ch.leader_rank = MPIU_Malloc(sizeof(int) * size);
    if (NULL == comm_ptr->ch.leader_rank){
        printf("Couldn't malloc marker\n");
        ibv_error_abort (GEN_EXIT_ERR, "create_2level_com");
    }

    for (i=0; i < size ; ++i){
         comm_ptr->ch.leader_rank[i] = -1;
    }
    int* group = comm_ptr->ch.leader_map;
    grp_index = 0;
    for (i=0; i < size ; ++i){
        if (comm_ptr->ch.leader_rank[(group[i])] == -1){
            comm_ptr->ch.leader_rank[(group[i])] = grp_index;
            leader_group[grp_index++] = group[i];
           
        }
    }
    leader_group_size = grp_index;
    comm_ptr->ch.leader_group_size = leader_group_size;

    mpi_errno = PMPI_Comm_group(comm, &comm_group);
    if(mpi_errno) {
       MPIU_ERR_POP(mpi_errno);
    }
    
    mpi_errno = PMPI_Group_incl(comm_group, leader_group_size, leader_group, &subgroup1);
     if(mpi_errno) {
       MPIU_ERR_POP(mpi_errno);
    }

    mpi_errno = PMPI_Comm_create(comm, subgroup1, &(comm_ptr->ch.leader_comm));
    if(mpi_errno) {
       MPIU_ERR_POP(mpi_errno);
    }

    MPID_Comm *leader_ptr;
    MPID_Comm_get_ptr( comm_ptr->ch.leader_comm, leader_ptr );
       
    MPIU_Free(leader_group);
    MPID_Group_get_ptr( subgroup1, group_ptr );
    if(group_ptr != NULL) { 
       mpi_errno = PMPI_Group_free(&subgroup1);
       if(mpi_errno) {
               MPIU_ERR_POP(mpi_errno);
       }
    } 

    mpi_errno = PMPI_Comm_split(comm, leader, local_rank, &(comm_ptr->ch.shmem_comm));
    if(mpi_errno) {
       MPIU_ERR_POP(mpi_errno);
    }

    MPID_Comm *shmem_ptr;
    MPID_Comm_get_ptr(comm_ptr->ch.shmem_comm, shmem_ptr);


    mpi_errno = PMPI_Comm_rank(comm_ptr->ch.shmem_comm, &my_local_id);
    if(mpi_errno) {
       MPIU_ERR_POP(mpi_errno);
    }
    mpi_errno = PMPI_Comm_size(comm_ptr->ch.shmem_comm, &my_local_size);
    if(mpi_errno) {
       MPIU_ERR_POP(mpi_errno);
    }

    if(my_local_id == 0) { 
           int array_index=0;
           mpi_errno = PMPI_Comm_size(comm_ptr->ch.leader_comm, &leader_comm_size);
           if(mpi_errno) {
               MPIU_ERR_POP(mpi_errno);
           }

           comm_ptr->ch.node_sizes = MPIU_Malloc(sizeof(int)*leader_comm_size);
           mpi_errno = PMPI_Allgather(&my_local_size, 1, MPI_INT,
				 comm_ptr->ch.node_sizes, 1, MPI_INT, comm_ptr->ch.leader_comm);
           if(mpi_errno) {
              MPIU_ERR_POP(mpi_errno);
           }
           comm_ptr->ch.is_uniform = 1; 
           for(array_index=0; array_index < leader_comm_size; array_index++) { 
                if(comm_ptr->ch.node_sizes[0] != comm_ptr->ch.node_sizes[array_index]) {
                     comm_ptr->ch.is_uniform = 0; 
                     break;
                }
           }
     } 
    
    comm_ptr->ch.is_global_block = 0; 
    /* We need to check to see if the ranks are block or not. Each node leader
     * gets the global ranks of all of its children processes. It scans through
     * this array to see if the ranks are in block order. The node-leaders then
     * do an allreduce to see if all the other nodes are also in block order.
     * This is followed by an intra-node bcast to let the children processes
     * know of the result of this step */ 
    if(my_local_id == 0) {
            int is_local_block = 1; 
            int index = 1; 
            
            while( index < my_local_size) { 
                    if( (shmem_group[index] - 1) != 
                         shmem_group[index - 1]) { 
                            is_local_block = 0; 
                            break; 
                    }
            index++;  
            }  

            comm_ptr->ch.shmem_coll_ok = 0;/* To prevent Allreduce taking shmem route*/
            mpi_errno = MPIR_Allreduce_impl(&(is_local_block), 
                                      &(comm_ptr->ch.is_global_block), 1, 
                                      MPI_INT, MPI_LAND, leader_ptr, &errflag);
            if(mpi_errno) {
               MPIU_ERR_POP(mpi_errno);
            } 
            mpi_errno = MPIR_Bcast_impl(&(comm_ptr->ch.is_global_block),1, MPI_INT, 0,
                                   shmem_ptr, &errflag); 
            if(mpi_errno) {
               MPIU_ERR_POP(mpi_errno);
            } 
    } else { 
            mpi_errno = MPIR_Bcast_impl(&(comm_ptr->ch.is_global_block),1, MPI_INT, 0,
                                   shmem_ptr, &errflag); 
            if(mpi_errno) {
               MPIU_ERR_POP(mpi_errno);
            } 
    }      
                              

    if (my_local_id == 0){
        lock_shmem_region();
        increment_shmem_comm_count();
        shmem_comm_count = get_shmem_comm_count();
        unlock_shmem_region();
    }
    
    shmem_ptr->ch.shmem_coll_ok = 0; 
    /* To prevent Bcast taking the knomial_2level_bcast route */
    mpi_errno = MPIR_Bcast_impl (&shmem_comm_count, 1, MPI_INT, 0, shmem_ptr, &errflag);
     if(mpi_errno) {
       MPIU_ERR_POP(mpi_errno);
    }


    if (shmem_comm_count <= g_shmem_coll_blocks){
        shmem_ptr->ch.shmem_comm_rank = shmem_comm_count-1;
        input_flag = 1;
    } else{
        input_flag = 0;
    }
    comm_ptr->ch.shmem_coll_ok = 0;/* To prevent Allreduce taking shmem route*/
    mpi_errno = MPIR_Allreduce_impl(&input_flag, &output_flag, 1, MPI_INT, MPI_LAND, comm_ptr, &errflag);
    if(mpi_errno) {
       MPIU_ERR_POP(mpi_errno);
    }
    comm_ptr->ch.allgather_comm_ok = 0;
    if (allgather_ranking){
        int is_contig =1, check_leader =1, check_size=1, is_local_ok=0,is_block=0;
        int PPN;
        int shmem_grp_size = my_local_size;
        int leader_rank; 
        MPI_Group allgather_group; 
        comm_ptr->ch.allgather_comm=MPI_COMM_NULL; 
        comm_ptr->ch.allgather_new_ranks=NULL;

        if(comm_ptr->ch.leader_comm != MPI_COMM_NULL) { 
            PMPI_Comm_rank(comm_ptr->ch.leader_comm, &leader_rank); 
        } 

        mpi_errno=MPIR_Bcast_impl(&leader_rank, 1, MPI_INT, 0, shmem_ptr, &errflag);
        if(mpi_errno) {
        	MPIU_ERR_POP(mpi_errno);
        } 

        for (i=1; i < shmem_grp_size; i++ ){
            if (shmem_group[i] != shmem_group[i-1]+1){
                is_contig =0; 
                break;
            }
        }

        if (leader != (shmem_grp_size*leader_rank)){
            check_leader=0;
        }

        if (shmem_grp_size != (size/leader_group_size)){
            check_size=0;
        }

        is_local_ok = is_contig && check_leader && check_size;

        mpi_errno = MPIR_Allreduce_impl(&is_local_ok, &is_block, 1, MPI_INT, MPI_LAND, comm_ptr, &errflag);
        if(mpi_errno) {
            MPIU_ERR_POP(mpi_errno);
        }

        if (is_block){
            int counter=0,j;
            comm_ptr->ch.allgather_new_ranks = MPIU_Malloc(sizeof(int)*size);
            if (NULL == comm_ptr->ch.allgather_new_ranks){
                    mpi_errno = MPIR_Err_create_code( MPI_SUCCESS, MPIR_ERR_RECOVERABLE, 
                                                  FCNAME, __LINE__, MPI_ERR_OTHER, 
                                                  "**nomem", 0 );
                    return mpi_errno;
            }
   
            PPN = shmem_grp_size;
            
            for (j=0; j < PPN; j++){
                for (i=0; i < leader_group_size; i++){
                    comm_ptr->ch.allgather_new_ranks[counter] = j + i*PPN;
                    counter++;
                }
            }

            mpi_errno = PMPI_Group_incl(comm_group, size, comm_ptr->ch.allgather_new_ranks, &allgather_group);
            if(mpi_errno) {
                 MPIU_ERR_POP(mpi_errno);
            }  
            mpi_errno = PMPI_Comm_create(comm_ptr->handle, allgather_group, &(comm_ptr->ch.allgather_comm));
            if(mpi_errno) {
               MPIU_ERR_POP(mpi_errno);
            }
            comm_ptr->ch.allgather_comm_ok = 1;
            
            mpi_errno=PMPI_Group_free(&allgather_group);
            if(mpi_errno) {
               MPIU_ERR_POP(mpi_errno);
            }
        }
    }
    mpi_errno=PMPI_Group_free(&comm_group);
    if(mpi_errno) {
               MPIU_ERR_POP(mpi_errno);
    }

    if (output_flag == 1){
        comm_ptr->ch.shmem_coll_ok = 1;
        comm_registry[comm_registered++] = comm_ptr->context_id;
    } else{
        comm_ptr->ch.shmem_coll_ok = 0;
        MPID_Group_get_ptr( subgroup1, group_ptr );
        if(group_ptr != NULL) { 
             mpi_errno = PMPI_Group_free(&subgroup1);
             if(mpi_errno) {
               MPIU_ERR_POP(mpi_errno);
             }
        }
        MPID_Group_get_ptr( comm_group, group_ptr );
        if(group_ptr != NULL) { 
             mpi_errno = PMPI_Group_free(&comm_group);
             if(mpi_errno) {
               MPIU_ERR_POP(mpi_errno);
             }
        }
        free_2level_comm(comm_ptr);
        comm_ptr->ch.shmem_comm = MPI_COMM_NULL; 
        comm_ptr->ch.leader_comm = MPI_COMM_NULL; 
    }
    ++comm_count;
    MPIU_Free(shmem_group);

    fn_fail: 
       MPIDU_ERR_CHECK_MULTIPLE_THREADS_EXIT( comm_ptr );
    
    return (mpi_errno);


}
Ejemplo n.º 4
0
int MPID_PG_ForwardPGInfo( MPID_Comm *peer_ptr, MPID_Comm *comm_ptr, 
			   int nPGids, const MPID_Gpid in_gpids[],
			   int root )
{
    int mpi_errno = MPI_SUCCESS;
    int i, allfound = 1, pgid, pgidWorld;
    MPIDI_PG_t *pg = 0;
    MPIDI_PG_iterator iter;
    MPIR_Errflag_t errflag = MPIR_ERR_NONE;
    
    const int *gpids = (const int*)&in_gpids[0];

    /* Get the pgid for CommWorld (always attached to the first process 
       group) */
    MPIDI_PG_Get_iterator(&iter);
    MPIDI_PG_Get_next( &iter, &pg );
    MPIDI_PG_IdToNum( pg, &pgidWorld );
    
    /* Extract the unique process groups */
    for (i=0; i<nPGids && allfound; i++) {
	if (gpids[0] != pgidWorld) {
	    /* Add this gpid to the list of values to check */
	    /* FIXME: For testing, we just test in place */
            MPIDI_PG_Get_iterator(&iter);
	    do {
                MPIDI_PG_Get_next( &iter, &pg );
		if (!pg) {
		    /* We don't know this pgid */
		    allfound = 0;
		    break;
		}
		MPIDI_PG_IdToNum( pg, &pgid );
	    } while (pgid != gpids[0]);
	}
	gpids += 2;
    }

    /* See if everyone is happy */
    mpi_errno = MPIR_Allreduce_impl( MPI_IN_PLACE, &allfound, 1, MPI_INT, MPI_LAND, comm_ptr, &errflag );
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    MPIR_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");
    
    if (allfound) return MPI_SUCCESS;

    /* FIXME: We need a cleaner way to handle this case than using an ifdef.
       We could have an empty version of MPID_PG_BCast in ch3u_port.c, but
       that's a rather crude way of addressing this problem.  Better is to
       make the handling of local and remote PIDS for the dynamic process
       case part of the dynamic process "module"; devices that don't support
       dynamic processes (and hence have only COMM_WORLD) could optimize for 
       that case */
#ifndef MPIDI_CH3_HAS_NO_DYNAMIC_PROCESS
    /* We need to share the process groups.  We use routines
       from ch3u_port.c */
    MPID_PG_BCast( peer_ptr, comm_ptr, root );
#endif
 fn_exit:
    return MPI_SUCCESS;
 fn_fail:
    goto fn_exit;
}
Ejemplo n.º 5
0
static int sched_cb_gcn_allocate_cid(MPID_Comm * comm, int tag, void *state)
{
    int mpi_errno = MPI_SUCCESS;
    struct gcn_state *st = state, *tmp;
    MPIU_Context_id_t newctxid;
    MPIR_Errflag_t errflag = MPIR_ERR_NONE;
    if (st->own_eager_mask) {
        newctxid = find_and_allocate_context_id(st->local_mask);
        if (st->ctx0)
            *st->ctx0 = newctxid;
        if (st->ctx1)
            *st->ctx1 = newctxid;

        st->own_eager_mask = 0;
        eager_in_use = 0;
    }
    else if (st->own_mask) {
        newctxid = find_and_allocate_context_id(st->local_mask);
        if (st->ctx0)
            *st->ctx0 = newctxid;
        if (st->ctx1)
            *st->ctx1 = newctxid;

        /* reset flag for the next try */
        mask_in_use = 0;
        /* If we found a ctx, remove element form list */
        if (newctxid > 0) {
            if (next_gcn == st) {
                next_gcn = st->next;
            }
            else {
                for (tmp = next_gcn; tmp->next != st; tmp = tmp->next);
                tmp->next = st->next;
            }
        }
    }

    if (*st->ctx0 == 0) {
        if (st->local_mask[ALL_OWN_MASK_FLAG] == 1) {
            /* --BEGIN ERROR HANDLING-- */
            int nfree = 0;
            int ntotal = 0;
            int minfree;
            context_mask_stats(&nfree, &ntotal);
            minfree = nfree;
            MPIR_Allreduce_impl(MPI_IN_PLACE, &minfree, 1, MPI_INT,
                                MPI_MIN, st->comm_ptr, &errflag);
            if (minfree > 0) {
                MPIR_ERR_SETANDJUMP3(mpi_errno, MPI_ERR_OTHER,
                                     "**toomanycommfrag", "**toomanycommfrag %d %d %d",
                                     nfree, ntotal, minfree);
            }
            else {
                MPIR_ERR_SETANDJUMP3(mpi_errno, MPI_ERR_OTHER,
                                     "**toomanycomm", "**toomanycomm %d %d %d",
                                     nfree, ntotal, minfree);
            }
            /* --END ERROR HANDLING-- */
        }
        else {
            /* do not own mask, try again */
            if (st->first_iter == 1) {
                st->first_iter = 0;
                /* Set the Tag for the idup-operations. We have two problems here:
                 *  1.) The tag should not be used by another (blocking) context_id allocation.
                 *      Therefore, we set tag_up as lower bound for the operation. tag_ub is used by
                 *      most of the other blocking operations, but tag is always >0, so this
                 *      should be fine.
                 *  2.) We need odering between multiple idup operations on the same communicator.
                 *       The problem here is that the iallreduce operations of the first iteration
                 *       are not necessarily completed in the same order as they are issued, also on the
                 *       same communicator. To avoid deadlocks, we cannot add the elements to the
                 *       list bevfore the first iallreduce is completed. The "tag" is created for the
                 *       scheduling - by calling  MPID_Sched_next_tag(comm_ptr, &tag) - and the same
                 *       for a idup operation on all processes. So we use it here. */
                /* FIXME I'm not sure if there can be an overflows for this tag */
                st->tag = (uint64_t) tag + MPIR_Process.attrs.tag_ub;
                add_gcn_to_list(st);
            }
            mpi_errno = MPID_Sched_cb(&sched_cb_gcn_copy_mask, st, st->s);
            if (mpi_errno)
                MPIR_ERR_POP(mpi_errno);
            MPID_SCHED_BARRIER(st->s);
        }
    }
    else {
        /* Successfully allocated a context id */
        mpi_errno = MPID_Sched_cb(&sched_cb_gcn_bcast, st, st->s);
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);
        MPID_SCHED_BARRIER(st->s);
    }

  fn_exit:
    return mpi_errno;
  fn_fail:
    /* make sure that the pending allocations are scheduled */
    if (!st->first_iter) {
        if (next_gcn == st) {
            next_gcn = st->next;
        }
        else {
            for (tmp = next_gcn; tmp && tmp->next != st; tmp = tmp->next);
            tmp->next = st->next;
        }
    }
    /* In the case of failure, the new communicator was half created.
     * So we need to clean the memory allocated for it. */
    MPIR_Comm_map_free(st->new_comm);
    MPIU_Handle_obj_free(&MPID_Comm_mem, st->new_comm);
    MPIU_Free(st);
    goto fn_exit;
}
Ejemplo n.º 6
0
int MPIR_Get_contextid_sparse_group(MPID_Comm * comm_ptr, MPID_Group * group_ptr, int tag,
                                    MPIU_Context_id_t * context_id, int ignore_id)
{
    int mpi_errno = MPI_SUCCESS;
    MPIR_Errflag_t errflag = MPIR_ERR_NONE;
    struct gcn_state st;
    struct gcn_state *tmp;
    MPID_MPI_STATE_DECL(MPID_STATE_MPIR_GET_CONTEXTID);

    MPID_MPI_FUNC_ENTER(MPID_STATE_MPIR_GET_CONTEXTID);

    st.first_iter = 1;
    st.comm_ptr = comm_ptr;
    st.tag = tag;
    st.own_mask = 0;
    st.own_eager_mask = 0;
    /* Group-collective and ignore_id should never be combined */
    MPIU_Assert(!(group_ptr != NULL && ignore_id));

    *context_id = 0;

    MPIU_DBG_MSG_FMT(COMM, VERBOSE, (MPIU_DBG_FDEST,
                                     "Entering; shared state is %d:%d, my ctx id is %d, tag=%d",
                                     mask_in_use, eager_in_use, comm_ptr->context_id, tag));

    while (*context_id == 0) {
        /* We lock only around access to the mask (except in the global locking
         * case).  If another thread is using the mask, we take a mask of zero. */
        MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);

        if (initialize_context_mask) {
            context_id_init();
        }

        if (eager_nelem < 0) {
            /* Ensure that at least one word of deadlock-free context IDs is
             * always set aside for the base protocol */
            MPIU_Assert(MPIR_CVAR_CTXID_EAGER_SIZE >= 0 &&
                        MPIR_CVAR_CTXID_EAGER_SIZE < MPIR_MAX_CONTEXT_MASK - 1);
            eager_nelem = MPIR_CVAR_CTXID_EAGER_SIZE;
        }

        if (ignore_id) {
            /* We are not participating in the resulting communicator, so our
             * context ID space doesn't matter.  Set the mask to "all available". */
            memset(st.local_mask, 0xff, MPIR_MAX_CONTEXT_MASK * sizeof(int));
            st.own_mask = 0;
            /* don't need to touch mask_in_use/lowest_context_id b/c our thread
             * doesn't ever need to "win" the mask */
        }

        /* Deadlock avoidance: Only participate in context id loop when all
         * processes have called this routine.  On the first iteration, use the
         * "eager" allocation protocol.
         */
        else if (st.first_iter) {
            memset(st.local_mask, 0, MPIR_MAX_CONTEXT_MASK * sizeof(int));
            st.own_eager_mask = 0;
            /* Attempt to reserve the eager mask segment */
            if (!eager_in_use && eager_nelem > 0) {
                int i;
                for (i = 0; i < eager_nelem; i++)
                    st.local_mask[i] = context_mask[i];

                eager_in_use = 1;
                st.own_eager_mask = 1;
            }
        }

        else {
            MPIU_Assert(next_gcn != NULL);
            /*If we are here, at least one element must be in the list, at least myself */

            /* only the first element in the list can own the mask. However, maybe the mask is used
             * by another thread, which added another allcoation to the list bevore. So we have to check,
             * if the mask is used and mark, if we own it */
            if (mask_in_use || &st != next_gcn) {
                memset(st.local_mask, 0, MPIR_MAX_CONTEXT_MASK * sizeof(int));
                st.own_mask = 0;
                MPIU_DBG_MSG_FMT(COMM, VERBOSE, (MPIU_DBG_FDEST,
                                                 "Mask is in use, my context_id is %d, owner context id is %d",
                                                 st.comm_ptr->context_id,
                                                 next_gcn->comm_ptr->context_id));
            }
            else {
                int i;
                /* Copy safe mask segment to local_mask */
                for (i = 0; i < eager_nelem; i++)
                    st.local_mask[i] = 0;
                for (i = eager_nelem; i < MPIR_MAX_CONTEXT_MASK; i++)
                    st.local_mask[i] = context_mask[i];

                mask_in_use = 1;
                st.own_mask = 1;
                MPIU_DBG_MSG(COMM, VERBOSE, "Copied local_mask");
            }
        }
        MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);

        /* Note: MPIR_MAX_CONTEXT_MASK elements of local_mask are used by the
         * context ID allocation algorithm.  The additional element is ignored
         * by the context ID mask access routines and is used as a flag for
         * detecting context ID exhaustion (explained below). */
        if (st.own_mask || ignore_id)
            st.local_mask[ALL_OWN_MASK_FLAG] = 1;
        else
            st.local_mask[ALL_OWN_MASK_FLAG] = 0;

        /* Now, try to get a context id */
        MPIU_Assert(comm_ptr->comm_kind == MPID_INTRACOMM);
        /* In the global and brief-global cases, note that this routine will
         * release that global lock when it needs to wait.  That will allow
         * other processes to enter the global or brief global critical section.
         */
        if (group_ptr != NULL) {
            int coll_tag = tag | MPIR_Process.tagged_coll_mask; /* Shift tag into the tagged coll space */
            mpi_errno = MPIR_Allreduce_group(MPI_IN_PLACE, st.local_mask, MPIR_MAX_CONTEXT_MASK + 1,
                                             MPI_INT, MPI_BAND, comm_ptr, group_ptr, coll_tag,
                                             &errflag);
        }
        else {
            mpi_errno = MPIR_Allreduce_impl(MPI_IN_PLACE, st.local_mask, MPIR_MAX_CONTEXT_MASK + 1,
                                            MPI_INT, MPI_BAND, comm_ptr, &errflag);
        }
        if (mpi_errno)
            MPIR_ERR_POP(mpi_errno);
        MPIR_ERR_CHKANDJUMP(errflag, mpi_errno, MPI_ERR_OTHER, "**coll_fail");

        /* MT FIXME 2/3 cases don't seem to need the CONTEXTID CS, check and
         * narrow this region */
        MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);
        if (ignore_id) {
            /* we don't care what the value was, but make sure that everyone
             * who did care agreed on a value */
            *context_id = locate_context_bit(st.local_mask);
            /* used later in out-of-context ids check and outer while loop condition */
        }
        else if (st.own_eager_mask) {
            /* There is a chance that we've found a context id */
            /* Find_and_allocate_context_id updates the context_mask if it finds a match */
            *context_id = find_and_allocate_context_id(st.local_mask);
            MPIU_DBG_MSG_D(COMM, VERBOSE, "Context id is now %hd", *context_id);

            st.own_eager_mask = 0;
            eager_in_use = 0;
            if (*context_id <= 0) {
                /* else we did not find a context id. Give up the mask in case
                 * there is another thread (with a lower input context id)
                 * waiting for it.  We need to ensure that any other threads
                 * have the opportunity to run, hence yielding */
                /* FIXME: Do we need to do an GLOBAL yield here?
                 * When we do a collective operation, we anyway yield
                 * for other others */
                MPID_THREAD_CS_YIELD(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX); /* FG: TODO Double-check */
                MPID_THREAD_CS_YIELD(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);
            }
        }
        else if (st.own_mask) {
            /* There is a chance that we've found a context id */
            /* Find_and_allocate_context_id updates the context_mask if it finds a match */
            *context_id = find_and_allocate_context_id(st.local_mask);
            MPIU_DBG_MSG_D(COMM, VERBOSE, "Context id is now %hd", *context_id);

            mask_in_use = 0;

            if (*context_id > 0) {
                /* If we found a new context id, we have to remove the element from the list, so the
                 * next allocation can own the mask */
                if (next_gcn == &st) {
                    next_gcn = st.next;
                }
                else {
                    for (tmp = next_gcn; tmp->next != &st; tmp = tmp->next);    /* avoid compiler warnings */
                    tmp->next = st.next;
                }
            }
            else {
                /* else we did not find a context id. Give up the mask in case
                 * there is another thread in the gcn_next_list
                 * waiting for it.  We need to ensure that any other threads
                 * have the opportunity to run, hence yielding */
                /* FIXME: Do we need to do an GLOBAL yield here?
                 * When we do a collective operation, we anyway yield
                 * for other others */
                MPID_THREAD_CS_YIELD(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
                MPID_THREAD_CS_YIELD(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);
            }
        }
        else {
            /* As above, force this thread to yield */
            /* FIXME: Do we need to do an GLOBAL yield here?  When we
             * do a collective operation, we anyway yield for other
             * others */
            MPID_THREAD_CS_YIELD(GLOBAL, MPIR_THREAD_GLOBAL_ALLFUNC_MUTEX);
            MPID_THREAD_CS_YIELD(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);
        }
        MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);

        /* Test for context ID exhaustion: All threads that will participate in
         * the new communicator owned the mask and could not allocate a context
         * ID.  This indicates that either some process has no context IDs
         * available, or that some are available, but the allocation cannot
         * succeed because there is no common context ID. */
        if (*context_id == 0 && st.local_mask[ALL_OWN_MASK_FLAG] == 1) {
            /* --BEGIN ERROR HANDLING-- */
            int nfree = 0;
            int ntotal = 0;
            int minfree;

            if (st.own_mask) {
                MPID_THREAD_CS_ENTER(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);
                mask_in_use = 0;
                MPID_THREAD_CS_EXIT(POBJ, MPIR_THREAD_POBJ_CTX_MUTEX);
            }

            context_mask_stats(&nfree, &ntotal);
            if (ignore_id)
                minfree = INT_MAX;
            else
                minfree = nfree;

            if (group_ptr != NULL) {
                int coll_tag = tag | MPIR_Process.tagged_coll_mask;     /* Shift tag into the tagged coll space */
                mpi_errno = MPIR_Allreduce_group(MPI_IN_PLACE, &minfree, 1, MPI_INT, MPI_MIN,
                                                 comm_ptr, group_ptr, coll_tag, &errflag);
            }
            else {
                mpi_errno = MPIR_Allreduce_impl(MPI_IN_PLACE, &minfree, 1, MPI_INT,
                                                MPI_MIN, comm_ptr, &errflag);
            }

            if (minfree > 0) {
                MPIR_ERR_SETANDJUMP3(mpi_errno, MPI_ERR_OTHER,
                                     "**toomanycommfrag", "**toomanycommfrag %d %d %d",
                                     nfree, ntotal, ignore_id);
            }
            else {
                MPIR_ERR_SETANDJUMP3(mpi_errno, MPI_ERR_OTHER,
                                     "**toomanycomm", "**toomanycomm %d %d %d",
                                     nfree, ntotal, ignore_id);
            }
            /* --END ERROR HANDLING-- */
        }
        if (st.first_iter == 1) {
            st.first_iter = 0;
            /* to avoid deadlocks, the element is not added to the list bevore the first iteration */
            if (!ignore_id && *context_id == 0)
                add_gcn_to_list(&st);
        }
    }

  fn_exit:
    if (ignore_id)
        *context_id = MPIU_INVALID_CONTEXT_ID;
    MPIU_DBG_MSG_S(COMM, VERBOSE, "Context mask = %s", context_mask_to_str());
    MPID_MPI_FUNC_EXIT(MPID_STATE_MPIR_GET_CONTEXTID);
    return mpi_errno;

    /* --BEGIN ERROR HANDLING-- */
  fn_fail:
    /* Release the masks */
    if (st.own_mask) {
        /* is it safe to access this without holding the CS? */
        mask_in_use = 0;
    }
    /*If in list, remove it */
    if (!st.first_iter && !ignore_id) {
        if (next_gcn == &st) {
            next_gcn = st.next;
        }
        else {
            for (tmp = next_gcn; tmp->next != &st; tmp = tmp->next);
            tmp->next = st.next;
        }
    }


    goto fn_exit;
    /* --END ERROR HANDLING-- */
}