Ejemplo n.º 1
0
int MPI_Alltoallv(const void *sendbuf, const int sendcounts[],
                  const int sdispls[], MPI_Datatype sendtype,
                  void *recvbuf, const int recvcounts[], const int rdispls[],
                  MPI_Datatype recvtype, MPI_Comm comm)
{
    int i, size, err;

    MEMCHECKER(
        ptrdiff_t recv_ext;
        ptrdiff_t send_ext;

        if (MPI_IN_PLACE != sendbuf) {
            memchecker_datatype(sendtype);
            ompi_datatype_type_extent(sendtype, &send_ext);
        }
        memchecker_datatype(recvtype);
        ompi_datatype_type_extent(recvtype, &recv_ext);

        memchecker_comm(comm);

        size = OMPI_COMM_IS_INTER(comm)?ompi_comm_remote_size(comm):ompi_comm_size(comm);
        for ( i = 0; i < size; i++ ) {
            if (MPI_IN_PLACE != sendbuf) {
                /* check if send chunks are defined. */
                memchecker_call(&opal_memchecker_base_isdefined,
                                (char *)(sendbuf)+sdispls[i]*send_ext,
                                sendcounts[i], sendtype);
            }
            /* check if receive chunks are addressable. */
            memchecker_call(&opal_memchecker_base_isaddressable,
                            (char *)(recvbuf)+rdispls[i]*recv_ext,
                            recvcounts[i], recvtype);
        }
    );
Ejemplo n.º 2
0
int MPI_Ialltoallw(const void *sendbuf, const int sendcounts[], const int sdispls[],
                   const MPI_Datatype sendtypes[], void *recvbuf, const int recvcounts[],
                   const int rdispls[], const MPI_Datatype recvtypes[], MPI_Comm comm,
                   MPI_Request *request)
{
    int i, size, err;
    size_t sendtype_size, recvtype_size;

    MEMCHECKER(
        ptrdiff_t recv_ext;
        ptrdiff_t send_ext;


        memchecker_comm(comm);

        size = OMPI_COMM_IS_INTER(comm)?ompi_comm_remote_size(comm):ompi_comm_size(comm);
        for ( i = 0; i < size; i++ ) {
            if (MPI_IN_PLACE != sendbuf) {
                memchecker_datatype(sendtypes[i]);
                ompi_datatype_type_extent(sendtypes[i], &send_ext);
                memchecker_call(&opal_memchecker_base_isdefined,
                                (char *)(sendbuf)+sdispls[i]*send_ext,
                                sendcounts[i], sendtypes[i]);
            }

            memchecker_datatype(recvtypes[i]);
            ompi_datatype_type_extent(recvtypes[i], &recv_ext);
            memchecker_call(&opal_memchecker_base_isaddressable,
                            (char *)(recvbuf)+sdispls[i]*recv_ext,
                            recvcounts[i], recvtypes[i]);
        }
    );
Ejemplo n.º 3
0
/*
 * Invoked when there's a new communicator that has been created.
 * Look at the communicator and decide which set of functions and
 * priority we want to return.
 */
mca_coll_base_module_t *
mca_coll_hcoll_comm_query(struct ompi_communicator_t *comm, int *priority)
{
    mca_coll_base_module_t *module;
    mca_coll_hcoll_module_t *hcoll_module;
    static bool libhcoll_initialized = false;
    *priority = 0;
    module = NULL;

    if (!mca_coll_hcoll_component.hcoll_enable){
        goto exit;
    }

    if (!libhcoll_initialized)
    {
        /* libhcoll should be initialized here since current implmentation of
           mxm bcol in libhcoll needs world_group fully functional during init

           world_group, i.e. ompi_comm_world, is not ready at hcoll component open
           call */
        opal_progress_register(hcoll_progress_fn);
        int rc = hcoll_init();

        if (HCOLL_SUCCESS != rc){
            mca_coll_hcoll_component.hcoll_enable = 0;
            opal_progress_unregister(hcoll_progress_fn);
            HCOL_VERBOSE(0,"Hcol library init failed");
            return NULL;
        }
        libhcoll_initialized = true;
    }
    hcoll_module = OBJ_NEW(mca_coll_hcoll_module_t);
    if (!hcoll_module){
        goto exit;
    }

    if (ompi_comm_size(comm) < 2 || OMPI_COMM_IS_INTER(comm)){
        goto exit;
    }



    hcoll_module->super.coll_module_enable = mca_coll_hcoll_module_enable;
    hcoll_module->super.coll_barrier = hcoll_collectives.coll_barrier ? mca_coll_hcoll_barrier : NULL;
    hcoll_module->super.coll_bcast = hcoll_collectives.coll_bcast ? mca_coll_hcoll_bcast : NULL;
    hcoll_module->super.coll_allgather = hcoll_collectives.coll_allgather ? mca_coll_hcoll_allgather : NULL;
    hcoll_module->super.coll_allreduce = hcoll_collectives.coll_allreduce ? mca_coll_hcoll_allreduce : NULL;
    hcoll_module->super.coll_alltoall = /*hcoll_collectives.coll_alltoall ? mca_coll_hcoll_alltoall : */  NULL;
    hcoll_module->super.coll_ibarrier = hcoll_collectives.coll_ibarrier ? mca_coll_hcoll_ibarrier : NULL;
    hcoll_module->super.coll_ibcast = hcoll_collectives.coll_ibcast ? mca_coll_hcoll_ibcast : NULL;
    hcoll_module->super.coll_iallgather = hcoll_collectives.coll_iallgather ? mca_coll_hcoll_iallgather : NULL;
    hcoll_module->super.coll_iallreduce = hcoll_collectives.coll_iallreduce ? mca_coll_hcoll_iallreduce : NULL;

    *priority = mca_coll_hcoll_component.hcoll_priority;
    module = &hcoll_module->super;


exit:
    return module;
}
Ejemplo n.º 4
0
static int ompi_comm_allreduce_inter_nb (int *inbuf, int *outbuf,
        int count, struct ompi_op_t *op,
        ompi_comm_cid_context_t *cid_context,
        ompi_request_t **req)
{
    ompi_communicator_t *intercomm = cid_context->comm;
    ompi_comm_allreduce_context_t *context;
    ompi_comm_request_t *request;
    ompi_request_t *subreq;
    int local_rank, rc;

    if (!OMPI_COMM_IS_INTER (cid_context->comm)) {
        return MPI_ERR_COMM;
    }

    request = ompi_comm_request_get ();
    if (OPAL_UNLIKELY(NULL == request)) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    context = ompi_comm_allreduce_context_alloc (inbuf, outbuf, count, op, cid_context);
    if (OPAL_UNLIKELY(NULL == context)) {
        ompi_comm_request_return (request);
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    request->context = &context->super;

    /* Allocate temporary arrays */
    local_rank = ompi_comm_rank (intercomm);

    if (0 == local_rank) {
        context->tmpbuf  = (int *) calloc (count, sizeof(int));
        if (OPAL_UNLIKELY (NULL == context->tmpbuf)) {
            ompi_comm_request_return (request);
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
    }

    /* Execute the inter-allreduce: the result from the local will be in the buffer of the remote group
     * and vise-versa. */
    rc = intercomm->c_local_comm->c_coll.coll_ireduce (inbuf, context->tmpbuf, count, MPI_INT, op, 0,
            intercomm->c_local_comm, &subreq,
            intercomm->c_local_comm->c_coll.coll_ireduce_module);
    if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
        ompi_comm_request_return (request);
        return rc;
    }

    if (0 == local_rank) {
        ompi_comm_request_schedule_append (request, ompi_comm_allreduce_inter_leader_exchange, &subreq, 1);
    } else {
        ompi_comm_request_schedule_append (request, ompi_comm_allreduce_inter_bcast, &subreq, 1);
    }

    ompi_comm_request_start (request);
    *req = &request->super;

    return OMPI_SUCCESS;
}
Ejemplo n.º 5
0
int MPI_File_open(MPI_Comm comm, char *filename, int amode,
                  MPI_Info info, MPI_File *fh)
{
    int rc;

    if (MPI_PARAM_CHECK) {
        OMPI_ERR_INIT_FINALIZE(FUNC_NAME);
        if (NULL == info || ompi_info_is_freed(info)) {
            return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_INFO,
                                          FUNC_NAME);
        } else if (ompi_comm_invalid(comm)) {
            return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM,
                                          FUNC_NAME);
        }
        if (OMPI_COMM_IS_INTER(comm)) {
            return OMPI_ERRHANDLER_INVOKE (comm, MPI_ERR_COMM,
                                          FUNC_NAME);
        }
	
    }

    /* Note that MPI-2:9.7 (p265 in the ps; p261 in the pdf) says that
       errors in MPI_FILE_OPEN (before the file handle is created)
       should invoke the default error handler on MPI_FILE_NULL.
       Hence, if we get a file handle out of ompi_file_open(), invoke
       the error handler on that.  If not, invoke the error handler on
       MPI_FILE_NULL. */

    /* The io framework is only initialized lazily.  If it hasn't
       already been initialized, do so now (note that MPI_FILE_OPEN
       and MPI_FILE_DELETE are the only two places that it will be
       initialized). */

    if (!(mca_io_base_components_opened_valid ||
          mca_io_base_components_available_valid)) {
        if (OMPI_SUCCESS != (rc = mca_io_base_open())) {
            return OMPI_ERRHANDLER_INVOKE(MPI_FILE_NULL, rc, FUNC_NAME);
        }
        if (OMPI_SUCCESS != 
            (rc = mca_io_base_find_available(OMPI_ENABLE_PROGRESS_THREADS,
                                             OMPI_ENABLE_MPI_THREADS))) {
            return OMPI_ERRHANDLER_INVOKE(MPI_FILE_NULL, rc, FUNC_NAME);
        }
    }

    /* Create an empty MPI_File handle */

    *fh = MPI_FILE_NULL;
    rc = ompi_file_open(comm, filename, amode, info, fh);

    /* Creating the file handle also selects a component to use,
       creates a module, and calls file_open() on the module.  So
       we're good to go. */

    OMPI_ERRHANDLER_RETURN(rc, *fh, rc, FUNC_NAME);
}
Ejemplo n.º 6
0
ompi_dpm_base_disconnect_obj *ompi_dpm_base_disconnect_init ( ompi_communicator_t *comm)
{
    ompi_dpm_base_disconnect_obj *obj=NULL;
    int ret;
    int i;

    obj = (ompi_dpm_base_disconnect_obj *) calloc(1,sizeof(ompi_dpm_base_disconnect_obj));
    if ( NULL == obj ) {
        printf("Could not allocate disconnect object\n");
        return NULL;
    }

    if ( OMPI_COMM_IS_INTER(comm) ) {
        obj->size = ompi_comm_remote_size (comm);
    } else {
        obj->size = ompi_comm_size (comm);
    }

    obj->comm = comm;
    obj->reqs = (ompi_request_t **) malloc(2*obj->size*sizeof(ompi_request_t *));
    if ( NULL == obj->reqs ) {
        printf("Could not allocate request array for disconnect object\n");
        free (obj);
        return NULL;
    }

    /* initiate all isend_irecvs. We use a dummy buffer stored on
       the object, since we are sending zero size messages anyway. */
    for ( i=0; i < obj->size; i++ ) {
        ret = MCA_PML_CALL(irecv (&(obj->buf), 0, MPI_INT, i,
                     OMPI_COMM_BARRIER_TAG, comm,
                     &(obj->reqs[2*i])));

        if ( OMPI_SUCCESS != ret ) {
            printf("dpm_base_disconnect_init: error %d in irecv to process %d\n", ret, i);
            free (obj->reqs);
            free (obj);
            return NULL;
        }
        ret = MCA_PML_CALL(isend (&(obj->buf), 0, MPI_INT, i,
                     OMPI_COMM_BARRIER_TAG,
                     MCA_PML_BASE_SEND_SYNCHRONOUS,
                     comm, &(obj->reqs[2*i+1])));

        if ( OMPI_SUCCESS != ret ) {
            printf("dpm_base_disconnect_init: error %d in isend to process %d\n", ret, i);
            free (obj->reqs);
            free (obj);
            return NULL;
        }
    }

    /* return handle */
    return obj;
}
Ejemplo n.º 7
0
/*
 * Invoked when there's a new communicator that has been created.
 * Look at the communicator and decide which set of functions and
 * priority we want to return.
 */
mca_coll_base_module_t *
portals4_comm_query(struct ompi_communicator_t *comm,
        int *priority)
{
    mca_coll_portals4_module_t *portals4_module;
    ptl_process_t              *proc;

    /* For now, we don't support intercommunicators and we probably
       never should handle the single proc case, since there's the
       self module... */
    if (OMPI_COMM_IS_INTER(comm) || ompi_comm_size(comm) < 2) {
        return NULL;
    }

    /* Make sure someone is populating the proc table, since we're not
       in a really good position to do so */
    proc = ompi_proc_local()->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4];
    if (NULL == proc) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: Proc table not previously populated",
                __FILE__, __LINE__);
        return NULL;
    }

    /* check for logical addressing mode in the MTL */
    if (0 == proc->phys.pid) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                "%s:%d: proc->phys.pid==0, so mtl-portals4 is using logical addressing which coll-portals4 doesn't support.  Disqualifying myself.",
                __FILE__, __LINE__);
        return NULL;
    }

    portals4_module = OBJ_NEW(mca_coll_portals4_module_t);
    if (NULL == portals4_module) return NULL;

    *priority = mca_coll_portals4_priority;
    portals4_module->coll_count = 0;
    portals4_module->super.coll_module_enable = portals4_module_enable;
    portals4_module->super.ft_event = NULL;

    portals4_module->super.coll_barrier = ompi_coll_portals4_barrier_intra;
    portals4_module->super.coll_ibarrier = ompi_coll_portals4_ibarrier_intra;

    portals4_module->super.coll_bcast = ompi_coll_portals4_bcast_intra;
    portals4_module->super.coll_ibcast = ompi_coll_portals4_ibcast_intra;

    portals4_module->super.coll_allreduce = ompi_coll_portals4_allreduce_intra;
    portals4_module->super.coll_iallreduce = ompi_coll_portals4_iallreduce_intra;

    portals4_module->super.coll_reduce = ompi_coll_portals4_reduce_intra;
    portals4_module->super.coll_ireduce = ompi_coll_portals4_ireduce_intra;

    return &(portals4_module->super);
}
Ejemplo n.º 8
0
/*
 * Invoked when there's a new communicator that has been created.
 * Look at the communicator and decide which set of functions and
 * priority we want to return.
 */
mca_coll_base_module_t *
mca_coll_sm_comm_query(struct ompi_communicator_t *comm, int *priority)
{
    mca_coll_sm_module_t *sm_module;

    /* If we're intercomm, or if there's only one process in the
       communicator, or if not all the processes in the communicator
       are not on this node, then we don't want to run */
    if (OMPI_COMM_IS_INTER(comm) || 1 == ompi_comm_size(comm) ||
        !have_local_peers(comm->c_local_group, ompi_comm_size(comm))) {
        opal_output_verbose(10, ompi_coll_base_framework.framework_output,
                            "coll:sm:comm_query (%d/%s): intercomm, comm is too small, or not all peers local; disqualifying myself", comm->c_contextid, comm->c_name);
	return NULL;
    }

    /* Get the priority level attached to this module. If priority is less
     * than or equal to 0, then the module is unavailable. */
    *priority = mca_coll_sm_component.sm_priority;
    if (mca_coll_sm_component.sm_priority <= 0) {
        opal_output_verbose(10, ompi_coll_base_framework.framework_output,
                            "coll:sm:comm_query (%d/%s): priority too low; disqualifying myself", comm->c_contextid, comm->c_name);
	return NULL;
    }

    sm_module = OBJ_NEW(mca_coll_sm_module_t);
    if (NULL == sm_module) {
        return NULL;
    }

    /* All is good -- return a module */
    sm_module->super.coll_module_enable = sm_module_enable;
    sm_module->super.ft_event        = mca_coll_sm_ft_event;
    sm_module->super.coll_allgather  = NULL;
    sm_module->super.coll_allgatherv = NULL;
    sm_module->super.coll_allreduce  = mca_coll_sm_allreduce_intra;
    sm_module->super.coll_alltoall   = NULL;
    sm_module->super.coll_alltoallv  = NULL;
    sm_module->super.coll_alltoallw  = NULL;
    sm_module->super.coll_barrier    = mca_coll_sm_barrier_intra;
    sm_module->super.coll_bcast      = mca_coll_sm_bcast_intra;
    sm_module->super.coll_exscan     = NULL;
    sm_module->super.coll_gather     = NULL;
    sm_module->super.coll_gatherv    = NULL;
    sm_module->super.coll_reduce     = mca_coll_sm_reduce_intra;
    sm_module->super.coll_reduce_scatter = NULL;
    sm_module->super.coll_scan       = NULL;
    sm_module->super.coll_scatter    = NULL;
    sm_module->super.coll_scatterv   = NULL;

    opal_output_verbose(10, ompi_coll_base_framework.framework_output,
                        "coll:sm:comm_query (%d/%s): pick me! pick me!", 
                        comm->c_contextid, comm->c_name);
    return &(sm_module->super);
}
Ejemplo n.º 9
0
/*
 * Invoked when there's a new communicator that has been created.
 * Look at the communicator and decide which set of functions and
 * priority we want to return.
 */
mca_coll_base_module_t *
mca_coll_fca_comm_query(struct ompi_communicator_t *comm, int *priority)
{
    mca_coll_base_module_t *module;
    int size = ompi_comm_size(comm);
    int local_peers = 0;
    mca_coll_fca_module_t *fca_module;

    *priority = 0;
    module = NULL;

    if (!mca_coll_fca_component.fca_enable)
        goto exit;

    if (size < mca_coll_fca_component.fca_np)
        goto exit;

    if (!ompi_group_have_remote_peers(comm->c_local_group) || OMPI_COMM_IS_INTER(comm))
        goto exit;

    fca_module = OBJ_NEW(mca_coll_fca_module_t);
    if (!fca_module)
        goto exit;

    fca_module->super.coll_module_enable = mca_coll_fca_module_enable;
    fca_module->super.ft_event        = mca_coll_fca_ft_event;
    fca_module->super.coll_allgather  = mca_coll_fca_component.fca_enable_allgather?  mca_coll_fca_allgather  : NULL;
    fca_module->super.coll_allgatherv = mca_coll_fca_component.fca_enable_allgatherv? mca_coll_fca_allgatherv : NULL;
    fca_module->super.coll_allreduce  = mca_coll_fca_component.fca_enable_allreduce?  mca_coll_fca_allreduce  : NULL;
    fca_module->super.coll_alltoall   = mca_coll_fca_component.fca_enable_alltoall?   mca_coll_fca_alltoall   : NULL;
    fca_module->super.coll_alltoallv  = mca_coll_fca_component.fca_enable_alltoallv?  mca_coll_fca_alltoallv  : NULL;
    fca_module->super.coll_alltoallw  = mca_coll_fca_component.fca_enable_alltoallw?  mca_coll_fca_alltoallw  : NULL;
    fca_module->super.coll_barrier    = mca_coll_fca_component.fca_enable_barrier?    mca_coll_fca_barrier    : NULL;
    fca_module->super.coll_bcast      = mca_coll_fca_component.fca_enable_bcast?      mca_coll_fca_bcast      : NULL;
    fca_module->super.coll_exscan     = NULL;
    fca_module->super.coll_gather     = mca_coll_fca_component.fca_enable_gather?     mca_coll_fca_gather     : NULL;
    fca_module->super.coll_gatherv    = mca_coll_fca_component.fca_enable_gatherv?    mca_coll_fca_gatherv    : NULL;
    fca_module->super.coll_reduce     = mca_coll_fca_component.fca_enable_reduce?     mca_coll_fca_reduce     : NULL;
    fca_module->super.coll_reduce_scatter = mca_coll_fca_component.fca_enable_reduce_scatter? mca_coll_fca_reduce_scatter  : NULL;
    fca_module->super.coll_scan       = NULL;
    fca_module->super.coll_scatter    = NULL;
    fca_module->super.coll_scatterv   = NULL;

    *priority = mca_coll_fca_component.fca_priority;
    module = &fca_module->super;

exit:
    FCA_VERBOSE(4, "Query FCA module for comm %p size %d rank %d local_peers=%d: priority=%d %s",
                (void *)comm, size, ompi_comm_rank(comm), local_peers,
                *priority, module ? "enabled" : "disabled");
    return module;
}
Ejemplo n.º 10
0
/*
 * Invoked when there's a new communicator that has been created.
 * Look at the communicator and decide which set of functions and
 * priority we want to return.
 */
mca_coll_base_module_t *
mca_coll_inter_comm_query(struct ompi_communicator_t *comm, int *priority)
{
    int size, rsize;
    mca_coll_inter_module_t *inter_module;

    /* This module only works for inter-communicators */
    if (!OMPI_COMM_IS_INTER(comm)) {
        return NULL;
    }
 
    /* Get the priority level attached to this module. If priority is less
     * than or equal to 0, then the module is unavailable. */
    *priority = mca_coll_inter_priority_param;
    if (0 >= mca_coll_inter_priority_param) {
	return NULL;
    }

    size = ompi_comm_size(comm);
    rsize = ompi_comm_remote_size(comm);
    
    if ( size < mca_coll_inter_crossover && rsize < mca_coll_inter_crossover) {
	return NULL;
    }

    inter_module = OBJ_NEW(mca_coll_inter_module_t);
    if (NULL == inter_module) {
	return NULL;
    }

    inter_module->super.coll_module_enable = mca_coll_inter_module_enable;
    inter_module->super.ft_event = NULL;

    inter_module->super.coll_allgather  = mca_coll_inter_allgather_inter;
    inter_module->super.coll_allgatherv = mca_coll_inter_allgatherv_inter;
    inter_module->super.coll_allreduce  = mca_coll_inter_allreduce_inter;
    inter_module->super.coll_alltoall   = NULL;
    inter_module->super.coll_alltoallv  = NULL;
    inter_module->super.coll_alltoallw  = NULL;
    inter_module->super.coll_barrier    = NULL;
    inter_module->super.coll_bcast      = mca_coll_inter_bcast_inter;
    inter_module->super.coll_exscan     = NULL;
    inter_module->super.coll_gather     = mca_coll_inter_gather_inter;
    inter_module->super.coll_gatherv    = mca_coll_inter_gatherv_inter;
    inter_module->super.coll_reduce     = mca_coll_inter_reduce_inter;
    inter_module->super.coll_reduce_scatter = NULL;
    inter_module->super.coll_scan       = NULL;
    inter_module->super.coll_scatter    = mca_coll_inter_scatter_inter;
    inter_module->super.coll_scatterv   = mca_coll_inter_scatterv_inter;
 
    return &(inter_module->super);
}
Ejemplo n.º 11
0
ompi_cxx_communicator_type_t ompi_cxx_comm_get_type (MPI_Comm comm)
{
    if (OMPI_COMM_IS_GRAPH(comm)) {
        return OMPI_CXX_COMM_TYPE_GRAPH;
    } else if (OMPI_COMM_IS_CART(comm)) {
        return OMPI_CXX_COMM_TYPE_CART;
    } else if (OMPI_COMM_IS_INTRA(comm)) {
        return OMPI_CXX_COMM_TYPE_INTRACOMM;
    } else if (OMPI_COMM_IS_INTER(comm)) {
        return OMPI_CXX_COMM_TYPE_INTERCOMM;
    }

    return OMPI_CXX_COMM_TYPE_UNKNOWN;
}
Ejemplo n.º 12
0
/*
 * Invoked when there's a new communicator that has been created.
 * Look at the communicator and decide which set of functions and
 * priority we want to return.
 */
const mca_coll_base_module_1_0_0_t *
ompi_coll_tuned_comm_query(struct ompi_communicator_t *comm, int *priority,
                           struct mca_coll_base_comm_t **data)
{
    OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:module_tuned query called"));

    *priority = ompi_coll_tuned_priority;

    /* 
     * Choose whether to use [intra|inter] decision functions 
     * and if using fixed OR dynamic rule sets.
     * Right now you cannot mix them, maybe later on it can be changed
     * but this would probably add an extra if and funct call to the path
     */

    if (OMPI_COMM_IS_INTER(comm)) {
        if (ompi_coll_tuned_use_dynamic_rules) {
            OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_query using inter_dynamic"));
            to_use = &inter_dynamic;
        } else {
            OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_query using inter_fixed"));
            to_use = &inter_fixed;
        }
    } else { /* is an intra comm */
        /**
         * If the communicator size is less than 2 we have specialized modules
         * to handle the intra collective communications.
         */
        if( ompi_comm_size(comm) < 2) {
            *priority = 0;
            return NULL;
        }
        if (ompi_coll_tuned_use_dynamic_rules) {
            OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_query using intra_dynamic"));
            to_use = &intra_dynamic;
        } else {
            OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_query using intra_fixed"));
            to_use = &intra_fixed;
        }
    }
    return to_use;
}
Ejemplo n.º 13
0
/*
 * Invoked when there's a new communicator that has been created.
 * Look at the communicator and decide which set of functions and
 * priority we want to return.
 */
mca_coll_base_module_t *
mca_coll_self_comm_query(struct ompi_communicator_t *comm, 
                         int *priority)
{
    mca_coll_self_module_t *module;

    /* We only work on intracommunicators of size 1 */

    if (!OMPI_COMM_IS_INTER(comm) && 1 == ompi_comm_size(comm)) {
        if (OMPI_SUCCESS != 
            mca_base_param_lookup_int(mca_coll_self_priority_param,
                                      priority)) {
            return NULL;
        }

        module = OBJ_NEW(mca_coll_self_module_t);
        if (NULL == module) return NULL;

        module->super.coll_module_enable = mca_coll_self_module_enable;
        module->super.ft_event        = mca_coll_self_ft_event;
        module->super.coll_allgather  = mca_coll_self_allgather_intra;
        module->super.coll_allgatherv = mca_coll_self_allgatherv_intra;
        module->super.coll_allreduce  = mca_coll_self_allreduce_intra;
        module->super.coll_alltoall   = mca_coll_self_alltoall_intra;
        module->super.coll_alltoallv  = mca_coll_self_alltoallv_intra;
        module->super.coll_alltoallw  = mca_coll_self_alltoallw_intra;
        module->super.coll_barrier    = mca_coll_self_barrier_intra;
        module->super.coll_bcast      = mca_coll_self_bcast_intra;
        module->super.coll_exscan     = mca_coll_self_exscan_intra;
        module->super.coll_gather     = mca_coll_self_gather_intra;
        module->super.coll_gatherv    = mca_coll_self_gatherv_intra;
        module->super.coll_reduce     = mca_coll_self_reduce_intra;
        module->super.coll_reduce_scatter = mca_coll_self_reduce_scatter_intra;
        module->super.coll_scan       = mca_coll_self_scan_intra;
        module->super.coll_scatter    = mca_coll_self_scatter_intra;
        module->super.coll_scatterv   = mca_coll_self_scatterv_intra;

        return &(module->super);
    }

    return NULL;
}
Ejemplo n.º 14
0
int MPI_Win_create(void *base, MPI_Aint size, int disp_unit,
                   MPI_Info info, MPI_Comm comm, MPI_Win *win) 
{
    int ret = MPI_SUCCESS;

    /* argument checking */
    if (MPI_PARAM_CHECK) {
        OMPI_ERR_INIT_FINALIZE(FUNC_NAME);

        if (ompi_comm_invalid (comm)) {
            return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, 
                                          FUNC_NAME);

        } else if (NULL == info || ompi_info_is_freed(info)) {
            return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_INFO,
                                          FUNC_NAME);

        } else if (NULL == win) {
            return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_WIN, FUNC_NAME);
        } else if ( size < 0 ) {
            return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_SIZE, FUNC_NAME);            
        } else if ( disp_unit <= 0 ) {
            return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_DISP, FUNC_NAME);
        }
    }

    /* communicator must be an intracommunicator */
    if (OMPI_COMM_IS_INTER(comm)) {
        return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_COMM, FUNC_NAME);
    }

    /* create window and return */
    ret = ompi_win_create(base, (size_t)size, disp_unit, comm,
                          info, win);
    if (OMPI_SUCCESS != ret) {
        *win = MPI_WIN_NULL;
        return OMPI_ERRHANDLER_INVOKE(comm, MPI_ERR_WIN, FUNC_NAME);
    }

    return MPI_SUCCESS;
}
Ejemplo n.º 15
0
/*
 * Invoked when there's a new communicator that has been created.
 * Look at the communicator and decide which set of functions and
 * priority we want to return.
 */
const mca_coll_base_module_1_0_0_t *
mca_coll_basic_comm_query(struct ompi_communicator_t *comm, int *priority,
                          struct mca_coll_base_comm_t **data)
{
    *priority = mca_coll_basic_priority;

    /* Choose whether to use [intra|inter], and [linear|log]-based
     * algorithms. */

    if (OMPI_COMM_IS_INTER(comm)) {
        return &inter_linear;
    } else {
        if (ompi_comm_size(comm) <= mca_coll_basic_crossover) {
            return &intra_linear;
        } else {
            return &intra_log;
        }
    }

    /* Never reach here */
}
int mca_coll_basic_neighbor_alltoallv(void *sbuf, int scounts[], int sdisps[],
                                      struct ompi_datatype_t *sdtype, void *rbuf, int rcounts[],
                                      int rdisps[], struct ompi_datatype_t *rdtype,
                                      struct ompi_communicator_t *comm, mca_coll_base_module_t *module)
{
    if (OMPI_COMM_IS_INTER(comm)) {
        return OMPI_ERR_NOT_SUPPORTED;
    }

    if (OMPI_COMM_IS_CART(comm)) {
        return mca_coll_basic_neighbor_alltoallv_cart (sbuf, scounts, sdisps, sdtype, rbuf,
                                                       rcounts, rdisps, rdtype, comm, module);
    } else if (OMPI_COMM_IS_GRAPH(comm)) {
        return mca_coll_basic_neighbor_alltoallv_graph (sbuf, scounts, sdisps, sdtype, rbuf,
                                                        rcounts, rdisps, rdtype, comm, module);
    } else if (OMPI_COMM_IS_DIST_GRAPH(comm)) {
        return mca_coll_basic_neighbor_alltoallv_dist_graph (sbuf, scounts, sdisps, sdtype, rbuf,
                                                             rcounts, rdisps, rdtype, comm, module);
    }

    return OMPI_ERR_NOT_SUPPORTED;
}
Ejemplo n.º 17
0
int MPI_Graph_get(MPI_Comm comm, int maxindex, int maxedges,
                  int *index, int *edges) 
{
    int err;
    mca_topo_base_module_graph_get_fn_t func;

    /* check the arguments */
    if (MPI_PARAM_CHECK) {
        if (MPI_COMM_NULL == comm) {
            return OMPI_ERRHANDLER_INVOKE (MPI_COMM_WORLD, MPI_ERR_COMM,
                                           FUNC_NAME);
        }
        if (OMPI_COMM_IS_INTER(comm)) {
            return OMPI_ERRHANDLER_INVOKE (comm, MPI_ERR_COMM,
                                           FUNC_NAME);
        }
        if (!OMPI_COMM_IS_GRAPH(comm)) {
            return OMPI_ERRHANDLER_INVOKE (comm, MPI_ERR_TOPOLOGY,
                                           FUNC_NAME);
        }
        if (0 > maxindex || 0 > maxedges || NULL == index || NULL == edges) {
            return OMPI_ERRHANDLER_INVOKE (comm, MPI_ERR_ARG,
                                           FUNC_NAME);
        }
    }
    /* get the function pointer to do the right thing */
    func = comm->c_topo->topo_graph_get;

    /* call the function */
    if ( MPI_SUCCESS != 
            (err = func(comm, maxindex, maxedges, index, edges))) {
        return OMPI_ERRHANDLER_INVOKE(comm, err, FUNC_NAME);
    }
    
    /* All done */
    return MPI_SUCCESS;
}
Ejemplo n.º 18
0
int MPI_Cart_get(MPI_Comm comm, int maxdims, int *dims,
                 int *periods, int *coords) 
{
    /* local variables */
    mca_topo_base_module_cart_get_fn_t func;
    int err;

    /* check the arguments */
    if (MPI_PARAM_CHECK) {
        OMPI_ERR_INIT_FINALIZE(FUNC_NAME);
        if (MPI_COMM_NULL == comm || OMPI_COMM_IS_INTER(comm)) {
            return OMPI_ERRHANDLER_INVOKE (MPI_COMM_WORLD, MPI_ERR_COMM,
                                          FUNC_NAME);
        }
        if (!OMPI_COMM_IS_CART(comm)) {
            return OMPI_ERRHANDLER_INVOKE (comm, MPI_ERR_TOPOLOGY,
                                          FUNC_NAME);
        }
        if ((0 > maxdims) || (0 < maxdims && 
                              ((NULL == dims) || (NULL == periods) ||
                               (NULL == coords)))) {
            return OMPI_ERRHANDLER_INVOKE (comm, MPI_ERR_ARG,
                                          FUNC_NAME);
        }
    }
    /* get the function pointer to do the right thing */
    func = comm->c_topo->topo_cart_get;

    /* all arguments are checked and now call the back end function */
    if ( MPI_SUCCESS != 
            (err = func(comm, maxdims, dims, periods, coords))) {
        return OMPI_ERRHANDLER_INVOKE(comm, err, FUNC_NAME);
    }
    
    /* All done */
    return MPI_SUCCESS;
}
Ejemplo n.º 19
0
/*
 * Invoked when there's a new communicator that has been created.
 * Look at the communicator and decide which set of functions and
 * priority we want to return.
 */
mca_coll_base_module_t *
mca_coll_hierarch_comm_query(struct ompi_communicator_t *comm, int *priority )
{
    int size, rank;
    int color, ncount=0, maxncount;
    int level;
    int ret=OMPI_SUCCESS;
    int ignore_sm=0;
    int detection_alg=0;
    mca_coll_hierarch_module_t *hierarch_module;

    /* This module only works for intra-communicators at the moment */
    if (OMPI_COMM_IS_INTER(comm)) {
        return NULL;
    }
    
    /* Get the priority level attached to this module. If priority is less
     * than or equal to 0, then the module is unavailable. */
    *priority = mca_coll_hierarch_priority_param;
    if (0 >= mca_coll_hierarch_priority_param) {
	return NULL;
    }

    /* This module only works when the BTLs are alive.  If they aren't, time to exit. */
    if (!mca_bml_base_inited()) return NULL;

    size = ompi_comm_size(comm);
    if (size < 3) {
	/* No need for hierarchical collectives for 1 or 2 procs. */
	return NULL;
    }

    hierarch_module = OBJ_NEW(mca_coll_hierarch_module_t);
    if (NULL == hierarch_module) {
	return NULL;
    }

    hierarch_module->super.coll_module_enable = mca_coll_hierarch_module_enable;
    hierarch_module->super.ft_event = mca_coll_hierarch_ft_event;
    hierarch_module->super.coll_allgather  = NULL;
    hierarch_module->super.coll_allgatherv = NULL;
    hierarch_module->super.coll_allreduce  = mca_coll_hierarch_allreduce_intra;
    hierarch_module->super.coll_alltoall   = NULL;
    hierarch_module->super.coll_alltoallv  = NULL;
    hierarch_module->super.coll_alltoallw  = NULL;
    hierarch_module->super.coll_barrier    = mca_coll_hierarch_barrier_intra;
    hierarch_module->super.coll_bcast      = mca_coll_hierarch_bcast_intra;
    hierarch_module->super.coll_exscan     = NULL;
    hierarch_module->super.coll_gather     = NULL;
    hierarch_module->super.coll_gatherv    = NULL;
    hierarch_module->super.coll_reduce     = mca_coll_hierarch_reduce_intra;
    hierarch_module->super.coll_reduce_scatter = NULL;
    hierarch_module->super.coll_scan       = NULL;
    hierarch_module->super.coll_scatter    = NULL;
    hierarch_module->super.coll_scatterv   = NULL;


    /* Check whether we should ignore sm. This might be necessary to take advantage
       of the some ib or gm collectives. */
    ignore_sm = mca_coll_hierarch_ignore_sm_param;

    rank = ompi_comm_rank(comm);

    hierarch_module->hier_num_colorarr  = size;
    hierarch_module->hier_colorarr      = (int *) malloc ( sizeof(int) * size);
    if ( NULL == hierarch_module->hier_colorarr ) {
        *priority = 0;
        return NULL;
    }

    /* 
     * walk through the list of registered protocols, and check which one
     * is feasible. 
     * Later we start with level=0, and introduce the multi-cell check 
     */
    if ( ignore_sm ) {
	mca_coll_hierarch_max_protocol = HIER_MAXPROTOCOL - 1;
    }

    /* if number of levels is not specified, or if it is specified as ALL_LEVELS,
    * proceed in the usual way
    */

    detection_alg = mca_coll_hierarch_detection_alg_param;
    if( TWO_LEVELS == detection_alg ) {
	mca_coll_hierarch_max_protocol = 2;
	if ( mca_coll_hierarch_verbose_param ) {
	    printf("Using two level hierarchy detection\n");
	}
    }

    for ( level = mca_coll_hierarch_max_protocol - 1; level >0 ; level--) {
	if ( ALL_LEVELS == detection_alg ) {
	    mca_coll_hierarch_checkfor_component ( comm, 
						   level, 
						   hier_prot[level], 
						   &color, 
						   &ncount);
	}
        else if  (TWO_LEVELS == detection_alg ) {
            mca_coll_hierarch_checkfor_sm ( comm, &color, &ncount );
	}

        /* This is probably a no-no! but for the moment we agreed with Jeff,
	** that this might be the best solution. These functions emulate an 
        ** allreduce and  an allgather.
	*/
	ret = mca_coll_hierarch_allreduce_tmp (&ncount, &maxncount, 1, MPI_INT, 
					       MPI_MAX, comm );
	if ( OMPI_SUCCESS != ret ) {
	    return NULL;
	}

	if ( 0 == maxncount ) {
	    if ( mca_coll_hierarch_verbose_param ) {
		printf("%s:%d: nobody talks with %s. Continuing to next level.\n",  
		       comm->c_name, rank, hier_prot[level]);
	    }
	    continue;
	}
	else if ( maxncount == (size-1) ) {
	    /* 
	     * everybody can talk to every other process with this protocol, 
	     * no need to continue in the hierarchy tree and for the 
	     * hierarchical component.
	     * Its (size-1) because we do not count ourselves.
	     * maxncount[1] should be zero.
	     */
	    if ( mca_coll_hierarch_verbose_param ) {
		if ( ALL_LEVELS == detection_alg ) {
		    printf("%s:%d: everybody talks with %s. No need to continue\n", 
			   comm->c_name, rank, hier_prot[level]);
		}
		else if ( TWO_LEVELS == detection_alg ) {
		    printf("%s:%d: everybody talks with sm. No need to continue\n",
			   comm->c_name, rank );
		}
	    }
	    goto exit;
	}
	else {
	    if ( mca_coll_hierarch_verbose_param ) {
		printf("%s:%d: %d procs talk with %s. Use this protocol, key %d\n", 
		       comm->c_name, rank, maxncount, hier_prot[level], color);
	    }
	    
	    ret = mca_coll_hierarch_allgather_tmp (&color, 1, MPI_INT, 
						   hierarch_module->hier_colorarr, 1, 
						   MPI_INT, comm );
	    if ( OMPI_SUCCESS != ret ) {
		return NULL;
	    }
	    
	    hierarch_module->hier_level = level;
	    return &(hierarch_module->super);
	}
    }
        
 exit:
    *priority = 0;
    return NULL;
}
Ejemplo n.º 20
0
static int ompi_comm_activate_nb_complete (ompi_comm_request_t *request)
{
    ompi_comm_cid_context_t *context = (ompi_comm_cid_context_t *) request->context;
    int ret;

    /**
     * Check to see if this process is in the new communicator.
     *
     * Specifically, this function is invoked by all proceses in the
     * old communicator, regardless of whether they are in the new
     * communicator or not.  This is because it is far simpler to use
     * MPI collective functions on the old communicator to determine
     * some data for the new communicator (e.g., remote_leader) than
     * to kludge up our own pseudo-collective routines over just the
     * processes in the new communicator.  Hence, *all* processes in
     * the old communicator need to invoke this function.
     *
     * That being said, only processes in the new communicator need to
     * select a coll module for the new communicator.  More
     * specifically, proceses who are not in the new communicator
     * should *not* select a coll module -- for example,
     * ompi_comm_rank(newcomm) returns MPI_UNDEFINED for processes who
     * are not in the new communicator.  This can cause errors in the
     * selection / initialization of a coll module.  Plus, it's
     * wasteful -- processes in the new communicator will end up
     * freeing the new communicator anyway, so we might as well leave
     * the coll selection as NULL (the coll base comm unselect code
     * handles that case properly).
     */
    if (MPI_UNDEFINED == (context->newcomm)->c_local_group->grp_my_rank) {
        return OMPI_SUCCESS;
    }

    /* Let the collectives components fight over who will do
       collective on this new comm.  */
    if (OMPI_SUCCESS != (ret = mca_coll_base_comm_select(context->newcomm))) {
        OBJ_RELEASE(context->newcomm);
        *context->newcommp = MPI_COMM_NULL;
        return ret;
    }

    /* For an inter communicator, we have to deal with the potential
     * problem of what is happening if the local_comm that we created
     * has a lower CID than the parent comm. This is not a problem
     * as long as the user calls MPI_Comm_free on the inter communicator.
     * However, if the communicators are not freed by the user but released
     * by Open MPI in MPI_Finalize, we walk through the list of still available
     * communicators and free them one by one. Thus, local_comm is freed before
     * the actual inter-communicator. However, the local_comm pointer in the
     * inter communicator will still contain the 'previous' address of the local_comm
     * and thus this will lead to a segmentation violation. In order to prevent
     * that from happening, we increase the reference counter local_comm
     * by one if its CID is lower than the parent. We cannot increase however
     *  its reference counter if the CID of local_comm is larger than
     * the CID of the inter communicators, since a regular MPI_Comm_free would
     * leave in that the case the local_comm hanging around and thus we would not
     * recycle CID's properly, which was the reason and the cause for this trouble.
     */
    if (OMPI_COMM_IS_INTER(context->newcomm)) {
        if (OMPI_COMM_CID_IS_LOWER(context->newcomm, context->comm)) {
            OMPI_COMM_SET_EXTRA_RETAIN (context->newcomm);
            OBJ_RETAIN (context->newcomm);
        }
    }

    /* done */
    return OMPI_SUCCESS;
}
Ejemplo n.º 21
0
/*
 * Init module on the communicator
 */
const struct mca_coll_base_module_1_0_0_t *
ompi_coll_tuned_module_init(struct ompi_communicator_t *comm)
{
    int size, rank;
    struct mca_coll_base_comm_t *data;
    /* fanout parameters */
    int rc=0;
    int i;


    OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init called."));

    /* This routine will become more complex and might have to be */
    /* broken into more sections/function calls */

    /* Order of operations:
     * alloc memory for nb reqs (in case we fall through) 
     * add decision rules if using dynamic rules
     *     compact rules using communicator size info etc
     * build first guess cached topologies (might depend on the rules from above)
     *
     * then attach all to the communicator and return base module funct ptrs 
     */

    /* Allocate the data that hangs off the communicator */

    if (OMPI_COMM_IS_INTER(comm)) {
        size = ompi_comm_remote_size(comm);
    } else {
        size = ompi_comm_size(comm);
    }


    /* 
     * we still malloc data as it is used by the TUNED modules
     * if we don't allocate it and fall back to a BASIC module routine then confuses debuggers 
     * we place any special info after the default data
     *
     * BUT on very large systems we might not be able to allocate all this memory so
     * we do check a MCA parameter to see if if we should allocate this memory
     *
     * The default is set very high  
     *
     */

    /* if we within the memory/size limit, allow preallocated data */


    if (size<=ompi_coll_tuned_preallocate_memory_comm_size_limit) {
        data = (mca_coll_base_comm_t*)malloc(sizeof(struct mca_coll_base_comm_t) +
                                             (sizeof(ompi_request_t *) * size * 2));
  
        if (NULL == data) {
            return NULL;
        }
        data->mcct_reqs = (ompi_request_t **) (data + 1);
        data->mcct_num_reqs = size * 2;
    }
    else {
        data = (mca_coll_base_comm_t*)malloc(sizeof(struct mca_coll_base_comm_t)); 
  
        if (NULL == data) {
            return NULL;
        }
        data->mcct_reqs = (ompi_request_t **) NULL;
        data->mcct_num_reqs = 0;
    }


    /*
     * If using dynamic and you are MPI_COMM_WORLD and you want to use a parameter file..
     * then this effects how much storage space you need
     * (This is a basic version of what will go into V2)
     *
     */


    size = ompi_comm_size(comm);  /* find size so we can (A) decide if to access the file directly */
    /* (B) so we can get our very own customised ompi_coll_com_rule_t ptr */
    /* which only has rules in it for our com size */

    rank = ompi_comm_rank(comm);    /* find rank as only MCW:0 opens any tuned conf files */
    /* actually if they are below a threadhold, they all open it */
    /* have to build a collective in here.. but just for MCW.. */
    /* but we have to make sure we have the same rules everywhere :( */

    /* if using dynamic rules make sure all overrides are NULL before we start override anything accidently */
    if (ompi_coll_tuned_use_dynamic_rules) {
        /* base rules */
        data->all_base_rules = (ompi_coll_alg_rule_t*) NULL;

        /* each collective rule for my com size */
        for (i=0;i<COLLCOUNT;i++) {
            data->com_rules[i] = (ompi_coll_com_rule_t*) NULL;
        }
    }

    /* next dynamic state, recheck all forced rules as well */
    /* warning, we should check to make sure this is really an INTRA comm here... */
    if (ompi_coll_tuned_use_dynamic_rules) {
        ompi_coll_tuned_forced_getvalues         (ompi_coll_tuned_forced_params[ALLREDUCE], &(data->user_forced[ALLREDUCE]));
        ompi_coll_tuned_forced_getvalues         (ompi_coll_tuned_forced_params[ALLTOALL],  &(data->user_forced[ALLTOALL]));
        ompi_coll_tuned_forced_getvalues         (ompi_coll_tuned_forced_params[ALLGATHER],  &(data->user_forced[ALLGATHER]));
        /*         ompi_coll_tuned_forced_getvalues (ompi_coll_tuned_forced_params[ALLTOALLV], &(data->user_forced[ALLTOALLV])); */
        ompi_coll_tuned_forced_getvalues_barrier (ompi_coll_tuned_forced_params[BARRIER],   &(data->user_forced[BARRIER]));
        ompi_coll_tuned_forced_getvalues         (ompi_coll_tuned_forced_params[BCAST],     &(data->user_forced[BCAST]));
        ompi_coll_tuned_forced_getvalues         (ompi_coll_tuned_forced_params[REDUCE],    &(data->user_forced[REDUCE]));
    }


    if (&ompi_mpi_comm_world==comm) {

        if (ompi_coll_tuned_use_dynamic_rules) {

            OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init MCW & Dynamic"));

            if (ompi_coll_tuned_dynamic_rules_filename) {
                OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init Opening [%s]", 
                             ompi_coll_tuned_dynamic_rules_filename));
                rc = ompi_coll_tuned_read_rules_config_file (ompi_coll_tuned_dynamic_rules_filename,
                                                             &(data->all_base_rules), COLLCOUNT);
                if (rc>=0) {
                    OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init Read %d valid rules\n", rc));
                    /* at this point we all have a base set of rules */
                    /* now we can get our customized communicator sized rule set, for each collective */
                    for (i=0;i<COLLCOUNT;i++) {
                        data->com_rules[i] = ompi_coll_tuned_get_com_rule_ptr (data->all_base_rules, i, size);
                    }
                }
                else { /* failed to read config file, thus make sure its a NULL... */
                    data->all_base_rules = (ompi_coll_alg_rule_t*) NULL;
                }


            } /* end if a config filename exists */

        } /* end if dynamic_rules */

    } /* end if MCW */
  
    /* ok, if using dynamic rules, not MCW and we are just any rank and a base set of rules exist.. ref them */
    /* order of eval is important here, if we are MCW ompi_mpi_comm_world.c_coll_selected_data is NULL still.. */
    if ((ompi_coll_tuned_use_dynamic_rules)&&(!(&ompi_mpi_comm_world==comm))&&
        ((ompi_mpi_comm_world.c_coll_selected_data)->all_base_rules)) {

        OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init NOT MCW & Dynamic"));

        /* this will, erm fail if MCW doesn't exist which it should! */
        data->all_base_rules = (ompi_mpi_comm_world.c_coll_selected_data)->all_base_rules;

        /* at this point we all have a base set of rules if they exist atall */
        /* now we can get our customized communicator sized rule set, for each collective */
        for (i=0;i<COLLCOUNT;i++) {
            data->com_rules[i] = ompi_coll_tuned_get_com_rule_ptr (data->all_base_rules, i, size);
        }
    }

    /* 
     * now for the cached topo functions 
     * guess the initial topologies to use rank 0 as root 
     */

    /* general n fan out tree */
    data->cached_ntree = ompi_coll_tuned_topo_build_tree (ompi_coll_tuned_init_tree_fanout, comm, 0); 
    data->cached_ntree_root = 0;
    data->cached_ntree_fanout = ompi_coll_tuned_init_tree_fanout;

    /* binary tree */
    data->cached_bintree = ompi_coll_tuned_topo_build_tree (2, comm, 0); 
    data->cached_bintree_root = 0;

    /* binomial tree */
    data->cached_bmtree = ompi_coll_tuned_topo_build_bmtree (comm, 0);
    data->cached_bmtree_root = 0;

    /* 
     * chains (fanout followed by pipelines)
     * are more difficuilt as the fan out really really depends on message size [sometimes].. 
     * as size gets larger fan-out gets smaller [usually]
     * 
     * will probably change how we cache this later, for now a midsize
     * GEF
     */
    data->cached_chain = ompi_coll_tuned_topo_build_chain (ompi_coll_tuned_init_chain_fanout, comm, 0);
    data->cached_chain_root = 0;
    data->cached_chain_fanout = ompi_coll_tuned_init_chain_fanout;

    /* standard pipeline */
    data->cached_pipeline = ompi_coll_tuned_topo_build_chain (1, comm, 0);
    data->cached_pipeline_root = 0;

    /* All done */

    comm->c_coll_selected_data = data;

    OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init Tuned is in use"));
    return to_use;
}
Ejemplo n.º 22
0
/*
 * Invoked when there's a new communicator that has been created.
 * Look at the communicator and decide which set of functions and
 * priority we want to return.
 */
mca_coll_base_module_t *
mca_coll_basic_comm_query(struct ompi_communicator_t *comm, 
                          int *priority)
{
    int size;
    mca_coll_basic_module_t *basic_module;

    basic_module = OBJ_NEW(mca_coll_basic_module_t);
    if (NULL == basic_module) return NULL;

    *priority = mca_coll_basic_priority;

    /* Allocate the data that hangs off the communicator */

    if (OMPI_COMM_IS_INTER(comm)) {
        size = ompi_comm_remote_size(comm);
    } else {
        size = ompi_comm_size(comm);
    }
    basic_module->mccb_num_reqs = size * 2;
    basic_module->mccb_reqs = (ompi_request_t**) 
        malloc(sizeof(ompi_request_t *) * basic_module->mccb_num_reqs);

    /* Choose whether to use [intra|inter], and [linear|log]-based
     * algorithms. */
    basic_module->super.coll_module_enable = mca_coll_basic_module_enable;
    basic_module->super.ft_event = mca_coll_basic_ft_event;

    if (OMPI_COMM_IS_INTER(comm)) {
        basic_module->super.coll_allgather  = mca_coll_basic_allgather_inter;
        basic_module->super.coll_allgatherv = mca_coll_basic_allgatherv_inter;
        basic_module->super.coll_allreduce  = mca_coll_basic_allreduce_inter;
        basic_module->super.coll_alltoall   = mca_coll_basic_alltoall_inter;
        basic_module->super.coll_alltoallv  = mca_coll_basic_alltoallv_inter;
        basic_module->super.coll_alltoallw  = mca_coll_basic_alltoallw_inter;
        basic_module->super.coll_barrier    = mca_coll_basic_barrier_inter_lin;
        basic_module->super.coll_bcast      = mca_coll_basic_bcast_lin_inter;
        basic_module->super.coll_exscan     = NULL;
        basic_module->super.coll_gather     = mca_coll_basic_gather_inter;
        basic_module->super.coll_gatherv    = mca_coll_basic_gatherv_inter;
        basic_module->super.coll_reduce     = mca_coll_basic_reduce_lin_inter;
        basic_module->super.coll_reduce_scatter_block = mca_coll_basic_reduce_scatter_block_inter;
        basic_module->super.coll_reduce_scatter = mca_coll_basic_reduce_scatter_inter;
        basic_module->super.coll_scan       = NULL;
        basic_module->super.coll_scatter    = mca_coll_basic_scatter_inter;
        basic_module->super.coll_scatterv   = mca_coll_basic_scatterv_inter;
    } else if (ompi_comm_size(comm) <= mca_coll_basic_crossover) {
        basic_module->super.coll_allgather  = mca_coll_basic_allgather_intra;
        basic_module->super.coll_allgatherv = mca_coll_basic_allgatherv_intra;
        basic_module->super.coll_allreduce  = mca_coll_basic_allreduce_intra;
        basic_module->super.coll_alltoall   = mca_coll_basic_alltoall_intra;
        basic_module->super.coll_alltoallv  = mca_coll_basic_alltoallv_intra;
        basic_module->super.coll_alltoallw  = mca_coll_basic_alltoallw_intra;
        basic_module->super.coll_barrier    = mca_coll_basic_barrier_intra_lin;
        basic_module->super.coll_bcast      = mca_coll_basic_bcast_lin_intra;
        basic_module->super.coll_exscan     = mca_coll_basic_exscan_intra;
        basic_module->super.coll_gather     = mca_coll_basic_gather_intra;
        basic_module->super.coll_gatherv    = mca_coll_basic_gatherv_intra;
        basic_module->super.coll_reduce     = mca_coll_basic_reduce_lin_intra;
        basic_module->super.coll_reduce_scatter_block = mca_coll_basic_reduce_scatter_block_intra;
        basic_module->super.coll_reduce_scatter = mca_coll_basic_reduce_scatter_intra;
        basic_module->super.coll_scan       = mca_coll_basic_scan_intra;
        basic_module->super.coll_scatter    = mca_coll_basic_scatter_intra;
        basic_module->super.coll_scatterv   = mca_coll_basic_scatterv_intra;
    } else {
        basic_module->super.coll_allgather  = mca_coll_basic_allgather_intra;
        basic_module->super.coll_allgatherv = mca_coll_basic_allgatherv_intra;
        basic_module->super.coll_allreduce  = mca_coll_basic_allreduce_intra;
        basic_module->super.coll_alltoall   = mca_coll_basic_alltoall_intra;
        basic_module->super.coll_alltoallv  = mca_coll_basic_alltoallv_intra;
        basic_module->super.coll_alltoallw  = mca_coll_basic_alltoallw_intra;
        basic_module->super.coll_barrier    = mca_coll_basic_barrier_intra_log;
        basic_module->super.coll_bcast      = mca_coll_basic_bcast_log_intra;
        basic_module->super.coll_exscan     = mca_coll_basic_exscan_intra;
        basic_module->super.coll_gather     = mca_coll_basic_gather_intra;
        basic_module->super.coll_gatherv    = mca_coll_basic_gatherv_intra;
        basic_module->super.coll_reduce     = mca_coll_basic_reduce_log_intra;
        basic_module->super.coll_reduce_scatter_block = mca_coll_basic_reduce_scatter_block_intra;
        basic_module->super.coll_reduce_scatter = mca_coll_basic_reduce_scatter_intra;
        basic_module->super.coll_scan       = mca_coll_basic_scan_intra;
        basic_module->super.coll_scatter    = mca_coll_basic_scatter_intra;
        basic_module->super.coll_scatterv   = mca_coll_basic_scatterv_intra;
    }

    return &(basic_module->super);
}
Ejemplo n.º 23
0
/* Arguments not used in this implementation:
 *  - bridgecomm
 *  - local_leader
 *  - remote_leader
 *  - send_first
 */
static int ompi_comm_allreduce_inter ( int *inbuf, int *outbuf, 
                                       int count, struct ompi_op_t *op, 
                                       ompi_communicator_t *intercomm,
                                       ompi_communicator_t *bridgecomm, 
                                       void* local_leader, 
                                       void* remote_leader, 
                                       int send_first )
{
    int local_rank, rsize;
    int i, rc;
    int *sbuf;
    int *tmpbuf=NULL;
    int *rcounts=NULL, scount=0;
    int *rdisps=NULL;

    if ( &ompi_mpi_op_sum.op != op && &ompi_mpi_op_prod.op != op &&
         &ompi_mpi_op_max.op != op && &ompi_mpi_op_min.op  != op ) {
        return MPI_ERR_OP;
    }

    if ( !OMPI_COMM_IS_INTER (intercomm)) {
        return MPI_ERR_COMM;
    }

    /* Allocate temporary arrays */
    rsize      = ompi_comm_remote_size (intercomm);
    local_rank = ompi_comm_rank ( intercomm );

    tmpbuf  = (int *) malloc ( count * sizeof(int));
    rdisps  = (int *) calloc ( rsize, sizeof(int));
    rcounts = (int *) calloc ( rsize, sizeof(int) );
    if ( OPAL_UNLIKELY (NULL == tmpbuf || NULL == rdisps || NULL == rcounts)) {
        rc = OMPI_ERR_OUT_OF_RESOURCE;
        goto exit;
    }
    
    /* Execute the inter-allreduce: the result of our group will
       be in the buffer of the remote group */
    rc = intercomm->c_coll.coll_allreduce ( inbuf, tmpbuf, count, MPI_INT,
                                            op, intercomm,
                                            intercomm->c_coll.coll_allreduce_module);
    if ( OMPI_SUCCESS != rc ) {
        goto exit;
    }

    if ( 0 == local_rank ) {
        MPI_Request req;

        /* for the allgatherv later */
        scount = count;

        /* local leader exchange their data and determine the overall result
           for both groups */
        rc = MCA_PML_CALL(irecv (outbuf, count, MPI_INT, 0, 
                                 OMPI_COMM_ALLREDUCE_TAG,
                                 intercomm, &req));
        if ( OMPI_SUCCESS != rc ) {
            goto exit;
        }
        rc = MCA_PML_CALL(send (tmpbuf, count, MPI_INT, 0,
                                OMPI_COMM_ALLREDUCE_TAG,
                                MCA_PML_BASE_SEND_STANDARD,
                                intercomm));
        if ( OMPI_SUCCESS != rc ) {
            goto exit;
        }
        rc = ompi_request_wait ( &req, MPI_STATUS_IGNORE );
        if ( OMPI_SUCCESS != rc ) {
            goto exit;
        }

        if ( &ompi_mpi_op_max.op == op ) {
            for ( i = 0 ; i < count; i++ ) {
                if (tmpbuf[i] > outbuf[i]) outbuf[i] = tmpbuf[i];
            }
        }
        else if ( &ompi_mpi_op_min.op == op ) {
            for ( i = 0 ; i < count; i++ ) {
                if (tmpbuf[i] < outbuf[i]) outbuf[i] = tmpbuf[i];
            }
        }
        else if ( &ompi_mpi_op_sum.op == op ) {
            for ( i = 0 ; i < count; i++ ) {
                outbuf[i] += tmpbuf[i];
            }
        }
        else if ( &ompi_mpi_op_prod.op == op ) {
            for ( i = 0 ; i < count; i++ ) {
                outbuf[i] *= tmpbuf[i];
            }
        }
    }
 
    /* distribute the overall result to all processes in the other group.
       Instead of using bcast, we are using here allgatherv, to avoid the
       possible deadlock. Else, we need an algorithm to determine, 
       which group sends first in the inter-bcast and which receives 
       the result first.
    */
    rcounts[0] = count;
    sbuf       = outbuf;
    rc = intercomm->c_coll.coll_allgatherv (sbuf, scount, MPI_INT, outbuf,
                                            rcounts, rdisps, MPI_INT, 
                                            intercomm,
                                            intercomm->c_coll.coll_allgatherv_module);
    
 exit:
    if ( NULL != tmpbuf ) {
        free ( tmpbuf );
    }
    if ( NULL != rcounts ) {
        free ( rcounts );
    }
    if ( NULL != rdisps ) {
        free ( rdisps );
    }
    
    return (rc);
}
Ejemplo n.º 24
0
/* This routine serves two purposes:
 * - the allreduce acts as a kind of Barrier,
 *   which avoids, that we have incoming fragments 
 *   on the new communicator before everybody has set
 *   up the comm structure.
 * - some components (e.g. the collective MagPIe component
 *   might want to generate new communicators and communicate
 *   using the new comm. Thus, it can just be called after
 *   the 'barrier'.
 *
 * The reason that this routine is in comm_cid and not in
 * comm.c is, that this file contains the allreduce implementations
 * which are required, and thus we avoid having duplicate code...
 */
int ompi_comm_activate ( ompi_communicator_t** newcomm, 
                         ompi_communicator_t* comm,
                         ompi_communicator_t* bridgecomm,
                         void* local_leader,
                         void* remote_leader,
                         int mode,
                         int send_first )
{
    int ret = 0;

    int ok=0, gok=0;
    ompi_comm_cid_allredfct* allredfnct;

    /* Step 1: the barrier, after which it is allowed to
     * send messages over the new communicator
     */
    switch (mode)
    {
        case OMPI_COMM_CID_INTRA:
            allredfnct=(ompi_comm_cid_allredfct*)ompi_comm_allreduce_intra;
            break;
        case OMPI_COMM_CID_INTER:
            allredfnct=(ompi_comm_cid_allredfct*)ompi_comm_allreduce_inter;
            break;
        case OMPI_COMM_CID_INTRA_BRIDGE:
            allredfnct=(ompi_comm_cid_allredfct*)ompi_comm_allreduce_intra_bridge;
            break;
        case OMPI_COMM_CID_INTRA_OOB:
            allredfnct=(ompi_comm_cid_allredfct*)ompi_comm_allreduce_intra_oob;
            break;
        default:
            return MPI_UNDEFINED;
            break;
    }

    if (MPI_UNDEFINED != (*newcomm)->c_local_group->grp_my_rank) {

	/* Initialize the PML stuff in the newcomm  */
	if ( OMPI_SUCCESS != (ret = MCA_PML_CALL(add_comm(*newcomm))) ) {
	    goto bail_on_error;
	}
	OMPI_COMM_SET_PML_ADDED(*newcomm);
    }


    (allredfnct)(&ok, &gok, 1, MPI_MIN, comm, bridgecomm,
                 local_leader, remote_leader, send_first );



    /**
     * Check to see if this process is in the new communicator.
     *
     * Specifically, this function is invoked by all proceses in the
     * old communicator, regardless of whether they are in the new
     * communicator or not.  This is because it is far simpler to use
     * MPI collective functions on the old communicator to determine
     * some data for the new communicator (e.g., remote_leader) than
     * to kludge up our own pseudo-collective routines over just the
     * processes in the new communicator.  Hence, *all* processes in
     * the old communicator need to invoke this function.
     *
     * That being said, only processes in the new communicator need to
     * select a coll module for the new communicator.  More
     * specifically, proceses who are not in the new communicator
     * should *not* select a coll module -- for example,
     * ompi_comm_rank(newcomm) returns MPI_UNDEFINED for processes who
     * are not in the new communicator.  This can cause errors in the
     * selection / initialization of a coll module.  Plus, it's
     * wasteful -- processes in the new communicator will end up
     * freeing the new communicator anyway, so we might as well leave
     * the coll selection as NULL (the coll base comm unselect code
     * handles that case properly).
     */
    if (MPI_UNDEFINED == (*newcomm)->c_local_group->grp_my_rank) {
        return OMPI_SUCCESS;
    }

    /* Let the collectives components fight over who will do
       collective on this new comm.  */
    if (OMPI_SUCCESS != (ret = mca_coll_base_comm_select(*newcomm))) {
	goto bail_on_error;
    }

    /* For an inter communicator, we have to deal with the potential
     * problem of what is happening if the local_comm that we created
     * has a lower CID than the parent comm. This is not a problem
     * as long as the user calls MPI_Comm_free on the inter communicator.
     * However, if the communicators are not freed by the user but released
     * by Open MPI in MPI_Finalize, we walk through the list of still available
     * communicators and free them one by one. Thus, local_comm is freed before
     * the actual inter-communicator. However, the local_comm pointer in the
     * inter communicator will still contain the 'previous' address of the local_comm
     * and thus this will lead to a segmentation violation. In order to prevent
     * that from happening, we increase the reference counter local_comm
     * by one if its CID is lower than the parent. We cannot increase however
     *  its reference counter if the CID of local_comm is larger than
     * the CID of the inter communicators, since a regular MPI_Comm_free would
     * leave in that the case the local_comm hanging around and thus we would not
     * recycle CID's properly, which was the reason and the cause for this trouble.
     */
    if ( OMPI_COMM_IS_INTER(*newcomm)) {
        if ( OMPI_COMM_CID_IS_LOWER(*newcomm, comm)) {
            OMPI_COMM_SET_EXTRA_RETAIN (*newcomm);
            OBJ_RETAIN (*newcomm);
        }
    }


    return OMPI_SUCCESS;

 bail_on_error:
    OBJ_RELEASE(*newcomm);
    *newcomm = MPI_COMM_NULL;
    return ret;
}                         
Ejemplo n.º 25
0
/*
 * Invoked when there's a new communicator that has been created.
 * Look at the communicator and decide which set of functions and
 * priority we want to return.
 */
mca_coll_base_module_t *
ompi_coll_tuned_comm_query(struct ompi_communicator_t *comm, int *priority)
{
    mca_coll_tuned_module_t *tuned_module;

    OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:module_tuned query called"));

    /**
     * No support for inter-communicator yet.
     */
    if (OMPI_COMM_IS_INTER(comm)) {
        *priority = 0;
        return NULL;
    }

    /**
     * If it is inter-communicator and size is less than 2 we have specialized modules
     * to handle the intra collective communications.
     */
    if (OMPI_COMM_IS_INTRA(comm) && ompi_comm_size(comm) < 2) {
        *priority = 0;
        return NULL;
    }

    tuned_module = OBJ_NEW(mca_coll_tuned_module_t);
    if (NULL == tuned_module) return NULL;

    *priority = ompi_coll_tuned_priority;

    /* 
     * Choose whether to use [intra|inter] decision functions 
     * and if using fixed OR dynamic rule sets.
     * Right now you cannot mix them, maybe later on it can be changed
     * but this would probably add an extra if and funct call to the path
     */
    tuned_module->super.coll_module_enable = tuned_module_enable;
    tuned_module->super.ft_event = mca_coll_tuned_ft_event;

    /* By default stick with the fied version of the tuned collectives. Later on,
     * when the module get enabled, set the correct version based on the availability
     * of the dynamic rules.
     */
    tuned_module->super.coll_allgather  = ompi_coll_tuned_allgather_intra_dec_fixed;
    tuned_module->super.coll_allgatherv = ompi_coll_tuned_allgatherv_intra_dec_fixed;
    tuned_module->super.coll_allreduce  = ompi_coll_tuned_allreduce_intra_dec_fixed;
    tuned_module->super.coll_alltoall   = ompi_coll_tuned_alltoall_intra_dec_fixed;
    tuned_module->super.coll_alltoallv  = ompi_coll_tuned_alltoallv_intra_dec_fixed;
    tuned_module->super.coll_alltoallw  = NULL;
    tuned_module->super.coll_barrier    = ompi_coll_tuned_barrier_intra_dec_fixed;
    tuned_module->super.coll_bcast      = ompi_coll_tuned_bcast_intra_dec_fixed;
    tuned_module->super.coll_exscan     = NULL;
    tuned_module->super.coll_gather     = ompi_coll_tuned_gather_intra_dec_fixed;
    tuned_module->super.coll_gatherv    = NULL;
    tuned_module->super.coll_reduce     = ompi_coll_tuned_reduce_intra_dec_fixed;
    tuned_module->super.coll_reduce_scatter = ompi_coll_tuned_reduce_scatter_intra_dec_fixed;
    tuned_module->super.coll_scan       = NULL;
    tuned_module->super.coll_scatter    = ompi_coll_tuned_scatter_intra_dec_fixed;
    tuned_module->super.coll_scatterv   = NULL;

    return &(tuned_module->super);
}
Ejemplo n.º 26
0
/*
 * Init module on the communicator
 */
static int
tuned_module_enable( mca_coll_base_module_t *module,
                     struct ompi_communicator_t *comm )
{
    int size;
    mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t *) module;
    mca_coll_tuned_comm_t *data = NULL;

    OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init called."));

    /* Allocate the data that hangs off the communicator */
    if (OMPI_COMM_IS_INTER(comm)) {
        size = ompi_comm_remote_size(comm);
    } else {
        size = ompi_comm_size(comm);
    }

    /**
     * we still malloc data as it is used by the TUNED modules
     * if we don't allocate it and fall back to a BASIC module routine then confuses debuggers 
     * we place any special info after the default data
     *
     * BUT on very large systems we might not be able to allocate all this memory so
     * we do check a MCA parameter to see if if we should allocate this memory
     *
     * The default is set very high  
     *
     */

    /* if we within the memory/size limit, allow preallocated data */
    if( size <= ompi_coll_tuned_preallocate_memory_comm_size_limit ) {
        data = (mca_coll_tuned_comm_t*)malloc(sizeof(struct mca_coll_tuned_comm_t) +
                                              (sizeof(ompi_request_t *) * size * 2));
        if (NULL == data) {
            return OMPI_ERROR;
        }
        data->mcct_reqs = (ompi_request_t **) (data + 1);
        data->mcct_num_reqs = size * 2;
    } else {
        data = (mca_coll_tuned_comm_t*)malloc(sizeof(struct mca_coll_tuned_comm_t)); 
        if (NULL == data) {
            return OMPI_ERROR;
        }
        data->mcct_reqs = (ompi_request_t **) NULL;
        data->mcct_num_reqs = 0;
    }

    if (ompi_coll_tuned_use_dynamic_rules) {
        OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init MCW & Dynamic"));
        /**
         * Reset it to 0, it will be enabled again if we discover any need for dynamic decisions.
         */
        ompi_coll_tuned_use_dynamic_rules = false;

        /**
         * next dynamic state, recheck all forced rules as well
         * warning, we should check to make sure this is really an INTRA comm here...
         */
        COLL_TUNED_EXECUTE_IF_DYNAMIC(data, ALLGATHER,
                                      tuned_module->super.coll_allgather  = ompi_coll_tuned_allgather_intra_dec_dynamic);
        COLL_TUNED_EXECUTE_IF_DYNAMIC(data, ALLGATHERV,
                                      tuned_module->super.coll_allgatherv = ompi_coll_tuned_allgatherv_intra_dec_dynamic);
        COLL_TUNED_EXECUTE_IF_DYNAMIC(data, ALLREDUCE,
                                      tuned_module->super.coll_allreduce  = ompi_coll_tuned_allreduce_intra_dec_dynamic);
        COLL_TUNED_EXECUTE_IF_DYNAMIC(data, ALLTOALL,
                                      tuned_module->super.coll_alltoall   = ompi_coll_tuned_alltoall_intra_dec_dynamic);
        COLL_TUNED_EXECUTE_IF_DYNAMIC(data, ALLTOALLV,
                                      tuned_module->super.coll_alltoallv  = ompi_coll_tuned_alltoallv_intra_dec_dynamic);
        COLL_TUNED_EXECUTE_IF_DYNAMIC(data, ALLTOALLW,
                                      tuned_module->super.coll_alltoallw  = NULL);
        COLL_TUNED_EXECUTE_IF_DYNAMIC(data, BARRIER,
                                      tuned_module->super.coll_barrier    = ompi_coll_tuned_barrier_intra_dec_dynamic);
        COLL_TUNED_EXECUTE_IF_DYNAMIC(data, BCAST,
                                      tuned_module->super.coll_bcast      = ompi_coll_tuned_bcast_intra_dec_dynamic);
        COLL_TUNED_EXECUTE_IF_DYNAMIC(data, EXSCAN,
                                      tuned_module->super.coll_exscan     = NULL);
        COLL_TUNED_EXECUTE_IF_DYNAMIC(data, GATHER,
                                      tuned_module->super.coll_gather     = ompi_coll_tuned_gather_intra_dec_dynamic);
        COLL_TUNED_EXECUTE_IF_DYNAMIC(data, GATHERV,
                                      tuned_module->super.coll_gatherv    = NULL);
        COLL_TUNED_EXECUTE_IF_DYNAMIC(data, REDUCE,
                                      tuned_module->super.coll_reduce     = ompi_coll_tuned_reduce_intra_dec_dynamic);
        COLL_TUNED_EXECUTE_IF_DYNAMIC(data, REDUCESCATTER,
                                      tuned_module->super.coll_reduce_scatter = ompi_coll_tuned_reduce_scatter_intra_dec_dynamic);
        COLL_TUNED_EXECUTE_IF_DYNAMIC(data, SCAN,
                                      tuned_module->super.coll_scan       = NULL);
        COLL_TUNED_EXECUTE_IF_DYNAMIC(data, SCATTER,
                                      tuned_module->super.coll_scatter    = ompi_coll_tuned_scatter_intra_dec_dynamic);
        COLL_TUNED_EXECUTE_IF_DYNAMIC(data, SCATTERV,
                                      tuned_module->super.coll_scatterv   = NULL);

        if( false == ompi_coll_tuned_use_dynamic_rules ) {
            /* no real need for dynamic decisions */
            OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:module_enable switch back to fixed"
                         " decision by lack of dynamic rules"));
        }
    }
    
    /* general n fan out tree */
    data->cached_ntree = NULL;
    /* binary tree */
    data->cached_bintree = NULL;
    /* binomial tree */
    data->cached_bmtree = NULL;
    /* binomial tree */
    data->cached_in_order_bmtree = NULL;
    /* chains (fanout followed by pipelines) */
    data->cached_chain = NULL;
    /* standard pipeline */
    data->cached_pipeline = NULL;
    /* in-order binary tree */
    data->cached_in_order_bintree = NULL;

    /* All done */
    tuned_module->tuned_data = data;

    OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init Tuned is in use"));
    return OMPI_SUCCESS;
}
Ejemplo n.º 27
0
/*
 * Invoked when there's a new communicator that has been created.
 * Look at the communicator and decide which set of functions and
 * priority we want to return.
 */
mca_coll_base_module_t *
mca_coll_hcoll_comm_query(struct ompi_communicator_t *comm, int *priority)
{
    mca_coll_base_module_t *module;
    mca_coll_hcoll_module_t *hcoll_module;
    ompi_attribute_fn_ptr_union_t del_fn;
    ompi_attribute_fn_ptr_union_t copy_fn;
    mca_coll_hcoll_component_t *cm;
    int err;
    int rc;
    cm = &mca_coll_hcoll_component;
    *priority = 0;
    module = NULL;

    if (!cm->hcoll_enable) {
        return NULL;
    }

    if (OMPI_COMM_IS_INTER(comm) || ompi_comm_size(comm) < cm->hcoll_np
            || ompi_comm_size(comm) < 2) {
        return NULL;
    }


    if (!cm->libhcoll_initialized)
    {
        /* libhcoll should be initialized here since current implmentation of
           mxm bcol in libhcoll needs world_group fully functional during init
           world_group, i.e. ompi_comm_world, is not ready at hcoll component open
           call */
        opal_progress_register(mca_coll_hcoll_progress);

        HCOL_VERBOSE(10,"Calling hcoll_init();");
#if HCOLL_API >= HCOLL_VERSION(3,2)
        hcoll_read_init_opts(&cm->init_opts);
        cm->init_opts->base_tag = MCA_COLL_BASE_TAG_HCOLL_BASE;
        cm->init_opts->max_tag = mca_pml.pml_max_tag;
        cm->init_opts->enable_thread_support = ompi_mpi_thread_multiple;

        rc = hcoll_init_with_opts(&cm->init_opts);
#else
        hcoll_set_runtime_tag_offset(MCA_COLL_BASE_TAG_HCOLL_BASE, mca_pml.pml_max_tag);
        rc = hcoll_init();
#endif

        if (HCOLL_SUCCESS != rc) {
            cm->hcoll_enable = 0;
            opal_progress_unregister(mca_coll_hcoll_progress);
            HCOL_ERROR("Hcol library init failed");
            return NULL;
        }

#if HCOLL_API >= HCOLL_VERSION(3,2)
        if (cm->using_mem_hooks && cm->init_opts->mem_hook_needed) {
#else
        if (cm->using_mem_hooks && hcoll_check_mem_release_cb_needed()) {
#endif
            opal_mem_hooks_register_release(mca_coll_hcoll_mem_release_cb, NULL);
        } else {
            cm->using_mem_hooks = 0;
        }

        copy_fn.attr_communicator_copy_fn = (MPI_Comm_internal_copy_attr_function*) MPI_COMM_NULL_COPY_FN;
        del_fn.attr_communicator_delete_fn = hcoll_comm_attr_del_fn;
        err = ompi_attr_create_keyval(COMM_ATTR, copy_fn, del_fn, &hcoll_comm_attr_keyval, NULL ,0, NULL);
        if (OMPI_SUCCESS != err) {
            cm->hcoll_enable = 0;
            hcoll_finalize();
            opal_progress_unregister(mca_coll_hcoll_progress);
            HCOL_ERROR("Hcol comm keyval create failed");
            return NULL;
        }

        if (mca_coll_hcoll_component.derived_types_support_enabled) {
            copy_fn.attr_datatype_copy_fn = (MPI_Type_internal_copy_attr_function *) MPI_TYPE_NULL_COPY_FN;
            del_fn.attr_datatype_delete_fn = hcoll_type_attr_del_fn;
            err = ompi_attr_create_keyval(TYPE_ATTR, copy_fn, del_fn, &hcoll_type_attr_keyval, NULL ,0, NULL);
            if (OMPI_SUCCESS != err) {
                cm->hcoll_enable = 0;
                hcoll_finalize();
                opal_progress_unregister(mca_coll_hcoll_progress);
                HCOL_ERROR("Hcol type keyval create failed");
                return NULL;
            }
        }
        OBJ_CONSTRUCT(&cm->dtypes, opal_free_list_t);
        opal_free_list_init(&cm->dtypes, sizeof(mca_coll_hcoll_dtype_t),
                            8, OBJ_CLASS(mca_coll_hcoll_dtype_t), 0, 0,
                            32, -1, 32, NULL, 0, NULL, NULL, NULL);

    }

    hcoll_module = OBJ_NEW(mca_coll_hcoll_module_t);
    if (!hcoll_module) {
        if (!cm->libhcoll_initialized) {
            cm->hcoll_enable = 0;
            hcoll_finalize();
            opal_progress_unregister(mca_coll_hcoll_progress);
        }
        return NULL;
    }

    hcoll_module->comm = comm;

    HCOL_VERBOSE(10,"Creating hcoll_context for comm %p, comm_id %d, comm_size %d",
                 (void*)comm,comm->c_contextid,ompi_comm_size(comm));

    hcoll_module->hcoll_context =
        hcoll_create_context((rte_grp_handle_t)comm);

    if (NULL == hcoll_module->hcoll_context) {
        HCOL_VERBOSE(1,"hcoll_create_context returned NULL");
        OBJ_RELEASE(hcoll_module);
        if (!cm->libhcoll_initialized) {
            cm->hcoll_enable = 0;
            hcoll_finalize();
            opal_progress_unregister(mca_coll_hcoll_progress);
        }
        return NULL;
    }

    hcoll_module->super.coll_module_enable = mca_coll_hcoll_module_enable;
    hcoll_module->super.coll_barrier = hcoll_collectives.coll_barrier ? mca_coll_hcoll_barrier : NULL;
    hcoll_module->super.coll_bcast = hcoll_collectives.coll_bcast ? mca_coll_hcoll_bcast : NULL;
    hcoll_module->super.coll_allgather = hcoll_collectives.coll_allgather ? mca_coll_hcoll_allgather : NULL;
    hcoll_module->super.coll_allgatherv = hcoll_collectives.coll_allgatherv ? mca_coll_hcoll_allgatherv : NULL;
    hcoll_module->super.coll_allreduce = hcoll_collectives.coll_allreduce ? mca_coll_hcoll_allreduce : NULL;
    hcoll_module->super.coll_alltoall = hcoll_collectives.coll_alltoall ? mca_coll_hcoll_alltoall : NULL;
    hcoll_module->super.coll_alltoallv = hcoll_collectives.coll_alltoallv ? mca_coll_hcoll_alltoallv : NULL;
    hcoll_module->super.coll_gatherv = hcoll_collectives.coll_gatherv ? mca_coll_hcoll_gatherv : NULL;
    hcoll_module->super.coll_reduce = hcoll_collectives.coll_reduce ? mca_coll_hcoll_reduce : NULL;
    hcoll_module->super.coll_ibarrier = hcoll_collectives.coll_ibarrier ? mca_coll_hcoll_ibarrier : NULL;
    hcoll_module->super.coll_ibcast = hcoll_collectives.coll_ibcast ? mca_coll_hcoll_ibcast : NULL;
    hcoll_module->super.coll_iallgather = hcoll_collectives.coll_iallgather ? mca_coll_hcoll_iallgather : NULL;
#if HCOLL_API >= HCOLL_VERSION(3,5)
    hcoll_module->super.coll_iallgatherv = hcoll_collectives.coll_iallgatherv ? mca_coll_hcoll_iallgatherv : NULL;
#else
    hcoll_module->super.coll_iallgatherv = NULL;
#endif
    hcoll_module->super.coll_iallreduce = hcoll_collectives.coll_iallreduce ? mca_coll_hcoll_iallreduce : NULL;
#if HCOLL_API >= HCOLL_VERSION(3,5)
    hcoll_module->super.coll_ireduce = hcoll_collectives.coll_ireduce ? mca_coll_hcoll_ireduce : NULL;
#else
    hcoll_module->super.coll_ireduce = NULL;
#endif
    hcoll_module->super.coll_gather = /*hcoll_collectives.coll_gather ? mca_coll_hcoll_gather :*/ NULL;
    hcoll_module->super.coll_igatherv = hcoll_collectives.coll_igatherv ? mca_coll_hcoll_igatherv : NULL;
    hcoll_module->super.coll_ialltoall = /*hcoll_collectives.coll_ialltoall ? mca_coll_hcoll_ialltoall : */ NULL;
#if HCOLL_API >= HCOLL_VERSION(3,7)
    hcoll_module->super.coll_ialltoallv = hcoll_collectives.coll_ialltoallv ? mca_coll_hcoll_ialltoallv : NULL;
#else
    hcoll_module->super.coll_ialltoallv = NULL;
#endif
    *priority = cm->hcoll_priority;
    module = &hcoll_module->super;

    if (!cm->libhcoll_initialized) {
        cm->libhcoll_initialized = true;
    }

    return module;
}


OBJ_CLASS_INSTANCE(mca_coll_hcoll_module_t,
                   mca_coll_base_module_t,
                   mca_coll_hcoll_module_construct,
                   mca_coll_hcoll_module_destruct);