Exemple #1
0
/**
 * Function for finding and opening either all MCA components, or the one
 * that was specifically requested via a MCA parameter.
 */
static int orte_rmaps_base_open(mca_base_open_flag_t flags)
{
    int rc;

    /* init the globals */
    OBJ_CONSTRUCT(&orte_rmaps_base.selected_modules, opal_list_t);
    orte_rmaps_base.slot_list = NULL;
    orte_rmaps_base.mapping = 0;
    orte_rmaps_base.ranking = 0;
    orte_rmaps_base.device = NULL;

    /* if a topology file was given, then set our topology
     * from it. Even though our actual topology may differ,
     * mpirun only needs to see the compute node topology
     * for mapping purposes
     */
    if (NULL != rmaps_base_topo_file) {
        if (OPAL_SUCCESS != (rc = opal_hwloc_base_set_topology(rmaps_base_topo_file))) {
            orte_show_help("help-orte-rmaps-base.txt", "topo-file", true, rmaps_base_topo_file);
            return ORTE_ERR_SILENT;
        }
    }

    /* check for violations that has to be detected before we parse the mapping option */
    if (NULL != orte_rmaps_base.ppr) {
        orte_show_help("help-orte-rmaps-base.txt", "deprecated", true,
                       "--ppr, -ppr", "--map-by ppr:<pattern>",
                       "rmaps_base_pattern, rmaps_ppr_pattern",
                       "rmaps_base_mapping_policy=ppr:<pattern>");
        /* if the mapping policy is NULL, then we can proceed */
        if (NULL == rmaps_base_mapping_policy) {
            asprintf(&rmaps_base_mapping_policy, "ppr:%s", orte_rmaps_base.ppr);
        } else {
            return ORTE_ERR_SILENT;
        }
    }
    if (1 < orte_rmaps_base.cpus_per_rank) {
        orte_show_help("help-orte-rmaps-base.txt", "deprecated", true,
                       "--cpus-per-proc, -cpus-per-proc, --cpus-per-rank, -cpus-per-rank",
                       "--map-by <obj>:PE=N, default <obj>=NUMA",
                       "rmaps_base_cpus_per_proc", "rmaps_base_mapping_policy=<obj>:PE=N, default <obj>=NUMA");
    }

    if (ORTE_SUCCESS != (rc = orte_rmaps_base_set_mapping_policy(&orte_rmaps_base.mapping,
                                                                 &orte_rmaps_base.device,
                                                                 rmaps_base_mapping_policy))) {
        return rc;
    }

    if (ORTE_SUCCESS != (rc = orte_rmaps_base_set_ranking_policy(&orte_rmaps_base.ranking,
                                                                 orte_rmaps_base.mapping,
                                                                 rmaps_base_ranking_policy))) {
        return rc;
    }

    if (rmaps_base_bycore) {
        orte_show_help("help-orte-rmaps-base.txt", "deprecated", true,
                       "--bycore, -bycore", "--map-by core",
                       "rmaps_base_bycore", "rmaps_base_mapping_policy=core");
        /* set mapping policy to bycore - error if something else already set */
        if ((ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) &&
            ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping) != ORTE_MAPPING_BYCORE) {
            /* error - cannot redefine the default mapping policy */
            orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping",
                           "bycore", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
            return ORTE_ERR_SILENT;
        }
        ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_BYCORE);
        ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
        /* set ranking policy to bycore - error if something else already set */
        if ((ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(orte_rmaps_base.ranking)) &&
            ORTE_GET_RANKING_POLICY(orte_rmaps_base.ranking) != ORTE_RANK_BY_CORE) {
            /* error - cannot redefine the default ranking policy */
            orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "ranking",
                           "bycore", orte_rmaps_base_print_ranking(orte_rmaps_base.ranking));
            return ORTE_ERR_SILENT;
        }
        ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_CORE);
        ORTE_SET_RANKING_DIRECTIVE(orte_rmaps_base.ranking, ORTE_RANKING_GIVEN);
    }

    if (rmaps_base_byslot) {
        orte_show_help("help-orte-rmaps-base.txt", "deprecated", true,
                       "--byslot, -byslot", "--map-by slot",
                       "rmaps_base_byslot", "rmaps_base_mapping_policy=slot");
        /* set mapping policy to byslot - error if something else already set */
        if ((ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) &&
            ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping) != ORTE_MAPPING_BYSLOT) {
            /* error - cannot redefine the default mapping policy */
            orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping",
                           "byslot", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
            return ORTE_ERR_SILENT;
        }
        ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_BYSLOT);
        ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
        /* set ranking policy to byslot - error if something else already set */
        if ((ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(orte_rmaps_base.ranking)) &&
            ORTE_GET_RANKING_POLICY(orte_rmaps_base.ranking) != ORTE_RANK_BY_SLOT) {
            /* error - cannot redefine the default ranking policy */
            orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "ranking",
                           "byslot", orte_rmaps_base_print_ranking(orte_rmaps_base.ranking));
            return ORTE_ERR_SILENT;
        }
        ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_SLOT);
        ORTE_SET_RANKING_DIRECTIVE(orte_rmaps_base.ranking, ORTE_RANKING_GIVEN);
    }

    if (rmaps_base_bynode) {
        orte_show_help("help-orte-rmaps-base.txt", "deprecated", true,
                       "--bynode, -bynode", "--map-by node",
                       "rmaps_base_bynode", "rmaps_base_mapping_policy=node");
        /* set mapping policy to bynode - error if something else already set */
        if ((ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) &&
            ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping) != ORTE_MAPPING_BYNODE) {
            orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping",
                           "bynode", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
            return ORTE_ERR_SILENT;
        }
        ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_BYNODE);
        ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
        /* set ranking policy to bynode - error if something else already set */
        if ((ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(orte_rmaps_base.ranking)) &&
            ORTE_GET_RANKING_POLICY(orte_rmaps_base.ranking) != ORTE_RANK_BY_NODE) {
            /* error - cannot redefine the default ranking policy */
            orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "ranking",
                           "bynode", orte_rmaps_base_print_ranking(orte_rmaps_base.ranking));
            return ORTE_ERR_SILENT;
        }
        ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_NODE);
        ORTE_SET_RANKING_DIRECTIVE(orte_rmaps_base.ranking, ORTE_RANKING_GIVEN);
    }

    if (1 < orte_rmaps_base.cpus_per_rank) {
        /* if we were asked for multiple cpus/proc, then we have to
         * bind to those cpus - any other binding policy is an
         * error
         */
        if (OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy)) {
            if (opal_hwloc_use_hwthreads_as_cpus) {
                if (OPAL_BIND_TO_HWTHREAD != OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy) &&
                    OPAL_BIND_TO_NONE != OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
                    orte_show_help("help-orte-rmaps-base.txt", "mismatch-binding", true,
                                   orte_rmaps_base.cpus_per_rank, "use-hwthreads-as-cpus",
                                   opal_hwloc_base_print_binding(opal_hwloc_binding_policy),
                                   "bind-to hwthread");
                    return ORTE_ERR_SILENT;
                }
            } else if (OPAL_BIND_TO_CORE != OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy) &&
                       OPAL_BIND_TO_NONE != OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
                orte_show_help("help-orte-rmaps-base.txt", "mismatch-binding", true,
                               orte_rmaps_base.cpus_per_rank, "cores as cpus",
                               opal_hwloc_base_print_binding(opal_hwloc_binding_policy),
                               "bind-to core");
                return ORTE_ERR_SILENT;
            }
        } else {
            if (opal_hwloc_use_hwthreads_as_cpus) {
                OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_HWTHREAD);
            } else {
                OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_CORE);
            }
        }
        /* we also need to ensure we are mapping to a high-enough level to have
         * multiple cpus beneath it - by default, we'll go to the NUMA level */
        if (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
            if (ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping) == ORTE_MAPPING_BYHWTHREAD ||
              (ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping) == ORTE_MAPPING_BYCORE &&
              !opal_hwloc_use_hwthreads_as_cpus)) {
                orte_show_help("help-orte-rmaps-base.txt", "mapping-too-low-init", true);
                return ORTE_ERR_SILENT;
            }
        } else {
            opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                "%s rmaps:base pe/rank set - setting mapping to BYNUMA",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
            ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_BYNUMA);
            ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
        }
    }

    if (orte_rmaps_base_pernode) {
        /* there is no way to resolve this conflict, so if something else was
         * given, we have no choice but to error out
         */
        if (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
            orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping",
                           "bynode", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
            return ORTE_ERR_SILENT;
        }
        /* ensure we set the mapping policy to ppr */
        ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_PPR);
        ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
        /* define the ppr */
        orte_rmaps_base.ppr = strdup("1:node");
    }

    if (0 < orte_rmaps_base_n_pernode) {
        /* there is no way to resolve this conflict, so if something else was
         * given, we have no choice but to error out
         */
        if (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
            orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping",
                           "bynode", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
            return ORTE_ERR_SILENT;
        }
        /* ensure we set the mapping policy to ppr */
        ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_PPR);
        ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
        /* define the ppr */
        asprintf(&orte_rmaps_base.ppr, "%d:node", orte_rmaps_base_n_pernode);
    }

    if (0 < orte_rmaps_base_n_persocket) {
        /* there is no way to resolve this conflict, so if something else was
         * given, we have no choice but to error out
         */
        if (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
            orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping",
                           "bynode", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
            return ORTE_ERR_SILENT;
        }
        /* ensure we set the mapping policy to ppr */
        ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_PPR);
        ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
        /* define the ppr */
        asprintf(&orte_rmaps_base.ppr, "%d:socket", orte_rmaps_base_n_persocket);
    }

    /* Should we schedule on the local node or not? */
    if (rmaps_base_no_schedule_local) {
        orte_rmaps_base.mapping |= ORTE_MAPPING_NO_USE_LOCAL;
    }

    /* Should we oversubscribe or not? */
    if (rmaps_base_no_oversubscribe) {
        if ((ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) &&
            !(ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
            /* error - cannot redefine the default mapping policy */
            orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping",
                           "no-oversubscribe", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
            return ORTE_ERR_SILENT;
        }
        ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE);
        ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_SUBSCRIBE_GIVEN);
    }

    /** force oversubscription permission */
    if (rmaps_base_oversubscribe) {
        if ((ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) &&
            (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
            /* error - cannot redefine the default mapping policy */
            orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping",
                           "oversubscribe", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
            return ORTE_ERR_SILENT;
        }
        ORTE_UNSET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE);
        ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_SUBSCRIBE_GIVEN);
        /* also set the overload allowed flag */
        opal_hwloc_binding_policy |= OPAL_BIND_ALLOW_OVERLOAD;
    }

    /* should we display a detailed (developer-quality) version of the map after determining it? */
    if (rmaps_base_display_devel_map) {
        orte_rmaps_base.display_map = true;
        orte_devel_level_output = true;
    }

    /* should we display a diffable report of proc locations after determining it? */
    if (rmaps_base_display_diffable_map) {
        orte_rmaps_base.display_map = true;
        orte_display_diffable_output = true;
    }

    /* Open up all available components */
    rc = mca_base_framework_components_open(&orte_rmaps_base_framework, flags);

    /* check to see if any component indicated a problem */
    if (ORTE_MAPPING_CONFLICTED & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
        /* the component would have already reported the error, so
         * tell the rest of the chain to shut up
         */
        return ORTE_ERR_SILENT;
    }

    /* All done */
    return rc;
}
static int rank_by(orte_job_t *jdata,
                   orte_app_context_t *app,
                   opal_list_t *nodes,
                   hwloc_obj_type_t target,
                   unsigned cache_level)
{
    hwloc_obj_t obj;
    int num_objs, i, j, rc;
    orte_vpid_t num_ranked=0;
    orte_node_t *node;
    orte_proc_t *proc;
    orte_vpid_t vpid;
    int cnt;
    opal_pointer_array_t objs;
    bool all_done;
    opal_list_item_t *item;
    hwloc_obj_t locale;

    if (ORTE_RANKING_SPAN & ORTE_GET_RANKING_DIRECTIVE(jdata->map->ranking)) {
        return rank_span(jdata, app, nodes, target, cache_level);
    } else if (ORTE_RANKING_FILL & ORTE_GET_RANKING_DIRECTIVE(jdata->map->ranking)) {
        return rank_fill(jdata, app, nodes, target, cache_level);
    }

    /* if ranking is not spanned or filled, then we
     * default to assign ranks sequentially across
     * target objects within a node until that node
     * is fully ranked, and then move on to the next
     * node
     *
     *        Node 0                Node 1
     *    Obj 0     Obj 1       Obj 0     Obj 1
     *     0 2       1 3         8 10      9 11
     *     4 6       5 7        12 14     13 15
     */

    /* setup the pointer array */
    OBJ_CONSTRUCT(&objs, opal_pointer_array_t);
    opal_pointer_array_init(&objs, 2, INT_MAX, 2);

    vpid = jdata->num_procs;
    cnt = 0;
    for (item = opal_list_get_first(nodes);
         item != opal_list_get_end(nodes);
         item = opal_list_get_next(item)) {
        node = (orte_node_t*)item;
        /* get the number of objects - only consider those we can actually use */
        num_objs = opal_hwloc_base_get_nbobjs_by_type(node->topology, target,
                                                      cache_level, OPAL_HWLOC_AVAILABLE);
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:rank_by: found %d objects on node %s with %d procs",
                            num_objs, node->name, (int)node->num_procs);
        if (0 == num_objs) {
            return ORTE_ERR_NOT_SUPPORTED;
        }
        /* collect all the objects */
        for (i=0; i < num_objs; i++) {
            obj = opal_hwloc_base_get_obj_by_type(node->topology, target,
                                                  cache_level, i, OPAL_HWLOC_AVAILABLE);
            opal_pointer_array_set_item(&objs, i, obj);
        }

        /* cycle across the objects, assigning a proc to each one,
         * until all procs have been assigned - unfortunately, since
         * more than this job may be mapped onto a node, the number
         * of procs on the node can't be used to tell us when we
         * are done. Instead, we have to just keep going until all
         * procs are ranked - which means we have to make one extra
         * pass thru the loop
         *
         * Perhaps someday someone will come up with a more efficient
         * algorithm, but this works for now.
         */
        all_done = false;
        while (!all_done && cnt < app->num_procs) {
            all_done = true;
            /* cycle across the objects */
            for (i=0; i < num_objs && cnt < app->num_procs; i++) {
                obj = (hwloc_obj_t)opal_pointer_array_get_item(&objs, i);

                /* find the next proc on this object */
                for (j=0; j < node->procs->size && cnt < app->num_procs; j++) {
                    if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
                        continue;
                    }
                    /* ignore procs from other jobs */
                    if (proc->name.jobid != jdata->jobid) {
                        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                            "mca:rmaps:rank_by skipping proc %s - from another job, num_ranked %d",
                                            ORTE_NAME_PRINT(&proc->name), num_ranked);
                        continue;
                    }
                    /* ignore procs that are already ranked */
                    if (ORTE_VPID_INVALID != proc->name.vpid) {
                        continue;
                    }
                    /* ignore procs from other apps */
                    if (proc->app_idx != app->idx) {
                        continue;
                    }
                    if (!orte_get_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, (void**)&locale, OPAL_PTR)) {
                        continue;
                    }
                    /* ignore procs on other objects */
                    if (!hwloc_bitmap_intersects(obj->cpuset, locale->cpuset)) {
                        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                            "mca:rmaps:rank_by: proc at position %d is not on object %d",
                                            j, i);
                        continue;
                    }
                    proc->name.vpid = vpid++;
                    if (0 == cnt) {
                        app->first_rank = proc->name.vpid;
                    }
                    cnt++;
                    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                        "mca:rmaps:rank_by: assigned rank %s", ORTE_VPID_PRINT(proc->name.vpid));
                    /* insert the proc into the jdata array - no harm if already there */
                    if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) {
                        ORTE_ERROR_LOG(rc);
                        return rc;
                    }
                   /* flag that one was mapped */
                    all_done = false;
                    /* track where the highest vpid landed - this is our
                     * new bookmark
                     */
                    jdata->bookmark = node;
                    /* move to next object */
                    break;
                }
            }
        }
    }

    /* cleanup */
    OBJ_DESTRUCT(&objs);

    return ORTE_SUCCESS;
}
int orte_rmaps_base_compute_vpids(orte_job_t *jdata,
                                  orte_app_context_t *app,
                                  opal_list_t *nodes)
{
    orte_job_map_t *map;
    orte_vpid_t vpid;
    int j, cnt;
    orte_node_t *node;
    orte_proc_t *proc;
    int rc;
    opal_list_item_t *item;
    bool one_found;

    map = jdata->map;

    /* start with the rank-by object options - if the object isn't
     * included in the topology, then we obviously cannot rank by it.
     * However, if this was the default ranking policy (as opposed to
     * something given by the user), then fall back to rank-by slot
     */
#if OPAL_HAVE_HWLOC
    if (ORTE_RANK_BY_NUMA == ORTE_GET_RANKING_POLICY(map->ranking)) {
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps: computing ranks by NUMA for job %s",
                            ORTE_JOBID_PRINT(jdata->jobid));
        if (ORTE_SUCCESS != (rc = rank_by(jdata, app, nodes, HWLOC_OBJ_NODE, 0))) {
            if (ORTE_ERR_NOT_SUPPORTED == rc &&
                !(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) {
                ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT);
                goto rankbyslot;
            }
            ORTE_ERROR_LOG(rc);
        }
        return rc;
    }

    if (ORTE_RANK_BY_SOCKET == ORTE_GET_RANKING_POLICY(map->ranking)) {
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps: computing ranks by socket for job %s",
                            ORTE_JOBID_PRINT(jdata->jobid));
        if (ORTE_SUCCESS != (rc = rank_by(jdata, app, nodes, HWLOC_OBJ_SOCKET, 0))) {
            if (ORTE_ERR_NOT_SUPPORTED == rc &&
                !(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) {
                ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT);
                goto rankbyslot;
            }
            ORTE_ERROR_LOG(rc);
        }
        return rc;
    }

    if (ORTE_RANK_BY_L3CACHE == ORTE_GET_RANKING_POLICY(map->ranking)) {
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps: computing ranks by L3cache for job %s",
                            ORTE_JOBID_PRINT(jdata->jobid));
        if (ORTE_SUCCESS != (rc = rank_by(jdata, app, nodes, HWLOC_OBJ_CACHE, 3))) {
            if (ORTE_ERR_NOT_SUPPORTED == rc &&
                !(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) {
                ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT);
                goto rankbyslot;
            }
            ORTE_ERROR_LOG(rc);
        }
        return rc;
    }

    if (ORTE_RANK_BY_L2CACHE == ORTE_GET_RANKING_POLICY(map->ranking)) {
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps: computing ranks by L2cache for job %s",
                            ORTE_JOBID_PRINT(jdata->jobid));
        if (ORTE_SUCCESS != (rc = rank_by(jdata, app, nodes, HWLOC_OBJ_CACHE, 2))) {
            if (ORTE_ERR_NOT_SUPPORTED == rc &&
                !(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) {
                ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT);
                goto rankbyslot;
            }
            ORTE_ERROR_LOG(rc);
        }
        return rc;
    }

    if (ORTE_RANK_BY_L1CACHE == ORTE_GET_RANKING_POLICY(map->ranking)) {
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps: computing ranks by L1cache for job %s",
                            ORTE_JOBID_PRINT(jdata->jobid));
        if (ORTE_SUCCESS != (rc = rank_by(jdata, app, nodes, HWLOC_OBJ_CACHE, 1))) {
            if (ORTE_ERR_NOT_SUPPORTED == rc &&
                !(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) {
                ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT);
                goto rankbyslot;
            }
            ORTE_ERROR_LOG(rc);
        }
        return rc;
    }

    if (ORTE_RANK_BY_CORE == ORTE_GET_RANKING_POLICY(map->ranking)) {
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps: computing ranks by core for job %s",
                            ORTE_JOBID_PRINT(jdata->jobid));
        if (ORTE_SUCCESS != (rc = rank_by(jdata, app, nodes, HWLOC_OBJ_CORE, 0))) {
            if (ORTE_ERR_NOT_SUPPORTED == rc &&
                !(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) {
                ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT);
                goto rankbyslot;
            }
            ORTE_ERROR_LOG(rc);
        }
        return rc;
    }

    if (ORTE_RANK_BY_HWTHREAD == ORTE_GET_RANKING_POLICY(map->ranking)) {
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps: computing ranks by hwthread for job %s",
                            ORTE_JOBID_PRINT(jdata->jobid));
        if (ORTE_SUCCESS != (rc = rank_by(jdata, app, nodes, HWLOC_OBJ_PU, 0))) {
            if (ORTE_ERR_NOT_SUPPORTED == rc &&
                !(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) {
                ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT);
                goto rankbyslot;
            }
            ORTE_ERROR_LOG(rc);
        }
        return rc;
    }
#endif

    if (ORTE_RANK_BY_NODE == ORTE_GET_RANKING_POLICY(map->ranking) ||
        ORTE_RANK_BY_BOARD == ORTE_GET_RANKING_POLICY(map->ranking)) {
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:base: computing vpids by node for job %s app %d on %d nodes",
                            ORTE_JOBID_PRINT(jdata->jobid), (int)app->idx,
                            (int)opal_list_get_size(nodes));
        /* bozo check */
        if (0 == opal_list_get_size(nodes)) {
            ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
            return ORTE_ERR_BAD_PARAM;
        }
        /* assign the ranks round-robin across nodes - only one board/node
         * at this time, so they are equivalent
         */
        cnt=0;
        vpid=jdata->num_procs;
        one_found = true;
        while (cnt < app->num_procs && one_found) {
            one_found = false;
            for (item = opal_list_get_first(nodes);
                 item != opal_list_get_end(nodes);
                 item = opal_list_get_next(item)) {
                node = (orte_node_t*)item;
                for (j=0; j < node->procs->size; j++) {
                    if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
                        continue;
                    }
                    /* ignore procs from other jobs */
                    if (proc->name.jobid != jdata->jobid) {
                        continue;
                    }
                    /* ignore procs from other apps */
                    if (proc->app_idx != app->idx) {
                        continue;
                    }
                    if (ORTE_VPID_INVALID != proc->name.vpid) {
                        continue;
                    }
                    proc->name.vpid = vpid++;
                    /* insert the proc into the jdata array - no harm if already there */
                    if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) {
                        ORTE_ERROR_LOG(rc);
                        return rc;
                    }
                    cnt++;
                    one_found = true;
                    /* track where the highest vpid landed - this is our
                     * new bookmark
                     */
                    jdata->bookmark = node;
                    break;  /* move on to next node */
                }
            }
        }
        if (cnt < app->num_procs) {
            ORTE_ERROR_LOG(ORTE_ERR_FATAL);
            return ORTE_ERR_FATAL;
        }
        return ORTE_SUCCESS;
    }

#if OPAL_HAVE_HWLOC
 rankbyslot:
#endif
    if (ORTE_RANK_BY_SLOT == ORTE_GET_RANKING_POLICY(map->ranking)) {
        /* assign the ranks sequentially */
        opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                            "mca:rmaps:base: computing vpids by slot for job %s",
                            ORTE_JOBID_PRINT(jdata->jobid));
        vpid = jdata->num_procs;
        for (item = opal_list_get_first(nodes);
             item != opal_list_get_end(nodes);
             item = opal_list_get_next(item)) {
            node = (orte_node_t*)item;

            for (j=0; j < node->procs->size; j++) {
                if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
                    continue;
                }
                /* ignore procs from other jobs */
                if (proc->name.jobid != jdata->jobid) {
                    continue;
                }
                /* ignore procs from other apps */
                if (proc->app_idx != app->idx) {
                    continue;
                }
                if (ORTE_VPID_INVALID == proc->name.vpid) {
                    opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
                                        "mca:rmaps:base: assigning rank %s to node %s",
                                        ORTE_VPID_PRINT(vpid), node->name);
                    proc->name.vpid = vpid++;
                   /* track where the highest vpid landed - this is our
                     * new bookmark
                     */
                    jdata->bookmark = node;
                }
                /* insert the proc into the jdata array - no harm if already there */
                if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) {
                    ORTE_ERROR_LOG(rc);
                    return rc;
                }
            }
        }
        return ORTE_SUCCESS;
    }

    return ORTE_ERR_NOT_IMPLEMENTED;
}
Exemple #4
0
/**
 * Function for finding and opening either all MCA components, or the one
 * that was specifically requested via a MCA parameter.
 */
static int orte_rmaps_base_open(mca_base_open_flag_t flags)
{
    int rc;

    /* init the globals */
    OBJ_CONSTRUCT(&orte_rmaps_base.selected_modules, opal_list_t);
    orte_rmaps_base.ppr = NULL;
    orte_rmaps_base.slot_list = NULL;
    orte_rmaps_base.mapping = 0;
    orte_rmaps_base.ranking = 0;

#if OPAL_HAVE_HWLOC
    /* if a topology file was given, then set our topology
     * from it. Even though our actual topology may differ,
     * mpirun only needs to see the compute node topology
     * for mapping purposes
     */
    if (NULL != rmaps_base_topo_file) {
        if (OPAL_SUCCESS != (rc = opal_hwloc_base_set_topology(rmaps_base_topo_file))) {
            orte_show_help("help-orte-rmaps-base.txt", "topo-file", true, rmaps_base_topo_file);
            return ORTE_ERR_SILENT;
        }
    }
#endif

    if (ORTE_SUCCESS != (rc = orte_rmaps_base_set_mapping_policy(&orte_rmaps_base.mapping,
                                                                 &orte_rmaps_base.device,
                                                                 rmaps_base_mapping_policy))) {
        return rc;
    }

    if (ORTE_SUCCESS != (rc = orte_rmaps_base_set_ranking_policy(&orte_rmaps_base.ranking,
                                                                 orte_rmaps_base.mapping,
                                                                 rmaps_base_ranking_policy))) {
        return rc;
    }

    if (rmaps_base_byslot) {
        /* set mapping policy to byslot - error if something else already set */
        if ((ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) &&
            ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping) != ORTE_MAPPING_BYSLOT) {
            /* error - cannot redefine the default mapping policy */
            orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping",
                           "byslot", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
            return ORTE_ERR_SILENT;
        }
        ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_BYSLOT);
        ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
        /* set ranking policy to byslot - error if something else already set */
        if ((ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(orte_rmaps_base.ranking)) &&
            ORTE_GET_RANKING_POLICY(orte_rmaps_base.ranking) != ORTE_RANK_BY_SLOT) {
            /* error - cannot redefine the default ranking policy */
            orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "ranking",
                           "byslot", orte_rmaps_base_print_ranking(orte_rmaps_base.ranking));
            return ORTE_ERR_SILENT;
        }
        ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_SLOT);
        ORTE_SET_RANKING_DIRECTIVE(orte_rmaps_base.ranking, ORTE_RANKING_GIVEN);
    }

    if (rmaps_base_bynode) {
        /* set mapping policy to bynode - error if something else already set */
        if ((ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) &&
            ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping) != ORTE_MAPPING_BYNODE) {
            orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping",
                           "bynode", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
            return ORTE_ERR_SILENT;
        }
        ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_BYNODE);
        ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
        /* set ranking policy to bynode - error if something else already set */
        if ((ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(orte_rmaps_base.ranking)) &&
            ORTE_GET_RANKING_POLICY(orte_rmaps_base.ranking) != ORTE_RANK_BY_NODE) {
            /* error - cannot redefine the default ranking policy */
            orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "ranking",
                           "bynode", orte_rmaps_base_print_ranking(orte_rmaps_base.ranking));
            return ORTE_ERR_SILENT;
        }
        ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_NODE);
        ORTE_SET_RANKING_DIRECTIVE(orte_rmaps_base.ranking, ORTE_RANKING_GIVEN);
    }

    /* Should we schedule on the local node or not? */
    if (rmaps_base_no_schedule_local) {
        orte_rmaps_base.mapping |= ORTE_MAPPING_NO_USE_LOCAL;
    }

    /* Should we oversubscribe or not? */
    if (rmaps_base_no_oversubscribe) {
        if ((ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) &&
            !(ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
            /* error - cannot redefine the default mapping policy */
            orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping",
                           "no-oversubscribe", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
            return ORTE_ERR_SILENT;
        }
        ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE);
        ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_SUBSCRIBE_GIVEN);
    }

    /** force oversubscription permission */
    if (rmaps_base_oversubscribe) {
        if ((ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) &&
            (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
            /* error - cannot redefine the default mapping policy */
            orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping",
                           "oversubscribe", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
            return ORTE_ERR_SILENT;
        }
        ORTE_UNSET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE);
        ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_SUBSCRIBE_GIVEN);
    }

    /* should we display a detailed (developer-quality) version of the map after determining it? */
    if (rmaps_base_display_devel_map) {
        orte_rmaps_base.display_map = true;
        orte_devel_level_output = true;
    }
   
    /* should we display a diffable report of proc locations after determining it? */
    if (rmaps_base_display_diffable_map) {
        orte_rmaps_base.display_map = true;
        orte_display_diffable_output = true;
    }

    /* Open up all available components */
    rc = mca_base_framework_components_open(&orte_rmaps_base_framework, flags);

    /* check to see if any component indicated a problem */
    if (ORTE_MAPPING_CONFLICTED & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
        /* the component would have already reported the error, so
         * tell the rest of the chain to shut up
         */
        return ORTE_ERR_SILENT;
    }

    /* All done */
    return rc;
}