Beispiel #1
0
void ompi_info_show_mca_params(opal_list_t *info,
                               const char *type, const char *component, 
                               bool want_internal)
{
    opal_list_item_t *i;
    mca_base_param_info_t *p;
    char *value_string, *empty = "";
    char *message, *content, *tmp;
    int value_int, j;
    mca_base_param_source_t source;
    char *src_file;
    
    for (i = opal_list_get_first(info); i != opal_list_get_last(info);
         i = opal_list_get_next(i)) {
        p = (mca_base_param_info_t*) i;
        
        if (NULL != p->mbpp_type_name && 0 == strcmp(type, p->mbpp_type_name)) {
            if (0 == strcmp(component, ompi_info_component_all) || 
                NULL == p->mbpp_component_name ||
                (NULL != p->mbpp_component_name &&
                 0 == strcmp(component, p->mbpp_component_name))) {
                
                /* Find the source of the value */
                if (OPAL_SUCCESS != 
                    mca_base_param_lookup_source(p->mbpp_index, &source, &src_file)) {
                    continue;
                }
                
                /* Make a char *for the default value.  Invoke a
                 * lookup because it may transform the char *("~/" ->
                 * "<home dir>/") or get the value from the
                 * environment, a file, etc.
                 */
                if (MCA_BASE_PARAM_TYPE_STRING == p->mbpp_type) {
                    mca_base_param_lookup_string(p->mbpp_index,
                                                 &value_string);
                    
                    /* Can't let the char *be NULL because we
                     * assign it to a std::string, below
                     */
                    if (NULL == value_string) {
                        value_string = strdup(empty);
                    }
                } else {
                    mca_base_param_lookup_int(p->mbpp_index, &value_int);
                    asprintf(&value_string, "%d", value_int);
                }
                
                /* Build up the strings to ompi_info_output. */
                
                if (ompi_info_pretty) {
                    asprintf(&message, "MCA %s", p->mbpp_type_name);
                    
                    /* Put in the real, full name (which may be
                     * different than the categorization).
                     */
                    asprintf(&content, "%s \"%s\" (%s: <%s>, data source: ",
                             p->mbpp_read_only ? "information" : "parameter",
                             p->mbpp_full_name,
                             p->mbpp_read_only ? "value" : "current value",
                             (0 == strlen(value_string)) ? "none" : value_string);
                    
                    /* Indicate where the param was set from */
                    switch(source) {
                        case MCA_BASE_PARAM_SOURCE_DEFAULT:
                            asprintf(&tmp, "%sdefault value", content);
                            free(content);
                            content = tmp;
                            break;
                        case MCA_BASE_PARAM_SOURCE_ENV:
                            asprintf(&tmp, "%senvironment or cmdline", content);
                            free(content);
                            content = tmp;
                            break;
                        case MCA_BASE_PARAM_SOURCE_FILE:
                            asprintf(&tmp, "%sfile [%s]", content, src_file);
                            free(content);
                            content = tmp;
                            break;
                        case MCA_BASE_PARAM_SOURCE_OVERRIDE:
                            asprintf(&tmp, "%sAPI override", content);
                            free(content);
                            content = tmp;
                            break;
                        default:
                            break;
                    }
                    
                    /* Is this parameter deprecated? */
                    if (p->mbpp_deprecated) {
                        asprintf(&tmp, "%s, deprecated", content);
                        free(content);
                        content = tmp;
                    }
                    
                    /* Does this parameter have any synonyms? */
                    if (p->mbpp_synonyms_len > 0) {
                        asprintf(&tmp, "%s, synonyms: ", content);
                        free(content);
                        content = tmp;
                        for (j = 0; j < p->mbpp_synonyms_len; ++j) {
                            if (j > 0) {
                                asprintf(&tmp, "%s, %s", content, p->mbpp_synonyms[j]->mbpp_full_name);
                                free(content);
                                content = tmp;
                            } else {
                                asprintf(&tmp, "%s%s", content, p->mbpp_synonyms[j]->mbpp_full_name);
                                free(content);
                                content = tmp;
                            }
                        }
                    }
                    
                    /* Is this parameter a synonym of something else? */
                    else if (NULL != p->mbpp_synonym_parent) {
                        asprintf(&tmp, "%s, synonym of: %s", content, p->mbpp_synonym_parent->mbpp_full_name);
                        free(content);
                        content = tmp;
                    }
                    asprintf(&tmp, "%s)", content);
                    free(content);
                    content = tmp;
                    ompi_info_out(message, message, content);
                    free(message);
                    free(content);
                    
                    /* If we have a help message, ompi_info_output it */
                    if (NULL != p->mbpp_help_msg) {
                        ompi_info_out("", "", p->mbpp_help_msg);
                    }
                } else {
                    /* build the message*/
                    asprintf(&tmp, "mca:%s:%s:param:%s:", p->mbpp_type_name,
                             (NULL == p->mbpp_component_name) ? "base" : p->mbpp_component_name,
                             p->mbpp_full_name);

                    /* Output the value */
                    asprintf(&message, "%svalue", tmp);
                    ompi_info_out(message, message, value_string);
                    free(message);
                    
                    /* Indicate where the param was set from */
                    
                    asprintf(&message, "%sdata_source", tmp);
                    switch(source) {
                        case MCA_BASE_PARAM_SOURCE_DEFAULT:
                            content = strdup("default value");
                            break;
                        case MCA_BASE_PARAM_SOURCE_ENV:
                            content = strdup("environment-cmdline");
                            break;
                        case MCA_BASE_PARAM_SOURCE_FILE:
                            asprintf(&content, "file: %s", src_file);
                            break;
                        case MCA_BASE_PARAM_SOURCE_OVERRIDE:
                            content = strdup("API override");
                            break;
                        default:
                            break;
                    }
                    ompi_info_out(message, message, content);
                    free(message);
                    free(content);
                    
                    /* Output whether it's read only or writable */
                    
                    asprintf(&message, "%sstatus", tmp);
                    content = p->mbpp_read_only ? "read-only" : "writable";
                    ompi_info_out(message, message, content);
                    free(message);
                    
                    /* If it has a help message, ompi_info_output that */
                    
                    if (NULL != p->mbpp_help_msg) {
                        asprintf(&message, "%shelp", tmp);
                        content = p->mbpp_help_msg;
                        ompi_info_out(message, message, content);
                        free(message);
                    }
                    
                    /* Is this parameter deprecated? */
                    asprintf(&message, "%sdeprecated", tmp);
                    content = p->mbpp_deprecated ? "yes" : "no";
                    ompi_info_out(message, message, content);
                    free(message);
                    
                    /* Does this parameter have any synonyms? */
                    if (p->mbpp_synonyms_len > 0) {
                        for (j = 0; j < p->mbpp_synonyms_len; ++j) {
                            asprintf(&message, "%ssynonym:name", tmp);
                            content = p->mbpp_synonyms[j]->mbpp_full_name;
                            ompi_info_out(message, message, content);
                            free(message);
                        }
                    }
                    
                    /* Is this parameter a synonym of something else? */
                    else if (NULL != p->mbpp_synonym_parent) {
                        asprintf(&message, "%ssynonym_of:name", tmp);
                        content = p->mbpp_synonym_parent->mbpp_full_name;
                        ompi_info_out(message, message, content);
                        free(message);
                    }
                }
                
                /* If we allocated the string, then free it */
                
                if (NULL != value_string) {
                    free(value_string);
                }
            }
        }
    }
}
Beispiel #2
0
/*
 * Query the registry for all nodes allocated to a specified app_context
 */
int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr_t *total_num_slots,
                                     orte_app_context_t *app, orte_mapping_policy_t policy,
                                     bool initial_map, bool silent)
{
    opal_list_item_t *item, *next;
    orte_node_t *node, *nd, *nptr;
    orte_std_cntr_t num_slots;
    orte_std_cntr_t i;
    int rc;
    orte_job_t *daemons;
    bool novm;
    opal_list_t nodes;
    char *hosts;

    /** set default answer */
    *total_num_slots = 0;

    /* get the daemon job object */
    daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
    /* see if we have a vm or not */
    novm = orte_get_attribute(&daemons->attributes, ORTE_JOB_NO_VM, NULL, OPAL_BOOL);

    /* if this is NOT a managed allocation, then we use the nodes
     * that were specified for this app - there is no need to collect
     * all available nodes and "filter" them
     */
    if (!orte_managed_allocation) {
        OBJ_CONSTRUCT(&nodes, opal_list_t);
        /* if the app provided a dash-host, and we are not treating
         * them as requested or "soft" locations, then use those nodes
         */
        hosts = NULL;
        if (!orte_soft_locations &&
            orte_get_attribute(&app->attributes, ORTE_APP_DASH_HOST, (void**)&hosts, OPAL_STRING)) {
            OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                                 "%s using dash_host %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hosts));
            if (ORTE_SUCCESS != (rc = orte_util_add_dash_host_nodes(&nodes, hosts, false))) {
                ORTE_ERROR_LOG(rc);
                free(hosts);
                return rc;
            }
            free(hosts);
        } else if (orte_get_attribute(&app->attributes, ORTE_APP_HOSTFILE, (void**)&hosts, OPAL_STRING)) {
            /* otherwise, if the app provided a hostfile, then use that */
            OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                                 "%s using hostfile %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hosts));
            if (ORTE_SUCCESS != (rc = orte_util_add_hostfile_nodes(&nodes, hosts))) {
                free(hosts);
                ORTE_ERROR_LOG(rc);
                return rc;
            }
            free(hosts);
        } else if (NULL != orte_rankfile) {
            /* use the rankfile, if provided */
            OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                                 "%s using rankfile %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 orte_rankfile));
            if (ORTE_SUCCESS != (rc = orte_util_add_hostfile_nodes(&nodes,
                                                                   orte_rankfile))) {
                ORTE_ERROR_LOG(rc);
                return rc;
            }
            if (0 == opal_list_get_size(&nodes)) {
                OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                                     "%s nothing found in given rankfile",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
                OBJ_DESTRUCT(&nodes);
                return ORTE_ERR_BAD_PARAM;
            }
        } else if (NULL != orte_default_hostfile) {
            /* fall back to the default hostfile, if provided */
            OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                                 "%s using default hostfile %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 orte_default_hostfile));
            if (ORTE_SUCCESS != (rc = orte_util_add_hostfile_nodes(&nodes,
                                                                   orte_default_hostfile))) {
                ORTE_ERROR_LOG(rc);
                return rc;
            }
            /* this is a special case - we always install a default
             * hostfile, but it is empty. If the user didn't remove it
             * or put something into it, then we will have pursued that
             * option and found nothing. This isn't an error, we just need
             * to add all the known nodes
             */
            if (0 == opal_list_get_size(&nodes)) {
                OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                                     "%s nothing in default hostfile - using known nodes",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
                goto addknown;
            }
        } else {
            /* if nothing else was available, then use all known nodes, which
             * will include ourselves
             */
            OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                                 "%s using known nodes",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
            goto addknown;
        }
        /** if we still don't have anything */
        if (0 == opal_list_get_size(&nodes)) {
            if (!silent) {
                orte_show_help("help-orte-rmaps-base.txt",
                               "orte-rmaps-base:no-available-resources",
                               true);
            }
            OBJ_DESTRUCT(&nodes);
            return ORTE_ERR_SILENT;
        }
        /* find the nodes in our node array and assemble them
         * in daemon order if the vm was launched
         */
        while (NULL != (item = opal_list_remove_first(&nodes))) {
            nptr = (orte_node_t*)item;
            nd = NULL;
            for (i=0; i < orte_node_pool->size; i++) {
                if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
                    continue;
                }
                if (0 != strcmp(node->name, nptr->name)) {
                    OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output,
                                         "NODE %s DOESNT MATCH NODE %s",
                                         node->name, nptr->name));
                    continue;
                }
                /* ignore nodes that are marked as do-not-use for this mapping */
                if (ORTE_NODE_STATE_DO_NOT_USE == node->state) {
                    OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output,
                                         "NODE %s IS MARKED NO_USE", node->name));
                    /* reset the state so it can be used another time */
                    node->state = ORTE_NODE_STATE_UP;
                    continue;
                }
                if (ORTE_NODE_STATE_DOWN == node->state) {
                    OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output,
                                         "NODE %s IS DOWN", node->name));
                    continue;
                }
                if (ORTE_NODE_STATE_NOT_INCLUDED == node->state) {
                    OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output,
                                         "NODE %s IS MARKED NO_INCLUDE", node->name));
                    /* not to be used */
                    continue;
                }
                /* if this node wasn't included in the vm (e.g., by -host), ignore it,
                 * unless we are mapping prior to launching the vm
                 */
                if (NULL == node->daemon && !novm) {
                    OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output,
                                         "NODE %s HAS NO DAEMON", node->name));
                    continue;
                }
                /* retain a copy for our use in case the item gets
                 * destructed along the way
                 */
                OBJ_RETAIN(node);
                if (initial_map) {
                    /* if this is the first app_context we
                     * are getting for an initial map of a job,
                     * then mark all nodes as unmapped
                     */
                    ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED);
                }
                if (NULL == nd || NULL == nd->daemon ||
                    NULL == node->daemon ||
                    nd->daemon->name.vpid < node->daemon->name.vpid) {
                    /* just append to end */
                    opal_list_append(allocated_nodes, &node->super);
                    nd = node;
                } else {
                    /* starting from end, put this node in daemon-vpid order */
                    while (node->daemon->name.vpid < nd->daemon->name.vpid) {
                        if (opal_list_get_begin(allocated_nodes) == opal_list_get_prev(&nd->super)) {
                            /* insert at beginning */
                            opal_list_prepend(allocated_nodes, &node->super);
                            goto moveon1;
                        }
                        nd = (orte_node_t*)opal_list_get_prev(&nd->super);
                    }
                    item = opal_list_get_next(&nd->super);
                    if (item == opal_list_get_end(allocated_nodes)) {
                        /* we are at the end - just append */
                        opal_list_append(allocated_nodes, &node->super);
                    } else {
                        nd = (orte_node_t*)item;
                        opal_list_insert_pos(allocated_nodes, item, &node->super);
                    }
                moveon1:
                    /* reset us back to the end for the next node */
                    nd = (orte_node_t*)opal_list_get_last(allocated_nodes);
                }
            }
            OBJ_RELEASE(nptr);
        }
        OBJ_DESTRUCT(&nodes);
        /* now prune for usage and compute total slots */
        goto complete;
    }

 addknown:
    /* if the hnp was allocated, include it unless flagged not to */
    if (orte_hnp_is_allocated && !(ORTE_GET_MAPPING_DIRECTIVE(policy) & ORTE_MAPPING_NO_USE_LOCAL)) {
        if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0))) {
            if (ORTE_NODE_STATE_DO_NOT_USE == node->state) {
                OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output,
                                     "HNP IS MARKED NO_USE"));
                /* clear this for future use, but don't include it */
                node->state = ORTE_NODE_STATE_UP;
            } else if (ORTE_NODE_STATE_NOT_INCLUDED != node->state) {
                OBJ_RETAIN(node);
                if (initial_map) {
                    /* if this is the first app_context we
                     * are getting for an initial map of a job,
                     * then mark all nodes as unmapped
                     */
                    ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED);
                }
                opal_list_append(allocated_nodes, &node->super);
            }
        }
    }

    /* add everything in the node pool that can be used - add them
     * in daemon order, which may be different than the order in the
     * node pool. Since an empty list is passed into us, the list at
     * this point either has the HNP node or nothing, and the HNP
     * node obviously has a daemon on it (us!)
     */
    if (0 == opal_list_get_size(allocated_nodes)) {
        /* the list is empty */
        nd = NULL;
    } else {
        nd = (orte_node_t*)opal_list_get_last(allocated_nodes);
    }
    for (i=1; i < orte_node_pool->size; i++) {
        if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
            /* ignore nodes that are marked as do-not-use for this mapping */
            if (ORTE_NODE_STATE_DO_NOT_USE == node->state) {
                OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output,
                                     "NODE %s IS MARKED NO_USE", node->name));
                /* reset the state so it can be used another time */
                node->state = ORTE_NODE_STATE_UP;
                continue;
            }
            if (ORTE_NODE_STATE_DOWN == node->state) {
                OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output,
                                     "NODE %s IS MARKED DOWN", node->name));
                continue;
            }
            if (ORTE_NODE_STATE_NOT_INCLUDED == node->state) {
                OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output,
                                     "NODE %s IS MARKED NO_INCLUDE", node->name));
                /* not to be used */
                continue;
            }
            /* if this node wasn't included in the vm (e.g., by -host), ignore it,
             * unless we are mapping prior to launching the vm
             */
            if (NULL == node->daemon && !novm) {
                OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output,
                                     "NODE %s HAS NO DAEMON", node->name));
                continue;
            }
            /* retain a copy for our use in case the item gets
             * destructed along the way
             */
            OBJ_RETAIN(node);
            if (initial_map) {
                /* if this is the first app_context we
                 * are getting for an initial map of a job,
                 * then mark all nodes as unmapped
                 */
                    ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED);
            }
            if (NULL == nd || NULL == nd->daemon ||
		NULL == node->daemon ||
                nd->daemon->name.vpid < node->daemon->name.vpid) {
                /* just append to end */
                opal_list_append(allocated_nodes, &node->super);
                nd = node;
            } else {
                /* starting from end, put this node in daemon-vpid order */
                while (node->daemon->name.vpid < nd->daemon->name.vpid) {
                    if (opal_list_get_begin(allocated_nodes) == opal_list_get_prev(&nd->super)) {
                        /* insert at beginning */
                        opal_list_prepend(allocated_nodes, &node->super);
                        goto moveon;
                    }
                    nd = (orte_node_t*)opal_list_get_prev(&nd->super);
                }
                item = opal_list_get_next(&nd->super);
                if (item == opal_list_get_end(allocated_nodes)) {
                    /* we are at the end - just append */
                    opal_list_append(allocated_nodes, &node->super);
                } else {
                    nd = (orte_node_t*)item;
                    opal_list_insert_pos(allocated_nodes, item, &node->super);
                }
            moveon:
                /* reset us back to the end for the next node */
                nd = (orte_node_t*)opal_list_get_last(allocated_nodes);
            }
        }
    }

    OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                         "%s Starting with %d nodes in list",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         (int)opal_list_get_size(allocated_nodes)));

    /** check that anything is here */
    if (0 == opal_list_get_size(allocated_nodes)) {
        if (!silent) {
            orte_show_help("help-orte-rmaps-base.txt",
                           "orte-rmaps-base:no-available-resources",
                           true);
        }
        return ORTE_ERR_SILENT;
    }

    /* filter the nodes thru any hostfile and dash-host options */
    OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                         "%s Filtering thru apps",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    if (ORTE_SUCCESS != (rc = orte_rmaps_base_filter_nodes(app, allocated_nodes, true))
        && ORTE_ERR_TAKE_NEXT_OPTION != rc) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
    OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                         "%s Retained %d nodes in list",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         (int)opal_list_get_size(allocated_nodes)));

 complete:
    /* remove all nodes that are already at max usage, and
     * compute the total number of allocated slots while
     * we do so */
    num_slots = 0;
    item  = opal_list_get_first(allocated_nodes);
    while (item != opal_list_get_end(allocated_nodes)) {
        /** save the next pointer in case we remove this node */
        next  = opal_list_get_next(item);
        /** check to see if this node is fully used - remove if so */
        node = (orte_node_t*)item;
        if (0 != node->slots_max && node->slots_inuse > node->slots_max) {
            OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                                 "%s Removing node %s: max %d inuse %d",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 node->name, node->slots_max, node->slots_inuse));
            opal_list_remove_item(allocated_nodes, item);
            OBJ_RELEASE(item);  /* "un-retain" it */
        } else if (node->slots <= node->slots_inuse &&
                   (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(policy))) {
            /* remove the node as fully used */
            OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                                 "%s Removing node %s slots %d inuse %d",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 node->name, node->slots, node->slots_inuse));
            opal_list_remove_item(allocated_nodes, item);
            OBJ_RELEASE(item);  /* "un-retain" it */
        } else if (node->slots > node->slots_inuse) {
                /* add the available slots */
                OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                                     "%s node %s has %d slots available",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                     node->name, node->slots - node->slots_inuse));
                num_slots += node->slots - node->slots_inuse;
        } else if (!(ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(policy))) {
                /* nothing needed to do here - we don't add slots to the
                 * count as we don't have any available. Just let the mapper
                 * do what it needs to do to meet the request
                 */
                OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                                     "%s node %s is fully used, but available for oversubscrition",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                     node->name));
        } else {
            /* if we cannot use it, remove it from list */
            opal_list_remove_item(allocated_nodes, item);
            OBJ_RELEASE(item);  /* "un-retain" it */
        }
        /** go on to next item */
        item = next;
    }

    /* Sanity check to make sure we have resources available */
    if (0 == opal_list_get_size(allocated_nodes)) {
        if (silent) {
            /* let the caller know that the resources exist,
             * but are currently busy
             */
            return ORTE_ERR_RESOURCE_BUSY;
        } else {
            orte_show_help("help-orte-rmaps-base.txt",
                           "orte-rmaps-base:all-available-resources-used", true);
            return ORTE_ERR_SILENT;
        }
    }

    /* pass back the total number of available slots */
    *total_num_slots = num_slots;

    if (4 < opal_output_get_verbosity(orte_rmaps_base_framework.framework_output)) {
        opal_output(0, "AVAILABLE NODES FOR MAPPING:");
        for (item = opal_list_get_first(allocated_nodes);
             item != opal_list_get_end(allocated_nodes);
             item = opal_list_get_next(item)) {
            node = (orte_node_t*)item;
            opal_output(0, "    node: %s daemon: %s", node->name,
                        (NULL == node->daemon) ? "NULL" : ORTE_VPID_PRINT(node->daemon->name.vpid));
        }
    }

    return ORTE_SUCCESS;
}
Beispiel #3
0
/*
 * determine the proper starting point for the next mapping operation
 */
orte_node_t* orte_rmaps_base_get_starting_point(opal_list_t *node_list,
                                                orte_job_t *jdata)
{
    opal_list_item_t *item, *cur_node_item;
    orte_node_t *node, *nd1, *ndmin;
    int overload;

    /* if a bookmark exists from some prior mapping, set us to start there */
    if (NULL != jdata->bookmark) {
        cur_node_item = NULL;
        /* find this node on the list */
        for (item = opal_list_get_first(node_list);
             item != opal_list_get_end(node_list);
             item = opal_list_get_next(item)) {
            node = (orte_node_t*)item;

            if (node->index == jdata->bookmark->index) {
                cur_node_item = item;
                break;
            }
        }
        /* see if we found it - if not, just start at the beginning */
        if (NULL == cur_node_item) {
            cur_node_item = opal_list_get_first(node_list);
        }
    } else {
        /* if no bookmark, then just start at the beginning of the list */
        cur_node_item = opal_list_get_first(node_list);
    }

    OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                         "%s Starting bookmark at node %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ((orte_node_t*)cur_node_item)->name));

    /* is this node fully subscribed? If so, then the first
     * proc we assign will oversubscribe it, so let's look
     * for another candidate
     */
    node = (orte_node_t*)cur_node_item;
    ndmin = node;
    overload = ndmin->slots_inuse - ndmin->slots;
    if (node->slots_inuse >= node->slots) {
        /* work down the list - is there another node that
         * would not be oversubscribed?
         */
        if (cur_node_item != opal_list_get_last(node_list)) {
            item = opal_list_get_next(cur_node_item);
        } else {
            item = opal_list_get_first(node_list);
        }
        nd1 = NULL;
        while (item != cur_node_item) {
            nd1 = (orte_node_t*)item;
            if (nd1->slots_inuse < nd1->slots) {
                /* this node is not oversubscribed! use it! */
                cur_node_item = item;
                goto process;
            }
            /* this one was also oversubscribed, keep track of the
             * node that has the least usage - if we can't
             * find anyone who isn't fully utilized, we will
             * start with the least used node
             */
            if (overload >= (nd1->slots_inuse - nd1->slots)) {
                ndmin = nd1;
                overload = ndmin->slots_inuse - ndmin->slots;
            }
            if (item == opal_list_get_last(node_list)) {
                item = opal_list_get_first(node_list);
            } else {
                item= opal_list_get_next(item);
            }
        }
        /* if we get here, then we cycled all the way around the
         * list without finding a better answer - just use the node
         * that is minimally overloaded if it is better than
         * what we already have
         */
        if (NULL != nd1 &&
            (nd1->slots_inuse - nd1->slots) < (node->slots_inuse - node->slots)) {
            cur_node_item = (opal_list_item_t*)ndmin;
        }
    }

 process:
    OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                         "%s Starting at node %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ((orte_node_t*)cur_node_item)->name));

    /* make life easier - put the bookmark at the top of the list,
     * shifting everything above it to the end of the list while
     * preserving order
     */
    while (cur_node_item != (item = opal_list_get_first(node_list))) {
        opal_list_remove_item(node_list, item);
        opal_list_append(node_list, item);
    }

    return (orte_node_t*)cur_node_item;
}
static int mca_bcol_iboffload_fanin_proxy_progress(
                mca_bcol_iboffload_module_t *iboffload,
                struct mca_bcol_iboffload_collreq_t *coll_request)
{
    int rc = OMPI_SUCCESS, leader_rank = 0;

    struct mqe_task *last_send = NULL;
    mca_bcol_iboffload_task_t *send_task = NULL;
    mca_bcol_iboffload_frag_t *send_fragment = NULL;

    struct mqe_task **mqe_ptr_to_set;
    mca_bcol_iboffload_collfrag_t *coll_fragment;

    coll_fragment = (mca_bcol_iboffload_collfrag_t *)
                         opal_list_get_last(&coll_request->work_requests);

    mqe_ptr_to_set = &coll_fragment->to_post;

    if (OPAL_UNLIKELY(false == BCOL_IBOFFLOAD_MQ_HAVE_CREDITS(
               iboffload, coll_fragment->mq_index, coll_fragment->mq_credits))) {
        IBOFFLOAD_VERBOSE(10, ("There are not enough credits on MQ.\n"));
        goto out_of_resources;
    }

    /* post send */
    send_fragment = mca_bcol_iboffload_get_send_frag(coll_request,
                                    leader_rank, coll_request->qp_index, 0,
                                    0, SBUF, MCA_BCOL_IBOFFLOAD_SEND_FRAG_DUMMY);
    if(NULL == send_fragment) {
        IBOFFLOAD_VERBOSE(10, ("Failing for getting and packing send frag.\n"));
        goto out_of_resources;
    }

    send_task = mca_bcol_iboffload_get_send_task(iboffload, leader_rank, MCA_BCOL_IBOFFLOAD_QP_BARRIER,
                                                 send_fragment, coll_fragment, INLINE);
    if(NULL == send_task) {
        IBOFFLOAD_VERBOSE(10, ("Failing for getting send task.\n"));
        goto out_of_resources;
    }

    APPEND_TO_TASKLIST(mqe_ptr_to_set, send_task, last_send);
    MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, send_task);

   /* end of list */
    *mqe_ptr_to_set = NULL;
    assert(NULL != last_send);

    last_send->flags |= MQE_WR_FLAG_SIGNAL;

    coll_fragment->signal_task_wr_id = last_send->wr_id;
    last_send->wr_id = (uint64_t) (uintptr_t) coll_fragment;

    /* post the mwr */
    rc = mca_bcol_iboffload_post_mqe_tasks(iboffload, coll_fragment->to_post);
    if(OMPI_SUCCESS != rc) {
        IBOFFLOAD_VERBOSE(10, ("MQE task posting failing.\n"));
        /* Note: need to clean up */
        return rc;
    }

    MCA_BCOL_UPDATE_ORDER_COUNTER(&iboffload->super, coll_request->order_info);

    return OMPI_SUCCESS;

out_of_resources:
    /* Release all resources */
    IBOFFLOAD_VERBOSE(10, ("Fan-in, adding collfrag to collfrag_pending"));
    return mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, iboffload);
}
Beispiel #5
0
/*
 * Lookup a peer by name, create one if it doesn't exist.
 * @param name  Peers globally unique identifier.
 * @retval      Pointer to the newly created struture or NULL on error.
 */
mca_oob_tcp_peer_t * mca_oob_tcp_peer_lookup(const orte_process_name_t* name)
{
    int rc;
    mca_oob_tcp_peer_t * peer, *old;
    if (NULL == name) { /* can't look this one up */
        return NULL;
    }
    
    OPAL_THREAD_LOCK(&mca_oob_tcp_component.tcp_lock);
    peer = (mca_oob_tcp_peer_t*)orte_hash_table_get_proc(
       &mca_oob_tcp_component.tcp_peers, name);
    if(NULL != peer && memcmp(&peer->peer_name,name,sizeof(peer->peer_name)) == 0) {
        OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_lock);
        return peer;
    }

    /* allocate from free list */
    MCA_OOB_TCP_PEER_ALLOC(peer, rc);
    if(NULL == peer) {
        OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_lock);
        return NULL;
    }

    /* initialize peer state */
    peer->peer_name = *name;
    peer->peer_addr = NULL;
    peer->peer_sd = -1;
    peer->peer_state = MCA_OOB_TCP_CLOSED;
    peer->peer_recv_msg = NULL;
    peer->peer_send_msg = NULL;
    peer->peer_retries = 0;

    /* add to lookup table */
    if(ORTE_SUCCESS != orte_hash_table_set_proc(&mca_oob_tcp_component.tcp_peers, 
        &peer->peer_name, peer)) {
        MCA_OOB_TCP_PEER_RETURN(peer);
        OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_lock);
        return NULL;
    }

    /* if the peer list is over the maximum size, remove one unsed peer */
    opal_list_prepend(&mca_oob_tcp_component.tcp_peer_list, (opal_list_item_t *) peer);
    if(mca_oob_tcp_component.tcp_peer_limit > 0 &&
       (int)opal_list_get_size(&mca_oob_tcp_component.tcp_peer_list) > 
       mca_oob_tcp_component.tcp_peer_limit) {
        old = (mca_oob_tcp_peer_t *) 
              opal_list_get_last(&mca_oob_tcp_component.tcp_peer_list);
        while(1) {
            if(0 == opal_list_get_size(&(old->peer_send_queue)) &&
               NULL == peer->peer_recv_msg) { 
                opal_list_remove_item(&mca_oob_tcp_component.tcp_peer_list, 
                                      (opal_list_item_t *) old);
                MCA_OOB_TCP_PEER_RETURN(old);
                break;
            } else {
                old = (mca_oob_tcp_peer_t *) opal_list_get_prev(old);
                if(opal_list_get_begin(&mca_oob_tcp_component.tcp_peer_list) == (opal_list_item_t*)old) {
                    /* we tried, but we couldn't find one that was valid to get rid
                     * of. Oh well. */
                    break;
                }
            }
        }
    }
    OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_lock);
    return peer;
}
Beispiel #6
0
int main(int argc, char **argv)
{
    /* local variables */
    opal_list_t list, x;
    size_t indx,i,list_size, tmp_size_1, tmp_size_2,size_elements;
    int error_cnt;
    test_data_t *elements, *ele;
    opal_list_item_t *item;

    opal_init();

    test_init("opal_list_t");

    /* initialize list */
    OBJ_CONSTRUCT(&list, opal_list_t);
    OBJ_CONSTRUCT(&x, opal_list_t);

    /* check length of list */
    list_size=opal_list_get_size(&list);
    if( 0 == list_size ) {
        test_success();
    } else {
        test_failure(" opal_list_get_size");
    }

    /* check for empty */
    if (opal_list_is_empty(&list)) {
        test_success();
    } else {
        test_failure(" opal_list_is_empty(empty list)");
    }

    /* create test elements */
    size_elements=4;
    elements=(test_data_t *)malloc(sizeof(test_data_t)*size_elements);
    assert(elements);
    for(i=0 ; i < size_elements ; i++) {
        OBJ_CONSTRUCT(elements + i, test_data_t);
        (elements+i)->data=i;
    }

    /* populate list */
    for(i=0 ; i < size_elements ; i++) {
        opal_list_append(&list,(opal_list_item_t *)(elements+i));
    }
    list_size=opal_list_get_size(&list);
    if( list_size == size_elements ) {
        test_success();
    } else {
        test_failure(" populating list");
    }

    /* checking for empty on non-empty list */
    if (!opal_list_is_empty(&list)) {
        test_success();
    } else {
        test_failure(" opal_list_is_empty(non-empty list)");
    }

    /* check that list is ordered as expected */
    i=0;
    error_cnt=0;
    for(ele = (test_data_t *) opal_list_get_first(&list);
            ele != (test_data_t *) opal_list_get_end(&list);
            ele = (test_data_t *) ((opal_list_item_t *)ele)->opal_list_next) {
        if( ele->data != i )
            error_cnt++;
        i++;
    }
    if( 0 == error_cnt ) {
        test_success();
    } else {
        test_failure(" error in list order ");
    }

    /* check opal_list_get_first */
    ele = (test_data_t *)NULL;
    ele = (test_data_t *) opal_list_get_first(&list);
    assert(ele);
    if( 0 == ele->data ) {
        test_success();
    } else {
        test_failure(" error in opal_list_get_first");
    }
    i=0;
    for(ele = (test_data_t *) opal_list_get_first(&list);
            ele != (test_data_t *) opal_list_get_end(&list);
            ele = (test_data_t *) ((opal_list_item_t *)ele)->opal_list_next) {
        i++;
    }
    if( size_elements == i ) {
        test_success();
    } else {
        test_failure(" error in opal_list_get_first - list size changed ");
    }

    /* check opal_list_get_last */
    ele = (test_data_t *)NULL;
    ele = (test_data_t *) opal_list_get_last(&list);
    assert(ele);
    if( (size_elements-1) == ele->data ) {
        test_success();
    } else {
        test_failure(" error in opal_list_get_last");
    }
    i=0;
    for(ele = (test_data_t *) opal_list_get_first(&list);
            ele != (test_data_t *) opal_list_get_end(&list);
            ele = (test_data_t *) ((opal_list_item_t *)ele)->opal_list_next) {
        i++;
    }
    if( size_elements == i ) {
        test_success();
    } else {
        test_failure(" error in opal_list_get_first - list size changed ");
    }

    /* check opal_list_remove_first */
    ele = (test_data_t *)NULL;
    ele = (test_data_t *) opal_list_remove_first(&list);
    assert(ele);
    if( 0 == ele->data ) {
        test_success();
    } else {
        test_failure(" error in opal_list_remove_first");
    }
    i=0;
    for(ele = (test_data_t *) opal_list_get_first(&list);
            ele != (test_data_t *) opal_list_get_end(&list);
            ele = (test_data_t *) ((opal_list_item_t *)ele)->opal_list_next) {
        i++;
    }
    if( (size_elements-1) == i ) {
        test_success();
    } else {
        test_failure(" error in opal_list_remove_first - list size changed ");
    }

    /* test opal_list_prepend */
    opal_list_prepend(&list,(opal_list_item_t *)elements);
    ele = (test_data_t *)NULL;
    ele = (test_data_t *) opal_list_get_first(&list);
    assert(ele);
    if( 0 == ele->data ) {
        test_success();
    } else {
        test_failure(" error in opal_list_prepend");
    }
    i=0;
    for(ele = (test_data_t *) opal_list_get_first(&list);
            ele != (test_data_t *) opal_list_get_end(&list);
            ele = (test_data_t *) ((opal_list_item_t *)ele)->opal_list_next) {
        i++;
    }
    if( size_elements == i ) {
        test_success();
    } else {
        test_failure(" error in opal_list_prepend - list size changed ");
    }

    /* check opal_list_remove_last */
    ele = (test_data_t *)NULL;
    ele = (test_data_t *) opal_list_remove_last(&list);
    assert(ele);
    if( (size_elements-1) == ele->data ) {
        test_success();
    } else {
        test_failure(" error in opal_list_remove_last");
    }
    i=0;
    for(ele = (test_data_t *) opal_list_get_first(&list);
            ele != (test_data_t *) opal_list_get_end(&list);
            ele = (test_data_t *) ((opal_list_item_t *)ele)->opal_list_next) {
        i++;
    }
    if( (size_elements-1) == i ) {
        test_success();
    } else {
        test_failure(" error in opal_list_remove_last - list size changed ");
    }

    /* test opal_list_append */
    opal_list_append(&list,(opal_list_item_t *)(elements+size_elements-1));
    ele = (test_data_t *)NULL;
    ele = (test_data_t *) opal_list_get_last(&list);
    assert(ele);
    if( (size_elements-1) == ele->data ) {
        test_success();
    } else {
        test_failure(" error in opal_list_append");
    }
    i=0;
    for(ele = (test_data_t *) opal_list_get_first(&list);
            ele != (test_data_t *) opal_list_get_end(&list);
            ele = (test_data_t *) ((opal_list_item_t *)ele)->opal_list_next) {
        i++;
    }
    if( size_elements == i ) {
        test_success();
    } else {
        test_failure(" error in opal_list_append - list size changed ");
    }

    /* remove element from list */
    indx=size_elements/2;
    if( 0 == indx )
        indx=1;
    assert(2 <= size_elements);
    ele = (test_data_t *)NULL;
    ele = (test_data_t *) 
        opal_list_remove_item(&list,(opal_list_item_t *)(elements+indx));
    assert(ele);
    if( (indx-1) == ele->data ) {
        test_success();
    } else {
        test_failure(" error in opal_list_remove - previous");
    }
    ele=(test_data_t *)(((opal_list_item_t *)ele)->opal_list_next);
    if( (indx+1) == ele->data ) {
        test_success();
    } else {
        test_failure(" error in opal_list_remove - next");
    }
    i=0;
    for(ele = (test_data_t *) opal_list_get_first(&list);
            ele != (test_data_t *) opal_list_get_end(&list);
            ele = (test_data_t *) ((opal_list_item_t *)ele)->opal_list_next) {
        i++;
    }
    if( (size_elements-1) == i ) {
        test_success();
    } else {
        test_failure(" error in opal_list_remove - list size changed incorrectly");
    }

    /* test the insert function */
    i=opal_list_insert(&list,(opal_list_item_t *)(elements+indx),indx);
    if( 1 == i ) {
        test_success();
    } else {
        test_failure(" error in opal_list_remove_item \n");
    }

    i=0;
    for(ele = (test_data_t *) opal_list_get_first(&list);
            ele != (test_data_t *) opal_list_get_end(&list);
            ele = (test_data_t *) ((opal_list_item_t *)ele)->opal_list_next) {
        i++;
    }
    if( size_elements == i ) {
        test_success();
    } else {
        test_failure(" error in opal_list_insert - incorrect list length");
    }
    i=0;
    error_cnt=0;
    for(ele = (test_data_t *) opal_list_get_first(&list);
            ele != (test_data_t *) opal_list_get_end(&list);
            ele = (test_data_t *) ((opal_list_item_t *)ele)->opal_list_next) {
        if( ele->data != i )
            error_cnt++;
        i++;
    }
    if( 0 == error_cnt ) {
        test_success();
    } else {
        test_failure(" error in list order - opal_list_remove_item ");
    }

    /* test the splice and join functions  */
    list_size = opal_list_get_size(&list);
    for (i = 0, item = opal_list_get_first(&list) ; 
         i < list_size / 2 ; ++i, item = opal_list_get_next(item)) {
    }
    opal_list_splice(&x, opal_list_get_end(&x),
                     &list, item, opal_list_get_end(&list));
    tmp_size_1 = opal_list_get_size(&list);
    tmp_size_2 = opal_list_get_size(&x);
    if (tmp_size_1 != i) {
        test_failure(" error in splice (size of list)");
    } else if (tmp_size_2 != list_size - tmp_size_1) {
        test_failure(" error in splice (size of x)");
    } else {
        test_success();
    }

    opal_list_join(&list, opal_list_get_end(&list), &x);
    tmp_size_1 = opal_list_get_size(&list);
    tmp_size_2 = opal_list_get_size(&x);
    if (tmp_size_1 != list_size) {
        test_failure(" error in join (size of list)");
    } else if (tmp_size_2 != 0) {
        test_failure(" error in join (size of x)");
    } else {
        test_success();
    }

    if (NULL != elements) free(elements);

    opal_finalize();

    return test_finalize();
}
static int mca_bcol_iboffload_fanin_leader_progress(
                mca_bcol_iboffload_module_t *iboffload,
                struct mca_bcol_iboffload_collreq_t *coll_request)
{
    int rc = OMPI_SUCCESS, leader_rank = 0, rank,
        sbgp_size = iboffload->ibnet->super.group_size;

    struct mqe_task *last_wait = NULL;

    mca_bcol_iboffload_task_t *wait_task = NULL;
    mca_bcol_iboffload_frag_t *preposted_recv_frag = NULL;

    struct mqe_task **mqe_ptr_to_set;
    mca_bcol_iboffload_collfrag_t *coll_fragment;

    coll_fragment = (mca_bcol_iboffload_collfrag_t *)
                         opal_list_get_last(&coll_request->work_requests);

    mqe_ptr_to_set = &coll_fragment->to_post;

    if (OPAL_UNLIKELY(false == BCOL_IBOFFLOAD_MQ_HAVE_CREDITS(
               iboffload, coll_fragment->mq_index, coll_fragment->mq_credits))) {
        IBOFFLOAD_VERBOSE(10, ("There are not enough credits on MQ.\n"));
        goto out_of_resources;
    }

    for (rank = leader_rank + 1; rank < sbgp_size; ++rank) {
       /* post wait */
        preposted_recv_frag = mca_bcol_iboffload_get_preposted_recv_frag(
                                        iboffload, rank, coll_request->qp_index);
        if(NULL == preposted_recv_frag) {
            IBOFFLOAD_VERBOSE(10, ("Failing for getting prepost recv frag.\n"));
            goto out_of_resources;
        }

        wait_task = mca_bcol_iboffload_get_wait_task(iboffload, rank, 1,
                             preposted_recv_frag, coll_request->qp_index, NULL);
        if(NULL == wait_task) {
            IBOFFLOAD_VERBOSE(10, ("Failing for getting wait task.\n"));
            goto out_of_resources;
        }

        APPEND_TO_TASKLIST(mqe_ptr_to_set, wait_task, last_wait);
        MCA_BCOL_IBOFFLOAD_APPEND_TASK_TO_LIST(coll_fragment->task_next, wait_task);
    }

   /* end of list */
    *mqe_ptr_to_set = NULL;

    last_wait->flags |= MQE_WR_FLAG_SIGNAL;

    coll_fragment->signal_task_wr_id = last_wait->wr_id;
    last_wait->wr_id = (uint64_t) (uintptr_t) coll_fragment;

    /* post the mwr */
    rc = mca_bcol_iboffload_post_mqe_tasks(iboffload, coll_fragment->to_post);
    if(OMPI_SUCCESS != rc) {
        IBOFFLOAD_VERBOSE(10, ("MQE task posting failing.\n"));
        /* Note: need to clean up */
        return rc;
    }

    MCA_BCOL_UPDATE_ORDER_COUNTER(&iboffload->super, coll_request->order_info);

    return OMPI_SUCCESS;

out_of_resources:
    /* Release all resources */
    IBOFFLOAD_VERBOSE(10, ("Fan-in, adding collfrag to collfrag_pending"));
    return mca_bcol_iboffload_free_resources_and_move_to_pending(coll_fragment, iboffload);
}
Beispiel #8
0
/* during init, we setup two channels for both xmit and recv:
 * (a) a public address announcement channel. There are two variants
 *     of this:
 *         (1) system processes - e.g., daemons, tools. This channel
 *             is reserved solely for their use in performing admin
 *             functions
 *         (2) application processes. This channel is used to announce
 *             their existence and contact info for auto-wireup
 * (b) our own group's channel, which is where our own output
 *     will be sent. At this time, we assume that we always
 *     want to hear our peers, so this channels is also
 *     bidirectional
 *
 * In addition, the HNP opens a third channel which is used solely
 * for cmd-control purposes. This is where a tool, for example, might
 * send a cmd to the HNP to take some action - there is no point in
 * having that cmd echo around to every daemon and/or other tool
 * in the system.
 */
static int init(void)
{
    int rc;
    
    if (init_completed) {
        return ORTE_SUCCESS;
    }
    
    OPAL_OUTPUT_VERBOSE((2, orte_rmcast_base.rmcast_output, "%s rmcast:udp: init called",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    
    /* setup local ctl */
    OBJ_CONSTRUCT(&ctl, orte_thread_ctl_t);

    /* flag that we are unreliable and need help */
    orte_rmcast_base.unreliable_xport = true;

    if (ORTE_PROC_IS_HNP || ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_SCHEDULER) {
        /* open the system channel - it will be our input/output channel as well */
        if (ORTE_SUCCESS != (rc = open_channel(ORTE_RMCAST_SYS_CHANNEL,
                                               "SYSTEM",
                                               NULL, -1, NULL, ORTE_RMCAST_BIDIR))) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }
        orte_rmcast_base.my_input_channel = (rmcast_base_channel_t*)opal_list_get_last(&orte_rmcast_base.channels);
        orte_rmcast_base.my_output_channel = orte_rmcast_base.my_input_channel;

        /* open the heartbeat channel */
        if (ORTE_SUCCESS != (rc = open_channel(ORTE_RMCAST_HEARTBEAT_CHANNEL, "heartbeat",
                                               NULL, -1, NULL, ORTE_RMCAST_BIDIR))) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }
    } else if (ORTE_PROC_IS_APP) {
        /* setup our grp xmit/recv channels, if given */
        if (NULL != orte_rmcast_base.my_group_name) {
            if (ORTE_SUCCESS != (rc = open_channel(orte_rmcast_base.my_group_number,
                                                   "recv",
                                                   NULL, -1, NULL, ORTE_RMCAST_BIDIR))) {
                ORTE_ERROR_LOG(rc);
                return rc;
            }
            orte_rmcast_base.my_input_channel = (rmcast_base_channel_t*)opal_list_get_last(&orte_rmcast_base.channels);
            if (ORTE_SUCCESS != (rc = open_channel(orte_rmcast_base.my_group_number+1,
                                                   "xmit",
                                                   NULL, -1, NULL, ORTE_RMCAST_BIDIR))) {
                ORTE_ERROR_LOG(rc);
                return rc;
            }
            orte_rmcast_base.my_output_channel = (rmcast_base_channel_t*)opal_list_get_last(&orte_rmcast_base.channels);
        }
    }

    /* setup the recv for missed message replacement */
    if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
                                                      ORTE_RML_TAG_MISSED_MSG,
                                                      ORTE_RML_PERSISTENT,
                                                      resend_data,
                                                      NULL))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
    if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
                                                      ORTE_RML_TAG_MULTICAST,
                                                      ORTE_RML_PERSISTENT,
                                                      missed_msg,
                                                      NULL))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* start the recv threads */
    if (ORTE_SUCCESS != (rc = orte_rmcast_base_start_threads())) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    init_completed = true;
    comm_enabled = true;

    return ORTE_SUCCESS;
}
Beispiel #9
0
/*
 * register memory
 */
int mca_mpool_rdma_register(mca_mpool_base_module_t *mpool, void *addr,
                              size_t size, uint32_t flags,
                              mca_mpool_base_registration_t **reg)
{
    mca_mpool_rdma_module_t *mpool_rdma = (mca_mpool_rdma_module_t*)mpool;
    mca_mpool_base_registration_t *rdma_reg;
    ompi_free_list_item_t *item;
    unsigned char *base, *bound;
    int rc;

    /* if cache bypass is requested don't use the cache */
    if(flags & MCA_MPOOL_FLAGS_CACHE_BYPASS) {
        return register_cache_bypass(mpool, addr, size, flags, reg);
    }

    base = down_align_addr(addr, mca_mpool_base_page_size_log);
    bound = up_align_addr((void*)((char*) addr + size - 1),
             mca_mpool_base_page_size_log);
    OPAL_THREAD_LOCK(&mpool->rcache->lock);
    /* look through existing regs if not persistent registration requested.
     * Persistent registration are always registered and placed in the cache */
    if(!(flags & MCA_MPOOL_FLAGS_PERSIST)) {
        /* check to see if memory is registered */
        mpool->rcache->rcache_find(mpool->rcache, addr, size, reg);
        if(*reg != NULL &&
                (mca_mpool_rdma_component.leave_pinned ||
                 ((*reg)->flags & MCA_MPOOL_FLAGS_PERSIST) ||
                 ((*reg)->base == base && (*reg)->bound == bound))) {
            if(0 == (*reg)->ref_count &&
                    mca_mpool_rdma_component.leave_pinned) {
                opal_list_remove_item(&mpool_rdma->mru_list,
                        (opal_list_item_t*)(*reg));
            }
            mpool_rdma->stat_cache_hit++;
            (*reg)->ref_count++;
            OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
            return MPI_SUCCESS;
        }

        mpool_rdma->stat_cache_miss++;
        *reg = NULL; /* in case previous find found something */

        /* If no suitable registration is in cache and leave_pinned isn't
         * set and size of registration cache is unlimited don't use the cache.
         * This is optimisation in case limit is not set. If limit is set we
         * have to put registration into the cache to determine when we hit
         * memory registration limit.
         * NONE: cache is still used for persistent registrations so previous
         * find can find something */
        if(!mca_mpool_rdma_component.leave_pinned &&
                 mca_mpool_rdma_component.rcache_size_limit == 0) {
            OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
            return register_cache_bypass(mpool, addr, size, flags, reg);
        }
    }

    OMPI_FREE_LIST_GET(&mpool_rdma->reg_list, item, rc);
    if(OMPI_SUCCESS != rc) {
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        return rc;
    }
    rdma_reg = (mca_mpool_base_registration_t*)item;

    rdma_reg->mpool = mpool;
    rdma_reg->base = base;
    rdma_reg->bound = bound;
    rdma_reg->flags = flags;

    while((rc = mpool->rcache->rcache_insert(mpool->rcache, rdma_reg,
             mca_mpool_rdma_component.rcache_size_limit)) ==
            OMPI_ERR_TEMP_OUT_OF_RESOURCE) {
        mca_mpool_base_registration_t *old_reg;
        /* try to remove one unused reg and retry */
        old_reg = (mca_mpool_base_registration_t*)
            opal_list_get_last(&mpool_rdma->mru_list);
        if(opal_list_get_end(&mpool_rdma->mru_list) !=
                (opal_list_item_t*)old_reg) {
            rc = dereg_mem(mpool, old_reg);
            if(MPI_SUCCESS == rc) {
                mpool->rcache->rcache_delete(mpool->rcache, old_reg);
                opal_list_remove_item(&mpool_rdma->mru_list,
                        (opal_list_item_t*)old_reg);
                OMPI_FREE_LIST_RETURN(&mpool_rdma->reg_list,
                        (ompi_free_list_item_t*)old_reg);
                mpool_rdma->stat_evicted++;
            } else
                break;
        } else
            break;
    }

    if(rc != OMPI_SUCCESS) {
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        OMPI_FREE_LIST_RETURN(&mpool_rdma->reg_list, item);
        return rc;
    }

    rc = mpool_rdma->resources.register_mem(mpool_rdma->resources.reg_data,
            base, bound - base + 1, rdma_reg);

    if(rc != OMPI_SUCCESS) {
        mpool->rcache->rcache_delete(mpool->rcache, rdma_reg);
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        OMPI_FREE_LIST_RETURN(&mpool_rdma->reg_list, item);
        return rc;
    }

    *reg = rdma_reg;
    (*reg)->ref_count++;
    OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
    return OMPI_SUCCESS;
}
Beispiel #10
0
int orte_regex_create(char *nodelist, char **regexp)
{
    char *node;
    char prefix[ORTE_MAX_NODE_PREFIX];
    int i, j, len, startnum, nodenum, numdigits;
    bool found, fullname;
    char *suffix, *sfx;
    orte_regex_node_t *ndreg;
    orte_regex_range_t *range;
    opal_list_t nodeids;
    opal_list_item_t *item, *itm2;
    char **regexargs = NULL, *tmp, *tmp2;
    char *cptr;

    /* define the default */
    *regexp = NULL;

    cptr = strchr(nodelist, ',');
    if (NULL == cptr) {
        /* if there is only one node, don't bother */
        *regexp = strdup(nodelist);
        return ORTE_SUCCESS;
    }

    /* setup the list of results */
    OBJ_CONSTRUCT(&nodeids, opal_list_t);

    /* cycle thru the array of nodenames */
    node = nodelist;
    while (NULL != (cptr = strchr(node, ',')) || 0 < strlen(node)) {
        if (NULL != cptr) {
            *cptr = '\0';
        }
        /* determine this node's prefix by looking for first non-alpha char */
        fullname = false;
        len = strlen(node);
        startnum = -1;
        memset(prefix, 0, ORTE_MAX_NODE_PREFIX);
        numdigits = 0;
        for (i=0, j=0; i < len; i++) {
            if (!isalpha(node[i])) {
                /* found a non-alpha char */
                if (!isdigit(node[i])) {
                    /* if it is anything but a digit, we just use
                     * the entire name
                     */
                    fullname = true;
                    break;
                }
                /* count the size of the numeric field - but don't
                 * add the digits to the prefix
                 */
                numdigits++;
                if (startnum < 0) {
                    /* okay, this defines end of the prefix */
                    startnum = i;
                }
                continue;
            }
            if (startnum < 0) {
                prefix[j++] = node[i];
            }
        }
        if (fullname || startnum < 0) {
            /* can't compress this name - just add it to the list */
            ndreg = OBJ_NEW(orte_regex_node_t);
            ndreg->prefix = strdup(node);
            opal_list_append(&nodeids, &ndreg->super);
            /* move to the next posn */
            if (NULL == cptr) {
                break;
            }
            node = cptr + 1;
            continue;
        }
        /* convert the digits and get any suffix */
        nodenum = strtol(&node[startnum], &sfx, 10);
        if (NULL != sfx) {
            suffix = strdup(sfx);
        } else {
            suffix = NULL;
        }
        /* is this nodeid already on our list? */
        found = false;
        for (item = opal_list_get_first(&nodeids);
             !found && item != opal_list_get_end(&nodeids);
             item = opal_list_get_next(item)) {
            ndreg = (orte_regex_node_t*)item;
            if (0 < strlen(prefix) && NULL == ndreg->prefix) {
                continue;
            }
            if (0 == strlen(prefix) && NULL != ndreg->prefix) {
                continue;
            }
            if (0 < strlen(prefix) && NULL != ndreg->prefix
                && 0 != strcmp(prefix, ndreg->prefix)) {
                continue;
            }
            if (NULL == suffix && NULL != ndreg->suffix) {
                continue;
            }
            if (NULL != suffix && NULL == ndreg->suffix) {
                continue;
            }
            if (NULL != suffix && NULL != ndreg->suffix &&
                0 != strcmp(suffix, ndreg->suffix)) {
                continue;
            }
            if (numdigits != ndreg->num_digits) {
                continue;
            }
            /* found a match - flag it */
            found = true;
            /* get the last range on this nodeid - we do this
             * to preserve order
             */
            range = (orte_regex_range_t*)opal_list_get_last(&ndreg->ranges);
            if (NULL == range) {
                /* first range for this nodeid */
                range = OBJ_NEW(orte_regex_range_t);
                range->start = nodenum;
                range->cnt = 1;
                opal_list_append(&ndreg->ranges, &range->super);
                break;
            }
            /* see if the node number is out of sequence */
            if (nodenum != (range->start + range->cnt)) {
                /* start a new range */
                range = OBJ_NEW(orte_regex_range_t);
                range->start = nodenum;
                range->cnt = 1;
                opal_list_append(&ndreg->ranges, &range->super);
                break;
            }
            /* everything matches - just increment the cnt */
            range->cnt++;
            break;
        }
        if (!found) {
            /* need to add it */
            ndreg = OBJ_NEW(orte_regex_node_t);
            if (0 < strlen(prefix)) {
                ndreg->prefix = strdup(prefix);
            }
            if (NULL != suffix) {
                ndreg->suffix = strdup(suffix);
            }
            ndreg->num_digits = numdigits;
            opal_list_append(&nodeids, &ndreg->super);
            /* record the first range for this nodeid - we took
             * care of names we can't compress above
             */
            range = OBJ_NEW(orte_regex_range_t);
            range->start = nodenum;
            range->cnt = 1;
            opal_list_append(&ndreg->ranges, &range->super);
        }
        if (NULL != suffix) {
            free(suffix);
        }
        /* move to the next posn */
        if (NULL == cptr) {
            break;
        }
        node = cptr + 1;
    }

    /* begin constructing the regular expression */
    while (NULL != (item = opal_list_remove_first(&nodeids))) {
        ndreg = (orte_regex_node_t*)item;

        /* if no ranges, then just add the name */
        if (0 == opal_list_get_size(&ndreg->ranges)) {
            if (NULL != ndreg->prefix) {
                /* solitary node */
                asprintf(&tmp, "%s", ndreg->prefix);
                opal_argv_append_nosize(&regexargs, tmp);
                free(tmp);
            }
            OBJ_RELEASE(ndreg);
            continue;
        }
        /* start the regex for this nodeid with the prefix */
        if (NULL != ndreg->prefix) {
            asprintf(&tmp, "%s[%d:", ndreg->prefix, ndreg->num_digits);
        } else {
            asprintf(&tmp, "[%d:", ndreg->num_digits);
        }
        /* add the ranges */
        while (NULL != (itm2 = opal_list_remove_first(&ndreg->ranges))) {
            range = (orte_regex_range_t*)itm2;
            if (1 == range->cnt) {
                asprintf(&tmp2, "%s%d,", tmp, range->start);
            } else {
                asprintf(&tmp2, "%s%d-%d,", tmp, range->start, range->start + range->cnt - 1);
            }
            free(tmp);
            tmp = tmp2;
            OBJ_RELEASE(range);
        }
        /* replace the final comma */
        tmp[strlen(tmp)-1] = ']';
        if (NULL != ndreg->suffix) {
            /* add in the suffix, if provided */
            asprintf(&tmp2, "%s%s", tmp, ndreg->suffix);
            free(tmp);
            tmp = tmp2;
        }
        opal_argv_append_nosize(&regexargs, tmp);
        free(tmp);
        OBJ_RELEASE(ndreg);
    }

    /* assemble final result */
    *regexp = opal_argv_join(regexargs, ',');
    /* cleanup */
    opal_argv_free(regexargs);

    OBJ_DESTRUCT(&nodeids);


    return ORTE_SUCCESS;
}
int ompi_show_all_mca_params(int32_t rank, int requested, char *nodename) {
    opal_list_t *info;
    opal_list_item_t *i;
    mca_base_param_info_t *item;
    char *value_string;
    int value_int;
    FILE *fp = NULL;
    time_t timestamp;
    mca_base_param_source_t source;
    char *src_file;
    char *src_string;
    
    if (rank != 0) {
        return OMPI_SUCCESS;
    }
    
    timestamp = time(NULL);
    
    /* Open the file if one is specified */
    if (0 != strlen(ompi_mpi_show_mca_params_file)) {
        if ( NULL == (fp = fopen(ompi_mpi_show_mca_params_file, "w")) ) {
            opal_output(0, "Unable to open file <%s> to write MCA parameters", ompi_mpi_show_mca_params_file);
            return OMPI_ERR_FILE_OPEN_FAILURE;
        }
        fprintf(fp, "#\n");
        fprintf(fp, "# This file was automatically generated on %s", ctime(&timestamp));
        fprintf(fp, "# by MPI_COMM_WORLD rank %d (out of a total of %d) on %s\n", rank, requested, nodename );
        fprintf(fp, "#\n");
    }
    
    mca_base_param_dump(&info, false);
    for (i =  opal_list_get_first(info); 
         i != opal_list_get_last(info);
         i =  opal_list_get_next(i)) {
        item = (mca_base_param_info_t*) i;

        /* If this is an internal param, don't print it */
        if (item->mbpp_internal) {
            continue;
        }
        
        /* get the source - where the param was last set */
        if (OPAL_SUCCESS != 
            mca_base_param_lookup_source(item->mbpp_index, &source, &src_file)) {
            continue;
        }
        
        /* is this a default value and we are not displaying
         * defaults, ignore this one
         */
        if (MCA_BASE_PARAM_SOURCE_DEFAULT == source && !show_default_mca_params) {
            continue;
        }
        
        /* is this a file value and we are not displaying files,
         * ignore it
         */
        if (MCA_BASE_PARAM_SOURCE_FILE == source && !show_file_mca_params) {
            continue;
        }
        
        /* is this an enviro value and we are not displaying enviros,
         * ignore it
         */
        if (MCA_BASE_PARAM_SOURCE_ENV == source && !show_enviro_mca_params) {
            continue;
        }
        
        /* is this an API value and we are not displaying APIs,
         * ignore it
         */
        if (MCA_BASE_PARAM_SOURCE_OVERRIDE == source && !show_override_mca_params) {
            continue;
        }
        
        /* Get the parameter name, and convert it to a printable string */
        if (MCA_BASE_PARAM_TYPE_STRING == item->mbpp_type) {
            mca_base_param_lookup_string(item->mbpp_index, &value_string);
            if (NULL == value_string) {
                value_string = strdup("");
            }
        } else {
            mca_base_param_lookup_int(item->mbpp_index, &value_int);
            asprintf(&value_string, "%d", value_int);
        }
        
        switch(source) {
            case MCA_BASE_PARAM_SOURCE_DEFAULT:
                src_string = "default value";
                break;
            case MCA_BASE_PARAM_SOURCE_ENV:
                src_string = "environment";
                break;
            case MCA_BASE_PARAM_SOURCE_FILE:
                src_string = "file";
                break;
            case MCA_BASE_PARAM_SOURCE_OVERRIDE:
                src_string = "API override";
                break;
            default:
                src_string = NULL;
                break;
        }
        
        /* Print the parameter */
        if (0 != strlen(ompi_mpi_show_mca_params_file)) {
            if (NULL == src_file) {
                fprintf(fp, "%s=%s (%s)\n", item->mbpp_full_name, value_string,
                        (NULL != src_string ? src_string : "unknown"));
            } else {
                fprintf(fp, "%s=%s (%s:%s)\n", item->mbpp_full_name, value_string,
                        (NULL != src_string ? src_string : "unknown"), src_file);
            }
        } else {
            if (NULL == src_file) {
                opal_output(0, "%s=%s (%s)\n", item->mbpp_full_name, value_string,
                            (NULL != src_string ? src_string : "unknown"));
            } else {
                opal_output(0, "%s=%s (%s:%s)\n", item->mbpp_full_name, value_string,
                            (NULL != src_string ? src_string : "unknown"), src_file);
            }
        }
        
        free(value_string);
    }
    
    /* Close file, cleanup allocated memory*/
    if (0 != strlen(ompi_mpi_show_mca_params_file)) {
        fclose(fp);
    }
    mca_base_param_dump_release(info);
    
    return OMPI_SUCCESS;
}
/*
 * determine the proper starting point for the next mapping operation
 */
opal_list_item_t* orte_rmaps_base_get_starting_point(opal_list_t *node_list, orte_job_t *jdata)
{
    opal_list_item_t *item, *cur_node_item;
    orte_node_t *node, *nd1, *ndmin;
    int overload;
    
    /* if a bookmark exists from some prior mapping, set us to start there */
    if (NULL != jdata->bookmark) {
        cur_node_item = NULL;
        /* find this node on the list */
        for (item = opal_list_get_first(node_list);
             item != opal_list_get_end(node_list);
             item = opal_list_get_next(item)) {
            node = (orte_node_t*)item;
            
            if (node->index == jdata->bookmark->index) {
                cur_node_item = item;
                break;
            }
        }
        /* see if we found it - if not, just start at the beginning */
        if (NULL == cur_node_item) {
            cur_node_item = opal_list_get_first(node_list); 
        }
    } else {
        /* if no bookmark, then just start at the beginning of the list */
        cur_node_item = opal_list_get_first(node_list);
    }
    
    /* is this node fully subscribed? If so, then the first
     * proc we assign will oversubscribe it, so let's look
     * for another candidate
     */
    node = (orte_node_t*)cur_node_item;
    ndmin = node;
    overload = ndmin->slots_inuse - ndmin->slots_alloc;
    if (node->slots_inuse >= node->slots_alloc) {
        /* work down the list - is there another node that
         * would not be oversubscribed?
         */
        if (cur_node_item != opal_list_get_last(node_list)) {
            item = opal_list_get_next(cur_node_item);
        } else {
            item = opal_list_get_first(node_list);
        }
        while (item != cur_node_item) {
            nd1 = (orte_node_t*)item;
            if (nd1->slots_inuse < nd1->slots_alloc) {
                /* this node is not oversubscribed! use it! */
                return item;
            }
            /* this one was also oversubscribed, keep track of the
             * node that has the least usage - if we can't
             * find anyone who isn't fully utilized, we will
             * start with the least used node
             */
            if (overload >= (nd1->slots_inuse - nd1->slots_alloc)) {
                ndmin = nd1;
                overload = ndmin->slots_inuse - ndmin->slots_alloc;
            }
            if (item == opal_list_get_last(node_list)) {
                item = opal_list_get_first(node_list);
            } else {
                item= opal_list_get_next(item);
            }
        }
        /* if we get here, then we cycled all the way around the
         * list without finding a better answer - just use the node
         * that is minimally overloaded
         */
        cur_node_item = (opal_list_item_t*)ndmin;
    }

    return cur_node_item;
}