Exemplo n.º 1
0
main(int argc, char **argv)
{
    int rc;
    char *regex, *save;
    char **nodes=NULL;
    int i;

    if (argc < 1 || NULL == argv[1]) {
        fprintf(stderr, "usage: regex <comma-separated list of nodes>\n");
        return 1;
    }
    
    orte_init(&argc, &argv, ORTE_PROC_NON_MPI);

    if (NULL != strchr(argv[1], '[')) {
        /* given a regex to analyze */
        fprintf(stderr, "ANALYZING REGEX: %s\n", argv[1]);
        if (ORTE_SUCCESS != (rc = orte_regex_extract_node_names(argv[1], &nodes))) {
            ORTE_ERROR_LOG(rc);
        }
        for (i=0; NULL != nodes; i++) {
            fprintf(stderr, "%s\n", nodes[i]);
        }
        opal_argv_free(nodes);
        orte_finalize();
        return 0;
    }

    save = strdup(argv[1]);
    if (ORTE_SUCCESS != (rc = orte_regex_create(save, &regex))) {
        ORTE_ERROR_LOG(rc);
    } else {
        fprintf(stderr, "REGEX: %s\n", regex);
        if (ORTE_SUCCESS != (rc = orte_regex_extract_node_names(regex, &nodes))) {
            ORTE_ERROR_LOG(rc);
        }
        free(regex);
        regex = opal_argv_join(nodes, ',');
        opal_argv_free(nodes);
        if (0 == strcmp(regex, argv[1])) {
            fprintf(stderr, "EXACT MATCH\n");
        } else {
            fprintf(stderr, "ERROR: %s\n", regex);
        }
        free(regex);
    }
    free(save);
}
Exemplo n.º 2
0
static int rte_init(void)
{
    int ret;
    char *error = NULL;
    char **hosts = NULL;

    /* run the prolog */
    if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) {
        error = "orte_ess_base_std_prolog";
        goto error;
    }

    /* Start by getting a unique name */
    tm_set_name();

    /* if I am a daemon, complete my setup using the
     * default procedure
     */
    if (ORTE_PROC_IS_DAEMON) {
        if (NULL != orte_node_regex) {
            /* extract the nodes */
            if (ORTE_SUCCESS != (ret =
                orte_regex_extract_node_names(orte_node_regex, &hosts)) ||
                NULL == hosts) {
                error = "orte_regex_extract_node_names";
                goto error;
            }
        }
        if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup(hosts))) {
            ORTE_ERROR_LOG(ret);
            error = "orte_ess_base_orted_setup";
            goto error;
        }
        opal_argv_free(hosts);
        return ORTE_SUCCESS;
    }

    if (ORTE_PROC_IS_TOOL) {
        /* otherwise, if I am a tool proc, use that procedure */
        if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) {
            ORTE_ERROR_LOG(ret);
            error = "orte_ess_base_tool_setup";
            goto error;
        }
        return ORTE_SUCCESS;

    }

    /* no other options are supported! */
    error = "ess_error";
    ret = ORTE_ERROR;

  error:
    if (ORTE_ERR_SILENT != ret && !orte_report_silent_errors) {
        orte_show_help("help-orte-runtime.txt",
                       "orte_init:startup:internal-failure",
                       true, error, ORTE_ERROR_NAME(ret), ret);
    }

    return ret;
}
Exemplo n.º 3
0
static int rte_init(void)
{
    int ret;
    char *error = NULL;
    char **hosts = NULL;

    /* run the prolog */
    if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) {
        error = "orte_ess_base_std_prolog";
        goto error;
    }
    
    /* Start by getting a unique name from the enviro */
    env_set_name();

    /* if I am a daemon, complete my setup using the
     * default procedure
     */
    if (ORTE_PROC_IS_DAEMON) {
        if (NULL != orte_node_regex) {
            /* extract the nodes */
            if (ORTE_SUCCESS != (ret = orte_regex_extract_node_names(orte_node_regex, &hosts))) {
                error = "orte_regex_extract_node_names";
                goto error;
            }
        }
        if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup(hosts))) {
            ORTE_ERROR_LOG(ret);
            error = "orte_ess_base_orted_setup";
            goto error;
        }
        opal_argv_free(hosts);
        return ORTE_SUCCESS;
    }
    
    if (ORTE_PROC_IS_TOOL) {
        /* otherwise, if I am a tool proc, use that procedure */
        if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) {
            ORTE_ERROR_LOG(ret);
            error = "orte_ess_base_tool_setup";
            goto error;
        }
        /* as a tool, I don't need a nidmap - so just return now */
        return ORTE_SUCCESS;
        
    }
    
    /* otherwise, I must be an application process - use
     * the default procedure to finish my setup
     */
    if (ORTE_SUCCESS != (ret = orte_ess_base_app_setup())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_ess_base_app_setup";
        goto error;
    }
    
    /* if data was provided, update the database */
    if (ORTE_SUCCESS != (ret = orte_util_nidmap_init(orte_process_info.sync_buf))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_util_nidmap_init";
        goto error;
    }
    
    /* setup process binding */
    if (ORTE_SUCCESS != (ret = orte_ess_base_proc_binding())) {
        error = "proc_binding";
        goto error;
    }

    return ORTE_SUCCESS;

error:
    if (ORTE_ERR_SILENT != ret && !orte_report_silent_errors) {
        orte_show_help("help-orte-runtime.txt",
                       "orte_init:startup:internal-failure",
                       true, error, ORTE_ERROR_NAME(ret), ret);
    }

    return ret;
}
Exemplo n.º 4
0
static int rte_init(void)
{
    int ret;
    char *error = NULL;
    char **hosts = NULL;

    /* run the prolog */
    if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) {
        error = "orte_ess_base_std_prolog";
        goto error;
    }
    
    /* Start by getting a unique name from the enviro */
    env_set_name();

    /* if I am a daemon, complete my setup using the
     * default procedure
     */
    if (ORTE_PROC_IS_DAEMON) {
        if (NULL != orte_node_regex) {
            /* extract the nodes */
            if (ORTE_SUCCESS != (ret = orte_regex_extract_node_names(orte_node_regex, &hosts))) {
                error = "orte_regex_extract_node_names";
                goto error;
            }
        }
        if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup(hosts))) {
            ORTE_ERROR_LOG(ret);
            error = "orte_ess_base_orted_setup";
            goto error;
        }
        opal_argv_free(hosts);
        return ORTE_SUCCESS;
    }
    
    if (ORTE_PROC_IS_TOOL) {
        /* otherwise, if I am a tool proc, use that procedure */
        if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) {
            ORTE_ERROR_LOG(ret);
            error = "orte_ess_base_tool_setup";
            goto error;
        }
        /* as a tool, I don't need a nidmap - so just return now */
        return ORTE_SUCCESS;
        
    }
    
    /* use the default procedure to finish my setup */
    if (ORTE_SUCCESS != (ret = orte_ess_base_app_setup(true))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_ess_base_app_setup";
        goto error;
    }
    
    /* if data was provided, update the database */
    if (ORTE_SUCCESS != (ret = orte_util_nidmap_init(orte_process_info.sync_buf))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_util_nidmap_init";
        goto error;
    }
    
    /* setup process binding */
    if (ORTE_SUCCESS != (ret = orte_ess_base_proc_binding())) {
        error = "proc_binding";
        goto error;
    }

    /* if we are an ORTE app - and not an MPI app - then
     * we need to exchange our connection info here.
     * MPI_Init has its own modex, so we don't need to do
     * two of them. However, if we don't do a modex at all,
     * then processes have no way to communicate
     *
     * NOTE: only do this when the process originally launches.
     * Cannot do this on a restart as the rest of the processes
     * in the job won't be executing this step, so we would hang
     */
    if (ORTE_PROC_IS_NON_MPI && !orte_do_not_barrier) {
        orte_grpcomm_collective_t coll;
        OBJ_CONSTRUCT(&coll, orte_grpcomm_collective_t);
        coll.id = orte_process_info.peer_modex;
        coll.active = true;
        if (ORTE_SUCCESS != (ret = orte_grpcomm.modex(&coll))) {
            ORTE_ERROR_LOG(ret);
            error = "orte modex";
            goto error;
        }
        ORTE_WAIT_FOR_COMPLETION(coll.active);
        OBJ_DESTRUCT(&coll);
    }
    
    return ORTE_SUCCESS;

error:
    if (ORTE_ERR_SILENT != ret && !orte_report_silent_errors) {
        orte_show_help("help-orte-runtime.txt",
                       "orte_init:startup:internal-failure",
                       true, error, ORTE_ERROR_NAME(ret), ret);
    }

    return ret;
}
Exemplo n.º 5
0
static int rte_init(void)
{
    int ret;
    char *error = NULL;
    char **hosts = NULL;

    OPAL_OUTPUT_VERBOSE((1, orte_ess_base_framework.framework_output,
                         "ess:alps in rte_init"));

    /*
     * shouldn't have been able to open this ess component if
     * process is app proc
     */

    if (ORTE_PROC_IS_APP) {
        error = "mpi rank invoking alps rte_init";
        ret = ORTE_ERR_NOT_SUPPORTED;
        goto fn_fail;
    }

    /* run the prolog */
    if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) {
        error = "orte_ess_base_std_prolog";
        goto fn_fail;
    }

    if (ORTE_SUCCESS != (ret = alps_set_name())) {
        error = "alps_set_name";
        goto fn_fail;
    }

    /*
     * if I am a daemon, complete my setup using the
     * default procedure
     */
    if (ORTE_PROC_IS_DAEMON) {
        if (NULL != orte_node_regex) {
            /* extract the nodes */
            if (ORTE_SUCCESS != (ret =
                orte_regex_extract_node_names(orte_node_regex, &hosts)) ||
                NULL == hosts) {
                error = "orte_regex_extract_node_names";
                goto fn_fail;
            }
        }
        if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup(hosts))) {
            ORTE_ERROR_LOG(ret);
            error = "orte_ess_base_orted_setup";
            goto fn_fail;
        }
        if (NULL != hosts) {
            opal_argv_free(hosts);
        }

        /*
         * now synchronize with aprun.
         */

        if (ORTE_SUCCESS != (ret = orte_ess_alps_sync_start())) {
            error = "orte_ess_alps_sync";
            goto fn_fail;
        }

        ret = ORTE_SUCCESS;
        goto fn_exit;
    }

    if (ORTE_PROC_IS_TOOL) {
        /* otherwise, if I am a tool proc, use that procedure */
        if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) {
            ORTE_ERROR_LOG(ret);
            error = "orte_ess_base_tool_setup";
            goto fn_fail;
        }
        /* as a tool, I don't need a nidmap - so just return now */
        ret = ORTE_SUCCESS;
        goto fn_exit;
    }

   fn_exit:
    return ret;

   fn_fail:
    if (ORTE_ERR_SILENT != ret && !orte_report_silent_errors) {
        orte_show_help("help-orte-runtime.txt",
                       "orte_init:startup:internal-failure",
                       true, error, ORTE_ERROR_NAME(ret), ret);
    }
    goto fn_exit;
}
Exemplo n.º 6
0
static int status(char *nodes, opal_list_t *images)
{
    char *query, *line, *ptr;
    FILE *fp;
    orcm_pvsn_provision_t *pvn, *pvnptr;
    opal_value_t *attr;
    int i, rc=ORTE_SUCCESS;
    int j;
    char **nodelist, **ranges;

    OPAL_OUTPUT_VERBOSE((5, orcm_pvsn_base_framework.framework_output,
                         "%s pvsn:wwulf:status",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* if nodes is NULL, then get the info for all nodes. Note
     * that this could be a *lot* of info for a large cluster */
    if (NULL == nodes) {
        (void)asprintf(&query, "%s provision print", cmd);
    } else {
        /* could be a comma-separated regex, so parse it */
        ranges = opal_argv_split(nodes, ',');
        nodelist = NULL;
        for (i=0; NULL != ranges[i]; i++) {
            if (ORTE_SUCCESS != (rc = orte_regex_extract_node_names(ranges[i], &nodelist))) {
                ORTE_ERROR_LOG(rc);
                opal_argv_free(ranges);
                return rc;
            }
        }
        opal_argv_free(ranges);
        ptr = opal_argv_join(nodelist, ' ');
        opal_argv_free(nodelist);
        (void)asprintf(&query, "%s provision print %s", cmd, ptr);
        free(ptr);
    }

    if (NULL == (fp = popen(query, "r"))) {
        OPAL_OUTPUT_VERBOSE((5, orcm_pvsn_base_framework.framework_output,
                             "%s pvsn:wwulf:avail query for provisioning status failed",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        free(query);
        return ORCM_ERROR;
    }
    free(query);
    while (NULL != (line = orcm_getline(fp))) {
        OPAL_OUTPUT_VERBOSE((5, orcm_pvsn_base_framework.framework_output,
                             "%s pvsn:wwulf:status got input %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), line));
        /* if the line starts with #, it can be ignored */
        if ('#' == line[0]) {
            free(line);
            continue;
        }
        /* we want the following sections of the output line:
         * 0  => node name
         * 1  => attribute
         * 3  => value
         */
        ptr = line;
        j=0;
        while (NULL != (query = parse_next(ptr, &ptr))) {
            switch(j) {
            case 0:
                /* see if we already have this node */
                pvn = NULL;
                OPAL_LIST_FOREACH(pvnptr, images, orcm_pvsn_provision_t) {
                    if (0 == strcmp(pvnptr->nodes, query)) {
                        pvn = pvnptr;
                        break;
                    }
                }
                if (NULL == pvn) {
                    pvn = OBJ_NEW(orcm_pvsn_provision_t);
                    opal_list_append(images, &pvn->super);
                    pvn->nodes = strdup(query);
                    /* need to come up with a naming scheme for images */
                    pvn->image.name = strdup(query);
                }
                break;
            case 1:
                attr = OBJ_NEW(opal_value_t);
                attr->key = strdup(query);
                opal_list_append(&pvn->image.attributes, &attr->super);
                break;
            case 3:
                attr->type = OPAL_STRING;
                attr->data.string = strdup(query);
                break;
            default:
                /* just ignore it */
                break;
            }
            j++;
        }
        free(line);
    }
    pclose(fp);

    return ORCM_SUCCESS;
}
Exemplo n.º 7
0
static int rte_init(void)
{
    int ret, i;
    char *error = NULL;
    char **hosts = NULL;

    /* run the prolog */
    if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) {
        error = "orte_ess_base_std_prolog";
        goto error;
    }

    /* Start by getting a unique name */
    alps_set_name();

    /* if I am a daemon, complete my setup using the
     * default procedure
     */
    if (ORTE_PROC_IS_DAEMON) {
        if (NULL != orte_node_regex) {
            /* extract the nodes */
            if (ORTE_SUCCESS != (ret =
                orte_regex_extract_node_names(orte_node_regex, &hosts)) ||
                NULL == hosts) {
                error = "orte_regex_extract_node_names";
                goto error;
            }

            /* find our host in the list */
            for (i=0; NULL != hosts[i]; i++) {
                if (0 == strncmp(hosts[i], orte_process_info.nodename,
                                 strlen(hosts[i]))) {
                    /* correct our vpid */
                    ORTE_PROC_MY_NAME->vpid = starting_vpid + i;
                    OPAL_OUTPUT_VERBOSE((1, orte_ess_base_output,
                                         "ess:alps reset name to %s",
                                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
                    break;
                }
            }
        }
        if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup(hosts))) {
            ORTE_ERROR_LOG(ret);
            error = "orte_ess_base_orted_setup";
            goto error;
        }
        opal_argv_free(hosts);
        return ORTE_SUCCESS;
    }
    if (ORTE_PROC_IS_TOOL) {
        /* otherwise, if I am a tool proc, use that procedure */
        if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) {
            ORTE_ERROR_LOG(ret);
            error = "orte_ess_base_tool_setup";
            goto error;
        }
        /* as a tool, I don't need a nidmap - so just return now */
        return ORTE_SUCCESS;
    }
    /* otherwise, I must be an application process - use
     * the default procedure to finish my setup
     */
    if (ORTE_SUCCESS != (ret = orte_ess_base_app_setup())) {
        ORTE_ERROR_LOG(ret);
        error = "orte_ess_base_app_setup";
        goto error;
    }
    /* setup the nidmap arrays */
    if (ORTE_SUCCESS !=
        (ret = orte_util_nidmap_init(orte_process_info.sync_buf))) {
        ORTE_ERROR_LOG(ret);
        error = "orte_util_nidmap_init";
        goto error;
    }

    return ORTE_SUCCESS;

error:
    orte_show_help("help-orte-runtime.txt",
                   "orte_init:startup:internal-failure",
                   true, error, ORTE_ERROR_NAME(ret), ret);
    return ret;
}