Ejemplo n.º 1
0
/*
 * Locate the HNP that this tool should connect to.
 *
 * The list of local HNPs is obtained from orte_list_local_hnps() and scanned
 * for the first entry whose jobid equals orte_checkpoint_globals.req_hnp or
 * whose pid equals orte_checkpoint_globals.pid.  The matching entry is stored
 * in orterun_hnp (declared outside this function) and is NOT released here.
 * Every other entry taken from the list is released before returning.
 *
 * Returns:
 *   ORTE_SUCCESS  a matching HNP was found and stored in orterun_hnp.
 *   ORTE_ERROR    orterun_hnp is NULL on exit, or no entry matched.
 *   otherwise     the status recorded from orte_list_local_hnps() when
 *                 orterun_hnp was already non-NULL on entry.
 */
static int find_hnp(void) {
    int ret, exit_status = ORTE_SUCCESS;
    opal_list_t hnp_list;
    opal_list_item_t *item;
    orte_hnp_contact_t *hnpcandidate;

    /* get the list of local hnp's available to us and setup
     * contact info for them into the RML
     */
    OBJ_CONSTRUCT(&hnp_list, opal_list_t);
    if (ORTE_SUCCESS != (ret = orte_list_local_hnps(&hnp_list, true) ) ) {
        orte_show_help("help-orte-checkpoint.txt", "no_hnps", true,
                       orte_checkpoint_globals.pid,
                       orte_process_info.tmpdir_base,
                       orte_process_info.top_session_dir,
                       ret, ORTE_ERROR_NAME(ret));
        exit_status = ret;
        goto cleanup;
    }

    /* search the list for the desired hnp */
    while (NULL != (item = opal_list_remove_first(&hnp_list))) {
        hnpcandidate = (orte_hnp_contact_t*)item;
        if (hnpcandidate->name.jobid == orte_checkpoint_globals.req_hnp ||
            hnpcandidate->pid        == orte_checkpoint_globals.pid) {
            /* this is the one we want - keep it (do not release) */
            orterun_hnp = hnpcandidate;
            exit_status = ORTE_SUCCESS;
            goto cleanup;
        }

        /* Not a match.  The item has already been removed from the list,
         * so the cleanup loop below would never see it; release it here
         * to avoid leaking it.
         */
        OBJ_RELEASE(item);
    }

    /* If no match was found, error out */
    orte_show_help("help-orte-checkpoint.txt", "no_universe", true,
                   orte_checkpoint_globals.pid,
                   orte_process_info.tmpdir_base,
                   orte_process_info.top_session_dir);
    /* Record the failure explicitly so the result does not depend on
     * whatever value orterun_hnp held before this call.
     */
    exit_status = ORTE_ERROR;

cleanup:
    /* release whatever is still on the list (entries after the match) */
    while (NULL != (item = opal_list_remove_first(&hnp_list))) {
        OBJ_RELEASE(item);
    }
    OBJ_DESTRUCT(&hnp_list);

    if( NULL == orterun_hnp ) {
        return ORTE_ERROR;
    } else {
        return exit_status;
    }
}
Ejemplo n.º 2
0
/*
 * Locate the HNP that this tool should connect to.
 *
 * The list of local HNPs is obtained from orte_list_local_hnps() and scanned
 * for the first entry whose pid equals orte_migrate_globals.pid.  The
 * matching entry is stored in orterun_hnp (declared outside this function)
 * and is NOT released here.  Every other entry taken from the list is
 * released before returning.
 *
 * Returns ORTE_ERROR when orterun_hnp is NULL on exit; otherwise the
 * recorded status (ORTE_SUCCESS, or the orte_list_local_hnps() error code).
 */
static int find_hnp(void) {
    int ret, exit_status = ORTE_SUCCESS;
    opal_list_t hnp_list;
    opal_list_item_t *item;
    orte_hnp_contact_t *hnpcandidate;

    /* get the list of local hnp's available to us and setup
     * contact info for them into the RML
     */
    OBJ_CONSTRUCT(&hnp_list, opal_list_t);
    if (ORTE_SUCCESS != (ret = orte_list_local_hnps(&hnp_list, true) ) ) {
        ORTE_ERROR_LOG(ret);
        exit_status = ret;
        goto cleanup;
    }

    /* search the list for the desired hnp */
    while (NULL != (item = opal_list_remove_first(&hnp_list))) {
        hnpcandidate = (orte_hnp_contact_t*)item;
        if( hnpcandidate->pid        == orte_migrate_globals.pid) {
            /* this is the one we want - keep it (do not release) */
            orterun_hnp = hnpcandidate;
            exit_status = ORTE_SUCCESS;
            goto cleanup;
        }

        /* Not a match.  The item has already been removed from the list,
         * so the cleanup loop below would never see it; release it here
         * to avoid leaking it.
         */
        OBJ_RELEASE(item);
    }

cleanup:
    /* release whatever is still on the list (entries after the match) */
    while (NULL != (item = opal_list_remove_first(&hnp_list))) {
        OBJ_RELEASE(item);
    }
    OBJ_DESTRUCT(&hnp_list);

    if( NULL == orterun_hnp ) {
        return ORTE_ERROR;
    } else {
        return exit_status;
    }
}
Ejemplo n.º 3
0
/*
 * Entry point: list the local HNPs and print information about each one.
 *
 * After orte_ps_init() succeeds, the list of local HNPs is fetched with
 * orte_list_local_hnps().  Each entry is optionally filtered by
 * orte_ps_globals.pid (only applied when that value is > 0), queried with
 * gather_information(), and printed with parseable_print() or pretty_print()
 * depending on orte_ps_globals.parseable.
 *
 * An entry for which gather_information() returns ORTE_ERR_SILENT is skipped
 * (the "stale-hnp" help message is shown only for the first such entry).
 * Any other failure stops processing and becomes the process exit status.
 *
 * Every entry removed from the list is released once it is no longer needed,
 * and the list itself is destructed before orte_finalize() is called.
 */
int
main(int argc, char *argv[])
{
    int ret, exit_status = ORTE_SUCCESS;
    opal_list_t hnp_list;
    opal_list_item_t* item = NULL;
    orte_ps_mpirun_info_t hnpinfo;
    bool reported = false;

    /***************
     * Initialize
     ***************/
    OBJ_CONSTRUCT(&hnp_list, opal_list_t);

    if (ORTE_SUCCESS != (ret = orte_ps_init(argc, argv))) {
        exit_status = ret;
        goto cleanup;
    }

    /*
     * Get the directory listing
     */
    opal_output_verbose(10, orte_ps_globals.output,
                        "orte_ps: Acquiring list of HNPs and setting contact info into RML...\n");

    if (ORTE_SUCCESS != (ret = orte_list_local_hnps(&hnp_list, true) ) ) {
        exit_status = ret;
        goto cleanup;
    }

    opal_output_verbose(10, orte_ps_globals.output,
                        "orte_ps: Found %d HNPs\n",
                        (int)opal_list_get_size(&hnp_list));

    /*
     * For each hnp in the listing
     */
    while (NULL != (item  = opal_list_remove_first(&hnp_list))) {
        hnpinfo.hnp = (orte_hnp_contact_t*)item;

        opal_output_verbose(10, orte_ps_globals.output,
                            "orte_ps: Processing HNP %lu\n",
                            (unsigned long)hnpinfo.hnp->pid);

        /* A specific pid was requested and this is not it: skip the entry */
        if (0 < orte_ps_globals.pid &&
            hnpinfo.hnp->pid != orte_ps_globals.pid) {
            OBJ_RELEASE(item);
            continue;
        }

        /*
         * Gather the information
         */
        opal_output_verbose(10, orte_ps_globals.output,
                            "orte_ps: Gathering Information for HNP: %s:%d\n",
                            ORTE_NAME_PRINT(&(hnpinfo.hnp->name)),
                            (int)hnpinfo.hnp->pid);

        ret = gather_information(&hnpinfo);
        if (ORTE_ERR_SILENT == ret) {
            /* this could be due to a stale session directory - if so,
             * just skip this entry, but don't abort.  Only tell the
             * user about it once.
             */
            if (!reported) {
                orte_show_help("help-orte-ps.txt", "stale-hnp", true,
                               ORTE_NAME_PRINT(&(hnpinfo.hnp->name)));
                reported = true;
            }
            OBJ_RELEASE(item);
            continue;
        }
        if (ORTE_SUCCESS != ret) {
            /* a real failure: propagate it instead of exiting with success */
            exit_status = ret;
            OBJ_RELEASE(item);
            goto cleanup;
        }

        /* Print the information */
        if (orte_ps_globals.parseable) {
            ret = parseable_print(&hnpinfo);
        } else {
            ret = pretty_print(&hnpinfo);
        }

        /* Done with this entry.  It is no longer on the list, so it must
         * be released here; hnpinfo.hnp is reassigned on the next pass
         * and is not read again after the loop.
         */
        OBJ_RELEASE(item);

        if (ORTE_SUCCESS != ret) {
            exit_status = ret;
            goto cleanup;
        }
    }

    /***************
     * Cleanup
     ***************/
 cleanup:
    /* release any entries left on the list after an early exit */
    while (NULL != (item = opal_list_remove_first(&hnp_list))) {
        OBJ_RELEASE(item);
    }
    OBJ_DESTRUCT(&hnp_list);

    orte_finalize();

    return exit_status;
}