Exemplo n.º 1
0
int
main(int argc, char *argv[])
{
    int ret, exit_status = OPAL_SUCCESS;
    char *fname = NULL;
    opal_crs_state_type_t cr_state;

    /***************
     * Initialize
     ***************/
    if (OPAL_SUCCESS != (ret = initialize(argc, argv))) {
        exit_status = ret;
        goto cleanup;
    }

    /*******************************
     * Checkpoint the requested PID
     *******************************/
    opal_output_verbose(10, opal_checkpoint_globals.output,
                        "opal_checkpoint: Checkpointing PID %d",
                        opal_checkpoint_globals.pid);
    if( opal_checkpoint_globals.term ) {
        opal_output_verbose(10, opal_checkpoint_globals.output,
                            "\tTerminating application after checkpoint");
    }

    ret = notify_process_for_checkpoint(opal_checkpoint_globals.pid,
                                        &fname,
                                        opal_checkpoint_globals.term,
                                        &cr_state);
    if (OPAL_SUCCESS != ret ||
        cr_state == OPAL_CRS_ERROR) {
        opal_show_help("help-opal-checkpoint.txt", "ckpt_failure", true,
                       opal_checkpoint_globals.pid, ret, cr_state);
        exit_status = ret;
        goto cleanup;
    }

    if( !opal_checkpoint_globals.quiet ) {
        opal_output(opal_checkpoint_globals.output,
                    "Local Snapshot Reference = %s\n",
                    fname);
    }

 cleanup:
    /***************
     * Cleanup
     ***************/
    if (OPAL_SUCCESS != (ret = finalize())) {
        return ret;
    }

    return exit_status;
}
Exemplo n.º 2
0
int
main(int argc, char *argv[])
{
    int ret, exit_status = ORTE_SUCCESS;

    /***************
     * Initialize
     ***************/
    if (ORTE_SUCCESS != (ret = ckpt_init(argc, argv))) {
        exit_status = ret;
        goto cleanup;
    }

    /***************************
     * Find the HNP that we want to connect to, if it exists
     ***************************/
    if (ORTE_SUCCESS != (ret = find_hnp())) {
        /* Error printed by called function */
        exit_status = ret;
        goto cleanup;
    }

    /*******************************
     * Checkpoint the requested PID
     *******************************/
    if( orte_checkpoint_globals.verbose ) {
        opal_output_verbose(10, orte_checkpoint_globals.output,
                            "orte_checkpoint: Checkpointing...");
        if (0 < orte_checkpoint_globals.pid) {
            opal_output_verbose(10, orte_checkpoint_globals.output,
                                "\t PID %d",
                                orte_checkpoint_globals.pid);
        } else if (ORTE_JOBID_INVALID != orte_checkpoint_globals.req_hnp){
            opal_output_verbose(10, orte_checkpoint_globals.output,
                                "\t Mpirun (%s)",
                                ORTE_JOBID_PRINT(orte_checkpoint_globals.req_hnp));
        }

        opal_output_verbose(10, orte_checkpoint_globals.output,
                            "\t Connected to Mpirun %s",
                            ORTE_NAME_PRINT(&orterun_hnp->name));
         
        if(orte_checkpoint_globals.term) {
            opal_output_verbose(10, orte_checkpoint_globals.output,
                                "\t Terminating after checkpoint\n");
        }
    }

    if(ORTE_SUCCESS != (ret = notify_process_for_checkpoint( orte_checkpoint_globals.term)) ) {
        orte_show_help("help-orte-checkpoint.txt", "ckpt_failure", true,
                       orte_checkpoint_globals.pid, ret);
        exit_status = ret;
        goto cleanup;
    }

    /*
     * Wait for the checkpoint to complete
     */
    if(!orte_checkpoint_globals.nowait) { 
        while( ORTE_SNAPC_CKPT_STATE_FINISHED != orte_checkpoint_globals.ckpt_status &&
               ORTE_SNAPC_CKPT_STATE_NO_CKPT  != orte_checkpoint_globals.ckpt_status &&
               ORTE_SNAPC_CKPT_STATE_ERROR    != orte_checkpoint_globals.ckpt_status ) {
            opal_progress();
        }
    }

    if( ORTE_SNAPC_CKPT_STATE_NO_CKPT  == orte_checkpoint_globals.ckpt_status ) {
        exit_status = ORTE_ERROR;
        goto cleanup;
    }

    if( ORTE_SNAPC_CKPT_STATE_ERROR == orte_checkpoint_globals.ckpt_status ) {
        orte_show_help("help-orte-checkpoint.txt", "ckpt_failure", true,
                       orte_checkpoint_globals.pid, ORTE_ERROR);
        exit_status = ORTE_ERROR;
        goto cleanup;
    }

    if( orte_checkpoint_globals.status ) {
        orte_checkpoint_globals.ckpt_status = ORTE_SNAPC_CKPT_STATE_FINISHED;
        pretty_print_status();
    }

    if(!orte_checkpoint_globals.nowait) {
        pretty_print_reference();
    }

 cleanup:
    /***************
     * Cleanup
     ***************/
    if (ORTE_SUCCESS != (ret = ckpt_finalize())) {
        return ret;
    }

    return exit_status;
}