Beispiel #1
0
/*
 * Take a checkpoint of the current process through the "self" CRS component.
 *
 * The application-registered checkpoint callback is asked for a restart
 * command, which is stored in a newly created self-snapshot together with the
 * component name written to the snapshot metadata file.
 *
 * pid            Process identifier; only used for verbose output here.
 * base_snapshot  Snapshot description whose fields seed the new self-snapshot.
 * options        Checkpoint options; only the 'stop' flag is inspected (a
 *                diagnostic is printed if it is set, nothing else changes).
 *                May be NULL.
 * state          Out: OPAL_CRS_CONTINUE on success; OPAL_CRS_ERROR when no
 *                restart command could be obtained/stored or the metadata
 *                update failed. Left untouched on the other failure paths.
 *
 * Returns OPAL_SUCCESS on success, OPAL_ERR_NOT_SUPPORTED when called from a
 * tool, OPAL_ERROR on local failures, or the error code reported by
 * self_update_snapshot_metadata().
 */
int opal_crs_self_checkpoint(pid_t pid,
                             opal_crs_base_snapshot_t *base_snapshot,
                             opal_crs_base_ckpt_options_t *options,
                             opal_crs_state_type_t *state)
{
    opal_crs_self_snapshot_t *snapshot = NULL;
    int ret, exit_status = OPAL_SUCCESS;
    char *restart_cmd = NULL;

    /*
     * This function should never be called by a tool.
     * Checked before anything is allocated so the early return cannot leak.
     */
    if( opal_cr_is_tool ) {
        return OPAL_ERR_NOT_SUPPORTED;
    }

    snapshot = OBJ_NEW(opal_crs_self_snapshot_t);
    if( NULL == snapshot ) {
        return OPAL_ERROR;
    }

    if( NULL != options && options->stop ) {
        opal_output(0,
                    "crs:self: checkpoint(): Error: SIGSTOP Not currently supported!");
    }

    /*
     * Setup for snapshot directory creation.
     * This is a shallow struct copy: every pointer member of snapshot->super
     * aliases the corresponding member of *base_snapshot.
     */
    snapshot->super = *base_snapshot;

    opal_output_verbose(10, mca_crs_self_component.super.output_handle,
                        "crs:self: checkpoint(%d, ---)", pid);

    if(!mca_crs_self_component.can_checkpoint) {
        opal_show_help("help-opal-crs-self.txt", "self:ckpt_disabled", false);
        exit_status = OPAL_ERROR;
        goto cleanup;
    }

    /*
     * Update the snapshot metadata: open the metadata file in append mode
     * if the caller did not already provide an open stream.
     */
    snapshot->super.component_name = strdup(mca_crs_self_component.super.base_version.mca_component_name);
    if( NULL == snapshot->super.metadata ) {
        if (NULL == (snapshot->super.metadata = fopen(snapshot->super.metadata_filename, "a")) ) {
            opal_output(mca_crs_self_component.super.output_handle,
                        "crs:self: checkpoint(): Error: Unable to open the file (%s)",
                        snapshot->super.metadata_filename);
            exit_status = OPAL_ERROR;
            goto cleanup;
        }
    }
    fprintf(snapshot->super.metadata, "%s%s\n", CRS_METADATA_COMP, snapshot->super.component_name);

    /*
     * Call the user callback function. It is expected to hand back a
     * heap-allocated restart command, which is freed in the cleanup section.
     */
    if(NULL != mca_crs_self_component.ucb_checkpoint_fn) {
        mca_crs_self_component.ucb_checkpoint_fn(&restart_cmd);
    }

    /*
     * Save the restart command
     */
    if( NULL == restart_cmd) {
        *state = OPAL_CRS_ERROR;
        opal_show_help("help-opal-crs-self.txt", "self:no-restart-cmd",
                       true);
        exit_status = OPAL_ERROR;
        goto cleanup;
    }

    snapshot->cmd_line = strdup(restart_cmd);
    if( NULL == snapshot->cmd_line ) {
        /* Out of memory while duplicating the restart command */
        *state = OPAL_CRS_ERROR;
        exit_status = OPAL_ERROR;
        goto cleanup;
    }

    opal_output_verbose(10, mca_crs_self_component.super.output_handle,
                        "crs:self: checkpoint: Restart Command (%s)", snapshot->cmd_line);

    /*
     * The best we can do is update the metadata file with the
     * application argv and argc we started with.
     */
    if( OPAL_SUCCESS != (ret = self_update_snapshot_metadata(snapshot)) ) {
        *state = OPAL_CRS_ERROR;
        opal_output(mca_crs_self_component.super.output_handle,
                    "crs:self: checkpoint(): Error: Unable to update metadata for snapshot (%s).",
                    snapshot->super.metadata_filename);
        exit_status = ret;
        goto cleanup;
    }

    *state = OPAL_CRS_CONTINUE;

    /*
     * Call their continue routine for completeness
     */
    if(NULL != mca_crs_self_component.ucb_continue_fn) {
        mca_crs_self_component.ucb_continue_fn();
    }

    /*
     * NOTE(review): the previous code ended with
     * "base_snapshot = &(snapshot->super);", which only reassigned the
     * by-value parameter and was invisible to the caller. The self-snapshot
     * (and a metadata stream opened above) is therefore neither handed back
     * nor released in this function -- confirm the intended ownership before
     * adding a release, since snapshot->super shares pointers with
     * *base_snapshot.
     */

 cleanup:
    free(restart_cmd);
    restart_cmd = NULL;

    return exit_status;
}
/*
 * Take a checkpoint of the current process through the "self" CRS component.
 *
 * The application-registered checkpoint callback is asked for a restart
 * command, which is stored in a newly created self-snapshot; the component
 * name is recorded through opal_crs_base_metadata_write_token().
 *
 * pid            Process identifier; only used for verbose output here.
 * base_snapshot  Snapshot description whose reference name and local/remote
 *                locations are duplicated into the new self-snapshot.
 * state          Out: OPAL_CRS_CONTINUE on success; OPAL_CRS_ERROR when no
 *                restart command could be obtained/stored or the metadata
 *                update failed. Left untouched on the other failure paths.
 *
 * Returns OPAL_SUCCESS on success, OPAL_ERR_NOT_SUPPORTED when called from a
 * tool, OPAL_ERROR on local failures, or the error code reported by
 * opal_crs_base_metadata_write_token() / self_update_snapshot_metadata().
 */
int opal_crs_self_checkpoint(pid_t pid, opal_crs_base_snapshot_t *base_snapshot, opal_crs_state_type_t *state)
{
    opal_crs_self_snapshot_t *snapshot = NULL;
    int ret, exit_status = OPAL_SUCCESS;
    char *restart_cmd = NULL;

    /*
     * This function should never be called by a tool.
     * Checked before anything is allocated so the early return cannot leak.
     */
    if( opal_cr_is_tool ) {
        return OPAL_ERR_NOT_SUPPORTED;
    }

    snapshot = OBJ_NEW(opal_crs_self_snapshot_t);
    if( NULL == snapshot ) {
        return OPAL_ERROR;
    }

    /*
     * Setup for snapshot directory creation.
     * Any string already present in the new object is released first
     * (free(NULL) is a no-op). A NULL source string is carried over as NULL
     * instead of being passed to strdup(), which would be undefined behavior.
     */
    free(snapshot->super.reference_name);
    snapshot->super.reference_name = (NULL != base_snapshot->reference_name)
        ? strdup(base_snapshot->reference_name) : NULL;

    free(snapshot->super.local_location);
    snapshot->super.local_location = (NULL != base_snapshot->local_location)
        ? strdup(base_snapshot->local_location) : NULL;

    free(snapshot->super.remote_location);
    snapshot->super.remote_location = (NULL != base_snapshot->remote_location)
        ? strdup(base_snapshot->remote_location) : NULL;

    opal_output_verbose(10, mca_crs_self_component.super.output_handle,
                        "crs:self: checkpoint(%d, ---)", pid);

    if(!mca_crs_self_component.can_checkpoint) {
        opal_show_help("help-opal-crs-self.txt", "self:ckpt_disabled", false);
        exit_status = OPAL_ERROR;
        goto cleanup;
    }

    /*
     * Update the snapshot metadata
     */
    snapshot->super.component_name = strdup(mca_crs_self_component.super.base_version.mca_component_name);
    if( OPAL_SUCCESS != (ret = opal_crs_base_metadata_write_token(NULL, CRS_METADATA_COMP, snapshot->super.component_name) ) ) {
        opal_output(mca_crs_self_component.super.output_handle,
                    "crs:self: checkpoint(): Error: Unable to write component name to the directory for (%s).",
                    snapshot->super.reference_name);
        exit_status = ret;
        goto cleanup;
    }

    /*
     * Call the user callback function. It is expected to hand back a
     * heap-allocated restart command, which is freed in the cleanup section.
     */
    if(NULL != mca_crs_self_component.ucb_checkpoint_fn) {
        mca_crs_self_component.ucb_checkpoint_fn(&restart_cmd);
    }

    /*
     * Save the restart command
     */
    if( NULL == restart_cmd) {
        *state = OPAL_CRS_ERROR;
        opal_show_help("help-opal-crs-self.txt", "self:no-restart-cmd",
                       true);
        exit_status = OPAL_ERROR;
        goto cleanup;
    }

    snapshot->cmd_line = strdup(restart_cmd);
    if( NULL == snapshot->cmd_line ) {
        /* Out of memory while duplicating the restart command */
        *state = OPAL_CRS_ERROR;
        exit_status = OPAL_ERROR;
        goto cleanup;
    }

    opal_output_verbose(10, mca_crs_self_component.super.output_handle,
                        "crs:self: checkpoint: Restart Command (%s)", snapshot->cmd_line);

    /*
     * The best we can do is update the metadata file with the
     * application argv and argc we started with.
     */
    if( OPAL_SUCCESS != (ret = self_update_snapshot_metadata(snapshot)) ) {
        *state = OPAL_CRS_ERROR;
        opal_output(mca_crs_self_component.super.output_handle,
                    "crs:self: checkpoint(): Error: Unable to update metadata for snapshot (%s).",
                    snapshot->super.reference_name);
        exit_status = ret;
        goto cleanup;
    }

    *state = OPAL_CRS_CONTINUE;

    /*
     * Call their continue routine for completeness
     */
    if(NULL != mca_crs_self_component.ucb_continue_fn) {
        mca_crs_self_component.ucb_continue_fn();
    }

    /*
     * NOTE(review): the previous code ended with
     * "base_snapshot = &(snapshot->super);", which only reassigned the
     * by-value parameter and was invisible to the caller. The self-snapshot
     * is therefore neither handed back nor released in this function --
     * confirm the intended ownership with the callers.
     */

 cleanup:
    free(restart_cmd);
    restart_cmd = NULL;

    return exit_status;
}