/* * get_step_image_dir - get the dir to store step task images * IN cr: checkpoint/restart * RET image dir on success, NULL on error * * NOTE: only can be called in callbak */ static char * get_step_image_dir(int cr) { const struct cr_checkpoint_info *ckpt_info; const struct cr_restart_info *rstrt_info; const char *dest; char *rchar, *dir; if (cr) { /* checkpoint */ ckpt_info = cr_get_checkpoint_info(); if (!ckpt_info) { error("failed to get checkpoint info: %s", cr_strerror(errno)); return NULL; } dest = ckpt_info->dest; } else { /* retart */ rstrt_info = cr_get_restart_info(); if (!rstrt_info) { error("failed to get restart info: %s", cr_strerror(errno)); return NULL; } dest = rstrt_info->src; } rchar = strrchr(dest, '/'); if (rchar) { dir = xstrndup(dest, rchar - dest + 1); } xstrfmtcat(dir, "%u.%u", jobid, stepid); return dir; }
static int opal_crs_blcr_thread_callback(void *arg) { const struct cr_checkpoint_info *ckpt_info = cr_get_checkpoint_info(); int ret; opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, "crs:blcr: thread_callback()"); OPAL_THREAD_LOCK(&blcr_lock); blcr_current_state = OPAL_CRS_CHECKPOINT; /* * Allow the checkpoint to be taken, if we requested it */ #if CRS_BLCR_HAVE_INFO_REQUESTER == 1 if( ckpt_info->requester != my_pid ) { ret = cr_checkpoint(CR_CHECKPOINT_OMIT); blcr_current_state = OPAL_CRS_RUNNING; opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, "crs:blcr: thread_callback(); WARNING: An external agent attempted to checkpoint this process " "when it did not expect to be checkpointed. Skipping this checkpoint request." " [%d != %d].", ckpt_info->requester, my_pid); return 0; } else #endif { ret = cr_checkpoint(0); } /* * Restarting */ if ( 0 < ret ) { opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, "crs:blcr: thread_callback: Restarting."); blcr_current_state = OPAL_CRS_RESTART; } /* * Continuing */ else { opal_output_verbose(10, mca_crs_blcr_component.super.output_handle, "crs:blcr: thread_callback: Continue."); blcr_current_state = OPAL_CRS_CONTINUE; } OPAL_THREAD_UNLOCK(&blcr_lock); opal_condition_signal(&blcr_cond); return 0; }
/*
 * Callback run in signal context when a checkpoint is requested.
 *
 * Calls cr_checkpoint() exactly once. When CRS_BLCR_HAVE_INFO_REQUESTER == 1
 * and the request did not come from my_pid, the call is made with
 * CR_CHECKPOINT_OMIT; otherwise it is made with 0. The result of
 * cr_checkpoint() is ignored.
 *
 * arg is unused. Always returns 0.
 */
static int opal_crs_blcr_signal_callback(void *arg)
{
    const struct cr_checkpoint_info *ckpt_info = cr_get_checkpoint_info();
    int ckpt_flags = 0;

#if CRS_BLCR_HAVE_INFO_REQUESTER == 1
    /* Only allow the checkpoint to be taken if we requested it */
    if (my_pid != ckpt_info->requester) {
        ckpt_flags = CR_CHECKPOINT_OMIT;
    }
#endif

    (void) cr_checkpoint(ckpt_flags);

    return 0;
}