Ejemplo n.º 1
0
int orte_sstore_central_local_register(orte_sstore_base_handle_t handle)
{
    int ret, exit_status = ORTE_SUCCESS;
    orte_sstore_central_local_snapshot_info_t *handle_info = NULL;

    OPAL_OUTPUT_VERBOSE((10, mca_sstore_central_component.super.output_handle,
                         "sstore:central:(local): register()"));

    /*
     * Create a handle
     */
    if( NULL == (handle_info = find_handle_info(handle)) ) {
        handle_info = create_new_handle_info(handle);
    }

    /*
     * Get basic information from Global SStore
     */
    if( ORTE_SUCCESS != (ret = pull_handle_info(handle_info)) ) {
        ORTE_ERROR_LOG(ret);
        exit_status = ret;
        goto cleanup;
    }

    /*
     * Wait here until the pull request has been satisfied
     */
    while(SSTORE_LOCAL_READY != handle_info->status &&
          SSTORE_LOCAL_ERROR != handle_info->status ) {
        opal_progress();
    }

 cleanup:
    return exit_status;
}
Ejemplo n.º 2
0
int orte_sstore_central_app_set_attr(orte_sstore_base_handle_t handle, orte_sstore_base_key_t key, char *value)
{
    int ret, exit_status = ORTE_SUCCESS;
    orte_sstore_central_app_snapshot_info_t *handle_info = NULL;
    char *key_str = NULL;

    OPAL_OUTPUT_VERBOSE((10, mca_sstore_central_component.super.output_handle,
                         "sstore:central:(app): set_attr(%d = %s)", key, value));

    if( NULL == value ) {
        ORTE_ERROR_LOG(ORTE_ERROR);
        exit_status = ORTE_ERROR;
        goto cleanup;
    }

    if( key >= SSTORE_METADATA_MAX ) {
        ORTE_ERROR_LOG(ORTE_ERROR);
        exit_status = ORTE_ERROR;
        goto cleanup;
    }

    /*
     * Lookup the handle
     */
    handle_info = find_handle_info(handle);

    /*
     * Access metadata
     */
    if( SSTORE_METADATA_LOCAL_CRS_COMP == key ) {
        if( NULL != handle_info->crs_comp ) {
            free(handle_info->crs_comp);
        }
        handle_info->crs_comp = strdup(value);
    }
    else if(SSTORE_METADATA_LOCAL_SKIP_CKPT == key ) {
        handle_info->ckpt_skipped = true;
    }
    else if( SSTORE_METADATA_LOCAL_MKDIR == key ||
             SSTORE_METADATA_LOCAL_TOUCH == key ) {
        orte_sstore_base_convert_key_to_string(key, &key_str);
        if( ORTE_SUCCESS != (ret = metadata_write_str(handle_info, key_str, value))) {
            ORTE_ERROR_LOG(ret);
            exit_status = ret;
            goto cleanup;
        }
    }
    else {
        exit_status = ORTE_ERROR;
        goto cleanup;
    }

 cleanup:
    if( NULL != key_str ) {
        free(key_str);
        key_str = NULL;
    }

    return exit_status;
}
Ejemplo n.º 3
0
void orte_sstore_central_local_recv(int status,
                                    orte_process_name_t* sender,
                                    opal_buffer_t* buffer,
                                    orte_rml_tag_t tag,
                                    void* cbdata)
{
    int ret;
    orte_sstore_central_cmd_flag_t command;
    orte_std_cntr_t count;
    orte_sstore_base_handle_t loc_id;
    orte_sstore_central_local_snapshot_info_t *handle_info = NULL;

    if( ORTE_RML_TAG_SSTORE_INTERNAL != tag ) {
        return;
    }

    OPAL_OUTPUT_VERBOSE((10, mca_sstore_central_component.super.output_handle,
                         "sstore:central:(local): process_cmd(%s)",
                         ORTE_NAME_PRINT(sender)));

    count = 1;
    if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &command, &count, ORTE_SSTORE_CENTRAL_CMD))) {
        ORTE_ERROR_LOG(ret);
        goto cleanup;
    }

    count = 1;
    if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &loc_id, &count, ORTE_SSTORE_HANDLE )) ) {
        ORTE_ERROR_LOG(ret);
        goto cleanup;
    }

    /*
     * Find the referenced handle (Create if it does not exist)
     */
    if(NULL == (handle_info = find_handle_info(loc_id)) ) {
        handle_info = create_new_handle_info(loc_id);
    }

    /*
     * Process the command
     */
    if( ORTE_SSTORE_CENTRAL_PULL == command ) {
        if(OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_HNP, sender)) {
            process_global_pull(sender, buffer, handle_info);
        } else {
            process_app_pull(sender, buffer, handle_info);
        }
    }
    else if( ORTE_SSTORE_CENTRAL_PUSH == command ) {
        if(OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_HNP, sender)) {
            process_global_push(sender, buffer, handle_info);
        } else {
            process_app_push(sender, buffer, handle_info);
        }
    }

 cleanup:
    return;
}
Ejemplo n.º 4
0
int orte_sstore_central_global_register(orte_sstore_base_handle_t handle)
{
    int ret, exit_status = ORTE_SUCCESS;
    orte_sstore_central_global_snapshot_info_t *handle_info = NULL;

    OPAL_OUTPUT_VERBOSE((10, mca_sstore_central_component.super.output_handle,
                         "sstore:central:(global): register(%d) - Global", handle));

    /*
     * Lookup the handle
     */
    handle_info = find_handle_info(handle);
    if( SSTORE_GLOBAL_REG != handle_info->state ) {
        handle_info->state = SSTORE_GLOBAL_REG;
    } else {
        return orte_sstore_central_local_register(handle);
    }

    orte_sstore_handle_current = handle;

    /*
     * Associate the metadata
     */
    if( handle_info->migrating ) {
        if( ORTE_SUCCESS != (ret = metadata_write_int(handle_info,
                                                      SSTORE_METADATA_INTERNAL_MIG_SEQ_STR,
                                                      handle_info->seq_num)) ) {
            ORTE_ERROR_LOG(ret);
            exit_status = ret;
            goto cleanup;
        }
    } else {
        if( ORTE_SUCCESS != (ret = metadata_write_int(handle_info,
                                                      SSTORE_METADATA_GLOBAL_SNAP_SEQ_STR,
                                                      handle_info->seq_num)) ) {
            ORTE_ERROR_LOG(ret);
            exit_status = ret;
            goto cleanup;
        }
    }

    if( ORTE_SUCCESS != (ret = metadata_write_str(handle_info,
                                                  SSTORE_METADATA_LOCAL_SNAP_REF_FMT_STR,
                                                  orte_sstore_base_local_snapshot_fmt)) ) {
        ORTE_ERROR_LOG(ret);
        exit_status = ret;
        goto cleanup;
    }

    if( ORTE_SUCCESS != (ret = metadata_write_timestamp(handle_info)) ) {
        ORTE_ERROR_LOG(ret);
        exit_status = ret;
        goto cleanup;
    }

 cleanup:
    return exit_status;
}
Ejemplo n.º 5
0
int orte_sstore_central_global_pack(orte_process_name_t* peer, opal_buffer_t* buffer, orte_sstore_base_handle_t handle)
{
    int ret, exit_status = ORTE_SUCCESS;
    orte_sstore_central_global_snapshot_info_t *handle_info = NULL;

    OPAL_OUTPUT_VERBOSE((10, mca_sstore_central_component.super.output_handle,
                         "sstore:central:(global): pack()"));

    /*
     * Lookup the handle
     */
    handle_info = find_handle_info(handle);

    /*
     * Pack the handle ID
     */
    if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &handle, 1, ORTE_SSTORE_HANDLE))) {
        ORTE_ERROR_LOG(ret);
        exit_status = ret;
        goto cleanup;
    }

    OPAL_OUTPUT_VERBOSE((10, mca_sstore_central_component.super.output_handle,
                         "sstore:central:(global): pack(%d, %d, %s)",
                         handle_info->id,
                         handle_info->seq_num,
                         handle_info->ref_name));

    /*
     * Pack any metadata
     */
    if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &(handle_info->seq_num), 1, OPAL_INT )) ) {
        ORTE_ERROR_LOG(ret);
        exit_status = ret;
        goto cleanup;
    }

    if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &(handle_info->ref_name), 1, OPAL_STRING )) ) {
        ORTE_ERROR_LOG(ret);
        exit_status = ret;
        goto cleanup;
    }

    if (ORTE_SUCCESS != (ret = opal_dss.pack(buffer, &(handle_info->app_location_fmt), 1, OPAL_STRING )) ) {
        ORTE_ERROR_LOG(ret);
        exit_status = ret;
        goto cleanup;
    }

 cleanup:
    return exit_status;
}
Ejemplo n.º 6
0
int orte_sstore_central_global_get_attr(orte_sstore_base_handle_t handle, orte_sstore_base_key_t key, char **value)
{
    int exit_status = ORTE_SUCCESS;
    orte_sstore_central_global_snapshot_info_t *handle_info = NULL;

    OPAL_OUTPUT_VERBOSE((10, mca_sstore_central_component.super.output_handle,
                         "sstore:central:(global): get_attr()"));

    /*
     * Lookup the handle
     */
    handle_info = find_handle_info(handle);

    /*
     * Access metadata
     */
    if( SSTORE_METADATA_GLOBAL_SNAP_REF == key ) {
        *value = strdup(handle_info->ref_name);
    }
    else if( SSTORE_METADATA_GLOBAL_SNAP_SEQ == key ) {
        asprintf(value, "%d", handle_info->seq_num);
    }
    else if( SSTORE_METADATA_LOCAL_SNAP_REF_FMT == key ) {
        *value = strdup(orte_sstore_base_local_snapshot_fmt);
    }
    /* 'central' does not cache, so these are the same */
    else if( SSTORE_METADATA_LOCAL_SNAP_LOC       == key ) {
        asprintf(value, "%s/%s/%d",
                 handle_info->base_location,
                 handle_info->ref_name,
                 handle_info->seq_num);
    }
    else if( SSTORE_METADATA_LOCAL_SNAP_REF_LOC_FMT == key ) {
        asprintf(value, "%s/%s/%d/%s",
                 handle_info->base_location,
                 handle_info->ref_name,
                 handle_info->seq_num,
                 orte_sstore_base_local_snapshot_fmt);
    }
    else {
        exit_status = ORTE_ERR_NOT_SUPPORTED;
    }

    return exit_status;
}
Ejemplo n.º 7
0
int orte_sstore_central_local_unpack(orte_process_name_t* peer, opal_buffer_t* buffer, orte_sstore_base_handle_t *handle)
{
    int ret, exit_status = ORTE_SUCCESS;
    orte_sstore_central_local_snapshot_info_t *handle_info = NULL;
    orte_std_cntr_t count;

    OPAL_OUTPUT_VERBOSE((10, mca_sstore_central_component.super.output_handle,
                         "sstore:central:(local): unpack()"));

    /*
     * Unpack the handle id
     */
    count = 1;
    if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, handle, &count, ORTE_SSTORE_HANDLE))) {
        ORTE_ERROR_LOG(ret);
        exit_status = ret;
        goto cleanup;
    }

    /*
     * Lookup the handle
     */
    if( NULL == (handle_info = find_handle_info(*handle)) ) {
        handle_info = create_new_handle_info(*handle);
    }

    /*
     * Unpack the metadata piggybacked on this message
     */
    if( ORTE_SUCCESS != (ret = process_global_push(peer, buffer, handle_info))) {
        ORTE_ERROR_LOG(ret);
        exit_status = ret;
        goto cleanup;
    }

    OPAL_OUTPUT_VERBOSE((10, mca_sstore_central_component.super.output_handle,
                         "sstore:central:(local): unpack(%d, %d, %s)",
                         handle_info->id,
                         handle_info->seq_num,
                         handle_info->global_ref_name));

 cleanup:
    return exit_status;
}
Ejemplo n.º 8
0
int orte_sstore_central_global_set_attr(orte_sstore_base_handle_t handle, orte_sstore_base_key_t key, char *value)
{
    int ret, exit_status = ORTE_SUCCESS;
    orte_sstore_central_global_snapshot_info_t *handle_info = NULL;
    char *key_str = NULL;

    OPAL_OUTPUT_VERBOSE((10, mca_sstore_central_component.super.output_handle,
                         "sstore:central:(global): set_attr()"));

    /*
     * Lookup the handle
     */
    handle_info = find_handle_info(handle);

    /*
     * Process key (Access metadata)
     */
    if( key == SSTORE_METADATA_GLOBAL_MIGRATING ) {
        handle_info->migrating = true;
    }
    else {
        orte_sstore_base_convert_key_to_string(key, &key_str);
        if( NULL == key_str ) {
            ORTE_ERROR_LOG(ORTE_ERROR);
            exit_status = ORTE_ERROR;
            goto cleanup;
        }

        if( ORTE_SUCCESS != (ret = metadata_write_str(handle_info, key_str, value))) {
            ORTE_ERROR_LOG(ret);
            exit_status = ret;
            goto cleanup;
        }
    }

 cleanup:
    if( NULL != key_str ) {
        free(key_str);
        key_str = NULL;
    }

    return exit_status;
}
Ejemplo n.º 9
0
int orte_sstore_central_app_sync(orte_sstore_base_handle_t handle)
{
    int ret, exit_status = ORTE_SUCCESS;
    orte_sstore_central_app_snapshot_info_t *handle_info = NULL;

    OPAL_OUTPUT_VERBOSE((10, mca_sstore_central_component.super.output_handle,
                         "sstore:central:(app): sync()"));

    /*
     * Lookup the handle
     */
    handle_info = find_handle_info(handle);

    /*
     * Finalize and close the metadata
     */
    if( ORTE_SUCCESS != (ret = metadata_write_timestamp(handle_info)) ) {
        ORTE_ERROR_LOG(ret);
        exit_status = ret;
        goto cleanup;
    }

    if( ORTE_SUCCESS != (ret = metadata_close(handle_info)) ) {
        ORTE_ERROR_LOG(ret);
        exit_status = ret;
        goto cleanup;
    }

    /*
     * Push information to the Local coordinator
     */
    if( ORTE_SUCCESS != (ret = push_handle_info(handle_info)) ) {
        ORTE_ERROR_LOG(ret);
        exit_status = ret;
        goto cleanup;
    }

 cleanup:
    orte_sstore_handle_current = ORTE_SSTORE_HANDLE_INVALID;

    return exit_status;
}
Ejemplo n.º 10
0
int orte_sstore_central_app_get_attr(orte_sstore_base_handle_t handle, orte_sstore_base_key_t key, char **value)
{
    int exit_status = ORTE_SUCCESS;
    orte_sstore_central_app_snapshot_info_t *handle_info = NULL;

    OPAL_OUTPUT_VERBOSE((10, mca_sstore_central_component.super.output_handle,
                         "sstore:central:(app): get_attr(%d)", key));

    /*
     * Lookup the handle
     */
    handle_info = find_handle_info(handle);

    /*
     * Access metadata
     */
    if( SSTORE_METADATA_GLOBAL_SNAP_SEQ == key ) {
        asprintf(value, "%d", handle_info->seq_num);
        OPAL_OUTPUT_VERBOSE((10, mca_sstore_central_component.super.output_handle,
                             "sstore:central:(app): get_attr(%d, %d) Seq = <%s>", key, handle_info->id, *value));
    }
    else if( SSTORE_METADATA_LOCAL_SNAP_LOC == key) {
        *value = strdup(handle_info->local_location);
    }
    else if( SSTORE_METADATA_LOCAL_SNAP_META == key ) {
        *value = strdup(handle_info->metadata_filename);
    }
    else if( SSTORE_METADATA_GLOBAL_SNAP_REF == key ) {
        *value = strdup(handle_info->global_ref_name);
        OPAL_OUTPUT_VERBOSE((10, mca_sstore_central_component.super.output_handle,
                             "sstore:central:(app): get_attr(%d, %d) Ref = <%s>", key, handle_info->id, *value));
    }
    else {
        exit_status = ORTE_ERR_NOT_SUPPORTED;
        goto cleanup;
    }

    OPAL_OUTPUT_VERBOSE((10, mca_sstore_central_component.super.output_handle,
                         "sstore:central:(app): get_attr(%d, %d) <%s>", key, handle_info->id, *value));
 cleanup:
    return exit_status;
}
Ejemplo n.º 11
0
int orte_sstore_central_app_register(orte_sstore_base_handle_t handle)
{
    int ret, exit_status = ORTE_SUCCESS;
    orte_sstore_central_app_snapshot_info_t *handle_info = NULL;

    OPAL_OUTPUT_VERBOSE((10, mca_sstore_central_component.super.output_handle,
                         "sstore:central:(app): register(%d)", (int)handle));

    /*
     * Create a handle
     */
    orte_sstore_handle_current = handle;
    handle_info = find_handle_info(handle);
    if( NULL != handle_info ) {
        /* Remove the old, stale handle */
        opal_list_remove_item(active_handles, &(handle_info->super));
    }
    handle_info = create_new_handle_info(handle);

    /*
     * Get basic information from Local SStore
     */
    if( ORTE_SUCCESS != (ret = pull_handle_info(handle_info)) ) {
        ORTE_ERROR_LOG(ret);
        exit_status = ret;
        goto cleanup;
    }

    /*
     * Setup the storage directory
     */
    if( ORTE_SUCCESS != (ret = init_local_snapshot_directory(handle_info)) ) {
        ORTE_ERROR_LOG(ret);
        exit_status = ret;
        goto cleanup;
    }

 cleanup:
    return exit_status;
}
Ejemplo n.º 12
0
int orte_sstore_central_local_sync(orte_sstore_base_handle_t handle)
{
    int ret, exit_status = ORTE_SUCCESS;
    orte_sstore_central_local_snapshot_info_t *handle_info = NULL;

    OPAL_OUTPUT_VERBOSE((10, mca_sstore_central_component.super.output_handle,
                         "sstore:central:(local): sync()"));

    /*
     * Lookup the handle
     */
    handle_info = find_handle_info(handle);

    /*
     * Wait for all of the applications to update their metadata
     */
    if( ORTE_SUCCESS != (ret = wait_all_apps_updated(handle_info))) {
        ORTE_ERROR_LOG(ret);
        exit_status = ret;
        goto cleanup;
    }

    /*
     * Push information to the Global coordinator
     */
    if( ORTE_SUCCESS != (ret = push_handle_info(handle_info)) ) {
        ORTE_ERROR_LOG(ret);
        exit_status = ret;
        goto cleanup;
    }

    handle_info->status = SSTORE_LOCAL_SYNCED;

 cleanup:
    return exit_status;
}
Ejemplo n.º 13
0
static void sstore_central_global_recv(int status,
                                       orte_process_name_t* sender,
                                       opal_buffer_t* buffer,
                                       orte_rml_tag_t tag,
                                       void* cbdata)
{
    int ret;
    orte_sstore_central_cmd_flag_t command;
    orte_std_cntr_t count;
    orte_sstore_base_handle_t loc_id;
    orte_sstore_central_global_snapshot_info_t *handle_info = NULL;

    if( ORTE_RML_TAG_SSTORE_INTERNAL != tag ) {
        return;
    }

    /*
     * If this was an application process contacting us, then act like an orted
     * instead of an HNP
     */
    if(OPAL_EQUAL != orte_util_compare_name_fields(ORTE_NS_CMP_JOBID,
                                                   ORTE_PROC_MY_NAME,
                                                   sender)) {
        orte_sstore_central_local_recv(status, sender, buffer, tag, cbdata);
        return;
    }


    OPAL_OUTPUT_VERBOSE((10, mca_sstore_central_component.super.output_handle,
                         "sstore:central:(global): process_cmd(%s)",
                         ORTE_NAME_PRINT(sender)));

    count = 1;
    if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &command, &count, ORTE_SSTORE_CENTRAL_CMD))) {
        ORTE_ERROR_LOG(ret);
        goto cleanup;
    }

    count = 1;
    if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &loc_id, &count, ORTE_SSTORE_HANDLE )) ) {
        ORTE_ERROR_LOG(ret);
        goto cleanup;
    }

    /*
     * Find the referenced handle
     */
    if(NULL == (handle_info = find_handle_info(loc_id)) ) {
        ; /* JJH big problem */
    }

    /*
     * Process the command
     */
    if( ORTE_SSTORE_CENTRAL_PULL == command ) {
        process_local_pull(sender, buffer, handle_info);
    }
    else if( ORTE_SSTORE_CENTRAL_PUSH == command ) {
        process_local_push(sender, buffer, handle_info);
    }

 cleanup:
    return;
}
Ejemplo n.º 14
0
int orte_sstore_central_global_sync(orte_sstore_base_handle_t handle)
{
    int ret, exit_status = ORTE_SUCCESS;
    orte_sstore_central_global_snapshot_info_t *handle_info = NULL;

    OPAL_OUTPUT_VERBOSE((10, mca_sstore_central_component.super.output_handle,
                         "sstore:central:(global): sync()"));

    /*
     * Lookup the handle
     */
    handle_info = find_handle_info(handle);
    if( SSTORE_GLOBAL_SYNCING != handle_info->state ) {
        handle_info->state = SSTORE_GLOBAL_SYNCING;
        if( ORTE_SNAPC_LOCAL_COORD_TYPE == (orte_snapc_coord_type & ORTE_SNAPC_LOCAL_COORD_TYPE) ) {
            return orte_sstore_central_local_sync(handle);
        }
    }

    /*
     * Synchronize all of the files
     */
    while(handle_info->num_procs_synced < handle_info->num_procs_total) {
        opal_progress();
    }

    /*
     * Finalize and close the metadata
     */
    if( ORTE_SUCCESS != (ret = metadata_write_timestamp(handle_info)) ) {
        ORTE_ERROR_LOG(ret);
        exit_status = ret;
        goto cleanup;
    }

    if( handle_info->migrating ) {
        if( ORTE_SUCCESS != (ret = metadata_write_int(handle_info,
                                                      SSTORE_METADATA_INTERNAL_DONE_MIG_SEQ_STR,
                                                      handle_info->seq_num)) ) {
            ORTE_ERROR_LOG(ret);
            exit_status = ret;
            goto cleanup;
        }
    } else {
        if( ORTE_SUCCESS != (ret = metadata_write_int(handle_info,
                                                      SSTORE_METADATA_INTERNAL_DONE_SEQ_STR,
                                                      handle_info->seq_num)) ) {
            ORTE_ERROR_LOG(ret);
            exit_status = ret;
            goto cleanup;
        }
    }

    if( ORTE_SUCCESS != (ret = metadata_close(handle_info)) ) {
        ORTE_ERROR_LOG(ret);
        exit_status = ret;
        goto cleanup;
    }

    /* JJH: We should lock this var! */
    if( !handle_info->migrating ) {
        orte_sstore_base_is_checkpoint_available = true;
        orte_sstore_handle_last_stable = orte_sstore_handle_current;
    }

 cleanup:
    return exit_status;
}