示例#1
0
/*
 * Fill in a global snapshot description for the requested handle.
 *
 * handle   - handle to describe; NULL selects orte_sstore_handle_last_stable.
 * snapshot - caller-provided object that receives the handle, sequence
 *            number, duplicated reference/basedir/metadata-filename strings
 *            and the (sorted) list of local snapshots.
 *
 * Returns ORTE_SUCCESS, or the error code reported by the metadata
 * extraction routine (in which case the local snapshot list is not sorted).
 */
int orte_sstore_central_global_request_global_snapshot_data(orte_sstore_base_handle_t *handle,
                                                            orte_sstore_base_global_snapshot_info_t *snapshot)
{
    int rc;
    orte_sstore_base_handle_t wanted;
    orte_sstore_central_global_snapshot_info_t *info = NULL;

    OPAL_OUTPUT_VERBOSE((10, mca_sstore_central_component.super.output_handle,
                         "sstore:central:(global): request_global_snapshot_data()"));

    /* Resolve which handle we are describing, then look it up once. */
    wanted = (NULL == handle) ? orte_sstore_handle_last_stable : *handle;
    info = find_handle_info(wanted);
    snapshot->ss_handle = wanted;

    /* NOTE(review): the lookup result is dereferenced unchecked below --
     * confirm find_handle_info() cannot fail for these handles. */

    /* Seed the snapshot from the locally tracked handle information. */
    snapshot->seq_num           = info->seq_num;
    snapshot->reference         = strdup(info->ref_name);
    snapshot->basedir           = strdup(info->base_location);
    snapshot->metadata_filename = strdup(info->metadata_filename);

    /* The current checkpoint is served from the local cache; anything
     * else is read back from the metadata file. */
    rc = (orte_sstore_handle_current == snapshot->ss_handle)
        ? orte_sstore_central_extract_global_metadata(info, snapshot)
        : orte_sstore_base_extract_global_metadata(snapshot);
    if( ORTE_SUCCESS != rc ) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    opal_list_sort(&snapshot->local_snapshots, central_snapshot_sort_compare_fn);

    return ORTE_SUCCESS;
}
示例#2
0
/*
 * Find all available RAS components and sort them according to
 * priority
 */
int orte_ras_base_find_available(void)
{
    opal_list_item_t *item;
    mca_base_component_list_item_t *cli;
    orte_ras_base_component_t *component;
    orte_ras_base_module_t *module;
    int priority, rc;
    orte_ras_base_cmp_t *cmp;

    orte_ras_base.ras_available_valid = false;
    
    if (orte_ras_base.ras_opened_valid) {
        OBJ_CONSTRUCT(&orte_ras_base.ras_available, opal_list_t);
        orte_ras_base.ras_available_valid = true;

        for (item = opal_list_get_first(&orte_ras_base.ras_opened);
             opal_list_get_end(&orte_ras_base.ras_opened) != item;
             item = opal_list_get_next(item)) {
            cli = (mca_base_component_list_item_t *) item;
            component = (orte_ras_base_component_t *) cli->cli_component;
            opal_output(orte_ras_base.ras_output,
                        "orte:ras:base:open: querying component %s",
                        component->ras_version.mca_component_name);

            /* Call the component's init function and see if it wants to be
               selected */

            module = component->ras_init(&priority);

            /* If we got a non-NULL module back, then the component wants
               to be considered for selection */

            if (NULL != module) {
                opal_output(orte_ras_base.ras_output,
                            "orte:ras:base:open: component %s returns priority %d",
                            component->ras_version.mca_component_name,
                            priority);

                cmp = OBJ_NEW(orte_ras_base_cmp_t);
                cmp->component = component;
                cmp->module = module;
                cmp->priority = priority;

                opal_list_append(&orte_ras_base.ras_available, &cmp->super);
            } else {
                opal_output(orte_ras_base.ras_output,
                            "orte:ras:base:open: component %s does NOT want to be considered for selection",
                            component->ras_version.mca_component_name);
            }
        }

        /* Sort the resulting available list in priority order */
        opal_list_sort(&orte_ras_base.ras_available, compare);

        /* if we are an HNP, start the receive */
        if (orte_process_info.seed) {
            if (ORTE_SUCCESS  != (rc = orte_ras_base_comm_start())) {
                ORTE_ERROR_LOG(rc);
                return rc;
            }
        }
    }
    
    return ORTE_SUCCESS;
}
示例#3
0
static void errmgr_autor_recover_processes(int fd, short event, void *cbdata)
{
    int ret, exit_status = ORTE_SUCCESS;
    opal_list_item_t *item = NULL;
    errmgr_autor_wp_item_t *wp_item = NULL;
    orte_std_cntr_t i_proc;
    orte_proc_t *proc = NULL;
    orte_sstore_base_global_snapshot_info_t *snapshot = NULL;
    char * tmp_str = NULL;

    autor_mask_faults = true;
    ERRMGR_AUTOR_CLEAR_TIMERS();
    ERRMGR_AUTOR_SET_TIMER(ERRMGR_AUTOR_TIMER_START);

    /*
     * Display the processes that are to be recovered
     */
    OPAL_OUTPUT_VERBOSE((10, mca_errmgr_hnp_component.super.output_handle,
                         "%s errmgr:hnp(autor):recover() "
                         "------- Display known failed processes in the job %s -------",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(current_global_jobdata->jobid)));

    opal_list_sort(procs_pending_recovery, autor_procs_sort_compare_fn);
    display_procs();

    /*
     * Find the latest checkpoint
     */
    OPAL_OUTPUT_VERBOSE((10, mca_errmgr_hnp_component.super.output_handle,
                         "%s errmgr:hnp(autor):recover() "
                         "------- Find the latest checkpoint for the job %s -------",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(current_global_jobdata->jobid)));

    snapshot = OBJ_NEW(orte_sstore_base_global_snapshot_info_t);
    if( ORTE_SUCCESS != (ret = orte_sstore.request_global_snapshot_data(&orte_sstore_handle_last_stable, snapshot)) ) {
        ORTE_ERROR_LOG(ret);
        exit_status = ret;
        goto cleanup;
    }

    ERRMGR_AUTOR_SET_TIMER(ERRMGR_AUTOR_TIMER_SETUP);

    /*
     * Safely terminate the entire job
     */
    opal_output_verbose(10, mca_errmgr_hnp_component.super.output_handle,
                        "errmgr:hnp(autor):recover() "
                        "------- Safely terminate the job %s -------",
                        ORTE_JOBID_PRINT(current_global_jobdata->jobid));

    for(i_proc = 0; i_proc < opal_pointer_array_get_size(current_global_jobdata->procs); ++i_proc) {
        proc = (orte_proc_t*)opal_pointer_array_get_item(current_global_jobdata->procs, i_proc);
        if( NULL == proc ) {
            continue;
        }
        if( proc->state < ORTE_PROC_STATE_UNTERMINATED ) {
            proc->state = ORTE_PROC_STATE_MIGRATING;
        }
        if( current_global_jobdata->stdin_target == proc->name.vpid ) {
            orte_iof.close(&(proc->name), ORTE_IOF_STDIN);
        }
    }

    orte_plm.terminate_procs(current_global_jobdata->procs);

    /*
     * Wait for the job to terminate all processes
     */
    while(!check_if_terminated(current_global_jobdata->procs) ) {
        opal_progress();
    }

    ERRMGR_AUTOR_SET_TIMER(ERRMGR_AUTOR_TIMER_TERM);

    opal_output_verbose(10, mca_errmgr_hnp_component.super.output_handle,
                        "errmgr:hnp(autor):recover() "
                        "------- Done waiting for termination of job %s -------",
                        ORTE_JOBID_PRINT(current_global_jobdata->jobid));
    current_global_jobdata->num_terminated = current_global_jobdata->num_procs;
    orte_plm_base_reset_job(current_global_jobdata);

    /*
     * Construct the app contexts to restart
     */
    OPAL_OUTPUT_VERBOSE((10, mca_errmgr_hnp_component.super.output_handle,
                         "%s errmgr:hnp(autor):recover() "
                         "------- Rebuild job %s app context -------",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(current_global_jobdata->jobid)));
    for(i_proc = 0; i_proc < opal_pointer_array_get_size(current_global_jobdata->procs); ++i_proc) {
        proc = (orte_proc_t*)opal_pointer_array_get_item(current_global_jobdata->procs, i_proc);
        if( NULL == proc ) {
            continue;
        }

        if( ORTE_SUCCESS != (ret = orte_errmgr_base_update_app_context_for_cr_recovery(current_global_jobdata,
                                                                                       proc,
                                                                                       &(snapshot->local_snapshots))) ) {
            ORTE_ERROR_LOG(ret);
            exit_status = ret;
            goto cleanup;
        }

        OPAL_OUTPUT_VERBOSE((10, mca_errmgr_hnp_component.super.output_handle,
                             "\tAdjusted: \"%s\" [0x%d] [%s]\n",
                             ORTE_NAME_PRINT(&proc->name), proc->state, proc->node->name));
    }

    ERRMGR_AUTOR_SET_TIMER(ERRMGR_AUTOR_TIMER_RESETUP);

    /*
     * Spawn the restarted job
     */
    opal_output_verbose(10, mca_errmgr_hnp_component.super.output_handle,
                        "errmgr:hnp(autor):recover() "
                        "------- Respawning the job %s -------",
                        ORTE_JOBID_PRINT(current_global_jobdata->jobid));
    orte_snapc_base_has_recovered = false;
    autor_mask_faults = false; /* Failures pass this point are worth noting */
    orte_plm.spawn(current_global_jobdata);

    /*
     * Wait for all the processes to restart
     */
    opal_output_verbose(10, mca_errmgr_hnp_component.super.output_handle,
                        "errmgr:hnp(autor):recover() "
                        "------- Waiting for restart -------");
    while(!check_if_restarted(current_global_jobdata->procs) ) {
        opal_progress();
    }

    ERRMGR_AUTOR_SET_TIMER(ERRMGR_AUTOR_TIMER_RESTART);

    /*
     * All done
     */
    while( !orte_snapc_base_has_recovered ) {
        opal_progress();
    }

    opal_output_verbose(10, mca_errmgr_hnp_component.super.output_handle,
                        "errmgr:hnp(autor):recover() "
                        "------- Finished recovering job %s -------",
                        ORTE_JOBID_PRINT(current_global_jobdata->jobid));

    opal_show_help("help-orte-errmgr-hnp.txt", "autor_recovery_complete", true);

    ERRMGR_AUTOR_SET_TIMER(ERRMGR_AUTOR_TIMER_FINISH);

 cleanup:
    while(NULL != (item = opal_list_remove_first(procs_pending_recovery))) {
        wp_item = (errmgr_autor_wp_item_t*)item;
        OBJ_RELEASE(wp_item);
    }

    if( NULL != tmp_str ) {
        free(tmp_str);
        tmp_str = NULL;
    }

    ERRMGR_AUTOR_DISPLAY_ALL_TIMERS();

    autor_timer_active = false;
    autor_mask_faults  = false;

    return;
}
示例#4
0
/*
 * Read the ALPS appinfo file and append one orte_node_t per allocated node
 * of our reservation to the given list.
 *
 * nodes    - list that receives the discovered nodes; it is sorted with
 *            compare_nodes before returning successfully.
 * filename - path of the appinfo file to read.
 * uMe      - points to the reservation id used to filter entries.
 *
 * The file is read in full, retrying with an increasing delay while it
 * cannot be opened or read completely; the retry budget comes from
 * orte_ras_alps_get_appinfo_attempts(). A file that is too short to hold
 * the header is treated like an incomplete read.
 *
 * Returns ORTE_SUCCESS, or ORTE_ERR_FILE_OPEN_FAILURE, ORTE_ERR_NOT_AVAILABLE,
 * ORTE_ERR_OUT_OF_RESOURCE or ORTE_ERR_FILE_READ_FAILURE. The file descriptor
 * and the read buffer are released on every return path.
 *
 * NOTE(review): offsets taken from the file contents (numCmds, numPlaces,
 * apNum) are not validated against the file size -- confirm the file is
 * trusted.
 */
static int
orte_ras_alps_read_appinfo_file(opal_list_t *nodes, char *filename,
                                unsigned int *uMe)
{
    int             iq;
    int             ix;
    int             iFd;                    /* file descriptor for appinfo    */
    int             iTrips;                 /* counter appinfo read attempts  */
    int             max_appinfo_read_attempts;
    struct stat     ssBuf;                  /* stat buffer                    */
    size_t          szLen;                  /* size of appinfo (file)         */
    off_t           oNow;                   /* current appinfo data offset    */
    off_t           oInfo=sizeof(appInfoHdr_t);
    off_t           oDet=sizeof(appInfo_t);
    off_t           oSlots;
    off_t           oEntry;
    int32_t         sNodes=0;
    char            *cpBuf;
    char            *hostname;
    orte_node_t     *node = NULL;
    appInfoHdr_t    *apHdr;                 /* ALPS header structure          */
    appInfo_t       *apInfo;                /* ALPS table info structure      */
#if ALPS_APPINFO_VERSION==0
    placeList_t     *apSlots;               /* ALPS node specific info        */
#else
    orte_ras_alps_placeNodeList_t *apNodes;
#endif

    orte_ras_alps_get_appinfo_attempts(&max_appinfo_read_attempts);
    oNow=0;
    iTrips=0;
    opal_output_verbose(1, orte_ras_base_framework.framework_output,
                        "ras:alps:allocate: begin processing appinfo file");

    while(!oNow) {                          /* Until appinfo read is complete */
        iTrips++;                           /* Increment trip count           */

        iFd=open( filename, O_RDONLY );
        if( iFd==-1 ) {                     /* If file absent, ALPS is down   */
            opal_output_verbose(1, orte_ras_base_framework.framework_output,
                                "ras:alps:allocate: ALPS information open failure");
            usleep(iTrips*50000);           /* Increasing delays, .05 s/try   */

            /*          Fail only when number of attempts have been exhausted.            */
            if( iTrips <= max_appinfo_read_attempts ) continue;
            ORTE_ERROR_LOG(ORTE_ERR_FILE_OPEN_FAILURE);
            return ORTE_ERR_FILE_OPEN_FAILURE;
        }
        if( fstat( iFd, &ssBuf )==-1 ) {    /* If stat fails, access denied   */

            close(iFd);                     /* Do not leak the descriptor     */
            ORTE_ERROR_LOG(ORTE_ERR_NOT_AVAILABLE);
            return ORTE_ERR_NOT_AVAILABLE;
        }

        szLen=ssBuf.st_size;                /* Get buffer size                */
        cpBuf=malloc(szLen+1);              /* Allocate buffer                */
        if (NULL == cpBuf) {
            close(iFd);                     /* Do not leak the descriptor     */
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            return ORTE_ERR_OUT_OF_RESOURCE;
        }

        /*      Repeated attempts to read appinfo, with an increasing delay between   *
         *      successive attempts to allow scheduler I/O a chance to complete.      *
         *      A result shorter than the header (including an empty file) cannot be  *
         *      parsed and would otherwise re-enter this loop without releasing the   *
         *      buffer and descriptor, so it takes the retry path as well.            */
        oNow=read( iFd, cpBuf, szLen );
        if( oNow!=(off_t)szLen || oNow<oInfo ) {

            /*          This is where apstat fails; we will record it and try again.      */
            opal_output_verbose(1, orte_ras_base_framework.framework_output,
                                "ras:alps:allocate: ALPS information read failure: %ld bytes", (long int)oNow);

            free(cpBuf);                    /* Free (old) buffer              */
            close(iFd);                     /* Close (old) descriptor         */
            oNow=0;                         /* Reset byte count               */
            usleep(iTrips*50000);           /* Increasing delays, .05 s/try   */

            /*          Fail only when number of attempts have been exhausted.            */
            if( iTrips<=max_appinfo_read_attempts ) continue;
            ORTE_ERROR_LOG(ORTE_ERR_FILE_READ_FAILURE);
            return ORTE_ERR_FILE_READ_FAILURE;
        }
    }
    close(iFd);
    opal_output_verbose(1, orte_ras_base_framework.framework_output,
                        "ras:alps:allocate: file %s read", filename);

    /*  Now that we have the scheduler information, we just have to parse it for  *
     *  the data that we seek.                                                    */
    oNow=0;
    apHdr=(appInfoHdr_t *)cpBuf;

    opal_output_verbose(1, orte_ras_base_framework.framework_output,
                        "ras:alps:allocate: %d entries in file", apHdr->apNum);

    /*  Header info (apHdr) tells us how many entries are in the file:            *
     *                                                                            *
     *      apHdr->apNum                                                          */

    for( iq=0; iq<apHdr->apNum; iq++ ) {    /*  Parse all entries in file     */

        /*      Just at this level, a lot of information is available:                *
         *                                                                            *
         *          apInfo->apid         ... ALPS job ID                              *
         *          apInfo->resId        ... ALPS reservation ID                      *
         *          apInfo->numCmds      ... Number of executables                    *
         *          apInfo->numPlaces    ... Number of PEs                            */
        apInfo=(appInfo_t *)(cpBuf+oNow+oInfo);

        /*      Calculate the dependent offsets.                                      */
        oSlots=sizeof(cmdDetail_t)*apInfo->numCmds;

        opal_output_verbose(1, orte_ras_base_framework.framework_output,
                            "ras:alps:allocate: read data for resId %u - myId %u",
                            apInfo->resId, *uMe);


#if ALPS_APPINFO_VERSION==0

        /*      Finally, we get to the actual node-specific information:              *
         *                                                                            *
         *          apSlots[ix].cmdIx    ... index of apDet[].cmd                     *
         *          apSlots[ix].nid      ... NodeID (NID)                             *
         *          apSlots[ix].procMask ... mask for processors... need 16-bit shift */
        apSlots=(placeList_t *)(cpBuf+oNow+oInfo+oDet+oSlots);
        oEntry=sizeof(placeList_t)*apInfo->numPlaces;

        oNow+=(oDet+oSlots+oEntry);         /* Target next slot               */

        if( apInfo->resId != *uMe ) continue; /* Filter to our reservation Id */

        /* in this early version of alps, there is one entry for each PE in the
         * allocation - so cycle across the numPlaces entries, assigning a slot
         * for each time a node is named
         */
        for( ix=0; ix<apInfo->numPlaces; ix++ ) {

            opal_output_verbose(5, orte_ras_base_framework.framework_output,
                                "ras:alps:read_appinfo: got NID %d", apSlots[ix].nid);

            /* asprintf reports failure through its return value; the
             * output pointer is not reliable in that case */
            if( asprintf( &hostname, "%d", apSlots[ix].nid ) < 0 ) {
                free(cpBuf);                /* Do not leak the file buffer    */
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                return ORTE_ERR_OUT_OF_RESOURCE;
            }

            /*          If this matches the prior nodename, just add to the slot count.   */
            if( NULL!=node && !strcmp(node->name, hostname) ) {

                free(hostname);             /* free hostname since not needed */
                ++node->slots;
            } else {                        /* must be new, so add to list    */

                opal_output_verbose(1, orte_ras_base_framework.framework_output,
                                    "ras:alps:read_appinfo: added NID %d to list", apSlots[ix].nid);

                node = OBJ_NEW(orte_node_t);
                node->name = hostname;
                orte_set_attribute(&node->attributes, ORTE_NODE_LAUNCH_ID, ORTE_ATTR_LOCAL, &apSlots[ix].nid, OPAL_INT32);
                node->slots_inuse = 0;
                node->slots_max = 0;
                node->slots = 1;
                node->state = ORTE_NODE_STATE_UP;
                /* need to order these node ids so the regex generator
                 * can properly function
                 */
                /* add it to the end */
                opal_list_append(nodes, &node->super);
                sNodes++;                   /* Increment the node count       */
            }
        }
#else
        /* in newer versions of alps, there is one entry for each node in the
         * allocation, and that struct directly carries the number of PEs
         * allocated on that node to this job.
         */
        apNodes=(orte_ras_alps_placeNodeList_t *)(cpBuf+oNow+oInfo+oDet+oSlots);
        oEntry=sizeof(orte_ras_alps_placeNodeList_t)*apInfo->numPlaces;

        oNow+=(oDet+oSlots+oEntry);         /* Target next entry               */

        if( apInfo->resId != *uMe ) continue; /* Filter to our reservation Id */

        for( ix=0; ix<apInfo->numPlaces; ix++ ) {
            opal_output_verbose(5, orte_ras_base_framework.framework_output,
                                "ras:alps:read_appinfo(modern): processing NID %d with %d slots",
                                apNodes[ix].nid, apNodes[ix].numPEs);
            /* asprintf reports failure through its return value; the
             * output pointer is not reliable in that case */
            if( asprintf( &hostname, "%d", apNodes[ix].nid ) < 0 ) {
                free(cpBuf);                /* Do not leak the file buffer    */
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                return ORTE_ERR_OUT_OF_RESOURCE;
            }

            node = OBJ_NEW(orte_node_t);
            node->name = hostname;
            orte_set_attribute(&node->attributes, ORTE_NODE_LAUNCH_ID, ORTE_ATTR_LOCAL, &apNodes[ix].nid, OPAL_INT32);
            node->slots_inuse = 0;
            node->slots_max = 0;
            node->slots = apNodes[ix].numPEs;
            node->state = ORTE_NODE_STATE_UP;
            /* need to order these node ids so the regex generator
             * can properly function
             */
            /* add it to the end */
            opal_list_append(nodes, &node->super);
            sNodes++;                   /* Increment the node count       */
        }
#endif
        break;                              /* Extended details ignored       */
    }

    opal_list_sort (nodes, compare_nodes);

    free(cpBuf);                            /* Free the buffer                */

    return ORTE_SUCCESS;
}
示例#5
0
文件: orte-restart.c 项目: ORNL/ompi
/*
 * Write the restart appfile for a global snapshot.
 *
 * The file is created as "<absolute snapshot location>/restart-appfile"
 * (the path is stored in orte_restart_globals.appfile) and receives one
 * "-np 1 ... opal-restart ..." line per local snapshot, in the order
 * produced by snapshot_sort_compare_fn.
 *
 * snapshot - global snapshot whose handle is queried for attributes and
 *            whose local_snapshots list is sorted and iterated.
 *
 * Returns ORTE_SUCCESS, or ORTE_ERROR when the appfile path cannot be built,
 * the file cannot be opened, or a required attribute (location or reference
 * format string) is unavailable. Every attribute string obtained here is
 * freed before returning.
 */
static int create_appfile(orte_sstore_base_global_snapshot_info_t *snapshot)
{
    int exit_status = ORTE_SUCCESS;
    FILE *appfile = NULL;
    opal_list_item_t* item = NULL;
    char *tmp_str = NULL;
    char *amca_param = NULL;
    char *tune_param = NULL;
    char *reference_fmt_str = NULL;
    char *location_str = NULL;
    char *ref_location_fmt_str = NULL;
    orte_sstore_base_local_snapshot_info_t *vpid_snapshot = NULL;

    /*
     * Create the appfile
     */
    orte_sstore.get_attr(snapshot->ss_handle,
                         SSTORE_METADATA_GLOBAL_SNAP_LOC_ABS,
                         &tmp_str);
    if( NULL == tmp_str ) {
        /* Without the snapshot location there is no path to build */
        exit_status = ORTE_ERROR;
        goto cleanup;
    }
    /* The file name is a plain literal: no temporary copy is needed */
    if( 0 > asprintf(&orte_restart_globals.appfile, "%s/%s",
                     tmp_str,
                     "restart-appfile") ) {
        orte_restart_globals.appfile = NULL;
        exit_status = ORTE_ERROR;
        goto cleanup;
    }
    free(tmp_str);
    tmp_str = NULL;

    orte_sstore.get_attr(snapshot->ss_handle,
                         SSTORE_METADATA_GLOBAL_AMCA_PARAM,
                         &amca_param);

    orte_sstore.get_attr(snapshot->ss_handle,
                         SSTORE_METADATA_GLOBAL_TUNE_PARAM,
                         &tune_param);

    if (NULL == (appfile = fopen(orte_restart_globals.appfile, "w")) ) {
        exit_status = ORTE_ERROR;
        goto cleanup;
    }

    /* This will give a format string that we can use */
    orte_sstore.get_attr(snapshot->ss_handle,
                         SSTORE_METADATA_LOCAL_SNAP_REF_FMT,
                         &reference_fmt_str);
    orte_sstore.get_attr(snapshot->ss_handle,
                         SSTORE_METADATA_LOCAL_SNAP_LOC,
                         &location_str);
    orte_sstore.get_attr(snapshot->ss_handle,
                         SSTORE_METADATA_LOCAL_SNAP_REF_LOC_FMT,
                         &ref_location_fmt_str);

    /* Both strings are handed to fprintf below (one of them as the format
     * itself), so they must be present */
    if( NULL == reference_fmt_str || NULL == location_str ) {
        exit_status = ORTE_ERROR;
        goto cleanup;
    }

    /*
     * Sort the snapshots so that they are in order
     */
    opal_list_sort(&snapshot->local_snapshots, snapshot_sort_compare_fn);

    /*
     * Construct the appfile
     */
    for(item  = opal_list_get_first(&snapshot->local_snapshots);
        item != opal_list_get_end(&snapshot->local_snapshots);
        item  = opal_list_get_next(item) ) {
        vpid_snapshot = (orte_sstore_base_local_snapshot_info_t*)item;

        fprintf(appfile, "#\n");
        fprintf(appfile, "# Old Process Name: %u.%u\n",
                vpid_snapshot->process_name.jobid,
                vpid_snapshot->process_name.vpid);
        fprintf(appfile, "#\n");
        fprintf(appfile, "-np 1 ");

        fprintf(appfile, "--sstore-load ");
        /* loc:ref:postfix:seq */
        fprintf(appfile, "%s:%s:",
                location_str,
                orte_restart_globals.snapshot_ref);
        /* reference_fmt_str is a format string supplied by the snapshot
         * metadata; it is expanded with the vpid */
        fprintf(appfile, reference_fmt_str, vpid_snapshot->process_name.vpid);
        fprintf(appfile, ":%s:%s:%d ",
                (vpid_snapshot->compress_comp == NULL ? "" : vpid_snapshot->compress_comp),
                (vpid_snapshot->compress_postfix == NULL ? "" : vpid_snapshot->compress_postfix),
                orte_restart_globals.seq_number);

        /* Fall back to a default (and tell the user) when the metadata did
         * not record the parameter; this happens at most once */
        if( NULL == amca_param ) {
            amca_param = strdup("ft-enable-cr");
            opal_show_help("help-orte-restart.txt", "amca_param_not_found", true,
                           amca_param);
        }
        fprintf(appfile, "-am %s ", amca_param);

        if( NULL == tune_param ) {
            tune_param = strdup("ft-enable-cr");
            opal_show_help("help-orte-restart.txt", "tune_param_not_found", true,
                           tune_param);
        }
        fprintf(appfile, "-tune %s ", tune_param);

        fprintf(appfile, " opal-restart ");

        /*
         * By default, point to the central storage location of the checkpoint.
         * The active SStore module at restart time will determine if files
         * need to be preloaded.
         */
        fprintf(appfile, "-l %s", location_str);
        fprintf(appfile, " -m %s ", orte_sstore_base_local_metadata_filename);

        fprintf(appfile, "-r ");
        fprintf(appfile, reference_fmt_str, vpid_snapshot->process_name.vpid);

        fprintf(appfile, "\n");
    }

 cleanup:
    if(NULL != appfile) {
        fclose(appfile);
        appfile = NULL;
    }
    if( NULL != tmp_str ) {
        free(tmp_str);
        tmp_str = NULL;
    }
    if( NULL != amca_param ) {
        free(amca_param);
        amca_param = NULL;
    }
    if( NULL != tune_param ) {
        free(tune_param);
        tune_param = NULL;
    }
    if( NULL != location_str ) {
        free(location_str);
        location_str = NULL;
    }
    if( NULL != reference_fmt_str ) {
        free(reference_fmt_str);
        reference_fmt_str = NULL;
    }
    if( NULL != ref_location_fmt_str ) {
        free(ref_location_fmt_str);
        ref_location_fmt_str = NULL;
    }

    return exit_status;
}
/*
 * Drain one batch of completions from a completion queue and handle the
 * received messages.
 *
 * event_cq - completion queue to poll.
 * port     - port whose receive buffers the completions refer to.
 *
 * Work is done in three passes:
 *   1. poll the CQ; ACK/NACK/END messages are handled immediately and their
 *      receive buffer is reposted, all other messages from a known peer are
 *      queued on the component's processing list;
 *   2. sort the queue (mca_oob_ud_msg_item_cmp), send ACKs/NACKs, and drop
 *      every message whose id is not the one expected from its peer;
 *   3. dispatch the messages that are left by message type.
 *
 * Returns the number of completions polled, or the negative value returned
 * by ibv_poll_cq().
 */
static int mca_oob_ud_process_messages (struct ibv_cq *event_cq, mca_oob_ud_port_t *port)
{
    /* number of completions fetched per call */
    enum { OOB_UD_WC_BATCH = 40 };

    mca_oob_ud_msg_item_t *msg_item, *next_item;
    opal_list_t *processing_msgs = &mca_oob_ud_component.ud_event_processing_msgs;
    mca_oob_ud_peer_t *peer;
    mca_oob_ud_msg_hdr_t *msg_hdr;
    int msg_num, i, count;
    struct ibv_wc wc[OOB_UD_WC_BATCH];
    bool peer_nacked;

    count = ibv_poll_cq (event_cq, OOB_UD_WC_BATCH, wc);
    if (count < 0)
        return count;

    /* acknowledge the events */
    ibv_ack_cq_events (event_cq, count);

    for (i = 0 ; i < count ; ++i) {
        /* the work-request id carries the receive-buffer index plus a flag
           bit marking receive requests */
        msg_num = (int)(wc[i].wr_id & (~MCA_OOB_UD_RECV_WR));
        msg_hdr = (mca_oob_ud_msg_hdr_t *) (port->msg_buf.ptr + msg_num * port->mtu);

        VALGRIND_MAKE_MEM_DEFINED(msg_hdr, wc[i].byte_len);

        if (!(wc[i].wr_id & MCA_OOB_UD_RECV_WR) || IBV_WC_SUCCESS != wc[i].status) {
            mca_oob_ud_port_post_one_recv (port, msg_num);
            continue;
        }

        peer = mca_oob_ud_get_peer (port, &msg_hdr->ra.name, wc[i].src_qp, msg_hdr->ra.qkey,
                                    wc[i].slid, msg_hdr->ra.port_num);

        if (peer) {
            if (MCA_OOB_UD_MSG_ACK != msg_hdr->msg_type && MCA_OOB_UD_MSG_NACK != msg_hdr->msg_type &&
                MCA_OOB_UD_MSG_END != msg_hdr->msg_type) {
                /* queue for ordered processing; the receive buffer stays
                   in use until the message has been handled */
                mca_oob_ud_msg_item_t *new_item = OBJ_NEW(mca_oob_ud_msg_item_t);

                new_item->msg_num = msg_num;
                new_item->hdr     = msg_hdr;
                new_item->port    = port;
                new_item->peer    = peer;

                opal_list_append (processing_msgs, (opal_list_item_t *) new_item);
            } else {
                if (MCA_OOB_UD_MSG_ACK == msg_hdr->msg_type) {
                    (void) mca_oob_ud_event_handle_ack (port, peer, msg_hdr);
                } else if (MCA_OOB_UD_MSG_NACK == msg_hdr->msg_type) {
                    (void) mca_oob_ud_event_handle_nack (port, peer, msg_hdr);
                } else {
                    mca_oob_ud_event_handle_end (peer, msg_hdr);
                }

                mca_oob_ud_port_post_one_recv (port, msg_num);
            }
        } else {
            OPAL_OUTPUT_VERBOSE((10, mca_oob_base_output, "%s oob:ud:process_message got a null peer for message id %"
                                 PRIu64, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), msg_hdr->msg_id));
            mca_oob_ud_port_post_one_recv (port, msg_num);
        }
    }

    /* Sort messages by peer then id */
    opal_list_sort (processing_msgs, mca_oob_ud_msg_item_cmp);

    /* Send ACKs/NACKs and throw away out-of-order messages */
    msg_item = (mca_oob_ud_msg_item_t *) mca_oob_ud_list_get_first (processing_msgs);

    for (peer = NULL, peer_nacked = false ; NULL != msg_item ; msg_item = next_item) {
        /* the NACK state is tracked per peer */
        if (peer != msg_item->peer) {
            peer_nacked = false;
        }

        peer = msg_item->peer;

        /* fetch the successor first: msg_item may be removed below */
        next_item = (mca_oob_ud_msg_item_t *) mca_oob_ud_list_get_next (processing_msgs,
                                                                        (opal_list_item_t *)msg_item);

        if (false == peer_nacked) {
            if (msg_item->hdr->msg_id > peer->peer_expected_id) {
                (void) mca_oob_ud_event_send_nack (msg_item->port, peer, msg_item->hdr);
                peer_nacked = true;
            } else if (NULL == next_item || (next_item->peer != msg_item->peer)) {
                /* last queued message of this peer: acknowledge it */
                (void) mca_oob_ud_event_send_ack (msg_item->port, msg_item->peer, msg_item->hdr);
            }
        }

        if (msg_item->hdr->msg_id != peer->peer_expected_id) {
            opal_list_remove_item (processing_msgs, (opal_list_item_t *) msg_item);
            OBJ_RELEASE(msg_item);
        } else {
            peer->peer_expected_id++;
        }
    }

    /* Process remaining messages */
    while (NULL !=
           (msg_item = (mca_oob_ud_msg_item_t *) opal_list_remove_first (processing_msgs))) {
        switch (msg_item->hdr->msg_type) {
        case MCA_OOB_UD_MSG_REQUEST:
            mca_oob_ud_event_handle_req (port, msg_item->peer, msg_item->hdr);
            break;
        case MCA_OOB_UD_MSG_REPLY:
            mca_oob_ud_event_handle_rep (port, msg_item->hdr);
            break;
        case MCA_OOB_UD_MSG_COMPLETE:
            mca_oob_ud_event_handle_completion (port, msg_item->hdr);
            break;
        case MCA_OOB_UD_MSG_DATA_OK:
            mca_oob_ud_event_handle_data_ok (port, msg_item->hdr);
            break;
        case MCA_OOB_UD_MSG_END:
            /* use the peer recorded with this message, not whichever peer
               the ordering pass happened to visit last */
            mca_oob_ud_event_handle_end (msg_item->peer, msg_item->hdr);
            break;
        default:
            /* do nothing */
            break;
        }

        OBJ_RELEASE(msg_item);
    }

    return count;
}
示例#7
0
/*
 * Begin flow-control recovery.
 *
 * Marks flow control as active, advances the epoch counter, re-arms the
 * alarm and barrier for the new epoch, waits until all send slots have been
 * returned and the event queue is empty, re-sorts the pending sends with
 * seqnum_compare and finally sends the barrier fan-in message. Completion
 * of the recovery is signalled asynchronously by the fan-out event, so this
 * routine does not wait for it.
 *
 * Returns OMPI_SUCCESS, or the failing status from setup_alarm(),
 * setup_barrier() or opal_list_sort().
 * NOTE(review): when PtlPut() fails its raw return value is handed back
 * unchanged -- confirm callers expect that rather than an OMPI_* code.
 */
static int
start_recover(void)
{
    int ret;
    int64_t epoch_counter;

    ompi_mtl_portals4.flowctl.flowctl_active = true;
    epoch_counter = opal_atomic_add_64(&ompi_mtl_portals4.flowctl.epoch_counter, 1);

    /* int64_t is not necessarily `long`: print it as long long */
    opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                        "Entering flowctl_start_recover %lld",
                        (long long) epoch_counter);

    /* re-arm trigger/alarm for next time */
    ret = setup_alarm(epoch_counter);
    if (OMPI_SUCCESS != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d setup_alarm failed: %d\n",
                            __FILE__, __LINE__, ret);
        return ret;
    }

    /* setup barrier tree for getting us out of flow control */
    ret = setup_barrier(epoch_counter);
    if (OMPI_SUCCESS != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d setup_barrier failed: %d\n",
                            __FILE__, __LINE__, ret);
        return ret;
    }

    /* drain all pending sends */
    while (ompi_mtl_portals4.flowctl.send_slots !=
           ompi_mtl_portals4.flowctl.max_send_slots) {
        opal_progress();
    }

    /* drain event queue */
    while (0 != ompi_mtl_portals4_progress()) { ; }

    /* check short block active count; a failure here is only reported,
       recovery carries on */
    ret = ompi_mtl_portals4_recv_short_link(1);
    if (OMPI_SUCCESS != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: recv_short_link failed: %d",
                            __FILE__, __LINE__, ret);
    }

    /* reorder the pending sends by operation count */
    ret = opal_list_sort(&ompi_mtl_portals4.flowctl.pending_sends, seqnum_compare);
    if (OMPI_SUCCESS != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d opal_list_sort failed: %d\n",
                            __FILE__, __LINE__, ret);
        return ret;
    }

    /* drain event queue again, just to make sure */
    while (0 != ompi_mtl_portals4_progress()) { ; }

    /* send barrier entry message */
    ret = PtlPut(ompi_mtl_portals4.zero_md_h,
                 0,
                 0,
                 PTL_NO_ACK_REQ,
                 ompi_mtl_portals4.flowctl.me,
                 ompi_mtl_portals4.flowctl_idx,
                 MTL_PORTALS4_FLOWCTL_FANIN,
                 0,
                 NULL,
                 0);
    if (OPAL_UNLIKELY(PTL_OK != ret)) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlPut failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }

    /* recovery complete when fan-out event arrives, async event, so
       we're done now */
    ret = OMPI_SUCCESS;

 error:
    OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                         "Exiting flowctl_start_recover %lld",
                         (long long) epoch_counter));

    return ret;
}