/*
 * Fill in a global snapshot info structure for the given sstore handle
 * (or for the last stable handle when 'handle' is NULL).
 *
 * Metadata comes from the in-memory cache when the handle refers to the
 * currently-active checkpoint, otherwise from the on-disk metadata file.
 *
 * @param handle    Handle to look up, or NULL for orte_sstore_handle_last_stable.
 * @param snapshot  [out] Populated with seq num, reference, base dir,
 *                  metadata filename and the sorted local snapshot list.
 * @return ORTE_SUCCESS, ORTE_ERR_NOT_FOUND if the handle is unknown, or the
 *         error code from the metadata extraction routines.
 */
int orte_sstore_central_global_request_global_snapshot_data(orte_sstore_base_handle_t *handle,
                                                            orte_sstore_base_global_snapshot_info_t *snapshot)
{
    int ret, exit_status = ORTE_SUCCESS;
    orte_sstore_central_global_snapshot_info_t *handle_info = NULL;

    OPAL_OUTPUT_VERBOSE((10, mca_sstore_central_component.super.output_handle,
                         "sstore:central:(global): request_global_snapshot_data()"));

    /*
     * Lookup the handle (if NULL, use last stable)
     */
    if( NULL != handle ) {
        handle_info = find_handle_info(*handle);
        snapshot->ss_handle = *handle;
    } else {
        handle_info = find_handle_info(orte_sstore_handle_last_stable);
        snapshot->ss_handle = orte_sstore_handle_last_stable;
    }

    /* BUGFIX: find_handle_info() can fail to match; the previous code
     * dereferenced a NULL handle_info here. */
    if( NULL == handle_info ) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        exit_status = ORTE_ERR_NOT_FOUND;
        goto cleanup;
    }

    /*
     * Construct the snapshot from local data, and metadata file
     */
    snapshot->seq_num   = handle_info->seq_num;
    snapshot->reference = strdup(handle_info->ref_name);
    snapshot->basedir   = strdup(handle_info->base_location);
    snapshot->metadata_filename = strdup(handle_info->metadata_filename);

    /* If this is the current checkpoint, pull data from local cache */
    if( orte_sstore_handle_current == snapshot->ss_handle ) {
        if( ORTE_SUCCESS != (ret = orte_sstore_central_extract_global_metadata(handle_info, snapshot)) ) {
            ORTE_ERROR_LOG(ret);
            exit_status = ret;
            goto cleanup;
        }
    }
    /* Otherwise, pull from metadata */
    else {
        if( ORTE_SUCCESS != (ret = orte_sstore_base_extract_global_metadata(snapshot)) ) {
            ORTE_ERROR_LOG(ret);
            exit_status = ret;
            goto cleanup;
        }
    }

    /* Callers expect the per-process snapshots in sorted order */
    opal_list_sort(&snapshot->local_snapshots, central_snapshot_sort_compare_fn);

 cleanup:
    return exit_status;
}
/* * Find all available RAS components and sort them according to * priority */ int orte_ras_base_find_available(void) { opal_list_item_t *item; mca_base_component_list_item_t *cli; orte_ras_base_component_t *component; orte_ras_base_module_t *module; int priority, rc; orte_ras_base_cmp_t *cmp; orte_ras_base.ras_available_valid = false; if (orte_ras_base.ras_opened_valid) { OBJ_CONSTRUCT(&orte_ras_base.ras_available, opal_list_t); orte_ras_base.ras_available_valid = true; for (item = opal_list_get_first(&orte_ras_base.ras_opened); opal_list_get_end(&orte_ras_base.ras_opened) != item; item = opal_list_get_next(item)) { cli = (mca_base_component_list_item_t *) item; component = (orte_ras_base_component_t *) cli->cli_component; opal_output(orte_ras_base.ras_output, "orte:ras:base:open: querying component %s", component->ras_version.mca_component_name); /* Call the component's init function and see if it wants to be selected */ module = component->ras_init(&priority); /* If we got a non-NULL module back, then the component wants to be considered for selection */ if (NULL != module) { opal_output(orte_ras_base.ras_output, "orte:ras:base:open: component %s returns priority %d", component->ras_version.mca_component_name, priority); cmp = OBJ_NEW(orte_ras_base_cmp_t); cmp->component = component; cmp->module = module; cmp->priority = priority; opal_list_append(&orte_ras_base.ras_available, &cmp->super); } else { opal_output(orte_ras_base.ras_output, "orte:ras:base:open: component %s does NOT want to be considered for selection", component->ras_version.mca_component_name); } } /* Sort the resulting available list in priority order */ opal_list_sort(&orte_ras_base.ras_available, compare); /* if we are an HNP, start the receive */ if (orte_process_info.seed) { if (ORTE_SUCCESS != (rc = orte_ras_base_comm_start())) { ORTE_ERROR_LOG(rc); return rc; } } } return ORTE_SUCCESS; }
/*
 * Automatic-recovery worker (event callback): recover all processes queued
 * on procs_pending_recovery.
 *
 * Sequence (order is load-bearing — each phase busy-waits on the previous):
 *   1. Display the failed processes and locate the last stable checkpoint.
 *   2. Safely terminate the whole job, waiting for full termination.
 *   3. Rebuild each proc's app context from the checkpoint's local snapshots.
 *   4. Respawn the job and wait for restart + snapc recovery to complete.
 *
 * fd/event/cbdata follow the libevent callback signature; they are unused here.
 * autor_mask_faults suppresses fault reports while we deliberately kill procs.
 */
static void errmgr_autor_recover_processes(int fd, short event, void *cbdata)
{
    int ret, exit_status = ORTE_SUCCESS;
    opal_list_item_t *item = NULL;
    errmgr_autor_wp_item_t *wp_item = NULL;
    orte_std_cntr_t i_proc;
    orte_proc_t *proc = NULL;
    orte_sstore_base_global_snapshot_info_t *snapshot = NULL;
    char * tmp_str = NULL;

    /* Mask fault reporting: the terminations below are intentional */
    autor_mask_faults = true;
    ERRMGR_AUTOR_CLEAR_TIMERS();
    ERRMGR_AUTOR_SET_TIMER(ERRMGR_AUTOR_TIMER_START);

    /*
     * Display the processes that are to be recovered
     */
    OPAL_OUTPUT_VERBOSE((10, mca_errmgr_hnp_component.super.output_handle,
                         "%s errmgr:hnp(autor):recover() "
                         "------- Display known failed processes in the job %s -------",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(current_global_jobdata->jobid)));

    opal_list_sort(procs_pending_recovery, autor_procs_sort_compare_fn);
    display_procs();

    /*
     * Find the latest checkpoint
     */
    OPAL_OUTPUT_VERBOSE((10, mca_errmgr_hnp_component.super.output_handle,
                         "%s errmgr:hnp(autor):recover() "
                         "------- Find the latest checkpoint for the job %s -------",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(current_global_jobdata->jobid)));

    snapshot = OBJ_NEW(orte_sstore_base_global_snapshot_info_t);
    if( ORTE_SUCCESS != (ret = orte_sstore.request_global_snapshot_data(&orte_sstore_handle_last_stable, snapshot)) ) {
        ORTE_ERROR_LOG(ret);
        exit_status = ret;
        goto cleanup;
    }

    ERRMGR_AUTOR_SET_TIMER(ERRMGR_AUTOR_TIMER_SETUP);

    /*
     * Safely terminate the entire job
     */
    opal_output_verbose(10, mca_errmgr_hnp_component.super.output_handle,
                        "errmgr:hnp(autor):recover() "
                        "------- Safely terminate the job %s -------",
                        ORTE_JOBID_PRINT(current_global_jobdata->jobid));

    for(i_proc = 0; i_proc < opal_pointer_array_get_size(current_global_jobdata->procs); ++i_proc) {
        proc = (orte_proc_t*)opal_pointer_array_get_item(current_global_jobdata->procs, i_proc);
        if( NULL == proc ) {
            continue;
        }

        /* Mark still-running procs as migrating so termination is not
         * treated as a new fault */
        if( proc->state < ORTE_PROC_STATE_UNTERMINATED ) {
            proc->state = ORTE_PROC_STATE_MIGRATING;
        }

        /* Close stdin forwarding for the stdin target before killing it */
        if( current_global_jobdata->stdin_target == proc->name.vpid ) {
            orte_iof.close(&(proc->name), ORTE_IOF_STDIN);
        }
    }

    orte_plm.terminate_procs(current_global_jobdata->procs);

    /*
     * Wait for the job to terminate all processes
     */
    while(!check_if_terminated(current_global_jobdata->procs) ) {
        opal_progress();
    }

    ERRMGR_AUTOR_SET_TIMER(ERRMGR_AUTOR_TIMER_TERM);

    opal_output_verbose(10, mca_errmgr_hnp_component.super.output_handle,
                        "errmgr:hnp(autor):recover() "
                        "------- Done waiting for termination of job %s -------",
                        ORTE_JOBID_PRINT(current_global_jobdata->jobid));

    /* Reset the job's bookkeeping so it can be launched again */
    current_global_jobdata->num_terminated = current_global_jobdata->num_procs;
    orte_plm_base_reset_job(current_global_jobdata);

    /*
     * Construct the app contexts to restart
     */
    OPAL_OUTPUT_VERBOSE((10, mca_errmgr_hnp_component.super.output_handle,
                         "%s errmgr:hnp(autor):recover() "
                         "------- Rebuild job %s app context -------",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_JOBID_PRINT(current_global_jobdata->jobid)));

    for(i_proc = 0; i_proc < opal_pointer_array_get_size(current_global_jobdata->procs); ++i_proc) {
        proc = (orte_proc_t*)opal_pointer_array_get_item(current_global_jobdata->procs, i_proc);
        if( NULL == proc ) {
            continue;
        }

        /* Point this proc's app context at its checkpoint files */
        if( ORTE_SUCCESS != (ret = orte_errmgr_base_update_app_context_for_cr_recovery(current_global_jobdata,
                                                                                       proc,
                                                                                       &(snapshot->local_snapshots))) ) {
            ORTE_ERROR_LOG(ret);
            exit_status = ret;
            goto cleanup;
        }

        OPAL_OUTPUT_VERBOSE((10, mca_errmgr_hnp_component.super.output_handle,
                             "\tAdjusted: \"%s\" [0x%d] [%s]\n",
                             ORTE_NAME_PRINT(&proc->name), proc->state, proc->node->name));
    }

    ERRMGR_AUTOR_SET_TIMER(ERRMGR_AUTOR_TIMER_RESETUP);

    /*
     * Spawn the restarted job
     */
    opal_output_verbose(10, mca_errmgr_hnp_component.super.output_handle,
                        "errmgr:hnp(autor):recover() "
                        "------- Respawning the job %s -------",
                        ORTE_JOBID_PRINT(current_global_jobdata->jobid));
    orte_snapc_base_has_recovered = false;
    autor_mask_faults = false; /* Failures pass this point are worth noting */
    orte_plm.spawn(current_global_jobdata);

    /*
     * Wait for all the processes to restart
     */
    opal_output_verbose(10, mca_errmgr_hnp_component.super.output_handle,
                        "errmgr:hnp(autor):recover() "
                        "------- Waiting for restart -------");
    while(!check_if_restarted(current_global_jobdata->procs) ) {
        opal_progress();
    }

    ERRMGR_AUTOR_SET_TIMER(ERRMGR_AUTOR_TIMER_RESTART);

    /*
     * All done: wait for the snapc framework to signal recovery complete
     */
    while( !orte_snapc_base_has_recovered ) {
        opal_progress();
    }

    opal_output_verbose(10, mca_errmgr_hnp_component.super.output_handle,
                        "errmgr:hnp(autor):recover() "
                        "------- Finished recovering job %s -------",
                        ORTE_JOBID_PRINT(current_global_jobdata->jobid));

    opal_show_help("help-orte-errmgr-hnp.txt", "autor_recovery_complete", true);

    ERRMGR_AUTOR_SET_TIMER(ERRMGR_AUTOR_TIMER_FINISH);

 cleanup:
    /* Drain the pending-recovery work queue whether we succeeded or not */
    while(NULL != (item = opal_list_remove_first(procs_pending_recovery))) {
        wp_item = (errmgr_autor_wp_item_t*)item;
        OBJ_RELEASE(wp_item);
    }

    if( NULL != tmp_str ) {
        free(tmp_str);
        tmp_str = NULL;
    }

    ERRMGR_AUTOR_DISPLAY_ALL_TIMERS();

    autor_timer_active = false;
    autor_mask_faults = false;

    return;
}
/*
 * Parse the ALPS appinfo file to discover the nodes allocated to our
 * reservation (*uMe) and append them, in sorted order, to 'nodes'.
 *
 * The file is read with retries (increasing backoff) because the ALPS
 * scheduler may still be writing it.  Two on-disk layouts are supported,
 * selected at compile time by ALPS_APPINFO_VERSION: the early layout has
 * one placeList_t entry per PE, the modern layout one entry per node
 * carrying its PE count.
 *
 * BUGFIXES vs. previous revision: the open file descriptor was leaked on
 * the fstat and malloc failure paths, and the file buffer was leaked on
 * the asprintf failure paths inside the parse loops.
 *
 * @param nodes    [out] list to which discovered orte_node_t's are appended
 * @param filename path of the ALPS appinfo file
 * @param uMe      our ALPS reservation id, used to filter entries
 * @return ORTE_SUCCESS or an ORTE error code.
 */
static int orte_ras_alps_read_appinfo_file(opal_list_t *nodes, char *filename, unsigned int *uMe)
{
    int iq;
    int ix;
    int iFd;                    /* file descriptor for appinfo    */
    int iTrips;                 /* counter appinfo read attempts  */
    int max_appinfo_read_attempts;
    struct stat ssBuf;          /* stat buffer                    */
    size_t szLen;               /* size of appinfo (file)         */
    off_t oNow;                 /* current appinfo data offset    */
    off_t oInfo=sizeof(appInfoHdr_t);
    off_t oDet=sizeof(appInfo_t);
    off_t oSlots;
    off_t oEntry;
    int32_t sNodes=0;
    char *cpBuf;
    char *hostname;
    orte_node_t *node = NULL;
    appInfoHdr_t *apHdr;        /* ALPS header structure          */
    appInfo_t *apInfo;          /* ALPS table info structure      */
#if ALPS_APPINFO_VERSION==0
    placeList_t *apSlots;       /* ALPS node specific info        */
#else
    orte_ras_alps_placeNodeList_t *apNodes;
#endif

    orte_ras_alps_get_appinfo_attempts(&max_appinfo_read_attempts);
    oNow=0;
    iTrips=0;
    opal_output_verbose(1, orte_ras_base_framework.framework_output,
                        "ras:alps:allocate: begin processing appinfo file");

    while(!oNow) {                      /* Until appinfo read is complete */
        iTrips++;                       /* Increment trip count           */

        iFd=open( filename, O_RDONLY );
        if( iFd==-1 ) {                 /* If file absent, ALPS is down   */
            opal_output_verbose(1, orte_ras_base_framework.framework_output,
                                "ras:alps:allocate: ALPS information open failure");
            usleep(iTrips*50000);       /* Increasing delays, .05 s/try   */

            /* Fail only when number of attempts have been exhausted.     */
            if( iTrips <= max_appinfo_read_attempts ) continue;
            ORTE_ERROR_LOG(ORTE_ERR_FILE_OPEN_FAILURE);
            return ORTE_ERR_FILE_OPEN_FAILURE;
        }
        if( fstat( iFd, &ssBuf )==-1 ) { /* If stat fails, access denied  */
            close(iFd);                  /* BUGFIX: fd was leaked here    */
            ORTE_ERROR_LOG(ORTE_ERR_NOT_AVAILABLE);
            return ORTE_ERR_NOT_AVAILABLE;
        }

        szLen=ssBuf.st_size;            /* Get buffer size                */
        cpBuf=malloc(szLen+1);          /* Allocate buffer                */
        if (NULL == cpBuf) {
            close(iFd);                 /* BUGFIX: fd was leaked here     */
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            return ORTE_ERR_OUT_OF_RESOURCE;
        }

        /* Repeated attempts to read appinfo, with an increasing delay between *
         * successive attempts to allow scheduler I/O a chance to complete.    */
        if( (oNow=read( iFd, cpBuf, szLen ))!=(off_t)szLen ) {

            /* This is where apstat fails; we will record it and try again. */
            opal_output_verbose(1, orte_ras_base_framework.framework_output,
                                "ras:alps:allocate: ALPS information read failure: %ld bytes", (long int)oNow);

            free(cpBuf);                /* Free (old) buffer              */
            close(iFd);                 /* Close (old) descriptor         */
            oNow=0;                     /* Reset byte count               */
            usleep(iTrips*50000);       /* Increasing delays, .05 s/try   */

            /* Fail only when number of attempts have been exhausted.     */
            if( iTrips<=max_appinfo_read_attempts ) continue;
            ORTE_ERROR_LOG(ORTE_ERR_FILE_READ_FAILURE);
            return ORTE_ERR_FILE_READ_FAILURE;
        }
    }
    close(iFd);
    opal_output_verbose(1, orte_ras_base_framework.framework_output,
                        "ras:alps:allocate: file %s read", filename);

    /* Now that we have the scheduler information, we just have to parse it for *
     * the data that we seek.                                                   */
    oNow=0;
    apHdr=(appInfoHdr_t *)cpBuf;

    opal_output_verbose(1, orte_ras_base_framework.framework_output,
                        "ras:alps:allocate: %d entries in file", apHdr->apNum);

    /* Header info (apHdr) tells us how many entries are in the file:  apHdr->apNum */
    for( iq=0; iq<apHdr->apNum; iq++ ) {    /* Parse all entries in file */

        /* Just at this level, a lot of information is available:         *
         *      apInfo->apid      ... ALPS job ID                         *
         *      apInfo->resId     ... ALPS reservation ID                 *
         *      apInfo->numCmds   ... Number of executables               *
         *      apInfo->numPlaces ... Number of PEs                       */
        apInfo=(appInfo_t *)(cpBuf+oNow+oInfo);

        /* Calculate the dependent offsets. */
        oSlots=sizeof(cmdDetail_t)*apInfo->numCmds;

        opal_output_verbose(1, orte_ras_base_framework.framework_output,
                            "ras:alps:allocate: read data for resId %u - myId %u",
                            apInfo->resId, *uMe);

#if ALPS_APPINFO_VERSION==0

        /* Finally, we get to the actual node-specific information:       *
         *      apSlots[ix].cmdIx    ... index of apDet[].cmd             *
         *      apSlots[ix].nid      ... NodeID (NID)                     *
         *      apSlots[ix].procMask ... mask for processors... need 16-bit shift */
        apSlots=(placeList_t *)(cpBuf+oNow+oInfo+oDet+oSlots);
        oEntry=sizeof(placeList_t)*apInfo->numPlaces;

        oNow+=(oDet+oSlots+oEntry);     /* Target next slot               */

        if( apInfo->resId != *uMe ) continue; /* Filter to our reservation Id */

        /* in this early version of alps, there is one entry for each PE in the
         * allocation - so cycle across the numPlaces entries, assigning a slot
         * for each time a node is named
         */
        for( ix=0; ix<apInfo->numPlaces; ix++ ) {

            opal_output_verbose(5, orte_ras_base_framework.framework_output,
                                "ras:alps:read_appinfo: got NID %d", apSlots[ix].nid);

            asprintf( &hostname, "%d", apSlots[ix].nid );
            if (NULL == hostname) {
                free(cpBuf);            /* BUGFIX: buffer was leaked here */
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                return ORTE_ERR_OUT_OF_RESOURCE;
            }

            /* If this matches the prior nodename, just add to the slot count. */
            if( NULL!=node && !strcmp(node->name, hostname) ) {
                free(hostname);         /* free hostname since not needed */
                ++node->slots;
            } else {                    /* must be new, so add to list    */
                opal_output_verbose(1, orte_ras_base_framework.framework_output,
                                    "ras:alps:read_appinfo: added NID %d to list", apSlots[ix].nid);
                node = OBJ_NEW(orte_node_t);
                node->name = hostname;
                orte_set_attribute(&node->attributes, ORTE_NODE_LAUNCH_ID, ORTE_ATTR_LOCAL,
                                   &apSlots[ix].nid, OPAL_INT32);
                node->slots_inuse = 0;
                node->slots_max = 0;
                node->slots = 1;
                node->state = ORTE_NODE_STATE_UP;
                /* need to order these node ids so the regex generator
                 * can properly function */
                /* add it to the end */
                opal_list_append(nodes, &node->super);
                sNodes++;               /* Increment the node count       */
            }
        }
#else
        /* in newer versions of alps, there is one entry for each node in the
         * allocation, and that struct directly carries the number of PEs
         * allocated on that node to this job.
         */
        apNodes=(orte_ras_alps_placeNodeList_t *)(cpBuf+oNow+oInfo+oDet+oSlots);
        oEntry=sizeof(orte_ras_alps_placeNodeList_t)*apInfo->numPlaces;

        oNow+=(oDet+oSlots+oEntry);     /* Target next entry              */

        if( apInfo->resId != *uMe ) continue; /* Filter to our reservation Id */

        for( ix=0; ix<apInfo->numPlaces; ix++ ) {
            opal_output_verbose(5, orte_ras_base_framework.framework_output,
                                "ras:alps:read_appinfo(modern): processing NID %d with %d slots",
                                apNodes[ix].nid, apNodes[ix].numPEs);

            asprintf( &hostname, "%d", apNodes[ix].nid );
            if (NULL == hostname) {
                free(cpBuf);            /* BUGFIX: buffer was leaked here */
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                return ORTE_ERR_OUT_OF_RESOURCE;
            }

            node = OBJ_NEW(orte_node_t);
            node->name = hostname;
            orte_set_attribute(&node->attributes, ORTE_NODE_LAUNCH_ID, ORTE_ATTR_LOCAL,
                               &apNodes[ix].nid, OPAL_INT32);
            node->slots_inuse = 0;
            node->slots_max = 0;
            node->slots = apNodes[ix].numPEs;
            node->state = ORTE_NODE_STATE_UP;
            /* need to order these node ids so the regex generator
             * can properly function */
            /* add it to the end */
            opal_list_append(nodes, &node->super);
            sNodes++;                   /* Increment the node count       */
        }
#endif
        break;                          /* Extended details ignored       */
    }

    /* order the node ids so the regex generator can properly function */
    opal_list_sort (nodes, compare_nodes);

    free(cpBuf);                        /* Free the buffer                */

    return ORTE_SUCCESS;
}
/*
 * Write a "restart-appfile" into the global snapshot directory, containing
 * one mpirun app-context line per process recorded in the snapshot, so the
 * job can be restarted from its checkpoint.
 *
 * BUGFIXES vs. previous revision:
 *  - asprintf() was passed strdup("restart-appfile"), leaking the duplicate
 *    on every call; the literal is passed directly now.
 *  - amca_param and tune_param were never freed on exit.
 *
 * @param snapshot  global snapshot whose local snapshots are written out
 * @return ORTE_SUCCESS, or ORTE_ERROR if the appfile cannot be opened.
 */
static int create_appfile(orte_sstore_base_global_snapshot_info_t *snapshot)
{
    int exit_status = ORTE_SUCCESS;
    FILE *appfile = NULL;
    opal_list_item_t* item = NULL;
    char *tmp_str = NULL;
    char *amca_param = NULL;
    char *tune_param = NULL;
    char *reference_fmt_str = NULL;
    char *location_str = NULL;
    char *ref_location_fmt_str = NULL;
    orte_sstore_base_local_snapshot_info_t *vpid_snapshot = NULL;

    /*
     * Create the appfile
     */
    orte_sstore.get_attr(snapshot->ss_handle,
                         SSTORE_METADATA_GLOBAL_SNAP_LOC_ABS,
                         &tmp_str);
    /* BUGFIX: was strdup("restart-appfile"), which leaked the duplicate */
    asprintf(&orte_restart_globals.appfile, "%s/%s", tmp_str, "restart-appfile");

    if( NULL != tmp_str ) {
        free(tmp_str);
        tmp_str = NULL;
    }

    orte_sstore.get_attr(snapshot->ss_handle,
                         SSTORE_METADATA_GLOBAL_AMCA_PARAM,
                         &amca_param);

    orte_sstore.get_attr(snapshot->ss_handle,
                         SSTORE_METADATA_GLOBAL_TUNE_PARAM,
                         &tune_param);

    if (NULL == (appfile = fopen(orte_restart_globals.appfile, "w")) ) {
        exit_status = ORTE_ERROR;
        goto cleanup;
    }

    /* This will give a format string that we can use */
    orte_sstore.get_attr(snapshot->ss_handle,
                         SSTORE_METADATA_LOCAL_SNAP_REF_FMT,
                         &reference_fmt_str);
    orte_sstore.get_attr(snapshot->ss_handle,
                         SSTORE_METADATA_LOCAL_SNAP_LOC,
                         &location_str);
    orte_sstore.get_attr(snapshot->ss_handle,
                         SSTORE_METADATA_LOCAL_SNAP_REF_LOC_FMT,
                         &ref_location_fmt_str);

    /*
     * Sort the snapshots so that they are in order
     */
    opal_list_sort(&snapshot->local_snapshots, snapshot_sort_compare_fn);

    /*
     * Construct the appfile: one app context per checkpointed process
     */
    for(item  = opal_list_get_first(&snapshot->local_snapshots);
        item != opal_list_get_end(&snapshot->local_snapshots);
        item  = opal_list_get_next(item) ) {
        vpid_snapshot = (orte_sstore_base_local_snapshot_info_t*)item;

        fprintf(appfile, "#\n");
        fprintf(appfile, "# Old Process Name: %u.%u\n",
                vpid_snapshot->process_name.jobid,
                vpid_snapshot->process_name.vpid);
        fprintf(appfile, "#\n");
        fprintf(appfile, "-np 1 ");

        fprintf(appfile, "--sstore-load ");
        /* loc:ref:postfix:seq */
        fprintf(appfile, "%s:%s:",
                location_str,
                orte_restart_globals.snapshot_ref);
        /* reference_fmt_str is a trusted printf format supplied by sstore */
        fprintf(appfile, reference_fmt_str, vpid_snapshot->process_name.vpid);
        fprintf(appfile, ":%s:%s:%d ",
                (vpid_snapshot->compress_comp == NULL ? "" : vpid_snapshot->compress_comp),
                (vpid_snapshot->compress_postfix == NULL ? "" : vpid_snapshot->compress_postfix),
                orte_restart_globals.seq_number);

        if( NULL == amca_param ) {
            amca_param = strdup("ft-enable-cr");
            opal_show_help("help-orte-restart.txt", "amca_param_not_found", true,
                           amca_param);
        }
        fprintf(appfile, "-am %s ", amca_param);

        if( NULL == tune_param ) {
            tune_param = strdup("ft-enable-cr");
            opal_show_help("help-orte-restart.txt", "tune_param_not_found", true,
                           tune_param);
        }
        fprintf(appfile, "-tune %s ", tune_param);

        fprintf(appfile, " opal-restart ");

        /*
         * By default, point to the central storage location of the checkpoint.
         * The active SStore module at restart time will determine if files
         * need to be preloaded.
         */
        fprintf(appfile, "-l %s", location_str);
        fprintf(appfile, " -m %s ", orte_sstore_base_local_metadata_filename);

        fprintf(appfile, "-r ");
        fprintf(appfile, reference_fmt_str, vpid_snapshot->process_name.vpid);

        fprintf(appfile, "\n");
    }

 cleanup:
    if(NULL != appfile) {
        fclose(appfile);
        appfile = NULL;
    }

    if( NULL != tmp_str ) {
        free(tmp_str);
        tmp_str = NULL;
    }

    /* BUGFIX: these two strings were previously leaked */
    if( NULL != amca_param ) {
        free(amca_param);
        amca_param = NULL;
    }
    if( NULL != tune_param ) {
        free(tune_param);
        tune_param = NULL;
    }

    if( NULL != location_str ) {
        free(location_str);
        location_str = NULL;
    }
    if( NULL != reference_fmt_str ) {
        free(reference_fmt_str);
        reference_fmt_str = NULL;
    }
    if( NULL != ref_location_fmt_str ) {
        free(ref_location_fmt_str);
        ref_location_fmt_str = NULL;
    }

    return exit_status;
}
/*
 * Drain completions from an InfiniBand UD completion queue and process the
 * received OOB messages in three passes:
 *   1. Poll up to 40 work completions; handle ACK/NACK/END immediately and
 *      queue every other message type on a processing list.
 *   2. Sort the queued messages (by peer, then id) and walk them, NACKing
 *      the first out-of-order message per peer, ACKing the last in-order
 *      one, and discarding anything that does not match the peer's
 *      expected sequence id.
 *   3. Dispatch the surviving in-order messages to their handlers.
 *
 * Returns the number of completions polled (which may be 0), or the
 * negative value returned by ibv_poll_cq on error.
 */
static int mca_oob_ud_process_messages (struct ibv_cq *event_cq, mca_oob_ud_port_t *port)
{
    mca_oob_ud_msg_item_t *msg_item, *next_item;
    opal_list_t *processing_msgs = &mca_oob_ud_component.ud_event_processing_msgs;
    mca_oob_ud_peer_t *peer;
    mca_oob_ud_msg_hdr_t *msg_hdr;
    int msg_num, i, count;
    struct ibv_wc wc[40];
    bool peer_nacked;

    count = ibv_poll_cq (event_cq, 40, wc);
    if (count < 0)
        return count;

    /* acknowledge the CQ events
     * NOTE(review): ibv_ack_cq_events pairs with ibv_get_cq_event; using the
     * poll count here matches the surrounding driver's convention — confirm
     * against where the CQ event was obtained. */
    ibv_ack_cq_events (event_cq, count);

    for (i = 0 ; i < count ; ++i) {
        /* the wr_id encodes the receive-buffer slot in its low bits */
        msg_num = (int)(wc[i].wr_id & (~MCA_OOB_UD_RECV_WR));
        msg_hdr = (mca_oob_ud_msg_hdr_t *) (port->msg_buf.ptr + msg_num * port->mtu);

        VALGRIND_MAKE_MEM_DEFINED(msg_hdr, wc[i].byte_len);

        /* ignore non-receive or failed completions, but repost the buffer */
        if (!(wc[i].wr_id & MCA_OOB_UD_RECV_WR) || IBV_WC_SUCCESS != wc[i].status) {
            mca_oob_ud_port_post_one_recv (port, msg_num);
            continue;
        }

        peer = mca_oob_ud_get_peer (port, &msg_hdr->ra.name, wc[i].src_qp, msg_hdr->ra.qkey,
                                    wc[i].slid, msg_hdr->ra.port_num);

        if (peer) {
            if (MCA_OOB_UD_MSG_ACK != msg_hdr->msg_type && MCA_OOB_UD_MSG_NACK != msg_hdr->msg_type &&
                MCA_OOB_UD_MSG_END != msg_hdr->msg_type) {
                /* payload-bearing message: defer to the sorted pass below.
                 * (this inner msg_item intentionally shadows the outer one) */
                mca_oob_ud_msg_item_t *msg_item = OBJ_NEW(mca_oob_ud_msg_item_t);

                msg_item->msg_num = msg_num;
                msg_item->hdr = msg_hdr;
                msg_item->port = port;
                msg_item->peer = peer;

                opal_list_append (processing_msgs, (opal_list_item_t *) msg_item);
            } else {
                /* control messages are handled immediately and the
                 * receive buffer is reposted right away */
                if (MCA_OOB_UD_MSG_ACK == msg_hdr->msg_type) {
                    (void) mca_oob_ud_event_handle_ack (port, peer, msg_hdr);
                } else if (MCA_OOB_UD_MSG_NACK == msg_hdr->msg_type) {
                    (void) mca_oob_ud_event_handle_nack (port, peer, msg_hdr);
                } else {
                    mca_oob_ud_event_handle_end (peer, msg_hdr);
                }
                mca_oob_ud_port_post_one_recv (port, msg_num);
            }
        } else {
            OPAL_OUTPUT_VERBOSE((10, mca_oob_base_output, "%s oob:ud:process_message got a null peer for message id %"
                                 PRIu64, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), msg_hdr->msg_id));
            mca_oob_ud_port_post_one_recv (port, msg_num);
        }
    }

    /* Sort messages by peer then id */
    opal_list_sort (processing_msgs, mca_oob_ud_msg_item_cmp);

    /* Send ACKs/NACKs and throw away out-of-order messages */
    msg_item = (mca_oob_ud_msg_item_t *) mca_oob_ud_list_get_first (processing_msgs);

    for (peer = NULL, peer_nacked = false ; NULL != msg_item ; msg_item = next_item) {
        /* reset the NACK latch at each peer boundary in the sorted list */
        if (peer != msg_item->peer) {
            peer_nacked = false;
        }

        peer = msg_item->peer;

        /* grab the successor before we potentially unlink msg_item */
        next_item = (mca_oob_ud_msg_item_t *) mca_oob_ud_list_get_next (processing_msgs,
                                                                        (opal_list_item_t *)msg_item);

        if (false == peer_nacked) {
            if (msg_item->hdr->msg_id > peer->peer_expected_id) {
                /* gap detected: NACK once per peer, skip its remaining items */
                (void) mca_oob_ud_event_send_nack (msg_item->port, peer, msg_item->hdr);
                peer_nacked = true;
            } else if (NULL == next_item || (next_item->peer != msg_item->peer)) {
                /* last in-order message for this peer: ACK it */
                (void) mca_oob_ud_event_send_ack (msg_item->port, msg_item->peer, msg_item->hdr);
            }
        }

        /* drop anything that is not exactly the next expected id */
        if (msg_item->hdr->msg_id != peer->peer_expected_id) {
            opal_list_remove_item (processing_msgs, (opal_list_item_t *) msg_item);
            OBJ_RELEASE(msg_item);
        } else {
            peer->peer_expected_id++;
        }
    }

    /* Process remaining (in-order) messages */
    while (NULL != (msg_item = (mca_oob_ud_msg_item_t *) opal_list_remove_first (processing_msgs))) {
        switch (msg_item->hdr->msg_type) {
        case MCA_OOB_UD_MSG_REQUEST:
            mca_oob_ud_event_handle_req (port, msg_item->peer, msg_item->hdr);
            break;
        case MCA_OOB_UD_MSG_REPLY:
            mca_oob_ud_event_handle_rep (port, msg_item->hdr);
            break;
        case MCA_OOB_UD_MSG_COMPLETE:
            mca_oob_ud_event_handle_completion (port, msg_item->hdr);
            break;
        case MCA_OOB_UD_MSG_DATA_OK:
            mca_oob_ud_event_handle_data_ok (port, msg_item->hdr);
            break;
        case MCA_OOB_UD_MSG_END:
            /* NOTE(review): uses 'peer' left over from the previous loop,
             * not msg_item->peer — looks intentional but verify */
            mca_oob_ud_event_handle_end (peer, msg_item->hdr);
            break;
        default:
            /* do nothing */
            break;
        }

        OBJ_RELEASE(msg_item);
    }

    return count;
}
static int start_recover(void) { int ret; int64_t epoch_counter; ompi_mtl_portals4.flowctl.flowctl_active = true; epoch_counter = opal_atomic_add_64(&ompi_mtl_portals4.flowctl.epoch_counter, 1); opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "Entering flowctl_start_recover %ld", epoch_counter); /* re-arm trigger/alarm for next time */ ret = setup_alarm(epoch_counter); if (OMPI_SUCCESS != ret) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d setup_alarm failed: %d\n", __FILE__, __LINE__, ret); return ret; } /* setup barrier tree for getting us out of flow control */ ret = setup_barrier(epoch_counter); if (OMPI_SUCCESS != ret) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d setup_barrier failed: %d\n", __FILE__, __LINE__, ret); return ret; } /* drain all pending sends */ while (ompi_mtl_portals4.flowctl.send_slots != ompi_mtl_portals4.flowctl.max_send_slots) { opal_progress(); } /* drain event queue */ while (0 != ompi_mtl_portals4_progress()) { ; } /* check short block active count */ ret = ompi_mtl_portals4_recv_short_link(1); if (OMPI_SUCCESS != ret) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: recv_short_link failed: %d", __FILE__, __LINE__, ret); } /* reorder the pending sends by operation count */ ret = opal_list_sort(&ompi_mtl_portals4.flowctl.pending_sends, seqnum_compare); if (OMPI_SUCCESS != ret) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d opal_list_sort failed: %d\n", __FILE__, __LINE__, ret); return ret; } /* drain event queue again, just to make sure */ while (0 != ompi_mtl_portals4_progress()) { ; } /* send barrier entry message */ ret = PtlPut(ompi_mtl_portals4.zero_md_h, 0, 0, PTL_NO_ACK_REQ, ompi_mtl_portals4.flowctl.me, ompi_mtl_portals4.flowctl_idx, MTL_PORTALS4_FLOWCTL_FANIN, 0, NULL, 0); if (OPAL_UNLIKELY(PTL_OK != ret)) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: PtlPut failed: %d\n", 
__FILE__, __LINE__, ret); goto error; } /* recovery complete when fan-out event arrives, async event, so we're done now */ ret = OMPI_SUCCESS; error: OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output, "Exiting flowctl_start_recover %ld", epoch_counter)); return ret; }