/**
 * Receive callback that handles a multicast "resend" message from a peer.
 *
 * The incoming buffer starts with a channel id.  Two cases are handled:
 *   - channel == UINT32_MAX: the peer is answering one of OUR resend
 *     requests and telling us we are too far behind to be repaired, so
 *     we report the failure and abort.
 *   - otherwise: the peer is asking us to resend every cached message on
 *     that channel whose sequence number is greater than the "start"
 *     value that follows the channel id in the buffer.
 *
 * All work is done while holding the thread control "ctl" (declared
 * outside this block) so no other operations proceed during the repair.
 *
 * @param status  unused
 * @param sender  process that sent the message; replies go back to it
 * @param buffer  received payload (channel id, then starting sequence number)
 * @param tag     unused
 * @param cbdata  unused
 */
static void resend_data(int status,
                        orte_process_name_t* sender,
                        opal_buffer_t* buffer,
                        orte_rml_tag_t tag,
                        void* cbdata)
{
    int n, rc;
    orte_rmcast_channel_t channel;
    orte_rmcast_seq_t start;
    rmcast_base_channel_t *ch;
    rmcast_send_log_t *log;
    opal_buffer_t *recover;

    /* block any further ops until we complete the missing
     * message repair
     */
    ORTE_ACQUIRE_THREAD(&ctl);

    n = 1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &channel, &n, ORTE_RMCAST_CHANNEL_T))) {
        ORTE_ERROR_LOG(rc);
        goto release;
    }

    /* if the channel is UINT32_MAX, then we know that this is
     * a response from a sender telling us that our request for
     * missing messages is too far behind, so we should just
     * abort
     */
    if (UINT32_MAX == channel) {
        opal_output(0, "%s CANNOT RECOVER FROM LOST MESSAGE - TOO FAR BEHIND - ABORTING",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        orte_errmgr.abort(1, NULL);
        goto release;
    }

    n = 1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &start, &n, ORTE_RMCAST_SEQ_T))) {
        ORTE_ERROR_LOG(rc);
        goto release;
    }

    opal_output(0, "%s request resend data from %s for channel %d start %d",
                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                ORTE_NAME_PRINT(sender), channel, start);

    /* get the referenced channel object */
    if (NULL == (ch = orte_rmcast_base_get_channel(channel))) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        goto release;
    }

    /* see if we can bring the proc up to date - if it is too
     * far behind, then there is no hope of recovery
     */
    log = (rmcast_send_log_t*)opal_ring_buffer_poke(&ch->cache, 0);
    if (NULL == log || start < log->seq_num) {
        /* no hope - tell them by replying with the UINT32_MAX sentinel */
        channel = UINT32_MAX;
        recover = OBJ_NEW(opal_buffer_t);
        if (ORTE_SUCCESS != (rc = opal_dss.pack(recover, &channel, 1, ORTE_RMCAST_CHANNEL_T))) {
            ORTE_ERROR_LOG(rc);
            /* the reply was never handed off, so it is still ours to free */
            OBJ_RELEASE(recover);
            goto release;
        }
        if (0 > (rc = orte_rml.send_buffer_nb(sender, recover, ORTE_RML_TAG_MULTICAST, 0, cbfunc, NULL))) {
            ORTE_ERROR_LOG(rc);
            OBJ_RELEASE(recover);
        }
        goto release;
    }

    /* search its ring buffer for the starting message - function
     * automatically starts at the oldest message and works up
     * from there
     */
    for (n = 0; n < ch->cache.size; n++) {
        log = (rmcast_send_log_t*)opal_ring_buffer_poke(&ch->cache, n);
        /* skip empty slots and anything the requester already has */
        if (NULL == log || log->seq_num <= start) {
            continue;
        }
        OPAL_OUTPUT_VERBOSE((0, orte_rmcast_base.rmcast_output,
                             "%s resending msg %d to %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             log->seq_num, ORTE_NAME_PRINT(sender)));
        recover = OBJ_NEW(opal_buffer_t);
        /* do not send a buffer whose payload could not be copied */
        if (ORTE_SUCCESS != (rc = opal_dss.copy_payload(recover, log->buf))) {
            ORTE_ERROR_LOG(rc);
            OBJ_RELEASE(recover);
            goto release;
        }
        /* on success the buffer is handed to the RML together with cbfunc
         * (defined outside this block); on failure it remains ours to free
         */
        if (0 > (rc = orte_rml.send_buffer_nb(sender, recover, ORTE_RML_TAG_MULTICAST, 0, cbfunc, NULL))) {
            OBJ_RELEASE(recover);
            ORTE_ERROR_LOG(rc);
            goto release;
        }
    }

release:
    ORTE_RELEASE_THREAD(&ctl);
}
/**
 * Collect resource-usage statistics for this daemon, its node, and each
 * live local child process, and append them to the sampler's bucket.
 *
 * The data is packed into a local buffer in this order: the string
 * "resusage", the node name, the node stats, this process's stats, then
 * one opal_pstats_t per live child.  If anything was packed, the local
 * buffer itself is packed into sampler->bucket.  On any pack failure the
 * error is logged and the function returns without touching the bucket.
 *
 * The stats objects are only needed long enough to be packed: the code
 * that used to retain them in ring buffers is compiled out (#if 0), so
 * every object created here is released before returning.
 *
 * @param sampler  sampler whose bucket receives the packed data
 */
static void sample(orcm_sensor_sampler_t *sampler)
{
    opal_pstats_t *stats;
    opal_node_stats_t *nstats;
    int rc, i;
    orte_proc_t *child;
    opal_buffer_t buf, *bptr;
    char *comp;

    OPAL_OUTPUT_VERBOSE((1, orcm_sensor_base_framework.framework_output,
                         "sample:resusage sampling resource usage"));

    /* setup a buffer for our stats */
    OBJ_CONSTRUCT(&buf, opal_buffer_t);

    /* pack our name - the temporary copy is freed whether or not
     * the pack succeeds
     */
    comp = strdup("resusage");
    rc = opal_dss.pack(&buf, &comp, 1, OPAL_STRING);
    free(comp);
    if (OPAL_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        OBJ_DESTRUCT(&buf);
        return;
    }

    /* update stats on ourself and the node */
    stats = OBJ_NEW(opal_pstats_t);
    nstats = OBJ_NEW(opal_node_stats_t);
    if (ORCM_SUCCESS != (rc = opal_pstat.query(orte_process_info.pid, stats, nstats))) {
        ORTE_ERROR_LOG(rc);
        /* both objects came from OBJ_NEW, so both must be released */
        OBJ_RELEASE(stats);
        OBJ_RELEASE(nstats);
        OBJ_DESTRUCT(&buf);
        return;
    }

    /* the stats framework can't know nodename or rank */
    strncpy(stats->node, orte_process_info.nodename, (OPAL_PSTAT_MAX_STRING_LEN - 1));
    stats->rank = ORTE_PROC_MY_NAME->vpid;

#if 0
    /* NOTE: if this is ever re-enabled, the ring buffers take over the
     * references to stats/nstats and the OBJ_RELEASE calls that follow
     * the packing below must be removed
     */
    /* locally save the stats */
    if (NULL != (st = (opal_pstats_t*)opal_ring_buffer_push(&my_proc->stats, stats))) {
        OBJ_RELEASE(st);
    }
    if (NULL != (nst = (opal_node_stats_t*)opal_ring_buffer_push(&my_node->stats, nstats))) {
        /* release the popped value */
        OBJ_RELEASE(nst);
    }
#endif

    /* pack them - stop at the first failure */
    if (OPAL_SUCCESS == (rc = opal_dss.pack(&buf, &orte_process_info.nodename, 1, OPAL_STRING)) &&
        OPAL_SUCCESS == (rc = opal_dss.pack(&buf, &nstats, 1, OPAL_NODE_STAT))) {
        rc = opal_dss.pack(&buf, &stats, 1, OPAL_PSTAT);
    }
    /* the values now live in buf (or packing failed); either way we
     * no longer need the objects
     */
    OBJ_RELEASE(stats);
    OBJ_RELEASE(nstats);
    if (OPAL_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
        OBJ_DESTRUCT(&buf);
        return;
    }

    /* loop through our children and update their stats */
    if (NULL != orte_local_children) {
        for (i = 0; i < orte_local_children->size; i++) {
            if (NULL == (child = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i))) {
                continue;
            }
            if (!ORTE_FLAG_TEST(child, ORTE_PROC_FLAG_ALIVE)) {
                continue;
            }
            if (0 == child->pid) {
                /* race condition */
                continue;
            }
            stats = OBJ_NEW(opal_pstats_t);
            if (ORCM_SUCCESS != opal_pstat.query(child->pid, stats, NULL)) {
                /* may hit a race condition where the process has
                 * terminated, so just ignore any error
                 */
                OBJ_RELEASE(stats);
                continue;
            }
            /* the stats framework can't know nodename or rank */
            strncpy(stats->node, orte_process_info.nodename, (OPAL_PSTAT_MAX_STRING_LEN - 1));
            stats->rank = child->name.vpid;
#if 0
            /* NOTE: if re-enabled, the ring buffer owns stats and the
             * OBJ_RELEASE after packing below must be removed
             */
            /* store it */
            if (NULL != (st = (opal_pstats_t*)opal_ring_buffer_push(&child->stats, stats))) {
                OBJ_RELEASE(st);
            }
#endif
            /* pack them, then drop our reference regardless of outcome */
            rc = opal_dss.pack(&buf, &stats, 1, OPAL_PSTAT);
            OBJ_RELEASE(stats);
            if (OPAL_SUCCESS != rc) {
                ORTE_ERROR_LOG(rc);
                OBJ_DESTRUCT(&buf);
                return;
            }
        }
    }

    /* xfer any data for transmission */
    if (0 < buf.bytes_used) {
        bptr = &buf;
        if (OPAL_SUCCESS != (rc = opal_dss.pack(&sampler->bucket, &bptr, 1, OPAL_BUFFER))) {
            ORTE_ERROR_LOG(rc);
            OBJ_DESTRUCT(&buf);
            return;
        }
    }
    OBJ_DESTRUCT(&buf);

#if 0
    /* are there any issues with node-level usage? */
    nst = (opal_node_stats_t*)opal_ring_buffer_poke(&my_node->stats, -1);
    if (NULL != nst && 0.0 < mca_sensor_resusage_component.node_memory_limit) {
        OPAL_OUTPUT_VERBOSE((2, orcm_sensor_base_framework.framework_output,
                             "%s CHECKING NODE MEM",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        /* compute the percentage of node memory in-use */
        in_use = 1.0 - (nst->free_mem / nst->total_mem);
        OPAL_OUTPUT_VERBOSE((2, orcm_sensor_base_framework.framework_output,
                             "%s PERCENT USED: %f LIMIT: %f",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             in_use, mca_sensor_resusage_component.node_memory_limit));
        if (mca_sensor_resusage_component.node_memory_limit <= in_use) {
            /* loop through our children and find the biggest hog */
            hog = NULL;
            max_mem = 0.0;
            for (i=0; i < orte_local_children->size; i++) {
                if (NULL == (child = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i))) {
                    continue;
                }
                if (!ORTE_FLAG_TEST(child, ORTE_PROC_IS_ALIVE)) {
                    continue;
                }
                if (0 == child->pid) {
                    /* race condition */
                    continue;
                }
                if (NULL == (st = (opal_pstats_t*)opal_ring_buffer_poke(&child->stats, -1))) {
                    continue;
                }
                OPAL_OUTPUT_VERBOSE((5, orcm_sensor_base_framework.framework_output,
                                     "%s PROC %s AT VSIZE %f",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                     ORTE_NAME_PRINT(&child->name), st->vsize));
                if (max_mem < st->vsize) {
                    hog = child;
                    max_mem = st->vsize;
                }
            }
            if (NULL == hog) {
                /* if all children dead and we are still too big,
                 * then we must be the culprit - abort
                 */
                OPAL_OUTPUT_VERBOSE((2, orcm_sensor_base_framework.framework_output,
                                     "%s NO CHILD: COMMITTING SUICIDE",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
                orte_errmgr.abort(ORCM_ERR_MEM_LIMIT_EXCEEDED, NULL);
            } else {
                /* report the problem */
                OPAL_OUTPUT_VERBOSE((2, orcm_sensor_base_framework.framework_output,
                                     "%s REPORTING %s TO ERRMGR FOR EXCEEDING LIMITS",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                     ORTE_NAME_PRINT(&hog->name)));
                ORTE_ACTIVATE_PROC_STATE(&hog->name, ORTE_PROC_STATE_SENSOR_BOUND_EXCEEDED);
            }
            /* since we have ordered someone to die, we've done enough for this
             * time around - don't check proc limits as well
             */
            return;
        }
    }

    /* check proc limits */
    if (0.0 < mca_sensor_resusage_component.proc_memory_limit) {
        OPAL_OUTPUT_VERBOSE((2, orcm_sensor_base_framework.framework_output,
                             "%s CHECKING PROC MEM",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        /* check my children first */
        for (i=0; i < orte_local_children->size; i++) {
            if (NULL == (child = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i))) {
                continue;
            }
            if (!ORTE_FLAG_TEST(child, ORTE_PROC_IS_ALIVE)) {
                continue;
            }
            if (0 == child->pid) {
                /* race condition */
                continue;
            }
            if (NULL == (st = (opal_pstats_t*)opal_ring_buffer_poke(&child->stats, -1))) {
                continue;
            }
            OPAL_OUTPUT_VERBOSE((5, orcm_sensor_base_framework.framework_output,
                                 "%s PROC %s AT VSIZE %f",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_NAME_PRINT(&child->name), st->vsize));
            if (mca_sensor_resusage_component.proc_memory_limit <= st->vsize) {
                /* report the problem */
                ORTE_ACTIVATE_PROC_STATE(&child->name, ORTE_PROC_STATE_SENSOR_BOUND_EXCEEDED);
            }
        }
    }
#endif
}