/*
 * Ask the HNP of the source's job to forward a process's output to this tool.
 *
 * A request buffer is built holding, in order: the stream tag (src_tag with
 * ORTE_IOF_PULL or'd in), the name of the source process, and our own name
 * as the sink.  The buffer is then handed to the RML for a non-blocking send
 * to the HNP derived from src_name->jobid.
 *
 * Note that the HNP will return an error if the specified stream of any
 * intended recipient is not open.  By default, stdout/err/diag are all left
 * open.  However, the user can also direct us to close any or all of those
 * streams, so the success of this call will depend upon how the user
 * executed the application.
 *
 * src_name  process whose output is being requested
 * src_tag   stream(s) to pull
 * fd        not referenced by this implementation
 *
 * Returns ORTE_SUCCESS when the request was accepted for sending, otherwise
 * the error code of the pack or send call that failed.
 */
static int tool_pull(const orte_process_name_t* src_name,
                     orte_iof_tag_t src_tag,
                     int fd)
{
    opal_buffer_t *buf;
    orte_iof_tag_t tag;
    orte_process_name_t hnp;
    int rc;

    /* the descriptor is part of the interface but is not needed here */
    (void)fd;

    OPAL_OUTPUT_VERBOSE((1, orte_iof_base_framework.framework_output,
                         "%s pulling output for proc %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(src_name)));

    buf = OBJ_NEW(opal_buffer_t);

    /* setup the tag to pull from HNP */
    tag = src_tag | ORTE_IOF_PULL;

    /* pack the tag - we do this first so that flow control messages can
     * consist solely of the tag
     */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &tag, 1, ORTE_IOF_TAG))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /* pack the name of the source */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, src_name, 1, ORTE_NAME))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /* pack our name as the sink */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, ORTE_PROC_MY_NAME, 1, ORTE_NAME))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /* send the buffer to the correct HNP */
    ORTE_HNP_NAME_FROM_JOB(&hnp, src_name->jobid);
    rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
                                 &hnp, buf, ORTE_RML_TAG_IOF_HNP,
                                 send_cb, NULL);
    if (0 > rc) {
        /* the send was rejected, so the completion callback will not run
         * for this buffer: report the failure and free the buffer here
         * instead of silently claiming success
         */
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /* the buffer is now in the hands of the messaging layer; send_cb
     * (defined elsewhere in this file) is presumably what releases it -
     * TODO confirm
     */
    return ORTE_SUCCESS;

cleanup:
    OBJ_RELEASE(buf);
    return rc;
}
/*
 * Ask the HNP of the source's job to stop forwarding a process's stream
 * to this tool, and wait for the close to be confirmed.
 *
 * The request buffer holds the stream tag (src_tag with ORTE_IOF_CLOSE
 * or'd in) followed by the name of the source process.
 *
 * src_name  process whose output should no longer be forwarded
 * src_tag   stream(s) to close
 *
 * Returns ORTE_SUCCESS after the wait on the component's "closed" flag
 * ends, otherwise the error code of the pack or send call that failed.
 * On a send failure the flag is left false, i.e. the close is incomplete.
 */
static int tool_close(const orte_process_name_t* src_name,
                      orte_iof_tag_t src_tag)
{
    opal_buffer_t *buf;
    orte_iof_tag_t tag;
    orte_process_name_t hnp;
    int rc;

    OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                         "%s closing output for proc %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(src_name)));

    buf = OBJ_NEW(opal_buffer_t);

    /* setup the tag to stop the copy */
    tag = src_tag | ORTE_IOF_CLOSE;

    /* pack the tag - we do this first so that flow control messages can
     * consist solely of the tag
     */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &tag, 1, ORTE_IOF_TAG))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /* pack the name of the source */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, src_name, 1, ORTE_NAME))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /* flag that the close is incomplete */
    mca_iof_tool_component.closed = false;

    /* send the buffer to the correct HNP */
    ORTE_HNP_NAME_FROM_JOB(&hnp, src_name->jobid);
    rc = orte_rml.send_buffer_nb(&hnp, buf, ORTE_RML_TAG_IOF_HNP,
                                 0, send_cb, NULL);
    if (0 > rc) {
        /* the request never left, so do not enter the wait below: nothing
         * visible here would ever flip the "closed" flag (presumably only
         * the HNP's reply does - confirm), and we would spin forever
         */
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /* wait right here until the close is confirmed */
    ORTE_PROGRESSED_WAIT(mca_iof_tool_component.closed, 0, 1);

    return ORTE_SUCCESS;

cleanup:
    OBJ_RELEASE(buf);
    return rc;
}
/*
 * Radix routing: return the name of the process to which a message bound
 * for `target` should be handed next.
 *
 * Decision order, first match wins:
 *   - routing disabled                 -> target itself
 *   - target has an invalid jobid/vpid -> ORTE_NAME_INVALID
 *   - target is this process           -> target
 *   - caller is an application process -> its local daemon
 *   - caller is a tool                 -> target if in the same job family,
 *                                         otherwise the HNP of target's job
 *   - (HNP and daemons only) target in another job family
 *                                      -> via the HNP if we are a daemon,
 *                                         else the stored job-family route,
 *                                         or ORTE_NAME_INVALID if none
 *   - target is our HNP                -> our parent, or the HNP directly
 *                                         (see hnp_direct/orte_static_ports)
 *   - otherwise, the daemon hosting target is looked up and reached
 *     directly, through a child, or through our parent
 *
 * The result is returned by value, so handing back a copy of the local
 * scratch name `daemon` is safe.
 */
static orte_process_name_t get_route(orte_process_name_t *target)
{
    orte_process_name_t *ret, daemon;
    opal_list_item_t *item;
    orte_routed_tree_t *child;
    int i;
    orte_routed_jobfam_t *jfam;
    uint16_t jfamily;

    if (!orte_routing_is_enabled) {
        ret = target;
        goto found;
    }

    /* initialize */
    daemon.jobid = ORTE_PROC_MY_DAEMON->jobid;
    daemon.vpid = ORTE_PROC_MY_DAEMON->vpid;

    if (target->jobid == ORTE_JOBID_INVALID ||
        target->vpid == ORTE_VPID_INVALID) {
        ret = ORTE_NAME_INVALID;
        goto found;
    }

    /* if it is me, then the route is just direct */
    if (OPAL_EQUAL == opal_dss.compare(ORTE_PROC_MY_NAME, target, ORTE_NAME)) {
        ret = target;
        goto found;
    }

    /* if I am an application process, always route via my local daemon */
    if (ORTE_PROC_IS_APP) {
        ret = ORTE_PROC_MY_DAEMON;
        goto found;
    }

    /* if I am a tool, the route is direct if target is in
     * my own job family, and to the target's HNP if not
     */
    if (ORTE_PROC_IS_TOOL) {
        if (ORTE_JOB_FAMILY(target->jobid) == ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
            ret = target;
            goto found;
        } else {
            ORTE_HNP_NAME_FROM_JOB(&daemon, target->jobid);
            ret = &daemon;
            goto found;
        }
    }

    /******     HNP AND DAEMONS ONLY     ******/

    /* IF THIS IS FOR A DIFFERENT JOB FAMILY... */
    if (ORTE_JOB_FAMILY(target->jobid) != ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
        /* if I am a daemon, route this via the HNP */
        if (ORTE_PROC_IS_DAEMON) {
            ret = ORTE_PROC_MY_HNP;
            goto found;
        }

        /* if I am the HNP (tools were already handled above), then I
         * stored a route to this job family, so look it up
         */
        jfamily = ORTE_JOB_FAMILY(target->jobid);
        for (i=0; i < orte_routed_jobfams.size; i++) {
            if (NULL == (jfam = (orte_routed_jobfam_t*)opal_pointer_array_get_item(&orte_routed_jobfams, i))) {
                /* empty slot in the table */
                continue;
            }
            if (jfam->job_family == jfamily) {
                OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
                                     "%s routed_radix: route to %s found",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                     ORTE_JOB_FAMILY_PRINT(target->jobid)));
                ret = &jfam->route;
                goto found;
            }
        }
        /* not found - so we have no route */
        ret = ORTE_NAME_INVALID;
        goto found;
    }

    /* THIS CAME FROM OUR OWN JOB FAMILY... */

    /* if this is going to the HNP, then send it direct if we don't know
     * how to get there - otherwise, send it via the tree
     */
    if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_HNP, target)) {
        if (!hnp_direct || orte_static_ports) {
            OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
                                 "%s routing to the HNP through my parent %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_PARENT)));
            ret = ORTE_PROC_MY_PARENT;
            goto found;
        } else {
            OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
                                 "%s routing direct to the HNP",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
            ret = ORTE_PROC_MY_HNP;
            goto found;
        }
    }

    daemon.jobid = ORTE_PROC_MY_NAME->jobid;
    /* find out what daemon hosts this proc */
    if (ORTE_VPID_INVALID == (daemon.vpid = orte_get_proc_daemon_vpid(target))) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        ret = ORTE_NAME_INVALID;
        goto found;
    }

    /* if the daemon is me, then send direct to the target! */
    if (ORTE_PROC_MY_NAME->vpid == daemon.vpid) {
        ret = target;
        goto found;
    } else if (orte_process_info.num_procs < mca_routed_radix_component.max_connections) {
        /* if the job is small enough, send direct to the target's daemon */
        ret = &daemon;
        goto found;
    } else {
        /* search routing tree for next step to that daemon */
        for (item = opal_list_get_first(&my_children);
             item != opal_list_get_end(&my_children);
             item = opal_list_get_next(item)) {
            child = (orte_routed_tree_t*)item;
            if (child->vpid == daemon.vpid) {
                /* the child is hosting the proc - just send it there */
                ret = &daemon;
                goto found;
            }
            /* otherwise, see if the daemon we need is below the child */
            if (opal_bitmap_is_set_bit(&child->relatives, daemon.vpid)) {
                /* yep - we need to step through this child */
                daemon.vpid = child->vpid;
                ret = &daemon;
                goto found;
            }
        }
    }

    /* if we get here, then the target daemon is not beneath
     * any of our children, so we have to step up through our parent
     */
    daemon.vpid = ORTE_PROC_MY_PARENT->vpid;

    ret = &daemon;

 found:
    OPAL_OUTPUT_VERBOSE((1, orte_routed_base_framework.framework_output,
                         "%s routed_radix_get(%s) --> %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(target),
                         ORTE_NAME_PRINT(ret)));

    return *ret;
}
/*
 * Direct routing: work out which process a message addressed to `target`
 * must be given to, and return that name by value.
 *
 * The first applicable rule decides:
 *   1. target carries an invalid jobid or vpid -> ORTE_NAME_INVALID
 *   2. we are an application -> our daemon if we know its URI,
 *      otherwise straight to the target
 *   3. we are a tool -> the target when it shares our job family,
 *      otherwise the HNP of the target's job
 *   4. target is our HNP -> the HNP
 *   5. otherwise -> the daemon hosting the target (or the target itself
 *      when that daemon is us); ORTE_NAME_INVALID when no host daemon
 *      is known
 */
static orte_process_name_t get_route(orte_process_name_t *target)
{
    orte_process_name_t hop;     /* scratch name for computed next hops */
    orte_process_name_t *route;  /* the answer; may point at hop */

    /* seed the scratch name with our own daemon */
    hop.jobid = ORTE_PROC_MY_DAEMON->jobid;
    hop.vpid = ORTE_PROC_MY_DAEMON->vpid;

    if (ORTE_JOBID_INVALID == target->jobid ||
        ORTE_VPID_INVALID == target->vpid) {
        /* nothing sensible can be said about an invalid name */
        route = ORTE_NAME_INVALID;
    } else if (ORTE_PROC_IS_APP) {
        if (NULL == orte_process_info.my_daemon_uri) {
            /* direct launched: there is no daemon to relay for us */
            route = target;
        } else {
            /* a daemon launched us, so everything goes through it */
            route = ORTE_PROC_MY_DAEMON;
        }
    } else if (ORTE_PROC_IS_TOOL) {
        if (ORTE_JOB_FAMILY(target->jobid) != ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
            /* foreign job family: hand it to that job's HNP */
            ORTE_HNP_NAME_FROM_JOB(&hop, target->jobid);
            route = &hop;
        } else {
            /* same job family: talk to the target directly */
            route = target;
        }
    } else if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
                                                           ORTE_PROC_MY_HNP,
                                                           target)) {
        /* from here on only the HNP and daemons are left */
        OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
                             "%s routing direct to the HNP",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        route = ORTE_PROC_MY_HNP;
    } else {
        /* locate the daemon that hosts the target */
        hop.jobid = ORTE_PROC_MY_NAME->jobid;
        hop.vpid = orte_get_proc_daemon_vpid(target);

        if (ORTE_VPID_INVALID == hop.vpid) {
            /* no known host daemon */
            route = ORTE_NAME_INVALID;
        } else if (hop.vpid == ORTE_PROC_MY_NAME->vpid) {
            /* the target is one of ours - deliver it ourselves */
            route = target;
        } else {
            /* go straight to the hosting daemon */
            route = &hop;
        }
    }

    OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
                         "%s routed_direct_get(%s) --> %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(target),
                         ORTE_NAME_PRINT(route)));

    return *route;
}
/*
 * Direct routing with job-family lookup: work out which process a message
 * addressed to `target` must be given to, and return that name by value.
 *
 * The first applicable rule decides:
 *   1. target carries an invalid jobid or vpid -> ORTE_NAME_INVALID
 *   2. we are an application -> our daemon if we know its URI,
 *      otherwise straight to the target
 *   3. we are a tool -> the target when it shares our job family,
 *      otherwise the HNP of the target's job
 *   4. target is in another job family -> our HNP when we are a daemon;
 *      otherwise the route recorded in orte_routed_jobfams, or
 *      ORTE_NAME_INVALID when none is recorded
 *   5. target is our HNP -> the HNP
 *   6. otherwise -> the daemon hosting the target (or the target itself
 *      when that daemon is us); ORTE_NAME_INVALID when no host daemon
 *      is known
 */
static orte_process_name_t get_route(orte_process_name_t *target)
{
    orte_process_name_t hop;      /* scratch name for computed next hops */
    orte_process_name_t *route;   /* the answer; may point at hop */
    orte_routed_jobfam_t *entry;
    uint16_t wanted;
    int idx;

    /* seed the scratch name with our own daemon */
    hop.jobid = ORTE_PROC_MY_DAEMON->jobid;
    hop.vpid = ORTE_PROC_MY_DAEMON->vpid;

    if (ORTE_JOBID_INVALID == target->jobid ||
        ORTE_VPID_INVALID == target->vpid) {
        /* nothing sensible can be said about an invalid name */
        route = ORTE_NAME_INVALID;
    } else if (ORTE_PROC_IS_APP) {
        if (NULL == orte_process_info.my_daemon_uri) {
            /* direct launched: there is no daemon to relay for us */
            route = target;
        } else {
            /* a daemon launched us, so everything goes through it */
            route = ORTE_PROC_MY_DAEMON;
        }
    } else if (ORTE_PROC_IS_TOOL) {
        if (ORTE_JOB_FAMILY(target->jobid) != ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
            /* foreign job family: hand it to that job's HNP */
            ORTE_HNP_NAME_FROM_JOB(&hop, target->jobid);
            route = &hop;
        } else {
            /* same job family: talk to the target directly */
            route = target;
        }
    } else if (ORTE_JOB_FAMILY(target->jobid) != ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
        /* only the HNP and daemons get this far; the target lives in
         * some other job family
         */
        if (ORTE_PROC_IS_DAEMON) {
            /* daemons let the HNP sort it out */
            route = ORTE_PROC_MY_HNP;
        } else {
            /* we are the HNP: consult the recorded job-family routes,
             * assuming "no route" until a matching entry turns up
             */
            wanted = ORTE_JOB_FAMILY(target->jobid);
            route = ORTE_NAME_INVALID;
            for (idx = 0; idx < orte_routed_jobfams.size; idx++) {
                entry = (orte_routed_jobfam_t*)opal_pointer_array_get_item(&orte_routed_jobfams, idx);
                if (NULL != entry && entry->job_family == wanted) {
                    OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
                                         "%s routed_direct: route to %s found",
                                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                         ORTE_JOB_FAMILY_PRINT(target->jobid)));
                    route = &entry->route;
                    break;
                }
            }
        }
    } else if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
                                                           ORTE_PROC_MY_HNP,
                                                           target)) {
        /* same job family, and the target is our HNP */
        OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
                             "%s routing direct to the HNP",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        route = ORTE_PROC_MY_HNP;
    } else {
        /* locate the daemon that hosts the target */
        hop.jobid = ORTE_PROC_MY_NAME->jobid;
        hop.vpid = orte_get_proc_daemon_vpid(target);

        if (ORTE_VPID_INVALID == hop.vpid) {
            /* no known host daemon */
            route = ORTE_NAME_INVALID;
        } else if (hop.vpid == ORTE_PROC_MY_NAME->vpid) {
            /* the target is one of ours - deliver it ourselves */
            route = target;
        } else {
            /* go straight to the hosting daemon */
            route = &hop;
        }
    }

    OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
                         "%s routed_direct_get(%s) --> %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(target),
                         ORTE_NAME_PRINT(route)));

    return *route;
}
/*
 * Binomial routing: work out which process a message addressed to
 * `target` must be given to next, and return that name by value.
 *
 * The first applicable rule decides:
 *   1. routing is disabled -> the target itself
 *   2. target carries an invalid jobid or vpid -> ORTE_NAME_INVALID
 *   3. target is this very process -> the target
 *   4. we are an application -> our local daemon
 *   5. we are a tool -> the target when it shares our job family,
 *      otherwise the HNP of the target's job
 *   6. target is our HNP -> the HNP itself when hnp_direct is set and
 *      static ports are not in use, otherwise our parent
 *   7. otherwise the daemon hosting the target is looked up
 *      (ORTE_NAME_INVALID when unknown); if that daemon is us the target
 *      is reached directly, else through the child that is or leads to
 *      that daemon, else through our parent
 */
static orte_process_name_t get_route(orte_process_name_t *target)
{
    orte_process_name_t hop;      /* scratch name for computed next hops */
    orte_process_name_t *route;   /* the answer; may point at hop */
    opal_list_item_t *it;
    orte_routed_tree_t *kid;
    int below_us;

    if (!orte_routing_is_enabled) {
        /* no routing at all: everyone is addressed directly */
        route = target;
    } else {
        /* seed the scratch name with our own daemon */
        hop.jobid = ORTE_PROC_MY_DAEMON->jobid;
        hop.vpid = ORTE_PROC_MY_DAEMON->vpid;

        if (ORTE_JOBID_INVALID == target->jobid ||
            ORTE_VPID_INVALID == target->vpid) {
            /* nothing sensible can be said about an invalid name */
            route = ORTE_NAME_INVALID;
        } else if (OPAL_EQUAL == opal_dss.compare(ORTE_PROC_MY_NAME, target, ORTE_NAME)) {
            /* talking to ourselves needs no relay */
            route = target;
        } else if (ORTE_PROC_IS_APP) {
            /* applications hand everything to their local daemon */
            route = ORTE_PROC_MY_DAEMON;
        } else if (ORTE_PROC_IS_TOOL) {
            if (ORTE_JOB_FAMILY(target->jobid) != ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
                /* foreign job family: hand it to that job's HNP */
                ORTE_HNP_NAME_FROM_JOB(&hop, target->jobid);
                route = &hop;
            } else {
                /* same job family: talk to the target directly */
                route = target;
            }
        } else if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
                                                               ORTE_PROC_MY_HNP,
                                                               target)) {
            /* only the HNP and daemons get this far */
            if (hnp_direct && !orte_static_ports) {
                OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
                                     "%s routing direct to the HNP",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
                route = ORTE_PROC_MY_HNP;
            } else {
                OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
                                     "%s routing to the HNP through my parent %s",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_PARENT)));
                route = ORTE_PROC_MY_PARENT;
            }
        } else {
            /* locate the daemon that hosts the target */
            hop.jobid = ORTE_PROC_MY_NAME->jobid;
            hop.vpid = orte_get_proc_daemon_vpid(target);

            if (ORTE_VPID_INVALID == hop.vpid) {
                /* no known host daemon */
                route = ORTE_NAME_INVALID;
            } else if (hop.vpid == ORTE_PROC_MY_NAME->vpid) {
                /* the target is one of ours - deliver it ourselves */
                route = target;
            } else {
                /* look for the child that either is the hosting daemon
                 * or has it recorded among its relatives
                 */
                below_us = 0;
                it = opal_list_get_first(&my_children);
                while (it != opal_list_get_end(&my_children)) {
                    kid = (orte_routed_tree_t*)it;
                    if (kid->vpid == hop.vpid ||
                        opal_bitmap_is_set_bit(&kid->relatives, hop.vpid)) {
                        /* next step is this child (a no-op assignment
                         * when the child itself hosts the target)
                         */
                        hop.vpid = kid->vpid;
                        below_us = 1;
                        break;
                    }
                    it = opal_list_get_next(it);
                }

                if (!below_us) {
                    /* not beneath any child, so climb to our parent */
                    hop.vpid = ORTE_PROC_MY_PARENT->vpid;
                }
                route = &hop;
            }
        }
    }

    OPAL_OUTPUT_VERBOSE((1, orte_routed_base_framework.framework_output,
                         "%s routed_binomial_get(%s) --> %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(target),
                         ORTE_NAME_PRINT(route)));

    return *route;
}