/*
 * Retrieve the value stored under a 64-bit key.
 *
 * @param ht  the hash table to search
 * @param key the 64-bit key to look up
 * @param ptr [out] receives the stored value on success
 *
 * @return OPAL_SUCCESS if the key is present, OPAL_ERR_NOT_FOUND if not
 *         (OPAL_ERROR in debug builds when the table was never initialized)
 */
int opal_hash_table_get_value_uint64(opal_hash_table_t* ht, uint64_t key, void **ptr)
{
    opal_list_t *bucket = ht->ht_table + (key & ht->ht_mask);
    opal_list_item_t *item;

#if OPAL_ENABLE_DEBUG
    if (0 == ht->ht_table_size) {
        opal_output(0, "opal_hash_table_get_value_uint64:"
                    "opal_hash_table_init() has not been called");
        return OPAL_ERROR;
    }
#endif

    /* walk the chain hanging off this bucket looking for a matching key */
    for (item = opal_list_get_first(bucket);
         item != opal_list_get_end(bucket);
         item = opal_list_get_next(item)) {
        opal_uint64_hash_node_t *node = (opal_uint64_hash_node_t*)item;
        if (key == node->hn_key) {
            *ptr = node->hn_value;
            return OPAL_SUCCESS;
        }
    }
    return OPAL_ERR_NOT_FOUND;
}
/*
 * Register a BTL module with an ib_proc unless it is already registered.
 *
 * @return OPAL_ERR_RESOURCE_BUSY if this BTL already touched this ib_proc
 *         (a normal occurrence, not a failure), OPAL_ERR_OUT_OF_RESOURCE
 *         on allocation failure, OPAL_SUCCESS otherwise.
 */
int mca_btl_openib_proc_reg_btl(mca_btl_openib_proc_t* ib_proc,
                                mca_btl_openib_module_t* openib_btl)
{
    mca_btl_openib_proc_btlptr_t *entry;

    /* scan the existing registrations for a duplicate */
    for (entry = (mca_btl_openib_proc_btlptr_t*)opal_list_get_first(&ib_proc->openib_btls);
         entry != (mca_btl_openib_proc_btlptr_t*)opal_list_get_end(&ib_proc->openib_btls);
         entry = (mca_btl_openib_proc_btlptr_t*)opal_list_get_next(entry)) {
        if (openib_btl == entry->openib_btl) {
            /* this is normal return meaning that this BTL has already
             * touched this ib_proc */
            return OPAL_ERR_RESOURCE_BUSY;
        }
    }

    entry = OBJ_NEW(mca_btl_openib_proc_btlptr_t);
    if (NULL == entry) {
        return OPAL_ERR_OUT_OF_RESOURCE;
    }
    entry->openib_btl = openib_btl;
    opal_list_append(&ib_proc->openib_btls, &entry->super);
    return OPAL_SUCCESS;
}
/*
 * Look for an existing uDAPL process instance based on the associated
 * ompi_proc_t instance.
 *
 * @return the matching uDAPL proc, or NULL when none exists.
 */
static mca_btl_udapl_proc_t* mca_btl_udapl_proc_lookup_ompi(ompi_proc_t* ompi_proc)
{
    mca_btl_udapl_proc_t *found = NULL;
    opal_list_item_t *item;

    OPAL_THREAD_LOCK(&mca_btl_udapl_component.udapl_lock);
    for (item = opal_list_get_first(&mca_btl_udapl_component.udapl_procs);
         item != opal_list_get_end(&mca_btl_udapl_component.udapl_procs);
         item = opal_list_get_next(item)) {
        mca_btl_udapl_proc_t *udapl_proc = (mca_btl_udapl_proc_t*)item;
        if (ompi_proc == udapl_proc->proc_ompi) {
            found = udapl_proc;
            break;
        }
    }
    OPAL_THREAD_UNLOCK(&mca_btl_udapl_component.udapl_lock);
    return found;
}
/*
 * Look for an existing TEMPLATE process instance based on the associated
 * ompi_proc_t instance.
 *
 * @return the matching template proc, or NULL when none exists.
 */
static mca_btl_template_proc_t* mca_btl_template_proc_lookup_ompi(ompi_proc_t* ompi_proc)
{
    mca_btl_template_proc_t *found = NULL;
    opal_list_item_t *item;

    OPAL_THREAD_LOCK(&mca_btl_template_component.template_lock);
    for (item = opal_list_get_first(&mca_btl_template_component.template_procs);
         item != opal_list_get_end(&mca_btl_template_component.template_procs);
         item = opal_list_get_next(item)) {
        mca_btl_template_proc_t *template_proc = (mca_btl_template_proc_t*)item;
        if (ompi_proc == template_proc->proc_ompi) {
            found = template_proc;
            break;
        }
    }
    OPAL_THREAD_UNLOCK(&mca_btl_template_component.template_lock);
    return found;
}
/**
 * This graph API fills an array of pointers with all the vertices in
 * the graph.
 *
 * @param graph         the graph to scan.
 * @param vertices_list pointer array that receives one entry per vertex.
 *
 * @return int the graph order (the number of vertices added to the
 *         array); 0 for an empty graph, in which case nothing is added
 *         to vertices_list.
 *
 * Fixes vs. original: the comment claimed "return NULL" on an empty
 * graph although the function returns the int 0, and an unused loop
 * counter (i) was maintained for no purpose; both are corrected.
 */
int opal_graph_get_graph_vertices(opal_graph_t *graph, opal_pointer_array_t *vertices_list)
{
    opal_adjacency_list_t *aj_list;
    opal_list_item_t *item;

    /* An empty graph contributes nothing; report order 0. */
    if (0 == graph->number_of_vertices) {
        return 0;
    }
    /* Run on all the vertices of the graph: each adjacency-list entry
     * carries one vertex, which is appended to the output array. */
    for (item = opal_list_get_first(graph->adjacency_list);
         item != opal_list_get_end(graph->adjacency_list);
         item = opal_list_get_next(item)) {
        aj_list = (opal_adjacency_list_t *) item;
        opal_pointer_array_add(vertices_list, (void *)aj_list->vertex);
    }
    /* return the number of vertices placed in the list */
    return graph->number_of_vertices;
}
/**
 * This graph API finds a vertex in the graph according to the vertex
 * data, using each vertex's own compare_vertex callback.
 *
 * @param graph       the graph we are searching in.
 * @param vertex_data the vertex data we are searching according to.
 *
 * @return opal_graph_vertex_t* the vertex found, or NULL if no vertex
 *         matches (vertices without a compare callback are skipped).
 */
opal_graph_vertex_t *opal_graph_find_vertex(opal_graph_t *graph, void *vertex_data)
{
    opal_list_item_t *item;

    /* scan every adjacency-list entry in the graph */
    for (item = opal_list_get_first(graph->adjacency_list);
         item != opal_list_get_end(graph->adjacency_list);
         item = opal_list_get_next(item)) {
        opal_adjacency_list_t *aj_list = (opal_adjacency_list_t *) item;
        opal_graph_vertex_t *vertex = aj_list->vertex;
        if (NULL == vertex->compare_vertex) {
            continue;
        }
        /* the vertex compares its own data against the probe data */
        if (0 == vertex->compare_vertex(vertex->vertex_data, vertex_data)) {
            return vertex;
        }
    }
    /* no vertex matched */
    return NULL;
}
/*
 * Find the active collective tracker with the given id, creating and
 * appending a new one when none exists yet.
 *
 * @param id the collective id to look up.
 *
 * @return the matching (or newly created) tracker, or NULL when a new
 *         tracker was required but allocation failed.
 *
 * Fix vs. original: the OBJ_NEW result was dereferenced without a NULL
 * check, so an allocation failure crashed instead of being reportable
 * (the sibling BTL registration code in this tree does check OBJ_NEW).
 */
orte_grpcomm_collective_t* orte_grpcomm_base_setup_collective(orte_grpcomm_coll_id_t id)
{
    opal_list_item_t *item;
    orte_grpcomm_collective_t *cptr, *coll;

    /* look for an already-active tracker with this id */
    coll = NULL;
    for (item = opal_list_get_first(&orte_grpcomm_base.active_colls);
         item != opal_list_get_end(&orte_grpcomm_base.active_colls);
         item = opal_list_get_next(item)) {
        cptr = (orte_grpcomm_collective_t*)item;
        if (id == cptr->id) {
            coll = cptr;
            break;
        }
    }
    if (NULL == coll) {
        coll = OBJ_NEW(orte_grpcomm_collective_t);
        /* guard against allocation failure before touching the object */
        if (NULL == coll) {
            return NULL;
        }
        coll->id = id;
        opal_list_append(&orte_grpcomm_base.active_colls, &coll->super);
    }
    return coll;
}
/*****************
 * Local Functions
 *****************/

/*
 * Return true if any local child belonging to the given job (or to any
 * job, when ORTE_JOBID_WILDCARD is passed) is still alive.
 */
static bool any_live_children(orte_jobid_t job)
{
    opal_list_item_t *item;

    /* the thread is locked elsewhere - don't try to do it again here */
    for (item = opal_list_get_first(&orte_local_children);
         item != opal_list_get_end(&orte_local_children);
         item = opal_list_get_next(item)) {
        orte_odls_child_t *child = (orte_odls_child_t*)item;
        /* does this child belong to the requested job (or wildcard)? */
        bool job_matches = (job == child->name->jobid || ORTE_JOBID_WILDCARD == job);
        if (job_matches && child->alive) {
            return true;
        }
    }
    /* if we get here, then nobody is left alive from that job */
    return false;
}
static ompi_proc_t * ompi_proc_find_and_add(const ompi_process_name_t * name, bool* isnew) { ompi_proc_t *proc, *rproc = NULL; ompi_rte_cmp_bitmask_t mask; /* return the proc-struct which matches this jobid+process id */ mask = OMPI_RTE_CMP_JOBID | OMPI_RTE_CMP_VPID; OPAL_THREAD_LOCK(&ompi_proc_lock); for(proc = (ompi_proc_t*)opal_list_get_first(&ompi_proc_list); proc != (ompi_proc_t*)opal_list_get_end(&ompi_proc_list); proc = (ompi_proc_t*)opal_list_get_next(proc)) { if (OPAL_EQUAL == ompi_rte_compare_name_fields(mask, &proc->proc_name, name)) { rproc = proc; *isnew = false; break; } } /* if we didn't find this proc in the list, create a new * proc_t and append it to the list */ if (NULL == rproc) { *isnew = true; rproc = OBJ_NEW(ompi_proc_t); if (NULL != rproc) { opal_list_append(&ompi_proc_list, (opal_list_item_t*)rproc); rproc->proc_name = *name; } /* caller had better fill in the rest of the proc, or there's going to be pain later... */ } OPAL_THREAD_UNLOCK(&ompi_proc_lock); return rproc; }
/*
 * Tear down the oshmem proc machinery: destroy all proc groups, force
 * every proc on the global list to be released (looping until each
 * destructor actually runs), release the local convertor, and destruct
 * the list and its lock.
 *
 * @return OSHMEM_SUCCESS always.
 */
int oshmem_proc_finalize(void)
{
    opal_list_item_t *item;

    /* Destroy all groups */
    oshmem_proc_group_finalize();

    /* remove all items from list and destroy them. Since we cannot know
     * the reference count of the procs for certain, it is possible that
     * a single OBJ_RELEASE won't drive the count to zero, and hence will
     * not release the memory. Accordingly, we cycle through the list here,
     * calling release on each item.
     *
     * This will cycle until it forces the reference count of each item
     * to zero, thus causing the destructor to run - which will remove
     * the item from the list!
     *
     * We cannot do this under the thread lock as the destructor will
     * call it when removing the item from the list. However, this function
     * is ONLY called from MPI_Finalize, and all threads are prohibited from
     * calling an MPI function once ANY thread has called MPI_Finalize. Of
     * course, multiple threads are allowed to call MPI_Finalize, so this
     * function may get called multiple times by various threads. We believe
     * it is thread safe to do so...though it may not -appear- to be so
     * without walking through the entire list/destructor sequence.
     */
    /* keep taking the head until the list reports itself empty */
    while (opal_list_get_end(&oshmem_proc_list) !=
           (item = opal_list_get_first(&oshmem_proc_list))) {
        OBJ_RELEASE(item);
    }
    OBJ_RELEASE( oshmem_shmem_local_convertor);

    /* now destruct the list and thread lock */
    OBJ_DESTRUCT(&oshmem_proc_list);
    OBJ_DESTRUCT(&oshmem_proc_lock);

    return OSHMEM_SUCCESS;
}
/**
 * This graph API adds an edge (connection between two vertices) to a
 * graph.  The most common use for this API is while building a graph.
 *
 * @param graph The graph that this edge will be added to.
 * @param edge  The edge that we want to add.
 *
 * @return int OPAL_SUCCESS, or OPAL_ERROR if either endpoint of the
 *         edge is not a vertex of the graph.
 *
 * Bug fix vs. original: the error test used &&, which only rejected the
 * edge when BOTH endpoints were absent; with only the start vertex
 * missing, start_aj_list stayed NULL and was dereferenced below.  The
 * test now uses ||, matching the documented contract ("error if one of
 * the vertices is not in the graph").
 */
int opal_graph_add_edge(opal_graph_t *graph, opal_graph_edge_t *edge)
{
    opal_adjacency_list_t *aj_list, *start_aj_list = NULL;
    opal_list_item_t *item;
    bool start_found = false, end_found = false;

    /* find the vertices that this edge should connect */
    for (item = opal_list_get_first(graph->adjacency_list);
         item != opal_list_get_end(graph->adjacency_list);
         item = opal_list_get_next(item)) {
        aj_list = (opal_adjacency_list_t *) item;
        if (aj_list->vertex == edge->start) {
            start_found = true;
            start_aj_list = aj_list;
        }
        if (aj_list->vertex == edge->end) {
            end_found = true;
        }
    }
    /* if either the start or the end vertex is not found - return an error */
    if (false == start_found || false == end_found) {
        return OPAL_ERROR;
    }
    /* point the edge to the adjacency list of the start vertex (for easy search) */
    edge->in_adj_list = start_aj_list;
    /* append the edge to the adjacency list of the start vertex */
    opal_list_append(start_aj_list->edges, (opal_list_item_t*)edge);
    /* increase the graph size */
    graph->number_of_edges++;
    return OPAL_SUCCESS;
}
/*
 * Cancel any posted receives that match the given peer name and tag.
 * Waits (when progress threads are enabled and we are not the progress
 * thread) for in-flight matches to drain before removing entries.
 *
 * @param name the peer whose posted receives should be cancelled
 * @param tag  the message tag that must match
 *
 * @return ORTE_SUCCESS if at least one posted receive was removed,
 *         ORTE_ERR_NOT_FOUND otherwise.
 */
int mca_oob_tcp_recv_cancel( orte_process_name_t* name, int tag)
{
    int matched = 0;
    opal_list_item_t *item, *next;

    /* wait for any previously matched messages to be processed */
    OPAL_THREAD_LOCK(&mca_oob_tcp_component.tcp_match_lock);
#if OMPI_ENABLE_PROGRESS_THREADS
    if(opal_event_progress_thread() == false) {
        while(mca_oob_tcp_component.tcp_match_count) {
            opal_condition_wait(
                &mca_oob_tcp_component.tcp_match_cond,
                &mca_oob_tcp_component.tcp_match_lock);
        }
    }
#endif

    /* remove any matching posted receives; "next" is captured before any
     * removal so the walk survives unlinking the current item */
    for(item = opal_list_get_first(&mca_oob_tcp_component.tcp_msg_post);
        item != opal_list_get_end(&mca_oob_tcp_component.tcp_msg_post);
        item = next) {
        mca_oob_tcp_msg_t* msg = (mca_oob_tcp_msg_t*)item;
        next = opal_list_get_next(item);
        if (OPAL_EQUAL == opal_dss.compare(name, &msg->msg_peer, ORTE_NAME)) {
            if (msg->msg_hdr.msg_tag == tag) {
                opal_list_remove_item(&mca_oob_tcp_component.tcp_msg_post, &msg->super.super);
                /* return the message to its free list */
                MCA_OOB_TCP_MSG_RETURN(msg);
                matched++;
            }
        }
    }
    OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_match_lock);
    return (matched > 0) ? ORTE_SUCCESS : ORTE_ERR_NOT_FOUND;
}
/*
 * Find an IOF endpoint whose origin matches target_name under the given
 * comparison mask, and whose tag matches target_tag (ORTE_IOF_ANY on
 * either side matches any tag).  The matched endpoint is retained, so
 * the caller owns one reference and must release it.
 *
 * @return the retained endpoint, or NULL when nothing matches.
 */
orte_iof_base_endpoint_t* orte_iof_base_endpoint_match(
    const orte_process_name_t* target_name,
    orte_ns_cmp_bitmask_t target_mask,
    int target_tag)
{
    orte_iof_base_endpoint_t *match = NULL;
    opal_list_item_t* item;

    OPAL_THREAD_LOCK(&orte_iof_base.iof_lock);
    for (item = opal_list_get_first(&orte_iof_base.iof_endpoints);
         item != opal_list_get_end(&orte_iof_base.iof_endpoints);
         item = opal_list_get_next(item)) {
        orte_iof_base_endpoint_t *endpoint = (orte_iof_base_endpoint_t*)item;
        /* origin must match first */
        if (0 != orte_ns.compare_fields(target_mask, target_name, &endpoint->ep_origin)) {
            continue;
        }
        /* then tag: exact, or wildcard on either side */
        if (endpoint->ep_tag == target_tag ||
            endpoint->ep_tag == ORTE_IOF_ANY ||
            target_tag == ORTE_IOF_ANY) {
            /* hand the caller its own reference */
            OBJ_RETAIN(endpoint);
            match = endpoint;
            break;
        }
    }
    OPAL_THREAD_UNLOCK(&orte_iof_base.iof_lock);
    return match;
}
int mca_rcache_vma_clean(struct mca_rcache_base_module_t* rcache) { mca_rcache_vma_module_t *vma_rcache = (mca_rcache_vma_module_t*)rcache; mca_rcache_vma_t *vma; opal_list_item_t *i; do { OPAL_THREAD_LOCK(&rcache->lock); i = opal_list_get_first(&vma_rcache->vma_delete_list); if(opal_list_get_end(&vma_rcache->vma_delete_list) == i) { vma = NULL; OPAL_THREAD_UNLOCK(&rcache->lock); } else { vma = (mca_rcache_vma_t *)i; opal_list_remove_item(&vma_rcache->vma_delete_list, &vma->super); /* Need to drop the rcache lock before destroying the vma */ OPAL_THREAD_UNLOCK(&rcache->lock); mca_rcache_vma_destroy(vma); } } while (NULL != vma); return OMPI_SUCCESS; }
/*
 * Remove the entry stored under a 64-bit key, recycling its node onto
 * the table's free-node list.
 *
 * @param ht  the hash table to modify
 * @param key the 64-bit key to remove
 *
 * @return OPAL_SUCCESS if the key was found and removed,
 *         OPAL_ERR_NOT_FOUND otherwise (OPAL_ERR_BAD_PARAM in debug
 *         builds when the table was never initialized).
 */
int opal_hash_table_remove_value_uint64(opal_hash_table_t* ht, uint64_t key)
{
    opal_list_t *bucket = ht->ht_table + (key & ht->ht_mask);
    opal_list_item_t *item;

#if OPAL_ENABLE_DEBUG
    if (0 == ht->ht_table_size) {
        opal_output(0, "opal_hash_table_remove_value_uint64:"
                    "opal_hash_table_init() has not been called");
        return OPAL_ERR_BAD_PARAM;
    }
#endif

    for (item = opal_list_get_first(bucket);
         item != opal_list_get_end(bucket);
         item = opal_list_get_next(item)) {
        opal_uint64_hash_node_t *node = (opal_uint64_hash_node_t*)item;
        if (key == node->hn_key) {
            /* unlink from the bucket and recycle the node for reuse */
            opal_list_remove_item(bucket, (opal_list_item_t*)node);
            opal_list_append(&ht->ht_nodes, (opal_list_item_t*)node);
            ht->ht_size--;
            return OPAL_SUCCESS;
        }
    }
    return OPAL_ERR_NOT_FOUND;
}
/*
 * Ask every available v1.0.0 io component to register the given data
 * representation.
 *
 * @return OMPI_SUCCESS when every component succeeded, otherwise the
 *         first non-success code returned by any component (all
 *         components are still invoked).
 */
int mca_io_base_register_datarep(char *datarep,
                                 MPI_Datarep_conversion_function* read_fn,
                                 MPI_Datarep_conversion_function* write_fn,
                                 MPI_Datarep_extent_function* extent_fn,
                                 void* state)
{
    opal_list_item_t *p;
    int ret = OMPI_SUCCESS;

    /* walk every available io component */
    for (p = opal_list_get_first(&mca_io_base_components_available);
         p != opal_list_get_end(&mca_io_base_components_available);
         p = opal_list_get_next(p)) {
        const mca_base_component_t *component =
            ((mca_base_component_priority_list_item_t *) p)->super.cli_component;

        /* Only know how to handle v1.0.0 components for now */
        if (1 != component->mca_type_major_version ||
            0 != component->mca_type_minor_version ||
            0 != component->mca_type_release_version) {
            continue;
        }

        {
            const mca_io_base_component_1_0_0_t *v100 =
                (mca_io_base_component_1_0_0_t *) component;
            int tmp = v100->io_register_datarep(datarep, read_fn, write_fn,
                                                extent_fn, state);
            /* remember the first non-success error code */
            if (OMPI_SUCCESS == ret) {
                ret = tmp;
            }
        }
    }
    return ret;
}
/*
 * React to the loss of a route to the given process.
 *
 * Behavior, in order:
 *   1. If the route is to a different job family and we are the HNP,
 *      drop the stored jobfam route.
 *   2. If the lost route is our lifeline (and we are not finalizing),
 *      return ORTE_ERR_FATAL so the caller can abort.
 *   3. If we are HNP/daemon and the route is one of our child daemons,
 *      remove that child from our routing tree.
 *
 * @return ORTE_ERR_FATAL when the lifeline is lost outside finalize,
 *         ORTE_SUCCESS otherwise.
 */
static int route_lost(const orte_process_name_t *route)
{
    opal_list_item_t *item;
    orte_routed_tree_t *child;
    orte_routed_jobfam_t *jfam;
    uint16_t jfamily;
    int i;

    OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
                         "%s route to %s lost",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(route)));

    /* if the route is to a different job family and we are the HNP, look it up */
    if ((ORTE_JOB_FAMILY(route->jobid) != ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) &&
        ORTE_PROC_IS_HNP) {
        jfamily = ORTE_JOB_FAMILY(route->jobid);
        for (i=0; i < orte_routed_jobfams.size; i++) {
            if (NULL == (jfam = (orte_routed_jobfam_t*)opal_pointer_array_get_item(&orte_routed_jobfams, i))) {
                continue;
            }
            if (jfam->job_family == jfamily) {
                OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
                                     "%s routed_radix: route to %s lost",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                     ORTE_JOB_FAMILY_PRINT(route->jobid)));
                /* drop the stored route to that job family */
                opal_pointer_array_set_item(&orte_routed_jobfams, i, NULL);
                OBJ_RELEASE(jfam);
                break;
            }
        }
    }

    /* if we lose the connection to the lifeline and we are NOT already,
     * in finalize, tell the OOB to abort.
     * NOTE: we cannot call abort from here as the OOB needs to first
     * release a thread-lock - otherwise, we will hang!!
     */
    if (!orte_finalizing &&
        NULL != lifeline &&
        OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, route, lifeline)) {
        OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
                             "%s routed:radix: Connection to lifeline %s lost",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(lifeline)));
        return ORTE_ERR_FATAL;
    }

    /* if we are the HNP or daemon, and the route is a daemon,
     * see if it is one of our children - if so, remove it */
    if ((ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) &&
        route->jobid == ORTE_PROC_MY_NAME->jobid) {
        for (item = opal_list_get_first(&my_children);
             item != opal_list_get_end(&my_children);
             item = opal_list_get_next(item)) {
            child = (orte_routed_tree_t*)item;
            if (child->vpid == route->vpid) {
                opal_list_remove_item(&my_children, item);
                OBJ_RELEASE(item);
                return ORTE_SUCCESS;
            }
        }
    }

    /* we don't care about this one, so return success */
    return ORTE_SUCCESS;
}
/*
 * Compute the next hop for a message destined to *target under the
 * radix routing scheme.
 *
 * Decision order: routing disabled -> direct; invalid target -> invalid;
 * target is me -> direct; I am an app -> my daemon; I am a tool ->
 * direct or remote HNP; cross-job-family -> my HNP (daemon) or stored
 * jobfam route (HNP); target is my HNP -> parent or direct; otherwise
 * find the daemon hosting the target and route direct, to a child, or
 * up through my parent.
 *
 * @return the next-hop process name (ORTE_NAME_INVALID's value when no
 *         route exists).
 */
static orte_process_name_t get_route(orte_process_name_t *target)
{
    orte_process_name_t *ret, daemon;
    opal_list_item_t *item;
    orte_routed_tree_t *child;
    int i;
    orte_routed_jobfam_t *jfam;
    uint16_t jfamily;

    /* routing disabled: everyone talks directly */
    if (!orte_routing_is_enabled) {
        ret = target;
        goto found;
    }

    /* initialize */
    daemon.jobid = ORTE_PROC_MY_DAEMON->jobid;
    daemon.vpid = ORTE_PROC_MY_DAEMON->vpid;

    if (target->jobid == ORTE_JOBID_INVALID ||
        target->vpid == ORTE_VPID_INVALID) {
        ret = ORTE_NAME_INVALID;
        goto found;
    }

    /* if it is me, then the route is just direct */
    if (OPAL_EQUAL == opal_dss.compare(ORTE_PROC_MY_NAME, target, ORTE_NAME)) {
        ret = target;
        goto found;
    }

    /* if I am an application process, always route via my local daemon */
    if (ORTE_PROC_IS_APP) {
        ret = ORTE_PROC_MY_DAEMON;
        goto found;
    }

    /* if I am a tool, the route is direct if target is in
     * my own job family, and to the target's HNP if not */
    if (ORTE_PROC_IS_TOOL) {
        if (ORTE_JOB_FAMILY(target->jobid) == ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
            ret = target;
            goto found;
        } else {
            ORTE_HNP_NAME_FROM_JOB(&daemon, target->jobid);
            ret = &daemon;
            goto found;
        }
    }

    /****** HNP AND DAEMONS ONLY ******/

    /* IF THIS IS FOR A DIFFERENT JOB FAMILY... */
    if (ORTE_JOB_FAMILY(target->jobid) != ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
        /* if I am a daemon, route this via the HNP */
        if (ORTE_PROC_IS_DAEMON) {
            ret = ORTE_PROC_MY_HNP;
            goto found;
        }
        /* if I am the HNP or a tool, then I stored a route to
         * this job family, so look it up */
        jfamily = ORTE_JOB_FAMILY(target->jobid);
        for (i=0; i < orte_routed_jobfams.size; i++) {
            if (NULL == (jfam = (orte_routed_jobfam_t*)opal_pointer_array_get_item(&orte_routed_jobfams, i))) {
                continue;
            }
            if (jfam->job_family == jfamily) {
                OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
                                     "%s routed_binomial: route to %s found",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                     ORTE_JOB_FAMILY_PRINT(target->jobid)));
                ret = &jfam->route;
                goto found;
            }
        }
        /* not found - so we have no route */
        ret = ORTE_NAME_INVALID;
        goto found;
    }

    /* THIS CAME FROM OUR OWN JOB FAMILY... */

    /* if this is going to the HNP, then send it direct if we don't know
     * how to get there - otherwise, send it via the tree */
    if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_HNP, target)) {
        if (!hnp_direct || orte_static_ports) {
            OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
                                 "%s routing to the HNP through my parent %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_PARENT)));
            ret = ORTE_PROC_MY_PARENT;
            goto found;
        } else {
            OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
                                 "%s routing direct to the HNP",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
            ret = ORTE_PROC_MY_HNP;
            goto found;
        }
    }

    daemon.jobid = ORTE_PROC_MY_NAME->jobid;
    /* find out what daemon hosts this proc */
    if (ORTE_VPID_INVALID == (daemon.vpid = orte_get_proc_daemon_vpid(target))) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        ret = ORTE_NAME_INVALID;
        goto found;
    }

    /* if the daemon is me, then send direct to the target! */
    if (ORTE_PROC_MY_NAME->vpid == daemon.vpid) {
        ret = target;
        goto found;
    } else if (orte_process_info.num_procs < mca_routed_radix_component.max_connections) {
        /* if the job is small enough, send direct to the target's daemon */
        ret = &daemon;
        goto found;
    } else {
        /* search routing tree for next step to that daemon */
        for (item = opal_list_get_first(&my_children);
             item != opal_list_get_end(&my_children);
             item = opal_list_get_next(item)) {
            child = (orte_routed_tree_t*)item;
            if (child->vpid == daemon.vpid) {
                /* the child is hosting the proc - just send it there */
                ret = &daemon;
                goto found;
            }
            /* otherwise, see if the daemon we need is below the child */
            if (opal_bitmap_is_set_bit(&child->relatives, daemon.vpid)) {
                /* yep - we need to step through this child */
                daemon.vpid = child->vpid;
                ret = &daemon;
                goto found;
            }
        }
    }

    /* if we get here, then the target daemon is not beneath
     * any of our children, so we have to step up through our parent */
    daemon.vpid = ORTE_PROC_MY_PARENT->vpid;
    ret = &daemon;

found:
    OPAL_OUTPUT_VERBOSE((1, orte_routed_base_framework.framework_output,
                         "%s routed_radix_get(%s) --> %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(target),
                         ORTE_NAME_PRINT(ret)));

    return *ret;
}
/**
 * Function for weeding out btl components that don't want to run.
 *
 * Call the init function on all available components to find out if
 * they want to run.  Select all components that don't fail.  Failing
 * components will be closed and unloaded.  The selected modules will
 * be returned to the caller in a opal_list_t.
 *
 * NOTE(review): the include/exclude argv arrays produced by
 * opal_argv_split() are never freed on any exit path (including the
 * early OMPI_ERR_OUT_OF_RESOURCE return) - this looks like a leak;
 * confirm against opal_argv_free() usage elsewhere before fixing.
 */
int mca_btl_base_select(bool enable_progress_threads,
                        bool enable_mpi_threads)
{
    int i, num_btls;
    opal_list_item_t *item;
    mca_base_component_list_item_t *cli;
    mca_btl_base_component_t *component;
    mca_btl_base_module_t **modules;
    mca_btl_base_selected_module_t *sm;

    char** include = opal_argv_split(mca_btl_base_include, ',');
    char** exclude = opal_argv_split(mca_btl_base_exclude, ',');

    /* Traverse the list of opened modules; call their init
       functions. */
    item  = opal_list_get_first(&mca_btl_base_components_opened);
    while(item != opal_list_get_end(&mca_btl_base_components_opened)) {
        /* capture next now: a failed init removes "item" from the list */
        opal_list_item_t *next = opal_list_get_next(item);
        cli = (mca_base_component_list_item_t *) item;

        component = (mca_btl_base_component_t *) cli->cli_component;

        /* if there is an include list - item must be in the list to be included */
        if ( NULL != include ) {
            char** argv = include;
            bool found = false;
            while(argv && *argv) {
                if(strcmp(component->btl_version.mca_component_name,*argv) == 0) {
                    found = true;
                    break;
                }
                argv++;
            }
            if(found == false) {
                item = next;
                continue;
            }

            /* otherwise - check the exclude list to see if this item has been specifically excluded */
        } else if ( NULL != exclude ) {
            char** argv = exclude;
            bool found = false;
            while(argv && *argv) {
                if(strcmp(component->btl_version.mca_component_name,*argv) == 0) {
                    found = true;
                    break;
                }
                argv++;
            }
            if(found == true) {
                item = next;
                continue;
            }
        }

        opal_output_verbose(10, mca_btl_base_output,
                            "select: initializing %s component %s",
                            component->btl_version.mca_type_name,
                            component->btl_version.mca_component_name);
        if (NULL == component->btl_init) {
            opal_output_verbose(10, mca_btl_base_output,
                                "select: no init function; ignoring component %s",
                                component->btl_version.mca_component_name);
        } else {
            modules = component->btl_init(&num_btls, enable_progress_threads,
                                          enable_mpi_threads);

            /* If the component didn't initialize, remove it from the
               opened list and remove it from the component
               repository */

            if (NULL == modules) {
                opal_output_verbose(10, mca_btl_base_output,
                                    "select: init of component %s returned failure",
                                    component->btl_version.mca_component_name);
                opal_output_verbose(10, mca_btl_base_output,
                                    "select: module %s unloaded",
                                    component->btl_version.mca_component_name);

                mca_base_component_repository_release((mca_base_component_t *) component);
                opal_list_remove_item(&mca_btl_base_components_opened, item);
            }

            /* Otherwise, it initialized properly.  Save it. */

            else {
                opal_output_verbose(10, mca_btl_base_output,
                                    "select: init of component %s returned success",
                                    component->btl_version.mca_component_name);

                for (i = 0; i < num_btls; ++i) {
                    sm = OBJ_NEW(mca_btl_base_selected_module_t);
                    if (NULL == sm) {
                        /* NOTE(review): include/exclude leak here too */
                        return OMPI_ERR_OUT_OF_RESOURCE;
                    }
                    sm->btl_component = component;
                    sm->btl_module = modules[i];
                    opal_list_append(&mca_btl_base_modules_initialized,
                                     (opal_list_item_t*) sm);
                }
                free(modules);
            }
        }
        item = next;
    }

    /* Finished querying all components.  Check for the bozo case. */

    if (0 == opal_list_get_size(&mca_btl_base_modules_initialized)) {
        orte_show_help("help-mca-base.txt", "find-available:none-found", true, "btl");
        orte_errmgr.abort(1, NULL);
    }
    return OMPI_SUCCESS;
}
/*
 * Fetch all stored values for a proc whose key matches the given key
 * (NULL key = all keys; a trailing '*' in key acts as a prefix
 * wildcard), appending copies to kvs.
 *
 * @param uid   proc identifier (copied locally to protect alignment)
 * @param scope only values whose scope intersects this one are returned
 * @param key   key, key prefix ending in '*', or NULL for everything
 * @param kvs   output list receiving copied opal_value_t entries
 *
 * @return OPAL_SUCCESS, OPAL_ERR_TAKE_NEXT_OPTION when this store has
 *         no data for the proc, or a copy-failure code.
 *
 * Fix vs. original: the strdup'd search key was leaked when
 * opal_dss.copy failed inside the matching loop; it is now freed on
 * that error path as well.
 */
static int fetch_multiple(const opal_identifier_t *uid,
                          opal_scope_t scope,
                          const char *key,
                          opal_list_t *kvs)
{
    proc_data_t *proc_data;
    opal_value_t *kv, *kvnew;
    int rc;
    char *srchkey, *ptr;
    size_t len = 0;
    opal_identifier_t id;

    /* to protect alignment, copy the data across */
    memcpy(&id, uid, sizeof(opal_identifier_t));

    OPAL_OUTPUT_VERBOSE((5, opal_db_base_framework.framework_output,
                         "db:hash:fetch_multiple: searching for key %s on proc %" PRIu64 "",
                         (NULL == key) ? "NULL" : key, id));

    /* lookup the proc data object for this proc */
    if (NULL == (proc_data = lookup_opal_proc(&hash_data, id))) {
        /* look elsewhere */
        return OPAL_ERR_TAKE_NEXT_OPTION;
    }

    /* if the key is NULL, then return all the values */
    if (NULL == key) {
        for (kv = (opal_value_t*) opal_list_get_first(&proc_data->data);
             kv != (opal_value_t*) opal_list_get_end(&proc_data->data);
             kv = (opal_value_t*) opal_list_get_next(kv)) {
            /* check for a matching scope */
            if (!(scope & kv->scope)) {
                continue;
            }
            if (OPAL_SUCCESS != (rc = opal_dss.copy((void**)&kvnew, kv, OPAL_VALUE))) {
                OPAL_ERROR_LOG(rc);
                return rc;
            }
            opal_list_append(kvs, &kvnew->super);
        }
        return OPAL_SUCCESS;
    }

    /* see if the key includes a wildcard */
    srchkey = strdup(key);
    if (NULL != (ptr = strchr(srchkey, '*'))) {
        *ptr = '\0';
        len = strlen(srchkey);
    }

    /* otherwise, find all matching keys and return them */
    for (kv = (opal_value_t*) opal_list_get_first(&proc_data->data);
         kv != (opal_value_t*) opal_list_get_end(&proc_data->data);
         kv = (opal_value_t*) opal_list_get_next(kv)) {
        /* check for a matching scope */
        if (!(scope & kv->scope)) {
            continue;
        }
        if ((0 < len && 0 == strncmp(srchkey, kv->key, len)) ||
            (0 == len && 0 == strcmp(key, kv->key))) {
            if (OPAL_SUCCESS != (rc = opal_dss.copy((void**)&kvnew, kv, OPAL_VALUE))) {
                OPAL_ERROR_LOG(rc);
                free(srchkey);  /* don't leak the search key on error */
                return rc;
            }
            opal_list_append(kvs, &kvnew->super);
        }
    }
    free(srchkey);
    return OPAL_SUCCESS;
}
/**
 * Broadcast (via RML) a shmem segment descriptor among the local procs
 * participating in a common-sm setup.
 *
 * This routine assumes that sorted_procs is in the following state:
 *   o all the local procs at the beginning.
 *   o sorted_procs[0] is the lowest named process.
 *
 * @param ds_buf           shmem descriptor: sent when bcast_root, else
 *                         filled in from the received message
 * @param procs            proc array; the first num_procs are local
 * @param num_procs        number of local procs
 * @param tag              RML tag for the exchange
 * @param bcast_root       true iff the caller is the root ("lowest") proc
 * @param msg_id_str       identifier naming this particular exchange
 * @param pending_rml_msgs list of messages that arrived for other
 *                         concurrent exchanges
 *
 * @return OMPI_SUCCESS on success, OMPI_ERROR otherwise.
 *
 * Fix vs. original: strncpy(dst, src, sizeof(dst) - 1) does not
 * NUL-terminate when the source fills the buffer, and never writes the
 * final byte of the (uninitialized) buffer; since the whole buffer is
 * transmitted and later strcmp'd, the terminator is now set explicitly.
 */
int mca_common_sm_rml_info_bcast(opal_shmem_ds_t *ds_buf,
                                 ompi_proc_t **procs,
                                 size_t num_procs,
                                 int tag,
                                 bool bcast_root,
                                 char *msg_id_str,
                                 opal_list_t *pending_rml_msgs)
{
    int rc = OMPI_SUCCESS;
    struct iovec iov[MCA_COMMON_SM_RML_MSG_LEN];
    int iovrc;
    size_t p;
    char msg_id_str_to_tx[OPAL_PATH_MAX];

    strncpy(msg_id_str_to_tx, msg_id_str, sizeof(msg_id_str_to_tx) - 1);
    /* guarantee the id is a valid C string regardless of source length */
    msg_id_str_to_tx[sizeof(msg_id_str_to_tx) - 1] = '\0';

    /* let the first item be the queueing id name */
    iov[0].iov_base = (ompi_iov_base_ptr_t)msg_id_str_to_tx;
    iov[0].iov_len = sizeof(msg_id_str_to_tx);
    iov[1].iov_base = (ompi_iov_base_ptr_t)ds_buf;
    iov[1].iov_len = sizeof(opal_shmem_ds_t);

    /* figure out if i am the root proc in the group.
     * if i am, bcast the message the rest of the local procs. */
    if (bcast_root) {
        opal_progress_event_users_increment();
        /* first num_procs items should be local procs */
        for (p = 1; p < num_procs; ++p) {
            iovrc = orte_rml.send(&(procs[p]->proc_name), iov,
                                  MCA_COMMON_SM_RML_MSG_LEN, tag, 0);
            if ((ssize_t)(iov[0].iov_len + iov[1].iov_len) > iovrc) {
                ORTE_ERROR_LOG(OMPI_ERR_COMM_FAILURE);
                opal_progress_event_users_decrement();
                rc = OMPI_ERROR;
                goto out;
            }
        }
        opal_progress_event_users_decrement();
    } else { /* i am NOT the root ("lowest") proc */
        opal_list_item_t *item;
        mca_common_sm_rml_pending_rml_msg_types_t *rml_msg;
        /* because a component query can be performed simultaneously in multiple
         * threads, the RML messages may arrive in any order.  so first check to
         * see if we previously received a message for me. */
        for (item = opal_list_get_first(pending_rml_msgs);
             opal_list_get_end(pending_rml_msgs) != item;
             item = opal_list_get_next(item)) {
            rml_msg = (mca_common_sm_rml_pending_rml_msg_types_t *)item;
            /* was the message for me? */
            if (0 == strcmp(rml_msg->msg_id_str, msg_id_str)) {
                opal_list_remove_item(pending_rml_msgs, item);
                /* from ==============> to */
                opal_shmem_ds_copy(&rml_msg->shmem_ds, ds_buf);
                OBJ_RELEASE(item);
                break;
            }
        }
        /* if we didn't find a message already waiting, block on receiving from
         * the RML. */
        if (opal_list_get_end(pending_rml_msgs) == item) {
            do {
                /* bump up the libevent polling frequency while we're in this
                 * RML recv, just to ensure we're checking libevent frequently. */
                opal_progress_event_users_increment();
                iovrc = orte_rml.recv(&(procs[0]->proc_name), iov,
                                      MCA_COMMON_SM_RML_MSG_LEN, tag, 0);
                opal_progress_event_users_decrement();
                if (iovrc < 0) {
                    ORTE_ERROR_LOG(OMPI_ERR_RECV_LESS_THAN_POSTED);
                    rc = OMPI_ERROR;
                    goto out;
                }
                /* was the message for me? if so, we're done
                 * (the recv overwrote msg_id_str_to_tx via iov[0]) */
                if (0 == strcmp(msg_id_str_to_tx, msg_id_str)) {
                    break;
                }
                /* if not, put it on the pending list and try again */
                if (NULL == (rml_msg = OBJ_NEW(mca_common_sm_rml_pending_rml_msg_types_t))) {
                    ORTE_ERROR_LOG(OMPI_ERR_OUT_OF_RESOURCE);
                    rc = OMPI_ERROR;
                    goto out;
                }
                /* not for me, so place on list */
                /* from ========> to */
                opal_shmem_ds_copy(ds_buf, &rml_msg->shmem_ds);
                memcpy(rml_msg->msg_id_str, msg_id_str_to_tx, OPAL_PATH_MAX);
                opal_list_append(pending_rml_msgs, &(rml_msg->super));
            } while (1);
        }
    }

out:
    return rc;
}
/*
 * Read and parse the ALPS appinfo file, extracting the node list for the
 * reservation identified by *uMe and appending orte_node_t entries (ordered
 * by launch_id) to 'nodes'.  Retries the open/read with increasing delays to
 * give the scheduler's I/O a chance to complete.
 *
 * Returns ORTE_SUCCESS, or an ORTE error code on open/stat/read/alloc failure.
 */
static int orte_ras_alps_read_appinfo_file(opal_list_t *nodes, char *filename,
                                           unsigned int *uMe)
{
    int iq;
    int ix;
    int iFd;                    /* file descriptor for appinfo    */
    int iTrips;                 /* counter appinfo read attempts  */
    int max_appinfo_read_attempts;
    struct stat ssBuf;          /* stat buffer                    */
    size_t szLen;               /* size of appinfo (file)         */
    off_t oNow;                 /* current appinfo data offset    */
    off_t oInfo=sizeof(appInfoHdr_t);
    off_t oDet=sizeof(appInfo_t);
    off_t oSlots;
    off_t oEntry;
    int32_t sNodes=0;
    char *cpBuf;
    char *hostname;
    orte_node_t *node = NULL, *n2;
    appInfoHdr_t *apHdr;        /* ALPS header structure          */
    appInfo_t *apInfo;          /* ALPS table info structure      */
#if ALPS_APPINFO_VERSION==0
    placeList_t *apSlots;       /* ALPS node specific info        */
#else
    placeNodeList_t *apNodes;
#endif
    bool added;
    opal_list_item_t *item;

    orte_ras_alps_get_appinfo_attempts(&max_appinfo_read_attempts);
    oNow=0;
    iTrips=0;
    opal_output_verbose(1, orte_ras_base.ras_output,
                        "ras:alps:allocate: begin processing appinfo file");

    while(!oNow) {              /* Until appinfo read is complete */
        iTrips++;               /* Increment trip count */

        iFd=open( filename, O_RDONLY );
        if( iFd==-1 ) {         /* If file absent, ALPS is down */
            opal_output_verbose(1, orte_ras_base.ras_output,
                                "ras:alps:allocate: ALPS information open failure");
            usleep(iTrips*50000);   /* Increasing delays, .05 s/try */
            /* Fail only when number of attempts have been exhausted. */
            if( iTrips <= max_appinfo_read_attempts ) continue;
            ORTE_ERROR_LOG(ORTE_ERR_FILE_OPEN_FAILURE);
            return ORTE_ERR_FILE_OPEN_FAILURE;
        }
        if( fstat( iFd, &ssBuf )==-1 ) {    /* If stat fails, access denied */
            close(iFd);   /* BUGFIX: descriptor was leaked on this path */
            ORTE_ERROR_LOG(ORTE_ERR_NOT_AVAILABLE);
            return ORTE_ERR_NOT_AVAILABLE;
        }

        szLen=ssBuf.st_size;            /* Get buffer size */
        cpBuf=malloc(szLen+1);          /* Allocate buffer */
        if (NULL == cpBuf) {
            close(iFd);   /* BUGFIX: descriptor was leaked on this path */
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            return ORTE_ERR_OUT_OF_RESOURCE;
        }

        /* Repeated attempts to read appinfo, with an increasing delay between
         * successive attempts to allow scheduler I/O a chance to complete. */
        if( (oNow=read( iFd, cpBuf, szLen ))!=(off_t)szLen ) {
            /* This is where apstat fails; we will record it and try again. */
            opal_output_verbose(1, orte_ras_base.ras_output,
                                "ras:alps:allocate: ALPS information read failure: %ld bytes",
                                (long int)oNow);
            free(cpBuf);                /* Free (old) buffer */
            close(iFd);                 /* Close (old) descriptor */
            oNow=0;                     /* Reset byte count */
            usleep(iTrips*50000);       /* Increasing delays, .05 s/try */
            /* Fail only when number of attempts have been exhausted. */
            if( iTrips<=max_appinfo_read_attempts ) continue;
            ORTE_ERROR_LOG(ORTE_ERR_FILE_READ_FAILURE);
            return ORTE_ERR_FILE_READ_FAILURE;
        }
    }
    close(iFd);
    opal_output_verbose(1, orte_ras_base.ras_output,
                        "ras:alps:allocate: file %s read", filename);

    /* Now that we have the scheduler information, we just have to parse it
     * for the data that we seek. */
    oNow=0;
    apHdr=(appInfoHdr_t *)cpBuf;

    opal_output_verbose(1, orte_ras_base.ras_output,
                        "ras:alps:allocate: %d entries in file", apHdr->apNum);

    /* Header info (apHdr) tells us how many entries are in the file:
     *     apHdr->apNum                                                */
    for( iq=0; iq<apHdr->apNum; iq++ ) {    /* Parse all entries in file */

        /* Just at this level, a lot of information is available:
         *     apInfo->apid       ... ALPS job ID
         *     apInfo->resId      ... ALPS reservation ID
         *     apInfo->numCmds    ... Number of executables
         *     apInfo->numPlaces  ... Number of PEs                 */
        apInfo=(appInfo_t *)(cpBuf+oNow+oInfo);

        /* Calculate the dependent offsets. */
        oSlots=sizeof(cmdDetail_t)*apInfo->numCmds;

        opal_output_verbose(1, orte_ras_base.ras_output,
                            "ras:alps:allocate: read data for resId %u - myId %u",
                            apInfo->resId, *uMe);

#if ALPS_APPINFO_VERSION==0
        /* Finally, we get to the actual node-specific information:
         *     apSlots[ix].cmdIx    ... index of apDet[].cmd
         *     apSlots[ix].nid      ... NodeID (NID)
         *     apSlots[ix].procMask ... mask for processors... need 16-bit shift */
        apSlots=(placeList_t *)(cpBuf+oNow+oInfo+oDet+oSlots);
        oEntry=sizeof(placeList_t)*apInfo->numPlaces;
        oNow+=(oDet+oSlots+oEntry);     /* Target next slot */

        if( apInfo->resId != *uMe ) continue;   /* Filter to our reservation Id */

        /* in this early version of alps, there is one entry for each PE in
         * the allocation - so cycle across the numPlaces entries, assigning
         * a slot for each time a node is named */
        for( ix=0; ix<apInfo->numPlaces; ix++ ) {
            opal_output_verbose(5, orte_ras_base.ras_output,
                                "ras:alps:read_appinfo: got NID %d", apSlots[ix].nid);

            asprintf( &hostname, "%d", apSlots[ix].nid );
            if (NULL == hostname) {
                free(cpBuf);  /* BUGFIX: buffer was leaked on this path */
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                return ORTE_ERR_OUT_OF_RESOURCE;
            }

            /* If this matches the prior nodename, just add to the slot count. */
            if( NULL!=node && !strcmp(node->name, hostname) ) {
                free(hostname);             /* free hostname since not needed */
                ++node->slots;
            } else {                        /* must be new, so add to list */
                opal_output_verbose(1, orte_ras_base.ras_output,
                                    "ras:alps:read_appinfo: added NID %d to list",
                                    apSlots[ix].nid);
                node = OBJ_NEW(orte_node_t);
                node->name = hostname;
                node->launch_id = apSlots[ix].nid;
                node->slots_inuse = 0;
                node->slots_max = 0;
                node->slots = 1;
                /* need to order these node ids so the regex generator
                 * can properly function */
                added = false;
                for (item = opal_list_get_first(nodes);
                     item != opal_list_get_end(nodes);
                     item = opal_list_get_next(item)) {
                    n2 = (orte_node_t*)item;
                    if (node->launch_id < n2->launch_id) {
                        /* insert the new node before this one */
                        opal_list_insert_pos(nodes, item, &node->super);
                        added = true;
                        break;
                    }
                }
                if (!added) {
                    /* add it to the end */
                    opal_list_append(nodes, &node->super);
                }
                sNodes++;               /* Increment the node count */
            }
        }
#else
        /* in newer versions of alps, there is one entry for each node in the
         * allocation, and that struct directly carries the number of PEs
         * allocated on that node to this job. */
        apNodes=(placeNodeList_t *)(cpBuf+oNow+oInfo+oDet+oSlots);
        oEntry=sizeof(placeNodeList_t)*apInfo->numPlaces;
        oNow+=(oDet+oSlots+oEntry);     /* Target next entry */

        if( apInfo->resId != *uMe ) continue;   /* Filter to our reservation Id */

        for( ix=0; ix<apInfo->numPlaces; ix++ ) {
            opal_output_verbose(5, orte_ras_base.ras_output,
                                "ras:alps:read_appinfo(modern): processing NID %d with %d slots",
                                apNodes[ix].nid, apNodes[ix].numPEs);

            asprintf( &hostname, "%d", apNodes[ix].nid );
            if (NULL == hostname) {
                free(cpBuf);  /* BUGFIX: buffer was leaked on this path */
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                return ORTE_ERR_OUT_OF_RESOURCE;
            }

            node = OBJ_NEW(orte_node_t);
            node->name = hostname;
            node->launch_id = apNodes[ix].nid;
            node->slots_inuse = 0;
            node->slots_max = 0;
            node->slots = apNodes[ix].numPEs;
            /* need to order these node ids so the regex generator
             * can properly function */
            added = false;
            for (item = opal_list_get_first(nodes);
                 item != opal_list_get_end(nodes);
                 item = opal_list_get_next(item)) {
                n2 = (orte_node_t*)item;
                if (node->launch_id < n2->launch_id) {
                    /* insert the new node before this one */
                    opal_list_insert_pos(nodes, item, &node->super);
                    added = true;
                    break;
                }
            }
            if (!added) {
                /* add it to the end */
                opal_list_append(nodes, &node->super);
            }
            sNodes++;                   /* Increment the node count */
        }
#endif
        break;                          /* Extended details ignored */
    }
    free(cpBuf);                        /* Free the buffer */

    return ORTE_SUCCESS;
}
/*
 * Activate the state-machine handler registered for the given job state.
 *
 * Scans the registered orte_job_states list for an exact match; while
 * scanning, it also remembers the positions of the ORTE_JOB_STATE_ANY and
 * ORTE_JOB_STATE_ERROR wildcard handlers so they can serve as fallbacks.
 * On a match (or fallback), the handler is fired asynchronously by posting
 * a one-shot libevent write event carrying an orte_state_caddy_t.
 *
 * @param jdata  job whose state changed; may be NULL (handler fired with
 *               an empty caddy in that case)
 * @param state  the job state being activated
 */
void orte_state_base_activate_job_state(orte_job_t *jdata,
                                        orte_job_state_t state)
{
    opal_list_item_t *itm, *any=NULL, *error=NULL;
    orte_state_t *s;
    orte_state_caddy_t *caddy;

    for (itm = opal_list_get_first(&orte_job_states);
         itm != opal_list_get_end(&orte_job_states);
         itm = opal_list_get_next(itm)) {
        s = (orte_state_t*)itm;
        if (s->job_state == ORTE_JOB_STATE_ANY) {
            /* save this place - fallback when no exact match exists */
            any = itm;
        }
        if (s->job_state == ORTE_JOB_STATE_ERROR) {
            /* save the error-wildcard handler as well */
            error = itm;
        }
        if (s->job_state == state) {
            OPAL_OUTPUT_VERBOSE((1, orte_state_base_framework.framework_output,
                                 "%s ACTIVATING JOB %s STATE %s PRI %d",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 (NULL == jdata) ? "NULL" : ORTE_JOBID_PRINT(jdata->jobid),
                                 orte_job_state_to_str(state), s->priority));
            /* a registered entry with a NULL callback means "ignore this
             * state" - nothing to fire */
            if (NULL == s->cbfunc) {
                OPAL_OUTPUT_VERBOSE((1, orte_state_base_framework.framework_output,
                                     "%s NULL CBFUNC FOR JOB %s STATE %s",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                     (NULL == jdata) ? "ALL" : ORTE_JOBID_PRINT(jdata->jobid),
                                     orte_job_state_to_str(state)));
                return;
            }
            caddy = OBJ_NEW(orte_state_caddy_t);
            if (NULL != jdata) {
                caddy->jdata = jdata;
                caddy->job_state = state;
                /* the caddy holds a reference to jdata until the handler
                 * releases it */
                OBJ_RETAIN(jdata);
            }
            /* fd of -1 + immediate activation = fire the callback once from
             * the event loop at the handler's registered priority */
            opal_event_set(orte_event_base, &caddy->ev, -1,
                           OPAL_EV_WRITE, s->cbfunc, caddy);
            opal_event_set_priority(&caddy->ev, s->priority);
            opal_event_active(&caddy->ev, OPAL_EV_WRITE, 1);
            return;
        }
    }

    /* if we get here, then the state wasn't found, so execute
     * the default handler if it is defined */
    if (ORTE_JOB_STATE_ERROR < state && NULL != error) {
        /* error-range states prefer the ERROR wildcard handler */
        s = (orte_state_t*)error;
    } else if (NULL != any) {
        s = (orte_state_t*)any;
    } else {
        OPAL_OUTPUT_VERBOSE((1, orte_state_base_framework.framework_output,
                             "ACTIVATE: ANY STATE NOT FOUND"));
        return;
    }
    if (NULL == s->cbfunc) {
        OPAL_OUTPUT_VERBOSE((1, orte_state_base_framework.framework_output,
                             "ACTIVATE: ANY STATE HANDLER NOT DEFINED"));
        return;
    }
    caddy = OBJ_NEW(orte_state_caddy_t);
    if (NULL != jdata) {
        caddy->jdata = jdata;
        caddy->job_state = state;
        OBJ_RETAIN(jdata);
    }
    OPAL_OUTPUT_VERBOSE((1, orte_state_base_framework.framework_output,
                         "%s ACTIVATING JOB %s STATE %s PRI %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         (NULL == jdata) ? "NULL" : ORTE_JOBID_PRINT(jdata->jobid),
                         orte_job_state_to_str(state), s->priority));
    opal_event_set(orte_event_base, &caddy->ev, -1,
                   OPAL_EV_WRITE, s->cbfunc, caddy);
    opal_event_set_priority(&caddy->ev, s->priority);
    opal_event_active(&caddy->ev, OPAL_EV_WRITE, 1);
}
/*
 * Rewrite the app_context for a process being restarted from a checkpoint so
 * that it launches 'opal-restart' pointed at the process's snapshot, instead
 * of the original binary.  If the app_context covers more than one process,
 * a new single-process context is cloned and appended to the job; otherwise
 * the existing context is modified in place.
 *
 * Returns ORTE_SUCCESS, or an ORTE error code when the snapshot or the
 * app_context for the process cannot be found.
 */
int orte_errmgr_base_update_app_context_for_cr_recovery(orte_job_t *jobdata,
                                                        orte_proc_t *proc,
                                                        opal_list_t *local_snapshots)
{
    int exit_status = ORTE_SUCCESS;
    opal_list_item_t *item = NULL;
    orte_std_cntr_t i_app;
    int argc = 0;
    orte_app_context_t *cur_app_context = NULL;
    orte_app_context_t *new_app_context = NULL;
    orte_sstore_base_local_snapshot_info_t *vpid_snapshot = NULL;
    char *reference_fmt_str = NULL;
    char *location_str = NULL;
    char *cache_location_str = NULL;
    char *ref_location_fmt_str = NULL;
    char *tmp_str = NULL;
    char *global_snapshot_ref = NULL;
    char *global_snapshot_seq = NULL;

    /*
     * Get the snapshot restart command for this process
     * JJH CLEANUP: Pass in the vpid_snapshot, so we don't have to look it up every time?
     */
    for(item  = opal_list_get_first(local_snapshots);
        item != opal_list_get_end(local_snapshots);
        item  = opal_list_get_next(item) ) {
        vpid_snapshot = (orte_sstore_base_local_snapshot_info_t*)item;
        if(OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL,
                                                       &vpid_snapshot->process_name,
                                                       &proc->name) ) {
            break;
        }
        else {
            vpid_snapshot = NULL;
        }
    }
    if( NULL == vpid_snapshot ) {
        ORTE_ERROR_LOG(ORTE_ERROR);
        exit_status = ORTE_ERROR;
        goto cleanup;
    }

    orte_sstore.get_attr(vpid_snapshot->ss_handle,
                         SSTORE_METADATA_LOCAL_SNAP_REF_FMT,
                         &reference_fmt_str);
    orte_sstore.get_attr(vpid_snapshot->ss_handle,
                         SSTORE_METADATA_LOCAL_SNAP_LOC,
                         &location_str);
    orte_sstore.get_attr(vpid_snapshot->ss_handle,
                         SSTORE_METADATA_LOCAL_SNAP_REF_LOC_FMT,
                         &ref_location_fmt_str);
    orte_sstore.get_attr(vpid_snapshot->ss_handle,
                         SSTORE_METADATA_GLOBAL_SNAP_REF,
                         &global_snapshot_ref);
    orte_sstore.get_attr(vpid_snapshot->ss_handle,
                         SSTORE_METADATA_GLOBAL_SNAP_SEQ,
                         &global_snapshot_seq);

    /*
     * Find current app_context.
     * BUGFIX: the original loop left the last array element in
     * cur_app_context even when no idx matched (so the NULL check below
     * never fired on a miss), and the miss path logged/returned an
     * uninitialized 'ret'.  Only record an actual match, and report
     * ORTE_ERR_NOT_FOUND on a miss.
     */
    cur_app_context = NULL;
    for(i_app = 0; i_app < opal_pointer_array_get_size(jobdata->apps); ++i_app) {
        orte_app_context_t *app =
            (orte_app_context_t *)opal_pointer_array_get_item(jobdata->apps, i_app);
        if( NULL == app ) {
            continue;
        }
        if(proc->app_idx == app->idx) {
            cur_app_context = app;
            break;
        }
    }
    if( NULL == cur_app_context ) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        exit_status = ORTE_ERR_NOT_FOUND;
        goto cleanup;
    }

    /*
     * if > 1 processes in this app context
     *   Create a new app_context
     *   Copy over attributes
     *   Add it to the job_t data structure
     *   Associate it with this process in the job
     * else
     *   Reuse this app_context
     */
    if( cur_app_context->num_procs > 1 ) {
        /* Create a new app_context */
        new_app_context = OBJ_NEW(orte_app_context_t);

        /* Copy over attributes */
        new_app_context->idx = cur_app_context->idx;
        new_app_context->app = NULL; /* strdup(cur_app_context->app); */
        new_app_context->num_procs = 1;
        new_app_context->argv = NULL; /* opal_argv_copy(cur_app_context->argv); */
        new_app_context->env = opal_argv_copy(cur_app_context->env);
        new_app_context->cwd = (NULL == cur_app_context->cwd ? NULL :
                                strdup(cur_app_context->cwd));
        new_app_context->user_specified_cwd = cur_app_context->user_specified_cwd;
        new_app_context->hostfile = (NULL == cur_app_context->hostfile ? NULL :
                                     strdup(cur_app_context->hostfile));
        new_app_context->add_hostfile = (NULL == cur_app_context->add_hostfile ? NULL :
                                         strdup(cur_app_context->add_hostfile));
        new_app_context->dash_host = opal_argv_copy(cur_app_context->dash_host);
        new_app_context->prefix_dir = (NULL == cur_app_context->prefix_dir ? NULL :
                                       strdup(cur_app_context->prefix_dir));
        new_app_context->preload_binary = false;
        new_app_context->preload_libs = false;
        new_app_context->preload_files_dest_dir = NULL;
        new_app_context->preload_files_src_dir = NULL;

        asprintf(&tmp_str, reference_fmt_str, vpid_snapshot->process_name.vpid);
        asprintf(&(new_app_context->sstore_load),
                 "%s:%s:%s:%s:%s:%s",
                 location_str,
                 global_snapshot_ref,
                 tmp_str,
                 (vpid_snapshot->compress_comp == NULL ? "" : vpid_snapshot->compress_comp),
                 (vpid_snapshot->compress_postfix == NULL ? "" : vpid_snapshot->compress_postfix),
                 global_snapshot_seq);

        new_app_context->used_on_node = cur_app_context->used_on_node;

        /* Add it to the job_t data structure */
        /*current_global_jobdata->num_apps++; */
        new_app_context->idx = (jobdata->num_apps);
        proc->app_idx = new_app_context->idx;

        opal_pointer_array_add(jobdata->apps, new_app_context);
        ++(jobdata->num_apps);

        /* Remove association with the old app_context */
        --(cur_app_context->num_procs);
    }
    else {
        new_app_context = cur_app_context;

        /* Cleanout old stuff */
        free(new_app_context->app);
        new_app_context->app = NULL;

        opal_argv_free(new_app_context->argv);
        new_app_context->argv = NULL;

        asprintf(&tmp_str, reference_fmt_str, vpid_snapshot->process_name.vpid);
        asprintf(&(new_app_context->sstore_load),
                 "%s:%s:%s:%s:%s:%s",
                 location_str,
                 global_snapshot_ref,
                 tmp_str,
                 (vpid_snapshot->compress_comp == NULL ? "" : vpid_snapshot->compress_comp),
                 (vpid_snapshot->compress_postfix == NULL ? "" : vpid_snapshot->compress_postfix),
                 global_snapshot_seq);
    }

    /*
     * Update the app_context with the restart informaiton
     */
    new_app_context->app = strdup("opal-restart");
    opal_argv_append(&argc, &(new_app_context->argv), new_app_context->app);
    opal_argv_append(&argc, &(new_app_context->argv), "-l");
    opal_argv_append(&argc, &(new_app_context->argv), location_str);
    opal_argv_append(&argc, &(new_app_context->argv), "-m");
    opal_argv_append(&argc, &(new_app_context->argv), orte_sstore_base_local_metadata_filename);
    opal_argv_append(&argc, &(new_app_context->argv), "-r");
    if( NULL != tmp_str ) {
        free(tmp_str);
        tmp_str = NULL;
    }
    asprintf(&tmp_str, reference_fmt_str, vpid_snapshot->process_name.vpid);
    opal_argv_append(&argc, &(new_app_context->argv), tmp_str);

cleanup:
    /* NOTE(review): global_snapshot_ref/seq obtained from orte_sstore.get_attr
     * are not freed here, matching the original code - confirm whether the
     * sstore API transfers ownership before changing that. */
    if( NULL != tmp_str) {
        free(tmp_str);
        tmp_str = NULL;
    }
    if( NULL != location_str ) {
        free(location_str);
        location_str = NULL;
    }
    if( NULL != cache_location_str ) {
        free(cache_location_str);
        cache_location_str = NULL;
    }
    if( NULL != reference_fmt_str ) {
        free(reference_fmt_str);
        reference_fmt_str = NULL;
    }
    if( NULL != ref_location_fmt_str ) {
        free(ref_location_fmt_str);
        ref_location_fmt_str = NULL;
    }

    return exit_status;
}
/* we can only enter this routine if no other allocation
 * was found, so we only need to know that finding any
 * relative node syntax should generate an immediate error
 *
 * Accumulate the hosts named in the -host argv into 'nodes': existing
 * entries get their slot count bumped, new ones are appended with one slot.
 * Returns ORTE_SUCCESS, ORTE_ERR_SILENT on relative-node syntax (help
 * message already printed), or an error code on allocation failure.
 */
int orte_util_add_dash_host_nodes(opal_list_t *nodes,
                                  bool *override_oversubscribed,
                                  char ** host_argv)
{
    opal_list_item_t* item;
    orte_std_cntr_t i, j, k;
    int rc;
    char **mapped_nodes = NULL, **mini_map;
    orte_node_t *node;

    /* Accumulate all of the host name mappings */
    for (j = 0; j < opal_argv_count(host_argv); ++j) {
        mini_map = opal_argv_split(host_argv[j], ',');

        if (mapped_nodes == NULL) {
            mapped_nodes = mini_map;
        } else {
            for (k = 0; NULL != mini_map[k]; ++k) {
                rc = opal_argv_append_nosize(&mapped_nodes, mini_map[k]);
                if (OPAL_SUCCESS != rc) {
                    opal_argv_free(mini_map);  /* BUGFIX: was leaked here */
                    goto cleanup;
                }
            }
            opal_argv_free(mini_map);
        }
    }

    /* Did we find anything? If not, then do nothing */
    if (NULL == mapped_nodes) {
        return ORTE_SUCCESS;
    }

    /* go through the names found and add them to the host list. If
     * they're not unique, then bump the slots count for each duplicate */
    for (i = 0; NULL != mapped_nodes[i]; ++i) {
        /* if the specified node contains a relative node syntax,
         * this is an error */
        if ('+' == mapped_nodes[i][0]) {
            orte_show_help("help-dash-host.txt", "dash-host:relative-syntax",
                           true, mapped_nodes[i]);
            rc = ORTE_ERR_SILENT;
            goto cleanup;
        }

        /* see if the node is already on the list */
        for (item = opal_list_get_first(nodes);
             item != opal_list_get_end(nodes);
             item = opal_list_get_next(item)) {
            node = (orte_node_t*) item;
            if (0 == strcmp(node->name, mapped_nodes[i]) ||
                (0 == strcmp(node->name, orte_process_info.nodename) &&
                 (0 == strcmp(mapped_nodes[i], "localhost") ||
                  opal_ifislocal(mapped_nodes[i])))) {
                ++node->slots;
                break;
            }
        }

        /* If we didn't find it, add it to the list */
        if (item == opal_list_get_end(nodes)) {
            node = OBJ_NEW(orte_node_t);
            if (NULL == node) {
                /* BUGFIX: early return here leaked mapped_nodes */
                rc = ORTE_ERR_OUT_OF_RESOURCE;
                goto cleanup;
            }
            /* check to see if this is a local name */
            if (0 == strcmp(mapped_nodes[i], "localhost") ||
                opal_ifislocal(mapped_nodes[i])) {
                /* it is local, so use the local nodename to avoid
                 * later confusion */
                if (orte_show_resolved_nodenames &&
                    0 != strcmp(mapped_nodes[i], orte_process_info.nodename)) {
                    /* add to list of aliases for this node - only add if unique */
                    opal_argv_append_unique_nosize(&node->alias, mapped_nodes[i]);
                }
                node->name = strdup(orte_process_info.nodename);
            } else {
                /* not local - use the given name */
                node->name = strdup(mapped_nodes[i]);
            }
            node->state = ORTE_NODE_STATE_UP;
            node->slots_inuse = 0;
            node->slots_max = 0;
            node->slots = 1;
            /* indicate that ORTE should override any oversubscribed conditions
             * based on local hardware limits since the user (a) might not have
             * provided us any info on the #slots for a node, and (b) the user
             * might have been wrong! If we don't check the number of local physical
             * processors, then we could be too aggressive on our sched_yield setting
             * and cause performance problems. */
            *override_oversubscribed = true;
            opal_list_append(nodes, &node->super);
        }
    }
    rc = ORTE_SUCCESS;

cleanup:
    if (NULL != mapped_nodes) {
        opal_argv_free(mapped_nodes);
    }

    return rc;
}
/* the -host option can always be used in both absolute
 * and relative mode, so we have to check for pre-existing
 * allocations if we are to use relative node syntax
 *
 * Filter the incoming 'nodes' list down to the hosts named in host_argv
 * (including "+e[:N]" empty-node and "+nX" relative-node syntax), preserving
 * the order in which the user listed them.  Returns ORTE_SUCCESS, or
 * ORTE_ERR_SILENT after printing a help message on any syntax/lookup error.
 */
int orte_util_filter_dash_host_nodes(opal_list_t *nodes, char** host_argv)
{
    opal_list_item_t* item;
    opal_list_item_t *next;
    orte_std_cntr_t i, j, k, len_mapped_node=0;
    int rc;
    char **mapped_nodes = NULL, **mini_map, *cptr;
    orte_node_t *node, **nodepool;
    int nodeidx;
    int num_empty=0;
    opal_list_t keep;
    bool want_all_empty = false;

    /* if the incoming node list is empty, then there
     * is nothing to filter! */
    if (opal_list_is_empty(nodes)) {
        return ORTE_SUCCESS;
    }

    /* construct the working list up front so every exit path can drain
     * and destruct it (BUGFIX: it was never destructed, and its items
     * leaked on error paths) */
    OBJ_CONSTRUCT(&keep, opal_list_t);

    /* setup for relative node syntax */
    nodepool = (orte_node_t**)orte_node_pool->addr;

    /* Accumulate all of the host name mappings */
    for (j = 0; j < opal_argv_count(host_argv); ++j) {
        mini_map = opal_argv_split(host_argv[j], ',');

        for (k = 0; NULL != mini_map[k]; ++k) {
            if ('+' == mini_map[k][0]) {
                /* see if we specified empty nodes */
                if ('e' == mini_map[k][1] || 'E' == mini_map[k][1]) {
                    /* request for empty nodes - do they want all of them? */
                    if (NULL != (cptr = strchr(mini_map[k], ':'))) {
                        /* the colon indicates a specific # are requested -
                         * overwrite the ':' with a '*' marker so the count
                         * travels with it on the list */
                        *cptr = '*';
                        opal_argv_append_nosize(&mapped_nodes, cptr);
                    } else {
                        /* add a marker to the list */
                        opal_argv_append_nosize(&mapped_nodes, "*");
                        want_all_empty = true;
                    }
                } else if ('n' == mini_map[k][1] || 'N' == mini_map[k][1]) {
                    /* they want a specific relative node #, so
                     * look it up on global pool */
                    nodeidx = strtol(&mini_map[k][2], NULL, 10);
                    /* BUGFIX: valid pool slots are 0..size-1, so the
                     * original '>' check allowed an out-of-bounds read */
                    if (nodeidx < 0 || nodeidx >= (int)orte_node_pool->size) {
                        /* this is an error */
                        orte_show_help("help-dash-host.txt",
                                       "dash-host:relative-node-out-of-bounds",
                                       true, nodeidx, mini_map[k]);
                        rc = ORTE_ERR_SILENT;
                        opal_argv_free(mini_map);
                        goto cleanup;
                    }
                    /* if the HNP is not allocated, then we need to
                     * adjust the index as the node pool is offset by one */
                    if (!orte_hnp_is_allocated) {
                        nodeidx++;
                        /* BUGFIX: re-check the bound after the offset */
                        if (nodeidx >= (int)orte_node_pool->size) {
                            orte_show_help("help-dash-host.txt",
                                           "dash-host:relative-node-out-of-bounds",
                                           true, nodeidx, mini_map[k]);
                            rc = ORTE_ERR_SILENT;
                            opal_argv_free(mini_map);
                            goto cleanup;
                        }
                    }
                    /* see if that location is filled */
                    if (NULL == nodepool[nodeidx]) {
                        /* this is an error */
                        orte_show_help("help-dash-host.txt",
                                       "dash-host:relative-node-not-found",
                                       true, nodeidx, mini_map[k]);
                        rc = ORTE_ERR_SILENT;
                        opal_argv_free(mini_map);
                        goto cleanup;
                    }
                    /* add this node to the list */
                    opal_argv_append_nosize(&mapped_nodes, nodepool[nodeidx]->name);
                } else {
                    /* invalid relative node syntax */
                    orte_show_help("help-dash-host.txt",
                                   "dash-host:invalid-relative-node-syntax",
                                   true, mini_map[k]);
                    rc = ORTE_ERR_SILENT;
                    opal_argv_free(mini_map);
                    goto cleanup;
                }
            } else {
                /* non-relative syntax - add to list */
                if (OPAL_SUCCESS != (rc = opal_argv_append_nosize(&mapped_nodes,
                                                                  mini_map[k]))) {
                    opal_argv_free(mini_map);
                    goto cleanup;
                }
            }
        }
        opal_argv_free(mini_map);
    }

    /* Did we find anything? If not, then do nothing */
    if (NULL == mapped_nodes && 0 == num_empty) {
        rc = ORTE_SUCCESS;
        goto cleanup;
    }

    /* we found some info - filter what is on the list...
     * i.e., go through the list and remove any nodes that
     * were -not- included on the -host list.
     *
     * NOTE: The following logic is based on knowing that
     * any node can only be included on the incoming
     * nodes list ONCE. */
    len_mapped_node = opal_argv_count(mapped_nodes);
    /* if the user specifies a set of nodes, we will use them in the order
     * in which they were specifed, accumulating them on the 'keep' working
     * list. Note that empty node requests will always be appended to the end */
    for (i = 0; i < len_mapped_node; ++i) {
        /* check if we are supposed to add some number of empty nodes here */
        if ('*' == mapped_nodes[i][0]) {
            /* if there is a number after the '*', then we are
             * to insert a specific # of nodes */
            if ('\0' == mapped_nodes[i][1]) {
                /* take all empty nodes from the list */
                num_empty = INT_MAX;
            } else {
                /* extract number of nodes to take */
                num_empty = strtol(&mapped_nodes[i][1], NULL, 10);
            }
            /* search for empty nodes and take them */
            item = opal_list_get_first(nodes);
            while (0 < num_empty && item != opal_list_get_end(nodes)) {
                next = opal_list_get_next(item);  /* save this position */
                node = (orte_node_t*)item;
                /* see if this node is empty */
                if (0 == node->slots_inuse) {
                    /* check to see if it is specified later */
                    for (j=i+1; j < len_mapped_node; j++) {
                        if (0 == strcmp(mapped_nodes[j], node->name)) {
                            /* specified later - skip this one */
                            goto skipnode;
                        }
                    }
                    /* remove item from list */
                    opal_list_remove_item(nodes, item);
                    /* xfer to keep list */
                    opal_list_append(&keep, item);
                    --num_empty;
                }
            skipnode:
                item = next;
            }
        } else {
            /* we are looking for a specific node on the list
             * we have a match if one of two conditions is met:
             * 1. the node_name and mapped_nodes directly match
             * 2. the node_name is the local system name AND
             *    either the mapped_node is "localhost" OR it
             *    is a local interface as found by opal_ifislocal */
            item = opal_list_get_first(nodes);
            while (item != opal_list_get_end(nodes)) {
                next = opal_list_get_next(item);  /* save this position */
                node = (orte_node_t*)item;
                /* search -host list to see if this one is found */
                if ((0 == strcmp(node->name, mapped_nodes[i]) ||
                     (0 == strcmp(node->name, orte_process_info.nodename) &&
                      (0 == strcmp(mapped_nodes[i], "localhost") ||
                       opal_ifislocal(mapped_nodes[i]))))) {
                    /* remove item from list */
                    opal_list_remove_item(nodes, item);
                    /* xfer to keep list */
                    opal_list_append(&keep, item);
                    break;
                }
                item = next;
            }
        }
        /* done with the mapped entry */
        free(mapped_nodes[i]);
        mapped_nodes[i] = NULL;
    }

    /* was something specified that was -not- found? */
    for (i=0; i < len_mapped_node; i++) {
        if (NULL != mapped_nodes[i]) {
            orte_show_help("help-dash-host.txt", "not-all-mapped-alloc",
                           true, mapped_nodes[i]);
            rc = ORTE_ERR_SILENT;
            goto cleanup;
        }
    }

    /* clear the rest of the nodes list */
    while (NULL != (item = opal_list_remove_first(nodes))) {
        OBJ_RELEASE(item);
    }

    /* the nodes list has been cleared - rebuild it in order */
    while (NULL != (item = opal_list_remove_first(&keep))) {
        opal_list_append(nodes, item);
    }

    /* did they ask for more than we could provide */
    if (!want_all_empty && 0 < num_empty) {
        orte_show_help("help-dash-host.txt", "dash-host:not-enough-empty",
                       true, num_empty);
        rc = ORTE_ERR_SILENT;
        goto cleanup;
    }

    rc = ORTE_SUCCESS;
    /* done filtering existing list */

cleanup:
    for (i=0; i < len_mapped_node; i++) {
        if (NULL != mapped_nodes[i]) {
            free(mapped_nodes[i]);
            mapped_nodes[i] = NULL;
        }
    }
    if (NULL != mapped_nodes) {
        free(mapped_nodes);
    }
    /* release anything still on the working list (non-empty only on error
     * paths) and destruct it */
    while (NULL != (item = opal_list_remove_first(&keep))) {
        OBJ_RELEASE(item);
    }
    OBJ_DESTRUCT(&keep);

    return rc;
}
/**
 * Function for weeding out sensor components that don't want to run.
 *
 * Call the init function on all available components to find out if
 * they want to run. Select all components that don't fail. Failing
 * components will be closed and unloaded. The selected modules will
 * be returned to the caller in a opal_list_t.
 */
int orte_sensor_base_select(void)
{
    mca_base_component_list_item_t *cli = NULL;
    mca_base_component_t *component = NULL;
    mca_base_module_t *module = NULL;
    orte_sensor_active_module_t *i_module;
    opal_list_item_t *item;
    int priority = 0, i, j, low_i;
    opal_pointer_array_t tmp_array;   /* unsorted staging area for queried modules */
    bool none_found;
    orte_sensor_active_module_t *tmp_module = NULL, *tmp_module_sw = NULL;
    orte_job_t *jdata;

    /* selection runs only once per process */
    if (selected) {
        return ORTE_SUCCESS;
    }
    selected = true;

    OBJ_CONSTRUCT(&tmp_array, opal_pointer_array_t);

    opal_output_verbose(10, orte_sensor_base_framework.framework_output,
                        "sensor:base:select: Auto-selecting components");

    /*
     * Traverse the list of available components.
     * For each call their 'query' functions to determine relative priority.
     */
    none_found = true;
    for (item = opal_list_get_first(&orte_sensor_base_framework.framework_components);
         item != opal_list_get_end(&orte_sensor_base_framework.framework_components);
         item = opal_list_get_next(item) ) {
        cli = (mca_base_component_list_item_t *) item;
        component = (mca_base_component_t *) cli->cli_component;

        /*
         * If there is a query function then use it.
         */
        if (NULL == component->mca_query_component) {
            opal_output_verbose(5, orte_sensor_base_framework.framework_output,
                                "sensor:base:select Skipping component [%s]. It does not implement a query function",
                                component->mca_component_name );
            continue;
        }

        /*
         * Query this component for the module and priority
         */
        opal_output_verbose(5, orte_sensor_base_framework.framework_output,
                            "sensor:base:select Querying component [%s]",
                            component->mca_component_name);

        component->mca_query_component(&module, &priority);

        /*
         * If no module was returned or negative priority, then skip component
         */
        if (NULL == module || priority < 0) {
            opal_output_verbose(5, orte_sensor_base_framework.framework_output,
                                "sensor:base:select Skipping component [%s]. Query failed to return a module",
                                component->mca_component_name );
            continue;
        }

        /*
         * Append them to the temporary list, we will sort later
         */
        opal_output_verbose(5, orte_sensor_base_framework.framework_output,
                            "sensor:base:select Query of component [%s] set priority to %d",
                            component->mca_component_name, priority);
        tmp_module = OBJ_NEW(orte_sensor_active_module_t);
        tmp_module->component = component;
        tmp_module->module = (orte_sensor_base_module_t*)module;
        tmp_module->priority = priority;

        opal_pointer_array_add(&tmp_array, (void*)tmp_module);
        none_found = false;
    }

    if (none_found) {
        /* okay for no modules to be found */
        return ORTE_SUCCESS;
    }

    /* ensure my_proc and my_node are available on the global arrays */
    if (NULL == (jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) {
        /* no job data for me - create standalone placeholders */
        orte_sensor_base.my_proc = OBJ_NEW(orte_proc_t);
        orte_sensor_base.my_node = OBJ_NEW(orte_node_t);
    } else {
        if (NULL == (orte_sensor_base.my_proc =
                     (orte_proc_t*)opal_pointer_array_get_item(jdata->procs,
                                                               ORTE_PROC_MY_NAME->vpid))) {
            ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
            return ORTE_ERR_NOT_FOUND;
        }
        if (NULL == (orte_sensor_base.my_node = orte_sensor_base.my_proc->node)) {
            ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
            return ORTE_ERR_NOT_FOUND;
        }
        /* protect the objects */
        OBJ_RETAIN(orte_sensor_base.my_proc);
        OBJ_RETAIN(orte_sensor_base.my_node);
    }

    /*
     * Sort the list by decending priority.
     * Selection-sort style: for each slot j, scan for any entry with a
     * strictly higher priority; if one is found, move it out (and retry j
     * via j--), otherwise move the j-th entry itself.  Moved entries are
     * NULLed in tmp_array so they are not considered again.
     */
    priority = 0;
    for(j = 0; j < tmp_array.size; ++j) {
        tmp_module_sw = (orte_sensor_active_module_t*)opal_pointer_array_get_item(&tmp_array, j);
        if( NULL == tmp_module_sw ) {
            continue;
        }

        low_i   = -1;
        priority = tmp_module_sw->priority;

        for(i = 0; i < tmp_array.size; ++i) {
            tmp_module = (orte_sensor_active_module_t*)opal_pointer_array_get_item(&tmp_array, i);
            if( NULL == tmp_module ) {
                continue;
            }
            if( tmp_module->priority > priority ) {
                low_i = i;
                priority = tmp_module->priority;
            }
        }

        if( low_i >= 0 ) {
            /* a higher-priority entry exists - emit it first */
            tmp_module = (orte_sensor_active_module_t*)opal_pointer_array_get_item(&tmp_array, low_i);
            opal_pointer_array_set_item(&tmp_array, low_i, NULL);
            j--; /* Try this entry again, if it is not the lowest */
        } else {
            tmp_module = tmp_module_sw;
            opal_pointer_array_set_item(&tmp_array, j, NULL);
        }
        opal_output_verbose(5, orte_sensor_base_framework.framework_output,
                            "sensor:base:select Add module with priority [%s] %d",
                            tmp_module->component->mca_component_name,
                            tmp_module->priority);
        opal_pointer_array_add(&orte_sensor_base.modules, tmp_module);
    }
    OBJ_DESTRUCT(&tmp_array);

    /*
     * Initialize each of the modules in priority order from
     * highest to lowest
     */
    for(i = 0; i < orte_sensor_base.modules.size; ++i) {
        i_module = (orte_sensor_active_module_t*)opal_pointer_array_get_item(&orte_sensor_base.modules, i);
        if( NULL == i_module ) {
            continue;
        }
        if( NULL != i_module->module->init ) {
            if (ORTE_SUCCESS != i_module->module->init()) {
                /* can't run after all */
                opal_pointer_array_set_item(&orte_sensor_base.modules, i, NULL);
            }
        }
    }

    return ORTE_SUCCESS;
}
static void update_routing_plan(void) { orte_routed_tree_t *child; int j; opal_list_item_t *item; int Level,Sum,NInLevel,Ii; int NInPrevLevel; /* if I am anything other than a daemon or the HNP, this * is a meaningless command as I am not allowed to route */ if (!ORTE_PROC_IS_DAEMON && !ORTE_PROC_IS_HNP) { return; } /* clear the list of children if any are already present */ while (NULL != (item = opal_list_remove_first(&my_children))) { OBJ_RELEASE(item); } num_children = 0; /* compute my parent */ Ii = ORTE_PROC_MY_NAME->vpid; Level=0; Sum=1; NInLevel=1; while ( Sum < (Ii+1) ) { Level++; NInLevel *= mca_routed_radix_component.radix; Sum += NInLevel; } Sum -= NInLevel; NInPrevLevel = NInLevel/mca_routed_radix_component.radix; if( 0 == Ii ) { ORTE_PROC_MY_PARENT->vpid = -1; } else { ORTE_PROC_MY_PARENT->vpid = (Ii-Sum) % NInPrevLevel; ORTE_PROC_MY_PARENT->vpid += (Sum - NInPrevLevel); } /* compute my direct children and the bitmap that shows which vpids * lie underneath their branch */ radix_tree(Ii, &num_children, &my_children, NULL); if (0 < opal_output_get_verbosity(orte_routed_base_framework.framework_output)) { opal_output(0, "%s: parent %d num_children %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_PROC_MY_PARENT->vpid, num_children); for (item = opal_list_get_first(&my_children); item != opal_list_get_end(&my_children); item = opal_list_get_next(item)) { child = (orte_routed_tree_t*)item; opal_output(0, "%s: \tchild %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), child->vpid); for (j=0; j < (int)orte_process_info.num_procs; j++) { if (opal_bitmap_is_set_bit(&child->relatives, j)) { opal_output(0, "%s: \t\trelation %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), j); } } } } }
/* this is the read handler for my own child procs and stdin.
 *
 * Fired by the event library when fd is readable.  cbdata is the
 * orte_iof_read_event_t that owns the fd.  Depending on rev->tag the
 * data read here is either:
 *   - STDIN:  fanned out to every daemon hosting procs of the jobs
 *             registered for stdin (written directly for my own local
 *             procs, sent via send_data() for remote daemons), or
 *   - STDOUT: forwarded down the job chain (jdata->stdout_target) in
 *             the same fashion, or written to our local output if this
 *             is the end of the chain,
 *   - STDERR/STDDIAG: written to our local stderr output.
 * A read of 0 bytes signals EOF/closed connection and triggers
 * teardown of the corresponding read event.
 */
void orte_iof_mrhnp_read_local_handler(int fd, short event, void *cbdata)
{
    orte_iof_read_event_t *rev = (orte_iof_read_event_t*)cbdata;
    unsigned char data[ORTE_IOF_BASE_MSG_MAX];
    int32_t numbytes;
    opal_list_item_t *item;
    orte_iof_proc_t *proct;
    int i, j;
    orte_ns_cmp_bitmask_t mask;
    orte_job_t *jdata;
    orte_iof_job_t *iofjob;
    orte_node_t *node;
    orte_proc_t *daemon;
    orte_job_map_t *map;
    bool write_out=false;   /* set true only when stdout reaches the end of the chain */

    /* read up to the fragment size */
#if !defined(__WINDOWS__)
    numbytes = read(fd, data, sizeof(data));
#else
    {
        DWORD readed;
        HANDLE handle = (HANDLE)_get_osfhandle(fd);
        ReadFile(handle, data, sizeof(data), &readed, NULL);
        numbytes = (int)readed;
    }
#endif  /* !defined(__WINDOWS__) */

    OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                         "%s iof:mrhnp:read handler read %d bytes from %s:%d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                         ORTE_NAME_PRINT(&rev->name), fd));

    if (numbytes < 0) {
        /* either we have a connection error or it was a non-blocking read */

        /* non-blocking, retry */
        if (EAGAIN == errno || EINTR == errno) {
            opal_event_add(rev->ev, 0);
            return;
        }

        OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                             "%s iof:mrhnp:read handler %s Error on connection:%d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(&rev->name), fd));
        /* Un-recoverable error. Allow the code to flow as usual in order to
         * to send the zero bytes message up the stream, and then close the
         * file descriptor and delete the event.
         */
        numbytes = 0;
    }

    /* if job termination has been ordered, just ignore the
     * data and delete the stdin read event, if that is what fired */
    if (orte_job_term_ordered) {
        if (ORTE_IOF_STDIN & rev->tag) {
            OBJ_RELEASE(mca_iof_mr_hnp_component.stdinev);
        }
        return;
    }

    if (ORTE_IOF_STDIN & rev->tag) {
        /* The event has fired, so it's no longer active until we
         * re-add it */
        mca_iof_mr_hnp_component.stdinev->active = false;

        /* if this was read from my stdin, I need to send this input to all
         * daemons who host mapper procs */
        for (j=0; j < mca_iof_mr_hnp_component.stdin_jobs.size; j++) {
            if (NULL == (iofjob = (orte_iof_job_t*)opal_pointer_array_get_item(&mca_iof_mr_hnp_component.stdin_jobs, j))) {
                continue;
            }
            jdata = iofjob->jdata;
            OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                                 "%s read %d bytes from stdin - writing to job %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                                 ORTE_JOBID_PRINT(jdata->jobid)));
            map = jdata->map;
            /* walk every node in the job's map; the node's daemon tells us
             * whether the target procs are local (mine) or remote */
            for (i=0; i < map->nodes->size; i++) {
                if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
                    continue;
                }
                daemon = node->daemon;
                if (daemon->name.vpid == ORTE_PROC_MY_NAME->vpid) {
                    /* if it is me, then send the bytes down the stdin pipe
                     * for every local proc (they are all on my proct list) - we even send 0 byte events
                     * down the pipe so it forces out any preceding data before
                     * closing the output stream. We add a 0 byte message if
                     * numbytes < sizeof(data) as this means the chunk we read
                     * was the end of the file.
                     */
                    for (item = opal_list_get_first(&mca_iof_mr_hnp_component.procs);
                         item != opal_list_get_end(&mca_iof_mr_hnp_component.procs);
                         item = opal_list_get_next(item)) {
                        proct = (orte_iof_proc_t*)item;
                        if (proct->name.jobid == jdata->jobid) {
                            if (NULL == proct->sink) {
                                opal_output(0, "NULL SINK FOR PROC %s", ORTE_NAME_PRINT(&proct->name));
                                continue;
                            }
                            if (ORTE_IOF_MAX_INPUT_BUFFERS < orte_iof_base_write_output(&proct->name, ORTE_IOF_STDIN, data, numbytes, proct->sink->wev)) {
                                /* getting too backed up - stop the read event for now if it is still active */
                                if (mca_iof_mr_hnp_component.stdinev->active) {
                                    OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                                                         "buffer backed up - holding"));
                                    mca_iof_mr_hnp_component.stdinev->active = false;
                                }
                                return;
                            }
                            if (0 < numbytes && numbytes < (int)sizeof(data)) {
                                /* need to write a 0-byte event to clear the stream and close it
                                 * NOTE(review): the sink pointer is dropped without an explicit
                                 * release here - presumably ownership is held elsewhere; confirm */
                                orte_iof_base_write_output(&proct->name, ORTE_IOF_STDIN, data, 0, proct->sink->wev);
                                proct->sink = NULL;
                            }
                        }
                    }
                } else {
                    OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                                         "%s sending %d bytes from stdin to daemon %s",
                                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                                         ORTE_NAME_PRINT(&daemon->name)));
                    /* send the data to the daemon so it can
                     * write it to all local procs from this job.
                     * If the connection closed,
                     * numbytes will be zero so zero bytes will be
                     * sent - this will tell the daemon to close
                     * the fd for stdin to that proc
                     */
                    send_data(&daemon->name, ORTE_IOF_STDIN, jdata->jobid, data, numbytes);
                    if (0 < numbytes && numbytes < (int)sizeof(data)) {
                        /* need to send a 0-byte message to clear the stream and close it */
                        send_data(&daemon->name, ORTE_IOF_STDIN, jdata->jobid, data, 0);
                    }
                }
            }
        }

        /* if num_bytes was zero, then we need to terminate the event */
        if (0 == numbytes || numbytes < (int)sizeof(data)) {
            /* this will also close our stdin file descriptor */
            if (NULL != mca_iof_mr_hnp_component.stdinev) {
                OBJ_RELEASE(mca_iof_mr_hnp_component.stdinev);
            }
        } else {
            /* if we are looking at a tty, then we just go ahead and restart the
             * read event assuming we are not backgrounded */
            if (orte_iof_mrhnp_stdin_check(fd)) {
                restart_stdin(fd, 0, NULL);
            } else {
                /* delay for awhile and then restart */
                ORTE_TIMER_EVENT(0, 10000, restart_stdin, ORTE_INFO_PRI);
            }
        }
        /* stdin handling is complete - never falls through to the
         * stdout/stderr paths below */
        return;
    }

    if (ORTE_IOF_STDOUT & rev->tag && 0 < numbytes) {
        /* see if we need to forward this output */
        jdata = orte_get_job_data_object(rev->name.jobid);
        /* NOTE(review): jdata is dereferenced without a NULL check -
         * presumably the job object must exist while a read event for
         * one of its procs is active; confirm */
        if (ORTE_JOBID_INVALID == jdata->stdout_target) {
            /* end of the chain - just output the info */
            write_out = true;
            goto PROCESS;
        }
        /* it goes to the next job in the chain - the target job's procs
         * receive this output on their stdin */
        jdata = orte_get_job_data_object(jdata->stdout_target);
        map = jdata->map;
        for (i=0; i < map->nodes->size; i++) {
            if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
                continue;
            }
            daemon = node->daemon;
            if (daemon->name.vpid == ORTE_PROC_MY_NAME->vpid) {
                /* if it is me, then send the bytes down the stdin pipe
                 * for every local proc (they are all on my proct list) */
                for (item = opal_list_get_first(&mca_iof_mr_hnp_component.procs);
                     item != opal_list_get_end(&mca_iof_mr_hnp_component.procs);
                     item = opal_list_get_next(item)) {
                    proct = (orte_iof_proc_t*)item;
                    if (proct->name.jobid == jdata->jobid) {
                        if (NULL == proct->sink) {
                            opal_output(0, "NULL SINK FOR PROC %s", ORTE_NAME_PRINT(&proct->name));
                            continue;
                        }
                        orte_iof_base_write_output(&proct->name, ORTE_IOF_STDIN, data, numbytes, proct->sink->wev);
                    }
                }
            } else {
                OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                                     "%s sending %d bytes from stdout of %s to daemon %s",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                                     ORTE_NAME_PRINT(&rev->name),
                                     ORTE_NAME_PRINT(&daemon->name)));
                /* send the data to the daemon so it can
                 * write it to all local procs from this job */
                send_data(&daemon->name, ORTE_IOF_STDIN, jdata->jobid, data, numbytes);
            }
        }
    }

 PROCESS:
    OPAL_OUTPUT_VERBOSE((1, orte_iof_base.iof_output,
                         "%s read %d bytes from %s of %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes,
                         (ORTE_IOF_STDOUT & rev->tag) ? "stdout" : ((ORTE_IOF_STDERR & rev->tag) ? "stderr" : "stddiag"),
                         ORTE_NAME_PRINT(&rev->name)));

    if (0 == numbytes) {
        /* if we read 0 bytes from the stdout/err/diag, find this proc
         * on our list and
         * release the appropriate event. This will delete the
         * read event and close the file descriptor */
        for (item = opal_list_get_first(&mca_iof_mr_hnp_component.procs);
             item != opal_list_get_end(&mca_iof_mr_hnp_component.procs);
             item = opal_list_get_next(item)) {
            proct = (orte_iof_proc_t*)item;
            mask = ORTE_NS_CMP_ALL;
            if (OPAL_EQUAL == orte_util_compare_name_fields(mask, &proct->name, &rev->name)) {
                /* found it - release corresponding event. This deletes
                 * the read event and closes the file descriptor */
                if (rev->tag & ORTE_IOF_STDOUT) {
                    OBJ_RELEASE(proct->revstdout);
                } else if (rev->tag & ORTE_IOF_STDERR) {
                    OBJ_RELEASE(proct->revstderr);
                } else if (rev->tag & ORTE_IOF_STDDIAG) {
                    OBJ_RELEASE(proct->revstddiag);
                }
                /* check to see if they are all done */
                if (NULL == proct->revstdout &&
                    NULL == proct->revstderr &&
                    NULL == proct->revstddiag) {
                    /* this proc's iof is complete */
                    opal_list_remove_item(&mca_iof_mr_hnp_component.procs, item);
                    ORTE_ACTIVATE_PROC_STATE(&proct->name, ORTE_PROC_STATE_IOF_COMPLETE);
                    OBJ_RELEASE(proct);
                }
                break;
            }
        }
        return;
    } else {
        /* output this to our local output */
        if (ORTE_IOF_STDOUT & rev->tag) {
            /* stdout is only echoed locally when this job is the end of
             * the chain (write_out was set above) */
            if (write_out) {
                orte_iof_base_write_output(&rev->name, rev->tag, data, numbytes, orte_iof_base.iof_write_stdout->wev);
            }
        } else {
            orte_iof_base_write_output(&rev->name, rev->tag, data, numbytes, orte_iof_base.iof_write_stderr->wev);
        }
    }

    /* re-add the event */
    opal_event_add(rev->ev, 0);

    return;
}
static void sample(int fd, short event, void *arg) { struct stat buf; opal_list_item_t *item; file_tracker_t *ft; /* if we are not sampling any more, then just return */ if (NULL == sample_ev) { return; } OPAL_OUTPUT_VERBOSE((1, orte_sensor_base.output, "%s sampling files", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); for (item = opal_list_get_first(&jobs); item != opal_list_get_end(&jobs); item = opal_list_get_next(item)) { ft = (file_tracker_t*)item; /* stat the file and get its size */ if (0 > stat(ft->file, &buf)) { /* cannot stat file */ OPAL_OUTPUT_VERBOSE((1, orte_sensor_base.output, "%s could not stat %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ft->file)); continue; } OPAL_OUTPUT_VERBOSE((1, orte_sensor_base.output, "%s size %lu access %s\tmod %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (unsigned long)buf.st_size, ctime(&buf.st_atime), ctime(&buf.st_mtime))); if (ft->check_size) { if (buf.st_size == ft->file_size) { ft->tick++; goto CHECK; } else { ft->tick = 0; ft->file_size = buf.st_size; } } if (ft->check_access) { if (buf.st_atime == ft->last_access) { ft->tick++; goto CHECK; } else { ft->tick = 0; ft->last_access = buf.st_atime; } } if (ft->check_mod) { if (buf.st_mtime == ft->last_mod) { ft->tick++; goto CHECK; } else { ft->tick = 0; ft->last_mod = buf.st_mtime; } } CHECK: OPAL_OUTPUT_VERBOSE((1, orte_sensor_base.output, "%s sampled file %s tick %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ft->file, ft->tick)); if (ft->tick == ft->limit) { orte_show_help("help-orte-sensor-file.txt", "file-stalled", true, ft->file, ft->file_size, ctime(&ft->last_access), ctime(&ft->last_mod)); orte_errmgr.update_state(ft->jobid, ORTE_JOB_STATE_SENSOR_BOUND_EXCEEDED, NULL, ORTE_PROC_STATE_UNDEF, 0, ORTE_ERR_PROC_STALLED); } } /* restart the timer */ opal_event_evtimer_add(sample_ev, &sample_time); }