/**
 * Remove previously registered memory from the tree without freeing it.
 *
 * @param vma_rcache the VMA rcache module holding the tree
 * @param reg        the registration to remove
 *
 * @retval OMPI_SUCCESS on success
 * @retval OMPI_ERROR   if no VMA covering reg->base was found
 */
int mca_rcache_vma_tree_delete(mca_rcache_vma_module_t* vma_rcache,
                               mca_mpool_base_registration_t* reg)
{
    mca_rcache_vma_t *vma;

    vma = (mca_rcache_vma_t*)ompi_rb_tree_find_with(&vma_rcache->rb_tree,
            reg->base, mca_rcache_vma_tree_node_compare_search);

    if (!vma)
        return OMPI_ERROR;

    while (vma != (mca_rcache_vma_t*)opal_list_get_end(&vma_rcache->vma_list)
           && vma->start <= (uintptr_t)reg->bound) {
        mca_rcache_vma_remove_reg(vma, reg);

        if (opal_list_is_empty(&vma->reg_list)) {
            mca_rcache_vma_t *next =
                (mca_rcache_vma_t*)opal_list_get_next(&vma->super);
            ompi_rb_tree_delete(&vma_rcache->rb_tree, vma);
            /* subtract this VMA's size (end - start + 1) from the byte count */
            mca_rcache_vma_update_byte_count(vma_rcache,
                    vma->start - vma->end - 1);
            opal_list_remove_item(&vma_rcache->vma_list, &vma->super);
            opal_list_append(&vma_rcache->vma_delete_list, &vma->super);
            vma = next;
        } else {
            int merged;

            do {
                mca_rcache_vma_t *prev = NULL, *next = NULL;

                if (opal_list_get_begin(&vma_rcache->vma_list) !=
                        opal_list_get_prev(&vma->super))
                    prev = (mca_rcache_vma_t*)opal_list_get_prev(&vma->super);

                merged = 0;

                /* merge with the previous VMA if the two are adjacent
                 * and hold identical registration lists */
                if (prev && vma->start == prev->end + 1 &&
                        mca_rcache_vma_compare_reg_lists(vma, prev)) {
                    prev->end = vma->end;
                    opal_list_remove_item(&vma_rcache->vma_list, &vma->super);
                    ompi_rb_tree_delete(&vma_rcache->rb_tree, vma);
                    opal_list_append(&vma_rcache->vma_delete_list, &vma->super);
                    vma = prev;
                    merged = 1;
                }

                if (opal_list_get_end(&vma_rcache->vma_list) !=
                        opal_list_get_next(&vma->super))
                    next = (mca_rcache_vma_t*)opal_list_get_next(&vma->super);

                /* merge with the following VMA under the same conditions */
                if (next && vma->end + 1 == next->start &&
                        mca_rcache_vma_compare_reg_lists(vma, next)) {
                    vma->end = next->end;
                    opal_list_remove_item(&vma_rcache->vma_list, &next->super);
                    ompi_rb_tree_delete(&vma_rcache->rb_tree, next);
                    opal_list_append(&vma_rcache->vma_delete_list, &next->super);
                    merged = 1;
                }
            } while (merged);

            vma = (mca_rcache_vma_t*)opal_list_get_next(&vma->super);
        }
    }

    return OMPI_SUCCESS;
}
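A minimal usage sketch (not from the source above): how a caller might drive the delete path and then dispose of the VMA items that were parked on vma_delete_list instead of being freed. The helper name and the assumption that the caller already holds the rcache lock are hypothetical.

/* Hedged sketch: delete a registration, then release the detached VMAs.
 * example_delete() is illustrative; only mca_rcache_vma_tree_delete() and
 * the vma_delete_list field come from the code above. */
static void example_delete(mca_rcache_vma_module_t *vma_rcache,
                           mca_mpool_base_registration_t *reg)
{
    opal_list_item_t *item;

    if (OMPI_SUCCESS != mca_rcache_vma_tree_delete(vma_rcache, reg)) {
        /* no VMA covered reg->base - nothing to clean up */
        return;
    }

    /* the delete path appends emptied/merged VMAs to vma_delete_list
     * rather than freeing them; release them once it is safe to do so */
    while (NULL != (item = opal_list_remove_first(&vma_rcache->vma_delete_list))) {
        OBJ_RELEASE(item);
    }
}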
/*
 * Query the registry for all nodes allocated to a specified app_context
 */
int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes,
                                     orte_std_cntr_t *total_num_slots,
                                     orte_app_context_t *app,
                                     orte_mapping_policy_t policy,
                                     bool initial_map, bool silent)
{
    opal_list_item_t *item, *next;
    orte_node_t *node, *nd, *nptr;
    orte_std_cntr_t num_slots;
    orte_std_cntr_t i;
    int rc;
    orte_job_t *daemons;
    bool novm;
    opal_list_t nodes;
    char *hosts;

    /** set default answer */
    *total_num_slots = 0;

    /* get the daemon job object */
    daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
    /* see if we have a vm or not */
    novm = orte_get_attribute(&daemons->attributes, ORTE_JOB_NO_VM, NULL, OPAL_BOOL);

    /* if this is NOT a managed allocation, then we use the nodes
     * that were specified for this app - there is no need to collect
     * all available nodes and "filter" them
     */
    if (!orte_managed_allocation) {
        OBJ_CONSTRUCT(&nodes, opal_list_t);
        /* if the app provided a dash-host, and we are not treating
         * them as requested or "soft" locations, then use those nodes
         */
        hosts = NULL;
        if (!orte_soft_locations &&
            orte_get_attribute(&app->attributes, ORTE_APP_DASH_HOST, (void**)&hosts, OPAL_STRING)) {
            OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                                 "%s using dash_host %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hosts));
            if (ORTE_SUCCESS != (rc = orte_util_add_dash_host_nodes(&nodes, hosts, false))) {
                ORTE_ERROR_LOG(rc);
                free(hosts);
                return rc;
            }
            free(hosts);
        } else if (orte_get_attribute(&app->attributes, ORTE_APP_HOSTFILE, (void**)&hosts, OPAL_STRING)) {
            /* otherwise, if the app provided a hostfile, then use that */
            OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                                 "%s using hostfile %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hosts));
            if (ORTE_SUCCESS != (rc = orte_util_add_hostfile_nodes(&nodes, hosts))) {
                free(hosts);
                ORTE_ERROR_LOG(rc);
                return rc;
            }
            free(hosts);
        } else if (NULL != orte_rankfile) {
            /* use the rankfile, if provided */
            OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                                 "%s using rankfile %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 orte_rankfile));
            if (ORTE_SUCCESS != (rc = orte_util_add_hostfile_nodes(&nodes, orte_rankfile))) {
                ORTE_ERROR_LOG(rc);
                return rc;
            }
            if (0 == opal_list_get_size(&nodes)) {
                OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                                     "%s nothing found in given rankfile",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
                OBJ_DESTRUCT(&nodes);
                return ORTE_ERR_BAD_PARAM;
            }
        } else if (NULL != orte_default_hostfile) {
            /* fall back to the default hostfile, if provided */
            OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                                 "%s using default hostfile %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 orte_default_hostfile));
            if (ORTE_SUCCESS != (rc = orte_util_add_hostfile_nodes(&nodes, orte_default_hostfile))) {
                ORTE_ERROR_LOG(rc);
                return rc;
            }
            /* this is a special case - we always install a default
             * hostfile, but it is empty. If the user didn't remove it
             * or put something into it, then we will have pursued that
             * option and found nothing. This isn't an error, we just need
             * to add all the known nodes
             */
            if (0 == opal_list_get_size(&nodes)) {
                OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                                     "%s nothing in default hostfile - using known nodes",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
                goto addknown;
            }
        } else {
            /* if nothing else was available, then use all known nodes, which
             * will include ourselves
             */
            OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                                 "%s using known nodes",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
            goto addknown;
        }
        /** if we still don't have anything */
        if (0 == opal_list_get_size(&nodes)) {
            if (!silent) {
                orte_show_help("help-orte-rmaps-base.txt",
                               "orte-rmaps-base:no-available-resources",
                               true);
            }
            OBJ_DESTRUCT(&nodes);
            return ORTE_ERR_SILENT;
        }
        /* find the nodes in our node array and assemble them
         * in daemon order if the vm was launched
         */
        while (NULL != (item = opal_list_remove_first(&nodes))) {
            nptr = (orte_node_t*)item;
            nd = NULL;
            for (i=0; i < orte_node_pool->size; i++) {
                if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
                    continue;
                }
                if (0 != strcmp(node->name, nptr->name)) {
                    OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output,
                                         "NODE %s DOESNT MATCH NODE %s",
                                         node->name, nptr->name));
                    continue;
                }
                /* ignore nodes that are marked as do-not-use for this mapping */
                if (ORTE_NODE_STATE_DO_NOT_USE == node->state) {
                    OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output,
                                         "NODE %s IS MARKED NO_USE", node->name));
                    /* reset the state so it can be used another time */
                    node->state = ORTE_NODE_STATE_UP;
                    continue;
                }
                if (ORTE_NODE_STATE_DOWN == node->state) {
                    OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output,
                                         "NODE %s IS DOWN", node->name));
                    continue;
                }
                if (ORTE_NODE_STATE_NOT_INCLUDED == node->state) {
                    OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output,
                                         "NODE %s IS MARKED NO_INCLUDE", node->name));
                    /* not to be used */
                    continue;
                }
                /* if this node wasn't included in the vm (e.g., by -host), ignore it,
                 * unless we are mapping prior to launching the vm
                 */
                if (NULL == node->daemon && !novm) {
                    OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output,
                                         "NODE %s HAS NO DAEMON", node->name));
                    continue;
                }
                /* retain a copy for our use in case the item gets
                 * destructed along the way
                 */
                OBJ_RETAIN(node);
                if (initial_map) {
                    /* if this is the first app_context we
                     * are getting for an initial map of a job,
                     * then mark all nodes as unmapped
                     */
                    ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED);
                }
                if (NULL == nd || NULL == nd->daemon ||
                    NULL == node->daemon ||
                    nd->daemon->name.vpid < node->daemon->name.vpid) {
                    /* just append to end */
                    opal_list_append(allocated_nodes, &node->super);
                    nd = node;
                } else {
                    /* starting from end, put this node in daemon-vpid order */
                    while (node->daemon->name.vpid < nd->daemon->name.vpid) {
                        if (opal_list_get_begin(allocated_nodes) ==
                            opal_list_get_prev(&nd->super)) {
                            /* insert at beginning */
                            opal_list_prepend(allocated_nodes, &node->super);
                            goto moveon1;
                        }
                        nd = (orte_node_t*)opal_list_get_prev(&nd->super);
                    }
                    item = opal_list_get_next(&nd->super);
                    if (item == opal_list_get_end(allocated_nodes)) {
                        /* we are at the end - just append */
                        opal_list_append(allocated_nodes, &node->super);
                    } else {
                        nd = (orte_node_t*)item;
                        opal_list_insert_pos(allocated_nodes, item, &node->super);
                    }
                moveon1:
                    /* reset us back to the end for the next node */
                    nd = (orte_node_t*)opal_list_get_last(allocated_nodes);
                }
            }
            OBJ_RELEASE(nptr);
        }
        OBJ_DESTRUCT(&nodes);
        /* now prune for usage and compute total slots */
        goto complete;
    }

 addknown:
    /* if the hnp was allocated, include it unless flagged not to */
    if (orte_hnp_is_allocated &&
        !(ORTE_GET_MAPPING_DIRECTIVE(policy) & ORTE_MAPPING_NO_USE_LOCAL)) {
        if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0))) {
            if (ORTE_NODE_STATE_DO_NOT_USE == node->state) {
                OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output,
                                     "HNP IS MARKED NO_USE"));
                /* clear this for future use, but don't include it */
                node->state = ORTE_NODE_STATE_UP;
            } else if (ORTE_NODE_STATE_NOT_INCLUDED != node->state) {
                OBJ_RETAIN(node);
                if (initial_map) {
                    /* if this is the first app_context we
                     * are getting for an initial map of a job,
                     * then mark all nodes as unmapped
                     */
                    ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED);
                }
                opal_list_append(allocated_nodes, &node->super);
            }
        }
    }

    /* add everything in the node pool that can be used - add them
     * in daemon order, which may be different than the order in the
     * node pool. Since an empty list is passed into us, the list at
     * this point either has the HNP node or nothing, and the HNP
     * node obviously has a daemon on it (us!)
     */
    if (0 == opal_list_get_size(allocated_nodes)) {
        /* the list is empty */
        nd = NULL;
    } else {
        nd = (orte_node_t*)opal_list_get_last(allocated_nodes);
    }
    for (i=1; i < orte_node_pool->size; i++) {
        if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
            /* ignore nodes that are marked as do-not-use for this mapping */
            if (ORTE_NODE_STATE_DO_NOT_USE == node->state) {
                OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output,
                                     "NODE %s IS MARKED NO_USE", node->name));
                /* reset the state so it can be used another time */
                node->state = ORTE_NODE_STATE_UP;
                continue;
            }
            if (ORTE_NODE_STATE_DOWN == node->state) {
                OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output,
                                     "NODE %s IS MARKED DOWN", node->name));
                continue;
            }
            if (ORTE_NODE_STATE_NOT_INCLUDED == node->state) {
                OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output,
                                     "NODE %s IS MARKED NO_INCLUDE", node->name));
                /* not to be used */
                continue;
            }
            /* if this node wasn't included in the vm (e.g., by -host), ignore it,
             * unless we are mapping prior to launching the vm
             */
            if (NULL == node->daemon && !novm) {
                OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output,
                                     "NODE %s HAS NO DAEMON", node->name));
                continue;
            }
            /* retain a copy for our use in case the item gets
             * destructed along the way
             */
            OBJ_RETAIN(node);
            if (initial_map) {
                /* if this is the first app_context we
                 * are getting for an initial map of a job,
                 * then mark all nodes as unmapped
                 */
                ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED);
            }
            if (NULL == nd || NULL == nd->daemon ||
                NULL == node->daemon ||
                nd->daemon->name.vpid < node->daemon->name.vpid) {
                /* just append to end */
                opal_list_append(allocated_nodes, &node->super);
                nd = node;
            } else {
                /* starting from end, put this node in daemon-vpid order */
                while (node->daemon->name.vpid < nd->daemon->name.vpid) {
                    if (opal_list_get_begin(allocated_nodes) ==
                        opal_list_get_prev(&nd->super)) {
                        /* insert at beginning */
                        opal_list_prepend(allocated_nodes, &node->super);
                        goto moveon;
                    }
                    nd = (orte_node_t*)opal_list_get_prev(&nd->super);
                }
                item = opal_list_get_next(&nd->super);
                if (item == opal_list_get_end(allocated_nodes)) {
                    /* we are at the end - just append */
                    opal_list_append(allocated_nodes, &node->super);
                } else {
                    nd = (orte_node_t*)item;
                    opal_list_insert_pos(allocated_nodes, item, &node->super);
                }
            moveon:
                /* reset us back to the end for the next node */
                nd = (orte_node_t*)opal_list_get_last(allocated_nodes);
            }
        }
    }

    OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                         "%s Starting with %d nodes in list",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         (int)opal_list_get_size(allocated_nodes)));

    /** check that anything is here */
    if (0 == opal_list_get_size(allocated_nodes)) {
        if (!silent) {
            orte_show_help("help-orte-rmaps-base.txt",
                           "orte-rmaps-base:no-available-resources",
                           true);
        }
        return ORTE_ERR_SILENT;
    }

    /* filter the nodes thru any hostfile and dash-host options */
    OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                         "%s Filtering thru apps",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
    if (ORTE_SUCCESS != (rc = orte_rmaps_base_filter_nodes(app, allocated_nodes, true))
        && ORTE_ERR_TAKE_NEXT_OPTION != rc) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
    OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                         "%s Retained %d nodes in list",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         (int)opal_list_get_size(allocated_nodes)));

 complete:
    /* remove all nodes that are already at max usage, and
     * compute the total number of allocated slots while
     * we do so
     */
    num_slots = 0;
    item = opal_list_get_first(allocated_nodes);
    while (item != opal_list_get_end(allocated_nodes)) {
        /** save the next pointer in case we remove this node */
        next = opal_list_get_next(item);
        /** check to see if this node is fully used - remove if so */
        node = (orte_node_t*)item;
        if (0 != node->slots_max && node->slots_inuse > node->slots_max) {
            OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                                 "%s Removing node %s: max %d inuse %d",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 node->name, node->slots_max, node->slots_inuse));
            opal_list_remove_item(allocated_nodes, item);
            OBJ_RELEASE(item);  /* "un-retain" it */
        } else if (node->slots <= node->slots_inuse &&
                   (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(policy))) {
            /* remove the node as fully used */
            OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                                 "%s Removing node %s slots %d inuse %d",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 node->name, node->slots, node->slots_inuse));
            opal_list_remove_item(allocated_nodes, item);
            OBJ_RELEASE(item);  /* "un-retain" it */
        } else if (node->slots > node->slots_inuse) {
            /* add the available slots */
            OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                                 "%s node %s has %d slots available",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 node->name, node->slots - node->slots_inuse));
            num_slots += node->slots - node->slots_inuse;
        } else if (!(ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(policy))) {
            /* nothing needed to do here - we don't add slots to the
             * count as we don't have any available. Just let the mapper
             * do what it needs to do to meet the request
             */
            OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                                 "%s node %s is fully used, but available for oversubscription",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 node->name));
        } else {
            /* if we cannot use it, remove it from list */
            opal_list_remove_item(allocated_nodes, item);
            OBJ_RELEASE(item);  /* "un-retain" it */
        }
        /** go on to next item */
        item = next;
    }

    /* Sanity check to make sure we have resources available */
    if (0 == opal_list_get_size(allocated_nodes)) {
        if (silent) {
            /* let the caller know that the resources exist,
             * but are currently busy
             */
            return ORTE_ERR_RESOURCE_BUSY;
        } else {
            orte_show_help("help-orte-rmaps-base.txt",
                           "orte-rmaps-base:all-available-resources-used",
                           true);
            return ORTE_ERR_SILENT;
        }
    }

    /* pass back the total number of available slots */
    *total_num_slots = num_slots;

    if (4 < opal_output_get_verbosity(orte_rmaps_base_framework.framework_output)) {
        opal_output(0, "AVAILABLE NODES FOR MAPPING:");
        for (item = opal_list_get_first(allocated_nodes);
             item != opal_list_get_end(allocated_nodes);
             item = opal_list_get_next(item)) {
            node = (orte_node_t*)item;
            opal_output(0, "    node: %s daemon: %s", node->name,
                        (NULL == node->daemon) ? "NULL" :
                        ORTE_VPID_PRINT(node->daemon->name.vpid));
        }
    }

    return ORTE_SUCCESS;
}
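For orientation, a hedged sketch of how a mapper might call orte_rmaps_base_get_target_nodes(). The surrounding job handling (the jdata variable, reading the policy from jdata->map->mapping, the cleanup loop) is illustrative only; the call signature is the only part taken from the source above.

/* Hedged usage sketch: gather the usable nodes for one app_context.
 * example_map_app() is a hypothetical caller, not from the code above. */
static int example_map_app(orte_job_t *jdata, orte_app_context_t *app,
                           bool initial_map)
{
    opal_list_t node_list;
    opal_list_item_t *item;
    orte_std_cntr_t num_slots;
    int rc;

    /* an empty list must be passed in; on success it holds retained
     * orte_node_t items in daemon-vpid order */
    OBJ_CONSTRUCT(&node_list, opal_list_t);
    rc = orte_rmaps_base_get_target_nodes(&node_list, &num_slots, app,
                                          jdata->map->mapping, initial_map,
                                          false /* not silent */);
    if (ORTE_SUCCESS != rc) {
        OBJ_DESTRUCT(&node_list);
        return rc;
    }

    /* ... map processes onto the nodes in node_list using num_slots ... */

    /* drop the references taken by the lookup, then the list itself */
    while (NULL != (item = opal_list_remove_first(&node_list))) {
        OBJ_RELEASE(item);
    }
    OBJ_DESTRUCT(&node_list);
    return ORTE_SUCCESS;
}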
/*
 * Lookup a peer by name, create one if it doesn't exist.
 * @param name The peer's globally unique identifier.
 * @retval Pointer to the newly created structure or NULL on error.
 */
mca_oob_tcp_peer_t *mca_oob_tcp_peer_lookup(const orte_process_name_t* name)
{
    int rc;
    mca_oob_tcp_peer_t *peer, *old;

    if (NULL == name) { /* can't look this one up */
        return NULL;
    }

    OPAL_THREAD_LOCK(&mca_oob_tcp_component.tcp_lock);
    peer = (mca_oob_tcp_peer_t*)orte_hash_table_get_proc(
        &mca_oob_tcp_component.tcp_peers, name);
    if (NULL != peer &&
        0 == memcmp(&peer->peer_name, name, sizeof(peer->peer_name))) {
        OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_lock);
        return peer;
    }

    /* allocate from free list */
    MCA_OOB_TCP_PEER_ALLOC(peer, rc);
    if (NULL == peer) {
        OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_lock);
        return NULL;
    }

    /* initialize peer state */
    peer->peer_name = *name;
    peer->peer_addr = NULL;
    peer->peer_sd = -1;
    peer->peer_state = MCA_OOB_TCP_CLOSED;
    peer->peer_recv_msg = NULL;
    peer->peer_send_msg = NULL;
    peer->peer_retries = 0;

    /* add to lookup table */
    if (ORTE_SUCCESS != orte_hash_table_set_proc(&mca_oob_tcp_component.tcp_peers,
                                                 &peer->peer_name, peer)) {
        MCA_OOB_TCP_PEER_RETURN(peer);
        OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_lock);
        return NULL;
    }

    /* if the peer list is over the maximum size, remove one unused peer */
    opal_list_prepend(&mca_oob_tcp_component.tcp_peer_list, (opal_list_item_t *)peer);
    if (mca_oob_tcp_component.tcp_peer_limit > 0 &&
        (int)opal_list_get_size(&mca_oob_tcp_component.tcp_peer_list) >
        mca_oob_tcp_component.tcp_peer_limit) {
        old = (mca_oob_tcp_peer_t *)
            opal_list_get_last(&mca_oob_tcp_component.tcp_peer_list);
        while (1) {
            /* only evict a candidate with nothing queued for send and
             * no receive in progress */
            if (0 == opal_list_get_size(&(old->peer_send_queue)) &&
                NULL == old->peer_recv_msg) {
                opal_list_remove_item(&mca_oob_tcp_component.tcp_peer_list,
                                      (opal_list_item_t *)old);
                MCA_OOB_TCP_PEER_RETURN(old);
                break;
            } else {
                old = (mca_oob_tcp_peer_t *)opal_list_get_prev(old);
                if (opal_list_get_begin(&mca_oob_tcp_component.tcp_peer_list) ==
                    (opal_list_item_t*)old) {
                    /* we tried, but we couldn't find one that was valid
                     * to get rid of. Oh well. */
                    break;
                }
            }
        }
    }

    OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_lock);
    return peer;
}
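A hedged sketch of typical use: resolve the peer before queueing a message to it. example_send_to() is hypothetical, and invoking mca_oob_tcp_peer_send() at this point is an assumption about this component's call order, not something shown in the source above.

/* Hedged usage sketch: look up (or create) the peer for a destination
 * process, then hand it a message to deliver. */
static int example_send_to(const orte_process_name_t *dst,
                           mca_oob_tcp_msg_t *msg)
{
    mca_oob_tcp_peer_t *peer = mca_oob_tcp_peer_lookup(dst);

    if (NULL == peer) {
        /* NULL name, allocation failure, or hash-table insert failure */
        return ORTE_ERR_UNREACH;
    }

    /* the peer starts in MCA_OOB_TCP_CLOSED; the send path is expected
     * to open the connection on demand */
    return mca_oob_tcp_peer_send(peer, msg);
}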