/*
 * Parse a hostfile and add its hosts to the caller's node list.
 *
 * nodes    - list that hostfile_parse() populates; entries whose name
 *            matches an entry on the hostfile's exclude list are removed
 *            from it again before returning.
 * hostfile - path of the hostfile handed to hostfile_parse().
 *
 * Returns the hostfile_parse() status when parsing fails, ORTE_ERR_SILENT
 * (after a help message has been shown) when any parsed entry uses the
 * relative-node syntax (name starting with '+'), and otherwise the
 * successful hostfile_parse() status.
 */
int orte_util_add_hostfile_nodes(opal_list_t *nodes,
                                 char *hostfile)
{
    opal_list_t exclude;
    opal_list_item_t *item, *itm;
    int rc;

    OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
                         "%s hostfile: checking hostfile %s for nodes",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hostfile));

    OBJ_CONSTRUCT(&exclude, opal_list_t);

    /* parse the hostfile and add the contents to the list */
    if (ORTE_SUCCESS != (rc = hostfile_parse(hostfile, nodes, &exclude, false))) {
        goto cleanup;
    }

    /* relative node directives are not accepted here - report and bail */
    for (item = opal_list_get_first(nodes);
         item != opal_list_get_end(nodes);
         item = opal_list_get_next(item)) {
        orte_node_t *node = (orte_node_t*)item;
        if ('+' == node->name[0]) {
            orte_show_help("help-hostfile.txt", "hostfile:relative-syntax",
                           true, node->name);
            rc = ORTE_ERR_SILENT;
            goto cleanup;
        }
    }

    /* remove from the list of nodes those that are in the exclude list */
    while (NULL != (item = opal_list_remove_first(&exclude))) {
        orte_node_t *exnode = (orte_node_t*)item;

        /* only the first match is removed, so leaving the loop right
         * after the release never touches the freed item again */
        for (itm = opal_list_get_first(nodes);
             itm != opal_list_get_end(nodes);
             itm = opal_list_get_next(itm)) {
            orte_node_t *node = (orte_node_t*)itm;
            if (0 == strcmp(exnode->name, node->name)) {
                /* match - remove it */
                opal_list_remove_item(nodes, itm);
                OBJ_RELEASE(itm);
                break;
            }
        }
        OBJ_RELEASE(item);
    }

cleanup:
    /* the error paths can arrive here with entries still on the exclude
     * list; destructing the list does not release them, so drain it first */
    while (NULL != (item = opal_list_remove_first(&exclude))) {
        OBJ_RELEASE(item);
    }
    OBJ_DESTRUCT(&exclude);

    return rc;
}
/*
 * Build an ordered list of hosts from a hostfile, keeping duplicates.
 *
 * nodes    - list that hostfile_parse() populates. Relative-node entries
 *            are expanded in place:
 *              "+e[:N]" is replaced by copies of nodes from orte_node_pool
 *                       whose slots_inuse is 0 (N of them, or all when no
 *                       ":N" is given);
 *              "+nK"    is replaced by a copy of the pool entry at index K
 *                       (K+1 when the HNP is not part of the allocation).
 *            Every entry matching a name on the exclude list is removed.
 * hostfile - path of the hostfile handed to hostfile_parse().
 *
 * Returns the hostfile_parse() status when parsing fails, ORTE_ERR_SILENT
 * (after a help message has been shown) for an unsatisfiable or malformed
 * relative directive, and otherwise the successful hostfile_parse() status.
 */
int orte_util_get_ordered_host_list(opal_list_t *nodes,
                                    char *hostfile)
{
    opal_list_t exclude;
    opal_list_item_t *item, *itm, *item2, *item1;
    char *cptr;
    int num_empty, i, nodeidx, startempty=0;
    bool want_all_empty=false;
    orte_node_t *node_from_pool, *newnode;
    int rc;

    OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
                         "%s hostfile: creating ordered list of hosts from hostfile %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hostfile));

    OBJ_CONSTRUCT(&exclude, opal_list_t);

    /* parse the hostfile and add the contents to the list, keeping duplicates */
    if (ORTE_SUCCESS != (rc = hostfile_parse(hostfile, nodes, &exclude, true))) {
        goto cleanup;
    }

    /* parse the nodes to process any relative node directives */
    item2 = opal_list_get_first(nodes);
    while (item2 != opal_list_get_end(nodes)) {
        orte_node_t *node = (orte_node_t*)item2;

        /* save the next location in case this one gets removed */
        item1 = opal_list_get_next(item2);

        if ('+' != node->name[0]) {
            item2 = item1;
            continue;
        }

        /* see if we specified empty nodes */
        if ('e' == node->name[1] ||
            'E' == node->name[1]) {
            /* request for empty nodes - do they want
             * all of them?
             */
            if (NULL != (cptr = strchr(node->name, ':'))) {
                /* the colon indicates a specific # are requested */
                cptr++; /* step past : */
                num_empty = strtol(cptr, NULL, 10);
            } else {
                /* want them all - set num_empty to max */
                num_empty = INT_MAX;
                /* NOTE(review): this flag is never reset, so a later
                 * "+e:N" directive is not checked for shortfall once a
                 * bare "+e" has been seen - confirm that is intended */
                want_all_empty = true;
            }
            /* insert empty nodes into the list in place of the current item.
             * since item1 is the next item, we insert in front of it
             */
            if (!orte_hnp_is_allocated && 0 == startempty) {
                startempty = 1;
            }
            for (i=startempty; 0 < num_empty && i < orte_node_pool->size; i++) {
                if (NULL == (node_from_pool = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
                    continue;
                }
                if (0 == node_from_pool->slots_inuse) {
                    newnode = OBJ_NEW(orte_node_t);
                    newnode->name = strdup(node_from_pool->name);
                    /* if the slot count here is less than the
                     * total slots avail on this node, set it
                     * to the specified count - this allows people
                     * to subdivide an allocation
                     */
                    if (node->slots < node_from_pool->slots) {
                        newnode->slots = node->slots;
                    } else {
                        newnode->slots = node_from_pool->slots;
                    }
                    opal_list_insert_pos(nodes, item1, &newnode->super);
                    /* track number added */
                    --num_empty;
                }
            }
            /* bookmark where we stopped in case they ask for more */
            startempty = i;
            /* did they get everything they wanted? */
            if (!want_all_empty && 0 < num_empty) {
                orte_show_help("help-hostfile.txt", "hostfile:not-enough-empty",
                               true, num_empty);
                rc = ORTE_ERR_SILENT;
                goto cleanup;
            }
            /* since we have expanded the provided node, remove
             * it from list
             */
            opal_list_remove_item(nodes, item2);
            OBJ_RELEASE(item2);
        } else if ('n' == node->name[1] ||
                   'N' == node->name[1]) {
            /* they want a specific relative node #, so
             * look it up on global pool
             */
            nodeidx = strtol(&node->name[2], NULL, 10);
            /* if the HNP is not allocated, then we need to
             * adjust the index as the node pool is offset
             * by one
             */
            if (!orte_hnp_is_allocated) {
                nodeidx++;
            }
            /* see if that location is filled */
            if (NULL == (node_from_pool = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, nodeidx))) {
                /* this is an error */
                orte_show_help("help-hostfile.txt", "hostfile:relative-node-not-found",
                               true, nodeidx, node->name);
                rc = ORTE_ERR_SILENT;
                goto cleanup;
            }
            /* create the node object */
            newnode = OBJ_NEW(orte_node_t);
            newnode->name = strdup(node_from_pool->name);
            /* if the slot count here is less than the
             * total slots avail on this node, set it
             * to the specified count - this allows people
             * to subdivide an allocation
             */
            if (node->slots < node_from_pool->slots) {
                newnode->slots = node->slots;
            } else {
                newnode->slots = node_from_pool->slots;
            }
            /* insert it before item1 */
            opal_list_insert_pos(nodes, item1, &newnode->super);
            /* since we have expanded the provided node, remove
             * it from list
             */
            opal_list_remove_item(nodes, item2);
            OBJ_RELEASE(item2);
        } else {
            /* invalid relative node syntax */
            orte_show_help("help-hostfile.txt", "hostfile:invalid-relative-node-syntax",
                           true, node->name);
            rc = ORTE_ERR_SILENT;
            goto cleanup;
        }

        /* move to next */
        item2 = item1;
    }

    /* remove from the list of nodes those that are in the exclude list */
    while (NULL != (item = opal_list_remove_first(&exclude))) {
        orte_node_t *exnode = (orte_node_t*)item;
        opal_list_item_t *next_itm;

        /* have to cycle through the entire list as we could have
         * duplicates; the successor is captured before a match is
         * released so the iteration never reads a freed item
         */
        for (itm = opal_list_get_first(nodes);
             itm != opal_list_get_end(nodes);
             itm = next_itm) {
            orte_node_t *node = (orte_node_t*)itm;

            next_itm = opal_list_get_next(itm);
            if (0 == strcmp(exnode->name, node->name)) {
                /* match - remove it */
                opal_list_remove_item(nodes, itm);
                OBJ_RELEASE(itm);
            }
        }
        OBJ_RELEASE(item);
    }

cleanup:
    /* the error paths can arrive here with entries still on the exclude
     * list; destructing the list does not release them, so drain it first */
    while (NULL != (item = opal_list_remove_first(&exclude))) {
        OBJ_RELEASE(item);
    }
    OBJ_DESTRUCT(&exclude);

    return rc;
}
/*
 * Parse the provided hostfile and filter the nodes that are
 * on the input list against it.
 *
 * nodes    - list of candidate nodes to filter.
 * hostfile - path of the hostfile handed to hostfile_parse().
 * remove   - when true, the list is rebuilt so that it holds only the
 *            nodes selected by the hostfile, in the order the hostfile
 *            selected them (all other entries are released); when false,
 *            the list is left intact and each selected node is flagged
 *            with mapped = true.
 *
 * A plain hostname selects the matching node and, if the hostfile gives a
 * smaller slot count, lowers that node's slots to it. "+e[:N]" selects
 * nodes on the list whose slots_inuse is 0 and that are not named by a
 * later hostfile entry; "+nK" selects the list node whose name matches
 * the entry at index K of orte_node_pool.
 *
 * Returns ORTE_SUCCESS on success, ORTE_ERR_TAKE_NEXT_OPTION when the
 * hostfile yielded no hosts, the hostfile_parse() status when parsing
 * fails, and ORTE_ERR_SILENT (after a help message has been shown) when
 * a hostfile entry cannot be satisfied. On failure no node is dropped
 * from the list: nodes already selected are appended back to its end.
 */
int orte_util_filter_hostfile_nodes(opal_list_t *nodes,
                                    char *hostfile,
                                    bool remove)
{
    opal_list_t newnodes, exclude, keep;
    opal_list_item_t *item1, *item2, *next, *item3;
    orte_node_t *node_from_list, *node_from_file, *node_from_pool, *node3;
    int rc = ORTE_SUCCESS;
    char *cptr;
    int num_empty, nodeidx;
    bool want_all_empty = false;
    bool found;

    OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
                         "%s hostfile: filtering nodes through hostfile %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hostfile));

    /* all three work lists are constructed up front so that every exit
     * can share the cleanup code at the bottom */
    OBJ_CONSTRUCT(&newnodes, opal_list_t);
    OBJ_CONSTRUCT(&exclude, opal_list_t);
    OBJ_CONSTRUCT(&keep, opal_list_t);

    /* parse the hostfile and create local list of findings */
    if (ORTE_SUCCESS != (rc = hostfile_parse(hostfile, &newnodes, &exclude, false))) {
        goto cleanup;
    }

    /* if the hostfile was empty, then treat it as a no-op filter */
    if (0 == opal_list_get_size(&newnodes)) {
        /* indicate that the hostfile was empty */
        rc = ORTE_ERR_TAKE_NEXT_OPTION;
        goto cleanup;
    }

    /* remove from the list of newnodes those that are in the exclude list */
    while (NULL != (item1 = opal_list_remove_first(&exclude))) {
        node_from_file = (orte_node_t*)item1;
        /* check for matches on nodes */
        for (item2 = opal_list_get_first(&newnodes);
             item2 != opal_list_get_end(&newnodes);
             item2 = opal_list_get_next(item2)) {
            orte_node_t *node = (orte_node_t*)item2;
            if (0 == strcmp(node_from_file->name, node->name)) {
                /* match - remove it */
                opal_output(0, "HOST %s ON EXCLUDE LIST - REMOVING", node->name);
                opal_list_remove_item(&newnodes, item2);
                OBJ_RELEASE(item2);
                break;
            }
        }
        OBJ_RELEASE(item1);
    }

    /* now check our nodes and keep or mark those that match. We can
     * destruct our hostfile list as we go since this won't be needed
     */
    while (NULL != (item2 = opal_list_remove_first(&newnodes))) {
        node_from_file = (orte_node_t*)item2;

        /* see if this is a relative node syntax */
        if ('+' == node_from_file->name[0]) {
            /* see if we specified empty nodes */
            if ('e' == node_from_file->name[1] ||
                'E' == node_from_file->name[1]) {
                /* request for empty nodes - do they want
                 * all of them?
                 */
                if (NULL != (cptr = strchr(node_from_file->name, ':'))) {
                    /* the colon indicates a specific # are requested */
                    cptr++; /* step past : */
                    num_empty = strtol(cptr, NULL, 10);
                } else {
                    /* want them all - set num_empty to max */
                    num_empty = INT_MAX;
                    /* NOTE(review): this flag is never reset, so a later
                     * "+e:N" directive is not checked for shortfall once
                     * a bare "+e" has been seen - confirm that is intended */
                    want_all_empty = true;
                }
                /* search the list of nodes provided to us and find those
                 * that are empty
                 */
                item1 = opal_list_get_first(nodes);
                while (0 < num_empty && item1 != opal_list_get_end(nodes)) {
                    node_from_list = (orte_node_t*)item1;
                    /* keep our place - item1 may be moved to the keep list */
                    next = opal_list_get_next(item1);
                    if (0 == node_from_list->slots_inuse) {
                        /* check to see if this node is explicitly called
                         * out later - if so, don't use it here
                         */
                        found = false;
                        for (item3 = opal_list_get_first(&newnodes);
                             item3 != opal_list_get_end(&newnodes);
                             item3 = opal_list_get_next(item3)) {
                            node3 = (orte_node_t*)item3;
                            if (0 == strcmp(node3->name, node_from_list->name)) {
                                /* match - don't use it */
                                found = true;
                                break;
                            }
                        }
                        if (!found) {
                            if (remove) {
                                /* remove item from list */
                                opal_list_remove_item(nodes, item1);
                                /* xfer to keep list */
                                opal_list_append(&keep, item1);
                            } else {
                                /* mark as included */
                                node_from_list->mapped = true;
                            }
                            --num_empty;
                        }
                    }
                    item1 = next;
                }
                /* did they get everything they wanted? */
                if (!want_all_empty && 0 < num_empty) {
                    orte_show_help("help-hostfile.txt", "hostfile:not-enough-empty",
                                   true, num_empty);
                    rc = ORTE_ERR_SILENT;
                }
            } else if ('n' == node_from_file->name[1] ||
                       'N' == node_from_file->name[1]) {
                /* they want a specific relative node #, so
                 * look it up on global pool
                 */
                nodeidx = strtol(&node_from_file->name[2], NULL, 10);
                if (NULL == (node_from_pool = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, nodeidx))) {
                    /* this is an error */
                    orte_show_help("help-hostfile.txt", "hostfile:relative-node-not-found",
                                   true, nodeidx, node_from_file->name);
                    rc = ORTE_ERR_SILENT;
                } else {
                    /* search the list of nodes provided to us and find it;
                     * the iterator must advance from the current item,
                     * not from the list itself */
                    for (item1 = opal_list_get_first(nodes);
                         item1 != opal_list_get_end(nodes);
                         item1 = opal_list_get_next(item1)) {
                        node_from_list = (orte_node_t*)item1;
                        if (0 == strcmp(node_from_list->name, node_from_pool->name)) {
                            if (remove) {
                                /* match - remove item from list */
                                opal_list_remove_item(nodes, item1);
                                /* xfer to keep list */
                                opal_list_append(&keep, item1);
                            } else {
                                /* mark as included */
                                node_from_list->mapped = true;
                            }
                            break;
                        }
                    }
                }
            } else {
                /* invalid relative node syntax */
                orte_show_help("help-hostfile.txt", "hostfile:invalid-relative-node-syntax",
                               true, node_from_file->name);
                rc = ORTE_ERR_SILENT;
            }
        } else {
            /* we are looking for a specific node on the list
             * search the provided list of nodes to see if this
             * one is found
             */
            found = false;
            for (item1 = opal_list_get_first(nodes);
                 item1 != opal_list_get_end(nodes);
                 item1 = opal_list_get_next(item1)) {
                node_from_list = (orte_node_t*)item1;
                /* since the name in the hostfile might not match
                 * our local name, and yet still be intended to match,
                 * we have to check for local interfaces
                 */
                if (0 == strcmp(node_from_file->name, node_from_list->name) ||
                    (0 == strcmp(node_from_file->name, "localhost") &&
                     0 == strcmp(node_from_list->name, orte_process_info.nodename)) ||
                    (opal_ifislocal(node_from_list->name) &&
                     opal_ifislocal(node_from_file->name))) {
                    /* if the slot count here is less than the
                     * total slots avail on this node, set it
                     * to the specified count - this allows people
                     * to subdivide an allocation
                     */
                    if (node_from_file->slots < node_from_list->slots) {
                        node_from_list->slots = node_from_file->slots;
                    }
                    if (remove) {
                        /* remove the node from the list */
                        opal_list_remove_item(nodes, item1);
                        /* xfer it to keep list */
                        opal_list_append(&keep, item1);
                    } else {
                        /* mark as included */
                        node_from_list->mapped = true;
                    }
                    found = true;
                    break;
                }
            }
            /* if the host in the newnode list wasn't found,
             * then that is an error we need to report to the
             * user and abort
             */
            if (!found) {
                orte_show_help("help-hostfile.txt", "hostfile:extra-node-not-found",
                               true, hostfile, node_from_file->name);
                rc = ORTE_ERR_SILENT;
            }
        }

        /* this entry was already unlinked from newnodes, so it has to be
         * released here on the failure paths as well as on success */
        OBJ_RELEASE(item2);
        if (ORTE_SUCCESS != rc) {
            goto cleanup;
        }
    }

    if (remove) {
        /* whatever is still on the nodes list was not selected by the
         * hostfile - clear it so the list can be rebuilt in order below */
        while (NULL != (item1 = opal_list_remove_first(nodes))) {
            OBJ_RELEASE(item1);
        }
    }

cleanup:
    /* release hostfile entries that were never consumed */
    while (NULL != (item1 = opal_list_remove_first(&newnodes))) {
        OBJ_RELEASE(item1);
    }
    while (NULL != (item1 = opal_list_remove_first(&exclude))) {
        OBJ_RELEASE(item1);
    }
    /* hand the selected nodes back to the caller's list. On success with
     * remove set this rebuilds the (just cleared) list in order; on
     * failure it returns the nodes so they are neither lost nor leaked;
     * otherwise the keep list is empty and this is a no-op */
    while (NULL != (item1 = opal_list_remove_first(&keep))) {
        opal_list_append(nodes, item1);
    }
    OBJ_DESTRUCT(&newnodes);
    OBJ_DESTRUCT(&exclude);
    OBJ_DESTRUCT(&keep);

    return rc;
}
/*
 * Merge the hosts named in a hostfile into an existing node list.
 *
 * The hostfile is parsed into a private "adds" list. Relative-node
 * entries (names starting with '+') are rejected, hosts that also appear
 * on the hostfile's exclude list are dropped, and every remaining host
 * whose name is not already present on the caller's list is appended
 * to it. Hosts already present are released.
 *
 * nodes    - list receiving the new, unique hosts.
 * hostfile - path of the hostfile handed to hostfile_parse().
 *
 * Returns the hostfile_parse() status, or ORTE_ERR_SILENT (after a help
 * message has been shown) when a relative-node entry is encountered.
 */
int orte_util_add_hostfile_nodes(opal_list_t *nodes,
                                 char *hostfile)
{
    opal_list_t exclude, adds;
    opal_list_item_t *cur, *scan;
    orte_node_t *candidate, *other;
    bool duplicate;
    int rc;

    OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
                         "%s hostfile: checking hostfile %s for nodes",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hostfile));

    OBJ_CONSTRUCT(&exclude, opal_list_t);
    OBJ_CONSTRUCT(&adds, opal_list_t);

    /* collect the hostfile contents on the private lists */
    rc = hostfile_parse(hostfile, &adds, &exclude, false);
    if (ORTE_SUCCESS != rc) {
        goto cleanup;
    }

    /* relative node directives are not permitted here */
    cur = opal_list_get_first(&adds);
    while (cur != opal_list_get_end(&adds)) {
        other = (orte_node_t*)cur;
        if ('+' == other->name[0]) {
            orte_show_help("help-hostfile.txt", "hostfile:relative-syntax",
                           true, other->name);
            rc = ORTE_ERR_SILENT;
            goto cleanup;
        }
        cur = opal_list_get_next(cur);
    }

    /* drop the first pending addition matching each excluded host */
    while (NULL != (cur = opal_list_remove_first(&exclude))) {
        candidate = (orte_node_t*)cur;
        scan = opal_list_get_first(&adds);
        while (scan != opal_list_get_end(&adds)) {
            other = (orte_node_t*)scan;
            if (0 == strcmp(candidate->name, other->name)) {
                /* excluded - take it off the pending list */
                opal_list_remove_item(&adds, scan);
                OBJ_RELEASE(scan);
                break;
            }
            scan = opal_list_get_next(scan);
        }
        OBJ_RELEASE(cur);
    }

    /* hand over every pending host the caller does not already have */
    while (NULL != (cur = opal_list_remove_first(&adds))) {
        candidate = (orte_node_t*)cur;
        duplicate = false;
        scan = opal_list_get_first(nodes);
        while (!duplicate && scan != opal_list_get_end(nodes)) {
            other = (orte_node_t*)scan;
            if (0 == strcmp(candidate->name, other->name)) {
                duplicate = true;
            } else {
                scan = opal_list_get_next(scan);
            }
        }
        if (duplicate) {
            /* already known - discard our copy */
            OBJ_RELEASE(cur);
        } else {
            opal_list_append(nodes, &candidate->super);
        }
    }

cleanup:
    OPAL_LIST_DESTRUCT(&exclude);
    OPAL_LIST_DESTRUCT(&adds);

    return rc;
}
/*
 * Parse a hostfile and add its hosts to the caller's node list.
 *
 * nodes                   - list that hostfile_parse() populates; entries
 *                           whose name matches an entry on the hostfile's
 *                           exclude list are removed again before returning.
 * override_oversubscribed - set to true on success only; it is left
 *                           untouched on every error path.
 * hostfile                - path of the hostfile handed to hostfile_parse().
 *
 * Returns the hostfile_parse() status when parsing fails, ORTE_ERR_SILENT
 * (after a help message has been shown) when any parsed entry uses the
 * relative-node syntax (name starting with '+'), and otherwise the
 * successful hostfile_parse() status.
 */
int orte_util_add_hostfile_nodes(opal_list_t *nodes,
                                 bool *override_oversubscribed,
                                 char *hostfile)
{
    opal_list_t exclude;
    opal_list_item_t *item, *itm;
    int rc;

    OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
                         "%s hostfile: checking hostfile %s for nodes",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hostfile));

    OBJ_CONSTRUCT(&exclude, opal_list_t);

    /* parse the hostfile and add the contents to the list */
    if (ORTE_SUCCESS != (rc = hostfile_parse(hostfile, nodes, &exclude, false))) {
        goto cleanup;
    }

    /* relative node directives are not accepted here - report and bail */
    for (item = opal_list_get_first(nodes);
         item != opal_list_get_end(nodes);
         item = opal_list_get_next(item)) {
        orte_node_t *node = (orte_node_t*)item;
        if ('+' == node->name[0]) {
            orte_show_help("help-hostfile.txt", "hostfile:relative-syntax",
                           true, node->name);
            rc = ORTE_ERR_SILENT;
            goto cleanup;
        }
    }

    /* remove from the list of nodes those that are in the exclude list */
    while (NULL != (item = opal_list_remove_first(&exclude))) {
        orte_node_t *exnode = (orte_node_t*)item;

        /* only the first match is removed, so leaving the loop right
         * after the release never touches the freed item again */
        for (itm = opal_list_get_first(nodes);
             itm != opal_list_get_end(nodes);
             itm = opal_list_get_next(itm)) {
            orte_node_t *node = (orte_node_t*)itm;
            if (0 == strcmp(exnode->name, node->name)) {
                /* match - remove it */
                opal_list_remove_item(nodes, itm);
                OBJ_RELEASE(itm);
                break;
            }
        }
        OBJ_RELEASE(item);
    }

    /* indicate that ORTE should override any oversubscribed conditions
     * based on local hardware limits since the user (a) might not have
     * provided us any info on the #slots for a node, and (b) the user
     * might have been wrong! If we don't check the number of local physical
     * processors, then we could be too aggressive on our sched_yield setting
     * and cause performance problems.
     */
    *override_oversubscribed = true;

cleanup:
    /* the error paths can arrive here with entries still on the exclude
     * list; destructing the list does not release them, so drain it first */
    while (NULL != (item = opal_list_remove_first(&exclude))) {
        OBJ_RELEASE(item);
    }
    OBJ_DESTRUCT(&exclude);

    return rc;
}