static inline int mca_rcache_vma_add_reg(mca_rcache_vma_t *vma,
        mca_mpool_base_registration_t *reg)
{
    opal_list_item_t *i;
    mca_rcache_vma_reg_list_item_t *item, *entry;

    entry = OBJ_NEW(mca_rcache_vma_reg_list_item_t);
    if(!entry)
        return -1;

    entry->reg = reg;

    for(i = opal_list_get_first(&vma->reg_list);
            i != opal_list_get_end(&vma->reg_list);
            i = opal_list_get_next(i)) {
        item = (mca_rcache_vma_reg_list_item_t*)i;

        if(mca_rcache_vma_compare_regs(item->reg, reg) > 0)
            continue;

        opal_list_insert_pos(&vma->reg_list, &item->super, &entry->super);
        return 0;
    }
    opal_list_append(&vma->reg_list, &entry->super);
    return 0;
}
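/*
 * The function above uses the ordered-insert idiom that recurs throughout this
 * section: walk the list, call opal_list_insert_pos() in front of the first
 * item that should follow the new entry, and fall back to opal_list_append()
 * if no such item exists.  The sketch below shows the same idiom in isolation.
 * It is a minimal illustration, assuming an OPAL build environment; my_item_t
 * and insert_sorted() are hypothetical names used only for this example.
 */
#include "opal/class/opal_list.h"

typedef struct {
    opal_list_item_t super;   /* must be first so the item can live on an opal_list */
    int key;                  /* sort key for this example */
} my_item_t;
OBJ_CLASS_INSTANCE(my_item_t, opal_list_item_t, NULL, NULL);

static void insert_sorted(opal_list_t *list, my_item_t *entry)
{
    opal_list_item_t *it;

    for (it = opal_list_get_first(list);
         it != opal_list_get_end(list);
         it = opal_list_get_next(it)) {
        if (((my_item_t*)it)->key > entry->key) {
            /* opal_list_insert_pos() places the new item *before* 'it' */
            opal_list_insert_pos(list, it, &entry->super);
            return;
        }
    }
    /* nothing larger found - the new item belongs at the tail */
    opal_list_append(list, &entry->super);
}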
static int ompi_comm_register_cid (uint32_t cid)
{
    opal_list_item_t *item;
    ompi_comm_reg_t *regcom;
    ompi_comm_reg_t *newentry = OBJ_NEW(ompi_comm_reg_t);

    newentry->cid = cid;
    if ( !(opal_list_is_empty(&ompi_registered_comms)) ) {
        for (item = opal_list_get_first(&ompi_registered_comms);
             item != opal_list_get_end(&ompi_registered_comms);
             item = opal_list_get_next(item)) {
            regcom = (ompi_comm_reg_t *)item;
            if ( regcom->cid > cid ) {
                break;
            }
#if OMPI_ENABLE_THREAD_MULTIPLE
            if ( regcom->cid == cid ) {
                /**
                 * The MPI standard states that it is the user's responsibility
                 * to schedule global communications so as to avoid any kind of
                 * trouble. As managing communicators involves several collective
                 * communications, we enforce a sequential execution order: this
                 * test allows only one communicator-creation function at a time
                 * based on the same communicator.
                 */
                OBJ_RELEASE(newentry);
                return OMPI_ERROR;
            }
#endif  /* OMPI_ENABLE_THREAD_MULTIPLE */
        }
        opal_list_insert_pos (&ompi_registered_comms, item,
                              (opal_list_item_t *)newentry);
    } else {
        opal_list_append (&ompi_registered_comms, (opal_list_item_t *)newentry);
    }

    return OMPI_SUCCESS;
}
static int orte_ras_alps_read_appinfo_file(opal_list_t *nodes, char *filename,
                                           unsigned int *uMe)
{
    int iq;
    int ix;
    int iFd;                            /* file descriptor for appinfo    */
    int iTrips;                         /* counter of appinfo read attempts */
    int max_appinfo_read_attempts;
    struct stat ssBuf;                  /* stat buffer                    */
    size_t szLen;                       /* size of appinfo (file)         */
    off_t oNow;                         /* current appinfo data offset    */
    off_t oInfo=sizeof(appInfoHdr_t);
    off_t oDet=sizeof(appInfo_t);
    off_t oSlots;
    off_t oEntry;
    int32_t sNodes=0;
    char *cpBuf;
    char *hostname;
    orte_node_t *node = NULL, *n2;
    appInfoHdr_t *apHdr;                /* ALPS header structure          */
    appInfo_t *apInfo;                  /* ALPS table info structure      */
#if ALPS_APPINFO_VERSION==0
    placeList_t *apSlots;               /* ALPS node specific info        */
#else
    placeNodeList_t *apNodes;
#endif
    bool added;
    opal_list_item_t *item;

    orte_ras_alps_get_appinfo_attempts(&max_appinfo_read_attempts);
    oNow=0;
    iTrips=0;
    opal_output_verbose(1, orte_ras_base.ras_output,
                        "ras:alps:allocate: begin processing appinfo file");
    while(!oNow) {                      /* Until appinfo read is complete */
        iTrips++;                       /* Increment trip count           */

        iFd=open( filename, O_RDONLY );
        if( iFd==-1 ) {                 /* If file absent, ALPS is down   */
            opal_output_verbose(1, orte_ras_base.ras_output,
                                "ras:alps:allocate: ALPS information open failure");
            usleep(iTrips*50000);       /* Increasing delays, .05 s/try   */

            /* Fail only when the number of attempts has been exhausted. */
            if( iTrips <= max_appinfo_read_attempts ) continue;
            ORTE_ERROR_LOG(ORTE_ERR_FILE_OPEN_FAILURE);
            return ORTE_ERR_FILE_OPEN_FAILURE;
        }
        if( fstat( iFd, &ssBuf )==-1 ) { /* If stat fails, access denied  */
            ORTE_ERROR_LOG(ORTE_ERR_NOT_AVAILABLE);
            return ORTE_ERR_NOT_AVAILABLE;
        }

        szLen=ssBuf.st_size;            /* Get buffer size                */
        cpBuf=malloc(szLen+1);          /* Allocate buffer                */
        if (NULL == cpBuf) {
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            return ORTE_ERR_OUT_OF_RESOURCE;
        }

        /* Repeated attempts to read appinfo, with an increasing delay between
         * successive attempts, allow the scheduler I/O a chance to complete. */
        if( (oNow=read( iFd, cpBuf, szLen ))!=(off_t)szLen ) {

            /* This is where apstat fails; we will record it and try again. */
            opal_output_verbose(1, orte_ras_base.ras_output,
                                "ras:alps:allocate: ALPS information read failure: %ld bytes",
                                (long int)oNow);

            free(cpBuf);                /* Free (old) buffer              */
            close(iFd);                 /* Close (old) descriptor         */
            oNow=0;                     /* Reset byte count               */
            usleep(iTrips*50000);       /* Increasing delays, .05 s/try   */

            /* Fail only when the number of attempts has been exhausted. */
            if( iTrips<=max_appinfo_read_attempts ) continue;
            ORTE_ERROR_LOG(ORTE_ERR_FILE_READ_FAILURE);
            return ORTE_ERR_FILE_READ_FAILURE;
        }
    }
    close(iFd);
    opal_output_verbose(1, orte_ras_base.ras_output,
                        "ras:alps:allocate: file %s read", filename);

    /* Now that we have the scheduler information, we just have to parse it
     * for the data that we seek. */
    oNow=0;
    apHdr=(appInfoHdr_t *)cpBuf;

    opal_output_verbose(1, orte_ras_base.ras_output,
                        "ras:alps:allocate: %d entries in file", apHdr->apNum);

    /* Header info (apHdr) tells us how many entries are in the file:
     *
     *      apHdr->apNum
     */
    for( iq=0; iq<apHdr->apNum; iq++ ) {    /* Parse all entries in file  */

        /* Just at this level, a lot of information is available:
         *
         *      apInfo->apid      ... ALPS job ID
         *      apInfo->resId     ... ALPS reservation ID
         *      apInfo->numCmds   ... Number of executables
         *      apInfo->numPlaces ... Number of PEs
         */
        apInfo=(appInfo_t *)(cpBuf+oNow+oInfo);

        /* Calculate the dependent offsets. */
        oSlots=sizeof(cmdDetail_t)*apInfo->numCmds;

        opal_output_verbose(1, orte_ras_base.ras_output,
                            "ras:alps:allocate: read data for resId %u - myId %u",
                            apInfo->resId, *uMe);

#if ALPS_APPINFO_VERSION==0
        /* Finally, we get to the actual node-specific information:
         *
         *      apSlots[ix].cmdIx    ... index of apDet[].cmd
         *      apSlots[ix].nid      ... NodeID (NID)
         *      apSlots[ix].procMask ... mask for processors... need 16-bit shift
         */
        apSlots=(placeList_t *)(cpBuf+oNow+oInfo+oDet+oSlots);
        oEntry=sizeof(placeList_t)*apInfo->numPlaces;

        oNow+=(oDet+oSlots+oEntry);     /* Target next slot               */

        if( apInfo->resId != *uMe ) continue; /* Filter to our reservation Id */

        /* in this early version of alps, there is one entry for each PE in the
         * allocation - so cycle across the numPlaces entries, assigning a slot
         * each time a node is named */
        for( ix=0; ix<apInfo->numPlaces; ix++ ) {

            opal_output_verbose(5, orte_ras_base.ras_output,
                                "ras:alps:read_appinfo: got NID %d", apSlots[ix].nid);

            asprintf( &hostname, "%d", apSlots[ix].nid );
            if (NULL == hostname) {
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                return ORTE_ERR_OUT_OF_RESOURCE;
            }

            /* If this matches the prior nodename, just add to the slot count. */
            if( NULL!=node && !strcmp(node->name, hostname) ) {
                free(hostname);         /* free hostname since not needed */
                ++node->slots;
            } else {                    /* must be new, so add to list    */
                opal_output_verbose(1, orte_ras_base.ras_output,
                                    "ras:alps:read_appinfo: added NID %d to list",
                                    apSlots[ix].nid);
                node = OBJ_NEW(orte_node_t);
                node->name = hostname;
                node->launch_id = apSlots[ix].nid;
                node->slots_inuse = 0;
                node->slots_max = 0;
                node->slots = 1;
                /* need to order these node ids so the regex generator
                 * can properly function
                 */
                added = false;
                for (item = opal_list_get_first(nodes);
                     item != opal_list_get_end(nodes);
                     item = opal_list_get_next(item)) {
                    n2 = (orte_node_t*)item;
                    if (node->launch_id < n2->launch_id) {
                        /* insert the new node before this one */
                        opal_list_insert_pos(nodes, item, &node->super);
                        added = true;
                        break;
                    }
                }
                if (!added) {
                    /* add it to the end */
                    opal_list_append(nodes, &node->super);
                }
                sNodes++;               /* Increment the node count       */
            }
        }
#else
        /* in newer versions of alps, there is one entry for each node in the
         * allocation, and that struct directly carries the number of PEs
         * allocated on that node to this job. */
        apNodes=(placeNodeList_t *)(cpBuf+oNow+oInfo+oDet+oSlots);
        oEntry=sizeof(placeNodeList_t)*apInfo->numPlaces;

        oNow+=(oDet+oSlots+oEntry);     /* Target next entry              */

        if( apInfo->resId != *uMe ) continue; /* Filter to our reservation Id */

        for( ix=0; ix<apInfo->numPlaces; ix++ ) {
            opal_output_verbose(5, orte_ras_base.ras_output,
                                "ras:alps:read_appinfo(modern): processing NID %d with %d slots",
                                apNodes[ix].nid, apNodes[ix].numPEs);

            asprintf( &hostname, "%d", apNodes[ix].nid );
            if (NULL == hostname) {
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                return ORTE_ERR_OUT_OF_RESOURCE;
            }

            node = OBJ_NEW(orte_node_t);
            node->name = hostname;
            node->launch_id = apNodes[ix].nid;
            node->slots_inuse = 0;
            node->slots_max = 0;
            node->slots = apNodes[ix].numPEs;
            /* need to order these node ids so the regex generator
             * can properly function
             */
            added = false;
            for (item = opal_list_get_first(nodes);
                 item != opal_list_get_end(nodes);
                 item = opal_list_get_next(item)) {
                n2 = (orte_node_t*)item;
                if (node->launch_id < n2->launch_id) {
                    /* insert the new node before this one */
                    opal_list_insert_pos(nodes, item, &node->super);
                    added = true;
                    break;
                }
            }
            if (!added) {
                /* add it to the end */
                opal_list_append(nodes, &node->super);
            }
            sNodes++;                   /* Increment the node count       */
        }
#endif
        break;                          /* Extended details ignored       */
    }
    free(cpBuf);                        /* Free the buffer                */

    return ORTE_SUCCESS;
}
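/*
 * The parser above walks variable-size records in a single buffer: a fixed
 * file header, then for each entry a fixed record header (appInfo_t) followed
 * by two trailing arrays (cmdDetail_t[numCmds] and the place entries), with
 * oNow advanced by oDet + oSlots + oEntry per entry.  Below is a minimal,
 * standalone sketch of that offset arithmetic under assumed, hypothetical
 * layouts: file_hdr_t, rec_hdr_t, cmd_t and place_t are illustrative
 * stand-ins, not the real ALPS structures.
 */
#include <stddef.h>
#include <stdint.h>

typedef struct { uint32_t num; } file_hdr_t;                           /* like appInfoHdr_t    */
typedef struct { uint32_t num_cmds; uint32_t num_places; } rec_hdr_t;  /* like appInfo_t       */
typedef struct { uint32_t dummy; } cmd_t;                              /* like cmdDetail_t     */
typedef struct { uint32_t nid; } place_t;                              /* like placeNodeList_t */

/* Visit each record's trailing place[] array; returns the number of records seen. */
static int walk_records(const char *buf)
{
    const file_hdr_t *hdr = (const file_hdr_t *)buf;
    size_t now = 0;                                  /* offset past the file header, like oNow */

    for (uint32_t iq = 0; iq < hdr->num; iq++) {
        const rec_hdr_t *rec = (const rec_hdr_t *)(buf + sizeof(file_hdr_t) + now);
        size_t o_cmds   = sizeof(cmd_t)   * rec->num_cmds;    /* like oSlots */
        size_t o_places = sizeof(place_t) * rec->num_places;  /* like oEntry */
        const place_t *places =
            (const place_t *)((const char *)rec + sizeof(rec_hdr_t) + o_cmds);
        (void)places;                                /* a real parser would scan places[] here */
        now += sizeof(rec_hdr_t) + o_cmds + o_places;          /* step to the next record */
    }
    return (int)hdr->num;
}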
int orte_util_get_ordered_host_list(opal_list_t *nodes,
                                    char *hostfile)
{
    opal_list_t exclude;
    opal_list_item_t *item, *itm, *item2, *item1;
    char *cptr;
    int num_empty, i, nodeidx, startempty=0;
    bool want_all_empty=false;
    orte_node_t *node_from_pool, *newnode;
    int rc;

    OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
                         "%s hostfile: creating ordered list of hosts from hostfile %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hostfile));

    OBJ_CONSTRUCT(&exclude, opal_list_t);

    /* parse the hostfile and add the contents to the list, keeping duplicates */
    if (ORTE_SUCCESS != (rc = hostfile_parse(hostfile, nodes, &exclude, true))) {
        goto cleanup;
    }

    /* parse the nodes to process any relative node directives */
    item2 = opal_list_get_first(nodes);
    while (item2 != opal_list_get_end(nodes)) {
        orte_node_t *node=(orte_node_t*)item2;

        /* save the next location in case this one gets removed */
        item1 = opal_list_get_next(item2);

        if ('+' != node->name[0]) {
            item2 = item1;
            continue;
        }

        /* see if we specified empty nodes */
        if ('e' == node->name[1] ||
            'E' == node->name[1]) {
            /* request for empty nodes - do they want
             * all of them?
             */
            if (NULL != (cptr = strchr(node->name, ':'))) {
                /* the colon indicates a specific # are requested */
                cptr++; /* step past : */
                num_empty = strtol(cptr, NULL, 10);
            } else {
                /* want them all - set num_empty to max */
                num_empty = INT_MAX;
                want_all_empty = true;
            }

            /* insert empty nodes into the list in place of the current item.
             * since item1 is the next item, we insert in front of it
             */
            if (!orte_hnp_is_allocated && 0 == startempty) {
                startempty = 1;
            }
            for (i=startempty; 0 < num_empty && i < orte_node_pool->size; i++) {
                if (NULL == (node_from_pool = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
                    continue;
                }
                if (0 == node_from_pool->slots_inuse) {
                    newnode = OBJ_NEW(orte_node_t);
                    newnode->name = strdup(node_from_pool->name);
                    /* if the slot count here is less than the
                     * total slots avail on this node, set it
                     * to the specified count - this allows people
                     * to subdivide an allocation
                     */
                    if (node->slots < node_from_pool->slots) {
                        newnode->slots = node->slots;
                    } else {
                        newnode->slots = node_from_pool->slots;
                    }
                    opal_list_insert_pos(nodes, item1, &newnode->super);
                    /* track number added */
                    --num_empty;
                }
            }
            /* bookmark where we stopped in case they ask for more */
            startempty = i;
            /* did they get everything they wanted? */
            if (!want_all_empty && 0 < num_empty) {
                orte_show_help("help-hostfile.txt", "hostfile:not-enough-empty",
                               true, num_empty);
                rc = ORTE_ERR_SILENT;
                goto cleanup;
            }
            /* since we have expanded the provided node, remove
             * it from the list
             */
            opal_list_remove_item(nodes, item2);
            OBJ_RELEASE(item2);
        } else if ('n' == node->name[1] ||
                   'N' == node->name[1]) {
            /* they want a specific relative node #, so
             * look it up in the global pool
             */
            nodeidx = strtol(&node->name[2], NULL, 10);
            /* if the HNP is not allocated, then we need to
             * adjust the index as the node pool is offset
             * by one
             */
            if (!orte_hnp_is_allocated) {
                nodeidx++;
            }
            /* see if that location is filled */
            if (NULL == (node_from_pool = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, nodeidx))) {
                /* this is an error */
                orte_show_help("help-hostfile.txt", "hostfile:relative-node-not-found",
                               true, nodeidx, node->name);
                rc = ORTE_ERR_SILENT;
                goto cleanup;
            }
            /* create the node object */
            newnode = OBJ_NEW(orte_node_t);
            newnode->name = strdup(node_from_pool->name);
            /* if the slot count here is less than the
             * total slots avail on this node, set it
             * to the specified count - this allows people
             * to subdivide an allocation
             */
            if (node->slots < node_from_pool->slots) {
                newnode->slots = node->slots;
            } else {
                newnode->slots = node_from_pool->slots;
            }
            /* insert it before item1 */
            opal_list_insert_pos(nodes, item1, &newnode->super);
            /* since we have expanded the provided node, remove
             * it from the list
             */
            opal_list_remove_item(nodes, item2);
            OBJ_RELEASE(item2);
        } else {
            /* invalid relative node syntax */
            orte_show_help("help-hostfile.txt", "hostfile:invalid-relative-node-syntax",
                           true, node->name);
            rc = ORTE_ERR_SILENT;
            goto cleanup;
        }

        /* move to next */
        item2 = item1;
    }

    /* remove from the list of nodes those that are in the exclude list */
    while (NULL != (item = opal_list_remove_first(&exclude))) {
        orte_node_t *exnode = (orte_node_t*)item;
        /* check for matches on nodes */
        for (itm = opal_list_get_first(nodes);
             itm != opal_list_get_end(nodes);
             itm = opal_list_get_next(itm)) {
            orte_node_t *node=(orte_node_t*)itm;
            if (0 == strcmp(exnode->name, node->name)) {
                /* match - remove it */
                opal_list_remove_item(nodes, itm);
                OBJ_RELEASE(itm);
                /* have to cycle through the entire list as we could
                 * have duplicates
                 */
            }
        }
        OBJ_RELEASE(item);
    }

cleanup:
    OBJ_DESTRUCT(&exclude);
    return rc;
}
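/*
 * The directive-expansion loop above, and the pruning loop in
 * orte_rmaps_base_get_target_nodes() further below, both rely on saving the
 * successor pointer before the current item may be removed and released.  The
 * sketch below isolates that pattern; it is a minimal illustration assuming an
 * OPAL build environment, and should_drop() is a hypothetical predicate used
 * only for this example.
 */
#include <stdbool.h>
#include "opal/class/opal_list.h"

static void prune(opal_list_t *list, bool (*should_drop)(opal_list_item_t *))
{
    opal_list_item_t *item = opal_list_get_first(list);

    while (item != opal_list_get_end(list)) {
        opal_list_item_t *next = opal_list_get_next(item);  /* save before any removal */
        if (should_drop(item)) {
            opal_list_remove_item(list, item);
            OBJ_RELEASE(item);          /* drop the list's reference */
        }
        item = next;                    /* safe even if 'item' was just released */
    }
}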
/*
 * Query the registry for all nodes allocated to a specified app_context
 */
int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr_t *total_num_slots,
                                     orte_app_context_t *app, orte_mapping_policy_t policy,
                                     bool initial_map, bool silent)
{
    opal_list_item_t *item, *next;
    orte_node_t *node, *nd, *nptr;
    orte_std_cntr_t num_slots;
    orte_std_cntr_t i;
    int rc;
    orte_job_t *daemons;
    bool novm;
    opal_list_t nodes;
    char *hosts;

    /** set default answer */
    *total_num_slots = 0;

    /* get the daemon job object */
    daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
    /* see if we have a vm or not */
    novm = orte_get_attribute(&daemons->attributes, ORTE_JOB_NO_VM, NULL, OPAL_BOOL);

    /* if this is NOT a managed allocation, then we use the nodes
     * that were specified for this app - there is no need to collect
     * all available nodes and "filter" them
     */
    if (!orte_managed_allocation) {
        OBJ_CONSTRUCT(&nodes, opal_list_t);
        /* if the app provided a dash-host, and we are not treating
         * them as requested or "soft" locations, then use those nodes
         */
        hosts = NULL;
        if (!orte_soft_locations &&
            orte_get_attribute(&app->attributes, ORTE_APP_DASH_HOST, (void**)&hosts, OPAL_STRING)) {
            OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                                 "%s using dash_host %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hosts));
            if (ORTE_SUCCESS != (rc = orte_util_add_dash_host_nodes(&nodes, hosts, false))) {
                ORTE_ERROR_LOG(rc);
                free(hosts);
                return rc;
            }
            free(hosts);
        } else if (orte_get_attribute(&app->attributes, ORTE_APP_HOSTFILE, (void**)&hosts, OPAL_STRING)) {
            /* otherwise, if the app provided a hostfile, then use that */
            OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                                 "%s using hostfile %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hosts));
            if (ORTE_SUCCESS != (rc = orte_util_add_hostfile_nodes(&nodes, hosts))) {
                free(hosts);
                ORTE_ERROR_LOG(rc);
                return rc;
            }
            free(hosts);
        } else if (NULL != orte_rankfile) {
            /* use the rankfile, if provided */
            OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                                 "%s using rankfile %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 orte_rankfile));
            if (ORTE_SUCCESS != (rc = orte_util_add_hostfile_nodes(&nodes, orte_rankfile))) {
                ORTE_ERROR_LOG(rc);
                return rc;
            }
            if (0 == opal_list_get_size(&nodes)) {
                OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                                     "%s nothing found in given rankfile",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
                OBJ_DESTRUCT(&nodes);
                return ORTE_ERR_BAD_PARAM;
            }
        } else if (NULL != orte_default_hostfile) {
            /* fall back to the default hostfile, if provided */
            OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                                 "%s using default hostfile %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 orte_default_hostfile));
            if (ORTE_SUCCESS != (rc = orte_util_add_hostfile_nodes(&nodes, orte_default_hostfile))) {
                ORTE_ERROR_LOG(rc);
                return rc;
            }
            /* this is a special case - we always install a default
             * hostfile, but it is empty. If the user didn't remove it
             * or put something into it, then we will have pursued that
             * option and found nothing. This isn't an error, we just need
             * to add all the known nodes
             */
            if (0 == opal_list_get_size(&nodes)) {
                OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                                     "%s nothing in default hostfile - using known nodes",
                                     ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
                goto addknown;
            }
        } else {
            /* if nothing else was available, then use all known nodes, which
             * will include ourselves
             */
            OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                                 "%s using known nodes",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
            goto addknown;
        }
        /** if we still don't have anything */
        if (0 == opal_list_get_size(&nodes)) {
            if (!silent) {
                orte_show_help("help-orte-rmaps-base.txt",
                               "orte-rmaps-base:no-available-resources",
                               true);
            }
            OBJ_DESTRUCT(&nodes);
            return ORTE_ERR_SILENT;
        }
        /* find the nodes in our node array and assemble them
         * in daemon order if the vm was launched
         */
        while (NULL != (item = opal_list_remove_first(&nodes))) {
            nptr = (orte_node_t*)item;
            nd = NULL;
            for (i=0; i < orte_node_pool->size; i++) {
                if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
                    continue;
                }
                if (0 != strcmp(node->name, nptr->name)) {
                    OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output,
                                         "NODE %s DOESNT MATCH NODE %s",
                                         node->name, nptr->name));
                    continue;
                }
                /* ignore nodes that are marked as do-not-use for this mapping */
                if (ORTE_NODE_STATE_DO_NOT_USE == node->state) {
                    OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output,
                                         "NODE %s IS MARKED NO_USE", node->name));
                    /* reset the state so it can be used another time */
                    node->state = ORTE_NODE_STATE_UP;
                    continue;
                }
                if (ORTE_NODE_STATE_DOWN == node->state) {
                    OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output,
                                         "NODE %s IS DOWN", node->name));
                    continue;
                }
                if (ORTE_NODE_STATE_NOT_INCLUDED == node->state) {
                    OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output,
                                         "NODE %s IS MARKED NO_INCLUDE", node->name));
                    /* not to be used */
                    continue;
                }
                /* if this node wasn't included in the vm (e.g., by -host), ignore it,
                 * unless we are mapping prior to launching the vm
                 */
                if (NULL == node->daemon && !novm) {
                    OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output,
                                         "NODE %s HAS NO DAEMON", node->name));
                    continue;
                }
                /* retain a copy for our use in case the item gets
                 * destructed along the way
                 */
                OBJ_RETAIN(node);
                if (initial_map) {
                    /* if this is the first app_context we
                     * are getting for an initial map of a job,
                     * then mark all nodes as unmapped
                     */
                    ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED);
                }
                if (NULL == nd || NULL == nd->daemon ||
                    NULL == node->daemon ||
                    nd->daemon->name.vpid < node->daemon->name.vpid) {
                    /* just append to end */
                    opal_list_append(allocated_nodes, &node->super);
                    nd = node;
                } else {
                    /* starting from end, put this node in daemon-vpid order */
                    while (node->daemon->name.vpid < nd->daemon->name.vpid) {
                        if (opal_list_get_begin(allocated_nodes) ==
                            opal_list_get_prev(&nd->super)) {
                            /* insert at beginning */
                            opal_list_prepend(allocated_nodes, &node->super);
                            goto moveon1;
                        }
                        nd = (orte_node_t*)opal_list_get_prev(&nd->super);
                    }
                    item = opal_list_get_next(&nd->super);
                    if (item == opal_list_get_end(allocated_nodes)) {
                        /* we are at the end - just append */
                        opal_list_append(allocated_nodes, &node->super);
                    } else {
                        nd = (orte_node_t*)item;
                        opal_list_insert_pos(allocated_nodes, item, &node->super);
                    }
                moveon1:
                    /* reset us back to the end for the next node */
                    nd = (orte_node_t*)opal_list_get_last(allocated_nodes);
                }
            }
            OBJ_RELEASE(nptr);
        }
        OBJ_DESTRUCT(&nodes);
        /* now prune for usage and compute total slots */
        goto complete;
    }

addknown:
    /* if the hnp was allocated, include it unless flagged not to */
    if (orte_hnp_is_allocated && !(ORTE_GET_MAPPING_DIRECTIVE(policy) & ORTE_MAPPING_NO_USE_LOCAL)) {
        if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0))) {
            if (ORTE_NODE_STATE_DO_NOT_USE == node->state) {
                OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output,
                                     "HNP IS MARKED NO_USE"));
                /* clear this for future use, but don't include it */
                node->state = ORTE_NODE_STATE_UP;
            } else if (ORTE_NODE_STATE_NOT_INCLUDED != node->state) {
                OBJ_RETAIN(node);
                if (initial_map) {
                    /* if this is the first app_context we
                     * are getting for an initial map of a job,
                     * then mark all nodes as unmapped
                     */
                    ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED);
                }
                opal_list_append(allocated_nodes, &node->super);
            }
        }
    }

    /* add everything in the node pool that can be used - add them
     * in daemon order, which may be different than the order in the
     * node pool. Since an empty list is passed into us, the list at
     * this point either has the HNP node or nothing, and the HNP
     * node obviously has a daemon on it (us!)
     */
    if (0 == opal_list_get_size(allocated_nodes)) {
        /* the list is empty */
        nd = NULL;
    } else {
        nd = (orte_node_t*)opal_list_get_last(allocated_nodes);
    }
    for (i=1; i < orte_node_pool->size; i++) {
        if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
            /* ignore nodes that are marked as do-not-use for this mapping */
            if (ORTE_NODE_STATE_DO_NOT_USE == node->state) {
                OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output,
                                     "NODE %s IS MARKED NO_USE", node->name));
                /* reset the state so it can be used another time */
                node->state = ORTE_NODE_STATE_UP;
                continue;
            }
            if (ORTE_NODE_STATE_DOWN == node->state) {
                OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output,
                                     "NODE %s IS MARKED DOWN", node->name));
                continue;
            }
            if (ORTE_NODE_STATE_NOT_INCLUDED == node->state) {
                OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output,
                                     "NODE %s IS MARKED NO_INCLUDE", node->name));
                /* not to be used */
                continue;
            }
            /* if this node wasn't included in the vm (e.g., by -host), ignore it,
             * unless we are mapping prior to launching the vm
             */
            if (NULL == node->daemon && !novm) {
                OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output,
                                     "NODE %s HAS NO DAEMON", node->name));
                continue;
            }
            /* retain a copy for our use in case the item gets
             * destructed along the way
             */
            OBJ_RETAIN(node);
            if (initial_map) {
                /* if this is the first app_context we
                 * are getting for an initial map of a job,
                 * then mark all nodes as unmapped
                 */
                ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED);
            }
            if (NULL == nd || NULL == nd->daemon ||
                NULL == node->daemon ||
                nd->daemon->name.vpid < node->daemon->name.vpid) {
                /* just append to end */
                opal_list_append(allocated_nodes, &node->super);
                nd = node;
            } else {
                /* starting from end, put this node in daemon-vpid order */
                while (node->daemon->name.vpid < nd->daemon->name.vpid) {
                    if (opal_list_get_begin(allocated_nodes) ==
                        opal_list_get_prev(&nd->super)) {
                        /* insert at beginning */
                        opal_list_prepend(allocated_nodes, &node->super);
                        goto moveon;
                    }
                    nd = (orte_node_t*)opal_list_get_prev(&nd->super);
                }
                item = opal_list_get_next(&nd->super);
                if (item == opal_list_get_end(allocated_nodes)) {
                    /* we are at the end - just append */
                    opal_list_append(allocated_nodes, &node->super);
                } else {
                    nd = (orte_node_t*)item;
                    opal_list_insert_pos(allocated_nodes, item, &node->super);
                }
            moveon:
                /* reset us back to the end for the next node */
                nd = (orte_node_t*)opal_list_get_last(allocated_nodes);
            }
        }
    }

    OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                         "%s Starting with %d nodes in list",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         (int)opal_list_get_size(allocated_nodes)));

    /** check that anything is here */
    if (0 == opal_list_get_size(allocated_nodes)) {
        if (!silent) {
            orte_show_help("help-orte-rmaps-base.txt",
                           "orte-rmaps-base:no-available-resources",
                           true);
        }
        return ORTE_ERR_SILENT;
    }

    /* filter the nodes thru any hostfile and dash-host options */
    OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                         "%s Filtering thru apps",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    if (ORTE_SUCCESS != (rc = orte_rmaps_base_filter_nodes(app, allocated_nodes, true))
        && ORTE_ERR_TAKE_NEXT_OPTION != rc) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }
    OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                         "%s Retained %d nodes in list",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         (int)opal_list_get_size(allocated_nodes)));

complete:
    /* remove all nodes that are already at max usage, and
     * compute the total number of allocated slots while
     * we do so
     */
    num_slots = 0;
    item = opal_list_get_first(allocated_nodes);
    while (item != opal_list_get_end(allocated_nodes)) {
        /** save the next pointer in case we remove this node */
        next = opal_list_get_next(item);
        /** check to see if this node is fully used - remove if so */
        node = (orte_node_t*)item;
        if (0 != node->slots_max && node->slots_inuse > node->slots_max) {
            OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                                 "%s Removing node %s: max %d inuse %d",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 node->name, node->slots_max, node->slots_inuse));
            opal_list_remove_item(allocated_nodes, item);
            OBJ_RELEASE(item);  /* "un-retain" it */
        } else if (node->slots <= node->slots_inuse &&
                   (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(policy))) {
            /* remove the node as fully used */
            OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                                 "%s Removing node %s slots %d inuse %d",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 node->name, node->slots, node->slots_inuse));
            opal_list_remove_item(allocated_nodes, item);
            OBJ_RELEASE(item);  /* "un-retain" it */
        } else if (node->slots > node->slots_inuse) {
            /* add the available slots */
            OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                                 "%s node %s has %d slots available",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 node->name, node->slots - node->slots_inuse));
            num_slots += node->slots - node->slots_inuse;
        } else if (!(ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(policy))) {
            /* nothing needed to do here - we don't add slots to the
             * count as we don't have any available. Just let the mapper
             * do what it needs to do to meet the request
             */
            OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
                                 "%s node %s is fully used, but available for oversubscription",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                 node->name));
        } else {
            /* if we cannot use it, remove it from list */
            opal_list_remove_item(allocated_nodes, item);
            OBJ_RELEASE(item);  /* "un-retain" it */
        }
        /** go on to next item */
        item = next;
    }

    /* Sanity check to make sure we have resources available */
    if (0 == opal_list_get_size(allocated_nodes)) {
        if (silent) {
            /* let the caller know that the resources exist,
             * but are currently busy
             */
            return ORTE_ERR_RESOURCE_BUSY;
        } else {
            orte_show_help("help-orte-rmaps-base.txt",
                           "orte-rmaps-base:all-available-resources-used", true);
            return ORTE_ERR_SILENT;
        }
    }

    /* pass back the total number of available slots */
    *total_num_slots = num_slots;

    if (4 < opal_output_get_verbosity(orte_rmaps_base_framework.framework_output)) {
        opal_output(0, "AVAILABLE NODES FOR MAPPING:");
        for (item = opal_list_get_first(allocated_nodes);
             item != opal_list_get_end(allocated_nodes);
             item = opal_list_get_next(item)) {
            node = (orte_node_t*)item;
            opal_output(0, "    node: %s daemon: %s", node->name,
                        (NULL == node->daemon) ? "NULL" : ORTE_VPID_PRINT(node->daemon->name.vpid));
        }
    }

    return ORTE_SUCCESS;
}
/*
 * For each module in the list, if it is in the list of names (or the
 * list of names is NULL), then check and see if it wants to run, and
 * do the resulting priority comparison.  Make a list of components to
 * be only those who returned that they want to run, and put them in
 * priority order.
 */
static opal_list_t *check_components(opal_list_t *components,
                                     char *filename, struct ompi_info_t *info,
                                     char **names, int num_names)
{
    int i;
    const mca_base_component_t *component;
    opal_list_item_t *item, *item2;
    bool want_to_check;
    opal_list_t *selectable;
    avail_io_t *avail, *avail2;

    /* Make a list of the components that query successfully */
    selectable = OBJ_NEW(opal_list_t);

    /* Scan through the list of components.  This nested loop is
       O(N^2), but we should never have too many components and/or
       names, so this *hopefully* shouldn't matter... */
    for (item = opal_list_get_first(components);
         item != opal_list_get_end(components);
         item = opal_list_get_next(item)) {
        component = ((mca_base_component_priority_list_item_t *)
                     item)->super.cli_component;

        /* If we have a list of names, scan through it */
        if (0 == num_names) {
            want_to_check = true;
        } else {
            want_to_check = false;
            for (i = 0; i < num_names; ++i) {
                if (0 == strcmp(names[i], component->mca_component_name)) {
                    want_to_check = true;
                }
            }
        }

        /* If we determined that we want to check this component, then do so */
        if (want_to_check) {
            avail = check_one_component(component, filename, info);
            if (NULL != avail) {

                /* Put this item on the list in priority order (lowest
                   priority first): insert in front of the first entry
                   with a higher priority, otherwise append. */
                for (item2 = opal_list_get_first(selectable);
                     item2 != opal_list_get_end(selectable);
                     item2 = opal_list_get_next(item2)) {
                    avail2 = (avail_io_t*)item2;
                    if (avail->ai_priority < avail2->ai_priority) {
                        opal_list_insert_pos(selectable, item2,
                                             (opal_list_item_t*)avail);
                        break;
                    }
                }
                if (opal_list_get_end(selectable) == item2) {
                    opal_list_append(selectable, (opal_list_item_t*)avail);
                }
                /*
                item2 = opal_list_get_first(selectable);
                avail2 = (avail_io_t *) item2;
                if (opal_list_get_end(selectable) == item2 ||
                    avail->ai_priority > avail2->ai_priority) {
                    opal_list_prepend(selectable, (opal_list_item_t*) avail);
                } else {
                    for (i = 1; item2 != opal_list_get_end(selectable);
                         item2 = opal_list_get_next(selectable), ++i) {
                        avail2 = (avail_io_t *) item2;
                        if (avail->ai_priority > avail2->ai_priority) {
                            opal_list_insert(selectable,
                                             (opal_list_item_t *) avail, i);
                            break;
                        }
                    }
                */
                    /* If we didn't find a place to put it in the list,
                       then append it (because it has the lowest priority
                       found so far) */
                /*
                    if (opal_list_get_end(selectable) == item2) {
                        opal_list_append(selectable, (opal_list_item_t *) avail);
                    }
                }
                */
            }
        }
    }

    /* If we didn't find any available components, return an error */
    if (0 == opal_list_get_size(selectable)) {
        OBJ_RELEASE(selectable);
        return NULL;
    }

    /* All done */
    return selectable;
}
int mca_rcache_vma_tree_insert(mca_rcache_vma_module_t* vma_rcache,
        mca_mpool_base_registration_t* reg, size_t limit)
{
    mca_rcache_vma_t *i;
    uintptr_t begin = (uintptr_t)reg->base, end = (uintptr_t)reg->bound;

    i = (mca_rcache_vma_t*)ompi_rb_tree_find_with(&vma_rcache->rb_tree,
            (void*)begin, mca_rcache_vma_tree_node_compare_closest);

    if(!i)
        i = (mca_rcache_vma_t*)opal_list_get_end(&vma_rcache->vma_list);

    while (begin <= end) {
        mca_rcache_vma_t *vma;

        if((mca_rcache_vma_t*)opal_list_get_end(&vma_rcache->vma_list) == i) {
            vma = NULL;
            if(mca_rcache_vma_can_insert(vma_rcache, end - begin + 1, limit))
                vma = mca_rcache_vma_new(vma_rcache, begin, end);

            if(!vma)
                goto remove;

            mca_rcache_vma_update_byte_count(vma_rcache, end - begin + 1);
            opal_list_append(&vma_rcache->vma_list, &vma->super);
            begin = vma->end + 1;
            mca_rcache_vma_add_reg(vma, reg);
        } else if(i->start > begin) {
            uintptr_t tend = (i->start <= end)?(i->start - 1):end;
            vma = NULL;
            if(mca_rcache_vma_can_insert(vma_rcache, tend - begin + 1, limit))
                vma = mca_rcache_vma_new(vma_rcache, begin, tend);

            if(!vma)
                goto remove;

            mca_rcache_vma_update_byte_count(vma_rcache, tend - begin + 1);
            /* insert before */
            opal_list_insert_pos(&vma_rcache->vma_list, &i->super, &vma->super);
            i = vma;
            begin = vma->end + 1;
            mca_rcache_vma_add_reg(vma, reg);
        } else if(i->start == begin) {
            if (i->end > end) {
                vma = mca_rcache_vma_new(vma_rcache, end+1, i->end);
                if(!vma)
                    goto remove;

                i->end = end;
                mca_rcache_vma_copy_reg_list(vma, i);
                /* add after */
                opal_list_insert_pos(&vma_rcache->vma_list,
                        opal_list_get_next(&i->super), &vma->super);
                mca_rcache_vma_add_reg(i, reg);
                begin = end + 1;
            } else {
                mca_rcache_vma_add_reg(i, reg);
                begin = i->end + 1;
            }
        } else {
            vma = mca_rcache_vma_new(vma_rcache, begin, i->end);
            if(!vma)
                goto remove;

            i->end = begin - 1;
            mca_rcache_vma_copy_reg_list(vma, i);
            /* add after */
            opal_list_insert_pos(&vma_rcache->vma_list,
                    opal_list_get_next(&i->super), &vma->super);
        }
        i = (mca_rcache_vma_t*)opal_list_get_next(&i->super);
    }

    return OMPI_SUCCESS;

remove:
    mca_rcache_vma_tree_delete(vma_rcache, reg);
    return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
}
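/*
 * mca_rcache_vma_tree_insert() lays a registration [begin,end] over the sorted
 * list of non-overlapping vmas, splitting existing vmas at the registration
 * boundaries so that every covered vma either carries the new registration or
 * is untouched.  For example, inserting a registration over [10,30] into a
 * list holding a single vma [20,40] yields [10,19] (new reg only), [20,30]
 * (old regs plus the new reg) and [31,40] (old regs only).  Below is a
 * standalone sketch of just that interval arithmetic; the toy seg_t type and
 * the fixed-size array are hypothetical simplifications, not the rcache code.
 */
#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define MAX_SEGS 16

typedef struct {
    uintptr_t start, end;   /* inclusive bounds, like vma->start / vma->end */
    int       nregs;        /* stand-in for the per-vma registration list   */
} seg_t;

/* insert a copy of 'src' at position 'pos', shifting the tail right */
static void seg_insert(seg_t *segs, int *n, int pos, seg_t src)
{
    memmove(&segs[pos + 1], &segs[pos], (size_t)(*n - pos) * sizeof(seg_t));
    segs[pos] = src;
    (*n)++;
}

/* lay a registration over [begin,end], splitting segments at its boundaries */
static void add_reg(seg_t *segs, int *n, uintptr_t begin, uintptr_t end)
{
    int i = 0;
    while (begin <= end) {
        if (i == *n) {                              /* past the last segment   */
            seg_t s = { begin, end, 1 };
            seg_insert(segs, n, i, s);
            begin = end + 1;
        } else if (segs[i].start > begin) {         /* gap before segment i    */
            uintptr_t tend = (segs[i].start <= end) ? segs[i].start - 1 : end;
            seg_t s = { begin, tend, 1 };
            seg_insert(segs, n, i, s);
            begin = tend + 1;
        } else if (segs[i].start == begin) {
            if (segs[i].end > end) {                /* split off the tail      */
                seg_t tail = { end + 1, segs[i].end, segs[i].nregs };
                segs[i].end = end;
                seg_insert(segs, n, i + 1, tail);
            }
            segs[i].nregs++;                        /* covered: attach the reg */
            begin = segs[i].end + 1;
        } else {                                    /* begin falls inside seg i */
            seg_t tail = { begin, segs[i].end, segs[i].nregs };
            segs[i].end = begin - 1;
            seg_insert(segs, n, i + 1, tail);
        }
        i++;
    }
}

int main(void)
{
    seg_t segs[MAX_SEGS] = { { 20, 40, 1 } };       /* one existing vma [20,40] */
    int n = 1;

    add_reg(segs, &n, 10, 30);                      /* register [10,30] */
    for (int i = 0; i < n; i++)
        printf("[%lu,%lu] regs=%d\n", (unsigned long)segs[i].start,
               (unsigned long)segs[i].end, segs[i].nregs);
    return 0;                                       /* prints [10,19]=1 [20,30]=2 [31,40]=1 */
}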