/* Unload devices */ static int iboffload_release_devices(void) { int i; mca_bcol_iboffload_device_t *device = NULL; mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component; opal_pointer_array_t *devs = &cm->devices; IBOFFLOAD_VERBOSE(10, ("Destroy all devices.\n")); for (i = 0; i < cm->num_devs; i++) { device = opal_pointer_array_get_item(devs, i); IBOFFLOAD_VERBOSE(10, ("Device %s with index %d will be destroyed.\n", ibv_get_device_name(device->dev.ib_dev), i)); if (NULL != device) { OBJ_RELEASE(device); } } IBOFFLOAD_VERBOSE(10, ("All devices were destroyed.\n")); opal_pointer_array_remove_all(devs); OBJ_DESTRUCT(devs); /* release device list */ /*ibv_free_device_list_compat(cm->ib_devs);*/ ompi_ibv_free_device_list(cm->ib_devs); cm->ib_devs = NULL; IBOFFLOAD_VERBOSE(10, ("All devices destroyed.\n")); return OMPI_SUCCESS; }
/**
 * Decode a packed pidmap and merge it into the global job map.
 *
 * The byte object is loaded into a buffer and unpacked as a sequence of
 * per-job records, each consisting of: the jobid, the number of procs,
 * then three arrays of that length (node index, local rank, node rank).
 * For a job already present in orte_jobmap the existing pmap entries are
 * released and rebuilt from the new data; otherwise a new jmap is created
 * and appended to orte_jobmap.
 *
 * @param bo  byte object holding the packed pidmap
 *
 * @return ORTE_SUCCESS when the whole buffer was consumed (the
 *         read-past-end-of-buffer condition on the jobid unpack is the
 *         normal loop terminator); otherwise the error code of the
 *         operation that failed.
 *
 * All exits go through the cleanup label so that the unpack buffer and
 * the per-job scratch arrays are released on every path.
 */
int orte_util_decode_pidmap(opal_byte_object_t *bo)
{
    orte_jobid_t jobid;
    orte_vpid_t i, num_procs;
    orte_pmap_t *pmap;
    /* scratch arrays, NULL whenever they are not owned by this function */
    int32_t *nodes = NULL;
    orte_local_rank_t *local_rank = NULL;
    orte_node_rank_t *node_rank = NULL;
    orte_std_cntr_t n;
    opal_buffer_t buf;
    orte_jmap_t *jmap;
    bool already_present;
    int j;
    int rc;

    /* xfer the byte object to a buffer for unpacking */
    OBJ_CONSTRUCT(&buf, opal_buffer_t);
    if (ORTE_SUCCESS != (rc = opal_dss.load(&buf, bo->bytes, bo->size))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    n = 1;
    /* cycle through the buffer */
    while (ORTE_SUCCESS == (rc = opal_dss.unpack(&buf, &jobid, &n, ORTE_JOBID))) {
        /* unfortunately, job objects cannot be stored
         * by index number as the jobid is a constructed
         * value. So we have no choice but to cycle through
         * the jobmap pointer array and look for this entry. Since
         * jobs are cleaned up as they complete, check the
         * entire array
         */
        jmap = NULL;
        already_present = false;
        for (j = 0; j < orte_jobmap.size; j++) {
            if (NULL == (jmap = (orte_jmap_t*)opal_pointer_array_get_item(&orte_jobmap, j))) {
                continue;
            }
            if (jobid == jmap->job) {
                already_present = true;
                break;
            }
        }

        /* unpack the number of procs */
        n = 1;
        if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &num_procs, &n, ORTE_VPID))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }

        /* NOTE(review): the three allocations below are not checked for
         * failure, matching the previous behavior - consider adding a
         * check with the project's out-of-resource error code. */

        /* allocate memory for the node info */
        nodes = malloc(num_procs * sizeof(*nodes));
        /* unpack it in one shot */
        n = num_procs;
        if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, nodes, &n, OPAL_INT32))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }

        /* allocate memory for local ranks */
        local_rank = malloc(num_procs * sizeof(*local_rank));
        /* unpack them in one shot */
        n = num_procs;
        if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, local_rank, &n, ORTE_LOCAL_RANK))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }

        /* allocate memory for node ranks */
        node_rank = malloc(num_procs * sizeof(*node_rank));
        /* unpack node ranks in one shot */
        n = num_procs;
        if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, node_rank, &n, ORTE_NODE_RANK))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }

        /* if we already know about this job, we need to check the data to see
         * if something has changed - e.g., a proc that is being restarted somewhere
         * other than where it previously was
         */
        if (already_present) {
            /* we already have the jmap object, so let's refresh its pidmap
             * using the new data - start by cleaning out the old array
             */
            for (j = 0; j < jmap->pmap.size; j++) {
                if (NULL == (pmap = (orte_pmap_t*)opal_pointer_array_get_item(&jmap->pmap, j))) {
                    continue;
                }
                OBJ_RELEASE(pmap);
            }
            /* now use the opal function to reset the internal pointers */
            opal_pointer_array_remove_all(&jmap->pmap);
            /* set the size of the storage so we minimize realloc's */
            if (ORTE_SUCCESS != (rc = opal_pointer_array_set_size(&jmap->pmap, num_procs))) {
                ORTE_ERROR_LOG(rc);
                /* go through cleanup so the buffer and scratch arrays are released */
                goto cleanup;
            }
            /* add in the updated array */
            for (i = 0; i < num_procs; i++) {
                pmap = OBJ_NEW(orte_pmap_t);
                /* add the pidmap entry at the specific site corresponding
                 * to the proc's vpid
                 */
                if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(&jmap->pmap, i, pmap))) {
                    ORTE_ERROR_LOG(rc);
                    /* the array did not take the entry, so drop our reference */
                    OBJ_RELEASE(pmap);
                    goto cleanup;
                }
                /* add/update the data */
                pmap->node = nodes[i];
                pmap->local_rank = local_rank[i];
                pmap->node_rank = node_rank[i];
            }
            /* update the #procs */
            jmap->num_procs = num_procs;
        } else {
            /* if we don't already have this data, store it
             * unfortunately, job objects cannot be stored
             * by index number as the jobid is a constructed
             * value. So we have to just add it to the end
             * of the array
             */
            jmap = OBJ_NEW(orte_jmap_t);
            jmap->job = jobid;
            jmap->num_procs = num_procs;
            if (0 > (j = opal_pointer_array_add(&orte_jobmap, jmap))) {
                ORTE_ERROR_LOG(j);
                rc = j;
                /* the jobmap did not take the object, so drop our reference */
                OBJ_RELEASE(jmap);
                goto cleanup;
            }
            /* pre-size the procs array; the result is not checked here,
             * any storage failure is reported by the checked set_item below */
            opal_pointer_array_set_size(&jmap->pmap, num_procs);
            /* xfer the data */
            for (i = 0; i < num_procs; i++) {
                pmap = OBJ_NEW(orte_pmap_t);
                pmap->node = nodes[i];
                pmap->local_rank = local_rank[i];
                pmap->node_rank = node_rank[i];
                /* add the pidmap entry at the specific site corresponding
                 * to the proc's vpid
                 */
                if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(&jmap->pmap, i, pmap))) {
                    ORTE_ERROR_LOG(rc);
                    OBJ_RELEASE(pmap);
                    goto cleanup;
                }
            }
        }

        /* release data and forget the pointers so the cleanup path
         * cannot free them a second time */
        free(nodes);
        nodes = NULL;
        free(local_rank);
        local_rank = NULL;
        free(node_rank);
        node_rank = NULL;

        /* setup for next cycle */
        n = 1;
    }

    /* running off the end of the buffer is how the loop is expected to end */
    if (ORTE_ERR_UNPACK_READ_PAST_END_OF_BUFFER == OPAL_SOS_GET_ERROR_CODE(rc)) {
        rc = ORTE_SUCCESS;
    }

cleanup:
    /* free(NULL) is a no-op, so this is safe on every path */
    free(nodes);
    free(local_rank);
    free(node_rank);
    OBJ_DESTRUCT(&buf);
    return rc;
}
/**
 * Decode a packed nodemap and rebuild the global nidmap from it.
 *
 * Any entries currently in orte_nidmap are released first. The byte
 * object is then unpacked as: the number of nodes, one name string per
 * node, an array of daemon vpids, and an array of oversubscribed flags.
 * The number of nodes whose daemon vpid is valid is stored in
 * orte_process_info.num_daemons and, for the HNP and daemons, also in
 * num_procs (raising max_procs if needed).
 *
 * @param bo  byte object holding the packed nodemap
 *
 * @return ORTE_SUCCESS on success, otherwise the error code of the
 *         operation that failed.
 *
 * All exits go through the cleanup label so that the unpack buffer and
 * the temporary vpid/oversubscribe arrays are released on every path.
 */
int orte_util_decode_nodemap(opal_byte_object_t *bo)
{
    int n;
    int32_t num_nodes, i, num_daemons;
    orte_nid_t *node;
    orte_vpid_t *vpids = NULL;
    orte_nid_t *nd, *ndptr;
    opal_buffer_t buf;
    int rc;
    uint8_t *oversub = NULL;

    OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
                         "%s decode:nidmap decoding nodemap",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* if there are any entries already in the node array, clear it out */
    if (0 < orte_nidmap.size) {
        /* unfortunately, the opal function "remove_all" doesn't release
         * the memory pointed to by the elements in the array, so we need
         * to release those first
         */
        for (i = 0; i < orte_nidmap.size; i++) {
            if (NULL != (ndptr = (orte_nid_t*)opal_pointer_array_get_item(&orte_nidmap, i))) {
                OBJ_RELEASE(ndptr);
            }
        }
        /* now use the opal function to reset the internal pointers */
        opal_pointer_array_remove_all(&orte_nidmap);
    }

    /* xfer the byte object to a buffer for unpacking */
    OBJ_CONSTRUCT(&buf, opal_buffer_t);
    if (ORTE_SUCCESS != (rc = opal_dss.load(&buf, bo->bytes, bo->size))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /* unpack number of nodes */
    n = 1;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &num_nodes, &n, OPAL_INT32))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    OPAL_OUTPUT_VERBOSE((2, orte_debug_output,
                         "%s decode:nidmap decoding %d nodes",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), num_nodes));

    /* NOTE(review): num_nodes comes straight from the buffer and is not
     * range-checked before being used as an allocation size below. */

    /* set the size of the nidmap storage so we minimize realloc's */
    if (ORTE_SUCCESS != (rc = opal_pointer_array_set_size(&orte_nidmap, num_nodes))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /* loop over nodes and unpack the raw nodename */
    for (i = 0; i < num_nodes; i++) {
        node = OBJ_NEW(orte_nid_t);
        /* store the entry at the index matching its position in the map */
        if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(&orte_nidmap, i, node))) {
            ORTE_ERROR_LOG(rc);
            /* the array did not take the entry, so drop our reference */
            OBJ_RELEASE(node);
            goto cleanup;
        }
        /* unpack the node's name */
        n = 1;
        if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, &(node->name), &n, OPAL_STRING))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }
    }

    /* unpack the daemon vpids */
    vpids = malloc(num_nodes * sizeof(*vpids));
    n = num_nodes;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, vpids, &n, ORTE_VPID))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /* unpack the oversubscribed flags */
    oversub = malloc(num_nodes * sizeof(*oversub));
    n = num_nodes;
    if (ORTE_SUCCESS != (rc = opal_dss.unpack(&buf, oversub, &n, OPAL_UINT8))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    /* transfer the data to the nidmap, counting the number of
     * daemons in the system
     */
    num_daemons = 0;
    for (i = 0; i < num_nodes; i++) {
        if (NULL != (ndptr = (orte_nid_t*)opal_pointer_array_get_item(&orte_nidmap, i))) {
            ndptr->daemon = vpids[i];
            ndptr->oversubscribed = (0 != oversub[i]);
            if (ORTE_VPID_INVALID != vpids[i]) {
                ++num_daemons;
            }
        }
    }

    /* if we are a daemon or the HNP, update our num_procs */
    if (ORTE_PROC_IS_HNP || ORTE_PROC_IS_DAEMON) {
        orte_process_info.num_procs = num_daemons;

        if (orte_process_info.max_procs < orte_process_info.num_procs) {
            orte_process_info.max_procs = orte_process_info.num_procs;
        }
    }
    /* update num_daemons */
    orte_process_info.num_daemons = num_daemons;

    if (0 < opal_output_get_verbosity(orte_debug_output)) {
        for (i = 0; i < num_nodes; i++) {
            if (NULL == (nd = (orte_nid_t*)opal_pointer_array_get_item(&orte_nidmap, i))) {
                continue;
            }
            opal_output(0, "%s node[%d].name %s daemon %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), i,
                        (NULL == nd->name) ? "NULL" : nd->name,
                        ORTE_VPID_PRINT(nd->daemon));
        }
    }

    rc = ORTE_SUCCESS;

cleanup:
    /* free(NULL) is a no-op, so this is safe on every path */
    free(vpids);
    free(oversub);
    OBJ_DESTRUCT(&buf);
    return rc;
}