/*
 * Destructor for an hcoll collective module.
 *
 * If the module belongs to MPI_COMM_WORLD, every module still registered on
 * the component's "active_modules" list is torn down first: its hcoll
 * context is destroyed, the reference on its communicator is dropped, and
 * its list wrapper is unlinked and released.  The MPI_COMM_WORLD context is
 * destroyed afterwards.
 *
 * In all cases the references held on the previously selected collective
 * modules are released and the module is cleared.
 */
static void mca_coll_hcoll_module_destruct(mca_coll_hcoll_module_t *hcoll_module)
{
    opal_list_t *active = &mca_coll_hcoll_component.active_modules;
    opal_list_item_t *cur, *following;
    mca_coll_hcoll_module_t *pending;
    ompi_communicator_t *pending_comm;
    int done;

    if (&ompi_mpi_comm_world.comm == hcoll_module->comm) {
        /* MPI_COMM_WORLD is going away, so anything still on the active
         * list can be destroyed now. */
        for (cur = opal_list_get_first(active);
             opal_list_get_end(active) != cur;
             cur = following) {
            /* grab the successor before the current wrapper is released */
            following = opal_list_get_next(cur);

            pending = ((mca_coll_hcoll_module_list_item_wrapper_t *) cur)->module;
            pending_comm = pending->comm;

            /* keep calling until hcoll reports the context as destroyed */
            done = 0;
            do {
                hcoll_destroy_context(pending->hcoll_context,
                                      (rte_grp_handle_t) pending_comm,
                                      &done);
            } while (!done);
            pending->hcoll_context = NULL;

            OBJ_RELEASE(pending_comm);
            opal_list_remove_item(active, cur);
            OBJ_RELEASE(cur);
        }

        /* finally, the context attached to MPI_COMM_WORLD itself */
        done = 0;
        do {
            hcoll_destroy_context(hcoll_module->hcoll_context,
                                  (rte_grp_handle_t) hcoll_module->comm,
                                  &done);
        } while (!done);
    }

    /* drop the references on the modules this one had been layered over */
    OBJ_RELEASE(hcoll_module->previous_barrier_module);
    OBJ_RELEASE(hcoll_module->previous_bcast_module);
    OBJ_RELEASE(hcoll_module->previous_reduce_module);
    OBJ_RELEASE(hcoll_module->previous_allreduce_module);
    OBJ_RELEASE(hcoll_module->previous_allgather_module);
    OBJ_RELEASE(hcoll_module->previous_allgatherv_module);
    OBJ_RELEASE(hcoll_module->previous_gather_module);
    OBJ_RELEASE(hcoll_module->previous_gatherv_module);
    OBJ_RELEASE(hcoll_module->previous_alltoall_module);
    OBJ_RELEASE(hcoll_module->previous_alltoallv_module);
    OBJ_RELEASE(hcoll_module->previous_alltoallw_module);
    OBJ_RELEASE(hcoll_module->previous_reduce_scatter_module);
    OBJ_RELEASE(hcoll_module->previous_ibarrier_module);
    OBJ_RELEASE(hcoll_module->previous_ibcast_module);
    OBJ_RELEASE(hcoll_module->previous_iallreduce_module);
    OBJ_RELEASE(hcoll_module->previous_iallgather_module);

    mca_coll_hcoll_module_clear(hcoll_module);
}
/*
 * Store a private copy of a key/value pair for a process in the hash db.
 *
 * uid       identifier of the owning process; copied out with memcpy so the
 *           pointer does not need to be suitably aligned
 * locality  only reported in the verbose trace
 * key       name of the value (duplicated)
 * data      pointer to the value, interpreted according to "type"; may be
 *           NULL for OPAL_STRING and OPAL_BYTE_OBJECT (stored as empty),
 *           must be non-NULL for the integer types
 * type      one of OPAL_STRING, OPAL_UINT32, OPAL_UINT16, OPAL_INT,
 *           OPAL_UINT, OPAL_BYTE_OBJECT
 *
 * Returns OPAL_SUCCESS, OPAL_ERR_BAD_PARAM (NULL uid/key, or NULL data for
 * an integer type), OPAL_ERR_NOT_SUPPORTED (any other type) or
 * OPAL_ERR_OUT_OF_RESOURCE (no proc entry / allocation failure).
 *
 * The new value is built completely before the list is touched, so a
 * failure leaves any previously stored value for "key" intact and never
 * leaves a partially filled entry behind.
 */
static int store(const opal_identifier_t *uid, opal_db_locality_t locality,
                 const char *key, const void *data, opal_data_type_t type)
{
    proc_data_t *proc_data;
    opal_value_t *kv, *old;
    const opal_byte_object_t *boptr;
    opal_identifier_t id;
    int rc = OPAL_SUCCESS;

    if (NULL == uid || NULL == key) {
        OPAL_ERROR_LOG(OPAL_ERR_BAD_PARAM);
        return OPAL_ERR_BAD_PARAM;
    }

    /* to protect alignment, copy the data across */
    memcpy(&id, uid, sizeof(opal_identifier_t));

    /* we are at the bottom of the store priorities, so
     * if this fell to us, we store it
     */
    opal_output_verbose(1, opal_db_base_framework.framework_output,
                        "db:hash:store storing data for proc %" PRIu64 " at locality %d",
                        id, (int)locality);

    /* lookup the proc data object for this proc */
    if (NULL == (proc_data = lookup_opal_proc(&hash_data, id))) {
        /* unrecoverable error */
        OPAL_OUTPUT_VERBOSE((5, opal_db_base_framework.framework_output,
                             "db:hash:store: storing key %s[%s] for proc %" PRIu64 " unrecoverably failed",
                             key, opal_dss.lookup_data_type(type), id));
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    /* see if we already have this key in the data - means we are updating
     * a pre-existing value
     */
    old = lookup_keyval(proc_data, key);
    OPAL_OUTPUT_VERBOSE((5, opal_db_base_framework.framework_output,
                         "db:hash:store: %s key %s[%s] for proc %" PRIu64 "",
                         (NULL == old ? "storing" : "updating"),
                         key, opal_dss.lookup_data_type(type), id));

    /* build the replacement off-list first */
    kv = OBJ_NEW(opal_value_t);
    if (NULL == kv) {
        OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE);
        return OPAL_ERR_OUT_OF_RESOURCE;
    }
    kv->key = strdup(key);
    if (NULL == kv->key) {
        rc = OPAL_ERR_OUT_OF_RESOURCE;
        goto fail;
    }

    /* copy the payload for the supported types */
    switch (type) {
    case OPAL_STRING:
        kv->type = OPAL_STRING;
        kv->data.string = NULL;
        if (NULL != data) {
            kv->data.string = strdup((const char *) data);
            if (NULL == kv->data.string) {
                rc = OPAL_ERR_OUT_OF_RESOURCE;
            }
        }
        break;
    case OPAL_UINT32:
        if (NULL == data) {
            rc = OPAL_ERR_BAD_PARAM;
            break;
        }
        kv->type = OPAL_UINT32;
        kv->data.uint32 = *(const uint32_t *) data;
        break;
    case OPAL_UINT16:
        if (NULL == data) {
            rc = OPAL_ERR_BAD_PARAM;
            break;
        }
        kv->type = OPAL_UINT16;
        kv->data.uint16 = *(const uint16_t *) data;
        break;
    case OPAL_INT:
        if (NULL == data) {
            rc = OPAL_ERR_BAD_PARAM;
            break;
        }
        kv->type = OPAL_INT;
        kv->data.integer = *(const int *) data;
        break;
    case OPAL_UINT:
        if (NULL == data) {
            rc = OPAL_ERR_BAD_PARAM;
            break;
        }
        kv->type = OPAL_UINT;
        kv->data.uint = *(const unsigned int *) data;
        break;
    case OPAL_BYTE_OBJECT:
        boptr = (const opal_byte_object_t *) data;
        kv->type = OPAL_BYTE_OBJECT;
        /* start out empty so a failed allocation leaves a consistent value */
        kv->data.bo.bytes = NULL;
        kv->data.bo.size = 0;
        if (NULL != boptr && NULL != boptr->bytes && 0 < boptr->size) {
            kv->data.bo.bytes = (uint8_t *) malloc(boptr->size);
            if (NULL == kv->data.bo.bytes) {
                rc = OPAL_ERR_OUT_OF_RESOURCE;
                break;
            }
            memcpy(kv->data.bo.bytes, boptr->bytes, boptr->size);
            kv->data.bo.size = boptr->size;
        }
        break;
    default:
        rc = OPAL_ERR_NOT_SUPPORTED;
        break;
    }

    if (OPAL_SUCCESS != rc) {
        goto fail;
    }

    /* the new value is complete: retire the old one and publish the new */
    if (NULL != old) {
        opal_list_remove_item(&proc_data->data, &old->super);
        OBJ_RELEASE(old);
    }
    opal_list_append(&proc_data->data, &kv->super);
    return OPAL_SUCCESS;

fail:
    OPAL_ERROR_LOG(rc);
    /* kv was never linked into the list, so we hold its only reference.
     * NOTE(review): assumes the opal_value_t destructor copes with a value
     * whose payload was never filled in - confirm. */
    OBJ_RELEASE(kv);
    return rc;
}
int main(int argc, char **argv) { /* local variables */ opal_list_t list, x; size_t indx,i,list_size, tmp_size_1, tmp_size_2,size_elements; int error_cnt, rc; test_data_t *elements, *ele; opal_list_item_t *item; rc = opal_init_util(&argc, &argv); test_verify_int(OPAL_SUCCESS, rc); if (OPAL_SUCCESS != rc) { test_finalize(); exit(1); } test_init("opal_list_t"); /* initialize list */ OBJ_CONSTRUCT(&list, opal_list_t); OBJ_CONSTRUCT(&x, opal_list_t); /* check length of list */ list_size=opal_list_get_size(&list); if( 0 == list_size ) { test_success(); } else { test_failure(" opal_list_get_size"); } /* check for empty */ if (opal_list_is_empty(&list)) { test_success(); } else { test_failure(" opal_list_is_empty(empty list)"); } /* create test elements */ size_elements=4; elements=(test_data_t *)malloc(sizeof(test_data_t)*size_elements); assert(elements); for(i=0 ; i < size_elements ; i++) { OBJ_CONSTRUCT(elements + i, test_data_t); (elements+i)->data=i; } /* populate list */ for(i=0 ; i < size_elements ; i++) { opal_list_append(&list,(opal_list_item_t *)(elements+i)); } list_size=opal_list_get_size(&list); if( list_size == size_elements ) { test_success(); } else { test_failure(" populating list"); } /* checking for empty on non-empty list */ if (!opal_list_is_empty(&list)) { test_success(); } else { test_failure(" opal_list_is_empty(non-empty list)"); } /* check that list is ordered as expected */ i=0; error_cnt=0; for(ele = (test_data_t *) opal_list_get_first(&list); ele != (test_data_t *) opal_list_get_end(&list); ele = (test_data_t *) ((opal_list_item_t *)ele)->opal_list_next) { if( ele->data != i ) error_cnt++; i++; } if( 0 == error_cnt ) { test_success(); } else { test_failure(" error in list order "); } /* check opal_list_get_first */ ele = (test_data_t *)NULL; ele = (test_data_t *) opal_list_get_first(&list); assert(ele); if( 0 == ele->data ) { test_success(); } else { test_failure(" error in opal_list_get_first"); } i=0; for(ele = (test_data_t *) 
opal_list_get_first(&list); ele != (test_data_t *) opal_list_get_end(&list); ele = (test_data_t *) ((opal_list_item_t *)ele)->opal_list_next) { i++; } if( size_elements == i ) { test_success(); } else { test_failure(" error in opal_list_get_first - list size changed "); } /* check opal_list_get_last */ ele = (test_data_t *)NULL; ele = (test_data_t *) opal_list_get_last(&list); assert(ele); if( (size_elements-1) == ele->data ) { test_success(); } else { test_failure(" error in opal_list_get_last"); } i=0; for(ele = (test_data_t *) opal_list_get_first(&list); ele != (test_data_t *) opal_list_get_end(&list); ele = (test_data_t *) ((opal_list_item_t *)ele)->opal_list_next) { i++; } if( size_elements == i ) { test_success(); } else { test_failure(" error in opal_list_get_first - list size changed "); } /* check opal_list_remove_first */ ele = (test_data_t *)NULL; ele = (test_data_t *) opal_list_remove_first(&list); assert(ele); if( 0 == ele->data ) { test_success(); } else { test_failure(" error in opal_list_remove_first"); } i=0; for(ele = (test_data_t *) opal_list_get_first(&list); ele != (test_data_t *) opal_list_get_end(&list); ele = (test_data_t *) ((opal_list_item_t *)ele)->opal_list_next) { i++; } if( (size_elements-1) == i ) { test_success(); } else { test_failure(" error in opal_list_remove_first - list size changed "); } /* test opal_list_prepend */ opal_list_prepend(&list,(opal_list_item_t *)elements); ele = (test_data_t *)NULL; ele = (test_data_t *) opal_list_get_first(&list); assert(ele); if( 0 == ele->data ) { test_success(); } else { test_failure(" error in opal_list_prepend"); } i=0; for(ele = (test_data_t *) opal_list_get_first(&list); ele != (test_data_t *) opal_list_get_end(&list); ele = (test_data_t *) ((opal_list_item_t *)ele)->opal_list_next) { i++; } if( size_elements == i ) { test_success(); } else { test_failure(" error in opal_list_prepend - list size changed "); } /* check opal_list_remove_last */ ele = (test_data_t *)NULL; ele = (test_data_t 
*) opal_list_remove_last(&list); assert(ele); if( (size_elements-1) == ele->data ) { test_success(); } else { test_failure(" error in opal_list_remove_last"); } i=0; for(ele = (test_data_t *) opal_list_get_first(&list); ele != (test_data_t *) opal_list_get_end(&list); ele = (test_data_t *) ((opal_list_item_t *)ele)->opal_list_next) { i++; } if( (size_elements-1) == i ) { test_success(); } else { test_failure(" error in opal_list_remove_last - list size changed "); } /* test opal_list_append */ opal_list_append(&list,(opal_list_item_t *)(elements+size_elements-1)); ele = (test_data_t *)NULL; ele = (test_data_t *) opal_list_get_last(&list); assert(ele); if( (size_elements-1) == ele->data ) { test_success(); } else { test_failure(" error in opal_list_append"); } i=0; for(ele = (test_data_t *) opal_list_get_first(&list); ele != (test_data_t *) opal_list_get_end(&list); ele = (test_data_t *) ((opal_list_item_t *)ele)->opal_list_next) { i++; } if( size_elements == i ) { test_success(); } else { test_failure(" error in opal_list_append - list size changed "); } /* remove element from list */ indx=size_elements/2; if( 0 == indx ) indx=1; assert(2 <= size_elements); ele = (test_data_t *)NULL; ele = (test_data_t *) opal_list_remove_item(&list,(opal_list_item_t *)(elements+indx)); assert(ele); if( (indx-1) == ele->data ) { test_success(); } else { test_failure(" error in opal_list_remove - previous"); } ele=(test_data_t *)(((opal_list_item_t *)ele)->opal_list_next); if( (indx+1) == ele->data ) { test_success(); } else { test_failure(" error in opal_list_remove - next"); } i=0; for(ele = (test_data_t *) opal_list_get_first(&list); ele != (test_data_t *) opal_list_get_end(&list); ele = (test_data_t *) ((opal_list_item_t *)ele)->opal_list_next) { i++; } if( (size_elements-1) == i ) { test_success(); } else { test_failure(" error in opal_list_remove - list size changed incorrectly"); } /* test the insert function */ i=opal_list_insert(&list,(opal_list_item_t 
*)(elements+indx),indx); if( 1 == i ) { test_success(); } else { test_failure(" error in opal_list_remove_item \n"); } i=0; for(ele = (test_data_t *) opal_list_get_first(&list); ele != (test_data_t *) opal_list_get_end(&list); ele = (test_data_t *) ((opal_list_item_t *)ele)->opal_list_next) { i++; } if( size_elements == i ) { test_success(); } else { test_failure(" error in opal_list_insert - incorrect list length"); } i=0; error_cnt=0; for(ele = (test_data_t *) opal_list_get_first(&list); ele != (test_data_t *) opal_list_get_end(&list); ele = (test_data_t *) ((opal_list_item_t *)ele)->opal_list_next) { if( ele->data != i ) error_cnt++; i++; } if( 0 == error_cnt ) { test_success(); } else { test_failure(" error in list order - opal_list_remove_item "); } /* test the splice and join functions */ list_size = opal_list_get_size(&list); for (i = 0, item = opal_list_get_first(&list) ; i < list_size / 2 ; ++i, item = opal_list_get_next(item)) { } opal_list_splice(&x, opal_list_get_end(&x), &list, item, opal_list_get_end(&list)); tmp_size_1 = opal_list_get_size(&list); tmp_size_2 = opal_list_get_size(&x); if (tmp_size_1 != i) { test_failure(" error in splice (size of list)"); } else if (tmp_size_2 != list_size - tmp_size_1) { test_failure(" error in splice (size of x)"); } else { test_success(); } opal_list_join(&list, opal_list_get_end(&list), &x); tmp_size_1 = opal_list_get_size(&list); tmp_size_2 = opal_list_get_size(&x); if (tmp_size_1 != list_size) { test_failure(" error in join (size of list)"); } else if (tmp_size_2 != 0) { test_failure(" error in join (size of x)"); } else { test_success(); } if (NULL != elements) free(elements); opal_finalize_util (); return test_finalize(); }
static int route_lost(const orte_process_name_t *route) { opal_list_item_t *item; orte_routed_tree_t *child; orte_routed_jobfam_t *jfam; uint16_t jfamily; int i; OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output, "%s route to %s lost", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(route))); /* if the route is to a different job family and we are the HNP, look it up */ if ((ORTE_JOB_FAMILY(route->jobid) != ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) && ORTE_PROC_IS_HNP) { jfamily = ORTE_JOB_FAMILY(route->jobid); for (i=0; i < orte_routed_jobfams.size; i++) { if (NULL == (jfam = (orte_routed_jobfam_t*)opal_pointer_array_get_item(&orte_routed_jobfams, i))) { continue; } if (jfam->job_family == jfamily) { OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output, "%s routed_radix: route to %s lost", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOB_FAMILY_PRINT(route->jobid))); opal_pointer_array_set_item(&orte_routed_jobfams, i, NULL); OBJ_RELEASE(jfam); break; } } } /* if we lose the connection to the lifeline and we are NOT already, * in finalize, tell the OOB to abort. * NOTE: we cannot call abort from here as the OOB needs to first * release a thread-lock - otherwise, we will hang!! 
*/ if (!orte_finalizing && NULL != lifeline && OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, route, lifeline)) { OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output, "%s routed:radix: Connection to lifeline %s lost", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(lifeline))); return ORTE_ERR_FATAL; } /* if we are the HNP or daemon, and the route is a daemon, * see if it is one of our children - if so, remove it */ if ((ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) && route->jobid == ORTE_PROC_MY_NAME->jobid) { for (item = opal_list_get_first(&my_children); item != opal_list_get_end(&my_children); item = opal_list_get_next(item)) { child = (orte_routed_tree_t*)item; if (child->vpid == route->vpid) { opal_list_remove_item(&my_children, item); OBJ_RELEASE(item); return ORTE_SUCCESS; } } } /* we don't care about this one, so return success */ return ORTE_SUCCESS; }
/*
 * Prune a framework's component list.
 *
 * A component stays on the list only if it passes the name filter derived
 * from "filter_names" AND its metadata parameter field contains every bit
 * of "filter_flags".  Rejected components are removed from the list,
 * unloaded and their list items released.  In include mode the surviving
 * list is then checked against the requested names via
 * component_find_check().
 *
 * Returns OPAL_SUCCESS when there is nothing to filter or (exclude mode)
 * after filtering; otherwise the error from parsing the request or the
 * result of component_find_check().
 */
int mca_base_components_filter (const char *framework_name, opal_list_t *components, int output_id,
                                const char *filter_names, uint32_t filter_flags)
{
    mca_base_component_list_item_t *entry, *upcoming;
    char **wanted = NULL;
    bool include_mode, selected, keep;
    int rc;

    assert (NULL != components);

    /* nothing requested, nothing to do */
    if (NULL == filter_names && 0 == filter_flags) {
        return OPAL_SUCCESS;
    }

    rc = mca_base_component_parse_requested (filter_names, &include_mode, &wanted);
    if (OPAL_SUCCESS != rc) {
        return rc;
    }

    OPAL_LIST_FOREACH_SAFE(entry, upcoming, components, mca_base_component_list_item_t) {
        const mca_base_component_t *component = entry->cli_component;
        mca_base_open_only_dummy_component_t *dummy =
            (mca_base_open_only_dummy_component_t *) entry->cli_component;

        selected = use_component (include_mode, (const char **) wanted,
                                  entry->cli_component->mca_component_name);
        keep = selected && ((filter_flags & dummy->data.param_field) == filter_flags);

        if (keep) {
            if (filter_flags & MCA_BASE_METADATA_PARAM_CHECKPOINT) {
                opal_output_verbose(10, output_id,
                                    "mca: base: components_filter: "
                                    "(%s) Component %s is Checkpointable",
                                    component->reserved,
                                    component->mca_component_name);
            }
        } else {
            /* explain the rejection when it is due to missing checkpoint support */
            if (selected &&
                (filter_flags & MCA_BASE_METADATA_PARAM_CHECKPOINT) &&
                !(MCA_BASE_METADATA_PARAM_CHECKPOINT & dummy->data.param_field)) {
                opal_output_verbose(10, output_id,
                                    "mca: base: components_filter: "
                                    "(%s) Component %s is *NOT* Checkpointable - Disabled",
                                    component->reserved,
                                    component->mca_component_name);
            }

            opal_list_remove_item (components, &entry->super);
            mca_base_component_unload (component, output_id);
            OBJ_RELEASE(entry);
        }
    }

    rc = include_mode
        ? component_find_check (framework_name, wanted, components)
        : OPAL_SUCCESS;

    if (NULL != wanted) {
        opal_argv_free (wanted);
    }

    return rc;
}
static int staged_mapper(orte_job_t *jdata) { mca_base_component_t *c=&mca_rmaps_staged_component.base_version; int i, j, k, rc; orte_app_context_t *app; opal_list_t node_list, desired; orte_std_cntr_t num_slots; orte_proc_t *proc; orte_node_t *node, *next; bool work_to_do = false, first_pass = false; opal_list_item_t *item, *it2; char *cptr, **minimap; orte_vpid_t load; /* only use this mapper if it was specified */ if (NULL == jdata->map->req_mapper || 0 != strcasecmp(jdata->map->req_mapper, c->mca_component_name) || ORTE_MAPPING_STAGED != ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) { /* I wasn't specified */ opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps:staged: job %s not using staged mapper", ORTE_JOBID_PRINT(jdata->jobid)); return ORTE_ERR_TAKE_NEXT_OPTION; } opal_output_verbose(2, orte_rmaps_base_framework.framework_output, "%s mca:rmaps:staged: mapping job %s with %d procs", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(jdata->jobid), (int)jdata->num_procs); /* flag that I did the mapping */ if (NULL != jdata->map->last_mapper) { free(jdata->map->last_mapper); } jdata->map->last_mapper = strdup(c->mca_component_name); /* if there are no nodes in the map, then this is our first * pass thru this job */ if (0 == jdata->map->num_nodes) { first_pass = true; } /* we assume that the app_contexts are in priority order, * with the highest priority being the first entry in the * job's app_context array. Loop across the app_contexts * in order, looking for apps that have not been * fully mapped */ for (i=0; i < jdata->apps->size; i++) { if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) { continue; } /* has it been fully mapped? 
*/ if (ORTE_APP_STATE_ALL_MAPPED <= app->state) { continue; } opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "%s mca:rmaps:staged: working app %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), app->app); /* find nodes that meet any constraints provided in the form of * -hostfile or -host directives */ OBJ_CONSTRUCT(&node_list, opal_list_t); /* get nodes based on a strict interpretation of the location hints */ if (ORTE_SUCCESS != (rc = orte_rmaps_base_get_target_nodes(&node_list, &num_slots, app, jdata->map->mapping, false, true))) { /* we were unable to get any nodes that match those * specified in the app */ if (ORTE_ERR_RESOURCE_BUSY == rc) { /* if the return is "busy", then at least one of the * specified resources must exist, but no slots are * currently available. This means there is at least * a hope of eventually being able to map this app * within its specified constraints, so continue working */ if (orte_soft_locations) { /* if soft locations were given, then we know that * none of the nodes in this allocation are available, * so there is no point in continuing to check the * remaining apps */ while (NULL != (item = opal_list_remove_first(&node_list))) { OBJ_RELEASE(item); } OBJ_DESTRUCT(&node_list); goto complete; } opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "%s mca:rmaps:staged: all nodes for this app are currently busy", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); OBJ_DESTRUCT(&node_list); continue; } else { /* this indicates that there are no nodes that match * the specified constraints, so there is no hope of * ever being able to execute this app. 
This is an * unrecoverable error - note that a return of * "silent" means that the function already printed * an error message, so the error_log will print nothing */ ORTE_ERROR_LOG(rc); return rc; } } /* if a max number of procs/node was given for this * app, remove all nodes from the list that exceed * that limit */ if (0 < app->max_procs_per_node) { item = opal_list_get_first(&node_list); while (item != opal_list_get_end(&node_list)) { it2 = opal_list_get_next(item); node = (orte_node_t*)item; if (app->max_procs_per_node <= node->num_procs) { opal_list_remove_item(&node_list, item); OBJ_RELEASE(item); } item = it2; } } /* if we have no available nodes, then move on to next app */ if (0 == opal_list_get_size(&node_list)) { OBJ_DESTRUCT(&node_list); continue; } /* if the app specified locations, soft or not, search the list of nodes * for those that match the requested locations and move those * to the desired list so we use them first */ if (NULL != app->dash_host) { OBJ_CONSTRUCT(&desired, opal_list_t); /* no particular order is required */ for (j=0; j < opal_argv_count(app->dash_host); j++) { minimap = opal_argv_split(app->dash_host[j], ','); for (k=0; k < opal_argv_count(minimap); k++) { cptr = minimap[k]; for (item = opal_list_get_first(&node_list); item != opal_list_get_end(&node_list); item = opal_list_get_next(item)) { node = (orte_node_t*)item; if (0 == strcmp(node->name, cptr) || (0 == strcmp("localhost", cptr) && 0 == strcmp(node->name, orte_process_info.nodename))) { opal_list_remove_item(&node_list, item); opal_list_append(&desired, item); opal_output_verbose(10, orte_rmaps_base_framework.framework_output, "%s mca:rmaps:staged: placing node %s on desired list", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name); break; } } } opal_argv_free(minimap); } /* if no nodes made the transition and the app specified soft * locations, then we can skip to look at the non-desired list */ if (0 == opal_list_get_size(&desired)) { OBJ_DESTRUCT(&desired); if 
(orte_soft_locations) { goto process; } else { /* move on to next app */ continue; } } /* cycle thru the procs for this app and attempt to map them * to the desired nodes using a load-balancing algo */ for (j=0; j < app->procs.size; j++) { if (NULL == (proc = opal_pointer_array_get_item(&app->procs, j))) { continue; } if (ORTE_PROC_STATE_UNDEF != proc->state) { /* this proc has already been mapped or executed */ opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "%s mca:rmaps:staged: proc %s has already been mapped", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&proc->name)); continue; } /* flag that there is at least one proc still to * be executed */ work_to_do = true; /* track number mapped */ jdata->num_mapped++; /* find the lightest-loaded node on the desired list */ node = NULL; load = ORTE_VPID_MAX; for (item = opal_list_get_first(&desired); item != opal_list_get_end(&desired); item = opal_list_get_next(item)) { next = (orte_node_t*)item; if (next->num_procs < load) { node = next; load = next->num_procs; } } /* put the proc there */ proc->node = node; proc->nodename = node->name; /* the local rank is the number of procs * on this node from this job - we don't * directly track this number, so it must * be found by looping across the node->procs * array and counting it each time. 
For now, * since we don't use this value in this mode * of operation, just set it to something arbitrary */ proc->local_rank = node->num_procs; /* the node rank is simply the number of procs * on the node at this time */ proc->node_rank = node->num_procs; /* track number of procs on node and number of slots used */ node->num_procs++; node->slots_inuse++; opal_output_verbose(10, orte_rmaps_base_framework.framework_output, "%s Proc %s on node %s: slots %d inuse %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&proc->name), node->name, (int)node->slots, (int)node->slots_inuse); if (node->slots_inuse == node->slots) { opal_list_remove_item(&desired, &node->super); OBJ_RELEASE(node); } if (0 > (rc = opal_pointer_array_add(node->procs, (void*)proc))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(proc); return rc; } /* retain the proc struct so that we correctly track its release */ OBJ_RETAIN(proc); proc->state = ORTE_PROC_STATE_INIT; /* flag the proc as updated so it will be included * in the next pidmap message */ proc->updated =true; /* add the node to the map, if needed */ if (!node->mapped) { if (ORTE_SUCCESS > (rc = opal_pointer_array_add(jdata->map->nodes, (void*)node))) { ORTE_ERROR_LOG(rc); return rc; } node->mapped = true; OBJ_RETAIN(node); /* maintain accounting on object */ jdata->map->num_nodes++; } if (0 == opal_list_get_size(&desired)) { /* nothing more we can do */ break; } } /* clear the list */ while (NULL != (item = opal_list_remove_first(&desired))) { OBJ_RELEASE(item); } OBJ_DESTRUCT(&desired); } process: for (j=0; j < app->procs.size; j++) { if (NULL == (proc = opal_pointer_array_get_item(&app->procs, j))) { continue; } if (ORTE_PROC_STATE_UNDEF != proc->state) { /* this proc has already been mapped or executed */ opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "%s mca:rmaps:staged: proc %s has already been mapped", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&proc->name)); continue; } /* find the lightest-loaded node on the 
node list */ node = NULL; load = ORTE_VPID_MAX; for (item = opal_list_get_first(&node_list); item != opal_list_get_end(&node_list); item = opal_list_get_next(item)) { next = (orte_node_t*)item; if (next->num_procs < load) { node = next; load = next->num_procs; } } /* flag that there is at least one proc still to * be executed */ work_to_do = true; /* track number mapped */ jdata->num_mapped++; /* map this proc to the first available slot */ OBJ_RETAIN(node); /* maintain accounting on object */ opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "%s mca:rmaps:staged: assigning proc %s to node %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&proc->name), node->name); proc->node = node; proc->nodename = node->name; /* the local rank is the number of procs * on this node from this job - we don't * directly track this number, so it must * be found by looping across the node->procs * array and counting it each time. For now, * since we don't use this value in this mode * of operation, just set it to something arbitrary */ proc->local_rank = node->num_procs; /* the node rank is simply the number of procs * on the node at this time */ proc->node_rank = node->num_procs; /* track number of procs on node and number of slots used */ node->num_procs++; node->slots_inuse++; opal_output_verbose(10, orte_rmaps_base_framework.framework_output, "%s Proc %s on node %s: slots %d inuse %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&proc->name), node->name, (int)node->slots, (int)node->slots_inuse); if (node->slots_inuse == node->slots) { opal_list_remove_item(&node_list, &node->super); OBJ_RELEASE(node); } if (0 > (rc = opal_pointer_array_add(node->procs, (void*)proc))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(proc); return rc; } /* retain the proc struct so that we correctly track its release */ OBJ_RETAIN(proc); proc->state = ORTE_PROC_STATE_INIT; /* flag the proc as updated so it will be included * in the next pidmap message */ proc->updated =true; /* add 
the node to the map, if needed */ if (!node->mapped) { if (ORTE_SUCCESS > (rc = opal_pointer_array_add(jdata->map->nodes, (void*)node))) { ORTE_ERROR_LOG(rc); return rc; } node->mapped = true; OBJ_RETAIN(node); /* maintain accounting on object */ jdata->map->num_nodes++; } if (0 == opal_list_get_size(&node_list)) { /* nothing more we can do */ break; } } /* clear the list */ while (NULL != (item = opal_list_remove_first(&node_list))) { OBJ_RELEASE(item); } OBJ_DESTRUCT(&node_list); } complete: /* if there isn't at least one proc that can be launched, * then indicate that we don't need to proceed with the * launch sequence */ if (!work_to_do) { return ORTE_ERR_RESOURCE_BUSY; } /* flag that the job was updated so it will be * included in the pidmap message */ jdata->updated = true; /* if we successfully mapped ALL procs in the first pass, * then this job is capable of supporting MPI procs */ if (first_pass && jdata->num_mapped == jdata->num_procs) { opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "%s mca:rmaps:staged: job %s is MPI-capable", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(jdata->jobid)); jdata->gang_launched = true; } return ORTE_SUCCESS; }
/* * Sequentially map the ranks according to the placement in the * specified hostfile */ static int orte_rmaps_seq_map(orte_job_t *jdata) { orte_job_map_t *map; orte_app_context_t *app; int i, n; orte_std_cntr_t j; opal_list_item_t *item; orte_node_t *node, *nd; seq_node_t *sq, *save=NULL, *seq;; orte_vpid_t vpid; orte_std_cntr_t num_nodes; int rc; opal_list_t default_seq_list; opal_list_t node_list, *seq_list, sq_list; orte_proc_t *proc; mca_base_component_t *c = &mca_rmaps_seq_component.base_version; char *hosts = NULL, *sep, *eptr; FILE *fp; opal_hwloc_resource_type_t rtype; OPAL_OUTPUT_VERBOSE((1, orte_rmaps_base_framework.framework_output, "%s rmaps:seq called on job %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(jdata->jobid))); /* this mapper can only handle initial launch * when seq mapping is desired - allow * restarting of failed apps */ if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_RESTART)) { opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps:seq: job %s is being restarted - seq cannot map", ORTE_JOBID_PRINT(jdata->jobid)); return ORTE_ERR_TAKE_NEXT_OPTION; } if (NULL != jdata->map->req_mapper) { if (0 != strcasecmp(jdata->map->req_mapper, c->mca_component_name)) { /* a mapper has been specified, and it isn't me */ opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps:seq: job %s not using sequential mapper", ORTE_JOBID_PRINT(jdata->jobid)); return ORTE_ERR_TAKE_NEXT_OPTION; } /* we need to process it */ goto process; } if (ORTE_MAPPING_SEQ != ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) { /* I don't know how to do these - defer */ opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps:seq: job %s not using seq mapper", ORTE_JOBID_PRINT(jdata->jobid)); return ORTE_ERR_TAKE_NEXT_OPTION; } process: opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps:seq: mapping job %s", ORTE_JOBID_PRINT(jdata->jobid)); /* flag that I did the mapping */ if (NULL != 
jdata->map->last_mapper) { free(jdata->map->last_mapper); } jdata->map->last_mapper = strdup(c->mca_component_name); /* convenience def */ map = jdata->map; /* if there is a default hostfile, go and get its ordered list of nodes */ OBJ_CONSTRUCT(&default_seq_list, opal_list_t); if (NULL != orte_default_hostfile) { char *hstname = NULL; /* open the file */ fp = fopen(orte_default_hostfile, "r"); if (NULL == fp) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); rc = ORTE_ERR_NOT_FOUND; goto error; } while (NULL != (hstname = orte_getline(fp))) { if (0 == strlen(hstname)) { free(hstname); /* blank line - ignore */ continue; } if( '#' == hstname[0] ) { free(hstname); /* Comment line - ignore */ continue; } sq = OBJ_NEW(seq_node_t); if (NULL != (sep = strchr(hstname, ' '))) { *sep = '\0'; sep++; /* remove any trailing space */ eptr = sep + strlen(sep) - 1; while (eptr > sep && isspace(*eptr)) { eptr--; } *(eptr+1) = 0; sq->cpuset = strdup(sep); } // Strip off the FQDN if present, ignore IP addresses if( !orte_keep_fqdn_hostnames && !opal_net_isaddr(hstname) ) { char *ptr; if (NULL != (ptr = strchr(hstname, '.'))) { *ptr = '\0'; } } sq->hostname = hstname; opal_list_append(&default_seq_list, &sq->super); } fclose(fp); } /* start at the beginning... 
*/ vpid = 0; jdata->num_procs = 0; if (0 < opal_list_get_size(&default_seq_list)) { save = (seq_node_t*)opal_list_get_first(&default_seq_list); } /* default to LOGICAL processors */ if (orte_get_attribute(&jdata->attributes, ORTE_JOB_PHYSICAL_CPUIDS, NULL, OPAL_BOOL)) { opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps:seq: using PHYSICAL processors"); rtype = OPAL_HWLOC_PHYSICAL; } else { opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps:seq: using LOGICAL processors"); rtype = OPAL_HWLOC_LOGICAL; } /* initialize all the nodes as not included in this job map */ for (j=0; j < orte_node_pool->size; j++) { if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, j))) { ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED); } } /* cycle through the app_contexts, mapping them sequentially */ for(i=0; i < jdata->apps->size; i++) { if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) { continue; } /* dash-host trumps hostfile */ if (orte_get_attribute(&app->attributes, ORTE_APP_DASH_HOST, (void**)&hosts, OPAL_STRING)) { opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps:seq: using dash-host nodes on app %s", app->app); OBJ_CONSTRUCT(&node_list, opal_list_t); /* dash host entries cannot specify cpusets, so used the std function to retrieve the list */ if (ORTE_SUCCESS != (rc = orte_util_get_ordered_dash_host_list(&node_list, hosts))) { ORTE_ERROR_LOG(rc); free(hosts); goto error; } free(hosts); /* transfer the list to a seq_node_t list */ OBJ_CONSTRUCT(&sq_list, opal_list_t); while (NULL != (nd = (orte_node_t*)opal_list_remove_first(&node_list))) { sq = OBJ_NEW(seq_node_t); sq->hostname = strdup(nd->name); opal_list_append(&sq_list, &sq->super); OBJ_RELEASE(nd); } OBJ_DESTRUCT(&node_list); seq_list = &sq_list; } else if (orte_get_attribute(&app->attributes, ORTE_APP_HOSTFILE, (void**)&hosts, OPAL_STRING)) { char *hstname; if (NULL == hosts) { rc 
= ORTE_ERR_NOT_FOUND; goto error; } opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps:seq: using hostfile %s nodes on app %s", hosts, app->app); OBJ_CONSTRUCT(&sq_list, opal_list_t); /* open the file */ fp = fopen(hosts, "r"); if (NULL == fp) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); rc = ORTE_ERR_NOT_FOUND; OBJ_DESTRUCT(&sq_list); goto error; } while (NULL != (hstname = orte_getline(fp))) { if (0 == strlen(hstname)) { free(hstname); /* blank line - ignore */ continue; } if( '#' == hstname[0] ) { free(hstname); /* Comment line - ignore */ continue; } sq = OBJ_NEW(seq_node_t); if (NULL != (sep = strchr(hstname, ' '))) { *sep = '\0'; sep++; /* remove any trailing space */ eptr = sep + strlen(sep) - 1; while (eptr > sep && isspace(*eptr)) { eptr--; } *(eptr+1) = 0; sq->cpuset = strdup(sep); } // Strip off the FQDN if present, ignore IP addresses if( !orte_keep_fqdn_hostnames && !opal_net_isaddr(hstname) ) { char *ptr; if (NULL != (ptr = strchr(hstname, '.'))) { (*ptr) = '\0'; } } sq->hostname = hstname; opal_list_append(&sq_list, &sq->super); } fclose(fp); free(hosts); seq_list = &sq_list; } else if (0 < opal_list_get_size(&default_seq_list)) { opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps:seq: using default hostfile nodes on app %s", app->app); seq_list = &default_seq_list; } else { /* can't do anything - no nodes available! 
*/ orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:no-available-resources", true); return ORTE_ERR_SILENT; } /* check for nolocal and remove the head node, if required */ if (map->mapping & ORTE_MAPPING_NO_USE_LOCAL) { for (item = opal_list_get_first(seq_list); item != opal_list_get_end(seq_list); item = opal_list_get_next(item) ) { seq = (seq_node_t*)item; /* need to check ifislocal because the name in the * hostfile may not have been FQDN, while name returned * by gethostname may have been (or vice versa) */ if (orte_ifislocal(seq->hostname)) { opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps:seq: removing head node %s", seq->hostname); opal_list_remove_item(seq_list, item); OBJ_RELEASE(item); /* "un-retain" it */ } } } if (NULL == seq_list || 0 == (num_nodes = (orte_std_cntr_t)opal_list_get_size(seq_list))) { orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:no-available-resources", true); return ORTE_ERR_SILENT; } /* if num_procs wasn't specified, set it now */ if (0 == app->num_procs) { app->num_procs = num_nodes; opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps:seq: setting num procs to %s for app %s", ORTE_VPID_PRINT(app->num_procs), app->app); } else if (num_nodes < app->num_procs) { orte_show_help("help-orte-rmaps-base.txt", "seq:not-enough-resources", true, app->num_procs, num_nodes); return ORTE_ERR_SILENT; } if (seq_list == &default_seq_list) { sq = save; } else { sq = (seq_node_t*)opal_list_get_first(seq_list); } for (n=0; n < app->num_procs; n++) { /* find this node on the global array - this is necessary so * that our mapping gets saved on that array as the objects * returned by the hostfile function are -not- on the array */ node = NULL; for (j=0; j < orte_node_pool->size; j++) { if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, j))) { continue; } if (0 == strcmp(sq->hostname, node->name)) { break; } } if (NULL == node) { /* wasn't found - 
that is an error */ orte_show_help("help-orte-rmaps-seq.txt", "orte-rmaps-seq:resource-not-found", true, sq->hostname); rc = ORTE_ERR_SILENT; goto error; } /* ensure the node is in the map */ if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) { OBJ_RETAIN(node); opal_pointer_array_add(map->nodes, node); jdata->map->num_nodes++; ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED); } proc = orte_rmaps_base_setup_proc(jdata, node, i); if ((node->slots < (int)node->num_procs) || (0 < node->slots_max && node->slots_max < (int)node->num_procs)) { if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) { orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error", true, node->num_procs, app->app); ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE); rc = ORTE_ERR_SILENT; goto error; } /* flag the node as oversubscribed so that sched-yield gets * properly set */ ORTE_FLAG_SET(node, ORTE_NODE_FLAG_OVERSUBSCRIBED); ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_OVERSUBSCRIBED); /* check for permission */ if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_SLOTS_GIVEN)) { /* if we weren't given a directive either way, then we will error out * as the #slots were specifically given, either by the host RM or * via hostfile/dash-host */ if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) { orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error", true, app->num_procs, app->app); ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE); return ORTE_ERR_SILENT; } else if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) { /* if we were explicitly told not to oversubscribe, then don't */ orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error", true, app->num_procs, app->app); ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE); return ORTE_ERR_SILENT; } } } /* assign the vpid */ proc->name.vpid = vpid++; opal_output_verbose(5, orte_rmaps_base_framework.framework_output, 
"mca:rmaps:seq: assign proc %s to node %s for app %s", ORTE_VPID_PRINT(proc->name.vpid), sq->hostname, app->app); /* record the cpuset, if given */ if (NULL != sq->cpuset) { hwloc_cpuset_t bitmap; char *cpu_bitmap; if (NULL == node->topology || NULL == node->topology->topo) { /* not allowed - for sequential cpusets, we must have * the topology info */ orte_show_help("help-orte-rmaps-base.txt", "rmaps:no-topology", true, node->name); rc = ORTE_ERR_SILENT; goto error; } /* if we are using hwthreads as cpus and binding to hwthreads, then * we can just copy the cpuset across as it already specifies things * at that level */ if (opal_hwloc_use_hwthreads_as_cpus && OPAL_BIND_TO_HWTHREAD == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) { cpu_bitmap = strdup(sq->cpuset); } else { /* setup the bitmap */ bitmap = hwloc_bitmap_alloc(); /* parse the slot_list to find the socket and core */ if (ORTE_SUCCESS != (rc = opal_hwloc_base_cpu_list_parse(sq->cpuset, node->topology->topo, rtype, bitmap))) { ORTE_ERROR_LOG(rc); hwloc_bitmap_free(bitmap); goto error; } /* note that we cannot set the proc locale to any specific object * as the slot list may have assigned it to more than one - so * leave that field NULL */ /* set the proc to the specified map */ hwloc_bitmap_list_asprintf(&cpu_bitmap, bitmap); hwloc_bitmap_free(bitmap); } orte_set_attribute(&proc->attributes, ORTE_PROC_CPU_BITMAP, ORTE_ATTR_GLOBAL, cpu_bitmap, OPAL_STRING); opal_output_verbose(5, orte_rmaps_base_framework.framework_output, "mca:rmaps:seq: binding proc %s to cpuset %s bitmap %s", ORTE_VPID_PRINT(proc->name.vpid), sq->cpuset, cpu_bitmap); /* we are going to bind to cpuset since the user is specifying the cpus */ OPAL_SET_BINDING_POLICY(jdata->map->binding, OPAL_BIND_TO_CPUSET); /* note that the user specified the mapping */ ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYUSER); ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_GIVEN); /* cleanup */ free(cpu_bitmap); } else { 
hwloc_obj_t locale; /* assign the locale - okay for the topo to be null as * it just means it wasn't returned */ if (NULL != node->topology && NULL != node->topology->topo) { locale = hwloc_get_root_obj(node->topology->topo); orte_set_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, ORTE_ATTR_LOCAL, locale, OPAL_PTR); } } /* add to the jdata proc array */ if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) { ORTE_ERROR_LOG(rc); goto error; } /* move to next node */ sq = (seq_node_t*)opal_list_get_next(&sq->super); } /** track the total number of processes we mapped */ jdata->num_procs += app->num_procs; /* cleanup the node list if it came from this app_context */ if (seq_list != &default_seq_list) { OPAL_LIST_DESTRUCT(seq_list); } else { save = sq; } } /* mark that this job is to be fully * described in the launch msg */ orte_set_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL); return ORTE_SUCCESS; error: OPAL_LIST_DESTRUCT(&default_seq_list); return rc; }
/*
 * Parse the provided hostfile and filter the nodes that are on the
 * input list against it.
 *
 * @param nodes     list of orte_node_t objects to be filtered
 * @param hostfile  path of the hostfile to parse
 * @param remove    when true, every matched node is moved off of "nodes"
 *                  and, once all hostfile entries have been processed,
 *                  "nodes" is emptied and rebuilt from the matches in the
 *                  order they were matched; when false, "nodes" is left
 *                  intact and matched nodes are flagged via ->mapped
 *
 * @return ORTE_SUCCESS on success,
 *         ORTE_ERR_TAKE_NEXT_OPTION if the hostfile produced no nodes,
 *         ORTE_ERR_SILENT after a help message has been shown,
 *         or whatever error hostfile_parse() reported.
 */
int orte_util_filter_hostfile_nodes(opal_list_t *nodes,
                                    char *hostfile,
                                    bool remove)
{
    opal_list_t newnodes, exclude;
    opal_list_item_t *item1, *item2 = NULL, *next, *item3;
    orte_node_t *node_from_list, *node_from_file, *node_from_pool, *node3;
    int rc = ORTE_SUCCESS;
    char *cptr;
    int num_empty, nodeidx;
    bool want_all_empty = false;
    opal_list_t keep;
    bool found;

    OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
                        "%s hostfile: filtering nodes through hostfile %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hostfile));

    /* parse the hostfile and create local list of findings */
    OBJ_CONSTRUCT(&newnodes, opal_list_t);
    OBJ_CONSTRUCT(&exclude, opal_list_t);
    if (ORTE_SUCCESS != (rc = hostfile_parse(hostfile, &newnodes, &exclude, false))) {
        OBJ_DESTRUCT(&newnodes);
        OBJ_DESTRUCT(&exclude);
        return rc;
    }

    /* if the hostfile was empty, then treat it as a no-op filter */
    if (0 == opal_list_get_size(&newnodes)) {
        OBJ_DESTRUCT(&newnodes);
        OBJ_DESTRUCT(&exclude);
        /* indicate that the hostfile was empty */
        return ORTE_ERR_TAKE_NEXT_OPTION;
    }

    /* remove from the list of newnodes those that are in the exclude list */
    while (NULL != (item1 = opal_list_remove_first(&exclude))) {
        node_from_file = (orte_node_t*)item1;
        /* check for matches on nodes */
        for (item2 = opal_list_get_first(&newnodes);
             item2 != opal_list_get_end(&newnodes);
             item2 = opal_list_get_next(item2)) {
            orte_node_t *node = (orte_node_t*)item2;
            if (0 == strcmp(node_from_file->name, node->name)) {
                /* match - remove it */
                opal_output(0, "HOST %s ON EXCLUDE LIST - REMOVING", node->name);
                opal_list_remove_item(&newnodes, item2);
                OBJ_RELEASE(item2);
                break;
            }
        }
        OBJ_RELEASE(item1);
    }

    /* now check our nodes and keep or mark those that match. We can
     * destruct our hostfile list as we go since this won't be needed.
     * Inside this loop item2 is owned by us (it is off of every list),
     * so any exit via "cleanup" must release it - the cleanup code
     * does that whenever item2 is non-NULL.
     */
    OBJ_CONSTRUCT(&keep, opal_list_t);
    while (NULL != (item2 = opal_list_remove_first(&newnodes))) {
        node_from_file = (orte_node_t*)item2;

        /* see if this is a relative node syntax */
        if ('+' == node_from_file->name[0]) {
            /* see if we specified empty nodes */
            if ('e' == node_from_file->name[1] ||
                'E' == node_from_file->name[1]) {
                /* request for empty nodes - do they want
                 * all of them?
                 */
                if (NULL != (cptr = strchr(node_from_file->name, ':'))) {
                    /* the colon indicates a specific # are requested */
                    cptr++; /* step past : */
                    num_empty = strtol(cptr, NULL, 10);
                } else {
                    /* want them all - set num_empty to max */
                    num_empty = INT_MAX;
                    want_all_empty = true;
                }
                /* search the list of nodes provided to us and find those
                 * that are empty
                 */
                item1 = opal_list_get_first(nodes);
                while (0 < num_empty && item1 != opal_list_get_end(nodes)) {
                    node_from_list = (orte_node_t*)item1;
                    next = opal_list_get_next(item1);  /* keep our place */
                    if (0 == node_from_list->slots_inuse) {
                        /* check to see if this node is explicitly called
                         * out later - if so, don't use it here
                         */
                        bool named_later = false;
                        for (item3 = opal_list_get_first(&newnodes);
                             item3 != opal_list_get_end(&newnodes);
                             item3 = opal_list_get_next(item3)) {
                            node3 = (orte_node_t*)item3;
                            if (0 == strcmp(node3->name, node_from_list->name)) {
                                /* match - don't use it */
                                named_later = true;
                                break;
                            }
                        }
                        if (!named_later) {
                            if (remove) {
                                /* remove item from list */
                                opal_list_remove_item(nodes, item1);
                                /* xfer to keep list */
                                opal_list_append(&keep, item1);
                            } else {
                                /* mark as included */
                                node_from_list->mapped = true;
                            }
                            --num_empty;
                        }
                    }
                    item1 = next;
                }
                /* did they get everything they wanted? */
                if (!want_all_empty && 0 < num_empty) {
                    orte_show_help("help-hostfile.txt", "hostfile:not-enough-empty",
                                   true, num_empty);
                    rc = ORTE_ERR_SILENT;
                    goto cleanup;
                }
            } else if ('n' == node_from_file->name[1] ||
                       'N' == node_from_file->name[1]) {
                /* they want a specific relative node #, so
                 * look it up on global pool
                 */
                nodeidx = strtol(&node_from_file->name[2], NULL, 10);
                if (NULL == (node_from_pool = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, nodeidx))) {
                    /* this is an error */
                    orte_show_help("help-hostfile.txt", "hostfile:relative-node-not-found",
                                   true, nodeidx, node_from_file->name);
                    rc = ORTE_ERR_SILENT;
                    goto cleanup;
                }
                /* search the list of nodes provided to us and find it.
                 * The iterator must advance from item1 - stepping from
                 * the list object itself never walks the list.
                 */
                for (item1 = opal_list_get_first(nodes);
                     item1 != opal_list_get_end(nodes);
                     item1 = opal_list_get_next(item1)) {
                    node_from_list = (orte_node_t*)item1;
                    if (0 == strcmp(node_from_list->name, node_from_pool->name)) {
                        if (remove) {
                            /* match - remove item from list */
                            opal_list_remove_item(nodes, item1);
                            /* xfer to keep list */
                            opal_list_append(&keep, item1);
                        } else {
                            /* mark as included */
                            node_from_list->mapped = true;
                        }
                        break;
                    }
                }
            } else {
                /* invalid relative node syntax */
                orte_show_help("help-hostfile.txt", "hostfile:invalid-relative-node-syntax",
                               true, node_from_file->name);
                rc = ORTE_ERR_SILENT;
                goto cleanup;
            }
        } else {
            /* we are looking for a specific node on the list
             * search the provided list of nodes to see if this
             * one is found
             */
            found = false;
            for (item1 = opal_list_get_first(nodes);
                 item1 != opal_list_get_end(nodes);
                 item1 = opal_list_get_next(item1)) {
                node_from_list = (orte_node_t*)item1;
                /* since the name in the hostfile might not match
                 * our local name, and yet still be intended to match,
                 * we have to check for local interfaces
                 */
                if (0 == strcmp(node_from_file->name, node_from_list->name) ||
                    (0 == strcmp(node_from_file->name, "localhost") &&
                     0 == strcmp(node_from_list->name, orte_process_info.nodename)) ||
                    (opal_ifislocal(node_from_list->name) &&
                     opal_ifislocal(node_from_file->name))) {
                    /* if the slot count here is less than the
                     * total slots avail on this node, set it
                     * to the specified count - this allows people
                     * to subdivide an allocation
                     */
                    if (node_from_file->slots < node_from_list->slots) {
                        node_from_list->slots = node_from_file->slots;
                    }
                    if (remove) {
                        /* remove the node from the list */
                        opal_list_remove_item(nodes, item1);
                        /* xfer it to keep list */
                        opal_list_append(&keep, item1);
                    } else {
                        /* mark as included */
                        node_from_list->mapped = true;
                    }
                    found = true;
                    break;
                }
            }
            /* if the host in the newnode list wasn't found,
             * then that is an error we need to report to the
             * user and abort
             */
            if (!found) {
                orte_show_help("help-hostfile.txt", "hostfile:extra-node-not-found",
                               true, hostfile, node_from_file->name);
                rc = ORTE_ERR_SILENT;
                goto cleanup;
            }
        }
        /* cleanup the newnode list */
        OBJ_RELEASE(item2);
    }
    /* the loop above only ends normally once newnodes is empty, at
     * which point item2 is NULL and rc still holds ORTE_SUCCESS
     */

    if (remove) {
        /* clear the rest of the nodes list */
        while (NULL != (item1 = opal_list_remove_first(nodes))) {
            OBJ_RELEASE(item1);
        }

        /* the nodes list has been cleared - rebuild it in order */
        while (NULL != (item1 = opal_list_remove_first(&keep))) {
            opal_list_append(nodes, item1);
        }
    }

cleanup:
    /* release the hostfile entry that was being processed when an
     * error was detected
     */
    if (NULL != item2) {
        OBJ_RELEASE(item2);
    }
    /* release any hostfile entries we never got to */
    while (NULL != (item1 = opal_list_remove_first(&newnodes))) {
        OBJ_RELEASE(item1);
    }
    /* on error, nodes already moved to the keep list are no longer
     * reachable from the caller's list - drop our reference to them
     */
    while (NULL != (item1 = opal_list_remove_first(&keep))) {
        OBJ_RELEASE(item1);
    }
    OBJ_DESTRUCT(&keep);
    OBJ_DESTRUCT(&newnodes);
    OBJ_DESTRUCT(&exclude);

    return rc;
}
/*
 * Build an ordered list of hosts from a hostfile.
 *
 * The hostfile is parsed straight onto "nodes" with duplicates kept.
 * Relative directives are then expanded in place:
 *   "+e[:N]"  is replaced by new node objects for pool entries whose
 *             slots_inuse is zero (all of them, or at most N)
 *   "+n<idx>" is replaced by a new node object for the pool entry at
 *             that index (shifted by one when the HNP is not allocated)
 * Finally, every entry whose name appears on the hostfile's exclude
 * list is removed, including duplicates.
 *
 * @param nodes     list that receives the orte_node_t entries
 * @param hostfile  path of the hostfile to parse
 *
 * @return ORTE_SUCCESS, ORTE_ERR_SILENT after a help message has been
 *         shown, or the error returned by hostfile_parse().
 */
int orte_util_get_ordered_host_list(opal_list_t *nodes,
                                    char *hostfile)
{
    opal_list_t exclude;
    opal_list_item_t *item, *itm, *item2, *item1;
    char *cptr;
    int num_empty, i, nodeidx, startempty=0;
    bool want_all_empty=false;
    orte_node_t *node_from_pool, *newnode;
    int rc;

    OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
                         "%s hostfile: creating ordered list of hosts from hostfile %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hostfile));

    OBJ_CONSTRUCT(&exclude, opal_list_t);

    /* parse the hostfile and add the contents to the list, keeping duplicates */
    if (ORTE_SUCCESS != (rc = hostfile_parse(hostfile, nodes, &exclude, true))) {
        goto cleanup;
    }

    /* parse the nodes to process any relative node directives */
    item2 = opal_list_get_first(nodes);
    while (item2 != opal_list_get_end(nodes)) {
        orte_node_t *node=(orte_node_t*)item2;
        /* save the next location in case this one gets removed */
        item1 = opal_list_get_next(item2);

        if ('+' != node->name[0]) {
            item2 = item1;
            continue;
        }

        /* see if we specified empty nodes */
        if ('e' == node->name[1] ||
            'E' == node->name[1]) {
            /* request for empty nodes - do they want
             * all of them?
             */
            if (NULL != (cptr = strchr(node->name, ':'))) {
                /* the colon indicates a specific # are requested */
                cptr++; /* step past : */
                num_empty = strtol(cptr, NULL, 10);
            } else {
                /* want them all - set num_empty to max */
                num_empty = INT_MAX;
                want_all_empty = true;
            }
            /* insert empty nodes into newnodes list in place of the current item.
             * since item1 is the next item, we insert in front of it
             */
            if (!orte_hnp_is_allocated && 0 == startempty) {
                startempty = 1;
            }
            for (i=startempty; 0 < num_empty && i < orte_node_pool->size; i++) {
                if (NULL == (node_from_pool = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
                    continue;
                }
                if (0 == node_from_pool->slots_inuse) {
                    newnode = OBJ_NEW(orte_node_t);
                    newnode->name = strdup(node_from_pool->name);
                    /* if the slot count here is less than the
                     * total slots avail on this node, set it
                     * to the specified count - this allows people
                     * to subdivide an allocation
                     */
                    if (node->slots < node_from_pool->slots) {
                        newnode->slots = node->slots;
                    } else {
                        newnode->slots = node_from_pool->slots;
                    }
                    opal_list_insert_pos(nodes, item1, &newnode->super);
                    /* track number added */
                    --num_empty;
                }
            }
            /* bookmark where we stopped in case they ask for more */
            startempty = i;
            /* did they get everything they wanted? */
            if (!want_all_empty && 0 < num_empty) {
                orte_show_help("help-hostfile.txt", "hostfile:not-enough-empty",
                               true, num_empty);
                rc = ORTE_ERR_SILENT;
                goto cleanup;
            }
            /* since we have expanded the provided node, remove
             * it from list
             */
            opal_list_remove_item(nodes, item2);
            OBJ_RELEASE(item2);
        } else if ('n' == node->name[1] ||
                   'N' == node->name[1]) {
            /* they want a specific relative node #, so
             * look it up on global pool
             */
            nodeidx = strtol(&node->name[2], NULL, 10);
            /* if the HNP is not allocated, then we need to
             * adjust the index as the node pool is offset
             * by one
             */
            if (!orte_hnp_is_allocated) {
                nodeidx++;
            }
            /* see if that location is filled */
            if (NULL == (node_from_pool = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, nodeidx))) {
                /* this is an error */
                orte_show_help("help-hostfile.txt", "hostfile:relative-node-not-found",
                               true, nodeidx, node->name);
                rc = ORTE_ERR_SILENT;
                goto cleanup;
            }
            /* create the node object */
            newnode = OBJ_NEW(orte_node_t);
            newnode->name = strdup(node_from_pool->name);
            /* if the slot count here is less than the
             * total slots avail on this node, set it
             * to the specified count - this allows people
             * to subdivide an allocation
             */
            if (node->slots < node_from_pool->slots) {
                newnode->slots = node->slots;
            } else {
                newnode->slots = node_from_pool->slots;
            }
            /* insert it before item1 */
            opal_list_insert_pos(nodes, item1, &newnode->super);
            /* since we have expanded the provided node, remove
             * it from list
             */
            opal_list_remove_item(nodes, item2);
            OBJ_RELEASE(item2);
        } else {
            /* invalid relative node syntax */
            orte_show_help("help-hostfile.txt", "hostfile:invalid-relative-node-syntax",
                           true, node->name);
            rc = ORTE_ERR_SILENT;
            goto cleanup;
        }

        /* move to next */
        item2 = item1;
    }

    /* remove from the list of nodes those that are in the exclude list */
    while (NULL != (item = opal_list_remove_first(&exclude))) {
        orte_node_t *exnode = (orte_node_t*)item;

        /* check for matches on nodes - we have to cycle through the
         * entire list as we could have duplicates
         */
        itm = opal_list_get_first(nodes);
        while (itm != opal_list_get_end(nodes)) {
            orte_node_t *node=(orte_node_t*)itm;
            /* fetch the successor first: once itm has been removed and
             * released it must not be touched again
             */
            opal_list_item_t *following = opal_list_get_next(itm);

            if (0 == strcmp(exnode->name, node->name)) {
                /* match - remove it */
                opal_list_remove_item(nodes, itm);
                OBJ_RELEASE(itm);
            }
            itm = following;
        }
        OBJ_RELEASE(item);
    }

cleanup:
    /* an error exit can leave entries on the exclude list - release
     * them before tearing the list down
     */
    while (NULL != (item = opal_list_remove_first(&exclude))) {
        OBJ_RELEASE(item);
    }
    OBJ_DESTRUCT(&exclude);

    return rc;
}
/*
 * Parse one hostfile entry.
 *
 * @param token     the token that begins the entry; any further tokens
 *                  are pulled via orte_util_hostfile_lex()
 * @param updates   list that receives (or already holds) included nodes
 * @param exclude   list that receives nodes whose name starts with '^'
 * @param keep_all  when true, a new node object is appended to "updates"
 *                  even if one with the same name is already there;
 *                  when false, a repeated name adds a slot to the
 *                  existing object instead
 *
 * @return ORTE_SUCCESS, or ORTE_ERROR on a malformed entry.
 */
static int hostfile_parse_line(int token, opal_list_t* updates,
                               opal_list_t* exclude, bool keep_all)
{
    int rc;
    orte_node_t* node;
    bool got_max = false;
    char* value;
    char** argv;
    char* node_name = NULL;
    char* node_alias = NULL;
    char* username = NULL;
    int cnt;
    int number_of_slots = 0;
    char buff[64];

    if (ORTE_HOSTFILE_STRING == token ||
        ORTE_HOSTFILE_HOSTNAME == token ||
        ORTE_HOSTFILE_INT == token ||
        ORTE_HOSTFILE_IPV4 == token ||
        ORTE_HOSTFILE_IPV6 == token) {

        if (ORTE_HOSTFILE_INT == token) {
            snprintf(buff, sizeof(buff), "%d", orte_util_hostfile_value.ival);
            value = buff;
        } else {
            value = orte_util_hostfile_value.sval;
        }
        argv = opal_argv_split (value, '@');

        cnt = opal_argv_count (argv);
        if (1 == cnt) {
            node_name = strdup(argv[0]);
        } else if (2 == cnt) {
            username = strdup(argv[0]);
            node_name = strdup(argv[1]);
        } else {
            opal_output(0, "WARNING: Unhandled user@host-combination\n"); /* XXX */
        }
        opal_argv_free (argv);

        /* without a node name there is nothing we can do with this
         * entry - bail out instead of dereferencing NULL below
         */
        if (NULL == node_name) {
            free(username);
            return ORTE_ERROR;
        }

        /* if the first letter of the name is '^', then this is a node
         * to be excluded. Remove the ^ character so the nodename is
         * usable, and put it on the exclude list
         */
        if ('^' == node_name[0]) {
            /* shift the name (including its terminator) left by one */
            memmove(node_name, node_name + 1, strlen(node_name));

            OPAL_OUTPUT_VERBOSE((3, orte_ras_base_framework.framework_output,
                                 "%s hostfile: node %s is being excluded",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node_name));

            /* convert this into something globally unique - no alias
             * is recorded here since excluded nodes never use one
             */
            if (strcmp(node_name, "localhost") == 0 || opal_ifislocal(node_name)) {
                free (node_name);
                node_name = strdup(orte_process_info.nodename);
            }

            /* Do we need to make a new node object?  First check to see
               if it's already in the exclude list */
            if (NULL == (node = hostfile_lookup(exclude, node_name))) {
                node = OBJ_NEW(orte_node_t);
                /* the node object takes ownership of both strings */
                node->name = node_name;
                if (NULL != username) {
                    node->username = username;
                    username = NULL;
                }
                opal_list_append(exclude, &node->super);
            } else {
                /* already excluded - our copy of the name is not needed */
                free(node_name);
            }
            free(username);
            return ORTE_SUCCESS;
        }

        /* this is not a node to be excluded, so we need to process it and
         * add it to the "include" list. See if this host is actually us.
         */
        if (strcmp(node_name, "localhost") == 0 || opal_ifislocal(node_name)) {
            /* Nodename has been allocated, that is for sure */
            if (orte_show_resolved_nodenames &&
                0 != strcmp(node_name, orte_process_info.nodename)) {
                node_alias = strdup(node_name);
            }
            free (node_name);
            node_name = strdup(orte_process_info.nodename);
        }

        OPAL_OUTPUT_VERBOSE((3, orte_ras_base_framework.framework_output,
                             "%s hostfile: node %s is being included - keep all is %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node_name,
                             keep_all ? "TRUE" : "FALSE"));

        /* Do we need to make a new node object? */
        if (keep_all || NULL == (node = hostfile_lookup(updates, node_name))) {
            node = OBJ_NEW(orte_node_t);
            /* the node object takes ownership of both strings */
            node->name = node_name;
            node->slots = 1;
            if (NULL != username) {
                node->username = username;
                username = NULL;
            }
            opal_list_append(updates, &node->super);
        } else {
            /* this node was already found once - add a slot and mark slots as "given" */
            node->slots++;
            node->slots_given = true;
            /* the existing object keeps its own name */
            free(node_name);
        }
        node_name = NULL;
        free(username);
        username = NULL;

        /* do we need to record an alias for this node? */
        if (NULL != node_alias) {
            /* add to list of aliases for this node - only add if unique */
            opal_argv_append_unique_nosize(&node->alias, node_alias, false);
            free(node_alias);
        }
    } else if (ORTE_HOSTFILE_RELATIVE == token) {
        /* store this for later processing */
        node = OBJ_NEW(orte_node_t);
        node->name = strdup(orte_util_hostfile_value.sval);
        opal_list_append(updates, &node->super);
    } else if (ORTE_HOSTFILE_RANK == token) {
        /* we can ignore the rank, but we need to extract the node name. we
         * first need to shift over to the other side of the equal sign as
         * this is where the node name will be
         */
        while (!orte_util_hostfile_done &&
               ORTE_HOSTFILE_EQUAL != token) {
            token = orte_util_hostfile_lex();
        }
        if (orte_util_hostfile_done) {
            /* bad syntax somewhere */
            return ORTE_ERROR;
        }
        /* next position should be the node name */
        token = orte_util_hostfile_lex();
        if (ORTE_HOSTFILE_INT == token) {
            snprintf(buff, sizeof(buff), "%d", orte_util_hostfile_value.ival);
            value = buff;
        } else {
            value = orte_util_hostfile_value.sval;
        }

        argv = opal_argv_split (value, '@');

        cnt = opal_argv_count (argv);
        if (1 == cnt) {
            node_name = strdup(argv[0]);
        } else if (2 == cnt) {
            username = strdup(argv[0]);
            node_name = strdup(argv[1]);
        } else {
            opal_output(0, "WARNING: Unhandled user@host-combination\n"); /* XXX */
        }
        opal_argv_free (argv);

        /* no usable node name - report the entry as malformed rather
         * than handing NULL to the lookup
         */
        if (NULL == node_name) {
            free(username);
            return ORTE_ERROR;
        }

        /* Do we need to make a new node object? */
        if (NULL == (node = hostfile_lookup(updates, node_name))) {
            node = OBJ_NEW(orte_node_t);
            /* the node object takes ownership of both strings */
            node->name = node_name;
            node->slots = 1;
            if (NULL != username) {
                node->username = username;
                username = NULL;
            }
            opal_list_append(updates, &node->super);
        } else {
            /* add a slot */
            node->slots++;
            /* the existing object keeps its own name */
            free(node_name);
        }
        free(username);

        OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
                             "%s hostfile: node %s slots %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name, node->slots));
        /* mark the slots as "given" since we take them as being the
         * number specified via the rankfile
         */
        node->slots_given = true;
        /* skip to end of line */
        while (!orte_util_hostfile_done &&
               ORTE_HOSTFILE_NEWLINE != token) {
            token = orte_util_hostfile_lex();
        }
        return ORTE_SUCCESS;
    } else {
        hostfile_parse_error(token);
        return ORTE_ERROR;
    }

    /* process the keyword/value pairs that follow the node name */
    while (!orte_util_hostfile_done) {
        token = orte_util_hostfile_lex();

        switch (token) {
        case ORTE_HOSTFILE_DONE:
            goto done;

        case ORTE_HOSTFILE_NEWLINE:
            goto done;

        case ORTE_HOSTFILE_USERNAME:
            node->username = hostfile_parse_string();
            break;

        case ORTE_HOSTFILE_COUNT:
        case ORTE_HOSTFILE_CPU:
        case ORTE_HOSTFILE_SLOTS:
            rc = hostfile_parse_int();
            if (rc < 0) {
                orte_show_help("help-hostfile.txt", "slots",
                               true,
                               cur_hostfile_name, rc);
                opal_list_remove_item(updates, &node->super);
                OBJ_RELEASE(node);
                return ORTE_ERROR;
            }
            if (node->slots_given) {
                /* multiple definitions were given for the
                 * slot count - this is not allowed
                 */
                orte_show_help("help-hostfile.txt", "slots-given",
                               true,
                               cur_hostfile_name, node->name);
                opal_list_remove_item(updates, &node->super);
                OBJ_RELEASE(node);
                return ORTE_ERROR;
            }
            node->slots = rc;
            node->slots_given = true;

            /* Ensure that slots_max >= slots */
            if (node->slots_max != 0 && node->slots_max < node->slots) {
                node->slots_max = node->slots;
            }
            break;

        case ORTE_HOSTFILE_SLOTS_MAX:
            rc = hostfile_parse_int();
            if (rc < 0) {
                orte_show_help("help-hostfile.txt", "max_slots",
                               true,
                               cur_hostfile_name, ((size_t) rc));
                opal_list_remove_item(updates, &node->super);
                OBJ_RELEASE(node);
                return ORTE_ERROR;
            }
            /* Only take this update if it puts us >= node_slots */
            if (rc >= node->slots) {
                if (node->slots_max != rc) {
                    node->slots_max = rc;
                    got_max = true;
                }
            } else {
                orte_show_help("help-hostfile.txt", "max_slots_lt",
                               true,
                               cur_hostfile_name, node->slots, rc);
                ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
                opal_list_remove_item(updates, &node->super);
                OBJ_RELEASE(node);
                return ORTE_ERROR;
            }
            break;

        default:
            hostfile_parse_error(token);
            opal_list_remove_item(updates, &node->super);
            OBJ_RELEASE(node);
            return ORTE_ERROR;
        }
        /* number_of_slots is never changed from zero in this function,
         * so this only trips when the slot count has gone negative
         */
        if (number_of_slots > node->slots) {
            ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
            opal_list_remove_item(updates, &node->super);
            OBJ_RELEASE(node);
            return ORTE_ERROR;
        }
    }

done:
    /* if only a max was given, use it as the slot count as well */
    if (got_max && !node->slots_given) {
        node->slots = node->slots_max;
        node->slots_given = true;
    }

    return ORTE_SUCCESS;
}
/*
 * Read a hostfile and append to "nodes" every host it names that is
 * neither excluded by the hostfile nor already present on "nodes"
 * (comparison is by node name).  Relative ("+...") entries are not
 * accepted here and cause ORTE_ERR_SILENT to be returned after a
 * help message is shown.
 *
 * @param nodes     list to which new orte_node_t entries are appended
 * @param hostfile  path of the hostfile to parse
 *
 * @return ORTE_SUCCESS, ORTE_ERR_SILENT, or the hostfile_parse() error.
 */
int orte_util_add_hostfile_nodes(opal_list_t *nodes,
                                 char *hostfile)
{
    opal_list_t exclude, adds;
    opal_list_item_t *cur, *scan;
    orte_node_t *candidate, *banned;
    bool duplicate;
    int rc;

    OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
                         "%s hostfile: checking hostfile %s for nodes",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hostfile));

    OBJ_CONSTRUCT(&exclude, opal_list_t);
    OBJ_CONSTRUCT(&adds, opal_list_t);

    /* collect the hostfile contents on our private lists */
    rc = hostfile_parse(hostfile, &adds, &exclude, false);
    if (ORTE_SUCCESS != rc) {
        goto cleanup;
    }

    /* relative node directives are rejected */
    cur = opal_list_get_first(&adds);
    while (cur != opal_list_get_end(&adds)) {
        candidate = (orte_node_t*)cur;
        if ('+' == candidate->name[0]) {
            orte_show_help("help-hostfile.txt", "hostfile:relative-syntax",
                           true, candidate->name);
            rc = ORTE_ERR_SILENT;
            goto cleanup;
        }
        cur = opal_list_get_next(cur);
    }

    /* drop the first matching addition for each excluded host */
    for (cur = opal_list_remove_first(&exclude);
         NULL != cur;
         cur = opal_list_remove_first(&exclude)) {
        banned = (orte_node_t*)cur;
        scan = opal_list_get_first(&adds);
        while (scan != opal_list_get_end(&adds)) {
            if (0 == strcmp(banned->name, ((orte_node_t*)scan)->name)) {
                opal_list_remove_item(&adds, scan);
                OBJ_RELEASE(scan);
                break;
            }
            scan = opal_list_get_next(scan);
        }
        OBJ_RELEASE(cur);
    }

    /* hand over whatever the caller does not already have */
    while (NULL != (cur = opal_list_remove_first(&adds))) {
        candidate = (orte_node_t*)cur;

        duplicate = false;
        for (scan = opal_list_get_first(nodes);
             !duplicate && scan != opal_list_get_end(nodes);
             scan = opal_list_get_next(scan)) {
            if (0 == strcmp(candidate->name, ((orte_node_t*)scan)->name)) {
                duplicate = true;
            }
        }

        if (duplicate) {
            /* caller already has this host - discard our copy */
            OBJ_RELEASE(cur);
            continue;
        }

        opal_list_append(nodes, &candidate->super);
        OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
                             "%s hostfile: adding node %s slots %d",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), candidate->name, candidate->slots));
    }

cleanup:
    OPAL_LIST_DESTRUCT(&exclude);
    OPAL_LIST_DESTRUCT(&adds);

    return rc;
}
static void process_reads(int fd, short args, void *cbdata) { orte_dfs_request_t *read_dfs = (orte_dfs_request_t*)cbdata; orte_dfs_tracker_t *tptr, *trk; opal_list_item_t *item; opal_buffer_t *buffer; int64_t i64; int rc; /* look in our local records for this fd */ trk = NULL; for (item = opal_list_get_first(&active_files); item != opal_list_get_end(&active_files); item = opal_list_get_next(item)) { tptr = (orte_dfs_tracker_t*)item; if (tptr->local_fd == read_dfs->local_fd) { trk = tptr; break; } } if (NULL == trk) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); OBJ_RELEASE(read_dfs); return; } /* add this request to our pending list */ read_dfs->id = req_id++; opal_list_append(&requests, &read_dfs->super); /* setup a message for the daemon telling * them what file to read */ buffer = OBJ_NEW(opal_buffer_t); if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &read_dfs->cmd, 1, ORTE_DFS_CMD_T))) { ORTE_ERROR_LOG(rc); goto complete; } /* include the request id */ if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &read_dfs->id, 1, OPAL_UINT64))) { ORTE_ERROR_LOG(rc); goto complete; } if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &trk->remote_fd, 1, OPAL_INT))) { ORTE_ERROR_LOG(rc); goto complete; } i64 = (int64_t)read_dfs->read_length; if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &i64, 1, OPAL_INT64))) { ORTE_ERROR_LOG(rc); goto complete; } opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s sending read file request to %s for fd %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&trk->host_daemon), trk->local_fd); /* send it */ if (0 > (rc = orte_rml.send_buffer_nb(&trk->host_daemon, buffer, ORTE_RML_TAG_DFS_CMD, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(buffer); } /* don't release the request */ return; complete: /* don't need to hang on to this request */ opal_list_remove_item(&requests, &read_dfs->super); OBJ_RELEASE(read_dfs); }
static void process_sizes(int fd, short args, void *cbdata) { orte_dfs_request_t *size_dfs = (orte_dfs_request_t*)cbdata; orte_dfs_tracker_t *tptr, *trk; opal_list_item_t *item; opal_buffer_t *buffer; int rc; opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s processing get_size on fd %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), size_dfs->local_fd); /* look in our local records for this fd */ trk = NULL; for (item = opal_list_get_first(&active_files); item != opal_list_get_end(&active_files); item = opal_list_get_next(item)) { tptr = (orte_dfs_tracker_t*)item; if (tptr->local_fd == size_dfs->local_fd) { trk = tptr; break; } } if (NULL == trk) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); OBJ_RELEASE(size_dfs); return; } /* add this request to our local list so we can * match it with the returned response when it comes */ size_dfs->id = req_id++; opal_list_append(&requests, &size_dfs->super); /* setup a message for the daemon telling * them what file we want to access */ buffer = OBJ_NEW(opal_buffer_t); if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &size_dfs->cmd, 1, ORTE_DFS_CMD_T))) { ORTE_ERROR_LOG(rc); opal_list_remove_item(&requests, &size_dfs->super); goto complete; } /* pass the request id */ if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &size_dfs->id, 1, OPAL_UINT64))) { ORTE_ERROR_LOG(rc); opal_list_remove_item(&requests, &size_dfs->super); goto complete; } if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &trk->remote_fd, 1, OPAL_INT))) { ORTE_ERROR_LOG(rc); opal_list_remove_item(&requests, &size_dfs->super); goto complete; } opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s sending get_size request to %s for fd %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&trk->host_daemon), trk->local_fd); /* send it */ if (0 > (rc = orte_rml.send_buffer_nb(&trk->host_daemon, buffer, ORTE_RML_TAG_DFS_CMD, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(buffer); opal_list_remove_item(&requests, &size_dfs->super); if 
(NULL != size_dfs->size_cbfunc) { size_dfs->size_cbfunc(-1, size_dfs->cbdata); } goto complete; } /* leave the request there */ return; complete: OBJ_RELEASE(size_dfs); }
static void process_close(int fd, short args, void *cbdata) { orte_dfs_request_t *close_dfs = (orte_dfs_request_t*)cbdata; orte_dfs_tracker_t *tptr, *trk; opal_list_item_t *item; opal_buffer_t *buffer; int rc; opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s closing fd %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), close_dfs->local_fd); /* look in our local records for this fd */ trk = NULL; for (item = opal_list_get_first(&active_files); item != opal_list_get_end(&active_files); item = opal_list_get_next(item)) { tptr = (orte_dfs_tracker_t*)item; if (tptr->local_fd == close_dfs->local_fd) { trk = tptr; break; } } if (NULL == trk) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); if (NULL != close_dfs->close_cbfunc) { close_dfs->close_cbfunc(close_dfs->local_fd, close_dfs->cbdata); } OBJ_RELEASE(close_dfs); return; } /* setup a message for the daemon telling * them what file to close */ buffer = OBJ_NEW(opal_buffer_t); if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &close_dfs->cmd, 1, ORTE_DFS_CMD_T))) { ORTE_ERROR_LOG(rc); goto complete; } if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &trk->remote_fd, 1, OPAL_INT))) { ORTE_ERROR_LOG(rc); goto complete; } opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s sending close file request to %s for fd %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&trk->host_daemon), trk->local_fd); /* send it */ if (0 > (rc = orte_rml.send_buffer_nb(&trk->host_daemon, buffer, ORTE_RML_TAG_DFS_CMD, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(buffer); goto complete; } complete: opal_list_remove_item(&active_files, &trk->super); OBJ_RELEASE(trk); if (NULL != close_dfs->close_cbfunc) { close_dfs->close_cbfunc(close_dfs->local_fd, close_dfs->cbdata); } OBJ_RELEASE(close_dfs); }
static void process_opens(int fd, short args, void *cbdata) { orte_dfs_request_t *dfs = (orte_dfs_request_t*)cbdata; int rc; opal_buffer_t *buffer; char *scheme, *host, *filename, *hostname; orte_process_name_t daemon; bool found; orte_vpid_t v; opal_list_t myvals; opal_value_t *kv; opal_output(0, "%s PROCESSING OPEN", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); /* get the scheme to determine if we can process locally or not */ if (NULL == (scheme = opal_uri_get_scheme(dfs->uri))) { ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); goto complete; } opal_output(0, "%s GOT SCHEME", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); if (0 != strcmp(scheme, "file")) { /* not yet supported */ orte_show_help("orte_dfs_help.txt", "unsupported-filesystem", true, dfs->uri); goto complete; } /* dissect the uri to extract host and filename/path */ if (NULL == (filename = opal_filename_from_uri(dfs->uri, &host))) { goto complete; } opal_output(0, "%s GOT FILENAME %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), filename); if (NULL == host) { host = strdup(orte_process_info.nodename); } /* ident the daemon on that host */ daemon.jobid = ORTE_PROC_MY_DAEMON->jobid; found = false; for (v=0; v < orte_process_info.num_daemons; v++) { daemon.vpid = v; /* fetch the hostname where this daemon is located */ OBJ_CONSTRUCT(&myvals, opal_list_t); if (ORTE_SUCCESS != (rc = opal_dstore.fetch(opal_dstore_internal, &daemon, OPAL_DSTORE_HOSTNAME, &myvals))) { ORTE_ERROR_LOG(rc); OPAL_LIST_DESTRUCT(&myvals); goto complete; } kv = (opal_value_t*)opal_list_get_first(&myvals); hostname = strdup(kv->data.string); OPAL_LIST_DESTRUCT(&myvals); opal_output(0, "%s GOT HOST %s HOSTNAME %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), host, hostname); if (0 == strcmp(host, hostname)) { found = true; break; } } if (!found) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); goto complete; } opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s file %s on host %s daemon %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), filename, host, ORTE_NAME_PRINT(&daemon)); /* 
add this request to our local list so we can * match it with the returned response when it comes */ dfs->id = req_id++; opal_list_append(&requests, &dfs->super); /* setup a message for the daemon telling * them what file we want to access */ buffer = OBJ_NEW(opal_buffer_t); if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &dfs->cmd, 1, ORTE_DFS_CMD_T))) { ORTE_ERROR_LOG(rc); opal_list_remove_item(&requests, &dfs->super); goto complete; } /* pass the request id */ if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &dfs->id, 1, OPAL_UINT64))) { ORTE_ERROR_LOG(rc); opal_list_remove_item(&requests, &dfs->super); goto complete; } if (OPAL_SUCCESS != (rc = opal_dss.pack(buffer, &filename, 1, OPAL_STRING))) { ORTE_ERROR_LOG(rc); opal_list_remove_item(&requests, &dfs->super); goto complete; } opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s sending open file request to %s file %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&daemon), filename); /* send it */ if (0 > (rc = orte_rml.send_buffer_nb(&daemon, buffer, ORTE_RML_TAG_DFS_CMD, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(buffer); opal_list_remove_item(&requests, &dfs->super); goto complete; } /* don't release it */ return; complete: /* we get here if an error occurred - execute any * pending callback so the proc doesn't hang */ if (NULL != dfs->open_cbfunc) { dfs->open_cbfunc(-1, dfs->cbdata); } OBJ_RELEASE(dfs); }
/* receives take place in an event, so we are free to process * the request list without fear of getting things out-of-order */ static void recv_dfs(int status, orte_process_name_t* sender, opal_buffer_t* buffer, orte_rml_tag_t tag, void* cbdata) { orte_dfs_cmd_t cmd; int32_t cnt; orte_dfs_request_t *dfs, *dptr; opal_list_item_t *item; int remote_fd, rc; int64_t i64; uint64_t rid; orte_dfs_tracker_t *trk; /* unpack the command this message is responding to */ cnt = 1; if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &cmd, &cnt, ORTE_DFS_CMD_T))) { ORTE_ERROR_LOG(rc); return; } opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s recvd cmd %d from sender %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)cmd, ORTE_NAME_PRINT(sender)); switch (cmd) { case ORTE_DFS_OPEN_CMD: /* unpack the request id */ cnt = 1; if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) { ORTE_ERROR_LOG(rc); return; } /* unpack the remote fd */ cnt = 1; if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &remote_fd, &cnt, OPAL_INT))) { ORTE_ERROR_LOG(rc); return; } /* search our list of requests to find the matching one */ dfs = NULL; for (item = opal_list_get_first(&requests); item != opal_list_get_end(&requests); item = opal_list_get_next(item)) { dptr = (orte_dfs_request_t*)item; if (dptr->id == rid) { /* as the request has been fulfilled, remove it */ opal_list_remove_item(&requests, item); dfs = dptr; break; } } if (NULL == dfs) { opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s recvd open file - no corresponding request found for local fd %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), local_fd); ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return; } /* if the remote_fd < 0, then we had an error, so return * the error value to the caller */ if (remote_fd < 0) { opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s recvd open file response error file %s [error: %d]", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), dfs->uri, remote_fd); if (NULL 
!= dfs->open_cbfunc) { dfs->open_cbfunc(remote_fd, dfs->cbdata); } /* release the request */ OBJ_RELEASE(dfs); return; } /* otherwise, create a tracker for this file */ trk = OBJ_NEW(orte_dfs_tracker_t); trk->requestor.jobid = ORTE_PROC_MY_NAME->jobid; trk->requestor.vpid = ORTE_PROC_MY_NAME->vpid; trk->host_daemon.jobid = sender->jobid; trk->host_daemon.vpid = sender->vpid; trk->filename = strdup(dfs->uri); /* define the local fd */ trk->local_fd = local_fd++; /* record the remote file descriptor */ trk->remote_fd = remote_fd; /* add it to our list of active files */ opal_list_append(&active_files, &trk->super); /* return the local_fd to the caller for * subsequent operations */ opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s recvd open file completed for file %s [local fd: %d remote fd: %d]", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), dfs->uri, trk->local_fd, remote_fd); if (NULL != dfs->open_cbfunc) { dfs->open_cbfunc(trk->local_fd, dfs->cbdata); } /* release the request */ OBJ_RELEASE(dfs); break; case ORTE_DFS_SIZE_CMD: /* unpack the request id for this request */ cnt = 1; if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) { ORTE_ERROR_LOG(rc); return; } /* search our list of requests to find the matching one */ dfs = NULL; for (item = opal_list_get_first(&requests); item != opal_list_get_end(&requests); item = opal_list_get_next(item)) { dptr = (orte_dfs_request_t*)item; if (dptr->id == rid) { /* request was fulfilled, so remove it */ opal_list_remove_item(&requests, item); dfs = dptr; break; } } if (NULL == dfs) { opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s recvd size - no corresponding request found for local fd %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), local_fd); ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return; } /* get the size */ cnt = 1; if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &i64, &cnt, OPAL_INT64))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(dfs); return; } /* pass it back to the original 
caller */ if (NULL != dfs->size_cbfunc) { dfs->size_cbfunc(i64, dfs->cbdata); } /* release the request */ OBJ_RELEASE(dfs); break; case ORTE_DFS_SEEK_CMD: /* unpack the request id for this read */ cnt = 1; if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) { ORTE_ERROR_LOG(rc); return; } /* search our list of requests to find the matching one */ dfs = NULL; for (item = opal_list_get_first(&requests); item != opal_list_get_end(&requests); item = opal_list_get_next(item)) { dptr = (orte_dfs_request_t*)item; if (dptr->id == rid) { /* request was fulfilled, so remove it */ opal_list_remove_item(&requests, item); dfs = dptr; break; } } if (NULL == dfs) { opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s recvd seek - no corresponding request found for local fd %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), local_fd); ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return; } /* get the returned offset/status */ cnt = 1; if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &i64, &cnt, OPAL_INT64))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(dfs); return; } /* pass it back to the original caller */ if (NULL != dfs->seek_cbfunc) { dfs->seek_cbfunc(i64, dfs->cbdata); } /* release the request */ OBJ_RELEASE(dfs); break; case ORTE_DFS_READ_CMD: /* unpack the request id for this read */ cnt = 1; if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) { ORTE_ERROR_LOG(rc); return; } /* search our list of requests to find the matching one */ dfs = NULL; for (item = opal_list_get_first(&requests); item != opal_list_get_end(&requests); item = opal_list_get_next(item)) { dptr = (orte_dfs_request_t*)item; if (dptr->id == rid) { /* request was fulfilled, so remove it */ opal_list_remove_item(&requests, item); dfs = dptr; break; } } if (NULL == dfs) { opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s recvd read - no corresponding request found for local fd %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), local_fd); 
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return; } /* get the bytes read */ cnt = 1; if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &i64, &cnt, OPAL_INT64))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(dfs); return; } if (0 < i64) { cnt = i64; if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, dfs->read_buffer, &cnt, OPAL_UINT8))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(dfs); return; } } /* pass them back to the original caller */ if (NULL != dfs->read_cbfunc) { dfs->read_cbfunc(i64, dfs->read_buffer, dfs->cbdata); } /* release the request */ OBJ_RELEASE(dfs); break; case ORTE_DFS_POST_CMD: /* unpack the request id for this read */ cnt = 1; if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) { ORTE_ERROR_LOG(rc); return; } /* search our list of requests to find the matching one */ dfs = NULL; for (item = opal_list_get_first(&requests); item != opal_list_get_end(&requests); item = opal_list_get_next(item)) { dptr = (orte_dfs_request_t*)item; if (dptr->id == rid) { /* request was fulfilled, so remove it */ opal_list_remove_item(&requests, item); dfs = dptr; break; } } if (NULL == dfs) { opal_output_verbose(1, orte_dfs_base_framework.framework_output, "%s recvd post - no corresponding request found", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return; } if (NULL != dfs->post_cbfunc) { dfs->post_cbfunc(dfs->cbdata); } OBJ_RELEASE(dfs); break; case ORTE_DFS_GETFM_CMD: /* unpack the request id for this read */ cnt = 1; if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &rid, &cnt, OPAL_UINT64))) { ORTE_ERROR_LOG(rc); return; } /* search our list of requests to find the matching one */ dfs = NULL; for (item = opal_list_get_first(&requests); item != opal_list_get_end(&requests); item = opal_list_get_next(item)) { dptr = (orte_dfs_request_t*)item; if (dptr->id == rid) { /* request was fulfilled, so remove it */ opal_list_remove_item(&requests, item); dfs = dptr; break; } } if (NULL == dfs) { opal_output_verbose(1, 
orte_dfs_base_framework.framework_output, "%s recvd getfm - no corresponding request found", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return; } /* return it to caller */ if (NULL != dfs->fm_cbfunc) { dfs->fm_cbfunc(buffer, dfs->cbdata); } OBJ_RELEASE(dfs); break; default: opal_output(0, "TEST:DFS:RECV WTF"); break; } }