/**
 * Fetch a single value for a process from the local data store.
 *
 * @param id   name of the process whose data is requested
 * @param key  key to look up
 * @param kv   on success, receives the first entry of the list filled in
 *             by opal_pmix_base_fetch().  The entry is unlinked from the
 *             temporary list before that list is torn down, so it stays
 *             valid after this function returns (presumably the caller
 *             is then responsible for releasing it).  Not written on
 *             failure.
 *
 * @return OPAL_SUCCESS, or the error code returned by
 *         opal_pmix_base_fetch().
 */
static int cray_get(const opal_process_name_t *id, const char *key, opal_value_t **kv)
{
    int rc;
    opal_list_t vals;

    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s pmix:cray getting value for proc %s key %s",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
                        OPAL_NAME_PRINT(*id), key);

    OBJ_CONSTRUCT(&vals, opal_list_t);
    rc = opal_pmix_base_fetch(id, key, &vals);
    if (OPAL_SUCCESS == rc) {
        /* hand the first entry to the caller; it is no longer on the list */
        *kv = (opal_value_t*)opal_list_remove_first(&vals);
    } else {
        opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                            "%s pmix:cray fetch from dstore failed: %d",
                            OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), rc);
    }

    /* The temporary list must be torn down on BOTH paths: previously the
     * success path returned early and left the constructed list (and any
     * entries still on it) behind. */
    OPAL_LIST_DESTRUCT(&vals);

    return rc;
}
/**
 * Look up @p key for process @p id, pulling and caching the process's
 * packed key/value data if the key is not already stored locally.
 *
 * The local store is consulted first via opal_pmix_base_fetch().  On a
 * miss, the packed blob for the process is retrieved with
 * opal_pmix_base_get_packed() and walked entry by entry.  As parsed
 * below, each entry consists of:
 *   - a NUL-terminated key string,
 *   - a NUL-terminated hexadecimal type code,
 *   - a NUL-terminated hexadecimal size,
 *   - the data itself, occupying `size` bytes.
 * Every decoded entry is written to the local store with
 * opal_pmix_base_store(), whether or not it matches @p key.
 *
 * @param id        process whose data is wanted
 * @param key       key being requested
 * @param out_kv    set to NULL on entry; on success receives a copy
 *                  (made with opal_dss.copy) of the matching value
 * @param kvs_name  only used in the verbose log message
 * @param vallen    passed through to opal_pmix_base_get_packed()
 * @param fn        passed through to opal_pmix_base_get_packed()
 *
 * @return the result of the copy when the key was already stored locally;
 *         the error from opal_pmix_base_get_packed() if retrieval fails;
 *         OPAL_ERROR if an entry carries an unsupported type;
 *         OPAL_ERR_OUT_OF_RESOURCE if a byte-object buffer cannot be
 *         allocated; OPAL_ERR_NOT_FOUND if decoding finished with a
 *         success status but the key was not among the entries;
 *         otherwise the status left by the most recent store/copy call.
 */
int opal_pmix_base_cache_keys_locally(const opal_process_name_t* id, const char* key,
                                      opal_value_t **out_kv, char* kvs_name,
                                      int vallen, kvs_get_fn fn)
{
    char *tmp, *tmp2, *tmp3, *tmp_val;
    opal_data_type_t stored_type;
    size_t len, offset;
    int rc, size;
    opal_value_t *kv, *knew;
    opal_list_t values;

    /* set the default */
    *out_kv = NULL;

    /* first try to fetch data from data storage */
    OBJ_CONSTRUCT(&values, opal_list_t);
    rc = opal_pmix_base_fetch(id, key, &values);
    if (OPAL_SUCCESS == rc) {
        kv = (opal_value_t*)opal_list_get_first(&values);
        /* create the copy */
        if (OPAL_SUCCESS != (rc = opal_dss.copy((void**)&knew, kv, OPAL_VALUE))) {
            OPAL_ERROR_LOG(rc);
        } else {
            *out_kv = knew;
        }
        OPAL_LIST_DESTRUCT(&values);
        return rc;
    }
    OPAL_LIST_DESTRUCT(&values);

    OPAL_OUTPUT_VERBOSE((1, opal_pmix_base_framework.framework_output,
                         "pmix: get all keys for proc %s in KVS %s",
                         OPAL_NAME_PRINT(*id), kvs_name));

    rc = opal_pmix_base_get_packed(id, &tmp_val, &len, vallen, fn);
    if (OPAL_SUCCESS != rc) {
        return rc;
    }

    /* search for each key in the decoded data */
    for (offset = 0 ; offset < len ; ) {
        /* type */
        tmp = tmp_val + offset + strlen (tmp_val + offset) + 1;
        /* size */
        tmp2 = tmp + strlen (tmp) + 1;
        /* data */
        tmp3 = tmp2 + strlen (tmp2) + 1;

        stored_type = (opal_data_type_t) strtol (tmp, NULL, 16);
        size = strtol (tmp2, NULL, 16);

        /* cache value locally so we don't have to look it up via pmi again */
        kv = OBJ_NEW(opal_value_t);
        kv->key = strdup(tmp_val + offset);
        kv->type = stored_type;

        switch (stored_type) {
        case OPAL_BYTE:
            kv->data.byte = *tmp3;
            break;
        case OPAL_STRING:
            kv->data.string = strdup(tmp3);
            break;
        case OPAL_PID:
            kv->data.pid = strtoul(tmp3, NULL, 10);
            break;
        case OPAL_INT:
            kv->data.integer = strtol(tmp3, NULL, 10);
            break;
        case OPAL_INT8:
            kv->data.int8 = strtol(tmp3, NULL, 10);
            break;
        case OPAL_INT16:
            kv->data.int16 = strtol(tmp3, NULL, 10);
            break;
        case OPAL_INT32:
            kv->data.int32 = strtol(tmp3, NULL, 10);
            break;
        case OPAL_INT64:
            kv->data.int64 = strtol(tmp3, NULL, 10);
            break;
        case OPAL_UINT:
            kv->data.uint = strtoul(tmp3, NULL, 10);
            break;
        case OPAL_UINT8:
            kv->data.uint8 = strtoul(tmp3, NULL, 10);
            break;
        case OPAL_UINT16:
            kv->data.uint16 = strtoul(tmp3, NULL, 10);
            break;
        case OPAL_UINT32:
            kv->data.uint32 = strtoul(tmp3, NULL, 10);
            break;
        case OPAL_UINT64:
            kv->data.uint64 = strtoull(tmp3, NULL, 10);
            break;
        case OPAL_BYTE_OBJECT:
            if (size == 0xffff) {
                /* 0xffff is the marker for "no bytes": nothing follows,
                 * so the entry contributes zero data bytes to the offset */
                kv->data.bo.bytes = NULL;
                kv->data.bo.size = 0;
                size = 0;
            } else {
                kv->data.bo.bytes = malloc(size);
                if (NULL != kv->data.bo.bytes) {
                    memcpy(kv->data.bo.bytes, tmp3, size);
                } else if (0 != size) {
                    /* allocation failed: do not memcpy into NULL */
                    OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE);
                    OBJ_RELEASE(kv);
                    free (tmp_val);
                    return OPAL_ERR_OUT_OF_RESOURCE;
                }
                kv->data.bo.size = size;
            }
            break;
        default:
            opal_output(0, "UNSUPPORTED TYPE %d", stored_type);
            /* drop the partially built value and the packed blob before
             * bailing out; both were previously leaked on this path */
            OBJ_RELEASE(kv);
            free (tmp_val);
            return OPAL_ERROR;
        }

        /* store data in local hash table */
        if (OPAL_SUCCESS != (rc = opal_pmix_base_store(id, kv))) {
            OPAL_ERROR_LOG(rc);
        }

        /* keep going and cache everything locally */
        offset = (size_t) (tmp3 - tmp_val) + size;

        if (0 == strcmp(kv->key, key)) {
            /* create the copy */
            if (OPAL_SUCCESS != (rc = opal_dss.copy((void**)&knew, kv, OPAL_VALUE))) {
                OPAL_ERROR_LOG(rc);
            } else {
                /* if the key appears more than once the last occurrence
                 * wins; release the earlier copy instead of leaking it */
                if (NULL != *out_kv) {
                    OBJ_RELEASE(*out_kv);
                }
                *out_kv = knew;
            }
        }

        /* Done with our working value.  Other callers of
         * opal_pmix_base_store() in this code base release/destruct the
         * value right after storing it, i.e. the store keeps its own
         * copy, so holding on to kv here would leak one value per entry. */
        OBJ_RELEASE(kv);
    }

    free (tmp_val);

    /* if there was no issue with unpacking the message, but
     * we didn't find the requested info, then indicate that
     * the info wasn't found */
    if (OPAL_SUCCESS == rc && NULL == *out_kv) {
        return OPAL_ERR_NOT_FOUND;
    }
    return rc;
}
/**
 * Exchange the locally cached "global" key/value data with every other
 * process via Cray PMI collectives, store what was received, and record
 * the locality of each process on this node.
 *
 * Steps, as implemented below:
 *   1. copy the cache_global buffer and unload the copy into a raw payload;
 *   2. PMI_Allgather the (pmix rank, process name, payload size) triple of
 *      every process;
 *   3. PMI_Allgatherv the payloads themselves;
 *   4. unpack every received payload and store each value under the name
 *      of the process that sent it;
 *   5. for each local rank, compute a locality value (using cpusets when
 *      built with hwloc support) and store it under OPAL_PMIX_LOCALITY.
 *
 * @param procs         not referenced by this implementation
 * @param collect_data  not referenced by this implementation
 *
 * @return OPAL_SUCCESS on completion; OPAL_ERR_OUT_OF_RESOURCE if an
 *         allocation fails; OPAL_ERR_COMM_FAILURE if a PMI collective
 *         fails; otherwise the error from opal_dss.load() or
 *         opal_pmix_base_store() that aborted the unpack phase.
 */
static int cray_fence(opal_list_t *procs, int collect_data)
{
    int rc, cnt;
    int32_t i;
    int *all_lens = NULL;
    opal_value_t *kp, kvn;
    opal_buffer_t *send_buffer = NULL;
    opal_buffer_t *buf = NULL;
    void *sbuf_ptr = NULL;
    char *cptr, *rcv_buff = NULL;
    opal_process_name_t id;
    typedef struct {
        uint32_t pmix_rank;
        opal_process_name_t name;
        int32_t nbytes;
    } bytes_and_rank_t;
    int32_t rcv_nbytes_tot;
    bytes_and_rank_t s_bytes_and_rank;
    bytes_and_rank_t *r_bytes_and_ranks = NULL;
    opal_hwloc_locality_t locality;
    opal_list_t vals;
    char *cpuset = NULL;
    opal_process_name_t pname;

    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s pmix:cray executing fence cache_global %p cache_local %p",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
                        (void *)mca_pmix_cray_component.cache_global,
                        (void *)mca_pmix_cray_component.cache_local);

    /* get the modex data from each local process and set the
     * localities to avoid having the MPI layer fetch data
     * for every process in the job */
    pname.jobid = OPAL_PROC_MY_NAME.jobid;

    /*
     * "unload" the cache_local/cache_global buffers, first copy
     * it so we can continue to use the local buffers if further
     * calls to put can be made
     */
    send_buffer = OBJ_NEW(opal_buffer_t);
    if (NULL == send_buffer) {
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    opal_dss.copy_payload(send_buffer, mca_pmix_cray_component.cache_global);
    /* after the unload we hold the raw payload in sbuf_ptr; it is freed
     * at fn_exit */
    opal_dss.unload(send_buffer, &sbuf_ptr, &s_bytes_and_rank.nbytes);
    s_bytes_and_rank.pmix_rank = pmix_rank;
    s_bytes_and_rank.name = OPAL_PROC_MY_NAME;

    r_bytes_and_ranks = (bytes_and_rank_t *)malloc(pmix_size * sizeof(bytes_and_rank_t));
    if (NULL == r_bytes_and_ranks) {
        rc = OPAL_ERR_OUT_OF_RESOURCE;
        goto fn_exit;
    }

    /*
     * gather up all the buffer sizes and rank order.
     * doing this step below since the cray pmi PMI_Allgather doesn't deliver
     * the gathered data necessarily in PMI rank order, although the order stays
     * the same for the duration of a job - assuming no node failures.
     */
    if (PMI_SUCCESS != (rc = PMI_Allgather(&s_bytes_and_rank,r_bytes_and_ranks,sizeof(bytes_and_rank_t)))) {
        OPAL_PMI_ERROR(rc,"PMI_Allgather");
        rc = OPAL_ERR_COMM_FAILURE;
        goto fn_exit;
    }

    for (rcv_nbytes_tot=0,i=0; i < pmix_size; i++) {
        rcv_nbytes_tot += r_bytes_and_ranks[i].nbytes;
    }

    opal_output_verbose(20, opal_pmix_base_framework.framework_output,
                        "%s pmix:cray total number of bytes to receive %d",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), rcv_nbytes_tot);

    rcv_buff = (char *) malloc(rcv_nbytes_tot * sizeof(char));
    if (NULL == rcv_buff) {
        rc = OPAL_ERR_OUT_OF_RESOURCE;
        goto fn_exit;
    }

    all_lens = (int *)malloc(sizeof(int) * pmix_size);
    if (NULL == all_lens) {
        rc = OPAL_ERR_OUT_OF_RESOURCE;
        goto fn_exit;
    }
    /* lengths are handed to PMI_Allgatherv indexed by pmix rank, not by
     * position in the gathered array */
    for (i=0; i< pmix_size; i++) {
        all_lens[r_bytes_and_ranks[i].pmix_rank] = r_bytes_and_ranks[i].nbytes;
    }

    if (PMI_SUCCESS != (rc = PMI_Allgatherv(sbuf_ptr,s_bytes_and_rank.nbytes,rcv_buff,all_lens))) {
        OPAL_PMI_ERROR(rc,"PMI_Allgatherv");
        rc = OPAL_ERR_COMM_FAILURE;
        goto fn_exit;
    }

    OBJ_RELEASE(send_buffer);
    send_buffer = NULL;

    buf = OBJ_NEW(opal_buffer_t);
    if (buf == NULL) {
        rc = OPAL_ERR_OUT_OF_RESOURCE;
        goto fn_exit;
    }

    for (cptr = rcv_buff, i=0; i < pmix_size; i++) {

        id = r_bytes_and_ranks[i].name;

        buf->base_ptr = NULL;  /* TODO: ugh */
        if (OPAL_SUCCESS != (rc = opal_dss.load(buf, (void *)cptr, r_bytes_and_ranks[i].nbytes))) {
            OPAL_PMI_ERROR(rc,"pmix:cray opal_dss.load failed");
            goto fn_exit;
        }

        /* unpack and stuff in to the dstore; the loop runs until unpack
         * reports a non-success status, which is taken as "no more
         * values in this process's chunk" */
        cnt = 1;
        while (OPAL_SUCCESS == (rc = opal_dss.unpack(buf, &kp, &cnt, OPAL_VALUE))) {
            opal_output_verbose(20, opal_pmix_base_framework.framework_output,
                                "%s pmix:cray unpacked kp with key %s type(%d) for id  %s",
                                OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), kp->key, kp->type, OPAL_NAME_PRINT(id));
            if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&id, kp))) {
                OPAL_ERROR_LOG(rc);
                /* the unpacked value is ours to release on failure too */
                OBJ_RELEASE(kp);
                goto fn_exit;
            }
            OBJ_RELEASE(kp);
            cnt = 1;
        }

        cptr += r_bytes_and_ranks[i].nbytes;
    }

    /* buf only ever pointed into rcv_buff, so detach before releasing */
    buf->base_ptr = NULL;  /* TODO: ugh */
    OBJ_RELEASE(buf);
    buf = NULL;

    opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                        "%s pmix:cray kvs_fence complete",
                        OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));

#if OPAL_HAVE_HWLOC
    /* fetch my cpuset */
    OBJ_CONSTRUCT(&vals, opal_list_t);
    if (OPAL_SUCCESS == (rc = opal_pmix_base_fetch(&pmix_pname,
                                                   OPAL_PMIX_CPUSET, &vals))) {
        kp = (opal_value_t*)opal_list_get_first(&vals);
        /* strdup(NULL) is undefined; leave cpuset NULL in that case */
        if (NULL != kp->data.string) {
            cpuset = strdup(kp->data.string);
        }
    }
    OPAL_LIST_DESTRUCT(&vals);
#endif

    /* we only need to set locality for each local rank as "not found"
     * equates to "non-local" */
    for (i=0; i < pmix_nlranks; i++) {
        id.vpid = pmix_lranks[i];
        id.jobid = pmix_jobid;
        /* the locality is stored under pname; its vpid was previously
         * never assigned, so the store used an indeterminate rank */
        pname.vpid = pmix_lranks[i];

        opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                            "%s checking out if %s is local to me",
                            OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
                            OPAL_NAME_PRINT(id));
        /* fetch cpuset for this vpid */
#if OPAL_HAVE_HWLOC
        OBJ_CONSTRUCT(&vals, opal_list_t);
        if (OPAL_SUCCESS != (rc = opal_pmix_base_fetch(&id,
                                                       OPAL_PMIX_CPUSET, &vals))) {
            opal_output_verbose(2, opal_pmix_base_framework.framework_output,
                                "%s cpuset for local proc %s not found",
                                OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
                                OPAL_NAME_PRINT(id));
            OPAL_LIST_DESTRUCT(&vals);
            /* even though the cpuset wasn't found, we at least know it is
             * on the same node with us */
            locality = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE;
        } else {
            kp = (opal_value_t*)opal_list_get_first(&vals);
            if (NULL == kp->data.string) {
                /* if we share a node, but we don't know anything more, then
                 * mark us as on the node as this is all we know */
                locality = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE;
            } else {
                /* determine relative location on our node */
                locality = opal_hwloc_base_get_relative_locality(opal_hwloc_topology,
                                                                 cpuset,
                                                                 kp->data.string);
            }
            OPAL_LIST_DESTRUCT(&vals);
        }
#else
        /* all we know is we share a node */
        locality = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE;
#endif
        OPAL_OUTPUT_VERBOSE((1, opal_pmix_base_framework.framework_output,
                             "%s pmix:cray proc %s locality %s",
                             OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
                             OPAL_NAME_PRINT(id), opal_hwloc_base_print_locality(locality)));

        OBJ_CONSTRUCT(&kvn, opal_value_t);
        kvn.key = strdup(OPAL_PMIX_LOCALITY);
        kvn.type = OPAL_UINT16;
        kvn.data.uint16 = locality;
        opal_pmix_base_store(&pname, &kvn);
        OBJ_DESTRUCT(&kvn);
    }

    /* Reaching this point means the exchange and the locality pass both
     * ran to completion.  rc still holds whatever the last unpack/fetch
     * returned (the unpack loop always ends on a non-success status, and
     * a missing cpuset is handled above), so it must be reset rather than
     * reported to the caller as a failure. */
    rc = OPAL_SUCCESS;

fn_exit:
    /* common cleanup for success and every error path */
    if (NULL != send_buffer) {
        OBJ_RELEASE(send_buffer);
    }
    if (NULL != buf) {
        /* never let the buffer free memory that belongs to rcv_buff */
        buf->base_ptr = NULL;
        OBJ_RELEASE(buf);
    }
    free(sbuf_ptr);
    free(cpuset);
    free(all_lens);
    free(rcv_buff);
    free(r_bytes_and_ranks);
    return rc;
}