/* gasneti_bootstrapBarrier */
void gasneti_bootstrapBarrier_pmi(void) {
#if USE_PMI2_API
 #if GASNETI_PMI2_FENCE_IS_BARRIER
  PMI2_KVS_Fence();
 #else
  static unsigned counter;
  char v[16];
  int i;
  snprintf(kvs_key, max_key_len, "B%u-%u", counter, (unsigned)gasneti_mynode);
  snprintf(v, sizeof(v), "%u", counter);
  do_kvs_put(v, sizeof(v));
  do_kvs_fence();
  for (i = 0; i < gasneti_nodes; ++i) {
    if (i == gasneti_mynode) continue;
    snprintf(kvs_key, max_key_len, "B%u-%u", counter, (unsigned)i);
    do_kvs_get(v, sizeof(v));
    if (atoi(v) != counter)
      gasneti_fatalerror("barrier failed: exp %u got %s\n", counter, v);
  }
  counter++;
 #endif
#else
  PMI_Barrier();
#endif
}
static int pmi_barrier(void)
{
    int rc;

    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
                         "%s grpcomm:pmi entering barrier",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* if I am alone, just return */
    if (1 == orte_process_info.num_procs) {
        OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
                             "%s grpcomm:pmi:barrier only one proc",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        return ORTE_SUCCESS;
    }

    /* use the PMI barrier function */
    if (PMI_SUCCESS != (rc = PMI_Barrier())) {
        ORTE_PMI_ERROR(rc, "PMI_Barrier");
        return ORTE_ERROR;
    }

    OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base.output,
                         "%s grpcomm:pmi barrier complete",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    return ORTE_SUCCESS;
}
/*** MODEX SECTION ***/
static int modex(orte_grpcomm_collective_t *coll)
{
    char *rml_uri;
    orte_vpid_t v;
    orte_process_name_t name;
    int rc;

    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base.output,
                         "%s grpcomm:pmi: modex entered",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* commit our modex info */
#if WANT_CRAY_PMI2_EXT
    PMI2_KVS_Fence();
#else
    {
        int rc;
        if (PMI_SUCCESS != (rc = PMI_KVS_Commit(pmi_kvs_name))) {
            ORTE_PMI_ERROR(rc, "PMI_KVS_Commit");
            return ORTE_ERR_FATAL;
        }
        /* Barrier here to ensure all other procs have committed */
        PMI_Barrier();
    }
#endif

    /* cycle thru all my peers and collect their contact info in
     * case I need to send an RML message to them */
    name.jobid = ORTE_PROC_MY_NAME->jobid;
    for (v=0; v < orte_process_info.num_procs; v++) {
        if (v == ORTE_PROC_MY_NAME->vpid) {
            continue;
        }
        name.vpid = v;
        if (ORTE_SUCCESS != (rc = orte_db.fetch(&name, ORTE_DB_RMLURI,
                                                (void **)&rml_uri, OPAL_STRING))) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }
        OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base.output,
                             "%s grpcomm:pmi: proc %s oob endpoint %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(&name), rml_uri));
        /* set the contact info into the hash table */
        if (ORTE_SUCCESS != (rc = orte_rml.set_contact_info(rml_uri))) {
            free(rml_uri);
            return rc;
        }
        free(rml_uri);
    }

    /* execute the callback */
    coll->active = false;
    if (NULL != coll->cbfunc) {
        coll->cbfunc(NULL, coll->cbdata);
    }
    return ORTE_SUCCESS;
}
void do_kvs_fence(void) {
#if USE_PMI2_API
  PMI2_KVS_Fence();
#else
  PMI_KVS_Commit(kvs_name);
  PMI_Barrier();
#endif
}
static int kvs_commit(void)
{
#if WANT_CRAY_PMI2_EXT
    return PMI2_KVS_Fence();
#else
    int rc;

    if (PMI_SUCCESS != (rc = PMI_KVS_Commit(pmi_kvs_name))) {
        return rc;
    }
    /* Barrier here to ensure all other procs have committed */
    return PMI_Barrier();
#endif
}
/* this function is not used in pmi2 */
static int publish_node_id(MPIDI_PG_t *pg, int our_pg_rank)
{
    int mpi_errno = MPI_SUCCESS;
    int pmi_errno;
    int ret;
    char *key;
    int key_max_sz;
    char *kvs_name;
    MPIU_CHKLMEM_DECL(1);

    /* set MPIU_hostname */
    ret = gethostname(MPIU_hostname, MAX_HOSTNAME_LEN);
    MPIR_ERR_CHKANDJUMP2(ret == -1, mpi_errno, MPI_ERR_OTHER, "**sock_gethost",
                         "**sock_gethost %s %d", MPIU_Strerror(errno), errno);
    MPIU_hostname[MAX_HOSTNAME_LEN-1] = '\0';

    /* Allocate space for pmi key */
    pmi_errno = PMI_KVS_Get_key_length_max(&key_max_sz);
    MPIR_ERR_CHKANDJUMP1(pmi_errno, mpi_errno, MPI_ERR_OTHER, "**fail",
                         "**fail %d", pmi_errno);

    MPIU_CHKLMEM_MALLOC(key, char *, key_max_sz, mpi_errno, "key");

    mpi_errno = MPIDI_PG_GetConnKVSname(&kvs_name);
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

    /* Put my hostname id */
    if (pg->size > 1) {
        memset(key, 0, key_max_sz);
        MPL_snprintf(key, key_max_sz, "hostname[%d]", our_pg_rank);

        pmi_errno = PMI_KVS_Put(kvs_name, key, MPIU_hostname);
        MPIR_ERR_CHKANDJUMP1(pmi_errno != PMI_SUCCESS, mpi_errno, MPI_ERR_OTHER,
                             "**pmi_kvs_put", "**pmi_kvs_put %d", pmi_errno);

        pmi_errno = PMI_KVS_Commit(kvs_name);
        MPIR_ERR_CHKANDJUMP1(pmi_errno != PMI_SUCCESS, mpi_errno, MPI_ERR_OTHER,
                             "**pmi_kvs_commit", "**pmi_kvs_commit %d", pmi_errno);

        pmi_errno = PMI_Barrier();
        MPIR_ERR_CHKANDJUMP1(pmi_errno != PMI_SUCCESS, mpi_errno, MPI_ERR_OTHER,
                             "**pmi_barrier", "**pmi_barrier %d", pmi_errno);
    }

 fn_exit:
    MPIU_CHKLMEM_FREEALL();
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
static int pmi_barrier(orte_grpcomm_collective_t *coll)
{
    int rc;

    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_framework.framework_output,
                         "%s grpcomm:pmi entering barrier",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* if I am alone, just execute the callback */
    if (1 == orte_process_info.num_procs) {
        OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_framework.framework_output,
                             "%s grpcomm:pmi:barrier only one proc",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        coll->active = false;
        if (NULL != coll->cbfunc) {
            coll->cbfunc(NULL, coll->cbdata);
        }
        return ORTE_SUCCESS;
    }

#if WANT_PMI2_SUPPORT
    /* PMI2 doesn't provide a barrier, so use the Fence function here */
    if (PMI_SUCCESS != (rc = PMI2_KVS_Fence())) {
        OPAL_PMI_ERROR(rc, "PMI2_KVS_Fence");
        return ORTE_ERROR;
    }
#else
    /* use the PMI barrier function */
    if (PMI_SUCCESS != (rc = PMI_Barrier())) {
        OPAL_PMI_ERROR(rc, "PMI_Barrier");
        return ORTE_ERROR;
    }
#endif

    OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_framework.framework_output,
                         "%s grpcomm:pmi barrier complete",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* execute the callback */
    coll->active = false;
    if (NULL != coll->cbfunc) {
        coll->cbfunc(NULL, coll->cbdata);
    }
    return ORTE_SUCCESS;
}
static int test_item7(void)
{
    int rc = 0;
    char tkey[100];
    char tval[100];
    char val[100];
    int i = 0;

    for (i = 0; i < size; i++) {
        sprintf(tkey, "KEY-%d", i);
        sprintf(tval, "VALUE-%d", i);
        if (i == rank) {
            if (PMI_SUCCESS != (rc = PMI_KVS_Put(jobid, tkey, tval))) {
                log_fatal("PMI_KVS_Put [%s=%s] %d\n", tkey, tval, rc);
                return rc;
            }
        }
    }

    if (PMI_SUCCESS != (rc = PMI_KVS_Commit(jobid))) {
        log_fatal("PMI_KVS_Commit %d\n", rc);
        return rc;
    }

    if (PMI_SUCCESS != (rc = PMI_Barrier())) {
        log_fatal("PMI_Barrier %d\n", rc);
        return rc;
    }

    for (i = 0; i < size; i++) {
        sprintf(tkey, "KEY-%d", i);
        sprintf(tval, "VALUE-%d", i);
        if (PMI_SUCCESS != (rc = PMI_KVS_Get(jobid, tkey, val, sizeof(val)))) {
            log_fatal("PMI_KVS_Get [%s=?] %d\n", tkey, rc);
            return rc;
        }
        log_info("tkey=%s tval=%s val=%s\n", tkey, tval, val);
        log_assert(!strcmp(tval, val), "value does not meet expectation");
    }

    return rc;
}
int mca_common_pmi_barrier()
{
    int rc;

#if WANT_PMI2_SUPPORT
    if( mca_common_pmi_version == 2 ){
        /* PMI2 doesn't provide a barrier, so use the Fence function here */
        if (PMI2_SUCCESS != (rc = PMI2_KVS_Fence())) {
            // FIX ME: OPAL_PMI2_ERROR(rc, "PMI2_KVS_Fence");
            return OPAL_ERROR;
        }
    } else
#endif
    {
        /* use the PMI barrier function */
        if (PMI_SUCCESS != (rc = PMI_Barrier())) {
            OPAL_PMI_ERROR(rc, "PMI_Barrier");
            return OPAL_ERROR;
        }
    }

    return OPAL_SUCCESS;
}
/***************
 * Calculate the names of the shared objects and semaphores.
 *
 * Name scheme:
 *   shared memory: charm_pxshm_<recvernoderank>_<sendernoderank>
 *   semaphore    : charm_pxshm_<recvernoderank>_<sendernoderank>.sem
 *                  (the semaphore for that shared object)
 * The semaphore name we use is the same as the shared memory object name;
 * the POSIX library adds the semaphore tag (on Linux at least; other
 * machines might need more portable code).
 *
 * Then open these shared objects and semaphores.
 *********/
void setupSharedBuffers(){
    int i=0;

    allocBufNameStrings(&(pxshmContext->recvBufNames));
    MACHSTATE(3,"allocBufNameStrings for recvBufNames done");
    MEMDEBUG(CmiMemoryCheck());

    allocBufNameStrings(&(pxshmContext->sendBufNames));
    MACHSTATE(3,"allocBufNameStrings for sendBufNames done");

    for(i=0;i<pxshmContext->nodesize;i++){
        if(i != pxshmContext->noderank){
            snprintf(pxshmContext->recvBufNames[i],NAMESTRLEN-1,"%s_%d_%d",
                     pxshmContext->prefixStr,
                     pxshmContext->noderank+pxshmContext->nodestart,
                     i+pxshmContext->nodestart);
            MACHSTATE2(3,"recvBufName %s with rank %d",pxshmContext->recvBufNames[i],i);
            snprintf(pxshmContext->sendBufNames[i],NAMESTRLEN-1,"%s_%d_%d",
                     pxshmContext->prefixStr,
                     i+pxshmContext->nodestart,
                     pxshmContext->noderank+pxshmContext->nodestart);
            MACHSTATE2(3,"sendBufName %s with rank %d",pxshmContext->sendBufNames[i],i);
        }
    }

    createShmObjectsAndSems(&(pxshmContext->recvBufs),pxshmContext->recvBufNames);
    createShmObjectsAndSems(&(pxshmContext->sendBufs),pxshmContext->sendBufNames);

    for(i=0;i<pxshmContext->nodesize;i++){
        if(i != pxshmContext->noderank){
            //CmiAssert(pxshmContext->sendBufs[i].header->count == 0);
            pxshmContext->sendBufs[i].header->count = 0;
            pxshmContext->sendBufs[i].header->bytes = 0;
        }
    }

#if CMK_SMP && ( CMK_CRAYXE || CMK_CRAYXC )
    if (PMI_Barrier() != GNI_RC_SUCCESS) return;
#else
    if (CmiBarrier() != 0) return;
#endif
    freeSharedBuffers();
    pxshm_freed = 1;
}
static int pmi_barrier(void)
{
    int rc;

    OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
                         "%s grpcomm:pmi entering barrier",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* if I am alone, just return */
    if (1 == orte_process_info.num_procs) {
        OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_output,
                             "%s grpcomm:pmi:barrier only one proc",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        return ORTE_SUCCESS;
    }

#if WANT_CRAY_PMI2_EXT
    /* Cray doesn't provide a barrier, so use the Fence function here */
    if (PMI_SUCCESS != (rc = PMI2_KVS_Fence())) {
        ORTE_PMI_ERROR(rc, "PMI2_KVS_Fence");
        return ORTE_ERROR;
    }
#else
    /* use the PMI barrier function */
    if (PMI_SUCCESS != (rc = PMI_Barrier())) {
        ORTE_PMI_ERROR(rc, "PMI_Barrier");
        return ORTE_ERROR;
    }
#endif

    OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output,
                         "%s grpcomm:pmi barrier complete",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    return ORTE_SUCCESS;
}
int main(int argc,char **argv) { #ifdef __CRAYXE int nelems = 128; int i; int pe = -1; int npes = -1; int fail_count = 0; long *source = NULL; long *target = NULL; dmapp_return_t status; dmapp_rma_attrs_t actual_args; dmapp_jobinfo_t job; dmapp_seg_desc_t *seg = NULL; /* Initialize DMAPP resources before executing any other DMAPP calls. */ status = dmapp_init(NULL, &actual_args); if (status != DMAPP_RC_SUCCESS) { fprintf(stderr,"\n dmapp_init FAILED: %d\n", status); exit(1); } /* Allocate remotely accessible memory for source and target buffers. Only memory in the data segment or the sheap is remotely accessible. Here we allocate from the sheap. */ source = (long *)dmapp_sheap_malloc(nelems*sizeof(long)); target = (long *)dmapp_sheap_malloc(nelems*sizeof(long)); if ((source == NULL) || (target == NULL)) { fprintf(stderr,"\n dmapp_sheap_malloc FAILED\n"); exit(1); } for (i=0; i<nelems; i++) { source[i] = i; target[i] = -9L; } /* Synchronize to make sure everyone's buffers are initialized before data transfer is started. */ PMI_Barrier(); /* Retrieve information about job details, such as PE id and number of PEs. */ status = dmapp_get_jobinfo(&job); if (status != DMAPP_RC_SUCCESS) { fprintf(stderr,"\n dmapp_get_jobinfo FAILED: %d\n", status); exit(1); } pe = job.pe; npes = job.npes; /* Retrieve information about RMA attributes, such as offload_threshold and routing modes. */ status = dmapp_get_rma_attrs(&actual_args); if (status != DMAPP_RC_SUCCESS) { fprintf(stderr,"\n dmapp_get_rma_attrs FAILED: %d\n", status); exit(1); } /* Specify in which segment the remote memory region (the source) lies. In this case, it is the sheap (see above). */ seg = &(job.sheap_seg); fprintf(stderr," Hello from PE %d of %d, using seg start %p, seg size 0x%lx, offload_threshold %d\n", pe, npes, seg->addr, (unsigned long)seg->len, actual_args.offload_threshold); fprintf(stderr,"\n PE %d getting %d nelems from addr %p on PE %d to local addr %p", pe, nelems, (void *)source, npes-pe-1, (void *)source); /* Execute GET operation from remote memory region source on PE Y into local memory region target on PE X. */ status = dmapp_get(target, source, seg, npes-pe-1, nelems, DMAPP_QW); if (status != DMAPP_RC_SUCCESS) { fprintf(stderr,"\n dmapp_get FAILED: %d\n", status); exit(1); } /* Synchronize before verifying the data. */ PMI_Barrier(); /* Verify data received in target buffer. */ for (i=0; i<nelems; i++) { if (target[i] != i) { fprintf(stderr,"\n PE %d: target[%d] is %ld, should be %ld", pe, i, target[i], (long)i); fail_count++; } } if (fail_count == 0) fprintf(stderr,"\n dmapp_sample_get PASSED\n"); else fprintf(stderr,"\n dmapp_sample_get FAILED: %d wrong values\n", fail_count); /* Free buffers allocated from sheap. */ dmapp_sheap_free(target); dmapp_sheap_free(source); /* Release DMAPP resources. This is a mandatory call. */ status = dmapp_finalize(); if (status != DMAPP_RC_SUCCESS) { fprintf(stderr,"\n dmapp_finalize FAILED: %d\n", status); exit(1); } #endif return(0); }
static int InitPscomConnections(pscom_socket_t *socket) { char key[50]; unsigned long guard_pmi_key = MAGIC_PMI_KEY; int i; int mpi_errno = MPI_SUCCESS; int pg_rank = MPIDI_Process.my_pg_rank; int pg_size = MPIDI_Process.my_pg_size; char *pg_id = MPIDI_Process.pg_id_name; char *listen_socket; char **psp_port = NULL; /* Distribute my contact information */ snprintf(key, sizeof(key), "pscom%d", pg_rank); listen_socket = MPL_strdup(pscom_listen_socket_ondemand_str(socket)); PMICALL(PMI_KVS_Put(pg_id, key, listen_socket)); #define IPSCOM_VERSION "pscom_v5.0" i_version_set(pg_id, pg_rank, IPSCOM_VERSION); PMICALL(PMI_KVS_Commit(pg_id)); PMICALL(PMI_Barrier()); i_version_check(pg_id, pg_rank, IPSCOM_VERSION); init_grank_port_mapping(); /* Get portlist */ psp_port = MPL_malloc(pg_size * sizeof(*psp_port), MPL_MEM_OBJECT); assert(psp_port); for (i = 0; i < pg_size; i++) { char val[100]; unsigned long guard_pmi_value = MAGIC_PMI_VALUE; if (i != pg_rank) { snprintf(key, sizeof(key), "pscom%d", i); checked_PMI_KVS_Get(pg_id, key, val, sizeof(val)); /* simple_pmi.c has a bug.(fixed in mpich2-1.0.5) Test for the bugfix: */ assert(guard_pmi_value == MAGIC_PMI_VALUE); assert(guard_pmi_key == MAGIC_PMI_KEY); } else { /* myself: Dont use PMI_KVS_Get, because this fail in the case of no pm (SINGLETON_INIT_BUT_NO_PM) */ strcpy(val, listen_socket); } psp_port[i] = MPL_strdup(val); } /* Create all connections */ for (i = 0; i < pg_size; i++) { pscom_connection_t *con; pscom_err_t rc; const char *dest; dest = psp_port[i]; con = pscom_open_connection(socket); rc = pscom_connect_socket_str(con, dest); if (rc != PSCOM_SUCCESS) { PRINTERROR("Connecting %s to %s (rank %d to %d) failed : %s", listen_socket, dest, pg_rank, i, pscom_err_str(rc)); goto fn_fail; } grank2con_set(i, con); } pscom_stop_listen(socket); fn_exit: if (psp_port) { for (i = 0; i < pg_size; i++) { MPL_free(psp_port[i]); psp_port[i] = NULL; } MPL_free(psp_port); } MPL_free(listen_socket); return mpi_errno; /* --- */ fn_fail: mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, "InitPscomConnections", __LINE__, MPI_ERR_OTHER, "**connfailed", 0); goto fn_exit; }
static int InitPortConnections(pscom_socket_t *socket) { char key[50]; unsigned long guard_pmi_key = MAGIC_PMI_KEY; int i; int mpi_errno = MPI_SUCCESS; int pg_rank = MPIDI_Process.my_pg_rank; int pg_size = MPIDI_Process.my_pg_size; char *pg_id = MPIDI_Process.pg_id_name; char *listen_socket; char **psp_port = NULL; /* Distribute my contact information */ snprintf(key, sizeof(key), "psp%d", pg_rank); listen_socket = MPL_strdup(pscom_listen_socket_str(socket)); PMICALL(PMI_KVS_Put(pg_id, key, listen_socket)); #define INIT_VERSION "ps_v5.0" i_version_set(pg_id, pg_rank, INIT_VERSION); PMICALL(PMI_KVS_Commit(pg_id)); PMICALL(PMI_Barrier()); i_version_check(pg_id, pg_rank, INIT_VERSION); init_grank_port_mapping(); /* Get portlist */ psp_port = MPL_malloc(pg_size * sizeof(*psp_port), MPL_MEM_OBJECT); assert(psp_port); for (i = 0; i < pg_size; i++) { char val[100]; unsigned long guard_pmi_value = MAGIC_PMI_VALUE; if (i != pg_rank) { snprintf(key, sizeof(key), "psp%d", i); checked_PMI_KVS_Get(pg_id, key, val, sizeof(val)); /* simple_pmi.c has a bug.(fixed in mpich2-1.0.5) Test for the bugfix: */ assert(guard_pmi_value == MAGIC_PMI_VALUE); assert(guard_pmi_key == MAGIC_PMI_KEY); } else { /* myself: Dont use PMI_KVS_Get, because this fail in the case of no pm (SINGLETON_INIT_BUT_NO_PM) */ strcpy(val, listen_socket); } psp_port[i] = MPL_strdup(val); } /* connect ranks pg_rank..(pg_rank + pg_size/2) */ for (i = 0; i <= pg_size / 2; i++) { int dest = (pg_rank + i) % pg_size; int src = (pg_rank + pg_size - i) % pg_size; if (!i || (pg_rank / i) % 2) { /* connect, accept */ if (do_connect(socket, pg_rank, dest, psp_port[dest])) goto fn_fail; if (!i || src != dest) { do_wait(pg_rank, src); } } else { /* accept, connect */ do_wait(pg_rank, src); if (src != dest) { if (do_connect(socket, pg_rank, dest, psp_port[dest])) goto fn_fail; } } } /* Wait for all connections: (already done?) */ for (i = 0; i < pg_size; i++) { while (!grank2con_get(i)) { pscom_wait_any(); } } /* ToDo: */ pscom_stop_listen(socket); fn_exit: if (psp_port) { for (i = 0; i < pg_size; i++) { MPL_free(psp_port[i]); psp_port[i] = NULL; } MPL_free(psp_port); } MPL_free(listen_socket); return mpi_errno; /* --- */ fn_fail: mpi_errno = MPIR_Err_create_code(MPI_SUCCESS, MPIR_ERR_FATAL, "InitPortConnections", __LINE__, MPI_ERR_OTHER, "**connfailed", 0); goto fn_exit; }
static int s1_fence(opal_list_t *procs, int collect_data) { int rc; int32_t i; opal_value_t *kp, kvn; opal_hwloc_locality_t locality; opal_process_name_t s1_pname; opal_output_verbose(2, opal_pmix_base_framework.framework_output, "%s pmix:s1 called fence", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); /* use the PMI barrier function */ if (PMI_SUCCESS != (rc = PMI_Barrier())) { OPAL_PMI_ERROR(rc, "PMI_Barrier"); return OPAL_ERROR; } opal_output_verbose(2, opal_pmix_base_framework.framework_output, "%s pmix:s1 barrier complete", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); /* get the modex data from each local process and set the * localities to avoid having the MPI layer fetch data * for every process in the job */ s1_pname.jobid = OPAL_PROC_MY_NAME.jobid; if (!got_modex_data) { got_modex_data = true; /* we only need to set locality for each local rank as "not found" * equates to "non-local" */ for (i=0; i < nlranks; i++) { s1_pname.vpid = lranks[i]; rc = opal_pmix_base_cache_keys_locally(&s1_pname, OPAL_PMIX_CPUSET, &kp, pmix_kvs_name, pmix_vallen_max, kvs_get); if (OPAL_SUCCESS != rc) { OPAL_ERROR_LOG(rc); return rc; } if (NULL == kp || NULL == kp->data.string) { /* if we share a node, but we don't know anything more, then * mark us as on the node as this is all we know */ locality = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE; } else { /* determine relative location on our node */ locality = opal_hwloc_base_get_relative_locality(opal_hwloc_topology, opal_process_info.cpuset, kp->data.string); } if (NULL != kp) { OBJ_RELEASE(kp); } OPAL_OUTPUT_VERBOSE((1, opal_pmix_base_framework.framework_output, "%s pmix:s1 proc %s locality %s", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), OPAL_NAME_PRINT(s1_pname), opal_hwloc_base_print_locality(locality))); OBJ_CONSTRUCT(&kvn, opal_value_t); kvn.key = strdup(OPAL_PMIX_LOCALITY); kvn.type = OPAL_UINT16; kvn.data.uint16 = locality; opal_pmix_base_store(&s1_pname, &kvn); OBJ_DESTRUCT(&kvn); } } return OPAL_SUCCESS; }
int MPIDI_PG_SetConnInfo( int rank, const char *connString ) { #ifdef USE_PMI2_API int mpi_errno = MPI_SUCCESS; int len; char key[PMI2_MAX_KEYLEN]; MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_PG_SetConnInfo); MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_PG_SetConnInfo); len = MPL_snprintf(key, sizeof(key), "P%d-businesscard", rank); MPIR_ERR_CHKANDJUMP1(len < 0 || len > sizeof(key), mpi_errno, MPI_ERR_OTHER, "**snprintf", "**snprintf %d", len); mpi_errno = PMI2_KVS_Put(key, connString); if (mpi_errno) MPIR_ERR_POP(mpi_errno); mpi_errno = PMI2_KVS_Fence(); if (mpi_errno) MPIR_ERR_POP(mpi_errno); fn_exit: MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_PG_SetConnInfo); return mpi_errno; fn_fail: goto fn_exit; #else int mpi_errno = MPI_SUCCESS; int pmi_errno; int len; char key[128]; MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPIDI_PG_SetConnInfo); MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPIDI_PG_SetConnInfo); MPIR_Assert(pg_world->connData); len = MPL_snprintf(key, sizeof(key), "P%d-businesscard", rank); if (len < 0 || len > sizeof(key)) { MPIR_ERR_SETANDJUMP1(mpi_errno,MPI_ERR_OTHER, "**snprintf", "**snprintf %d", len); } pmi_errno = PMI_KVS_Put(pg_world->connData, key, connString ); if (pmi_errno != PMI_SUCCESS) { MPIR_ERR_SETANDJUMP1(mpi_errno,MPI_ERR_OTHER, "**pmi_kvs_put", "**pmi_kvs_put %d", pmi_errno); } pmi_errno = PMI_KVS_Commit(pg_world->connData); if (pmi_errno != PMI_SUCCESS) { MPIR_ERR_SETANDJUMP1(mpi_errno,MPI_ERR_OTHER, "**pmi_kvs_commit", "**pmi_kvs_commit %d", pmi_errno); } pmi_errno = PMI_Barrier(); if (pmi_errno != PMI_SUCCESS) { MPIR_ERR_SETANDJUMP1(mpi_errno,MPI_ERR_OTHER, "**pmi_barrier", "**pmi_barrier %d", pmi_errno); } fn_exit: MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPIDI_PG_SetConnInfo); return mpi_errno; fn_fail: goto fn_exit; #endif }
int main(int argc, char *argv[]) { int initialized, rank, size; int i, max_name_len, max_key_len, max_val_len; char *name, *key, *val; if (PMI_SUCCESS != PMI_Initialized(&initialized)) { return 1; } if (0 == initialized) { if (PMI_SUCCESS != PMI_Init(&initialized)) { return 1; } } if (PMI_SUCCESS != PMI_Get_rank(&rank)) { return 1; } if (PMI_SUCCESS != PMI_Get_size(&size)) { return 1; } printf("Hello, World. I am %d of %d\n", rank, size); if (PMI_SUCCESS != PMI_KVS_Get_name_length_max(&max_name_len)) { return 1; } name = (char*) malloc(max_name_len); if (NULL == name) return 1; if (PMI_SUCCESS != PMI_KVS_Get_key_length_max(&max_key_len)) { return 1; } key = (char*) malloc(max_key_len); if (NULL == key) return 1; if (PMI_SUCCESS != PMI_KVS_Get_value_length_max(&max_val_len)) { return 1; } val = (char*) malloc(max_val_len); if (NULL == val) return 1; if (PMI_SUCCESS != PMI_KVS_Get_my_name(name, max_name_len)) { return 1; } /* put my information */ snprintf(key, max_key_len, "pmi_hello-%lu-test", (long unsigned) rank); snprintf(val, max_val_len, "%lu", (long unsigned) rank); if (PMI_SUCCESS != PMI_KVS_Put(name, key, val)) { return 1; } if (PMI_SUCCESS != PMI_KVS_Commit(name)) { return 1; } if (PMI_SUCCESS != PMI_Barrier()) { return 1; } /* verify everyone's information */ for (i = 0 ; i < size ; ++i) { snprintf(key, max_key_len, "pmi_hello-%lu-test", (long unsigned) i); if (PMI_SUCCESS != PMI_KVS_Get(name, key, val, max_val_len)) { return 1; } if (i != strtol(val, NULL, 0)) { fprintf(stderr, "%d: Error: Expected %d, got %d\n", rank, i, (int) strtol(val, NULL, 0)); return 1; } } PMI_Finalize(); return 0; }
main (int argc, char **argv) { int i, j, rc; int nprocs, procid; int clique_size, *clique_ranks = NULL; char *jobid_ptr, *nprocs_ptr, *procid_ptr; int pmi_rank, pmi_size, kvs_name_len, key_len, val_len; PMI_BOOL initialized; char *key, *val, *kvs_name; struct timeval tv1, tv2; long delta_t; char tv_str[20]; gettimeofday(&tv1, NULL); /* Get process count and our id from environment variables */ jobid_ptr = getenv("SLURM_JOB_ID"); nprocs_ptr = getenv("SLURM_NPROCS"); procid_ptr = getenv("SLURM_PROCID"); if (jobid_ptr == NULL) { printf("WARNING: PMI test not run under SLURM\n"); nprocs = 1; procid = 0; } else if ((nprocs_ptr == NULL) || (procid_ptr == NULL)) { printf("FAILURE: SLURM environment variables not set\n"); exit(1); } else { nprocs = atoi(nprocs_ptr); procid = atoi(procid_ptr); } /* Validate process count and our id */ if ((nprocs < 1) || (nprocs > 9999)) { printf("FAILURE: Invalid nprocs %s\n", nprocs_ptr); exit(1); } if ((procid < 0) || (procid > 9999)) { printf("FAILURE: Invalid procid %s\n", procid_ptr); exit(1); } /* Get process count and size from PMI and validate */ if ((rc = PMI_Init(&i)) != PMI_SUCCESS) { printf("FAILURE: PMI_Init: %d\n", rc); exit(1); } initialized = PMI_FALSE; if ((rc = PMI_Initialized(&initialized)) != PMI_SUCCESS) { printf("FAILURE: PMI_Initialized: %d\n", rc); exit(1); } if (initialized != PMI_TRUE) { printf("FAILURE: PMI_Initialized returned false\n"); exit(1); } if ((rc = PMI_Get_rank(&pmi_rank)) != PMI_SUCCESS) { printf("FAILURE: PMI_Get_rank: %d\n", rc); exit(1); } #if _DEBUG printf("PMI_Get_rank = %d\n", pmi_rank); #endif if ((rc = PMI_Get_size(&pmi_size)) != PMI_SUCCESS) { printf("FAILURE: PMI_Get_size: %d, task %d\n", rc, pmi_rank); exit(1); } #if _DEBUG printf("PMI_Get_size = %d\n", pmi_size); #endif if (pmi_rank != procid) { printf("FAILURE: Rank(%d) != PROCID(%d)\n", pmi_rank, procid); exit(1); } if (pmi_size != nprocs) { printf("FAILURE: Size(%d) != NPROCS(%d), task %d\n", pmi_size, nprocs, pmi_rank); exit(1); } if ((rc = PMI_Get_clique_size(&clique_size)) != PMI_SUCCESS) { printf("FAILURE: PMI_Get_clique_size: %d, task %d\n", rc, pmi_rank); exit(1); } clique_ranks = malloc(sizeof(int) * clique_size); if ((rc = PMI_Get_clique_ranks(clique_ranks, clique_size)) != PMI_SUCCESS) { printf("FAILURE: PMI_Get_clique_ranks: %d, task %d\n", rc, pmi_rank); exit(1); } #if _DEBUG for (i=0; i<clique_size; i++) printf("PMI_Get_clique_ranks[%d]=%d\n", i, clique_ranks[i]); #endif free(clique_ranks); if ((rc = PMI_KVS_Get_name_length_max(&kvs_name_len)) != PMI_SUCCESS) { printf("FAILURE: PMI_KVS_Get_name_length_max: %d, task %d\n", rc, pmi_rank); exit(1); } #if _DEBUG printf("PMI_KVS_Get_name_length_max = %d\n", kvs_name_len); #endif kvs_name = malloc(kvs_name_len); if ((rc = PMI_KVS_Get_my_name(kvs_name, kvs_name_len)) != PMI_SUCCESS) { printf("FAILURE: PMI_KVS_Get_my_name: %d, task %d\n", rc, pmi_rank); exit(1); } #if _DEBUG printf("PMI_KVS_Get_my_name = %s\n", kvs_name); #endif if ((rc = PMI_KVS_Get_key_length_max(&key_len)) != PMI_SUCCESS) { printf("FAILURE: PMI_KVS_Get_key_length_max: %d, task %d\n", rc, pmi_rank); exit(1); } key = malloc(key_len); if ((rc = PMI_KVS_Get_value_length_max(&val_len)) != PMI_SUCCESS) { printf("FAILURE: PMI_KVS_Get_value_length_max: %d, task %d\n", rc, pmi_rank); exit(1); } #if _DEBUG printf("PMI_KVS_Get_value_length_max = %d\n", val_len); #endif val = malloc(val_len); /* Build and set some key=val pairs */ snprintf(key, key_len, "ATTR_1_%d", procid); snprintf(val, val_len, "A%d", procid+OFFSET_1); if ((rc = 
PMI_KVS_Put(kvs_name, key, val)) != PMI_SUCCESS) { printf("FAILURE: PMI_KVS_Put(%s,%s,%s): %d, task %d\n", kvs_name, key, val, rc, pmi_rank); exit(1); } #if _DEBUG printf("PMI_KVS_Put(%s,%s,%s)\n", kvs_name, key, val); #endif snprintf(key, key_len, "attr_2_%d", procid); snprintf(val, val_len, "B%d", procid+OFFSET_2); if ((rc = PMI_KVS_Put(kvs_name, key, val)) != PMI_SUCCESS) { printf("FAILURE: PMI_KVS_Put(%s,%s,%s): %d, task %d\n", kvs_name, key, val, rc, pmi_rank); exit(1); } #if _DEBUG printf("PMI_KVS_Put(%s,%s,%s)\n", kvs_name, key, val); #endif /* Sync KVS across all tasks */ if ((rc = PMI_KVS_Commit(kvs_name)) != PMI_SUCCESS) { printf("FAILURE: PMI_KVS_Commit: %d, task %d\n", rc, pmi_rank); exit(1); } #if _DEBUG printf("PMI_KVS_Commit completed\n"); #endif if ((rc = PMI_Barrier()) != PMI_SUCCESS) { printf("FAILURE: PMI_Barrier: %d, task %d\n", rc, pmi_rank); exit(1); } #if _DEBUG printf("PMI_Barrier completed\n"); #endif /* Now lets get all keypairs and validate */ for (i=0; i<pmi_size; i++) { snprintf(key, key_len, "ATTR_1_%d", i); if ((rc = PMI_KVS_Get(kvs_name, key, val, val_len)) != PMI_SUCCESS) { printf("FAILURE: PMI_KVS_Get(%s): %d, task %d\n", key, rc, pmi_rank); exit(1); } if ((val[0] != 'A') || ((atoi(&val[1])-OFFSET_1) != i)) { printf("FAILURE: Bad keypair %s=%s, task %d\n", key, val, pmi_rank); exit(1); } #if _DEBUG if ((pmi_size <= 8) && (pmi_rank == 0)) /* limit output */ printf("PMI_KVS_Get(%s,%s) %s\n", kvs_name, key, val); #endif snprintf(key, key_len, "attr_2_%d", i); if ((rc = PMI_KVS_Get(kvs_name, key, val, val_len)) != PMI_SUCCESS) { printf("FAILURE: PMI_KVS_Get(%s): %d, task %d\n", key, rc, pmi_rank); exit(1); } if ((val[0] != 'B') || ((atoi(&val[1])-OFFSET_2) != i)) { printf("FAILURE: Bad keypair %s=%s, task %d\n", key,val, pmi_rank); exit(1); } #if _DEBUG if ((pmi_size <= 8) && (pmi_rank == 1)) /* limit output */ printf("PMI_KVS_Get(%s,%s) %s\n", kvs_name, key, val); #endif } /* use iterator */ if ((rc = PMI_KVS_Iter_first(kvs_name, key, key_len, val, val_len)) != PMI_SUCCESS) { printf("FAILURE: PMI_KVS_iter_first: %d, task %d\n", rc, pmi_rank); exit(1); } for (i=0; ; i++) { if (key[0] == '\0') { if (i != (pmi_size * 2)) { printf("FAILURE: PMI_KVS_iter_next " "cycle count(%d, %d), task %d\n", i, pmi_size, pmi_rank); } break; } #if _DEBUG if ((pmi_size <= 8) && (pmi_rank == 1)) { /* limit output */ printf("PMI_KVS_Iter_next(%s,%d): %s=%s\n", kvs_name, i, key, val); } #endif if ((rc = PMI_KVS_Iter_next(kvs_name, key, key_len, val, val_len)) != PMI_SUCCESS) { printf("FAILURE: PMI_KVS_iter_next: %d, task %d\n", rc, pmi_rank); exit(1); } } /* Build some more key=val pairs */ snprintf(key, key_len, "ATTR_3_%d", procid); snprintf(val, val_len, "C%d", procid+OFFSET_1); if ((rc = PMI_KVS_Put(kvs_name, key, val)) != PMI_SUCCESS) { printf("FAILURE: PMI_KVS_Put(%s,%s,%s): %d, task %d\n", kvs_name, key, val, rc, pmi_rank); exit(1); } #if _DEBUG printf("PMI_KVS_Put(%s,%s,%s)\n", kvs_name, key, val); #endif snprintf(key, key_len, "attr_4_%d", procid); snprintf(val, val_len, "D%d", procid+OFFSET_2); if ((rc = PMI_KVS_Put(kvs_name, key, val)) != PMI_SUCCESS) { printf("FAILURE: PMI_KVS_Put(%s,%s,%s): %d, task %d\n", kvs_name, key, val, rc, pmi_rank); exit(1); } #if _DEBUG printf("PMI_KVS_Put(%s,%s,%s)\n", kvs_name, key, val); #endif /* Sync KVS across all tasks */ if ((rc = PMI_KVS_Commit(kvs_name)) != PMI_SUCCESS) { printf("FAILURE: PMI_KVS_Commit: %d, task %d\n", rc, pmi_rank); exit(1); } #if _DEBUG printf("PMI_KVS_Commit completed\n"); #endif if ((rc = PMI_Barrier()) != 
PMI_SUCCESS) { printf("FAILURE: PMI_Barrier: %d, task %d\n", rc, pmi_rank); exit(1); } #if _DEBUG printf("PMI_Barrier completed\n"); #endif /* Now lets get some keypairs and validate */ for (i=0; i<pmi_size; i++) { snprintf(key, key_len, "ATTR_1_%d", i); if ((rc = PMI_KVS_Get(kvs_name, key, val, val_len)) != PMI_SUCCESS) { printf("FAILURE: PMI_KVS_Get(%s): %d, task %d\n", key, rc, pmi_rank); exit(1); } if ((val[0] != 'A') || ((atoi(&val[1])-OFFSET_1) != i)) { printf("FAILURE: Bad keypair %s=%s, task %d\n", key, val, pmi_rank); exit(1); } #if _DEBUG if ((pmi_size <= 8) && (pmi_rank == 1)) /* limit output */ printf("PMI_KVS_Get(%s,%s) %s\n", kvs_name, key, val); #endif snprintf(key, key_len, "attr_4_%d", i); if ((rc = PMI_KVS_Get(kvs_name, key, val, val_len)) != PMI_SUCCESS) { printf("FAILURE: PMI_KVS_Get(%s): %d, task %d\n", key, rc, pmi_rank); exit(1); } if ((val[0] != 'D') || ((atoi(&val[1])-OFFSET_2) != i)) { printf("FAILURE: Bad keypair %s=%s, task %d\n", key,val, pmi_rank); exit(1); } #if _DEBUG if ((pmi_size <= 8) && (pmi_rank == 1)) /* limit output */ printf("PMI_KVS_Get(%s,%s) %s\n", kvs_name, key, val); #endif } /* Replicate the very heavy load that MVAPICH2 puts on PMI * This load exceeds that of MPICH2 by a very wide margin */ #if _DEBUG printf("Starting %d iterations each with %d PMI_KVS_Put and \n" " one each PMI_KVS_Commit and KVS_Barrier\n", BARRIER_CNT, PUTS_PER_BARRIER); fflush(stdout); #endif for (i=0; i<BARRIER_CNT; i++) { for (j=0; j<PUTS_PER_BARRIER; j++) { snprintf(key, key_len, "ATTR_%d_%d_%d", i, j, procid); snprintf(val, val_len, "C%d", procid+OFFSET_1); if ((rc = PMI_KVS_Put(kvs_name, key, val)) != PMI_SUCCESS) { printf("FAILURE: PMI_KVS_Put(%s,%s,%s): " "%d, task %d\n", kvs_name, key, val, rc, pmi_rank); exit(1); } } if ((rc= PMI_KVS_Commit(kvs_name)) != PMI_SUCCESS) { printf("FAILURE: PMI_KVS_Commit: %d, task %d\n", rc, pmi_rank); exit(1); } if ((rc = PMI_Barrier()) != PMI_SUCCESS) { printf("FAILURE: PMI_Barrier: %d, task %d\n", rc, pmi_rank); exit(1); } /* Don't bother with PMI_KVS_Get as those are all local * and do not put a real load on srun or the network */ } #if _DEBUG printf("Interative PMI calls successful\n"); #endif /* create new keyspace and test it */ if ((rc = PMI_KVS_Create(kvs_name, kvs_name_len)) != PMI_SUCCESS) { printf("FAILURE: PMI_KVS_Create: %d, task %d\n", rc, pmi_rank); exit(1); } #if _DEBUG printf("PMI_KVS_Create %s\n", kvs_name); #endif if ((rc = PMI_KVS_Put(kvs_name, "KVS_KEY", "KVS_VAL")) != PMI_SUCCESS) { printf("FAILURE: PMI_KVS_Put: %d, task %d\n", rc, pmi_rank); exit(1); } #if _DEBUG printf("PMI_KVS_Put(%s,KVS_KEY,KVS_VAL)\n", kvs_name); #endif if ((rc = PMI_KVS_Get(kvs_name, "KVS_KEY", val, val_len)) != PMI_SUCCESS) { printf("FAILURE: PMI_KVS_Get(%s, KVS_KEY): %d, task %d\n", kvs_name, rc, pmi_rank); exit(1); } #if _DEBUG printf("PMI_KVS_Get(%s,%s) %s\n", kvs_name, "KVS_KEY", val); #endif if ((rc = PMI_KVS_Destroy(kvs_name)) != PMI_SUCCESS) { printf("FAILURE: PMI_KVS_Destroy(%s): %d, task %d\n", kvs_name, rc, pmi_rank); exit(1); } if ((rc = PMI_KVS_Get(kvs_name, "KVS_KEY", val, val_len)) != PMI_ERR_INVALID_KVS) { printf("FAILURE: PMI_KVS_Get(%s, KVS_KEY): %d, task %d\n", kvs_name, rc, pmi_rank); exit(1); } if ((rc = PMI_Finalize()) != PMI_SUCCESS) { printf("FAILURE: PMI_Finalize: %d, task %d\n", rc, pmi_rank); exit(1); } if (_DEBUG || (pmi_rank < 4)) { gettimeofday(&tv2, NULL); delta_t = (tv2.tv_sec - tv1.tv_sec) * 1000000; delta_t += tv2.tv_usec - tv1.tv_usec; snprintf(tv_str, sizeof(tv_str), "usec=%ld", delta_t); printf("PMI 
test ran successfully, for task %d, %s\n", pmi_rank, tv_str); } if (pmi_rank == 0) { printf("NOTE: All failures reported, "); printf("but only first four successes reported\n"); } exit(0); }
void chpl_comm_ofi_oob_barrier(void) {
  DBG_PRINTF(DBG_OOB, "OOB barrier");
  PMI_CHK(PMI_Barrier());
}
int MPIDU_bc_table_create(int rank, int size, int *nodemap, void *bc, int bc_len, int same_len, int roots_only, void **bc_table, size_t ** bc_indices) { int rc, mpi_errno = MPI_SUCCESS; int start, end, i; int key_max, val_max, name_max, out_len, rem; char *kvsname = NULL, *key = NULL, *val = NULL, *val_p; int local_rank = -1, local_leader = -1; size_t my_bc_len = bc_len; MPIR_NODEMAP_get_local_info(rank, size, nodemap, &local_size, &local_rank, &local_leader); rc = PMI_KVS_Get_name_length_max(&name_max); MPIR_ERR_CHKANDJUMP(rc, mpi_errno, MPI_ERR_OTHER, "**pmi_kvs_get_name_length_max"); rc = PMI_KVS_Get_key_length_max(&key_max); MPIR_ERR_CHKANDJUMP(rc, mpi_errno, MPI_ERR_OTHER, "**pmi_kvs_get_key_length_max"); rc = PMI_KVS_Get_value_length_max(&val_max); MPIR_ERR_CHKANDJUMP(rc, mpi_errno, MPI_ERR_OTHER, "**pmi_kvs_get_value_length_max"); /* if business cards can be different length, use the max value length */ if (!same_len) bc_len = val_max; mpi_errno = MPIDU_shm_seg_alloc(bc_len * size, (void **) &segment, MPL_MEM_ADDRESS); if (mpi_errno) MPIR_ERR_POP(mpi_errno); mpi_errno = MPIDU_shm_seg_commit(&memory, &barrier, local_size, local_rank, local_leader, rank, MPL_MEM_ADDRESS); if (mpi_errno) MPIR_ERR_POP(mpi_errno); if (size == 1) { memcpy(segment, bc, my_bc_len); goto single; } kvsname = MPL_malloc(name_max, MPL_MEM_ADDRESS); MPIR_Assert(kvsname); rc = PMI_KVS_Get_my_name(kvsname, name_max); MPIR_ERR_CHKANDJUMP(rc, mpi_errno, MPI_ERR_OTHER, "**pmi_kvs_get_my_name"); val = MPL_malloc(val_max, MPL_MEM_ADDRESS); memset(val, 0, val_max); val_p = val; rem = val_max; rc = MPL_str_add_binary_arg(&val_p, &rem, "mpi", (char *) bc, my_bc_len); MPIR_ERR_CHKANDJUMP(rc, mpi_errno, MPI_ERR_OTHER, "**buscard"); MPIR_Assert(rem >= 0); key = MPL_malloc(key_max, MPL_MEM_ADDRESS); MPIR_Assert(key); if (!roots_only || rank == local_leader) { sprintf(key, "bc-%d", rank); rc = PMI_KVS_Put(kvsname, key, val); MPIR_ERR_CHKANDJUMP(rc, mpi_errno, MPI_ERR_OTHER, "**pmi_kvs_put"); rc = PMI_KVS_Commit(kvsname); MPIR_ERR_CHKANDJUMP(rc, mpi_errno, MPI_ERR_OTHER, "**pmi_kvs_commit"); } rc = PMI_Barrier(); MPIR_ERR_CHKANDJUMP(rc, mpi_errno, MPI_ERR_OTHER, "**pmi_barrier"); if (!roots_only) { start = local_rank * (size / local_size); end = start + (size / local_size); if (local_rank == local_size - 1) end += size % local_size; for (i = start; i < end; i++) { sprintf(key, "bc-%d", i); rc = PMI_KVS_Get(kvsname, key, val, val_max); MPIR_ERR_CHKANDJUMP(rc, mpi_errno, MPI_ERR_OTHER, "**pmi_kvs_get"); rc = MPL_str_get_binary_arg(val, "mpi", &segment[i * bc_len], bc_len, &out_len); MPIR_ERR_CHKANDJUMP(rc, mpi_errno, MPI_ERR_OTHER, "**argstr_missinghost"); } } else { int num_nodes, *node_roots; MPIR_NODEMAP_get_node_roots(nodemap, size, &node_roots, &num_nodes); start = local_rank * (num_nodes / local_size); end = start + (num_nodes / local_size); if (local_rank == local_size - 1) end += num_nodes % local_size; for (i = start; i < end; i++) { sprintf(key, "bc-%d", node_roots[i]); rc = PMI_KVS_Get(kvsname, key, val, val_max); MPIR_ERR_CHKANDJUMP(rc, mpi_errno, MPI_ERR_OTHER, "**pmi_kvs_get"); rc = MPL_str_get_binary_arg(val, "mpi", &segment[i * bc_len], bc_len, &out_len); MPIR_ERR_CHKANDJUMP(rc, mpi_errno, MPI_ERR_OTHER, "**argstr_missinghost"); } MPL_free(node_roots); } mpi_errno = MPIDU_shm_barrier(barrier, local_size); if (mpi_errno) MPIR_ERR_POP(mpi_errno); single: if (!same_len) { indices = MPL_malloc(size * sizeof(size_t), MPL_MEM_ADDRESS); MPIR_Assert(indices); for (i = 0; i < size; i++) indices[i] = bc_len * i; *bc_indices 
= indices; } fn_exit: MPL_free(kvsname); MPL_free(key); MPL_free(val); *bc_table = segment; return mpi_errno; fn_fail: goto fn_exit; }
int main(int argc,char **argv) { #ifdef __CRAYXE int max; int i; int pe = -1; int npes = -1; char * source = NULL; char * target = NULL; dmapp_return_t status; //dmapp_rma_attrs_t dmapp_config_in, dmapp_config_out; dmapp_rma_attrs_ext_t dmapp_config_in, dmapp_config_out; dmapp_jobinfo_t job; dmapp_seg_desc_t * seg = NULL; double t0, t1, dt; double bw; MPI_Init(&argc, &argv); /* Initialize DMAPP resources before executing any other DMAPP calls. */ //status = dmapp_init(NULL, &actual_args); dmapp_config_in.max_outstanding_nb = DMAPP_DEF_OUTSTANDING_NB; /* 512 */ dmapp_config_in.offload_threshold = DMAPP_OFFLOAD_THRESHOLD; /* 4096 */ //dmapp_config_in.put_relaxed_ordering = DMAPP_ROUTING_DETERMINISTIC; //dmapp_config_in.get_relaxed_ordering = DMAPP_ROUTING_DETERMINISTIC; dmapp_config_in.put_relaxed_ordering = DMAPP_ROUTING_ADAPTIVE; dmapp_config_in.get_relaxed_ordering = DMAPP_ROUTING_ADAPTIVE; dmapp_config_in.max_concurrency = 1; /* not thread-safe */ //dmapp_config_in.PI_ordering = DMAPP_PI_ORDERING_STRICT; dmapp_config_in.PI_ordering = DMAPP_PI_ORDERING_RELAXED; status = dmapp_init_ext( &dmapp_config_in, &dmapp_config_out ); assert(status==DMAPP_RC_SUCCESS); max = (argc>1) ? atoi(argv[1]) : 1000000; max *= 16; /* max must be a multiple of 16 for the test to work */ /* Allocate remotely accessible memory for source and target buffers. Only memory in the data segment or the sheap is remotely accessible. Here we allocate from the sheap. */ source = (char *)dmapp_sheap_malloc( max*sizeof(char) ); target = (char *)dmapp_sheap_malloc( max*sizeof(char) ); assert( (source!=NULL) && (target!=NULL)); memset (source,'S',max); memset (target,'T',max); /* Retrieve information about job details, such as PE id and number of PEs. */ status = dmapp_get_jobinfo(&job); assert(status==DMAPP_RC_SUCCESS); pe = job.pe; npes = job.npes; /* Retrieve information about RMA attributes, such as offload_threshold and routing modes. */ //status = dmapp_get_rma_attrs(&dmapp_config_out); status = dmapp_get_rma_attrs_ext(&dmapp_config_out); assert(status==DMAPP_RC_SUCCESS); /* Specify in which segment the remote memory region (the source) lies. In this case, it is the sheap (see above). 
*/ seg = &(job.sheap_seg); if (pe == 0) fprintf(stderr," Hello from PE %d of %d, using seg start %p, seg size 0x%lx, offload_threshold %d \n", pe, npes, seg->addr, (unsigned long)seg->len, dmapp_config_out.offload_threshold); fflush(stderr); PMI_Barrier(); if (pe == 0) { fprintf(stderr,"%d: max = %d bytes, dmapp_put using DMAPP_DQW \n", pe, max); for (i=1; i<(max/16); i*=2) { t0 = MPI_Wtime(); status = dmapp_put(target, seg, 1, source, i, DMAPP_DQW); t1 = MPI_Wtime(); assert(status==DMAPP_RC_SUCCESS); dt = t1-t0; bw = 16 * 1e-6 * (double)i / dt; fprintf(stderr,"%d: %12d bytes %12lf seconds = %lf MB/s \n", pe, 16*i, dt, bw); } } fflush(stderr); PMI_Barrier(); if (pe == 0) { fprintf(stderr,"%d: max = %d bytes, dmapp_put using DMAPP_QW \n", pe, max); for (i=1; i<(max/8); i*=2) { t0 = MPI_Wtime(); status = dmapp_put(target, seg, 1, source, i, DMAPP_QW); t1 = MPI_Wtime(); assert(status==DMAPP_RC_SUCCESS); dt = t1-t0; bw = 8 * 1e-6 * (double)i / dt; fprintf(stderr,"%d: %12d bytes %12lf seconds = %lf MB/s \n", pe, 8*i, dt, bw); } } fflush(stderr); PMI_Barrier(); if (pe == 0) { fprintf(stderr,"%d: max = %d bytes, dmapp_put using DMAPP_DW \n", pe, max); for (i=1; i<(max/4); i*=2) { t0 = MPI_Wtime(); status = dmapp_put(target, seg, 1, source, i, DMAPP_DW); t1 = MPI_Wtime(); assert(status==DMAPP_RC_SUCCESS); dt = t1-t0; bw = 4 * 1e-6 * (double)i / dt; fprintf(stderr,"%d: %12d bytes %12lf seconds = %lf MB/s \n", pe, 4*i, dt, bw); } } fflush(stderr); PMI_Barrier(); if (pe == 0) { fprintf(stderr,"%d: max = %d bytes, dmapp_put using DMAPP_BYTE \n", pe, max); for (i=1; i<max; i*=2) { t0 = MPI_Wtime(); status = dmapp_put(target, seg, 1, source, i, DMAPP_BYTE); t1 = MPI_Wtime(); assert(status==DMAPP_RC_SUCCESS); dt = t1-t0; bw = 1 * 1e-6 * (double)i / dt; fprintf(stderr,"%d: %12d bytes %12lf seconds = %lf MB/s \n", pe, 1*i, dt, bw); } } fflush(stderr); PMI_Barrier(); /* Free buffers allocated from sheap. */ dmapp_sheap_free(target); dmapp_sheap_free(source); /* Release DMAPP resources. This is a mandatory call. */ status = dmapp_finalize(); assert(status==DMAPP_RC_SUCCESS); MPI_Finalize(); #endif return(0); }
int main(int argc, char **argv) { #ifdef __CRAYXE int i,j; int me = -1; int size = -1; //int fail_count = 0; dmapp_return_t status; dmapp_rma_attrs_t actual_args = { 0 }, rma_args = { 0 }; dmapp_jobinfo_t job; dmapp_seg_desc_t *seg = NULL; /* Set the RMA parameters. */ rma_args.put_relaxed_ordering = DMAPP_ROUTING_ADAPTIVE; rma_args.max_outstanding_nb = DMAPP_DEF_OUTSTANDING_NB; rma_args.offload_threshold = DMAPP_OFFLOAD_THRESHOLD; rma_args.max_concurrency = 1; /* Initialize DMAPP. */ status = dmapp_init(&rma_args, &actual_args); assert(status==DMAPP_RC_SUCCESS); /* Get job related information. */ status = dmapp_get_jobinfo(&job); assert(status==DMAPP_RC_SUCCESS); me = job.pe; size = job.npes; seg = &(job.sheap_seg); /* Allocate and initialize the source and target arrays. */ long * source = (long *) dmapp_sheap_malloc( size * sizeof(long) ); assert(source!=NULL); long * target = (long *) dmapp_sheap_malloc( size * sizeof(long) ); assert(target!=NULL); for (i = 0; i < size; i++) source[i] = 0; for (i = 0; i < size; i++) target[i] = 0; /* Wait for all PEs to complete array initialization. */ PMI_Barrier(); /* compare-and-swap */ // // dmapp_return_t dmapp_acswap_qw( // IN void *target_addr /* local memory */, // IN void *source_addr /* remote memory */, // IN dmapp_seg_desc_t *source_seg /* remote segment */, // IN dmapp_pe_t source_pe /* remote rank */, // IN int64_t comperand, // IN int64_t swaperand); // for (i = 0; i < size; i++) if (i != me) { status = dmapp_acswap_qw(&source[i], &target[i], seg, (dmapp_pe_t)i, (int64_t)0, (int64_t)me); if (status==DMAPP_RC_SUCCESS) printf("%d: DMAPP_RC_SUCCESS\n",me); else if (status==DMAPP_RC_INVALID_PARAM) printf("%d: DMAPP_RC_INVALID_PARAM\n",me); else if (status==DMAPP_RC_ALIGNMENT_ERROR) printf("%d: DMAPP_RC_ALIGNMENT_ERROR\n",me); else if (status==DMAPP_RC_NO_SPACE) printf("%d: DMAPP_RC_NO_SPACE\n",me); else if (status==DMAPP_RC_TRANSACTION_ERROR) printf("%d: DMAPP_RC_TRANSACTION_ERROR\n",me); fflush(stdout); assert(status==DMAPP_RC_SUCCESS); } /* Wait for all PEs. */ PMI_Barrier(); /* see who won */ for (i = 0; i < size; i++) { if (i==me) { for (j = 0; j < size; j++) printf("me = %d target[%d] = %ld\n", me, i, target[i] ); printf("==========================================\n"); fflush(stdout); } PMI_Barrier(); } /* Finalize. */ status = dmapp_finalize(); assert(status==DMAPP_RC_SUCCESS); #endif return(0); }
static int s1_fence(opal_process_name_t *procs, size_t nprocs) { int rc; int32_t i; opal_value_t *kp, kvn; opal_hwloc_locality_t locality; opal_output_verbose(2, opal_pmix_base_framework.framework_output, "%s pmix:s1 called fence", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); /* check if there is partially filled meta key and put them */ if (0 != pmix_packed_data_offset && NULL != pmix_packed_data) { opal_pmix_base_commit_packed(pmix_packed_data, pmix_packed_data_offset, pmix_vallen_max, &pmix_pack_key, kvs_put); pmix_packed_data_offset = 0; free(pmix_packed_data); pmix_packed_data = NULL; } /* if we haven't already done it, ensure we have committed our values */ if (!s1_committed) { opal_output_verbose(2, opal_pmix_base_framework.framework_output, "%s pmix:s1 committing values", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); if (PMI_SUCCESS != (rc = PMI_KVS_Commit(pmix_kvs_name))) { OPAL_PMI_ERROR(rc, "PMI_KVS_Commit"); return OPAL_ERROR; } } opal_output_verbose(2, opal_pmix_base_framework.framework_output, "%s pmix:s1 performing barrier", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); /* use the PMI barrier function */ if (PMI_SUCCESS != (rc = PMI_Barrier())) { OPAL_PMI_ERROR(rc, "PMI_Barrier"); return OPAL_ERROR; } opal_output_verbose(2, opal_pmix_base_framework.framework_output, "%s pmix:s1 barrier complete", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); /* get the modex data from each local process and set the * localities to avoid having the MPI layer fetch data * for every process in the job */ if (!got_modex_data) { got_modex_data = true; /* we only need to set locality for each local rank as "not found" * equates to "non-local" */ for (i=0; i < s1_nlranks; i++) { s1_pname.vid = i; rc = opal_pmix_base_cache_keys_locally((opal_identifier_t*)&s1_pname, OPAL_DSTORE_CPUSET, &kp, pmix_kvs_name, pmix_vallen_max, kvs_get); if (OPAL_SUCCESS != rc) { OPAL_ERROR_LOG(rc); return rc; } #if OPAL_HAVE_HWLOC if (NULL == kp || NULL == kp->data.string) { /* if we share a node, but we don't know anything more, then * mark us as on the node as this is all we know */ locality = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE; } else { /* determine relative location on our node */ locality = opal_hwloc_base_get_relative_locality(opal_hwloc_topology, opal_process_info.cpuset, kp->data.string); } if (NULL != kp) { OBJ_RELEASE(kp); } #else /* all we know is we share a node */ locality = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE; #endif OPAL_OUTPUT_VERBOSE((1, opal_pmix_base_framework.framework_output, "%s pmix:s1 proc %s locality %s", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), OPAL_NAME_PRINT(*(opal_identifier_t*)&s1_pname), opal_hwloc_base_print_locality(locality))); OBJ_CONSTRUCT(&kvn, opal_value_t); kvn.key = strdup(OPAL_DSTORE_LOCALITY); kvn.type = OPAL_UINT16; kvn.data.uint16 = locality; (void)opal_dstore.store(opal_dstore_internal, (opal_identifier_t*)&s1_pname, &kvn); OBJ_DESTRUCT(&kvn); } } return OPAL_SUCCESS; }