int main(int argc, char* argv[])
{
    int status = MPI_SUCCESS;
    pami_result_t result = PAMI_ERROR;

    int provided = MPI_THREAD_SINGLE;
    MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
    if (provided < MPI_THREAD_MULTIPLE)
        exit(provided);

    /* initialize the second client */
    char * clientname = "";
    pami_client_t client;
    result = PAMI_Client_create(clientname, &client, NULL, 0);
    TEST_ASSERT(result == PAMI_SUCCESS, "PAMI_Client_create");

    /* query properties of the client */
    pami_configuration_t config[3];
    size_t num_contexts;

    config[0].name = PAMI_CLIENT_NUM_TASKS;
    config[1].name = PAMI_CLIENT_TASK_ID;
    config[2].name = PAMI_CLIENT_NUM_CONTEXTS;
    result = PAMI_Client_query(client, config, 3);
    TEST_ASSERT(result == PAMI_SUCCESS, "PAMI_Client_query");
    world_size   = config[0].value.intval;
    world_rank   = config[1].value.intval;
    num_contexts = config[2].value.intval;
    if (num_contexts > 2)
        num_contexts = 2;

    if (world_rank == 0)
        printf("hello world from rank %ld of %ld \n", (long)world_rank, (long)world_size);
    fflush(stdout);

    /* initialize the contexts */
    contexts = (pami_context_t *) safemalloc(num_contexts * sizeof(pami_context_t));

    result = PAMI_Context_createv(client, NULL, 0, contexts, num_contexts);
    TEST_ASSERT(result == PAMI_SUCCESS, "PAMI_Context_createv");

    /* setup the world geometry */
    pami_geometry_t world_geometry;
    result = PAMI_Geometry_world(client, &world_geometry);
    TEST_ASSERT(result == PAMI_SUCCESS, "PAMI_Geometry_world");

    status = pthread_create(&Progress_thread, NULL, &Progress_function, NULL);
    TEST_ASSERT(status == 0, "pthread_create");

    /************************************************************************/

    int n = (argc > 1 ? atoi(argv[1]) : 1000000);

    size_t bytes = n * sizeof(int);

    int * shared = (int *) safemalloc(bytes);
    for (int i = 0; i < n; i++)
        shared[i] = -1;

    int * local = (int *) safemalloc(bytes);
    for (int i = 0; i < n; i++)
        local[i] = world_rank;

    status = MPI_Barrier(MPI_COMM_WORLD);
    TEST_ASSERT(status == MPI_SUCCESS, "MPI_Barrier");

    /* exchange the addresses of the shared buffers */
    int ** shptrs = (int **) safemalloc(world_size * sizeof(int *));

    status = MPI_Allgather(&shared, sizeof(int *), MPI_BYTE,
                           shptrs,  sizeof(int *), MPI_BYTE, MPI_COMM_WORLD);
    TEST_ASSERT(status == MPI_SUCCESS, "MPI_Allgather");

    int target = (world_rank > 0 ? world_rank - 1 : world_size - 1);
    pami_endpoint_t target_ep;
    result = PAMI_Endpoint_create(client, (pami_task_t) target,
                                  1 /* offset 1: the context advanced by the progress thread */,
                                  &target_ep);
    TEST_ASSERT(result == PAMI_SUCCESS, "PAMI_Endpoint_create");

    /* two completions to wait for: local (done_fn) and remote (rdone_fn) */
    int active = 2;

    pami_put_simple_t parameters = {0}; /* zero-initialize, including rma.hints */
    parameters.rma.dest     = target_ep;
    //parameters.rma.hints  = ;
    parameters.rma.bytes    = bytes;
    parameters.rma.cookie   = &active;
    parameters.rma.done_fn  = cb_done;
    parameters.addr.local   = local;
    parameters.addr.remote  = shptrs[target];
    parameters.put.rdone_fn = cb_done;

    status = MPI_Barrier(MPI_COMM_WORLD);
    TEST_ASSERT(status == MPI_SUCCESS, "MPI_Barrier");

    uint64_t t0 = GetTimeBase();

    result = PAMI_Put(contexts[0], &parameters);
    TEST_ASSERT(result == PAMI_SUCCESS, "PAMI_Put");

    while (active) {
        result = PAMI_Context_trylock_advancev(&(contexts[0]), 1, 1000);
        TEST_ASSERT(result == PAMI_SUCCESS, "PAMI_Context_trylock_advancev");
    }

    uint64_t t1 = GetTimeBase();
    uint64_t dt = t1 - t0;

    barrier(world_geometry, contexts[0]);

    /* 1.6e9 converts Blue Gene/Q cycles (1.6 GHz) to seconds */
    printf("%ld: PAMI_Put of %ld bytes achieves %lf MB/s \n",
           (long)world_rank, (long)bytes, 1.6e9 * 1e-6 * (double)bytes / (double)dt);
    fflush(stdout);

    int errors = 0;

    target = (world_rank < (world_size - 1) ? world_rank + 1 : 0);
    for (int i = 0; i < n; i++)
        if (shared[i] != target)
            errors++;

    if (errors > 0)
        for (int i = 0; i < n; i++)
            printf("%ld: shared[%d] = %d (expected %d) \n",
                   (long)world_rank, i, shared[i], target);
    else
        printf("%ld: no errors :-) \n", (long)world_rank);
    fflush(stdout);

    MPI_Barrier(MPI_COMM_WORLD);

    free(shptrs);
    free(local);
    free(shared);

    /************************************************************************/

    void * rv;

    status = pthread_cancel(Progress_thread);
    TEST_ASSERT(status == 0, "pthread_cancel");

    status = pthread_join(Progress_thread, &rv);
    TEST_ASSERT(status == 0, "pthread_join");

    /* finalize the contexts */
    result = PAMI_Context_destroyv(contexts, num_contexts);
    TEST_ASSERT(result == PAMI_SUCCESS, "PAMI_Context_destroyv");

    free(contexts);

    /* finalize the client */
    result = PAMI_Client_destroy(&client);
    TEST_ASSERT(result == PAMI_SUCCESS, "PAMI_Client_destroy");

    status = MPI_Barrier(MPI_COMM_WORLD);
    TEST_ASSERT(status == MPI_SUCCESS, "MPI_Barrier");
    MPI_Finalize();

    if (world_rank == 0)
        printf("%ld: end of test \n", (long)world_rank);
    fflush(stdout);

    return 0;
}
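/*
 * The benchmark above relies on globals and helpers that are not part of
 * this listing: contexts, Progress_thread, world_size, world_rank,
 * TEST_ASSERT, safemalloc, cb_done, Progress_function, and barrier.
 * Below is a minimal sketch of plausible definitions, stated as an
 * assumption about their behavior rather than the original code.
 */

/* globals shared with main(); types assumed */
pami_context_t * contexts;
pthread_t Progress_thread;
size_t world_size, world_rank;

/* abort loudly when a call fails */
#define TEST_ASSERT(c, m) \
        do { if (!(c)) { printf("%s FAILED\n", (m)); fflush(stdout); abort(); } } while (0)

/* malloc that aborts instead of returning NULL */
void * safemalloc(size_t n)
{
    void * ptr = malloc(n);
    assert(ptr != NULL);
    return ptr;
}

/* decrement the counter passed via the cookie; the spin loop in main()
 * waits for both the local and the remote completion to fire */
void cb_done(pami_context_t context, void * cookie, pami_result_t result)
{
    int * active = (int *) cookie;
    (*active)--;
}

/* advance the second context (assumes num_contexts == 2) until the
 * thread is cancelled from main() */
void * Progress_function(void * input)
{
    while (1) {
        pami_result_t result = PAMI_Context_trylock_advancev(&(contexts[1]), 1, 1000);
        assert(result == PAMI_SUCCESS);
        pthread_testcancel();
    }
    return NULL;
}

/* blocking barrier on a PAMI geometry using the first "always works"
 * algorithm; a sketch only */
void barrier(pami_geometry_t geometry, pami_context_t context)
{
    size_t num_alg[2];
    pami_result_t result = PAMI_Geometry_algorithms_num(geometry, PAMI_XFER_BARRIER, num_alg);
    TEST_ASSERT(result == PAMI_SUCCESS && num_alg[0] > 0, "PAMI_Geometry_algorithms_num");

    pami_algorithm_t * algs = (pami_algorithm_t *) safemalloc(num_alg[0] * sizeof(pami_algorithm_t));
    pami_metadata_t  * meta = (pami_metadata_t  *) safemalloc(num_alg[0] * sizeof(pami_metadata_t));
    result = PAMI_Geometry_algorithms_query(geometry, PAMI_XFER_BARRIER,
                                            algs, meta, num_alg[0], NULL, NULL, 0);
    TEST_ASSERT(result == PAMI_SUCCESS, "PAMI_Geometry_algorithms_query");

    int active = 1;
    pami_xfer_t xfer = {0};
    xfer.cb_done   = cb_done;
    xfer.cookie    = &active;
    xfer.algorithm = algs[0];

    result = PAMI_Collective(context, &xfer);
    TEST_ASSERT(result == PAMI_SUCCESS, "PAMI_Collective");

    while (active)
        PAMI_Context_advance(context, 1000);

    free(meta);
    free(algs);
}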
static inline int
MPIDI_Put_use_pami_put(pami_context_t context, MPIDI_Win_request * req, int * freed)
{
  pami_result_t rc;
  void * map;
  pami_put_simple_t params;

  params = zero_put_parms;

  params.rma.dest = req->dest;
  params.rma.hints.use_rdma = PAMI_HINT_DEFAULT;
#ifndef OUT_OF_ORDER_HANDLING
  params.rma.hints.no_long_header = 1;
#endif
  params.rma.bytes    = 0;
  params.rma.cookie   = req;
  params.rma.done_fn  = NULL;
  params.addr.local   = req->buffer;
  params.addr.remote  = req->win->mpid.info[req->target.rank].base_addr;
  params.put.rdone_fn = MPIDI_Win_DoneCB;

  struct MPIDI_Win_sync * sync = &req->win->mpid.sync;
  TRACE_ERR("Start index=%u/%d l-addr=%p r-base=%p r-offset=%zu (sync->started=%u sync->complete=%u)\n",
            req->state.index, req->target.dt.num_contig, req->buffer,
            req->win->mpid.info[req->target.rank].base_addr, req->offset,
            sync->started, sync->complete);

  while (req->state.index < req->target.dt.num_contig) {
    /* flow control: allow at most MPIDI_Process.rma_pending puts in flight */
    if (sync->started > sync->complete + MPIDI_Process.rma_pending)
      {
        TRACE_ERR("Bailing out; index=%u/%d sync->started=%u sync->complete=%u\n",
                  req->state.index, req->target.dt.num_contig, sync->started, sync->complete);
        return PAMI_EAGAIN;
      }
    ++sync->started;

    params.rma.bytes   = req->target.dt.map[req->state.index].DLOOP_VECTOR_LEN;
    params.addr.local  = req->buffer + req->state.local_offset;
    params.addr.remote = req->win->mpid.info[req->target.rank].base_addr
                       + req->offset
                       + (size_t)req->target.dt.map[req->state.index].DLOOP_VECTOR_BUF;
#ifdef TRACE_ON
    unsigned * buf = (unsigned *)(req->buffer + req->state.local_offset);
#endif
    TRACE_ERR("  Sub index=%u bytes=%zu l-addr=%p r-addr=%p buf=%p *(int*)buf=0x%08x\n",
              req->state.index, params.rma.bytes, params.addr.local, params.addr.remote, buf, *buf);

    /* sync->total is updated with every RMA, and sync->complete does not
       change until that RMA has completed; the datatype map must not be
       freed before the last put of the request has been issued, or the
       remaining puts would read freed memory and the map would leak */
    if (req->target.dt.num_contig - req->state.index == 1) {
        map = NULL;
        if (req->target.dt.map != &req->target.dt.__map) {
            map = (void *) req->target.dt.map;
        }
        rc = PAMI_Put(context, &params);
        MPID_assert(rc == PAMI_SUCCESS);
        if (map)
            MPIU_Free(map);
        *freed = 1;
        return PAMI_SUCCESS;
    } else {
        rc = PAMI_Put(context, &params);
        MPID_assert(rc == PAMI_SUCCESS);
        req->state.local_offset += params.rma.bytes;
        ++req->state.index;
    }
  }
  return PAMI_SUCCESS;
}
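/*
 * Context for the function above, stated as assumptions rather than the
 * surrounding MPICH source: zero_put_parms is taken to be a
 * zero-initialized pami_put_simple_t template, and the hypothetical
 * driver below illustrates how a caller might react to the PAMI_EAGAIN
 * returned by the flow-control check.  MPIDI_Put_drive is not a real
 * MPICH symbol; it exists only to show the retry protocol.
 */

static pami_put_simple_t zero_put_parms;  /* all-zero template (assumed) */

static void MPIDI_Put_drive(pami_context_t context, MPIDI_Win_request * req)
{
  int freed = 0;
  /* re-issue until every contiguous chunk of the datatype has been put;
     advancing the context lets outstanding puts complete so that
     sync->complete catches up with sync->started */
  while (MPIDI_Put_use_pami_put(context, req, &freed) == PAMI_EAGAIN)
    PAMI_Context_advance(context, 100);
}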