/**
 * Issue the remaining contiguous chunks of a one-sided GET request as
 * individual PAMI_Get operations.
 *
 * \param context  PAMI context on which to issue the transfers.
 * \param req      Window request describing buffer, target and datatype map.
 *
 * \return PAMI_SUCCESS when every remaining chunk has been issued, or
 *         PAMI_EAGAIN when the outstanding-RMA throttle was hit (the caller
 *         must retry later; req->state records where to resume).
 */
static inline int
MPIDI_Get_use_pami_get(pami_context_t context, MPIDI_Win_request * req)
{
  pami_result_t rc;
  pami_get_simple_t params;

  /* Start from the zeroed template and fill in the fields that are
   * invariant across all chunks of this request. */
  params = zero_get_parms;

  params.rma.dest           = req->dest;
  params.rma.hints.use_rdma = PAMI_HINT_DEFAULT;
#ifndef OUT_OF_ORDER_HANDLING
  params.rma.hints.no_long_header = 1;  /* was terminated with ',' — worked only via the comma operator */
#endif
  params.rma.bytes   = 0;
  params.rma.cookie  = req;
  params.rma.done_fn = MPIDI_Win_DoneCB;
  params.addr.local  = req->buffer;
  params.addr.remote = req->win->mpid.info[req->target.rank].base_addr;

  struct MPIDI_Win_sync* sync = &req->win->mpid.sync;
  TRACE_ERR("Start index=%u/%d l-addr=%p r-base=%p r-offset=%zu (sync->started=%u sync->complete=%u)\n",
            req->state.index, req->target.dt.num_contig, req->buffer,
            req->win->mpid.info[req->target.rank].base_addr, req->offset,
            sync->started, sync->complete);

  /* One PAMI_Get per contiguous chunk of the target datatype map. */
  while (req->state.index < req->target.dt.num_contig)
    {
      /* Throttle: never allow more than rma_pending RMAs in flight. */
      if (sync->started > sync->complete + MPIDI_Process.rma_pending)
        {
          TRACE_ERR("Bailing out; index=%u/%d sync->started=%u sync->complete=%u\n",
                    req->state.index, req->target.dt.num_contig,
                    sync->started, sync->complete);
          return PAMI_EAGAIN;
        }
      ++sync->started;

      params.rma.bytes   = req->target.dt.map[req->state.index].DLOOP_VECTOR_LEN;
      params.addr.local  = req->buffer + req->state.local_offset;
      params.addr.remote = req->win->mpid.info[req->target.rank].base_addr +
                           req->offset +
                           (size_t)req->target.dt.map[req->state.index].DLOOP_VECTOR_BUF;

#ifdef TRACE_ON
      /* NOTE(review): the original traced params.rdma.local.offset, but
       * pami_get_simple_t has rma/addr members, not 'rdma'; trace the
       * offsets actually used instead. */
      unsigned* buf = (unsigned*)(req->buffer + req->state.local_offset);
#endif
      TRACE_ERR(" Sub index=%u bytes=%zu l-offset=%zu r-offset=%zu buf=%p *(int*)buf=0x%08x\n",
                req->state.index, params.rma.bytes,
                (size_t)req->state.local_offset,
                req->offset + (size_t)req->target.dt.map[req->state.index].DLOOP_VECTOR_BUF,
                buf, *buf);

      /* sync->started is bumped for every RMA while 'complete' only advances
       * as each RMA finishes; the remaining RMAs wait behind the throttle. */
      if (req->target.dt.num_contig - req->state.index == 1)
        {
          /* Last chunk: issue it and return without advancing the cursor. */
          rc = PAMI_Get(context, &params);  /* was the mis-encoded '¶ms' */
          MPID_assert(rc == PAMI_SUCCESS);
          return PAMI_SUCCESS;
        }
      else
        {
          rc = PAMI_Get(context, &params);  /* was the mis-encoded '¶ms' */
          MPID_assert(rc == PAMI_SUCCESS);
          req->state.local_offset += params.rma.bytes;
          ++req->state.index;
        }
    }
  return PAMI_SUCCESS;
}
/**
 * Dispatch handler for an incoming 'rts' (request-to-send) active message:
 * allocates per-transfer state and pulls the announced bytes from the origin
 * with a PAMI_Get.  Completion is signalled through the get_done callback,
 * which receives the allocated get_info_t as its cookie.
 */
static void dispatch_rts ( pami_context_t    context,      /**< IN: PAMI context */
                           void            * cookie,       /**< IN: dispatch cookie */
                           const void      * header_addr,  /**< IN: header address */
                           size_t            header_size,  /**< IN: header size */
                           const void      * pipe_addr,    /**< IN: address of PAMI pipe buffer */
                           size_t            pipe_size,    /**< IN: size of PAMI pipe buffer */
                           pami_endpoint_t   origin,
                           pami_recv_t     * recv)         /**< OUT: receive message structure */
{
  volatile size_t * active = (volatile size_t *) cookie;
  fprintf (stderr, ">> 'rts' dispatch function. cookie = %p (active: %zu), header_size = %zu, pipe_size = %zu, recv = %p\n", cookie, *active, header_size, pipe_size, recv);

  /* The header carries the origin's rts descriptor (rank, byte count,
   * remote source address). */
  rts_info_t * rts = (rts_info_t *) header_addr;
  fprintf (stderr, " 'rts' dispatch function. rts->origin = 0x%08x, rts->bytes = %zu, rts->source = %p\n", rts->origin, rts->bytes, rts->source);

  size_t pad = BUFFERSIZE;

  /* Ownership: 'get' travels as the PAMI_Get cookie; presumably freed in
   * get_done — TODO confirm against the callback. */
  get_info_t * get = (get_info_t *) malloc (sizeof(get_info_t));
  if (get == NULL)
    {
      /* Original left malloc unchecked; fail loudly instead of
       * dereferencing NULL below. */
      fprintf (stderr, "dispatch_rts: out of memory\n");
      abort ();
    }

  get->value  = active;
  get->origin = rts->origin;
  get->bytes  = rts->bytes;
  get->pad    = pad;

  initialize_data (get->buffer, 0, 6);
  print_data (get->buffer, 12*4);

  pami_get_simple_t parameters;
  parameters.rma.dest    = rts->origin;
  parameters.rma.hints   = null_send_hint;
  parameters.rma.bytes   = rts->bytes;
  parameters.rma.cookie  = get;
  parameters.rma.done_fn = get_done;
  parameters.addr.local  = (void *) (get->buffer+4);
  parameters.addr.remote = rts->source;

  /* was the mis-encoded '¶meters' — restore '&parameters' */
  PAMI_Get (context, &parameters);

  fprintf (stderr, "<< 'rts' dispatch function.\n");

  return;
}
int main(int argc, char* argv[]) { pami_result_t result = PAMI_ERROR; /* initialize the second client */ char * clientname = ""; pami_client_t client; result = PAMI_Client_create(clientname, &client, NULL, 0); TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Client_create"); /* query properties of the client */ pami_configuration_t config[3]; size_t num_contexts; config[0].name = PAMI_CLIENT_NUM_TASKS; config[1].name = PAMI_CLIENT_TASK_ID; config[2].name = PAMI_CLIENT_NUM_CONTEXTS; result = PAMI_Client_query(client, config, 3); TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Client_query"); world_size = config[0].value.intval; world_rank = config[1].value.intval; num_contexts = config[2].value.intval; TEST_ASSERT(num_contexts>1,"num_contexts>1"); if (world_rank==0) { printf("hello world from rank %ld of %ld \n", world_rank, world_size ); fflush(stdout); } /* initialize the contexts */ contexts = (pami_context_t *) safemalloc( num_contexts * sizeof(pami_context_t) ); result = PAMI_Context_createv( client, NULL, 0, contexts, num_contexts ); TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Context_createv"); /* setup the world geometry */ pami_geometry_t world_geometry; result = PAMI_Geometry_world(client, &world_geometry ); TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Geometry_world"); int status = pthread_create(&Progress_thread, NULL, &Progress_function, NULL); TEST_ASSERT(status==0, "pthread_create"); /************************************************************************/ int n = (argc>1 ? 
atoi(argv[1]) : 1000000); size_t bytes = n * sizeof(int); int * shared = (int *) safemalloc(bytes); for (int i=0; i<n; i++) shared[i] = world_rank; int * local = (int *) safemalloc(bytes); for (int i=0; i<n; i++) local[i] = -1; result = barrier(world_geometry, contexts[0]); TEST_ASSERT(result == PAMI_SUCCESS,"barrier"); int ** shptrs = (int **) safemalloc( world_size * sizeof(int *) ); result = allgather(world_geometry, contexts[0], sizeof(int*), &shared, shptrs); TEST_ASSERT(result == PAMI_SUCCESS,"allgather"); int target = (world_rank>0 ? world_rank-1 : world_size-1); pami_endpoint_t target_ep; result = PAMI_Endpoint_create(client, (pami_task_t) target, 1, &target_ep); TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Endpoint_create"); result = barrier(world_geometry, contexts[0]); TEST_ASSERT(result == PAMI_SUCCESS,"barrier"); int active = 1; pami_get_simple_t parameters; parameters.rma.dest = target_ep; //parameters.rma.hints = ; parameters.rma.bytes = bytes; parameters.rma.cookie = &active; parameters.rma.done_fn = cb_done; parameters.addr.local = local; parameters.addr.remote = shptrs[target]; uint64_t t0 = GetTimeBase(); result = PAMI_Get(contexts[0], ¶meters); TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Rget"); while (active) { //result = PAMI_Context_advance( contexts[0], 100); //TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Context_advance"); result = PAMI_Context_trylock_advancev(&(contexts[0]), 1, 1000); TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Context_trylock_advancev"); } uint64_t t1 = GetTimeBase(); uint64_t dt = t1-t0; /* barrier on non-progressing context to make sure CHT does its job */ barrier(world_geometry, contexts[0]); printf("%ld: PAMI_Get of %ld bytes achieves %lf MB/s \n", (long)world_rank, bytes, 1.6e9*1e-6*(double)bytes/(double)dt ); fflush(stdout); int errors = 0; //target = (world_rank<(world_size-1) ? world_rank+1 : 0); target = (world_rank>0 ? 
world_rank-1 : world_size-1); for (int i=0; i<n; i++) if (local[i] != target) errors++; if (errors>0) for (int i=0; i<n; i++) if (local[i] != target) printf("%ld: local[%d] = %d (%d) \n", (long)world_rank, i, local[i], target); else printf("%ld: no errors :-) \n", (long)world_rank); fflush(stdout); result = barrier(world_geometry, contexts[0]); TEST_ASSERT(result == PAMI_SUCCESS,"barrier"); free(shptrs); free(local); free(shared); /************************************************************************/ void * rv; status = pthread_cancel(Progress_thread); TEST_ASSERT(status==0, "pthread_cancel"); status = pthread_join(Progress_thread, &rv); TEST_ASSERT(status==0, "pthread_join"); result = barrier(world_geometry, contexts[0]); TEST_ASSERT(result == PAMI_SUCCESS,"barrier"); /* finalize the contexts */ result = PAMI_Context_destroyv( contexts, num_contexts ); TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Context_destroyv"); free(contexts); /* finalize the client */ result = PAMI_Client_destroy( &client ); TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Client_destroy"); if (world_rank==0) printf("%ld: end of test \n", world_rank ); fflush(stdout); return 0; }