Example #1
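This test drives a PAMI_Put around a ring of MPI ranks and measures the achieved bandwidth. It relies on several helpers and globals that are not shown: TEST_ASSERT, safemalloc, cb_done, Progress_function, Progress_thread, world_size, world_rank, and contexts, plus a barrier() helper that would run a PAMI barrier collective on the geometry (omitted here). The names come from the example itself; the bodies below are a minimal sketch of plausible definitions, assuming a Blue Gene/Q-style build, not the original program's code.

#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <mpi.h>
#include <pami.h>
#include <hwi/include/bqc/A2_inlines.h> /* GetTimeBase() on BG/Q (assumed) */

/* Globals the example reads and writes. */
static size_t          world_size, world_rank;
static pami_context_t *contexts;
static pthread_t       Progress_thread;

/* Abort loudly if a call did not succeed. */
#define TEST_ASSERT(c,m)                        \
  do {                                          \
    if (!(c)) {                                 \
      fprintf(stderr, "%s FAILED\n", (m));      \
      MPI_Abort(MPI_COMM_WORLD, 1);             \
    }                                           \
  } while (0)

/* malloc wrapper that aborts on failure. */
static void * safemalloc(size_t n)
{
  void * p = malloc(n);
  TEST_ASSERT(p!=NULL, "malloc");
  return p;
}

/* Completion callback: decrement the counter passed in as the cookie. */
static void cb_done(pami_context_t context, void * cookie, pami_result_t result)
{
  int * active = (int *) cookie;
  (*active)--;
}

/* Progress thread: advance the second context until cancelled
   (assumed; the main thread drives contexts[0] itself). */
static void * Progress_function(void * input)
{
  while (1) {
    PAMI_Context_trylock_advancev(&(contexts[1]), 1, 1000);
    pthread_testcancel();
  }
  return NULL;
}

With that scaffolding assumed, the example itself:
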
int main(int argc, char* argv[])
{
  int status = MPI_SUCCESS; 
  pami_result_t result = PAMI_ERROR;

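  /* Ask for MPI_THREAD_MULTIPLE: a separate progress thread will run
     concurrently with the main thread. */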
  int provided = MPI_THREAD_SINGLE;
  MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
  if (provided<MPI_THREAD_MULTIPLE) 
    exit(provided);

  /* initialize the second client */
  const char * clientname = "";
  pami_client_t client;
  result = PAMI_Client_create(clientname, &client, NULL, 0);
  TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Client_create");

  /* query properties of the client */
  pami_configuration_t config[3];
  size_t num_contexts;

  config[0].name = PAMI_CLIENT_NUM_TASKS;
  config[1].name = PAMI_CLIENT_TASK_ID;
  config[2].name = PAMI_CLIENT_NUM_CONTEXTS;
  result = PAMI_Client_query(client, config, 3);
  TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Client_query");
  world_size   = config[0].value.intval;
  world_rank   = config[1].value.intval;
  num_contexts = config[2].value.intval;
  if (num_contexts>2) num_contexts = 2;

  if (world_rank==0)
    printf("hello world from rank %ld of %ld \n", world_rank, world_size );
  fflush(stdout);

  /* initialize the contexts */
  contexts = (pami_context_t *) safemalloc( num_contexts * sizeof(pami_context_t) );

  result = PAMI_Context_createv( client, NULL, 0, contexts, num_contexts );
  TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Context_createv");

  /* setup the world geometry */
  pami_geometry_t world_geometry;
  result = PAMI_Geometry_world(client, &world_geometry );
  TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Geometry_world");

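  /* Spawn the progress thread; Progress_function (sketched above as an
     assumption) advances the second context while the main thread
     drives contexts[0]. */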
  status = pthread_create(&Progress_thread, NULL, &Progress_function, NULL);
  TEST_ASSERT(status==0, "pthread_create");

  /************************************************************************/

  int n = (argc>1 ? atoi(argv[1]) : 1000000);

  size_t bytes = n * sizeof(int);
  int *  shared = (int *) safemalloc(bytes);
  for (int i=0; i<n; i++)
    shared[i] = -1;

  int *  local  = (int *) safemalloc(bytes);
  for (int i=0; i<n; i++)
    local[i] = world_rank;

  status = MPI_Barrier(MPI_COMM_WORLD);
  TEST_ASSERT(status == MPI_SUCCESS,"MPI_Barrier");

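  /* Publish every rank's shared-buffer address so that each rank can
     target a remote buffer directly with PAMI_Put. */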
  int ** shptrs = (int **) safemalloc( world_size * sizeof(int *) );

  status = MPI_Allgather(&shared, sizeof(int *), MPI_BYTE, 
                         shptrs,  sizeof(int *), MPI_BYTE, MPI_COMM_WORLD);
  TEST_ASSERT(status == MPI_SUCCESS,"MPI_Allgather");

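  /* Ring pattern: each rank puts into its left neighbor's buffer.
     Context offset 1 selects the target's second context, presumably
     serviced by its progress thread. */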
  int target = (world_rank>0 ? world_rank-1 : world_size-1);
  pami_endpoint_t target_ep;
  result = PAMI_Endpoint_create(client, (pami_task_t) target, 1, &target_ep);
  TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Endpoint_create");

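  /* Two completion events are expected: done_fn fires on local
     completion and rdone_fn on remote completion, and each decrements
     'active' via the cookie. */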
  int active = 2;
  pami_put_simple_t parameters;
  parameters.rma.dest     = target_ep;
  /* parameters.rma.hints left at library defaults */
  parameters.rma.bytes    = bytes;
  parameters.rma.cookie   = &active;
  parameters.rma.done_fn  = cb_done;
  parameters.addr.local   = local;
  parameters.addr.remote  = shptrs[target];
  parameters.put.rdone_fn = cb_done;

  status = MPI_Barrier(MPI_COMM_WORLD);
  TEST_ASSERT(status == MPI_SUCCESS,"MPI_Barrier");

  uint64_t t0 = GetTimeBase();

  result = PAMI_Put(contexts[0], &parameters);
  TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Put");

  while (active)
  {
    result = PAMI_Context_trylock_advancev(&(contexts[0]), 1, 1000);
    TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Context_trylock_advancev");
  }

  uint64_t t1 = GetTimeBase();
  uint64_t dt = t1-t0;

  barrier(world_geometry, contexts[0]);

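  /* GetTimeBase() counts cycles: 1.6e9 converts cycle counts to seconds
     (the 1.6 GHz BG/Q clock) and 1e-6 converts bytes/s to MB/s. */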
  printf("%ld: PAMI_Put of %ld bytes achieves %lf MB/s \n", (long)world_rank, bytes, 1.6e9*1e-6*(double)bytes/(double)dt );
  fflush(stdout);

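  /* Validate: this rank's shared buffer should now hold the rank of its
     right neighbor, i.e. the rank that targeted it. */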
  int errors = 0;
  
  target = (world_rank<(world_size-1) ? world_rank+1 : 0);
  for (int i=0; i<n; i++)
    if (shared[i] != target)
       errors++;

  if (errors>0)
    for (int i=0; i<n; i++)
      printf("%ld: local[%d] = %d (%d) \n", (long)world_rank, i, local[i], target);
  else
    printf("%ld: no errors :-) \n", (long)world_rank); 

  fflush(stdout);
  MPI_Barrier(MPI_COMM_WORLD);

  free(shptrs);
  free(local);
  free(shared);

  /************************************************************************/

  void * rv;

  status = pthread_cancel(Progress_thread);
  TEST_ASSERT(status==0, "pthread_cancel");

  status = pthread_join(Progress_thread, &rv);
  TEST_ASSERT(status==0, "pthread_join");

  /* finalize the contexts */
  result = PAMI_Context_destroyv( contexts, num_contexts );
  TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Context_destroyv");

  free(contexts);

  /* finalize the client */
  result = PAMI_Client_destroy( &client );
  TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Client_destroy");

  status = MPI_Barrier(MPI_COMM_WORLD);
  TEST_ASSERT(status == MPI_SUCCESS,"MPI_Barrier");

  MPI_Finalize();

  if (world_rank==0)
    printf("%ld: end of test \n", world_rank );
  fflush(stdout);

  return 0;
}
Example #2
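Example #2 appears to be taken from the MPICH "pamid" device for Blue Gene/Q: it implements the one-sided MPI put path by walking a flattened datatype map (req->target.dt.map) and issuing one PAMI_Put per contiguous chunk. The zero_put_parms template it copies from is not shown; presumably it is just a zero-initialized parameter struct, along the lines of:

static const pami_put_simple_t zero_put_parms = {0};  /* assumed definition */
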
static inline int
MPIDI_Put_use_pami_put(pami_context_t context, MPIDI_Win_request * req, int * freed)
{
  pami_result_t rc;
  void  *map;
  pami_put_simple_t params;

  params = zero_put_parms;

  params.rma.dest     = req->dest;
  params.rma.hints.use_rdma = PAMI_HINT_DEFAULT;
#ifndef OUT_OF_ORDER_HANDLING
  params.rma.hints.no_long_header = 1;
#endif
  params.rma.bytes    = 0;
  params.rma.cookie   = req;
  params.rma.done_fn  = NULL;
  params.addr.local   = req->buffer;
  params.addr.remote  = req->win->mpid.info[req->target.rank].base_addr;
  params.put.rdone_fn = MPIDI_Win_DoneCB;

  struct MPIDI_Win_sync* sync = &req->win->mpid.sync;
  TRACE_ERR("Start       index=%u/%d  l-addr=%p  r-base=%p  r-offset=%zu (sync->started=%u  sync->complete=%u)\n",
	    req->state.index, req->target.dt.num_contig, req->buffer, req->win->mpid.info[req->target.rank].base_addr, req->offset, sync->started, sync->complete);
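  /* Walk the flattened datatype map, issuing one PAMI_Put per contiguous
     chunk; bail out with PAMI_EAGAIN when too many RMAs are in flight. */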
  while (req->state.index < req->target.dt.num_contig) {
    if (sync->started > sync->complete + MPIDI_Process.rma_pending)
      {
	TRACE_ERR("Bailing out;  index=%u/%d  sync->started=%u  sync->complete=%u\n",
		  req->state.index, req->target.dt.num_contig, sync->started, sync->complete);
	return PAMI_EAGAIN;
      }
    ++sync->started;

    params.rma.bytes   = req->target.dt.map[req->state.index].DLOOP_VECTOR_LEN;
    params.addr.local  = req->buffer + req->state.local_offset;
    params.addr.remote = req->win->mpid.info[req->target.rank].base_addr + req->offset + (size_t)req->target.dt.map[req->state.index].DLOOP_VECTOR_BUF;
#ifdef TRACE_ON
    unsigned* buf = (unsigned*)(req->buffer + req->state.local_offset);
#endif
    TRACE_ERR("  Sub     index=%u  bytes=%zu  l-offset=%zu  r-offset=%zu  buf=%p  *(int*)buf=0x%08x\n",
	      req->state.index, params.rma.bytes, params.addr.local, params.addr.remote, buf, *buf);

    /* sync->started is incremented for every RMA issued, and sync->complete
       advances only as those RMAs finish.  The heap-allocated datatype map
       (anything other than the inline __map) must be freed exactly once,
       after the final chunk has been issued, or it would leak. */
    if (req->target.dt.num_contig - req->state.index == 1) {
        map=NULL;
        if (req->target.dt.map != &req->target.dt.__map) {
            map=(void *) req->target.dt.map;
        }
        rc = PAMI_Put(context, &params);
        MPID_assert(rc == PAMI_SUCCESS);
        if (map) {
            MPIU_Free(map);
        }
        *freed=1;
        return PAMI_SUCCESS;
    } else {
        rc = PAMI_Put(context, &params);
        MPID_assert(rc == PAMI_SUCCESS);
        req->state.local_offset += params.rma.bytes;
        ++req->state.index;
    }
  }
  return PAMI_SUCCESS;
}
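Two details worth noting in this example: the function throttles itself, returning PAMI_EAGAIN once more than MPIDI_Process.rma_pending operations are outstanding so the caller can retry after progress has been made, and it frees the heap-allocated datatype map only when the final chunk has been issued, setting *freed so the caller knows the request's map is gone.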