Esempio n. 1
0
/**
 * \brief Post the PAMI_Get operations for the contiguous pieces of a window get request.
 *
 * Starting at req->state.index, one PAMI_Get is issued for every entry of
 * req->target.dt.map until req->target.dt.num_contig entries have been posted.
 * Before each post the number of outstanding operations is checked against
 * MPIDI_Process.rma_pending; when too many are in flight the function returns
 * early so the caller can retry later, resuming from the saved
 * req->state.index / req->state.local_offset.
 *
 * \param[in]     context  PAMI context handed to PAMI_Get
 * \param[in,out] req      Window request; its state (index, local_offset) and the
 *                         window's sync->started counter are advanced here
 *
 * \return PAMI_EAGAIN when sync->started exceeds sync->complete by more than
 *         MPIDI_Process.rma_pending, PAMI_SUCCESS otherwise
 */
static inline int
MPIDI_Get_use_pami_get(pami_context_t context, MPIDI_Win_request * req)
{
  pami_result_t rc;
  pami_get_simple_t params;

  params=zero_get_parms;

  params.rma.dest=req->dest;
  params.rma.hints.use_rdma          = PAMI_HINT_DEFAULT;
#ifndef OUT_OF_ORDER_HANDLING
  params.rma.hints.no_long_header= 1;
#endif
  params.rma.bytes   = 0;
  params.rma.cookie  = req;
  params.rma.done_fn = MPIDI_Win_DoneCB;
  params.addr.local=req->buffer;
  params.addr.remote= req->win->mpid.info[req->target.rank].base_addr;

  struct MPIDI_Win_sync* sync = &req->win->mpid.sync;
  TRACE_ERR("Start       index=%u/%d  l-addr=%p  r-base=%p  r-offset=%zu (sync->started=%u  sync->complete=%u)\n",
            req->state.index, req->target.dt.num_contig, req->buffer, req->win->mpid.info[req->target.rank].base_addr, req->offset, sync->started, sync->complete);
  while (req->state.index < req->target.dt.num_contig) {
    /* Throttle: do not let more than rma_pending operations be outstanding. */
    if (sync->started > sync->complete + MPIDI_Process.rma_pending)
      {
        TRACE_ERR("Bailing out;  index=%u/%d  sync->started=%u  sync->complete=%u\n",
                  req->state.index, req->target.dt.num_contig, sync->started, sync->complete);
        return PAMI_EAGAIN;
      }
    ++sync->started;

    /* Describe the current contiguous piece: its length, where it lands
       locally, and where it lives in the target window. */
    params.rma.bytes   = req->target.dt.map[req->state.index].DLOOP_VECTOR_LEN;
    params.addr.local  = req->buffer + req->state.local_offset;
    params.addr.remote = req->win->mpid.info[req->target.rank].base_addr + req->offset + (size_t)req->target.dt.map[req->state.index].DLOOP_VECTOR_BUF;

#ifdef TRACE_ON
    /* Trace-only helpers, derived from the same values used to fill params
       above (pami_get_simple_t is only accessed through .rma and .addr here). */
    unsigned* buf      = (unsigned*)params.addr.local;
    size_t    r_offset = req->offset + (size_t)req->target.dt.map[req->state.index].DLOOP_VECTOR_BUF;
#endif
    TRACE_ERR("  Sub     index=%u  bytes=%zu  l-offset=%zu  r-offset=%zu  buf=%p  *(int*)buf=0x%08x\n",
              req->state.index, params.rma.bytes, (size_t)req->state.local_offset, r_offset, buf, *buf);

    /** sync->total will be updated with every RMA and the complete
        will not change till that RMA has completed. In the meanwhile
        the rest of the RMAs will have memory leaks */
    /* Decide whether this is the final piece BEFORE posting it, so that req is
       not read again once the last operation has been handed to PAMI.
       NOTE(review): presumably the done callback may release req - confirm. */
    const int is_last = (req->target.dt.num_contig - req->state.index == 1);

    rc = PAMI_Get(context, &params);
    MPID_assert(rc == PAMI_SUCCESS);
    if (is_last)
      return PAMI_SUCCESS;

    /* More pieces follow: advance past the one just posted. */
    req->state.local_offset += params.rma.bytes;
    ++req->state.index;
  }
  return PAMI_SUCCESS;
}
Esempio n. 2
0
/**
 * Dispatch callback for an incoming 'rts' message.
 *
 * Interprets the message header as an rts_info_t, allocates a get_info_t to
 * track the transfer, and posts a PAMI_Get that pulls rts->bytes bytes from
 * rts->source on endpoint rts->origin into get->buffer (starting 4 elements
 * in). The get_info_t is passed to get_done as the completion cookie.
 * Progress and failures are reported on stderr.
 */
static void dispatch_rts (
    pami_context_t       context,      /**< IN: PAMI context */
    void               * cookie,       /**< IN: dispatch cookie */
    const void         * header_addr,  /**< IN: header address */
    size_t               header_size,  /**< IN: header size */
    const void         * pipe_addr,    /**< IN: address of PAMI pipe buffer */
    size_t               pipe_size,    /**< IN: size of PAMI pipe buffer */
    pami_endpoint_t      origin,       /**< IN: origin endpoint (unused here) */
    pami_recv_t        * recv)         /**< OUT: receive message structure */
{
  /* The dispatch cookie is the caller's "active" counter. */
  volatile size_t * active = (volatile size_t *) cookie;
  fprintf (stderr, ">> 'rts' dispatch function.  cookie = %p (active: %zu), header_size = %zu, pipe_size = %zu, recv = %p\n", cookie, *active, header_size, pipe_size, (void *) recv);

  /* The header is only read, so keep it const-qualified. */
  const rts_info_t * rts = (const rts_info_t *) header_addr;
  fprintf (stderr, "   'rts' dispatch function.  rts->origin = 0x%08x, rts->bytes = %zu, rts->source = %p\n", rts->origin, rts->bytes, (void *) rts->source);

  get_info_t * get = (get_info_t *) malloc (sizeof(get_info_t));
  if (get == NULL)
  {
    fprintf (stderr, "   'rts' dispatch function.  Unable to allocate %zu bytes for the get state.\n", sizeof(get_info_t));
    fprintf (stderr, "<< 'rts' dispatch function.\n");
    return;
  }

  get->value  = active;
  get->origin = rts->origin;
  get->bytes  = rts->bytes;
  get->pad    = BUFFERSIZE;

  initialize_data (get->buffer, 0, 6);
  print_data (get->buffer, 12*4);

  pami_get_simple_t parameters;
  parameters.rma.dest    = rts->origin;
  parameters.rma.hints   = null_send_hint;
  parameters.rma.bytes   = rts->bytes;
  parameters.rma.cookie  = get;
  parameters.rma.done_fn = get_done;
  parameters.addr.local  = (void *) (get->buffer+4);
  parameters.addr.remote = rts->source;

  /* 'get' is handed to get_done as its cookie.
     NOTE(review): presumably get_done releases it - confirm; on a failed post
     it is left untouched here because ownership is not visible from this block. */
  pami_result_t rc = PAMI_Get (context, &parameters);
  if (rc != PAMI_SUCCESS)
  {
    fprintf (stderr, "   'rts' dispatch function.  PAMI_Get failed, rc = %d\n", (int) rc);
  }

  fprintf (stderr, "<< 'rts' dispatch function.\n");

  return;
}
Esempio n. 3
0
/**
 * PAMI_Get bandwidth/correctness test.
 *
 * Every task fills a "shared" array with its own rank, exchanges the array
 * addresses with allgather, then fetches the array of its left neighbour
 * (rank-1, wrapping to world_size-1) with a single PAMI_Get. The elapsed
 * GetTimeBase() ticks are turned into a bandwidth figure and the fetched
 * data is checked against the neighbour's rank.
 *
 * argv[1], when present, is the number of ints to transfer (default 1000000).
 *
 * Returns 0; failures are reported through TEST_ASSERT.
 */
int main(int argc, char* argv[])
{
    pami_result_t result = PAMI_ERROR;

    /* initialize the client */
    const char * clientname = "";
    pami_client_t client;
    result = PAMI_Client_create(clientname, &client, NULL, 0);
    TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Client_create");

    /* query properties of the client */
    pami_configuration_t config[3];
    size_t num_contexts;

    config[0].name = PAMI_CLIENT_NUM_TASKS;
    config[1].name = PAMI_CLIENT_TASK_ID;
    config[2].name = PAMI_CLIENT_NUM_CONTEXTS;
    result = PAMI_Client_query(client, config, 3);
    TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Client_query");
    world_size   = config[0].value.intval;
    world_rank   = config[1].value.intval;
    num_contexts = config[2].value.intval;
    /* The get below targets context offset 1 on the remote task, so at least
       two contexts are required. */
    TEST_ASSERT(num_contexts>1,"num_contexts>1");

    if (world_rank==0)
    {
        printf("hello world from rank %ld of %ld \n", (long)world_rank, (long)world_size );
        fflush(stdout);
    }

    /* initialize the contexts */
    contexts = (pami_context_t *) safemalloc( num_contexts * sizeof(pami_context_t) );

    result = PAMI_Context_createv( client, NULL, 0, contexts, num_contexts );
    TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Context_createv");

    /* setup the world geometry */
    pami_geometry_t world_geometry;
    result = PAMI_Geometry_world(client, &world_geometry );
    TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Geometry_world");

    /* start the helper thread running Progress_function */
    int status = pthread_create(&Progress_thread, NULL, &Progress_function, NULL);
    TEST_ASSERT(status==0, "pthread_create");

    /************************************************************************/

    /* number of ints to transfer; a non-positive count would yield a bogus
       allocation size below, so reject it up front */
    int n = (argc>1 ? atoi(argv[1]) : 1000000);
    TEST_ASSERT(n>0,"n>0");

    size_t bytes = (size_t)n * sizeof(int);

    /* source buffer: every element holds this task's rank */
    int *  shared = (int *) safemalloc(bytes);
    for (int i=0; i<n; i++)
        shared[i] = world_rank;

    /* destination buffer: pre-filled with -1 so stale data is detectable */
    int *  local  = (int *) safemalloc(bytes);
    for (int i=0; i<n; i++)
        local[i] = -1;

    result = barrier(world_geometry, contexts[0]);
    TEST_ASSERT(result == PAMI_SUCCESS,"barrier");

    /* collect the address of every task's shared buffer */
    int ** shptrs = (int **) safemalloc( world_size * sizeof(int *) );

    result = allgather(world_geometry, contexts[0], sizeof(int*), &shared, shptrs);
    TEST_ASSERT(result == PAMI_SUCCESS,"allgather");

    /* left neighbour, with wrap-around; addressed at context offset 1 */
    int target = (world_rank>0 ? world_rank-1 : world_size-1);
    pami_endpoint_t target_ep;
    result = PAMI_Endpoint_create(client, (pami_task_t) target, 1, &target_ep);
    TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Endpoint_create");

    result = barrier(world_geometry, contexts[0]);
    TEST_ASSERT(result == PAMI_SUCCESS,"barrier");

    /* 'active' is handed to cb_done as the cookie; the wait loop below spins
       until it becomes zero */
    int active = 1;

    /* zero-initialize so that rma.hints, which is not set explicitly, holds
       defined (all-zero) values instead of stack garbage */
    pami_get_simple_t parameters = { 0 };
    parameters.rma.dest     = target_ep;
    parameters.rma.bytes    = bytes;
    parameters.rma.cookie   = &active;
    parameters.rma.done_fn  = cb_done;
    parameters.addr.local   = local;
    parameters.addr.remote  = shptrs[target];

    uint64_t t0 = GetTimeBase();

    result = PAMI_Get(contexts[0], &parameters);
    TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Get");

    /* drive local progress until the completion callback has fired */
    while (active)
    {
        result = PAMI_Context_trylock_advancev(&(contexts[0]), 1, 1000);
        TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Context_trylock_advancev");
    }

    uint64_t t1 = GetTimeBase();
    uint64_t dt = t1-t0;

    /* barrier on non-progressing context to make sure CHT does its job */
    result = barrier(world_geometry, contexts[0]);
    TEST_ASSERT(result == PAMI_SUCCESS,"barrier");

    /* NOTE(review): 1.6e9 is presumably the timebase frequency in ticks per
       second - confirm for the target machine */
    printf("%ld: PAMI_Get of %zu bytes achieves %lf MB/s \n", (long)world_rank, bytes, 1.6e9*1e-6*(double)bytes/(double)dt );
    fflush(stdout);

    /* verify: every fetched element must equal the neighbour's rank */
    int errors = 0;

    target = (world_rank>0 ? world_rank-1 : world_size-1);
    for (int i=0; i<n; i++)
        if (local[i] != target)
            errors++;

    if (errors>0)
    {
        /* list every mismatching element */
        for (int i=0; i<n; i++)
            if (local[i] != target)
                printf("%ld: local[%d] = %d (%d) \n", (long)world_rank, i, local[i], target);
    }
    else
    {
        printf("%ld: no errors :-) \n", (long)world_rank);
    }

    fflush(stdout);

    result = barrier(world_geometry, contexts[0]);
    TEST_ASSERT(result == PAMI_SUCCESS,"barrier");

    free(shptrs);
    free(local);
    free(shared);

    /************************************************************************/

    /* stop the helper thread before tearing down the contexts */
    void * rv;

    status = pthread_cancel(Progress_thread);
    TEST_ASSERT(status==0, "pthread_cancel");

    status = pthread_join(Progress_thread, &rv);
    TEST_ASSERT(status==0, "pthread_join");

    result = barrier(world_geometry, contexts[0]);
    TEST_ASSERT(result == PAMI_SUCCESS,"barrier");

    /* finalize the contexts */
    result = PAMI_Context_destroyv( contexts, num_contexts );
    TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Context_destroyv");

    free(contexts);

    /* finalize the client */
    result = PAMI_Client_destroy( &client );
    TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Client_destroy");

    if (world_rank==0)
        printf("%ld: end of test \n", (long)world_rank );
    fflush(stdout);

    return 0;
}