예제 #1
0
pami_result_t MPIDI_Pami_post_wrapper(pami_context_t context, void *cookie)
{
   TRACE_ERR("In post wrapper\n");
   TRACE_ERR("About to call collecive\n");
   PAMI_Collective(context, (pami_xfer_t *)cookie);
   TRACE_ERR("Done calling collective\n");
   return PAMI_SUCCESS;
}
예제 #2
0
static void
gasnete_coll_pami_scatt(const gasnet_team_handle_t team, void *dst,
                        gasnet_image_t srcimage, const void *src,
                        size_t nbytes, int flags GASNETI_THREAD_FARG)
{
    const int i_am_root = gasnete_coll_image_is_local(team, srcimage);

  #if GASNET_PAR
    int i_am_leader = gasnete_coll_pami_images_barrier(team); /* XXX: over-synced for IN_NO and IN_MY */

    if ((flags & GASNET_COLL_LOCAL) && i_am_root) {
        /* root thread must be leader for its node */
        const gasnete_coll_threaddata_t * const td = GASNETE_COLL_MYTHREAD_NOALLOC;
        i_am_leader = (srcimage == td->my_image);
    }
  #else
    const int i_am_leader = 1;
  #endif

    if (i_am_leader) {
        volatile unsigned int done = 0;
        pami_result_t rc;
        pami_xfer_t op;

        if (flags & GASNET_COLL_IN_ALLSYNC) gasnetc_fast_barrier();

        op = gasnete_op_template_scatt;
        op.cookie = (void *)&done;
        op.algorithm = team->pami.scatt_alg;
        op.cmd.xfer_scatter.root = gasnetc_endpoint(GASNETE_COLL_REL2ACT(team,gasnete_coll_image_node(team, srcimage)));
        op.cmd.xfer_scatter.sndbuf = (/*not-const*/ void *)src;
        op.cmd.xfer_scatter.stypecount = nbytes;
        op.cmd.xfer_scatter.rcvbuf = dst;
        op.cmd.xfer_scatter.rtypecount = nbytes;

        GASNETC_PAMI_LOCK(gasnetc_context);
        rc = PAMI_Collective(gasnetc_context, &op);
        GASNETC_PAMI_UNLOCK(gasnetc_context);
        GASNETC_PAMI_CHECK(rc, "initiating blocking scatter");

        gasneti_polluntil(done);
    }
      
    if (flags & GASNET_COLL_OUT_ALLSYNC) {
        if (i_am_leader) gasnetc_fast_barrier();
        (void) gasnete_coll_pami_images_barrier(team);
    }
}
예제 #3
0
static void native_collective(pami_xfer_t *op_p, int need_lock) {
  pami_result_t rc;
  volatile unsigned int counter = 0;

  op_p->cb_done = &gasnetc_cb_inc_uint;
  op_p->cookie = (void *)&counter;
  op_p->options.multicontext = PAMI_HINT_DISABLE;

  if (need_lock) GASNETC_PAMI_LOCK(gasnetc_context);
  rc = PAMI_Collective(gasnetc_context, op_p);
  GASNETC_PAMI_CHECK(rc, "initiating a native collective");
  if (need_lock) GASNETC_PAMI_UNLOCK(gasnetc_context);

  if (gasneti_attach_done) {
    gasneti_polluntil(counter);
  } else {
    rc = gasnetc_wait_uint(gasnetc_context, &counter, 1);
    GASNETC_PAMI_CHECK(rc, "polling a native collective");
  }
}
예제 #4
0
/**
 * \brief Blocking 'world geometry' barrier
 *
 * This function is provided for illustrative purposes only. One would never
 * include the retrieval of the world geometry and the query of the barrier
 * algorithm in a performance critical code.
 *
 * \param[in] client  The PAMI client; needed to obtain the geometry
 * \param[in] context The PAMI context; used for the barrier communication
 */
void simple_barrier (pami_client_t client, pami_context_t context)
{
  pami_result_t    result;
  pami_geometry_t  world_geometry;
  pami_xfer_t      xfer;
  pami_algorithm_t algorithm;
  pami_metadata_t  metadata;

  /* Retrieve the PAMI 'world' geometry */
  result = PAMI_ERROR;
  result = PAMI_Geometry_world (client, &world_geometry);
  assert (result == PAMI_SUCCESS);

  /* Query the 'always works' barrier algorithm in the geometry */
  result = PAMI_ERROR;
  result = PAMI_Geometry_algorithms_query (world_geometry, PAMI_XFER_BARRIER,
                                           &algorithm, &metadata, 1,
                                           NULL, NULL, 0);
  assert (result == PAMI_SUCCESS);

  /* Set up the barrier */
  volatile unsigned active = 1;
  xfer.cb_done   = simple_barrier_decrement;
  xfer.cookie    = (void *) & active;
  xfer.algorithm = algorithm;

  /* Issue the barrier collective */
  result = PAMI_ERROR;
  result = PAMI_Collective (context, &xfer);
  assert (result == PAMI_SUCCESS);

  /* Advance until the barrier has completed */
  while (active)
  {
    result = PAMI_ERROR;
    result = PAMI_Context_advance (context, 1);
    assert (result == PAMI_SUCCESS);
  }

  return;
}
static void
gasnete_coll_pami_allga(const gasnet_team_handle_t team,
                        void *dst, const void *src,
                        size_t nbytes, int flags GASNETE_THREAD_FARG)
{
  #if GASNET_PAR
    int i_am_leader = gasnete_coll_pami_images_barrier(team); /* XXX: over-synced for IN_NO and IN_MY */
  #else
    const int i_am_leader = 1;
  #endif

    if (i_am_leader) {
        volatile unsigned int done = 0;
        pami_result_t rc;
        pami_xfer_t op;

        if (flags & GASNET_COLL_IN_ALLSYNC) gasnetc_fast_barrier();

        op = gasnete_op_template_allga;
        op.cookie = (void *)&done;
        op.algorithm = team->pami.allga_alg;
        op.cmd.xfer_allgather.sndbuf = (/*not-const*/ void *)src;
        op.cmd.xfer_allgather.stypecount = nbytes;
        op.cmd.xfer_allgather.rcvbuf = dst;
        op.cmd.xfer_allgather.rtypecount = nbytes;

        GASNETC_PAMI_LOCK(gasnetc_context);
        rc = PAMI_Collective(gasnetc_context, &op);
        GASNETC_PAMI_UNLOCK(gasnetc_context);
        GASNETC_PAMI_CHECK(rc, "initiating blocking allgather");

        gasneti_polluntil(done);
    }
      
    if (flags & GASNET_COLL_OUT_ALLSYNC) {
        if (i_am_leader) gasnetc_fast_barrier();
        (void) gasnete_coll_pami_images_barrier(team);
    }
}
예제 #6
0
int main(int argc, char* argv[])
{
  pami_result_t result = PAMI_ERROR;

  if (Kernel_GetRank()==0)
    print_meminfo(stdout, "before PAMI_Client_create");

  /* initialize the client */
  char * clientname = "";
  pami_client_t client;
  result = PAMI_Client_create( clientname, &client, NULL, 0 );
  TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Client_create");

  if (Kernel_GetRank()==0)
    print_meminfo(stdout, "after PAMI_Client_create");

  /* query properties of the client */
  pami_configuration_t config;
  size_t num_contexts;

  config.name = PAMI_CLIENT_TASK_ID;
  result = PAMI_Client_query( client, &config, 1);
  TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Client_query");
  world_rank = config.value.intval;

  config.name = PAMI_CLIENT_NUM_TASKS;
  result = PAMI_Client_query( client, &config, 1);
  TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Client_query");
  world_size = config.value.intval;

  if ( world_rank == 0 ) 
  {
    printf("starting test on %ld ranks \n", world_size);
    fflush(stdout);
  }

  config.name = PAMI_CLIENT_PROCESSOR_NAME;
  result = PAMI_Client_query( client, &config, 1);
  assert(result == PAMI_SUCCESS);
  //printf("rank %ld is processor %s \n", world_rank, config.value.chararray);
  //fflush(stdout);

  config.name = PAMI_CLIENT_NUM_CONTEXTS;
  result = PAMI_Client_query( client, &config, 1);
  TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Client_query");
  num_contexts = config.value.intval;

  /* initialize the contexts */
  pami_context_t * contexts = NULL;
  contexts = (pami_context_t *) malloc( num_contexts * sizeof(pami_context_t) ); assert(contexts!=NULL);

  if (Kernel_GetRank()==0)
    fprintf(stdout, "num_contexts = %ld \n", (long)num_contexts);

  result = PAMI_Context_createv( client, &config, 0, contexts, num_contexts );
  TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Context_createv");

  if (Kernel_GetRank()==0)
    print_meminfo(stdout, "after PAMI_Context_createv");

  /* setup the world geometry */
  pami_geometry_t world_geometry;
  pami_xfer_type_t barrier_xfer = PAMI_XFER_BARRIER;
  size_t num_alg[2];
  pami_algorithm_t * safe_barrier_algs = NULL;
  pami_metadata_t  * safe_barrier_meta = NULL;
  pami_algorithm_t * fast_barrier_algs = NULL;
  pami_metadata_t  * fast_barrier_meta = NULL;

  result = PAMI_Geometry_world( client, &world_geometry );
  TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Geometry_world");

  if (Kernel_GetRank()==0)
    print_meminfo(stdout, "after PAMI_Geometry_world");

  result = PAMI_Geometry_algorithms_num( world_geometry, barrier_xfer, num_alg );
  TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Geometry_algorithms_num");
  if ( world_rank == 0 ) printf("number of barrier algorithms = {%ld,%ld} \n", num_alg[0], num_alg[1] );

  if (Kernel_GetRank()==0)
    print_meminfo(stdout, "after PAMI_Geometry_algorithms_num");

  safe_barrier_algs = (pami_algorithm_t *) malloc( num_alg[0] * sizeof(pami_algorithm_t) ); assert(safe_barrier_algs!=NULL);
  safe_barrier_meta = (pami_metadata_t  *) malloc( num_alg[0] * sizeof(pami_metadata_t)  ); assert(safe_barrier_meta!=NULL);
  fast_barrier_algs = (pami_algorithm_t *) malloc( num_alg[1] * sizeof(pami_algorithm_t) ); assert(fast_barrier_algs!=NULL);
  fast_barrier_meta = (pami_metadata_t  *) malloc( num_alg[1] * sizeof(pami_metadata_t)  ); assert(fast_barrier_meta!=NULL);
  result = PAMI_Geometry_algorithms_query( world_geometry, barrier_xfer,
                                           safe_barrier_algs, safe_barrier_meta, num_alg[0],
                                           fast_barrier_algs, fast_barrier_meta, num_alg[1] );
  TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Geometry_algorithms_query");

  if (Kernel_GetRank()==0)
    print_meminfo(stdout, "after PAMI_Geometry_algorithms_query");

  /* perform a barrier */
  size_t b;
  pami_xfer_t barrier;
  volatile int active = 0;

  for ( b = 0 ; b < num_alg[0] ; b++ )
  {
      barrier.cb_done   = cb_done;
      barrier.cookie    = (void*) &active;
      barrier.algorithm = safe_barrier_algs[b];

      uint64_t t0 = GetTimeBase();
      active = 1;
      result = PAMI_Collective( contexts[0], &barrier );
      TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Collective - barrier");
      while (active)
        result = PAMI_Context_advance( contexts[0], 1 );
      TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Context_advance - barrier");
      uint64_t t1 = GetTimeBase();

      if ( world_rank == 0 ) printf("safe barrier algorithm %ld (%s) - took %llu cycles \n", b, safe_barrier_meta[b].name, (long long unsigned int)t1-t0 );
      fflush(stdout);
  }
  for ( b = 0 ; b < num_alg[1] ; b++ )
  {
      barrier.cb_done   = cb_done;
      barrier.cookie    = (void*) &active;
      barrier.algorithm = fast_barrier_algs[b];

      uint64_t t0 = GetTimeBase();
      active = 1;
      result = PAMI_Collective( contexts[0], &barrier );
      TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Collective - barrier");
      while (active)
        result = PAMI_Context_advance( contexts[0], 1 );
      TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Context_advance - barrier");
      uint64_t t1 = GetTimeBase();

      if ( world_rank == 0 ) printf("fast barrier algorithm %ld (%s) - took %llu cycles \n", b, fast_barrier_meta[b].name, (long long unsigned int)t1-t0 );
      fflush(stdout);
  }

  if (Kernel_GetRank()==0)
    print_meminfo(stdout, "after barrier tests");

  /* finalize the contexts */
  result = PAMI_Context_destroyv( contexts, num_contexts );
  TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Context_destroyv");

  free(contexts);

  if (Kernel_GetRank()==0)
    print_meminfo(stdout, "before PAMI_Client_destroy");

  /* finalize the client */
  result = PAMI_Client_destroy( &client );
  TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Client_destroy");

  if (Kernel_GetRank()==0)
    print_meminfo(stdout, "after PAMI_Client_destroy");

  if ( world_rank == 0 ) 
  {
    printf("end of test \n");
    fflush(stdout);
  }

  return 0;
}
예제 #7
0
int main(int argc, char*argv[])
{
  pami_client_t        client;
  pami_context_t      *context;
  pami_geometry_t      world_geometry;
  pami_task_t root_task = 0;

  /* Barrier variables */
  size_t               barrier_num_algorithm[2];
  pami_algorithm_t    *bar_always_works_algo = NULL;
  pami_metadata_t     *bar_always_works_md   = NULL;
  pami_algorithm_t    *bar_must_query_algo   = NULL;
  pami_metadata_t     *bar_must_query_md     = NULL;
  pami_xfer_type_t     barrier_xfer = PAMI_XFER_BARRIER;
  pami_xfer_t          barrier;
  volatile unsigned    bar_poll_flag = 0;

  /* Amscatter variables */
  size_t               amscatter_num_algorithm[2];
  pami_algorithm_t    *amscatter_always_works_algo = NULL;
  pami_metadata_t     *amscatter_always_works_md = NULL;
  pami_algorithm_t    *amscatter_must_query_algo = NULL;
  pami_metadata_t     *amscatter_must_query_md = NULL;
  pami_xfer_type_t     amscatter_xfer = PAMI_XFER_AMSCATTER;
  pami_xfer_t          amscatter;
  volatile unsigned    amscatter_total_count = 0;

  int                  nalg = 0, i;
  double               ti, tf, usec;

  /* Process environment variables and setup globals */
  setup_env();

  assert(gNum_contexts > 0);
  context = (pami_context_t*)malloc(sizeof(pami_context_t) * gNum_contexts);

  /* \note Test environment variable" TEST_ROOT=N, defaults to 0.*/
  char* sRoot = getenv("TEST_ROOT");
  /* Override ROOT */
  if (sRoot) root_task = (pami_task_t) atoi(sRoot);

  /*  Initialize PAMI */
  int rc = pami_init(&client,        /* Client             */
                     context,        /* Context            */
                     NULL,           /* Clientname=default */
                     &gNum_contexts, /* gNum_contexts      */
                     NULL,           /* null configuration */
                     0,              /* no configuration   */
                     &my_task_id,    /* task id            */
                     &num_tasks);    /* number of tasks    */

  if (rc == 1)
    return 1;

  if (gNumRoots > num_tasks) gNumRoots = num_tasks;

  /*  Allocate buffer(s) */
  int err = 0;
  void *sbuf = NULL;
  err = posix_memalign(&sbuf, 128, (gMax_byte_count * num_tasks) + gBuffer_offset);
  assert(err == 0);
  sbuf = (char*)sbuf + gBuffer_offset;

  void* rbuf = NULL;
  err = posix_memalign(&rbuf, 128, gMax_byte_count + gBuffer_offset);
  assert(err == 0);
  rbuf = (char*)rbuf + gBuffer_offset;

  void *headers = NULL;
  err = posix_memalign((void **)&headers, 128, (num_tasks * sizeof(user_header_t)) + gBuffer_offset);
  headers = (char*)headers + gBuffer_offset;

  void *validation = NULL;
  err = posix_memalign((void **)&validation, 128, (num_tasks * sizeof(validation_t)) + gBuffer_offset);
  validation = (char*)validation + gBuffer_offset;

  /* Initialize the headers */
  for(i = 0; i < num_tasks; ++i)
  {
    ((user_header_t *)headers)[i].dst_rank = i;
  }

  unsigned iContext = 0;
  for (; iContext < gNum_contexts; ++iContext)
  {
    if (my_task_id == 0)
      printf("# Context: %u\n", iContext);

    /*  Query the world geometry for barrier algorithms */
    rc |= query_geometry_world(client,
                               context[iContext],
                               &world_geometry,
                               barrier_xfer,
                               barrier_num_algorithm,
                               &bar_always_works_algo,
                               &bar_always_works_md,
                               &bar_must_query_algo,
                               &bar_must_query_md);

    if (rc == 1)
      return 1;

    /*  Query the world geometry for amscatter algorithms */
    rc |= query_geometry_world(client,
                               context[iContext],
                               &world_geometry,
                               amscatter_xfer,
                               amscatter_num_algorithm,
                               &amscatter_always_works_algo,
                               &amscatter_always_works_md,
                               &amscatter_must_query_algo,
                               &amscatter_must_query_md);

    if (rc == 1)
      return 1;

    _g_recv_buffer = rbuf;
    _g_send_buffer = sbuf;
    _g_val_buffer  = validation;


    barrier.cb_done     = cb_done;
    barrier.cookie      = (void*) & bar_poll_flag;
    barrier.algorithm   = bar_always_works_algo[0];
    blocking_coll(context[iContext], &barrier, &bar_poll_flag);


    amscatter.algorithm = amscatter_always_works_algo[0];
    amscatter.cmd.xfer_amscatter.headers      = headers;
    amscatter.cmd.xfer_amscatter.headerlen    = sizeof(user_header_t);
    amscatter.cmd.xfer_amscatter.sndbuf       = sbuf;
    amscatter.cmd.xfer_amscatter.stype        = PAMI_TYPE_BYTE;
    amscatter.cmd.xfer_amscatter.stypecount   = 0;

    for (nalg = 0; nalg < amscatter_num_algorithm[0]; nalg++)
    {
      gProtocolName = amscatter_always_works_md[nalg].name;

      if (my_task_id == root_task)
      {
        printf("# AMScatter Bandwidth Test(size:%zu) -- context = %d, root = %d, protocol: %s\n",num_tasks,
               iContext, root_task, amscatter_always_works_md[nalg].name);
        printf("# Size(bytes)      iterations     bytes/sec      usec\n");
        printf("# -----------      -----------    -----------    ---------\n");
        fflush(stdout);
      }

      if (((strstr(amscatter_always_works_md[nalg].name,gSelected) == NULL) && gSelector) ||
          ((strstr(amscatter_always_works_md[nalg].name,gSelected) != NULL) && !gSelector))  continue;

      int j;
      pami_collective_hint_t h = {0};
      pami_dispatch_callback_function fn;
      lgContext = context[iContext];
      fn.amscatter = cb_amscatter_recv;
      PAMI_AMCollective_dispatch_set(context[iContext],
                                     amscatter_always_works_algo[nalg],
                                     root_task,/* Set the dispatch id, can be any arbitrary value */
                                     fn,
                                     (void*) &amscatter_total_count,
                                     h);
      amscatter.cmd.xfer_amscatter.dispatch = root_task;
      amscatter.algorithm = amscatter_always_works_algo[nalg];

      volatile unsigned *nscatter = &amscatter_total_count;
      for (i = gMin_byte_count; i <= gMax_byte_count; i *= 2)
      {
        size_t  dataSent = i;
        int     niter;
        pami_result_t result;

        if (dataSent < CUTOFF)
          niter = gNiterlat;
        else
          niter = NITERBW;

        *nscatter = 0;
        memset(rbuf, 0xFF, i);
        scatter_initialize_sndbuf (sbuf, i, num_tasks);

        blocking_coll(context[iContext], &barrier, &bar_poll_flag);
        ti = timer();

        for (j = 0; j < niter; j++)
        {
          root_task = (root_task + num_tasks - 1) % num_tasks;
          if (my_task_id == root_task)
          {
            amscatter.cmd.xfer_amscatter.stypecount = i;
            result = PAMI_Collective(context[iContext], &amscatter);
            if (result != PAMI_SUCCESS)
              {
                fprintf (stderr, "Error. Unable to issue collective. result = %d\n", result);
                return 1;
              }
          }
          while (*nscatter <= j)
            result = PAMI_Context_advance (context[iContext], 1);

          rc |= _gRc; /* validation return code done in cb_amscatter_done */
        }

        assert(*nscatter == niter);
        tf = timer();
        blocking_coll(context[iContext], &barrier, &bar_poll_flag);

        usec = (tf - ti) / (double)niter;
        if(my_task_id == root_task)
        {
          printf("  %11lld %16d %14.1f %12.2f\n",
                 (long long)dataSent,
                 niter,
                 (double)1e6*(double)dataSent / (double)usec,
                 usec);
          fflush(stdout);
        }
      }
      lgContext = NULL;
    }
    free(bar_always_works_algo);
    free(bar_always_works_md);
    free(bar_must_query_algo);
    free(bar_must_query_md);
    free(amscatter_always_works_algo);
    free(amscatter_always_works_md);
    free(amscatter_must_query_algo);
    free(amscatter_must_query_md);
  } /*for(unsigned iContext = 0; iContext < gNum_contexts; ++iContexts)*/

  sbuf = (char*)sbuf - gBuffer_offset;
  free(sbuf);

  rbuf = (char*)rbuf - gBuffer_offset;
  free(rbuf);

  headers = (char*)headers - gBuffer_offset;
  free(headers);

  validation = (char*)validation - gBuffer_offset;
  free(validation);

  rc |= pami_shutdown(&client, context, &gNum_contexts);
  return rc;
}
예제 #8
0
int main(int argc, char ** argv)
{
    pami_client_t         client;
    pami_context_t        context;
    pami_result_t         status = PAMI_ERROR;
    pami_configuration_t  pami_config;
    pami_geometry_t       world_geo;
    size_t                barrier_alg_num[2];
    pami_algorithm_t*     bar_always_works_algo = NULL;
    pami_metadata_t*      bar_always_works_md = NULL;
    pami_algorithm_t*     bar_must_query_algo = NULL;
    pami_metadata_t*      bar_must_query_md   = NULL;
    pami_xfer_t           barrier;
    int                   my_id;
    volatile int          is_fence_done   = 0;
    volatile int          is_barrier_done = 0;

    /* create PAMI client */
    RC( PAMI_Client_create("TEST", &client, NULL, 0) );
    DBG_FPRINTF((stderr,"Client created successfully at 0x%p\n",client));

    /* create PAMI context */
    RC( PAMI_Context_createv(client, NULL, 0, &context, 1) );
    DBG_FPRINTF((stderr,"Context created successfully at 0x%p\n",context));

    /* query my task id */
    bzero(&pami_config, sizeof(pami_configuration_t));
    pami_config.name = PAMI_CLIENT_TASK_ID;
    RC( PAMI_Client_query(client, &pami_config, 1) );
    my_id = pami_config.value.intval;
    DBG_FPRINTF((stderr,"My task id is %d\n", my_id));

    /* get the world geometry */
    RC( PAMI_Geometry_world(client, &world_geo) );
    DBG_FPRINTF((stderr,"World geometry is at 0x%p\n",world_geo));

    /* query number of barrier algorithms */
    RC( PAMI_Geometry_algorithms_num(world_geo, PAMI_XFER_BARRIER, 
                barrier_alg_num) );
    DBG_FPRINTF((stderr,"%d-%d algorithms are available for barrier op\n",
                barrier_alg_num[0], barrier_alg_num[1]));
    if (barrier_alg_num[0] <= 0) {
        fprintf (stderr, "Error. No (%lu) algorithm is available for barrier op\n",
                barrier_alg_num[0]);
        return 1;
    }

    /* query barrier algorithm list */
    bar_always_works_algo =
        (pami_algorithm_t*)malloc(sizeof(pami_algorithm_t)*barrier_alg_num[0]);
    bar_always_works_md =
        (pami_metadata_t*)malloc(sizeof(pami_metadata_t)*barrier_alg_num[0]);
    bar_must_query_algo =
        (pami_algorithm_t*)malloc(sizeof(pami_algorithm_t)*barrier_alg_num[1]);
    bar_must_query_md =
        (pami_metadata_t*)malloc(sizeof(pami_metadata_t)*barrier_alg_num[1]);

    RC( PAMI_Geometry_algorithms_query(world_geo, PAMI_XFER_BARRIER,
                bar_always_works_algo, bar_always_works_md, barrier_alg_num[0],
                bar_must_query_algo, bar_must_query_md, barrier_alg_num[1]) );
    DBG_FPRINTF((stderr,"Algorithm [%s] at 0x%p will be used for barrier op\n",
                bar_always_works_md[0].name, bar_always_works_algo[0]));

    /* begin PAMI fence */
    RC( PAMI_Fence_begin(context) );
    DBG_FPRINTF((stderr,"PAMI fence begins\n"));

    /* ------------------------------------------------------------------------ */

    pami_extension_t          extension;
    const char                ext_name[] = "EXT_hfi_extension";
    const char                sym_name[] = "hfi_remote_update";
    hfi_remote_update_fn      remote_update = NULL;
    hfi_remote_update_info_t  remote_info;
    pami_memregion_t          mem_region;
    size_t                    mem_region_sz = 0;
    unsigned long long        operand = 1234;
    unsigned long long        orig_val = 0;
    int                       offset = (operand)%MAX_TABLE_SZ;

    /* initialize table for remote update operation */
    int i;
    for (i = 0; i < MAX_TABLE_SZ; i ++) {
        table[i] = (unsigned long long) i;
    }
    orig_val = table[offset];

    /* open PAMI extension */
    RC( PAMI_Extension_open (client, ext_name, &extension) );
    DBG_FPRINTF((stderr,"Open %s successfully.\n", ext_name));

    /* load PAMI extension function */
    remote_update = (hfi_remote_update_fn) 
        PAMI_Extension_symbol (extension, sym_name);
    if (remote_update == (void *)NULL)
    {
        fprintf (stderr, "Error. Failed to load %s function in %s\n",
                 sym_name, ext_name); 
        return 1;
    } else {
        DBG_FPRINTF((stderr,"Loaded function %s in %s successfully.\n", 
                    sym_name, ext_name));
    }

    /* create a memory region for remote update operation */
    RC( PAMI_Memregion_create(context, table, 
                MAX_TABLE_SZ*sizeof(unsigned long long),
                &mem_region_sz, &mem_region) );
    DBG_FPRINTF((stderr,"%d-byte PAMI memory region created successfully.\n",
                mem_region_sz));

    /* perform a PAMI barrier */
    is_barrier_done = 0;
    barrier.cb_done = barrier_done;
    barrier.cookie = (void*)&is_barrier_done;
    barrier.algorithm = bar_always_works_algo[0];
    RC( PAMI_Collective(context, &barrier) );
    DBG_FPRINTF((stderr,"PAMI barrier op invoked successfully.\n"));
    while (is_barrier_done == 0)
        PAMI_Context_advance(context, 1000);
    DBG_FPRINTF((stderr,"PAMI barrier op finished successfully.\n"));

    RC( PAMI_Context_lock(context) );

    /* prepare remote update info */
    remote_info.dest = my_id^1;
    remote_info.op = 0;           /* op_add */
    remote_info.atomic_operand = operand;
    remote_info.dest_buf = (unsigned long long)(&(table[offset]));

    /* invoke remote update PAMI extension function */
    RC( remote_update(context, 1, &remote_info) );
    DBG_FPRINTF((stderr,"Function %s invoked successfully.\n", 
                sym_name));

    RC( PAMI_Context_unlock(context) );

    /* perform a PAMI fence */
    is_fence_done = 0;
    RC( PAMI_Fence_all(context, fence_done, (void*)&is_fence_done) );
    DBG_FPRINTF((stderr,"PAMI_Fence_all invoked successfully.\n")); 
    while (is_fence_done == 0)
        PAMI_Context_advance(context, 1000);
    DBG_FPRINTF((stderr,"PAMI_Fence_all finished successfully.\n")); 

    /* perform a PAMI barrier */
    is_barrier_done = 0;
    barrier.cb_done = barrier_done;
    barrier.cookie = (void*)&is_barrier_done;
    barrier.algorithm = bar_always_works_algo[0];
    RC( PAMI_Collective(context, &barrier) );
    DBG_FPRINTF((stderr,"PAMI barrier op invoked successfully.\n"));
    while (is_barrier_done == 0)
        PAMI_Context_advance(context, 1000);
    DBG_FPRINTF((stderr,"PAMI barrier op finished successfully.\n"));

    /* verify data after remote update operation */
    if (table[offset] != orig_val + operand) {
        printf("Data verification at offset %d with operand %lu failed: "
                "[%lu expected with %lu updated]\n",
                offset, operand, orig_val+operand, table[offset]);
    } else {
        printf("Data verification at offset %d with operand %lu passed: "
                "[%lu expected with %lu updated].\n",
                offset, operand, orig_val+operand, table[offset]);
    }

    /* destroy the memory region after remote update operation */
    RC( PAMI_Memregion_destroy(context, &mem_region) );
    DBG_FPRINTF((stderr,"PAMI memory region removed successfully.\n"));

    /* close PAMI extension */
    RC( PAMI_Extension_close (extension) );
    DBG_FPRINTF((stderr,"Close %s successfully.\n", ext_name));

    /* ------------------------------------------------------------------------ */

    /* end PAMI fence */
    RC( PAMI_Fence_end(context) );
    DBG_FPRINTF((stderr,"PAMI fence ends\n"));

    /* destroy PAMI context */
    RC( PAMI_Context_destroyv(&context, 1) );
    DBG_FPRINTF((stderr, "PAMI context destroyed successfully\n"));

    /* destroy PAMI client */
    RC( PAMI_Client_destroy(&client) );
    DBG_FPRINTF((stderr, "PAMI client destroyed successfully\n"));

    return 0;
}
static void
gasnete_coll_pami_allgavi(const gasnet_team_handle_t team,
                          void *dst, const void *src,
                          size_t nbytes, int flags GASNETE_THREAD_FARG)
{
    int i_am_leader = gasnete_coll_pami_images_barrier(team); /* XXX: over-synced for IN_NO and IN_MY */
    const gasnete_coll_threaddata_t * const td = GASNETE_COLL_MYTHREAD_NOALLOC;

    if (flags & GASNET_COLL_IN_ALLSYNC) {
        if (i_am_leader) gasnetc_fast_barrier();
        (void) gasnete_coll_pami_images_barrier(team);
    }

    GASNETE_FAST_UNALIGNED_MEMCPY(gasnete_coll_scale_ptr(team->pami.scratch_space,
                                                         td->my_local_image,
                                                         nbytes),
                                  src, nbytes);
    (void) gasnete_coll_pami_images_barrier(team);

    if (i_am_leader) {
        volatile unsigned int done = 0;
        pami_result_t rc;
        pami_xfer_t op;

        op = gasnete_op_template_allgavi; /* allgatherv_int */
        op.cookie = (void *)&done;
        op.algorithm = team->pami.allgavi_alg;
        op.cmd.xfer_allgatherv_int.sndbuf = team->pami.scratch_space;
        op.cmd.xfer_allgatherv_int.stypecount = nbytes * team->my_images;

        op.cmd.xfer_allgatherv_int.rcvbuf = dst;
        op.cmd.xfer_allgatherv_int.rtypecounts = team->pami.counts;
        op.cmd.xfer_allgatherv_int.rdispls = team->pami.displs;
        if (team->pami.prev_nbytes != nbytes) {
            int i;
            for (i = 0; i < team->total_ranks; ++i) {
                op.cmd.xfer_allgatherv_int.rtypecounts[i] = nbytes * team->all_images[i];
                op.cmd.xfer_allgatherv_int.rdispls[i] = nbytes * team->all_offset[i];
            }
            team->pami.prev_nbytes = nbytes;
        }

        GASNETC_PAMI_LOCK(gasnetc_context);
        rc = PAMI_Collective(gasnetc_context, &op);
        GASNETC_PAMI_UNLOCK(gasnetc_context);
        GASNETC_PAMI_CHECK(rc, "initiating blocking allgatherv_int");

        gasneti_polluntil(done);

        gasneti_assert(NULL == team->pami.tmp_addr);
        gasneti_sync_writes(); /* XXX: is this necessary? */
        team->pami.tmp_addr = dst; /* wakes pollers, below */
        (void) gasnete_coll_pami_images_barrier(team); /* matches instance below vvvv */
        team->pami.tmp_addr = NULL;
    } else {
        gasneti_waitwhile(NULL == team->pami.tmp_addr);
        GASNETE_FAST_UNALIGNED_MEMCPY(dst, team->pami.tmp_addr, nbytes * team->total_images);
        (void) gasnete_coll_pami_images_barrier(team); /* matches instance above ^^^^ */
    }
      
    if (flags & GASNET_COLL_OUT_ALLSYNC) {
        if (i_am_leader) gasnetc_fast_barrier();
        (void) gasnete_coll_pami_images_barrier(team);
    }
}
예제 #10
0
static void
gasnete_coll_pami_scattvi(const gasnet_team_handle_t team, void *dst,
                          gasnet_image_t srcimage, const void *src,
                          size_t nbytes, int flags GASNETI_THREAD_FARG)
{
    const int i_am_root = gasnete_coll_image_is_local(team, srcimage);
    int i_am_leader = gasnete_coll_pami_images_barrier(team); /* XXX: over-synced for IN_NO and IN_MY */
    const gasnete_coll_threaddata_t * const td = GASNETE_COLL_MYTHREAD_NOALLOC;

    if ((flags & GASNET_COLL_LOCAL) && i_am_root) {
        /* root thread must be leader for its node */
        i_am_leader = (srcimage == td->my_image);
    }

    if (i_am_leader) {
        volatile unsigned int done = 0;
        pami_result_t rc;
        pami_xfer_t op;

        if (flags & GASNET_COLL_IN_ALLSYNC) gasnetc_fast_barrier();

        op = gasnete_op_template_scattvi; /* scatterv_int */
        op.cookie = (void *)&done;
        op.algorithm = team->pami.scattvi_alg;
        op.cmd.xfer_scatterv_int.root = gasnetc_endpoint(GASNETE_COLL_REL2ACT(team,gasnete_coll_image_node(team, srcimage)));
        op.cmd.xfer_scatterv_int.rcvbuf = team->pami.scratch_space;
        op.cmd.xfer_scatterv_int.rtypecount = nbytes * team->my_images;

        if (i_am_root) {
            op.cmd.xfer_scatterv_int.sndbuf = (/*not-const*/ void *)src;
            op.cmd.xfer_scatterv_int.stypecounts = team->pami.counts;
            op.cmd.xfer_scatterv_int.sdispls = team->pami.displs;
            if (team->pami.prev_nbytes != nbytes) {
                int i;
                for (i = 0; i < team->total_ranks; ++i) {
                    op.cmd.xfer_scatterv_int.stypecounts[i] = nbytes * team->all_images[i];
                    op.cmd.xfer_scatterv_int.sdispls[i] = nbytes * team->all_offset[i];
                }
                team->pami.prev_nbytes = nbytes;
            }
        }

        GASNETC_PAMI_LOCK(gasnetc_context);
        rc = PAMI_Collective(gasnetc_context, &op);
        GASNETC_PAMI_UNLOCK(gasnetc_context);
        GASNETC_PAMI_CHECK(rc, "initiating blocking scatterv_int");

        gasneti_polluntil(done);

        gasneti_assert(NULL == team->pami.tmp_addr);
        gasneti_sync_writes();
        team->pami.tmp_addr = team->pami.scratch_space; /* wakes pollers, below */
    } else {
        gasneti_waitwhile(NULL == team->pami.tmp_addr);
    }

    GASNETI_MEMCPY               (dst,
                                  gasnete_coll_scale_ptr(team->pami.tmp_addr,
                                                         td->my_local_image,
                                                         nbytes),
                                  nbytes);
    (void) gasnete_coll_pami_images_barrier(team);

    if (i_am_leader) {
        team->pami.tmp_addr = NULL;
    }

    if (flags & GASNET_COLL_OUT_ALLSYNC) {
        if (i_am_leader) gasnetc_fast_barrier();
        (void) gasnete_coll_pami_images_barrier(team);
    }
}