int main(int argc, char **argv) {

    const Image<uint8_t> input = make_image<uint8_t>();

    Image<float> output0(kSize, kSize, 3);
    Image<float> output1(kSize, kSize, 3);

    int result;

    result = metadata_tester(input, false, 0, 0, 0, 0, 0, 0, 0, 0, 0.f, 0.0, NULL, output0, output1);
    EXPECT_EQ(0, result);

    result = metadata_tester_ucon(NULL, input, false, 0, 0, 0, 0, 0, 0, 0, 0, 0.f, 0.0, NULL, output0, output1);
    EXPECT_EQ(0, result);

    verify(input, output0, output1);

    check_metadata(metadata_tester_metadata, false);
    if (!strcmp(metadata_tester_metadata.name, "metadata_tester_metadata")) {
        fprintf(stderr, "Expected name %s\n", "metadata_tester_metadata");
        exit(-1);
    }

    check_metadata(metadata_tester_ucon_metadata, true);
    if (!strcmp(metadata_tester_ucon_metadata.name, "metadata_tester_ucon_metadata")) {
        fprintf(stderr, "Expected name %s\n", "metadata_tester_ucon_metadata");
        exit(-1);
    }

    printf("Success!\n");
    return 0;
}
    void
    check_slave_sync(Volume& v,
                     const be::BackendTestSetup::WithRandomNamespace& wrns,
                     const MDSNodeConfig& cfg,
                     bool wait = true)
    {
        MetaDataBackendInterfacePtr mdb(std::make_shared<MDSMetaDataBackend>(cfg,
                                                                             wrns.ns(),
                                                                             boost::none));

        const uint32_t secs = mds_manager_->poll_interval().count();
        const uint32_t sleep_msecs = 100;
        const uint32_t count = wait ? 3 * 1000 * secs / sleep_msecs : 1;

        for (size_t i = 0; i < count; ++i)
        {
            if (mdb->lastCorkUUID() == v.getMetaDataStore()->lastCork())
            {
                CachedMetaDataStore md(mdb,
                                       "slave");
                check_metadata(v,
                               md);
                return;
            }
            else
            {
                boost::this_thread::sleep_for(boost::chrono::milliseconds(sleep_msecs));
            }
        }

        FAIL() << "slave not in sync after " << (count * sleep_msecs) << " milliseconds";
    }
Exemple #3
0
void
table_base::open() {
    if (_is_open)  throw table_open_exception();
    if (_key_mapper == nullptr)  throw no_primary_key_exception();

    {
        _database.make_enclosure_available(_binomen._enclosure);
        transaction txn(_database);
        const unique_ptr<sql> cmd = _database.make_sql();
        cmd->write_create_table(
            _binomen,
            get_value_mapper_base(),
            *_key_mapper,
            readback_id(),
            _foreign_specs
        );
        if (_database.get_session()->unchecked_exec(*cmd)) {  // failure is benign, but in that case we needn't commit.
            for (size_t i = 0; i < _index_specs.size(); i++) {
                const index_spec &is = _index_specs[i];
                const unique_ptr<sql> cmd = _database.make_sql();
                cmd->write_create_index(_binomen, i, is._mappers, is._is_unique);
                _database.get_session()->exec(*cmd);
            }
            txn.commit();
        }
    }
    check_metadata();
    _is_open = true;
}
Exemple #4
0
static void check_access_token_metadata(void *user_data,
                                        grpc_credentials_md *md_elems,
                                        size_t num_md,
                                        grpc_credentials_status status) {
    grpc_credentials *c = (grpc_credentials *)user_data;
    expected_md emd[] = {{GRPC_AUTHORIZATION_METADATA_KEY, "Bearer blah"}};
    GPR_ASSERT(status == GRPC_CREDENTIALS_OK);
    GPR_ASSERT(num_md == 1);
    check_metadata(emd, md_elems, num_md);
    grpc_credentials_unref(c);
}
Exemple #5
0
static void check_ssl_oauth2_composite_metadata(
    void *user_data, grpc_credentials_md *md_elems, size_t num_md,
    grpc_credentials_status status) {
    grpc_credentials *c = (grpc_credentials *)user_data;
    expected_md emd[] = {
        {GRPC_AUTHORIZATION_METADATA_KEY, test_oauth2_bearer_token}
    };
    GPR_ASSERT(status == GRPC_CREDENTIALS_OK);
    GPR_ASSERT(num_md == 1);
    check_metadata(emd, md_elems, num_md);
    grpc_credentials_unref(c);
}
Exemple #6
0
static void check_iam_metadata(void *user_data, grpc_credentials_md *md_elems,
                               size_t num_md, grpc_credentials_status status) {
    grpc_credentials *c = (grpc_credentials *)user_data;
    expected_md emd[] = {
        {GRPC_IAM_AUTHORIZATION_TOKEN_METADATA_KEY, test_iam_authorization_token},
        {GRPC_IAM_AUTHORITY_SELECTOR_METADATA_KEY, test_iam_authority_selector}
    };
    GPR_ASSERT(status == GRPC_CREDENTIALS_OK);
    GPR_ASSERT(num_md == 2);
    check_metadata(emd, md_elems, num_md);
    grpc_credentials_unref(c);
}
int main(int argc, char **argv) {
    void* user_context = NULL;

    int result;

    std::map<std::string, int> enum_results;
    result = halide_enumerate_registered_filters(user_context, &enum_results, EnumerateFunc);
    EXPECT_EQ(0, result);
    EXPECT_EQ(2, enum_results.size());
    EXPECT_EQ(15, enum_results["metadata_tester"]);
    EXPECT_EQ(16, enum_results["metadata_tester_ucon"]);

    const Image<uint8_t> input = make_image<uint8_t>();

    Image<float> output0(kSize, kSize, 3);
    Image<float> output1(kSize, kSize, 3);

    result = metadata_tester(input, false, 0, 0, 0, 0, 0, 0, 0, 0, 0.f, 0.0, NULL, output0, output1);
    EXPECT_EQ(0, result);

    result = metadata_tester_ucon(user_context, input, false, 0, 0, 0, 0, 0, 0, 0, 0, 0.f, 0.0, NULL, output0, output1);
    EXPECT_EQ(0, result);

    verify(input, output0, output1);

    check_metadata(metadata_tester_metadata, false);
    if (!strcmp(metadata_tester_metadata.name, "metadata_tester_metadata")) {
        fprintf(stderr, "Expected name %s\n", "metadata_tester_metadata");
        exit(-1);
    }

    check_metadata(metadata_tester_ucon_metadata, true);
    if (!strcmp(metadata_tester_ucon_metadata.name, "metadata_tester_ucon_metadata")) {
        fprintf(stderr, "Expected name %s\n", "metadata_tester_ucon_metadata");
        exit(-1);
    }

    printf("Success!\n");
    return 0;
}
int main(int argc, char*argv[])
{
  pami_client_t        client;
  pami_context_t      *context;
  pami_task_t          task_id;
  size_t               num_tasks;
  pami_geometry_t      world_geometry;

  /* Barrier variables */
  size_t               barrier_num_algorithm[2];
  pami_algorithm_t    *bar_always_works_algo = NULL;
  pami_metadata_t     *bar_always_works_md   = NULL;
  pami_algorithm_t    *bar_must_query_algo   = NULL;
  pami_metadata_t     *bar_must_query_md     = NULL;
  pami_xfer_type_t     barrier_xfer = PAMI_XFER_BARRIER;
  volatile unsigned    bar_poll_flag = 0;

  /* Alltoallv variables */
  size_t               alltoallv_num_algorithm[2];
  pami_algorithm_t    *alltoallv_always_works_algo = NULL;
  pami_metadata_t     *alltoallv_always_works_md = NULL;
  pami_algorithm_t    *next_algo = NULL;
  pami_metadata_t     *next_md= NULL;
  pami_algorithm_t    *alltoallv_must_query_algo = NULL;
  pami_metadata_t     *alltoallv_must_query_md = NULL;
  pami_xfer_type_t     alltoallv_xfer = PAMI_XFER_ALLTOALLV;
  volatile unsigned    alltoallv_poll_flag = 0;

  int                  nalg= 0, total_alg;
  double               ti, tf, usec;
  pami_xfer_t          barrier;
  pami_xfer_t          alltoallv;
  pami_type_t          pami_stype = 0;
  pami_type_t          pami_rtype = 0;
  pami_result_t        ret;

  /* Process environment variables and setup globals */
  setup_env();

  assert(gNum_contexts > 0);
  context = (pami_context_t*)malloc(sizeof(pami_context_t) * gNum_contexts);


  /*  Initialize PAMI */
  int rc = pami_init(&client,        /* Client             */
                     context,        /* Context            */
                     NULL,           /* Clientname=default */
                     &gNum_contexts, /* gNum_contexts       */
                     NULL,           /* null configuration */
                     0,              /* no configuration   */
                     &task_id,       /* task id            */
                     &num_tasks);    /* number of tasks    */

  if (rc == 1)
    return 1;

  /*  Allocate buffer(s) */
  int err = 0;
  void* sbuf = NULL;
  err = posix_memalign((void*) & sbuf, 128, (gMax_byte_count * num_tasks) + gBuffer_offset);
  assert(err == 0);
  sbuf = (char*)sbuf + gBuffer_offset;

  void* rbuf = NULL;
  err = posix_memalign((void*) & rbuf, 128, (gMax_byte_count * num_tasks) + gBuffer_offset);
  assert(err == 0);
  rbuf = (char*)rbuf + gBuffer_offset;

  sndlens = (size_t*) malloc(num_tasks * sizeof(size_t));
  assert(sndlens);
  sdispls = (size_t*) malloc(num_tasks * sizeof(size_t));
  assert(sdispls);
  rcvlens = (size_t*) malloc(num_tasks * sizeof(size_t));
  assert(rcvlens);
  rdispls = (size_t*) malloc(num_tasks * sizeof(size_t));
  assert(rdispls);
  ret = PAMI_Type_create(&pami_stype);
  if(ret != PAMI_SUCCESS)
    return 1;

  ret = PAMI_Type_create(&pami_rtype);
  if(ret != PAMI_SUCCESS)
    return 1;

  PAMI_Type_add_simple(pami_stype, sizeof(double), 0, 1, sizeof(double)*2);
  PAMI_Type_add_simple(pami_rtype, sizeof(double), sizeof(double), 1, sizeof(double));

  ret = PAMI_Type_complete(pami_stype, sizeof(double));
  if(ret != PAMI_SUCCESS){
    printf("Invalid atom size for stype\n");
    return 1;
  }

  ret = PAMI_Type_complete(pami_rtype, sizeof(double));
  if(ret != PAMI_SUCCESS){
    printf("Invalid atom size for rtype\n");
    return 1;
  }

  unsigned iContext = 0;

  for (; iContext < gNum_contexts; ++iContext)
  {

    if (task_id == 0)
      printf("# Context: %u\n", iContext);

    /*  Query the world geometry for barrier algorithms */
    rc |= query_geometry_world(client,
                               context[iContext],
                               &world_geometry,
                               barrier_xfer,
                               barrier_num_algorithm,
                               &bar_always_works_algo,
                               &bar_always_works_md,
                               &bar_must_query_algo,
                               &bar_must_query_md);

    if (rc == 1)
      return 1;

    /*  Query the world geometry for alltoallv algorithms */
    rc |= query_geometry_world(client,
                               context[iContext],
                               &world_geometry,
                               alltoallv_xfer,
                               alltoallv_num_algorithm,
                               &alltoallv_always_works_algo,
                               &alltoallv_always_works_md,
                               &alltoallv_must_query_algo,
                               &alltoallv_must_query_md);

    if (rc == 1)
      return 1;

    barrier.cb_done   = cb_done;
    barrier.cookie    = (void*) & bar_poll_flag;
    barrier.algorithm = bar_always_works_algo[0];

    total_alg = alltoallv_num_algorithm[0]+alltoallv_num_algorithm[1];
    for (nalg = 0; nalg < total_alg; nalg++)
    {
      metadata_result_t result = {0};
      unsigned query_protocol;
      if(nalg < alltoallv_num_algorithm[0])
      {
        query_protocol = 0;
        next_algo = &alltoallv_always_works_algo[nalg];
        next_md  = &alltoallv_always_works_md[nalg];
      }
      else
      {
        query_protocol = 1;
        next_algo = &alltoallv_must_query_algo[nalg-alltoallv_num_algorithm[0]];
        next_md  = &alltoallv_must_query_md[nalg-alltoallv_num_algorithm[0]];
      }

      gProtocolName = next_md->name;

      alltoallv.cb_done    = cb_done;
      alltoallv.cookie     = (void*) & alltoallv_poll_flag;
      alltoallv.algorithm  = *next_algo;
      alltoallv.cmd.xfer_alltoallv.sndbuf        = sbuf;
      alltoallv.cmd.xfer_alltoallv.stype         = pami_stype;
      alltoallv.cmd.xfer_alltoallv.stypecounts   = sndlens;
      alltoallv.cmd.xfer_alltoallv.sdispls       = sdispls;
      alltoallv.cmd.xfer_alltoallv.rcvbuf        = rbuf;
      alltoallv.cmd.xfer_alltoallv.rtype         = pami_rtype;
      alltoallv.cmd.xfer_alltoallv.rtypecounts   = rcvlens;
      alltoallv.cmd.xfer_alltoallv.rdispls       = rdispls;

      gProtocolName = next_md->name;

      if (task_id == 0)
      {
        printf("# Alltoallv Bandwidth Test(size:%zu) -- context = %d, protocol: %s, Metadata: range %zu <-> %zd, mask %#X\n",num_tasks,
               iContext, gProtocolName,
               next_md->range_lo,(ssize_t)next_md->range_hi,
               next_md->check_correct.bitmask_correct);
        printf("# Size(bytes)  iterations    bytes/sec      usec\n");
        printf("# -----------      -----------    -----------    ---------\n");
      }

      if (((strstr(next_md->name, gSelected) == NULL) && gSelector) ||
          ((strstr(next_md->name, gSelected) != NULL) && !gSelector))  continue;

      int i, j;

      unsigned checkrequired = next_md->check_correct.values.checkrequired; /*must query every time */
      assert(!checkrequired || next_md->check_fn); /* must have function if checkrequired. */

      for (i = 0; i <= (gMax_byte_count/(sizeof(double)*2)); i *= 2)
      {
        size_t  dataSent = i;
        int          niter;

        if (dataSent < CUTOFF)
          niter = gNiterlat;
        else
          niter = NITERBW;

        for (j = 0; j < num_tasks; j++)
        {
          sndlens[j] = rcvlens[j] = i;
          sdispls[j] = rdispls[j] = i * j;

          initialize_sndbuf( j, (double*)sbuf, (double*)rbuf );

        }

              if(query_protocol)
              {
                  size_t sz=get_type_size(pami_stype)*i;
                  size_t rsz=get_type_size(pami_rtype)*i;
                result = check_metadata(*next_md,
                                        alltoallv,
                                          pami_stype,
                                        sz, /* metadata uses bytes i, */
                                        alltoallv.cmd.xfer_alltoallv.sndbuf,
                                          pami_rtype,
                                        rsz,
                                        alltoallv.cmd.xfer_alltoallv.rcvbuf);
                if (next_md->check_correct.values.nonlocal)
                {
                  /* \note We currently ignore check_correct.values.nonlocal
                        because these tests should not have nonlocal differences (so far). */
                  result.check.nonlocal = 0;
                }

                if (result.bitmask) 
                {
                  if(!i)i++;
                  continue;
                }
              }

        blocking_coll(context[iContext], &barrier, &bar_poll_flag);

        ti = timer();

        for (j = 0; j < niter; j++)
        {
                if (checkrequired) /* must query every time */
                {
                  result = next_md->check_fn(&alltoallv);
                  if (result.bitmask) 
                  {
                    if(!i)i++;
                    continue;
                  }
                }
          blocking_coll(context[iContext], &alltoallv, &alltoallv_poll_flag);
        }

        tf = timer();
        blocking_coll(context[iContext], &barrier, &bar_poll_flag);

        int rc_check;
        rc |= rc_check = check_rcvbuf(num_tasks, task_id, (double*)rbuf, (double*)sbuf);

        if (rc_check) fprintf(stderr, "%s FAILED validation\n", gProtocolName);

        usec = (tf - ti) / (double)niter;

        if (task_id == 0)
        {
          printf("  %11lld %16d %14.1f %12.2f\n",
                 (long long)dataSent,
                 niter,
                 (double)1e6*(double)dataSent / (double)usec,
                 usec);
          fflush(stdout);
        }
        if(!i)i++;
      }
    }

    free(bar_always_works_algo);
    free(bar_always_works_md);
    free(bar_must_query_algo);
    free(bar_must_query_md);
    free(alltoallv_always_works_algo);
    free(alltoallv_always_works_md);
    free(alltoallv_must_query_algo);
    free(alltoallv_must_query_md);
  } /*for(unsigned iContext = 0; iContext < gNum_contexts; ++iContexts)*/

  sbuf = (char*)sbuf - gBuffer_offset;
  free(sbuf);

  rbuf = (char*)rbuf - gBuffer_offset;
  free(rbuf);

  free(sndlens);
  free(sdispls);
  free(rcvlens);
  free(rdispls);

  rc |= pami_shutdown(&client, context, &gNum_contexts);
  return rc;
}
Exemple #9
0
int main(int argc, char*argv[])
{
  pami_client_t        client;
  pami_context_t      *context;
  pami_task_t          task_id, task_zero=0;
  size_t               num_tasks;
  pami_geometry_t      world_geometry;

  /* Barrier variables */
  size_t               barrier_num_algorithm[2];
  pami_algorithm_t    *bar_always_works_algo = NULL;
  pami_metadata_t     *bar_always_works_md   = NULL;
  pami_algorithm_t    *bar_must_query_algo   = NULL;
  pami_metadata_t     *bar_must_query_md     = NULL;
  pami_xfer_type_t     barrier_xfer = PAMI_XFER_BARRIER;
  volatile unsigned    bar_poll_flag = 0;

  /* Allreduce variables */
  size_t               allreduce_num_algorithm[2];
  pami_algorithm_t    *allreduce_always_works_algo = NULL;
  pami_metadata_t     *allreduce_always_works_md = NULL;
  pami_algorithm_t    *allreduce_must_query_algo = NULL;
  pami_metadata_t     *allreduce_must_query_md = NULL;
  pami_xfer_type_t     allreduce_xfer = PAMI_XFER_ALLREDUCE;
  volatile unsigned    allreduce_poll_flag = 0;

  int                  i, j, nalg = 0;
  double               ti, tf, usec;
  pami_xfer_t          barrier;
  pami_xfer_t          allreduce;

  /* Process environment variables and setup globals */
  setup_env();

  assert(gNum_contexts > 0);
  context = (pami_context_t*)malloc(sizeof(pami_context_t) * gNum_contexts);

  /*  Allocate buffer(s) */
  int err = 0;
  void* sbuf = NULL;
  err = posix_memalign(&sbuf, 128, gMax_byte_count + gBuffer_offset);
  assert(err == 0);
  sbuf = (char*)sbuf + gBuffer_offset;
  void* rbuf = NULL;
  err = posix_memalign(&rbuf, 128, gMax_byte_count + gBuffer_offset);
  assert(err == 0);
  rbuf = (char*)rbuf + gBuffer_offset;

  /*  Initialize PAMI */
  int rc = pami_init(&client,        /* Client             */
                     context,        /* Context            */
                     NULL,           /* Clientname=default */
                     &gNum_contexts, /* gNum_contexts       */
                     NULL,           /* null configuration */
                     0,              /* no configuration   */
                     &task_id,       /* task id            */
                     &num_tasks);    /* number of tasks    */

  if (rc != PAMI_SUCCESS)
    return 1;
  int o;
  for(o = -1; o <= gOptimize ; o++) /* -1 = default, 0 = de-optimize, 1 = optimize */
  {

    pami_configuration_t configuration[1];
    configuration[0].name = PAMI_GEOMETRY_OPTIMIZE;
    configuration[0].value.intval = o; /* de/optimize */
    if(o == -1) ; /* skip update, use defaults */
    else
      rc |= update_geometry(client,
                            context[0],
                            world_geometry,
                            configuration,
                            1);

    if (rc != PAMI_SUCCESS)
    return 1;

  /*  Query the world geometry for barrier algorithms */
  rc |= query_geometry_world(client,
                             context[0],
                             &world_geometry,
                             barrier_xfer,
                             barrier_num_algorithm,
                             &bar_always_works_algo,
                             &bar_always_works_md,
                             &bar_must_query_algo,
                             &bar_must_query_md);

    if (rc != PAMI_SUCCESS)
    return 1;

  barrier.cb_done   = cb_done;
  barrier.cookie    = (void*) & bar_poll_flag;
  barrier.algorithm = bar_always_works_algo[0];

  unsigned iContext = 0;

  for (; iContext < gNum_contexts; ++iContext)
  {

    if (task_id == 0)
      printf("# Context: %u\n", iContext);

    /*  Query the world geometry for allreduce algorithms */
    rc |= query_geometry_world(client,
                               context[iContext],
                               &world_geometry,
                               allreduce_xfer,
                               allreduce_num_algorithm,
                               &allreduce_always_works_algo,
                               &allreduce_always_works_md,
                               &allreduce_must_query_algo,
                               &allreduce_must_query_md);

      if (rc != PAMI_SUCCESS)
      return 1;

    for (nalg = 0; nalg < allreduce_num_algorithm[1]; nalg++)
    {
      metadata_result_t result = {0};

      if (task_id == task_zero)
      {
          printf("# Allreduce Bandwidth Test(size:%zu) -- context = %d, optimize = %d, protocol: %s, Metadata: range %zu <-> %zd, mask %#X\n",num_tasks,
                 iContext, o, allreduce_must_query_md[nalg].name,
               allreduce_must_query_md[nalg].range_lo,(ssize_t)allreduce_must_query_md[nalg].range_hi,
               allreduce_must_query_md[nalg].check_correct.bitmask_correct);
        printf("# Size(bytes)      iterations     bytes/sec      usec\n");
        printf("# -----------      -----------    -----------    ---------\n");
      }

      if (((strstr(allreduce_must_query_md[nalg].name, gSelected) == NULL) && gSelector) ||
          ((strstr(allreduce_must_query_md[nalg].name, gSelected) != NULL) && !gSelector))  continue;

      gProtocolName = allreduce_must_query_md[nalg].name;

      unsigned checkrequired = allreduce_must_query_md[nalg].check_correct.values.checkrequired; /*must query every time */
      assert(!checkrequired || allreduce_must_query_md[nalg].check_fn); /* must have function if checkrequired. */

      allreduce.cb_done   = cb_done;
      allreduce.cookie    = (void*) & allreduce_poll_flag;
      allreduce.algorithm = allreduce_must_query_algo[nalg];
      allreduce.cmd.xfer_allreduce.sndbuf    = sbuf;
      allreduce.cmd.xfer_allreduce.rcvbuf    = rbuf;
      allreduce.cmd.xfer_allreduce.rtype     = PAMI_TYPE_BYTE;
      allreduce.cmd.xfer_allreduce.rtypecount = 0;

      int op, dt;

      for (dt = 0; dt < dt_count; dt++)
      {
        for (op = 0; op < op_count; op++)
        {
          if (gValidTable[op][dt])
          {
            if (task_id == task_zero)
              printf("Running Allreduce: %s, %s\n", dt_array_str[dt], op_array_str[op]);

            for (i = MAX(1,gMin_byte_count/get_type_size(dt_array[dt])); i <= gMax_byte_count/get_type_size(dt_array[dt]); i *= 2)
            {
              size_t sz=get_type_size(dt_array[dt]);
              size_t  dataSent = i * sz;
              int niter;

              if (dataSent < CUTOFF)
                niter = gNiterlat;
              else
                niter = NITERBW;

              allreduce.cmd.xfer_allreduce.stypecount = i;
              allreduce.cmd.xfer_allreduce.rtypecount = dataSent;
              allreduce.cmd.xfer_allreduce.stype      = dt_array[dt];
              allreduce.cmd.xfer_allreduce.op         = op_array[op];

              result = check_metadata(allreduce_must_query_md[nalg],
                                      allreduce,
                                      dt_array[dt],
                                      dataSent, /* metadata uses bytes i, */
                                      allreduce.cmd.xfer_allreduce.sndbuf,
                                      PAMI_TYPE_BYTE,
                                      dataSent,
                                      allreduce.cmd.xfer_allreduce.rcvbuf);


              if (allreduce_must_query_md[nalg].check_correct.values.nonlocal)
              {
                /* \note We currently ignore check_correct.values.nonlocal
                   because these tests should not have nonlocal differences (so far). */
                result.check.nonlocal = 0;
              }

              if (result.bitmask) continue;

              reduce_initialize_sndbuf (sbuf, i, op, dt, task_id, num_tasks);
              memset(rbuf, 0xFF, dataSent);

              /* We aren't testing barrier itself, so use context 0. */
              blocking_coll(context[0], &barrier, &bar_poll_flag);
              ti = timer();

              for (j = 0; j < niter; j++)
              {
                if (checkrequired) /* must query every time */
                {
                  result = allreduce_must_query_md[nalg].check_fn(&allreduce);
                  if (result.bitmask) continue;
                }

                blocking_coll(context[iContext], &allreduce, &allreduce_poll_flag);
              }

              tf = timer();
              /* We aren't testing barrier itself, so use context 0. */
              blocking_coll(context[0], &barrier, &bar_poll_flag);

              int rc_check;
	      rc |= rc_check = reduce_check_rcvbuf (rbuf, i, op, dt, task_id, num_tasks);	      
              if (rc_check) fprintf(stderr, "%s FAILED validation\n", gProtocolName);

             usec = (tf - ti) / (double)niter;

              if (task_id == task_zero)
              {
                printf("  %11lld %16d %14.1f %12.2f\n",
                       (long long)dataSent,
                       niter,
                       (double)1e6*(double)dataSent / (double)usec,
                       usec);
                fflush(stdout);
              }
            }
          }
        }
      }
    }

    free(allreduce_always_works_algo);
    free(allreduce_always_works_md);
    free(allreduce_must_query_algo);
    free(allreduce_must_query_md);

  } /*for(unsigned iContext = 0; iContext < gNum_contexts; ++iContexts)*/

  free(bar_always_works_algo);
  free(bar_always_works_md);
  free(bar_must_query_algo);
  free(bar_must_query_md);

  } /* optimize loop */

  sbuf = (char*)sbuf - gBuffer_offset;
  free(sbuf);
  rbuf = (char*)rbuf - gBuffer_offset;
  free(rbuf);

  rc |= pami_shutdown(&client, context, &gNum_contexts);
  return rc;
}
Exemple #10
0
int main (int argc, char ** argv)
{
  pami_client_t        client;
  pami_context_t       context;
  size_t               num_contexts = 1;
  pami_task_t          task_id, task_zero=0;;
  size_t               num_tasks;
  pami_geometry_t      world_geometry;

  /* Barrier variables */
  size_t               barrier_num_algorithm[2];
  pami_algorithm_t    *bar_always_works_algo = NULL;
  pami_metadata_t     *bar_always_works_md   = NULL;
  pami_algorithm_t    *bar_must_query_algo   = NULL;
  pami_metadata_t     *bar_must_query_md     = NULL;
  pami_xfer_type_t     barrier_xfer = PAMI_XFER_BARRIER;
  volatile unsigned    bar_poll_flag = 0;

  /* Allgatherv variables */
  size_t               allgatherv_num_algorithm[2];
  pami_algorithm_t    *next_algo = NULL;
  pami_metadata_t     *next_md= NULL;
  pami_algorithm_t    *allgatherv_always_works_algo = NULL;
  pami_metadata_t     *allgatherv_always_works_md = NULL;
  pami_algorithm_t    *allgatherv_must_query_algo = NULL;
  pami_metadata_t     *allgatherv_must_query_md = NULL;
  pami_xfer_type_t     allgatherv_xfer = PAMI_XFER_ALLGATHERV;

  volatile unsigned    allgatherv_poll_flag = 0;

  int                  nalg= 0, total_alg;
  double               ti, tf, usec;
  pami_xfer_t          barrier;
  pami_xfer_t          allgatherv;

  setup_env();


  /*  Initialize PAMI */
  int rc = pami_init(&client,        /* Client             */
                     &context,       /* Context            */
                     NULL,           /* Clientname=default */
                     &num_contexts,  /* num_contexts       */
                     NULL,           /* null configuration */
                     0,              /* no configuration   */
                     &task_id,       /* task id            */
                     &num_tasks);    /* number of tasks    */

  if (rc == 1)
    return 1;

  /*  Query the world geometry for barrier algorithms */
  rc |= query_geometry_world(client,
                            context,
                            &world_geometry,
                            barrier_xfer,
                            barrier_num_algorithm,
                            &bar_always_works_algo,
                            &bar_always_works_md,
                            &bar_must_query_algo,
                            &bar_must_query_md);

  if (rc == 1)
    return 1;

  /*  Query the world geometry for allgatherv algorithms */
  rc |= query_geometry_world(client,
                            context,
                            &world_geometry,
                            allgatherv_xfer,
                            allgatherv_num_algorithm,
                            &allgatherv_always_works_algo,
                            &allgatherv_always_works_md,
                            &allgatherv_must_query_algo,
                            &allgatherv_must_query_md);

  if (rc == 1)
    return 1;


  /*  Allocate buffer(s) */
  int err = 0;
  void* buf = NULL;
  err = posix_memalign(&buf, 128, (gMax_byte_count * num_tasks) + gBuffer_offset);
  assert(err == 0);
  buf = (char*)buf + gBuffer_offset;

  void* rbuf = NULL;
  err = posix_memalign(&rbuf, 128, (gMax_byte_count * num_tasks) + gBuffer_offset);
  assert(err == 0);
  rbuf = (char*)rbuf + gBuffer_offset;

  size_t *lengths   = (size_t*)malloc(num_tasks * sizeof(size_t));
  size_t *displs    = (size_t*)malloc(num_tasks * sizeof(size_t));
  barrier.cb_done   = cb_done;
  barrier.cookie    = (void*) & bar_poll_flag;
  barrier.algorithm = bar_always_works_algo[0];
  blocking_coll(context, &barrier, &bar_poll_flag);

  {
    total_alg = allgatherv_num_algorithm[0]+allgatherv_num_algorithm[1];

    for (nalg = 0; nalg < total_alg; nalg++)
    {
      metadata_result_t result = {0};
      unsigned query_protocol;
      if(nalg < allgatherv_num_algorithm[0])
      {
        query_protocol = 0;
        next_algo = &allgatherv_always_works_algo[nalg];
        next_md  = &allgatherv_always_works_md[nalg];
      }
      else
      {
        query_protocol = 1;
        next_algo = &allgatherv_must_query_algo[nalg-allgatherv_num_algorithm[0]];
        next_md  = &allgatherv_must_query_md[nalg-allgatherv_num_algorithm[0]];
      }
      allgatherv.cb_done    = cb_done;
      allgatherv.cookie     = (void*) & allgatherv_poll_flag;
      allgatherv.algorithm  = *next_algo;
      allgatherv.cmd.xfer_allgatherv.sndbuf     = buf;
      allgatherv.cmd.xfer_allgatherv.rcvbuf     = rbuf;
      allgatherv.cmd.xfer_allgatherv.rtypecounts = lengths;
      allgatherv.cmd.xfer_allgatherv.rdispls     = displs;

      gProtocolName = next_md->name;

      if (task_id == 0)
      {
        printf("# Allgatherv Bandwidth Test(size:%zu) -- protocol: %s, Metadata: range %zu <-> %zd, mask %#X\n",num_tasks,
               gProtocolName,
               next_md->range_lo,(ssize_t)next_md->range_hi,
               next_md->check_correct.bitmask_correct);
        printf("# Size(bytes)      iterations     bytes/sec      usec\n");
        printf("# -----------      -----------    -----------    ---------\n");
      }
      if (((strstr(next_md->name, gSelected) == NULL) && gSelector) ||
          ((strstr(next_md->name, gSelected) != NULL) && !gSelector))  continue;

      unsigned checkrequired = next_md->check_correct.values.checkrequired; /*must query every time */
      assert(!checkrequired || next_md->check_fn); /* must have function if checkrequired. */

      unsigned i, j, k;

      int dt,op=4/*SUM*/;

      for (dt = 0; dt < dt_count; dt++)
      {
          if ((gFull_test && ((dt != DT_NULL) && (dt != DT_BYTE))) || gValidTable[op][dt])
          {
            if (task_id == task_zero)
              printf("Running Allgatherv: %s\n", dt_array_str[dt]);

              for (i = MAX(1,gMin_byte_count/get_type_size(dt_array[dt])); i <= gMax_byte_count/get_type_size(dt_array[dt]); i *= 2)
            {
              size_t dataSent = i * get_type_size(dt_array[dt]);
              int          niter;

              if (dataSent < CUTOFF)
                niter = gNiterlat;
              else
                niter = NITERBW;

              for (k = 0; k < num_tasks; k++)lengths[k] = i;
              for (k = 0; k < num_tasks; k++)displs[k]  = k*i;

              allgatherv.cmd.xfer_allgatherv.stypecount       = i;
              allgatherv.cmd.xfer_allgatherv.stype            = dt_array[dt];
              allgatherv.cmd.xfer_allgatherv.rtype            = dt_array[dt];

              gather_initialize_sndbuf_dt (buf, i, task_id, dt);
              memset(rbuf, 0xFF, i);
              if(query_protocol)
              {
                size_t sz=get_type_size(dt_array[dt])*i;
                result = check_metadata(*next_md,
                                        allgatherv,
                                        dt_array[dt],
                                        sz, /* metadata uses bytes i, */
                                        allgatherv.cmd.xfer_allgatherv.sndbuf,
                                        dt_array[dt],
                                        sz,
                                        allgatherv.cmd.xfer_allgatherv.rcvbuf);
                if (next_md->check_correct.values.nonlocal)
                {
                  /* \note We currently ignore check_correct.values.nonlocal
                    because these tests should not have nonlocal differences (so far). */
                  result.check.nonlocal = 0;
                }

                if (result.bitmask) continue;
              }

              blocking_coll(context, &barrier, &bar_poll_flag);
              ti = timer();
              for (j = 0; j < niter; j++)
              {
                if (checkrequired) /* must query every time */
                {
                  result = next_md->check_fn(&allgatherv);
                  if (result.bitmask) continue;
                }
                blocking_coll(context, &allgatherv, &allgatherv_poll_flag);
              }

              tf = timer();
              blocking_coll(context, &barrier, &bar_poll_flag);

              int rc_check;
              rc |= rc_check = gather_check_rcvbuf_dt (num_tasks, rbuf, i, dt);

              if (rc_check) fprintf(stderr, "%s FAILED validation on %s\n", gProtocolName, dt_array_str[dt]);

              usec = (tf - ti) / (double)niter;

              if (task_id == 0)
              {
                 printf("  %11lld %16d %14.1f %12.2f\n",
                        (long long)dataSent,
                        niter,
                        (double)1e6*(double)dataSent / (double)usec,
                        usec);
                 fflush(stdout);
              }
            }
          }
      }
    }
  }
  rc |= pami_shutdown(&client, &context, &num_contexts);
  free(bar_always_works_algo);
  free(bar_always_works_md);
  free(bar_must_query_algo);
  free(bar_must_query_md);
  free(allgatherv_always_works_algo);
  free(allgatherv_always_works_md);
  free(allgatherv_must_query_algo);
  free(allgatherv_must_query_md);

  return rc;
};
int main(int argc, char*argv[])
{
  pami_client_t        client;
  pami_context_t      *context;
  pami_task_t          task_id, local_task_id=0, task_zero=0;
  size_t               num_tasks;
  pami_geometry_t      world_geometry;

  /* Barrier variables */
  size_t               barrier_num_algorithm[2];
  pami_algorithm_t    *bar_always_works_algo = NULL;
  pami_metadata_t     *bar_always_works_md   = NULL;
  pami_algorithm_t    *bar_must_query_algo   = NULL;
  pami_metadata_t     *bar_must_query_md     = NULL;
  pami_xfer_type_t     barrier_xfer = PAMI_XFER_BARRIER;
  volatile unsigned    bar_poll_flag = 0;
  volatile unsigned    newbar_poll_flag = 0;

  /* Allreduce variables */
  size_t               allreduce_num_algorithm[2];
  pami_algorithm_t    *allreduce_always_works_algo = NULL;
  pami_metadata_t     *allreduce_always_works_md = NULL;
  pami_algorithm_t    *allreduce_must_query_algo = NULL;
  pami_metadata_t     *allreduce_must_query_md = NULL;
  pami_xfer_type_t     allreduce_xfer = PAMI_XFER_ALLREDUCE;
  volatile unsigned    allreduce_poll_flag = 0;

  int                  nalg = 0;
  double               ti, tf, usec;
  pami_xfer_t          barrier;
  pami_xfer_t          allreduce;

  /* Process environment variables and setup globals */
  setup_env();

  assert(gNum_contexts > 0);
  context = (pami_context_t*)malloc(sizeof(pami_context_t) * gNum_contexts);

  /*  Allocate buffer(s) */
  int err = 0;
  void* sbuf = NULL;
  err = posix_memalign(&sbuf, 128, gMax_byte_count + gBuffer_offset);
  assert(err == 0);
  sbuf = (char*)sbuf + gBuffer_offset;
  void* rbuf = NULL;
  err = posix_memalign(&rbuf, 128, gMax_byte_count + gBuffer_offset);
  assert(err == 0);
  rbuf = (char*)rbuf + gBuffer_offset;

  /*  Initialize PAMI */
  int rc = pami_init(&client,        /* Client             */
                     context,        /* Context            */
                     NULL,           /* Clientname=default */
                     &gNum_contexts, /* gNum_contexts       */
                     NULL,           /* null configuration */
                     0,              /* no configuration   */
                     &task_id,       /* task id            */
                     &num_tasks);    /* number of tasks    */

  if (rc == 1)
    return 1;

  if (num_tasks == 1)
  {
    fprintf(stderr, "No subcomms on 1 node\n");
    return 0;
  }

  assert(task_id >= 0);
  assert(task_id < num_tasks);

  /*  Query the world geometry for barrier algorithms */
  rc |= query_geometry_world(client,
                             context[0],
                             &world_geometry,
                             barrier_xfer,
                             barrier_num_algorithm,
                             &bar_always_works_algo,
                             &bar_always_works_md,
                             &bar_must_query_algo,
                             &bar_must_query_md);

  if (rc == 1)
    return 1;

  /*  Set up world barrier */
  barrier.cb_done   = cb_done;
  barrier.cookie    = (void*) & bar_poll_flag;
  barrier.algorithm = bar_always_works_algo[0];

  unsigned iContext = 0;

  /*  Create the subgeometry */
  pami_geometry_range_t *range;
  int                    rangecount;
  pami_geometry_t        newgeometry;
  size_t                 newbar_num_algo[2];
  pami_algorithm_t      *newbar_algo        = NULL;
  pami_metadata_t       *newbar_md          = NULL;
  pami_algorithm_t      *q_newbar_algo      = NULL;
  pami_metadata_t       *q_newbar_md        = NULL;

  pami_xfer_t            newbarrier;

  size_t                 set[2];
  int                    id;


  range     = (pami_geometry_range_t *)malloc(((num_tasks + 1) / 2) * sizeof(pami_geometry_range_t));

  int unused_non_task_zero[2];
  get_split_method(&num_tasks, task_id, &rangecount, range, &local_task_id, set, &id, &task_zero,unused_non_task_zero);

  for (; iContext < gNum_contexts; ++iContext)
  {

    if (task_id == task_zero)
      printf("# Context: %u\n", iContext);

    /* Delay task_zero tasks, and emulate that he's doing "other"
       message passing.  This will cause the geometry_create
       request from other nodes to be unexpected when doing
       parentless geometries and won't affect parented.      */
    if (task_id == task_zero)
    {
      delayTest(1);
      unsigned ii = 0;

      for (; ii < gNum_contexts; ++ii)
        PAMI_Context_advance (context[ii], 1000);
    }

    rc |= create_and_query_geometry(client,
                                    context[0],
                                    context[iContext],
                                    gParentless ? PAMI_GEOMETRY_NULL : world_geometry,
                                    &newgeometry,
                                    range,
                                    rangecount,
                                    id + iContext, /* Unique id for each context */
                                    barrier_xfer,
                                    newbar_num_algo,
                                    &newbar_algo,
                                    &newbar_md,
                                    &q_newbar_algo,
                                    &q_newbar_md);

    if (rc == 1)
      return 1;

    /*  Query the sub geometry for reduce algorithms */
    rc |= query_geometry(client,
                         context[iContext],
                         newgeometry,
                         allreduce_xfer,
                         allreduce_num_algorithm,
                         &allreduce_always_works_algo,
                         &allreduce_always_works_md,
                         &allreduce_must_query_algo,
                         &allreduce_must_query_md);

    if (rc == 1)
      return 1;

    /*  Set up sub geometry barrier */
    newbarrier.cb_done   = cb_done;
    newbarrier.cookie    = (void*) & newbar_poll_flag;
    newbarrier.algorithm = newbar_algo[0];


    for (nalg = 0; nalg < allreduce_num_algorithm[1]; nalg++)
    {
      metadata_result_t result = {0};
      int             i, j, k;

      for (k = 1; k >= 0; k--)
      {
        if (set[k])
        {
          if (task_id == task_zero)
          {
            printf("# Allreduce Bandwidth Test(size:%zu) -- context = %d, task = %d protocol: %s, Metadata: range %zu <-> %zd, mask %#X\n",num_tasks,
                   iContext, task_zero, allreduce_must_query_md[nalg].name,
                   allreduce_must_query_md[nalg].range_lo,(ssize_t)allreduce_must_query_md[nalg].range_hi,
                   allreduce_must_query_md[nalg].check_correct.bitmask_correct);
            printf("# Size(bytes)      iterations     bytes/sec      usec\n");
            printf("# -----------      -----------    -----------    ---------\n");
          }

          if (((strstr(allreduce_must_query_md[nalg].name, gSelected) == NULL) && gSelector) ||
              ((strstr(allreduce_must_query_md[nalg].name, gSelected) != NULL) && !gSelector))  continue;

          gProtocolName = allreduce_must_query_md[nalg].name;

          unsigned checkrequired = allreduce_must_query_md[nalg].check_correct.values.checkrequired; /*must query every time */
          assert(!checkrequired || allreduce_must_query_md[nalg].check_fn); /* must have function if checkrequired. */

          allreduce.cb_done   = cb_done;
          allreduce.cookie    = (void*) & allreduce_poll_flag;
          allreduce.algorithm = allreduce_must_query_algo[nalg];
          allreduce.cmd.xfer_allreduce.sndbuf    = sbuf;
          allreduce.cmd.xfer_allreduce.rcvbuf    = rbuf;
          allreduce.cmd.xfer_allreduce.rtype     = PAMI_TYPE_BYTE;
          allreduce.cmd.xfer_allreduce.rtypecount = 0;

          int op, dt;

          for (dt = 0; dt < dt_count; dt++)
            for (op = 0; op < op_count; op++)
            {
              if (gValidTable[op][dt])
              {
                if (task_id == task_zero)
                  printf("Running Allreduce: %s, %s\n", dt_array_str[dt], op_array_str[op]);

                for (i = MAX(1,gMin_byte_count/get_type_size(dt_array[dt])); i <= gMax_byte_count/get_type_size(dt_array[dt]); i *= 2)
                {
                  size_t sz = get_type_size(dt_array[dt]);
                  size_t  dataSent = i * sz;
                  int niter;

                  if (dataSent < CUTOFF)
                    niter = gNiterlat;
                  else
                    niter = NITERBW;

                  allreduce.cmd.xfer_allreduce.stypecount = i;
                  allreduce.cmd.xfer_allreduce.rtypecount = dataSent;
                  allreduce.cmd.xfer_allreduce.stype      = dt_array[dt];
                  allreduce.cmd.xfer_allreduce.op         = op_array[op];

                  result = check_metadata(allreduce_must_query_md[nalg],
                                          allreduce,
                                          dt_array[dt],
                                          dataSent, /* metadata uses bytes i, */
                                          allreduce.cmd.xfer_allreduce.sndbuf,
                                          PAMI_TYPE_BYTE,
                                          dataSent,
                                          allreduce.cmd.xfer_allreduce.rcvbuf);


                  if (allreduce_must_query_md[nalg].check_correct.values.nonlocal)
                  {
                    /* \note We currently ignore check_correct.values.nonlocal
                       because these tests should not have nonlocal differences (so far). */
                    result.check.nonlocal = 0;
                  }

                  if (result.bitmask) continue;

                  reduce_initialize_sndbuf (sbuf, i, op, dt, local_task_id, num_tasks);
                  blocking_coll(context[iContext], &newbarrier, &newbar_poll_flag);
                  ti = timer();

                  for (j = 0; j < niter; j++)
                  {
                    if (checkrequired) /* must query every time */
                    {
                      result = allreduce_must_query_md[nalg].check_fn(&allreduce);

                      if (result.bitmask) continue;
                    }

                    blocking_coll(context[iContext], &allreduce, &allreduce_poll_flag);
                  }

                  tf = timer();
                  blocking_coll(context[iContext], &newbarrier, &newbar_poll_flag);

                  int rc_check;
                  rc |= rc_check = reduce_check_rcvbuf (rbuf, i, op, dt, local_task_id, num_tasks);

                  if (rc_check) fprintf(stderr, "%s FAILED validation\n", gProtocolName);

                  usec = (tf - ti) / (double)niter;

                  if (task_id == task_zero)
                  {
                    printf("  %11lld %16d %14.1f %12.2f\n",
                           (long long)dataSent,
                           niter,
                           (double)1e6*(double)dataSent / (double)usec,
                           usec);
                    fflush(stdout);
                  }
                }
              }
            }
        }
      }
    }

    /* We aren't testing world barrier itself, so use context 0.*/
    blocking_coll(context[0], &barrier, &bar_poll_flag);

    free(newbar_algo);
    free(newbar_md);
    free(q_newbar_algo);
    free(q_newbar_md);
    free(allreduce_always_works_algo);
    free(allreduce_always_works_md);
    free(allreduce_must_query_algo);
    free(allreduce_must_query_md);

  } /*for(unsigned iContext = 0; iContext < gNum_contexts; ++iContexts)*/

  free(bar_always_works_algo);
  free(bar_always_works_md);
  free(bar_must_query_algo);
  free(bar_must_query_md);

  sbuf = (char*)sbuf - gBuffer_offset;
  free(sbuf);
  rbuf = (char*)rbuf - gBuffer_offset;
  free(rbuf);

  rc |= pami_shutdown(&client, context, &gNum_contexts);
  return rc;
}
int main(int argc, char*argv[])
{
  pami_client_t        client;
  pami_context_t      *context;
  pami_task_t          task_id;
  size_t               num_tasks;
  pami_geometry_t      world_geometry;

  /* Barrier variables */
  size_t               barrier_num_algorithm[2];
  pami_algorithm_t    *bar_always_works_algo = NULL;
  pami_metadata_t     *bar_always_works_md   = NULL;
  pami_algorithm_t    *bar_must_query_algo   = NULL;
  pami_metadata_t     *bar_must_query_md     = NULL;
  pami_xfer_type_t     barrier_xfer = PAMI_XFER_BARRIER;
  volatile unsigned    bar_poll_flag = 0;

  /* Alltoallv variables */
  size_t               alltoallv_num_algorithm[2];
  pami_algorithm_t    *alltoallv_always_works_algo = NULL;
  pami_metadata_t     *alltoallv_always_works_md = NULL;
  pami_algorithm_t    *next_algo = NULL;
  pami_metadata_t     *next_md= NULL;
  pami_algorithm_t    *alltoallv_must_query_algo = NULL;
  pami_metadata_t     *alltoallv_must_query_md = NULL;
  pami_xfer_type_t     alltoallv_xfer = PAMI_XFER_ALLTOALLV;
  volatile unsigned    alltoallv_poll_flag = 0;

  int                  nalg= 0, total_alg;
  double               ti, tf, usec;
  pami_xfer_t          barrier;
  pami_xfer_t          alltoallv;

  /* Process environment variables and setup globals */
  if(argc > 1 && argv[1][0] == '-' && (argv[1][1] == 'h' || argv[1][1] == 'H') ) setup_env_internal(1);
  else setup_env();

  assert(gNum_contexts > 0);
  context = (pami_context_t*)malloc(sizeof(pami_context_t) * gNum_contexts);


  /*  Initialize PAMI */
  int rc = pami_init(&client,        /* Client             */
                     context,        /* Context            */
                     NULL,           /* Clientname=default */
                     &gNum_contexts, /* gNum_contexts       */
                     NULL,           /* null configuration */
                     0,              /* no configuration   */
                     &task_id,       /* task id            */
                     &num_tasks);    /* number of tasks    */

  if (rc == 1)
    return 1;

  /*  Allocate buffer(s) */
  int err = 0;
  void* sbuf = NULL;
  err = posix_memalign((void*) & sbuf, 128, (gMax_byte_count * num_tasks) + gBuffer_offset);
  assert(err == 0);
  sbuf = (char*)sbuf + gBuffer_offset;

  void* rbuf = NULL;
  err = posix_memalign((void*) & rbuf, 128, (gMax_byte_count * num_tasks) + gBuffer_offset);
  assert(err == 0);
  rbuf = (char*)rbuf + gBuffer_offset;

  sndlens = (size_t*) malloc(num_tasks * sizeof(size_t));
  assert(sndlens);
  sdispls = (size_t*) malloc(num_tasks * sizeof(size_t));
  assert(sdispls);
  rcvlens = (size_t*) malloc(num_tasks * sizeof(size_t));
  assert(rcvlens);
  rdispls = (size_t*) malloc(num_tasks * sizeof(size_t));
  assert(rdispls);

  unsigned iContext = 0;

  for (; iContext < gNum_contexts; ++iContext)
  {

    if (task_id == 0)
      printf("# Context: %u\n", iContext);

    /*  Query the world geometry for barrier algorithms */
    rc |= query_geometry_world(client,
                               context[iContext],
                               &world_geometry,
                               barrier_xfer,
                               barrier_num_algorithm,
                               &bar_always_works_algo,
                               &bar_always_works_md,
                               &bar_must_query_algo,
                               &bar_must_query_md);

    if (rc != PAMI_SUCCESS)
    return 1;

    int o;
    for(o = -1; o <= gOptimize ; o++) /* -1 = default, 0 = de-optimize, 1 = optimize */
    {

      pami_configuration_t configuration[1];
      configuration[0].name = PAMI_GEOMETRY_OPTIMIZE;
      configuration[0].value.intval = o; /* de/optimize */
      if(o == -1) ; /* skip update, use defaults */
      else
        rc |= update_geometry(client,
                              context[0],
                              world_geometry,
                              configuration,
                              1);

      if (rc != PAMI_SUCCESS)
      return 1;

    /*  Query the world geometry for alltoallv algorithms */
    rc |= query_geometry_world(client,
                               context[iContext],
                               &world_geometry,
                               alltoallv_xfer,
                               alltoallv_num_algorithm,
                               &alltoallv_always_works_algo,
                               &alltoallv_always_works_md,
                               &alltoallv_must_query_algo,
                               &alltoallv_must_query_md);

      if (rc != PAMI_SUCCESS)
      return 1;

    barrier.cb_done   = cb_done;
    barrier.cookie    = (void*) & bar_poll_flag;
    barrier.algorithm = bar_always_works_algo[0];

    total_alg = alltoallv_num_algorithm[0]+alltoallv_num_algorithm[1];
    for (nalg = 0; nalg < total_alg; nalg++)
    {
      metadata_result_t result = {0};
      unsigned query_protocol;
      if(nalg < alltoallv_num_algorithm[0])
      {  
        query_protocol = 0;
        next_algo = &alltoallv_always_works_algo[nalg];
        next_md  = &alltoallv_always_works_md[nalg];
      }
      else
      {  
        query_protocol = 1;
        next_algo = &alltoallv_must_query_algo[nalg-alltoallv_num_algorithm[0]];
        next_md  = &alltoallv_must_query_md[nalg-alltoallv_num_algorithm[0]];
      }

      gProtocolName = next_md->name;

      alltoallv.cb_done    = cb_done;
      alltoallv.cookie     = (void*) & alltoallv_poll_flag;
      alltoallv.algorithm  = *next_algo;
      alltoallv.cmd.xfer_alltoallv.sndbuf        = sbuf;
      alltoallv.cmd.xfer_alltoallv.stype         = PAMI_TYPE_BYTE;
      alltoallv.cmd.xfer_alltoallv.stypecounts   = sndlens;
      alltoallv.cmd.xfer_alltoallv.sdispls       = sdispls;
      alltoallv.cmd.xfer_alltoallv.rcvbuf        = rbuf;
      alltoallv.cmd.xfer_alltoallv.rtype         = PAMI_TYPE_BYTE;
      alltoallv.cmd.xfer_alltoallv.rtypecounts   = rcvlens;
      alltoallv.cmd.xfer_alltoallv.rdispls       = rdispls;

      gProtocolName = next_md->name;

      if (task_id == 0)
      {
        printf("# Alltoallv Bandwidth Test(size:%zu) -- context = %d, optimize = %d, protocol: %s, Metadata: range %zu <-> %zd, mask %#X\n",num_tasks,
               iContext, o, gProtocolName,
               next_md->range_lo,(ssize_t)next_md->range_hi,
               next_md->check_correct.bitmask_correct);
        printf("# Size(bytes)  iterations    bytes/sec      usec\n");
        printf("# -----------      -----------    -----------    ---------\n");
      }

      if (((strstr(next_md->name, gSelected) == NULL) && gSelector) ||
          ((strstr(next_md->name, gSelected) != NULL) && !gSelector))  continue;

      int i, j;
      int dt,op=4/*SUM*/;

      unsigned checkrequired = next_md->check_correct.values.checkrequired; /*must query every time */
      assert(!checkrequired || next_md->check_fn); /* must have function if checkrequired. */


      for (dt = 0; dt < dt_count; dt++)
      {
          if ((gFull_test && ((dt != DT_NULL) && (dt != DT_BYTE))) || gValidTable[op][dt])
          {
            if (task_id == 0)
              printf("Running Alltoallv: %s\n", dt_array_str[dt]);

            for ( i = gMin_byte_count? MAX(1,gMin_byte_count/get_type_size(dt_array[dt])) : 0; /*clumsy, only want 0 if hardcoded to 0, othersize min 1 */
                  i <= gMax_byte_count/get_type_size(dt_array[dt]); 
                  i = i ? i*2 : 1 /* handle zero min */)
            {
              size_t dataSent = i * get_type_size(dt_array[dt]);
              int          niter;

              if (dataSent < CUTOFF)
                niter = gNiterlat;
              else
                niter = NITERBW;

              for (j = 0; j < num_tasks; j++)
              {
                sndlens[j] = rcvlens[j] = i;
                sdispls[j] = rdispls[j] = i * j;

                alltoallv_initialize_bufs_dt(sbuf, rbuf, sndlens, rcvlens, sdispls, rdispls, j, dt);
              }
              alltoallv.cmd.xfer_alltoallv.rtype = dt_array[dt];
              alltoallv.cmd.xfer_alltoallv.stype = dt_array[dt];

              if(query_protocol)
              {  
                size_t sz=get_type_size(dt_array[dt])*i;
                /* Must initialize all of cmd for metadata */
                result = check_metadata(*next_md,
                                        alltoallv,
                                        dt_array[dt],
                                        sz, /* metadata uses bytes i, */
                                        alltoallv.cmd.xfer_alltoallv.sndbuf,
                                        dt_array[dt],
                                        sz,
                                        alltoallv.cmd.xfer_alltoallv.rcvbuf);
                if (next_md->check_correct.values.nonlocal)
                {
                  /* \note We currently ignore check_correct.values.nonlocal
                        because these tests should not have nonlocal differences (so far). */
                  result.check.nonlocal = 0;
                }

                if (result.bitmask) continue;
              }

              blocking_coll(context[iContext], &barrier, &bar_poll_flag);

              ti = timer();

              for (j = 0; j < niter; j++)
              {
                if (checkrequired) /* must query every time */
                {
                  result = next_md->check_fn(&alltoallv);
                  if (result.bitmask) continue;
                }
                blocking_coll(context[iContext], &alltoallv, &alltoallv_poll_flag);
              }

              tf = timer();
              blocking_coll(context[iContext], &barrier, &bar_poll_flag);

              int rc_check;
              rc |= rc_check = alltoallv_check_rcvbuf_dt(rbuf, rcvlens, rdispls, num_tasks, task_id, dt);

              if (rc_check) fprintf(stderr, "%s FAILED validation on %s\n", gProtocolName, dt_array_str[dt]);

              usec = (tf - ti) / (double)niter;

              if (task_id == 0)
              {
                  printf("  %11lld %16d %14.1f %12.2f\n",
                        (long long)dataSent,
                        niter,
                        (double)1e6*(double)dataSent / (double)usec,
                        usec);
                  fflush(stdout);
              }
            }
          }
      }
    }
    free(bar_always_works_algo);
    free(bar_always_works_md);
    free(bar_must_query_algo);
    free(bar_must_query_md);
    free(alltoallv_always_works_algo);
    free(alltoallv_always_works_md);
    free(alltoallv_must_query_algo);
    free(alltoallv_must_query_md);
    } /* optimize loop */
  } /*for(unsigned iContext = 0; iContext < gNum_contexts; ++iContexts)*/

  sbuf = (char*)sbuf - gBuffer_offset;
  free(sbuf);

  rbuf = (char*)rbuf - gBuffer_offset;
  free(rbuf);

  free(sndlens);
  free(sdispls);
  free(rcvlens);
  free(rdispls);

  rc |= pami_shutdown(&client, context, &gNum_contexts);
  return rc;
}
Exemple #13
0
int main(int argc, char*argv[])
{
  pami_client_t        client;
  pami_context_t      *context;
  pami_task_t          task_id;
  size_t               num_tasks;
  pami_geometry_t      world_geometry;

  /* Barrier variables */
  size_t               barrier_num_algorithm[2];
  pami_algorithm_t    *bar_always_works_algo = NULL;
  pami_metadata_t     *bar_always_works_md   = NULL;
  pami_algorithm_t    *bar_must_query_algo   = NULL;
  pami_metadata_t     *bar_must_query_md     = NULL;
  pami_xfer_type_t     barrier_xfer = PAMI_XFER_BARRIER;
  volatile unsigned    bar_poll_flag = 0;

  /* Bcast variables */
  size_t               bcast_num_algorithm[2];
  pami_algorithm_t    *bcast_always_works_algo = NULL;
  pami_metadata_t     *bcast_always_works_md = NULL;
  pami_algorithm_t    *bcast_must_query_algo = NULL;
  pami_metadata_t     *bcast_must_query_md = NULL;
  pami_xfer_type_t     bcast_xfer = PAMI_XFER_BROADCAST;
  volatile unsigned    bcast_poll_flag = 0;

  int                  nalg= 0;
  double               ti, tf, usec;
  pami_xfer_t          barrier;
  pami_xfer_t          broadcast;

  /* Process environment variables and setup globals */
  setup_env();

  assert(gNum_contexts > 0);
  context = (pami_context_t*)malloc(sizeof(pami_context_t) * gNum_contexts);

  /*  Initialize PAMI */
  int rc = pami_init(&client,        /* Client             */
                     context,        /* Context            */
                     NULL,           /* Clientname=default */
                     &gNum_contexts, /* gNum_contexts       */
                     NULL,           /* null configuration */
                     0,              /* no configuration   */
                     &task_id,       /* task id            */
                     &num_tasks);    /* number of tasks    */

  if (rc != PAMI_SUCCESS)
    return 1;
  int o;
  for(o = -1; o <= gOptimize ; o++) /* -1 = default, 0 = de-optimize, 1 = optimize */
  {

    pami_configuration_t configuration[1];
    configuration[0].name = PAMI_GEOMETRY_OPTIMIZE;
    configuration[0].value.intval = o; /* de/optimize */
    if(o == -1) ; /* skip update, use defaults */
    else
      rc |= update_geometry(client,
                            context[0],
                            world_geometry,
                            configuration,
                            1);

    if (rc != PAMI_SUCCESS)
    return 1;

  if(gNumRoots > num_tasks) gNumRoots = num_tasks;

  /*  Allocate buffer(s) */
  int err = 0;
  void* buf = NULL;
  err = posix_memalign(&buf, 128, gMax_byte_count + gBuffer_offset);
  assert(err == 0);
  buf = (char*)buf + gBuffer_offset;


  unsigned iContext = 0;

  for (; iContext < gNum_contexts; ++iContext)
  {

    if (task_id == 0)
      printf("# Context: %u\n", iContext);

    /*  Query the world geometry for barrier algorithms */
    rc |= query_geometry_world(client,
                               context[iContext],
                               &world_geometry,
                               barrier_xfer,
                               barrier_num_algorithm,
                               &bar_always_works_algo,
                               &bar_always_works_md,
                               &bar_must_query_algo,
                               &bar_must_query_md);

    if (rc != PAMI_SUCCESS)
      return 1;

    /*  Query the world geometry for broadcast algorithms */
    rc |= query_geometry_world(client,
                               context[iContext],
                               &world_geometry,
                               bcast_xfer,
                               bcast_num_algorithm,
                               &bcast_always_works_algo,
                               &bcast_always_works_md,
                               &bcast_must_query_algo,
                               &bcast_must_query_md);

      if (rc != PAMI_SUCCESS)
      return 1;

    barrier.cb_done   = cb_done;
    barrier.cookie    = (void*) & bar_poll_flag;
    barrier.algorithm = bar_always_works_algo[0];
    blocking_coll(context[iContext], &barrier, &bar_poll_flag);

    for (nalg = 0; nalg < bcast_num_algorithm[1]; nalg++)
    {
      broadcast.cb_done                      = cb_done;
      broadcast.cookie                       = (void*) & bcast_poll_flag;
      broadcast.algorithm                    = bcast_must_query_algo[nalg];
      broadcast.cmd.xfer_broadcast.buf       = buf;
      broadcast.cmd.xfer_broadcast.type      = PAMI_TYPE_BYTE;
      broadcast.cmd.xfer_broadcast.typecount = 0;

      gProtocolName = bcast_must_query_md[nalg].name;

      metadata_result_t result = {0};

      int k;
      for (k=0; k< gNumRoots; k++)
      {
      pami_endpoint_t   root_ep;
        pami_task_t root_task = (pami_task_t)k;
      PAMI_Endpoint_create(client, root_task, 0, &root_ep);
      broadcast.cmd.xfer_broadcast.root = root_ep;

      if (task_id == root_task)
      {
        printf("# Broadcast Bandwidth Test(size:%zu) -- context = %d, optimize = %d, root = %d  protocol: %s, Metadata: range %zu <-> %zd, mask %#X\n",num_tasks,
               iContext, o, root_task, gProtocolName,
               bcast_must_query_md[nalg].range_lo, bcast_must_query_md[nalg].range_hi,
               bcast_must_query_md[nalg].check_correct.bitmask_correct);
        printf("# Size(bytes)      iterations     bytes/sec      usec\n");
        printf("# -----------      -----------    -----------    ---------\n");
      }

      if (((strstr(bcast_must_query_md[nalg].name,gSelected) == NULL) && gSelector) ||
          ((strstr(bcast_must_query_md[nalg].name,gSelected) != NULL) && !gSelector))  continue;

      unsigned checkrequired = bcast_must_query_md[nalg].check_correct.values.checkrequired; /*must query every time */
      assert(!checkrequired || bcast_must_query_md[nalg].check_fn); /* must have function if checkrequired. */

      int i, j;

      for (i = gMin_byte_count; i <= gMax_byte_count; i *= 2)
      {
        size_t  dataSent = i;
        int          niter;

        if (dataSent < CUTOFF)
          niter = gNiterlat;
        else
          niter = NITERBW;

        broadcast.cmd.xfer_broadcast.typecount = i;

        result = check_metadata(bcast_must_query_md[nalg],
                                broadcast,
                                PAMI_TYPE_BYTE,
                                dataSent, /* metadata uses bytes i, */
                                broadcast.cmd.xfer_broadcast.buf,
                                PAMI_TYPE_BYTE,
                                dataSent,
                                broadcast.cmd.xfer_broadcast.buf);


        if (bcast_must_query_md[nalg].check_correct.values.nonlocal)
        {
          /* \note We currently ignore check_correct.values.nonlocal
                   because these tests should not have nonlocal differences (so far). */
          result.check.nonlocal = 0;
        }

        if (result.bitmask) continue;

        if (task_id == root_task)
          bcast_initialize_sndbuf (buf, i, root_task);
        else
          memset(buf, 0xFF, i);
        blocking_coll(context[iContext], &barrier, &bar_poll_flag);
        ti = timer();

        for (j = 0; j < niter; j++)
        {
          if (checkrequired) /* must query every time */
          {
            result = bcast_must_query_md[nalg].check_fn(&broadcast);
            if (result.bitmask) continue;
          }

          blocking_coll (context[iContext], &broadcast, &bcast_poll_flag);
        }

        blocking_coll(context[iContext], &barrier, &bar_poll_flag);
        tf = timer();
        int rc_check;
        rc |= rc_check = bcast_check_rcvbuf (buf, i, root_task);

        if (rc_check) fprintf(stderr, "%s FAILED validation\n", gProtocolName);

        usec = (tf - ti) / (double)niter;

        if (task_id == root_task)
        {
          printf("  %11lld %16d %14.1f %12.2f\n",
                 (long long)dataSent,
                 niter,
                 (double)1e6*(double)dataSent / (double)usec,
                 usec);
          fflush(stdout);
        }
      }
    }
    }

    free(bar_always_works_algo);
    free(bar_always_works_md);
    free(bar_must_query_algo);
    free(bar_must_query_md);
    free(bcast_always_works_algo);
    free(bcast_always_works_md);
    free(bcast_must_query_algo);
    free(bcast_must_query_md);

  } /*for(unsigned iContext = 0; iContext < gNum_contexts; ++iContexts)*/
  buf = (char*)buf - gBuffer_offset;
  free(buf);
  } /* optimize loop */

  rc |= pami_shutdown(&client, context, &gNum_contexts);
  return rc;
}
Exemple #14
0
int main(int argc, char*argv[])
{
  pami_client_t        client;
  pami_context_t      *context;
  pami_task_t          task_id,task_zero=0;
  size_t               num_tasks;
  pami_geometry_t      world_geometry;

  /* Barrier variables */
  size_t               barrier_num_algorithm[2];
  pami_algorithm_t    *bar_always_works_algo = NULL;
  pami_metadata_t     *bar_always_works_md   = NULL;
  pami_algorithm_t    *bar_must_query_algo   = NULL;
  pami_metadata_t     *bar_must_query_md     = NULL;
  pami_xfer_type_t     barrier_xfer = PAMI_XFER_BARRIER;
  volatile unsigned    bar_poll_flag=0;

  /* Scan variables */
  pami_algorithm_t    *next_algo = NULL;
  pami_metadata_t     *next_md= NULL;
  size_t               scan_num_algorithm[2];
  pami_algorithm_t    *scan_always_works_algo = NULL;
  pami_metadata_t     *scan_always_works_md = NULL;
  pami_algorithm_t    *scan_must_query_algo = NULL;
  pami_metadata_t     *scan_must_query_md = NULL;
  pami_xfer_type_t     scan_xfer = PAMI_XFER_SCAN;
  volatile unsigned    scan_poll_flag=0;

  int                  i, j, nalg = 0, total_alg;
  double               ti, tf, usec;
  pami_xfer_t          barrier;
  pami_xfer_t          scan;

  /* Process environment variables and setup globals */
  setup_env();

  assert(gNum_contexts > 0);
  context = (pami_context_t*)malloc(sizeof(pami_context_t) * gNum_contexts);

  /*  Allocate buffer(s) */
  int err = 0;
  void* sbuf = NULL;
  err = posix_memalign(&sbuf, 128, gMax_byte_count + gBuffer_offset);
  assert(err == 0);
  sbuf = (char*)sbuf + gBuffer_offset;
  void* rbuf = NULL;
  err = posix_memalign(&rbuf, 128, gMax_byte_count + gBuffer_offset);
  assert(err == 0);
  rbuf = (char*)rbuf + gBuffer_offset;

  /*  Initialize PAMI */
  int rc = pami_init(&client,        /* Client             */
                     context,        /* Context            */
                     NULL,           /* Clientname=default */
                     &gNum_contexts, /* gNum_contexts       */
                     NULL,           /* null configuration */
                     0,              /* no configuration   */
                     &task_id,       /* task id            */
                     &num_tasks);    /* number of tasks    */

  if (rc==1)
    return 1;

  /*  Query the world geometry for barrier algorithms */
  rc |= query_geometry_world(client,
                             context[0],
                             &world_geometry,
                             barrier_xfer,
                             barrier_num_algorithm,
                             &bar_always_works_algo,
                             &bar_always_works_md,
                             &bar_must_query_algo,
                             &bar_must_query_md);

  if (rc==1)
    return 1;

  barrier.cb_done   = cb_done;
  barrier.cookie    = (void*) & bar_poll_flag;
  barrier.algorithm = bar_always_works_algo[0];

  unsigned iContext = 0;

  for (; iContext < gNum_contexts; ++iContext)
  {

    if (task_id == 0)
      printf("# Context: %u\n", iContext);

    /*  Query the world geometry for scan algorithms */
    rc |= query_geometry_world(client,
                               context[iContext],
                               &world_geometry,
                               scan_xfer,
                               scan_num_algorithm,
                               &scan_always_works_algo,
                               &scan_always_works_md,
                               &scan_must_query_algo,
                               &scan_must_query_md);
    if (rc==1)
      return 1;
    total_alg = scan_num_algorithm[0]+scan_num_algorithm[1];
    for (nalg = 0; nalg < total_alg; nalg++)
    {
      metadata_result_t result = {0};
      unsigned query_protocol;
      if(nalg < scan_num_algorithm[0])
      {  
        query_protocol = 0;
        next_algo = &scan_always_works_algo[nalg];
        next_md  = &scan_always_works_md[nalg];
      }
      else
      {  
        query_protocol = 1;
        next_algo = &scan_must_query_algo[nalg-scan_num_algorithm[0]];
        next_md  = &scan_must_query_md[nalg-scan_num_algorithm[0]];
      }
      if (task_id == task_zero) /* root not set yet */
      {
        printf("# Scan Bandwidth Test(size:%zu) -- context = %d, protocol: %s, Metadata: range %zu <-> %zd, mask %#X\n",num_tasks,
               iContext, next_md->name,
               next_md->range_lo,(ssize_t)next_md->range_hi,
               next_md->check_correct.bitmask_correct);
        printf("# Size(bytes)      iterations     bytes/sec      usec\n");
        printf("# -----------      -----------    -----------    ---------\n");
      }

      if (((strstr(next_md->name, gSelected) == NULL) && gSelector) ||
          ((strstr(next_md->name, gSelected) != NULL) && !gSelector))  continue;

      gProtocolName = next_md->name;

      unsigned checkrequired = next_md->check_correct.values.checkrequired; /*must query every time */
      assert(!checkrequired || next_md->check_fn); /* must have function if checkrequired. */

      scan.cb_done   = cb_done;
      scan.cookie    = (void*)&scan_poll_flag;
      scan.algorithm = *next_algo;
      scan.cmd.xfer_scan.rtype     = PAMI_TYPE_BYTE;
      scan.cmd.xfer_scan.rtypecount= 0;
      scan.cmd.xfer_scan.exclusive = 0;

      int op, dt;

      for (dt=0; dt<dt_count; dt++)
        for (op=0; op<op_count; op++)
        {
          if (gValidTable[op][dt])
          {
            if (task_id == task_zero)
              printf("Running Scan: %s, %s\n",dt_array_str[dt], op_array_str[op]);
            for (i = MAX(1,gMin_byte_count/get_type_size(dt_array[dt])); i <= gMax_byte_count/get_type_size(dt_array[dt]); i *= 2)
            {
              size_t sz=get_type_size(dt_array[dt]);
              size_t  dataSent = i * sz;
              int niter;

              if (dataSent < CUTOFF)
                niter = gNiterlat;
              else
                niter = NITERBW;

              scan.cmd.xfer_scan.stypecount = i;
              scan.cmd.xfer_scan.rtypecount = i;
              scan.cmd.xfer_scan.stype      = dt_array[dt];
              scan.cmd.xfer_scan.rtype      = dt_array[dt];
              scan.cmd.xfer_scan.op         = op_array[op];

              if(query_protocol)
              {  
                size_t sz=get_type_size(dt_array[dt])*i;
                result = check_metadata(*next_md,
                                      scan,
                                      dt_array[dt],
                                      sz, /* metadata uses bytes i, */
                                      sbuf,
                                      dt_array[dt],
                                      sz,
                                      rbuf);
                if (next_md->check_correct.values.nonlocal)
                {
                  /* \note We currently ignore check_correct.values.nonlocal
                     because these tests should not have nonlocal differences (so far). */
                  result.check.nonlocal = 0;
                }

                if (result.bitmask) continue;
              }

              /* Do one 'in-place' collective and validate it */
              {
                scan_initialize_sndbuf (sbuf, i, op, dt, task_id);
                scan.cmd.xfer_scan.sndbuf    = sbuf;
                scan.cmd.xfer_scan.rcvbuf    = sbuf;

                if (checkrequired) /* must query every time */
                {
                  result = next_md->check_fn(&scan);
                  if (result.bitmask) continue;
                }

                blocking_coll(context[iContext], &scan, &scan_poll_flag);
                int rc_check;
                rc |= rc_check = scan_check_rcvbuf (sbuf, i, op, dt, num_tasks, task_id, scan.cmd.xfer_scan.exclusive);

                if (rc_check) fprintf(stderr, "%s FAILED IN PLACE validation on %s/%s\n", gProtocolName, dt_array_str[dt], op_array_str[op]);
              }

              /* Iterate (and time) with separate buffers, not in-place */
              scan.cmd.xfer_scan.sndbuf    = sbuf;
              scan.cmd.xfer_scan.rcvbuf    = rbuf;
              scan_initialize_sndbuf (sbuf, i, op, dt, task_id);
              memset(rbuf, 0xFF, dataSent);

              /* We aren't testing barrier itself, so use context 0. */
              blocking_coll(context[0], &barrier, &bar_poll_flag);
              ti = timer();
              for (j=0; j<niter; j++)
              {
                if (checkrequired) /* must query every time */
                {
                  result = next_md->check_fn(&scan);
                  if (result.bitmask) continue;
                }

                blocking_coll(context[iContext], &scan, &scan_poll_flag);
              }
              tf = timer();
              /* We aren't testing barrier itself, so use context 0. */
              blocking_coll(context[0], &barrier, &bar_poll_flag);

              int rc_check;
              rc |= rc_check = scan_check_rcvbuf (rbuf, i, op, dt, num_tasks, task_id, scan.cmd.xfer_scan.exclusive);

              if (rc_check) fprintf(stderr, "%s FAILED validation on %s/%s\n", gProtocolName, dt_array_str[dt], op_array_str[op]);

              usec = (tf - ti)/(double)niter;
              if (task_id == task_zero)
              {
                printf("  %11lld %16d %14.1f %12.2f\n",
                       (long long)dataSent,
                       niter,
                       (double)1e6*(double)dataSent/(double)usec,
                       usec);
                fflush(stdout);
              }
            }
          }
        }
    }
    free(scan_always_works_algo);
    free(scan_always_works_md);
    free(scan_must_query_algo);
    free(scan_must_query_md);

  } /*for(unsigned iContext = 0; iContext < gNum_contexts; ++iContexts)*/
  free(bar_always_works_algo);
  free(bar_always_works_md);
  free(bar_must_query_algo);
  free(bar_must_query_md);

  sbuf = (char*)sbuf - gBuffer_offset;
  free(sbuf);
  rbuf = (char*)rbuf - gBuffer_offset;
  free(rbuf);

  rc |= pami_shutdown(&client, context, &gNum_contexts);
  return rc;
}
Exemple #15
0
int main(int argc, char*argv[])
{
  pami_client_t        client;
  pami_context_t      *context;
  pami_task_t          task_id, root_zero=0;
  size_t               num_tasks;
  pami_geometry_t      world_geometry;

  /* Barrier variables */
  size_t               barrier_num_algorithm[2];
  pami_algorithm_t    *bar_always_works_algo = NULL;
  pami_metadata_t     *bar_always_works_md   = NULL;
  pami_algorithm_t    *bar_must_query_algo   = NULL;
  pami_metadata_t     *bar_must_query_md     = NULL;
  pami_xfer_type_t     barrier_xfer = PAMI_XFER_BARRIER;
  volatile unsigned    bar_poll_flag = 0;

  /* Gather variables */
  size_t               gather_num_algorithm[2];
  pami_algorithm_t    *next_algo = NULL;
  pami_metadata_t     *next_md= NULL;
  pami_algorithm_t    *gather_always_works_algo = NULL;
  pami_metadata_t     *gather_always_works_md = NULL;
  pami_algorithm_t    *gather_must_query_algo = NULL;
  pami_metadata_t     *gather_must_query_md = NULL;
  pami_xfer_type_t     gather_xfer = PAMI_XFER_GATHER;
  volatile unsigned    gather_poll_flag = 0;

  int                  nalg= 0, total_alg;
  double               ti, tf, usec;
  pami_xfer_t          barrier;
  pami_xfer_t          gather;

  /* Process environment variables and setup globals */
  setup_env();

  assert(gNum_contexts > 0);
  context = (pami_context_t*)malloc(sizeof(pami_context_t) * gNum_contexts);


  /*  Initialize PAMI */
  int rc = pami_init(&client,        /* Client             */
                     context,        /* Context            */
                     NULL,           /* Clientname=default */
                     &gNum_contexts, /* gNum_contexts       */
                     NULL,           /* null configuration */
                     0,              /* no configuration   */
                     &task_id,       /* task id            */
                     &num_tasks);    /* number of tasks    */

  if (rc == 1)
    return 1;

  /*  Allocate buffer(s) */
  int err = 0;
  void* buf = NULL;
  err = posix_memalign(&buf, 128, gMax_byte_count + gBuffer_offset);
  assert(err == 0);
  buf = (char*)buf + gBuffer_offset;

  void* rbuf = NULL;
  err = posix_memalign(&rbuf, 128, (gMax_byte_count * num_tasks) + gBuffer_offset);
  assert(err == 0);
  rbuf = (char*)rbuf + gBuffer_offset;


  unsigned iContext = 0;

  for (; iContext < gNum_contexts; ++iContext)
  {

    if (task_id == root_zero)
      printf("# Context: %u\n", iContext);

    /*  Query the world geometry for barrier algorithms */
    rc |= query_geometry_world(client,
                            context,
                            &world_geometry,
                            barrier_xfer,
                            barrier_num_algorithm,
                            &bar_always_works_algo,
                            &bar_always_works_md,
                            &bar_must_query_algo,
                            &bar_must_query_md);

    if (rc == 1)
      return 1;

    /*  Query the world geometry for gather algorithms */
    rc |= query_geometry_world(client,
                            context[iContext],
                            &world_geometry,
                            gather_xfer,
                            gather_num_algorithm,
                            &gather_always_works_algo,
                            &gather_always_works_md,
                            &gather_must_query_algo,
                            &gather_must_query_md);

    if (rc == 1)
      return 1;

    barrier.cb_done   = cb_done;
    barrier.cookie    = (void*) & bar_poll_flag;
    barrier.algorithm = bar_always_works_algo[0];
    blocking_coll(context[iContext], &barrier, &bar_poll_flag);

    total_alg = gather_num_algorithm[0]+gather_num_algorithm[1];
    for (nalg = 0; nalg < total_alg; nalg++)
    {
      metadata_result_t result = {0};
      unsigned query_protocol;
      if(nalg < gather_num_algorithm[0])
      {  
        query_protocol = 0;
        next_algo = &gather_always_works_algo[nalg];
        next_md  = &gather_always_works_md[nalg];
      }
      else
      {  
        query_protocol = 1;
        next_algo = &gather_must_query_algo[nalg-gather_num_algorithm[0]];
        next_md  = &gather_must_query_md[nalg-gather_num_algorithm[0]];
      }

      root_zero = 0;

      gather.cb_done    = cb_done;
      gather.cookie     = (void*) & gather_poll_flag;
      gather.algorithm  = *next_algo;

      gather.cmd.xfer_gather.stype      = PAMI_TYPE_BYTE;
      gather.cmd.xfer_gather.stypecount = 0;
      gather.cmd.xfer_gather.rtype      = PAMI_TYPE_BYTE;
      gather.cmd.xfer_gather.rtypecount = 0;

      gProtocolName = next_md->name;

      if (task_id == root_zero)
      {
        printf("# Gather Bandwidth Test(size:%zu) -- context = %d, protocol: %s, Metadata: range %zu <-> %zd, mask %#X\n",num_tasks,
               iContext, gProtocolName,
               next_md->range_lo,(ssize_t)next_md->range_hi,
               next_md->check_correct.bitmask_correct);
        printf("# Size(bytes)      iterations     bytes/sec      usec\n");
        printf("# -----------      -----------    -----------    ---------\n");
      }

      if (((strstr(next_md->name, gSelected) == NULL) && gSelector) ||
          ((strstr(next_md->name, gSelected) != NULL) && !gSelector))  continue;

      int i, j;

      unsigned checkrequired = next_md->check_correct.values.checkrequired; /*must query every time */
      assert(!checkrequired || next_md->check_fn); /* must have function if checkrequired. */

      int dt,op=4/*SUM*/;

      for (dt = 0; dt < dt_count; dt++)
      {
        if ((gFull_test && ((dt != DT_NULL) && (dt != DT_BYTE))) || gValidTable[op][dt])
        {
          if (task_id == 0)
            printf("Running gather: %s\n", dt_array_str[dt]);

          for (i = MAX(1,gMin_byte_count/get_type_size(dt_array[dt])); i <= gMax_byte_count/get_type_size(dt_array[dt]); i *= 2)
          {
            size_t dataSent = i * get_type_size(dt_array[dt]);
            int          niter;
            if (dataSent < CUTOFF)
              niter = gNiterlat;
            else
              niter = NITERBW;

            gather.cmd.xfer_gather.stypecount = i;
            gather.cmd.xfer_gather.stype      = dt_array[dt];
            gather.cmd.xfer_gather.rtypecount = i;
            gather.cmd.xfer_gather.rtype      = dt_array[dt];


                if(query_protocol)
                {  
                  size_t sz=get_type_size(dt_array[dt])*i;
                  result = check_metadata(*next_md,
                                          gather,
                                          dt_array[dt],
                                          sz, /* metadata uses bytes i, */
                                          buf,
                                          dt_array[dt],
                                          sz,
                                          rbuf);
                  if (next_md->check_correct.values.nonlocal)
                  {
                    /* \note We currently ignore check_correct.values.nonlocal
                      because these tests should not have nonlocal differences (so far). */
                    result.check.nonlocal = 0;
                  }

                  if (result.bitmask) continue;
                }
              /* Do one 'in-place' collective and validate it */
              {
                root_zero = (root_zero + num_tasks - 1) % num_tasks;
                pami_endpoint_t root_ep;
                PAMI_Endpoint_create(client, root_zero, 0, &root_ep);
                gather.cmd.xfer_gather.root       = root_ep;

                memset(rbuf, 0xFF, i*num_tasks);
                if (task_id == root_zero)
                {  
                  gather.cmd.xfer_gather.sndbuf     = (char*)rbuf + dataSent*task_id;
                  gather.cmd.xfer_gather.rcvbuf     = rbuf;
                }
                else
                {  
                  gather.cmd.xfer_gather.sndbuf     = buf;
                  gather.cmd.xfer_gather.rcvbuf     = NULL;
                }
                gather_initialize_sndbuf_dt (gather.cmd.xfer_gather.sndbuf, i, task_id, dt);

                if (checkrequired) /* must query every time */
                {
                  result = next_md->check_fn(&gather);
                  if (result.bitmask) continue;
                }
                blocking_coll(context[iContext], &gather, &gather_poll_flag);

                if (task_id == root_zero)
                {
                  int rc_check;
                  rc |= rc_check = gather_check_rcvbuf_dt(task_id, rbuf, i, dt);
                  if (rc_check) fprintf(stderr, "%s FAILED IN PLACE validation on %s\n", gProtocolName, dt_array_str[dt]);
                }
              }

              /* Iterate (and time) with separate buffers, not in-place */
              gather.cmd.xfer_gather.rcvbuf     = rbuf;
              gather.cmd.xfer_gather.sndbuf     = buf;

            blocking_coll(context[iContext], &barrier, &bar_poll_flag);
            ti = timer();

            for (j = 0; j < niter; j++)
            {
              root_zero = (root_zero + num_tasks - 1) % num_tasks;
              pami_endpoint_t root_ep;
              PAMI_Endpoint_create(client, root_zero, 0, &root_ep);
              gather.cmd.xfer_gather.root       = root_ep;

              gather_initialize_sndbuf_dt (buf, i, task_id, dt);
              if (task_id == root_zero)
                memset(rbuf, 0xFF, i*num_tasks);
              if (checkrequired) /* must query every time */
              {
                result = next_md->check_fn(&gather);
                if (result.bitmask) continue;
              }
              blocking_coll(context[iContext], &gather, &gather_poll_flag);

              if (task_id == root_zero)
              {
                int rc_check;
                rc |= rc_check = gather_check_rcvbuf_dt(task_id, rbuf, i, dt);
                if (rc_check) fprintf(stderr, "%s FAILED validation on %s\n", gProtocolName, dt_array_str[dt]);
              }
            }

            tf = timer();
            blocking_coll(context[iContext], &barrier, &bar_poll_flag);

            usec = (tf - ti) / (double)niter;



            if (task_id == root_zero)
            {
               printf("  %11lld %16d %14.1f %12.2f\n",
                 (long long)dataSent,
                 niter,
                 (double)1e6*(double)dataSent / (double)usec,
                 usec);
              fflush(stdout);
            }
          }
        }
      }
    }
    free(bar_always_works_algo);
    free(bar_always_works_md);
    free(bar_must_query_algo);
    free(bar_must_query_md);
    free(gather_always_works_algo);
    free(gather_always_works_md);
    free(gather_must_query_algo);
  } /*for(unsigned iContext = 0; iContext < gNum_contexts; ++iContexts)*/

  buf = (char*)buf - gBuffer_offset;
  free(buf);

  rbuf = (char*)rbuf - gBuffer_offset;
  free(rbuf);

  rc |= pami_shutdown(&client, context, &gNum_contexts);
  return rc;
}