int main(int argc, char **argv) { const Image<uint8_t> input = make_image<uint8_t>(); Image<float> output0(kSize, kSize, 3); Image<float> output1(kSize, kSize, 3); int result; result = metadata_tester(input, false, 0, 0, 0, 0, 0, 0, 0, 0, 0.f, 0.0, NULL, output0, output1); EXPECT_EQ(0, result); result = metadata_tester_ucon(NULL, input, false, 0, 0, 0, 0, 0, 0, 0, 0, 0.f, 0.0, NULL, output0, output1); EXPECT_EQ(0, result); verify(input, output0, output1); check_metadata(metadata_tester_metadata, false); if (!strcmp(metadata_tester_metadata.name, "metadata_tester_metadata")) { fprintf(stderr, "Expected name %s\n", "metadata_tester_metadata"); exit(-1); } check_metadata(metadata_tester_ucon_metadata, true); if (!strcmp(metadata_tester_ucon_metadata.name, "metadata_tester_ucon_metadata")) { fprintf(stderr, "Expected name %s\n", "metadata_tester_ucon_metadata"); exit(-1); } printf("Success!\n"); return 0; }
// Compares the last cork UUID reported by an MDS backend built from `cfg`
// (for the namespace in `wrns`) with the one held by the volume's metadata
// store. Once they match, the backend is wrapped in a CachedMetaDataStore and
// handed to check_metadata(). When `wait` is true the comparison is retried,
// sleeping 100 ms between attempts, for up to three poll intervals of
// mds_manager_; otherwise a single attempt is made. If no attempt matches,
// the test is failed.
void check_slave_sync(Volume& v, const be::BackendTestSetup::WithRandomNamespace& wrns, const MDSNodeConfig& cfg, bool wait = true) {
    MetaDataBackendInterfacePtr backend(std::make_shared<MDSMetaDataBackend>(cfg, wrns.ns(), boost::none));

    const uint32_t poll_secs = mds_manager_->poll_interval().count();
    const uint32_t nap_msecs = 100;

    uint32_t attempts = 1;
    if (wait) {
        attempts = 3 * 1000 * poll_secs / nap_msecs;
    }

    for (size_t attempt = 0; attempt < attempts; ++attempt) {
        if (!(backend->lastCorkUUID() == v.getMetaDataStore()->lastCork())) {
            // Not caught up yet: back off and try again.
            boost::this_thread::sleep_for(boost::chrono::milliseconds(nap_msecs));
            continue;
        }

        CachedMetaDataStore md(backend, "slave");
        check_metadata(v, md);
        return;
    }

    FAIL() << "slave not in sync after " << (attempts * nap_msecs) << " milliseconds";
}
void table_base::open() { if (_is_open) throw table_open_exception(); if (_key_mapper == nullptr) throw no_primary_key_exception(); { _database.make_enclosure_available(_binomen._enclosure); transaction txn(_database); const unique_ptr<sql> cmd = _database.make_sql(); cmd->write_create_table( _binomen, get_value_mapper_base(), *_key_mapper, readback_id(), _foreign_specs ); if (_database.get_session()->unchecked_exec(*cmd)) { // failure is benign, but in that case we needn't commit. for (size_t i = 0; i < _index_specs.size(); i++) { const index_spec &is = _index_specs[i]; const unique_ptr<sql> cmd = _database.make_sql(); cmd->write_create_index(_binomen, i, is._mappers, is._is_unique); _database.get_session()->exec(*cmd); } txn.commit(); } } check_metadata(); _is_open = true; }
/* Metadata callback for the access-token credentials test. Asserts that the
   status is GRPC_CREDENTIALS_OK and that exactly one element was delivered,
   passes the expected authorization entry ("Bearer blah") together with the
   received elements to check_metadata(), and finally unrefs the credentials
   object that arrived through user_data. */
static void check_access_token_metadata(void *user_data, grpc_credentials_md *md_elems, size_t num_md, grpc_credentials_status status) {
  expected_md expected[] = {{GRPC_AUTHORIZATION_METADATA_KEY, "Bearer blah"}};
  grpc_credentials *creds = (grpc_credentials *)user_data;

  GPR_ASSERT(status == GRPC_CREDENTIALS_OK);
  GPR_ASSERT(num_md == 1);
  check_metadata(expected, md_elems, num_md);

  grpc_credentials_unref(creds);
}
/* Metadata callback for the SSL + OAuth2 composite credentials test. Asserts
   an OK status and a single delivered element, passes the expected
   authorization entry (test_oauth2_bearer_token) and the received elements to
   check_metadata(), then unrefs the credentials object carried in user_data. */
static void check_ssl_oauth2_composite_metadata(void *user_data, grpc_credentials_md *md_elems, size_t num_md, grpc_credentials_status status) {
  expected_md expected[] = {
      {GRPC_AUTHORIZATION_METADATA_KEY, test_oauth2_bearer_token}};
  grpc_credentials *creds = (grpc_credentials *)user_data;

  GPR_ASSERT(status == GRPC_CREDENTIALS_OK);
  GPR_ASSERT(num_md == 1);
  check_metadata(expected, md_elems, num_md);

  grpc_credentials_unref(creds);
}
/* Metadata callback for the IAM credentials test. Asserts an OK status and
   two delivered elements, passes the expected IAM authorization-token and
   authority-selector entries together with the received elements to
   check_metadata(), then unrefs the credentials object carried in user_data. */
static void check_iam_metadata(void *user_data, grpc_credentials_md *md_elems, size_t num_md, grpc_credentials_status status) {
  expected_md expected[] = {
      {GRPC_IAM_AUTHORIZATION_TOKEN_METADATA_KEY, test_iam_authorization_token},
      {GRPC_IAM_AUTHORITY_SELECTOR_METADATA_KEY, test_iam_authority_selector}};
  grpc_credentials *creds = (grpc_credentials *)user_data;

  GPR_ASSERT(status == GRPC_CREDENTIALS_OK);
  GPR_ASSERT(num_md == 2);
  check_metadata(expected, md_elems, num_md);

  grpc_credentials_unref(creds);
}
int main(int argc, char **argv) { void* user_context = NULL; int result; std::map<std::string, int> enum_results; result = halide_enumerate_registered_filters(user_context, &enum_results, EnumerateFunc); EXPECT_EQ(0, result); EXPECT_EQ(2, enum_results.size()); EXPECT_EQ(15, enum_results["metadata_tester"]); EXPECT_EQ(16, enum_results["metadata_tester_ucon"]); const Image<uint8_t> input = make_image<uint8_t>(); Image<float> output0(kSize, kSize, 3); Image<float> output1(kSize, kSize, 3); result = metadata_tester(input, false, 0, 0, 0, 0, 0, 0, 0, 0, 0.f, 0.0, NULL, output0, output1); EXPECT_EQ(0, result); result = metadata_tester_ucon(user_context, input, false, 0, 0, 0, 0, 0, 0, 0, 0, 0.f, 0.0, NULL, output0, output1); EXPECT_EQ(0, result); verify(input, output0, output1); check_metadata(metadata_tester_metadata, false); if (!strcmp(metadata_tester_metadata.name, "metadata_tester_metadata")) { fprintf(stderr, "Expected name %s\n", "metadata_tester_metadata"); exit(-1); } check_metadata(metadata_tester_ucon_metadata, true); if (!strcmp(metadata_tester_ucon_metadata.name, "metadata_tester_ucon_metadata")) { fprintf(stderr, "Expected name %s\n", "metadata_tester_ucon_metadata"); exit(-1); } printf("Success!\n"); return 0; }
/*
 * Alltoallv bandwidth / validation driver that uses two user-built PAMI types.
 *
 * Flow, as written below:
 *  - setup_env() is called first; pami_init() then fills in the client, the
 *    context array (gNum_contexts entries), task_id and num_tasks.
 *  - The send and receive buffers come from posix_memalign() with an alignment
 *    argument of 128 and are then advanced by gBuffer_offset; that shift is
 *    undone again right before free() at the end of the function.
 *  - pami_stype and pami_rtype are created, populated with one
 *    PAMI_Type_add_simple() call each and completed with an atom size of
 *    sizeof(double).
 *  - For every context the world geometry is queried for barrier and
 *    alltoallv algorithms. The "always works" list is walked first, then the
 *    "must query" list (query_protocol == 1).
 *  - An algorithm is skipped according to gSelector and whether its name
 *    contains gSelected; the banner for it is printed by task 0 before that
 *    filter is applied.
 *  - The per-task count i starts at 0, is bumped to 1 by `if(!i)i++`, and is
 *    doubled by the loop increment afterwards, up to
 *    gMax_byte_count/(sizeof(double)*2).
 *  - For must-query algorithms check_metadata() is consulted for each size;
 *    the nonlocal bit of the result is cleared and a non-zero bitmask skips
 *    that size.
 *  - Each size is timed with timer() around niter blocking_coll() calls
 *    (gNiterlat below CUTOFF, NITERBW otherwise), bracketed by barriers, and
 *    validated with check_rcvbuf(); task 0 prints the result line.
 *
 * Returns the OR of the accumulated return codes. Several failure paths
 * `return 1` immediately without releasing the buffers allocated earlier.
 *
 * NOTE(review): inside the timing loop over j, `continue` applies to the j
 * loop, and the `if(!i)i++` in front of it changes the outer size counter in
 * the middle of a measurement -- confirm this is intended.
 */
int main(int argc, char*argv[]) { pami_client_t client; pami_context_t *context; pami_task_t task_id; size_t num_tasks; pami_geometry_t world_geometry; /* Barrier variables */ size_t barrier_num_algorithm[2]; pami_algorithm_t *bar_always_works_algo = NULL; pami_metadata_t *bar_always_works_md = NULL; pami_algorithm_t *bar_must_query_algo = NULL; pami_metadata_t *bar_must_query_md = NULL; pami_xfer_type_t barrier_xfer = PAMI_XFER_BARRIER; volatile unsigned bar_poll_flag = 0; /* Alltoallv variables */ size_t alltoallv_num_algorithm[2]; pami_algorithm_t *alltoallv_always_works_algo = NULL; pami_metadata_t *alltoallv_always_works_md = NULL; pami_algorithm_t *next_algo = NULL; pami_metadata_t *next_md= NULL; pami_algorithm_t *alltoallv_must_query_algo = NULL; pami_metadata_t *alltoallv_must_query_md = NULL; pami_xfer_type_t alltoallv_xfer = PAMI_XFER_ALLTOALLV; volatile unsigned alltoallv_poll_flag = 0; int nalg= 0, total_alg; double ti, tf, usec; pami_xfer_t barrier; pami_xfer_t alltoallv; pami_type_t pami_stype = 0; pami_type_t pami_rtype = 0; pami_result_t ret; /* Process environment variables and setup globals */ setup_env(); assert(gNum_contexts > 0); context = (pami_context_t*)malloc(sizeof(pami_context_t) * gNum_contexts); /* Initialize PAMI */ int rc = pami_init(&client, /* Client */ context, /* Context */ NULL, /* Clientname=default */ &gNum_contexts, /* gNum_contexts */ NULL, /* null configuration */ 0, /* no configuration */ &task_id, /* task id */ &num_tasks); /* number of tasks */ if (rc == 1) return 1; /* Allocate buffer(s) */ int err = 0; void* sbuf = NULL; err = posix_memalign((void*) & sbuf, 128, (gMax_byte_count * num_tasks) + gBuffer_offset); assert(err == 0); sbuf = (char*)sbuf + gBuffer_offset; void* rbuf = NULL; err = posix_memalign((void*) & rbuf, 128, (gMax_byte_count * num_tasks) + gBuffer_offset); assert(err == 0); rbuf = (char*)rbuf + gBuffer_offset; sndlens = (size_t*) malloc(num_tasks * sizeof(size_t)); assert(sndlens); sdispls = (size_t*) 
malloc(num_tasks * sizeof(size_t)); assert(sdispls); rcvlens = (size_t*) malloc(num_tasks * sizeof(size_t)); assert(rcvlens); rdispls = (size_t*) malloc(num_tasks * sizeof(size_t)); assert(rdispls); ret = PAMI_Type_create(&pami_stype); if(ret != PAMI_SUCCESS) return 1; ret = PAMI_Type_create(&pami_rtype); if(ret != PAMI_SUCCESS) return 1; PAMI_Type_add_simple(pami_stype, sizeof(double), 0, 1, sizeof(double)*2); PAMI_Type_add_simple(pami_rtype, sizeof(double), sizeof(double), 1, sizeof(double)); ret = PAMI_Type_complete(pami_stype, sizeof(double)); if(ret != PAMI_SUCCESS){ printf("Invalid atom size for stype\n"); return 1; } ret = PAMI_Type_complete(pami_rtype, sizeof(double)); if(ret != PAMI_SUCCESS){ printf("Invalid atom size for rtype\n"); return 1; } unsigned iContext = 0; for (; iContext < gNum_contexts; ++iContext) { if (task_id == 0) printf("# Context: %u\n", iContext); /* Query the world geometry for barrier algorithms */ rc |= query_geometry_world(client, context[iContext], &world_geometry, barrier_xfer, barrier_num_algorithm, &bar_always_works_algo, &bar_always_works_md, &bar_must_query_algo, &bar_must_query_md); if (rc == 1) return 1; /* Query the world geometry for alltoallv algorithms */ rc |= query_geometry_world(client, context[iContext], &world_geometry, alltoallv_xfer, alltoallv_num_algorithm, &alltoallv_always_works_algo, &alltoallv_always_works_md, &alltoallv_must_query_algo, &alltoallv_must_query_md); if (rc == 1) return 1; barrier.cb_done = cb_done; barrier.cookie = (void*) & bar_poll_flag; barrier.algorithm = bar_always_works_algo[0]; total_alg = alltoallv_num_algorithm[0]+alltoallv_num_algorithm[1]; for (nalg = 0; nalg < total_alg; nalg++) { metadata_result_t result = {0}; unsigned query_protocol; if(nalg < alltoallv_num_algorithm[0]) { query_protocol = 0; next_algo = &alltoallv_always_works_algo[nalg]; next_md = &alltoallv_always_works_md[nalg]; } else { query_protocol = 1; next_algo = 
&alltoallv_must_query_algo[nalg-alltoallv_num_algorithm[0]]; next_md = &alltoallv_must_query_md[nalg-alltoallv_num_algorithm[0]]; } gProtocolName = next_md->name; alltoallv.cb_done = cb_done; alltoallv.cookie = (void*) & alltoallv_poll_flag; alltoallv.algorithm = *next_algo; alltoallv.cmd.xfer_alltoallv.sndbuf = sbuf; alltoallv.cmd.xfer_alltoallv.stype = pami_stype; alltoallv.cmd.xfer_alltoallv.stypecounts = sndlens; alltoallv.cmd.xfer_alltoallv.sdispls = sdispls; alltoallv.cmd.xfer_alltoallv.rcvbuf = rbuf; alltoallv.cmd.xfer_alltoallv.rtype = pami_rtype; alltoallv.cmd.xfer_alltoallv.rtypecounts = rcvlens; alltoallv.cmd.xfer_alltoallv.rdispls = rdispls; gProtocolName = next_md->name; if (task_id == 0) { printf("# Alltoallv Bandwidth Test(size:%zu) -- context = %d, protocol: %s, Metadata: range %zu <-> %zd, mask %#X\n",num_tasks, iContext, gProtocolName, next_md->range_lo,(ssize_t)next_md->range_hi, next_md->check_correct.bitmask_correct); printf("# Size(bytes) iterations bytes/sec usec\n"); printf("# ----------- ----------- ----------- ---------\n"); } if (((strstr(next_md->name, gSelected) == NULL) && gSelector) || ((strstr(next_md->name, gSelected) != NULL) && !gSelector)) continue; int i, j; unsigned checkrequired = next_md->check_correct.values.checkrequired; /*must query every time */ assert(!checkrequired || next_md->check_fn); /* must have function if checkrequired. 
*/ for (i = 0; i <= (gMax_byte_count/(sizeof(double)*2)); i *= 2) { size_t dataSent = i; int niter; if (dataSent < CUTOFF) niter = gNiterlat; else niter = NITERBW; for (j = 0; j < num_tasks; j++) { sndlens[j] = rcvlens[j] = i; sdispls[j] = rdispls[j] = i * j; initialize_sndbuf( j, (double*)sbuf, (double*)rbuf ); } if(query_protocol) { size_t sz=get_type_size(pami_stype)*i; size_t rsz=get_type_size(pami_rtype)*i; result = check_metadata(*next_md, alltoallv, pami_stype, sz, /* metadata uses bytes i, */ alltoallv.cmd.xfer_alltoallv.sndbuf, pami_rtype, rsz, alltoallv.cmd.xfer_alltoallv.rcvbuf); if (next_md->check_correct.values.nonlocal) { /* \note We currently ignore check_correct.values.nonlocal because these tests should not have nonlocal differences (so far). */ result.check.nonlocal = 0; } if (result.bitmask) { if(!i)i++; continue; } } blocking_coll(context[iContext], &barrier, &bar_poll_flag); ti = timer(); for (j = 0; j < niter; j++) { if (checkrequired) /* must query every time */ { result = next_md->check_fn(&alltoallv); if (result.bitmask) { if(!i)i++; continue; } } blocking_coll(context[iContext], &alltoallv, &alltoallv_poll_flag); } tf = timer(); blocking_coll(context[iContext], &barrier, &bar_poll_flag); int rc_check; rc |= rc_check = check_rcvbuf(num_tasks, task_id, (double*)rbuf, (double*)sbuf); if (rc_check) fprintf(stderr, "%s FAILED validation\n", gProtocolName); usec = (tf - ti) / (double)niter; if (task_id == 0) { printf(" %11lld %16d %14.1f %12.2f\n", (long long)dataSent, niter, (double)1e6*(double)dataSent / (double)usec, usec); fflush(stdout); } if(!i)i++; } } free(bar_always_works_algo); free(bar_always_works_md); free(bar_must_query_algo); free(bar_must_query_md); free(alltoallv_always_works_algo); free(alltoallv_always_works_md); free(alltoallv_must_query_algo); free(alltoallv_must_query_md); } /*for(unsigned iContext = 0; iContext < gNum_contexts; ++iContexts)*/ sbuf = (char*)sbuf - gBuffer_offset; free(sbuf); rbuf = (char*)rbuf - 
gBuffer_offset; free(rbuf); free(sndlens); free(sdispls); free(rcvlens); free(rdispls); rc |= pami_shutdown(&client, context, &gNum_contexts); return rc; }
/*
 * Allreduce bandwidth / validation driver over the world geometry, repeated
 * for several PAMI_GEOMETRY_OPTIMIZE settings.
 *
 * Flow, as written below:
 *  - setup_env() is called first. The send and receive buffers are allocated
 *    with posix_memalign() (alignment argument 128) before pami_init() and
 *    advanced by gBuffer_offset; the shift is undone before free().
 *  - The outer loop runs o from -1 to gOptimize. For o == -1 the geometry is
 *    left untouched; otherwise update_geometry() is called on context[0] with
 *    PAMI_GEOMETRY_OPTIMIZE set to o.
 *  - Barrier algorithms are queried once per o on context[0]; allreduce
 *    algorithms are queried once per context inside the iContext loop.
 *  - Only the "must query" allreduce list is exercised: the algorithm loop is
 *    bounded by allreduce_num_algorithm[1] and indexes
 *    allreduce_must_query_algo / allreduce_must_query_md.
 *  - An algorithm is skipped according to gSelector and whether its name
 *    contains gSelected; the banner is printed by task_zero before that
 *    filter is applied.
 *  - For every (dt, op) pair enabled in gValidTable, the element count i
 *    starts at MAX(1, gMin_byte_count/typesize) and doubles up to
 *    gMax_byte_count/typesize.
 *  - check_metadata() is consulted for every size; the nonlocal bit of the
 *    result is cleared and a non-zero bitmask skips that size.
 *  - Before timing, the send buffer is filled by reduce_initialize_sndbuf()
 *    and dataSent bytes of rbuf are set to 0xFF. The timed loop issues niter
 *    blocking_coll() calls (gNiterlat below CUTOFF, NITERBW otherwise); the
 *    surrounding barriers always run on context[0]. The result is validated
 *    with reduce_check_rcvbuf().
 *
 * Returns the OR of the accumulated return codes. The `return 1` failure
 * paths leave without releasing the buffers allocated earlier.
 */
int main(int argc, char*argv[]) { pami_client_t client; pami_context_t *context; pami_task_t task_id, task_zero=0; size_t num_tasks; pami_geometry_t world_geometry; /* Barrier variables */ size_t barrier_num_algorithm[2]; pami_algorithm_t *bar_always_works_algo = NULL; pami_metadata_t *bar_always_works_md = NULL; pami_algorithm_t *bar_must_query_algo = NULL; pami_metadata_t *bar_must_query_md = NULL; pami_xfer_type_t barrier_xfer = PAMI_XFER_BARRIER; volatile unsigned bar_poll_flag = 0; /* Allreduce variables */ size_t allreduce_num_algorithm[2]; pami_algorithm_t *allreduce_always_works_algo = NULL; pami_metadata_t *allreduce_always_works_md = NULL; pami_algorithm_t *allreduce_must_query_algo = NULL; pami_metadata_t *allreduce_must_query_md = NULL; pami_xfer_type_t allreduce_xfer = PAMI_XFER_ALLREDUCE; volatile unsigned allreduce_poll_flag = 0; int i, j, nalg = 0; double ti, tf, usec; pami_xfer_t barrier; pami_xfer_t allreduce; /* Process environment variables and setup globals */ setup_env(); assert(gNum_contexts > 0); context = (pami_context_t*)malloc(sizeof(pami_context_t) * gNum_contexts); /* Allocate buffer(s) */ int err = 0; void* sbuf = NULL; err = posix_memalign(&sbuf, 128, gMax_byte_count + gBuffer_offset); assert(err == 0); sbuf = (char*)sbuf + gBuffer_offset; void* rbuf = NULL; err = posix_memalign(&rbuf, 128, gMax_byte_count + gBuffer_offset); assert(err == 0); rbuf = (char*)rbuf + gBuffer_offset; /* Initialize PAMI */ int rc = pami_init(&client, /* Client */ context, /* Context */ NULL, /* Clientname=default */ &gNum_contexts, /* gNum_contexts */ NULL, /* null configuration */ 0, /* no configuration */ &task_id, /* task id */ &num_tasks); /* number of tasks */ if (rc != PAMI_SUCCESS) return 1; int o; for(o = -1; o <= gOptimize ; o++) /* -1 = default, 0 = de-optimize, 1 = optimize */ { pami_configuration_t configuration[1]; configuration[0].name = PAMI_GEOMETRY_OPTIMIZE; configuration[0].value.intval = o; /* de/optimize */ if(o == -1) ; /* skip update, 
use defaults */ else rc |= update_geometry(client, context[0], world_geometry, configuration, 1); if (rc != PAMI_SUCCESS) return 1; /* Query the world geometry for barrier algorithms */ rc |= query_geometry_world(client, context[0], &world_geometry, barrier_xfer, barrier_num_algorithm, &bar_always_works_algo, &bar_always_works_md, &bar_must_query_algo, &bar_must_query_md); if (rc != PAMI_SUCCESS) return 1; barrier.cb_done = cb_done; barrier.cookie = (void*) & bar_poll_flag; barrier.algorithm = bar_always_works_algo[0]; unsigned iContext = 0; for (; iContext < gNum_contexts; ++iContext) { if (task_id == 0) printf("# Context: %u\n", iContext); /* Query the world geometry for allreduce algorithms */ rc |= query_geometry_world(client, context[iContext], &world_geometry, allreduce_xfer, allreduce_num_algorithm, &allreduce_always_works_algo, &allreduce_always_works_md, &allreduce_must_query_algo, &allreduce_must_query_md); if (rc != PAMI_SUCCESS) return 1; for (nalg = 0; nalg < allreduce_num_algorithm[1]; nalg++) { metadata_result_t result = {0}; if (task_id == task_zero) { printf("# Allreduce Bandwidth Test(size:%zu) -- context = %d, optimize = %d, protocol: %s, Metadata: range %zu <-> %zd, mask %#X\n",num_tasks, iContext, o, allreduce_must_query_md[nalg].name, allreduce_must_query_md[nalg].range_lo,(ssize_t)allreduce_must_query_md[nalg].range_hi, allreduce_must_query_md[nalg].check_correct.bitmask_correct); printf("# Size(bytes) iterations bytes/sec usec\n"); printf("# ----------- ----------- ----------- ---------\n"); } if (((strstr(allreduce_must_query_md[nalg].name, gSelected) == NULL) && gSelector) || ((strstr(allreduce_must_query_md[nalg].name, gSelected) != NULL) && !gSelector)) continue; gProtocolName = allreduce_must_query_md[nalg].name; unsigned checkrequired = allreduce_must_query_md[nalg].check_correct.values.checkrequired; /*must query every time */ assert(!checkrequired || allreduce_must_query_md[nalg].check_fn); /* must have function if checkrequired. 
*/ allreduce.cb_done = cb_done; allreduce.cookie = (void*) & allreduce_poll_flag; allreduce.algorithm = allreduce_must_query_algo[nalg]; allreduce.cmd.xfer_allreduce.sndbuf = sbuf; allreduce.cmd.xfer_allreduce.rcvbuf = rbuf; allreduce.cmd.xfer_allreduce.rtype = PAMI_TYPE_BYTE; allreduce.cmd.xfer_allreduce.rtypecount = 0; int op, dt; for (dt = 0; dt < dt_count; dt++) { for (op = 0; op < op_count; op++) { if (gValidTable[op][dt]) { if (task_id == task_zero) printf("Running Allreduce: %s, %s\n", dt_array_str[dt], op_array_str[op]); for (i = MAX(1,gMin_byte_count/get_type_size(dt_array[dt])); i <= gMax_byte_count/get_type_size(dt_array[dt]); i *= 2) { size_t sz=get_type_size(dt_array[dt]); size_t dataSent = i * sz; int niter; if (dataSent < CUTOFF) niter = gNiterlat; else niter = NITERBW; allreduce.cmd.xfer_allreduce.stypecount = i; allreduce.cmd.xfer_allreduce.rtypecount = dataSent; allreduce.cmd.xfer_allreduce.stype = dt_array[dt]; allreduce.cmd.xfer_allreduce.op = op_array[op]; result = check_metadata(allreduce_must_query_md[nalg], allreduce, dt_array[dt], dataSent, /* metadata uses bytes i, */ allreduce.cmd.xfer_allreduce.sndbuf, PAMI_TYPE_BYTE, dataSent, allreduce.cmd.xfer_allreduce.rcvbuf); if (allreduce_must_query_md[nalg].check_correct.values.nonlocal) { /* \note We currently ignore check_correct.values.nonlocal because these tests should not have nonlocal differences (so far). */ result.check.nonlocal = 0; } if (result.bitmask) continue; reduce_initialize_sndbuf (sbuf, i, op, dt, task_id, num_tasks); memset(rbuf, 0xFF, dataSent); /* We aren't testing barrier itself, so use context 0. */ blocking_coll(context[0], &barrier, &bar_poll_flag); ti = timer(); for (j = 0; j < niter; j++) { if (checkrequired) /* must query every time */ { result = allreduce_must_query_md[nalg].check_fn(&allreduce); if (result.bitmask) continue; } blocking_coll(context[iContext], &allreduce, &allreduce_poll_flag); } tf = timer(); /* We aren't testing barrier itself, so use context 0. 
*/ blocking_coll(context[0], &barrier, &bar_poll_flag); int rc_check; rc |= rc_check = reduce_check_rcvbuf (rbuf, i, op, dt, task_id, num_tasks); if (rc_check) fprintf(stderr, "%s FAILED validation\n", gProtocolName); usec = (tf - ti) / (double)niter; if (task_id == task_zero) { printf(" %11lld %16d %14.1f %12.2f\n", (long long)dataSent, niter, (double)1e6*(double)dataSent / (double)usec, usec); fflush(stdout); } } } } } } free(allreduce_always_works_algo); free(allreduce_always_works_md); free(allreduce_must_query_algo); free(allreduce_must_query_md); } /*for(unsigned iContext = 0; iContext < gNum_contexts; ++iContexts)*/ free(bar_always_works_algo); free(bar_always_works_md); free(bar_must_query_algo); free(bar_must_query_md); } /* optimize loop */ sbuf = (char*)sbuf - gBuffer_offset; free(sbuf); rbuf = (char*)rbuf - gBuffer_offset; free(rbuf); rc |= pami_shutdown(&client, context, &gNum_contexts); return rc; }
/*
 * Allgatherv bandwidth / validation driver on a single context.
 *
 * Flow, as written below:
 *  - setup_env() is called first; pami_init() is asked for one context.
 *  - The world geometry is queried once for barrier algorithms and once for
 *    allgatherv algorithms.
 *  - buf and rbuf are allocated with posix_memalign() (alignment argument
 *    128) and advanced by gBuffer_offset; lengths and displs hold one size_t
 *    per task.
 *  - The "always works" list is walked first, then the "must query" list
 *    (query_protocol == 1). An algorithm is skipped according to gSelector
 *    and whether its name contains gSelected; the banner is printed by task 0
 *    before that filter is applied.
 *  - A datatype dt is run when gFull_test is set and dt is neither DT_NULL
 *    nor DT_BYTE, or when gValidTable[op][dt] is set, with op fixed to 4.
 *  - The element count i starts at MAX(1, gMin_byte_count/typesize) and
 *    doubles up to gMax_byte_count/typesize; every task contributes i
 *    elements and displs[k] is k*i.
 *  - For must-query algorithms check_metadata() is consulted for each size;
 *    the nonlocal bit of the result is cleared and a non-zero bitmask skips
 *    that size.
 *  - Each size is timed with timer() around niter blocking_coll() calls
 *    (gNiterlat below CUTOFF, NITERBW otherwise), bracketed by barriers, and
 *    validated with gather_check_rcvbuf_dt().
 *
 * Returns the OR of the accumulated return codes.
 *
 * NOTE(review): memset(rbuf, 0xFF, i) uses the element count i as its byte
 * count rather than dataSent -- confirm whether that is intended.
 * NOTE(review): buf, rbuf, lengths and displs are never passed to free() in
 * this function, and the malloc() results for lengths/displs are not checked.
 */
int main (int argc, char ** argv) { pami_client_t client; pami_context_t context; size_t num_contexts = 1; pami_task_t task_id, task_zero=0;; size_t num_tasks; pami_geometry_t world_geometry; /* Barrier variables */ size_t barrier_num_algorithm[2]; pami_algorithm_t *bar_always_works_algo = NULL; pami_metadata_t *bar_always_works_md = NULL; pami_algorithm_t *bar_must_query_algo = NULL; pami_metadata_t *bar_must_query_md = NULL; pami_xfer_type_t barrier_xfer = PAMI_XFER_BARRIER; volatile unsigned bar_poll_flag = 0; /* Allgatherv variables */ size_t allgatherv_num_algorithm[2]; pami_algorithm_t *next_algo = NULL; pami_metadata_t *next_md= NULL; pami_algorithm_t *allgatherv_always_works_algo = NULL; pami_metadata_t *allgatherv_always_works_md = NULL; pami_algorithm_t *allgatherv_must_query_algo = NULL; pami_metadata_t *allgatherv_must_query_md = NULL; pami_xfer_type_t allgatherv_xfer = PAMI_XFER_ALLGATHERV; volatile unsigned allgatherv_poll_flag = 0; int nalg= 0, total_alg; double ti, tf, usec; pami_xfer_t barrier; pami_xfer_t allgatherv; setup_env(); /* Initialize PAMI */ int rc = pami_init(&client, /* Client */ &context, /* Context */ NULL, /* Clientname=default */ &num_contexts, /* num_contexts */ NULL, /* null configuration */ 0, /* no configuration */ &task_id, /* task id */ &num_tasks); /* number of tasks */ if (rc == 1) return 1; /* Query the world geometry for barrier algorithms */ rc |= query_geometry_world(client, context, &world_geometry, barrier_xfer, barrier_num_algorithm, &bar_always_works_algo, &bar_always_works_md, &bar_must_query_algo, &bar_must_query_md); if (rc == 1) return 1; /* Query the world geometry for allgatherv algorithms */ rc |= query_geometry_world(client, context, &world_geometry, allgatherv_xfer, allgatherv_num_algorithm, &allgatherv_always_works_algo, &allgatherv_always_works_md, &allgatherv_must_query_algo, &allgatherv_must_query_md); if (rc == 1) return 1; /* Allocate buffer(s) */ int err = 0; void* buf = NULL; err = 
posix_memalign(&buf, 128, (gMax_byte_count * num_tasks) + gBuffer_offset); assert(err == 0); buf = (char*)buf + gBuffer_offset; void* rbuf = NULL; err = posix_memalign(&rbuf, 128, (gMax_byte_count * num_tasks) + gBuffer_offset); assert(err == 0); rbuf = (char*)rbuf + gBuffer_offset; size_t *lengths = (size_t*)malloc(num_tasks * sizeof(size_t)); size_t *displs = (size_t*)malloc(num_tasks * sizeof(size_t)); barrier.cb_done = cb_done; barrier.cookie = (void*) & bar_poll_flag; barrier.algorithm = bar_always_works_algo[0]; blocking_coll(context, &barrier, &bar_poll_flag); { total_alg = allgatherv_num_algorithm[0]+allgatherv_num_algorithm[1]; for (nalg = 0; nalg < total_alg; nalg++) { metadata_result_t result = {0}; unsigned query_protocol; if(nalg < allgatherv_num_algorithm[0]) { query_protocol = 0; next_algo = &allgatherv_always_works_algo[nalg]; next_md = &allgatherv_always_works_md[nalg]; } else { query_protocol = 1; next_algo = &allgatherv_must_query_algo[nalg-allgatherv_num_algorithm[0]]; next_md = &allgatherv_must_query_md[nalg-allgatherv_num_algorithm[0]]; } allgatherv.cb_done = cb_done; allgatherv.cookie = (void*) & allgatherv_poll_flag; allgatherv.algorithm = *next_algo; allgatherv.cmd.xfer_allgatherv.sndbuf = buf; allgatherv.cmd.xfer_allgatherv.rcvbuf = rbuf; allgatherv.cmd.xfer_allgatherv.rtypecounts = lengths; allgatherv.cmd.xfer_allgatherv.rdispls = displs; gProtocolName = next_md->name; if (task_id == 0) { printf("# Allgatherv Bandwidth Test(size:%zu) -- protocol: %s, Metadata: range %zu <-> %zd, mask %#X\n",num_tasks, gProtocolName, next_md->range_lo,(ssize_t)next_md->range_hi, next_md->check_correct.bitmask_correct); printf("# Size(bytes) iterations bytes/sec usec\n"); printf("# ----------- ----------- ----------- ---------\n"); } if (((strstr(next_md->name, gSelected) == NULL) && gSelector) || ((strstr(next_md->name, gSelected) != NULL) && !gSelector)) continue; unsigned checkrequired = next_md->check_correct.values.checkrequired; /*must query every 
time */ assert(!checkrequired || next_md->check_fn); /* must have function if checkrequired. */ unsigned i, j, k; int dt,op=4/*SUM*/; for (dt = 0; dt < dt_count; dt++) { if ((gFull_test && ((dt != DT_NULL) && (dt != DT_BYTE))) || gValidTable[op][dt]) { if (task_id == task_zero) printf("Running Allgatherv: %s\n", dt_array_str[dt]); for (i = MAX(1,gMin_byte_count/get_type_size(dt_array[dt])); i <= gMax_byte_count/get_type_size(dt_array[dt]); i *= 2) { size_t dataSent = i * get_type_size(dt_array[dt]); int niter; if (dataSent < CUTOFF) niter = gNiterlat; else niter = NITERBW; for (k = 0; k < num_tasks; k++)lengths[k] = i; for (k = 0; k < num_tasks; k++)displs[k] = k*i; allgatherv.cmd.xfer_allgatherv.stypecount = i; allgatherv.cmd.xfer_allgatherv.stype = dt_array[dt]; allgatherv.cmd.xfer_allgatherv.rtype = dt_array[dt]; gather_initialize_sndbuf_dt (buf, i, task_id, dt); memset(rbuf, 0xFF, i); if(query_protocol) { size_t sz=get_type_size(dt_array[dt])*i; result = check_metadata(*next_md, allgatherv, dt_array[dt], sz, /* metadata uses bytes i, */ allgatherv.cmd.xfer_allgatherv.sndbuf, dt_array[dt], sz, allgatherv.cmd.xfer_allgatherv.rcvbuf); if (next_md->check_correct.values.nonlocal) { /* \note We currently ignore check_correct.values.nonlocal because these tests should not have nonlocal differences (so far). 
*/ result.check.nonlocal = 0; } if (result.bitmask) continue; } blocking_coll(context, &barrier, &bar_poll_flag); ti = timer(); for (j = 0; j < niter; j++) { if (checkrequired) /* must query every time */ { result = next_md->check_fn(&allgatherv); if (result.bitmask) continue; } blocking_coll(context, &allgatherv, &allgatherv_poll_flag); } tf = timer(); blocking_coll(context, &barrier, &bar_poll_flag); int rc_check; rc |= rc_check = gather_check_rcvbuf_dt (num_tasks, rbuf, i, dt); if (rc_check) fprintf(stderr, "%s FAILED validation on %s\n", gProtocolName, dt_array_str[dt]); usec = (tf - ti) / (double)niter; if (task_id == 0) { printf(" %11lld %16d %14.1f %12.2f\n", (long long)dataSent, niter, (double)1e6*(double)dataSent / (double)usec, usec); fflush(stdout); } } } } } } rc |= pami_shutdown(&client, &context, &num_contexts); free(bar_always_works_algo); free(bar_always_works_md); free(bar_must_query_algo); free(bar_must_query_md); free(allgatherv_always_works_algo); free(allgatherv_always_works_md); free(allgatherv_must_query_algo); free(allgatherv_must_query_md); return rc; };
/*
 * Allreduce bandwidth / validation driver that runs on a sub-geometry created
 * from the task ranges produced by get_split_method().
 *
 * Flow, as written below:
 *  - setup_env() is called first. The send and receive buffers are allocated
 *    with posix_memalign() (alignment argument 128) before pami_init() and
 *    advanced by gBuffer_offset; the shift is undone before free().
 *  - When pami_init() reports a single task, "No subcomms on 1 node" is
 *    printed and the function returns 0.
 *  - The world geometry is queried for barrier algorithms on context[0]; that
 *    world barrier is used once per context after the measurements.
 *  - get_split_method() fills rangecount, range, local_task_id, set, id and
 *    task_zero.
 *  - For every context: the task equal to task_zero calls delayTest(1) and
 *    advances every context before create_and_query_geometry() is called with
 *    the id `id + iContext`; the parent geometry is PAMI_GEOMETRY_NULL when
 *    gParentless is set, otherwise world_geometry. Allreduce algorithms are
 *    then queried on the new geometry.
 *  - Only the "must query" allreduce list is exercised (loop bound
 *    allreduce_num_algorithm[1]); for each algorithm k runs from 1 down to 0
 *    and the body executes only where set[k] is non-zero.
 *  - For every (dt, op) pair enabled in gValidTable, the element count i
 *    starts at MAX(1, gMin_byte_count/typesize) and doubles up to
 *    gMax_byte_count/typesize. check_metadata() is consulted for each size;
 *    the nonlocal bit of the result is cleared and a non-zero bitmask skips
 *    that size.
 *  - Each size is timed with timer() around niter blocking_coll() calls
 *    (gNiterlat below CUTOFF, NITERBW otherwise), bracketed by the
 *    sub-geometry barrier, and validated with reduce_check_rcvbuf() using
 *    local_task_id.
 *
 * Returns the OR of the accumulated return codes. The `return 1` failure
 * paths leave without releasing earlier allocations.
 *
 * NOTE(review): `range` is allocated with malloc() and never passed to free()
 * in this function.
 */
int main(int argc, char*argv[]) { pami_client_t client; pami_context_t *context; pami_task_t task_id, local_task_id=0, task_zero=0; size_t num_tasks; pami_geometry_t world_geometry; /* Barrier variables */ size_t barrier_num_algorithm[2]; pami_algorithm_t *bar_always_works_algo = NULL; pami_metadata_t *bar_always_works_md = NULL; pami_algorithm_t *bar_must_query_algo = NULL; pami_metadata_t *bar_must_query_md = NULL; pami_xfer_type_t barrier_xfer = PAMI_XFER_BARRIER; volatile unsigned bar_poll_flag = 0; volatile unsigned newbar_poll_flag = 0; /* Allreduce variables */ size_t allreduce_num_algorithm[2]; pami_algorithm_t *allreduce_always_works_algo = NULL; pami_metadata_t *allreduce_always_works_md = NULL; pami_algorithm_t *allreduce_must_query_algo = NULL; pami_metadata_t *allreduce_must_query_md = NULL; pami_xfer_type_t allreduce_xfer = PAMI_XFER_ALLREDUCE; volatile unsigned allreduce_poll_flag = 0; int nalg = 0; double ti, tf, usec; pami_xfer_t barrier; pami_xfer_t allreduce; /* Process environment variables and setup globals */ setup_env(); assert(gNum_contexts > 0); context = (pami_context_t*)malloc(sizeof(pami_context_t) * gNum_contexts); /* Allocate buffer(s) */ int err = 0; void* sbuf = NULL; err = posix_memalign(&sbuf, 128, gMax_byte_count + gBuffer_offset); assert(err == 0); sbuf = (char*)sbuf + gBuffer_offset; void* rbuf = NULL; err = posix_memalign(&rbuf, 128, gMax_byte_count + gBuffer_offset); assert(err == 0); rbuf = (char*)rbuf + gBuffer_offset; /* Initialize PAMI */ int rc = pami_init(&client, /* Client */ context, /* Context */ NULL, /* Clientname=default */ &gNum_contexts, /* gNum_contexts */ NULL, /* null configuration */ 0, /* no configuration */ &task_id, /* task id */ &num_tasks); /* number of tasks */ if (rc == 1) return 1; if (num_tasks == 1) { fprintf(stderr, "No subcomms on 1 node\n"); return 0; } assert(task_id >= 0); assert(task_id < num_tasks); /* Query the world geometry for barrier algorithms */ rc |= query_geometry_world(client, 
context[0], &world_geometry, barrier_xfer, barrier_num_algorithm, &bar_always_works_algo, &bar_always_works_md, &bar_must_query_algo, &bar_must_query_md); if (rc == 1) return 1; /* Set up world barrier */ barrier.cb_done = cb_done; barrier.cookie = (void*) & bar_poll_flag; barrier.algorithm = bar_always_works_algo[0]; unsigned iContext = 0; /* Create the subgeometry */ pami_geometry_range_t *range; int rangecount; pami_geometry_t newgeometry; size_t newbar_num_algo[2]; pami_algorithm_t *newbar_algo = NULL; pami_metadata_t *newbar_md = NULL; pami_algorithm_t *q_newbar_algo = NULL; pami_metadata_t *q_newbar_md = NULL; pami_xfer_t newbarrier; size_t set[2]; int id; range = (pami_geometry_range_t *)malloc(((num_tasks + 1) / 2) * sizeof(pami_geometry_range_t)); int unused_non_task_zero[2]; get_split_method(&num_tasks, task_id, &rangecount, range, &local_task_id, set, &id, &task_zero,unused_non_task_zero); for (; iContext < gNum_contexts; ++iContext) { if (task_id == task_zero) printf("# Context: %u\n", iContext); /* Delay task_zero tasks, and emulate that he's doing "other" message passing. This will cause the geometry_create request from other nodes to be unexpected when doing parentless geometries and won't affect parented. */ if (task_id == task_zero) { delayTest(1); unsigned ii = 0; for (; ii < gNum_contexts; ++ii) PAMI_Context_advance (context[ii], 1000); } rc |= create_and_query_geometry(client, context[0], context[iContext], gParentless ? 
PAMI_GEOMETRY_NULL : world_geometry, &newgeometry, range, rangecount, id + iContext, /* Unique id for each context */ barrier_xfer, newbar_num_algo, &newbar_algo, &newbar_md, &q_newbar_algo, &q_newbar_md); if (rc == 1) return 1; /* Query the sub geometry for reduce algorithms */ rc |= query_geometry(client, context[iContext], newgeometry, allreduce_xfer, allreduce_num_algorithm, &allreduce_always_works_algo, &allreduce_always_works_md, &allreduce_must_query_algo, &allreduce_must_query_md); if (rc == 1) return 1; /* Set up sub geometry barrier */ newbarrier.cb_done = cb_done; newbarrier.cookie = (void*) & newbar_poll_flag; newbarrier.algorithm = newbar_algo[0]; for (nalg = 0; nalg < allreduce_num_algorithm[1]; nalg++) { metadata_result_t result = {0}; int i, j, k; for (k = 1; k >= 0; k--) { if (set[k]) { if (task_id == task_zero) { printf("# Allreduce Bandwidth Test(size:%zu) -- context = %d, task = %d protocol: %s, Metadata: range %zu <-> %zd, mask %#X\n",num_tasks, iContext, task_zero, allreduce_must_query_md[nalg].name, allreduce_must_query_md[nalg].range_lo,(ssize_t)allreduce_must_query_md[nalg].range_hi, allreduce_must_query_md[nalg].check_correct.bitmask_correct); printf("# Size(bytes) iterations bytes/sec usec\n"); printf("# ----------- ----------- ----------- ---------\n"); } if (((strstr(allreduce_must_query_md[nalg].name, gSelected) == NULL) && gSelector) || ((strstr(allreduce_must_query_md[nalg].name, gSelected) != NULL) && !gSelector)) continue; gProtocolName = allreduce_must_query_md[nalg].name; unsigned checkrequired = allreduce_must_query_md[nalg].check_correct.values.checkrequired; /*must query every time */ assert(!checkrequired || allreduce_must_query_md[nalg].check_fn); /* must have function if checkrequired. 
*/ allreduce.cb_done = cb_done; allreduce.cookie = (void*) & allreduce_poll_flag; allreduce.algorithm = allreduce_must_query_algo[nalg]; allreduce.cmd.xfer_allreduce.sndbuf = sbuf; allreduce.cmd.xfer_allreduce.rcvbuf = rbuf; allreduce.cmd.xfer_allreduce.rtype = PAMI_TYPE_BYTE; allreduce.cmd.xfer_allreduce.rtypecount = 0; int op, dt; for (dt = 0; dt < dt_count; dt++) for (op = 0; op < op_count; op++) { if (gValidTable[op][dt]) { if (task_id == task_zero) printf("Running Allreduce: %s, %s\n", dt_array_str[dt], op_array_str[op]); for (i = MAX(1,gMin_byte_count/get_type_size(dt_array[dt])); i <= gMax_byte_count/get_type_size(dt_array[dt]); i *= 2) { size_t sz = get_type_size(dt_array[dt]); size_t dataSent = i * sz; int niter; if (dataSent < CUTOFF) niter = gNiterlat; else niter = NITERBW; allreduce.cmd.xfer_allreduce.stypecount = i; allreduce.cmd.xfer_allreduce.rtypecount = dataSent; allreduce.cmd.xfer_allreduce.stype = dt_array[dt]; allreduce.cmd.xfer_allreduce.op = op_array[op]; result = check_metadata(allreduce_must_query_md[nalg], allreduce, dt_array[dt], dataSent, /* metadata uses bytes i, */ allreduce.cmd.xfer_allreduce.sndbuf, PAMI_TYPE_BYTE, dataSent, allreduce.cmd.xfer_allreduce.rcvbuf); if (allreduce_must_query_md[nalg].check_correct.values.nonlocal) { /* \note We currently ignore check_correct.values.nonlocal because these tests should not have nonlocal differences (so far). 
*/ result.check.nonlocal = 0; } if (result.bitmask) continue; reduce_initialize_sndbuf (sbuf, i, op, dt, local_task_id, num_tasks); blocking_coll(context[iContext], &newbarrier, &newbar_poll_flag); ti = timer(); for (j = 0; j < niter; j++) { if (checkrequired) /* must query every time */ { result = allreduce_must_query_md[nalg].check_fn(&allreduce); if (result.bitmask) continue; } blocking_coll(context[iContext], &allreduce, &allreduce_poll_flag); } tf = timer(); blocking_coll(context[iContext], &newbarrier, &newbar_poll_flag); int rc_check; rc |= rc_check = reduce_check_rcvbuf (rbuf, i, op, dt, local_task_id, num_tasks); if (rc_check) fprintf(stderr, "%s FAILED validation\n", gProtocolName); usec = (tf - ti) / (double)niter; if (task_id == task_zero) { printf(" %11lld %16d %14.1f %12.2f\n", (long long)dataSent, niter, (double)1e6*(double)dataSent / (double)usec, usec); fflush(stdout); } } } } } } } /* We aren't testing world barrier itself, so use context 0.*/ blocking_coll(context[0], &barrier, &bar_poll_flag); free(newbar_algo); free(newbar_md); free(q_newbar_algo); free(q_newbar_md); free(allreduce_always_works_algo); free(allreduce_always_works_md); free(allreduce_must_query_algo); free(allreduce_must_query_md); } /*for(unsigned iContext = 0; iContext < gNum_contexts; ++iContexts)*/ free(bar_always_works_algo); free(bar_always_works_md); free(bar_must_query_algo); free(bar_must_query_md); sbuf = (char*)sbuf - gBuffer_offset; free(sbuf); rbuf = (char*)rbuf - gBuffer_offset; free(rbuf); rc |= pami_shutdown(&client, context, &gNum_contexts); return rc; }
int main(int argc, char*argv[]) { pami_client_t client; pami_context_t *context; pami_task_t task_id; size_t num_tasks; pami_geometry_t world_geometry; /* Barrier variables */ size_t barrier_num_algorithm[2]; pami_algorithm_t *bar_always_works_algo = NULL; pami_metadata_t *bar_always_works_md = NULL; pami_algorithm_t *bar_must_query_algo = NULL; pami_metadata_t *bar_must_query_md = NULL; pami_xfer_type_t barrier_xfer = PAMI_XFER_BARRIER; volatile unsigned bar_poll_flag = 0; /* Alltoallv variables */ size_t alltoallv_num_algorithm[2]; pami_algorithm_t *alltoallv_always_works_algo = NULL; pami_metadata_t *alltoallv_always_works_md = NULL; pami_algorithm_t *next_algo = NULL; pami_metadata_t *next_md= NULL; pami_algorithm_t *alltoallv_must_query_algo = NULL; pami_metadata_t *alltoallv_must_query_md = NULL; pami_xfer_type_t alltoallv_xfer = PAMI_XFER_ALLTOALLV; volatile unsigned alltoallv_poll_flag = 0; int nalg= 0, total_alg; double ti, tf, usec; pami_xfer_t barrier; pami_xfer_t alltoallv; /* Process environment variables and setup globals */ if(argc > 1 && argv[1][0] == '-' && (argv[1][1] == 'h' || argv[1][1] == 'H') ) setup_env_internal(1); else setup_env(); assert(gNum_contexts > 0); context = (pami_context_t*)malloc(sizeof(pami_context_t) * gNum_contexts); /* Initialize PAMI */ int rc = pami_init(&client, /* Client */ context, /* Context */ NULL, /* Clientname=default */ &gNum_contexts, /* gNum_contexts */ NULL, /* null configuration */ 0, /* no configuration */ &task_id, /* task id */ &num_tasks); /* number of tasks */ if (rc == 1) return 1; /* Allocate buffer(s) */ int err = 0; void* sbuf = NULL; err = posix_memalign((void*) & sbuf, 128, (gMax_byte_count * num_tasks) + gBuffer_offset); assert(err == 0); sbuf = (char*)sbuf + gBuffer_offset; void* rbuf = NULL; err = posix_memalign((void*) & rbuf, 128, (gMax_byte_count * num_tasks) + gBuffer_offset); assert(err == 0); rbuf = (char*)rbuf + gBuffer_offset; sndlens = (size_t*) malloc(num_tasks * sizeof(size_t)); 
assert(sndlens); sdispls = (size_t*) malloc(num_tasks * sizeof(size_t)); assert(sdispls); rcvlens = (size_t*) malloc(num_tasks * sizeof(size_t)); assert(rcvlens); rdispls = (size_t*) malloc(num_tasks * sizeof(size_t)); assert(rdispls); unsigned iContext = 0; for (; iContext < gNum_contexts; ++iContext) { if (task_id == 0) printf("# Context: %u\n", iContext); /* Query the world geometry for barrier algorithms */ rc |= query_geometry_world(client, context[iContext], &world_geometry, barrier_xfer, barrier_num_algorithm, &bar_always_works_algo, &bar_always_works_md, &bar_must_query_algo, &bar_must_query_md); if (rc != PAMI_SUCCESS) return 1; int o; for(o = -1; o <= gOptimize ; o++) /* -1 = default, 0 = de-optimize, 1 = optimize */ { pami_configuration_t configuration[1]; configuration[0].name = PAMI_GEOMETRY_OPTIMIZE; configuration[0].value.intval = o; /* de/optimize */ if(o == -1) ; /* skip update, use defaults */ else rc |= update_geometry(client, context[0], world_geometry, configuration, 1); if (rc != PAMI_SUCCESS) return 1; /* Query the world geometry for alltoallv algorithms */ rc |= query_geometry_world(client, context[iContext], &world_geometry, alltoallv_xfer, alltoallv_num_algorithm, &alltoallv_always_works_algo, &alltoallv_always_works_md, &alltoallv_must_query_algo, &alltoallv_must_query_md); if (rc != PAMI_SUCCESS) return 1; barrier.cb_done = cb_done; barrier.cookie = (void*) & bar_poll_flag; barrier.algorithm = bar_always_works_algo[0]; total_alg = alltoallv_num_algorithm[0]+alltoallv_num_algorithm[1]; for (nalg = 0; nalg < total_alg; nalg++) { metadata_result_t result = {0}; unsigned query_protocol; if(nalg < alltoallv_num_algorithm[0]) { query_protocol = 0; next_algo = &alltoallv_always_works_algo[nalg]; next_md = &alltoallv_always_works_md[nalg]; } else { query_protocol = 1; next_algo = &alltoallv_must_query_algo[nalg-alltoallv_num_algorithm[0]]; next_md = &alltoallv_must_query_md[nalg-alltoallv_num_algorithm[0]]; } gProtocolName = next_md->name; 
alltoallv.cb_done = cb_done; alltoallv.cookie = (void*) & alltoallv_poll_flag; alltoallv.algorithm = *next_algo; alltoallv.cmd.xfer_alltoallv.sndbuf = sbuf; alltoallv.cmd.xfer_alltoallv.stype = PAMI_TYPE_BYTE; alltoallv.cmd.xfer_alltoallv.stypecounts = sndlens; alltoallv.cmd.xfer_alltoallv.sdispls = sdispls; alltoallv.cmd.xfer_alltoallv.rcvbuf = rbuf; alltoallv.cmd.xfer_alltoallv.rtype = PAMI_TYPE_BYTE; alltoallv.cmd.xfer_alltoallv.rtypecounts = rcvlens; alltoallv.cmd.xfer_alltoallv.rdispls = rdispls; gProtocolName = next_md->name; if (task_id == 0) { printf("# Alltoallv Bandwidth Test(size:%zu) -- context = %d, optimize = %d, protocol: %s, Metadata: range %zu <-> %zd, mask %#X\n",num_tasks, iContext, o, gProtocolName, next_md->range_lo,(ssize_t)next_md->range_hi, next_md->check_correct.bitmask_correct); printf("# Size(bytes) iterations bytes/sec usec\n"); printf("# ----------- ----------- ----------- ---------\n"); } if (((strstr(next_md->name, gSelected) == NULL) && gSelector) || ((strstr(next_md->name, gSelected) != NULL) && !gSelector)) continue; int i, j; int dt,op=4/*SUM*/; unsigned checkrequired = next_md->check_correct.values.checkrequired; /*must query every time */ assert(!checkrequired || next_md->check_fn); /* must have function if checkrequired. */ for (dt = 0; dt < dt_count; dt++) { if ((gFull_test && ((dt != DT_NULL) && (dt != DT_BYTE))) || gValidTable[op][dt]) { if (task_id == 0) printf("Running Alltoallv: %s\n", dt_array_str[dt]); for ( i = gMin_byte_count? MAX(1,gMin_byte_count/get_type_size(dt_array[dt])) : 0; /*clumsy, only want 0 if hardcoded to 0, othersize min 1 */ i <= gMax_byte_count/get_type_size(dt_array[dt]); i = i ? 
i*2 : 1 /* handle zero min */) { size_t dataSent = i * get_type_size(dt_array[dt]); int niter; if (dataSent < CUTOFF) niter = gNiterlat; else niter = NITERBW; for (j = 0; j < num_tasks; j++) { sndlens[j] = rcvlens[j] = i; sdispls[j] = rdispls[j] = i * j; alltoallv_initialize_bufs_dt(sbuf, rbuf, sndlens, rcvlens, sdispls, rdispls, j, dt); } alltoallv.cmd.xfer_alltoallv.rtype = dt_array[dt]; alltoallv.cmd.xfer_alltoallv.stype = dt_array[dt]; if(query_protocol) { size_t sz=get_type_size(dt_array[dt])*i; /* Must initialize all of cmd for metadata */ result = check_metadata(*next_md, alltoallv, dt_array[dt], sz, /* metadata uses bytes i, */ alltoallv.cmd.xfer_alltoallv.sndbuf, dt_array[dt], sz, alltoallv.cmd.xfer_alltoallv.rcvbuf); if (next_md->check_correct.values.nonlocal) { /* \note We currently ignore check_correct.values.nonlocal because these tests should not have nonlocal differences (so far). */ result.check.nonlocal = 0; } if (result.bitmask) continue; } blocking_coll(context[iContext], &barrier, &bar_poll_flag); ti = timer(); for (j = 0; j < niter; j++) { if (checkrequired) /* must query every time */ { result = next_md->check_fn(&alltoallv); if (result.bitmask) continue; } blocking_coll(context[iContext], &alltoallv, &alltoallv_poll_flag); } tf = timer(); blocking_coll(context[iContext], &barrier, &bar_poll_flag); int rc_check; rc |= rc_check = alltoallv_check_rcvbuf_dt(rbuf, rcvlens, rdispls, num_tasks, task_id, dt); if (rc_check) fprintf(stderr, "%s FAILED validation on %s\n", gProtocolName, dt_array_str[dt]); usec = (tf - ti) / (double)niter; if (task_id == 0) { printf(" %11lld %16d %14.1f %12.2f\n", (long long)dataSent, niter, (double)1e6*(double)dataSent / (double)usec, usec); fflush(stdout); } } } } } free(bar_always_works_algo); free(bar_always_works_md); free(bar_must_query_algo); free(bar_must_query_md); free(alltoallv_always_works_algo); free(alltoallv_always_works_md); free(alltoallv_must_query_algo); free(alltoallv_must_query_md); } /* optimize 
loop */ } /*for(unsigned iContext = 0; iContext < gNum_contexts; ++iContexts)*/ sbuf = (char*)sbuf - gBuffer_offset; free(sbuf); rbuf = (char*)rbuf - gBuffer_offset; free(rbuf); free(sndlens); free(sdispls); free(rcvlens); free(rdispls); rc |= pami_shutdown(&client, context, &gNum_contexts); return rc; }
int main(int argc, char*argv[]) { pami_client_t client; pami_context_t *context; pami_task_t task_id; size_t num_tasks; pami_geometry_t world_geometry; /* Barrier variables */ size_t barrier_num_algorithm[2]; pami_algorithm_t *bar_always_works_algo = NULL; pami_metadata_t *bar_always_works_md = NULL; pami_algorithm_t *bar_must_query_algo = NULL; pami_metadata_t *bar_must_query_md = NULL; pami_xfer_type_t barrier_xfer = PAMI_XFER_BARRIER; volatile unsigned bar_poll_flag = 0; /* Bcast variables */ size_t bcast_num_algorithm[2]; pami_algorithm_t *bcast_always_works_algo = NULL; pami_metadata_t *bcast_always_works_md = NULL; pami_algorithm_t *bcast_must_query_algo = NULL; pami_metadata_t *bcast_must_query_md = NULL; pami_xfer_type_t bcast_xfer = PAMI_XFER_BROADCAST; volatile unsigned bcast_poll_flag = 0; int nalg= 0; double ti, tf, usec; pami_xfer_t barrier; pami_xfer_t broadcast; /* Process environment variables and setup globals */ setup_env(); assert(gNum_contexts > 0); context = (pami_context_t*)malloc(sizeof(pami_context_t) * gNum_contexts); /* Initialize PAMI */ int rc = pami_init(&client, /* Client */ context, /* Context */ NULL, /* Clientname=default */ &gNum_contexts, /* gNum_contexts */ NULL, /* null configuration */ 0, /* no configuration */ &task_id, /* task id */ &num_tasks); /* number of tasks */ if (rc != PAMI_SUCCESS) return 1; int o; for(o = -1; o <= gOptimize ; o++) /* -1 = default, 0 = de-optimize, 1 = optimize */ { pami_configuration_t configuration[1]; configuration[0].name = PAMI_GEOMETRY_OPTIMIZE; configuration[0].value.intval = o; /* de/optimize */ if(o == -1) ; /* skip update, use defaults */ else rc |= update_geometry(client, context[0], world_geometry, configuration, 1); if (rc != PAMI_SUCCESS) return 1; if(gNumRoots > num_tasks) gNumRoots = num_tasks; /* Allocate buffer(s) */ int err = 0; void* buf = NULL; err = posix_memalign(&buf, 128, gMax_byte_count + gBuffer_offset); assert(err == 0); buf = (char*)buf + gBuffer_offset; unsigned iContext 
= 0; for (; iContext < gNum_contexts; ++iContext) { if (task_id == 0) printf("# Context: %u\n", iContext); /* Query the world geometry for barrier algorithms */ rc |= query_geometry_world(client, context[iContext], &world_geometry, barrier_xfer, barrier_num_algorithm, &bar_always_works_algo, &bar_always_works_md, &bar_must_query_algo, &bar_must_query_md); if (rc != PAMI_SUCCESS) return 1; /* Query the world geometry for broadcast algorithms */ rc |= query_geometry_world(client, context[iContext], &world_geometry, bcast_xfer, bcast_num_algorithm, &bcast_always_works_algo, &bcast_always_works_md, &bcast_must_query_algo, &bcast_must_query_md); if (rc != PAMI_SUCCESS) return 1; barrier.cb_done = cb_done; barrier.cookie = (void*) & bar_poll_flag; barrier.algorithm = bar_always_works_algo[0]; blocking_coll(context[iContext], &barrier, &bar_poll_flag); for (nalg = 0; nalg < bcast_num_algorithm[1]; nalg++) { broadcast.cb_done = cb_done; broadcast.cookie = (void*) & bcast_poll_flag; broadcast.algorithm = bcast_must_query_algo[nalg]; broadcast.cmd.xfer_broadcast.buf = buf; broadcast.cmd.xfer_broadcast.type = PAMI_TYPE_BYTE; broadcast.cmd.xfer_broadcast.typecount = 0; gProtocolName = bcast_must_query_md[nalg].name; metadata_result_t result = {0}; int k; for (k=0; k< gNumRoots; k++) { pami_endpoint_t root_ep; pami_task_t root_task = (pami_task_t)k; PAMI_Endpoint_create(client, root_task, 0, &root_ep); broadcast.cmd.xfer_broadcast.root = root_ep; if (task_id == root_task) { printf("# Broadcast Bandwidth Test(size:%zu) -- context = %d, optimize = %d, root = %d protocol: %s, Metadata: range %zu <-> %zd, mask %#X\n",num_tasks, iContext, o, root_task, gProtocolName, bcast_must_query_md[nalg].range_lo, bcast_must_query_md[nalg].range_hi, bcast_must_query_md[nalg].check_correct.bitmask_correct); printf("# Size(bytes) iterations bytes/sec usec\n"); printf("# ----------- ----------- ----------- ---------\n"); } if (((strstr(bcast_must_query_md[nalg].name,gSelected) == NULL) && 
gSelector) || ((strstr(bcast_must_query_md[nalg].name,gSelected) != NULL) && !gSelector)) continue; unsigned checkrequired = bcast_must_query_md[nalg].check_correct.values.checkrequired; /*must query every time */ assert(!checkrequired || bcast_must_query_md[nalg].check_fn); /* must have function if checkrequired. */ int i, j; for (i = gMin_byte_count; i <= gMax_byte_count; i *= 2) { size_t dataSent = i; int niter; if (dataSent < CUTOFF) niter = gNiterlat; else niter = NITERBW; broadcast.cmd.xfer_broadcast.typecount = i; result = check_metadata(bcast_must_query_md[nalg], broadcast, PAMI_TYPE_BYTE, dataSent, /* metadata uses bytes i, */ broadcast.cmd.xfer_broadcast.buf, PAMI_TYPE_BYTE, dataSent, broadcast.cmd.xfer_broadcast.buf); if (bcast_must_query_md[nalg].check_correct.values.nonlocal) { /* \note We currently ignore check_correct.values.nonlocal because these tests should not have nonlocal differences (so far). */ result.check.nonlocal = 0; } if (result.bitmask) continue; if (task_id == root_task) bcast_initialize_sndbuf (buf, i, root_task); else memset(buf, 0xFF, i); blocking_coll(context[iContext], &barrier, &bar_poll_flag); ti = timer(); for (j = 0; j < niter; j++) { if (checkrequired) /* must query every time */ { result = bcast_must_query_md[nalg].check_fn(&broadcast); if (result.bitmask) continue; } blocking_coll (context[iContext], &broadcast, &bcast_poll_flag); } blocking_coll(context[iContext], &barrier, &bar_poll_flag); tf = timer(); int rc_check; rc |= rc_check = bcast_check_rcvbuf (buf, i, root_task); if (rc_check) fprintf(stderr, "%s FAILED validation\n", gProtocolName); usec = (tf - ti) / (double)niter; if (task_id == root_task) { printf(" %11lld %16d %14.1f %12.2f\n", (long long)dataSent, niter, (double)1e6*(double)dataSent / (double)usec, usec); fflush(stdout); } } } } free(bar_always_works_algo); free(bar_always_works_md); free(bar_must_query_algo); free(bar_must_query_md); free(bcast_always_works_algo); free(bcast_always_works_md); 
free(bcast_must_query_algo); free(bcast_must_query_md); } /*for(unsigned iContext = 0; iContext < gNum_contexts; ++iContexts)*/ buf = (char*)buf - gBuffer_offset; free(buf); } /* optimize loop */ rc |= pami_shutdown(&client, context, &gNum_contexts); return rc; }
int main(int argc, char*argv[]) { pami_client_t client; pami_context_t *context; pami_task_t task_id,task_zero=0; size_t num_tasks; pami_geometry_t world_geometry; /* Barrier variables */ size_t barrier_num_algorithm[2]; pami_algorithm_t *bar_always_works_algo = NULL; pami_metadata_t *bar_always_works_md = NULL; pami_algorithm_t *bar_must_query_algo = NULL; pami_metadata_t *bar_must_query_md = NULL; pami_xfer_type_t barrier_xfer = PAMI_XFER_BARRIER; volatile unsigned bar_poll_flag=0; /* Scan variables */ pami_algorithm_t *next_algo = NULL; pami_metadata_t *next_md= NULL; size_t scan_num_algorithm[2]; pami_algorithm_t *scan_always_works_algo = NULL; pami_metadata_t *scan_always_works_md = NULL; pami_algorithm_t *scan_must_query_algo = NULL; pami_metadata_t *scan_must_query_md = NULL; pami_xfer_type_t scan_xfer = PAMI_XFER_SCAN; volatile unsigned scan_poll_flag=0; int i, j, nalg = 0, total_alg; double ti, tf, usec; pami_xfer_t barrier; pami_xfer_t scan; /* Process environment variables and setup globals */ setup_env(); assert(gNum_contexts > 0); context = (pami_context_t*)malloc(sizeof(pami_context_t) * gNum_contexts); /* Allocate buffer(s) */ int err = 0; void* sbuf = NULL; err = posix_memalign(&sbuf, 128, gMax_byte_count + gBuffer_offset); assert(err == 0); sbuf = (char*)sbuf + gBuffer_offset; void* rbuf = NULL; err = posix_memalign(&rbuf, 128, gMax_byte_count + gBuffer_offset); assert(err == 0); rbuf = (char*)rbuf + gBuffer_offset; /* Initialize PAMI */ int rc = pami_init(&client, /* Client */ context, /* Context */ NULL, /* Clientname=default */ &gNum_contexts, /* gNum_contexts */ NULL, /* null configuration */ 0, /* no configuration */ &task_id, /* task id */ &num_tasks); /* number of tasks */ if (rc==1) return 1; /* Query the world geometry for barrier algorithms */ rc |= query_geometry_world(client, context[0], &world_geometry, barrier_xfer, barrier_num_algorithm, &bar_always_works_algo, &bar_always_works_md, &bar_must_query_algo, &bar_must_query_md); if 
(rc==1) return 1; barrier.cb_done = cb_done; barrier.cookie = (void*) & bar_poll_flag; barrier.algorithm = bar_always_works_algo[0]; unsigned iContext = 0; for (; iContext < gNum_contexts; ++iContext) { if (task_id == 0) printf("# Context: %u\n", iContext); /* Query the world geometry for scan algorithms */ rc |= query_geometry_world(client, context[iContext], &world_geometry, scan_xfer, scan_num_algorithm, &scan_always_works_algo, &scan_always_works_md, &scan_must_query_algo, &scan_must_query_md); if (rc==1) return 1; total_alg = scan_num_algorithm[0]+scan_num_algorithm[1]; for (nalg = 0; nalg < total_alg; nalg++) { metadata_result_t result = {0}; unsigned query_protocol; if(nalg < scan_num_algorithm[0]) { query_protocol = 0; next_algo = &scan_always_works_algo[nalg]; next_md = &scan_always_works_md[nalg]; } else { query_protocol = 1; next_algo = &scan_must_query_algo[nalg-scan_num_algorithm[0]]; next_md = &scan_must_query_md[nalg-scan_num_algorithm[0]]; } if (task_id == task_zero) /* root not set yet */ { printf("# Scan Bandwidth Test(size:%zu) -- context = %d, protocol: %s, Metadata: range %zu <-> %zd, mask %#X\n",num_tasks, iContext, next_md->name, next_md->range_lo,(ssize_t)next_md->range_hi, next_md->check_correct.bitmask_correct); printf("# Size(bytes) iterations bytes/sec usec\n"); printf("# ----------- ----------- ----------- ---------\n"); } if (((strstr(next_md->name, gSelected) == NULL) && gSelector) || ((strstr(next_md->name, gSelected) != NULL) && !gSelector)) continue; gProtocolName = next_md->name; unsigned checkrequired = next_md->check_correct.values.checkrequired; /*must query every time */ assert(!checkrequired || next_md->check_fn); /* must have function if checkrequired. 
*/ scan.cb_done = cb_done; scan.cookie = (void*)&scan_poll_flag; scan.algorithm = *next_algo; scan.cmd.xfer_scan.rtype = PAMI_TYPE_BYTE; scan.cmd.xfer_scan.rtypecount= 0; scan.cmd.xfer_scan.exclusive = 0; int op, dt; for (dt=0; dt<dt_count; dt++) for (op=0; op<op_count; op++) { if (gValidTable[op][dt]) { if (task_id == task_zero) printf("Running Scan: %s, %s\n",dt_array_str[dt], op_array_str[op]); for (i = MAX(1,gMin_byte_count/get_type_size(dt_array[dt])); i <= gMax_byte_count/get_type_size(dt_array[dt]); i *= 2) { size_t sz=get_type_size(dt_array[dt]); size_t dataSent = i * sz; int niter; if (dataSent < CUTOFF) niter = gNiterlat; else niter = NITERBW; scan.cmd.xfer_scan.stypecount = i; scan.cmd.xfer_scan.rtypecount = i; scan.cmd.xfer_scan.stype = dt_array[dt]; scan.cmd.xfer_scan.rtype = dt_array[dt]; scan.cmd.xfer_scan.op = op_array[op]; if(query_protocol) { size_t sz=get_type_size(dt_array[dt])*i; result = check_metadata(*next_md, scan, dt_array[dt], sz, /* metadata uses bytes i, */ sbuf, dt_array[dt], sz, rbuf); if (next_md->check_correct.values.nonlocal) { /* \note We currently ignore check_correct.values.nonlocal because these tests should not have nonlocal differences (so far). 
*/ result.check.nonlocal = 0; } if (result.bitmask) continue; } /* Do one 'in-place' collective and validate it */ { scan_initialize_sndbuf (sbuf, i, op, dt, task_id); scan.cmd.xfer_scan.sndbuf = sbuf; scan.cmd.xfer_scan.rcvbuf = sbuf; if (checkrequired) /* must query every time */ { result = next_md->check_fn(&scan); if (result.bitmask) continue; } blocking_coll(context[iContext], &scan, &scan_poll_flag); int rc_check; rc |= rc_check = scan_check_rcvbuf (sbuf, i, op, dt, num_tasks, task_id, scan.cmd.xfer_scan.exclusive); if (rc_check) fprintf(stderr, "%s FAILED IN PLACE validation on %s/%s\n", gProtocolName, dt_array_str[dt], op_array_str[op]); } /* Iterate (and time) with separate buffers, not in-place */ scan.cmd.xfer_scan.sndbuf = sbuf; scan.cmd.xfer_scan.rcvbuf = rbuf; scan_initialize_sndbuf (sbuf, i, op, dt, task_id); memset(rbuf, 0xFF, dataSent); /* We aren't testing barrier itself, so use context 0. */ blocking_coll(context[0], &barrier, &bar_poll_flag); ti = timer(); for (j=0; j<niter; j++) { if (checkrequired) /* must query every time */ { result = next_md->check_fn(&scan); if (result.bitmask) continue; } blocking_coll(context[iContext], &scan, &scan_poll_flag); } tf = timer(); /* We aren't testing barrier itself, so use context 0. 
*/ blocking_coll(context[0], &barrier, &bar_poll_flag); int rc_check; rc |= rc_check = scan_check_rcvbuf (rbuf, i, op, dt, num_tasks, task_id, scan.cmd.xfer_scan.exclusive); if (rc_check) fprintf(stderr, "%s FAILED validation on %s/%s\n", gProtocolName, dt_array_str[dt], op_array_str[op]); usec = (tf - ti)/(double)niter; if (task_id == task_zero) { printf(" %11lld %16d %14.1f %12.2f\n", (long long)dataSent, niter, (double)1e6*(double)dataSent/(double)usec, usec); fflush(stdout); } } } } } free(scan_always_works_algo); free(scan_always_works_md); free(scan_must_query_algo); free(scan_must_query_md); } /*for(unsigned iContext = 0; iContext < gNum_contexts; ++iContexts)*/ free(bar_always_works_algo); free(bar_always_works_md); free(bar_must_query_algo); free(bar_must_query_md); sbuf = (char*)sbuf - gBuffer_offset; free(sbuf); rbuf = (char*)rbuf - gBuffer_offset; free(rbuf); rc |= pami_shutdown(&client, context, &gNum_contexts); return rc; }
int main(int argc, char*argv[]) { pami_client_t client; pami_context_t *context; pami_task_t task_id, root_zero=0; size_t num_tasks; pami_geometry_t world_geometry; /* Barrier variables */ size_t barrier_num_algorithm[2]; pami_algorithm_t *bar_always_works_algo = NULL; pami_metadata_t *bar_always_works_md = NULL; pami_algorithm_t *bar_must_query_algo = NULL; pami_metadata_t *bar_must_query_md = NULL; pami_xfer_type_t barrier_xfer = PAMI_XFER_BARRIER; volatile unsigned bar_poll_flag = 0; /* Gather variables */ size_t gather_num_algorithm[2]; pami_algorithm_t *next_algo = NULL; pami_metadata_t *next_md= NULL; pami_algorithm_t *gather_always_works_algo = NULL; pami_metadata_t *gather_always_works_md = NULL; pami_algorithm_t *gather_must_query_algo = NULL; pami_metadata_t *gather_must_query_md = NULL; pami_xfer_type_t gather_xfer = PAMI_XFER_GATHER; volatile unsigned gather_poll_flag = 0; int nalg= 0, total_alg; double ti, tf, usec; pami_xfer_t barrier; pami_xfer_t gather; /* Process environment variables and setup globals */ setup_env(); assert(gNum_contexts > 0); context = (pami_context_t*)malloc(sizeof(pami_context_t) * gNum_contexts); /* Initialize PAMI */ int rc = pami_init(&client, /* Client */ context, /* Context */ NULL, /* Clientname=default */ &gNum_contexts, /* gNum_contexts */ NULL, /* null configuration */ 0, /* no configuration */ &task_id, /* task id */ &num_tasks); /* number of tasks */ if (rc == 1) return 1; /* Allocate buffer(s) */ int err = 0; void* buf = NULL; err = posix_memalign(&buf, 128, gMax_byte_count + gBuffer_offset); assert(err == 0); buf = (char*)buf + gBuffer_offset; void* rbuf = NULL; err = posix_memalign(&rbuf, 128, (gMax_byte_count * num_tasks) + gBuffer_offset); assert(err == 0); rbuf = (char*)rbuf + gBuffer_offset; unsigned iContext = 0; for (; iContext < gNum_contexts; ++iContext) { if (task_id == root_zero) printf("# Context: %u\n", iContext); /* Query the world geometry for barrier algorithms */ rc |= query_geometry_world(client, 
context, &world_geometry, barrier_xfer, barrier_num_algorithm, &bar_always_works_algo, &bar_always_works_md, &bar_must_query_algo, &bar_must_query_md); if (rc == 1) return 1; /* Query the world geometry for gather algorithms */ rc |= query_geometry_world(client, context[iContext], &world_geometry, gather_xfer, gather_num_algorithm, &gather_always_works_algo, &gather_always_works_md, &gather_must_query_algo, &gather_must_query_md); if (rc == 1) return 1; barrier.cb_done = cb_done; barrier.cookie = (void*) & bar_poll_flag; barrier.algorithm = bar_always_works_algo[0]; blocking_coll(context[iContext], &barrier, &bar_poll_flag); total_alg = gather_num_algorithm[0]+gather_num_algorithm[1]; for (nalg = 0; nalg < total_alg; nalg++) { metadata_result_t result = {0}; unsigned query_protocol; if(nalg < gather_num_algorithm[0]) { query_protocol = 0; next_algo = &gather_always_works_algo[nalg]; next_md = &gather_always_works_md[nalg]; } else { query_protocol = 1; next_algo = &gather_must_query_algo[nalg-gather_num_algorithm[0]]; next_md = &gather_must_query_md[nalg-gather_num_algorithm[0]]; } root_zero = 0; gather.cb_done = cb_done; gather.cookie = (void*) & gather_poll_flag; gather.algorithm = *next_algo; gather.cmd.xfer_gather.stype = PAMI_TYPE_BYTE; gather.cmd.xfer_gather.stypecount = 0; gather.cmd.xfer_gather.rtype = PAMI_TYPE_BYTE; gather.cmd.xfer_gather.rtypecount = 0; gProtocolName = next_md->name; if (task_id == root_zero) { printf("# Gather Bandwidth Test(size:%zu) -- context = %d, protocol: %s, Metadata: range %zu <-> %zd, mask %#X\n",num_tasks, iContext, gProtocolName, next_md->range_lo,(ssize_t)next_md->range_hi, next_md->check_correct.bitmask_correct); printf("# Size(bytes) iterations bytes/sec usec\n"); printf("# ----------- ----------- ----------- ---------\n"); } if (((strstr(next_md->name, gSelected) == NULL) && gSelector) || ((strstr(next_md->name, gSelected) != NULL) && !gSelector)) continue; int i, j; unsigned checkrequired = 
next_md->check_correct.values.checkrequired; /*must query every time */ assert(!checkrequired || next_md->check_fn); /* must have function if checkrequired. */ int dt,op=4/*SUM*/; for (dt = 0; dt < dt_count; dt++) { if ((gFull_test && ((dt != DT_NULL) && (dt != DT_BYTE))) || gValidTable[op][dt]) { if (task_id == 0) printf("Running gather: %s\n", dt_array_str[dt]); for (i = MAX(1,gMin_byte_count/get_type_size(dt_array[dt])); i <= gMax_byte_count/get_type_size(dt_array[dt]); i *= 2) { size_t dataSent = i * get_type_size(dt_array[dt]); int niter; if (dataSent < CUTOFF) niter = gNiterlat; else niter = NITERBW; gather.cmd.xfer_gather.stypecount = i; gather.cmd.xfer_gather.stype = dt_array[dt]; gather.cmd.xfer_gather.rtypecount = i; gather.cmd.xfer_gather.rtype = dt_array[dt]; if(query_protocol) { size_t sz=get_type_size(dt_array[dt])*i; result = check_metadata(*next_md, gather, dt_array[dt], sz, /* metadata uses bytes i, */ buf, dt_array[dt], sz, rbuf); if (next_md->check_correct.values.nonlocal) { /* \note We currently ignore check_correct.values.nonlocal because these tests should not have nonlocal differences (so far). 
*/ result.check.nonlocal = 0; } if (result.bitmask) continue; } /* Do one 'in-place' collective and validate it */ { root_zero = (root_zero + num_tasks - 1) % num_tasks; pami_endpoint_t root_ep; PAMI_Endpoint_create(client, root_zero, 0, &root_ep); gather.cmd.xfer_gather.root = root_ep; memset(rbuf, 0xFF, i*num_tasks); if (task_id == root_zero) { gather.cmd.xfer_gather.sndbuf = (char*)rbuf + dataSent*task_id; gather.cmd.xfer_gather.rcvbuf = rbuf; } else { gather.cmd.xfer_gather.sndbuf = buf; gather.cmd.xfer_gather.rcvbuf = NULL; } gather_initialize_sndbuf_dt (gather.cmd.xfer_gather.sndbuf, i, task_id, dt); if (checkrequired) /* must query every time */ { result = next_md->check_fn(&gather); if (result.bitmask) continue; } blocking_coll(context[iContext], &gather, &gather_poll_flag); if (task_id == root_zero) { int rc_check; rc |= rc_check = gather_check_rcvbuf_dt(task_id, rbuf, i, dt); if (rc_check) fprintf(stderr, "%s FAILED IN PLACE validation on %s\n", gProtocolName, dt_array_str[dt]); } } /* Iterate (and time) with separate buffers, not in-place */ gather.cmd.xfer_gather.rcvbuf = rbuf; gather.cmd.xfer_gather.sndbuf = buf; blocking_coll(context[iContext], &barrier, &bar_poll_flag); ti = timer(); for (j = 0; j < niter; j++) { root_zero = (root_zero + num_tasks - 1) % num_tasks; pami_endpoint_t root_ep; PAMI_Endpoint_create(client, root_zero, 0, &root_ep); gather.cmd.xfer_gather.root = root_ep; gather_initialize_sndbuf_dt (buf, i, task_id, dt); if (task_id == root_zero) memset(rbuf, 0xFF, i*num_tasks); if (checkrequired) /* must query every time */ { result = next_md->check_fn(&gather); if (result.bitmask) continue; } blocking_coll(context[iContext], &gather, &gather_poll_flag); if (task_id == root_zero) { int rc_check; rc |= rc_check = gather_check_rcvbuf_dt(task_id, rbuf, i, dt); if (rc_check) fprintf(stderr, "%s FAILED validation on %s\n", gProtocolName, dt_array_str[dt]); } } tf = timer(); blocking_coll(context[iContext], &barrier, &bar_poll_flag); usec = (tf 
- ti) / (double)niter; if (task_id == root_zero) { printf(" %11lld %16d %14.1f %12.2f\n", (long long)dataSent, niter, (double)1e6*(double)dataSent / (double)usec, usec); fflush(stdout); } } } } } free(bar_always_works_algo); free(bar_always_works_md); free(bar_must_query_algo); free(bar_must_query_md); free(gather_always_works_algo); free(gather_always_works_md); free(gather_must_query_algo); } /*for(unsigned iContext = 0; iContext < gNum_contexts; ++iContexts)*/ buf = (char*)buf - gBuffer_offset; free(buf); rbuf = (char*)rbuf - gBuffer_offset; free(rbuf); rc |= pami_shutdown(&client, context, &gNum_contexts); return rc; }