/**
 * Ask the native window to take the position and size described by r, then
 * overwrite r with whatever query_geometry() reports.
 *
 * SWP_NOZORDER and SWP_NOOWNERZORDER are passed so the call leaves the Z
 * order of the window and of its owner untouched.
 *
 * @param r  in: requested geometry; out: the value returned by
 *           query_geometry() after the call.
 * @return true when SetWindowPos reported success, false otherwise.
 */
bool window::update_geometry( rect &r )
{
	const BOOL moved = SetWindowPos( _hwnd, NULL, r.x(), r.y(), r.width(), r.height(), SWP_NOOWNERZORDER | SWP_NOZORDER );

	// r is refreshed even when the request failed, so the caller is never
	// left holding a geometry that was not applied.
	r = query_geometry();

	return moved != FALSE;
}
static void * allgather_test(void* p) { thread_data_t *td = (thread_data_t*)p; pami_context_t myContext = (pami_context_t)td->context; /* Barrier variables */ size_t barrier_num_algorithm[2]; pami_algorithm_t *bar_always_works_algo = NULL; pami_metadata_t *bar_always_works_md = NULL; pami_algorithm_t *bar_must_query_algo = NULL; pami_metadata_t *bar_must_query_md = NULL; pami_xfer_type_t barrier_xfer = PAMI_XFER_BARRIER; volatile unsigned bar_poll_flag = 0; /* Allgather variables */ size_t allgather_num_algorithm[2]; pami_algorithm_t *allgather_always_works_algo = NULL; pami_metadata_t *allgather_always_works_md = NULL; pami_algorithm_t *allgather_must_query_algo = NULL; pami_metadata_t *allgather_must_query_md = NULL; pami_xfer_type_t allgather_xfer = PAMI_XFER_ALLGATHER; volatile unsigned allgather_poll_flag = 0; int nalg= 0; double ti, tf, usec; pami_xfer_t barrier; pami_xfer_t allgather; int rc=0; /* Allocate buffer(s) */ int err = 0; void* buf = NULL; err = posix_memalign(&buf, 128, (gMax_byte_count) + gBuffer_offset); assert(err == 0); buf = (char*)buf + gBuffer_offset; void* rbuf = NULL; err = posix_memalign(&rbuf, 128, (gMax_byte_count * num_ep) + gBuffer_offset); assert(err == 0); rbuf = (char*)rbuf + gBuffer_offset; /* Query the world geometry for barrier algorithms */ rc |= query_geometry(client, myContext, newgeometry, barrier_xfer, barrier_num_algorithm, &bar_always_works_algo, &bar_always_works_md, &bar_must_query_algo, &bar_must_query_md); /* Query the world geometry for allgather algorithms */ rc |= query_geometry(client, myContext, newgeometry, allgather_xfer, allgather_num_algorithm, &allgather_always_works_algo, &allgather_always_works_md, &allgather_must_query_algo, &allgather_must_query_md); barrier.cb_done = cb_done; barrier.cookie = (void*) & bar_poll_flag; barrier.algorithm = bar_always_works_algo[0]; blocking_coll(myContext, &barrier, &bar_poll_flag); pami_endpoint_t my_ep, zero_ep; PAMI_Endpoint_create(client,task_id,td->tid,&my_ep); 
PAMI_Endpoint_create(client,0,0,&zero_ep); for (nalg = 0; nalg < allgather_num_algorithm[0]; nalg++) { allgather.cb_done = cb_done; allgather.cookie = (void*) & allgather_poll_flag; allgather.algorithm = allgather_always_works_algo[nalg]; allgather.cmd.xfer_allgather.sndbuf = buf; allgather.cmd.xfer_allgather.stype = PAMI_TYPE_BYTE; allgather.cmd.xfer_allgather.stypecount = 0; allgather.cmd.xfer_allgather.rcvbuf = rbuf; allgather.cmd.xfer_allgather.rtype = PAMI_TYPE_BYTE; allgather.cmd.xfer_allgather.rtypecount = 0; gProtocolName = allgather_always_works_md[nalg].name; if (my_ep == zero_ep) { printf("# Allgather Bandwidth Test(size:%zu) -- context = %d, protocol: %s\n",num_tasks, td->tid, gProtocolName); printf("# Size(bytes) iterations bytes/sec usec\n"); printf("# ----------- ----------- ----------- ---------\n"); } if (((strstr(allgather_always_works_md[nalg].name,gSelected) == NULL) && gSelector) || ((strstr(allgather_always_works_md[nalg].name,gSelected) != NULL) && !gSelector)) continue; int i, j; for (i = gMin_byte_count; i <= gMax_byte_count; i *= 2) { size_t dataSent = i; int niter; if (dataSent < CUTOFF) niter = gNiterlat; else niter = NITERBW; allgather.cmd.xfer_allgather.stypecount = i; allgather.cmd.xfer_allgather.rtypecount = i; allgather_initialize_sndbuf (buf, i, td->logical_rank); memset(rbuf, 0xFF, i); blocking_coll(myContext, &barrier, &bar_poll_flag); ti = timer(); for (j = 0; j < niter; j++) { blocking_coll (myContext, &allgather, &allgather_poll_flag); } tf = timer(); blocking_coll(myContext, &barrier, &bar_poll_flag); int rc_check; rc |= rc_check = allgather_check_rcvbuf (rbuf, i, num_ep); if (rc_check) fprintf(stderr, "%s FAILED validation\n", gProtocolName); usec = (tf - ti) / (double)niter; if (my_ep == zero_ep) { printf(" %11lld %16d %14.1f %12.2f\n", (long long)dataSent, niter, (double)1e6*(double)dataSent / (double)usec, usec); fflush(stdout); } } } free(bar_always_works_algo); free(bar_always_works_md); free(bar_must_query_algo); 
free(bar_must_query_md); free(allgather_always_works_algo); free(allgather_always_works_md); free(allgather_must_query_algo); free(allgather_must_query_md); buf = (char*)buf - gBuffer_offset; free(buf); rbuf = (char*)rbuf - gBuffer_offset; free(rbuf); pthread_exit(NULL); }
int main(int argc, char*argv[]) { pami_client_t client; pami_context_t *context; pami_task_t task_id, local_task_id=0, task_zero = 0; size_t num_tasks; pami_geometry_t world_geometry; /* Barrier variables */ size_t barrier_num_algorithm[2]; pami_algorithm_t *bar_always_works_algo = NULL; pami_metadata_t *bar_always_works_md = NULL; pami_algorithm_t *bar_must_query_algo = NULL; pami_metadata_t *bar_must_query_md = NULL; pami_xfer_type_t barrier_xfer = PAMI_XFER_BARRIER; volatile unsigned bar_poll_flag = 0; volatile unsigned newbar_poll_flag = 0; /* Alltoallv variables */ size_t alltoallv_num_algorithm[2]; pami_algorithm_t *alltoallv_always_works_algo = NULL; pami_metadata_t *alltoallv_always_works_md = NULL; pami_algorithm_t *alltoallv_must_query_algo = NULL; pami_metadata_t *alltoallv_must_query_md = NULL; pami_xfer_type_t alltoallv_xfer = PAMI_XFER_ALLTOALLV; volatile unsigned alltoallv_poll_flag = 0; int nalg = 0; double ti, tf, usec; pami_xfer_t barrier; pami_xfer_t alltoallv; /* Process environment variables and setup globals */ setup_env(); assert(gNum_contexts > 0); context = (pami_context_t*)malloc(sizeof(pami_context_t) * gNum_contexts); /* Initialize PAMI */ int rc = pami_init(&client, /* Client */ context, /* Context */ NULL, /* Clientname=default */ &gNum_contexts, /* gNum_contexts */ NULL, /* null configuration */ 0, /* no configuration */ &task_id, /* task id */ &num_tasks); /* number of tasks */ if (rc == 1) return 1; if (num_tasks == 1) { fprintf(stderr, "No subcomms on 1 node\n"); return 0; } assert(task_id >= 0); assert(task_id < num_tasks); /* Allocate buffer(s) */ int err = 0; void* sbuf = NULL; err = posix_memalign((void*) & sbuf, 128, (gMax_byte_count * num_tasks) + gBuffer_offset); assert(err == 0); sbuf = (char*)sbuf + gBuffer_offset; void* rbuf = NULL; err = posix_memalign((void*) & rbuf, 128, (gMax_byte_count * num_tasks) + gBuffer_offset); assert(err == 0); rbuf = (char*)rbuf + gBuffer_offset; sndlens = (size_t*) malloc(num_tasks * 
sizeof(size_t)); assert(sndlens); sdispls = (size_t*) malloc(num_tasks * sizeof(size_t)); assert(sdispls); rcvlens = (size_t*) malloc(num_tasks * sizeof(size_t)); assert(rcvlens); rdispls = (size_t*) malloc(num_tasks * sizeof(size_t)); assert(rdispls); /* Query the world geometry for barrier algorithms */ rc |= query_geometry_world(client, context[0], &world_geometry, barrier_xfer, barrier_num_algorithm, &bar_always_works_algo, &bar_always_works_md, &bar_must_query_algo, &bar_must_query_md); if (rc == 1) return 1; /* Set up world barrier */ barrier.cb_done = cb_done; barrier.cookie = (void*) & bar_poll_flag; barrier.algorithm = bar_always_works_algo[0]; /* Create the subgeometry */ pami_geometry_range_t *range; int rangecount; pami_geometry_t newgeometry; size_t newbar_num_algo[2]; pami_algorithm_t *newbar_algo = NULL; pami_metadata_t *newbar_md = NULL; pami_algorithm_t *q_newbar_algo = NULL; pami_metadata_t *q_newbar_md = NULL; pami_xfer_t newbarrier; size_t set[2]; int id; range = (pami_geometry_range_t *)malloc(((num_tasks + 1) / 2) * sizeof(pami_geometry_range_t)); int unused_non_task_zero[2]; get_split_method(&num_tasks, task_id, &rangecount, range, &local_task_id, set, &id, &task_zero,unused_non_task_zero); unsigned iContext = 0; for (; iContext < gNum_contexts; ++iContext) { if (task_id == task_zero) printf("# Context: %u\n", iContext); /* Delay task_zero tasks, and emulate that he's doing "other" message passing. This will cause the geometry_create request from other nodes to be unexpected when doing parentless geometries and won't affect parented. */ if (task_id == task_zero) { delayTest(1); unsigned ii = 0; for (; ii < gNum_contexts; ++ii) PAMI_Context_advance (context[ii], 1000); } rc |= create_and_query_geometry(client, context[0], context[iContext], gParentless ? 
PAMI_GEOMETRY_NULL : world_geometry, &newgeometry, range, rangecount, id + iContext, /* Unique id for each context */ barrier_xfer, newbar_num_algo, &newbar_algo, &newbar_md, &q_newbar_algo, &q_newbar_md); if (rc == 1) return 1; /* Query the sub geometry for alltoallv algorithms */ rc |= query_geometry(client, context[iContext], newgeometry, alltoallv_xfer, alltoallv_num_algorithm, &alltoallv_always_works_algo, &alltoallv_always_works_md, &alltoallv_must_query_algo, &alltoallv_must_query_md); if (rc == 1) return 1; /* Set up sub geometry barrier */ newbarrier.cb_done = cb_done; newbarrier.cookie = (void*) & newbar_poll_flag; newbarrier.algorithm = newbar_algo[0]; for (nalg = 0; nalg < alltoallv_num_algorithm[0]; nalg++) { alltoallv.cb_done = cb_done; alltoallv.cookie = (void*) & alltoallv_poll_flag; alltoallv.algorithm = alltoallv_always_works_algo[nalg]; alltoallv.cmd.xfer_alltoallv.sndbuf = sbuf; alltoallv.cmd.xfer_alltoallv.stype = PAMI_TYPE_BYTE; alltoallv.cmd.xfer_alltoallv.stypecounts = sndlens; alltoallv.cmd.xfer_alltoallv.sdispls = sdispls; alltoallv.cmd.xfer_alltoallv.rcvbuf = rbuf; alltoallv.cmd.xfer_alltoallv.rtype = PAMI_TYPE_BYTE; alltoallv.cmd.xfer_alltoallv.rtypecounts = rcvlens; alltoallv.cmd.xfer_alltoallv.rdispls = rdispls; int k; gProtocolName = alltoallv_always_works_md[nalg].name; for (k = 1; k >= 0; k--) { if (set[k]) { if (task_id == task_zero) { printf("# Alltoallv Bandwidth Test(size:%zu) -- context = %d, task_zero = %d, protocol: %s\n", num_tasks, iContext, task_zero, gProtocolName); printf("# Size(bytes) iterations bytes/sec usec\n"); printf("# ----------- ----------- ----------- ---------\n"); } if (((strstr(alltoallv_always_works_md[nalg].name, gSelected) == NULL) && gSelector) || ((strstr(alltoallv_always_works_md[nalg].name, gSelected) != NULL) && !gSelector)) continue; blocking_coll(context[iContext], &newbarrier, &newbar_poll_flag); int i, j; for (i = gMin_byte_count; i <= gMax_byte_count; i *= 2) { size_t dataSent = i; int niter; 
if (dataSent < CUTOFF) niter = gNiterlat; else niter = NITERBW; for (j = 0; j < num_tasks; j++) { sndlens[j] = rcvlens[j] = i; sdispls[j] = rdispls[j] = i * j; alltoallv_initialize_bufs(sbuf, rbuf, sndlens, rcvlens, sdispls, rdispls, j); } blocking_coll(context[iContext], &newbarrier, &newbar_poll_flag); /* Warmup */ blocking_coll(context[iContext], &alltoallv, &alltoallv_poll_flag); blocking_coll(context[iContext], &newbarrier, &newbar_poll_flag); ti = timer(); for (j = 0; j < niter; j++) { blocking_coll(context[iContext], &alltoallv, &alltoallv_poll_flag); } tf = timer(); blocking_coll(context[iContext], &newbarrier, &newbar_poll_flag); int rc_check; rc |= rc_check = alltoallv_check_rcvbuf(rbuf, rcvlens, rdispls, num_tasks, local_task_id); if (rc_check) fprintf(stderr, "%s FAILED validation\n", gProtocolName); usec = (tf - ti) / (double)niter; if (task_id == task_zero) { printf(" %11lld %16d %14.1f %12.2f\n", (long long)dataSent, niter, (double)1e6*(double)dataSent / (double)usec, usec); fflush(stdout); } } } blocking_coll(context[iContext], &newbarrier, &newbar_poll_flag); fflush(stderr); } } /* We aren't testing world barrier itself, so use context 0.*/ blocking_coll(context[0], &barrier, &bar_poll_flag); free(bar_always_works_algo); free(bar_always_works_md); free(bar_must_query_algo); free(bar_must_query_md); free(alltoallv_always_works_algo); free(alltoallv_always_works_md); free(alltoallv_must_query_algo); free(alltoallv_must_query_md); } /*for(unsigned iContext = 0; iContext < gNum_contexts; ++iContexts)*/ sbuf = (char*)sbuf - gBuffer_offset; free(sbuf); rbuf = (char*)rbuf - gBuffer_offset; free(rbuf); free(sndlens); free(sdispls); free(rcvlens); free(rdispls); rc |= pami_shutdown(&client, context, &gNum_contexts); return rc; }
int main(int argc, char*argv[]) { pami_client_t client; pami_context_t *context; pami_task_t task_id, local_task_id=0, task_zero=0; size_t num_tasks; pami_geometry_t world_geometry; /* Barrier variables */ size_t barrier_num_algorithm[2]; pami_algorithm_t *bar_always_works_algo = NULL; pami_metadata_t *bar_always_works_md = NULL; pami_algorithm_t *bar_must_query_algo = NULL; pami_metadata_t *bar_must_query_md = NULL; pami_xfer_type_t barrier_xfer = PAMI_XFER_BARRIER; volatile unsigned bar_poll_flag = 0; volatile unsigned newbar_poll_flag = 0; /* Allreduce variables */ size_t allreduce_num_algorithm[2]; pami_algorithm_t *allreduce_always_works_algo = NULL; pami_metadata_t *allreduce_always_works_md = NULL; pami_algorithm_t *allreduce_must_query_algo = NULL; pami_metadata_t *allreduce_must_query_md = NULL; pami_xfer_type_t allreduce_xfer = PAMI_XFER_ALLREDUCE; volatile unsigned allreduce_poll_flag = 0; int nalg = 0; double ti, tf, usec; pami_xfer_t barrier; pami_xfer_t allreduce; /* Process environment variables and setup globals */ setup_env(); assert(gNum_contexts > 0); context = (pami_context_t*)malloc(sizeof(pami_context_t) * gNum_contexts); /* Allocate buffer(s) */ int err = 0; void* sbuf = NULL; err = posix_memalign(&sbuf, 128, gMax_byte_count + gBuffer_offset); assert(err == 0); sbuf = (char*)sbuf + gBuffer_offset; void* rbuf = NULL; err = posix_memalign(&rbuf, 128, gMax_byte_count + gBuffer_offset); assert(err == 0); rbuf = (char*)rbuf + gBuffer_offset; /* Initialize PAMI */ int rc = pami_init(&client, /* Client */ context, /* Context */ NULL, /* Clientname=default */ &gNum_contexts, /* gNum_contexts */ NULL, /* null configuration */ 0, /* no configuration */ &task_id, /* task id */ &num_tasks); /* number of tasks */ if (rc == 1) return 1; if (num_tasks == 1) { fprintf(stderr, "No subcomms on 1 node\n"); return 0; } assert(task_id >= 0); assert(task_id < num_tasks); /* Query the world geometry for barrier algorithms */ rc |= query_geometry_world(client, 
context[0], &world_geometry, barrier_xfer, barrier_num_algorithm, &bar_always_works_algo, &bar_always_works_md, &bar_must_query_algo, &bar_must_query_md); if (rc == 1) return 1; /* Set up world barrier */ barrier.cb_done = cb_done; barrier.cookie = (void*) & bar_poll_flag; barrier.algorithm = bar_always_works_algo[0]; unsigned iContext = 0; /* Create the subgeometry */ pami_geometry_range_t *range; int rangecount; pami_geometry_t newgeometry; size_t newbar_num_algo[2]; pami_algorithm_t *newbar_algo = NULL; pami_metadata_t *newbar_md = NULL; pami_algorithm_t *q_newbar_algo = NULL; pami_metadata_t *q_newbar_md = NULL; pami_xfer_t newbarrier; size_t set[2]; int id; range = (pami_geometry_range_t *)malloc(((num_tasks + 1) / 2) * sizeof(pami_geometry_range_t)); int unused_non_task_zero[2]; get_split_method(&num_tasks, task_id, &rangecount, range, &local_task_id, set, &id, &task_zero,unused_non_task_zero); for (; iContext < gNum_contexts; ++iContext) { if (task_id == task_zero) printf("# Context: %u\n", iContext); /* Delay task_zero tasks, and emulate that he's doing "other" message passing. This will cause the geometry_create request from other nodes to be unexpected when doing parentless geometries and won't affect parented. */ if (task_id == task_zero) { delayTest(1); unsigned ii = 0; for (; ii < gNum_contexts; ++ii) PAMI_Context_advance (context[ii], 1000); } rc |= create_and_query_geometry(client, context[0], context[iContext], gParentless ? 
PAMI_GEOMETRY_NULL : world_geometry, &newgeometry, range, rangecount, id + iContext, /* Unique id for each context */ barrier_xfer, newbar_num_algo, &newbar_algo, &newbar_md, &q_newbar_algo, &q_newbar_md); if (rc == 1) return 1; /* Query the sub geometry for reduce algorithms */ rc |= query_geometry(client, context[iContext], newgeometry, allreduce_xfer, allreduce_num_algorithm, &allreduce_always_works_algo, &allreduce_always_works_md, &allreduce_must_query_algo, &allreduce_must_query_md); if (rc == 1) return 1; /* Set up sub geometry barrier */ newbarrier.cb_done = cb_done; newbarrier.cookie = (void*) & newbar_poll_flag; newbarrier.algorithm = newbar_algo[0]; for (nalg = 0; nalg < allreduce_num_algorithm[1]; nalg++) { metadata_result_t result = {0}; int i, j, k; for (k = 1; k >= 0; k--) { if (set[k]) { if (task_id == task_zero) { printf("# Allreduce Bandwidth Test(size:%zu) -- context = %d, task = %d protocol: %s, Metadata: range %zu <-> %zd, mask %#X\n",num_tasks, iContext, task_zero, allreduce_must_query_md[nalg].name, allreduce_must_query_md[nalg].range_lo,(ssize_t)allreduce_must_query_md[nalg].range_hi, allreduce_must_query_md[nalg].check_correct.bitmask_correct); printf("# Size(bytes) iterations bytes/sec usec\n"); printf("# ----------- ----------- ----------- ---------\n"); } if (((strstr(allreduce_must_query_md[nalg].name, gSelected) == NULL) && gSelector) || ((strstr(allreduce_must_query_md[nalg].name, gSelected) != NULL) && !gSelector)) continue; gProtocolName = allreduce_must_query_md[nalg].name; unsigned checkrequired = allreduce_must_query_md[nalg].check_correct.values.checkrequired; /*must query every time */ assert(!checkrequired || allreduce_must_query_md[nalg].check_fn); /* must have function if checkrequired. 
*/ allreduce.cb_done = cb_done; allreduce.cookie = (void*) & allreduce_poll_flag; allreduce.algorithm = allreduce_must_query_algo[nalg]; allreduce.cmd.xfer_allreduce.sndbuf = sbuf; allreduce.cmd.xfer_allreduce.rcvbuf = rbuf; allreduce.cmd.xfer_allreduce.rtype = PAMI_TYPE_BYTE; allreduce.cmd.xfer_allreduce.rtypecount = 0; int op, dt; for (dt = 0; dt < dt_count; dt++) for (op = 0; op < op_count; op++) { if (gValidTable[op][dt]) { if (task_id == task_zero) printf("Running Allreduce: %s, %s\n", dt_array_str[dt], op_array_str[op]); for (i = MAX(1,gMin_byte_count/get_type_size(dt_array[dt])); i <= gMax_byte_count/get_type_size(dt_array[dt]); i *= 2) { size_t sz = get_type_size(dt_array[dt]); size_t dataSent = i * sz; int niter; if (dataSent < CUTOFF) niter = gNiterlat; else niter = NITERBW; allreduce.cmd.xfer_allreduce.stypecount = i; allreduce.cmd.xfer_allreduce.rtypecount = dataSent; allreduce.cmd.xfer_allreduce.stype = dt_array[dt]; allreduce.cmd.xfer_allreduce.op = op_array[op]; result = check_metadata(allreduce_must_query_md[nalg], allreduce, dt_array[dt], dataSent, /* metadata uses bytes i, */ allreduce.cmd.xfer_allreduce.sndbuf, PAMI_TYPE_BYTE, dataSent, allreduce.cmd.xfer_allreduce.rcvbuf); if (allreduce_must_query_md[nalg].check_correct.values.nonlocal) { /* \note We currently ignore check_correct.values.nonlocal because these tests should not have nonlocal differences (so far). 
*/ result.check.nonlocal = 0; } if (result.bitmask) continue; reduce_initialize_sndbuf (sbuf, i, op, dt, local_task_id, num_tasks); blocking_coll(context[iContext], &newbarrier, &newbar_poll_flag); ti = timer(); for (j = 0; j < niter; j++) { if (checkrequired) /* must query every time */ { result = allreduce_must_query_md[nalg].check_fn(&allreduce); if (result.bitmask) continue; } blocking_coll(context[iContext], &allreduce, &allreduce_poll_flag); } tf = timer(); blocking_coll(context[iContext], &newbarrier, &newbar_poll_flag); int rc_check; rc |= rc_check = reduce_check_rcvbuf (rbuf, i, op, dt, local_task_id, num_tasks); if (rc_check) fprintf(stderr, "%s FAILED validation\n", gProtocolName); usec = (tf - ti) / (double)niter; if (task_id == task_zero) { printf(" %11lld %16d %14.1f %12.2f\n", (long long)dataSent, niter, (double)1e6*(double)dataSent / (double)usec, usec); fflush(stdout); } } } } } } } /* We aren't testing world barrier itself, so use context 0.*/ blocking_coll(context[0], &barrier, &bar_poll_flag); free(newbar_algo); free(newbar_md); free(q_newbar_algo); free(q_newbar_md); free(allreduce_always_works_algo); free(allreduce_always_works_md); free(allreduce_must_query_algo); free(allreduce_must_query_md); } /*for(unsigned iContext = 0; iContext < gNum_contexts; ++iContexts)*/ free(bar_always_works_algo); free(bar_always_works_md); free(bar_must_query_algo); free(bar_must_query_md); sbuf = (char*)sbuf - gBuffer_offset; free(sbuf); rbuf = (char*)rbuf - gBuffer_offset; free(rbuf); rc |= pami_shutdown(&client, context, &gNum_contexts); return rc; }
static void * allreduce_test(void* p) { thread_data_t *td = (thread_data_t*)p; pami_context_t myContext = (pami_context_t)td->context; /* Barrier variables */ size_t barrier_num_algorithm[2]; pami_algorithm_t *bar_always_works_algo = NULL; pami_metadata_t *bar_always_works_md = NULL; pami_algorithm_t *bar_must_query_algo = NULL; pami_metadata_t *bar_must_query_md = NULL; pami_xfer_type_t barrier_xfer = PAMI_XFER_BARRIER; volatile unsigned bar_poll_flag = 0; /* Allreduce variables */ size_t allreduce_num_algorithm[2]; pami_algorithm_t *allreduce_always_works_algo = NULL; pami_metadata_t *allreduce_always_works_md = NULL; pami_algorithm_t *allreduce_must_query_algo = NULL; pami_metadata_t *allreduce_must_query_md = NULL; pami_xfer_type_t allreduce_xfer = PAMI_XFER_ALLREDUCE; volatile unsigned allreduce_poll_flag = 0; int nalg= 0; double ti, tf, usec; pami_xfer_t barrier; pami_xfer_t allreduce; int rc = 0; /* Allocate buffer(s) */ int err = 0; void* sbuf = NULL; err = posix_memalign(&sbuf, 128, gMax_byte_count + gBuffer_offset); assert(err == 0); sbuf = (char*)sbuf + gBuffer_offset; void* rbuf = NULL; err = posix_memalign(&rbuf, 128, gMax_byte_count + gBuffer_offset); assert(err == 0); rbuf = (char*)rbuf + gBuffer_offset; /* Query the world geometry for barrier algorithms */ rc |= query_geometry(client, myContext, newgeometry, barrier_xfer, barrier_num_algorithm, &bar_always_works_algo, &bar_always_works_md, &bar_must_query_algo, &bar_must_query_md); /* Query the world geometry for allreduce algorithms */ rc |= query_geometry(client, myContext, newgeometry, allreduce_xfer, allreduce_num_algorithm, &allreduce_always_works_algo, &allreduce_always_works_md, &allreduce_must_query_algo, &allreduce_must_query_md); barrier.cb_done = cb_done; barrier.cookie = (void*) & bar_poll_flag; barrier.algorithm = bar_always_works_algo[0]; blocking_coll(myContext, &barrier, &bar_poll_flag); pami_endpoint_t my_ep, zero_ep; PAMI_Endpoint_create(client,task_id,td->tid,&my_ep); 
PAMI_Endpoint_create(client,0,0,&zero_ep); for (nalg = 0; nalg < allreduce_num_algorithm[0]; nalg++) { if (my_ep == zero_ep) { printf("# Allreduce Bandwidth Test(size:%zu) -- context = %d, protocol: %s\n",num_tasks, td->tid, allreduce_always_works_md[nalg].name); printf("# Size(bytes) iterations bytes/sec usec\n"); printf("# ----------- ----------- ----------- ---------\n"); } if (((strstr(allreduce_always_works_md[nalg].name, gSelected) == NULL) && gSelector) || ((strstr(allreduce_always_works_md[nalg].name, gSelected) != NULL) && !gSelector)) continue; gProtocolName = allreduce_always_works_md[nalg].name; allreduce.cb_done = cb_done; allreduce.cookie = (void*) & allreduce_poll_flag; allreduce.algorithm = allreduce_always_works_algo[nalg]; allreduce.cmd.xfer_allreduce.sndbuf = sbuf; allreduce.cmd.xfer_allreduce.rcvbuf = rbuf; allreduce.cmd.xfer_allreduce.rtype = PAMI_TYPE_BYTE; allreduce.cmd.xfer_allreduce.rtypecount = 0; int op, dt,i,j; for (dt = 0; dt < dt_count; dt++) { for (op = 0; op < op_count; op++) { if (gValidTable[op][dt]) { if (my_ep == zero_ep) printf("Running Allreduce: %s, %s\n", dt_array_str[dt], op_array_str[op]); for (i = MAX(1,gMin_byte_count/get_type_size(dt_array[dt])); i <= gMax_byte_count/get_type_size(dt_array[dt]); i *= 2) { size_t sz=get_type_size(dt_array[dt]); size_t dataSent = i * sz; int niter; if (dataSent < CUTOFF) niter = gNiterlat; else niter = NITERBW; allreduce.cmd.xfer_allreduce.stypecount = i; allreduce.cmd.xfer_allreduce.rtypecount = dataSent; allreduce.cmd.xfer_allreduce.stype = dt_array[dt]; allreduce.cmd.xfer_allreduce.op = op_array[op]; reduce_initialize_sndbuf (sbuf, i, op, dt, td->logical_rank, num_ep); blocking_coll(myContext, &barrier, &bar_poll_flag); ti = timer(); for (j = 0; j < niter; j++) { blocking_coll(myContext, &allreduce, &allreduce_poll_flag); } tf = timer(); /* We aren't testing barrier itself, so use context 0. 
*/ blocking_coll(myContext, &barrier, &bar_poll_flag); int rc_check; rc |= rc_check = reduce_check_rcvbuf (rbuf, i, op, dt, td->logical_rank, num_ep); if (rc_check) fprintf(stderr, "%s FAILED validation\n", gProtocolName); usec = (tf - ti) / (double)niter; if (my_ep == zero_ep) { printf(" %11lld %16d %14.1f %12.2f\n", (long long)dataSent, niter, (double)1e6*(double)dataSent / (double)usec, usec); fflush(stdout); } } } } } } free(bar_always_works_algo); free(bar_always_works_md); free(bar_must_query_algo); free(bar_must_query_md); free(allreduce_always_works_algo); free(allreduce_always_works_md); free(allreduce_must_query_algo); free(allreduce_must_query_md); sbuf = (char*)sbuf - gBuffer_offset; free(sbuf); rbuf = (char*)rbuf - gBuffer_offset; free(rbuf); rc = PAMI_Fence_all (myContext, fence_cb_done, &fence_arrivals); while (fence_arrivals != 0) rc = PAMI_Context_advance (myContext, 1); pthread_exit(NULL); }
static void * gatherv_test(void* p) { thread_data_t *td = (thread_data_t*)p; pami_context_t myContext = (pami_context_t)td->context; /* Barrier variables */ size_t barrier_num_algorithm[2]; pami_algorithm_t *bar_always_works_algo = NULL; pami_metadata_t *bar_always_works_md = NULL; pami_algorithm_t *bar_must_query_algo = NULL; pami_metadata_t *bar_must_query_md = NULL; pami_xfer_type_t barrier_xfer = PAMI_XFER_BARRIER; volatile unsigned bar_poll_flag = 0; /* Gatherv variables */ size_t gatherv_num_algorithm[2]; pami_algorithm_t *gatherv_always_works_algo = NULL; pami_metadata_t *gatherv_always_works_md = NULL; pami_algorithm_t *gatherv_must_query_algo = NULL; pami_metadata_t *gatherv_must_query_md = NULL; pami_xfer_type_t gatherv_xfer = PAMI_XFER_GATHERV; volatile unsigned gatherv_poll_flag = 0; int nalg= 0; double ti, tf, usec; pami_xfer_t barrier; pami_xfer_t gatherv; int rc = 0; if(gNumRoots == -1) gNumRoots = num_ep; /* Allocate buffer(s) */ int err = 0; void* buf = NULL; err = posix_memalign(&buf, 128, (gMax_byte_count * num_ep) + gBuffer_offset); assert(err == 0); buf = (char*)buf + gBuffer_offset; void* rbuf = NULL; err = posix_memalign(&rbuf, 128, (gMax_byte_count * num_ep) + gBuffer_offset); assert(err == 0); rbuf = (char*)rbuf + gBuffer_offset; size_t *lengths = (size_t*)malloc(num_ep * sizeof(size_t)); assert(lengths); size_t *displs = (size_t*)malloc(num_ep * sizeof(size_t)); assert(displs); /* Query the world geometry for barrier algorithms */ rc |= query_geometry(client, myContext, newgeometry, barrier_xfer, barrier_num_algorithm, &bar_always_works_algo, &bar_always_works_md, &bar_must_query_algo, &bar_must_query_md); /* Query the world geometry for gatherv algorithms */ rc |= query_geometry(client, myContext, newgeometry, gatherv_xfer, gatherv_num_algorithm, &gatherv_always_works_algo, &gatherv_always_works_md, &gatherv_must_query_algo, &gatherv_must_query_md); barrier.cb_done = cb_done; barrier.cookie = (void*) & bar_poll_flag; barrier.algorithm = 
bar_always_works_algo[0]; blocking_coll(myContext, &barrier, &bar_poll_flag); pami_endpoint_t my_ep, zero_ep; PAMI_Endpoint_create(client,task_id,td->tid,&my_ep); PAMI_Endpoint_create(client,0,0,&zero_ep); for (nalg = 0; nalg < gatherv_num_algorithm[0]; nalg++) { gatherv.cb_done = cb_done; gatherv.cookie = (void*) & gatherv_poll_flag; gatherv.algorithm = gatherv_always_works_algo[nalg]; gatherv.cmd.xfer_gatherv.sndbuf = buf; gatherv.cmd.xfer_gatherv.stype = PAMI_TYPE_BYTE; gatherv.cmd.xfer_gatherv.stypecount = 0; gatherv.cmd.xfer_gatherv.rcvbuf = rbuf; gatherv.cmd.xfer_gatherv.rtype = PAMI_TYPE_BYTE; gatherv.cmd.xfer_gatherv.rtypecounts = lengths; gatherv.cmd.xfer_gatherv.rdispls = displs; gProtocolName = gatherv_always_works_md[nalg].name; if (my_ep == zero_ep) { printf("# Gatherv Bandwidth Test(size:%zu) -- context = %d, protocol: %s\n",num_tasks, td->tid, gProtocolName); printf("# Size(bytes) iterations bytes/sec usec\n"); printf("# ----------- ----------- ----------- ---------\n"); } if (((strstr(gatherv_always_works_md[nalg].name,gSelected) == NULL) && gSelector) || ((strstr(gatherv_always_works_md[nalg].name,gSelected) != NULL) && !gSelector)) continue; size_t i, j; for (i = gMin_byte_count; i <= gMax_byte_count; i *= 2) { size_t dataSent = i; int niter; size_t k = 0; for (k = 0; k < num_ep; k++) { lengths[k] = i; displs[k] = k * i; } lengths[k-1] = 0; if (dataSent < CUTOFF) niter = gNiterlat; else niter = NITERBW; blocking_coll(myContext, &barrier, &bar_poll_flag); ti = timer(); int ctxt_id = 0; pami_task_t root_task = 0; for (j = 0; j < niter; j++) { pami_endpoint_t root_ep; PAMI_Endpoint_create(client, root_task, ctxt_id, &root_ep); gatherv.cmd.xfer_gatherv.root = root_ep; gather_initialize_sndbuf(td->logical_rank, buf, i); if (root_ep == zero_ep) memset(rbuf, 0xFF, i*num_ep); if (td->logical_rank != num_ep - 1) gatherv.cmd.xfer_gatherv.stypecount = i; blocking_coll(myContext, &gatherv, &gatherv_poll_flag); if (my_ep == zero_ep) { int rc_check; rc |= 
rc_check = gather_check_rcvbuf(num_tasks-1, rbuf, i); if (rc_check) fprintf(stderr, "%s FAILED validation\n", gProtocolName); } ctxt_id = (ctxt_id + 1)%gNum_contexts; if(ctxt_id == 0) root_task = (root_task +1)%num_tasks; } tf = timer(); blocking_coll(myContext, &barrier, &bar_poll_flag); usec = (tf - ti) / (double)niter; if (my_ep == zero_ep) { printf(" %11lld %16d %14.1f %12.2f\n", (long long)dataSent, niter, (double)1e6*(double)dataSent / (double)usec, usec); fflush(stdout); } } } free(bar_always_works_algo); free(bar_always_works_md); free(bar_must_query_algo); free(bar_must_query_md); free(gatherv_always_works_algo); free(gatherv_always_works_md); free(gatherv_must_query_algo); free(gatherv_must_query_md); buf = (char*)buf - gBuffer_offset; free(buf); rbuf = (char*)rbuf - gBuffer_offset; free(rbuf); free(lengths); free(displs); rc = PAMI_Fence_all (myContext, fence_cb_done, &fence_arrivals); while (fence_arrivals != 0) rc = PAMI_Context_advance (myContext, 1); pthread_exit(NULL); }