/*
 * Callback-shaped wrapper around PAMI_Collective().
 *
 * `cookie` is interpreted as a pointer to the pami_xfer_t describing the
 * collective to start on `context`.  The result of PAMI_Collective() is
 * not inspected; this function always returns PAMI_SUCCESS.
 */
pami_result_t MPIDI_Pami_post_wrapper(pami_context_t context, void *cookie)
{
   pami_xfer_t *xfer = (pami_xfer_t *)cookie;

   TRACE_ERR("In post wrapper\n");
   TRACE_ERR("About to call collecive\n");
   PAMI_Collective(context, xfer);
   TRACE_ERR("Done calling collective\n");

   return PAMI_SUCCESS;
}
/*
 * Blocking scatter issued through PAMI_Collective() with the team's
 * scatt_alg algorithm; send and receive counts are both nbytes.
 *
 * In GASNET_PAR builds only the "leader" thread issues the collective.
 * The leader is the thread for which gasnete_coll_pami_images_barrier()
 * returns nonzero, except that when GASNET_COLL_LOCAL is set and the
 * source image is local, the thread owning srcimage is the leader.
 * In other builds the caller is always the leader.
 *
 * GASNET_COLL_IN_ALLSYNC / GASNET_COLL_OUT_ALLSYNC add a
 * gasnetc_fast_barrier() by the leader before / after the collective.
 */
static void
gasnete_coll_pami_scatt(const gasnet_team_handle_t team,
                        void *dst, gasnet_image_t srcimage, const void *src,
                        size_t nbytes, int flags GASNETI_THREAD_FARG)
{
  const int root_is_local = gasnete_coll_image_is_local(team, srcimage);

#if GASNET_PAR
  int leader = gasnete_coll_pami_images_barrier(team); /* XXX: over-synced for IN_NO and IN_MY */

  if (root_is_local && (flags & GASNET_COLL_LOCAL)) {
    /* root thread must be leader for its node */
    const gasnete_coll_threaddata_t * const mythread = GASNETE_COLL_MYTHREAD_NOALLOC;
    leader = (srcimage == mythread->my_image);
  }
#else
  const int leader = 1;
#endif

  if (leader) {
    volatile unsigned int completed = 0;
    pami_result_t status;
    pami_xfer_t xfer;

    if (flags & GASNET_COLL_IN_ALLSYNC) {
      gasnetc_fast_barrier();
    }

    /* Start from the prepared template, then fill in per-call fields */
    xfer = gasnete_op_template_scatt;
    xfer.cookie = (void *)&completed;
    xfer.algorithm = team->pami.scatt_alg;
    xfer.cmd.xfer_scatter.root =
        gasnetc_endpoint(GASNETE_COLL_REL2ACT(team,gasnete_coll_image_node(team, srcimage)));
    xfer.cmd.xfer_scatter.sndbuf = (/*not-const*/ void *)src;
    xfer.cmd.xfer_scatter.stypecount = nbytes;
    xfer.cmd.xfer_scatter.rcvbuf = dst;
    xfer.cmd.xfer_scatter.rtypecount = nbytes;

    GASNETC_PAMI_LOCK(gasnetc_context);
    status = PAMI_Collective(gasnetc_context, &xfer);
    GASNETC_PAMI_UNLOCK(gasnetc_context);
    GASNETC_PAMI_CHECK(status, "initiating blocking scatter");

    /* Wait for the completion flag referenced by xfer.cookie */
    gasneti_polluntil(completed);
  }

  if (flags & GASNET_COLL_OUT_ALLSYNC) {
    if (leader) {
      gasnetc_fast_barrier();
    }
    (void) gasnete_coll_pami_images_barrier(team);
  }
}
/*
 * Run the collective described by *op_p on gasnetc_context and block
 * until it completes.
 *
 * The function overwrites op_p->cb_done, op_p->cookie and
 * op_p->options.multicontext before issuing the operation.  When
 * need_lock is nonzero the context lock is held around the call to
 * PAMI_Collective().  Completion is awaited with gasneti_polluntil()
 * once gasneti_attach_done is set, and with gasnetc_wait_uint() before
 * that.
 */
static void native_collective(pami_xfer_t *op_p, int need_lock)
{
  volatile unsigned int completions = 0;
  pami_result_t status;

  op_p->cb_done = &gasnetc_cb_inc_uint;
  op_p->cookie = (void *)&completions;
  op_p->options.multicontext = PAMI_HINT_DISABLE;

  if (need_lock) {
    GASNETC_PAMI_LOCK(gasnetc_context);
  }
  status = PAMI_Collective(gasnetc_context, op_p);
  GASNETC_PAMI_CHECK(status, "initiating a native collective");
  if (need_lock) {
    GASNETC_PAMI_UNLOCK(gasnetc_context);
  }

  if (!gasneti_attach_done) {
    status = gasnetc_wait_uint(gasnetc_context, &completions, 1);
    GASNETC_PAMI_CHECK(status, "polling a native collective");
    return;
  }

  gasneti_polluntil(completions);
}
/**
 * \brief Blocking 'world geometry' barrier
 *
 * This function is provided for illustrative purposes only. One would never
 * include the retrieval of the world geometry and the query of the barrier
 * algorithm in a performance critical code.
 *
 * Every PAMI call is checked with assert(); the function returns only after
 * the completion callback has cleared the local 'active' flag.
 *
 * \param[in] client  The PAMI client; needed to obtain the geometry
 * \param[in] context The PAMI context; used for the barrier communication
 */
void simple_barrier (pami_client_t client, pami_context_t context)
{
  pami_result_t result;
  pami_geometry_t world_geometry;
  pami_algorithm_t algorithm;
  pami_metadata_t metadata;

  /* Zero the whole transfer descriptor so that the fields not assigned
   * below (e.g. the options hints) are not read as indeterminate values
   * by PAMI_Collective(). */
  pami_xfer_t xfer = {0};

  /* Cleared by the simple_barrier_decrement completion callback */
  volatile unsigned active = 1;

  /* Retrieve the PAMI 'world' geometry */
  result = PAMI_Geometry_world (client, &world_geometry);
  assert (result == PAMI_SUCCESS);

  /* Query the 'always works' barrier algorithm in the geometry */
  result = PAMI_Geometry_algorithms_query (world_geometry,
                                           PAMI_XFER_BARRIER,
                                           &algorithm, &metadata, 1,
                                           NULL, NULL, 0);
  assert (result == PAMI_SUCCESS);

  /* Set up the barrier */
  xfer.cb_done = simple_barrier_decrement;
  xfer.cookie = (void *) & active;
  xfer.algorithm = algorithm;

  /* Issue the barrier collective */
  result = PAMI_Collective (context, &xfer);
  assert (result == PAMI_SUCCESS);

  /* Advance until the barrier has completed */
  while (active)
    {
      result = PAMI_Context_advance (context, 1);
      assert (result == PAMI_SUCCESS);
    }

  /* Keeps 'result' referenced when assert() is compiled out (NDEBUG) */
  (void) result;
}
/*
 * Blocking allgather issued through PAMI_Collective() with the team's
 * allga_alg algorithm; send and receive counts are both nbytes.
 *
 * In GASNET_PAR builds only the thread for which
 * gasnete_coll_pami_images_barrier() returns nonzero (the "leader")
 * issues the collective; otherwise the caller is always the leader.
 *
 * GASNET_COLL_IN_ALLSYNC / GASNET_COLL_OUT_ALLSYNC add a
 * gasnetc_fast_barrier() by the leader before / after the collective.
 */
static void
gasnete_coll_pami_allga(const gasnet_team_handle_t team,
                        void *dst, const void *src,
                        size_t nbytes, int flags GASNETE_THREAD_FARG)
{
#if GASNET_PAR
  int leader = gasnete_coll_pami_images_barrier(team); /* XXX: over-synced for IN_NO and IN_MY */
#else
  const int leader = 1;
#endif

  if (leader) {
    volatile unsigned int completed = 0;
    pami_result_t status;
    pami_xfer_t xfer;

    if (flags & GASNET_COLL_IN_ALLSYNC) {
      gasnetc_fast_barrier();
    }

    /* Start from the prepared template, then fill in per-call fields */
    xfer = gasnete_op_template_allga;
    xfer.cookie = (void *)&completed;
    xfer.algorithm = team->pami.allga_alg;
    xfer.cmd.xfer_allgather.sndbuf = (/*not-const*/ void *)src;
    xfer.cmd.xfer_allgather.stypecount = nbytes;
    xfer.cmd.xfer_allgather.rcvbuf = dst;
    xfer.cmd.xfer_allgather.rtypecount = nbytes;

    GASNETC_PAMI_LOCK(gasnetc_context);
    status = PAMI_Collective(gasnetc_context, &xfer);
    GASNETC_PAMI_UNLOCK(gasnetc_context);
    GASNETC_PAMI_CHECK(status, "initiating blocking allgather");

    /* Wait for the completion flag referenced by xfer.cookie */
    gasneti_polluntil(completed);
  }

  if (flags & GASNET_COLL_OUT_ALLSYNC) {
    if (leader) {
      gasnetc_fast_barrier();
    }
    (void) gasnete_coll_pami_images_barrier(team);
  }
}
int main(int argc, char* argv[]) { pami_result_t result = PAMI_ERROR; if (Kernel_GetRank()==0) print_meminfo(stdout, "before PAMI_Client_create"); /* initialize the client */ char * clientname = ""; pami_client_t client; result = PAMI_Client_create( clientname, &client, NULL, 0 ); TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Client_create"); if (Kernel_GetRank()==0) print_meminfo(stdout, "after PAMI_Client_create"); /* query properties of the client */ pami_configuration_t config; size_t num_contexts; config.name = PAMI_CLIENT_TASK_ID; result = PAMI_Client_query( client, &config, 1); TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Client_query"); world_rank = config.value.intval; config.name = PAMI_CLIENT_NUM_TASKS; result = PAMI_Client_query( client, &config, 1); TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Client_query"); world_size = config.value.intval; if ( world_rank == 0 ) { printf("starting test on %ld ranks \n", world_size); fflush(stdout); } config.name = PAMI_CLIENT_PROCESSOR_NAME; result = PAMI_Client_query( client, &config, 1); assert(result == PAMI_SUCCESS); //printf("rank %ld is processor %s \n", world_rank, config.value.chararray); //fflush(stdout); config.name = PAMI_CLIENT_NUM_CONTEXTS; result = PAMI_Client_query( client, &config, 1); TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Client_query"); num_contexts = config.value.intval; /* initialize the contexts */ pami_context_t * contexts = NULL; contexts = (pami_context_t *) malloc( num_contexts * sizeof(pami_context_t) ); assert(contexts!=NULL); if (Kernel_GetRank()==0) fprintf(stdout, "num_contexts = %ld \n", (long)num_contexts); result = PAMI_Context_createv( client, &config, 0, contexts, num_contexts ); TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Context_createv"); if (Kernel_GetRank()==0) print_meminfo(stdout, "after PAMI_Context_createv"); /* setup the world geometry */ pami_geometry_t world_geometry; pami_xfer_type_t barrier_xfer = PAMI_XFER_BARRIER; size_t num_alg[2]; pami_algorithm_t * safe_barrier_algs = NULL; 
pami_metadata_t * safe_barrier_meta = NULL; pami_algorithm_t * fast_barrier_algs = NULL; pami_metadata_t * fast_barrier_meta = NULL; result = PAMI_Geometry_world( client, &world_geometry ); TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Geometry_world"); if (Kernel_GetRank()==0) print_meminfo(stdout, "after PAMI_Geometry_world"); result = PAMI_Geometry_algorithms_num( world_geometry, barrier_xfer, num_alg ); TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Geometry_algorithms_num"); if ( world_rank == 0 ) printf("number of barrier algorithms = {%ld,%ld} \n", num_alg[0], num_alg[1] ); if (Kernel_GetRank()==0) print_meminfo(stdout, "after PAMI_Geometry_algorithms_num"); safe_barrier_algs = (pami_algorithm_t *) malloc( num_alg[0] * sizeof(pami_algorithm_t) ); assert(safe_barrier_algs!=NULL); safe_barrier_meta = (pami_metadata_t *) malloc( num_alg[0] * sizeof(pami_metadata_t) ); assert(safe_barrier_meta!=NULL); fast_barrier_algs = (pami_algorithm_t *) malloc( num_alg[1] * sizeof(pami_algorithm_t) ); assert(fast_barrier_algs!=NULL); fast_barrier_meta = (pami_metadata_t *) malloc( num_alg[1] * sizeof(pami_metadata_t) ); assert(fast_barrier_meta!=NULL); result = PAMI_Geometry_algorithms_query( world_geometry, barrier_xfer, safe_barrier_algs, safe_barrier_meta, num_alg[0], fast_barrier_algs, fast_barrier_meta, num_alg[1] ); TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Geometry_algorithms_query"); if (Kernel_GetRank()==0) print_meminfo(stdout, "after PAMI_Geometry_algorithms_query"); /* perform a barrier */ size_t b; pami_xfer_t barrier; volatile int active = 0; for ( b = 0 ; b < num_alg[0] ; b++ ) { barrier.cb_done = cb_done; barrier.cookie = (void*) &active; barrier.algorithm = safe_barrier_algs[b]; uint64_t t0 = GetTimeBase(); active = 1; result = PAMI_Collective( contexts[0], &barrier ); TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Collective - barrier"); while (active) result = PAMI_Context_advance( contexts[0], 1 ); TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Context_advance - barrier"); 
uint64_t t1 = GetTimeBase(); if ( world_rank == 0 ) printf("safe barrier algorithm %ld (%s) - took %llu cycles \n", b, safe_barrier_meta[b].name, (long long unsigned int)t1-t0 ); fflush(stdout); } for ( b = 0 ; b < num_alg[1] ; b++ ) { barrier.cb_done = cb_done; barrier.cookie = (void*) &active; barrier.algorithm = fast_barrier_algs[b]; uint64_t t0 = GetTimeBase(); active = 1; result = PAMI_Collective( contexts[0], &barrier ); TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Collective - barrier"); while (active) result = PAMI_Context_advance( contexts[0], 1 ); TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Context_advance - barrier"); uint64_t t1 = GetTimeBase(); if ( world_rank == 0 ) printf("fast barrier algorithm %ld (%s) - took %llu cycles \n", b, fast_barrier_meta[b].name, (long long unsigned int)t1-t0 ); fflush(stdout); } if (Kernel_GetRank()==0) print_meminfo(stdout, "after barrier tests"); /* finalize the contexts */ result = PAMI_Context_destroyv( contexts, num_contexts ); TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Context_destroyv"); free(contexts); if (Kernel_GetRank()==0) print_meminfo(stdout, "before PAMI_Client_destroy"); /* finalize the client */ result = PAMI_Client_destroy( &client ); TEST_ASSERT(result == PAMI_SUCCESS,"PAMI_Client_destroy"); if (Kernel_GetRank()==0) print_meminfo(stdout, "after PAMI_Client_destroy"); if ( world_rank == 0 ) { printf("end of test \n"); fflush(stdout); } return 0; }
int main(int argc, char*argv[]) { pami_client_t client; pami_context_t *context; pami_geometry_t world_geometry; pami_task_t root_task = 0; /* Barrier variables */ size_t barrier_num_algorithm[2]; pami_algorithm_t *bar_always_works_algo = NULL; pami_metadata_t *bar_always_works_md = NULL; pami_algorithm_t *bar_must_query_algo = NULL; pami_metadata_t *bar_must_query_md = NULL; pami_xfer_type_t barrier_xfer = PAMI_XFER_BARRIER; pami_xfer_t barrier; volatile unsigned bar_poll_flag = 0; /* Amscatter variables */ size_t amscatter_num_algorithm[2]; pami_algorithm_t *amscatter_always_works_algo = NULL; pami_metadata_t *amscatter_always_works_md = NULL; pami_algorithm_t *amscatter_must_query_algo = NULL; pami_metadata_t *amscatter_must_query_md = NULL; pami_xfer_type_t amscatter_xfer = PAMI_XFER_AMSCATTER; pami_xfer_t amscatter; volatile unsigned amscatter_total_count = 0; int nalg = 0, i; double ti, tf, usec; /* Process environment variables and setup globals */ setup_env(); assert(gNum_contexts > 0); context = (pami_context_t*)malloc(sizeof(pami_context_t) * gNum_contexts); /* \note Test environment variable" TEST_ROOT=N, defaults to 0.*/ char* sRoot = getenv("TEST_ROOT"); /* Override ROOT */ if (sRoot) root_task = (pami_task_t) atoi(sRoot); /* Initialize PAMI */ int rc = pami_init(&client, /* Client */ context, /* Context */ NULL, /* Clientname=default */ &gNum_contexts, /* gNum_contexts */ NULL, /* null configuration */ 0, /* no configuration */ &my_task_id, /* task id */ &num_tasks); /* number of tasks */ if (rc == 1) return 1; if (gNumRoots > num_tasks) gNumRoots = num_tasks; /* Allocate buffer(s) */ int err = 0; void *sbuf = NULL; err = posix_memalign(&sbuf, 128, (gMax_byte_count * num_tasks) + gBuffer_offset); assert(err == 0); sbuf = (char*)sbuf + gBuffer_offset; void* rbuf = NULL; err = posix_memalign(&rbuf, 128, gMax_byte_count + gBuffer_offset); assert(err == 0); rbuf = (char*)rbuf + gBuffer_offset; void *headers = NULL; err = posix_memalign((void **)&headers, 
128, (num_tasks * sizeof(user_header_t)) + gBuffer_offset); headers = (char*)headers + gBuffer_offset; void *validation = NULL; err = posix_memalign((void **)&validation, 128, (num_tasks * sizeof(validation_t)) + gBuffer_offset); validation = (char*)validation + gBuffer_offset; /* Initialize the headers */ for(i = 0; i < num_tasks; ++i) { ((user_header_t *)headers)[i].dst_rank = i; } unsigned iContext = 0; for (; iContext < gNum_contexts; ++iContext) { if (my_task_id == 0) printf("# Context: %u\n", iContext); /* Query the world geometry for barrier algorithms */ rc |= query_geometry_world(client, context[iContext], &world_geometry, barrier_xfer, barrier_num_algorithm, &bar_always_works_algo, &bar_always_works_md, &bar_must_query_algo, &bar_must_query_md); if (rc == 1) return 1; /* Query the world geometry for amscatter algorithms */ rc |= query_geometry_world(client, context[iContext], &world_geometry, amscatter_xfer, amscatter_num_algorithm, &amscatter_always_works_algo, &amscatter_always_works_md, &amscatter_must_query_algo, &amscatter_must_query_md); if (rc == 1) return 1; _g_recv_buffer = rbuf; _g_send_buffer = sbuf; _g_val_buffer = validation; barrier.cb_done = cb_done; barrier.cookie = (void*) & bar_poll_flag; barrier.algorithm = bar_always_works_algo[0]; blocking_coll(context[iContext], &barrier, &bar_poll_flag); amscatter.algorithm = amscatter_always_works_algo[0]; amscatter.cmd.xfer_amscatter.headers = headers; amscatter.cmd.xfer_amscatter.headerlen = sizeof(user_header_t); amscatter.cmd.xfer_amscatter.sndbuf = sbuf; amscatter.cmd.xfer_amscatter.stype = PAMI_TYPE_BYTE; amscatter.cmd.xfer_amscatter.stypecount = 0; for (nalg = 0; nalg < amscatter_num_algorithm[0]; nalg++) { gProtocolName = amscatter_always_works_md[nalg].name; if (my_task_id == root_task) { printf("# AMScatter Bandwidth Test(size:%zu) -- context = %d, root = %d, protocol: %s\n",num_tasks, iContext, root_task, amscatter_always_works_md[nalg].name); printf("# Size(bytes) iterations bytes/sec 
usec\n"); printf("# ----------- ----------- ----------- ---------\n"); fflush(stdout); } if (((strstr(amscatter_always_works_md[nalg].name,gSelected) == NULL) && gSelector) || ((strstr(amscatter_always_works_md[nalg].name,gSelected) != NULL) && !gSelector)) continue; int j; pami_collective_hint_t h = {0}; pami_dispatch_callback_function fn; lgContext = context[iContext]; fn.amscatter = cb_amscatter_recv; PAMI_AMCollective_dispatch_set(context[iContext], amscatter_always_works_algo[nalg], root_task,/* Set the dispatch id, can be any arbitrary value */ fn, (void*) &amscatter_total_count, h); amscatter.cmd.xfer_amscatter.dispatch = root_task; amscatter.algorithm = amscatter_always_works_algo[nalg]; volatile unsigned *nscatter = &amscatter_total_count; for (i = gMin_byte_count; i <= gMax_byte_count; i *= 2) { size_t dataSent = i; int niter; pami_result_t result; if (dataSent < CUTOFF) niter = gNiterlat; else niter = NITERBW; *nscatter = 0; memset(rbuf, 0xFF, i); scatter_initialize_sndbuf (sbuf, i, num_tasks); blocking_coll(context[iContext], &barrier, &bar_poll_flag); ti = timer(); for (j = 0; j < niter; j++) { root_task = (root_task + num_tasks - 1) % num_tasks; if (my_task_id == root_task) { amscatter.cmd.xfer_amscatter.stypecount = i; result = PAMI_Collective(context[iContext], &amscatter); if (result != PAMI_SUCCESS) { fprintf (stderr, "Error. Unable to issue collective. 
result = %d\n", result); return 1; } } while (*nscatter <= j) result = PAMI_Context_advance (context[iContext], 1); rc |= _gRc; /* validation return code done in cb_amscatter_done */ } assert(*nscatter == niter); tf = timer(); blocking_coll(context[iContext], &barrier, &bar_poll_flag); usec = (tf - ti) / (double)niter; if(my_task_id == root_task) { printf(" %11lld %16d %14.1f %12.2f\n", (long long)dataSent, niter, (double)1e6*(double)dataSent / (double)usec, usec); fflush(stdout); } } lgContext = NULL; } free(bar_always_works_algo); free(bar_always_works_md); free(bar_must_query_algo); free(bar_must_query_md); free(amscatter_always_works_algo); free(amscatter_always_works_md); free(amscatter_must_query_algo); free(amscatter_must_query_md); } /*for(unsigned iContext = 0; iContext < gNum_contexts; ++iContexts)*/ sbuf = (char*)sbuf - gBuffer_offset; free(sbuf); rbuf = (char*)rbuf - gBuffer_offset; free(rbuf); headers = (char*)headers - gBuffer_offset; free(headers); validation = (char*)validation - gBuffer_offset; free(validation); rc |= pami_shutdown(&client, context, &gNum_contexts); return rc; }
int main(int argc, char ** argv) { pami_client_t client; pami_context_t context; pami_result_t status = PAMI_ERROR; pami_configuration_t pami_config; pami_geometry_t world_geo; size_t barrier_alg_num[2]; pami_algorithm_t* bar_always_works_algo = NULL; pami_metadata_t* bar_always_works_md = NULL; pami_algorithm_t* bar_must_query_algo = NULL; pami_metadata_t* bar_must_query_md = NULL; pami_xfer_t barrier; int my_id; volatile int is_fence_done = 0; volatile int is_barrier_done = 0; /* create PAMI client */ RC( PAMI_Client_create("TEST", &client, NULL, 0) ); DBG_FPRINTF((stderr,"Client created successfully at 0x%p\n",client)); /* create PAMI context */ RC( PAMI_Context_createv(client, NULL, 0, &context, 1) ); DBG_FPRINTF((stderr,"Context created successfully at 0x%p\n",context)); /* query my task id */ bzero(&pami_config, sizeof(pami_configuration_t)); pami_config.name = PAMI_CLIENT_TASK_ID; RC( PAMI_Client_query(client, &pami_config, 1) ); my_id = pami_config.value.intval; DBG_FPRINTF((stderr,"My task id is %d\n", my_id)); /* get the world geometry */ RC( PAMI_Geometry_world(client, &world_geo) ); DBG_FPRINTF((stderr,"World geometry is at 0x%p\n",world_geo)); /* query number of barrier algorithms */ RC( PAMI_Geometry_algorithms_num(world_geo, PAMI_XFER_BARRIER, barrier_alg_num) ); DBG_FPRINTF((stderr,"%d-%d algorithms are available for barrier op\n", barrier_alg_num[0], barrier_alg_num[1])); if (barrier_alg_num[0] <= 0) { fprintf (stderr, "Error. 
No (%lu) algorithm is available for barrier op\n", barrier_alg_num[0]); return 1; } /* query barrier algorithm list */ bar_always_works_algo = (pami_algorithm_t*)malloc(sizeof(pami_algorithm_t)*barrier_alg_num[0]); bar_always_works_md = (pami_metadata_t*)malloc(sizeof(pami_metadata_t)*barrier_alg_num[0]); bar_must_query_algo = (pami_algorithm_t*)malloc(sizeof(pami_algorithm_t)*barrier_alg_num[1]); bar_must_query_md = (pami_metadata_t*)malloc(sizeof(pami_metadata_t)*barrier_alg_num[1]); RC( PAMI_Geometry_algorithms_query(world_geo, PAMI_XFER_BARRIER, bar_always_works_algo, bar_always_works_md, barrier_alg_num[0], bar_must_query_algo, bar_must_query_md, barrier_alg_num[1]) ); DBG_FPRINTF((stderr,"Algorithm [%s] at 0x%p will be used for barrier op\n", bar_always_works_md[0].name, bar_always_works_algo[0])); /* begin PAMI fence */ RC( PAMI_Fence_begin(context) ); DBG_FPRINTF((stderr,"PAMI fence begins\n")); /* ------------------------------------------------------------------------ */ pami_extension_t extension; const char ext_name[] = "EXT_hfi_extension"; const char sym_name[] = "hfi_remote_update"; hfi_remote_update_fn remote_update = NULL; hfi_remote_update_info_t remote_info; pami_memregion_t mem_region; size_t mem_region_sz = 0; unsigned long long operand = 1234; unsigned long long orig_val = 0; int offset = (operand)%MAX_TABLE_SZ; /* initialize table for remote update operation */ int i; for (i = 0; i < MAX_TABLE_SZ; i ++) { table[i] = (unsigned long long) i; } orig_val = table[offset]; /* open PAMI extension */ RC( PAMI_Extension_open (client, ext_name, &extension) ); DBG_FPRINTF((stderr,"Open %s successfully.\n", ext_name)); /* load PAMI extension function */ remote_update = (hfi_remote_update_fn) PAMI_Extension_symbol (extension, sym_name); if (remote_update == (void *)NULL) { fprintf (stderr, "Error. 
Failed to load %s function in %s\n", sym_name, ext_name); return 1; } else { DBG_FPRINTF((stderr,"Loaded function %s in %s successfully.\n", sym_name, ext_name)); } /* create a memory region for remote update operation */ RC( PAMI_Memregion_create(context, table, MAX_TABLE_SZ*sizeof(unsigned long long), &mem_region_sz, &mem_region) ); DBG_FPRINTF((stderr,"%d-byte PAMI memory region created successfully.\n", mem_region_sz)); /* perform a PAMI barrier */ is_barrier_done = 0; barrier.cb_done = barrier_done; barrier.cookie = (void*)&is_barrier_done; barrier.algorithm = bar_always_works_algo[0]; RC( PAMI_Collective(context, &barrier) ); DBG_FPRINTF((stderr,"PAMI barrier op invoked successfully.\n")); while (is_barrier_done == 0) PAMI_Context_advance(context, 1000); DBG_FPRINTF((stderr,"PAMI barrier op finished successfully.\n")); RC( PAMI_Context_lock(context) ); /* prepare remote update info */ remote_info.dest = my_id^1; remote_info.op = 0; /* op_add */ remote_info.atomic_operand = operand; remote_info.dest_buf = (unsigned long long)(&(table[offset])); /* invoke remote update PAMI extension function */ RC( remote_update(context, 1, &remote_info) ); DBG_FPRINTF((stderr,"Function %s invoked successfully.\n", sym_name)); RC( PAMI_Context_unlock(context) ); /* perform a PAMI fence */ is_fence_done = 0; RC( PAMI_Fence_all(context, fence_done, (void*)&is_fence_done) ); DBG_FPRINTF((stderr,"PAMI_Fence_all invoked successfully.\n")); while (is_fence_done == 0) PAMI_Context_advance(context, 1000); DBG_FPRINTF((stderr,"PAMI_Fence_all finished successfully.\n")); /* perform a PAMI barrier */ is_barrier_done = 0; barrier.cb_done = barrier_done; barrier.cookie = (void*)&is_barrier_done; barrier.algorithm = bar_always_works_algo[0]; RC( PAMI_Collective(context, &barrier) ); DBG_FPRINTF((stderr,"PAMI barrier op invoked successfully.\n")); while (is_barrier_done == 0) PAMI_Context_advance(context, 1000); DBG_FPRINTF((stderr,"PAMI barrier op finished successfully.\n")); /* verify data 
after remote update operation */ if (table[offset] != orig_val + operand) { printf("Data verification at offset %d with operand %lu failed: " "[%lu expected with %lu updated]\n", offset, operand, orig_val+operand, table[offset]); } else { printf("Data verification at offset %d with operand %lu passed: " "[%lu expected with %lu updated].\n", offset, operand, orig_val+operand, table[offset]); } /* destroy the memory region after remote update operation */ RC( PAMI_Memregion_destroy(context, &mem_region) ); DBG_FPRINTF((stderr,"PAMI memory region removed successfully.\n")); /* close PAMI extension */ RC( PAMI_Extension_close (extension) ); DBG_FPRINTF((stderr,"Close %s successfully.\n", ext_name)); /* ------------------------------------------------------------------------ */ /* end PAMI fence */ RC( PAMI_Fence_end(context) ); DBG_FPRINTF((stderr,"PAMI fence ends\n")); /* destroy PAMI context */ RC( PAMI_Context_destroyv(&context, 1) ); DBG_FPRINTF((stderr, "PAMI context destroyed successfully\n")); /* destroy PAMI client */ RC( PAMI_Client_destroy(&client) ); DBG_FPRINTF((stderr, "PAMI client destroyed successfully\n")); return 0; }
/*
 * Blocking allgather for multiple images per node, built on PAMI's
 * allgatherv_int with the team's allgavi_alg algorithm.
 *
 * Every calling thread copies its nbytes contribution into
 * team->pami.scratch_space at the position selected by its
 * my_local_image.  The leader (the thread for which the first
 * gasnete_coll_pami_images_barrier() returns nonzero) then gathers into
 * its own dst and publishes that pointer through team->pami.tmp_addr;
 * the other threads wait for tmp_addr to become non-NULL and copy
 * nbytes * team->total_images bytes from it into their own dst.
 *
 * The per-rank counts/displacements in team->pami.counts/displs are
 * recomputed only when nbytes differs from team->pami.prev_nbytes.
 */
static void
gasnete_coll_pami_allgavi(const gasnet_team_handle_t team,
                          void *dst, const void *src,
                          size_t nbytes, int flags GASNETE_THREAD_FARG)
{
  int leader = gasnete_coll_pami_images_barrier(team); /* XXX: over-synced for IN_NO and IN_MY */
  const gasnete_coll_threaddata_t * const mythread = GASNETE_COLL_MYTHREAD_NOALLOC;

  if (flags & GASNET_COLL_IN_ALLSYNC) {
    if (leader) {
      gasnetc_fast_barrier();
    }
    (void) gasnete_coll_pami_images_barrier(team);
  }

  /* Stage this thread's contribution in the shared scratch buffer */
  GASNETE_FAST_UNALIGNED_MEMCPY(gasnete_coll_scale_ptr(team->pami.scratch_space,
                                                       mythread->my_local_image,
                                                       nbytes),
                                src, nbytes);
  (void) gasnete_coll_pami_images_barrier(team);

  if (leader) {
    volatile unsigned int completed = 0;
    pami_result_t status;
    pami_xfer_t xfer;

    xfer = gasnete_op_template_allgavi; /* allgatherv_int */
    xfer.cookie = (void *)&completed;
    xfer.algorithm = team->pami.allgavi_alg;
    xfer.cmd.xfer_allgatherv_int.sndbuf = team->pami.scratch_space;
    xfer.cmd.xfer_allgatherv_int.stypecount = nbytes * team->my_images;
    xfer.cmd.xfer_allgatherv_int.rcvbuf = dst;
    xfer.cmd.xfer_allgatherv_int.rtypecounts = team->pami.counts;
    xfer.cmd.xfer_allgatherv_int.rdispls = team->pami.displs;

    /* Cached counts/displacements are reusable while nbytes is unchanged */
    if (team->pami.prev_nbytes != nbytes) {
      int rank;
      for (rank = 0; rank < team->total_ranks; ++rank) {
        xfer.cmd.xfer_allgatherv_int.rtypecounts[rank] = nbytes * team->all_images[rank];
        xfer.cmd.xfer_allgatherv_int.rdispls[rank] = nbytes * team->all_offset[rank];
      }
      team->pami.prev_nbytes = nbytes;
    }

    GASNETC_PAMI_LOCK(gasnetc_context);
    status = PAMI_Collective(gasnetc_context, &xfer);
    GASNETC_PAMI_UNLOCK(gasnetc_context);
    GASNETC_PAMI_CHECK(status, "initiating blocking allgatherv_int");

    gasneti_polluntil(completed);

    gasneti_assert(NULL == team->pami.tmp_addr);
    gasneti_sync_writes(); /* XXX: is this necessary? */
    team->pami.tmp_addr = dst; /* wakes pollers, below */
    (void) gasnete_coll_pami_images_barrier(team); /* matches instance below vvvv */
    team->pami.tmp_addr = NULL;
  } else {
    gasneti_waitwhile(NULL == team->pami.tmp_addr);
    GASNETE_FAST_UNALIGNED_MEMCPY(dst, team->pami.tmp_addr, nbytes * team->total_images);
    (void) gasnete_coll_pami_images_barrier(team); /* matches instance above ^^^^ */
  }

  if (flags & GASNET_COLL_OUT_ALLSYNC) {
    if (leader) {
      gasnetc_fast_barrier();
    }
    (void) gasnete_coll_pami_images_barrier(team);
  }
}
/*
 * Blocking scatter for multiple images per node, built on PAMI's
 * scatterv_int with the team's scattvi_alg algorithm.
 *
 * The leader receives nbytes * team->my_images bytes into
 * team->pami.scratch_space and publishes that buffer through
 * team->pami.tmp_addr.  Every thread then copies its own nbytes slice
 * (selected by its my_local_image) into dst, and the leader resets
 * tmp_addr to NULL after the following images barrier.
 *
 * The leader is the thread for which gasnete_coll_pami_images_barrier()
 * returns nonzero, except that when GASNET_COLL_LOCAL is set and the
 * source image is local, the thread owning srcimage is the leader.
 * Send-side fields are filled in only when the source image is local;
 * counts/displacements are recomputed only when nbytes differs from
 * team->pami.prev_nbytes.
 */
static void
gasnete_coll_pami_scattvi(const gasnet_team_handle_t team,
                          void *dst, gasnet_image_t srcimage, const void *src,
                          size_t nbytes, int flags GASNETI_THREAD_FARG)
{
  const int root_is_local = gasnete_coll_image_is_local(team, srcimage);
  int leader = gasnete_coll_pami_images_barrier(team); /* XXX: over-synced for IN_NO and IN_MY */
  const gasnete_coll_threaddata_t * const mythread = GASNETE_COLL_MYTHREAD_NOALLOC;

  if (root_is_local && (flags & GASNET_COLL_LOCAL)) {
    /* root thread must be leader for its node */
    leader = (srcimage == mythread->my_image);
  }

  if (leader) {
    volatile unsigned int completed = 0;
    pami_result_t status;
    pami_xfer_t xfer;

    if (flags & GASNET_COLL_IN_ALLSYNC) {
      gasnetc_fast_barrier();
    }

    xfer = gasnete_op_template_scattvi; /* scatterv_int */
    xfer.cookie = (void *)&completed;
    xfer.algorithm = team->pami.scattvi_alg;
    xfer.cmd.xfer_scatterv_int.root =
        gasnetc_endpoint(GASNETE_COLL_REL2ACT(team,gasnete_coll_image_node(team, srcimage)));
    xfer.cmd.xfer_scatterv_int.rcvbuf = team->pami.scratch_space;
    xfer.cmd.xfer_scatterv_int.rtypecount = nbytes * team->my_images;

    if (root_is_local) {
      xfer.cmd.xfer_scatterv_int.sndbuf = (/*not-const*/ void *)src;
      xfer.cmd.xfer_scatterv_int.stypecounts = team->pami.counts;
      xfer.cmd.xfer_scatterv_int.sdispls = team->pami.displs;

      /* Cached counts/displacements are reusable while nbytes is unchanged */
      if (team->pami.prev_nbytes != nbytes) {
        int rank;
        for (rank = 0; rank < team->total_ranks; ++rank) {
          xfer.cmd.xfer_scatterv_int.stypecounts[rank] = nbytes * team->all_images[rank];
          xfer.cmd.xfer_scatterv_int.sdispls[rank] = nbytes * team->all_offset[rank];
        }
        team->pami.prev_nbytes = nbytes;
      }
    }

    GASNETC_PAMI_LOCK(gasnetc_context);
    status = PAMI_Collective(gasnetc_context, &xfer);
    GASNETC_PAMI_UNLOCK(gasnetc_context);
    GASNETC_PAMI_CHECK(status, "initiating blocking scatterv_int");

    gasneti_polluntil(completed);

    gasneti_assert(NULL == team->pami.tmp_addr);
    gasneti_sync_writes();
    team->pami.tmp_addr = team->pami.scratch_space; /* wakes pollers, below */
  } else {
    gasneti_waitwhile(NULL == team->pami.tmp_addr);
  }

  /* Each thread extracts its own slice from the published buffer */
  GASNETI_MEMCPY (dst,
                  gasnete_coll_scale_ptr(team->pami.tmp_addr,
                                         mythread->my_local_image,
                                         nbytes),
                  nbytes);
  (void) gasnete_coll_pami_images_barrier(team);

  if (leader) {
    team->pami.tmp_addr = NULL;
  }

  if (flags & GASNET_COLL_OUT_ALLSYNC) {
    if (leader) {
      gasnetc_fast_barrier();
    }
    (void) gasnete_coll_pami_images_barrier(team);
  }
}