int main() { DCMF_Messager_initialize(); init(); barrier_init(DCMF_DEFAULT_GLOBALBARRIER_PROTOCOL); control_init(DCMF_DEFAULT_CONTROL_PROTOCOL, DCMF_DEFAULT_NETWORK); memregion_init(MAX_BUF_SIZE * nranks); put_init(DCMF_DEFAULT_PUT_PROTOCOL, DCMF_TORUS_NETWORK); barrier(); if (myrank == 0) { printf("Put Latency (usec) post vs restart\n"); fflush(stdout); } put_restart(); barrier(); printf("[%d] Benchmark complete\n", myrank); fflush(stdout); memregion_finalize(); DCMF_Messager_finalize(); return 0; }
int main() { DCMF_Messager_initialize(); init(); barrier_init(DCMF_DEFAULT_GLOBALBARRIER_PROTOCOL); allreduce_init(DCMF_DEFAULT_GLOBALALLREDUCE_PROTOCOL); control_init(DCMF_DEFAULT_CONTROL_PROTOCOL, DCMF_DEFAULT_NETWORK); memregion_init(MAX_MSG_SIZE * ITERATIONS * 2); get_init(DCMF_DEFAULT_PUT_PROTOCOL, DCMF_TORUS_NETWORK); if (myrank == 0) { printf("Get Bandwidth - All processes communication in pairs \n"); fflush(stdout); } get_contention(); if (myrank == 0) { printf("Benchmark Complete \n"); fflush(stdout); } memregion_finalize(); DCMF_Messager_finalize(); return 0; }
int main() { DCMF_Messager_initialize(); init(); barrier_init(DCMF_DEFAULT_GLOBALBARRIER_PROTOCOL); posix_memalign((void **) &source, 16, MAX_MSG_SIZE_LOCAL); posix_memalign((void **) &target, 16, MAX_MSG_SIZE_LOCAL); send_init(DCMF_EAGER_SEND_PROTOCOL, DCMF_TORUS_NETWORK); barrier(); send_remoteadvance(); barrier(); if (myrank == 0) { printf("[%d] Benchmark Complete \n", myrank); fflush(stdout); } memregion_finalize(); DCMF_Messager_finalize(); return 0; }
/** * \brief Initialize the DCMF ARMCI resources */ int ARMCIX_Init () { DCMF_CriticalSection_enter(0); DCMF_Messager_initialize (); ARMCIX_DCMF_Connection_initialize (); /* Determine request pool defaults */ int ARMCIX_DCMF_REQUESTPOOL_MAX = 1000; ENV_Int (getenv ("ARMCIX_DCMF_REQUESTPOOL_MAX"), &ARMCIX_DCMF_REQUESTPOOL_MAX); int ARMCIX_DCMF_REQUESTPOOL_INC = 0; ENV_Int (getenv ("ARMCIX_DCMF_REQUESTPOOL_INC"), &ARMCIX_DCMF_REQUESTPOOL_INC); ARMCIX_DCMF_request_initialize (ARMCIX_DCMF_REQUESTPOOL_MAX, ARMCIX_DCMF_REQUESTPOOL_INC); ARMCIX_DCMF_Get_register (); ARMCIX_DCMF_Put_register (__connection); ARMCIX_DCMF_Acc_register (__connection); ARMCIX_DCMF_Fence_register (__connection); ARMCIX_DCMF_Rmw_register (); /* Determine interrupt mode */ int interrupts = 1; ENV_Bool (getenv ("DCMF_INTERRUPT"), &interrupts); ENV_Bool (getenv ("DCMF_INTERRUPTS"), &interrupts); DCMF_Configure_t config; memset (&config, 0x00, sizeof(DCMF_Configure_t)); config.interrupts = (interrupts==0)?DCMF_INTERRUPTS_OFF:DCMF_INTERRUPTS_ON; DCMF_Messager_configure (&config, &config); DCMF_Messager_configure (NULL, &config); //ARMCIX_DCMF_request_print ("after armcix_init"); DCMF_CriticalSection_exit(0); return 0; }
int main() { DCMF_Messager_initialize(); init(); barrier_init(DCMF_DEFAULT_GLOBALBARRIER_PROTOCOL); allreduce_init(DCMF_DEFAULT_GLOBALALLREDUCE_PROTOCOL); control_init(DCMF_DEFAULT_CONTROL_PROTOCOL, DCMF_DEFAULT_NETWORK); memregion_init(MAX_MSG_SIZE_LOCAL * ITERATIONS_LOCAL * 2); put_init(DCMF_DEFAULT_PUT_PROTOCOL, DCMF_TORUS_NETWORK); barrier(); printf("[%d] Start \n", myrank); fflush(stdout); if (myrank == 0) { printf("Intra node transfer latency - memcpy vs put \n"); fflush(stdout); } memcpyvsput(); barrier(); if (myrank == 0) { printf("[%d] Benchmark Complete \n", myrank); fflush(stdout); } memregion_finalize(); DCMF_Messager_finalize(); return 0; }
int main() { DCMF_Messager_initialize(); init(); source = (char *) malloc(MAX_MSG_SIZE * ITERATIONS * 2); target = (char *) malloc(MAX_MSG_SIZE * ITERATIONS * 2); target_index = 0; barrier_init(DCMF_DEFAULT_GLOBALBARRIER_PROTOCOL); allreduce_init(DCMF_DEFAULT_GLOBALALLREDUCE_PROTOCOL); ack_control_init(DCMF_DEFAULT_CONTROL_PROTOCOL, DCMF_DEFAULT_NETWORK); send_init(DCMF_DEFAULT_SEND_PROTOCOL, DCMF_TORUS_NETWORK); rcb_send_init(DCMF_DEFAULT_SEND_PROTOCOL, DCMF_TORUS_NETWORK); barrier(); send_localvsremote(); barrier(); if (myrank == 0) { printf("[%d] Benchmark Complete \n", myrank); fflush(stdout); } DCMF_Messager_finalize(); return 0; }
int main() { int i, rank, nranks, msgsize, status, expected; long bufsize; int *src_buffer; int *trg_buffer; unsigned *ranks; DCMF_Result dcmf_result; DCMF_CollectiveProtocol_t barrier_protocol, lbarrier_protocol; DCMF_CollectiveProtocol_t allreduce_protocol, allreduce_notree_protocol; DCMF_Barrier_Configuration_t barrier_conf; DCMF_Allreduce_Configuration_t allreduce_conf; DCMF_CollectiveRequest_t crequest, crequest1, crequest2; DCMF_Callback_t done_callback; volatile unsigned allreduce_active = 0; DCMF_Messager_initialize(); dcmf_result = DCMF_Collective_initialize(); assert(dcmf_result == DCMF_SUCCESS); rank = DCMF_Messager_rank(); nranks = DCMF_Messager_size(); ranks = (unsigned *) malloc(nranks * sizeof(int)); for(i=0; i<nranks; i++) ranks[i] = i; bufsize = MAX_MSG_SIZE; src_buffer = (int *) malloc(bufsize); trg_buffer = (int *) malloc(bufsize); barrier_conf.protocol = DCMF_GI_BARRIER_PROTOCOL; barrier_conf.cb_geometry = getGeometry; dcmf_result = DCMF_Barrier_register(&barrier_protocol, &barrier_conf); assert(dcmf_result == DCMF_SUCCESS); barrier_conf.protocol = DCMF_LOCKBOX_BARRIER_PROTOCOL; barrier_conf.cb_geometry = getGeometry; dcmf_result = DCMF_Barrier_register(&lbarrier_protocol, &barrier_conf); assert(dcmf_result == DCMF_SUCCESS); DCMF_CollectiveProtocol_t *barrier_ptr, *lbarrier_ptr; barrier_ptr = &barrier_protocol; lbarrier_ptr = &lbarrier_protocol; dcmf_result = DCMF_Geometry_initialize(&geometry, 0, ranks, nranks, &barrier_ptr, 1, &lbarrier_ptr, 1, &crequest, 0, 1); assert(dcmf_result == DCMF_SUCCESS); allreduce_conf.protocol = DCMF_TREE_ALLREDUCE_PROTOCOL; allreduce_conf.cb_geometry = getGeometry; allreduce_conf.reuse_storage = 1; dcmf_result = DCMF_Allreduce_register(&allreduce_protocol, &allreduce_conf); assert(dcmf_result == DCMF_SUCCESS); allreduce_conf.protocol = DCMF_TORUS_BINOMIAL_ALLREDUCE_PROTOCOL; allreduce_conf.cb_geometry = getGeometry; allreduce_conf.reuse_storage = 1; dcmf_result = DCMF_Allreduce_register(&allreduce_notree_protocol, &allreduce_conf); assert(dcmf_result == DCMF_SUCCESS); status = DCMF_Geometry_analyze(&geometry, &allreduce_protocol); assert(status == 1); status = DCMF_Geometry_analyze(&geometry, &allreduce_notree_protocol); assert(status == 1); done_callback.function = done; done_callback.clientdata = (void *) &allreduce_active; if (rank == 0) { printf("DCMF_Allreduce Test\n"); fflush(stdout); } for (msgsize = sizeof(int); msgsize < MAX_MSG_SIZE; msgsize *= 2) { /*initializing buffer*/ for (i = 0; i < bufsize/sizeof(int); i++) { src_buffer[i] = rank; trg_buffer[i] = 0; } allreduce_active += 1; /*sum reduce operation*/ dcmf_result = DCMF_Allreduce(&allreduce_protocol, &crequest1, done_callback, DCMF_SEQUENTIAL_CONSISTENCY, &geometry, (char *) src_buffer, (char *) trg_buffer, msgsize/sizeof(int), DCMF_SIGNED_INT, DCMF_SUM); assert(dcmf_result == DCMF_SUCCESS); while(allreduce_active > 0) DCMF_Messager_advance(); expected = (nranks-1)*(nranks)/2; for (i = 0; i < msgsize/sizeof(int); i++) { if(trg_buffer[i] - expected != 0) { printf("[%d] Validation has failed Expected: %d, Actual: %d, i: %d \n", rank, expected, trg_buffer[i], i); fflush(stdout); exit(-1); } } printf("[%d] %d message sum allreduce successful \n", rank, msgsize); fflush(stdout); for (i = 0; i < bufsize/sizeof(int); i++) { src_buffer[i] = 1; trg_buffer[i] = 0; } allreduce_active += 1; /*sum reduce operation*/ dcmf_result = DCMF_Allreduce(&allreduce_notree_protocol, &crequest2, done_callback, DCMF_SEQUENTIAL_CONSISTENCY, &geometry, (char *) src_buffer, (char *) trg_buffer, msgsize/sizeof(int), DCMF_SIGNED_INT, DCMF_PROD); assert(dcmf_result == DCMF_SUCCESS); while(allreduce_active > 0) DCMF_Messager_advance(); expected = 1; for (i = 0; i < msgsize/sizeof(int); i++) { if(trg_buffer[i] - expected != 0) { printf("[%d] Validation has failed Expected: %d, Actual: %d, i: %d \n", rank, expected, trg_buffer[i], i); fflush(stdout); exit(-1); } } printf("[%d] %d message product allreduce successful\n", rank, msgsize); fflush(stdout); } free(src_buffer); free(trg_buffer); DCMF_Messager_finalize(); return 0; }
int main() { int i, rank, nranks, msgsize, status, expected; long bufsize; int *buffer; DCMF_Protocol_t ga_protocol; DCMF_GlobalAllreduce_Configuration_t ga_conf; DCMF_Request_t request; DCMF_Callback_t done_callback; volatile unsigned ga_active = 0; DCMF_Messager_initialize(); rank = DCMF_Messager_rank(); nranks = DCMF_Messager_size(); bufsize = MAX_MSG_SIZE; buffer = (int *) malloc(bufsize); ga_conf.protocol = DCMF_DEFAULT_GLOBALALLREDUCE_PROTOCOL; status = DCMF_GlobalAllreduce_register(&ga_protocol, &ga_conf); if(status != DCMF_SUCCESS) { printf("DCMF_GlobalAllreduce_register returned with error %d \n", status); exit(-1); } done_callback.function = done; done_callback.clientdata = (void *) &ga_active; if (rank == 0) { printf("DCMF_Allreduce Test\n"); fflush(stdout); } for (msgsize = sizeof(int); msgsize < MAX_MSG_SIZE; msgsize *= 2) { /*initializing buffer*/ for (i = 0; i < bufsize/sizeof(int); i++) { buffer[i] = rank; } ga_active += 1; /*sum reduce operation*/ status = DCMF_GlobalAllreduce(&ga_protocol, &request, done_callback, DCMF_SEQUENTIAL_CONSISTENCY, -1, (char *) buffer, (char *) buffer, msgsize/sizeof(int), DCMF_SIGNED_INT, DCMF_SUM); while(ga_active > 0) DCMF_Messager_advance(); expected = (nranks-1)*(nranks)/2; for (i = 0; i < msgsize/sizeof(int); i++) { if(buffer[i] - expected != 0) { printf("[%d] Validation has failed Expected: %d, Actual: %d, i: %d \n", rank, expected, buffer[i], i); fflush(stdout); exit(-1); } } printf("[%d] %d message sum reduce successful \n", rank, msgsize); fflush(stdout); for (i = 0; i < bufsize/sizeof(int); i++) { buffer[i] = 1; } ga_active += 1; status = DCMF_GlobalAllreduce(&ga_protocol, &request, done_callback, DCMF_SEQUENTIAL_CONSISTENCY, -1, (char *) buffer, (char *) buffer, msgsize/sizeof(int), DCMF_SIGNED_INT, DCMF_PROD); while(ga_active > 0) DCMF_Messager_advance(); expected = 1; for (i = 0; i < msgsize/sizeof(int); i++) { if(buffer[i] - expected != 0) { printf("[%d] Validation has failed Expected: %d, Actual: %d, i: %d \n", rank, expected, buffer[i], i); fflush(stdout); exit(-1); } } printf("[%d] %d message product reduce successful\n", rank, msgsize); fflush(stdout); } free(buffer); DCMF_Messager_finalize(); return 0; }
/** * \brief Initialize the DCMF ARMCI resources */ int ARMCIX_Init () { DCMF_CriticalSection_enter(0); DCMF_Messager_initialize (); ARMCIX_DCMF_Connection_initialize (); /* Determine request pool defaults */ int ARMCIX_DCMF_REQUESTPOOL_MAX = 1000; ENV_Int (getenv ("ARMCIX_DCMF_REQUESTPOOL_MAX"), &ARMCIX_DCMF_REQUESTPOOL_MAX); int ARMCIX_DCMF_REQUESTPOOL_INC = 0; ENV_Int (getenv ("ARMCIX_DCMF_REQUESTPOOL_INC"), &ARMCIX_DCMF_REQUESTPOOL_INC); ARMCIX_DCMF_request_initialize (ARMCIX_DCMF_REQUESTPOOL_MAX, ARMCIX_DCMF_REQUESTPOOL_INC); ARMCIX_DCMF_Get_register (); ARMCIX_DCMF_Put_register (__connection); ARMCIX_DCMF_Acc_register (__connection); ARMCIX_DCMF_Fence_register (__connection); ARMCIX_DCMF_Rmw_register (); /* Initializer helper thread or configure interrupt mode */ int interrupts = 0; ENV_Bool (getenv ("DCMF_INTERRUPT"), &interrupts); ENV_Bool (getenv ("DCMF_INTERRUPTS"), &interrupts); //fprintf(stdout,"interrupts = %d\n",interrupts); /*if (interrupts==1){ if( 0==DCMF_Messager_rank() ) fprintf(stdout,"DCMF interrupts ON\n"); } else { if( 0==DCMF_Messager_rank() ) fprintf(stdout,"DCMF interrupts OFF\n"); }*/ if (interrupts==0) { int ret = pthread_create(&armcix_advance_thread, NULL, armcix_advance, NULL); if ( ret != 0 ) { if( 0==DCMF_Messager_rank() ) fprintf(stdout,"pthread_create failed\n"); armcix_advance_active = 0; } else { if( 0==DCMF_Messager_rank() ) fprintf(stdout,"pthread_create succeeded\n"); armcix_advance_active = 1; } } DCMF_Configure_t config; memset (&config, 0x00, sizeof(DCMF_Configure_t)); config.interrupts = (interrupts==0)?DCMF_INTERRUPTS_OFF:DCMF_INTERRUPTS_ON; DCMF_Messager_configure (&config, &config); DCMF_Messager_configure (NULL, &config); //ARMCIX_DCMF_request_print ("after armcix_init"); DCMF_CriticalSection_exit(0); return 0; }
int main(int argc, void* argv[]) { DCMF_Configure_t config; config.thread_level = DCMF_THREAD_MULTIPLE; DCMF_Messager_initialize(); DCMF_Messager_configure(&config, &config); init(); if (nranks != (THREAD_NUM + 1)) { printf("This test requires only %d processes \n", (THREAD_NUM + 1)); fflush(stdout); return -1; } barrier_init(DCMF_DEFAULT_GLOBALBARRIER_PROTOCOL); control_init(DCMF_DEFAULT_CONTROL_PROTOCOL, DCMF_DEFAULT_NETWORK); memregion_init(LOCAL_MAX_BUF_SIZE * THREAD_NUM); get_init(DCMF_DEFAULT_PUT_PROTOCOL, DCMF_TORUS_NETWORK); source = (char *) malloc(LOCAL_MAX_BUF_SIZE * THREAD_NUM); target = (char *) malloc(LOCAL_MAX_BUF_SIZE * THREAD_NUM); send_init(DCMF_DEFAULT_SEND_PROTOCOL, DCMF_TORUS_NETWORK); int status; long i; if (myrank == 0) { pthread_t threads[THREAD_NUM]; pthread_barrier_init(&ptbarrier, NULL, THREAD_NUM); pthread_barrier_init(&ptbarrier1, NULL, THREAD_NUM); for (i = 0; i < THREAD_NUM; i++) { pthread_create(&threads[i], NULL, mrate_test, (void *) i); } for (i = 0; i < THREAD_NUM; i++) { pthread_join(threads[i], (void *) &status); } } else { snd_rcv_active += LOCAL_ITERATIONS; while (snd_rcv_active > 0) DCMF_Messager_advance(); } barrier(); DCMF_Messager_finalize(); if (myrank == 0) { printf("Benchmark Complete \n"); fflush(stdout); } return (0); }