void* roundrobin(void* tparam) { ptrdiff_t tid = (ptrdiff_t)tparam; int offset = tid*N_ELEMS; /* fprintf(stderr,"Starting thread %lu with offset %d\n",tid,offset); */ int nextpe = (shmem_my_pe()+1)%shmem_n_pes(); int prevpe = (shmem_my_pe()-1 + shmem_n_pes())%shmem_n_pes(); shmem_long_put(target+offset, source+offset, N_ELEMS, nextpe); /* fprintf(stderr,"Thread %lu done first put\n",tid); */ pthread_barrier_wait(&fencebar); if(tid == 0) shmem_barrier_all(); pthread_barrier_wait(&fencebar); shmem_long_get(source+offset, target+offset, N_ELEMS, prevpe); /* fprintf(stderr,"Thread %lu done first get\n",tid); */ pthread_barrier_wait(&fencebar); if(tid == 0) shmem_barrier_all(); pthread_barrier_wait(&fencebar); shmem_long_get(target+offset, source+offset, N_ELEMS, nextpe); /* fprintf(stderr,"Thread %lu done second get\n",tid); */ pthread_barrier_wait(&fencebar); if(tid == 0) shmem_barrier_all(); pthread_barrier_wait(&fencebar); /* fprintf(stderr,"Done thread %lu\n",tid); */ return 0; }
/*
 * PE 0 asks shmem_ptr() for a direct pointer to PE 1's copy of the static
 * array bigd and, if one is available, stores 1..100 through it.  After a
 * global barrier PE 1 prints its copy of the array.
 *
 * Returns 0 on normal completion (previously returned 1, which the
 * launcher would see as a failure) and now shuts the library down with
 * shmem_finalize().
 */
int main(void)
{
    static int bigd[100];
    int *ptr;
    int i;

    shmem_init();

    if (shmem_my_pe() == 0) {
        /* initialize PE 1's bigd array */
        ptr = shmem_ptr(bigd, 1);
        if (ptr == NULL) {
            printf("can't use pointer to directly access PE 1's array\n");
        } else {
            for (i = 0; i < 100; i++) {
                *ptr++ = i + 1;
            }
        }
    }

    shmem_barrier_all();

    if (shmem_my_pe() == 1) {
        printf("bigd on PE 1 is:\n");
        for (i = 0; i < 100; i++) {
            printf(" %d\n", bigd[i]);
        }
        printf("\n");
    }

    shmem_finalize();
    return 0;
}
/*
 * Max-reduction demo: every PE fills src[i] with (its PE number + i), all
 * PEs reduce with shmem_long_max_to_all, and each PE prints dst.
 *
 * The reduction now covers all N elements over every PE in the job.  The
 * previous call reduced a hard-coded 3 elements across a hard-coded 4 PEs,
 * while the surrounding loops fill and print N elements.
 * NOTE(review): assumes pWrk is sized for an N-element reduction - confirm
 * against its declaration.
 */
int main ()
{
    int i;
    int npes;

    for (i = 0; i < _SHMEM_REDUCE_SYNC_SIZE; i += 1) {
        pSync[i] = _SHMEM_SYNC_VALUE;
    }

    shmem_init ();
    npes = shmem_n_pes ();

    for (i = 0; i < N; i += 1) {
        src[i] = shmem_my_pe () + i;
    }

    /* Make sure src and pSync are initialised everywhere before reducing. */
    shmem_barrier_all ();

    shmem_long_max_to_all (dst, src, N, 0, 0, npes, pWrk, pSync);

    printf ("%d/%d dst =", shmem_my_pe (), npes);
    for (i = 0; i < N; i += 1) {
        printf (" %ld", dst[i]);
    }
    printf ("\n");

    shmem_finalize ();
    return 0;
}
/*
 * Non-blocking get check, driven by PE 0 only: for each PE in the job,
 * clear the local target buffer, fetch 10 longs of that PE's source with
 * shmem_long_get_nbi, complete the transfer with shmem_quiet, and compare
 * the result element by element with the local source array.
 *
 * Any mismatch is reported on stderr and the whole job is terminated with
 * exit status 1 once the current PE's comparison is finished.
 */
int main(int argc, char* argv[])
{
    int elem, pe, pe_total;
    int failed = 0;

    shmem_init();

    if (shmem_my_pe() == 0) {
        pe_total = shmem_n_pes();

        for (pe = 0; pe < pe_total; pe++) {
            memset(target, 0, sizeof(long) * 10);

            shmem_long_get_nbi(target, source, 10, pe);
            shmem_quiet();

            for (elem = 0; elem < 10; elem++) {
                if (source[elem] == target[elem]) {
                    continue;
                }
                fprintf(stderr,"[%d] get_nbi from PE %d: target[%d] = %ld, expected %ld\n",
                        shmem_my_pe(), pe, elem, target[elem], source[elem]);
                failed = 1;
            }

            if (failed) {
                shmem_global_exit(1);
            }
        }
    }

    shmem_finalize();
    return 0;
}
/*
 * Checks the outcome of the sort on this PE.
 *   1. Every key in my_local_keys lies inside this PE's bucket range
 *      [my_rank * BUCKET_WIDTH, (my_rank + 1) * BUCKET_WIDTH - 1].
 *   2. The entries of my_local_key_counts add up to my_bucket_size.
 *   3. The bucket sizes summed over all PEs equal NUM_KEYS_PER_PE * NUM_PES.
 *
 * Returns 1 if a check failed on this PE, 0 otherwise.  A failure of the
 * third check is reported, and reflected in the return value, on ROOT_PE only.
 */
static int verify_results(int const * const my_local_key_counts,
                          KEY_TYPE const * const my_local_keys)
{
  shmem_barrier_all();

  int error = 0;

  const int my_rank = shmem_my_pe();

  const int my_min_key = my_rank * BUCKET_WIDTH;
  const int my_max_key = (my_rank+1) * BUCKET_WIDTH - 1;

#ifdef ISX_PROFILING
  unsigned long long start = current_time_ns();
#endif

  // Check 1: bucket boundaries
  for(long long int k = 0; k < my_bucket_size; ++k){
    const int key = my_local_keys[k];
    if((key >= my_min_key) && (key <= my_max_key)){
      continue;
    }
    printf("Rank %d Failed Verification!\n",my_rank);
    printf("Key: %d is outside of bounds [%d, %d]\n", key, my_min_key, my_max_key);
    error = 1;
  }

#ifdef ISX_PROFILING
  unsigned long long end = current_time_ns();
  if (shmem_my_pe() == 0)
    printf("Verifying took %llu ns\n", end - start);
#endif

  // Check 2: the per-key counts must add up to the bucket size
  long long int counted_keys = 0;
  for(uint64_t slot = 0; slot < BUCKET_WIDTH; ++slot){
    counted_keys += my_local_key_counts[slot];
  }
  if(counted_keys != my_bucket_size){
    printf("Rank %d Failed Verification!\n",my_rank);
    printf("Actual Bucket Size: %lld Should be %lld\n", counted_keys, my_bucket_size);
    error = 1;
  }

  // Check 3: the global key count must be unchanged by the sort.
  // The reduction result lives in a static so it can serve as the
  // destination of the collective.
  static long long int global_key_total = 0;
  shmem_longlong_sum_to_all(&global_key_total, &my_bucket_size, 1, 0, 0, NUM_PES, llWrk, pSync);
  shmem_barrier_all();

  if((global_key_total != (long long int)(NUM_KEYS_PER_PE * NUM_PES)) &&
     (my_rank == ROOT_PE)){
    printf("Verification Failed!\n");
    printf("Actual total number of keys: %lld Expected %" PRIu64 "\n", global_key_total, NUM_KEYS_PER_PE * NUM_PES );
    error = 1;
  }

  return error;
}
int main(const int argc, char ** argv) { shmem_init(); #ifdef EXTRA_STATS _timer_t total_time; if(shmem_my_pe() == 0) { printf("\n-----\nmkdir timedrun fake\n\n"); timer_start(&total_time); } #endif init_shmem_sync_array(pSync); char * log_file = parse_params(argc, argv); int err = bucket_sort(); log_times(log_file); #ifdef EXTRA_STATS if(shmem_my_pe() == 0) { just_timer_stop(&total_time); double tTime = ( total_time.stop.tv_sec - total_time.start.tv_sec ) + ( total_time.stop.tv_nsec - total_time.start.tv_nsec )/1E9; avg_time *= 1000; avg_time_all2all *= 1000; printf("\n============================ MMTk Statistics Totals ============================\n"); if(NUM_ITERATIONS == 1) { //TODO: fix time calculation below for more number of iterations printf("time.mu\tt.ATA_KEYS\tt.MAKE_INPUT\tt.COUNT_BUCKET_SIZES\tt.BUCKETIZE\tt.COMPUTE_OFFSETS\tt.LOCAL_SORT\tBARRIER_AT_START\tBARRIER_AT_EXCHANGE\tBARRIER_AT_END\tnWorkers\tnPEs\n"); double TIMES[TIMER_NTIMERS]; memset(TIMES, 0x00, sizeof(double) * TIMER_NTIMERS); for(int i=0; i<NUM_PES; i++) { for(int t = 0; t < TIMER_NTIMERS; ++t){ if(timers[t].all_times != NULL){ TIMES[t] += timers[t].all_times[i]; } } } for(int t = 0; t < TIMER_NTIMERS; ++t){ printf("%.3f\t", (TIMES[t]/NUM_PES)*1000); } printf("1\t%d\n",NUM_PES); printf("Total time: %.3f\n",(TIMES[0]/NUM_PES)*1000); } else { printf("time.mu\ttimeAll2All\tnWorkers\tnPEs\n"); printf("%.3f\t%.3f\t1\t%d\n",avg_time,avg_time_all2all,NUM_PES); printf("Total time: %.3f\n",avg_time); } printf("------------------------------ End MMTk Statistics -----------------------------\n"); printf("===== TEST PASSED in %.3f msec =====\n",(tTime*1000)); } #endif shmem_finalize(); return err; }
/*
 * Atomic compare-and-swap race: every PE tries to replace the value -1 in
 * PE 0's copy of race_winner with its own PE number.  The PE whose swap
 * observes the old value -1 reports that it was first.
 *
 * Returns 0 on normal completion (previously returned 1, which signals
 * failure to the launcher) and now calls shmem_finalize() before exiting.
 */
int main(void)
{
    static int race_winner = -1;
    int oldval;

    shmem_init();

    oldval = shmem_int_cswap(&race_winner, -1, shmem_my_pe(), 0);
    if (oldval == -1) {
        printf("pe %d was first\n", shmem_my_pe());
    }

    shmem_finalize();
    return 0;
}
/*
 * Counts the occurrences of each key in my bucket.
 * A key's slot in the count array is its value minus this PE's minimum key
 * value (my_rank * BUCKET_WIDTH), so that indexing starts at 0.
 *
 * my_bucket_keys: the my_bucket_size unsorted keys of this PE's bucket; each
 *                 must lie in [my_rank * BUCKET_WIDTH, (my_rank+1)*BUCKET_WIDTH)
 *                 (enforced with assert).
 *
 * Returns a heap-allocated array of BUCKET_WIDTH counts; this function does
 * not free it.
 */
static int * count_local_keys(KEY_TYPE const * const my_bucket_keys)
{
  // calloc hands back zeroed counters in one step
  int * const my_local_key_counts = calloc(BUCKET_WIDTH, sizeof(int));
  assert(my_local_key_counts);

  timer_start(&timers[TIMER_SORT]);

  const int my_rank = shmem_my_pe();
  const int my_min_key = my_rank * BUCKET_WIDTH;

#ifdef ISX_PROFILING
  unsigned long long start = current_time_ns();
#endif

  // Count the occurrences of each key in my bucket
  for(long long int i = 0; i < my_bucket_size; ++i){
    const unsigned int key_index = my_bucket_keys[i] - my_min_key;

    assert(my_bucket_keys[i] >= my_min_key);
    assert(key_index < BUCKET_WIDTH);

    my_local_key_counts[key_index]++;
  }

#ifdef ISX_PROFILING
  unsigned long long end = current_time_ns();
  // Explicit casts keep the arguments in step with the conversion
  // specifiers whatever the underlying typedefs are.
  if (shmem_my_pe() == 0)
    printf("Counting local took %llu ns, my_bucket_size = %lld, BUCKET_WIDTH = "
           "%llu\n", end - start, (long long)my_bucket_size,
           (unsigned long long)BUCKET_WIDTH);
#endif

  timer_stop(&timers[TIMER_SORT]);

#ifdef DEBUG
  wait_my_turn();
  char msg[4096];
  sprintf(msg,"Rank %d: Bucket Size %lld | Local Key Counts:", my_rank, my_bucket_size);
  for(uint64_t i = 0; i < BUCKET_WIDTH; ++i){
    if(i < PRINT_MAX)
      sprintf(msg + strlen(msg),"%d ", my_local_key_counts[i]);
  }
  sprintf(msg + strlen(msg),"\n");
  printf("%s",msg);
  fflush(stdout);
  my_turn_complete();
#endif

  return my_local_key_counts;
}
/*
 * Non-blocking put demo: each PE fills a local array of N longs with its own
 * PE number and starts a non-blocking put of it into the symmetric buffer
 * dest on the next PE (wrapping around at the last PE).  The request is
 * completed with shmemx_wait_req before the global barrier.
 */
int main (int argc, char **argv)
{
    long src[N];
    long *dest;
    shmemx_request_handle_t handle;
    int me, npes;
    int neighbour;
    int k;

    shmem_init ();
    me = shmem_my_pe ();
    npes = shmem_n_pes ();

    for (k = 0; k < N; k += 1) {
        src[k] = (long) me;
    }

    dest = (long *) shmem_malloc (N * sizeof (*dest));

    neighbour = (me + 1) % npes;

    /* Start the transfer, then block until this request has completed. */
    shmemx_long_put_nb (dest, src, N, neighbour, &handle);
    shmemx_wait_req (handle);

    shmem_barrier_all ();

    shmem_free (dest);

    shmem_finalize ();
    return 0;
}
/*
 * Bitwise-OR reduction demo: each PE contributes (its PE number + 1) in src;
 * after shmem_int_or_to_all over all PEs, every PE prints the value in dst.
 */
int main ()
{
    int slot;
    int me;
    int npes;

    /* Prepare the synchronisation array used by the reduction. */
    for (slot = 0; slot < _SHMEM_REDUCE_SYNC_SIZE; slot += 1) {
        pSync[slot] = _SHMEM_SYNC_VALUE;
    }

    shmem_init ();
    me = shmem_my_pe ();
    npes = shmem_n_pes ();

    src = me + 1;

    shmem_barrier_all ();

    shmem_int_or_to_all (&dst, &src, 1, 0, 0, npes, pWrk, pSync);

    printf ("%d/%d dst = %d\n", me, npes, dst);

    shmem_finalize ();
    return 0;
}
/*
 * Single-element put demo for doubles: every PE allocates one symmetric
 * double and stores PI in it.  PE 0 then writes E into PE 1's copy with
 * shmem_double_p, and PE 1 reports whether its copy is within epsilon of E.
 */
int main(void)
{
    double *cell;
    int me;

    shmem_init();
    me = shmem_my_pe();

    cell = (double *) shmem_malloc(sizeof(*cell));
    *cell = PI;

    /* Every PE must have stored PI before PE 0 overwrites PE 1's copy. */
    shmem_barrier_all();

    if (me == 0) {
        shmem_double_p(cell, E, 1);
    }

    shmem_barrier_all();

    if (me == 1) {
        printf("PE %d: %f, %s\n", me, *cell,
               (fabs(*cell - E) < epsilon) ? "OK" : "FAIL");
    }

    shmem_free(cell);

    shmem_finalize();
    return 0;
}
/*
 * Demo of the shmalloc_nb / shfree_nb calls: each PE allocates one int with
 * shmalloc_nb, prints a message, and synchronises with a barrier.  PE 0 then
 * writes the number of PEs into the highest-numbered PE's copy, and after a
 * second barrier every PE prints its own copy before releasing it with
 * shfree_nb.
 */
int main (int argc, char **argv)
{
    int *ip;
    int me;
    int npes;

    start_pes (0);
    npes = shmem_n_pes ();
    me = shmem_my_pe ();

    /* fire off allocation */
    ip = shmalloc_nb (sizeof (*ip));

    printf ("PE %d / %d does some other work in the middle of shmalloc_nb\n", me, npes);

    /* now wait for all PEs to be ready */
    shmem_barrier_all ();

    /* PE 0 writes number of PEs to top PE */
    if (me == 0) {
        shmem_int_p (ip, npes, npes - 1);
    }

    shmem_barrier_all ();

    printf ("PE %d / %d says \"ip\" = %d\n", me, npes, *ip);

    shfree_nb (ip);

    printf ("PE %d / %d does some other work in the middle of shfree_nb\n", me, npes);

    return 0;
}
/* * Generates uniformly random keys [0, MAX_KEY_VAL] on each rank using the time and rank * number as a seed */ static KEY_TYPE * make_input(void) { timer_start(&timers[TIMER_INPUT]); KEY_TYPE * restrict const my_keys = malloc(NUM_KEYS_PER_PE * sizeof(KEY_TYPE)); pcg32_random_t rng = seed_my_rank(); for(uint64_t i = 0; i < NUM_KEYS_PER_PE; ++i) { my_keys[i] = pcg32_boundedrand_r(&rng, MAX_KEY_VAL); } timer_stop(&timers[TIMER_INPUT]); #ifdef DEBUG wait_my_turn(); char msg[1024]; const int my_rank = shmem_my_pe(); sprintf(msg,"Rank %d: Initial Keys: ", my_rank); for(uint64_t i = 0; i < NUM_KEYS_PER_PE; ++i){ if(i < PRINT_MAX) sprintf(msg + strlen(msg),"%d ", my_keys[i]); } sprintf(msg + strlen(msg),"\n"); printf("%s",msg); fflush(stdout); my_turn_complete(); #endif return my_keys; }
int main(void) { int i; int my_pe, num_pes; for (i = 0; i < SHMEM_BCAST_SYNC_SIZE; i += 1) { pSync[i] = _SHMEM_SYNC_VALUE; } shmem_init(); my_pe = shmem_my_pe(); num_pes = shmem_n_pes(); for (i = 0; i < N; i += 1) { src[i] = my_pe + i; } shmem_barrier_all(); shmem_long_max_to_all(dst, src, N, 0, 0, num_pes, pWrk, pSync); printf("%d/%d dst =", my_pe, num_pes); for (i = 0; i < N; i+= 1) { printf(" %ld", dst[i]); } printf("\n"); shmem_finalize(); return 0; }
/*
 * Even-numbered PEs set a to 42 and odd-numbered PEs set it to 0; PE 0 then
 * prints its value of a.
 *
 * Every PE now makes the same number of shmem_barrier_all() calls.
 * Previously only the even PEs called the barrier inside the branch, so the
 * collective was entered a different number of times on even and odd PEs.
 * The extra barrier is kept (now on both branches) so that PE 0's sequence
 * of calls is the same as before.
 */
int main()
{
    start_pes(0);
    me = shmem_my_pe();
    npes = shmem_n_pes();

    shmem_barrier_all();

    if (me % 2 == 0) {
        a = 42;
    } else {
        a = 0;
    }

    /* Collective calls: must be reached by every PE, outside any branch. */
    shmem_barrier_all();
    shmem_barrier_all();

    if (me == 0) {
        printf("value in a is %d (should be 42)\n", a);
    }

    return 0;
}
/*
 * Strided put demo: PE 0 copies 5 shorts from its local source array, taking
 * every second element (source stride 2), into consecutive elements (target
 * stride 1) of target on PE 1.  PE 1 then prints the first five elements of
 * its target array.
 */
int main ()
{
    static short target[10];
    short source[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
    int me;

    start_pes (0);
    me = shmem_my_pe ();

    /* strided transfer into target on PE 1 */
    if (me == 0) {
        shmem_short_iput (target, source, 1, 2, 5, 1);
    }

    /* sync sender and receiver */
    shmem_barrier_all ();

    if (me == 1) {
        printf ("target on PE %d is %hd %hd %hd %hd %hd\n", me,
                target[0], target[1], target[2], target[3], target[4]);
    }

    /* sync before exiting */
    shmem_barrier_all ();

    return 0;
}
/*
 * Single-element put demo for longs: every PE allocates one symmetric long,
 * stores 3 in it and prints it.  PE 0 then writes 42 into PE 1's copy with
 * shmem_long_p, and PE 1 reports whether it now holds 42.
 *
 * The symmetric allocation is released with shmem_free() before the library
 * is finalised; previously it was never freed.
 */
int main(void)
{
    long *f;
    int me;

    shmem_init();
    me = shmem_my_pe();

    f = (long *) shmem_malloc(sizeof(*f));
    *f = 3;

    shmem_barrier_all();

    printf("PE %d: before put, f = %ld\n", me, *f);

    if (me == 0) {
        shmem_long_p(f, 42, 1);
    }

    shmem_barrier_all();

    if (me == 1) {
        printf("PE %d: after put, f = %ld, %s\n", me, *f,
               (*f == 42) ? "OK" : "FAIL");
    }

    /* Called by every PE, matching the shmem_malloc above. */
    shmem_free(f);

    shmem_finalize();
    return 0;
}
/*
 * Collects the timing data of every timer via gather_rank_times() and
 * gather_rank_counts() and, on ROOT_PE, appends it to log_file.
 *
 * If file_exists() does not report the file as present before it is opened,
 * the run information and the timer names are written first.  The process
 * exits with status 1 when the file cannot be opened.
 */
static void log_times(char * log_file)
{
  for(uint64_t t = 0; t < TIMER_NTIMERS; ++t){
    timers[t].all_times = gather_rank_times(&timers[t]);
    timers[t].all_counts = gather_rank_counts(&timers[t]);
  }

  // Only the root PE touches the log file
  if(shmem_my_pe() != ROOT_PE) {
    return;
  }

  // Must be decided before fopen(), which creates the file
  const int print_names = (file_exists(log_file) != 1) ? 1 : 0;

  FILE * fp = fopen(log_file, "a+b");
  if(fp == NULL){
    perror("Error opening log file:");
    exit(1);
  }

  if(print_names == 1){
    print_run_info(fp);
    print_timer_names(fp);
  }
  print_timer_values(fp);

  report_summary_stats();

  fclose(fp);
}
int main(int argc, char **argv) { const long int ITER_CNT = 100; const long int MAX_MSG_SIZE = 1048576; int* source_addr; int peer; long int i=0,j=0, buff_size; long long int start_time, stop_time, res; double time; shmem_init(); int pe_id = shmem_my_pe(); source_addr = (int*) malloc(MAX_MSG_SIZE); if(pe_id == 1) { if(shmem_n_pes()!=4) fprintf(stderr,"Num PEs should be ==4"); printf("#Message Cnt;Time(s);MR(msgs/sec)\n"); } if (pe_id==1) peer = 3; else if(pe_id==3) peer = 1; get_rtc_res_(&res); for (i = 0; i < SHMEM_BARRIER_SYNC_SIZE; i += 1){ pSync[i] = SHMEM_SYNC_VALUE; } /* Collective operation: Implicit barrier on return from attach */ shmemx_am_attach(HANDLER_ID_REQ, &sample_req_handler); shmem_barrier_all(); if(pe_id == 1 || pe_id == 3) { for(buff_size=1; buff_size<=MAX_MSG_SIZE; buff_size*=2) { shmem_barrier(1,1,2,pSync); get_rtc_(&start_time); for(j=1;j<=ITER_CNT;j++) { if(pe_id == 1) { shmemx_am_request(peer, HANDLER_ID_REQ, source_addr, buff_size); shmemx_am_quiet(); } } shmem_barrier(1,1,2,pSync); get_rtc_(&stop_time); time = (stop_time - start_time)*1.0/(double)res/ITER_CNT; if(pe_id == 1) { printf("%20ld;%20.12f;%20.12f\n", buff_size, time, (double)buff_size/time); } fflush(stdout); } } shmem_barrier_all(); shmem_finalize(); }
// Brings up the OpenSHMEM-based communication layer (the commented-out code
// this replaces performed the equivalent MPI initialisation).
//
// Initialises the library, allocates the symmetric buffers, records this
// PE's rank and the PE count in comm.comm, selects an active set that starts
// at PE 0 with logPE_stride 0, allocates and zeroes one send flag and one
// receive flag per PE, and finally sets every entry of pSync1 and pSync2 to
// _SHMEM_SYNC_VALUE.
void initializeCommunication(LSMSCommunication &comm)
{
  int i;

  shmem_init();
  allocate_symm_buffers();

  comm.comm.rank = shmem_my_pe();
  comm.comm.size = shmem_n_pes();
  comm.comm.start_pe = 0;
  comm.comm.logPE_stride = 0;

  // One int flag per PE, for both directions, all starting at zero.
  sync_send_flag = (int*)shmalloc(comm.comm.size*sizeof(int));
  sync_recv_flag = (int*)shmalloc(comm.comm.size*sizeof(int));
  memset(sync_send_flag, 0, comm.comm.size*sizeof(int));
  memset(sync_recv_flag, 0, comm.comm.size*sizeof(int));

  shmem_barrier_all();

  for (i = 0; i < _SHMEM_BCAST_SYNC_SIZE; i += 1)
  {
    pSync1[i] = _SHMEM_SYNC_VALUE;
    pSync2[i] = _SHMEM_SYNC_VALUE;
  }
}
/*
 * PE 0 reports, for every other PE in the job, whether shmem_pe_accessible()
 * says it can be reached.  stdout is unbuffered so that the pieces of each
 * report line appear immediately.
 */
int main()
{
    int me, npes;

    setbuf(stdout, NULL);

    shmem_init();
    me = shmem_my_pe();
    npes = shmem_n_pes();

    if (me == 0) {
        int pe;
        for (pe = 1; pe < npes; pe += 1) {
            const char *negation = shmem_pe_accessible(pe) ? "" : "NOT ";

            printf("From %d: PE %d is ", me, pe);
            printf("%s", negation);
            printf("accessible\n");
        }
    }

    shmem_finalize();
    return 0;
}
int main(int argc, char *argv[]) { shmem_init(); input_file = argv[1]; mype = shmem_my_pe(); NumProcs = shmem_n_pes(); shmemx_am_attach(hid_BESTPATH, &handler_master_bestpath); shmemx_am_attach(hid_SUBSCRIBE, &handler_master_subscribe); shmemx_am_attach(hid_PUTPATH, &handler_master_putpath); shmemx_am_mutex_init(&lock_shortestlen); shmemx_am_mutex_init(&lock_queue); shmemx_am_mutex_init(&lock_workers_stack); if (NumProcs<2) { printf("At least 2 processes are required\n"); exit(-1); } // Initialize distance matrix. Ususally done by one process // and bcast, or initialized from a file in a shared file system. Fill_Dist(); // process 0 read the data and broadcast it to the others if (mype==0) Master(); else Worker(); //TODO // shmemx_am_detach(hid_BESTPATH); // shmemx_am_detach(hid_SUBSCRIBE); // shmemx_am_detach(hid_PUTPATH); shmem_finalize(); return 0; }
/*
 * shmem_collect64 demo: every PE marks its dst array with -1, then PE k
 * contributes k + 1 elements of src to a collect over PEs 0..3, and the
 * result is displayed with show_dst().
 *
 * NOTE(review): the active-set size is hard-coded to 4 although npes is
 * available, and pSync is initialised over _SHMEM_BCAST_SYNC_SIZE entries
 * although it is used by a collect.  Whether either can be generalised
 * depends on the sizes of src, dst and pSync, which are declared elsewhere.
 */
int main (void)
{
    int k;

    start_pes (0);
    npes = shmem_n_pes ();
    me = shmem_my_pe ();

    /* -1 marks elements that the collect did not write. */
    for (k = 0; k < DST_SIZE; k++) {
        dst[k] = -1;
    }

    for (k = 0; k < _SHMEM_BCAST_SYNC_SIZE; k += 1) {
        pSync[k] = _SHMEM_SYNC_VALUE;
    }

    shmem_barrier_all ();

    shmem_collect64 (dst, src, me + 1, 0, 0, 4, pSync);

    show_dst ("AFTER");

    return 0;
}
/*
 * shmem_broadcast64 demo: PE 1 is the root and broadcasts its 8-element
 * source array into the symmetric target buffer of the other PEs; every PE
 * then prints its target.
 *
 * Changes from the previous version:
 *  - the active set spans all PEs of the job instead of a hard-coded 4;
 *  - target is initialised to -1 before the broadcast, so a PE whose target
 *    is not written by the broadcast prints a defined value instead of
 *    whatever the fresh allocation contained.
 */
int main (void)
{
    int i;
    long *target;
    static long source[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
    int nlong = 8;
    int me;
    int npes;

    start_pes (0);
    me = shmem_my_pe ();
    npes = shmem_n_pes ();

    target = (long *) shmalloc (nlong * sizeof (*target));
    for (i = 0; i < nlong; i++) {
        target[i] = -1;
    }

    for (i = 0; i < _SHMEM_BCAST_SYNC_SIZE; i += 1) {
        pSync[i] = _SHMEM_SYNC_VALUE;
    }

    /* target and pSync must be ready on every PE before the broadcast. */
    shmem_barrier_all ();

    shmem_broadcast64 (target, source, nlong, 1, 0, 0, npes, pSync);

    for (i = 0; i < nlong; i++) {
        printf ("%d: target[%d] = %ld\n", me, i, target[i]);
    }

    shmem_barrier_all ();

    shfree (target);

    return 0;
}
/*
 * Builds this PE's random number generator.
 * The PE number is passed to pcg32_srandom_r() as both of its seed
 * arguments; nothing else contributes to the seed.
 *
 * Returns the seeded generator by value.
 */
static pcg32_random_t seed_my_rank(void)
{
  pcg32_random_t generator;
  const unsigned int rank = shmem_my_pe();
  const uint64_t seed = (uint64_t) rank;

  pcg32_srandom_r(&generator, seed, seed);
  return generator;
}
/*
 * shmem_fence demo: PE 0 puts a 10-element array into dest on PEs 1 and 2,
 * calls shmem_fence(), and then puts a single int into targ on PEs 1 and 2.
 * After a global barrier every PE prints dest[0].
 *
 * Returns 0 on normal completion (previously returned 1, which signals
 * failure to the launcher) and now calls shmem_finalize() before exiting.
 */
int main(void)
{
    long source[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
    int src = 99;

    shmem_init();

    if (shmem_my_pe() == 0) {
        shmem_long_put(dest, source, 10, 1);    /*put1*/
        shmem_long_put(dest, source, 10, 2);    /*put2*/

        /* Separates the array puts above from the int puts below. */
        shmem_fence();

        shmem_int_put(&targ, &src, 1, 1);       /*put3*/
        shmem_int_put(&targ, &src, 1, 2);       /*put4*/
    }

    shmem_barrier_all();    /* sync sender and receiver */

    printf("dest[0] on PE %d is %ld\n", shmem_my_pe(), dest[0]);

    shmem_finalize();
    return 0;
}
/*
 * shmem_fcollect64 demo: every PE marks its dst array with -1, then each PE
 * contributes 2 elements of src to an fcollect over all PEs, and the result
 * is displayed with show_dst().
 */
int main(void)
{
    int k;

    shmem_init();
    npes = shmem_n_pes();
    me = shmem_my_pe();

    /* -1 marks elements that the collect did not write. */
    for (k = 0; k < DST_SIZE; k++) {
        dst[k] = -1;
    }

    for (k = 0; k < SHMEM_COLLECT_SYNC_SIZE; k += 1) {
        pSync[k] = SHMEM_SYNC_VALUE;
    }

    shmem_barrier_all();

    shmem_fcollect64(dst, src, 2, 0, 0, npes, pSync);

    shmem_barrier_all();

    show_dst("AFTER");

    shmem_finalize();
    return 0;
}
/*
 * Minimal put demo: PE 0 writes the value 42 into dest on PE 1.
 *
 * dest is now declared static.  It was an automatic (stack) variable before,
 * which is not a valid target for a put to another PE: the target must be a
 * symmetric data object, such as an object with static storage duration.
 * The put is also skipped when the job has only one PE, since PE 1 does not
 * exist in that case.
 */
int main (int argc, char **argv)
{
    static int dest;    /* put target: must be symmetric */
    int src;
    int me, npes;

    shmem_init ();
    me = shmem_my_pe ();
    npes = shmem_n_pes ();

    src = 42;

    shmem_barrier_all ();

    if (me == 0 && npes > 1) {
        shmem_int_put (&dest, &src, 1, 1);
    }

    shmem_barrier_all ();

    shmem_finalize ();
    return 0;
}
/*
 * Runs the <TYPENAME>_rmaTest function of every type listed by
 * SHMEM_DECLARE_FOR_RMA against the next PE (wrapping around at the last
 * PE) and adds up the values they return.
 *
 * Passing "-v" as the first command-line argument turns on the verbose flag
 * handed to each test.  rand() is seeded with (PE number + 1).
 *
 * Returns the accumulated error count.
 */
int main(int argc, char* argv[])
{
    int verbose = 0;
    if (argc > 1) {
        verbose = (strcmp("-v", argv[1]) == 0);
    }

    int failures = 0;
    int my_pe, n_pes;

    shmem_init();
    n_pes = shmem_n_pes();
    my_pe = shmem_my_pe();

    srand(1 + my_pe);

    int neighbour = (my_pe + 1) % n_pes;

/* Expanded once per type by SHMEM_DECLARE_FOR_RMA; TYPE is not used. */
#define RUN_TEST(TYPENAME,TYPE) do { \
        failures += (TYPENAME##_rmaTest(neighbour,verbose)); \
    } while(0)

    SHMEM_DECLARE_FOR_RMA(RUN_TEST);

    shmem_finalize();

    return failures;
}
/*
 * Barrier over an active set in which every PE signals every other PE
 * directly.
 *
 * PE_start, logPE_stride and PE_size describe the active set: PE_size PEs
 * starting at PE_start, spaced 2^logPE_stride apart.  pSync is the
 * synchronisation array; its elements 0 and 1 are used, one per round.
 *
 * In each of two rounds the calling PE atomically increments pSync[round] on
 * every PE of the set except itself, waits until its own pSync[round] equals
 * SHMEM_SYNC_VALUE + PE_size - 1, and then resets that element to
 * SHMEM_SYNC_VALUE.
 * NOTE(review): the expected count presumes the caller is itself a member of
 * the active set - confirm against callers.
 */
void shmemi_barrier_linear (int PE_start, int logPE_stride, int PE_size, long *pSync)
{
    const int self = shmem_my_pe ();
    const int stride = 1 << logPE_stride;
    const long expected = SHMEM_SYNC_VALUE + PE_size - 1;
    int phase;

    for (phase = 0; phase < 2; phase += 1) {
        int pe = PE_start;
        int visited;

        /* Signal every other member of the active set. */
        for (visited = 0; visited < PE_size; visited += 1, pe += stride) {
            if (pe == self) {
                continue;
            }
            shmem_long_inc (&pSync[phase], pe);
            shmemi_trace (SHMEM_LOG_BARRIER,
                          "round = %d, sent increment to PE %d",
                          phase, pe);
        }

        /* Wait for the other members' signals, then restore the slot. */
        shmem_long_wait_until (&pSync[phase], SHMEM_CMP_EQ, expected);
        pSync[phase] = SHMEM_SYNC_VALUE;
    }
}