/*
 * Checks the outcome of the sort on the calling PE.
 *
 *  1. Every local key must lie inside this PE's bucket range
 *     [rank * BUCKET_WIDTH, (rank + 1) * BUCKET_WIDTH - 1].
 *  2. The per-key counts must add up to my_bucket_size.
 *  3. The bucket sizes summed over all PEs must equal
 *     NUM_KEYS_PER_PE * NUM_PES.
 *
 * my_local_key_counts: BUCKET_WIDTH counters, one per key value of this bucket.
 * my_local_keys:       my_bucket_size keys held by this PE.
 *
 * Returns 1 when a check failed on this PE, 0 otherwise.  A mismatch of the
 * global key total is reported and flagged by ROOT_PE only.
 */
static int verify_results(int const * const my_local_key_counts,
                          KEY_TYPE const * const my_local_keys)
{
  /* Reduction target; static so that it can be used as a symmetric object. */
  static long long int total_num_keys = 0;

  shmem_barrier_all();

  int failed = 0;
  const int my_rank = shmem_my_pe();
  const int lowest_key = my_rank * BUCKET_WIDTH;
  const int highest_key = (my_rank+1) * BUCKET_WIDTH - 1;

#ifdef ISX_PROFILING
  unsigned long long start = current_time_ns();
#endif

  /* Check 1: each key belongs to this PE's bucket. */
  for (long long int k = 0; k < my_bucket_size; ++k) {
    const int key = my_local_keys[k];
    const int in_range = (key >= lowest_key) && (key <= highest_key);
    if (!in_range) {
      printf("Rank %d Failed Verification!\n", my_rank);
      printf("Key: %d is outside of bounds [%d, %d]\n",
             key, lowest_key, highest_key);
      failed = 1;
    }
  }

#ifdef ISX_PROFILING
  unsigned long long end = current_time_ns();
  if (shmem_my_pe() == 0) {
    printf("Verifying took %llu ns\n", end - start);
  }
#endif

  /* Check 2: the key histogram accounts for exactly my_bucket_size keys. */
  long long int counted_keys = 0;
  for (uint64_t slot = 0; slot < BUCKET_WIDTH; ++slot) {
    counted_keys += my_local_key_counts[slot];
  }
  if (counted_keys != my_bucket_size) {
    printf("Rank %d Failed Verification!\n", my_rank);
    printf("Actual Bucket Size: %lld Should be %lld\n",
           counted_keys, my_bucket_size);
    failed = 1;
  }

  /* Check 3: no keys were lost or duplicated across the whole job. */
  shmem_longlong_sum_to_all(&total_num_keys, &my_bucket_size, 1, 0, 0,
                            NUM_PES, llWrk, pSync);
  shmem_barrier_all();

  if ((total_num_keys != (long long int)(NUM_KEYS_PER_PE * NUM_PES)) &&
      (my_rank == ROOT_PE)) {
    printf("Verification Failed!\n");
    printf("Actual total number of keys: %lld Expected %" PRIu64 "\n",
           total_num_keys, NUM_KEYS_PER_PE * NUM_PES );
    failed = 1;
  }

  return failed;
}
int sum_to_all(int me, int npes) { int i, pass=0; memset(ok,0,sizeof(ok)); for (i = 0; i < N; i++) { src0[i] = src1[i] = src2[i] = src3[i] = src4[i] = src5[i] = src6[i] = me; dst0[i] = -9; dst1[i] = -9; dst2[i] = -9; dst3[i] = -9; dst4[i] = -9; dst5[i] = -9; dst6[i] = -9; } shmem_barrier_all(); shmem_short_sum_to_all(dst0, src0, N, 0, 0, npes, pWrk0, pSync); shmem_int_sum_to_all(dst1, src1, N, 0, 0, npes, pWrk1, pSync1); shmem_long_sum_to_all(dst2, src2, N, 0, 0, npes, pWrk2, pSync); shmem_float_sum_to_all(dst3, src3, N, 0, 0, npes, pWrk3, pSync1); shmem_double_sum_to_all(dst4, src4, N, 0, 0, npes, pWrk4, pSync); shmem_longdouble_sum_to_all(dst5, src5, N, 0, 0, npes, pWrk5, pSync1); shmem_longlong_sum_to_all(dst6, src6, N, 0, 0, npes, pWrk6, pSync); if(me == 0) { for (i = 0; i < N; i++) { if(dst0[i] != (short) (npes * (npes-1)/2)) ok[0] = 1; if(dst1[i] != (int) (npes * (npes-1)/2)) ok[1] = 1; if(dst2[i] != (long) (npes * (npes-1)/2)) ok[2] = 1; if(dst3[i] != (float) (npes * (npes-1)/2)) ok[3] = 1; if(dst4[i] != (double) (npes * (npes-1)/2)) ok[4] = 1; if(dst5[i] != (long double) (npes * (npes-1)/2)) ok[5] = 1; if(dst6[i] != (long long) (npes * (npes-1)/2)) ok[6] = 1; } if(ok[0]==1){ printf("Reduction operation shmem_short_sum_to_all: Failed\n"); } else{ Vprintf("Reduction operation shmem_short_sum_to_all: Passed\n"); pass++; } if(ok[1]==1){ printf("Reduction operation shmem_int_sum_to_all: Failed\n"); } else{ Vprintf("Reduction operation shmem_int_sum_to_all: Passed\n"); pass++; } if(ok[2]==1){ printf("Reduction operation shmem_long_sum_to_all: Failed\n"); } else{ Vprintf("Reduction operation shmem_long_sum_to_all: Passed\n"); pass++; } if(ok[3]==1){ printf("Reduction operation shmem_float_sum_to_all: Failed\n"); } else{ Vprintf("Reduction operation shmem_float_sum_to_all: Passed\n"); pass++; } if(ok[4]==1){ printf("Reduction operation shmem_double_sum_to_all: Failed\n"); } else{ Vprintf("Reduction operation shmem_double_sum_to_all: Passed\n"); pass++; } 
if(ok[5]==1){ printf("Reduction operation shmem_longdouble_sum_to_all: Failed\n"); } else{ Vprintf("Reduction operation shmem_longdouble_sum_to_all: Passed\n"); pass++; } if(ok[6]==1){ printf("Reduction operation shmem_longlong_sum_to_all: Failed\n"); } else{ Vprintf("Reduction operation shmem_longlong_sum_to_all: Passed\n"); pass++; } Vprintf("\n"); fflush(stdout); } if (Serialize) shmem_barrier_all(); return (pass == 7 ? 1 : 0); }
int main() { int i,j; int me, npes; int success0, success1, success2, success3, success4, success5, success6; success0 = success1 = success2 = success3 = success4 = success5 = success6 = 0; start_pes(0); me = _my_pe(); npes = _num_pes(); for (i = 0; i < _SHMEM_REDUCE_SYNC_SIZE; i += 1) { pSync[i] = _SHMEM_SYNC_VALUE; pSync1[i] = _SHMEM_SYNC_VALUE; } for (i = 0; i < N; i += 1) { src0[i] = src1[i] = src2[i] = src3[i] = src4[i] = src5[i] = src6[i] = me + i; } /*Test MAX: shmem_double_max_to_all, shmem_float_max_to_all, shmem_int_max_to_all, shmem_long_max_to_all, shmem_longdouble_max_to_all, shmem_longlong_max_to_all, shmem_short_max_to_all */ shmem_barrier_all(); shmem_short_max_to_all(dst0, src0, N, 0, 0, npes, pWrk0, pSync); shmem_int_max_to_all(dst1, src1, N, 0, 0, npes, pWrk1, pSync1); shmem_long_max_to_all(dst2, src2, N, 0, 0, npes, pWrk2, pSync); shmem_float_max_to_all(dst3, src3, N, 0, 0, npes, pWrk3, pSync1); shmem_double_max_to_all(dst4, src4, N, 0, 0, npes, pWrk4, pSync); shmem_longdouble_max_to_all(dst5, src5, N, 0, 0, npes, pWrk5, pSync1); shmem_longlong_max_to_all(dst6, src6, N, 0, 0, npes, pWrk6, pSync); if(me == 0){ for (i = 0,j=-1; i < N; i++,j++) { if(dst0[i] != npes+j) success0 =1; if(dst1[i] != npes+j) success1 =1; if(dst2[i] != npes+j) success2 =1; if(dst3[i] != npes+j) success3 =1; if(dst4[i] != npes+j) success4 =1; if(dst5[i] != npes+j) success5 =1; if(dst6[i] != npes+j) success6 =1; } if(success0==1){ printf("Reduction operation shmem_short_max_to_all: Failed\n"); } else{ printf("Reduction operation shmem_short_max_to_all: Passed\n"); } if(success1==1){ printf("Reduction operation shmem_int_max_to_all: Failed\n"); } else{ printf("Reduction operation shmem_int_max_to_all: Passed\n"); } if(success2==1){ printf("Reduction operation shmem_long_max_to_all: Failed\n"); } else{ printf("Reduction operation shmem_long_max_to_all: Passed\n"); } if(success3==1){ printf("Reduction operation shmem_float_max_to_all: Failed\n"); } else{ printf("Reduction 
operation shmem_float_max_to_all: Passed\n"); } if(success4==1){ printf("Reduction operation shmem_double_max_to_all: Failed\n"); } else{ printf("Reduction operation shmem_double_max_to_all: Passed\n"); } if(success5==1){ printf("Reduction operation shmem_longdouble_max_to_all: Failed\n"); } else{ printf("Reduction operation shmem_longdouble_max_to_all: Passed\n"); } if(success6==1){ printf("Reduction operation shmem_longlong_max_to_all: Failed\n"); } else{ printf("Reduction operation shmem_longlong_max_to_all: Passed\n"); } } /*Test MIN: shmem_double_min_to_all, shmem_float_min_to_all, shmem_int_min_to_all, shmem_long_min_to_all, shmem_longdouble_min_to_all, shmem_longlong_min_to_all, shmem_short_min_to_all*/ success0 = success1 = success2 = success3 = success4 = success5 = success6 = 0; for (i = 0; i < N; i += 1) { src0[i] = src1[i] = src2[i] = src3[i] = src4[i] = src5[i] = src6[i] = me + i; } for (i = 0; i < N; i += 1) { dst0[i] = -9; dst1[i] = -9; dst2[i] = -9; dst3[i] = -9; dst4[i] = -9; dst5[i] = -9; dst6[i] = -9; } shmem_barrier_all(); shmem_short_min_to_all(dst0, src0, N, 0, 0, npes, pWrk0, pSync); shmem_int_min_to_all(dst1, src1, N, 0, 0, npes, pWrk1, pSync1); shmem_long_min_to_all(dst2, src2, N, 0, 0, npes, pWrk2, pSync); shmem_float_min_to_all(dst3, src3, N, 0, 0, npes, pWrk3, pSync1); shmem_double_min_to_all(dst4, src4, N, 0, 0, npes, pWrk4, pSync); shmem_longdouble_min_to_all(dst5, src5, N, 0, 0, npes, pWrk5, pSync1); shmem_longlong_min_to_all(dst6, src6, N, 0, 0, npes, pWrk6, pSync); if(me == 0){ for (i = 0; i < N; i++) { if(dst0[i] != i) success0 =1; if(dst1[i] != i) success1 =1; if(dst2[i] != i) success2 =1; if(dst3[i] != i) success3 =1; if(dst4[i] != i) success4 =1; if(dst5[i] != i) success5 =1; if(dst6[i] != i) success6 =1; } if(success0==1){ printf("Reduction operation shmem_short_min_to_all: Failed\n"); } else{ printf("Reduction operation shmem_short_min_to_all: Passed\n"); } if(success1==1){ printf("Reduction operation shmem_int_min_to_all: 
Failed\n"); } else{ printf("Reduction operation shmem_int_min_to_all: Passed\n"); } if(success2==1){ printf("Reduction operation shmem_long_min_to_all: Failed\n"); } else{ printf("Reduction operation shmem_long_min_to_all: Passed\n"); } if(success3==1){ printf("Reduction operation shmem_float_min_to_all: Failed\n"); } else{ printf("Reduction operation shmem_float_min_to_all: Passed\n"); } if(success4==1){ printf("Reduction operation shmem_double_min_to_all: Failed\n"); } else{ printf("Reduction operation shmem_double_min_to_all: Passed\n"); } if(success5==1){ printf("Reduction operation shmem_longdouble_min_to_all: Failed\n"); } else{ printf("Reduction operation shmem_longdouble_min_to_all: Passed\n"); } if(success6==1){ printf("Reduction operation shmem_longlong_min_to_all: Failed\n"); } else{ printf("Reduction operation shmem_longlong_min_to_all: Passed\n"); } } /*Test SUM: shmem_double_sum_to_all, shmem_float_sum_to_all, shmem_int_sum_to_all, shmem_long_sum_to_all, shmem_longdouble_sum_to_all, shmem_longlong_sum_to_all, shmem_short_sum_to_all*/ success0 = success1 = success2 = success3 = success4 = success5 = success6 = 0; for (i = 0; i < N; i += 1) { src0[i] = src1[i] = src2[i] = src3[i] = src4[i] = src5[i] = src6[i] = me; } for (i = 0; i < N; i += 1) { dst0[i] = -9; dst1[i] = -9; dst2[i] = -9; dst3[i] = -9; dst4[i] = -9; dst5[i] = -9; dst6[i] = -9; } shmem_barrier_all(); shmem_short_sum_to_all(dst0, src0, N, 0, 0, npes, pWrk0, pSync); shmem_int_sum_to_all(dst1, src1, N, 0, 0, npes, pWrk1, pSync1); shmem_long_sum_to_all(dst2, src2, N, 0, 0, npes, pWrk2, pSync); shmem_float_sum_to_all(dst3, src3, N, 0, 0, npes, pWrk3, pSync1); shmem_double_sum_to_all(dst4, src4, N, 0, 0, npes, pWrk4, pSync); shmem_longdouble_sum_to_all(dst5, src5, N, 0, 0, npes, pWrk5, pSync1); shmem_longlong_sum_to_all(dst6, src6, N, 0, 0, npes, pWrk6, pSync); if(me == 0){ for (i = 0; i < N; i++) { if(dst0[i] != (npes * (npes-1)/2)) success0 =1; if(dst1[i] != (npes * (npes-1)/2)) success1 =1; 
if(dst2[i] != (npes * (npes-1)/2)) success2 =1; if(dst3[i] != (npes * (npes-1)/2)) success3 =1; if(dst4[i] != (npes * (npes-1)/2)) success4 =1; if(dst5[i] != (npes * (npes-1)/2)) success5 =1; if(dst6[i] != (npes * (npes-1)/2)) success6 =1; } if(success0==1){ printf("Reduction operation shmem_short_sum_to_all: Failed\n"); } else{ printf("Reduction operation shmem_short_sum_to_all: Passed\n"); } if(success1==1){ printf("Reduction operation shmem_int_sum_to_all: Failed\n"); } else{ printf("Reduction operation shmem_int_sum_to_all: Passed\n"); } if(success2==1){ printf("Reduction operation shmem_long_sum_to_all: Failed\n"); } else{ printf("Reduction operation shmem_long_sum_to_all: Passed\n"); } if(success3==1){ printf("Reduction operation shmem_float_sum_to_all: Failed\n"); } else{ printf("Reduction operation shmem_float_sum_to_all: Passed\n"); } if(success4==1){ printf("Reduction operation shmem_double_sum_to_all: Failed\n"); } else{ printf("Reduction operation shmem_double_sum_to_all: Passed\n"); } if(success5==1){ printf("Reduction operation shmem_longdouble_sum_to_all: Failed\n"); } else{ printf("Reduction operation shmem_longdouble_sum_to_all: Passed\n"); } if(success6==1){ printf("Reduction operation shmem_longlong_sum_to_all: Failed\n"); } else{ printf("Reduction operation shmem_longlong_sum_to_all: Passed\n"); } } /*Test AND: shmem_int_and_to_all, shmem_long_and_to_all, shmem_longlong_and_to_all, shmem_short_and_to_all,*/ success0 = success1 = success2 = success6 = 0; for (i = 0; i < N; i += 1) { src0[i] = src1[i] = src2[i] = src6[i] = me; } for (i = 0; i < N; i += 1) { dst0[i] = -9; dst1[i] = -9; dst2[i] = -9; dst6[i] = -9; } shmem_barrier_all(); shmem_short_and_to_all(dst0, src0, N, 0, 0, npes, pWrk0, pSync); shmem_int_and_to_all(dst1, src1, N, 0, 0, npes, pWrk1, pSync1); shmem_long_and_to_all(dst2, src2, N, 0, 0, npes, pWrk2, pSync); shmem_longlong_and_to_all(dst6, src6, N, 0, 0, npes, pWrk6, pSync1); if(me==0){ for (i = 0; i < N; i++) { if(dst0[i] != 0) 
success0 =1; if(dst1[i] != 0) success1 =1; if(dst2[i] != 0) success2 =1; if(dst6[i] != 0) success6 =1; } if(success0==1){ printf("Reduction operation shmem_short_and_to_all: Failed\n"); } else{ printf("Reduction operation shmem_short_and_to_all: Passed\n"); } if(success1==1){ printf("Reduction operation shmem_int_and_to_all: Failed\n"); } else{ printf("Reduction operation shmem_int_and_to_all: Passed\n"); } if(success2==1){ printf("Reduction operation shmem_long_and_to_all: Failed\n"); } else{ printf("Reduction operation shmem_long_and_to_all: Passed\n"); } if(success6==1){ printf("Reduction operation shmem_longlong_and_to_all: Failed\n"); } else{ printf("Reduction operation shmem_longlong_and_to_all: Passed\n"); } } /*Test PROD: shmem_double_prod_to_all, shmem_float_prod_to_all, shmem_int_prod_to_all, shmem_long_prod_to_all, shmem_longdouble_prod_to_all, shmem_longlong_prod_to_all, shmem_short_prod_to_all, */ success0 = success1 = success2 = success3 = success4 = success5 = success6 = 0; for (i = 0; i < N; i += 1) { src0[i] = src1[i] = src2[i] = src3[i] = src4[i] = src5[i] = src6[i] = me + 1; } for (i = 0; i < N; i += 1) { dst0[i] = -9; dst1[i] = -9; dst2[i] = -9; dst3[i] = -9; dst4[i] = -9; dst5[i] = -9; dst6[i] = -9; } expected_result0 = expected_result1 = expected_result2 = expected_result3 = expected_result4 = expected_result5 = expected_result6 =1; for(i=1;i<=npes;i++){ expected_result0 = expected_result0 * i; expected_result1 = expected_result1 * i; expected_result2 = expected_result2 * i; expected_result3 = expected_result3 * i; expected_result4 = expected_result4 * i; expected_result5 = expected_result5 * i; expected_result6 = expected_result6 * i; } shmem_barrier_all(); shmem_short_prod_to_all(dst0, src0, N, 0, 0, npes, pWrk0, pSync); shmem_int_prod_to_all(dst1, src1, N, 0, 0, npes, pWrk1, pSync1); shmem_long_prod_to_all(dst2, src2, N, 0, 0, npes, pWrk2, pSync); shmem_float_prod_to_all(dst3, src3, N, 0, 0, npes, pWrk3, pSync1); 
shmem_double_prod_to_all(dst4, src4, N, 0, 0, npes, pWrk4, pSync); shmem_longdouble_prod_to_all(dst5, src5, N, 0, 0, npes, pWrk5, pSync1); shmem_longlong_prod_to_all(dst6, src6, N, 0, 0, npes, pWrk6, pSync); if(me == 0){ for (i = 0; i < N; i++) { /*printf("dst2[%d]: %ld, expected val: %ld\n",i, dst2[i], (long)expected_result2);*/ if(dst0[i] != expected_result0) success0 =1; if(dst1[i] != expected_result1) success1 =1; if(dst2[i] != expected_result2) success2 =1; if(dst3[i] != expected_result3) success3 =1; if(dst4[i] != expected_result4) success4 =1; if(dst5[i] != expected_result5) success5 =1; if(dst6[i] != expected_result6) success6 =1; } if(success0==1){ printf("Reduction operation shmem_short_prod_to_all: Failed\n"); } else{ printf("Reduction operation shmem_short_prod_to_all: Passed\n"); } if(success1==1){ printf("Reduction operation shmem_int_prod_to_all: Failed\n"); } else{ printf("Reduction operation shmem_int_prod_to_all: Passed\n"); } if(success2==1){ printf("Reduction operation shmem_long_prod_to_all: Failed\n"); } else{ printf("Reduction operation shmem_long_prod_to_all: Passed\n"); } if(success3==1){ printf("Reduction operation shmem_float_prod_to_all: Failed\n"); } else{ printf("Reduction operation shmem_float_prod_to_all: Passed\n"); } if(success4==1){ printf("Reduction operation shmem_double_prod_to_all: Failed\n"); } else{ printf("Reduction operation shmem_double_prod_to_all: Passed\n"); } if(success5==1){ printf("Reduction operation shmem_longdouble_prod_to_all: Failed\n"); } else{ printf("Reduction operation shmem_longdouble_prod_to_all: Passed\n"); } if(success6==1){ printf("Reduction operation shmem_longlong_prod_to_all: Failed\n"); } else{ printf("Reduction operation shmem_longlong_prod_to_all: Passed\n"); } } /*Test OR: shmem_int_or_to_all, shmem_long_or_to_all, shmem_longlong_or_to_all, shmem_short_or_to_all,*/ success0 = success1 = success2 = success6 = 0; for (i = 0; i < N; i += 1) { src0[i] = src1[i] = src2[i] = src6[i] = (me + 1)%4; } 
for (i = 0; i < N; i += 1) { dst0[i] = -9; dst1[i] = -9; dst2[i] = -9; dst6[i] = -9; } shmem_barrier_all(); shmem_short_or_to_all(dst0, src0, N, 0, 0, npes, pWrk0, pSync); shmem_int_or_to_all(dst1, src1, N, 0, 0, npes, pWrk1, pSync1); shmem_long_or_to_all(dst2, src2, N, 0, 0, npes, pWrk2, pSync); shmem_longlong_or_to_all(dst6, src6, N, 0, 0, npes, pWrk6, pSync1); if(me==0){ for (i = 0; i < N; i++) { if(dst0[i] != 3) success0 =1; if(dst1[i] != 3) success1 =1; if(dst2[i] != 3) success2 =1; if(dst6[i] != 3) success6 =1; } if(success0==1){ printf("Reduction operation shmem_short_or_to_all: Failed\n"); } else{ printf("Reduction operation shmem_short_or_to_all: Passed\n"); } if(success1==1){ printf("Reduction operation shmem_int_or_to_all: Failed\n"); } else{ printf("Reduction operation shmem_int_or_to_all: Passed\n"); } if(success2==1){ printf("Reduction operation shmem_long_or_to_all: Failed\n"); } else{ printf("Reduction operation shmem_long_or_to_all: Passed\n"); } if(success6==1){ printf("Reduction operation shmem_longlong_or_to_all: Failed\n"); } else{ printf("Reduction operation shmem_longlong_or_to_all: Passed\n"); } } /*Test XOR: shmem_int_xor_to_all, shmem_long_xor_to_all, shmem_longlong_xor_to_all, shmem_short_xor_to_all*/ success0 = success1 = success2 = success6 = 0; for (i = 0; i < N; i += 1) { src0[i] = src1[i] = src2[i] = src6[i] = me%2; } for (i = 0; i < N; i += 1) { dst0[i] = -9; dst1[i] = -9; dst2[i] = -9; dst6[i] = -9; } int expected_result = ((int)(npes/2) % 2); shmem_barrier_all(); shmem_short_xor_to_all(dst0, src0, N, 0, 0, npes, pWrk0, pSync); shmem_int_xor_to_all(dst1, src1, N, 0, 0, npes, pWrk1, pSync1); shmem_long_xor_to_all(dst2, src2, N, 0, 0, npes, pWrk2, pSync); shmem_longlong_xor_to_all(dst6, src6, N, 0, 0, npes, pWrk6, pSync1); if(me==0){ for (i = 0; i < N; i++) { if(dst0[i] != expected_result) success0 =1; if(dst1[i] != expected_result) success1 =1; if(dst2[i] != expected_result) success2 =1; if(dst6[i] != expected_result) success6 =1; } 
if(success0==1){ printf("Reduction operation shmem_short_xor_to_all: Failed\n"); } else{ printf("Reduction operation shmem_short_xor_to_all: Passed\n"); } if(success1==1){ printf("Reduction operation shmem_int_xor_to_all: Failed\n"); } else{ printf("Reduction operation shmem_int_xor_to_all: Passed\n"); } if(success2==1){ printf("Reduction operation shmem_long_xor_to_all: Failed\n"); } else{ printf("Reduction operation shmem_long_xor_to_all: Passed\n"); } if(success6==1){ printf("Reduction operation shmem_longlong_xor_to_all: Failed\n"); } else{ printf("Reduction operation shmem_longlong_xor_to_all: Passed\n"); } } return 0; }
/*
 * Runs the SHMEM RandomAccess benchmark and its verification pass.
 *
 * The global table size is the largest power of two that fits in half of
 * NumProcs * params->HPLMaxProcMem bytes (HPLMaxProcMem is overwritten with
 * 200000 here).  The table is split evenly over the PEs, updated by
 * Power2NodesRandomAccessUpdate() under a wall-clock timer, and then checked
 * by HPCC_Power2NodesSHMEMRandomAccessCheck().  PE 0 prints the report to
 * stdout and records timings, error counts and the GUP/s figure in *params;
 * the GUP/s figure is additionally broadcast into params->SHMEMGUPs on every
 * other PE.
 *
 * params: benchmark parameter/result record; written as described above.
 *
 * Returns 0 in all cases.  When the number of PEs is not a power of two the
 * function prints a message on PE 0 and returns without running anything.
 * params->Failure is set to 1 (on PE 0) when more than 1% of the table
 * locations are wrong.
 */
int HPCC_SHMEMRandomAccess(HPCC_Params *params)
{
  s64Int i;
  static s64Int NumErrors, GlbNumErrors;   /* static: used as reduction source/target */

  int NumProcs, logNumProcs, MyProc;

  u64Int GlobalStartMyProc;
  int Remainder;                /* Number of processors with (LocalTableSize + 1) entries */
  u64Int Top;                   /* Number of table entries in top of Table */
  s64Int LocalTableSize;        /* Local table width */
  u64Int MinLocalTableSize;     /* Integer ratio TableSize/NumProcs */
  u64Int logTableSize, TableSize;

  double RealTime;              /* Real time to update table */
  double TotalMem;
  static int sAbort, rAbort;    /* static: used as reduction source/target */
  int PowerofTwo;

  u64Int NumUpdates_Default;    /* Number of updates to table (suggested: 4x number of table entries) */
  u64Int NumUpdates;            /* actual number of updates to table */
  s64Int ProcNumUpdates;        /* number of updates per processor */

  /*
   * Synchronisation arrays: reductions need _SHMEM_REDUCE_SYNC_SIZE elements
   * and the broadcast needs _SHMEM_BCAST_SYNC_SIZE, so each collective kind
   * gets its own correctly sized array.
   */
  static long bcastSync[_SHMEM_BCAST_SYNC_SIZE];
  static long llpSync[_SHMEM_REDUCE_SYNC_SIZE];
  static long ipSync[_SHMEM_REDUCE_SYNC_SIZE];
  /* NOTE(review): the reduction work arrays are sized with the SYNC constant;
   * confirm this is at least the minimum pWrk size the SHMEM library requires
   * for a one-element reduction. */
  static long long int llpWrk[_SHMEM_REDUCE_SYNC_SIZE];
  static int ipWrk[_SHMEM_REDUCE_SYNC_SIZE];

  /* Static staging buffers for the GUP/s broadcast, so that the collective
   * does not depend on where the caller placed *params. */
  static double gupsSrc, gupsDst;

  FILE *outFile = NULL;
  double *GUPs;

  for (i = 0; i < _SHMEM_BCAST_SYNC_SIZE; i += 1) {
    bcastSync[i] = _SHMEM_SYNC_VALUE;
  }
  for (i = 0; i < _SHMEM_REDUCE_SYNC_SIZE; i += 1) {
    ipSync[i] = _SHMEM_SYNC_VALUE;
    llpSync[i] = _SHMEM_SYNC_VALUE;
  }

  params->SHMEMGUPs = -1;
  GUPs = &params->SHMEMGUPs;

  NumProcs = shmem_n_pes();
  MyProc = shmem_my_pe();

  /* Only PE 0 reports; its output goes unbuffered to stdout. */
  if (0 == MyProc) {
    outFile = stdout;
    setbuf(outFile, NULL);
  }

  params->HPLMaxProcMem = 200000;

  TotalMem = params->HPLMaxProcMem; /* max single node memory */
  TotalMem *= NumProcs;             /* max memory in NumProcs nodes */
  TotalMem /= sizeof(u64Int);

  /* calculate TableSize --- the size of update array (must be a power of 2) */
  for (TotalMem *= 0.5, logTableSize = 0, TableSize = 1;
       TotalMem >= 1.0;
       TotalMem *= 0.5, logTableSize++, TableSize <<= 1)
    ; /* EMPTY */

  /* determine whether the number of processors is a power of 2 */
  if ( (NumProcs & (NumProcs - 1)) == 0) {
    int procs;

    PowerofTwo = HPCC_TRUE;
    Remainder = 0;
    Top = 0;
    MinLocalTableSize = (TableSize / NumProcs);
    LocalTableSize = MinLocalTableSize;
    GlobalStartMyProc = (MinLocalTableSize * MyProc);

    /* log2(NumProcs); it is printed below and handed to the update and
     * check routines, so it must not be left uninitialised. */
    logNumProcs = 0;
    for (procs = NumProcs; procs > 1; procs >>= 1) {
      logNumProcs++;
    }
  }
  else {
    if (MyProc == 0) {
      printf("Number of processes must be power of 2\n");
    }
    return 0;
  }

  sAbort = 0;

  HPCC_Table = HPCC_XMALLOC( s64Int, LocalTableSize );
  if (! HPCC_Table) sAbort = 1;

  /* Agree across all PEs on whether any allocation failed. */
  shmem_barrier_all();
  shmem_int_sum_to_all(&rAbort, &sAbort, 1, 0, 0, NumProcs, ipWrk, ipSync);
  shmem_barrier_all();

  if (rAbort > 0) {
    if (MyProc == 0) fprintf(outFile, "Failed to allocate memory for the main table.\n");
    /* check all allocations in case there are new added and their order changes */
    if (HPCC_Table) HPCC_free( HPCC_Table );
    goto failed_table;
  }

  params->SHMEMRandomAccess_N = (s64Int)TableSize;

  /* Default number of global updates to table: 4x number of table entries */
  NumUpdates_Default = 4 * TableSize;
  ProcNumUpdates = 4*LocalTableSize;
  NumUpdates = NumUpdates_Default;

  if (MyProc == 0) {
    fprintf( outFile, "Running on %d processors%s\n", NumProcs,
             PowerofTwo ? " (PowerofTwo)" : "");
    fprintf( outFile, "Total Main table size = 2^" FSTR64 " = " FSTR64 " words\n",
             logTableSize, TableSize );
    if (PowerofTwo)
      fprintf( outFile, "PE Main table size = 2^" FSTR64 " = " FSTR64 " words/PE\n",
               (logTableSize - logNumProcs), TableSize/NumProcs );
    else
      fprintf( outFile, "PE Main table size = (2^" FSTR64 ")/%d = " FSTR64 " words/PE MAX\n",
               logTableSize, NumProcs, LocalTableSize);

    fprintf( outFile, "Default number of updates (RECOMMENDED) = " FSTR64 "\n",
             NumUpdates_Default);
    params->SHMEMRandomAccess_ExeUpdates = NumUpdates;
  }

  /* Initialize main table */
  for (i=0; i<LocalTableSize; i++)
    HPCC_Table[i] = i + GlobalStartMyProc;

  shmem_barrier_all();

  /* Begin timed section */
  RealTime = -RTSEC();

  Power2NodesRandomAccessUpdate(logTableSize, TableSize, LocalTableSize,
                                MinLocalTableSize, GlobalStartMyProc, Top,
                                logNumProcs, NumProcs, Remainder,
                                MyProc, ProcNumUpdates);

  shmem_barrier_all();

  /* End timed section */
  RealTime += RTSEC();

  /* Print timing results */
  if (MyProc == 0) {
    params->SHMEMRandomAccess_time = RealTime;
    *GUPs = 1e-9*NumUpdates / RealTime;
    fprintf( outFile, "Real time used = %.6f seconds\n", RealTime );
    fprintf( outFile, "%.9f Billion(10^9) Updates    per second [GUP/s]\n",
             *GUPs );
    fprintf( outFile, "%.9f Billion(10^9) Updates/PE per second [GUP/s]\n",
             *GUPs / NumProcs );
    /* No longer reporting per CPU number */
    /* *GUPs /= NumProcs; */
  }

  /* distribute result to all nodes: stage it in static buffers, broadcast
   * from PE 0, then copy it back on the receivers (the root's destination
   * is not written by the broadcast, and PE 0 already holds the value). */
  gupsSrc = *GUPs;
  shmem_barrier_all();
  shmem_broadcast64(&gupsDst, &gupsSrc, 1, 0, 0, 0, NumProcs, bcastSync);
  shmem_barrier_all();
  if (MyProc != 0) {
    *GUPs = gupsDst;
  }

  /* Verification phase */

  /* Begin timing here */
  RealTime = -RTSEC();

  HPCC_Power2NodesSHMEMRandomAccessCheck(logTableSize, TableSize, LocalTableSize,
                                         GlobalStartMyProc,
                                         logNumProcs, NumProcs,
                                         MyProc, ProcNumUpdates,
                                         &NumErrors);

  shmem_barrier_all();
  shmem_longlong_sum_to_all( &GlbNumErrors, &NumErrors, 1, 0, 0, NumProcs, llpWrk, llpSync);
  shmem_barrier_all();

  /* End timed section */
  RealTime += RTSEC();

  if (MyProc == 0) {
    params->SHMEMRandomAccess_CheckTime = RealTime;

    fprintf( outFile, "Verification:  Real time used = %.6f seconds\n", RealTime);
    fprintf( outFile, "Found " FSTR64 " errors in " FSTR64 " locations (%s).\n",
             GlbNumErrors, TableSize, (GlbNumErrors <= 0.01*TableSize) ?
             "passed" : "failed");
    /* Up to 1% wrong locations is tolerated. */
    if (GlbNumErrors > 0.01*TableSize) params->Failure = 1;
    params->SHMEMRandomAccess_Errors = (s64Int)GlbNumErrors;
    params->SHMEMRandomAccess_ErrorsFraction = (double)GlbNumErrors / (double)TableSize;
    params->SHMEMRandomAccess_Algorithm = 1;
  }
  /* End verification phase */

  /* Deallocate memory (in reverse order of allocation which should help fragmentation) */
  HPCC_free( HPCC_Table );

  failed_table:

  /* The report stream is the process-wide stdout, which this function did
   * not open; never close a standard stream here. */
  if (0 == MyProc) {
    if (outFile != stdout && outFile != stderr) fclose( outFile );
  }

  shmem_barrier_all();

  return 0;
}