int main() { int i,j; int me, npes; int success0, success1, success2, success3, success4, success5, success6; success0 = success1 = success2 = success3 = success4 = success5 = success6 = 0; start_pes(0); me = _my_pe(); npes = _num_pes(); for (i = 0; i < _SHMEM_REDUCE_SYNC_SIZE; i += 1) { pSync[i] = _SHMEM_SYNC_VALUE; pSync1[i] = _SHMEM_SYNC_VALUE; } for (i = 0; i < N; i += 1) { src0[i] = src1[i] = src2[i] = src3[i] = src4[i] = src5[i] = src6[i] = me + i; } /*Test MAX: shmem_double_max_to_all, shmem_float_max_to_all, shmem_int_max_to_all, shmem_long_max_to_all, shmem_longdouble_max_to_all, shmem_longlong_max_to_all, shmem_short_max_to_all */ shmem_barrier_all(); shmem_short_max_to_all(dst0, src0, N, 0, 0, npes, pWrk0, pSync); shmem_int_max_to_all(dst1, src1, N, 0, 0, npes, pWrk1, pSync1); shmem_long_max_to_all(dst2, src2, N, 0, 0, npes, pWrk2, pSync); shmem_float_max_to_all(dst3, src3, N, 0, 0, npes, pWrk3, pSync1); shmem_double_max_to_all(dst4, src4, N, 0, 0, npes, pWrk4, pSync); shmem_longdouble_max_to_all(dst5, src5, N, 0, 0, npes, pWrk5, pSync1); shmem_longlong_max_to_all(dst6, src6, N, 0, 0, npes, pWrk6, pSync); if(me == 0){ for (i = 0,j=-1; i < N; i++,j++) { if(dst0[i] != npes+j) success0 =1; if(dst1[i] != npes+j) success1 =1; if(dst2[i] != npes+j) success2 =1; if(dst3[i] != npes+j) success3 =1; if(dst4[i] != npes+j) success4 =1; if(dst5[i] != npes+j) success5 =1; if(dst6[i] != npes+j) success6 =1; } if(success0==1){ printf("Reduction operation shmem_short_max_to_all: Failed\n"); } else{ printf("Reduction operation shmem_short_max_to_all: Passed\n"); } if(success1==1){ printf("Reduction operation shmem_int_max_to_all: Failed\n"); } else{ printf("Reduction operation shmem_int_max_to_all: Passed\n"); } if(success2==1){ printf("Reduction operation shmem_long_max_to_all: Failed\n"); } else{ printf("Reduction operation shmem_long_max_to_all: Passed\n"); } if(success3==1){ printf("Reduction operation shmem_float_max_to_all: Failed\n"); } else{ printf("Reduction operation shmem_float_max_to_all: Passed\n"); } if(success4==1){ printf("Reduction operation shmem_double_max_to_all: Failed\n"); } else{ printf("Reduction operation shmem_double_max_to_all: Passed\n"); } if(success5==1){ printf("Reduction operation shmem_longdouble_max_to_all: Failed\n"); } else{ printf("Reduction operation shmem_longdouble_max_to_all: Passed\n"); } if(success6==1){ printf("Reduction operation shmem_longlong_max_to_all: Failed\n"); } else{ printf("Reduction operation shmem_longlong_max_to_all: Passed\n"); } } /*Test MIN: shmem_double_min_to_all, shmem_float_min_to_all, shmem_int_min_to_all, shmem_long_min_to_all, shmem_longdouble_min_to_all, shmem_longlong_min_to_all, shmem_short_min_to_all*/ success0 = success1 = success2 = success3 = success4 = success5 = success6 = 0; for (i = 0; i < N; i += 1) { src0[i] = src1[i] = src2[i] = src3[i] = src4[i] = src5[i] = src6[i] = me + i; } for (i = 0; i < N; i += 1) { dst0[i] = -9; dst1[i] = -9; dst2[i] = -9; dst3[i] = -9; dst4[i] = -9; dst5[i] = -9; dst6[i] = -9; } shmem_barrier_all(); shmem_short_min_to_all(dst0, src0, N, 0, 0, npes, pWrk0, pSync); shmem_int_min_to_all(dst1, src1, N, 0, 0, npes, pWrk1, pSync1); shmem_long_min_to_all(dst2, src2, N, 0, 0, npes, pWrk2, pSync); shmem_float_min_to_all(dst3, src3, N, 0, 0, npes, pWrk3, pSync1); shmem_double_min_to_all(dst4, src4, N, 0, 0, npes, pWrk4, pSync); shmem_longdouble_min_to_all(dst5, src5, N, 0, 0, npes, pWrk5, pSync1); shmem_longlong_min_to_all(dst6, src6, N, 0, 0, npes, pWrk6, pSync); if(me == 0){ for (i = 0; i < N; i++) { if(dst0[i] != i) success0 =1; if(dst1[i] != i) success1 =1; if(dst2[i] != i) success2 =1; if(dst3[i] != i) success3 =1; if(dst4[i] != i) success4 =1; if(dst5[i] != i) success5 =1; if(dst6[i] != i) success6 =1; } if(success0==1){ printf("Reduction operation shmem_short_min_to_all: Failed\n"); } else{ printf("Reduction operation shmem_short_min_to_all: Passed\n"); } if(success1==1){ printf("Reduction operation shmem_int_min_to_all: Failed\n"); } else{ printf("Reduction operation shmem_int_min_to_all: Passed\n"); } if(success2==1){ printf("Reduction operation shmem_long_min_to_all: Failed\n"); } else{ printf("Reduction operation shmem_long_min_to_all: Passed\n"); } if(success3==1){ printf("Reduction operation shmem_float_min_to_all: Failed\n"); } else{ printf("Reduction operation shmem_float_min_to_all: Passed\n"); } if(success4==1){ printf("Reduction operation shmem_double_min_to_all: Failed\n"); } else{ printf("Reduction operation shmem_double_min_to_all: Passed\n"); } if(success5==1){ printf("Reduction operation shmem_longdouble_min_to_all: Failed\n"); } else{ printf("Reduction operation shmem_longdouble_min_to_all: Passed\n"); } if(success6==1){ printf("Reduction operation shmem_longlong_min_to_all: Failed\n"); } else{ printf("Reduction operation shmem_longlong_min_to_all: Passed\n"); } } /*Test SUM: shmem_double_sum_to_all, shmem_float_sum_to_all, shmem_int_sum_to_all, shmem_long_sum_to_all, shmem_longdouble_sum_to_all, shmem_longlong_sum_to_all, shmem_short_sum_to_all*/ success0 = success1 = success2 = success3 = success4 = success5 = success6 = 0; for (i = 0; i < N; i += 1) { src0[i] = src1[i] = src2[i] = src3[i] = src4[i] = src5[i] = src6[i] = me; } for (i = 0; i < N; i += 1) { dst0[i] = -9; dst1[i] = -9; dst2[i] = -9; dst3[i] = -9; dst4[i] = -9; dst5[i] = -9; dst6[i] = -9; } shmem_barrier_all(); shmem_short_sum_to_all(dst0, src0, N, 0, 0, npes, pWrk0, pSync); shmem_int_sum_to_all(dst1, src1, N, 0, 0, npes, pWrk1, pSync1); shmem_long_sum_to_all(dst2, src2, N, 0, 0, npes, pWrk2, pSync); shmem_float_sum_to_all(dst3, src3, N, 0, 0, npes, pWrk3, pSync1); shmem_double_sum_to_all(dst4, src4, N, 0, 0, npes, pWrk4, pSync); shmem_longdouble_sum_to_all(dst5, src5, N, 0, 0, npes, pWrk5, pSync1); shmem_longlong_sum_to_all(dst6, src6, N, 0, 0, npes, pWrk6, pSync); if(me == 0){ for (i = 0; i < N; i++) { if(dst0[i] != (npes * (npes-1)/2)) success0 =1; if(dst1[i] != (npes * (npes-1)/2)) success1 =1; if(dst2[i] != (npes * (npes-1)/2)) success2 =1; if(dst3[i] != (npes * (npes-1)/2)) success3 =1; if(dst4[i] != (npes * (npes-1)/2)) success4 =1; if(dst5[i] != (npes * (npes-1)/2)) success5 =1; if(dst6[i] != (npes * (npes-1)/2)) success6 =1; } if(success0==1){ printf("Reduction operation shmem_short_sum_to_all: Failed\n"); } else{ printf("Reduction operation shmem_short_sum_to_all: Passed\n"); } if(success1==1){ printf("Reduction operation shmem_int_sum_to_all: Failed\n"); } else{ printf("Reduction operation shmem_int_sum_to_all: Passed\n"); } if(success2==1){ printf("Reduction operation shmem_long_sum_to_all: Failed\n"); } else{ printf("Reduction operation shmem_long_sum_to_all: Passed\n"); } if(success3==1){ printf("Reduction operation shmem_float_sum_to_all: Failed\n"); } else{ printf("Reduction operation shmem_float_sum_to_all: Passed\n"); } if(success4==1){ printf("Reduction operation shmem_double_sum_to_all: Failed\n"); } else{ printf("Reduction operation shmem_double_sum_to_all: Passed\n"); } if(success5==1){ printf("Reduction operation shmem_longdouble_sum_to_all: Failed\n"); } else{ printf("Reduction operation shmem_longdouble_sum_to_all: Passed\n"); } if(success6==1){ printf("Reduction operation shmem_longlong_sum_to_all: Failed\n"); } else{ printf("Reduction operation shmem_longlong_sum_to_all: Passed\n"); } } /*Test AND: shmem_int_and_to_all, shmem_long_and_to_all, shmem_longlong_and_to_all, shmem_short_and_to_all,*/ success0 = success1 = success2 = success6 = 0; for (i = 0; i < N; i += 1) { src0[i] = src1[i] = src2[i] = src6[i] = me; } for (i = 0; i < N; i += 1) { dst0[i] = -9; dst1[i] = -9; dst2[i] = -9; dst6[i] = -9; } shmem_barrier_all(); shmem_short_and_to_all(dst0, src0, N, 0, 0, npes, pWrk0, pSync); shmem_int_and_to_all(dst1, src1, N, 0, 0, npes, pWrk1, pSync1); shmem_long_and_to_all(dst2, src2, N, 0, 0, npes, pWrk2, pSync); shmem_longlong_and_to_all(dst6, src6, N, 0, 0, npes, pWrk6, pSync1); if(me==0){ for (i = 0; i < N; i++) { if(dst0[i] != 0) success0 =1; if(dst1[i] != 0) success1 =1; if(dst2[i] != 0) success2 =1; if(dst6[i] != 0) success6 =1; } if(success0==1){ printf("Reduction operation shmem_short_and_to_all: Failed\n"); } else{ printf("Reduction operation shmem_short_and_to_all: Passed\n"); } if(success1==1){ printf("Reduction operation shmem_int_and_to_all: Failed\n"); } else{ printf("Reduction operation shmem_int_and_to_all: Passed\n"); } if(success2==1){ printf("Reduction operation shmem_long_and_to_all: Failed\n"); } else{ printf("Reduction operation shmem_long_and_to_all: Passed\n"); } if(success6==1){ printf("Reduction operation shmem_longlong_and_to_all: Failed\n"); } else{ printf("Reduction operation shmem_longlong_and_to_all: Passed\n"); } } /*Test PROD: shmem_double_prod_to_all, shmem_float_prod_to_all, shmem_int_prod_to_all, shmem_long_prod_to_all, shmem_longdouble_prod_to_all, shmem_longlong_prod_to_all, shmem_short_prod_to_all, */ success0 = success1 = success2 = success3 = success4 = success5 = success6 = 0; for (i = 0; i < N; i += 1) { src0[i] = src1[i] = src2[i] = src3[i] = src4[i] = src5[i] = src6[i] = me + 1; } for (i = 0; i < N; i += 1) { dst0[i] = -9; dst1[i] = -9; dst2[i] = -9; dst3[i] = -9; dst4[i] = -9; dst5[i] = -9; dst6[i] = -9; } expected_result0 = expected_result1 = expected_result2 = expected_result3 = expected_result4 = expected_result5 = expected_result6 =1; for(i=1;i<=npes;i++){ expected_result0 = expected_result0 * i; expected_result1 = expected_result1 * i; expected_result2 = expected_result2 * i; expected_result3 = expected_result3 * i; expected_result4 = expected_result4 * i; expected_result5 = expected_result5 * i; expected_result6 = expected_result6 * i; } shmem_barrier_all(); shmem_short_prod_to_all(dst0, src0, N, 0, 0, npes, pWrk0, pSync); shmem_int_prod_to_all(dst1, src1, N, 0, 0, npes, pWrk1, pSync1); shmem_long_prod_to_all(dst2, src2, N, 0, 0, npes, pWrk2, pSync); shmem_float_prod_to_all(dst3, src3, N, 0, 0, npes, pWrk3, pSync1); shmem_double_prod_to_all(dst4, src4, N, 0, 0, npes, pWrk4, pSync); shmem_longdouble_prod_to_all(dst5, src5, N, 0, 0, npes, pWrk5, pSync1); shmem_longlong_prod_to_all(dst6, src6, N, 0, 0, npes, pWrk6, pSync); if(me == 0){ for (i = 0; i < N; i++) { /*printf("dst2[%d]: %ld, expected val: %ld\n",i, dst2[i], (long)expected_result2);*/ if(dst0[i] != expected_result0) success0 =1; if(dst1[i] != expected_result1) success1 =1; if(dst2[i] != expected_result2) success2 =1; if(dst3[i] != expected_result3) success3 =1; if(dst4[i] != expected_result4) success4 =1; if(dst5[i] != expected_result5) success5 =1; if(dst6[i] != expected_result6) success6 =1; } if(success0==1){ printf("Reduction operation shmem_short_prod_to_all: Failed\n"); } else{ printf("Reduction operation shmem_short_prod_to_all: Passed\n"); } if(success1==1){ printf("Reduction operation shmem_int_prod_to_all: Failed\n"); } else{ printf("Reduction operation shmem_int_prod_to_all: Passed\n"); } if(success2==1){ printf("Reduction operation shmem_long_prod_to_all: Failed\n"); } else{ printf("Reduction operation shmem_long_prod_to_all: Passed\n"); } if(success3==1){ printf("Reduction operation shmem_float_prod_to_all: Failed\n"); } else{ printf("Reduction operation shmem_float_prod_to_all: Passed\n"); } if(success4==1){ printf("Reduction operation shmem_double_prod_to_all: Failed\n"); } else{ printf("Reduction operation shmem_double_prod_to_all: Passed\n"); } if(success5==1){ printf("Reduction operation shmem_longdouble_prod_to_all: Failed\n"); } else{ printf("Reduction operation shmem_longdouble_prod_to_all: Passed\n"); } if(success6==1){ printf("Reduction operation shmem_longlong_prod_to_all: Failed\n"); } else{ printf("Reduction operation shmem_longlong_prod_to_all: Passed\n"); } } /*Test OR: shmem_int_or_to_all, shmem_long_or_to_all, shmem_longlong_or_to_all, shmem_short_or_to_all,*/ success0 = success1 = success2 = success6 = 0; for (i = 0; i < N; i += 1) { src0[i] = src1[i] = src2[i] = src6[i] = (me + 1)%4; } for (i = 0; i < N; i += 1) { dst0[i] = -9; dst1[i] = -9; dst2[i] = -9; dst6[i] = -9; } shmem_barrier_all(); shmem_short_or_to_all(dst0, src0, N, 0, 0, npes, pWrk0, pSync); shmem_int_or_to_all(dst1, src1, N, 0, 0, npes, pWrk1, pSync1); shmem_long_or_to_all(dst2, src2, N, 0, 0, npes, pWrk2, pSync); shmem_longlong_or_to_all(dst6, src6, N, 0, 0, npes, pWrk6, pSync1); if(me==0){ for (i = 0; i < N; i++) { if(dst0[i] != 3) success0 =1; if(dst1[i] != 3) success1 =1; if(dst2[i] != 3) success2 =1; if(dst6[i] != 3) success6 =1; } if(success0==1){ printf("Reduction operation shmem_short_or_to_all: Failed\n"); } else{ printf("Reduction operation shmem_short_or_to_all: Passed\n"); } if(success1==1){ printf("Reduction operation shmem_int_or_to_all: Failed\n"); } else{ printf("Reduction operation shmem_int_or_to_all: Passed\n"); } if(success2==1){ printf("Reduction operation shmem_long_or_to_all: Failed\n"); } else{ printf("Reduction operation shmem_long_or_to_all: Passed\n"); } if(success6==1){ printf("Reduction operation shmem_longlong_or_to_all: Failed\n"); } else{ printf("Reduction operation shmem_longlong_or_to_all: Passed\n"); } } /*Test XOR: shmem_int_xor_to_all, shmem_long_xor_to_all, shmem_longlong_xor_to_all, shmem_short_xor_to_all*/ success0 = success1 = success2 = success6 = 0; for (i = 0; i < N; i += 1) { src0[i] = src1[i] = src2[i] = src6[i] = me%2; } for (i = 0; i < N; i += 1) { dst0[i] = -9; dst1[i] = -9; dst2[i] = -9; dst6[i] = -9; } int expected_result = ((int)(npes/2) % 2); shmem_barrier_all(); shmem_short_xor_to_all(dst0, src0, N, 0, 0, npes, pWrk0, pSync); shmem_int_xor_to_all(dst1, src1, N, 0, 0, npes, pWrk1, pSync1); shmem_long_xor_to_all(dst2, src2, N, 0, 0, npes, pWrk2, pSync); shmem_longlong_xor_to_all(dst6, src6, N, 0, 0, npes, pWrk6, pSync1); if(me==0){ for (i = 0; i < N; i++) { if(dst0[i] != expected_result) success0 =1; if(dst1[i] != expected_result) success1 =1; if(dst2[i] != expected_result) success2 =1; if(dst6[i] != expected_result) success6 =1; } if(success0==1){ printf("Reduction operation shmem_short_xor_to_all: Failed\n"); } else{ printf("Reduction operation shmem_short_xor_to_all: Passed\n"); } if(success1==1){ printf("Reduction operation shmem_int_xor_to_all: Failed\n"); } else{ printf("Reduction operation shmem_int_xor_to_all: Passed\n"); } if(success2==1){ printf("Reduction operation shmem_long_xor_to_all: Failed\n"); } else{ printf("Reduction operation shmem_long_xor_to_all: Passed\n"); } if(success6==1){ printf("Reduction operation shmem_longlong_xor_to_all: Failed\n"); } else{ printf("Reduction operation shmem_longlong_xor_to_all: Passed\n"); } } return 0; }
int sum_to_all(int me, int npes) { int i, pass=0; memset(ok,0,sizeof(ok)); for (i = 0; i < N; i++) { src0[i] = src1[i] = src2[i] = src3[i] = src4[i] = src5[i] = src6[i] = me; dst0[i] = -9; dst1[i] = -9; dst2[i] = -9; dst3[i] = -9; dst4[i] = -9; dst5[i] = -9; dst6[i] = -9; } shmem_barrier_all(); shmem_short_sum_to_all(dst0, src0, N, 0, 0, npes, pWrk0, pSync); shmem_int_sum_to_all(dst1, src1, N, 0, 0, npes, pWrk1, pSync1); shmem_long_sum_to_all(dst2, src2, N, 0, 0, npes, pWrk2, pSync); shmem_float_sum_to_all(dst3, src3, N, 0, 0, npes, pWrk3, pSync1); shmem_double_sum_to_all(dst4, src4, N, 0, 0, npes, pWrk4, pSync); shmem_longdouble_sum_to_all(dst5, src5, N, 0, 0, npes, pWrk5, pSync1); shmem_longlong_sum_to_all(dst6, src6, N, 0, 0, npes, pWrk6, pSync); if(me == 0) { for (i = 0; i < N; i++) { if(dst0[i] != (short) (npes * (npes-1)/2)) ok[0] = 1; if(dst1[i] != (int) (npes * (npes-1)/2)) ok[1] = 1; if(dst2[i] != (long) (npes * (npes-1)/2)) ok[2] = 1; if(dst3[i] != (float) (npes * (npes-1)/2)) ok[3] = 1; if(dst4[i] != (double) (npes * (npes-1)/2)) ok[4] = 1; if(dst5[i] != (long double) (npes * (npes-1)/2)) ok[5] = 1; if(dst6[i] != (long long) (npes * (npes-1)/2)) ok[6] = 1; } if(ok[0]==1){ printf("Reduction operation shmem_short_sum_to_all: Failed\n"); } else{ Vprintf("Reduction operation shmem_short_sum_to_all: Passed\n"); pass++; } if(ok[1]==1){ printf("Reduction operation shmem_int_sum_to_all: Failed\n"); } else{ Vprintf("Reduction operation shmem_int_sum_to_all: Passed\n"); pass++; } if(ok[2]==1){ printf("Reduction operation shmem_long_sum_to_all: Failed\n"); } else{ Vprintf("Reduction operation shmem_long_sum_to_all: Passed\n"); pass++; } if(ok[3]==1){ printf("Reduction operation shmem_float_sum_to_all: Failed\n"); } else{ Vprintf("Reduction operation shmem_float_sum_to_all: Passed\n"); pass++; } if(ok[4]==1){ printf("Reduction operation shmem_double_sum_to_all: Failed\n"); } else{ Vprintf("Reduction operation shmem_double_sum_to_all: Passed\n"); pass++; } if(ok[5]==1){ printf("Reduction operation shmem_longdouble_sum_to_all: Failed\n"); } else{ Vprintf("Reduction operation shmem_longdouble_sum_to_all: Passed\n"); pass++; } if(ok[6]==1){ printf("Reduction operation shmem_longlong_sum_to_all: Failed\n"); } else{ Vprintf("Reduction operation shmem_longlong_sum_to_all: Passed\n"); pass++; } Vprintf("\n"); fflush(stdout); } if (Serialize) shmem_barrier_all(); return (pass == 7 ? 1 : 0); }
/* Performance test for shmem_XX_put (latency and bandwidth) */ #include <stdio.h> #include <stdlib.h> #include <time.h> #include <shmem.h> #include <sys/time.h> long double time_taken; long pSync[_SHMEM_REDUCE_SYNC_SIZE]; long double pWrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE]; //#define N_ELEMENTS 25600/*Data size chosen to be able to capture time required*/ int main(void) { int i,j,k; int *target; int *source; int me, npes; int nxtpe; struct timeval start, end; long double start_time,end_time; int N_ELEMENTS = (4194304*2)/sizeof(int); start_pes(0); me = _my_pe(); npes = _num_pes(); for (i = 0; i < _SHMEM_BCAST_SYNC_SIZE; i += 1) { pSync[i] = _SHMEM_SYNC_VALUE; } nxtpe = (me+1)%npes; source = (int *) shmalloc( N_ELEMENTS * sizeof(*source) ); target = (int *) shmalloc( N_ELEMENTS * sizeof(*target) ); if(me == 0) printf("Put performance test results:\nSize (Bytes)\t\tTime (Microseconds)\t\tBandwidth (Bytes/Second)\n"); for (i = 0; i < N_ELEMENTS; i += 1) { source[i] = i + 1; target[i] = -90; } shmem_barrier_all(); /*For int put we take average of all the times realized by a pair of PEs, thus * reducing effects of physical location of PEs*/ for (i=1;i<=N_ELEMENTS;i=i*2) { time_taken = 0; for(j=0;j<10000;j++){ gettimeofday(&start, NULL); start_time = (start.tv_sec * 1000000.0) + start.tv_usec; shmem_int_put(target, source, i,nxtpe); gettimeofday(&end, NULL); end_time = (end.tv_sec * 1000000.0) + end.tv_usec; time_taken = time_taken + (end_time - start_time); } shmem_longdouble_sum_to_all(&time_taken, &time_taken,1, 0, 0, npes, pWrk, pSync); if(me == 0){ time_taken = time_taken/(npes*10000); /*Average time across all PEs for one put*/ if (i*sizeof(i) < 1048576) printf("%ld \t\t\t\t %lf\t\t\t\t %lf\n",i*sizeof(i), (double)time_taken,(double)((i*sizeof(i))/(time_taken))); else printf("%ld \t\t\t %lf\t\t\t\t %lf\n",i*sizeof(i), (double)time_taken,(double)((i*sizeof(i))/(time_taken))); } } shmem_barrier_all(); shfree(target); shfree(source); return 0; }