int
main()
{
  int i,j;
  int me, npes;
  int success0, success1, success2, success3, success4, success5, success6;
  success0 = success1 = success2 = success3 = success4 = success5 = success6 = 0;

  start_pes(0);
  me = _my_pe();
  npes = _num_pes();

  for (i = 0; i < _SHMEM_REDUCE_SYNC_SIZE; i += 1) {
    pSync[i] = _SHMEM_SYNC_VALUE;
	pSync1[i] = _SHMEM_SYNC_VALUE;
  }

  for (i = 0; i < N; i += 1) {
    src0[i] = src1[i] = src2[i] = src3[i] = src4[i] = src5[i] = src6[i] = me + i;
 }
  
  /*Test MAX: shmem_double_max_to_all, shmem_float_max_to_all, shmem_int_max_to_all, shmem_long_max_to_all, shmem_longdouble_max_to_all, shmem_longlong_max_to_all, shmem_short_max_to_all */
  shmem_barrier_all();

  shmem_short_max_to_all(dst0, src0, N, 0, 0, npes, pWrk0, pSync);
  shmem_int_max_to_all(dst1, src1, N, 0, 0, npes, pWrk1, pSync1);
  shmem_long_max_to_all(dst2, src2, N, 0, 0, npes, pWrk2, pSync);
  shmem_float_max_to_all(dst3, src3, N, 0, 0, npes, pWrk3, pSync1);
  shmem_double_max_to_all(dst4, src4, N, 0, 0, npes, pWrk4, pSync);
  shmem_longdouble_max_to_all(dst5, src5, N, 0, 0, npes, pWrk5, pSync1);
  shmem_longlong_max_to_all(dst6, src6, N, 0, 0, npes, pWrk6, pSync);
  
  
  if(me == 0){
    for (i = 0,j=-1; i < N; i++,j++) {
      if(dst0[i] != npes+j)
        success0 =1;
	  if(dst1[i] != npes+j)
        success1 =1;
	  if(dst2[i] != npes+j)
        success2 =1;
	  if(dst3[i] != npes+j)
        success3 =1;
	  if(dst4[i] != npes+j)
        success4 =1;
	  if(dst5[i] != npes+j)
        success5 =1;
	  if(dst6[i] != npes+j)
        success6 =1;
    }
    if(success0==1){
      printf("Reduction operation shmem_short_max_to_all: Failed\n");
	}  
    else{
      printf("Reduction operation shmem_short_max_to_all: Passed\n");
	}
	if(success1==1){
      printf("Reduction operation shmem_int_max_to_all: Failed\n");
	}  
    else{
      printf("Reduction operation shmem_int_max_to_all: Passed\n");
	}
	if(success2==1){
      printf("Reduction operation shmem_long_max_to_all: Failed\n");
	}  
    else{
      printf("Reduction operation shmem_long_max_to_all: Passed\n");
	}
	if(success3==1){
      printf("Reduction operation shmem_float_max_to_all: Failed\n");
	}  
    else{
      printf("Reduction operation shmem_float_max_to_all: Passed\n");
	}
	if(success4==1){
      printf("Reduction operation shmem_double_max_to_all: Failed\n");
	}  
    else{
      printf("Reduction operation shmem_double_max_to_all: Passed\n");
	}
	if(success5==1){
      printf("Reduction operation shmem_longdouble_max_to_all: Failed\n");
	}  
    else{
      printf("Reduction operation shmem_longdouble_max_to_all: Passed\n");
	}
	if(success6==1){
      printf("Reduction operation shmem_longlong_max_to_all: Failed\n");
	}  
    else{
      printf("Reduction operation shmem_longlong_max_to_all: Passed\n");
	}
	
  }
  
  
  /*Test MIN: shmem_double_min_to_all, shmem_float_min_to_all, shmem_int_min_to_all, shmem_long_min_to_all, shmem_longdouble_min_to_all, shmem_longlong_min_to_all, shmem_short_min_to_all*/
  success0 = success1 = success2 = success3 = success4 = success5 = success6 = 0;
  
  for (i = 0; i < N; i += 1) {
    src0[i] = src1[i] = src2[i] = src3[i] = src4[i] = src5[i] = src6[i] = me + i;
 }
 
  for (i = 0; i < N; i += 1) {
    dst0[i] = -9;
	dst1[i] = -9;
	dst2[i] = -9;
	dst3[i] = -9;
	dst4[i] = -9;
	dst5[i] = -9;
	dst6[i] = -9;
  }
   
  shmem_barrier_all();
  
  shmem_short_min_to_all(dst0, src0, N, 0, 0, npes, pWrk0, pSync);
  shmem_int_min_to_all(dst1, src1, N, 0, 0, npes, pWrk1, pSync1);
  shmem_long_min_to_all(dst2, src2, N, 0, 0, npes, pWrk2, pSync);
  shmem_float_min_to_all(dst3, src3, N, 0, 0, npes, pWrk3, pSync1);
  shmem_double_min_to_all(dst4, src4, N, 0, 0, npes, pWrk4, pSync);
  shmem_longdouble_min_to_all(dst5, src5, N, 0, 0, npes, pWrk5, pSync1);
  shmem_longlong_min_to_all(dst6, src6, N, 0, 0, npes, pWrk6, pSync);
  
  
  if(me == 0){
    for (i = 0; i < N; i++) {
      if(dst0[i] != i)
        success0 =1;
	  if(dst1[i] != i)
        success1 =1;
	  if(dst2[i] != i)
        success2 =1;
	  if(dst3[i] != i)
        success3 =1;
	  if(dst4[i] != i)
        success4 =1;
	  if(dst5[i] != i)
        success5 =1;
	  if(dst6[i] != i)
        success6 =1;
    }
    if(success0==1){
      printf("Reduction operation shmem_short_min_to_all: Failed\n");
	}  
    else{
      printf("Reduction operation shmem_short_min_to_all: Passed\n");
	}
	if(success1==1){
      printf("Reduction operation shmem_int_min_to_all: Failed\n");
	}  
    else{
      printf("Reduction operation shmem_int_min_to_all: Passed\n");
	}
	if(success2==1){
      printf("Reduction operation shmem_long_min_to_all: Failed\n");
	}  
    else{
      printf("Reduction operation shmem_long_min_to_all: Passed\n");
	}
	if(success3==1){
      printf("Reduction operation shmem_float_min_to_all: Failed\n");
	}  
    else{
      printf("Reduction operation shmem_float_min_to_all: Passed\n");
	}
	if(success4==1){
      printf("Reduction operation shmem_double_min_to_all: Failed\n");
	}  
    else{
      printf("Reduction operation shmem_double_min_to_all: Passed\n");
	}
	if(success5==1){
      printf("Reduction operation shmem_longdouble_min_to_all: Failed\n");
	}  
    else{
      printf("Reduction operation shmem_longdouble_min_to_all: Passed\n");
	}
	if(success6==1){
      printf("Reduction operation shmem_longlong_min_to_all: Failed\n");
	}  
    else{
      printf("Reduction operation shmem_longlong_min_to_all: Passed\n");
	}
	
  }
  
  /*Test SUM: shmem_double_sum_to_all, shmem_float_sum_to_all, shmem_int_sum_to_all, shmem_long_sum_to_all, shmem_longdouble_sum_to_all, shmem_longlong_sum_to_all, shmem_short_sum_to_all*/
  success0 = success1 = success2 = success3 = success4 = success5 = success6 = 0;
  for (i = 0; i < N; i += 1) {
    src0[i] = src1[i] = src2[i] = src3[i] = src4[i] = src5[i] = src6[i] = me;
 }
  for (i = 0; i < N; i += 1) {
    dst0[i] = -9;
	dst1[i] = -9;
	dst2[i] = -9;
	dst3[i] = -9;
	dst4[i] = -9;
	dst5[i] = -9;
	dst6[i] = -9;
  }
  shmem_barrier_all();

  shmem_short_sum_to_all(dst0, src0, N, 0, 0, npes, pWrk0, pSync);
  shmem_int_sum_to_all(dst1, src1, N, 0, 0, npes, pWrk1, pSync1);
  shmem_long_sum_to_all(dst2, src2, N, 0, 0, npes, pWrk2, pSync);
  shmem_float_sum_to_all(dst3, src3, N, 0, 0, npes, pWrk3, pSync1);
  shmem_double_sum_to_all(dst4, src4, N, 0, 0, npes, pWrk4, pSync);
  shmem_longdouble_sum_to_all(dst5, src5, N, 0, 0, npes, pWrk5, pSync1);
  shmem_longlong_sum_to_all(dst6, src6, N, 0, 0, npes, pWrk6, pSync);

  
  if(me == 0){
    for (i = 0; i < N; i++) {
	  if(dst0[i] != (npes * (npes-1)/2))
        success0 =1;
	  if(dst1[i] != (npes * (npes-1)/2))
        success1 =1;
	  if(dst2[i] != (npes * (npes-1)/2))
        success2 =1;
	  if(dst3[i] != (npes * (npes-1)/2))
        success3 =1;
	  if(dst4[i] != (npes * (npes-1)/2))
        success4 =1;
	  if(dst5[i] != (npes * (npes-1)/2))
        success5 =1;
	  if(dst6[i] != (npes * (npes-1)/2))
        success6 =1;
    }
    if(success0==1){
      printf("Reduction operation shmem_short_sum_to_all: Failed\n");
	}  
    else{
      printf("Reduction operation shmem_short_sum_to_all: Passed\n");
	}
	if(success1==1){
      printf("Reduction operation shmem_int_sum_to_all: Failed\n");
	}  
    else{
      printf("Reduction operation shmem_int_sum_to_all: Passed\n");
	}
	if(success2==1){
      printf("Reduction operation shmem_long_sum_to_all: Failed\n");
	}  
    else{
      printf("Reduction operation shmem_long_sum_to_all: Passed\n");
	}
	if(success3==1){
      printf("Reduction operation shmem_float_sum_to_all: Failed\n");
	}  
    else{
      printf("Reduction operation shmem_float_sum_to_all: Passed\n");
	}
	if(success4==1){
      printf("Reduction operation shmem_double_sum_to_all: Failed\n");
	}  
    else{
      printf("Reduction operation shmem_double_sum_to_all: Passed\n");
	}
	if(success5==1){
      printf("Reduction operation shmem_longdouble_sum_to_all: Failed\n");
	}  
    else{
      printf("Reduction operation shmem_longdouble_sum_to_all: Passed\n");
	}
	if(success6==1){
      printf("Reduction operation shmem_longlong_sum_to_all: Failed\n");
	}  
    else{
      printf("Reduction operation shmem_longlong_sum_to_all: Passed\n");
	}
	
  }
  
  /*Test AND: shmem_int_and_to_all, shmem_long_and_to_all, shmem_longlong_and_to_all, shmem_short_and_to_all,*/
  success0 = success1 = success2 = success6 = 0;
  for (i = 0; i < N; i += 1) {
    src0[i] = src1[i] = src2[i] = src6[i] = me;
 }
 for (i = 0; i < N; i += 1) {
    dst0[i] = -9;
	dst1[i] = -9;
	dst2[i] = -9;
	dst6[i] = -9;
  }
 
  shmem_barrier_all();
  
  shmem_short_and_to_all(dst0, src0, N, 0, 0, npes, pWrk0, pSync);
  shmem_int_and_to_all(dst1, src1, N, 0, 0, npes, pWrk1, pSync1);
  shmem_long_and_to_all(dst2, src2, N, 0, 0, npes, pWrk2, pSync);
  shmem_longlong_and_to_all(dst6, src6, N, 0, 0, npes, pWrk6, pSync1);
  
  
  if(me==0){
    for (i = 0; i < N; i++) {
	  if(dst0[i] != 0)
        success0 =1;
	  if(dst1[i] != 0)
        success1 =1;
	  if(dst2[i] != 0)
        success2 =1;
	 if(dst6[i] != 0)
        success6 =1;
    }
    if(success0==1){
      printf("Reduction operation shmem_short_and_to_all: Failed\n");
	}  
    else{
      printf("Reduction operation shmem_short_and_to_all: Passed\n");
	}
	if(success1==1){
      printf("Reduction operation shmem_int_and_to_all: Failed\n");
	}  
    else{
      printf("Reduction operation shmem_int_and_to_all: Passed\n");
	}
	if(success2==1){
      printf("Reduction operation shmem_long_and_to_all: Failed\n");
	}  
    else{
      printf("Reduction operation shmem_long_and_to_all: Passed\n");
	}
	if(success6==1){
      printf("Reduction operation shmem_longlong_and_to_all: Failed\n");
	}  
    else{
      printf("Reduction operation shmem_longlong_and_to_all: Passed\n");
	}
	
  }
  
 /*Test PROD: shmem_double_prod_to_all, shmem_float_prod_to_all, shmem_int_prod_to_all, shmem_long_prod_to_all, shmem_longdouble_prod_to_all, shmem_longlong_prod_to_all, shmem_short_prod_to_all, */
  
  success0 = success1 = success2 = success3 = success4 = success5 = success6 = 0;
  for (i = 0; i < N; i += 1) {
    src0[i] = src1[i] = src2[i] = src3[i] = src4[i] = src5[i] = src6[i] = me + 1;
 }
  for (i = 0; i < N; i += 1) {
    dst0[i] = -9;
	dst1[i] = -9;
	dst2[i] = -9;
	dst3[i] = -9;
	dst4[i] = -9;
	dst5[i] = -9;
	dst6[i] = -9;
  }
  
  expected_result0 = expected_result1 = expected_result2 = expected_result3 = expected_result4 = expected_result5 = expected_result6 =1;
  for(i=1;i<=npes;i++){
    expected_result0 = expected_result0 * i;
	expected_result1 = expected_result1 * i;
	expected_result2 = expected_result2 * i;
	expected_result3 = expected_result3 * i;
	expected_result4 = expected_result4 * i;
	expected_result5 = expected_result5 * i;
	expected_result6 = expected_result6 * i;
  }
   
  shmem_barrier_all();
 
  shmem_short_prod_to_all(dst0, src0, N, 0, 0, npes, pWrk0, pSync);
  shmem_int_prod_to_all(dst1, src1, N, 0, 0, npes, pWrk1, pSync1);
  shmem_long_prod_to_all(dst2, src2, N, 0, 0, npes, pWrk2, pSync);
  shmem_float_prod_to_all(dst3, src3, N, 0, 0, npes, pWrk3, pSync1);
  shmem_double_prod_to_all(dst4, src4, N, 0, 0, npes, pWrk4, pSync);
  shmem_longdouble_prod_to_all(dst5, src5, N, 0, 0, npes, pWrk5, pSync1);
  shmem_longlong_prod_to_all(dst6, src6, N, 0, 0, npes, pWrk6, pSync);

 
  if(me == 0){
    for (i = 0; i < N; i++) {
	 /*printf("dst2[%d]: %ld, expected val: %ld\n",i, dst2[i], (long)expected_result2);*/
      if(dst0[i] != expected_result0)
        success0 =1;
	  if(dst1[i] != expected_result1)
        success1 =1;
	  if(dst2[i] != expected_result2)
        success2 =1;
	  if(dst3[i] != expected_result3)
        success3 =1;
	  if(dst4[i] != expected_result4)
        success4 =1;
	  if(dst5[i] != expected_result5)
        success5 =1;
	  if(dst6[i] != expected_result6)
        success6 =1;
    }
    if(success0==1){
      printf("Reduction operation shmem_short_prod_to_all: Failed\n");
	}  
    else{
      printf("Reduction operation shmem_short_prod_to_all: Passed\n");
	}
	if(success1==1){
      printf("Reduction operation shmem_int_prod_to_all: Failed\n");
	}  
    else{
      printf("Reduction operation shmem_int_prod_to_all: Passed\n");
	}
	if(success2==1){
      printf("Reduction operation shmem_long_prod_to_all: Failed\n");
	}  
    else{
      printf("Reduction operation shmem_long_prod_to_all: Passed\n");
	}
	if(success3==1){
      printf("Reduction operation shmem_float_prod_to_all: Failed\n");
	}  
    else{
      printf("Reduction operation shmem_float_prod_to_all: Passed\n");
	}
	if(success4==1){
      printf("Reduction operation shmem_double_prod_to_all: Failed\n");
	}  
    else{
      printf("Reduction operation shmem_double_prod_to_all: Passed\n");
	}
	if(success5==1){
      printf("Reduction operation shmem_longdouble_prod_to_all: Failed\n");
	}  
    else{
      printf("Reduction operation shmem_longdouble_prod_to_all: Passed\n");
	}
	if(success6==1){
      printf("Reduction operation shmem_longlong_prod_to_all: Failed\n");
	}  
    else{
      printf("Reduction operation shmem_longlong_prod_to_all: Passed\n");
	}
	
  }
 
 /*Test OR: shmem_int_or_to_all, shmem_long_or_to_all, shmem_longlong_or_to_all, shmem_short_or_to_all,*/
  
  success0 = success1 = success2 = success6 = 0;
  for (i = 0; i < N; i += 1) {
    src0[i] = src1[i] = src2[i] = src6[i] = (me + 1)%4;
 }
 for (i = 0; i < N; i += 1) {
    dst0[i] = -9;
	dst1[i] = -9;
	dst2[i] = -9;
	dst6[i] = -9;
  }
 
  shmem_barrier_all();
  
  shmem_short_or_to_all(dst0, src0, N, 0, 0, npes, pWrk0, pSync);
  shmem_int_or_to_all(dst1, src1, N, 0, 0, npes, pWrk1, pSync1);
  shmem_long_or_to_all(dst2, src2, N, 0, 0, npes, pWrk2, pSync);
  shmem_longlong_or_to_all(dst6, src6, N, 0, 0, npes, pWrk6, pSync1);
  
  
  if(me==0){
    for (i = 0; i < N; i++) {
      if(dst0[i] != 3)
        success0 =1;
	  if(dst1[i] != 3)
        success1 =1;
	  if(dst2[i] != 3)
        success2 =1;
	 if(dst6[i] != 3)
        success6 =1;
    }
    if(success0==1){
      printf("Reduction operation shmem_short_or_to_all: Failed\n");
	}  
    else{
      printf("Reduction operation shmem_short_or_to_all: Passed\n");
	}
	if(success1==1){
      printf("Reduction operation shmem_int_or_to_all: Failed\n");
	}  
    else{
      printf("Reduction operation shmem_int_or_to_all: Passed\n");
	}
	if(success2==1){
      printf("Reduction operation shmem_long_or_to_all: Failed\n");
	}  
    else{
      printf("Reduction operation shmem_long_or_to_all: Passed\n");
	}
	if(success6==1){
      printf("Reduction operation shmem_longlong_or_to_all: Failed\n");
	}  
    else{
      printf("Reduction operation shmem_longlong_or_to_all: Passed\n");
	}
	
  }
 
 /*Test XOR: shmem_int_xor_to_all, shmem_long_xor_to_all, shmem_longlong_xor_to_all, shmem_short_xor_to_all*/
  
  success0 = success1 = success2 = success6 = 0;
  for (i = 0; i < N; i += 1) {
    src0[i] = src1[i] = src2[i] = src6[i] = me%2;
 }
 for (i = 0; i < N; i += 1) {
    dst0[i] = -9;
	dst1[i] = -9;
	dst2[i] = -9;
	dst6[i] = -9;
  }
  int expected_result = ((int)(npes/2) % 2);
  
 
  shmem_barrier_all();
  
  shmem_short_xor_to_all(dst0, src0, N, 0, 0, npes, pWrk0, pSync);
  shmem_int_xor_to_all(dst1, src1, N, 0, 0, npes, pWrk1, pSync1);
  shmem_long_xor_to_all(dst2, src2, N, 0, 0, npes, pWrk2, pSync);
  shmem_longlong_xor_to_all(dst6, src6, N, 0, 0, npes, pWrk6, pSync1);
  
  if(me==0){
    for (i = 0; i < N; i++) {
      if(dst0[i] != expected_result)
        success0 =1;
	  if(dst1[i] != expected_result)
        success1 =1;
	  if(dst2[i] != expected_result)
        success2 =1;
	 if(dst6[i] != expected_result)
        success6 =1;
    }
    if(success0==1){
      printf("Reduction operation shmem_short_xor_to_all: Failed\n");
	}  
    else{
      printf("Reduction operation shmem_short_xor_to_all: Passed\n");
	}
	if(success1==1){
      printf("Reduction operation shmem_int_xor_to_all: Failed\n");
	}  
    else{
      printf("Reduction operation shmem_int_xor_to_all: Passed\n");
	}
	if(success2==1){
      printf("Reduction operation shmem_long_xor_to_all: Failed\n");
	}  
    else{
      printf("Reduction operation shmem_long_xor_to_all: Passed\n");
	}
	if(success6==1){
      printf("Reduction operation shmem_longlong_xor_to_all: Failed\n");
	}  
    else{
      printf("Reduction operation shmem_longlong_xor_to_all: Passed\n");
	}
	
  }

  return 0;
}
示例#2
0
文件: to_all.c 项目: caomw/SOS
int
sum_to_all(int me, int npes)
{
  int i, pass=0;

  memset(ok,0,sizeof(ok));

  for (i = 0; i < N; i++) {
    src0[i] = src1[i] = src2[i] = src3[i] = src4[i] = src5[i] = src6[i] = me;
    dst0[i] = -9;
	dst1[i] = -9;
	dst2[i] = -9;
	dst3[i] = -9;
	dst4[i] = -9;
	dst5[i] = -9;
	dst6[i] = -9;
  }

  shmem_barrier_all();

  shmem_short_sum_to_all(dst0, src0, N, 0, 0, npes, pWrk0, pSync);
  shmem_int_sum_to_all(dst1, src1, N, 0, 0, npes, pWrk1, pSync1);
  shmem_long_sum_to_all(dst2, src2, N, 0, 0, npes, pWrk2, pSync);
  shmem_float_sum_to_all(dst3, src3, N, 0, 0, npes, pWrk3, pSync1);
  shmem_double_sum_to_all(dst4, src4, N, 0, 0, npes, pWrk4, pSync);
  shmem_longdouble_sum_to_all(dst5, src5, N, 0, 0, npes, pWrk5, pSync1);
  shmem_longlong_sum_to_all(dst6, src6, N, 0, 0, npes, pWrk6, pSync);

  if(me == 0) {
    for (i = 0; i < N; i++) {
	  if(dst0[i] != (short) (npes * (npes-1)/2)) ok[0] = 1;
	  if(dst1[i] != (int) (npes * (npes-1)/2)) ok[1] = 1;
	  if(dst2[i] != (long) (npes * (npes-1)/2)) ok[2] = 1;
	  if(dst3[i] != (float) (npes * (npes-1)/2)) ok[3] = 1;
	  if(dst4[i] != (double) (npes * (npes-1)/2)) ok[4] = 1;
	  if(dst5[i] != (long double) (npes * (npes-1)/2)) ok[5] = 1;
	  if(dst6[i] != (long long) (npes * (npes-1)/2)) ok[6] = 1;
    }
    if(ok[0]==1){
      printf("Reduction operation shmem_short_sum_to_all: Failed\n");
	}  
    else{
      Vprintf("Reduction operation shmem_short_sum_to_all: Passed\n");
      pass++;
	}
	if(ok[1]==1){
      printf("Reduction operation shmem_int_sum_to_all: Failed\n");
	}  
    else{
      Vprintf("Reduction operation shmem_int_sum_to_all: Passed\n");
      pass++;
	}
	if(ok[2]==1){
      printf("Reduction operation shmem_long_sum_to_all: Failed\n");
	}  
    else{
      Vprintf("Reduction operation shmem_long_sum_to_all: Passed\n");
      pass++;
	}
	if(ok[3]==1){
      printf("Reduction operation shmem_float_sum_to_all: Failed\n");
	}  
    else{
      Vprintf("Reduction operation shmem_float_sum_to_all: Passed\n");
      pass++;
	}
	if(ok[4]==1){
      printf("Reduction operation shmem_double_sum_to_all: Failed\n");
	}  
    else{
      Vprintf("Reduction operation shmem_double_sum_to_all: Passed\n");
      pass++;
	}
	if(ok[5]==1){
      printf("Reduction operation shmem_longdouble_sum_to_all: Failed\n");
	}  
    else{
      Vprintf("Reduction operation shmem_longdouble_sum_to_all: Passed\n");
      pass++;
	}
	if(ok[6]==1){
      printf("Reduction operation shmem_longlong_sum_to_all: Failed\n");
	}  
    else{
      Vprintf("Reduction operation shmem_longlong_sum_to_all: Passed\n");
      pass++;
	}
    Vprintf("\n"); fflush(stdout);
  }
    if (Serialize) shmem_barrier_all();

    return (pass == 7 ? 1 : 0);
}
示例#3
0
/* Performance test for shmem_XX_put (latency and bandwidth) */

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <shmem.h>
#include <sys/time.h>

long double time_taken;

long pSync[_SHMEM_REDUCE_SYNC_SIZE];
long double pWrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];

//#define N_ELEMENTS 25600/*Data size chosen to be able to capture time required*/
  int
main(void)
{
  int i,j,k;
  int *target;
  int *source;
  int me, npes;
  int nxtpe;
  struct timeval start, end;
  long double start_time,end_time;

  int N_ELEMENTS = (4194304*2)/sizeof(int);

  start_pes(0);
  me = _my_pe();
  npes = _num_pes();

  for (i = 0; i < _SHMEM_BCAST_SYNC_SIZE; i += 1)
  {
    pSync[i] = _SHMEM_SYNC_VALUE;
  }
  nxtpe = (me+1)%npes;
  source = (int *) shmalloc( N_ELEMENTS * sizeof(*source) );
  target = (int *) shmalloc( N_ELEMENTS * sizeof(*target) );

  if(me == 0)
    printf("Put performance test results:\nSize (Bytes)\t\tTime (Microseconds)\t\tBandwidth (Bytes/Second)\n");

  for (i = 0; i < N_ELEMENTS; i += 1) {
    source[i] = i + 1;
    target[i] = -90;
  }
  shmem_barrier_all();

  /*For int put we take average of all the times realized by a pair of PEs, thus
   * reducing effects of physical location of PEs*/
  for (i=1;i<=N_ELEMENTS;i=i*2)
  {
    time_taken = 0;

    for(j=0;j<10000;j++){
      gettimeofday(&start, NULL);

      start_time = (start.tv_sec * 1000000.0) + start.tv_usec;

      shmem_int_put(target, source, i,nxtpe);

      gettimeofday(&end, NULL);

      end_time = (end.tv_sec * 1000000.0) + end.tv_usec;

      time_taken = time_taken + (end_time - start_time);

    }
    shmem_longdouble_sum_to_all(&time_taken, &time_taken,1, 0, 0, npes, pWrk, pSync);


    if(me == 0){
      time_taken = time_taken/(npes*10000); /*Average time across all PEs for one put*/
      if (i*sizeof(i) < 1048576)
        printf("%ld \t\t\t\t %lf\t\t\t\t %lf\n",i*sizeof(i),
               (double)time_taken,(double)((i*sizeof(i))/(time_taken)));
      else
        printf("%ld \t\t\t %lf\t\t\t\t %lf\n",i*sizeof(i),
               (double)time_taken,(double)((i*sizeof(i))/(time_taken)));

    }

  }
  shmem_barrier_all();

  shfree(target);
  shfree(source);
  return 0;
}