Beispiel #1
0
/*
 * Verifies the correctness of the sort.
 * Ensures all keys are within a PE's bucket boundaries.
 * Ensures the final number of keys is equal to the initial.
 *
 * my_local_key_counts: per-key population counts for this PE; the first
 *                      BUCKET_WIDTH entries are summed.
 * my_local_keys:       this PE's keys; the first my_bucket_size entries
 *                      are range-checked.
 *
 * The function calls shmem_barrier_all() and a sum reduction over PEs
 * 0..NUM_PES-1, so every PE has to call it. It relies on the file-level
 * my_bucket_size, llWrk and pSync objects.
 *
 * Returns 0 when every check passes and 1 when any check fails.
 */
static int verify_results(int const * const my_local_key_counts,
                           KEY_TYPE const * const my_local_keys)
{

  shmem_barrier_all();

  int error = 0;

  const int my_rank = shmem_my_pe();

  // Inclusive key range owned by this PE
  const int my_min_key = my_rank * BUCKET_WIDTH;
  const int my_max_key = (my_rank+1) * BUCKET_WIDTH - 1;

#ifdef ISX_PROFILING
  unsigned long long start = current_time_ns();
#endif

  // Verify all keys are within bucket boundaries
  for(long long int i = 0; i < my_bucket_size; ++i){
    const int key = my_local_keys[i];
    if((key < my_min_key) || (key > my_max_key)){
      printf("Rank %d Failed Verification!\n",my_rank);
      printf("Key: %d is outside of bounds [%d, %d]\n", key, my_min_key, my_max_key);
      error = 1;
    }
  }

#ifdef ISX_PROFILING
  unsigned long long end = current_time_ns();
  if (my_rank == 0) {
    printf("Verifying took %llu ns\n", end - start);
  }
#endif

  // Verify the sum of the key population equals the expected bucket size
  long long int bucket_size_test = 0;
  for(uint64_t i = 0; i < BUCKET_WIDTH; ++i){
    bucket_size_test +=  my_local_key_counts[i];
  }
  if(bucket_size_test != my_bucket_size){
      printf("Rank %d Failed Verification!\n",my_rank);
      printf("Actual Bucket Size: %lld Should be %lld\n", bucket_size_test, my_bucket_size);
      error = 1;
  }

  // Verify the final number of keys equals the initial number of keys.
  // The reduction target is static so that it can serve as the
  // destination of the collective sum.
  static long long int total_num_keys = 0;
  shmem_longlong_sum_to_all(&total_num_keys, &my_bucket_size, 1, 0, 0, NUM_PES, llWrk, pSync);
  shmem_barrier_all();

  if(total_num_keys != (long long int)(NUM_KEYS_PER_PE * NUM_PES)){
    // Every PE holds the reduced total and therefore sees the same
    // mismatch: all of them report failure through the return value,
    // while only the root prints the diagnostic.
    error = 1;
    if(my_rank == ROOT_PE){
      printf("Verification Failed!\n");
      printf("Actual total number of keys: %lld Expected %" PRIu64 "\n", total_num_keys, NUM_KEYS_PER_PE * NUM_PES );
    }
  }

  return error;
}
Beispiel #2
0
/*
 * Exercises the seven typed shmem_*_sum_to_all reductions over PEs
 * 0..npes-1, with every PE contributing its own rank `me` in each of the
 * N elements.
 *
 * PE 0 compares each destination array against npes*(npes-1)/2 and prints
 * one verdict line per reduction (failures through printf, passes through
 * Vprintf). The pass counter is only advanced on PE 0, so the return value
 * is 1 only there and only when all seven reductions matched; it is 0 in
 * every other case.
 */
int
sum_to_all(int me, int npes)
{
  int idx;
  int pass = 0;

  /* ok[k] is a mismatch flag: it stays 0 until reduction k is seen to be wrong. */
  memset(ok, 0, sizeof(ok));

  /* Sentinel in every destination, own rank in every source. */
  for (idx = 0; idx < N; idx++) {
    dst0[idx] = -9;
    dst1[idx] = -9;
    dst2[idx] = -9;
    dst3[idx] = -9;
    dst4[idx] = -9;
    dst5[idx] = -9;
    dst6[idx] = -9;
    src0[idx] = src1[idx] = src2[idx] = src3[idx] = src4[idx] = src5[idx] = src6[idx] = me;
  }

  shmem_barrier_all();

  /* Consecutive reductions alternate between the two sync arrays. */
  shmem_short_sum_to_all(dst0, src0, N, 0, 0, npes, pWrk0, pSync);
  shmem_int_sum_to_all(dst1, src1, N, 0, 0, npes, pWrk1, pSync1);
  shmem_long_sum_to_all(dst2, src2, N, 0, 0, npes, pWrk2, pSync);
  shmem_float_sum_to_all(dst3, src3, N, 0, 0, npes, pWrk3, pSync1);
  shmem_double_sum_to_all(dst4, src4, N, 0, 0, npes, pWrk4, pSync);
  shmem_longdouble_sum_to_all(dst5, src5, N, 0, 0, npes, pWrk5, pSync1);
  shmem_longlong_sum_to_all(dst6, src6, N, 0, 0, npes, pWrk6, pSync);

  if (me == 0) {
    /* Sum of the ranks 0 .. npes-1. */
    const int rank_sum = npes * (npes - 1) / 2;

    for (idx = 0; idx < N; idx++) {
      if (dst0[idx] != (short) rank_sum) { ok[0] = 1; }
      if (dst1[idx] != (int) rank_sum) { ok[1] = 1; }
      if (dst2[idx] != (long) rank_sum) { ok[2] = 1; }
      if (dst3[idx] != (float) rank_sum) { ok[3] = 1; }
      if (dst4[idx] != (double) rank_sum) { ok[4] = 1; }
      if (dst5[idx] != (long double) rank_sum) { ok[5] = 1; }
      if (dst6[idx] != (long long) rank_sum) { ok[6] = 1; }
    }

    if (ok[0] != 1) {
      Vprintf("Reduction operation shmem_short_sum_to_all: Passed\n");
      pass++;
    }
    else {
      printf("Reduction operation shmem_short_sum_to_all: Failed\n");
    }

    if (ok[1] != 1) {
      Vprintf("Reduction operation shmem_int_sum_to_all: Passed\n");
      pass++;
    }
    else {
      printf("Reduction operation shmem_int_sum_to_all: Failed\n");
    }

    if (ok[2] != 1) {
      Vprintf("Reduction operation shmem_long_sum_to_all: Passed\n");
      pass++;
    }
    else {
      printf("Reduction operation shmem_long_sum_to_all: Failed\n");
    }

    if (ok[3] != 1) {
      Vprintf("Reduction operation shmem_float_sum_to_all: Passed\n");
      pass++;
    }
    else {
      printf("Reduction operation shmem_float_sum_to_all: Failed\n");
    }

    if (ok[4] != 1) {
      Vprintf("Reduction operation shmem_double_sum_to_all: Passed\n");
      pass++;
    }
    else {
      printf("Reduction operation shmem_double_sum_to_all: Failed\n");
    }

    if (ok[5] != 1) {
      Vprintf("Reduction operation shmem_longdouble_sum_to_all: Passed\n");
      pass++;
    }
    else {
      printf("Reduction operation shmem_longdouble_sum_to_all: Failed\n");
    }

    if (ok[6] != 1) {
      Vprintf("Reduction operation shmem_longlong_sum_to_all: Passed\n");
      pass++;
    }
    else {
      printf("Reduction operation shmem_longlong_sum_to_all: Failed\n");
    }

    Vprintf("\n");
    fflush(stdout);
  }

  if (Serialize) {
    shmem_barrier_all();
  }

  return pass == 7;
}
/*
 * Prints the verdict line for one reduction routine. `failed` is 1 when
 * PE 0 found a wrong element, any other value counts as a pass.
 */
static void
report_reduction(const char *routine, int failed)
{
  printf("Reduction operation %s: %s\n", routine,
         (failed == 1) ? "Failed" : "Passed");
}

/*
 * Stores the -9 sentinel in every element of the seven destination
 * arrays, so that a reduction which writes nothing cannot be mistaken
 * for a correct one.
 */
static void
reset_destinations(void)
{
  int k;

  for (k = 0; k < N; k += 1) {
    dst0[k] = -9;
    dst1[k] = -9;
    dst2[k] = -9;
    dst3[k] = -9;
    dst4[k] = -9;
    dst5[k] = -9;
    dst6[k] = -9;
  }
}

/*
 * Runs the MAX, MIN, SUM, AND, PROD, OR and XOR reductions for every
 * type the routines are called with here (short, int, long, float,
 * double, long double, long long; the bitwise ones only for the integer
 * types) over all PEs. PE 0 checks the results and prints one
 * "Passed"/"Failed" line per routine.
 *
 * NOTE(review): the exit status is 0 even when a check fails; a harness
 * has to look at the printed verdicts.
 */
int
main(void)
{
  int i;
  int me, npes;
  int sum_expected, or_expected, xor_expected;
  /* failedK becomes 1 as soon as PE 0 sees a wrong element in dstK. */
  int failed0, failed1, failed2, failed3, failed4, failed5, failed6;

  start_pes(0);
  me = _my_pe();
  npes = _num_pes();

  for (i = 0; i < _SHMEM_REDUCE_SYNC_SIZE; i += 1) {
    pSync[i] = _SHMEM_SYNC_VALUE;
    pSync1[i] = _SHMEM_SYNC_VALUE;
  }

  /*Test MAX: shmem_double_max_to_all, shmem_float_max_to_all, shmem_int_max_to_all, shmem_long_max_to_all, shmem_longdouble_max_to_all, shmem_longlong_max_to_all, shmem_short_max_to_all */
  failed0 = failed1 = failed2 = failed3 = failed4 = failed5 = failed6 = 0;
  for (i = 0; i < N; i += 1) {
    src0[i] = src1[i] = src2[i] = src3[i] = src4[i] = src5[i] = src6[i] = me + i;
  }
  /* Same sentinel as every other section uses before reducing. */
  reset_destinations();

  shmem_barrier_all();

  /* Back-to-back reductions alternate between the two sync arrays. */
  shmem_short_max_to_all(dst0, src0, N, 0, 0, npes, pWrk0, pSync);
  shmem_int_max_to_all(dst1, src1, N, 0, 0, npes, pWrk1, pSync1);
  shmem_long_max_to_all(dst2, src2, N, 0, 0, npes, pWrk2, pSync);
  shmem_float_max_to_all(dst3, src3, N, 0, 0, npes, pWrk3, pSync1);
  shmem_double_max_to_all(dst4, src4, N, 0, 0, npes, pWrk4, pSync);
  shmem_longdouble_max_to_all(dst5, src5, N, 0, 0, npes, pWrk5, pSync1);
  shmem_longlong_max_to_all(dst6, src6, N, 0, 0, npes, pWrk6, pSync);

  if (me == 0) {
    for (i = 0; i < N; i++) {
      /* The largest contribution to element i is (npes-1) + i. */
      const int want = npes - 1 + i;

      if (dst0[i] != want) { failed0 = 1; }
      if (dst1[i] != want) { failed1 = 1; }
      if (dst2[i] != want) { failed2 = 1; }
      if (dst3[i] != want) { failed3 = 1; }
      if (dst4[i] != want) { failed4 = 1; }
      if (dst5[i] != want) { failed5 = 1; }
      if (dst6[i] != want) { failed6 = 1; }
    }
    report_reduction("shmem_short_max_to_all", failed0);
    report_reduction("shmem_int_max_to_all", failed1);
    report_reduction("shmem_long_max_to_all", failed2);
    report_reduction("shmem_float_max_to_all", failed3);
    report_reduction("shmem_double_max_to_all", failed4);
    report_reduction("shmem_longdouble_max_to_all", failed5);
    report_reduction("shmem_longlong_max_to_all", failed6);
  }

  /*Test MIN: shmem_double_min_to_all, shmem_float_min_to_all, shmem_int_min_to_all, shmem_long_min_to_all, shmem_longdouble_min_to_all, shmem_longlong_min_to_all, shmem_short_min_to_all*/
  failed0 = failed1 = failed2 = failed3 = failed4 = failed5 = failed6 = 0;
  for (i = 0; i < N; i += 1) {
    src0[i] = src1[i] = src2[i] = src3[i] = src4[i] = src5[i] = src6[i] = me + i;
  }
  reset_destinations();

  shmem_barrier_all();

  shmem_short_min_to_all(dst0, src0, N, 0, 0, npes, pWrk0, pSync);
  shmem_int_min_to_all(dst1, src1, N, 0, 0, npes, pWrk1, pSync1);
  shmem_long_min_to_all(dst2, src2, N, 0, 0, npes, pWrk2, pSync);
  shmem_float_min_to_all(dst3, src3, N, 0, 0, npes, pWrk3, pSync1);
  shmem_double_min_to_all(dst4, src4, N, 0, 0, npes, pWrk4, pSync);
  shmem_longdouble_min_to_all(dst5, src5, N, 0, 0, npes, pWrk5, pSync1);
  shmem_longlong_min_to_all(dst6, src6, N, 0, 0, npes, pWrk6, pSync);

  if (me == 0) {
    for (i = 0; i < N; i++) {
      /* The smallest contribution to element i is PE 0's value, i. */
      if (dst0[i] != i) { failed0 = 1; }
      if (dst1[i] != i) { failed1 = 1; }
      if (dst2[i] != i) { failed2 = 1; }
      if (dst3[i] != i) { failed3 = 1; }
      if (dst4[i] != i) { failed4 = 1; }
      if (dst5[i] != i) { failed5 = 1; }
      if (dst6[i] != i) { failed6 = 1; }
    }
    report_reduction("shmem_short_min_to_all", failed0);
    report_reduction("shmem_int_min_to_all", failed1);
    report_reduction("shmem_long_min_to_all", failed2);
    report_reduction("shmem_float_min_to_all", failed3);
    report_reduction("shmem_double_min_to_all", failed4);
    report_reduction("shmem_longdouble_min_to_all", failed5);
    report_reduction("shmem_longlong_min_to_all", failed6);
  }

  /*Test SUM: shmem_double_sum_to_all, shmem_float_sum_to_all, shmem_int_sum_to_all, shmem_long_sum_to_all, shmem_longdouble_sum_to_all, shmem_longlong_sum_to_all, shmem_short_sum_to_all*/
  failed0 = failed1 = failed2 = failed3 = failed4 = failed5 = failed6 = 0;
  for (i = 0; i < N; i += 1) {
    src0[i] = src1[i] = src2[i] = src3[i] = src4[i] = src5[i] = src6[i] = me;
  }
  reset_destinations();

  shmem_barrier_all();

  shmem_short_sum_to_all(dst0, src0, N, 0, 0, npes, pWrk0, pSync);
  shmem_int_sum_to_all(dst1, src1, N, 0, 0, npes, pWrk1, pSync1);
  shmem_long_sum_to_all(dst2, src2, N, 0, 0, npes, pWrk2, pSync);
  shmem_float_sum_to_all(dst3, src3, N, 0, 0, npes, pWrk3, pSync1);
  shmem_double_sum_to_all(dst4, src4, N, 0, 0, npes, pWrk4, pSync);
  shmem_longdouble_sum_to_all(dst5, src5, N, 0, 0, npes, pWrk5, pSync1);
  shmem_longlong_sum_to_all(dst6, src6, N, 0, 0, npes, pWrk6, pSync);

  if (me == 0) {
    /* Sum of the ranks 0 .. npes-1. */
    sum_expected = npes * (npes - 1) / 2;

    for (i = 0; i < N; i++) {
      if (dst0[i] != sum_expected) { failed0 = 1; }
      if (dst1[i] != sum_expected) { failed1 = 1; }
      if (dst2[i] != sum_expected) { failed2 = 1; }
      if (dst3[i] != sum_expected) { failed3 = 1; }
      if (dst4[i] != sum_expected) { failed4 = 1; }
      if (dst5[i] != sum_expected) { failed5 = 1; }
      if (dst6[i] != sum_expected) { failed6 = 1; }
    }
    report_reduction("shmem_short_sum_to_all", failed0);
    report_reduction("shmem_int_sum_to_all", failed1);
    report_reduction("shmem_long_sum_to_all", failed2);
    report_reduction("shmem_float_sum_to_all", failed3);
    report_reduction("shmem_double_sum_to_all", failed4);
    report_reduction("shmem_longdouble_sum_to_all", failed5);
    report_reduction("shmem_longlong_sum_to_all", failed6);
  }

  /*Test AND: shmem_int_and_to_all, shmem_long_and_to_all, shmem_longlong_and_to_all, shmem_short_and_to_all,*/
  failed0 = failed1 = failed2 = failed6 = 0;
  for (i = 0; i < N; i += 1) {
    src0[i] = src1[i] = src2[i] = src6[i] = me;
  }
  reset_destinations();

  shmem_barrier_all();

  shmem_short_and_to_all(dst0, src0, N, 0, 0, npes, pWrk0, pSync);
  shmem_int_and_to_all(dst1, src1, N, 0, 0, npes, pWrk1, pSync1);
  shmem_long_and_to_all(dst2, src2, N, 0, 0, npes, pWrk2, pSync);
  shmem_longlong_and_to_all(dst6, src6, N, 0, 0, npes, pWrk6, pSync1);

  if (me == 0) {
    for (i = 0; i < N; i++) {
      /* PE 0 contributes 0, so the AND over all ranks is 0. */
      if (dst0[i] != 0) { failed0 = 1; }
      if (dst1[i] != 0) { failed1 = 1; }
      if (dst2[i] != 0) { failed2 = 1; }
      if (dst6[i] != 0) { failed6 = 1; }
    }
    report_reduction("shmem_short_and_to_all", failed0);
    report_reduction("shmem_int_and_to_all", failed1);
    report_reduction("shmem_long_and_to_all", failed2);
    report_reduction("shmem_longlong_and_to_all", failed6);
  }

  /*Test PROD: shmem_double_prod_to_all, shmem_float_prod_to_all, shmem_int_prod_to_all, shmem_long_prod_to_all, shmem_longdouble_prod_to_all, shmem_longlong_prod_to_all, shmem_short_prod_to_all, */
  failed0 = failed1 = failed2 = failed3 = failed4 = failed5 = failed6 = 0;
  for (i = 0; i < N; i += 1) {
    src0[i] = src1[i] = src2[i] = src3[i] = src4[i] = src5[i] = src6[i] = me + 1;
  }
  reset_destinations();

  /* Each PE contributes me+1, so the expected product is 1*2*...*npes. */
  expected_result0 = expected_result1 = expected_result2 = expected_result3 = expected_result4 = expected_result5 = expected_result6 = 1;
  for (i = 1; i <= npes; i++) {
    expected_result0 = expected_result0 * i;
    expected_result1 = expected_result1 * i;
    expected_result2 = expected_result2 * i;
    expected_result3 = expected_result3 * i;
    expected_result4 = expected_result4 * i;
    expected_result5 = expected_result5 * i;
    expected_result6 = expected_result6 * i;
  }

  shmem_barrier_all();

  shmem_short_prod_to_all(dst0, src0, N, 0, 0, npes, pWrk0, pSync);
  shmem_int_prod_to_all(dst1, src1, N, 0, 0, npes, pWrk1, pSync1);
  shmem_long_prod_to_all(dst2, src2, N, 0, 0, npes, pWrk2, pSync);
  shmem_float_prod_to_all(dst3, src3, N, 0, 0, npes, pWrk3, pSync1);
  shmem_double_prod_to_all(dst4, src4, N, 0, 0, npes, pWrk4, pSync);
  shmem_longdouble_prod_to_all(dst5, src5, N, 0, 0, npes, pWrk5, pSync1);
  shmem_longlong_prod_to_all(dst6, src6, N, 0, 0, npes, pWrk6, pSync);

  if (me == 0) {
    for (i = 0; i < N; i++) {
      if (dst0[i] != expected_result0) { failed0 = 1; }
      if (dst1[i] != expected_result1) { failed1 = 1; }
      if (dst2[i] != expected_result2) { failed2 = 1; }
      if (dst3[i] != expected_result3) { failed3 = 1; }
      if (dst4[i] != expected_result4) { failed4 = 1; }
      if (dst5[i] != expected_result5) { failed5 = 1; }
      if (dst6[i] != expected_result6) { failed6 = 1; }
    }
    report_reduction("shmem_short_prod_to_all", failed0);
    report_reduction("shmem_int_prod_to_all", failed1);
    report_reduction("shmem_long_prod_to_all", failed2);
    report_reduction("shmem_float_prod_to_all", failed3);
    report_reduction("shmem_double_prod_to_all", failed4);
    report_reduction("shmem_longdouble_prod_to_all", failed5);
    report_reduction("shmem_longlong_prod_to_all", failed6);
  }

  /*Test OR: shmem_int_or_to_all, shmem_long_or_to_all, shmem_longlong_or_to_all, shmem_short_or_to_all,*/
  failed0 = failed1 = failed2 = failed6 = 0;
  for (i = 0; i < N; i += 1) {
    src0[i] = src1[i] = src2[i] = src6[i] = (me + 1) % 4;
  }
  reset_destinations();

  /*
   * OR together what each rank contributes. This is 3 as soon as there
   * are two PEs, but only 1 for a single-PE run, where a fixed
   * expectation of 3 would report a spurious failure.
   */
  or_expected = 0;
  for (i = 0; i < npes; i++) {
    or_expected |= (i + 1) % 4;
  }

  shmem_barrier_all();

  shmem_short_or_to_all(dst0, src0, N, 0, 0, npes, pWrk0, pSync);
  shmem_int_or_to_all(dst1, src1, N, 0, 0, npes, pWrk1, pSync1);
  shmem_long_or_to_all(dst2, src2, N, 0, 0, npes, pWrk2, pSync);
  shmem_longlong_or_to_all(dst6, src6, N, 0, 0, npes, pWrk6, pSync1);

  if (me == 0) {
    for (i = 0; i < N; i++) {
      if (dst0[i] != or_expected) { failed0 = 1; }
      if (dst1[i] != or_expected) { failed1 = 1; }
      if (dst2[i] != or_expected) { failed2 = 1; }
      if (dst6[i] != or_expected) { failed6 = 1; }
    }
    report_reduction("shmem_short_or_to_all", failed0);
    report_reduction("shmem_int_or_to_all", failed1);
    report_reduction("shmem_long_or_to_all", failed2);
    report_reduction("shmem_longlong_or_to_all", failed6);
  }

  /*Test XOR: shmem_int_xor_to_all, shmem_long_xor_to_all, shmem_longlong_xor_to_all, shmem_short_xor_to_all*/
  failed0 = failed1 = failed2 = failed6 = 0;
  for (i = 0; i < N; i += 1) {
    src0[i] = src1[i] = src2[i] = src6[i] = me % 2;
  }
  reset_destinations();

  /* Odd ranks contribute 1; there are npes/2 of them, so the XOR is their parity. */
  xor_expected = (npes / 2) % 2;

  shmem_barrier_all();

  shmem_short_xor_to_all(dst0, src0, N, 0, 0, npes, pWrk0, pSync);
  shmem_int_xor_to_all(dst1, src1, N, 0, 0, npes, pWrk1, pSync1);
  shmem_long_xor_to_all(dst2, src2, N, 0, 0, npes, pWrk2, pSync);
  shmem_longlong_xor_to_all(dst6, src6, N, 0, 0, npes, pWrk6, pSync1);

  if (me == 0) {
    for (i = 0; i < N; i++) {
      if (dst0[i] != xor_expected) { failed0 = 1; }
      if (dst1[i] != xor_expected) { failed1 = 1; }
      if (dst2[i] != xor_expected) { failed2 = 1; }
      if (dst6[i] != xor_expected) { failed6 = 1; }
    }
    report_reduction("shmem_short_xor_to_all", failed0);
    report_reduction("shmem_int_xor_to_all", failed1);
    report_reduction("shmem_long_xor_to_all", failed2);
    report_reduction("shmem_longlong_xor_to_all", failed6);
  }

  return 0;
}
Beispiel #4
0
/*
 * Runs the SHMEM RandomAccess (GUPS) benchmark: allocates the local slice
 * of the table, times the update phase, distributes the measured rate to
 * all PEs, then times and evaluates the verification phase.
 *
 * params: result record. This function overwrites HPLMaxProcMem with
 *         200000 and stores SHMEMGUPs and SHMEMRandomAccess_N on every
 *         PE; PE 0 additionally stores the ExeUpdates, time, CheckTime,
 *         Errors, ErrorsFraction and Algorithm fields and sets Failure
 *         when more than 1% of the table locations are wrong.
 *
 * Only power-of-two PE counts are supported; for any other count PE 0
 * prints a message and every PE returns immediately.
 *
 * Returns 0 in all cases.
 */
int
HPCC_SHMEMRandomAccess(HPCC_Params *params) {
  s64Int i;
  /* Reduction operands are static so they can be used in collectives. */
  static s64Int NumErrors, GlbNumErrors;

  int NumProcs, logNumProcs, MyProc;
  u64Int GlobalStartMyProc;
  int Remainder;            /* Number of processors with (LocalTableSize + 1) entries */
  u64Int Top;               /* Number of table entries in top of Table */
  s64Int LocalTableSize;    /* Local table width */
  u64Int MinLocalTableSize; /* Integer ratio TableSize/NumProcs */
  u64Int logTableSize, TableSize;

  double RealTime;              /* Real time to update table */

  double TotalMem;
  static int sAbort, rAbort;
  int PowerofTwo;

  u64Int NumUpdates_Default; /* Number of updates to table (suggested: 4x number of table entries) */
  u64Int NumUpdates;  /* actual number of updates to table */
  s64Int ProcNumUpdates; /* number of updates per processor */

  /*
   * Reductions and the broadcast get their own sync arrays, each sized
   * with the constant that belongs to that kind of collective.
   */
  static long bcastSync[_SHMEM_BCAST_SYNC_SIZE];
  static long llpSync[_SHMEM_REDUCE_SYNC_SIZE];
  static long ipSync[_SHMEM_REDUCE_SYNC_SIZE];

  /*
   * NOTE(review): the OpenSHMEM specification sizes reduction work arrays
   * with _SHMEM_REDUCE_MIN_WRKDATA_SIZE rather than the sync-array size;
   * confirm and resize these two.
   */
  static long long int llpWrk[_SHMEM_REDUCE_SYNC_SIZE];
  static int ipWrk[_SHMEM_REDUCE_SYNC_SIZE];

  /*
   * Static staging buffers for the GUPs broadcast. params is supplied by
   * the caller and nothing here shows it to be remotely accessible, so
   * the collective works on these objects instead of on params directly.
   */
  static double gupsSrc, gupsDst;

  FILE *outFile = NULL;
  double *GUPs;

  for (i = 0; i < _SHMEM_BCAST_SYNC_SIZE; i += 1){
        bcastSync[i] = _SHMEM_SYNC_VALUE;
  }
  for (i = 0; i < _SHMEM_REDUCE_SYNC_SIZE; i += 1){
        ipSync[i] = _SHMEM_SYNC_VALUE;
        llpSync[i] = _SHMEM_SYNC_VALUE;
  }

  params->SHMEMGUPs = -1;
  GUPs = &params->SHMEMGUPs;

  NumProcs = shmem_n_pes();
  MyProc = shmem_my_pe();

  /* Only PE 0 reports; outFile stays NULL everywhere else. */
  if (0 == MyProc) {
    outFile = stdout;
    setbuf(outFile, NULL);
  }

  params->HPLMaxProcMem = 200000;

  TotalMem = params->HPLMaxProcMem; /* max single node memory */
  TotalMem *= NumProcs;             /* max memory in NumProcs nodes */

  TotalMem /= sizeof(u64Int);

  /* calculate TableSize --- the size of update array (must be a power of 2) */
  for (TotalMem *= 0.5, logTableSize = 0, TableSize = 1;
       TotalMem >= 1.0;
       TotalMem *= 0.5, logTableSize++, TableSize <<= 1)
    ; /* EMPTY */

  /* determine whether the number of processors is a power of 2 */
  if ( (NumProcs & (NumProcs -1)) == 0) {
    PowerofTwo = HPCC_TRUE;
    Remainder = 0;
    Top = 0;
    MinLocalTableSize = (TableSize / NumProcs);
    LocalTableSize = MinLocalTableSize;
    GlobalStartMyProc = (MinLocalTableSize * MyProc);

    /*
     * log2 of the PE count. It is printed below and handed to the update
     * and check routines, so it has to be computed before those uses.
     */
    logNumProcs = 0;
    for (i = 1; i < NumProcs; i <<= 1)
      logNumProcs++;
  }
  else {
    if(MyProc == 0) {
        printf("Number of processes must be power of 2\n");

    }
    return 0;
  }
  sAbort = 0;
  HPCC_Table = HPCC_XMALLOC( s64Int, LocalTableSize );

  if (! HPCC_Table) sAbort = 1;

  /* Agree on allocation success: a non-zero sum means some PE failed. */
  shmem_barrier_all();
  shmem_int_sum_to_all(&rAbort, &sAbort, 1, 0, 0, NumProcs, ipWrk, ipSync);
  shmem_barrier_all();

  if (rAbort > 0) {
    if (MyProc == 0) fprintf(outFile, "Failed to allocate memory for the main table.\n");
    /* check all allocations in case there are new added and their order changes */
    if (HPCC_Table) HPCC_free( HPCC_Table );
    goto failed_table;
  }

  params->SHMEMRandomAccess_N = (s64Int)TableSize;

  /* Default number of global updates to table: 4x number of table entries */
  NumUpdates_Default = 4 * TableSize;
  ProcNumUpdates = 4*LocalTableSize;
  NumUpdates = NumUpdates_Default;

  if (MyProc == 0) {
    fprintf( outFile, "Running on %d processors%s\n", NumProcs, PowerofTwo ? " (PowerofTwo)" : "");
    fprintf( outFile, "Total Main table size = 2^" FSTR64 " = " FSTR64 " words\n",
             logTableSize, TableSize );
    if (PowerofTwo)
        fprintf( outFile, "PE Main table size = 2^" FSTR64 " = " FSTR64 " words/PE\n",
                 (logTableSize - logNumProcs), TableSize/NumProcs );
      else
        fprintf( outFile, "PE Main table size = (2^" FSTR64 ")/%d  = " FSTR64 " words/PE MAX\n",
                 logTableSize, NumProcs, LocalTableSize);

    fprintf( outFile, "Default number of updates (RECOMMENDED) = " FSTR64 "\n", NumUpdates_Default);
    params->SHMEMRandomAccess_ExeUpdates = NumUpdates;
  }

  /* Initialize main table */
  for (i=0; i<LocalTableSize; i++)
    HPCC_Table[i] = i + GlobalStartMyProc;

  shmem_barrier_all();

  /* Begin timed section */
  RealTime = -RTSEC();

  Power2NodesRandomAccessUpdate(logTableSize, TableSize, LocalTableSize,
                                     MinLocalTableSize, GlobalStartMyProc, Top,
                                     logNumProcs, NumProcs, Remainder,
                                     MyProc, ProcNumUpdates);

  shmem_barrier_all();

  /* End timed section */

  RealTime += RTSEC();

  /* Print timing results */
  if (MyProc == 0){
    params->SHMEMRandomAccess_time = RealTime;
    *GUPs = 1e-9*NumUpdates / RealTime;
    fprintf( outFile, "Real time used = %.6f seconds\n", RealTime );
    fprintf( outFile, "%.9f Billion(10^9) Updates    per second [GUP/s]\n",
             *GUPs );
    fprintf( outFile, "%.9f Billion(10^9) Updates/PE per second [GUP/s]\n",
             *GUPs / NumProcs );
    /* No longer reporting per CPU number */
  }

  /*
   * distribute result to all nodes: PE 0 is the broadcast root. The
   * root keeps the value it computed itself, every other PE takes the
   * broadcast one.
   */
  gupsSrc = *GUPs;
  shmem_barrier_all();
  shmem_broadcast64(&gupsDst, &gupsSrc, 1, 0, 0, 0, NumProcs, bcastSync);
  shmem_barrier_all();
  if (MyProc != 0) *GUPs = gupsDst;

  /* Verification phase */

  /* Begin timing here */

  RealTime = -RTSEC();

  HPCC_Power2NodesSHMEMRandomAccessCheck(logTableSize, TableSize, LocalTableSize,
                                    GlobalStartMyProc,
                                    logNumProcs, NumProcs,
                                    MyProc, ProcNumUpdates,
                                    &NumErrors);

  shmem_barrier_all();
  shmem_longlong_sum_to_all( &GlbNumErrors,  &NumErrors, 1, 0,0, NumProcs,llpWrk, llpSync);
  shmem_barrier_all();

  /* End timed section */

  RealTime += RTSEC();

  if(MyProc == 0){
    params->SHMEMRandomAccess_CheckTime = RealTime;

    fprintf( outFile, "Verification:  Real time used = %.6f seconds\n", RealTime);
    /* Up to 1% wrong locations still counts as a pass. */
    fprintf( outFile, "Found " FSTR64 " errors in " FSTR64 " locations (%s).\n",
             GlbNumErrors, TableSize, (GlbNumErrors <= 0.01*TableSize) ?
             "passed" : "failed");
    if (GlbNumErrors > 0.01*TableSize) params->Failure = 1;
    params->SHMEMRandomAccess_Errors = (s64Int)GlbNumErrors;
    params->SHMEMRandomAccess_ErrorsFraction = (double)GlbNumErrors / (double)TableSize;
    params->SHMEMRandomAccess_Algorithm = 1;
  }
  /* End verification phase */

  /* Deallocate memory (in reverse order of allocation which should
     help fragmentation) */

  HPCC_free( HPCC_Table );
  failed_table:

  /*
   * outFile is the process-wide stdout here; closing it would silence
   * everything printed after this function returns, so the standard
   * streams are only flushed.
   */
  if (0 == MyProc) {
    if (outFile != stdout && outFile != stderr) fclose( outFile );
    else fflush( outFile );
  }

  shmem_barrier_all();

  return 0;
}