示例#1
0
/*
 * Shrinking active set test for the max/min to-all reductions.
 *
 * Test number i runs a reduction over the active set made of PEs
 * i..npes-1 (PE_start = i, logPE_stride = 0, PE_size = npes-i).  A PE
 * takes part in tests 0..me only, i.e. in every test whose active set
 * contains it.  Every PE contributes its own rank, so the expected
 * maximum is npes-1 and the expected minimum is i.
 *
 * Returns 0 when every reduced element matched, 1 otherwise.
 */
int main(void)
{
    int i, me, npes;
    int errors = 0;

    shmem_init();

    me = shmem_my_pe();
    npes = shmem_n_pes();

    for (i = 0; i < NELEM; i++) {
        src[i] = me;
        dst_max[i] = -1;
        dst_min[i] = -1;
    }

    /* Each reduction has its own pSync array and BOTH must hold
     * SHMEM_SYNC_VALUE before first use. */
    for (i = 0; i < SHMEM_REDUCE_SYNC_SIZE; i++) {
        max_psync[i] = SHMEM_SYNC_VALUE;
        min_psync[i] = SHMEM_SYNC_VALUE;
    }

    if (me == 0)
        printf("Shrinking active set test\n");

    /* Make the pSync/src initialization visible on all PEs before any
     * reduction starts. */
    shmem_barrier_all();

    /* A total of npes tests are performed, where the active set in each test
     * includes PEs i..npes-1 */
    for (i = 0; i <= me; i++) {
        int j;

        /* The first PE of the active set announces the test. */
        if (me == i)
            printf(" + PE_start=%d, logPE_stride=0, PE_size=%d\n", i, npes-i);

        shmem_long_max_to_all(dst_max, src, NELEM, i, 0, npes-i, max_pwrk, max_psync);

        /* Validate reduced data: the highest rank is always in the set */
        for (j = 0; j < NELEM; j++) {
            long expected = npes-1;
            if (dst_max[j] != expected) {
                printf("%d: Max expected dst_max[%d] = %ld, got dst_max[%d] = %ld, iteration %d\n",
                       me, j, expected, j, dst_max[j], i);
                errors++;
            }
        }

        shmem_long_min_to_all(dst_min, src, NELEM, i, 0, npes-i, min_pwrk, min_psync);

        /* Validate reduced data: the lowest rank in the set is PE_start */
        for (j = 0; j < NELEM; j++) {
            long expected = i;
            if (dst_min[j] != expected) {
                printf("%d: Min expected dst_min[%d] = %ld, got dst_min[%d] = %ld, iteration %d\n",
                       me, j, expected, j, dst_min[j], i);
                errors++;
            }
        }

    }

    shmem_finalize();

    return errors != 0;
}
/*
 * Paired put test over a range of transfer sizes.
 *
 * PEs are used in (even, odd) pairs.  For each size in elements[] and
 * for MAX_ITER iterations, the even PE fills its source buffers with a
 * pattern derived from (iter, size, my_pe, index), puts them to PE
 * my_pe+1 (non-blocking puts for int/long/float/double, a blocking put
 * for short), calls shmem_quiet() and then sets a flag on the partner.
 * The odd PE waits for the flag and compares every received element with
 * the pattern its partner must have written, reporting mismatches on
 * stderr.
 *
 * Requires an even number of PEs; exits with status 1 otherwise.
 */
int main(int argc, char **argv)
{
  int i,j,iter;
  int my_pe,n_pes;
  int *flag,*one;
  size_t max_elements,max_elements_bytes;
  size_t elements[16] = {1,2,4,8,12,16,24,32,64,128,256,512,1024,2048,4096,8192};
  int num_elements = 16;

  short *srce_short,*targ_short;
  int *srce_int,*targ_int;
  long *srce_long,*targ_long;
  float *srce_float,*targ_float;
  double *srce_double,*targ_double;

  shmem_init();
  my_pe = shmem_my_pe();
  n_pes = shmem_n_pes();
  flag = shmem_malloc((size_t) sizeof(int));
  one  = shmem_malloc((size_t) sizeof(int));
  if((flag == NULL) || (one == NULL))
    shmalloc_error();
  *one  = 1;

/*  fail if trying to use odd number of processors  */
  if ( (n_pes % 2) != 0 ){
        fprintf(stderr, "FAIL - test requires even number of PEs\n");
        exit(1);
  }

  if(my_pe == 0)
    fprintf(stderr, "shmem_both_put_nb_size(%s)\n", argv[0]);

/*  alloc arrays; max_elements is a size_t, hence %zu  */

  max_elements = (size_t) (MAX_SIZE / sizeof(int));
  max_elements_bytes = (size_t) (sizeof(int)*max_elements);
  if(my_pe == 0)
    fprintf(stderr,"shmem_int_put_nb        max_elements = %zu\n",max_elements);
  srce_int = shmem_malloc(max_elements_bytes);
  targ_int = shmem_malloc(max_elements_bytes);
  if((srce_int == NULL) || (targ_int == NULL))
    shmalloc_error();

  max_elements = (size_t) (MAX_SIZE / sizeof(short));
  max_elements_bytes = (size_t) (sizeof(short)*max_elements);
  if(my_pe == 0)
    fprintf(stderr,"shmem_short_put         max_elements = %zu\n",max_elements);
  srce_short = shmem_malloc(max_elements_bytes);
  targ_short = shmem_malloc(max_elements_bytes);
  if((srce_short == NULL) || (targ_short == NULL))
    shmalloc_error();

  max_elements = (size_t) (MAX_SIZE / sizeof(long));
  max_elements_bytes = (size_t) (sizeof(long)*max_elements);
  if(my_pe == 0)
    fprintf(stderr,"shmem_long_put_nb       max_elements = %zu\n",max_elements);
  srce_long = shmem_malloc(max_elements_bytes);
  targ_long = shmem_malloc(max_elements_bytes);
  if((srce_long == NULL) || (targ_long == NULL))
    shmalloc_error();

  max_elements = (size_t) (MAX_SIZE / sizeof(float));
  max_elements_bytes = (size_t) (sizeof(float)*max_elements);
  if(my_pe == 0)
    fprintf(stderr,"shmem_float_put_nb      max_elements = %zu\n",max_elements);
  srce_float = shmem_malloc(max_elements_bytes);
  targ_float = shmem_malloc(max_elements_bytes);
  if((srce_float == NULL) || (targ_float == NULL))
    shmalloc_error();

  max_elements = (size_t) (MAX_SIZE / sizeof(double));
  max_elements_bytes = (size_t) (sizeof(double)*max_elements);
  if(my_pe == 0)
    fprintf(stderr,"shmem_double_put_nb     max_elements = %zu\n",max_elements);
  srce_double = shmem_malloc(max_elements_bytes);
  targ_double = shmem_malloc(max_elements_bytes);
  if((srce_double == NULL) || (targ_double == NULL))
    shmalloc_error();

  /* max_elements now holds the count computed for double, the last type
   * sized above; it bounds the transfer sizes used for every type. */
  if(my_pe == 0)
    fprintf(stderr,"Actual value used for   max_elements = %zu\n",max_elements);
  /* try the different sizes MAX_ITER times */
  for (iter = 0; iter < MAX_ITER; iter++) {
   for (i = 0; i < num_elements; i++) {
    const size_t count = elements[i];
    /* Part of the data pattern shared by both PEs of a pair. */
    const long base = (long)iter*10000 + (long)(count*100);

    *flag = 0;
    if (count <= max_elements) {
     if ( (my_pe % 2) == 0 ) {
       /* sender: fill the source buffers with this PE's pattern */
       for(j = 0; j < (int)count; j++) {
         const long value = base + my_pe + j;
         srce_short[j] = (short)(my_pe+j);
         srce_int[j] = (int)value;
         srce_long[j] = value;
         srce_float[j] = (float)value;
         srce_double[j] = (double)value;
       }
     } else {
       /* receiver: preload the targets with its OWN pattern, which
        * differs by one from what the partner is going to write */
       for(j = 0; j < (int)count; j++) {
         const long value = base + my_pe + j;
         targ_short[j] = (short)(my_pe+j);
         targ_int[j] = (int)value;
         targ_long[j] = value;
         targ_float[j] = (float)value;
         targ_double[j] = (double)value;
       }
     }
     shmem_barrier_all();
     if ( (my_pe % 2) == 0 ) {
#ifndef OPENSHMEM
       shmemx_int_put_nb(targ_int,srce_int,count,my_pe+1,NULL);
       shmemx_long_put_nb(targ_long,srce_long,count,my_pe+1,NULL);
       shmemx_float_put_nb(targ_float,srce_float,count,my_pe+1,NULL);
       shmemx_double_put_nb(targ_double,srce_double,count,my_pe+1,NULL);
#else
       shmem_int_put_nbi(targ_int,srce_int,count,my_pe+1);
       shmem_long_put_nbi(targ_long,srce_long,count,my_pe+1);
       shmem_float_put_nbi(targ_float,srce_float,count,my_pe+1);
       shmem_double_put_nbi(targ_double,srce_double,count,my_pe+1);
#endif
       /* this one is blocking */
       shmem_short_put(targ_short,srce_short,count,my_pe+1);
       /* complete all outstanding puts before raising the flag */
       shmem_quiet();
       shmem_int_put(flag,one,(size_t)1,my_pe+1);
     } else {
       /* returns once *flag is no longer 0 */
       shmem_int_wait(flag,0);
       for(j = 0; j < (int)count; j++) {
         /* value written by the partner, whose rank is my_pe-1 */
         const long expected = base + my_pe + j - 1;
         if ( targ_short[j] != (short)(my_pe+j-1) )
           fprintf(stderr,
           "FAIL: PE [%d] iter=%d i=%d targ_short[%d]=%d not equal %d\n",
              my_pe,iter,i,j,targ_short[j],my_pe+j-1);
         if ( targ_int[j] != (int)expected )
           fprintf(stderr,
           "FAIL: PE [%d] iter=%d i=%d targ_int[%d]=%d not equal %ld\n",
              my_pe,iter,i,j,targ_int[j],expected);
         if ( targ_long[j] != expected )
           fprintf(stderr,
           "FAIL: PE [%d] iter=%d i=%d targ_long[%d]=%ld not equal %ld\n",
              my_pe,iter,i,j,targ_long[j],expected);
         if ( targ_float[j] != (float)expected )
           fprintf(stderr,
           "FAIL: PE [%d] iter=%d i=%d targ_float[%d]=%f not equal %ld\n",
              my_pe,iter,i,j,targ_float[j],expected);
         if ( targ_double[j] != (double)expected )
           fprintf(stderr,
           "FAIL: PE [%d] iter=%d i=%d targ_double[%d]=%f not equal %ld\n",
              my_pe,iter,i,j,targ_double[j],expected);
       }
     }
    }
   }
  }
  shmem_free(srce_short);  shmem_free(targ_short);
  shmem_free(srce_int);  shmem_free(targ_int);
  shmem_free(srce_long);  shmem_free(targ_long);
  shmem_free(srce_float);  shmem_free(targ_float);
  shmem_free(srce_double);  shmem_free(targ_double);
  shmem_free(flag);  shmem_free(one);
#ifdef NEEDS_FINALIZE
  shmem_finalize(); 
#endif
  return 0;
}
示例#3
0
/*
 * Entry point of the intset (linked list) stress test.
 *
 * Steps, in order:
 *   1. initialise the runtime (TM2C_INIT, or SEQ_INIT when SEQUENTIAL);
 *   2. parse the command line with getopt_long();
 *   3. seed the random generator and sanity-check the parameters;
 *   4. print the configuration and populate the set with `initial`
 *      distinct values (inside ONCE blocks -- presumably executed by a
 *      single node only; confirm against the ONCE macro);
 *   5. fill the thread_data_t record and run test() for `duration`;
 *   6. print per-core statistics when verbose, then the final set size.
 *
 * -h and any unrecognised option jump straight to the `end` label, which
 * skips the benchmark but still runs the shutdown macros.
 */
int
main(int argc, char** argv)
{
#ifndef SEQUENTIAL
  TM2C_INIT;
#else
  SEQ_INIT;
#endif

  struct option long_options[] =
    {
      // These options don't set a flag
      {"help", no_argument, NULL, 'h'},
      {"verbose", no_argument, NULL, 'v'},
      /* --alternate is advertised by the help text, so it must be listed
         here for the long form to be accepted */
      {"alternate", no_argument, NULL, 'A'},
      {"duration", required_argument, NULL, 'd'},
      {"initial-size", required_argument, NULL, 'i'},
      {"range", required_argument, NULL, 'r'},
      {"update-rate", required_argument, NULL, 'u'},
      {"elasticity", required_argument, NULL, 'x'},
      {"effective", required_argument, NULL, 'f'},
      {NULL, 0, NULL, 0}
    };

  intset_t* set;
  int i, c, size;
  val_t last = 0;
  val_t val = 0;
  thread_data_t* data;
  double duration = DEFAULT_DURATION;
  int initial = DEFAULT_INITIAL;
  int nb_app_cores = NUM_APP_NODES;
#if defined(SEQUENTIAL)
  nb_app_cores = 1;
#endif
  long range = DEFAULT_RANGE;
  int update = DEFAULT_UPDATE;
  int unit_tx = DEFAULT_ELASTICITY;
  int alternate = DEFAULT_ALTERNATE;
  int effective = DEFAULT_EFFECTIVE;
  int verbose = DEFAULT_VERBOSE;
  unsigned int seed = 0;

  while (1)
    {
      i = 0;
      c = getopt_long(argc, argv, "hAf:d:i:r:u:x:v", long_options, &i);

      if (c == -1)
        break;

      /* Map a flag-less long option onto its short-option character. */
      if (c == 0 && long_options[i].flag == 0)
        c = long_options[i].val;

      switch (c) {
      case 0:
        /* Flag is automatically set */
        break;
      case 'h':
        ONCE
          {
            printf("intset -- STM stress test "
                   "(linked list)\n"
                   "\n"
                   "Usage:\n"
                   "  intset [options...]\n"
                   "\n"
                   "Options:\n"
                   "  -h, --help\n"
                   "        Print this message\n"
                   "  -A, --alternate (default="XSTR(DEFAULT_ALTERNATE)")\n"
                   "        Consecutive insert/remove target the same value\n"
                   "  -f, --effective <int>\n"
                   "        update txs must effectively write (0=trial, 1=effective, default=" XSTR(DEFAULT_EFFECTIVE) ")\n"
                   "  -d, --duration secs<double>\n"
                   "        Test duration in milliseconds (0=infinite, default=" XSTR(DEFAULT_DURATION) ")\n"
                   "  -i, --initial-size <int>\n"
                   "        Number of elements to insert before test (default=" XSTR(DEFAULT_INITIAL) ")\n"
                   "  -r, --range <int>\n"
                   "        Range of integer values inserted in set (default=" XSTR(DEFAULT_RANGE) ")\n"
                   "  -u, --update-rate <int>\n"
                   "        Percentage of update transactions (default=" XSTR(DEFAULT_UPDATE) ")\n"
                   "  -v , --verbose\n"
                   "        Print detailed stats"
                   );
          }
        goto end;
      case 'A':
        alternate = 1;
        break;
      case 'f':
        effective = atoi(optarg);
        break;
      case 'd':
        duration = atof(optarg);
        break;
      case 'i':
        initial = atoi(optarg);
        break;
      case 'r':
        range = atol(optarg);
        break;
      case 'u':
        update = atoi(optarg);
        break;
      case 'x':
        unit_tx = atoi(optarg);
        break;
      case 'v':
        verbose = 1;
        break;
      case '?':
        ONCE
          {
            printf("Use -h or --help for help\n");
          }
        /* fall through: an unknown option also ends the run */
      default:
        goto end;
      }
    }

  /* seed is never changed from 0 above, so the first branch is the one
     taken; the else branch is kept for a future seed option. */
  if (seed == 0)
    {
      srand_core();
      seed = rand_range((NODE_ID() + 17) * 123);
      srand(seed);
    }
  else
    srand(seed);

  assert(duration >= 0);
  assert(initial >= 0);
  assert(nb_app_cores > 0);
  assert(range > 0 && range >= initial);
  assert(update >= 0 && update <= 100);

  ONCE
    {
      printf("Bench type   : linked list\n");
#ifdef SEQUENTIAL
      printf("                sequential\n");
#elif defined(EARLY_RELEASE )
      printf("                using early-release\n");
#elif defined(READ_VALIDATION)
      printf("                using read-validation\n");
#endif
#ifdef LOCKS
      printf("                  with locks\n");
#endif
      printf("Duration     : %f\n", duration);
      printf("Initial size : %d\n", initial);
      printf("Nb cores     : %d\n", nb_app_cores);
      printf("Value range  : %ld\n", range);
      printf("Update rate  : %d\n", update);
      printf("Elasticity   : %d\n", unit_tx);
      printf("Alternate    : %d\n", alternate);
      printf("Effective    : %d\n", effective);
      FLUSH;
    }

  if ((data = (thread_data_t*) malloc(sizeof (thread_data_t))) == NULL)
    {
      perror("malloc");
      exit(1);
    }

  set = set_new();

  BARRIER;

  ONCE
    {
      /* Populate set */
      /* printf("Adding %d entries to set\n", initial); */
      i = 0;
      while (i < initial) {
        val = rand_range(range);
        /* only successful insertions count towards `initial` */
        if (set_add(set, val, 0)) {
          last = val;
          i++;
        }
      }
      size = set_size(set);
      /* set_print(set); */
      printf("Set size     : %d\n", size);
      assert(size == initial);
      FLUSH;
    }

  shmem_init(10 * 1024 * (NODE_ID()-1) * sizeof (node_t) + ((initial + 2) * sizeof (node_t)));

  /* Access set from all threads */
  data->first = last;
  data->range = range;
  data->update = update;
  data->unit_tx = unit_tx;
  data->alternate = alternate;
  data->effective = effective;
  data->nb_add = 0;
  data->nb_added = 0;
  data->nb_remove = 0;
  data->nb_removed = 0;
  data->nb_contains = 0;
  data->nb_found = 0;
  data->set = set;
  data->seed = seed;

  BARRIER;
  /* Start */
  test(data, duration);

  if (verbose)
    {
      APP_EXEC_ORDER
        {
          printf("-- Core %d\n", NODE_ID());
          printf("  #add        : %lu\n", data->nb_add);
          printf("    #added    : %lu\n", data->nb_added);
          printf("  #remove     : %lu\n", data->nb_remove);
          printf("    #removed  : %lu\n", data->nb_removed);
          printf("  #contains   : %lu\n", data->nb_contains);
          printf("    #found    : %lu\n", data->nb_found);
          printf("---------------------------------------------------");
          FLUSH;
        } APP_EXEC_ORDER_END;
    }
  /* Delete set */

  BARRIER;

  ONCE
    {
      int size_after = set_size(set);
      /* set_print(set); */
      printf("Set size (af): %d\n", size_after);
    }

  BARRIER;

#ifdef SEQUENTIAL
  int total_ops = data->nb_add + data->nb_contains + data->nb_remove;
  printf("#Ops          : %d\n", total_ops);
  printf("#Ops/s        : %d\n", (int) (total_ops / duration__));
  printf("#Latency      : %f\n", duration__ / total_ops);
  FLUSH;
#endif

  //set_delete(set);

  /* Cleanup STM */

  free(data);
  BARRIER;

 end:
#ifndef SEQUENTIAL
  TM_END;
#endif

  EXIT(0);
}
/*
 * Print the verdict line for one routine.  `failed` is nonzero when at
 * least one fetched value differed from the expected one.
 */
static void
report_get_result(const char *routine, int failed)
{
    printf("Test %s: %s\n", routine, failed ? "Failed" : "Passed");
}

/*
 * Test of the SHMEM get family.
 *
 * Every PE fills its symmetric source buffers with its own rank and then
 * fetches the buffers of PE (me + 1) % npes.  Only PE 0 checks and
 * reports: its neighbour is PE 1, so every fetched element must be 1.
 * Three groups are exercised: the typed block gets plus shmem_getmem,
 * the sized gets (shmem_get32/64/128) and the single-element shmem_*_g
 * routines.  The test is skipped when fewer than two PEs are available.
 */
int
main(void)
{
    int i;
    int nextpe;
    int me, npes;
    /* failN is set to 1 as soon as a mismatch is seen for routine N */
    int fail1, fail2, fail3, fail4,
        fail5, fail6, fail7, fail8;
    short dest1[N];
    int dest2[N];
    long dest3[N];
    long double dest4[N];
    long long dest5[N];
    double dest6[N];
    float dest7[N];
    char *dest8;
    short dest9;
    int dest10;
    long dest11;
    double dest12;
    float dest13;
    short *src1;
    int *src2;
    long *src3;
    long double *src4;
    long long *src5;
    double *src6;
    float *src7;
    char *src8;
    short *src9;
    int *src10;
    long *src11;
    double *src12;
    float *src13;

    shmem_init();
    me = shmem_my_pe();
    npes = shmem_n_pes();

    if (npes > 1) {
        fail1 = 0;
        fail2 = 0;
        fail3 = 0;
        fail4 = 0;
        fail5 = 0;
        fail6 = 0;
        fail7 = 0;
        fail8 = 0;
        dest8 = (char *) malloc(N * sizeof(char));
        if (dest8 == NULL) {
            fprintf(stderr, "FAIL - could not allocate dest8\n");
            exit(1);
        }

        /* Poison the destinations so that a get that does nothing is
           detected. */
        for (i = 0; i < N; i += 1) {
            dest1[i] = -9;
            dest2[i] = -9;
            dest3[i] = -9;
            dest4[i] = -9;
            dest5[i] = -9;
            dest6[i] = -9;
            dest7[i] = -9.0;
            dest8[i] = -9;
        }
        dest9 = -9;
        dest10 = -9;
        dest11 = -9;
        dest12 = -9;
        dest13 = -9;
        src1 = (short *) shmem_malloc(N * sizeof(*src1));
        src2 = (int *) shmem_malloc(N * sizeof(*src2));
        src3 = (long *) shmem_malloc(N * sizeof(*src3));
        src4 = (long double *) shmem_malloc(N * sizeof(*src4));
        src5 = (long long *) shmem_malloc(N * sizeof(*src5));
        src6 = (double *) shmem_malloc(N * sizeof(*src6));
        src7 = (float *) shmem_malloc(N * sizeof(*src7));
        /* N elements are written below and N bytes are fetched with
           shmem_getmem, so the buffer must hold N chars */
        src8 = (char *) shmem_malloc(N * sizeof(*src8));
        src9 = (short *) shmem_malloc(sizeof(*src9));
        src10 = (int *) shmem_malloc(sizeof(*src10));
        src11 = (long *) shmem_malloc(sizeof(*src11));
        src12 = (double *) shmem_malloc(sizeof(*src12));
        src13 = (float *) shmem_malloc(sizeof(*src13));
        for (i = 0; i < N; i += 1) {
            src1[i] = (short) me;
            src2[i] = me;
            src3[i] = (long) me;
            src4[i] = (long double) me;
            src5[i] = (long long) me;
            src6[i] = (double) me;
            src7[i] = (float) me;
            src8[i] = (char) me;
        }
        *src9 = (short) me;
        *src10 = me;
        *src11 = (long) me;
        *src12 = (double) me;
        *src13 = (float) me;
        nextpe = (me + 1) % npes;

        /* Testing shmem_short_get, shmem_int_get, shmem_long_get,
           shmem_longdouble_get, shmem_longlong_get, shmem_double_get,
           shmem_float_get, shmem_getmem */
        shmem_barrier_all();
        shmem_short_get(dest1, src1, N, nextpe);
        shmem_int_get(dest2, src2, N, nextpe);
        shmem_long_get(dest3, src3, N, nextpe);
        shmem_longdouble_get(dest4, src4, N, nextpe);
        shmem_longlong_get(dest5, src5, N, nextpe);
        shmem_double_get(dest6, src6, N, nextpe);
        shmem_float_get(dest7, src7, N, nextpe);
        shmem_getmem(dest8, src8, N * sizeof(char), nextpe);
        shmem_barrier_all();
        if (me == 0) {
            for (i = 0; i < N; i += 1) {
                if (dest1[i] != (1)) {
                    fail1 = 1;
                }
                if (dest2[i] != (1)) {
                    fail2 = 1;
                }
                if (dest3[i] != (1)) {
                    fail3 = 1;
                }
                if (dest4[i] != (1)) {
                    fail4 = 1;
                }
                if (dest5[i] != (1)) {
                    fail5 = 1;
                }
                if (dest6[i] != (1)) {
                    fail6 = 1;
                }
                if (dest7[i] != (1)) {
                    fail7 = 1;
                }
                if (dest8[i] != (1)) {
                    fail8 = 1;
                }
            }
            report_get_result("shmem_short_get", fail1);
            report_get_result("shmem_int_get", fail2);
            report_get_result("shmem_long_get", fail3);
            report_get_result("shmem_longdouble_get", fail4);
            report_get_result("shmem_longlong_get", fail5);
            report_get_result("shmem_double_get", fail6);
            report_get_result("shmem_float_get", fail7);
            report_get_result("shmem_getmem", fail8);
        }
        shmem_barrier_all();

        /* Testing shmem_get32, shmem_get64, shmem_get128.  The buffers
           used for each width are chosen from sizeof(int). */
        if (sizeof(int) == 4) {
            for (i = 0; i < N; i += 1) {
                dest2[i] = -9;
                dest3[i] = -9;
                dest4[i] = -9;
            }
            fail2 = 0;
            fail3 = 0;
            fail4 = 0;
            shmem_barrier_all();
            shmem_get32(dest2, src2, N, nextpe);
            shmem_get64(dest3, src3, N, nextpe);
            shmem_get128(dest4, src4, N, nextpe);
            shmem_barrier_all();
            if (me == 0) {
                for (i = 0; i < N; i += 1) {
                    if (dest2[i] != (1)) {
                        fail2 = 1;
                    }
                    if (dest3[i] != (1)) {
                        fail3 = 1;
                    }
                    if (dest4[i] != (1)) {
                        fail4 = 1;
                    }
                }
                report_get_result("shmem_get32", fail2);
                report_get_result("shmem_get64", fail3);
                report_get_result("shmem_get128", fail4);
            }
        }

        else if (sizeof(int) == 8) {
            for (i = 0; i < N; i += 1) {
                dest1[i] = -9;
                dest2[i] = -9;
                dest3[i] = -9;
            }
            fail1 = 0;
            fail2 = 0;
            fail3 = 0;
            shmem_barrier_all();
            shmem_get32(dest1, src1, N, nextpe);
            shmem_get64(dest2, src2, N, nextpe);
            shmem_get128(dest3, src3, N, nextpe);
            shmem_barrier_all();
            if (me == 0) {
                for (i = 0; i < N; i += 1) {
                    if (dest1[i] != (1)) {
                        fail1 = 1;
                    }
                    if (dest2[i] != (1)) {
                        fail2 = 1;
                    }
                    if (dest3[i] != (1)) {
                        fail3 = 1;
                    }
                }
                report_get_result("shmem_get32", fail1);
                report_get_result("shmem_get64", fail2);
                report_get_result("shmem_get128", fail3);
            }
        }

        /* Testing shmem_double_g, shmem_float_g, shmem_int_g, shmem_long_g,
           shmem_short_g */
        shmem_barrier_all();
        dest9 = shmem_short_g(src9, nextpe);
        dest10 = shmem_int_g(src10, nextpe);
        dest11 = shmem_long_g(src11, nextpe);
        dest12 = shmem_double_g(src12, nextpe);
        dest13 = shmem_float_g(src13, nextpe);
        shmem_barrier_all();
        if (me == 0) {
            report_get_result("shmem_short_g", dest9 != 1);
            report_get_result("shmem_int_g", dest10 != 1);
            report_get_result("shmem_long_g", dest11 != 1);
            report_get_result("shmem_double_g", dest12 != 1);
            report_get_result("shmem_float_g", dest13 != 1);
        }
        shmem_barrier_all();
        shmem_free(src1);
        shmem_free(src2);
        shmem_free(src3);
        shmem_free(src4);
        shmem_free(src5);
        shmem_free(src6);
        shmem_free(src7);
        shmem_free(src8);
        shmem_free(src9);
        shmem_free(src10);
        shmem_free(src11);
        shmem_free(src12);
        shmem_free(src13);
        free(dest8);
    }

    else {
        printf("Number of PEs must be > 1 to test shmem get, test skipped\n");
    }

    shmem_finalize();

    return 0;
}
/*
 * shmem_long_swap test.
 *
 * PE 0 only hosts the data and checks the result.  In each of ITER
 * rounds every other PE
 *   - draws a ticket oldj from the counter `count` on PE 0 with a
 *     fetch-and-increment,
 *   - swaps its rank into x[oldj % (n_pes-1)] on PE 0,
 *   - swaps its rank into xa[oldj] on PE 0; that slot must still hold 0
 *     because each ticket is handed out once.
 * Finally PE 0 compares each x[] entry with the rank recorded in xa[]
 * for the last n_pes-1 tickets.
 *
 * Requires at least two PEs; exits with status 1 otherwise.
 */
int main(int argc, char **argv)
{
  int i,j;
  long modj,oldj,oldxmodj,oldxa;
  int my_pe,n_pes;
  size_t max_elements_bytes;
  static long *x,*xa;

  shmem_init();
  my_pe = shmem_my_pe();
  n_pes = shmem_n_pes();
#ifdef HAVE_SET_CACHE_INV
  shmem_set_cache_inv();
#endif

/*  fail if trying to use only one processor  */
  if ( n_pes  <= 1 ){
        fprintf(stderr, "FAIL - test requires at least two PEs\n");
        exit(1);
  }

  if(my_pe == 0)
    fprintf(stderr, "shmem_long_swap(%s) n_pes=%d\n", argv[0],n_pes);

/*  shmalloc x & xa on all pes (only use the ones on PE 0)  */

  max_elements_bytes = (size_t) (sizeof(long) * n_pes);
  x = shmem_malloc( max_elements_bytes );
  if (x == NULL) {
    fprintf(stderr, "FAIL - shmem_malloc of x failed\n");
    exit(1);
  }
  for(i=0; i<n_pes; i++)
    x[i] = 0;
  max_elements_bytes = (size_t) (sizeof(long) * n_pes * ITER);
  xa = shmem_malloc( max_elements_bytes );
  if (xa == NULL) {
    fprintf(stderr, "FAIL - shmem_malloc of xa failed\n");
    exit(1);
  }
  for(i=0; i<n_pes*ITER; i++)
    xa[i] = 0;
  count = 0;
  shmem_barrier_all();

  for(i=0; i<ITER; i++) {
    if (my_pe != 0) {
      oldj = shmem_long_finc(&count, 0);  /* get index oldj from PE 0 */
      modj = (oldj % (n_pes-1));  /* PE 0 is just the counter/checker */
        /* record PE value in x[modj] */
      oldxmodj = shmem_long_swap(&x[modj], my_pe, 0);
      (void) oldxmodj;  /* previous content is not checked */
      /* printf("PE=%d,oldj=%ld,modj=%ld,oldxmodj=%ld\n",my_pe,oldj,modj,oldxmodj); */
        /* record PE value in xa[oldj] -- tells PE involved for each count */
      oldxa = shmem_long_swap(&xa[oldj], my_pe, 0);
      /* printf("PE=%d,i=%d,oldj=%ld,oldxa=%ld\n",my_pe,i,oldj,oldxa); */
      if (oldxa != 0)
        fprintf(stderr, "FAIL PE %d of %d: i=%d, oldxa = %ld expected = 0\n",
                         my_pe, n_pes, i, oldxa);
    }
  }
  shmem_barrier_all();

  if (my_pe == 0) {  /* check last x[j] array PEs vs saved ans in xa[i] */
    i = (ITER-1)*(n_pes-1);
    for(j=1 ; j<n_pes; j++) {
      printf("j=%d,x[%d]=%ld,xa[%d]=%ld\n",j,j-1,x[j-1],i,xa[i]);
      /* report the value that was actually compared against */
      if (x[j-1] != xa[i])
        fprintf(stderr, "FAIL PE %d of %d: x[%d] = %ld expected = %ld\n",
                         my_pe, n_pes, j-1, x[j-1], xa[i]);
      i++;
    }
  }

  shmem_barrier_all();
  shmem_free(x);
  shmem_free(xa);
#ifdef NEEDS_FINALIZE
  shmem_finalize(); 
#endif
  return 0;
}
示例#6
0
/*
 * Driver for a two-PE micro test.
 *
 * Initialises SHMEM, fills the file-level target/source/sync_pes buffers with
 * 0xff bytes, and refuses to run unless exactly two PEs are present.
 * Command-line options are processed in order: each test option runs its test
 * immediately, so "-i <n>" only affects tests named after it on the command
 * line.  When no test option was given (only -i and/or -v, or no arguments at
 * all) every test is run once.
 *
 * T, S and P are running counters handed to the test routines and advanced
 * after each call; their exact meaning is defined by those routines
 * (NOTE(review): presumably offsets into target/source/sync_pes - confirm).
 *
 * Returns 0 on success or when skipped for a wrong PE count, 1 on a usage
 * error or an invalid iteration count.
 */
int main(int argc, char **argv)
{
    int        me, nproc;
    int        c, all_ops = 1;
    int        T = 0, S = 0, P = 0;
    const int  DEFAULT_ITR = 7;
    int        iterations = DEFAULT_ITR;

    shmem_init();

    me = shmem_my_pe();
    nproc = shmem_n_pes();

    memset(target, -1, NUM_WRITE * sizeof(int));
    memset(source, -1, NUM_READ * sizeof(int));
    memset(sync_pes, -1, NUM_SYNC * sizeof(int));

    shmem_barrier_all();

    if (nproc != 2) {
        if (me == 0) {
            fprintf(stderr, "This is a micro test and is only "
                    "intended to run on exactly two processes you"
                    " are using %d\n", nproc);
        }
        shmem_finalize();
        return 0;
    }

    while ((c = getopt(argc, argv, "i:vdpgaAscfFh")) != -1) {
        switch (c) {
            case 'i':
                iterations = atoi(optarg);
                /* Validate explicitly: an assert() would vanish under NDEBUG
                 * and would otherwise abort without finalizing SHMEM. */
                if (iterations <= 0) {
                    if (me == 0) {
                        fprintf(stderr, "iteration count must be a positive"
                                " integer, got '%s'\n", optarg);
                    }
                    shmem_finalize();
                    return 1;
                }
                /* "-i <n>" occupies two argv slots */
                all_ops += 2;
                break;
            case 'v':
                verbose = 1;
                all_ops++;
                break;
            case 'd':
                debug = 1;
                break;
            case 'p':
                putfence(me, iterations, T++);
                break;
            case 'g':
                gettest(me, iterations, T++, S++, P++);
                break;
            case 'a':
                atomic_add(me, iterations, T++);
                break;
            case 'A':
                atomic_inc(me, iterations, T++);
                break;
            case 's':
                swaptest(me, iterations, T++, S++, P++);
                break;
            case 'c':
                cswaptest(me, iterations, T++, S++, P++);
                break;
            case 'f':
                fetchatomic_add(me, iterations, T++, S++);
                break;
            case 'F':
                fetchatomic_inc(me, iterations, T++, S++);
                break;
            case 'h':
            default:
                if (me == 0) {
                    fprintf(stderr, "input options:\n 1) single"
                            " argument option will run all tests by default"
                            " and additionally request:  -v (verbose) | "
                            "-i <number of iterations>\n");
                    fprintf(stderr, " 2) two argument options "
                            "choose any combination of the following "
                            "to run individual tests:  -i <iterations>, -v"
                            ", -d, -p, -g, -a, -A, -s, -c, -f, -F, -h\n");
                }
                shmem_finalize();
                return 1;
        }
    }

    /* argc equals all_ops exactly when only -i/-v were supplied: in that
     * case, or with no arguments at all, run the full suite. */
    if (argc == all_ops || argc == 1) {
        putfence(me, iterations,  T++);
        gettest(me, iterations, T++, S++, P++);
        atomic_add(me, iterations, T++);
        atomic_inc(me, iterations, T++);
        swaptest(me, iterations, T++, S++, P++);
        cswaptest(me, iterations, T++, S++, P++);
        fetchatomic_add(me, iterations, T++, S++);
        fetchatomic_inc(me, iterations, T++, S++);
    }

    if (verbose) {
        if (me == 1)
            printf("PE 1: PASS: %8d iterations\n", iterations);
        else
            printf("PE 0 Successful exit\n");
    }

    shmem_finalize();

    return 0;
}
/*
 * Times repeated shmem_fcollect32 calls across all PEs.
 *
 * Every PE contributes N_ELEMENTS ints; the collective is executed a fixed
 * number of times and PE 0 prints the average wall-clock time per call in
 * microseconds.  pSyncA and pSyncB are file-level arrays declared outside
 * this function (NOTE(review): presumably symmetric long arrays of
 * _SHMEM_COLLECT_SYNC_SIZE elements - confirm).
 */
int
main (void)
{
    const int n_rounds = 10000; /* number of timed collectives */
    int *source;
    int *target;
    int me, npes;
    int k;
    long total_usec = 0;        /* accumulated on PE 0 only */

    shmem_init ();
    me = shmem_my_pe ();
    npes = shmem_n_pes ();

    /* Per-PE contribution: values encode element index and owning PE. */
    source = (int *) shmem_malloc (N_ELEMENTS * sizeof (*source));
    for (k = 0; k < N_ELEMENTS; k++) {
        source[k] = (k + 1) * 10 + me;
    }

    /* Receive buffer holds one contribution per PE; pre-filled with a
       sentinel value. */
    target = (int *) shmem_malloc (N_ELEMENTS * sizeof (*target) * npes);
    for (k = 0; k < N_ELEMENTS * npes; k++) {
        target[k] = -90;
    }

    for (k = 0; k < _SHMEM_COLLECT_SYNC_SIZE; k++) {
        pSyncA[k] = _SHMEM_SYNC_VALUE;
        pSyncB[k] = _SHMEM_SYNC_VALUE;
    }
    shmem_barrier_all ();

    for (k = 0; k < n_rounds; k++) {
        struct timeval t0, t1;
        long t0_usec, t1_usec;

        gettimeofday (&t0, NULL);
        t0_usec = (t0.tv_sec * 1000000.0) + t0.tv_usec;

        /* Odd rounds use pSyncA, even rounds pSyncB, so that two
           consecutive collectives never share a pSync array. */
        shmem_fcollect32 (target, source, N_ELEMENTS, 0, 0, npes,
                          (k % 2) ? pSyncA : pSyncB);

        gettimeofday (&t1, NULL);
        t1_usec = (t1.tv_sec * 1000000.0) + t1.tv_usec;

        if (me == 0) {
            total_usec = total_usec + (t1_usec - t0_usec);
        }
    }

    if (me == 0) {
        printf
            ("Time required to collect %d bytes of data, with %d PEs is %ld microseconds\n",
             (4 * N_ELEMENTS * npes), npes, total_usec / n_rounds);
    }

    shmem_barrier_all ();
    shmem_free (target);
    shmem_free (source);

    shmem_finalize ();

    return 0;
}
示例#8
0
/*
 * Print the "Test <routine>: Passed" / "Test <routine>: Failed" line for one
 * routine and increment *fail_count when it did not pass.
 */
static void
report_get_result (const char *routine, int passed, int *fail_count)
{
    if (passed) {
        printf ("Test %s: Passed\n", routine);
    }
    else {
        printf ("Test %s: Failed\n", routine);
        *fail_count += 1;
    }
}

/*
 * Exercises the SHMEM get family (typed get, sized get, strided iget and
 * single-element g) with at least two PEs.
 *
 * Every PE fills the file-level src1..src13 objects with its own PE number
 * (NOTE(review): these are declared outside this function and presumably
 * symmetric - confirm), fetches them from another PE and PE 0 verifies and
 * reports the result of each routine, followed by a summary line.
 *
 * Always returns 0; failures are reported on stdout only.
 */
int
main (int argc, char **argv)
{
    int i;
    int nextpe;
    int me, npes;
    /* badK becomes 1 as soon as one checked element of destK is wrong */
    int bad1, bad2, bad3, bad4, bad5, bad6, bad7, bad8;

    short dest1[N];
    int dest2[N];
    long dest3[N];
    long double dest4[N];
    long long dest5[N];
    double dest6[N];
    float dest7[N];
    char *dest8;
    short dest9;
    int dest10;
    long dest11;
    double dest12;
    float dest13;

    int fail_count = 0;

    shmem_init ();
    me = shmem_my_pe ();
    npes = shmem_n_pes ();

    if (npes > 1) {

        bad1 = 0;
        bad2 = 0;
        bad3 = 0;
        bad4 = 0;
        bad5 = 0;
        bad6 = 0;
        bad7 = 0;
        bad8 = 0;
        dest8 = (char *) malloc (N * sizeof (char));

        /* Poison all destinations so stale data cannot pass the checks. */
        for (i = 0; i < N; i += 1) {
            dest1[i] = -9;
            dest2[i] = -9;
            dest3[i] = -9;
            dest4[i] = -9;
            dest5[i] = -9;
            dest6[i] = -9;
            dest7[i] = -9.0;
            dest8[i] = -9;
        }
        dest9 = -9;
        dest10 = -9;
        dest11 = -9;
        dest12 = -9;
        dest13 = -9;

        /* Each PE publishes its own PE number in every source object. */
        for (i = 0; i < N; i += 1) {
            src1[i] = (short) me;
            src2[i] = me;
            src3[i] = (long) me;
            src4[i] = (long double) me;
            src5[i] = (long long) me;
            src6[i] = (double) me;
            src7[i] = (float) me;
            src8[i] = (char) me;
        }
        src9 = (short) me;
        src10 = me;
        src11 = (long) me;
        src12 = (double) me;
        src13 = (float) me;

        nextpe = (me + 1) % npes;

        /* Testing shmem_short_get, shmem_int_get, shmem_long_get,
           shmem_longdouble_get, shmem_longlong_get, shmem_double_get,
           shmem_float_get, shmem_getmem */
        shmem_barrier_all ();

        shmem_short_get (dest1, src1, N, nextpe);
        shmem_int_get (dest2, src2, N, nextpe);
        shmem_long_get (dest3, src3, N, nextpe);
        shmem_longdouble_get (dest4, src4, N, nextpe);
        shmem_longlong_get (dest5, src5, N, nextpe);
        shmem_double_get (dest6, src6, N, nextpe);
        shmem_float_get (dest7, src7, N, nextpe);
        shmem_getmem (dest8, src8, N * sizeof (char), nextpe);

        shmem_barrier_all ();

        /* PE 0 fetched from PE 1, so every element must equal 1. */
        if (me == 0) {
            for (i = 0; i < N; i += 1) {
                if (dest1[i] != (1)) {
                    bad1 = 1;
                }
                if (dest2[i] != (1)) {
                    bad2 = 1;
                }
                if (dest3[i] != (1)) {
                    bad3 = 1;
                }
                if (dest4[i] != (1)) {
                    bad4 = 1;
                }
                if (dest5[i] != (1)) {
                    bad5 = 1;
                }
                if (dest6[i] != (1)) {
                    bad6 = 1;
                }
                if (dest7[i] != (1)) {
                    bad7 = 1;
                }
                if (dest8[i] != (1)) {
                    bad8 = 1;
                }
            }

            report_get_result ("shmem_short_get", !bad1, &fail_count);
            report_get_result ("shmem_int_get", !bad2, &fail_count);
            report_get_result ("shmem_long_get", !bad3, &fail_count);
            report_get_result ("shmem_longdouble_get", !bad4, &fail_count);
            report_get_result ("shmem_longlong_get", !bad5, &fail_count);
            report_get_result ("shmem_double_get", !bad6, &fail_count);
            report_get_result ("shmem_float_get", !bad7, &fail_count);
            report_get_result ("shmem_getmem", !bad8, &fail_count);
        }
        shmem_barrier_all ();

        /* Testing shmem_get32, shmem_get64, shmem_get128.  The buffers used
           for each width are picked from sizeof (int). */
        if (sizeof (int) == 4) {
            for (i = 0; i < N; i += 1) {
                dest2[i] = -9;
                dest3[i] = -9;
                dest4[i] = -9;
            }
            bad2 = 0;
            bad3 = 0;
            bad4 = 0;

            shmem_barrier_all ();

            shmem_get32 (dest2, src2, N, nextpe);
            shmem_get64 (dest3, src3, N, nextpe);
            shmem_get128 (dest4, src4, N, nextpe);

            shmem_barrier_all ();

            if (me == 0) {
                for (i = 0; i < N; i += 1) {
                    if (dest2[i] != (1)) {
                        bad2 = 1;
                    }
                    if (dest3[i] != (1)) {
                        bad3 = 1;
                    }
                    if (dest4[i] != (1)) {
                        bad4 = 1;
                    }
                }
                report_get_result ("shmem_get32", !bad2, &fail_count);
                report_get_result ("shmem_get64", !bad3, &fail_count);
                report_get_result ("shmem_get128", !bad4, &fail_count);
            }
        }
        else if (sizeof (int) == 8) {
            for (i = 0; i < N; i += 1) {
                dest1[i] = -9;
                dest2[i] = -9;
                dest3[i] = -9;
            }
            bad1 = 0;
            bad2 = 0;
            bad3 = 0;

            shmem_barrier_all ();

            shmem_get32 (dest1, src1, N, nextpe);
            shmem_get64 (dest2, src2, N, nextpe);
            shmem_get128 (dest3, src3, N, nextpe);

            shmem_barrier_all ();

            if (me == 0) {
                for (i = 0; i < N; i += 1) {
                    if (dest1[i] != (1)) {
                        bad1 = 1;
                    }
                    if (dest2[i] != (1)) {
                        bad2 = 1;
                    }
                    if (dest3[i] != (1)) {
                        bad3 = 1;
                    }
                }
                report_get_result ("shmem_get32", !bad1, &fail_count);
                report_get_result ("shmem_get64", !bad2, &fail_count);
                report_get_result ("shmem_get128", !bad3, &fail_count);
            }
        }

        /* Testing shmem_iget32, shmem_iget64, shmem_iget128: N / 2 elements
           are read from the last PE with source stride 2 into consecutive
           destination slots. */
        shmem_barrier_all ();
        if (sizeof (int) == 4) {
            for (i = 0; i < N; i += 1) {
                dest2[i] = -9;
                dest3[i] = -9;
                dest4[i] = -9;
            }
            bad2 = 0;
            bad3 = 0;
            bad4 = 0;

            shmem_barrier_all ();

            shmem_iget32 (dest2, src2, 1, 2, N / 2, npes - 1);
            shmem_iget64 (dest3, src3, 1, 2, N / 2, npes - 1);
            shmem_iget128 (dest4, src4, 1, 2, N / 2, npes - 1);

            shmem_barrier_all ();

            if (me == 0) {
                for (i = 0; i < N / 2; i += 1) {
                    if (dest2[i] != (npes - 1)) {
                        bad2 = 1;
                    }
                    if (dest3[i] != (npes - 1)) {
                        bad3 = 1;
                    }
                    if (dest4[i] != (npes - 1)) {
                        bad4 = 1;
                    }
                }
                report_get_result ("shmem_iget32", !bad2, &fail_count);
                report_get_result ("shmem_iget64", !bad3, &fail_count);
                report_get_result ("shmem_iget128", !bad4, &fail_count);
            }
        }
        else if (sizeof (int) == 8) {
            for (i = 0; i < N; i += 1) {
                dest1[i] = -9;
                dest2[i] = -9;
                dest3[i] = -9;
            }
            bad1 = 0;
            bad2 = 0;
            bad3 = 0;

            shmem_barrier_all ();

            shmem_iget32 (dest1, src1, 1, 2, N / 2, npes - 1);
            shmem_iget64 (dest2, src2, 1, 2, N / 2, npes - 1);
            shmem_iget128 (dest3, src3, 1, 2, N / 2, npes - 1);

            shmem_barrier_all ();

            if (me == 0) {
                for (i = 0; i < N / 2; i += 1) {
                    if (dest1[i] != (npes - 1)) {
                        bad1 = 1;
                    }
                    if (dest2[i] != (npes - 1)) {
                        bad2 = 1;
                    }
                    if (dest3[i] != (npes - 1)) {
                        bad3 = 1;
                    }
                }
                report_get_result ("shmem_iget32", !bad1, &fail_count);
                report_get_result ("shmem_iget64", !bad2, &fail_count);
                report_get_result ("shmem_iget128", !bad3, &fail_count);
            }
        }

        /* Testing shmem_short_iget, shmem_int_iget, shmem_long_iget,
           shmem_double_iget, shmem_float_iget */
        for (i = 0; i < N; i += 1) {
            dest1[i] = -9;
            dest2[i] = -9;
            dest3[i] = -9;
            dest6[i] = -9;
            dest7[i] = -9;
        }
        bad1 = 0;
        bad2 = 0;
        bad3 = 0;
        bad6 = 0;
        bad7 = 0;

        shmem_barrier_all ();

        shmem_short_iget (dest1, src1, 1, 2, N / 2, npes - 1);
        shmem_int_iget (dest2, src2, 1, 2, N / 2, npes - 1);
        shmem_long_iget (dest3, src3, 1, 2, N / 2, npes - 1);
        shmem_double_iget (dest6, src6, 1, 2, N / 2, npes - 1);
        shmem_float_iget (dest7, src7, 1, 2, N / 2, npes - 1);

        shmem_barrier_all ();

        if (me == 0) {
            for (i = 0; i < N / 2; i += 1) {
                if (dest1[i] != (npes - 1)) {
                    bad1 = 1;
                }
                if (dest2[i] != (npes - 1)) {
                    bad2 = 1;
                }
                if (dest3[i] != (npes - 1)) {
                    bad3 = 1;
                }
                if (dest6[i] != (npes - 1)) {
                    bad6 = 1;
                }
                if (dest7[i] != (npes - 1)) {
                    bad7 = 1;
                }
            }

            report_get_result ("shmem_short_iget", !bad1, &fail_count);
            report_get_result ("shmem_int_iget", !bad2, &fail_count);
            report_get_result ("shmem_long_iget", !bad3, &fail_count);
            report_get_result ("shmem_double_iget", !bad6, &fail_count);
            report_get_result ("shmem_float_iget", !bad7, &fail_count);
        }

        /* Testing shmem_double_g, shmem_float_g, shmem_int_g, shmem_long_g,
           shmem_short_g */
        shmem_barrier_all ();

        dest9 = shmem_short_g (&src9, nextpe);
        dest10 = shmem_int_g (&src10, nextpe);
        dest11 = shmem_long_g (&src11, nextpe);
        dest12 = shmem_double_g (&src12, nextpe);
        dest13 = shmem_float_g (&src13, nextpe);

        shmem_barrier_all ();

        if (me == 0) {
            report_get_result ("shmem_short_g", dest9 == 1, &fail_count);
            report_get_result ("shmem_int_g", dest10 == 1, &fail_count);
            report_get_result ("shmem_long_g", dest11 == 1, &fail_count);
            report_get_result ("shmem_double_g", dest12 == 1, &fail_count);
            report_get_result ("shmem_float_g", dest13 == 1, &fail_count);
        }

        shmem_barrier_all ();

        if (me == 0) {
            if (fail_count == 0)
                printf ("All Tests Passed\n");
            else
                printf ("%d Tests Failed\n", fail_count);
        }

        /* dest8 is the only heap allocation; release it before leaving. */
        free (dest8);
    }
    else {
        printf ("Number of PEs must be > 1 to test shmem get, test skipped\n");
    }

    shmem_finalize ();

    return 0;
}
示例#9
0
/*
 * Times repeated shmem_broadcast32 calls rooted at PE 0.
 *
 * Options (parsed with getopt):
 *   -v        increase verbosity (file-level Verbose)
 *   -e <n>    number of 32-bit elements to broadcast
 *   -l <n>    number of broadcast iterations
 *   -p <n>    number of pSync sets to rotate through
 *   -s        barrier after every broadcast (file-level Serialize)
 *   -h        print usage on PE 0 and exit
 *
 * Returns 0 on success, 1 on a bad option or option value.
 */
int
main(int argc, char **argv)
{
    int i, ps, ps_cnt = 2;
    int *target;
    int *source;
    int me, npes, elements = N_ELEMENTS, loops = DFLT_LOOPS;
    char *pgm;
    double start_time, time_taken;
    double mbytes;

    shmem_init();
    me = shmem_my_pe();
    npes = shmem_n_pes();

    /* Program name without any leading directory components. */
    if ((pgm = strrchr(argv[0], '/')))
        pgm++;
    else
        pgm = argv[0];

    while ((i = getopt (argc, argv, "hve:l:p:s")) != EOF) {
        switch (i)
        {
          case 'v':
              Verbose++;
              break;
          case 'e':
              if ((elements = atoi_scaled(optarg)) <= 0) {
                  fprintf(stderr,"ERR: Bad elements count %d\n",elements);
                  shmem_finalize();
                  return 1;
              }
              break;
          case 'l':
              if ((loops = atoi_scaled(optarg)) <= 0) {
                  fprintf(stderr,"ERR: Bad loop count %d\n",loops);
                  shmem_finalize();
                  return 1;
              }
              break;
          case 'p':
              if ((ps_cnt = atoi_scaled(optarg)) <= 0) {
                  /* report the rejected pSync count, not the loop count */
                  fprintf(stderr,"ERR: Bad pSync[] elements %d\n",ps_cnt);
                  shmem_finalize();
                  return 1;
              }
              break;
          case 's':
              Serialize++;
              break;
          case 'h':
              if (me == 0)
                  usage(pgm);
              /* finalize here as on every other exit path */
              shmem_finalize();
              return 0;
          default:
              if (me == 0) {
                  fprintf(stderr,"%s: unknown switch '-%c'?\n",pgm,i);
                  usage(pgm);
              }
              shmem_finalize();
              return 1;
        }
    }

    /* ps_cnt sets of _SHMEM_BCAST_SYNC_SIZE longs each, all initialised. */
    ps_cnt *= _SHMEM_BCAST_SYNC_SIZE;
    pSync = shmem_malloc( ps_cnt * sizeof(long) );

    for (i = 0; i < ps_cnt; i++)
        pSync[i] = _SHMEM_SYNC_VALUE;

    source = (int *) shmem_malloc( elements * sizeof(*source) );

    target = (int *) shmem_malloc( elements * sizeof(*target) );
    for (i = 0; i < elements; i += 1) {
        source[i] = i + 1;
        target[i] = -90;
    }

    if (me==0 && Verbose)
        fprintf(stderr,"ps_cnt %d loops %d nElems %d\n",
                        ps_cnt,loops,elements);

    shmem_barrier_all();

    for (time_taken = 0.0, ps = i = 0; i < loops; i++) {

        start_time = shmemx_wtime();

        shmem_broadcast32(target, source, elements, 0, 0, 0, npes, &pSync[ps]);

        if (Serialize) shmem_barrier_all();

        time_taken += (shmemx_wtime() - start_time);

        /* advance to the next pSync set, wrapping at the end */
        if (ps_cnt > 1 ) {
            ps += _SHMEM_BCAST_SYNC_SIZE;
            if ( ps >= ps_cnt ) ps = 0;
        }
    }

    if (me == 0 && Verbose) {
        /* elements*sizeof() is a size_t, hence %zu */
        printf("%d loops of Broadcast32(%zu bytes) over %d PEs: %7.3f secs\n",
            loops, (elements*sizeof(*source)), npes, time_taken);
        /* Compute the volume in floating point: integer arithmetic would
         * truncate to whole megabytes and could overflow int. */
        mbytes = ((double)elements * (double)loops * (double)sizeof(*source))
                 / (1024.0*1024.0);
        printf("  %7.5f secs per broadcast() @ %7.4f MB/sec\n",
               (time_taken/(double)loops), (mbytes / time_taken) );
    }

    if (Verbose > 1)  fprintf(stderr,"[%d] pre B1\n",me);

    shmem_barrier_all();

    if (Verbose > 1)  fprintf(stderr,"[%d] post B1\n",me);

    shmem_free(pSync);
    shmem_free(target);
    shmem_free(source);

    shmem_finalize();

    return 0;
}
示例#10
0
/*
 * Tests the SHMEM fetch-and-add routines for short (HAVE_SHORT), int, long,
 * long long (HAVE_LONG_LONG) and the generic shmem_fadd.
 *
 * For every type the same scheme is used: each PE other than 0 runs ITER
 * iterations in which it fetch-increments a counter on PE 0, reduces the
 * fetched value modulo (n_pes-1) to pick a slot, and fetch-adds 10 to that
 * slot of an array on PE 0.  Afterwards PE 0 checks that each of the first
 * n_pes-1 slots holds 10*ITER and prints a FAIL line otherwise.
 *
 * Requires at least two PEs; exits with status 1 otherwise.  Returns 0 in
 * all other cases (failures are reported on stderr only).
 */
int main(int argc, char **argv)
{
  int i,j;
  /* old*: value fetched from the counter; modj*: slot index;
     oldxmod*: previous slot value returned by fadd (not checked) */
  short    modjs, oldjs, oldxmodjs, valuejs;
  int      modji, oldji, oldxmodji, valueji;
  long     modjl, oldjl, oldxmodjl, valuejl;
  long long modjll,oldjll,oldxmodjll, valuejll;
  int my_pe,n_pes;
  size_t max_elements_bytes;
  static short *xs;
  static int   *xi;
  static long  *xl;
  static long long *xll;

  shmem_init();
  my_pe = shmem_my_pe();
  n_pes = shmem_n_pes();
#ifdef HAVE_SET_CACHE_INV
  shmem_set_cache_inv();
#endif

/*  fail if trying to use only one processor  */
  if ( n_pes  <= 1 ){
        fprintf(stderr, "FAIL - test requires at least two PEs\n");
        exit(1);
  }

  if(my_pe == 0)
    fprintf(stderr, "shmem_fadd(%s) n_pes=%d\n", argv[0],n_pes);

/*  test shmem_short_fadd  */
#ifdef HAVE_SHORT

  /*  shmalloc xs on all pes (only use the one on PE 0)  */
  max_elements_bytes = (size_t) (sizeof(short) * n_pes);
  xs = shmem_malloc( max_elements_bytes );
  for(i=0; i<n_pes; i++)
    xs[i] = 0;
  count_short = 0;
  shmem_barrier_all();

  for(i=0; i<ITER; i++) {
    if (my_pe != 0) {
      oldjs = shmem_short_finc(&count_short, 0);  /* get index oldjs from PE 0 */
      modjs = (oldjs % (n_pes-1));  /* PE 0 is just the counter/checker */
        /* add 10 to value in xs[modjs] */
      valuejs = (short) 10;
      oldxmodjs = shmem_short_fadd(&xs[modjs], valuejs, 0);
    }
  }
  shmem_barrier_all();

  if (my_pe == 0) {         /* check xs[j] array on PE 0 */
    for(j=1 ; j<n_pes; j++) {
      if (xs[j-1] != 10*ITER)
        fprintf(stderr, "FAIL PE %d of %d: xs[%d] = %d expected = %d\n",
                         my_pe, n_pes, j-1, xs[j-1], (int) (10*ITER));
    }
  }
  shmem_free(xs);

#endif

/*  test shmem_int_fadd  */

  /*  shmalloc xi on all pes (only use the one on PE 0)  */
  max_elements_bytes = (size_t) (sizeof(int) * n_pes);
  xi = shmem_malloc( max_elements_bytes );
  for(i=0; i<n_pes; i++)
    xi[i] = 0;
  count_int = 0;
  shmem_barrier_all();

  for(i=0; i<ITER; i++) {
    if (my_pe != 0) {
      oldji = shmem_int_finc(&count_int, 0);  /* get index oldji from PE 0 */
      modji = (oldji % (n_pes-1));  /* PE 0 is just the counter/checker */
        /* add 10 to value in xi[modji] */
      valueji = (int) 10;
      oldxmodji = shmem_int_fadd(&xi[modji], valueji, 0);
    }
  }
  shmem_barrier_all();

  if (my_pe == 0) {         /* check xi[j] array on PE 0 */
    for(j=1 ; j<n_pes; j++) {
      if (xi[j-1] != 10*ITER)
        fprintf(stderr, "FAIL PE %d of %d: xi[%d] = %d expected = %d\n",
                         my_pe, n_pes, j-1, xi[j-1], (int) (10*ITER));
    }
  }
  shmem_free(xi);

/*  test shmem_long_fadd  */

  /*  shmalloc xl on all pes (only use the one on PE 0)  */
  max_elements_bytes = (size_t) (sizeof(long) * n_pes);
  xl = shmem_malloc( max_elements_bytes );
  for(i=0; i<n_pes; i++)
    xl[i] = 0;
  count_long = 0;
  shmem_barrier_all();

  for(i=0; i<ITER; i++) {
    if (my_pe != 0) {
      oldjl = shmem_long_finc(&count_long, 0);  /* get index oldjl from PE 0 */
      modjl = (oldjl % (n_pes-1));  /* PE 0 is just the counter/checker */
        /* add 10 to value in xl[modjl] */
      valuejl = (long) 10;
      oldxmodjl = shmem_long_fadd(&xl[modjl], valuejl, 0);
    }
  }
  shmem_barrier_all();

  if (my_pe == 0) {         /* check xl[j] array on PE 0 */
    for(j=1 ; j<n_pes; j++) {
      if (xl[j-1] != 10*ITER)
        fprintf(stderr, "FAIL PE %d of %d: xl[%d] = %ld expected = %ld\n",
                         my_pe, n_pes, j-1, xl[j-1], (long) (10*ITER));
    }
  }
  shmem_free(xl);

/*  test shmem_longlong_fadd  */

#ifdef HAVE_LONG_LONG

  /*  shmalloc xll on all pes (only use the one on PE 0)  */
  max_elements_bytes = (size_t) (sizeof(long long) * n_pes);
  xll = shmem_malloc( max_elements_bytes );
  for(i=0; i<n_pes; i++)
    xll[i] = 0;
  count_longlong = 0;
  shmem_barrier_all();

  for(i=0; i<ITER; i++) {
    if (my_pe != 0) {
      oldjll = shmem_longlong_finc(&count_longlong, 0);  /* get index oldjll from PE 0 */
      modjll = (oldjll % (n_pes-1));  /* PE 0 is just the counter/checker */
        /* add 10 to value in xll[modjll] */
      valuejll = (long long) 10;
      oldxmodjll = shmem_longlong_fadd(&xll[modjll], valuejll, 0);
    }
  }
  shmem_barrier_all();

  if (my_pe == 0) {         /* check xll[j] array on PE 0 */
    for(j=1 ; j<n_pes; j++) {
      if (xll[j-1] != 10*ITER)
        fprintf(stderr, "FAIL PE %d of %d: xll[%d] = %lld expected = %lld\n",
                         my_pe, n_pes, j-1, xll[j-1], (long long) (10*ITER));
    }
  }
  shmem_free(xll);

#endif

#ifdef SHMEM_C_GENERIC_32

/*  test shmem_fadd (GENERIC 32)  */

  /*  shmalloc xi on all pes (only use the one on PE 0)  */
  max_elements_bytes = (size_t) (sizeof(int) * n_pes);
  xi = shmem_malloc( max_elements_bytes );
  for(i=0; i<n_pes; i++)
    xi[i] = 0;
  count_int = 0;
  shmem_barrier_all();

  for(i=0; i<ITER; i++) {
    if (my_pe != 0) {
      oldji = shmem_finc(&count_int, 0);  /* get index oldji from PE 0 */
      modji = (oldji % (n_pes-1));  /* PE 0 is just the counter/checker */
        /* add 10 to value in xi[modji] */
      valueji = (int) 10;
      oldxmodji = shmem_fadd(&xi[modji], valueji, 0);
    }
  }
  shmem_barrier_all();

  if (my_pe == 0) {         /* check xi[j] array on PE 0 */
    for(j=1 ; j<n_pes; j++) {
      if (xi[j-1] != 10*ITER)
        fprintf(stderr, "FAIL pe %d of %d: xi[%d] = %d expected = %d\n",
                         my_pe, n_pes, j-1, xi[j-1], (int) (10*ITER));
    }
  }
  shmem_free(xi);

#else

/*  test shmem_fadd (GENERIC 64)  */

  /*  shmalloc xl on all pes (only use the one on PE 0)  */
  max_elements_bytes = (size_t) (sizeof(long) * n_pes);
  xl = shmem_malloc( max_elements_bytes );
  for(i=0; i<n_pes; i++)
    xl[i] = 0;
  count_long = 0;
  shmem_barrier_all();

  for(i=0; i<ITER; i++) {
    if (my_pe != 0) {
      /* the type-generic names are only used when compiling as C11 or later */
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L)
      oldjl = shmem_finc(&count_long, 0);  /* get index oldjl from PE 0 */
#else
      oldjl = shmem_long_finc(&count_long, 0);  /* get index oldjl from PE 0 */
#endif
      modjl = (oldjl % (n_pes-1));  /* PE 0 is just the counter/checker */
        /* add 10 to value in xl[modjl] */
      valuejl = (long) 10;
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L)
      oldxmodjl = shmem_fadd(&xl[modjl], valuejl, 0);
#else
      oldxmodjl = shmem_long_fadd(&xl[modjl], valuejl, 0);
#endif
    }
  }
  shmem_barrier_all();

  if (my_pe == 0) {         /* check xl[j] array on PE 0 */
    for(j=1 ; j<n_pes; j++) {
      if (xl[j-1] != 10*ITER)
        fprintf(stderr, "FAIL pe %d of %d: xl[%d] = %ld expected = %ld\n",
                         my_pe, n_pes, j-1, xl[j-1], (long) (10*ITER));
    }
  }
  shmem_free(xl);

#endif

  shmem_barrier_all();
#ifdef NEEDS_FINALIZE
  shmem_finalize();
#endif
  return 0;
}
示例#11
0
文件: lfinc.c 项目: caomw/SOS
/*
 * Fetch-and-increment test with timing.
 *
 * Each PE issues `loops` shmem_long_finc() calls against data[1] on its
 * right-hand neighbor ((my_pe + 1) % npes).  Every fetched value must equal
 * the iteration index, because exactly one PE increments each counter.
 * After a barrier, each PE checks that its own data[1] reached `loops` and
 * that the neighboring words data[0] and data[2] are still zero.
 *
 * argv[1], when given, replaces the default iteration count LOOPS.
 * `elapsed` and `Verbose` are file-scope variables defined outside this
 * function.
 *
 * Returns 0 on success, -1 when the final counter value is wrong.
 */
int main( int argc, char *argv[])
{
    int rc=0, my_pe, npes, neighbor;
    int loops=LOOPS;
    int j;
    size_t data_sz=sizeof(long) * 3;
    double start_time;
    long *data, lval=0;

    if (argc > 1)
        loops = atoi(argv[1]);

    shmem_init();

    my_pe = shmem_my_pe();
    npes = shmem_n_pes();

    data = shmem_malloc(data_sz);
    if (!data) {
        /* data_sz is a size_t, so it has to be printed with %zu */
        fprintf(stderr,"[%d] shmem_malloc(%zu) failure? %d\n",
                my_pe,data_sz,errno);
        shmem_global_exit(1);
    }
    memset((void*)data,0,data_sz);

    /* make sure every PE has zeroed its counter before anyone increments */
    shmem_barrier_all();

    neighbor = (my_pe + 1) % npes;
    for(j=0,elapsed=0.0; j < loops; j++) {
        /* time only the atomic itself, not the validation below */
        start_time = shmemx_wtime();
        lval = shmem_long_finc( (void*)&data[1], neighbor );
        elapsed += shmemx_wtime() - start_time;
        if (lval != (long) j) {
            fprintf(stderr,"[%d] Test: FAIL previous val %ld != %d Exit.\n",
                    my_pe, lval, j);
            shmem_global_exit(1);
        }
    }
    shmem_barrier_all();

    rc = 0;
    if (data[1] != (long)loops) {
        fprintf(stderr,"[%d] finc neighbor: FAIL data[1](%p) %ld != %d Exit.\n",
                    my_pe, (void*)&data[1], data[1], loops);
        rc--;
    }

    /* check that the adjacent memory locations were not disturbed */
    assert(data[0] == 0);
    assert(data[2] == 0);

    if (my_pe == 0 ) {
        if (rc == 0 && Verbose)
            fprintf(stderr,"[%d] finc neighbor: PASSED.\n",my_pe);
        /* elapsed is reported as seconds, so microseconds-per-call needs a
         * factor of 1e6 */
        fprintf(stderr,"[%d] %d loops of shmem_long_finc() in %6.4f secs\n"
                "  %2.6f usecs per shmem_long_finc()\n",
                    my_pe,loops,elapsed,((elapsed*1000000.0)/(double)loops));
    }
    shmem_free(data);

    shmem_finalize();

    return rc;
}
示例#12
0
文件: waituntil.c 项目: caomw/SOS
/*
 * Wait-until test driven by PE 0.
 *
 * PE 0 first overwrites `target` on every other PE with zeros, then with
 * `source`, waits for `pong` to be raised above 666 by PE 1, and finally
 * writes 0xDD into t2[9] on every other PE.  The other PEs wait on
 * target[9] and t2[9] to observe those writes and then compare `target`
 * against `source`.
 *
 * DataType, PF, SHM_PUT, SHM_PUTP, SHM_GETP, SHM_WAITU and Vprintf are
 * macros defined outside this function; NOTE(review): Vprintf presumably
 * consults the local `Verbose` flag -- confirm before renaming it.
 *
 * "-v" as the first argument enables verbose output.  Returns 0; a data
 * mismatch ends the job through shmem_global_exit(1).
 */
int
main(int argc, char* argv[])
{
    DataType source[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
    static DataType target[10];
    static DataType pong=666;
    DataType *t2=NULL;
    int me, num_pes, pe, Verbose=0;

    if (argc > 1 && (strcmp(argv[1],"-v") == 0)) {
        Verbose++;
    }

    shmem_init();
    me = shmem_my_pe();
    num_pes = shmem_n_pes();

    if (num_pes == 1) {
        printf("%s: Requires number of PEs > 1\n", argv[0]);
        shmem_finalize();
        return 0;
    }

    t2 = shmem_malloc(10*sizeof(DataType));
    if (!t2) {
        if (me==0) printf("shmem_malloc() failed?\n");
        shmem_global_exit(1);
    }
    /* sentinel value the waiting PEs compare against below */
    t2[9] = target[9] = 0xFF;

    /* every PE must have stored its sentinels before PE 0 starts writing */
    shmem_barrier_all();

    if (me == 0) {
        /* clear the local copy, then push the zeros to every other PE;
         * this changes their target[9] away from 0xFF */
        memset(target, 0, sizeof(target));
        for(pe=1; pe < num_pes; pe++)
            SHM_PUT(target, target, 10, pe);

        for(pe=1; pe < num_pes; pe++) /* put 10 elements into target on every other PE */
            SHM_PUT(target, source, 10, pe);

        /* block until PE 1 has raised pong above its initial 666 */
        SHM_WAITU( &pong, SHMEM_CMP_GT, 666 );
        Vprintf("PE[%d] pong now "PF"\n",me,pong);

        for(pe=1; pe < num_pes; pe++) /* put 1 element into t2 on every other PE */
            SHM_PUTP(&t2[9], 0xDD, pe);
    }
    else {
        /* wait for 10th element write of 'target' */
        SHM_WAITU( &target[9], SHMEM_CMP_NE, 0xFF );
        Vprintf("PE[%d] target[9] was 255 now "PF", success.\n",me,target[9]);

        /* wait until the copy of source (whose last element is 10) lands */
        SHM_WAITU( &target[9], SHMEM_CMP_EQ, 10 );
        Vprintf("PE[%d] expected target[9] == 10 now "PF"\n",me,target[9]);

        /* only PE 1 answers PE 0, releasing its wait on pong */
        if (me == 1) {
            if (Verbose) {
                DataType tmp = SHM_GETP( &pong, 0);
                printf("PE[%d] @ PE[0] pong == "PF", setting to 999\n",me,tmp);
            }
            SHM_PUTP( &pong, 999, 0);
        }

        /* wait for PE 0's final single-element write into t2 */
        SHM_WAITU( &t2[9], SHMEM_CMP_NE, 0xFF );
    }

    //shmem_barrier_all();  /* sync sender and receiver */

    /* receivers verify that the whole array arrived intact */
    if (me != 0) {
        if (memcmp(source, target, sizeof(DataType) * 10) != 0) {
            int i;
            fprintf(stderr,"[%d] Src & Target mismatch?\n",me);
            for (i = 0 ; i < 10 ; ++i) {
                printf(PF","PF" ", source[i], target[i]);
            }
            printf("\n");
            shmem_global_exit(1);
        }
    }
    shmem_free(t2);

    if (Verbose)
        fprintf(stderr,"[%d] exit\n",shmem_my_pe());

    shmem_finalize();
    return 0;
}
示例#13
0
/*
 * Message-rate benchmark driver.
 *
 * PE 0 parses the command line (-p peers, -i iterations, -m messages,
 * -s bytes, -c cache bytes, -n processes per node, -o machine-readable
 * output, -h help), validates the values and broadcasts them to every PE.
 * Each PE then allocates its buffers, computes its send and receive peer
 * lists and runs the four test routines.
 *
 * All configuration and buffer variables used here (rank, world_size,
 * npeers, send_buf, ...) are file-scope objects defined outside this
 * function.
 *
 * Returns 0 on success; exits with the broadcast start_err value when
 * argument parsing or validation failed.
 */
int
main(int argc, char *argv[])
{
    int i;
    size_t buf_bytes;

    shmem_init();

    rank = shmem_my_pe();
    world_size = shmem_n_pes();

    /* root handles arguments and bcasts answers */
    if (0 == rank) {
        int ch;
        while (start_err != 1 &&
               (ch = getopt(argc, argv, "p:i:m:s:c:n:oh")) != -1) {
            switch (ch) {
            case 'p':
                npeers = atoi(optarg);
                break;
            case 'i':
                niters = atoi(optarg);
                break;
            case 'm':
                nmsgs = atoi(optarg);
                break;
            case 's':
                nbytes = atoi(optarg);
                break;
            case 'c':
                /* cache_size is kept as a count of ints, not bytes */
                cache_size = atoi(optarg) / sizeof(int);
                break;
            case 'n':
                ppn = atoi(optarg);
                break;
            case 'o':
                machine_output = 1;
                break;
            case 'h':
            case '?':
            default:
                start_err = 1;
                usage();
            }
        }

        /* sanity check */
        if (start_err != 1) {
#if 0
            if (world_size < 3) {
                fprintf(stderr, "Error: At least three processes are required\n");
                start_err = 1;
            } else
#endif
                if (world_size <= npeers) {
                fprintf(stderr, "Error: job size (%d) <= number of peers (%d)\n",
                        world_size, npeers);
                start_err = 77;
            } else if (ppn < 1) {
                /* checked before the division by ppn below */
                fprintf(stderr, "Error: must specify process per node (-n #)\n");
                start_err = 77;
            } else if (world_size / ppn <= npeers) {
                fprintf(stderr, "Error: node count <= number of peers\n");
                start_err = 77;
            }
        }
    }

    shmem_barrier_all();

    /* broadcast results */
    /* %p is the correct conversion for an address; the former "0x%lu"
     * printed a decimal number behind a hexadecimal prefix */
    printf("%d: psync: %p\n", rank, (void *) bcast_pSync);
    shmem_broadcast32(&start_err, &start_err, 1, 0, 0, 0, world_size, bcast_pSync);
    if (0 != start_err) {
        exit(start_err);
    }
    /* a barrier separates consecutive broadcasts that share bcast_pSync */
    shmem_barrier_all();
    shmem_broadcast32(&npeers, &npeers, 1, 0, 0, 0, world_size, bcast_pSync);
    shmem_barrier_all();
    shmem_broadcast32(&niters, &niters, 1, 0, 0, 0, world_size, bcast_pSync);
    shmem_barrier_all();
    shmem_broadcast32(&nmsgs, &nmsgs, 1, 0, 0, 0, world_size, bcast_pSync);
    shmem_barrier_all();
    shmem_broadcast32(&nbytes, &nbytes, 1, 0, 0, 0, world_size, bcast_pSync);
    shmem_barrier_all();
    shmem_broadcast32(&cache_size, &cache_size, 1, 0, 0, 0, world_size, bcast_pSync);
    shmem_barrier_all();
    shmem_broadcast32(&ppn, &ppn, 1, 0, 0, 0, world_size, bcast_pSync);
    shmem_barrier_all();
    if (0 == rank) {
        if (!machine_output) {
            printf("job size:   %d\n", world_size);
            printf("npeers:     %d\n", npeers);
            printf("niters:     %d\n", niters);
            printf("nmsgs:      %d\n", nmsgs);
            printf("nbytes:     %d\n", nbytes);
            printf("cache size: %d\n", cache_size * (int)sizeof(int));
            printf("ppn:        %d\n", ppn);
        } else {
            printf("%d %d %d %d %d %d %d ",
                   world_size, npeers, niters, nmsgs, nbytes,
                   cache_size * (int)sizeof(int), ppn);
        }
    }

    /* allocate buffers */
    /* widen before multiplying so large settings cannot overflow int */
    buf_bytes = (size_t) npeers * nmsgs * nbytes;

    send_peers = malloc(sizeof(int) * npeers);
    if (NULL == send_peers) abort_app("malloc");
    recv_peers = malloc(sizeof(int) * npeers);
    if (NULL == recv_peers) abort_app("malloc");
    cache_buf = malloc(sizeof(int) * cache_size);
    if (NULL == cache_buf) abort_app("malloc");
    send_buf = malloc(buf_bytes);
    if (NULL == send_buf) abort_app("malloc");
    memset(send_buf, 1, buf_bytes);

    recv_buf = shmem_malloc(buf_bytes);
    if (NULL == recv_buf) abort_app("malloc");
    memset(recv_buf, 0, buf_bytes);

    /* calculate peers */
    /* send peers: npeers/2 ranks below this PE and the rest above it, in
     * steps of ppn, skipping the PE itself */
    for (i = 0 ; i < npeers ; ++i) {
        if (i < npeers / 2) {
            send_peers[i] = (rank + world_size + ((i - npeers / 2) * ppn)) % world_size;
        } else {
            send_peers[i] = (rank + world_size + ((i - npeers / 2 + 1) * ppn)) % world_size;
        }
    }
    if (npeers % 2 == 0) {
        /* even */
        for (i = 0 ; i < npeers ; ++i) {
            if (i < npeers / 2) {
                recv_peers[i] = (rank + world_size + ((i - npeers / 2) *ppn)) % world_size;
            } else {
                recv_peers[i] = (rank + world_size + ((i - npeers / 2 + 1) * ppn)) % world_size;
            }
        }
    } else {
        /* odd */
        for (i = 0 ; i < npeers ; ++i) {
            if (i < npeers / 2 + 1) {
                recv_peers[i] = (rank + world_size + ((i - npeers / 2 - 1) * ppn)) % world_size;
            } else {
                recv_peers[i] = (rank + world_size + ((i - npeers / 2) * ppn)) % world_size;
            }
        }
    }

    /* BWB: FIX ME: trash the free lists / malloc here */

    /* sync, although tests will do this on their own (in theory) */
    shmem_barrier_all();

    /* run tests */
    test_one_way();
    test_same_direction();
    test_prepost();
    test_allstart();

    if (rank == 0 && machine_output) printf("\n");

    /* release the buffers now that every test has finished */
    shmem_free(recv_buf);
    free(send_buf);
    free(cache_buf);
    free(recv_peers);
    free(send_peers);

    /* done */
    shmem_finalize();
    return 0;
}
示例#14
0
文件: bcast.c 项目: tonycurtis/SOS
/*
 * Broadcast test: PE 1 is the root of a shmem_broadcast64() over all PEs.
 *
 * The test runs `loops` rounds; each round allocates a fresh symmetric
 * buffer whose first half is the destination and second half the source,
 * broadcasts the source from PE 1 and checks that every non-root PE
 * received it while the root's destination stayed zero.  The payload grows
 * by BCAST_INCR bytes per round.
 *
 * "-v" enables progress output on PE 0; "-h" prints usage and exits.
 * `pSync` is a file-scope array defined outside this function.
 *
 * Returns 0 on success; any failure ends the job via shmem_global_exit(1).
 */
int
main(int argc, char* argv[])
{
    int i, Verbose=0;
    int mpe, num_pes, loops=10, cloop;
    char *pgm;
    long *dst, *src;
    int nBytes = START_BCAST_SIZE;
    int nLongs=0;

    shmem_init();
    mpe = shmem_my_pe();
    num_pes = shmem_n_pes();

    if (num_pes == 1) {
        printf("%s: Requires number of PEs > 1\n", argv[0]);
        shmem_finalize();
        return 0;
    }

    if (sizeof(long) != 8) {
        /* sizeof yields a size_t, which is unsigned: %zu, not %zd */
        printf("Test assumes 64-bit long (%zu)\n", sizeof(long));
        shmem_global_exit(1);
        return 0;
    }

    /* strip the directory part of argv[0] for the usage message */
    if ((pgm=strrchr(argv[0],'/'))) {
        pgm++;
    } else {
        pgm = argv[0];
    }

    if (argc > 1) {
        /* length 3 includes the terminator, so this is an exact match */
        if (strncmp(argv[1],"-v",3) == 0) {
            Verbose=1;
        } else if (strncmp(argv[1],"-h",3) == 0) {
            fprintf(stderr,"usage: %s {-v(verbose)|h(help)}\n",pgm);
            shmem_finalize();
            exit(1);
        }
    }

    for (i = 0; i < SHMEM_BCAST_SYNC_SIZE; i += 1) {
        pSync[i] = SHMEM_SYNC_VALUE;
    }

    if ( mpe == 0 && Verbose ) {
        fprintf(stderr,"%d loops\n",loops);
    }

    for(cloop=1; cloop <= loops; cloop++) {

        nLongs = nBytes / sizeof(long);
        /* one allocation holds both halves: dst first, src second */
        dst = (long *)shmem_malloc(nBytes*2);
        if ( !dst ) {
            fprintf(stderr,"[%d] shmem_malloc(%d) failed %s\n",
                            mpe,nBytes,strerror(errno));
            /* abort the whole job instead of reporting success while the
             * other PEs block in the barrier below */
            shmem_global_exit(1);
            return 1;
        }
        memset( (void*)dst, 0, nBytes );
        src = &dst[nLongs];
        /* start at 0: src[0] is compared below and must not be left
         * uninitialized */
        for (i = 0; i < nLongs; i++) {
            src[i] = i+1;
        }

        shmem_barrier_all();

        shmem_broadcast64(dst, src, nLongs, 1, 0, 0, num_pes, pSync);

        for(i=0; i < nLongs; i++) {
            /* the root node shouldn't have the result into dst (cf specification).*/
            if (1 != mpe && dst[i] != src[i]) {
                fprintf(stderr,"[%d] dst[%d] %ld != expected %ld\n",
                        mpe, i, dst[i],src[i]);
                shmem_global_exit(1);
            } else if (1 == mpe && dst[i] != 0) {
                fprintf(stderr,"[%d] dst[%d] %ld != expected 0\n",
                        mpe, i, dst[i]);
                shmem_global_exit(1);
            }
        }
        shmem_barrier_all();

        shmem_free (dst);
        if (Verbose && mpe ==0)
            fprintf(stderr,"loop %2d Bcast %d, Done.\n",cloop,nBytes);
        nBytes += BCAST_INCR;
    }

    shmem_finalize();

    return 0;
}
示例#15
0
// Configure the shared-memory IPC backend from `config`.
//
// Reads key, role, item_size, size, force_async, force_sync, buffer and
// return_result from the configuration, attaches the System V segment
// identified by `key`, and lays it out as header, then `nitems` block
// descriptors, then `nitems * item_size` bytes of data. For ROLE_SERVER it
// also initializes the segment and starts the listener thread.
//
// Returns 0 on success, otherwise the value produced by error().
ssize_t ipc_shmem_init    (ipc_t *ipc, config_t *config){ // {{{
	ssize_t                ret;
	int                    shmid;
	uint32_t               shmkey;
	size_t                 shmsize;
	size_t                 nitems            = NITEMS_DEFAULT;
	size_t                 item_size         = ITEM_SIZE_DEFAULT;
	uintmax_t              f_async           = 0;
	uintmax_t              f_sync            = 0;
	char                  *role_str          = NULL;
	ipc_shmem_userdata    *userdata          = (ipc_shmem_userdata *)ipc->userdata;
	
	// defaults, overridden below when the config supplies a value
	userdata->buffer        = HK(buffer);
	userdata->return_result = 1;
	
	hash_data_get(ret, TYPE_UINT32T, shmkey,     config, HK(key));  if(ret != 0) return error("no key supplied");
	hash_data_convert(ret, TYPE_STRINGT, role_str,   config, HK(role)); if(ret != 0) return error("no role supplied");
	hash_data_get(ret, TYPE_SIZET,   item_size,  config, HK(item_size));
	hash_data_get(ret, TYPE_SIZET,   nitems,     config, HK(size));
	hash_data_get(ret, TYPE_UINTT,   f_async,    config, HK(force_async));
	hash_data_get(ret, TYPE_UINTT,   f_sync,     config, HK(force_sync));
	hash_data_get(ret, TYPE_HASHKEYT, userdata->buffer,   config, HK(buffer));
	hash_data_get(ret, TYPE_UINTT,    userdata->return_result, config, HK(return_result));
	
	// role_str is only needed for this lookup; release it before the
	// validity check so the invalid-role path does not leak it
	userdata->role = ipc_string_to_role(role_str);
	free(role_str);
	
	if(userdata->role == ROLE_INVALID)
		return error("invalid role supplied");
	
	// reject contradictory flags before any segment is attached, so the
	// error path leaves nothing mapped
	if( (f_async != 0 && f_sync != 0) )
		return error("force_async with force_sync");
	
	shmsize = nitems * sizeof(ipc_shmem_block) + nitems * item_size + sizeof(ipc_shmem_header); 
	
	if( (shmid = shmget(shmkey, shmsize, IPC_CREAT | 0666)) < 0)
		return error("shmget failed");
	
	if( (userdata->shmaddr = shmat(shmid, NULL, 0)) == (void *)-1)
		return error("shmat failed");
	
	// byte offsets are taken through char * (arithmetic on void * is a
	// compiler extension)
	userdata->shmblocks = (ipc_shmem_block *)((char *)userdata->shmaddr   + sizeof(ipc_shmem_header));
	userdata->shmdata   = (void *)           ((char *)userdata->shmblocks + nitems * sizeof(ipc_shmem_block));
	userdata->inited    = 1;
	
	userdata->forced_state = FORCE_NONE;
	if(f_async != 0) userdata->forced_state = FORCE_ASYNC;
	if(f_sync  != 0) userdata->forced_state = FORCE_SYNC;
	
	if(userdata->role == ROLE_SERVER){
		// the server publishes the geometry in the segment header
		userdata->shmaddr->item_size = item_size;
		userdata->shmaddr->nitems    = nitems;
		
		if(shmem_init(userdata) != 0)
			return error("shmem_init failed");
		
		// start threads
		if(pthread_create(&userdata->server_thr, NULL, &ipc_shmem_listen, ipc) != 0)
			return error("pthread_create failed");
	}
	return 0;
} // }}}
/*
 * Lock test: emulates a fetch-and-increment on PE 0's `count` with
 * shmem_set_lock / shmem_clear_lock around a get + put pair.
 *
 * Every PE except 0 runs ITER rounds.  In each round it takes the lock,
 * reads count from PE 0, writes back count + 1, releases the lock, and then
 * atomically increments x[oldj % (n_pes - 1)] on PE 0.  PE 0 finally checks
 * that each of the first n_pes - 1 elements of x equals ITER.
 *
 * `count` and `lock` are file-scope symmetric variables defined outside
 * this function.  Returns 0; mismatches are reported on stderr.
 */
int main(int argc, char **argv)
{
  int i,j;
  long modj,oldj,oldxmodj,newcount;
  int my_pe,n_pes;
  size_t max_elements_bytes;
  static long *x;

  shmem_init();
  my_pe = shmem_my_pe();
  n_pes = shmem_n_pes();
#ifdef HAVE_SET_CACHE_INV
  shmem_set_cache_inv();
#endif

/*  fail if trying to use only one processor  */
  if ( n_pes  <= 1 ){
        fprintf(stderr, "FAIL - test requires at least two PEs\n");
        exit(1);
  }

  if(my_pe == 0)
    fprintf(stderr, "shmem_lock_set_clear(%s) n_pes=%d\n", argv[0],n_pes);

/*  shmalloc x on all pes (only use the one on PE 0)  */

  max_elements_bytes = (size_t) (sizeof(long) * n_pes);
  x = shmem_malloc( max_elements_bytes );
  if (x == NULL) {
    fprintf(stderr, "FAIL - shmem_malloc(%zu) failed on PE %d\n",
                    max_elements_bytes, my_pe);
    shmem_global_exit(1);
  }
  for(i=0; i<n_pes; i++)
    x[i] = 0;
  count = 0;
  shmem_barrier_all();

  for(i=0; i<ITER; i++) {
    if (my_pe != 0) {
      /* emulate  oldj = shmem_long_finc(&count, 0); */
      shmem_set_lock(&lock);
      shmem_long_get(&oldj,&count,1,0);   /* get oldj from PE 0's count */
      newcount = oldj+1;
      shmem_long_put(&count,&newcount,1,0);  /* update count on PE 0 */
      /* the original "shmem_quiet;" lacked the call parentheses and so
       * never executed; call it to complete the put before unlocking */
      shmem_quiet();
      shmem_clear_lock(&lock);
      /* end of emulation */
      modj = (oldj % (n_pes-1));  /* PE 0 is just the counter/checker */
        /* increment value in x[modj] */
      oldxmodj = shmem_long_finc(&x[modj], 0);
      (void) oldxmodj;            /* fetched value is not needed */
    }
  }
  shmem_barrier_all();

  if (my_pe == 0) {         /* check x[j] array on PE 0 */
    for(j=1 ; j<n_pes; j++) {
      if (x[j-1] != (long) ITER)
        fprintf(stderr, "FAIL PE %d of %d: x[%d] = %ld expected = %ld\n",
                         my_pe, n_pes, j-1, x[j-1], (long) ITER);
    }
  }

  shmem_barrier_all();
  shmem_free(x);              /* release the symmetric array on every PE */
#ifdef NEEDS_FINALIZE
  shmem_finalize();
#endif
  return 0;
}
示例#17
0
/*
 * Driver for the put/get communication-pattern tests.
 *
 * Command-line switches:
 *   -a -b -c -m -n   run only the all-to-all, broadcast, collect,
 *                    one-to-many / many-to-one, or neighbor tests
 *                    (with none given, every test runs)
 *   -e <count>       number of int elements per buffer
 *   -l <count>       loop count handed to each test
 *   -v / -q          raise / clear verbosity
 *   -h               print usage and exit
 *
 * The selection flags (All2, Bcast, Collect, Many, Neighbor) and Verbose
 * are file-scope variables defined outside this function.
 *
 * Returns 0 on success, 1 for a bad command line.
 */
int
main(int argc, char **argv)
{
    int i;
    int *target;
    int *source;
    int me, npes, elements=N_ELEMENTS, loops=DFLT_LOOPS;
    char *pgm;

    shmem_init();
    me = shmem_my_pe();
    npes = shmem_n_pes();

    /* program name without its directory part, for messages */
    if ((pgm=strrchr(argv[0],'/')))
        pgm++;
    else
        pgm = argv[0];

    /* lower-case switch enable only a specific test; otherwise run all tests */
    while ((i = getopt (argc, argv, "hvqe:l:abcmn")) != EOF) {
        switch (i)
        {
          case 'a':
              All2++;
              break;
          case 'b':
              Bcast++;
              break;
          case 'c':
              Collect++;
              break;
          case 'm':
              Many++;
              break;
          case 'n':
              Neighbor++;
              break;
          case 'q':
              Verbose=0;
              break;
          case 'v':
              Verbose++;
              break;
          case 'e':
              if ((elements = atoi_scaled(optarg)) <= 0) {
                  fprintf(stderr,"ERR: Bad elements count %d\n",elements);
                  shmem_finalize();
                  return 1;
              }
              break;
          case 'l':
              if ((loops = atoi_scaled(optarg)) <= 0) {
                  fprintf(stderr,"ERR: Bad loop count %d\n",loops);
                  shmem_finalize();
                  return 1;
              }
              break;
          case 'h':
              if (me == 0)
                  usage(pgm);
              shmem_finalize();
              return 0;
          default:
              if (me == 0) {
                  fprintf(stderr,"%s: unknown switch '-%c'?\n",pgm,i);
                  usage(pgm);
              }
              shmem_finalize();
              return 1;
        }
    }

    if (All2==0 && Bcast==0 && Collect==0 && Many==0 && Neighbor==0)
        All2 = Bcast = Collect = Many = Neighbor = 1;

    /* both buffers are written right below, so a failed allocation must
     * stop the job rather than be dereferenced */
    source = (int *) shmem_malloc( elements * sizeof(*source) );
    if (!source) {
        fprintf(stderr,"ERR: bad source shmem_malloc(%zu)\n",
                elements * sizeof(*source));
        shmem_global_exit(1);
    }
    target = (int *) shmem_malloc( elements * sizeof(*target) );
    if (!target) {
        fprintf(stderr,"ERR: bad target shmem_malloc(%zu)\n",
                elements * sizeof(*target));
        shmem_global_exit(1);
    }

    for (i = 0; i < elements; i += 1) {
        source[i] = i + 1;
        target[i] = -90;
    }

    shmem_barrier_all();

    if (Neighbor) {
        neighbor_put( target, source, elements, me, npes, loops );
        neighbor_get( target, source, elements, me, npes, loops );
    }

    if (All2) {
        all2all_put( target, source, elements, me, npes, loops );
        all2all_get( target, source, elements, me, npes, loops );
    }

    if (Many) {
        one2many_put( target, source, elements, me, npes, loops );
        many2one_get( target, source, elements, me, npes, loops );
    }

    if (Bcast) bcast( target, source, elements, me, npes, loops );

    if (Collect) {
        collect( NULL, source, elements, me, npes, loops );
        fcollect( NULL, source, elements, me, npes, loops );
    }

    shmem_barrier_all();

    shmem_free(target);
    shmem_free(source);

    shmem_finalize();

    return 0;
}
示例#18
0
文件: mdl.c 项目: N-BodyShop/mdl
/*
 ** Create and initialize an MDL context.
 **
 ** Initializes the SHMEM sync array and SHMEM itself, allocates the context
 ** with its service table, service buffers, swap buffer and cache table,
 ** starts MPI, scans argv for "-sz" (ignored, with a warning) and "+d"
 ** (diagnostic output), and allocates the per-thread caching buffers.
 **
 ** pmdl     receives the new context.
 ** argv     NULL-terminated argument vector; argc is derived from it.
 ** fcnChild run by every rank other than 0 when more than one rank exists;
 **          such ranks call mdlFinish() and exit(0) instead of returning.
 **
 ** Returns the number of threads reported by MPI_Comm_size.
 */
int mdlInitialize(MDL *pmdl,char **argv,void (*fcnChild)(MDL))
{
	MDL mdl;
	int i,bDiag,bThreads;
	char *p,ach[256],achDiag[256];
	int argc;

	/* SHMEM */
	for (i=0;i<_SHMEM_COLLECT_SYNC_SIZE;++i) {
	    pSync[i]=_SHMEM_SYNC_VALUE;
	}
	/* Init Shmem */
	shmem_init();

	*pmdl = NULL;
	mdl = malloc(sizeof(struct mdlContext));
	assert(mdl != NULL);
	/*
	 ** Set default "maximums" for structures. These are NOT hard
	 ** maximums, as the structures will be realloc'd when these
	 ** values are exceeded.
	 */
	mdl->nMaxServices = MDL_DEFAULT_SERVICES;
	mdl->nMaxSrvBytes = MDL_DEFAULT_BYTES;
	mdl->nMaxCacheIds = MDL_DEFAULT_CACHEIDS;
	/*
	 ** Now allocate the initial service slots.
	 */
	mdl->psrv = malloc(mdl->nMaxServices*sizeof(SERVICE));
	assert(mdl->psrv != NULL);
	/*
	 ** Initialize the new service slots.
	 */
	for (i=0;i<mdl->nMaxServices;++i) {
		mdl->psrv[i].p1 = NULL;
		mdl->psrv[i].nInBytes = 0;
		mdl->psrv[i].nOutBytes = 0;
		mdl->psrv[i].fcnService = NULL;
		}
	/*
	 ** Provide a 'null' service for sid = 0, so that stopping the 
	 ** service handler is well defined!
	 */
	mdl->psrv[0].p1 = NULL;
	mdl->psrv[0].nInBytes = 0;
	mdl->psrv[0].nOutBytes = 0;
	mdl->psrv[0].fcnService = _srvNull;
	/*
	 ** Allocate service buffers.
	 */
	mdl->pszIn = malloc(mdl->nMaxSrvBytes+sizeof(SRVHEAD));
	assert(mdl->pszIn != NULL);
	mdl->pszOut = malloc(mdl->nMaxSrvBytes+sizeof(SRVHEAD));
	assert(mdl->pszOut != NULL);
	mdl->pszBuf = malloc(mdl->nMaxSrvBytes+sizeof(SRVHEAD));
	assert(mdl->pszBuf != NULL);
	/*
	 ** Allocate swapping transfer buffer. This buffer remains fixed.
	 */
	mdl->pszTrans = malloc(MDL_TRANS_SIZE);
	assert(mdl->pszTrans != NULL);
	/*
	 ** Allocate initial cache spaces.
	 */
	mdl->cache = malloc(mdl->nMaxCacheIds*sizeof(CACHE));
	assert(mdl->cache != NULL);
	/*
	 ** Initialize caching spaces.
	 */
	for (i=0;i<mdl->nMaxCacheIds;++i) {
		mdl->cache[i].iType = MDL_NOCACHE;
		}

	/*
	 ** Count the arguments; argv is NULL terminated.
	 */
	for(argc = 0; argv[argc]; argc++);

	MPI_Init(&argc, &argv);

	/*
	 ** Do some low level argument parsing for number of threads, and
	 ** diagnostic flag!
	 */
	bDiag = 0;
	bThreads = 0;
	i = 1;
	while (argv[i]) {
		if (!strcmp(argv[i],"-sz") && !bThreads) {
			++i;
			/*
			 ** "-sz" was the last argument: there is no value and
			 ** nothing left to scan. Stop here rather than hand
			 ** NULL to strcmp() and step past the terminator.
			 */
			if (!argv[i]) break;
			bThreads = 1;
			}
		if (!strcmp(argv[i],"+d") && !bDiag) {
			/*
			 ** Diagnostic directory: $MDL_DIAGNOSTIC, else $HOME,
			 ** else /tmp. Bounded copy, since the environment
			 ** value may be longer than ach.
			 */
			p = getenv("MDL_DIAGNOSTIC");
			if (!p) p = getenv("HOME");
			snprintf(ach,sizeof(ach),"%s",p ? p : "/tmp");
			bDiag = 1;
			}
		++i;
		}
	if (bThreads) {
		fprintf(stderr,"Warning: -sz parameter ignored, using as many\n");
		fprintf(stderr,"         processors as specified in environment.\n");
		fflush(stderr);
		}

	MPI_Comm_size(MPI_COMM_WORLD, &mdl->nThreads);
	MPI_Comm_rank(MPI_COMM_WORLD, &mdl->idSelf);
	/*
	 ** Allocate caching buffers, with initial data size of 0.
	 ** We need one reply buffer for each thread, to avoid deadlock
	 ** situations.
	 */
	mdl->iMaxDataSize = 0;
	mdl->iCaBufSize = sizeof(CAHEAD);
	mdl->pszRcv = malloc(mdl->iCaBufSize);
	assert(mdl->pszRcv != NULL);
	mdl->ppszRpl = malloc(mdl->nThreads*sizeof(char *));
	assert(mdl->ppszRpl != NULL);
	mdl->pmidRpl = malloc(mdl->nThreads*sizeof(int));
	assert(mdl->pmidRpl != NULL);
	for (i=0;i<mdl->nThreads;++i)
		mdl->pmidRpl[i] = -1;
	mdl->pReqRpl = malloc(mdl->nThreads*sizeof(MPI_Request));
	assert(mdl->pReqRpl != NULL);
	for (i=0;i<mdl->nThreads;++i) {
		mdl->ppszRpl[i] = malloc(mdl->iCaBufSize);
		assert(mdl->ppszRpl[i] != NULL);
		}
	mdl->pszFlsh = malloc(mdl->iCaBufSize);
	assert(mdl->pszFlsh != NULL);
	mdl->bDiag = bDiag;
	*pmdl = mdl;
	if (mdl->bDiag) {
		/*
		 ** Diagnostic file: <dir>/<program basename>.<rank>
		 */
		char *tmp = strrchr(argv[0],'/');
		if (!tmp) tmp = argv[0];
		else ++tmp;
		snprintf(achDiag,sizeof(achDiag),"%s/%s.%d",ach,tmp,mdl->idSelf);
		mdl->fpDiag = fopen(achDiag,"w");
		assert(mdl->fpDiag != NULL);
		}
	if (mdl->nThreads > 1 && mdl->idSelf) {
		/*
		 ** Child thread.
		 */
		(*fcnChild)(mdl);
		mdlFinish(mdl);
		exit(0);
		}
	return(mdl->nThreads);
	}
示例#19
0
文件: isx.c 项目: habanero-rice/hclib
/*
 * Program entry point: sets up SHMEM and the symmetric key buffer, runs the
 * bucket sort, logs the timings and shuts SHMEM down again.
 *
 * When built with EXTRA_STATS, PE 0 additionally times the whole run and
 * prints a statistics table.
 *
 * Returns the value produced by bucket_sort().
 */
int main(const int argc,  char ** argv)
{
  shmem_init();

  /* symmetric buffer that receives this PE's bucket of keys */
  my_bucket_keys = (KEY_TYPE *)shmem_malloc(KEY_BUFFER_SIZE * sizeof(KEY_TYPE));
  assert(my_bucket_keys);

#ifdef EXTRA_STATS
  _timer_t total_time;
  if(shmem_my_pe() == 0) {
    printf("\n-----\nmkdir timedrun fake\n\n");
    timer_start(&total_time);
  }
#endif

  init_shmem_sync_array(pSync);

  char * log_file = parse_params(argc, argv);

  int err = bucket_sort();

  log_times(log_file);

#ifdef EXTRA_STATS
  if(shmem_my_pe() == 0) {
    just_timer_stop(&total_time);

    /* wall-clock duration of the whole run, in seconds */
    double tTime = ( total_time.stop.tv_sec - total_time.start.tv_sec ) + ( total_time.stop.tv_nsec - total_time.start.tv_nsec )/1E9;

    /* scale the averages by 1000 before they are printed */
    avg_time *= 1000;
    avg_time_all2all *= 1000;

    printf("\n============================ MMTk Statistics Totals ============================\n");
    if(NUM_ITERATIONS != 1) {
      printf("time.mu\ttimeAll2All\tnWorkers\tnPEs\n");
      printf("%.3f\t%.3f\t1\t%d\n",avg_time,avg_time_all2all,NUM_PES);
      printf("Total time: %.3f\n",avg_time);
    }
    else { //TODO: fix time calculation below for more number of iterations
      printf("time.mu\tt.ATA_KEYS\tt.MAKE_INPUT\tt.COUNT_BUCKET_SIZES\tt.BUCKETIZE\tt.COMPUTE_OFFSETS\tt.LOCAL_SORT\tBARRIER_AT_START\tBARRIER_AT_EXCHANGE\tBARRIER_AT_END\tnWorkers\tnPEs\n");

      double TIMES[TIMER_NTIMERS];
      memset(TIMES, 0x00, sizeof(double) * TIMER_NTIMERS);

      /* for each timer: add up the per-PE samples in PE order, then print
       * the mean scaled by 1000 */
      for(int t = 0; t < TIMER_NTIMERS; ++t){
        if(timers[t].all_times != NULL){
          for(int pe = 0; pe < NUM_PES; pe++) {
            TIMES[t] += timers[t].all_times[pe];
          }
        }
        printf("%.3f\t", (TIMES[t]/NUM_PES)*1000);
      }
      printf("1\t%d\n",NUM_PES);
      printf("Total time: %.3f\n",(TIMES[0]/NUM_PES)*1000);
    }
    printf("------------------------------ End MMTk Statistics -----------------------------\n");
    printf("===== TEST PASSED in %.3f msec =====\n",(tTime*1000));
  }
#endif

  shmem_finalize();
  return err;
}
示例#20
0
/*
 * Active-set test for the integer OR reduction.
 *
 * Iterates over every (PE_start, logPE_stride, PE_size) triplet given by
 * MAXSTART, MAXSTRIDE and MAXRMLAST.  For each non-empty active set, six
 * cases are run: static, global and symmetric-heap arrays, each with
 * source != target and with source == target.  PEs inside the active set
 * call or_int(); PEs outside it call the check_*_notchanged() helpers.
 * NOTE(review): those helpers presumably return a failure count that is
 * summed into nasfail -- confirm against their definitions.
 *
 * The g* and d* arrays are file-scope objects defined outside this
 * function.  Always returns 0.
 */
int main()
{
   int start,stride,rmlast,rstride,np_aset,inset,lpe;
   int my_pe,n_pes;
   int i,fail,n_err;
   /* nasfail is accumulated with += below, so it must start at zero;
    * reading it uninitialized was undefined behaviour */
   int asfail=0,nasfail=0;
   char Case[40];

   static int sSource_int[NREDUCE];
   static int sTarget_int[NREDUCE];
   static int spWrk_int[PWRKELEM];

   static long spSync[_SHMEM_REDUCE_SYNC_SIZE];


   shmem_init();
   my_pe = shmem_my_pe();
   n_pes = shmem_n_pes();
   lpe=my_pe;

   dpSync=shmem_malloc(_SHMEM_REDUCE_SYNC_SIZE*sizeof(long));
   for(i=0;i<_SHMEM_REDUCE_SYNC_SIZE;i++) {
      gpSync[i]=_SHMEM_SYNC_VALUE;
      dpSync[i]=_SHMEM_SYNC_VALUE;
      spSync[i]=_SHMEM_SYNC_VALUE;
   }

   dSource_int=shmem_malloc(NREDUCE*sizeof(int));
   dTarget_int=shmem_malloc(NREDUCE*sizeof(int));
   /* work array sized to the larger of NREDUCE/2+1 and the library minimum */
   dpWrk_int=shmem_malloc((NREDUCE/2+1 > _SHMEM_REDUCE_MIN_WRKDATA_SIZE ? NREDUCE/2+1 : _SHMEM_REDUCE_MIN_WRKDATA_SIZE)*sizeof(int));

   for(start=0;start<=MAXSTART;start++) {
      rstride=1;   /* rstride == 2^stride, doubled at the end of each pass */
      for(stride=0;stride<=MAXSTRIDE;stride++) {
         for(rmlast=0;rmlast<=MAXRMLAST;rmlast++)
         {
            np_aset=(n_pes+rstride-1-start)/rstride-rmlast; /* number of processes in the active set */
            if(np_aset > 0) /* if active set is not empty */
            {
               if(my_pe==0) printf("\nActive set triplet: PE_start=%d,logPE_stride=%d,PE_size=%d \n",start,stride,np_aset);
               /* this PE is a member when it lies on the stride grid
                * starting at `start` and within the first np_aset slots */
               if((my_pe>=start) && ((my_pe-start)%rstride==0) && ((my_pe-start)/rstride<np_aset)) inset=1;
               else inset=0;

/* Initialize Source and Target arrays */
               for(i=0;i<NREDUCE;i++) {
                  sSource_int[i]=SINIT;
                  sTarget_int[i]=TINIT;
                  gSource_int[i]=SINIT;
                  gTarget_int[i]=TINIT;
                  dSource_int[i]=SINIT;
                  dTarget_int[i]=TINIT;
               }
               shmem_barrier_all();

/* CASE: static arrays, source is different from target */
               sprintf(Case,"static, source!=target");
               if(inset)
                  asfail=or_int(sSource_int,sTarget_int,start,stride,np_aset,rstride,0,dpWrk_int,gpSync,Case);
               else {   /* check that values of source and target have not been changed */
                  nasfail+=check_sval_notchanged(sSource_int,Case);
                  nasfail+=check_tval_notchanged(sTarget_int,Case);
               }


/* CASE: global arrays, source is different from target */
               sprintf(Case,"global, source!=target");
               if(inset)
                  asfail=or_int(gSource_int,gTarget_int,start,stride,np_aset,rstride,0,spWrk_int,dpSync,Case);
               else {   /* check that values of source and target have not been changed */
                  nasfail+=check_sval_notchanged(gSource_int,Case);
                  nasfail+=check_tval_notchanged(gTarget_int,Case);
               }

/* CASE: symmetric heap arrays, source is different from target */
               sprintf(Case,"sym heap, source!=target");
               if(inset)
                  asfail=or_int(dSource_int,dTarget_int,start,stride,np_aset,rstride,0,gpWrk_int,spSync,Case);
               else {   /* check that values of source and target have not been changed */
                  nasfail+=check_sval_notchanged(dSource_int,Case);
                  nasfail+=check_tval_notchanged(dTarget_int,Case);
               }


/* Reinitialize Source arrays for new tests */
               for(i=0;i<NREDUCE;i++) {
                  sSource_int[i]=SINIT;
                  gSource_int[i]=SINIT;
                  dSource_int[i]=SINIT;
               }
               shmem_barrier_all();

/* CASE: static arrays, source and target are the same array */
               sprintf(Case,"static, source==target");
               if(inset)
                  asfail=or_int(sSource_int,sSource_int,start,stride,np_aset,rstride,1,gpWrk_int,dpSync,Case);
               else     /* check that values of source have not been changed */
                  nasfail+=check_sval_notchanged(sSource_int,Case);

/* CASE: global arrays, source and target are the same array */
               sprintf(Case,"global, source==target");
               if(inset)
                  asfail=or_int(gSource_int,gSource_int,start,stride,np_aset,rstride,1,dpWrk_int,spSync,Case);
               else     /* check that values of source have not been changed */
                  nasfail+=check_sval_notchanged(gSource_int,Case);

/* CASE: symmetric heap arrays, source and target are the same array */
               sprintf(Case,"sym heap, source==target");
               if(inset)
                  asfail=or_int(dSource_int,dSource_int,start,stride,np_aset,rstride,1,spWrk_int,gpSync,Case);
               else     /* check that values of source have not been changed */
                  nasfail+=check_sval_notchanged(dSource_int,Case);


            }   /* end of if active set is not empty */
         }      /* end of for loop on rmlast */
         rstride*=2;
      }         /* end of for loop on stride */
   }            /* end of for loop on start */

   shmem_barrier_all();
#ifdef NEEDS_FINALIZE
   shmem_finalize();
#endif
   return(0);
}
/* Example #21 (0) */
/*
 * Timing test for shmem_int_get(): every PE repeatedly gets `elements` ints
 * from PE (me+1) % npes, accumulates the time spent inside the get calls and
 * deposits that time into total_time[me] on PE 0.  With -v, PE 0 prints the
 * average transfer rate.
 *
 * Options: -e <count> element count, -l <count> loop count, -s barrier after
 * every get, -t progress marks from PE 0, -v verbose, -h usage.
 *
 * Returns 0 on success and 1 on a bad option.  A failed allocation or a data
 * mismatch calls shmem_global_exit(1).
 *
 * Verbose, Sync, Track, elements and total_time are not declared in this
 * function; they are presumably file-scope objects of this test.
 */
int
main(int argc, char **argv)
{
    int loops=DFLT_LOOPS;
    char *pgm;
    int *Target;
    int *Source;
    int i, me, npes;
    int target_pe;
    long bytes;
    double time_taken=0.0, start_time;

    shmem_init();
    me = shmem_my_pe();
    npes = shmem_n_pes();

    /* Program name without its directory part, used in messages. */
    if ((pgm=strrchr(argv[0],'/')))
        pgm++;
    else
        pgm = argv[0];

    while ((i = getopt (argc, argv, "hve:l:st")) != EOF) {
        switch (i)
        {
            case 'v':
                Verbose++;
                break;
            case 'e':
                if ((elements = atoi_scaled(optarg)) <= 0) {
                    fprintf(stderr,"ERR: Bad elements count %d\n",elements);
                    shmem_finalize();
                    return 1;
                }
                break;
            case 'l':
                if ((loops = atoi_scaled(optarg)) <= 0) {
                    fprintf(stderr,"ERR: Bad loop count %d\n",loops);
                    shmem_finalize();
                    return 1;
                }
                break;
            case 's':
                Sync++;
                break;
            case 't':
                Track++;
                break;
            case 'h':
                if (me == 0)
                    usage(pgm);
                /* Finalize here as well, like every other early return. */
                shmem_finalize();
                return 0;
            default:
                if (me == 0) {
                    fprintf(stderr,"%s: unknown switch '-%c'?\n",pgm,i);
                    usage(pgm);
                }
                shmem_finalize();
                return 1;
        }
    }

    target_pe = (me+1) % npes;

    /* One timing slot per PE; only PE 0's copy is read back below. */
    total_time = (double *) shmem_malloc( npes * sizeof(double) );
    if (!total_time) {
        /* Report the size that was actually requested (npes doubles). */
        fprintf(stderr,"ERR: bad total_time shmem_malloc(%zu)\n",
                (npes * sizeof(double)));
        shmem_global_exit(1);
    }

    Source = (int *) shmem_malloc( elements * sizeof(*Source) );
    if (!Source) {
        fprintf(stderr,"ERR: bad Source shmem_malloc(%zu)\n",
                (elements * sizeof(*Source)));
        shmem_free(total_time);
        shmem_global_exit(1);
    }

    Target = (int *) shmem_malloc( elements * sizeof(*Target) );
    if (!Target) {
        fprintf(stderr,"ERR: bad Target shmem_malloc(%zu)\n",
                (elements * sizeof(*Target)));
        shmem_free(Source);
        shmem_free(total_time);
        shmem_global_exit(1);
    }

    /* Source holds 1..elements on every PE; Target starts with a sentinel. */
    for (i = 0; i < elements; i++) {
        Target[i] = -90;
        Source[i] = i + 1;
    }

    bytes = loops * sizeof(int) * elements;

    if (Verbose && me==0)
        fprintf(stderr, "%s: INFO - %d loops, get %d (int) elements from PE+1\n",
                pgm, loops, elements);

    shmem_barrier_all();

    for(i=0; i < loops; i++) {

        /* Only the get itself is timed. */
        start_time = shmemx_wtime();

        shmem_int_get( Target, Source, elements, target_pe );

        time_taken += shmemx_wtime() - start_time;

        if (me==0) {
            if ( Track && i > 0 && ((i % 200) == 0))
                fprintf(stderr,".%d",i);
        }
        if (Sync)
            shmem_barrier_all();
    }

    // collect time per node elapsed time.
    shmem_double_put( &total_time[me], &time_taken, 1, 0 );

    shmem_barrier_all();

    /* Every PE initialised Source identically, so Target must be 1..elements. */
    for (i = 0; i < elements; i++) {
        if (Target[i] != i + 1) {
            printf("%d: Error Target[%d] = %d, expected %d\n",
                   me, i, Target[i], i + 1);
            shmem_global_exit(1);
        }
    }

    if ( Track && me == 0 )
        fprintf(stderr,"\n");

    if (Verbose && me == 0) {
        double rate,secs;

        // average time
        for(i=0,secs=0.0; i < npes; i++)
            secs += total_time[i];
        secs /= (double)npes;
        rate = ((double)bytes/(1024.0*1024.0)) / secs;
        printf("%s: ave %5.3f MB/sec (bytes %ld secs %5.3f)\n",
               pgm, rate, bytes, secs);
    }

    shmem_free(total_time);
    shmem_free(Target);
    shmem_free(Source);

    shmem_finalize();

    return 0;
}
/* Example #22 (0) */
/*
 * Symmetric-heap allocation exerciser.  Each pass allocates three buffers
 * with shmem_malloc() (result, target, source), fills them with 1, 2 and 3
 * respectively, synchronises all PEs, checks that the fill values are still
 * intact and frees the buffers again.
 *
 * Options: -p double nWords each pass (forces nWords=1, 21 passes),
 *          -v verbose, -h usage.
 * Positional arguments: nWords, loops, nWords-increment-per-loop.
 *
 * Returns 0; an allocation failure finalizes and exits with status 1.
 * usage() is presumed not to return -- TODO confirm.
 */
int
main(int argc, char **argv)
{
    int me, nProcs, opt, pass;
    int nWords, loops, incWords;
    int Verbose = 0, power2 = 0, modulo = 5;
    DataType *cur, *end;

    /* Keep only the basename of the executable for messages. */
    pgm = strrchr(argv[0], '/');
    pgm = pgm ? pgm + 1 : argv[0];

    shmem_init();
    me = shmem_my_pe();
    nProcs = shmem_n_pes();

    while ((opt = getopt (argc, argv, "hpv")) != -1) {
        if (opt == 'p')
            power2++;
        else if (opt == 'v')
            Verbose++;
        else                /* 'h' and anything unrecognised */
            usage();
    }

    /* Optional positional arguments, each falling back to its default. */
    if (optind != argc) {
        nWords = getSize (argv[optind++]);
        if (nWords <= 0)
            usage ();
    } else {
        nWords = DFLT_NWORDS;
    }

    if (optind != argc) {
        loops = getSize (argv[optind++]);
        if (loops < 0)
            usage ();
    } else {
        loops = DFLT_LOOPS;
    }

    if (optind != argc) {
        incWords = getSize (argv[optind++]);
        if (incWords < 0)
            usage ();
    } else {
        incWords = DFLT_INCR;
    }

    /* -p overrides the sizes: start at one word and double every pass. */
    if (power2) {
        nWords = 1;
        modulo = 1;
        loops = 21;
    }

    if (Verbose && me == 0) {
        if (!power2)
            printf("%s: nWords(%d) loops(%d) nWords-incr-per-loop(%d)\n",
                pgm, nWords, loops, incWords);
        else
            printf("%s: nWords(1) << 1 per loop.\n", pgm);
    }

    for (pass = 0; pass < loops; pass++) {

        /* result buffer, filled with 1 */
        result_sz = (nProcs-1) * (nWords * sizeof(DataType));
        result = (DataType *)shmem_malloc(result_sz);
        if (!result) {
            perror ("Failed result memory allocation");
            shmem_finalize();
            exit (1);
        }
        end = &result[result_sz / sizeof(DataType)];
        for (cur = result; cur < end; cur++)
            *cur = 1;

        /* target buffer, filled with 2 */
        target_sz = nWords * sizeof(DataType);
        target = (DataType *)shmem_malloc(target_sz);
        if (!target) {
            perror ("Failed target memory allocation");
            shmem_finalize();
            exit (1);
        }
        end = &target[target_sz / sizeof(DataType)];
        for (cur = target; cur < end; cur++)
            *cur = 2;

        /* source buffer, filled with 3 */
        source_sz = 2 * nWords * sizeof(DataType);
        source = (DataType *)shmem_malloc(source_sz);
        if (!source) {
            perror ("Failed source memory allocation");
            shmem_finalize();
            exit (1);
        }
        end = &source[source_sz / sizeof(DataType)];
        for (cur = source; cur < end; cur++)
            *cur = 3;
#if 0
        printf("[%d] source %p target %p result %p\n",
            me, (void*)source,(void*)target,(void*)result);
        shmem_barrier_all();
#endif

        shmem_barrier_all(); /* sync sender and receiver */

        /* Verify each buffer still holds its fill value; report once. */
        end = &source[source_sz / sizeof(DataType)];
        cur = source;
        while (cur < end && *cur == 3)
            cur++;
        if (cur < end)
            printf("source not consistent @ 3?\n");
        shmem_free(source);

        end = &target[target_sz / sizeof(DataType)];
        cur = target;
        while (cur < end && *cur == 2)
            cur++;
        if (cur < end)
            printf("target not consistent @ 2?\n");
        shmem_free(target);

        end = &result[result_sz / sizeof(DataType)];
        cur = result;
        while (cur < end && *cur == 1)
            cur++;
        if (cur < end)
            printf("result not consistent @ 1?\n");
        shmem_free(result);

        /* A single-pass run neither reports progress nor grows nWords. */
        if (loops <= 1)
            continue;

        if (Verbose && me == 0 && (pass == 0 || (pass % modulo == 0)))
            printf("End loop %3d nWords(%d)\n",(pass+1),nWords);

        if (power2)
            nWords <<= 1;
        else
            nWords += incWords; // watch for double inc.
    }

    shmem_finalize();

    return 0;
}
/* Print the Passed/Failed line for one routine; count a failure in *fail_count. */
static void
report_put_result (const char *routine, int failed, int *fail_count)
{
    if (failed == 0)
        printf ("Test %s: Passed\n", routine);
    else {
        printf ("Test %s: Failed\n", routine);
        (*fail_count)++;
    }
}

/*
 * Functional test of the put family: typed block puts, shmem_putmem, the
 * fixed-size put32/64/128 and iput32/64/128 variants, typed strided iputs and
 * the single-element _p routines.
 *
 * Every PE writes its own rank into the symmetric destination buffers of PE
 * (me + 1) % npes.  PE 0 therefore expects to find npes - 1 everywhere and is
 * the only PE that checks and reports.  The successN variables are failure
 * flags: 0 means the check passed, 1 means a mismatch was seen.
 *
 * Needs at least 2 PEs; otherwise the test is skipped.  Always returns 0.
 */
int
main (int argc, char **argv)
{
    int i;
    int nextpe;
    int me, npes;
    int success1, success2, success3, success4, success5, success6, success7,
        success8;

    int fail_count = 0;

    short src1[N];
    int src2[N];
    long src3[N];
    long double src4[N];
    long long src5[N];
    double src6[N];
    float src7[N];
    char *src8;
    short src9;
    int src10;
    long src11;
    double src12;
    float src13;

    short *dest1;
    int *dest2;
    long *dest3;
    long double *dest4;
    long long *dest5;
    double *dest6;
    float *dest7;
    char *dest8;
    short *dest9;
    int *dest10;
    long *dest11;
    double *dest12;
    float *dest13;


    shmem_init ();
    me = shmem_my_pe ();
    npes = shmem_n_pes ();

    if (npes > 1) {

        success1 = 0;
        success2 = 0;
        success3 = 0;
        success4 = 0;
        success5 = 0;
        success6 = 0;
        success7 = 0;
        success8 = 0;
        src8 = (char *) malloc (N * sizeof (char));

        /* All sources carry this PE's rank. */
        for (i = 0; i < N; i += 1) {
            src1[i] = (short) me;
            src2[i] = me;
            src3[i] = (long) me;
            src4[i] = (long double) me;
            src5[i] = (long long) me;
            src6[i] = (double) me;
            src7[i] = (float) me;
            src8[i] = (char) me;
        }
        src9 = (short) me;
        src10 = me;
        src11 = (long) me;
        src12 = (double) me;
        src13 = (float) me;


        dest1 = (short *) shmem_malloc (N * sizeof (*dest1));
        dest2 = (int *) shmem_malloc (N * sizeof (*dest2));
        dest3 = (long *) shmem_malloc (N * sizeof (*dest3));
        dest4 = (long double *) shmem_malloc (N * sizeof (*dest4));
        dest5 = (long long *) shmem_malloc (N * sizeof (*dest5));
        dest6 = (double *) shmem_malloc (N * sizeof (*dest6));
        dest7 = (float *) shmem_malloc (N * sizeof (*dest7));
        /* N bytes: the init loop and shmem_putmem below both write N chars. */
        dest8 = (char *) shmem_malloc (N * sizeof (*dest8));
        dest9 = (short *) shmem_malloc (sizeof (*dest9));
        dest10 = (int *) shmem_malloc (sizeof (*dest10));
        dest11 = (long *) shmem_malloc (sizeof (*dest11));
        dest12 = (double *) shmem_malloc (sizeof (*dest12));
        dest13 = (float *) shmem_malloc (sizeof (*dest13));

        /* -9 is the "not written" sentinel. */
        for (i = 0; i < N; i += 1) {
            dest1[i] = -9;
            dest2[i] = -9;
            dest3[i] = -9;
            dest4[i] = -9;
            dest5[i] = -9;
            dest6[i] = -9;
            dest7[i] = -9.0;
            dest8[i] = -9;
        }
        *dest9 = -9;
        *dest10 = -9;
        *dest11 = -9;
        *dest12 = -9;
        *dest13 = -9.0;

        nextpe = (me + 1) % npes;

        /* Testing shmem_short_put, shmem_int_put, shmem_long_put,
           shmem_longdouble_put, shmem_longlong_put, shmem_double_put,
           shmem_float_put, shmem_putmem */
        shmem_barrier_all ();

        shmem_short_put (dest1, src1, N, nextpe);
        shmem_int_put (dest2, src2, N, nextpe);
        shmem_long_put (dest3, src3, N, nextpe);
        shmem_longdouble_put (dest4, src4, N, nextpe);
        shmem_longlong_put (dest5, src5, N, nextpe);
        shmem_double_put (dest6, src6, N, nextpe);
        shmem_float_put (dest7, src7, N, nextpe);
        shmem_putmem (dest8, src8, N * sizeof (char), nextpe);

        shmem_barrier_all ();

        if (me == 0) {
            for (i = 0; i < N; i += 1) {
                if (dest1[i] != (npes - 1)) {
                    success1 = 1;
                }
                if (dest2[i] != (npes - 1)) {
                    success2 = 1;
                }
                if (dest3[i] != (npes - 1)) {
                    success3 = 1;
                }
                if (dest4[i] != (npes - 1)) {
                    success4 = 1;
                }
                if (dest5[i] != (npes - 1)) {
                    success5 = 1;
                }
                if (dest6[i] != (npes - 1)) {
                    success6 = 1;
                }
                if (dest7[i] != (npes - 1)) {
                    success7 = 1;
                }
                if (dest8[i] != (npes - 1)) {
                    success8 = 1;
                }
            }

            report_put_result ("shmem_short_put", success1, &fail_count);
            report_put_result ("shmem_int_put", success2, &fail_count);
            report_put_result ("shmem_long_put", success3, &fail_count);
            report_put_result ("shmem_longdouble_put", success4, &fail_count);
            report_put_result ("shmem_longlong_put", success5, &fail_count);
            report_put_result ("shmem_double_put", success6, &fail_count);
            report_put_result ("shmem_float_put", success7, &fail_count);
            report_put_result ("shmem_putmem", success8, &fail_count);
        }
        shmem_barrier_all ();

        /* Testing shmem_put32, shmem_put64, shmem_put128 */
        if (sizeof (int) == 4) {
            for (i = 0; i < N; i += 1) {
                dest2[i] = -9;
                dest3[i] = -9;
                dest4[i] = -9;
            }
            success2 = 0;
            success3 = 0;
            success4 = 0;

            shmem_barrier_all ();

            shmem_put32 (dest2, src2, N, nextpe);
            shmem_put64 (dest3, src3, N, nextpe);
            shmem_put128 (dest4, src4, N, nextpe);

            shmem_barrier_all ();

            if (me == 0) {
                for (i = 0; i < N; i += 1) {
                    if (dest2[i] != (npes - 1)) {
                        success2 = 1;
                    }
                    if (dest3[i] != (npes - 1)) {
                        success3 = 1;
                    }
                    if (dest4[i] != (npes - 1)) {
                        success4 = 1;
                    }
                }
                report_put_result ("shmem_put32", success2, &fail_count);
                report_put_result ("shmem_put64", success3, &fail_count);
                report_put_result ("shmem_put128", success4, &fail_count);
            }
        }
        else if (sizeof (int) == 8) {
            /* NOTE(review): this branch pairs put32/64/128 with
               short/int/long; confirm those widths on the intended target. */
            for (i = 0; i < N; i += 1) {
                dest1[i] = -9;
                dest2[i] = -9;
                dest3[i] = -9;
            }
            success1 = 0;
            success2 = 0;
            success3 = 0;

            shmem_barrier_all ();

            shmem_put32 (dest1, src1, N, nextpe);
            shmem_put64 (dest2, src2, N, nextpe);
            shmem_put128 (dest3, src3, N, nextpe);

            shmem_barrier_all ();

            if (me == 0) {
                for (i = 0; i < N; i += 1) {
                    if (dest1[i] != (npes - 1)) {
                        success1 = 1;
                    }
                    if (dest2[i] != (npes - 1)) {
                        success2 = 1;
                    }
                    if (dest3[i] != (npes - 1)) {
                        success3 = 1;
                    }

                }
                report_put_result ("shmem_put32", success1, &fail_count);
                report_put_result ("shmem_put64", success2, &fail_count);
                report_put_result ("shmem_put128", success3, &fail_count);
            }
        }

        /* Testing shmem_iput32, shmem_iput64, shmem_iput128.
           The source stride is 2, so only N / 2 elements are transferred:
           asking for N would read past the end of the N-element sources.
           The checks below look at exactly those N / 2 destination slots. */
        shmem_barrier_all ();
        if (sizeof (int) == 4) {
            for (i = 0; i < N; i += 1) {
                dest2[i] = -9;
                dest3[i] = -9;
                dest4[i] = -9;
            }
            success2 = 0;
            success3 = 0;
            success4 = 0;

            shmem_barrier_all ();

            shmem_iput32 (dest2, src2, 1, 2, N / 2, nextpe);
            shmem_iput64 (dest3, src3, 1, 2, N / 2, nextpe);
            shmem_iput128 (dest4, src4, 1, 2, N / 2, nextpe);

            shmem_barrier_all ();

            if (me == 0) {
                for (i = 0; i < N / 2; i += 1) {
                    if (dest2[i] != (npes - 1)) {
                        success2 = 1;
                    }
                    if (dest3[i] != (npes - 1)) {
                        success3 = 1;
                    }
                    if (dest4[i] != (npes - 1)) {
                        success4 = 1;
                    }
                }
                report_put_result ("shmem_iput32", success2, &fail_count);
                report_put_result ("shmem_iput64", success3, &fail_count);
                report_put_result ("shmem_iput128", success4, &fail_count);
            }
        }
        else if (sizeof (int) == 8) {
            for (i = 0; i < N; i += 1) {
                dest1[i] = -9;
                dest2[i] = -9;
                dest3[i] = -9;
            }
            success1 = 0;
            success2 = 0;
            success3 = 0;

            shmem_barrier_all ();

            shmem_iput32 (dest1, src1, 1, 2, N / 2, nextpe);
            shmem_iput64 (dest2, src2, 1, 2, N / 2, nextpe);
            shmem_iput128 (dest3, src3, 1, 2, N / 2, nextpe);

            shmem_barrier_all ();

            if (me == 0) {
                for (i = 0; i < N / 2; i += 1) {
                    if (dest1[i] != (npes - 1)) {
                        success1 = 1;
                    }
                    if (dest2[i] != (npes - 1)) {
                        success2 = 1;
                    }
                    if (dest3[i] != (npes - 1)) {
                        success3 = 1;
                    }

                }
                report_put_result ("shmem_iput32", success1, &fail_count);
                report_put_result ("shmem_iput64", success2, &fail_count);
                report_put_result ("shmem_iput128", success3, &fail_count);
            }
        }

        /* Testing shmem_short_iput, shmem_int_iput, shmem_long_iput,
           shmem_double_iput, shmem_float_iput */
        for (i = 0; i < N; i += 1) {
            dest1[i] = -9;
            dest2[i] = -9;
            dest3[i] = -9;
            dest6[i] = -9;
            dest7[i] = -9;
        }
        success1 = 0;
        success2 = 0;
        success3 = 0;
        success6 = 0;
        success7 = 0;

        shmem_barrier_all ();

        /* Source stride 2: transfer N / 2 elements to stay inside src[N]. */
        shmem_short_iput (dest1, src1, 1, 2, N / 2, nextpe);
        shmem_int_iput (dest2, src2, 1, 2, N / 2, nextpe);
        shmem_long_iput (dest3, src3, 1, 2, N / 2, nextpe);
        shmem_double_iput (dest6, src6, 1, 2, N / 2, nextpe);
        shmem_float_iput (dest7, src7, 1, 2, N / 2, nextpe);

        shmem_barrier_all ();

        if (me == 0) {
            for (i = 0; i < N / 2; i += 1) {
                if (dest1[i] != (npes - 1)) {
                    success1 = 1;
                }
                if (dest2[i] != (npes - 1)) {
                    success2 = 1;
                }
                if (dest3[i] != (npes - 1)) {
                    success3 = 1;
                }
                if (dest6[i] != (npes - 1)) {
                    success6 = 1;
                }
                if (dest7[i] != (npes - 1)) {
                    success7 = 1;
                }
            }

            report_put_result ("shmem_short_iput", success1, &fail_count);
            report_put_result ("shmem_int_iput", success2, &fail_count);
            report_put_result ("shmem_long_iput", success3, &fail_count);
            report_put_result ("shmem_double_iput", success6, &fail_count);
            report_put_result ("shmem_float_iput", success7, &fail_count);
        }


        /* Testing shmem_double_p, shmem_float_p, shmem_int_p, shmem_long_p,
           shmem_short_p */
        shmem_barrier_all ();

        shmem_short_p (dest9, src9, nextpe);
        shmem_int_p (dest10, src10, nextpe);
        shmem_long_p (dest11, src11, nextpe);
        shmem_double_p (dest12, src12, nextpe);
        shmem_float_p (dest13, src13, nextpe);

        shmem_barrier_all ();

        if (me == 0) {
            report_put_result ("shmem_short_p", *dest9 != (npes - 1),
                               &fail_count);
            report_put_result ("shmem_int_p", *dest10 != (npes - 1),
                               &fail_count);
            report_put_result ("shmem_long_p", *dest11 != (npes - 1),
                               &fail_count);
            report_put_result ("shmem_double_p", *dest12 != (npes - 1),
                               &fail_count);
            report_put_result ("shmem_float_p", *dest13 != (npes - 1),
                               &fail_count);
        }

        shmem_barrier_all ();

        if (me == 0) {
            if (fail_count == 0)
                printf("All Tests Passed\n");
            else
                printf("%d Tests Failed\n", fail_count);
        }

        shmem_free (dest1);
        shmem_free (dest2);
        shmem_free (dest3);
        shmem_free (dest4);
        shmem_free (dest5);
        shmem_free (dest6);
        shmem_free (dest7);
        shmem_free (dest8);
        shmem_free (dest9);
        shmem_free (dest10);
        shmem_free (dest11);
        shmem_free (dest12);
        shmem_free (dest13);

        /* src8 is the only private-heap buffer; release it as well. */
        free (src8);

    }
    else {
        printf ("Number of PEs must be > 1 to test shmem put, test skipped\n");
    }

    shmem_finalize ();

    return 0;
}
/* Example #24 (0) -- file: fcollect64.c, project: caomw/SOS */
/*
 * shmem_fcollect64() test.  On each pass every PE contributes nWords longs
 * numbered so that the concatenation over all PEs is 0 .. nWords*num_pes-1,
 * and every PE checks the collected result.  nWords grows by nIncr per pass.
 *
 * Options: -l <loops>, -v/-V verbose, -h usage.
 * Positional arguments: nWords (must be a multiple of 8), per-loop increment.
 *
 * Returns 0 on success (also when run on a single PE, where the test cannot
 * run) and 1 on a bad argument; an allocation failure or data mismatch calls
 * shmem_global_exit(1).
 *
 * src, dst, pSync and Verbose are not declared in this function; they are
 * presumably file-scope objects of this test.
 */
int
main(int argc, char* argv[])
{
    int c, j, cloop, loops = DFLT_LOOPS;
    int mpe, num_pes;
    int nWords=1;
    int nIncr=1;
    int failures=0;
    char *pgm;

    shmem_init();
    mpe = shmem_my_pe();
    num_pes = shmem_n_pes();

    if (num_pes == 1) {
        Rfprintf(stderr,
            "ERR - Requires > 1 PEs\n");
        shmem_finalize();
        return 0;
    }

    /* Program name without its directory part, used in messages. */
    pgm = strrchr(argv[0],'/');
    if ( pgm )
        pgm++;
    else
        pgm = argv[0];

    while((c=getopt(argc,argv,"hqVvl:")) != -1) {
        switch(c) {
          case 'V':
          case 'v':
            Verbose++;
            break;
          case 'l':
            loops = atoi(optarg);
            break;
          case 'h':
            Rfprintf(stderr,
                "usage: %s {-l loopcnt(%d)} {numLongs(%d)} {loopIncr(%d)}\n",
                    pgm,DFLT_LOOPS,DFLT_NWORDS,DFLT_INCR);
            shmem_finalize();
            return 1;
          default:
            shmem_finalize();
            return 1;
        }
    }

    if (optind == argc)
        nWords = DFLT_NWORDS;
    else {
        nWords = atoi_scaled(argv[optind++]);
        if (nWords <= 0) {
            Rfprintf(stderr, "ERR - Bad nBytes arg?\n");
            shmem_finalize();
            return 1;
        }
    }

    if (optind == argc)
        nIncr = DFLT_INCR;
    else {
        /* Second positional argument is the per-loop increment, not the
           loop count (which is set with -l). */
        nIncr = atoi(argv[optind++]);
        if (nIncr <= 0 ) {
            Rfprintf(stderr, "ERR - incLongs arg out of bounds '%d'?\n", nIncr);
            shmem_finalize();
            return 1;
        }
    }

    if ( nWords % 8 ) { // integral multiple of longs
        Rprintf("%s: nWords(%d) not a multiple of %ld?\n",
            pgm,nWords,sizeof(long));
        shmem_finalize();
        return 1;
    }

    for (c = 0; c < SHMEM_COLLECT_SYNC_SIZE;c++)
        pSync[c] = SHMEM_SYNC_VALUE;

    if (Verbose && mpe == 0)
        fprintf(stderr,"loops(%d) nWords(%d) incr-per-loop(%d)\n",
            loops,nWords,nIncr);

    for(cloop=1; cloop <= loops; cloop++) {

        c = (sizeof(long)*nWords) * (num_pes + 1); // src + dst allocation.
        //nWords /= sizeof(long); // convert input of bytes --> longs.

        src = (long*)shmem_malloc(c);
        if ( !src ) {
            Rprintf("[%d] %s: shmem_malloc(%d) failed?\n", mpe, pgm,c);
            shmem_global_exit(1);
        }
        /* dst is the tail of the same allocation: nWords * num_pes longs. */
        dst = &src[nWords];

        for(j=0; j < nWords; j++)
            src[j] = (long) (j + mpe*nWords);

        shmem_barrier_all();

        shmem_fcollect64(dst,src,nWords,0,0,num_pes,pSync);

        // Expect dst to be consecutive integers 0 ... (nWords*num_pes)-1
        for(j=0; j < (nWords*num_pes); j++) {
            if ( dst[j] != (long) j ) {
                fprintf(stderr,
                    "[%d] dst[%d] %ld != expected %d\n",mpe,j,dst[j],j);
                shmem_global_exit(1);
            }
        }
        shmem_barrier_all();

        if (Verbose && mpe == 0 && loops > 1) {
            fprintf(stderr,".");
        }
        nWords += nIncr;

        /* Release this pass's buffer; the next pass allocates a larger one.
           Freeing only after the loop would leak every buffer but the last. */
        shmem_free( (void*)src );
    }

    if (Verbose && mpe == 0) {
        fprintf(stderr,"\n");fflush(stderr);
    }
    shmem_barrier_all();
    if (Verbose)
        printf("%d(%d) Exit(%d)\n", mpe, num_pes, failures);

    shmem_finalize();

    return failures;
}