Ejemplo n.º 1
0
/*
 * Flat-tree max reduction driven by PE 0.
 *
 * PE 0 fetches `source` from every PE in turn, folds the element-wise
 * maximum with REDUCE_MAX, and then writes the reduced vector into
 * `target` on every PE.  Every PE leaves through shmem_barrier_all().
 *
 *   target   receives the reduced values (written remotely by PE 0)
 *   source   per-PE input vector (read remotely by PE 0)
 *   nreduce  number of STREAM_TYPE elements to reduce
 *
 * NOTE(review): `target` and `source` are handed to shmem_putmem /
 * shmem_getmem as remote addresses, so they presumably have to be
 * symmetric objects -- confirm against the callers.  There is also no
 * barrier before the gather; callers presumably synchronise first.
 */
void
flat_tree (STREAM_TYPE * target, STREAM_TYPE * source, int nreduce)
{
  // only one PE needs to do the work; the others just wait at the barrier
  if (_world_rank == 0 && nreduce > 0)
    {
      const size_t nbytes = (size_t) nreduce * sizeof (STREAM_TYPE);
      /* running maximum */
      STREAM_TYPE *acc = (STREAM_TYPE *) malloc (nbytes);
      /* landing zone for one remote PE's contribution; it must be distinct
         from `acc`, otherwise each fetch wipes out the running maximum */
      STREAM_TYPE *incoming = (STREAM_TYPE *) malloc (nbytes);

      if (acc == NULL || incoming == NULL)
        {
          /* No error channel exists and the other PEs are already waiting
             at the barrier, so fail loudly rather than dereference NULL. */
          free (incoming);
          free (acc);
          abort ();
        }

      /* Seed the accumulator with PE 0's own contribution */
      shmem_getmem (acc, source, nbytes, 0);

      /* Then fold in every other PE */
      for (int n = 1; n < _world_size; n++)
        {
          shmem_getmem (incoming, source, nbytes, n);
          /* Compute max */
          for (int k = 0; k < nreduce; k++)
            {
              acc[k] = REDUCE_MAX (acc[k], incoming[k]);
            }
        }

      /* Finally, broadcast results */
      for (int n = 0; n < _world_size; n++)
        {
          shmem_putmem (target, acc, nbytes, n);
        }

      free (incoming);
      free (acc);
    }
  shmem_barrier_all ();

  return;
}
Ejemplo n.º 2
0
/*
 * Put/fence/get test.
 *
 * Each PE owns one slot of a shmalloc'ed buffer.  For a doubling series of
 * sizes it fills its slot with a PE-dependent byte, puts the first `size`
 * bytes of the slot into the same slot on PE (my_proc + 1) % num_proc,
 * calls shmem_fence(), and reads the last transferred byte back from that
 * PE to compare it with the fill value.
 *
 * Returns TC_PASS, TC_FAIL if any size read back a wrong byte, or
 * TC_SETUP_FAIL if the buffer could not be allocated.
 */
static int test_item1(void)
{
    int rc = TC_PASS;
    int num_proc = 0;
    int my_proc = 0;
    int peer;
    int size;
    char *buf;
    char *my_slot;
    /* Exactly one byte is fetched, so the receiving object is one byte wide;
       fetching into an int only yields the right value on little-endian hosts. */
    unsigned char test_byte;
    unsigned char expected;
    int max_heap_size_per_proc;

    num_proc = _num_pes();
    my_proc = _my_pe();
    peer = (my_proc + 1) % num_proc;

    max_heap_size_per_proc = 1L << (sys_log2((memheap_size() * HEAP_USAGE_PERCENT)/ num_proc) - 1);
    max_heap_size_per_proc = (max_heap_size_per_proc > MAX_SIZE) ? MAX_SIZE : max_heap_size_per_proc;
    buf = (char *)shmalloc(max_heap_size_per_proc * num_proc);
    if (!buf)
    {
        log_error(OSH_TC, "shmalloc(%d)\n", max_heap_size_per_proc * num_proc);
        return TC_SETUP_FAIL;
    }

    /* Start of this PE's slot; the same offset is used locally and remotely. */
    my_slot = buf + max_heap_size_per_proc * my_proc;

    size = 1L << sys_log2(num_proc);
    /* keep (size - 2) strictly positive: it is used as a modulus below */
    size = ((size - 2) > 0) ? size : 4;
    log_debug(OSH_TC, "%d: buf = %p size=%d\n", my_proc, buf, size);
    for (; size <= max_heap_size_per_proc; size *=2)
    {
        /* memset() stores its fill value converted to unsigned char, so the
           comparison below must use the same truncated value. */
        expected = (unsigned char)(1 + my_proc % (size - 2));
        memset(my_slot, expected, max_heap_size_per_proc);
        log_debug(OSH_TC, "\n%d: b4 barrier size = %d\n", my_proc, size);
        shmem_barrier_all();
        log_debug(OSH_TC, "%d: b4 putmem size = %d  %p -> %p\n", my_proc, size,
                my_slot, my_slot);
        shmem_putmem(my_slot, my_slot, size, peer);
        shmem_fence();
        test_byte = 0;
        /* log the address that is actually read: our own slot on the peer */
        log_debug(OSH_TC, "%d: b4 getmem size = %d\n %p <- %p ", my_proc, size,
                &test_byte,
                my_slot + size - 1
                );
        shmem_getmem(&test_byte, my_slot + size - 1, 1, peer);

        log_debug(OSH_TC, "%d: after getmem size = %d result=%x\n", my_proc, size, test_byte);
        if (test_byte != expected)
        {
            log_error(OSH_TC, "fence failed at size %d got = %x expected = %x\n", size, test_byte, expected);
            rc = TC_FAIL;
        }

    }

    shfree(buf);
    log_debug(OSH_TC, rc == TC_PASS? "passed" : "failed");
    return rc;
}
/*
 * Times a back-to-back stream of shmem_getmem() calls of `len` bytes from
 * PE 1 and hands the interval to calc_and_print_results().  Only the node
 * whose data->my_node is 0 does any work.
 */
void
streaming_latency(int len, perf_metrics_t *data)
{
    double t_begin = 0.0;
    double t_finish = 0.0;
    int iter;

    /* every node other than node 0 has nothing to measure */
    if (data->my_node != 0)
        return;

    iter = 0;
    while (iter < data->trials + data->warmup) {
        /* the clock starts once the warm-up iterations are done */
        if (iter == data->warmup)
            t_begin = perf_shmemx_wtime();

        shmem_getmem(data->dest, data->src, len, 1);
        iter++;
    }
    t_finish = perf_shmemx_wtime();

    calc_and_print_results(t_begin, t_finish, len, *data);
} /* latency/bw for one-way trip */
Ejemplo n.º 4
0
/*
 * Flat-tree max reduction driven by PE 0.
 *
 * After an entry barrier, PE 0 fetches `source` from every PE, folds the
 * element-wise maximum with REDUCE_MAX into a private accumulator, and then
 * writes the reduced vector into `target` on every PE.  A closing barrier
 * releases all PEs once the results have been pushed.
 *
 *   target   receives the reduced values (written remotely by PE 0)
 *   source   per-PE input vector (read remotely by PE 0)
 *   nreduce  number of STREAM_TYPE elements to reduce
 *
 * NOTE(review): `target` and `source` are used as remote addresses in
 * shmem_putmem / shmem_getmem, so they presumably must be symmetric
 * objects -- confirm against the callers.
 */
void
flat_tree (STREAM_TYPE * target, STREAM_TYPE * source, int nreduce)
{
  STREAM_TYPE *acc = NULL;
  STREAM_TYPE *incoming = NULL;

  /* nobody reads a remote `source` before every PE has arrived here */
  shmem_barrier_all ();

  // only one PE needs to access this section
  if (_world_rank == 0 && nreduce > 0)
    {
      const size_t nbytes = (size_t) nreduce * sizeof (STREAM_TYPE);

      /* Reduce into a temporary in case source/target overlap/same:
         `target` is written only after every `source` has been read. */
      acc = (STREAM_TYPE *) malloc (nbytes);
      /* Separate receive buffer: fetching straight into the accumulator
         would overwrite the maximum computed so far. */
      incoming = (STREAM_TYPE *) malloc (nbytes);
      if (acc == NULL || incoming == NULL)
        {
          /* The function cannot report failure and the other PEs are about
             to block in the closing barrier, so stop the job here. */
          free (incoming);
          free (acc);
          abort ();
        }

      /* Start from PE 0's own values */
      for (int j = 0; j < nreduce; j += 1)
        {
          acc[j] = source[j];
        }

      /* Gather the remaining PEs one at a time and fold them in */
      for (int n = 1; n < _world_size; n++)
        {
          shmem_getmem (incoming, source, nbytes, n);
          /* Compute max */
          for (int k = 0; k < nreduce; k++)
            {
              acc[k] = REDUCE_MAX (acc[k], incoming[k]);
            }
        }

      /* Then, broadcast results */
      for (int n = 0; n < _world_size; n++)
        {
          shmem_putmem (target, acc, nbytes, n);
        }
    }
  shmem_barrier_all ();

  free (incoming);
  incoming = NULL;
  free (acc);
  acc = NULL;

  return;
}
Ejemplo n.º 5
0
/**
 * \brief Byte-pattern integrity check over SHMEM put/get with a partner rank.
 *
 * For every cycle and stage, the rank pairs up with (my_rank XOR stage).
 * When that partner exists and is not the rank itself, each of the 256 byte
 * values is written through the partner with shmem_putmem, read back with
 * shmem_getmem, and every mismatching byte is reported on stdout.
 * The body is compiled only when SHMEM is defined.
 *
 * \param tst Struct that tells the number of cycles and stages to run the test.
 * \param m Struct that holds the results of the test; only its buflen is read here.
 */
void bit_SHMEM_test(test_p tst, measurement_p m) {
#ifdef SHMEM
	buffer_t *send_buf, *remote_buf, *check_buf;
	int cycle, stage;

	/* set up exchange buffers */
	send_buf = comm_newbuffer(m->buflen);
	remote_buf = comm_newbuffer(m->buflen);
	check_buf = comm_newbuffer(m->buflen);

	/* multiple cycles repeat the test */
	for (cycle = 0; cycle < tst->num_cycles; cycle++) {
		/* step through the stage schedule */
		for (stage = 0; stage < tst->num_stages; stage++) {
			int partner = my_rank ^ stage;		/* who's my partner for this stage? */
			int value;

			shmem_barrier_all();

			/* no valid pairing for this stage: nothing to exchange */
			if ((partner >= num_ranks) || (partner == my_rank))
				continue;

			/* try each byte pattern */
			for (value = 0x00; value < 0x100; value++) {
				unsigned char pattern;
				unsigned char *outgoing;
				unsigned char *readback;
				int pos;

				pattern = value;
				outgoing = (unsigned char *)(send_buf->data);
				for (pos = 0; pos < m->buflen; pos++)
					outgoing[pos] = pattern;

				shmem_putmem(remote_buf->data, send_buf->data, m->buflen, partner);
				/* NOTE(review): shmem_fence orders puts; confirm whether
				   shmem_quiet was intended before reading the data back. */
				shmem_fence();
				shmem_getmem(check_buf->data, remote_buf->data, m->buflen, partner);

				readback = (unsigned char *)(check_buf->data);
				for (pos = 0; pos < m->buflen; pos++) {
					if (readback[pos] == pattern)
						continue;
					printf("DATA ERROR DETECTED:   node:%20s   rank:%10d"
						"   pattern:0x%2x   buflen:%10d   position:%10d\n",
						nodename, my_rank, (int)pattern, m->buflen, pos);
				}
			}
		}
	}

	shmem_barrier_all();
	comm_freebuffer(check_buf);
	comm_freebuffer(remote_buf);
	comm_freebuffer(send_buf);
#endif
	return;
}
Ejemplo n.º 6
0
/*
 * Wrapper around shmem_getmem whose byte count and PE number arrive as
 * pointers; both are dereferenced and forwarded unchanged.
 * (Presumably the Fortran-callable binding, given the FORTRANIFY name.)
 */
void
FORTRANIFY (shmem_getmem) (void *target, const void *source, int *size, int *pe)
{
    const int nbytes = *size;
    const int remote_pe = *pe;

    shmem_getmem (target, source, nbytes, remote_pe);
}
/* Prints the "Test <routine>: Passed" / "Test <routine>: Failed" line. */
static void
report_get_result(const char *routine, int failed)
{
  printf("Test %s: %s\n", routine, failed ? "Failed" : "Passed");
}

/*
 * Functional test for the SHMEM get family.
 *
 * Every PE fills shmalloc'ed source arrays with its own PE number, then
 * fetches them from another PE with the typed, sized, strided and
 * single-element get routines.  PE 0 compares what it received against the
 * PE number it fetched from and prints one Passed/Failed line per routine.
 * With fewer than two PEs the test is skipped.
 *
 * Returns 0 normally, 1 if a buffer could not be allocated.
 */
int
main(int argc, char **argv)
{
  int i;
  int nextpe;
  int me, npes;
  /* NB: despite the name, a successN flag stays 0 while the test passes and
     is set to 1 on the first mismatch. */
  int success1,success2,success3, success4, success5, success6, success7, success8;

  short dest1[N];
  int dest2[N];
  long dest3[N];
  long double dest4[N];
  long long dest5[N];
  double dest6[N];
  float dest7[N];
  char *dest8;
  short dest9;
  int dest10;
  long dest11;
  double dest12;
  float dest13;

  short *src1;
  int *src2;
  long *src3;
  long double *src4;
  long long *src5;
  double *src6;
  float *src7;
  char *src8;
  short *src9;
  int *src10;
  long *src11;
  double *src12;
  float *src13;


  start_pes(0);
  me = _my_pe();
  npes = _num_pes();

  if(npes>1){

    success1 =0;
    success2 =0;
    success3 =0;
    success4 =0;
    success5 =0;
    success6 =0;
    success7 =0;
    success8 =0;
    dest8 = (char *)malloc(N*sizeof(char));
    if(dest8 == NULL){
      fprintf(stderr, "PE %d: unable to allocate dest8\n", me);
      return 1;
    }

    for (i = 0; i < N; i += 1) {
      dest1[i] = -9;
      dest2[i] = -9;
      dest3[i] = -9;
      dest4[i] = -9;
      dest5[i] = -9;
      dest6[i] = -9;
      dest7[i] = -9.0;
      dest8[i] = -9;
    }
    dest9 = -9;
    dest10 = -9;
    dest11 = -9;
    dest12 = -9;
    dest13 = -9;


    src1 = (short *)shmalloc( N * sizeof(*src1) );
    src2 = (int *)shmalloc( N * sizeof(*src2) );
    src3 = (long *)shmalloc( N * sizeof(*src3) );
    src4 = (long double *)shmalloc( N * sizeof(*src4) );
    src5 = (long long*)shmalloc( N * sizeof(*src5) );
    src6 = (double *)shmalloc( N * sizeof(*src6) );
    src7 = (float *)shmalloc( N * sizeof(*src7) );
    /* src8 is filled and fetched for N elements below, so it needs N
       elements of storage (it used to be allocated with only 4). */
    src8 = (char *)shmalloc( N * sizeof(*src8) );
    src9 = (short *)shmalloc( sizeof(*src9) );
    src10 = (int *)shmalloc( sizeof(*src10) );
    src11 = (long *)shmalloc( sizeof(*src11) );
    src12 = (double *)shmalloc( sizeof(*src12) );
    src13 = (float *)shmalloc( sizeof(*src13) );

    if(!src1 || !src2 || !src3 || !src4 || !src5 || !src6 || !src7 ||
       !src8 || !src9 || !src10 || !src11 || !src12 || !src13){
      fprintf(stderr, "PE %d: shmalloc failed\n", me);
      free(dest8);
      return 1;
    }

    for (i = 0; i < N; i += 1) {
      src1[i] = (short)me;
      src2[i] = me;
      src3[i] = (long)me;
      src4[i] = (long double)me;
      src5[i] = (long long)me;
      src6[i] = (double)me;
      src7[i] = (float)me;
      src8[i] = (char)me;
    }
    *src9 = (short)me;
    *src10 = me;
    *src11 = (long)me;
    *src12 = (double)me;
    *src13 = (float)me;



    nextpe = (me + 1) % npes;

    /*Testing shmem_short_get, shmem_short_get, shmem_int_get, shmem_long_get, shmem_longdouble_get, shmem_longlong_get, shmem_double_get, shmem_float_get, shmem_getmem*/
    shmem_barrier_all();

    shmem_short_get(dest1, src1, N, nextpe);
    shmem_int_get(dest2, src2, N, nextpe);
    shmem_long_get(dest3, src3, N, nextpe);
    shmem_longdouble_get(dest4, src4, N, nextpe);
    shmem_longlong_get(dest5, src5, N, nextpe);
    shmem_double_get(dest6, src6, N, nextpe);
    shmem_float_get(dest7, src7, N, nextpe);
    shmem_getmem(dest8, src8, N*sizeof(char), nextpe);

    shmem_barrier_all();

    /* PE 0 fetched from PE 1, so every element must read back as 1 */
    if(me == 0){
      for (i = 0; i < N; i += 1) {
        if(dest1[i] != ( 1)){
          success1=1;
        }
        if(dest2[i] != ( 1)){
          success2=1;
        }
        if(dest3[i] != ( 1)){
          success3=1;
        }
        if(dest4[i] != ( 1)){
          success4=1;
        }
        if(dest5[i] != ( 1)){
          success5=1;
        }
        if(dest6[i] != ( 1)){
          success6=1;
        }
        if(dest7[i] != ( 1)){
          success7=1;
        }
        if(dest8[i] != ( 1)){
          success8=1;
        }
      }

      report_get_result("shmem_short_get", success1);
      report_get_result("shmem_int_get", success2);
      report_get_result("shmem_long_get", success3);
      report_get_result("shmem_longdouble_get", success4);
      report_get_result("shmem_longlong_get", success5);
      report_get_result("shmem_double_get", success6);
      report_get_result("shmem_float_get", success7);
      report_get_result("shmem_getmem", success8);

    }
    shmem_barrier_all();

    /*Testing shmem_get32, shmem_get64, shmem_get128 */
    if(sizeof(int)==4){
      for (i = 0; i < N; i += 1) {
        dest2[i] = -9;
        dest3[i] = -9;
        dest4[i] = -9;
      }
      success2 = 0;
      success3 = 0;
      success4 = 0;

      shmem_barrier_all();

      shmem_get32(dest2, src2, N, nextpe);
      shmem_get64(dest3, src3, N, nextpe);
      shmem_get128(dest4, src4, N, nextpe);

      shmem_barrier_all();

      if(me == 0){
        for (i = 0; i < N; i += 1) {
          if(dest2[i] != ( 1)){
            success2=1;
          }
          if(dest3[i] != ( 1)){
            success3=1;
          }
          if(dest4[i] != ( 1)){
            success4=1;
          }
        }
        report_get_result("shmem_get32", success2);
        report_get_result("shmem_get64", success3);
        report_get_result("shmem_get128", success4);
      }
    }
    else if(sizeof(int)==8){
      for (i = 0; i < N; i += 1) {
        dest1[i] = -9;
        dest2[i] = -9;
        dest3[i] = -9;
      }
      success1 = 0;
      success2 = 0;
      success3 = 0;

      shmem_barrier_all();

      shmem_get32(dest1, src1, N, nextpe);
      shmem_get64(dest2, src2, N, nextpe);
      shmem_get128(dest3, src3, N, nextpe);

      shmem_barrier_all();

      if(me == 0){
        for (i = 0; i < N; i += 1) {
          if(dest1[i] != ( 1)){
            success1=1;
          }
          if(dest2[i] != ( 1)){
            success2=1;
          }
          if(dest3[i] != ( 1)){
            success3=1;
          }

        }
        report_get_result("shmem_get32", success1);
        report_get_result("shmem_get64", success2);
        report_get_result("shmem_get128", success3);
      }
    }

    /* Testing shmem_iget32, shmem_iget64, shmem_iget128 */
    shmem_barrier_all();
    if(sizeof(int)==4){
      for (i = 0; i < N; i += 1) {
        dest2[i] = -9;
        dest3[i] = -9;
        dest4[i] = -9;
      }
      success2 = 0;
      success3 = 0;
      success4 = 0;

      shmem_barrier_all();

      /* strided fetch of N/2 elements from the last PE */
      shmem_iget32(dest2, src2, 1, 2, N/2, npes-1);
      shmem_iget64(dest3, src3, 1, 2, N/2, npes-1);
      shmem_iget128(dest4, src4, 1, 2, N/2, npes-1);

      shmem_barrier_all();

      if(me == 0){
        for (i = 0; i < N/2; i += 1) {
          if(dest2[i] != (npes-1)){
            success2=1;
          }
          if(dest3[i] != (npes-1)){
            success3=1;
          }
          if(dest4[i] != (npes-1)){
            success4=1;
          }
        }
        report_get_result("shmem_iget32", success2);
        report_get_result("shmem_iget64", success3);
        report_get_result("shmem_iget128", success4);
      }
    }
    else if(sizeof(int)==8){
      for (i = 0; i < N; i += 1) {
        dest1[i] = -9;
        dest2[i] = -9;
        dest3[i] = -9;
      }
      success1 = 0;
      success2 = 0;
      success3 = 0;

      shmem_barrier_all();

      shmem_iget32(dest1, src1, 1, 2, N/2, npes-1);
      shmem_iget64(dest2, src2, 1, 2, N/2, npes-1);
      shmem_iget128(dest3, src3, 1, 2, N/2, npes-1);

      shmem_barrier_all();

      if(me == 0){
        for (i = 0; i < N/2; i += 1) {
          if(dest1[i] != (npes-1)){
            success1=1;
          }
          if(dest2[i] != (npes-1)){
            success2=1;
          }
          if(dest3[i] != (npes-1)){
            success3=1;
          }

        }
        report_get_result("shmem_iget32", success1);
        report_get_result("shmem_iget64", success2);
        report_get_result("shmem_iget128", success3);
      }
    }

    /*Testing shmem_short_iget, shmem_int_iget, shmem_long_iget, shmem_double_iget, shmem_float_iget */
    for (i = 0; i < N; i += 1) {
      dest1[i] = -9;
      dest2[i] = -9;
      dest3[i] = -9;
      dest6[i] = -9;
      dest7[i] = -9;
    }
    success1 = 0;
    success2 = 0;
    success3 = 0;
    success6 = 0;
    success7 = 0;

    shmem_barrier_all();

    shmem_short_iget(dest1, src1, 1, 2, N/2, npes-1);
    shmem_int_iget(dest2, src2, 1, 2, N/2, npes-1);
    shmem_long_iget(dest3, src3, 1, 2, N/2, npes-1);
    shmem_double_iget(dest6, src6, 1, 2, N/2, npes-1);
    shmem_float_iget(dest7, src7, 1, 2, N/2, npes-1);

    shmem_barrier_all();

    if(me == 0){
      for (i = 0; i < N/2; i += 1) {
        if(dest1[i] != (npes-1)){
          success1=1;
        }
        if(dest2[i] != (npes-1)){
          success2=1;
        }
        if(dest3[i] != (npes-1)){
          success3=1;
        }
        if(dest6[i] != (npes-1)){
          success6=1;
        }
        if(dest7[i] != (npes-1)){
          success7=1;
        }
      }

      report_get_result("shmem_short_iget", success1);
      report_get_result("shmem_int_iget", success2);
      report_get_result("shmem_long_iget", success3);
      report_get_result("shmem_double_iget", success6);
      report_get_result("shmem_float_iget", success7);

    }



    /* Testing shmem_double_g, shmem_float_g, shmem_int_g, shmem_long_g, shmem_short_g */
    shmem_barrier_all();

    dest9 = shmem_short_g(src9, nextpe);
    dest10 = shmem_int_g(src10, nextpe);
    dest11 = shmem_long_g(src11, nextpe);
    dest12 = shmem_double_g(src12, nextpe);
    dest13 = shmem_float_g(src13, nextpe);

    shmem_barrier_all();

    if(me == 0){
      report_get_result("shmem_short_g", dest9 != 1);
      report_get_result("shmem_int_g", dest10 != 1);
      report_get_result("shmem_long_g", dest11 != 1);
      report_get_result("shmem_double_g", dest12 != 1);
      report_get_result("shmem_float_g", dest13 != 1);


    }

    shmem_barrier_all();


    shfree(src1);
    shfree(src2);
    shfree(src3);
    shfree(src4);
    shfree(src5);
    shfree(src6);
    shfree(src7);
    shfree(src8);
    /* the single-element sources and dest8 were previously never released */
    shfree(src9);
    shfree(src10);
    shfree(src11);
    shfree(src12);
    shfree(src13);
    free(dest8);
  }
  else{
    printf("Number of PEs must be > 1 to test shmem get, test skipped\n");
  }
  return 0;
}
Ejemplo n.º 8
0
/* Rounds `addr` up to the next multiple of `align` (align must be > 0). */
static char *
align_buffer_ptr(char *addr, int align)
{
    /* uintptr_t, unlike unsigned long, can always hold a pointer value */
    uintptr_t step = (uintptr_t) align;

    return (char *) (((uintptr_t) addr + (step - 1)) / step * step);
}

/*
 * shmem_getmem latency benchmark for exactly two PEs.
 *
 * argv[1] selects where the buffers live: "heap" (shmalloc) or "global"
 * (the s_buf_original / r_buf_original objects).  For each message size
 * from 1 to MAX_MSG_SIZE, doubling each step, PE 0 times `loop` gets from
 * PE 1 after `skip` untimed warm-up iterations and prints the elapsed time
 * per get.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE on a wrong PE count, bad arguments
 * or a failed allocation.
 */
int main(int argc, char *argv[])
{
    int myid, numprocs, i;
    int size;
    char *s_buf, *r_buf;
    char *s_buf_heap = NULL, *r_buf_heap = NULL;
    int align_size;
    int64_t t_start = 0, t_end = 0;
    int use_heap = 0;   //default uses global

    start_pes(0);
    myid = _my_pe();
    numprocs = _num_pes();

    if(numprocs != 2) {
        if(myid == 0) {
            fprintf(stderr, "This test requires exactly two processes\n");
        }

        return EXIT_FAILURE;
    }

    if(argc != 2) {
        usage(myid);

        return EXIT_FAILURE;
    }

    if(0 == strncmp(argv[1], "heap", strlen("heap"))){
        use_heap = 1;
    } else if(0 == strncmp(argv[1], "global", strlen("global"))){
        use_heap = 0;
    } else {
        usage(myid);
        return EXIT_FAILURE;
    }

    align_size = MESSAGE_ALIGNMENT;

    /**************Allocating Memory*********************/

    if(use_heap){

        s_buf_heap = shmalloc(MYBUFSIZE);
        r_buf_heap = shmalloc(MYBUFSIZE);

        /* the buffers are written right away below, so fail early */
        if(s_buf_heap == NULL || r_buf_heap == NULL) {
            fprintf(stderr, "PE %d: shmalloc failed\n", myid);

            return EXIT_FAILURE;
        }

        s_buf = align_buffer_ptr(s_buf_heap, align_size);
        r_buf = align_buffer_ptr(r_buf_heap, align_size);
    } else {

        s_buf = align_buffer_ptr(s_buf_original, align_size);
        r_buf = align_buffer_ptr(r_buf_original, align_size);
    }

    /**************Memory Allocation Done*********************/

    if(myid == 0) {
        fprintf(stdout, HEADER);
        fprintf(stdout, "%-*s%*s\n", 10, "# Size", FIELD_WIDTH, "Latency (us)");
        fflush(stdout);
    }

    /* size starts at 1, so plain doubling visits the same sizes as before */
    for(size = 1; size <= MAX_MSG_SIZE; size *= 2) {

        /* touch the data */
        for(i = 0; i < size; i++) {
            s_buf[i] = 'a';
            r_buf[i] = 'b';
        }

        /* large messages switch to the "large" iteration counts */
        if(size > large_message_size) {
            loop = loop_large = 100;
            skip = skip_large = 0;
        }

        shmem_barrier_all();

        if(myid == 0)
            {
                for(i = 0; i < loop + skip; i++) {
                    /* the first `skip` iterations are untimed warm-up */
                    if(i == skip) t_start = TIME();

                    shmem_getmem(r_buf, s_buf, size, 1);
                }

                t_end = TIME();
            }
        shmem_barrier_all();

        if(myid == 0) {
            double latency = (1.0 * (t_end-t_start)) / loop;

            fprintf(stdout, "%-*d%*.*f\n", 10, size, FIELD_WIDTH,
                    FLOAT_PRECISION, latency);
            fflush(stdout);
        }
    }

    shmem_barrier_all();

    if(use_heap){
        shfree(s_buf_heap);
        shfree(r_buf_heap);
    }

    shmem_barrier_all();
    return EXIT_SUCCESS;
}
Ejemplo n.º 9
0
int
main (int argc, char **argv)
{
    int i;
    int nextpe;
    int me, npes;
    int success1, success2, success3, success4, success5, success6, success7,
        success8;

    short dest1[N];
    int dest2[N];
    long dest3[N];
    long double dest4[N];
    long long dest5[N];
    double dest6[N];
    float dest7[N];
    char *dest8;
    short dest9;
    int dest10;
    long dest11;
    double dest12;
    float dest13;

    int fail_count = 0;

    shmem_init ();
    me = shmem_my_pe ();
    npes = shmem_n_pes ();

    if (npes > 1) {

        success1 = 0;
        success2 = 0;
        success3 = 0;
        success4 = 0;
        success5 = 0;
        success6 = 0;
        success7 = 0;
        success8 = 0;
        dest8 = (char *) malloc (N * sizeof (char));

        for (i = 0; i < N; i += 1) {
            dest1[i] = -9;
            dest2[i] = -9;
            dest3[i] = -9;
            dest4[i] = -9;
            dest5[i] = -9;
            dest6[i] = -9;
            dest7[i] = -9.0;
            dest8[i] = -9;
        }
        dest9 = -9;
        dest10 = -9;
        dest11 = -9;
        dest12 = -9;
        dest13 = -9;

        for (i = 0; i < N; i += 1) {
            src1[i] = (short) me;
            src2[i] = me;
            src3[i] = (long) me;
            src4[i] = (long double) me;
            src5[i] = (long long) me;
            src6[i] = (double) me;
            src7[i] = (float) me;
            src8[i] = (char) me;
        }
        src9 = (short) me;
        src10 = me;
        src11 = (long) me;
        src12 = (double) me;
        src13 = (float) me;



        nextpe = (me + 1) % npes;

        /* Testing shmem_short_get, shmem_short_get, shmem_int_get,
           shmem_long_get, shmem_longdouble_get, shmem_longlong_get,
           shmem_double_get, shmem_float_get, shmem_getmem */
        shmem_barrier_all ();

        shmem_short_get (dest1, src1, N, nextpe);
        shmem_int_get (dest2, src2, N, nextpe);
        shmem_long_get (dest3, src3, N, nextpe);
        shmem_longdouble_get (dest4, src4, N, nextpe);
        shmem_longlong_get (dest5, src5, N, nextpe);
        shmem_double_get (dest6, src6, N, nextpe);
        shmem_float_get (dest7, src7, N, nextpe);
        shmem_getmem (dest8, src8, N * sizeof (char), nextpe);

        shmem_barrier_all ();

        if (me == 0) {
            for (i = 0; i < N; i += 1) {
                if (dest1[i] != (1)) {
                    success1 = 1;
                }
                if (dest2[i] != (1)) {
                    success2 = 1;
                }
                if (dest3[i] != (1)) {
                    success3 = 1;
                }
                if (dest4[i] != (1)) {
                    success4 = 1;
                }
                if (dest5[i] != (1)) {
                    success5 = 1;
                }
                if (dest6[i] != (1)) {
                    success6 = 1;
                }
                if (dest7[i] != (1)) {
                    success7 = 1;
                }
                if (dest8[i] != (1)) {
                    success8 = 1;
                }
            }

            if (success1 == 0)
                printf ("Test shmem_short_get: Passed\n");
            else {
                printf ("Test shmem_short_get: Failed\n");
		fail_count++;
	    }
            if (success2 == 0)
                printf ("Test shmem_int_get: Passed\n");
            else {
                printf ("Test shmem_int_get: Failed\n");
		fail_count++;
	    }
            if (success3 == 0)
                printf ("Test shmem_long_get: Passed\n");
            else {
                printf ("Test shmem_long_get: Failed\n");
		fail_count++;
	    }
            if (success4 == 0)
                printf ("Test shmem_longdouble_get: Passed\n");
            else {
                printf ("Test shmem_longdouble_get: Failed\n");
		fail_count++;
	    }
            if (success5 == 0)
                printf ("Test shmem_longlong_get: Passed\n");
            else {
                printf ("Test shmem_longlong_get: Failed\n");
		fail_count++;
	    }
            if (success6 == 0)
                printf ("Test shmem_double_get: Passed\n");
            else {
                printf ("Test shmem_double_get: Failed\n");
		fail_count++;
	    }
            if (success7 == 0)
                printf ("Test shmem_float_get: Passed\n");
            else {
                printf ("Test shmem_float_get: Failed\n");
		fail_count++;
	    }
            if (success8 == 0)
                printf ("Test shmem_getmem: Passed\n");
            else {
                printf ("Test shmem_getmem: Failed\n");
		fail_count++;
	    }
        }
        shmem_barrier_all ();

        /* Testing shmem_get32, shmem_get64, shmem_get128 */
        if (sizeof (int) == 4) {
            for (i = 0; i < N; i += 1) {
                dest2[i] = -9;
                dest3[i] = -9;
                dest4[i] = -9;
            }
            success2 = 0;
            success3 = 0;
            success4 = 0;

            shmem_barrier_all ();

            shmem_get32 (dest2, src2, N, nextpe);
            shmem_get64 (dest3, src3, N, nextpe);
            shmem_get128 (dest4, src4, N, nextpe);

            shmem_barrier_all ();

            if (me == 0) {
                for (i = 0; i < N; i += 1) {
                    if (dest2[i] != (1)) {
                        success2 = 1;
                    }
                    if (dest3[i] != (1)) {
                        success3 = 1;
                    }
                    if (dest4[i] != (1)) {
                        success4 = 1;
                    }
                }
                if (success2 == 0)
                    printf ("Test shmem_get32: Passed\n");
                else {
                    printf ("Test shmem_get32: Failed\n");
		    fail_count++;
		}

                if (success3 == 0)
                    printf ("Test shmem_get64: Passed\n");
                else {
                    printf ("Test shmem_get64: Failed\n");
		    fail_count++;
		}

                if (success4 == 0)
                    printf ("Test shmem_get128: Passed\n");
                else {
                    printf ("Test shmem_get128: Failed\n");
		    fail_count++;
		}
            }
        }
        else if (sizeof (int) == 8) {
            for (i = 0; i < N; i += 1) {
                dest1[i] = -9;
                dest2[i] = -9;
                dest3[i] = -9;
            }
            success1 = 0;
            success2 = 0;
            success3 = 0;

            shmem_barrier_all ();

            shmem_get32 (dest1, src1, N, nextpe);
            shmem_get64 (dest2, src2, N, nextpe);
            shmem_get128 (dest3, src3, N, nextpe);

            shmem_barrier_all ();

            if (me == 0) {
                for (i = 0; i < N; i += 1) {
                    if (dest1[i] != (1)) {
                        success1 = 1;
                    }
                    if (dest2[i] != (1)) {
                        success2 = 1;
                    }
                    if (dest3[i] != (1)) {
                        success3 = 1;
                    }

                }
                if (success1 == 0)
                    printf ("Test shmem_get32: Passed\n");
                else {
                    printf ("Test shmem_get32: Failed\n");
		    fail_count++;
		}

                if (success2 == 0)
                    printf ("Test shmem_get64: Passed\n");
                else {
                    printf ("Test shmem_get64: Failed\n");
		    fail_count++;
		}

                if (success3 == 0)
                    printf ("Test shmem_get128: Passed\n");
                else {
                    printf ("Test shmem_get128: Failed\n");
		    fail_count++;
		}
            }
        }
        /* Testing shmem_iget32, shmem_iget64, shmem_iget128 */
        shmem_barrier_all ();
        if (sizeof (int) == 4) {
            for (i = 0; i < N; i += 1) {
                dest2[i] = -9;
                dest3[i] = -9;
                dest4[i] = -9;
            }
            success2 = 0;
            success3 = 0;
            success4 = 0;

            shmem_barrier_all ();

            shmem_iget32 (dest2, src2, 1, 2, N / 2, npes - 1);
            shmem_iget64 (dest3, src3, 1, 2, N / 2, npes - 1);
            shmem_iget128 (dest4, src4, 1, 2, N / 2, npes - 1);

            shmem_barrier_all ();

            if (me == 0) {
                for (i = 0; i < N / 2; i += 1) {
                    if (dest2[i] != (npes - 1)) {
                        success2 = 1;
                    }
                    if (dest3[i] != (npes - 1)) {
                        success3 = 1;
                    }
                    if (dest4[i] != (npes - 1)) {
                        success4 = 1;
                    }
                }
                if (success2 == 0)
                    printf ("Test shmem_iget32: Passed\n");
                else {
                    printf ("Test shmem_iget32: Failed\n");
		    fail_count++;
		}

                if (success3 == 0)
                    printf ("Test shmem_iget64: Passed\n");
                else {
                    printf ("Test shmem_iget64: Failed\n");
		    fail_count++;
		}

                if (success4 == 0)
                    printf ("Test shmem_iget128: Passed\n");
                else {
                    printf ("Test shmem_iget128: Failed\n");
		    fail_count++;
		}
            }
        }
        else if (sizeof (int) == 8) {
            for (i = 0; i < N; i += 1) {
                dest1[i] = -9;
                dest2[i] = -9;
                dest3[i] = -9;
            }
            success1 = 0;
            success2 = 0;
            success3 = 0;

            shmem_barrier_all ();

            shmem_iget32 (dest1, src1, 1, 2, N / 2, npes - 1);
            shmem_iget64 (dest2, src2, 1, 2, N / 2, npes - 1);
            shmem_iget128 (dest3, src3, 1, 2, N / 2, npes - 1);

            shmem_barrier_all ();

            if (me == 0) {
                for (i = 0; i < N / 2; i += 1) {
                    if (dest1[i] != (npes - 1)) {
                        success1 = 1;
                    }
                    if (dest2[i] != (npes - 1)) {
                        success2 = 1;
                    }
                    if (dest3[i] != (npes - 1)) {
                        success3 = 1;
                    }

                }
                if (success1 == 0)
                    printf ("Test shmem_iget32: Passed\n");
                else {
                    printf ("Test shmem_iget32: Failed\n");
		    fail_count++;
		}

                if (success2 == 0)
                    printf ("Test shmem_iget64: Passed\n");
                else {
                    printf ("Test shmem_iget64: Failed\n");
		    fail_count++;
		}

                if (success3 == 0)
                    printf ("Test shmem_iget128: Passed\n");
                else {
                    printf ("Test shmem_iget128: Failed\n");
		    fail_count++;
		}
            }
        }

        /* Testing shmem_short_iget, shmem_int_iget, shmem_long_iget,
           shmem_double_iget, shmem_float_iget */
        for (i = 0; i < N; i += 1) {
            dest1[i] = -9;
            dest2[i] = -9;
            dest3[i] = -9;
            dest6[i] = -9;
            dest7[i] = -9;
        }
        success1 = 0;
        success2 = 0;
        success3 = 0;
        success6 = 0;
        success7 = 0;

        shmem_barrier_all ();

        shmem_short_iget (dest1, src1, 1, 2, N / 2, npes - 1);
        shmem_int_iget (dest2, src2, 1, 2, N / 2, npes - 1);
        shmem_long_iget (dest3, src3, 1, 2, N / 2, npes - 1);
        shmem_double_iget (dest6, src6, 1, 2, N / 2, npes - 1);
        shmem_float_iget (dest7, src7, 1, 2, N / 2, npes - 1);

        shmem_barrier_all ();

        if (me == 0) {
            for (i = 0; i < N / 2; i += 1) {
                if (dest1[i] != (npes - 1)) {
                    success1 = 1;
                }
                if (dest2[i] != (npes - 1)) {
                    success2 = 1;
                }
                if (dest3[i] != (npes - 1)) {
                    success3 = 1;
                }
                if (dest6[i] != (npes - 1)) {
                    success6 = 1;
                }
                if (dest7[i] != (npes - 1)) {
                    success7 = 1;
                }
            }

            if (success1 == 0)
                printf ("Test shmem_short_iget: Passed\n");
            else {
                printf ("Test shmem_short_iget: Failed\n");
		fail_count++;
	    }
            if (success2 == 0)
                printf ("Test shmem_int_iget: Passed\n");
            else {
                printf ("Test shmem_int_iget: Failed\n");
		fail_count++;
	    }
            if (success3 == 0)
                printf ("Test shmem_long_iget: Passed\n");
            else {
                printf ("Test shmem_long_iget: Failed\n");
		fail_count++;
	    }
            if (success6 == 0)
                printf ("Test shmem_double_iget: Passed\n");
            else {
                printf ("Test shmem_double_iget: Failed\n");
		fail_count++;
	    }
            if (success7 == 0)
                printf ("Test shmem_float_iget: Passed\n");
            else {
                printf ("Test shmem_float_iget: Failed\n");
		fail_count++;
	    }
        }

        /* Testing shmem_double_g, shmem_float_g, shmem_int_g, shmem_long_g,
           shmem_short_g */
        shmem_barrier_all ();

        dest9 = shmem_short_g (&src9, nextpe);
        dest10 = shmem_int_g (&src10, nextpe);
        dest11 = shmem_long_g (&src11, nextpe);
        dest12 = shmem_double_g (&src12, nextpe);
        dest13 = shmem_float_g (&src13, nextpe);

        shmem_barrier_all ();

        if (me == 0) {
            if (dest9 == 1)
                printf ("Test shmem_short_g: Passed\n");
            else {
                printf ("Test shmem_short_g: Failed\n");
		fail_count++;
	    }
            if (dest10 == 1)
                printf ("Test shmem_int_g: Passed\n");
            else {
                printf ("Test shmem_int_g: Failed\n");
		fail_count++;
	    }
            if (dest11 == 1)
                printf ("Test shmem_long_g: Passed\n");
            else {
                printf ("Test shmem_long_g: Failed\n");
		fail_count++;
	    }
            if (dest12 == 1)
                printf ("Test shmem_double_g: Passed\n");
            else {
                printf ("Test shmem_double_g: Failed\n");
		fail_count++;
	    }
            if (dest13 == 1)
                printf ("Test shmem_float_g: Passed\n");
            else {
                printf ("Test shmem_float_g: Failed\n");
		fail_count++;
	    }
        }

        shmem_barrier_all ();

        if (me == 0) {
	    if (fail_count == 0)
	        printf("All Tests Passed\n");
	    else
	        printf("%d Tests Failed\n", fail_count);
	}
    }
    else {
        printf ("Number of PEs must be > 1 to test shmem get, test skipped\n");
    }

    shmem_finalize ();

    return 0;
}