예제 #1
0
glio_arena_t *
_glio_arena_create(
	glio_group_t	*gg,		/* group of processes to share access */
	size_t		asize)		/* arena size (bytes per process) */
{
	char	*fname, *fncpy;
	int	fd;
	int	groupsz;
	int	namelen;
	int	myrank;
	size_t	arena_size;
	glio_arena_t	*arp;
	shmem_group_t	shg;
	void	*aret;
	MPI_Comm	comm;
	MPI_Status	mpistatus;

	groupsz	= gg->groupsz;
	myrank	= gg->myrank;

	arp	= malloc(sizeof(*arp));
	if (arp == NULL) {
                fprintf(stderr,"%s:\n\
    _glio_arena_create(a) could not allocate a memory object of size %lld bytes\n",
			GLOBERRMSG, (long long)sizeof(*arp));
		abort();
	}
	bzero(arp, sizeof(*arp));

	arp->grp	= *gg;		/* copy it */
	gg		= &arp->grp;	/* point to the new copy */

	/* 
	 * Process with rank 0 finds a unique new file name to use as
	 * a memory mapped file.
	 */
	if (myrank == 0) {

		fname	= NULL;

		do {
			if (fname != NULL)
				free(fname);
			fname	= tempnam(NULL, "glio_arena");
			assert(fname != NULL);
			fd	= open(fname, O_CREAT | O_EXCL | O_RDWR, 0700);
		} while (fd == -1 && errno == EEXIST);
	}
	

	/*
	 * Trivial groups of size 1 can be handled trivially.
	 */
	if (groupsz == 1)
		goto past_file_name_send;

	_glio_barrier(arp);

	/*
	 * Initialization
	 */
	switch (gg->grtype) {
	    case GR_SHMEM:
		if ( _shmem_group_inquire != NULL) {
			_shmem_group_inquire(arp->grp.u.shmem.group, &shg);
		} else {
		    /* Special case for pre-release versions of MPT 1.2 */
		    static int	*world_plist;
		    static int	*world_racom;

		    /* if pre-release version of MPT 1.2 is used, then all */
		    /* PEs are in the group */
		    assert (groupsz == _num_pes());

                    if (world_plist == NULL) {
			register short	ipe;

                        world_plist	= malloc(_num_pes() * sizeof(int));
			if (world_plist == NULL) {
                		fprintf(stderr,"%s:\n\
    _glio_arena_create(b) could not allocate a memory object of size %lld bytes\n",
					GLOBERRMSG, (long long)(_num_pes() *
					sizeof(int)));
				abort();
			}

			world_racom	= shmalloc(SHMEM_GROUP_COM_SIZE *
					sizeof(int));
                        assert(world_racom != NULL);
			bzero(world_racom, 10*sizeof(int));

                        for (ipe = 0; ipe < _num_pes(); ipe++)
                                world_plist[ipe]	= ipe;
                    }

                    shg.groupsz	= _num_pes();
                    shg.myrank	= _my_pe();
		    shg.plist	= world_plist;
		    shg.racom	= world_racom;

		}
		break;

	    case GR_MPI:
		comm	= arp->grp.u.MPI.comm;
		break;

	    default:
		break;
	}

	/*
	 * Process 0 now must communicate the file name to all other 
	 * processes in the group.
	 */

	switch (gg->grtype) {
	    case GR_SHMEM:
		if (myrank == 0) {
			void	*vp;

			fncpy	= _sma_global_heap_alloc(strlen(fname)+1);
			assert(fncpy != NULL);
			strcpy(fncpy, fname); 

			vp	= fncpy;
			
			/* racom[1] gets string length */
			shg.racom[1]	= strlen(fname);

			/* racom[2]  and racom[3] get the pointer */
			/* to the string.			    */
			memcpy(&shg.racom[2], &vp, sizeof(vp));
		}

		_glio_barrier(arp);
			
		/* 
		 * Other processes now get the file name.
		 */
		if (myrank != 0) {
			void	*vp;

			namelen	= _shmem_int_g( &shg.racom[1], shg.plist[0]);
			assert(namelen > 0);

			/* get pointer to the string */
			_shmem_getmem(&vp, &shg.racom[2], sizeof(vp),
				shg.plist[0]);

			fname	= malloc(namelen + 1);
			if (fname == NULL) {
                		fprintf(stderr,"%s:\n\
    _glio_arena_create(c) could not allocate a memory object of size %lld bytes\n",
				GLOBERRMSG, (long long)(namelen + 1));
				abort();
			}

			/* copy the string */
			_shmem_getmem(fname, vp, namelen, shg.plist[0]);
			fname[namelen]	= '\0';
		}

		_glio_barrier(arp);

		if (myrank == 0) {
			_sma_global_heap_free(fncpy);
		}

		break;

	    case GR_MPI:
		if (myrank == 0) {
			register int	rank;

			namelen	= strlen(fname);

			for (rank = 1; rank < groupsz; rank++) {
                		ckMPIerr( MPI_Send(&namelen, 1, MPI_INT, 
					rank, 1, comm) );
			}

			for (rank = 1; rank < groupsz; rank++) {
                		ckMPIerr( MPI_Send(fname, namelen, MPI_CHAR, 
					rank, 2, comm) );
			}
		} else {
                	ckMPIerr( MPI_Recv(&namelen, 1, MPI_INT, 
					0, 1, comm, &mpistatus) );

			fname	= malloc(namelen + 1);
			if (fname == NULL) {
                		fprintf(stderr,"%s:\n\
    _glio_arena_create(d) could not allocate a memory object of size %lld bytes\n",
				GLOBERRMSG, (long long)(namelen + 1));
				abort();
			}

                	ckMPIerr( MPI_Recv(fname, namelen, MPI_CHAR, 
					0, 2, comm, &mpistatus) );

			fname[namelen]	= '\0';
		}

		break;

	    default:
		assert(0);
	}


	_glio_barrier(arp);

	/* 
	 * Non-rank-0 processes now open the file.
	 */
	if (myrank != 0) {
		fd	= open(fname, O_RDWR, 0700);
		if (fd == -1) {
			fprintf(stderr, "%s:\n\
Global I/O failed to open mapped file.  Errno is %d\n",
				GLOBERRMSG, errno);
			abort();
		}
	}

	_glio_barrier(arp);

past_file_name_send:
	/*
 	 * All processes have opened the file, so rank 0 may now unlink it.
	 */
	if (myrank == 0) {
		unlink(fname);
	}

	_glio_barrier(arp);

	/*
	 * After the barrier process 0 may initialize the mapped
	 * file and unlink it because we know that all processes in the
	 * group have now opened this file.
	 */
	arena_size	= groupsz * CEILING(asize, 1024);

	if (myrank == 0) {
		ssize_t	wret;

		wret	= pwrite(fd, " ", 1, arena_size - 1);
		assert(wret != -1L);
	}

	_glio_barrier(arp);

	/*
	 * A barrier assures us that the file has been initialized
	 * to the right size.   Now map the file into every process'
	 * address space.
	 */

	aret	= mmap64(NULL, arena_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                        fd, 0);
	if (aret == MAP_FAILED) {
                fprintf(stderr,"%s:\n\
    Cannot map internal file %s\n\
    for shared memory arena.  Error = %d\n",
			GLOBERRMSG, fname, errno);
		abort();
	}
예제 #2
0
파일: shmem_2dheat.c 프로젝트: coti/oshmpi
/*
 * Allocate a rows x cols grid of floats as one contiguous slab plus a table
 * of row pointers into it.  Terminates the program with a diagnostic when
 * memory cannot be obtained, instead of dereferencing a NULL pointer.
 */
static float **
heat_alloc_grid (int rows, int cols)
{
  /* Reserve at least one row pointer so that grid[0] always exists. */
  size_t nptrs = rows > 0 ? (size_t) rows : 1;
  size_t ncells = (rows > 0 && cols > 0) ? (size_t) rows * (size_t) cols : 0;
  float **grid;
  int r;

  grid = malloc (sizeof (float *) * nptrs);
  if (grid == NULL)
    {
      fprintf (stderr, "Unable to allocate row table for a %d x %d grid\n",
	       rows, cols);
      exit (EXIT_FAILURE);
    }

  grid[0] = malloc (sizeof (float) * ncells);
  if (grid[0] == NULL && ncells != 0)
    {
      fprintf (stderr, "Unable to allocate storage for a %d x %d grid\n",
	       rows, cols);
      exit (EXIT_FAILURE);
    }

  for (r = 1; r < rows; r++)
    {
      grid[r] = grid[r - 1] + cols;
    }

  return grid;
}

/*
 * Entry point of the SHMEM 2-D heat solver.
 *
 * Every PE parses the command line, the numerical method is broadcast from
 * PE 0, each PE allocates its slice of the grid and the selected method is
 * iterated until the globally reduced L2 norm is <= EPSILON.  ROOT prints
 * the elapsed time at the end.
 *
 * Options: -e <epsilon>  -h <height>  -w <width>  -m <1|2|3>  -t  -v
 *
 * Returns 77 when started with more than 8 PEs; otherwise the process ends
 * through exit (EXIT_SUCCESS).
 */
int
main (int argc, char **argv)
{
  /* arrays used to contain each PE's rows - specify cols, no need to spec rows */
  float **U_Curr;
  float **U_Next;
  /* helper variables */
  int i, j, k;
  int my_start_row, my_end_row, my_num_rows;
  int row_len;			/* number of columns stored per row */
  int verbose = 0;
  int show_time = 0;
  double t, tv[2];

  /* OpenSHMEM initialization */
  start_pes (0);
  p = _num_pes ();
  my_rank = _my_pe ();

  if (p > 8) {
      fprintf(stderr, "Ignoring test when run with more than 8 pes\n");
      return 77;
  }

  /* argument processing done by everyone */
  int c;
  int errflg = 0;		/* number of bad options seen */
  extern char *optarg;
  extern int optind, optopt;

  /*
   * The leading ':' in the option string makes getopt() return ':' for a
   * missing operand and suppresses its own diagnostics, so the ':' case
   * below is reachable and only ROOT reports the problem.
   */
  while ((c = getopt (argc, argv, ":e:h:m:tw:v")) != -1)
    {
      switch (c)
	{
	case 'e':
	  EPSILON = atof (optarg);
	  break;
	case 'h':
	  HEIGHT = atoi (optarg);
	  break;
	case 'm':
	  /* selects the numerical method; other values leave meth untouched */
	  switch (atoi (optarg))
	    {
	    case 1:		/* jacobi */
	      meth = 1;
	      break;
	    case 2:		/* gauss-seidel */
	      meth = 2;
	      break;
	    case 3:		/* sor */
	      meth = 3;
	      break;
	    }
	  break;
	case 't':
	  show_time++;		/* overridden by -v (verbose) */
	  break;
	case 'w':
	  WIDTH = atoi (optarg);
	  break;
	case 'v':
	  verbose++;
	  break;
	  /* handle bad arguments */
	case ':':		/* option given without its operand */
	  if (ROOT == my_rank)
	    fprintf (stderr, "Option -%c requires an operand\n", optopt);
	  errflg++;
	  break;
	case '?':
	  if (ROOT == my_rank)
	    fprintf (stderr, "Unrecognized option: -%c\n", optopt);
	  errflg++;
	  break;
	}
    }

  if (ROOT == my_rank && argc < 2)
    {
      printf ("Using defaults: -h 20 -w 20 -m 2\n");
    }

  /*
   * NOTE: bad options are counted in errflg but deliberately not fatal;
   * the run continues with whatever values were parsed.
   */
  (void) errflg;

  /* prepare the synchronization array before the first collective */
  for (i = 0; i < _SHMEM_REDUCE_SYNC_SIZE; i += 1)
    pSync[i] = _SHMEM_SYNC_VALUE;

  shmem_barrier_all ();

  /* broadcast method to use */
  shmem_broadcast32 (&meth, &meth, 1, 0, 0, 0, p, pSync);
  switch (meth)
    {
    case 1:
      method = &jacobi;
      break;
    case 2:
      method = &gauss_seidel;
      break;
    case 3:
      method = &sor;
      break;
    }

  /* let each processor decide what rows(s) it owns */
  my_start_row = get_start (my_rank);
  my_end_row = get_end (my_rank);
  my_num_rows = get_num_rows (my_rank);

  if (0 < verbose)
    printf ("proc %d contains (%d) rows %d to %d\n", my_rank, my_num_rows,
	    my_start_row, my_end_row);
  fflush (stdout);

  /* allocate the current and next 2d arrays */
  row_len = (int) floor (WIDTH / H);
  U_Curr = heat_alloc_grid (my_num_rows, row_len);
  U_Next = heat_alloc_grid (my_num_rows, row_len);

  /* initialize global grid */
  init_domain (U_Curr, my_rank);
  init_domain (U_Next, my_rank);

  /* iterate for solution */
  if (my_rank == ROOT)
    {
      tv[0] = gettime ();
    }
  k = 1;
  while (1)
    {
      method (U_Curr, U_Next);

      local_convergence_sqd = get_convergence_sqd (U_Curr, U_Next, my_rank);

      shmem_barrier_all ();
      shmem_float_sum_to_all (&convergence_sqd, &local_convergence_sqd, 1, 0,
			      0, p, pWrk, pSync);
      if (my_rank == ROOT)
	{
	  convergence = sqrt (convergence_sqd);
	  if (verbose == 1)
	    {
	      printf ("L2 = %f\n", convergence);
	    }
	}

      /* broadcast the convergence value computed by PE 0 */
      shmem_barrier_all ();
      shmem_broadcast32 (&convergence, &convergence, 1, 0, 0, 0, p, pSync);
      if (convergence <= EPSILON)
	{
	  break;
	}

      /* copy U_Next to U_Curr */
      for (j = my_start_row; j <= my_end_row; j++)
	{
	  for (i = 0; i < row_len; i++)
	    {
	      U_Curr[j - my_start_row][i] = U_Next[j - my_start_row][i];
	    }
	}
      k++;
      shmem_barrier_all ();
    }

  /* say something at the end */
  if (my_rank == ROOT)
    {
      tv[1] = gettime ();
      t = dt (&tv[1], &tv[0]);
      printf
	("Estimated time to convergence in %d iterations using %d processors on a %dx%d grid is %f seconds\n",
	 k, p, row_len, (int) floor (HEIGHT / H),
	 t / 1000000.0);
    }

  exit (EXIT_SUCCESS);
  return 0;
}
예제 #3
0
/*
 * SHMEM variant of the STREAM memory-bandwidth benchmark.
 *
 * Allocates three symmetric arrays, hands out blocks of `blocksize` elements
 * to PEs through a fetch-and-add counter on ROOT, runs the four kernels
 * (copy, scale, add, triad) NTIMES times, reduces the per-kernel timings
 * across PEs and lets PE 0 print the summary and validate the arrays.
 *
 * Exits with -1 when the number of PEs is not a power of two or when the
 * symmetric arrays cannot be allocated; returns 0 otherwise.
 */
int
main ()
{
  int quantum = -1, checktick ();
  int BytesPerWord;
  int k;
  ssize_t j, i;
  STREAM_TYPE scalar;


  /* --- SETUP --- determine precision and check timing --- */

  printf (HLINE);
  printf ("STREAM version $Revision: 5.10 $\n");
  printf (HLINE);
  BytesPerWord = sizeof (STREAM_TYPE);
  printf ("This system uses %d bytes per array element.\n", BytesPerWord);
  /* SHMEM initialize */
  start_pes (0);
  _world_size = _num_pes ();
  _world_rank = _my_pe ();

  STREAM_TYPE *a =
    (STREAM_TYPE *) shmalloc ((STREAM_ARRAY_SIZE + OFFSET) *
			      sizeof (STREAM_TYPE));
  STREAM_TYPE *b =
    (STREAM_TYPE *) shmalloc ((STREAM_ARRAY_SIZE + OFFSET) *
			      sizeof (STREAM_TYPE));
  STREAM_TYPE *c =
    (STREAM_TYPE *) shmalloc ((STREAM_ARRAY_SIZE + OFFSET) *
			      sizeof (STREAM_TYPE));

  /*
   * Every array is written unconditionally below, so stop here rather than
   * dereference a NULL pointer.  Every PE requests the same sizes, so the
   * allocation is expected to fail on all of them alike (TODO confirm for
   * the SHMEM library in use).
   */
  if (a == NULL || b == NULL || c == NULL)
    {
      fprintf (stderr,
	       "Unable to allocate symmetric memory for the STREAM arrays\n");
      exit (-1);
    }

  /* prepare the synchronization arrays before the first collective */
  for (int s = 0; s < _SHMEM_BARRIER_SYNC_SIZE; s++)
    {
      pSync0[s] = pSync1[s] = pSync2[s] = _SHMEM_SYNC_VALUE;
    }

  int size = _world_size;
  if (size == 0 || (size & (size - 1)) != 0)
    {
      printf ("Program only works for a PE size of power-of-2\n");
      exit (-1);
    }

  if (_world_rank == 0)
    {
      printf (HLINE);
#ifdef N
      printf ("*****  WARNING: ******\n");
      printf
	("      It appears that you set the preprocessor variable N when compiling this code.\n");
      printf
	("      This version of the code uses the preprocesor variable STREAM_ARRAY_SIZE to control the array size\n");
      printf ("      Reverting to default value of STREAM_ARRAY_SIZE=%llu\n",
	      (unsigned long long) STREAM_ARRAY_SIZE);
      printf ("*****  WARNING: ******\n");
#endif

      printf ("Array size = %llu (elements), Offset = %d (elements)\n",
	      (unsigned long long) STREAM_ARRAY_SIZE, OFFSET);
      printf ("Memory per array = %.1f MiB (= %.1f GiB).\n",
	      BytesPerWord * ((double) STREAM_ARRAY_SIZE / 1024.0 / 1024.0),
	      BytesPerWord * ((double) STREAM_ARRAY_SIZE / 1024.0 / 1024.0 /
			      1024.0));
      printf ("Total memory required = %.1f MiB (= %.1f GiB).\n",
	      (3.0 * BytesPerWord) * ((double) STREAM_ARRAY_SIZE / 1024.0 /
				      1024.),
	      (3.0 * BytesPerWord) * ((double) STREAM_ARRAY_SIZE / 1024.0 /
				      1024. / 1024.));
      printf ("Each kernel will be executed %d times.\n", NTIMES);
      printf
	(" The *best* time for each kernel (excluding the first iteration)\n");
      printf (" will be used to compute the reported bandwidth.\n");
      printf ("Number of SHMEM PEs requested = %i\n", _world_size);
    }

  /*
   * Each PE works on one block of this many elements at a time.  The value
   * is hard-coded for the default STREAM array size and must divide it.
   */
  int blocksize = 10000;
  assert (STREAM_ARRAY_SIZE % blocksize == 0);

  /* give every element of the local arrays a defined starting value */
  for (j = 0; j < STREAM_ARRAY_SIZE; j++)
    {
      a[j] = 1.0;
      b[j] = 2.0;
      c[j] = 0.0;
    }

  printf (HLINE);

  if (_world_rank == 0)
    {
      if ((quantum = checktick ()) >= 1)
	printf ("Your clock granularity/precision appears to be "
		"%d microseconds.\n", quantum);
      else
	{
	  printf ("Your clock granularity appears to be "
		  "less than one microsecond.\n");
	  quantum = 1;
	}
    }

  shmem_barrier_all ();

  /*
   * Timed warm-up pass: a PE processes block number count_p only when that
   * number matches the ticket (next_p) it last drew from the shared counter
   * on ROOT, then draws a new ticket.
   */
  time_start = mysecond ();
  next_p = shmem_int_fadd (&gcounter, 1, ROOT);
  for (j = 0; j < STREAM_ARRAY_SIZE; j += blocksize)
    {
      if (next_p == count_p)
	{
	  for (i = j; i < (j + blocksize); i++)
	    {
	      a[i] = 2.0E0 * a[i];
	    }
	  next_p = shmem_int_fadd (&gcounter, 1, ROOT);
	}
      count_p++;
    }
  time_end = mysecond ();
  clock_time_PE = time_end - time_start;
  shmem_double_sum_to_all (&total_clock_time, &clock_time_PE, 1,
			   0, 0, _world_size, pWrk0, pSync0);

  if (_world_rank == 0)
    {
      printf ("Each test below will take on the order"
	      " of %d microseconds.\n", (int) (total_clock_time * 1.0E6));
      printf ("   (= %d clock ticks)\n",
	      (int) ((1.0E6 * total_clock_time) / quantum));
      printf ("Increase the size of the arrays if this shows that\n");
      printf ("you are not getting at least 20 clock ticks per test.\n");

      printf (HLINE);

      printf ("WARNING -- The above is only a rough guideline.\n");
      printf ("For best results, please be sure you know the\n");
      printf ("precision of your system timer.\n");
      printf (HLINE);
    }
  /*      --- MAIN LOOP --- repeat test cases NTIMES times --- */

  /*
   * Each PE fills only some blocks of a, b and c, so after every block all
   * PEs synchronize and flat_tree() is called on that block.
   */
  scalar = 3.0;

  for (k = 0; k < NTIMES; k++)
    {
      /* untimed: reset the arrays and double a */
      for (j = 0; j < STREAM_ARRAY_SIZE; j += blocksize)
	{
	  if (next_p == count_p)
	    {
	      for (i = j; i < (j + blocksize); i++)
		{
		  a[i] = 1.0;
		  b[i] = 2.0;
		  c[i] = 0.0;
		  a[i] = 2.0E0 * a[i];

		}
	      next_p = shmem_int_fadd (&gcounter, 1, ROOT);
	    }
	  count_p++;
	  shmem_barrier_all ();
	  flat_tree (a + j, a + j, blocksize);
	}
      shmem_barrier_all ();

      /* kernel 0: c = a */
      time_start = mysecond ();
      for (j = 0; j < STREAM_ARRAY_SIZE; j += blocksize)
	{
	  if (next_p == count_p)
	    {
	      for (i = j; i < (j + blocksize); i++)
		{
		  c[i] = a[i];
		}
	      next_p = shmem_int_fadd (&gcounter, 1, ROOT);
	    }
	  count_p++;
	  shmem_barrier_all ();
	  flat_tree (c + j, c + j, blocksize);
	}
      shmem_barrier_all ();
      time_end = mysecond () - time_start;
      /* NOTE(review): this kernel reduces with max, the other three with
         sum - confirm whether the difference is intended. */
      shmem_double_max_to_all (&times[0][k], &time_end, 1,
			       0, 0, _world_size, pWrk0, pSync0);

      /* kernel 1: b = scalar * c */
      time_start = mysecond ();
      for (j = 0; j < STREAM_ARRAY_SIZE; j += blocksize)
	{
	  if (next_p == count_p)
	    {
	      for (i = j; i < (j + blocksize); i++)
		{
		  b[i] = scalar * c[i];
		}
	      next_p = shmem_int_fadd (&gcounter, 1, ROOT);
	    }
	  count_p++;
	  shmem_barrier_all ();
	  flat_tree (b + j, b + j, blocksize);
	}
      shmem_barrier_all ();
      time_end = mysecond () - time_start;
      shmem_double_sum_to_all (&times[1][k], &time_end, 1,
			       0, 0, _world_size, pWrk0, pSync0);

      /* kernel 2: c = a + b */
      time_start = mysecond ();
      for (j = 0; j < STREAM_ARRAY_SIZE; j += blocksize)
	{
	  if (next_p == count_p)
	    {
	      for (i = j; i < (j + blocksize); i++)
		{
		  c[i] = a[i] + b[i];
		}
	      next_p = shmem_int_fadd (&gcounter, 1, ROOT);
	    }
	  count_p++;
	  shmem_barrier_all ();
	  flat_tree (c + j, c + j, blocksize);
	}
      shmem_barrier_all ();
      time_end = mysecond () - time_start;
      shmem_double_sum_to_all (&times[2][k], &time_end, 1,
			       0, 0, _world_size, pWrk0, pSync0);

      /* kernel 3: a = b + scalar * c */
      time_start = mysecond ();
      for (j = 0; j < STREAM_ARRAY_SIZE; j += blocksize)
	{
	  if (next_p == count_p)
	    {
	      for (i = j; i < (j + blocksize); i++)
		{
		  a[i] = b[i] + scalar * c[i];
		}
	      next_p = shmem_int_fadd (&gcounter, 1, ROOT);
	    }
	  count_p++;
	  shmem_barrier_all ();
	  flat_tree (a + j, a + j, blocksize);
	}
      shmem_barrier_all ();
      time_end = mysecond () - time_start;
      shmem_double_sum_to_all (&times[3][k], &time_end, 1,
			       0, 0, _world_size, pWrk0, pSync0);
    }

  shmem_barrier_all ();

  /*      --- SUMMARY --- */

  for (k = 1; k < NTIMES; k++)	/* note -- skip first iteration */
    {
      for (j = 0; j < 4; j++)
	{
	  avgtime[j] = avgtime[j] + times[j][k];
	  mintime[j] = MIN (mintime[j], times[j][k]);
	  maxtime[j] = MAX (maxtime[j], times[j][k]);
	}
    }

  if (_world_rank == 0)
    {
      printf
	("Function    Best Rate MB/s  Avg time     Min time     Max time\n");
      for (j = 0; j < 4; j++)
	{
	  avgtime[j] = avgtime[j] / (double) (NTIMES - 1);

	  printf ("%s%12.1f  %11.6f  %11.6f  %11.6f\n", label[j],
		  1.0E-06 * bytes[j] / mintime[j],
		  avgtime[j], mintime[j], maxtime[j]);
	}
      printf (HLINE);
    }
  /* --- Check Results --- */
  if (_world_rank == 0)
    {
      checkSTREAMresults (a, b, c);
      printf (HLINE);
    }

  shfree (a);
  shfree (b);
  shfree (c);

  return 0;
}
예제 #4
0
static int test_item9(void)
{
    int rc = TC_PASS;
    static TYPE_VALUE target_addr[MAX_BUFFER_SIZE * 2];
    static TYPE_VALUE source_addr[MAX_BUFFER_SIZE * 2];
    TYPE_VALUE source_value = 0;
    TYPE_VALUE expect_value = 0;
    int num_proc = 0;
    int my_proc = 0;
    long* pSyncMult = NULL;
    TYPE_VALUE* pWrkMult = NULL;
    int pSyncNum = 2;
    int pWrkNum = 2;

    num_proc = _num_pes();
    my_proc = _my_pe();

    pSyncMult = shmalloc(sizeof(*pSyncMult) * pSyncNum * _SHMEM_REDUCE_SYNC_SIZE);
    if (pSyncMult)
    {
        TYPE_VALUE value = DEFAULT_VALUE;
        int i = 0;
        int j = 0;
        long cur_buf_size = 0;

        for ( j = 0; j < pSyncNum * _SHMEM_REDUCE_SYNC_SIZE; j++ )
        {
            pSyncMult[j] = _SHMEM_SYNC_VALUE;
        }

        /* Give some time to all PE for setting their values */
        shmem_barrier_all();

        pWrkMult = shmalloc(sizeof(*pWrkMult) * pWrkNum * sys_max(MAX_BUFFER_SIZE, _SHMEM_REDUCE_MIN_WRKDATA_SIZE));
        if (pWrkMult)
        {
            value = DEFAULT_VALUE;
            source_value = (TYPE_VALUE)(my_proc + 1);
            fill_buffer((void *)source_addr, MAX_BUFFER_SIZE * 2, (void *)&source_value, sizeof(source_value));
            fill_buffer((void *)target_addr, MAX_BUFFER_SIZE * 2, (void *)&value, sizeof(value));
            shmem_barrier_all();
            for (i = 0; (i < __cycle_count) && (rc == TC_PASS); i++)
            {
                cur_buf_size = sys_max(1, (i + 1) * MAX_BUFFER_SIZE / __cycle_count);
                /* Set initial target value */
                value = DEFAULT_VALUE;

                /* Set my value */
                source_value = (TYPE_VALUE)(my_proc + 1);

                /* Define expected value */
                expect_value = 0;
                if (my_proc % 2)    expect_value = DEFAULT_VALUE;
                else
                {
                    int k = num_proc;
                    while (k)
                    {
                        if (k % 2)  expect_value |= k;
                        k--;
                    }
                }

                int in_active_set = check_within_active_set(0, 1, ((num_proc / 2) + (num_proc % 2)), my_proc, num_proc);

                if ( in_active_set ) {
                    /* Put value to peer */
                    FUNC_VALUE(target_addr + (i % 2) * MAX_BUFFER_SIZE, source_addr + (i % 2) * MAX_BUFFER_SIZE, cur_buf_size, 0, 1, ((num_proc / 2) + (num_proc % 2)), pWrkMult + (i % pWrkNum) * sys_max(MAX_BUFFER_SIZE, _SHMEM_REDUCE_MIN_WRKDATA_SIZE),  pSyncMult + (i % pSyncNum) * _SHMEM_REDUCE_SYNC_SIZE);
                    rc = (!compare_buffer_with_const(target_addr + (i % 2) * MAX_BUFFER_SIZE, cur_buf_size, &expect_value, sizeof(expect_value)) ? TC_PASS : TC_FAIL);

                    log_debug(OSH_TC, "my#%d source = %lld expected = %lld actual = %lld buffer size = %lld\n",
                                       my_proc, (INT64_TYPE)source_value, (INT64_TYPE)expect_value, (INT64_TYPE)value, (INT64_TYPE)cur_buf_size);

                    if (rc)
                    {
                        TYPE_VALUE* check_addr = target_addr + (i % 2) * MAX_BUFFER_SIZE;
                        int odd_index = compare_buffer_with_const(check_addr, cur_buf_size, &expect_value, sizeof(expect_value));
                        int show_index = (odd_index > 1 ? odd_index - 2 : 0);
                        int show_size = sizeof(*check_addr) * sys_min(3, cur_buf_size - odd_index - 1);

                        log_debug(OSH_TC, "index of incorrect value: 0x%08X (%d)\n", odd_index - 1, odd_index - 1);
                        log_debug(OSH_TC, "buffer interval: 0x%08X - 0x%08X\n", show_index, show_index + show_size);
                        show_buffer(check_addr + show_index, show_size);
                    }
                    fill_buffer((void *)(source_addr + (i % 2) * MAX_BUFFER_SIZE), cur_buf_size, (void *)&source_value, sizeof(source_value));
                    fill_buffer((void *)(target_addr + (i % 2) * MAX_BUFFER_SIZE ), cur_buf_size, (void *)&value, sizeof(value));
                }
            }
            shfree(pWrkMult);
        } else {
            rc = TC_SETUP_FAIL;
        }
        shfree(pSyncMult);
    } else {
        rc = TC_SETUP_FAIL;
    }

    return rc;
}
/*
 * Strided-transfer test: for COUNT_VALUE iterations, calls FUNC_VALUE with
 * equal target and source strides (1 .. MAX_ARRAY_SIZE/2-1) against the next
 * PE in the ring, then compares the symmetric buffer with the pattern the
 * predecessor is expected to have produced (its rank + 1 at every
 * stride-th position, zero elsewhere).
 *
 * Returns TC_PASS, TC_FAIL on a mismatch, or TC_SETUP_FAIL when any of the
 * four buffers cannot be allocated.
 */
static int test_item1(void)
{
    int rc = TC_PASS;
    TYPE_VALUE* shmem_addr = NULL;
    TYPE_VALUE* local_addr = NULL;
    TYPE_VALUE my_value = 0;
    TYPE_VALUE peer_value = 0;
    TYPE_VALUE* expect_value = NULL;
    int num_proc = 0;
    int my_proc = 0;
    int peer_proc = 0;
    int tst, sst;
    int max_stride = MAX_ARRAY_SIZE/2-1;
    int *wait_variable = NULL;

    wait_variable = shmalloc(sizeof(int));
    num_proc = _num_pes();
    my_proc = _my_pe();
    shmem_addr = shmalloc(sizeof(*shmem_addr)*MAX_ARRAY_SIZE);
    local_addr = malloc(sizeof(*local_addr)*MAX_ARRAY_SIZE);
    expect_value = malloc(sizeof(*expect_value)*MAX_ARRAY_SIZE);

    /* Every buffer is dereferenced below, so all of them must exist */
    if (shmem_addr && local_addr && expect_value && wait_variable)
    {
        INT64_TYPE i = 0;
        INT64_TYPE j = 0;
        int num_to_get;
        my_value = 0;
        size_t odd_pos = 0;
        for (i = 0; (i < COUNT_VALUE) && (rc == TC_PASS); i++)
        {
            /* Stride grows with the iteration and is capped at max_stride */
            tst = (i < max_stride) ? i+1 : max_stride;
            sst = tst;
            num_to_get = MAX_ARRAY_SIZE/tst;
            /* Set my value */
            my_value = (TYPE_VALUE)(my_proc + 1);
            memset(shmem_addr,0,MAX_ARRAY_SIZE*SIZE_VALUE);
            memset(expect_value,0,MAX_ARRAY_SIZE*SIZE_VALUE);
            for (j = 0; j < MAX_ARRAY_SIZE; j++)
                local_addr[j] = my_value;


            /* Define peer and its value */
            peer_proc = (my_proc + 1) % num_proc;
            peer_value = (TYPE_VALUE)((my_proc == 0) ? num_proc : my_proc);


            /* Define expected value */
            for (j=0; j<num_to_get; j++)
                expect_value[j*tst] = peer_value;
            /* Make sure every PE has reset its buffers before data moves */
            shmem_barrier_all();

            /* Transfer the strided elements to/from the peer */
            FUNC_VALUE(shmem_addr,local_addr,tst,sst,num_to_get,peer_proc);
            wait_for_completion(wait_variable,peer_proc,&rc);

            if (rc == TC_PASS)
            {
                rc = (compare_longdouble_buffers(shmem_addr, expect_value, MAX_ARRAY_SIZE, &odd_pos) ? TC_PASS : TC_FAIL);
            }
            /* odd_pos is a size_t; cast it so it matches the %i conversion */
            log_debug(OSH_TC, "my(#%d:%lld) peer(#%d:%lld) expected = %lld vs got = %lld, odd = %i\n",
                               my_proc, (INT64_TYPE)my_value, peer_proc, (INT64_TYPE)peer_value, (INT64_TYPE)expect_value[odd_pos], (INT64_TYPE)shmem_addr[odd_pos], (int)odd_pos);

            /* Keep PEs in step before the buffers are reset for the next stride */
            shmem_barrier_all();
        }
    }
    else
    {
        rc = TC_SETUP_FAIL;
    }

    /* free(NULL) is a no-op, so the heap buffers need no guard */
    free(local_addr);
    free(expect_value);
    if (shmem_addr)
    {
        shfree(shmem_addr);
    }
    if (wait_variable)
    {
        shfree(wait_variable);
    }
    return rc;
}
예제 #6
0
/*
 * Single-element put test: in each of COUNT_VALUE iterations every PE writes
 * a signed multiple of STEP_VALUE into the symmetric variable of the next PE
 * in the ring and checks the value its own predecessor wrote.
 *
 * Returns TC_PASS when every iteration matched, TC_FAIL when at least one
 * did not, or TC_SETUP_FAIL when the symmetric allocation fails.
 */
static int test_item3(void)
{
    int rc = TC_PASS;
    TYPE_VALUE* shmem_addr = NULL;
    TYPE_VALUE my_value = 0;
    TYPE_VALUE peer_value = 0;
    TYPE_VALUE expect_value = 0;
    int num_proc = 0;
    int my_proc = 0;
    int peer_proc = 0;

    num_proc = _num_pes();
    my_proc = _my_pe();

    shmem_addr = shmalloc(sizeof(*shmem_addr));
    if (shmem_addr)
    {
        TYPE_VALUE value = -1;
        INT64_TYPE i = 0;

        /* Set my value */
        my_value = (-1);
        *shmem_addr = my_value;
        for (i = 0; i < COUNT_VALUE; i++)
        {
            /* Define peer and its value */
            peer_proc = (my_proc + 1) % num_proc;
            peer_value = (peer_proc % 2 ? 1 : -1) * (i * STEP_VALUE);

            /* Define expected value */
            expect_value = (my_proc % 2 ? 1 : -1) * (i * STEP_VALUE);

            /* This guarantees that PE set initial value before peer change one */
            shmem_barrier_all();

            /* Write value to peer */
            FUNC_VALUE(shmem_addr, peer_value, peer_proc);

            /* Get value put by peer:
             * These routines start the remote transfer and may return before the data
             * is delivered to the remote PE
             */
            wait_for_put_completion(peer_proc,10 /* wait for 10 secs */);
            value = *shmem_addr;

            /*
             * Record a failure without overwriting an earlier one: a later
             * matching iteration must not turn the result back into a pass.
             * The loop keeps running so that this PE still takes part in
             * every barrier.
             */
            if (!sys_fcompare(expect_value, value))
            {
                rc = TC_FAIL;
            }

            log_debug(OSH_TC, "my(#%d:%Lf) peer(#%d:%Lf) expected = %Lf vs got = %Lf\n",
                               my_proc, (long double)my_value, peer_proc, (long double)peer_value, (long double)expect_value, (long double)value);
        }
    }
    else
    {
        rc = TC_SETUP_FAIL;
    }

    if (shmem_addr)
    {
        shfree(shmem_addr);
    }

    return rc;
}
예제 #7
0
/*
 * Repeats FUNC_VALUE (called with a root PE, PE_start 0, logPE_stride 0,
 * PE_size num_proc and a pSync slot) for __cycle_count cycles over
 * MAX_BUFFER_SIZE elements, alternating between the two halves of the static
 * buffers and between two pSync slots.
 *
 * After each call the receiving half must hold MAX_VALUE on every PE except
 * the root, which must still hold DEFAULT_VALUE.
 *
 * Returns TC_PASS, TC_FAIL on a mismatch, or TC_SETUP_FAIL when the sync
 * array cannot be allocated.
 */
static int test_item7(void)
{
    static TYPE_VALUE shmem_addr[MAX_BUFFER_SIZE * 2];
    static TYPE_VALUE send_addr[MAX_BUFFER_SIZE * 2];
    const int pSyncNum = 2;
    int rc = TC_PASS;
    int num_proc = _num_pes();
    int my_proc = _my_pe();
    int root_proc = 0;
    int cycle = 0;
    int slot = 0;
    TYPE_VALUE my_value = 0;
    TYPE_VALUE peer_value = 0;
    TYPE_VALUE expect_value = 0;
    long* pSyncMult = shmalloc(sizeof(*pSyncMult) * pSyncNum * _SHMEM_COLLECT_SYNC_SIZE);

    /* Nothing to run (and nothing to release) without the sync array */
    if (!pSyncMult)
    {
        return TC_SETUP_FAIL;
    }

    for (slot = 0; slot < pSyncNum * _SHMEM_COLLECT_SYNC_SIZE; slot++)
    {
        pSyncMult[slot] = _SHMEM_SYNC_VALUE;
    }

    /* All PEs must have initialized their sync arrays before going on */
    shmem_barrier_all();

    /* PE 0 is the root */
    root_proc = 0;

    my_value = DEFAULT_VALUE;
    peer_value = MAX_VALUE;
    if (my_proc == root_proc)
    {
        expect_value = DEFAULT_VALUE;
    }
    else
    {
        expect_value = peer_value;
    }

    fill_buffer((void *)send_addr, MAX_BUFFER_SIZE * 2, (void *)&peer_value, sizeof(peer_value));
    fill_buffer((void *)shmem_addr, MAX_BUFFER_SIZE * 2, (void *)&my_value, sizeof(my_value));
    shmem_barrier_all();

    for (cycle = 0; (cycle < __cycle_count) && (rc == TC_PASS); cycle++)
    {
        /* Buffer halves and pSync slot used by this cycle */
        TYPE_VALUE* recv_half = shmem_addr + (cycle % 2) * MAX_BUFFER_SIZE;
        TYPE_VALUE* send_half = send_addr + (cycle % 2) * MAX_BUFFER_SIZE;
        long* sync_slot = pSyncMult + (cycle % pSyncNum) * _SHMEM_COLLECT_SYNC_SIZE;

        FUNC_VALUE(recv_half, send_half, MAX_BUFFER_SIZE, root_proc, 0, 0, num_proc, sync_slot);

        if (!compare_buffer_with_const(recv_half, MAX_BUFFER_SIZE, &expect_value, sizeof(expect_value)))
        {
            rc = TC_PASS;
        }
        else
        {
            rc = TC_FAIL;
        }

        log_debug(OSH_TC, "my#%d root(#%d:%lld) expected = %lld actual = %lld buffer size = %lld\n",
                           my_proc, root_proc, (INT64_TYPE)peer_value, (INT64_TYPE)expect_value, (INT64_TYPE)(shmem_addr[0]), (INT64_TYPE)MAX_BUFFER_SIZE);

        if (rc != 0)
        {
            /* Dump up to three elements around the first mismatch */
            int odd_index = compare_buffer_with_const(recv_half, MAX_BUFFER_SIZE, &expect_value, sizeof(expect_value));
            int show_index = 0;
            int show_size = 0;

            if (odd_index > 1)
            {
                show_index = odd_index - 2;
            }
            show_size = sizeof(*recv_half) * sys_min(3, MAX_BUFFER_SIZE - show_index);

            log_debug(OSH_TC, "index of incorrect value: 0x%08X (%d)\n", odd_index - 1, odd_index - 1);
            log_debug(OSH_TC, "buffer interval: 0x%08X - 0x%08X\n", show_index, show_index + show_size);
            show_buffer(recv_half + show_index, show_size);
        }

        /* Restore both halves so the next cycle that reuses them starts clean */
        fill_buffer((void *)send_half, MAX_BUFFER_SIZE, (void *)&peer_value, sizeof(peer_value));
        fill_buffer((void *)recv_half, MAX_BUFFER_SIZE, (void *)&my_value, sizeof(my_value));
    }

    shfree(pSyncMult);

    return rc;
}
예제 #8
0
/*
 * Ordering test that needs at least 4 PEs.
 *
 *   PE 0   spins on its local pe_escape flag doing floating-point busy work.
 *   PE 1   increments A on PE 0.
 *   PE 2   waits until A on PE 0 reads 1, then increments it.
 *   PE 3   waits until A on PE 0 reads 2, then clears pe_escape on PE 0 and
 *          on every PE with rank >= 4.
 *   others poll A on PE 0 until their own pe_escape flag is cleared.
 *
 * PEs 0-3 print their elapsed time in microseconds and their local A.
 * Returns 1 when run with fewer than 4 PEs, 0 otherwise.
 */
int
main(int argc, char **argv)
{
     struct timeval stamp;
     long began, elapsed;
     int rank, total;

     start_pes(0);
     rank = _my_pe();
     total = _num_pes();

     if (total < 4) {
          if (rank == 0)
               fprintf(stderr,"ERR: test requires 4 or more PEs\n");
          return 1;
     }
     shmem_barrier_all();

     gettimeofday(&stamp, NULL);
     began = (stamp.tv_sec * 1000000.0) + stamp.tv_usec;

     if (rank == 0) {
          /* busy work until PE 3 clears the flag */
          while (pe_escape) {
               double pi, pi2, pi3;
               int step;

               for (step = 1; step <= 5000; step++) {
                    pi = (22.0 / 7.0) + (double) step;
                    pi2 = pi * (double) step;
                    pi3 = (pi2 * pi) / 1.2;
               }
               mb();
          }
     } else if (rank == 1) {
          shmem_int_inc(&A, 0);
     } else if (rank == 2) {
          while (shmem_int_g(&A, 0) != 1) { ; }
          shmem_int_inc(&A, 0);
     } else if (rank == 3) {
          int pe;

          while (shmem_int_g(&A, 0) != 2) { ; }
          shmem_int_p((int*) &pe_escape, 0, 0);  // release PE0.
          /* release the spinning PEs with rank >= 4, if any */
          for (pe = 4; pe < total; pe++)
               shmem_int_p((int*)&pe_escape, 0, pe);
     } else {
          /* spin until released, A will never == 99, generate PE-0 traffic */
          while (shmem_int_g(&A, 0) != 99 && pe_escape) {
               mb();
          }
     }

     /* every role stops its clock right after finishing its part */
     gettimeofday(&stamp, NULL);
     elapsed = ((stamp.tv_sec * 1000000.0) + stamp.tv_usec) - began;

     if (rank < 4)
          fprintf(stderr,"[%d] elapsed usecs %ld A %d\n",rank,elapsed,A);

     shmem_barrier_all();

     return 0;
}
예제 #9
0
static int test_item4(void)
{
    int rc = TC_PASS;
    TYPE_VALUE* shmem_addr = NULL;
    TYPE_VALUE* send_addr = NULL;
    TYPE_VALUE my_value = 0;
    TYPE_VALUE peer_value = 0;
    TYPE_VALUE expect_value = 0;
    int num_proc = 0;
    int my_proc = 0;
    int root_proc = 0;

    num_proc = _num_pes();
    my_proc = _my_pe();

    shmem_addr = shmalloc(sizeof(*shmem_addr));
    send_addr = shmalloc(sizeof(*send_addr));
    if (shmem_addr && send_addr)
    {
        TYPE_VALUE value = DEFAULT_VALUE;
        int j = 0;

        /* Set my value */
        my_value = DEFAULT_VALUE;
        *shmem_addr = my_value;

        /* Define peer and it value */
        peer_value = BASE_VALUE;
        *send_addr = peer_value;

        /* Set root */
        root_proc = 0;

        /* Define expected value */
        expect_value = (((my_proc % 2) == 0) && (my_proc != 0) ? BASE_VALUE : DEFAULT_VALUE);

        /* This guarantees that PE set initial value before peer change one */
        for ( j = 0; j < _SHMEM_COLLECT_SYNC_SIZE; j++ )
        {
            pSync[j] = _SHMEM_SYNC_VALUE;
        }
        shmem_barrier_all();

        /* Put value to peer */
        if ((my_proc % 2) == 0)
        {
            FUNC_VALUE(shmem_addr, send_addr, 1, root_proc, 0, 1, ((num_proc / 2) + (num_proc % 2)), pSync);
        }

        /* Get value put by peer:
         * These routines start the remote transfer and may return before the data
         * is delivered to the remote PE
         */
        shmem_barrier_all();
        {
            int wait = WAIT_COUNT;

            while (wait--)
            {
                value = *shmem_addr;
                if (expect_value == value) break;
                sleep(1);
            }
        }

        rc = (expect_value == value ? TC_PASS : TC_FAIL);

        log_debug(OSH_TC, "my#%d root(#%d:%lld) expected = %lld actual = %lld\n",
                           my_proc, root_proc, (INT64_TYPE)peer_value, (INT64_TYPE)expect_value, (INT64_TYPE)value);
    }
    else
    {
        rc = TC_SETUP_FAIL;
    }

    if (send_addr)
    {
        shfree(send_addr);
    }

    if (shmem_addr)
    {
        shfree(shmem_addr);
    }

    return rc;
}
예제 #10
0
static int test_item6(void)
{
    int rc = TC_PASS;
    static TYPE_VALUE shmem_addr[MAX_BUFFER_SIZE];
    static TYPE_VALUE send_addr[MAX_BUFFER_SIZE];
    TYPE_VALUE my_value = 0;
    TYPE_VALUE peer_value = 0;
    TYPE_VALUE expect_value = 0;
    int num_proc = 0;
    int my_proc = 0;
    int root_proc = 0;

    num_proc = _num_pes();
    my_proc = _my_pe();

    {
        TYPE_VALUE value = DEFAULT_VALUE;
        int i = 0;
        int j = 0;
        long cur_buf_size = 0;

        for (i = 0; (i < __cycle_count) && (rc == TC_PASS); i++)
        {
            cur_buf_size = sys_max(1, (i + 1) * MAX_BUFFER_SIZE / __cycle_count);

            /* Set my value */
            my_value = DEFAULT_VALUE;
            fill_buffer((void *)shmem_addr, cur_buf_size, (void *)&my_value, sizeof(my_value));

            /* Give some time to all PE for setting their values */
            shmem_barrier_all();

            /* Define peer and it value */
            peer_value = (i * (MAX_VALUE / __cycle_count));
            fill_buffer((void *)send_addr, cur_buf_size, (void *)&peer_value, sizeof(peer_value));

            /* Set root */
            root_proc = 0;

            /* Define expected value */
            expect_value = (((my_proc % 2) == 0) && (my_proc != root_proc) ? peer_value : DEFAULT_VALUE);

            /* This guarantees that PE set initial value before peer change one */
            for ( j = 0; j < _SHMEM_COLLECT_SYNC_SIZE; j++ )
            {
                pSync[j] = _SHMEM_SYNC_VALUE;
            }
            shmem_barrier_all();

            /* Put value to peer */
            if ((my_proc % 2) == 0)
            {
                FUNC_VALUE(shmem_addr, send_addr, cur_buf_size, root_proc, 0, 1, ((num_proc / 2) + (num_proc % 2)), pSync);
            }

            /* Get value put by peer:
             * These routines start the remote transfer and may return before the data
             * is delivered to the remote PE
             */
            shmem_barrier_all();
            {
                int wait = WAIT_COUNT;

                while (wait--)
                {
                    value = *shmem_addr;
                    if (expect_value == value) break;
                    sleep(1);
                }
            }

            rc = (!compare_buffer_with_const(shmem_addr, cur_buf_size, &expect_value, sizeof(expect_value)) ? TC_PASS : TC_FAIL);

            log_debug(OSH_TC, "my#%d root(#%d:%lld) expected = %lld actual = %lld buffer size = %lld\n",
                               my_proc, root_proc, (INT64_TYPE)peer_value, (INT64_TYPE)expect_value, (INT64_TYPE)value, (INT64_TYPE)cur_buf_size);

            if (rc)
            {
                TYPE_VALUE* check_addr = shmem_addr;
                int odd_index = compare_buffer_with_const(check_addr, cur_buf_size, &expect_value, sizeof(expect_value));
                int show_index = (odd_index > 1 ? odd_index - 2 : 0);
                int show_size = sizeof(*check_addr) * sys_min(3, cur_buf_size - show_index);

                log_debug(OSH_TC, "index of incorrect value: 0x%08X (%d)\n", odd_index - 1, odd_index - 1);
                log_debug(OSH_TC, "buffer interval: 0x%08X - 0x%08X\n", show_index, show_index + show_size);
                show_buffer(check_addr + show_index, show_size);
            }
        }
    }

    return rc;
}
예제 #11
0
/*
 * Buffer "get" check against the next PE, using static buffers.
 *
 * For each of up to __cycle_count rounds every PE fills shmem_addr with a
 * round-dependent value (positive on odd PEs, negative on even PEs), then
 * fetches cur_buf_size elements of shmem_addr from PE (my_proc + 1) % num_proc
 * into recv_addr via FUNC_VALUE and checks that every fetched element equals
 * the value that peer is expected to have written.  The loop stops at the
 * first failing round.
 *
 * Returns TC_PASS or TC_FAIL.
 */
static int test_item5(void)
{
    int rc = TC_PASS;
    static TYPE_VALUE shmem_addr[MAX_BUFFER_SIZE];
    static TYPE_VALUE recv_addr[MAX_BUFFER_SIZE];
    TYPE_VALUE my_value = 0;
    TYPE_VALUE peer_value = 0;
    TYPE_VALUE expect_value = 0;
    int num_proc = 0;
    int my_proc = 0;
    int peer_proc = 0;

    num_proc = _num_pes();
    my_proc = _my_pe();

    {
        INT64_TYPE i = 0;
        long cur_buf_size = 0;

        my_value = 0;
        for (i = 0; (i < __cycle_count) && (rc == TC_PASS); i++)
        {
            /* Set my value: sign depends on PE parity, magnitude on the round */
            my_value = (my_proc % 2 ? 1 : -1) * (i * (MAX_VALUE / __cycle_count));
            cur_buf_size = sys_max(1, (i + 1) * MAX_BUFFER_SIZE / __cycle_count);
            fill_buffer((void *)shmem_addr, cur_buf_size, (void *)&my_value, sizeof(my_value));

            /* Make sure every PE has published its values before anyone reads */
            shmem_barrier_all();

            /* Define the peer and the value it is expected to hold */
            peer_proc = (my_proc + 1) % num_proc;
            peer_value = (peer_proc % 2 ? 1 : -1) * (i * (MAX_VALUE / __cycle_count));

            /* Define expected value */
            expect_value = peer_value;

            /* Get value from peer */
            FUNC_VALUE(recv_addr, shmem_addr, cur_buf_size, peer_proc);

            rc = (!compare_buffer_with_const(recv_addr, cur_buf_size, &expect_value, sizeof(expect_value)) ? TC_PASS : TC_FAIL);

            log_debug(OSH_TC, "my(#%d:%lld) peer(#%d:%lld) expected = %lld buffer size = %lld\n",
                               my_proc, (INT64_TYPE)my_value, peer_proc, (INT64_TYPE)peer_value, (INT64_TYPE)expect_value, (INT64_TYPE)cur_buf_size);

            if (rc)
            {
                /*
                 * Dump up to three elements starting one before the first
                 * mismatch.  odd_index is treated as a 1-based position
                 * (odd_index - 1 is what gets logged as the index), so
                 * show_index never exceeds cur_buf_size - 1 and the element
                 * count below is always at least 1.  The previous bound,
                 * cur_buf_size - odd_index - 1, became 0 or negative when
                 * the mismatch was among the last two elements.
                 */
                TYPE_VALUE* check_addr = recv_addr;
                int odd_index = compare_buffer_with_const(check_addr, cur_buf_size, &expect_value, sizeof(expect_value));
                int show_index = (odd_index > 1 ? odd_index - 2 : 0);
                int show_size = sizeof(*check_addr) * sys_min(3, cur_buf_size - show_index);

                log_debug(OSH_TC, "index of incorrect value: 0x%08X (%d)\n", odd_index - 1, odd_index - 1);
                log_debug(OSH_TC, "buffer interval: 0x%08X - 0x%08X\n", show_index, show_index + show_size);
                show_buffer(check_addr + show_index, show_size);
            }

            /* Nobody may overwrite shmem_addr until every PE has finished reading */
            shmem_barrier_all();
        }
    }

    return rc;
}
예제 #12
0
/*
 * Buffer "get" check where every PE reads from itself.
 *
 * shmem_addr is allocated with shmalloc and recv_addr with sys_malloc, both
 * sized for __max_buffer_size elements.  For each of up to __cycle_count
 * rounds the PE fills shmem_addr with a round-dependent value, fetches
 * cur_buf_size elements from its own PE number into recv_addr via FUNC_VALUE
 * and checks that every fetched element equals the value it wrote.  The loop
 * stops at the first failing round.
 *
 * Returns TC_PASS or TC_FAIL, or TC_SETUP_FAIL when an allocation fails.
 */
static int test_item3(void)
{
    int rc = TC_PASS;
    TYPE_VALUE* shmem_addr = NULL;
    TYPE_VALUE* recv_addr = NULL;
    TYPE_VALUE my_value = 0;
    TYPE_VALUE peer_value = 0;
    TYPE_VALUE expect_value = 0;
    int my_proc = 0;
    int peer_proc = 0;

    my_proc = _my_pe();

    shmem_addr = (TYPE_VALUE*)shmalloc(sizeof(*shmem_addr) * __max_buffer_size);
    recv_addr = (TYPE_VALUE*)sys_malloc(sizeof(*recv_addr) * __max_buffer_size);
    if (shmem_addr && recv_addr)
    {
        INT64_TYPE i = 0;
        long cur_buf_size = 0;

        my_value = 0;
        for (i = 0; (i < __cycle_count) && (rc == TC_PASS); i++)
        {
            /* Set my value: sign depends on PE parity, magnitude on the round */
            my_value = (my_proc % 2 ? 1 : -1) * (i * (MAX_VALUE / __cycle_count));
            cur_buf_size = sys_max(1, (i + 1) * __max_buffer_size / __cycle_count);
            fill_buffer((void *)shmem_addr, cur_buf_size, (void *)&my_value, sizeof(my_value));

            /* Make sure every PE has published its values before anyone reads */
            shmem_barrier_all();

            /* The peer is this PE itself, so its value is our own */
            peer_proc = my_proc;
            peer_value = my_value;

            /* Define expected value */
            expect_value = peer_value;

            /* Get value from peer */
            FUNC_VALUE(recv_addr, shmem_addr, cur_buf_size, peer_proc);

            rc = (!compare_buffer_with_const(recv_addr, cur_buf_size, &expect_value, sizeof(expect_value)) ? TC_PASS : TC_FAIL);

            log_debug(OSH_TC, "my(#%d:%Lf) peer(#%d:%Lf) expected = %Lf buffer size = %lld\n",
                               my_proc, (long double)my_value, peer_proc, (long double)peer_value, (long double)expect_value, (INT64_TYPE)cur_buf_size);

            if (rc)
            {
                /*
                 * Dump up to three elements starting one before the first
                 * mismatch.  odd_index is treated as a 1-based position
                 * (odd_index - 1 is what gets logged as the index), so
                 * show_index never exceeds cur_buf_size - 1 and the element
                 * count below is always at least 1.  The previous bound,
                 * cur_buf_size - odd_index - 1, became 0 or negative when
                 * the mismatch was among the last two elements.
                 */
                TYPE_VALUE* check_addr = recv_addr;
                int odd_index = compare_buffer_with_const(check_addr, cur_buf_size, &expect_value, sizeof(expect_value));
                int show_index = (odd_index > 1 ? odd_index - 2 : 0);
                int show_size = sizeof(*check_addr) * sys_min(3, cur_buf_size - show_index);

                log_debug(OSH_TC, "index of incorrect value: 0x%08X (%d)\n", odd_index - 1, odd_index - 1);
                log_debug(OSH_TC, "buffer interval: 0x%08X - 0x%08X\n", show_index, show_index + show_size);
                show_buffer(check_addr + show_index, show_size);
            }

            /* Nobody may overwrite shmem_addr until every PE has finished reading */
            shmem_barrier_all();
        }
    }
    else
    {
        rc = TC_SETUP_FAIL;
    }

    if (recv_addr)
    {
        sys_free(recv_addr);
    }

    if (shmem_addr)
    {
        shfree(shmem_addr);
    }

    return rc;
}
예제 #13
0
/* Performance test for shmem_XX_get (latency and bandwidth) */

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <sys/time.h>
#include <shmem.h>

/*
 * Per-PE accumulated time (microseconds) for the current transfer size.
 * main() passes it as both source and target of
 * shmem_longdouble_sum_to_all().
 */
long double time_taken;

/* Synchronisation and work arrays handed to shmem_longdouble_sum_to_all(). */
long pSync[_SHMEM_REDUCE_SYNC_SIZE];
long double pWrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];

//#define N_ELEMENTS 25600/*Data size chosen to be able to capture time required*/
  int
main(void)
{
  int i,j,k;
  int *target;
  int *source;
  int me, npes;
  int nxtpe;
  struct timeval start, end;
  long double start_time,end_time;

  int N_ELEMENTS = (4194304*2)/sizeof(int);

  start_pes(0);
  me = _my_pe();
  npes = _num_pes();

  for (i = 0; i < SHMEM_BCAST_SYNC_SIZE; i += 1)
  {
    pSync[i] = _SHMEM_SYNC_VALUE;
  }
  nxtpe = (me+1)%npes;
  source = (int *) shmalloc( N_ELEMENTS * sizeof(*source) );
  target = (int *) shmalloc( N_ELEMENTS * sizeof(*target) );

  if(me == 0)
    printf("Get Performance test results:\nSize (Bytes)\t\tTime (Microseconds)\t\tBandwidth (Bytes/Second)\n");

  for (i = 0; i < N_ELEMENTS; i += 1) {
    source[i] = i + 1;
    target[i] = -90;
  }
  shmem_barrier_all();

  /*For int put we take average of all the times realized by a pair of PEs, thus
   * reducing effects of physical location of PEs*/
  for (i=1;i<=N_ELEMENTS;i=i*2)
  {
    time_taken = 0;

    for(j=0;j<10000;j++){
      gettimeofday(&start, NULL);

      start_time = (start.tv_sec * 1000000.0) + start.tv_usec;

      shmem_int_get(target, source, i,nxtpe);

      gettimeofday(&end, NULL);

      end_time = (end.tv_sec * 1000000.0) + end.tv_usec;

      time_taken = time_taken + (end_time - start_time);

    }
    shmem_longdouble_sum_to_all(&time_taken, &time_taken,1, 0, 0, npes, pWrk, pSync);


    if(me == 0){
      time_taken = time_taken/(npes*10000); /*Average time across all PEs for one put*/
      if (i*sizeof(i) < 1048576)
        printf("%ld \t\t\t\t %ld\t\t\t\t %ld\n",i*sizeof(i),time_taken,(i*sizeof(i))/(time_taken*1000000.0));
      else
        printf("%ld \t\t\t %ld\t\t\t\t %ld\n",i*sizeof(i),time_taken,(i*sizeof(i))/(time_taken*1000000.0));

    }

  }
  shmem_barrier_all();

  shfree(target);
  shfree(source);
  return 0;
}
예제 #14
0
static int test_item4(void)
{
    int rc = TC_PASS;
    TYPE_VALUE* target_addr = NULL;
    TYPE_VALUE* source_addr = NULL;
    TYPE_VALUE source_value = 0;
    TYPE_VALUE expect_value = 0;
    int num_proc = 0;
    int my_proc = 0;

    num_proc = _num_pes();
    my_proc = _my_pe();


    pWrk = shmalloc(sizeof(*pWrk) * sys_max(1/2 + 1, _SHMEM_REDUCE_MIN_WRKDATA_SIZE));
    if (pWrk)
    {
        source_addr = shmalloc(sizeof(*source_addr));
        target_addr = source_addr;
    }

    if (target_addr && source_addr)
    {
        TYPE_VALUE value = DEFAULT_VALUE;
        int j = 0;

        /* Set my value */
        source_value = ( my_proc < OVERFLOW_FACTORIAL_LIMIT ? (TYPE_VALUE)(my_proc + 1) : 1);
        *source_addr = source_value;

        /* Define expected value */
        expect_value = 1;
        {
            int k = ( num_proc <= OVERFLOW_FACTORIAL_LIMIT ? num_proc : OVERFLOW_FACTORIAL_LIMIT);
            while (k) expect_value *= k--;
        }

        /* This guarantees that PE set initial value before peer change one */
        for ( j = 0; j < _SHMEM_REDUCE_SYNC_SIZE; j++ )
        {
            pSync[j] = _SHMEM_SYNC_VALUE;
        }
        shmem_barrier_all();

        /* Put value to peer */
        FUNC_VALUE(target_addr, source_addr, 1, 0, 0, num_proc, pWrk, pSync);

        /* Get value put by peer:
         * These routines start the remote transfer and may return before the data
         * is delivered to the remote PE
         */
        shmem_barrier_all();
        {
            int total_wait = 0;
            while (*target_addr == DEFAULT_VALUE && total_wait < 1000 * WAIT_COUNT)
            {
                total_wait++;
                usleep(1);
            }
            value = *target_addr;
        }

        rc = (expect_value == value ? TC_PASS : TC_FAIL);

        log_debug(OSH_TC, "my#%d source = %lld expected = %lld actual = %lld\n",
                           my_proc, (INT64_TYPE)source_value, (INT64_TYPE)expect_value, (INT64_TYPE)value);
    }
    else
    {
        rc = TC_SETUP_FAIL;
    }

    if (source_addr)
    {
        shfree(source_addr);
    }

    if (pWrk)
    {
        shfree(pWrk);
        pWrk = NULL;
    }

    return rc;
}
예제 #15
0
/*
 * Approximates pi by midpoint-rule integration of f() over n rectangles,
 * with the rectangles dealt out round-robin to the PEs.
 *
 * PE 0 takes the rectangle count from argv[1] (default 10000), broadcasts it,
 * every PE sums its share into mypi, the partial results are added into pi
 * with shmem_double_sum_to_all, and PE 0 prints the result, the error
 * against PI25DT and the elapsed wall-clock time.
 *
 * Always returns 0.
 */
int
main (int argc, char *argv[])
{
  int myid, numprocs, i;
  double h, sum, x;
  struct timeval startwtime, endwtime;

  start_pes (0);
  numprocs = _num_pes ();
  myid = _my_pe ();

  if (myid == 0)
    {
      n = 10000;		/* default # of rectangles */
      if (argc > 1)
	{
	  n = atoi (argv[1]);	/* # rectangles on command line */
	  if (n <= 0)
	    {
	      /*
	       * A zero or negative count would make h = 1.0 / n infinite or
	       * negative and the printed result meaningless; fall back to the
	       * default instead.
	       */
	      fprintf (stderr,
		       "invalid rectangle count \"%s\", using 10000\n",
		       argv[1]);
	      n = 10000;
	    }
	}

      gettimeofday (&startwtime, NULL);
    }

  /*
   * initialize sync array
   * NOTE(review): pSync is declared outside this function and is also passed
   * to shmem_double_sum_to_all below; confirm it has at least
   * _SHMEM_REDUCE_SYNC_SIZE entries and that all of them are initialised.
   */
  for (i = 0; i < _SHMEM_BCAST_SYNC_SIZE; i += 1)
    pSync[i] = _SHMEM_SYNC_VALUE;
  shmem_barrier_all ();

  /* send "n" out to everyone */
  shmem_broadcast32 (&n, &n, 1, 0, 0, 0, numprocs, pSync);

  /* do partial computation */
  h = 1.0 / (double) n;
  sum = 0.0;
  /* A slightly better approach starts from large i and works back */
  for (i = myid + 1; i <= n; i += numprocs)
    {
      x = h * ((double) i - 0.5);
      sum += f (x);
    }
  mypi = h * sum;

  /* wait for everyone to finish */
  shmem_barrier_all ();

  /* add up partial pi computations into PI */
  shmem_double_sum_to_all (&pi, &mypi, 1, 0, 0, numprocs, pWrk, pSync);

  /* "master" PE summarizes */
  if (myid == 0)
    {
      double elapsed;
      gettimeofday (&endwtime, NULL);
      elapsed = (endwtime.tv_sec - startwtime.tv_sec) * 1000.0;	/* sec to ms */
      elapsed += (endwtime.tv_usec - startwtime.tv_usec) / 1000.0;	/* us to ms */
      printf ("pi is approximately %.16f, Error is %.16f\n",
	      pi, fabs (pi - PI25DT));
      printf ("run time = %f ms\n", elapsed);
      fflush (stdout);
    }

  return 0;
}
예제 #16
0
/*
 * Buffer reduction check over a strided active set.
 *
 * For each of up to __cycle_count rounds the buffer length grows towards
 * __max_buffer_size and a fresh pWrk array is allocated for that length.
 * Every PE fills target_addr with DEFAULT_VALUE and source_addr with
 * (my_pe + 1), or 1 once my_pe reaches OVERFLOW_FACTORIAL_LIMIT, and then
 * calls FUNC_VALUE with PE start 0, stride argument 1 and an active-set size
 * of ceil(num_pes / 2).  Odd PEs expect their target to stay at
 * DEFAULT_VALUE; even PEs expect the product of all odd numbers up to
 * min(num_pes, OVERFLOW_FACTORIAL_LIMIT).  The loop stops at the first
 * failing round.
 *
 * Returns TC_PASS or TC_FAIL, or TC_SETUP_FAIL when an allocation fails.
 */
static int test_item7(void)
{
    int rc = TC_PASS;
    TYPE_VALUE* target_addr = NULL;
    TYPE_VALUE* source_addr = NULL;
    TYPE_VALUE source_value = 0;
    TYPE_VALUE expect_value = 0;
    int num_proc = 0;
    int my_proc = 0;

    num_proc = _num_pes();
    my_proc = _my_pe();

    target_addr = (TYPE_VALUE*)shmalloc(sizeof(*target_addr) * __max_buffer_size);
    source_addr = (TYPE_VALUE*)shmalloc(sizeof(*source_addr) * __max_buffer_size);
    if (target_addr && source_addr)
    {
        TYPE_VALUE value = DEFAULT_VALUE;
        int i = 0;
        int j = 0;
        long cur_buf_size = 0;

        for (i = 0; (i < __cycle_count) && (rc == TC_PASS); i++)
        {
            cur_buf_size = sys_max(1, (i + 1) * __max_buffer_size / __cycle_count);
            pWrk = shmalloc(sizeof(*pWrk) * sys_max(cur_buf_size/2 + 1, _SHMEM_REDUCE_MIN_WRKDATA_SIZE));
            if (pWrk)
            {
                /* Set initial target value */
                value = DEFAULT_VALUE;
                fill_buffer((void *)target_addr, cur_buf_size, (void *)&value, sizeof(value));

                /* Make sure every PE has reset its target */
                shmem_barrier_all();

                /* Set my value */
                source_value = ( my_proc < OVERFLOW_FACTORIAL_LIMIT ? (TYPE_VALUE)(my_proc + 1) : 1);
                fill_buffer((void *)source_addr, cur_buf_size, (void *)&source_value, sizeof(source_value));

                /* Define expected value */
                expect_value = 1;
                if (my_proc % 2)    expect_value = DEFAULT_VALUE;
                else
                {
                    /* Even PEs contribute the odd numbers 1, 3, 5, ... */
                    int k = ( num_proc <= OVERFLOW_FACTORIAL_LIMIT ? num_proc : OVERFLOW_FACTORIAL_LIMIT);
                    while (k)
                    {
                        if (k % 2)  expect_value *= k;
                        k--;
                    }
                }

                /* Reset the sync array and make sure every PE has set its source */
                for ( j = 0; j < _SHMEM_REDUCE_SYNC_SIZE; j++ )
                {
                    pSync[j] = _SHMEM_SYNC_VALUE;
                }
                shmem_barrier_all();

                /* Run the reduction */
                FUNC_VALUE(target_addr, source_addr, cur_buf_size, 0, 1, ((num_proc / 2) + (num_proc % 2)), pWrk, pSync);

                /* Synchronise, then poll the first element a bounded number
                 * of times before checking the whole buffer
                 */
                shmem_barrier_all();
                {
                    int wait = WAIT_COUNT;

                    while (wait--)
                    {
                        value = *target_addr;
                        if (expect_value == value) break;
                        sleep(1);
                    }
                }

                rc = (!compare_buffer_with_const(target_addr, cur_buf_size, &expect_value, sizeof(expect_value)) ? TC_PASS : TC_FAIL);

                log_debug(OSH_TC, "my#%d source = %lld expected = %lld actual = %lld buffer size = %lld\n",
                                   my_proc, (INT64_TYPE)source_value, (INT64_TYPE)expect_value, (INT64_TYPE)value, (INT64_TYPE)cur_buf_size);

                if (rc)
                {
                    /*
                     * Dump up to three elements starting one before the
                     * first mismatch.  odd_index is treated as a 1-based
                     * position (odd_index - 1 is what gets logged as the
                     * index), so show_index never exceeds cur_buf_size - 1
                     * and the element count below is always at least 1.
                     * The previous bound, cur_buf_size - odd_index - 1,
                     * became 0 or negative when the mismatch was among the
                     * last two elements.
                     */
                    TYPE_VALUE* check_addr = target_addr;
                    int odd_index = compare_buffer_with_const(check_addr, cur_buf_size, &expect_value, sizeof(expect_value));
                    int show_index = (odd_index > 1 ? odd_index - 2 : 0);
                    int show_size = sizeof(*check_addr) * sys_min(3, cur_buf_size - show_index);

                    log_debug(OSH_TC, "index of incorrect value: 0x%08X (%d)\n", odd_index - 1, odd_index - 1);
                    log_debug(OSH_TC, "buffer interval: 0x%08X - 0x%08X\n", show_index, show_index + show_size);
                    show_buffer(check_addr + show_index, show_size);
                }

                shfree(pWrk);
            } else {
                rc = TC_SETUP_FAIL;
            }
        }
    }
    else
    {
        rc = TC_SETUP_FAIL;
    }

    if (source_addr)
    {
        shfree(source_addr);
    }

    if (target_addr)
    {
        shfree(target_addr);
    }

    return rc;
}
예제 #17
0
/*
 * Single-element fetch check against the next PE.
 *
 * For COUNT_VALUE rounds every PE stores a round-dependent value (positive
 * on odd PEs, negative on even PEs) in a one-element symmetric cell, fetches
 * the cell of PE (my_proc + 1) % num_proc through FUNC_VALUE and compares it
 * with the value that peer is expected to have stored.
 *
 * A mismatch in any round makes the test fail.  All rounds are still
 * executed so that every PE calls the same number of barriers.
 *
 * Returns TC_PASS or TC_FAIL, or TC_SETUP_FAIL when the allocation fails.
 */
static int test_item3(void)
{
    int rc = TC_PASS;
    TYPE_VALUE* shmem_addr = NULL;
    TYPE_VALUE my_value = 0;
    TYPE_VALUE peer_value = 0;
    TYPE_VALUE expect_value = 0;
    int num_proc = 0;
    int my_proc = 0;
    int peer_proc = 0;

    num_proc = _num_pes();
    my_proc = _my_pe();

    shmem_addr = shmalloc(sizeof(*shmem_addr));
    if (shmem_addr)
    {
        TYPE_VALUE value = -1;
        INT64_TYPE i = 0;

        my_value = 0;
        for (i = 0; i < COUNT_VALUE; i++)
        {
            /* Set my value */
            my_value = (my_proc % 2 ? 1 : -1) * (i * STEP_VALUE);
            *shmem_addr = my_value;

            /* Make sure every PE has stored its value before anyone reads */
            shmem_barrier_all();

            /* Define the peer and the value it is expected to hold */
            peer_proc = (my_proc + 1) % num_proc;
            peer_value = (peer_proc % 2 ? 1 : -1) * (i * STEP_VALUE);

            /* Define expected value */
            expect_value = peer_value;

            /* Get value from peer */
            value = FUNC_VALUE(shmem_addr, peer_proc);

            /*
             * Record failures only: a passing later round must not hide a
             * mismatch seen in an earlier one.
             */
            if (!(expect_value == value))
            {
                rc = TC_FAIL;
            }

            log_debug(OSH_TC, "my(#%d:%lld) peer(#%d:%lld) expected = %lld vs got = %lld\n",
                               my_proc, (INT64_TYPE)my_value, peer_proc, (INT64_TYPE)peer_value, (INT64_TYPE)expect_value, (INT64_TYPE)value);

            /* Nobody may overwrite its cell until every PE has finished reading */
            shmem_barrier_all();
        }
    }
    else
    {
        rc = TC_SETUP_FAIL;
    }

    if (shmem_addr)
    {
        shfree(shmem_addr);
    }

    return rc;
}