Esempio n. 1
0
/*
 * Put message-rate benchmark driver: sets up OpenSHMEM, prepares an aligned
 * message buffer (the first command-line argument selects "heap" mode or
 * not), runs the benchmark and releases the buffer when it was heap-allocated.
 */
int
main (int argc, char *argv[])
{
    struct pe_vars v;
    char *raw_buffer;
    char *work_buffer;
    long alignment;
    int use_heap;

    /* Start-up: initialise the library, validate arguments, print the banner. */
    v = init_openshmem();
    check_usage(v.me, v.npes, argc, argv);
    print_header(v.me);

    /* "heap" requests page alignment; any other mode uses a fixed 4096. */
    use_heap = (strncmp(argv[1], "heap", 10) == 0);
    if (use_heap) {
        alignment = sysconf(_SC_PAGESIZE);
    } else {
        alignment = 4096;
    }

    /* Obtain the buffer, align it, and clear the region the benchmark uses. */
    raw_buffer = allocate_memory(v.me, alignment, use_heap);
    work_buffer = align_memory((unsigned long)raw_buffer, alignment);
    memset(work_buffer, 0, MAX_MSG_SZ * ITERS_LARGE);

    /* Time the put message rate. */
    benchmark(v, work_buffer);

    /* Only the heap variant is released here (the unaligned pointer is freed). */
    if (use_heap) {
        shfree(raw_buffer);
    }

    return EXIT_SUCCESS;
}
Esempio n. 2
0
/****************************************************************************
 * Test Case processing procedure
 ***************************************************************************/
int osh_lock_tc2(const TE_NODE *node, int argc, const char *argv[])
{
    /*
     * Stress test: every PE repeatedly acquires and releases the same
     * distributed lock to make sure the locking does not hang.
     */
    long *lock_word = shmalloc(sizeof(*lock_word));
    int remaining = 10;

    UNREFERENCED_PARAMETER(node);
    UNREFERENCED_PARAMETER(argc);
    UNREFERENCED_PARAMETER(argv);

    /* The lock is cleared locally; the barrier lets every PE finish that first. */
    *lock_word = 0;
    shmem_barrier_all();

    while (remaining > 0)
    {
        shmem_set_lock(lock_word);
        shmem_clear_lock(lock_word);
        remaining--;
    }

    /* Nobody frees the lock word while another PE may still be using it. */
    shmem_barrier_all();
    shfree(lock_word);

    return TC_PASS;
}
Esempio n. 3
0
File: sping.c Progetto: coti/oshmpi
/*
 * SHMEM ping-pong benchmark.
 *
 * Usage: prog [-n reps] [-e] [-v] [-h] [minWords [maxWords [incWords]]]
 *
 * PEs are paired as (even, even+1).  For each message size between minWords
 * and maxWords the pair bounces a buffer of longs back and forth `reps`
 * times; the last word of the buffer is the arrival flag.  A PE without a
 * partner (odd PE count) only takes part in the barriers.
 *
 * Returns 0 on success, 1 when started with a single PE; exits with 1 when a
 * symmetric allocation fails.
 */
int main (int argc, char *argv[])
{
	double t,tv[2];
	int reps = DFLT_REPS;
	int doprint = 1/*0*/;
	char *progName;
	int minWords;
	int maxWords;
	int incWords, nwords, nproc, proc, peer, c, r, i;
	long *rbuf;	/* remote buffer - sink */
	long *tbuf;	/* transmit buffer - src */

	start_pes(0);
	proc = _my_pe();
	nproc = _num_pes();
	if (nproc == 1) {
		fprintf(stderr, "ERR - Requires > 1 Processing Elements\n");
		return 1;
	}

	/* progName = basename of argv[0]: walk back from the end to the last '/'. */
	for (progName = argv[0] + strlen(argv[0]);
		 progName > argv[0] && *(progName - 1) != '/';
		 progName--)
		;

	while ((c = getopt (argc, argv, "n:evh")) != -1)
		switch (c)
		{
		case 'n':
			if ((reps = getSize (optarg)) <= 0)
				usage (progName);
			break;
		case 'e':
			doprint++;
			break;
		case 'v':
			Verbose++;
			break;
		case 'h':
			help (progName);
			/* fallthrough: as in the original, usage() follows help() */
		default:
			usage (progName);
		}

	/* Positional arguments: each one is optional and defaults from the previous. */
	if (optind == argc)
		minWords = DFLT_MIN_WORDS;
	else if ((minWords = getSize (argv[optind++])) <= 0)
		usage (progName);

	if (optind == argc)
		maxWords = minWords;
	else if ((maxWords = getSize (argv[optind++])) < minWords)
		usage (progName);

	if (optind == argc)
		incWords = 0;
	else if ((incWords = getSize (argv[optind++])) < 0)
		usage (progName);

	if (!(rbuf = (long *)shmalloc(maxWords * sizeof(long))))
	{
		perror ("Failed memory allocation");
		exit (1);
	}
	/* The receive buffer must start zeroed: a non-zero last word means "arrived". */
	memset (rbuf, 0, maxWords * sizeof (long));

	if (!(tbuf = (long *)shmalloc(maxWords * sizeof(long))))
	{
		perror ("Failed memory allocation");
		exit (1);
	}

	/* Every transmitted word is non-zero so the arrival flag can never read 0. */
	for (i = 0; i < maxWords; i++)
		tbuf[i] = 1000 + (i & 255);

	if (doprint)
		printf ("%d(%d): Shmem PING reps %d minWords %d maxWords %d "
				"incWords %d\n",
				proc, nproc, reps, minWords, maxWords, incWords);

	/* The argument is cast to unsigned long, so the conversion must be %lu. */
	dprint("[%d] rbuf: %lu\n", proc, (unsigned long) rbuf);

	shmem_barrier_all();

	/* Pair neighbouring PEs; an unpaired last PE stays silent. */
	peer = proc ^ 1;
	if (peer >= nproc)
		doprint = 0;

	/* Step by incWords when given, otherwise double the size each round. */
	for (nwords = minWords;
		 nwords <= maxWords;
		 nwords = incWords ? nwords + incWords : nwords ? 2 * nwords : 1)
	{
		r = reps;
		shmem_barrier_all();
		tv[0] = gettime();
		if (peer < nproc)
		{
			/* The odd PE receives first, so it performs one fewer send in the loop... */
			if (proc & 1)
			{
				r--;
				shmem_wait(&rbuf[nwords-1], 0);
				rbuf[nwords-1] = 0;
			}

			while (r-- > 0)
			{
				shmem_long_put(rbuf, tbuf, nwords, peer);
				shmem_wait(&rbuf[nwords-1], 0);
				rbuf[nwords-1] = 0;
			}

			/* ...and sends the final reply that releases the even PE. */
			if (proc & 1)
			{
				shmem_long_put(rbuf, tbuf, nwords, peer);
			}
		}
		tv[1] = gettime();
		/* Each repetition is a round trip, hence the division by 2 * reps. */
		t = dt (&tv[1], &tv[0]) / (2 * reps);

		shmem_barrier_all();

		printStats (proc, peer, doprint, nwords, t);
	}

	shfree(rbuf);
	shfree(tbuf);

	shmem_barrier_all();
	return 0;
}
Esempio n. 4
0
/*
 * Distributed-table demo: the table is partitioned across the PEs while a
 * lock exists for every table index.  A few randomly chosen updates are
 * performed and every PE then dumps its partition.
 *
 * Returns 0 on success, 1 if a symmetric allocation fails.
 */
int
main (int argc, char *argv[])
{
  int table_bytes;
  int lock_bytes;
  int i;

  (void) argc;
  (void) argv;

  /* Seeded from process and user id, so PEs normally draw different sequences. */
  srand ( getpid ()  + getuid () );

  start_pes (0);
  me = _my_pe ();
  npes = _num_pes ();

  /*
   * size of the per-PE partition
   */
  ip_pe = table_size / npes;

  /*
   * each PE only stores what it owns
   */
  table_bytes = sizeof (*table) * ip_pe;
  table = shmalloc (table_bytes);
  if (table == NULL)
    {
      fprintf (stderr, "PE %d: unable to allocate %d bytes for the table\n",
               me, table_bytes);
      return 1;
    }
  /*
   * initialize table
   */
  for (i = 0; i < ip_pe; i+= 1)
    {
      table[i] = 0;
    }

  /*
   * each PE needs to be able to lock everywhere
   */
  lock_bytes = sizeof (*lock) * table_size;
  lock = shmalloc (lock_bytes);
  if (lock == NULL)
    {
      fprintf (stderr, "PE %d: unable to allocate %d bytes for the locks\n",
               me, lock_bytes);
      return 1;
    }
  /*
   * initialize locks
   */
  for (i = 0; i < table_size; i+= 1)
    {
      lock[i] = 0L;
    }

  /*
   * make sure all PEs have initialized symmetric data
   */
  shmem_barrier_all ();

  /*
   * four rounds: a PE performs an update in a round only when its own
   * random draw names itself as the updater
   */
  for (i = 0; i < 4; i += 1)
    {
      const int updater = rand () % npes;

      if (me == updater)
        {
          const int i2u = rand () % table_size;
          const int nv = rand () % 100;

          printf ("PE %d: About to update index %d with %d...\n",
                  me, i2u, nv
                 );

          table_update (nv, i2u);
        }
    }

  shmem_barrier_all ();

  /*
   * everyone shows their part of the table
   */
  table_dump ();

  /*
   * clean up allocated memory
   */
  shmem_barrier_all ();
  shfree (lock);
  shfree (table);

  return 0;
}
/*
 * Print the verdict line for one routine.  `failed` is non-zero when at
 * least one fetched value did not match the expected one.
 */
static void
report_result(const char *routine, int failed)
{
  printf("Test %s: %s\n", routine, failed ? "Failed" : "Passed");
}

/*
 * Exercises the OpenSHMEM get family: typed block gets, getmem,
 * get32/64/128, strided iget variants and the single-element _g routines.
 *
 * Every PE fills its symmetric source buffers with its own PE number.  Block
 * gets and _g calls fetch from the next PE; strided gets fetch from the last
 * PE.  Only PE 0 checks and reports, so the expected values are 1 (its next
 * PE) and npes-1 respectively.
 *
 * Requires more than one PE; otherwise the test is skipped.
 * Returns 0 normally, 1 if an allocation fails.
 */
int
main(int argc, char **argv)
{
  int i;
  int nextpe;
  int me, npes;
  /* Despite the name, a successN flag of 0 means "passed", 1 means "failed". */
  int success1,success2,success3, success4, success5, success6, success7, success8;

  short dest1[N];
  int dest2[N];
  long dest3[N];
  long double dest4[N];
  long long dest5[N];
  double dest6[N];
  float dest7[N];
  char *dest8;
  short dest9;
  int dest10;
  long dest11;
  double dest12;
  float dest13;

  short *src1;
  int *src2;
  long *src3;
  long double *src4;
  long long *src5;
  double *src6;
  float *src7;
  char *src8;
  short *src9;
  int *src10;
  long *src11;
  double *src12;
  float *src13;


  start_pes(0);
  me = _my_pe();
  npes = _num_pes();

  if(npes>1){

    success1 =0;
    success2 =0;
    success3 =0;
    success4 =0;
    success5 =0;
    success6 =0;
    success7 =0;
    success8 =0;
    dest8 = (char *)malloc(N*sizeof(char));
    if (dest8 == NULL) {
      fprintf(stderr, "PE %d: unable to allocate the local byte buffer\n", me);
      return 1;
    }

    /* -9 is a sentinel no PE number can produce. */
    for (i = 0; i < N; i += 1) {
      dest1[i] = -9;
      dest2[i] = -9;
      dest3[i] = -9;
      dest4[i] = -9;
      dest5[i] = -9;
      dest6[i] = -9;
      dest7[i] = -9.0;
      dest8[i] = -9;
    }
    dest9 = -9;
    dest10 = -9;
    dest11 = -9;
    dest12 = -9;
    dest13 = -9;


    src1 = (short *)shmalloc( N * sizeof(*src1) );
    src2 = (int *)shmalloc( N * sizeof(*src2) );
    src3 = (long *)shmalloc( N * sizeof(*src3) );
    src4 = (long double *)shmalloc( N * sizeof(*src4) );
    src5 = (long long*)shmalloc( N * sizeof(*src5) );
    src6 = (double *)shmalloc( N * sizeof(*src6) );
    src7 = (float *)shmalloc( N * sizeof(*src7) );
    /* src8 is written and fetched for N bytes below, so it needs N bytes (was 4). */
    src8 = (char *)shmalloc( N * sizeof(*src8) );
    src9 = (short *)shmalloc( sizeof(*src9) );
    src10 = (int *)shmalloc( sizeof(*src10) );
    src11 = (long *)shmalloc( sizeof(*src11) );
    src12 = (double *)shmalloc( sizeof(*src12) );
    src13 = (float *)shmalloc( sizeof(*src13) );

    if (!src1 || !src2 || !src3 || !src4 || !src5 || !src6 || !src7 ||
        !src8 || !src9 || !src10 || !src11 || !src12 || !src13) {
      fprintf(stderr, "PE %d: symmetric allocation failed\n", me);
      free(dest8);
      return 1;
    }

    for (i = 0; i < N; i += 1) {
      src1[i] = (short)me;
      src2[i] = me;
      src3[i] = (long)me;
      src4[i] = (long double)me;
      src5[i] = (long long)me;
      src6[i] = (double)me;
      src7[i] = (float)me;
      src8[i] = (char)me;
    }
    *src9 = (short)me;
    *src10 = me;
    *src11 = (long)me;
    *src12 = (double)me;
    *src13 = (float)me;



    nextpe = (me + 1) % npes;

    /*Testing shmem_short_get, shmem_int_get, shmem_long_get, shmem_longdouble_get, shmem_longlong_get, shmem_double_get, shmem_float_get, shmem_getmem*/
    shmem_barrier_all();

    shmem_short_get(dest1, src1, N, nextpe);
    shmem_int_get(dest2, src2, N, nextpe);
    shmem_long_get(dest3, src3, N, nextpe);
    shmem_longdouble_get(dest4, src4, N, nextpe);
    shmem_longlong_get(dest5, src5, N, nextpe);
    shmem_double_get(dest6, src6, N, nextpe);
    shmem_float_get(dest7, src7, N, nextpe);
    shmem_getmem(dest8, src8, N*sizeof(char), nextpe);

    shmem_barrier_all();

    if(me == 0){
      for (i = 0; i < N; i += 1) {
        if(dest1[i] != ( 1)){
          success1=1;
        }
        if(dest2[i] != ( 1)){
          success2=1;
        }
        if(dest3[i] != ( 1)){
          success3=1;
        }
        if(dest4[i] != ( 1)){
          success4=1;
        }
        if(dest5[i] != ( 1)){
          success5=1;
        }
        if(dest6[i] != ( 1)){
          success6=1;
        }
        if(dest7[i] != ( 1)){
          success7=1;
        }
        if(dest8[i] != ( 1)){
          success8=1;
        }
      }

      report_result("shmem_short_get", success1);
      report_result("shmem_int_get", success2);
      report_result("shmem_long_get", success3);
      report_result("shmem_longdouble_get", success4);
      report_result("shmem_longlong_get", success5);
      report_result("shmem_double_get", success6);
      report_result("shmem_float_get", success7);
      report_result("shmem_getmem", success8);

    }
    shmem_barrier_all();

    /*Testing shmem_get32, shmem_get64, shmem_get128 */
    /* The buffers used for each width depend on the platform's sizeof(int). */
    if(sizeof(int)==4){
      for (i = 0; i < N; i += 1) {
        dest2[i] = -9;
        dest3[i] = -9;
        dest4[i] = -9;
      }
      success2 = 0;
      success3 = 0;
      success4 = 0;

      shmem_barrier_all();

      shmem_get32(dest2, src2, N, nextpe);
      shmem_get64(dest3, src3, N, nextpe);
      shmem_get128(dest4, src4, N, nextpe);

      shmem_barrier_all();

      if(me == 0){
        for (i = 0; i < N; i += 1) {
          if(dest2[i] != ( 1)){
            success2=1;
          }
          if(dest3[i] != ( 1)){
            success3=1;
          }
          if(dest4[i] != ( 1)){
            success4=1;
          }
        }
        report_result("shmem_get32", success2);
        report_result("shmem_get64", success3);
        report_result("shmem_get128", success4);
      }
    }
    else if(sizeof(int)==8){
      for (i = 0; i < N; i += 1) {
        dest1[i] = -9;
        dest2[i] = -9;
        dest3[i] = -9;
      }
      success1 = 0;
      success2 = 0;
      success3 = 0;

      shmem_barrier_all();

      shmem_get32(dest1, src1, N, nextpe);
      shmem_get64(dest2, src2, N, nextpe);
      shmem_get128(dest3, src3, N, nextpe);

      shmem_barrier_all();

      if(me == 0){
        for (i = 0; i < N; i += 1) {
          if(dest1[i] != ( 1)){
            success1=1;
          }
          if(dest2[i] != ( 1)){
            success2=1;
          }
          if(dest3[i] != ( 1)){
            success3=1;
          }

        }
        report_result("shmem_get32", success1);
        report_result("shmem_get64", success2);
        report_result("shmem_get128", success3);
      }
    }

    /* Testing shmem_iget32, shmem_iget64, shmem_iget128 */
    /* Strided gets: every second source element of the last PE, packed densely. */
    shmem_barrier_all();
    if(sizeof(int)==4){
      for (i = 0; i < N; i += 1) {
        dest2[i] = -9;
        dest3[i] = -9;
        dest4[i] = -9;
      }
      success2 = 0;
      success3 = 0;
      success4 = 0;

      shmem_barrier_all();

      shmem_iget32(dest2, src2, 1, 2, N/2, npes-1);
      shmem_iget64(dest3, src3, 1, 2, N/2, npes-1);
      shmem_iget128(dest4, src4, 1, 2, N/2, npes-1);

      shmem_barrier_all();

      if(me == 0){
        for (i = 0; i < N/2; i += 1) {
          if(dest2[i] != (npes-1)){
            success2=1;
          }
          if(dest3[i] != (npes-1)){
            success3=1;
          }
          if(dest4[i] != (npes-1)){
            success4=1;
          }
        }
        report_result("shmem_iget32", success2);
        report_result("shmem_iget64", success3);
        report_result("shmem_iget128", success4);
      }
    }
    else if(sizeof(int)==8){
      for (i = 0; i < N; i += 1) {
        dest1[i] = -9;
        dest2[i] = -9;
        dest3[i] = -9;
      }
      success1 = 0;
      success2 = 0;
      success3 = 0;

      shmem_barrier_all();

      shmem_iget32(dest1, src1, 1, 2, N/2, npes-1);
      shmem_iget64(dest2, src2, 1, 2, N/2, npes-1);
      shmem_iget128(dest3, src3, 1, 2, N/2, npes-1);

      shmem_barrier_all();

      if(me == 0){
        for (i = 0; i < N/2; i += 1) {
          if(dest1[i] != (npes-1)){
            success1=1;
          }
          if(dest2[i] != (npes-1)){
            success2=1;
          }
          if(dest3[i] != (npes-1)){
            success3=1;
          }

        }
        report_result("shmem_iget32", success1);
        report_result("shmem_iget64", success2);
        report_result("shmem_iget128", success3);
      }
    }

    /*Testing shmem_short_iget, shmem_int_iget, shmem_long_iget, shmem_double_iget, shmem_float_iget */
    for (i = 0; i < N; i += 1) {
      dest1[i] = -9;
      dest2[i] = -9;
      dest3[i] = -9;
      dest6[i] = -9;
      dest7[i] = -9;
    }
    success1 = 0;
    success2 = 0;
    success3 = 0;
    success6 = 0;
    success7 = 0;

    shmem_barrier_all();

    shmem_short_iget(dest1, src1, 1, 2, N/2, npes-1);
    shmem_int_iget(dest2, src2, 1, 2, N/2, npes-1);
    shmem_long_iget(dest3, src3, 1, 2, N/2, npes-1);
    shmem_double_iget(dest6, src6, 1, 2, N/2, npes-1);
    shmem_float_iget(dest7, src7, 1, 2, N/2, npes-1);

    shmem_barrier_all();

    if(me == 0){
      for (i = 0; i < N/2; i += 1) {
        if(dest1[i] != (npes-1)){
          success1=1;
        }
        if(dest2[i] != (npes-1)){
          success2=1;
        }
        if(dest3[i] != (npes-1)){
          success3=1;
        }
        if(dest6[i] != (npes-1)){
          success6=1;
        }
        if(dest7[i] != (npes-1)){
          success7=1;
        }
      }

      report_result("shmem_short_iget", success1);
      report_result("shmem_int_iget", success2);
      report_result("shmem_long_iget", success3);
      report_result("shmem_double_iget", success6);
      report_result("shmem_float_iget", success7);

    }



    /* Testing shmem_double_g, shmem_float_g, shmem_int_g, shmem_long_g, shmem_short_g */
    shmem_barrier_all();

    dest9 = shmem_short_g(src9, nextpe);
    dest10 = shmem_int_g(src10, nextpe);
    dest11 = shmem_long_g(src11, nextpe);
    dest12 = shmem_double_g(src12, nextpe);
    dest13 = shmem_float_g(src13, nextpe);

    shmem_barrier_all();

    if(me == 0){
      report_result("shmem_short_g", dest9 != 1);
      report_result("shmem_int_g", dest10 != 1);
      report_result("shmem_long_g", dest11 != 1);
      report_result("shmem_double_g", dest12 != 1);
      report_result("shmem_float_g", dest13 != 1);
    }

    shmem_barrier_all();


    /* Release everything, including the buffers that were previously leaked. */
    shfree(src1);
    shfree(src2);
    shfree(src3);
    shfree(src4);
    shfree(src5);
    shfree(src6);
    shfree(src7);
    shfree(src8);
    shfree(src9);
    shfree(src10);
    shfree(src11);
    shfree(src12);
    shfree(src13);
    free(dest8);
  }
  else{
    printf("Number of PEs must be > 1 to test shmem get, test skipped\n");
  }
  return 0;
}
Esempio n. 6
0
/*
 * shmem_collect32 test with a different contribution length on every PE.
 *
 * PE p contributes p+1 elements holding 0..p.  After the collect, the block
 * that came from PE p starts at offset displ[p] = 0 + 1 + ... + p in the
 * target and must again hold 0..p.
 *
 * Returns TC_PASS when every block matches, TC_FAIL on a mismatch, and
 * TC_SETUP_FAIL when run on a single PE or when an allocation fails.
 */
int osh_coll_tc9(const TE_NODE *node, int argc, const char *argv[])
{
  /* General initialisations			*/

  int rc = TC_PASS;

  int ii, numprocs, count, d, nlong;
  int32_t *source, *target, *displ;
  long *pSync;

  UNREFERENCED_PARAMETER(node);
  UNREFERENCED_PARAMETER(argc);
  UNREFERENCED_PARAMETER(argv);

  numprocs = _num_pes();

  /* Number of elements this PE contributes. */
  nlong = _my_pe() + 1;

  source = NULL;
  displ = NULL;
  target = NULL;
  pSync = NULL;

  if (numprocs == 1)
  {
    log_debug(OSH_TC, "Using more than 1 CPU makes the tests of this program more interesting\n");
    return TC_SETUP_FAIL;
  }

  displ = malloc(sizeof(*displ) * numprocs);
  if (displ == NULL)
    return TC_SETUP_FAIL;

  /* displ[p] = start of PE p's block; count ends as the total element count. */
  count = 0;
  for (ii = 0; ii < numprocs; ii++)
  {
    displ[ii] = count;
    count = count + ii + 1;
  }

  pSync = shmalloc(sizeof(long) *_SHMEM_COLLECT_SYNC_SIZE);
  target = shmalloc(sizeof(*target) * count);
  source = shmalloc(sizeof(*source) * numprocs);

  if (pSync == NULL || target == NULL || source == NULL)
  {
    rc = TC_SETUP_FAIL;
  }
  else
  {
    for (ii=0; ii < _SHMEM_COLLECT_SYNC_SIZE; ii++)
      pSync[ii] = _SHMEM_SYNC_VALUE;

    for (ii = 0; ii < count; ii++)
      target[ii] = 0;

    for (ii = 0; ii < nlong; ii++)
      source[ii] = ii;

    shmem_barrier_all();		/* Wait for all CPUs to initialize pSync */

    /* Collect function				*/

    shmem_collect32( target, source, nlong, 0, 0,
                   numprocs, pSync );

    /*
     * Check each PE's block.  The offset is taken at the top of the
     * iteration so displ[] is never read past its last element (the old
     * code fetched displ[numprocs] after the final block).
     */
    for (ii = 0; ii < numprocs; ii++)
    {
      d = displ[ii];
      for (count = 0; count <= ii; count++)
        if (target[d + count] != count)
          rc = TC_FAIL;
    }
  }

  /* Finalizes					*/
  if (source != NULL)
    shfree(source);
  if (target != NULL)
    shfree(target);
  if (pSync != NULL)
    shfree(pSync);
  free(displ);

  return rc;
}
/*
 * Runs FUNC_VALUE __cycle_count times in a loop, alternating between two
 * pSync slots, two pWrk slots and the two halves of the static source/target
 * buffers, with no barrier between iterations.
 *
 * The active set passed to FUNC_VALUE starts at PE 0 with logPE_stride 1 and
 * has (num_proc / 2 + num_proc % 2) members.  Each PE contributes
 * (my_proc + 1); even PEs expect the bitwise OR of every odd k in
 * 1..num_proc, odd PEs expect the target to keep DEFAULT_VALUE.
 * NOTE(review): FUNC_VALUE is defined elsewhere - presumably an OR
 * reduction, given how the expected value is built; confirm.
 *
 * Returns TC_PASS, TC_FAIL on a mismatch, or TC_SETUP_FAIL when a symmetric
 * allocation fails.
 */
static int test_item9(void)
{
    int rc = TC_PASS;
    /* Static so the buffers are symmetric; two halves of MAX_BUFFER_SIZE each. */
    static TYPE_VALUE target_addr[MAX_BUFFER_SIZE * 2];
    static TYPE_VALUE source_addr[MAX_BUFFER_SIZE * 2];
    TYPE_VALUE source_value = 0;
    TYPE_VALUE expect_value = 0;
    int num_proc = 0;
    int my_proc = 0;
    long* pSyncMult = NULL;
    TYPE_VALUE* pWrkMult = NULL;
    int pSyncNum = 2;   /* number of alternating pSync slots */
    int pWrkNum = 2;    /* number of alternating pWrk slots */

    num_proc = _num_pes();
    my_proc = _my_pe();

    pSyncMult = shmalloc(sizeof(*pSyncMult) * pSyncNum * _SHMEM_REDUCE_SYNC_SIZE);
    if (pSyncMult)
    {
        TYPE_VALUE value = DEFAULT_VALUE;
        int i = 0;
        int j = 0;
        long cur_buf_size = 0;

        /* Every slot of the combined pSync array starts at the sync value. */
        for ( j = 0; j < pSyncNum * _SHMEM_REDUCE_SYNC_SIZE; j++ )
        {
            pSyncMult[j] = _SHMEM_SYNC_VALUE;
        }

        /* Give some time to all PE for setting their values */
        shmem_barrier_all();

        pWrkMult = shmalloc(sizeof(*pWrkMult) * pWrkNum * sys_max(MAX_BUFFER_SIZE, _SHMEM_REDUCE_MIN_WRKDATA_SIZE));
        if (pWrkMult)
        {
            /* Source holds this PE's contribution, target the sentinel. */
            value = DEFAULT_VALUE;
            source_value = (TYPE_VALUE)(my_proc + 1);
            fill_buffer((void *)source_addr, MAX_BUFFER_SIZE * 2, (void *)&source_value, sizeof(source_value));
            fill_buffer((void *)target_addr, MAX_BUFFER_SIZE * 2, (void *)&value, sizeof(value));
            shmem_barrier_all();
            /* Stop at the first failing iteration. */
            for (i = 0; (i < __cycle_count) && (rc == TC_PASS); i++)
            {
                /* Buffer length grows with i up to MAX_BUFFER_SIZE, never below 1. */
                cur_buf_size = sys_max(1, (i + 1) * MAX_BUFFER_SIZE / __cycle_count);
                /* Set initial target value */
                value = DEFAULT_VALUE;

                /* Set my value */
                source_value = (TYPE_VALUE)(my_proc + 1);

                /* Define expected value */
                expect_value = 0;
                if (my_proc % 2)    expect_value = DEFAULT_VALUE;
                else
                {
                    /* OR together every odd k in 1..num_proc. */
                    int k = num_proc;
                    while (k)
                    {
                        if (k % 2)  expect_value |= k;
                        k--;
                    }
                }

                int in_active_set = check_within_active_set(0, 1, ((num_proc / 2) + (num_proc % 2)), my_proc, num_proc);

                /* PEs outside the active set neither call FUNC_VALUE nor check anything. */
                if ( in_active_set ) {
                    /* Put value to peer */
                    FUNC_VALUE(target_addr + (i % 2) * MAX_BUFFER_SIZE, source_addr + (i % 2) * MAX_BUFFER_SIZE, cur_buf_size, 0, 1, ((num_proc / 2) + (num_proc % 2)), pWrkMult + (i % pWrkNum) * sys_max(MAX_BUFFER_SIZE, _SHMEM_REDUCE_MIN_WRKDATA_SIZE),  pSyncMult + (i % pSyncNum) * _SHMEM_REDUCE_SYNC_SIZE);
                    /* A zero result from the comparison is treated as "all elements match". */
                    rc = (!compare_buffer_with_const(target_addr + (i % 2) * MAX_BUFFER_SIZE, cur_buf_size, &expect_value, sizeof(expect_value)) ? TC_PASS : TC_FAIL);

                    /* NOTE(review): "actual" prints `value`, which is DEFAULT_VALUE here, not the data received. */
                    log_debug(OSH_TC, "my#%d source = %lld expected = %lld actual = %lld buffer size = %lld\n",
                                       my_proc, (INT64_TYPE)source_value, (INT64_TYPE)expect_value, (INT64_TYPE)value, (INT64_TYPE)cur_buf_size);

                    if (rc)
                    {
                        /* On failure, dump a small window of the buffer around the reported index. */
                        TYPE_VALUE* check_addr = target_addr + (i % 2) * MAX_BUFFER_SIZE;
                        int odd_index = compare_buffer_with_const(check_addr, cur_buf_size, &expect_value, sizeof(expect_value));
                        int show_index = (odd_index > 1 ? odd_index - 2 : 0);
                        int show_size = sizeof(*check_addr) * sys_min(3, cur_buf_size - odd_index - 1);

                        log_debug(OSH_TC, "index of incorrect value: 0x%08X (%d)\n", odd_index - 1, odd_index - 1);
                        log_debug(OSH_TC, "buffer interval: 0x%08X - 0x%08X\n", show_index, show_index + show_size);
                        show_buffer(check_addr + show_index, show_size);
                    }
                    /* Restore the half just used so it is clean when reused two iterations later. */
                    fill_buffer((void *)(source_addr + (i % 2) * MAX_BUFFER_SIZE), cur_buf_size, (void *)&source_value, sizeof(source_value));
                    fill_buffer((void *)(target_addr + (i % 2) * MAX_BUFFER_SIZE ), cur_buf_size, (void *)&value, sizeof(value));
                }
            }
            shfree(pWrkMult);
        } else {
            rc = TC_SETUP_FAIL;
        }
        shfree(pSyncMult);
    } else {
        rc = TC_SETUP_FAIL;
    }

    return rc;
}
/****************************************************************************
 * Test Case processing procedure
 ***************************************************************************/
int osh_reduce_tc8(const TE_NODE *node, int argc, const char *argv[])
{
    int rc;

    /* Setup: a failed option parse or a failed pSync allocation aborts the case. */
    if (__parse_opt(node, argc, argv) != TC_PASS)
    {
        rc = TC_SETUP_FAIL;
        goto cleanup;
    }

    pSync = shmalloc(sizeof(*pSync) * _SHMEM_REDUCE_SYNC_SIZE);
    if (!pSync)
    {
        rc = TC_SETUP_FAIL;
        goto cleanup;
    }

    /*
     * Items run in order.  Each one is logged and followed by a barrier;
     * the first item that does not pass ends the sequence.
     */

    /* Every PE does reduction of the single value as symmetric data object to itself */
    rc = test_item1();
    log_item(node, 1, rc);
    shmem_barrier_all();
    if (rc != TC_PASS)
        goto cleanup;

    /* All PEs reduce the single value */
    rc = test_item2();
    log_item(node, 2, rc);
    shmem_barrier_all();
    if (rc != TC_PASS)
        goto cleanup;

    /* As item 1, but target and source are the same array */
    rc = test_item3();
    log_item(node, 3, rc);
    shmem_barrier_all();
    if (rc != TC_PASS)
        goto cleanup;

    /* As item 2, but target and source are the same array */
    rc = test_item4();
    log_item(node, 4, rc);
    shmem_barrier_all();
    if (rc != TC_PASS)
        goto cleanup;

    /* Every PE does reduction of the buffer as symmetric data object to itself */
    rc = test_item5();
    log_item(node, 5, rc);
    shmem_barrier_all();
    if (rc != TC_PASS)
        goto cleanup;

    /* All PEs reduce the buffer */
    rc = test_item6();
    log_item(node, 6, rc);
    shmem_barrier_all();
    if (rc != TC_PASS)
        goto cleanup;

    /* Even PEs reduce the dynamic buffer */
    rc = test_item7();
    log_item(node, 7, rc);
    shmem_barrier_all();
    if (rc != TC_PASS)
        goto cleanup;

    /* Even PEs reduce the static buffer */
    rc = test_item8();
    log_item(node, 8, rc);
    shmem_barrier_all();
    if (rc != TC_PASS)
        goto cleanup;

    /* Reduce calls in a loop with alternating pSync and pWrk arrays (no barrier between iterations) */
    rc = test_item9();
    log_item(node, 9, rc);
    shmem_barrier_all();

cleanup:
    if (pSync)
    {
        shfree(pSync);
    }

    return rc;
}
Esempio n. 9
0
/*
 * Broadcast latency benchmark: for message sizes doubling from one 32-bit
 * word up to max_msg_size bytes, times shmem_broadcast32 rooted at PE 0 and
 * reports the average, minimum and maximum latency across all PEs.
 */
int main(int argc, char *argv[])
{
    /* Static so they can serve as symmetric reduction operands. */
    static double avg_time = 0.0, max_time = 0.0, min_time = 0.0;
    static double latency = 0.0;
    int64_t t_start = 0, t_stop = 0, timer = 0;
    char *buffer = NULL;
    int max_msg_size = 1048576, full = 0;
    int rank, numprocs;
    int size, skip;
    int iter = 0;
    int k;

    /* Every synchronisation array starts out at the sync value. */
    for (k = 0; k < _SHMEM_BCAST_SYNC_SIZE; k++) {
        pSyncBcast1[k] = _SHMEM_SYNC_VALUE;
    }
    for (k = 0; k < _SHMEM_BCAST_SYNC_SIZE; k++) {
        pSyncBcast2[k] = _SHMEM_SYNC_VALUE;
    }
    for (k = 0; k < _SHMEM_REDUCE_SYNC_SIZE; k++) {
        pSyncRed1[k] = _SHMEM_SYNC_VALUE;
    }
    for (k = 0; k < _SHMEM_REDUCE_SYNC_SIZE; k++) {
        pSyncRed2[k] = _SHMEM_SYNC_VALUE;
    }

    start_pes(0);
    rank = _my_pe();
    numprocs = _num_pes();

    if (process_args(argc, argv, rank, &max_msg_size, &full)) {
        return 0;
    }

    if (numprocs < 2) {
        if (rank == 0) {
            fprintf(stderr, "This test requires at least two processes\n");
        }
        return -1;
    }
    print_header(rank, full);

    buffer = shmalloc(max_msg_size * sizeof(char));
    if (buffer == NULL) {
        fprintf(stderr, "malloc failed.\n");
        exit(1);
    }

    memset(buffer, 1, max_msg_size);

    /* `size` counts 32-bit elements, not bytes. */
    for (size = 1; size <= max_msg_size/sizeof(uint32_t); size *= 2) {
        /* Large messages use their own warm-up and iteration counts. */
        if (size > LARGE_MESSAGE_SIZE) {
            skip = SKIP_LARGE;
            iterations = iterations_large;
        } else {
            skip = SKIP;
        }

        timer = 0;
        for (iter = 0; iter < iterations + skip; iter++) {
            t_start = TIME();
            /* Alternate the pSync array between consecutive broadcasts. */
            if (iter % 2 == 0) {
                shmem_broadcast32(buffer, buffer, size, 0, 0, 0, numprocs, pSyncBcast2);
            } else {
                shmem_broadcast32(buffer, buffer, size, 0, 0, 0, numprocs, pSyncBcast1);
            }
            t_stop = TIME();

            /* The first `skip` iterations are warm-up and are not timed. */
            if (iter >= skip) {
                timer += t_stop - t_start;
            }
            shmem_barrier_all();
        }
        shmem_barrier_all();
        latency = (1.0 * timer) / iterations;

        shmem_double_min_to_all(&min_time, &latency, 1, 0, 0, numprocs, pWrk1, pSyncRed1);
        shmem_double_max_to_all(&max_time, &latency, 1, 0, 0, numprocs, pWrk2, pSyncRed2);
        shmem_double_sum_to_all(&avg_time, &latency, 1, 0, 0, numprocs, pWrk1, pSyncRed1);
        avg_time = avg_time/numprocs;

        print_data(rank, full, size*sizeof(uint32_t), avg_time, min_time, max_time, iterations);
    }

    shfree(buffer);
    return EXIT_SUCCESS;
}
Esempio n. 10
0
/*
 * Exchanges the permutation index lists described by `pm` between
 * processors using SHMEM puts.
 *
 * Steps, as visible in this function:
 *   1. each processor tells every destination how many index ints it will
 *      send (lsize -> the remote rsize slot for this processor);
 *   2. _PERM_CleanIndices() is called with the size and index arrays and
 *      yields lindbase/rindbase (its details are not visible here);
 *   3. each receiver publishes the address of its rind[i] buffer in the
 *      sender's rptr slot;
 *   4. each sender waits for that address, puts its lind[i] list there and,
 *      after a fence, sets the receiver's rflag slot to 1;
 *   5. the local (self) list is copied with memcpy.
 *
 * The ring-walk loops previously advanced with `i = cond ? 0 : i++` (and the
 * mirrored `i--` form).  That modifies `i` twice without sequencing, which is
 * undefined behaviour and can leave `i` unchanged; they now use i + 1 / i - 1.
 *
 * NOTE(review): the results of shmalloc() for lsize/rsize are not checked;
 * this function has no error path to report a failure through.
 */
void _PERM_IR(_permmap* const pm) {
  const int one = 1;
  int * rindbase;
  int * lindbase;
  int * const restrict lsize = (int *)shmalloc(_PROCESSORS * sizeof(int));
  int * const restrict rsize = (int *)shmalloc(_PROCESSORS * sizeof(int));
  int * restrict * const restrict lind = pm->lind;
  int * restrict * const restrict rind = pm->rind;
  char * restrict * const restrict rptr = pm->rptr;
  int * const restrict rflag = pm->rflag;
  int* addr;
  int i, j;

  /* lind[i][0] holds the length of the list destined for processor i. */
  for (i = 0; i < _PROCESSORS; i++) {
    lsize[i] = lind[i] ? lind[i][0] : 0;
    rsize[i] = 0;
  }
  shmem_barrier_all();
  /* Walk the ring upwards, starting just after this processor. */
  for (i = (_INDEX == _PROCESSORS - 1) ? 0 : _INDEX+1;
       i != _INDEX;
       i = (i == _PROCESSORS - 1) ? 0 : i + 1) {
    if (lsize[i] > 0) {
#ifdef _SHMEM_PERMUTE_DEBUG
      printf("%d sending count to %d\n", _INDEX, i);
  fflush(stdout);
#endif
      shmem_int_put(&(rsize[_INDEX]), &(lsize[i]), 1, i);
    }
  }
  rsize[_INDEX] = lsize[_INDEX];
  shmem_barrier_all();
#ifdef _SHMEM_PERMUTE_DEBUG
  sleep(_PROCESSORS);
#endif
  _PERM_CleanIndices(lsize, rsize, lind, rind, &lindbase, &rindbase);

#ifdef _SHMEM_PERMUTE_DEBUG
  sleep(_INDEX);
  printf("FROM PROCESSOR %d\n", _INDEX);
  printf("  LSIZE = ");
  for (i = 0; i < _PROCESSORS; i++) {
    printf("%d ", lsize[i]);
  }
  printf("\n");
  printf("  RSIZE = ");
  for (i = 0; i < _PROCESSORS; i++) {
    printf("%d ", rsize[i]);
  }
  printf("\n");

  printf("  PROCMAP: size = %d, # elts = %d, encoded = %d :: ", pm->procmap[0], pm->procmap[1], pm->procmap[2]);
   for (j = 3; j < pm->procmap[0]; j++) {
    printf("%d ", pm->procmap[j]);
  }
  printf("\n");
  for (i = 0; i < _PROCESSORS; i++) {
    if (lind[i] != 0) {
      printf("  TO PROCESSOR %d: ", i);
      printf("size = %d, # elts = %d, encoded = %d :: ", lind[i][0], lind[i][1], lind[i][2]);
      for (j = 3; j < lind[i][0]; j++) {
	printf("%d ", lind[i][j]);
      }
      printf("\n");
    }
  }
  printf("\n");
  fflush(stdout);
  sleep(_PROCESSORS-_INDEX);
#endif

  /* Publish the address of each receive buffer to the processor that will fill it. */
  for (i = (_INDEX == _PROCESSORS - 1) ? 0 : _INDEX+1;
       i != _INDEX;
       i = (i == _PROCESSORS - 1) ? 0 : i + 1) {
    if (rsize[i] > 0) {
#ifdef _SHMEM_PERMUTE_DEBUG
      printf("%d sending rind address to %d\n", _INDEX, i);
  fflush(stdout);
#endif
      rflag[_INDEX] = 0;
      shmem_put((void*)&(rptr[_INDEX]), (void*)&(rind[i]), 1, i);
    }
  }
#ifdef _SHMEM_PERMUTE_DEBUG
  sleep(_PROCESSORS);
#endif
  /* Walk the ring downwards: wait for each destination's address, then send the list. */
  for (i = (_INDEX == 0) ? _PROCESSORS-1 : _INDEX-1;
       i != _INDEX;
       i = (i == 0) ? _PROCESSORS-1 : i - 1) {
    if (lsize[i] > 0) {
#ifdef _SHMEM_PERMUTE_DEBUG
      printf("%d waiting for rind address from %d, sending lind\n", _INDEX, i);
  fflush(stdout);
#endif
      shmem_wait((long*)&(rptr[i]), 0);
      addr = (int*)rptr[i];
      rptr[i] = 0;   /* reset the slot so the next wait on it blocks again */
      shmem_int_put(addr, lind[i], lsize[i], i);
    }
  }
#ifdef _SHMEM_PERMUTE_DEBUG
  sleep(_PROCESSORS);
#endif
  /* Fence between the data puts above and the flag puts below. */
  shmem_fence();
  for (i = (_INDEX == 0) ? _PROCESSORS-1 : _INDEX-1;
       i != _INDEX;
       i = (i == 0) ? _PROCESSORS-1 : i - 1) {
    if (lsize[i] > 0) {
#ifdef _SHMEM_PERMUTE_DEBUG
      printf("IR %d sending one to %d\n", _INDEX, i);
  fflush(stdout);
#endif
      shmem_int_put(&(rflag[_INDEX]), &one, 1, i);
    }
  }
  /* The list addressed to this processor itself is copied locally. */
  if (lsize[_INDEX] > 0) {
    memcpy(rind[_INDEX], lind[_INDEX], lsize[_INDEX]*sizeof(int));
  }
  pm->lindbase = lindbase;
  pm->rindbase = rindbase;
  shfree(lsize);
  shfree(rsize);
}
Esempio n. 11
0
static int test_item4(void)
{
    int rc = TC_PASS;
    TYPE_VALUE* shmem_addr = NULL;
    TYPE_VALUE* recv_addr = NULL;
    TYPE_VALUE my_value = 0;
    TYPE_VALUE peer_value = 0;
    TYPE_VALUE expect_value = 0;
    int num_proc = 0;
    int my_proc = 0;
    int peer_proc = 0;

    num_proc = _num_pes();
    my_proc = _my_pe();

    shmem_addr = (TYPE_VALUE*)shmalloc(sizeof(*shmem_addr) * __max_buffer_size);
    recv_addr = (TYPE_VALUE*)sys_malloc(sizeof(*recv_addr) * __max_buffer_size);
    if (shmem_addr && recv_addr)
    {
        INT64_TYPE i = 0;
        long cur_buf_size = 0;

        my_value = 0;
        for (i = 0; (i < __cycle_count) && (rc == TC_PASS); i++)
        {
            /* Set my value */
            my_value = (my_proc % 2 ? 1 : -1) * (i * (MAX_VALUE / __cycle_count));
            cur_buf_size = sys_max(1, (i + 1) * __max_buffer_size / __cycle_count);
            fill_buffer((void *)shmem_addr, cur_buf_size, (void *)&my_value, sizeof(my_value));

            /* Give some time to all PE for setting their values */
            shmem_barrier_all();

            /* Define peer and it value */
            peer_proc = (my_proc + 1) % num_proc;
            peer_value = (peer_proc % 2 ? 1 : -1) * (i * (MAX_VALUE / __cycle_count));

            /* Define expected value */
            expect_value = peer_value;

            /* Get value from peer */
            FUNC_VALUE(recv_addr, shmem_addr, cur_buf_size, peer_proc);

            rc = (!compare_buffer_with_const(recv_addr, cur_buf_size, &expect_value, sizeof(expect_value)) ? TC_PASS : TC_FAIL);

            log_debug(OSH_TC, "my(#%d:%Lf) peer(#%d:%Lf) expected = %Lf buffer size = %lld\n",
                               my_proc, (long double)my_value, peer_proc, (long double)peer_value, (long double)expect_value, (INT64_TYPE)cur_buf_size);

            if (rc)
            {
                TYPE_VALUE* check_addr = recv_addr;
                int odd_index = compare_buffer_with_const(check_addr, cur_buf_size, &expect_value, sizeof(expect_value));
                int show_index = (odd_index > 1 ? odd_index - 2 : 0);
                int show_size = sizeof(*check_addr) * sys_min(3, cur_buf_size - odd_index - 1);

                log_debug(OSH_TC, "index of incorrect value: 0x%08X (%d)\n", odd_index - 1, odd_index - 1);
                log_debug(OSH_TC, "buffer interval: 0x%08X - 0x%08X\n", show_index, show_index + show_size);
                show_buffer(check_addr + show_index, show_size);
            }

            shmem_barrier_all();
        }
    }
    else
    {
        rc = TC_SETUP_FAIL;
    }

    if (recv_addr)
    {
        sys_free(recv_addr);
    }

    if (shmem_addr)
    {
        shfree(shmem_addr);
    }

    return rc;
}
Esempio n. 12
0
/*
 * Releases ptr by passing it to shfree().
 * The description argument is accepted but not used.
 */
void _ZPL_SYM_FREE(void *ptr, char* description) {
  (void)description;
  shfree(ptr);
}
Esempio n. 13
0
File: cswap.c Progetto: coti/oshmpi
/*
 * Atomic-operation exerciser: conditional swap, fetch-and-add and
 * fetch-and-increment on int, long and long long.
 *
 * Each of the `loops` iterations:
 *   1. allocates one symmetric int/long/long long per PE and seeds them
 *      with 4, 8 and 16;
 *   2. PE 0 runs a matching cswap, a restoring cswap and a non-matching
 *      cswap against PE 1 for each type, verifying both the returned value
 *      and the remote content after every step; all other PEs only check
 *      that their symmetric addresses are accessible from PE 0;
 *   3. every PE adds (me + 1) to itmp/ltmp/lltmp on PE 0 and PE 0 checks
 *      the sum 1 + 2 + ... + num_pes;
 *   4. every PE increments the src_* counters on PE 0 and PE 0 checks that
 *      they equal num_pes.
 *
 * A cswap verification failure terminates the calling PE with exit(1);
 * fadd/finc mismatches are only reported.  Returns 0 otherwise.
 */
int
main(int argc, char* argv[])
{
    int me, num_pes, l, pe;
    int Verbose = 0;

    start_pes(0);
    me = _my_pe();
    num_pes = _num_pes();

    /*
     * PE 0 addresses PE 1 in every cswap step below, so the test is only
     * meaningful (and only valid) with at least two PEs.  All PEs see the
     * same num_pes, so they all take this exit together.
     */
    if (num_pes < 2) {
        printf("PE-%d cswap test requires at least 2 PEs (have %d)\n",
                me, num_pes);
        exit(1);
    }

    for (l = 0 ; l < loops ; ++l) {

        if ((src_int = shmalloc(sizeof(int))) == NULL) {
            printf("PE-%d int shmalloc() failed?\n", me);
            exit(1);
        }
        *src_int = 4;
        dst_int = itmp = 0;

        if ((src_long = shmalloc(sizeof(long))) == NULL) {
            printf("PE-%d long shmalloc() failed?\n", me);
            exit(1);
        }
        *src_long = 8;
        dst_long = ltmp = 0;

        if ((src_llong = shmalloc(sizeof(long long))) == NULL) {
            printf("PE-%d long long shmalloc() failed?\n", me);
            exit(1);
        }
        *src_llong = 16;
        dst_llong = lltmp = 0;

        /* Every PE must have seeded its values before PE 0 starts swapping. */
        shmem_barrier_all();

        if ( me == 0 ) {
            /* integer swap */
            itmp = shmem_int_g(src_int,1);
            Vprintf("PE-0 Initial Conditions(int) local %d rem(%d)\n",
                    dst_int,itmp);

            /* cond matches the remote 4, so the remote becomes 0 */
            dst_int = shmem_int_cswap(src_int,*src_int,0,1);
            if (dst_int != 4) {
                printf("PE-%d dst_int %d != 4?\n",me,dst_int);
                exit(1);
            }
            /* verify remote data */
            itmp = shmem_int_g(src_int,1);
            if (itmp != 0) {
                printf("PE-%d rem %d != 0?\n",me,itmp);
                exit(1);
            }
            Vprintf("PE-0 1st int_cswap done: local %d rem(%d)\n",dst_int,itmp);

            /* cond 0 matches again, so the fetched 4 is written back */
            dst_int = shmem_int_cswap(src_int,0,dst_int,1);
            if (dst_int != 0) {
                printf("PE-%d dst_int %d != 0?\n",me,dst_int);
                exit(1);
            }
            /* verify remote data */
            itmp = shmem_int_g(src_int,1);
            if (itmp != 4) {
                printf("PE-%d rem %d != 4?\n",me,itmp);
                exit(1);
            }
            Vprintf("PE-0 2nd int_swap done: local %d rem(%d)\n",dst_int,itmp);

            /* cswap() should not swap as cond(0) != remote(4) */
            dst_int = shmem_int_cswap(src_int,0,0,1);
            if (dst_int != 4) {
                printf("PE-%d int no-swap returned dst_int %d != 4?\n",
                        me,dst_int);
                exit(1);
            }
            /* verify previous cswap() did not swap */
            itmp = shmem_int_g(src_int,1);
            if (itmp != 4) {
                printf("PE-%d failed cond int_cswap() swapped? rem(%d) != 4?\n",
                        me,itmp);
                exit(1);
            }

            /* long swap */
            ltmp = shmem_long_g(src_long,1);
            Vprintf("PE-0 Initial Conditions(long) local %ld rem(%ld)\n",
                    dst_long,ltmp);

            dst_long = shmem_long_cswap(src_long,*src_long,0,1);
            if (dst_long != 8) {
                printf("PE-%d dst_long %ld != 8?\n",me,dst_long);
                exit(1);
            }
            /* verify remote data */
            ltmp = shmem_long_g(src_long,1);
            if (ltmp != 0) {
                printf("PE-%d long rem(%ld) != 0?\n",me,ltmp);
                exit(1);
            }
            Vprintf("PE-0 1st long_cswap done: local %ld rem(%ld)\n",
                    dst_long,ltmp);

            dst_long = shmem_long_cswap(src_long,0,dst_long,1);
            if (dst_long != 0) {
                printf("PE-%d dst_long %ld != 0?\n",me,dst_long);
                exit(1);
            }
            /* verify remote data */
            ltmp = shmem_long_g(src_long,1);
            if (ltmp != 8) {
                printf("PE-%d long rem(%ld) != 8?\n",me,ltmp);
                exit(1);
            }
            Vprintf("PE-0 2nd long_swap done: local %ld rem(%ld)\n",
                    dst_long,ltmp);

            /* cswap() should not swap as cond(0) != remote(8) */
            dst_long = shmem_long_cswap(src_long,0,0,1);
            if (dst_long != 8) {
                printf("PE-%d long no-swap returned dst_long %ld != 8?\n",
                        me,dst_long);
                exit(1);
            }
            /* verify previous cswap() did not swap */
            ltmp = shmem_long_g(src_long,1);
            if (ltmp != 8) {
                printf("PE-%d failed cond long_cswap() swapped? rem(%ld) != 8?\n",
                        me,ltmp);
                exit(1);
            }

            /* long long swap */
            lltmp = shmem_longlong_g(src_llong,1);
            Vprintf("PE-0 Initial Conditions(long long) local %lld rem(%lld)\n",
                    dst_llong,lltmp);

            dst_llong = shmem_longlong_cswap(src_llong,*src_llong,0,1);
            if (dst_llong != 16) {
                printf("PE-%d dst_llong %lld != 16?\n",me,dst_llong);
                exit(1);
            }
            /* verify remote data */
            lltmp = shmem_longlong_g(src_llong,1);
            if (lltmp != 0) {
                printf("PE-%d longlong rem(%lld) != 0?\n",me,lltmp);
                exit(1);
            }
            Vprintf("PE-0 1st longlong_cswap done: local %lld rem(%lld)\n",
                    dst_llong, lltmp);

            dst_llong = shmem_longlong_cswap(src_llong,0,dst_llong,1);
            if (dst_llong != 0) {
                printf("PE-%d dst_llong %lld != 0?\n",me,dst_llong);
                exit(1);
            }
            /* verify remote data */
            lltmp = shmem_longlong_g(src_llong,1);
            if (lltmp != 16) {
                printf("PE-%d long long rem(%lld) != 16?\n",me,lltmp);
                exit(1);
            }
            Vprintf("PE-0 2nd longlong_swap done: local %lld rem(%lld)\n",
                    dst_llong,lltmp);

            /* cswap() should not swap as cond(0) != remote(16) */
            dst_llong = shmem_longlong_cswap(src_llong,0,0,1);
            if (dst_llong != 16) {
                printf("PE-%d longlong no-swap returned dst_llong %lld != 16?\n",
                        me,dst_llong);
                exit(1);
            }
            /* verify previous cswap() did not swap */
            lltmp = shmem_longlong_g(src_llong,1);
            if (lltmp != 16) {
                printf("PE-0 failed cond longlong_cswap() swapped? rem(%lld) != 16?\n",
                        lltmp);
                exit(1);
            }
        }
        else {
            /* Non-zero PEs only confirm PE 0 can reach their symmetric data. */
            if (!shmem_addr_accessible(src_int,0)) {
                printf("PE-%d local src_int %p not accessible from PE-%d?\n",
                        me, (void*)src_int, 0);
                exit(1);
            }
            if (!shmem_addr_accessible(src_long,0)) {
                printf("PE-%d local src_long %p not accessible from PE-%d?\n",
                        me, (void*)src_long, 0);
                exit(1);
            }
            if (!shmem_addr_accessible(src_llong,0)) {
                printf("PE-%d local src_llong %p not accessible from PE-%d?\n",
                        me, (void*)src_llong, 0);
                exit(1);
            }
        }
        shmem_barrier_all();

        /* shmem_*fadd() exercise */

        /* PE 0 is the accumulation target, so only its copies are reset. */
        if (me == 0) {
            itmp = 0;
            ltmp = 0;
            lltmp = 0;
            *src_int = 0;
            *src_long = 0;
            *src_llong = 0;
        }
        shmem_barrier_all();

        (void)shmem_int_fadd( &itmp, me+1, 0 );
        (void)shmem_long_fadd( &ltmp, me+1, 0 );
        (void)shmem_longlong_fadd( &lltmp, me+1, 0 );

        shmem_barrier_all();

        if (me == 0) {
            int tot;

            /* expected sum of (pe + 1) over all PEs */
            for(pe=0,tot=0; pe < num_pes; pe++)
                tot += pe+1;

            if ( itmp != tot )
                printf("fadd() total %d != expected %d?\n",itmp,tot);

            if ( ltmp != (long)tot )
                printf("fadd() total %ld != expected %d?\n",ltmp,tot);

            if ( lltmp != (long long)tot )
                printf("fadd() total %lld != expected %d?\n",lltmp,tot);
        }
        shmem_barrier_all();

        /* shmem_*finc() exercise: one increment per PE lands on PE 0 */
        (void)shmem_int_finc(src_int,0);
        (void)shmem_long_finc(src_long,0);
        (void)shmem_longlong_finc(src_llong,0);

        shmem_barrier_all();

        if (me == 0) {
            int tot = num_pes;

            if ( *src_int != tot )
                printf("finc() total %d != expected %d?\n",*src_int,tot);

            if ( *src_long != (long)tot )
                printf("finc() total %ld != expected %d?\n",*src_long,tot);

            if ( *src_llong != (long long)tot )
                printf("finc() total %lld != expected %d?\n",*src_llong,tot);
        }
        shmem_barrier_all();

        shfree(src_int);
        shfree(src_long);
        shfree(src_llong);
    }

    if (Verbose)
        fprintf(stderr,"[%d] exit\n",_my_pe());

    return 0;
}
Esempio n. 14
0
/*
 * Exercises the typed and generic shmem swap routines.
 *
 * The same scenario is repeated for short, int, long, long long (when
 * HAVE_LONG_LONG is defined), float, double and finally the generic
 * shmem_swap (32-bit variant when SHMEM_C_GENERIC_32 is defined, the
 * long variant otherwise):
 *   - every PE allocates a symmetric array of n_pes elements and zeroes it;
 *   - every PE except PE 0 performs ITER swaps into slot [my_pe] on PE 0,
 *     each time writing a value one larger than before and checking that
 *     the value handed back is the one it wrote on the previous iteration
 *     (0 on the first);
 *   - PE 0 then checks that slot j holds j + ITER.
 *
 * Mismatches are reported on stderr with a "FAIL" prefix; they do not stop
 * the test.  The program exits with status 1 when run on fewer than two
 * PEs and returns 0 otherwise.
 */
int main(int argc, char **argv)
{
  int i,j;
  short     oldjs, oldxs, my_pes;
  int       oldji, oldxi, my_pei;
  long      oldjl, oldxl, my_pel;
  long long oldjll,oldxll,my_pell;
  float     oldjf, oldxf, my_pef;
  double    oldjd, oldxd, my_ped;
  int my_pe,n_pes;
  size_t max_elements_bytes;
  static short *xs;
  static int   *xi;
  static long  *xl;
  static long long *xll;
  static float  *xf;
  static double *xd;

  start_pes(0);
  my_pe = shmem_my_pe();
  n_pes = shmem_n_pes();
  /* per-type copies of the PE number; each test increments its own copy */
  my_pes = (short) my_pe;
  my_pei = (int)  my_pe;
  my_pel = (long) my_pe;
  my_pell = (long long) my_pe;
  my_pef = (float) my_pe;
  my_ped = (double) my_pe;
#ifdef HAVE_SET_CACHE_INV
  shmem_set_cache_inv();
#endif

/*  fail if trying to use only one processor  */
  if ( n_pes  <= 1 ){
        fprintf(stderr, "FAIL - test requires at least two PEs\n");
        exit(1);
  }

  if(my_pe == 0)
    fprintf(stderr, "shmem_swap(%s) n_pes=%d\n", argv[0],n_pes);

/*  test shmem_short_swap  */

  /*  shmalloc xs on all pes (only check the ones on PE 0)  */
  max_elements_bytes = (size_t) (sizeof(short) * n_pes);
  xs = shmalloc( max_elements_bytes );
  for(i=0; i<n_pes; i++)
    xs[i] = 0;
  shmem_barrier_all();

  oldjs = 0;
  for(i=0; i<ITER; i++) {
    if (my_pe != 0) {
      my_pes = my_pes + (short) 1;
      /* record PE value in xs[my_pe] -- save PE number */
      oldxs = shmem_short_swap(&xs[my_pe], my_pes, 0);
      if (oldxs != oldjs)
        fprintf(stderr, "FAIL PE %d of %d: i=%d, oldxs = %d expected = %d\n",
                         my_pe, n_pes, i, oldxs, oldjs);
      oldjs = my_pes;
    }
  }
  shmem_barrier_all();

  if (my_pe == 0) {  /* check xs[j] array vs PE# + ITER */
    i = (int) ITER + 1;
    for(j=1 ; j<n_pes; j++) {
      if (xs[j] != (short) i)
        fprintf(stderr, "FAIL PE %d of %d: xs[%d] = %d expected = %d\n",
                         my_pe, n_pes, j, xs[j],i);
      i++;
    }
  }
  shfree(xs);

/*  test shmem_int_swap  */

  /*  shmalloc xi on all pes (only check the ones on PE 0)  */
  max_elements_bytes = (size_t) (sizeof(int) * n_pes);
  xi = shmalloc( max_elements_bytes );
  for(i=0; i<n_pes; i++)
    xi[i] = 0;
  shmem_barrier_all();

  oldji = 0;
  for(i=0; i<ITER; i++) {
    if (my_pe != 0) {
      my_pei = my_pei + (int) 1;
      /* record PE value in xi[my_pe] -- save PE number */
      oldxi = shmem_int_swap(&xi[my_pe], my_pei, 0);
      if (oldxi != oldji)
        fprintf(stderr, "FAIL PE %d of %d: i=%d, oldxi = %d expected = %d\n",
                         my_pe, n_pes, i, oldxi, oldji);
      oldji = my_pei;
    }
  }
  shmem_barrier_all();

  if (my_pe == 0) {  /* check xi[j] array vs PE# + ITER */
    i = (int) ITER + 1;
    for(j=1 ; j<n_pes; j++) {
      if (xi[j] != i)
        fprintf(stderr, "FAIL PE %d of %d: xi[%d] = %d expected = %d\n",
                         my_pe, n_pes, j, xi[j],i);
      i++;
    }
  }
  shfree(xi);

/*  test shmem_long_swap  */

  /*  shmalloc xl on all pes (only check the ones on PE 0)  */
  max_elements_bytes = (size_t) (sizeof(long) * n_pes);
  xl = shmalloc( max_elements_bytes );
  for(i=0; i<n_pes; i++)
    xl[i] = 0;
  shmem_barrier_all();

  oldjl = 0;
  for(i=0; i<ITER; i++) {
    if (my_pe != 0) {
      my_pel = my_pel + (long) 1;
      /* record PE value in xl[my_pe] -- save PE number */
      oldxl = shmem_long_swap(&xl[my_pe], my_pel, 0);
      /* oldxl/oldjl are long, so they need %ld */
      if (oldxl != oldjl)
        fprintf(stderr, "FAIL PE %d of %d: i=%d, oldxl = %ld expected = %ld\n",
                         my_pe, n_pes, i, oldxl, oldjl);
      oldjl = my_pel;
    }
  }
  shmem_barrier_all();

  if (my_pe == 0) {  /* check xl[j] array vs PE# + ITER */
    i = (int) ITER + 1;
    for(j=1 ; j<n_pes; j++) {
      if (xl[j] != (long)i)
        fprintf(stderr, "FAIL PE %d of %d: xl[%d] = %ld expected = %d\n",
                         my_pe, n_pes, j, xl[j],i);
      i++;
    }
  }
  shfree(xl);

/*  test shmem_longlong_swap  */

#ifdef HAVE_LONG_LONG

  /*  shmalloc xll on all pes (only check the ones on PE 0)  */
  max_elements_bytes = (size_t) (sizeof(long long) * n_pes);
  xll = shmalloc( max_elements_bytes );
  for(i=0; i<n_pes; i++)
    xll[i] = 0;
  shmem_barrier_all();

  oldjll = 0;
  for(i=0; i<ITER; i++) {
    if (my_pe != 0) {
      my_pell = my_pell + (long long) 1;
      /* record PE value in xll[my_pe] -- save PE number */
      oldxll = shmem_longlong_swap(&xll[my_pe], my_pell, 0);
      /* oldxll/oldjll are long long, so they need %lld */
      if (oldxll != (long long) oldjll)
        fprintf(stderr, "FAIL PE %d of %d: i=%d, oldxll = %lld expected = %lld\n",
                         my_pe, n_pes, i, oldxll, oldjll);
      oldjll = my_pell;
    }
  }
  shmem_barrier_all();

  if (my_pe == 0) {  /* check xll[j] array vs PE# + ITER */
    i = (int) ITER + 1;
    for(j=1 ; j<n_pes; j++) {
      if (xll[j] != (long long) i)
        fprintf(stderr, "FAIL PE %d of %d: xll[%d] = %lld expected = %d\n",
                         my_pe, n_pes, j, xll[j],i);
      i++;
    }
  }
  shfree(xll);

#endif

/*  test shmem_float_swap  */

  /*  shmalloc xf on all pes (only use the ones on PE 0)  */
  max_elements_bytes = (size_t) (sizeof(float) * n_pes);
  xf = shmalloc( max_elements_bytes );
  for(i=0; i<n_pes; i++)
    xf[i] = (float) 0;
  shmem_barrier_all();

  oldjf = (float) 0;
  for(i=0; i<ITER; i++) {
    if (my_pe != 0) {
      my_pef = my_pef + (float) 1;
      /* record PE value in xf[my_pe] -- save PE number */
      oldxf = shmem_float_swap(&xf[my_pe], my_pef, 0);
      if (oldxf != oldjf)
        fprintf(stderr, "FAIL PE %d of %d: i=%d, oldxf = %10.2f expected = %10.2f\n",
                         my_pe, n_pes, i, oldxf, oldjf);
      oldjf = my_pef;
    }
  }
  shmem_barrier_all();

  if (my_pe == 0) {  /* check xf[j] array vs PE# + ITER */
    i = (int) ITER + 1;
    for(j=1 ; j<n_pes; j++) {
      /* report the index that was actually checked (j, not j-1) */
      if (xf[j] != (float) i)
        fprintf(stderr, "FAIL PE %d of %d: xf[%d] = %10.2f expected = %10.2f\n",
                         my_pe, n_pes, j, xf[j], (float)i);
      i++;
    }
  }
  shfree(xf);

/*  test shmem_double_swap  */

  /*  shmalloc xd on all pes (only use the ones on PE 0)  */
  max_elements_bytes = (size_t) (sizeof(double) * n_pes);
  xd = shmalloc( max_elements_bytes );
  for(i=0; i<n_pes; i++)
    xd[i] = (double) 0;
  shmem_barrier_all();

  oldjd = (double) 0;
  for(i=0; i<ITER; i++) {
    if (my_pe != 0) {
      my_ped = my_ped + (double) 1;
      /* record PE value in xd[my_pe] -- save PE number */
      oldxd = shmem_double_swap(&xd[my_pe], my_ped, 0);
      if (oldxd != oldjd)
        fprintf(stderr, "FAIL PE %d of %d: i=%d, oldxd = %10.2f expected = %10.2f\n",
                         my_pe, n_pes, i, oldxd, oldjd);
      oldjd = my_ped;
    }
  }
  shmem_barrier_all();

  if (my_pe == 0) {  /* check xd[j] array vs PE# + ITER */
    i = (int) ITER + 1;
    for(j=1 ; j<n_pes; j++) {
      if (xd[j] != (double) i)
        fprintf(stderr, "FAIL PE %d of %d: xd[%d] = %10.2f expected = %10.2f\n",
                         my_pe, n_pes, j, xd[j], (double)i);
      i++;
    }
  }
  shfree(xd);

#ifdef SHMEM_C_GENERIC_32

/*  test shmem_swap (GENERIC 32)  */

  /* restart from the PE number: the typed int test already advanced my_pei */
  my_pei = (int)  my_pe;
  /*  shmalloc xi on all pes (only check the ones on PE 0)  */
  max_elements_bytes = (size_t) (sizeof(int) * n_pes);
  xi = shmalloc( max_elements_bytes );
  for(i=0; i<n_pes; i++)
    xi[i] = 0;
  shmem_barrier_all();

  oldji = 0;
  for(i=0; i<ITER; i++) {
    if (my_pe != 0) {
      my_pei = my_pei + (int) 1;
      /* record PE value in xi[my_pe] -- save PE number */
      oldxi = shmem_swap(&xi[my_pe], my_pei, 0);
      if (oldxi != oldji)
        fprintf(stderr, "FAIL pe %d of %d: i=%d, oldxi = %d expected = %d\n",
                         my_pe, n_pes, i, oldxi, oldji);
      oldji = my_pei;
    }
  }
  shmem_barrier_all();

  if (my_pe == 0) {  /* check xi[j] array vs PE# + ITER */
    i = (int) ITER + 1;
    for(j=1 ; j<n_pes; j++) {
      if (xi[j] != i)
        fprintf(stderr, "FAIL pe %d of %d: xi[%d] = %d expected = %d\n",
                         my_pe, n_pes, j, xi[j],i);
      i++;
    }
  }
  shfree(xi);

#else

/*  test shmem_swap (GENERIC 64)  */

  /* restart from the PE number: the typed long test already advanced my_pel */
  my_pel = (long) my_pe;
  /*  shmalloc xl on all pes (only check the ones on PE 0)  */
  max_elements_bytes = (size_t) (sizeof(long) * n_pes);
  xl = shmalloc( max_elements_bytes );
  for(i=0; i<n_pes; i++)
    xl[i] = 0;
  shmem_barrier_all();

  oldjl = 0;
  for(i=0; i<ITER; i++) {
    if (my_pe != 0) {
      my_pel = my_pel + (long) 1;
      /* record PE value in xl[my_pe] -- save PE number */
      oldxl = shmem_swap(&xl[my_pe], my_pel, 0);
      /* oldxl/oldjl are long, so they need %ld */
      if (oldxl != oldjl)
        fprintf(stderr, "FAIL pe %d of %d: i=%d, oldxl = %ld expected = %ld\n",
                         my_pe, n_pes, i, oldxl, oldjl);
      oldjl = my_pel;
    }
  }
  shmem_barrier_all();

  if (my_pe == 0) {  /* check xl[j] array vs PE# + ITER */
    i = (int) ITER + 1;
    for(j=1 ; j<n_pes; j++) {
      if (xl[j] != (long)i)
        fprintf(stderr, "FAIL pe %d of %d: xl[%d] = %ld expected = %d\n",
                         my_pe, n_pes, j, xl[j],i);
      i++;
    }
  }
  shfree(xl);

#endif

  shmem_barrier_all();
#ifdef NEEDS_FINALIZE
  shmem_finalize(); 
#endif
  return 0;
}
Esempio n. 15
0
/*
 * Releases the object referred to by *ct by passing that value to shfree().
 * ct itself must be a valid pointer; *ct is not modified.
 */
void shmemx_ct_free(shmemx_ct_t *ct)
{
    shmemx_ct_t handle = *ct;

    shfree(handle);
}
Esempio n. 16
0
/*
 * shmem_getmem latency benchmark for exactly two PEs.
 *
 * Usage: one argument, "heap" or "global" (matched as a prefix), selecting
 * whether the send/receive buffers come from shmalloc() or from the
 * s_buf_original / r_buf_original objects.  Both buffers are rounded up to
 * a MESSAGE_ALIGNMENT boundary.
 *
 * For message sizes 1, 2, 4, ... up to MAX_MSG_SIZE, PE 0 issues
 * loop + skip gets from PE 1, timing only the iterations after the first
 * `skip`, and prints the average per-get time.
 *
 * Returns EXIT_FAILURE on a wrong PE count or bad arguments,
 * EXIT_SUCCESS otherwise.
 */
int main(int argc, char *argv[])
{
    int64_t t_start = 0, t_end = 0;
    char *src_heap, *dst_heap;      /* raw shmalloc() results, heap mode only */
    char *src, *dst;                /* aligned buffers actually used */
    int use_heap = 0;               /* default uses global */
    int alignment;
    int rank, npes;
    int msg_len;
    int k;

    start_pes(0);
    rank = _my_pe();
    npes = _num_pes();

    if (2 != npes) {
        if (0 == rank) {
            fprintf(stderr, "This test requires exactly two processes\n");
        }

        return EXIT_FAILURE;
    }

    if (2 != argc) {
        usage(rank);

        return EXIT_FAILURE;
    }

    if (strncmp(argv[1], "heap", strlen("heap")) == 0) {
        use_heap = 1;
    } else if (strncmp(argv[1], "global", strlen("global")) == 0) {
        use_heap = 0;
    } else {
        usage(rank);
        return EXIT_FAILURE;
    }

    alignment = MESSAGE_ALIGNMENT;

    /* ---- buffer setup: round each base address up to the alignment ---- */

    if (!use_heap) {

        src =
            (char *) (((unsigned long) s_buf_original + (alignment - 1)) /
                      alignment * alignment);

        dst =
            (char *) (((unsigned long) r_buf_original + (alignment - 1)) /
                      alignment * alignment);
    } else {

        src_heap = shmalloc(MYBUFSIZE);
        dst_heap = shmalloc(MYBUFSIZE);

        src =
            (char *) (((unsigned long) src_heap + (alignment - 1)) /
                      alignment * alignment);

        dst =
            (char *) (((unsigned long) dst_heap + (alignment - 1)) /
                      alignment * alignment);
    }

    /* ---- report header (PE 0 only) ---- */

    if (0 == rank) {
        fprintf(stdout, HEADER);
        fprintf(stdout, "%-*s%*s\n", 10, "# Size", FIELD_WIDTH, "Latency (us)");
        fflush(stdout);
    }

    for (msg_len = 1; msg_len <= MAX_MSG_SIZE; msg_len = (msg_len ? msg_len * 2 : 1)) {

        /* touch the data */
        for (k = 0; k < msg_len; k++) {
            src[k] = 'a';
            dst[k] = 'b';
        }

        /* large messages switch to a fixed 100 timed iterations, no warm-up */
        if (msg_len > large_message_size) {
            loop = loop_large = 100;
            skip = skip_large = 0;
        }

        shmem_barrier_all();

        if (0 == rank) {
            for (k = 0; k < loop + skip; k++) {
                /* start the clock once the warm-up iterations are done */
                if (k == skip) {
                    t_start = TIME();
                }

                shmem_getmem(dst, src, msg_len, 1);
            }

            t_end = TIME();
        }
        shmem_barrier_all();

        if (0 == rank) {
            double latency = (1.0 * (t_end-t_start)) / loop;

            fprintf(stdout, "%-*d%*.*f\n", 10, msg_len, FIELD_WIDTH,
                    FLOAT_PRECISION, latency);
            fflush(stdout);
        }
    }

    shmem_barrier_all();

    /* only heap mode owns memory that has to be returned */
    if (use_heap) {
        shfree(src_heap);
        shfree(dst_heap);
    }

    shmem_barrier_all();
    return EXIT_SUCCESS;
}
Esempio n. 17
0
/*
 * Ring test of FUNC_VALUE (NOTE(review): the argument pattern looks like a
 * conditional swap -- confirm against the FUNC_VALUE definition).
 *
 * Every PE allocates one symmetric TYPE_VALUE set to DEFAULT_VALUE and, for
 * each of __cycle_count cycles, calls FUNC_VALUE twice against the next PE
 * (my_proc + 1, wrapping):
 *   - first with num_proc + __cycle_count as the second argument,
 *   - then with the value the previous cycle is expected to have left there
 *     (DEFAULT_VALUE on the first cycle).
 * After each call the returned value must equal that same expected previous
 * value, otherwise the loop is abandoned.
 *
 * The verdict is taken from this PE's own cell after the final barrier: it
 * must hold (rank of the previous PE) + (__cycle_count - 1).
 *
 * Returns TC_PASS, TC_FAIL, or TC_SETUP_FAIL if the allocation failed.
 */
static int test_item2(void)
{
    int rc = TC_PASS;
    int num_proc = _num_pes();
    int my_proc = _my_pe();
    int peer_proc = 0;
    int cycle = 0;
    TYPE_VALUE my_value = 0;
    TYPE_VALUE expect_value = 0;
    TYPE_VALUE fetched = 0;
    TYPE_VALUE* shmem_addr = shmalloc(sizeof(*shmem_addr));

    if (!shmem_addr)
    {
        return TC_SETUP_FAIL;
    }

    /* Store my value */
    my_value = (TYPE_VALUE)my_proc;
    *shmem_addr = DEFAULT_VALUE;

    /* Define peer */
    peer_proc = (my_proc + 1) % num_proc;

    /* Define expected value: what the previous PE writes on its last cycle */
    expect_value = ( my_proc == 0 ? (num_proc - 1) : (my_proc - 1) ) + (__cycle_count - 1);

    shmem_barrier_all();
    for (cycle = 0; (cycle < __cycle_count) && (rc == TC_PASS); cycle++)
    {
        /* First call: second argument is num_proc + __cycle_count */
        fetched = num_proc + __cycle_count;
        fetched = FUNC_VALUE(shmem_addr, fetched, (my_value + cycle), peer_proc);
        if (cycle == 0 ? (fetched != DEFAULT_VALUE) : (fetched != (my_value + cycle - 1)))
        {
            break;
        }

        /* Second call: second argument is the value expected to be there */
        fetched = ( cycle == 0 ? DEFAULT_VALUE : (my_value + cycle - 1));
        fetched = FUNC_VALUE(shmem_addr, fetched, (my_value + cycle), peer_proc);
        if (cycle == 0 ? (fetched != DEFAULT_VALUE) : (fetched != (my_value + cycle - 1)))
        {
            break;
        }
    }
    shmem_barrier_all();

    /* Judge by what the previous PE left in our own cell */
    fetched = *shmem_addr;
    if (expect_value == fetched)
    {
        rc = TC_PASS;
    }
    else
    {
        rc = TC_FAIL;
    }

    log_debug(OSH_TC, "my(#%d:%lld) expected = %lld vs got = %lld\n",
                       my_proc, (INT64_TYPE)my_value, (INT64_TYPE)expect_value, (INT64_TYPE)fetched);

    shfree(shmem_addr);

    return rc;
}
Esempio n. 18
0
/*
 * Times repeated shmem_broadcast32() calls rooted at PE 0.
 *
 * Options:
 *   -v        increase verbosity (results are printed only when verbose)
 *   -e N      number of 32-bit elements per broadcast (default N_ELEMENTS)
 *   -l N      number of broadcasts to time (default DFLT_LOOPS)
 *   -p N      number of pSync sets to rotate through (default 2)
 *   -s        add a barrier after every broadcast (included in the timing)
 *   -h        print usage (PE 0) and exit
 *
 * The pSync array holds ps_cnt * _SHMEM_BCAST_SYNC_SIZE longs; successive
 * broadcasts step through it in _SHMEM_BCAST_SYNC_SIZE strides and wrap.
 *
 * Returns 0 on success or after -h, 1 on a bad option or option value.
 */
int
main(int argc, char **argv)
{
    int i,ps,ps_cnt=2;
    int *target;
    int *source;
    int me, npes, elements=N_ELEMENTS, loops=DFLT_LOOPS;
    char *pgm;
    double start_time, time_taken;

    start_pes(0);
    me = _my_pe();
    npes = _num_pes();

    /* program name without any leading directory components */
    if ((pgm=strrchr(argv[0],'/')))
        pgm++;
    else
        pgm = argv[0];

    while ((i = getopt (argc, argv, "hve:l:p:s")) != EOF) {
        switch (i)
        {
          case 'v':
              Verbose++;
              break;
          case 'e':
              if ((elements = atoi_scaled(optarg)) <= 0) {
                  fprintf(stderr,"ERR: Bad elements count %d\n",elements);
                  return 1;
              }
              break;
          case 'l':
              if ((loops = atoi_scaled(optarg)) <= 0) {
                  fprintf(stderr,"ERR: Bad loop count %d\n",loops);
                  return 1;
              }
              break;
          case 'p':
              if ((ps_cnt = atoi_scaled(optarg)) <= 0) {
                  /* report the rejected pSync count itself, not the loop count */
                  fprintf(stderr,"ERR: Bad pSync[] elements %d\n",ps_cnt);
                  return 1;
              }
              break;
          case 's':
              Serialize++;
              break;
          case 'h':
              if (me == 0)
                  usage(pgm);
              return 0;
          default:
              if (me == 0) {
                  fprintf(stderr,"%s: unknown switch '-%c'?\n",pgm,i);
                  usage(pgm);
              }
              return 1;
        }
    }

    /* from here on ps_cnt counts longs, not pSync sets */
    ps_cnt *= _SHMEM_BCAST_SYNC_SIZE;
    pSync = shmalloc( ps_cnt * sizeof(long) );

    for (i = 0; i < ps_cnt; i++)
        pSync[i] = _SHMEM_SYNC_VALUE;

    source = (int *) shmalloc( elements * sizeof(*source) );

    target = (int *) shmalloc( elements * sizeof(*target) );
    for (i = 0; i < elements; i += 1) {
        source[i] = i + 1;
        target[i] = -90;
    }

    if (me==0 && Verbose)
        fprintf(stderr,"ps_cnt %d loops %d nElems %d\n",
                        ps_cnt,loops,elements);

    /* pSync and the data buffers must be initialised on every PE first */
    shmem_barrier_all();

    for(time_taken = 0.0, ps = i = 0; i < loops; i++) {

        start_time = shmem_wtime();

        shmem_broadcast32(target, source, elements, 0, 0, 0, npes, &pSync[ps]);

        if (Serialize) shmem_barrier_all();

        time_taken += (shmem_wtime() - start_time);

        /* advance to the next pSync set, wrapping at the end of the array */
        if (ps_cnt > 1 ) {
            ps += _SHMEM_BCAST_SYNC_SIZE;
            if ( ps >= ps_cnt ) ps = 0;
        }
    }

    if(me == 0 && Verbose) {
        /* the byte count is a size_t; convert it to match %ld */
        printf("%d loops of Broadcast32(%ld bytes) over %d PEs: %7.3f secs\n",
            loops, (long)(elements*sizeof(*source)), npes, time_taken);
        /* widen before multiplying so elements * loops cannot overflow int;
         * elements is reused to hold the total volume in whole MiB */
        elements = (int)(((size_t)elements * (size_t)loops * sizeof(*source))
                         / (1024*1024));
        printf("  %7.5f secs per broadcast() @ %7.4f MB/sec\n",
               (time_taken/(double)loops), ((double)elements / time_taken) );
    }

    if (Verbose > 1)  fprintf(stderr,"[%d] pre B1\n",me);

    shmem_barrier_all();

    if (Verbose > 1)  fprintf(stderr,"[%d] post B1\n",me);

    shfree(pSync);
    shfree(target);
    shfree(source);

    return 0;
}
/*
 * test_item4: single-element, in-place call of FUNC_VALUE across all PEs.
 *
 * One symmetric element is allocated and used as both source and target.
 * Every PE writes its own PE number into it, calls FUNC_VALUE over
 * (start 0, log-stride 0, size num_proc) and then compares the element
 * against the bitwise OR of the numbers 0 .. num_proc-1.
 *
 * Returns TC_PASS when the value matches, TC_FAIL when it does not, and
 * TC_SETUP_FAIL when either symmetric allocation failed.
 */
static int test_item4(void)
{
    const int num_proc = _num_pes();
    const int my_proc = _my_pe();
    TYPE_VALUE* source_addr = NULL;
    TYPE_VALUE* target_addr = NULL;
    TYPE_VALUE source_value = 0;
    TYPE_VALUE expect_value = 0;
    int rc = TC_PASS;

    /* Work array first; the data element is only requested if that succeeded. */
    pWrk = shmalloc(sizeof(*pWrk) * sys_max(1/2 + 1, _SHMEM_REDUCE_MIN_WRKDATA_SIZE));
    if (pWrk != NULL)
    {
        source_addr = shmalloc(sizeof(*source_addr));
        target_addr = source_addr;  /* in-place: target aliases source */
    }

    if (source_addr == NULL || target_addr == NULL)
    {
        rc = TC_SETUP_FAIL;
    }
    else
    {
        TYPE_VALUE value = DEFAULT_VALUE;
        int total_wait;
        int slot;
        int pe;

        /* Contribute this PE's number */
        source_value = (TYPE_VALUE)my_proc;
        *source_addr = source_value;

        /* Expected result: OR of num_proc-1 down to 0 */
        expect_value = 0;
        for (pe = num_proc; pe-- != 0; )
        {
            expect_value |= pe;
        }

        /* Reset the sync array, then make sure every PE has done the same */
        for (slot = 0; slot < _SHMEM_REDUCE_SYNC_SIZE; slot++)
        {
            pSync[slot] = _SHMEM_SYNC_VALUE;
        }
        shmem_barrier_all();

        FUNC_VALUE(target_addr, source_addr, 1, 0, 0, num_proc, pWrk, pSync);

        /*
         * The call above may return before the data has landed, so
         * synchronize and then poll (bounded) until the target no longer
         * holds DEFAULT_VALUE.
         */
        shmem_barrier_all();
        for (total_wait = 0;
             *target_addr == DEFAULT_VALUE && total_wait < 1000 * WAIT_COUNT;
             total_wait++)
        {
            usleep(1);
        }
        value = *target_addr;

        if (expect_value == value)
        {
            rc = TC_PASS;
        }
        else
        {
            rc = TC_FAIL;
        }

        log_debug(OSH_TC, "my#%d source = %lld expected = %lld actual = %lld\n",
                           my_proc, (INT64_TYPE)source_value, (INT64_TYPE)expect_value, (INT64_TYPE)value);
    }

    /* Release whatever was obtained; target_addr aliases source_addr. */
    if (source_addr != NULL)
    {
        shfree(source_addr);
    }

    if (pWrk != NULL)
    {
        shfree(pWrk);
        pWrk = NULL;
    }

    return rc;
}
Esempio n. 20
0
File: swapm.c Progetto: coti/oshmpi
/*
 * Atomic swap test for int, float, double, long and long long.
 *
 * The src_* / dst_* pointers, the *tmp scratch variables and 'loops' are
 * declared outside this function.  For each loop iteration every PE
 * allocates 2*num_procs elements per type; the first half (src_*) is seeded
 * with a known value and the second half (dst_*) is zeroed.
 *
 * PE 0 drives the test: for every other PE j it swaps 0 into src_X[j] on
 * PE j and checks the returned old value, then reads the remote element
 * back with shmem_X_g(), swaps the old value back in and reads it once
 * more.  For int, long and long long the non-zero PEs block in
 * shmem_X_wait_until() until their own element becomes 0, and both sides
 * meet in a barrier between the first swap and the verification; the three
 * barriers on PE 0 pair with the three barriers in the else branch.
 *
 * Returns 0 on success, 1 when fewer than two PEs are available; any
 * failed check prints a diagnostic and calls exit(1).
 */
int
main(int argc, char* argv[])
{
    int me, num_procs, l, j;
    int Verbose = 0;

    start_pes(0);
    me = _my_pe();
    num_procs = _num_pes();
    if ( num_procs < 2 ) {
        if (me ==0)
            printf("PE[0] requires 2 or more PEs?\n");
        return 1;
    }

    for (l = 0 ; l < loops ; ++l) {

        /* One allocation per type: [0, num_procs) is src, the rest is dst. */
        if ((src_int = shmalloc(2*num_procs*sizeof(int))) == NULL) {
            printf("PE-%d int shmalloc() failed?\n", me);
            exit(1);
        }
        dst_int = &src_int[num_procs];
        for(j=0; j < num_procs; j++) {
            src_int[j] = 4;
            dst_int[j] = 0;
        }

        if ((src_float = shmalloc(2*num_procs*sizeof(float))) == NULL) {
            printf("PE-%d float shmalloc() failed?\n", me);
            exit(1);
        }
        dst_float = &src_float[num_procs];
        for(j=0; j < num_procs; j++) {
            src_float[j] = 4.0;
            dst_float[j] = 0.0;
        }

        if ((src_double = shmalloc(2*num_procs*sizeof(double))) == NULL) {
            printf("PE-%d double shmalloc() failed?\n", me);
            exit(1);
        }
        dst_double = &src_double[num_procs];
        for(j=0; j < num_procs; j++) {
            src_double[j] = 8.0;
            dst_double[j] = 0.0;
        }

        if ((src_long = shmalloc(2*num_procs*sizeof(long))) == NULL) {
            printf("PE-%d long shmalloc() failed?\n", me);
            exit(1);
        }
        dst_long = &src_long[num_procs];
        for(j=0; j < num_procs; j++) {
            src_long[j] = 8;
            dst_long[j] = 0;
        }

        if ((src_llong = shmalloc(2*num_procs*sizeof(long long))) == NULL) {
            /* Message names the type actually being allocated. */
            printf("PE-%d long long shmalloc() failed?\n", me);
            exit(1);
        }
        dst_llong = &src_llong[num_procs];
        for(j=0; j < num_procs; j++) {
            src_llong[j] = 16;
            dst_llong[j] = 0;
        }

        /* All PEs have seeded their buffers before anyone touches them. */
        shmem_barrier_all();

        if ( me != 0 ) {
            /* is 'src_*' accessible from PE0? should be. */
            if (!shmem_addr_accessible(src_int,0)) {
                printf("PE-%d local src_int %p not accessible from PE-%d?\n",
                       me, (void*)src_int, 0);
                exit(1);
            }
            if (!shmem_addr_accessible(src_float,0)) {
                printf("PE-%d local src_float %p not accessible from PE-%d?\n",
                       me, (void*)src_float, 0);
                exit(1);
            }
            if (!shmem_addr_accessible(src_double,0)) {
                printf("PE-%d local src_double %p not accessible from PE-%d?\n",
                       me, (void*)src_double, 0);
                exit(1);
            }
            if (!shmem_addr_accessible(src_long,0)) {
                printf("PE-%d local src_long %p not accessible from PE-%d?\n",
                       me, (void*)src_long, 0);
                exit(1);
            }
            if (!shmem_addr_accessible(src_llong,0)) {
                printf("PE-%d local src_llong %p not accessible from PE-%d?\n",
                       me, (void*)src_llong, 0);
                exit(1);
            }
        }
        shmem_barrier_all();

        if ( me == 0 ) {
            shmem_quiet();

            /* ---- int: swap 0 in, expect the seeded 4 back ---- */
            for(j=1; j < num_procs; j++) {
                dst_int[j] = shmem_int_swap(src_int+j,0,j);
                if (dst_int[j] != 4) {
                    printf("PE-%d dst_int[%d] %d != 4?\n",me,j,dst_int[j]);
                    exit(1);
                }
            }
            /* Pairs with the barrier after shmem_int_wait_until() below. */
            shmem_barrier_all();

            /* verify remote data */
            for(j=1; j < num_procs; j++) {
                itmp = shmem_int_g(src_int+j,j);
                if (itmp != 0) {
                    printf("PE-0 int PE[%d] rem(%d) != 0?\n",j,itmp);
                    exit(1);
                }

                /* swap back */
                dst_int[j] = shmem_int_swap(src_int+j,dst_int[j],j);
                if (dst_int[j] != 0) {
                    printf("PE-0 dst_int[%d] %d != 0?\n",j,dst_int[j]);
                    exit(1);
                }

                itmp = shmem_int_g(src_int+j,j);
                if (itmp != 4) {
                    printf("PE-0 PE[%d] rem %d != 4?\n",j,itmp);
                    exit(1);
                }
            }

            /* ---- float: swap, verify and swap back with no barrier ---- */
            for(j=1; j < num_procs; j++) {
                dst_float[j] = shmem_float_swap(src_float+j,0.0,j);
                if (dst_float[j] != 4.0) {
                    printf("PE-0 dst_float[%d] %f != 4.0?\n",j,dst_float[j]);
                    exit(1);
                }

                /* verify remote data */
                ftmp = shmem_float_g(src_float+j,j);
                if (ftmp != 0.0) {
                    printf("PE-0 float rem(%f) != 0.0?\n",ftmp);
                    exit(1);
                }
                /* swap back */
                dst_float[j] = shmem_float_swap(src_float+j,dst_float[j],j);
                if (dst_float[j] != 0.0) {
                    printf("PE-0 dst_float[%d] %f != 0.0?\n",j,dst_float[j]);
                    exit(1);
                }
                ftmp = shmem_float_g(src_float+j,j);
                if (ftmp != 4.0) {
                    printf("PE-%d float rem(%f) != 4.0?\n",me,ftmp);
                    exit(1);
                }
            }

            /* ---- double: same pattern as float ---- */
            for(j=1; j < num_procs; j++) {
                dst_double[j] = shmem_double_swap(src_double+j,0.0,j);
                if (dst_double[j] != 8.0) {
                    printf("PE-0 dst_double[%d] %f != 8.0?\n",j,dst_double[j]);
                    exit(1);
                }
                /* verify remote data */
                dtmp = shmem_double_g(src_double+j,j);
                if (dtmp != 0.0) {
                    /* Message names the type actually being checked. */
                    printf("PE-0 double rem(%f) != 0.0?\n",dtmp);
                    exit(1);
                }
                /* swap back */
                dst_double[j] = shmem_double_swap(src_double+j,dst_double[j],j);
                if (dst_double[j] != 0.0) {
                    printf("PE-0 dst_double[%d] %f != 0.0?\n",j,dst_double[j]);
                    exit(1);
                }
                dtmp = shmem_double_g(src_double+j,j);
                if (dtmp != 8.0) {
                    printf("PE-0 double rem(%f) != 8.0?\n",dtmp);
                    exit(1);
                }
            }

            /* ---- long ---- */
            for(j=1; j < num_procs; j++) {
                dst_long[j] = shmem_long_swap(src_long+j,0,j);
                if (dst_long[j] != 8) {
                    printf("PE-0 dst_long[%d] %ld != 8?\n",j,dst_long[j]);
                    exit(1);
                }
            }
            /* Pairs with the barrier after shmem_long_wait_until() below. */
            shmem_barrier_all();

            /* verify remote data */
            for(j=1; j < num_procs; j++) {
                ltmp = shmem_long_g(src_long+j,j);
                if (ltmp != 0) {
                    printf("PE-0 PE[%d]long rem(%ld) != 0?\n",j,ltmp);
                    exit(1);
                }
                /* swap back */
                dst_long[j] = shmem_long_swap(src_long+j,dst_long[j],j);
                if (dst_long[j] != 0) {
                    printf("PE-%d dst_long[%d] %ld != 0?\n",me,j,dst_long[j]);
                    exit(1);
                }
                ltmp = shmem_long_g(src_long+j,j);
                if (ltmp != 8) {
                    printf("PE-%d long rem(%ld) != 8?\n",me,ltmp);
                    exit(1);
                }
            }

            /* ---- long long ---- */
            for(j=1; j < num_procs; j++) {
                dst_llong[j] = shmem_longlong_swap(src_llong+j,0,j);
                if (dst_llong[j] != 16) {
                    printf("PE-%d dst_llong[%d] %lld != 16?\n",me,j,dst_llong[j]);
                    exit(1);
                }
            }
            /* Pairs with the barrier after shmem_longlong_wait_until() below. */
            shmem_barrier_all();

            /* verify remote data */
            for(j=1; j < num_procs; j++) {
                lltmp = shmem_longlong_g(src_llong+j,j);
                if (lltmp != 0) {
                    printf("PE-%d long long rem(%lld) != 0?\n",me,lltmp);
                    exit(1);
                }
                /* swap back */
                dst_llong[j] = shmem_longlong_swap(src_llong+j,dst_llong[j],j);
                if (dst_llong[j] != 0) {
                    printf("PE-%d  dst_llong[%d] %lld != 0?\n", me,j,dst_llong[j]);
                    exit(1);
                }
                lltmp = shmem_longlong_g(src_llong+j,j);
                if (lltmp != 16) {
                    printf("PE-%d longlong rem(%lld) != 16?\n",me,lltmp);
                    exit(1);
                }
            }
        }
        else {
            /*
             * Wait for PE 0's swap to zero this PE's own element, then join
             * the matching barrier on PE 0.  One wait/barrier pair per
             * integer type, in the same order PE 0 issues them.
             */
            shmem_int_wait_until(&src_int[me],SHMEM_CMP_EQ,0);
            shmem_barrier_all();

            shmem_long_wait_until(&src_long[me],SHMEM_CMP_EQ,0);
            shmem_barrier_all();

            shmem_longlong_wait_until(&src_llong[me],SHMEM_CMP_EQ,0);
            shmem_barrier_all();
        }

        /* PE 0 has finished all remote accesses before buffers are freed. */
        shmem_barrier_all();

        shfree(src_int);
        shfree(src_float);
        shfree(src_double);
        shfree(src_long);
        shfree(src_llong);
    }

    if (Verbose)
        fprintf(stderr,"[%d] exit\n",_my_pe());

    return 0;
}
/*
 * test_item8: FUNC_VALUE over a strided active set with growing buffers.
 *
 * Runs __cycle_count rounds; round i uses a buffer of
 * max(1, (i + 1) * MAX_BUFFER_SIZE / __cycle_count) elements.  Each PE fills
 * its source buffer with (my_proc + 1) and its target with DEFAULT_VALUE.
 * PEs for which check_within_active_set(0, 1, size, ...) is true call
 * FUNC_VALUE with (start 0, log-stride 1, size), where
 * size = num_proc/2 + num_proc%2, and are the only ones whose result is
 * verified.
 *
 * Expected value: DEFAULT_VALUE on odd-numbered PEs, otherwise the bitwise
 * OR of every odd k in 1 .. num_proc.
 *
 * Returns TC_PASS, TC_FAIL on a mismatch, or TC_SETUP_FAIL when the work
 * array could not be allocated; the loop stops at the first non-pass round.
 */
static int test_item8(void)
{
    static TYPE_VALUE target_addr[MAX_BUFFER_SIZE];
    static TYPE_VALUE source_addr[MAX_BUFFER_SIZE];
    const int num_proc = _num_pes();
    const int my_proc = _my_pe();
    const int active_set_size = (num_proc / 2) + (num_proc % 2);
    TYPE_VALUE source_value = 0;
    TYPE_VALUE expect_value = 0;
    TYPE_VALUE value = DEFAULT_VALUE;
    long cur_buf_size = 0;
    int rc = TC_PASS;
    int cycle;

    for (cycle = 0; (cycle < __cycle_count) && (rc == TC_PASS); cycle++)
    {
        int in_active_set;
        int slot;
        int wait;

        cur_buf_size = sys_max(1, (cycle + 1) * MAX_BUFFER_SIZE / __cycle_count);
        pWrk = shmalloc(sizeof(*pWrk) * sys_max(cur_buf_size/2 + 1, _SHMEM_REDUCE_MIN_WRKDATA_SIZE));
        if (!pWrk)
        {
            /* rc != TC_PASS ends the loop at the next condition check */
            rc = TC_SETUP_FAIL;
            continue;
        }

        /* Preset the target so a missing update is detectable */
        value = DEFAULT_VALUE;
        fill_buffer((void *)target_addr, cur_buf_size, (void *)&value, sizeof(value));

        /* Let every PE finish presetting its target */
        shmem_barrier_all();

        /* This PE's contribution */
        source_value = (TYPE_VALUE)(my_proc + 1);
        fill_buffer((void *)source_addr, cur_buf_size, (void *)&source_value, sizeof(source_value));

        /* Work out what this PE should end up with */
        if (my_proc % 2)
        {
            expect_value = DEFAULT_VALUE;
        }
        else
        {
            int k;

            expect_value = 0;
            for (k = num_proc; k != 0; k--)
            {
                if (k % 2)
                {
                    expect_value |= k;
                }
            }
        }

        /* Reset the sync array and wait until all PEs have done so */
        for (slot = 0; slot < _SHMEM_REDUCE_SYNC_SIZE; slot++)
        {
            pSync[slot] = _SHMEM_SYNC_VALUE;
        }
        shmem_barrier_all();

        in_active_set = check_within_active_set(0, 1, active_set_size, my_proc, num_proc);

        if (in_active_set)
        {
            FUNC_VALUE(target_addr, source_addr, cur_buf_size, 0, 1, active_set_size, pWrk, pSync);
        }

        /*
         * The call may return before the data is delivered, so synchronize
         * and then poll the first element up to WAIT_COUNT times.
         */
        shmem_barrier_all();
        for (wait = WAIT_COUNT; wait-- != 0; )
        {
            value = *target_addr;
            if (value == expect_value)
            {
                break;
            }
            sleep(1);
        }

        if (in_active_set)
        {
            if (compare_buffer_with_const(target_addr, cur_buf_size, &expect_value, sizeof(expect_value)))
            {
                rc = TC_FAIL;
            }
            else
            {
                rc = TC_PASS;
            }

            log_debug(OSH_TC, "my#%d source = %lld expected = %lld actual = %lld buffer size = %lld\n",
                               my_proc, (INT64_TYPE)source_value, (INT64_TYPE)expect_value, (INT64_TYPE)value, (INT64_TYPE)cur_buf_size);

            if (rc)
            {
                /* Dump a small window of the buffer around the mismatch */
                TYPE_VALUE* check_addr = target_addr;
                int odd_index = compare_buffer_with_const(check_addr, cur_buf_size, &expect_value, sizeof(expect_value));
                int show_index = 0;
                int show_size = sizeof(*check_addr) * sys_min(3, cur_buf_size - odd_index - 1);

                if (odd_index > 1)
                {
                    show_index = odd_index - 2;
                }

                log_debug(OSH_TC, "index of incorrect value: 0x%08X (%d)\n", odd_index - 1, odd_index - 1);
                log_debug(OSH_TC, "buffer interval: 0x%08X - 0x%08X\n", show_index, show_index + show_size);
                show_buffer(check_addr + show_index, show_size);
            }
        }

        shfree(pWrk);
    }

    return rc;
}
Esempio n. 22
0
/*
 * shmem_fcollect64() test driver.
 *
 * usage: pgm {-l loopcnt} {-v} {numLongs} {loopIncr}
 *
 * For each of 'loops' iterations every PE allocates one symmetric buffer
 * holding its nWords-long source followed by a num_pes*nWords-long
 * destination, fills the source with j + mpe*nWords, performs the
 * fcollect over all PEs and checks that the destination holds the
 * consecutive values 0 .. nWords*num_pes-1.  nWords grows by nIncr after
 * every iteration.
 *
 * src, dst, pSync and Verbose are declared outside this function.
 *
 * Returns 0 on success and 1 on a usage error, an allocation failure or a
 * data mismatch.
 */
int
main(int argc, char* argv[])
{
    int c, j, cloop, loops = DFLT_LOOPS;
    int mpe, num_pes;
    int nWords = 1;
    int nIncr = 1;
    int failures = 0;
    char *pgm;

    start_pes(0);
    mpe = _my_pe();
    num_pes = _num_pes();

    if (num_pes == 1) {
        Rfprintf(stderr,
            "ERR - Requires > 1 PEs\n");
        return 1;
    }

    /* Program name without any leading directory components. */
    pgm = strrchr(argv[0],'/');
    if ( pgm )
        pgm++;
    else
        pgm = argv[0];

    while((c=getopt(argc,argv,"hqVvl:")) != -1) {
        switch(c) {
          case 'V':
          case 'v':
            Verbose++;
            break;
          case 'l':
            loops = atoi(optarg);
            break;
          case 'h':
            Rfprintf(stderr,
                "usage: %s {-l loopcnt(%d)} {numLongs(%d)} {loopIncr(%d)}\n",
                    pgm,DFLT_LOOPS,DFLT_NWORDS,DFLT_INCR);
            return 1;
          default:
            return 1;
        }
    }

    /* First positional argument: element count. */
    if (optind == argc)
        nWords = DFLT_NWORDS;
    else {
        nWords = atoi_scaled(argv[optind++]);
        if (nWords <= 0) {
            Rfprintf(stderr, "ERR - Bad nBytes arg?\n");
            return 1;
        }
    }

    /*
     * Second positional argument: per-loop increment.  It must be stored in
     * nIncr (not loops) so that the range check below actually validates
     * what the user typed.
     */
    if (optind == argc)
        nIncr = DFLT_INCR;
    else {
        nIncr = atoi(argv[optind++]);
        if (nIncr <= 0 ) {
            Rfprintf(stderr, "ERR - incLongs arg out of bounds '%d'?\n", nIncr);
            return 1;
        }
    }

    if ( nWords % 8 ) { // integral multiple of longs
        /* sizeof yields size_t; cast so the argument matches %ld. */
        Rprintf("%s: nWords(%d) not a multiple of %ld?\n",
            pgm,nWords,(long)sizeof(long));
        return 1;
    }

    for (c = 0; c < _SHMEM_COLLECT_SYNC_SIZE;c++)
        pSync[c] = _SHMEM_SYNC_VALUE;

    if (Verbose && mpe == 0)
        fprintf(stderr,"loops(%d) nWords(%d) incr-per-loop(%d)\n",
            loops,nWords,nIncr);

    for(cloop=1; cloop <= loops; cloop++) {

        c = (sizeof(long)*nWords) * (num_pes + 1); // src + dst allocation.
        //nWords /= sizeof(long); // convert input of bytes --> longs.

        src = (long*)shmalloc(c);
        if ( !src ) {
            Rprintf("[%d] %s: shmalloc(%d) failed?\n", mpe, pgm,c);
            return 1;   /* allocation failure is an error, not success */
        }
        dst = &src[nWords];

        for(j=0; j < nWords; j++)
            src[j] = (long) (j + mpe*nWords);

        shmem_barrier_all();

        shmem_fcollect64(dst,src,nWords,0,0,num_pes,pSync);

        // Expect dst to be consecutive integers 0 ... (nWords*num_pes)-1
        for(j=0; j < (nWords*num_pes); j++) {
            if ( dst[j] != (long) j ) {
                fprintf(stderr,
                    "[%d] dst[%d] %ld != expected %d\n",mpe,j,dst[j],j);
                return 1;
            }
        }
        shmem_barrier_all();

        /*
         * Release this iteration's buffer before the next, larger one is
         * allocated; freeing only after the loop leaked every buffer but
         * the last.
         */
        shfree( (void*)src );
        src = NULL;

        if (Verbose && mpe == 0 && loops > 1) {
            fprintf(stderr,".");
        }
        nWords += nIncr;
    }

    if (Verbose && mpe == 0) {
        fprintf(stderr,"\n");fflush(stderr);
    }
    shmem_barrier_all();
    if (Verbose)
        printf("%d(%d) Exit(%d)\n", mpe, num_pes, failures);

    return failures;
}
Esempio n. 23
0
int main(int argc, char *argv[])
{
  int size, rank, world_rank, my_group;
  int num_lsms; // number of parallel LSMS instances
  int size_lsms; // number of atoms in a lsms instance
  int num_steps; // number of energy calculations
  int initial_steps; // number of steps before sampling starts
  int stepCount=0; // count the Monte Carlo steps executed
  double max_time; // maximum walltime for this run in seconds
  bool restrict_time = false;       // was the maximum time specified?
  bool restrict_steps = false; // or the max. numer of steps?
  int align; // alignment of lsms_instances
  
  double magnetization;
  double energy_accumulator; // accumulates the enegy to calculate the mean
  int energies_accumulated;


  int new_peid,new_root;
  static int op,flag;
  double *evec,*r_values;
  evec=(double *)shmalloc(sizeof(double)*3*size_lsms);
  r_values=(double *)shmalloc(sizeof(double)*(R_VALUE_OFFSET+3*(size_lsms+1)));




  energy_accumulator=0.0;
  energies_accumulated=0;

  double walltime_0,walltime;

  double restartWriteFrequency=30.0*60.0;
  double nextWriteTime=restartWriteFrequency;

  MPI_Comm local_comm;
  int *lsms_rank0;
  MPI_Status status;

  char prefix[40];
  char i_lsms_name[64];
  char gWL_in_name[64], gWL_out_name[64];
  char mode_name[64];
  char energy_calculation_name[64];
  char stupid[37];

  char step_out_name[64];
  char wl_step_out_name[128];
  char *wl_stepf=NULL;
  bool step_out_flag=false;
  std::ofstream step_out_file;
  typedef enum {Constant, Random, WangLandau_1d, ExhaustiveIsing, WangLandau_2d} EvecGenerationMode;
  typedef enum {MagneticMoment, MagneticMomentZ, MagneticMomentX, MagneticMomentY} SecondDimension;

  EvecGenerationMode evec_generation_mode = Constant;
  SecondDimension second_dimension = MagneticMoment;
  double ev0[3];

  bool return_moments_flag=true; // true-> return all magnetic moments from lsms run at each step.
  bool generator_needs_moment=false;

  typedef enum {OneStepEnergy, MultiStepEnergy, ScfEnergy} EnergyCalculationMode;
  EnergyCalculationMode energyCalculationMode = OneStepEnergy;
  int energyIndex=1; // index for the return value to use for the MC step (0: total energy, 1: band energy)

  ev0[0]=ev0[1]=0.0; ev0[2]=1.0;
  // size has to be align + size_lsms*num_lsms
  align=1;
  num_lsms=1;
  size_lsms=-1;
  my_group=-1;
  num_steps=1;
  initial_steps=0;

  sprintf(i_lsms_name,"i_lsms");
  gWL_in_name[0]=gWL_out_name[0]=0;
  mode_name[0]=0;
  energy_calculation_name[0]=0;

  // check command line arguments
  for(int i=0; i<argc; i++)
  {
    if(!strcmp("-num_lsms",argv[i])) num_lsms=atoi(argv[++i]);
    if(!strcmp("-size_lsms",argv[i])) size_lsms=atoi(argv[++i]);
    if(!strcmp("-align",argv[i])) align=atoi(argv[++i]);
    if(!strcmp("-num_steps",argv[i])) {num_steps=atoi(argv[++i]); restrict_steps=true;}
    if(!strcmp("-initial_steps",argv[i])) initial_steps=atoi(argv[++i]); 
    if(!strcmp("-walltime",argv[i])) {max_time=60.0*atof(argv[++i]); restrict_time=true;}
    if(!strcmp("-i",argv[i])) strncpy(i_lsms_name,argv[++i],64);
    if(!strcmp("-random_dir",argv[i])) {evec_generation_mode = Random;}
    if(!strcmp("-step_out",argv[i]))
    {strncpy(step_out_name,argv[++i],64); step_out_flag=true;
      return_moments_flag=true;}
    if(!strcmp("-wl_out", argv[i])) strncpy(gWL_out_name,argv[++i],64);
    if(!strcmp("-wl_in", argv[i])) strncpy(gWL_in_name,argv[++i],64);
    if(!strcmp("-mode", argv[i])) strncpy(mode_name,argv[++i],64);
    if(!strcmp("-energy_calculation",argv[i])) strncpy(energy_calculation_name,argv[++i],64);
  }

  if(!(restrict_steps || restrict_time)) restrict_steps=true;

  if(mode_name[0]!=0)
  {
    if(!strcmp("constant",mode_name)) evec_generation_mode = Constant;
    if(!strcmp("random",mode_name)) evec_generation_mode = Random;
    if(!strcmp("1d",mode_name)) evec_generation_mode = WangLandau_1d;
    if(!strcmp("ising",mode_name)) evec_generation_mode = ExhaustiveIsing;
    if(!strcmp("2d",mode_name)) evec_generation_mode = WangLandau_2d;
    if(!strcmp("2d-m",mode_name)) {evec_generation_mode = WangLandau_2d; second_dimension=MagneticMoment;}
    if(!strcmp("2d-x",mode_name)) {evec_generation_mode = WangLandau_2d; second_dimension=MagneticMomentX;}
    if(!strcmp("2d-y",mode_name)) {evec_generation_mode = WangLandau_2d; second_dimension=MagneticMomentY;}
    if(!strcmp("2d-z",mode_name)) {evec_generation_mode = WangLandau_2d; second_dimension=MagneticMomentZ;}
  }

  if(energy_calculation_name[0]!=0)
  {
    if(energy_calculation_name[0]=='o') { energyCalculationMode = OneStepEnergy; energyIndex=1; }
    if(energy_calculation_name[0]=='m') { energyCalculationMode = MultiStepEnergy; energyIndex=1; }
    if(energy_calculation_name[0]=='s') { energyCalculationMode = ScfEnergy; energyIndex=0; }
  }

#ifdef USE_PAPI
#define NUM_PAPI_EVENTS 4
  int hw_counters = PAPI_num_counters();
  if(hw_counters>NUM_PAPI_EVENTS) hw_counters=NUM_PAPI_EVENTS;
  int papi_events[NUM_PAPI_EVENTS]; // = {PAPI_TOT_INS,PAPI_TOT_CYC,PAPI_FP_OPS,PAPI_VEC_INS};
  char *papi_event_name[] = {"PAPI_TOT_INS","PAPI_FP_OPS",
                             "RETIRED_SSE_OPERATIONS:DOUBLE_ADD_SUB_OPS:DOUBLE_MUL_OPS:DOUBLE_DIV_OPS:OP_TYPE",
                             "RETIRED_SSE_OPERATIONS:SINGLE_ADD_SUB_OPS:SINGLE_MUL_OPS:SINGLE_DIV_OPS:OP_TYPE"};
  // "RETIRED_INSTRUCTIONS",
  // "RETIRED_MMX_AND_FP_INSTRUCTIONS:PACKED_SSE_AND_SSE2",
  // "RETIRED_SSE_OPERATIONS:DOUBLE_ADD_SUB_OPS:DOUBLE_MUL_OPS:DOUBLE_DIV_OPS:1",
  // "RETIRED_SSE_OPERATIONS:SINGLE_ADD_SUB_OPS:SINGLE_MUL_OPS:SINGLE_DIV_OPS:1"
  // get events from names:
  for(int i=0; i<NUM_PAPI_EVENTS; i++)
  {
    if(PAPI_event_name_to_code(papi_event_name[i],&papi_events[i]) != PAPI_OK)
    {
      // printline("Error in obtaining PAPI event code for: "+ttos(papi_event_name[i]),
      //           std::cerr,parameters.myrankWorld);
      // printline("Skipping all following events",
      //           std::cerr,parameters.myrankWorld);
      if(hw_counters>i) hw_counters=i;
    }
  }
  long long papi_values[NUM_PAPI_EVENTS+4];
  // printline("PAPI: "+ttos(hw_counters)+" counters available",std::cout,parameters.myrankWorld);
  if(hw_counters>NUM_PAPI_EVENTS) hw_counters=NUM_PAPI_EVENTS;
  long long papi_real_cyc_0 = PAPI_get_real_cyc();
  long long papi_real_usec_0 = PAPI_get_real_usec();
  long long papi_virt_cyc_0 = PAPI_get_virt_cyc();
  long long papi_virt_usec_0 = PAPI_get_virt_usec();
  PAPI_start_counters(papi_events,hw_counters);
#endif


  lsms_rank0=(int *)malloc(sizeof(int)*(num_lsms+1));

  // initialize MPI:
  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  world_rank=rank;
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  walltime_0 = get_rtc();

#ifndef SVN_REV
#define SVN_REV "unknown"
#endif

// make sure 'return_moments_flag' is set correctly
  switch(evec_generation_mode)
  {
  case Constant : break;
  case Random : break;
  case WangLandau_1d :
    return_moments_flag = true;
    generator_needs_moment = true;
    break;
  case ExhaustiveIsing : break;
  case WangLandau_2d :
    return_moments_flag = true;
    generator_needs_moment = true;
    break;
  default: std::cout<<" ERROR: UNKNOWN EVEC GENERATION MODE\n"; exit(1);
  }

  if(rank==0)
  {
    std::cout<<"LSMS_3"<<std::endl;
    std::cout<<" SVN revision "<<SVN_REV<<std::endl<<std::endl;
#ifdef USE_PAPI
    std::cout<<" Using Papi counters"<<std::endl<<std::endl; 
#endif
    std::cout<<" Size of LSMS instances = "<<size_lsms<<" atoms\n";
    std::cout<<" Number of LSMS instances = "<<num_lsms<<std::endl;
    std::cout<<" LSMS Energy calculated using ";
    switch(energyCalculationMode)
    {
    case OneStepEnergy: std::cout<<"oneStepEnergy [frozen potential band energy]"<<std::endl; break;
    case MultiStepEnergy: std::cout<<"multiStepEnergy [frozen potential band energy with converged Fermi energy]"<<std::endl; break;
    case ScfEnergy: std::cout<<"scfEnergy [self-consistent total energy]"<<std::endl; break;
    default: std::cout<<"UNKNOWN ENERGY CALCULATION METHOD"<<std::endl; exit(1);
    }
    if(restrict_steps) std::cout<<" Number of gWL steps = "<<num_steps<<std::endl;
    if(restrict_time) std::cout<<" Maximum walltime = "<<max_time<<"s\n";
    std::cout<<" Processor alignment (process allocation quantization) = "<<align<<std::endl;
    switch(evec_generation_mode)
    {
    case Constant : std::cout<<" Constant moments direction along "
                             <<ev0[0]<<" "<<ev0[1]<<" "<<ev0[2]<<std::endl;
      break;
    case Random : std::cout<<" Random distribution of moments (no Wang-Landau)"<<std::endl;
      break;
    case WangLandau_1d : std::cout<<" Wang-Landau for one continuous variable (energy)"<<std::endl;
//      return_moments_flag = true;
//      generator_needs_moment = true;
      break;
    case ExhaustiveIsing : std::cout<<" Exhaustive Ising sampling"<<std::endl; break;
    case WangLandau_2d : std::cout<<" Wang-Landau for two continuous variable (energy, ";
      switch(second_dimension)
      {
      case MagneticMoment  : std::cout<<"magnitude of magnetization)"; break;
      case MagneticMomentX : std::cout<<"x component of magnetization)"; break;
      case MagneticMomentY : std::cout<<"y component of magnetization)"; break;
      case MagneticMomentZ : std::cout<<"z component of magnetization)"; break;
      }
      std::cout<<std::endl;
//      return_moments_flag = true;
//      generator_needs_moment = true;
      break;
    default: std::cout<<" ERROR: UNKNOWN EVEC GENERATION MODE\n"; exit(1);
    }
    if(step_out_flag) std::cout<<" Step output written to: "<<step_out_name<<std::endl;
    std::cout<<std::endl;

    if(step_out_flag && (evec_generation_mode==WangLandau_1d))
    {
      // step_out_flag=false;
      snprintf(wl_step_out_name,127,"wl1d_%s",step_out_name);
      wl_stepf=wl_step_out_name;
    }

    if(step_out_flag)
    {
      step_out_file.open(step_out_name);
      step_out_file<<"#";
      for(int i=0; i<argc; i++) step_out_file<<" "<<argv[i];
      step_out_file<<std::endl<<size_lsms<<std::endl;
    }
  }

  if(generator_needs_moment) return_moments_flag=true;

  if(num_lsms==1)
  {
    SHMEM_activeset local_comm;
    local_comm.rank=shmem_my_pe();
    local_comm.size=shmem_n_pes();
    local_comm.start_pe=0;
    local_comm.logPE_stride=0;
    LSMS lsms_calc(local_comm,i_lsms_name,"1_");
      
    if(rank==0)
    {
      std::cout<<"executing LSMS(C++) for "<<lsms_calc.numSpins()<<" atoms\n";
      std::cout<<"  LSMS version = "<<lsms_calc.version()<<std::endl;
    }

    if(energyCalculationMode==OneStepEnergy)
      std::cout<<"one step Energy = "<<lsms_calc.oneStepEnergy()<<std::endl;
    else if(energyCalculationMode==MultiStepEnergy)
      std::cout<<"multi-step Energy = "<<lsms_calc.multiStepEnergy()<<std::endl;
    else if(energyCalculationMode==ScfEnergy)
      std::cout<<"self-consistent Energy = "<<lsms_calc.scfEnergy()<<std::endl;
    else
    {
      printf("ERROR: Unknown energy calculation mode for lsms_calc in wl-lsms main!\n");
     // MPI_Abort(MPI_COMM_WORLD,5);
      exit(5);
    }
  }
  else
  {
    // build the communicators
    //int color=MPI_UNDEFINED;
    //Assuming user passes a power of two while using "-align"
    int s = align;
    int comm_size=(size-align)/num_lsms;
    int world_rank;
    for(int i=0; i<num_lsms; i++)
    {
      if((world_rank>=s) && (world_rank<s+comm_size)) 
      { 
        my_group=i; 
        //color=i; 
        new_peid=world_rank-s;
        new_root=s;
      }
      lsms_rank0[i]=s;
      s+=comm_size;
    }
    if(world_rank==0){ 
      //color=num_lsms;
      new_peid=0;
      comm_size=1;
      new_root=0;
    }

    //MPI_Comm_split(MPI_COMM_WORLD, color, 0, &local_comm);
    SHMEM_activeset local_comm;
    local_comm.rank=new_peid;
    local_comm.size=comm_size;
    local_comm.start_pe=new_root;
    local_comm.logPE_stride=0;

    std::cout<<"world_rank="<<world_rank<<" -> group="<<my_group<<std::endl;

      
    snprintf(prefix,38,"Group %4d: ",my_group);

    // now we get ready to do some calculations...

    if(my_group>=0)
    {
      double energy;
      double band_energy;
      int static i_values[10];
      double static r_values[10];
      static int op;


      //MPI_Comm_rank(local_comm, &rank);
      rank = local_comm.rank;
      snprintf(prefix,38,"%d_",my_group);
      // to use the ramdisk on jaguarpf:
      // snprintf(prefix,38,"/tmp/ompi/%d_",my_group);
      LSMS lsms_calc(local_comm,i_lsms_name,prefix);
      snprintf(prefix,38,"Group %4d: ",my_group);

      if(rank==0 && my_group==0)
      {
        std::cout<<prefix<<"executing LSMS(C++) for "<<lsms_calc.numSpins()<<" atoms\n";
        std::cout<<prefix<<"  LSMS version = "<<lsms_calc.version()<<std::endl;
      }

      // wait for commands from master
      bool finished=false;
      while(!finished)
      {
        if(rank==0)
        {
          //MPI_Recv(evec,3*size_lsms,MPI_DOUBLE,0,MPI_ANY_TAG,MPI_COMM_WORLD,&status);
          //op =status.MPI_TAG;
          if (lsms_rank0[0]==world_rank)
                shmem_barrier(0, lsms_rank0[0], 2, pSync1);

        }
        //MPI_Bcast(&op,1,MPI_INT,0,local_comm);
        shmem_broadcast32(&op, &op, 1, local_comm.start_pe, local_comm.start_pe, local_comm.logPE_stride, local_comm.size, pSync2); 

/* recognized opcodes:
   5: calculate energy

   recognized energy calculation modes:
   OneStepEnergy : calclulate frozen potential band energy in one step (don't converge Ef)
   use only if the Fermi energy will not change due to MC steps!
   The only method available in LSMS_1.9
   MultiStepEnergy : calculate frozen potential band energy after converging Fermi energy
   This should be the new default method. If the Fermi energy doesn't change
   multiStepEnergy only performs one step and should be equivalent to oneStepEnergy
   The tolerance for Ef convergence can be set with LSMS::setEfTol(Real).
   The default tolerance is set in the LSMS::LSMS constructor (currently 1.0e-6).
   The maximum number of steps is read from the LSMS input file 'nscf' parameter.
   ScfEnergy : this will calculate the selfconsistent total energy.
   The maximum number of steps is read from the LSMS input file 'nscf' parameter.
   NOT IMPLEMENTED YET!!!

   10: get number of sites
*/

        if(op==5)
        {
          lsms_calc.setEvec(evec);
          if(energyCalculationMode==OneStepEnergy)
            energy=lsms_calc.oneStepEnergy(&band_energy);
          else if(energyCalculationMode==MultiStepEnergy)
            band_energy=energy=lsms_calc.multiStepEnergy();
          else if(energyCalculationMode==ScfEnergy)
            energy=lsms_calc.scfEnergy(&band_energy);
          else
          {
            printf("ERROR: Unknown energy calculation mode for lsms_calc in wl-lsms main!\n");
            //MPI_Abort(MPI_COMM_WORLD,5);
            exit(5);
          }
          r_values[0]=energy;
          r_values[1]=band_energy;
          if(return_moments_flag)
          {
            lsms_calc.getMag(&r_values[R_VALUE_OFFSET]);
          }
          if(rank==0)
          {
            if(return_moments_flag)
            {
              //MPI_Send(r_values,R_VALUE_OFFSET+3*size_lsms,MPI_DOUBLE,0,1005,MPI_COMM_WORLD);
              shmem_double_put(r_values, r_values, R_VALUE_OFFSET+3*size_lsms, 0);

            } else {
              //MPI_Send(r_values,R_VALUE_OFFSET,MPI_DOUBLE,0,1005,MPI_COMM_WORLD);
              shmem_double_put(r_values, r_values, R_VALUE_OFFSET, 0);
            }
            shmem_fence();
            shmem_int_swap(&flag, world_rank, 0);

          }
              
        } else if(op==10) {
          i_values[0]=lsms_calc.numSpins();
          //MPI_Send(i_values,10,MPI_INT,0,1010,MPI_COMM_WORLD);
          shmem_int_put(i_values, i_values, 10, 0);
        } else {
          // printf("world rank %d: recieved exit\n",world_rank); 
          finished=true;
        }
      }

      shfree(evec);
      //shfree(r_values);
    }
    else if(world_rank==0)
    {
      int running;
      double **evecs;
      //double *r_values;
      //int i_values[10];
      int *init_steps;
      int total_init_steps;
      bool accepted;
        
      char *wl_inf=NULL;
      char *wl_outf=NULL;
      if(gWL_in_name) wl_inf=gWL_in_name;
      if(gWL_out_name) wl_outf=gWL_out_name;
        
      EvecGenerator *generator;

/*
      // get number of spins from first LSMS instance
      // temp r_values:
      r_values=(double *)malloc(sizeof(double)*10);
      MPI_Send(r_values,1,MPI_DOUBLE, lsms_rank0[0], 10, MPI_COMM_WORLD);
      free(r_values);
      MPI_Recv(i_values,10,MPI_INT,lsms_rank0[0],1010,MPI_COMM_WORLD,&status);
      if(i_values[0]!=size_lsms)
      {
        printf("Size specified for Wang-Landau and in LSMS input file don't match!\n");
        size_lsms=i_values[0];
      }
*/

      evecs=(double **)shmalloc(sizeof(double *)*num_lsms);
      init_steps=(int *)shmalloc(sizeof(int)*num_lsms);
      for(int i=0; i<num_lsms; i++)
      {
        evecs[i]=(double *)shmalloc(sizeof(double)*3*size_lsms);
        init_steps[i]=initial_steps;
      }
      total_init_steps=num_lsms*initial_steps;
        

      // Initialize the correct evec generator
      switch(evec_generation_mode)
      {
      case Random :  generator = new RandomEvecGenerator(size_lsms);
        break;
      case Constant: generator = new ConstantEvecGenerator(size_lsms, ev0, num_lsms);
        break;
     //case WangLandau_1d : generator = new WL1dEvecGenerator<std::mt19937>(size_lsms, num_lsms,
     //                                                                      evecs, wl_inf, wl_outf, wl_stepf);
     case WangLandau_1d : generator = new WL1dEvecGenerator<boost::mt19937>(size_lsms, num_lsms,
                                                                           evecs, wl_inf, wl_outf, wl_stepf);
        break;
      case ExhaustiveIsing : generator = new ExhaustiveIsing1dEvecGenerator(size_lsms, num_lsms,
                                                                            evecs, wl_inf, wl_outf);
        break;
      //case WangLandau_2d : generator = new WL2dEvecGenerator<std::mt19937>(size_lsms, num_lsms,
      //                                                                     evecs, wl_inf, wl_outf, wl_stepf);
      case WangLandau_2d : generator = new WL2dEvecGenerator<boost::mt19937>(size_lsms, num_lsms,
                                                                           evecs, wl_inf, wl_outf, wl_stepf);
        break;
      default: std::cerr<<"The code should never arrive here: UNKNOWN EVEC GENERATION MODE\n";
        exit(1);
      }

      for(int i=0; i<num_lsms; i++)
      {
        generator->initializeEvec(i,evecs[i]);
      }
      std::cout<<"This is the master node\n";
      // issue initial commands to all LSMS instances
      running=0;
      bool more_work=true;
      if(total_init_steps>0)
      {
        for(int i=0; i<num_lsms; i++)
        {
          std::cout<<"starting initial calculation in group "<<i<<std::endl;
          //MPI_Send(evecs[i], 3*size_lsms, MPI_DOUBLE, lsms_rank0[i], 5, MPI_COMM_WORLD);
          shmem_double_put(evec, evecs[i], 3*size_lsms, lsms_rank0[i]);
          shmem_int_p(&op, 5, lsms_rank0[i]);
          shmem_fence();


          num_steps--; running++; stepCount++;
          if(restrict_steps) std::cout<<"      "<<num_steps<<" steps remaining\n";
        }
        shmem_barrier(0, lsms_rank0[0], 2, pSync1);
        // first deal with the initial steps:
        while(running>0)
        {
          //if(return_moments_flag)
          //  MPI_Recv(r_values,R_VALUE_OFFSET+3*size_lsms,MPI_DOUBLE,MPI_ANY_SOURCE,MPI_ANY_TAG,MPI_COMM_WORLD,&status);
          //else
          //  MPI_Recv(r_values,R_VALUE_OFFSET,MPI_DOUBLE,MPI_ANY_SOURCE,MPI_ANY_TAG,MPI_COMM_WORLD,&status);
          
          shmem_int_wait(&flag,-1);

          running--;
          // std::cout<<"received energy E_tot ="<<r_values[0]<<std::endl;
          // std::cout<<"    band energy E_band="<<r_values[1]<<std::endl;
          if(total_init_steps>0)
          {
            //int r_group=(status.MPI_SOURCE-align)/comm_size;
            int r_group=(flag-align)/comm_size;
            std::cout<<"starting additional calculation in group "<<r_group<<std::endl;

            if(init_steps[r_group]>0)
            {
              more_work = !(generator->generateUnsampledEvec(r_group,evecs[r_group],r_values[energyIndex]));
              init_steps[r_group]--; total_init_steps--;
            }
                
            //MPI_Send(evecs[r_group], 3*size_lsms, MPI_DOUBLE, lsms_rank0[r_group], 5, MPI_COMM_WORLD);
            shmem_double_put(r_values, evecs[r_group],  3*size_lsms, lsms_rank0[r_group]); //TODO check this
            shmem_fence();
                
            num_steps--; running++; stepCount++;
            if(restrict_steps && num_steps<=0) more_work=false;
            if(restrict_steps) std::cout<<"      "<<num_steps<<" steps remaining\n";
            walltime = get_rtc() - walltime_0;
            if(restrict_time && walltime>=max_time) more_work=false;
            if(restrict_time) std::cout<<"      "<<max_time-walltime<<" seconds remaining\n";
          }
              
        }
      }
      more_work=true;
      running=0;
      for(int i=0; i<num_lsms; i++)
      {
        std::cout<<"starting main calculation in group "<<i<<std::endl;
        //MPI_Send(evecs[i], 3*size_lsms, MPI_DOUBLE, lsms_rank0[i], 5, MPI_COMM_WORLD);
        shmem_double_put(evec, evecs[i], 3*size_lsms, lsms_rank0[i]);
        shmem_int_p(&op, 5, lsms_rank0[i]);
        shmem_fence();
        num_steps--; running++; stepCount++;
        if(restrict_steps) std::cout<<"      "<<num_steps<<" steps remaining\n";
      }
      shmem_barrier(0, lsms_rank0[0], 2, pSync1);
        
      generator->startSampling();
      // wait for results and issue new commands or wind down
      while(running>0)
      {
        //MPI_Recv(r_values,R_VALUE_OFFSET+3*size_lsms,MPI_DOUBLE,MPI_ANY_SOURCE,MPI_ANY_TAG,MPI_COMM_WORLD,&status);
        shmem_int_wait(&flag,-1);

        running--;
        std::cout<<"received energy E_tot ="<<r_values[0]<<std::endl;
        std::cout<<"    band energy E_band="<<r_values[1]<<std::endl;
        // printf("from status.MPI_SOURCE=%d\n",status.MPI_SOURCE);
        energy_accumulator+=r_values[0]; energies_accumulated++;
        if(more_work)
        {
          int r_group=(status.MPI_SOURCE-align)/comm_size;
          std::cout<<"starting additional calculation in group "<<r_group<<std::endl;
              
          if(generator_needs_moment)
          {
            double m0,m1,m2;
            m0=0.0; m1=0.0; m2=0.0;
            for(int i=0; i<3*size_lsms; i+=3)
            {
              m0+=r_values[R_VALUE_OFFSET+i];
              m1+=r_values[R_VALUE_OFFSET+i+1];
              m2+=r_values[R_VALUE_OFFSET+i+2];
            }
            switch(second_dimension)
            {
            case  MagneticMoment : magnetization=std::sqrt(m0*m0+m1*m1+m2*m2); break;
            case  MagneticMomentX : magnetization=m0; break;
            case  MagneticMomentY : magnetization=m1; break;
            case  MagneticMomentZ : magnetization=m2; break;
            }
            if(generator->generateEvec(r_group,evecs[r_group],r_values[energyIndex],magnetization, &accepted))
              more_work=false;
          } else {
            if(generator->generateEvec(r_group,evecs[r_group],r_values[energyIndex], &accepted)) more_work=false;
          }

          //MPI_Send(evecs[r_group], 3*size_lsms, MPI_DOUBLE, lsms_rank0[r_group], 5, MPI_COMM_WORLD);
          shmem_double_put(r_values, evecs[r_group],  3*size_lsms, lsms_rank0[r_group]); //TODO check this
          shmem_fence();

          num_steps--; running++; stepCount++;
          if(restrict_steps && num_steps<=0) more_work=false;
          if(restrict_steps) std::cout<<"      "<<num_steps<<" steps remaining\n";
          walltime = get_rtc() - walltime_0;
          if(restrict_time && walltime>=max_time) more_work=false;
          if(restrict_time) std::cout<<"      "<<max_time-walltime<<" seconds remaining\n";
        }
        else
        {
          // send an exit message to this instance of LSMS
          int r_group=(status.MPI_SOURCE-align)/comm_size;

          MPI_Send(evecs[r_group], 3*size_lsms, MPI_DOUBLE, lsms_rank0[r_group], 2, MPI_COMM_WORLD);
        }

        if(step_out_flag && accepted)
        {
          step_out_file<<"# iteration "<<energies_accumulated<<std::endl;
          step_out_file.precision(15);
          step_out_file<<energies_accumulated<<std::endl;
          step_out_file<<r_values[0]<<"  "<<r_values[1]<<std::endl;
          for(int j=0; j<3*size_lsms; j+=3)
          {
            step_out_file<<r_values[j+R_VALUE_OFFSET]<<"  "<<r_values[j+R_VALUE_OFFSET+1]
                         <<"  "<<r_values[j+R_VALUE_OFFSET+2]<<std::endl;
          }
        }
        // write restart file every restartWriteFrequency seconds
        if(walltime>nextWriteTime)
        {
          generator->writeState("WLrestart.jsn");
          nextWriteTime+=restartWriteFrequency;
        }

      }
      generator->writeState("WLrestart.jsn");
/*
  if(evec_generation_mode==WangLandau_1d)
  (static_cast<WL1dEvecGenerator<std::mt19937> *>(generator))->writeState("WLrestart.state");
  if(evec_generation_mode==ExhaustiveIsing)
  (static_cast<ExhaustiveIsing1dEvecGenerator *>(generator))->writeState("WLrestart.state");
*/
      for(int i=0; i<num_lsms; i++) free(evecs[i]);
      shfree(evecs);
      //shfree(r_values);
    }
  }

  if(world_rank==0)
  {
    if(step_out_flag)
    {
      step_out_file<<"# end\n-1\n"
                   <<energy_accumulator/double(energies_accumulated)<<std::endl;
      step_out_file.close();
    }
    std::cout<<"Finished all scheduled calculations. Freeing resources.\n";
    std::cout<<"Energy mean = "<<energy_accumulator/double(energies_accumulated)<<"Ry\n";
  }


  if(num_lsms>1)
  {
    // make sure averyone arrives here:
    MPI_Bcast(stupid,37,MPI_CHAR,0,MPI_COMM_WORLD);

    if(world_rank==0)
    {
      MPI_Comm_free(&local_comm);
    }
    else if(my_group>=0)
    {
      MPI_Comm_free(&local_comm);
    }
  }



  if(world_rank==0)
  {
    double walltime = get_rtc() - walltime_0;
    std::cout<<" WL-LSMS finished in "<<walltime<<" seconds.\n";
    std::cout<<" Monte-Carlo steps / walltime = "
             <<double(stepCount)/walltime<<"/sec\n";
  }

#ifdef USE_PAPI
  PAPI_stop_counters(papi_values,hw_counters);
  papi_values[hw_counters  ] = PAPI_get_real_cyc()-papi_real_cyc_0;
  papi_values[hw_counters+1] = PAPI_get_real_usec()-papi_real_usec_0;
  papi_values[hw_counters+2] = PAPI_get_virt_cyc()-papi_virt_cyc_0;
  papi_values[hw_counters+3] = PAPI_get_virt_usec()-papi_virt_usec_0;
  long long accumulated_counters[NUM_PAPI_EVENTS+4];
/*
  for(int i=0; i<hw_counters; i++)
  {
  printline(ttos(papi_event_name[i])+" = "+ttos(papi_values[i]),
  std::cout,parameters.myrankWorld);
  }
  printline("PAPI real cycles : "+ttos(papi_values[hw_counters]),
  std::cout,parameters.myrankWorld);
  printline("PAPI real usecs : "+ttos(papi_values[hw_counters+1]),
  std::cout,parameters.myrankWorld);
  printline("PAPI user cycles : "+ttos(papi_values[hw_counters+2]),
  std::cout,parameters.myrankWorld);
  printline("PAPI user usecs : "+ttos(papi_values[hw_counters+3]),
  std::cout,parameters.myrankWorld);
*/
  
  //MPI_Reduce(papi_values,accumulated_counters,hw_counters+4,
  //           MPI_LONG,MPI_SUM,0,MPI_COMM_WORLD);

  shmem_long_sum_to_all(accumulated_counters, papi_values, hw_counters+4,
      comm.pestart, comm.logPE_stride, comm.size, pWrk_i, pSync2);



  if(world_rank==0)
  {
    for(int i=0; i<hw_counters; i++)
    {
      std::cout<<"Accumulated: "<<(papi_event_name[i])<<" = "<<(accumulated_counters[i])<<"\n";
    }
    std::cout<<"PAPI accumulated real cycles : "<<(accumulated_counters[hw_counters])<<"\n";
    std::cout<<"PAPI accumulated user cycles : "<<(accumulated_counters[hw_counters+2])<<"\n";
    double gflops_papi = ((double)accumulated_counters[1])/
      (1000.0*(double)papi_values[hw_counters+1]);
    double gflops_hw_double = ((double)accumulated_counters[2])/
      (1000.0*(double)papi_values[hw_counters+1]);
    double gflops_hw_single = ((double)accumulated_counters[3])/
      (1000.0*(double)papi_values[hw_counters+1]);
    double gips = ((double)accumulated_counters[0])/(1000.0*(double)papi_values[hw_counters+1]);
    std::cout<<"PAPI_FP_OPS real GFLOP/s : "<<(gflops_papi)<<"\n";
    std::cout<<"PAPI hw double real GFLOP/s : "<<(gflops_hw_double)<<"\n";
    std::cout<<"PAPI hw single real GFLOP/s : "<<(gflops_hw_single)<<"\n";
    std::cout<<"PAPI real GINST/s : "<<(gips)<<"\n";
  }
#endif


  //MPI_Finalize();
  return 0;
}
Esempio n. 24
0
static int test_item4(void)
{
    int rc = TC_PASS;
    TYPE_VALUE* shmem_addr = NULL;
    TYPE_VALUE* send_addr = NULL;
    TYPE_VALUE my_value = 0;
    TYPE_VALUE peer_value = 0;
    TYPE_VALUE expect_value = 0;
    int num_proc = 0;
    int my_proc = 0;
    int peer_proc = 0;

    num_proc = _num_pes();
    my_proc = _my_pe();

    shmem_addr = (TYPE_VALUE*)shmalloc(sizeof(*shmem_addr) * __max_buffer_size);
    send_addr = (TYPE_VALUE*)sys_malloc(sizeof(*send_addr) * __max_buffer_size);
    if (shmem_addr && send_addr)
    {
        INT64_TYPE i = 0;
        long cur_buf_size = 0;

        for (i = 0; (i < __cycle_count) && (rc == TC_PASS); i++)
        {
            cur_buf_size = sys_max(1, (i + 1) * __max_buffer_size / __cycle_count);

            /* Set my value */
            my_value = (-1);
            fill_buffer((void *)shmem_addr, cur_buf_size, (void *)&my_value, sizeof(my_value));

            /* Give some time to all PE for setting their values */
            shmem_barrier_all();

            /* Define peer and it value */
            peer_proc = (my_proc + 1) % num_proc;
            peer_value = (peer_proc % 2 ? 1 : -1) * (i * (MAX_VALUE / __cycle_count));
            fill_buffer((void *)send_addr, cur_buf_size, (void *)&peer_value, sizeof(peer_value));

            /* Define expected value */
            expect_value = (my_proc % 2 ? 1 : -1) * (i * (MAX_VALUE / __cycle_count));

            /* Get value put by peer */
            FUNC_VALUE(shmem_addr, send_addr, cur_buf_size, peer_proc);

            /* Get value put by peer:
             * These routines start the remote transfer and may return before the data
             * is delivered to the remote PE
             */
            wait_for_put_completion(peer_proc,10 /* wait for 10 secs */);

            rc = (!compare_buffer_with_const_longdouble(shmem_addr, cur_buf_size, expect_value) ? TC_PASS : TC_FAIL);

            log_debug(OSH_TC, "my(#%d:%Lf) peer(#%d:%Lf) expected = %Lf buffer size = %lld\n",
                               my_proc, (long double)my_value, peer_proc, (long double)peer_value, (long double)expect_value, (INT64_TYPE)1);

            if (rc)
            {
                TYPE_VALUE* check_addr = shmem_addr;
                int odd_index = compare_buffer_with_const_longdouble(check_addr, cur_buf_size, expect_value);
                int show_index = (odd_index > 1 ? odd_index - 2 : 0);
                int show_size = sizeof(*check_addr) * sys_min(3, cur_buf_size - odd_index - 1);

                log_debug(OSH_TC, "index of incorrect value: 0x%08X (%d)\n", odd_index - 1, odd_index - 1);
                log_debug(OSH_TC, "buffer interval: 0x%08X - 0x%08X\n", show_index, show_index + show_size);
                show_buffer(check_addr + show_index, show_size);
            }
        }
    }
    else
    {
        rc = TC_SETUP_FAIL;
    }

    if (send_addr)
    {
        sys_free(send_addr);
    }

    if (shmem_addr)
    {
        shfree(shmem_addr);
    }

    return rc;
}
Esempio n. 25
0
/*
 * Test item: write a single value to the next PE, COUNT_VALUE times.
 *
 * Each iteration computes the value for the peer PE (my_proc + 1) % num_proc,
 * passes it to FUNC_VALUE together with the symmetric address, waits for
 * completion and compares the local symmetric variable with the value this PE
 * expects for itself.
 *
 * Returns TC_PASS when every iteration matched, TC_FAIL when at least one
 * iteration mismatched, and TC_SETUP_FAIL when the symmetric allocation failed.
 */
static int test_item3(void)
{
    int rc = TC_PASS;
    TYPE_VALUE* shmem_addr = NULL;
    TYPE_VALUE my_value = 0;
    TYPE_VALUE peer_value = 0;
    TYPE_VALUE expect_value = 0;
    int num_proc = 0;
    int my_proc = 0;
    int peer_proc = 0;

    num_proc = _num_pes();
    my_proc = _my_pe();

    shmem_addr = shmalloc(sizeof(*shmem_addr));
    if (shmem_addr)
    {
        TYPE_VALUE value = -1;
        INT64_TYPE i = 0;

        /* Set my value */
        my_value = (-1);
        *shmem_addr = my_value;
        for (i = 0; i < COUNT_VALUE; i++)
        {
            /* Define peer and its value: sign depends on the parity of the PE number */
            peer_proc = (my_proc + 1) % num_proc;
            peer_value = (peer_proc % 2 ? 1 : -1) * (i * STEP_VALUE);

            /* Define expected value (same formula, evaluated for this PE) */
            expect_value = (my_proc % 2 ? 1 : -1) * (i * STEP_VALUE);

            /* This guarantees that PE set initial value before peer change one */
            shmem_barrier_all();

            /* Write value to peer */
            FUNC_VALUE(shmem_addr, peer_value, peer_proc);

            /* Get value put by peer:
             * These routines start the remote transfer and may return before the data
             * is delivered to the remote PE
             */
            wait_for_put_completion(peer_proc,10 /* wait for 10 secs */);
            value = *shmem_addr;

            /* Keep a failure once it is seen: a later matching iteration must not
             * reset the result to TC_PASS.  The loop still runs every iteration so
             * that all PEs execute the same number of barriers.
             */
            if (!sys_fcompare(expect_value, value))
            {
                rc = TC_FAIL;
            }

            log_debug(OSH_TC, "my(#%d:%Lf) peer(#%d:%Lf) expected = %Lf vs got = %Lf\n",
                               my_proc, (long double)my_value, peer_proc, (long double)peer_value, (long double)expect_value, (long double)value);
        }
    }
    else
    {
        rc = TC_SETUP_FAIL;
    }

    if (shmem_addr)
    {
        shfree(shmem_addr);
    }

    return rc;
}
Esempio n. 26
0
/*
 * Test item: every PE applies FUNC_VALUE to the same symmetric variable and the
 * values returned by FUNC_VALUE are collected and checked.
 *
 * peer_proc is never changed from 0, so all PEs target PE 0.  After each round
 * the returned values are gathered with shmem_collect32 into check_arr and the
 * test counts how many PE numbers in [0, num_proc) do not appear in it; more
 * than one missing number fails the test.
 * NOTE(review): FUNC_VALUE is presumably a fetching atomic (e.g. swap), which
 * would explain why exactly one value is expected to be missing - confirm.
 *
 * Returns TC_PASS, TC_FAIL, or TC_SETUP_FAIL when any allocation failed.
 */
static int test_item3(void)
{
    int rc = TC_PASS;
    TYPE_VALUE* shmem_addr = NULL;
    TYPE_VALUE my_value = 0;
    TYPE_VALUE* check_arr = NULL;
    int num_proc = 0;
    int my_proc = 0;
    int peer_proc = 0;
    int i = 0;
    int j = 0;
    int k = 0;
    int flag = 0;
    int missed_values = 0;
    static long* pSync = NULL;

    num_proc = _num_pes();
    my_proc = _my_pe();

    shmem_addr = shmalloc(sizeof(*shmem_addr));
    check_arr = shmalloc(sizeof(*check_arr) * num_proc);
    pSync = shmalloc(sizeof(*pSync) * _SHMEM_COLLECT_SYNC_SIZE);

    if (shmem_addr && pSync && check_arr)
    {
        /* static so that it can be used as the source of the collect */
        static TYPE_VALUE value = 0;

        /* Initialise the sync array only after the allocation is known to be valid */
        for (i = 0; i < _SHMEM_COLLECT_SYNC_SIZE; i++) {
            pSync[i] = _SHMEM_SYNC_VALUE;
        }

        /* Store my value */
        my_value = (TYPE_VALUE)my_proc;
        *shmem_addr = DEFAULT_VALUE;

        /* Make sure every PE has initialised its variable and pSync */
        shmem_barrier_all();
        for (i = 0; (i < __cycle_count) && (rc == TC_PASS); i++)
        {
            missed_values = 0;
            my_value = (TYPE_VALUE)my_proc;

            value = FUNC_VALUE(shmem_addr, my_value, peer_proc);

            shmem_barrier_all();
            /* Element count is expressed in 32-bit words, rounded up */
            shmem_collect32(check_arr, &value, (sizeof(value) + 3 ) / 4, 0, 0, num_proc, pSync);
            shmem_barrier_all();

            /* For every PE number j, look for it among the collected values */
            for (j = 0; j < num_proc ; j++)
            {
                flag = 0;
                for (k = 0; k < num_proc; k++)
                {
                    if (sys_fcompare(check_arr[k], j))
                    {
                        flag = 1;
                        break;
                    }
                }
                if (flag == 0)
                {
                    missed_values++;
                }
                if (missed_values > 1)
                {
                    rc = TC_FAIL;
                    break;
                }
            }
        }
        shmem_barrier_all();

        log_debug(OSH_TC, "my(#%d:%lld) missed_values expected = 1 vs missed_values = %d\n",
                           my_proc, (INT64_TYPE)my_value, missed_values);
    }
    else
    {
        rc = TC_SETUP_FAIL;
    }

    if (shmem_addr)
    {
        shfree(shmem_addr);
    }
    if (check_arr)
    {
        /* Release the collect target as well; it was previously leaked */
        shfree(check_arr);
    }
    if (pSync)
    {
        shfree(pSync);
        /* pSync is static: do not keep a dangling pointer across calls */
        pSync = NULL;
    }

    return rc;
}
Esempio n. 27
0
/*
 * main routine
 *
 * Usage: <prog> <port number> [mode: 0=seq,1=thr,2=proc] [log level]
 *
 * Allocates the shared log level, parses the command line, opens a listening
 * TCP socket on the given port (all interfaces) and hands it to the proxy
 * implementation selected by mode: 0 -> proxy_seq, 1 -> proxy_thr, any other
 * value -> proxy_proc.  Exits with EXIT_FAILURE on any setup error and
 * EXIT_SUCCESS once the proxy returns and the socket has been closed.
 */
int main(int argc, char **argv)
{
        int listenfd, r;
        unsigned short port;
        long port_arg;
        char *port_end;
        struct sockaddr_in serv;
        int mode;
        int *log_level;

        /* A peer closing its connection must not kill the proxy */
        signal(SIGPIPE, SIG_IGN);
        log_level = shmalloc("/log_level", sizeof(int));
        if (!log_level) {
                slog(SLOG_ERROR, "could not allocate shm for slog");
                exit(EXIT_FAILURE);
        }
        *log_level = SLOG_ALL;
        slog_level(log_level);

        if (argc < 2) {
                fprintf(stderr, "Usage: %s <port number> "
                                "[mode: 0=seq,1=thr,2=proc]\n", argv[0]);
                exit(EXIT_FAILURE);
        } else if (argc >= 3) {
                mode = atoi(argv[2]);
                if (argc >= 4) {
                        *log_level = atoi(argv[3]);
                        fprintf(stderr, "%s: *log_level = %d\n",
                                argv[0], *log_level);
                }

        } else {
                fprintf(stderr, "%s: no mode specified. "
                                "defaulting to mode=0 (seq)\n", argv[0]);
                mode = 0;
        }

        /*
         * Parse the port into a wide type and range-check it before narrowing.
         * Assigning atoi()'s result straight to an unsigned short let negative
         * and out-of-range values wrap into a "valid" port silently.
         */
        port_end = NULL;
        port_arg = strtol(argv[1], &port_end, 10);
        if (port_end == argv[1] || *port_end != '\0' ||
            port_arg <= 0 || port_arg > 65535) {
                fprintf(stderr, "%s: bad port\n", argv[0]); 
                exit(EXIT_FAILURE);
        }
        port = (unsigned short)port_arg;

        listenfd = socket(AF_INET, SOCK_STREAM, 0);
        if (listenfd == -1) {
                slog_perror("socket");
                exit(EXIT_FAILURE);
        }


        /* Listen on every local interface */
        memset(&serv, 0, sizeof(serv));
        serv.sin_family = AF_INET;
        serv.sin_addr.s_addr = htonl(INADDR_ANY);
        serv.sin_port = htons(port);

        r = bind(listenfd, (struct sockaddr *)&serv, sizeof(serv));
        if (r == -1) {
                slog_perror("bind");
                exit(EXIT_FAILURE);
        }

        r = listen(listenfd, LISTENQ);
        if (r == -1) {
                slog_perror("listen");
                exit(EXIT_FAILURE);
        }

        /* Any mode other than 0 or 1 falls through to the process-based proxy */
        if (mode == 0) {
                proxy_seq(listenfd);
        } else if (mode == 1) {
                proxy_thr(listenfd);
        } else {
                proxy_proc(listenfd);
        }

        r = close(listenfd);
        if (r == -1) {
                slog_perror("close"); 
                exit(EXIT_FAILURE);
        }

        shfree("/log_level", log_level, sizeof(int));
        exit(EXIT_SUCCESS);
}
/*
 * Test item: single-element reduction across all PEs through FUNC_VALUE.
 *
 * Each PE contributes (my_proc + 1), or 1 once my_proc reaches
 * OVERFLOW_FACTORIAL_LIMIT, and expects the factorial of
 * min(num_proc, OVERFLOW_FACTORIAL_LIMIT) as the result.
 * NOTE(review): FUNC_VALUE is presumably a product-to-all reduction, since the
 * expected value is the product of all contributions - confirm.
 *
 * Uses the file-level pWrk and pSync arrays.  Returns TC_PASS or TC_FAIL
 * depending on the comparison, or TC_SETUP_FAIL when an allocation failed.
 */
static int test_item2(void)
{
    TYPE_VALUE* target_addr = NULL;
    TYPE_VALUE* source_addr = NULL;
    TYPE_VALUE source_value = 0;
    TYPE_VALUE expect_value = 0;
    int rc = TC_PASS;
    int num_proc = _num_pes();
    int my_proc = _my_pe();

    /* Work array first; the data buffers are only requested when it succeeded */
    pWrk = shmalloc(sizeof(*pWrk) * sys_max(1/2 + 1, _SHMEM_REDUCE_MIN_WRKDATA_SIZE));
    if (pWrk != NULL)
    {
        target_addr = shmalloc(sizeof(*target_addr));
        source_addr = shmalloc(sizeof(*source_addr));
    }

    if (!target_addr || !source_addr)
    {
        rc = TC_SETUP_FAIL;
    }
    else
    {
        TYPE_VALUE value = DEFAULT_VALUE;
        int factor = 0;
        int idx = 0;
        int waited = 0;

        /* Target starts at the sentinel so a finished reduction is detectable */
        *target_addr = DEFAULT_VALUE;

        /* This PE's contribution */
        if (my_proc < OVERFLOW_FACTORIAL_LIMIT)
        {
            source_value = (TYPE_VALUE)(my_proc + 1);
        }
        else
        {
            source_value = 1;
        }
        *source_addr = source_value;

        /* Expected result: factorial of the capped PE count */
        factor = num_proc;
        if (factor > OVERFLOW_FACTORIAL_LIMIT)
        {
            factor = OVERFLOW_FACTORIAL_LIMIT;
        }
        expect_value = 1;
        for (; factor != 0; factor--)
        {
            expect_value *= factor;
        }

        /* Reset the sync array, then make sure every PE has finished its setup */
        for (idx = 0; idx < _SHMEM_REDUCE_SYNC_SIZE; idx++)
        {
            pSync[idx] = _SHMEM_SYNC_VALUE;
        }
        shmem_barrier_all();

        /* Run the reduction over all PEs, one element each */
        FUNC_VALUE(target_addr, source_addr, 1, 0, 0, num_proc, pWrk, pSync);

        /* The call may return before the result is visible locally:
         * synchronise, then poll until the sentinel is replaced or the
         * wait budget is used up.
         */
        shmem_barrier_all();
        for (waited = 0;
             sys_fcompare(*target_addr, DEFAULT_VALUE) && waited < 1000 * WAIT_COUNT;
             waited++)
        {
            usleep(1);
        }
        value = *target_addr;

        if (sys_fcompare(expect_value, value))
        {
            rc = TC_PASS;
        }
        else
        {
            rc = TC_FAIL;
        }

        log_debug(OSH_TC, "my#%d source = %Lf expected = %Lf actual = %Lf\n",
                           my_proc, (long double)source_value, (long double)expect_value, (long double)value);
    }

    /* Release whatever was allocated, in the same order as before */
    if (source_addr)
    {
        shfree(source_addr);
    }

    if (target_addr)
    {
        shfree(target_addr);
    }

    if (pWrk)
    {
        shfree(pWrk);
        pWrk = NULL;
    }

    return rc;
}
Esempio n. 29
0
/****************************************************************************
 * Place for Test Item functions
 ***************************************************************************/
/*
 * Strided transfer test.
 *
 * On every iteration each PE fills its symmetric array with (my_proc + 1),
 * then calls FUNC_VALUE to fetch elements from the next PE in ring order
 * (presumably a strided get, judging by the target/source stride arguments)
 * and compares the local buffer with a locally built expectation.  The
 * stride grows by one per iteration until it reaches MAX_ARRAY_SIZE/2 - 1.
 *
 * Returns TC_PASS when every iteration matches, TC_FAIL on the first
 * mismatch, and TC_SETUP_FAIL when any of the three work buffers could not
 * be allocated.
 */
static int test_item1(void)
{
    int rc = TC_PASS;
    TYPE_VALUE* shmem_addr = NULL;
    TYPE_VALUE* local_addr = NULL;
    TYPE_VALUE* expect_value = NULL;
    TYPE_VALUE my_value = 0;
    TYPE_VALUE peer_value = 0;
    int num_proc = 0;
    int my_proc = 0;
    int peer_proc = 0;
    int max_stride = MAX_ARRAY_SIZE/2-1;

    num_proc = _num_pes();
    my_proc = _my_pe();
    shmem_addr = shmalloc(sizeof(*shmem_addr)*MAX_ARRAY_SIZE);
    local_addr = malloc(sizeof(*local_addr)*MAX_ARRAY_SIZE);
    expect_value = malloc(sizeof(*expect_value)*MAX_ARRAY_SIZE);

    /* All three buffers are dereferenced below, so all three must exist. */
    if (shmem_addr && local_addr && expect_value)
    {
        INT64_TYPE i = 0;
        INT64_TYPE j = 0;
        size_t odd_pos = 0;

        for (i = 0; (i < COUNT_VALUE) && (rc == TC_PASS); i++)
        {
            /* Same stride on target and source; capped at max_stride */
            int tst = (i < max_stride) ? (int)(i + 1) : max_stride;
            int sst = tst;
            /* Number of strided elements that fit inside the array */
            int num_to_get = MAX_ARRAY_SIZE/tst;

            /* Set my value */
            my_value = (TYPE_VALUE)(my_proc + 1);
            memset(local_addr, 0, sizeof(*local_addr)*MAX_ARRAY_SIZE);
            memset(expect_value, 0, sizeof(*expect_value)*MAX_ARRAY_SIZE);
            for (j = 0; j < MAX_ARRAY_SIZE; j++)
            {
                shmem_addr[j] = my_value;
            }

            /* Define peer and its value */
            peer_proc = (my_proc + 1) % num_proc;
            peer_value = (TYPE_VALUE)(peer_proc + 1);

            /* Define expected value: only every tst-th slot is written */
            for (j = 0; j < num_to_get; j++)
            {
                expect_value[j*tst] = peer_value;
            }

            /* Barrier so every PE has filled its array before anyone reads it */
            shmem_barrier_all();

            /* Get value from peer */
            FUNC_VALUE(local_addr, shmem_addr,tst,sst,num_to_get,peer_proc);

            /*
             * NOTE(review): the length passed is MAX_ARRAY_SIZE; if
             * compare_buffer() counts bytes rather than elements this checks
             * only the beginning of the buffer -- confirm against the helper.
             */
            rc = (compare_buffer((unsigned char*)local_addr, (unsigned char*)expect_value, MAX_ARRAY_SIZE, &odd_pos) ? TC_PASS : TC_FAIL);

            log_debug(OSH_TC, "my(#%d:%lld) peer(#%d:%lld) expected = %lld vs got = %lld\n",
                               my_proc, (INT64_TYPE)my_value, peer_proc, (INT64_TYPE)peer_value, (INT64_TYPE)expect_value[0], (INT64_TYPE)local_addr[0]);

            /* Barrier so no PE overwrites its array while a peer still reads it */
            shmem_barrier_all();
        }
    }
    else
    {
        rc = TC_SETUP_FAIL;
    }

    /* free(NULL) is a no-op, so no guards are needed for the heap buffers */
    free(local_addr);
    free(expect_value);
    if (shmem_addr)
    {
        shfree(shmem_addr);
    }

    return rc;
}
/*
 * Reduction test over buffers of growing size.
 *
 * For each of __cycle_count iterations the buffer length grows towards
 * __max_buffer_size.  Every PE fills its source buffer with (my_proc + 1)
 * (or 1 once my_proc reaches OVERFLOW_FACTORIAL_LIMIT), calls FUNC_VALUE
 * across all PEs (presumably a product reduction, since the expected result
 * is a factorial) and checks that every element of the target buffer equals
 * the factorial of min(num_proc, OVERFLOW_FACTORIAL_LIMIT).
 *
 * pWrk and pSync are not declared in this function; pWrk is allocated and
 * released once per iteration and left NULL afterwards.
 *
 * Returns TC_PASS, TC_FAIL on a mismatch, or TC_SETUP_FAIL when a symmetric
 * allocation fails.
 */
static int test_item6(void)
{
    int rc = TC_PASS;
    TYPE_VALUE* target_addr = NULL;
    TYPE_VALUE* source_addr = NULL;
    TYPE_VALUE source_value = 0;
    TYPE_VALUE expect_value = 0;
    int num_proc = 0;
    int my_proc = 0;

    num_proc = _num_pes();
    my_proc = _my_pe();

    target_addr = (TYPE_VALUE*)shmalloc(sizeof(*target_addr) * __max_buffer_size);
    source_addr = (TYPE_VALUE*)shmalloc(sizeof(*source_addr) * __max_buffer_size);
    if (target_addr && source_addr)
    {
        TYPE_VALUE value = DEFAULT_VALUE;
        int i = 0;
        int j = 0;
        long cur_buf_size = 0;

        /* My contribution; PEs beyond the limit contribute the neutral 1 */
        source_value = ( my_proc < OVERFLOW_FACTORIAL_LIMIT ? (TYPE_VALUE)(my_proc + 1) : 1);

        /* Expected value: factorial of the number of contributing PEs */
        expect_value = 1;
        {
            int k = ( num_proc <= OVERFLOW_FACTORIAL_LIMIT ? num_proc : OVERFLOW_FACTORIAL_LIMIT);
            while (k) expect_value *= k--;
        }

        for (i = 0; (i < __cycle_count) && (rc == TC_PASS); i++)
        {
            cur_buf_size = sys_max(1, (i + 1) * __max_buffer_size / __cycle_count);
            pWrk = shmalloc(sizeof(*pWrk) * sys_max(cur_buf_size/2 + 1, _SHMEM_REDUCE_MIN_WRKDATA_SIZE));
            if (pWrk)
            {
                int odd_index = 0;

                /* Set initial target value */
                value = DEFAULT_VALUE;
                fill_buffer((void *)target_addr, cur_buf_size, (void *)&value, sizeof(value));

                /* Give some time to all PE for setting their values */
                shmem_barrier_all();

                /* Set my value */
                fill_buffer((void *)source_addr, cur_buf_size, (void *)&source_value, sizeof(source_value));

                /* Reset the sync array before the collective call */
                for ( j = 0; j < _SHMEM_REDUCE_SYNC_SIZE; j++ )
                {
                    pSync[j] = _SHMEM_SYNC_VALUE;
                }
                shmem_barrier_all();

                /* Run the collective over all PEs */
                FUNC_VALUE(target_addr, source_addr, cur_buf_size, 0, 0, num_proc, pWrk, pSync);

                /* Poll the first element for up to WAIT_COUNT seconds in case
                 * the result is not yet visible after the barrier
                 */
                shmem_barrier_all();
                {
                    int wait = WAIT_COUNT;

                    while (wait--)
                    {
                        value = *target_addr;
                        if (sys_fcompare(expect_value, value)) break;
                        sleep(1);
                    }
                }

                /* 0 means every element matched; otherwise the 1-based
                 * position of the first element that differs
                 */
                odd_index = compare_buffer_with_const_longdouble(target_addr, cur_buf_size, expect_value);
                rc = (!odd_index ? TC_PASS : TC_FAIL);

                log_debug(OSH_TC, "my#%d source = %Lf expected = %Lf actual = %Lf buffer size = %lld\n",
                                   my_proc, (long double)source_value, (long double)expect_value, (long double)value, (INT64_TYPE)cur_buf_size);

                if (rc != TC_PASS)
                {
                    TYPE_VALUE* check_addr = target_addr;
                    /* Start one element before the bad one when possible */
                    int show_index = (odd_index > 1 ? odd_index - 2 : 0);
                    /* Show at most 3 elements, never past the end of the
                     * buffer (cur_buf_size - show_index is always >= 1)
                     */
                    long show_count = sys_min(3, cur_buf_size - show_index);
                    int show_size = (int)(sizeof(*check_addr) * show_count);

                    log_debug(OSH_TC, "index of incorrect value: 0x%08X (%d)\n", odd_index - 1, odd_index - 1);
                    log_debug(OSH_TC, "buffer interval: 0x%08X - 0x%08X\n", show_index, show_index + show_size);
                    show_buffer(check_addr + show_index, show_size);
                }

                shfree(pWrk);
                /* Do not leave a dangling pointer behind */
                pWrk = NULL;
            } else {
                rc = TC_SETUP_FAIL;
            }
        }
    }
    else
    {
        rc = TC_SETUP_FAIL;
    }

    if (source_addr)
    {
        shfree(source_addr);
    }

    if (target_addr)
    {
        shfree(target_addr);
    }

    return rc;
}