Example #1
0
int main (int argc, char *argv[])
{
   static char cvs_info[] = "BMkGRP $Date: $ $Revision: $ $RCSfile: all2all_main.c,v $ $Name: $";

   int itr;
   int idx;
   brand_t br;
   timer t, t0, t1;
   double nsec;

   double total_time = 0.0;
  
   int status = 0;
  
   int64 i, seed, arg, msize, tsize, len, oldsize=0, rep, cksum;
   uint64 *tab=NULL;
  
   start_pes(0);
   SELF=_my_pe();
   SIZE=_n_pes();

   if (argc < 5) {
      if (SELF == 0)
         fprintf (stderr, "Usage:\t%s seed msg_size(B) table_size(MB) rep_cnt "
	          "[ms2 ts2 rc2 ..]\n", argv[0]);
         status = 1;
      goto DONE;
   }
   seed = atol (argv[1]);
   if (SELF == 0)
      printf ("base seed is %ld\n", seed);
   seed += SELF << 32;
   brand_init (&br, seed);  // seed uniquely per PE

   arg = 2;
  
   while (arg < argc) {
    
    
      msize = atol (argv[arg++]);               if (arg >= argc)  break;
      /* Table size * 1 million. */
      tsize = atol (argv[arg++]) * (1L << 20);  if (arg >= argc)  break;
      //rep   = atol (argv[arg++]);
      rep   = 1;
      arg++;

      if (SELF == 0)  printf ("tsize = %ldMB  msize = %dB\n",
			        tsize/(1L<<20), msize);
      if (msize < sizeof(long)) {
         if (SELF == 0)  printf ("msize too short!\n");
         //status = 1;
         goto DONE;
      }
      //itr=0;

      idx = 0;

      switch(SIZE){
         case 2:
            idx = 0;
            break;
         case 4:
            idx = 1;
            break;
         case 8:
            idx = 2;
            break;
         default:
            fprintf(stderr,"warning, check sum for (%d) pes not supported.\n",
                        SIZE);
      }
 
      while (rep-- > 0) {

         /* START TIMING */
         //timer_clear (&t0);
         //timer_clear (&t1);
         //timer_start (&t0);
	    
         if ((tab == NULL) || (tsize > oldsize)) {
	   if (tab != NULL) {
	      dram_shfree (tab);
	      oldsize = 0;
	   }
	   if (SELF == 0)  printf ("trying dram_shmalloc of %ld bytes\n", tsize);
	   tab = (uint64 *) dram_shmalloc (tsize);
	
	   if (tab == NULL) {
	      if (SELF == 0)  printf ("dram_shmalloc failed!\n");
               status = 1;
	      goto DONE;
	  
	   }
	   oldsize = tsize;
         }
      
         // length in words
         len = tsize / sizeof(uint64);
      
         // important to init table
         // to ensure cksum consistency on different platforms
         memset(tab,0,tsize);
      
         for (i = 0; i < len; i+=64){
            tab[i] = brand(&br);
         }
     
         // we'll have destination/source arrays each of half size
         len /= 2;

         //timer_stop (&t0);
         // source checksum
         cksum = do_cksum (&tab[len], len);
         if (SELF == 0)  printf ("cksum is %016lx\n", cksum);
         if (SELF == 0){
            //if(cksum!=ckv[itr++]){
            /* Set up for one iteration only. */
            if(cksum!=ckv[idx]){
               printf ("cksum  %016lx != ckv[%d] %016x\n",cksum,idx,ckv[idx]);
	         gexit(1);
	   }
          
         }      	      

         //timer_start (&t1);
         len = do_all2all (&tab[0], &tab[len], len, msize/sizeof(uint64));
      
         shmem_barrier_all();
      
         //timer_stop (&t1);
         /* END TIMING */
#if 0

         // dest checksum
         i = do_cksum (&tab[0], len);
         if (i != cksum) {
	   printf ("PE %4ld  ERROR: %016lx != %016lx\n", SIZE, i, cksum);
	   status = 1;
	   goto DONE;
         }

#ifndef CHECKOFF
         if (i != known_v[gv]) {
            printf ("CHECKSUM PE %4ld  ERROR: %016lx != %016lx\n", SIZE, i, known_v[gv]);
            status = 1;
            goto DONE;
         }
         gv++;
#endif


         //t.accum_wall = t0.accum_wall + t1.accum_wall;
         //t.accum_cpus = t0.accum_cpus + t1.accum_cpus;


         /*if (SELF == 0) {

#ifdef PTIMES
	   printf ("%8.3f %8.3f\n",    t0.accum_wall , t1.accum_wall);
	   printf ("%8.3f %8.3f\n",    t0.accum_cpus , t1.accum_cpus);
#endif
	   printf ("wall reports %8.3f secs  cpus report %8.3f secs\n",
		    t.accum_wall, t.accum_cpus);
	   nsec = MAX(t.accum_wall, t.accum_cpus);
            total_time += nsec;
	   if (nsec > 0)
	     printf ("%8.3f MB/sec with %ld bytes transfers\n",
		     len*sizeof(uint64)/(double)(1L<<20)/nsec, msize);
         }*/
#endif
      }
      //if (SELF == 0)
         //printf ("\n");
   }
   //if (SELF == 0)
   //{
      //printf ("total time = %14.9f\n", total_time);

   //}
 
 DONE: 
   shmem_barrier_all();
   return status;
}
/*
 * Driver for the all-to-all benchmark.
 *
 * Command line:
 *    seed msg_size(B) table_size(MB) rep_cnt [ms2 ts2 rc2 ..]
 *
 * argv[1] is the base random seed; every following triple is validated
 * and passed to do_all2all().  A shared table of NWRDS 64-bit words is
 * allocated once, zeroed locally, and released before exit on every path
 * that allocated it.
 *
 * Always returns 0, including after a usage or validation error.
 */
int main (int argc, char *argv[])
{
  brand_t br;
  int64 seed, arg, msize, tsize, rep;
  TYPE *tab = NULL;   /* NULL until the shared table has been allocated */
  uint64 *loc;

  start_pes(0);

  open_df_mmu();

  if (argc < 5) {
    if (MY_GTHREAD == 0) {
      fprintf (stderr, "Usage:\t%s seed msg_size(B) table_size(MB) rep_cnt "
        "[ms2 ts2 rc2 ..]\n", argv[0]);
    }
    goto DONE;
  }

  // alloc some shared space
  // (checks for valid pointer and casts)
  tab = mpp_alloc (NWRDS * sizeof(uint64));

  // pointer to local space
#if defined(__UPC__)
  loc = (uint64 *)&tab[MY_GTHREAD];
#else
  loc = &tab[0];
#endif

  // init all local memory
  bzero ((void *)&loc[0], NWRDS * sizeof(uint64));

  seed = atol (argv[1]);
  if (MY_GTHREAD == 0) {
    printf ("base seed is %ld\n", (long)seed);
  }
  seed += (uint64)MY_GTHREAD << 32;
  brand_init (&br, seed);  // seed uniquely per PE

  arg = 2;
  while (arg < argc) {
    /* An incomplete trailing triple silently ends processing. */
    msize = atol (argv[arg++]);
    if (arg >= argc) {
      break;
    }
    tsize = atol (argv[arg++]) * (1L << 20);
    if (arg >= argc) {
      break;
    }
    rep   = atol (argv[arg++]);

    if (MY_GTHREAD == 0) {
      printf ("tsize = %ldMB  msize = %5ldB\n",
              (long)(tsize / (1L << 20)), (long)msize);
    }

    /* Compare as signed values so a negative msize is rejected too. */
    if (msize < (int64)sizeof(long)) {
      if (MY_GTHREAD == 0) {
        printf ("msize must be >= %ld B\n", (long)sizeof(long));
      }
      goto DONE;
    }
    if (tsize > (NWRDS * sizeof(long))) {
      if (MY_GTHREAD == 0) {
        printf ("tsize must be <= %ld MiB\n",
                (long)((NWRDS * sizeof(long)) / (1uL << 20)));
      }
      goto DONE;
    }

    // exits on error
    do_all2all (tab, loc, &br, msize, tsize, rep, 1);

    if (MY_GTHREAD == 0) {
      printf ("\n");
    }
  }

 DONE:
  // free up the shared memory (also on the validation-error paths,
  // which previously jumped past the release)
  if (tab != NULL) {
    mpp_free (tab);
  }

  mpp_barrier_all();

  close_df_mmu();
  return 0;
}