Esempio n. 1
0
int main (int argc, char *argv[])
{
   static char cvs_info[] = "BMkGRP $Date: $ $Revision: $ $RCSfile: all2all_main.c,v $ $Name: $";

   int itr;
   int idx;
   brand_t br;
   timer t, t0, t1;
   double nsec;

   double total_time = 0.0;
  
   int status = 0;
  
   int64 i, seed, arg, msize, tsize, len, oldsize=0, rep, cksum;
   uint64 *tab=NULL;
  
   start_pes(0);
   SELF=_my_pe();
   SIZE=_n_pes();

   if (argc < 5) {
      if (SELF == 0)
         fprintf (stderr, "Usage:\t%s seed msg_size(B) table_size(MB) rep_cnt "
	          "[ms2 ts2 rc2 ..]\n", argv[0]);
         status = 1;
      goto DONE;
   }
   seed = atol (argv[1]);
   if (SELF == 0)
      printf ("base seed is %ld\n", seed);
   seed += SELF << 32;
   brand_init (&br, seed);  // seed uniquely per PE

   arg = 2;
  
   while (arg < argc) {
    
    
      msize = atol (argv[arg++]);               if (arg >= argc)  break;
      /* Table size * 1 million. */
      tsize = atol (argv[arg++]) * (1L << 20);  if (arg >= argc)  break;
      //rep   = atol (argv[arg++]);
      rep   = 1;
      arg++;

      if (SELF == 0)  printf ("tsize = %ldMB  msize = %dB\n",
			        tsize/(1L<<20), msize);
      if (msize < sizeof(long)) {
         if (SELF == 0)  printf ("msize too short!\n");
         //status = 1;
         goto DONE;
      }
      //itr=0;

      idx = 0;

      switch(SIZE){
         case 2:
            idx = 0;
            break;
         case 4:
            idx = 1;
            break;
         case 8:
            idx = 2;
            break;
         default:
            fprintf(stderr,"warning, check sum for (%d) pes not supported.\n",
                        SIZE);
      }
 
      while (rep-- > 0) {

         /* START TIMING */
         //timer_clear (&t0);
         //timer_clear (&t1);
         //timer_start (&t0);
	    
         if ((tab == NULL) || (tsize > oldsize)) {
	   if (tab != NULL) {
	      dram_shfree (tab);
	      oldsize = 0;
	   }
	   if (SELF == 0)  printf ("trying dram_shmalloc of %ld bytes\n", tsize);
	   tab = (uint64 *) dram_shmalloc (tsize);
	
	   if (tab == NULL) {
	      if (SELF == 0)  printf ("dram_shmalloc failed!\n");
               status = 1;
	      goto DONE;
	  
	   }
	   oldsize = tsize;
         }
      
         // length in words
         len = tsize / sizeof(uint64);
      
         // important to init table
         // to ensure cksum consistency on different platforms
         memset(tab,0,tsize);
      
         for (i = 0; i < len; i+=64){
            tab[i] = brand(&br);
         }
     
         // we'll have destination/source arrays each of half size
         len /= 2;

         //timer_stop (&t0);
         // source checksum
         cksum = do_cksum (&tab[len], len);
         if (SELF == 0)  printf ("cksum is %016lx\n", cksum);
         if (SELF == 0){
            //if(cksum!=ckv[itr++]){
            /* Set up for one iteration only. */
            if(cksum!=ckv[idx]){
               printf ("cksum  %016lx != ckv[%d] %016x\n",cksum,idx,ckv[idx]);
	         gexit(1);
	   }
          
         }      	      

         //timer_start (&t1);
         len = do_all2all (&tab[0], &tab[len], len, msize/sizeof(uint64));
      
         shmem_barrier_all();
      
         //timer_stop (&t1);
         /* END TIMING */
#if 0

         // dest checksum
         i = do_cksum (&tab[0], len);
         if (i != cksum) {
	   printf ("PE %4ld  ERROR: %016lx != %016lx\n", SIZE, i, cksum);
	   status = 1;
	   goto DONE;
         }

#ifndef CHECKOFF
         if (i != known_v[gv]) {
            printf ("CHECKSUM PE %4ld  ERROR: %016lx != %016lx\n", SIZE, i, known_v[gv]);
            status = 1;
            goto DONE;
         }
         gv++;
#endif


         //t.accum_wall = t0.accum_wall + t1.accum_wall;
         //t.accum_cpus = t0.accum_cpus + t1.accum_cpus;


         /*if (SELF == 0) {

#ifdef PTIMES
	   printf ("%8.3f %8.3f\n",    t0.accum_wall , t1.accum_wall);
	   printf ("%8.3f %8.3f\n",    t0.accum_cpus , t1.accum_cpus);
#endif
	   printf ("wall reports %8.3f secs  cpus report %8.3f secs\n",
		    t.accum_wall, t.accum_cpus);
	   nsec = MAX(t.accum_wall, t.accum_cpus);
            total_time += nsec;
	   if (nsec > 0)
	     printf ("%8.3f MB/sec with %ld bytes transfers\n",
		     len*sizeof(uint64)/(double)(1L<<20)/nsec, msize);
         }*/
#endif
      }
      //if (SELF == 0)
         //printf ("\n");
   }
   //if (SELF == 0)
   //{
      //printf ("total time = %14.9f\n", total_time);

   //}
 
 DONE: 
   shmem_barrier_all();
   return status;
}
Esempio n. 2
0
/**
 * \brief Creates and initializes the working data for the plan
 *
 * Resets the plan's execution counter, sets up the performance timers (and
 * the PAPI event set when built with HAVE_PAPI) if DO_PERF is enabled, then
 * allocates one zeroed buffer that is carved into the work, out, data and
 * chk regions, fills the data region from the plan's random stream and
 * blocks it into the work region.
 *
 * \param [in] plan The struct that holds the plan's data values.
 * \return int Error flag value: ERR_CLEAN on success;
 *         make_error(ALLOC,generic_err) when the plan or its data pointer is
 *         NULL or the buffer cannot be allocated;
 *         make_error(0,specific_err) when ncols is not a multiple of
 *         BLOCKSIZE.
 * \sa parseCBAPlan
 * \sa makeCBAPlan
 * \sa execCBAPlan
 * \sa perfCBAPlan
 * \sa killCBAPlan
 */
int initCBAPlan(void *plan){
    int ret = make_error(ALLOC,generic_err);
    int nrow, ncol;
    size_t ncells;
    size_t i;
    Plan *p;
    CBA_data *ci = NULL;
    p = (Plan *)plan;

    #ifdef HAVE_PAPI
    int temp_event, k;
    int PAPI_Events [NUM_PAPI_EVENTS] = PAPI_COUNTERS;
    char *PAPI_units [NUM_PAPI_EVENTS] = PAPI_UNITS;
    #endif //HAVE_PAPI

    if(p){
        ci = (CBA_data *)p->vptr;
        p->exec_count = 0;
        if(DO_PERF){
            perftimer_init(&p->timers, NUM_TIMERS);

            #ifdef HAVE_PAPI
            /* Initialize plan's PAPI data */
            p->PAPI_EventSet = PAPI_NULL;
            p->PAPI_Num_Events = 0;

            TEST_PAPI(PAPI_create_eventset(&p->PAPI_EventSet), PAPI_OK, MyRank, 9999, PRINT_SOME);

            //Add the desired events to the Event Set; ensure the desired counters
            //  are on the system then add, ignore otherwise
            for(k = 0; k < TOTAL_PAPI_EVENTS && k < NUM_PAPI_EVENTS; k++){
                temp_event = PAPI_Events[k];
                if(PAPI_query_event(temp_event) == PAPI_OK){
                    p->PAPI_Num_Events++;
                    TEST_PAPI(PAPI_add_event(p->PAPI_EventSet, temp_event), PAPI_OK, MyRank, 9999, PRINT_SOME);
                }
            }

            PAPIRes_init(p->PAPI_Results, p->PAPI_Times);
            PAPI_set_units(p->name, PAPI_units, NUM_PAPI_EVENTS);

            TEST_PAPI(PAPI_start(p->PAPI_EventSet), PAPI_OK, MyRank, 9999, PRINT_SOME);
            #endif //HAVE_PAPI
        }         //DO_PERF
    }
    if(ci){
        brand_init(&(ci->br), ci->seed);

        nrow = ci->nrows;
        ncol = ci->ncols;

        ci->niter *= 64;          /* we'll do iterations in blocks of 64 */

        if((ci->ncols % BLOCKSIZE) != 0){
            return make_error(0,specific_err);
            //fprintf(stderr, "ERROR (plan_cba): BLOCKSIZE (%ld) must divide"
            //" ncol (%ld)\n", BLOCKSIZE, ncol);
        }
        assert ((NITERS % 64) == 0);

        /* Compute the cell count in size_t so that nrow * ncol cannot
         * overflow int before being widened. */
        ncells = (size_t)nrow * (size_t)ncol;

        ci->work = (uint64_t *)calloc((size_t)((ncells + PAD + NITERS) * 2),
                                      sizeof(uint64_t));
        if(ci->work == NULL){
            /* Bail out before the buffer is carved up and written to. */
            return make_error(ALLOC,generic_err);
        }
        ret = ERR_CLEAN;

        /* Buffer layout: work | out | data | chk */
        ci->out = &(ci->work[ncells + PAD]);
        ci->data = &(ci->out[NITERS]);
        ci->chk = &(ci->data[ncells + PAD]);

        for(i = 0; i < ncells; i++){
            ci->data[i] = brand(&(ci->br));
        }

        blockit (ci->data, nrow, ncol, ci->work);
    }
    return ret;
} /* initCBAPlan */
/*
 * Broadcast benchmark driver.
 *
 * Usage: <prog> seed msize(B) niters [root]
 *
 * Allocates two shared buffers of NWRDS/2 words each, draws one value per
 * thread from a thread-unique seed into `val`, re-seeds the random stream
 * identically on all threads, validates msize and root, and hands over to
 * do_bcast().  A root at or beyond GTHREADS, or no root at all, selects
 * randomized roots (root = -1).
 *
 * Always returns 0; argument errors are only reported on thread 0.
 */
int main (int argc, char *argv[])

{
  brand_t br;
  int64 i, seed, msize, niters, root = -1;
  const int64 nwrds = NWRDS / 2;
  double mem;
  char *scale;
  TYPE *dst, *src;

  start_pes(0);
  //mpp_init();
  open_df_mmu();

  if (argc < 4) {
    if (MY_GTHREAD == 0)
      fprintf (stderr, "Usage:\t%s seed msize(B) niters [root]\n", argv[0]);
    /* Nothing has been allocated yet, so skip the frees. */
    goto DONE;
  }

  // alloc two shared buffers
  // (mpp_alloc checks for valid pointer and casts)
  dst = mpp_alloc (nwrds * sizeof(uint64)); 
  src = mpp_alloc (nwrds * sizeof(uint64)); 

  // get args
  seed = atol (argv[1]);
  msize = atol (argv[2]);
  niters = atol (argv[3]);
  if (argc > 4)
    root = atol (argv[4]);

  // seed uniquely to generate a unique val /PE
  brand_init (&br, seed + ((int64)MY_GTHREAD << 32));
  val = brand(&br);
  // seed uniformly across PEs for benchmark
  brand_init (&br, seed);
  // runup a few times
  for (i = 0; i < 8; i++)  brand(&br);

  if (MY_GTHREAD == 0) {
    printf ("base seed is %ld\n", seed);
    mem = scale_mem (msize, &scale);
    printf ("msize = %.2lf %s\n", mem, scale);
  }

  /* Compare as signed values so that a negative msize is rejected instead
   * of being converted to a huge unsigned number. */
  if (msize < (int64)sizeof(uint64)) {
    if (MY_GTHREAD == 0)
      printf ("msize must be >= %ld B\n", (int64)sizeof(uint64));
    goto FREE;
  }
  if (msize > (int64)(nwrds * sizeof(uint64))) {
    if (MY_GTHREAD == 0)
      printf ("msize must be <= %ld B\n", (int64)(nwrds * sizeof(uint64)));
    goto FREE;
  }
  if (root >= GTHREADS)
    root = -1;
  if (MY_GTHREAD == 0) {
    if (root < 0)
      printf ("randomizing root PEs (%ld)\n", root);
    else
      printf ("using fixed root PE %ld\n", root);
  }

  // this exits on error
  do_bcast (dst, src, nwrds, &br, msize, niters, root);

  /* Error paths taken after the allocation land here too, so the shared
   * buffers are released on every path that acquired them. */
 FREE:
  // free up the shared memory
  mpp_free (dst);
  mpp_free (src);

 DONE:
  mpp_barrier_all();
  //mpp_finalize();
  close_df_mmu();
  return 0;
}
/*
 * All-to-all benchmark driver.
 *
 * Usage: <prog> seed msg_size(B) table_size(MB) rep_cnt [ms2 ts2 rc2 ..]
 *
 * Allocates NWRDS words of shared space, zeroes the local part, seeds the
 * random stream uniquely per thread, and then runs do_all2all() once for
 * every complete (msg_size, table_size, rep_cnt) triple on the command
 * line.  An incomplete trailing triple silently ends the run.
 *
 * Always returns 0; argument errors are only reported on thread 0.
 */
int main (int argc, char *argv[])

{
  brand_t br;
  int64 seed, arg, msize, tsize, rep;
  TYPE *tab;
  uint64 *loc;

  start_pes(0);
  //mpp_init();

  open_df_mmu();

  if (argc < 5) {
    if (MY_GTHREAD == 0) {
      fprintf (stderr, "Usage:\t%s seed msg_size(B) table_size(MB) rep_cnt "
        "[ms2 ts2 rc2 ..]\n", argv[0]);
    }
    /* Nothing has been allocated yet, so skip the free. */
    goto DONE;
  }

  // alloc some shared space
  // (checks for valid pointer and casts)
  tab = mpp_alloc (NWRDS * sizeof(uint64));

  // pointer to local space
#if defined(__UPC__)
  loc = (uint64 *)&tab[MY_GTHREAD];

#else
  loc = &tab[0];

#endif

  // init all local memory
  bzero ((void *)&loc[0], NWRDS * sizeof(uint64));

  seed = atol (argv[1]);
  if (MY_GTHREAD == 0)
    printf ("base seed is %ld\n", seed);
  seed += (uint64)MY_GTHREAD << 32;
  brand_init (&br, seed);  // seed uniquely per PE

  arg = 2;
  while (arg < argc) {
    msize = atol (argv[arg++]);
    if (arg >= argc)
      break;
    /* Table size is given in MB; convert to bytes. */
    tsize = atol (argv[arg++]) * (1L << 20);
    if (arg >= argc)
      break;
    rep   = atol (argv[arg++]);

    if (MY_GTHREAD == 0)
      printf ("tsize = %ldMB  msize = %5ldB\n", tsize/(1L<<20), msize);

    /* Compare as signed values so that a negative msize is rejected
     * instead of being converted to a huge unsigned number. */
    if (msize < (int64)sizeof(long)) {
      if (MY_GTHREAD == 0)
	printf ("msize must be >= %ld B\n", (int64)sizeof(long));
      goto FREE;
    }
    /* The unsigned comparison also rejects a negative tsize. */
    if (tsize > (NWRDS * sizeof(long))) {
      if (MY_GTHREAD == 0)
	printf ("tsize must be <= %ld MiB\n",
		(int64)((NWRDS * sizeof(long)) / (1uL<<20)));
      goto FREE;
    }

    // exits on error
    do_all2all (tab, loc, &br, msize, tsize, rep, 1);
    
    if (MY_GTHREAD == 0)
      printf ("\n");
  }

  /* Error paths taken after the allocation land here too, so the shared
   * table is released on every path that acquired it. */
 FREE:
  // free up the shared memory
  mpp_free (tab);

 DONE:
  mpp_barrier_all();

  close_df_mmu();
  //mpp_finalize();
  return 0;
}