int main (int argc, char *argv[]) { static char cvs_info[] = "BMkGRP $Date: $ $Revision: $ $RCSfile: all2all_main.c,v $ $Name: $"; int itr; int idx; brand_t br; timer t, t0, t1; double nsec; double total_time = 0.0; int status = 0; int64 i, seed, arg, msize, tsize, len, oldsize=0, rep, cksum; uint64 *tab=NULL; start_pes(0); SELF=_my_pe(); SIZE=_n_pes(); if (argc < 5) { if (SELF == 0) fprintf (stderr, "Usage:\t%s seed msg_size(B) table_size(MB) rep_cnt " "[ms2 ts2 rc2 ..]\n", argv[0]); status = 1; goto DONE; } seed = atol (argv[1]); if (SELF == 0) printf ("base seed is %ld\n", seed); seed += SELF << 32; brand_init (&br, seed); // seed uniquely per PE arg = 2; while (arg < argc) { msize = atol (argv[arg++]); if (arg >= argc) break; /* Table size * 1 million. */ tsize = atol (argv[arg++]) * (1L << 20); if (arg >= argc) break; //rep = atol (argv[arg++]); rep = 1; arg++; if (SELF == 0) printf ("tsize = %ldMB msize = %dB\n", tsize/(1L<<20), msize); if (msize < sizeof(long)) { if (SELF == 0) printf ("msize too short!\n"); //status = 1; goto DONE; } //itr=0; idx = 0; switch(SIZE){ case 2: idx = 0; break; case 4: idx = 1; break; case 8: idx = 2; break; default: fprintf(stderr,"warning, check sum for (%d) pes not supported.\n", SIZE); } while (rep-- > 0) { /* START TIMING */ //timer_clear (&t0); //timer_clear (&t1); //timer_start (&t0); if ((tab == NULL) || (tsize > oldsize)) { if (tab != NULL) { dram_shfree (tab); oldsize = 0; } if (SELF == 0) printf ("trying dram_shmalloc of %ld bytes\n", tsize); tab = (uint64 *) dram_shmalloc (tsize); if (tab == NULL) { if (SELF == 0) printf ("dram_shmalloc failed!\n"); status = 1; goto DONE; } oldsize = tsize; } // length in words len = tsize / sizeof(uint64); // important to init table // to ensure cksum consistency on different platforms memset(tab,0,tsize); for (i = 0; i < len; i+=64){ tab[i] = brand(&br); } // we'll have destination/source arrays each of half size len /= 2; //timer_stop (&t0); // source checksum cksum = do_cksum (&tab[len], len); if (SELF == 0) printf ("cksum is %016lx\n", cksum); if (SELF == 0){ //if(cksum!=ckv[itr++]){ /* Set up for one iteration only. */ if(cksum!=ckv[idx]){ printf ("cksum %016lx != ckv[%d] %016x\n",cksum,idx,ckv[idx]); gexit(1); } } //timer_start (&t1); len = do_all2all (&tab[0], &tab[len], len, msize/sizeof(uint64)); shmem_barrier_all(); //timer_stop (&t1); /* END TIMING */ #if 0 // dest checksum i = do_cksum (&tab[0], len); if (i != cksum) { printf ("PE %4ld ERROR: %016lx != %016lx\n", SIZE, i, cksum); status = 1; goto DONE; } #ifndef CHECKOFF if (i != known_v[gv]) { printf ("CHECKSUM PE %4ld ERROR: %016lx != %016lx\n", SIZE, i, known_v[gv]); status = 1; goto DONE; } gv++; #endif //t.accum_wall = t0.accum_wall + t1.accum_wall; //t.accum_cpus = t0.accum_cpus + t1.accum_cpus; /*if (SELF == 0) { #ifdef PTIMES printf ("%8.3f %8.3f\n", t0.accum_wall , t1.accum_wall); printf ("%8.3f %8.3f\n", t0.accum_cpus , t1.accum_cpus); #endif printf ("wall reports %8.3f secs cpus report %8.3f secs\n", t.accum_wall, t.accum_cpus); nsec = MAX(t.accum_wall, t.accum_cpus); total_time += nsec; if (nsec > 0) printf ("%8.3f MB/sec with %ld bytes transfers\n", len*sizeof(uint64)/(double)(1L<<20)/nsec, msize); }*/ #endif } //if (SELF == 0) //printf ("\n"); } //if (SELF == 0) //{ //printf ("total time = %14.9f\n", total_time); //} DONE: shmem_barrier_all(); return status; }
/** * \brief Creates and initializes the working data for the plan * \param [in] plan The struct that holds the plan's data values. * \return int Error flag value * \sa parseCBAPlan * \sa makeCBAPlan * \sa execCBAPlan * \sa perfCBAPlan * \sa killCBAPlan */ int initCBAPlan(void *plan){ int ret = make_error(ALLOC,generic_err); int i; int nrow, ncol; Plan *p; CBA_data *ci = NULL; p = (Plan *)plan; #ifdef HAVE_PAPI int temp_event, k; int PAPI_Events [NUM_PAPI_EVENTS] = PAPI_COUNTERS; char *PAPI_units [NUM_PAPI_EVENTS] = PAPI_UNITS; #endif //HAVE_PAPI if(p){ ci = (CBA_data *)p->vptr; p->exec_count = 0; if(DO_PERF){ perftimer_init(&p->timers, NUM_TIMERS); #ifdef HAVE_PAPI /* Initialize plan's PAPI data */ p->PAPI_EventSet = PAPI_NULL; p->PAPI_Num_Events = 0; TEST_PAPI(PAPI_create_eventset(&p->PAPI_EventSet), PAPI_OK, MyRank, 9999, PRINT_SOME); //Add the desired events to the Event Set; ensure the dsired counters // are on the system then add, ignore otherwise for(k = 0; k < TOTAL_PAPI_EVENTS && k < NUM_PAPI_EVENTS; k++){ temp_event = PAPI_Events[k]; if(PAPI_query_event(temp_event) == PAPI_OK){ p->PAPI_Num_Events++; TEST_PAPI(PAPI_add_event(p->PAPI_EventSet, temp_event), PAPI_OK, MyRank, 9999, PRINT_SOME); } } PAPIRes_init(p->PAPI_Results, p->PAPI_Times); PAPI_set_units(p->name, PAPI_units, NUM_PAPI_EVENTS); TEST_PAPI(PAPI_start(p->PAPI_EventSet), PAPI_OK, MyRank, 9999, PRINT_SOME); #endif //HAVE_PAPI } //DO_PERF } if(ci){ brand_init(&(ci->br), ci->seed); nrow = ci->nrows; ncol = ci->ncols; ci->niter *= 64; /* we'll do iterations in blocks of 64 */ if((ci->ncols % BLOCKSIZE) != 0){ return make_error(0,specific_err); //fprintf(stderr, "ERROR (plan_cba): BLOCKSIZE (%ld) must divide" //" ncol (%ld)\n", BLOCKSIZE, ncol); } assert ((NITERS % 64) == 0); ci->work = (uint64_t *)calloc((size_t)((nrow * ncol + PAD + NITERS) * 2), sizeof(uint64_t)); ret = (ci->work == NULL) ? make_error(ALLOC,generic_err) : ERR_CLEAN; ci->out = &(ci->work[nrow * ncol + PAD]); ci->data = &(ci->out[NITERS]); ci->chk = &(ci->data[nrow * ncol + PAD]); for(i = 0; i < (nrow * ncol); i++){ ci->data[i] = brand(&(ci->br)); } blockit (ci->data, nrow, ncol, ci->work); } return ret; } /* initCBAPlan */
int main (int argc, char *argv[]) { brand_t br; int64 i, seed, msize, niters, root = -1; const int64 nwrds = NWRDS / 2; double mem; char *scale; TYPE *dst, *src; start_pes(0); //mpp_init(); open_df_mmu(); if (argc < 4) { if (MY_GTHREAD == 0) fprintf (stderr, "Usage:\t%s seed msize(B) niters [root]\n", argv[0]); goto DONE; } // alloc two shared buffers // (mpp_alloc checks for valid pointer and casts) dst = mpp_alloc (nwrds * sizeof(uint64)); src = mpp_alloc (nwrds * sizeof(uint64)); // get args seed = atol (argv[1]); msize = atol (argv[2]); niters = atol (argv[3]); if (argc > 4) root = atol (argv[4]); // seed uniquely to generate a unique val /PE brand_init (&br, seed + ((int64)MY_GTHREAD << 32)); val = brand(&br); // seed uniformly across PEs for benchmark brand_init (&br, seed); // runup a few times for (i = 0; i < 8; i++) brand(&br); if (MY_GTHREAD == 0) { printf ("base seed is %ld\n", seed); mem = scale_mem (msize, &scale); printf ("msize = %.2lf %s\n", mem, scale); } if (msize < sizeof(uint64)) { if (MY_GTHREAD == 0) printf ("msize must be > %ld B\n", (int64)sizeof(uint64)); goto DONE; } if (msize > (nwrds * sizeof(uint64))) { if (MY_GTHREAD == 0) printf ("msize must be < %ld B\n", nwrds * sizeof(uint64)); goto DONE; } if (root >= GTHREADS) root = -1; if (MY_GTHREAD == 0) { if (root < 0) printf ("randomizing root PEs (%ld)\n", root); else printf ("using fixed root PE %ld\n", root); } // this exits on error do_bcast (dst, src, nwrds, &br, msize, niters, root); // free up the shared memory mpp_free (dst); mpp_free (src); DONE: mpp_barrier_all(); //mpp_finalize(); close_df_mmu(); return 0; }
int main (int argc, char *argv[]) { brand_t br; int64 seed, arg, msize, tsize, rep; TYPE *tab; uint64 *loc; start_pes(0); //mpp_init(); open_df_mmu(); if (argc < 5) { if (MY_GTHREAD == 0) fprintf (stderr, "Usage:\t%s seed msg_size(B) table_size(MB) rep_cnt " "[ms2 ts2 rc2 ..]\n", argv[0]); goto DONE; } // alloc some shared space // (checks for valid pointer and casts) tab = mpp_alloc (NWRDS * sizeof(uint64)); // pointer to local space #if defined(__UPC__) loc = (uint64 *)&tab[MY_GTHREAD]; #else loc = &tab[0]; #endif // init all local memory bzero ((void *)&loc[0], NWRDS * sizeof(uint64)); seed = atol (argv[1]); if (MY_GTHREAD == 0) printf ("base seed is %ld\n", seed); seed += (uint64)MY_GTHREAD << 32; brand_init (&br, seed); // seed uniquely per PE arg = 2; while (arg < argc) { msize = atol (argv[arg++]); if (arg >= argc) break; tsize = atol (argv[arg++]) * (1L << 20); if (arg >= argc) break; rep = atol (argv[arg++]); if (MY_GTHREAD == 0) printf ("tsize = %ldMB msize = %5ldB\n", tsize/(1L<<20), msize); if (msize < sizeof(long)) { if (MY_GTHREAD == 0) printf ("msize must be > %ld B\n", (int64)sizeof(long)); goto DONE; } if (tsize > (NWRDS * sizeof(long))) { if (MY_GTHREAD == 0) printf ("tsize must be < %ld MiB\n", (int64)(NWRDS * sizeof(long)) / (1uL<<20)); goto DONE; } // exits on error do_all2all (tab, loc, &br, msize, tsize, rep, 1); if (MY_GTHREAD == 0) printf ("\n"); } // free up the shared memory mpp_free (tab); DONE: mpp_barrier_all(); close_df_mmu(); //mpp_finalize(); return 0; }