int main (int argc, char *argv[]) { static char cvs_info[] = "BMkGRP $Date: $ $Revision: $ $RCSfile: all2all_main.c,v $ $Name: $"; int itr; int idx; brand_t br; timer t, t0, t1; double nsec; double total_time = 0.0; int status = 0; int64 i, seed, arg, msize, tsize, len, oldsize=0, rep, cksum; uint64 *tab=NULL; start_pes(0); SELF=_my_pe(); SIZE=_n_pes(); if (argc < 5) { if (SELF == 0) fprintf (stderr, "Usage:\t%s seed msg_size(B) table_size(MB) rep_cnt " "[ms2 ts2 rc2 ..]\n", argv[0]); status = 1; goto DONE; } seed = atol (argv[1]); if (SELF == 0) printf ("base seed is %ld\n", seed); seed += SELF << 32; brand_init (&br, seed); // seed uniquely per PE arg = 2; while (arg < argc) { msize = atol (argv[arg++]); if (arg >= argc) break; /* Table size * 1 million. */ tsize = atol (argv[arg++]) * (1L << 20); if (arg >= argc) break; //rep = atol (argv[arg++]); rep = 1; arg++; if (SELF == 0) printf ("tsize = %ldMB msize = %dB\n", tsize/(1L<<20), msize); if (msize < sizeof(long)) { if (SELF == 0) printf ("msize too short!\n"); //status = 1; goto DONE; } //itr=0; idx = 0; switch(SIZE){ case 2: idx = 0; break; case 4: idx = 1; break; case 8: idx = 2; break; default: fprintf(stderr,"warning, check sum for (%d) pes not supported.\n", SIZE); } while (rep-- > 0) { /* START TIMING */ //timer_clear (&t0); //timer_clear (&t1); //timer_start (&t0); if ((tab == NULL) || (tsize > oldsize)) { if (tab != NULL) { dram_shfree (tab); oldsize = 0; } if (SELF == 0) printf ("trying dram_shmalloc of %ld bytes\n", tsize); tab = (uint64 *) dram_shmalloc (tsize); if (tab == NULL) { if (SELF == 0) printf ("dram_shmalloc failed!\n"); status = 1; goto DONE; } oldsize = tsize; } // length in words len = tsize / sizeof(uint64); // important to init table // to ensure cksum consistency on different platforms memset(tab,0,tsize); for (i = 0; i < len; i+=64){ tab[i] = brand(&br); } // we'll have destination/source arrays each of half size len /= 2; //timer_stop (&t0); // source checksum cksum = do_cksum (&tab[len], len); if (SELF == 0) printf ("cksum is %016lx\n", cksum); if (SELF == 0){ //if(cksum!=ckv[itr++]){ /* Set up for one iteration only. */ if(cksum!=ckv[idx]){ printf ("cksum %016lx != ckv[%d] %016x\n",cksum,idx,ckv[idx]); gexit(1); } } //timer_start (&t1); len = do_all2all (&tab[0], &tab[len], len, msize/sizeof(uint64)); shmem_barrier_all(); //timer_stop (&t1); /* END TIMING */ #if 0 // dest checksum i = do_cksum (&tab[0], len); if (i != cksum) { printf ("PE %4ld ERROR: %016lx != %016lx\n", SIZE, i, cksum); status = 1; goto DONE; } #ifndef CHECKOFF if (i != known_v[gv]) { printf ("CHECKSUM PE %4ld ERROR: %016lx != %016lx\n", SIZE, i, known_v[gv]); status = 1; goto DONE; } gv++; #endif //t.accum_wall = t0.accum_wall + t1.accum_wall; //t.accum_cpus = t0.accum_cpus + t1.accum_cpus; /*if (SELF == 0) { #ifdef PTIMES printf ("%8.3f %8.3f\n", t0.accum_wall , t1.accum_wall); printf ("%8.3f %8.3f\n", t0.accum_cpus , t1.accum_cpus); #endif printf ("wall reports %8.3f secs cpus report %8.3f secs\n", t.accum_wall, t.accum_cpus); nsec = MAX(t.accum_wall, t.accum_cpus); total_time += nsec; if (nsec > 0) printf ("%8.3f MB/sec with %ld bytes transfers\n", len*sizeof(uint64)/(double)(1L<<20)/nsec, msize); }*/ #endif } //if (SELF == 0) //printf ("\n"); } //if (SELF == 0) //{ //printf ("total time = %14.9f\n", total_time); //} DONE: shmem_barrier_all(); return status; }
int main (int argc, char *argv[]) { brand_t br; int64 seed, arg, msize, tsize, rep; TYPE *tab; uint64 *loc; start_pes(0); //mpp_init(); open_df_mmu(); if (argc < 5) { if (MY_GTHREAD == 0) fprintf (stderr, "Usage:\t%s seed msg_size(B) table_size(MB) rep_cnt " "[ms2 ts2 rc2 ..]\n", argv[0]); goto DONE; } // alloc some shared space // (checks for valid pointer and casts) tab = mpp_alloc (NWRDS * sizeof(uint64)); // pointer to local space #if defined(__UPC__) loc = (uint64 *)&tab[MY_GTHREAD]; #else loc = &tab[0]; #endif // init all local memory bzero ((void *)&loc[0], NWRDS * sizeof(uint64)); seed = atol (argv[1]); if (MY_GTHREAD == 0) printf ("base seed is %ld\n", seed); seed += (uint64)MY_GTHREAD << 32; brand_init (&br, seed); // seed uniquely per PE arg = 2; while (arg < argc) { msize = atol (argv[arg++]); if (arg >= argc) break; tsize = atol (argv[arg++]) * (1L << 20); if (arg >= argc) break; rep = atol (argv[arg++]); if (MY_GTHREAD == 0) printf ("tsize = %ldMB msize = %5ldB\n", tsize/(1L<<20), msize); if (msize < sizeof(long)) { if (MY_GTHREAD == 0) printf ("msize must be > %ld B\n", (int64)sizeof(long)); goto DONE; } if (tsize > (NWRDS * sizeof(long))) { if (MY_GTHREAD == 0) printf ("tsize must be < %ld MiB\n", (int64)(NWRDS * sizeof(long)) / (1uL<<20)); goto DONE; } // exits on error do_all2all (tab, loc, &br, msize, tsize, rep, 1); if (MY_GTHREAD == 0) printf ("\n"); } // free up the shared memory mpp_free (tab); DONE: mpp_barrier_all(); close_df_mmu(); //mpp_finalize(); return 0; }