int main(int argc,char *argv[]) { gaspi_rank_t rank,tnc; gaspi_float vers; gaspi_config_t gconf; char mtype[16]; int i; mcycles_t t0, t1; mcycles_t delta[1024]; gaspi_float cpu_freq; gaspi_config_get(&gconf); gconf.mtu = 4096; gconf.queue_num = 1; gaspi_config_set(gconf); GPI2_ASSERT(gaspi_proc_init(GASPI_BLOCK)); GPI2_ASSERT( gaspi_version(&vers) ); GPI2_ASSERT( gaspi_proc_rank(&rank) ); GPI2_ASSERT( gaspi_proc_num(&tnc) ); GPI2_ASSERT( gaspi_machine_type(mtype) ); GPI2_ASSERT( gaspi_cpu_frequency(&cpu_freq)); if ( 0 == rank) printf("my rank: %d tnc: %d (vers: %.2f) machine:%s\n",rank,tnc,vers,mtype); GPI2_ASSERT(gaspi_barrier(GASPI_GROUP_ALL,GASPI_BLOCK)); if(0 == rank) printf("cpu freq: %.2f\n",cpu_freq); //benchmark for(i = 0; i < 1000; i++) { t0 = get_mcycles(); GPI2_ASSERT(gaspi_barrier(GASPI_GROUP_ALL,GASPI_BLOCK)); t1 = get_mcycles(); delta[i] = (t1 - t0); } if(0 == rank) { qsort(delta,1000,sizeof *delta,mcycles_compare); const double div = 1.0 / cpu_freq; const double ts = (double)delta[500] * div; printf("time: %f usec\n",ts); } fflush(stdout); GPI2_ASSERT(gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)); GPI2_ASSERT(gaspi_proc_term(GASPI_BLOCK)); return 0; }
int main(int argc,char *argv[]) { gaspi_rank_t rank,tnc; gaspi_return_t ret; gaspi_float vers; gaspi_config_t gconf; char mtype[16]; int commSize=1,cnt=0; gaspi_number_t queueSize; void *pret; int i,l; mcycles_t t0,t1,dt; mcycles_t stamp[1024], stamp2[1024], delta[1024]; int amount_work = 1000; gaspi_float cpu_freq; gaspi_config_get(&gconf); gconf.mtu = 4096; gconf.queue_num = 1; gaspi_config_set(gconf); GPI2_ASSERT(gaspi_proc_init(GASPI_BLOCK)); GPI2_ASSERT( gaspi_version(&vers) ); GPI2_ASSERT( gaspi_proc_rank(&rank) ); GPI2_ASSERT( gaspi_proc_num(&tnc) ); GPI2_ASSERT( gaspi_machine_type(mtype) ); GPI2_ASSERT( gaspi_cpu_frequency(&cpu_freq)); if(0 == rank) { printf("cpu freq: %.2f\n", cpu_freq); printf("my rank: %d tnc: %d (vers: %.2f) machine:%s\n",rank, tnc, vers, mtype); } GPI2_ASSERT(gaspi_barrier(GASPI_GROUP_ALL,GASPI_BLOCK)); //benchmark for(i = 0; i < 1000; i++) { t0=t1=dt=0; do { t0 = get_mcycles(); ret = gaspi_barrier(GASPI_GROUP_ALL,GASPI_TEST); t1 = get_mcycles(); dt += (t1-t0); usleep(amount_work); //useful work here.. }while(ret!=GASPI_SUCCESS); delta[i]=dt; } if(0 == rank) { qsort(delta,1000,sizeof *delta,mcycles_compare); const double div = 1.0 / cpu_freq; const double ts = (double)delta[500] * div; printf("time: %f usec\n",ts); } GPI2_ASSERT(gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK)); GPI2_ASSERT(gaspi_proc_term(GASPI_BLOCK)); return 0; }
int main (int argc, char *argv[]) { int i, j, t; gaspi_rank_t myrank; char *ptr0; //on numa architectures you have to map this process to the numa node where nic is installed if (start_bench (2) != 0) { printf ("Initialization failed\n"); exit (-1); } // BENCH // gaspi_proc_rank (&myrank); if (gaspi_segment_ptr (0, (void **) &ptr0) != GASPI_SUCCESS) { printf ("gaspi_segment_ptr failed !\n"); exit (-1); } gaspi_float cpu_freq; gaspi_cpu_frequency(&cpu_freq); if (myrank < 2) { if(myrank == 0) { printf("-----------------------------------\n"); printf ("%12s\t%5s\n", "Bytes", "Lat(usecs)"); printf("-----------------------------------\n"); } int bytes = 2; volatile char *postBuf = (volatile char *) ptr0; for (i = 1; i < 24; i++) { volatile char *pollBuf = (volatile char *) (ptr0 + ( 2 * bytes -1 )); int rcnt = 0; int cnt = 0; gaspi_barrier(GASPI_GROUP_ALL, GASPI_BLOCK); for (j = 0; j < ITERATIONS; j++) { if (rcnt < ITERATIONS && !(cnt < 1 && myrank == 1)) { rcnt++; while (*pollBuf != (char) rcnt) { #ifdef MIC _mm_delay_32(32); #else _mm_pause(); #endif } } stamp[j] = get_mcycles (); postBuf[bytes - 1] = (char) ++cnt; gaspi_write (0, 0, myrank ^ 0x1, 0, bytes, bytes, 0, GASPI_BLOCK); gaspi_wait (0, GASPI_BLOCK); } for (t = 0; t < (ITERATIONS - 1); t++) delta[t] = stamp[t + 1] - stamp[t]; qsort (delta, (ITERATIONS - 1), sizeof *delta, mcycles_compare); const double div = 1.0 / cpu_freq; const double ts = (double) delta[ITERATIONS / 2] * div * 0.5; if(myrank == 0) printf ("%12d\t%4.2f\n", bytes, ts); bytes <<= 1; } } end_bench (); return 0; }