void smp_coll_tune_barrier(smp_coll_t handle) {
  int iter;
  int i;
  gasnett_tick_t start,stop;
  int radix=2;
  double best_time=1e20;
  double time;
  int barrier_iters=gasneti_getenv_int_withdefault("GASNET_COLL_TUNE_SMP_BARRIER_ITER", 1000, 0);
  int root;
  static int best_barrier_radix = 2;
  static int best_barrier_routine = SMP_COLL_BARRIER_DISSEM_ATOMIC;
  static int best_root = 0;

#if VERBOSE_TUNING
  if(handle->MYTHREAD==0) fprintf(stderr, "starting autotuning of local barrier\n");
#endif
  for(root=0; root<1; root++) {

    if(handle->MYTHREAD==0 && VERBOSE_TUNING) fprintf(stderr, "ROOT: %d\n", root);
    for(i=0; i<SMP_COLL_NUM_BARR_ROUTINES; i++) {
      if(i==SMP_COLL_BARRIER_COND_VAR) continue;
      if(handle->MYTHREAD==0 && VERBOSE_TUNING) fprintf(stderr, "\t routine: %d\n",(int)i);
      for(radix=2; radix<=handle->THREADS; radix*=2) {
        
        
        
        if(i==SMP_COLL_BARRIER_COND_VAR && radix>2) continue;
        if(i==SMP_COLL_BARRIER_PTHREAD && radix>2) continue;
        if(handle->MYTHREAD==0 && VERBOSE_TUNING) fprintf(stderr, "\t\t radix: %d\n",radix);
        smp_coll_set_barrier_routine_with_root(handle, (smp_coll_barrier_routine_t)i, radix, root);
        
        start = gasnett_ticks_now();
        for(iter = 0; iter<barrier_iters; iter++) {
          smp_coll_barrier(handle, 0);
        }
        stop = gasnett_ticks_now();
        smp_coll_safe_barrier(handle, 0);
        time = ((double) gasnett_ticks_to_ns(stop-start))/barrier_iters;
        
        
        if(time < best_time && handle->MYTHREAD==0) {
          best_barrier_radix=radix;
          best_barrier_routine = i;
          best_time = time;
          best_root = root;
        }
        smp_coll_reset_all_flags(handle);
        /*run a root agnostic barrier*/
        smp_coll_safe_barrier(handle, 0);
      }
    }
  }
#if VERBOSE_TUNING
  if(handle->MYTHREAD==0) fprintf(stderr, "setting best barrier: routine: %d radix: %d root: %d time: %g ns\n", best_barrier_routine, best_barrier_radix, best_root, best_time);
#endif
  smp_coll_set_barrier_routine_with_root(handle, (smp_coll_barrier_routine_t)best_barrier_routine, best_barrier_radix, best_root);
  
  
}
Exemple #2
0
void run() {

  unsigned i;
  gasnett_tick_t start, end;

  hashtable_create(TABLE_SIZE);

  BARRIER();

  start = gasnett_ticks_now();
  for (i = 0; i < WRITES_PER_PROC; ++i) {
    hashtable_insert(grt_id * WRITES_PER_PROC + i);
  }
  end = gasnett_ticks_now();
  unsigned time = ((unsigned) gasnett_ticks_to_us(end - start));
  printf("processor %u: insertion time=%f us\n", 
	 grt_id, (double) time);
  fflush(stdout);
  grt_write(0, time, &times[grt_id]);

  BARRIER();

  /* sanity check */
  for (i = 0; i < WRITES_PER_PROC; ++i) {
    grt_word_t num = grt_id * WRITES_PER_PROC + i;
    grt_bool_t found = hashtable_find(num);
    if (!found) {
      fprintf(stderr, "processor %d: expected to find %d\n", 
	      grt_id, (int) num);
    }
  }

  BARRIER();

  if (grt_id == 0) {
    time = 0, max_time = 0;
    for (i = 0; i < grt_num_procs; ++i) {
      gasnett_tick_t this_time = times[i];
      time += this_time;
      if (this_time >= max_time) max_time = this_time;
    }
    time_per_op = ((float) time) / NUM_WRITES;
    printf("total CPU time=%f us\n", (double) time);
    printf("time per operation=%f us\n", time_per_op);
    printf("max time=%f us\n", (double) max_time);
  }

  BARRIER();

  hashtable_destroy();

  BARRIER();

}
Exemple #3
0
void
test_sleep(threaddata_t *tdata)
{
	unsigned	usecs = (unsigned) sleep_min_us + 
				(rand() % (sleep_max_us - sleep_min_us));
	ACTION_PRINTF("tid=%3d> sleeping %.3f millisecs", tdata->tid, usecs/1000.0);
        { uint64_t goal = gasnett_ticks_to_us(gasnett_ticks_now()) + usecs;
          while (gasnett_ticks_to_us(gasnett_ticks_now()) < goal) 
            gasnett_sched_yield();
        }
	ACTION_PRINTF("tid=%3d> awaking", tdata->tid);
}
Exemple #4
0
void run() {

  unsigned i, time;
  gasnett_tick_t start, end;

  hashset_create(params[HASHSET_SIZE], params[ON_PTHREAD]);

  BARRIER();

  start = gasnett_ticks_now();
  for (i = 0; i < MY_NUM_OPS; ++i) {
    if (put_flags[i] == GRT_TRUE) {
      hashset_insert(numbers[i]);
    } else {
      hashset_find(numbers[i]);
    }
  }
  end = gasnett_ticks_now();
  time = ((unsigned) gasnett_ticks_to_us(end - start));
  //printf("processor %u: execution time=%f us\n", 
  //	 grt_id, (double) time);
  fflush(stdout);
  grt_write(0, time, &times[grt_id]);

  BARRIER();

  if (grt_id == 0) {
    time = 0, max_time = 0;
    for (i = 0; i < grt_num_procs; ++i) {
      gasnett_tick_t this_time = times[i];
      time += this_time;
      if (this_time >= max_time) max_time = this_time;
    }
    time_per_op = ((float) time) / params[NUM_OPS];
    printf("total CPU time=%f us\n", (double) time);
    printf("time per operation=%f us\n", time_per_op);
    printf("max time=%f us\n", (double) max_time);
  }

  BARRIER();

  hashset_destroy();

  BARRIER();

}
Exemple #5
0
/* ------------------------------------------------------------------------------------ */
void doit1(void) {
    GASNET_BEGIN_FUNCTION();

    {
        int i;
        for (i=0; i<8; i++) handles[i] = GASNET_INVALID_HANDLE;
    }

    TEST_SECTION_BEGIN();
    TIME_OPERATION("Tester overhead", {});

    TIME_OPERATION("gasnett_ticks_now()",
    { timertemp = gasnett_ticks_now(); });
Exemple #6
0
void run() {

  unsigned i, time;
  gasnett_tick_t start, end;

  hash_map_create(params[HASHMAP_SIZE], (grt_bool_t) params[ON_PTHREAD]);
  grt_barrier();

#ifdef LOCKS
  grt_lock_state_t state;
#endif
  for (i = 0; i < MY_NUM_OPS; ++i) {
    grt_word_t key = keys[i], val = values[i];
#ifdef LOCKS
    hash_t hash = compute_hash(key);
    hash_map_lock(hash.proc, hash.offset, WRITE, &state);
#endif
    hash_map_insert(key, val);
#ifdef LOCKS
    hash_map_unlock(hash.proc, hash.offset);
#endif
  }

  BARRIER();

  start = gasnett_ticks_now();

#ifdef LOCKS
  grt_lock_state_t state1, state2;
#endif
  for (i = 0; i < MY_NUM_OPS; ++i) {
    unsigned idx = grt_random_next() * MY_NUM_OPS;
    grt_word_t key1 = keys[i];
    unsigned second_idx = grt_random_next() * MY_NUM_OPS;
    grt_word_t key2 = keys[second_idx];
#ifdef LOCKS
    lock(key1, key2, &state1, &state2);
#endif
    grt_word_t val1, val2;
#ifndef LOCKS
#ifndef NOLOCKS
    stm_start(grt_id);
#endif
#endif
    grt_bool_t found1 = hash_map_find(key1, &val1);
    grt_bool_t found2 = hash_map_find(key2, &val2);
    hash_map_insert(key1, val2);
    hash_map_insert(key2, val1);
#ifndef LOCKS
#ifndef NOLOCKS
    stm_commit(grt_id);
#endif
#endif
#if LOCKS
    unlock(key1, key2);
#endif
  }

  end = gasnett_ticks_now();
  time = ((unsigned) gasnett_ticks_to_us(end - start));
  printf("processor %u: execution time=%f us\n", 
	 grt_id, (double) time);
  fflush(stdout);
  grt_write(0, time, &times[grt_id]);

  BARRIER();

  if (grt_id == 0) {
    time = 0, max_time = 0;
    for (i = 0; i < grt_num_procs; ++i) {
      gasnett_tick_t this_time = times[i];
      time += this_time;
      if (this_time >= max_time) max_time = this_time;
    }
    time_per_op = ((float) time) / params[NUM_OPS];
    printf("total CPU time=%f us\n", (double) time);
    printf("time per operation=%f us\n", time_per_op);
    printf("max time=%f us\n", (double) max_time);
  }

  BARRIER();

  hash_map_destroy();

  BARRIER();

}
Exemple #7
0
int main(int argc, char **argv) {
  int iters = 0;
  int arg;
  void *alloc = NULL;
  int firstlastmode = 0;
  int fullduplexmode = 0;
  int crossmachinemode = 0;
  int singlesender = 0;
  int help = 0;   

  /* call startup */
  GASNET_Safe(gasnet_init(&argc, &argv));

  /* parse arguments */
  arg = 1;
  while (argc > arg) {
    if (!strcmp(argv[arg], "-in")) {
      insegment = 1;
      ++arg;
    } else if (!strcmp(argv[arg], "-out")) {
      insegment = 0;
      ++arg;
    } else if (!strcmp(argv[arg], "-sl")) {
      ++arg;
      if (argc > arg) { stridelevels = atoi(argv[arg]); arg++; }
      else help = 1;
    } else if (!strcmp(argv[arg], "-mincontig")) {
      ++arg;
      if (argc > arg) { min_contig = atoi(argv[arg]); arg++; }
      else help = 1;
    } else if (!strcmp(argv[arg], "-maxcontig")) {
      ++arg;
      if (argc > arg) { max_contig = atoi(argv[arg]); arg++; }
      else help = 1;
    } else if (!strcmp(argv[arg], "-contigfactor")) {
      ++arg;
      if (argc > arg) { contigfactor = atoi(argv[arg]); arg++; }
      else help = 1;
    } else if (!strcmp(argv[arg], "-mindata")) {
      ++arg;
      if (argc > arg) { min_payload = atoi(argv[arg]); arg++; }
      else help = 1;
    } else if (!strcmp(argv[arg], "-maxdata")) {
      ++arg;
      if (argc > arg) { max_payload = atoi(argv[arg]); arg++; }
      else help = 1;
    } else if (!strcmp(argv[arg], "-datafactor")) {
      ++arg;
      if (argc > arg) { datafactor = atoi(argv[arg]); arg++; }
      else help = 1;
    } else if (!strcmp(argv[arg], "-densitysteps")) {
      ++arg;
      if (argc > arg) { densitysteps = atoi(argv[arg]); arg++; }
      else help = 1;
    } else if (!strcmp(argv[arg], "-f")) {
      firstlastmode = 1;
      ++arg;
    } else if (!strcmp(argv[arg], "-c")) {
      crossmachinemode = 1;
      ++arg;
    } else if (!strcmp(argv[arg], "-a")) {
      fullduplexmode = 1;
      ++arg;
    } else if (!strcmp(argv[arg], "-p")) {
      dogets = 0; doputs = 1;
      ++arg;
    } else if (!strcmp(argv[arg], "-g")) {
      dogets = 1; doputs = 0;
      ++arg;
    } else if (!strcmp(argv[arg], "-r")) {
      remotecontig = 1;
      ++arg;
    } else if (!strcmp(argv[arg], "-l")) {
      localcontig = 1;
      ++arg;
    } else if (argv[arg][0] == '-') {
      help = 1;
      ++arg;
    } else break;
  }

  if (argc > arg) { iters = atoi(argv[arg]); arg++; }
  if (!iters) iters = 1000;
  if (argc > arg) { TEST_SECTION_PARSE(argv[arg]); arg++; }
  if (min_contig && max_contig && min_contig > max_contig) { ERR("min_contig > max_contig"); help = 1; }
  if (min_payload && max_payload && min_payload > max_payload) { ERR("min_payload > max_payload"); help = 1; }
  if (min_payload && min_contig && min_payload < min_contig) { ERR("min_payload < min_contig"); help = 1; }
  if (max_contig && max_payload && max_contig > max_payload) { ERR("max_contig > max_payload"); help = 1; }
  if (contigfactor < 2) { ERR("contigfactor < 2"); help = 1; }
  if (datafactor < 2) { ERR("datafactor < 2"); help = 1; }

  if (!max_payload) max_payload = 2*1024*1024; /* 2 MB default */
  #ifdef GASNET_SEGMENT_EVERYTHING
    maxsz = gasnet_getMaxGlobalSegmentSize();
  #else
    maxsz = 16*1024*1024;
  #endif
  max_payload = (int)MIN(maxsz, max_payload);
  maxsz = MIN(((uint64_t)max_payload) * densitysteps,maxsz);
  if (!min_contig) min_contig = 8;
  if (!max_contig) max_contig = MIN(256*1024,max_payload);
  if (!min_payload) min_payload = min_contig;

  GASNET_Safe(gasnet_attach(NULL, 0, TEST_SEGSZ_REQUEST, TEST_MINHEAPOFFSET));
  test_init("testvisperf",1, "[options] (iters) (test_sections)\n"
             "  -p/-g     selects puts only or gets only (default is both).\n"
             "  -r/-l     selects remotely contiguous or locally contiguous (default is neither).\n"
             "  -mindata/-maxdata <sz>   \n"
             "            selects sz as min/max data payload per operation.\n"
             "  -mincontig/-maxcontig <sz>   \n"
             "            selects sz as min/max contig size.\n"
             "  -datafactor/-contigfactor <f>   \n"
             "            selects f as growth factor for data/contig sizes.\n"
             "  -densitysteps <d>   \n"
             "            selects d density steps, inclusive from 100%..100/d%\n"
             "  -sl <n>   selects n striding levels (default is 2).\n"
             "  -in/-out  selects whether the initiator-side\n"
             "            memory is in the GASNet segment or not (default is not).\n"
             "  -a        enables full-duplex mode, where all nodes send.\n"
             "  -c        enables cross-machine pairing, default is nearest neighbor.\n"
             "  -f        enables 'first/last' mode, where the first/last\n"
             "            nodes communicate with each other, while all other nodes sit idle.");
  if (help || argc > arg) test_usage();

  /* get SPMD info */
  myproc = gasnet_mynode();
  numprocs = gasnet_nodes();

  if (!firstlastmode) {
    /* Only allow 1 or even number for numprocs */
    if (numprocs > 1 && numprocs % 2 != 0) {
      MSG0("WARNING: This test requires a unary or even number of nodes. Test skipped.\n");
      gasnet_exit(0); /* exit 0 to prevent false negatives in test harnesses for smp-conduit */
    }
  }

  /* Setting peer thread rank */
  if (firstlastmode) {
    peerproc = (myproc == 0 ? numprocs-1 : 0);
    iamsender = (fullduplexmode ? myproc == 0 || myproc == numprocs-1 : myproc == 0);
  } else if (numprocs == 1) {
    peerproc = 0;
    iamsender = 1;
  } else if (crossmachinemode) {
    if (myproc < numprocs / 2) {
      peerproc = myproc + numprocs/2;
      iamsender = 1;
    } else {
      peerproc = myproc - numprocs/2;
      iamsender = fullduplexmode;
    }
  } else { 
    peerproc = (myproc % 2) ? (myproc - 1) : (myproc + 1);
    iamsender = (fullduplexmode || myproc % 2 == 0);
  }
  singlesender = (numprocs == 1) || ((numprocs == 2 || firstlastmode) && !fullduplexmode);

  Rbase = TEST_SEG(peerproc);

  if (insegment) {
    Lbase = TEST_SEG(myproc);
  } else {
    alloc = test_calloc(maxsz+PAGESZ,1); /* use calloc to prevent valgrind warnings */
    Lbase = alignup_ptr(alloc, PAGESZ); /* ensure page alignment of base */
  }
  assert(((uintptr_t)Lbase) % PAGESZ == 0);

  if (myproc == 0) {
    MSG0("Running %i iterations of %s%s%snon-contiguous put/get%s%s\n local data %s-segment for sizes: %i...%i\n", 
    iters, 
    (firstlastmode ? "first/last " : ""),
    (fullduplexmode ? "full-duplex ": ""),
    (crossmachinemode ? "cross-machine ": ""),
    (remotecontig?"(remotely-contiguous)":""),
    (localcontig?"(locally-contiguous)":""),
    insegment ? "in" : "out", 
    min_payload, max_payload);
    printf("rows are databytes/op : bandwidth values in MB/s\n");
  }
  BARRIER();

  { int contigsz;
    int rawdatasz;
    int isget;
    test_vis_t viscat;
    for (viscat = TEST_V; viscat <= TEST_S; viscat++) {
    for (isget = 0; isget < 2; isget++) {
      if (TEST_SECTION_BEGIN_ENABLED()) {
        if (isget && !dogets) continue;
        if (!isget && !doputs) continue;
        if (!dovis[viscat]) continue;
        for (contigsz = min_contig; contigsz <= max_contig; contigsz *= contigfactor) {
          int di;
          size_t lastdatasz = 0;
          if (contigsz > max_payload) continue;
          if (!myproc) {
            printf("\n%c: %s %s CONTIGSZ = %i\n", TEST_SECTION_NAME(),
                        visdesc[(int)viscat], (isget?"GET":"PUT"), contigsz);
            printf(" density:");
            for (di = 0; di < densitysteps; di++) {
              printf("%8i%%", (int)((densitysteps-di)*100.0/densitysteps));
            }
            printf("\n");
          }
          for (rawdatasz = min_payload; rawdatasz <= max_payload; rawdatasz *= datafactor) {
            char mystr[255];
            size_t datasz = aligndown(rawdatasz,contigsz);
            if (datasz == lastdatasz) continue;
            lastdatasz = datasz;
            if (singlesender) snprintf(mystr, sizeof(mystr), "%8i: ", (int)datasz);
            else  snprintf(mystr, sizeof(mystr), "P%i: %6i: ", myproc, (int)datasz);
            for (di = 0; di < densitysteps; di++) {
              gasnett_tick_t begin=0, end=0;
              size_t Lcnt = (localcontig ? 1 : datasz/contigsz);
              size_t Rcnt = (remotecontig? 1 : datasz/contigsz);
              size_t Lsz = datasz/Lcnt;
              size_t Rsz = datasz/Rcnt;
              void **Lilist = NULL;
              void **Rilist = NULL;
              gasnet_memvec_t *Lvlist = NULL;
              gasnet_memvec_t *Rvlist = NULL;
              size_t *Lstrides = NULL;
              size_t *Rstrides = NULL;
              size_t *LRcount = NULL;
              size_t stride = contigsz*(((double)densitysteps)/(densitysteps-di));
              if (stride * MAX(Lcnt,Rcnt) > maxsz) { strcat(mystr,"    -   "); continue; }

              if (iamsender) { /* setup metadata */
                switch (viscat) {
                  case TEST_V: 
                    Lvlist = make_vlist(Lbase, stride, Lcnt, Lsz);
                    Rvlist = make_vlist(Rbase, stride, Rcnt, Rsz);
                    break;
                  case TEST_I: 
                    Lilist = make_ilist(Lbase, stride, Lcnt, Lsz);
                    Rilist = make_ilist(Rbase, stride, Rcnt, Rsz);
                    break;
                  case TEST_S: {
                    size_t chunkcnt = datasz/contigsz;
                    int dim;
                    Lstrides = test_malloc(sizeof(size_t)*stridelevels);
                    Rstrides = test_malloc(sizeof(size_t)*stridelevels);
                    LRcount = test_malloc(sizeof(size_t)*(stridelevels+1));
                    LRcount[0] = contigsz;
                    Lstrides[0] = (localcontig ? contigsz : stride);
                    Rstrides[0] = (remotecontig ? contigsz : stride);
                    for (dim = 1; dim < stridelevels; dim++) {
                      size_t factor = 1, fi;
                      for (fi = 1; fi <= chunkcnt/(2*(stridelevels-dim)); fi++) /* choose a reasonable factor */
                        if (chunkcnt/fi*fi == chunkcnt) factor = fi;
                      LRcount[dim] = factor;
                      chunkcnt /= factor;
                      Lstrides[dim] = LRcount[dim]*Lstrides[dim-1];
                      Rstrides[dim] = LRcount[dim]*Rstrides[dim-1];
                    }
                    LRcount[stridelevels] = chunkcnt;
                    { size_t tmp = 1;
                      for (dim = 0; dim <= stridelevels; dim++) tmp *= LRcount[dim];
                      assert(tmp == datasz);
                    }
                    break;
                  }
                }
              }
              #define DOIT(iters) do {                                                           \
                int i;                                                                           \
                switch (viscat) {                                                                \
                  case TEST_V:                                                                   \
                    for (i = 0; i < iters; i++) {                                                \
                      if (isget) gasnet_getv_nbi_bulk(Lcnt,Lvlist,peerproc,Rcnt,Rvlist);         \
                      else gasnet_putv_nbi_bulk(peerproc,Rcnt,Rvlist,Lcnt,Lvlist);               \
                    }                                                                            \
                    break;                                                                       \
                  case TEST_I:                                                                   \
                    for (i = 0; i < iters; i++) {                                                \
                      if (isget) gasnet_geti_nbi_bulk(Lcnt,Lilist,Lsz,peerproc,Rcnt,Rilist,Rsz); \
                      else gasnet_puti_nbi_bulk(peerproc,Rcnt,Rilist,Rsz,Lcnt,Lilist,Lsz);       \
                    }                                                                            \
                    break;                                                                       \
                  case TEST_S:                                                                   \
                    for (i = 0; i < iters; i++) {                                                \
                      if (isget) gasnet_gets_nbi_bulk(Lbase,Lstrides,peerproc,Rbase,Rstrides,    \
                                                      LRcount,stridelevels);                     \
                      else gasnet_puts_nbi_bulk(peerproc,Rbase,Rstrides,Lbase,Lstrides,          \
                                                LRcount,stridelevels);                           \
                    }                                                                            \
                    break;                                                                       \
                }                                                                                \
                gasnet_wait_syncnbi_all();                                                       \
              } while (0)
              if (iamsender) DOIT(1); /* pay some warm-up costs */
              BARRIER();
              if (iamsender) { 
  	        begin = gasnett_ticks_now();
                DOIT(iters);
	        end = gasnett_ticks_now();
              }
              BARRIER();
              if (iamsender) { 
                char tmp[80];
                double secs = gasnett_ticks_to_ns(end - begin)/1.0E9;
                double dataMB = ((double)datasz) * iters / (1024*1024);
                snprintf(tmp, sizeof(tmp), " %8.3f", dataMB / secs);
                strcat(mystr, tmp);
              }
              if (Lilist) test_free(Lilist);
              if (Rilist) test_free(Rilist);
              if (Lvlist) test_free(Lvlist);
              if (Rvlist) test_free(Rvlist);
              if (Lstrides) test_free(Lstrides);
              if (Rstrides) test_free(Rstrides);
              if (LRcount) test_free(LRcount);
            }
            if (iamsender) { printf("%s\n", mystr); fflush(stdout); }
            BARRIER();
          }
        }
      }
    }
    }
  }

  BARRIER();
  if (alloc) test_free(alloc);

  gasnet_exit(0);

  return 0;
}
Exemple #8
0
/* placed in a function to avoid excessive inlining */
gasnett_tick_t ticktime(void) { return gasnett_ticks_now(); }