示例#1
0
/* This tester measures the performance of contended HSLs and pthread mutexes.
 */
int main(int argc, char **argv) {
  

  GASNET_Safe(gex_Client_Init(&myclient, &myep, &myteam, "testlockcontend", &argc, &argv, 0));
  GASNET_Safe(gex_Segment_Attach(&mysegment, myteam, TEST_SEGSZ_REQUEST));
  test_init("testlockcontend",1,"(maxthreads) (iters) (accuracy) (test sections)");

  if (argc > 1) maxthreads = atoi(argv[1]);
  maxthreads = test_thread_limit(maxthreads);
  if (maxthreads < 1) {
    printf("Threads must be between 1 and %i\n", TEST_MAXTHREADS);
    gasnet_exit(-1);
  }

  if (argc > 2) iters = atoi(argv[2]);
  if (!iters) iters = 1000000;

  if (argc > 3) accuracy = atoi(argv[3]);
  if (!accuracy) accuracy = 3;

  if (argc > 4) TEST_SECTION_PARSE(argv[4]);

  if (argc > 5) test_usage();

  mynode = gex_TM_QueryRank(myteam);
  myseg = TEST_MYSEG();

  if (mynode == 0) {
    printf("Running locks performance test with 1..%i threads and %i iterations...\n",maxthreads,iters);
    fflush(stdout);
    MSG0("Spawning pthreads...");
    if (TEST_SECTION_BEGIN_ENABLED()) {
      header("lock/unlock contended pthread mutex (others in thread barrier)");
      test_createandjoin_pthreads(threads = maxthreads, &thread_fn1, NULL, 0);
    }
    if (TEST_SECTION_BEGIN_ENABLED()) {
      header("lock/unlock contended HSL (others in thread barrier)");
      test_createandjoin_pthreads(threads = maxthreads, &thread_fn2, NULL, 0);
    }
    if (TEST_SECTION_BEGIN_ENABLED()) {
      header("lock/unlock contended pthread mutex (no other threads)");
      for (threads=1; threads<=maxthreads; ++threads) {
	test_createandjoin_pthreads(threads, &thread_fn3, NULL, 0);
      }
    }
    if (TEST_SECTION_BEGIN_ENABLED()) {
      header("lock/unlock contended HSL (no other threads)");
      for (threads=1; threads<=maxthreads; ++threads) {
	test_createandjoin_pthreads(threads, &thread_fn4, NULL, 0);
      }
    }
  }

  BARRIER();
  MSG("done.");

  gasnet_exit(0);
  return 0;
}
示例#2
0
文件: testam.c 项目: bradcray/chapel
/* ------------------------------------------------------------------------------------ */
void doAMShort(void) {
    GASNET_BEGIN_FUNCTION();

    if (sender) { /* warm-up */
      flag = 0;                                                                                  
      for (i=0; i < iters; i++) {
        GASNET_Safe(gasnet_AMRequestShort0(peer, hidx_ping_shorthandler_flood));
      }
      GASNET_BLOCKUNTIL(flag == iters);
      GASNET_Safe(gasnet_AMRequestShort0(peer, hidx_ping_shorthandler));
      GASNET_BLOCKUNTIL(flag == iters+1);
    }
    BARRIER();
    /* ------------------------------------------------------------------------------------ */
    if (TEST_SECTION_BEGIN_ENABLED() && sender) {
      int64_t start = TIME();
      flag = -1;
      for (i=0; i < iters; i++) {
        GASNET_Safe(gasnet_AMRequestShort0(peer, hidx_ping_shorthandler));
        GASNET_BLOCKUNTIL(flag == i);
      }
      report("        AMShort     ping-pong roundtrip ReqRep",TIME() - start, iters, 0, 1);
    }

    BARRIER();
    /* ------------------------------------------------------------------------------------ */
    if (TEST_SECTION_ENABLED()) {
      int64_t start = TIME();
      flag = -1;
      BARRIER();
      if (sender && recvr) {
        assert(peer == mynode);
        for (i=0; i < iters; i++) {
          int lim = i << 1;
          GASNET_Safe(gasnet_AMRequestShort0(peer, hidx_pong_shorthandler));
          GASNET_BLOCKUNTIL(flag == lim);
          lim++;
          GASNET_Safe(gasnet_AMRequestShort0(peer, hidx_pong_shorthandler));
          GASNET_BLOCKUNTIL(flag == lim);
        }
      } else if (sender) {
        for (i=0; i < iters; i++) {
          GASNET_Safe(gasnet_AMRequestShort0(peer, hidx_pong_shorthandler));
          GASNET_BLOCKUNTIL(flag == i);
        }
      } else if (recvr) {
        for (i=0; i < iters; i++) {
          GASNET_BLOCKUNTIL(flag == i);
          GASNET_Safe(gasnet_AMRequestShort0(peer, hidx_pong_shorthandler));
        }
      }
      report("        AMShort     ping-pong roundtrip ReqReq",TIME() - start, iters, 0, 1);

    if (mynode == 0) { printf("\n"); fflush(stdout); }
    }
    BARRIER();
    /* ------------------------------------------------------------------------------------ */
   if (TEST_SECTION_ENABLED()) {
    if (sender) {
      int64_t start = TIME();
      flag = 0;
      BARRIER();
      for (i=0; i < iters; i++) {
        GASNET_Safe(gasnet_AMRequestShort0(peer, hidx_pong_shorthandler_flood));
      }
      if (recvr) GASNET_BLOCKUNTIL(flag == iters);
      BARRIER();
      report("        AMShort     flood     one-way   Req",TIME() - start, iters, 0, 0);
    } else {
      flag = 0;
      BARRIER();
      GASNET_BLOCKUNTIL(flag == iters);
      BARRIER();
    }

    if (mynode == 0) { printf("\n"); fflush(stdout); }
   }
    BARRIER();
    /* ------------------------------------------------------------------------------------ */
    if (TEST_SECTION_ENABLED() && sender) {
      int64_t start = TIME();
      flag = 0;
      for (i=0; i < iters; i++) {
        GASNET_Safe(gasnet_AMRequestShort0(peer, hidx_ping_shorthandler_flood));
      }
      GASNET_BLOCKUNTIL(flag == iters);
      report("        AMShort     flood     roundtrip ReqRep",TIME() - start, iters, 0, 1);

      if (mynode == 0) { printf("\n"); fflush(stdout); }
    }
    BARRIER();
}
示例#3
0
int main(int argc, char **argv) {
  int iters = 0;
  int arg;
  void *alloc = NULL;
  int firstlastmode = 0;
  int fullduplexmode = 0;
  int crossmachinemode = 0;
  int singlesender = 0;
  int help = 0;   

  /* call startup */
  GASNET_Safe(gasnet_init(&argc, &argv));

  /* parse arguments */
  arg = 1;
  while (argc > arg) {
    if (!strcmp(argv[arg], "-in")) {
      insegment = 1;
      ++arg;
    } else if (!strcmp(argv[arg], "-out")) {
      insegment = 0;
      ++arg;
    } else if (!strcmp(argv[arg], "-sl")) {
      ++arg;
      if (argc > arg) { stridelevels = atoi(argv[arg]); arg++; }
      else help = 1;
    } else if (!strcmp(argv[arg], "-mincontig")) {
      ++arg;
      if (argc > arg) { min_contig = atoi(argv[arg]); arg++; }
      else help = 1;
    } else if (!strcmp(argv[arg], "-maxcontig")) {
      ++arg;
      if (argc > arg) { max_contig = atoi(argv[arg]); arg++; }
      else help = 1;
    } else if (!strcmp(argv[arg], "-contigfactor")) {
      ++arg;
      if (argc > arg) { contigfactor = atoi(argv[arg]); arg++; }
      else help = 1;
    } else if (!strcmp(argv[arg], "-mindata")) {
      ++arg;
      if (argc > arg) { min_payload = atoi(argv[arg]); arg++; }
      else help = 1;
    } else if (!strcmp(argv[arg], "-maxdata")) {
      ++arg;
      if (argc > arg) { max_payload = atoi(argv[arg]); arg++; }
      else help = 1;
    } else if (!strcmp(argv[arg], "-datafactor")) {
      ++arg;
      if (argc > arg) { datafactor = atoi(argv[arg]); arg++; }
      else help = 1;
    } else if (!strcmp(argv[arg], "-densitysteps")) {
      ++arg;
      if (argc > arg) { densitysteps = atoi(argv[arg]); arg++; }
      else help = 1;
    } else if (!strcmp(argv[arg], "-f")) {
      firstlastmode = 1;
      ++arg;
    } else if (!strcmp(argv[arg], "-c")) {
      crossmachinemode = 1;
      ++arg;
    } else if (!strcmp(argv[arg], "-a")) {
      fullduplexmode = 1;
      ++arg;
    } else if (!strcmp(argv[arg], "-p")) {
      dogets = 0; doputs = 1;
      ++arg;
    } else if (!strcmp(argv[arg], "-g")) {
      dogets = 1; doputs = 0;
      ++arg;
    } else if (!strcmp(argv[arg], "-r")) {
      remotecontig = 1;
      ++arg;
    } else if (!strcmp(argv[arg], "-l")) {
      localcontig = 1;
      ++arg;
    } else if (argv[arg][0] == '-') {
      help = 1;
      ++arg;
    } else break;
  }

  if (argc > arg) { iters = atoi(argv[arg]); arg++; }
  if (!iters) iters = 1000;
  if (argc > arg) { TEST_SECTION_PARSE(argv[arg]); arg++; }
  if (min_contig && max_contig && min_contig > max_contig) { ERR("min_contig > max_contig"); help = 1; }
  if (min_payload && max_payload && min_payload > max_payload) { ERR("min_payload > max_payload"); help = 1; }
  if (min_payload && min_contig && min_payload < min_contig) { ERR("min_payload < min_contig"); help = 1; }
  if (max_contig && max_payload && max_contig > max_payload) { ERR("max_contig > max_payload"); help = 1; }
  if (contigfactor < 2) { ERR("contigfactor < 2"); help = 1; }
  if (datafactor < 2) { ERR("datafactor < 2"); help = 1; }

  if (!max_payload) max_payload = 2*1024*1024; /* 2 MB default */
  #ifdef GASNET_SEGMENT_EVERYTHING
    maxsz = gasnet_getMaxGlobalSegmentSize();
  #else
    maxsz = 16*1024*1024;
  #endif
  max_payload = (int)MIN(maxsz, max_payload);
  maxsz = MIN(((uint64_t)max_payload) * densitysteps,maxsz);
  if (!min_contig) min_contig = 8;
  if (!max_contig) max_contig = MIN(256*1024,max_payload);
  if (!min_payload) min_payload = min_contig;

  GASNET_Safe(gasnet_attach(NULL, 0, TEST_SEGSZ_REQUEST, TEST_MINHEAPOFFSET));
  test_init("testvisperf",1, "[options] (iters) (test_sections)\n"
             "  -p/-g     selects puts only or gets only (default is both).\n"
             "  -r/-l     selects remotely contiguous or locally contiguous (default is neither).\n"
             "  -mindata/-maxdata <sz>   \n"
             "            selects sz as min/max data payload per operation.\n"
             "  -mincontig/-maxcontig <sz>   \n"
             "            selects sz as min/max contig size.\n"
             "  -datafactor/-contigfactor <f>   \n"
             "            selects f as growth factor for data/contig sizes.\n"
             "  -densitysteps <d>   \n"
             "            selects d density steps, inclusive from 100%..100/d%\n"
             "  -sl <n>   selects n striding levels (default is 2).\n"
             "  -in/-out  selects whether the initiator-side\n"
             "            memory is in the GASNet segment or not (default is not).\n"
             "  -a        enables full-duplex mode, where all nodes send.\n"
             "  -c        enables cross-machine pairing, default is nearest neighbor.\n"
             "  -f        enables 'first/last' mode, where the first/last\n"
             "            nodes communicate with each other, while all other nodes sit idle.");
  if (help || argc > arg) test_usage();

  /* get SPMD info */
  myproc = gasnet_mynode();
  numprocs = gasnet_nodes();

  if (!firstlastmode) {
    /* Only allow 1 or even number for numprocs */
    if (numprocs > 1 && numprocs % 2 != 0) {
      MSG0("WARNING: This test requires a unary or even number of nodes. Test skipped.\n");
      gasnet_exit(0); /* exit 0 to prevent false negatives in test harnesses for smp-conduit */
    }
  }

  /* Setting peer thread rank */
  if (firstlastmode) {
    peerproc = (myproc == 0 ? numprocs-1 : 0);
    iamsender = (fullduplexmode ? myproc == 0 || myproc == numprocs-1 : myproc == 0);
  } else if (numprocs == 1) {
    peerproc = 0;
    iamsender = 1;
  } else if (crossmachinemode) {
    if (myproc < numprocs / 2) {
      peerproc = myproc + numprocs/2;
      iamsender = 1;
    } else {
      peerproc = myproc - numprocs/2;
      iamsender = fullduplexmode;
    }
  } else { 
    peerproc = (myproc % 2) ? (myproc - 1) : (myproc + 1);
    iamsender = (fullduplexmode || myproc % 2 == 0);
  }
  singlesender = (numprocs == 1) || ((numprocs == 2 || firstlastmode) && !fullduplexmode);

  Rbase = TEST_SEG(peerproc);

  if (insegment) {
    Lbase = TEST_SEG(myproc);
  } else {
    alloc = test_calloc(maxsz+PAGESZ,1); /* use calloc to prevent valgrind warnings */
    Lbase = alignup_ptr(alloc, PAGESZ); /* ensure page alignment of base */
  }
  assert(((uintptr_t)Lbase) % PAGESZ == 0);

  if (myproc == 0) {
    MSG0("Running %i iterations of %s%s%snon-contiguous put/get%s%s\n local data %s-segment for sizes: %i...%i\n", 
    iters, 
    (firstlastmode ? "first/last " : ""),
    (fullduplexmode ? "full-duplex ": ""),
    (crossmachinemode ? "cross-machine ": ""),
    (remotecontig?"(remotely-contiguous)":""),
    (localcontig?"(locally-contiguous)":""),
    insegment ? "in" : "out", 
    min_payload, max_payload);
    printf("rows are databytes/op : bandwidth values in MB/s\n");
  }
  BARRIER();

  { int contigsz;
    int rawdatasz;
    int isget;
    test_vis_t viscat;
    for (viscat = TEST_V; viscat <= TEST_S; viscat++) {
    for (isget = 0; isget < 2; isget++) {
      if (TEST_SECTION_BEGIN_ENABLED()) {
        if (isget && !dogets) continue;
        if (!isget && !doputs) continue;
        if (!dovis[viscat]) continue;
        for (contigsz = min_contig; contigsz <= max_contig; contigsz *= contigfactor) {
          int di;
          size_t lastdatasz = 0;
          if (contigsz > max_payload) continue;
          if (!myproc) {
            printf("\n%c: %s %s CONTIGSZ = %i\n", TEST_SECTION_NAME(),
                        visdesc[(int)viscat], (isget?"GET":"PUT"), contigsz);
            printf(" density:");
            for (di = 0; di < densitysteps; di++) {
              printf("%8i%%", (int)((densitysteps-di)*100.0/densitysteps));
            }
            printf("\n");
          }
          for (rawdatasz = min_payload; rawdatasz <= max_payload; rawdatasz *= datafactor) {
            char mystr[255];
            size_t datasz = aligndown(rawdatasz,contigsz);
            if (datasz == lastdatasz) continue;
            lastdatasz = datasz;
            if (singlesender) snprintf(mystr, sizeof(mystr), "%8i: ", (int)datasz);
            else  snprintf(mystr, sizeof(mystr), "P%i: %6i: ", myproc, (int)datasz);
            for (di = 0; di < densitysteps; di++) {
              gasnett_tick_t begin=0, end=0;
              size_t Lcnt = (localcontig ? 1 : datasz/contigsz);
              size_t Rcnt = (remotecontig? 1 : datasz/contigsz);
              size_t Lsz = datasz/Lcnt;
              size_t Rsz = datasz/Rcnt;
              void **Lilist = NULL;
              void **Rilist = NULL;
              gasnet_memvec_t *Lvlist = NULL;
              gasnet_memvec_t *Rvlist = NULL;
              size_t *Lstrides = NULL;
              size_t *Rstrides = NULL;
              size_t *LRcount = NULL;
              size_t stride = contigsz*(((double)densitysteps)/(densitysteps-di));
              if (stride * MAX(Lcnt,Rcnt) > maxsz) { strcat(mystr,"    -   "); continue; }

              if (iamsender) { /* setup metadata */
                switch (viscat) {
                  case TEST_V: 
                    Lvlist = make_vlist(Lbase, stride, Lcnt, Lsz);
                    Rvlist = make_vlist(Rbase, stride, Rcnt, Rsz);
                    break;
                  case TEST_I: 
                    Lilist = make_ilist(Lbase, stride, Lcnt, Lsz);
                    Rilist = make_ilist(Rbase, stride, Rcnt, Rsz);
                    break;
                  case TEST_S: {
                    size_t chunkcnt = datasz/contigsz;
                    int dim;
                    Lstrides = test_malloc(sizeof(size_t)*stridelevels);
                    Rstrides = test_malloc(sizeof(size_t)*stridelevels);
                    LRcount = test_malloc(sizeof(size_t)*(stridelevels+1));
                    LRcount[0] = contigsz;
                    Lstrides[0] = (localcontig ? contigsz : stride);
                    Rstrides[0] = (remotecontig ? contigsz : stride);
                    for (dim = 1; dim < stridelevels; dim++) {
                      size_t factor = 1, fi;
                      for (fi = 1; fi <= chunkcnt/(2*(stridelevels-dim)); fi++) /* choose a reasonable factor */
                        if (chunkcnt/fi*fi == chunkcnt) factor = fi;
                      LRcount[dim] = factor;
                      chunkcnt /= factor;
                      Lstrides[dim] = LRcount[dim]*Lstrides[dim-1];
                      Rstrides[dim] = LRcount[dim]*Rstrides[dim-1];
                    }
                    LRcount[stridelevels] = chunkcnt;
                    { size_t tmp = 1;
                      for (dim = 0; dim <= stridelevels; dim++) tmp *= LRcount[dim];
                      assert(tmp == datasz);
                    }
                    break;
                  }
                }
              }
              #define DOIT(iters) do {                                                           \
                int i;                                                                           \
                switch (viscat) {                                                                \
                  case TEST_V:                                                                   \
                    for (i = 0; i < iters; i++) {                                                \
                      if (isget) gasnet_getv_nbi_bulk(Lcnt,Lvlist,peerproc,Rcnt,Rvlist);         \
                      else gasnet_putv_nbi_bulk(peerproc,Rcnt,Rvlist,Lcnt,Lvlist);               \
                    }                                                                            \
                    break;                                                                       \
                  case TEST_I:                                                                   \
                    for (i = 0; i < iters; i++) {                                                \
                      if (isget) gasnet_geti_nbi_bulk(Lcnt,Lilist,Lsz,peerproc,Rcnt,Rilist,Rsz); \
                      else gasnet_puti_nbi_bulk(peerproc,Rcnt,Rilist,Rsz,Lcnt,Lilist,Lsz);       \
                    }                                                                            \
                    break;                                                                       \
                  case TEST_S:                                                                   \
                    for (i = 0; i < iters; i++) {                                                \
                      if (isget) gasnet_gets_nbi_bulk(Lbase,Lstrides,peerproc,Rbase,Rstrides,    \
                                                      LRcount,stridelevels);                     \
                      else gasnet_puts_nbi_bulk(peerproc,Rbase,Rstrides,Lbase,Lstrides,          \
                                                LRcount,stridelevels);                           \
                    }                                                                            \
                    break;                                                                       \
                }                                                                                \
                gasnet_wait_syncnbi_all();                                                       \
              } while (0)
              if (iamsender) DOIT(1); /* pay some warm-up costs */
              BARRIER();
              if (iamsender) { 
  	        begin = gasnett_ticks_now();
                DOIT(iters);
	        end = gasnett_ticks_now();
              }
              BARRIER();
              if (iamsender) { 
                char tmp[80];
                double secs = gasnett_ticks_to_ns(end - begin)/1.0E9;
                double dataMB = ((double)datasz) * iters / (1024*1024);
                snprintf(tmp, sizeof(tmp), " %8.3f", dataMB / secs);
                strcat(mystr, tmp);
              }
              if (Lilist) test_free(Lilist);
              if (Rilist) test_free(Rilist);
              if (Lvlist) test_free(Lvlist);
              if (Rvlist) test_free(Rvlist);
              if (Lstrides) test_free(Lstrides);
              if (Rstrides) test_free(Rstrides);
              if (LRcount) test_free(LRcount);
            }
            if (iamsender) { printf("%s\n", mystr); fflush(stdout); }
            BARRIER();
          }
        }
      }
    }
    }
  }

  BARRIER();
  if (alloc) test_free(alloc);

  gasnet_exit(0);

  return 0;
}
示例#4
0
int main(int argc, char **argv)
{
    int iters = 0;
    int arg;
    void *myseg;
    int firstlastmode = 0;
    int fullduplexmode = 0;
    int crossmachinemode = 0;
    int skipwarmup = 0;
    int help = 0;   

    /* call startup */
    GASNET_Safe(gasnet_init(&argc, &argv));

    /* parse arguments */
    arg = 1;
    while (argc > arg) {
      if (!strcmp(argv[arg], "-f")) {
        firstlastmode = 1;
        ++arg;
      } else if (!strcmp(argv[arg], "-c")) {
        crossmachinemode = 1;
        ++arg;
      } else if (!strcmp(argv[arg], "-a")) {
        fullduplexmode = 1;
        ++arg;
      } else if (!strcmp(argv[arg], "-m")) {
        unitsMB = 1;
        ++arg;
      } else if (!strcmp(argv[arg], "-s")) {
        skipwarmup = 1;
        ++arg;
      } else if (argv[arg][0] == '-') {
        help = 1;
        ++arg;
      } else break;
    }

    if (argc > arg) { iters = atoi(argv[arg]); arg++; }
    if (!iters) iters = 1000;
    if (argc > arg) { maxsz = atoi(argv[arg]); arg++; }
    if (!maxsz) maxsz = 1024*1024; /* 1 MB default */
    if (argc > arg) { TEST_SECTION_PARSE(argv[arg]); arg++; }

    #ifdef GASNET_SEGMENT_EVERYTHING
      if (maxsz > TEST_SEGSZ) { ERR("maxsz must be <= %lu on GASNET_SEGMENT_EVERYTHING",(unsigned long)TEST_SEGSZ); gasnet_exit(1); }
    #endif
    GASNET_Safe(gasnet_attach(NULL, 0, TEST_SEGSZ_REQUEST, TEST_MINHEAPOFFSET));
    test_init("testmemset",1, "[options] (iters) (maxsz) (test_sections)\n"
               "  The -s option skips warm-up iterations\n"
               "  The -m option enables MB/sec units for bandwidth output (MB=2^20 bytes).\n"
               "  The -a option enables full-duplex mode, where all nodes send.\n"
               "  The -c option enables cross-machine pairing, default is nearest neighbor.\n"
               "  The -f option enables 'first/last' mode, where the first/last\n"
               "   nodes communicate with each other, while all other nodes sit idle.");
    if (help || argc > arg) test_usage();
    
    min_payload = 1;
    max_payload = maxsz;

    if (max_payload < min_payload) {
      ERR("maxsz must be >= %i\n",min_payload);
      test_usage();
    }

    /* get SPMD info */
    myproc = gasnet_mynode();
    numprocs = gasnet_nodes();

    if (!firstlastmode) {
      /* Only allow 1 or even number for numprocs */
      if (numprocs > 1 && numprocs % 2 != 0) {
        MSG("WARNING: This test requires a unary or even number of nodes. Test skipped.\n");
        gasnet_exit(0); /* exit 0 to prevent false negatives in test harnesses for smp-conduit */
      }
    }
    
    /* Setting peer thread rank */
    if (firstlastmode) {
      peerproc = (myproc == 0 ? numprocs-1 : 0);
      iamsender = (fullduplexmode ? myproc == 0 || myproc == numprocs-1 : myproc == 0);
    } else if (numprocs == 1) {
      peerproc = 0;
      iamsender = 1;
    } else if (crossmachinemode) {
      if (myproc < numprocs / 2) {
        peerproc = myproc + numprocs/2;
        iamsender = 1;
      } else {
        peerproc = myproc - numprocs/2;
        iamsender = fullduplexmode;
      }
    } else { 
      peerproc = (myproc % 2) ? (myproc - 1) : (myproc + 1);
      iamsender = (fullduplexmode || myproc % 2 == 0);
    }

    myseg = TEST_SEG(myproc);
    tgtmem = TEST_SEG(peerproc);

        if (myproc == 0) 
          MSG("Running %i iterations of %s%s%smemset for sizes: %i...%i\n", 
          iters, 
          (firstlastmode ? "first/last " : ""),
          (fullduplexmode ? "full-duplex ": ""),
          (crossmachinemode ? "cross-machine ": ""),
          min_payload, max_payload);
        BARRIER();

        if (iamsender && !skipwarmup) { /* pay some warm-up costs */
           int i;
           int warm_iters = MIN(iters, 32767);	/* avoid hitting 65535-handle limit */
           gasnet_handle_t *h = test_malloc(sizeof(gasnet_handle_t)*warm_iters);
           for (i = 0; i < warm_iters; i++) {
              gasnet_memset(peerproc, tgtmem, 0xff, 8);
              gasnet_memset_nbi(peerproc, tgtmem, 0xff, 8);
              h[i] = gasnet_memset_nb(peerproc, tgtmem, 0xff, 8);
           }
           gasnet_memset(peerproc, tgtmem, 0xff, max_payload);
           gasnet_wait_syncnb_all(h, warm_iters);
           gasnet_wait_syncnbi_puts();
           test_free(h);
        }

        BARRIER();

	if (TEST_SECTION_BEGIN_ENABLED()) bulk_test(iters);
	if (TEST_SECTION_BEGIN_ENABLED()) bulk_test_nbi(iters);
	if (TEST_SECTION_BEGIN_ENABLED()) bulk_test_nb(iters);

        BARRIER();

    gasnet_exit(0);

    return 0;

}