int main(int argc, char *argv[]) { FILE *out; /* Output data file */ char s[255]; /* Generic string */ char *memtmp; char *memtmp1; int c, /* option index */ i, j, n, nq, /* Loop indices */ asyncReceive=0, /* Pre-post a receive buffer? */ bufoffset=0, /* Align buffer to this */ bufalign=16*1024,/* Boundary to align buffer to */ errFlag, /* Error occurred in inner testing loop */ nrepeat, /* Number of time to do the transmission */ len, /* Number of bytes to be transmitted */ inc=0, /* Increment value */ trans=-1, /* Transmitter flag. 1 if transmitting. */ detailflag=0, /* Set to examine the signature curve detail */ bufszflag=0, /* Set to change the TCP socket buffer size */ pert, /* Perturbation value */ start=1, /* Starting value for signature curve */ end=MAXINT, /* Ending value for signature curve */ streamopt=0, /* Streaming mode flag */ printopt=0; /* Debug print statements flag */ ArgStruct args; /* Argumentsfor all the calls */ double t, t0, t1, t2, /* Time variables */ tlast, /* Time for the last transmission */ latency; /* Network message latency */ Data bwdata[NSAMP]; /* Bandwidth curve data */ short port=DEFPORT; /* Port number for connection */ #ifdef HAVE_GETRUSAGE struct rusage prev_rusage, curr_rusage; /* Resource usage */ double user_time, sys_time; /* User & system time used */ double best_user_time, best_sys_time; /* Total user & system time used */ double ut1, ut2, st1, st2; /* User & system time ctrs for variance */ double ut_var, st_var; /* Variance in user & system time */ #endif #ifdef MPI MPI_Init(&argc, &argv); #endif strcpy(s, "NetPIPE.out"); #ifndef MPI if(argc < 2) PrintUsage(); #endif /* Parse the arguments. See Usage for description */ while ((c = getopt(argc, argv, "Pstrh:p:o:A:O:l:u:i:b:a")) != -1) { switch(c) { case 'o': strcpy(s,optarg); break; case 't': trans = 1; break; case 'r': trans = 0; break; case 's': streamopt = 1; break; case 'l': /*detailflag = 1;*/ start = atoi(optarg); if (start < 1) { fprintf(stderr,"Need a starting value >= 1\n"); exit(743); } break; case 'u': /*detailflag = 1;*/ end = atoi(optarg); break; case 'i': detailflag = 1; inc = atoi(optarg); break; case 'b': bufszflag = 1; #ifdef TCP args.prot.rcvbufsz=atoi(optarg); args.prot.sndbufsz=args.prot.rcvbufsz; #endif break; case 'P': printopt = 1; break; case 'A': bufalign = atoi(optarg); break; case 'O': bufoffset = atoi(optarg); break; case 'p': port = atoi(optarg); break; case 'h': if (trans == 1) { args.host = (char *)malloc(strlen(optarg)+1); strcpy(args.host, optarg); } else { fprintf(stderr, "Error: -t must be specified before -h\n"); exit(-11); } break; case 'a': asyncReceive = 1; break; default: PrintUsage(); exit(-12); } } if (start > end) { fprintf(stderr, "Start MUST be LESS than end\n"); exit(420132); } #if defined(TCP) || defined(PVM) /* It should be explicitly specified whether this is the transmitter or the receiver. */ if (trans < 0) { fprintf(stderr, "Error: either -t or -r must be specified\n"); exit(-11); } #endif args.nbuff = TRIALS; args.tr = trans; args.port = port; #if defined(TCP) if (!bufszflag) { args.prot.sndbufsz = 0; args.prot.rcvbufsz = 0; } else fprintf(stderr,"Send and Recv Buffers are %d bytes\n", args.prot.sndbufsz); #endif Setup(&args); Establish(&args); if (args.tr) { if ((out = fopen(s, "w")) == NULL) { fprintf(stderr,"Can't open %s for output\n", s); exit(1); } } else out = stdout; args.bufflen = 1; args.buff = (char *)malloc(args.bufflen); args.buff1 = (char *)malloc(args.bufflen); if (asyncReceive) PrepareToReceive(&args); Sync(&args); t0 = When(); t0 = When(); t0 = When(); #ifdef HAVE_GETRUSAGE getrusage(RUSAGE_SELF, &prev_rusage); #endif t0 = When(); for (i = 0; i < LATENCYREPS; i++) { if (args.tr) { SendData(&args); RecvData(&args); if (asyncReceive && (i < LATENCYREPS - 1)) { PrepareToReceive(&args); } } else { RecvData(&args); if (asyncReceive && (i < LATENCYREPS - 1)) { PrepareToReceive(&args); } SendData(&args); } } latency = (When() - t0)/(2 * LATENCYREPS); #ifdef HAVE_GETRUSAGE getrusage(RUSAGE_SELF, &curr_rusage); #endif free(args.buff); free(args.buff1); if (args.tr) { SendTime(&args, &latency); } else { RecvTime(&args, &latency); } if (args.tr && printopt) { fprintf(stderr,"Latency: %.7f\n", latency); fprintf(stderr,"Now starting main loop\n"); } tlast = latency; if (inc == 0) { /* Set a starting value for the message size increment. */ inc = (start > 1) ? start / 2 : 1; } /* Main loop of benchmark */ for (nq = n = 0, len = start, errFlag = 0; n < NSAMP - 3 && tlast < STOPTM && len <= end && !errFlag; len = len + inc, nq++ ) { if (nq > 2 && !detailflag) { /* This has the effect of exponentially increasing the block size. If detailflag is false, then the block size is linearly increased (the increment is not adjusted). */ inc = ((nq % 2))? inc + inc: inc; } /* This is a perturbation loop to test nearby values */ for (pert = (!detailflag && inc > PERT+1)? -PERT: 0; pert <= PERT; n++, pert += (!detailflag && inc > PERT+1)? PERT: PERT+1) { /* Calculate how many times to repeat the experiment. */ if (args.tr) { nrepeat = MAX((RUNTM / ((double)args.bufflen / (args.bufflen - inc + 1.0) * tlast)), TRIALS); SendRepeat(&args, nrepeat); } else { RecvRepeat(&args, &nrepeat); } /* Allocate the buffer */ args.bufflen = len + pert; if((args.buff=(char *)malloc(args.bufflen+bufalign))==(char *)NULL) { fprintf(stderr,"Couldn't allocate memory\n"); errFlag = -1; break; } if((args.buff1=(char *)malloc(args.bufflen+bufalign))==(char *)NULL) { fprintf(stderr,"Couldn't allocate memory\n"); errFlag = -1; break; } /* Possibly align the data buffer: make memtmp and memtmp1 point to the original blocks (so they can be freed later), then adjust args.buff and args.buff1 if the user requested it. */ memtmp = args.buff; memtmp1 = args.buff1; if (bufalign != 0) args.buff +=(bufalign - ((int)(*args.buff) % bufalign) + bufoffset) % bufalign; if (bufalign != 0) args.buff1 +=(bufalign - ((int)(*args.buff1) % bufalign) + bufoffset) % bufalign; if (args.tr && printopt) fprintf(stderr,"%3d: %9d bytes %4d times --> ", n,args.bufflen,nrepeat); /* Finally, we get to transmit or receive and time */ if (args.tr) { /* This is the transmitter: send the block TRIALS times, and if we are not streaming, expect the receiver to return each block. */ bwdata[n].t = LONGTIME; t2 = t1 = 0; #ifdef HAVE_GETRUSAGE ut1 = ut2 = st1 = st2 = 0.0; best_user_time = best_sys_time = LONGTIME; #endif for (i = 0; i < TRIALS; i++) { Sync(&args); #ifdef HAVE_GETRUSAGE getrusage(RUSAGE_SELF, &prev_rusage); #endif t0 = When(); for (j = 0; j < nrepeat; j++) { if (asyncReceive && !streamopt) { PrepareToReceive(&args); } SendData(&args); if (!streamopt) { RecvData(&args); } } t = (When() - t0)/((1 + !streamopt) * nrepeat); #ifdef HAVE_GETRUSAGE getrusage(RUSAGE_SELF, &curr_rusage); user_time = ((curr_rusage.ru_utime.tv_sec - prev_rusage.ru_utime.tv_sec) + (double) (curr_rusage.ru_utime.tv_usec - prev_rusage.ru_utime.tv_usec) * 1.0E-6) / ((1 + !streamopt) * nrepeat); sys_time = ((curr_rusage.ru_stime.tv_sec - prev_rusage.ru_stime.tv_sec) + (double) (curr_rusage.ru_stime.tv_usec - prev_rusage.ru_stime.tv_usec) * 1.0E-6) / ((1 + !streamopt) * nrepeat); ut2 += user_time * user_time; st2 += sys_time * sys_time; ut1 += user_time; st1 += sys_time; if ((user_time + sys_time) < (best_user_time + best_sys_time)) { best_user_time = user_time; best_sys_time = sys_time; } #endif if (!streamopt) { t2 += t*t; t1 += t; bwdata[n].t = MIN(bwdata[n].t, t); } } if (!streamopt) SendTime(&args, &bwdata[n].t); else RecvTime(&args, &bwdata[n].t); if (!streamopt) bwdata[n].variance = t2/TRIALS - t1/TRIALS * t1/TRIALS; #ifdef HAVE_GETRUSAGE ut_var = ut2/TRIALS - (ut1/TRIALS) * (ut1/TRIALS); st_var = st2/TRIALS - (st1/TRIALS) * (st1/TRIALS); #endif } else { /* This is the receiver: receive the block TRIALS times, and if we are not streaming, send the block back to the sender. */ bwdata[n].t = LONGTIME; t2 = t1 = 0; for (i = 0; i < TRIALS; i++) { if (asyncReceive) { PrepareToReceive(&args); } Sync(&args); t0 = When(); for (j = 0; j < nrepeat; j++) { RecvData(&args); if (asyncReceive && (j < nrepeat - 1)) { PrepareToReceive(&args); } if (!streamopt) SendData(&args); } t = (When() - t0)/((1 + !streamopt) * nrepeat); if (streamopt) { t2 += t*t; t1 += t; bwdata[n].t = MIN(bwdata[n].t, t); } } if (streamopt) SendTime(&args, &bwdata[n].t); else RecvTime(&args, &bwdata[n].t); if (streamopt) bwdata[n].variance = t2/TRIALS - t1/TRIALS * t1/TRIALS; } tlast = bwdata[n].t; bwdata[n].bits = args.bufflen * CHARSIZE; bwdata[n].bps = bwdata[n].bits / (bwdata[n].t * 1024 * 1024); bwdata[n].repeat = nrepeat; if (args.tr) { fprintf(out, "%.7f %.7f %d %d %.7f", bwdata[n].t, bwdata[n].bps, bwdata[n].bits, bwdata[n].bits / 8, bwdata[n].variance); #ifdef HAVE_GETRUSAGE fprintf(out, " %.7f %.7f %.7f %.7f", ut1 / (double) TRIALS, st1 / (double) TRIALS, ut_var, st_var); #endif fprintf(out, "\n"); } fflush(out); free(memtmp); free(memtmp1); if (args.tr && printopt) { fprintf(stderr," %6.2f Mbps in %.7f sec", bwdata[n].bps, tlast); #ifdef HAVE_GETRUSAGE fprintf(stderr, ", avg utime=%.7f avg stime=%.7f, ", ut1 / (double) TRIALS, st1 / (double) TRIALS); fprintf(stderr, "min utime=%.7f stime=%.7f, ", best_user_time, best_sys_time); fprintf(stderr, "utime var=%.7f stime var=%.7f", ut_var, st_var); #endif fprintf(stderr, "\n"); } } /* End of perturbation loop */ } /* End of main loop */ if (args.tr) fclose(out); CleanUp(&args); return(0); }
int main(int argc, char **argv) { FILE *out; /* Output data file */ char s[255],s2[255],delim[255],*pstr; /* Generic strings */ int *memcache; /* used to flush cache */ int len_buf_align, /* meaningful when args.cache is 0. buflen */ /* rounded up to be divisible by 8 */ num_buf_align; /* meaningful when args.cache is 0. number */ /* of aligned buffers in memtmp */ int c, /* option index */ i, j, n, nq, /* Loop indices */ asyncReceive=0, /* Pre-post a receive buffer? */ bufalign=16*1024,/* Boundary to align buffer to */ errFlag, /* Error occurred in inner testing loop */ nrepeat, /* Number of time to do the transmission */ nrepeat_const=0,/* Set if we are using a constant nrepeat */ len, /* Number of bytes to be transmitted */ inc=0, /* Increment value */ perturbation=DEFPERT, /* Perturbation value */ pert, start= 1, /* Starting value for signature curve */ end=MAXINT, /* Ending value for signature curve */ streamopt=0, /* Streaming mode flag */ reset_connection;/* Reset the connection between trials */ ArgStruct args; /* Arguments for all the calls */ double t, t0, t1, t2, /* Time variables */ tlast, /* Time for the last transmission */ latency; /* Network message latency */ Data bwdata[NSAMP]; /* Bandwidth curve data */ int integCheck=0; /* Integrity check */ /* Initialize vars that may change from default due to arguments */ strcpy(s, "np.out"); /* Default output file */ /* Let modules initialize related vars, and possibly call a library init function that requires argc and argv */ Init(&args, &argc, &argv); /* This will set args.tr and args.rcv */ args.preburst = 0; /* Default to not bursting preposted receives */ args.bidir = 0; /* Turn bi-directional mode off initially */ args.cache = 1; /* Default to use cache */ args.upper = end; args.host = NULL; args.soffset=0; /* default to no offsets */ args.roffset=0; args.syncflag=0; /* use normal mpi_send */ args.port = DEFPORT; /* just in case the user doesn't set this. */ /* TCGMSG launches NPtcgmsg with a -master master_hostname * argument, so ignore all arguments and set them manually * in netpipe.c instead. */ #if ! defined(TCGMSG) /* Parse the arguments. See Usage for description */ while ((c = getopt(argc, argv, "SO:rIiszgfaB2h:p:o:l:u:b:m:n:t:c:d:D:P:")) != -1) { switch(c) { case 'O': strcpy(s2,optarg); strcpy(delim,","); if((pstr=strtok(s2,delim))!=NULL) { args.soffset=atoi(pstr); if((pstr=strtok((char *)NULL,delim))!=NULL) args.roffset=atoi(pstr); else /* only got one token */ args.roffset=args.soffset; } else { args.soffset=0; args.roffset=0; } printf("Transmit buffer offset: %d\nReceive buffer offset: %d\n",args.soffset,args.roffset); break; case 'p': perturbation = atoi(optarg); if( perturbation > 0 ) { printf("Using a perturbation value of %d\n\n", perturbation); } else { perturbation = 0; printf("Using no perturbations\n\n"); } break; case 'B': if(integCheck == 1) { fprintf(stderr, "Integrity check not supported with prepost burst\n"); exit(-1); } args.preburst = 1; asyncReceive = 1; printf("Preposting all receives before a timed run.\n"); printf("Some would consider this cheating,\n"); printf("but it is needed to match some vendor tests.\n"); fflush(stdout); break; case 'I': args.cache = 0; printf("Performance measured without cache effects\n\n"); fflush(stdout); break; case 'o': strcpy(s,optarg); printf("Sending output to %s\n", s); fflush(stdout); break; case 's': streamopt = 1; printf("Streaming in one direction only.\n\n"); #if defined(TCP) && ! defined(INFINIBAND) printf("Sockets are reset between trials to avoid\n"); printf("degradation from a collapsing window size.\n\n"); #endif args.reset_conn = 1; printf("Streaming does not provide an accurate\n"); printf("measurement of the latency since small\n"); printf("messages may get bundled together.\n\n"); if( args.bidir == 1 ) { printf("You can't use -s and -2 together\n"); exit(0); } fflush(stdout); break; case 'l': start = atoi(optarg); if (start < 1) { fprintf(stderr,"Need a starting value >= 1\n"); exit(0); } break; case 'u': end = atoi(optarg); break; #if defined(TCP) && ! defined(INFINIBAND) case 'b': /* -b # resets the buffer size, -b 0 keeps system defs */ args.prot.sndbufsz = args.prot.rcvbufsz = atoi(optarg); break; #endif case '2': args.bidir = 1; /* Both procs are transmitters */ /* end will be maxed at sndbufsz+rcvbufsz */ printf("Passing data in both directions simultaneously.\n"); printf("Output is for the combined bandwidth.\n"); #if defined(TCP) && ! defined(INFINIBAND) printf("The socket buffer size limits the maximum test size.\n\n"); #endif if( streamopt ) { printf("You can't use -s and -2 together\n"); exit(0); } break; case 'h': args.tr = 1; /* -h implies transmit node */ args.rcv = 0; args.host = (char *)malloc(strlen(optarg)+1); strcpy(args.host, optarg); break; #ifdef DISK case 'd': args.tr = 1; /* -d to specify input/output file */ args.rcv = 0; args.prot.read = 0; args.prot.read_type = 'c'; args.prot.dfile_name = (char *)malloc(strlen(optarg)+1); strcpy(args.prot.dfile_name, optarg); break; case 'D': if( optarg[0] == 'r' ) args.prot.read = 1; else args.prot.read = 0; args.prot.read_type = optarg[1]; break; #endif case 'i': if(args.preburst == 1) { fprintf(stderr, "Integrity check not supported with prepost burst\n"); exit(-1); } integCheck = 1; perturbation = 0; start = sizeof(int)+1; /* Start with integer size */ printf("Doing an integrity check instead of measuring performance\n"); fflush(stdout); break; #if defined(MPI) case 'z': args.source_node = -1; printf("Receive using the ANY_SOURCE flag\n"); fflush(stdout); break; case 'a': asyncReceive = 1; printf("Preposting asynchronous receives\n"); fflush(stdout); break; case 'S': args.syncflag=1; fprintf(stderr,"Using synchronous sends\n"); break; #endif #if defined(MPI2) case 'g': if(args.prot.no_fence == 1) { fprintf(stderr, "-f cannot be used with -g\n"); exit(-1); } args.prot.use_get = 1; printf("Using MPI-2 Get instead of Put\n"); break; case 'f': if(args.prot.use_get == 1) { fprintf(stderr, "-f cannot be used with -g\n"); exit(-1); } args.prot.no_fence = 1; bufalign = 0; printf("Buffer alignment off (Required for no fence)\n"); break; #endif /* MPI2 */ #if defined(INFINIBAND) case 'm': switch(atoi(optarg)) { case 256: args.prot.ib_mtu = MTU256; break; case 512: args.prot.ib_mtu = MTU512; break; case 1024: args.prot.ib_mtu = MTU1024; break; case 2048: args.prot.ib_mtu = MTU2048; break; case 4096: args.prot.ib_mtu = MTU4096; break; default: fprintf(stderr, "Invalid MTU size, must be one of " "256, 512, 1024, 2048, 4096\n"); exit(-1); } break; case 't': if( !strcmp(optarg, "send_recv") ) { printf("Using Send/Receive communications\n"); args.prot.commtype = NP_COMM_SENDRECV; } else if( !strcmp(optarg, "send_recv_with_imm") ) { printf("Using Send/Receive communications with immediate data\n"); args.prot.commtype = NP_COMM_SENDRECV_WITH_IMM; } else if( !strcmp(optarg, "rdma_write") ) { printf("Using RDMA Write communications\n"); args.prot.commtype = NP_COMM_RDMAWRITE; } else if( !strcmp(optarg, "rdma_write_with_imm") ) { printf("Using RDMA Write communications with immediate data\n"); args.prot.commtype = NP_COMM_RDMAWRITE_WITH_IMM; } else { fprintf(stderr, "Invalid transfer type " "specified, please choose one of:\n\n" "\tsend_recv\t\tUse Send/Receive communications\t(default)\n" "\tsend_recv_with_imm\tSame as above with immediate data\n" "\trdma_write\t\tUse RDMA Write communications\n" "\trdma_write_with_imm\tSame as above with immediate data\n\n"); exit(-1); } break; case 'c': if( !strcmp(optarg, "local_poll") ) { printf("Using local polling completion\n"); args.prot.comptype = NP_COMP_LOCALPOLL; } else if( !strcmp(optarg, "vapi_poll") ) { printf("Using VAPI polling completion\n"); args.prot.comptype = NP_COMP_VAPIPOLL; } else if( !strcmp(optarg, "event") ) { printf("Using VAPI event completion\n"); args.prot.comptype = NP_COMP_EVENT; } else { fprintf(stderr, "Invalid completion type specified, " "please choose one of:\n\n" "\tlocal_poll\tWait for last byte of data\t(default)\n" "\tvapi_poll\tUse VAPI polling function\n" "\tevent\t\tUse VAPI event handling function\n\n"); exit(-1); } break; #endif case 'n': nrepeat_const = atoi(optarg); break; #if defined(TCP) && ! defined(INFINIBAND) case 'r': args.reset_conn = 1; printf("Resetting connection after every trial\n"); break; #endif case 'P': args.port = atoi(optarg); break; default: PrintUsage(); exit(-12); } } #endif /* ! defined TCGMSG */ #if defined(INFINIBAND) asyncReceive = 1; fprintf(stderr, "Preposting asynchronous receives (required for Infiniband)\n"); if(args.bidir && ( (args.cache && args.prot.commtype == NP_COMM_RDMAWRITE) || /* rdma_write only works with no-cache mode */ (!args.preburst && args.prot.commtype != NP_COMM_RDMAWRITE) || /* anything besides rdma_write requires prepost burst */ (args.preburst && args.prot.comptype == NP_COMP_LOCALPOLL && args.cache) || /* preburst with local polling in cache mode doesn't work */ 0)) { fprintf(stderr, "\n" "Bi-directional mode currently only works with a subset of the\n" "Infiniband options. Restrictions are:\n" "\n" " RDMA write (-t rdma_write) requires no-cache mode (-I).\n" "\n" " Local polling (-c local_poll, default if no -c given) requires\n" " no-cache mode (-I), and if not using RDMA write communication,\n" " burst mode (-B).\n" "\n" " Any other communication type and any other completion type\n" " require burst mode (-B). No-cache mode (-I) may be used\n" " optionally.\n" "\n" " All other option combinations will fail.\n" "\n"); exit(-1); } #endif if (start > end) { fprintf(stderr, "Start MUST be LESS than end\n"); exit(420132); } args.nbuff = TRIALS; Setup(&args); if( args.bidir && end > args.upper ) { end = args.upper; if( args.tr ) { printf("The upper limit is being set to %d Bytes\n", end); #if defined(TCP) && ! defined(INFINIBAND) printf("due to socket buffer size limitations\n\n"); #endif } } #if defined(GM) if(streamopt && (!nrepeat_const || nrepeat_const > args.prot.num_stokens)) { printf("\nGM is currently limited by the driver software to %d\n", args.prot.num_stokens); printf("outstanding sends. The number of repeats will be set\n"); printf("to this limit for every trial in streaming mode. You\n"); printf("may use the -n switch to set a smaller number of repeats\n\n"); nrepeat_const = args.prot.num_stokens; } #endif if( args.tr ) /* Primary transmitter */ { if ((out = fopen(s, "w")) == NULL) { fprintf(stderr,"Can't open %s for output\n", s); exit(1); } } else out = stdout; /* Set a starting value for the message size increment. */ inc = (start > 1) ? start / 2 : 1; nq = (start > 1) ? 1 : 0; /* Test the timing to set tlast for the first test */ args.bufflen = start; MyMalloc(&args, args.bufflen, 0, 0); InitBufferData(&args, args.bufflen, 0, 0); if(args.cache) args.s_buff = args.r_buff; args.r_ptr = args.r_buff_orig = args.r_buff; args.s_ptr = args.s_buff_orig = args.s_buff; AfterAlignmentInit(&args); /* MPI-2 needs this to create a window */ /* Infiniband requires use of asynchronous communications, so we need * the PrepareToReceive calls below */ if( asyncReceive ) PrepareToReceive(&args); Sync(&args); /* Sync to prevent race condition in armci module */ /* For simplicity's sake, even if the real test below will be done in * bi-directional mode, we still do the ping-pong one-way-at-a-time test * here to estimate the one-way latency. Unless it takes significantly * longer to send data in both directions at once than it does to send data * one way at a time, this shouldn't be too far off anyway. */ t0 = When(); for( n=0; n<100; n++) { if( args.tr) { SendData(&args); RecvData(&args); if( asyncReceive && n<99 ) PrepareToReceive(&args); } else if( args.rcv) { RecvData(&args); if( asyncReceive && n<99 ) PrepareToReceive(&args); SendData(&args); } } tlast = (When() - t0)/200; /* Sync up and Reset before freeing the buffers */ Sync(&args); Reset(&args); /* Free the buffers and any other module-specific resources. */ if(args.cache) FreeBuff(args.r_buff_orig, NULL); else FreeBuff(args.r_buff_orig, args.s_buff_orig); /* Do setup for no-cache mode, using two distinct buffers. */ if (!args.cache) { /* Allocate dummy pool of memory to flush cache with */ if ( (memcache = (int *)malloc(MEMSIZE)) == NULL) { perror("malloc"); exit(1); } mymemset(memcache, 0, MEMSIZE/sizeof(int)); /* Allocate large memory pools */ MyMalloc(&args, MEMSIZE+bufalign, args.soffset, args.roffset); /* Save buffer addresses */ args.s_buff_orig = args.s_buff; args.r_buff_orig = args.r_buff; /* Align buffers */ args.s_buff = AlignBuffer(args.s_buff, bufalign); args.r_buff = AlignBuffer(args.r_buff, bufalign); /* Post alignment initialization */ AfterAlignmentInit(&args); /* Initialize send buffer pointer */ /* both soffset and roffset should be zero if we don't have any offset stuff, so this should be fine */ args.s_ptr = args.s_buff+args.soffset; args.r_ptr = args.r_buff+args.roffset; } /************************** * Main loop of benchmark * **************************/ if( args.tr ) fprintf(stderr,"Now starting the main loop\n"); for ( n = 0, len = start, errFlag = 0; n < NSAMP - 3 && tlast < STOPTM && len <= end && !errFlag; len = len + inc, nq++ ) { /* Exponentially increase the block size. */ if (nq > 2) inc = ((nq % 2))? inc + inc: inc; /* This is a perturbation loop to test nearby values */ for (pert = ((perturbation > 0) && (inc > perturbation+1)) ? -perturbation : 0; pert <= perturbation; n++, pert += ((perturbation > 0) && (inc > perturbation+1)) ? perturbation : perturbation+1) { Sync(&args); /* Sync to prevent race condition in armci module */ /* Calculate how many times to repeat the experiment. */ if( args.tr ) { if (nrepeat_const) { nrepeat = nrepeat_const; /* } else if (len == start) {*/ /* nrepeat = MAX( RUNTM/( 0.000020 + start/(8*1000) ), TRIALS);*/ } else { nrepeat = MAX((RUNTM / ((double)args.bufflen / (args.bufflen - inc + 1.0) * tlast)),TRIALS); } SendRepeat(&args, nrepeat); } else if( args.rcv ) { RecvRepeat(&args, &nrepeat); } args.bufflen = len + pert; if( args.tr ) fprintf(stderr,"%3d: %7d bytes %6d times --> ", n,args.bufflen,nrepeat); if (args.cache) /* Allow cache effects. We use only one buffer */ { /* Allocate the buffer with room for alignment*/ MyMalloc(&args, args.bufflen+bufalign, args.soffset, args.roffset); /* Save buffer address */ args.r_buff_orig = args.r_buff; args.s_buff_orig = args.r_buff; /* Align buffer */ args.r_buff = AlignBuffer(args.r_buff, bufalign); args.s_buff = args.r_buff; /* Initialize buffer with data * * NOTE: The buffers should be initialized with some sort of * valid data, whether it is actually used for anything else, * to get accurate results. Performance increases noticeably * if the buffers are left uninitialized, but this does not * give very useful results as realworld apps tend to actually * have data stored in memory. We are not sure what causes * the difference in performance at this time. */ InitBufferData(&args, args.bufflen, args.soffset, args.roffset); /* Post-alignment initialization */ AfterAlignmentInit(&args); /* Initialize buffer pointers (We use r_ptr and s_ptr for * compatibility with no-cache mode, as this makes the code * simpler) */ /* offsets are zero by default so this saves an #ifdef */ args.r_ptr = args.r_buff+args.roffset; args.s_ptr = args.r_buff+args.soffset; } else /* Eliminate cache effects. We use two distinct buffers */ { /* this isn't truly set up for offsets yet */ /* Size of an aligned memory block including trailing padding */ len_buf_align = args.bufflen; if(bufalign != 0) len_buf_align += bufalign - args.bufflen % bufalign; /* Initialize the buffers with data * * See NOTE above. */ InitBufferData(&args, MEMSIZE, args.soffset, args.roffset); /* Reset buffer pointers to beginning of pools */ args.r_ptr = args.r_buff+args.roffset; args.s_ptr = args.s_buff+args.soffset; } bwdata[n].t = LONGTIME; /* t2 = t1 = 0;*/ /* Finally, we get to transmit or receive and time */ /* NOTE: If a module is running that uses only one process (e.g. * memcpy), we assume that it will always have the args.tr flag * set. Thus we make some special allowances in the transmit * section that are not in the receive section. */ if( args.tr || args.bidir ) { /* This is the transmitter: send the block TRIALS times, and if we are not streaming, expect the receiver to return each block. */ for (i = 0; i < (integCheck ? 1 : TRIALS); i++) { if(args.preburst && asyncReceive && !streamopt) { /* We need to save the value of the recv ptr so * we can reset it after we do the preposts, in case * the module needs to use the same ptr values again * so it can wait on the last byte to change to indicate * the recv is finished. */ SaveRecvPtr(&args); for(j=0; j<nrepeat; j++) { PrepareToReceive(&args); if(!args.cache) AdvanceRecvPtr(&args, len_buf_align); } ResetRecvPtr(&args); } /* Flush the cache using the dummy buffer */ if (!args.cache) flushcache(memcache, MEMSIZE/sizeof(int)); Sync(&args); t0 = When(); for (j = 0; j < nrepeat; j++) { if (!args.preburst && asyncReceive && !streamopt) { PrepareToReceive(&args); } if (integCheck) SetIntegrityData(&args); SendData(&args); if (!streamopt) { RecvData(&args); if (integCheck) VerifyIntegrity(&args); if(!args.cache) AdvanceRecvPtr(&args, len_buf_align); } /* Wait to advance send pointer in case RecvData uses * it (e.g. memcpy module). */ if (!args.cache) AdvanceSendPtr(&args, len_buf_align); } /* t is the 1-directional trasmission time */ t = (When() - t0)/ nrepeat; if( !streamopt && !args.bidir) t /= 2; /* Normal ping-pong */ Reset(&args); /* NOTE: NetPIPE does each data point TRIALS times, bouncing the message * nrepeats times for each trial, then reports the lowest of the TRIALS * times. -Dave Turner */ bwdata[n].t = MIN(bwdata[n].t, t); /* t1 += t;*/ /* t2 += t*t;*/ } if (streamopt){ /* Get time info from Recv node */ RecvTime(&args, &bwdata[n].t); /* RecvTime(&args, &t1);*/ /* RecvTime(&args, &t2);*/ } /* Calculate variance after completing this set of trials */ /* bwdata[n].variance = t2/TRIALS - t1/TRIALS * t1/TRIALS;*/ } else if( args.rcv ) { /* This is the receiver: receive the block TRIALS times, and if we are not streaming, send the block back to the sender. */ for (i = 0; i < (integCheck ? 1 : TRIALS); i++) { if (asyncReceive) { if (args.preburst) { /* We need to save the value of the recv ptr so * we can reset it after we do the preposts, in case * the module needs to use the same ptr values again * so it can wait on the last byte to change to * indicate the recv is finished. */ SaveRecvPtr(&args); for (j=0; j < nrepeat; j++) { PrepareToReceive(&args); if (!args.cache) AdvanceRecvPtr(&args, len_buf_align); } ResetRecvPtr(&args); } else { PrepareToReceive(&args); } } /* Flush the cache using the dummy buffer */ if (!args.cache) flushcache(memcache, MEMSIZE/sizeof(int)); Sync(&args); t0 = When(); for (j = 0; j < nrepeat; j++) { RecvData(&args); if (integCheck) VerifyIntegrity(&args); if (!args.cache) { AdvanceRecvPtr(&args, len_buf_align); } if (!args.preburst && asyncReceive && (j < nrepeat-1)) { PrepareToReceive(&args); } if (!streamopt) { if (integCheck) SetIntegrityData(&args); SendData(&args); if(!args.cache) AdvanceSendPtr(&args, len_buf_align); } } t = (When() - t0)/ nrepeat; if( !streamopt && !args.bidir) t /= 2; /* Normal ping-pong */ Reset(&args); bwdata[n].t = MIN(bwdata[n].t, t); /* t1 += t;*/ /* t2 += t*t;*/ } if (streamopt){ /* Recv proc calcs time and sends to Trans */ SendTime(&args, &bwdata[n].t); /* SendTime(&args, &t1);*/ /* SendTime(&args, &t2);*/ } } else /* Just going along for the ride */ { for (i = 0; i < (integCheck ? 1 : TRIALS); i++) { Sync(&args); } } /* Streaming mode doesn't really calculate correct latencies * for small message sizes, and on some nics we can get * zero second latency after doing the math. Protect against * this. */ if(bwdata[n].t == 0.0) { bwdata[n].t = 0.000001; } tlast = bwdata[n].t; bwdata[n].bits = args.bufflen * CHARSIZE * (1+args.bidir); bwdata[n].bps = bwdata[n].bits / (bwdata[n].t * 1024 * 1024); bwdata[n].repeat = nrepeat; if (args.tr) { if(integCheck) { fprintf(out,"%8d %d", bwdata[n].bits / 8, nrepeat); } else { fprintf(out,"%8d %lf %12.8lf", bwdata[n].bits / 8, bwdata[n].bps, bwdata[n].t); } fprintf(out, "\n"); fflush(out); } /* Free using original buffer addresses since we may have aligned r_buff and s_buff */ if (args.cache) FreeBuff(args.r_buff_orig, NULL); if ( args.tr ) { if(integCheck) { fprintf(stderr, " Integrity check passed\n"); } else { fprintf(stderr," %8.2lf Mbps in %10.2lf usec\n", bwdata[n].bps, tlast*1.0e6); } } } /* End of perturbation loop */ } /* End of main loop */ /* Free using original buffer addresses since we may have aligned r_buff and s_buff */ if (!args.cache) { FreeBuff(args.s_buff_orig, args.r_buff_orig); } if (args.tr) fclose(out); CleanUp(&args); return 0; }