unsigned int readmailbox(unsigned int channel) { unsigned int count = 0; unsigned int data; /* Loop until something is received from channel * If nothing recieved, it eventually give up and returns 0xffffffff */ while(1) { while (*MAILBOX0STATUS & MAILBOX_EMPTY) { /* Need to check if this is the right thing to do */ flushcache(); /* This is an arbritarily large number */ if(count++ >(1<<25)) { return 0xffffffff; } } /* Read the data * Data memory barriers as we've switched peripheral */ dmb(); data = *MAILBOX0READ; dmb(); if ((data & 15) == channel) return data; } return 0; }
static int aoerio(SDreq *r) { int i, count, rw; uvlong lba; Ctlr *c; SDunit *u; u = r->unit; c = u->dev->ctlr; // if(c->feat & Datapi) // return aoeriopkt(r, d); if(r->cmd[0] == 0x35 || r->cmd[0] == 0x91) { qlock(c); i = flushcache(c); qunlock(c); if(i == 0) return sdsetsense(r, SDok, 0, 0, 0); return sdsetsense(r, SDcheck, 3, 0xc, 2); } if((i = sdfakescsi(r)) != SDnostatus) { r->status = i; return i; } if((i = sdfakescsirw(r, &lba, &count, &rw)) != SDnostatus) return i; r->rlen = aoebio(u, r->lun, rw == SDwrite, r->data, count, lba); return r->status = SDok; }
int main(){ unsigned long max_offset; unsigned long buffer_size=1; int fd = open("10GigFile",O_RDONLY|O_LARGEFILE); if(fd == -1) { perror("file could not be opened for reading\n"); exit(1); } char* buffer; flushcache(); max_offset = (10240*1024/buffer_size)-1; buffer = (char*) malloc(buffer_size*1024); run_experiment(fd,buffer,buffer_size*1024,max_offset); free(buffer); flushcache(); srand(time(NULL)%INT_MAX); }
void writemailbox(unsigned int channel, unsigned int data) { /* Wait for mailbox to be not full */ while (*MAILBOX0STATUS & MAILBOX_FULL) { /* Need to check if this is the right thing to do */ flushcache(); } dmb(); *MAILBOX0WRITE = (data | channel); }
static void acpi_test(int ac, char **av) { unsigned int i; unsigned int k; unsigned int sleep_gpio_save[13]; ls1a_save_gpio_val(sleep_gpio_save); suspend_save(); i = strtoul(av[1], 0, 0); // printf ("you set %d for test !\n", i); if (i & 1) *(volatile unsigned int *)0xbfe7c004 = 1 << 8; //power button k = *(volatile unsigned int *)0xbfe7c024; k &= ~((1 << 8) | (1 << 9)); *(volatile unsigned int *)0xbfe7c024 = (i & 0x3) << 8; //[8:9]RI_EN、PME_EN // *(volatile unsigned int *)0xbfe7c008 = (1 << 13) | (5 << 10); //sleep to ram flushcache(); __asm__ volatile ( "la $2, 2f\n\t" "li $3, 0xa01ffc00\n\t" "sw $2, 0x0($3)\n\t" //save return address "li $2, 0xaffffe34\n\t" "lw $3, 0x0($2)\n\t" "or $3, 0x1\n\t" "sw $3, 0x0($2)\n\t" //enable ddr autorefresh "li $2, 0xbfe7c008\n\t" "li $3, (1<<13) | (5<<10)\n\t" "sw $3, 0x0($2)\n\t" //go to sleep "1:\n\t" "bal 1b\n\t" "nop\n\t" "2:\n\t" :: : "$2","$3","memory" // "move %0,$2\n\t" // : "=r" (p) // : "0" (p), "r" (len), "r" (1) // : "$2","$3","$4","$5" ); ls1a_restore_gpio_val(sleep_gpio_save); dc_init(); outstring("acpi resume back here !\n"); // printf ("acpi resume back here !\n"); }
/* * Pseudo (chained) interrupt from the esp driver to kick the * current running DMA transfer. I am replying on espintr() to * pickup and clean errors for now * * return 1 if it was a DMA continue. */ int espdmaintr(struct esp_softc *sc) { struct ncr53c9x_softc *nsc = (struct ncr53c9x_softc *)sc; int trans, resid; u_long csr = sc->sc_dma_direction; #if 0 if (csr & D_ERR_PEND) { DMACSR(sc) &= ~D_EN_DMA; /* Stop DMA */ DMACSR(sc) |= D_INVALIDATE; printf("%s: error: csr=%s\n", device_xname(nsc->sc_dev), bitmask_snprintf(csr, DMACSRBITS, bits, sizeof(bits))); return -1; } #endif /* This is an "assertion" :) */ if (sc->sc_dmaactive == 0) panic("%s: DMA wasn't active", __func__); /* dbdma_flush(sc->sc_dmareg); */ /* DMA has stopped */ dbdma_stop(sc->sc_dmareg); sc->sc_dmaactive = 0; if (sc->sc_dmasize == 0) { /* A "Transfer Pad" operation completed */ NCR_DMA(("dmaintr: discarded %d bytes (tcl=%d, tcm=%d)\n", NCR_READ_REG(nsc, NCR_TCL) | (NCR_READ_REG(nsc, NCR_TCM) << 8), NCR_READ_REG(nsc, NCR_TCL), NCR_READ_REG(nsc, NCR_TCM))); return 0; } resid = 0; /* * If a transfer onto the SCSI bus gets interrupted by the device * (e.g. for a SAVEPOINTER message), the data in the FIFO counts * as residual since the ESP counter registers get decremented as * bytes are clocked into the FIFO. */ if (!(csr & D_WRITE) && (resid = (NCR_READ_REG(nsc, NCR_FFLAG) & NCRFIFO_FF)) != 0) { NCR_DMA(("dmaintr: empty esp FIFO of %d ", resid)); } if ((nsc->sc_espstat & NCRSTAT_TC) == 0) { /* * `Terminal count' is off, so read the residue * out of the ESP counter registers. */ resid += (NCR_READ_REG(nsc, NCR_TCL) | (NCR_READ_REG(nsc, NCR_TCM) << 8) | ((nsc->sc_cfg2 & NCRCFG2_FE) ? (NCR_READ_REG(nsc, NCR_TCH) << 16) : 0)); if (resid == 0 && sc->sc_dmasize == 65536 && (nsc->sc_cfg2 & NCRCFG2_FE) == 0) /* A transfer of 64K is encoded as `TCL=TCM=0' */ resid = 65536; } trans = sc->sc_dmasize - resid; if (trans < 0) { /* transferred < 0 ? */ #if 0 /* * This situation can happen in perfectly normal operation * if the ESP is reselected while using DMA to select * another target. As such, don't print the warning. */ printf("%s: xfer (%d) > req (%d)\n", device_xname(nsc->sc_dev), trans, sc->sc_dmasize); #endif trans = sc->sc_dmasize; } NCR_DMA(("dmaintr: tcl=%d, tcm=%d, tch=%d; trans=%d, resid=%d\n", NCR_READ_REG(nsc, NCR_TCL), NCR_READ_REG(nsc, NCR_TCM), (nsc->sc_cfg2 & NCRCFG2_FE) ? NCR_READ_REG(nsc, NCR_TCH) : 0, trans, resid)); #if 0 if (csr & D_WRITE) flushcache(*sc->sc_dmaaddr, trans); #endif *sc->sc_dmalen -= trans; *sc->sc_dmaaddr += trans; #if 0 /* this is not normal operation just yet */ if (*sc->sc_dmalen == 0 || nsc->sc_phase != nsc->sc_prevphase) return 0; /* and again */ dma_start(sc, sc->sc_dmaaddr, sc->sc_dmalen, DMACSR(sc) & D_WRITE); return 1; #endif return 0; }
int main(int argc, char **argv) { FILE *out; /* Output data file */ char s[255],s2[255],delim[255],*pstr; /* Generic strings */ int *memcache; /* used to flush cache */ int len_buf_align, /* meaningful when args.cache is 0. buflen */ /* rounded up to be divisible by 8 */ num_buf_align; /* meaningful when args.cache is 0. number */ /* of aligned buffers in memtmp */ int c, /* option index */ i, j, n, nq, /* Loop indices */ asyncReceive=0, /* Pre-post a receive buffer? */ bufalign=16*1024,/* Boundary to align buffer to */ errFlag, /* Error occurred in inner testing loop */ nrepeat, /* Number of time to do the transmission */ nrepeat_const=0,/* Set if we are using a constant nrepeat */ len, /* Number of bytes to be transmitted */ inc=0, /* Increment value */ perturbation=DEFPERT, /* Perturbation value */ pert, start= 1, /* Starting value for signature curve */ end=MAXINT, /* Ending value for signature curve */ streamopt=0, /* Streaming mode flag */ reset_connection;/* Reset the connection between trials */ ArgStruct args; /* Arguments for all the calls */ double t, t0, t1, t2, /* Time variables */ tlast, /* Time for the last transmission */ latency; /* Network message latency */ Data bwdata[NSAMP]; /* Bandwidth curve data */ int integCheck=0; /* Integrity check */ /* Initialize vars that may change from default due to arguments */ strcpy(s, "np.out"); /* Default output file */ /* Let modules initialize related vars, and possibly call a library init function that requires argc and argv */ Init(&args, &argc, &argv); /* This will set args.tr and args.rcv */ args.preburst = 0; /* Default to not bursting preposted receives */ args.bidir = 0; /* Turn bi-directional mode off initially */ args.cache = 1; /* Default to use cache */ args.upper = end; args.host = NULL; args.soffset=0; /* default to no offsets */ args.roffset=0; args.syncflag=0; /* use normal mpi_send */ args.port = DEFPORT; /* just in case the user doesn't set this. */ /* TCGMSG launches NPtcgmsg with a -master master_hostname * argument, so ignore all arguments and set them manually * in netpipe.c instead. */ #if ! defined(TCGMSG) /* Parse the arguments. See Usage for description */ while ((c = getopt(argc, argv, "SO:rIiszgfaB2h:p:o:l:u:b:m:n:t:c:d:D:P:")) != -1) { switch(c) { case 'O': strcpy(s2,optarg); strcpy(delim,","); if((pstr=strtok(s2,delim))!=NULL) { args.soffset=atoi(pstr); if((pstr=strtok((char *)NULL,delim))!=NULL) args.roffset=atoi(pstr); else /* only got one token */ args.roffset=args.soffset; } else { args.soffset=0; args.roffset=0; } printf("Transmit buffer offset: %d\nReceive buffer offset: %d\n",args.soffset,args.roffset); break; case 'p': perturbation = atoi(optarg); if( perturbation > 0 ) { printf("Using a perturbation value of %d\n\n", perturbation); } else { perturbation = 0; printf("Using no perturbations\n\n"); } break; case 'B': if(integCheck == 1) { fprintf(stderr, "Integrity check not supported with prepost burst\n"); exit(-1); } args.preburst = 1; asyncReceive = 1; printf("Preposting all receives before a timed run.\n"); printf("Some would consider this cheating,\n"); printf("but it is needed to match some vendor tests.\n"); fflush(stdout); break; case 'I': args.cache = 0; printf("Performance measured without cache effects\n\n"); fflush(stdout); break; case 'o': strcpy(s,optarg); printf("Sending output to %s\n", s); fflush(stdout); break; case 's': streamopt = 1; printf("Streaming in one direction only.\n\n"); #if defined(TCP) && ! defined(INFINIBAND) printf("Sockets are reset between trials to avoid\n"); printf("degradation from a collapsing window size.\n\n"); #endif args.reset_conn = 1; printf("Streaming does not provide an accurate\n"); printf("measurement of the latency since small\n"); printf("messages may get bundled together.\n\n"); if( args.bidir == 1 ) { printf("You can't use -s and -2 together\n"); exit(0); } fflush(stdout); break; case 'l': start = atoi(optarg); if (start < 1) { fprintf(stderr,"Need a starting value >= 1\n"); exit(0); } break; case 'u': end = atoi(optarg); break; #if defined(TCP) && ! defined(INFINIBAND) case 'b': /* -b # resets the buffer size, -b 0 keeps system defs */ args.prot.sndbufsz = args.prot.rcvbufsz = atoi(optarg); break; #endif case '2': args.bidir = 1; /* Both procs are transmitters */ /* end will be maxed at sndbufsz+rcvbufsz */ printf("Passing data in both directions simultaneously.\n"); printf("Output is for the combined bandwidth.\n"); #if defined(TCP) && ! defined(INFINIBAND) printf("The socket buffer size limits the maximum test size.\n\n"); #endif if( streamopt ) { printf("You can't use -s and -2 together\n"); exit(0); } break; case 'h': args.tr = 1; /* -h implies transmit node */ args.rcv = 0; args.host = (char *)malloc(strlen(optarg)+1); strcpy(args.host, optarg); break; #ifdef DISK case 'd': args.tr = 1; /* -d to specify input/output file */ args.rcv = 0; args.prot.read = 0; args.prot.read_type = 'c'; args.prot.dfile_name = (char *)malloc(strlen(optarg)+1); strcpy(args.prot.dfile_name, optarg); break; case 'D': if( optarg[0] == 'r' ) args.prot.read = 1; else args.prot.read = 0; args.prot.read_type = optarg[1]; break; #endif case 'i': if(args.preburst == 1) { fprintf(stderr, "Integrity check not supported with prepost burst\n"); exit(-1); } integCheck = 1; perturbation = 0; start = sizeof(int)+1; /* Start with integer size */ printf("Doing an integrity check instead of measuring performance\n"); fflush(stdout); break; #if defined(MPI) case 'z': args.source_node = -1; printf("Receive using the ANY_SOURCE flag\n"); fflush(stdout); break; case 'a': asyncReceive = 1; printf("Preposting asynchronous receives\n"); fflush(stdout); break; case 'S': args.syncflag=1; fprintf(stderr,"Using synchronous sends\n"); break; #endif #if defined(MPI2) case 'g': if(args.prot.no_fence == 1) { fprintf(stderr, "-f cannot be used with -g\n"); exit(-1); } args.prot.use_get = 1; printf("Using MPI-2 Get instead of Put\n"); break; case 'f': if(args.prot.use_get == 1) { fprintf(stderr, "-f cannot be used with -g\n"); exit(-1); } args.prot.no_fence = 1; bufalign = 0; printf("Buffer alignment off (Required for no fence)\n"); break; #endif /* MPI2 */ #if defined(INFINIBAND) case 'm': switch(atoi(optarg)) { case 256: args.prot.ib_mtu = MTU256; break; case 512: args.prot.ib_mtu = MTU512; break; case 1024: args.prot.ib_mtu = MTU1024; break; case 2048: args.prot.ib_mtu = MTU2048; break; case 4096: args.prot.ib_mtu = MTU4096; break; default: fprintf(stderr, "Invalid MTU size, must be one of " "256, 512, 1024, 2048, 4096\n"); exit(-1); } break; case 't': if( !strcmp(optarg, "send_recv") ) { printf("Using Send/Receive communications\n"); args.prot.commtype = NP_COMM_SENDRECV; } else if( !strcmp(optarg, "send_recv_with_imm") ) { printf("Using Send/Receive communications with immediate data\n"); args.prot.commtype = NP_COMM_SENDRECV_WITH_IMM; } else if( !strcmp(optarg, "rdma_write") ) { printf("Using RDMA Write communications\n"); args.prot.commtype = NP_COMM_RDMAWRITE; } else if( !strcmp(optarg, "rdma_write_with_imm") ) { printf("Using RDMA Write communications with immediate data\n"); args.prot.commtype = NP_COMM_RDMAWRITE_WITH_IMM; } else { fprintf(stderr, "Invalid transfer type " "specified, please choose one of:\n\n" "\tsend_recv\t\tUse Send/Receive communications\t(default)\n" "\tsend_recv_with_imm\tSame as above with immediate data\n" "\trdma_write\t\tUse RDMA Write communications\n" "\trdma_write_with_imm\tSame as above with immediate data\n\n"); exit(-1); } break; case 'c': if( !strcmp(optarg, "local_poll") ) { printf("Using local polling completion\n"); args.prot.comptype = NP_COMP_LOCALPOLL; } else if( !strcmp(optarg, "vapi_poll") ) { printf("Using VAPI polling completion\n"); args.prot.comptype = NP_COMP_VAPIPOLL; } else if( !strcmp(optarg, "event") ) { printf("Using VAPI event completion\n"); args.prot.comptype = NP_COMP_EVENT; } else { fprintf(stderr, "Invalid completion type specified, " "please choose one of:\n\n" "\tlocal_poll\tWait for last byte of data\t(default)\n" "\tvapi_poll\tUse VAPI polling function\n" "\tevent\t\tUse VAPI event handling function\n\n"); exit(-1); } break; #endif case 'n': nrepeat_const = atoi(optarg); break; #if defined(TCP) && ! defined(INFINIBAND) case 'r': args.reset_conn = 1; printf("Resetting connection after every trial\n"); break; #endif case 'P': args.port = atoi(optarg); break; default: PrintUsage(); exit(-12); } } #endif /* ! defined TCGMSG */ #if defined(INFINIBAND) asyncReceive = 1; fprintf(stderr, "Preposting asynchronous receives (required for Infiniband)\n"); if(args.bidir && ( (args.cache && args.prot.commtype == NP_COMM_RDMAWRITE) || /* rdma_write only works with no-cache mode */ (!args.preburst && args.prot.commtype != NP_COMM_RDMAWRITE) || /* anything besides rdma_write requires prepost burst */ (args.preburst && args.prot.comptype == NP_COMP_LOCALPOLL && args.cache) || /* preburst with local polling in cache mode doesn't work */ 0)) { fprintf(stderr, "\n" "Bi-directional mode currently only works with a subset of the\n" "Infiniband options. Restrictions are:\n" "\n" " RDMA write (-t rdma_write) requires no-cache mode (-I).\n" "\n" " Local polling (-c local_poll, default if no -c given) requires\n" " no-cache mode (-I), and if not using RDMA write communication,\n" " burst mode (-B).\n" "\n" " Any other communication type and any other completion type\n" " require burst mode (-B). No-cache mode (-I) may be used\n" " optionally.\n" "\n" " All other option combinations will fail.\n" "\n"); exit(-1); } #endif if (start > end) { fprintf(stderr, "Start MUST be LESS than end\n"); exit(420132); } args.nbuff = TRIALS; Setup(&args); if( args.bidir && end > args.upper ) { end = args.upper; if( args.tr ) { printf("The upper limit is being set to %d Bytes\n", end); #if defined(TCP) && ! defined(INFINIBAND) printf("due to socket buffer size limitations\n\n"); #endif } } #if defined(GM) if(streamopt && (!nrepeat_const || nrepeat_const > args.prot.num_stokens)) { printf("\nGM is currently limited by the driver software to %d\n", args.prot.num_stokens); printf("outstanding sends. The number of repeats will be set\n"); printf("to this limit for every trial in streaming mode. You\n"); printf("may use the -n switch to set a smaller number of repeats\n\n"); nrepeat_const = args.prot.num_stokens; } #endif if( args.tr ) /* Primary transmitter */ { if ((out = fopen(s, "w")) == NULL) { fprintf(stderr,"Can't open %s for output\n", s); exit(1); } } else out = stdout; /* Set a starting value for the message size increment. */ inc = (start > 1) ? start / 2 : 1; nq = (start > 1) ? 1 : 0; /* Test the timing to set tlast for the first test */ args.bufflen = start; MyMalloc(&args, args.bufflen, 0, 0); InitBufferData(&args, args.bufflen, 0, 0); if(args.cache) args.s_buff = args.r_buff; args.r_ptr = args.r_buff_orig = args.r_buff; args.s_ptr = args.s_buff_orig = args.s_buff; AfterAlignmentInit(&args); /* MPI-2 needs this to create a window */ /* Infiniband requires use of asynchronous communications, so we need * the PrepareToReceive calls below */ if( asyncReceive ) PrepareToReceive(&args); Sync(&args); /* Sync to prevent race condition in armci module */ /* For simplicity's sake, even if the real test below will be done in * bi-directional mode, we still do the ping-pong one-way-at-a-time test * here to estimate the one-way latency. Unless it takes significantly * longer to send data in both directions at once than it does to send data * one way at a time, this shouldn't be too far off anyway. */ t0 = When(); for( n=0; n<100; n++) { if( args.tr) { SendData(&args); RecvData(&args); if( asyncReceive && n<99 ) PrepareToReceive(&args); } else if( args.rcv) { RecvData(&args); if( asyncReceive && n<99 ) PrepareToReceive(&args); SendData(&args); } } tlast = (When() - t0)/200; /* Sync up and Reset before freeing the buffers */ Sync(&args); Reset(&args); /* Free the buffers and any other module-specific resources. */ if(args.cache) FreeBuff(args.r_buff_orig, NULL); else FreeBuff(args.r_buff_orig, args.s_buff_orig); /* Do setup for no-cache mode, using two distinct buffers. */ if (!args.cache) { /* Allocate dummy pool of memory to flush cache with */ if ( (memcache = (int *)malloc(MEMSIZE)) == NULL) { perror("malloc"); exit(1); } mymemset(memcache, 0, MEMSIZE/sizeof(int)); /* Allocate large memory pools */ MyMalloc(&args, MEMSIZE+bufalign, args.soffset, args.roffset); /* Save buffer addresses */ args.s_buff_orig = args.s_buff; args.r_buff_orig = args.r_buff; /* Align buffers */ args.s_buff = AlignBuffer(args.s_buff, bufalign); args.r_buff = AlignBuffer(args.r_buff, bufalign); /* Post alignment initialization */ AfterAlignmentInit(&args); /* Initialize send buffer pointer */ /* both soffset and roffset should be zero if we don't have any offset stuff, so this should be fine */ args.s_ptr = args.s_buff+args.soffset; args.r_ptr = args.r_buff+args.roffset; } /************************** * Main loop of benchmark * **************************/ if( args.tr ) fprintf(stderr,"Now starting the main loop\n"); for ( n = 0, len = start, errFlag = 0; n < NSAMP - 3 && tlast < STOPTM && len <= end && !errFlag; len = len + inc, nq++ ) { /* Exponentially increase the block size. */ if (nq > 2) inc = ((nq % 2))? inc + inc: inc; /* This is a perturbation loop to test nearby values */ for (pert = ((perturbation > 0) && (inc > perturbation+1)) ? -perturbation : 0; pert <= perturbation; n++, pert += ((perturbation > 0) && (inc > perturbation+1)) ? perturbation : perturbation+1) { Sync(&args); /* Sync to prevent race condition in armci module */ /* Calculate how many times to repeat the experiment. */ if( args.tr ) { if (nrepeat_const) { nrepeat = nrepeat_const; /* } else if (len == start) {*/ /* nrepeat = MAX( RUNTM/( 0.000020 + start/(8*1000) ), TRIALS);*/ } else { nrepeat = MAX((RUNTM / ((double)args.bufflen / (args.bufflen - inc + 1.0) * tlast)),TRIALS); } SendRepeat(&args, nrepeat); } else if( args.rcv ) { RecvRepeat(&args, &nrepeat); } args.bufflen = len + pert; if( args.tr ) fprintf(stderr,"%3d: %7d bytes %6d times --> ", n,args.bufflen,nrepeat); if (args.cache) /* Allow cache effects. We use only one buffer */ { /* Allocate the buffer with room for alignment*/ MyMalloc(&args, args.bufflen+bufalign, args.soffset, args.roffset); /* Save buffer address */ args.r_buff_orig = args.r_buff; args.s_buff_orig = args.r_buff; /* Align buffer */ args.r_buff = AlignBuffer(args.r_buff, bufalign); args.s_buff = args.r_buff; /* Initialize buffer with data * * NOTE: The buffers should be initialized with some sort of * valid data, whether it is actually used for anything else, * to get accurate results. Performance increases noticeably * if the buffers are left uninitialized, but this does not * give very useful results as realworld apps tend to actually * have data stored in memory. We are not sure what causes * the difference in performance at this time. */ InitBufferData(&args, args.bufflen, args.soffset, args.roffset); /* Post-alignment initialization */ AfterAlignmentInit(&args); /* Initialize buffer pointers (We use r_ptr and s_ptr for * compatibility with no-cache mode, as this makes the code * simpler) */ /* offsets are zero by default so this saves an #ifdef */ args.r_ptr = args.r_buff+args.roffset; args.s_ptr = args.r_buff+args.soffset; } else /* Eliminate cache effects. We use two distinct buffers */ { /* this isn't truly set up for offsets yet */ /* Size of an aligned memory block including trailing padding */ len_buf_align = args.bufflen; if(bufalign != 0) len_buf_align += bufalign - args.bufflen % bufalign; /* Initialize the buffers with data * * See NOTE above. */ InitBufferData(&args, MEMSIZE, args.soffset, args.roffset); /* Reset buffer pointers to beginning of pools */ args.r_ptr = args.r_buff+args.roffset; args.s_ptr = args.s_buff+args.soffset; } bwdata[n].t = LONGTIME; /* t2 = t1 = 0;*/ /* Finally, we get to transmit or receive and time */ /* NOTE: If a module is running that uses only one process (e.g. * memcpy), we assume that it will always have the args.tr flag * set. Thus we make some special allowances in the transmit * section that are not in the receive section. */ if( args.tr || args.bidir ) { /* This is the transmitter: send the block TRIALS times, and if we are not streaming, expect the receiver to return each block. */ for (i = 0; i < (integCheck ? 1 : TRIALS); i++) { if(args.preburst && asyncReceive && !streamopt) { /* We need to save the value of the recv ptr so * we can reset it after we do the preposts, in case * the module needs to use the same ptr values again * so it can wait on the last byte to change to indicate * the recv is finished. */ SaveRecvPtr(&args); for(j=0; j<nrepeat; j++) { PrepareToReceive(&args); if(!args.cache) AdvanceRecvPtr(&args, len_buf_align); } ResetRecvPtr(&args); } /* Flush the cache using the dummy buffer */ if (!args.cache) flushcache(memcache, MEMSIZE/sizeof(int)); Sync(&args); t0 = When(); for (j = 0; j < nrepeat; j++) { if (!args.preburst && asyncReceive && !streamopt) { PrepareToReceive(&args); } if (integCheck) SetIntegrityData(&args); SendData(&args); if (!streamopt) { RecvData(&args); if (integCheck) VerifyIntegrity(&args); if(!args.cache) AdvanceRecvPtr(&args, len_buf_align); } /* Wait to advance send pointer in case RecvData uses * it (e.g. memcpy module). */ if (!args.cache) AdvanceSendPtr(&args, len_buf_align); } /* t is the 1-directional trasmission time */ t = (When() - t0)/ nrepeat; if( !streamopt && !args.bidir) t /= 2; /* Normal ping-pong */ Reset(&args); /* NOTE: NetPIPE does each data point TRIALS times, bouncing the message * nrepeats times for each trial, then reports the lowest of the TRIALS * times. -Dave Turner */ bwdata[n].t = MIN(bwdata[n].t, t); /* t1 += t;*/ /* t2 += t*t;*/ } if (streamopt){ /* Get time info from Recv node */ RecvTime(&args, &bwdata[n].t); /* RecvTime(&args, &t1);*/ /* RecvTime(&args, &t2);*/ } /* Calculate variance after completing this set of trials */ /* bwdata[n].variance = t2/TRIALS - t1/TRIALS * t1/TRIALS;*/ } else if( args.rcv ) { /* This is the receiver: receive the block TRIALS times, and if we are not streaming, send the block back to the sender. */ for (i = 0; i < (integCheck ? 1 : TRIALS); i++) { if (asyncReceive) { if (args.preburst) { /* We need to save the value of the recv ptr so * we can reset it after we do the preposts, in case * the module needs to use the same ptr values again * so it can wait on the last byte to change to * indicate the recv is finished. */ SaveRecvPtr(&args); for (j=0; j < nrepeat; j++) { PrepareToReceive(&args); if (!args.cache) AdvanceRecvPtr(&args, len_buf_align); } ResetRecvPtr(&args); } else { PrepareToReceive(&args); } } /* Flush the cache using the dummy buffer */ if (!args.cache) flushcache(memcache, MEMSIZE/sizeof(int)); Sync(&args); t0 = When(); for (j = 0; j < nrepeat; j++) { RecvData(&args); if (integCheck) VerifyIntegrity(&args); if (!args.cache) { AdvanceRecvPtr(&args, len_buf_align); } if (!args.preburst && asyncReceive && (j < nrepeat-1)) { PrepareToReceive(&args); } if (!streamopt) { if (integCheck) SetIntegrityData(&args); SendData(&args); if(!args.cache) AdvanceSendPtr(&args, len_buf_align); } } t = (When() - t0)/ nrepeat; if( !streamopt && !args.bidir) t /= 2; /* Normal ping-pong */ Reset(&args); bwdata[n].t = MIN(bwdata[n].t, t); /* t1 += t;*/ /* t2 += t*t;*/ } if (streamopt){ /* Recv proc calcs time and sends to Trans */ SendTime(&args, &bwdata[n].t); /* SendTime(&args, &t1);*/ /* SendTime(&args, &t2);*/ } } else /* Just going along for the ride */ { for (i = 0; i < (integCheck ? 1 : TRIALS); i++) { Sync(&args); } } /* Streaming mode doesn't really calculate correct latencies * for small message sizes, and on some nics we can get * zero second latency after doing the math. Protect against * this. */ if(bwdata[n].t == 0.0) { bwdata[n].t = 0.000001; } tlast = bwdata[n].t; bwdata[n].bits = args.bufflen * CHARSIZE * (1+args.bidir); bwdata[n].bps = bwdata[n].bits / (bwdata[n].t * 1024 * 1024); bwdata[n].repeat = nrepeat; if (args.tr) { if(integCheck) { fprintf(out,"%8d %d", bwdata[n].bits / 8, nrepeat); } else { fprintf(out,"%8d %lf %12.8lf", bwdata[n].bits / 8, bwdata[n].bps, bwdata[n].t); } fprintf(out, "\n"); fflush(out); } /* Free using original buffer addresses since we may have aligned r_buff and s_buff */ if (args.cache) FreeBuff(args.r_buff_orig, NULL); if ( args.tr ) { if(integCheck) { fprintf(stderr, " Integrity check passed\n"); } else { fprintf(stderr," %8.2lf Mbps in %10.2lf usec\n", bwdata[n].bps, tlast*1.0e6); } } } /* End of perturbation loop */ } /* End of main loop */ /* Free using original buffer addresses since we may have aligned r_buff and s_buff */ if (!args.cache) { FreeBuff(args.s_buff_orig, args.r_buff_orig); } if (args.tr) fclose(out); CleanUp(&args); return 0; }