Example #1
0
int main(int argc, char **argv)
{
    FILE        *out;           /* Output data file                          */
    char        s[255],s2[255],delim[255],*pstr; /* Generic strings          */
    int         *memcache;      /* used to flush cache                       */

    int         len_buf_align,  /* meaningful when args.cache is 0. buflen   */
                                /* rounded up to be divisible by 8           */
                num_buf_align;  /* meaningful when args.cache is 0. number   */
                                /* of aligned buffers in memtmp              */

    int         c,              /* option index                              */
                i, j, n, nq,    /* Loop indices                              */
                asyncReceive=0, /* Pre-post a receive buffer?                */
                bufalign=16*1024,/* Boundary to align buffer to              */
                errFlag,        /* Error occurred in inner testing loop      */
                nrepeat,        /* Number of time to do the transmission     */
                nrepeat_const=0,/* Set if we are using a constant nrepeat    */
                len,            /* Number of bytes to be transmitted         */
                inc=0,          /* Increment value                           */
                perturbation=DEFPERT, /* Perturbation value                  */
                pert,
                start= 1,       /* Starting value for signature curve        */
                end=MAXINT,     /* Ending value for signature curve          */
                streamopt=0,    /* Streaming mode flag                       */
                reset_connection;/* Reset the connection between trials      */
   
    ArgStruct   args;           /* Arguments for all the calls               */

    double      t, t0, t1, t2,  /* Time variables                            */
                tlast,          /* Time for the last transmission            */
                latency;        /* Network message latency                   */

    Data        bwdata[NSAMP];  /* Bandwidth curve data                      */

    int         integCheck=0;   /* Integrity check                           */

    /* Initialize vars that may change from default due to arguments */

    strcpy(s, "np.out");   /* Default output file */

    /* Let modules initialize related vars, and possibly call a library init
       function that requires argc and argv */


    Init(&args, &argc, &argv);   /* This will set args.tr and args.rcv */

    args.preburst = 0; /* Default to not bursting preposted receives */
    args.bidir = 0; /* Turn bi-directional mode off initially */
    args.cache = 1; /* Default to use cache */
    args.upper = end;
    args.host  = NULL;
    args.soffset=0; /* default to no offsets */
    args.roffset=0; 
    args.syncflag=0; /* use normal mpi_send */


    /* TCGMSG launches NPtcgmsg with a -master master_hostname
     * argument, so ignore all arguments and set them manually 
     * in netpipe.c instead.
     */

#if ! defined(TCGMSG)

    /* Parse the arguments. See Usage for description */
    while ((c = getopt(argc, argv, "SO:rIiPszgfaB2h:p:o:l:u:b:m:n:t:c:d:D:")) != -1)
    {
        switch(c)
        {
            case 'O':
                      strcpy(s2,optarg);
                      strcpy(delim,",");
                      if((pstr=strtok(s2,delim))!=NULL) {
                         args.soffset=atoi(pstr);
                         if((pstr=strtok((char *)NULL,delim))!=NULL)
                            args.roffset=atoi(pstr);
                         else /* only got one token */
                            args.roffset=args.soffset;
                      } else {
                         args.soffset=0; args.roffset=0;
                      }
                      printf("Transmit buffer offset: %d\nReceive buffer offset: %d\n",args.soffset,args.roffset);
                      break;
            case 'p': perturbation = atoi(optarg);
                      if( perturbation > 0 ) {
                         printf("Using a perturbation value of %d\n\n", perturbation);
                      } else {
                         perturbation = 0;
                         printf("Using no perturbations\n\n");
                      }
                      break;

            case 'B': if(integCheck == 1) {
                        fprintf(stderr, "Integrity check not supported with prepost burst\n");
                        exit(-1);
                      }
                      args.preburst = 1;
                      asyncReceive = 1;
                      printf("Preposting all receives before a timed run.\n");
                      printf("Some would consider this cheating,\n");
                      printf("but it is needed to match some vendor tests.\n"); fflush(stdout);
                      break;

            case 'I': args.cache = 0;
                      printf("Performance measured without cache effects\n\n"); fflush(stdout);
                      break;

            case 'o': memset(s,0,sizeof(s));strncpy(s,optarg,sizeof(s)-1);
                      printf("Sending output to %s\n", s); fflush(stdout);
                      break;

            case 's': streamopt = 1;
                      printf("Streaming in one direction only.\n\n");
#if defined(TCP) && ! defined(INFINIBAND) 
                      printf("Sockets are reset between trials to avoid\n");
                      printf("degradation from a collapsing window size.\n\n");
#endif
                      args.reset_conn = 1;
                      printf("Streaming does not provide an accurate\n");
                      printf("measurement of the latency since small\n");
                      printf("messages may get bundled together.\n\n");
                      if( args.bidir == 1 ) {
                        printf("You can't use -s and -2 together\n");
                        exit(0);
                      }
                      fflush(stdout);
                      break;

            case 'l': start = atoi(optarg);
                      if (start < 1)
                      {
                        fprintf(stderr,"Need a starting value >= 1\n");
                        exit(0);
                      }
                      break;

            case 'u': end = atoi(optarg);
                      break;

#if defined(TCP) && ! defined(INFINIBAND)
            case 'b': /* -b # resets the buffer size, -b 0 keeps system defs */
                      args.prot.sndbufsz = args.prot.rcvbufsz = atoi(optarg);
                      break;
#endif

            case '2': args.bidir = 1;    /* Both procs are transmitters */
                         /* end will be maxed at sndbufsz+rcvbufsz */
                      printf("Passing data in both directions simultaneously.\n");
                      printf("Output is for the combined bandwidth.\n");
#if defined(TCP) && ! defined(INFINIBAND)
                      printf("The socket buffer size limits the maximum test size.\n\n");
#endif
                      if( streamopt ) {
                        printf("You can't use -s and -2 together\n");
                        exit(0);
                      }
                      break;

            case 'h': args.tr = 1;       /* -h implies transmit node */
                      args.rcv = 0;
                      args.host = (char *)malloc(strlen(optarg)+1);
                      strcpy(args.host, optarg);
                      break;

#ifdef DISK
            case 'd': args.tr = 1;      /* -d to specify input/output file */
                      args.rcv = 0;
                      args.prot.read = 0;
                      args.prot.read_type = 'c';
                      args.prot.dfile_name = (char *)malloc(strlen(optarg)+1);
                      strcpy(args.prot.dfile_name, optarg);
                      break;

            case 'D': if( optarg[0] == 'r' )
                         args.prot.read = 1;
                      else
                         args.prot.read = 0;
                      args.prot.read_type = optarg[1];
                      break;
#endif

            case 'i': if(args.preburst == 1) {
                        fprintf(stderr, "Integrity check not supported with prepost burst\n");
                        exit(-1);
                      }
                      integCheck = 1;
                      perturbation = 0;
                      start = sizeof(int)+1; /* Start with integer size */
                      printf("Doing an integrity check instead of measuring performance\n"); fflush(stdout);
                      break;

#if defined(MPI)
            case 'z': args.source_node = -1;
                      printf("Receive using the ANY_SOURCE flag\n"); fflush(stdout);
                      break;

            case 'a': asyncReceive = 1;
                      printf("Preposting asynchronous receives\n"); fflush(stdout);
                      break;

            case 'S': args.syncflag=1;
                      fprintf(stderr,"Using synchronous sends\n");
                      break;
#endif
#if defined(MPI2)
            case 'g': if(args.prot.no_fence == 1) {
                        fprintf(stderr, "-f cannot be used with -g\n");
                        exit(-1);
                      } 
                      args.prot.use_get = 1;
                      printf("Using MPI-2 Get instead of Put\n");
                      break;

            case 'f': if(args.prot.use_get == 1) {
                         fprintf(stderr, "-f cannot be used with -g\n");
                         exit(-1);
                      }
                      args.prot.no_fence = 1;
                      bufalign = 0;
                      printf("Buffer alignment off (Required for no fence)\n");
                      break;
#endif /* MPI2 */

#if defined(INFINIBAND)
            case 'm': switch(atoi(optarg)) {
                        case 256: args.prot.ib_mtu = MTU256;
                          break;
                        case 512: args.prot.ib_mtu = MTU512;
                          break;
                        case 1024: args.prot.ib_mtu = MTU1024;
                          break;
                        case 2048: args.prot.ib_mtu = MTU2048;
                          break;
                        case 4096: args.prot.ib_mtu = MTU4096;
                          break;
                        default: 
                          fprintf(stderr, "Invalid MTU size, must be one of "
                                          "256, 512, 1024, 2048, 4096\n");
                          exit(-1);
                      }
                      break;

            case 't': if( !strcmp(optarg, "send_recv") ) {
                         printf("Using Send/Receive communications\n");
                         args.prot.commtype = NP_COMM_SENDRECV;
                      } else if( !strcmp(optarg, "send_recv_with_imm") ) {
                         printf("Using Send/Receive communications with immediate data\n");
                         args.prot.commtype = NP_COMM_SENDRECV_WITH_IMM;
                      } else if( !strcmp(optarg, "rdma_write") ) {
                         printf("Using RDMA Write communications\n");
                         args.prot.commtype = NP_COMM_RDMAWRITE;
                      } else if( !strcmp(optarg, "rdma_write_with_imm") ) {
                         printf("Using RDMA Write communications with immediate data\n");
                         args.prot.commtype = NP_COMM_RDMAWRITE_WITH_IMM;
                      } else {
                         fprintf(stderr, "Invalid transfer type "
                                 "specified, please choose one of:\n\n"
                                 "\tsend_recv\t\tUse Send/Receive communications\t(default)\n"
                                 "\tsend_recv_with_imm\tSame as above with immediate data\n"
                                 "\trdma_write\t\tUse RDMA Write communications\n"
                                 "\trdma_write_with_imm\tSame as above with immediate data\n\n");
                         exit(-1);
                      }
                      break;

            case 'c': if( !strcmp(optarg, "local_poll") ) {
                         printf("Using local polling completion\n");
                         args.prot.comptype = NP_COMP_LOCALPOLL;
                      } else if( !strcmp(optarg, "vapi_poll") ) {
                         printf("Using VAPI polling completion\n");
                         args.prot.comptype = NP_COMP_VAPIPOLL;
                      } else if( !strcmp(optarg, "event") ) {
                         printf("Using VAPI event completion\n");
                         args.prot.comptype = NP_COMP_EVENT;
                      } else {
                         fprintf(stderr, "Invalid completion type specified, "
                                 "please choose one of:\n\n"
                                 "\tlocal_poll\tWait for last byte of data\t(default)\n"
                                 "\tvapi_poll\tUse VAPI polling function\n"
                                 "\tevent\t\tUse VAPI event handling function\n\n");
                         exit(-1);
                      }
                      break;
#endif

            case 'n': nrepeat_const = atoi(optarg);
                      break;

#if defined(TCP) && ! defined(INFINIBAND)
            case 'r': args.reset_conn = 1;
                      printf("Resetting connection after every trial\n");
                      break;
#endif

            default: 
                     PrintUsage(); 
                     exit(-12);
       }
   }

#endif /* ! defined TCGMSG */

#if defined(INFINIBAND)
   asyncReceive = 1;
   fprintf(stderr, "Preposting asynchronous receives (required for Infiniband)\n");
   if(args.bidir && (
          (args.cache && args.prot.commtype == NP_COMM_RDMAWRITE) || /* rdma_write only works with no-cache mode */
          (!args.preburst && args.prot.commtype != NP_COMM_RDMAWRITE) || /* anything besides rdma_write requires prepost burst */
          (args.preburst && args.prot.comptype == NP_COMP_LOCALPOLL && args.cache) || /* preburst with local polling in cache mode doesn't work */
          0)) {

      fprintf(stderr, 
         "\n"
         "Bi-directional mode currently only works with a subset of the\n"
         "Infiniband options. Restrictions are:\n"
         "\n"
         "  RDMA write (-t rdma_write) requires no-cache mode (-I).\n"
         "\n"
         "  Local polling (-c local_poll, default if no -c given) requires\n"
         "    no-cache mode (-I), and if not using RDMA write communication,\n"
         "    burst mode (-B).\n"
         "\n"
         "  Any other communication type and any other completion type\n"
         "    require burst mode (-B). No-cache mode (-I) may be used\n"
         "    optionally.\n"
         "\n"
         "  All other option combinations will fail.\n"
         "\n");
               
      exit(-1);      

   }
#endif

   if (start > end)
   {
       fprintf(stderr, "Start MUST be LESS than end\n");
       exit(420132);
   }
   args.nbuff = TRIALS;
   args.port = DEFPORT;

   Setup(&args);

   if( args.bidir && end > args.upper ) {
      end = args.upper;
      if( args.tr ) {
         printf("The upper limit is being set to %d Bytes\n", end);
#if defined(TCP) && ! defined(INFINIBAND)
         printf("due to socket buffer size limitations\n\n");
#endif
   }  }

#if defined(GM)

   if(streamopt && (!nrepeat_const || nrepeat_const > args.prot.num_stokens)) {
     printf("\nGM is currently limited by the driver software to %d\n", 
            args.prot.num_stokens);
     printf("outstanding sends. The number of repeats will be set\n");
     printf("to this limit for every trial in streaming mode.  You\n");
     printf("may use the -n switch to set a smaller number of repeats\n\n");

     nrepeat_const = args.prot.num_stokens;
   }

#endif

   if( args.tr )                     /* Primary transmitter */
   {
       if ((out = fopen(s, "w")) == NULL)
       {
           fprintf(stderr,"Can't open %s for output\n", s);
           exit(1);
       }
   }
   else out = stdout;

      /* Set a starting value for the message size increment. */

   inc = (start > 1) ? start / 2 : 1;
   nq = (start > 1) ? 1 : 0;

      /* Test the timing to set tlast for the first test */

   args.bufflen = start;
   MyMalloc(&args, args.bufflen, 0, 0);
   InitBufferData(&args, args.bufflen, 0, 0);

   if(args.cache) args.s_buff = args.r_buff;
   
   args.r_ptr = args.r_buff_orig = args.r_buff;
   args.s_ptr = args.s_buff_orig = args.s_buff;
      
   AfterAlignmentInit(&args);  /* MPI-2 needs this to create a window */

   /* Infiniband requires use of asynchronous communications, so we need
    * the PrepareToReceive calls below
    */
   if( asyncReceive )
      PrepareToReceive(&args);
   
   Sync(&args);    /* Sync to prevent race condition in armci module */

   /* For simplicity's sake, even if the real test below will be done in
    * bi-directional mode, we still do the ping-pong one-way-at-a-time test
    * here to estimate the one-way latency. Unless it takes significantly
    * longer to send data in both directions at once than it does to send data
    * one way at a time, this shouldn't be too far off anyway.
    */
   t0 = When();
      for( n=0; n<100; n++) {
         if( args.tr) {
            SendData(&args);
            RecvData(&args);
            if( asyncReceive && n<99 )
               PrepareToReceive(&args);
         } else if( args.rcv) {
            RecvData(&args);
            if( asyncReceive && n<99 )
               PrepareToReceive(&args);
            SendData(&args);
         }
      }
   tlast = (When() - t0)/200;

   /* Sync up and Reset before freeing the buffers */

   Sync(&args); 

   Reset(&args);
   
   /* Free the buffers and any other module-specific resources. */
   if(args.cache)
      FreeBuff(args.r_buff_orig, NULL);
   else
      FreeBuff(args.r_buff_orig, args.s_buff_orig);

      /* Do setup for no-cache mode, using two distinct buffers. */

   if (!args.cache)
   {

       /* Allocate dummy pool of memory to flush cache with */

       if ( (memcache = (int *)malloc(MEMSIZE)) == NULL)
       {
           perror("malloc");
           exit(1);
       }
       mymemset(memcache, 0, MEMSIZE/sizeof(int)); 

       /* Allocate large memory pools */

       MyMalloc(&args, MEMSIZE+bufalign, args.soffset, args.roffset); 

       /* Save buffer addresses */
       
       args.s_buff_orig = args.s_buff;
       args.r_buff_orig = args.r_buff;

       /* Align buffers */

       args.s_buff = AlignBuffer(args.s_buff, bufalign);
       args.r_buff = AlignBuffer(args.r_buff, bufalign);

       /* Post alignment initialization */

       AfterAlignmentInit(&args);

       /* Initialize send buffer pointer */
       
/* both soffset and roffset should be zero if we don't have any offset stuff, so this should be fine */
       args.s_ptr = args.s_buff+args.soffset;
       args.r_ptr = args.r_buff+args.roffset;
   }

       /**************************
        * Main loop of benchmark *
        **************************/

   if( args.tr ) fprintf(stderr,"Now starting the main loop\n");

   for ( n = 0, len = start, errFlag = 0; 
        n < NSAMP - 3 && tlast < STOPTM && len <= end && !errFlag; 
        len = len + inc, nq++ )
   {

           /* Exponentially increase the block size.  */

       if (nq > 2) inc = ((nq % 2))? inc + inc: inc;
       
          /* This is a perturbation loop to test nearby values */

       for (pert = ((perturbation > 0) && (inc > perturbation+1)) ? -perturbation : 0;
            pert <= perturbation; 
            n++, pert += ((perturbation > 0) && (inc > perturbation+1)) ? perturbation : perturbation+1)
       {

           Sync(&args);    /* Sync to prevent race condition in armci module */

               /* Calculate how many times to repeat the experiment. */

           if( args.tr )
           {
               if (nrepeat_const) {
                   nrepeat = nrepeat_const;
/*               } else if (len == start) {*/
/*                   nrepeat = MAX( RUNTM/( 0.000020 + start/(8*1000) ), TRIALS);*/
               } else {
                   nrepeat = MAX((RUNTM / ((double)args.bufflen /
                                  (args.bufflen - inc + 1.0) * tlast)),TRIALS);
               }
               SendRepeat(&args, nrepeat);
           }
           else if( args.rcv )
           {
               RecvRepeat(&args, &nrepeat);
           }

           args.bufflen = len + pert;

           if( args.tr )
               fprintf(stderr,"%3d: %7d bytes %6d times --> ",
                       n,args.bufflen,nrepeat);

           if (args.cache) /* Allow cache effects.  We use only one buffer */
           {
               /* Allocate the buffer with room for alignment*/

               MyMalloc(&args, args.bufflen+bufalign, args.soffset, args.roffset); 

               /* Save buffer address */

               args.r_buff_orig = args.r_buff;
               args.s_buff_orig = args.r_buff;

               /* Align buffer */

               args.r_buff = AlignBuffer(args.r_buff, bufalign);
               args.s_buff = args.r_buff;
               
               /* Initialize buffer with data
                *
                * NOTE: The buffers should be initialized with some sort of
                * valid data, whether it is actually used for anything else,
                * to get accurate results.  Performance increases noticeably
                * if the buffers are left uninitialized, but this does not
                * give very useful results as realworld apps tend to actually
                * have data stored in memory.  We are not sure what causes
                * the difference in performance at this time.
                */

               InitBufferData(&args, args.bufflen, args.soffset, args.roffset);


               /* Post-alignment initialization */

               AfterAlignmentInit(&args);

               /* Initialize buffer pointers (We use r_ptr and s_ptr for
                * compatibility with no-cache mode, as this makes the code
                * simpler) 
                */
               /* offsets are zero by default so this saves an #ifdef */
               args.r_ptr = args.r_buff+args.roffset;
               args.s_ptr = args.r_buff+args.soffset;

           }
           else /* Eliminate cache effects.  We use two distinct buffers */
           {

               /* this isn't truly set up for offsets yet */
               /* Size of an aligned memory block including trailing padding */

               len_buf_align = args.bufflen;
               if(bufalign != 0)
                 len_buf_align += bufalign - args.bufflen % bufalign;
 
               /* Initialize the buffers with data
                *
                * See NOTE above.
                */
               InitBufferData(&args, MEMSIZE, args.soffset, args.roffset); 
               

               /* Reset buffer pointers to beginning of pools */
               args.r_ptr = args.r_buff+args.roffset;
               args.s_ptr = args.s_buff+args.soffset;
            }

            bwdata[n].t = LONGTIME;
/*            t2 = t1 = 0;*/

            /* Finally, we get to transmit or receive and time */

            /* NOTE: If a module is running that uses only one process (e.g.
             * memcpy), we assume that it will always have the args.tr flag
             * set.  Thus we make some special allowances in the transmit 
             * section that are not in the receive section.
             */

            if( args.tr || args.bidir )
            {
                /*
                   This is the transmitter: send the block TRIALS times, and
                   if we are not streaming, expect the receiver to return each
                   block.
                */

                for (i = 0; i < (integCheck ? 1 : TRIALS); i++)
                {                    
                    if(args.preburst && asyncReceive && !streamopt)
                    {

                      /* We need to save the value of the recv ptr so
                       * we can reset it after we do the preposts, in case
                       * the module needs to use the same ptr values again
                       * so it can wait on the last byte to change to indicate
                       * the recv is finished.
                       */

                      SaveRecvPtr(&args);

                      for(j=0; j<nrepeat; j++)
                      {
                        PrepareToReceive(&args);
                        if(!args.cache)
                          AdvanceRecvPtr(&args, len_buf_align);
                      }

                      ResetRecvPtr(&args);
                    }

                    /* Flush the cache using the dummy buffer */
                    if (!args.cache)
                      flushcache(memcache, MEMSIZE/sizeof(int));

                    Sync(&args);

                    t0 = When();

                    for (j = 0; j < nrepeat; j++)
                    {
                        if (!args.preburst && asyncReceive && !streamopt)
                        {
                            PrepareToReceive(&args);
                        }

                        if (integCheck) SetIntegrityData(&args);

                        SendData(&args);

                        if (!streamopt)
                        {
                            RecvData(&args);

                            if (integCheck) VerifyIntegrity(&args);

                            if(!args.cache)
                              AdvanceRecvPtr(&args, len_buf_align);

                        }
                        
                        /* Wait to advance send pointer in case RecvData uses
                         * it (e.g. memcpy module).
                         */
                        if (!args.cache)
                          AdvanceSendPtr(&args, len_buf_align);

                    }

                       /* t is the 1-directional trasmission time */

                    t = (When() - t0)/ nrepeat;

                    if( !streamopt && !args.bidir) t /= 2; /* Normal ping-pong */

                    Reset(&args);

/* NOTE: NetPIPE does each data point TRIALS times, bouncing the message
 * nrepeats times for each trial, then reports the lowest of the TRIALS
 * times.  -Dave Turner
 */
                    bwdata[n].t = MIN(bwdata[n].t, t);
/*                    t1 += t;*/
/*                    t2 += t*t;*/
                }

                if (streamopt){  /* Get time info from Recv node */
                    RecvTime(&args, &bwdata[n].t);
/*                    RecvTime(&args, &t1);*/
/*                    RecvTime(&args, &t2);*/
                }

                   /* Calculate variance after completing this set of trials */

/*                bwdata[n].variance = t2/TRIALS - t1/TRIALS * t1/TRIALS;*/

            }
            else if( args.rcv )
            {
                /*
                   This is the receiver: receive the block TRIALS times, and
                   if we are not streaming, send the block back to the
                   sender.
                */
                for (i = 0; i < (integCheck ? 1 : TRIALS); i++)
                {
                    if (asyncReceive)
                    {
                       if (args.preburst)
                       {

                         /* We need to save the value of the recv ptr so
                          * we can reset it after we do the preposts, in case
                          * the module needs to use the same ptr values again
                          * so it can wait on the last byte to change to 
                          * indicate the recv is finished.
                          */

                         SaveRecvPtr(&args);

                         for (j=0; j < nrepeat; j++)
                         {
                              PrepareToReceive(&args);
                              if (!args.cache)
                                 AdvanceRecvPtr(&args, len_buf_align);
                         }
                         
                         ResetRecvPtr(&args);
                         
                       }
                       else
                       {
                           PrepareToReceive(&args);
                       }
                      
                    }
                    
                    /* Flush the cache using the dummy buffer */
                    if (!args.cache)
                      flushcache(memcache, MEMSIZE/sizeof(int));

                    Sync(&args);

                    t0 = When();
                    for (j = 0; j < nrepeat; j++)
                    {
                        RecvData(&args);

                        if (integCheck) VerifyIntegrity(&args);

                        if (!args.cache)
                        { 
                            AdvanceRecvPtr(&args, len_buf_align);
                        }
                        
                        if (!args.preburst && asyncReceive && (j < nrepeat-1))
                        {
                            PrepareToReceive(&args);
                        }

                        if (!streamopt)
                        {
                            if (integCheck) SetIntegrityData(&args);
                            
                            SendData(&args);

                            if(!args.cache) 
                              AdvanceSendPtr(&args, len_buf_align);
                        }

                    }
                    t = (When() - t0)/ nrepeat;

                    if( !streamopt && !args.bidir) t /= 2; /* Normal ping-pong */

                    Reset(&args);
                    
                    bwdata[n].t = MIN(bwdata[n].t, t);
/*                    t1 += t;*/
/*                    t2 += t*t;*/
                }
                if (streamopt){  /* Recv proc calcs time and sends to Trans */
                    SendTime(&args, &bwdata[n].t);
/*                    SendTime(&args, &t1);*/
/*                    SendTime(&args, &t2);*/
                }
            }
            else  /* Just going along for the ride */
            {
                for (i = 0; i < (integCheck ? 1 : TRIALS); i++)
                {
                    Sync(&args);
                }
            }

            /* Streaming mode doesn't really calculate correct latencies
             * for small message sizes, and on some nics we can get
             * zero second latency after doing the math.  Protect against
             * this.
             */
            if(bwdata[n].t == 0.0) {
              bwdata[n].t = 0.000001;
            }
            
            tlast = bwdata[n].t;
            bwdata[n].bits = args.bufflen * CHARSIZE * (1+args.bidir);
            bwdata[n].bps = bwdata[n].bits / (bwdata[n].t * 1024 * 1024);
            bwdata[n].repeat = nrepeat;
            
            if (args.tr)
            {
                if(integCheck) {
                  fprintf(out,"%8d %d", bwdata[n].bits / 8, nrepeat);

                } else {
                  fprintf(out,"%8d %lf %12.8lf",
                        bwdata[n].bits / 8, bwdata[n].bps, bwdata[n].t);

                }
                fprintf(out, "\n");
                fflush(out);
            }
    
            /* Free using original buffer addresses since we may have aligned
               r_buff and s_buff */

            if (args.cache)
                FreeBuff(args.r_buff_orig, NULL);
            
            if ( args.tr ) {
               if(integCheck) {
                 fprintf(stderr, " Integrity check passed\n");

               } else {
                 fprintf(stderr," %8.2lf Mbps in %10.2lf usec\n", 
                         bwdata[n].bps, tlast*1.0e6);
               }
            }


        } /* End of perturbation loop */

    } /* End of main loop  */
 
   /* Free using original buffer addresses since we may have aligned
      r_buff and s_buff */

   if (!args.cache) {
        FreeBuff(args.s_buff_orig, args.r_buff_orig);
   }
    if (args.tr) fclose(out);
         
    CleanUp(&args);
    return 0;
}
void CPlainGraphicalTextEditor::DeleteLine(void)
{
	mcEditor.DeleteLine();
	Sync();
}
void CPlainGraphicalTextEditor::HomeEnter(void)
{
	mcEditor.HomeEnter();
	Sync();
}
void CPlainGraphicalTextEditor::Paste(char* szSource)
{
	mcEditor.Paste(szSource);
	Sync();
}
void CPlainGraphicalTextEditor::Duplicate(void)
{
	mcEditor.Duplicate();
	Sync();
}
void CPlainGraphicalTextEditor::SelectWholeWord(void)
{
	mcEditor.SelectWholeWord();
	Sync();
}
void CPlainGraphicalTextEditor::ClearSelection(void)
{
	mcEditor.ClearSelection();
	Sync();
}
Example #8
0
void RecvRepeat(ArgStruct *p, int *rpt)
{
    /* Only child calls RecvRepeat */
    Sync(p);
    *rpt = my_shared->repeat;
}
Example #9
0
void Setup(ArgStruct *p)
{
    pid_t pid;
    struct knem_cmd_info info;
    char *filename = (char*) "NPknem.mmap";
    size_t pagesize = sysconf(_SC_PAGESIZE);
    int need_fork = 0, am_parent = 0, flags = O_RDWR;

    /* If no filename was specified, then we're the parent until we
       fork */
    if (NULL == p->host) {
        am_parent = 1;
        need_fork = 1;
        unlink(filename);
    } 
    /* If the file was specified, see if the file exists.  If it does,
       then we're the parent.  Otherwise, we're the "child". */
    else {
        int ret;
        struct stat sbuf;

        filename = p->host;
        need_fork = 0;

        ret = stat(filename, &sbuf);
        if (ret < 0 && errno == ENOENT) {
            am_parent = 1;
        }
    }

    /* Open a file so that we can mmap it */
    if (am_parent) {
        flags |= O_CREAT;
        printf("NPknem: creating shared file: %s\n", filename);
    } else {
        printf("NPknem: attaching to shared file: %s\n", filename);
    }
    shared_fd = open(filename, flags, 0600);
    if (shared_fd < 0) {
        fprintf(stderr, "Netpipe failed to create a file to mmap.  Aborting in despair.\n");
        exit(1);
    }
    shared_len = pagesize * 2;
    if (0 != ftruncate(shared_fd, shared_len)) {
        fprintf(stderr, "Netpipe failed to set the size of a shared file.  Aborting in despair.\n");
        exit(1);
    }

    /* Now fork the child and reset transmitter / receiver flags as
       appropriate */
    if (need_fork) {
        pid = fork();
        if (0 == pid) {
            am_parent = 0;
        } else {
            am_parent = 1;
        }
    }
    if (am_parent) {
        p->rcv = 0;
        p->tr = 1;
    } else {
        p->rcv = 1;
        p->tr = 0;
    }

    /* Map an anonymous shared memory segment between the two
       processes for passing cookies, etc. */
    shared_map = mmap(NULL, shared_len, (PROT_READ | PROT_WRITE), MAP_SHARED, 
                  shared_fd, 0);
    if (MAP_FAILED == shared_map) {
        if (am_parent || !need_fork) {
            perror("mmap");
            fprintf(stderr, "Netpipe failed to mmap a shared file.  Aborting in despair.\n");
        }
        exit(1);
    }

    /* Once we have both mmaped the file, parent unlinks the file so
       that we don't deal with filesystem synchronization.  Parent's
       data is the first page; child's data is the second.  This
       way we can pin them down to our local memory via "first touch"
       method. */
    if (am_parent) {
        my_shared = (shared_data_t*) shared_map;
        peer_shared = (shared_data_t*) (shared_map + pagesize);
    } else {
        peer_shared = (shared_data_t*) shared_map;
        my_shared = (shared_data_t*) (shared_map + pagesize);
    }
    memset((void*) my_shared, 0, sizeof(*my_shared));
    Sync(p);
    if (am_parent){ 
        unlink(filename);
    }

    /* See if we can find /dev/knem */
    knem_fd = open(KNEM_DEVICE_FILENAME, O_RDWR);
    if (knem_fd < 0) {
        if (am_parent || !need_fork) {
            fprintf(stderr, "Netpipe cannot open %s on this host.  Aborting in despair.\n", KNEM_DEVICE_FILENAME);
        }
        exit(1);
    }

    /* Check that we're compiled against the same ABI version that
       we're running with */
    if (ioctl(knem_fd, KNEM_CMD_GET_INFO, &info) < 0) {
        if (am_parent || !need_fork) {
            fprintf(stderr, "Netpipe failed to get ABI version from %s.  Aborting in despair.\n", KNEM_DEVICE_FILENAME);
        }
        exit(1);
    }

#if KNEM_ABI_VERSION < 0x0000000c
#error KNEM is too old, pleasse upgrade to 0.7 or later
#endif

    if (KNEM_ABI_VERSION != info.abi) {
        if (am_parent || !need_fork) {
            fprintf(stderr, "The knem ABI that Netpipe was compiled against is different than that of %s.  Aborting in despair.\n", KNEM_DEVICE_FILENAME);
        }
        exit(1);
    }

    /* If DMA was asked for, ensure that it's available */
    if (0 != p->prot.flags && 0 == (info.features & KNEM_FEATURE_DMA)) {
        if (am_parent || !need_fork) {
            fprintf(stderr, "DMA copy mode was requested but is not supported in the running kernel.  Aborting in despair.\n");
        }
        exit(1);
    }
}   
Example #10
0
static DECLFW(M179WriteLo)
{
  if(A==0x5ff1) reg[1]=V;
  Sync();
}
Example #11
0
void SendData(ArgStruct *p)
{
    struct knem_cmd_param_iovec knem_iov = {
        .base = (uintptr_t) p->s_ptr,
        .len = p->bufflen
    };
    struct knem_cmd_create_region createregioncmd = {
        .iovec_array = (uintptr_t) &knem_iov,
        .iovec_nr = 1,
        .flags = KNEM_FLAG_SINGLEUSE,
        .protection = PROT_READ,
    };

    /* Create the region */
    if (0 != ioctl(knem_fd, KNEM_CMD_CREATE_REGION, &createregioncmd)) {
        fprintf(stderr, "Netpipe failed to KNEM_CMD_CREATE_REGION.  Aborting in despair.\n");
        exit(1);
    }

    /* Send the cookie to the peer */
    peer_shared->cookies[peer_cookie_index] = createregioncmd.cookie;
    peer_cookie_index = (0 == peer_cookie_index) ? 1 : 0;
}

void RecvData(ArgStruct *p)
{
    uint64_t cookie;

    struct knem_cmd_param_iovec knem_iov = {
        .base = (uintptr_t) p->r_ptr,
        .len = p->bufflen
    };
    struct knem_cmd_inline_copy icopycmd = {
        .local_iovec_array = (uintptr_t) &knem_iov,
        .local_iovec_nr = 1,
        .write = 0,
        .async_status_index = 0,
        .flags = p->prot.flags
    };

    /* Wait for the sender to set my cookie */
    while (0 == (cookie = my_shared->cookies[my_cookie_index])) {
        continue;
    }
    icopycmd.remote_cookie = cookie;
    icopycmd.remote_offset = 0;
    my_shared->cookies[my_cookie_index] = 0;
    my_cookie_index = (0 == my_cookie_index) ? 1 : 0;

    /* Initiate the receive (synchronous mode) */
    if (0 != ioctl(knem_fd, KNEM_CMD_INLINE_COPY, &icopycmd)) {
        fprintf(stderr, "Netpipe failed to KNEM_CMD_INLINE_COPY.  Aborting in despair.\n");
        exit(1);
    }

    if (icopycmd.current_status != KNEM_STATUS_SUCCESS) {
        fprintf(stderr, "Netpipe failed to complete KNEM_CMD_INLINE_COPY.  Aborting in despair.\n");
        exit(1);
    }
}

void SendTime(ArgStruct *p, double *t)
{
    /* Only child calls SendTime */
    peer_shared->time = *t;
    Sync(p);
}

void RecvTime(ArgStruct *p, double *t)
{
    /* Only parent calls RecvTime */
    Sync(p);
    *t = my_shared->time;
}

void SendRepeat(ArgStruct *p, int rpt)
{
    /* Only parent calls SendRepeat */
    peer_shared->repeat = rpt;
    Sync(p);
}
Example #12
0
static DECLFW(M179Write)
{
  if(A==0xa000) reg[0]=V;
  Sync();
}
Example #13
0
static DECLFW(UNLDSOUNDV1Write) {
    reg[A & 3] = V;
//	FCEU_printf("cmd %04x:%02x\n", A, V);
    Sync();
}
Example #14
0
static DECLFW(WriteHi)
{
 PRGBank16 = V;
 Sync();
}
void CPlainGraphicalTextEditor::SelectDocumentEnd(void)
{
	mcEditor.SelectDocumentEnd();
	Sync();
}
Example #16
0
static DECLFW(M178Write) {
	reg[A & 3] = V;
	Sync();
}
void CPlainGraphicalTextEditor::SelectPageDown(void)
{
	mcEditor.SelectPageDown();
	Sync();
}
Example #18
0
static void StateRestore(int version) {
	Sync();
}
void CPlainGraphicalTextEditor::SelectAll(void)
{
	mcEditor.SelectAll();
	Sync();
}
void CPlainGraphicalTextEditor::Backspace(void)
{
	mcEditor.Backspace();
	Sync();
}
void CPlainGraphicalTextEditor::Copy(CChars* pszDest)
{
	mcEditor.Copy(pszDest);
	Sync();
}
void CPlainGraphicalTextEditor::Printable(char c, BOOL bInsert)
{
	mcEditor.Printable(c, bInsert);
	Sync();
}
void CPlainGraphicalTextEditor::CutLine(CChars* pszDest)
{
	mcEditor.CutLine(pszDest);
	Sync();
}
void CPlainGraphicalTextEditor::DocumentHome(void)
{
	mcEditor.DocumentHome();
	Sync();
}
void CPlainGraphicalTextEditor::DeleteWordRemainingLeft(void)
{
	mcEditor.DeleteWordRemainingLeft();
	Sync();
}
void CPlainGraphicalTextEditor::PageDown(void)
{
	mcEditor.PageDown();
	Sync();
}
void CPlainGraphicalTextEditor::EndEnter(void)
{
	mcEditor.EndEnter();
	Sync();
}
void CPlainGraphicalTextEditor::SelectWordRight(void)
{
	mcEditor.SelectWordRight();
	Sync();
}
void CPlainGraphicalTextEditor::Space(BOOL bInsert)
{
	mcEditor.Space(bInsert);
	Sync();
}
Example #30
0
		void AsyncRenderer::SetGameMap(GameMap *gm) {
			SPADES_MARK_FUNCTION();
			rcmds::SetGameMap *cmd = generator->AllocCommand<rcmds::SetGameMap>();
			cmd->map = gm;
			Sync();
		}