Esempio n. 1
0
/*
 * Ping-pong benchmark between exactly two UEs.
 *
 * An optional first application argument overrides the base round count
 * (nrounds). The message size starts at 32 bytes and is doubled 17 times;
 * within each doubling, 4 sizes spaced by a factor of 2^(1/4) are measured,
 * each rounded down to a multiple of 32 bytes. For every size the two UEs
 * bounce the message back and forth and the UE with non-zero rank prints the
 * size and the mean wall-clock time per round trip.
 *
 * Returns 0 on success and 1 on an invalid round count, a UE count other
 * than two, or a failed buffer allocation.
 */
int RCCE_APP(int argc, char **argv){
  /* Largest message sent is 32*2^16 bytes scaled by less than 2^(3/4),
     which stays below 4 MiB.                                            */
  enum { BUFFER_BYTES = 1024*1024*4 };
  int YOU, ME, nrounds = 1024*1024, actualrounds, size, round, index;
  int bigsize, subindex, roundsize;
  double timer;
  char *buffer;

  RCCE_init(&argc, &argv);

  //  RCCE_debug_set(RCCE_DEBUG_ALL);
  ME = RCCE_ue();
  YOU = !ME;

  if (argc>1) nrounds = atoi(*++argv);
  if (nrounds<1) {
    if (!ME) printf("Pingpong needs at least 1 round; try again\n");
    return(1);
  }
  if (RCCE_num_ues() != 2) {
    if (!ME) printf("Pingpong needs exactly two UEs; try again\n");
    return(1);
  }

  /* Keep the 4 MiB message buffer off the stack (it can exceed a small
     thread stack) and zero it so no indeterminate bytes are ever sent.  */
  buffer = calloc(BUFFER_BYTES, 1);
  if (buffer == NULL) {
    printf("UE %d: Pingpong could not allocate its message buffer\n", ME);
    return(1);
  }

  bigsize = 32;
  for (index=0; index<17; index++) {
    size = bigsize;
    for (subindex=0; subindex<4; subindex++) {

      /* message length: size rounded down to a multiple of 32, at least 32 */
      roundsize = max(32,size - size%32);
      // synchronize before starting the timer
      RCCE_barrier(&RCCE_COMM_WORLD);
      timer = RCCE_wtime();

      /* Scale the round count inversely with the message size, never below
         10 rounds. The product is formed in long long so that a large
         user-supplied nrounds cannot overflow int; the quotient is at most
         nrounds because roundsize >= 32.                                  */
      actualrounds = max(10,(int)(((long long)nrounds*32)/roundsize));
      for (round=0; round <actualrounds; round++) {
        if (ME)  {
          /* non-zero rank serves: send first, then wait for the echo */
          RCCE_send(buffer, roundsize, YOU);
          RCCE_recv(buffer, roundsize, YOU);
        }
        else {
          /* rank 0 echoes: receive first, then send back */
          RCCE_recv(buffer, roundsize, YOU);
          RCCE_send(buffer, roundsize, YOU);
        }
      }
      timer = RCCE_wtime()-timer;

      if (ME) printf("%d  %1.9lf\n", roundsize, timer/actualrounds);
      /* 1.18920712 ~= 2^(1/4): four steps span one doubling of bigsize */
      size = (int)(size * 1.18920712);

    }

    bigsize *= 2;

  }

  free(buffer);

  RCCE_finalize();

  return(0);
}
Esempio n. 2
0
/* Record the current RCCE wall-clock time in the start_time slot of timer np. */
void timer_start(int np) {
      start_time(np) = RCCE_wtime();
}
Esempio n. 3
0
/*
 * Record the current RCCE wall-clock time in the start slot of the timer
 * whose number is passed by reference in np.
 */
void timer_start(int *np) {
      const int slot = *np;

      start(slot) = RCCE_wtime();
}
Esempio n. 4
0
/*
 * Seed rand() for the calling core.
 *
 * The seed is the fractional part of RCCE_wtime() scaled by 1000000, plus
 * 13 * (RCCE_ue() + 1), so that cores sampling the clock at the same
 * instant still receive different seeds.
 */
void
srand_core()
{
    const double now = RCCE_wtime();
    const unsigned int whole_part = (unsigned int) now;
    const unsigned int scaled_fraction =
        (unsigned int) ((now - whole_part) * 1000000);

    srand(scaled_fraction + (13 * (RCCE_ue() + 1)));
}
Esempio n. 5
0
/*
 * Stop the timer whose number is passed by reference in np: add the
 * wall-clock time elapsed since its recorded start to its elapsed total.
 */
void timer_stop(int *np) {
      const int slot = *np;
      const double stop_time = RCCE_wtime();
      const double delta = stop_time - start(slot);

      elapsed(slot) = elapsed(slot) + delta;
}
Esempio n. 6
0
/*
 * Send size bytes starting at privbuf to the UE with rank dest.
 *
 * Returns the result of RCCE_error_return(..., RCCE_ERROR_ID) when dest is
 * outside [0, RCCE_NP), otherwise RCCE_SUCCESS. A transport failure from
 * send_message() trips an assertion. With MEASURE_TIME the time spent here
 * is added to measure_rcce_time; with MEASURE_DATA the byte count is added
 * to the per-phase, per-destination statistics.
 */
int RCCE_send(char *privbuf, size_t size, int dest)
{
#ifdef MEASURE_TIME
    double t_begin = RCCE_wtime();
#endif

    /* reject ranks that do not name a participating UE */
    if (!(dest >= 0 && dest < RCCE_NP)) {
        return RCCE_error_return(RCCE_debug_comm, RCCE_ERROR_ID);
    }

    /* ranks are translated to core IDs through RC_COREID */
    errval_t status = send_message(privbuf, size, RC_COREID[dest]);
    assert(err_is_ok(status));

#ifdef MEASURE_TIME
    measure_rcce_time += RCCE_wtime() - t_begin;
#endif

#ifdef MEASURE_DATA
    measure_rcce_data[rcce_curphase][dest] += size;
#endif

    return RCCE_SUCCESS;
}
Esempio n. 7
0
/*
 * Finish an RCCE run. This implementation only reports statistics:
 *  - with MEASURE_TIME, the accumulated communication time and the total
 *    time since measure_start;
 *  - with MEASURE_DATA, one line per phase listing the counter stored for
 *    each of the RCCE_NP ranks.
 * Always returns RCCE_SUCCESS.
 */
int RCCE_finalize(void)
{
#ifdef MEASURE_TIME
    double t_end = RCCE_wtime();
    printf("%d: Time spent in RCCE communication %.5g seconds. "
           "%.5g seconds total program run-time.\n",
           RCCE_ue(), measure_rcce_time, t_end - measure_start);
#endif

#ifdef MEASURE_DATA
    for (int ph = 0; ph < MAX_PHASES; ph++) {
        printf("%d: Phase %d: ", RCCE_ue(), ph);

        int rank = 0;
        while (rank < RCCE_NP) {
            printf("%lu ", measure_rcce_data[ph][rank]);
            rank++;
        }

        printf("\n");
    }
#endif

    return RCCE_SUCCESS;
}
Esempio n. 8
0
/*
 * Ping-ping benchmark between exactly two UEs: both sides post a
 * non-blocking send and a non-blocking receive at the same time.
 *
 * argv[1] (optional) overrides the number of timed rounds, argv[2]
 * (optional) the maximum message length in bytes (1..MAXBUFSIZE). Message
 * lengths 1, 2, 4, ... up to the maximum are measured. Each length runs one
 * extra, untimed first round: the timer is only started once round 0 has
 * completed. Rank 0 prints length, time per round in microseconds and the
 * resulting MB/sec.
 *
 * Invalid arguments or a UE count other than two terminate the process with
 * exit(-1). Returns 0 otherwise.
 */
int RCCE_APP(int argc, char **argv)
{
  int idx;                       /* only used by the optional #ifdef loops */
  int ue_count;
  int peer, self;
  int rounds  = NUMROUNDS;
  int max_len = DEFAULTLEN;
  int msg_len;
  int rnd;
  double t_start;
  RCCE_SEND_REQUEST tx_req;
  RCCE_RECV_REQUEST rx_req;

  RCCE_init(&argc, &argv);

  self     = RCCE_ue();
  ue_count = RCCE_num_ues();

  /* --- argument handling: only rank 0 reports, every rank exits --- */
  if (argc > 1) {
    rounds = atoi(argv[1]);
  }
  if (rounds < 1) {
    if (self == 0) {
      fprintf(stderr, "Pingping needs at least 1 round; try again\n");
    }
    exit(-1);
  }

  if (argc > 2) {
    max_len = atoi(argv[2]);
  }
  if (max_len < 1) {
    if (self == 0) {
      fprintf(stderr, "Illegal message size: %s; try again\n", argv[2]);
    }
    exit(-1);
  }
  if (max_len > MAXBUFSIZE) {
    if (self == 0) {
      fprintf(stderr, "Message size %d is too big; try again\n", max_len);
    }
    exit(-1);
  }

  if (ue_count != 2) {
    if (self == 0) {
      fprintf(stderr, "Pingping needs exactly two UEs; try again\n");
    }
    exit(-1);
  }

  peer = (self + 1) % 2;

  if (self == 0) {
    printf("#bytes\t\tusec\t\tMB/sec\n");
  }

  for (msg_len = 1; msg_len <= max_len; msg_len *= 2) {
#ifdef _CACHE_WARM_UP_
    /* touch both buffers so they are cached before timing */
    for (idx = 0; idx < msg_len; idx++) {
      dummy += send_buffer[idx];
      dummy += recv_buffer[idx];
    }
#endif

    /* line both UEs up before the exchange starts */
    RCCE_barrier(&RCCE_COMM_WORLD);

    /* rounds + 1 iterations: round 0 is the untimed one */
    for (rnd = 0; rnd <= rounds; rnd++) {

#ifdef _ERROR_CHECK_
      /* fill the outgoing message with a pattern the peer can verify */
      for (idx = 0; idx < msg_len; idx++) {
        send_buffer[idx] = (idx + msg_len + rnd) % 127;
      }
#endif

      /* post both directions, then wait for both to complete */
      RCCE_isend(send_buffer, msg_len, peer, &tx_req);
      RCCE_irecv(recv_buffer, msg_len, peer, &rx_req);
      RCCE_isend_wait(&tx_req);
      RCCE_irecv_wait(&rx_req);

      /* the clock starts only after the first round has finished */
      if (rnd == 0) {
        t_start = RCCE_wtime();
      }

#ifdef _ERROR_CHECK_
      /* the peer used the same pattern, so the received data must match */
      for (idx = 0; idx < msg_len; idx++) {
        if (recv_buffer[idx] != (idx + msg_len + rnd) % 127) {
          fprintf(stderr, "ERROR: %d VS %d\n", recv_buffer[idx], (idx + msg_len + rnd) % 127);
          exit(-1);
        }
      }
#endif
    }

    /* time spent in the timed rounds of this message length */
    double elapsed = RCCE_wtime() - t_start;

    if (self == 0) {
      printf("%d\t\t%1.2lf\t\t%1.2lf\n",
             msg_len,
             elapsed/(rounds)*1000000,
             (msg_len / (elapsed/(rounds))) / (1024*1024) );
    }
  }

  RCCE_finalize();

  return 0;
}
Esempio n. 9
0
/*
 * Stencil benchmark combined with RCCE power-management calls.
 *
 * Every UE owns a strip a[NXNY] of the grid. An optional first application
 * argument overrides the iteration count (default 3). In each iteration the
 * UE issues power/frequency requests, exchanges one row of NX floats with
 * each neighbor and then applies the five-point weighted update to its strip.
 * UE 0 prints the dividers (print_dividers) before and after the run.
 *
 * Exits with status 1 if NX is not a multiple of 8; returns 0 otherwise.
 */
int RCCE_APP(int argc, char **argv){

  float     a[NXNY];
  int       i, offset, iter=3;
  int       fdiv, vlevel;
  int       ID, ID_right, ID_left;
  int       NTILES1;
  /* NOTE(review): time is written on UE 0 below but only read in the
     commented-out reporting code, so it currently has no effect.        */
  double    time;
  RCCE_REQUEST req;

  RCCE_init(&argc, &argv);
 
  //  RCCE_debug_set(RCCE_DEBUG_ALL);

  /* rank of the last UE, and this UE's own rank */
  NTILES1 = RCCE_num_ues()-1;
  ID = RCCE_ue();


  /* neighbors are computed with wrap-around, but the exchanges below are
     guarded (ID_right!=0, ID!=0) so the wrap-around pair never communicates */
  ID_right = (ID+1)%RCCE_num_ues();
  ID_left = (ID-1+RCCE_num_ues())%RCCE_num_ues();

// set the relevant areas of the board to the default frequency and voltage
  RCCE_set_frequency_divider(8, &fdiv);
  if (ID==0)print_dividers();

  //  return(0);
  //    RCCE_iset_power(3, &req, &fdiv, &vlevel);
  //  if (ID==RCCE_power_domain_master()) printf("UE %d computed vlevel %d\n", ID,vlevel);
  //  RCCE_wait_power(&req);
  //  RCCE_set_frequency_divider(3, &fdiv);

  if (NX%8) {
    printf("Grid width should be multiple of 8: %d\n", NX);
    exit(1);
  }
  if (argc>1) iter=atoi(*++argv);
  if (!ID) printf("Core %d Executing %d iterations\n", ID, iter);

  /* initialize array a on all tiles; this stuffs a into private caches  */

  /* everything starts at 0; UE 0 sets its first row to 1 and the last UE
     sets the row starting at NXNY1 to 2                                 */
  for (offset=0,       i=0; i<NXNY; i++) a[i+offset] = 0.0;
  if (ID == 0) 
     for (offset=0,    i=0; i<NX;   i++) a[i+offset] = 1.0;
  if (ID == NTILES1) 
     for (offset=NXNY1,i=0; i<NX;   i++) a[i+offset] = 2.0;

  /* main loop */

  if (ID==0) time = RCCE_wtime();

  while ((iter--)>0){

    /* first power request of the iteration; presumably asynchronous, since
       it is completed by the RCCE_wait_power call further down -- confirm
       against the RCCE power API                                        */
    RCCE_iset_power(3, &req, &fdiv, &vlevel);
    if (ID==RCCE_power_domain_master()) 
      printf("asked for divider 3, received %d, voltage level %d\n", fdiv, vlevel); 
    fflush(NULL);
      if (!(iter%100)) printf("Iteration %d\n", iter);
    /* start with copying fringe data to neighboring tiles; we need to
       group semantic send/recv pairs together to avoid deadlock         */
    /* rightward pass: the row at NXNY2 goes to the right neighbor, the
       row arriving from the left neighbor lands in row 0                */
    if (ID_right!=0) RCCE_send((char*)(&a[NXNY2]), NX*sizeof(float), ID_right);
    if (ID != 0)     RCCE_recv((char*)(&a[0]),     NX*sizeof(float), ID_left);

    /* complete the first power request between the two passes */
    RCCE_wait_power(&req);
    /* leftward pass: the row at NX goes to the left neighbor, the row
       arriving from the right neighbor lands at NXNY1                   */
    if (ID!=0)       RCCE_send((char *)(&a[NX]),    NX*sizeof(float), ID_left);
    if (ID_right!=0) RCCE_recv((char *)(&a[NXNY1]), NX*sizeof(float), ID_right);

    /* second power request, completed after the stencil update below.
       NOTE(review): the frequency divider is set while that request is
       still outstanding -- confirm this ordering is intended.           */
    RCCE_iset_power(3, &req, &fdiv, &vlevel);
    RCCE_set_frequency_divider(3, &fdiv);

    if (ID==RCCE_power_domain_master())    
      printf("asked for divider 3, received %d, voltage level %d\n", fdiv, vlevel);
    fflush(NULL);

    /* apply the stencil operation                                       */
    /* in-place update: the element at offset O3 is incremented by the
       weighted sum of the elements at offsets O1..O5                    */
    for (i=0; i<NXNY2; i++) {
      a[i+O3] +=
         W1*a[i+O1] + W2*a[i+O2] + W3*a[i+O3] + W4*a[i+O4] + W5*a[i+O5];
    }
    RCCE_wait_power(&req);
  }


//  /* print result strip by strip; this would not be done on RC */
//  for (int id=0; id<=NTILES1; id++) {
//    RCCE_barrier(&RCCE_COMM_WORLD);
//    if (ID==id) {
//      int start = NX; int end = NXNY1;
//      if (ID==0) start = 0;
//      if (ID == NTILES1) end = NXNY;
//      for (offset=0, i=start; i<end; i++) {
//        if (!(i%NX)) printf("\n");
////        comment out next line and uncomment subsequent three to print error
//        printf("%1.5f ",a[i+offset]); fflush(stdout);
////        int jj=i/NX+(ID*(NY-1));
////        double aexact=1.0+(double)jj/((NTILES1+1)*(NY-1));
////        printf("%f ",a[i+offset]-aexact);
//      }
//    }
//  }
//  RCCE_barrier(&RCCE_COMM_WORLD);
//  if (ID==0) { 
//    printf("\n");
//    time = RCCE_wtime()-time;
//    printf("Total time: %lf\n", time);
//  }

//reset the relevant areas of the board to the default frequency and voltage
//  RCCE_set_frequency_divider(8, &fdiv);
//  RCCE_iset_power(2, &req, &fdiv, &vlevel);
//  if (ID==RCCE_power_domain_master()) printf("UE %d computed vlevel %d\n", ID,vlevel);
//  RCCE_wait_power(&req);

//  RCCE_set_frequency_divider(3, &fdiv);
  /* wait for every UE to finish before UE 0 prints the final dividers */
  RCCE_barrier(&RCCE_COMM_WORLD);
  if (ID==0)print_dividers();

  RCCE_finalize();

  return(0);
}
Esempio n. 10
0
/*
 * MPI-named shim that returns the RCCE wall-clock time.
 *
 * Original author's note: "Somehow, this does not work; must replace
 * MPI_Wtime with RCCE_wtime directly".
 */
double MPI_Wtime(void) {
  const double now = RCCE_wtime();

  return now;
}
Esempio n. 11
0
/*
 * Initialize RCCE from the command line.
 *
 * Expected argument layout on entry:
 *   argv[0]              executable name
 *   argv[1]              number of UEs (stored in RCCE_NP)
 *   argv[2]              reference clock in GHz (0 = determine automatically)
 *   argv[3 .. 2+RCCE_NP] physical core IDs of the participating cores
 *   remaining            arguments for the application
 *
 * On return *argv and *argc have been adjusted so the application sees only
 * the executable name followed by its own arguments. The core IDs are sorted
 * and the position of the calling core in the sorted list becomes its rank
 * (RCCE_IAM).
 *
 * Returns RCCE_SUCCESS, or RCCE_ERROR_CORE_NOT_IN_HOSTFILE when RCCE_IAM is
 * negative after the rank search. Malformed argument lists and failing
 * system calls trip assertions.
 */
int RCCE_init(int *argc, char ***argv)
{
    int ue;
    void *nothing = NULL;

    // executable name, UE count and reference clock must be present
    assert(*argc >= 3);

    setup_routes(*argc, *argv);

    // save pointer to executable name for later insertion into the argument list
    char *executable_name = (*argv)[0];

    RCCE_NP        = atoi(*(++(*argv)));
    RC_REFCLOCKGHZ = atof(*(++(*argv)));

    // One core ID argument is consumed per UE below. Make sure they are all
    // there, otherwise the loop would walk off the end of argv and hand a
    // NULL (or worse) pointer to atoi(). Written as a subtraction so that a
    // huge UE count cannot overflow the comparison.
    // NOTE(review): the capacity of RC_COREID is not visible here; confirm
    // that RCCE_NP can never exceed it.
    assert(RCCE_NP >= 0 && RCCE_NP <= *argc - 3);

    if(RC_REFCLOCKGHZ == 0) {
        printf("Barrelfish RCCE extension: Computing reference clock GHz automatically...\n");
        uint64_t tscperms;
        errval_t err = sys_debug_get_tsc_per_ms(&tscperms);
        assert(err_is_ok(err));
        // TSC ticks per millisecond -> GHz
        RC_REFCLOCKGHZ = ((double)tscperms) / 1000000.0;
        printf("Reference clock computed to be %.2g\n", RC_REFCLOCKGHZ);
    }

    // put the participating core ids (unsorted) into an array
    for (ue=0; ue<RCCE_NP; ue++) {
        RC_COREID[ue] = atoi(*(++(*argv)));
    }

    // make sure executable name is as expected: *argv now points at the last
    // consumed argument, which becomes the application's argv[0]
    (*argv)[0] = executable_name;

    RC_MY_COREID = MYCOREID();

    // adjust apparent number of command line arguments, so it will appear to main
    // program that number of UEs, clock frequency, and core ID list were not on
    // command line
    *argc -= RCCE_NP+2;

    // sort array of participating physical core IDs to determine their ranks
    qsort((char *)RC_COREID, RCCE_NP, sizeof(int), id_compare);

    // determine rank of calling core
    // NOTE(review): this relies on RCCE_IAM holding a negative value before
    // the search so that the check below can detect a core missing from the
    // list; its initial value is not visible in this function.
    for (ue=0; ue<RCCE_NP; ue++) {
        if (RC_COREID[ue] == RC_MY_COREID) RCCE_IAM = ue;
    }

    // leave in one reassuring debug print
    printf("My rank is %d, physical core ID is %d\n", RCCE_IAM, RC_MY_COREID);
    if (RCCE_IAM<0) {
        return(RCCE_ERROR_CORE_NOT_IN_HOSTFILE);
    }

    // create global communicator (equivalent of MPI_COMM_WORLD); this will also allocate
    // the two synchronization flags associated with the global barrier
    RCCE_comm_split(RCCE_global_color, nothing, &RCCE_COMM_WORLD);

#ifdef MEASURE_TIME
    measure_start = RCCE_wtime();
    measure_rcce_time = 0.0;
#endif

#ifdef MEASURE_DATA
    memset(measure_rcce_data, 0, sizeof(measure_rcce_data));
#endif

    return (RCCE_SUCCESS);
}
Esempio n. 12
0
/*
 * Receive size bytes from the UE with rank source into privbuf.
 *
 * Blocks, handling incoming messages, until the per-core message buffer of
 * the sender is marked pending. Without BULK_TRANSFER_ENABLED the payload is
 * copied out of the buffered message, the buffered copy is freed and a reply
 * is sent to the sender. With BULK_TRANSFER_ENABLED privbuf is registered as
 * the destination before waiting and the sender is told that the receiver is
 * ready.
 *
 * Returns the result of RCCE_error_return(..., RCCE_ERROR_ID) when source is
 * outside [0, RCCE_NP), otherwise RCCE_SUCCESS. Transport errors and
 * protocol violations (e.g. a length mismatch) trip assertions.
 */
int RCCE_recv(char *privbuf, size_t size, int source)
{
    errval_t err;
#ifdef MEASURE_TIME
    double recv_start = RCCE_wtime();
#endif

#ifdef RCCE_PERF_MEASURE
    /* not referenced directly below; presumably consumed by the PERF()
       macro -- confirm against its definition */
    dispatcher_handle_t handle = curdispatcher();
    struct dispatcher_shared_generic* d =
        get_dispatcher_shared_generic(handle);
#endif

    /* reject ranks that do not name a participating UE */
    if (source<0 || source >= RCCE_NP) {
        return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ID));
    }

    /* message buffers and bindings are indexed by core ID, not by rank */
    int core_id = RC_COREID[source];
    struct msg_buf *mb = &msgbuf[core_id];
#ifdef BULK_TRANSFER_ENABLED
    /* describe the destination so the payload can be placed in privbuf */
    mb->bulk_ready = true;
    mb->length = size;
    mb->current = 0;
    mb->msg = privbuf;
#endif

    /* NOTE(review): size is a size_t but is formatted with %lu here */
    dprintf("%d: R(%lu,%d,%p,%d,%p)\n", my_core_id, size, source, mb, mb->pending, privbuf);

#ifdef BULK_TRANSFER_ENABLED
    /* tell the sender that this side is ready to receive size bytes */
    err = barray[core_id]->tx_vtbl.bulk_recv_ready(barray[core_id], NOP_CONT,
            my_core_id, size);
    assert(err_is_ok(err));
#endif

    PERF(30);

    /* keep dispatching incoming messages until one from this sender is pending */
    while(!mb->pending) {
        messages_wait_and_handle_next();
    }

    PERF(31);

    dprintf("%d: msg arrived\n", my_core_id);

    /* if(size <= DEFAULT_UMP_BUFLEN) { */
#ifndef BULK_TRANSFER_ENABLED
    /* the sender must have sent exactly the number of bytes asked for */
    assert(size == mb->length);
    memcpy(privbuf, mb->msg, size);
    /* } else { */
#else
    assert(mb->bulk);
#endif
    /* } */
    /* the buffer slot is free for the next message from this sender */
    mb->pending = false;

#ifndef BULK_TRANSFER_ENABLED
    assert(!mb->bulk);
    /* release the buffered copy, then send the reply to the sender */
    free(mb->msg);
    PERF(32);
    err = barray[core_id]->tx_vtbl.message_reply(barray[core_id],
            NOP_CONT, my_core_id);
    PERF(33);
    assert(err_is_ok(err));
#else
    assert(mb->bulk);
#endif

#ifdef MEASURE_TIME
    measure_rcce_time += RCCE_wtime() - recv_start;
#endif

    return (RCCE_SUCCESS);
}
Esempio n. 13
0
/*
 * Stencil benchmark using one-sided RCCE_put/RCCE_get and flags.
 *
 * Every UE owns a strip a[NXNY] of the grid. An optional first application
 * argument overrides the iteration count (default 10). In each iteration a
 * UE puts its boundary rows into the communication buffers of its
 * neighbors, signals them through flags, copies the rows it received out of
 * its own buffer and then applies the five-point weighted update. After the
 * loop the strips are printed in rank order and UE 0 prints the time spent
 * in the main loop.
 *
 * Exits with status 1 if NX is not a multiple of 8, returns 1 if a flag
 * cannot be allocated, and returns 0 otherwise.
 */
int RCCE_APP(int argc, char **argv) {

  /* statically allocated space sits in off-chip private memory          */
  /* NOTE(review): tile is declared but never used in this function      */
  float     a[NXNY], *buff;
  int       i, offset, iter=10, tile;
  int       MY_ID;
  int       NTILES1;
  double    time;
  RCCE_FLAG flag0, flag1;

  RCCE_init(&argc, &argv);
  
  /* rank of the last UE, and this UE's own rank */
  NTILES1 = RCCE_num_ues()-1;
  MY_ID = RCCE_ue();

  if (NX%8) {
    printf("Grid width should be multiple of 8: %d\n", NX);
    exit(1);
  }
  if (argc>1) iter=atoi(*++argv);
  if (MY_ID==0) printf("Executing %d iterations\n", iter);

    /* allocate space on the comm buffer                                 */
  /* two rows of NX floats: buff[0..NX) is filled by the neighbor with
     rank MY_ID-1, buff[NX..2*NX) by the neighbor with rank MY_ID+1.
     NOTE(review): the result of RCCE_malloc is not checked.             */
  buff = (float *) RCCE_malloc(sizeof(float)*2*NX); 
  /* Allocate flags to coordinate comm.                                  */                                 
  if (RCCE_flag_alloc(&flag0)) return(1);
  if (RCCE_flag_alloc(&flag1)) return(1);

  /* initialize array a on all tiles; this stuffs a into private caches  */
  for (offset=0,       i=0; i<NXNY; i++) a[i+offset] = 0.0;
  if (MY_ID == 0) 
     for (offset=0,    i=0; i<NX;   i++) a[i+offset] = 1.0;
  if (MY_ID == NTILES1) 
     for (offset=NXNY1,i=0; i<NX;   i++) a[i+offset] = 2.0;

  /* put in a barrier so everybody can be sure to have initialized       */
  RCCE_barrier(&RCCE_COMM_WORLD);

  /* main loop */

  /* only UE 0 measures; time is read again only under MY_ID==0 */
  if (MY_ID==0) time = RCCE_wtime();

  while ((iter--)>0){
  
    /* start with copying fringe data to neighboring tiles               */
    /* every UE except the last pushes its row at NXNY2 to UE MY_ID+1,
       bracketed by clearing and then setting flag0 on that UE           */
    if (MY_ID!=NTILES1) {
      /* Initialize neighbor flag to zero                                */
      RCCE_flag_write(&flag0, RCCE_FLAG_UNSET, MY_ID+1); 
      /* copy private data to shared comm buffer of neighbor             */
      RCCE_put((t_vcharp)(&buff[0]), (t_vcharp)(&a[NXNY2]), NX*sizeof(float), MY_ID+1);
      RCCE_flag_write(&flag0, RCCE_FLAG_SET, MY_ID+1); 
    }
    /* every UE except the first pushes its row at NX to UE MY_ID-1,
       bracketed by clearing and then setting flag1 on that UE.
       NOTE(review): the literal 0 is used here where the branch above
       uses RCCE_FLAG_UNSET -- presumably the same value; confirm.       */
    if (MY_ID != 0) {
      /* Initialize neighbor flag to zero                                */
      RCCE_flag_write(&flag1, 0, MY_ID-1); 
      /* copy private data to shared comm buffer of neighbor             */
      RCCE_put((t_vcharp)(&buff[NX]), (t_vcharp)(&a[NX]), NX*sizeof(float), MY_ID-1);
      RCCE_flag_write(&flag1, RCCE_FLAG_SET, MY_ID-1); 
    }

    /* Make sure the data has been recvd and copy data out of buffer(s)  */
    /* NOTE(review): there is no barrier inside the loop, so a neighbor can
       begin rewriting this UE's flag and buffer for the next iteration
       while this UE is still waiting or copying -- verify that the flag
       protocol tolerates this.                                          */
    if (MY_ID!=NTILES1) {
      /* row from UE MY_ID+1 goes into the row starting at NXNY1 */
      RCCE_wait_until(flag1, RCCE_FLAG_SET);
      RCCE_get((t_vcharp)(&a[NXNY1]), (t_vcharp)(&buff[NX]), NX*sizeof(float),MY_ID);
    }

    if (MY_ID!=0) {
      /* row from UE MY_ID-1 goes into row 0 */
      RCCE_wait_until(flag0, RCCE_FLAG_SET);
      RCCE_get((t_vcharp)(&a[0]), (t_vcharp)(&buff[0]), NX*sizeof(float),MY_ID);
    }

    /* apply the stencil operation                                       */
    /* in-place update: the element at offset O3 is incremented by the
       weighted sum of the elements at offsets O1..O5                    */
    for (i=0; i<NXNY2; i++) {
      a[i+O3] +=
         W1*a[i+O1] + W2*a[i+O2] + W3*a[i+O3] + W4*a[i+O4] + W5*a[i+O5];
    }
  }
  /* all UEs must have finished before UE 0 stops the clock */
  RCCE_barrier(&RCCE_COMM_WORLD);
  if (MY_ID==0) { 
    time = RCCE_wtime()-time;
  }

  /* print result strip by strip; this would not be done on RC */
  /* one barrier per rank serializes the output in rank order; the first
     UE also prints its row 0 and the last UE also prints its final row  */
  for (int id=0; id<=NTILES1; id++) {
    RCCE_barrier(&RCCE_COMM_WORLD);
    if (MY_ID==id) {
      int start = NX; int end = NXNY1;
      if (MY_ID==0) start = 0;
      if (MY_ID == NTILES1) end = NXNY;
      for (offset=0, i=start; i<end; i++) {
        if (!(i%NX)) printf("\n");
//        comment out next line and uncomment subsequent three to print error
        printf("%f ",a[i+offset]);
//        int jj=i/NX+(MY_ID*(NY-1));
//        double aexact=1.0+(double)jj/((NTILES1+1)*(NY-1));
//        printf("%f ",a[i+offset]-aexact);
      }
    }
  }
  RCCE_barrier(&RCCE_COMM_WORLD);
  if (MY_ID==0) { 
    printf("\nTotal time: %lf\n", time);
  }

  RCCE_finalize();

  return(0);
}