Example #1
0
/*
 * Power-request smoke test.
 *
 * Takes exactly one command-line argument: the frequency divider to ask
 * for.  The UE whose rank equals RCCE_power_domain_master() issues the
 * request through RCCE_iset_power() and then waits on it with
 * RCCE_wait_power(), reporting the outcome of each call.  All UEs then
 * meet at a barrier and UE 0 calls print_dividers().
 *
 * Returns 0 in every case, including the early exit taken when the
 * argument count is not 2 (that path returns before RCCE_finalize()).
 */
int RCCE_APP(int argc, char **argv){

  RCCE_REQUEST power_req;
  int granted_div, requested_div, granted_vlevel, me;

  RCCE_init(&argc, &argv);

  /* nothing to do unless exactly one argument was supplied */
  if (argc != 2) return(0);
  requested_div = atoi(argv[1]);

  me = RCCE_ue();

  if (me == RCCE_power_domain_master()) {
    int iset_status = RCCE_iset_power(requested_div, &power_req,
                                      &granted_div, &granted_vlevel);

    if (iset_status == 0) {
      printf("Requested fdiv: %d, actual fdiv, vlevel: %d, %d\n",
             requested_div, granted_div, granted_vlevel);
    }
    else {
      printf("UE %d failed ISET_POWER %d\n", me, requested_div);
    }
    fflush(NULL);

    /* the wait is issued whether or not the request call succeeded */
    if (RCCE_wait_power(&power_req) != 0) {
      printf("UE %d failed WAIT_POWER %d\n", me, requested_div);
      fflush(NULL);
    }
  }

  RCCE_barrier(&RCCE_COMM_WORLD);

  if (me == 0) {
    print_dividers();
  }
  RCCE_finalize();

  return(0);
}
Example #2
0
/*
 * Ping-pong timing benchmark between exactly two UEs.
 *
 * Optional argv[1] overrides the base round count (default 1024*1024).
 * Message sizes start at 32 bytes and are doubled 17 times; within each
 * doubling four sizes are measured, each about 1.189 (roughly the fourth
 * root of 2) times the previous one, rounded down to a multiple of 32 with
 * a floor of 32.  For every size the number of exchanges is
 * max(10, nrounds*32/size).
 *
 * The UE with non-zero rank sends first and then receives; UE 0 does the
 * reverse.  The non-zero-rank UE prints "<size>  <seconds per exchange>".
 *
 * Returns 0 on success and 1 on a bad round count, a UE count other than 2,
 * or failure to allocate the message buffer.
 */
int RCCE_APP(int argc, char **argv){
  /* large enough for the biggest message the size schedule below produces */
  enum { BUFFER_BYTES = 1024*1024*4 };
  int YOU, ME, nrounds = 1024*1024, actualrounds, size, round, index;
  int bigsize, subindex, roundsize;
  double timer;
  char *buffer;

  RCCE_init(&argc, &argv);

  //  RCCE_debug_set(RCCE_DEBUG_ALL);
  ME = RCCE_ue();
  YOU = !ME;

  if (argc>1) nrounds = atoi(*++argv);
  if (nrounds<1) {
    if (!ME) printf("Pingpong needs at least 1 round; try again\n");
    return(1);
  }
  if (RCCE_num_ues() != 2) {
    if (!ME) printf("Pingpong needs exactly two UEs; try again\n");
    return(1);
  }

  /* The buffer lives on the heap rather than as a 4 MiB automatic array,
     and is zero-filled so the bytes handed to RCCE_send are defined.      */
  buffer = calloc(BUFFER_BYTES, 1);
  if (!buffer) {
    printf("UE %d could not allocate the %d-byte pingpong buffer\n",
           ME, (int)BUFFER_BYTES);
    return(1);
  }

  bigsize = 32;
  for (index=0; index<17; index++) {
    size = bigsize;
    for (subindex=0; subindex<4; subindex++) {

      /* round the size down to a multiple of 32, but never below 32 */
      roundsize = max(32,size - size%32);
      // synchronize before starting the timer
      RCCE_barrier(&RCCE_COMM_WORLD);
      timer = RCCE_wtime();

      /* fewer exchanges for bigger messages, but at least 10 */
      actualrounds = max(10,(nrounds*32)/roundsize);
      for (round=0; round <actualrounds; round++) {
        if (ME)  {
          RCCE_send(buffer, roundsize, YOU);
          RCCE_recv(buffer, roundsize, YOU);
        }
        else {
          RCCE_recv(buffer, roundsize, YOU);
          RCCE_send(buffer, roundsize, YOU);
        }
      }
      timer = RCCE_wtime()-timer;

      if (ME) printf("%d  %1.9lf\n", roundsize, timer/actualrounds);
      /* next intermediate size; the product is truncated back to int */
      size = (int)(size * 1.18920712);

    }

    bigsize *= 2;

  }

  free(buffer);
  RCCE_finalize();

  return(0);
}
Example #3
0
/*
 * Bring up the RCCE-based platform layer.
 *
 * Forwards argc/argv to RCCE_init(), stores this UE's rank in TM2C_ID and
 * the UE count in TM2C_NUM_NODES, then hands the UE count and rank to
 * ssmp_init().
 */
void
sys_tm2c_init_system(int* argc, char** argv[])
{
  RCCE_init(argc, argv);

  /* publish rank and node count through the TM2C globals */
  TM2C_ID = RCCE_ue();
  TM2C_NUM_NODES = RCCE_num_ues();

  /* query again for ssmp: count first, then rank */
  const int ue_count = RCCE_num_ues();
  const int ue_rank = RCCE_ue();
  ssmp_init(ue_count, ue_rank);
}
Example #4
0
/*
 * Minimal RCCE program: initialise the library, print a greeting (with the
 * library version string when RCCE_VERSION is defined at compile time),
 * finalise, and return 0.
 */
int RCCE_APP(int argc, char **argv){

  RCCE_init(&argc, &argv);

#ifndef RCCE_VERSION
  /* no version macro available: plain greeting */
  printf("Hello from RCCE \n");
#else
  printf("Hello from RCCE ... I am %s\n",RCCE_VERSION);
#endif

  RCCE_finalize();

  return 0;
}
Example #5
0
/*
 * "Ping-ping" benchmark: both UEs post a non-blocking send and a
 * non-blocking receive to each other at the same time and wait for both.
 *
 * argv[1] (optional) - number of timed rounds, default NUMROUNDS, must be >= 1
 * argv[2] (optional) - largest message length, default DEFAULTLEN,
 *                      must be in 1..MAXBUFSIZE
 *
 * Message lengths run through 1, 2, 4, ... up to the maximum.  For each
 * length rounds+1 exchanges are executed; the timer is started after the
 * first exchange has completed, so that first exchange is not part of the
 * measured interval.  UE 0 prints length, microseconds per round and
 * MB/sec for each length.
 *
 * Invalid arguments or a UE count other than 2 terminate the process with
 * exit(-1).  Returns 0 otherwise.
 */
int RCCE_APP(int argc, char **argv)
{
  int idx;
  int ue_count;
  int peer, self;
  int rounds    = NUMROUNDS;
  int max_bytes = DEFAULTLEN;
  int nbytes;
  int iter;
  double elapsed;
  RCCE_SEND_REQUEST tx_req;
  RCCE_RECV_REQUEST rx_req;

  RCCE_init(&argc, &argv);

  self     = RCCE_ue();
  ue_count = RCCE_num_ues();

  /* ---- argument validation; only UE 0 reports, every UE exits ---- */

  if(argc > 1) rounds = atoi(argv[1]);

  if(rounds < 1)
  {
    if(self == 0) fprintf(stderr, "Pingping needs at least 1 round; try again\n");
    exit(-1);
  }

  if(argc > 2) max_bytes = atoi(argv[2]);

  if(max_bytes < 1)
  {
    if(self == 0) fprintf(stderr, "Illegal message size: %s; try again\n", argv[2]);
    exit(-1);
  }

  if(max_bytes > MAXBUFSIZE)
  {
    if(self == 0) fprintf(stderr, "Message size %d is too big; try again\n", max_bytes);
    exit(-1);
  }

  if(ue_count != 2)
  {
    if(self == 0) fprintf(stderr, "Pingping needs exactly two UEs; try again\n");
    exit(-1);
  }

  /* with two UEs the partner is simply "the other one" */
  peer = (self + 1) % 2;

  if(self == 0) printf("#bytes\t\tusec\t\tMB/sec\n");

  for(nbytes=1; nbytes <= max_bytes; nbytes = nbytes*2)
  {
#ifdef _CACHE_WARM_UP_
    /* touch both buffers once before measuring */
    for(idx=0; idx < nbytes; idx++)
    {
      dummy += send_buffer[idx];
      dummy += recv_buffer[idx];
    }
#endif

    /* line both UEs up before the exchanges begin */
    RCCE_barrier(&RCCE_COMM_WORLD);

    for(iter=0; iter < rounds+1; iter++)
    {

#ifdef _ERROR_CHECK_
      /* fill the outgoing message with a pattern unique to this round */
      for(idx=0; idx < nbytes; idx++)
      {
        send_buffer[idx] = (idx+nbytes+iter) % 127;
      }
#endif

      /* post both directions, then wait for both to finish */
      RCCE_isend(send_buffer, nbytes, peer, &tx_req);
      RCCE_irecv(recv_buffer, nbytes, peer, &rx_req);

      RCCE_isend_wait(&tx_req);
      RCCE_irecv_wait(&rx_req);

      /* the clock starts once the first exchange is done */
      if(iter==0) elapsed = RCCE_wtime();

#ifdef _ERROR_CHECK_
      /* the partner used the same pattern, so verify what arrived */
      for(idx=0; idx < nbytes; idx++)
      {
        if(recv_buffer[idx] != (idx+nbytes+iter) % 127)
        {
          fprintf(stderr, "ERROR: %d VS %d\n", recv_buffer[idx], (idx+nbytes+iter) % 127);
          exit(-1);
        }
      }
#endif
    }

    /* stop the clock */
    elapsed = RCCE_wtime() - elapsed;

    if(self == 0)
    {
      double sec_per_round = elapsed/(rounds);

      printf("%d\t\t%1.2lf\t\t%1.2lf\n", nbytes, sec_per_round*1000000, (nbytes / sec_per_round) / (1024*1024) );
    }
  }

  RCCE_finalize();

  return 0;
}
Example #6
0
/*
 * Stencil iteration on a strip-decomposed grid, interleaved with
 * power-management requests.
 *
 * Each UE owns a strip `a` of NXNY floats.  Per iteration it
 *   1. issues RCCE_iset_power(3, ...),
 *   2. exchanges one NX-wide fringe row with each existing neighbour
 *      (UE 0 has no left neighbour, the last UE has no right neighbour),
 *      waiting for the power request between the two exchange directions,
 *   3. issues a second RCCE_iset_power(3, ...) plus
 *      RCCE_set_frequency_divider(3, ...),
 *   4. applies the weighted stencil in place, then waits for the second
 *      power request.
 *
 * argv[1] (optional) overrides the iteration count (default 3).
 * Exits with status 1 if NX is not a multiple of 8; returns 0 otherwise.
 */
int RCCE_APP(int argc, char **argv){

  float     a[NXNY];
  int       i, offset, iter=3;
  int       fdiv, vlevel;
  int       ID, ID_right, ID_left;
  int       NTILES1;
  double    time;
  RCCE_REQUEST req;

  RCCE_init(&argc, &argv);
 
  //  RCCE_debug_set(RCCE_DEBUG_ALL);

  /* NTILES1 is the rank of the last UE */
  NTILES1 = RCCE_num_ues()-1;
  ID = RCCE_ue();


  /* neighbour ranks, computed with wrap-around; the wrapped pairs are
     excluded from communication by the ID/ID_right != 0 tests below     */
  ID_right = (ID+1)%RCCE_num_ues();
  ID_left = (ID-1+RCCE_num_ues())%RCCE_num_ues();

// set the relevant areas of the board to the default frequency and voltage
  RCCE_set_frequency_divider(8, &fdiv);
  if (ID==0)print_dividers();

  //  return(0);
  //    RCCE_iset_power(3, &req, &fdiv, &vlevel);
  //  if (ID==RCCE_power_domain_master()) printf("UE %d computed vlevel %d\n", ID,vlevel);
  //  RCCE_wait_power(&req);
  //  RCCE_set_frequency_divider(3, &fdiv);

  if (NX%8) {
    printf("Grid width should be multiple of 8: %d\n", NX);
    exit(1);
  }
  if (argc>1) iter=atoi(*++argv);
  if (!ID) printf("Core %d Executing %d iterations\n", ID, iter);

  /* initialize array a on all tiles; this stuffs a into private caches  */

  /* everything 0.0, then the first row on UE 0 becomes 1.0 and the row
     starting at NXNY1 on the last UE becomes 2.0                        */
  for (offset=0,       i=0; i<NXNY; i++) a[i+offset] = 0.0;
  if (ID == 0) 
     for (offset=0,    i=0; i<NX;   i++) a[i+offset] = 1.0;
  if (ID == NTILES1) 
     for (offset=NXNY1,i=0; i<NX;   i++) a[i+offset] = 2.0;

  /* main loop */

  /* start time is recorded on UE 0 only; the code that would report it
     is commented out further down                                       */
  if (ID==0) time = RCCE_wtime();

  while ((iter--)>0){

    /* first power request of the iteration; completion is awaited in the
       middle of the fringe exchange below                               */
    RCCE_iset_power(3, &req, &fdiv, &vlevel);
    if (ID==RCCE_power_domain_master()) 
      printf("asked for divider 3, received %d, voltage level %d\n", fdiv, vlevel); 
    fflush(NULL);
      /* despite its indentation this runs on every UE, not just on the
         power-domain master; iter has already been decremented here     */
      if (!(iter%100)) printf("Iteration %d\n", iter);
    /* start with copying fringe data to neighboring tiles; we need to
       group semantic send/recv pairs together to avoid deadlock         */
    if (ID_right!=0) RCCE_send((char*)(&a[NXNY2]), NX*sizeof(float), ID_right);
    if (ID != 0)     RCCE_recv((char*)(&a[0]),     NX*sizeof(float), ID_left);

    RCCE_wait_power(&req);
    /* second exchange direction: row NX goes left, row NXNY1 comes from
       the right                                                         */
    if (ID!=0)       RCCE_send((char *)(&a[NX]),    NX*sizeof(float), ID_left);
    if (ID_right!=0) RCCE_recv((char *)(&a[NXNY1]), NX*sizeof(float), ID_right);

    /* second power request, followed directly by a frequency-divider
       change; the request is awaited after the stencil loop             */
    RCCE_iset_power(3, &req, &fdiv, &vlevel);
    RCCE_set_frequency_divider(3, &fdiv);

    if (ID==RCCE_power_domain_master())    
      printf("asked for divider 3, received %d, voltage level %d\n", fdiv, vlevel);
    fflush(NULL);

    /* apply the stencil operation                                       */
    /* in-place update: a[i+O3] is both read and written in each step    */
    for (i=0; i<NXNY2; i++) {
      a[i+O3] +=
         W1*a[i+O1] + W2*a[i+O2] + W3*a[i+O3] + W4*a[i+O4] + W5*a[i+O5];
    }
    RCCE_wait_power(&req);
  }


//  /* print result strip by strip; this would not be done on RC */
//  for (int id=0; id<=NTILES1; id++) {
//    RCCE_barrier(&RCCE_COMM_WORLD);
//    if (ID==id) {
//      int start = NX; int end = NXNY1;
//      if (ID==0) start = 0;
//      if (ID == NTILES1) end = NXNY;
//      for (offset=0, i=start; i<end; i++) {
//        if (!(i%NX)) printf("\n");
////        comment out next line and uncomment subsequent three to print error
//        printf("%1.5f ",a[i+offset]); fflush(stdout);
////        int jj=i/NX+(ID*(NY-1));
////        double aexact=1.0+(double)jj/((NTILES1+1)*(NY-1));
////        printf("%f ",a[i+offset]-aexact);
//      }
//    }
//  }
//  RCCE_barrier(&RCCE_COMM_WORLD);
//  if (ID==0) { 
//    printf("\n");
//    time = RCCE_wtime()-time;
//    printf("Total time: %lf\n", time);
//  }

//reset the relevant areas of the board to the default frequency and voltage
//  RCCE_set_frequency_divider(8, &fdiv);
//  RCCE_iset_power(2, &req, &fdiv, &vlevel);
//  if (ID==RCCE_power_domain_master()) printf("UE %d computed vlevel %d\n", ID,vlevel);
//  RCCE_wait_power(&req);

//  RCCE_set_frequency_divider(3, &fdiv);
  /* all UEs finish their iterations before UE 0 prints the dividers */
  RCCE_barrier(&RCCE_COMM_WORLD);
  if (ID==0)print_dividers();

  RCCE_finalize();

  return(0);
}
Example #7
0
/*
 * Round-trip latency measurement using IPI notifications between two UEs.
 *
 * argv[3] and argv[4] are parsed as the two core IDs; the binding to the
 * peer core is looked up in barray[] by comparing disp_get_core_id() with
 * the first of them.
 *
 * After RCCE initialisation and a barrier, the function loops forever.  In
 * each batch of MAXROUND rounds the UE with non-zero rank raises a
 * notification and then waits for one; UE 0 waits for one and then raises
 * one, accumulating the rdtsc() cycles each round takes and yielding
 * whenever the round number is a multiple of ROUNDS_PER_SLICE.  After each
 * batch UE 0 prints the average as "RTL = <cycles>".
 *
 * Returns 1 if fewer than five arguments are supplied or the UE count is
 * not 2.  The measurement loop itself never terminates.
 */
int RCCE_APP(int argc, char **argv){
  int ME, round;
  uint64_t timer = 0, sum = 0;

  /* argv[3] and argv[4] are dereferenced below, so both must exist */
  if (argc < 5) {
    printf("Ping pong needs two core IDs as arguments 3 and 4; try again\n");
    return(1);
  }

  int core1 = atoi(argv[3]);
  int core2 = atoi(argv[4]);

  RCCE_init(&argc, &argv);

  RCCE_debug_set(RCCE_DEBUG_ALL);
  ME = RCCE_ue();
  printf("Core %d passed RCCE_init\n", ME);
  if (RCCE_num_ues() != 2) {
    if (!ME) printf("Ping pong needs exactly two UEs; try again\n");
    return(1);
  }

  // synchronize before starting the timer
  RCCE_barrier(&RCCE_COMM_WORLD);

  struct rcce_ump_ipi_binding *ob;

  /* select the binding that belongs to the other core */
  if(disp_get_core_id() == core1) {
      ob = (struct rcce_ump_ipi_binding *)barray[core2];
  } else {
      ob = (struct rcce_ump_ipi_binding *)barray[core1];
  }
  /* the status of this deregistration is not checked; err is reused for
     the registrations inside the loop, which are asserted               */
  errval_t err = lmp_endpoint_deregister(ob->ipi_notify.iep);

  struct event_closure cl = {
      .handler = rck_ping_handler,
      .arg = NULL
  };

  for(;;) {
  for (round=0; round <MAXROUND; round++) {
    if (ME)  {
        /* "send": raise the notification on the peer */
        ipi_notify_raise(&ob->ipi_notify);

        /* "receive": register the handler, then dispatch events until
           req_done is observed true                                     */
        err = ipi_notify_register(&ob->ipi_notify, get_default_waitset(), cl);
        assert(err_is_ok(err));
        req_done = false;
        while(!req_done) {
            messages_wait_and_handle_next();
        }

    } else {
        timer = rdtsc();

        /* "receive" first on this side */
        err = ipi_notify_register(&ob->ipi_notify, get_default_waitset(), cl);
        assert(err_is_ok(err));
        req_done = false;
        while(!req_done) {
            messages_wait_and_handle_next();
        }

        /* then "send" the reply */
        ipi_notify_raise(&ob->ipi_notify);

      sum += rdtsc() - timer;

        /* give up the CPU for a while every ROUNDS_PER_SLICE rounds */
        if(round % ROUNDS_PER_SLICE == 0) {
            yield_timeslices = 10;
            thread_yield();
            yield_timeslices = 0;
        }
    }
  }
  if (!ME) printf("RTL = %"PRIu64"\n", sum/MAXROUND);
  sum = 0;
  }

  /* not reached: the loop above has no exit */
  return(0);
}
Example #8
0
int RCCE_APP(int argc, char **argv){

  int ID, ID_nb, ID_donor, nrounds, error, strlength;
  RCCE_FLAG flag_sent, flag_ack;

  double *cbuffer, *buffer, sum;
  char msg[RCCE_MAX_ERROR_STRING];

  RCCE_init(&argc, &argv);

  ID = RCCE_ue();
  ID_nb = (ID+1)%RCCE_num_ues();
  ID_donor = (ID-1+RCCE_num_ues())%RCCE_num_ues();

  if (argc != 2) {
    if (ID==0) printf("Executable requires one parameter (number of rounds): %d\n",argc-1);
    return(1);
  }
  nrounds = atoi(*++argv);
  if (nrounds < 0) {
    if (ID==0) printf("Number of rounds should be non-negative: %d\n", nrounds);
    return(1);
  }

  /* allocate private memory and comm buffer space */
  buffer  = (double *) malloc(BUFSIZE*sizeof(double));
  if (!buffer) printf("Mark 01: Failed to allocate private buffer on proc %d\n", ID);
  cbuffer = (double *) RCCE_malloc(BUFSIZE*sizeof(double));
  if (!buffer) printf("Mark 02:RCCE failed to allocate %d doubles on proc %d\n",
      BUFSIZE, ID);

  /* initialize buffer with UE-specific data  */
  for (int i=0; i<BUFSIZE; i++) buffer[i] = (double)(ID+1+i);
  sum = 0.0;  
  for (int i=0; i<BUFSIZE; i++) sum += buffer[i];
  printf("Initial sum on UE %03d equals %f\n", ID, sum);

  /* create and initialize flag variables */
  if (error=RCCE_flag_alloc(&flag_sent))
    printf("Mark 03a: Could not allocate flag_sent on %d, error=%d\n", ID, error);
  if (error=RCCE_flag_alloc(&flag_ack))
    printf("Mark 03b: Could not allocate flag_ack on %d, error=%d\n", ID, error);

  if(error=RCCE_flag_write(&flag_sent, RCCE_FLAG_UNSET, ID)) 
    printf("Mark 04: Could not initialize flag_sent on %d, error=%d\n", ID, error);
  if(error=RCCE_flag_write(&flag_ack, RCCE_FLAG_SET, ID_donor)) 
    printf("Mark 05: Could not initialize flag_ack on %d, error=%d\n", ID_donor, error);

  for (int round=0; round<nrounds; round++) {

    int size = BUFSIZE*sizeof(double);
    RCCE_wait_until(flag_ack, RCCE_FLAG_SET);
    RCCE_flag_write(&flag_ack, RCCE_FLAG_UNSET, ID);
    RCCE_put((t_vcharp)cbuffer, (t_vcharp)buffer, size, ID_nb);
    RCCE_flag_write(&flag_sent, RCCE_FLAG_SET, ID_nb);

    RCCE_wait_until(flag_sent, RCCE_FLAG_SET);
    RCCE_flag_write(&flag_sent, RCCE_FLAG_UNSET, ID);
    RCCE_get((t_vcharp)buffer, (t_vcharp)cbuffer, size, ID);
    RCCE_flag_write(&flag_ack, RCCE_FLAG_SET, ID_donor);

  }

  /* compute local sum */
  sum = 0.0;
  for (int i=0; i<BUFSIZE; i++) sum += buffer[i];
  printf("Final sum on UE %03d equals %f\n", ID, sum);

  RCCE_finalize();

  return(0);
}
Example #9
0
int RCCE_APP(int argc, char **argv){
  int iam, bufsize=1024*64, size, i, receiver, sender,
    count_receiver, count_sender, count1, count2;
  volatile int *buffer;

  RCCE_init(&argc, &argv);

  iam      = RCCE_ue();
  receiver =1;
  sender   =0;
  size   = bufsize*sizeof(int);
  buffer = (int *) RCCE_shmalloc(size);
  count_receiver = count_sender = 0;

/**********************************************************
The sender initializes its data.
Now this is shared data so value is "seen" by both cores.
The receiver flushes its cache.
***********************************************************/
     if(iam==sender) {
        for(i=0;i<bufsize; i++) { buffer[i]=1; }
     }

     if(iam==receiver) {RCCE_DCMflush();}
  RCCE_barrier(&RCCE_COMM_WORLD);

/**********************************************************
The sender reads its data. 
It reads by creating count_sender. 
count_sender (on the sender) is 64K = 65536.
count_sender (on the receiver) is 0.

The sender modifies its data.
Now these data are in the sender's cache. So the data may not be seen by the receiver.
It might be seen by the receiver. We have no control when data from the cache are evicted.

The sender flushes its cache 
This guarantees that the receiver sees the data from the sender.
***********************************************************/
     if(iam==sender) {
        for(i=0;i<bufsize; i++) {
           count_sender +=buffer[i];
           buffer[i]++;
        }
        RCCE_DCMflush();
     }
  RCCE_barrier(&RCCE_COMM_WORLD);


/**********************************************************
The receiver reads the data.
It should see the data from  the sender.
count_receiver (on the receiver) should be 128K= 131072
count_receiver (on the sender is 0).
***********************************************************/
     if(iam==receiver) {
        for(i=0;i<bufsize; i++) { 
           count_receiver +=buffer[i]; 
        }
     }
  RCCE_barrier(&RCCE_COMM_WORLD);

/**********************************************************
count1 and count 2 are on both cores.

count2 contains the number of buffer entries that are 2 (which
should be all of them). So count2 should be 64K.

count1 should be 0.

***********************************************************/

     count1= count2= 0;
     for(i=0;i<bufsize; i++) {
        if(buffer[i]==2) count2++;
        if(buffer[i]==1) count1++;
     }

     printf("LINE %d: Core %d: count_sender: %d  count_receiver: %d  count1: %d   count2: %d\n",
       __LINE__,iam,count_sender, count_receiver,count1,count2);

  RCCE_barrier(&RCCE_COMM_WORLD);
     RCCE_shfree((t_vcharp)buffer);
     RCCE_finalize();
     return(0);
}
Example #10
0
/*
 * MPI-named entry point that forwards to RCCE_init() and returns its
 * result unchanged.
 */
int MPI_Init(int *argc, char ***argv) {
  const int status = RCCE_init(argc, argv);

  return status;
}
Example #11
0
/*
 * Stencil iteration on a strip-decomposed grid using one-sided put/get
 * and flags for the fringe exchange.
 *
 * Each UE owns a strip `a` of NXNY floats and a comm buffer of 2*NX
 * floats.  Per iteration a UE puts its row starting at NXNY2 into the
 * first half of the next UE's comm buffer and its row starting at NX into
 * the second half of the previous UE's comm buffer, each put bracketed by
 * clearing and setting a flag on the target.  It then waits for its own
 * flags, copies the received rows out of its comm buffer, and applies the
 * weighted stencil in place.
 *
 * argv[1] (optional) overrides the iteration count (default 10).
 * After the loop the strips are printed UE by UE and UE 0 prints the total
 * time.
 *
 * Exits with status 1 if NX is not a multiple of 8.  Returns 1 if the comm
 * buffer or either flag cannot be allocated, 0 otherwise.
 */
int RCCE_APP(int argc, char **argv) {

  /* statically allocated space sits in off-chip private memory          */
  float     a[NXNY], *buff;
  int       i, offset, iter=10;
  int       MY_ID;
  int       NTILES1;
  double    time = 0.0;
  RCCE_FLAG flag0, flag1;

  RCCE_init(&argc, &argv);

  /* NTILES1 is the rank of the last UE */
  NTILES1 = RCCE_num_ues()-1;
  MY_ID = RCCE_ue();

  if (NX%8) {
    printf("Grid width should be multiple of 8: %d\n", NX);
    exit(1);
  }
  if (argc>1) iter=atoi(*++argv);
  if (MY_ID==0) printf("Executing %d iterations\n", iter);

  /* allocate space on the comm buffer; fail the same way as the flag
     allocations below if none is available                              */
  buff = (float *) RCCE_malloc(sizeof(float)*2*NX);
  if (!buff) return(1);
  /* Allocate flags to coordinate comm.                                  */
  if (RCCE_flag_alloc(&flag0)) return(1);
  if (RCCE_flag_alloc(&flag1)) return(1);

  /* initialize array a on all tiles; this stuffs a into private caches  */
  for (offset=0,       i=0; i<NXNY; i++) a[i+offset] = 0.0;
  if (MY_ID == 0)
     for (offset=0,    i=0; i<NX;   i++) a[i+offset] = 1.0;
  if (MY_ID == NTILES1)
     for (offset=NXNY1,i=0; i<NX;   i++) a[i+offset] = 2.0;

  /* put in a barrier so everybody can be sure to have initialized       */
  RCCE_barrier(&RCCE_COMM_WORLD);

  /* main loop */

  /* only UE 0 measures and reports the elapsed time */
  if (MY_ID==0) time = RCCE_wtime();

  while ((iter--)>0){

    /* start with copying fringe data to neighboring tiles               */
    if (MY_ID!=NTILES1) {
      /* Initialize neighbor flag to zero                                */
      RCCE_flag_write(&flag0, RCCE_FLAG_UNSET, MY_ID+1);
      /* copy private data to shared comm buffer of neighbor             */
      RCCE_put((t_vcharp)(&buff[0]), (t_vcharp)(&a[NXNY2]), NX*sizeof(float), MY_ID+1);
      RCCE_flag_write(&flag0, RCCE_FLAG_SET, MY_ID+1);
    }
    if (MY_ID != 0) {
      /* Initialize neighbor flag to zero                                */
      RCCE_flag_write(&flag1, RCCE_FLAG_UNSET, MY_ID-1);
      /* copy private data to shared comm buffer of neighbor             */
      RCCE_put((t_vcharp)(&buff[NX]), (t_vcharp)(&a[NX]), NX*sizeof(float), MY_ID-1);
      RCCE_flag_write(&flag1, RCCE_FLAG_SET, MY_ID-1);
    }

    /* Make sure the data has been recvd and copy data out of buffer(s)  */
    /* NOTE(review): nothing here tells a neighbour that its previous row
       has been copied out before it clears the flag and puts the next
       one - confirm that this ordering is acceptable for the target.     */
    if (MY_ID!=NTILES1) {
      RCCE_wait_until(flag1, RCCE_FLAG_SET);
      RCCE_get((t_vcharp)(&a[NXNY1]), (t_vcharp)(&buff[NX]), NX*sizeof(float),MY_ID);
    }

    if (MY_ID!=0) {
      RCCE_wait_until(flag0, RCCE_FLAG_SET);
      RCCE_get((t_vcharp)(&a[0]), (t_vcharp)(&buff[0]), NX*sizeof(float),MY_ID);
    }

    /* apply the stencil operation                                       */
    /* in-place update: a[i+O3] is both read and written in each step    */
    for (i=0; i<NXNY2; i++) {
      a[i+O3] +=
         W1*a[i+O1] + W2*a[i+O2] + W3*a[i+O3] + W4*a[i+O4] + W5*a[i+O5];
    }
  }
  RCCE_barrier(&RCCE_COMM_WORLD);
  if (MY_ID==0) {
    time = RCCE_wtime()-time;
  }

  /* print result strip by strip; this would not be done on RC */
  for (int id=0; id<=NTILES1; id++) {
    /* one barrier per UE so that the strips are printed in rank order */
    RCCE_barrier(&RCCE_COMM_WORLD);
    if (MY_ID==id) {
      /* interior UEs skip their first row and stop at NXNY1; UE 0 starts
         at its first row and the last UE prints through the end         */
      int start = NX; int end = NXNY1;
      if (MY_ID==0) start = 0;
      if (MY_ID == NTILES1) end = NXNY;
      for (offset=0, i=start; i<end; i++) {
        if (!(i%NX)) printf("\n");
//        comment out next line and uncomment subsequent three to print error
        printf("%f ",a[i+offset]);
//        int jj=i/NX+(MY_ID*(NY-1));
//        double aexact=1.0+(double)jj/((NTILES1+1)*(NY-1));
//        printf("%f ",a[i+offset]-aexact);
      }
    }
  }
  RCCE_barrier(&RCCE_COMM_WORLD);
  if (MY_ID==0) {
    printf("\nTotal time: %lf\n", time);
  }

  RCCE_finalize();

  return(0);
}