예제 #1
0
/*
 * System-level start-up hook.
 *
 * Initialises RCCE with the caller's argc/argv pointers (passed through
 * untouched), stores this node's id in TM2C_ID and the node count in
 * TM2C_NUM_NODES, and finally hands the node count and id -- queried from
 * RCCE once more -- to ssmp_init().
 */
void sys_tm2c_init_system(int* argc, char** argv[])
{
  /* RCCE comes up first; every query below is made after this call. */
  RCCE_init(argc, argv);

  /* Publish identity and system size through the TM2C globals. */
  TM2C_ID = RCCE_ue();
  TM2C_NUM_NODES = RCCE_num_ues();

  /* ssmp_init() takes (number of nodes, own id), in that order. */
  ssmp_init(RCCE_num_ues(), RCCE_ue());
}
예제 #2
0
/*
 * Ping-pong timing benchmark between exactly two UEs.
 *
 * argv[1] (optional): base number of rounds, default 1024*1024, must be >= 1.
 *
 * The base message size starts at 32 bytes and is doubled 17 times; inside
 * each doubling the size is grown four times by the factor 1.18920712
 * (about 2^(1/4)).  Every size is rounded down to a multiple of 32 (but never
 * below 32).  For each resulting size the two UEs bounce a message back and
 * forth and UE 1 prints "<size>  <seconds per round>".
 *
 * Returns 0 on success, 1 if the round count or the number of UEs is invalid.
 */
int RCCE_APP(int argc, char **argv){
  int YOU, ME, nrounds = 1024*1024, actualrounds, size, round, index;
  int bigsize, subindex, roundsize;
  long long scaledrounds;
  double timer;
  /* NOTE(review): 4 MiB automatic array.  It is deliberately not made static:
     if UEs are ever run as threads of one process a static buffer would be
     shared between them.  Make sure the stack limit is large enough. */
  char buffer[1024*1024*4];

  RCCE_init(&argc, &argv);

  //  RCCE_debug_set(RCCE_DEBUG_ALL);
  ME = RCCE_ue();
  YOU = !ME;

  if (argc>1) nrounds = atoi(*++argv);
  if (nrounds<1) {
    if (!ME) printf("Pingpong needs at least 1 round; try again\n");
    return(1);
  }
  if (RCCE_num_ues() != 2) {
    if (!ME) printf("Pingpong needs exactly two UEs; try again\n");
    return(1);
  }

  bigsize = 32;
  for (index=0; index<17; index++) {
    size = bigsize;
    for (subindex=0; subindex<4; subindex++) {

      /* round the message size down to a multiple of 32, minimum 32 */
      roundsize = size - size%32;
      if (roundsize < 32) roundsize = 32;

      // synchronize before starting the timer
      RCCE_barrier(&RCCE_COMM_WORLD);
      timer = RCCE_wtime();

      /* fewer rounds for bigger messages, but never fewer than 10; the
         product is formed in 64 bits so that a large user-supplied round
         count cannot overflow int (the quotient is <= nrounds because
         roundsize >= 32, so it always fits back into int) */
      scaledrounds = ((long long)nrounds*32)/roundsize;
      actualrounds = (scaledrounds < 10) ? 10 : (int)scaledrounds;

      for (round=0; round <actualrounds; round++) {
        if (ME)  {
          /* UE 1 serves: send first, then wait for the echo */
          RCCE_send(buffer, roundsize, YOU);
          RCCE_recv(buffer, roundsize, YOU);
        }
        else {
          /* UE 0 mirrors the order so every send meets a receive */
          RCCE_recv(buffer, roundsize, YOU);
          RCCE_send(buffer, roundsize, YOU);
        }
      }
      timer = RCCE_wtime()-timer;

      if (ME) printf("%d  %1.9lf\n", roundsize, timer/actualrounds);

      /* geometric sub-step; the fractional part is intentionally dropped */
      size = (int)(size * 1.18920712);

    }

    bigsize *= 2;

  }

  RCCE_finalize();

  return(0);
}
예제 #3
0
/*
 * Ping-ping benchmark between exactly two UEs: in every round each rank posts
 * an RCCE_isend and an RCCE_irecv of the same length towards the other rank
 * and then waits on both requests.
 *
 * argv[1] (optional): number of timed rounds, default NUMROUNDS, must be >= 1.
 * argv[2] (optional): largest message length, default DEFAULTLEN, must lie in
 *                     1..MAXBUFSIZE.
 *
 * Lengths run over 1, 2, 4, ... up to the maximum.  For every length one extra
 * first round is executed before the clock is started; afterwards rank 0
 * prints one line under the header "#bytes  usec  MB/sec".
 *
 * Returns 0 on success; any invalid setting ends the process with exit(-1).
 */
int RCCE_APP(int argc, char **argv)
{
  int i;
  int ue_count;
  int peer, self;
  int rounds  = NUMROUNDS;
  int max_len = DEFAULTLEN;
  int msg_len;
  int r;
  double elapsed;
  RCCE_SEND_REQUEST tx_req;
  RCCE_RECV_REQUEST rx_req;

  RCCE_init(&argc, &argv);

  self     = RCCE_ue();
  ue_count = RCCE_num_ues();

  /* first optional argument: number of rounds */
  if(argc > 1)
  {
    rounds = atoi(argv[1]);
  }
  if(rounds < 1)
  {
    if(self == 0)
    {
      fprintf(stderr, "Pingping needs at least 1 round; try again\n");
    }
    exit(-1);
  }

  /* second optional argument: upper bound for the message length */
  if(argc > 2)
  {
    max_len = atoi(argv[2]);
  }
  if(max_len < 1)
  {
    if(self == 0)
    {
      fprintf(stderr, "Illegal message size: %s; try again\n", argv[2]);
    }
    exit(-1);
  }
  if(max_len > MAXBUFSIZE)
  {
    if(self == 0)
    {
      fprintf(stderr, "Message size %d is too big; try again\n", max_len);
    }
    exit(-1);
  }

  /* the exchange below is strictly pairwise */
  if(ue_count != 2)
  {
    if(self == 0)
    {
      fprintf(stderr, "Pingping needs exactly two UEs; try again\n");
    }
    exit(-1);
  }

  peer = (self + 1) % 2;

  if(self == 0)
  {
    printf("#bytes\t\tusec\t\tMB/sec\n");
  }

  msg_len = 1;
  while(msg_len <= max_len)
  {
#ifdef _CACHE_WARM_UP_
    /* touch both buffers once so they are cached before measuring */
    for(i=0; i < msg_len; i++)
    {
      dummy += send_buffer[i];
      dummy += recv_buffer[i];
    }
#endif

    /* line both ranks up before the exchange starts */
    RCCE_barrier(&RCCE_COMM_WORLD);

    /* rounds+1 passes: pass 0 runs before the clock is started */
    for(r=0; r <= rounds; r++)
    {

#ifdef _ERROR_CHECK_
      /* fill the outgoing message with a pattern both sides can recompute */
      for(i=0; i < msg_len; i++)
      {
        send_buffer[i] = (i+msg_len+r) % 127;
      }
#endif

      /* post both directions, then wait for both to finish */
      RCCE_isend(send_buffer, msg_len, peer, &tx_req);
      RCCE_irecv(recv_buffer, msg_len, peer, &rx_req);

      RCCE_isend_wait(&tx_req);
      RCCE_irecv_wait(&rx_req);

      /* the clock starts once the first exchange has completed */
      if(r == 0)
      {
        elapsed = RCCE_wtime();
      }

#ifdef _ERROR_CHECK_
      /* the peer used the same pattern, so verify what arrived */
      for(i=0; i < msg_len; i++)
      {
        const int expected = (i+msg_len+r) % 127;

        if(recv_buffer[i] != expected)
        {
          fprintf(stderr, "ERROR: %d VS %d\n", recv_buffer[i], expected);
          exit(-1);
        }
      }
#endif
    }

    /* turn the start stamp into the elapsed time */
    elapsed = RCCE_wtime() - elapsed;

    if(self == 0)
    {
      printf("%d\t\t%1.2lf\t\t%1.2lf\n", msg_len, elapsed/(rounds)*1000000, (msg_len / (elapsed/(rounds))) / (1024*1024) );
    }

    msg_len *= 2;
  }

  RCCE_finalize();

  return 0;
}
예제 #4
0
/*
 * Stencil example combined with RCCE power-management calls.
 *
 * Every UE owns a private array a[NXNY].  Per iteration it issues power
 * requests (RCCE_iset_power / RCCE_wait_power, RCCE_set_frequency_divider),
 * exchanges NX-float fringe rows with its left and right neighbour UEs via
 * RCCE_send/RCCE_recv, and applies a five-term weighted update
 * (weights W1..W5 at offsets O1..O5).
 *
 * argv[1] (optional): number of iterations, default 3.
 * Returns 0; exits with status 1 if NX is not a multiple of 8.
 *
 * NOTE(review): a[] is an automatic array of NXNY floats -- presumably large;
 * confirm it fits the stack.  `time` is only written here; the code that
 * consumed it is commented out below.
 */
int RCCE_APP(int argc, char **argv){

  float     a[NXNY];
  int       i, offset, iter=3;
  int       fdiv, vlevel;
  int       ID, ID_right, ID_left;
  int       NTILES1;
  double    time;
  RCCE_REQUEST req;

  RCCE_init(&argc, &argv);
 
  //  RCCE_debug_set(RCCE_DEBUG_ALL);

  /* NTILES1 is the highest UE id, ID is the id of this UE */
  NTILES1 = RCCE_num_ues()-1;
  ID = RCCE_ue();


  /* neighbour ids, wrapping around at both ends of the UE range */
  ID_right = (ID+1)%RCCE_num_ues();
  ID_left = (ID-1+RCCE_num_ues())%RCCE_num_ues();

// set the relevant areas of the board to the default frequency and voltage
  RCCE_set_frequency_divider(8, &fdiv);
  if (ID==0)print_dividers();

  //  return(0);
  //    RCCE_iset_power(3, &req, &fdiv, &vlevel);
  //  if (ID==RCCE_power_domain_master()) printf("UE %d computed vlevel %d\n", ID,vlevel);
  //  RCCE_wait_power(&req);
  //  RCCE_set_frequency_divider(3, &fdiv);

  /* NX is a compile-time constant, so this check behaves the same on every UE */
  if (NX%8) {
    printf("Grid width should be multiple of 8: %d\n", NX);
    exit(1);
  }
  if (argc>1) iter=atoi(*++argv);
  if (!ID) printf("Core %d Executing %d iterations\n", ID, iter);

  /* initialize array a on all tiles; this stuffs a into private caches  */

  /* everything is zeroed; UE 0 then sets its first NX entries to 1.0 and the
     last UE sets the NX entries starting at NXNY1 to 2.0                  */
  for (offset=0,       i=0; i<NXNY; i++) a[i+offset] = 0.0;
  if (ID == 0) 
     for (offset=0,    i=0; i<NX;   i++) a[i+offset] = 1.0;
  if (ID == NTILES1) 
     for (offset=NXNY1,i=0; i<NX;   i++) a[i+offset] = 2.0;

  /* main loop */

  if (ID==0) time = RCCE_wtime();

  while ((iter--)>0){

    /* first power request of the iteration; it is completed by the
       RCCE_wait_power(&req) between the two halves of the fringe exchange */
    RCCE_iset_power(3, &req, &fdiv, &vlevel);
    if (ID==RCCE_power_domain_master()) 
      printf("asked for divider 3, received %d, voltage level %d\n", fdiv, vlevel); 
    fflush(NULL);
      /* progress report; iter has already been decremented at this point */
      if (!(iter%100)) printf("Iteration %d\n", iter);
    /* start with copying fringe data to neighboring tiles; we need to
       group semantic send/recv pairs together to avoid deadlock         */
    /* ID_right is 0 only on the last UE and ID is 0 only on the first one,
       so the wrap-around link between last and first UE is never used   */
    if (ID_right!=0) RCCE_send((char*)(&a[NXNY2]), NX*sizeof(float), ID_right);
    if (ID != 0)     RCCE_recv((char*)(&a[0]),     NX*sizeof(float), ID_left);

    RCCE_wait_power(&req);
    /* second half of the exchange, in the opposite direction */
    if (ID!=0)       RCCE_send((char *)(&a[NX]),    NX*sizeof(float), ID_left);
    if (ID_right!=0) RCCE_recv((char *)(&a[NXNY1]), NX*sizeof(float), ID_right);

    /* second power request; it is completed by the RCCE_wait_power(&req)
       after the stencil loop.  The frequency divider is also set directly
       while that request is outstanding.                                */
    RCCE_iset_power(3, &req, &fdiv, &vlevel);
    RCCE_set_frequency_divider(3, &fdiv);

    if (ID==RCCE_power_domain_master())    
      printf("asked for divider 3, received %d, voltage level %d\n", fdiv, vlevel);
    fflush(NULL);

    /* apply the stencil operation                                       */
    for (i=0; i<NXNY2; i++) {
      a[i+O3] +=
         W1*a[i+O1] + W2*a[i+O2] + W3*a[i+O3] + W4*a[i+O4] + W5*a[i+O5];
    }
    RCCE_wait_power(&req);
  }


//  /* print result strip by strip; this would not be done on RC */
//  for (int id=0; id<=NTILES1; id++) {
//    RCCE_barrier(&RCCE_COMM_WORLD);
//    if (ID==id) {
//      int start = NX; int end = NXNY1;
//      if (ID==0) start = 0;
//      if (ID == NTILES1) end = NXNY;
//      for (offset=0, i=start; i<end; i++) {
//        if (!(i%NX)) printf("\n");
////        comment out next line and uncomment subsequent three to print error
//        printf("%1.5f ",a[i+offset]); fflush(stdout);
////        int jj=i/NX+(ID*(NY-1));
////        double aexact=1.0+(double)jj/((NTILES1+1)*(NY-1));
////        printf("%f ",a[i+offset]-aexact);
//      }
//    }
//  }
//  RCCE_barrier(&RCCE_COMM_WORLD);
//  if (ID==0) { 
//    printf("\n");
//    time = RCCE_wtime()-time;
//    printf("Total time: %lf\n", time);
//  }

//reset the relevant areas of the board to the default frequency and voltage
//  RCCE_set_frequency_divider(8, &fdiv);
//  RCCE_iset_power(2, &req, &fdiv, &vlevel);
//  if (ID==RCCE_power_domain_master()) printf("UE %d computed vlevel %d\n", ID,vlevel);
//  RCCE_wait_power(&req);

//  RCCE_set_frequency_divider(3, &fdiv);
  /* all UEs meet here before UE 0 prints the dividers a second time */
  RCCE_barrier(&RCCE_COMM_WORLD);
  if (ID==0)print_dividers();

  RCCE_finalize();

  return(0);
}
예제 #5
0
/*
 * IPI-notification ping-pong between exactly two UEs.
 *
 * argv[3] and argv[4] hold the ids of the two participating cores; they are
 * read before RCCE_init() is given the argument vector.
 *
 * UE 1 raises a notification and then waits for one; UE 0 waits for a
 * notification and answers it, accumulating the rdtsc() cycle count of each
 * wait-and-answer step.  After every MAXROUND rounds UE 0 prints the average
 * as "RTL = <cycles>" and the measurement starts over; the outer loop never
 * terminates.
 *
 * Returns 1 if the core ids are missing or the number of UEs is not two.
 */
int RCCE_APP(int argc, char **argv){
  int ME, round;
  uint64_t timer = 0, sum = 0;

  /* Both core ids are mandatory: without this check atoi() would be handed
     a NULL or out-of-range pointer when fewer than five arguments exist. */
  if (argc < 5) {
    printf("Ping pong needs two core ids in argv[3] and argv[4]; try again\n");
    return(1);
  }
  int core1 = atoi(argv[3]);
  int core2 = atoi(argv[4]);

  RCCE_init(&argc, &argv);

  RCCE_debug_set(RCCE_DEBUG_ALL);
  ME = RCCE_ue();
  printf("Core %d passed RCCE_init\n", ME);
  if (RCCE_num_ues() != 2) {
    if (!ME) printf("Ping pong needs exactly two UEs; try again\n");
    return(1);
  }

  // synchronize before starting the timer
  RCCE_barrier(&RCCE_COMM_WORLD);

  /* select the binding stored for the other core of the pair
     NOTE(review): core1/core2 index barray unchecked -- confirm its bounds */
  struct rcce_ump_ipi_binding *ob;

  if(disp_get_core_id() == core1) {
      ob = (struct rcce_ump_ipi_binding *)barray[core2];
  } else {
      ob = (struct rcce_ump_ipi_binding *)barray[core1];
  }

  /* NOTE(review): the result of the deregistration is not inspected; err is
     overwritten by the first ipi_notify_register() call below. */
  errval_t err = lmp_endpoint_deregister(ob->ipi_notify.iep);

  struct event_closure cl = {
      .handler = rck_ping_handler,
      .arg = NULL
  };

  for(;;) {
    for (round=0; round <MAXROUND; round++) {
      if (ME)  {
        /* ping: notify the peer (takes the place of an RCCE_send) ... */
        ipi_notify_raise(&ob->ipi_notify);

        /* ... then handle events until req_done is set (takes the place of
           an RCCE_recv); presumably rck_ping_handler sets it -- confirm */
        err = ipi_notify_register(&ob->ipi_notify, get_default_waitset(), cl);
        assert(err_is_ok(err));
        req_done = false;
        while(!req_done) {
            messages_wait_and_handle_next();
        }
      } else {
        timer = rdtsc();

        /* wait for the ping ... */
        err = ipi_notify_register(&ob->ipi_notify, get_default_waitset(), cl);
        assert(err_is_ok(err));
        req_done = false;
        while(!req_done) {
            messages_wait_and_handle_next();
        }

        /* ... and answer it */
        ipi_notify_raise(&ob->ipi_notify);

        sum += rdtsc() - timer;

        /* give up the core for a while every ROUNDS_PER_SLICE rounds */
        if(round % ROUNDS_PER_SLICE == 0) {
            yield_timeslices = 10;
            thread_yield();
            yield_timeslices = 0;
        }
      }
    }
    if (!ME) printf("RTL = %"PRIu64"\n", sum/MAXROUND);
    sum = 0;
  }

  /* not reached: the loop above has no exit */
  return(0);
}
예제 #6
0
파일: RCCE_shift.c 프로젝트: xulesc/rckskel
int RCCE_APP(int argc, char **argv){

  int ID, ID_nb, ID_donor, nrounds, error, strlength;
  RCCE_FLAG flag_sent, flag_ack;

  double *cbuffer, *buffer, sum;
  char msg[RCCE_MAX_ERROR_STRING];

  RCCE_init(&argc, &argv);

  ID = RCCE_ue();
  ID_nb = (ID+1)%RCCE_num_ues();
  ID_donor = (ID-1+RCCE_num_ues())%RCCE_num_ues();

  if (argc != 2) {
    if (ID==0) printf("Executable requires one parameter (number of rounds): %d\n",argc-1);
    return(1);
  }
  nrounds = atoi(*++argv);
  if (nrounds < 0) {
    if (ID==0) printf("Number of rounds should be non-negative: %d\n", nrounds);
    return(1);
  }

  /* allocate private memory and comm buffer space */
  buffer  = (double *) malloc(BUFSIZE*sizeof(double));
  if (!buffer) printf("Mark 01: Failed to allocate private buffer on proc %d\n", ID);
  cbuffer = (double *) RCCE_malloc(BUFSIZE*sizeof(double));
  if (!buffer) printf("Mark 02:RCCE failed to allocate %d doubles on proc %d\n",
      BUFSIZE, ID);

  /* initialize buffer with UE-specific data  */
  for (int i=0; i<BUFSIZE; i++) buffer[i] = (double)(ID+1+i);
  sum = 0.0;  
  for (int i=0; i<BUFSIZE; i++) sum += buffer[i];
  printf("Initial sum on UE %03d equals %f\n", ID, sum);

  /* create and initialize flag variables */
  if (error=RCCE_flag_alloc(&flag_sent))
    printf("Mark 03a: Could not allocate flag_sent on %d, error=%d\n", ID, error);
  if (error=RCCE_flag_alloc(&flag_ack))
    printf("Mark 03b: Could not allocate flag_ack on %d, error=%d\n", ID, error);

  if(error=RCCE_flag_write(&flag_sent, RCCE_FLAG_UNSET, ID)) 
    printf("Mark 04: Could not initialize flag_sent on %d, error=%d\n", ID, error);
  if(error=RCCE_flag_write(&flag_ack, RCCE_FLAG_SET, ID_donor)) 
    printf("Mark 05: Could not initialize flag_ack on %d, error=%d\n", ID_donor, error);

  for (int round=0; round<nrounds; round++) {

    int size = BUFSIZE*sizeof(double);
    RCCE_wait_until(flag_ack, RCCE_FLAG_SET);
    RCCE_flag_write(&flag_ack, RCCE_FLAG_UNSET, ID);
    RCCE_put((t_vcharp)cbuffer, (t_vcharp)buffer, size, ID_nb);
    RCCE_flag_write(&flag_sent, RCCE_FLAG_SET, ID_nb);

    RCCE_wait_until(flag_sent, RCCE_FLAG_SET);
    RCCE_flag_write(&flag_sent, RCCE_FLAG_UNSET, ID);
    RCCE_get((t_vcharp)buffer, (t_vcharp)cbuffer, size, ID);
    RCCE_flag_write(&flag_ack, RCCE_FLAG_SET, ID_donor);

  }

  /* compute local sum */
  sum = 0.0;
  for (int i=0; i<BUFSIZE; i++) sum += buffer[i];
  printf("Final sum on UE %03d equals %f\n", ID, sum);

  RCCE_finalize();

  return(0);
}
예제 #7
0
/*
 * Build the team structure used by the work queue and store it in *wq_pars.
 *
 * Fields written: NP, ID, team_size, team_lead, master, local_rank and, on
 * the overall master only, master_number.  The arrays that wq_pars->team_member
 * and wq_pars->master_list point to are filled in place; the caller must
 * provide them (NOTE(review): required capacities are not visible here --
 * presumably the power-domain size and the number of UEs; confirm).
 *
 * Every UE has to call this function: team members, team leads and the
 * overall master exchange messages with each other below.
 *
 * Always returns RCCE_SUCCESS.
 *
 * NOTE(review): `mem` is only referenced by the commented-out report at the
 * end, and the `int ue` declared in the master's loop shadows the outer `ue`.
 */
int RCCE_setup_work_queue_teams(QUEUE_PARMS *wq_pars){ 
 
  int NP, ID, ue, size, mem, master, team_lead, team_size, local_rank;
  int test, isleader;
  int *team_member, *master_list;
 
  NP = wq_pars->NP = RCCE_num_ues();
  ID = wq_pars->ID = RCCE_ue();
  team_member = wq_pars->team_member;
  master_list = wq_pars->master_list;
 
/* determine the number of UEs in the local power domain and form teams         */
  wq_pars->team_size = team_size = RCCE_power_domain_size();
  wq_pars->team_lead = team_lead = RCCE_power_domain_master();
  if (team_lead == ID) {
    /* the team lead is the first team member                                   */
    team_member[0] = team_lead;
    size = 1;
    /* the team leads collects IDs from its team members ...                    */
    /* every other UE is polled in turn until team_size entries are known; the
       slot handed to RCCE_recv_test is overwritten with the sender's id `ue`
       whenever the test reports a message                                      */
    while (size<team_size) for (ue=0; ue<NP; ue++) if (ue != team_lead) {
      RCCE_recv_test((char *)(&(team_member[size])), sizeof(int), ue, &test);
      if (test) team_member[size++] = ue;
    }
    /* ... and sends the list to all other team members, after sorting it       */
    RCCE_qsort((char *)team_member, team_size, sizeof(int), id_compare);
    /* the loop starts at index 1, skipping the first entry of the sorted list  */
    for (ue=1; ue<team_size; ue++) 
      RCCE_send((char *)team_member, team_size*sizeof(int), team_member[ue]);
  }
  else {
    /* team members check in with the team lead ...                             */
    RCCE_send((char *)(&ID), sizeof(int), team_lead);
    /* ... and receive the complete list of team members                        */
    RCCE_recv((char *)team_member, team_size*sizeof(int), team_lead);
  }
 
  /* we assign the UE with the highest rank the role of master. We know that
     this UE is either in a power domain by itself, or there is another UE  
     in the same power domain who is the power domain master, because the
     power domain master is always the UE in that domain with the lowest rank   */
  master = wq_pars->master = NP-1;
 
/* the team containing the overall master must remove it from its member list   */
  /* only the last entry of the list is inspected here                          */
  if (team_member[team_size-1] == master) wq_pars->team_size = --team_size;
 
  /* the overall master is not in any team                                      */
  if (ID==master) team_size = wq_pars->team_size = 0;
 
/* each UE determines its rank within the team                                  */
  /* local_rank stays 0 when ID is not found in the (possibly shortened) list   */
  local_rank = wq_pars->local_rank = 0;
  for (ue=0; ue<team_size; ue++) if (ID==team_member[ue]) 
    local_rank = wq_pars->local_rank = ue;
 
/* this code determines number of power domain leads, plus list of UEs          */
  if (ID == master) {
    wq_pars->master_number = 0;
    /* one blocking receive per UE 0..NP-2, taken in increasing id order        */
    for (int ue=0; ue<RCCE_num_ues()-1; ue++) {
      /* ask each core whether it is a team lead or not                         */
      RCCE_recv((char *)(&isleader), sizeof(int), ue);
      if (isleader) {
        master_list[wq_pars->master_number] = ue;
        (wq_pars->master_number)++;
      }
    }
  }
  else {
    /* all cores let the master know their team lead status                     */
    isleader = (ID == team_lead);
    RCCE_send((char *)(&isleader), sizeof(int), master);
  }
 
/* all UEs report their team size and memberships                               */
//  for (ue=0; ue<NP; ue++) {
//    RCCE_barrier(&RCCE_COMM_WORLD);
//    if (ID==ue) {
//      printf("UE %d (%d) is in a team with %d members: ", ID, 
//             local_rank, team_size);
//      for (mem=0; mem<team_size; mem++) printf("%d ", team_member[mem]);
//      printf("\n");
//    }
//  }
  return (RCCE_SUCCESS);
}
예제 #8
0
/*
 * Stencil example using one-sided RCCE_put/RCCE_get and flags.
 *
 * Every UE owns a private array a[NXNY].  Per iteration it writes one
 * NX-float row into the comm buffer of UE MY_ID+1 and one into the comm
 * buffer of UE MY_ID-1 (where such a neighbour exists), signals each write
 * with a flag, waits for its own flags, copies the received rows out of its
 * comm buffer and applies a five-term weighted update (weights W1..W5 at
 * offsets O1..O5).  Afterwards the UEs print their part of the array one
 * after another and UE 0 prints the time spent in the main loop.
 *
 * argv[1] (optional): number of iterations, default 10.
 * Returns 0 on success and 1 if a flag cannot be allocated; exits with
 * status 1 if NX is not a multiple of 8.
 *
 * NOTE(review): the result of RCCE_malloc is used without a NULL check, and
 * `tile` is never used.
 */
int RCCE_APP(int argc, char **argv) {

  /* statically allocated space sits in off-chip private memory          */
  float     a[NXNY], *buff;
  int       i, offset, iter=10, tile;
  int       MY_ID;
  int       NTILES1;
  double    time;
  RCCE_FLAG flag0, flag1;

  RCCE_init(&argc, &argv);
  
  /* NTILES1 is the highest UE id, MY_ID is the id of this UE            */
  NTILES1 = RCCE_num_ues()-1;
  MY_ID = RCCE_ue();

  if (NX%8) {
    printf("Grid width should be multiple of 8: %d\n", NX);
    exit(1);
  }
  if (argc>1) iter=atoi(*++argv);
  if (MY_ID==0) printf("Executing %d iterations\n", iter);

    /* allocate space on the comm buffer                                 */
  /* two rows of NX floats: buff[0..NX-1] is written by UE MY_ID-1 and
     buff[NX..2*NX-1] by UE MY_ID+1 (see the RCCE_put calls below)       */
  buff = (float *) RCCE_malloc(sizeof(float)*2*NX); 
  /* Allocate flags to coordinate comm.                                  */                                 
  if (RCCE_flag_alloc(&flag0)) return(1);
  if (RCCE_flag_alloc(&flag1)) return(1);

  /* initialize array a on all tiles; this stuffs a into private caches  */
  for (offset=0,       i=0; i<NXNY; i++) a[i+offset] = 0.0;
  if (MY_ID == 0) 
     for (offset=0,    i=0; i<NX;   i++) a[i+offset] = 1.0;
  if (MY_ID == NTILES1) 
     for (offset=NXNY1,i=0; i<NX;   i++) a[i+offset] = 2.0;

  /* put in a barrier so everybody can be sure to have initialized       */
  RCCE_barrier(&RCCE_COMM_WORLD);

  /* main loop */

  /* only UE 0 takes the start stamp; `time` stays unset on other UEs    */
  if (MY_ID==0) time = RCCE_wtime();

  while ((iter--)>0){
  
    /* start with copying fringe data to neighboring tiles               */
    if (MY_ID!=NTILES1) {
      /* Initialize neighbor flag to zero                                */
      RCCE_flag_write(&flag0, RCCE_FLAG_UNSET, MY_ID+1); 
      /* copy private data to shared comm buffer of neighbor             */
      RCCE_put((t_vcharp)(&buff[0]), (t_vcharp)(&a[NXNY2]), NX*sizeof(float), MY_ID+1);
      RCCE_flag_write(&flag0, RCCE_FLAG_SET, MY_ID+1); 
    }
    if (MY_ID != 0) {
      /* Initialize neighbor flag to zero                                */
      /* NOTE(review): literal 0 where the branch above passes
         RCCE_FLAG_UNSET -- presumably the same value; confirm           */
      RCCE_flag_write(&flag1, 0, MY_ID-1); 
      /* copy private data to shared comm buffer of neighbor             */
      RCCE_put((t_vcharp)(&buff[NX]), (t_vcharp)(&a[NX]), NX*sizeof(float), MY_ID-1);
      RCCE_flag_write(&flag1, RCCE_FLAG_SET, MY_ID-1); 
    }

    /* Make sure the data has been recvd and copy data out of buffer(s)  */
    /* flag1 is set by UE MY_ID+1, flag0 by UE MY_ID-1                   */
    if (MY_ID!=NTILES1) {
      RCCE_wait_until(flag1, RCCE_FLAG_SET);
      RCCE_get((t_vcharp)(&a[NXNY1]), (t_vcharp)(&buff[NX]), NX*sizeof(float),MY_ID);
    }

    if (MY_ID!=0) {
      RCCE_wait_until(flag0, RCCE_FLAG_SET);
      RCCE_get((t_vcharp)(&a[0]), (t_vcharp)(&buff[0]), NX*sizeof(float),MY_ID);
    }

    /* apply the stencil operation                                       */
    for (i=0; i<NXNY2; i++) {
      a[i+O3] +=
         W1*a[i+O1] + W2*a[i+O2] + W3*a[i+O3] + W4*a[i+O4] + W5*a[i+O5];
    }
  }
  /* all UEs finish the main loop before UE 0 stops the clock            */
  RCCE_barrier(&RCCE_COMM_WORLD);
  if (MY_ID==0) { 
    time = RCCE_wtime()-time;
  }

  /* print result strip by strip; this would not be done on RC */
  /* one barrier per pass, and only the UE whose id matches prints       */
  for (int id=0; id<=NTILES1; id++) {
    RCCE_barrier(&RCCE_COMM_WORLD);
    if (MY_ID==id) {
      /* default range is [NX, NXNY1); UE 0 starts at 0 instead and the
         last UE runs up to NXNY                                         */
      int start = NX; int end = NXNY1;
      if (MY_ID==0) start = 0;
      if (MY_ID == NTILES1) end = NXNY;
      for (offset=0, i=start; i<end; i++) {
        if (!(i%NX)) printf("\n");
//        comment out next line and uncomment subsequent three to print error
        printf("%f ",a[i+offset]);
//        int jj=i/NX+(MY_ID*(NY-1));
//        double aexact=1.0+(double)jj/((NTILES1+1)*(NY-1));
//        printf("%f ",a[i+offset]-aexact);
      }
    }
  }
  RCCE_barrier(&RCCE_COMM_WORLD);
  if (MY_ID==0) { 
    printf("\nTotal time: %lf\n", time);
  }

  RCCE_finalize();

  return(0);
}