Ejemplo n.º 1
0
/*
 * One pass of the master's dispatch loop.
 *
 * While work_item is valid, each team lead listed in wq_pars->master_list is
 * polled once; every lead with a pending request is sent the current work
 * item, after which RCCE_new_work_item() is called to produce the next one.
 * When work_item is not valid, the master instead blocks on a request from
 * every team lead in turn and answers each one with the (invalid) item.
 *
 * Note that the item's size and address are captured once on entry and are
 * not re-queried after RCCE_new_work_item().
 *
 * Returns the number of work items handed out during this pass (always 0 on
 * the shutdown path).
 */
int RCCE_queue_master_loop(void *work_item, QUEUE_PARMS *wq_pars){

  int   item_bytes = RCCE_WI_size(work_item);
  void *item_data  = RCCE_WI_address(work_item);
  int   request;          /* request payload; its value is never inspected  */
  int   pending;          /* flag filled in by RCCE_recv_test                */
  int   handed_out = 0;
  int   slot;

  if (!RCCE_WI_valid(work_item)) {
    /* shutdown path: every team must check in and be told to stop          */
    for (slot = 0; slot < wq_pars->master_number; slot++) {
      int lead = wq_pars->master_list[slot];
      RCCE_recv((char *)(&request), sizeof(int), lead);
      RCCE_send((char *)item_data, item_bytes, lead);
    }
    return(handed_out);
  }

  /* normal path: serve whichever team leads have a request outstanding     */
  for (slot = 0; slot < wq_pars->master_number; slot++) {
    int lead = wq_pars->master_list[slot];

    RCCE_recv_test((char *)(&request), sizeof(int), lead, &pending);
    if (!pending) {
      continue;
    }

    RCCE_send((char *)item_data, item_bytes, lead);
    handed_out++;
    /* prepare the item that the next requester will receive                */
    RCCE_new_work_item(work_item, wq_pars);
  }

  return(handed_out);
}
Ejemplo n.º 2
0
/*
 * One pass of a team member's work loop.
 *
 * The team lead (wq_pars->ID == wq_pars->team_lead) sends a request to
 * wq_pars->master, receives a work item into work_item's buffer, and forwards
 * it to team_member[1 .. team_size-1].  Every other member receives the item
 * from its team lead.  A valid item is then executed through
 * RCCE_execute_work_item().
 *
 * Returns RCCE_SUCCESS after executing a valid item, or 1 when the received
 * item is not valid (the signal to stop processing the queue).
 */
int RCCE_queue_member_loop(void *work_item, QUEUE_PARMS *wq_pars) {

  /* Only the arrival of the request matters, but the bytes are transmitted,
     so give them a defined value instead of sending indeterminate memory.   */
  int gimme_work = 1;
  int mem;
  int size = RCCE_WI_size(work_item);
  void *address = RCCE_WI_address(work_item);

  if (wq_pars->ID == wq_pars->team_lead) {
    /* team lead: ask the master for work ...                                */
    RCCE_send((char *)(&gimme_work), sizeof(int), wq_pars->master);
    RCCE_recv((char *)address, size, wq_pars->master);
    /* ... and parcel it out to the workers; slot 0 is skipped, the loop
       starts at index 1                                                     */
    for (mem=1; mem<(wq_pars->team_size); mem++) {
      printf("Team lead %d sends work to UE %d\n", RCCE_ue(), wq_pars->team_member[mem]);
      fflush(0);
      RCCE_send((char *)address, size, wq_pars->team_member[mem]);
    }
  }
  else {
    /* ordinary worker: wait for the team lead to forward the item           */
    RCCE_recv((char *)address, size, wq_pars->team_lead);
  }

  if (!RCCE_WI_valid(work_item)) {
    /* an invalid item is the stop-queue message                             */
    return(1);
  }

  RCCE_execute_work_item(work_item, wq_pars);
  return(RCCE_SUCCESS);
}
Ejemplo n.º 3
0
/*
 * Ping-pong timing test between exactly two UEs.
 *
 * Message sizes start at 32 bytes and double 17 times; within each doubling
 * four sizes are tried, each 1.18920712 (about 2^(1/4)) times the previous
 * one and rounded down to a multiple of 32 (minimum 32).  For every size the
 * two UEs bounce the buffer back and forth and the UE with non-zero rank
 * prints the message size and the average time per round trip.
 *
 * argv[1], when present, overrides the base number of rounds (default
 * 1024*1024).  Returns 1 if fewer than one round is requested or the number
 * of UEs is not two, 0 otherwise.
 */
int RCCE_APP(int argc, char **argv){
  char   buffer[1024*1024*4];
  int    nrounds = 1024*1024;
  int    me, peer;
  int    step, substep, rep;
  int    bigsize, size, roundsize, actualrounds;
  double start, elapsed;

  RCCE_init(&argc, &argv);

  //  RCCE_debug_set(RCCE_DEBUG_ALL);
  me   = RCCE_ue();
  peer = !me;

  if (argc > 1) {
    nrounds = atoi(argv[1]);
  }
  if (nrounds < 1) {
    if (me == 0) printf("Pingpong needs at least 1 round; try again\n");
    return(1);
  }
  if (RCCE_num_ues() != 2) {
    if (me == 0) printf("Pingpong needs at two UEs; try again\n");
    return(1);
  }

  for (step = 0, bigsize = 32; step < 17; step++, bigsize *= 2) {
    size = bigsize;

    for (substep = 0; substep < 4; substep++) {
      // message length: a multiple of 32 bytes, never less than 32
      roundsize = max(32,size - size%32);

      // line both UEs up before the clock starts
      RCCE_barrier(&RCCE_COMM_WORLD);
      start = RCCE_wtime();

      // fewer repetitions for larger messages, but never fewer than 10
      actualrounds = max(10,(nrounds*32)/roundsize);
      for (rep = 0; rep < actualrounds; rep++) {
        if (me) {
          RCCE_send(buffer, roundsize, peer);
          RCCE_recv(buffer, roundsize, peer);
        }
        else {
          RCCE_recv(buffer, roundsize, peer);
          RCCE_send(buffer, roundsize, peer);
        }
      }
      elapsed = RCCE_wtime() - start;

      if (me) printf("%d  %1.9lf\n", roundsize, elapsed/actualrounds);

      // next intermediate size; the product is truncated back to int
      size = (int)(size * 1.18920712);
    }
  }

  RCCE_finalize();

  return(0);
}
Ejemplo n.º 4
0
//--------------------------------------------------------------------------------------
// RCCE_bcast
//--------------------------------------------------------------------------------------
// Broadcast: the UE at position "root" of the communicator sends "num" bytes
// from "buf" to every other member, one after another; every other member
// receives "num" bytes from the root into "buf".
// Returns RCCE_SUCCESS, or the result of RCCE_error_return() when "root" is
// out of range or a send/receive reports an error. With GORY defined the
// function only prints a message and returns 1.
//--------------------------------------------------------------------------------------
int RCCE_bcast(
  char *buf,     // private memory, used for sending (root) and receiving (other UEs) 
  size_t num,    // number of bytes to be sent
  int root,      // source within "comm" of broadcast data
  RCCE_COMM comm // communication domain
  ) {

  int peer, status;
#ifdef GORY
  printf("Collectives only implemented for simplified API\n");
  return(1);
#else
  // root must be a valid index into the communicator's member list
  if (root < 0 || root >= comm.size) {
    return(RCCE_error_return(RCCE_debug_comm,RCCE_ERROR_ID));
  }

  // everybody except the root just receives
  if (RCCE_IAM != comm.member[root]) {
    status = RCCE_recv(buf, num, comm.member[root]);
    if (status) {
      return(RCCE_error_return(RCCE_debug_comm,status));
    }
    return(RCCE_SUCCESS);
  }

  // the root sends to each other member in communicator order
  for (peer = 0; peer < comm.size; peer++) {
    if (peer == root) {
      continue;
    }
    status = RCCE_send(buf, num, comm.member[peer]);
    if (status) {
      return(RCCE_error_return(RCCE_debug_comm,status));
    }
  }

  return(RCCE_SUCCESS);
#endif
}
Ejemplo n.º 5
0
/*
 * Stencil example with power-management calls interleaved.
 *
 * Every UE owns a local float array a[NXNY].  In each iteration it exchanges
 * NX-float fringe rows with its left and right neighbours (there is no
 * exchange between the last UE and UE 0, see the ID/ID_right guards) and then
 * applies a five-term weighted update to the array.  Asynchronous power
 * requests (RCCE_iset_power / RCCE_wait_power) are issued around the
 * communication and compute phases.
 *
 * argv[1], when present, overrides the iteration count (default 3).
 * Returns 0; calls exit(1) when NX is not a multiple of 8.
 *
 * NX, NXNY, NXNY1, NXNY2, O1..O5 and W1..W5 are defined outside this function.
 */
int RCCE_APP(int argc, char **argv){

  float     a[NXNY];
  int       i, offset, iter=3;
  int       fdiv, vlevel;
  int       ID, ID_right, ID_left;
  int       NTILES1;
  double    time;
  RCCE_REQUEST req;

  RCCE_init(&argc, &argv);
 
  //  RCCE_debug_set(RCCE_DEBUG_ALL);

  /* NTILES1 is the rank of the last UE */
  NTILES1 = RCCE_num_ues()-1;
  ID = RCCE_ue();


  /* neighbour ranks, computed with wrap-around; the wrap-around pair is
     excluded from the exchange by the guards in the main loop            */
  ID_right = (ID+1)%RCCE_num_ues();
  ID_left = (ID-1+RCCE_num_ues())%RCCE_num_ues();

// set the relevant areas of the board to the default frequency and voltage
  RCCE_set_frequency_divider(8, &fdiv);
  if (ID==0)print_dividers();

  //  return(0);
  //    RCCE_iset_power(3, &req, &fdiv, &vlevel);
  //  if (ID==RCCE_power_domain_master()) printf("UE %d computed vlevel %d\n", ID,vlevel);
  //  RCCE_wait_power(&req);
  //  RCCE_set_frequency_divider(3, &fdiv);

  /* NOTE(review): this exit path does not call RCCE_finalize() */
  if (NX%8) {
    printf("Grid width should be multiple of 8: %d\n", NX);
    exit(1);
  }
  if (argc>1) iter=atoi(*++argv);
  if (!ID) printf("Core %d Executing %d iterations\n", ID, iter);

  /* initialize array a on all tiles; this stuffs a into private caches  */

  /* everything is zeroed; UE 0 then sets its first NX entries to 1.0 and
     the last UE sets the NX entries starting at NXNY1 to 2.0            */
  for (offset=0,       i=0; i<NXNY; i++) a[i+offset] = 0.0;
  if (ID == 0) 
     for (offset=0,    i=0; i<NX;   i++) a[i+offset] = 1.0;
  if (ID == NTILES1) 
     for (offset=NXNY1,i=0; i<NX;   i++) a[i+offset] = 2.0;

  /* main loop */

  /* NOTE(review): time is stored here but never read, because the code
     that prints the total time further down is commented out            */
  if (ID==0) time = RCCE_wtime();

  while ((iter--)>0){

    /* first power request of the iteration; it is waited on between the
       two exchange phases below                                         */
    RCCE_iset_power(3, &req, &fdiv, &vlevel);
    if (ID==RCCE_power_domain_master()) 
      printf("asked for divider 3, received %d, voltage level %d\n", fdiv, vlevel); 
    fflush(NULL);
      if (!(iter%100)) printf("Iteration %d\n", iter);
    /* start with copying fringe data to neighboring tiles; we need to
       group semantic send/recv pairs together to avoid deadlock         */
    /* phase 1: NX floats at a[NXNY2] go to the right neighbour and are
       received from the left neighbour into a[0]                        */
    if (ID_right!=0) RCCE_send((char*)(&a[NXNY2]), NX*sizeof(float), ID_right);
    if (ID != 0)     RCCE_recv((char*)(&a[0]),     NX*sizeof(float), ID_left);

    RCCE_wait_power(&req);
    /* phase 2: NX floats at a[NX] go to the left neighbour and are
       received from the right neighbour into a[NXNY1]                   */
    if (ID!=0)       RCCE_send((char *)(&a[NX]),    NX*sizeof(float), ID_left);
    if (ID_right!=0) RCCE_recv((char *)(&a[NXNY1]), NX*sizeof(float), ID_right);

    /* second power request, waited on after the stencil update.
       NOTE(review): RCCE_set_frequency_divider() is called while this
       request is still outstanding, and both calls write fdiv - confirm
       this is intended                                                  */
    RCCE_iset_power(3, &req, &fdiv, &vlevel);
    RCCE_set_frequency_divider(3, &fdiv);

    if (ID==RCCE_power_domain_master())    
      printf("asked for divider 3, received %d, voltage level %d\n", fdiv, vlevel);
    fflush(NULL);

    /* apply the stencil operation                                       */
    /* the update is done in place: a[i+O3] is both read and written     */
    for (i=0; i<NXNY2; i++) {
      a[i+O3] +=
         W1*a[i+O1] + W2*a[i+O2] + W3*a[i+O3] + W4*a[i+O4] + W5*a[i+O5];
    }
    RCCE_wait_power(&req);
  }


//  /* print result strip by strip; this would not be done on RC */
//  for (int id=0; id<=NTILES1; id++) {
//    RCCE_barrier(&RCCE_COMM_WORLD);
//    if (ID==id) {
//      int start = NX; int end = NXNY1;
//      if (ID==0) start = 0;
//      if (ID == NTILES1) end = NXNY;
//      for (offset=0, i=start; i<end; i++) {
//        if (!(i%NX)) printf("\n");
////        comment out next line and uncomment subsequent three to print error
//        printf("%1.5f ",a[i+offset]); fflush(stdout);
////        int jj=i/NX+(ID*(NY-1));
////        double aexact=1.0+(double)jj/((NTILES1+1)*(NY-1));
////        printf("%f ",a[i+offset]-aexact);
//      }
//    }
//  }
//  RCCE_barrier(&RCCE_COMM_WORLD);
//  if (ID==0) { 
//    printf("\n");
//    time = RCCE_wtime()-time;
//    printf("Total time: %lf\n", time);
//  }

//reset the relevant areas of the board to the default frequency and voltage
//  RCCE_set_frequency_divider(8, &fdiv);
//  RCCE_iset_power(2, &req, &fdiv, &vlevel);
//  if (ID==RCCE_power_domain_master()) printf("UE %d computed vlevel %d\n", ID,vlevel);
//  RCCE_wait_power(&req);

//  RCCE_set_frequency_divider(3, &fdiv);
  /* all UEs synchronize before UE 0 prints the dividers a second time   */
  RCCE_barrier(&RCCE_COMM_WORLD);
  if (ID==0)print_dividers();

  RCCE_finalize();

  return(0);
}
Ejemplo n.º 6
0
/*
 * Exchange boundary values of g for plane k with neighbouring UEs.
 *
 *   iex == 0 : receive from north into row i = 0 and from west into
 *              column j = 0
 *   iex == 1 : receive from south into row i = nx+1 and from east into
 *              column j = ny+1
 *   iex == 2 : send row i = nx to south and column j = ny to east
 *   otherwise: send row i = 1 to north and column j = 1 to west
 *
 * A neighbour equal to -1 is skipped.  Each message carries five components
 * per grid point, packed point by point, over j = jst..jend for the
 * north/south direction and i = ist..iend for the east/west direction.
 * rcce_curphase is set to 2 for iex 1 and 2, and to 3 otherwise.
 */
void exchange_1(double *g, int k, int iex) {

  double bufin[5*(isiz1+isiz2)], bufout[5*(isiz1+isiz2)];
  int ib, m;

  /* rcce_curphase = iex == 0 ? 0 : 1; */
  rcce_curphase = (iex == 1 || iex == 2) ? 2 : 3;

  switch (iex) {

  case 0:
     /* receive from north, then from west */
     if( north != -1 ) {
        RCCE_recv((char*)bufin, 5*(jend-jst+1)*sizeof(double), north);

        ib = 0;
        for (int j=jst; j<=jend; j++) {
           for (m=1; m<=5; m++) {
              g(m,0,j,k) = bufin[ib++];
           }
        }
     }

     if( west != -1 ) {
        RCCE_recv((char*)bufin, 5*(iend-ist+1)*sizeof(double), west);

        ib = 0;
        for (int i=ist; i<=iend; i++) {
           for (m=1; m<=5; m++) {
              g(m,i,0,k) = bufin[ib++];
           }
        }
     }
     break;

  case 1:
     /* receive from south, then from east */
     if( south != -1 ) {
        RCCE_recv((char*)bufin, 5*(jend-jst+1)*sizeof(double), south);

        ib = 0;
        for (int j=jst; j<=jend; j++) {
           for (m=1; m<=5; m++) {
              g(m,nx+1,j,k) = bufin[ib++];
           }
        }
     }

     if( east != -1 ) {
        RCCE_recv((char*)bufin, 5*(iend-ist+1)*sizeof(double), east);

        ib = 0;
        for (int i=ist; i<=iend; i++) {
           for (m=1; m<=5; m++) {
              g(m,i,ny+1,k) = bufin[ib++];
           }
        }
     }
     break;

  case 2:
     /* send to south, then to east */
     if( south != -1 ) {
        ib = 0;
        for (int j=jst; j<=jend; j++) {
           for (m=1; m<=5; m++) {
              bufout[ib++] = g(m,nx,j,k);
           }
        }
        RCCE_send((char*)bufout, 5*(jend-jst+1)*sizeof(double), south);
     }

     if( east != -1 ) {
        ib = 0;
        for (int i=ist; i<=iend; i++) {
           for (m=1; m<=5; m++) {
              bufout[ib++] = g(m,i,ny,k);
           }
        }
        RCCE_send((char*)bufout, 5*(iend-ist+1)*sizeof(double), east);
     }
     break;

  default:
     /* send to north, then to west */
     if( north != -1 ) {
        ib = 0;
        for (int j=jst; j<=jend; j++) {
           for (m=1; m<=5; m++) {
              bufout[ib++] = g(m,1,j,k);
           }
        }
        RCCE_send((char*)bufout, 5*(jend-jst+1)*sizeof(double), north);
     }

     if( west != -1 ) {
        ib = 0;
        for (int i=ist; i<=iend; i++) {
           for (m=1; m<=5; m++) {
              bufout[ib++] = g(m,i,1,k);
           }
        }
        RCCE_send((char*)bufout, 5*(iend-ist+1)*sizeof(double), west);
     }
     break;
  }

  return;
}
Ejemplo n.º 7
0
//--------------------------------------------------------------------------------------
// FUNCTION: RCCE_reduce_general
//--------------------------------------------------------------------------------------
//  function used to implement both reduce and allreduce
//
//  Non-root UEs send their "num" elements to the root and, when "all" is set,
//  receive the reduced result into outbuf. The root copies its own inbuf to
//  outbuf and then combines the contribution of every other UE into outbuf,
//  in communicator order; when "all" is set it sends the result back to each
//  of them.
//
//  Returns RCCE_SUCCESS, RCCE_ERROR_ILLEGAL_OP / RCCE_ERROR_ILLEGAL_TYPE for an
//  unsupported op/type, or the error code of a failing send/receive. With GORY
//  defined the function only prints a message and returns 1.
//
//  NOTE: on the root, inbuf is reused as the receive buffer for the other UEs'
//  contributions, so its contents on return are not the values passed in.
//--------------------------------------------------------------------------------------
static int RCCE_reduce_general(
  char *inbuf,   // source buffer for reduction data
  char *outbuf,  // target buffer for reduction data
  int num,       // number of data elements to be reduced
  int type,      // type of data elements
  int op,        // reduction operation
  int root,      // root of reduction tree, used for all reductions
  int all,       // if 1, use allreduce, if 0, use reduce
  RCCE_COMM comm // communication domain within which to reduce
  ) {

  int ue, i, type_size, ierr;
  int    *iin, *iout;
  long   *lin, *lout;
  float  *fin, *fout;
  double *din, *dout;
  // create aliases for source and target buffers to simplify arithmetic operations
  iin = (int *)    inbuf; iout = (int *)    outbuf;
  lin = (long *)   inbuf; lout = (long *)   outbuf;
  fin = (float *)  inbuf; fout = (float *)  outbuf;
  din = (double *) inbuf; dout = (double *) outbuf;

#ifdef GORY
  printf("Reduction only implemented for non-gory API\n");
  return(1);
#else
  // reject unsupported operations before any communication takes place
  switch (op) {
     case RCCE_SUM:  
     case RCCE_MAX:  
     case RCCE_MIN:  
     case RCCE_PROD: break;
     default:  return(RCCE_ERROR_ILLEGAL_OP);
  }

  // element size in bytes; also rejects unsupported element types
  switch (type) {
    case RCCE_INT:    type_size = sizeof(int);    
                      break;
    case RCCE_LONG:   type_size = sizeof(long);   
                      break;
    case RCCE_FLOAT:  type_size = sizeof(float);  
                      break;
    case RCCE_DOUBLE: type_size = sizeof(double); 
                      break;
    default: return(RCCE_ERROR_ILLEGAL_TYPE);
  }

  if (RCCE_IAM != comm.member[root]) {
    // non-root UEs send their source buffers to the root
    if ((ierr = RCCE_send(inbuf, num*type_size, comm.member[root]))) {
      return(ierr);
    }
    // in case of allreduce they also receive the reduced buffer
    if (all) {
      if ((ierr = RCCE_recv(outbuf, num*type_size, comm.member[root]))) {
        return(ierr);
      }
    }
  }
  else {
    // the root can copy directly from source to target buffer
    memcpy(outbuf, inbuf, num*type_size);
    for (ue=0; ue<comm.size; ue++) {
      if (ue == root) {
        continue;
      }
      // each contribution lands in inbuf, overwriting what was there before
      if ((ierr = RCCE_recv(inbuf, num*type_size, comm.member[ue]))) {
        return(ierr);
      }
      
      // use combination of operation and data type to reduce number of switch statements
      switch (op+(RCCE_NUM_OPS)*(type)) {

        case RCCE_SUM_INT:     for (i=0; i<num; i++) iout[i] += iin[i];             break;
        case RCCE_MAX_INT:     for (i=0; i<num; i++) iout[i] = MAX(iout[i],iin[i]); break;
        case RCCE_MIN_INT:     for (i=0; i<num; i++) iout[i] = MIN(iout[i],iin[i]); break;
        case RCCE_PROD_INT:    for (i=0; i<num; i++) iout[i] *= iin[i];             break;

        case RCCE_SUM_LONG:    for (i=0; i<num; i++) lout[i] += lin[i];             break;
        case RCCE_MAX_LONG:    for (i=0; i<num; i++) lout[i] = MAX(lout[i],lin[i]); break;
        case RCCE_MIN_LONG:    for (i=0; i<num; i++) lout[i] = MIN(lout[i],lin[i]); break;
        case RCCE_PROD_LONG:   for (i=0; i<num; i++) lout[i] *= lin[i];             break;

        case RCCE_SUM_FLOAT:   for (i=0; i<num; i++) fout[i] += fin[i];             break;
        case RCCE_MAX_FLOAT:   for (i=0; i<num; i++) fout[i] = MAX(fout[i],fin[i]); break;
        case RCCE_MIN_FLOAT:   for (i=0; i<num; i++) fout[i] = MIN(fout[i],fin[i]); break;
        case RCCE_PROD_FLOAT:  for (i=0; i<num; i++) fout[i] *= fin[i];             break;

        case RCCE_SUM_DOUBLE:  for (i=0; i<num; i++) dout[i] += din[i];             break;
        case RCCE_MAX_DOUBLE:  for (i=0; i<num; i++) dout[i] = MAX(dout[i],din[i]); break;
        case RCCE_MIN_DOUBLE:  for (i=0; i<num; i++) dout[i] = MIN(dout[i],din[i]); break;
        case RCCE_PROD_DOUBLE: for (i=0; i<num; i++) dout[i] *= din[i];             break;

        // op and type were validated above; nothing to do for other values
        default: break;
      }
    }

    // in case of allreduce the root sends the reduction results to all non-root UEs
    if (all) {
      for (ue=0; ue<comm.size; ue++) {
        if (ue == root) {
          continue;
        }
        if ((ierr = RCCE_send(outbuf, num*type_size, comm.member[ue]))) {
          return(ierr);
        }
      }
    }
  }
  return(RCCE_SUCCESS);
#endif // GORY
}
Ejemplo n.º 8
0
/*
 * Partition the UEs into teams, one per power domain, and pick a master.
 *
 * Fills in wq_pars: NP, ID, team_size, team_lead, team_member[] (sorted with
 * id_compare), master, local_rank and, on the master only, master_number and
 * master_list[] (the ranks that reported being team leads).
 *
 * The UE with the highest rank (NP-1) becomes the master.  It is removed
 * from its team's member list and its own team_size is set to 0.
 *
 * Always returns RCCE_SUCCESS; return codes of the RCCE calls are not
 * checked.
 */
int RCCE_setup_work_queue_teams(QUEUE_PARMS *wq_pars){ 
 
  int NP, ID, ue, size, master, team_lead, team_size, local_rank;
  int test, isleader;
  int *team_member, *master_list;
 
  NP = wq_pars->NP = RCCE_num_ues();
  ID = wq_pars->ID = RCCE_ue();
  team_member = wq_pars->team_member;
  master_list = wq_pars->master_list;
 
  /* determine the number of UEs in the local power domain and form teams       */
  wq_pars->team_size = team_size = RCCE_power_domain_size();
  wq_pars->team_lead = team_lead = RCCE_power_domain_master();
  if (team_lead == ID) {
    /* the team lead is the first team member                                   */
    team_member[0] = team_lead;
    size = 1;
    /* the team lead collects IDs from its team members by polling every other
       UE. Polling stops as soon as the team is complete, so that
       &team_member[size] is never handed out as a receive target once size
       has reached team_size                                                    */
    while (size<team_size) {
      for (ue=0; ue<NP && size<team_size; ue++) {
        if (ue == team_lead) {
          continue;
        }
        RCCE_recv_test((char *)(&(team_member[size])), sizeof(int), ue, &test);
        if (test) team_member[size++] = ue;
      }
    }
    /* ... and sends the list to all other team members, after sorting it       */
    RCCE_qsort((char *)team_member, team_size, sizeof(int), id_compare);
    for (ue=1; ue<team_size; ue++) {
      RCCE_send((char *)team_member, team_size*sizeof(int), team_member[ue]);
    }
  }
  else {
    /* team members check in with the team lead ...                             */
    RCCE_send((char *)(&ID), sizeof(int), team_lead);
    /* ... and receive the complete list of team members                        */
    RCCE_recv((char *)team_member, team_size*sizeof(int), team_lead);
  }
 
  /* we assign the UE with the highest rank the role of master. We know that
     this UE is either in a power domain by itself, or there is another UE  
     in the same power domain who is the power domain master, because the
     power domain master is always the UE in that domain with the lowest rank   */
  master = wq_pars->master = NP-1;
 
  /* the team containing the overall master must remove it from its member
     list; the list is sorted, so the master can only be the last entry         */
  if (team_member[team_size-1] == master) wq_pars->team_size = --team_size;
 
  /* the overall master is not in any team                                      */
  if (ID==master) team_size = wq_pars->team_size = 0;
 
  /* each UE determines its rank within the team                                */
  local_rank = wq_pars->local_rank = 0;
  for (ue=0; ue<team_size; ue++) {
    if (ID==team_member[ue]) local_rank = wq_pars->local_rank = ue;
  }
  (void)local_rank;  /* local copy is kept for debugging output only            */
 
  /* this code determines number of power domain leads, plus list of UEs        */
  if (ID == master) {
    wq_pars->master_number = 0;
    /* every UE other than the master (ranks 0 .. NP-2) reports in rank order   */
    for (ue=0; ue<NP-1; ue++) {
      /* ask each core whether it is a team lead or not                         */
      RCCE_recv((char *)(&isleader), sizeof(int), ue);
      if (isleader) {
        master_list[wq_pars->master_number] = ue;
        (wq_pars->master_number)++;
      }
    }
  }
  else {
    /* all cores let the master know their team lead status                     */
    isleader = (ID == team_lead);
    RCCE_send((char *)(&isleader), sizeof(int), master);
  }
 
  return (RCCE_SUCCESS);
}
Ejemplo n.º 9
0
// Minimal MPI_Send replacement built on RCCE_send.
// We abuse the MPI_Datatype argument to convey the size of the data type in
// bytes: the message length is count*type_size. "dest" is an index into the
// communicator's member list; "tag" is accepted but not used.
// Returns whatever RCCE_send returns.
int MPI_Send(void *buf, int count, int type_size, int dest, int tag, RCCE_COMM comm) {

  char *payload = (char *)buf;
  int   nbytes  = count*type_size;
  int   target  = comm.member[dest];

  return(RCCE_send(payload, nbytes, target));
}