Exemplo n.º 1
0
/*
 * Set up the per-thread completion semaphores.
 *
 * Each thread allocates one semaphore, stores its pointer in comp[MYTHREAD],
 * and then, after a barrier, copies every thread's entry of comp[] into the
 * malloc'ed table `completion` (THREADS entries).
 *
 * NOTE(review): comp is presumably a shared array with one slot per thread;
 * its declaration is not visible here.
 *
 * Must be called by all threads: it contains a upc_barrier.
 */
static inline void init_comp_semaphores(void) {
	// allocate this thread's completion semaphore and publish its pointer
	comp[MYTHREAD] = bupc_sem_alloc(0);

	// local table that will hold one semaphore pointer per thread
	completion = malloc(THREADS * sizeof(bupc_sem_t*));
	if (completion == NULL) {
		// Abort the whole job *before* the barrier: a thread that simply
		// returned or crashed here would leave the others waiting forever.
		upc_global_exit(EXIT_FAILURE);
	}

	// wait for all threads to have allocated their semaphore
	upc_barrier;

	// copy all semaphore pointers to local memory
	int i;
	for (i = 0; i < THREADS; i++) {
		completion[i] = comp[i];
	}
}
Exemplo n.º 2
0
Arquivo: p2p.c Projeto: ParRes/Kernels
/*
 * UPC pipeline kernel on a 2D grid.
 *
 * Usage: <prog> <# iterations> <first array dimension> <second array dimension>
 *
 * The first dimension (m) is split into contiguous segments, one per thread;
 * the first (m % THREADS) threads get one extra element.  Every thread sweeps
 * the rows j = 1..n-1 of its segment, computing
 *     ARRAY(i, j) = ARRAY(i-1, j) + ARRAY(i, j-1) - ARRAY(i-1, j-1)
 * and has to wait for its left neighbor before it can start a row.  With
 * USE_BUPC_EXT the hand-off uses Berkeley UPC semaphores, otherwise it spins
 * on current_max_line[].
 *
 * One warmup iteration is run before timing starts.  The last thread prints
 * the banner, verifies the top right corner value and reports the rate.
 *
 * All threads must execute this function: it contains collective barriers.
 */
int main(int argc, char ** argv) {

  long   m, n;            /* grid dimensions                                     */
  int    i, j, iter;      /* dummies                                             */
  int    iterations;      /* number of times to run the pipeline algorithm       */
  double pipeline_time,   /* timing parameters                                   */
         avgtime, max_time;
  double epsilon = 1.e-8; /* error tolerance                                     */
  double corner_val;      /* verification value at top right corner of grid      */
  double *vector;/* array holding grid values                           */
  long   total_length;    /* total required length to store grid values          */

  /*******************************************************************************
  ** process and test input parameters
  ********************************************************************************/

  if(MYTHREAD == THREADS-1){
    printf("Parallel Research Kernels version %s\n", PRKVERSION);
    printf("UPC pipeline execution on 2D grid\n");
  }

  if (argc != 4){
    if(MYTHREAD == THREADS-1){
      printf("Usage: %s <# iterations> <first array dimension> ", *argv);
      printf("<second array dimension>\n");
    }
    upc_global_exit(EXIT_FAILURE);
  }

  iterations  = atoi(*++argv);
  if (iterations < 1){
    if(MYTHREAD == THREADS-1)
      printf("ERROR: iterations must be >= 1 : %d \n",iterations);
    upc_global_exit(EXIT_FAILURE);
  }

  m  = atol(*++argv);
  n  = atol(*++argv);

  if (m < 1 || n < 1){
    if(MYTHREAD == THREADS-1)
      /* m and n are long: they must be printed with %ld, not %d */
      printf("ERROR: grid dimensions must be positive: %ld, %ld \n", m, n);
    upc_global_exit(EXIT_FAILURE);
  }

  if(MYTHREAD == THREADS-1){
    printf("Number of threads         = %d\n", THREADS);
    printf("Grid sizes                = %ld, %ld\n", m, n);
    printf("Number of iterations      = %d\n", iterations);
#if USE_BUPC_EXT
    printf("Using Berkeley UPC extensions\n");
#endif
  }

  /*********************************************************************
  ** Allocate memory for input and output matrices
  *********************************************************************/
#if USE_BUPC_EXT
  /* each thread publishes its own semaphore and looks up the one belonging
     to its right neighbor (wrapping around for the last thread)          */
  bupc_sem_t *myflag = bupc_sem_alloc(BUPC_SEM_INTEGER | BUPC_SEM_MPRODUCER);
  upc_barrier;
  allflags[MYTHREAD] = myflag;
  upc_barrier;
  bupc_sem_t *mypeer = allflags[(MYTHREAD+1) % THREADS];
#endif

  /* split the first dimension into THREADS contiguous segments; the first
     `leftover` threads receive one extra element                          */
  long segment_size = m / THREADS;
  int leftover = m % THREADS;
  int myoffsetx, sizex;

  if(MYTHREAD < leftover){
    myoffsetx = (segment_size + 1) * MYTHREAD;
    sizex = segment_size + 1;
  }else{
    myoffsetx = (segment_size + 1) * leftover + segment_size * (MYTHREAD - leftover);
    sizex = segment_size;
  }

#if USE_BUPC_EXT
  /* extend the local segment by one element on the left; the left neighbor
     writes its last element of every row into that extra slot             */
  if(MYTHREAD != 0){
    myoffsetx -= 1;
    sizex += 1;
  }
#endif

  int sizey = n;
  int myoffsety = 0;

  upc_barrier;

  debug("Allocating arrays (%d, %d), offset (%d, %d)", sizex, sizey, myoffsetx, myoffsety);
  local_shared_block_ptrs in_array  = shared_2d_array_alloc(sizex, sizey, myoffsetx, myoffsety);

  in_arrays[MYTHREAD] = in_array;

  double **in_array_private = shared_2d_array_to_private(in_array, sizex, sizey, myoffsetx, myoffsety);

  /* thread 0 never has to wait for a neighbor, so all of its lines are
     available from the start; every other thread starts with none        */
  if(MYTHREAD == 0)
    current_max_line[MYTHREAD] = sizey;
  else
    current_max_line[MYTHREAD] = 0;

  upc_barrier;

  /*********************************************************************
  ** Initialize the matrices
  *********************************************************************/

  /* clear the array                                                             */
  for (j=0; j<n; j++)
    for (i=myoffsetx; i<myoffsetx + sizex; i++)
      ARRAY(i, j) = 0.0;

  /* set boundary values (bottom and left side of grid)                          */
  if(MYTHREAD == 0)
    for (j=0; j<n; j++)
      ARRAY(0, j) = (double) j;

  for (i=myoffsetx; i<myoffsetx + sizex; i++)
    ARRAY(i, 0) = (double) i;

  upc_barrier;

  /* iteration 0 is the warmup, hence the <= in the loop condition               */
  for (iter = 0; iter<=iterations; iter++){
    /* start timer after a warmup iteration */
    if (iter == 1)
      pipeline_time = wtime();
    if(MYTHREAD == 0)
      debug("start it %d, %f", iter, ARRAY(0, 0));

    if(MYTHREAD != THREADS - 1)  // Send the element in line 0
      in_arrays[MYTHREAD + 1][0][myoffsetx + sizex -1] = ARRAY(myoffsetx + sizex - 1, 0);

    for (j=1; j<n; j++) {
#if USE_BUPC_EXT
      /* wait until the left neighbor has delivered its last element of line j  */
      if(MYTHREAD > 0){
        bupc_sem_wait(myflag);
      }

      for (i=myoffsetx+1; i<myoffsetx + sizex; i++)
        ARRAY(i, j) = ARRAY(i-1, j) + ARRAY(i, j-1) - ARRAY(i-1, j-1);

      /* hand our last element of line j to the right neighbor, then signal it  */
      if(MYTHREAD != THREADS - 1){
        in_arrays[MYTHREAD + 1][j][myoffsetx + sizex -1] = ARRAY(myoffsetx + sizex - 1, j);

        bupc_sem_post(mypeer);
      }
#else
      /* spin until the left neighbor has announced that line j is complete     */
      while(j > current_max_line[MYTHREAD]) // Normally not necessary: bupc_poll();
        ;

      /* first element of the segment: its left-hand inputs are read from the
         left neighbor's array                                                  */
      if(MYTHREAD > 0)
        ARRAY(myoffsetx, j) = in_arrays[MYTHREAD - 1][j][myoffsetx-1] + ARRAY(myoffsetx, j-1) - in_arrays[MYTHREAD-1][j-1][myoffsetx-1];

      for (i=myoffsetx+1; i<myoffsetx + sizex; i++)
        ARRAY(i, j) = ARRAY(i-1, j) + ARRAY(i, j-1) - ARRAY(i-1, j-1);

      /* tell the right neighbor that line j is now available                   */
      if(MYTHREAD < THREADS - 1)
        current_max_line[MYTHREAD+1] = j;

#endif
    }

    /* copy top right corner value to bottom left corner to create dependency; we
       need a barrier to make sure the latest value is used. This also guarantees
       that the flags for the next iteration (if any) are not getting clobbered  */
    if(MYTHREAD == 0)
      current_max_line[MYTHREAD] = sizey;
    else
      current_max_line[MYTHREAD] = 0;

    if(MYTHREAD == THREADS - 1){
      in_arrays[0][0][0] = -ARRAY(m-1, n-1);
    }
    upc_barrier;
  }

  pipeline_time = wtime() - pipeline_time;
  times[MYTHREAD] = pipeline_time;

  /* make sure every thread has stored its time before the reduction below       */
  upc_barrier;

  // Compute max_time over ALL threads: seed with thread 0 and scan the rest
  if(MYTHREAD == THREADS - 1){
    max_time = times[0];
    for(i=1; i<THREADS; i++){
      if(max_time < times[i])
        max_time = times[i];
    }
  }

  /*******************************************************************************
  ** Analyze and output results.
  ********************************************************************************/

  /* verify correctness, using top right value;                                  */
  if( MYTHREAD == THREADS - 1){
    corner_val = (double)((iterations+1)*(n+m-2));
    if (fabs(ARRAY(m-1,n-1)-corner_val)/corner_val > epsilon) {
      printf("ERROR: checksum %lf does not match verification value %lf\n",
          ARRAY(m-1, n-1), corner_val);
      exit(EXIT_FAILURE);
    }
#if VERBOSE
    printf("checksum %lf verification value %lf\n",
        ARRAY(m-1, n-1), corner_val);
    printf("Solution validates; verification value = %lf\n", corner_val);
#else
    printf("Solution validates\n");
#endif
    avgtime = max_time/iterations;
    printf("Rate (MFlops/s): %lf Avg time (s): %lf\n",
           1.0E-06 * 2 * ((double)(m-1)*(double)(n-1))/avgtime, avgtime);
    exit(EXIT_SUCCESS);
  }
}