/*
 * Test driver for upc_all_fwrite_local_async().
 *
 * Every thread seeks to offset 10*MYTHREAD in the test file, fills a private
 * 10-byte buffer with the character code (MYTHREAD + 48), starts an
 * asynchronous local write, waits for it with upc_all_fwait_async() and
 * finally closes the file.
 *
 * Returns 0 on success. Calls upc_global_exit(-1) when the buffer cannot be
 * allocated or when the file cannot be opened or closed.
 */
int main()
{
	upcio_file_t *fd;
	upc_off_t ret_size;
	uint32_t size, nmemb, i;
	char *buffer;
	int err;
	upc_flag_t sync_mode = 0;
	char fname[] = "/mnt/plfs/upcio.test";

	if(!MYTHREAD)
		printf("upcio test: test fwrite_local_async with %d Threads\n", THREADS);

	nmemb = 1;
	size = 10;
	buffer = malloc(sizeof(char)*size*nmemb);
	if(buffer==NULL)
	{
		/* The buffer is written to below, so a failed allocation is fatal. */
		printf("TH%2d: Buffer allocation Error\n",MYTHREAD);
		upc_global_exit(-1);
	}

	fd=upc_all_fopen( fname, UPC_INDIVIDUAL_FP|UPC_WRONLY, 0666);
	upc_barrier;

	if(fd==NULL)
	{
		printf("TH%2d: File open Error\n",MYTHREAD);
		upc_global_exit(-1);
	}

	upc_barrier;
	upc_all_fseek(fd, 10*MYTHREAD, UPC_SEEK_SET);

	/* Initialize the buffer, then write */
	for(i=0; i<size; i++)
		buffer[i]= MYTHREAD + 48;

	upc_all_fwrite_local_async(fd, (void *)buffer, size, nmemb, &ret_size, sync_mode);

	/* Block until the asynchronous write has finished and report its result. */
	err = upc_all_fwait_async(fd);
	if( err == -1 )
		printf("TH%2d: fwait Error\n",MYTHREAD);
	else
		printf("TH%2d: fwait returns %d\n",MYTHREAD, err);

	if(upc_all_fclose(fd)!=0)
	{
		printf("TH%2d: File close Error\n",MYTHREAD);
		upc_global_exit(-1);
	}

	if(!MYTHREAD)
		printf("upcio test: Done with fwrite_local_async testing\n");

	free(buffer);
	return 0;
}
Beispiel #2
0
/*
 * Terminate the program with status err.
 * A UPC build (__UPC__ defined) goes through upc_global_exit(); every other
 * build, including OpenMP and SHMEM ones, calls exit().
 */
void impl_abort(int err) {
#ifdef __UPC__
  upc_global_exit(err);
#else
  exit(err);
#endif
}
Beispiel #3
0
/*
 * Report a fatal error and terminate.
 *
 * fmt and the following arguments are formatted printf-style into a
 * 1024-byte scratch buffer (longer messages are truncated by vsnprintf),
 * written to stderr after the prefix "FATAL ERROR ", and then
 * upc_global_exit(EXIT_FAILURE) is called.
 */
void die(char *fmt, ...){
  char message[1024];
  va_list args;

  va_start(args, fmt);
  vsnprintf(message, sizeof message, fmt, args);
  va_end(args);

  fprintf(stderr, "FATAL ERROR %s\n", message);

  upc_global_exit(EXIT_FAILURE);
}
Beispiel #4
0
 /*
  * Open fname with upc_all_fopen() using an individual file pointer.
  *
  * ronly != 0 opens the file read-only; otherwise it is opened write-only
  * with UPC_CREATE. The permission argument is always 0666. A upc_barrier
  * follows the open. On failure the calling thread prints an error and
  * calls upc_global_exit(-1).
  *
  * Returns the handle produced by upc_all_fopen().
  */
 upcio_file_t *uopen(char *fname, int ronly) {
	int flags = ronly ? (UPC_INDIVIDUAL_FP|UPC_RDONLY)
			  : (UPC_INDIVIDUAL_FP|UPC_WRONLY|UPC_CREATE);
	upcio_file_t *fd = upc_all_fopen(fname, flags, 0666);

	upc_barrier;

	if (fd != NULL)
		return fd;

	printf("TH%2d: File open Error\n",MYTHREAD);
	upc_global_exit(-1);

	return fd;
}
/*
 * Test driver for upc_all_fwrite_list_local_async().
 *
 * Each thread describes two pieces of a private 10-byte buffer filled with
 * 'z' (4 bytes at index 0 and 3 bytes at index 6) and two file regions
 * (3 bytes at offset 4*MYTHREAD and 4 bytes at offset 8+4*MYTHREAD), then
 * starts an asynchronous list write. It reports the UPC_ASYNC_OUTSTANDING
 * state, polls once with upc_all_ftest_async(), waits with
 * upc_all_fwait_async(), reports the outstanding state again and closes
 * the file.
 *
 * Returns 0 on success. Calls upc_global_exit(-1) when the buffer cannot be
 * allocated or when the file cannot be opened or closed.
 */
int main()
{
	upcio_file_t *fd;
	char *buffer;
	upc_off_t ret_size;
	uint32_t size, i;
	int err;
	struct upc_io_local_memvec memvec[2];
	struct upc_io_filevec filevec[2];
	int flag;
	void *dummy;
	upc_flag_t sync_mode = 0;
	char fname[] = "/mnt/plfs/upcio.test";

	if(!MYTHREAD)
		printf("upcio test: test fwrite_list_local_async with %d Threads\n", THREADS);

	size = 10;
	buffer = malloc(sizeof(char)*size);
	if(buffer==NULL)
	{
		/* The memory vectors below point into this buffer. */
		printf("TH%2d: Buffer allocation Error\n",MYTHREAD);
		upc_global_exit(-1);
	}

	/* Memory side: 4 + 3 bytes; file side: 3 + 4 bytes (7 bytes in total). */
	memvec[0].baseaddr = &buffer[0];
	memvec[0].len = 4;
	memvec[1].baseaddr = &buffer[6];
	memvec[1].len = 3;
	filevec[0].offset = 4*MYTHREAD;
	filevec[0].len = 3;
	filevec[1].offset = 8+4*MYTHREAD;
	filevec[1].len = 4;

	for(i=0; i<size; i++)
		buffer[i] = 'z';

	fd=upc_all_fopen( fname, UPC_INDIVIDUAL_FP|UPC_WRONLY, 0666);
	upc_barrier;

	if(fd==NULL)
	{
		printf("TH%2d: File open Error\n",MYTHREAD);
		upc_global_exit(-1);
	}

	upc_barrier;
	/* The arrays decay to pointers to their first element; no cast needed. */
	upc_all_fwrite_list_local_async(fd, 2, memvec, 2, filevec, &ret_size,
					sync_mode);

	dummy = NULL;
	if( upc_all_fcntl(fd, UPC_ASYNC_OUTSTANDING, dummy) )
		printf("TH%2d has an outstanding ASYNC OP\n",MYTHREAD);
	else
		printf("TH%2d does not has outstanding ASYNC OPs\n",MYTHREAD);
	upc_barrier;

	/* Poll once for completion; flag is only read when the call succeeded. */
	err = upc_all_ftest_async(fd, &flag);
	if( err == -1 )
		printf("TH%2d: ftest Error\n",MYTHREAD);
	else
	{
		if( flag )
		{
			printf("TH%2d: Async op done\n",MYTHREAD);
			printf("TH%2d: Async return %d\n",MYTHREAD, err);
		}
		else
			printf("TH%2d: Async pending\n",MYTHREAD);
	}

	err = upc_all_fwait_async(fd);
	if( err == -1 )
		printf("TH%2d: fwait Error\n",MYTHREAD);
	else
		printf("TH%2d: fwait returns %d\n",MYTHREAD, err);

	upc_barrier;

	if( upc_all_fcntl(fd, UPC_ASYNC_OUTSTANDING, dummy) )
		printf("TH%2d has an outstanding ASYNC OP\n",MYTHREAD);
	else
		printf("TH%2d does not has outstanding ASYNC OPs\n",MYTHREAD);

	upc_barrier;

	if(upc_all_fclose(fd)!=0)
	{
		printf("TH%2d: File close Error\n",MYTHREAD);
		upc_global_exit(-1);
	}

	if(!MYTHREAD)
		printf("upcio test: Done with fwrite_list_local_async testing\n");

	free(buffer);
	return 0;
}
/*
 * Test driver for upc_all_fread_local() on a file opened with
 * upc_all_fopen_trans().
 *
 * argv[1] is the transaction id handed to upc_all_fopen_trans(). Each thread
 * reads 10*(MYTHREAD+1) bytes into a private buffer and the threads then
 * print their result one after another, in thread order.
 *
 * Returns 0 on success. Exits with status 1 when the transaction id is
 * missing, and calls upc_global_exit(-1) when the buffer cannot be allocated
 * or when the file cannot be opened or closed.
 */
int main(int argc, char **argv)
{
	upcio_file_t *fd;
	upc_off_t ret_size, size;
	uint32_t nmemb;
	upc_flag_t sync = 0;
	char *buffer;
	int i;
	int tid = 0;
	char fname[] = "/mnt/plfs/upcio.test";

	if(!MYTHREAD)
		printf("upcio test: test fread_local with %d Threads\n", THREADS);

	if (argc < 2) {
		printf("usage: ./test_read_trans_local transaction_id\n");
		exit(1);
	}

	tid = atoi(argv[1]);
	nmemb = 1;
	size = 10*(MYTHREAD+1);

	/*
	 * Zero-initialised and one byte larger than the request, so the buffer
	 * is a valid C string even when fewer than size bytes are read.
	 */
	buffer = calloc((size_t)(size+1)*nmemb, sizeof(char));
	if(buffer==NULL)
	{
		printf("TH%2d: Buffer allocation Error\n",MYTHREAD);
		upc_global_exit(-1);
	}

	fd=upc_all_fopen_trans( fname, UPC_INDIVIDUAL_FP|UPC_RDONLY|UPC_CREATE, 0666, tid);
	upc_barrier;

	if(fd==NULL)
	{
		printf("TH%2d: File open Error\n",MYTHREAD);
		upc_global_exit(-1);
	}

	ret_size = upc_all_fread_local(fd, (void *)buffer, size, nmemb, sync);

	/*
	 * Report in thread order. Every thread runs every barrier regardless of
	 * its own read result, so a failure on one thread cannot leave the
	 * others waiting in a barrier it never reaches.
	 */
	for(i=0; i<THREADS; i++)
	{
		if(MYTHREAD==i)
		{
			if( ret_size == -1 )
				printf("upcio test: fread_local error on TH%2d\n",MYTHREAD);
			else
				printf("upcio test: read \"%s\" on TH%2d\n",buffer,MYTHREAD);
		}

		upc_barrier;
	}

	if(upc_all_fclose(fd)!=0)
	{
		printf("TH%2d: File close Error\n",MYTHREAD);
		upc_global_exit(-1);
	}

	if(!MYTHREAD)
		printf("upcio test: Done with fread_local testing\n");

	free(buffer);

	return 0;
}
Beispiel #7
0
/*
 * UPC matrix transpose kernel: B = A^T.
 *
 * Usage: <prog> <# iterations> <matrix order> [tile size]
 *
 * The matrix order N must be positive and divisible by THREADS. Each thread
 * owns a slab of N/THREADS columns. Every iteration fetches one
 * sizex-by-sizex block per thread with upc_memget() into a buffer array and
 * transposes it locally (optionally tiled) into the output array. Iteration
 * 0 is an untimed warm-up. After the timed iterations the input and output
 * arrays are verified and thread 0 prints the transfer rate.
 *
 * Invalid arguments end the run through upc_global_exit(EXIT_FAILURE);
 * verification failures end it through die().
 */
int main(int argc, char ** argv) {
  int    N;
  int    tile_size=32;  /* default tile size for tiling of local transpose */
  int    num_iterations;/* number of times to do the transpose             */
  int    tiling;        /* boolean: true if tiling is used                 */
  double total_bytes;   /* combined size of matrices                       */
  double start_time = 0.0, /* timing parameters                            */
         end_time, avgtime;

  /*********************************************************************
  ** read and test input parameters
  *********************************************************************/

  if(argc != 3 && argc != 4){
    if(MYTHREAD == 0)
      printf("Usage: %s <# iterations> <matrix order> [tile size]\n", *argv);
    upc_global_exit(EXIT_FAILURE);
  }

  num_iterations = atoi(*++argv);
  if(num_iterations < 1){
    if(MYTHREAD == 0)
      printf("ERROR: iterations must be >= 1 : %d \n", num_iterations);
    upc_global_exit(EXIT_FAILURE);
  }

  N = atoi(*++argv);
  /* Reject 0 as well: the message promises an order greater than 0. */
  if(N < 1){
    if(MYTHREAD == 0)
      printf("ERROR: Matrix Order must be greater than 0 : %d \n", N);
    upc_global_exit(EXIT_FAILURE);
  }

  if (argc == 4)
    tile_size = atoi(*++argv);

  /*a non-positive tile size means no tiling of the local transpose */
  tiling = (tile_size > 0) && (tile_size < N);
  if(!tiling)
    tile_size = N;

  int sizex = N / THREADS;
  if(N % THREADS != 0) {
    if(MYTHREAD == 0)
      printf("N %% THREADS != 0\n");
    upc_global_exit(EXIT_FAILURE);
  }
  int sizey = N;

  if(MYTHREAD == 0) {
    printf("Parallel Research Kernels version %s\n", PRKVERSION);
    printf("UPC matrix transpose: B = A^T\n");
    printf("Number of threads    = %d\n", THREADS);
    printf("Matrix order         = %d\n", N);
    printf("Number of iterations = %d\n", num_iterations);
    if (tiling)
          printf("Tile size            = %d\n", tile_size);
    else  printf("Untiled\n");
  }

  /*********************************************************************
  ** Allocate memory for input and output matrices
  *********************************************************************/

  /* Computed in double so that large N cannot overflow integer arithmetic. */
  total_bytes = 2.0 * sizeof(double) * N * N;

  int myoffsetx = MYTHREAD * sizex;
  int myoffsety = 0;

  upc_barrier;

  debug("Allocating arrays (%d, %d), offset (%d, %d)", sizex, sizey, myoffsetx, myoffsety);
  local_shared_block_ptrs in_array  = shared_2d_array_alloc(sizex, sizey, myoffsetx, myoffsety);
  local_shared_block_ptrs out_array = shared_2d_array_alloc(sizex, sizey, myoffsetx, myoffsety);
  local_shared_block_ptrs buf_array = shared_2d_array_alloc(sizex, sizey, myoffsetx, myoffsety);

  /* Publish this thread's arrays so other threads can reach them. */
  in_arrays[MYTHREAD] = in_array;
  out_arrays[MYTHREAD] = out_array;
  buf_arrays[MYTHREAD] = buf_array;

  double **in_array_private = shared_2d_array_to_private(in_array, sizex, sizey, myoffsetx, myoffsety);
  double **out_array_private = shared_2d_array_to_private(out_array, sizex, sizey, myoffsetx, myoffsety);
  double **buf_array_private = shared_2d_array_to_private(buf_array, sizex, sizey, myoffsetx, myoffsety);

  upc_barrier;

  /*********************************************************************
  ** Initialize the matrices
  *********************************************************************/
  for(int y=myoffsety; y<myoffsety + sizey; y++){
    for(int x=myoffsetx; x<myoffsetx + sizex; x++){
      in_array_private[y][x] = (double) (x+N*y);
      out_array[y][x] = -1.0;
    }
  }
  upc_barrier;

  /* Sanity-check the initialisation through the private views. */
  for(int y=myoffsety; y<myoffsety + sizey; y++){
    for(int x=myoffsetx; x<myoffsetx + sizex; x++){
      if(in_array_private[y][x] !=(double) (x+N*y))
        /* %f needs a double argument; the expected value is an int. */
        die("x=%d y=%d in_array=%f != %f", x, y, in_array[y][x], (double)(x+N*y));
      if(out_array_private[y][x] != -1.0)
        die("out_array_private error");
    }
  }

  /*********************************************************************
  ** Transpose
  *********************************************************************/
  /* Bytes in one sizex-by-sizex block; size_t avoids narrowing to int. */
  size_t transfer_size = (size_t)sizex * sizex * sizeof(double);
  if(MYTHREAD == 0)
    debug("transfer size = %zu", transfer_size);

  for(int iter=0; iter<=num_iterations; iter++){
    /* start timer after a warmup iteration */
    if(iter == 1){
      upc_barrier;
      start_time = wtime();
    }

    for(int i=0; i<THREADS; i++){
      /* Start at a different peer on each thread. */
      int local_blk_id = (MYTHREAD + i) % THREADS;
      int remote_blk_id = MYTHREAD;
      int remote_thread = local_blk_id;

      upc_memget(&buf_array_private[local_blk_id * sizex][myoffsetx],
                  &in_arrays[remote_thread][remote_blk_id * sizex][remote_thread * sizex], transfer_size);

#define OUT_ARRAY(x,y) out_array_private[local_blk_id * sizex + x][myoffsetx + y]
#define BUF_ARRAY(x,y) buf_array_private[local_blk_id * sizex + x][myoffsetx + y]

      if(!tiling){
        for(int x=0; x<sizex; x++){
          for(int y=0; y<sizex; y++){
            OUT_ARRAY(x,y) = BUF_ARRAY(y,x);
          }
        }
      }
      else{
        for(int x=0; x<sizex; x+=tile_size){
          for(int y=0; y<sizex; y+=tile_size){
            for(int bx=x; bx<MIN(sizex, x+tile_size); bx++){
              for(int by=y; by<MIN(sizex, y+tile_size); by++){
                OUT_ARRAY(bx,by) = BUF_ARRAY(by,bx);
              }
            }
          }
        }
      }
    }
    upc_barrier;
  }

  upc_barrier;
  end_time = wtime();

  /*********************************************************************
  ** Analyze and output results.
  *********************************************************************/
  for(int y=myoffsety; y<myoffsety + sizey; y++){
    for(int x=myoffsetx; x<myoffsetx + sizex; x++){
      if(in_array_private[y][x] != (double)(x+ N*y))
        die("Error in input: x=%d y=%d", x, y);
      if(out_array_private[y][x] != (double)(y + N*x))
        die("x=%d y=%d out_array=%f != %f   %d %d", x, y, out_array[y][x], (double)(y + N*x), (int)(out_array[y][x]) % N, (int)(out_array[y][x]) / N);
    }
  }

  if(MYTHREAD == 0){
    printf("Solution validates\n");
    avgtime = (end_time - start_time) / num_iterations;
    /* total_bytes is 2*N*N*sizeof(double), already held as a double. */
    double rate = total_bytes / avgtime * 1.0E-06;
    printf("Rate (MB/s): %lf Avg time (s): %lf\n",rate, avgtime);
  }

  return 0;
}
Beispiel #8
0
/*
 * UPC pipeline kernel on a 2D grid.
 *
 * Usage: <prog> <# iterations> <first array dimension> <second array dimension>
 *
 * The first grid dimension (m) is split across threads. For every line j
 * each thread needs the last column of its left neighbour before it can
 * compute its own segment; the hand-off uses either Berkeley UPC semaphores
 * (USE_BUPC_EXT) or the shared current_max_line[] counters. Iteration 0 is
 * an untimed warm-up. After the iterations the last thread checks the
 * top-right corner value against the expected value and prints the rate
 * based on the largest per-thread time.
 *
 * Invalid arguments end the run through upc_global_exit(EXIT_FAILURE). The
 * last thread ends with exit(EXIT_FAILURE) or exit(EXIT_SUCCESS) depending
 * on verification; the other threads return 0.
 */
int main(int argc, char ** argv) {

  long   m, n;            /* grid dimensions                                     */
  int    i, j, iter;      /* dummies                                             */
  int    iterations;      /* number of times to run the pipeline algorithm       */
  double pipeline_time,   /* timing parameters                                   */
         avgtime, max_time;
  double epsilon = 1.e-8; /* error tolerance                                     */
  double corner_val;      /* verification value at top right corner of grid      */

  /*******************************************************************************
  ** process and test input parameters
  ********************************************************************************/

  if(MYTHREAD == THREADS-1){
    printf("Parallel Research Kernels version %s\n", PRKVERSION);
    printf("UPC pipeline execution on 2D grid\n");
  }

  if (argc != 4){
    if(MYTHREAD == THREADS-1){
      printf("Usage: %s <# iterations> <first array dimension> ", *argv);
      printf("<second array dimension>\n");
    }
    upc_global_exit(EXIT_FAILURE);
  }

  iterations  = atoi(*++argv);
  if (iterations < 1){
    if(MYTHREAD == THREADS-1)
      printf("ERROR: iterations must be >= 1 : %d \n",iterations);
    upc_global_exit(EXIT_FAILURE);
  }

  m  = atol(*++argv);
  n  = atol(*++argv);

  if (m < 1 || n < 1){
    if(MYTHREAD == THREADS-1)
      /* m and n are long, so they need %ld rather than %d. */
      printf("ERROR: grid dimensions must be positive: %ld, %ld \n", m, n);
    upc_global_exit(EXIT_FAILURE);
  }

  if(MYTHREAD == THREADS-1){
    printf("Number of threads         = %d\n", THREADS);
    printf("Grid sizes                = %ld, %ld\n", m, n);
    printf("Number of iterations      = %d\n", iterations);
#if USE_BUPC_EXT
    printf("Using Berkeley UPC extensions\n");
#endif
  }

  /*********************************************************************
  ** Allocate memory for input and output matrices
  *********************************************************************/
#if USE_BUPC_EXT
  bupc_sem_t *myflag = bupc_sem_alloc(BUPC_SEM_INTEGER | BUPC_SEM_MPRODUCER);
  upc_barrier;
  allflags[MYTHREAD] = myflag;
  upc_barrier;
  bupc_sem_t *mypeer = allflags[(MYTHREAD+1) % THREADS];
#endif

  /* Split m across threads; the first `leftover` threads get one extra column. */
  long segment_size = m / THREADS;
  int leftover = m % THREADS;
  int myoffsetx, sizex;

  if(MYTHREAD < leftover){
    myoffsetx = (segment_size + 1) * MYTHREAD;
    sizex = segment_size + 1;
  }else{
    myoffsetx = (segment_size + 1) * leftover + segment_size * (MYTHREAD - leftover);
    sizex = segment_size;
  }

#if USE_BUPC_EXT
  /* Extend the segment one column to the left for the neighbour's value. */
  if(MYTHREAD != 0){
    myoffsetx -= 1;
    sizex += 1;
  }
#endif

  int sizey = n;
  int myoffsety = 0;

  upc_barrier;

  debug("Allocating arrays (%d, %d), offset (%d, %d)", sizex, sizey, myoffsetx, myoffsety);
  local_shared_block_ptrs in_array  = shared_2d_array_alloc(sizex, sizey, myoffsetx, myoffsety);

  in_arrays[MYTHREAD] = in_array;

  double **in_array_private = shared_2d_array_to_private(in_array, sizex, sizey, myoffsetx, myoffsety);

  /* Thread 0 has no left neighbour, so all of its lines are ready at once. */
  if(MYTHREAD == 0)
    current_max_line[MYTHREAD] = sizey;
  else
    current_max_line[MYTHREAD] = 0;

  upc_barrier;

  /*********************************************************************
  ** Initialize the matrices
  *********************************************************************/

  /* clear the array                                                             */
  for (j=0; j<n; j++)
    for (i=myoffsetx; i<myoffsetx + sizex; i++)
      ARRAY(i, j) = 0.0;

  /* set boundary values (bottom and left side of grid                           */
  if(MYTHREAD == 0)
    for (j=0; j<n; j++)
      ARRAY(0, j) = (double) j;

  for (i=myoffsetx; i<myoffsetx + sizex; i++)
    ARRAY(i, 0) = (double) i;

  upc_barrier;

  for (iter = 0; iter<=iterations; iter++){
    /* start timer after a warmup iteration */
    if (iter == 1)
      pipeline_time = wtime();
    if(MYTHREAD == 0)
      debug("start it %d, %f", iter, ARRAY(0, 0));

    if(MYTHREAD != THREADS - 1)  // Send the element in line 0
      in_arrays[MYTHREAD + 1][0][myoffsetx + sizex -1] = ARRAY(myoffsetx + sizex - 1, 0);

    for (j=1; j<n; j++) {
#if USE_BUPC_EXT
      if(MYTHREAD > 0){
        bupc_sem_wait(myflag);
      }

      for (i=myoffsetx+1; i<myoffsetx + sizex; i++)
        ARRAY(i, j) = ARRAY(i-1, j) + ARRAY(i, j-1) - ARRAY(i-1, j-1);

      if(MYTHREAD != THREADS - 1){
        in_arrays[MYTHREAD + 1][j][myoffsetx + sizex -1] = ARRAY(myoffsetx + sizex - 1, j);

        bupc_sem_post(mypeer);
      }
#else
      /* Spin until the left neighbour has published line j. */
      while(j > current_max_line[MYTHREAD]) // Normally not necessary: bupc_poll();
        ;

      if(MYTHREAD > 0)
        ARRAY(myoffsetx, j) = in_arrays[MYTHREAD - 1][j][myoffsetx-1] + ARRAY(myoffsetx, j-1) - in_arrays[MYTHREAD-1][j-1][myoffsetx-1];

      for (i=myoffsetx+1; i<myoffsetx + sizex; i++)
        ARRAY(i, j) = ARRAY(i-1, j) + ARRAY(i, j-1) - ARRAY(i-1, j-1);

      if(MYTHREAD < THREADS - 1)
        current_max_line[MYTHREAD+1] = j;

#endif
    }

    /* copy top right corner value to bottom left corner to create dependency; we
       need a barrier to make sure the latest value is used. This also guarantees
     that the flags for the next iteration (if any) are not getting clobbered  */
    if(MYTHREAD == 0)
      current_max_line[MYTHREAD] = sizey;
    else
      current_max_line[MYTHREAD] = 0;

    if(MYTHREAD == THREADS - 1){
      in_arrays[0][0][0] = -ARRAY(m-1, n-1);
    }
    upc_barrier;
  }

  pipeline_time = wtime() - pipeline_time;
  times[MYTHREAD] = pipeline_time;

  upc_barrier;

  // Compute max_time over every thread, thread 0 included
  if(MYTHREAD == THREADS - 1){
    max_time = times[MYTHREAD];
    for(i=0; i<THREADS; i++){
      if(max_time < times[i])
        max_time = times[i];
    }
  }

  /*******************************************************************************
  ** Analyze and output results.
  ********************************************************************************/

  /* verify correctness, using top right value;                                  */
  if( MYTHREAD == THREADS - 1){
    corner_val = (double)((iterations+1)*(n+m-2));
    if (fabs(ARRAY(m-1,n-1)-corner_val)/corner_val > epsilon) {
      printf("ERROR: checksum %lf does not match verification value %lf\n",
          ARRAY(m-1, n-1), corner_val);
      exit(EXIT_FAILURE);
    }
#if VERBOSE
    printf("checksum %lf verification value %lf\n",
        ARRAY(m-1, n-1), corner_val);
    printf("Solution validates; verification value = %lf\n", corner_val);
#else
    printf("Solution validates\n");
#endif
    avgtime = max_time/iterations;
    printf("Rate (MFlops/s): %lf Avg time (s): %lf\n",
           1.0E-06 * 2 * ((double)(m-1)*(double)(n-1))/avgtime, avgtime);
    exit(EXIT_SUCCESS);
  }

  return 0;
}
Beispiel #9
0
/*
 * UPC-IO bandwidth test for shared-buffer write and read.
 *
 * Usage: perf -fname filename
 *
 * Thread 0 parses the file name and publishes its length and contents
 * through shared allocations so every thread gets a private copy. Each
 * thread then writes SIZE bytes from a shared buffer at its own file offset
 * with upc_all_fwrite_shared(), the file is synced and closed, and the same
 * region is read back with upc_all_fread_shared(). Thread 0 prints the
 * resulting write and read bandwidth.
 *
 * Returns 0 on success. Calls upc_global_exit(-1) on a usage error or when
 * the private file name copy cannot be allocated; uopen() terminates the
 * run when the file cannot be opened.
 */
int main(int argc, char **argv)
{
	int i, j, ntimes, err, strl;
	double stim, read_tim, write_tim;
	double min_read_tim, min_write_tim, read_bw, write_bw;
	upcio_file_t *fh;
	upc_flag_t sync = 0;
	char *filename;

	shared int *buf;
	shared char *gfilename;
	shared int *len;

	ntimes=1;
/* process 0 takes the file name as a command-line argument and 
   broadcasts it to other processes */
	len = (shared int *) upc_all_alloc(1, sizeof(int));
	upc_barrier;
	if (!MYTHREAD) {
		/* Advance argv until it points at "-fname"; i tracks the next index. */
		i = 1;
		while ((i < argc) && strcmp("-fname", *argv)) {
			i++;
			argv++;
		}
		if (i >= argc) {
			fprintf(stderr, "\n*#  Usage: perf -fname filename\n\n");
			upc_global_exit(-1);
		}
		argv++;
		strl = strlen(*argv);
		upc_memput(len, &strl, sizeof(int));
	}

	upc_barrier;
	upc_memget(&strl, len, sizeof(int));
	upc_barrier;
	gfilename = (shared char *) upc_all_alloc(1,sizeof(char)*(strl));
	if (!MYTHREAD)
	{
		upc_memput(gfilename, *argv, strl);
		fprintf(stderr, "Access size per process = %d bytes, ntimes = %d\n", SIZE, ntimes);
	}

	upc_barrier;
	filename = malloc(sizeof(char)*(strl+1));
	if (filename == NULL) {
		fprintf(stderr, "TH%2d: Error allocating file name\n", MYTHREAD);
		upc_global_exit(-1);
	}
	/* The shared copy carries no terminator, so add one locally. */
	upc_memget(filename, gfilename, strl);
	filename[strl] = '\0';

	/* upc_global_alloc is not collective: every thread that calls it gets
	   its own allocation. This is for shared w/r with INDIVIDUAL FP */
	buf = (shared int *) upc_global_alloc(1,SIZE);

	upc_barrier;
	min_read_tim=0.0;
	min_write_tim=0.0;

	upc_barrier;

	fh = uopen( filename, 0); 
	for (j=0; j<ntimes; j++) {
		upc_barrier;
		stim = UPC_Wtime();
		upc_all_fseek(fh, MYTHREAD*SIZE + SIZE*THREADS*j, UPC_SEEK_SET);
		err = upc_all_fwrite_shared(fh, buf, BLOCK, SIZE, sizeof(unsigned char), sync);
		if( err == -1 )
		{
			fprintf(stderr, "TH%2d: Error in write\n", MYTHREAD);
			break;
		}

		write_tim = UPC_Wtime() - stim;       
		min_write_tim += write_tim;	
	}

	/* Sync while the handle is still open, then close it. */
	upc_all_fsync(fh);
	upc_all_fclose(fh);
	min_write_tim /= ntimes;

	upc_barrier;
	fh = uopen( filename, 1); 
	for (j=0; j<ntimes; j++) {
		upc_barrier;
		stim = UPC_Wtime();
		upc_all_fseek(fh, MYTHREAD*SIZE + SIZE*THREADS*j, UPC_SEEK_SET);
		err = upc_all_fread_shared(fh, buf, BLOCK, SIZE, sizeof(unsigned char), sync);
		if( err == -1 )
		{
			fprintf(stderr, "TH%2d: Error in read\n", MYTHREAD);
			break;
		}

		read_tim = UPC_Wtime() - stim;
		min_read_tim += read_tim;
	}

	upc_all_fclose(fh);
	min_read_tim /= ntimes;
	
	upc_barrier;
    
	if (!MYTHREAD) {
		/* Do the byte count in double so large SIZE*THREADS cannot overflow. */
		read_bw = ((double)SIZE*THREADS*ntimes)/(min_read_tim*1024.0*1024.0);
		write_bw = ((double)SIZE*THREADS*ntimes)/(min_write_tim*1024.0*1024.0);
		printf("TH: %d - Write bandwidth with a prior file sync = %f Mbytes/sec\n", MYTHREAD, write_bw);
		printf("TH: %d - Read bandwidth with a prior file sync = %f Mbytes/sec\n", MYTHREAD, read_bw);
	}

	upc_barrier;
	/* every thread releases the buffer it obtained from upc_global_alloc */
	upc_free(buf);

	/* the collectively allocated objects are released once, by thread 0 */
	if(!MYTHREAD) {
		upc_free(gfilename);
		upc_free(len);
	}

	free(filename);
	return 0;
}