コード例 #1
0
ファイル: transpose.c プロジェクト: beginZero/Kernels
/*
 * UPC matrix transpose kernel: B = A^T.
 *
 * Usage: <prog> <# iterations> <matrix order> [tile size]
 *
 * Each thread owns a slab of N/THREADS columns of the N x N matrices.  In
 * every iteration a thread fetches, block by block, the sizex x sizex blocks
 * it needs from the other threads into a local buffer and transposes each
 * block into its slab of the output matrix.  One extra warm-up iteration is
 * executed before the timer is started.
 *
 * All threads terminate the job through upc_global_exit(EXIT_FAILURE) on
 * invalid arguments; validation failures are reported through die().
 */
int main(int argc, char ** argv) {
  int    N;             /* matrix order                                    */
  int    tile_size=32;  /* default tile size for tiling of local transpose */
  int    num_iterations;/* number of times to do the transpose             */
  int    tiling;        /* boolean: true if tiling is used                 */
  double total_bytes;   /* combined size of matrices                       */
  double start_time,    /* timing parameters                               */
         end_time, avgtime;

  /*********************************************************************
  ** read and test input parameters
  *********************************************************************/

  if(argc != 3 && argc != 4){
    if(MYTHREAD == 0)
      printf("Usage: %s <# iterations> <matrix order> [tile size]\n", *argv);
    upc_global_exit(EXIT_FAILURE);
  }

  num_iterations = atoi(*++argv);
  if(num_iterations < 1){
    if(MYTHREAD == 0)
      printf("ERROR: iterations must be >= 1 : %d \n", num_iterations);
    upc_global_exit(EXIT_FAILURE);
  }

  N = atoi(*++argv);
  /* reject N == 0 as well: the message promises a strictly positive order */
  if(N <= 0){
    if(MYTHREAD == 0)
      printf("ERROR: Matrix Order must be greater than 0 : %d \n", N);
    upc_global_exit(EXIT_FAILURE);
  }

  if (argc == 4)
    tile_size = atoi(*++argv);

  /*a non-positive tile size means no tiling of the local transpose */
  tiling = (tile_size > 0) && (tile_size < N);
  if(!tiling)
    tile_size = N;

  /* every thread owns the same number of columns, so N must divide evenly */
  int sizex = N / THREADS;
  if(N % THREADS != 0) {
    if(MYTHREAD == 0)
      printf("N %% THREADS != 0\n");
    upc_global_exit(EXIT_FAILURE);
  }
  int sizey = N;

  if(MYTHREAD == 0) {
    printf("Parallel Research Kernels version %s\n", PRKVERSION);
    printf("UPC matrix transpose: B = A^T\n");
    printf("Number of threads    = %d\n", THREADS);
    printf("Matrix order         = %d\n", N);
    printf("Number of iterations = %d\n", num_iterations);
    if (tiling)
          printf("Tile size            = %d\n", tile_size);
    else  printf("Untiled\n");
  }

  /*********************************************************************
  ** Allocate memory for input and output matrices
  *********************************************************************/

  /* computed in double so that large N cannot overflow integer arithmetic */
  total_bytes = 2.0 * sizeof(double) * N * N;

  int myoffsetx = MYTHREAD * sizex;
  int myoffsety = 0;

  upc_barrier;

  debug("Allocating arrays (%d, %d), offset (%d, %d)", sizex, sizey, myoffsetx, myoffsety);
  local_shared_block_ptrs in_array  = shared_2d_array_alloc(sizex, sizey, myoffsetx, myoffsety);
  local_shared_block_ptrs out_array = shared_2d_array_alloc(sizex, sizey, myoffsetx, myoffsety);
  local_shared_block_ptrs buf_array = shared_2d_array_alloc(sizex, sizey, myoffsetx, myoffsety);

  /* publish this thread's slabs so that other threads can reach them */
  in_arrays[MYTHREAD] = in_array;
  out_arrays[MYTHREAD] = out_array;
  buf_arrays[MYTHREAD] = buf_array;

  double **in_array_private = shared_2d_array_to_private(in_array, sizex, sizey, myoffsetx, myoffsety);
  double **out_array_private = shared_2d_array_to_private(out_array, sizex, sizey, myoffsetx, myoffsety);
  double **buf_array_private = shared_2d_array_to_private(buf_array, sizex, sizey, myoffsetx, myoffsety);

  upc_barrier;

  /*********************************************************************
  ** Initialize the matrices
  *********************************************************************/
  for(int y=myoffsety; y<myoffsety + sizey; y++){
    for(int x=myoffsetx; x<myoffsetx + sizex; x++){
      in_array_private[y][x] = (double) (x+N*y);
      out_array[y][x] = -1.0;
    }
  }
  upc_barrier;

  /* sanity check: the private and shared views must agree after initialization */
  for(int y=myoffsety; y<myoffsety + sizey; y++){
    for(int x=myoffsetx; x<myoffsetx + sizex; x++){
      if(in_array_private[y][x] !=(double) (x+N*y))
        /* the expected value is cast to double to match the %f conversion */
        die("x=%d y=%d in_array=%f != %f", x, y, in_array[y][x], (double) (x+N*y));
      if(out_array_private[y][x] != -1.0)
        die("out_array_private error");
    }
  }

  /*********************************************************************
  ** Transpose
  *********************************************************************/
  /* size in bytes of one sizex x sizex block; computed in size_t to avoid int overflow */
  size_t transfer_size = (size_t) sizex * (size_t) sizex * sizeof(double);
  if(MYTHREAD == 0)
    debug("transfer size = %zu", transfer_size);

  for(int iter=0; iter<=num_iterations; iter++){
    /* start timer after a warmup iteration */
    if(iter == 1){
      upc_barrier;
      start_time = wtime();
    }

    for(int i=0; i<THREADS; i++){
      /* start with a different peer on every thread: (MYTHREAD + i) % THREADS */
      int local_blk_id = (MYTHREAD + i) % THREADS;
      int remote_blk_id = MYTHREAD;
      int remote_thread = local_blk_id;

      upc_memget(&buf_array_private[local_blk_id * sizex][myoffsetx],
                  &in_arrays[remote_thread][remote_blk_id * sizex][remote_thread * sizex], transfer_size);

#define OUT_ARRAY(x,y) out_array_private[local_blk_id * sizex + x][myoffsetx + y]
#define BUF_ARRAY(x,y) buf_array_private[local_blk_id * sizex + x][myoffsetx + y]

      if(!tiling){
        for(int x=0; x<sizex; x++){
          for(int y=0; y<sizex; y++){
            OUT_ARRAY(x,y) = BUF_ARRAY(y,x);
          }
        }
      }
      else{
        for(int x=0; x<sizex; x+=tile_size){
          for(int y=0; y<sizex; y+=tile_size){
            for(int bx=x; bx<MIN(sizex, x+tile_size); bx++){
              for(int by=y; by<MIN(sizex, y+tile_size); by++){
                OUT_ARRAY(bx,by) = BUF_ARRAY(by,bx);
              }
            }
          }
        }
      }
    }
    upc_barrier;
  }

  upc_barrier;
  end_time = wtime();

  /*********************************************************************
  ** Analyze and output results.
  *********************************************************************/
  for(int y=myoffsety; y<myoffsety + sizey; y++){
    for(int x=myoffsetx; x<myoffsetx + sizex; x++){
      if(in_array_private[y][x] != (double)(x+ N*y))
        die("Error in input: x=%d y=%d", x, y);
      if(out_array_private[y][x] != (double)(y + N*x))
        die("x=%d y=%d in_array=%f != %f   %d %d", x, y, out_array[y][x], (double)(y + N*x), (int)(out_array[y][x]) % N, (int)(out_array[y][x]) / N);
    }
  }

  if(MYTHREAD == 0){
    printf("Solution validates\n");
    /* the timer covers num_iterations iterations (the warm-up one is excluded) */
    avgtime = (end_time - start_time) / num_iterations;
    /* total_bytes = bytes read from A plus bytes written to B per iteration */
    double rate = total_bytes / avgtime * 1.0E-06;
    printf("Rate (MB/s): %lf Avg time (s): %lf\n",rate, avgtime);
  }

  return 0;
}
コード例 #2
0
ファイル: perf_shared.c プロジェクト: hugegreenbug/pupcio
/*
 * UPC-IO bandwidth test using shared buffers.
 *
 * Usage: perf -fname filename
 *
 * Thread 0 parses the file name and hands it to the other threads through
 * shared memory.  Every thread then writes SIZE bytes to its own region of
 * the file, the file is synced and closed, re-opened, and the same regions
 * are read back.  Thread 0 prints the resulting write and read bandwidth.
 *
 * Returns 0 on completion; a usage or allocation error terminates the whole
 * job through upc_global_exit(-1).
 */
int main(int argc, char **argv)
{
	int i, j, ntimes, err, strl;
	double stim, read_tim, write_tim;
	double min_read_tim, min_write_tim, read_bw, write_bw;
	upcio_file_t *fh;
	upc_flag_t sync = 0;
	char *filename;

	shared int *buf;
	shared char *gfilename;
	shared int *len;

	ntimes=1;
/* process 0 takes the file name as a command-line argument and 
   broadcasts it to other processes */
	len = (shared int *) upc_all_alloc(1, sizeof(int));
	upc_barrier;
	if (!MYTHREAD) {
		/* scan argv for "-fname"; i counts the entries inspected so far */
		i = 1;
		while ((i < argc) && strcmp("-fname", *argv)) {
			i++;
			argv++;
		}
		if (i >= argc) {
			fprintf(stderr, "\n*#  Usage: perf -fname filename\n\n");
			upc_global_exit(-1);
		}
		argv++;
		strl = strlen(*argv);
		upc_memput(len, &strl, sizeof(int));
	}

	/* every thread learns the name length before the name buffer is sized */
	upc_barrier;
	upc_memget(&strl, len, sizeof(int));
	upc_barrier;
	gfilename = (shared char *) upc_all_alloc(1,sizeof(char)*(strl));
	if (!MYTHREAD)
	{
		upc_memput(gfilename, *argv, strl);
		fprintf(stderr, "Access size per process = %d bytes, ntimes = %d\n", SIZE, ntimes);
	}

	upc_barrier;
	filename = (char *) malloc(sizeof(char)*(strl+1));
	if (filename == NULL) {
		fprintf(stderr, "TH%2d: cannot allocate file name buffer\n", MYTHREAD);
		upc_global_exit(-1);
	}
	/* the shared copy carries no terminator, so it is appended locally */
	upc_memget(filename, gfilename, strl);
	filename[strl] = '\0';

	/* allocate the shared buf on each thread
	   this is for shared w/r with INDIVIDUAL FP */
	buf = (shared int *) upc_global_alloc(1,SIZE);

	upc_barrier;
	min_read_tim=0.0;
	min_write_tim=0.0;

	upc_barrier;

	/* ---- write phase ---- */
	fh = uopen( filename, 0); 
	for (j=0; j<ntimes; j++) {
		upc_barrier;
		stim = UPC_Wtime();
		upc_all_fseek(fh, MYTHREAD*SIZE + SIZE*THREADS*j, UPC_SEEK_SET);
		err = upc_all_fwrite_shared(fh, buf, BLOCK, SIZE, sizeof(unsigned char), sync);
		if( err == -1 )
		{
			fprintf(stderr, "TH%2d: Error in write\n", MYTHREAD);
			break;
		}

		write_tim = UPC_Wtime() - stim;       
		min_write_tim += write_tim;	
	}

	/* the sync must use the handle while it is still open, so it precedes the close */
	upc_all_fsync(fh);
	upc_all_fclose(fh);
	min_write_tim /= ntimes;

	/* ---- read phase ---- */
	upc_barrier;
	fh = uopen( filename, 1); 
	for (j=0; j<ntimes; j++) {
		upc_barrier;
		stim = UPC_Wtime();
		upc_all_fseek(fh, MYTHREAD*SIZE + SIZE*THREADS*j, UPC_SEEK_SET);
		err = upc_all_fread_shared(fh, buf, BLOCK, SIZE, sizeof(unsigned char), sync);
		if( err == -1 )
		{
			fprintf(stderr, "TH%2d: Error in read\n", MYTHREAD);
			break;
		}

		read_tim = UPC_Wtime() - stim;
		min_read_tim += read_tim;
	}

	upc_all_fclose(fh);
	min_read_tim /= ntimes;

	upc_barrier;

	if (!MYTHREAD) {
		/* the byte count is formed in double so large SIZE*THREADS cannot overflow */
		read_bw = ((double)SIZE*THREADS*ntimes)/(min_read_tim*1024.0*1024.0);
		write_bw = ((double)SIZE*THREADS*ntimes)/(min_write_tim*1024.0*1024.0);
		printf("TH: %d - Write bandwidth with a prior file sync = %f Mbytes/sec\n", MYTHREAD, write_bw);
		printf("TH: %d - Read bandwidth with a prior file sync = %f Mbytes/sec\n", MYTHREAD, read_bw);
	}

	upc_barrier;
	/* upc_global_alloc is not collective: each thread received its own buf
	   and therefore releases its own allocation */
	upc_free(buf);
	/* objects from the collective upc_all_alloc are released exactly once */
	if(!MYTHREAD) {
		upc_free(gfilename);
		upc_free(len);
	}

	free(filename);
	return 0;
}
コード例 #3
0
ファイル: stencil.c プロジェクト: afanfa/Kernels
/*
 * UPC stencil kernel on a 2D grid.
 *
 * Usage: <prog> <# iterations> <array dimension> [x_tiles]
 *
 * The n x n grid is split into Num_procsx x Num_procsy tiles, one per UPC
 * thread.  Each thread allocates its tile plus a border of RADIUS cells on
 * every side, refreshes those border cells from its neighbours at the start
 * of each iteration, applies the star-shaped stencil to its interior points
 * and then adds 1.0 to its part of the input.  After the last iteration the
 * per-thread L1 norms are summed on thread 0 and compared against a
 * reference value.
 *
 * NOTE(review): bail_out(), the IN/OUT/WEIGHT macros and the shared arrays
 * (in_arrays, out_arrays, thread_*, times, norms) are defined outside this
 * block; bail_out() presumably terminates the whole job -- confirm.
 */
int main(int argc, char ** argv) {

  int    n;               /* linear grid dimension */
  int    i, j, ii, jj, it, jt, iter;  /* dummies */
  double norm,            /* L1 norm of solution */
         reference_norm;
  double f_active_points; /* interior of grid with respect to stencil */
  DTYPE  flops;           /* floating point ops per iteration */
  int    iterations;      /* number of times to run the algorithm */
  double stencil_time,    /* timing parameters */
         avgtime, max_time;
  int    stencil_size;    /* number of points in stencil */
  DTYPE  weight[2*RADIUS+1][2*RADIUS+1]; /* weights of points in the stencil */
  int    istart;    /* bounds of grid tile assigned to calling rank        */
  int    jstart;    /* bounds of grid tile assigned to calling rank        */
  int    Num_procsx, Num_procsy;  /* number of tiles in x and y direction  */

  /*******************************************************************************
  ** process and test input parameters
  ********************************************************************************/
  if(MYTHREAD == 0){
    printf("Parallel Research Kernels version %s\n", PRKVERSION);
    printf("UPC stencil execution on 2D grid\n");
    fflush(stdout);
  }

  /* only thread 0 reports argument errors */
  if (argc != 4 && argc != 3)
    if(MYTHREAD == 0)
      bail_out("Usage: %s <# iterations> <array dimension> [x_tiles]\n", *argv);

  iterations  = atoi(*++argv);
  if (iterations < 1)
    if(MYTHREAD == 0)
      bail_out("iterations must be >= 1 : %d", iterations);

  n  = atoi(*++argv);

  if (n < 1)
    if(MYTHREAD == 0)
      bail_out("grid dimension must be positive: %d", n);

  /* the optional third argument fixes the number of tiles in x; 0 means "choose automatically" */
  if (argc == 4)
    Num_procsx  = atoi(*++argv);
  else
    Num_procsx = 0;

  if(Num_procsx < 0)
    if(MYTHREAD == 0)
      bail_out("Number of tiles in the x-direction should be positive (got: %d)", Num_procsx);

  if(Num_procsx > THREADS)
    if(MYTHREAD == 0)
      bail_out("Number of tiles in the x-direction should be < THREADS (got: %d)", Num_procsx);

  /* Num_procsx=0 refers to automated calculation of division on each coordinates like MPI code */
  if(Num_procsx == 0){
    /* search downward from (int)sqrt(THREADS+1) for the first divisor of THREADS */
    for (Num_procsx=(int) (sqrt(THREADS+1)); Num_procsx>0; Num_procsx--) {
      if (!(THREADS%Num_procsx)) {
        Num_procsy = THREADS/Num_procsx;
        break;
      }
    }
  }
  else {
    Num_procsy = THREADS / Num_procsx;
  }

  if(RADIUS < 1)
    if(MYTHREAD == 0)
      bail_out("Stencil radius %d should be positive", RADIUS);

  if(2*RADIUS +1 > n)
    if(MYTHREAD == 0)
      bail_out("Stencil radius %d exceeds grid size %d", RADIUS, n);

  /* a user-supplied Num_procsx that does not divide THREADS is rejected here */
  if(Num_procsx * Num_procsy != THREADS){
    bail_out("Num_procsx * Num_procsy != THREADS");
  }

  /* compute amount of space required for input and solution arrays             */

  /* coordinates of this thread in the Num_procsx x Num_procsy tile grid */
  int my_IDx = MYTHREAD % Num_procsx;
  int my_IDy = MYTHREAD / Num_procsx;

  /* split n columns over Num_procsx tiles; the first `leftover` tiles get one extra column */
  int blockx = n / Num_procsx;
  int leftover = n % Num_procsx;
  if (my_IDx < leftover) {
    istart = (blockx + 1) * my_IDx;
    blockx += 1;
  }
  else {
    istart = (blockx+1) * leftover + blockx * (my_IDx-leftover);
  }

  if (blockx == 0)
    bail_out("No work to do on x-direction!");

  /* same distribution for the rows over Num_procsy tiles */
  int blocky = n / Num_procsy;
  leftover = n % Num_procsy;
  if (my_IDy < leftover) {
    jstart = (blocky+1) * my_IDy;
    blocky += 1;
  }
  else {
    jstart = (blocky+1) * leftover + blocky * (my_IDy-leftover);
  }

  if (blocky == 0)
    bail_out("No work to do on y-direction!");

  if(blockx < RADIUS || blocky < RADIUS) {
    bail_out("blockx < RADIUS || blocky < RADIUS");
  }

  /* the local array starts RADIUS cells before the tile origin ... */
  int myoffsetx = istart - RADIUS;
  int myoffsety = jstart - RADIUS;
  thread_offsetx[MYTHREAD] = myoffsetx;
  thread_offsety[MYTHREAD] = myoffsety;

  /* ... and extends RADIUS cells past it on each side */
  int sizex = blockx + 2*RADIUS;
  int sizey = blocky + 2*RADIUS;
  thread_sizex[MYTHREAD] = sizex;
  thread_sizey[MYTHREAD] = sizey;

  upc_barrier;

  local_shared_block_ptrs in_array  = shared_2d_array_alloc(sizex, sizey, myoffsetx, myoffsety);
  local_shared_block_ptrs out_array = shared_2d_array_alloc(sizex, sizey, myoffsetx, myoffsety);

  /* publish this thread's arrays in the per-thread tables */
  in_arrays[MYTHREAD] = in_array;
  out_arrays[MYTHREAD] = out_array;

  DTYPE **in_array_private = shared_2d_array_to_private(in_array, sizex, sizey, myoffsetx, myoffsety);
  DTYPE **out_array_private = shared_2d_array_to_private(out_array, sizex, sizey, myoffsetx, myoffsety);

  /* all entries of in_arrays/out_arrays must be written before they are read below */
  upc_barrier;

  private_in_arrays = prk_malloc(sizeof(private_shared_block_ptrs) * THREADS);
  if(private_in_arrays == NULL)
    bail_out("Cannot allocate private_in_arrays");

  private_out_arrays = prk_malloc(sizeof(private_shared_block_ptrs) * THREADS);
  if(private_out_arrays == NULL)
    bail_out("Cannot allocate private_out_arrays");

  /* build one handle per thread for the input and output arrays */
  for(int thread=0; thread<THREADS; thread++){
    private_in_arrays[thread] = partially_privatize(in_arrays[thread], thread);
    private_out_arrays[thread] = partially_privatize(out_arrays[thread], thread);
  }

  /* initialize the input and output arrays */
  for(int y=myoffsety; y<myoffsety + sizey; y++){
    for(int x=myoffsetx; x<myoffsetx + sizex; x++){
      in_array_private[y][x] = COEFX*x + COEFY*y;
      out_array[y][x] = 0.;
    }
  }
  upc_barrier;

  /* sanity check: re-read the input through the private view */
  for(int y=myoffsety; y<myoffsety + sizey; y++){
    for(int x=myoffsetx; x<myoffsetx + sizex; x++){
      if(in_array_private[y][x] != COEFX*x + COEFY*y)
        bail_out("x=%d y=%d in_array=%f != %f", x, y, in_array[y][x], COEFX*x + COEFY*y);
    }
  }

  /* fill the stencil weights to reflect a discrete divergence operator */
  for (jj=-RADIUS; jj<=RADIUS; jj++)
    for (ii=-RADIUS; ii<=RADIUS; ii++)
      WEIGHT(ii, jj) = (DTYPE)0.0;

  /* star-shaped stencil: 4*RADIUS arm points plus the centre */
  stencil_size = 4*RADIUS+1;
  for (ii=1; ii<=RADIUS; ii++) {
    WEIGHT(0, ii) = WEIGHT( ii,0) =  (DTYPE) (1.0/(2.0*ii*RADIUS));
    WEIGHT(0,-ii) = WEIGHT(-ii,0) = -(DTYPE) (1.0/(2.0*ii*RADIUS));
  }

  if(MYTHREAD == 0){
    printf("Number of threads      = %d\n", THREADS);
    printf("Grid size              = %d\n", n);
    printf("Radius of stencil      = %d\n", RADIUS);
    printf("Tiles in x/y-direction = %d/%d\n", Num_procsx, Num_procsy);
#if DOUBLE
    printf("Data type              = double precision\n");
#else
    printf("Data type              = single precision\n");
#endif
#if LOOPGEN
    printf("Script used to expand stencil loop body\n");
#else
    printf("Compact representation of stencil loop body\n");
#endif
    printf("Number of iterations   = %d\n", iterations);
  }

  upc_barrier;

  /* [startx,endx) x [starty,endy) is the part of the tile the stencil is applied to */
  int startx = myoffsetx + RADIUS;
  int endx = myoffsetx + sizex - RADIUS;

  int starty = myoffsety + RADIUS;
  int endy = myoffsety + sizey - RADIUS;

  /* tiles on the edge of the tile grid skip a further RADIUS cells on that side */
  if(my_IDx == 0)
    startx += RADIUS;

  if(my_IDx == Num_procsx - 1)
    endx -= RADIUS;

  if(my_IDy == 0)
    starty += RADIUS;

  if(my_IDy == Num_procsy - 1)
    endy -= RADIUS;

  upc_barrier;

  /* iterations+1 passes are executed; pass 0 is the untimed warm-up */
  for (iter = 0; iter<=iterations; iter++){
    /* start timer after a warmup iteration */
    if (iter == 1) {
      upc_barrier;
      stencil_time = wtime();
    }

    /* Get ghost zones */
    /* NORTH */
    if(my_IDy != 0){
      int peer = (my_IDy - 1) * Num_procsx + my_IDx;
      /* one upc_memget per row of the RADIUS rows just before starty */
      for (int y=starty - RADIUS; y<starty; y++) {
        int transfer_size = (endx - startx) * sizeof(DTYPE);
        upc_memget(&in_array_private[y][startx], &private_in_arrays[peer][y][startx], transfer_size);
      }
    }
    /* SOUTH */
    if(my_IDy != Num_procsy - 1){
      int peer = (my_IDy + 1) * Num_procsx + my_IDx;
      /* one upc_memget per row of the RADIUS rows starting at endy */
      for (int y=endy; y<endy + RADIUS; y++) {
        int transfer_size = (endx - startx) * sizeof(DTYPE);
        upc_memget(&in_array_private[y][startx], &private_in_arrays[peer][y][startx], transfer_size);
      }
    }
    /* LEFT */
    if(my_IDx != 0){
      int peer = my_IDy * Num_procsx + my_IDx - 1;
      /* the RADIUS columns before startx are copied element by element */
      for (int y=starty; y<endy; y++) {
        for (int x=startx - RADIUS; x<startx; x++) {
          in_array_private[y][x] = private_in_arrays[peer][y][x];
        }
      }
    }
    /* RIGHT*/
    if(my_IDx != Num_procsx - 1){
      int peer = my_IDy * Num_procsx + my_IDx + 1;
      /* the RADIUS columns starting at endx are copied element by element */
      for (int y=starty; y<endy; y++) {
        for (int x=endx; x<endx + RADIUS; x++) {
          in_array_private[y][x] = private_in_arrays[peer][y][x];
        }
      }
    }

    /* Apply the stencil operator */
    for (j=starty; j<endy; j++) {
      for (i=startx; i<endx; i++) {
        #if LOOPGEN
          #include "loop_body_star.incl"
        #else
          /* vertical arm including the centre, then the two horizontal arms */
          for (jj=-RADIUS; jj<=RADIUS; jj++) OUT(i,j) += WEIGHT(0,jj)*IN(i,j+jj);
          for (ii=-RADIUS; ii<0; ii++)       OUT(i,j) += WEIGHT(ii,0)*IN(i+ii,j);
          for (ii=1; ii<=RADIUS; ii++)       OUT(i,j) += WEIGHT(ii,0)*IN(i+ii,j);
        #endif
      }
    }

    upc_barrier; /* <- Necessary barrier: some slow threads could use future data */

    /* add constant to solution to force refresh of neighbor data, if any */
    for(int y=myoffsety + RADIUS; y<myoffsety + sizey - RADIUS; y++)
      for(int x=myoffsetx + RADIUS; x<myoffsetx + sizex - RADIUS; x++)
        in_array_private[y][x] += 1.0;

    upc_barrier; /* <- Necessary barrier: some threads could start on old data */
  } /* end of iterations */

  stencil_time = wtime() - stencil_time;
  times[MYTHREAD] = stencil_time;

  upc_barrier;

  /* Compute max_time: thread 0 takes the maximum over all per-thread times */
  if(MYTHREAD == 0){
    max_time = times[MYTHREAD];
    for(i=1; i<THREADS; i++){
      if(max_time < times[i])
        max_time = times[i];
    }
  }

  norm = (double) 0.0;
  f_active_points = (double)(n-2*RADIUS) * (double)(n-2*RADIUS);

  /* compute L1 norm in parallel */
  for (int y=starty; y<endy; y++) {
    for (int x=startx; x<endx; x++) {
      norm += (double)ABS(out_array[y][x]);
    }
  }

  /* each thread stores its share of the norm, already divided by the global point count */
  norm /= f_active_points;
  norms[MYTHREAD] = norm;

  upc_barrier;

  if(MYTHREAD == 0){
    /* sum the per-thread contributions */
    norm = 0.;
    for(int i=0; i<THREADS; i++) norm += norms[i];

    /*******************************************************************************
    ** Analyze and output results.
    ********************************************************************************/

    /* verify correctness */
    reference_norm = (double) (iterations+1) * (COEFX + COEFY);

    if (ABS(norm - reference_norm) > EPSILON)
      bail_out("L1 norm = "FSTR", Reference L1 norm = "FSTR"\n", norm, reference_norm);
    else {
      printf("Solution validates\n");
#if VERBOSE
      printf("Reference L1 norm = "FSTR", L1 norm = "FSTR"\n",
             reference_norm, norm);
#endif
    }

    /* flop count uses 2*stencil_size+1 operations per active point */
    flops = (DTYPE) (2*stencil_size+1) * f_active_points;
    /* average over the timed iterations, based on the slowest thread */
    avgtime = max_time/iterations;
    printf("Rate (MFlops/s): "FSTR"  Avg time (s): %lf\n",
           1.0E-06 * flops/avgtime, avgtime);

    /* only thread 0 calls exit() explicitly; other threads fall off the end of main */
    exit(EXIT_SUCCESS);
  }
}
コード例 #4
0
ファイル: pgen_obj.c プロジェクト: jvpoulos/cs267-hw3
/*
 * user_main -- C code emitted by a UPC-to-C translator for pgen.upc (the
 * `#line` directives map each statement back to that file).
 *
 * Visible behaviour:
 *   1. Input: each thread computes its share of the k-mers in the file named
 *      by argv[1] (23 bytes per k-mer record), and reads that byte range
 *      into a malloc'ed buffer.
 *   2. Construction: every record is hashed with hashkmer(); records whose
 *      hash equals the calling thread are added to a private hash table,
 *      the others are staged in a shared transfer area, grouped by target
 *      thread.  After a barrier each thread fetches the records staged for
 *      it by every other thread and adds them to its table.
 *   3. Traversal: only timed here; no work appears between the timer calls.
 *   4. Thread 0 prints the three timings.
 *
 * Returns 0.  On a seek or short-read error the function prints a message
 * and calls upcri_do_exit(0).
 *
 * NOTE(review): the results of malloc() and fopen() are used without a NULL
 * check.  Any fix belongs in pgen.upc, not in this generated file.
 */
extern int user_main(
  int argc,
  char ** argv)
#line 14 "pgen.upc"
{
#line 14 "pgen.upc"
  UPCR_BEGIN_FUNCTION();
  /* translator temporaries holding call results */
  register _IEEE64 _bupc_comma;
  register _INT64 _bupc_comma0;
  register _INT32 _bupc_comma1;
  register _UINT64 _bupc_comma2;
  register _IEEE64 _bupc_comma3;
  register _IEEE64 _bupc_comma4;
  register _INT64 _bupc_comma5;
  register _INT64 _bupc_comma7;
  register _INT64 _bupc_comma6;
  register _IEEE64 _bupc_comma8;
  register _IEEE64 _bupc_comma9;
  register _IEEE64 _bupc_comma10;
  /* accumulated wall-clock time of the three phases */
  _IEEE64 inputTime;
  _IEEE64 constrTime;
  _IEEE64 traversalTime;
  int n_total_kmers;
  int n_kmers_to_process_ideal;
  int start_kmer;
  int end_kmer;
  int n_kmers_to_process;
  int char_start_position;
  int chars_to_read;
  unsigned char * buffer;
  unsigned char * _bupc__casttmp9;
  struct __sFILE * input_file;
  int _bupc__spilleq10;
  unsigned long _bupc__spilleq11;
  struct memory_heap_t private_memory_heap;
  struct hash_table_t * private_hashtable;
  int nints;
  upcr_pshared_ptr_t process_kmer_list_offsets_global;
  int * process_kmer_list_offsets;
  int max_kmers_to_transfer_to_single_process;
  int nchars;
  upcr_pshared_ptr_t kmers_to_transfer_global;
  char * kmers_to_transfer;
  int i;
  int process_owner;
  int _bupc_w2c_i0;
  int n_kmers_char_to_transfer_to_self;
  int j;
  long long _bupc__spilleq12;
  void * _bupc_call5;
  struct __sFILE * _bupc_call6;
  struct hash_table_t * _bupc_call7;
  upcr_shared_ptr_t _bupc_call8;
  upcr_shared_ptr_t _bupc_call9;
  upcr_pshared_ptr_t _bupc_Mstopcvt10;
  upcr_pshared_ptr_t _bupc_Mptra11;
  int * _bupc_Mcvtptr12;
  upcr_pshared_ptr_t _bupc_Mstopcvt13;
  upcr_pshared_ptr_t _bupc_Mptra14;
  char * _bupc_Mcvtptr15;
  upcr_pshared_ptr_t _bupc_Mptra16;
  upcr_pshared_ptr_t _bupc_Mptra17;
  int _bupc_spillld18;
  upcr_pshared_ptr_t _bupc_Mptra19;
  upcr_pshared_ptr_t _bupc_Mptra20;
  upcr_shared_ptr_t _bupc_Mstopcvt21;
  
#line 17 "pgen.upc"
  inputTime = 0.0;
#line 17 "pgen.upc"
  constrTime = 0.0;
#line 17 "pgen.upc"
  traversalTime = 0.0;
  /* ---- input phase: timed as (time after) - (time before) ---- */
#line 20 "pgen.upc"
  upcr_barrier(346153894, 1);
#line 21 "pgen.upc"
  _bupc_comma = gettime();
#line 21 "pgen.upc"
  inputTime = inputTime - _bupc_comma;
#line 24 "pgen.upc"
  _bupc_comma0 = getNumKmersInUFX((const char *) * (argv + 1LL));
#line 24 "pgen.upc"
  n_total_kmers = _bupc_comma0;
  /* even split of the k-mers over the threads; the last thread takes the remainder */
#line 25 "pgen.upc"
  n_kmers_to_process_ideal = n_total_kmers / ((int) upcr_threads () );
#line 26 "pgen.upc"
  start_kmer = ((int) upcr_mythread () ) * n_kmers_to_process_ideal;
#line 27 "pgen.upc"
  end_kmer = (((int) upcr_mythread () ) + 1) * n_kmers_to_process_ideal;
#line 28 "pgen.upc"
  if(((int) upcr_mythread () ) == (((int) upcr_threads () ) + -1))
#line 28 "pgen.upc"
  {
#line 28 "pgen.upc"
    end_kmer = n_total_kmers;
  }
#line 29 "pgen.upc"
  n_kmers_to_process = end_kmer - start_kmer;
  /* each k-mer record occupies 23 bytes in the input file */
#line 30 "pgen.upc"
  char_start_position = start_kmer * 23;
#line 31 "pgen.upc"
  chars_to_read = n_kmers_to_process * 23;
#line 32 "pgen.upc"
  _bupc_call5 = malloc((unsigned long)(_UINT64)(chars_to_read));
#line 32 "pgen.upc"
  _bupc__casttmp9 = _bupc_call5;
#line 32 "pgen.upc"
  buffer = _bupc__casttmp9;
#line 34 "pgen.upc"
  printf("Process %d: Reading and creating graph for K-mers %d - %d\n", ((int) upcr_mythread () ), start_kmer, end_kmer);
#line 37 "pgen.upc"
  _bupc_call6 = fopen((const char *) * (argv + 1LL), "r");
#line 37 "pgen.upc"
  input_file = _bupc_call6;
  /* whence is the literal 1 -- presumably SEEK_CUR expanded by the preprocessor; confirm */
#line 38 "pgen.upc"
  _bupc_comma1 = fseek(input_file, (long) char_start_position, (int) 1);
#line 38 "pgen.upc"
  _bupc__spilleq10 = _bupc_comma1;
#line 38 "pgen.upc"
  if(_bupc__spilleq10 != 0)
#line 38 "pgen.upc"
  {
#line 39 "pgen.upc"
    printf("Error Seeking...");
#line 40 "pgen.upc"
    upcri_do_exit((int) 0);
  }
  /* read this thread's slice; a short read is treated as an error */
#line 42 "pgen.upc"
  _bupc_comma2 = fread(buffer, (unsigned long) 1ULL, (unsigned long)(_UINT64)(chars_to_read), input_file);
#line 42 "pgen.upc"
  _bupc__spilleq11 = _bupc_comma2;
#line 42 "pgen.upc"
  if(_bupc__spilleq11 != (_UINT64)(chars_to_read))
#line 42 "pgen.upc"
  {
#line 43 "pgen.upc"
    printf("Error reading...");
#line 44 "pgen.upc"
    upcri_do_exit((int) 0);
  }
#line 46 "pgen.upc"
  fclose(input_file);
#line 47 "pgen.upc"
  upcr_barrier(346153895, 1);
#line 48 "pgen.upc"
  _bupc_comma3 = gettime();
#line 48 "pgen.upc"
  inputTime = inputTime + _bupc_comma3;
  /* ---- graph construction phase ---- */
#line 51 "pgen.upc"
  _bupc_comma4 = gettime();
#line 51 "pgen.upc"
  constrTime = constrTime - _bupc_comma4;
  /* private hash table sized at twice the number of locally read k-mers */
#line 56 "pgen.upc"
  _bupc_call7 = create_hash_table((long long)(n_kmers_to_process * 2), &private_memory_heap);
#line 56 "pgen.upc"
  private_hashtable = _bupc_call7;
  /* shared table of per-destination byte counts: THREADS blocks of THREADS ints */
#line 59 "pgen.upc"
  nints = ((int) upcr_threads () );
#line 60 "pgen.upc"
  _bupc_call8 = upc_all_alloc((unsigned long)(_UINT64)(((int) upcr_threads () )), (unsigned long)((_UINT64)(nints) * 4ULL));
#line 60 "pgen.upc"
  _bupc_Mstopcvt10 = UPCR_SHARED_TO_PSHARED(_bupc_call8);
#line 60 "pgen.upc"
  process_kmer_list_offsets_global = _bupc_Mstopcvt10;
  /* local pointer to the calling thread's block of that table */
#line 61 "pgen.upc"
  _bupc_Mptra11 = UPCR_ADD_PSHARED1(process_kmer_list_offsets_global, 4ULL, ((int) upcr_mythread () ));
#line 61 "pgen.upc"
  _bupc_Mcvtptr12 = (int *) UPCR_PSHARED_TO_LOCAL(_bupc_Mptra11);
#line 61 "pgen.upc"
  process_kmer_list_offsets = _bupc_Mcvtptr12;
#line 62 "pgen.upc"
  memset(process_kmer_list_offsets, (int) 0, (unsigned long)((_UINT64)(((int) upcr_threads () )) * 4ULL));
  /* capacity reserved per destination thread: twice the average share */
#line 64 "pgen.upc"
  max_kmers_to_transfer_to_single_process = (n_kmers_to_process_ideal / ((int) upcr_threads () )) * 2;
#line 65 "pgen.upc"
  nchars = (max_kmers_to_transfer_to_single_process * ((int) upcr_threads () )) * 23;
  /* shared staging area: THREADS blocks of nchars bytes */
#line 68 "pgen.upc"
  _bupc_call9 = upc_all_alloc((unsigned long)(_UINT64)(((int) upcr_threads () )), (unsigned long)(_UINT64)(nchars));
#line 68 "pgen.upc"
  _bupc_Mstopcvt13 = UPCR_SHARED_TO_PSHARED(_bupc_call9);
#line 68 "pgen.upc"
  kmers_to_transfer_global = _bupc_Mstopcvt13;
#line 70 "pgen.upc"
  _bupc_Mptra14 = UPCR_ADD_PSHARED1(kmers_to_transfer_global, 1ULL, ((int) upcr_mythread () ));
#line 70 "pgen.upc"
  _bupc_Mcvtptr15 = (char *) UPCR_PSHARED_TO_LOCAL(_bupc_Mptra14);
#line 70 "pgen.upc"
  kmers_to_transfer = _bupc_Mcvtptr15;
  /* walk the local records in steps of 23 bytes */
#line 73 "pgen.upc"
  i = 0;
#line 73 "pgen.upc"
  while(i < (n_kmers_to_process * 23))
#line 73 "pgen.upc"
  {
#line 74 "pgen.upc"
    _bupc_comma5 = hashkmer((long long) ((int) upcr_threads () ), (char *)(buffer + i));
#line 74 "pgen.upc"
    process_owner = _bupc_comma5;
#line 76 "pgen.upc"
    if(process_owner == ((int) upcr_mythread () ))
#line 76 "pgen.upc"
    {
      /* owned locally: insert, passing the bytes at record offsets 20 and 21 */
#line 77 "pgen.upc"
      add_kmer(private_hashtable, &private_memory_heap, buffer + i, (char)(char) * ((buffer + (i + 19)) + 1LL), (char)(char) * ((buffer + (i + 19)) + 2LL));
    }
    else
#line 77 "pgen.upc"
    {
      /* owned elsewhere: append the record to the owner's section of the staging area.
         NOTE(review): no check that the section's capacity is not exceeded */
#line 83 "pgen.upc"
      memcpy(kmers_to_transfer + (*(process_kmer_list_offsets + process_owner) + ((process_owner * max_kmers_to_transfer_to_single_process) * 23)), buffer + i, (unsigned long) 23ULL);
#line 84 "pgen.upc"
      * (process_kmer_list_offsets + process_owner) = *(process_kmer_list_offsets + process_owner) + 23;
    }
#line 86 "pgen.upc"
    _1 :;
#line 86 "pgen.upc"
    i = i + 23;
  }
  /* all staging areas must be complete before any thread reads from them */
#line 89 "pgen.upc"
  upcr_barrier(346153896, 1);
  /* loop over every other thread and pull the records it staged for this thread */
#line 91 "pgen.upc"
  _bupc_w2c_i0 = 0;
#line 91 "pgen.upc"
  while(_bupc_w2c_i0 < ((int) upcr_threads () ))
#line 91 "pgen.upc"
  {
#line 92 "pgen.upc"
    if(_bupc_w2c_i0 != ((int) upcr_mythread () ))
#line 92 "pgen.upc"
    {
      /* number of bytes that thread _bupc_w2c_i0 staged for the calling thread */
#line 93 "pgen.upc"
      _bupc_Mptra16 = UPCR_ADD_PSHARED1(process_kmer_list_offsets_global, 4ULL, _bupc_w2c_i0);
#line 93 "pgen.upc"
      _bupc_Mptra17 = UPCR_ADD_PSHAREDI(_bupc_Mptra16, 4ULL, ((int) upcr_mythread () ));
#line 93 "pgen.upc"
      UPCR_GET_PSHARED(&_bupc_spillld18, _bupc_Mptra17, 0, 4);
#line 93 "pgen.upc"
      n_kmers_char_to_transfer_to_self = _bupc_spillld18;
      /* fetch those bytes into `buffer`, which is reused as scratch space here */
#line 96 "pgen.upc"
      _bupc_Mptra19 = UPCR_ADD_PSHARED1(kmers_to_transfer_global, 1ULL, _bupc_w2c_i0);
#line 96 "pgen.upc"
      _bupc_Mptra20 = UPCR_ADD_PSHAREDI(_bupc_Mptra19, 1ULL, (((int) upcr_mythread () ) * max_kmers_to_transfer_to_single_process) * 23);
#line 96 "pgen.upc"
      _bupc_Mstopcvt21 = UPCR_PSHARED_TO_SHARED(_bupc_Mptra20);
#line 96 "pgen.upc"
      upc_memget(buffer, _bupc_Mstopcvt21, (unsigned long)(_UINT64)(n_kmers_char_to_transfer_to_self));
#line 98 "pgen.upc"
      j = 0;
#line 98 "pgen.upc"
      while(j < n_kmers_char_to_transfer_to_self)
#line 98 "pgen.upc"
      {
        /* diagnostic: print if a received record does not hash to the calling thread */
#line 99 "pgen.upc"
        _bupc_comma7 = hashkmer((long long) ((int) upcr_threads () ), (char *)(buffer + j));
#line 99 "pgen.upc"
        _bupc__spilleq12 = _bupc_comma7;
#line 99 "pgen.upc"
        if(_bupc__spilleq12 != (_INT64)(((int) upcr_mythread () )))
#line 99 "pgen.upc"
        {
#line 99 "pgen.upc"
          _bupc_comma6 = hashkmer((long long) ((int) upcr_threads () ), (char *)(buffer + j));
#line 99 "pgen.upc"
          printf("%d %d\n", ((int) upcr_mythread () ), _bupc_comma6);
        }
        /* the record is inserted regardless of the diagnostic above */
#line 100 "pgen.upc"
        add_kmer(private_hashtable, &private_memory_heap, buffer + j, (char)(char) * ((buffer + (j + 19)) + 1LL), (char)(char) * ((buffer + (j + 19)) + 2LL));
#line 101 "pgen.upc"
        _3 :;
#line 101 "pgen.upc"
        j = j + 23;
      }
    }
#line 103 "pgen.upc"
    _2 :;
#line 103 "pgen.upc"
    _bupc_w2c_i0 = _bupc_w2c_i0 + 1;
  }
#line 105 "pgen.upc"
  upcr_barrier(346153897, 1);
#line 106 "pgen.upc"
  _bupc_comma8 = gettime();
#line 106 "pgen.upc"
  constrTime = constrTime + _bupc_comma8;
  /* ---- traversal phase: only the timer calls and a barrier are present ---- */
#line 109 "pgen.upc"
  _bupc_comma9 = gettime();
#line 109 "pgen.upc"
  traversalTime = traversalTime - _bupc_comma9;
#line 114 "pgen.upc"
  upcr_barrier(346153898, 1);
#line 115 "pgen.upc"
  _bupc_comma10 = gettime();
#line 115 "pgen.upc"
  traversalTime = traversalTime + _bupc_comma10;
  /* thread 0 reports the timings */
#line 119 "pgen.upc"
  if(((int) upcr_mythread () ) == 0)
#line 119 "pgen.upc"
  {
#line 120 "pgen.upc"
    printf("%s: Input set: %s\n", *argv, *(argv + 1LL));
#line 121 "pgen.upc"
    printf("Number of UPC threads: %d\n", ((int) upcr_threads () ));
#line 122 "pgen.upc"
    printf("Input reading time: %f seconds\n", inputTime);
#line 123 "pgen.upc"
    printf("Graph construction time: %f seconds\n", constrTime);
#line 124 "pgen.upc"
    printf("Graph traversal time: %f seconds\n", traversalTime);
  }
#line 126 "pgen.upc"
  UPCR_EXIT_FUNCTION();
#line 126 "pgen.upc"
  return 0;
} /* user_main */
コード例 #5
0
/*
 * Start an asynchronous write of nmemb elements of size bytes each, taken
 * from the shared buffer, to the file behind fh_shared.
 *
 * Parameters:
 *   fh_shared  file handle; the per-thread handle is fh_shared->th[MYTHREAD].
 *   buffer     shared source buffer.
 *   blocksize  block size of the buffer in elements; 0 selects the
 *              non-blocked code paths below.
 *   size       size of one element in bytes.
 *   nmemb      number of elements to write.
 *   ret        passed through unchanged to the UPC_ADIO_Iwrite* routines.
 *   sync_mode  a barrier is executed on entry and on exit unless
 *              UPC_IN_NOSYNC is set.
 *
 * The function returns without doing anything when fh_shared is NULL, when
 * the file was opened with UPC_RDONLY, or when an asynchronous operation is
 * already flagged on the handle (async_flag == 1).
 *
 * On the normal path it sets async_flag, issues the write, advances the
 * private or shared file pointer by size*nmemb, and records the request,
 * operation code, local buffer pointer, block size in bytes and byte count
 * in the per-thread handle. Buffers allocated here (the staging buffer and
 * the displacement arrays) are not freed here; they are stored in the handle
 * -- presumably for the matching completion routine to release (confirm
 * against the wait/test implementation).
 *
 * NOTE(review): malloc results are not checked, and blocksize != 0 with
 * size == 0 would divide by zero in the modulo/division on blocksize_byte.
 * Both are left as in the original because a safe collective error path
 * cannot be determined from this function alone.
 */
void upc_all_fwrite_shared_async( upcio_file_t *fh_shared,
                                  shared void *buffer,
                                  uint32_t blocksize,
                                  upc_off_t size,
                                  uint32_t nmemb,
				  int64_t *ret,
                                  upc_flag_t sync_mode )
{
	Plfs_fd *fd;
	/* Zero/NULL-initialised: not every thread issues a request or owns a  */
	/* staging buffer, but both values are always stored in the handle.    */
	UPC_ADIO_Request request = {0};
	unsigned char *local_buf = NULL;
	upc_off_t count, blocksize_byte, roundsize;
	upc_off_t offset, full_bytes;
	uint32_t round, i;
	upc_off_t start_th, my_th;
	int error_code;
	shared unsigned char * buffer_char;
	struct __struct_thread_upc_file_t *fh;
	upc_off_t *dispsize;
	upc_off_t *disparray;
	upc_off_t disp;
	upc_off_t extra;
	uint32_t extra_block;
	upc_off_t mpi_size;
	upc_off_t nblocks;

	/*------------------------------------------------------------------*/
	/* the file handle has to be valid                                  */
	/*------------------------------------------------------------------*/
	if( fh_shared == NULL )
		return;

	/*------------------------------------------------------------------*/
	/* work on this thread's private view of the file handle            */
	/*------------------------------------------------------------------*/
	fh = (struct __struct_thread_upc_file_t *)(fh_shared->th[MYTHREAD]);
	fd = (Plfs_fd *)fh->adio_fd;

	/*------------------------------------------------------------------*/
	/* refuse to write to a file opened read-only                       */
	/*------------------------------------------------------------------*/
	if( fh->flags & UPC_RDONLY )
		return;

	/*------------------------------------------------------------------*/
	/* refuse to start while another asynchronous op is flagged         */
	/*------------------------------------------------------------------*/
	if( fh->async_flag == 1 )
		return;

	/*------------------------------------------------------------------*/
	/* flag the asynchronous operation as in flight                     */
	/*------------------------------------------------------------------*/
	fh->async_flag = 1;

	/*------------------------------------------------------------------*/
	/* entry synchronisation: barrier unless UPC_IN_NOSYNC is requested */
	/* (UPC_IN_MYSYNC and the default both use a full barrier)          */
	/*------------------------------------------------------------------*/
	if( !(sync_mode & UPC_IN_NOSYNC) )
	{
		upc_barrier;
	}

	count = size*nmemb;                 /* total bytes to write         */
	blocksize_byte = blocksize*size;    /* bytes per block              */

	if( fh->flags & UPC_INDIVIDUAL_FP )
	{
		/*--------------------------------------------------------------*/
		/* individual file pointer: stage the data in a private buffer  */
		/* and write it contiguously at this thread's private pointer   */
		/*--------------------------------------------------------------*/
		if( blocksize )
		{
			roundsize = blocksize_byte * THREADS;
			buffer_char = (shared unsigned char *)buffer;
			start_th = upc_threadof( buffer_char );

			local_buf = malloc(sizeof *local_buf * count);
			my_th = start_th;
			round = 0;

			/* Copy the whole blocks one at a time, stepping through  */
			/* the threads and wrapping to the next round after the   */
			/* last one. The byte offset is a upc_off_t so it cannot  */
			/* wrap before reaching count.                            */
			full_bytes = count - (count % blocksize_byte);
			for( offset = 0; offset < full_bytes; offset += blocksize_byte )
			{
				upc_memget(&local_buf[offset], buffer_char+round*roundsize+my_th-start_th, blocksize_byte);
				my_th++;
				if(my_th == THREADS)
				{
					my_th = 0;
					round++;
				}
			}
			/* trailing partial block (count - offset may be zero)     */
			upc_memget(&local_buf[offset], buffer_char+round*roundsize+my_th-start_th, count-offset);
			UPC_ADIO_IwriteContig( fd, local_buf, count, fh->private_pointer, &request, 
					       ret, &error_code );
		}
		else
		{
			local_buf = malloc(sizeof *local_buf * count);
			upc_memget(local_buf, buffer, count);
			UPC_ADIO_IwriteContig( fd, local_buf, count, fh->private_pointer, &request, 
					       ret, &error_code );
		}
		/*------------------------------------------------------------------*/
		/* increment the file pointer                                       */
		/*------------------------------------------------------------------*/
		fh->private_pointer += count;
	}
	else
	{
		/*--------------------------------------------------------------*/
		/* common (shared) file pointer                                 */
		/*--------------------------------------------------------------*/
		if( blocksize )
		{
			buffer_char = (shared unsigned char *)buffer;
			start_th = upc_threadof( buffer_char );
			roundsize = blocksize_byte * THREADS;
			round = (uint32_t)(count / roundsize);          /* complete rounds      */
			nblocks = (uint32_t)(count / blocksize_byte);   /* complete blocks      */
			extra_block = nblocks%THREADS;                  /* blocks past last round */
			extra = count % blocksize_byte;                 /* bytes past last block  */
			mpi_size = round * blocksize_byte;
			round++;                        /* one more entry for the partial round */

			/* my_th is this thread's position relative to the thread */
			/* holding the start of the buffer; local_buf is the      */
			/* shared address cast to a private pointer.              */
			/* NOTE(review): assumes that address has affinity to     */
			/* the calling thread -- confirm.                         */
			if( MYTHREAD < start_th )       /* wrap around */
			{
				my_th = MYTHREAD + THREADS - start_th;
				local_buf = (unsigned char *)(buffer_char + roundsize - start_th +MYTHREAD );
			}
			else
			{
				my_th = MYTHREAD - start_th;
				local_buf = (unsigned char *)(buffer_char + my_th);
			}

			/* add this thread's part of the incomplete final round   */
			if( my_th < nblocks%THREADS )
				mpi_size += blocksize_byte;
			if( my_th == extra_block )
				mpi_size += extra;

			/* one file displacement / length pair per round          */
			disp = fh->shared_pointer + blocksize_byte * my_th;
			disparray = (upc_off_t *)malloc(round*sizeof(upc_off_t));
			dispsize = (upc_off_t *)malloc(round*sizeof(upc_off_t));
			for( i=0; i<round; i++ )
			{
				disparray[i] = disp + i * roundsize;
				dispsize[i] = blocksize_byte;
			}

			UPC_ADIO_IwriteStrided( fd, 1, &local_buf, &mpi_size, round, disparray, dispsize, &request, 
						ret, &error_code );
			/*------------------------------------------------------------------*/
			/* keep the displacement arrays reachable from the handle           */
			/*------------------------------------------------------------------*/
			fh->disparray = disparray;
			fh->dispsize = dispsize;
		}
		else
		{
			/* non-blocked buffer: only thread 0 stages and writes;    */
			/* the other threads keep local_buf == NULL and a zeroed   */
			/* request.                                                */
			if( MYTHREAD == 0 )
			{
				local_buf = malloc(sizeof *local_buf * count);
				upc_memget(local_buf, buffer, count);
				UPC_ADIO_IwriteContig( fd, local_buf, count, fh->shared_pointer, &request, 
						       ret, &error_code );
			}
		}

		/*------------------------------------------------------------------*/
		/* increment the file pointer                                       */
		/*------------------------------------------------------------------*/
		fh->shared_pointer += count;
	}

	/*------------------------------------------------------------------*/
	/* record the pending operation in the handle                       */
	/*------------------------------------------------------------------*/
	fh->request = request;
	fh->async_op = __REF_UPC_WRITE_SHARED_ASYNC;
	fh->local_ptr = local_buf;
	fh->blocksize = blocksize_byte;
	fh->size = count;

	/*------------------------------------------------------------------*/
	/* exit synchronisation: barrier unless UPC_IN_NOSYNC is requested  */
	/* NOTE(review): this tests the UPC_IN_* flag on exit, exactly as   */
	/* the original did; confirm whether UPC_OUT_* was intended.        */
	/*------------------------------------------------------------------*/
	if( !(sync_mode & UPC_IN_NOSYNC) )
	{
		upc_barrier;
	}

	return;
}