Ejemplo n.º 1
0
Archivo: misc.c Proyecto: dmdu/hpgmg
// Writes id_c = id_a + shift_a on the interior (non-ghost) cells of every box
// owned by this level. Elapsed cycles are accumulated into level->cycles.blas1.
void shift_vector(level_type * level, int id_c, int id_a, double shift_a) {
    uint64_t startCycles = CycleTime();
    int box;

    PRAGMA_THREAD_ACROSS_BOXES(level,box)
    for(box=0; box<level->num_my_boxes; box++) {
        int i,j,k;
        const int     dim = level->my_boxes[box].dim;
        const int  ghosts = level->my_boxes[box].ghosts;
        const int jStride = level->my_boxes[box].jStride;
        const int kStride = level->my_boxes[box].kStride;
        // skip the ghost cells so that index 0 is the first non ghost zone point
        const int interior = ghosts*(1+jStride+kStride);
        double * __restrict__ dst = level->my_boxes[box].vectors[id_c] + interior;
        double * __restrict__ src = level->my_boxes[box].vectors[id_a] + interior;

        PRAGMA_THREAD_WITHIN_A_BOX(level,i,j,k)
        for(k=0; k<dim; k++) {
            for(j=0; j<dim; j++) {
                for(i=0; i<dim; i++) {
                    const int cell = i + j*jStride + k*kStride;
                    dst[cell] = src[cell] + shift_a;
                }
            }
        }
    }

    level->cycles.blas1 += (uint64_t)(CycleTime()-startCycles);
}
Ejemplo n.º 2
0
Archivo: misc.c Proyecto: dmdu/hpgmg
//------------------------------------------------------------------------------------------------------------------------------
// Sets vector component_id to `scalar` on the interior cells of every box and
// to 0.0 on every ghost cell. Elapsed cycles go to level->cycles.blas1.
void initialize_grid_to_scalar(level_type * level, int component_id, double scalar) {
    uint64_t startCycles = CycleTime();
    int box;

    PRAGMA_THREAD_ACROSS_BOXES(level,box)
    for(box=0; box<level->num_my_boxes; box++) {
        int i,j,k;
        const int     dim = level->my_boxes[box].dim;
        const int  ghosts = level->my_boxes[box].ghosts;
        const int jStride = level->my_boxes[box].jStride;
        const int kStride = level->my_boxes[box].kStride;
        // the sweep covers the interior plus `ghosts` cells on each side
        const int lo = -ghosts;
        const int hi = dim+ghosts;
        double * __restrict__ cells = level->my_boxes[box].vectors[component_id] + ghosts*(1+jStride+kStride);

        PRAGMA_THREAD_WITHIN_A_BOX(level,i,j,k)
        for(k=lo; k<hi; k++) {
            for(j=lo; j<hi; j++) {
                for(i=lo; i<hi; i++) {
                    // a cell is interior only when all three indices lie in [0,dim)
                    const int inside = (i>=0) && (j>=0) && (k>=0) && (i<dim) && (j<dim) && (k<dim);
                    cells[i + j*jStride + k*kStride] = inside ? scalar : 0.0;
                }
            }
        }
    }

    level->cycles.blas1 += (uint64_t)(CycleTime()-startCycles);
}
Ejemplo n.º 3
0
Archivo: misc.c Proyecto: dmdu/hpgmg
//------------------------------------------------------------------------------------------------------------------------------
// Computes id_c = scale_a*id_a + scale_b*id_b on the interior (non-ghost)
// cells of every box. Elapsed cycles are accumulated into level->cycles.blas1.
void add_vectors(level_type * level, int id_c, double scale_a, int id_a, double scale_b, int id_b) {
    uint64_t startCycles = CycleTime();
    int box;

    PRAGMA_THREAD_ACROSS_BOXES(level,box)
    for(box=0; box<level->num_my_boxes; box++) {
        int i,j,k;
        const int     dim = level->my_boxes[box].dim;
        const int  ghosts = level->my_boxes[box].ghosts;
        const int jStride = level->my_boxes[box].jStride;
        const int kStride = level->my_boxes[box].kStride;
        // offset past the ghost cells; index 0 is then the first interior point
        const int interior = ghosts*(1+jStride+kStride);
        double * __restrict__ out   = level->my_boxes[box].vectors[id_c] + interior;
        double * __restrict__ lhs   = level->my_boxes[box].vectors[id_a] + interior;
        double * __restrict__ rhs   = level->my_boxes[box].vectors[id_b] + interior;

        PRAGMA_THREAD_WITHIN_A_BOX(level,i,j,k)
        for(k=0; k<dim; k++) {
            for(j=0; j<dim; j++) {
                for(i=0; i<dim; i++) {
                    const int cell = i + j*jStride + k*kStride;
                    out[cell] = scale_a*lhs[cell] + scale_b*rhs[cell];
                }
            }
        }
    }

    level->cycles.blas1 += (uint64_t)(CycleTime()-startCycles);
}
Ejemplo n.º 4
0
Archivo: misc.c Proyecto: dmdu/hpgmg
//------------------------------------------------------------------------------------------------------------------------------
// Fills VECTOR_VALID (interior and ghost cells) for every box: 1.0 marks a
// valid cell, 0.0 an invalid one. All cells are valid unless the level uses
// BC_DIRICHLET, in which case cells whose global index falls outside
// [0, level->dim) in any direction are marked invalid.
void initialize_valid_region(level_type * level) {
    uint64_t startCycles = CycleTime();
    int box;

    PRAGMA_THREAD_ACROSS_BOXES(level,box)
    for(box=0; box<level->num_my_boxes; box++) {
        int i,j,k;
        const int     dim = level->my_boxes[box].dim;
        const int  ghosts = level->my_boxes[box].ghosts;
        const int jStride = level->my_boxes[box].jStride;
        const int kStride = level->my_boxes[box].kStride;
        const int lo = -ghosts;
        const int hi = dim+ghosts;
        // only Dirichlet boundaries invalidate cells; periodic BC's keep everything valid
        const int dirichlet = (level->domain_boundary_condition == BC_DIRICHLET);
        double * __restrict__ valid = level->my_boxes[box].vectors[VECTOR_VALID] + ghosts*(1+jStride+kStride);

        PRAGMA_THREAD_WITHIN_A_BOX(level,i,j,k)
        for(k=lo; k<hi; k++) {
            for(j=lo; j<hi; j++) {
                for(i=lo; i<hi; i++) {
                    int outside = 0;
                    if(dirichlet) {
                        // box-local index + box origin = global index on this level
                        outside = (i + level->my_boxes[box].low.i <             0)
                               || (j + level->my_boxes[box].low.j <             0)
                               || (k + level->my_boxes[box].low.k <             0)
                               || (i + level->my_boxes[box].low.i >= level->dim.i)
                               || (j + level->my_boxes[box].low.j >= level->dim.j)
                               || (k + level->my_boxes[box].low.k >= level->dim.k);
                    }
                    valid[i + j*jStride + k*kStride] = outside ? 0.0 : 1.0;
                }
            }
        }
    }

    level->cycles.blas1 += (uint64_t)(CycleTime()-startCycles);
}
Ejemplo n.º 5
0
//------------------------------------------------------------------------------------------------------------------------------
// Samuel Williams
// [email protected]
// Lawrence Berkeley National Lab
//------------------------------------------------------------------------------------------------------------------------------
// Applies the operator to vector x_id and stores the result in vector Ax_id
// (interior cells only), one box at a time.
//   level : level whose boxes are processed
//   Ax_id : index of the destination vector
//   x_id  : index of the source vector; its ghost zones are refreshed first
//   a, b  : not referenced directly in the visible code; presumably consumed by
//           apply_op_ijk (defined elsewhere) - TODO confirm
// Time spent in the stencil loop (excluding the boundary exchange) is added to
// level->cycles.apply_op.
void apply_op(level_type * level, int Ax_id, int x_id, double a, double b){  // y=Ax
  // exchange the boundary of x in preparation for Ax
  exchange_boundary(level,x_id,stencil_is_star_shaped());
          apply_BCs(level,x_id);

  // now do Ax proper...
  uint64_t _timeStart = CycleTime();
  int box;

  PRAGMA_THREAD_ACROSS_BOXES(level,box)
  for(box=0;box<level->num_my_boxes;box++){
    int i,j,k;
    const int jStride = level->my_boxes[box].jStride;
    const int kStride = level->my_boxes[box].kStride;
    const int  ghosts = level->my_boxes[box].ghosts;
    const int     dim = level->my_boxes[box].dim;
    const double h2inv = 1.0/(level->h*level->h);
    // NOTE(review): several of the locals below (h2inv, alpha, beta_*, valid) are not used by name in
    // this function body; they are presumably picked up by the apply_op_ijk macro - do not rename or remove
    const double * __restrict__ x      = level->my_boxes[box].vectors[         x_id] + ghosts*(1+jStride+kStride); // i.e. [0] = first non ghost zone point
          double * __restrict__ Ax     = level->my_boxes[box].vectors[        Ax_id] + ghosts*(1+jStride+kStride); 
    const double * __restrict__ alpha  = level->my_boxes[box].vectors[VECTOR_ALPHA ] + ghosts*(1+jStride+kStride);
    const double * __restrict__ beta_i = level->my_boxes[box].vectors[VECTOR_BETA_I] + ghosts*(1+jStride+kStride);
    const double * __restrict__ beta_j = level->my_boxes[box].vectors[VECTOR_BETA_J] + ghosts*(1+jStride+kStride);
    const double * __restrict__ beta_k = level->my_boxes[box].vectors[VECTOR_BETA_K] + ghosts*(1+jStride+kStride);
    const double * __restrict__  valid = level->my_boxes[box].vectors[VECTOR_VALID ] + ghosts*(1+jStride+kStride);

    PRAGMA_THREAD_WITHIN_A_BOX(level,i,j,k)
    for(k=0;k<dim;k++){
    for(j=0;j<dim;j++){
    for(i=0;i<dim;i++){
      // linear index of interior cell (i,j,k) relative to the first non ghost zone point
      int ijk = i + j*jStride + k*kStride;
      Ax[ijk] = apply_op_ijk(x);
    }}}
  }
  level->cycles.apply_op += (uint64_t)(CycleTime()-_timeStart);
}
Ejemplo n.º 6
0
//------------------------------------------------------------------------------------------------------------------------------
// Element-wise product: id_c = scale*id_a*id_b over the cells of every block
// in level->my_blocks. Elapsed cycles are accumulated into level->cycles.blas1.
void mul_vectors(level_type * level, int id_c, double scale, int id_a, int id_b){ // id_c=scale*id_a*id_b
  uint64_t startCycles = CycleTime();
  int block;

  PRAGMA_THREAD_ACROSS_BLOCKS(level,block,level->num_my_blocks)
  for(block=0;block<level->num_my_blocks;block++){
    // each block names the box it belongs to and a sub-range inside that box
    const int box     = level->my_blocks[block].read.box;
    const int iBegin  = level->my_blocks[block].read.i;
    const int jBegin  = level->my_blocks[block].read.j;
    const int kBegin  = level->my_blocks[block].read.k;
    const int iEnd    = level->my_blocks[block].dim.i + iBegin;
    const int jEnd    = level->my_blocks[block].dim.j + jBegin;
    const int kEnd    = level->my_blocks[block].dim.k + kBegin;
    const int jStride = level->my_boxes[box].jStride;
    const int kStride = level->my_boxes[box].kStride;
    const int  ghosts = level->my_boxes[box].ghosts;
    // offset past the ghost cells; index 0 is then the first interior point
    const int interior = ghosts*(1+jStride+kStride);
    double * __restrict__ out  = level->my_boxes[box].vectors[id_c] + interior;
    double * __restrict__ lhs  = level->my_boxes[box].vectors[id_a] + interior;
    double * __restrict__ rhs  = level->my_boxes[box].vectors[id_b] + interior;
    int i,j,k;

    for(k=kBegin;k<kEnd;k++){
      for(j=jBegin;j<jEnd;j++){
        for(i=iBegin;i<iEnd;i++){
          const int cell = i + j*jStride + k*kStride;
          out[cell] = scale*lhs[cell]*rhs[cell];
        }
      }
    }
  }

  level->cycles.blas1 += (uint64_t)(CycleTime()-startCycles);
}
Ejemplo n.º 7
0
//------------------------------------------------------------------------------------------------------------------------------
// Writes id_c = id_a + shift_a over the cells of every block in
// level->my_blocks. Elapsed cycles are accumulated into level->cycles.blas1.
void shift_vector(level_type * level, int id_c, int id_a, double shift_a){
  uint64_t startCycles = CycleTime();
  int block;

  PRAGMA_THREAD_ACROSS_BLOCKS(level,block,level->num_my_blocks)
  for(block=0;block<level->num_my_blocks;block++){
    // each block names the box it belongs to and a sub-range inside that box
    const int box     = level->my_blocks[block].read.box;
    const int iBegin  = level->my_blocks[block].read.i;
    const int jBegin  = level->my_blocks[block].read.j;
    const int kBegin  = level->my_blocks[block].read.k;
    const int iEnd    = level->my_blocks[block].dim.i + iBegin;
    const int jEnd    = level->my_blocks[block].dim.j + jBegin;
    const int kEnd    = level->my_blocks[block].dim.k + kBegin;
    const int jStride = level->my_boxes[box].jStride;
    const int kStride = level->my_boxes[box].kStride;
    const int  ghosts = level->my_boxes[box].ghosts;
    // skip the ghost cells so that index 0 is the first non ghost zone point
    const int interior = ghosts*(1+jStride+kStride);
    double * __restrict__ dst = level->my_boxes[box].vectors[id_c] + interior;
    double * __restrict__ src = level->my_boxes[box].vectors[id_a] + interior;
    int i,j,k;

    for(k=kBegin;k<kEnd;k++){
      for(j=jBegin;j<jEnd;j++){
        for(i=iBegin;i<iEnd;i++){
          const int cell = i + j*jStride + k*kStride;
          dst[cell] = src[cell] + shift_a;
        }
      }
    }
  }

  level->cycles.blas1 += (uint64_t)(CycleTime()-startCycles);
}
Ejemplo n.º 8
0
Archivo: misc.c Proyecto: dmdu/hpgmg
//------------------------------------------------------------------------------------------------------------------------------
// Samuel Williams
// [email protected]
// Lawrence Berkeley National Lab
//------------------------------------------------------------------------------------------------------------------------------
// Sets every cell of vector component_id to 0.0 in every box, INCLUDING the
// ghost zones. Elapsed cycles are accumulated into level->cycles.blas1.
void zero_vector(level_type * level, int component_id) {
    uint64_t startCycles = CycleTime();
    int box;

    PRAGMA_THREAD_ACROSS_BOXES(level,box)
    for(box=0; box<level->num_my_boxes; box++) {
        int i,j,k;
        const int     dim = level->my_boxes[box].dim;
        const int  ghosts = level->my_boxes[box].ghosts;
        const int jStride = level->my_boxes[box].jStride;
        const int kStride = level->my_boxes[box].kStride;
        // the sweep covers the interior plus `ghosts` cells on each side
        const int lo = -ghosts;
        const int hi = dim+ghosts;
        double * __restrict__ cells = level->my_boxes[box].vectors[component_id] + ghosts*(1+jStride+kStride);

        PRAGMA_THREAD_WITHIN_A_BOX(level,i,j,k)
        for(k=lo; k<hi; k++) {
            for(j=lo; j<hi; j++) {
                for(i=lo; i<hi; i++) {
                    cells[i + j*jStride + k*kStride] = 0.0;
                }
            }
        }
    }

    level->cycles.blas1 += (uint64_t)(CycleTime()-startCycles);
}
Ejemplo n.º 9
0
void FRichCurve::RemapTimeValue(float& InTime, float& CycleValueOffset) const
{
	const int32 NumKeys = Keys.Num();
	
	if (NumKeys < 2)
	{
		return;
	} 

	if (InTime <= Keys[0].Time)
	{
		if (PreInfinityExtrap != RCCE_Linear && PreInfinityExtrap != RCCE_Constant)
		{
			float MinTime = Keys[0].Time;
			float MaxTime = Keys[NumKeys - 1].Time;

			int CycleCount = 0;
			CycleTime(MinTime, MaxTime, InTime, CycleCount);

			if (PreInfinityExtrap == RCCE_CycleWithOffset)
			{
				float DV = Keys[0].Value - Keys[NumKeys - 1].Value;
				CycleValueOffset = DV * CycleCount;
			}
			else if (PreInfinityExtrap == RCCE_Oscillate)
			{
				if (CycleCount % 2 == 1)
				{
					InTime = MinTime + (MaxTime - InTime);
				}
			}
		}
	}
	else if (InTime >= Keys[NumKeys - 1].Time)
	{
		if (PostInfinityExtrap != RCCE_Linear && PostInfinityExtrap != RCCE_Constant)
		{
			float MinTime = Keys[0].Time;
			float MaxTime = Keys[NumKeys - 1].Time;

			int CycleCount = 0; 
			CycleTime(MinTime, MaxTime, InTime, CycleCount);

			if (PostInfinityExtrap == RCCE_CycleWithOffset)
			{
				float DV = Keys[NumKeys - 1].Value - Keys[0].Value;
				CycleValueOffset = DV * CycleCount;
			}
			else if (PostInfinityExtrap == RCCE_Oscillate)
			{
				if (CycleCount % 2 == 1)
				{
					InTime = MinTime + (MaxTime - InTime);
				}
			}
		}
	}
}
Ejemplo n.º 10
0
Archivo: jacobi.c Proyecto: dmdu/hpgmg
//------------------------------------------------------------------------------------------------------------------------------
// Weighted Jacobi smoother: performs NUM_SMOOTHS sweeps of
//   x_np1 = x_n + weight*lambda*(rhs - A x_n)
// ping-ponging between vector x_id and VECTOR_TEMP. Because even sweeps write
// VECTOR_TEMP and odd sweeps write x_id, NUM_SMOOTHS must be even for the final
// result to land in x_id; an odd value is reported on stderr and the process
// terminates with a non-zero exit status.
//   level  : level whose boxes are smoothed
//   x_id   : index of the solution vector (input and, after the last sweep, output)
//   rhs_id : index of the right-hand-side vector
//   a, b   : not referenced directly in the visible code; presumably consumed by
//            apply_op_ijk (defined elsewhere) - TODO confirm
// Time spent in the sweeps (excluding boundary exchanges) is added to level->cycles.smooth.
void smooth(level_type * level, int x_id, int rhs_id, double a, double b){
  if(NUM_SMOOTHS&1){
    fprintf(stderr,"error - NUM_SMOOTHS must be even...\n");
    exit(1); // configuration error: report failure to the caller instead of exiting with status 0
  }

  // weight is 1.0 when USE_L1JACOBI is defined, 2/3 otherwise
  #ifdef USE_L1JACOBI
  double weight = 1.0;
  #else
  double weight = 2.0/3.0;
  #endif
 
  int box,s;
  for(s=0;s<NUM_SMOOTHS;s++){
    // exchange ghost zone data... Jacobi ping pongs between x_id and VECTOR_TEMP
    if((s&1)==0){exchange_boundary(level,       x_id,stencil_is_star_shaped());apply_BCs(level,       x_id);}
            else{exchange_boundary(level,VECTOR_TEMP,stencil_is_star_shaped());apply_BCs(level,VECTOR_TEMP);}

    // apply the smoother... Jacobi ping pongs between x_id and VECTOR_TEMP
    uint64_t _timeStart = CycleTime();
    PRAGMA_THREAD_ACROSS_BOXES(level,box)
    for(box=0;box<level->num_my_boxes;box++){
      int i,j,k;
      const int ghosts = level->box_ghosts;
      const int jStride = level->my_boxes[box].jStride;
      const int kStride = level->my_boxes[box].kStride;
      const int     dim = level->my_boxes[box].dim;
      const double h2inv = 1.0/(level->h*level->h);
      // NOTE(review): h2inv, alpha, beta_* and valid are not used by name below; they are presumably
      // picked up by the apply_op_ijk macro - do not rename or remove
      const double * __restrict__ rhs    = level->my_boxes[box].vectors[       rhs_id] + ghosts*(1+jStride+kStride);
      const double * __restrict__ alpha  = level->my_boxes[box].vectors[VECTOR_ALPHA ] + ghosts*(1+jStride+kStride);
      const double * __restrict__ beta_i = level->my_boxes[box].vectors[VECTOR_BETA_I] + ghosts*(1+jStride+kStride);
      const double * __restrict__ beta_j = level->my_boxes[box].vectors[VECTOR_BETA_J] + ghosts*(1+jStride+kStride);
      const double * __restrict__ beta_k = level->my_boxes[box].vectors[VECTOR_BETA_K] + ghosts*(1+jStride+kStride);
      const double * __restrict__ valid  = level->my_boxes[box].vectors[VECTOR_VALID ] + ghosts*(1+jStride+kStride); // cell is inside the domain
      // lambda is read from VECTOR_L1INV when USE_L1JACOBI is defined, from VECTOR_DINV otherwise
      #ifdef USE_L1JACOBI
      const double * __restrict__ lambda = level->my_boxes[box].vectors[VECTOR_L1INV ] + ghosts*(1+jStride+kStride);
      #else
      const double * __restrict__ lambda = level->my_boxes[box].vectors[VECTOR_DINV  ] + ghosts*(1+jStride+kStride);
      #endif
        // even sweeps read x_id and write VECTOR_TEMP; odd sweeps do the reverse
        const double * __restrict__ x_n;
              double * __restrict__ x_np1;
                      if((s&1)==0){x_n   = level->my_boxes[box].vectors[         x_id] + ghosts*(1+jStride+kStride);
                                   x_np1 = level->my_boxes[box].vectors[VECTOR_TEMP  ] + ghosts*(1+jStride+kStride);}
                              else{x_n   = level->my_boxes[box].vectors[VECTOR_TEMP  ] + ghosts*(1+jStride+kStride);
                                   x_np1 = level->my_boxes[box].vectors[         x_id] + ghosts*(1+jStride+kStride);}
      PRAGMA_THREAD_WITHIN_A_BOX(level,i,j,k)
      for(k=0;k<dim;k++){
      for(j=0;j<dim;j++){
      for(i=0;i<dim;i++){
        // linear index of interior cell (i,j,k) relative to the first non ghost zone point
        int ijk = i + j*jStride + k*kStride;
        double Ax_n = apply_op_ijk(x_n);
        x_np1[ijk] = x_n[ijk] + weight*lambda[ijk]*(rhs[ijk]-Ax_n);
      }}}
    } // box-loop
    level->cycles.smooth += (uint64_t)(CycleTime()-_timeStart);
  } // s-loop
}
Ejemplo n.º 11
0
//------------------------------------------------------------------------------------------------------------------------------
// Fills VECTOR_VALID for every block in level->my_blocks: 1.0 marks a valid
// cell, 0.0 an invalid one. Blocks that touch a face of their box are widened
// by `ghosts` cells so the ghost zones are covered too. All cells are valid
// unless the level uses BC_DIRICHLET, in which case cells whose global index
// falls outside [0, level->dim) in any direction are marked invalid.
void initialize_valid_region(level_type * level){
  uint64_t startCycles = CycleTime();
  int block;

  PRAGMA_THREAD_ACROSS_BLOCKS(level,block,level->num_my_blocks)
  for(block=0;block<level->num_my_blocks;block++){
    const int box     = level->my_blocks[block].read.box;
    const int jStride = level->my_boxes[box].jStride;
    const int kStride = level->my_boxes[box].kStride;
    const int  ghosts = level->my_boxes[box].ghosts;
    const int     dim = level->my_boxes[box].dim;

    // nominal extent of the block inside its box
    const int iFrom = level->my_blocks[block].read.i;
    const int jFrom = level->my_blocks[block].read.j;
    const int kFrom = level->my_blocks[block].read.k;
    const int iTo   = level->my_blocks[block].dim.i + iFrom;
    const int jTo   = level->my_blocks[block].dim.j + jFrom;
    const int kTo   = level->my_blocks[block].dim.k + kFrom;

    // expand the size of the block to include the ghost zones...
    const int ilo = (iFrom<=  0) ? iFrom-ghosts : iFrom;
    const int jlo = (jFrom<=  0) ? jFrom-ghosts : jFrom;
    const int klo = (kFrom<=  0) ? kFrom-ghosts : kFrom;
    const int ihi = (iTo  >=dim) ? iTo  +ghosts : iTo;
    const int jhi = (jTo  >=dim) ? jTo  +ghosts : jTo;
    const int khi = (kTo  >=dim) ? kTo  +ghosts : kTo;

    // only Dirichlet boundaries invalidate cells; periodic BC's keep everything valid
    const int dirichlet = (level->domain_boundary_condition == BC_DIRICHLET);
    double * __restrict__ valid = level->my_boxes[box].vectors[VECTOR_VALID] + ghosts*(1+jStride+kStride);
    int i,j,k;

    for(k=klo;k<khi;k++){
      for(j=jlo;j<jhi;j++){
        for(i=ilo;i<ihi;i++){
          int outside = 0;
          if(dirichlet){
            // box-local index + box origin = global index on this level
            outside = (i + level->my_boxes[box].low.i <             0)
                   || (j + level->my_boxes[box].low.j <             0)
                   || (k + level->my_boxes[box].low.k <             0)
                   || (i + level->my_boxes[box].low.i >= level->dim.i)
                   || (j + level->my_boxes[box].low.j >= level->dim.j)
                   || (k + level->my_boxes[box].low.k >= level->dim.k);
          }
          valid[i + j*jStride + k*kStride] = outside ? 0.0 : 1.0;
        }
      }
    }
  }

  level->cycles.blas1 += (uint64_t)(CycleTime()-startCycles);
}
Ejemplo n.º 12
0
//------------------------------------------------------------------------------------------------------------------------------
// Returns the mean of vector id_a: the sum over the cells of every block in
// level->my_blocks (combined across ranks with MPI_Allreduce when USE_MPI is
// defined) divided by level->dim.i*level->dim.j*level->dim.k.
// Local summation time goes to level->cycles.blas1, the all-reduce time to
// level->cycles.collectives.
double mean(level_type * level, int id_a){
  uint64_t startCycles = CycleTime();
  int block;
  double sum_level = 0.0;

  PRAGMA_THREAD_ACROSS_BLOCKS_SUM(level,block,level->num_my_blocks,sum_level)
  for(block=0;block<level->num_my_blocks;block++){
    // each block names the box it belongs to and a sub-range inside that box
    const int box     = level->my_blocks[block].read.box;
    const int iBegin  = level->my_blocks[block].read.i;
    const int jBegin  = level->my_blocks[block].read.j;
    const int kBegin  = level->my_blocks[block].read.k;
    const int iEnd    = level->my_blocks[block].dim.i + iBegin;
    const int jEnd    = level->my_blocks[block].dim.j + jBegin;
    const int kEnd    = level->my_blocks[block].dim.k + kBegin;
    const int jStride = level->my_boxes[box].jStride;
    const int kStride = level->my_boxes[box].kStride;
    const int  ghosts = level->my_boxes[box].ghosts;
    // offset so that index 0 is the first non ghost zone point
    double * __restrict__ cells = level->my_boxes[box].vectors[id_a] + ghosts*(1+jStride+kStride);
    double blockSum = 0.0;
    int i,j,k;

    for(k=kBegin;k<kEnd;k++){
      for(j=jBegin;j<jEnd;j++){
        for(i=iBegin;i<iEnd;i++){
          blockSum += cells[i + j*jStride + k*kStride];
        }
      }
    }
    sum_level += blockSum;
  }
  level->cycles.blas1 += (uint64_t)(CycleTime()-startCycles);

  const double cellCount = (double)level->dim.i*(double)level->dim.j*(double)level->dim.k;

  #ifdef USE_MPI
  // combine the per-rank partial sums
  uint64_t allReduceStart = CycleTime();
  double localSum = sum_level;
  MPI_Allreduce(&localSum,&sum_level,1,MPI_DOUBLE,MPI_SUM,level->MPI_COMM_ALLREDUCE);
  level->cycles.collectives   += (uint64_t)(CycleTime()-allReduceStart);
  #endif

  return(sum_level / cellCount);
}
Ejemplo n.º 13
0
//------------------------------------------------------------------------------------------------------------------------------
// Returns the max norm of vector component_id: the largest absolute value over
// the cells of every block in level->my_blocks, combined across ranks with
// MPI_Allreduce(MPI_MAX) when USE_MPI is defined.
// Local time goes to level->cycles.blas1, the all-reduce time to
// level->cycles.collectives.
double norm(level_type * level, int component_id){ // implements the max norm
  uint64_t startCycles = CycleTime();
  int block;
  double max_norm = 0.0;

  PRAGMA_THREAD_ACROSS_BLOCKS_MAX(level,block,level->num_my_blocks,max_norm)
  for(block=0;block<level->num_my_blocks;block++){
    // each block names the box it belongs to and a sub-range inside that box
    const int box     = level->my_blocks[block].read.box;
    const int iBegin  = level->my_blocks[block].read.i;
    const int jBegin  = level->my_blocks[block].read.j;
    const int kBegin  = level->my_blocks[block].read.k;
    const int iEnd    = level->my_blocks[block].dim.i + iBegin;
    const int jEnd    = level->my_blocks[block].dim.j + jBegin;
    const int kEnd    = level->my_blocks[block].dim.k + kBegin;
    const int jStride = level->my_boxes[box].jStride;
    const int kStride = level->my_boxes[box].kStride;
    const int  ghosts = level->my_boxes[box].ghosts;
    // offset so that index 0 is the first non ghost zone point
    double * __restrict__ cells = level->my_boxes[box].vectors[component_id] + ghosts*(1+jStride+kStride);
    double blockMax = 0.0;
    int i,j,k;

    for(k=kBegin;k<kEnd;k++){
      for(j=jBegin;j<jEnd;j++){
        for(i=iBegin;i<iEnd;i++){
          const double magnitude = fabs(cells[i + j*jStride + k*kStride]);
          if(magnitude>blockMax){
            blockMax = magnitude;
          }
        }
      }
    }

    if(blockMax>max_norm){
      max_norm = blockMax;
    }
  } // block list
  level->cycles.blas1 += (uint64_t)(CycleTime()-startCycles);

  #ifdef USE_MPI
  // combine the per-rank maxima
  uint64_t allReduceStart = CycleTime();
  double localMax = max_norm;
  MPI_Allreduce(&localMax,&max_norm,1,MPI_DOUBLE,MPI_MAX,level->MPI_COMM_ALLREDUCE);
  level->cycles.collectives   += (uint64_t)(CycleTime()-allReduceStart);
  #endif
  return(max_norm);
}
Ejemplo n.º 14
0
Archivo: misc.c Proyecto: dmdu/hpgmg
//------------------------------------------------------------------------------------------------------------------------------
// Returns the max norm of vector component_id: the largest absolute value over
// the interior (non-ghost) cells of every box, combined across ranks with
// MPI_Allreduce(MPI_MAX) when USE_MPI is defined.
// Local time goes to level->cycles.blas1, the all-reduce time to
// level->cycles.collectives.
double norm(level_type * level, int component_id) { // implements the max norm
    uint64_t startCycles = CycleTime();
    int box;
    double max_norm = 0.0;

    // FIX, schedule(static) is a stand in to guarantee reproducibility...
    PRAGMA_THREAD_ACROSS_BOXES_MAX(level,box,max_norm)
    for(box=0; box<level->num_my_boxes; box++) {
        int i,j,k;
        const int     dim = level->my_boxes[box].dim;
        const int  ghosts = level->my_boxes[box].ghosts;
        const int jStride = level->my_boxes[box].jStride;
        const int kStride = level->my_boxes[box].kStride;
        // offset so that index 0 is the first non ghost zone point
        double * __restrict__ cells = level->my_boxes[box].vectors[component_id] + ghosts*(1+jStride+kStride);
        double box_norm = 0.0;

        PRAGMA_THREAD_WITHIN_A_BOX_MAX(level,i,j,k,box_norm)
        for(k=0; k<dim; k++) {
            for(j=0; j<dim; j++) {
                for(i=0; i<dim; i++) {
                    const double magnitude = fabs(cells[i + j*jStride + k*kStride]);
                    if(magnitude>box_norm) {
                        box_norm = magnitude;
                    }
                }
            }
        }

        if(box_norm>max_norm) {
            max_norm = box_norm;
        }
    } // box list
    level->cycles.blas1 += (uint64_t)(CycleTime()-startCycles);

#ifdef USE_MPI
    // combine the per-rank maxima
    uint64_t allReduceStart = CycleTime();
    double localMax = max_norm;
    MPI_Allreduce(&localMax,&max_norm,1,MPI_DOUBLE,MPI_MAX,level->MPI_COMM_ALLREDUCE);
    level->cycles.collectives   += (uint64_t)(CycleTime()-allReduceStart);
#endif
    return(max_norm);
}
Ejemplo n.º 15
0
//------------------------------------------------------------------------------------------------------------------------------
// Samuel Williams
// [email protected]
// Lawrence Berkeley National Lab
//------------------------------------------------------------------------------------------------------------------------------
// Applies the operator to vector x_id and stores the result in vector Ax_id,
// one block of level->my_blocks at a time.
//   level : level whose blocks are processed
//   Ax_id : index of the destination vector
//   x_id  : index of the source vector; its ghost zones are refreshed first
//   a, b  : not referenced directly in the visible code; presumably consumed by
//           apply_op_ijk (defined elsewhere) - TODO confirm
// Time spent in the stencil loop (excluding the boundary exchange) is added to
// level->cycles.apply_op.
void apply_op(level_type * level, int Ax_id, int x_id, double a, double b){  // y=Ax
  // exchange the boundary of x in preparation for Ax
  exchange_boundary(level,x_id,stencil_is_star_shaped());
          apply_BCs(level,x_id);

  // now do Ax proper...
  uint64_t _timeStart = CycleTime();
  int block;

  PRAGMA_THREAD_ACROSS_BLOCKS(level,block,level->num_my_blocks)
  for(block=0;block<level->num_my_blocks;block++){
    // each block names the box it belongs to and the [lo,hi) index range to process inside that box
    const int box = level->my_blocks[block].read.box;
    const int ilo = level->my_blocks[block].read.i;
    const int jlo = level->my_blocks[block].read.j;
    const int klo = level->my_blocks[block].read.k;
    const int ihi = level->my_blocks[block].dim.i + ilo;
    const int jhi = level->my_blocks[block].dim.j + jlo;
    const int khi = level->my_blocks[block].dim.k + klo;
    int i,j,k;
    const int jStride = level->my_boxes[box].jStride;
    const int kStride = level->my_boxes[box].kStride;
    const int  ghosts = level->my_boxes[box].ghosts;
    const int     dim = level->my_boxes[box].dim;
    const double h2inv = 1.0/(level->h*level->h);
    // NOTE(review): several of the locals here (dim, h2inv, alpha, beta_*, valid) are not used by name in
    // this function body; they are presumably picked up by the apply_op_ijk macro - do not rename or remove
    const double * __restrict__ x      = level->my_boxes[box].vectors[         x_id] + ghosts*(1+jStride+kStride); // i.e. [0] = first non ghost zone point
          double * __restrict__ Ax     = level->my_boxes[box].vectors[        Ax_id] + ghosts*(1+jStride+kStride); 
    const double * __restrict__ alpha  = level->my_boxes[box].vectors[VECTOR_ALPHA ] + ghosts*(1+jStride+kStride);
    const double * __restrict__ beta_i = level->my_boxes[box].vectors[VECTOR_BETA_I] + ghosts*(1+jStride+kStride);
    const double * __restrict__ beta_j = level->my_boxes[box].vectors[VECTOR_BETA_J] + ghosts*(1+jStride+kStride);
    const double * __restrict__ beta_k = level->my_boxes[box].vectors[VECTOR_BETA_K] + ghosts*(1+jStride+kStride);
    const double * __restrict__  valid = level->my_boxes[box].vectors[VECTOR_VALID ] + ghosts*(1+jStride+kStride);

    for(k=klo;k<khi;k++){
    for(j=jlo;j<jhi;j++){
    for(i=ilo;i<ihi;i++){
      // linear index of cell (i,j,k) relative to the first non ghost zone point
      int ijk = i + j*jStride + k*kStride;
      Ax[ijk] = apply_op_ijk(x);
    }}}
  }
  level->cycles.apply_op += (uint64_t)(CycleTime()-_timeStart);
}
Ejemplo n.º 16
0
Archivo: misc.c Proyecto: dmdu/hpgmg
//------------------------------------------------------------------------------------------------------------------------------
// Returns the dot product of vectors id_a and id_b over the interior
// (non-ghost) cells of every box, combined across ranks with
// MPI_Allreduce(MPI_SUM) when USE_MPI is defined.
// Local time goes to level->cycles.blas1, the all-reduce time to
// level->cycles.collectives.
double dot(level_type * level, int id_a, int id_b) {
    uint64_t startCycles = CycleTime();
    int box;
    double a_dot_b_level = 0.0;

    // FIX, schedule(static) is a stand in to guarantee reproducibility...
    PRAGMA_THREAD_ACROSS_BOXES_SUM(level,box,a_dot_b_level)
    for(box=0; box<level->num_my_boxes; box++) {
        int i,j,k;
        const int     dim = level->my_boxes[box].dim;
        const int  ghosts = level->my_boxes[box].ghosts;
        const int jStride = level->my_boxes[box].jStride;
        const int kStride = level->my_boxes[box].kStride;
        // offset so that index 0 is the first non ghost zone point
        const int interior = ghosts*(1+jStride+kStride);
        double * __restrict__ lhs = level->my_boxes[box].vectors[id_a] + interior;
        double * __restrict__ rhs = level->my_boxes[box].vectors[id_b] + interior;
        double a_dot_b_box = 0.0;

        PRAGMA_THREAD_WITHIN_A_BOX_SUM(level,i,j,k,a_dot_b_box)
        for(k=0; k<dim; k++) {
            for(j=0; j<dim; j++) {
                for(i=0; i<dim; i++) {
                    const int cell = i + j*jStride + k*kStride;
                    a_dot_b_box += lhs[cell]*rhs[cell];
                }
            }
        }
        a_dot_b_level += a_dot_b_box;
    }
    level->cycles.blas1 += (uint64_t)(CycleTime()-startCycles);

#ifdef USE_MPI
    // combine the per-rank partial sums
    uint64_t allReduceStart = CycleTime();
    double localDot = a_dot_b_level;
    MPI_Allreduce(&localDot,&a_dot_b_level,1,MPI_DOUBLE,MPI_SUM,level->MPI_COMM_ALLREDUCE);
    level->cycles.collectives   += (uint64_t)(CycleTime()-allReduceStart);
#endif

    return(a_dot_b_level);
}
Ejemplo n.º 17
0
//------------------------------------------------------------------------------------------------------------------------------
// Sets vector component_id to `scalar` on interior cells and to 0.0 on ghost
// cells, one block of level->my_blocks at a time. Blocks that touch a face of
// their box are widened by `ghosts` cells so the ghost zones are covered too.
// Elapsed cycles are accumulated into level->cycles.blas1.
void initialize_grid_to_scalar(level_type * level, int component_id, double scalar){
  uint64_t startCycles = CycleTime();
  int block;

  PRAGMA_THREAD_ACROSS_BLOCKS(level,block,level->num_my_blocks)
  for(block=0;block<level->num_my_blocks;block++){
    const int box     = level->my_blocks[block].read.box;
    const int jStride = level->my_boxes[box].jStride;
    const int kStride = level->my_boxes[box].kStride;
    const int  ghosts = level->my_boxes[box].ghosts;
    const int     dim = level->my_boxes[box].dim;

    // nominal extent of the block inside its box
    const int iFrom = level->my_blocks[block].read.i;
    const int jFrom = level->my_blocks[block].read.j;
    const int kFrom = level->my_blocks[block].read.k;
    const int iTo   = level->my_blocks[block].dim.i + iFrom;
    const int jTo   = level->my_blocks[block].dim.j + jFrom;
    const int kTo   = level->my_blocks[block].dim.k + kFrom;

    // expand the size of the block to include the ghost zones...
    const int ilo = (iFrom<=  0) ? iFrom-ghosts : iFrom;
    const int jlo = (jFrom<=  0) ? jFrom-ghosts : jFrom;
    const int klo = (kFrom<=  0) ? kFrom-ghosts : kFrom;
    const int ihi = (iTo  >=dim) ? iTo  +ghosts : iTo;
    const int jhi = (jTo  >=dim) ? jTo  +ghosts : jTo;
    const int khi = (kTo  >=dim) ? kTo  +ghosts : kTo;

    double * __restrict__ cells = level->my_boxes[box].vectors[component_id] + ghosts*(1+jStride+kStride);
    int i,j,k;

    for(k=klo;k<khi;k++){
      for(j=jlo;j<jhi;j++){
        for(i=ilo;i<ihi;i++){
          // a cell is interior only when all three indices lie in [0,dim)
          const int inside = (i>=0) && (j>=0) && (k>=0) && (i<dim) && (j<dim) && (k<dim);
          cells[i + j*jStride + k*kStride] = inside ? scalar : 0.0;
        }
      }
    }
  }

  level->cycles.blas1 += (uint64_t)(CycleTime()-startCycles);
}
Ejemplo n.º 18
0
Archivo: misc.c Proyecto: dmdu/hpgmg
//------------------------------------------------------------------------------------------------------------------------------
// Returns the mean of vector id_a: the sum over the interior (non-ghost) cells
// of every box (combined across ranks with MPI_Allreduce when USE_MPI is
// defined) divided by level->dim.i*level->dim.j*level->dim.k.
// Local summation time goes to level->cycles.blas1, the all-reduce time to
// level->cycles.collectives.
double mean(level_type * level, int id_a) {
    uint64_t startCycles = CycleTime();
    int box;
    double sum_level = 0.0;

    PRAGMA_THREAD_ACROSS_BOXES_SUM(level,box,sum_level)
    for(box=0; box<level->num_my_boxes; box++) {
        int i,j,k;
        const int     dim = level->my_boxes[box].dim;
        const int  ghosts = level->my_boxes[box].ghosts;
        const int jStride = level->my_boxes[box].jStride;
        const int kStride = level->my_boxes[box].kStride;
        // offset so that index 0 is the first non ghost zone point
        double * __restrict__ cells = level->my_boxes[box].vectors[id_a] + ghosts*(1+jStride+kStride);
        double sum_box = 0.0;

        PRAGMA_THREAD_WITHIN_A_BOX_SUM(level,i,j,k,sum_box)
        for(k=0; k<dim; k++) {
            for(j=0; j<dim; j++) {
                for(i=0; i<dim; i++) {
                    sum_box += cells[i + j*jStride + k*kStride];
                }
            }
        }
        sum_level += sum_box;
    }
    level->cycles.blas1 += (uint64_t)(CycleTime()-startCycles);

    const double cellCount = (double)level->dim.i*(double)level->dim.j*(double)level->dim.k;

#ifdef USE_MPI
    // combine the per-rank partial sums
    uint64_t allReduceStart = CycleTime();
    double localSum = sum_level;
    MPI_Allreduce(&localSum,&sum_level,1,MPI_DOUBLE,MPI_SUM,level->MPI_COMM_ALLREDUCE);
    level->cycles.collectives   += (uint64_t)(CycleTime()-allReduceStart);
#endif

    return(sum_level / cellCount);
}
Ejemplo n.º 19
0
Archivo: misc.c Proyecto: dmdu/hpgmg
//------------------------------------------------------------------------------------------------------------------------------
// Projects a cell-centered vector onto faces by simple linear interpolation.
//
//   level   : level whose boxes level->my_boxes[0..num_my_boxes-1] are processed
//   id_cell : index into each box's vectors[] of the source (cell) vector
//   id_face : index into each box's vectors[] of the destination (face) vector
//   dir     : face normal direction: 0 = i, 1 = j, 2 = k
//
// For every (i,j,k) in [0,dim]x[0,dim]x[0,dim] (inclusive, so both the low and the
// high faces are produced) the face value is the average of cell ijk and its lower
// neighbour along dir.  Index -1 along dir is therefore read and index dim is
// written in every direction.
// NOTE(review): this presumably requires at least one ghost layer and
// id_cell != id_face (both pointers are __restrict__) - confirm against callers.
//
// A dir outside 0..2 is ignored: the function returns without touching any data.
// Elapsed time is accumulated in level->cycles.blas1.
void project_cell_to_face(level_type * level, int id_cell, int id_face, int dir) {
    // Previously an unknown dir left `stride` uninitialized and it was then used as
    // an array offset (undefined behavior).  Reject it before doing any work.
    if(dir<0 || dir>2)return;

    uint64_t _timeStart = CycleTime();
    int box;

    PRAGMA_THREAD_ACROSS_BOXES(level,box)
    for(box=0; box<level->num_my_boxes; box++) {
        int i,j,k;
        const int jStride = level->my_boxes[box].jStride;
        const int kStride = level->my_boxes[box].kStride;
        const int  ghosts = level->my_boxes[box].ghosts;
        const int     dim = level->my_boxes[box].dim;
        // offset past the ghost cells so that index 0 is the first non ghost zone point
        double * __restrict__ grid_cell = level->my_boxes[box].vectors[id_cell] + ghosts*(1+jStride+kStride);
        double * __restrict__ grid_face = level->my_boxes[box].vectors[id_face] + ghosts*(1+jStride+kStride);

        // distance (in elements) to the neighbouring cell along dir
        int stride;
        switch(dir) {
        case 0:
            stride =       1;
            break;//i-direction
        case 1:
            stride = jStride;
            break;//j-direction
        default: // dir==2 (validated on entry)
            stride = kStride;
            break;//k-direction
        }

        PRAGMA_THREAD_WITHIN_A_BOX(level,i,j,k)
        for(k=0; k<=dim; k++) { // <= to ensure you do low and high faces
            for(j=0; j<=dim; j++) {
                for(i=0; i<=dim; i++) {
                    int ijk = i + j*jStride + k*kStride;
                    grid_face[ijk] = 0.5*(grid_cell[ijk-stride] + grid_cell[ijk]); // simple linear interpolation
                }
            }
        }
    }

    level->cycles.blas1 += (uint64_t)(CycleTime()-_timeStart);
}
Ejemplo n.º 20
0
//------------------------------------------------------------------------------------------------------------------------------
// Projects a cell-centered vector onto faces by simple linear interpolation,
// working block by block over level->my_blocks[0..num_my_blocks-1].
//
//   level   : level whose blocks are processed
//   id_cell : index into each box's vectors[] of the source (cell) vector
//   id_face : index into each box's vectors[] of the destination (face) vector
//   dir     : face normal direction: 0 = i, 1 = j, 2 = k
//
// Each block covers [read.i, read.i+dim.i] x [read.j, read.j+dim.j] x
// [read.k, read.k+dim.k] of box read.box (upper bounds inclusive, so both low and
// high faces are produced).  The face value is the average of cell ijk and its
// lower neighbour along dir.
// NOTE(review): because the upper bounds are inclusive, adjacent blocks write the
// same face index; the value written is identical as long as grid_cell is not
// being modified concurrently - confirm.
//
// A dir outside 0..2 is ignored: the function returns without touching any data.
// Elapsed time is accumulated in level->cycles.blas1.
void project_cell_to_face(level_type * level, int id_cell, int id_face, int dir){
  // Previously an unknown dir left `stride` uninitialized and it was then used as
  // an array offset (undefined behavior).  Reject it before doing any work.
  if(dir<0 || dir>2)return;

  uint64_t _timeStart = CycleTime();
  int block;

  PRAGMA_THREAD_ACROSS_BLOCKS(level,block,level->num_my_blocks)
  for(block=0;block<level->num_my_blocks;block++){
    const int box = level->my_blocks[block].read.box;
    const int ilo = level->my_blocks[block].read.i;
    const int jlo = level->my_blocks[block].read.j;
    const int klo = level->my_blocks[block].read.k;
    const int ihi = level->my_blocks[block].dim.i + ilo;
    const int jhi = level->my_blocks[block].dim.j + jlo;
    const int khi = level->my_blocks[block].dim.k + klo;
    int i,j,k;
    const int jStride = level->my_boxes[box].jStride;
    const int kStride = level->my_boxes[box].kStride;
    const int  ghosts = level->my_boxes[box].ghosts;
    // offset past the ghost cells so that index 0 is the first non ghost zone point
    double * __restrict__ grid_cell = level->my_boxes[box].vectors[id_cell] + ghosts*(1+jStride+kStride);
    double * __restrict__ grid_face = level->my_boxes[box].vectors[id_face] + ghosts*(1+jStride+kStride);

    // distance (in elements) to the neighbouring cell along dir
    int stride;
    switch(dir){
      case 0:  stride =       1;break;//i-direction
      case 1:  stride = jStride;break;//j-direction
      default: stride = kStride;break;//k-direction (dir==2, validated on entry)
    }

    for(k=klo;k<=khi;k++){ // <= to ensure you do low and high faces
    for(j=jlo;j<=jhi;j++){
    for(i=ilo;i<=ihi;i++){
      int ijk = i + j*jStride + k*kStride;
      grid_face[ijk] = 0.5*(grid_cell[ijk-stride] + grid_cell[ijk]); // simple linear interpolation
    }}}
  }

  level->cycles.blas1 += (uint64_t)(CycleTime()-_timeStart);
}
Ejemplo n.º 21
0
//------------------------------------------------------------------------------------------------------------------------------
// Samuel Williams
// [email protected]
// Lawrence Berkeley National Lab
//------------------------------------------------------------------------------------------------------------------------------
// Fills the first layer of ghost cells (index -1 and index dim) of vector x_id on
// every box face, edge and corner that lies on the domain boundary.
//
//   level : level whose boxes level->my_boxes[0..num_my_boxes-1] are updated in place
//   x_id  : index into each box's vectors[] of the vector to update
//
// Returns immediately when level->boundary_condition.type is BC_PERIODIC, and only
// writes ghost cells when it is BC_DIRICHLET.  Each ghost value is set to
// s * x[ghost + normal], where `normal` is the sum of the strides pointing back into
// the domain for every direction in which the ghost region crosses the domain
// boundary, and s is -1 raised to the number of such directions.
// Elapsed time is accumulated in level->cycles.boundary_conditions.
void apply_BCs_linear(level_type * level, int x_id){
  if(level->boundary_condition.type == BC_PERIODIC)return; // no BC's to apply !

  // for cell-centered, we need to fill in the ghost zones to apply any BC's
  // this code does a simple linear interpolation for homogeneous dirichlet
  //
  //   . . . . . . . . .          . . . . . . . . .
  //   .       .       .          .       .       .
  //   .   ?   .   ?   .          .+x(0,0).-x(0,0).
  //   .       .       .          .       .       .
  //   . . . . +-------+          . . . . +-------+
  //   .       |       |          .       |       |
  //   .   ?   | x(0,0)|          .-x(0,0)| x(0,0)|
  //   .       |       |          .       |       |
  //   . . . . +-------+          . . . . +-------+
  //           ^
  //           domain boundary is the face... i.e. between two array indices !!!
  //

  uint64_t _timeStart = CycleTime();
  int box;

  PRAGMA_THREAD_ACROSS_BOXES(level,box)
  for(box=0;box<level->num_my_boxes;box++){
    const int jStride = level->my_boxes[box].jStride;
    const int kStride = level->my_boxes[box].kStride;
    const int  ghosts = level->my_boxes[box].ghosts;
    const int     dim = level->my_boxes[box].dim;
    double * __restrict__ x      = level->my_boxes[box].vectors[        x_id] + ghosts*(1+jStride+kStride); // i.e. [0] = first non ghost zone point
  //double * __restrict__  valid = level->my_boxes[box].vectors[VECTOR_VALID] + ghosts*(1+jStride+kStride);

    // a box side lies on the domain boundary when its low corner is at 0 (low side)
    // or its low corner plus dim equals the level dimension (high side)
    int box_on_low_i  = (level->my_boxes[box].low.i     ==            0);
    int box_on_low_j  = (level->my_boxes[box].low.j     ==            0);
    int box_on_low_k  = (level->my_boxes[box].low.k     ==            0);
    int box_on_high_i = (level->my_boxes[box].low.i+dim == level->dim.i);
    int box_on_high_j = (level->my_boxes[box].low.j+dim == level->dim.j);
    int box_on_high_k = (level->my_boxes[box].low.k+dim == level->dim.k);

    if(level->boundary_condition.type == BC_DIRICHLET){
      int i,j,k,normal;
      double s;

      // note, just because you are in a corner ghost zone, doesn't mean you are on the corner of the domain.
      // thus, one needs to calculate the normal to the domain (not normal to box) in each ghost zone region
      // depending on whether this normal is on a domain face, edge, or corner, one needs to choose 's' appropriately

      // Each line below handles one ghost region in two steps:
      //   left  half: reset s to 1, then for every adjoining box side that is on the domain
      //               boundary add the inward-pointing stride to `normal` and flip the sign of s
      //   right half: if at least one adjoining side is on the domain boundary, overwrite the
      //               ghost cells of that region with s*x[ijk+normal]

      // 6 faces: a single boundary direction, so s is -1 whenever the face is written
      // calculate a normal vector for this face                                                              // if face is on a domain boundary, impose the boundary condition using the calculated normal
      s=1;if(box_on_low_i ){normal= 1+      0+      0;s*=-1;}                                                           if(box_on_low_i ){i= -1;j  =0;k  =0;for(j=0;j<dim;j++)for(k=0;k<dim;k++){int ijk=i+j*jStride+k*kStride;x[ijk]=s*x[ijk+normal];}}
      s=1;if(box_on_low_j ){normal= 0+jStride+      0;s*=-1;}                                                           if(box_on_low_j ){i=  0;j= -1;k  =0;for(k=0;k<dim;k++)for(i=0;i<dim;i++){int ijk=i+j*jStride+k*kStride;x[ijk]=s*x[ijk+normal];}}
      s=1;if(box_on_low_k ){normal= 0+      0+kStride;s*=-1;}                                                           if(box_on_low_k ){i=  0;j  =0;k= -1;for(j=0;j<dim;j++)for(i=0;i<dim;i++){int ijk=i+j*jStride+k*kStride;x[ijk]=s*x[ijk+normal];}}
      s=1;if(box_on_high_i){normal=-1+      0+      0;s*=-1;}                                                           if(box_on_high_i){i=dim;j  =0;k  =0;for(j=0;j<dim;j++)for(k=0;k<dim;k++){int ijk=i+j*jStride+k*kStride;x[ijk]=s*x[ijk+normal];}}
      s=1;if(box_on_high_j){normal= 0-jStride+      0;s*=-1;}                                                           if(box_on_high_j){i=  0;j=dim;k  =0;for(k=0;k<dim;k++)for(i=0;i<dim;i++){int ijk=i+j*jStride+k*kStride;x[ijk]=s*x[ijk+normal];}}
      s=1;if(box_on_high_k){normal= 0+      0-kStride;s*=-1;}                                                           if(box_on_high_k){i=  0;j  =0;k=dim;for(j=0;j<dim;j++)for(i=0;i<dim;i++){int ijk=i+j*jStride+k*kStride;x[ijk]=s*x[ijk+normal];}}

      // 12 edges: up to two boundary directions, so s is -1 (one direction) or +1 (two directions)
      // calculate a normal vector for this edge                                                                                              // if edge is on a domain boundary, impose the boundary condition using the calculated normal
      s=1;normal=0;if(box_on_low_j ){normal+=jStride;s*=-1;}if(box_on_low_k ){normal+=kStride;s*=-1;}                                         if(box_on_low_j ||box_on_low_k ){i=  0;j= -1;k= -1;for(i=0;i<dim;i++){int ijk=i+j*jStride+k*kStride;x[ijk]=s*x[ijk+normal];}}
      s=1;normal=0;if(box_on_high_j){normal-=jStride;s*=-1;}if(box_on_low_k ){normal+=kStride;s*=-1;}                                         if(box_on_high_j||box_on_low_k ){i=  0;j=dim;k= -1;for(i=0;i<dim;i++){int ijk=i+j*jStride+k*kStride;x[ijk]=s*x[ijk+normal];}}
      s=1;normal=0;if(box_on_low_j ){normal+=jStride;s*=-1;}if(box_on_high_k){normal-=kStride;s*=-1;}                                         if(box_on_low_j ||box_on_high_k){i=  0;j= -1;k=dim;for(i=0;i<dim;i++){int ijk=i+j*jStride+k*kStride;x[ijk]=s*x[ijk+normal];}}
      s=1;normal=0;if(box_on_high_j){normal-=jStride;s*=-1;}if(box_on_high_k){normal-=kStride;s*=-1;}                                         if(box_on_high_j||box_on_high_k){i=  0;j=dim;k=dim;for(i=0;i<dim;i++){int ijk=i+j*jStride+k*kStride;x[ijk]=s*x[ijk+normal];}}
      s=1;normal=0;if(box_on_low_i ){normal+=      1;s*=-1;}if(box_on_low_k ){normal+=kStride;s*=-1;}                                         if(box_on_low_i ||box_on_low_k ){i= -1;j=  0;k= -1;for(j=0;j<dim;j++){int ijk=i+j*jStride+k*kStride;x[ijk]=s*x[ijk+normal];}}
      s=1;normal=0;if(box_on_high_i){normal-=      1;s*=-1;}if(box_on_low_k ){normal+=kStride;s*=-1;}                                         if(box_on_high_i||box_on_low_k ){i=dim;j=  0;k= -1;for(j=0;j<dim;j++){int ijk=i+j*jStride+k*kStride;x[ijk]=s*x[ijk+normal];}}
      s=1;normal=0;if(box_on_low_i ){normal+=      1;s*=-1;}if(box_on_high_k){normal-=kStride;s*=-1;}                                         if(box_on_low_i ||box_on_high_k){i= -1;j=  0;k=dim;for(j=0;j<dim;j++){int ijk=i+j*jStride+k*kStride;x[ijk]=s*x[ijk+normal];}}
      s=1;normal=0;if(box_on_high_i){normal-=      1;s*=-1;}if(box_on_high_k){normal-=kStride;s*=-1;}                                         if(box_on_high_i||box_on_high_k){i=dim;j=  0;k=dim;for(j=0;j<dim;j++){int ijk=i+j*jStride+k*kStride;x[ijk]=s*x[ijk+normal];}}
      s=1;normal=0;if(box_on_low_i ){normal+=      1;s*=-1;}if(box_on_low_j ){normal+=jStride;s*=-1;}                                         if(box_on_low_i ||box_on_low_j ){i= -1;j= -1;k=  0;for(k=0;k<dim;k++){int ijk=i+j*jStride+k*kStride;x[ijk]=s*x[ijk+normal];}}
      s=1;normal=0;if(box_on_high_i){normal-=      1;s*=-1;}if(box_on_low_j ){normal+=jStride;s*=-1;}                                         if(box_on_high_i||box_on_low_j ){i=dim;j= -1;k=  0;for(k=0;k<dim;k++){int ijk=i+j*jStride+k*kStride;x[ijk]=s*x[ijk+normal];}}
      s=1;normal=0;if(box_on_low_i ){normal+=      1;s*=-1;}if(box_on_high_j){normal-=jStride;s*=-1;}                                         if(box_on_low_i ||box_on_high_j){i= -1;j=dim;k=  0;for(k=0;k<dim;k++){int ijk=i+j*jStride+k*kStride;x[ijk]=s*x[ijk+normal];}}
      s=1;normal=0;if(box_on_high_i){normal-=      1;s*=-1;}if(box_on_high_j){normal-=jStride;s*=-1;}                                         if(box_on_high_i||box_on_high_j){i=dim;j=dim;k=  0;for(k=0;k<dim;k++){int ijk=i+j*jStride+k*kStride;x[ijk]=s*x[ijk+normal];}}

      // 8 corners: up to three boundary directions, so s is -1 (one or three directions) or +1 (two directions)
      // calculate a normal vector for this corner                                                                                            // if corner is on a domain boundary, impose the boundary condition using the calculated normal
      s=1;normal=0;if(box_on_low_i ){normal+=      1;s*=-1;}if(box_on_low_j ){normal+=jStride;s*=-1;}if(box_on_low_k ){normal+=kStride;s*=-1;}if(box_on_low_i || box_on_low_j || box_on_low_k ){i= -1;j= -1;k= -1;{int ijk=i+j*jStride+k*kStride;x[ijk]=s*x[ijk+normal];}}
      s=1;normal=0;if(box_on_high_i){normal-=      1;s*=-1;}if(box_on_low_j ){normal+=jStride;s*=-1;}if(box_on_low_k ){normal+=kStride;s*=-1;}if(box_on_high_i|| box_on_low_j || box_on_low_k ){i=dim;j= -1;k= -1;{int ijk=i+j*jStride+k*kStride;x[ijk]=s*x[ijk+normal];}}
      s=1;normal=0;if(box_on_low_i ){normal+=      1;s*=-1;}if(box_on_high_j){normal-=jStride;s*=-1;}if(box_on_low_k ){normal+=kStride;s*=-1;}if(box_on_low_i || box_on_high_j|| box_on_low_k ){i= -1;j=dim;k= -1;{int ijk=i+j*jStride+k*kStride;x[ijk]=s*x[ijk+normal];}}
      s=1;normal=0;if(box_on_high_i){normal-=      1;s*=-1;}if(box_on_high_j){normal-=jStride;s*=-1;}if(box_on_low_k ){normal+=kStride;s*=-1;}if(box_on_high_i|| box_on_high_j|| box_on_low_k ){i=dim;j=dim;k= -1;{int ijk=i+j*jStride+k*kStride;x[ijk]=s*x[ijk+normal];}}
      s=1;normal=0;if(box_on_low_i ){normal+=      1;s*=-1;}if(box_on_low_j ){normal+=jStride;s*=-1;}if(box_on_high_k){normal-=kStride;s*=-1;}if(box_on_low_i || box_on_low_j || box_on_high_k){i= -1;j= -1;k=dim;{int ijk=i+j*jStride+k*kStride;x[ijk]=s*x[ijk+normal];}}
      s=1;normal=0;if(box_on_high_i){normal-=      1;s*=-1;}if(box_on_low_j ){normal+=jStride;s*=-1;}if(box_on_high_k){normal-=kStride;s*=-1;}if(box_on_high_i|| box_on_low_j || box_on_high_k){i=dim;j= -1;k=dim;{int ijk=i+j*jStride+k*kStride;x[ijk]=s*x[ijk+normal];}}
      s=1;normal=0;if(box_on_low_i ){normal+=      1;s*=-1;}if(box_on_high_j){normal-=jStride;s*=-1;}if(box_on_high_k){normal-=kStride;s*=-1;}if(box_on_low_i || box_on_high_j|| box_on_high_k){i= -1;j=dim;k=dim;{int ijk=i+j*jStride+k*kStride;x[ijk]=s*x[ijk+normal];}}
      s=1;normal=0;if(box_on_high_i){normal-=      1;s*=-1;}if(box_on_high_j){normal-=jStride;s*=-1;}if(box_on_high_k){normal-=kStride;s*=-1;}if(box_on_high_i|| box_on_high_j|| box_on_high_k){i=dim;j=dim;k=dim;{int ijk=i+j*jStride+k*kStride;x[ijk]=s*x[ijk+normal];}}

    }
  }
  level->cycles.boundary_conditions += (uint64_t)(CycleTime()-_timeStart);
}
Ejemplo n.º 22
0
//------------------------------------------------------------------------------------------------------------------------------
// Rebuilds the coefficient-dependent vectors of a level.
//
//   level     : level to rebuild; its VECTOR_DINV and VECTOR_L1INV vectors and
//               level->dominant_eigenvalue_of_DinvA are overwritten
//   fromLevel : if not NULL, VECTOR_ALPHA and VECTOR_BETA_{I,J,K} of `level` are first
//               formed by restriction() from this level; if NULL they are assumed to
//               have been set already
//   a, b      : scalars used in the diagonal  Aii = a*alpha - b*h2inv*sum(beta*(valid-2))
//               and in the off-diagonal sum   sumAbsAij = |b*h2inv|*sum(|beta*valid|)
//
// Steps: (1) optional restriction of the coefficients, (2) ghost zone exchange of
// alpha/beta, (3) per-cell computation of Dinv, L1inv and a Gershgorin bound on the
// dominant eigenvalue, (4) max-reduction of that bound across MPI ranks when USE_MPI is
// defined, (5) ghost zone exchange of Dinv/L1inv.  Rank 0 prints progress to stdout.
// Time for step (3) is added to level->cycles.blas1, step (4) to level->cycles.collectives.
// NOTE(review): no guard against Aii==0 (1.0/Aii) - presumably excluded by the caller's
// choice of a, b and coefficients; confirm.
void rebuild_operator(level_type * level, level_type *fromLevel, double a, double b){
  if(level->my_rank==0){printf("  rebuilding operator for level...  h=%e  ",level->h);fflush(stdout);}

  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // form restriction of alpha[], beta_*[] coefficients from fromLevel
  if(fromLevel != NULL){
    restriction(level,VECTOR_ALPHA ,fromLevel,VECTOR_ALPHA ,RESTRICT_CELL  );
    restriction(level,VECTOR_BETA_I,fromLevel,VECTOR_BETA_I,RESTRICT_FACE_I);
    restriction(level,VECTOR_BETA_J,fromLevel,VECTOR_BETA_J,RESTRICT_FACE_J);
    restriction(level,VECTOR_BETA_K,fromLevel,VECTOR_BETA_K,RESTRICT_FACE_K);
  } // else case assumes alpha/beta have been set


  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // exchange alpha/beta/...  (must be done before calculating Dinv)
  exchange_boundary(level,VECTOR_ALPHA ,0); // must be 0(faces,edges,corners) for CA version or 27pt
  exchange_boundary(level,VECTOR_BETA_I,0);
  exchange_boundary(level,VECTOR_BETA_J,0);
  exchange_boundary(level,VECTOR_BETA_K,0);


  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // calculate Dinv, L1inv, and estimate the dominant Eigenvalue
  uint64_t _timeStart = CycleTime();
  int box;

  double dominant_eigenvalue = -1e9;
  #pragma omp parallel for private(box) OMP_THREAD_ACROSS_BOXES(level->concurrent_boxes) reduction(max:dominant_eigenvalue) schedule(static)
  for(box=0;box<level->num_my_boxes;box++){
    int i,j,k;
    int jStride = level->my_boxes[box].jStride;
    int kStride = level->my_boxes[box].kStride;
    int  ghosts = level->my_boxes[box].ghosts;
    int     dim = level->my_boxes[box].dim;
    double h2inv = 1.0/(level->h*level->h);
    // all pointers are offset past the ghost cells so that index 0 is the first non ghost zone point
    double * __restrict__ alpha  = level->my_boxes[box].vectors[VECTOR_ALPHA ] + ghosts*(1+jStride+kStride);
    double * __restrict__ beta_i = level->my_boxes[box].vectors[VECTOR_BETA_I] + ghosts*(1+jStride+kStride);
    double * __restrict__ beta_j = level->my_boxes[box].vectors[VECTOR_BETA_J] + ghosts*(1+jStride+kStride);
    double * __restrict__ beta_k = level->my_boxes[box].vectors[VECTOR_BETA_K] + ghosts*(1+jStride+kStride);
    double * __restrict__   Dinv = level->my_boxes[box].vectors[VECTOR_DINV  ] + ghosts*(1+jStride+kStride);
    double * __restrict__  L1inv = level->my_boxes[box].vectors[VECTOR_L1INV ] + ghosts*(1+jStride+kStride);
    double * __restrict__  valid = level->my_boxes[box].vectors[VECTOR_VALID ] + ghosts*(1+jStride+kStride);
    double box_eigenvalue = -1e9;
    #pragma omp parallel for private(k,j,i) OMP_THREAD_WITHIN_A_BOX(level->threads_per_box) reduction(max:box_eigenvalue) schedule(static)
    for(k=0;k<dim;k++){
    for(j=0;j<dim;j++){
    for(i=0;i<dim;i++){
      int ijk = i + j*jStride + k*kStride;

      // A variant of the two formulas below that did not weight by valid[] used to be kept
      // here under '#if 0'; its own comment described it as looking wrong but being faster.
      // It was never compiled and has been removed.

      // radius of Gershgorin disc is the sum of the absolute values of the off-diagonal elements...
      double sumAbsAij = fabs(b*h2inv) * (
                      fabs( beta_i[ijk        ]*valid[ijk-1      ] )+
                      fabs( beta_j[ijk        ]*valid[ijk-jStride] )+
                      fabs( beta_k[ijk        ]*valid[ijk-kStride] )+
                      fabs( beta_i[ijk+1      ]*valid[ijk+1      ] )+
                      fabs( beta_j[ijk+jStride]*valid[ijk+jStride] )+
                      fabs( beta_k[ijk+kStride]*valid[ijk+kStride] )
                      );

      // center of Gershgorin disc is the diagonal element...
      double    Aii = a*alpha[ijk] - b*h2inv*(
                                       beta_i[ijk        ]*( valid[ijk-1      ]-2.0 )+
                                       beta_j[ijk        ]*( valid[ijk-jStride]-2.0 )+
                                       beta_k[ijk        ]*( valid[ijk-kStride]-2.0 )+
                                       beta_i[ijk+1      ]*( valid[ijk+1      ]-2.0 )+
                                       beta_j[ijk+jStride]*( valid[ijk+jStride]-2.0 )+
                                       beta_k[ijk+kStride]*( valid[ijk+kStride]-2.0 )
                                     );

      Dinv[ijk] = 1.0/Aii; // inverse of the diagonal Aii

      // L1inv = ( D+D^{L1} )^{-1}
      // as suggested by eq 6.5 in Baker et al, "Multigrid smoothers for ultra-parallel computing: additional theory and discussion"...
      // (the plain L1 row norm 1.0/(Aii+sumAbsAij) is not used)
      if(Aii>=1.5*sumAbsAij){L1inv[ijk] = 1.0/(Aii              );}
                        else{L1inv[ijk] = 1.0/(Aii+0.5*sumAbsAij);}

      // upper limit to Gershgorin disc == bound on dominant eigenvalue
      double Di = (Aii + sumAbsAij)/Aii;
      if(Di>box_eigenvalue){box_eigenvalue=Di;}
    }}}
    if(box_eigenvalue>dominant_eigenvalue){dominant_eigenvalue = box_eigenvalue;}
  }
  level->cycles.blas1 += (uint64_t)(CycleTime()-_timeStart);


  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // Reduce the local estimates dominant eigenvalue to a global estimate
  #ifdef USE_MPI
  uint64_t _timeStartAllReduce = CycleTime();
  double send = dominant_eigenvalue;
  MPI_Allreduce(&send,&dominant_eigenvalue,1,MPI_DOUBLE,MPI_MAX,MPI_COMM_WORLD);
  uint64_t _timeEndAllReduce = CycleTime();
  level->cycles.collectives   += (uint64_t)(_timeEndAllReduce-_timeStartAllReduce);
  #endif
  if(level->my_rank==0){printf("eigenvalue_max<%e\n",dominant_eigenvalue);fflush(stdout);}
  level->dominant_eigenvalue_of_DinvA = dominant_eigenvalue;


  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // exchange Dinv/L1inv/...
  exchange_boundary(level,VECTOR_DINV ,0); // must be 0(faces,edges,corners) for CA version
  exchange_boundary(level,VECTOR_L1INV,0);
  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
}
Ejemplo n.º 23
0
//------------------------------------------------------------------------------------------------------------------------------
// Calibrates the global `frequency`: stores the number of CycleTime() ticks that
// elapse across a sleep(1) call, i.e. roughly ticks per second.
// NOTE: blocks the caller for about one second.
void init_timer() {
  const uint64_t ticks_before = CycleTime();
  sleep(1);
  const uint64_t ticks_elapsed = CycleTime() - ticks_before;
  frequency = (double)ticks_elapsed;
}
Ejemplo n.º 24
0
//------------------------------------------------------------------------------------------------------------------------------
// Samuel Williams
// [email protected]
// Lawrence Berkeley National Lab
//------------------------------------------------------------------------------------------------------------------------------
// Fills the first layer of ghost cells (index -1 and index dim) of vector x_id on
// the domain boundary for a homogeneous Dirichlet condition.
//
//   level : level whose boxes level->my_boxes[0..num_my_boxes-1] are updated in place
//   x_id  : index into each box's vectors[] of the vector to update
//
// Returns immediately when level->domain_boundary_condition is BC_PERIODIC, and only
// writes ghost cells when it is BC_DIRICHLET.  Sign convention used below:
//   face   ghost = -x(mirror cell across the face)          (one reflection)
//   edge   ghost = +x(mirror cell across both faces)        (two reflections)
//   corner ghost = -x(mirror cell across all three faces)   (three reflections)
// NOTE(review): edge and corner ghosts are only written when *every* adjoining box
// face lies on the domain boundary (the conditions are joined with &&); edge/corner
// ghosts that touch the domain boundary through fewer faces are left unchanged by
// this function - confirm that this is intended.
// Elapsed time is accumulated in level->cycles.boundary_conditions.
void apply_BCs_linear(level_type * level, int x_id){
  if(level->domain_boundary_condition == BC_PERIODIC)return; // no BC's to apply !

  // for cell-centered, we need to fill in the ghost zones to apply any BC's
  // this code does a simple linear interpolation for homogeneous dirichlet
  //
  //   . . . . . . . . .          . . . . . . . . .
  //   .       .       .          .       .       .
  //   .   ?   .   ?   .          .+x(0,0).-x(0,0).
  //   .       .       .          .       .       .
  //   . . . . +-------+          . . . . +-------+
  //   .       |       |          .       |       |
  //   .   ?   | x(0,0)|          .-x(0,0)| x(0,0)|
  //   .       |       |          .       |       |
  //   . . . . +-------+          . . . . +-------+
  //           ^
  //           domain boundary is the face... i.e. between two array indices !!!
  //

  // (unused locals omp_across_boxes, omp_within_a_box, s, h2inv and valid were removed)
  uint64_t _timeStart = CycleTime();
  int box;

  #pragma omp parallel for private(box) OMP_THREAD_ACROSS_BOXES(level->concurrent_boxes)
  for(box=0;box<level->num_my_boxes;box++){
    int i,j,k;
    int jStride = level->my_boxes[box].jStride;
    int kStride = level->my_boxes[box].kStride;
    int  ghosts = level->my_boxes[box].ghosts;
    int     dim = level->my_boxes[box].dim;
    double * __restrict__ x      = level->my_boxes[box].vectors[        x_id] + ghosts*(1+jStride+kStride); // i.e. [0] = first non ghost zone point

    if(level->domain_boundary_condition == BC_DIRICHLET){
      // Each line first pins the ghost-plane indices (i, j and/or k set to -1 or dim), then,
      // if the box touches the domain boundary there, overwrites that ghost region.
      // why these and not -1, -5, +77 ???
                  k= -1;if((level->my_boxes[box].low.k     ==            0))                                                                for(j=0;j<dim;j++)for(i=0;i<dim;i++){int ijk=i+j*jStride+k*kStride;x[ijk]=-x[ijk          +kStride];} // face
            j= -1;      if((level->my_boxes[box].low.j     ==            0))                                                                for(k=0;k<dim;k++)for(i=0;i<dim;i++){int ijk=i+j*jStride+k*kStride;x[ijk]=-x[ijk  +jStride        ];} // face
      i= -1;            if((level->my_boxes[box].low.i     ==            0))                                                                for(k=0;k<dim;k++)for(j=0;j<dim;j++){int ijk=i+j*jStride+k*kStride;x[ijk]=-x[ijk+1                ];} // face
      i=dim;            if((level->my_boxes[box].low.i+dim == level->dim.i))                                                                for(k=0;k<dim;k++)for(j=0;j<dim;j++){int ijk=i+j*jStride+k*kStride;x[ijk]=-x[ijk-1                ];} // face
            j=dim;      if((level->my_boxes[box].low.j+dim == level->dim.j))                                                                for(k=0;k<dim;k++)for(i=0;i<dim;i++){int ijk=i+j*jStride+k*kStride;x[ijk]=-x[ijk  -jStride        ];} // face
                  k=dim;if((level->my_boxes[box].low.k+dim == level->dim.k))                                                                for(j=0;j<dim;j++)for(i=0;i<dim;i++){int ijk=i+j*jStride+k*kStride;x[ijk]=-x[ijk          -kStride];} // face

            j= -1;k= -1;if((level->my_boxes[box].low.j     ==            0)&&(level->my_boxes[box].low.k     ==            0))                                for(i=0;i<dim;i++){int ijk=i+j*jStride+k*kStride;x[ijk]=x[ijk  +jStride+kStride];} // edge
      i= -1;      k= -1;if((level->my_boxes[box].low.i     ==            0)&&(level->my_boxes[box].low.k     ==            0))                                for(j=0;j<dim;j++){int ijk=i+j*jStride+k*kStride;x[ijk]=x[ijk+1        +kStride];} // edge
      i=dim;      k= -1;if((level->my_boxes[box].low.i+dim == level->dim.i)&&(level->my_boxes[box].low.k     ==            0))                                for(j=0;j<dim;j++){int ijk=i+j*jStride+k*kStride;x[ijk]=x[ijk-1        +kStride];} // edge
            j=dim;k= -1;if((level->my_boxes[box].low.j+dim == level->dim.j)&&(level->my_boxes[box].low.k     ==            0))                                for(i=0;i<dim;i++){int ijk=i+j*jStride+k*kStride;x[ijk]=x[ijk  -jStride+kStride];} // edge
      i= -1;j= -1;      if((level->my_boxes[box].low.i     ==            0)&&(level->my_boxes[box].low.j     ==            0))                                for(k=0;k<dim;k++){int ijk=i+j*jStride+k*kStride;x[ijk]=x[ijk+1+jStride        ];} // edge
      i=dim;j= -1;      if((level->my_boxes[box].low.i+dim == level->dim.i)&&(level->my_boxes[box].low.j     ==            0))                                for(k=0;k<dim;k++){int ijk=i+j*jStride+k*kStride;x[ijk]=x[ijk-1+jStride        ];} // edge
      i= -1;j=dim;      if((level->my_boxes[box].low.i     ==            0)&&(level->my_boxes[box].low.j+dim == level->dim.j))                                for(k=0;k<dim;k++){int ijk=i+j*jStride+k*kStride;x[ijk]=x[ijk+1-jStride        ];} // edge
      i=dim;j=dim;      if((level->my_boxes[box].low.i+dim == level->dim.i)&&(level->my_boxes[box].low.j+dim == level->dim.j))                                for(k=0;k<dim;k++){int ijk=i+j*jStride+k*kStride;x[ijk]=x[ijk-1-jStride        ];} // edge
            j= -1;k=dim;if((level->my_boxes[box].low.j     ==            0)&&(level->my_boxes[box].low.k+dim == level->dim.k))                                for(i=0;i<dim;i++){int ijk=i+j*jStride+k*kStride;x[ijk]=x[ijk  +jStride-kStride];} // edge
      i= -1;      k=dim;if((level->my_boxes[box].low.i     ==            0)&&(level->my_boxes[box].low.k+dim == level->dim.k))                                for(j=0;j<dim;j++){int ijk=i+j*jStride+k*kStride;x[ijk]=x[ijk+1        -kStride];} // edge
      i=dim;      k=dim;if((level->my_boxes[box].low.i+dim == level->dim.i)&&(level->my_boxes[box].low.k+dim == level->dim.k))                                for(j=0;j<dim;j++){int ijk=i+j*jStride+k*kStride;x[ijk]=x[ijk-1        -kStride];} // edge
            j=dim;k=dim;if((level->my_boxes[box].low.j+dim == level->dim.j)&&(level->my_boxes[box].low.k+dim == level->dim.k))                                for(i=0;i<dim;i++){int ijk=i+j*jStride+k*kStride;x[ijk]=x[ijk  -jStride-kStride];} // edge

      i= -1;j= -1;k= -1;if((level->my_boxes[box].low.i     ==            0)&&(level->my_boxes[box].low.j     ==            0)&&(level->my_boxes[box].low.k     ==            0)){int ijk=i+j*jStride+k*kStride;x[ijk]=-x[ijk+1+jStride+kStride];} // corner
      i=dim;j= -1;k= -1;if((level->my_boxes[box].low.i+dim == level->dim.i)&&(level->my_boxes[box].low.j     ==            0)&&(level->my_boxes[box].low.k     ==            0)){int ijk=i+j*jStride+k*kStride;x[ijk]=-x[ijk-1+jStride+kStride];} // corner
      i= -1;j=dim;k= -1;if((level->my_boxes[box].low.i     ==            0)&&(level->my_boxes[box].low.j+dim == level->dim.j)&&(level->my_boxes[box].low.k     ==            0)){int ijk=i+j*jStride+k*kStride;x[ijk]=-x[ijk+1-jStride+kStride];} // corner
      i=dim;j=dim;k= -1;if((level->my_boxes[box].low.i+dim == level->dim.i)&&(level->my_boxes[box].low.j+dim == level->dim.j)&&(level->my_boxes[box].low.k     ==            0)){int ijk=i+j*jStride+k*kStride;x[ijk]=-x[ijk-1-jStride+kStride];} // corner
      i= -1;j= -1;k=dim;if((level->my_boxes[box].low.i     ==            0)&&(level->my_boxes[box].low.j     ==            0)&&(level->my_boxes[box].low.k+dim == level->dim.k)){int ijk=i+j*jStride+k*kStride;x[ijk]=-x[ijk+1+jStride-kStride];} // corner
      i=dim;j= -1;k=dim;if((level->my_boxes[box].low.i+dim == level->dim.i)&&(level->my_boxes[box].low.j     ==            0)&&(level->my_boxes[box].low.k+dim == level->dim.k)){int ijk=i+j*jStride+k*kStride;x[ijk]=-x[ijk-1+jStride-kStride];} // corner
      i= -1;j=dim;k=dim;if((level->my_boxes[box].low.i     ==            0)&&(level->my_boxes[box].low.j+dim == level->dim.j)&&(level->my_boxes[box].low.k+dim == level->dim.k)){int ijk=i+j*jStride+k*kStride;x[ijk]=-x[ijk+1-jStride-kStride];} // corner
      i=dim;j=dim;k=dim;if((level->my_boxes[box].low.i+dim == level->dim.i)&&(level->my_boxes[box].low.j+dim == level->dim.j)&&(level->my_boxes[box].low.k+dim == level->dim.k)){int ijk=i+j*jStride+k*kStride;x[ijk]=-x[ijk-1-jStride-kStride];} // corner
    }
  }
  level->cycles.boundary_conditions += (uint64_t)(CycleTime()-_timeStart);
}
Ejemplo n.º 25
0
//------------------------------------------------------------------------------------------------------------------------------
// Samuel Williams
// [email protected]
// Lawrence Berkeley National Lab
//------------------------------------------------------------------------------------------------------------------------------
// Perform an (intra-level) ghost zone exchange on vector `id`.
//  NOTE exchange_boundary() only exchanges the boundary; it will not enforce any boundary conditions.
//  BC's are either the responsibility of a separate function or should be fused into the stencil.
//  Any nonzero justFaces selects exchange_ghosts[1]; zero selects exchange_ghosts[0].
//  All elapsed cycle counts are accumulated into level->cycles.ghostZone_*.
void exchange_boundary(level_type * level, int id, int justFaces){
  uint64_t commStart = CycleTime();
  uint64_t phaseStart, phaseEnd;
  int blk = 0;
  int msg;

  // collapse to exactly 0 or 1 so that it can be used as an index into exchange_ghosts[]
  justFaces = justFaces ? 1 : 0;

  #ifdef USE_MPI
  int totalRequests = level->exchange_ghosts[justFaces].num_recvs + level->exchange_ghosts[justFaces].num_sends;
  // requests[] is shared by both directions: sends occupy [0..num_sends-1], receives follow them
  MPI_Request *sendReq = level->exchange_ghosts[justFaces].requests;
  MPI_Request *recvReq = sendReq + level->exchange_ghosts[justFaces].num_sends;

  // prepost one Irecv for every entry in the packed list of receives...
  phaseStart = CycleTime();
  #ifdef USE_MPI_THREAD_MULTIPLE
  #pragma omp parallel for schedule(dynamic,1)
  #endif
  for(msg=0; msg<level->exchange_ghosts[justFaces].num_recvs; msg++){
    MPI_Irecv(level->exchange_ghosts[justFaces].recv_buffers[msg],
              level->exchange_ghosts[justFaces].recv_sizes[msg],
              MPI_DOUBLE,
              level->exchange_ghosts[justFaces].recv_ranks[msg],
              0, // by convention, ghost zone exchanges use tag=0
              MPI_COMM_WORLD,
              &recvReq[msg]
    );
  }
  phaseEnd = CycleTime();
  level->cycles.ghostZone_recv += (phaseEnd-phaseStart);


  // fill the MPI send buffers (block list 0)...
  phaseStart = CycleTime();
  #pragma omp parallel for if(level->exchange_ghosts[justFaces].num_blocks[0]>1) schedule(static,1)
  for(blk=0; blk<level->exchange_ghosts[justFaces].num_blocks[0]; blk++){
    CopyBlock(level,id,&level->exchange_ghosts[justFaces].blocks[0][blk]);
  }
  phaseEnd = CycleTime();
  level->cycles.ghostZone_pack += (phaseEnd-phaseStart);


  // post one Isend per packed send buffer...
  phaseStart = CycleTime();
  #ifdef USE_MPI_THREAD_MULTIPLE
  #pragma omp parallel for schedule(dynamic,1)
  #endif
  for(msg=0; msg<level->exchange_ghosts[justFaces].num_sends; msg++){
    MPI_Isend(level->exchange_ghosts[justFaces].send_buffers[msg],
              level->exchange_ghosts[justFaces].send_sizes[msg],
              MPI_DOUBLE,
              level->exchange_ghosts[justFaces].send_ranks[msg],
              0, // by convention, ghost zone exchanges use tag=0
              MPI_COMM_WORLD,
              &sendReq[msg]
    );
  }
  phaseEnd = CycleTime();
  level->cycles.ghostZone_send += (phaseEnd-phaseStart);
  #endif


  // on-node exchange (block list 1)... try and hide it within the Isend latency...
  phaseStart = CycleTime();
  #pragma omp parallel for if(level->exchange_ghosts[justFaces].num_blocks[1]>1) schedule(static,1)
  for(blk=0; blk<level->exchange_ghosts[justFaces].num_blocks[1]; blk++){
    CopyBlock(level,id,&level->exchange_ghosts[justFaces].blocks[1][blk]);
  }
  phaseEnd = CycleTime();
  level->cycles.ghostZone_local += (phaseEnd-phaseStart);


  #ifdef USE_MPI
  // block until every outstanding send and receive has completed...
  phaseStart = CycleTime();
  if(totalRequests){
    MPI_Waitall(totalRequests,level->exchange_ghosts[justFaces].requests,level->exchange_ghosts[justFaces].status);
  }
  phaseEnd = CycleTime();
  level->cycles.ghostZone_wait += (phaseEnd-phaseStart);


  // drain the MPI receive buffers (block list 2)...
  phaseStart = CycleTime();
  #pragma omp parallel for if(level->exchange_ghosts[justFaces].num_blocks[2]>1) schedule(static,1)
  for(blk=0; blk<level->exchange_ghosts[justFaces].num_blocks[2]; blk++){
    CopyBlock(level,id,&level->exchange_ghosts[justFaces].blocks[2][blk]);
  }
  phaseEnd = CycleTime();
  level->cycles.ghostZone_unpack += (phaseEnd-phaseStart);
  #endif


  level->cycles.ghostZone_total += (uint64_t)(CycleTime()-commStart);
}
Ejemplo n.º 26
0
//------------------------------------------------------------------------------------------------------------------------------
// Perform an (inter-level) restriction of vector id_f on level_f into vector id_c on level_c.
//  restrictionType indexes the restriction[] communicators of both levels and is forwarded to RestrictBlock().
//  All elapsed cycle counts are accumulated into level_f->cycles.restriction_*.
void restriction(level_type * level_c, int id_c, level_type *level_f, int id_f, int restrictionType){
  uint64_t commStart = CycleTime();
  uint64_t phaseStart, phaseEnd;
  int blk = 0;
  int msg;
  int msgTag = (level_f->tag<<4) | 0x5;  // MPI tag built from the fine level's tag


  #ifdef USE_MPI
  // by convention, level_f allocates a combined array of requests for both level_f sends and level_c recvs...
  // the level_c receives occupy the front of that array and the level_f sends follow them
  int totalRequests = level_c->restriction[restrictionType].num_recvs + level_f->restriction[restrictionType].num_sends;
  MPI_Request *recvReq = level_f->restriction[restrictionType].requests;
  MPI_Request *sendReq = recvReq + level_c->restriction[restrictionType].num_recvs;


  // prepost one Irecv for every entry in the coarse level's packed list of receives...
  phaseStart = CycleTime();
  #ifdef USE_MPI_THREAD_MULTIPLE
  #pragma omp parallel for schedule(dynamic,1)
  #endif
  for(msg=0; msg<level_c->restriction[restrictionType].num_recvs; msg++){
    MPI_Irecv(level_c->restriction[restrictionType].recv_buffers[msg],
              level_c->restriction[restrictionType].recv_sizes[msg],
              MPI_DOUBLE,
              level_c->restriction[restrictionType].recv_ranks[msg],
              msgTag,
              MPI_COMM_WORLD,
              &recvReq[msg]
    );
  }
  phaseEnd = CycleTime();
  level_f->cycles.restriction_recv += (phaseEnd-phaseStart);


  // restrict into the MPI send buffers (fine level block list 0)...
  phaseStart = CycleTime();
  PRAGMA_THREAD_ACROSS_BLOCKS(level_f,blk,level_f->restriction[restrictionType].num_blocks[0])
  for(blk=0; blk<level_f->restriction[restrictionType].num_blocks[0]; blk++){
    RestrictBlock(level_c,id_c,level_f,id_f,&level_f->restriction[restrictionType].blocks[0][blk],restrictionType);
  }
  phaseEnd = CycleTime();
  level_f->cycles.restriction_pack += (phaseEnd-phaseStart);


  // post one Isend per fine level send buffer...
  phaseStart = CycleTime();
  #ifdef USE_MPI_THREAD_MULTIPLE
  #pragma omp parallel for schedule(dynamic,1)
  #endif
  for(msg=0; msg<level_f->restriction[restrictionType].num_sends; msg++){
    MPI_Isend(level_f->restriction[restrictionType].send_buffers[msg],
              level_f->restriction[restrictionType].send_sizes[msg],
              MPI_DOUBLE,
              level_f->restriction[restrictionType].send_ranks[msg],
              msgTag,
              MPI_COMM_WORLD,
              &sendReq[msg]
    );
  }
  phaseEnd = CycleTime();
  level_f->cycles.restriction_send += (phaseEnd-phaseStart);
  #endif


  // on-node restriction (fine level block list 1)... try and hide it within the Isend latency...
  phaseStart = CycleTime();
  PRAGMA_THREAD_ACROSS_BLOCKS(level_f,blk,level_f->restriction[restrictionType].num_blocks[1])
  for(blk=0; blk<level_f->restriction[restrictionType].num_blocks[1]; blk++){
    RestrictBlock(level_c,id_c,level_f,id_f,&level_f->restriction[restrictionType].blocks[1][blk],restrictionType);
  }
  phaseEnd = CycleTime();
  level_f->cycles.restriction_local += (phaseEnd-phaseStart);


  #ifdef USE_MPI
  // block until every outstanding send and receive has completed...
  phaseStart = CycleTime();
  if(totalRequests){
    MPI_Waitall(totalRequests,level_f->restriction[restrictionType].requests,level_f->restriction[restrictionType].status);
  }
  phaseEnd = CycleTime();
  level_f->cycles.restriction_wait += (phaseEnd-phaseStart);


  // copy the MPI receive buffers into the coarse level (coarse level block list 2)...
  phaseStart = CycleTime();
  PRAGMA_THREAD_ACROSS_BLOCKS(level_f,blk,level_c->restriction[restrictionType].num_blocks[2])
  for(blk=0; blk<level_c->restriction[restrictionType].num_blocks[2]; blk++){
    CopyBlock(level_c,id_c,&level_c->restriction[restrictionType].blocks[2][blk]);
  }
  phaseEnd = CycleTime();
  level_f->cycles.restriction_unpack += (phaseEnd-phaseStart);
  #endif


  level_f->cycles.restriction_total += (uint64_t)(CycleTime()-commStart);
}
Ejemplo n.º 27
0
//------------------------------------------------------------------------------------------------------------------------------
// Rebuild the operator-dependent data of `level` for the 27pt operator:
//   1. if fromLevel is non-NULL, restrict alpha and beta_{i,j,k} from fromLevel onto level
//      (otherwise the coefficients on level are assumed to have been set already),
//   2. exchange the ghost zones of alpha and beta_{i,j,k},
//   3. fill VECTOR_DINV and VECTOR_L1INV and form a Gershgorin bound on the dominant eigenvalue,
//   4. reduce that bound across ranks (MPI builds only) and store it in level->dominant_eigenvalue_of_DinvA,
//   5. exchange the ghost zones of Dinv and L1inv.
// a and b are the scalars that enter the diagonal as Aii = a - b*h2inv*STENCIL_COEF0.
// Rank 0 prints a one line progress message to stdout.
void rebuild_operator(level_type * level, level_type *fromLevel, double a, double b){
  if(level->my_rank==0){fprintf(stdout,"  rebuilding 27pt CC operator for level...  h=%e  ",level->h);}

  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // form restriction of alpha[], beta_*[] coefficients from fromLevel
  if(fromLevel != NULL){
    restriction(level,VECTOR_ALPHA ,fromLevel,VECTOR_ALPHA ,RESTRICT_CELL  );
    restriction(level,VECTOR_BETA_I,fromLevel,VECTOR_BETA_I,RESTRICT_FACE_I);
    restriction(level,VECTOR_BETA_J,fromLevel,VECTOR_BETA_J,RESTRICT_FACE_J);
    restriction(level,VECTOR_BETA_K,fromLevel,VECTOR_BETA_K,RESTRICT_FACE_K);
  } // else case assumes alpha/beta have been set


  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // exchange alpha/beta/...  (must be done before calculating Dinv)
  exchange_boundary(level,VECTOR_ALPHA ,0); // must be 0(faces,edges,corners) for CA version or 27pt
  exchange_boundary(level,VECTOR_BETA_I,0);
  exchange_boundary(level,VECTOR_BETA_J,0);
  exchange_boundary(level,VECTOR_BETA_K,0);


  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // calculate Dinv, L1inv, and estimate the dominant Eigenvalue
  uint64_t _timeStart = CycleTime();
  int block;

  double dominant_eigenvalue = -1e9;

  PRAGMA_THREAD_ACROSS_BLOCKS_MAX(level,block,level->num_my_blocks,dominant_eigenvalue)
  for(block=0;block<level->num_my_blocks;block++){
    const int box = level->my_blocks[block].read.box;
    const int ilo = level->my_blocks[block].read.i;
    const int jlo = level->my_blocks[block].read.j;
    const int klo = level->my_blocks[block].read.k;
    const int ihi = level->my_blocks[block].dim.i + ilo;
    const int jhi = level->my_blocks[block].dim.j + jlo;
    const int khi = level->my_blocks[block].dim.k + klo;
    const int jStride = level->my_boxes[box].jStride;
    const int kStride = level->my_boxes[box].kStride;
    const int  ghosts = level->my_boxes[box].ghosts;
    // offset the base pointers so that index 0 is the first non ghost zone point
    double * __restrict__   Dinv = level->my_boxes[box].vectors[VECTOR_DINV  ] + ghosts*(1+jStride+kStride);
    double * __restrict__  L1inv = level->my_boxes[box].vectors[VECTOR_L1INV ] + ghosts*(1+jStride+kStride);

    // None of the quantities below reference the cell index, so they are evaluated once per block
    // instead of once per cell (assumes the STENCIL_COEF* macros are plain constants -- they take no arguments here).
    const double h2inv = 1.0/(level->h*level->h);
    // radius of Gershgorin disc is the sum of the absolute values of the off-diagonal elements...
    const double sumAbsAij = fabs(b*h2inv*6.0*STENCIL_COEF1) + fabs(b*h2inv*12.0*STENCIL_COEF2) + fabs(b*h2inv*8.0*STENCIL_COEF3);
    // center of Gershgorin disc is the diagonal element...
    const double Aii = a - b*h2inv*( STENCIL_COEF0 );
    // inverse of the diagonal Aii
    const double DinvValue = 1.0/Aii;
    // as suggested by eq 6.5 in Baker et al, "Multigrid smoothers for ultra-parallel computing: additional theory and discussion"...
    // (the plain L1 row norm variant would have been 1.0/(Aii+sumAbsAij), i.e. L1inv = ( D+D^{L1} )^{-1})
    const double L1invValue = (Aii>=1.5*sumAbsAij) ? 1.0/(Aii              )
                                                   : 1.0/(Aii+0.5*sumAbsAij);
    // upper limit to Gershgorin disc == bound on dominant eigenvalue
    const double Di = (Aii + sumAbsAij)/Aii;

    double block_eigenvalue = -1e9;
    int i,j,k;

    for(k=klo;k<khi;k++){
    for(j=jlo;j<jhi;j++){
    for(i=ilo;i<ihi;i++){
      int ijk = i + j*jStride + k*kStride;
       Dinv[ijk] = DinvValue;
      L1inv[ijk] = L1invValue;
    }}}

    // a block contributes to the estimate only if it actually contains at least one cell
    if((ilo<ihi)&&(jlo<jhi)&&(klo<khi)&&(Di>block_eigenvalue)){block_eigenvalue = Di;}
    if(block_eigenvalue>dominant_eigenvalue){dominant_eigenvalue = block_eigenvalue;}
  }
  level->cycles.blas1 += (uint64_t)(CycleTime()-_timeStart);


  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // Reduce the local estimates dominant eigenvalue to a global estimate
  #ifdef USE_MPI
  uint64_t _timeStartAllReduce = CycleTime();
  double send = dominant_eigenvalue;
  MPI_Allreduce(&send,&dominant_eigenvalue,1,MPI_DOUBLE,MPI_MAX,MPI_COMM_WORLD);
  uint64_t _timeEndAllReduce = CycleTime();
  level->cycles.collectives   += (uint64_t)(_timeEndAllReduce-_timeStartAllReduce);
  #endif
  if(level->my_rank==0){fprintf(stdout,"eigenvalue_max<%e\n",dominant_eigenvalue);}
  level->dominant_eigenvalue_of_DinvA = dominant_eigenvalue;


  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // exchange Dinv/L1inv/...
  exchange_boundary(level,VECTOR_DINV ,0); // must be 0(faces,edges,corners) for CA version
  exchange_boundary(level,VECTOR_L1INV,0);
  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
}