Esempio n. 1
0
//------------------------------------------------------------------------------------------------------------------------------
void rebuild_operator(level_type * level, level_type *fromLevel, double a, double b){
  // Refreshes the operator data stored on `level`.
  //   fromLevel : level the alpha/beta coefficients are restricted from; when NULL the
  //               coefficients already present on `level` are used as they are.
  //   a, b      : scalars handed through to rebuild_operator_blackbox().

  // the three face-centered beta vectors, each paired with its restriction type
  const int betaIds[3]      = {VECTOR_BETA_I  ,VECTOR_BETA_J  ,VECTOR_BETA_K  };
  const int betaRestrict[3] = {RESTRICT_FACE_I,RESTRICT_FACE_J,RESTRICT_FACE_K};
  int d;

  // 1) pull alpha[] / beta_*[] from fromLevel (skipped when there is no source level)
  if(fromLevel != NULL){
    #ifdef VECTOR_ALPHA
    restriction(level,VECTOR_ALPHA,fromLevel,VECTOR_ALPHA,RESTRICT_CELL);
    #endif
    for(d=0;d<3;d++){
      restriction(level,betaIds[d],fromLevel,betaIds[d],betaRestrict[d]);
    }
  }

  // 2) fill the ghost zones of the beta's by extrapolation (mixed derivatives need them)
  extrapolate_betas(level);

  // 3) boundary exchange of the coefficients -- has to happen before Dinv is calculated
  #ifdef VECTOR_ALPHA
  exchange_boundary(level,VECTOR_ALPHA,STENCIL_SHAPE_BOX);
  #endif
  for(d=0;d<3;d++){
    exchange_boundary(level,betaIds[d],STENCIL_SHAPE_BOX);
  }

  // 4) black box rebuild of D^{-1}, l1^{-1}, dominant eigenvalue, ...
  rebuild_operator_blackbox(level,a,b,4);

  // 5) boundary exchange of the freshly built Dinv (and L1inv when it exists)
  exchange_boundary(level,VECTOR_DINV,STENCIL_SHAPE_BOX);
  #ifdef VECTOR_L1INV
  exchange_boundary(level,VECTOR_L1INV,STENCIL_SHAPE_BOX);
  #endif
}
Esempio n. 2
0
//------------------------------------------------------------------------------------------------------------------------------
void rebuild_operator(level_type * level, level_type *fromLevel, double a, double b){
  // Refreshes the operator data stored on `level`.
  //   fromLevel : level the alpha/beta coefficients are restricted from; when NULL the
  //               coefficients already present on `level` are used as they are.
  //   a, b      : scalars handed through to rebuild_operator_blackbox().

  // the three face-centered beta vectors, each paired with its restriction type
  const int betaIds[3]      = {VECTOR_BETA_I  ,VECTOR_BETA_J  ,VECTOR_BETA_K  };
  const int betaRestrict[3] = {RESTRICT_FACE_I,RESTRICT_FACE_J,RESTRICT_FACE_K};
  int d;

  // 1) pull alpha[] / beta_*[] from fromLevel (skipped when there is no source level)
  if(fromLevel != NULL){
    #ifdef VECTOR_ALPHA
    restriction(level,VECTOR_ALPHA,fromLevel,VECTOR_ALPHA,RESTRICT_CELL);
    #endif
    for(d=0;d<3;d++){
      restriction(level,betaIds[d],fromLevel,betaIds[d],betaRestrict[d]);
    }
  }

  // 2) boundary exchange of the coefficients -- has to happen before Dinv is calculated
  #ifdef VECTOR_ALPHA
  exchange_boundary(level,VECTOR_ALPHA,STENCIL_SHAPE_BOX);
  #endif
  for(d=0;d<3;d++){
    exchange_boundary(level,betaIds[d],STENCIL_SHAPE_BOX);
  }

  // 3) black box rebuild of D^{-1}, l1^{-1}, dominant eigenvalue, ...
  rebuild_operator_blackbox(level,a,b,2);

  // 4) boundary exchange of the freshly built Dinv
  exchange_boundary(level,VECTOR_DINV,STENCIL_SHAPE_BOX);
}
void peano::applications::poisson::multigrid::mappings::SpacetreeGrid2SetupExperiment::createBoundaryVertex(
  peano::applications::poisson::multigrid::SpacetreeGridVertex&               fineGridVertex,
  const tarch::la::Vector<DIMENSIONS,double>&                                 fineGridX,
  const tarch::la::Vector<DIMENSIONS,double>&                                 fineGridH,
  peano::applications::poisson::multigrid::SpacetreeGridVertex const * const  coarseGridVertices,
  const peano::kernel::gridinterface::VertexEnumerator&                       coarseGridVerticesEnumerator,
  const peano::applications::poisson::multigrid::SpacetreeGridCell&           coarseGridCell,
  const tarch::la::Vector<DIMENSIONS,int>&                                    fineGridPositionOfVertex
) {
  logTraceInWith6Arguments( "createBoundaryVertex(...)", fineGridVertex, fineGridX, fineGridH, coarseGridVerticesEnumerator.toString(), coarseGridCell, fineGridPositionOfVertex );

  namespace stencils = peano::toolbox::stencil;

  // Refinement is driven purely by the level reported by the coarse grid
  // enumerator. (A criterion comparing the volume of fineGridH against
  // _refinementThreshold used to live here and is switched off.)
  const bool refineFurther = coarseGridVerticesEnumerator.getLevel() < 3;
  if (refineFurther) {
    fineGridVertex.refine();
  }

  // A new boundary vertex starts with all-zero operators ...
  stencils::Stencil zeroStencil(0.0);
  fineGridVertex.setStencil(zeroStencil);

  stencils::ProlongationMatrix zeroProlongation(0.0);
  fineGridVertex.setP(zeroProlongation);

  stencils::RestrictionMatrix zeroRestriction(0.0);
  fineGridVertex.setR(zeroRestriction);

  // ... and with its temporary AP / P data cleared.
  fineGridVertex.clearTempAP();
  fineGridVertex.clearTempP();

  logTraceOutWith1Argument( "createBoundaryVertex(...)", fineGridVertex );
}
bool CSpaceRestrictionManager::accessible					(ALife::_OBJECT_ID id, u32 level_vertex_id, float radius)
{
	// Restriction that restriction(id) yields for this object (may be empty).
	CRestrictionPtr				object_restriction = restriction(id);

	// No restriction for this id: the query always succeeds.
	if (!object_restriction)
		return					(true);

	// Otherwise the restriction itself decides for the vertex/radius pair.
	return						(object_restriction->accessible(level_vertex_id,radius));
}
bool CSpaceRestrictionManager::accessible						(ALife::_OBJECT_ID id, const Fsphere &sphere)
{
	// Restriction that restriction(id) yields for this object (may be empty).
	CRestrictionPtr				object_restriction = restriction(id);

	// No restriction for this id: the query always succeeds.
	if (!object_restriction)
		return					(true);

	// Otherwise the restriction itself decides for the given sphere.
	return						(object_restriction->accessible(sphere));
}
shared_str	CSpaceRestrictionManager::out_restrictions			(ALife::_OBJECT_ID id)
{
	// Restriction that restriction(id) yields for this object (may be empty).
	CRestrictionPtr				object_restriction = restriction(id);

	// Without a restriction the result is the empty string.
	if (!object_restriction)
		return					("");

	// Otherwise forward whatever the restriction reports as its out restrictions.
	return						(object_restriction->out_restrictions());
}
Esempio n. 7
0
/**
 * Parses a single-operand validation condition such as "<=5" or "!=3".
 *
 * The leading comparison operator selects the condition type and is removed
 * from @p valExpression (the argument is modified in place). What remains is
 * stored as the minimum value, interpreted according to restriction():
 * a date, a time, or otherwise a number.
 *
 * Does nothing when isEmpty() reports true.
 *
 * @param valExpression expression starting with a comparison operator;
 *                      the operator is stripped from it by this call
 * @param parser        parser used for the date and time interpretations
 */
void KCValidity::loadOdfValidationCondition(QString &valExpression, const KCValueParser *parser)
{
    if (isEmpty()) return;
    QString value;
    // Two-character operators have to be tested before their one-character prefixes.
    if (valExpression.startsWith("<=")) {
        value = valExpression.remove(0, 2);
        setCondition(KCConditional::InferiorEqual);
    } else if (valExpression.startsWith(">=")) {
        value = valExpression.remove(0, 2);
        setCondition(KCConditional::SuperiorEqual);
    } else if (valExpression.startsWith("!=")) {
        //add Differentto attribute
        value = valExpression.remove(0, 2);
        setCondition(KCConditional::DifferentTo);
    } else if (valExpression.startsWith('<')) {
        value = valExpression.remove(0, 1);
        setCondition(KCConditional::Inferior);
    } else if (valExpression.startsWith('>')) {
        value = valExpression.remove(0, 1);
        setCondition(KCConditional::Superior);
    } else if (valExpression.startsWith('=')) {
        value = valExpression.remove(0, 1);
        setCondition(KCConditional::Equal);
    } else
        kDebug(36003) << " I don't know how to parse it :" << valExpression;
    if (restriction() == KCValidity::Date) {
        setMinimumValue(parser->tryParseDate(value));
    } else if (restriction() == KCValidity::Time) {
        // This branch used to test KCValidity::Date a second time and was
        // therefore unreachable; time restrictions fell through to the
        // numeric parsing below.
        setMinimumValue(parser->tryParseTime(value));
    } else {
        // Numeric operand: try floating point first, then integer.
        bool ok = false;
        setMinimumValue(KCValue(value.toDouble(&ok)));
        if (!ok) {
            setMinimumValue(KCValue(value.toInt(&ok)));
            if (!ok)
                kDebug(36003) << " Try to parse this value :" << value;

#if 0
            if (!ok)
                setMinimumValue(value);
#endif
        }
    }
}
Esempio n. 8
0
void KCValidity::loadOdfValidationValue(const QStringList &listVal, const KCValueParser *parser)
{
    bool ok = false;
    kDebug(36003) << " listVal[0] :" << listVal[0] << " listVal[1] :" << listVal[1];

    if (restriction() == KCValidity::Date) {
        setMinimumValue(parser->tryParseDate(listVal[0]));
        setMaximumValue(parser->tryParseDate(listVal[1]));
    } else if (restriction() == KCValidity::Time) {
        setMinimumValue(parser->tryParseTime(listVal[0]));
        setMaximumValue(parser->tryParseTime(listVal[1]));
    } else {
        setMinimumValue(KCValue(listVal[0].toDouble(&ok)));
        if (!ok) {
            setMinimumValue(KCValue(listVal[0].toInt(&ok)));
            if (!ok)
                kDebug(36003) << " Try to parse this value :" << listVal[0];

#if 0
            if (!ok)
                setMinimumValue(listVal[0]);
#endif
        }
        ok = false;
        setMaximumValue(KCValue(listVal[1].toDouble(&ok)));
        if (!ok) {
            setMaximumValue(KCValue(listVal[1].toInt(&ok)));
            if (!ok)
                kDebug(36003) << " Try to parse this value :" << listVal[1];

#if 0
            if (!ok)
                setMaximumValue(listVal[1]);
#endif
        }
    }
}
void CSpaceRestrictionManager::remove_restrictions			(ALife::_OBJECT_ID id, shared_str remove_out_restrictions, shared_str remove_in_restrictions)
{
	// Nothing to remove from an object that has no restriction at all.
	CRestrictionPtr				current = restriction(id);
	if (!current)
		return;

	// The restriction must not be applied while it is being changed.
	VERIFY						(!current->applied());

	// Start from the base restriction strings stored for this client ...
	CClientRestriction			&entry = (*m_clients)[id];
	shared_str					remaining_out = entry.m_base_out_restrictions;
	shared_str					remaining_in = entry.m_base_in_restrictions;

	// ... take the requested ones out of them ...
	difference_restrictions		(remaining_out,remove_out_restrictions);
	difference_restrictions		(remaining_in,remove_in_restrictions);

	// ... and install what is left.
	restrict					(id,remaining_out,remaining_in);
}
Esempio n. 10
0
  /**
   * Extracts the "from" way, the "via" node and the "to" way from the given
   * relation members. When all three are present (non-zero ids), a
   * TurnRestriction of the given type is written to turnRestrictionWriter
   * and turnRestrictionCount is incremented. Otherwise nothing is written.
   */
  void Preprocess::Callback::ProcessTurnRestriction(const std::vector<RawRelation::Member>& members,
                                                    TurnRestriction::Type type)
  {
    Id   fromWay=0;
    Id   viaNode=0;
    Id   toWay=0;
    bool complete=false;

    // Scanning stops as soon as all three roles have been seen
    for (size_t idx=0; idx<members.size() && !complete; ++idx) {
      const RawRelation::Member& entry=members[idx];

      if (entry.type==RawRelation::memberWay && entry.role=="from") {
        fromWay=entry.id;
      }
      else if (entry.type==RawRelation::memberNode && entry.role=="via") {
        viaNode=entry.id;
      }
      else if (entry.type==RawRelation::memberWay && entry.role=="to") {
        toWay=entry.id;
      }

      complete=(fromWay!=0 && viaNode!=0 && toWay!=0);
    }

    // Incomplete relations are dropped without writing anything
    if (!complete) {
      return;
    }

    TurnRestriction turnRestriction(type,fromWay,viaNode,toWay);

    turnRestriction.Write(turnRestrictionWriter);
    turnRestrictionCount++;
  }
void CSpaceRestrictionManager::add_restrictions				(ALife::_OBJECT_ID id, shared_str add_out_restrictions, shared_str add_in_restrictions)
{
	CRestrictionPtr				current = restriction(id);

	if (current) {
		// The restriction must not be applied while it is being changed.
		VERIFY					(!current->applied());

		// Extend the base restriction strings stored for this client.
		CClientRestriction		&entry = (*m_clients)[id];
		shared_str				combined_out = entry.m_base_out_restrictions;
		shared_str				combined_in = entry.m_base_in_restrictions;

		join_restrictions		(combined_out,add_out_restrictions);
		join_restrictions		(combined_in,add_in_restrictions);

		restrict				(id,combined_out,combined_in);
		return;
	}

	// No restriction yet for this object: the added ones become its restrictions.
	restrict					(id,add_out_restrictions,add_in_restrictions);
}
void CSpaceRestrictionManager::restrict							(ALife::_OBJECT_ID id, shared_str out_restrictors, shared_str in_restrictors)
{
	// Working copies: the requested restrictors and the manager's defaults.
	shared_str									effective_out = out_restrictors;
	shared_str									effective_in = in_restrictors;
	shared_str									fallback_out = default_out_restrictions();
	shared_str									fallback_in = default_in_restrictions();

	// A default is dropped when the opposite requested list already names it ...
	difference_restrictions						(fallback_out,effective_in);
	difference_restrictions						(fallback_in,effective_out);

	// ... and the surviving defaults are merged into the requested lists.
	join_restrictions							(effective_out,fallback_out);
	join_restrictions							(effective_in,fallback_in);

	// An existing restriction may only be replaced while it is not applied.
	CLIENT_RESTRICTIONS::iterator				found = m_clients->find(id);
	VERIFY2										((m_clients->end() == found) || !(*found).second.m_restriction || !(*found).second.m_restriction->applied(),"Restriction cannot be changed since its border is still applied!");

	// Store the merged restriction together with the original (base) strings.
	CClientRestriction							&entry = (*m_clients)[id];
	entry.m_restriction							= restriction(effective_out,effective_in);
	entry.m_base_out_restrictions				= out_restrictors;
	entry.m_base_in_restrictions				= in_restrictors;

	collect_garbage								();
}
Esempio n. 13
0
// Performs one step on this level, steered by the controller's command:
//   negative : go down   (smooth unless this is the top level, zero the
//                         sublevel, restrict, continue on the sublevel)
//   zero     : smooth only
//   positive : smooth, then either go up (prolongate and continue on the
//              superlevel) or, on the top level, finish the cycle.
void Multigrid::iterate()
{
    const short int command = controller->getCommand();

    if (command == 0) {
        smooth();
        return;
    }

    if (command < 0) {
        if (superlevel != nullptr) smooth();
        sublevel->resetZero();
        restriction();
        sublevel->iterate();
        return;
    }

    // command > 0: every path smooths first
    smooth();

    if (superlevel != nullptr) {
        prolongation();
        superlevel->iterate();
        return;
    }

    // top level reached
    if (allNeumann) subtractMean();
    controller->reset();
}
Esempio n. 14
0
	/** Checks an exemptchanops entry of the form <restriction>:<prefix>.
	 * Sends numeric 955 to the user and returns false when the ':' separator is
	 * missing or when the restriction part does not name a channel mode.
	 */
	bool ValidateParam(User* user, Channel* chan, std::string &word)
	{
		const std::string::size_type colon = word.find(':');
		if (colon == std::string::npos)
		{
			user->WriteNumeric(955, chan->name, word, "Invalid exemptchanops entry, format is <restriction>:<prefix>");
			return false;
		}

		// Everything in front of the ':' is the restriction name
		std::string modename = word.substr(0, colon);

		// A '-' and whatever follows it is not part of the mode name; this is what
		// lets "auditorium-vis" and "auditorium-see" from m_auditorium through
		const std::string::size_type dash = modename.find('-');
		if (dash != std::string::npos)
			modename.erase(dash);

		if (ServerInstance->Modes->FindMode(modename, MODETYPE_CHANNEL))
			return true;

		user->WriteNumeric(955, chan->name, modename, "Unknown restriction");
		return false;
	}
Esempio n. 15
0
//------------------------------------------------------------------------------------------------------------------------------
// Rebuilds the operator data on `level`:
//   1. optionally restricts alpha/beta from `fromLevel` (skipped when fromLevel is NULL),
//   2. exchanges the coefficient boundaries,
//   3. computes Dinv (and L1inv when VECTOR_L1INV is defined) for every cell of every local block and
//      tracks the largest Gershgorin-based estimate (Aii+sum|Aij|)/Aii,
//   4. reduces that estimate with MPI_MAX across ranks (USE_MPI only) and stores it in
//      level->dominant_eigenvalue_of_DinvA,
//   5. exchanges the boundaries of Dinv/L1inv.
// Rank 0 prints a progress line to stdout.  `a` and `b` are the scalars that enter the diagonal and
// off-diagonal terms below.
void rebuild_operator(level_type * level, level_type *fromLevel, double a, double b){
  if(level->my_rank==0){fprintf(stdout,"  rebuilding operator for level...  h=%e  ",level->h);fflush(stdout);}

  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // form restriction of alpha[], beta_*[] coefficients from fromLevel
  if(fromLevel != NULL){
    #ifdef VECTOR_ALPHA
    restriction(level,VECTOR_ALPHA ,fromLevel,VECTOR_ALPHA ,RESTRICT_CELL  );
    #endif
    restriction(level,VECTOR_BETA_I,fromLevel,VECTOR_BETA_I,RESTRICT_FACE_I);
    restriction(level,VECTOR_BETA_J,fromLevel,VECTOR_BETA_J,RESTRICT_FACE_J);
    restriction(level,VECTOR_BETA_K,fromLevel,VECTOR_BETA_K,RESTRICT_FACE_K);
  } // else case assumes alpha/beta have been set


  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // exchange alpha/beta/...  (must be done before calculating Dinv)
  #ifdef VECTOR_ALPHA
  exchange_boundary(level,VECTOR_ALPHA ,STENCIL_SHAPE_BOX); // safe
  #endif
  exchange_boundary(level,VECTOR_BETA_I,STENCIL_SHAPE_BOX);
  exchange_boundary(level,VECTOR_BETA_J,STENCIL_SHAPE_BOX);
  exchange_boundary(level,VECTOR_BETA_K,STENCIL_SHAPE_BOX);


  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // calculate Dinv, L1inv, and estimate the dominant Eigenvalue
  double _timeStart = getTime();
  int block;

  // running maximum over all local blocks; starts far below any value the loop can produce
  double dominant_eigenvalue = -1e9;

  // NOTE(review): the macro presumably threads the block loop with a max-reduction on dominant_eigenvalue -- confirm against its definition
  PRAGMA_THREAD_ACROSS_BLOCKS_MAX(level,block,level->num_my_blocks,dominant_eigenvalue)
  for(block=0;block<level->num_my_blocks;block++){
    // index range [lo,hi) of this block inside its box
    const int box = level->my_blocks[block].read.box;
    const int ilo = level->my_blocks[block].read.i;
    const int jlo = level->my_blocks[block].read.j;
    const int klo = level->my_blocks[block].read.k;
    const int ihi = level->my_blocks[block].dim.i + ilo;
    const int jhi = level->my_blocks[block].dim.j + jlo;
    const int khi = level->my_blocks[block].dim.k + klo;
    int i,j,k;
    const int jStride = level->my_boxes[box].jStride;
    const int kStride = level->my_boxes[box].kStride;
    const int  ghosts = level->my_boxes[box].ghosts;
    double h2inv = 1.0/(level->h*level->h);
    // all vector pointers are offset by ghosts*(1+jStride+kStride) so that index (0,0,0) skips the ghost layers
    #ifdef VECTOR_ALPHA
    double * __restrict__ alpha  = level->my_boxes[box].vectors[VECTOR_ALPHA ] + ghosts*(1+jStride+kStride);
    #endif
    double * __restrict__ beta_i = level->my_boxes[box].vectors[VECTOR_BETA_I] + ghosts*(1+jStride+kStride);
    double * __restrict__ beta_j = level->my_boxes[box].vectors[VECTOR_BETA_J] + ghosts*(1+jStride+kStride);
    double * __restrict__ beta_k = level->my_boxes[box].vectors[VECTOR_BETA_K] + ghosts*(1+jStride+kStride);
    double * __restrict__   Dinv = level->my_boxes[box].vectors[VECTOR_DINV  ] + ghosts*(1+jStride+kStride);
    #ifdef VECTOR_L1INV
    double * __restrict__  L1inv = level->my_boxes[box].vectors[VECTOR_L1INV ] + ghosts*(1+jStride+kStride);
    #endif
    // maximum estimate found within this block
    double block_eigenvalue = -1e9;

    for(k=klo;k<khi;k++){
    for(j=jlo;j<jhi;j++){
    for(i=ilo;i<ihi;i++){ 
      int ijk = i + j*jStride + k*kStride;

      // used for quick linear approximation to zero dirichlet BC
      // each flag is 1.0 when the neighbor in that direction lies inside the domain and 0.0 when it would fall outside
      double ilo_is_valid =1.0;
      double ihi_is_valid =1.0;
      double jlo_is_valid =1.0;
      double jhi_is_valid =1.0;
      double klo_is_valid =1.0;
      double khi_is_valid =1.0;
      if(level->boundary_condition.type != BC_PERIODIC){
         if(level->my_boxes[box].low.i+i-1 <             0)ilo_is_valid = 0.0;
         if(level->my_boxes[box].low.j+j-1 <             0)jlo_is_valid = 0.0;
         if(level->my_boxes[box].low.k+k-1 <             0)klo_is_valid = 0.0;
         if(level->my_boxes[box].low.i+i+1 >= level->dim.i)ihi_is_valid = 0.0;
         if(level->my_boxes[box].low.j+j+1 >= level->dim.j)jhi_is_valid = 0.0;
         if(level->my_boxes[box].low.k+k+1 >= level->dim.k)khi_is_valid = 0.0;
       }

      #ifdef STENCIL_VARIABLE_COEFFICIENT
      // radius of Gershgorin disc is the sum of the absolute values of the off-diagonal elements...
      double sumAbsAij = fabs(b*h2inv) * (
                           fabs( beta_i[ijk        ]*ilo_is_valid )+
                           fabs( beta_j[ijk        ]*jlo_is_valid )+
                           fabs( beta_k[ijk        ]*klo_is_valid )+
                           fabs( beta_i[ijk+1      ]*ihi_is_valid )+
                           fabs( beta_j[ijk+jStride]*jhi_is_valid )+
                           fabs( beta_k[ijk+kStride]*khi_is_valid )
                         );

      // center of Gershgorin disc is the diagonal element...
      // (a face whose neighbor is invalid contributes with weight -2 instead of -1)
      double    Aii = -b*h2inv*(
                        beta_i[ijk        ]*( ilo_is_valid-2.0 )+
                        beta_j[ijk        ]*( jlo_is_valid-2.0 )+
                        beta_k[ijk        ]*( klo_is_valid-2.0 )+
                        beta_i[ijk+1      ]*( ihi_is_valid-2.0 )+
                        beta_j[ijk+jStride]*( jhi_is_valid-2.0 )+
                        beta_k[ijk+kStride]*( khi_is_valid-2.0 ) 
                      );
      #ifdef VECTOR_ALPHA
                Aii += a*alpha[ijk];
      #endif
      #else // Constant coefficient versions with fused BC's...
      // radius of Gershgorin disc is the sum of the absolute values of the off-diagonal elements...
      double sumAbsAij = fabs(b*h2inv) * (
                           ilo_is_valid +
                           jlo_is_valid +
                           klo_is_valid +
                           ihi_is_valid +
                           jhi_is_valid +
                           khi_is_valid 
                         );

      // center of Gershgorin disc is the diagonal element...
      double    Aii = a - b*h2inv*(
                         ilo_is_valid +
                         jlo_is_valid +
                         klo_is_valid +
                         ihi_is_valid +
                         jhi_is_valid +
                         khi_is_valid - 12.0
                      );
      #endif

      // NOTE(review): Aii is not checked against zero before the divisions below
                             Dinv[ijk] = 1.0/Aii;					// inverse of the diagonal Aii
      double Di = (Aii + sumAbsAij)/Aii;if(Di>block_eigenvalue)block_eigenvalue=Di;	// upper limit to Gershgorin disc == bound on dominant eigenvalue
      #ifdef VECTOR_L1INV
                          //L1inv[ijk] = 1.0/(Aii+sumAbsAij);				// inverse of the L1 row norm... L1inv = ( D+D^{L1} )^{-1}
      if(Aii>=1.5*sumAbsAij)L1inv[ijk] = 1.0/(Aii              ); 			// as suggested by eq 6.5 in Baker et al, "Multigrid smoothers for ultra-parallel computing: additional theory and discussion"...
                       else L1inv[ijk] = 1.0/(Aii+0.5*sumAbsAij);			// 
      #endif
    }}}
    // fold this block's maximum into the level-wide maximum
    if(block_eigenvalue>dominant_eigenvalue){dominant_eigenvalue = block_eigenvalue;}
  }
  level->timers.blas1 += (double)(getTime()-_timeStart);


  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // Reduce the local estimates dominant eigenvalue to a global estimate
  #ifdef USE_MPI
  double _timeStartAllReduce = getTime();
  double send = dominant_eigenvalue;
  MPI_Allreduce(&send,&dominant_eigenvalue,1,MPI_DOUBLE,MPI_MAX,MPI_COMM_WORLD);
  double _timeEndAllReduce = getTime();
  level->timers.collectives   += (double)(_timeEndAllReduce-_timeStartAllReduce);
  #endif
  if(level->my_rank==0){fprintf(stdout,"eigenvalue_max<%e\n",dominant_eigenvalue);}
  level->dominant_eigenvalue_of_DinvA = dominant_eigenvalue;


  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // exchange Dinv/L1inv/...
  exchange_boundary(level,VECTOR_DINV ,STENCIL_SHAPE_BOX); // safe
  #ifdef VECTOR_L1INV
  exchange_boundary(level,VECTOR_L1INV,STENCIL_SHAPE_BOX);
  #endif
  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
}
Esempio n. 16
0
    /// Builds the prolongation (P) and restriction (R) operators for the system matrix \p A.
    ///
    /// Steps: build aggregates, form the tentative prolongation, assemble a filtered copy
    /// of \p A, and hand it to interpolation() / restriction().
    ///
    /// Side effects on \c prm: \c prm.aggr.eps_strong is halved on every call, and
    /// \c prm.aggr.block_size is set to \c prm.nullspace.cols when the latter is positive.
    ///
    /// \return tuple (P, R) of shared pointers to the two operators.
    std::tuple<
        std::shared_ptr<Matrix>,
        std::shared_ptr<Matrix>
        >
    transfer_operators(const Matrix &A) {
        typedef typename backend::value_type<Matrix>::type Val;
        typedef ptrdiff_t Idx;

        AMGCL_TIC("aggregates");
        Aggregates aggr(A, prm.aggr, prm.nullspace.cols);
        // The threshold is modified after the aggregates are built, so it only affects later calls.
        prm.aggr.eps_strong *= 0.5;
        AMGCL_TOC("aggregates");

        AMGCL_TIC("interpolation");
        auto P_tent = tentative_prolongation<Matrix>(
                rows(A), aggr.count, aggr.id, prm.nullspace, prm.aggr.block_size
                );

        // Filter the system matrix
        // (Af keeps the diagonal and the strong connections of A; weak off-diagonal
        // entries are dropped and their values are added to the diagonal.)
        backend::crs<Val> Af;
        Af.set_size(rows(A), cols(A));
        Af.ptr[0] = 0;

        // Filtered diagonal of every row.
        std::vector<Val> dia(Af.nrows);

        // First pass: per row, accumulate the filtered diagonal and count the surviving entries.
#pragma omp parallel for
        for(Idx i = 0; i < static_cast<Idx>(Af.nrows); ++i) {
            Idx row_begin = A.ptr[i];
            Idx row_end   = A.ptr[i+1];
            Idx row_width = row_end - row_begin;

            Val D = math::zero<Val>();
            for(Idx j = row_begin; j < row_end; ++j) {
                Idx c = A.col[j];
                Val v = A.val[j];

                if (c == i)
                    D += v;
                else if (!aggr.strong_connection[j]) {
                    // Weak connection: lump into the diagonal and drop from the row.
                    D += v;
                    --row_width;
                }
            }

            dia[i] = D;
            // ptr temporarily holds the row width; scan_row_sizes() below is applied to these widths.
            Af.ptr[i+1] = row_width;
        }

        Af.set_nonzeros(Af.scan_row_sizes());

        // Second pass: fill the column indices and values of the filtered matrix.
#pragma omp parallel for
        for(Idx i = 0; i < static_cast<Idx>(Af.nrows); ++i) {
            Idx row_begin = A.ptr[i];
            Idx row_end   = A.ptr[i+1];
            Idx row_head  = Af.ptr[i];

            for(Idx j = row_begin; j < row_end; ++j) {
                Idx c = A.col[j];

                if (c == i) {
                    Af.col[row_head] = i;
                    Af.val[row_head] = dia[i];
                    ++row_head;
                } else if (aggr.strong_connection[j]) {
                    Af.col[row_head] = c;
                    Af.val[row_head] = A.val[j];
                    ++row_head;
                }
            }
        }

        // Passed empty to interpolation() and then on to restriction().
        // NOTE(review): presumably filled by interpolation() and reused by restriction() -- confirm.
        std::vector<Val> omega;

        auto P = interpolation(Af, dia, *P_tent, omega);
        auto R = restriction  (Af, dia, *P_tent, omega);
        AMGCL_TOC("interpolation");

        if (prm.nullspace.cols > 0)
            prm.aggr.block_size = prm.nullspace.cols;

        return std::make_tuple(P, R);
    }
// Multigrid V-cycle for the pressure system (recursive).
//
// On the current grid the routine
//   1. assembles the sparse pressure matrix,
//   2. builds the load vector (level 0 only),
//   3. pre-smooths with Jacobi starting from a zero initial guess,
//   4. restricts the residual to a grid with half as many cells per direction,
//   5. obtains the coarse correction: a Jacobi solve with max_iteration sweeps
//      when level==max_level, a recursive v_cycle call otherwise,
//   6. interpolates the correction back, adds it to the solution,
//   7. post-smooths with max_iteration Jacobi sweeps.
//
// Parameters
//   P                     solution vector of length n_dof, owned by the caller;
//                         zeroed on entry and overwritten with the result
//   n_dof                 number of unknowns on this grid
//   nx, ny, nz            cell counts per direction on this grid
//   hx, hy, hz            mesh sizes on this grid
//   hx2i, hy2i, hz2i      inverse squared mesh sizes on this grid
//   tol                   tolerance handed to jacobi_sparse
//   max_iteration         sweep count for the coarsest solve and for post-smoothing
//   pre_smooth_iteration  sweep count for pre-smoothing
//   lx, ly, lz            domain lengths, used to derive the coarse mesh sizes
//   level, max_level      current recursion level and the level at which recursion stops
//   F                     load vector of length n_dof; ignored at level 0, where the
//                         routine allocates its own and fills it with pressure_rhs
//   Er                    error value passed by reference to jacobi_sparse
//   Uss, Vss, Wss, bcs, dt  inputs forwarded to pressure_rhs
//
// Fixes relative to the previous version:
//   * the solution is always computed directly in P. Previously levels > 0
//     solved into a private buffer that was neither copied back nor freed, so
//     the caller interpolated an uninitialised coarse vector.
//   * `delete[] a, b;` only frees `a` (comma operator); F_coar and U_coar_tmp
//     are now released with their own delete[] statements.
void v_cycle( double* P, uint n_dof, cuint nx, cuint ny, cuint nz,
			  cdouble hx, cdouble hy, cdouble hz,
			  cdouble hx2i, cdouble hy2i, cdouble hz2i,
			  cdouble tol, cuint max_iteration, cuint pre_smooth_iteration,
			  cdouble lx, cdouble ly, cdouble lz,
			  cuint level, cuint max_level,
			  double* F,
			  double& Er,
			  double* Uss, double* Vss, double* Wss,
			  cdouble bcs[][6],
			  cdouble dt
			  )
{
	cout<<"level: "<<level<<" n_dof: "<<n_dof<<endl;

	// sparse finite difference matrix: triplet form plus val/col_ind/row_ptr arrays
	cout<<"fd_matrix_sparse"<<endl;
	vector<tuple <uint, uint, double> > M_sp;
	vector<double> val;
	vector<uint> col_ind;
	vector<uint> row_ptr(1,0);

	// create finite difference matrix
	cout<<"create finite difference matrix"<<endl;
	// build pressure matrix
	pressure_matrix( M_sp,
					 val, col_ind, row_ptr,
					 nx, ny, nz,
					 hx2i, hy2i, hz2i,
					 n_dof
					 );

	// construct load vector
	// load vector is created only at the level 0; deeper levels receive the
	// restricted residual of their parent through F
	if(level==0){
		F = new double[n_dof];
		cout<<"create load vector"<<endl;

		pressure_rhs(F, Uss, Vss, Wss, nx, ny, nz, bcs, hx, hy, hz, dt);
	}

	// solution vector: work directly in the caller's storage at every level so
	// that the result is visible to the caller
	double* U = P;
	double* U_tmp = new double[n_dof];
	// initial guess
#pragma omp parallel for shared(U, U_tmp) num_threads(nt)
	for(int n=0; n<n_dof; n++){
	    U[n] = 0.0;
	    U_tmp[n] = 0.0;
    }

	// residual and error
	double* R = new double[n_dof];

	// perform pre-smoothing and compute residual
	cout<<"pre-smoothing "<<pre_smooth_iteration<<" times"<<endl;
	Er = tol*10;
	jacobi_sparse(tol, pre_smooth_iteration, n_dof, U, U_tmp,
				  val, col_ind, row_ptr, F, Er, R);

	// coarse grid: half the cells per direction
	cuint nx_coar = (nx)/2;
	cuint ny_coar = (ny)/2;
	cuint nz_coar = (nz)/2;
	uint n_dof_coar = nx_coar*ny_coar*nz_coar;

	// restriction of residual on coarse grid
	double* F_coar = new double[n_dof_coar];

	// mesh size
	cdouble hx_coar = lx/(nx_coar);
	cdouble hy_coar = ly/(ny_coar);
	cdouble hz_coar = lz/(nz_coar);

	// inverse of square of mesh sizes
	cdouble hx2i_coar = 1.0/(hx_coar*hx_coar);
	cdouble hy2i_coar = 1.0/(hy_coar*hy_coar);
	cdouble hz2i_coar = 1.0/(hz_coar*hz_coar);

	// restrict residual to the coarse grid
	cout<<"restriction"<<endl;
	restriction( R, F_coar, nx, ny, nz, nx_coar, ny_coar, nz_coar);

	// construct solution vector on coarse grid
	double* U_coar = new double[n_dof_coar];
	double* U_coar_tmp = new double[n_dof_coar];

	// if the grid is coarsest
	if( level==max_level){
		cout<<"level: "<<level+1<<" n_dof: "<<n_dof_coar<<endl;

		// initial guess
#pragma omp parallel for shared(U_coar, U_coar_tmp) num_threads(nt)
		for(int n=0; n<n_dof_coar; n++){
			U_coar[n] = 0.0;
			U_coar_tmp[n] = 0.0;
		}

		vector<tuple <uint, uint, double> > M_sp_coar;
		vector<double> val_coar;
		vector<uint> col_ind_coar;
		vector<uint> row_ptr_coar(1,0);

		// create finite difference matrix
		cout<<"create finite difference matrix"<<endl;
		pressure_matrix( M_sp_coar, val_coar, col_ind_coar, row_ptr_coar,
						 nx_coar, ny_coar, nz_coar,
						 hx2i_coar, hy2i_coar, hz2i_coar,
						 n_dof_coar
						 );

		// residual on coarse grid
		double* R_coar = new double[n_dof_coar];

		// "exact" solve: Jacobi with the full iteration budget
		Er = tol*10;
		jacobi_sparse(tol, max_iteration, n_dof_coar, U_coar, U_coar_tmp,
					  val_coar, col_ind_coar, row_ptr_coar, F_coar,
					  Er, R_coar);

		delete[] R_coar;
	}
	else{
		// v_cycle on the coarse grid; the result is written into U_coar
		v_cycle( U_coar, n_dof_coar, nx_coar, ny_coar, nz_coar,
						  hx_coar, hy_coar, hz_coar,
						  hx2i_coar, hy2i_coar, hz2i_coar,
						  tol, max_iteration, pre_smooth_iteration,
						  lx, ly, lz,
						  level+1, max_level,
						  F_coar, Er,
						  Uss, Vss, Wss,
						  bcs, dt
						  );
	}

	// interpolate to fine grid
	double* E = new double[n_dof];
	interpolation(U_coar, E, nx_coar,ny_coar,nz_coar, nx, ny, nz);

	// correct the fine grid approximation
#pragma omp parallel for shared(U,E) num_threads(nt)
	for(int i=0; i<n_dof; i++){
		U[i] += E[i];
	}

	// perform post-smoothing and compute residual
	uint post_smooth_iteration = max_iteration;

	cout<<"post-smoothing "<<post_smooth_iteration<<" times on level "
		<<level<<endl;
	Er = tol*10;
	jacobi_sparse(tol, post_smooth_iteration, n_dof, U, U_tmp,
				  val, col_ind, row_ptr, F, Er, R);


	// cleanup
	// F is owned by this call only at level 0 (allocated above)
	if (level==0)
		delete[] F;

	delete[] U_tmp;
	delete[] R;
	delete[] F_coar;
	delete[] E;
	delete[] U_coar;
	delete[] U_coar_tmp;

}
Esempio n. 18
0
// Solves the linear problem with HPGMG and hands the result back as BoxLib data.
//
// Outline of what happens below:
//   1. A BoxLib ABecLaplacian is configured from (a, b, alpha, beta); it is used
//      only at the end to compute fluxes of the solution.
//   2. An HPGMG level is created using rhs as the layout template, its
//      coefficients are set from (a, b, alpha, beta_cc) and rhs is copied into
//      VECTOR_F.
//   3. The operator is rebuilt, the multigrid hierarchy is built and the problem
//      is solved (F-cycles when USE_FCYCLES is defined, otherwise MGSolve).
//   4. Levels 0..2 are re-solved for the Richardson error analysis.
//   5. VECTOR_U is copied into soln, some diagnostics are printed, the optional
//      plot file is written and the HPGMG structures are destroyed.
//   6. Face-centered fluxes of soln are computed and averaged to cell centers
//      into gphi.
//
// soln and gphi are outputs; the remaining arguments are only read here.
void solve_with_HPGMG(MultiFab& soln, MultiFab& gphi, Real a, Real b, MultiFab& alpha, PArray<MultiFab>& beta,
                      MultiFab& beta_cc, MultiFab& rhs, const BoxArray& bs, const Geometry& geom, int n_cell)
{
  // BoxLib-side operator (needed later for compFlux).
  BndryData bd(bs, 1, geom);
  set_boundary(bd, rhs, 0);

  ABecLaplacian abec_operator(bd, dx);
  abec_operator.setScalars(a, b);
  abec_operator.setCoefficients(alpha, beta);

  // Periodic: 2, to avoid problems with the black box calculation of D^{-1} for
  //           poisson with periodic BC's on a 1^3 grid.
  // Otherwise: 1, which assumes you can drop order on the boundaries.
  int minCoarseDim = (domain_boundary_condition == BC_PERIODIC) ? 2 : 1;

  level_type level_h;
  mg_type MG_h;
  int numVectors = 12;

  int my_rank = 0;
  int num_ranks = 1;

#ifdef BL_USE_MPI
  MPI_Comm_size (MPI_COMM_WORLD, &num_ranks);
  MPI_Comm_rank (MPI_COMM_WORLD, &my_rank);
#endif /* BL_USE_MPI */

  const double h0 = dx[0];

  // Build the geometric structure of the HPGMG grid, using the RHS MultiFab as
  // a template. No actual data is copied at this point.
  CreateHPGMGLevel(&level_h, rhs, n_cell, max_grid_size, my_rank, num_ranks, domain_boundary_condition, numVectors, h0);

  // Coefficients of the linear operator L.
  SetupHPGMGCoefficients(a, b, alpha, beta_cc, &level_h);

  // The grid exists now, so fill it with the RHS data.
  ConvertToHPGMGLevel(rhs, n_cell, max_grid_size, &level_h, VECTOR_F);

#ifdef USE_HELMHOLTZ
  if (ParallelDescriptor::IOProcessor())
  {
    std::cout << "Creating Helmholtz (a=" << a << ", b=" << b << ") test problem" << std::endl;
  }
#else
  if (ParallelDescriptor::IOProcessor())
  {
    std::cout << "Creating Poisson (a=" << a << ", b=" << b << ") test problem" << std::endl;
  }
#endif /* USE_HELMHOLTZ */

  if (level_h.boundary_condition.type == BC_PERIODIC)
  {
    // Only a warning is issued; the RHS is deliberately left unshifted
    // (the shift_vector(&level_h,VECTOR_F,VECTOR_F,-average_value_of_f) call is disabled).
    const double average_value_of_f = mean (&level_h, VECTOR_F);
    if (average_value_of_f != 0.0 && ParallelDescriptor::IOProcessor())
    {
      std::cerr << "WARNING: Periodic boundary conditions, but f does not sum to zero... mean(f)=" << average_value_of_f << std::endl;
    }
  }

  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  rebuild_operator(&level_h, NULL, a, b);    // i.e. calculate Dinv and lambda_max
  MGBuild(&MG_h, &level_h, a, b, minCoarseDim, ParallelDescriptor::Communicator()); // build the Multigrid Hierarchy
  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  if (ParallelDescriptor::IOProcessor())
  {
    std::cout << std::endl << std::endl << "===== STARTING SOLVE =====" << std::endl << std::flush;
  }

  MGResetTimers (&MG_h);
  zero_vector (MG_h.levels[0], VECTOR_U);
#ifdef USE_FCYCLES
  FMGSolve (&MG_h, 0, VECTOR_U, VECTOR_F, a, b, tolerance_abs, tolerance_rel);
#else
  MGSolve (&MG_h, 0, VECTOR_U, VECTOR_F, a, b, tolerance_abs, tolerance_rel);
#endif /* USE_FCYCLES */

  MGPrintTiming (&MG_h, 0);   // don't include the error check in the timing results
  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

  if (ParallelDescriptor::IOProcessor())
  {
    std::cout << std::endl << std::endl << "===== Performing Richardson error analysis ==========================" << std::endl;
  }

  // Richardson error analysis:
  //   solve A^h  u^h  = f^h
  //   solve A^2h u^2h = f^2h
  //   solve A^4h u^4h = f^4h
  // and then compare the three solutions.
  MGResetTimers(&MG_h);
  const double abs_tol = tolerance_abs;
  const double rel_tol = tolerance_rel;
  for (int lev = 0; lev < 3; ++lev)
  {
    if (lev > 0)
    {
      // The coarser RHS is the restriction of the next finer one.
      restriction(MG_h.levels[lev], VECTOR_F, MG_h.levels[lev-1], VECTOR_F, RESTRICT_CELL);
    }
    zero_vector(MG_h.levels[lev], VECTOR_U);
#ifdef USE_FCYCLES
    FMGSolve(&MG_h, lev, VECTOR_U, VECTOR_F, a, b, abs_tol, rel_tol);
#else
    MGSolve(&MG_h, lev, VECTOR_U, VECTOR_F, a, b, abs_tol, rel_tol);
#endif
  }
  richardson_error(&MG_h, 0, VECTOR_U);

  // Copy the HPGMG solution (VECTOR_U) into the soln MultiFab.
  ConvertFromHPGMGLevel(soln, &level_h, VECTOR_U);

  // Diagnostics computed on both sides of the conversion.
  const double norm_from_HPGMG = norm(&level_h, VECTOR_U);
  const double mean_from_HPGMG = mean(&level_h, VECTOR_U);
  const Real norm0 = soln.norm0();
  const Real norm2 = soln.norm2();
  if (ParallelDescriptor::IOProcessor())
  {
    std::cout << "mean from HPGMG: " << mean_from_HPGMG << std::endl;
    std::cout << "norm from HPGMG: " << norm_from_HPGMG << std::endl;
    std::cout << "norm0 of RHS copied to MF: " << norm0 << std::endl;
    std::cout << "norm2 of RHS copied to MF: " << norm2 << std::endl;
  }

  // Optionally dump the MultiFab so it can be compared with the in-house solver.
  if (plot_soln)
  {
    writePlotFile("SOLN-HPGMG", soln, geom);
  }

  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  MGDestroy(&MG_h);
  destroy_level(&level_h);
  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

  // One single-component, ghost-free MultiFab per direction, defined on the
  // boxes of soln after surroundingNodes(dir).
  PArray<MultiFab> grad_phi(BL_SPACEDIM, PArrayManage);
  for (int dir = 0; dir < BL_SPACEDIM; ++dir)
  {
    BoxArray face_boxes(soln.boxArray());
    face_boxes.surroundingNodes(dir);
    grad_phi.set(dir, new MultiFab(face_boxes, 1, 0));
  }

#if (BL_SPACEDIM == 2)
  abec_operator.compFlux(grad_phi[0], grad_phi[1], soln);
#elif (BL_SPACEDIM == 3)
  abec_operator.compFlux(grad_phi[0], grad_phi[1], grad_phi[2], soln);
#endif

  // Average edge-centered gradients to cell centers.
  BoxLib::average_face_to_cellcenter(gphi, grad_phi, geom);
}
Esempio n. 19
0
//------------------------------------------------------------------------------------------------------------------------------
// Rebuilds the per-level operator data.
//
//   level     : level whose Dinv / L1inv vectors and eigenvalue estimate are (re)computed
//   fromLevel : if non-NULL, alpha and beta_{i,j,k} are first restricted from this level;
//               if NULL, alpha/beta on `level` are assumed to have been set already
//   a, b      : scalar coefficients used when forming the diagonal and the off-diagonal sums
//
// On return, VECTOR_DINV and VECTOR_L1INV have been filled and exchanged, and
// level->dominant_eigenvalue_of_DinvA holds a Gershgorin-based upper bound.
void rebuild_operator(level_type * level, level_type *fromLevel, double a, double b){
  if(level->my_rank==0){printf("  rebuilding operator for level...  h=%e  ",level->h);fflush(stdout);}

  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // form restriction of alpha[], beta_*[] coefficients from fromLevel
  if(fromLevel != NULL){
    restriction(level,VECTOR_ALPHA ,fromLevel,VECTOR_ALPHA ,RESTRICT_CELL  );
    restriction(level,VECTOR_BETA_I,fromLevel,VECTOR_BETA_I,RESTRICT_FACE_I);
    restriction(level,VECTOR_BETA_J,fromLevel,VECTOR_BETA_J,RESTRICT_FACE_J);
    restriction(level,VECTOR_BETA_K,fromLevel,VECTOR_BETA_K,RESTRICT_FACE_K);
  } // else case assumes alpha/beta have been set


  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // exchange alpha/beta/...  (must be done before calculating Dinv)
  exchange_boundary(level,VECTOR_ALPHA ,0); // must be 0(faces,edges,corners) for CA version or 27pt
  exchange_boundary(level,VECTOR_BETA_I,0);
  exchange_boundary(level,VECTOR_BETA_J,0);
  exchange_boundary(level,VECTOR_BETA_K,0);


  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // calculate Dinv, L1inv, and estimate the dominant Eigenvalue
  uint64_t _timeStart = CycleTime();
  int box;

  double dominant_eigenvalue = -1e9;
  #pragma omp parallel for private(box) OMP_THREAD_ACROSS_BOXES(level->concurrent_boxes) reduction(max:dominant_eigenvalue) schedule(static)
  for(box=0;box<level->num_my_boxes;box++){
    int i,j,k;
    int jStride = level->my_boxes[box].jStride;
    int kStride = level->my_boxes[box].kStride;
    int  ghosts = level->my_boxes[box].ghosts;
    int     dim = level->my_boxes[box].dim;
    double h2inv = 1.0/(level->h*level->h);
    // each pointer is offset past the ghost zone so that index 0 is the first non-ghost cell of the box
    double * __restrict__ alpha  = level->my_boxes[box].vectors[VECTOR_ALPHA ] + ghosts*(1+jStride+kStride);
    double * __restrict__ beta_i = level->my_boxes[box].vectors[VECTOR_BETA_I] + ghosts*(1+jStride+kStride);
    double * __restrict__ beta_j = level->my_boxes[box].vectors[VECTOR_BETA_J] + ghosts*(1+jStride+kStride);
    double * __restrict__ beta_k = level->my_boxes[box].vectors[VECTOR_BETA_K] + ghosts*(1+jStride+kStride);
    double * __restrict__   Dinv = level->my_boxes[box].vectors[VECTOR_DINV  ] + ghosts*(1+jStride+kStride);
    double * __restrict__  L1inv = level->my_boxes[box].vectors[VECTOR_L1INV ] + ghosts*(1+jStride+kStride);
    double * __restrict__  valid = level->my_boxes[box].vectors[VECTOR_VALID ] + ghosts*(1+jStride+kStride);
    double box_eigenvalue = -1e9;
    #pragma omp parallel for private(k,j,i) OMP_THREAD_WITHIN_A_BOX(level->threads_per_box) reduction(max:box_eigenvalue) schedule(static)
    for(k=0;k<dim;k++){
    for(j=0;j<dim;j++){
    for(i=0;i<dim;i++){
      int ijk = i + j*jStride + k*kStride;
      #if 0
      // FIX This looks wrong, but is faster... theory is because its doing something akin to SOR
      // radius of Gershgorin disc is the sum of the absolute values of the off-diagonal elements...
      double sumAbsAij = fabs(b*h2inv*beta_i[ijk]) + fabs(b*h2inv*beta_i[ijk+      1]) +
                         fabs(b*h2inv*beta_j[ijk]) + fabs(b*h2inv*beta_j[ijk+jStride]) +
                         fabs(b*h2inv*beta_k[ijk]) + fabs(b*h2inv*beta_k[ijk+kStride]);
      // center of Gershgorin disc is the diagonal element...
      double    Aii = a*alpha[ijk] - b*h2inv*(
                                       -beta_i[ijk]-beta_i[ijk+      1]
                                       -beta_j[ijk]-beta_j[ijk+jStride]
                                       -beta_k[ijk]-beta_k[ijk+kStride]
                                     );
      #endif
      #if 1
      // radius of Gershgorin disc is the sum of the absolute values of the off-diagonal elements...
      // (each face coefficient is weighted by valid[] of the neighbor across that face;
      //  presumably valid[] is 1 for cells inside the domain and 0 outside -- confirm against its initialization)
      double sumAbsAij = fabs(b*h2inv) * (
                      fabs( beta_i[ijk        ]*valid[ijk-1      ] )+
                      fabs( beta_j[ijk        ]*valid[ijk-jStride] )+
                      fabs( beta_k[ijk        ]*valid[ijk-kStride] )+
                      fabs( beta_i[ijk+1      ]*valid[ijk+1      ] )+
                      fabs( beta_j[ijk+jStride]*valid[ijk+jStride] )+
                      fabs( beta_k[ijk+kStride]*valid[ijk+kStride] )
                      );

      // center of Gershgorin disc is the diagonal element...
      double    Aii = a*alpha[ijk] - b*h2inv*(
                                       beta_i[ijk        ]*( valid[ijk-1      ]-2.0 )+
                                       beta_j[ijk        ]*( valid[ijk-jStride]-2.0 )+
                                       beta_k[ijk        ]*( valid[ijk-kStride]-2.0 )+
                                       beta_i[ijk+1      ]*( valid[ijk+1      ]-2.0 )+
                                       beta_j[ijk+jStride]*( valid[ijk+jStride]-2.0 )+
                                       beta_k[ijk+kStride]*( valid[ijk+kStride]-2.0 )
                                     );

      #endif
      // inverse of the diagonal Aii
      Dinv[ijk] = 1.0/Aii;
      // the plain inverse of the L1 row norm, 1.0/(Aii+sumAbsAij), is not used; instead
      // L1inv = ( D+D^{L1} )^{-1}
      // as suggested by eq 6.5 in Baker et al, "Multigrid smoothers for ultra-parallel computing: additional theory and discussion"...
      if(Aii>=1.5*sumAbsAij)L1inv[ijk] = 1.0/(Aii              );
                       else L1inv[ijk] = 1.0/(Aii+0.5*sumAbsAij);
      // upper limit to Gershgorin disc == bound on dominant eigenvalue
      double Di = (Aii + sumAbsAij)/Aii;if(Di>box_eigenvalue)box_eigenvalue=Di;
    }}}
    // fold this box's bound into the (reduced) per-process maximum
    if(box_eigenvalue>dominant_eigenvalue){dominant_eigenvalue = box_eigenvalue;}
  }
  level->cycles.blas1 += (uint64_t)(CycleTime()-_timeStart);


  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // Reduce the local estimates dominant eigenvalue to a global estimate
  #ifdef USE_MPI
  uint64_t _timeStartAllReduce = CycleTime();
  double send = dominant_eigenvalue;
  MPI_Allreduce(&send,&dominant_eigenvalue,1,MPI_DOUBLE,MPI_MAX,MPI_COMM_WORLD);
  uint64_t _timeEndAllReduce = CycleTime();
  level->cycles.collectives   += (uint64_t)(_timeEndAllReduce-_timeStartAllReduce);
  #endif
  if(level->my_rank==0){printf("eigenvalue_max<%e\n",dominant_eigenvalue);fflush(stdout);}
  level->dominant_eigenvalue_of_DinvA = dominant_eigenvalue;


  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // exchange Dinv/L1inv/...
  exchange_boundary(level,VECTOR_DINV ,0); // must be 0(faces,edges,corners) for CA version
  exchange_boundary(level,VECTOR_L1INV,0);
  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
}
// Forwards an accessible_nearest() query to the restriction looked up for
// object `id` and returns that restriction's answer; `result` is passed
// through to the callee unchanged.
u32 CSpaceRestrictionManager::accessible_nearest(ALife::_OBJECT_ID id, const Fvector &position, Fvector &result)
{
	// The restriction is expected to exist; VERIFY checks this
	// (presumably a debug-build assertion -- confirm against its definition).
	CRestrictionPtr client_restriction = restriction(id);
	VERIFY(client_restriction);

	const u32 nearest = client_restriction->accessible_nearest(position, result);
	return nearest;
}
Esempio n. 21
0
//------------------------------------------------------------------------------------------------------------------------------
// HPGMG-FV benchmark driver.
//
// usage: ./hpgmg-fv  [log2_box_dim]  [target_boxes_per_rank]
//
// Steps performed:
//   1. query the OpenMP thread count (when built with OpenMP) and initialize MPI (when USE_MPI is defined)
//   2. parse and validate the two arguments and search for the largest acceptable boxes_in_i
//   3. create and initialize the fine level, rebuild its operator, and build the multigrid hierarchy
//   4. unless TEST_ERROR is defined, benchmark the solves on levels 0..2 and print a performance summary
//   5. run the Richardson error analysis on levels 0..2
//   6. free everything and finalize MPI
//
// Note that every argument-error path below terminates via exit(0); the normal path returns 0.
int main(int argc, char **argv){
  int my_rank=0;
  int num_tasks=1;
  int OMP_Threads = 1;

  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  
  // record how many threads an OpenMP parallel region gets (reported in the banner and summary)
  #ifdef _OPENMP
  #pragma omp parallel 
  {
    #pragma omp master
    {
      OMP_Threads = omp_get_num_threads();
    }
  }
  #endif
    

  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  
  // initialize MPI and HPM
  #ifdef USE_MPI
  int    actual_threading_model = -1;
  int requested_threading_model = -1;
      requested_threading_model = MPI_THREAD_SINGLE;
    //requested_threading_model = MPI_THREAD_FUNNELED;
    //requested_threading_model = MPI_THREAD_SERIALIZED;
    //requested_threading_model = MPI_THREAD_MULTIPLE;
    #ifdef _OPENMP
      requested_threading_model = MPI_THREAD_FUNNELED;
    //requested_threading_model = MPI_THREAD_SERIALIZED;
    //requested_threading_model = MPI_THREAD_MULTIPLE;
    #endif
  MPI_Init_thread(&argc, &argv, requested_threading_model, &actual_threading_model);
  MPI_Comm_size(MPI_COMM_WORLD, &num_tasks);
  MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
  #ifdef USE_HPM // IBM HPM counters for BGQ...
  HPM_Init();
  #endif
  #endif // USE_MPI


  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -  
  // parse the arguments...
  int     log2_box_dim           =  6; // 64^3
  int     target_boxes_per_rank  =  1;
//int64_t target_memory_per_rank = -1; // not specified
  int64_t box_dim                = -1;
  int64_t boxes_in_i             = -1;
  int64_t target_boxes           = -1;

  if(argc==3){
             log2_box_dim=atoi(argv[1]);
    target_boxes_per_rank=atoi(argv[2]);

    if(log2_box_dim>9){
      // NOTE, in order to use 32b int's for array indexing, box volumes must be less than 2^31 doubles
      if(my_rank==0){fprintf(stderr,"log2_box_dim must be less than 10\n");}
      #ifdef USE_MPI
      MPI_Finalize();
      #endif
      exit(0);
    }

    if(log2_box_dim<4){
      if(my_rank==0){fprintf(stderr,"log2_box_dim must be at least 4\n");}
      #ifdef USE_MPI
      MPI_Finalize();
      #endif
      exit(0);
    }

    if(target_boxes_per_rank<1){
      if(my_rank==0){fprintf(stderr,"target_boxes_per_rank must be at least 1\n");}
      #ifdef USE_MPI
      MPI_Finalize();
      #endif
      exit(0);
    }

    #ifndef MAX_COARSE_DIM
    #define MAX_COARSE_DIM 11
    #endif
    box_dim=1<<log2_box_dim;
    target_boxes = (int64_t)target_boxes_per_rank*(int64_t)num_tasks;
    boxes_in_i = -1;
    int64_t bi;
    // keep the largest bi with bi^3 <= target_boxes whose grid dimension (box_dim*bi),
    // after dividing out all factors of 2, is no larger than MAX_COARSE_DIM
    for(bi=1;bi<1000;bi++){ // search all possible problem sizes to find acceptable boxes_in_i
      int64_t total_boxes = bi*bi*bi;
      if(total_boxes<=target_boxes){
        int64_t coarse_grid_dim = box_dim*bi;
        while( (coarse_grid_dim%2) == 0){coarse_grid_dim=coarse_grid_dim/2;}
        if(coarse_grid_dim<=MAX_COARSE_DIM){
          boxes_in_i = bi;
        }
      }
    }
    if(boxes_in_i<1){
      if(my_rank==0){fprintf(stderr,"failed to find an acceptable problem size\n");}
      #ifdef USE_MPI
      MPI_Finalize();
      #endif
      exit(0);
    }
  } // argc==3

  #if 0
  else if(argc==2){ // interpret argv[1] as target_memory_per_rank
    char *ptr = argv[1];
    char *tmp;
    target_memory_per_rank = strtol(ptr,&ptr,10);
    if(target_memory_per_rank<1){
      if(my_rank==0){fprintf(stderr,"unrecognized target_memory_per_rank... '%s'\n",argv[1]);}
      #ifdef USE_MPI
      MPI_Finalize();
      #endif
      exit(0);
    }
    tmp=strstr(ptr,"TB");if(tmp){ptr=tmp+2;target_memory_per_rank *= (uint64_t)(1<<30)*(1<<10);}
    tmp=strstr(ptr,"GB");if(tmp){ptr=tmp+2;target_memory_per_rank *= (uint64_t)(1<<30);}
    tmp=strstr(ptr,"MB");if(tmp){ptr=tmp+2;target_memory_per_rank *= (uint64_t)(1<<20);}
    tmp=strstr(ptr,"tb");if(tmp){ptr=tmp+2;target_memory_per_rank *= (uint64_t)(1<<30)*(1<<10);}
    tmp=strstr(ptr,"gb");if(tmp){ptr=tmp+2;target_memory_per_rank *= (uint64_t)(1<<30);}
    tmp=strstr(ptr,"mb");if(tmp){ptr=tmp+2;target_memory_per_rank *= (uint64_t)(1<<20);}
    if( (ptr) && (*ptr != '\0') ){
      if(my_rank==0){fprintf(stderr,"unrecognized units... '%s'\n",ptr);}
      #ifdef USE_MPI
      MPI_Finalize();
      #endif
      exit(0);
    }
    // FIX, now search for an 'acceptable' box_dim and boxes_in_i constrained by target_memory_per_rank, num_tasks, and MAX_COARSE_DIM
  } // argc==2
  #endif


  else{
    if(my_rank==0){fprintf(stderr,"usage: ./hpgmg-fv  [log2_box_dim]  [target_boxes_per_rank]\n");}
                 //fprintf(stderr,"       ./hpgmg-fv  [target_memory_per_rank[MB,GB,TB]]\n");}
    #ifdef USE_MPI
    MPI_Finalize();
    #endif
    exit(0);
  }




  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
  // print the banner (rank 0 only), including the requested vs. granted MPI threading model
  if(my_rank==0){
  fprintf(stdout,"\n\n");
  fprintf(stdout,"********************************************************************************\n");
  fprintf(stdout,"***                            HPGMG-FV Benchmark                            ***\n");
  fprintf(stdout,"********************************************************************************\n");
  #ifdef USE_MPI
       if(requested_threading_model == MPI_THREAD_MULTIPLE  )fprintf(stdout,"Requested MPI_THREAD_MULTIPLE, ");
  else if(requested_threading_model == MPI_THREAD_SINGLE    )fprintf(stdout,"Requested MPI_THREAD_SINGLE, ");
  else if(requested_threading_model == MPI_THREAD_FUNNELED  )fprintf(stdout,"Requested MPI_THREAD_FUNNELED, ");
  else if(requested_threading_model == MPI_THREAD_SERIALIZED)fprintf(stdout,"Requested MPI_THREAD_SERIALIZED, ");
  else                                                       fprintf(stdout,"Requested Unknown MPI Threading Model (%d), ",requested_threading_model);
       if(actual_threading_model    == MPI_THREAD_MULTIPLE  )fprintf(stdout,"got MPI_THREAD_MULTIPLE\n");
  else if(actual_threading_model    == MPI_THREAD_SINGLE    )fprintf(stdout,"got MPI_THREAD_SINGLE\n");
  else if(actual_threading_model    == MPI_THREAD_FUNNELED  )fprintf(stdout,"got MPI_THREAD_FUNNELED\n");
  else if(actual_threading_model    == MPI_THREAD_SERIALIZED)fprintf(stdout,"got MPI_THREAD_SERIALIZED\n");
  else                                                       fprintf(stdout,"got Unknown MPI Threading Model (%d)\n",actual_threading_model);
  #endif
  fprintf(stdout,"%d MPI Tasks of %d threads\n",num_tasks,OMP_Threads);
  fprintf(stdout,"\n\n===== Benchmark setup ==========================================================\n");
  }


  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
  // create the fine level...
  #ifdef USE_PERIODIC_BC
  int bc = BC_PERIODIC;
  int minCoarseDim = 2; // avoid problems with black box calculation of D^{-1} for poisson with periodic BC's on a 1^3 grid
  #else
  int bc = BC_DIRICHLET;
  int minCoarseDim = 1; // assumes you can drop order on the boundaries
  #endif
  level_type level_h;
  int ghosts=stencil_get_radius();
  create_level(&level_h,boxes_in_i,box_dim,ghosts,VECTORS_RESERVED,bc,my_rank,num_tasks);
  #ifdef USE_HELMHOLTZ
  double a=1.0;double b=1.0; // Helmholtz
  if(my_rank==0)fprintf(stdout,"  Creating Helmholtz (a=%f, b=%f) test problem\n",a,b);
  #else
  double a=0.0;double b=1.0; // Poisson
  if(my_rank==0)fprintf(stdout,"  Creating Poisson (a=%f, b=%f) test problem\n",a,b);
  #endif
  double h=1.0/( (double)boxes_in_i*(double)box_dim );  // [0,1]^3 problem
  initialize_problem(&level_h,h,a,b);                   // initialize VECTOR_ALPHA, VECTOR_BETA*, and VECTOR_F
  rebuild_operator(&level_h,NULL,a,b);                  // calculate Dinv and lambda_max
  if(level_h.boundary_condition.type == BC_PERIODIC){   // remove any constants from the RHS for periodic problems
    double average_value_of_f = mean(&level_h,VECTOR_F);
    if(average_value_of_f!=0.0){
      if(my_rank==0){fprintf(stderr,"  WARNING... Periodic boundary conditions, but f does not sum to zero... mean(f)=%e\n",average_value_of_f);}
      shift_vector(&level_h,VECTOR_F,VECTOR_F,-average_value_of_f);
    }
  }


  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
  // create the MG hierarchy...
  mg_type MG_h;
  MGBuild(&MG_h,&level_h,a,b,minCoarseDim);             // build the Multigrid Hierarchy 


  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
  // HPGMG-500 benchmark proper
  // evaluate performance on problem sizes of h, 2h, and 4h
  // (i.e. examine dynamic range for problem sizes N, N/8, and N/64)
//double dtol=1e-15;double rtol=  0.0; // converged if ||D^{-1}(b-Ax)|| < dtol
  double dtol=  0.0;double rtol=1e-10; // converged if ||b-Ax|| / ||b|| < rtol
  int l;
  #ifndef TEST_ERROR

  double AverageSolveTime[3];
  for(l=0;l<3;l++){
    // the RHS on each coarser level is the restriction of the next finer level's RHS
    if(l>0)restriction(MG_h.levels[l],VECTOR_F,MG_h.levels[l-1],VECTOR_F,RESTRICT_CELL);
    bench_hpgmg(&MG_h,l,a,b,dtol,rtol);
    AverageSolveTime[l] = (double)MG_h.timers.MGSolve / (double)MG_h.MGSolves_performed;
    if(my_rank==0){fprintf(stdout,"\n\n===== Timing Breakdown =========================================================\n");}
    MGPrintTiming(&MG_h,l);
  }

  if(my_rank==0){
    #ifdef CALIBRATE_TIMER
    // measure how many timer ticks elapse during a one second sleep
    double _timeStart=getTime();sleep(1);double _timeEnd=getTime();
    double SecondsPerCycle = (double)1.0/(double)(_timeEnd-_timeStart);
    #else
    double SecondsPerCycle = 1.0;
    #endif
    fprintf(stdout,"\n\n===== Performance Summary ======================================================\n");
    for(l=0;l<3;l++){
      double DOF = (double)MG_h.levels[l]->dim.i*(double)MG_h.levels[l]->dim.j*(double)MG_h.levels[l]->dim.k;
      double seconds = SecondsPerCycle*(double)AverageSolveTime[l];
      double DOFs = DOF / seconds;
      fprintf(stdout,"  h=%0.15e  DOF=%0.15e  time=%0.6f  DOF/s=%0.3e  MPI=%d  OMP=%d\n",MG_h.levels[l]->h,DOF,seconds,DOFs,num_tasks,OMP_Threads);
    }
  }
  #endif

  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
  if(my_rank==0){fprintf(stdout,"\n\n===== Richardson error analysis ================================================\n");}
  // solve A^h u^h = f^h
  // solve A^2h u^2h = f^2h
  // solve A^4h u^4h = f^4h
  // error analysis...
  MGResetTimers(&MG_h);
  for(l=0;l<3;l++){
    if(l>0)restriction(MG_h.levels[l],VECTOR_F,MG_h.levels[l-1],VECTOR_F,RESTRICT_CELL);
           zero_vector(MG_h.levels[l],VECTOR_U);
    #ifdef USE_FCYCLES
    FMGSolve(&MG_h,l,VECTOR_U,VECTOR_F,a,b,dtol,rtol);
    #else
     MGSolve(&MG_h,l,VECTOR_U,VECTOR_F,a,b,dtol,rtol);
    #endif
  }
  richardson_error(&MG_h,0,VECTOR_U);


  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
  if(my_rank==0){fprintf(stdout,"\n\n===== Deallocating memory ======================================================\n");}
  MGDestroy(&MG_h);
  destroy_level(&level_h);


  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
  if(my_rank==0){fprintf(stdout,"\n\n===== Done =====================================================================\n");}

  #ifdef USE_MPI
  #ifdef USE_HPM // IBM performance counters for BGQ...
  HPM_Print();
  #endif
  MPI_Finalize();
  #endif
  return(0);
  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
}
// Asks the restriction looked up for object `id` to remove its border.
// Does nothing when the lookup yields an empty/null restriction.
void CSpaceRestrictionManager::remove_border(ALife::_OBJECT_ID id)
{
	if (CRestrictionPtr client_restriction = restriction(id))
	{
		client_restriction->remove_border();
	}
}
Esempio n. 23
0
// Serial two-grid multigrid demo driver.
//
// Command line: argv[1] = coarse grid size   (default MAXSIZE, values outside 1..MAXSIZE fall back to MAXSIZE)
//               argv[2] = coarse iterations  (default MAXITER, values outside 1..MAXITER fall back to MAXITER)
//
// Allocates the fine and coarse grids, sets the interior of the fine grid to 0
// and its boundary to 1, runs v_cycles cycles of
//   jacobi(fine) -> restriction() -> jacobi(coarse) -> interpolate() -> jacobi(fine),
// reports the maximum deviation of the fine grid from 1 and the elapsed time,
// and writes the fine grid to multigrid_gauss_serial_data.txt.
int main (int argc, const char * argv[]) 
{
    int i, j, kk;
    int x, y;

    float Finalmaxdiff = 0.0;

    float time;
    FILE *fp;

    //get command line arguments
    coarse_dim = (argc > 1)? atoi(argv[1]) : MAXSIZE;
    solution_iter = (argc > 2)? atoi(argv[2]) : MAXITER;

    // Out-of-range values fall back to the maximum. Non-positive sizes are
    // rejected the same way non-positive iteration counts are, because they
    // would otherwise produce empty or negative grid dimensions.
    if ((coarse_dim > MAXSIZE)||(coarse_dim <= 0)) coarse_dim = MAXSIZE;
    if ((solution_iter > MAXITER)||(solution_iter <= 0)) solution_iter = MAXITER;

    //accomodate the boundary conditions in size
    coarse_dim_with_boundary = coarse_dim + 2;
    fine_dim = (coarse_dim*2)+1;
    fine_dim_with_boundary=fine_dim+2;

    printf("\n\n******* Fine Grid Size: %d and Number of coarse iterations: %d *******\n\n", fine_dim, solution_iter);

    //create the matrices (row-major, one boundary layer on each side)
    grid_fine = (float* )malloc((size_t)fine_dim_with_boundary * (size_t)fine_dim_with_boundary * sizeof(float));
    grid_coarse = (float* )malloc((size_t)coarse_dim_with_boundary * (size_t)coarse_dim_with_boundary * sizeof(float));

    if (grid_fine == NULL || grid_coarse == NULL)
    {
        printf("Error: can't allocate grids.\n");
        free(grid_fine);
        free(grid_coarse);
        exit(1);
    }

    //Set inner values
    // The row stride of grid_fine is fine_dim_with_boundary, so interior cell
    // (i,j) lives at i*fine_dim_with_boundary+j. Indexing with fine_dim as the
    // stride would leave part of the interior uninitialized.
    for (i=1; i<=fine_dim; i++) 
    {
        for (j=1; j<=fine_dim; j++) 
        {
            grid_fine[(i*fine_dim_with_boundary)+j]=0;
        }
    }

    //Set boundary conditions
    for (i=0; i<fine_dim_with_boundary; i++)
    {
        grid_fine[i]=1;// First row
        grid_fine[i*fine_dim_with_boundary]=1; // First column
        grid_fine[(i*fine_dim_with_boundary)+(fine_dim+1)]=1; // Last column
        grid_fine[(fine_dim_with_boundary*(fine_dim_with_boundary-1))+i]=1; // Last Row
    }

    //calculate the coarse grid with double spacing:
    //coarse point (x,y) is initialized from fine point (2x,2y)
    i = 0;
    j = 0;
    for (x=0; x<coarse_dim_with_boundary; x++)
    {
        for (y=0; y<coarse_dim_with_boundary; y++)
        {
            grid_coarse[x*coarse_dim_with_boundary+y] = grid_fine[i*fine_dim_with_boundary+j];
            j=j+2;
        }
        i=i+2;
        j=0;
    }

    time = timer();

    for(kk = 0; kk < v_cycles; kk++)
    {
        //******************************* STEP 1 SMOOTHING **********************************************
        // Step1:Smoothing via jacobi
        jacobi(grid_fine, fine_dim_with_boundary, smoothing_iter);
        //printf("\nStep1 Smoothing on fine matrix: DONE\n");
        //printMatrix(grid_fine, fine_dim_with_boundary);

        //***************************** STEP 2 RESTRICTION **********************************************
        //step2: Restrict the fine grid to a coarser grid in which the points are twice as far apart
        //restriction operator
        //coarse[x][y] = fine[i][j]*0.5 + (fine[i-1][j] + fine[i][j-1] + fine[i][j+1] + fine[i+1][j])* 0.125
        restriction();
        //printf("\nStep2 coarse grid restriction: DONE\n");
        // printMatrix(grid_coarse, coarse_dim_with_boundary);

        //******************************** STEP 3 SOLUTION **********************************************
        //step3: compute the solution to desired accuracy
        jacobi(grid_coarse, coarse_dim_with_boundary, solution_iter);
        //printf("\n\n\nStep3 %d iterations on coarse: DONE\n", solution_iter);
        //printMatrix(grid_coarse, coarse_dim_with_boundary);

        //*************************** STEP 4 INTERPOLATION **********************************************
        //step4: Interpolate the coarse grid back to fine grid
        interpolate();        
        //printf("\n\n\nStep4 matrix Interpolation back to fine grid: DONE\n"); 
        //printMatrix(grid_fine, fine_dim_with_boundary);

        //****************************** STEP 5: SMOOTHING **********************************************
        //step5: update the fine grid for a few iterations
        jacobi(grid_fine, fine_dim_with_boundary, smoothing_iter);
        //printf("\n\n\nStep5 Final Smoothing: DONE\n"); 
        // printMatrix(grid_fine, fine_dim_with_boundary);
    }

    // Largest deviation from 1. The loop bounds also cover the last boundary
    // row and column, which were set to 1 above.
    Finalmaxdiff = 0.0;
    for (i=1; i<fine_dim_with_boundary; i++) 
    {
        for (j=1; j<fine_dim_with_boundary; j++) 
        {
            Finalmaxdiff = max(Finalmaxdiff, absolute(1 - grid_fine[(i*fine_dim_with_boundary)+j]));
            //Finalmaxdiff = max(Finalmaxdiff, absolute(newMatrix_fine[(i*fine_dim_with_boundary)+j] - grid_fine[(i*fine_dim_with_boundary)+j]));            
        }
    }

    printf("\nFinal maxdiff: %f after %d V-cycles\n\n",Finalmaxdiff, v_cycles);

    time=timer()-time;
    printf("Elapsed time: %f\n",time/1000000.0);
    fp=fopen("multigrid_gauss_serial_data.txt", "wb");

    if(fp==NULL) 
    {
        printf("Error: can't open file.\n");
        exit(0);
    }

    //save in file, one grid row per text line
    for (i=0; i<fine_dim_with_boundary; i++)
    {
        for(j=0; j<fine_dim_with_boundary; j++)
        {
            fprintf(fp, "%f ", grid_fine[i*fine_dim_with_boundary+j]); 
        }
        fputs("\n", fp);
    }

    printf("data saved in multigrid_gauss_serial_data.txt\n");
    fclose(fp);

    free(grid_fine);
    free(grid_coarse);

    return 0;

}//end Main
Esempio n. 24
0
//------------------------------------------------------------------------------------------------------------------------------
// Rebuilds the level-dependent data of the 27pt constant-coefficient operator:
//   1. if fromLevel is non-NULL, restricts alpha and beta_{i,j,k} from fromLevel onto level
//   2. exchanges the alpha/beta ghost zones
//   3. fills VECTOR_DINV and VECTOR_L1INV and bounds the dominant eigenvalue of D^{-1}A with Gershgorin discs
//   4. reduces that bound over all ranks (USE_MPI builds only) and stores it in level->dominant_eigenvalue_of_DinvA
//   5. exchanges the Dinv/L1inv ghost zones
//
//   level     : level whose operator data is rebuilt (written in place)
//   fromLevel : level to restrict the coefficients from; NULL means alpha/beta are assumed to be set already
//   a, b      : scalar coefficients; the diagonal entry used here is  a - b*STENCIL_COEF0/h^2
//
// Rank 0 prints a one-line progress message to stdout.  Time spent in the Dinv/L1inv loop is added to
// level->cycles.blas1 and time spent in the all-reduce to level->cycles.collectives.
void rebuild_operator(level_type * level, level_type *fromLevel, double a, double b){
  if(level->my_rank==0){fprintf(stdout,"  rebuilding 27pt CC operator for level...  h=%e  ",level->h);}

  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // form restriction of alpha[], beta_*[] coefficients from fromLevel
  if(fromLevel != NULL){
    restriction(level,VECTOR_ALPHA ,fromLevel,VECTOR_ALPHA ,RESTRICT_CELL  );
    restriction(level,VECTOR_BETA_I,fromLevel,VECTOR_BETA_I,RESTRICT_FACE_I);
    restriction(level,VECTOR_BETA_J,fromLevel,VECTOR_BETA_J,RESTRICT_FACE_J);
    restriction(level,VECTOR_BETA_K,fromLevel,VECTOR_BETA_K,RESTRICT_FACE_K);
  } // else case assumes alpha/beta have been set


  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // exchange alpha/beta/...  (must be done before calculating Dinv)
  exchange_boundary(level,VECTOR_ALPHA ,0); // must be 0(faces,edges,corners) for CA version or 27pt
  exchange_boundary(level,VECTOR_BETA_I,0);
  exchange_boundary(level,VECTOR_BETA_J,0);
  exchange_boundary(level,VECTOR_BETA_K,0);


  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // calculate Dinv, L1inv, and estimate the dominant Eigenvalue
  uint64_t _timeStart = CycleTime();
  int block;

  double dominant_eigenvalue = -1e9;

  PRAGMA_THREAD_ACROSS_BLOCKS_MAX(level,block,level->num_my_blocks,dominant_eigenvalue)
  for(block=0;block<level->num_my_blocks;block++){
    const int box = level->my_blocks[block].read.box;
    const int ilo = level->my_blocks[block].read.i;
    const int jlo = level->my_blocks[block].read.j;
    const int klo = level->my_blocks[block].read.k;
    const int ihi = level->my_blocks[block].dim.i + ilo;
    const int jhi = level->my_blocks[block].dim.j + jlo;
    const int khi = level->my_blocks[block].dim.k + klo;
    int i,j,k;
    const int jStride = level->my_boxes[box].jStride;
    const int kStride = level->my_boxes[box].kStride;
    const int  ghosts = level->my_boxes[box].ghosts;
    // offset the base pointers by the ghost zone so that index (0,0,0) addresses the first non-ghost cell
    double * __restrict__   Dinv = level->my_boxes[box].vectors[VECTOR_DINV  ] + ghosts*(1+jStride+kStride);
    double * __restrict__  L1inv = level->my_boxes[box].vectors[VECTOR_L1INV ] + ghosts*(1+jStride+kStride);

    // The operator is constant-coefficient: everything below depends only on a, b and h, never on the cell.
    // It is therefore evaluated once per block (inside the threaded loop, so the threaded region still only
    // references level, a and b from the enclosing scope) instead of once per cell.
    const double h2inv = 1.0/(level->h*level->h);
    // radius of Gershgorin disc is the sum of the absolute values of the off-diagonal elements...
    // (the 6/12/8 multipliers are presumably the face/edge/corner neighbour counts of the 27pt stencil)
    const double sumAbsAij = fabs(b*h2inv*6.0*STENCIL_COEF1) + fabs(b*h2inv*12.0*STENCIL_COEF2) + fabs(b*h2inv*8.0*STENCIL_COEF3);
    // center of Gershgorin disc is the diagonal element...
    const double Aii = a - b*h2inv*( STENCIL_COEF0 );
    // inverse of the diagonal Aii
    const double DinvValue = 1.0/Aii;
    // as suggested by eq 6.5 in Baker et al, "Multigrid smoothers for ultra-parallel computing: additional theory and discussion"...
    // (the plain L1 variant would be 1.0/(Aii+sumAbsAij), i.e. L1inv = ( D+D^{L1} )^{-1})
    const double L1invValue = (Aii>=1.5*sumAbsAij) ? 1.0/(Aii              )
                                                   : 1.0/(Aii+0.5*sumAbsAij);
    // upper limit to Gershgorin disc == bound on dominant eigenvalue
    const double Di = (Aii + sumAbsAij)/Aii;

    for(k=klo;k<khi;k++){
    for(j=jlo;j<jhi;j++){
    for(i=ilo;i<ihi;i++){
      const int ijk = i + j*jStride + k*kStride;
       Dinv[ijk] = DinvValue;
      L1inv[ijk] = L1invValue;
    }}}

    // a block only contributes to the bound if it actually contains cells; an empty block keeps the -1e9 sentinel
    double block_eigenvalue = -1e9;
    if( (ilo<ihi) && (jlo<jhi) && (klo<khi) && (Di>block_eigenvalue) ){block_eigenvalue = Di;}
    if(block_eigenvalue>dominant_eigenvalue){dominant_eigenvalue = block_eigenvalue;}
  }
  level->cycles.blas1 += (uint64_t)(CycleTime()-_timeStart);


  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // Reduce the local estimates dominant eigenvalue to a global estimate
  #ifdef USE_MPI
  uint64_t _timeStartAllReduce = CycleTime();
  double send = dominant_eigenvalue;
  MPI_Allreduce(&send,&dominant_eigenvalue,1,MPI_DOUBLE,MPI_MAX,MPI_COMM_WORLD);
  uint64_t _timeEndAllReduce = CycleTime();
  level->cycles.collectives   += (uint64_t)(_timeEndAllReduce-_timeStartAllReduce);
  #endif
  if(level->my_rank==0){fprintf(stdout,"eigenvalue_max<%e\n",dominant_eigenvalue);}
  level->dominant_eigenvalue_of_DinvA = dominant_eigenvalue;


  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // exchange Dinv/L1inv/...
  exchange_boundary(level,VECTOR_DINV ,0); // must be 0(faces,edges,corners) for CA version
  exchange_boundary(level,VECTOR_L1INV,0);
  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
}