//------------------------------------------------------------------------------------------------------------------------------
// Rebuilds the operator-dependent data of `level`:
//   1. if `fromLevel` is non-NULL, restricts alpha[] and beta_*[] from `fromLevel` onto `level`
//      (otherwise alpha/beta are assumed to have been set on `level` already),
//   2. extrapolates the betas into the ghost zones (needed for mixed derivatives),
//   3. exchanges the coefficient boundaries (must precede the calculation of Dinv),
//   4. hands off to rebuild_operator_blackbox() for D^{-1}, l1^{-1}, dominant eigenvalue, ...
//   5. exchanges the boundaries of the freshly built Dinv (and L1inv when present).
// `a` and `b` are forwarded untouched to rebuild_operator_blackbox().
void rebuild_operator(level_type * level, level_type *fromLevel, double a, double b){
  // face-centered coefficient vectors, paired with the restriction type used for each of them
  const int betaVectors[3]      = {VECTOR_BETA_I  ,VECTOR_BETA_J  ,VECTOR_BETA_K  };
  const int faceRestrictions[3] = {RESTRICT_FACE_I,RESTRICT_FACE_J,RESTRICT_FACE_K};
  int d;

  // form restriction of alpha[], beta_*[] coefficients from fromLevel
  if(fromLevel != NULL){
#ifdef VECTOR_ALPHA
    restriction(level,VECTOR_ALPHA,fromLevel,VECTOR_ALPHA,RESTRICT_CELL);
#endif
    for(d=0;d<3;d++){
      restriction(level,betaVectors[d],fromLevel,betaVectors[d],faceRestrictions[d]);
    }
  }

  // extrapolate the beta's into the ghost zones (needed for mixed derivatives)
  extrapolate_betas(level);

  // exchange alpha/beta/... (must be done before calculating Dinv)
#ifdef VECTOR_ALPHA
  exchange_boundary(level,VECTOR_ALPHA,STENCIL_SHAPE_BOX);
#endif
  for(d=0;d<3;d++){
    exchange_boundary(level,betaVectors[d],STENCIL_SHAPE_BOX);
  }

  // black box rebuild of D^{-1}, l1^{-1}, dominant eigenvalue, ...
  rebuild_operator_blackbox(level,a,b,4);

  // exchange Dinv/L1inv/...
  exchange_boundary(level,VECTOR_DINV,STENCIL_SHAPE_BOX);
#ifdef VECTOR_L1INV
  exchange_boundary(level,VECTOR_L1INV,STENCIL_SHAPE_BOX);
#endif
}
//------------------------------------------------------------------------------------------------------------------------------
// Rebuilds the operator-dependent data of `level`:
//   1. if `fromLevel` is non-NULL, restricts alpha[] and beta_*[] from `fromLevel` onto `level`
//      (otherwise alpha/beta are assumed to have been set on `level` already),
//   2. exchanges the coefficient boundaries (must precede the calculation of Dinv),
//   3. hands off to rebuild_operator_blackbox() for D^{-1}, l1^{-1}, dominant eigenvalue, ...
//   4. exchanges the boundary of the freshly built Dinv.
// `a` and `b` are forwarded untouched to rebuild_operator_blackbox().
void rebuild_operator(level_type * level, level_type *fromLevel, double a, double b){
  // face-centered coefficient vectors, paired with the restriction type used for each of them
  const int betaVectors[3]      = {VECTOR_BETA_I  ,VECTOR_BETA_J  ,VECTOR_BETA_K  };
  const int faceRestrictions[3] = {RESTRICT_FACE_I,RESTRICT_FACE_J,RESTRICT_FACE_K};
  int d;

  // form restriction of alpha[], beta_*[] coefficients from fromLevel
  if(fromLevel != NULL){
#ifdef VECTOR_ALPHA
    restriction(level,VECTOR_ALPHA,fromLevel,VECTOR_ALPHA,RESTRICT_CELL);
#endif
    for(d=0;d<3;d++){
      restriction(level,betaVectors[d],fromLevel,betaVectors[d],faceRestrictions[d]);
    }
  }

  // exchange alpha/beta/... (must be done before calculating Dinv)
#ifdef VECTOR_ALPHA
  exchange_boundary(level,VECTOR_ALPHA,STENCIL_SHAPE_BOX);
#endif
  for(d=0;d<3;d++){
    exchange_boundary(level,betaVectors[d],STENCIL_SHAPE_BOX);
  }

  // black box rebuild of D^{-1}, l1^{-1}, dominant eigenvalue, ...
  rebuild_operator_blackbox(level,a,b,2);

  // exchange Dinv...
  exchange_boundary(level,VECTOR_DINV,STENCIL_SHAPE_BOX);
}
void peano::applications::poisson::multigrid::mappings::SpacetreeGrid2SetupExperiment::createBoundaryVertex( peano::applications::poisson::multigrid::SpacetreeGridVertex& fineGridVertex, const tarch::la::Vector<DIMENSIONS,double>& fineGridX, const tarch::la::Vector<DIMENSIONS,double>& fineGridH, peano::applications::poisson::multigrid::SpacetreeGridVertex const * const coarseGridVertices, const peano::kernel::gridinterface::VertexEnumerator& coarseGridVerticesEnumerator, const peano::applications::poisson::multigrid::SpacetreeGridCell& coarseGridCell, const tarch::la::Vector<DIMENSIONS,int>& fineGridPositionOfVertex ) { logTraceInWith6Arguments( "createBoundaryVertex(...)", fineGridVertex, fineGridX, fineGridH, coarseGridVerticesEnumerator.toString(), coarseGridCell, fineGridPositionOfVertex ); // if (tarch::la::volume(fineGridH) > _refinementThreshold) { // fineGridVertex.refine(); // } if (coarseGridVerticesEnumerator.getLevel() < 3) { fineGridVertex.refine(); } peano::toolbox::stencil::Stencil stencil(0.0); fineGridVertex.setStencil(stencil); peano::toolbox::stencil::ProlongationMatrix prolongation (0.0); fineGridVertex.setP(prolongation); peano::toolbox::stencil::RestrictionMatrix restriction(0.0); fineGridVertex.setR(restriction); fineGridVertex.clearTempAP(); fineGridVertex.clearTempP(); logTraceOutWith1Argument( "createBoundaryVertex(...)", fineGridVertex ); }
// Returns whether the level vertex (with the given radius) is accessible for
// the object `id`. An object without a registered restriction is never
// limited, so the answer is true in that case.
bool CSpaceRestrictionManager::accessible(ALife::_OBJECT_ID id, u32 level_vertex_id, float radius)
{
	CRestrictionPtr object_restriction = restriction(id);
	if (!object_restriction)
		return true;

	return object_restriction->accessible(level_vertex_id, radius);
}
// Returns whether the given sphere is accessible for the object `id`.
// An object without a registered restriction is never limited, so the
// answer is true in that case.
bool CSpaceRestrictionManager::accessible(ALife::_OBJECT_ID id, const Fsphere &sphere)
{
	CRestrictionPtr object_restriction = restriction(id);
	if (!object_restriction)
		return true;

	return object_restriction->accessible(sphere);
}
// Returns the out-restrictions string of the restriction registered for the
// object `id`, or an empty string when the object has no restriction.
shared_str CSpaceRestrictionManager::out_restrictions(ALife::_OBJECT_ID id)
{
	CRestrictionPtr object_restriction = restriction(id);
	if (!object_restriction)
		return "";

	return object_restriction->out_restrictions();
}
/**
 * Parses a single-operand ODF validation condition such as "<=10" or "!=3".
 *
 * The leading comparison operator selects the condition (two-character
 * operators are tested before their one-character prefixes), and the rest of
 * the expression becomes the minimum value, parsed according to the current
 * restriction: as a date for Date, as a time for Time, otherwise as a number
 * (double first, then int).
 *
 * Does nothing when the validity is empty.
 *
 * @param valExpression expression to parse; the recognised operator prefix is
 *                      removed from it in place.
 * @param parser        value parser used for date and time operands.
 */
void KCValidity::loadOdfValidationCondition(QString &valExpression, const KCValueParser *parser)
{
    if (isEmpty()) return;

    QString value;
    if (valExpression.startsWith("<=")) {
        value = valExpression.remove(0, 2);
        setCondition(KCConditional::InferiorEqual);
    } else if (valExpression.startsWith(">=")) {
        value = valExpression.remove(0, 2);
        setCondition(KCConditional::SuperiorEqual);
    } else if (valExpression.startsWith("!=")) {
        //add Differentto attribute
        value = valExpression.remove(0, 2);
        setCondition(KCConditional::DifferentTo);
    } else if (valExpression.startsWith('<')) {
        value = valExpression.remove(0, 1);
        setCondition(KCConditional::Inferior);
    } else if (valExpression.startsWith('>')) {
        value = valExpression.remove(0, 1);
        setCondition(KCConditional::Superior);
    } else if (valExpression.startsWith('=')) {
        value = valExpression.remove(0, 1);
        setCondition(KCConditional::Equal);
    } else
        kDebug(36003) << " I don't know how to parse it :" << valExpression;

    if (restriction() == KCValidity::Date) {
        setMinimumValue(parser->tryParseDate(value));
    } else if (restriction() == KCValidity::Time) {
        // This branch used to test KCValidity::Date a second time, which made
        // it unreachable and sent time operands through the numeric parser.
        setMinimumValue(parser->tryParseTime(value));
    } else {
        bool ok = false;
        setMinimumValue(KCValue(value.toDouble(&ok)));
        if (!ok) {
            setMinimumValue(KCValue(value.toInt(&ok)));
            if (!ok)
                kDebug(36003) << " Try to parse this value :" << value;

#if 0
            if (!ok)
                setMinimumValue(value);
#endif
        }
    }
}
void KCValidity::loadOdfValidationValue(const QStringList &listVal, const KCValueParser *parser) { bool ok = false; kDebug(36003) << " listVal[0] :" << listVal[0] << " listVal[1] :" << listVal[1]; if (restriction() == KCValidity::Date) { setMinimumValue(parser->tryParseDate(listVal[0])); setMaximumValue(parser->tryParseDate(listVal[1])); } else if (restriction() == KCValidity::Time) { setMinimumValue(parser->tryParseTime(listVal[0])); setMaximumValue(parser->tryParseTime(listVal[1])); } else { setMinimumValue(KCValue(listVal[0].toDouble(&ok))); if (!ok) { setMinimumValue(KCValue(listVal[0].toInt(&ok))); if (!ok) kDebug(36003) << " Try to parse this value :" << listVal[0]; #if 0 if (!ok) setMinimumValue(listVal[0]); #endif } ok = false; setMaximumValue(KCValue(listVal[1].toDouble(&ok))); if (!ok) { setMaximumValue(KCValue(listVal[1].toInt(&ok))); if (!ok) kDebug(36003) << " Try to parse this value :" << listVal[1]; #if 0 if (!ok) setMaximumValue(listVal[1]); #endif } } }
// Removes the given out/in restrictors from the base restrictions of the
// object `id` and re-applies the result through restrict(). Objects without
// a registered restriction are left untouched. The current restriction must
// not be applied at the time of the call (checked by VERIFY).
void CSpaceRestrictionManager::remove_restrictions(ALife::_OBJECT_ID id, shared_str remove_out_restrictions, shared_str remove_in_restrictions)
{
	CRestrictionPtr current_restriction = restriction(id);
	if (!current_restriction)
		return;

	VERIFY(!current_restriction->applied());

	CClientRestriction &client = (*m_clients)[id];

	// start from the stored base restrictions and subtract what was requested
	shared_str remaining_out = client.m_base_out_restrictions;
	shared_str remaining_in = client.m_base_in_restrictions;

	difference_restrictions(remaining_out, remove_out_restrictions);
	difference_restrictions(remaining_in, remove_in_restrictions);

	restrict(id, remaining_out, remaining_in);
}
void Preprocess::Callback::ProcessTurnRestriction(const std::vector<RawRelation::Member>& members, TurnRestriction::Type type) { Id from=0; Id via=0; Id to=0; for (std::vector<RawRelation::Member>::const_iterator member=members.begin(); member!=members.end(); ++member) { if (member->type==RawRelation::memberWay && member->role=="from") { from=member->id; } else if (member->type==RawRelation::memberNode && member->role=="via") { via=member->id; } else if (member->type==RawRelation::memberWay && member->role=="to") { to=member->id; } // finished collection data if (from!=0 && via!=0 && to!=0) { break; } } if (from!=0 && via!=0 && to!=0) { TurnRestriction restriction(type, from, via, to); restriction.Write(turnRestrictionWriter); turnRestrictionCount++; } }
// Adds the given out/in restrictors to the object `id`. When the object has
// no restriction yet, the new restrictors are applied directly; otherwise
// they are joined with the stored base restrictions and the result is
// re-applied through restrict(). An existing restriction must not be applied
// at the time of the call (checked by VERIFY).
void CSpaceRestrictionManager::add_restrictions(ALife::_OBJECT_ID id, shared_str add_out_restrictions, shared_str add_in_restrictions)
{
	CRestrictionPtr current_restriction = restriction(id);
	if (!current_restriction) {
		restrict(id, add_out_restrictions, add_in_restrictions);
		return;
	}

	VERIFY(!current_restriction->applied());

	CClientRestriction &client = (*m_clients)[id];

	// extend the stored base restrictions with the requested ones
	shared_str extended_out = client.m_base_out_restrictions;
	shared_str extended_in = client.m_base_in_restrictions;

	join_restrictions(extended_out, add_out_restrictions);
	join_restrictions(extended_in, add_in_restrictions);

	restrict(id, extended_out, extended_in);
}
// Assigns a restriction to the object `id`.
//
// The effective restriction is built from the given restrictors plus the
// default ones: default out-restrictors that are explicitly listed as
// in-restrictors (and vice versa) are dropped first, and the remaining
// defaults are joined into the given lists. The unmerged restrictors are
// stored as the client's base restrictions. Changing a restriction that is
// still applied is rejected by VERIFY2. Finishes with collect_garbage().
void CSpaceRestrictionManager::restrict(ALife::_OBJECT_ID id, shared_str out_restrictors, shared_str in_restrictors)
{
	shared_str effective_out = out_restrictors;
	shared_str effective_in = in_restrictors;

	shared_str defaults_out = default_out_restrictions();
	shared_str defaults_in = default_in_restrictions();

	// an explicit restrictor of the opposite kind overrides the default one
	difference_restrictions(defaults_out, effective_in);
	difference_restrictions(defaults_in, effective_out);

	join_restrictions(effective_out, defaults_out);
	join_restrictions(effective_in, defaults_in);

	CLIENT_RESTRICTIONS::iterator client = m_clients->find(id);
	VERIFY2((m_clients->end() == client) || !client->second.m_restriction || !client->second.m_restriction->applied(), "Restriction cannot be changed since its border is still applied!");

	(*m_clients)[id].m_restriction = restriction(effective_out, effective_in);
	(*m_clients)[id].m_base_out_restrictions = out_restrictors;
	(*m_clients)[id].m_base_in_restrictions = in_restrictors;

	collect_garbage();
}
// Performs one step of the multigrid cycle on this level, as selected by the
// controller's command:
//   command <  0 : (smooth unless this is the top level,) zero the sublevel,
//                  restrict and let the sublevel iterate;
//   command == 0 : smooth only;
//   command >  0 : smooth, then either prolongate and let the superlevel
//                  iterate, or - on the top level - optionally subtract the
//                  mean (all-Neumann case) and reset the controller.
void Multigrid::iterate()
{
    const short command = controller->getCommand();

    if (command == 0) {
        smooth();
        return;
    }

    if (command < 0) {
        if (superlevel != nullptr)
            smooth();
        sublevel->resetZero();
        restriction();
        sublevel->iterate();
        return;
    }

    // command > 0: smoothing happens on every level
    smooth();

    if (superlevel != nullptr) {
        prolongation();
        superlevel->iterate();
        return;
    }

    if (allNeumann)
        subtractMean();
    controller->reset();
}
// Validates an exemptchanops entry of the form <restriction>:<prefix>.
// Sends numeric 955 to the user and returns false when the ':' separator is
// missing or when the restriction does not name a known channel mode.
bool ValidateParam(User* user, Channel* chan, std::string &word)
{
	const std::string::size_type colon = word.find(':');
	if (colon == std::string::npos)
	{
		user->WriteNumeric(955, chan->name, word, "Invalid exemptchanops entry, format is <restriction>:<prefix>");
		return false;
	}

	std::string restriction = word.substr(0, colon);

	// Anything from the first '-' onwards is dropped so that entries such as
	// "auditorium-vis" and "auditorium-see" (m_auditorium) resolve to their base name.
	const std::string::size_type dash = restriction.find('-');
	if (dash != std::string::npos)
		restriction.erase(dash);

	if (ServerInstance->Modes->FindMode(restriction, MODETYPE_CHANNEL))
		return true;

	user->WriteNumeric(955, chan->name, restriction, "Unknown restriction");
	return false;
}
//------------------------------------------------------------------------------------------------------------------------------
// Rebuilds the operator-dependent vectors of `level`:
//   - if `fromLevel` is non-NULL, alpha[] and beta_*[] are restricted from it onto `level`
//     (otherwise they are assumed to have been set on `level` already),
//   - the coefficient boundaries are exchanged,
//   - Dinv (and L1inv when VECTOR_L1INV is defined) are computed for every cell of every local block,
//   - an upper bound on the dominant eigenvalue of D^{-1}A is derived from Gershgorin discs, reduced over
//     all ranks when USE_MPI is defined, and stored in level->dominant_eigenvalue_of_DinvA,
//   - the boundaries of Dinv/L1inv are exchanged.
// `a` and `b` are the scalar coefficients that enter the diagonal and off-diagonal terms below.
// Rank 0 prints progress to stdout.
void rebuild_operator(level_type * level, level_type *fromLevel, double a, double b){
  if(level->my_rank==0){fprintf(stdout," rebuilding operator for level... h=%e ",level->h);fflush(stdout);}

  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // form restriction of alpha[], beta_*[] coefficients from fromLevel
  if(fromLevel != NULL){
    #ifdef VECTOR_ALPHA
    restriction(level,VECTOR_ALPHA ,fromLevel,VECTOR_ALPHA ,RESTRICT_CELL );
    #endif
    restriction(level,VECTOR_BETA_I,fromLevel,VECTOR_BETA_I,RESTRICT_FACE_I);
    restriction(level,VECTOR_BETA_J,fromLevel,VECTOR_BETA_J,RESTRICT_FACE_J);
    restriction(level,VECTOR_BETA_K,fromLevel,VECTOR_BETA_K,RESTRICT_FACE_K);
  } // else case assumes alpha/beta have been set

  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // exchange alpha/beta/... (must be done before calculating Dinv)
  #ifdef VECTOR_ALPHA
  exchange_boundary(level,VECTOR_ALPHA ,STENCIL_SHAPE_BOX); // safe
  #endif
  exchange_boundary(level,VECTOR_BETA_I,STENCIL_SHAPE_BOX);
  exchange_boundary(level,VECTOR_BETA_J,STENCIL_SHAPE_BOX);
  exchange_boundary(level,VECTOR_BETA_K,STENCIL_SHAPE_BOX);

  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // calculate Dinv, L1inv, and estimate the dominant Eigenvalue
  double _timeStart = getTime();
  int block;
  double dominant_eigenvalue = -1e9; // running maximum over all blocks; starts below any expected estimate

  // NOTE(review): the macro receives `block` and `dominant_eigenvalue` by name; presumably it expands to a
  // threaded loop pragma with a max-reduction on dominant_eigenvalue - confirm against its definition.
  PRAGMA_THREAD_ACROSS_BLOCKS_MAX(level,block,level->num_my_blocks,dominant_eigenvalue)
  for(block=0;block<level->num_my_blocks;block++){
    // extent [lo,hi) of this block within its box
    const int box = level->my_blocks[block].read.box;
    const int ilo = level->my_blocks[block].read.i;
    const int jlo = level->my_blocks[block].read.j;
    const int klo = level->my_blocks[block].read.k;
    const int ihi = level->my_blocks[block].dim.i + ilo;
    const int jhi = level->my_blocks[block].dim.j + jlo;
    const int khi = level->my_blocks[block].dim.k + klo;
    int i,j,k;
    const int jStride = level->my_boxes[box].jStride;
    const int kStride = level->my_boxes[box].kStride;
    const int ghosts = level->my_boxes[box].ghosts;
    double h2inv = 1.0/(level->h*level->h);

    // every pointer is advanced by ghosts*(1+jStride+kStride) so that index 0 skips `ghosts` cells in each direction
    #ifdef VECTOR_ALPHA
    double * __restrict__ alpha = level->my_boxes[box].vectors[VECTOR_ALPHA ] + ghosts*(1+jStride+kStride);
    #endif
    double * __restrict__ beta_i = level->my_boxes[box].vectors[VECTOR_BETA_I] + ghosts*(1+jStride+kStride);
    double * __restrict__ beta_j = level->my_boxes[box].vectors[VECTOR_BETA_J] + ghosts*(1+jStride+kStride);
    double * __restrict__ beta_k = level->my_boxes[box].vectors[VECTOR_BETA_K] + ghosts*(1+jStride+kStride);
    double * __restrict__ Dinv = level->my_boxes[box].vectors[VECTOR_DINV ] + ghosts*(1+jStride+kStride);
    #ifdef VECTOR_L1INV
    double * __restrict__ L1inv = level->my_boxes[box].vectors[VECTOR_L1INV ] + ghosts*(1+jStride+kStride);
    #endif

    double block_eigenvalue = -1e9; // maximum estimate within this block

    for(k=klo;k<khi;k++){
    for(j=jlo;j<jhi;j++){
    for(i=ilo;i<ihi;i++){
      int ijk = i + j*jStride + k*kStride;

      // used for quick linear approximation to zero dirichlet BC
      // a flag drops to 0.0 when the corresponding neighbor lies outside the global domain (non-periodic BC only)
      double ilo_is_valid =1.0;
      double ihi_is_valid =1.0;
      double jlo_is_valid =1.0;
      double jhi_is_valid =1.0;
      double klo_is_valid =1.0;
      double khi_is_valid =1.0;
      if(level->boundary_condition.type != BC_PERIODIC){
        if(level->my_boxes[box].low.i+i-1 < 0)ilo_is_valid = 0.0;
        if(level->my_boxes[box].low.j+j-1 < 0)jlo_is_valid = 0.0;
        if(level->my_boxes[box].low.k+k-1 < 0)klo_is_valid = 0.0;
        if(level->my_boxes[box].low.i+i+1 >= level->dim.i)ihi_is_valid = 0.0;
        if(level->my_boxes[box].low.j+j+1 >= level->dim.j)jhi_is_valid = 0.0;
        if(level->my_boxes[box].low.k+k+1 >= level->dim.k)khi_is_valid = 0.0;
      }

      #ifdef STENCIL_VARIABLE_COEFFICIENT
      // radius of Gershgorin disc is the sum of the absolute values of the off-diagonal elements...
      double sumAbsAij = fabs(b*h2inv) * ( fabs( beta_i[ijk ]*ilo_is_valid )+ fabs( beta_j[ijk ]*jlo_is_valid )+ fabs( beta_k[ijk ]*klo_is_valid )+ fabs( beta_i[ijk+1 ]*ihi_is_valid )+ fabs( beta_j[ijk+jStride]*jhi_is_valid )+ fabs( beta_k[ijk+kStride]*khi_is_valid ) );
      // center of Gershgorin disc is the diagonal element...
      // a face whose neighbor is invalid contributes with weight 2 instead of 1 (factor is_valid-2.0)
      double Aii = -b*h2inv*( beta_i[ijk ]*( ilo_is_valid-2.0 )+ beta_j[ijk ]*( jlo_is_valid-2.0 )+ beta_k[ijk ]*( klo_is_valid-2.0 )+ beta_i[ijk+1 ]*( ihi_is_valid-2.0 )+ beta_j[ijk+jStride]*( jhi_is_valid-2.0 )+ beta_k[ijk+kStride]*( khi_is_valid-2.0 ) );
      #ifdef VECTOR_ALPHA
      Aii += a*alpha[ijk];
      #endif
      #else // Constant coefficient versions with fused BC's...
      // radius of Gershgorin disc is the sum of the absolute values of the off-diagonal elements...
      double sumAbsAij = fabs(b*h2inv) * ( ilo_is_valid + jlo_is_valid + klo_is_valid + ihi_is_valid + jhi_is_valid + khi_is_valid );
      // center of Gershgorin disc is the diagonal element...
      double Aii = a - b*h2inv*( ilo_is_valid + jlo_is_valid + klo_is_valid + ihi_is_valid + jhi_is_valid + khi_is_valid - 12.0 );
      #endif

      Dinv[ijk] = 1.0/Aii; // inverse of the diagonal Aii
      double Di = (Aii + sumAbsAij)/Aii;if(Di>block_eigenvalue)block_eigenvalue=Di; // upper limit to Gershgorin disc == bound on dominant eigenvalue
      #ifdef VECTOR_L1INV
      //L1inv[ijk] = 1.0/(Aii+sumAbsAij); // inverse of the L1 row norm... L1inv = ( D+D^{L1} )^{-1}
      if(Aii>=1.5*sumAbsAij)L1inv[ijk] = 1.0/(Aii ); // as suggested by eq 6.5 in Baker et al, "Multigrid smoothers for ultra-parallel computing: additional theory and discussion"...
      else L1inv[ijk] = 1.0/(Aii+0.5*sumAbsAij); //
      #endif
    }}}

    // fold this block's estimate into the rank-local maximum
    if(block_eigenvalue>dominant_eigenvalue){dominant_eigenvalue = block_eigenvalue;}
  }
  level->timers.blas1 += (double)(getTime()-_timeStart);

  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // Reduce the local estimates dominant eigenvalue to a global estimate
  #ifdef USE_MPI
  double _timeStartAllReduce = getTime();
  double send = dominant_eigenvalue; // separate send buffer; the result is received into dominant_eigenvalue
  MPI_Allreduce(&send,&dominant_eigenvalue,1,MPI_DOUBLE,MPI_MAX,MPI_COMM_WORLD);
  double _timeEndAllReduce = getTime();
  level->timers.collectives += (double)(_timeEndAllReduce-_timeStartAllReduce);
  #endif
  if(level->my_rank==0){fprintf(stdout,"eigenvalue_max<%e\n",dominant_eigenvalue);}
  level->dominant_eigenvalue_of_DinvA = dominant_eigenvalue;

  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // exchange Dinv/L1inv/...
  exchange_boundary(level,VECTOR_DINV ,STENCIL_SHAPE_BOX); // safe
  #ifdef VECTOR_L1INV
  exchange_boundary(level,VECTOR_L1INV,STENCIL_SHAPE_BOX);
  #endif
  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
}
std::tuple< std::shared_ptr<Matrix>, std::shared_ptr<Matrix> > transfer_operators(const Matrix &A) { typedef typename backend::value_type<Matrix>::type Val; typedef ptrdiff_t Idx; AMGCL_TIC("aggregates"); Aggregates aggr(A, prm.aggr, prm.nullspace.cols); prm.aggr.eps_strong *= 0.5; AMGCL_TOC("aggregates"); AMGCL_TIC("interpolation"); auto P_tent = tentative_prolongation<Matrix>( rows(A), aggr.count, aggr.id, prm.nullspace, prm.aggr.block_size ); // Filter the system matrix backend::crs<Val> Af; Af.set_size(rows(A), cols(A)); Af.ptr[0] = 0; std::vector<Val> dia(Af.nrows); #pragma omp parallel for for(Idx i = 0; i < static_cast<Idx>(Af.nrows); ++i) { Idx row_begin = A.ptr[i]; Idx row_end = A.ptr[i+1]; Idx row_width = row_end - row_begin; Val D = math::zero<Val>(); for(Idx j = row_begin; j < row_end; ++j) { Idx c = A.col[j]; Val v = A.val[j]; if (c == i) D += v; else if (!aggr.strong_connection[j]) { D += v; --row_width; } } dia[i] = D; Af.ptr[i+1] = row_width; } Af.set_nonzeros(Af.scan_row_sizes()); #pragma omp parallel for for(Idx i = 0; i < static_cast<Idx>(Af.nrows); ++i) { Idx row_begin = A.ptr[i]; Idx row_end = A.ptr[i+1]; Idx row_head = Af.ptr[i]; for(Idx j = row_begin; j < row_end; ++j) { Idx c = A.col[j]; if (c == i) { Af.col[row_head] = i; Af.val[row_head] = dia[i]; ++row_head; } else if (aggr.strong_connection[j]) { Af.col[row_head] = c; Af.val[row_head] = A.val[j]; ++row_head; } } } std::vector<Val> omega; auto P = interpolation(Af, dia, *P_tent, omega); auto R = restriction (Af, dia, *P_tent, omega); AMGCL_TOC("interpolation"); if (prm.nullspace.cols > 0) prm.aggr.block_size = prm.nullspace.cols; return std::make_tuple(P, R); }
// multigrid v-cycle void v_cycle( double* P, uint n_dof, cuint nx, cuint ny, cuint nz, cdouble hx, cdouble hy, cdouble hz, cdouble hx2i, cdouble hy2i, cdouble hz2i, cdouble tol, cuint max_iteration, cuint pre_smooth_iteration, cdouble lx, cdouble ly, cdouble lz, cuint level, cuint max_level, double* F, double& Er, double* Uss, double* Vss, double* Wss, cdouble bcs[][6], cdouble dt ) { cout<<"level: "<<level<<" n_dof: "<<n_dof<<endl; // initialize finite difference matrix (+1 for global constraint) // double** M = new double*[n_dof]; // for(int n = 0; n < (n_dof); n++) // M[n] = new double[n_dof]; // // initialize // #pragma omp parallel for shared(n_dof, M) // for(int i=0; i<n_dof; i++) // for(int j=0; j<n_dof; j++) // M[i][j] = 0; cout<<"fd_matrix_sparse"<<endl; vector<tuple <uint, uint, double> > M_sp; vector<double> val; vector<uint> col_ind; vector<uint> row_ptr(1,0); // create finite difference matrix cout<<"create finite difference matrix"<<endl; // build pressure matrix pressure_matrix( M_sp, val, col_ind, row_ptr, nx, ny, nz, hx2i, hy2i, hz2i, n_dof ); // construct load vector // load vector is created only at the level 0 if(level==0){ F = new double[n_dof]; cout<<"create load vector"<<endl; pressure_rhs(F, Uss, Vss, Wss, nx, ny, nz, bcs, hx, hy, hz, dt); // load_vector(F, n_dof, I,J,K ); } // cout<<"save matrix and vector"<<endl; // char matrix_file[100]; // char vector_file[100]; // sprintf(vector_file, "vector_%i.dat", level); // if(write_vector(n_dof,F,vector_file)) cout<<"write_vector fail"<<endl; // construct solution vector double* U; if(level==0) U=P; else U = new double[n_dof]; double* U_tmp = new double[n_dof]; // initial guess #pragma omp parallel for shared(U, U_tmp) num_threads(nt) for(int n=0; n<n_dof; n++){ U[n] = 0.0; U_tmp[n] = 0.0; } // residual and error double* R = new double[n_dof]; // perform pre-smoothing and compute residual cout<<"pre-smoothing "<<pre_smooth_iteration<<" times"<<endl; Er = tol*10; jacobi_sparse(tol, 
pre_smooth_iteration, n_dof, U, U_tmp, val, col_ind, row_ptr, F, Er, R); // restriction of residual on coarse grid double* F_coar; // Restrict the residual cuint nx_coar = (nx)/2; cuint ny_coar = (ny)/2; cuint nz_coar = (nz)/2; uint n_dof_coar = nx_coar*ny_coar*nz_coar; F_coar = new double[n_dof_coar]; // mesh size cdouble hx_coar = lx/(nx_coar); cdouble hy_coar = ly/(ny_coar); cdouble hz_coar = lz/(nz_coar); // inverse of square of mesh sizes cdouble hx2i_coar = 1.0/(hx_coar*hx_coar); cdouble hy2i_coar = 1.0/(hy_coar*hy_coar); cdouble hz2i_coar = 1.0/(hz_coar*hz_coar); // restric residual to the coarrse grid cout<<"restriction"<<endl; restriction( R, F_coar, nx, ny, nz, nx_coar, ny_coar, nz_coar); // construct solution vector on coarse grid double* U_coar = new double[n_dof_coar]; double* U_coar_tmp = new double[n_dof_coar]; // if the grid is coarsest if( level==max_level){ cout<<"level: "<<level+1<<" n_dof: "<<n_dof_coar<<endl; // initial guess #pragma omp parallel for shared(U_coar, U_coar_tmp) num_threads(nt) for(int n=0; n<n_dof_coar; n++){ U_coar[n] = 0.0; U_coar_tmp[n] = 0.0; } vector<tuple <uint, uint, double> > M_sp_coar; vector<double> val_coar; vector<uint> col_ind_coar; vector<uint> row_ptr_coar(1,0); // create finite difference matrix cout<<"create finite difference matrix"<<endl; // fd_matrix_sparse(M_sp_coar, val_coar, col_ind_coar, row_ptr_coar, // nx_coar,ny_coar,nz_coar, // hx2i_coar, hy2i_coar, hz2i_coar, n_dof_coar ); pressure_matrix( M_sp_coar, val_coar, col_ind_coar, row_ptr_coar, nx_coar, ny_coar, nz_coar, hx2i_coar, hy2i_coar, hz2i_coar, n_dof_coar ); // residual on coarse grid double* R_coar = new double[n_dof_coar]; // exact Jacobi method Er = tol*10; jacobi_sparse(tol, max_iteration, n_dof_coar, U_coar, U_coar_tmp, val_coar, col_ind_coar, row_ptr_coar, F_coar, Er, R_coar); // write_results( U_coar, // n_dof_coar, // I_coar, J_coar, K_coar, // dx_coar, dy_coar, dz_coar, level); delete[] R_coar; // cout<<"R"<<endl; // for(int i=0; i<n_dof; 
i++) // cout<<R[i]<<endl; } else{ // v_cycle on the coarse grid v_cycle( U_coar, n_dof_coar, nx_coar, ny_coar, nz_coar, hx_coar, hy_coar, hz_coar, hx2i_coar, hy2i_coar, hz2i_coar, tol, max_iteration, pre_smooth_iteration, lx, ly, lz, level+1, max_level, F_coar, Er, Uss, Vss, Wss, bcs, dt ); cdouble dx_coar = lx/(nx_coar); cdouble dy_coar = ly/(ny_coar); cdouble dz_coar = lz/(nz_coar); // // write partial results for test purpose // write_results( U_coar, // n_dof_coar, // I_coar, J_coar, K_coar, // dx_coar, dy_coar, dz_coar, level); } // interpolate to fine grid double* E = new double[n_dof]; interpolation(U_coar, E, nx_coar,ny_coar,nz_coar, nx, ny, nz); // correct the fine grid approximation #pragma omp parallel for shared(U,E) num_threads(nt) for(int i=0; i<n_dof; i++){ // cout<<i<<" "<<U[i]<<" "<<E[i]<<" "<<E[i]/U[i]<<endl; U[i] += E[i]; } // perform post-smoothing and compute residual uint post_smooth_iteration; // if(level==0) post_smooth_iteration=max_iteration; // else // post_smooth_iteration=( pre_smooth_iteration+1)*1000; cout<<"post-smoothing "<<post_smooth_iteration<<" times on level " <<level<<endl; // jacobi(tol, post_smooth_iteration, n_dof, U, U_tmp, M, F, Er, R); Er = tol*10; jacobi_sparse(tol, post_smooth_iteration, n_dof, U, U_tmp, val, col_ind, row_ptr, F, Er, R); // cleanup if (level==0) delete[] F; delete[] U_tmp; delete[] R, F_coar; delete[] E; delete[] U_coar, U_coar_tmp; }
// Solves the linear system for the given right-hand side with the HPGMG
// multigrid solver and returns both the solution and its gradient.
//
//   soln     receives the solution converted back from the HPGMG level
//   gphi     receives the cell-centered average of the face fluxes computed from soln
//   a, b     scalars passed to the operator set-up and to the solver
//   alpha    coefficient handed to both ABecLaplacian and the HPGMG set-up
//   beta     face coefficients for ABecLaplacian
//   beta_cc  coefficient passed to SetupHPGMGCoefficients
//   rhs      right-hand side; also the template for the HPGMG level layout
//   bs, geom used to build the boundary data (geom also for output/averaging)
//   n_cell   forwarded to the HPGMG level creation/conversion routines
//
// Besides the primary solve, the routine re-solves on levels 0..2 of the
// hierarchy for a Richardson error analysis, prints diagnostics on the I/O
// processor and optionally writes a plot file.
void solve_with_HPGMG(MultiFab& soln, MultiFab& gphi, Real a, Real b, MultiFab& alpha, PArray<MultiFab>& beta, MultiFab& beta_cc, MultiFab& rhs, const BoxArray& bs, const Geometry& geom, int n_cell)
{
  // Operator used after the solve to compute the fluxes of the solution.
  BndryData bd(bs, 1, geom);
  set_boundary(bd, rhs, 0);
  ABecLaplacian abec_operator(bd, dx);
  abec_operator.setScalars(a, b);
  abec_operator.setCoefficients(alpha, beta);

  int minCoarseDim;
  if (domain_boundary_condition == BC_PERIODIC) {
    minCoarseDim = 2; // avoid problems with black box calculation of D^{-1} for poisson with periodic BC's on a 1^3 grid
  } else {
    minCoarseDim = 1; // assumes you can drop order on the boundaries
  }

  level_type level_h;
  mg_type MG_h;
  int numVectors = 12;

  // Defaults describe a serial run; overwritten when built with MPI.
  int my_rank = 0, num_ranks = 1;
#ifdef BL_USE_MPI
  MPI_Comm_size (MPI_COMM_WORLD, &num_ranks);
  MPI_Comm_rank (MPI_COMM_WORLD, &my_rank);
#endif /* BL_USE_MPI */

  const double h0 = dx[0];

  // Create the geometric structure of the HPGMG grid using the RHS MultiFab as
  // a template. This doesn't copy any actual data.
  CreateHPGMGLevel(&level_h, rhs, n_cell, max_grid_size, my_rank, num_ranks, domain_boundary_condition, numVectors, h0);

  // Set up the coefficients for the linear operator L.
  SetupHPGMGCoefficients(a, b, alpha, beta_cc, &level_h);

  // Now that the HPGMG grid is built, populate it with RHS data.
  ConvertToHPGMGLevel(rhs, n_cell, max_grid_size, &level_h, VECTOR_F);

#ifdef USE_HELMHOLTZ
  if (ParallelDescriptor::IOProcessor()) { std::cout << "Creating Helmholtz (a=" << a << ", b=" << b << ") test problem" << std::endl;; }
#else
  if (ParallelDescriptor::IOProcessor()) { std::cout << "Creating Poisson (a=" << a << ", b=" << b << ") test problem" << std::endl;; }
#endif /* USE_HELMHOLTZ */

  // With periodic boundaries a non-zero mean of f is only reported; the shift
  // that would remove it is disabled below.
  if (level_h.boundary_condition.type == BC_PERIODIC) {
    double average_value_of_f = mean (&level_h, VECTOR_F);
    if (average_value_of_f != 0.0) {
      if (ParallelDescriptor::IOProcessor()) {
        std::cerr << "WARNING: Periodic boundary conditions, but f does not sum to zero... mean(f)=" << average_value_of_f << std::endl;
      }
      //shift_vector(&level_h,VECTOR_F,VECTOR_F,-average_value_of_f);
    }
  }

  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  rebuild_operator(&level_h,NULL,a,b); // i.e. calculate Dinv and lambda_max
  MGBuild(&MG_h,&level_h,a,b,minCoarseDim,ParallelDescriptor::Communicator()); // build the Multigrid Hierarchy

  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // Primary solve on the finest level, starting from a zero initial guess.
  if (ParallelDescriptor::IOProcessor()) std::cout << std::endl << std::endl << "===== STARTING SOLVE =====" << std::endl << std::flush;
  MGResetTimers (&MG_h);
  zero_vector (MG_h.levels[0], VECTOR_U);
#ifdef USE_FCYCLES
  FMGSolve (&MG_h, 0, VECTOR_U, VECTOR_F, a, b, tolerance_abs, tolerance_rel);
#else
  MGSolve (&MG_h, 0, VECTOR_U, VECTOR_F, a, b, tolerance_abs, tolerance_rel);
#endif /* USE_FCYCLES */
  MGPrintTiming (&MG_h, 0); // don't include the error check in the timing results

  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  if (ParallelDescriptor::IOProcessor()) std::cout << std::endl << std::endl << "===== Performing Richardson error analysis ==========================" << std::endl;
  // solve A^h u^h = f^h
  // solve A^2h u^2h = f^2h
  // solve A^4h u^4h = f^4h
  // error analysis...
  MGResetTimers(&MG_h);
  const double dtol = tolerance_abs;
  const double rtol = tolerance_rel;
  // NOTE(review): the loop touches MG_h.levels[0..2], i.e. it relies on the hierarchy having at least three levels.
  int l;for(l=0;l<3;l++){
    // f on level l is the cell restriction of f on level l-1
    if(l>0)restriction(MG_h.levels[l],VECTOR_F,MG_h.levels[l-1],VECTOR_F,RESTRICT_CELL);
    zero_vector(MG_h.levels[l],VECTOR_U);
    #ifdef USE_FCYCLES
    FMGSolve(&MG_h,l,VECTOR_U,VECTOR_F,a,b,dtol,rtol);
    #else
    MGSolve(&MG_h,l,VECTOR_U,VECTOR_F,a,b,dtol,rtol);
    #endif
  }
  richardson_error(&MG_h,0,VECTOR_U);

  // Now convert solution from HPGMG back to rhs MultiFab.
  // (the destination actually used here is `soln`)
  ConvertFromHPGMGLevel(soln, &level_h, VECTOR_U);

  const double norm_from_HPGMG = norm(&level_h, VECTOR_U);
  const double mean_from_HPGMG = mean(&level_h, VECTOR_U);
  const Real norm0 = soln.norm0();
  const Real norm2 = soln.norm2();
  // NOTE(review): the last two labels say "RHS", but norm0/norm2 are taken from `soln`.
  if (ParallelDescriptor::IOProcessor()) {
    std::cout << "mean from HPGMG: " << mean_from_HPGMG << std::endl;
    std::cout << "norm from HPGMG: " << norm_from_HPGMG << std::endl;
    std::cout << "norm0 of RHS copied to MF: " << norm0 << std::endl;
    std::cout << "norm2 of RHS copied to MF: " << norm2 << std::endl;
  }

  // Write the MF to disk for comparison with the in-house solver
  if (plot_soln) {
    writePlotFile("SOLN-HPGMG", soln, geom);
  }

  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // Release the HPGMG hierarchy and level; only `soln` is needed from here on.
  MGDestroy(&MG_h);
  destroy_level(&level_h);

  //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // One face-centered MultiFab per space dimension; the PArray is created with PArrayManage.
  PArray<MultiFab> grad_phi(BL_SPACEDIM, PArrayManage);
  for (int n = 0; n < BL_SPACEDIM; ++n)
    grad_phi.set(n, new MultiFab(BoxArray(soln.boxArray()).surroundingNodes(n), 1, 0));

#if (BL_SPACEDIM == 2)
  abec_operator.compFlux(grad_phi[0],grad_phi[1],soln);
#elif (BL_SPACEDIM == 3)
  abec_operator.compFlux(grad_phi[0],grad_phi[1],grad_phi[2],soln);
#endif

  // Average edge-centered gradients to cell centers.
  BoxLib::average_face_to_cellcenter(gphi, grad_phi, geom);
}
//------------------------------------------------------------------------------------------------------------------------------ void rebuild_operator(level_type * level, level_type *fromLevel, double a, double b){ if(level->my_rank==0){printf(" rebuilding operator for level... h=%e ",level->h);fflush(stdout);} // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // form restriction of alpha[], beta_*[] coefficients from fromLevel if(fromLevel != NULL){ restriction(level,VECTOR_ALPHA ,fromLevel,VECTOR_ALPHA ,RESTRICT_CELL ); restriction(level,VECTOR_BETA_I,fromLevel,VECTOR_BETA_I,RESTRICT_FACE_I); restriction(level,VECTOR_BETA_J,fromLevel,VECTOR_BETA_J,RESTRICT_FACE_J); restriction(level,VECTOR_BETA_K,fromLevel,VECTOR_BETA_K,RESTRICT_FACE_K); } // else case assumes alpha/beta have been set // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // exchange alpha/beta/... 
(must be done before calculating Dinv) exchange_boundary(level,VECTOR_ALPHA ,0); // must be 0(faces,edges,corners) for CA version or 27pt exchange_boundary(level,VECTOR_BETA_I,0); exchange_boundary(level,VECTOR_BETA_J,0); exchange_boundary(level,VECTOR_BETA_K,0); // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // calculate Dinv, L1inv, and estimate the dominant Eigenvalue uint64_t _timeStart = CycleTime(); int printedError=0; int box; double dominant_eigenvalue = -1e9; #pragma omp parallel for private(box) OMP_THREAD_ACROSS_BOXES(level->concurrent_boxes) reduction(max:dominant_eigenvalue) schedule(static) for(box=0;box<level->num_my_boxes;box++){ int i,j,k; int lowi = level->my_boxes[box].low.i; int lowj = level->my_boxes[box].low.j; int lowk = level->my_boxes[box].low.k; int jStride = level->my_boxes[box].jStride; int kStride = level->my_boxes[box].kStride; int ghosts = level->my_boxes[box].ghosts; int dim = level->my_boxes[box].dim; double h2inv = 1.0/(level->h*level->h); double * __restrict__ alpha = level->my_boxes[box].vectors[VECTOR_ALPHA ] + ghosts*(1+jStride+kStride); double * __restrict__ beta_i = level->my_boxes[box].vectors[VECTOR_BETA_I] + ghosts*(1+jStride+kStride); double * __restrict__ beta_j = level->my_boxes[box].vectors[VECTOR_BETA_J] + ghosts*(1+jStride+kStride); double * __restrict__ beta_k = level->my_boxes[box].vectors[VECTOR_BETA_K] + ghosts*(1+jStride+kStride); double * __restrict__ Dinv = level->my_boxes[box].vectors[VECTOR_DINV ] + ghosts*(1+jStride+kStride); double * __restrict__ L1inv = level->my_boxes[box].vectors[VECTOR_L1INV ] + ghosts*(1+jStride+kStride); double * __restrict__ valid = level->my_boxes[box].vectors[VECTOR_VALID ] + ghosts*(1+jStride+kStride); double box_eigenvalue = -1e9; #pragma omp parallel for private(k,j,i) OMP_THREAD_WITHIN_A_BOX(level->threads_per_box) reduction(max:box_eigenvalue) schedule(static) for(k=0;k<dim;k++){ 
for(j=0;j<dim;j++){ for(i=0;i<dim;i++){ int ijk = i + j*jStride + k*kStride; #if 0 // FIX This looks wrong, but is faster... theory is because its doing something akin to SOR // radius of Gershgorin disc is the sum of the absolute values of the off-diagonal elements... double sumAbsAij = fabs(b*h2inv*beta_i[ijk]) + fabs(b*h2inv*beta_i[ijk+ 1]) + fabs(b*h2inv*beta_j[ijk]) + fabs(b*h2inv*beta_j[ijk+jStride]) + fabs(b*h2inv*beta_k[ijk]) + fabs(b*h2inv*beta_k[ijk+kStride]); // centr of Gershgorin disc is the diagonal element... double Aii = a*alpha[ijk] - b*h2inv*( -beta_i[ijk]-beta_i[ijk+ 1] -beta_j[ijk]-beta_j[ijk+jStride] -beta_k[ijk]-beta_k[ijk+kStride] ); #endif #if 1 // radius of Gershgorin disc is the sum of the absolute values of the off-diagonal elements... double sumAbsAij = fabs(b*h2inv) * ( fabs( beta_i[ijk ]*valid[ijk-1 ] )+ fabs( beta_j[ijk ]*valid[ijk-jStride] )+ fabs( beta_k[ijk ]*valid[ijk-kStride] )+ fabs( beta_i[ijk+1 ]*valid[ijk+1 ] )+ fabs( beta_j[ijk+jStride]*valid[ijk+jStride] )+ fabs( beta_k[ijk+kStride]*valid[ijk+kStride] ) ); // centr of Gershgorin disc is the diagonal element... double Aii = a*alpha[ijk] - b*h2inv*( beta_i[ijk ]*( valid[ijk-1 ]-2.0 )+ beta_j[ijk ]*( valid[ijk-jStride]-2.0 )+ beta_k[ijk ]*( valid[ijk-kStride]-2.0 )+ beta_i[ijk+1 ]*( valid[ijk+1 ]-2.0 )+ beta_j[ijk+jStride]*( valid[ijk+jStride]-2.0 )+ beta_k[ijk+kStride]*( valid[ijk+kStride]-2.0 ) ); #endif Dinv[ijk] = 1.0/Aii; // inverse of the diagonal Aii //L1inv[ijk] = 1.0/(Aii+sumAbsAij); // inverse of the L1 row norm // L1inv = ( D+D^{L1} )^{-1} // as suggested by eq 6.5 in Baker et al, "Multigrid smoothers for ultra-parallel computing: additional theory and discussion"... 
if(Aii>=1.5*sumAbsAij)L1inv[ijk] = 1.0/(Aii ); // else L1inv[ijk] = 1.0/(Aii+0.5*sumAbsAij); // double Di = (Aii + sumAbsAij)/Aii;if(Di>box_eigenvalue)box_eigenvalue=Di; // upper limit to Gershgorin disc == bound on dominant eigenvalue }}} if(box_eigenvalue>dominant_eigenvalue){dominant_eigenvalue = box_eigenvalue;} } level->cycles.blas1 += (uint64_t)(CycleTime()-_timeStart); // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // Reduce the local estimates dominant eigenvalue to a global estimate #ifdef USE_MPI uint64_t _timeStartAllReduce = CycleTime(); double send = dominant_eigenvalue; MPI_Allreduce(&send,&dominant_eigenvalue,1,MPI_DOUBLE,MPI_MAX,MPI_COMM_WORLD); uint64_t _timeEndAllReduce = CycleTime(); level->cycles.collectives += (uint64_t)(_timeEndAllReduce-_timeStartAllReduce); #endif if(level->my_rank==0){printf("eigenvalue_max<%e\n",dominant_eigenvalue);fflush(stdout);} level->dominant_eigenvalue_of_DinvA = dominant_eigenvalue; // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // exchange Dinv/L1inv/... exchange_boundary(level,VECTOR_DINV ,0); // must be 0(faces,edges,corners) for CA version exchange_boundary(level,VECTOR_L1INV,0); // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - }
// Looks up the restriction registered for `id` and forwards the query to its
// accessible_nearest(position, result), returning whatever that call returns.
// The restriction is required to exist (checked with VERIFY).
u32 CSpaceRestrictionManager::accessible_nearest(ALife::_OBJECT_ID id, const Fvector &position, Fvector &result)
{
	CRestrictionPtr client_restriction = restriction(id);
	VERIFY(client_restriction);

	const u32 nearest = client_restriction->accessible_nearest(position, result);
	return nearest;
}
//------------------------------------------------------------------------------------------------------------------------------ int main(int argc, char **argv){ int my_rank=0; int num_tasks=1; int OMP_Threads = 1; //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #ifdef _OPENMP #pragma omp parallel { #pragma omp master { OMP_Threads = omp_get_num_threads(); } } #endif //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // initialize MPI and HPM #ifdef USE_MPI int actual_threading_model = -1; int requested_threading_model = -1; requested_threading_model = MPI_THREAD_SINGLE; //requested_threading_model = MPI_THREAD_FUNNELED; //requested_threading_model = MPI_THREAD_SERIALIZED; //requested_threading_model = MPI_THREAD_MULTIPLE; #ifdef _OPENMP requested_threading_model = MPI_THREAD_FUNNELED; //requested_threading_model = MPI_THREAD_SERIALIZED; //requested_threading_model = MPI_THREAD_MULTIPLE; #endif MPI_Init_thread(&argc, &argv, requested_threading_model, &actual_threading_model); MPI_Comm_size(MPI_COMM_WORLD, &num_tasks); MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); #ifdef USE_HPM // IBM HPM counters for BGQ... HPM_Init(); #endif #endif // USE_MPI //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // parse the arguments... 
int log2_box_dim = 6; // 64^3 int target_boxes_per_rank = 1; //int64_t target_memory_per_rank = -1; // not specified int64_t box_dim = -1; int64_t boxes_in_i = -1; int64_t target_boxes = -1; if(argc==3){ log2_box_dim=atoi(argv[1]); target_boxes_per_rank=atoi(argv[2]); if(log2_box_dim>9){ // NOTE, in order to use 32b int's for array indexing, box volumes must be less than 2^31 doubles if(my_rank==0){fprintf(stderr,"log2_box_dim must be less than 10\n");} #ifdef USE_MPI MPI_Finalize(); #endif exit(0); } if(log2_box_dim<4){ if(my_rank==0){fprintf(stderr,"log2_box_dim must be at least 4\n");} #ifdef USE_MPI MPI_Finalize(); #endif exit(0); } if(target_boxes_per_rank<1){ if(my_rank==0){fprintf(stderr,"target_boxes_per_rank must be at least 1\n");} #ifdef USE_MPI MPI_Finalize(); #endif exit(0); } #ifndef MAX_COARSE_DIM #define MAX_COARSE_DIM 11 #endif box_dim=1<<log2_box_dim; target_boxes = (int64_t)target_boxes_per_rank*(int64_t)num_tasks; boxes_in_i = -1; int64_t bi; for(bi=1;bi<1000;bi++){ // search all possible problem sizes to find acceptable boxes_in_i int64_t total_boxes = bi*bi*bi; if(total_boxes<=target_boxes){ int64_t coarse_grid_dim = box_dim*bi; while( (coarse_grid_dim%2) == 0){coarse_grid_dim=coarse_grid_dim/2;} if(coarse_grid_dim<=MAX_COARSE_DIM){ boxes_in_i = bi; } } } if(boxes_in_i<1){ if(my_rank==0){fprintf(stderr,"failed to find an acceptable problem size\n");} #ifdef USE_MPI MPI_Finalize(); #endif exit(0); } } // argc==3 #if 0 else if(argc==2){ // interpret argv[1] as target_memory_per_rank char *ptr = argv[1]; char *tmp; target_memory_per_rank = strtol(ptr,&ptr,10); if(target_memory_per_rank<1){ if(my_rank==0){fprintf(stderr,"unrecognized target_memory_per_rank... 
'%s'\n",argv[1]);} #ifdef USE_MPI MPI_Finalize(); #endif exit(0); } tmp=strstr(ptr,"TB");if(tmp){ptr=tmp+2;target_memory_per_rank *= (uint64_t)(1<<30)*(1<<10);} tmp=strstr(ptr,"GB");if(tmp){ptr=tmp+2;target_memory_per_rank *= (uint64_t)(1<<30);} tmp=strstr(ptr,"MB");if(tmp){ptr=tmp+2;target_memory_per_rank *= (uint64_t)(1<<20);} tmp=strstr(ptr,"tb");if(tmp){ptr=tmp+2;target_memory_per_rank *= (uint64_t)(1<<30)*(1<<10);} tmp=strstr(ptr,"gb");if(tmp){ptr=tmp+2;target_memory_per_rank *= (uint64_t)(1<<30);} tmp=strstr(ptr,"mb");if(tmp){ptr=tmp+2;target_memory_per_rank *= (uint64_t)(1<<20);} if( (ptr) && (*ptr != '\0') ){ if(my_rank==0){fprintf(stderr,"unrecognized units... '%s'\n",ptr);} #ifdef USE_MPI MPI_Finalize(); #endif exit(0); } // FIX, now search for an 'acceptable' box_dim and boxes_in_i constrained by target_memory_per_rank, num_tasks, and MAX_COARSE_DIM } // argc==2 #endif else{ if(my_rank==0){fprintf(stderr,"usage: ./hpgmg-fv [log2_box_dim] [target_boxes_per_rank]\n");} //fprintf(stderr," ./hpgmg-fv [target_memory_per_rank[MB,GB,TB]]\n");} #ifdef USE_MPI MPI_Finalize(); #endif exit(0); } //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if(my_rank==0){ fprintf(stdout,"\n\n"); fprintf(stdout,"********************************************************************************\n"); fprintf(stdout,"*** HPGMG-FV Benchmark ***\n"); fprintf(stdout,"********************************************************************************\n"); #ifdef USE_MPI if(requested_threading_model == MPI_THREAD_MULTIPLE )fprintf(stdout,"Requested MPI_THREAD_MULTIPLE, "); else if(requested_threading_model == MPI_THREAD_SINGLE )fprintf(stdout,"Requested MPI_THREAD_SINGLE, "); else if(requested_threading_model == MPI_THREAD_FUNNELED )fprintf(stdout,"Requested MPI_THREAD_FUNNELED, "); else if(requested_threading_model == MPI_THREAD_SERIALIZED)fprintf(stdout,"Requested MPI_THREAD_SERIALIZED, "); else 
if(requested_threading_model == MPI_THREAD_MULTIPLE )fprintf(stdout,"Requested MPI_THREAD_MULTIPLE, "); else fprintf(stdout,"Requested Unknown MPI Threading Model (%d), ",requested_threading_model); if(actual_threading_model == MPI_THREAD_MULTIPLE )fprintf(stdout,"got MPI_THREAD_MULTIPLE\n"); else if(actual_threading_model == MPI_THREAD_SINGLE )fprintf(stdout,"got MPI_THREAD_SINGLE\n"); else if(actual_threading_model == MPI_THREAD_FUNNELED )fprintf(stdout,"got MPI_THREAD_FUNNELED\n"); else if(actual_threading_model == MPI_THREAD_SERIALIZED)fprintf(stdout,"got MPI_THREAD_SERIALIZED\n"); else if(actual_threading_model == MPI_THREAD_MULTIPLE )fprintf(stdout,"got MPI_THREAD_MULTIPLE\n"); else fprintf(stdout,"got Unknown MPI Threading Model (%d)\n",actual_threading_model); #endif fprintf(stdout,"%d MPI Tasks of %d threads\n",num_tasks,OMP_Threads); fprintf(stdout,"\n\n===== Benchmark setup ==========================================================\n"); } //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // create the fine level... 
#ifdef USE_PERIODIC_BC int bc = BC_PERIODIC; int minCoarseDim = 2; // avoid problems with black box calculation of D^{-1} for poisson with periodic BC's on a 1^3 grid #else int bc = BC_DIRICHLET; int minCoarseDim = 1; // assumes you can drop order on the boundaries #endif level_type level_h; int ghosts=stencil_get_radius(); create_level(&level_h,boxes_in_i,box_dim,ghosts,VECTORS_RESERVED,bc,my_rank,num_tasks); #ifdef USE_HELMHOLTZ double a=1.0;double b=1.0; // Helmholtz if(my_rank==0)fprintf(stdout," Creating Helmholtz (a=%f, b=%f) test problem\n",a,b); #else double a=0.0;double b=1.0; // Poisson if(my_rank==0)fprintf(stdout," Creating Poisson (a=%f, b=%f) test problem\n",a,b); #endif double h=1.0/( (double)boxes_in_i*(double)box_dim ); // [0,1]^3 problem initialize_problem(&level_h,h,a,b); // initialize VECTOR_ALPHA, VECTOR_BETA*, and VECTOR_F rebuild_operator(&level_h,NULL,a,b); // calculate Dinv and lambda_max if(level_h.boundary_condition.type == BC_PERIODIC){ // remove any constants from the RHS for periodic problems double average_value_of_f = mean(&level_h,VECTOR_F); if(average_value_of_f!=0.0){ if(my_rank==0){fprintf(stderr," WARNING... Periodic boundary conditions, but f does not sum to zero... mean(f)=%e\n",average_value_of_f);} shift_vector(&level_h,VECTOR_F,VECTOR_F,-average_value_of_f); } } //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // create the MG hierarchy... mg_type MG_h; MGBuild(&MG_h,&level_h,a,b,minCoarseDim); // build the Multigrid Hierarchy //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // HPGMG-500 benchmark proper // evaluate performance on problem sizes of h, 2h, and 4h // (i.e. 
examine dynamic range for problem sizes N, N/8, and N/64) //double dtol=1e-15;double rtol= 0.0; // converged if ||D^{-1}(b-Ax)|| < dtol double dtol= 0.0;double rtol=1e-10; // converged if ||b-Ax|| / ||b|| < rtol int l; #ifndef TEST_ERROR double AverageSolveTime[3]; for(l=0;l<3;l++){ if(l>0)restriction(MG_h.levels[l],VECTOR_F,MG_h.levels[l-1],VECTOR_F,RESTRICT_CELL); bench_hpgmg(&MG_h,l,a,b,dtol,rtol); AverageSolveTime[l] = (double)MG_h.timers.MGSolve / (double)MG_h.MGSolves_performed; if(my_rank==0){fprintf(stdout,"\n\n===== Timing Breakdown =========================================================\n");} MGPrintTiming(&MG_h,l); } if(my_rank==0){ #ifdef CALIBRATE_TIMER double _timeStart=getTime();sleep(1);double _timeEnd=getTime(); double SecondsPerCycle = (double)1.0/(double)(_timeEnd-_timeStart); #else double SecondsPerCycle = 1.0; #endif fprintf(stdout,"\n\n===== Performance Summary ======================================================\n"); for(l=0;l<3;l++){ double DOF = (double)MG_h.levels[l]->dim.i*(double)MG_h.levels[l]->dim.j*(double)MG_h.levels[l]->dim.k; double seconds = SecondsPerCycle*(double)AverageSolveTime[l]; double DOFs = DOF / seconds; fprintf(stdout," h=%0.15e DOF=%0.15e time=%0.6f DOF/s=%0.3e MPI=%d OMP=%d\n",MG_h.levels[l]->h,DOF,seconds,DOFs,num_tasks,OMP_Threads); } } #endif //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if(my_rank==0){fprintf(stdout,"\n\n===== Richardson error analysis ================================================\n");} // solve A^h u^h = f^h // solve A^2h u^2h = f^2h // solve A^4h u^4h = f^4h // error analysis... 
MGResetTimers(&MG_h); for(l=0;l<3;l++){ if(l>0)restriction(MG_h.levels[l],VECTOR_F,MG_h.levels[l-1],VECTOR_F,RESTRICT_CELL); zero_vector(MG_h.levels[l],VECTOR_U); #ifdef USE_FCYCLES FMGSolve(&MG_h,l,VECTOR_U,VECTOR_F,a,b,dtol,rtol); #else MGSolve(&MG_h,l,VECTOR_U,VECTOR_F,a,b,dtol,rtol); #endif } richardson_error(&MG_h,0,VECTOR_U); //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if(my_rank==0){fprintf(stdout,"\n\n===== Deallocating memory ======================================================\n");} MGDestroy(&MG_h); destroy_level(&level_h); //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - if(my_rank==0){fprintf(stdout,"\n\n===== Done =====================================================================\n");} #ifdef USE_MPI #ifdef USE_HPM // IBM performance counters for BGQ... HPM_Print(); #endif MPI_Finalize(); #endif return(0); //- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - }
// Calls remove_border() on the restriction registered for `id`.
// Does nothing when restriction(id) yields an empty pointer.
void CSpaceRestrictionManager::remove_border(ALife::_OBJECT_ID id)
{
	CRestrictionPtr client_restriction = restriction(id);
	if (!client_restriction)
		return;

	client_restriction->remove_border();
}
int main (int argc, const char * argv[]) { int i, j, kk; int x, y; float maxdiff; float Finalmaxdiff = 0.0; float time; FILE *fp; //get command line arguments coarse_dim = (argc > 1)? atoi(argv[1]) : MAXSIZE; solution_iter = (argc > 2)? atoi(argv[2]) : MAXITER; if (coarse_dim > MAXSIZE) coarse_dim = MAXSIZE; if ((solution_iter > MAXITER)||(solution_iter <= 0)) solution_iter = MAXITER; //accomodate the boundary conditions in size coarse_dim_with_boundary = coarse_dim + 2; fine_dim = (coarse_dim*2)+1; fine_dim_with_boundary=fine_dim+2; //calculate the coarse grid with double spacing printf("\n\n******* Fine Grid Size: %d and Number of coarse iterations: %d *******\n\n", fine_dim, solution_iter); //create the matrices grid_fine = (float* )malloc(fine_dim_with_boundary * fine_dim_with_boundary * sizeof(float)); grid_coarse = (float* )malloc(coarse_dim_with_boundary * coarse_dim_with_boundary * sizeof(float)); //Set inner values for (i=1; i<=fine_dim; i++) { for (j=1; j<=fine_dim; j++) { grid_fine[(i*fine_dim+j)-1]=0; } } //Set boundary conditions for (i=0; i<fine_dim_with_boundary; i++) { grid_fine[i]=1;// First row grid_fine[i*fine_dim_with_boundary]=1; // First column grid_fine[(i*fine_dim_with_boundary)+(fine_dim+1)]=1; // Last column grid_fine[(fine_dim_with_boundary*(fine_dim_with_boundary-1))+i]=1; // Last Row } i = 0; j = 0; for (x=0; x<coarse_dim_with_boundary; x++) { for (y=0; y<coarse_dim_with_boundary; y++) { grid_coarse[x*coarse_dim_with_boundary+y] = grid_fine[i*fine_dim_with_boundary+j]; j=j+2; } i=i+2; j=0; } time = timer(); //******************************* STEP 1 SMOOTHING ********************************************** // Step1:Smoothing via jacobi //********************************************** //********************************************** for(kk = 0; kk < v_cycles; kk++) { //printf("\nStep1 Smoothing on fine matrix: DONE\n"); jacobi(grid_fine, fine_dim_with_boundary, smoothing_iter); //printMatrix(grid_fine, fine_dim_with_boundary); 
//***************************** STEP 2 RESTRICTION ********************************************** //step2: Restrict the fine grid to a coarser grid in which the points are twice as far apart //restriction operator //coarse[x][y] = fine[i][j]*0.5 + (fine[i-1][j] + fine[i][j-1] + fine[i][j+1] + fine[i+1][j])* 0.125 //********************************************** //********************************************** restriction(); //printf("\nStep2 coarse grid restriction: DONE\n"); // printMatrix(grid_coarse, coarse_dim_with_boundary); //******************************** STEP 3 SOLUTION ********************************************** //step3: compute the solution to desired accuracy //********************************************** //********************************************** jacobi(grid_coarse, coarse_dim_with_boundary, solution_iter); //printf("\n\n\nStep3 %d iterations on coarse: DONE\n", solution_iter); //printMatrix(grid_coarse, coarse_dim_with_boundary); //*************************** STEP 4 INTERPOLATION ********************************************** //step4: Interpolate the coarse grid back to fine grid //********************************************** //********************************************** interpolate(); //printf("\n\n\nStep4 matrix Interpolation back to fine grid: DONE\n"); //printMatrix(grid_fine, fine_dim_with_boundary); //****************************** STEP 5: SMOOTHING ********************************************** //step5: update the fine grid for a few iterations //********************************************** //********************************************** jacobi(grid_fine, fine_dim_with_boundary, smoothing_iter); //printf("\n\n\nStep5 Final Smoothing: DONE\n"); // printMatrix(grid_fine, fine_dim_with_boundary); } Finalmaxdiff = 0.0; for (i=1; i<fine_dim_with_boundary; i++) { for (j=1; j<fine_dim_with_boundary; j++) { Finalmaxdiff = max(Finalmaxdiff, absolute(1 - grid_fine[(i*fine_dim_with_boundary)+j])); //Finalmaxdiff = max(Finalmaxdiff, 
absolute(newMatrix_fine[(i*fine_dim_with_boundary)+j] - grid_fine[(i*fine_dim_with_boundary)+j])); } } printf("\nFinal maxdiff: %f after %d V-cycles\n\n",Finalmaxdiff, v_cycles); time=timer()-time; printf("Elapsed time: %f\n",time/1000000.0); fp=fopen("multigrid_gauss_serial_data.txt", "wb"); if(fp==NULL) { printf("Error: can't open file.\n"); exit(0); } //save in file for (i=0; i<fine_dim_with_boundary; i++) { for(j=0; j<fine_dim_with_boundary; j++) { fprintf(fp, "%f ", grid_fine[i*fine_dim_with_boundary+j]); } fputs("\n", fp); } printf("data saved in multigrid_gauss_serial_data.txt\n"); fclose(fp); free(grid_fine); free(grid_coarse); return 0; }//end Main
//------------------------------------------------------------------------------------------------------------------------------ void rebuild_operator(level_type * level, level_type *fromLevel, double a, double b){ if(level->my_rank==0){fprintf(stdout," rebuilding 27pt CC operator for level... h=%e ",level->h);} // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // form restriction of alpha[], beta_*[] coefficients from fromLevel if(fromLevel != NULL){ restriction(level,VECTOR_ALPHA ,fromLevel,VECTOR_ALPHA ,RESTRICT_CELL ); restriction(level,VECTOR_BETA_I,fromLevel,VECTOR_BETA_I,RESTRICT_FACE_I); restriction(level,VECTOR_BETA_J,fromLevel,VECTOR_BETA_J,RESTRICT_FACE_J); restriction(level,VECTOR_BETA_K,fromLevel,VECTOR_BETA_K,RESTRICT_FACE_K); } // else case assumes alpha/beta have been set // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // exchange alpha/beta/... 
(must be done before calculating Dinv) exchange_boundary(level,VECTOR_ALPHA ,0); // must be 0(faces,edges,corners) for CA version or 27pt exchange_boundary(level,VECTOR_BETA_I,0); exchange_boundary(level,VECTOR_BETA_J,0); exchange_boundary(level,VECTOR_BETA_K,0); // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // calculate Dinv, L1inv, and estimate the dominant Eigenvalue uint64_t _timeStart = CycleTime(); int block; double dominant_eigenvalue = -1e9; PRAGMA_THREAD_ACROSS_BLOCKS_MAX(level,block,level->num_my_blocks,dominant_eigenvalue) for(block=0;block<level->num_my_blocks;block++){ const int box = level->my_blocks[block].read.box; const int ilo = level->my_blocks[block].read.i; const int jlo = level->my_blocks[block].read.j; const int klo = level->my_blocks[block].read.k; const int ihi = level->my_blocks[block].dim.i + ilo; const int jhi = level->my_blocks[block].dim.j + jlo; const int khi = level->my_blocks[block].dim.k + klo; int i,j,k; const int jStride = level->my_boxes[box].jStride; const int kStride = level->my_boxes[box].kStride; const int ghosts = level->my_boxes[box].ghosts; double h2inv = 1.0/(level->h*level->h); double * __restrict__ alpha = level->my_boxes[box].vectors[VECTOR_ALPHA ] + ghosts*(1+jStride+kStride); double * __restrict__ beta_i = level->my_boxes[box].vectors[VECTOR_BETA_I] + ghosts*(1+jStride+kStride); double * __restrict__ beta_j = level->my_boxes[box].vectors[VECTOR_BETA_J] + ghosts*(1+jStride+kStride); double * __restrict__ beta_k = level->my_boxes[box].vectors[VECTOR_BETA_K] + ghosts*(1+jStride+kStride); double * __restrict__ Dinv = level->my_boxes[box].vectors[VECTOR_DINV ] + ghosts*(1+jStride+kStride); double * __restrict__ L1inv = level->my_boxes[box].vectors[VECTOR_L1INV ] + ghosts*(1+jStride+kStride); double * __restrict__ valid = level->my_boxes[box].vectors[VECTOR_VALID ] + ghosts*(1+jStride+kStride); double block_eigenvalue = -1e9; 
for(k=klo;k<khi;k++){ for(j=jlo;j<jhi;j++){ for(i=ilo;i<ihi;i++){ int ijk = i + j*jStride + k*kStride; // radius of Gershgorin disc is the sum of the absolute values of the off-diagonal elements... double sumAbsAij = fabs(b*h2inv*6.0*STENCIL_COEF1) + fabs(b*h2inv*12.0*STENCIL_COEF2) + fabs(b*h2inv*8.0*STENCIL_COEF3); // center of Gershgorin disc is the diagonal element... double Aii = a - b*h2inv*( STENCIL_COEF0 ); Dinv[ijk] = 1.0/Aii; // inverse of the diagonal Aii //L1inv[ijk] = 1.0/(Aii+sumAbsAij); // inverse of the L1 row norm... L1inv = ( D+D^{L1} )^{-1} // as suggested by eq 6.5 in Baker et al, "Multigrid smoothers for ultra-parallel computing: additional theory and discussion"... if(Aii>=1.5*sumAbsAij)L1inv[ijk] = 1.0/(Aii ); // else L1inv[ijk] = 1.0/(Aii+0.5*sumAbsAij); // double Di = (Aii + sumAbsAij)/Aii;if(Di>block_eigenvalue)block_eigenvalue=Di; // upper limit to Gershgorin disc == bound on dominant eigenvalue }}} if(block_eigenvalue>dominant_eigenvalue){dominant_eigenvalue = block_eigenvalue;} } level->cycles.blas1 += (uint64_t)(CycleTime()-_timeStart); // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // Reduce the local estimates dominant eigenvalue to a global estimate #ifdef USE_MPI uint64_t _timeStartAllReduce = CycleTime(); double send = dominant_eigenvalue; MPI_Allreduce(&send,&dominant_eigenvalue,1,MPI_DOUBLE,MPI_MAX,MPI_COMM_WORLD); uint64_t _timeEndAllReduce = CycleTime(); level->cycles.collectives += (uint64_t)(_timeEndAllReduce-_timeStartAllReduce); #endif if(level->my_rank==0){fprintf(stdout,"eigenvalue_max<%e\n",dominant_eigenvalue);} level->dominant_eigenvalue_of_DinvA = dominant_eigenvalue; // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // exchange Dinv/L1inv/... 
exchange_boundary(level,VECTOR_DINV ,0); // must be 0(faces,edges,corners) for CA version exchange_boundary(level,VECTOR_L1INV,0); // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - }