/*!
  @brief Build aggregates for the given level and store them on it.

  Reads "Graph" from the level, creates a fresh Aggregates object labelled "UC",
  runs algo1_.CoarsenUncoupled() followed by algo2_.AggregateLeftovers() on it,
  calls AggregatesCrossProcessors(true) and finally stores the result on the
  level under "Aggregates". When Statistics0 output is enabled the aggregates
  are described on the output stream.

  @param[in,out] currentLevel Level the graph is read from and the aggregates are written to.
*/
void CoupledAggregationFactory<LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level &currentLevel) const {
  FactoryMonitor m(*this, "Build", currentLevel);

  RCP<Aggregates> aggregates;
  {
    //TODO check for reuse of aggregates here

    // Level Get
    RCP<const GraphBase> graph = Get< RCP<GraphBase> >(currentLevel, "Graph");

    // Build
    aggregates = rcp(new Aggregates(*graph));
    aggregates->setObjectLabel("UC");

    algo1_.CoarsenUncoupled(*graph, *aggregates);
    algo2_.AggregateLeftovers(*graph, *aggregates);
  }

  aggregates->AggregatesCrossProcessors(true);

  // Level Set
  Set(currentLevel, "Aggregates", aggregates);

  if (IsPrint(Statistics0)) {
    aggregates->describe(GetOStream(Statistics0, 0), getVerbLevel());
  }
}
/*!
  @brief Plausibility check: count exact zeros on the main diagonal of Ac.

  If repairZeroDiagonals_ is set, a one is inserted (via insertLocalValues) at the
  diagonal position of every row whose diagonal entry compares equal to zero.
  When Warnings0 output is enabled, the global number of zero diagonals is summed
  over the row map's communicator and reported.

  @param[in,out] Ac Matrix to check (and optionally repair).
*/
void RAPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::CheckMainDiagonal(RCP<Matrix> & Ac) const {
  const SC zero = Teuchos::ScalarTraits<SC>::zero();
  const SC one  = Teuchos::ScalarTraits<SC>::one();

  // Local copy of the diagonal
  RCP<Vector> diagVec = VectorFactory::Build(Ac->getRowMap());
  Ac->getLocalDiagCopy(*diagVec);
  Teuchos::ArrayRCP< Scalar > diagVal = diagVec->getDataNonConst(0);

  LO lZeroDiags = 0;
  const size_t numLocalRows = Ac->getRowMap()->getNodeNumElements();
  for (size_t row = 0; row < numLocalRows; ++row) {
    if (!(diagVal[row] == zero))
      continue;

    lZeroDiags++;

    if (!repairZeroDiagonals_)
      continue;

    // Translate the local row into the local column index of its diagonal entry.
    GO globalRow = Ac->getRowMap()->getGlobalElement(row);
    LO localCol  = Ac->getColMap()->getLocalElement(globalRow);

    Teuchos::ArrayRCP<LO> colIndices(1, localCol);
    Teuchos::ArrayRCP<SC> colValues(1, one);
    Ac->insertLocalValues(row, colIndices.view(0, colIndices.size()), colValues.view(0, colValues.size()));
  }

  if (!IsPrint(Warnings0))
    return;

  const RCP<const Teuchos::Comm<int> > & comm = Ac->getRowMap()->getComm();
  GO lZeroDiagsGO = Teuchos::as<GO>(lZeroDiags); /* LO->GO conversion */
  GO gZeroDiags   = 0;
  sumAll(comm, lZeroDiagsGO, gZeroDiags);

  const char* prefix = repairZeroDiagonals_ ? "RAPFactory (WARNING): repaired " : "RAPFactory (WARNING): found ";
  GetOStream(Warnings0,0) << prefix << gZeroDiags << " zeros on main diagonal of Ac." << std::endl;
}
/*
 * clean_string - clean up a string possibly containing garbage
 *
 * *sigh* Before the kiddies find this new and exciting way of
 * annoying opers, lets clean up what is sent to local opers
 * -Dianora
 *
 * inputs  - dest: output buffer of at least len bytes
 *         - src:  NUL-terminated input string
 *         - len:  size of dest in bytes
 * output  - dest, or NULL if dest or src is NULL
 *
 * Bytes with the high bit set are replaced by '.'; bytes that IsPrint()
 * rejects are written in caret notation ('^' followed by the byte with
 * bit 0x40 toggled); everything else is copied unchanged. The output is
 * always NUL-terminated.
 */
char *
clean_string(char *dest, const unsigned char *src, ssize_t len)
{
  char *d = dest;

  assert(0 != dest);
  assert(0 != src);

  if (dest == NULL || src == NULL)
    return NULL;

  len -= 3;  /* allow for worst case, '^A\0' */

  while (*src && (len > 0))
  {
    if (*src & 0x80)  /* if high bit is set */
      *d++ = '.';
    else if (!IsPrint(*src))  /* if NOT printable */
    {
      *d++ = '^';
      --len;
      /*
       * Toggle bit 0x40 instead of adding it: identical for 0x00-0x3f,
       * but a rejected DEL (0x7f) becomes '?' rather than 0xbf, which
       * would itself be a high-bit byte.
       */
      *d++ = 0x40 ^ *src;
    }
    else
      *d++ = *src;

    ++src, --len;
  }

  *d = '\0';
  return dest;
}
/* Overwrite, in place, every character of s that IsPrint() rejects with '?'. */
void mutt_display_sanitize (char *s)
{
  char *cursor = s;

  while (*cursor)
  {
    if (!IsPrint (*cursor))
      *cursor = '?';
    cursor++;
  }
}
/*! @brief Constructor @param[in] object Reference to the class instance that is creating this SubMonitor. @param[in] msg String that indicates what the SubMonitor is monitoring, e.g., "Build" @param[in] level The MueLu Level object. @param[in] msgLevel Governs whether information should be printed. @param[in] timerLevel Governs whether timing information should be *gathered*. Setting this to NoTimeReport prevents the creation of timers. */ SubFactoryMonitor(const BaseClass& object, const std::string & msg, const Level & level, MsgType msgLevel = Runtime1, MsgType timerLevel = Timings1) : SubMonitor(object, msg, msgLevel, timerLevel) { if (IsPrint(TimingsByLevel)) { levelTimeMonitor_ = rcp(new TimeMonitor(object, object.ShortClassName() + ": " + msg + " (sub, total, level=" + Teuchos::Utils::toString(level.GetLevelID()) + ")", timerLevel)); } }
/*
 * Draw the characters text[page->start .. page->end - 1] into back_buffer at
 * the coordinates stored in character_positions.
 *
 *  - A newline that IsPrint() rejects is shown as the "downwards arrow with
 *    tip leftwards" symbol, drawn with control_gc.
 *  - A tab that IsPrint() rejects draws nothing.
 *  - Every other character is drawn with the font returned by SelectFont();
 *    its y coordinate is offset by the difference between that font's ascent
 *    and default_font's ascent.
 */
void DrawCharacters(Page *page)
{
    size_t i;

    /*
     * One scratch GC is shared by all glyphs: it takes its foreground and
     * background from default_gc and only the font changes per character.
     */
    GC gc = XCreateGC(disp, back_buffer, 0, NULL);
    XCopyGC(disp, default_gc, GCForeground | GCBackground, gc);

    for (i = page->start; i < page->end; i++) {
        short x = character_positions[i].x;
        short y = character_positions[i].y;

        if (!IsPrint(text[i])) {
            if (EqAscii2b(text[i], '\n')) {
                // DOWNWARDS ARROW WITH TIP LEFTWARDS
                XChar2b symbol = { .byte1 = 0x21, .byte2 = 0xb2 };
                XDrawString16(disp, back_buffer, control_gc, x, y, &symbol, 1);
                continue;
            }
            if (EqAscii2b(text[i], '\t')) {
                continue;
            }
            /* Any other rejected character is drawn like a printable one. */
        }

        XChar2b font_code;
        XFontStruct *font = SelectFont(text[i], &font_code);

        XSetFont(disp, gc, font->fid);
        XDrawString16(disp, back_buffer, gc,
                      x, y + (font->ascent - default_font->ascent),
                      &font_code, 1);
    }

    XFreeGC(disp, gc);
}
/*
 * Fill s (of size l) via mutt_save_path() for address a, then replace every
 * '/', every character ISSPACE() accepts and every character IsPrint()
 * rejects with '_'.
 */
void mutt_safe_path (char *s, size_t l, ADDRESS *a)
{
  char *cursor;

  mutt_save_path (s, l, a);

  cursor = s;
  while (*cursor)
  {
    if (*cursor == '/' || ISSPACE (*cursor) || !IsPrint ((unsigned char) *cursor))
      *cursor = '_';
    cursor++;
  }
}
/* Replace, in place, every character of Errorbuf that IsPrint() rejects with '.'. */
static void clean_error_buf(void)
{
  char *cursor = Errorbuf;

  while (*cursor)
  {
    if (!IsPrint(*cursor))
      *cursor = '.';
    ++cursor;
  }
}
/*! @brief Constructor @param[in] object Reference to the class instance that is creating this SubMonitor. @param[in] msg String that indicates what the SubMonitor is monitoring, e.g., "Build". @param[in] level The MueLu Level object. @param[in] msgLevel Governs whether information should be printed. @param[in] timerLevel Governs whether timing information should be *gathered*. Setting this to NoTimeReport prevents the creation of timers. TODO: code factorization */ FactoryMonitor(const BaseClass& object, const std::string & msg, const Level & level, MsgType msgLevel = static_cast<MsgType>(Test | Runtime0), MsgType timerLevel = Timings0) : Monitor(object, msg, msgLevel, timerLevel), timerMonitorExclusive_(object, object.ShortClassName() + " : " + msg, timerLevel) { if (IsPrint(TimingsByLevel)) { levelTimeMonitor_ = rcp(new TimeMonitor(object, object.ShortClassName() + ": " + msg + " (total, level=" + Teuchos::Utils::toString(level.GetLevelID()) + ")", timerLevel)); levelTimeMonitorExclusive_ = rcp(new MutuallyExclusiveTimeMonitor<Level>(object, object.ShortClassName() + " " + MUELU_TIMER_AS_STRING + " : " + msg + " (level=" + Teuchos::Utils::toString(level.GetLevelID()) + ")", timerLevel)); } }
/*!
  @brief Return the stream messages of the given type should be written to.

  Returns *blackHole_ when IsPrint(type, thisProcRankOnly) is false. Otherwise
  returns the object's output stream; if @c type shares no bits with
  (Extreme | Test) ^ Warnings, a warning banner is written to it first.
*/
Teuchos::FancyOStream & VerboseObject::GetOStream(MsgType type, int thisProcRankOnly) const {
  if (IsPrint(type, thisProcRankOnly)) {
    Teuchos::FancyOStream& out = *getOStream();

    const bool needsBanner = !(type & ((Extreme | Test) ^ Warnings));
    if (needsBanner)
      out << "\n******* WARNING *******" << std::endl;

    return out;
  }

  return *blackHole_;
}
/*! @brief Constructor @param[in] object Reference to the class instance that is creating this MutuallyExclusiveTimeMonitor. @param[in] msg String that indicates what the Monitor is monitoring, e.g., "Build" @param[in] timerLevel Governs whether timing information should be *gathered*. Setting this to NoTimeReport prevents the creation of timers. */ MutuallyExclusiveTimeMonitor(const BaseClass& object, const std::string& msg, MsgType timerLevel = Timings0) { // Inherit verbosity from 'object' SetVerbLevel(object.GetVerbLevel()); setOStream(object.getOStream()); if (IsPrint(timerLevel) && /* disable timer if never printed: */ (IsPrint(RuntimeTimings) || (!IsPrint(NoTimeReport)))) { if (!IsPrint(NoTimeReport)) { timer_ = MutuallyExclusiveTime<TagName>::getNewTimer("MueLu: " + msg /*+ " (MutuallyExclusive)" */); } else { timer_ = rcp(new MutuallyExclusiveTime<TagName> ("MueLu: " + msg /*+ " (MutuallyExclusive)" */)); } timer_->start(); timer_->incrementNumCalls(); } }
TimeMonitor(const BaseClass& object, const std::string& msg, MsgType timerLevel = Timings0) { // Inherit verbosity from 'object' SetVerbLevel(object.GetVerbLevel()); setOStream(object.getOStream()); if (IsPrint(timerLevel) && /* disable timer if never printed: */ (IsPrint(RuntimeTimings) || (!IsPrint(NoTimeReport)))) { if (!IsPrint(NoTimeReport)) { // TODO: there is no function to register a timer in Teuchos::TimeMonitor after the creation of the timer. But would be useful... timer_ = Teuchos::TimeMonitor::getNewTimer("MueLu: " + msg); } else { timer_ = rcp(new Teuchos::Time("MueLu: " + msg)); } // Start the timer (this is what is done by Teuchos::TimeMonitor) timer_->start(); timer_->incrementNumCalls(); } }
//! Constructor PrintMonitor(const BaseClass& object, const std::string& msg, MsgType msgLevel = Runtime0) { // Inherit verbosity from 'object' SetVerbLevel(object.GetVerbLevel()); setOStream(object.getOStream()); // Print description and new indent if (IsPrint(msgLevel)) { GetOStream(msgLevel, 0) << msg << std::endl; tab_ = rcp(new Teuchos::OSTab(getOStream())); } }
/*!
  @brief Build a prolongator from user-supplied files.

  Reads the coarse map from the file named by parameter "mapFileName" and the
  prolongator P from the file named by "matrixFileName", then stores on the coarse
  level: "CoarseMap", "P", a coarse "Nullspace" computed by applying P transposed
  to the fine nullspace, and 2D "Coordinates" derived from the coarse global IDs.

  Throws Exceptions::RuntimeError if the block size of A is not 1, or if the global
  number of coarse elements is not a perfect square.
*/
void UserPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::BuildP(Level& fineLevel, Level& coarseLevel) const {
  FactoryMonitor m(*this, "Build", coarseLevel);

  // Fine-level inputs
  RCP<Matrix>      A             = Get< RCP<Matrix> >     (fineLevel, "A");
  RCP<MultiVector> fineNullspace = Get< RCP<MultiVector> >(fineLevel, "Nullspace");

  TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() != 1, Exceptions::RuntimeError, "Block size > 1 has not been implemented");

  const Teuchos::ParameterList& pL = GetParameterList();

  // Coarse map
  std::string mapFile = pL.get<std::string>("mapFileName");
  RCP<const Map> rowMap    = A->getRowMap();
  RCP<const Map> coarseMap = Utils2::ReadMap(mapFile, rowMap->lib(), rowMap->getComm());
  Set(coarseLevel, "CoarseMap", coarseMap);

  // Prolongator
  std::string matrixFile = pL.get<std::string>("matrixFileName");
  RCP<Matrix> P = Utils::Read(matrixFile, rowMap, coarseMap, coarseMap, rowMap);
#if 1
  Set(coarseLevel, "P", P);
#else
  // Expand column map by 1
  RCP<Matrix> P1 = Utils::Multiply(*A, false, *P, false);
  P = Utils::Read(matrixFile, rowMap, P1->getColMap(), coarseMap, rowMap);
  Set(coarseLevel, "P", P);
#endif

  // Coarse nullspace = P^T * fine nullspace
  RCP<MultiVector> coarseNullspace = MultiVectorFactory::Build(coarseMap, fineNullspace->getNumVectors());
  P->apply(*fineNullspace, *coarseNullspace, Teuchos::TRANS, Teuchos::ScalarTraits<SC>::one(), Teuchos::ScalarTraits<SC>::zero());
  Set(coarseLevel, "Nullspace", coarseNullspace);

  // Coordinates transfer: only handled when the global number of coarse elements is n*n
  size_t n = Teuchos::as<size_t>(sqrt(coarseMap->getGlobalNumElements()));
  TEUCHOS_TEST_FOR_EXCEPTION(n*n != coarseMap->getGlobalNumElements(), Exceptions::RuntimeError, "Unfortunately, this is not the case, don't know what to do");

  RCP<MultiVector> coarseCoords = MultiVectorFactory::Build(coarseMap, 2);
  ArrayRCP<Scalar> x = coarseCoords->getDataNonConst(0);
  ArrayRCP<Scalar> y = coarseCoords->getDataNonConst(1);

  const size_t numLocalCoarse = coarseMap->getNodeNumElements();
  for (size_t LID = 0; LID < numLocalCoarse; ++LID) {
    // Zero-based global ID, split into (column, row) of an n-wide layout
    GlobalOrdinal GID = coarseMap->getGlobalElement(LID) - coarseMap->getIndexBase();
    GlobalOrdinal col = GID % n;
    GlobalOrdinal row = GID/n;

    x[LID] = col;
    y[LID] = row;
  }

  Set(coarseLevel, "Coordinates", coarseCoords);

  if (IsPrint(Statistics1)) {
    RCP<ParameterList> params = rcp(new ParameterList());
    params->set("printLoadBalancingInfo", true);
    GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*P, "P", params);
  }
}
//! Destructor: stops the timer (if one was created) and, when RuntimeTimings is printed, reports max/min/avg of its total elapsed time.
~TimeMonitor() {
  if (timer_ == Teuchos::null)
    return;

  // Stop the timer
  timer_->stop();

  if (!IsPrint(RuntimeTimings))
    return;

  //FIXME: creates lot of barriers. An option to report time of proc0 only instead would be nice
  //FIXME: MPI_COMM_WORLD only... BTW, it is also the case in Teuchos::TimeMonitor...
  //
  // mfh 11 Nov 2012: Actually, Teuchos::TimeMonitor::summarize() has multiple overloads that take a Teuchos::Comm.
  ArrayRCP<double> stats = ReduceMaxMinAvg(timer_->totalElapsedTime(), *Teuchos::DefaultComm<int>::getComm ());

  //FIXME: Not very important for now, but timer will be printed even if verboseLevel of Monitor/Object changed
  //       between Monitor constructor and destructor.
  GetOStream(RuntimeTimings, 0) << "Timer: " << " max=" << stats[0] << " min=" << stats[1] << " avg=" << stats[2] << std::endl;
}
/*
 * Write to fp the C source of a 256-entry table named Enc<name>_CtypeTable.
 * Entry c is the OR of the BIT_CTYPE_* flags whose Is*() predicate accepts
 * (einfo->num, c). Values are emitted NCOL per row. Always returns 0.
 */
static int exec(FILE* fp, ENC_INFO* einfo)
{
#define NCOL 8

  const int enc = einfo->num;
  int c;

  fprintf(fp, "static const unsigned short Enc%s_CtypeTable[256] = {\n",
          einfo->name);

  for (c = 0; c < 256; c++) {
    const int column = c % NCOL;
    int val = 0;

    if (IsNewline(enc, c)) val |= BIT_CTYPE_NEWLINE;
    if (IsAlpha (enc, c))  val |= (BIT_CTYPE_ALPHA | BIT_CTYPE_ALNUM);
    if (IsBlank (enc, c))  val |= BIT_CTYPE_BLANK;
    if (IsCntrl (enc, c))  val |= BIT_CTYPE_CNTRL;
    if (IsDigit (enc, c))  val |= (BIT_CTYPE_DIGIT | BIT_CTYPE_ALNUM);
    if (IsGraph (enc, c))  val |= BIT_CTYPE_GRAPH;
    if (IsLower (enc, c))  val |= BIT_CTYPE_LOWER;
    if (IsPrint (enc, c))  val |= BIT_CTYPE_PRINT;
    if (IsPunct (enc, c))  val |= BIT_CTYPE_PUNCT;
    if (IsSpace (enc, c))  val |= BIT_CTYPE_SPACE;
    if (IsUpper (enc, c))  val |= BIT_CTYPE_UPPER;
    if (IsXDigit(enc, c))  val |= BIT_CTYPE_XDIGIT;
    if (IsWord (enc, c))   val |= BIT_CTYPE_WORD;
    if (IsAscii (enc, c))  val |= BIT_CTYPE_ASCII;

    /* Row prefix at the start of each row */
    if (column == 0)
      fputs(" ", fp);

    fprintf(fp, "0x%04x", val);

    /* No comma after the final entry */
    if (c != 255)
      fputs(",", fp);

    /* Row break after the last column, separator otherwise */
    fputs((c != 0 && column == (NCOL-1)) ? "\n" : " ", fp);
  }

  fprintf(fp, "};\n");
  return 0;
}
/*!
  @brief Phase 1 of local aggregation: build root-based aggregates from READY vertices.

  Every local vertex starts in state READY. Vertices are visited in the order
  selected by ordering_ (NATURAL: index order, RANDOM: order of a vector passed
  through RandomReorder(), GRAPH: driven by a work list that is fed with READY
  neighbors of visited vertices). For each visited READY vertex a candidate
  aggregate is formed from the vertex itself plus its neighbors with index below
  nRows that are READY or NOTSEL. The candidate is rejected (root marked NOTSEL)
  if more than GetMaxNeighAlreadySelected() of those neighbors are in any other
  state, or if the candidate length is <= GetMinNodesPerAggregate(); otherwise
  the root is flagged via SetIsRoot(), all members become SELECTED and their
  entry in the vertex-to-aggregate vector is set to the new aggregate id.

  On exit the number of aggregates is stored in @c aggregates and, depending on
  the verbosity, global counts of leftover READY vertices, aggregated vertices
  and aggregates are printed.

  @param[in]     graph       Graph providing vertex count, neighbor lists and communicator.
  @param[in,out] aggregates  Receives root flags, vertex-to-aggregate ids and the aggregate count.

  @throws Exceptions::RuntimeError if ordering_ is none of NATURAL/RANDOM/GRAPH, or if
          allocating a candidate's vertex list raises std::bad_alloc.
*/
void LocalAggregationAlgorithm<LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::CoarsenUncoupled(GraphBase const & graph, Aggregates & aggregates) const {
  Monitor m(*this, "Coarsen Uncoupled");

  // Human-readable name of the ordering, for the log only (stays empty for unknown values).
  std::string orderingType;
  switch(ordering_) {
  case NATURAL:
    orderingType="Natural";
    break;
  case RANDOM:
    orderingType="Random";
    break;
  case GRAPH:
    orderingType="Graph";
    break;
  default:
    break;
  }
  GetOStream(Runtime1) << "Ordering: " << orderingType << std::endl;
  GetOStream(Runtime1) << "Min nodes per aggregate: " << minNodesPerAggregate_ << std::endl;
  GetOStream(Runtime1) << "Max nbrs already selected: " << maxNeighAlreadySelected_ << std::endl;

  /* Create Aggregation object */
  my_size_t nAggregates = 0;

  /* ============================================================= */
  /* aggStat indicates whether this node has been aggregated, and  */
  /* vertex2AggId stores the aggregate number where this node has  */
  /* been aggregated into.                                         */
  /* ============================================================= */

  Teuchos::ArrayRCP<NodeState> aggStat;
  const my_size_t nRows = graph.GetNodeNumVertices();
  if (nRows > 0) aggStat = Teuchos::arcp<NodeState>(nRows);
  for ( my_size_t i = 0; i < nRows; ++i ) aggStat[i] = READY;

  /* ============================================================= */
  /* Phase 1  :                                                    */
  /*    for all nodes, form a new aggregate with its neighbors     */
  /*    if the number of its neighbors having been aggregated does */
  /*    not exceed a given threshold                               */
  /*    (GetMaxNeighAlreadySelected() = 0 ===> Vanek's scheme)     */
  /* ============================================================= */

  /* some general variable declarations */
  Teuchos::ArrayRCP<LO> randomVector;
  RCP<MueLu::LinkedList> nodeList; /* list storing the next node to pick as a root point for ordering_ == GRAPH */
  // Accepted aggregates are chained in a singly linked list (aggHead .. aggCurrent) that is freed at the end.
  MueLu_SuperNode *aggHead=NULL, *aggCurrent=NULL, *supernode=NULL;
  /**/

  if ( ordering_ == RANDOM )       /* random ordering */
    {
      //TODO: could be stored in a class that respect interface of LinkedList

      randomVector = Teuchos::arcp<LO>(nRows); //size_t or int ?-> to be propagated
      for (my_size_t i = 0; i < nRows; ++i) randomVector[i] = i;
      RandomReorder(randomVector);
    }
  else if ( ordering_ == GRAPH )  /* graph ordering */
    {
      // The work list is seeded with vertex 0.
      nodeList = rcp(new MueLu::LinkedList());
      nodeList->Add(0);
    }

  /* main loop */
  {
    LO iNode  = 0;  // vertex currently considered as a root
    LO iNode2 = 0;  // position in the visiting order (not advanced for GRAPH ordering)

    Teuchos::ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); // output only: contents ignored

    while (iNode2 < nRows)
      {

        /*------------------------------------------------------ */
        /* pick the next node to aggregate                       */
        /*------------------------------------------------------ */

        if      ( ordering_ == NATURAL ) iNode = iNode2++;
        else if ( ordering_ == RANDOM )  iNode = randomVector[iNode2++];
        else if ( ordering_ == GRAPH )
          {
            // Work list exhausted: restart from the first vertex that is still READY.
            if ( nodeList->IsEmpty() )
              {
                for ( int jNode = 0; jNode < nRows; ++jNode )
                  {
                    if ( aggStat[jNode] == READY )
                      {
                        nodeList->Add(jNode); //TODO optim: not necessary to create a node. Can just set iNode value and skip the end
                        break;
                      }
                  }
              }
            // Still empty: no READY vertex is left, so this is the only exit for GRAPH ordering.
            if ( nodeList->IsEmpty() ) break; /* end of the while loop */ //TODO: coding style :(

            iNode = nodeList->Pop();
          }
        else {
          throw(Exceptions::RuntimeError("CoarsenUncoupled: bad aggregation ordering option"));
        }

        /*------------------------------------------------------ */
        /* consider further only if the node is in READY mode    */
        /*------------------------------------------------------ */

        if ( aggStat[iNode] == READY )
          {
            // neighOfINode is the neighbor node list of node 'iNode'.
            Teuchos::ArrayView<const LO> neighOfINode = graph.getNeighborVertices(iNode);
            typename Teuchos::ArrayView<const LO>::size_type length = neighOfINode.size();

            // NOTE(review): if the allocation below fails, the exception raised in the
            // catch block leaves this function without deleting 'supernode' or the
            // already accepted aggregates list.
            supernode = new MueLu_SuperNode;
            try {
              supernode->list = Teuchos::arcp<int>(length+1);
            } catch (std::bad_alloc&) {
              TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::LocalAggregationAlgorithm::CoarsenUncoupled(): Error: couldn't allocate memory for supernode! length=" + Teuchos::toString(length));
            }

            // The candidate starts with the root itself in slot 0.
            supernode->maxLength = length;
            supernode->length = 1;
            supernode->list[0] = iNode;

            int selectFlag = 1;
            {
              /*--------------------------------------------------- */
              /* count the no. of neighbors having been aggregated  */
              /*--------------------------------------------------- */

              int count = 0;
              for (typename Teuchos::ArrayView<const LO>::const_iterator it = neighOfINode.begin(); it != neighOfINode.end(); ++it)
                {
                  int index = *it;
                  // Only neighbors with index < nRows are considered
                  // (presumably this excludes non-local vertices -- TODO confirm).
                  if ( index < nRows )
                    {
                      if ( aggStat[index] == READY ||
                           aggStat[index] == NOTSEL )
                        supernode->list[supernode->length++] = index;
                      else count++;

                    }
                }

              /*--------------------------------------------------- */
              /* if there are too many neighbors aggregated or the  */
              /* number of nodes in the new aggregate is too few,   */
              /* don't do this one                                  */
              /*--------------------------------------------------- */

              if ( count > GetMaxNeighAlreadySelected() ) selectFlag = 0;
            }

            // Note: the supernode length is actually 1 more than the
            //       number of nodes in the candidate aggregate. The
            //       root is counted twice. I'm not sure if this is
            //       a bug or a feature ... so I'll leave it and change
            //       < to <= in the if just below.

            if (selectFlag != 1 ||
                supernode->length <= GetMinNodesPerAggregate())
              {
                // Candidate rejected: the root becomes NOTSEL and the candidate is discarded.
                aggStat[iNode] = NOTSEL;
                delete supernode;
                if ( ordering_ == GRAPH ) /* if graph ordering */
                  {
                    // Queue the READY neighbors of the rejected root as future root candidates.
                    for (typename Teuchos::ArrayView<const LO>::const_iterator it = neighOfINode.begin(); it != neighOfINode.end(); ++it)
                      {
                        int index = *it;
                        if ( index < nRows && aggStat[index] == READY )
                          {
                            nodeList->Add(index);
                          }
                      }
                  }
              }
            else
              {
                // Candidate accepted: record the root and assign every member to aggregate 'nAggregates'.
                aggregates.SetIsRoot(iNode);
                for ( int j = 0; j < supernode->length; ++j )
                  {
                    int jNode = supernode->list[j];
                    aggStat[jNode] = SELECTED;
                    vertex2AggId[jNode] = nAggregates;
                    if ( ordering_ == GRAPH ) /* if graph ordering */
                      {
                        // Queue the READY neighbors of each member as future root candidates.
                        Teuchos::ArrayView<const LO> neighOfJNode = graph.getNeighborVertices(jNode);

                        for (typename Teuchos::ArrayView<const LO>::const_iterator it = neighOfJNode.begin(); it != neighOfJNode.end(); ++it)
                          {
                            int index = *it;
                            if ( index < nRows && aggStat[index] == READY )
                              {
                                nodeList->Add(index);
                              }
                          }
                      }
                  }
                // Append the accepted aggregate to the linked list.
                supernode->next = NULL;
                supernode->index = nAggregates;
                if ( nAggregates == 0 )
                  {
                    aggHead = supernode;
                    aggCurrent = supernode;
                  }
                else
                  {
                    aggCurrent->next = supernode;
                    aggCurrent = supernode;
                  }
                nAggregates++;
                // unused aggCntArray[nAggregates] = supernode->length;
              }
          }
      } // end of 'while'

    // views on distributed vectors are freed here.

  } // end of 'main loop'

  nodeList = Teuchos::null;

  /* Update aggregate object */
  aggregates.SetNumAggregates(nAggregates);

  /* Verbose */
  {
    const RCP<const Teuchos::Comm<int> > & comm = graph.GetComm();

    if (IsPrint(Warnings0)) {
      GO localReady=0, globalReady;

      // Compute 'localReady'
      for ( my_size_t i = 0; i < nRows; ++i )
        if (aggStat[i] == READY) localReady++;

      // Compute 'globalReady'
      sumAll(comm, localReady, globalReady);

      if(globalReady > 0)
        GetOStream(Warnings0) << "Warning: " << globalReady << " READY nodes left" << std::endl;
    }

    if (IsPrint(Statistics1)) {
      // Compute 'localSelected'
      LO localSelected=0;
      for ( my_size_t i = 0; i < nRows; ++i )
        if ( aggStat[i] == SELECTED ) localSelected++;

      // Compute 'globalSelected'
      GO globalSelected;
      sumAll(comm, (GO)localSelected, globalSelected);

      // Compute 'globalNRows'
      GO globalNRows;
      sumAll(comm, (GO)nRows, globalNRows);

      GetOStream(Statistics1) << "Nodes aggregated = " << globalSelected << " (" << globalNRows << ")" << std::endl;
    }

    if (IsPrint(Statistics1)) {
      GO nAggregatesGlobal;
      sumAll(comm, (GO)nAggregates, nAggregatesGlobal);
      GetOStream(Statistics1) << "Total aggregates = " << nAggregatesGlobal << std::endl;
    }

  } // verbose

  /* ------------------------------------------------------------- */
  /* clean up: free the linked list of accepted aggregates         */
  /* ------------------------------------------------------------- */

  aggCurrent = aggHead;
  while ( aggCurrent != NULL )
    {
      supernode = aggCurrent;
      aggCurrent = aggCurrent->next;
      delete supernode;
    }

} // CoarsenUncoupled
// 次のページの開始位置、あるいは文書の終端 (== text_length) を返す。 size_t FillPage(size_t start, Page *page, bool draw) { XWindowAttributes attrs; XGetWindowAttributes(disp, win, &attrs); // ページのサイズ。 const int LEFT_MARGIN = 50; const int RIGHT_MARGIN = attrs.width - LEFT_MARGIN; const int TOP_MARGIN = 50; const int BOTTOM_MARGIN = attrs.height - TOP_MARGIN; const XChar2b sp = { 0x00, 0x21 }; const int EM = GetCharWidth(sp); // 行の高さ。 const int LINE_HEIGHT = 22; // 現在の文字の描画位置。 int x = LEFT_MARGIN, y = TOP_MARGIN + font->ascent; size_t i; for (i = start; i < text_length; i++) { // カーソルの描画 if (draw && i == cursor_position) { XFillRectangle(disp, win, cursor_gc, x, y - font->ascent, CURSOR_WIDTH, font->ascent + font->descent); } if (IsPrint(text[i])) { // 印字可能文字の場合。 int width = GetCharWidth(text[i]); // この文字を描画すると右マージンにかかるようなら改行する。 // ただし、行頭に居る場合は改行しない。 if ( x + width > RIGHT_MARGIN && !ForbiddenAtStart(text[i]) && // 行頭禁止文字ならばぶらさげる x != LEFT_MARGIN ) { y += LINE_HEIGHT; x = LEFT_MARGIN; // ページにも収まらない場合、この位置で終了する。 if (y + font->descent > BOTTOM_MARGIN) { page->start = start; page->end = i; return i; } } if (draw) XDrawString16(disp, win, gc, x, y, &text[i], 1); x += width; } else { // ラインフィードで改行する。 if (EqAscii2b(text[i], '\n')) { if (draw) { // DOWNWARDS ARROW WITH TIP LEFTWARDS XChar2b symbol = { .byte1 = 0x21, .byte2 = 0xb2 }; XDrawString16(disp, win, control_gc, x, y, &symbol, 1); } y += LINE_HEIGHT; x = LEFT_MARGIN; // ページにも収まらない場合、次の位置で終了する。 // ページ区切り位置での改行は持ち越さない。 if (y + font->descent > BOTTOM_MARGIN) { page->start = start; page->end = i + 1; return i + 1; } } else if (EqAscii2b(text[i], '\t')) { int tab = EM * 8; x = LEFT_MARGIN + (((x - LEFT_MARGIN) / tab) + 1) * tab; } } } if (draw && i == cursor_position) { XFillRectangle(disp, win, cursor_gc, x, y - font->ascent, CURSOR_WIDTH, font->ascent + font->descent); } if (draw) XDrawString(disp, win, control_gc, x, y, "[EOF]", 5); // 全てのテキストを配置した。 page->start = start; page->end = text_length; return text_length; }
/*!
  @brief Build uncoupled aggregates for the given level and store them on it.

  The list of aggregation algorithms (algos_) is rebuilt from the boolean
  "Use...AggregationAlgorithm" parameters. Every vertex of the level's "Graph"
  starts as READY; vertices flagged in the graph's boundary node map become
  BOUNDARY, and vertices whose DOFs appear in the optional SmallAgg / OnePt maps
  become SMALLAGG / ONEPT. The algorithms are then run in order, each one
  updating the aggregates and the count of non-aggregated vertices. With
  Statistics1 enabled, per-phase and cumulative counts are printed.

  @param[in,out] currentLevel Level the graph (and optional maps, "DofsPerNode") are read
                              from and the resulting "Aggregates" are written to.

  @throws Exceptions::RuntimeError if vertices remain non-aggregated after all algorithms ran.
*/
void UncoupledAggregationFactory<LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level &currentLevel) const {
  FactoryMonitor m(*this, "Build", currentLevel);

  const ParameterList& pL = GetParameterList();
  bDefinitionPhase_ = false;  // definition phase is finished, now all aggregation algorithm information is fixed

  bool bUseOnePtAggregationAlgorithm             = pL.get<bool>("UseOnePtAggregationAlgorithm");
  bool bUseSmallAggregationAlgorithm             = pL.get<bool>("UseSmallAggregatesAggregationAlgorithm");
  bool bUsePreserveDirichletAggregationAlgorithm = pL.get<bool>("UsePreserveDirichletAggregationAlgorithm");
  bool bUseUncoupledAggregationAglorithm         = pL.get<bool>("UseUncoupledAggregationAlgorithm");
  bool bUseMaxLinkAggregationAlgorithm           = pL.get<bool>("UseMaxLinkAggregationAlgorithm");
  bool bUseIsolatedNodeAggregationAglorithm      = pL.get<bool>("UseIsolatedNodeAggregationAlgorithm");
  bool bUseEmergencyAggregationAlgorithm         = pL.get<bool>("UseEmergencyAggregationAlgorithm");

  // define aggregation algorithms
  RCP<const FactoryBase> graphFact = GetFactory("Graph");

  // TODO Can we keep different aggregation algorithms over more Build calls?
  // The order of the push_back calls is the order in which the phases run.
  algos_.clear();
  if (bUseOnePtAggregationAlgorithm)             algos_.push_back(rcp(new OnePtAggregationAlgorithm             (graphFact)));
  if (bUseSmallAggregationAlgorithm)             algos_.push_back(rcp(new SmallAggregationAlgorithm             (graphFact)));
  if (bUseUncoupledAggregationAglorithm)         algos_.push_back(rcp(new UncoupledAggregationAlgorithm         (graphFact)));
  if (bUseMaxLinkAggregationAlgorithm)           algos_.push_back(rcp(new MaxLinkAggregationAlgorithm           (graphFact)));
  if (bUsePreserveDirichletAggregationAlgorithm) algos_.push_back(rcp(new PreserveDirichletAggregationAlgorithm (graphFact)));
  if (bUseIsolatedNodeAggregationAglorithm)      algos_.push_back(rcp(new IsolatedNodeAggregationAlgorithm      (graphFact)));
  if (bUseEmergencyAggregationAlgorithm)         algos_.push_back(rcp(new EmergencyAggregationAlgorithm         (graphFact)));

  // Optional user maps; an empty name means "not provided".
  std::string mapOnePtName    = pL.get<std::string>("OnePt aggregate map name"),
              mapSmallAggName = pL.get<std::string>("SmallAgg aggregate map name");
  RCP<const Map> OnePtMap, SmallAggMap;
  if (mapOnePtName.length()) {
    RCP<const FactoryBase> mapOnePtFact = GetFactory("OnePt aggregate map factory");
    OnePtMap = currentLevel.Get<RCP<const Map> >(mapOnePtName, mapOnePtFact.get());
  }
  if (mapSmallAggName.length()) {
    RCP<const FactoryBase> mapSmallAggFact = GetFactory("SmallAgg aggregate map factory");
    SmallAggMap = currentLevel.Get<RCP<const Map> >(mapSmallAggName, mapSmallAggFact.get());
  }

  RCP<const GraphBase> graph = Get< RCP<GraphBase> >(currentLevel, "Graph");

  // Build
  RCP<Aggregates> aggregates = rcp(new Aggregates(*graph));
  aggregates->setObjectLabel("UC");

  const LO nRows = graph->GetNodeNumVertices();

  // construct aggStat information
  std::vector<unsigned> aggStat(nRows, NodeStats::READY);

  ArrayRCP<const bool> dirichletBoundaryMap = graph->GetBoundaryNodeMap();
  if (dirichletBoundaryMap != Teuchos::null) {
    for (LO i = 0; i < nRows; i++)
      if (dirichletBoundaryMap[i] == true)
        aggStat[i] = NodeStats::BOUNDARY;
  }

  LO nDofsPerNode = Get<LO>(currentLevel, "DofsPerNode");
  GO indexBase = graph->GetDomainMap()->getIndexBase();
  if (SmallAggMap != Teuchos::null || OnePtMap != Teuchos::null) {
    for (LO i = 0; i < nRows; i++) {
      // reconstruct global row id (FIXME only works for contiguous maps)
      GO grid = (graph->GetDomainMap()->getGlobalElement(i)-indexBase) * nDofsPerNode + indexBase;

      // A vertex is tagged if any of its DOFs is in the map; ONEPT is checked last and so wins over SMALLAGG.
      if (SmallAggMap != null) {
        for (LO kr = 0; kr < nDofsPerNode; kr++) {
          if (SmallAggMap->isNodeGlobalElement(grid + kr))
            aggStat[i] = MueLu::NodeStats::SMALLAGG;
        }
      }

      if (OnePtMap != null) {
        for (LO kr = 0; kr < nDofsPerNode; kr++) {
          if (OnePtMap->isNodeGlobalElement(grid + kr))
            aggStat[i] = MueLu::NodeStats::ONEPT;
        }
      }
    }
  }

  const RCP<const Teuchos::Comm<int> > comm = graph->GetComm();
  GO numGlobalRows = 0;
  if (IsPrint(Statistics1))
    sumAll(comm, as<GO>(nRows), numGlobalRows);

  LO numNonAggregatedNodes = nRows;
  GO numGlobalAggregatedPrev = 0, numGlobalAggsPrev = 0;
  for (size_t a = 0; a < algos_.size(); a++) {
    std::string phase = algos_[a]->description();
    SubFactoryMonitor sfm(*this, "Algo \"" + phase + "\"", currentLevel);

    algos_[a]->BuildAggregates(pL, *graph, *aggregates, aggStat, numNonAggregatedNodes);

    if (IsPrint(Statistics1)) {
      GO numLocalAggregated = nRows - numNonAggregatedNodes, numGlobalAggregated = 0;
      GO numLocalAggs       = aggregates->GetNumAggregates(), numGlobalAggs = 0;
      sumAll(comm, numLocalAggregated, numGlobalAggregated);
      sumAll(comm, numLocalAggs,       numGlobalAggs);

      double aggPercent = 100*as<double>(numGlobalAggregated)/as<double>(numGlobalRows);
      GetOStream(Statistics1) << " aggregated : " << (numGlobalAggregated - numGlobalAggregatedPrev) << " (phase), " << std::fixed
                              << std::setprecision(2) << numGlobalAggregated << "/" << numGlobalRows << " [" << aggPercent << "%] (total)\n"
                              << " remaining : " << numGlobalRows - numGlobalAggregated << "\n"
                              << " aggregates : " << numGlobalAggs-numGlobalAggsPrev << " (phase), " << numGlobalAggs << " (total)" << std::endl;
      numGlobalAggregatedPrev = numGlobalAggregated;
      numGlobalAggsPrev       = numGlobalAggs;
    }
  }

  TEUCHOS_TEST_FOR_EXCEPTION(numNonAggregatedNodes, Exceptions::RuntimeError, "MueLu::UncoupledAggregationFactory::Build: Leftover nodes found! Error!");

  aggregates->AggregatesCrossProcessors(false);

  Set(currentLevel, "Aggregates", aggregates);

  GetOStream(Statistics0) << aggregates->description() << std::endl;
}
/*!
  @brief Check the main diagonal of Ac for exact zeros and optionally repair them.

  Controlled by the parameters "RepairMainDiagonal" and "CheckMainDiagonal"; if both
  are false the function returns immediately. Otherwise the zero diagonal entries
  are counted globally. If repair is requested and zeros were found, Ac is replaced
  by the sum of Ac and a matrix holding a one on each affected diagonal position.
  Ac is fill-completed with "DoOptimizeStorage" set, and the count is reported at
  Warnings0 verbosity.

  @param[in,out] Ac Coarse matrix to check; may be replaced by a repaired matrix.
*/
void RAPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::CheckRepairMainDiagonal(RCP<Matrix>& Ac) const {
  const Teuchos::ParameterList& pL = GetParameterList();
  bool repairZeroDiagonals = pL.get<bool>("RepairMainDiagonal");
  bool checkAc             = pL.get<bool>("CheckMainDiagonal");

  if (!checkAc && !repairZeroDiagonals)
    return;

  SC zero = Teuchos::ScalarTraits<SC>::zero();
  SC one  = Teuchos::ScalarTraits<SC>::one();

  // Parameters handed to every fillComplete call below
  Teuchos::RCP<Teuchos::ParameterList> p = Teuchos::rcp(new Teuchos::ParameterList());
  p->set("DoOptimizeStorage", true);

  RCP<const Map> rowMap = Ac->getRowMap();
  RCP<Vector> diagVec = VectorFactory::Build(rowMap);
  Ac->getLocalDiagCopy(*diagVec);

  // Count the local zero diagonal entries, then sum over all processes
  Teuchos::ArrayRCP< Scalar > diagVal = diagVec->getDataNonConst(0);
  const size_t numLocalRows = rowMap->getNodeNumElements();

  LO lZeroDiags = 0;
  for (size_t row = 0; row < numLocalRows; row++)
    if (diagVal[row] == zero)
      lZeroDiags++;

  GO gZeroDiags;
  MueLu_sumAll(rowMap->getComm(), Teuchos::as<GO>(lZeroDiags), gZeroDiags);

  if (repairZeroDiagonals && gZeroDiags > 0) {
    // TAW: If Ac has empty rows, put a 1 on the diagonal of Ac. Be aware that Ac might have empty rows AND columns.
    // The columns might not exist in the column map at all.
    //
    // Adding the entries to Ac itself would require insertLocalValues, which cannot create entries for
    // local column indices missing from the column map of Ac (at least Epetra is not able to do this).
    //
    // So a separate matrix with ones on the affected diagonal positions is built with insertGlobalValues,
    // the original Ac is added to it, and the sum is used as the new (non-singular) Ac.
    // This is very inefficient.
    //
    // If you know something better, please let me know.
    RCP<Matrix> fixDiagMatrix = MatrixFactory::Build(rowMap, 1);

    for (size_t row = 0; row < numLocalRows; row++) {
      if (!(diagVal[row] == zero))
        continue;

      GO grid = rowMap->getGlobalElement(row);
      Teuchos::ArrayRCP<GO> indout(1,grid);
      Teuchos::ArrayRCP<SC> valout(1, one);
      fixDiagMatrix->insertGlobalValues(grid,indout.view(0, 1), valout.view(0, 1));
    }

    {
      Teuchos::TimeMonitor m1(*Teuchos::TimeMonitor::getNewTimer("CheckRepairMainDiagonal: fillComplete1"));
      Ac->fillComplete(p);
    }

    MueLu::Utils2<Scalar, LocalOrdinal, GlobalOrdinal, Node>::TwoMatrixAdd(*Ac, false, 1.0, *fixDiagMatrix, 1.0);
    if (Ac->IsView("stridedMaps"))
      fixDiagMatrix->CreateView("stridedMaps", Ac);

    Ac = Teuchos::null;     // free singular coarse level matrix
    Ac = fixDiagMatrix;     // set fixed non-singular coarse level matrix
  }

  // call fillComplete with optimized storage option set to true
  // This is necessary for new faster Epetra MM kernels.
  {
    Teuchos::TimeMonitor m1(*Teuchos::TimeMonitor::getNewTimer("CheckRepairMainDiagonal: fillComplete2"));
    Ac->fillComplete(p);
  }

  // print some output
  if (IsPrint(Warnings0))
    GetOStream(Warnings0) << "RAPFactory (WARNING): " << (repairZeroDiagonals ? "repaired " : "found ") << gZeroDiags << " zeros on main diagonal of Ac." << std::endl;

#ifdef HAVE_MUELU_DEBUG // only for debugging
  // check whether Ac has been repaired...
  Ac->getLocalDiagCopy(*diagVec);
  Teuchos::ArrayRCP< Scalar > diagVal2 = diagVec->getDataNonConst(0);
  for (size_t r = 0; r < Ac->getRowMap()->getNodeNumElements(); r++) {
    if (diagVal2[r] == zero) {
      GetOStream(Errors,-1) << "Error: there are zeros left on diagonal after repair..." << std::endl;
      break;
    }
  }
#endif
}
static lexicon Get() { lexicon c; register lexicon *pC = &c; register int character; if (!Empty) { *pC = lifo[rp]; rp++; if (rp == sizeof lifo/sizeof (lexicon)) { rp = 0; } if (rp == wp) { Empty = 1; } Full = 0; } else { character = GetC(); switch (character) { case EOF: pC->type = LEX_END_OF_FILE; break; case '^': character = GetC(); if (!IsPrint(character)) { pC->type = LEX_ILLEGAL; } else { pC->type = LEX_CARETED; if (character == '?') { character |= 0x40; /* rubout */ } else { character &= 0x1f; } } break; case '\\': character = GetC(); if (!IsPrint(character)) { pC->type = LEX_ILLEGAL; } else { pC->type = LEX_ESCAPED; switch (character) { case 'E': case 'e': character = ESCAPE; break; case 't': character = TAB; break; case 'n': character = NEWLINE; break; case 'r': character = CARRIAGE_RETURN; break; default: pC->type = LEX_ILLEGAL; break; } } break; default: if ((IsPrint(character)) || isspace(character)) { pC->type = LEX_CHAR; } else { pC->type = LEX_ILLEGAL; } break; } pC->value = character; } return(*pC); }
// Builds the smoothed prolongator for coarseLevel.
//
// The tentative prolongator "P" is fetched from the initial-P factory and,
// unless the damping factor is zero, smoothed with one damped Jacobi step
// using omega = dampingFactor / lambdaMax.  In prolongation mode the result is
// stored on coarseLevel as "P"; in restriction mode A is transposed first and
// the transpose of the result is stored as "R".
void SaPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::BuildP(Level &fineLevel, Level &coarseLevel) const {
  FactoryMonitor m(*this, "Prolongator smoothing", coarseLevel);

  // Textual level id, appended to the label passed to the Jacobi kernel.
  std::ostringstream levelstr;
  levelstr << coarseLevel.GetLevelID();

  typedef typename Teuchos::ScalarTraits<SC>::magnitudeType Magnitude;

  // Resolve the tentative prolongator factory through GetFactory so that
  // SaPFactory and NullspaceFactory end up using the same factory instance.
  // -- Warning: never use initialPFact_ directly; always go through the local
  //    handle below.
  RCP<const FactoryBase> tentativePFact = GetFactory("P");
  if (tentativePFact == Teuchos::null) {
    tentativePFact = coarseLevel.GetFactoryManager()->GetFactory("Ptent");
  }

  // Level Get
  RCP<Matrix> A     = Get< RCP<Matrix> >(fineLevel, "A");
  RCP<Matrix> Ptent = coarseLevel.Get< RCP<Matrix> >("P", tentativePFact.get());

  if (restrictionMode_) {
    SubFactoryMonitor transposeMonitor(*this, "Transpose A", coarseLevel);

    // Build the transpose of A explicitly
    A = Utils2::Transpose(*A, true);
  }

  // Build final prolongator
  RCP<Matrix> smoothedP; // output

  const ParameterList & paramList = GetParameterList();
  Scalar dampingFactor      = as<Scalar>(paramList.get<double>("sa: damping factor"));
  LO     maxEigenIterations = as<LO>(paramList.get<int>("sa: eigenvalue estimate num iterations"));
  bool   estimateMaxEigen   = paramList.get<bool>("sa: calculate eigenvalue estimate");

  if (dampingFactor == Teuchos::ScalarTraits<Scalar>::zero()) {
    // No damping requested: the tentative prolongator is used unchanged.
    smoothedP = Ptent;

  } else {
    Scalar lambdaMax;
    {
      SubFactoryMonitor eigenMonitor(*this, "Eigenvalue estimate", coarseLevel);

      lambdaMax = A->GetMaxEigenvalueEstimate();

      // -1 is the value this code treats as "no estimate stored on A"
      const bool mustEstimate = (lambdaMax == -Teuchos::ScalarTraits<SC>::one()) || estimateMaxEigen;
      if (mustEstimate) {
        GetOStream(Statistics1) << "Calculating max eigenvalue estimate now (max iters = "<< maxEigenIterations << ")" << std::endl;

        Magnitude stopTol = 1e-4;
        lambdaMax = Utils::PowerMethod(*A, true, maxEigenIterations, stopTol);
        A->SetMaxEigenvalueEstimate(lambdaMax);

      } else {
        GetOStream(Statistics1) << "Using cached max eigenvalue estimate" << std::endl;
      }

      GetOStream(Statistics0) << "Prolongator damping factor = " << dampingFactor/lambdaMax << " (" << dampingFactor << " / " << lambdaMax << ")" << std::endl;
    }

    {
      SubFactoryMonitor jacobiMonitor(*this, "Fused (I-omega*D^{-1} A)*Ptent", coarseLevel);

      Teuchos::RCP<Vector> invDiag = Utils::GetMatrixDiagonalInverse(*A);
      SC omega = dampingFactor / lambdaMax;

      // finalP = Ptent + (I - \omega D^{-1}A) Ptent
      smoothedP = Utils::Jacobi(omega, *invDiag, *A, *Ptent, smoothedP, GetOStream(Statistics2),
                                std::string("MueLu::SaP-")+levelstr.str());
    }
  }

  // Level Set
  if (restrictionMode_) {
    // prolongation factory is in restriction mode
    RCP<Matrix> R = Utils2::Transpose(*smoothedP, true); // use Utils2 -> specialization for double
    Set(coarseLevel, "R", R);

    // NOTE: EXPERIMENTAL
    if (Ptent->IsView("stridedMaps"))
      R->CreateView("stridedMaps", Ptent, true);

  } else {
    // prolongation factory is in prolongation mode
    Set(coarseLevel, "P", smoothedP);

    // NOTE: EXPERIMENTAL
    if (Ptent->IsView("stridedMaps"))
      smoothedP->CreateView("stridedMaps", Ptent);
  }

  if (IsPrint(Statistics1)) {
    RCP<ParameterList> printOptions = rcp(new ParameterList());
    printOptions->set("printLoadBalancingInfo", true);
    printOptions->set("printCommInfo",          true);
    GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*smoothedP, (restrictionMode_ ? "R" : "P"), printOptions);
  }
} //Build()
// Computes the coarse-level operator Ac = R*A*P (or P'*A*P when the transpose
// is used implicitly), stores it on coarseLevel as "A", and afterwards runs
// every user-registered transfer factory.
void RAPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level& fineLevel, Level& coarseLevel) const {
  {
    FactoryMonitor m(*this, "Computing Ac", coarseLevel);

    // Textual level id, appended to the labels passed to the multiply kernels.
    std::ostringstream levelstr;
    levelstr << coarseLevel.GetLevelID();

    TEUCHOS_TEST_FOR_EXCEPTION(!hasDeclaredInput_, Exceptions::RuntimeError,
                               "MueLu::RAPFactory::Build(): CallDeclareInput has not been called before Build!");

    // Set "Keeps" from params
    const Teuchos::ParameterList& paramList = GetParameterList();
    if (paramList.get<bool>("Keep AP Pattern"))
      coarseLevel.Keep("AP Pattern", this);
    if (paramList.get<bool>("Keep RAP Pattern"))
      coarseLevel.Keep("RAP Pattern", this);

    RCP<Matrix> A = Get< RCP<Matrix> >(fineLevel,   "A");
    RCP<Matrix> P = Get< RCP<Matrix> >(coarseLevel, "P");
    RCP<Matrix> AP;
    RCP<Matrix> Ac;

    // Reuse pattern if available (multiple solve)
    if (coarseLevel.IsAvailable("AP Pattern", this)) {
      GetOStream(Runtime0) << "Ac: Using previous AP pattern" << std::endl;

      AP = Get< RCP<Matrix> >(coarseLevel, "AP Pattern");
    }

    {
      SubFactoryMonitor apMonitor(*this, "MxM: A x P", coarseLevel);

      AP = Utils::Multiply(*A, false, *P, false, AP, GetOStream(Statistics2), true, true,
                           std::string("MueLu::A*P-")+levelstr.str());
    }
    if (paramList.get<bool>("Keep AP Pattern"))
      Set(coarseLevel, "AP Pattern", AP);

    // Reuse coarse matrix memory if available (multiple solve)
    if (coarseLevel.IsAvailable("RAP Pattern", this)) {
      GetOStream(Runtime0) << "Ac: Using previous RAP pattern" << std::endl;

      Ac = Get< RCP<Matrix> >(coarseLevel, "RAP Pattern");

      // An eigenvalue estimate may have been cached with the matrix during the
      // previous run. The matrix values are about to change, so reset it.
      Ac->SetMaxEigenvalueEstimate(-Teuchos::ScalarTraits<SC>::one());
    }

    // If we do not modify the matrix later, allow optimization of storage.
    // This is necessary for new faster Epetra MM kernels.
    bool doOptimizeStorage = !paramList.get<bool>("RepairMainDiagonal");

    const bool useTranspose   = true;
    const bool noTranspose    = false;
    const bool doFillComplete = true;

    if (paramList.get<bool>("transpose: use implicit")) {
      SubFactoryMonitor rapMonitor(*this, "MxM: P' x (AP) (implicit)", coarseLevel);

      Ac = Utils::Multiply(*P, useTranspose, *AP, noTranspose, Ac, GetOStream(Statistics2),
                           doFillComplete, doOptimizeStorage,
                           std::string("MueLu::R*(AP)-implicit-")+levelstr.str());

    } else {
      RCP<Matrix> R = Get< RCP<Matrix> >(coarseLevel, "R");

      SubFactoryMonitor rapMonitor(*this, "MxM: R x (AP) (explicit)", coarseLevel);

      Ac = Utils::Multiply(*R, noTranspose, *AP, noTranspose, Ac, GetOStream(Statistics2),
                           doFillComplete, doOptimizeStorage,
                           std::string("MueLu::R*(AP)-explicit-")+levelstr.str());
    }

    CheckRepairMainDiagonal(Ac);

    if (IsPrint(Statistics1)) {
      RCP<ParameterList> printOptions = rcp(new ParameterList());
      printOptions->set("printLoadBalancingInfo", true);
      printOptions->set("printCommInfo",          true);
      GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*Ac, "Ac", printOptions);
    }

    Set(coarseLevel, "A", Ac);
    if (paramList.get<bool>("Keep RAP Pattern"))
      Set(coarseLevel, "RAP Pattern", Ac);
  }

  if (!transferFacts_.empty()) {
    SubFactoryMonitor m(*this, "Projections", coarseLevel);

    // call Build of all user-given transfer factories
    for (size_t k = 0; k < transferFacts_.size(); ++k) {
      RCP<const FactoryBase> fac = transferFacts_[k];

      GetOStream(Runtime0) << "RAPFactory: call transfer factory: " << fac->description() << std::endl;

      fac->CallBuild(coarseLevel);

      // Why the explicit Release below is needed:
      //
      // Coordinates transfer differs slightly from all other operations in
      // that it is *optional*. For instance, coordinates may be needed only on
      // level 4 if repartitioning starts there, but not on levels 1, 2, 3.
      // The current Hierarchy setup assumes that dependencies propagate
      // through three levels only, so optional data has to be propagated by
      // other means.
      //
      // The means currently used are RAP transfer factories: factories called
      // at the end of RAP with the single goal of transferring some fine data
      // to the coarser level. Since they sit somewhat outside the mainline
      // factories, they behave differently. In particular, their Build method
      // is called explicitly rather than through Get calls. That difference
      // matters: a Get call is smart enough to know when to release all
      // factory dependencies, whereas Build is dumb. This led to the following
      // CoordinatesTransferFactory sequence:
      //   1. Request level 0
      //   2. Request level 1
      //   3. Request level 0
      //   4. Release level 0
      //   5. Release level 1
      //
      // What is missing is "6. Release level 0". Without it there were
      // outstanding requests on "Coordinates", "Aggregates" and "CoarseMap"
      // on level 0.
      //
      // This was fixed by explicitly calling Release on transfer factories in
      // RAPFactory. It is still not entirely clear how exactly it works, but
      // now there are clean data requests for all levels.
      coarseLevel.Release(*fac);
    }
  }
}
bool Input::Test( ) { bool ok = true; cout << "Testing Input" << endl; Input & input = Input::Instance(); try { #if defined(USE_CWIID) cout << "To use a Wiimote, hold buttons 1 & 2 down." << endl; #endif cout << "Init()" << endl; input.Init( ); cout << "Number of devices: " << input.NumDevices() << endl; cout << "Index\tType\t\tButtons\tPnters\tAxes\tAccels\tName" << endl; for ( int i = 0; i < input.NumDevices(); ++i ) { shared_ptr< InputDevice const > pDev = input.Device( i ); Assert( pDev != 0 ); cout << i << "\t" << DeviceTypeName( pDev->Type() ) << " " << "\t" << pDev->NumButtons() << "\t" << pDev->NumPointers() << "\t" << pDev->NumAxes() << "\t" << pDev->NumAccelerometers() << "\t" << pDev->Name() << endl; } cout << endl; cout << "Shutdown()" << endl; input.Shutdown( ); cout << "Init()" << endl; input.Init( ); for ( int i = 0; i < input.NumDevices(); ++i ) { shared_ptr< InputDevice const > pDev = input.Device( i ); Assert( pDev != 0 ); } cout << "Init() again" << endl; input.Init( ); cout << endl; cout << "Number of devices: " << input.NumDevices() << endl; cout << "Index\tType\t\tButtons\tPnters\tAxes\tAccels\tName" << endl; for ( int i = 0; i < input.NumDevices(); ++i ) { shared_ptr< InputDevice const > pDev = input.Device( i ); Assert( pDev != 0 ); cout << i << "\t" << DeviceTypeName( pDev->Type() ) << " " << "\t" << pDev->NumButtons() << "\t" << pDev->NumPointers() << "\t" << pDev->NumAxes() << "\t" << pDev->NumAccelerometers() << "\t" << pDev->Name() << endl; } cout << endl; cout << "Ready to report events." 
<< endl; Timer realTime; while ( true ) { input.Update( ); shared_ptr< InputEvent const > pEvent = input.CheckEvent( ); if ( pEvent ) { shared_ptr< InputDevice const > pDev = pEvent->Device(); int button = pEvent->Button(); cout << "Button=" << hex << setw(2) << setfill('0') << button << dec << setw(0) << setfill(' '); if ( (pDev->Type() == InputDevice::Keyboard) && (button <= 0xFF) && IsPrint( (char)button ) ) cout << " (" << (char)button << ")"; cout << " Device: Type=" << DeviceTypeName( pDev->Type() ) << " Name=\"" << pDev->Name() << "\"" << endl; cout << " Buttons down: "; for ( int i = 0; i < pDev->NumButtons(); ++i ) if ( pDev->ButtonDown( i ) ) cout << hex << setw(2) << setfill('0') << i << dec << setw(0) << setfill(' ') << " "; cout << endl; if ( pDev->NumPointers() > 0 ) { cout << " Pointers: "; for ( int i = 0; i < pDev->NumPointers(); ++i ) cout << i << ": " << pDev->Pointer( i ) << " "; cout << endl; } if ( pDev->NumAxes() > 0 ) { cout << " Axes: "; for ( int i = 0; i < pDev->NumAxes(); ++i ) cout << i << ": " << pDev->Axis( i ) << " "; cout << endl; } if ( pDev->NumAccelerometers() > 0 ) { cout << " Accelerometers: "; for ( int i = 0; i < pDev->NumAccelerometers(); ++i ) cout << i << ": " << "Accel=" << pDev->Acceleration( i ) << " Gravity=" << pDev->Gravity( i ) << " "; cout << endl; } } if ( realTime.Seconds( ) > 60. ) { cout << "Time's up" << endl; break; } } realTime.Reset(); cout << "SetTextInput( true )" << endl; input.SetTextInput( true ); while ( true ) { input.Update( ); shared_ptr< InputEvent const > pEvent = input.CheckEvent( ); if ( pEvent ) { if ( pEvent->Device()->Type() == InputDevice::Keyboard ) { int button = pEvent->Button(); if ( (button < MaximumCodePoint) && IsPrint( (wchar_t)button ) ) wcout << (wchar_t)button << flush; else if ( button > 0 ) cout << "[" << hex << setw(2) << setfill('0') << button << dec << setw(0) << setfill(' ') << "]" << flush; } } if ( realTime.Seconds( ) > 60. 
) { cout << endl << "Time's up" << endl; break; } } wcout << endl; } catch ( Exception & except ) { cout << except.Description( ) << endl; ok = false; } if ( ok ) cout << "Input PASSED." << endl << endl; else cout << "Input FAILED." << endl << endl; return ok; }
void ByteSink::Vprintf(const char * format, va_list & ap) { MFM_API_ASSERT_NONNULL(format); u8 p; Format::Type type; bool alt; s32 fieldWidth; u8 padChar; while ((p = *format++)) { if (p != '%') { if (p == '\n') // '\n's _in_the_format_string_ are Println(); // treated as packet delimiters! else Print(p,Format::BYTE); continue; } alt = false; fieldWidth = -1; padChar = ' '; again: switch (p = *format++) { case '#': alt = true; goto again; case '0': if (fieldWidth < 0) { padChar = '0'; fieldWidth = 0; } else fieldWidth *= 10; goto again; case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': if (fieldWidth < 0) fieldWidth = 0; fieldWidth = fieldWidth * 10 + (p - '0'); goto again; case 'c': { u32 ch = va_arg(ap,int); if (!alt || IsPrint(ch)) Print(ch,Format::BYTE); else { Print('[',Format::BYTE); if (ch < 0x10) Print('0', Format::BYTE); Print(ch, Format::HEX); Print(']', Format::BYTE); } } break; case '<': // Print the rest of a given &ByteSource { ByteSource * bs = va_arg(ap,ByteSource*); if (!bs) Print("(null)"); else Copy(*bs); } break; case '@': { s32 argument = 0; if (alt) argument = va_arg(ap,s32); ByteSerializable * bs = va_arg(ap,ByteSerializable*); if (!bs) Print("(null)"); else Print(*bs, argument); } break; case 'q': Print(va_arg(ap,u64), Format::BEU64); break; case 'H': type = Format::LEXHD; goto print; case 'h': type = Format::BEU16; goto print; case 'l': type = Format::BEU32; goto print; case 'b': type = Format::BIN; goto printbase; case 'o': type = Format::OCT; goto printbase; case 'u': type = Format::DEC; goto printbase; case 'd': if (alt) { type = Format::DEC; goto printbase; } else { Print(va_arg(ap,s32), fieldWidth, padChar); break; } case 'x': type = Format::HEX; goto printbase; case 't': type = Format::B36; goto printbase; case 'D': type = Format::LEX32; goto print; case 'X': type = Format::LXX32; goto print; printbase: PrintInBase(va_arg(ap,u32), type, fieldWidth, padChar); break; print: 
Print(va_arg(ap,u32),type, fieldWidth, padChar); break; case 'f': { FAIL(INCOMPLETE_CODE); /* double v = va_arg(ap,double); ByteSink::Print(face,v); break; */ } /* %Z: Print a null-terminated string INCLUDING a trailing NULL */ case 'Z': { const char * s = va_arg(ap,const char *); if (!s) Print("(null)", fieldWidth, padChar); else Print(s, fieldWidth, padChar); } /* FALL THROUGH */ /* %z: Print a null byte. Consumes no args */ case 'z': WriteByte('\0'); break; case 's': { const char * s = va_arg(ap,const char *); if (!s) Print("(null)", fieldWidth, padChar); else Print(s, fieldWidth, padChar); break; } case 'S': { const char * s = va_arg(ap,const char *); if (!s) Print("(null)", fieldWidth, padChar); else PrintDoubleQuotedCStringWithLength(s); break; } case 'p': { const void * p = va_arg(ap,void *); if (!p) Print("(nullp)"); else { Print("0x"); Print((uptr) p, Format::HEX); } break; } case '%': Print(p, Format::BYTE); break; default: // Either I don't know that code, or you're bogus. FAIL(BAD_FORMAT_ARG); // Either way, I die. You're welcome. } } }
// Decides whether the matrix "A" of currentLevel should be repartitioned and,
// if so, builds the row-map Importer that realizes the new distribution.
//
// Outcome on currentLevel:
//   * "Importer" is set to Teuchos::null whenever the function decides not to
//     repartition (level below the start level, only one process owns rows,
//     neither the row-count nor the nonzero-imbalance test triggers, or the
//     "Partition" data comes back null).
//   * Otherwise "Importer" is set to an Import from the current row map to the
//     new row map, and "number of partitions" is stored (with NoFactory) when
//     it was not already available.
//
// NOTE: the function contains several collective operations (sumAll / minAll /
// maxAll, Map/Export construction, MPI point-to-point exchange, MPI_Gather).
// The statement order below is therefore significant.
void RepartitionFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level& currentLevel) const {
  FactoryMonitor m(*this, "Build", currentLevel);

  const Teuchos::ParameterList & pL = GetParameterList();
  // Access parameters here to make sure that we set the parameter entry flag to "used" even in case of short-circuit evaluation.
  // TODO (JG): I don't really know if we want to do this.
  const int    startLevel          = pL.get<int>   ("repartition: start level");
  const LO     minRowsPerProcessor = pL.get<LO>    ("repartition: min rows per proc");
  const double nonzeroImbalance    = pL.get<double>("repartition: max imbalance");
  const bool   remapPartitions     = pL.get<bool>  ("repartition: remap parts");

  // TODO: We only need a CrsGraph. This class does not have to be templated on Scalar types.
  RCP<Matrix> A = Get< RCP<Matrix> >(currentLevel, "A");

  // ======================================================================================================
  // Determine whether partitioning is needed
  // ======================================================================================================
  // NOTE: most tests include some global communication, which is why we currently only do tests until we make
  // a decision on whether to repartition. However, there is value in knowing how "close" we are to having to
  // rebalance an operator. So, it would probably be beneficial to do and report *all* tests.

  // Test1: skip repartitioning if current level is less than the specified minimum level for repartitioning
  if (currentLevel.GetLevelID() < startLevel) {
    GetOStream(Statistics0) << "Repartitioning? NO:" <<
        "\n current level = " << Teuchos::toString(currentLevel.GetLevelID()) <<
        ", first level where repartitioning can happen is " + Teuchos::toString(startLevel) << std::endl;

    Set<RCP<const Import> >(currentLevel, "Importer", Teuchos::null);
    return;
  }

  RCP<const Map> rowMap = A->getRowMap();

  // NOTE: Teuchos::MPIComm::duplicate() calls MPI_Bcast inside, so this is
  // a synchronization point. However, as we do MueLu_sumAll afterwards anyway, it
  // does not matter.
  RCP<const Teuchos::Comm<int> > origComm = rowMap->getComm();
  RCP<const Teuchos::Comm<int> > comm     = origComm->duplicate();

  // Test 2: check whether A is actually distributed, i.e. more than one processor owns part of A
  // TODO: this global communication can be avoided if we store the information with the matrix (it is known when matrix is created)
  // TODO: further improvements could be achieved when we use subcommunicator for the active set. Then we only need to check its size
  {
    int numActiveProcesses = 0;
    MueLu_sumAll(comm, Teuchos::as<int>((A->getNodeNumRows() > 0) ? 1 : 0), numActiveProcesses);

    if (numActiveProcesses == 1) {
      GetOStream(Statistics0) << "Repartitioning? NO:" <<
          "\n # processes with rows = " << Teuchos::toString(numActiveProcesses) << std::endl;

      Set<RCP<const Import> >(currentLevel, "Importer", Teuchos::null);
      return;
    }
  }

  bool test3 = false, test4 = false;
  std::string msg3, msg4;

  // Test3: check whether number of rows on any processor satisfies the minimum number of rows requirement
  // NOTE: Test2 ensures that repartitionning is not done when there is only one processor (it may or may not satisfy Test3)
  if (minRowsPerProcessor > 0) {
    LO numMyRows = Teuchos::as<LO>(A->getNodeNumRows()), minNumRows, LOMAX = Teuchos::OrdinalTraits<LO>::max();
    LO haveFewRows = (numMyRows < minRowsPerProcessor ? 1 : 0), numWithFewRows = 0;
    MueLu_sumAll(comm, haveFewRows, numWithFewRows);
    // Processes without rows contribute LOMAX so that they do not determine the minimum
    MueLu_minAll(comm, (numMyRows > 0 ? numMyRows : LOMAX), minNumRows);

    // TODO: we could change it to repartition only if the number of processors with numRows < minNumRows is larger than some
    // percentage of the total number. This way, we won't repartition if 2 out of 1000 processors don't have enough elements.
    // I'm thinking maybe 20% threshold. To implement, simply add " && numWithFewRows < .2*numProcs" to the if statement.
    if (numWithFewRows > 0)
      test3 = true;

    msg3 = "\n min # rows per proc = " + Teuchos::toString(minNumRows) + ", min allowable = " + Teuchos::toString(minRowsPerProcessor);
  }

  // Test4: check whether the balance in the number of nonzeros per processor is greater than threshold
  if (!test3) {
    GO minNnz, maxNnz, numMyNnz = Teuchos::as<GO>(A->getNodeNumEntries());
    MueLu_maxAll(comm, numMyNnz,                           maxNnz);
    MueLu_minAll(comm, (numMyNnz > 0 ? numMyNnz : maxNnz), minNnz); // min nnz over all active processors
    double imbalance = Teuchos::as<double>(maxNnz)/minNnz;

    if (imbalance > nonzeroImbalance)
      test4 = true;

    msg4 = "\n nonzero imbalance = " + Teuchos::toString(imbalance) + ", max allowable = " + Teuchos::toString(nonzeroImbalance);
  }

  if (!test3 && !test4) {
    GetOStream(Statistics0) << "Repartitioning? NO:" << msg3 + msg4 << std::endl;

    Set<RCP<const Import> >(currentLevel, "Importer", Teuchos::null);
    return;
  }

  GetOStream(Statistics0) << "Repartitioning? YES:" << msg3 + msg4 << std::endl;

  GO                     indexBase = rowMap->getIndexBase();
  Xpetra::UnderlyingLib  lib       = rowMap->lib();
  int myRank   = comm->getRank();
  int numProcs = comm->getSize();

  // The GID exchange further below uses raw MPI calls, which need the raw communicator
  RCP<const Teuchos::MpiComm<int> > tmpic = rcp_dynamic_cast<const Teuchos::MpiComm<int> >(comm);
  TEUCHOS_TEST_FOR_EXCEPTION(tmpic == Teuchos::null, Exceptions::RuntimeError, "Cannot cast base Teuchos::Comm to Teuchos::MpiComm object.");
  RCP<const Teuchos::OpaqueWrapper<MPI_Comm> > rawMpiComm = tmpic->getRawMpiComm();

  // ======================================================================================================
  // Calculate number of partitions
  // ======================================================================================================
  // FIXME Quick way to figure out how many partitions there should be (same algorithm as ML)
  // FIXME Should take into account nnz? Perhaps only when user is using min #nnz per row threshold.
  GO numPartitions;
  if (currentLevel.IsAvailable("number of partitions")) {
    numPartitions = currentLevel.Get<GO>("number of partitions");
    GetOStream(Warnings0) << "Using user-provided \"number of partitions\", the performance is unknown" << std::endl;

  } else {
    if (minRowsPerProcessor <= 0) {
      // No usable row target: Test3 is skipped for such values, so we can only get here through the
      // nonzero-imbalance test. Dividing by minRowsPerProcessor would be a division by zero (or, for a
      // negative value, yield zero partitions), so spread the matrix over all available processes instead.
      numPartitions = Teuchos::as<GO>(numProcs);

    } else if (Teuchos::as<GO>(A->getGlobalNumRows()) < minRowsPerProcessor) {
      // System is too small, migrate it to a single processor
      numPartitions = 1;

    } else {
      // Make sure that each processor has approximately minRowsPerProcessor
      numPartitions = A->getGlobalNumRows() / minRowsPerProcessor;
    }
    numPartitions = std::min(numPartitions, Teuchos::as<GO>(numProcs));

    currentLevel.Set("number of partitions", numPartitions, NoFactory::get());
  }
  GetOStream(Statistics0) << "Number of partitions to use = " << numPartitions << std::endl;

  // ======================================================================================================
  // Construct decomposition vector
  // ======================================================================================================
  RCP<GOVector> decomposition;
  if (numPartitions == 1) {
    // Trivial case: decomposition is the trivial one, all zeros. We skip the call to Zoltan_Interface
    // (this is mostly done to avoid extra output messages, as even if we didn't skip there is a shortcut
    // in Zoltan[12]Interface).
    // TODO: We can probably skip more work in this case (like building all extra data structures)
    GetOStream(Warnings0) << "Only one partition: Skip call to the repartitioner." << std::endl;
    decomposition = Xpetra::VectorFactory<GO, LO, GO, NO>::Build(A->getRowMap(), true);

  } else {
    decomposition = Get<RCP<GOVector> >(currentLevel, "Partition");

    if (decomposition.is_null()) {
      GetOStream(Warnings0) << "No repartitioning necessary: partitions were left unchanged by the repartitioner" << std::endl;
      Set<RCP<const Import> >(currentLevel, "Importer", Teuchos::null);
      return;
    }
  }

  // ======================================================================================================
  // Remap if necessary
  // ======================================================================================================
  // From a user perspective, we want user to not care about remapping, thinking of it as only a performance feature.
  // There are two problems, however.
  // (1) Next level aggregation depends on the order of GIDs in the vector, if one uses "natural" or "random" orderings.
  //     This also means that remapping affects next level aggregation, despite the fact that the _set_ of GIDs for
  //     each partition is the same.
  // (2) Even with the fixed order of GIDs, the remapping may influence the aggregation for the next-next level.
  //     Let us consider the following example. Lets assume that when we don't do remapping, processor 0 would have
  //     GIDs {0,1,2}, and processor 1 GIDs {3,4,5}, and if we do remapping processor 0 would contain {3,4,5} and
  //     processor 1 {0,1,2}. Now, when we run repartitioning algorithm on the next level (say Zoltan1 RCB), it may
  //     be dependent on whether it is [{0,1,2}, {3,4,5}] or [{3,4,5}, {0,1,2}]. Specifically, the tie-breaking
  //     algorithm can resolve these differently. For instance, running
  //         mpirun -np 5 ./MueLu_ScalingTestParamList.exe --xml=easy_sa.xml --nx=12 --ny=12 --nz=12
  //     with
  //         <ParameterList name="MueLu">
  //           <Parameter name="coarse: max size" type="int" value="1"/>
  //           <Parameter name="repartition: enable" type="bool" value="true"/>
  //           <Parameter name="repartition: min rows per proc" type="int" value="2"/>
  //           <ParameterList name="level 1">
  //             <Parameter name="repartition: remap parts" type="bool" value="false/true"/>
  //           </ParameterList>
  //         </ParameterList>
  //     produces different repartitioning for level 2.
  //     This different repartitioning may then escalate into different aggregation for the next level.
  //
  // We fix (1) by fixing the order of GIDs in a vector by sorting the resulting vector.
  // Fixing (2) is more complicated.
  // FIXME: Fixing (2) in Zoltan may not be enough, as we may use some arbitration in MueLu,
  // for instance with CoupledAggregation. What we really need to do is to use the same order of processors containing
  // the same order of GIDs. To achieve that, the newly created subcommunicator must be conforming with the order. For
  // instance, if we have [{0,1,2}, {3,4,5}], we create a subcommunicator where processor 0 gets rank 0, and processor 1
  // gets rank 1. If, on the other hand, we have [{3,4,5}, {0,1,2}], we assign rank 1 to processor 0, and rank 0 to processor 1.
  // This rank permutation requires help from Epetra/Tpetra, both of which have no such API in place.
  // One should also be concerned that if we had such API in place, rank 0 in subcommunicator may no longer be rank 0 in
  // MPI_COMM_WORLD, which may lead to issues for logging.
  if (remapPartitions) {
    SubFactoryMonitor m1(*this, "DeterminePartitionPlacement", currentLevel);

    DeterminePartitionPlacement(*A, *decomposition, numPartitions);
  }

  // ======================================================================================================
  // Construct importer
  // ======================================================================================================
  // At this point, the following is true:
  //  * Each processors owns 0 or 1 partitions
  //  * If a processor owns a partition, that partition number is equal to the processor rank
  //  * The decomposition vector contains the partitions ids that the corresponding GID belongs to

  ArrayRCP<const GO> decompEntries;
  if (decomposition->getLocalLength() > 0)
    decompEntries = decomposition->getData(0);

#ifdef HAVE_MUELU_DEBUG
  // Test range of partition ids
  int incorrectRank = -1;
  for (int i = 0; i < decompEntries.size(); i++)
    if (decompEntries[i] >= numProcs || decompEntries[i] < 0) {
      incorrectRank = myRank;
      break;
    }

  int incorrectGlobalRank = -1;
  MueLu_maxAll(comm, incorrectRank, incorrectGlobalRank);
  TEUCHOS_TEST_FOR_EXCEPTION(incorrectGlobalRank > -1, Exceptions::RuntimeError, "pid " + Teuchos::toString(incorrectGlobalRank) + " encountered a partition number is that out-of-range");
#endif

  Array<GO> myGIDs;
  myGIDs.reserve(decomposition->getLocalLength());

  // Step 0: Construct mapping
  //    part number -> GIDs I own which belong to this part
  // NOTE: my own part GIDs are not part of the map
  typedef std::map<GO, Array<GO> > map_type;
  map_type sendMap;
  for (LO i = 0; i < decompEntries.size(); i++) {
    GO id  = decompEntries[i];
    GO GID = rowMap->getGlobalElement(i);

    if (id == myRank)
      myGIDs     .push_back(GID);
    else
      sendMap[id].push_back(GID);
  }
  decompEntries = Teuchos::null;

  if (IsPrint(Statistics2)) {
    GO numLocalKept = myGIDs.size(), numGlobalKept, numGlobalRows = A->getGlobalNumRows();
    MueLu_sumAll(comm,numLocalKept, numGlobalKept);
    GetOStream(Statistics2) << "Unmoved rows: " << numGlobalKept << " / " << numGlobalRows << " (" << 100*Teuchos::as<double>(numGlobalKept)/numGlobalRows << "%)" << std::endl;
  }

  int numSend = sendMap.size(), numRecv;

  // Arrayify map keys
  Array<GO> myParts(numSend), myPart(1);
  int cnt = 0;
  myPart[0] = myRank;
  for (typename map_type::const_iterator it = sendMap.begin(); it != sendMap.end(); it++)
    myParts[cnt++] = it->first;

  // Step 1: Find out how many processors send me data
  // partsIndexBase starts from zero, as the processors ids start from zero
  GO partsIndexBase = 0;
  RCP<Map>    partsIHave  = MapFactory   ::Build(lib, Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(), myParts(), partsIndexBase, comm);
  RCP<Map>    partsIOwn   = MapFactory   ::Build(lib,                                                 numProcs,  myPart(), partsIndexBase, comm);
  RCP<Export> partsExport = ExportFactory::Build(partsIHave, partsIOwn);

  RCP<GOVector> partsISend    = Xpetra::VectorFactory<GO, LO, GO, NO>::Build(partsIHave);
  RCP<GOVector> numPartsIRecv = Xpetra::VectorFactory<GO, LO, GO, NO>::Build(partsIOwn);
  if (numSend) {
    // One unit per destination part; the ADD export below sums these per owner
    ArrayRCP<GO> partsISendData = partsISend->getDataNonConst(0);
    for (int i = 0; i < numSend; i++)
      partsISendData[i] = 1;
  }
  (numPartsIRecv->getDataNonConst(0))[0] = 0;

  numPartsIRecv->doExport(*partsISend, *partsExport, Xpetra::ADD);
  numRecv = (numPartsIRecv->getData(0))[0];

  // Step 2: Get my GIDs from everybody else
  MPI_Datatype MpiType = MpiTypeTraits<GO>::getType();
  int msgTag = 12345;  // TODO: use Comm::dup for all internal messaging

  // Post sends
  Array<MPI_Request> sendReqs(numSend);
  cnt = 0;
  for (typename map_type::iterator it = sendMap.begin(); it != sendMap.end(); it++)
    // The destination of MPI_Isend is an int rank, so convert the part id to int explicitly
    MPI_Isend(static_cast<void*>(it->second.getRawPtr()), it->second.size(), MpiType, Teuchos::as<int>(it->first), msgTag, *rawMpiComm, &sendReqs[cnt++]);

  map_type recvMap;
  size_t totalGIDs = myGIDs.size();
  for (int i = 0; i < numRecv; i++) {
    MPI_Status status;
    MPI_Probe(MPI_ANY_SOURCE, msgTag, *rawMpiComm, &status);

    // Get rank and number of elements from status
    int fromRank = status.MPI_SOURCE, count;
    MPI_Get_count(&status, MpiType, &count);

    recvMap[fromRank].resize(count);
    MPI_Recv(static_cast<void*>(recvMap[fromRank].getRawPtr()), count, MpiType, fromRank, msgTag, *rawMpiComm, &status);

    totalGIDs += count;
  }

  // Do waits on send requests
  if (numSend) {
    Array<MPI_Status> sendStatuses(numSend);
    MPI_Waitall(numSend, sendReqs.getRawPtr(), sendStatuses.getRawPtr());
  }

  // Merge GIDs
  myGIDs.reserve(totalGIDs);
  for (typename map_type::const_iterator it = recvMap.begin(); it != recvMap.end(); it++) {
    int offset = myGIDs.size(), len = it->second.size();
    if (len) {
      myGIDs.resize(offset + len);
      memcpy(myGIDs.getRawPtr() + offset, it->second.getRawPtr(), len*sizeof(GO));
    }
  }
  // NOTE 2: The general sorting algorithm could be sped up by using the knowledge that original myGIDs and all received chunks
  // (i.e. it->second) are sorted. Therefore, a merge sort would work well in this situation.
  std::sort(myGIDs.begin(), myGIDs.end());

  // Step 3: Construct importer
  RCP<Map>          newRowMap      = MapFactory   ::Build(lib, rowMap->getGlobalNumElements(), myGIDs(), indexBase, origComm);
  RCP<const Import> rowMapImporter;
  {
    SubFactoryMonitor m1(*this, "Import construction", currentLevel);
    rowMapImporter = ImportFactory::Build(rowMap, newRowMap);
  }

  Set(currentLevel, "Importer", rowMapImporter);

  // ======================================================================================================
  // Print some data
  // ======================================================================================================
  if (pL.get<bool>("repartition: print partition distribution") && IsPrint(Statistics2)) {
    // Print the grid of processors
    GetOStream(Statistics2) << "Partition distribution over cores (ownership is indicated by '+')" << std::endl;

    // Each process reports whether it ends up owning any GIDs; the flags are gathered on rank 0
    char amActive = (myGIDs.size() ? 1 : 0);
    std::vector<char> areActive(numProcs, 0);
    MPI_Gather(&amActive, 1, MPI_CHAR, &areActive[0], 1, MPI_CHAR, 0, *rawMpiComm);

    int rowWidth = std::min(Teuchos::as<int>(ceil(sqrt(numProcs))), 100);
    for (int proc = 0; proc < numProcs; proc += rowWidth) {
      for (int j = 0; j < rowWidth; j++)
        if (proc + j < numProcs)
          GetOStream(Statistics2) << (areActive[proc + j] ? "+" : ".");
        else
          GetOStream(Statistics2) << " ";

      GetOStream(Statistics2) << " " << proc << ":" << std::min(proc + rowWidth, numProcs) - 1 << std::endl;
    }
  }

} // Build
// Build the "Aggregates" object for the graph stored on currentLevel.
//
// Steps, in order:
//   1. Assemble the list of aggregation algorithms (algos_) from the parameter
//      list; "aggregation: mode" == "old" selects the legacy "Use*" switches,
//      anything else selects the "aggregation: ..." switches. The isolated-node
//      algorithm is always appended last.
//   2. Fetch the optional one-point map and the graph from the level.
//   3. Initialise the per-vertex status array (READY / BOUNDARY / ONEPT).
//   4. Run every algorithm in sequence; with Statistics1 enabled, print the
//      global progress after each one.
//   5. Throw Exceptions::RuntimeError if any vertex is left non-aggregated,
//      otherwise store the result on the level under "Aggregates".
//
// The parameter declaration had been corrupted to "Level ¤tLevel" (a mangled
// "&currentLevel"); it is restored here so the body's uses of currentLevel
// refer to a declared name.
void UncoupledAggregationFactory<LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level& currentLevel) const {
  FactoryMonitor m(*this, "Build", currentLevel);

  ParameterList pL = GetParameterList();
  bDefinitionPhase_ = false;  // definition phase is finished, now all aggregation algorithm information is fixed

  // define aggregation algorithms
  RCP<const FactoryBase> graphFact = GetFactory("Graph");

  // TODO Can we keep different aggregation algorithms over more Build calls?
  algos_.clear();
  if (pL.get<std::string>("aggregation: mode") == "old") {
    if (pL.get<bool>("UseOnePtAggregationAlgorithm")             == true) algos_.push_back(rcp(new OnePtAggregationAlgorithm             (graphFact)));
    if (pL.get<bool>("UsePreserveDirichletAggregationAlgorithm") == true) algos_.push_back(rcp(new PreserveDirichletAggregationAlgorithm (graphFact)));
    if (pL.get<bool>("UseUncoupledAggregationAlgorithm")         == true) algos_.push_back(rcp(new AggregationPhase1Algorithm            (graphFact)));
    if (pL.get<bool>("UseMaxLinkAggregationAlgorithm")           == true) algos_.push_back(rcp(new MaxLinkAggregationAlgorithm           (graphFact)));
    if (pL.get<bool>("UseEmergencyAggregationAlgorithm")         == true) algos_.push_back(rcp(new EmergencyAggregationAlgorithm         (graphFact)));
    algos_.push_back(rcp(new IsolatedNodeAggregationAlgorithm (graphFact)));

  } else {
    if (pL.get<bool>("aggregation: preserve Dirichlet points") == true) algos_.push_back(rcp(new PreserveDirichletAggregationAlgorithm (graphFact)));
    if (pL.get<bool>("aggregation: enable phase 1" )           == true) algos_.push_back(rcp(new AggregationPhase1Algorithm            (graphFact)));
    if (pL.get<bool>("aggregation: enable phase 2a")           == true) algos_.push_back(rcp(new AggregationPhase2aAlgorithm           (graphFact)));
    if (pL.get<bool>("aggregation: enable phase 2b")           == true) algos_.push_back(rcp(new AggregationPhase2bAlgorithm           (graphFact)));
    if (pL.get<bool>("aggregation: enable phase 3" )           == true) algos_.push_back(rcp(new AggregationPhase3Algorithm            (graphFact)));
    algos_.push_back(rcp(new IsolatedNodeAggregationAlgorithm (graphFact)));
  }

  // Optional user-provided map; an empty name means "no one-point map".
  std::string mapOnePtName = pL.get<std::string>("OnePt aggregate map name");
  RCP<const Map> OnePtMap;
  if (mapOnePtName.length()) {
    RCP<const FactoryBase> mapOnePtFact = GetFactory("OnePt aggregate map factory");
    OnePtMap = currentLevel.Get<RCP<const Map> >(mapOnePtName, mapOnePtFact.get());
  }

  RCP<const GraphBase> graph = Get< RCP<GraphBase> >(currentLevel, "Graph");

  // Build
  RCP<Aggregates> aggregates = rcp(new Aggregates(*graph));
  aggregates->setObjectLabel("UC");

  const LO numRows = graph->GetNodeNumVertices();

  // construct aggStat information
  std::vector<unsigned> aggStat(numRows, READY);

  ArrayRCP<const bool> dirichletBoundaryMap = graph->GetBoundaryNodeMap();
  if (dirichletBoundaryMap != Teuchos::null)
    for (LO i = 0; i < numRows; i++)
      if (dirichletBoundaryMap[i] == true)
        aggStat[i] = BOUNDARY;

  LO nDofsPerNode = Get<LO>(currentLevel, "DofsPerNode");
  GO indexBase    = graph->GetDomainMap()->getIndexBase();
  if (OnePtMap != Teuchos::null) {
    for (LO i = 0; i < numRows; i++) {
      // reconstruct global row id (FIXME only works for contiguous maps)
      GO grid = (graph->GetDomainMap()->getGlobalElement(i)-indexBase) * nDofsPerNode + indexBase;

      // A vertex is flagged ONEPT as soon as any of its dofs is in the map
      // (this overrides a BOUNDARY flag set above).
      for (LO kr = 0; kr < nDofsPerNode; kr++)
        if (OnePtMap->isNodeGlobalElement(grid + kr))
          aggStat[i] = ONEPT;
    }
  }

  const RCP<const Teuchos::Comm<int> > comm = graph->GetComm();
  GO numGlobalRows = 0;
  if (IsPrint(Statistics1))
    sumAll(comm, as<GO>(numRows), numGlobalRows);

  LO numNonAggregatedNodes = numRows;
  GO numGlobalAggregatedPrev = 0, numGlobalAggsPrev = 0;
  for (size_t a = 0; a < algos_.size(); a++) {
    std::string phase = algos_[a]->description();
    SubFactoryMonitor sfm(*this, "Algo \"" + phase + "\"", currentLevel);

    algos_[a]->BuildAggregates(pL, *graph, *aggregates, aggStat, numNonAggregatedNodes);

    if (IsPrint(Statistics1)) {
      GO numLocalAggregated = numRows - numNonAggregatedNodes, numGlobalAggregated = 0;
      GO numLocalAggs       = aggregates->GetNumAggregates(),  numGlobalAggs = 0;
      sumAll(comm, numLocalAggregated, numGlobalAggregated);
      sumAll(comm, numLocalAggs,       numGlobalAggs);

      double aggPercent = 100*as<double>(numGlobalAggregated)/as<double>(numGlobalRows);
      if (aggPercent > 99.99 && aggPercent < 100.00) {
        // Due to round off (for instance, for 140465733/140466897), we could
        // get 100.00% display even if there are some remaining nodes. This
        // is bad from the users point of view. It is much better to change
        // it to display 99.99%.
        aggPercent = 99.99;
      }
      GetOStream(Statistics1) << " aggregated : " << (numGlobalAggregated - numGlobalAggregatedPrev) << " (phase), " << std::fixed
                              << std::setprecision(2) << numGlobalAggregated << "/" << numGlobalRows << " [" << aggPercent << "%] (total)\n"
                              << " remaining : " << numGlobalRows - numGlobalAggregated << "\n"
                              << " aggregates : " << numGlobalAggs-numGlobalAggsPrev << " (phase), " << numGlobalAggs << " (total)" << std::endl;
      numGlobalAggregatedPrev = numGlobalAggregated;
      numGlobalAggsPrev       = numGlobalAggs;
    }
  }

  // Every vertex must belong to an aggregate once all algorithms have run.
  TEUCHOS_TEST_FOR_EXCEPTION(numNonAggregatedNodes, Exceptions::RuntimeError, "MueLu::UncoupledAggregationFactory::Build: Leftover nodes found! Error!");

  aggregates->AggregatesCrossProcessors(false);

  Set(currentLevel, "Aggregates", aggregates);

  GetOStream(Statistics0) << aggregates->description() << std::endl;
}
/*
 * mutt_FormatString - expand a %-style template into a bounded buffer.
 *
 * dest/destlen  output buffer and its size in bytes; the result is always
 *               NUL-terminated when destlen > 0 (nothing is written when
 *               destlen is 0).
 * src           template string.
 * callback      invoked for every expando that is not handled here; it fills
 *               a scratch buffer and returns the position in src at which
 *               parsing continues.
 * data, flags   passed through to the callback.  M_FORMAT_OPTIONAL is set or
 *               cleared in flags per expando; M_FORMAT_ARROWCURSOR (together
 *               with OPTARROWCURSOR) reserves 3 columns; M_FORMAT_MAKEPRINT
 *               post-processes the result (see the end of the function).
 *
 * Sequences handled directly:
 *   %%            literal '%'
 *   %<prefix>X    expando X; prefix consists of digits, '.' and '-'
 *   %?X?if&else?  optional expando; the "&else" part may be omitted
 *   %>C           expand the rest of src and right-justify it, padding with C
 *   %|C           pad with C up to min(destlen, COLS) and stop
 *   %_X           expando X, lower-cased
 *   \n \t \r \f \v  control characters; any other escaped char is copied as is
 *
 * A malformed template stops the expansion at the point of the error; what
 * has been produced so far is kept.
 */
void mutt_FormatString (char *dest,         /* output buffer */
                        size_t destlen,     /* output buffer len */
                        const char *src,    /* template string */
                        format_t *callback, /* callback for processing */
                        unsigned long data, /* callback data */
                        format_flag flags)  /* callback flags */
{
  char prefix[SHORT_STRING], buf[LONG_STRING], *cp, *wptr = dest, ch;
  char ifstring[SHORT_STRING], elsestring[SHORT_STRING];
  size_t wlen, count, len;

  /* no room even for the terminator; destlen-- below would wrap around */
  if (!destlen)
    return;

  /* the callback receives all three strings on every call, so make sure they
     are valid C strings even before the template has filled them */
  prefix[0] = 0;
  ifstring[0] = 0;
  elsestring[0] = 0;

  destlen--; /* save room for the terminal \0 */
  wlen = (flags & M_FORMAT_ARROWCURSOR && option (OPTARROWCURSOR)) ? 3 : 0;

  while (*src && wlen < destlen)
  {
    if (*src == '%')
    {
      if (*++src == '%')
      {
        *wptr++ = '%';
        wlen++;
        src++;
        continue;
      }

      if (*src == '?')
      {
        flags |= M_FORMAT_OPTIONAL;
        src++;
      }
      else
      {
        flags &= ~M_FORMAT_OPTIONAL;

        /* eat the format string; keep one byte for the terminator */
        cp = prefix;
        count = 0;
        while (count < sizeof (prefix) - 1 &&
               (isdigit ((unsigned char) *src) || *src == '.' || *src == '-'))
        {
          *cp++ = *src++;
          count++;
        }
        *cp = 0;
      }

      if (!*src)
        break; /* bad format */

      ch = *src++; /* save the character to switch on */

      if (flags & M_FORMAT_OPTIONAL)
      {
        if (*src != '?')
          break; /* bad format */
        src++;

        /* eat the `if' part of the string */
        cp = ifstring;
        count = 0;
        while (count < sizeof (ifstring) - 1 && *src && *src != '?' && *src != '&')
        {
          *cp++ = *src++;
          count++;
        }
        *cp = 0;

        /* eat the `else' part of the string (optional) */
        if (*src == '&')
          src++; /* skip the & */
        cp = elsestring;
        count = 0;
        while (count < sizeof (elsestring) - 1 && *src && *src != '?')
        {
          *cp++ = *src++;
          count++;
        }
        *cp = 0;

        if (!*src)
          break; /* bad format */

        src++; /* move past the trailing `?' */
      }

      /* handle generic cases first */
      if (ch == '>')
      {
        /* right justify to EOL */
        if (!*src)
          break; /* bad format: pad char missing, do not step past the \0 */
        ch = *src++; /* pad char */

        /* calculate space left on line.  if we've already written more data
           than will fit on the line, ignore the rest of the line */
        count = (COLS < destlen ? COLS : destlen);
        if (count > wlen)
        {
          count -= wlen; /* how many chars left on this line */
          mutt_FormatString (buf, sizeof (buf), src, callback, data, flags);
          len = mutt_strlen (buf);
          if (count > len)
          {
            count -= len; /* how many chars to pad */
            memset (wptr, ch, count);
            wptr += count;
            wlen += count;
          }
          if (len + wlen > destlen)
            len = destlen - wlen;
          memcpy (wptr, buf, len);
          wptr += len;
          wlen += len;
        }
        break; /* skip rest of input */
      }
      else if (ch == '|')
      {
        /* pad to EOL */
        if (!*src)
          break; /* bad format: pad char missing */
        ch = *src++;
        if (destlen > COLS)
          destlen = COLS;
        if (destlen > wlen)
        {
          count = destlen - wlen;
          memset (wptr, ch, count);
          wptr += count;
        }
        break; /* skip rest of input */
      }
      else
      {
        short tolower = 0;

        if (ch == '_')
        {
          if (!*src)
            break; /* bad format: expando char missing */
          ch = *src++;
          tolower = 1;
        }

        /* use callback function to handle this case */
        src = callback (buf, sizeof (buf), ch, src, prefix, ifstring, elsestring, data, flags);

        if (tolower)
          mutt_strlower (buf);

        /* truncate to the room that is left (sizes are unsigned, so compare
           before subtracting) */
        if ((len = mutt_strlen (buf)) + wlen > destlen)
          len = (destlen > wlen) ? (destlen - wlen) : 0;

        memcpy (wptr, buf, len);
        wptr += len;
        wlen += len;
      }
    }
    else if (*src == '\\')
    {
      if (!*++src)
        break;
      switch (*src)
      {
        case 'n':
          *wptr = '\n';
          break;
        case 't':
          *wptr = '\t';
          break;
        case 'r':
          *wptr = '\r';
          break;
        case 'f':
          *wptr = '\f';
          break;
        case 'v':
          *wptr = '\v';
          break;
        default:
          *wptr = *src;
          break;
      }
      src++;
      wptr++;
      wlen++;
    }
    else
    {
      *wptr++ = *src++;
      wlen++;
    }
  }
  *wptr = 0;

  if (flags & M_FORMAT_MAKEPRINT)
  {
    /* Make sure that the string is printable by changing all non-printable
       chars to dots, or spaces for non-printable whitespace */
    for (cp = dest ; *cp ; cp++)
      if (!IsPrint (*cp) &&
          !((flags & M_FORMAT_TREE) && (*cp <= M_TREE_MAX)))
        *cp = isspace ((unsigned char) *cp) ? ' ' : '.';
  }
}
// Build a prolongator for a matrix whose global size is either n*n ("pressure",
// one unknown per grid point) or 2*n*n ("velocity", two unknowns per grid
// point), coarsening the n x n grid by a factor of C = 4 in each direction.
//
// Reads  : "A" from fineLevel.
// Writes : "Nullspace" (a single constant vector) and "P" on coarseLevel,
//          "CoarseMap" on fineLevel.
// Throws : Exceptions::RuntimeError if the global size fits neither mode,
//          Exceptions::InvalidArgument if n is not of the form C*(nc-1)+1.
void Q2Q1PFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::BuildP(Level& fineLevel, Level& coarseLevel) const {
  FactoryMonitor m(*this, "Build", coarseLevel);

  RCP<Matrix>    A          = Get< RCP<Matrix> >(fineLevel, "A");
  RCP<const Map> fineRowMap = A->getRowMap();

  Xpetra::global_size_t N = fineRowMap->getGlobalNumElements();

  // Work out the number of unknowns per grid point (V) and the grid dimension (n).
  int    V = 0;
  size_t n = as<size_t>(sqrt(N));
  if (N == n*n) {
    // pressure mode
    V = 1;
    GetOStream(Runtime1) << "Pressure mode" << std::endl;

  } else {
    n = as<size_t>(sqrt(N/2));
    if (N != 2*n*n)
      throw Exceptions::RuntimeError("Matrix size (" + toString(N) + ") is incompatible with both velocity and pressure");

    // velocity mode
    V = 2;
    GetOStream(Runtime1) << "Velocity mode" << std::endl;
  }

  // Coarsening factor and resulting coarse grid dimension.
  const int C = 4;
  Xpetra::global_size_t nc = (n-1)/C + 1;
  TEUCHOS_TEST_FOR_EXCEPTION(C*(nc-1)+1 != n, Exceptions::InvalidArgument, "Incorrect dim size: " << n);

  ArrayView<const GO> fineGIDs  = fineRowMap->getNodeElementList();
  GO                  indexBase = fineRowMap->getIndexBase();

  // Calculate offsets (both are zero in velocity mode)
  GO offset       = 0;
  GO coarseOffset = 0;
  if (V != 2) {
    offset       = 2*(2*n -1)*(2*n -1);
    coarseOffset = 2*(2*nc-1)*(2*nc-1);
  }
  GetOStream(Runtime1) << "offset = " << offset << ", coarseOffset = " << coarseOffset << std::endl;

  // Pass 1: collect the coarse GIDs, i.e. the unknowns of fine grid points
  // whose both grid indices are multiples of C.
  Array<GO> coarseList;
  for (LO k = 0; k < fineGIDs.size(); k += V) {
    GO shifted = fineGIDs[k] - offset - indexBase;

    GO i = (shifted / V) % n, ii = i/C;
    GO j = (shifted / V) / n, jj = j/C;

    if (i % C != 0 || j % C != 0)
      continue;

    for (int q = 0; q < V; q++)
      coarseList.push_back(V*(jj*nc + ii) + q + coarseOffset);
  }

  typedef Teuchos::ScalarTraits<SC> STS;
  SC one = STS::one();

  Xpetra::global_size_t INVALID = Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid();

  std::vector<size_t> stridingInfo(1,1);
  const int stridedBlockId = -1;
  RCP<Map> coarseMap = StridedMapFactory::Build(fineRowMap->lib(), INVALID, coarseList, indexBase, stridingInfo, fineRowMap->getComm(), stridedBlockId, coarseOffset);

  RCP<MultiVector> coarseNullspace = MultiVectorFactory::Build(coarseMap, 1);
  coarseNullspace->putScalar(one);

  // Pass 2: fill P. Each fine point gets 1, 2 or 4 unit entries, one for each
  // surrounding coarse point along the directions in which it is not aligned.
  int nnzEstimate = 4;
  RCP<Matrix> P = MatrixFactory::Build(fineRowMap, coarseMap, nnzEstimate);

  Array<GO> stencil(nnzEstimate), cols(nnzEstimate);
  Array<SC> vals(nnzEstimate, one);

  int sz;
  for (LO k = 0; k < fineGIDs.size(); k += V) {
    GO shifted = fineGIDs[k] - offset - indexBase;

    GO i = (shifted/V) % n, ii = i/C;
    GO j = (shifted/V) / n, jj = j/C;

    const bool iOnCoarse = (i % C == 0);
    const bool jOnCoarse = (j % C == 0);

    // Same entries, in the same order, as the explicit four-case enumeration.
    sz = 0;
    stencil[sz++] = jj*nc + ii;
    if (!iOnCoarse)
      stencil[sz++] = jj*nc + ii+1;
    if (!jOnCoarse) {
      stencil[sz++] = (jj+1)*nc + ii;
      if (!iOnCoarse)
        stencil[sz++] = (jj+1)*nc + ii+1;
    }

    for (int q = 0; q < V; q++) {
      for (int p = 0; p < sz; p++)
        cols[p] = V*stencil[p]+q + coarseOffset;

      P->insertGlobalValues(fineGIDs[k]+q, cols.view(0,sz), vals.view(0,sz));
    }
  }
  P->fillComplete(coarseMap, A->getDomainMap());

  // Level Set
  Set(coarseLevel, "Nullspace", coarseNullspace);
  Set(coarseLevel, "P",         P);
  Set(fineLevel,   "CoarseMap", coarseMap);

  if (IsPrint(Statistics1)) {
    RCP<ParameterList> printOpts = rcp(new ParameterList());
    printOpts->set("printLoadBalancingInfo", true);
    printOpts->set("printCommInfo",          true);
    GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*P, "P", printOpts);
  }
}
// Rebalance the transfer-related data of coarseLevel according to the
// "Importer" found on that level.
//
// The "type" parameter selects what is processed:
//   * "Interpolation": the prolongator "P", and - when available - the
//     "Coordinates" and the "Nullspace" multivectors.
//   * anything else: the restrictor "R", unless "transpose: use implicit" is
//     set, in which case nothing is done.
//
// If "repartition: rebalance P and R" is false, or the importer is null, the
// original P / R are passed through unchanged. With a null importer the
// available nullspace and coordinates are passed through unchanged as well.
//
// Coordinates are written to ".m" files for levels within
// ["write start", "write end"].
void RebalanceTransferFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level& fineLevel, Level& coarseLevel) const {
  FactoryMonitor m(*this, "Build", coarseLevel);

  const ParameterList& paramList = GetParameterList();

  const bool useImplicit     = !paramList.get<bool>("repartition: rebalance P and R");
  const int  firstWriteLevel = paramList.get<int> ("write start");
  const int  lastWriteLevel  = paramList.get<int> ("write end");

  // Dump the finest-level coordinates if level 0 is within the write range.
  if (firstWriteLevel == 0 && fineLevel.GetLevelID() == 0 && firstWriteLevel <= lastWriteLevel && IsAvailable(fineLevel, "Coordinates")) {
    std::string fineFile = "coordinates_level_0.m";
    RCP<MultiVector> fineCoords = fineLevel.Get< RCP<MultiVector> >("Coordinates");
    if (fineCoords != Teuchos::null)
      Utils::Write(fineFile, *fineCoords);
  }

  RCP<const Import> importer = Get<RCP<const Import> >(coarseLevel, "Importer");
  if (useImplicit) {
    // Save the importer, we'll need it for solve
    coarseLevel.Set("Importer", importer, NoFactory::get());
  }

  RCP<ParameterList> printOpts = rcp(new ParameterList());
  printOpts->set("printLoadBalancingInfo", true);
  printOpts->set("printCommInfo",          true);

  std::string transferType = paramList.get<std::string>("type");

  // ---------------------------------------------------------------------
  // Restriction
  // ---------------------------------------------------------------------
  if (transferType != "Interpolation") {
    if (paramList.get<bool>("transpose: use implicit") == false) {
      RCP<Matrix> originalR = Get< RCP<Matrix> >(coarseLevel, "R");

      SubFactoryMonitor m2(*this, "Rebalancing restriction", coarseLevel);

      if (useImplicit || importer.is_null()) {
        GetOStream(Runtime0) << "Using original restrictor" << std::endl;
        Set(coarseLevel, "R", originalR);

      } else {
        RCP<Matrix> rebalancedR;
        {
          SubFactoryMonitor subM(*this, "Rebalancing restriction -- fusedImport", coarseLevel);

          RCP<Map> keepDomainMap;  // null, meaning: use originalR's domain map.
          rebalancedR = MatrixFactory::Build(originalR, *importer, keepDomainMap, importer->getTargetMap());
        }
        Set(coarseLevel, "R", rebalancedR);

        ///////////////////////// EXPERIMENTAL
        // TODO FIXME somehow we have to transfer the striding information of the permuted domain/range maps.
        // That is probably something for an external permutation factory
        // if (originalR->IsView("stridedMaps"))
        //   rebalancedR->CreateView("stridedMaps", originalR);
        ///////////////////////// EXPERIMENTAL

        if (IsPrint(Statistics1))
          GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*rebalancedR, "R (rebalanced)", printOpts);
      }
    }
    return;
  }

  // ---------------------------------------------------------------------
  // Interpolation: prolongator
  // ---------------------------------------------------------------------
  RCP<Matrix> originalP = Get< RCP<Matrix> >(coarseLevel, "P");

  {
    // The monitor must be created after the Get call above
    SubFactoryMonitor m1(*this, "Rebalancing prolongator", coarseLevel);

    if (useImplicit || importer.is_null()) {
      GetOStream(Runtime0) << "Using original prolongator" << std::endl;
      Set(coarseLevel, "P", originalP);

    } else {
      // P is the transfer operator from the coarse grid to the fine grid.
      // P must transfer the data from the newly reordered coarse A to the
      // (unchanged) fine A. This means that the domain map (coarse) of P
      // must be changed according to the new partition. The range map
      // (fine) is kept unchanged.
      //
      // The domain map of P must match the range map of R.
      //
      // Here the domain map (and the matching importer) of the CrsMatrix
      // wrapped by P is replaced in place, so rebalancedP refers to the same
      // object as originalP.
      RCP<Matrix> rebalancedP = originalP;

      RCP<const CrsMatrixWrap> wrappedP = rcp_dynamic_cast<const CrsMatrixWrap>(originalP);
      TEUCHOS_TEST_FOR_EXCEPTION(wrappedP == Teuchos::null, Exceptions::BadCast, "Cast from Xpetra::Matrix to Xpetra::CrsMatrixWrap failed");

      RCP<CrsMatrix> crsP = wrappedP->getCrsMatrix();
      TEUCHOS_TEST_FOR_EXCEPTION(crsP == Teuchos::null, std::runtime_error, "Xpetra::CrsMatrixWrap doesn't have a CrsMatrix");

      {
        SubFactoryMonitor subM(*this, "Rebalancing prolongator -- fast map replacement", coarseLevel);

        RCP<const Import> colImporter = ImportFactory::Build(importer->getTargetMap(), rebalancedP->getColMap());
        crsP->replaceDomainMapAndImporter(importer->getTargetMap(), colImporter);
      }

      ///////////////////////// EXPERIMENTAL
      // TODO FIXME somehow we have to transfer the striding information of the permuted domain/range maps.
      // That is probably something for an external permutation factory
      // if (originalP->IsView("stridedMaps"))
      //   rebalancedP->CreateView("stridedMaps", originalP);
      ///////////////////////// EXPERIMENTAL

      Set(coarseLevel, "P", rebalancedP);

      if (IsPrint(Statistics1))
        GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*rebalancedP, "P (rebalanced)", printOpts);
    }
  }

  // ---------------------------------------------------------------------
  // Interpolation: no importer -> pass nullspace and coordinates through
  // ---------------------------------------------------------------------
  if (importer.is_null()) {
    if (IsAvailable(coarseLevel, "Nullspace"))
      Set(coarseLevel, "Nullspace", Get<RCP<MultiVector> >(coarseLevel, "Nullspace"));

    if (paramList.isParameter("Coordinates") && paramList.get< RCP<const FactoryBase> >("Coordinates") != Teuchos::null) {
      if (IsAvailable(coarseLevel, "Coordinates"))
        Set(coarseLevel, "Coordinates", Get< RCP<MultiVector> >(coarseLevel, "Coordinates"));
    }

    return;
  }

  // ---------------------------------------------------------------------
  // Interpolation: coordinates
  // ---------------------------------------------------------------------
  if (paramList.isParameter("Coordinates") &&
      paramList.get< RCP<const FactoryBase> >("Coordinates") != Teuchos::null &&
      IsAvailable(coarseLevel, "Coordinates")) {
    RCP<MultiVector> coords = Get<RCP<MultiVector> >(coarseLevel, "Coordinates");

    // The monitor must be created after the Get call above
    SubFactoryMonitor subM(*this, "Rebalancing coordinates", coarseLevel);

    LO numLocalCoords = coords->getMap()->getNodeNumElements();

    // A process without local coordinates cannot evaluate the ratio below, so
    // it contributes 0 and the block size is taken as the global maximum.
    LO localBlkSize = 0, blkSize = 0;
    if (numLocalCoords > 0)
      localBlkSize = importer->getSourceMap()->getNodeNumElements() / numLocalCoords;
    maxAll(coords->getMap()->getComm(), localBlkSize, blkSize);

    RCP<const Import> coordImporter;
    if (blkSize == 1) {
      coordImporter = importer;

    } else {
      // NOTE: there is an implicit assumption here: the dofs of any node are
      // assumed to be enumerated consecutively. A proper fix would require a
      // decomposition similar to how the importer is constructed in the
      // RepartitionFactory.
      RCP<const Map> origMap   = coords->getMap();
      GO             indexBase = origMap->getIndexBase();

      ArrayView<const GO> dofGIDs  = importer->getTargetMap()->getNodeElementList();
      LO                  numNodes = dofGIDs.size()/blkSize;
      ArrayRCP<GO>        nodeGIDs(numNodes);
      // Take the first dof of every block and map it to its node id
      for (LO i = 0; i < numNodes; i++)
        nodeGIDs[i] = (dofGIDs[i*blkSize]-indexBase)/blkSize + indexBase;

      RCP<const Map> targetMap = MapFactory::Build(origMap->lib(), origMap->getGlobalNumElements(), nodeGIDs(), indexBase, origMap->getComm());
      coordImporter = ImportFactory::Build(origMap, targetMap);
    }

    RCP<MultiVector> permutedCoords = MultiVectorFactory::Build(coordImporter->getTargetMap(), coords->getNumVectors());
    permutedCoords->doImport(*coords, *coordImporter, Xpetra::INSERT);

    if (paramList.get<bool>("useSubcomm") == true)
      permutedCoords->replaceMap(permutedCoords->getMap()->removeEmptyProcesses());

    Set(coarseLevel, "Coordinates", permutedCoords);

    std::string coarseFile = "rebalanced_coordinates_level_" + toString(coarseLevel.GetLevelID()) + ".m";
    if (firstWriteLevel <= coarseLevel.GetLevelID() && coarseLevel.GetLevelID() <= lastWriteLevel && permutedCoords->getMap() != Teuchos::null)
      Utils::Write(coarseFile, *permutedCoords);
  }

  // ---------------------------------------------------------------------
  // Interpolation: nullspace
  // ---------------------------------------------------------------------
  if (IsAvailable(coarseLevel, "Nullspace")) {
    RCP<MultiVector> nullspace = Get< RCP<MultiVector> >(coarseLevel, "Nullspace");

    // The monitor must be created after the Get call above
    SubFactoryMonitor subM(*this, "Rebalancing nullspace", coarseLevel);

    RCP<MultiVector> permutedNullspace = MultiVectorFactory::Build(importer->getTargetMap(), nullspace->getNumVectors());
    permutedNullspace->doImport(*nullspace, *importer, Xpetra::INSERT);

    if (paramList.get<bool>("useSubcomm") == true)
      permutedNullspace->replaceMap(permutedNullspace->getMap()->removeEmptyProcesses());

    Set(coarseLevel, "Nullspace", permutedNullspace);
  }
}