Mark* score4K(Dice *dice[]) { int i, j, count; Mark *mark = (Mark *)malloc(sizeof(Mark)); /* tests each die value for a 4k or better */ for(i = 1; i <= 6; i++) { count = 0; /* counts the number of occurrences of the die value */ for(j = 0; j < 5; j++) if(i == dice[j]->value) count++; /* if 4k or better, break */ if(count >= 4) { mark->value = sumAll(dice); break; } } mark->vol = (count >= 4) ? 0 : 1; return mark; }
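A minimal sketch of the definitions this snippet assumes. Dice, Mark, and the dice-array sumAll are not shown in the source, so these shapes are inferred from usage and may differ from the real project:

/* Hypothetical definitions inferred from the calls in score4K above and scoreChance below. */
typedef struct { int value; } Dice;           /* one die; 'value' is the face shown (1..6) */
typedef struct { int value; int vol; } Mark;  /* a scorecard entry; 'vol' flags a voluntary zero */

/* Sum of all five die faces. */
static int sumAll(Dice *dice[]) {
    int i, sum = 0;
    for (i = 0; i < 5; i++)
        sum += dice[i]->value;
    return sum;
}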
std::string MHDRAPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::PrintLoadBalancingInfo(const Matrix & Ac, const std::string & msgTag) { std::stringstream ss(std::stringstream::out); // TODO: provide an option to skip this (to avoid global communication) // TODO: skip if nproc == 1 //nonzero imbalance size_t numMyNnz = Ac.getNodeNumEntries(); GO maxNnz, minNnz; RCP<const Teuchos::Comm<int> > comm = Ac.getRowMap()->getComm(); maxAll(comm,(GO)numMyNnz,maxNnz); //min nnz over all proc (disallow any processors with 0 nnz) minAll(comm, (GO)((numMyNnz > 0) ? numMyNnz : maxNnz), minNnz); double imbalance = ((double) maxNnz) / minNnz; size_t numMyRows = Ac.getNodeNumRows(); //Check whether Ac is spread over more than one process. GO numActiveProcesses=0; sumAll(comm, (GO)((numMyRows > 0) ? 1 : 0), numActiveProcesses); //min, max, and avg # rows per proc GO minNumRows, maxNumRows; double avgNumRows; maxAll(comm, (GO)numMyRows, maxNumRows); minAll(comm, (GO)((numMyRows > 0) ? numMyRows : maxNumRows), minNumRows); assert(numActiveProcesses > 0); avgNumRows = Teuchos::as<double>(Ac.getGlobalNumRows()) / numActiveProcesses; // avoid integer division when averaging ss << msgTag << " # processes with rows = " << numActiveProcesses << std::endl; ss << msgTag << " min # rows per proc = " << minNumRows << ", max # rows per proc = " << maxNumRows << ", avg # rows per proc = " << avgNumRows << std::endl; ss << msgTag << " nonzero imbalance = " << imbalance << std::endl; return ss.str(); }
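The sumAll, minAll, and maxAll calls here and in the MueLu snippets below are, in the MueLu versions this code appears to come from, thin convenience macros over Teuchos::reduceAll; a sketch of that assumption (the real utility header may differ):

// Assumed macro definitions, as in older MueLu utility headers.
#define sumAll(rcpComm, in, out) \
  Teuchos::reduceAll(*rcpComm, Teuchos::REDUCE_SUM, in, Teuchos::outArg(out))
#define minAll(rcpComm, in, out) \
  Teuchos::reduceAll(*rcpComm, Teuchos::REDUCE_MIN, in, Teuchos::outArg(out))
#define maxAll(rcpComm, in, out) \
  Teuchos::reduceAll(*rcpComm, Teuchos::REDUCE_MAX, in, Teuchos::outArg(out))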
void LeftoverAggregationAlgorithm<LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::RootCandidates(my_size_t nVertices, ArrayView<const LO> & vertex2AggId, GraphBase const &graph, ArrayRCP<LO> &candidates, my_size_t &nCandidates, global_size_t &nCandidatesGlobal) const { nCandidates = 0; for (my_size_t i = 0; i < nVertices; i++ ) { if (vertex2AggId[i] == MUELU_UNAGGREGATED) { bool noAggdNeighbors = true; // neighOfINode is the neighbor node list of node 'iNode'. ArrayView<const LO> neighOfINode = graph.getNeighborVertices(i); for (typename ArrayView<const LO>::const_iterator it = neighOfINode.begin(); it != neighOfINode.end(); ++it) { int adjacent = *it; if (vertex2AggId[adjacent] != MUELU_UNAGGREGATED) noAggdNeighbors = false; } if (noAggdNeighbors == true) candidates[nCandidates++] = i; } } sumAll(graph.GetComm(), (GO)nCandidates, nCandidatesGlobal); } //RootCandidates
void RAPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::CheckMainDiagonal(RCP<Matrix> & Ac) const { // plausibility check: no zeros on diagonal RCP<Vector> diagVec = VectorFactory::Build(Ac->getRowMap()); Ac->getLocalDiagCopy(*diagVec); SC zero = Teuchos::ScalarTraits<SC>::zero(), one = Teuchos::ScalarTraits<SC>::one(); LO lZeroDiags = 0; Teuchos::ArrayRCP< Scalar > diagVal = diagVec->getDataNonConst(0); for (size_t r = 0; r < Ac->getRowMap()->getNodeNumElements(); r++) { if (diagVal[r] == zero) { lZeroDiags++; if (repairZeroDiagonals_) { GO grid = Ac->getRowMap()->getGlobalElement(r); LO lcid = Ac->getColMap()->getLocalElement(grid); Teuchos::ArrayRCP<LO> indout(1, lcid); Teuchos::ArrayRCP<SC> valout(1, one); Ac->insertLocalValues(r, indout.view(0, indout.size()), valout.view(0, valout.size())); } } } if (IsPrint(Warnings0)) { const RCP<const Teuchos::Comm<int> > & comm = Ac->getRowMap()->getComm(); GO lZeroDiagsGO = Teuchos::as<GO>(lZeroDiags); /* LO->GO conversion */ GO gZeroDiags = 0; sumAll(comm, lZeroDiagsGO, gZeroDiags); if (repairZeroDiagonals_) GetOStream(Warnings0,0) << "RAPFactory (WARNING): repaired " << gZeroDiags << " zeros on main diagonal of Ac." << std::endl; else GetOStream(Warnings0,0) << "RAPFactory (WARNING): found " << gZeroDiags << " zeros on main diagonal of Ac." << std::endl; } }
// Returns the roll's value for a given scoring choice const int DiceRoll::rollValue(const ChoiceAction choice) const { switch(choice) { case ACES: return sumIfEqual(1); case TWOS: return sumIfEqual(2); case THREES: return sumIfEqual(3); case FOURS: return sumIfEqual(4); case FIVES: return sumIfEqual(5); case SIXES: return sumIfEqual(6); case THREE_OF_A_KIND: if (isThreeOfAKind()) return sumAll(); return 0; case FOUR_OF_A_KIND: if (isFourOfAKind()) return sumAll(); return 0; case FULL_HOUSE: if (isFullHouse()) return 25; return 0; case SMALL_STRAIGHT: if (isSmallStraight()) return 30; return 0; case LARGE_STRAIGHT: if (isLargeStraight()) return 40; return 0; case YAHTZEE: if (isYahtzee()) return 50; return 0; case CHANCE: return sumAll(); case ROLL: assert(false); // Unscorable! } return 0; // unreachable for scorable choices; avoids falling off the end of a non-void function }
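A hedged usage sketch for the dispatch above; the DiceRoll constructor shown is an assumption for illustration, not the class's documented API:

// Hypothetical driver, assuming DiceRoll can be built from five face values.
// DiceRoll roll({2, 3, 3, 3, 2});
// roll.rollValue(FULL_HOUSE);      // -> 25, the fixed full-house score
// roll.rollValue(THREE_OF_A_KIND); // -> 13, the sum of all five dice
// roll.rollValue(YAHTZEE);         // -> 0, not five of a kind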
Mark* scoreChance(Dice *dice[]) { Mark *temp = score4K(dice), *mark = (Mark *)malloc(sizeof(Mark)); mark->value = sumAll(dice); mark->vol = (temp->value > 0) ? 0 : 1; free(temp); return mark; }
int main(void) { int j[] = {4,10,19,42,15,53,36}, capacity = 7; printf("The smallest value in the vector is: %d\n", minValue(j, capacity)); printf("The product of all the values in the vector is: %d\n", productOfAll(j, capacity)); printf("The maximum value in the vector is: %d\n", maxValue(j, capacity)); printf("The index of the minimum value of the vector is: %d\n", minIndex(j, capacity)); printf("The sum of the vector is: %d\n", sumAll(j, capacity)); printf("The max value index is: %d\n", maxIndex(j, capacity)); return 0; }
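The array helpers this driver calls are not shown in the source; a minimal sketch of two of them under the obvious contract (hypothetical implementations, the real ones may differ):

#include <stdio.h>

/* Sum of the first n entries, matching the sumAll(j, capacity) call above. */
static int sumAll(const int *v, int n) {
    int i, sum = 0;
    for (i = 0; i < n; i++) sum += v[i];
    return sum;
}

/* Index of the smallest entry, matching the minIndex(j, capacity) call above. */
static int minIndex(const int *v, int n) {
    int i, best = 0;
    for (i = 1; i < n; i++)
        if (v[i] < v[best]) best = i;
    return best;
}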
GlobalOrdinal Aggregates<LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::GetNumGlobalAggregates() const { LO nAggregates = GetNumAggregates(); GO nGlobalAggregates; sumAll(vertex2AggId_->getMap()->getComm(), (GO)nAggregates, nGlobalAggregates); return nGlobalAggregates; }
void Newton::solveNonLinear() { casadi_log("Newton::solveNonLinear:begin"); // Set up timers for profiling double time_zero=0; double time_start=0; double time_stop=0; if (CasadiOptions::profiling && !CasadiOptions::profilingBinary) { time_zero = getRealTime(); CasadiOptions::profilingLog << "start " << this << ":" <<getOption("name") << std::endl; } // Pass the inputs to J for (int i=0; i<getNumInputs(); ++i) { if (i!=iin_) jac_.setInput(input(i), i); } // Aliases DMatrix &u = output(iout_); DMatrix &J = jac_.output(0); DMatrix &F = jac_.output(1+iout_); // Perform the Newton iterations int iter=0; bool success = true; while (true) { // Break if maximum number of iterations already reached if (iter >= max_iter_) { log("evaluate", "Max. iterations reached."); stats_["return_status"] = "max_iteration_reached"; success = false; break; } // Start a new iteration iter++; // Print progress if (monitored("step") || monitored("stepsize")) { std::cout << "Step " << iter << "." << std::endl; } if (monitored("step")) { std::cout << "  u = " << u << std::endl; } // Use u to evaluate J jac_.setInput(u, iin_); for (int i=0; i<getNumInputs(); ++i) if (i!=iin_) jac_.setInput(input(i), i); if (CasadiOptions::profiling) { time_start = getRealTime(); // Start timer } jac_.evaluate(); // Write out profiling information if (CasadiOptions::profiling && !CasadiOptions::profilingBinary) { time_stop = getRealTime(); // Stop timer CasadiOptions::profilingLog << (time_stop-time_start)*1e6 << " us | " << (time_stop-time_zero)*1e3 << " ms | " << this << ":" << getOption("name") << ":0|" << jac_.get() << ":" << jac_.getOption("name") << "|evaluate jacobian" << std::endl; } if (monitored("F")) std::cout << "  F = " << F << std::endl; if (monitored("normF")) std::cout << "  F (min, max, 1-norm, 2-norm) = " << (*std::min_element(F.data().begin(), F.data().end())) << ", " << (*std::max_element(F.data().begin(), F.data().end())) << ", " << sumAll(fabs(F)) << ", " << sqrt(sumAll(F*F)) << std::endl; if (monitored("J")) std::cout << "  J = " << J << std::endl; double abstol = 0; if (numeric_limits<double>::infinity() != abstol_) { abstol = std::max((*std::max_element(F.data().begin(), F.data().end())), -(*std::min_element(F.data().begin(), F.data().end()))); if (abstol <= abstol_) { casadi_log("Converged to acceptable tolerance - abstol: " << abstol_); break; } } // Prepare the linear solver with J linsol_.setInput(J, LINSOL_A); if (CasadiOptions::profiling) { time_start = getRealTime(); // Start timer } linsol_.prepare(); // Write out profiling information if (CasadiOptions::profiling && !CasadiOptions::profilingBinary) { time_stop = getRealTime(); // Stop timer CasadiOptions::profilingLog << (time_stop-time_start)*1e6 << " us | " << (time_stop-time_zero)*1e3 << " ms | " << this << ":" << getOption("name") << ":1||prepare linear system" << std::endl; } if (CasadiOptions::profiling) { time_start = getRealTime(); // Start timer } // Solve against F linsol_.solve(&F.front(), 1, false); if (CasadiOptions::profiling && !CasadiOptions::profilingBinary) { time_stop = getRealTime(); // Stop timer CasadiOptions::profilingLog << (time_stop-time_start)*1e6 << " us | " << (time_stop-time_zero)*1e3 << " ms | " << this << ":" << getOption("name") << ":2||solve linear system" << std::endl; } if (monitored("step")) { std::cout << "  step = " << F << std::endl; } double abstolStep=0; if (numeric_limits<double>::infinity() != abstolStep_) { abstolStep = std::max((*std::max_element(F.data().begin(), F.data().end())),
-(*std::min_element(F.data().begin(), F.data().end()))); if (monitored("stepsize")) { std::cout << " stepsize = " << abstolStep << std::endl; } if (abstolStep <= abstolStep_) { casadi_log("Converged to acceptable tolerance - abstolStep: " << abstolStep_); break; } } if (print_iteration_) { // Only print iteration header once in a while if (iter % 10==0) { printIteration(std::cout); } // Print iteration information printIteration(std::cout, iter, abstol, abstolStep); } // Update Xk+1 = Xk - J^(-1) F std::transform(u.begin(), u.end(), F.begin(), u.begin(), std::minus<double>()); // Get auxiliary outputs for (int i=0; i<getNumOutputs(); ++i) { if (i!=iout_) jac_.getOutput(output(i), 1+i); } } // Store the iteration count if (gather_stats_) stats_["iter"] = iter; if (success) stats_["return_status"] = "success"; // Factorization up-to-date fact_up_to_date_ = true; casadi_log("Newton::solveNonLinear():end after " << iter << " steps"); }
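Stripped of the profiling, monitoring, and auxiliary outputs, the loop above is plain undamped Newton: solve J(u) du = F(u), then update u <- u - du until either the residual tolerance abstol_ or the step tolerance abstolStep_ is met. A self-contained scalar analogue (a sketch, not the CasADi API):

#include <cmath>
#include <cstdio>

int main() {
    // Solve F(u) = u*u - 2 = 0; the Jacobian is J(u) = 2*u, so the step is du = F/J.
    double u = 1.0;
    const double abstol = 1e-12;  // residual tolerance, like abstol_ above
    const int max_iter = 50;      // like max_iter_ above
    for (int iter = 0; iter < max_iter; ++iter) {
        const double F = u * u - 2.0;
        if (std::fabs(F) <= abstol) break;  // converged on |F|
        const double J = 2.0 * u;
        u -= F / J;                         // u_{k+1} = u_k - J^{-1} F
    }
    std::printf("root = %.12f\n", u);       // ~ 1.414213562373 = sqrt(2)
    return 0;
}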
void UncoupledAggregationFactory<LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level &currentLevel) const { FactoryMonitor m(*this, "Build", currentLevel); ParameterList pL = GetParameterList(); bDefinitionPhase_ = false; // definition phase is finished, now all aggregation algorithm information is fixed // define aggregation algorithms RCP<const FactoryBase> graphFact = GetFactory("Graph"); // TODO Can we keep different aggregation algorithms over more Build calls? algos_.clear(); if (pL.get<std::string>("aggregation: mode") == "old") { if (pL.get<bool>("UseOnePtAggregationAlgorithm") == true) algos_.push_back(rcp(new OnePtAggregationAlgorithm (graphFact))); if (pL.get<bool>("UsePreserveDirichletAggregationAlgorithm") == true) algos_.push_back(rcp(new PreserveDirichletAggregationAlgorithm (graphFact))); if (pL.get<bool>("UseUncoupledAggregationAlgorithm") == true) algos_.push_back(rcp(new AggregationPhase1Algorithm (graphFact))); if (pL.get<bool>("UseMaxLinkAggregationAlgorithm") == true) algos_.push_back(rcp(new MaxLinkAggregationAlgorithm (graphFact))); if (pL.get<bool>("UseEmergencyAggregationAlgorithm") == true) algos_.push_back(rcp(new EmergencyAggregationAlgorithm (graphFact))); algos_.push_back(rcp(new IsolatedNodeAggregationAlgorithm (graphFact))); } else { if (pL.get<bool>("aggregation: preserve Dirichlet points") == true) algos_.push_back(rcp(new PreserveDirichletAggregationAlgorithm (graphFact))); if (pL.get<bool>("aggregation: enable phase 1" ) == true) algos_.push_back(rcp(new AggregationPhase1Algorithm (graphFact))); if (pL.get<bool>("aggregation: enable phase 2a") == true) algos_.push_back(rcp(new AggregationPhase2aAlgorithm (graphFact))); if (pL.get<bool>("aggregation: enable phase 2b") == true) algos_.push_back(rcp(new AggregationPhase2bAlgorithm (graphFact))); if (pL.get<bool>("aggregation: enable phase 3" ) == true) algos_.push_back(rcp(new AggregationPhase3Algorithm (graphFact))); algos_.push_back(rcp(new IsolatedNodeAggregationAlgorithm (graphFact))); } std::string mapOnePtName = pL.get<std::string>("OnePt aggregate map name"); RCP<const Map> OnePtMap; if (mapOnePtName.length()) { RCP<const FactoryBase> mapOnePtFact = GetFactory("OnePt aggregate map factory"); OnePtMap = currentLevel.Get<RCP<const Map> >(mapOnePtName, mapOnePtFact.get()); } RCP<const GraphBase> graph = Get< RCP<GraphBase> >(currentLevel, "Graph"); // Build RCP<Aggregates> aggregates = rcp(new Aggregates(*graph)); aggregates->setObjectLabel("UC"); const LO numRows = graph->GetNodeNumVertices(); // construct aggStat information std::vector<unsigned> aggStat(numRows, READY); ArrayRCP<const bool> dirichletBoundaryMap = graph->GetBoundaryNodeMap(); if (dirichletBoundaryMap != Teuchos::null) for (LO i = 0; i < numRows; i++) if (dirichletBoundaryMap[i] == true) aggStat[i] = BOUNDARY; LO nDofsPerNode = Get<LO>(currentLevel, "DofsPerNode"); GO indexBase = graph->GetDomainMap()->getIndexBase(); if (OnePtMap != Teuchos::null) { for (LO i = 0; i < numRows; i++) { // reconstruct global row id (FIXME only works for contiguous maps) GO grid = (graph->GetDomainMap()->getGlobalElement(i)-indexBase) * nDofsPerNode + indexBase; for (LO kr = 0; kr < nDofsPerNode; kr++) if (OnePtMap->isNodeGlobalElement(grid + kr)) aggStat[i] = ONEPT; } } const RCP<const Teuchos::Comm<int> > comm = graph->GetComm(); GO numGlobalRows = 0; if (IsPrint(Statistics1)) sumAll(comm, as<GO>(numRows), numGlobalRows); LO numNonAggregatedNodes = numRows; GO numGlobalAggregatedPrev = 0, numGlobalAggsPrev = 0; for (size_t a = 0; a < algos_.size();
a++) { std::string phase = algos_[a]->description(); SubFactoryMonitor sfm(*this, "Algo \"" + phase + "\"", currentLevel); algos_[a]->BuildAggregates(pL, *graph, *aggregates, aggStat, numNonAggregatedNodes); if (IsPrint(Statistics1)) { GO numLocalAggregated = numRows - numNonAggregatedNodes, numGlobalAggregated = 0; GO numLocalAggs = aggregates->GetNumAggregates(), numGlobalAggs = 0; sumAll(comm, numLocalAggregated, numGlobalAggregated); sumAll(comm, numLocalAggs, numGlobalAggs); double aggPercent = 100*as<double>(numGlobalAggregated)/as<double>(numGlobalRows); if (aggPercent > 99.99 && aggPercent < 100.00) { // Due to round off (for instance, for 140465733/140466897), we could // get 100.00% display even if there are some remaining nodes. This // is bad from the user's point of view. It is much better to change // it to display 99.99%. aggPercent = 99.99; } GetOStream(Statistics1) << "  aggregated : " << (numGlobalAggregated - numGlobalAggregatedPrev) << " (phase), " << std::fixed << std::setprecision(2) << numGlobalAggregated << "/" << numGlobalRows << " [" << aggPercent << "%] (total)\n" << "  remaining  : " << numGlobalRows - numGlobalAggregated << "\n" << "  aggregates : " << numGlobalAggs-numGlobalAggsPrev << " (phase), " << numGlobalAggs << " (total)" << std::endl; numGlobalAggregatedPrev = numGlobalAggregated; numGlobalAggsPrev = numGlobalAggs; } } TEUCHOS_TEST_FOR_EXCEPTION(numNonAggregatedNodes, Exceptions::RuntimeError, "MueLu::UncoupledAggregationFactory::Build: Leftover nodes found! Error!"); aggregates->AggregatesCrossProcessors(false); Set(currentLevel, "Aggregates", aggregates); GetOStream(Statistics0) << aggregates->description() << std::endl; }
void LocalAggregationAlgorithm<LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::CoarsenUncoupled(GraphBase const & graph, Aggregates & aggregates) const { Monitor m(*this, "Coarsen Uncoupled"); std::string orderingType; switch(ordering_) { case NATURAL: orderingType="Natural"; break; case RANDOM: orderingType="Random"; break; case GRAPH: orderingType="Graph"; break; default: break; } GetOStream(Runtime1) << "Ordering: " << orderingType << std::endl; GetOStream(Runtime1) << "Min nodes per aggregate: " << minNodesPerAggregate_ << std::endl; GetOStream(Runtime1) << "Max nbrs already selected: " << maxNeighAlreadySelected_ << std::endl; /* Create Aggregation object */ my_size_t nAggregates = 0; /* ============================================================= */ /* aggStat indicates whether this node has been aggregated, and */ /* vertex2AggId stores the aggregate number where this node has */ /* been aggregated into. */ /* ============================================================= */ Teuchos::ArrayRCP<NodeState> aggStat; const my_size_t nRows = graph.GetNodeNumVertices(); if (nRows > 0) aggStat = Teuchos::arcp<NodeState>(nRows); for ( my_size_t i = 0; i < nRows; ++i ) aggStat[i] = READY; /* ============================================================= */ /* Phase 1 : */ /* for all nodes, form a new aggregate with its neighbors */ /* if the number of its neighbors having been aggregated does */ /* not exceed a given threshold */ /* (GetMaxNeighAlreadySelected() = 0 ===> Vanek's scheme) */ /* ============================================================= */ /* some general variable declarations */ Teuchos::ArrayRCP<LO> randomVector; RCP<MueLu::LinkedList> nodeList; /* list storing the next node to pick as a root point for ordering_ == GRAPH */ MueLu_SuperNode *aggHead=NULL, *aggCurrent=NULL, *supernode=NULL; /**/ if ( ordering_ == RANDOM ) /* random ordering */ { //TODO: could be stored in a class that respects the interface of LinkedList randomVector = Teuchos::arcp<LO>(nRows); //size_t or int ?-> to be propagated for (my_size_t i = 0; i < nRows; ++i) randomVector[i] = i; RandomReorder(randomVector); } else if ( ordering_ == GRAPH ) /* graph ordering */ { nodeList = rcp(new MueLu::LinkedList()); nodeList->Add(0); } /* main loop */ { LO iNode = 0; LO iNode2 = 0; Teuchos::ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); // output only: contents ignored while (iNode2 < nRows) { /*------------------------------------------------------ */ /* pick the next node to aggregate */ /*------------------------------------------------------ */ if ( ordering_ == NATURAL ) iNode = iNode2++; else if ( ordering_ == RANDOM ) iNode = randomVector[iNode2++]; else if ( ordering_ == GRAPH ) { if ( nodeList->IsEmpty() ) { for ( int jNode = 0; jNode < nRows; ++jNode ) { if ( aggStat[jNode] == READY ) { nodeList->Add(jNode); //TODO optim: not necessary to create a node. Can just set iNode value and skip the end break; } } } if ( nodeList->IsEmpty() ) break; /* end of the while loop */ //TODO: coding style :( iNode = nodeList->Pop(); } else { throw(Exceptions::RuntimeError("CoarsenUncoupled: bad aggregation ordering option")); } /*------------------------------------------------------ */ /* consider further only if the node is in READY mode */ /*------------------------------------------------------ */ if ( aggStat[iNode] == READY ) { // neighOfINode is the neighbor node list of node 'iNode'.
Teuchos::ArrayView<const LO> neighOfINode = graph.getNeighborVertices(iNode); typename Teuchos::ArrayView<const LO>::size_type length = neighOfINode.size(); supernode = new MueLu_SuperNode; try { supernode->list = Teuchos::arcp<int>(length+1); } catch (std::bad_alloc&) { TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "MueLu::LocalAggregationAlgorithm::CoarsenUncoupled(): Error: couldn't allocate memory for supernode! length=" + Teuchos::toString(length)); } supernode->maxLength = length; supernode->length = 1; supernode->list[0] = iNode; int selectFlag = 1; { /*--------------------------------------------------- */ /* count the no. of neighbors having been aggregated */ /*--------------------------------------------------- */ int count = 0; for (typename Teuchos::ArrayView<const LO>::const_iterator it = neighOfINode.begin(); it != neighOfINode.end(); ++it) { int index = *it; if ( index < nRows ) { if ( aggStat[index] == READY || aggStat[index] == NOTSEL ) supernode->list[supernode->length++] = index; else count++; } } /*--------------------------------------------------- */ /* if there are too many neighbors aggregated or the */ /* number of nodes in the new aggregate is too few, */ /* don't do this one */ /*--------------------------------------------------- */ if ( count > GetMaxNeighAlreadySelected() ) selectFlag = 0; } // Note: the supernode length is actually 1 more than the // number of nodes in the candidate aggregate. The // root is counted twice. I'm not sure if this is // a bug or a feature ... so I'll leave it and change // < to <= in the if just below. if (selectFlag != 1 || supernode->length <= GetMinNodesPerAggregate()) { aggStat[iNode] = NOTSEL; delete supernode; if ( ordering_ == GRAPH ) /* if graph ordering */ { for (typename Teuchos::ArrayView<const LO>::const_iterator it = neighOfINode.begin(); it != neighOfINode.end(); ++it) { int index = *it; if ( index < nRows && aggStat[index] == READY ) { nodeList->Add(index); } } } } else { aggregates.SetIsRoot(iNode); for ( int j = 0; j < supernode->length; ++j ) { int jNode = supernode->list[j]; aggStat[jNode] = SELECTED; vertex2AggId[jNode] = nAggregates; if ( ordering_ == GRAPH ) /* if graph ordering */ { Teuchos::ArrayView<const LO> neighOfJNode = graph.getNeighborVertices(jNode); for (typename Teuchos::ArrayView<const LO>::const_iterator it = neighOfJNode.begin(); it != neighOfJNode.end(); ++it) { int index = *it; if ( index < nRows && aggStat[index] == READY ) { nodeList->Add(index); } } } } supernode->next = NULL; supernode->index = nAggregates; if ( nAggregates == 0 ) { aggHead = supernode; aggCurrent = supernode; } else { aggCurrent->next = supernode; aggCurrent = supernode; } nAggregates++; // unused aggCntArray[nAggregates] = supernode->length; } } } // end of 'for' // views on distributed vectors are freed here. 
} // end of 'main loop' nodeList = Teuchos::null; /* Update aggregate object */ aggregates.SetNumAggregates(nAggregates); /* Verbose */ { const RCP<const Teuchos::Comm<int> > & comm = graph.GetComm(); if (IsPrint(Warnings0)) { GO localReady=0, globalReady; // Compute 'localReady' for ( my_size_t i = 0; i < nRows; ++i ) if (aggStat[i] == READY) localReady++; // Compute 'globalReady' sumAll(comm, localReady, globalReady); if(globalReady > 0) GetOStream(Warnings0) << "Warning: " << globalReady << " READY nodes left" << std::endl; } if (IsPrint(Statistics1)) { // Compute 'localSelected' LO localSelected=0; for ( my_size_t i = 0; i < nRows; ++i ) if ( aggStat[i] == SELECTED ) localSelected++; // Compute 'globalSelected' GO globalSelected; sumAll(comm, (GO)localSelected, globalSelected); // Compute 'globalNRows' GO globalNRows; sumAll(comm, (GO)nRows, globalNRows); GetOStream(Statistics1) << "Nodes aggregated = " << globalSelected << " (" << globalNRows << ")" << std::endl; } if (IsPrint(Statistics1)) { GO nAggregatesGlobal; sumAll(comm, (GO)nAggregates, nAggregatesGlobal); GetOStream(Statistics1) << "Total aggregates = " << nAggregatesGlobal << std::endl; } } // verbose /* ------------------------------------------------------------- */ /* clean up */ /* ------------------------------------------------------------- */ aggCurrent = aggHead; while ( aggCurrent != NULL ) { supernode = aggCurrent; aggCurrent = aggCurrent->next; delete supernode; } } // CoarsenUncoupled
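The NodeState values that drive this pass (READY, NOTSEL, SELECTED) are used but never defined in these snippets; their meaning, reconstructed from usage as an illustrative enum (the real MueLu header may name or order them differently):

enum NodeState {
  READY,    // vertex not yet visited; available as a root or as an aggregate member
  NOTSEL,   // visited but rejected as a root; may still join an aggregate later
  SELECTED  // definitively assigned to an aggregate
};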
void UncoupledAggregationFactory<LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level &currentLevel) const { FactoryMonitor m(*this, "Build", currentLevel); const ParameterList& pL = GetParameterList(); bDefinitionPhase_ = false; // definition phase is finished, now all aggregation algorithm information is fixed bool bUseOnePtAggregationAlgorithm = pL.get<bool>("UseOnePtAggregationAlgorithm"); bool bUseSmallAggregationAlgorithm = pL.get<bool>("UseSmallAggregatesAggregationAlgorithm"); bool bUsePreserveDirichletAggregationAlgorithm = pL.get<bool>("UsePreserveDirichletAggregationAlgorithm"); bool bUseUncoupledAggregationAlgorithm = pL.get<bool>("UseUncoupledAggregationAlgorithm"); bool bUseMaxLinkAggregationAlgorithm = pL.get<bool>("UseMaxLinkAggregationAlgorithm"); bool bUseIsolatedNodeAggregationAlgorithm = pL.get<bool>("UseIsolatedNodeAggregationAlgorithm"); bool bUseEmergencyAggregationAlgorithm = pL.get<bool>("UseEmergencyAggregationAlgorithm"); // define aggregation algorithms RCP<const FactoryBase> graphFact = GetFactory("Graph"); // TODO Can we keep different aggregation algorithms over more Build calls? algos_.clear(); if (bUseOnePtAggregationAlgorithm) algos_.push_back(rcp(new OnePtAggregationAlgorithm (graphFact))); if (bUseSmallAggregationAlgorithm) algos_.push_back(rcp(new SmallAggregationAlgorithm (graphFact))); if (bUseUncoupledAggregationAlgorithm) algos_.push_back(rcp(new UncoupledAggregationAlgorithm (graphFact))); if (bUseMaxLinkAggregationAlgorithm) algos_.push_back(rcp(new MaxLinkAggregationAlgorithm (graphFact))); if (bUsePreserveDirichletAggregationAlgorithm) algos_.push_back(rcp(new PreserveDirichletAggregationAlgorithm (graphFact))); if (bUseIsolatedNodeAggregationAlgorithm) algos_.push_back(rcp(new IsolatedNodeAggregationAlgorithm (graphFact))); if (bUseEmergencyAggregationAlgorithm) algos_.push_back(rcp(new EmergencyAggregationAlgorithm (graphFact))); std::string mapOnePtName = pL.get<std::string>("OnePt aggregate map name"), mapSmallAggName = pL.get<std::string>("SmallAgg aggregate map name"); RCP<const Map> OnePtMap, SmallAggMap; if (mapOnePtName.length()) { RCP<const FactoryBase> mapOnePtFact = GetFactory("OnePt aggregate map factory"); OnePtMap = currentLevel.Get<RCP<const Map> >(mapOnePtName, mapOnePtFact.get()); } if (mapSmallAggName.length()) { RCP<const FactoryBase> mapSmallAggFact = GetFactory("SmallAgg aggregate map factory"); SmallAggMap = currentLevel.Get<RCP<const Map> >(mapSmallAggName, mapSmallAggFact.get()); } RCP<const GraphBase> graph = Get< RCP<GraphBase> >(currentLevel, "Graph"); // Build RCP<Aggregates> aggregates = rcp(new Aggregates(*graph)); aggregates->setObjectLabel("UC"); const LO nRows = graph->GetNodeNumVertices(); // construct aggStat information std::vector<unsigned> aggStat(nRows, NodeStats::READY); ArrayRCP<const bool> dirichletBoundaryMap = graph->GetBoundaryNodeMap(); if (dirichletBoundaryMap != Teuchos::null) { for (LO i = 0; i < nRows; i++) if (dirichletBoundaryMap[i] == true) aggStat[i] = NodeStats::BOUNDARY; } LO nDofsPerNode = Get<LO>(currentLevel, "DofsPerNode"); GO indexBase = graph->GetDomainMap()->getIndexBase(); if (SmallAggMap != Teuchos::null || OnePtMap != Teuchos::null) { for (LO i = 0; i < nRows; i++) { // reconstruct global row id (FIXME only works for contiguous maps) GO grid = (graph->GetDomainMap()->getGlobalElement(i)-indexBase) * nDofsPerNode + indexBase; if (SmallAggMap != Teuchos::null) { for (LO kr = 0; kr < nDofsPerNode; kr++) { if (SmallAggMap->isNodeGlobalElement(grid + kr)) aggStat[i] =
MueLu::NodeStats::SMALLAGG; } } if (OnePtMap != Teuchos::null) { for (LO kr = 0; kr < nDofsPerNode; kr++) { if (OnePtMap->isNodeGlobalElement(grid + kr)) aggStat[i] = MueLu::NodeStats::ONEPT; } } } } const RCP<const Teuchos::Comm<int> > comm = graph->GetComm(); GO numGlobalRows = 0; if (IsPrint(Statistics1)) sumAll(comm, as<GO>(nRows), numGlobalRows); LO numNonAggregatedNodes = nRows; GO numGlobalAggregatedPrev = 0, numGlobalAggsPrev = 0; for (size_t a = 0; a < algos_.size(); a++) { std::string phase = algos_[a]->description(); SubFactoryMonitor sfm(*this, "Algo \"" + phase + "\"", currentLevel); algos_[a]->BuildAggregates(pL, *graph, *aggregates, aggStat, numNonAggregatedNodes); if (IsPrint(Statistics1)) { GO numLocalAggregated = nRows - numNonAggregatedNodes, numGlobalAggregated = 0; GO numLocalAggs = aggregates->GetNumAggregates(), numGlobalAggs = 0; sumAll(comm, numLocalAggregated, numGlobalAggregated); sumAll(comm, numLocalAggs, numGlobalAggs); double aggPercent = 100*as<double>(numGlobalAggregated)/as<double>(numGlobalRows); GetOStream(Statistics1) << "  aggregated : " << (numGlobalAggregated - numGlobalAggregatedPrev) << " (phase), " << std::fixed << std::setprecision(2) << numGlobalAggregated << "/" << numGlobalRows << " [" << aggPercent << "%] (total)\n" << "  remaining  : " << numGlobalRows - numGlobalAggregated << "\n" << "  aggregates : " << numGlobalAggs-numGlobalAggsPrev << " (phase), " << numGlobalAggs << " (total)" << std::endl; numGlobalAggregatedPrev = numGlobalAggregated; numGlobalAggsPrev = numGlobalAggs; } } TEUCHOS_TEST_FOR_EXCEPTION(numNonAggregatedNodes, Exceptions::RuntimeError, "MueLu::UncoupledAggregationFactory::Build: Leftover nodes found! Error!"); aggregates->AggregatesCrossProcessors(false); Set(currentLevel, "Aggregates", aggregates); GetOStream(Statistics0) << aggregates->description() << std::endl; }
int main(int argc, char **argv) { int numbers[5] = {3, 4, 1, 7, 4}; int sum = sumAll(numbers, 5); printf("sum is: %d\n", sum); return 0; }
Int_t main(Int_t argc, Char_t *argv[]) { ROOT::Mpi::TEnvironment env(argc, argv); ROOT::Mpi::TIntraCommunicator world; TVectorT<Double_t> mResult; Double_t fScalarResult; TVectorT<Double_t> v1(elements); TVectorT<Double_t> v2(elements); for (Int_t i = 0; i < elements; i++) { v1[i] = i + (i + world.Size()); v2[i] = i * (i + world.Size()); } /////////////////////////////////////////////// //Testing methods with results in single Rank// /////////////////////////////////////////////// ROOT::Mpi::Math::TVectorTWrapper<Double_t> add(v1); add.Addition(v2, root); ROOT::Mpi::Math::TVectorTWrapper<Double_t> sub(v1); sub.Subtraction(v2, root); ROOT::Mpi::Math::TVectorTWrapper<Double_t> dot(v1); dot.Dot(v2, root); ROOT::Mpi::Math::TVectorTWrapper<Double_t> norm2Sqr(v1); norm2Sqr.Norm2Sqr(root); ROOT::Mpi::Math::TVectorTWrapper<Double_t> norm1(v1); norm1.Norm1(root); ROOT::Mpi::Math::TVectorTWrapper<Double_t> min(v1); min.Min(root); ROOT::Mpi::Math::TVectorTWrapper<Double_t> max(v1); max.Max(root); ROOT::Mpi::Math::TVectorTWrapper<Double_t> normalize(v1); normalize.Normalize(root); ROOT::Mpi::Math::TVectorTWrapper<Double_t> sum(v1); sum.Sum(root); if (world.Rank() == root) { add.GetResult(mResult); MpiCompareTVectorTest(mResult, v1 + v2, world.Rank(), "Vector Addition Single"); sub.GetResult(mResult); MpiCompareTVectorTest(mResult, v1 - v2, world.Rank(), "Vector Subtraction Single"); dot.GetResult(fScalarResult); MpiCompareTest(fScalarResult, Dot(v1, v2) , world.Rank(), "Vector Dot Product Single"); norm2Sqr.GetResult(fScalarResult); MpiCompareTest(fScalarResult, v1.Norm2Sqr() , world.Rank(), "Vector Norm2Sqr Single"); norm1.GetResult(fScalarResult); MpiCompareTest(fScalarResult, v1.Norm1() , world.Rank(), "Vector Norm1 Single"); min.GetResult(fScalarResult); MpiCompareTest(fScalarResult, v1.Min() , world.Rank(), "Vector Min Single"); max.GetResult(fScalarResult); MpiCompareTest(fScalarResult, v1.Max() , world.Rank(), "Vector Max Single"); normalize.GetResult(mResult); MpiCompareTest(mResult.Norm2Sqr(), ((1 / TMath::Sqrt(v1.Norm2Sqr()))*v1).Norm2Sqr() , world.Rank(), "Vector Normalize Single"); sum.GetResult(fScalarResult); MpiCompareTest(fScalarResult, v1.Sum(), world.Rank(), "Vector Sum Single"); } /////////////////////////////////////////////// //Testing methods with results in all ranks  // /////////////////////////////////////////////// ROOT::Mpi::Math::TVectorTWrapper<Double_t> addAll(v1); addAll.Addition(v2); ROOT::Mpi::Math::TVectorTWrapper<Double_t> subAll(v1); subAll.Subtraction(v2); addAll.GetResult(mResult); MpiCompareTVectorTest(mResult, v1 + v2, world.Rank(), "Vector Addition All"); subAll.GetResult(mResult); MpiCompareTVectorTest(mResult, v1 - v2, world.Rank(), "Vector Subtraction All"); ROOT::Mpi::Math::TVectorTWrapper<Double_t> dotAll(v1); dotAll.Dot(v2); dotAll.GetResult(fScalarResult); MpiCompareTest(fScalarResult, Dot(v1, v2) , world.Rank(), "Vector Dot Product All"); ROOT::Mpi::Math::TVectorTWrapper<Double_t> norm2SqrAll(v2); norm2SqrAll.Norm2Sqr(); norm2SqrAll.GetResult(fScalarResult); MpiCompareTest(fScalarResult, v2.Norm2Sqr() , world.Rank(), "Vector Norm2Sqr All"); ROOT::Mpi::Math::TVectorTWrapper<Double_t> norm1All(v2); norm1All.Norm1(); norm1All.GetResult(fScalarResult); MpiCompareTest(fScalarResult, v2.Norm1() , world.Rank(), "Vector Norm1 All"); ROOT::Mpi::Math::TVectorTWrapper<Double_t> minAll(v2); minAll.Min(); minAll.GetResult(fScalarResult); MpiCompareTest(fScalarResult, v2.Min() , world.Rank(), "Vector Min All"); ROOT::Mpi::Math::TVectorTWrapper<Double_t> maxAll(v2); maxAll.Max();
maxAll.GetResult(fScalarResult); MpiCompareTest(fScalarResult, v2.Max() , world.Rank(), "Vector Max All"); ROOT::Mpi::Math::TVectorTWrapper<Double_t> normalizeAll(v2); normalizeAll.Normalize(); normalizeAll.GetResult(mResult); //if the vector is normalized, the Norm2Sqr of the result is near 1 MpiCompareTest(mResult.Norm2Sqr(), ((1 / TMath::Sqrt(v2.Norm2Sqr()))*v2).Norm2Sqr() , world.Rank(), "Vector Normalize All"); ROOT::Mpi::Math::TVectorTWrapper<Double_t> sumAll(v2); sumAll.Sum(); sumAll.GetResult(fScalarResult); MpiCompareTest(fScalarResult, v2.Sum() , world.Rank(), "Vector Sum All"); return 0; }
void LeftoverAggregationAlgorithm<LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::AggregateLeftovers(GraphBase const &graph, Aggregates &aggregates) const { Monitor m(*this, "AggregateLeftovers"); my_size_t nVertices = graph.GetNodeNumVertices(); int exp_nRows = aggregates.GetMap()->getNodeNumElements(); // Tentative fix... was previously exp_nRows = nVertices + graph.GetNodeNumGhost(); int myPid = graph.GetComm()->getRank(); my_size_t nAggregates = aggregates.GetNumAggregates(); int minNodesPerAggregate = GetMinNodesPerAggregate(); const RCP<const Map> nonUniqueMap = aggregates.GetMap(); //column map of underlying graph const RCP<const Map> uniqueMap = graph.GetDomainMap(); MueLu::CoupledAggregationCommHelper<LO,GO,NO,LMO> myWidget(uniqueMap, nonUniqueMap); //TODO JJH We want to skip this call RCP<Xpetra::Vector<double,LO,GO,NO> > distWeights = Xpetra::VectorFactory<double,LO,GO,NO>::Build(nonUniqueMap); // Aggregated vertices not "definitively" assigned to processors are // arbitrated by ArbitrateAndCommunicate(). There is some // additional logic to prevent losing root nodes in arbitration. { ArrayRCP<const LO> vertex2AggId = aggregates.GetVertex2AggId()->getData(0); ArrayRCP<const LO> procWinner = aggregates.GetProcWinner()->getData(0); ArrayRCP<double> weights = distWeights->getDataNonConst(0); for (size_t i=0;i<nonUniqueMap->getNodeNumElements();i++) { if (procWinner[i] == MUELU_UNASSIGNED) { if (vertex2AggId[i] != MUELU_UNAGGREGATED) { weights[i] = 1.; if (aggregates.IsRoot(i)) weights[i] = 2.; } } } // views on distributed vectors are freed here. } //TODO JJH We want to skip this call myWidget.ArbitrateAndCommunicate(*distWeights, aggregates, true); // All tentatively assigned vertices are now definitive // Tentatively assign any vertex (ghost or local) which neighbors a root // to the aggregate associated with the root. { ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); ArrayRCP<const LO> procWinner = aggregates.GetProcWinner()->getData(0); ArrayRCP<double> weights = distWeights->getDataNonConst(0); for (my_size_t i = 0; i < nVertices; i++) { if ( aggregates.IsRoot(i) && (procWinner[i] == myPid) ) { // neighOfINode is the neighbor node list of node 'i'. ArrayView<const LO> neighOfINode = graph.getNeighborVertices(i); for (typename ArrayView<const LO>::const_iterator it = neighOfINode.begin(); it != neighOfINode.end(); ++it) { int colj = *it; if (vertex2AggId[colj] == MUELU_UNAGGREGATED) { weights[colj]= 1.; vertex2AggId[colj] = vertex2AggId[i]; } } } } // views on distributed vectors are freed here. } //TODO JJH We want to skip this call myWidget.ArbitrateAndCommunicate(*distWeights, aggregates, true); // All tentatively assigned vertices are now definitive // Record the number of aggregated vertices GO total_phase_one_aggregated = 0; { ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); GO phase_one_aggregated = 0; for (my_size_t i = 0; i < nVertices; i++) { if (vertex2AggId[i] != MUELU_UNAGGREGATED) phase_one_aggregated++; } sumAll(graph.GetComm(), phase_one_aggregated, total_phase_one_aggregated); GO local_nVertices = nVertices, total_nVertices = 0; sumAll(graph.GetComm(), local_nVertices, total_nVertices); /* Among unaggregated points, see if we can make a reasonable size */ /* aggregate out of it. We do this by looking at neighbors and seeing */ /* how many are unaggregated and on my processor. Loosely, */ /* base the number of new aggregates created on the percentage of */ /* unaggregated nodes. 
*/ ArrayRCP<double> weights = distWeights->getDataNonConst(0); double factor = 1.; factor = ((double) total_phase_one_aggregated)/((double)(total_nVertices + 1)); factor = pow(factor, GetPhase3AggCreation()); for (my_size_t i = 0; i < nVertices; i++) { if (vertex2AggId[i] == MUELU_UNAGGREGATED) { // neighOfINode is the neighbor node list of node 'iNode'. ArrayView<const LO> neighOfINode = graph.getNeighborVertices(i); int rowi_N = neighOfINode.size(); int nonaggd_neighbors = 0; for (typename ArrayView<const LO>::const_iterator it = neighOfINode.begin(); it != neighOfINode.end(); ++it) { int colj = *it; if (vertex2AggId[colj] == MUELU_UNAGGREGATED && colj < nVertices) nonaggd_neighbors++; } if ( (nonaggd_neighbors > minNodesPerAggregate) && (((double) nonaggd_neighbors)/((double) rowi_N) > factor)) { vertex2AggId[i] = (nAggregates)++; for (typename ArrayView<const LO>::const_iterator it = neighOfINode.begin(); it != neighOfINode.end(); ++it) { int colj = *it; if (vertex2AggId[colj]==MUELU_UNAGGREGATED) { vertex2AggId[colj] = vertex2AggId[i]; if (colj < nVertices) weights[colj] = 2.; else weights[colj] = 1.; } } aggregates.SetIsRoot(i); weights[i] = 2.; } } } // for (i = 0; i < nVertices; i++) // views on distributed vectors are freed here. } //TODO JJH We want to skip this call myWidget.ArbitrateAndCommunicate(*distWeights, aggregates, true); //All tentatively assigned vertices are now definitive if (IsPrint(Statistics1)) { GO Nphase1_agg = nAggregates; GO total_aggs; sumAll(graph.GetComm(), Nphase1_agg, total_aggs); GetOStream(Statistics1, 0) << "Phase 1 - nodes aggregated = " << total_phase_one_aggregated << std::endl; GetOStream(Statistics1, 0) << "Phase 1 - total aggregates = " << total_aggs << std::endl; GO i = nAggregates - Nphase1_agg; { GO ii; sumAll(graph.GetComm(),i,ii); i = ii; } GetOStream(Statistics1, 0) << "Phase 3 - additional aggregates = " << i << std::endl; } // Determine vertices that are not shared by setting Temp to all ones // and doing NonUnique2NonUnique(..., ADD). This sums values of all // local copies associated with each Gid. Thus, sums > 1 are shared. // std::cout << "exp_nrows=" << exp_nRows << " (nVertices= " << nVertices << ", numGhost=" << graph.GetNodeNumGhost() << ")" << std::endl; // std::cout << "nonUniqueMap=" << nonUniqueMap->getNodeNumElements() << std::endl; RCP<Xpetra::Vector<double,LO,GO,NO> > temp_ = Xpetra::VectorFactory<double,LO,GO,NO> ::Build(nonUniqueMap,false); //no need to zero out vector in ctor temp_->putScalar(1.); RCP<Xpetra::Vector<double,LO,GO,NO> > tempOutput_ = Xpetra::VectorFactory<double,LO,GO,NO> ::Build(nonUniqueMap); myWidget.NonUnique2NonUnique(*temp_, *tempOutput_, Xpetra::ADD); std::vector<bool> gidNotShared(exp_nRows); { ArrayRCP<const double> tempOutput = tempOutput_->getData(0); for (int i = 0; i < exp_nRows; i++) { if (tempOutput[i] > 1.) gidNotShared[i] = false; else gidNotShared[i] = true; } } // Phase 4. double nAggregatesTarget; nAggregatesTarget = ((double) uniqueMap->getGlobalNumElements())* (((double) uniqueMap->getGlobalNumElements())/ ((double) graph.GetGlobalNumEdges())); GO nAggregatesLocal=nAggregates, nAggregatesGlobal; sumAll(graph.GetComm(), nAggregatesLocal, nAggregatesGlobal); LO minNAggs; minAll(graph.GetComm(), nAggregates, minNAggs); LO maxNAggs; maxAll(graph.GetComm(), nAggregates, maxNAggs); // // Only do this phase if things look really bad. 
THIS // CODE IS PRETTY EXPERIMENTAL // #define MUELU_PHASE4BUCKETS 6 if ((nAggregatesGlobal < graph.GetComm()->getSize()) && (2.5*nAggregatesGlobal < nAggregatesTarget) && (minNAggs ==0) && (maxNAggs <= 1)) { // Modify seed of the random algorithm used by temp_->randomize() { typedef Teuchos::ScalarTraits<double> scalarTrait; // temp_ is of type double. scalarTrait::seedrandom(static_cast<unsigned int>(myPid*2 + (int) (11*scalarTrait::random()))); int k = (int)ceil( (10.*myPid)/graph.GetComm()->getSize()); for (int i = 0; i < k+7; i++) scalarTrait::random(); temp_->setSeed(static_cast<unsigned int>(scalarTrait::random())); } temp_->randomize(); ArrayRCP<double> temp = temp_->getDataNonConst(0); // build a list of candidate root nodes (vertices not adjacent // to aggregated vertices) my_size_t nCandidates = 0; global_size_t nCandidatesGlobal; ArrayRCP<LO> candidates = Teuchos::arcp<LO>(nVertices+1); double priorThreshold = 0.; for (int kkk = 0; kkk < MUELU_PHASE4BUCKETS; kkk++) { { ArrayRCP<const LO> vertex2AggId = aggregates.GetVertex2AggId()->getData(0); ArrayView<const LO> vertex2AggIdView = vertex2AggId(); RootCandidates(nVertices, vertex2AggIdView, graph, candidates, nCandidates, nCandidatesGlobal); // views on distributed vectors are freed here. } double nTargetNewGuys = nAggregatesTarget - nAggregatesGlobal; double threshold = priorThreshold + (1. - priorThreshold)*nTargetNewGuys/(nCandidatesGlobal + .001); threshold = (threshold*(kkk+1.))/((double) MUELU_PHASE4BUCKETS); priorThreshold = threshold; { ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); ArrayRCP<double> weights = distWeights->getDataNonConst(0); for (int k = 0; k < nCandidates; k++ ) { int i = candidates[k]; if ((vertex2AggId[i] == MUELU_UNAGGREGATED) && (fabs(temp[i]) < threshold)) { // Note: priorThreshold <= fabs(temp[i]) <= 1 // neighOfINode is the neighbor node list of node 'iNode'. ArrayView<const LO> neighOfINode = graph.getNeighborVertices(i); if (neighOfINode.size() > minNodesPerAggregate) { //TODO: check if this test is exactly what we want to do int count = 0; for (typename ArrayView<const LO>::const_iterator it = neighOfINode.begin(); it != neighOfINode.end(); ++it) { int Adjacent = *it; // This might not be true if someone close to i // is chosen as a root via fabs(temp[]) < Threshold if (vertex2AggId[Adjacent] == MUELU_UNAGGREGATED){ count++; vertex2AggId[Adjacent] = nAggregates; weights[Adjacent] = 1.; } } if (count >= minNodesPerAggregate) { vertex2AggId[i] = nAggregates++; weights[i] = 2.; aggregates.SetIsRoot(i); } else { // undo things for (typename ArrayView<const LO>::const_iterator it = neighOfINode.begin(); it != neighOfINode.end(); ++it) { int Adjacent = *it; if (vertex2AggId[Adjacent] == nAggregates){ vertex2AggId[Adjacent] = MUELU_UNAGGREGATED; weights[Adjacent] = 0.; } } } } } } // views on distributed vectors are freed here. } //TODO JJH We want to skip this call myWidget.ArbitrateAndCommunicate(*distWeights, aggregates, true); // All tentatively assigned vertices are now definitive nAggregatesLocal=nAggregates; sumAll(graph.GetComm(), nAggregatesLocal, nAggregatesGlobal); // check that there are no aggregate sizes below minNodesPerAggregate aggregates.SetNumAggregates(nAggregates); RemoveSmallAggs(aggregates, minNodesPerAggregate, distWeights, myWidget); nAggregates = aggregates.GetNumAggregates(); } // one possibility } // Initialize things for Phase 5.
This includes building the transpose // of the matrix ONLY for transposed rows that correspond to unaggregated // ghost vertices. Further, the transpose is only a local transpose. // Nonzero edges which exist on other processors are not represented. int observedNAgg=-1; //number of aggregates that contain vertices on this process { ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); ArrayRCP<const LO> procWinner = aggregates.GetProcWinner()->getData(0); for(LO k = 0; k < vertex2AggId.size(); ++k ) if(vertex2AggId[k]>observedNAgg) observedNAgg=vertex2AggId[k]; observedNAgg++; } ArrayRCP<int> Mark = Teuchos::arcp<int>(exp_nRows+1); ArrayRCP<int> agg_incremented = Teuchos::arcp<int>(observedNAgg); ArrayRCP<int> SumOfMarks = Teuchos::arcp<int>(observedNAgg); for (int i = 0; i < exp_nRows; i++) Mark[i] = MUELU_DISTONE_VERTEX_WEIGHT; for (int i = 0; i < agg_incremented.size(); i++) agg_incremented[i] = 0; for (int i = 0; i < SumOfMarks.size(); i++) SumOfMarks[i] = 0; // Grab the transpose matrix graph for unaggregated ghost vertices. // a) count the number of nonzeros per row in the transpose std::vector<int> RowPtr(exp_nRows+1-nVertices); //{ ArrayRCP<const LO> vertex2AggIdCst = aggregates.GetVertex2AggId()->getData(0); for (int i = nVertices; i < exp_nRows; i++) RowPtr[i-nVertices] = 0; for (int i = 0; i < nVertices; i++) { // neighOfINode is the neighbor node list of node 'iNode'. ArrayView<const LO> neighOfINode = graph.getNeighborVertices(i); for (typename ArrayView<const LO>::const_iterator it = neighOfINode.begin(); it != neighOfINode.end(); ++it) { int j = *it; if ( (j >= nVertices) && (vertex2AggIdCst[j] == MUELU_UNAGGREGATED)){ RowPtr[j-nVertices]++; } } } // b) Convert RowPtr[i] to point to the first nnz spot in row i. int iSum = 0, iTemp; for (int i = nVertices; i < exp_nRows; i++) { iTemp = RowPtr[i-nVertices]; RowPtr[i-nVertices] = iSum; iSum += iTemp; } RowPtr[exp_nRows-nVertices] = iSum; std::vector<LO> cols(iSum+1); // c) Traverse matrix and insert entries in proper location. for (int i = 0; i < nVertices; i++) { // neighOfINode is the neighbor node list of node 'iNode'. ArrayView<const LO> neighOfINode = graph.getNeighborVertices(i); for (typename ArrayView<const LO>::const_iterator it = neighOfINode.begin(); it != neighOfINode.end(); ++it) { int j = *it; if ( (j >= nVertices) && (vertex2AggIdCst[j] == MUELU_UNAGGREGATED)){ cols[RowPtr[j-nVertices]++] = i; } } } // d) RowPtr[i] points to beginning of row i+1 so shift by one location. for (int i = exp_nRows; i > nVertices; i--) RowPtr[i-nVertices] = RowPtr[i-1-nVertices]; RowPtr[0] = 0; // views on distributed vectors are freed here. vertex2AggIdCst = Teuchos::null; //} int bestScoreCutoff; int thresholds[10] = {300,200,100,50,25,13,7,4,2,0}; // Stick unaggregated vertices into existing aggregates as described above. { int ncalls=0; for (int kk = 0; kk < 10; kk += 2) { bestScoreCutoff = thresholds[kk]; ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); ArrayRCP<const LO> procWinner = aggregates.GetProcWinner()->getData(0); ArrayRCP<double> weights = distWeights->getDataNonConst(0); for (int i = 0; i < exp_nRows; i++) { if (vertex2AggId[i] == MUELU_UNAGGREGATED) { // neighOfINode is the neighbor node list of node 'iNode'. ArrayView<const LO> neighOfINode; // Grab neighboring vertices which are either in the graph for local ids // or sit in the transposed fragment just constructed above for ghosts.
if (i < nVertices) { neighOfINode = graph.getNeighborVertices(i); } else { LO *rowi_col = NULL, rowi_N; rowi_col = &(cols[RowPtr[i-nVertices]]); rowi_N = RowPtr[i+1-nVertices] - RowPtr[i-nVertices]; neighOfINode = ArrayView<const LO>(rowi_col, rowi_N); } for (typename ArrayView<const LO>::const_iterator it = neighOfINode.begin(); it != neighOfINode.end(); ++it) { int Adjacent = *it; int AdjacentAgg = vertex2AggId[Adjacent]; //Adjacent is aggregated and either I own the aggregate // or I could own the aggregate after arbitration. if ((AdjacentAgg != MUELU_UNAGGREGATED) && ((procWinner[Adjacent] == myPid) || (procWinner[Adjacent] == MUELU_UNASSIGNED))){ SumOfMarks[AdjacentAgg] += Mark[Adjacent]; } } int best_score = MUELU_NOSCORE; int best_agg = -1; int BestMark = -1; bool cannotLoseAllFriends=false; // Used to address possible loss of vertices in arbitration of shared nodes discussed above. (Initialized to false only to avoid a compiler warning). for (typename ArrayView<const LO>::const_iterator it = neighOfINode.begin(); it != neighOfINode.end(); ++it) { int Adjacent = *it; int AdjacentAgg = vertex2AggId[Adjacent]; //Adjacent is aggregated, has some value and no //other processor has definitively claimed him if ((AdjacentAgg != MUELU_UNAGGREGATED) && (SumOfMarks[AdjacentAgg] != 0) && ((procWinner[Adjacent] == myPid) || (procWinner[Adjacent] == MUELU_UNASSIGNED ))) { // first figure out the penalty associated with // AdjacentAgg having already been incremented // during this phase, then compute score. double penalty = (double) (INCR_SCALING*agg_incremented[AdjacentAgg]); if (penalty > MUELU_PENALTYFACTOR*((double)SumOfMarks[AdjacentAgg])) penalty = MUELU_PENALTYFACTOR*((double)SumOfMarks[AdjacentAgg]); int score = SumOfMarks[AdjacentAgg]- ((int) floor(penalty)); if (score > best_score) { best_agg = AdjacentAgg; best_score = score; BestMark = Mark[Adjacent]; cannotLoseAllFriends = false; // This addresses the issue mentioned above by checking whether // Adjacent could be lost in arbitration. weight==0 means that // Adjacent was not set during this loop of Phase 5 (and so it // has already undergone arbitration). GidNotShared == true // obviously implies that Adjacent cannot be lost to arbitration if ((weights[Adjacent]== 0.) || (gidNotShared[Adjacent] == true)) cannotLoseAllFriends = true; } // Another vertex within current best aggregate found. // We should have (best_score == score). We need to see // if we can improve BestMark and cannotLoseAllFriends. else if (best_agg == AdjacentAgg) { if ((weights[Adjacent]== 0.) || (gidNotShared[Adjacent] == true)) cannotLoseAllFriends = true; if (Mark[Adjacent] > BestMark) BestMark = Mark[Adjacent]; } } } // Clean up for (typename ArrayView<const LO>::const_iterator it = neighOfINode.begin(); it != neighOfINode.end(); ++it) { int Adjacent = *it; int AdjacentAgg = vertex2AggId[Adjacent]; if (AdjacentAgg >= 0) SumOfMarks[AdjacentAgg] = 0; } // Tentatively assign vertex to best_agg. if ( (best_score >= bestScoreCutoff) && (cannotLoseAllFriends)) { TEUCHOS_TEST_FOR_EXCEPTION(best_agg == -1 || BestMark == -1, MueLu::Exceptions::RuntimeError, "MueLu::CoupledAggregationFactory internal error"); // should never happen vertex2AggId[i] = best_agg; weights[i] = best_score; agg_incremented[best_agg]++; Mark[i] = (int) ceil( ((double) BestMark)/2.); } } // views on distributed vectors are freed here.
} vertex2AggId = Teuchos::null; procWinner = Teuchos::null; weights = Teuchos::null; ++ncalls; //TODO JJH We want to skip this call myWidget.ArbitrateAndCommunicate(*distWeights, aggregates, true); // All tentatively assigned vertices are now definitive } // if (graph.GetComm()->getRank()==0) // std::cout << "#calls to Arb&Comm=" << ncalls << std::endl; } // Phase 6: Aggregate remaining unaggregated vertices and try at all costs // to avoid small aggregates. // One case where we can find ourselves in this situation // is if all vertices vk adjacent to v have already been // put in other processor's aggregates and v does not have // a direct connection to a local vertex in any of these // aggregates. int Nleftover = 0, Nsingle = 0; { ArrayRCP<LO> vertex2AggId = aggregates.GetVertex2AggId()->getDataNonConst(0); ArrayRCP<double> weights = distWeights->getDataNonConst(0); ArrayRCP<const LO> procWinner = aggregates.GetProcWinner()->getData(0); int count = 0; for (my_size_t i = 0; i < nVertices; i++) { if (vertex2AggId[i] == MUELU_UNAGGREGATED) { Nleftover++; // neighOfINode is the neighbor node list of node 'iNode'. ArrayView<const LO> neighOfINode = graph.getNeighborVertices(i); // We don't want too small of an aggregate. So let's see if there is an // unaggregated neighbor that we can also put with this vertex vertex2AggId[i] = nAggregates; weights[i] = 1.; if (count == 0) aggregates.SetIsRoot(i); count++; for (typename ArrayView<const LO>::const_iterator it = neighOfINode.begin(); it != neighOfINode.end(); ++it) { int j = *it; if ((j != i)&&(vertex2AggId[j] == MUELU_UNAGGREGATED)&& (j < nVertices)) { vertex2AggId[j] = nAggregates; weights[j] = 1.; count++; } } if ( count >= minNodesPerAggregate) { nAggregates++; count = 0; } } } // We have something which is under minNodesPerAggregate when count != 0 if (count != 0) { #ifdef FIXME // Can stick small aggregate with 0th aggregate? if (nAggregates > 0) { for (my_size_t i = 0; i < nVertices; i++) { if ((vertex2AggId[i] == nAggregates) && (procWinner[i] == myPid)) { vertex2AggId[i] = 0; aggregates.SetIsRoot(i,false); } } } else { Nsingle++; nAggregates++; } #else // Can stick small aggregate with 0th aggregate? if (nAggregates > 0) { for (my_size_t i = 0; i < nVertices; i++) { // TW: This is not a real fix. This may produce ugly bad aggregates! // I removed the procWinner[i] == myPid check. It makes no sense to me since // it leaves vertex2AggId[i] == nAggregates -> crash in ComputeAggregateSizes(). // Maybe it's better to add the leftovers to the last generated agg on the current proc. // The best solution would be to add them to the "next"/nearest aggregate, that may be // on another processor if (vertex2AggId[i] == nAggregates) { vertex2AggId[i] = nAggregates-1; //0; aggregates.SetIsRoot(i,false); } } } else { Nsingle++; nAggregates++; } #endif } // views on distributed vectors are freed here. } //TODO JJH We want to skip this call myWidget.ArbitrateAndCommunicate(*distWeights, aggregates, false); if (IsPrint(Statistics1)) { GO total_Nsingle=0; sumAll(graph.GetComm(), (GO)Nsingle, total_Nsingle); GO total_Nleftover=0; sumAll(graph.GetComm(), (GO)Nleftover, total_Nleftover); // GO total_aggs; sumAll(graph.GetComm(), (GO)nAggregates, total_aggs); // GetOStream(Statistics1, 0) << "Phase 6 - total aggregates = " << total_aggs << std::endl; GetOStream(Statistics1, 0) << "Phase 6 - leftovers = " << total_Nleftover << " and singletons = " << total_Nsingle << std::endl; } aggregates.SetNumAggregates(nAggregates); } //AggregateLeftovers
void AlgebraicPermutationStrategy<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::BuildPermutation(const Teuchos::RCP<Matrix> & A, const Teuchos::RCP<const Map> permRowMap, Level & currentLevel, const FactoryBase* genFactory) const { #ifndef HAVE_MUELU_INST_COMPLEX_INT_INT const Teuchos::RCP< const Teuchos::Comm< int > > comm = A->getRowMap()->getComm(); int numProcs = comm->getSize(); int myRank = comm->getRank(); /*if( permRowMap == Teuchos::null ) { permRowMap = A->getRowMap(); // use full row map of A }*/ size_t nDofsPerNode = 1; if (A->IsView("stridedMaps")) { Teuchos::RCP<const Map> permRowMapStrided = A->getRowMap("stridedMaps"); nDofsPerNode = Teuchos::rcp_dynamic_cast<const StridedMap>(permRowMapStrided)->getFixedBlockSize(); } //GetOStream(Runtime0, 0) << "Perform generation of permutation operators on " << mapName_ << " map with " << permRowMap->getGlobalNumElements() << " elements" << std::endl; std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > permutedDiagCandidates; std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > keepDiagonalEntries; std::vector<Scalar> Weights; // loop over all local rows in matrix A and keep diagonal entries if corresponding // matrix rows are not contained in permRowMap for (size_t row = 0; row < A->getRowMap()->getNodeNumElements(); row++) { GlobalOrdinal grow = A->getRowMap()->getGlobalElement(row); if(permRowMap->isNodeGlobalElement(grow) == true) continue; size_t nnz = A->getNumEntriesInLocalRow(row); // extract local row information from matrix Teuchos::ArrayView<const LocalOrdinal> indices; Teuchos::ArrayView<const Scalar> vals; A->getLocalRowView(row, indices, vals); TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as<size_t>(indices.size()) != nnz, Exceptions::RuntimeError, "MueLu::PermutationFactory::Build: number of nonzeros not equal to number of indices? Error."); // find column entry with max absolute value GlobalOrdinal gMaxValIdx = 0; Scalar norm1 = 0.0; Scalar maxVal = 0.0; for (size_t j = 0; j < Teuchos::as<size_t>(indices.size()); j++) { norm1 += std::abs(vals[j]); if(std::abs(vals[j]) > maxVal) { maxVal = std::abs(vals[j]); gMaxValIdx = A->getColMap()->getGlobalElement(indices[j]); } } if(grow == gMaxValIdx) // only keep row/col pair if it's diagonally dominant!!! keepDiagonalEntries.push_back(std::make_pair(grow,grow)); } ////////// // handle rows that are marked to be relevant for permutations for (size_t row = 0; row < permRowMap->getNodeNumElements(); row++) { GlobalOrdinal grow = permRowMap->getGlobalElement(row); LocalOrdinal lArow = A->getRowMap()->getLocalElement(grow); size_t nnz = A->getNumEntriesInLocalRow(lArow); // extract local row information from matrix Teuchos::ArrayView<const LocalOrdinal> indices; Teuchos::ArrayView<const Scalar> vals; A->getLocalRowView(lArow, indices, vals); TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as<size_t>(indices.size()) != nnz, Exceptions::RuntimeError, "MueLu::PermutationFactory::Build: number of nonzeros not equal to number of indices?
Error."); // find column entry with max absolute value GlobalOrdinal gMaxValIdx = 0; Scalar norm1 = 0.0; Scalar maxVal = 0.0; for (size_t j = 0; j < Teuchos::as<size_t>(indices.size()); j++) { norm1 += std::abs(vals[j]); if(std::abs(vals[j]) > maxVal) { maxVal = std::abs(vals[j]); gMaxValIdx = A->getColMap()->getGlobalElement(indices[j]); } } if(std::abs(maxVal) > 0.0) { // keep only max Entries \neq 0.0 permutedDiagCandidates.push_back(std::make_pair(grow,gMaxValIdx)); Weights.push_back(maxVal/(norm1*Teuchos::as<Scalar>(nnz))); } else { std::cout << "ATTENTION: row " << grow << " has only zero entries -> singular matrix!" << std::endl; } } // sort Weights in descending order std::vector<int> permutation; sortingPermutation(Weights,permutation); // create new vector with exactly one possible entry for each column // each processor which requests the global column id gcid adds 1 to gColVec // gColVec will be summed up over all processors and communicated to gDomVec // which is based on the non-overlapping domain map of A. Teuchos::RCP<Vector> gColVec = VectorFactory::Build(A->getColMap()); Teuchos::RCP<Vector> gDomVec = VectorFactory::Build(A->getDomainMap()); gColVec->putScalar(0.0); gDomVec->putScalar(0.0); // put in all keep diagonal entries for (typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::const_iterator p = keepDiagonalEntries.begin(); p != keepDiagonalEntries.end(); ++p) { gColVec->sumIntoGlobalValue((*p).second,1.0); } Teuchos::RCP<Export> exporter = ExportFactory::Build(gColVec->getMap(), gDomVec->getMap()); gDomVec->doExport(*gColVec,*exporter,Xpetra::ADD); // communicate blocked gcolids to all procs gColVec->doImport(*gDomVec,*exporter,Xpetra::INSERT); std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > permutedDiagCandidatesFiltered; // TODO reserve memory std::map<GlobalOrdinal, Scalar> gColId2Weight; Teuchos::ArrayRCP< Scalar > ddata = gColVec->getDataNonConst(0); for(size_t i = 0; i < permutedDiagCandidates.size(); ++i) { // loop over all candidates std::pair<GlobalOrdinal, GlobalOrdinal> pp = permutedDiagCandidates[permutation[i]]; GlobalOrdinal grow = pp.first; GlobalOrdinal gcol = pp.second; LocalOrdinal lcol = A->getColMap()->getLocalElement(gcol); //Teuchos::ArrayRCP< Scalar > ddata = gColVec->getDataNonConst(0); if(ddata[lcol] > 0.0){ continue; // skip lcol: column already handled by another row } // mark column as already taken ddata[lcol]++; permutedDiagCandidatesFiltered.push_back(std::make_pair(grow,gcol)); gColId2Weight[gcol] = Weights[permutation[i]]; } // communicate how often each column index is requested by the different procs gDomVec->doExport(*gColVec,*exporter,Xpetra::ADD); gColVec->doImport(*gDomVec,*exporter,Xpetra::INSERT); // probably not needed // TODO check me //***************************************************************************************** // first communicate ALL global ids of column indices which are requested by more // than one proc to all other procs // detect which global col indices are requested by more than one proc // and store them in the multipleColRequests vector std::vector<GlobalOrdinal> multipleColRequests; // store all global column indices from current processor that are also // requested by another processor. This is possible, since they are stored // in gDomVec which is based on the nonoverlapping domain map. That is, each // global col id is handled by exactly one proc. 
  std::queue<GlobalOrdinal> unusedColIdx; // unused column indices on the current processor

  Teuchos::ArrayRCP<const Scalar> arrDomVec = gDomVec->getData(0);
  for (size_t sz = 0; sz < gDomVec->getLocalLength(); ++sz) {
    if (arrDomVec[sz] > 1.0) {
      multipleColRequests.push_back(gDomVec->getMap()->getGlobalElement(sz));
    } else if (arrDomVec[sz] == 0.0) {
      unusedColIdx.push(gDomVec->getMap()->getGlobalElement(sz));
    }
  }

  // communicate the global number of column indices which are requested by more than one proc
  LocalOrdinal localMultColRequests  = Teuchos::as<LocalOrdinal>(multipleColRequests.size());
  LocalOrdinal globalMultColRequests = 0;

  // sum up all entries in multipleColRequests over all processors
  sumAll(gDomVec->getMap()->getComm(), (LocalOrdinal)localMultColRequests, globalMultColRequests);

  if (globalMultColRequests > 0) {
    // special handling: two processors request the same global column id.
    // decide which processor gets it

    // distribute the number of multipleColRequests to all processors;
    // afterwards each processor knows how many column ids for exchange are handled by every proc
    std::vector<GlobalOrdinal> numMyMultColRequests(numProcs, 0);
    std::vector<GlobalOrdinal> numGlobalMultColRequests(numProcs, 0);
    numMyMultColRequests[myRank] = localMultColRequests;
    Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numProcs, &numMyMultColRequests[0], &numGlobalMultColRequests[0]);

    // communicate multipleColRequests entries to all processors:
    // this proc's offset is the sum of the request counts of all lower ranks
    int nMyOffset = 0;
    for (int i = 0; i < myRank; i++)
      nMyOffset += numGlobalMultColRequests[i];

    GlobalOrdinal zero = 0;
    std::vector<GlobalOrdinal> procMultRequestedColIds(globalMultColRequests, zero);
    std::vector<GlobalOrdinal> global_procMultRequestedColIds(globalMultColRequests, zero);

    // loop over all local column GIDs that are also requested by other procs
    for (size_t i = 0; i < multipleColRequests.size(); i++) {
      procMultRequestedColIds[nMyOffset + i] = multipleColRequests[i]; // all weights are > 0 ?
    }

    // replicate the requested column ids on all processors
    Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, Teuchos::as<int>(globalMultColRequests), &procMultRequestedColIds[0], &global_procMultRequestedColIds[0]);

    // loop over global_procMultRequestedColIds and eliminate wrong entries...
    for (size_t k = 0; k < global_procMultRequestedColIds.size(); k++) {
      GlobalOrdinal globColId = global_procMultRequestedColIds[k];

      std::vector<Scalar> MyWeightForColId(numProcs, 0);
      std::vector<Scalar> GlobalWeightForColId(numProcs, 0);

      if (gColVec->getMap()->isNodeGlobalElement(globColId)) {
        MyWeightForColId[myRank] = gColId2Weight[globColId];
      } else {
        MyWeightForColId[myRank] = 0.0;
      }

      Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numProcs, &MyWeightForColId[0], &GlobalWeightForColId[0]);

      if (gColVec->getMap()->isNodeGlobalElement(globColId)) {
        // note: 2 procs could have the same weight for a column index.
        // pick the first one.
        Scalar winnerValue = 0.0;
        int winnerProcRank = 0;
        for (int proc = 0; proc < numProcs; proc++) {
          if (GlobalWeightForColId[proc] > winnerValue) {
            winnerValue = GlobalWeightForColId[proc];
            winnerProcRank = proc;
          }
        }

        // winnerProcRank is the winner for handling globColId.
        // winnerProcRank is unique (even if two procs have the same weight for a column index)

        if (myRank != winnerProcRank) {
          // remove the corresponding entry from permutedDiagCandidatesFiltered
          typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator p = permutedDiagCandidatesFiltered.begin();
          while (p != permutedDiagCandidatesFiltered.end()) {
            if ((*p).second == globColId)
              p = permutedDiagCandidatesFiltered.erase(p);
            else
              p++;
          }
        }
      } // end if isNodeGlobalElement
    } // end loop over global_procMultRequestedColIds and eliminate wrong entries...
  } // end if globalMultColRequests > 0

  // put together all pairs:
  //size_t sizeRowColPairs = keepDiagonalEntries.size() + permutedDiagCandidatesFiltered.size();
  std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > RowColPairs;
  RowColPairs.insert(RowColPairs.end(), keepDiagonalEntries.begin(), keepDiagonalEntries.end());
  RowColPairs.insert(RowColPairs.end(), permutedDiagCandidatesFiltered.begin(), permutedDiagCandidatesFiltered.end());

#ifdef DEBUG_OUTPUT
  //&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
  // plausibility check
  gColVec->putScalar(0.0);
  gDomVec->putScalar(0.0);
  typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator pl = RowColPairs.begin();
  while (pl != RowColPairs.end()) {
    //GlobalOrdinal ik = (*pl).first;
    GlobalOrdinal jk = (*pl).second;
    gColVec->sumIntoGlobalValue(jk, 1.0);
    pl++;
  }
  gDomVec->doExport(*gColVec, *exporter, Xpetra::ADD);
  Teuchos::ArrayRCP<const Scalar> arrDomVecDbg = gDomVec->getData(0);
  for (size_t sz = 0; sz < gDomVec->getLocalLength(); ++sz) {
    if (arrDomVecDbg[sz] > 1.0) {
      GetOStream(Runtime0, 0) << "RowColPairs has multiple column [" << sz << "]=" << arrDomVecDbg[sz] << std::endl;
    } else if (arrDomVecDbg[sz] == 0.0) {
      GetOStream(Runtime0, 0) << "RowColPairs has empty column [" << sz << "]=" << arrDomVecDbg[sz] << std::endl;
    }
  }
  //&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&
#endif

  //////////////////////////////////////////////////
  // assumption: on each processor RowColPairs now contains
  // a valid set of (row, column) pairs, where the row entries
  // are a subset of the processor's rows and the column entries
  // are unique throughout all processors.
  // Note: the RowColPairs are only defined for a subset of all rows,
  // so there might be rows without an entry in RowColPairs.
  // It can happen that some rows seem to be missing in RowColPairs, since
  // the entry with maximum absolute value in that row has already been reserved
  // by another row (e.g. by an already diagonally dominant row outside
  // of permRowMap).
  // In fact, the RowColPairs vector only defines the (row, column) pairs
  // that will definitely be moved to the diagonal after permutation.
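  // Example of such a missing row: if row 7 lies outside permRowMap and is diagonally
  // dominant, (7,7) is kept; if row 5 inside permRowMap also has its largest entry
  // in column 7, that column is already taken, so row 5 gets no pair here and is
  // only fixed up below when Pperm/Qperm are completed from the leftover ids.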
#ifdef DEBUG_OUTPUT
//  for (typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::const_iterator p = RowColPairs.begin(); p != RowColPairs.end(); ++p) {
//    std::cout << "proc: " << myRank << " r/c: " << (*p).first << "/" << (*p).second << std::endl;
//  }
//  for (typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::const_iterator p = RowColPairs.begin(); p != RowColPairs.end(); ++p) {
////    if((*p).first != (*p).second) std::cout << "difference: " << (*p).first << " " << (*p).second << std::endl;
//    std::cout << (*p).first + 1 << " " << (*p).second + 1 << std::endl;
//  }
//  std::cout << "\n";
#endif

  // vectors to store permutation information
  Teuchos::RCP<Vector> Pperm  = VectorFactory::Build(A->getRowMap());
  Teuchos::RCP<Vector> Qperm  = VectorFactory::Build(A->getDomainMap()); // global variant (based on domain map)
  Teuchos::RCP<Vector> lQperm = VectorFactory::Build(A->getColMap());    // local variant (based on column map)

  Teuchos::ArrayRCP<Scalar> PpermData = Pperm->getDataNonConst(0);
  Teuchos::ArrayRCP<Scalar> QpermData = Qperm->getDataNonConst(0);

  Pperm->putScalar(0.0);
  Qperm->putScalar(0.0);
  lQperm->putScalar(0.0);

  // setup exporter for Qperm
  Teuchos::RCP<Export> QpermExporter = ExportFactory::Build(lQperm->getMap(), Qperm->getMap());

  Teuchos::RCP<Vector> RowIdStatus  = VectorFactory::Build(A->getRowMap());
  Teuchos::RCP<Vector> ColIdStatus  = VectorFactory::Build(A->getDomainMap()); // global variant (based on domain map)
  Teuchos::RCP<Vector> lColIdStatus = VectorFactory::Build(A->getColMap());    // local variant (based on column map)
  Teuchos::RCP<Vector> ColIdUsed    = VectorFactory::Build(A->getDomainMap()); // marks column ids that are already in use

  Teuchos::ArrayRCP<Scalar> RowIdStatusArray  = RowIdStatus->getDataNonConst(0);
  Teuchos::ArrayRCP<Scalar> ColIdStatusArray  = ColIdStatus->getDataNonConst(0);
  Teuchos::ArrayRCP<Scalar> lColIdStatusArray = lColIdStatus->getDataNonConst(0);
  Teuchos::ArrayRCP<Scalar> ColIdUsedArray    = ColIdUsed->getDataNonConst(0); // not sure about this

  RowIdStatus->putScalar(0.0);
  ColIdStatus->putScalar(0.0);
  lColIdStatus->putScalar(0.0);
  ColIdUsed->putScalar(0.0); // no column ids are used

  // count wide-range permutations
  // a wide-range permutation is defined as a permutation of rows/columns that do not
  // belong to the same node
  LocalOrdinal  lWideRangeRowPermutations = 0;
  GlobalOrdinal gWideRangeRowPermutations = 0;
  LocalOrdinal  lWideRangeColPermutations = 0;
  GlobalOrdinal gWideRangeColPermutations = 0;

  // run 1: mark all "identity" permutations
  typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator p = RowColPairs.begin();
  while (p != RowColPairs.end()) {
    GlobalOrdinal ik = (*p).first;
    GlobalOrdinal jk = (*p).second;

    LocalOrdinal lik = A->getRowMap()->getLocalElement(ik);
    LocalOrdinal ljk = A->getColMap()->getLocalElement(jk);

    if (RowIdStatusArray[lik] == 0.0) {
      RowIdStatusArray[lik]  = 1.0; // use this row id
      lColIdStatusArray[ljk] = 1.0; // use this column id
      Pperm->replaceLocalValue(lik, ik);
      lQperm->replaceLocalValue(ljk, ik); // use column map
      ColIdUsed->replaceGlobalValue(ik, 1.0); // ik is now used
      p = RowColPairs.erase(p);

      // detect wide-range permutations
      if (floor(ik/nDofsPerNode) != floor(jk/nDofsPerNode)) {
        lWideRangeColPermutations++;
      }
    }
    else
      p++;
  }

  // communicate column map -> domain map
  Qperm->doExport(*lQperm, *QpermExporter, Xpetra::ABSMAX);
  ColIdStatus->doExport(*lColIdStatus, *QpermExporter, Xpetra::ABSMAX);

  // plausibility check
  if (RowColPairs.size() > 0)
    GetOStream(Warnings0, 0) << "MueLu::PermutationFactory: There are row/col pairs left!" << std::endl; // TODO fix me
"MueLu::PermutationFactory: There are Row/Col pairs left!!!" << std::endl; // TODO fix me // close Pperm // count, how many row permutations are missing on current proc size_t cntFreeRowIdx = 0; std::queue<GlobalOrdinal> qFreeGRowIdx; // store global row ids of "free" rows for (size_t lik = 0; lik < RowIdStatus->getLocalLength(); ++lik) { if(RowIdStatusArray[lik] == 0.0) { cntFreeRowIdx++; qFreeGRowIdx.push(RowIdStatus->getMap()->getGlobalElement(lik)); } } // fix Pperm for (size_t lik = 0; lik < RowIdStatus->getLocalLength(); ++lik) { if(RowIdStatusArray[lik] == 0.0) { RowIdStatusArray[lik] = 1.0; // use this row id Pperm->replaceLocalValue(lik, qFreeGRowIdx.front()); // detect wide range permutations if(floor(qFreeGRowIdx.front()/nDofsPerNode) != floor(RowIdStatus->getMap()->getGlobalElement(lik)/nDofsPerNode)) { lWideRangeRowPermutations++; } qFreeGRowIdx.pop(); } } // close Qperm (free permutation entries in Qperm) size_t cntFreeColIdx = 0; std::queue<GlobalOrdinal> qFreeGColIdx; // store global column ids of "free" available columns for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) { if(ColIdStatusArray[ljk] == 0.0) { cntFreeColIdx++; qFreeGColIdx.push(ColIdStatus->getMap()->getGlobalElement(ljk)); } } size_t cntUnusedColIdx = 0; std::queue<GlobalOrdinal> qUnusedGColIdx; // store global column ids of "free" available columns for (size_t ljk = 0; ljk < ColIdUsed->getLocalLength(); ++ljk) { if(ColIdUsedArray[ljk] == 0.0) { cntUnusedColIdx++; qUnusedGColIdx.push(ColIdUsed->getMap()->getGlobalElement(ljk)); } } // fix Qperm with local entries for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) { // stop if no (local) unused column idx are left if(cntUnusedColIdx == 0) break; if(ColIdStatusArray[ljk] == 0.0) { ColIdStatusArray[ljk] = 1.0; // use this row id Qperm->replaceLocalValue(ljk, qUnusedGColIdx.front()); // loop over ColIdStatus (lives on domain map) ColIdUsed->replaceGlobalValue(qUnusedGColIdx.front(),1.0); // ljk is now used, too // detect wide range permutations if(floor(qUnusedGColIdx.front()/nDofsPerNode) != floor(ColIdStatus->getMap()->getGlobalElement(ljk)/nDofsPerNode)) { lWideRangeColPermutations++; } qUnusedGColIdx.pop(); cntUnusedColIdx--; cntFreeColIdx--; } } //Qperm->doExport(*lQperm,*QpermExporter,Xpetra::ABSMAX); // no export necessary, since changes only locally //ColIdStatus->doExport(*lColIdStatus,*QpermExporter,Xpetra::ABSMAX); // count, how many unused column idx are needed on current processor // to complete Qperm cntFreeColIdx = 0; for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) { // TODO avoid this loop if(ColIdStatusArray[ljk] == 0.0) { cntFreeColIdx++; } } GlobalOrdinal global_cntFreeColIdx = 0; LocalOrdinal local_cntFreeColIdx = cntFreeColIdx; sumAll(comm, Teuchos::as<GlobalOrdinal>(local_cntFreeColIdx), global_cntFreeColIdx); #ifdef DEBUG_OUTPUT std::cout << "global # of empty column idx entries in Qperm: " << global_cntFreeColIdx << std::endl; #endif // avoid global communication if possible if(global_cntFreeColIdx > 0) { // 1) count how many unused column ids are left GlobalOrdinal global_cntUnusedColIdx = 0; LocalOrdinal local_cntUnusedColIdx = cntUnusedColIdx; sumAll(comm, Teuchos::as<GlobalOrdinal>(local_cntUnusedColIdx), global_cntUnusedColIdx); #ifdef DEBUG_OUTPUT std::cout << "global # of unused column idx: " << global_cntUnusedColIdx << std::endl; #endif // 2) communicate how many unused column ids are available on procs std::vector<LocalOrdinal> local_UnusedColIdxOnProc (numProcs); 
    std::vector<LocalOrdinal> global_UnusedColIdxOnProc(numProcs);
    local_UnusedColIdxOnProc[myRank] = local_cntUnusedColIdx;
    Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numProcs, &local_UnusedColIdxOnProc[0], &global_UnusedColIdxOnProc[0]);

#ifdef DEBUG_OUTPUT
    std::cout << "PROC " << myRank << " global num unused indices per proc: ";
    for (size_t ljk = 0; ljk < global_UnusedColIdxOnProc.size(); ++ljk) {
      std::cout << " " << global_UnusedColIdxOnProc[ljk];
    }
    std::cout << std::endl;
#endif

    // 3) build an array of length global_cntUnusedColIdx to globally replicate the unused column idx
    std::vector<GlobalOrdinal> local_UnusedColIdxVector(Teuchos::as<size_t>(global_cntUnusedColIdx));
    std::vector<GlobalOrdinal> global_UnusedColIdxVector(Teuchos::as<size_t>(global_cntUnusedColIdx));
    GlobalOrdinal global_cntUnusedColIdxStartIter = 0;
    for (int proc = 0; proc < myRank; proc++) {
      global_cntUnusedColIdxStartIter += global_UnusedColIdxOnProc[proc];
    }
    for (GlobalOrdinal k = global_cntUnusedColIdxStartIter; k < global_cntUnusedColIdxStartIter + local_cntUnusedColIdx; k++) {
      local_UnusedColIdxVector[k] = qUnusedGColIdx.front();
      qUnusedGColIdx.pop();
    }
    Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, Teuchos::as<int>(global_cntUnusedColIdx), &local_UnusedColIdxVector[0], &global_UnusedColIdxVector[0]);

#ifdef DEBUG_OUTPUT
    std::cout << "PROC " << myRank << " global UnusedGColIdx: ";
    for (size_t ljk = 0; ljk < global_UnusedColIdxVector.size(); ++ljk) {
      std::cout << " " << global_UnusedColIdxVector[ljk];
    }
    std::cout << std::endl;
#endif

    // 4) communicate how many column idx are needed on each processor
    //    to complete Qperm
    std::vector<LocalOrdinal> local_EmptyColIdxOnProc(numProcs);
    std::vector<LocalOrdinal> global_EmptyColIdxOnProc(numProcs);
    local_EmptyColIdxOnProc[myRank] = local_cntFreeColIdx;
    Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numProcs, &local_EmptyColIdxOnProc[0], &global_EmptyColIdxOnProc[0]);

#ifdef DEBUG_OUTPUT
    std::cout << "PROC " << myRank << " global num of needed column indices: ";
    for (size_t ljk = 0; ljk < global_EmptyColIdxOnProc.size(); ++ljk) {
      std::cout << " " << global_EmptyColIdxOnProc[ljk];
    }
    std::cout << std::endl;
#endif

    // 5) determine the first index in global_UnusedColIdxVector for unused column indices
    //    that are marked to be used by this processor
    GlobalOrdinal global_UnusedColStartIdx = 0;
    for (int proc = 0; proc < myRank; proc++) {
      global_UnusedColStartIdx += global_EmptyColIdxOnProc[proc];
    }

#ifdef DEBUG_OUTPUT
    GetOStream(Statistics0, 0) << "PROC " << myRank << " is allowed to use the following column gids: ";
    for (GlobalOrdinal k = global_UnusedColStartIdx; k < global_UnusedColStartIdx + Teuchos::as<GlobalOrdinal>(cntFreeColIdx); k++) {
      GetOStream(Statistics0, 0) << global_UnusedColIdxVector[k] << " ";
    }
    GetOStream(Statistics0, 0) << std::endl;
#endif
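    // Steps 3)-5) partition the replicated list of unused column gids by prefix
    // sums: with three procs needing (2, 1, 3) column ids, proc 0 reads
    // global_UnusedColIdxVector[0..1], proc 1 reads [2], and proc 2 reads [3..5].
    // All procs compute the same global_EmptyColIdxOnProc, so the ranges never overlap.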
    // 6) fix Qperm with global entries
    GlobalOrdinal array_iter = 0;
    for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) {
      if (ColIdStatusArray[ljk] == 0.0) {
        ColIdStatusArray[ljk] = 1.0; // use this column id
        Qperm->replaceLocalValue(ljk, global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter]);
        ColIdUsed->replaceGlobalValue(global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter], 1.0);
        // detect wide-range permutations
        if (floor(global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter]/nDofsPerNode) != floor(ColIdStatus->getMap()->getGlobalElement(ljk)/nDofsPerNode)) {
          lWideRangeColPermutations++;
        }
        array_iter++;
        //cntUnusedColIdx--; // check me
      }
    }
  } // end if global_cntFreeColIdx > 0

  ///////////////////
  // Qperm should be fine now...

  // create new empty matrices
  Teuchos::RCP<CrsMatrixWrap> permPTmatrix = Teuchos::rcp(new CrsMatrixWrap(A->getRowMap(), 1, Xpetra::StaticProfile));
  Teuchos::RCP<CrsMatrixWrap> permQTmatrix = Teuchos::rcp(new CrsMatrixWrap(A->getRowMap(), 1, Xpetra::StaticProfile));

  for (size_t row = 0; row < A->getNodeNumRows(); row++) {
    Teuchos::ArrayRCP<GlobalOrdinal> indoutP(1, Teuchos::as<GO>(PpermData[row])); // column idx for Perm^T
    Teuchos::ArrayRCP<GlobalOrdinal> indoutQ(1, Teuchos::as<GO>(QpermData[row])); // column idx for Qperm
    Teuchos::ArrayRCP<Scalar> valout(1, 1.0);
    permPTmatrix->insertGlobalValues(A->getRowMap()->getGlobalElement(row), indoutP.view(0, indoutP.size()), valout.view(0, valout.size()));
    permQTmatrix->insertGlobalValues(A->getRowMap()->getGlobalElement(row), indoutQ.view(0, indoutQ.size()), valout.view(0, valout.size()));
  }

  permPTmatrix->fillComplete();
  permQTmatrix->fillComplete();

  Teuchos::RCP<Matrix> permPmatrix = Utils2::Transpose(permPTmatrix, true);

  for (size_t row = 0; row < permPTmatrix->getNodeNumRows(); row++) {
    if (permPTmatrix->getNumEntriesInLocalRow(row) != 1)
      GetOStream(Warnings0, 0) << "#entries in row " << row << " of permPTmatrix is " << permPTmatrix->getNumEntriesInLocalRow(row) << std::endl;
    if (permPmatrix->getNumEntriesInLocalRow(row) != 1)
      GetOStream(Warnings0, 0) << "#entries in row " << row << " of permPmatrix is " << permPmatrix->getNumEntriesInLocalRow(row) << std::endl;
    if (permQTmatrix->getNumEntriesInLocalRow(row) != 1)
      GetOStream(Warnings0, 0) << "#entries in row " << row << " of permQTmatrix is " << permQTmatrix->getNumEntriesInLocalRow(row) << std::endl;
  }

  // build permP * A * permQT
  Teuchos::RCP<Matrix> ApermQt = Utils::Multiply(*A, false, *permQTmatrix, false);
  Teuchos::RCP<Matrix> permPApermQt = Utils::Multiply(*permPmatrix, false, *ApermQt, false);

  /*
  MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Write("A.mat", *A);
  MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Write("permP.mat", *permPmatrix);
  MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Write("permQt.mat", *permQTmatrix);
  MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Write("permPApermQt.mat", *permPApermQt);
  */

  // build scaling matrix
  Teuchos::RCP<Vector> diagVec    = VectorFactory::Build(permPApermQt->getRowMap(), true);
  Teuchos::RCP<Vector> invDiagVec = VectorFactory::Build(permPApermQt->getRowMap(), true);
  Teuchos::ArrayRCP<const Scalar> diagVecData = diagVec->getData(0);
  Teuchos::ArrayRCP<Scalar> invDiagVecData    = invDiagVec->getDataNonConst(0);

  permPApermQt->getLocalDiagCopy(*diagVec);
  for (size_t i = 0; i < diagVec->getMap()->getNodeNumElements(); ++i) {
    if (diagVecData[i] != 0.0)
      invDiagVecData[i] = 1.0 / diagVecData[i];
    else {
      invDiagVecData[i] = 1.0;
      GetOStream(Statistics0, 0) << "MueLu::PermutationFactory: found zero on diagonal in row " << i << std::endl;
    }
  }
<< "MueLu::PermutationFactory: found zero on diagonal in row " << i << std::endl; } } Teuchos::RCP<CrsMatrixWrap> diagScalingOp = Teuchos::rcp(new CrsMatrixWrap(permPApermQt->getRowMap(),1,Xpetra::StaticProfile)); for(size_t row=0; row<A->getNodeNumRows(); row++) { Teuchos::ArrayRCP<GlobalOrdinal> indout(1,permPApermQt->getRowMap()->getGlobalElement(row)); // column idx for Perm^T Teuchos::ArrayRCP<Scalar> valout(1,invDiagVecData[row]); diagScalingOp->insertGlobalValues(A->getRowMap()->getGlobalElement(row), indout.view(0,indout.size()), valout.view(0,valout.size())); } diagScalingOp->fillComplete(); Teuchos::RCP<Matrix> scaledA = Utils::Multiply(*diagScalingOp, false, *permPApermQt, false); currentLevel.Set("A", Teuchos::rcp_dynamic_cast<Matrix>(scaledA), genFactory/*this*/); currentLevel.Set("permA", Teuchos::rcp_dynamic_cast<Matrix>(permPApermQt), genFactory/*this*/); // TODO careful with this!!! currentLevel.Set("permP", Teuchos::rcp_dynamic_cast<Matrix>(permPmatrix), genFactory/*this*/); currentLevel.Set("permQT", Teuchos::rcp_dynamic_cast<Matrix>(permQTmatrix), genFactory/*this*/); currentLevel.Set("permScaling", Teuchos::rcp_dynamic_cast<Matrix>(diagScalingOp), genFactory/*this*/); //// count row permutations // count zeros on diagonal in P -> number of row permutations Teuchos::RCP<Vector> diagPVec = VectorFactory::Build(permPmatrix->getRowMap(),true); permPmatrix->getLocalDiagCopy(*diagPVec); Teuchos::ArrayRCP< const Scalar > diagPVecData = diagPVec->getData(0); LocalOrdinal lNumRowPermutations = 0; GlobalOrdinal gNumRowPermutations = 0; for(size_t i = 0; i<diagPVec->getMap()->getNodeNumElements(); ++i) { if(diagPVecData[i] == 0.0) { lNumRowPermutations++; } } // sum up all entries in multipleColRequests over all processors sumAll(diagPVec->getMap()->getComm(), Teuchos::as<GlobalOrdinal>(lNumRowPermutations), gNumRowPermutations); //// count column permutations // count zeros on diagonal in Q^T -> number of column permutations Teuchos::RCP<Vector> diagQTVec = VectorFactory::Build(permQTmatrix->getRowMap(),true); permQTmatrix->getLocalDiagCopy(*diagQTVec); Teuchos::ArrayRCP< const Scalar > diagQTVecData = diagQTVec->getData(0); LocalOrdinal lNumColPermutations = 0; GlobalOrdinal gNumColPermutations = 0; for(size_t i = 0; i<diagQTVec->getMap()->getNodeNumElements(); ++i) { if(diagQTVecData[i] == 0.0) { lNumColPermutations++; } } // sum up all entries in multipleColRequests over all processors sumAll(diagQTVec->getMap()->getComm(), Teuchos::as<GlobalOrdinal>(lNumColPermutations), gNumColPermutations); currentLevel.Set("#RowPermutations", gNumRowPermutations, genFactory/*this*/); currentLevel.Set("#ColPermutations", gNumColPermutations, genFactory/*this*/); currentLevel.Set("#WideRangeRowPermutations", gWideRangeRowPermutations, genFactory/*this*/); currentLevel.Set("#WideRangeColPermutations", gWideRangeColPermutations, genFactory/*this*/); GetOStream(Statistics0, 0) << "#Row permutations/max possible permutations: " << gNumRowPermutations << "/" << diagPVec->getMap()->getGlobalNumElements() << std::endl; GetOStream(Statistics0, 0) << "#Column permutations/max possible permutations: " << gNumColPermutations << "/" << diagQTVec->getMap()->getGlobalNumElements() << std::endl; GetOStream(Runtime1, 0) << "#wide range row permutations: " << gWideRangeRowPermutations << " #wide range column permutations: " << gWideRangeColPermutations << std::endl; #else #warning PermutationFactory not compiling/working for Scalar==complex. #endif // #ifndef HAVE_MUELU_INST_COMPLEX_INT_INT }
Matrix& LASSO::train(Matrix& X, Matrix& Y) {

  /*XNX = [X, -X];
    H_G = XNX' * XNX;
    D = repmat(diag(H_G), [1, n_y]);
    XNXTY = XNX' * Y;
    A = (X' * X + lambda * eye(p)) \ (X' * Y);*/

  Matrix& XNX = horzcat(2, &X, &uminus(X));
  Matrix& H_G = XNX.transpose().mtimes(XNX);
  double* Q = new double[size(H_G, 1)];
  for (int i = 0; i < size(H_G, 1); i++) {
    Q[i] = H_G.getEntry(i, i); // diagonal of H_G, i.e. the D above
  }
  Matrix& XNXTY = XNX.transpose().mtimes(Y);
  Matrix& A = mldivide(
      plus(X.transpose().mtimes(X), times(lambda, eye(p))),
      X.transpose().mtimes(Y)
      );

  /*AA = [subplus(A); subplus(-A)];
    C = -XNXTY + lambda;
    Grad = C + H_G * AA;
    tol = epsilon * norm(Grad);
    PGrad = zeros(size(Grad));*/

  Matrix& AA = vertcat(2, &subplus(A), &subplus(uminus(A)));
  Matrix& C = plus(uminus(XNXTY), lambda);
  Matrix& Grad = plus(C, mtimes(H_G, AA));
  double tol = epsilon * norm(Grad);
  Matrix& PGrad = zeros(size(Grad));

  std::list<double> J;
  double fval = 0;
  // J(1) = sum(sum((Y - X * A).^2)) / 2 + lambda * sum(sum(abs(A)));
  if (calc_OV) {
    fval = sumAll(pow(minus(Y, mtimes(X, A)), 2)) / 2 +
           lambda * sum(sum(abs(A)));
    J.push_back(fval);
  }

  /*Matrix I_k = null;
    Matrix I_k_com = null;*/
  Matrix& I_k = Grad.copy();
  double d = 0;
  int k = 0;

  DenseVector& SFPlusCi = *new DenseVector(AA.getColumnDimension());
  Matrix& S = H_G;
  Vector** SRows = null;
  if (typeid(H_G) == typeid(DenseMatrix))
    SRows = denseMatrix2DenseRowVectors(S);
  else
    SRows = sparseMatrix2SparseRowVectors(S);

  Vector** CRows = null;
  if (typeid(C) == typeid(DenseMatrix))
    CRows = denseMatrix2DenseRowVectors(C);
  else
    CRows = sparseMatrix2SparseRowVectors(C);

  double** FData = ((DenseMatrix&) AA).getData();
  double* FRow = null;
  double* pr = null;
  int K = 2 * p;

  while (true) {

    /*I_k = Grad < 0 | AA > 0;
      I_k_com = not(I_k);
      PGrad(I_k) = Grad(I_k);
      PGrad(I_k_com) = 0;*/

    _or(I_k, lt(Grad, 0), gt(AA, 0));
    Matrix& I_k_com = _not(I_k);
    assign(PGrad, Grad);
    logicalIndexingAssignment(PGrad, I_k_com, 0);

    d = norm(PGrad, inf);
    if (d < tol) {
      if (verbose)
        println("Converged successfully!");
      break;
    }

    /*for i = 1:2*p
        AA(i, :) = max(AA(i, :) - (C(i, :) + H_G(i, :) * AA) ./ (D(i, :)), 0);
      end
      A = AA(1:p,:) - AA(p+1:end,:);*/

    for (int i = 0; i < K; i++) {
      // SFPlusCi = SRows[i].operate(AA);
      operate(SFPlusCi, *SRows[i], AA);
      plusAssign(SFPlusCi, *CRows[i]);
      timesAssign(SFPlusCi, 1 / Q[i]);
      pr = SFPlusCi.getPr();
      // F(i, :) = max(F(i, :) - (S(i, :) * F + C(i, :)) / D[i], 0),
      // i.e. F(i, :) = max(F(i, :) - SFPlusCi, 0)
      FRow = FData[i];
      for (int j = 0; j < AA.getColumnDimension(); j++) {
        FRow[j] = max(FRow[j] - pr[j], 0);
      }
    }

    // Grad = plus(C, mtimes(H_G, AA));
    plus(Grad, C, mtimes(H_G, AA));

    k = k + 1;
    if (k > maxIter) {
      if (verbose)
        println("Maximal number of iterations reached.");
      break;
    }

    if (calc_OV) {
      fval = sum(sum(pow(minus(Y, mtimes(XNX, AA)), 2))) / 2 +
             lambda * sum(sum(abs(AA)));
      J.push_back(fval);
    }

    if (k % 10 == 0 && verbose) {
      if (calc_OV)
        fprintf("Iter %d - ||PGrad||: %f, ofv: %f\n", k, d, J.back());
      else
        fprintf("Iter %d - ||PGrad||: %f\n", k, d);
    }
  }

  delete[] Q; // the diagonal is no longer needed once the iteration has finished

  Matrix& res = minus(
      AA.getSubMatrix(0, p - 1, 0, ny - 1),
      AA.getSubMatrix(p, 2 * p - 1, 0, ny - 1)
      );
  return res;
}
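/* A self-contained sketch (plain C++, scalar case, all names hypothetical) of the
   update rule train() applies row by row: minimize (y - x*a)^2 / 2 + lambda*|a| by
   splitting a = a_plus - a_minus with a_plus, a_minus >= 0 and iterating the
   projected coordinate step v_i = max(v_i - (c_i + (H v)_i) / H_ii, 0) on
   v = (a_plus, a_minus), which mirrors the inner for-loop over the rows of AA. */
#include <algorithm>
#include <cstdio>

int main() {
  // one feature, one sample: X = 2, Y = 3, so the unpenalized solution is a = 1.5
  const double x = 2.0, y = 3.0, lambda = 0.5;

  // XNX = [x, -x]; H = XNX' * XNX; c = -XNX' * y + lambda (componentwise)
  const double H[2][2] = { { x * x, -x * x }, { -x * x, x * x } };
  const double c[2] = { -x * y + lambda, x * y + lambda };

  double v[2] = { 0.0, 0.0 }; // v = (a_plus, a_minus), kept nonnegative throughout
  for (int iter = 0; iter < 100; ++iter) {
    for (int i = 0; i < 2; ++i) {
      double Hv = H[i][0] * v[0] + H[i][1] * v[1];
      v[i] = std::max(v[i] - (c[i] + Hv) / H[i][i], 0.0);
    }
  }

  // a = a_plus - a_minus; soft-thresholding predicts (x*y - lambda) / x^2 = 1.375
  std::printf("a = %f\n", v[0] - v[1]);
  return 0;
}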