void AlgebraicPermutationStrategy<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::BuildPermutation(const Teuchos::RCP<Matrix> & A, const Teuchos::RCP<const Map> permRowMap, Level & currentLevel, const FactoryBase* genFactory) const { #ifndef HAVE_MUELU_INST_COMPLEX_INT_INT const Teuchos::RCP< const Teuchos::Comm< int > > comm = A->getRowMap()->getComm(); int numProcs = comm->getSize(); int myRank = comm->getRank(); /*if( permRowMap == Teuchos::null ) { permRowMap = A->getRowMap(); // use full row map of A }*/ size_t nDofsPerNode = 1; if (A->IsView("stridedMaps")) { Teuchos::RCP<const Map> permRowMapStrided = A->getRowMap("stridedMaps"); nDofsPerNode = Teuchos::rcp_dynamic_cast<const StridedMap>(permRowMapStrided)->getFixedBlockSize(); } //GetOStream(Runtime0, 0) << "Perform generation of permutation operators on " << mapName_ << " map with " << permRowMap->getGlobalNumElements() << " elements" << std::endl; std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > permutedDiagCandidates; std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > keepDiagonalEntries; std::vector<Scalar> Weights; // loop over all local rows in matrix A and keep diagonal entries if corresponding // matrix rows are not contained in permRowMap for (size_t row = 0; row < A->getRowMap()->getNodeNumElements(); row++) { GlobalOrdinal grow = A->getRowMap()->getGlobalElement(row); if(permRowMap->isNodeGlobalElement(grow) == true) continue; size_t nnz = A->getNumEntriesInLocalRow(row); // extract local row information from matrix Teuchos::ArrayView<const LocalOrdinal> indices; Teuchos::ArrayView<const Scalar> vals; A->getLocalRowView(row, indices, vals); TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as<size_t>(indices.size()) != nnz, Exceptions::RuntimeError, "MueLu::PermutationFactory::Build: number of nonzeros not equal to number of indices? 
Error."); // find column entry with max absolute value GlobalOrdinal gMaxValIdx = 0; Scalar norm1 = 0.0; Scalar maxVal = 0.0; for (size_t j = 0; j < Teuchos::as<size_t>(indices.size()); j++) { norm1 += std::abs(vals[j]); if(std::abs(vals[j]) > maxVal) { maxVal = std::abs(vals[j]); gMaxValIdx = A->getColMap()->getGlobalElement(indices[j]); } } if(grow == gMaxValIdx) // only keep row/col pair if it's diagonal dominant!!! keepDiagonalEntries.push_back(std::make_pair(grow,grow)); } ////////// // handle rows that are marked to be relevant for permutations for (size_t row = 0; row < permRowMap->getNodeNumElements(); row++) { GlobalOrdinal grow = permRowMap->getGlobalElement(row); LocalOrdinal lArow = A->getRowMap()->getLocalElement(grow); size_t nnz = A->getNumEntriesInLocalRow(lArow); // extract local row information from matrix Teuchos::ArrayView<const LocalOrdinal> indices; Teuchos::ArrayView<const Scalar> vals; A->getLocalRowView(lArow, indices, vals); TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as<size_t>(indices.size()) != nnz, Exceptions::RuntimeError, "MueLu::PermutationFactory::Build: number of nonzeros not equal to number of indices? Error."); // find column entry with max absolute value GlobalOrdinal gMaxValIdx = 0; Scalar norm1 = 0.0; Scalar maxVal = 0.0; for (size_t j = 0; j < Teuchos::as<size_t>(indices.size()); j++) { norm1 += std::abs(vals[j]); if(std::abs(vals[j]) > maxVal) { maxVal = std::abs(vals[j]); gMaxValIdx = A->getColMap()->getGlobalElement(indices[j]); } } if(std::abs(maxVal) > 0.0) { // keep only max Entries \neq 0.0 permutedDiagCandidates.push_back(std::make_pair(grow,gMaxValIdx)); Weights.push_back(maxVal/(norm1*Teuchos::as<Scalar>(nnz))); } else { std::cout << "ATTENTION: row " << grow << " has only zero entries -> singular matrix!" 
<< std::endl; } } // sort Weights in descending order std::vector<int> permutation; sortingPermutation(Weights,permutation); // create new vector with exactly one possible entry for each column // each processor which requests the global column id gcid adds 1 to gColVec // gColVec will be summed up over all processors and communicated to gDomVec // which is based on the non-overlapping domain map of A. Teuchos::RCP<Vector> gColVec = VectorFactory::Build(A->getColMap()); Teuchos::RCP<Vector> gDomVec = VectorFactory::Build(A->getDomainMap()); gColVec->putScalar(0.0); gDomVec->putScalar(0.0); // put in all keep diagonal entries for (typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::const_iterator p = keepDiagonalEntries.begin(); p != keepDiagonalEntries.end(); ++p) { gColVec->sumIntoGlobalValue((*p).second,1.0); } Teuchos::RCP<Export> exporter = ExportFactory::Build(gColVec->getMap(), gDomVec->getMap()); gDomVec->doExport(*gColVec,*exporter,Xpetra::ADD); // communicate blocked gcolids to all procs gColVec->doImport(*gDomVec,*exporter,Xpetra::INSERT); std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > permutedDiagCandidatesFiltered; // TODO reserve memory std::map<GlobalOrdinal, Scalar> gColId2Weight; Teuchos::ArrayRCP< Scalar > ddata = gColVec->getDataNonConst(0); for(size_t i = 0; i < permutedDiagCandidates.size(); ++i) { // loop over all candidates std::pair<GlobalOrdinal, GlobalOrdinal> pp = permutedDiagCandidates[permutation[i]]; GlobalOrdinal grow = pp.first; GlobalOrdinal gcol = pp.second; LocalOrdinal lcol = A->getColMap()->getLocalElement(gcol); //Teuchos::ArrayRCP< Scalar > ddata = gColVec->getDataNonConst(0); if(ddata[lcol] > 0.0){ continue; // skip lcol: column already handled by another row } // mark column as already taken ddata[lcol]++; permutedDiagCandidatesFiltered.push_back(std::make_pair(grow,gcol)); gColId2Weight[gcol] = Weights[permutation[i]]; } // communicate how often each column index is requested by the different procs 
gDomVec->doExport(*gColVec,*exporter,Xpetra::ADD); gColVec->doImport(*gDomVec,*exporter,Xpetra::INSERT); // probably not needed // TODO check me //***************************************************************************************** // first communicate ALL global ids of column indices which are requested by more // than one proc to all other procs // detect which global col indices are requested by more than one proc // and store them in the multipleColRequests vector std::vector<GlobalOrdinal> multipleColRequests; // store all global column indices from current processor that are also // requested by another processor. This is possible, since they are stored // in gDomVec which is based on the nonoverlapping domain map. That is, each // global col id is handled by exactly one proc. std::queue<GlobalOrdinal> unusedColIdx; // unused column indices on current processor for(size_t sz = 0; sz<gDomVec->getLocalLength(); ++sz) { Teuchos::ArrayRCP< const Scalar > arrDomVec = gDomVec->getData(0); if(arrDomVec[sz] > 1.0) { multipleColRequests.push_back(gDomVec->getMap()->getGlobalElement(sz)); } else if(arrDomVec[sz] == 0.0) { unusedColIdx.push(gDomVec->getMap()->getGlobalElement(sz)); } } // communicate the global number of column indices which are requested by more than one proc LocalOrdinal localMultColRequests = Teuchos::as<LocalOrdinal>(multipleColRequests.size()); LocalOrdinal globalMultColRequests = 0; // sum up all entries in multipleColRequests over all processors sumAll(gDomVec->getMap()->getComm(), (LocalOrdinal)localMultColRequests, globalMultColRequests); if(globalMultColRequests > 0) { // special handling: two processors request the same global column id. 
// decide which processor gets it // distribute number of multipleColRequests to all processors // each processor stores how many column ids for exchange are handled by the cur proc std::vector<GlobalOrdinal> numMyMultColRequests(numProcs,0); std::vector<GlobalOrdinal> numGlobalMultColRequests(numProcs,0); numMyMultColRequests[myRank] = localMultColRequests; Teuchos::reduceAll(*comm,Teuchos::REDUCE_MAX,numProcs,&numMyMultColRequests[0],&numGlobalMultColRequests[0]); // communicate multipleColRequests entries to all processors int nMyOffset = 0; for (int i=0; i<myRank-1; i++) nMyOffset += numGlobalMultColRequests[i]; // calculate offset to store the weights on the corresponding place in procOverlappingWeights GlobalOrdinal zero=0; std::vector<GlobalOrdinal> procMultRequestedColIds(globalMultColRequests,zero); std::vector<GlobalOrdinal> global_procMultRequestedColIds(globalMultColRequests,zero); // loop over all local column GIDs that are also requested by other procs for(size_t i = 0; i < multipleColRequests.size(); i++) { procMultRequestedColIds[nMyOffset + i] = multipleColRequests[i]; // all weights are > 0 ? } // template ordinal, package (double) Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, Teuchos::as<int>(globalMultColRequests), &procMultRequestedColIds[0], &global_procMultRequestedColIds[0]); // loop over global_procOverlappingWeights and eliminate wrong entries... 
for (size_t k = 0; k<global_procMultRequestedColIds.size(); k++) { GlobalOrdinal globColId = global_procMultRequestedColIds[k]; std::vector<Scalar> MyWeightForColId(numProcs,0); std::vector<Scalar> GlobalWeightForColId(numProcs,0); if(gColVec->getMap()->isNodeGlobalElement(globColId)) { MyWeightForColId[myRank] = gColId2Weight[globColId]; } else { MyWeightForColId[myRank] = 0.0; } Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numProcs, &MyWeightForColId[0], &GlobalWeightForColId[0]); if(gColVec->getMap()->isNodeGlobalElement(globColId)) { // note: 2 procs could have the same weight for a column index. // pick the first one. Scalar winnerValue = 0.0; int winnerProcRank = 0; for (int proc = 0; proc < numProcs; proc++) { if(GlobalWeightForColId[proc] > winnerValue) { winnerValue = GlobalWeightForColId[proc]; winnerProcRank = proc; } } // winnerProcRank is the winner for handling globColId. // winnerProcRank is unique (even if two procs have the same weight for a column index) if(myRank != winnerProcRank) { // remove corresponding entry from permutedDiagCandidatesFiltered typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator p = permutedDiagCandidatesFiltered.begin(); while(p != permutedDiagCandidatesFiltered.end() ) { if((*p).second == globColId) p = permutedDiagCandidatesFiltered.erase(p); else p++; } } } // end if isNodeGlobalElement } // end loop over global_procOverlappingWeights and eliminate wrong entries... 
} // end if globalMultColRequests > 0 // put together all pairs: //size_t sizeRowColPairs = keepDiagonalEntries.size() + permutedDiagCandidatesFiltered.size(); std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> > RowColPairs; RowColPairs.insert( RowColPairs.end(), keepDiagonalEntries.begin(), keepDiagonalEntries.end()); RowColPairs.insert( RowColPairs.end(), permutedDiagCandidatesFiltered.begin(), permutedDiagCandidatesFiltered.end()); #ifdef DEBUG_OUTPUT //&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&& // plausibility check gColVec->putScalar(0.0); gDomVec->putScalar(0.0); typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator pl = RowColPairs.begin(); while(pl != RowColPairs.end() ) { //GlobalOrdinal ik = (*pl).first; GlobalOrdinal jk = (*pl).second; gColVec->sumIntoGlobalValue(jk,1.0); pl++; } gDomVec->doExport(*gColVec,*exporter,Xpetra::ADD); for(size_t sz = 0; sz<gDomVec->getLocalLength(); ++sz) { Teuchos::ArrayRCP< const Scalar > arrDomVec = gDomVec->getData(0); if(arrDomVec[sz] > 1.0) { GetOStream(Runtime0,0) << "RowColPairs has multiple column [" << sz << "]=" << arrDomVec[sz] << std::endl; } else if(arrDomVec[sz] == 0.0) { GetOStream(Runtime0,0) << "RowColPairs has empty column [" << sz << "]=" << arrDomVec[sz] << std::endl; } } //&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&& #endif ////////////////////////////////////////////////// // assumption: on each processor RowColPairs now contains // a valid set of (row,column) pairs, where the row entries // are a subset of the processor's rows and the column entries // are unique throughout all processors. // Note: the RowColPairs are only defined for a subset of all rows, // so there might be rows without an entry in RowColPairs. // It can be, that some rows seem to be missing in RowColPairs, since // the entry in that row with maximum absolute value has been reserved // by another row already (e.g. as already diagonal dominant row outside // of perRowMap). 
// In fact, the RowColPairs vector only defines the (row,column) pairs // that will be definitely moved to the diagonal after permutation. #ifdef DEBUG_OUTPUT // for (typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::const_iterator p = RowColPairs.begin(); p != RowColPairs.end(); ++p) { // std::cout << "proc: " << myRank << " r/c: " << (*p).first << "/" << (*p).second << std::endl; // } // for (typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::const_iterator p = RowColPairs.begin(); p != RowColPairs.end(); ++p) // { //// if((*p).first != (*p).second) std::cout << "difference: " << (*p).first << " " << (*p).second << std::endl; // std::cout << (*p).first +1 << " " << (*p).second+1 << std::endl; // } // std::cout << "\n"; #endif // vectors to store permutation information Teuchos::RCP<Vector> Pperm = VectorFactory::Build(A->getRowMap()); Teuchos::RCP<Vector> Qperm = VectorFactory::Build(A->getDomainMap()); // global variant (based on domain map) Teuchos::RCP<Vector> lQperm = VectorFactory::Build(A->getColMap()); // local variant (based on column map) Teuchos::ArrayRCP< Scalar > PpermData = Pperm->getDataNonConst(0); Teuchos::ArrayRCP< Scalar > QpermData = Qperm->getDataNonConst(0); Pperm->putScalar(0.0); Qperm->putScalar(0.0); lQperm->putScalar(0.0); // setup exporter for Qperm Teuchos::RCP<Export> QpermExporter = ExportFactory::Build(lQperm->getMap(), Qperm->getMap()); Teuchos::RCP<Vector> RowIdStatus = VectorFactory::Build(A->getRowMap()); Teuchos::RCP<Vector> ColIdStatus = VectorFactory::Build(A->getDomainMap()); // global variant (based on domain map) Teuchos::RCP<Vector> lColIdStatus = VectorFactory::Build(A->getColMap()); // local variant (based on column map) Teuchos::RCP<Vector> ColIdUsed = VectorFactory::Build(A->getDomainMap()); // mark column ids to be already in use Teuchos::ArrayRCP< Scalar > RowIdStatusArray = RowIdStatus->getDataNonConst(0); Teuchos::ArrayRCP< Scalar > ColIdStatusArray = ColIdStatus->getDataNonConst(0); 
Teuchos::ArrayRCP< Scalar > lColIdStatusArray = lColIdStatus->getDataNonConst(0); Teuchos::ArrayRCP< Scalar > ColIdUsedArray = ColIdUsed->getDataNonConst(0); // not sure about this RowIdStatus->putScalar(0.0); ColIdStatus->putScalar(0.0); lColIdStatus->putScalar(0.0); ColIdUsed->putScalar(0.0); // no column ids are used // count wide-range permutations // a wide-range permutation is defined as a permutation of rows/columns which do not // belong to the same node LocalOrdinal lWideRangeRowPermutations = 0; GlobalOrdinal gWideRangeRowPermutations = 0; LocalOrdinal lWideRangeColPermutations = 0; GlobalOrdinal gWideRangeColPermutations = 0; // run 1: mark all "identity" permutations typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator p = RowColPairs.begin(); while(p != RowColPairs.end() ) { GlobalOrdinal ik = (*p).first; GlobalOrdinal jk = (*p).second; LocalOrdinal lik = A->getRowMap()->getLocalElement(ik); LocalOrdinal ljk = A->getColMap()->getLocalElement(jk); if(RowIdStatusArray[lik] == 0.0) { RowIdStatusArray[lik] = 1.0; // use this row id lColIdStatusArray[ljk] = 1.0; // use this column id Pperm->replaceLocalValue(lik, ik); lQperm->replaceLocalValue(ljk, ik); // use column map ColIdUsed->replaceGlobalValue(ik,1.0); // ik is now used p = RowColPairs.erase(p); // detect wide range permutations if(floor(ik/nDofsPerNode) != floor(jk/nDofsPerNode)) { lWideRangeColPermutations++; } } else p++; } // communicate column map -> domain map Qperm->doExport(*lQperm,*QpermExporter,Xpetra::ABSMAX); ColIdStatus->doExport(*lColIdStatus,*QpermExporter,Xpetra::ABSMAX); // plausibility check if(RowColPairs.size()>0) GetOStream(Warnings0,0) << "MueLu::PermutationFactory: There are Row/Col pairs left!!!" 
<< std::endl; // TODO fix me // close Pperm // count, how many row permutations are missing on current proc size_t cntFreeRowIdx = 0; std::queue<GlobalOrdinal> qFreeGRowIdx; // store global row ids of "free" rows for (size_t lik = 0; lik < RowIdStatus->getLocalLength(); ++lik) { if(RowIdStatusArray[lik] == 0.0) { cntFreeRowIdx++; qFreeGRowIdx.push(RowIdStatus->getMap()->getGlobalElement(lik)); } } // fix Pperm for (size_t lik = 0; lik < RowIdStatus->getLocalLength(); ++lik) { if(RowIdStatusArray[lik] == 0.0) { RowIdStatusArray[lik] = 1.0; // use this row id Pperm->replaceLocalValue(lik, qFreeGRowIdx.front()); // detect wide range permutations if(floor(qFreeGRowIdx.front()/nDofsPerNode) != floor(RowIdStatus->getMap()->getGlobalElement(lik)/nDofsPerNode)) { lWideRangeRowPermutations++; } qFreeGRowIdx.pop(); } } // close Qperm (free permutation entries in Qperm) size_t cntFreeColIdx = 0; std::queue<GlobalOrdinal> qFreeGColIdx; // store global column ids of "free" available columns for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) { if(ColIdStatusArray[ljk] == 0.0) { cntFreeColIdx++; qFreeGColIdx.push(ColIdStatus->getMap()->getGlobalElement(ljk)); } } size_t cntUnusedColIdx = 0; std::queue<GlobalOrdinal> qUnusedGColIdx; // store global column ids of "free" available columns for (size_t ljk = 0; ljk < ColIdUsed->getLocalLength(); ++ljk) { if(ColIdUsedArray[ljk] == 0.0) { cntUnusedColIdx++; qUnusedGColIdx.push(ColIdUsed->getMap()->getGlobalElement(ljk)); } } // fix Qperm with local entries for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) { // stop if no (local) unused column idx are left if(cntUnusedColIdx == 0) break; if(ColIdStatusArray[ljk] == 0.0) { ColIdStatusArray[ljk] = 1.0; // use this row id Qperm->replaceLocalValue(ljk, qUnusedGColIdx.front()); // loop over ColIdStatus (lives on domain map) ColIdUsed->replaceGlobalValue(qUnusedGColIdx.front(),1.0); // ljk is now used, too // detect wide range permutations 
if(floor(qUnusedGColIdx.front()/nDofsPerNode) != floor(ColIdStatus->getMap()->getGlobalElement(ljk)/nDofsPerNode)) { lWideRangeColPermutations++; } qUnusedGColIdx.pop(); cntUnusedColIdx--; cntFreeColIdx--; } } //Qperm->doExport(*lQperm,*QpermExporter,Xpetra::ABSMAX); // no export necessary, since changes only locally //ColIdStatus->doExport(*lColIdStatus,*QpermExporter,Xpetra::ABSMAX); // count, how many unused column idx are needed on current processor // to complete Qperm cntFreeColIdx = 0; for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) { // TODO avoid this loop if(ColIdStatusArray[ljk] == 0.0) { cntFreeColIdx++; } } GlobalOrdinal global_cntFreeColIdx = 0; LocalOrdinal local_cntFreeColIdx = cntFreeColIdx; sumAll(comm, Teuchos::as<GlobalOrdinal>(local_cntFreeColIdx), global_cntFreeColIdx); #ifdef DEBUG_OUTPUT std::cout << "global # of empty column idx entries in Qperm: " << global_cntFreeColIdx << std::endl; #endif // avoid global communication if possible if(global_cntFreeColIdx > 0) { // 1) count how many unused column ids are left GlobalOrdinal global_cntUnusedColIdx = 0; LocalOrdinal local_cntUnusedColIdx = cntUnusedColIdx; sumAll(comm, Teuchos::as<GlobalOrdinal>(local_cntUnusedColIdx), global_cntUnusedColIdx); #ifdef DEBUG_OUTPUT std::cout << "global # of unused column idx: " << global_cntUnusedColIdx << std::endl; #endif // 2) communicate how many unused column ids are available on procs std::vector<LocalOrdinal> local_UnusedColIdxOnProc (numProcs); std::vector<LocalOrdinal> global_UnusedColIdxOnProc(numProcs); local_UnusedColIdxOnProc[myRank] = local_cntUnusedColIdx; Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numProcs, &local_UnusedColIdxOnProc[0], &global_UnusedColIdxOnProc[0]); #ifdef DEBUG_OUTPUT std::cout << "PROC " << myRank << " global num unused indices per proc: "; for (size_t ljk = 0; ljk < global_UnusedColIdxOnProc.size(); ++ljk) { std::cout << " " << global_UnusedColIdxOnProc[ljk]; } std::cout << std::endl; #endif // 3) 
build array of length global_cntUnusedColIdx to globally replicate unused column idx std::vector<GlobalOrdinal> local_UnusedColIdxVector(Teuchos::as<size_t>(global_cntUnusedColIdx)); std::vector<GlobalOrdinal> global_UnusedColIdxVector(Teuchos::as<size_t>(global_cntUnusedColIdx)); GlobalOrdinal global_cntUnusedColIdxStartIter = 0; for(int proc=0; proc<myRank; proc++) { global_cntUnusedColIdxStartIter += global_UnusedColIdxOnProc[proc]; } for(GlobalOrdinal k = global_cntUnusedColIdxStartIter; k < global_cntUnusedColIdxStartIter+local_cntUnusedColIdx; k++) { local_UnusedColIdxVector[k] = qUnusedGColIdx.front(); qUnusedGColIdx.pop(); } Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, Teuchos::as<int>(global_cntUnusedColIdx), &local_UnusedColIdxVector[0], &global_UnusedColIdxVector[0]); #ifdef DEBUG_OUTPUT std::cout << "PROC " << myRank << " global UnusedGColIdx: "; for (size_t ljk = 0; ljk < global_UnusedColIdxVector.size(); ++ljk) { std::cout << " " << global_UnusedColIdxVector[ljk]; } std::cout << std::endl; #endif // 4) communicate, how many column idx are needed on each processor // to complete Qperm std::vector<LocalOrdinal> local_EmptyColIdxOnProc (numProcs); std::vector<LocalOrdinal> global_EmptyColIdxOnProc(numProcs); local_EmptyColIdxOnProc[myRank] = local_cntFreeColIdx; Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, numProcs, &local_EmptyColIdxOnProc[0], &global_EmptyColIdxOnProc[0]); #ifdef DEBUG_OUTPUT std::cout << "PROC " << myRank << " global num of needed column indices: "; for (size_t ljk = 0; ljk < global_EmptyColIdxOnProc.size(); ++ljk) { std::cout << " " << global_EmptyColIdxOnProc[ljk]; } std::cout << std::endl; #endif // 5) determine first index in global_UnusedColIdxVector for unused column indices, // that are marked to be used by this processor GlobalOrdinal global_UnusedColStartIdx = 0; for(int proc=0; proc<myRank; proc++) { global_UnusedColStartIdx += global_EmptyColIdxOnProc[proc]; } #ifdef DEBUG_OUTPUT GetOStream(Statistics0,0) << "PROC " << 
myRank << " is allowd to use the following column gids: "; for(GlobalOrdinal k = global_UnusedColStartIdx; k < global_UnusedColStartIdx + Teuchos::as<GlobalOrdinal>(cntFreeColIdx); k++) { GetOStream(Statistics0,0) << global_UnusedColIdxVector[k] << " "; } GetOStream(Statistics0,0) << std::endl; #endif // 6.) fix Qperm with global entries GlobalOrdinal array_iter = 0; for (size_t ljk = 0; ljk < ColIdStatus->getLocalLength(); ++ljk) { if(ColIdStatusArray[ljk] == 0.0) { ColIdStatusArray[ljk] = 1.0; // use this row id Qperm->replaceLocalValue(ljk, global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter]); ColIdUsed->replaceGlobalValue(global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter],1.0); // detect wide range permutations if(floor(global_UnusedColIdxVector[global_UnusedColStartIdx + array_iter]/nDofsPerNode) != floor(ColIdStatus->getMap()->getGlobalElement(ljk)/nDofsPerNode)) { lWideRangeColPermutations++; } array_iter++; //cntUnusedColIdx--; // check me } } } // end if global_cntFreeColIdx > 0 /////////////////// Qperm should be fine now... 
// create new empty Matrix Teuchos::RCP<CrsMatrixWrap> permPTmatrix = Teuchos::rcp(new CrsMatrixWrap(A->getRowMap(),1,Xpetra::StaticProfile)); Teuchos::RCP<CrsMatrixWrap> permQTmatrix = Teuchos::rcp(new CrsMatrixWrap(A->getRowMap(),1,Xpetra::StaticProfile)); for(size_t row=0; row<A->getNodeNumRows(); row++) { Teuchos::ArrayRCP<GlobalOrdinal> indoutP(1,Teuchos::as<GO>(PpermData[row])); // column idx for Perm^T Teuchos::ArrayRCP<GlobalOrdinal> indoutQ(1,Teuchos::as<GO>(QpermData[row])); // column idx for Qperm Teuchos::ArrayRCP<Scalar> valout(1,1.0); permPTmatrix->insertGlobalValues(A->getRowMap()->getGlobalElement(row), indoutP.view(0,indoutP.size()), valout.view(0,valout.size())); permQTmatrix->insertGlobalValues (A->getRowMap()->getGlobalElement(row), indoutQ.view(0,indoutQ.size()), valout.view(0,valout.size())); } permPTmatrix->fillComplete(); permQTmatrix->fillComplete(); Teuchos::RCP<Matrix> permPmatrix = Utils2::Transpose(permPTmatrix,true); for(size_t row=0; row<permPTmatrix->getNodeNumRows(); row++) { if(permPTmatrix->getNumEntriesInLocalRow(row) != 1) GetOStream(Warnings0,0) <<"#entries in row " << row << " of permPTmatrix is " << permPTmatrix->getNumEntriesInLocalRow(row) << std::endl; if(permPmatrix->getNumEntriesInLocalRow(row) != 1) GetOStream(Warnings0,0) <<"#entries in row " << row << " of permPmatrix is " << permPmatrix->getNumEntriesInLocalRow(row) << std::endl; if(permQTmatrix->getNumEntriesInLocalRow(row) != 1) GetOStream(Warnings0,0) <<"#entries in row " << row << " of permQmatrix is " << permQTmatrix->getNumEntriesInLocalRow(row) << std::endl; } // build permP * A * permQT Teuchos::RCP<Matrix> ApermQt = Utils::Multiply(*A, false, *permQTmatrix, false); Teuchos::RCP<Matrix> permPApermQt = Utils::Multiply(*permPmatrix, false, *ApermQt, false); /* MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Write("A.mat", *A); MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Write("permP.mat", *permPmatrix); 
MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Write("permQt.mat", *permQTmatrix); MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Write("permPApermQt.mat", *permPApermQt); */ // build scaling matrix Teuchos::RCP<Vector> diagVec = VectorFactory::Build(permPApermQt->getRowMap(),true); Teuchos::RCP<Vector> invDiagVec = VectorFactory::Build(permPApermQt->getRowMap(),true); Teuchos::ArrayRCP< const Scalar > diagVecData = diagVec->getData(0); Teuchos::ArrayRCP< Scalar > invDiagVecData = invDiagVec->getDataNonConst(0); permPApermQt->getLocalDiagCopy(*diagVec); for(size_t i = 0; i<diagVec->getMap()->getNodeNumElements(); ++i) { if(diagVecData[i] != 0.0) invDiagVecData[i] = 1/diagVecData[i]; else { invDiagVecData[i] = 1.0; GetOStream(Statistics0,0) << "MueLu::PermutationFactory: found zero on diagonal in row " << i << std::endl; } } Teuchos::RCP<CrsMatrixWrap> diagScalingOp = Teuchos::rcp(new CrsMatrixWrap(permPApermQt->getRowMap(),1,Xpetra::StaticProfile)); for(size_t row=0; row<A->getNodeNumRows(); row++) { Teuchos::ArrayRCP<GlobalOrdinal> indout(1,permPApermQt->getRowMap()->getGlobalElement(row)); // column idx for Perm^T Teuchos::ArrayRCP<Scalar> valout(1,invDiagVecData[row]); diagScalingOp->insertGlobalValues(A->getRowMap()->getGlobalElement(row), indout.view(0,indout.size()), valout.view(0,valout.size())); } diagScalingOp->fillComplete(); Teuchos::RCP<Matrix> scaledA = Utils::Multiply(*diagScalingOp, false, *permPApermQt, false); currentLevel.Set("A", Teuchos::rcp_dynamic_cast<Matrix>(scaledA), genFactory/*this*/); currentLevel.Set("permA", Teuchos::rcp_dynamic_cast<Matrix>(permPApermQt), genFactory/*this*/); // TODO careful with this!!! 
currentLevel.Set("permP", Teuchos::rcp_dynamic_cast<Matrix>(permPmatrix), genFactory/*this*/); currentLevel.Set("permQT", Teuchos::rcp_dynamic_cast<Matrix>(permQTmatrix), genFactory/*this*/); currentLevel.Set("permScaling", Teuchos::rcp_dynamic_cast<Matrix>(diagScalingOp), genFactory/*this*/); //// count row permutations // count zeros on diagonal in P -> number of row permutations Teuchos::RCP<Vector> diagPVec = VectorFactory::Build(permPmatrix->getRowMap(),true); permPmatrix->getLocalDiagCopy(*diagPVec); Teuchos::ArrayRCP< const Scalar > diagPVecData = diagPVec->getData(0); LocalOrdinal lNumRowPermutations = 0; GlobalOrdinal gNumRowPermutations = 0; for(size_t i = 0; i<diagPVec->getMap()->getNodeNumElements(); ++i) { if(diagPVecData[i] == 0.0) { lNumRowPermutations++; } } // sum up all entries in multipleColRequests over all processors sumAll(diagPVec->getMap()->getComm(), Teuchos::as<GlobalOrdinal>(lNumRowPermutations), gNumRowPermutations); //// count column permutations // count zeros on diagonal in Q^T -> number of column permutations Teuchos::RCP<Vector> diagQTVec = VectorFactory::Build(permQTmatrix->getRowMap(),true); permQTmatrix->getLocalDiagCopy(*diagQTVec); Teuchos::ArrayRCP< const Scalar > diagQTVecData = diagQTVec->getData(0); LocalOrdinal lNumColPermutations = 0; GlobalOrdinal gNumColPermutations = 0; for(size_t i = 0; i<diagQTVec->getMap()->getNodeNumElements(); ++i) { if(diagQTVecData[i] == 0.0) { lNumColPermutations++; } } // sum up all entries in multipleColRequests over all processors sumAll(diagQTVec->getMap()->getComm(), Teuchos::as<GlobalOrdinal>(lNumColPermutations), gNumColPermutations); currentLevel.Set("#RowPermutations", gNumRowPermutations, genFactory/*this*/); currentLevel.Set("#ColPermutations", gNumColPermutations, genFactory/*this*/); currentLevel.Set("#WideRangeRowPermutations", gWideRangeRowPermutations, genFactory/*this*/); currentLevel.Set("#WideRangeColPermutations", gWideRangeColPermutations, genFactory/*this*/); 
GetOStream(Statistics0, 0) << "#Row permutations/max possible permutations: " << gNumRowPermutations << "/" << diagPVec->getMap()->getGlobalNumElements() << std::endl; GetOStream(Statistics0, 0) << "#Column permutations/max possible permutations: " << gNumColPermutations << "/" << diagQTVec->getMap()->getGlobalNumElements() << std::endl; GetOStream(Runtime1, 0) << "#wide range row permutations: " << gWideRangeRowPermutations << " #wide range column permutations: " << gWideRangeColPermutations << std::endl; #else #warning PermutationFactory not compiling/working for Scalar==complex. #endif // #ifndef HAVE_MUELU_INST_COMPLEX_INT_INT }
// Extract the (row, col) sub-block of a blocked operator "A" from currentLevel,
// wrap it as a CrsMatrixWrap, attach a "stridedMaps" view (reconstructing strided
// range/domain maps from the full maps if the block maps are not strided), and
// store the result as "A" on currentLevel.
//
// Throws Exceptions::BadCast if "A" is not a BlockedCrsMatrix or the striding
// information cannot be recovered; Exceptions::RuntimeError on invalid block indices.
void SubBlockAFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level & currentLevel) const {
  typedef Xpetra::Matrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> OMatrix; //TODO
  typedef Xpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> CrsMatrixClass; //TODO
  typedef Xpetra::CrsMatrixWrap<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> CrsMatrixWrapClass; //TODO
  typedef Xpetra::BlockedCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> BlockedCrsOMatrix; //TODO
  typedef Xpetra::MapExtractor<Scalar, LocalOrdinal, GlobalOrdinal, Node> MapExtractorClass;

  // requested block coordinates (0-based) from the parameter list
  const ParameterList & pL = GetParameterList();
  size_t row = Teuchos::as<size_t>(pL.get<int>("block row"));
  size_t col = Teuchos::as<size_t>(pL.get<int>("block col"));

  RCP<OMatrix> Ain = Teuchos::null;
  Ain = Get< RCP<OMatrix> >(currentLevel, "A");

  RCP<BlockedCrsOMatrix> bA = Teuchos::rcp_dynamic_cast<BlockedCrsOMatrix>(Ain);

  TEUCHOS_TEST_FOR_EXCEPTION(bA==Teuchos::null, Exceptions::BadCast, "MueLu::SubBlockAFactory::Build: input matrix A is not of type BlockedCrsMatrix! error.");
  // BUGFIX: block indices are 0-based, so row == Rows() (resp. col == Cols()) is
  // already out of range; the previous checks used '>' and let it through.
  TEUCHOS_TEST_FOR_EXCEPTION(row >= bA->Rows(), Exceptions::RuntimeError, "MueLu::SubBlockAFactory::Build: A.Rows() > rows_! error.");
  TEUCHOS_TEST_FOR_EXCEPTION(col >= bA->Cols(), Exceptions::RuntimeError, "MueLu::SubBlockAFactory::Build: A.Cols() > cols_! error.");

  Teuchos::RCP<CrsMatrixClass> A = bA->getMatrix(row, col);

  Teuchos::RCP<CrsMatrixWrapClass> Op = Teuchos::rcp(new CrsMatrixWrapClass(A));

  //////////////// EXPERIMENTAL
  // extract striding information from RangeMapExtractor

  Teuchos::RCP<const MapExtractorClass> rgMapExtractor = bA->getRangeMapExtractor();
  Teuchos::RCP<const MapExtractorClass> doMapExtractor = bA->getDomainMapExtractor();

  Teuchos::RCP<const Map> rgMap = rgMapExtractor->getMap(row);
  Teuchos::RCP<const Map> doMap = doMapExtractor->getMap(col);

  Teuchos::RCP<const StridedMap> srgMap = Teuchos::rcp_dynamic_cast<const StridedMap>(rgMap);
  Teuchos::RCP<const StridedMap> sdoMap = Teuchos::rcp_dynamic_cast<const StridedMap>(doMap);

  // block range map is not strided: rebuild a strided map from the full range map's striding data
  if(srgMap == Teuchos::null) {
    Teuchos::RCP<const Map> fullRgMap = rgMapExtractor->getFullMap();
    Teuchos::RCP<const StridedMap> sFullRgMap = Teuchos::rcp_dynamic_cast<const StridedMap>(fullRgMap);
    TEUCHOS_TEST_FOR_EXCEPTION(sFullRgMap==Teuchos::null, Exceptions::BadCast, "MueLu::SubBlockAFactory::Build: full rangeMap is not a strided map");
    std::vector<size_t> stridedData = sFullRgMap->getStridingData();
    if(stridedData.size() == 1 && row > 0) // we have block matrices. use striding block information 0
      srgMap = StridedMapFactory::Build(rgMap, stridedData, 0, sFullRgMap->getOffset());
    else // we have strided matrices. use striding information of the corresponding block
      srgMap = StridedMapFactory::Build(rgMap, stridedData, row, sFullRgMap->getOffset());
  }

  // block domain map is not strided: rebuild a strided map from the full domain map's striding data
  if(sdoMap == Teuchos::null) {
    Teuchos::RCP<const Map> fullDoMap = doMapExtractor->getFullMap();
    Teuchos::RCP<const StridedMap> sFullDoMap = Teuchos::rcp_dynamic_cast<const StridedMap>(fullDoMap);
    TEUCHOS_TEST_FOR_EXCEPTION(sFullDoMap==Teuchos::null, Exceptions::BadCast, "MueLu::SubBlockAFactory::Build: full domainMap is not a strided map");
    std::vector<size_t> stridedData2 = sFullDoMap->getStridingData();
    if(stridedData2.size() == 1 && col > 0) // we have block matrices. use striding block information 0
      sdoMap = StridedMapFactory::Build(doMap, stridedData2, 0, sFullDoMap->getOffset());
    else // we have strided matrices. use striding information of the corresponding block
      sdoMap = StridedMapFactory::Build(doMap, stridedData2, col, sFullDoMap->getOffset());
  }

  TEUCHOS_TEST_FOR_EXCEPTION(srgMap==Teuchos::null, Exceptions::BadCast, "MueLu::SubBlockAFactory::Build: rangeMap " << row << " is not a strided map");
  TEUCHOS_TEST_FOR_EXCEPTION(sdoMap==Teuchos::null, Exceptions::BadCast, "MueLu::SubBlockAFactory::Build: domainMap " << col << " is not a strided map");

  GetOStream(Statistics1) << "A(" << row << "," << col << ") has strided maps: range map fixed block size=" << srgMap->getFixedBlockSize() << " strided block id = " << srgMap->getStridedBlockId() << ", domain map fixed block size=" << sdoMap->getFixedBlockSize() << ", strided block id=" << sdoMap->getStridedBlockId() << std::endl;

  // (re-)attach the strided map view to the extracted block
  if(Op->IsView("stridedMaps") == true) Op->RemoveView("stridedMaps");
  Op->CreateView("stridedMaps", srgMap, sdoMap);
  TEUCHOS_TEST_FOR_EXCEPTION(Op->IsView("stridedMaps")==false, Exceptions::RuntimeError, "MueLu::SubBlockAFactory::Build: failed to set stridedMaps");
  //////////////// EXPERIMENTAL

  currentLevel.Set("A", Teuchos::rcp_dynamic_cast<OMatrix>(Op), this);
}
// Clones an existing repartitioning result onto the row map of this level's
// matrix A: every DOF of a node receives the partition id computed for that
// node by the upstream repartitioning interface.
//
// Input : level data "A" (matrix) and "Partition" (GOVector, one entry per
//         input DOF/node).
// Output: level data "Partition" (GOVector on A's row map).
void CloneRepartitionInterface<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(Level &currentLevel) const {
  // BUGFIX: the parameter read "Level ¤tLevel" — the reference
  // "&currentLevel" had been mangled into the HTML entity "&curren;".
  FactoryMonitor m(*this, "Build", currentLevel);

  currentLevel.print(GetOStream(Statistics0,0));
  // extract blocked operator A from current level
  Teuchos::RCP<Matrix> A = Get< Teuchos::RCP<Matrix> > (currentLevel, "A");
  Teuchos::RCP<const Teuchos::Comm< int > > comm = A->getRowMap()->getComm();

  // number of Partitions only used for a shortcut.
  GO numPartitions = 0;
  if (currentLevel.IsAvailable("number of partitions")) {
    numPartitions = currentLevel.Get<GO>("number of partitions");
    GetOStream(Warnings0) << "Using user-provided \"number of partitions\", the performance is unknown" << std::endl;
  }

  // ======================================================================================================
  // Construct decomposition vector
  // ======================================================================================================
  RCP<GOVector> decomposition = Teuchos::null;

  // extract decomposition vector
  decomposition = Get<RCP<GOVector> >(currentLevel, "Partition");

  // BUGFIX: test for a null decomposition BEFORE dereferencing it; the
  // original called decomposition->getData(0) first, which defeats the check.
  if (decomposition.is_null()) {
    GetOStream(Warnings0) << "No repartitioning necessary: partitions were left unchanged by the repartitioner" << std::endl;
    Set<RCP<const Import> >(currentLevel, "Importer", Teuchos::null);
    return;
  }
  ArrayRCP<const GO> decompEntries = decomposition->getData(0);

  // create new decomposition vector
  Teuchos::RCP<GOVector> ret = Xpetra::VectorFactory<GO, LO, GO, NO>::Build(A->getRowMap(), false);
  ArrayRCP<GO> retDecompEntries = ret->getDataNonConst(0);

  // block size of output vector
  LocalOrdinal blkSize = 1;

  // check for blocking/striding information
  if(A->IsView("stridedMaps") &&
     Teuchos::rcp_dynamic_cast<const StridedMap>(A->getRowMap("stridedMaps")) != Teuchos::null) {
    Xpetra::viewLabel_t oldView = A->SwitchToView("stridedMaps"); // note: "stridedMaps are always non-overlapping (correspond to range and domain maps!)
    RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(A->getRowMap());
    TEUCHOS_TEST_FOR_EXCEPTION(strMap == Teuchos::null,Exceptions::BadCast,"MueLu::CloneRepartitionInterface::Build: cast to strided row map failed.");
    LocalOrdinal stridedBlock = strMap->getStridedBlockId();
    if (stridedBlock == -1)
      blkSize = strMap->getFixedBlockSize();
    else {
      std::vector<size_t> strInfo = strMap->getStridingData();
      blkSize = strInfo[stridedBlock];
    }
    oldView = A->SwitchToView(oldView);
    GetOStream(Statistics1) << "CloneRepartitionInterface::Build():" << " found blockdim=" << blkSize << " from strided maps."<< std::endl;
  } else {
    // BUGFIX: assign blkSize before reporting it; the original printed the
    // stale default value (1) and only then read A->GetFixedBlockSize().
    blkSize = A->GetFixedBlockSize();
    GetOStream(Statistics1) << "CloneRepartitionInterface::Build(): no striding information available. Use blockdim=" << blkSize << " (DofsPerNode)." << std::endl;
  }

  // plausibility check!
  size_t inLocalLength  = decomposition->getLocalLength();
  size_t outLocalLength = A->getRowMap()->getNodeNumElements();

  // only for non-strided maps
  size_t numLocalNodes = outLocalLength / blkSize;
  TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as<size_t>(outLocalLength % blkSize) != 0, MueLu::Exceptions::RuntimeError,"CloneRepartitionInterface: inconsistent number of local DOFs (" << outLocalLength << ") and degrees of freedoms (" << blkSize <<")");

  if (numLocalNodes > 0) {
    TEUCHOS_TEST_FOR_EXCEPTION(Teuchos::as<size_t>(inLocalLength % numLocalNodes) != 0, MueLu::Exceptions::RuntimeError,"CloneRepartitionInterface: inconsistent number of local DOFs (" << inLocalLength << ") and number of local nodes (" << numLocalNodes << ")");
    LocalOrdinal inBlkSize = Teuchos::as<LocalOrdinal>(inLocalLength / numLocalNodes);
    //TEUCHOS_TEST_FOR_EXCEPTION(blkSize != inBlkSize, MueLu::Exceptions::RuntimeError,"CloneRepartitionInterface: input block size = " << inBlkSize << " output block size = " << blkSize << ". They should be the same.");

    // Broadcast the partition id of each input node to every DOF of that node
    // in the output vector.
    for(LO i = 0; i < Teuchos::as<LO>(numLocalNodes); i++) {
      for(LO j = 0; j < blkSize; j++) {
        retDecompEntries[i*blkSize + j] = Teuchos::as<GO>(decompEntries[i*inBlkSize]);
      }
    }
  } // end if numLocalNodes > 0
  Set(currentLevel, "Partition", ret);
} //Build()
// Unit test: CoalesceDropFactory amalgamation on a strided map with a global
// offset, combined with entry dropping ("aggregation: drop tol" = 0.3).
// Builds a tridiagonal matrix with tiny off-diagonal entries (0.0001) so the
// dropping actually removes connections, then checks the resulting graph and
// its import/domain maps per MPI rank.
TEUCHOS_UNIT_TEST(CoalesceDropFactory, AmalgamationStridedOffsetDropping2LW)
{
  // unit test for block size 9 = (2,3,4). wrap block 1.
  // drop small entries
  // lightweight wrap = true
  out << "version: " << MueLu::Version() << std::endl;

  RCP<const Teuchos::Comm<int> > comm = Parameters::getDefaultComm();
  Xpetra::UnderlyingLib lib = TestHelpers::Parameters::getLib();

  // create strided map information
  std::vector<size_t> stridingInfo;
  stridingInfo.push_back(as<size_t>(2));
  stridingInfo.push_back(as<size_t>(3));
  stridingInfo.push_back(as<size_t>(4));
  LocalOrdinal stridedBlockId = 1;  // wrap only the middle (size-3) block
  GlobalOrdinal offset = 19;        // non-zero GID offset to exercise offset handling

  // 9 DOFs per process (2+3+4)
  RCP<const StridedMap> dofMap = Xpetra::StridedMapFactory<LocalOrdinal, GlobalOrdinal, Node>::Build(lib, 9*comm->getSize(), 0, stridingInfo, comm, stridedBlockId, offset);

  /////////////////////////////////////////////////////

  // tridiagonal operator with near-zero couplings (0.0001) -> droppable
  Teuchos::RCP<Matrix> mtx = TestHelpers::TestFactory<SC,LO,GO,NO>::BuildTridiag(dofMap, 2.0, 1.0, 0.0001);

  Level fineLevel;
  TestHelpers::TestFactory<SC,LO,GO,NO>::createSingleLevelHierarchy(fineLevel);

  RCP<const Map> stridedRangeMap = Xpetra::StridedMapFactory<LocalOrdinal, GlobalOrdinal, Node>::Build( mtx->getRangeMap(), stridingInfo, stridedBlockId, offset );
  RCP<const Map> stridedDomainMap = Xpetra::StridedMapFactory<LocalOrdinal, GlobalOrdinal, Node>::Build( mtx->getDomainMap(), stridingInfo, stridedBlockId, offset );

  // attach the strided maps as a view so the factory can amalgamate
  if(mtx->IsView("stridedMaps") == true) mtx->RemoveView("stridedMaps");
  mtx->CreateView("stridedMaps", stridedRangeMap, stridedDomainMap);

  fineLevel.Set("A", mtx);

  CoalesceDropFactory dropFact = CoalesceDropFactory();
  dropFact.SetParameter("lightweight wrap",Teuchos::ParameterEntry(true));
  dropFact.SetParameter("aggregation: drop tol",Teuchos::ParameterEntry(0.3));
  fineLevel.Request("Graph", &dropFact);
  fineLevel.Request("DofsPerNode", &dropFact);

  dropFact.Build(fineLevel);

  fineLevel.print(out);
  RCP<GraphBase> graph = fineLevel.Get<RCP<GraphBase> >("Graph", &dropFact);
  LO myDofsPerNode = fineLevel.Get<LO>("DofsPerNode", &dropFact);

  // one graph vertex per process; full block size 9 reported per node
  TEST_EQUALITY(as<int>(graph->GetDomainMap()->getGlobalNumElements()) == comm->getSize(), true);
  TEST_EQUALITY(as<int>(myDofsPerNode) == 9, true);

  // with dropping, rank 0's node keeps only itself; other ranks also see
  // their left neighbor
  bool bCorrectGraph = false;
  if (comm->getSize() == 1 && graph->getNeighborVertices(0).size() == 1) {
    bCorrectGraph = true;
  } else {
    if (comm->getRank() == 0) {
      if (graph->getNeighborVertices(0).size() == 1) bCorrectGraph = true;
    }
    else {
      if (graph->getNeighborVertices(0).size() == 2) bCorrectGraph = true;
    }
  }
  TEST_EQUALITY(bCorrectGraph, true);

  const RCP<const Map> myImportMap = graph->GetImportMap(); // < note that the ImportMap is built from the column map of the matrix A WITHOUT dropping!
  const RCP<const Map> myDomainMap = graph->GetDomainMap();

  TEST_EQUALITY(myImportMap->getMaxAllGlobalIndex(), comm->getSize()-1);
  TEST_EQUALITY(myImportMap->getMinAllGlobalIndex(), 0);
  TEST_EQUALITY(myImportMap->getMinLocalIndex(),0);
  // each interior rank overlaps with both neighbors -> 2 ghosts per interface
  TEST_EQUALITY(myImportMap->getGlobalNumElements(),as<size_t>(comm->getSize()+2*(comm->getSize()-1)));
  if (comm->getSize()>1) {
    size_t numLocalRowMapElts = graph->GetNodeNumVertices();
    size_t numLocalImportElts = myImportMap->getNodeNumElements();
    if (comm->getRank() == 0 || comm->getRank() == comm->getSize()-1) {
      // boundary ranks have one neighbor rank -> one ghost node
      TEST_EQUALITY(as<bool>(numLocalImportElts==numLocalRowMapElts+1), true);
    } else {
      // interior ranks have two neighbor ranks -> two ghost nodes
      TEST_EQUALITY(as<bool>(numLocalImportElts==numLocalRowMapElts+2), true);
    }
  }

  TEST_EQUALITY(myDomainMap->getMaxAllGlobalIndex(), comm->getSize()-1);
  TEST_EQUALITY(myDomainMap->getMinAllGlobalIndex(), 0);
  TEST_EQUALITY(myDomainMap->getMinLocalIndex(),0);
  TEST_EQUALITY(myDomainMap->getGlobalNumElements(),as<size_t>(comm->getSize()));
  TEST_EQUALITY(as<bool>(myDomainMap->getNodeNumElements()==1), true);
} // AmalgamationStridedOffsetDropping2LW
// Unit test: CoalesceDropFactory amalgamation with a strided map (no offset,
// no dropping). Builds a standard tridiagonal operator and verifies the
// amalgamated graph, its import/domain maps, and local index bounds per rank.
TEUCHOS_UNIT_TEST(CoalesceDropFactory, AmalgamationStrided2LW)
{
# include "MueLu_UseShortNames.hpp"
  MUELU_TESTING_SET_OSTREAM;
  MUELU_TESTING_LIMIT_SCOPE(Scalar,GlobalOrdinal,NO);
  out << "version: " << MueLu::Version() << std::endl;

  // unit test for block size 3 = (2,1). wrap block 0
  // lightweight wrap = true

  RCP<const Teuchos::Comm<int> > comm = Parameters::getDefaultComm();
  Xpetra::UnderlyingLib lib = TestHelpers::Parameters::getLib();

  // create strided map information
  std::vector<size_t> stridingInfo;
  stridingInfo.push_back(as<size_t>(2));
  stridingInfo.push_back(as<size_t>(1));
  LocalOrdinal stridedBlockId = 0;  // wrap the first (size-2) block

  int blockSize=3;

  RCP<const StridedMap> dofMap = Xpetra::StridedMapFactory<LocalOrdinal, GlobalOrdinal, Node>::Build(lib, blockSize*comm->getSize(), 0, stridingInfo, comm, stridedBlockId /*blockId*/, 0 /*offset*/);

  /////////////////////////////////////////////////////

  // standard tridiagonal stencil (2, -1, -1) -> nothing droppable
  Teuchos::RCP<Matrix> mtx = TestHelpers::TestFactory<SC,LO,GO,NO>::BuildTridiag(dofMap, 2.0, -1.0, -1.0);

  Level fineLevel;
  TestHelpers::TestFactory<SC,LO,GO,NO>::createSingleLevelHierarchy(fineLevel);

  RCP<const Xpetra::StridedMap<LocalOrdinal, GlobalOrdinal, Node> > stridedRangeMap = Xpetra::StridedMapFactory<LocalOrdinal, GlobalOrdinal, Node>::Build( mtx->getRangeMap(), stridingInfo, stridedBlockId, 0 /*offset*/ );
  RCP<const Map> stridedDomainMap = Xpetra::StridedMapFactory<LocalOrdinal, GlobalOrdinal, Node>::Build( mtx->getDomainMap(), stridingInfo, stridedBlockId, 0 /*offset*/ );

  // attach the strided maps as a view so the factory can amalgamate
  if(mtx->IsView("stridedMaps") == true) mtx->RemoveView("stridedMaps");
  mtx->CreateView("stridedMaps", stridedRangeMap, stridedDomainMap);

  fineLevel.Set("A", mtx);

  CoalesceDropFactory dropFact = CoalesceDropFactory();
  dropFact.SetParameter("lightweight wrap",Teuchos::ParameterEntry(true));
  fineLevel.Request("Graph", &dropFact);
  fineLevel.Request("DofsPerNode", &dropFact);

  dropFact.Build(fineLevel);

  fineLevel.print(out);
  RCP<GraphBase> graph = fineLevel.Get<RCP<GraphBase> >("Graph", &dropFact);
  LO myDofsPerNode = fineLevel.Get<LO>("DofsPerNode", &dropFact);

  // one graph vertex per process; full block size reported per node
  TEST_EQUALITY(as<int>(graph->GetDomainMap()->getGlobalNumElements()) == comm->getSize(), true);
  TEST_EQUALITY(as<int>(myDofsPerNode) == blockSize, true);

  // boundary ranks see self + one neighbor; interior ranks see self + both
  bool bCorrectGraph = false;
  if (comm->getSize() == 1 && graph->getNeighborVertices(0).size() == 1) {
    bCorrectGraph = true;
  } else {
    if (comm->getRank() == 0 || comm->getRank() == comm->getSize()-1) {
      if (graph->getNeighborVertices(0).size() == 2) bCorrectGraph = true;
    }
    else {
      if (graph->getNeighborVertices(0).size() == blockSize) bCorrectGraph = true;
    }
  }
  TEST_EQUALITY(bCorrectGraph, true);

  const RCP<const Map> myImportMap = graph->GetImportMap(); // < note that the ImportMap is built from the column map of the matrix A WITHOUT dropping!
  const RCP<const Map> myDomainMap = graph->GetDomainMap();

  TEST_EQUALITY(myImportMap->getMaxAllGlobalIndex(), comm->getSize()-1);
  TEST_EQUALITY(myImportMap->getMinAllGlobalIndex(), 0);
  TEST_EQUALITY(myImportMap->getMinLocalIndex(),0);
  // each interface between ranks contributes 2 overlapping node copies
  TEST_EQUALITY(myImportMap->getGlobalNumElements(),as<size_t>(comm->getSize()+2*(comm->getSize()-1)));
  if (comm->getSize()>1) {
    size_t numLocalRowMapElts = graph->GetNodeNumVertices();
    size_t numLocalImportElts = myImportMap->getNodeNumElements();
    if (comm->getRank() == 0 || comm->getRank() == comm->getSize()-1) {
      // boundary rank: one ghost node
      TEST_EQUALITY(as<bool>(numLocalImportElts==numLocalRowMapElts+1), true);
    } else {
      // interior rank: two ghost nodes
      TEST_EQUALITY(as<bool>(numLocalImportElts==numLocalRowMapElts+2), true);
    }
  }
  if (comm->getSize()>1) {
    size_t numLocalRowMapElts = graph->GetNodeNumVertices();
    size_t maxLocalIndex = myImportMap->getMaxLocalIndex();
    if (comm->getRank() == 0 || comm->getRank() == comm->getSize()-1) {
      TEST_EQUALITY(as<bool>(maxLocalIndex==numLocalRowMapElts*blockSize-2), true);
    } else {
      TEST_EQUALITY(as<bool>(maxLocalIndex==numLocalRowMapElts*blockSize-1), true);
    }
  }

  TEST_EQUALITY(myDomainMap->getMaxAllGlobalIndex(), comm->getSize()-1);
  TEST_EQUALITY(myDomainMap->getMinAllGlobalIndex(), 0);
  TEST_EQUALITY(myDomainMap->getMinLocalIndex(),0);
  TEST_EQUALITY(myDomainMap->getMaxLocalIndex(),0);
  TEST_EQUALITY(myDomainMap->getGlobalNumElements(),as<size_t>(comm->getSize()));
  TEST_EQUALITY(as<bool>(myDomainMap->getNodeNumElements()==1), true);
} // AmalgamationStrided2LW
// Rebalances a 2x2 blocked restriction operator "R": each diagonal sub-block
// R(i,i) is redistributed with the Importer produced by its sub-factory
// manager, strided-map views are rebuilt for the redistributed blocks, the
// corresponding nullspace vectors are imported onto the new maps, and the
// rebalanced blocked operator is stored back on the coarse level as "R".
//
// Input : coarse-level data "R" (BlockedCrsMatrix, must be 2x2), plus per
//         sub-factory-manager "Importer" and "Nullspace".
// Output: coarse-level data "R" (rebalanced BlockedCrsMatrix) and the
//         permuted "Nullspace" per sub-factory manager.
void RebalanceBlockRestrictionFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level &fineLevel, Level &coarseLevel) const {
  FactoryMonitor m(*this, "Build", coarseLevel);
  //const Teuchos::ParameterList & pL = GetParameterList();

  RCP<Teuchos::FancyOStream> out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));

  Teuchos::RCP<Matrix> originalTransferOp = Teuchos::null;
  originalTransferOp = Get< RCP<Matrix> >(coarseLevel, "R");

  RCP<Xpetra::BlockedCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> > bOriginalTransferOp = Teuchos::rcp_dynamic_cast<Xpetra::BlockedCrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps> >(originalTransferOp);
  TEUCHOS_TEST_FOR_EXCEPTION(bOriginalTransferOp==Teuchos::null, Exceptions::BadCast, "MueLu::RebalanceBlockTransferFactory::Build: input matrix P or R is not of type BlockedCrsMatrix! error.");

  // plausibility check
  // NOTE(review): this factory is hard-wired to 2x2 blocked operators; it
  // also assumes FactManager_ holds one manager per diagonal block.
  TEUCHOS_TEST_FOR_EXCEPTION(bOriginalTransferOp->Rows() != 2,Exceptions::RuntimeError, "MueLu::RebalanceBlockTransferFactory::Build: number of block rows of transfer operator is not equal 2. error.");
  TEUCHOS_TEST_FOR_EXCEPTION(bOriginalTransferOp->Cols() != 2,Exceptions::RuntimeError, "MueLu::RebalanceBlockTransferFactory::Build: number of block columns of transfer operator is not equal 2. error.");

  // rebuild rebalanced blocked P operator
  // Accumulators for the GIDs and sub-maps of the rebalanced blocks; the
  // full range/domain maps are rebuilt from these at the end.
  std::vector<GO> fullRangeMapVector;
  std::vector<GO> fullDomainMapVector;
  std::vector<RCP<const Map> > subBlockRRangeMaps;
  std::vector<RCP<const Map> > subBlockRDomainMaps;
  subBlockRRangeMaps.reserve(bOriginalTransferOp->Rows());       // reserve size for block P operators
  subBlockRDomainMaps.reserve(bOriginalTransferOp->Cols());       // reserve size for block P operators

  std::vector<Teuchos::RCP<Matrix> > subBlockRebR;
  subBlockRebR.reserve(bOriginalTransferOp->Cols());

  int curBlockId = 0;
  Teuchos::RCP<const Import> rebalanceImporter = Teuchos::null;
  std::vector<Teuchos::RCP<const FactoryManagerBase> >::const_iterator it;
  // One pass per sub-factory manager == per diagonal block R(i,i).
  for (it = FactManager_.begin(); it != FactManager_.end(); ++it) {
    // begin SubFactoryManager environment
    SetFactoryManager fineSFM  (rcpFromRef(fineLevel),   *it);
    SetFactoryManager coarseSFM(rcpFromRef(coarseLevel), *it);

    rebalanceImporter = coarseLevel.Get<Teuchos::RCP<const Import> >("Importer", (*it)->GetFactory("Importer").get());

    // extract matrix block
    Teuchos::RCP<CrsMatrix> Rmii = bOriginalTransferOp->getMatrix(curBlockId, curBlockId);
    Teuchos::RCP<CrsMatrixWrap> Rwii = Teuchos::rcp(new CrsMatrixWrap(Rmii));
    Teuchos::RCP<Matrix> Rii = Teuchos::rcp_dynamic_cast<Matrix>(Rwii);

    Teuchos::RCP<Matrix> rebRii;
    if(rebalanceImporter != Teuchos::null) {
      std::stringstream ss; ss << "Rebalancing restriction block R(" << curBlockId << "," << curBlockId << ")";
      SubFactoryMonitor m1(*this, ss.str(), coarseLevel);
      {
        SubFactoryMonitor subM(*this, "Rebalancing restriction -- fusedImport", coarseLevel);
        // Note: The 3rd argument says to use originalR's domain map.
        RCP<Map> dummy;
        rebRii = MatrixFactory::Build(Rii,*rebalanceImporter,dummy,rebalanceImporter->getTargetMap());
      }
      RCP<ParameterList> params = rcp(new ParameterList());
      params->set("printLoadBalancingInfo", true);
      std::stringstream ss2; ss2 << "R(" << curBlockId << "," << curBlockId << ") rebalanced:";
      GetOStream(Statistics0) << PerfUtils::PrintMatrixInfo(*rebRii, ss2.str(), params);
    } else {
      // no importer: keep the block as-is, just report its load balance
      rebRii = Rii;
      RCP<ParameterList> params = rcp(new ParameterList());
      params->set("printLoadBalancingInfo", true);
      std::stringstream ss2; ss2 << "R(" << curBlockId << "," << curBlockId << ") not rebalanced:";
      GetOStream(Statistics0) << PerfUtils::PrintMatrixInfo(*rebRii, ss2.str(), params);
    }

    // fix striding information for rebalanced diagonal block rebRii
    RCP<const Xpetra::MapExtractor<Scalar, LocalOrdinal, GlobalOrdinal, Node> > rgRMapExtractor = bOriginalTransferOp->getRangeMapExtractor(); // original map extractor
    Teuchos::RCP<const StridedMap> orig_stridedRgMap = Teuchos::rcp_dynamic_cast<const StridedMap>(rgRMapExtractor->getMap(Teuchos::as<size_t>(curBlockId)));
    Teuchos::RCP<const Map> stridedRgMap = Teuchos::null;
    if(orig_stridedRgMap != Teuchos::null) {
      // rebuild a strided range map over the rebalanced GIDs, reusing the
      // original striding data / block id / offset
      std::vector<size_t> stridingData = orig_stridedRgMap->getStridingData();
      Teuchos::ArrayView< const GlobalOrdinal > nodeRangeMapii = rebRii->getRangeMap()->getNodeElementList();
      stridedRgMap = StridedMapFactory::Build(
          originalTransferOp->getRangeMap()->lib(),
          Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
          nodeRangeMapii,
          rebRii->getRangeMap()->getIndexBase(),
          stridingData,
          originalTransferOp->getRangeMap()->getComm(),
          orig_stridedRgMap->getStridedBlockId(),
          orig_stridedRgMap->getOffset());
    }
    RCP<const Xpetra::MapExtractor<Scalar, LocalOrdinal, GlobalOrdinal, Node> > doRMapExtractor = bOriginalTransferOp->getDomainMapExtractor(); // original map extractor
    Teuchos::RCP<const StridedMap> orig_stridedDoMap = Teuchos::rcp_dynamic_cast<const StridedMap>(doRMapExtractor->getMap(Teuchos::as<size_t>(curBlockId)));
    Teuchos::RCP<const Map> stridedDoMap = Teuchos::null;
    if(orig_stridedDoMap != Teuchos::null) {
      // same reconstruction for the domain side
      std::vector<size_t> stridingData = orig_stridedDoMap->getStridingData();
      Teuchos::ArrayView< const GlobalOrdinal > nodeDomainMapii = rebRii->getDomainMap()->getNodeElementList();
      stridedDoMap = StridedMapFactory::Build(
          originalTransferOp->getDomainMap()->lib(),
          Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
          nodeDomainMapii,
          rebRii->getDomainMap()->getIndexBase(),
          stridingData,
          originalTransferOp->getDomainMap()->getComm(),
          orig_stridedDoMap->getStridedBlockId(),
          orig_stridedDoMap->getOffset());
    }

    TEUCHOS_TEST_FOR_EXCEPTION(stridedRgMap == Teuchos::null,Exceptions::RuntimeError, "MueLu::RebalanceBlockRestrictionFactory::Build: failed to generate striding information. error.");
    TEUCHOS_TEST_FOR_EXCEPTION(stridedDoMap == Teuchos::null,Exceptions::RuntimeError, "MueLu::RebalanceBlockRestrictionFactory::Build: failed to generate striding information. error.");

    // replace stridedMaps view in diagonal sub block
    if(rebRii->IsView("stridedMaps")) rebRii->RemoveView("stridedMaps");
    rebRii->CreateView("stridedMaps", stridedRgMap, stridedDoMap);

    // store rebalanced subblock
    subBlockRebR.push_back(rebRii);

    // append strided row map (= range map) to list of range maps.
    Teuchos::RCP<const Map> rangeMapii = rebRii->getRowMap("stridedMaps"); //rebRii->getRangeMap();
    subBlockRRangeMaps.push_back(rangeMapii);
    Teuchos::ArrayView< const GlobalOrdinal > nodeRangeMapii = rebRii->getRangeMap()->getNodeElementList();
    fullRangeMapVector.insert(fullRangeMapVector.end(), nodeRangeMapii.begin(), nodeRangeMapii.end());
    // NOTE(review): sorting the accumulated vector on every block iteration
    // re-sorts already-sorted prefixes; harmless for 2 blocks but O(n log n)
    // per iteration.
    sort(fullRangeMapVector.begin(), fullRangeMapVector.end());

    // append strided col map (= domain map) to list of range maps.
    Teuchos::RCP<const Map> domainMapii = rebRii->getColMap("stridedMaps"); //rebRii->getDomainMap();
    subBlockRDomainMaps.push_back(domainMapii);
    Teuchos::ArrayView< const GlobalOrdinal > nodeDomainMapii = rebRii->getDomainMap()->getNodeElementList();
    fullDomainMapVector.insert(fullDomainMapVector.end(), nodeDomainMapii.begin(), nodeDomainMapii.end());
    sort(fullDomainMapVector.begin(), fullDomainMapVector.end());

    ////////////////////////////////////////////////////////////

    // rebalance null space
    if(rebalanceImporter != Teuchos::null) { // rebalance null space
      std::stringstream ss2; ss2 << "Rebalancing nullspace block(" << curBlockId << "," << curBlockId << ")";
      SubFactoryMonitor subM(*this, ss2.str(), coarseLevel);

      RCP<MultiVector> nullspace = coarseLevel.Get<RCP<MultiVector> >("Nullspace", (*it)->GetFactory("Nullspace").get());
      RCP<MultiVector> permutedNullspace = MultiVectorFactory::Build(rebalanceImporter->getTargetMap(), nullspace->getNumVectors());
      permutedNullspace->doImport(*nullspace, *rebalanceImporter, Xpetra::INSERT);

      // TODO think about this
      //if (pL.get<bool>("useSubcomm") == true) // TODO either useSubcomm is enabled everywhere or nowhere
      //permutedNullspace->replaceMap(permutedNullspace->getMap()->removeEmptyProcesses());

      coarseLevel.Set<RCP<MultiVector> >("Nullspace", permutedNullspace, (*it)->GetFactory("Nullspace").get());
    } // end rebalance null space
    else { // do nothing
      RCP<MultiVector> nullspace = coarseLevel.Get<RCP<MultiVector> >("Nullspace", (*it)->GetFactory("Nullspace").get());
      coarseLevel.Set<RCP<MultiVector> >("Nullspace", nullspace, (*it)->GetFactory("Nullspace").get());
    }

    ////////////////////////////////////////////////////////////

    curBlockId++;
  } // end for loop

  // extract map index base from maps of blocked P
  GO rangeIndexBase = originalTransferOp->getRangeMap()->getIndexBase();
  GO domainIndexBase= originalTransferOp->getDomainMap()->getIndexBase();

  // check this
  RCP<const Xpetra::MapExtractor<Scalar, LocalOrdinal, GlobalOrdinal, Node> > rangeRMapExtractor = bOriginalTransferOp->getRangeMapExtractor(); // original map extractor
  // NOTE(review): &fullRangeMapVector[0] is undefined behavior if the vector
  // is empty (rank owns no rows) — consider guarding; TODO confirm every
  // rank always holds at least one GID here.
  Teuchos::ArrayView<GO> fullRangeMapGIDs(&fullRangeMapVector[0],fullRangeMapVector.size());
  Teuchos::RCP<const StridedMap> stridedRgFullMap = Teuchos::rcp_dynamic_cast<const StridedMap>(rangeRMapExtractor->getFullMap());
  Teuchos::RCP<const Map > fullRangeMap = Teuchos::null;
  if(stridedRgFullMap != Teuchos::null) {
    // preserve striding of the full range map
    std::vector<size_t> stridedData = stridedRgFullMap->getStridingData();
    fullRangeMap = StridedMapFactory::Build(
        originalTransferOp->getRangeMap()->lib(),
        Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
        fullRangeMapGIDs,
        rangeIndexBase,
        stridedData,
        originalTransferOp->getRangeMap()->getComm(),
        stridedRgFullMap->getStridedBlockId(),
        stridedRgFullMap->getOffset());
  } else {
    fullRangeMap = MapFactory::Build(
        originalTransferOp->getRangeMap()->lib(),
        Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
        fullRangeMapGIDs,
        rangeIndexBase,
        originalTransferOp->getRangeMap()->getComm());
  }

  RCP<const Xpetra::MapExtractor<Scalar, LocalOrdinal, GlobalOrdinal, Node> > domainAMapExtractor = bOriginalTransferOp->getDomainMapExtractor();
  // NOTE(review): same potential empty-vector UB as fullRangeMapGIDs above.
  Teuchos::ArrayView<GO> fullDomainMapGIDs(&fullDomainMapVector[0],fullDomainMapVector.size());
  Teuchos::RCP<const StridedMap> stridedDoFullMap = Teuchos::rcp_dynamic_cast<const StridedMap>(domainAMapExtractor->getFullMap());
  Teuchos::RCP<const Map > fullDomainMap = Teuchos::null;
  if(stridedDoFullMap != Teuchos::null) {
    // NOTE(review): this check is unreachable-false inside the non-null
    // branch; it only documents the expectation.
    TEUCHOS_TEST_FOR_EXCEPTION(stridedDoFullMap==Teuchos::null, Exceptions::BadCast, "MueLu::BlockedPFactory::Build: full map in domain map extractor has no striding information! error.");
    std::vector<size_t> stridedData2 = stridedDoFullMap->getStridingData();
    fullDomainMap = StridedMapFactory::Build(
        originalTransferOp->getDomainMap()->lib(),
        Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
        fullDomainMapGIDs,
        domainIndexBase,
        stridedData2,
        originalTransferOp->getDomainMap()->getComm(),
        stridedDoFullMap->getStridedBlockId(),
        stridedDoFullMap->getOffset());
  } else {
    fullDomainMap = MapFactory::Build(
        originalTransferOp->getDomainMap()->lib(),
        Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
        fullDomainMapGIDs,
        domainIndexBase,
        originalTransferOp->getDomainMap()->getComm());
  }

  // build map extractors
  Teuchos::RCP<const Xpetra::MapExtractor<Scalar, LocalOrdinal, GlobalOrdinal, Node> > rangeMapExtractor  = Xpetra::MapExtractorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(fullRangeMap,  subBlockRRangeMaps);
  Teuchos::RCP<const Xpetra::MapExtractor<Scalar, LocalOrdinal, GlobalOrdinal, Node> > domainMapExtractor = Xpetra::MapExtractorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(fullDomainMap, subBlockRDomainMaps);

  // assemble the rebalanced blocked operator from the stored diagonal blocks
  Teuchos::RCP<BlockedCrsMatrix> bRebR = Teuchos::rcp(new BlockedCrsMatrix(rangeMapExtractor,domainMapExtractor,10));
  for(size_t i = 0; i<subBlockRRangeMaps.size(); i++) {
    Teuchos::RCP<const CrsMatrixWrap> crsOpii = Teuchos::rcp_dynamic_cast<const CrsMatrixWrap>(subBlockRebR[i]);
    Teuchos::RCP<CrsMatrix> crsMatii = crsOpii->getCrsMatrix();
    bRebR->setMatrix(i,i,crsMatii);
  }

  bRebR->fillComplete();

  Set(coarseLevel, "R", Teuchos::rcp_dynamic_cast<Matrix>(bRebR)); // do nothing  // TODO remove this!
} // Build