// Constructor.
//
// Stores the number of rows to extract (localRows.size ()), allocates
// the numRows_ x numRows_ dense diagonal block and the pivot array,
// and creates a Map over a single-process communicator for the
// extracted block.
//
// \param matrix [in] The input matrix.  It must have a column Map.
// \param localRows [in] Local row indices of the rows to extract.
//   Every entry must be a local index in the matrix's row Map on the
//   calling process.
//
// \throw std::invalid_argument if the matrix has no column Map, or if
//   any entry of localRows is not a local index in the row Map.
DenseContainer<MatrixType, LocalScalarType>::
DenseContainer (const Teuchos::RCP<const row_matrix_type>& matrix,
                const Teuchos::ArrayView<const local_ordinal_type>& localRows) :
  Container<MatrixType> (matrix, localRows),
  numRows_ (localRows.size ()),
  diagBlock_ (numRows_, numRows_),
  ipiv_ (numRows_, 0)
{
  using Teuchos::Array;
  using Teuchos::ArrayView;
  using Teuchos::RCP;
  using Teuchos::rcp;
  using Teuchos::toString;
  typedef Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type> map_type;
  typedef typename ArrayView<const local_ordinal_type>::size_type size_type;

  TEUCHOS_TEST_FOR_EXCEPTION(
    ! matrix->hasColMap (), std::invalid_argument, "Ifpack2::DenseContainer: "
    "The constructor's input matrix must have a column Map.");

  // Check whether the input set of local row indices is correct.
  // We deliberately do NOT stop at the first invalid index: the
  // exception message below reports "the following entries", so it
  // must list every offending entry, not just the first one.
  const map_type& rowMap = * (matrix->getRowMap ());
  const size_type numRows = localRows.size ();
  bool rowIndicesValid = true;
  Array<local_ordinal_type> invalidLocalRowIndices;
  for (size_type i = 0; i < numRows; ++i) {
    if (! rowMap.isNodeLocalElement (localRows[i])) {
      rowIndicesValid = false;
      invalidLocalRowIndices.push_back (localRows[i]);
    }
  }
  TEUCHOS_TEST_FOR_EXCEPTION(
    ! rowIndicesValid, std::invalid_argument, "Ifpack2::DenseContainer: "
    "On process " << rowMap.getComm ()->getRank () << " of "
    << rowMap.getComm ()->getSize () << ", in the given set of local row "
    "indices localRows = " << toString (localRows) << ", the following "
    "entries are not valid local row indices on the calling process: "
    << toString (invalidLocalRowIndices) << ".");

  // The Map for the extracted block lives on a communicator that
  // contains only the calling process.
#ifdef HAVE_MPI
  RCP<const Teuchos::Comm<int> > localComm =
    rcp (new Teuchos::MpiComm<int> (MPI_COMM_SELF));
#else
  RCP<const Teuchos::Comm<int> > localComm =
    rcp (new Teuchos::SerialComm<int> ());
#endif // HAVE_MPI

  // FIXME (mfh 25 Aug 2013) What if the matrix's row Map has a
  // different index base than zero?
  const global_ordinal_type indexBase = 0;
  localMap_ = rcp (new map_type (numRows_, indexBase, localComm));
}
// Debug-only consistency check of the stored operator chain.
//
// When TEUCHOS_DEBUG is defined, this walks Ops_ from front to back,
// asserts that every entry holds a non-null operator, and asserts
// that each operator's spaces are compatible with those of the
// operator that follows it.  If any check throws, the object is
// uninitialized before the exception is propagated.  When
// TEUCHOS_DEBUG is not defined, this function does nothing.
void DefaultMultipliedLinearOp<Scalar>::validateOps()
{
  using Teuchos::toString;
#ifdef TEUCHOS_DEBUG
  try {
    const int numOps = Ops_.size();
    const int lastOp = numOps - 1;
    for (int k = 0; k < numOps; ++k) {
      TEST_FOR_EXCEPT( Ops_[k]().get() == NULL );
      if (k >= lastOp) {
        // The last operator has no right-hand neighbor to check against.
        continue;
      }
      THYRA_ASSERT_LINEAR_OP_TIMES_LINEAR_OP_SPACES_NAMES(
        "DefaultMultipliedLinearOp<Scalar>::initialize(...)"
        ,*Ops_[k],NOTRANS,("Ops["+toString(k)+"]")
        ,*Ops_[k+1],NOTRANS,("Ops["+toString(k+1)+"]")
        );
    }
  }
  catch (...) {
    // Leave the object in a clean state, then let the caller see the
    // original exception.
    uninitialize();
    throw;
  }
#endif
}
// Extract the dense diagonal block from the given matrix.
//
// The rows to extract are those returned by this->getLocalRows ().
// The block diagBlock_ is first zeroed, then filled with the entries
// of those rows whose column indices also occur in the set of local
// rows.  Extraction is entirely local to the calling process; the
// communicator is only queried to build error messages.
//
// \param globalMatrix [in] The matrix from which to extract.
//
// \throw std::runtime_error if a local row index is out of range for
//   globalMatrix, or if a row's global index is in the domain Map but
//   has no corresponding local index in the column Map.
// \throw std::logic_error if a local row index is not in the row Map
//   of globalMatrix on the calling process.
void DenseContainer<MatrixType,LocalScalarType>::
extract (const Teuchos::RCP<const row_matrix_type>& globalMatrix)
{
  using Teuchos::Array;
  using Teuchos::ArrayView;
  using Teuchos::toString;
  typedef local_ordinal_type LO;
  typedef global_ordinal_type GO;
  typedef Tpetra::Map<LO, GO, node_type> map_type;
  const size_t inputMatrixNumRows = globalMatrix->getNodeNumRows ();

  // We only use the rank of the calling process and the number of MPI
  // processes for generating error messages.  Extraction itself is
  // entirely local to each participating MPI process.
  const int myRank = globalMatrix->getRowMap ()->getComm ()->getRank ();
  const int numProcs = globalMatrix->getRowMap ()->getComm ()->getSize ();

  // Sanity check that the local row indices to extract fall within
  // the valid range of local row indices for the input matrix.  The
  // range is printed half-open, [0, N), so that the message stays
  // correct even when the input matrix has zero local rows (N - 1
  // would wrap around for an unsigned N == 0).
  ArrayView<const LO> localRows = this->getLocalRows ();
  for (size_t j = 0; j < numRows_; ++j) {
    TEUCHOS_TEST_FOR_EXCEPTION(
      localRows[j] < 0 ||
      static_cast<size_t> (localRows[j]) >= inputMatrixNumRows,
      std::runtime_error, "Ifpack2::DenseContainer::extract: On process " <<
      myRank << " of " << numProcs << ", localRows[j=" << j << "] = " <<
      localRows[j] << ", which is out of the valid range of local row "
      "indices [0, " << inputMatrixNumRows << ") for the input matrix.");
  }

  // Convert the local row indices we want into local column indices.
  // For every local row ii_local = localRows[i] we take, we also want
  // to take the corresponding column.  To find the corresponding
  // column, we use the row Map to convert the local row index
  // ii_local into a global index ii_global, and then use the column
  // Map to convert ii_global into a local column index jj_local.  If
  // the input matrix doesn't have a column Map, we need to be using
  // global indices anyway...

  // We use the domain Map to exclude off-process global entries.
  const map_type& globalRowMap = * (globalMatrix->getRowMap ());
  const map_type& globalColMap = * (globalMatrix->getColMap ());
  const map_type& globalDomMap = * (globalMatrix->getDomainMap ());

  bool rowIndsValid = true;
  bool colIndsValid = true;
  // NOTE(review): localCols is filled in below but is not read again
  // in this function; the extraction loop matches column indices
  // against localRows instead (see the FIXME there).
  Array<LO> localCols (numRows_);
  // For error messages, collect the sets of invalid row indices and
  // invalid column indices.  They are otherwise not useful.  We keep
  // scanning after the first invalid index, because the messages
  // below promise the complete sets.
  Array<LO> invalidLocalRowInds;
  Array<GO> invalidGlobalColInds;
  for (size_t i = 0; i < numRows_; ++i) {
    // ii_local is the (local) row index we want to look up.
    const LO ii_local = localRows[i];
    // Find the global index jj_global corresponding to ii_local.
    // Global indices are the same (rather, are required to be the
    // same) in all three Maps, which is why we use jj (suggesting a
    // column index, which is how we will use it below).
    const GO jj_global = globalRowMap.getGlobalElement (ii_local);
    if (jj_global == Teuchos::OrdinalTraits<GO>::invalid ()) {
      // If ii_local is not a local index in the row Map on the
      // calling process, that means localRows is incorrect.  We've
      // already checked for this in the constructor, but we might as
      // well check again here, since it's cheap to do so (just an
      // integer comparison, since we need jj_global anyway).
      rowIndsValid = false;
      invalidLocalRowInds.push_back (ii_local);
      continue;
    }
    // Exclude "off-process" entries: that is, those in the column Map
    // on this process that are not in the domain Map on this process.
    if (globalDomMap.isNodeGlobalElement (jj_global)) {
      // jj_global is not an off-process entry.  Look up its local
      // index in the column Map; we want to extract this column index
      // from the input matrix.  If jj_global is _not_ in the column
      // Map on the calling process, that could mean that the column
      // in question is empty on this process.  That would be bad for
      // solving linear systems with the extract submatrix.  We could
      // solve the resulting singular linear systems in a minimum-norm
      // least-squares sense, but for now we simply raise an exception.
      const LO jj_local = globalColMap.getLocalElement (jj_global);
      if (jj_local == Teuchos::OrdinalTraits<local_ordinal_type>::invalid ()) {
        colIndsValid = false;
        invalidGlobalColInds.push_back (jj_global);
        continue;
      }
      localCols[i] = jj_local;
    }
  }

  TEUCHOS_TEST_FOR_EXCEPTION(
    ! rowIndsValid, std::logic_error, "Ifpack2::DenseContainer::extract: "
    "On process " << myRank << ", at least one row index in the set of local "
    "row indices given to the constructor is not a valid local row index in "
    "the input matrix's row Map on this process.  This should be impossible "
    "because the constructor checks for this case.  Here is the complete set "
    "of invalid local row indices: " << toString (invalidLocalRowInds) << ".  "
    "Please report this bug to the Ifpack2 developers.");
  TEUCHOS_TEST_FOR_EXCEPTION(
    ! colIndsValid, std::runtime_error, "Ifpack2::DenseContainer::extract: "
    "On process " << myRank << ", "
    "At least one row index in the set of row indices given to the constructor "
    "does not have a corresponding column index in the input matrix's column "
    "Map.  This probably means that the column(s) in question is/are empty on "
    "this process, which would make the submatrix to extract structurally "
    "singular.  Here is the complete set of invalid global column indices: "
    << toString (invalidGlobalColInds) << ".");

  diagBlock_.putScalar (Teuchos::ScalarTraits<local_scalar_type>::zero ());

  // Scratch space large enough to hold any single row of the matrix.
  const size_t maxNumEntriesInRow = globalMatrix->getNodeMaxNumRowEntries ();
  Array<scalar_type> val (maxNumEntriesInRow);
  Array<local_ordinal_type> ind (maxNumEntriesInRow);

  const local_ordinal_type INVALID =
    Teuchos::OrdinalTraits<local_ordinal_type>::invalid ();
  for (size_t i = 0; i < numRows_; ++i) {
    const local_ordinal_type localRow = localRows[i];
    size_t numEntries;
    globalMatrix->getLocalRowCopy (localRow, ind (), val (), numEntries);

    for (size_t k = 0; k < numEntries; ++k) {
      const local_ordinal_type localCol = ind[k];

      // Skip off-process elements
      //
      // FIXME (mfh 24 Aug 2013) This assumes the following:
      //
      // 1. The column and row Maps begin with the same set of
      //    on-process entries, in the same order.  That is,
      //    on-process row and column indices are the same.
      // 2. All off-process indices in the column Map of the input
      //    matrix occur after that initial set.
      if (localCol >= 0 &&
          static_cast<size_t> (localCol) < inputMatrixNumRows) {
        // for local column IDs, look for each ID in the list
        // of columns hosted by this object.  The scan does not stop
        // at the first hit, so the last matching position wins.
        local_ordinal_type jj = INVALID;
        for (size_t kk = 0; kk < numRows_; ++kk) {
          if (localRows[kk] == localCol) {
            jj = kk;
          }
        }

        if (jj != INVALID) {
          diagBlock_ (i, jj) += val[k]; // ???
        }
      }
    }
  }
}
// Print this Export's data to the given output stream.
//
// Each process of the source Map's communicator takes a turn writing
// its own index lists and counts; barriers separate the turns.  After
// that, the source Map, the target Map, and the Distributor are
// written, each introduced by a heading that only Process 0 prints.
//
// \param os [in/out] Stream to which to write.
void Export<LocalOrdinal,GlobalOrdinal,Node>::
print (std::ostream& os) const
{
  using Teuchos::Comm;
  using Teuchos::getFancyOStream;
  using Teuchos::RCP;
  using Teuchos::rcpFromRef;
  using Teuchos::toString;
  using std::endl;

  RCP<const Comm<int> > comm = getSourceMap ()->getComm ();
  const int rank = comm->getRank ();
  const int commSize = comm->getSize ();
  const bool onRoot = (rank == 0);

  for (int turn = 0; turn < commSize; ++turn) {
    if (turn == rank) {
      os << endl;
      if (onRoot) {
        // Only the root process prints the overall heading.
        os << "Export Data Members:" << endl;
      }
      os << "Image ID : " << rank << endl;

      // Index lists.
      os << "permuteFromLIDs: " << toString (getPermuteFromLIDs ()) << endl;
      os << "permuteToLIDs : " << toString (getPermuteToLIDs ()) << endl;
      os << "remoteLIDs : " << toString (getRemoteLIDs ()) << endl;
      os << "exportLIDs : " << toString (getExportLIDs ()) << endl;
      os << "exportPIDs : " << toString (getExportPIDs ()) << endl;

      // Counts.
      os << "numSameIDs : " << getNumSameIDs () << endl;
      os << "numPermuteIDs : " << getNumPermuteIDs () << endl;
      os << "numRemoteIDs : " << getNumRemoteIDs () << endl;
      os << "numExportIDs : " << getNumExportIDs () << endl;
    }
    // A few global barriers give output a chance to complete.
    for (int rep = 0; rep < 3; ++rep) {
      comm->barrier ();
    }
  }

  // Source Map.
  if (onRoot) {
    os << endl << endl << "Source Map:" << endl << std::flush;
  }
  comm->barrier ();
  os << *getSourceMap ();
  comm->barrier ();

  // Target Map.
  if (onRoot) {
    os << endl << endl << "Target Map:" << endl << std::flush;
  }
  comm->barrier ();
  os << *getTargetMap ();
  comm->barrier ();

  // It's also helpful for debugging to print the Distributor
  // object.  Epetra_Export::Print() does this, so we can do a
  // side-by-side comparison.
  if (onRoot) {
    os << endl << endl << "Distributor:" << endl << std::flush;
  }
  comm->barrier ();
  getDistributor ().describe (*(getFancyOStream (rcpFromRef (os))),
                              Teuchos::VERB_EXTREME);
}
// This test is only meaningful in an MPI build. TEUCHOS_UNIT_TEST( Map, replaceCommWithSubset ) { typedef int local_ordinal_type; typedef long global_ordinal_type; typedef Tpetra::Map<local_ordinal_type, global_ordinal_type> map_type; typedef Array<global_ordinal_type>::size_type size_type; RCP<const Comm<int> > origComm = rcp (new MpiComm<int> (MPI_COMM_WORLD)); const int numProcs = origComm->getSize (); const int myRank = origComm->getRank (); // Create a Map in which all processes have a nonzero number of elements. const size_type numGidsPerProc = 3; const size_type myNumGids = numGidsPerProc; Array<global_ordinal_type> myGids (myNumGids); for (size_type k = 0; k < myNumGids; ++k) { myGids[k] = as<global_ordinal_type> (myRank) * as<global_ordinal_type> (numGidsPerProc) + as<global_ordinal_type> (k); } const global_size_t globalNumElts = as<global_size_t> (numGidsPerProc) * as<global_size_t> (numProcs); const global_ordinal_type indexBase = 0; RCP<const map_type> origMap (new map_type (globalNumElts, myGids (), indexBase, origComm)); // Create a new communicator that excludes Proc 0. // This will exercise recomputing the index base. const int color = (myRank == 0) ? 0 : 1; const int key = 0; RCP<const Comm<int> > newComm = origComm->split (color, key); if (myRank == 0) { newComm = null; } // Create the new Map distributed over the subset communicator. RCP<const map_type> newMap = origMap->replaceCommWithSubset (newComm); // Test collectively for success, so the test doesn't hang on failure. int localSuccess = 1; std::ostringstream err; if (myRank == 0) { if (! newMap.is_null ()) { localSuccess = 0; err << "removeEmptyProcesses() should have returned null, but did not." << endl; } } else { if (newMap.is_null ()) { localSuccess = 0; err << "removeEmptyProcesses() should not have returned null, but did." 
<< endl; } else { RCP<const Comm<int> > theNewComm = newMap->getComm (); if (theNewComm->getSize () != numProcs - 1) { localSuccess = 0; err << "New communicator should have " << (numProcs - 1) << " processes, but has " << theNewComm->getSize () << " processes instead." << endl; } if (newMap->getGlobalNumElements () != origMap->getGlobalNumElements () - numGidsPerProc) { localSuccess = 0; err << "New Map has " << newMap->getGlobalNumElements () << " global " << "elements, but should have " << (origMap->getGlobalNumElements () - numGidsPerProc) << "." << endl; } if (newMap->getNodeNumElements () != origMap->getNodeNumElements ()) { localSuccess = 0; err << "New Map has " << newMap->getNodeNumElements () << " local " << "elements, but should have " << origMap->getNodeNumElements () << "." << endl; } if (newMap->getIndexBase () != as<global_ordinal_type> (numGidsPerProc)) { localSuccess = 0; err << "New Map has index base " << newMap->getIndexBase () << ", but should have index base " << numGidsPerProc << "." << endl; } ArrayView<const global_ordinal_type> myNewGids = newMap->getNodeElementList (); if (myNewGids.size () != myGids.size () || ! std::equal (myNewGids.begin (), myNewGids.end (), myGids.begin ())) { localSuccess = 0; err << "New Map has local GID list " << toString (myNewGids) << ", but " << "should have local GID list " << toString (myGids ()) << "." << endl; } } } int globalSuccess = 0; reduceAll (*origComm, REDUCE_MIN, localSuccess, outArg (globalSuccess)); if (globalSuccess == 0) { if (myRank == 0) { cerr << "TEST FAILED" << endl << "Error messages from each process:" << endl << endl; } for (int p = 0; p < numProcs; ++p) { if (myRank == p) { cerr << "Process " << myRank << ": " << err.str () << endl; } origComm->barrier (); // Give time for output to finish. origComm->barrier (); origComm->barrier (); } } TEST_EQUALITY(globalSuccess, 1); }
// Return the list of parameters that this solver interface accepts,
// together with their default values and validators.
//
// The list is built on the first call and stored in a function-local
// static; later calls return the stored list.  Default values are
// taken from the iparm array filled in by pardisoinit() for the
// current matrix type mtype_.  The PARDISO parameter names are
// one-based ("IPARM(n)"), while iparm_temp is a zero-based C array,
// so the default for IPARM(n) is iparm_temp[n-1].
//
// NOTE(review): the string-to-int validators are inserted into
// `validators` only while the list is being built, i.e. on the first
// call.  Confirm that this is what callers of `validators` expect.
Teuchos::RCP<const Teuchos::ParameterList>
PardisoMKL<Matrix,Vector>::getValidParameters_impl() const
{
  using std::string;
  using Teuchos::as;
  using Teuchos::RCP;
  using Teuchos::tuple;
  using Teuchos::toString;
  using Teuchos::EnhancedNumberValidator;
  using Teuchos::setStringToIntegralParameter;
  using Teuchos::anyNumberParameterEntryValidator;
  using Teuchos::stringToIntegralParameterEntryValidator;
  typedef Teuchos::StringToIntegralParameterEntryValidator<int> STIPEV;
  Teuchos::AnyNumberParameterEntryValidator::EPreferredType preferred_int =
    Teuchos::AnyNumberParameterEntryValidator::PREFER_INT;

  static Teuchos::RCP<const Teuchos::ParameterList> valid_params;

  if( is_null(valid_params) ){
    Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();

    // Use pardisoinit to get some default values;
    void *pt_dummy[64];
    PMKL::_INTEGER_t mtype_temp = mtype_;
    PMKL::_INTEGER_t iparm_temp[64];
    PMKL::pardisoinit(pt_dummy,
                      const_cast<PMKL::_INTEGER_t*>(&mtype_temp),
                      const_cast<PMKL::_INTEGER_t*>(iparm_temp));

    // Initialize our parameter validators, saving the string to int validators for later
    RCP<STIPEV> iparm_2_validator
      = stringToIntegralParameterEntryValidator<int>(tuple<string>("0", "2", "3"),
                                                     tuple<string>("The minimum degree algorithm",
                                                                   "Nested dissection algorithm from METIS",
                                                                   "OpenMP parallel nested dissection algorithm"),
                                                     tuple<int>(0, 2, 3),
                                                     toString(iparm_temp[1]));
    validators.insert( std::pair<int,RCP<STIPEV> >(2, iparm_2_validator) );

    Teuchos::RCP<EnhancedNumberValidator<int> > iparm_4_validator
      = Teuchos::rcp( new EnhancedNumberValidator<int>() );
    iparm_4_validator->setMin(0);

    RCP<STIPEV> iparm_24_validator
      = stringToIntegralParameterEntryValidator<int>(tuple<string>("0", "1"),
                                                     tuple<string>("PARDISO uses the previous algorithm for factorization",
                                                                   "PARDISO uses the new two-level factorization algorithm"),
                                                     tuple<int>(0, 1),
                                                     toString(iparm_temp[23]));
    validators.insert( std::pair<int,RCP<STIPEV> >(24, iparm_24_validator) );

    RCP<STIPEV> iparm_25_validator
      = stringToIntegralParameterEntryValidator<int>(tuple<string>("0", "1"),
                                                     tuple<string>("PARDISO uses the parallel algorithm for the solve step",
                                                                   "PARDISO uses the sequential forward and backward solve"),
                                                     tuple<int>(0, 1),
                                                     toString(iparm_temp[24]));
    validators.insert( std::pair<int,RCP<STIPEV> >(25, iparm_25_validator) );

    RCP<STIPEV> iparm_60_validator
      = stringToIntegralParameterEntryValidator<int>(tuple<string>("0", "2"),
                                                     tuple<string>("In-core PARDISO",
                                                                   "Out-of-core PARDISO.  The OOC PARDISO can solve very "
                                                                   "large problems by holding the matrix factors in files "
                                                                   "on the disk. Hence the amount of RAM required by OOC "
                                                                   "PARDISO is significantly reduced."),
                                                     tuple<int>(0, 2),
                                                     toString(iparm_temp[59]));
    validators.insert( std::pair<int,RCP<STIPEV> >(60, iparm_60_validator) );

    // Validator for the plain integer parameters: accept only int.
    Teuchos::AnyNumberParameterEntryValidator::AcceptedTypes accept_int( false );
    accept_int.allowInt( true );

    pl->set("IPARM(2)" , validators[2]->getDefaultParameterName(),
            "Fill-in reducing ordering for the input matrix", validators[2]);

    pl->set("IPARM(4)" , as<int>(iparm_temp[3]) , "Preconditioned CGS/CG",
            iparm_4_validator);

    // IPARM(8) lives at zero-based index 7 (it was previously read
    // from index 8, which is IPARM(9)).
    pl->set("IPARM(8)" , as<int>(iparm_temp[7]) , "Iterative refinement step",
            anyNumberParameterEntryValidator(preferred_int, accept_int));

    pl->set("IPARM(10)", as<int>(iparm_temp[9]) , "Pivoting perturbation",
            anyNumberParameterEntryValidator(preferred_int, accept_int));

    pl->set("IPARM(18)", as<int>(iparm_temp[17]),
            "Report the number of non-zero elements in the factors",
            anyNumberParameterEntryValidator(preferred_int, accept_int));

    pl->set("IPARM(24)", validators[24]->getDefaultParameterName(),
            "Parallel factorization control", validators[24]);

    pl->set("IPARM(25)", validators[25]->getDefaultParameterName(),
            "Parallel forward/backward solve control", validators[25]);

    pl->set("IPARM(60)", validators[60]->getDefaultParameterName(),
            "PARDISO mode (OOC mode)", validators[60]);

    valid_params = pl;
  }

  return valid_params;
}
// Test Tpetra::merge2 on a fixed index/value pair of arrays, once
// with the default merge policy and once with an explicit
// std::plus<double> policy.  Both runs must produce the same merged
// indices and values (indExp / valExp below).
TEUCHOS_UNIT_TEST( TpetraUtils, Merge2 )
{
  using Tpetra::merge2;
  using Teuchos::Array;
  using Teuchos::ArrayView;
  using Teuchos::OSTab;
  using Teuchos::toString;
  using std::endl;
  typedef Array<int>::size_type size_type;
  const size_type origNumEntries = 8;

  Array<int> ind (origNumEntries);
  ind[0] =  0;
  ind[1] =  1;
  ind[2] =  1;
  ind[3] =  3;
  ind[4] = -1;
  ind[5] = -1;
  ind[6] = -1;
  ind[7] =  0;
  Array<int> indCopy = ind; // deep copy

  Array<double> val (origNumEntries);
  val[0] =  42.0;
  val[1] =  -4.0;
  val[2] =  -3.0;
  val[3] =   1.5;
  val[4] =   1.0;
  val[5] =   2.0;
  val[6] =   3.0;
  val[7] = 100.0;
  Array<double> valCopy = val; // deep copy

  // Adjacent equal indices are merged and their values added.
  const int expNumEntries = 5;
  const int indExp[] = { 0, 1, 3, -1, 0 };
  const double valExp[] = {42.0, -7.0, 1.5, 6.0, 100.0};

  // Test merge2 with default merge policy (add).
  {
    Array<int>::iterator indEnd = ind.end ();
    Array<double>::iterator valEnd = val.end ();
    merge2 (indEnd, valEnd, ind.begin (), indEnd, val.begin (), valEnd);

    const size_type newIndLen = indEnd - ind.begin ();
    const size_type newValLen = valEnd - val.begin ();

    TEST_EQUALITY( newIndLen, expNumEntries );
    TEST_EQUALITY( newValLen, expNumEntries );

    // Only compare contents when the lengths match.  The
    // three-iterator std::equal reads as many elements from the
    // expected array as the actual range holds, so a too-long actual
    // range would otherwise read past the end of indExp / valExp.
    const bool indEq = (newIndLen == expNumEntries) &&
      std::equal (ind.begin (), indEnd, indExp);
    const bool valEq = (newValLen == expNumEntries) &&
      std::equal (val.begin (), valEnd, valExp);

    TEST_EQUALITY( indEq, true );
    TEST_EQUALITY( valEq, true );

    if (! valEq) {
      OSTab tab (out);
      out << "Input value range: " << toString (valCopy ()) << endl;
      out << "Expected output: "
          << toString (ArrayView<const double> (valExp, expNumEntries))
          << endl;
      out << "Actual output: " << toString (val.view (0, newValLen)) << endl;
    }
  }

  ind = indCopy; // deep copy; restore original values
  val = valCopy; // deep copy; restore original values

  // Test merge2 with custom merge policy (also add).
  {
    Array<int>::iterator indEnd = ind.end ();
    Array<double>::iterator valEnd = val.end ();
    merge2 (indEnd, valEnd, ind.begin (), indEnd, val.begin (), valEnd,
            std::plus<double> ());

    const size_type newIndLen = indEnd - ind.begin ();
    const size_type newValLen = valEnd - val.begin ();

    TEST_EQUALITY( newIndLen, expNumEntries );
    TEST_EQUALITY( newValLen, expNumEntries );

    // Same length guard as above, for the same reason.
    const bool indEq = (newIndLen == expNumEntries) &&
      std::equal (ind.begin (), indEnd, indExp);
    const bool valEq = (newValLen == expNumEntries) &&
      std::equal (val.begin (), valEnd, valExp);

    TEST_EQUALITY( indEq, true );
    TEST_EQUALITY( valEq, true );

    if (! valEq) {
      OSTab tab (out);
      out << "Input value range: " << toString (valCopy ()) << endl;
      out << "Expected output: "
          << toString (ArrayView<const double> (valExp, expNumEntries))
          << endl;
      out << "Actual output: " << toString (val.view (0, newValLen)) << endl;
    }
  }
}
// Record the range and domain spaces contributed by block (i,j).
//
// Requires that block fill is active and that (i,j) passes
// assertBlockRowCol().  In a debug build, if (i,j) lies inside the
// current block dimensions and a range (domain) space is already
// known for row i (column j), the incoming block's range (domain) is
// asserted to be compatible with it.  The lists of range and domain
// blocks are then grown with null entries up to index i and j, the
// block's spaces are stored in any still-empty slot (only when no
// product range space has been set), and, if no operator blocks have
// been stored yet, the block dimensions are updated to the list sizes.
//
// \param i [in] Block row index.
// \param j [in] Block column index.
// \param block [in] The operator block whose spaces are recorded.
void DefaultBlockedLinearOp<Scalar>::setBlockSpaces(
  const int i, const int j, const LinearOpBase<Scalar> &block
  )
{
  using Teuchos::toString;

  assertBlockFillIsActive(true);
  assertBlockRowCol(i,j);

  // Validate that if the vector space block is already set that it is
  // compatible with the block that is being set.
  const bool insideCurrentDims = (i < numRowBlocks_) && (j < numColBlocks_);
  if (insideCurrentDims) {
#ifdef TEUCHOS_DEBUG
    RCP<const VectorSpaceBase<Scalar> > rangeBlock =
      ( productRange_.get() ? productRange_->getBlock(i) : rangeBlocks_[i] );
    RCP<const VectorSpaceBase<Scalar> > domainBlock =
      ( productDomain_.get() ? productDomain_->getBlock(j) : domainBlocks_[j] );

    if (rangeBlock.get()) {
      THYRA_ASSERT_VEC_SPACES_NAMES(
        "DefaultBlockedLinearOp<Scalar>::setBlockSpaces(i,j,block):\n\n"
        "Adding block: " + block.description(),
        *rangeBlock,("(*productRange->getBlock("+toString(i)+"))"),
        *block.range(),("(*block["+toString(i)+","+toString(j)+"].range())")
        );
    }

    if (domainBlock.get()) {
      THYRA_ASSERT_VEC_SPACES_NAMES(
        "DefaultBlockedLinearOp<Scalar>::setBlockSpaces(i,j,block):\n\n"
        "Adding block: " + block.description(),
        *domainBlock,("(*productDomain->getBlock("+toString(j)+"))"),
        *block.domain(),("(*block["+toString(i)+","+toString(j)+"].domain())")
        );
    }
#endif // TEUCHOS_DEBUG
  }

  // Add spaces missing range and domain space blocks if we are doing a
  // flexible fill (otherwise these loops will not be executed)
  for (int row = numRowBlocks_; row <= i; ++row) {
    rangeBlocks_.push_back(Teuchos::null);
  }
  for (int col = numColBlocks_; col <= j; ++col) {
    domainBlocks_.push_back(Teuchos::null);
  }

  // Set the incoming range and domain blocks if not already set
  if (!productRange_.get()) {
    const bool rangeSlotEmpty = !rangeBlocks_[i].get();
    if (rangeSlotEmpty) {
      rangeBlocks_[i] = block.range().assert_not_null();
    }
    const bool domainSlotEmpty = !domainBlocks_[j].get();
    if (domainSlotEmpty) {
      domainBlocks_[j] = block.domain().assert_not_null();
    }
  }

  // Update the current number of row and columns blocks if doing a flexible
  // fill.
  if (Ops_.size() == 0) {
    numRowBlocks_ = rangeBlocks_.size();
    numColBlocks_ = domainBlocks_.size();
  }
}