int main(int argc, char* argv[]) { #ifdef HAVE_MPI MPI_Init(&argc,&argv); #endif // Creating an instance of the LAPACK class for double-precision routines looks like: Teuchos::LAPACK<int, double> lapack; // This instance provides the access to all the LAPACK routines. Teuchos::SerialDenseMatrix<int, double> My_Matrix(4,4); Teuchos::SerialDenseVector<int, double> My_Vector(4); My_Matrix.random(); My_Vector.random(); // Perform an LU factorization of this matrix. int ipiv[4], info; char TRANS = 'N'; lapack.GETRF( 4, 4, My_Matrix.values(), My_Matrix.stride(), ipiv, &info ); // Solve the linear system. lapack.GETRS( TRANS, 4, 1, My_Matrix.values(), My_Matrix.stride(), ipiv, My_Vector.values(), My_Vector.stride(), &info ); // Print out the solution. cout << My_Vector << endl; #ifdef HAVE_MPI MPI_Finalize(); #endif return 0; }
void Constraint<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Setup(const MultiVector& B, const MultiVector& Bc, RCP<const CrsGraph> Ppattern) { Ppattern_ = Ppattern; const RCP<const Map> uniqueMap = Ppattern_->getDomainMap(); const RCP<const Map> nonUniqueMap = Ppattern_->getColMap(); RCP<const Import> importer = ImportFactory::Build(uniqueMap, nonUniqueMap); const size_t NSDim = Bc.getNumVectors(); X_ = MultiVectorFactory::Build(nonUniqueMap, NSDim); X_->doImport(Bc, *importer, Xpetra::INSERT); size_t numRows = Ppattern_->getNodeNumRows(); XXtInv_.resize(numRows); Teuchos::SerialDenseVector<LO,SC> BcRow(NSDim, false); for (size_t i = 0; i < numRows; i++) { Teuchos::ArrayView<const LO> indices; Ppattern_->getLocalRowView(i, indices); size_t nnz = indices.size(); Teuchos::SerialDenseMatrix<LO,SC> locX(NSDim, nnz, false); for (size_t j = 0; j < nnz; j++) { for (size_t k = 0; k < NSDim; k++) BcRow[k] = X_->getData(k)[indices[j]]; Teuchos::setCol(BcRow, (LO)j, locX); } XXtInv_[i] = Teuchos::SerialDenseMatrix<LO,SC>(NSDim, NSDim, false); Teuchos::BLAS<LO,SC> blas; blas.GEMM(Teuchos::NO_TRANS, Teuchos::CONJ_TRANS, NSDim, NSDim, nnz, Teuchos::ScalarTraits<SC>::one(), locX.values(), locX.stride(), locX.values(), locX.stride(), Teuchos::ScalarTraits<SC>::zero(), XXtInv_[i].values(), XXtInv_[i].stride()); Teuchos::LAPACK<LO,SC> lapack; LO info, lwork = 3*NSDim; ArrayRCP<LO> IPIV(NSDim); ArrayRCP<SC> WORK(lwork); lapack.GETRF(NSDim, NSDim, XXtInv_[i].values(), XXtInv_[i].stride(), IPIV.get(), &info); lapack.GETRI(NSDim, XXtInv_[i].values(), XXtInv_[i].stride(), IPIV.get(), WORK.get(), lwork, &info); } }
int MxDimMatrix<T, DIM>::solve(MxDimVector<T, DIM> & x, const MxDimVector<T, DIM> & b) const { x = b; MxDimMatrix<T, DIM> copy(*this); Teuchos::LAPACK<int, T> lapack; MxDimVector<int, DIM> ipiv; //int ipiv[DIM]; int info; lapack.GETRF(DIM, DIM, ©(0, 0), DIM, &ipiv[0], &info); if (info != 0) std::cout << "MxDimMatrix::solve(...): error in lapack routine getrf. Return code: " << info << ".\n"; lapack.GETRS('T', DIM, 1, ©(0, 0), DIM, &ipiv[0], &x[0], DIM, &info); if (info != 0) std::cout << "MxDimMatrix::solve(...): error in lapack routine getrs. Return code: " << info << ".\n"; return info; }
void updateGuess(Teuchos::SerialDenseVector<int, std::complex<double> >& myCurrentGuess, Teuchos::SerialDenseVector<int, std::complex<double> >& myTargetsCalculated, Teuchos::SerialDenseMatrix<int, std::complex<double> >& myJacobian, Teuchos::LAPACK<int, std::complex<double> >& myLAPACK ) { //v = J(inverse) * (-F(x)) //new guess = v + old guess myTargetsCalculated *= -1.0; //Perform an LU factorization of this matrix. int ipiv[NUMDIMENSIONS], info; char TRANS = 'N'; myLAPACK.GETRF( NUMDIMENSIONS, NUMDIMENSIONS, myJacobian.values(), myJacobian.stride(), ipiv, &info ); // Solve the linear system. myLAPACK.GETRS( TRANS, NUMDIMENSIONS, 1, myJacobian.values(), myJacobian.stride(), ipiv, myTargetsCalculated.values(), myTargetsCalculated.stride(), &info ); //We have overwritten myTargetsCalculated with guess update values //myBLAS.AXPY(NUMDIMENSIONS, 1.0, myGuessAdjustment.values(), 1, myCurrentGuess.values(), 1); myCurrentGuess += myTargetsCalculated; }
void DenseContainer<MatrixType, LocalScalarType>::factor () { Teuchos::LAPACK<int, local_scalar_type> lapack; int INFO = 0; lapack.GETRF (diagBlock_.numRows (), diagBlock_.numCols (), diagBlock_.values (), diagBlock_.stride (), ipiv_.getRawPtr (), &INFO); // INFO < 0 is a bug. TEUCHOS_TEST_FOR_EXCEPTION( INFO < 0, std::logic_error, "Ifpack2::DenseContainer::factor: " "LAPACK's _GETRF (LU factorization with partial pivoting) was called " "incorrectly. INFO = " << INFO << " < 0. " "Please report this bug to the Ifpack2 developers."); // INFO > 0 means the matrix is singular. This is probably an issue // either with the choice of rows the rows we extracted, or with the // input matrix itself. TEUCHOS_TEST_FOR_EXCEPTION( INFO > 0, std::runtime_error, "Ifpack2::DenseContainer::factor: " "LAPACK's _GETRF (LU factorization with partial pivoting) reports that the " "computed U factor is exactly singular. U(" << INFO << "," << INFO << ") " "(one-based index i) is exactly zero. This probably means that the input " "matrix has a singular diagonal block."); }
NOX::Abstract::Group::ReturnType LOCA::BorderedSolver::LowerTriangularBlockElimination:: solve(Teuchos::ParameterList& params, const LOCA::BorderedSolver::AbstractOperator& op, const LOCA::MultiContinuation::ConstraintInterface& B, const NOX::Abstract::MultiVector::DenseMatrix& C, const NOX::Abstract::MultiVector* F, const NOX::Abstract::MultiVector::DenseMatrix* G, NOX::Abstract::MultiVector& X, NOX::Abstract::MultiVector::DenseMatrix& Y) const { string callingFunction = "LOCA::BorderedSolver::LowerTriangularBlockElimination::solve()"; NOX::Abstract::Group::ReturnType finalStatus = NOX::Abstract::Group::Ok; NOX::Abstract::Group::ReturnType status; // Determine if X or Y is zero bool isZeroF = (F == NULL); bool isZeroG = (G == NULL); bool isZeroB = B.isDXZero(); bool isZeroX = isZeroF; bool isZeroY = isZeroG && (isZeroB || isZeroX); // First compute X if (isZeroX) X.init(0.0); else { // Solve X = J^-1 F, note F must be nonzero status = op.applyInverse(params, *F, X); finalStatus = globalData->locaErrorCheck->combineAndCheckReturnTypes(status, finalStatus, callingFunction); } // Now compute Y if (isZeroY) Y.putScalar(0.0); else { // Compute G - B^T*X and store in Y if (isZeroG) B.multiplyDX(-1.0, X, Y); else { Y.assign(*G); if (!isZeroB && !isZeroX) { NOX::Abstract::MultiVector::DenseMatrix T(Y.numRows(),Y.numCols()); B.multiplyDX(1.0, X, T); Y -= T; } } // Overwrite Y with Y = C^-1 * (G - B^T*X) NOX::Abstract::MultiVector::DenseMatrix M(C); int *ipiv = new int[M.numRows()]; Teuchos::LAPACK<int,double> L; int info; L.GETRF(M.numRows(), M.numCols(), M.values(), M.stride(), ipiv, &info); if (info != 0) { status = NOX::Abstract::Group::Failed; finalStatus = globalData->locaErrorCheck->combineAndCheckReturnTypes( status, finalStatus, callingFunction); } L.GETRS('N', M.numRows(), Y.numCols(), M.values(), M.stride(), ipiv, Y.values(), Y.stride(), &info); delete [] ipiv; if (info != 0) { status = NOX::Abstract::Group::Failed; finalStatus = globalData->locaErrorCheck->combineAndCheckReturnTypes( status, finalStatus, callingFunction); } } return finalStatus; }
Basis_HGRAD_LINE_Cn_FEM<SpT,OT,PT>:: Basis_HGRAD_LINE_Cn_FEM( const ordinal_type order, const EPointType pointType ) { this->basisCardinality_ = order+1; this->basisDegree_ = order; this->basisCellTopology_ = shards::CellTopology(shards::getCellTopologyData<shards::Line<2> >() ); this->basisType_ = BASIS_FEM_FIAT; this->basisCoordinates_ = COORDINATES_CARTESIAN; const ordinal_type card = this->basisCardinality_; // points are computed in the host and will be copied Kokkos::DynRankView<typename scalarViewType::value_type,typename SpT::array_layout,Kokkos::HostSpace> dofCoords("Hgrad::Line::Cn::dofCoords", card, 1); switch (pointType) { case POINTTYPE_EQUISPACED: case POINTTYPE_WARPBLEND: { // lattice ordering { const ordinal_type offset = 0; PointTools::getLattice( dofCoords, this->basisCellTopology_, order, offset, pointType ); } // topological order // { // // two vertices // dofCoords(0,0) = -1.0; // dofCoords(1,0) = 1.0; // // internal points // typedef Kokkos::pair<ordinal_type,ordinal_type> range_type; // auto pts = Kokkos::subview(dofCoords, range_type(2, card), Kokkos::ALL()); // const auto offset = 1; // PointTools::getLattice( pts, // this->basisCellTopology_, // order, offset, // pointType ); // } break; } case POINTTYPE_GAUSS: { // internal points only PointTools::getGaussPoints( dofCoords, order ); break; } default: { INTREPID2_TEST_FOR_EXCEPTION( !isValidPointType(pointType), std::invalid_argument , ">>> ERROR: (Intrepid2::Basis_HGRAD_LINE_Cn_FEM) invalid pointType." ); } } this->dofCoords_ = Kokkos::create_mirror_view(typename SpT::memory_space(), dofCoords); Kokkos::deep_copy(this->dofCoords_, dofCoords); // form Vandermonde matrix; actually, this is the transpose of the VDM, // this matrix is used in LAPACK so it should be column major and left layout const ordinal_type lwork = card*card; Kokkos::DynRankView<typename scalarViewType::value_type,Kokkos::LayoutLeft,Kokkos::HostSpace> vmat("Hgrad::Line::Cn::vmat", card, card), work("Hgrad::Line::Cn::work", lwork), ipiv("Hgrad::Line::Cn::ipiv", card); const double alpha = 0.0, beta = 0.0; Impl::Basis_HGRAD_LINE_Cn_FEM_JACOBI:: getValues<Kokkos::HostSpace::execution_space,Parameters::MaxNumPtsPerBasisEval> (vmat, dofCoords, order, alpha, beta, OPERATOR_VALUE); ordinal_type info = 0; Teuchos::LAPACK<ordinal_type,typename scalarViewType::value_type> lapack; lapack.GETRF(card, card, vmat.data(), vmat.stride_1(), (ordinal_type*)ipiv.data(), &info); INTREPID2_TEST_FOR_EXCEPTION( info != 0, std::runtime_error , ">>> ERROR: (Intrepid2::Basis_HGRAD_LINE_Cn_FEM) lapack.GETRF returns nonzero info." ); lapack.GETRI(card, vmat.data(), vmat.stride_1(), (ordinal_type*)ipiv.data(), work.data(), lwork, &info); INTREPID2_TEST_FOR_EXCEPTION( info != 0, std::runtime_error , ">>> ERROR: (Intrepid2::Basis_HGRAD_LINE_Cn_FEM) lapack.GETRI returns nonzero info." ); // create host mirror Kokkos::DynRankView<typename scalarViewType::value_type,typename SpT::array_layout,Kokkos::HostSpace> vinv("Hgrad::Line::Cn::vinv", card, card); for (ordinal_type i=0;i<card;++i) for (ordinal_type j=0;j<card;++j) vinv(i,j) = vmat(j,i); this->vinv_ = Kokkos::create_mirror_view(typename SpT::memory_space(), vinv); Kokkos::deep_copy(this->vinv_ , vinv); // initialize tags { const bool is_vertex_included = (pointType != POINTTYPE_GAUSS); // Basis-dependent initializations const ordinal_type tagSize = 4; // size of DoF tag, i.e., number of fields in the tag const ordinal_type posScDim = 0; // position in the tag, counting from 0, of the subcell dim const ordinal_type posScOrd = 1; // position in the tag, counting from 0, of the subcell ordinal const ordinal_type posDfOrd = 2; // position in the tag, counting from 0, of DoF ordinal relative to the subcell ordinal_type tags[Parameters::MaxOrder+1][4]; // now we check the points for association if (is_vertex_included) { // lattice order { const auto v0 = 0; tags[v0][0] = 0; // vertex dof tags[v0][1] = 0; // vertex id tags[v0][2] = 0; // local dof id tags[v0][3] = 1; // total number of dofs in this vertex const ordinal_type iend = card - 2; for (ordinal_type i=0;i<iend;++i) { const auto e = i + 1; tags[e][0] = 1; // edge dof tags[e][1] = 0; // edge id tags[e][2] = i; // local dof id tags[e][3] = iend; // total number of dofs in this edge } const auto v1 = card -1; tags[v1][0] = 0; // vertex dof tags[v1][1] = 1; // vertex id tags[v1][2] = 0; // local dof id tags[v1][3] = 1; // total number of dofs in this vertex } // topological order // { // tags[0][0] = 0; // vertex dof // tags[0][1] = 0; // vertex id // tags[0][2] = 0; // local dof id // tags[0][3] = 1; // total number of dofs in this vertex // tags[1][0] = 0; // vertex dof // tags[1][1] = 1; // vertex id // tags[1][2] = 0; // local dof id // tags[1][3] = 1; // total number of dofs in this vertex // const ordinal_type iend = card - 2; // for (ordinal_type i=0;i<iend;++i) { // const auto ii = i + 2; // tags[ii][0] = 1; // edge dof // tags[ii][1] = 0; // edge id // tags[ii][2] = i; // local dof id // tags[ii][3] = iend; // total number of dofs in this edge // } // } } else { for (ordinal_type i=0;i<card;++i) { tags[i][0] = 1; // edge dof tags[i][1] = 0; // edge id tags[i][2] = i; // local dof id tags[i][3] = card; // total number of dofs in this edge } } ordinal_type_array_1d_host tagView(&tags[0][0], card*4); // Basis-independent function sets tag and enum data in tagToOrdinal_ and ordinalToTag_ arrays: // tags are constructed on host this->setOrdinalTagData(this->tagToOrdinal_, this->ordinalToTag_, tagView, this->basisCardinality_, tagSize, posScDim, posScOrd, posDfOrd); } }
void Constraint<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Setup(const MultiVector& B, const MultiVector& Bc, RCP<const CrsGraph> Ppattern) { const size_t NSDim = Bc.getNumVectors(); Ppattern_ = Ppattern; size_t numRows = Ppattern_->getNodeNumRows(); XXtInv_.resize(numRows); RCP<const Import> importer = Ppattern_->getImporter(); X_ = MultiVectorFactory::Build(Ppattern_->getColMap(), NSDim); if (!importer.is_null()) X_->doImport(Bc, *importer, Xpetra::INSERT); else *X_ = Bc; std::vector<const SC*> Xval(NSDim); for (size_t j = 0; j < NSDim; j++) Xval[j] = X_->getData(j).get(); SC zero = Teuchos::ScalarTraits<SC>::zero(); SC one = Teuchos::ScalarTraits<SC>::one(); Teuchos::BLAS <LO,SC> blas; Teuchos::LAPACK<LO,SC> lapack; LO lwork = 3*NSDim; ArrayRCP<LO> IPIV(NSDim); ArrayRCP<SC> WORK(lwork); for (size_t i = 0; i < numRows; i++) { Teuchos::ArrayView<const LO> indices; Ppattern_->getLocalRowView(i, indices); size_t nnz = indices.size(); XXtInv_[i] = Teuchos::SerialDenseMatrix<LO,SC>(NSDim, NSDim, false/*zeroOut*/); Teuchos::SerialDenseMatrix<LO,SC>& XXtInv = XXtInv_[i]; if (NSDim == 1) { SC d = zero; for (size_t j = 0; j < nnz; j++) d += Xval[0][indices[j]] * Xval[0][indices[j]]; XXtInv(0,0) = one/d; } else { Teuchos::SerialDenseMatrix<LO,SC> locX(NSDim, nnz, false/*zeroOut*/); for (size_t j = 0; j < nnz; j++) for (size_t k = 0; k < NSDim; k++) locX(k,j) = Xval[k][indices[j]]; // XXtInv_ = (locX*locX^T)^{-1} blas.GEMM(Teuchos::NO_TRANS, Teuchos::CONJ_TRANS, NSDim, NSDim, nnz, one, locX.values(), locX.stride(), locX.values(), locX.stride(), zero, XXtInv.values(), XXtInv.stride()); LO info; // Compute LU factorization using partial pivoting with row exchanges lapack.GETRF(NSDim, NSDim, XXtInv.values(), XXtInv.stride(), IPIV.get(), &info); // Use the computed factorization to compute the inverse lapack.GETRI(NSDim, XXtInv.values(), XXtInv.stride(), IPIV.get(), WORK.get(), lwork, &info); } } }
//---------------------------------------------------------------------------// // Tests. //---------------------------------------------------------------------------// TEUCHOS_UNIT_TEST( LAPACK, block_inversion ) { // Build a 4x4 block. int m = 4; int n = 4; Teuchos::SerialDenseMatrix<int,double> block( m, n ); block(0,0) = 3.2; block(0,1) = -1.43; block(0,2) = 2.98; block(0,3) = 0.32; block(1,0) = -4.12; block(1,1) = -7.53; block(1,2) = 1.44; block(1,3) = -3.72; block(2,0) = 4.24; block(2,1) = -6.42; block(2,2) = 1.82; block(2,3) = 2.67; block(3,0) = -0.23; block(3,1) = 5.8; block(3,2) = 1.13; block(3,3) = -3.73; // Make a LAPACK object. Teuchos::LAPACK<int,double> lapack; // Compute the LU-factorization of the block. Teuchos::ArrayRCP<int> ipiv( block.numRows() ); int info = 0; int lda = m; lapack.GETRF( m, n, block.values(), lda, ipiv.getRawPtr(), &info ); TEST_EQUALITY( info, 0 ); // Compute the inverse of the block from the LU-factorization. Teuchos::ArrayRCP<double> work( m ); lapack.GETRI( n, block.values(), lda, ipiv.getRawPtr(), work.getRawPtr(), work.size(), &info ); TEST_EQUALITY( info, 0 ); TEST_EQUALITY( work[0], m ); // Check the inversion against matlab. TEST_FLOATING_EQUALITY( block(0,0), -0.461356423424245, 1.0e-14 ); TEST_FLOATING_EQUALITY( block(0,1), -0.060920073472551, 1.0e-14 ); TEST_FLOATING_EQUALITY( block(0,2), 0.547244760641934, 1.0e-14 ); TEST_FLOATING_EQUALITY( block(0,3), 0.412904055961420, 1.0e-14 ); TEST_FLOATING_EQUALITY( block(1,0), 0.154767451798665, 1.0e-14 ); TEST_FLOATING_EQUALITY( block(1,1), -0.056225122550555, 1.0e-14 ); TEST_FLOATING_EQUALITY( block(1,2), -0.174451348828054, 1.0e-14 ); TEST_FLOATING_EQUALITY( block(1,3), -0.055523340725809, 1.0e-14 ); TEST_FLOATING_EQUALITY( block(2,0), 0.848746201780808, 1.0e-14 ); TEST_FLOATING_EQUALITY( block(2,1), 0.045927762119214, 1.0e-14 ); TEST_FLOATING_EQUALITY( block(2,2), -0.618485718805259, 1.0e-14 ); TEST_FLOATING_EQUALITY( block(2,3), -0.415712965073367, 1.0e-14 ); TEST_FLOATING_EQUALITY( block(3,0), 0.526232280383953, 1.0e-14 ); TEST_FLOATING_EQUALITY( block(3,1), -0.069757566407458, 1.0e-14 ); TEST_FLOATING_EQUALITY( block(3,2), -0.492378815120724, 1.0e-14 ); TEST_FLOATING_EQUALITY( block(3,3), -0.505833501236923, 1.0e-14 ); }