double BasisAngle( const Epetra_MultiVector& S, const Epetra_MultiVector& T ) { // // Computes the largest acute angle between the two orthogonal basis // if (S.NumVectors() != T.NumVectors()) { return acos( 0.0 ); } else { int lwork, info = 0; int m = S.NumVectors(), n = T.NumVectors(); int num_vecs = ( m < n ? m : n ); double U[ 1 ], Vt[ 1 ]; lwork = 3*m*n; std::vector<double> work( lwork ); std::vector<double> theta( num_vecs ); Epetra_LAPACK lapack; Epetra_LocalMap localMap( S.NumVectors(), 0, S.Map().Comm() ); Epetra_MultiVector Pvec( localMap, T.NumVectors() ); info = Pvec.Multiply( 'T', 'N', 1.0, S, T, 0.0 ); // // Perform SVD on S^T*T // lapack.GESVD( 'N', 'N', num_vecs, num_vecs, Pvec.Values(), Pvec.Stride(), &theta[0], U, 1, Vt, 1, &work[0], &lwork, &info ); assert( info == 0 ); return (acos( theta[num_vecs-1] ) ); } // // Default return statement, should never be executed. // return acos( 0.0 ); }
void projection(double *z, int *options, int *proc_config, double *params, AZ_MATRIX_STRUCT *Amat, AZ_PREC_STRUCT *prec) { Epetra_BLAS callBLAS; Epetra_LAPACK callLAPACK; // // Do one Jacobi step // if (Amat->data_org[AZ_matrix_type] == AZ_MSR_MATRIX) { // if (commCoarse->MyPID() == 0) // cout << " Do ONE JACOBI STEP " << endl; // for (int i = 0; i < rowQcoarse; ++i) // z[i] /= Amat->val[i]; // } double *tmp = new double[2*colQcoarse]; memset(tmp, 0, 2*colQcoarse*sizeof(double)); int info = 0; for (int i = 0; i < 2; ++i) { if (rowQcoarse > 0) { callBLAS.GEMV('T',rowQcoarse,colQcoarse,1.0,Qcoarse,rowQcoarse,z,0.0,tmp+colQcoarse); } commCoarse->SumAll(tmp + colQcoarse, tmp, colQcoarse); if (rowQcoarse > 0) { callLAPACK.POTRS('U', colQcoarse, 1, QcoarseTQcoarse, colQcoarse, tmp, colQcoarse, &info); callBLAS.GEMV('N', rowQcoarse, colQcoarse, -1.0, Qcoarse, rowQcoarse, tmp, 1.0, z); } } delete[] tmp; }
void ISVDMultiCD::makePass() { Epetra_LAPACK lapack; Epetra_BLAS blas; bool firstPass = (curRank_ == 0); const int numCols = A_->NumVectors(); TEUCHOS_TEST_FOR_EXCEPTION( !firstPass && (numProc_ != numCols), std::logic_error, "RBGen::ISVDMultiCD::makePass(): after first pass, numProc should be numCols"); // compute W = I - Z T Z^T from current V_ Teuchos::RCP<Epetra_MultiVector> lclAZT, lclZ; double *Z_A, *AZT_A; int Z_LDA, AZT_LDA; int oldRank = 0; double Rerr = 0.0; if (!firstPass) { // copy V_ into workZ_ lclAZT = Teuchos::rcp( new Epetra_MultiVector(::View,*workAZT_,0,curRank_) ); lclZ = Teuchos::rcp( new Epetra_MultiVector(::View,*workZ_,0,curRank_) ); { Epetra_MultiVector lclV(::View,*V_,0,curRank_); *lclZ = lclV; } // compute the Householder QR factorization of the current right basis // Vhat = W*R int info, lwork = curRank_; std::vector<double> tau(curRank_), work(lwork); info = lclZ->ExtractView(&Z_A,&Z_LDA); TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error, "RBGen::ISVDMultiCD::makePass(): error calling ExtractView on Epetra_MultiVector Z."); lapack.GEQRF(numCols,curRank_,Z_A,Z_LDA,&tau[0],&work[0],lwork,&info); TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error, "RBGen::ISVDMultiCD::makePass(): error calling GEQRF on current right basis while constructing next pass coefficients."); if (debug_) { // we just took the QR factorization of a set of orthonormal vectors // they should have an R factor which is diagonal, with unit elements (\pm 1) // check it Rerr = 0.0; for (int j=0; j<curRank_; j++) { for (int i=0; i<j; i++) { Rerr += abs(Z_A[j*Z_LDA+i]); } Rerr += abs(abs(Z_A[j*Z_LDA+j]) - 1.0); } } // compute the block representation // W = I - Z T Z^T lapack.LARFT('F','C',numCols,curRank_,Z_A,Z_LDA,&tau[0],workT_->A(),workT_->LDA()); // LARFT left upper tri block of Z unchanged // note: it should currently contain R factor of V_, which is very close to // diag(\pm 1, ..., \pm 1) // // we need to set it to: // [1 0 0 ... 0] // [ 1 0 ... 0] // [ .... ] // [ 1] // // see documentation for LARFT // for (int j=0; j<curRank_; j++) { Z_A[j*Z_LDA+j] = 1.0; for (int i=0; i<j; i++) { Z_A[j*Z_LDA+i] = 0.0; } } // compute part of A W: A Z T // put this in workAZT_ // first, A Z info = lclAZT->Multiply('N','N',1.0,*A_,*lclZ,0.0); TEUCHOS_TEST_FOR_EXCEPTION(info != 0,std::logic_error, "RBGen::ISVDMultiCD::makePass(): Error calling Epetra_MultiVector::Multiply() for A*Z"); // second, (A Z) T (in situ, as T is upper triangular) info = lclAZT->ExtractView(&AZT_A,&AZT_LDA); TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error, "RBGen::ISVDMultiCD::makePass(): error calling ExtractView on Epetra_MultiVector AZ."); blas.TRMM('R','U','N','N',numCols,curRank_,1.0,workT_->A(),workT_->LDA(),AZT_A,AZT_LDA); // save oldRank: it tells us the width of Z oldRank = curRank_; curRank_ = 0; numProc_ = 0; } else { // firstPass == true curRank_ = 0; numProc_ = 0; } while (numProc_ < numCols) { // // determine lup // // want lup >= lmin // lup <= lmax // need lup <= numCols - numProc // lup <= maxBasisSize - curRank // int lup; if (curRank_ == 0) { // first step uses startRank_ // this is not affected by lmin,lmax lup = startRank_; } else { // this value minimizes overall complexity, assuming fixed rank lup = (int)(curRank_ / Teuchos::ScalarTraits<double>::squareroot(2.0)); // contrain to [lmin,lmax] lup = (lup < lmin_ ? lmin_ : lup); lup = (lup > lmax_ ? lmax_ : lup); } // // now cap lup via maxBasisSize and the available data // these caps apply to all lup, as a result of memory and data constraints // // available data lup = (lup > numCols - numProc_ ? numCols - numProc_ : lup); // available memory lup = (lup > maxBasisSize_ - curRank_ ? maxBasisSize_ - curRank_ : lup); // get view of new vectors { const Epetra_MultiVector Aplus(::View,*A_,numProc_,lup); Epetra_MultiVector Unew(::View,*U_,curRank_,lup); // put them in U if (firstPass) { // new vectors are just Aplus Unew = Aplus; } else { // new vectors are Aplus - (A Z T) Z_i^T // specifically, Aplus - (A Z T) Z(numProc:numProc+lup-1,1:oldRank)^T Epetra_LocalMap lclmap(lup,0,A_->Comm()); Epetra_MultiVector Zi(::View,lclmap,&Z_A[numProc_],Z_LDA,oldRank); Unew = Aplus; int info = Unew.Multiply('N','T',-1.0,*lclAZT,Zi,1.0); TEUCHOS_TEST_FOR_EXCEPTION(info != 0,std::logic_error, "RBGen::ISVDMultiCD::makePass(): Error calling Epetra_MultiVector::Multiply() for A*Wi"); } } // perform the incremental step incStep(lup); } // compute W V = V - Z T Z^T V // Z^T V is oldRank x curRank // T Z^T V is oldRank x curRank // we need T Z^T V in a local Epetra_MultiVector if (!firstPass) { Teuchos::RCP<Epetra_MultiVector> lclV; double *TZTV_A; int TZTV_LDA; int info; Epetra_LocalMap lclmap(oldRank,0,A_->Comm()); // get pointer to current V lclV = Teuchos::rcp( new Epetra_MultiVector(::View,*V_,0,curRank_) ); // create space for T Z^T V Epetra_MultiVector TZTV(lclmap,curRank_,false); // multiply Z^T V info = TZTV.Multiply('T','N',1.0,*lclZ,*lclV,0.0); TEUCHOS_TEST_FOR_EXCEPTION(info != 0,std::logic_error, "RBGen::ISVDMultiCD::makePass(): Error calling Epetra_MultiVector::Multiply() for Z^T V."); // get pointer to data in Z^T V info = TZTV.ExtractView(&TZTV_A,&TZTV_LDA); TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error, "RBGen::ISVDMultiCD::makePass(): error calling ExtractView on Epetra_MultiVector TZTV."); // multiply T (Z^T V) blas.TRMM('L','U','N','N',oldRank,curRank_,1.0,workT_->A(),workT_->LDA(),TZTV_A,TZTV_LDA); // multiply V - Z (T Z^T V) info = lclV->Multiply('N','N',-1.0,*lclZ,TZTV,1.0); TEUCHOS_TEST_FOR_EXCEPTION(info != 0,std::logic_error, "RBGen::ISVDMultiCD::makePass(): Error calling Epetra_MultiVector::Multiply() for W V."); } // // compute the new residuals // we know that A V = U S // if, in addition, A^T U = V S, then have singular subspaces // check residuals A^T U - V S, scaling the i-th column by sigma[i] // { // make these static, because makePass() will be likely be called again static Epetra_LocalMap lclmap(A_->NumVectors(),0,A_->Comm()); static Epetra_MultiVector ATU(lclmap,maxBasisSize_,false); // we know that A V = U S // if, in addition, A^T U = V S, then have singular subspaces // check residuals A^T U - V S, scaling the i-th column by sigma[i] Epetra_MultiVector ATUlcl(::View,ATU,0,curRank_); Epetra_MultiVector Ulcl(::View,*U_,0,curRank_); Epetra_MultiVector Vlcl(::View,*V_,0,curRank_); // compute A^T U int info = ATUlcl.Multiply('T','N',1.0,*A_,Ulcl,0.0); TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error, "RBGen::ISVDMultiCD::makePass(): Error calling Epetra_MultiVector::Multiply for A^T U."); Epetra_LocalMap rankmap(curRank_,0,A_->Comm()); Epetra_MultiVector S(rankmap,curRank_,true); for (int i=0; i<curRank_; i++) { S[i][i] = sigma_[i]; } // subtract V S from A^T U info = ATUlcl.Multiply('N','N',-1.0,Vlcl,S,1.0); TEUCHOS_TEST_FOR_EXCEPTION(info != 0, std::logic_error, "RBGen::ISVDMultiCD::computeBasis(): Error calling Epetra_MultiVector::Multiply for V S."); resNorms_.resize(curRank_); ATUlcl.Norm2(&resNorms_[0]); // scale by sigmas for (int i=0; i<curRank_; i++) { if (sigma_[i] != 0.0) { resNorms_[i] /= sigma_[i]; } } } // debugging checks std::vector<double> errnorms(curRank_); if (debug_) { int info; // Check that A V = U Sigma // get pointers to current U and V, create workspace for A V - U Sigma Epetra_MultiVector work(U_->Map(),curRank_,false), curU(::View,*U_,0,curRank_), curV(::View,*V_,0,curRank_); // create local MV for sigmas Epetra_LocalMap lclmap(curRank_,0,A_->Comm()); Epetra_MultiVector curS(lclmap,curRank_,true); for (int i=0; i<curRank_; i++) { curS[i][i] = sigma_[i]; } info = work.Multiply('N','N',1.0,curU,curS,0.0); TEUCHOS_TEST_FOR_EXCEPTION(info != 0,std::logic_error, "RBGen::ISVDMultiCD::makePass(): Error calling Epetra_MultiVector::Multiply() for debugging U S."); info = work.Multiply('N','N',-1.0,*A_,curV,1.0); TEUCHOS_TEST_FOR_EXCEPTION(info != 0,std::logic_error, "RBGen::ISVDMultiCD::makePass(): Error calling Epetra_MultiVector::Multiply() for debugging U S - A V."); work.Norm2(&errnorms[0]); for (int i=0; i<curRank_; i++) { if (sigma_[i] != 0.0) { errnorms[i] /= sigma_[i]; } } } // update pass counter curNumPasses_++; // print out some info const Epetra_Comm *comm = &A_->Comm(); if (comm->MyPID() == 0 && verbLevel_ >= 1) { std::cout << "------------- ISVDMultiCD::makePass() -----------" << std::endl << "| Number of passes: " << curNumPasses_ << std::endl << "| Current rank: " << curRank_ << std::endl << "| Current sigmas: " << std::endl; for (int i=0; i<curRank_; i++) { std::cout << "| " << sigma_[i] << std::endl; } if (debug_) { std::cout << "|DBG US-AV norms: " << std::endl; for (int i=0; i<curRank_; i++) { std::cout << "|DBG " << errnorms[i] << std::endl; } if (!firstPass) { std::cout << "|DBG R-I norm: " << Rerr << std::endl; } } } return; }
void IncSVDPOD::incStep(int lup) { Epetra_LAPACK lapack; // perform gram-schmidt expansion // expand() will update bases U_ and V_, factor B_, as well as counters curRank_ and numProc_ this->expand(lup); // compute the SVD of B const int lwork = 5*curRank_; int info; Epetra_SerialDenseMatrix Uhat(::Copy,B_->A(),B_->LDA(),curRank_,curRank_), Vhat(curRank_,curRank_); std::vector<double> Shat(curRank_), work(lwork); // Note: this actually stores Vhat^T (we remedy this below) lapack.GESVD('O','A',curRank_,curRank_,Uhat.A(),Uhat.LDA(),&Shat[0],Uhat.A(),Uhat.LDA(),Vhat.A(),Vhat.LDA(),&work[0],&lwork,&info); TEUCHOS_TEST_FOR_EXCEPTION(info!=0,std::logic_error,"RBGen::IncSVDPOD::incStep(): GESVD return info != 0"); // use filter to determine new rank std::vector<int> keepind = filter_->filter(Shat); std::vector<int> truncind(curRank_-keepind.size()); { std::vector<int> allind(curRank_); for (int i=0; i<curRank_; i++) { allind[i] = i; } set_difference(allind.begin(),allind.end(),keepind.begin(),keepind.end(),truncind.begin()); // Vhat actually contains Vhat^T; correct this here Epetra_SerialDenseMatrix Ucopy(Uhat), Vcopy(curRank_,curRank_); std::vector<double> Scopy(Shat); for (int j=0; j<curRank_; j++) { for (int i=0; i<curRank_; i++) { Vcopy(i,j) = Vhat(j,i); } } // put the desired sigmas at the front of Uhat, Vhat for (unsigned int j=0; j<keepind.size(); j++) { std::copy(&Ucopy(0,keepind[j]),&Ucopy(curRank_,keepind[j]),&Uhat(0,j)); std::copy(&Vcopy(0,keepind[j]),&Vcopy(curRank_,keepind[j]),&Vhat(0,j)); Shat[j] = Scopy[keepind[j]]; } for (unsigned int j=0; j<truncind.size(); j++) { std::copy(&Ucopy(0,truncind[j]),&Ucopy(curRank_,truncind[j]),&Uhat(0,keepind.size()+j)); std::copy(&Vcopy(0,truncind[j]),&Vcopy(curRank_,truncind[j]),&Vhat(0,keepind.size()+j)); Shat[keepind.size()+j] = Scopy[truncind[j]]; } } // shrink back down again // shrink() will update bases U_ and V_, as well as singular values sigma_ and curRank_ this->shrink(truncind.size(),Shat,Uhat,Vhat); // print out some info const Epetra_Comm *comm = &A_->Comm(); if (comm->MyPID() == 0 && verbLevel_ >= 2) { std::cout << "------------- IncSVDPOD::incStep() --------------" << std::endl << "| Cols Processed: " << numProc_ << std::endl << "| Current lup: " << lup << std::endl << "| Current ldown: " << truncind.size() << std::endl << "| Current rank: " << curRank_ << std::endl << "| Current sigmas: " << std::endl; for (int i=0; i<curRank_; i++) { std::cout << "| " << sigma_[i] << std::endl; } } }
void HyperbolicSolver< Mesh, SolverType >:: localEvolve ( const UInt& iElem ) { // LAPACK wrapper of Epetra Epetra_LAPACK lapack; // Flags for LAPACK routines. Int INFO[1] = { 0 }; Int NB = M_FESpace.refFE().nbDof(); // Parameter that indicate the Lower storage of matrices. char param_L = 'L'; char param_N = 'N'; // Paramater that indicate the Transpose of matrices. char param_T = 'T'; // Numbers of columns of the right hand side := 1. Int NBRHS = 1; // Clean the local flux M_localFlux.zero(); // Loop on the faces of the element iElem and compute the local contribution for ( UInt iFace (0); iFace < M_FESpace.mesh()->numLocalFaces(); ++iFace ) { // Id mapping const UInt iGlobalFace ( M_FESpace.mesh()->localFacetId ( iElem, iFace ) ); // Take the left element to the face, see regionMesh for the meaning of left element const UInt leftElement ( M_FESpace.mesh()->faceElement ( iGlobalFace, 0 ) ); // Take the right element to the face, see regionMesh for the meaning of right element const UInt rightElement ( M_FESpace.mesh()->faceElement ( iGlobalFace, 1 ) ); // Update the normal vector of the current face in each quadrature point M_FESpace.feBd().updateMeasNormalQuadPt ( M_FESpace.mesh()->boundaryFacet ( iGlobalFace ) ); // Local flux of a face times the integration weight VectorElemental localFaceFluxWeight ( M_FESpace.refFE().nbDof(), 1 ); // Solution in the left element VectorElemental leftValue ( M_FESpace.refFE().nbDof(), 1 ); // Solution in the right element VectorElemental rightValue ( M_FESpace.refFE().nbDof(), 1 ); // Extract the solution in the current element, now is the leftElement extract_vec ( *M_uOld, leftValue, M_FESpace.refFE(), M_FESpace.dof(), leftElement , 0 ); // Check if the current face is a boundary face, that is rightElement == NotAnId if ( !Flag::testOneSet ( M_FESpace.mesh()->face ( iGlobalFace ).flag(), EntityFlags::PHYSICAL_BOUNDARY | EntityFlags::SUBDOMAIN_INTERFACE ) ) { // Extract the solution in the current element, now is the leftElement extract_vec ( *M_uOld, rightValue, M_FESpace.refFE(), M_FESpace.dof(), rightElement , 0 ); } else if ( Flag::testOneSet ( M_FESpace.mesh()->face ( iGlobalFace ).flag(), EntityFlags::SUBDOMAIN_INTERFACE ) ) { // TODO: this works only for P0 elements rightValue[ 0 ] = M_ghostDataMap[ iGlobalFace ]; } else // Flag::testOneSet ( M_FESpace.mesh()->face ( iGlobalFace ).flag(), PHYSICAL_BOUNDARY ) { // Clean the value of the right element rightValue.zero(); // Check if the boundary conditions were updated. if ( !M_BCh->bcUpdateDone() ) { // Update the boundary conditions handler. We use the finite element of the boundary of the dual variable. M_BCh->bcUpdate ( *M_FESpace.mesh(), M_FESpace.feBd(), M_FESpace.dof() ); } // Take the boundary marker for the current boundary face const ID faceMarker ( M_FESpace.mesh()->boundaryFacet ( iGlobalFace ).markerID() ); // Take the corrispective boundary function const BCBase& bcBase ( M_BCh->findBCWithFlag ( faceMarker ) ); // Check if the bounday condition is of type Essential, useful for operator splitting strategies if ( bcBase.type() == Essential ) { // Loop on all the quadrature points for ( UInt ig (0); ig < M_FESpace.feBd().nbQuadPt(); ++ig) { // Current quadrature point KN<Real> quadPoint (3); // normal vector KN<Real> normal (3); for (UInt icoor (0); icoor < 3; ++icoor) { quadPoint (icoor) = M_FESpace.feBd().quadPt ( ig, icoor ); normal (icoor) = M_FESpace.feBd().normal ( icoor, ig ) ; } // Compute the boundary contribution rightValue[0] = bcBase ( M_data.dataTime()->time(), quadPoint (0), quadPoint (1), quadPoint (2), 0 ); const Real localFaceFlux = M_numericalFlux->firstDerivativePhysicalFluxDotNormal ( normal, iElem, M_data.dataTime()->time(), quadPoint (0), quadPoint (1), quadPoint (2), rightValue[ 0 ] ); // Update the local flux of the current face with the quadrature weight localFaceFluxWeight[0] += localFaceFlux * M_FESpace.feBd().weightMeas ( ig ); } } else { /* If the boundary flag is not Essential then is automatically an outflow boundary. We impose to localFaceFluxWeight a positive value. */ localFaceFluxWeight[0] = 1.; } // It is an outflow face, we use a ghost cell if ( localFaceFluxWeight[0] > 1e-4 ) { rightValue = leftValue; } // Clean the localFaceFluxWeight localFaceFluxWeight.zero(); } // Clean the localFaceFluxWeight localFaceFluxWeight.zero(); // Loop on all the quadrature points for ( UInt ig (0); ig < M_FESpace.feBd().nbQuadPt(); ++ig ) { // Current quadrature point KN<Real> quadPoint (3); // normal vector KN<Real> normal (3); for (UInt icoor (0); icoor < 3; ++icoor) { quadPoint (icoor) = M_FESpace.feBd().quadPt ( ig, icoor ); normal (icoor) = M_FESpace.feBd().normal ( icoor, ig ) ; } // If the normal is orientated inward, we change its sign and swap the left and value of the solution if ( iElem == rightElement ) { normal *= -1.; std::swap ( leftValue, rightValue ); } const Real localFaceFlux = (*M_numericalFlux) ( leftValue[ 0 ], rightValue[ 0 ], normal, iElem, M_data.dataTime()->time(), quadPoint (0), quadPoint (1), quadPoint (2) ); // Update the local flux of the current face with the quadrature weight localFaceFluxWeight[0] += localFaceFlux * M_FESpace.feBd().weightMeas ( ig ); } /* Put in localFlux the vector L^{-1} * localFlux For more details see http://www.netlib.org/lapack/lapack-3.1.1/SRC/dtrtrs.f */ lapack.TRTRS ( param_L, param_N, param_N, NB, NBRHS, M_elmatMass[ iElem ].mat(), NB, localFaceFluxWeight, NB, INFO); ASSERT_PRE ( !INFO[0], "Lapack Computation M_elvecSource = LB^{-1} rhs is not achieved." ); /* Put in localFlux the vector L^{-T} * localFlux For more details see http://www.netlib.org/lapack/lapack-3.1.1/SRC/dtrtrs.f */ lapack.TRTRS ( param_L, param_T, param_N, NB, NBRHS, M_elmatMass[ iElem ].mat(), NB, localFaceFluxWeight, NB, INFO); ASSERT_PRE ( !INFO[0], "Lapack Computation M_elvecSource = LB^{-1} rhs is not achieved." ); // Add to the local flux the local flux of the current face M_localFlux += localFaceFluxWeight; } } // localEvolve
void HyperbolicSolver<Mesh, SolverType>:: setup () { // LAPACK wrapper of Epetra Epetra_LAPACK lapack; // Flags for LAPACK routines. Int INFO[1] = {0}; Int NB = M_FESpace.refFE().nbDof(); // Parameter that indicate the Lower storage of matrices. char param_L = 'L'; // Total number of elements. UInt meshNumberOfElements = M_FESpace.mesh()->numElements(); // Vector of interpolation of the mass function vector_Type vectorMass ( M_FESpace.map(), Repeated ); // If the mass function is given take it, otherwise use one as a value. if ( M_mass != NULL ) { // Interpolate the mass function on the finite element space. M_FESpace.interpolate ( M_mass, vectorMass ); } else { vectorMass = 1.; } // For each element it creates the mass matrix and factorize it using Cholesky. for ( UInt iElem (0); iElem < meshNumberOfElements; ++iElem ) { // Update the element. M_FESpace.fe().update ( M_FESpace.mesh()->element ( iElem), UPDATE_QUAD_NODES | UPDATE_WDET ); // Local mass matrix MatrixElemental matElem (M_FESpace.refFE().nbDof(), 1, 1); matElem.zero(); // Compute the mass matrix for the current element VectorElemental massValue ( M_FESpace.refFE().nbDof(), 1 ); extract_vec ( vectorMass, massValue, M_FESpace.refFE(), M_FESpace.dof(), iElem, 0 ); // TODO: this works only for P0 mass ( massValue[ 0 ], matElem, M_FESpace.fe(), 0, 0); /* Put in M the matrix L and L^T, where L and L^T is the Cholesky factorization of M. For more details see http://www.netlib.org/lapack/double/dpotrf.f */ lapack.POTRF ( param_L, NB, matElem.mat(), NB, INFO ); ASSERT_PRE ( !INFO[0], "Lapack factorization of M is not achieved." ); // Save the local mass matrix in the global vector of mass matrices M_elmatMass.push_back ( matElem ); } //make sure mesh facets are updated if (! M_FESpace.mesh()->hasLocalFacets() ) { M_FESpace.mesh()->updateElementFacets(); } } // setup
// ====================================================================== void GetPtent(const Epetra_RowMatrix& A, Teuchos::ParameterList& List, double* thisns, Teuchos::RCP<Epetra_CrsMatrix>& Ptent, Teuchos::RCP<Epetra_MultiVector>& NextNS, const int domainoffset) { const int nsdim = List.get<int>("null space: dimension",-1); if (nsdim<=0) ML_THROW("null space dimension not given",-1); const Epetra_Map& rowmap = A.RowMatrixRowMap(); const int mylength = rowmap.NumMyElements(); // wrap nullspace into something more handy Epetra_MultiVector ns(View,rowmap,thisns,mylength,nsdim); // vector to hold aggregation information Epetra_IntVector aggs(rowmap,false); // aggregation with global aggregate numbering int naggregates = GetGlobalAggregates(const_cast<Epetra_RowMatrix&>(A),List,thisns,aggs); // build a domain map for Ptent // find first aggregate on proc int firstagg = -1; int offset = -1; for (int i=0; i<mylength; ++i) if (aggs[i]>=0) { offset = firstagg = aggs[i]; break; } offset *= nsdim; if (offset<0) ML_THROW("could not find any aggregate on proc",-2); std::vector<int> coarsegids(naggregates*nsdim); for (int i=0; i<naggregates; ++i) for (int j=0; j<nsdim; ++j) { coarsegids[i*nsdim+j] = offset + domainoffset; ++offset; } Epetra_Map pdomainmap(-1,naggregates*nsdim,&coarsegids[0],0,A.Comm()); // loop aggregates and build ids for dofs std::map<int,std::vector<int> > aggdofs; std::map<int,std::vector<int> >::iterator fool; for (int i=0; i<naggregates; ++i) { std::vector<int> gids(0); aggdofs.insert(std::pair<int,std::vector<int> >(firstagg+i,gids)); } for (int i=0; i<mylength; ++i) { if (aggs[i]<0) continue; std::vector<int>& gids = aggdofs[aggs[i]]; gids.push_back(aggs.Map().GID(i)); } #if 0 // debugging output for (int proc=0; proc<A.Comm().NumProc(); ++proc) { if (proc==A.Comm().MyPID()) { for (fool=aggdofs.begin(); fool!=aggdofs.end(); ++fool) { std::cout << "Proc " << proc << " Aggregate " << fool->first << " Dofs "; std::vector<int>& gids = fool->second; for (int i=0; i<(int)gids.size(); ++i) std::cout << gids[i] << " "; std::cout << std::endl; } } fflush(stdout); A.Comm().Barrier(); } #endif // coarse level nullspace to be filled NextNS = Teuchos::rcp(new Epetra_MultiVector(pdomainmap,nsdim,true)); Epetra_MultiVector& nextns = *NextNS; // matrix Ptent Ptent = Teuchos::rcp(new Epetra_CrsMatrix(Copy,rowmap,nsdim)); // loop aggregates and extract the appropriate slices of the nullspace. // do QR and assemble Q and R to Ptent and NextNS for (fool=aggdofs.begin(); fool!=aggdofs.end(); ++fool) { // extract aggregate-local junk of nullspace const int aggsize = (int)fool->second.size(); Epetra_SerialDenseMatrix Bagg(aggsize,nsdim); for (int i=0; i<aggsize; ++i) for (int j=0; j<nsdim; ++j) Bagg(i,j) = (*ns(j))[ns.Map().LID(fool->second[i])]; // Bagg = Q*R int m = Bagg.M(); int n = Bagg.N(); int lwork = n*10; int info = 0; int k = std::min(m,n); if (k!=n) ML_THROW("Aggregate too small, fatal!",-1); std::vector<double> work(lwork); std::vector<double> tau(k); Epetra_LAPACK lapack; lapack.GEQRF(m,n,Bagg.A(),m,&tau[0],&work[0],lwork,&info); if (info) ML_THROW("Lapack dgeqrf returned nonzero",info); if (work[0]>lwork) { lwork = (int)work[0]; work.resize(lwork); } // get R (stored on Bagg) and assemble it into nextns int agg_cgid = fool->first*nsdim; if (!nextns.Map().MyGID(agg_cgid+domainoffset)) ML_THROW("Missing coarse column id on this proc",-1); for (int i=0; i<n; ++i) for (int j=i; j<n; ++j) (*nextns(j))[nextns.Map().LID(domainoffset+agg_cgid+i)] = Bagg(i,j); // get Q and assemble it into Ptent lapack.ORGQR(m,n,k,Bagg.A(),m,&tau[0],&work[0],lwork,&info); if (info) ML_THROW("Lapack dorgqr returned nonzero",info); for (int i=0; i<aggsize; ++i) { const int actgrow = fool->second[i]; for (int j=0; j<nsdim; ++j) { int actgcol = fool->first*nsdim+j+domainoffset; int errone = Ptent->SumIntoGlobalValues(actgrow,1,&Bagg(i,j),&actgcol); if (errone>0) { int errtwo = Ptent->InsertGlobalValues(actgrow,1,&Bagg(i,j),&actgcol); if (errtwo<0) ML_THROW("Epetra_CrsMatrix::InsertGlobalValues returned negative nonzero",errtwo); } else if (errone) ML_THROW("Epetra_CrsMatrix::SumIntoGlobalValues returned negative nonzero",errone); } } // for (int i=0; i<aggsize; ++i) } // for (fool=aggdofs.begin(); fool!=aggdofs.end(); ++fool) int err = Ptent->FillComplete(pdomainmap,rowmap); if (err) ML_THROW("Epetra_CrsMatrix::FillComplete returned nonzero",err); err = Ptent->OptimizeStorage(); if (err) ML_THROW("Epetra_CrsMatrix::OptimizeStorage returned nonzero",err); return; }
// ====================================================================== int Amesos_Lapack::GEEV(Epetra_Vector& Er, Epetra_Vector& Ei) { if (IsSymbolicFactorizationOK_ == false) AMESOS_CHK_ERR(SymbolicFactorization()); if (MyPID_ == 0) AMESOS_CHK_ERR(DenseMatrix_.Shape(static_cast<int>(NumGlobalRows64()),static_cast<int>(NumGlobalRows64()))); AMESOS_CHK_ERR(DistributedToSerial()); AMESOS_CHK_ERR(SerialToDense()); Teuchos::RCP<Epetra_Vector> LocalEr; Teuchos::RCP<Epetra_Vector> LocalEi; if (NumProcs_ == 1) { LocalEr = Teuchos::rcp(&Er, false); LocalEi = Teuchos::rcp(&Ei, false); } else { LocalEr = Teuchos::rcp(new Epetra_Vector(*SerialMap_)); LocalEi = Teuchos::rcp(new Epetra_Vector(*SerialMap_)); } if (MyPID_ == 0) { int n = static_cast<int>(NumGlobalRows64()); char jobvl = 'N'; /* V/N to calculate/not calculate left eigenvectors of matrix H.*/ char jobvr = 'N'; /* As above, but for right eigenvectors. */ int info = 0; int ldvl = n; int ldvr = n; Er.PutScalar(0.0); Ei.PutScalar(0.0); Epetra_LAPACK LAPACK; std::vector<double> work(1); int lwork = -1; LAPACK.GEEV(jobvl, jobvr, n, DenseMatrix_.A(), n, LocalEr->Values(), LocalEi->Values(), NULL, ldvl, NULL, ldvr, &work[0], lwork, &info); lwork = (int)work[0]; work.resize(lwork); LAPACK.GEEV(jobvl, jobvr, n, DenseMatrix_.A(), n, LocalEr->Values(), LocalEi->Values(), NULL, ldvl, NULL, ldvr, &work[0], lwork, &info); if (info) AMESOS_CHK_ERR(info); } if (NumProcs_ != 1) { // I am not really sure that exporting the results make sense... // It is just to be coherent with the other parts of the code. Er.Import(*LocalEr, Epetra_Import(Er.Map(), SerialMap()), Insert); Ei.Import(*LocalEi, Epetra_Import(Ei.Map(), SerialMap()), Insert); } return(0); }