/* approximately solve the overdetermined system: 2*F(x_i)\cdot F(\x_j)\alpha_i = 0 \alpha_i = 1 Which minimizes the L2 norm of the linearization of: ||F(\sum_i \alpha_i*x_i)||^2 With the constraint that \sum_i\alpha_i = 1 Where x_i is the solution from the ith subsolver. */ static PetscErrorCode SNESCompositeApply_AdditiveOptimal(SNES snes,Vec X,Vec B,Vec F,PetscReal *fnorm) { PetscErrorCode ierr; SNES_Composite *jac = (SNES_Composite*)snes->data; SNES_CompositeLink next = jac->head; Vec *Xes = jac->Xes,*Fes = jac->Fes; PetscInt i,j; PetscScalar tot,total,ftf; PetscReal min_fnorm; PetscInt min_i; SNESConvergedReason reason; PetscFunctionBegin; if (!next) SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_ARG_WRONGSTATE,"No composite SNESes supplied via SNESCompositeAddSNES() or -snes_composite_sneses"); if (snes->normschedule == SNES_NORM_ALWAYS) { next = jac->head; ierr = SNESSetInitialFunction(next->snes,F);CHKERRQ(ierr); while (next->next) { next = next->next; ierr = SNESSetInitialFunction(next->snes,F);CHKERRQ(ierr); } } next = jac->head; i = 0; ierr = VecCopy(X,Xes[i]);CHKERRQ(ierr); ierr = SNESSolve(next->snes,B,Xes[i]);CHKERRQ(ierr); ierr = SNESGetConvergedReason(next->snes,&reason);CHKERRQ(ierr); if (reason < 0 && reason != SNES_DIVERGED_MAX_IT) { jac->innerFailures++; if (jac->innerFailures >= snes->maxFailures) { snes->reason = SNES_DIVERGED_INNER; PetscFunctionReturn(0); } } while (next->next) { i++; next = next->next; ierr = VecCopy(X,Xes[i]);CHKERRQ(ierr); ierr = SNESSolve(next->snes,B,Xes[i]);CHKERRQ(ierr); ierr = SNESGetConvergedReason(next->snes,&reason);CHKERRQ(ierr); if (reason < 0 && reason != SNES_DIVERGED_MAX_IT) { jac->innerFailures++; if (jac->innerFailures >= snes->maxFailures) { snes->reason = SNES_DIVERGED_INNER; PetscFunctionReturn(0); } } } /* all the solutions are collected; combine optimally */ for (i=0;i<jac->n;i++) { for (j=0;j<i+1;j++) { ierr = VecDotBegin(Fes[i],Fes[j],&jac->h[i + j*jac->n]);CHKERRQ(ierr); } ierr = VecDotBegin(Fes[i],F,&jac->g[i]);CHKERRQ(ierr); } for (i=0;i<jac->n;i++) { for (j=0;j<i+1;j++) { ierr = VecDotEnd(Fes[i],Fes[j],&jac->h[i + j*jac->n]);CHKERRQ(ierr); if (i == j) jac->fnorms[i] = PetscSqrtReal(PetscRealPart(jac->h[i + j*jac->n])); } ierr = VecDotEnd(Fes[i],F,&jac->g[i]);CHKERRQ(ierr); } ftf = (*fnorm)*(*fnorm); for (i=0; i<jac->n; i++) { for (j=i+1;j<jac->n;j++) { jac->h[i + j*jac->n] = jac->h[j + i*jac->n]; } } for (i=0; i<jac->n; i++) { for (j=0; j<jac->n; j++) { jac->h[i + j*jac->n] = jac->h[i + j*jac->n] - jac->g[j] - jac->g[i] + ftf; } jac->beta[i] = ftf - jac->g[i]; } #if defined(PETSC_MISSING_LAPACK_GELSS) SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_SUP,"SNESCOMPOSITE with ADDITIVEOPTIMAL requires the LAPACK GELSS routine."); #else jac->info = 0; jac->rcond = -1.; ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); #if defined(PETSC_USE_COMPLEX) PetscStackCall("LAPACKgelss",LAPACKgelss_(&jac->n,&jac->n,&jac->nrhs,jac->h,&jac->lda,jac->beta,&jac->lda,jac->s,&jac->rcond,&jac->rank,jac->work,&jac->lwork,jac->rwork,&jac->info)); #else PetscStackCall("LAPACKgelss",LAPACKgelss_(&jac->n,&jac->n,&jac->nrhs,jac->h,&jac->lda,jac->beta,&jac->lda,jac->s,&jac->rcond,&jac->rank,jac->work,&jac->lwork,&jac->info)); #endif ierr = PetscFPTrapPop();CHKERRQ(ierr); if (jac->info < 0) SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_LIB,"Bad argument to GELSS"); if (jac->info > 0) SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_LIB,"SVD failed to converge"); #endif tot = 0.; total = 0.; for (i=0; i<jac->n; i++) { if (snes->errorifnotconverged && PetscIsInfOrNanScalar(jac->beta[i])) SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_LIB,"SVD generated inconsistent output"); ierr = PetscInfo2(snes,"%D: %g\n",i,(double)PetscRealPart(jac->beta[i]));CHKERRQ(ierr); tot += jac->beta[i]; total += PetscAbsScalar(jac->beta[i]); } ierr = VecScale(X,(1. - tot));CHKERRQ(ierr); ierr = VecMAXPY(X,jac->n,jac->beta,Xes);CHKERRQ(ierr); ierr = SNESComputeFunction(snes,X,F);CHKERRQ(ierr); if (snes->xl && snes->xu) { ierr = SNESVIComputeInactiveSetFnorm(snes, F, X, fnorm);CHKERRQ(ierr); } else { ierr = VecNorm(F, NORM_2, fnorm);CHKERRQ(ierr); } /* take the minimum-normed candidate if it beats the combination by a factor of rtol or the combination has stagnated */ min_fnorm = jac->fnorms[0]; min_i = 0; for (i=0; i<jac->n; i++) { if (jac->fnorms[i] < min_fnorm) { min_fnorm = jac->fnorms[i]; min_i = i; } } /* stagnation or divergence restart to the solution of the solver that failed the least */ if (PetscRealPart(total) < jac->stol || min_fnorm*jac->rtol < *fnorm) { ierr = VecCopy(jac->Xes[min_i],X);CHKERRQ(ierr); ierr = VecCopy(jac->Fes[min_i],F);CHKERRQ(ierr); *fnorm = min_fnorm; } PetscFunctionReturn(0); }
PetscErrorCode SNESNGMRESFormCombinedSolution_Private(SNES snes,PetscInt l,Vec XM,Vec FM,PetscReal fMnorm,Vec X,Vec XA,Vec FA) { SNES_NGMRES *ngmres = (SNES_NGMRES*) snes->data; PetscInt i,j; Vec *Fdot = ngmres->Fdot; Vec *Xdot = ngmres->Xdot; PetscScalar *beta = ngmres->beta; PetscScalar *xi = ngmres->xi; PetscScalar alph_total = 0.; PetscErrorCode ierr; PetscReal nu; Vec Y = snes->work[2]; PetscBool changed_y,changed_w; PetscFunctionBegin; nu = fMnorm*fMnorm; /* construct the right hand side and xi factors */ ierr = VecMDot(FM,l,Fdot,xi);CHKERRQ(ierr); for (i = 0; i < l; i++) beta[i] = nu - xi[i]; /* construct h */ for (j = 0; j < l; j++) { for (i = 0; i < l; i++) { H(i,j) = Q(i,j)-xi[i]-xi[j]+nu; } } if (l == 1) { /* simply set alpha[0] = beta[0] / H[0, 0] */ if (H(0,0) != 0.) beta[0] = beta[0]/H(0,0); else beta[0] = 0.; } else { #if defined(PETSC_MISSING_LAPACK_GELSS) SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_SUP,"NGMRES with LS requires the LAPACK GELSS routine."); #else ierr = PetscBLASIntCast(l,&ngmres->m);CHKERRQ(ierr); ierr = PetscBLASIntCast(l,&ngmres->n);CHKERRQ(ierr); ngmres->info = 0; ngmres->rcond = -1.; ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); #if defined(PETSC_USE_COMPLEX) PetscStackCall("LAPACKgelss",LAPACKgelss_(&ngmres->m,&ngmres->n,&ngmres->nrhs,ngmres->h,&ngmres->lda,ngmres->beta,&ngmres->ldb,ngmres->s,&ngmres->rcond,&ngmres->rank,ngmres->work,&ngmres->lwork,ngmres->rwork,&ngmres->info)); #else PetscStackCall("LAPACKgelss",LAPACKgelss_(&ngmres->m,&ngmres->n,&ngmres->nrhs,ngmres->h,&ngmres->lda,ngmres->beta,&ngmres->ldb,ngmres->s,&ngmres->rcond,&ngmres->rank,ngmres->work,&ngmres->lwork,&ngmres->info)); #endif ierr = PetscFPTrapPop();CHKERRQ(ierr); if (ngmres->info < 0) SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_LIB,"Bad argument to GELSS"); if (ngmres->info > 0) SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_LIB,"SVD failed to converge"); #endif } for (i=0; i<l; i++) { if (PetscIsInfOrNanScalar(beta[i])) SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_LIB,"SVD generated inconsistent output"); } alph_total = 0.; for (i = 0; i < l; i++) alph_total += beta[i]; ierr = VecCopy(XM,XA);CHKERRQ(ierr); ierr = VecScale(XA,1.-alph_total);CHKERRQ(ierr); ierr = VecMAXPY(XA,l,beta,Xdot);CHKERRQ(ierr); /* check the validity of the step */ ierr = VecCopy(XA,Y);CHKERRQ(ierr); ierr = VecAXPY(Y,-1.0,X);CHKERRQ(ierr); ierr = SNESLineSearchPostCheck(snes->linesearch,X,Y,XA,&changed_y,&changed_w);CHKERRQ(ierr); if (!ngmres->approxfunc) {ierr = SNESComputeFunction(snes,XA,FA);CHKERRQ(ierr);} else { ierr = VecCopy(FM,FA);CHKERRQ(ierr); ierr = VecScale(FA,1.-alph_total);CHKERRQ(ierr); ierr = VecMAXPY(FA,l,beta,Fdot);CHKERRQ(ierr); } PetscFunctionReturn(0); }
void DenseMatrix<T>::_svd_solve_lapack(const DenseVector<T> & rhs, DenseVector<T> & x, Real rcond) const { // Since BLAS is expecting column-major storage, we first need to // make a transposed copy of *this, then pass it to the gelss // routine instead of the original. This extra copy is kind of a // bummer, it might be better if we could use the full SVD to // compute the least-squares solution instead... Note that it isn't // completely terrible either, since A_trans gets overwritten by // Lapack, and we usually would end up making a copy of A outside // the function call anyway. DenseMatrix<T> A_trans; this->get_transpose(A_trans); // M is INTEGER // The number of rows of the input matrix. M >= 0. // This is actually the number of *columns* of A_trans. int M = A_trans.n(); // N is INTEGER // The number of columns of the matrix A. N >= 0. // This is actually the number of *rows* of A_trans. int N = A_trans.m(); // We'll use the min and max of (M,N) several times below. int max_MN = std::max(M,N); int min_MN = std::min(M,N); // NRHS is INTEGER // The number of right hand sides, i.e., the number of columns // of the matrices B and X. NRHS >= 0. // This could later be generalized to solve for multiple right-hand // sides... int NRHS = 1; // A is DOUBLE PRECISION array, dimension (LDA,N) // On entry, the M-by-N matrix A. // On exit, the first min(m,n) rows of A are overwritten with // its right singular vectors, stored rowwise. // // The data vector that will be passed to Lapack. std::vector<T> & A_trans_vals = A_trans.get_values(); // LDA is INTEGER // The leading dimension of the array A. LDA >= max(1,M). int LDA = M; // B is DOUBLE PRECISION array, dimension (LDB,NRHS) // On entry, the M-by-NRHS right hand side matrix B. // On exit, B is overwritten by the N-by-NRHS solution // matrix X. If m >= n and RANK = n, the residual // sum-of-squares for the solution in the i-th column is given // by the sum of squares of elements n+1:m in that column. // // Since we don't want the user's rhs vector to be overwritten by // the solution, we copy the rhs values into the solution vector "x" // now. x needs to be long enough to hold both the (Nx1) solution // vector or the (Mx1) rhs, so size it to the max of those. x.resize(max_MN); for (unsigned i=0; i<rhs.size(); ++i) x(i) = rhs(i); // Make the syntax below simpler by grabbing a reference to this array. std::vector<T> & B = x.get_values(); // LDB is INTEGER // The leading dimension of the array B. LDB >= max(1,max(M,N)). int LDB = x.size(); // S is DOUBLE PRECISION array, dimension (min(M,N)) // The singular values of A in decreasing order. // The condition number of A in the 2-norm = S(1)/S(min(m,n)). std::vector<T> S(min_MN); // RCOND is DOUBLE PRECISION // RCOND is used to determine the effective rank of A. // Singular values S(i) <= RCOND*S(1) are treated as zero. // If RCOND < 0, machine precision is used instead. Real RCOND = rcond; // RANK is INTEGER // The effective rank of A, i.e., the number of singular values // which are greater than RCOND*S(1). int RANK = 0; // LWORK is INTEGER // The dimension of the array WORK. LWORK >= 1, and also: // LWORK >= 3*min(M,N) + max( 2*min(M,N), max(M,N), NRHS ) // For good performance, LWORK should generally be larger. // // If LWORK = -1, then a workspace query is assumed; the routine // only calculates the optimal size of the WORK array, returns // this value as the first entry of the WORK array, and no error // message related to LWORK is issued by XERBLA. // // The factor of 1.5 is arbitrary and is used to satisfy the "should // generally be larger" clause. int LWORK = 1.5 * (3*min_MN + std::max(2*min_MN, std::max(max_MN, NRHS))); // WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK)) // On exit, if INFO = 0, WORK(1) returns the optimal LWORK. std::vector<T> WORK(LWORK); // INFO is INTEGER // = 0: successful exit // < 0: if INFO = -i, the i-th argument had an illegal value. // > 0: the algorithm for computing the SVD failed to converge; // if INFO = i, i off-diagonal elements of an intermediate // bidiagonal form did not converge to zero. int INFO = 0; // LAPACKgelss_(const PetscBLASInt *, // M // const PetscBLASInt *, // N // const PetscBLASInt *, // NRHS // PetscScalar *, // A // const PetscBLASInt *, // LDA // PetscScalar *, // B // const PetscBLASInt *, // LDB // PetscReal *, // S(out) = singular values of A in increasing order // const PetscReal *, // RCOND = tolerance for singular values // PetscBLASInt *, // RANK(out) = number of "non-zero" singular values // PetscScalar *, // WORK // const PetscBLASInt *, // LWORK // PetscBLASInt *); // INFO LAPACKgelss_(&M, &N, &NRHS, &A_trans_vals[0], &LDA, &B[0], &LDB, &S[0], &RCOND, &RANK, &WORK[0], &LWORK, &INFO); // Check for errors in the Lapack call if (INFO < 0) libmesh_error_msg("Error, argument " << -INFO << " to LAPACKgelss_ had an illegal value."); if (INFO > 0) libmesh_error_msg("The algorithm for computing the SVD failed to converge!"); // Debugging: print singular values and information about condition number: // libMesh::err << "RCOND=" << RCOND << std::endl; // libMesh::err << "Singular values: " << std::endl; // for (unsigned i=0; i<S.size(); ++i) // libMesh::err << S[i] << std::endl; // libMesh::err << "The condition number of A is approximately: " << S[0]/S.back() << std::endl; // Lapack has already written the solution into B, but it will be // the wrong size for non-square problems, so we need to resize it // correctly. The size of the solution vector should be the number // of columns of the original A matrix. Unfortunately, resizing a // DenseVector currently also zeros it out (unlike a std::vector) so // we'll resize the underlying storage directly (the size is not // stored independently elsewhere). x.get_values().resize(this->n()); }