예제 #1
0
/* approximately solve the overdetermined system:

 2*F(x_i)\cdot F(\x_j)\alpha_i = 0
 \alpha_i                      = 1

 Which minimizes the L2 norm of the linearization of:
 ||F(\sum_i \alpha_i*x_i)||^2

 With the constraint that \sum_i\alpha_i = 1
 Where x_i is the solution from the ith subsolver.
 */
static PetscErrorCode SNESCompositeApply_AdditiveOptimal(SNES snes,Vec X,Vec B,Vec F,PetscReal *fnorm)
{
  PetscErrorCode      ierr;
  SNES_Composite      *jac = (SNES_Composite*)snes->data;
  SNES_CompositeLink  next = jac->head;
  Vec                 *Xes = jac->Xes,*Fes = jac->Fes;
  PetscInt            i,j;
  PetscScalar         tot,total,ftf;
  PetscReal           min_fnorm;
  PetscInt            min_i;
  SNESConvergedReason reason;

  PetscFunctionBegin;
  if (!next) SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_ARG_WRONGSTATE,"No composite SNESes supplied via SNESCompositeAddSNES() or -snes_composite_sneses");

  if (snes->normschedule == SNES_NORM_ALWAYS) {
    next = jac->head;
    ierr = SNESSetInitialFunction(next->snes,F);CHKERRQ(ierr);
    while (next->next) {
      next = next->next;
      ierr = SNESSetInitialFunction(next->snes,F);CHKERRQ(ierr);
    }
  }

  next = jac->head;
  i = 0;
  ierr = VecCopy(X,Xes[i]);CHKERRQ(ierr);
  ierr = SNESSolve(next->snes,B,Xes[i]);CHKERRQ(ierr);
  ierr = SNESGetConvergedReason(next->snes,&reason);CHKERRQ(ierr);
  if (reason < 0 && reason != SNES_DIVERGED_MAX_IT) {
    jac->innerFailures++;
    if (jac->innerFailures >= snes->maxFailures) {
      snes->reason = SNES_DIVERGED_INNER;
      PetscFunctionReturn(0);
    }
  }
  while (next->next) {
    i++;
    next = next->next;
    ierr = VecCopy(X,Xes[i]);CHKERRQ(ierr);
    ierr = SNESSolve(next->snes,B,Xes[i]);CHKERRQ(ierr);
    ierr = SNESGetConvergedReason(next->snes,&reason);CHKERRQ(ierr);
    if (reason < 0 && reason != SNES_DIVERGED_MAX_IT) {
      jac->innerFailures++;
      if (jac->innerFailures >= snes->maxFailures) {
        snes->reason = SNES_DIVERGED_INNER;
        PetscFunctionReturn(0);
      }
    }
  }

  /* all the solutions are collected; combine optimally */
  for (i=0;i<jac->n;i++) {
    for (j=0;j<i+1;j++) {
      ierr = VecDotBegin(Fes[i],Fes[j],&jac->h[i + j*jac->n]);CHKERRQ(ierr);
    }
    ierr = VecDotBegin(Fes[i],F,&jac->g[i]);CHKERRQ(ierr);
  }

  for (i=0;i<jac->n;i++) {
    for (j=0;j<i+1;j++) {
      ierr = VecDotEnd(Fes[i],Fes[j],&jac->h[i + j*jac->n]);CHKERRQ(ierr);
      if (i == j) jac->fnorms[i] = PetscSqrtReal(PetscRealPart(jac->h[i + j*jac->n]));
    }
    ierr = VecDotEnd(Fes[i],F,&jac->g[i]);CHKERRQ(ierr);
  }

  ftf = (*fnorm)*(*fnorm);

  for (i=0; i<jac->n; i++) {
    for (j=i+1;j<jac->n;j++) {
      jac->h[i + j*jac->n] = jac->h[j + i*jac->n];
    }
  }

  for (i=0; i<jac->n; i++) {
    for (j=0; j<jac->n; j++) {
      jac->h[i + j*jac->n] = jac->h[i + j*jac->n] - jac->g[j] - jac->g[i] + ftf;
    }
    jac->beta[i] = ftf - jac->g[i];
  }

#if defined(PETSC_MISSING_LAPACK_GELSS)
  SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_SUP,"SNESCOMPOSITE with ADDITIVEOPTIMAL requires the LAPACK GELSS routine.");
#else
  jac->info  = 0;
  jac->rcond = -1.;
  ierr          = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
  PetscStackCall("LAPACKgelss",LAPACKgelss_(&jac->n,&jac->n,&jac->nrhs,jac->h,&jac->lda,jac->beta,&jac->lda,jac->s,&jac->rcond,&jac->rank,jac->work,&jac->lwork,jac->rwork,&jac->info));
#else
  PetscStackCall("LAPACKgelss",LAPACKgelss_(&jac->n,&jac->n,&jac->nrhs,jac->h,&jac->lda,jac->beta,&jac->lda,jac->s,&jac->rcond,&jac->rank,jac->work,&jac->lwork,&jac->info));
#endif
  ierr = PetscFPTrapPop();CHKERRQ(ierr);
  if (jac->info < 0) SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_LIB,"Bad argument to GELSS");
  if (jac->info > 0) SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_LIB,"SVD failed to converge");
#endif
  tot = 0.;
  total = 0.;
  for (i=0; i<jac->n; i++) {
    if (snes->errorifnotconverged && PetscIsInfOrNanScalar(jac->beta[i])) SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_LIB,"SVD generated inconsistent output");
    ierr = PetscInfo2(snes,"%D: %g\n",i,(double)PetscRealPart(jac->beta[i]));CHKERRQ(ierr);
    tot += jac->beta[i];
    total += PetscAbsScalar(jac->beta[i]);
  }
  ierr = VecScale(X,(1. - tot));CHKERRQ(ierr);
  ierr = VecMAXPY(X,jac->n,jac->beta,Xes);CHKERRQ(ierr);
  ierr = SNESComputeFunction(snes,X,F);CHKERRQ(ierr);

  if (snes->xl && snes->xu) {
    ierr = SNESVIComputeInactiveSetFnorm(snes, F, X, fnorm);CHKERRQ(ierr);
  } else {
    ierr = VecNorm(F, NORM_2, fnorm);CHKERRQ(ierr);
  }

  /* take the minimum-normed candidate if it beats the combination by a factor of rtol or the combination has stagnated */
  min_fnorm = jac->fnorms[0];
  min_i     = 0;
  for (i=0; i<jac->n; i++) {
    if (jac->fnorms[i] < min_fnorm) {
      min_fnorm = jac->fnorms[i];
      min_i     = i;
    }
  }

  /* stagnation or divergence restart to the solution of the solver that failed the least */
  if (PetscRealPart(total) < jac->stol || min_fnorm*jac->rtol < *fnorm) {
    ierr = VecCopy(jac->Xes[min_i],X);CHKERRQ(ierr);
    ierr = VecCopy(jac->Fes[min_i],F);CHKERRQ(ierr);
    *fnorm = min_fnorm;
  }
  PetscFunctionReturn(0);
}
예제 #2
0
파일: ngmresfunc.c 프로젝트: hansec/petsc
PetscErrorCode SNESNGMRESFormCombinedSolution_Private(SNES snes,PetscInt l,Vec XM,Vec FM,PetscReal fMnorm,Vec X,Vec XA,Vec FA)
{
  SNES_NGMRES    *ngmres = (SNES_NGMRES*) snes->data;
  PetscInt       i,j;
  Vec            *Fdot      = ngmres->Fdot;
  Vec            *Xdot      = ngmres->Xdot;
  PetscScalar    *beta      = ngmres->beta;
  PetscScalar    *xi        = ngmres->xi;
  PetscScalar    alph_total = 0.;
  PetscErrorCode ierr;
  PetscReal      nu;
  Vec            Y = snes->work[2];
  PetscBool      changed_y,changed_w;

  PetscFunctionBegin;
  nu = fMnorm*fMnorm;

  /* construct the right hand side and xi factors */
  ierr = VecMDot(FM,l,Fdot,xi);CHKERRQ(ierr);
  for (i = 0; i < l; i++) beta[i] = nu - xi[i];

  /* construct h */
  for (j = 0; j < l; j++) {
    for (i = 0; i < l; i++) {
      H(i,j) = Q(i,j)-xi[i]-xi[j]+nu;
    }
  }
  if (l == 1) {
    /* simply set alpha[0] = beta[0] / H[0, 0] */
    if (H(0,0) != 0.) beta[0] = beta[0]/H(0,0);
    else beta[0] = 0.;
  } else {
#if defined(PETSC_MISSING_LAPACK_GELSS)
    SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_SUP,"NGMRES with LS requires the LAPACK GELSS routine.");
#else
    ierr          = PetscBLASIntCast(l,&ngmres->m);CHKERRQ(ierr);
    ierr          = PetscBLASIntCast(l,&ngmres->n);CHKERRQ(ierr);
    ngmres->info  = 0;
    ngmres->rcond = -1.;
    ierr          = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
    PetscStackCall("LAPACKgelss",LAPACKgelss_(&ngmres->m,&ngmres->n,&ngmres->nrhs,ngmres->h,&ngmres->lda,ngmres->beta,&ngmres->ldb,ngmres->s,&ngmres->rcond,&ngmres->rank,ngmres->work,&ngmres->lwork,ngmres->rwork,&ngmres->info));
#else
    PetscStackCall("LAPACKgelss",LAPACKgelss_(&ngmres->m,&ngmres->n,&ngmres->nrhs,ngmres->h,&ngmres->lda,ngmres->beta,&ngmres->ldb,ngmres->s,&ngmres->rcond,&ngmres->rank,ngmres->work,&ngmres->lwork,&ngmres->info));
#endif
    ierr = PetscFPTrapPop();CHKERRQ(ierr);
    if (ngmres->info < 0) SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_LIB,"Bad argument to GELSS");
    if (ngmres->info > 0) SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_LIB,"SVD failed to converge");
#endif
  }
  for (i=0; i<l; i++) {
    if (PetscIsInfOrNanScalar(beta[i])) SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_LIB,"SVD generated inconsistent output");
  }
  alph_total = 0.;
  for (i = 0; i < l; i++) alph_total += beta[i];

  ierr = VecCopy(XM,XA);CHKERRQ(ierr);
  ierr = VecScale(XA,1.-alph_total);CHKERRQ(ierr);
  ierr = VecMAXPY(XA,l,beta,Xdot);CHKERRQ(ierr);
  /* check the validity of the step */
  ierr = VecCopy(XA,Y);CHKERRQ(ierr);
  ierr = VecAXPY(Y,-1.0,X);CHKERRQ(ierr);
  ierr = SNESLineSearchPostCheck(snes->linesearch,X,Y,XA,&changed_y,&changed_w);CHKERRQ(ierr);
  if (!ngmres->approxfunc) {ierr = SNESComputeFunction(snes,XA,FA);CHKERRQ(ierr);}
  else {
    ierr = VecCopy(FM,FA);CHKERRQ(ierr);
    ierr = VecScale(FA,1.-alph_total);CHKERRQ(ierr);
    ierr = VecMAXPY(FA,l,beta,Fdot);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
void DenseMatrix<T>::_svd_solve_lapack(const DenseVector<T> & rhs,
                                       DenseVector<T> & x,
                                       Real rcond) const
{
  // Since BLAS is expecting column-major storage, we first need to
  // make a transposed copy of *this, then pass it to the gelss
  // routine instead of the original.  This extra copy is kind of a
  // bummer, it might be better if we could use the full SVD to
  // compute the least-squares solution instead...  Note that it isn't
  // completely terrible either, since A_trans gets overwritten by
  // Lapack, and we usually would end up making a copy of A outside
  // the function call anyway.
  DenseMatrix<T> A_trans;
  this->get_transpose(A_trans);

  // M is INTEGER
  // The number of rows of the input matrix. M >= 0.
  // This is actually the number of *columns* of A_trans.
  int M = A_trans.n();

  // N is INTEGER
  // The number of columns of the matrix A. N >= 0.
  // This is actually the number of *rows* of A_trans.
  int N = A_trans.m();

  // We'll use the min and max of (M,N) several times below.
  int max_MN = std::max(M,N);
  int min_MN = std::min(M,N);

  // NRHS is INTEGER
  // The number of right hand sides, i.e., the number of columns
  // of the matrices B and X. NRHS >= 0.
  // This could later be generalized to solve for multiple right-hand
  // sides...
  int NRHS = 1;

  // A is DOUBLE PRECISION array, dimension (LDA,N)
  // On entry, the M-by-N matrix A.
  // On exit, the first min(m,n) rows of A are overwritten with
  // its right singular vectors, stored rowwise.
  //
  // The data vector that will be passed to Lapack.
  std::vector<T> & A_trans_vals = A_trans.get_values();

  // LDA is INTEGER
  // The leading dimension of the array A.  LDA >= max(1,M).
  int LDA = M;

  // B is DOUBLE PRECISION array, dimension (LDB,NRHS)
  // On entry, the M-by-NRHS right hand side matrix B.
  // On exit, B is overwritten by the N-by-NRHS solution
  // matrix X.  If m >= n and RANK = n, the residual
  // sum-of-squares for the solution in the i-th column is given
  // by the sum of squares of elements n+1:m in that column.
  //
  // Since we don't want the user's rhs vector to be overwritten by
  // the solution, we copy the rhs values into the solution vector "x"
  // now.  x needs to be long enough to hold both the (Nx1) solution
  // vector or the (Mx1) rhs, so size it to the max of those.
  x.resize(max_MN);
  for (unsigned i=0; i<rhs.size(); ++i)
    x(i) = rhs(i);

  // Make the syntax below simpler by grabbing a reference to this array.
  std::vector<T> & B = x.get_values();

  // LDB is INTEGER
  // The leading dimension of the array B. LDB >= max(1,max(M,N)).
  int LDB = x.size();

  // S is DOUBLE PRECISION array, dimension (min(M,N))
  // The singular values of A in decreasing order.
  // The condition number of A in the 2-norm = S(1)/S(min(m,n)).
  std::vector<T> S(min_MN);

  // RCOND is DOUBLE PRECISION
  // RCOND is used to determine the effective rank of A.
  // Singular values S(i) <= RCOND*S(1) are treated as zero.
  // If RCOND < 0, machine precision is used instead.
  Real RCOND = rcond;

  // RANK is INTEGER
  // The effective rank of A, i.e., the number of singular values
  // which are greater than RCOND*S(1).
  int RANK = 0;

  // LWORK is INTEGER
  // The dimension of the array WORK. LWORK >= 1, and also:
  // LWORK >= 3*min(M,N) + max( 2*min(M,N), max(M,N), NRHS )
  // For good performance, LWORK should generally be larger.
  //
  // If LWORK = -1, then a workspace query is assumed; the routine
  // only calculates the optimal size of the WORK array, returns
  // this value as the first entry of the WORK array, and no error
  // message related to LWORK is issued by XERBLA.
  //
  // The factor of 1.5 is arbitrary and is used to satisfy the "should
  // generally be larger" clause.
  int LWORK = 1.5 * (3*min_MN + std::max(2*min_MN, std::max(max_MN, NRHS)));

  // WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
  // On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
  std::vector<T> WORK(LWORK);

  // INFO is INTEGER
  // = 0:  successful exit
  // < 0:  if INFO = -i, the i-th argument had an illegal value.
  // > 0:  the algorithm for computing the SVD failed to converge;
  //       if INFO = i, i off-diagonal elements of an intermediate
  //       bidiagonal form did not converge to zero.
  int INFO = 0;

  // LAPACKgelss_(const PetscBLASInt *, // M
  //              const PetscBLASInt *, // N
  //              const PetscBLASInt *, // NRHS
  //              PetscScalar *,        // A
  //              const PetscBLASInt *, // LDA
  //              PetscScalar *,        // B
  //              const PetscBLASInt *, // LDB
  //              PetscReal *,          // S(out) = singular values of A in increasing order
  //              const PetscReal *,    // RCOND = tolerance for singular values
  //              PetscBLASInt *,       // RANK(out) = number of "non-zero" singular values
  //              PetscScalar *,        // WORK
  //              const PetscBLASInt *, // LWORK
  //              PetscBLASInt *);      // INFO
  LAPACKgelss_(&M, &N, &NRHS, &A_trans_vals[0], &LDA, &B[0], &LDB, &S[0], &RCOND, &RANK, &WORK[0], &LWORK, &INFO);

  // Check for errors in the Lapack call
  if (INFO < 0)
    libmesh_error_msg("Error, argument " << -INFO << " to LAPACKgelss_ had an illegal value.");
  if (INFO > 0)
    libmesh_error_msg("The algorithm for computing the SVD failed to converge!");

  // Debugging: print singular values and information about condition number:
  // libMesh::err << "RCOND=" << RCOND << std::endl;
  // libMesh::err << "Singular values: " << std::endl;
  // for (unsigned i=0; i<S.size(); ++i)
  //   libMesh::err << S[i] << std::endl;
  // libMesh::err << "The condition number of A is approximately: " << S[0]/S.back() << std::endl;

  // Lapack has already written the solution into B, but it will be
  // the wrong size for non-square problems, so we need to resize it
  // correctly.  The size of the solution vector should be the number
  // of columns of the original A matrix.  Unfortunately, resizing a
  // DenseVector currently also zeros it out (unlike a std::vector) so
  // we'll resize the underlying storage directly (the size is not
  // stored independently elsewhere).
  x.get_values().resize(this->n());
}