Пример #1
0
PetscErrorCode DSUpdateExtraRow_NHEP(DS ds)
{
  PetscErrorCode ierr;
  PetscInt       i;
  PetscBLASInt   n,ld,incx=1;
  PetscScalar    *A,*Q,*x,*y,one=1.0,zero=0.0;

  PetscFunctionBegin;
  ierr = PetscBLASIntCast(ds->n,&n);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(ds->ld,&ld);CHKERRQ(ierr);
  A  = ds->mat[DS_MAT_A];
  Q  = ds->mat[DS_MAT_Q];
  ierr = DSAllocateWork_Private(ds,2*ld,0,0);CHKERRQ(ierr);
  x = ds->work;
  y = ds->work+ld;
  for (i=0;i<n;i++) x[i] = PetscConj(A[n+i*ld]);
  PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&n,&one,Q,&ld,x,&incx,&zero,y,&incx));
  for (i=0;i<n;i++) A[n+i*ld] = PetscConj(y[i]);
  ds->k = n;
  PetscFunctionReturn(0);
}
Пример #2
0
PetscErrorCode  VecSquare(Vec v)
{
  PetscErrorCode ierr;
  PetscScalar    *x;
  PetscInt       i, n;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(v, &n);CHKERRQ(ierr);
  ierr = VecGetArray(v, &x);CHKERRQ(ierr);
  for (i = 0; i < n; i++) x[i] *= PetscConj(x[i]);
  ierr = VecRestoreArray(v, &x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Пример #3
0
PetscErrorCode VecConjugate_Seq(Vec xin)
{
  PetscScalar    *x;
  PetscInt       n = xin->map->n;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecGetArray(xin,&x);CHKERRQ(ierr);
  while (n-->0) {
    *x = PetscConj(*x);
    x++;
  }
  ierr = VecRestoreArray(xin,&x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Пример #4
0
/*
  Custom CGS orthogonalization, preprocess after first orthogonalization
*/
static PetscErrorCode SVDOrthogonalizeCGS(BV V,PetscInt i,PetscScalar* h,PetscReal a,BVOrthogRefineType refine,PetscReal eta,PetscReal *norm)
{
  PetscErrorCode ierr;
  PetscReal      sum,onorm;
  PetscScalar    dot;
  PetscInt       j;

  PetscFunctionBegin;
  switch (refine) {
  case BV_ORTHOG_REFINE_NEVER:
    ierr = BVNormColumn(V,i,NORM_2,norm);CHKERRQ(ierr);
    break;
  case BV_ORTHOG_REFINE_ALWAYS:
    ierr = BVSetActiveColumns(V,0,i);CHKERRQ(ierr);
    ierr = BVDotColumn(V,i,h);CHKERRQ(ierr);
    ierr = BVMultColumn(V,-1.0,1.0,i,h);CHKERRQ(ierr);
    ierr = BVNormColumn(V,i,NORM_2,norm);CHKERRQ(ierr);
    break;
  case BV_ORTHOG_REFINE_IFNEEDED:
    dot = h[i];
    onorm = PetscSqrtReal(PetscRealPart(dot)) / a;
    sum = 0.0;
    for (j=0;j<i;j++) {
      sum += PetscRealPart(h[j] * PetscConj(h[j]));
    }
    *norm = PetscRealPart(dot)/(a*a) - sum;
    if (*norm>0.0) *norm = PetscSqrtReal(*norm);
    else {
      ierr = BVNormColumn(V,i,NORM_2,norm);CHKERRQ(ierr);
    }
    if (*norm < eta*onorm) {
      ierr = BVSetActiveColumns(V,0,i);CHKERRQ(ierr);
      ierr = BVDotColumn(V,i,h);CHKERRQ(ierr);
      ierr = BVMultColumn(V,-1.0,1.0,i,h);CHKERRQ(ierr);
      ierr = BVNormColumn(V,i,NORM_2,norm);CHKERRQ(ierr);
    }
    break;
  }
  PetscFunctionReturn(0);
}
Пример #5
0
static PetscErrorCode getSumSquares(Mat matrix, double *diag) {
   PetscErrorCode ierr;
   int i, j;
   double *sumr, *sumc;
   PetscInt n, mLocal, nLocal, low, high;
   PetscReal *aux;

   PetscFunctionBegin;

   ierr = MatGetSize(matrix, NULL, &n); CHKERRQ(ierr);
   ierr = MatGetLocalSize(matrix, &mLocal, &nLocal); CHKERRQ(ierr);
   sumr = diag; sumc = &diag[mLocal];

   ierr = PetscMalloc1(n, &aux); CHKERRQ(ierr);
   ierr = MatGetColumnNorms(matrix, NORM_2, aux); CHKERRQ(ierr);
   ierr = MatGetOwnershipRangeColumn(matrix, &low, &high);CHKERRQ(ierr);  
   for (i=low; i<high; i++) {
      sumc[i-low] = aux[i]*aux[i];
   }
   ierr = PetscFree(aux); CHKERRQ(ierr);

   ierr = MatGetOwnershipRange(matrix, &low, &high); CHKERRQ(ierr);
   for (i=low; i<high; i++) {
     PetscInt          ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;

     sumr[i-low] = 0.0;
     ierr = MatGetRow(matrix, i, &ncols, &cols, &vals); CHKERRQ(ierr);
     for (j = 0; j < ncols; j++) {
       sumr[i-low] += PetscRealPart(vals[j]*PetscConj(vals[j]));
     }
     ierr = MatRestoreRow(matrix, i, &ncols, &cols, &vals); CHKERRQ(ierr);
   }

   PetscFunctionReturn(0);
}
Пример #6
0
static PetscErrorCode  KSPSolve_BCGSL(KSP ksp)
{
  KSP_BCGSL      *bcgsl = (KSP_BCGSL*) ksp->data;
  PetscScalar    alpha, beta, omega, sigma;
  PetscScalar    rho0, rho1;
  PetscReal      kappa0, kappaA, kappa1;
  PetscReal      ghat;
  PetscReal      zeta, zeta0, rnmax_computed, rnmax_true, nrm0;
  PetscBool      bUpdateX;
  PetscInt       maxit;
  PetscInt       h, i, j, k, vi, ell;
  PetscBLASInt   ldMZ,bierr;
  PetscScalar    utb;
  PetscReal      max_s, pinv_tol;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* set up temporary vectors */
  vi         = 0;
  ell        = bcgsl->ell;
  bcgsl->vB  = ksp->work[vi]; vi++;
  bcgsl->vRt = ksp->work[vi]; vi++;
  bcgsl->vTm = ksp->work[vi]; vi++;
  bcgsl->vvR = ksp->work+vi; vi += ell+1;
  bcgsl->vvU = ksp->work+vi; vi += ell+1;
  bcgsl->vXr = ksp->work[vi]; vi++;
  ierr       = PetscBLASIntCast(ell+1,&ldMZ);CHKERRQ(ierr);

  /* Prime the iterative solver */
  ierr           = KSPInitialResidual(ksp, VX, VTM, VB, VVR[0], ksp->vec_rhs);CHKERRQ(ierr);
  ierr           = VecNorm(VVR[0], NORM_2, &zeta0);CHKERRQ(ierr);
  rnmax_computed = zeta0;
  rnmax_true     = zeta0;

  ierr = (*ksp->converged)(ksp, 0, zeta0, &ksp->reason, ksp->cnvP);CHKERRQ(ierr);
  if (ksp->reason) {
    ierr       = PetscObjectAMSTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
    ksp->its   = 0;
    ksp->rnorm = zeta0;
    ierr       = PetscObjectAMSGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  ierr  = VecSet(VVU[0],0.0);CHKERRQ(ierr);
  alpha = 0.;
  rho0  = omega = 1;

  if (bcgsl->delta>0.0) {
    ierr = VecCopy(VX, VXR);CHKERRQ(ierr);
    ierr = VecSet(VX,0.0);CHKERRQ(ierr);
    ierr = VecCopy(VVR[0], VB);CHKERRQ(ierr);
  } else {
    ierr = VecCopy(ksp->vec_rhs, VB);CHKERRQ(ierr);
  }

  /* Life goes on */
  ierr = VecCopy(VVR[0], VRT);CHKERRQ(ierr);
  zeta = zeta0;

  ierr = KSPGetTolerances(ksp, NULL, NULL, NULL, &maxit);CHKERRQ(ierr);

  for (k=0; k<maxit; k += bcgsl->ell) {
    ksp->its   = k;
    ksp->rnorm = zeta;

    ierr = KSPLogResidualHistory(ksp, zeta);CHKERRQ(ierr);
    ierr = KSPMonitor(ksp, ksp->its, zeta);CHKERRQ(ierr);

    ierr = (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);CHKERRQ(ierr);
    if (ksp->reason < 0) PetscFunctionReturn(0);
    else if (ksp->reason) break;

    /* BiCG part */
    rho0 = -omega*rho0;
    nrm0 = zeta;
    for (j=0; j<bcgsl->ell; j++) {
      /* rho1 <- r_j' * r_tilde */
      ierr = VecDot(VVR[j], VRT, &rho1);CHKERRQ(ierr);
      if (rho1 == 0.0) {
        ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
        PetscFunctionReturn(0);
      }
      beta = alpha*(rho1/rho0);
      rho0 = rho1;
      for (i=0; i<=j; i++) {
        /* u_i <- r_i - beta*u_i */
        ierr = VecAYPX(VVU[i], -beta, VVR[i]);CHKERRQ(ierr);
      }
      /* u_{j+1} <- inv(K)*A*u_j */
      ierr = KSP_PCApplyBAorAB(ksp, VVU[j], VVU[j+1], VTM);CHKERRQ(ierr);

      ierr = VecDot(VVU[j+1], VRT, &sigma);CHKERRQ(ierr);
      if (sigma == 0.0) {
        ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
        PetscFunctionReturn(0);
      }
      alpha = rho1/sigma;

      /* x <- x + alpha*u_0 */
      ierr = VecAXPY(VX, alpha, VVU[0]);CHKERRQ(ierr);

      for (i=0; i<=j; i++) {
        /* r_i <- r_i - alpha*u_{i+1} */
        ierr = VecAXPY(VVR[i], -alpha, VVU[i+1]);CHKERRQ(ierr);
      }

      /* r_{j+1} <- inv(K)*A*r_j */
      ierr = KSP_PCApplyBAorAB(ksp, VVR[j], VVR[j+1], VTM);CHKERRQ(ierr);

      ierr = VecNorm(VVR[0], NORM_2, &nrm0);CHKERRQ(ierr);
      if (bcgsl->delta>0.0) {
        if (rnmax_computed<nrm0) rnmax_computed = nrm0;
        if (rnmax_true<nrm0) rnmax_true = nrm0;
      }

      /* NEW: check for early exit */
      ierr = (*ksp->converged)(ksp, k+j, nrm0, &ksp->reason, ksp->cnvP);CHKERRQ(ierr);
      if (ksp->reason) {
        ierr = PetscObjectAMSTakeAccess((PetscObject)ksp);CHKERRQ(ierr);

        ksp->its   = k+j;
        ksp->rnorm = nrm0;

        ierr = PetscObjectAMSGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
        if (ksp->reason < 0) PetscFunctionReturn(0);
      }
    }

    /* Polynomial part */
    for (i = 0; i <= bcgsl->ell; ++i) {
      ierr = VecMDot(VVR[i], i+1, VVR, &MZa[i*ldMZ]);CHKERRQ(ierr);
    }
    /* Symmetrize MZa */
    for (i = 0; i <= bcgsl->ell; ++i) {
      for (j = i+1; j <= bcgsl->ell; ++j) {
        MZa[i*ldMZ+j] = MZa[j*ldMZ+i] = PetscConj(MZa[j*ldMZ+i]);
      }
    }
    /* Copy MZa to MZb */
    ierr = PetscMemcpy(MZb,MZa,ldMZ*ldMZ*sizeof(PetscScalar));CHKERRQ(ierr);

    if (!bcgsl->bConvex || bcgsl->ell==1) {
      PetscBLASInt ione = 1,bell;
      ierr = PetscBLASIntCast(bcgsl->ell,&bell);CHKERRQ(ierr);

      AY0c[0] = -1;
      if (bcgsl->pinv) {
#if defined(PETSC_MISSING_LAPACK_GESVD)
        SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESVD - Lapack routine is unavailable.");
#else
#  if defined(PETSC_USE_COMPLEX)
        PetscStackCall("LAPACKgesvd",LAPACKgesvd_("A","A",&bell,&bell,&MZa[1+ldMZ],&ldMZ,bcgsl->s,bcgsl->u,&bell,bcgsl->v,&bell,bcgsl->work,&bcgsl->lwork,bcgsl->realwork,&bierr));
#  else
        PetscStackCall("LAPACKgesvd",LAPACKgesvd_("A","A",&bell,&bell,&MZa[1+ldMZ],&ldMZ,bcgsl->s,bcgsl->u,&bell,bcgsl->v,&bell,bcgsl->work,&bcgsl->lwork,&bierr));
#  endif
#endif
        if (bierr!=0) {
          ksp->reason = KSP_DIVERGED_BREAKDOWN;
          PetscFunctionReturn(0);
        }
        /* Apply pseudo-inverse */
        max_s = bcgsl->s[0];
        for (i=1; i<bell; i++) {
          if (bcgsl->s[i] > max_s) {
            max_s = bcgsl->s[i];
          }
        }
        /* tolerance is hardwired to bell*max(s)*PETSC_MACHINE_EPSILON */
        pinv_tol = bell*max_s*PETSC_MACHINE_EPSILON;
        ierr = PetscMemzero(&AY0c[1],bell*sizeof(PetscScalar));CHKERRQ(ierr);
        for (i=0; i<bell; i++) {
          if (bcgsl->s[i] >= pinv_tol) {
            utb=0.;
            for (j=0; j<bell; j++) {
              utb += MZb[1+j]*bcgsl->u[i*bell+j];
            }

            for (j=0; j<bell; j++) {
              AY0c[1+j] += utb/bcgsl->s[i]*bcgsl->v[j*bell+i];
            }
          }
        }
      } else {
#if defined(PETSC_MISSING_LAPACK_POTRF)
        SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"POTRF - Lapack routine is unavailable.");
#else
        PetscStackCall("LAPACKpotrf",LAPACKpotrf_("Lower", &bell, &MZa[1+ldMZ], &ldMZ, &bierr));
#endif
        if (bierr!=0) {
          ksp->reason = KSP_DIVERGED_BREAKDOWN;
          PetscFunctionReturn(0);
        }
        ierr = PetscMemcpy(&AY0c[1],&MZb[1],bcgsl->ell*sizeof(PetscScalar));CHKERRQ(ierr);
        PetscStackCall("LAPACKpotrs",LAPACKpotrs_("Lower", &bell, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr));
      }
    } else {
      PetscBLASInt ione = 1;
      PetscScalar  aone = 1.0, azero = 0.0;
      PetscBLASInt neqs;
      ierr = PetscBLASIntCast(bcgsl->ell-1,&neqs);CHKERRQ(ierr);

#if defined(PETSC_MISSING_LAPACK_POTRF)
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"POTRF - Lapack routine is unavailable.");
#else
      PetscStackCall("LAPACKpotrf",LAPACKpotrf_("Lower", &neqs, &MZa[1+ldMZ], &ldMZ, &bierr));
#endif
      if (bierr!=0) {
        ksp->reason = KSP_DIVERGED_BREAKDOWN;
        PetscFunctionReturn(0);
      }
      ierr = PetscMemcpy(&AY0c[1],&MZb[1],(bcgsl->ell-1)*sizeof(PetscScalar));CHKERRQ(ierr);
      PetscStackCall("LAPACKpotrs",LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr));
      AY0c[0]          = -1;
      AY0c[bcgsl->ell] = 0.;

      ierr = PetscMemcpy(&AYlc[1],&MZb[1+ldMZ*(bcgsl->ell)],(bcgsl->ell-1)*sizeof(PetscScalar));CHKERRQ(ierr);
      PetscStackCall("LAPACKpotrs",LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AYlc[1], &ldMZ, &bierr));

      AYlc[0]          = 0.;
      AYlc[bcgsl->ell] = -1;

      PetscStackCall("BLASgemv",BLASgemv_("NoTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AY0c, &ione, &azero, AYtc, &ione));

      kappa0 = PetscRealPart(BLASdot_(&ldMZ, AY0c, &ione, AYtc, &ione));

      /* round-off can cause negative kappa's */
      if (kappa0<0) kappa0 = -kappa0;
      kappa0 = PetscSqrtReal(kappa0);

      kappaA = PetscRealPart(BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione));

      PetscStackCall("BLASgemv",BLASgemv_("noTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AYlc, &ione, &azero, AYtc, &ione));

      kappa1 = PetscRealPart(BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione));

      if (kappa1<0) kappa1 = -kappa1;
      kappa1 = PetscSqrtReal(kappa1);

      if (kappa0!=0.0 && kappa1!=0.0) {
        if (kappaA<0.7*kappa0*kappa1) {
          ghat = (kappaA<0.0) ?  -0.7*kappa0/kappa1 : 0.7*kappa0/kappa1;
        } else {
          ghat = kappaA/(kappa1*kappa1);
        }
        for (i=0; i<=bcgsl->ell; i++) {
          AY0c[i] = AY0c[i] - ghat* AYlc[i];
        }
      }
    }

    omega = AY0c[bcgsl->ell];
    for (h=bcgsl->ell; h>0 && omega==0.0; h--) omega = AY0c[h];
    if (omega==0.0) {
      ksp->reason = KSP_DIVERGED_BREAKDOWN;
      PetscFunctionReturn(0);
    }


    ierr = VecMAXPY(VX, bcgsl->ell,AY0c+1, VVR);CHKERRQ(ierr);
    for (i=1; i<=bcgsl->ell; i++) AY0c[i] *= -1.0;
    ierr = VecMAXPY(VVU[0], bcgsl->ell,AY0c+1, VVU+1);CHKERRQ(ierr);
    ierr = VecMAXPY(VVR[0], bcgsl->ell,AY0c+1, VVR+1);CHKERRQ(ierr);
    for (i=1; i<=bcgsl->ell; i++) AY0c[i] *= -1.0;
    ierr = VecNorm(VVR[0], NORM_2, &zeta);CHKERRQ(ierr);

    /* Accurate Update */
    if (bcgsl->delta>0.0) {
      if (rnmax_computed<zeta) rnmax_computed = zeta;
      if (rnmax_true<zeta) rnmax_true = zeta;

      bUpdateX = (PetscBool) (zeta<bcgsl->delta*zeta0 && zeta0<=rnmax_computed);
      if ((zeta<bcgsl->delta*rnmax_true && zeta0<=rnmax_true) || bUpdateX) {
        /* r0 <- b-inv(K)*A*X */
        ierr       = KSP_PCApplyBAorAB(ksp, VX, VVR[0], VTM);CHKERRQ(ierr);
        ierr       = VecAYPX(VVR[0], -1.0, VB);CHKERRQ(ierr);
        rnmax_true = zeta;

        if (bUpdateX) {
          ierr           = VecAXPY(VXR,1.0,VX);CHKERRQ(ierr);
          ierr           = VecSet(VX,0.0);CHKERRQ(ierr);
          ierr           = VecCopy(VVR[0], VB);CHKERRQ(ierr);
          rnmax_computed = zeta;
        }
      }
    }
  }
  if (bcgsl->delta>0.0) {
    ierr = VecAXPY(VX,1.0,VXR);CHKERRQ(ierr);
  }

  ierr = (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);CHKERRQ(ierr);
  if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
  PetscFunctionReturn(0);
}
Пример #7
0
static PetscErrorCode  KSPSolve_IBCGS(KSP ksp)
{
  PetscErrorCode ierr;
  PetscInt       i,N;
  PetscReal      rnorm,rnormin = 0.0;
#if defined(PETSC_HAVE_MPI_LONG_DOUBLE) && !defined(PETSC_USE_COMPLEX) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
  /* Because of possible instabilities in the algorithm (as indicated by different residual histories for the same problem 
     on the same number of processes  with different runs) we support computing the inner products using Intel's 80 bit arithematic
     rather than just 64 bit. Thus we copy our double precision values into long doubles (hoping this keeps the 16 extra bits)
     and tell MPI to do its ALlreduces with MPI_LONG_DOUBLE.

     Note for developers that does not effect the code. Intel's long double is implemented by storing the 80 bits of extended double
     precision into a 16 byte space (the rest of the space is ignored)  */
  long double    insums[7],outsums[7];
#else
  PetscScalar    insums[7],outsums[7];
#endif
  PetscScalar    sigman_2, sigman_1, sigman, pin_1, pin, phin_1, phin,tmp1,tmp2;
  PetscScalar    taun_1, taun, rhon, alphan_1, alphan, omegan_1, omegan;
  const PetscScalar *PETSC_RESTRICT r0, *PETSC_RESTRICT f0, *PETSC_RESTRICT qn, *PETSC_RESTRICT b, *PETSC_RESTRICT un;
  PetscScalar    *PETSC_RESTRICT rn, *PETSC_RESTRICT xn, *PETSC_RESTRICT vn, *PETSC_RESTRICT zn;
  /* the rest do not have to keep n_1 values */
  PetscScalar    kappan, thetan, etan, gamman, betan, deltan;
  const PetscScalar *PETSC_RESTRICT tn;
  PetscScalar    *PETSC_RESTRICT sn;
  Vec            R0,Rn,Xn,F0,Vn,Zn,Qn,Tn,Sn,B,Un;
  Mat            A;

  PetscFunctionBegin;
  if (!ksp->vec_rhs->petscnative) SETERRQ(((PetscObject)ksp)->comm,PETSC_ERR_SUP,"Only coded for PETSc vectors");

  ierr = PCGetOperators(ksp->pc,&A,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
  ierr = VecGetLocalSize(ksp->vec_sol,&N);CHKERRQ(ierr);
  Xn = ksp->vec_sol;ierr = VecGetArray(Xn_1,(PetscScalar**)&xn_1);CHKERRQ(ierr);ierr = VecRestoreArray(Xn_1,PETSC_NULL);CHKERRQ(ierr);
  B  = ksp->vec_rhs;ierr = VecGetArrayRead(B,(const PetscScalar**)&b);ierr = VecRestoreArrayRead(B,PETSC_NULL);CHKERRQ(ierr);
  R0 = ksp->work[0];ierr = VecGetArrayRead(R0,(const PetscScalar**)&r0);CHKERRQ(ierr);ierr = VecRestoreArrayRead(R0,PETSC_NULL);CHKERRQ(ierr);
  Rn = ksp->work[1];ierr = VecGetArray(Rn_1,(PetscScalar**)&rn_1);CHKERRQ(ierr);ierr = VecRestoreArray(Rn_1,PETSC_NULL);CHKERRQ(ierr);
  Un = ksp->work[2];ierr = VecGetArrayRead(Un_1,(const PetscScalar**)&un_1);CHKERRQ(ierr);ierr = VecRestoreArrayRead(Un_1,PETSC_NULL);CHKERRQ(ierr);
  F0 = ksp->work[3];ierr = VecGetArrayRead(F0,(const PetscScalar**)&f0);CHKERRQ(ierr);ierr = VecRestoreArrayRead(F0,PETSC_NULL);CHKERRQ(ierr);
  Vn = ksp->work[4];ierr = VecGetArray(Vn_1,(PetscScalar**)&vn_1);CHKERRQ(ierr);ierr = VecRestoreArray(Vn_1,PETSC_NULL);CHKERRQ(ierr);
  Zn = ksp->work[5];ierr = VecGetArray(Zn_1,(PetscScalar**)&zn_1);CHKERRQ(ierr);ierr = VecRestoreArray(Zn_1,PETSC_NULL);CHKERRQ(ierr);
  Qn = ksp->work[6];ierr = VecGetArrayRead(Qn_1,(const PetscScalar**)&qn_1);CHKERRQ(ierr);ierr = VecRestoreArrayRead(Qn_1,PETSC_NULL);CHKERRQ(ierr);
  Tn = ksp->work[7];ierr = VecGetArrayRead(Tn,(const PetscScalar**)&tn);CHKERRQ(ierr);ierr = VecRestoreArrayRead(Tn,PETSC_NULL);CHKERRQ(ierr);
  Sn = ksp->work[8];ierr = VecGetArrayRead(Sn,(const PetscScalar**)&sn);CHKERRQ(ierr);ierr = VecRestoreArrayRead(Sn,PETSC_NULL);CHKERRQ(ierr);

  /* r0 = rn_1 = b - A*xn_1; */
  /* ierr = KSP_PCApplyBAorAB(ksp,Xn_1,Rn_1,Tn);CHKERRQ(ierr);
     ierr = VecAYPX(Rn_1,-1.0,B);CHKERRQ(ierr); */
  ierr = KSPInitialResidual(ksp,Xn_1,Tn,Sn,Rn_1,B);CHKERRQ(ierr);

  ierr = VecNorm(Rn_1,NORM_2,&rnorm);CHKERRQ(ierr);
  ierr = KSPMonitor(ksp,0,rnorm);CHKERRQ(ierr);
  ierr = (*ksp->converged)(ksp,0,rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);   
  if (ksp->reason) PetscFunctionReturn(0);

  ierr = VecCopy(Rn_1,R0);CHKERRQ(ierr);

  /* un_1 = A*rn_1; */
  ierr = KSP_PCApplyBAorAB(ksp,Rn_1,Un_1,Tn);CHKERRQ(ierr);
  
  /* f0   = A'*rn_1; */
  if (ksp->pc_side == PC_RIGHT) { /* B' A' */
    ierr = MatMultTranspose(A,R0,Tn);CHKERRQ(ierr);
    ierr = PCApplyTranspose(ksp->pc,Tn,F0);CHKERRQ(ierr);
  } else if (ksp->pc_side == PC_LEFT) { /* A' B' */
    ierr = PCApplyTranspose(ksp->pc,R0,Tn);CHKERRQ(ierr);
    ierr = MatMultTranspose(A,Tn,F0);CHKERRQ(ierr);
  }

  /*qn_1 = vn_1 = zn_1 = 0.0; */
  ierr = VecSet(Qn_1,0.0);CHKERRQ(ierr);
  ierr = VecSet(Vn_1,0.0);CHKERRQ(ierr);
  ierr = VecSet(Zn_1,0.0);CHKERRQ(ierr);

  sigman_2 = pin_1 = taun_1 = 0.0;

  /* the paper says phin_1 should be initialized to zero, it is actually R0'R0 */
  ierr = VecDot(R0,R0,&phin_1);CHKERRQ(ierr); 

  /* sigman_1 = rn_1'un_1  */
  ierr = VecDot(R0,Un_1,&sigman_1);CHKERRQ(ierr); 

  alphan_1 = omegan_1 = 1.0;

  for (ksp->its = 1; ksp->its<ksp->max_it+1; ksp->its++) {
    rhon   = phin_1 - omegan_1*sigman_2 + omegan_1*alphan_1*pin_1;
    /*    if (rhon == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_CONV_FAILED,"rhon is zero, iteration %D",n); */
    if (ksp->its == 1) deltan = rhon;
    else deltan = rhon/taun_1;
    betan  = deltan/omegan_1;
    taun   = sigman_1 + betan*taun_1  - deltan*pin_1;
    if (taun == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_CONV_FAILED,"taun is zero, iteration %D",ksp->its);
    alphan = rhon/taun;
    ierr = PetscLogFlops(15.0);

    /*  
        zn = alphan*rn_1 + (alphan/alphan_1)betan*zn_1 - alphan*deltan*vn_1
        vn = un_1 + betan*vn_1 - deltan*qn_1
        sn = rn_1 - alphan*vn

       The algorithm in the paper is missing the alphan/alphan_1 term in the zn update
    */
    ierr = PetscLogEventBegin(VEC_Ops,0,0,0,0);CHKERRQ(ierr);
    tmp1 = (alphan/alphan_1)*betan;
    tmp2 = alphan*deltan;
    for (i=0; i<N; i++) {
      zn[i] = alphan*rn_1[i] + tmp1*zn_1[i] - tmp2*vn_1[i];
      vn[i] = un_1[i] + betan*vn_1[i] - deltan*qn_1[i];
      sn[i] = rn_1[i] - alphan*vn[i];
    }
    ierr = PetscLogFlops(3.0+11.0*N);
    ierr = PetscLogEventEnd(VEC_Ops,0,0,0,0);CHKERRQ(ierr);

    /*
        qn = A*vn
    */
    ierr = KSP_PCApplyBAorAB(ksp,Vn,Qn,Tn);CHKERRQ(ierr);

    /*
        tn = un_1 - alphan*qn
    */
    ierr = VecWAXPY(Tn,-alphan,Qn,Un_1);CHKERRQ(ierr);
      

    /*
        phin = r0'sn
        pin  = r0'qn
        gamman = f0'sn
        etan   = f0'tn
        thetan = sn'tn
        kappan = tn'tn
    */
    ierr = PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);CHKERRQ(ierr);
    phin = pin = gamman = etan = thetan = kappan = 0.0;
    for (i=0; i<N; i++) {
      phin += r0[i]*sn[i];
      pin  += r0[i]*qn[i];
      gamman += f0[i]*sn[i];
      etan   += f0[i]*tn[i];
      thetan += sn[i]*tn[i];
      kappan += tn[i]*tn[i];
    }
    ierr = PetscLogFlops(12.0*N);
    ierr = PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);CHKERRQ(ierr);
    insums[0] = phin;
    insums[1] = pin;
    insums[2] = gamman;
    insums[3] = etan;
    insums[4] = thetan;
    insums[5] = kappan;
    insums[6] = rnormin;
    ierr = PetscLogEventBarrierBegin(VEC_ReduceBarrier,0,0,0,0,((PetscObject)ksp)->comm);CHKERRQ(ierr);
#if defined(PETSC_HAVE_MPI_LONG_DOUBLE) && !defined(PETSC_USE_COMPLEX) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
    if (ksp->lagnorm && ksp->its > 1) {
      ierr = MPI_Allreduce(insums,outsums,7,MPI_LONG_DOUBLE,MPI_SUM,((PetscObject)ksp)->comm);CHKERRQ(ierr);
    } else {
      ierr = MPI_Allreduce(insums,outsums,6,MPI_LONG_DOUBLE,MPI_SUM,((PetscObject)ksp)->comm);CHKERRQ(ierr);
    }
#else
    if (ksp->lagnorm && ksp->its > 1) {
      ierr = MPI_Allreduce(insums,outsums,7,MPIU_SCALAR,MPIU_SUM,((PetscObject)ksp)->comm);CHKERRQ(ierr);
    } else {
      ierr = MPI_Allreduce(insums,outsums,6,MPIU_SCALAR,MPIU_SUM,((PetscObject)ksp)->comm);CHKERRQ(ierr);
    }
#endif
    ierr = PetscLogEventBarrierEnd(VEC_ReduceBarrier,0,0,0,0,((PetscObject)ksp)->comm);CHKERRQ(ierr);
    phin     = outsums[0];
    pin      = outsums[1];
    gamman   = outsums[2];
    etan     = outsums[3];
    thetan   = outsums[4];
    kappan   = outsums[5];
    if (ksp->lagnorm && ksp->its > 1) rnorm = PetscSqrtReal(PetscRealPart(outsums[6]));

    if (kappan == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_CONV_FAILED,"kappan is zero, iteration %D",ksp->its);
    if (thetan == 0.0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_CONV_FAILED,"thetan is zero, iteration %D",ksp->its);
    omegan = thetan/kappan;
    sigman = gamman - omegan*etan;

    /*
        rn = sn - omegan*tn
        xn = xn_1 + zn + omegan*sn
    */
    ierr = PetscLogEventBegin(VEC_Ops,0,0,0,0);CHKERRQ(ierr);
    rnormin = 0.0;
    for (i=0; i<N; i++) {
      rn[i]    = sn[i] - omegan*tn[i];
      rnormin += PetscRealPart(PetscConj(rn[i])*rn[i]);
      xn[i]   += zn[i] + omegan*sn[i];
    }
    ierr = PetscObjectStateIncrease((PetscObject)Xn);CHKERRQ(ierr);
    ierr = PetscLogFlops(7.0*N);
    ierr = PetscLogEventEnd(VEC_Ops,0,0,0,0);CHKERRQ(ierr);

    if (!ksp->lagnorm && ksp->chknorm < ksp->its) {
      ierr = PetscLogEventBarrierBegin(VEC_ReduceBarrier,0,0,0,0,((PetscObject)ksp)->comm);CHKERRQ(ierr);
      ierr = MPI_Allreduce(&rnormin,&rnorm,1,MPIU_REAL,MPIU_SUM,((PetscObject)ksp)->comm);CHKERRQ(ierr);
      ierr = PetscLogEventBarrierEnd(VEC_ReduceBarrier,0,0,0,0,((PetscObject)ksp)->comm);CHKERRQ(ierr);
      rnorm = PetscSqrtReal(rnorm);
    } 

    /* Test for convergence */
    ierr = KSPMonitor(ksp,ksp->its,rnorm);CHKERRQ(ierr);
    ierr = (*ksp->converged)(ksp,ksp->its,rnorm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);   
    if (ksp->reason) break;
 
    /* un = A*rn */
    ierr = KSP_PCApplyBAorAB(ksp,Rn,Un,Tn);CHKERRQ(ierr);   

    /* Update n-1 locations with n locations */
    sigman_2 = sigman_1;
    sigman_1 = sigman;
    pin_1    = pin;
    phin_1   = phin;
    alphan_1 = alphan;
    taun_1   = taun;
    omegan_1 = omegan;
  }
  if (ksp->its >= ksp->max_it) {
    ksp->reason = KSP_DIVERGED_ITS;
  }
  ierr = KSPUnwindPreconditioner(ksp,Xn,Tn);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Пример #8
0
int main(int Argc,char **Args)
{
  PetscBool      flg;
  PetscInt       n   = -6;
  PetscScalar    rho = 1.0;
  PetscReal      h;
  PetscReal      beta = 1.0;
  DM             da;
  PetscRandom    rctx;
  PetscMPIInt    comm_size;
  Mat            H,HtH;
  PetscInt       x, y, xs, ys, xm, ym;
  PetscReal      r1, r2;
  PetscScalar    uxy1, uxy2;
  MatStencil     sxy, sxy_m;
  PetscScalar    val, valconj;
  Vec            b, Htb,xvec;
  KSP            kspmg;
  PC             pcmg;
  PetscErrorCode ierr;
  PetscInt       ix[1]   = {0};
  PetscScalar    vals[1] = {1.0};

  PetscInitialize(&Argc,&Args,(char*)0,help);
  ierr = PetscOptionsGetInt(NULL,"-size",&n,&flg);CHKERRQ(ierr);
  ierr = PetscOptionsGetReal(NULL,"-beta",&beta,&flg);CHKERRQ(ierr);
  ierr = PetscOptionsGetScalar(NULL,"-rho",&rho,&flg);CHKERRQ(ierr);

  /* Set the fudge parameters, we scale the whole thing by 1/(2*h) later */
  h    = 1.;
  rho *= 1./(2.*h);

  /* Geometry info */
  ierr = DMDACreate2d(PETSC_COMM_WORLD, DMDA_BOUNDARY_PERIODIC,DMDA_BOUNDARY_PERIODIC, DMDA_STENCIL_STAR, n, n,
                      PETSC_DECIDE, PETSC_DECIDE, 2 /* this is the # of dof's */,
                      1, NULL, NULL, &da);CHKERRQ(ierr);

  /* Random numbers */
  ierr = PetscRandomCreate(PETSC_COMM_WORLD,&rctx);CHKERRQ(ierr);
  ierr = PetscRandomSetFromOptions(rctx);CHKERRQ(ierr);

  /* Single or multi processor ? */
  ierr = MPI_Comm_size(PETSC_COMM_WORLD,&comm_size);CHKERRQ(ierr);

  /* construct matrix */
  ierr = DMSetMatType(da,MATAIJ);CHKERRQ(ierr);
  ierr = DMCreateMatrix(da, &H);CHKERRQ(ierr);

  /* get local corners for this processor */
  ierr = DMDAGetCorners(da,&xs,&ys,0,&xm,&ym,0);CHKERRQ(ierr);

  /* Assemble the matrix */
  for (x=xs; x<xs+xm; x++) {
    for (y=ys; y<ys+ym; y++) {
      /* each lattice point sets only the *forward* pointing parameters (right, down),
         i.e. Nabla_1^+ and Nabla_2^+.
         In this way we can use only local random number creation. That means
         we also have to set the corresponding backward pointing entries. */
      /* Compute some normally distributed random numbers via Box-Muller */
      ierr = PetscRandomGetValueReal(rctx, &r1);CHKERRQ(ierr);
      r1   = 1.-r1; /* to change from [0,1) to (0,1], which we need for the log */
      ierr = PetscRandomGetValueReal(rctx, &r2);CHKERRQ(ierr);
      PetscReal R = PetscSqrtReal(-2.*PetscLogReal(r1));
      PetscReal c = PetscCosReal(2.*PETSC_PI*r2);
      PetscReal s = PetscSinReal(2.*PETSC_PI*r2);

      /* use those to set the field */
      uxy1 = PetscExpScalar(((PetscScalar) (R*c/beta))*PETSC_i);
      uxy2 = PetscExpScalar(((PetscScalar) (R*s/beta))*PETSC_i);

      sxy.i = x; sxy.j = y; /* the point where we are */

      /* center action */
      sxy.c = 0; /* spin 0, 0 */
      ierr  = MatSetValuesStencil(H, 1, &sxy, 1, &sxy, &rho, ADD_VALUES);CHKERRQ(ierr);
      sxy.c = 1; /* spin 1, 1 */
      val   = -rho;
      ierr  = MatSetValuesStencil(H, 1, &sxy, 1, &sxy, &val, ADD_VALUES);CHKERRQ(ierr);

      sxy_m.i = x+1; sxy_m.j = y; /* right action */
      sxy.c   = 0; sxy_m.c = 0; /* spin 0, 0 */
      val     = -uxy1; valconj = PetscConj(val);
      ierr    = MatSetValuesStencil(H, 1, &sxy_m, 1, &sxy, &val, ADD_VALUES);CHKERRQ(ierr);
      ierr    = MatSetValuesStencil(H, 1, &sxy, 1, &sxy_m, &valconj, ADD_VALUES);CHKERRQ(ierr);
      sxy.c   = 0; sxy_m.c = 1; /* spin 0, 1 */
      val     = -uxy1; valconj = PetscConj(val);
      ierr    = MatSetValuesStencil(H, 1, &sxy_m, 1, &sxy, &val, ADD_VALUES);CHKERRQ(ierr);
      ierr    = MatSetValuesStencil(H, 1, &sxy, 1, &sxy_m, &valconj, ADD_VALUES);CHKERRQ(ierr);
      sxy.c   = 1; sxy_m.c = 0; /* spin 1, 0 */
      val     = uxy1; valconj = PetscConj(val);
      ierr    = MatSetValuesStencil(H, 1, &sxy_m, 1, &sxy, &val, ADD_VALUES);CHKERRQ(ierr);
      ierr    = MatSetValuesStencil(H, 1, &sxy, 1, &sxy_m, &valconj, ADD_VALUES);CHKERRQ(ierr);
      sxy.c   = 1; sxy_m.c = 1; /* spin 1, 1 */
      val     = uxy1; valconj = PetscConj(val);
      ierr    = MatSetValuesStencil(H, 1, &sxy_m, 1, &sxy, &val, ADD_VALUES);CHKERRQ(ierr);
      ierr    = MatSetValuesStencil(H, 1, &sxy, 1, &sxy_m, &valconj, ADD_VALUES);CHKERRQ(ierr);

      sxy_m.i = x; sxy_m.j = y+1; /* down action */
      sxy.c   = 0; sxy_m.c = 0; /* spin 0, 0 */
      val     = -uxy2; valconj = PetscConj(val);
      ierr    = MatSetValuesStencil(H, 1, &sxy_m, 1, &sxy, &val, ADD_VALUES);CHKERRQ(ierr);
      ierr    = MatSetValuesStencil(H, 1, &sxy, 1, &sxy_m, &valconj, ADD_VALUES);CHKERRQ(ierr);
      sxy.c   = 0; sxy_m.c = 1; /* spin 0, 1 */
      val     = -PETSC_i*uxy2; valconj = PetscConj(val);
      ierr    = MatSetValuesStencil(H, 1, &sxy_m, 1, &sxy, &val, ADD_VALUES);CHKERRQ(ierr);
      ierr    = MatSetValuesStencil(H, 1, &sxy, 1, &sxy_m, &valconj, ADD_VALUES);CHKERRQ(ierr);
      sxy.c   = 1; sxy_m.c = 0; /* spin 1, 0 */
      val     = -PETSC_i*uxy2; valconj = PetscConj(val);
      ierr    = MatSetValuesStencil(H, 1, &sxy_m, 1, &sxy, &val, ADD_VALUES);CHKERRQ(ierr);
      ierr    = MatSetValuesStencil(H, 1, &sxy, 1, &sxy_m, &valconj, ADD_VALUES);CHKERRQ(ierr);
      sxy.c   = 1; sxy_m.c = 1; /* spin 1, 1 */
      val     = PetscConj(uxy2); valconj = PetscConj(val);
      ierr    = MatSetValuesStencil(H, 1, &sxy_m, 1, &sxy, &val, ADD_VALUES);CHKERRQ(ierr);
      ierr    = MatSetValuesStencil(H, 1, &sxy, 1, &sxy_m, &valconj, ADD_VALUES);CHKERRQ(ierr);
    }
  }

  ierr = MatAssemblyBegin(H, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(H, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* scale H */
  ierr = MatScale(H, 1./(2.*h));CHKERRQ(ierr);

  /* it looks like H is Hermetian */
  /* construct normal equations */
  ierr = MatMatMult(H, H, MAT_INITIAL_MATRIX, 1., &HtH);CHKERRQ(ierr);

  /* permutation matrix to check whether H and HtH are identical to the ones in the paper */
/*   Mat perm; */
/*   ierr = DMCreateMatrix(da, &perm);CHKERRQ(ierr); */
/*   PetscInt row, col; */
/*   PetscScalar one = 1.0; */
/*   for (PetscInt i=0; i<n; i++) { */
/*     for (PetscInt j=0; j<n; j++) { */
/*       row = (i*n+j)*2; col = i*n+j; */
/*       ierr = MatSetValues(perm, 1, &row, 1, &col, &one, INSERT_VALUES);CHKERRQ(ierr); */
/*       row = (i*n+j)*2+1; col = i*n+j + n*n; */
/*       ierr = MatSetValues(perm, 1, &row, 1, &col, &one, INSERT_VALUES);CHKERRQ(ierr); */
/*     } */
/*   } */
/*   ierr = MatAssemblyBegin(perm, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); */
/*   ierr = MatAssemblyEnd(perm, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); */

/*   Mat Hperm; */
/*   ierr = MatPtAP(H, perm, MAT_INITIAL_MATRIX, 1.0, &Hperm);CHKERRQ(ierr); */
/*   ierr = PetscPrintf(PETSC_COMM_WORLD, "Matrix H after construction\n");CHKERRQ(ierr); */
/*   ierr = MatView(Hperm, PETSC_VIEWER_STDOUT_(PETSC_COMM_WORLD));CHKERRQ(ierr); */

/*   Mat HtHperm; */
/*   ierr = MatPtAP(HtH, perm, MAT_INITIAL_MATRIX, 1.0, &HtHperm);CHKERRQ(ierr); */
/*   ierr = PetscPrintf(PETSC_COMM_WORLD, "Matrix HtH:\n");CHKERRQ(ierr); */
/*   ierr = MatView(HtHperm, PETSC_VIEWER_STDOUT_(PETSC_COMM_WORLD));CHKERRQ(ierr); */

  /* right hand side */
  ierr = DMCreateGlobalVector(da, &b);CHKERRQ(ierr);
  ierr = VecSet(b,0.0);CHKERRQ(ierr);
  ierr = VecSetValues(b, 1, ix, vals, INSERT_VALUES);CHKERRQ(ierr);
  ierr = VecAssemblyBegin(b);CHKERRQ(ierr);
  ierr = VecAssemblyEnd(b);CHKERRQ(ierr);
/*   ierr = VecSetRandom(b, rctx);CHKERRQ(ierr); */
  ierr = VecDuplicate(b, &Htb);CHKERRQ(ierr);
  ierr = MatMultTranspose(H, b, Htb);CHKERRQ(ierr);

  /* construct solver */
  ierr = KSPCreate(PETSC_COMM_WORLD,&kspmg);CHKERRQ(ierr);
  ierr = KSPSetType(kspmg, KSPCG);CHKERRQ(ierr);

  ierr = KSPGetPC(kspmg,&pcmg);CHKERRQ(ierr);
  ierr = PCSetType(pcmg,PCASA);CHKERRQ(ierr);

  /* maybe user wants to override some of the choices */
  ierr = KSPSetFromOptions(kspmg);CHKERRQ(ierr);

  ierr = KSPSetOperators(kspmg, HtH, HtH, DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);

  ierr = DMDASetRefinementFactor(da, 3, 3, 3);CHKERRQ(ierr);
  ierr = PCSetDM(pcmg,da);CHKERRQ(ierr);

  ierr = PCASASetTolerances(pcmg, 1.e-6, 1.e-10,PETSC_DEFAULT,PETSC_DEFAULT);CHKERRQ(ierr);

  ierr = VecDuplicate(b, &xvec);CHKERRQ(ierr);
  ierr = VecSet(xvec, 0.0);CHKERRQ(ierr);

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                      Solve the linear system
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

  ierr = KSPSolve(kspmg, Htb, xvec);CHKERRQ(ierr);

/*   ierr = VecView(xvec, PETSC_VIEWER_STDOUT_(PETSC_COMM_WORLD));CHKERRQ(ierr); */

  ierr = KSPDestroy(&kspmg);CHKERRQ(ierr);
  ierr = VecDestroy(&xvec);CHKERRQ(ierr);

  /*   seems to be destroyed by KSPDestroy */
  ierr = VecDestroy(&b);CHKERRQ(ierr);
  ierr = VecDestroy(&Htb);CHKERRQ(ierr);
  ierr = MatDestroy(&HtH);CHKERRQ(ierr);
  ierr = MatDestroy(&H);CHKERRQ(ierr);

  ierr = DMDestroy(&da);CHKERRQ(ierr);
  ierr = PetscRandomDestroy(&rctx);CHKERRQ(ierr);
  ierr = PetscFinalize();
  return 0;
}
Пример #9
0
PetscErrorCode  KSPSolve_BiCG(KSP ksp)
{
  PetscErrorCode ierr;
  PetscInt       i;
  PetscBool      diagonalscale;
  PetscScalar    dpi,a=1.0,beta,betaold=1.0,b,ma;
  PetscReal      dp;
  Vec            X,B,Zl,Zr,Rl,Rr,Pl,Pr;
  Mat            Amat,Pmat;

  PetscFunctionBegin;
  ierr = PCGetDiagonalScale(ksp->pc,&diagonalscale);CHKERRQ(ierr);
  if (diagonalscale) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Krylov method %s does not support diagonal scaling",((PetscObject)ksp)->type_name);

  X  = ksp->vec_sol;
  B  = ksp->vec_rhs;
  Rl = ksp->work[0];
  Zl = ksp->work[1];
  Pl = ksp->work[2];
  Rr = ksp->work[3];
  Zr = ksp->work[4];
  Pr = ksp->work[5];

  ierr = PCGetOperators(ksp->pc,&Amat,&Pmat);CHKERRQ(ierr);

  if (!ksp->guess_zero) {
    ierr = KSP_MatMult(ksp,Amat,X,Rr);CHKERRQ(ierr);      /*   r <- b - Ax       */
    ierr = VecAYPX(Rr,-1.0,B);CHKERRQ(ierr);
  } else {
    ierr = VecCopy(B,Rr);CHKERRQ(ierr);           /*     r <- b (x is 0) */
  }
  ierr = VecCopy(Rr,Rl);CHKERRQ(ierr);
  ierr = KSP_PCApply(ksp,Rr,Zr);CHKERRQ(ierr);     /*     z <- Br         */
  ierr = VecConjugate(Rl);CHKERRQ(ierr);
  ierr = KSP_PCApplyTranspose(ksp,Rl,Zl);CHKERRQ(ierr);
  ierr = VecConjugate(Rl);CHKERRQ(ierr);
  ierr = VecConjugate(Zl);CHKERRQ(ierr);
  if (ksp->normtype == KSP_NORM_PRECONDITIONED) {
    ierr = VecNorm(Zr,NORM_2,&dp);CHKERRQ(ierr);  /*    dp <- z'*z       */
  } else {
    ierr = VecNorm(Rr,NORM_2,&dp);CHKERRQ(ierr);  /*    dp <- r'*r       */
  }
  ierr       = KSPMonitor(ksp,0,dp);CHKERRQ(ierr);
  ierr       = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
  ksp->its   = 0;
  ksp->rnorm = dp;
  ierr       = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
  ierr = KSPLogResidualHistory(ksp,dp);CHKERRQ(ierr);
  ierr = (*ksp->converged)(ksp,0,dp,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
  if (ksp->reason) PetscFunctionReturn(0);

  i = 0;
  do {
    ierr = VecDot(Zr,Rl,&beta);CHKERRQ(ierr);       /*     beta <- r'z     */
    if (!i) {
      if (beta == 0.0) {
        ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
        PetscFunctionReturn(0);
      }
      ierr = VecCopy(Zr,Pr);CHKERRQ(ierr);       /*     p <- z          */
      ierr = VecCopy(Zl,Pl);CHKERRQ(ierr);
    } else {
      b    = beta/betaold;
      ierr = VecAYPX(Pr,b,Zr);CHKERRQ(ierr);  /*     p <- z + b* p   */
      b    = PetscConj(b);
      ierr = VecAYPX(Pl,b,Zl);CHKERRQ(ierr);
    }
    betaold = beta;
    ierr    = KSP_MatMult(ksp,Amat,Pr,Zr);CHKERRQ(ierr); /*     z <- Kp         */
    ierr    = VecConjugate(Pl);CHKERRQ(ierr);
    ierr    = KSP_MatMultTranspose(ksp,Amat,Pl,Zl);CHKERRQ(ierr);
    ierr    = VecConjugate(Pl);CHKERRQ(ierr);
    ierr    = VecConjugate(Zl);CHKERRQ(ierr);
    ierr    = VecDot(Zr,Pl,&dpi);CHKERRQ(ierr);            /*     dpi <- z'p      */
    a       = beta/dpi;                           /*     a = beta/p'z    */
    ierr    = VecAXPY(X,a,Pr);CHKERRQ(ierr);    /*     x <- x + ap     */
    ma      = -a;
    ierr    = VecAXPY(Rr,ma,Zr);CHKERRQ(ierr);
    ma      = PetscConj(ma);
    ierr    = VecAXPY(Rl,ma,Zl);CHKERRQ(ierr);
    if (ksp->normtype == KSP_NORM_PRECONDITIONED) {
      ierr = KSP_PCApply(ksp,Rr,Zr);CHKERRQ(ierr);  /*     z <- Br         */
      ierr = VecConjugate(Rl);CHKERRQ(ierr);
      ierr = KSP_PCApplyTranspose(ksp,Rl,Zl);CHKERRQ(ierr);
      ierr = VecConjugate(Rl);CHKERRQ(ierr);
      ierr = VecConjugate(Zl);CHKERRQ(ierr);
      ierr = VecNorm(Zr,NORM_2,&dp);CHKERRQ(ierr);  /*    dp <- z'*z       */
    } else {
      ierr = VecNorm(Rr,NORM_2,&dp);CHKERRQ(ierr);  /*    dp <- r'*r       */
    }
    ierr       = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
    ksp->its   = i+1;
    ksp->rnorm = dp;
    ierr       = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
    ierr = KSPLogResidualHistory(ksp,dp);CHKERRQ(ierr);
    ierr = KSPMonitor(ksp,i+1,dp);CHKERRQ(ierr);
    ierr = (*ksp->converged)(ksp,i+1,dp,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
    if (ksp->reason) break;
    if (ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
      ierr = KSP_PCApply(ksp,Rr,Zr);CHKERRQ(ierr);  /* z <- Br  */
      ierr = VecConjugate(Rl);CHKERRQ(ierr);
      ierr = KSP_PCApplyTranspose(ksp,Rl,Zl);CHKERRQ(ierr);
      ierr = VecConjugate(Rl);CHKERRQ(ierr);
      ierr = VecConjugate(Zl);CHKERRQ(ierr);
    }
    i++;
  } while (i<ksp->max_it);
  if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
  PetscFunctionReturn(0);
}
Пример #10
0
PetscErrorCode VecMDot_Seq(Vec xin,PetscInt nv,const Vec yin[],PetscScalar *z)
{
  PetscErrorCode    ierr;
  PetscInt          n = xin->map->n,i,j,nv_rem,j_rem;
  PetscScalar       sum0,sum1,sum2,sum3,x0,x1,x2,x3;
  const PetscScalar *yy0,*yy1,*yy2,*yy3,*x,*xbase;
  Vec               *yy;

  PetscFunctionBegin;
  sum0 = 0.;
  sum1 = 0.;
  sum2 = 0.;

  i      = nv;
  nv_rem = nv&0x3;
  yy     = (Vec *)yin;
  j      = n;
  ierr   = VecGetArrayRead(xin,&xbase);CHKERRQ(ierr); 
  x      = xbase;

  switch (nv_rem) {
  case 3:
    ierr = VecGetArrayRead(yy[0],&yy0);CHKERRQ(ierr);
    ierr = VecGetArrayRead(yy[1],&yy1);CHKERRQ(ierr);
    ierr = VecGetArrayRead(yy[2],&yy2);CHKERRQ(ierr);
    switch (j_rem=j&0x3) {
    case 3: 
      x2 = x[2]; 
      sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]); 
      sum2 += x2*PetscConj(yy2[2]); 
    case 2: 
      x1 = x[1]; 
      sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]); 
      sum2 += x1*PetscConj(yy2[1]); 
    case 1: 
      x0 = x[0]; 
      sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]); 
      sum2 += x0*PetscConj(yy2[0]); 
    case 0: 
      x   += j_rem;
      yy0 += j_rem;
      yy1 += j_rem;
      yy2 += j_rem;
      j   -= j_rem;
      break;
    }
    while (j>0) {
      x0 = x[0];
      x1 = x[1];
      x2 = x[2];
      x3 = x[3];
      x += 4;
      
      sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
      sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
      sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
      j -= 4;
    }
    z[0] = sum0;
    z[1] = sum1;
    z[2] = sum2;
    ierr = VecRestoreArrayRead(yy[0],&yy0);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(yy[1],&yy1);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(yy[2],&yy2);CHKERRQ(ierr);
    break;
  case 2:
    ierr = VecGetArrayRead(yy[0],&yy0);CHKERRQ(ierr);
    ierr = VecGetArrayRead(yy[1],&yy1);CHKERRQ(ierr);
    switch (j_rem=j&0x3) {
    case 3: 
      x2 = x[2]; 
      sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]); 
    case 2: 
      x1 = x[1]; 
      sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]); 
    case 1: 
      x0 = x[0]; 
      sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]); 
    case 0: 
      x   += j_rem;
      yy0 += j_rem;
      yy1 += j_rem;
      j   -= j_rem;
      break;
    }
    while (j>0) {
      x0 = x[0];
      x1 = x[1];
      x2 = x[2];
      x3 = x[3];
      x += 4;
      
      sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
      sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
      j -= 4;
    }
    z[0] = sum0;
    z[1] = sum1;
 
    ierr = VecRestoreArrayRead(yy[0],&yy0);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(yy[1],&yy1);CHKERRQ(ierr);
    break;
  case 1:
    ierr = VecGetArrayRead(yy[0],&yy0);CHKERRQ(ierr);
    switch (j_rem=j&0x3) {
    case 3: 
      x2 = x[2]; sum0 += x2*PetscConj(yy0[2]);
    case 2: 
      x1 = x[1]; sum0 += x1*PetscConj(yy0[1]);
    case 1: 
      x0 = x[0]; sum0 += x0*PetscConj(yy0[0]);
    case 0: 
      x   += j_rem;
      yy0 += j_rem;
      j   -= j_rem;
      break;
    }
    while (j>0) {
      sum0 += x[0]*PetscConj(yy0[0]) + x[1]*PetscConj(yy0[1])
            + x[2]*PetscConj(yy0[2]) + x[3]*PetscConj(yy0[3]); 
      yy0+=4;
      j -= 4; x+=4;
    }
    z[0] = sum0;

    ierr = VecRestoreArrayRead(yy[0],&yy0);CHKERRQ(ierr);
    break;
  case 0:
    break;
  }
  z  += nv_rem;
  i  -= nv_rem;
  yy += nv_rem;

  while (i >0) {
    sum0 = 0.;
    sum1 = 0.;
    sum2 = 0.;
    sum3 = 0.;
    ierr = VecGetArrayRead(yy[0],&yy0);CHKERRQ(ierr);
    ierr = VecGetArrayRead(yy[1],&yy1);CHKERRQ(ierr);
    ierr = VecGetArrayRead(yy[2],&yy2);CHKERRQ(ierr);
    ierr = VecGetArrayRead(yy[3],&yy3);CHKERRQ(ierr);

    j = n;
    x = xbase;
    switch (j_rem=j&0x3) {
    case 3: 
      x2 = x[2]; 
      sum0 += x2*PetscConj(yy0[2]); sum1 += x2*PetscConj(yy1[2]); 
      sum2 += x2*PetscConj(yy2[2]); sum3 += x2*PetscConj(yy3[2]);
    case 2: 
      x1 = x[1]; 
      sum0 += x1*PetscConj(yy0[1]); sum1 += x1*PetscConj(yy1[1]); 
      sum2 += x1*PetscConj(yy2[1]); sum3 += x1*PetscConj(yy3[1]);
    case 1: 
      x0 = x[0]; 
      sum0 += x0*PetscConj(yy0[0]); sum1 += x0*PetscConj(yy1[0]); 
      sum2 += x0*PetscConj(yy2[0]); sum3 += x0*PetscConj(yy3[0]);
    case 0: 
      x   += j_rem;
      yy0 += j_rem;
      yy1 += j_rem;
      yy2 += j_rem;
      yy3 += j_rem;
      j   -= j_rem;
      break;
    }
    while (j>0) {
      x0 = x[0];
      x1 = x[1];
      x2 = x[2];
      x3 = x[3];
      x += 4;
      
      sum0 += x0*PetscConj(yy0[0]) + x1*PetscConj(yy0[1]) + x2*PetscConj(yy0[2]) + x3*PetscConj(yy0[3]); yy0+=4;
      sum1 += x0*PetscConj(yy1[0]) + x1*PetscConj(yy1[1]) + x2*PetscConj(yy1[2]) + x3*PetscConj(yy1[3]); yy1+=4;
      sum2 += x0*PetscConj(yy2[0]) + x1*PetscConj(yy2[1]) + x2*PetscConj(yy2[2]) + x3*PetscConj(yy2[3]); yy2+=4;
      sum3 += x0*PetscConj(yy3[0]) + x1*PetscConj(yy3[1]) + x2*PetscConj(yy3[2]) + x3*PetscConj(yy3[3]); yy3+=4;
      j -= 4;
    }
    z[0] = sum0;
    z[1] = sum1;
    z[2] = sum2;
    z[3] = sum3;
    z   += 4;
    i   -= 4;
    ierr = VecRestoreArrayRead(yy[0],&yy0);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(yy[1],&yy1);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(yy[2],&yy2);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(yy[3],&yy3);CHKERRQ(ierr);
    yy  += 4;
  }
  ierr = VecRestoreArrayRead(xin,&xbase);CHKERRQ(ierr); 
  ierr = PetscLogFlops(PetscMax(nv*(2.0*xin->map->n-1),0.0));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Пример #11
0
static PetscErrorCode  KSPSolve_BCGSL(KSP ksp)
{
    KSP_BCGSL      *bcgsl = (KSP_BCGSL *) ksp->data;
    PetscScalar    alpha, beta, omega, sigma;
    PetscScalar    rho0, rho1;
    PetscReal      kappa0, kappaA, kappa1;
    PetscReal      ghat, epsilon, abstol;
    PetscReal      zeta, zeta0, rnmax_computed, rnmax_true, nrm0;
    PetscTruth     bUpdateX;
    PetscTruth     bBombed = PETSC_FALSE;

    PetscInt       maxit;
    PetscInt       h, i, j, k, vi, ell;
    PetscBLASInt   ldMZ,bierr;

    PetscErrorCode ierr;

    PetscFunctionBegin;
    if (ksp->normtype == KSP_NORM_NATURAL) SETERRQ(PETSC_ERR_SUP,"Cannot use natural norm with KSPBCGSL");
    if (ksp->normtype == KSP_NORM_PRECONDITIONED && ksp->pc_side != PC_LEFT) SETERRQ(PETSC_ERR_SUP,"Use -ksp_norm_type unpreconditioned for right preconditioning and KSPBCGSL");
    if (ksp->normtype == KSP_NORM_UNPRECONDITIONED && ksp->pc_side != PC_RIGHT) SETERRQ(PETSC_ERR_SUP,"Use -ksp_norm_type preconditioned for left preconditioning and KSPBCGSL");

    /* set up temporary vectors */
    vi = 0;
    ell = bcgsl->ell;
    bcgsl->vB    = ksp->work[vi];
    vi++;
    bcgsl->vRt   = ksp->work[vi];
    vi++;
    bcgsl->vTm   = ksp->work[vi];
    vi++;
    bcgsl->vvR   = ksp->work+vi;
    vi += ell+1;
    bcgsl->vvU   = ksp->work+vi;
    vi += ell+1;
    bcgsl->vXr   = ksp->work[vi];
    vi++;
    ldMZ = PetscBLASIntCast(ell+1);

    /* Prime the iterative solver */
    ierr = KSPInitialResidual(ksp, VX, VTM, VB, VVR[0], ksp->vec_rhs);
    CHKERRQ(ierr);
    ierr = VecNorm(VVR[0], NORM_2, &zeta0);
    CHKERRQ(ierr);
    rnmax_computed = zeta0;
    rnmax_true = zeta0;

    ierr = (*ksp->converged)(ksp, 0, zeta0, &ksp->reason, ksp->cnvP);
    CHKERRQ(ierr);
    if (ksp->reason) {
        ierr = PetscObjectTakeAccess(ksp);
        CHKERRQ(ierr);
        ksp->its   = 0;
        ksp->rnorm = zeta0;
        ierr = PetscObjectGrantAccess(ksp);
        CHKERRQ(ierr);
        PetscFunctionReturn(0);
    }

    ierr = VecSet(VVU[0],0.0);
    CHKERRQ(ierr);
    alpha = 0.;
    rho0 = omega = 1;

    if (bcgsl->delta>0.0) {
        ierr = VecCopy(VX, VXR);
        CHKERRQ(ierr);
        ierr = VecSet(VX,0.0);
        CHKERRQ(ierr);
        ierr = VecCopy(VVR[0], VB);
        CHKERRQ(ierr);
    } else {
        ierr = VecCopy(ksp->vec_rhs, VB);
        CHKERRQ(ierr);
    }

    /* Life goes on */
    ierr = VecCopy(VVR[0], VRT);
    CHKERRQ(ierr);
    zeta = zeta0;

    ierr = KSPGetTolerances(ksp, &epsilon, &abstol, PETSC_NULL, &maxit);
    CHKERRQ(ierr);

    for (k=0; k<maxit; k += bcgsl->ell) {
        ksp->its   = k;
        ksp->rnorm = zeta;

        KSPLogResidualHistory(ksp, zeta);
        KSPMonitor(ksp, ksp->its, zeta);

        ierr = (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);
        CHKERRQ(ierr);
        if (ksp->reason) break;

        /* BiCG part */
        rho0 = -omega*rho0;
        nrm0 = zeta;
        for (j=0; j<bcgsl->ell; j++) {
            /* rho1 <- r_j' * r_tilde */
            ierr = VecDot(VVR[j], VRT, &rho1);
            CHKERRQ(ierr);
            if (rho1 == 0.0) {
                ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
                bBombed = PETSC_TRUE;
                break;
            }
            beta = alpha*(rho1/rho0);
            rho0 = rho1;
            for (i=0; i<=j; i++) {
                /* u_i <- r_i - beta*u_i */
                ierr = VecAYPX(VVU[i], -beta, VVR[i]);
                CHKERRQ(ierr);
            }
            /* u_{j+1} <- inv(K)*A*u_j */
            ierr = KSP_PCApplyBAorAB(ksp, VVU[j], VVU[j+1], VTM);
            CHKERRQ(ierr);

            ierr = VecDot(VVU[j+1], VRT, &sigma);
            CHKERRQ(ierr);
            if (sigma == 0.0) {
                ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
                bBombed = PETSC_TRUE;
                break;
            }
            alpha = rho1/sigma;

            /* x <- x + alpha*u_0 */
            ierr = VecAXPY(VX, alpha, VVU[0]);
            CHKERRQ(ierr);

            for (i=0; i<=j; i++) {
                /* r_i <- r_i - alpha*u_{i+1} */
                ierr = VecAXPY(VVR[i], -alpha, VVU[i+1]);
                CHKERRQ(ierr);
            }

            /* r_{j+1} <- inv(K)*A*r_j */
            ierr = KSP_PCApplyBAorAB(ksp, VVR[j], VVR[j+1], VTM);
            CHKERRQ(ierr);

            ierr = VecNorm(VVR[0], NORM_2, &nrm0);
            CHKERRQ(ierr);
            if (bcgsl->delta>0.0) {
                if (rnmax_computed<nrm0) rnmax_computed = nrm0;
                if (rnmax_true<nrm0) rnmax_true = nrm0;
            }

            /* NEW: check for early exit */
            ierr = (*ksp->converged)(ksp, k+j, nrm0, &ksp->reason, ksp->cnvP);
            CHKERRQ(ierr);
            if (ksp->reason) {
                ierr = PetscObjectTakeAccess(ksp);
                CHKERRQ(ierr);
                ksp->its   = k+j;
                ksp->rnorm = nrm0;
                ierr = PetscObjectGrantAccess(ksp);
                CHKERRQ(ierr);
                break;
            }
        }

        if (bBombed==PETSC_TRUE) break;

        /* Polynomial part */
        for(i = 0; i <= bcgsl->ell; ++i) {
            ierr = VecMDot(VVR[i], i+1, VVR, &MZa[i*ldMZ]);
            CHKERRQ(ierr);
        }
        /* Symmetrize MZa */
        for(i = 0; i <= bcgsl->ell; ++i) {
            for(j = i+1; j <= bcgsl->ell; ++j) {
                MZa[i*ldMZ+j] = MZa[j*ldMZ+i] = PetscConj(MZa[j*ldMZ+i]);
            }
        }
        /* Copy MZa to MZb */
        ierr = PetscMemcpy(MZb,MZa,ldMZ*ldMZ*sizeof(PetscScalar));
        CHKERRQ(ierr);

        if (!bcgsl->bConvex || bcgsl->ell==1) {
            PetscBLASInt ione = 1,bell = PetscBLASIntCast(bcgsl->ell);

            AY0c[0] = -1;
            LAPACKpotrf_("Lower", &bell, &MZa[1+ldMZ], &ldMZ, &bierr);
            if (ierr!=0) {
                ksp->reason = KSP_DIVERGED_BREAKDOWN;
                bBombed = PETSC_TRUE;
                break;
            }
            ierr = PetscMemcpy(&AY0c[1],&MZb[1],bcgsl->ell*sizeof(PetscScalar));
            CHKERRQ(ierr);
            LAPACKpotrs_("Lower", &bell, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr);
        } else {
            PetscBLASInt ione = 1;
            PetscScalar aone = 1.0, azero = 0.0;
            PetscBLASInt neqs = PetscBLASIntCast(bcgsl->ell-1);

            LAPACKpotrf_("Lower", &neqs, &MZa[1+ldMZ], &ldMZ, &bierr);
            if (ierr!=0) {
                ksp->reason = KSP_DIVERGED_BREAKDOWN;
                bBombed = PETSC_TRUE;
                break;
            }
            ierr = PetscMemcpy(&AY0c[1],&MZb[1],(bcgsl->ell-1)*sizeof(PetscScalar));
            CHKERRQ(ierr);
            LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr);
            AY0c[0] = -1;
            AY0c[bcgsl->ell] = 0.;

            ierr = PetscMemcpy(&AYlc[1],&MZb[1+ldMZ*(bcgsl->ell)],(bcgsl->ell-1)*sizeof(PetscScalar));
            CHKERRQ(ierr);
            LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AYlc[1], &ldMZ, &bierr);

            AYlc[0] = 0.;
            AYlc[bcgsl->ell] = -1;

            BLASgemv_("NoTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AY0c, &ione, &azero, AYtc, &ione);

            kappa0 = BLASdot_(&ldMZ, AY0c, &ione, AYtc, &ione);

            /* round-off can cause negative kappa's */
            if (kappa0<0) kappa0 = -kappa0;
            kappa0 = sqrt(kappa0);

            kappaA = BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione);

            BLASgemv_("noTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AYlc, &ione, &azero, AYtc, &ione);

            kappa1 = BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione);

            if (kappa1<0) kappa1 = -kappa1;
            kappa1 = sqrt(kappa1);

            if (kappa0!=0.0 && kappa1!=0.0) {
                if (kappaA<0.7*kappa0*kappa1) {
                    ghat = (kappaA<0.0) ?  -0.7*kappa0/kappa1 : 0.7*kappa0/kappa1;
                } else {
                    ghat = kappaA/(kappa1*kappa1);
                }
                for (i=0; i<=bcgsl->ell; i++) {
                    AY0c[i] = AY0c[i] - ghat* AYlc[i];
                }
            }
        }

        omega = AY0c[bcgsl->ell];
        for (h=bcgsl->ell; h>0 && omega==0.0; h--) {
            omega = AY0c[h];
        }
        if (omega==0.0) {
            ksp->reason = KSP_DIVERGED_BREAKDOWN;
            break;
        }


        ierr = VecMAXPY(VX, bcgsl->ell,AY0c+1, VVR);
        CHKERRQ(ierr);
        for (i=1; i<=bcgsl->ell; i++) {
            AY0c[i] *= -1.0;
        }
        ierr = VecMAXPY(VVU[0], bcgsl->ell,AY0c+1, VVU+1);
        CHKERRQ(ierr);
        ierr = VecMAXPY(VVR[0], bcgsl->ell,AY0c+1, VVR+1);
        CHKERRQ(ierr);
        for (i=1; i<=bcgsl->ell; i++) {
            AY0c[i] *= -1.0;
        }
        ierr = VecNorm(VVR[0], NORM_2, &zeta);
        CHKERRQ(ierr);

        /* Accurate Update */
        if (bcgsl->delta>0.0) {
            if (rnmax_computed<zeta) rnmax_computed = zeta;
            if (rnmax_true<zeta) rnmax_true = zeta;

            bUpdateX = (PetscTruth) (zeta<bcgsl->delta*zeta0 && zeta0<=rnmax_computed);
            if ((zeta<bcgsl->delta*rnmax_true && zeta0<=rnmax_true) || bUpdateX) {
                /* r0 <- b-inv(K)*A*X */
                ierr = KSP_PCApplyBAorAB(ksp, VX, VVR[0], VTM);
                CHKERRQ(ierr);
                ierr = VecAYPX(VVR[0], -1.0, VB);
                CHKERRQ(ierr);
                rnmax_true = zeta;

                if (bUpdateX) {
                    ierr = VecAXPY(VXR,1.0,VX);
                    CHKERRQ(ierr);
                    ierr = VecSet(VX,0.0);
                    CHKERRQ(ierr);
                    ierr = VecCopy(VVR[0], VB);
                    CHKERRQ(ierr);
                    rnmax_computed = zeta;
                }
            }
        }
    }
    if (bcgsl->delta>0.0) {
        ierr = VecAXPY(VX,1.0,VXR);
        CHKERRQ(ierr);
    }

    ierr = (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);
    CHKERRQ(ierr);
    if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
    PetscFunctionReturn(0);
}
Пример #12
0
PetscErrorCode  SNESMonitorVI(SNES snes,PetscInt its,PetscReal fgnorm,void *dummy)
{
  PetscErrorCode    ierr;
  PetscViewer       viewer = dummy ? (PetscViewer) dummy : PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)snes));
  const PetscScalar *x,*xl,*xu,*f;
  PetscInt          i,n,act[2] = {0,0},fact[2],N;
  /* Number of components that actually hit the bounds (c.f. active variables) */
  PetscInt  act_bound[2] = {0,0},fact_bound[2];
  PetscReal rnorm,fnorm;
  double    tmp;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(snes->vec_sol,&n);CHKERRQ(ierr);
  ierr = VecGetSize(snes->vec_sol,&N);CHKERRQ(ierr);
  ierr = VecGetArrayRead(snes->xl,&xl);CHKERRQ(ierr);
  ierr = VecGetArrayRead(snes->xu,&xu);CHKERRQ(ierr);
  ierr = VecGetArrayRead(snes->vec_sol,&x);CHKERRQ(ierr);
  ierr = VecGetArrayRead(snes->vec_func,&f);CHKERRQ(ierr);

  rnorm = 0.0;
  for (i=0; i<n; i++) {
    if (((PetscRealPart(x[i]) > PetscRealPart(xl[i]) + 1.e-8 || (PetscRealPart(f[i]) < 0.0)) && ((PetscRealPart(x[i]) < PetscRealPart(xu[i]) - 1.e-8) || PetscRealPart(f[i]) > 0.0))) rnorm += PetscRealPart(PetscConj(f[i])*f[i]);
    else if (PetscRealPart(x[i]) <= PetscRealPart(xl[i]) + 1.e-8 && PetscRealPart(f[i]) >= 0.0) act[0]++;
    else if (PetscRealPart(x[i]) >= PetscRealPart(xu[i]) - 1.e-8 && PetscRealPart(f[i]) <= 0.0) act[1]++;
    else SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_PLIB,"Can never get here");
  }

  for (i=0; i<n; i++) {
    if (PetscRealPart(x[i]) <= PetscRealPart(xl[i]) + 1.e-8) act_bound[0]++;
    else if (PetscRealPart(x[i]) >= PetscRealPart(xu[i]) - 1.e-8) act_bound[1]++;
  }
  ierr  = VecRestoreArrayRead(snes->vec_func,&f);CHKERRQ(ierr);
  ierr  = VecRestoreArrayRead(snes->xl,&xl);CHKERRQ(ierr);
  ierr  = VecRestoreArrayRead(snes->xu,&xu);CHKERRQ(ierr);
  ierr  = VecRestoreArrayRead(snes->vec_sol,&x);CHKERRQ(ierr);
  ierr  = MPI_Allreduce(&rnorm,&fnorm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)snes));CHKERRQ(ierr);
  ierr  = MPI_Allreduce(act,fact,2,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)snes));CHKERRQ(ierr);
  ierr  = MPI_Allreduce(act_bound,fact_bound,2,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)snes));CHKERRQ(ierr);
  fnorm = PetscSqrtReal(fnorm);

  ierr = PetscViewerASCIIAddTab(viewer,((PetscObject)snes)->tablevel);CHKERRQ(ierr);
  if (snes->ntruebounds) tmp = ((double)(fact[0]+fact[1]))/((double)snes->ntruebounds);
  else tmp = 0.0;
  ierr = PetscViewerASCIIPrintf(viewer,"%3D SNES VI Function norm %14.12e Active lower constraints %D/%D upper constraints %D/%D Percent of total %g Percent of bounded %g\n",its,(double)fnorm,fact[0],fact_bound[0],fact[1],fact_bound[1],((double)(fact[0]+fact[1]))/((double)N),tmp);CHKERRQ(ierr);

  ierr = PetscViewerASCIISubtractTab(viewer,((PetscObject)snes)->tablevel);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Пример #13
0
PetscErrorCode SNESVIComputeInactiveSetFnorm(SNES snes,Vec F,Vec X, PetscReal *fnorm)
{
  PetscErrorCode    ierr;
  const PetscScalar *x,*xl,*xu,*f;
  PetscInt          i,n;
  PetscReal         rnorm;

  PetscFunctionBegin;
  ierr  = VecGetLocalSize(X,&n);CHKERRQ(ierr);
  ierr  = VecGetArrayRead(snes->xl,&xl);CHKERRQ(ierr);
  ierr  = VecGetArrayRead(snes->xu,&xu);CHKERRQ(ierr);
  ierr  = VecGetArrayRead(X,&x);CHKERRQ(ierr);
  ierr  = VecGetArrayRead(F,&f);CHKERRQ(ierr);
  rnorm = 0.0;
  for (i=0; i<n; i++) {
    if (((PetscRealPart(x[i]) > PetscRealPart(xl[i]) + 1.e-8 || (PetscRealPart(f[i]) < 0.0)) && ((PetscRealPart(x[i]) < PetscRealPart(xu[i]) - 1.e-8) || PetscRealPart(f[i]) > 0.0))) rnorm += PetscRealPart(PetscConj(f[i])*f[i]);
  }
  ierr   = VecRestoreArrayRead(F,&f);CHKERRQ(ierr);
  ierr   = VecRestoreArrayRead(snes->xl,&xl);CHKERRQ(ierr);
  ierr   = VecRestoreArrayRead(snes->xu,&xu);CHKERRQ(ierr);
  ierr   = VecRestoreArrayRead(X,&x);CHKERRQ(ierr);
  ierr   = MPI_Allreduce(&rnorm,fnorm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)snes));CHKERRQ(ierr);
  *fnorm = PetscSqrtReal(*fnorm);
  PetscFunctionReturn(0);
}
Пример #14
0
PetscErrorCode cHamiltonianMatrix::measurement(){
	double	 *ALLdepart = new double[Nt];
	double	 *ALLentropy = new double[Nt];
	gsl_matrix*     density1 = gsl_matrix_alloc(L,Nt);//background fermion density
	gsl_matrix*     density2 = gsl_matrix_alloc(L,Nt);//background fermion density
	gsl_vector*     corr12 = gsl_vector_alloc(Nt);//correlation betwen fermion up and down.                                         
        // The density correlation is in fact proportional to the interacting energy.                                                     
	double var_rank;
	PetscScalar var_tmp, var_tmp2;
	gsl_complex var_tmp_gsl;
	Vec vectort;
	VecScatter ctx;
	ofstream output;
	VecScatterCreateToZero(WFt[0],&ctx,&vectort);
	if(rank==0) cout << size << endl;
	gsl_matrix_complex*	RDM = gsl_matrix_complex_alloc(dim2,dim2);
	gsl_vector *eval_RDM = gsl_vector_alloc(dim2);
	gsl_eigen_herm_workspace* w_RDM = gsl_eigen_herm_alloc(dim2);
	for (int itime = 0; itime < Nt; ++itime) {
	  if (rank==0&&itime%10==0) cout << "this is time " << itime << endl;
	  // % ## departure ##
	  var_rank = 0.0;
	  for (int ivar = rstart; ivar < rend; ++ivar) {
	    ierr = VecGetValues(WFt[itime],1,&ivar,&var_tmp);CHKERRQ(ierr);
	    var_rank += pow(gsl_vector_get(rr,ivar)*PetscAbsComplex(var_tmp),2);
	  }
	  MPI_Reduce(&var_rank, &(ALLdepart[itime]), 1, MPI_DOUBLE, MPI_SUM, 0, PETSC_COMM_WORLD);
	  ALLdepart[itime] = sqrt(ALLdepart[itime]);
	  // % ## entropy ##
	  VecScatterBegin(ctx,WFt[itime],vectort,INSERT_VALUES,SCATTER_FORWARD);
	  VecScatterEnd(ctx,WFt[itime],vectort,INSERT_VALUES,SCATTER_FORWARD);
	  if(rank==0) {
	    int ivar;double eigen_RDM;
	    gsl_matrix_complex_set_zero(RDM);
	    for (int row2 = 0; row2 < dim2; ++row2) {
	      for (int col2 = row2; col2 < dim2; ++col2) {
		var_tmp_gsl.dat[0] = 0.0; var_tmp_gsl.dat[1] = 0.0;
		for (int jjj = 0; jjj < dim; ++jjj) {
		  ivar = row2*dim+jjj;
		  ierr = VecGetValues(vectort,1,&ivar,&var_tmp);CHKERRQ(ierr);
		  ivar = col2*dim+jjj;
		  ierr = VecGetValues(vectort,1,&ivar,&var_tmp2);CHKERRQ(ierr);
		  var_tmp_gsl.dat[0] += PetscRealPart(var_tmp*PetscConj(var_tmp2));
		  var_tmp_gsl.dat[1] += PetscImaginaryPart(var_tmp*PetscConj(var_tmp2));
		}
		gsl_matrix_complex_set(RDM,row2,col2,var_tmp_gsl);
		if (col2 != row2) {
		  gsl_matrix_complex_set(RDM,col2,row2,gsl_matrix_complex_get(RDM,row2,col2));
		}
	      }
	    }
	    gsl_eigen_herm(RDM,eval_RDM,w_RDM);
	    ALLentropy[itime] = 0;
	    for (ivar = 0; ivar < dim2; ++ivar) {
	      eigen_RDM = gsl_vector_get(eval_RDM, ivar);
	      //  cout << eigen_RDM << endl;
	      ALLentropy[itime] += -eigen_RDM*log(eigen_RDM);
	    }
	  }

	  // % ## density distribution of impurity fermion
	  if(rank==0) {
            int ivar;
            for (int row2 = 0; row2 < dim2; ++row2) {
	      for (int jpar = 0; jpar < N2; ++jpar) {
		double density_tmp=0;
		for (int jjj = 0; jjj < dim; ++jjj) {
		  ivar = row2*dim+jjj;
		  ierr = VecGetValues(vectort,1,&ivar,&var_tmp);CHKERRQ(ierr);
		  density_tmp +=pow(PetscAbsComplex(var_tmp),2);
		}
		/*if (itime==0) {
		  if (rank==0) cout << "density_tmp=" << density_tmp << endl;
		  }*/
		gsl_matrix_set(density2,gsl_matrix_get(basis2,jpar,row2)-1,itime,gsl_matrix_get(density2,gsl_matrix_get(basis2,jpar,row2)-1,itime)+density_tmp);
	      }
	    }
	  }

	  /*if (rank==0) {
	    cout << "density of impurity:" << endl;
	    for (int jpar =0; jpar < L; ++jpar) {
	      cout << gsl_matrix_get(density2,jpar,itime) << "\t";
	    }
	    cout << endl;
	    }*/
	            
	  // % ## density distribution of majority fermions
	  if(rank==0) {
            int ivar;
	    for (int jjj = 0; jjj < dim; ++jjj) {
	      for (int jpar = 0; jpar < N; ++jpar) {
                double density_tmp=0;
		for (int row2 = 0; row2 < dim2; ++row2) {
                  ivar = row2*dim+jjj;
                  ierr = VecGetValues(vectort,1,&ivar,&var_tmp);CHKERRQ(ierr);
		  density_tmp +=pow(PetscAbsComplex(var_tmp),2);
		}
		gsl_matrix_set(density1,gsl_matrix_get(basis1,jpar,jjj)-1,itime,gsl_matrix_get(density1,gsl_matrix_get(basis1,jpar,jjj)-1,itime)+density_tmp);
              }
            }
          }

	  // correlation between impurity and majority fermion                                                                            
          if(rank==0) {
            int ivar;
            double corr_tmp=0;
            for (int jimp=0; jimp<dim2; ++jimp) {
              for (int jmaj=0; jmaj<dim; ++jmaj) {
                for (int jpar=0; jpar<N; ++jpar)  {
                  if (gsl_matrix_get(basis1,jpar,jmaj)==jimp+1){
                    ivar = jimp*dim+jmaj;
                    ierr = VecGetValues(vectort,1,&ivar,&var_tmp);CHKERRQ(ierr);
                    corr_tmp+=pow(PetscAbsComplex(var_tmp),2);
                  }
                }
              }
            }
            gsl_vector_set(corr12,itime,corr_tmp);
          }// end of correlation

	}

	  
	if (rank == 0) {
          char filename[50];
	  sprintf(filename,"measurement.data");
          output.open(filename);
          output.is_open();
          output.precision(16);
          for (int itime = 0; itime < Nt; ++itime) {
            if (itime==0) {
	      //              cout << "time t[1] " << '\t' << "departure[2] " << '\t' << "entropy[3]" << '\t' << "density of majority [L]" <<'\t' << "density of impurity [L]" << endl;
            }
            output << dt*itime-3 << '\t' << ALLdepart[itime] << '\t' << ALLentropy[itime] << '\t';
	    for (int jpar = 0; jpar < L; ++jpar) {
              output << gsl_matrix_get(density1,jpar,itime) << '\t';
            }
	    for (int jpar = 0; jpar < L; ++jpar) {
              output << gsl_matrix_get(density2,jpar,itime) << '\t';
            }
	    output << gsl_vector_get(corr12,itime) << '\t';
	    output << endl;
          }
          output.close();
	}
 

	//	CopyFile(source,destination,FALSE);
	
	delete[] ALLdepart;
	VecScatterDestroy(&ctx);
	VecDestroy(&vectort);
	gsl_matrix_complex_free(RDM);
	gsl_vector_free(eval_RDM);
	gsl_eigen_herm_free(w_RDM);
	gsl_matrix_free(density1);
	gsl_matrix_free(density2);
	gsl_vector_free(corr12);
	//	CopyFile(source,destination,FALSE);
	return ierr;
}
Пример #15
0
/*
.  it - column of the Hessenberg that is complete, PGMRES is actually computing two columns ahead of this
 */
static PetscErrorCode KSPPGMRESUpdateHessenberg(KSP ksp,PetscInt it,PetscBool *hapend,PetscReal *res)
{
  PetscScalar    *hh,*cc,*ss,*rs;
  PetscInt       j;
  PetscReal      hapbnd;
  KSP_PGMRES     *pgmres = (KSP_PGMRES*)(ksp->data);
  PetscErrorCode ierr;

  PetscFunctionBegin;
  hh = HH(0,it);   /* pointer to beginning of column to update */
  cc = CC(0);      /* beginning of cosine rotations */
  ss = SS(0);      /* beginning of sine rotations */
  rs = RS(0);      /* right hand side of least squares system */

  /* The Hessenberg matrix is now correct through column it, save that form for possible spectral analysis */
  for (j=0; j<=it+1; j++) *HES(j,it) = hh[j];

  /* check for the happy breakdown */
  hapbnd = PetscMin(PetscAbsScalar(hh[it+1] / rs[it]),pgmres->haptol);
  if (PetscAbsScalar(hh[it+1]) < hapbnd) {
    ierr    = PetscInfo4(ksp,"Detected happy breakdown, current hapbnd = %14.12e H(%D,%D) = %14.12e\n",(double)hapbnd,it+1,it,(double)PetscAbsScalar(*HH(it+1,it)));CHKERRQ(ierr);
    *hapend = PETSC_TRUE;
  }

  /* Apply all the previously computed plane rotations to the new column
     of the Hessenberg matrix */
  /* Note: this uses the rotation [conj(c)  s ; -s   c], c= cos(theta), s= sin(theta),
     and some refs have [c   s ; -conj(s)  c] (don't be confused!) */

  for (j=0; j<it; j++) {
    PetscScalar hhj = hh[j];
    hh[j]   = PetscConj(cc[j])*hhj + ss[j]*hh[j+1];
    hh[j+1] =          -ss[j] *hhj + cc[j]*hh[j+1];
  }

  /*
    compute the new plane rotation, and apply it to:
     1) the right-hand-side of the Hessenberg system (RS)
        note: it affects RS(it) and RS(it+1)
     2) the new column of the Hessenberg matrix
        note: it affects HH(it,it) which is currently pointed to
        by hh and HH(it+1, it) (*(hh+1))
    thus obtaining the updated value of the residual...
  */

  /* compute new plane rotation */

  if (!*hapend) {
    PetscReal delta = PetscSqrtReal(PetscSqr(PetscAbsScalar(hh[it])) + PetscSqr(PetscAbsScalar(hh[it+1])));
    if (delta == 0.0) {
      ksp->reason = KSP_DIVERGED_NULL;
      PetscFunctionReturn(0);
    }

    cc[it] = hh[it] / delta;    /* new cosine value */
    ss[it] = hh[it+1] / delta;  /* new sine value */

    hh[it]   = PetscConj(cc[it])*hh[it] + ss[it]*hh[it+1];
    rs[it+1] = -ss[it]*rs[it];
    rs[it]   = PetscConj(cc[it])*rs[it];
    *res     = PetscAbsScalar(rs[it+1]);
  } else { /* happy breakdown: HH(it+1, it) = 0, therefore we don't need to apply
            another rotation matrix (so RH doesn't change).  The new residual is
            always the new sine term times the residual from last time (RS(it)),
            but now the new sine rotation would be zero...so the residual should
            be zero...so we will multiply "zero" by the last residual.  This might
            not be exactly what we want to do here -could just return "zero". */

    *res = 0.0;
  }
  PetscFunctionReturn(0);
}
Пример #16
0
static PetscErrorCode KSPFGMRESUpdateHessenberg(KSP ksp,PetscInt it,PetscBool hapend,PetscReal *res)
{
  PetscScalar *hh,*cc,*ss,tt;
  PetscInt    j;
  KSP_FGMRES  *fgmres = (KSP_FGMRES*)(ksp->data);

  PetscFunctionBegin;
  hh = HH(0,it);   /* pointer to beginning of column to update - so
                      incrementing hh "steps down" the (it+1)th col of HH*/
  cc = CC(0);      /* beginning of cosine rotations */
  ss = SS(0);      /* beginning of sine rotations */

  /* Apply all the previously computed plane rotations to the new column
     of the Hessenberg matrix */
  /* Note: this uses the rotation [conj(c)  s ; -s   c], c= cos(theta), s= sin(theta),
     and some refs have [c   s ; -conj(s)  c] (don't be confused!) */

  for (j=1; j<=it; j++) {
    tt  = *hh;
    *hh = PetscConj(*cc) * tt + *ss * *(hh+1);
    hh++;
    *hh = *cc++ * *hh - (*ss++ * tt);
    /* hh, cc, and ss have all been incremented one by end of loop */
  }

  /*
    compute the new plane rotation, and apply it to:
     1) the right-hand-side of the Hessenberg system (RS)
        note: it affects RS(it) and RS(it+1)
     2) the new column of the Hessenberg matrix
        note: it affects HH(it,it) which is currently pointed to
        by hh and HH(it+1, it) (*(hh+1))
    thus obtaining the updated value of the residual...
  */

  /* compute new plane rotation */

  if (!hapend) {
    tt = PetscSqrtScalar(PetscConj(*hh) * *hh + PetscConj(*(hh+1)) * *(hh+1));
    if (tt == 0.0) {
      ksp->reason = KSP_DIVERGED_NULL;
      PetscFunctionReturn(0);
    }

    *cc = *hh / tt;         /* new cosine value */
    *ss = *(hh+1) / tt;        /* new sine value */

    /* apply to 1) and 2) */
    *RS(it+1) = -(*ss * *RS(it));
    *RS(it)   = PetscConj(*cc) * *RS(it);
    *hh       = PetscConj(*cc) * *hh + *ss * *(hh+1);

    /* residual is the last element (it+1) of right-hand side! */
    *res = PetscAbsScalar(*RS(it+1));

  } else { /* happy breakdown: HH(it+1, it) = 0, therfore we don't need to apply
            another rotation matrix (so RH doesn't change).  The new residual is
            always the new sine term times the residual from last time (RS(it)),
            but now the new sine rotation would be zero...so the residual should
            be zero...so we will multiply "zero" by the last residual.  This might
            not be exactly what we want to do here -could just return "zero". */

    *res = 0.0;
  }
  PetscFunctionReturn(0);
}
Пример #17
0
PetscErrorCode DSTranslateHarmonic_NHEP(DS ds,PetscScalar tau,PetscReal beta,PetscBool recover,PetscScalar *gin,PetscReal *gamma)
{
#if defined(PETSC_MISSING_LAPACK_GETRF) || defined(PETSC_MISSING_LAPACK_GETRS)
  PetscFunctionBegin;
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF/GETRS - Lapack routines are unavailable");
#else
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscBLASInt   *ipiv,info,n,ld,one=1,ncol;
  PetscScalar    *A,*B,*Q,*g=gin,*ghat;
  PetscScalar    done=1.0,dmone=-1.0,dzero=0.0;
  PetscReal      gnorm;

  PetscFunctionBegin;
  ierr = PetscBLASIntCast(ds->n,&n);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(ds->ld,&ld);CHKERRQ(ierr);
  A  = ds->mat[DS_MAT_A];

  if (!recover) {

    ierr = DSAllocateWork_Private(ds,0,0,ld);CHKERRQ(ierr);
    ipiv = ds->iwork;
    if (!g) {
      ierr = DSAllocateWork_Private(ds,ld,0,0);CHKERRQ(ierr);
      g = ds->work;
    }
    /* use workspace matrix W to factor A-tau*eye(n) */
    ierr = DSAllocateMat_Private(ds,DS_MAT_W);CHKERRQ(ierr);
    B = ds->mat[DS_MAT_W];
    ierr = PetscMemcpy(B,A,sizeof(PetscScalar)*ld*ld);CHKERRQ(ierr);

    /* Vector g initialy stores b = beta*e_n^T */
    ierr = PetscMemzero(g,n*sizeof(PetscScalar));CHKERRQ(ierr);
    g[n-1] = beta;

    /* g = (A-tau*eye(n))'\b */
    for (i=0;i<n;i++)
      B[i+i*ld] -= tau;
    PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,B,&ld,ipiv,&info));
    if (info<0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Bad argument to LU factorization");
    if (info>0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Bad LU factorization");
    ierr = PetscLogFlops(2.0*n*n*n/3.0);CHKERRQ(ierr);
    PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("C",&n,&one,B,&ld,ipiv,g,&ld,&info));
    if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"GETRS - Bad solve");
    ierr = PetscLogFlops(2.0*n*n-n);CHKERRQ(ierr);

    /* A = A + g*b' */
    for (i=0;i<n;i++)
      A[i+(n-1)*ld] += g[i]*beta;

  } else { /* recover */

    PetscValidPointer(g,6);
    ierr = DSAllocateWork_Private(ds,ld,0,0);CHKERRQ(ierr);
    ghat = ds->work;
    Q    = ds->mat[DS_MAT_Q];

    /* g^ = -Q(:,idx)'*g */
    ierr = PetscBLASIntCast(ds->l+ds->k,&ncol);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&ncol,&dmone,Q,&ld,g,&one,&dzero,ghat,&one));

    /* A = A + g^*b' */
    for (i=0;i<ds->l+ds->k;i++)
      for (j=ds->l;j<ds->l+ds->k;j++)
        A[i+j*ld] += ghat[i]*Q[n-1+j*ld]*beta;

    /* g~ = (I-Q(:,idx)*Q(:,idx)')*g = g+Q(:,idx)*g^ */
    PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&ncol,&done,Q,&ld,ghat,&one,&done,g,&one));
  }

  /* Compute gamma factor */
  if (gamma) {
    gnorm = 0.0;
    for (i=0;i<n;i++)
      gnorm = gnorm + PetscRealPart(g[i]*PetscConj(g[i]));
    *gamma = PetscSqrtReal(1.0+gnorm);
  }
  PetscFunctionReturn(0);
#endif
}