Exemple #1
0
/*@C
  PetscLinearRegression - Gives the best least-squares linear fit to some x-y data points

  Input Parameters:
+ n - The number of points
. x - The x-values
- y - The y-values

  Output Parameters:
+ slope     - The slope of the best-fit line
- intercept - The y-intercept of the best-fit line

  Level: intermediate

.seealso: PetscConvEstGetConvRate()
@*/
PetscErrorCode PetscLinearRegression(PetscInt n, const PetscReal x[], const PetscReal y[], PetscReal *slope, PetscReal *intercept)
{
  PetscScalar    H[4];
  PetscReal     *X, *Y, beta[2];
  PetscInt       i, j, k;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *slope = *intercept = 0.0;
  ierr = PetscMalloc2(n*2, &X, n*2, &Y);CHKERRQ(ierr);
  for (k = 0; k < n; ++k) {
    /* X[n,2] = [1, x] */
    X[k*2+0] = 1.0;
    X[k*2+1] = x[k];
  }
  /* H = X^T X */
  for (i = 0; i < 2; ++i) {
    for (j = 0; j < 2; ++j) {
      H[i*2+j] = 0.0;
      for (k = 0; k < n; ++k) {
        H[i*2+j] += X[k*2+i] * X[k*2+j];
      }
    }
  }
  /* H = (X^T X)^{-1} */
  {
    PetscBLASInt two = 2, ipiv[2], info;
    PetscScalar  work[2];

    ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
    PetscStackCallBLAS("LAPACKgetrf", LAPACKgetrf_(&two, &two, H, &two, ipiv, &info));
    PetscStackCallBLAS("LAPACKgetri", LAPACKgetri_(&two, H, &two, ipiv, work, &two, &info));
    ierr = PetscFPTrapPop();CHKERRQ(ierr);
  }
    /* Y = H X^T */
  for (i = 0; i < 2; ++i) {
    for (k = 0; k < n; ++k) {
      Y[i*n+k] = 0.0;
      for (j = 0; j < 2; ++j) {
        Y[i*n+k] += PetscRealPart(H[i*2+j]) * X[k*2+j];
      }
    }
  }
  /* beta = Y error = [y-intercept, slope] */
  for (i = 0; i < 2; ++i) {
    beta[i] = 0.0;
    for (k = 0; k < n; ++k) {
      beta[i] += Y[i*n+k] * y[k];
    }
  }
  ierr = PetscFree2(X, Y);CHKERRQ(ierr);
  *intercept = beta[0];
  *slope     = beta[1];
  PetscFunctionReturn(0);
}
Exemple #2
0
PetscErrorCode DSVectors_NHEP_Refined_Some(DS ds,PetscInt *k,PetscReal *rnorm,PetscBool left)
{
#if defined(SLEPC_MISSING_LAPACK_GESVD)
  PetscFunctionBegin;
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESVD - Lapack routine is unavailable");
#else
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscBLASInt   info,ld,n,n1,lwork,inc=1;
  PetscScalar    sdummy,done=1.0,zero=0.0;
  PetscReal      *sigma;
  PetscBool      iscomplex = PETSC_FALSE;
  PetscScalar    *A = ds->mat[DS_MAT_A];
  PetscScalar    *Q = ds->mat[DS_MAT_Q];
  PetscScalar    *X = ds->mat[left?DS_MAT_Y:DS_MAT_X];
  PetscScalar    *W;

  PetscFunctionBegin;
  if (left) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for left vectors");
  ierr = PetscBLASIntCast(ds->n,&n);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(ds->ld,&ld);CHKERRQ(ierr);
  n1 = n+1;
  if ((*k)<n-1 && A[(*k)+1+(*k)*ld]!=0.0) iscomplex = PETSC_TRUE;
  if (iscomplex) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complex eigenvalues yet");
  ierr = DSAllocateWork_Private(ds,5*ld,6*ld,0);CHKERRQ(ierr);
  ierr = DSAllocateMat_Private(ds,DS_MAT_W);CHKERRQ(ierr);
  W = ds->mat[DS_MAT_W];
  lwork = 5*ld;
  sigma = ds->rwork+5*ld;

  /* build A-w*I in W */
  for (j=0;j<n;j++)
    for (i=0;i<=n;i++)
      W[i+j*ld] = A[i+j*ld];
  for (i=0;i<n;i++)
    W[i+i*ld] -= A[(*k)+(*k)*ld];

  /* compute SVD of W */
#if !defined(PETSC_USE_COMPLEX)
  PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&n1,&n,W,&ld,sigma,&sdummy,&ld,&sdummy,&ld,ds->work,&lwork,&info));
#else
  PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&n1,&n,W,&ld,sigma,&sdummy,&ld,&sdummy,&ld,ds->work,&lwork,ds->rwork,&info));
#endif
  if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGESVD %d",info);

  /* the smallest singular value is the new error estimate */
  if (rnorm) *rnorm = sigma[n-1];

  /* update vector with right singular vector associated to smallest singular value,
     accumulating the transformation matrix Q */
  PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&done,Q,&ld,W+n-1,&ld,&zero,X+(*k)*ld,&inc));
  PetscFunctionReturn(0);
#endif
}
Exemple #3
0
PetscErrorCode DSNormalize_NHEP(DS ds,DSMatType mat,PetscInt col)
{
  PetscErrorCode ierr;
  PetscInt       i,i0,i1;
  PetscBLASInt   ld,n,one = 1;
  PetscScalar    *A = ds->mat[DS_MAT_A],norm,*x;
#if !defined(PETSC_USE_COMPLEX)
  PetscScalar    norm0;
#endif

  PetscFunctionBegin;
  switch (mat) {
    case DS_MAT_X:
    case DS_MAT_Y:
    case DS_MAT_Q:
      /* Supported matrices */
      break;
    case DS_MAT_U:
    case DS_MAT_VT:
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented yet");
      break;
    default:
      SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
  }

  ierr = PetscBLASIntCast(ds->n,&n);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(ds->ld,&ld);CHKERRQ(ierr);
  ierr = DSGetArray(ds,mat,&x);CHKERRQ(ierr);
  if (col < 0) {
    i0 = 0; i1 = ds->n;
  } else if (col>0 && A[ds->ld*(col-1)+col] != 0.0) {
    i0 = col-1; i1 = col+1;
  } else {
    i0 = col; i1 = col+1;
  }
  for (i=i0;i<i1;i++) {
#if !defined(PETSC_USE_COMPLEX)
    if (i<n-1 && A[ds->ld*i+i+1] != 0.0) {
      norm = BLASnrm2_(&n,&x[ld*i],&one);
      norm0 = BLASnrm2_(&n,&x[ld*(i+1)],&one);
      norm = 1.0/SlepcAbsEigenvalue(norm,norm0);
      PetscStackCallBLAS("BLASscal",BLASscal_(&n,&norm,&x[ld*i],&one));
      PetscStackCallBLAS("BLASscal",BLASscal_(&n,&norm,&x[ld*(i+1)],&one));
      i++;
    } else
#endif
    {
      norm = BLASnrm2_(&n,&x[ld*i],&one);
      norm = 1.0/norm;
      PetscStackCallBLAS("BLASscal",BLASscal_(&n,&norm,&x[ld*i],&one));
    }
  }
  PetscFunctionReturn(0);
}
Exemple #4
0
/* Overwrites A. Can only handle full-rank problems with m>=n
 * A in column-major format
 * Ainv in row-major format
 * tau has length m
 * worksize must be >= max(1,n)
 */
static PetscErrorCode PetscDTPseudoInverseQR(PetscInt m,PetscInt mstride,PetscInt n,PetscReal *A_in,PetscReal *Ainv_out,PetscScalar *tau,PetscInt worksize,PetscScalar *work)
{
  PetscErrorCode ierr;
  PetscBLASInt   M,N,K,lda,ldb,ldwork,info;
  PetscScalar    *A,*Ainv,*R,*Q,Alpha;

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  {
    PetscInt i,j;
    ierr = PetscMalloc2(m*n,&A,m*n,&Ainv);CHKERRQ(ierr);
    for (j=0; j<n; j++) {
      for (i=0; i<m; i++) A[i+m*j] = A_in[i+mstride*j];
    }
    mstride = m;
  }
#else
  A = A_in;
  Ainv = Ainv_out;
#endif

  ierr = PetscBLASIntCast(m,&M);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(n,&N);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(mstride,&lda);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(worksize,&ldwork);CHKERRQ(ierr);
  ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
  PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&M,&N,A,&lda,tau,work,&ldwork,&info));
  ierr = PetscFPTrapPop();CHKERRQ(ierr);
  if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"xGEQRF error");
  R = A; /* Upper triangular part of A now contains R, the rest contains the elementary reflectors */

  /* Extract an explicit representation of Q */
  Q = Ainv;
  ierr = PetscMemcpy(Q,A,mstride*n*sizeof(PetscScalar));CHKERRQ(ierr);
  K = N;                        /* full rank */
  PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&M,&N,&K,Q,&lda,tau,work,&ldwork,&info));
  if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"xORGQR/xUNGQR error");

  /* Compute A^{-T} = (R^{-1} Q^T)^T = Q R^{-T} */
  Alpha = 1.0;
  ldb = lda;
  PetscStackCallBLAS("BLAStrsm",BLAStrsm_("Right","Upper","ConjugateTranspose","NotUnitTriangular",&M,&N,&Alpha,R,&lda,Q,&ldb));
  /* Ainv is Q, overwritten with inverse */

#if defined(PETSC_USE_COMPLEX)
  {
    PetscInt i;
    for (i=0; i<m*n; i++) Ainv_out[i] = PetscRealPart(Ainv[i]);
    ierr = PetscFree2(A,Ainv);CHKERRQ(ierr);
  }
#endif
  PetscFunctionReturn(0);
}
Exemple #5
0
/*@
   PetscDTGaussQuadrature - create Gauss quadrature

   Not Collective

   Input Arguments:
+  npoints - number of points
.  a - left end of interval (often-1)
-  b - right end of interval (often +1)

   Output Arguments:
+  x - quadrature points
-  w - quadrature weights

   Level: intermediate

   References:
   Golub and Welsch, Calculation of Quadrature Rules, Math. Comp. 23(106), 221--230, 1969.

.seealso: PetscDTLegendreEval()
@*/
PetscErrorCode PetscDTGaussQuadrature(PetscInt npoints,PetscReal a,PetscReal b,PetscReal *x,PetscReal *w)
{
  PetscErrorCode ierr;
  PetscInt       i;
  PetscReal      *work;
  PetscScalar    *Z;
  PetscBLASInt   N,LDZ,info;

  PetscFunctionBegin;
  /* Set up the Golub-Welsch system */
  for (i=0; i<npoints; i++) {
    x[i] = 0;                   /* diagonal is 0 */
    if (i) w[i-1] = 0.5 / PetscSqrtReal(1 - 1./PetscSqr(2*i));
  }
  ierr = PetscRealView(npoints-1,w,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
  ierr = PetscMalloc2(npoints*npoints,PetscScalar,&Z,PetscMax(1,2*npoints-2),PetscReal,&work);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(npoints,&N);CHKERRQ(ierr);
  LDZ  = N;
  ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
  PetscStackCallBLAS("LAPACKsteqr",LAPACKsteqr_("I",&N,x,w,Z,&LDZ,work,&info));
  ierr = PetscFPTrapPop();CHKERRQ(ierr);
  if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"xSTEQR error");

  for (i=0; i<(npoints+1)/2; i++) {
    PetscReal y = 0.5 * (-x[i] + x[npoints-i-1]); /* enforces symmetry */
    x[i]           = (a+b)/2 - y*(b-a)/2;
    x[npoints-i-1] = (a+b)/2 + y*(b-a)/2;

    w[i] = w[npoints-1-i] = (b-a)*PetscSqr(0.5*PetscAbsScalar(Z[i*npoints] + Z[(npoints-i-1)*npoints]));
  }
  ierr = PetscFree2(Z,work);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Exemple #6
0
PetscErrorCode DSSort_NHEP_Arbitrary(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
{
#if defined(SLEPC_MISSING_LAPACK_TRSEN)
  PetscFunctionBegin;
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TRSEN - Lapack routine is unavailable");
#else
  PetscErrorCode ierr;
  PetscInt       i;
  PetscBLASInt   info,n,ld,mout,lwork,*selection;
  PetscScalar    *T = ds->mat[DS_MAT_A],*Q = ds->mat[DS_MAT_Q],*work;
#if !defined(PETSC_USE_COMPLEX)
  PetscBLASInt   *iwork,liwork;
#endif

  PetscFunctionBegin;
  if (!k) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must supply argument k");
  ierr = PetscBLASIntCast(ds->n,&n);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(ds->ld,&ld);CHKERRQ(ierr);
#if !defined(PETSC_USE_COMPLEX)
  lwork = n;
  liwork = 1;
  ierr = DSAllocateWork_Private(ds,lwork,0,liwork+n);CHKERRQ(ierr);
  work = ds->work;
  lwork = ds->lwork;
  selection = ds->iwork;
  iwork = ds->iwork + n;
  liwork = ds->liwork - n;
#else
  lwork = 1;
  ierr = DSAllocateWork_Private(ds,lwork,0,n);CHKERRQ(ierr);
  work = ds->work;
  selection = ds->iwork;
#endif
  /* Compute the selected eigenvalue to be in the leading position */
  ierr = DSSortEigenvalues_Private(ds,rr,ri,ds->perm,PETSC_FALSE);CHKERRQ(ierr);
  ierr = PetscMemzero(selection,n*sizeof(PetscBLASInt));CHKERRQ(ierr);
  for (i=0;i<*k;i++) selection[ds->perm[i]] = 1;
#if !defined(PETSC_USE_COMPLEX)
  PetscStackCallBLAS("LAPACKtrsen",LAPACKtrsen_("N","V",selection,&n,T,&ld,Q,&ld,wr,wi,&mout,NULL,NULL,work,&lwork,iwork,&liwork,&info));
#else
  PetscStackCallBLAS("LAPACKtrsen",LAPACKtrsen_("N","V",selection,&n,T,&ld,Q,&ld,wr,&mout,NULL,NULL,work,&lwork,&info));
#endif
  if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xTRSEN %d",info);
  *k = mout;
  PetscFunctionReturn(0);
#endif
}
Exemple #7
0
PetscErrorCode SNESMonitorJacUpdateSpectrum(SNES snes,PetscInt it,PetscReal fnorm,void *ctx)
{
#if defined(PETSC_MISSING_LAPACK_GEEV)
  SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_SUP,"GEEV - Lapack routine is unavailable\nNot able to provide eigen values.");
#elif defined(PETSC_HAVE_ESSL)
  SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_SUP,"GEEV - No support for ESSL Lapack Routines");
#else
  Vec            X;
  Mat            J,dJ,dJdense;
  PetscErrorCode ierr;
  PetscErrorCode (*func)(SNES,Vec,Mat,Mat,void*);
  PetscInt       n,i;
  PetscBLASInt   nb,lwork;
  PetscReal      *eigr,*eigi;
  PetscScalar    *work;
  PetscScalar    *a;

  PetscFunctionBegin;
  if (it == 0) PetscFunctionReturn(0);
  /* create the difference between the current update and the current jacobian */
  ierr = SNESGetSolution(snes,&X);CHKERRQ(ierr);
  ierr = SNESGetJacobian(snes,NULL,&J,&func,NULL);CHKERRQ(ierr);
  ierr = MatDuplicate(J,MAT_COPY_VALUES,&dJ);CHKERRQ(ierr);
  ierr = SNESComputeJacobian(snes,X,dJ,dJ);CHKERRQ(ierr);
  ierr = MatAXPY(dJ,-1.0,J,SAME_NONZERO_PATTERN);CHKERRQ(ierr);

  /* compute the spectrum directly */
  ierr  = MatConvert(dJ,MATSEQDENSE,MAT_INITIAL_MATRIX,&dJdense);CHKERRQ(ierr);
  ierr  = MatGetSize(dJ,&n,NULL);CHKERRQ(ierr);
  ierr  = PetscBLASIntCast(n,&nb);CHKERRQ(ierr);
  lwork = 3*nb;
  ierr  = PetscMalloc1(n,&eigr);CHKERRQ(ierr);
  ierr  = PetscMalloc1(n,&eigi);CHKERRQ(ierr);
  ierr  = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
  ierr  = MatDenseGetArray(dJdense,&a);CHKERRQ(ierr);
#if !defined(PETSC_USE_COMPLEX)
  {
    PetscBLASInt lierr;
    ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
    PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","N",&nb,a,&nb,eigr,eigi,NULL,&nb,NULL,&nb,work,&lwork,&lierr));
    if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"geev() error %d",lierr);
    ierr = PetscFPTrapPop();CHKERRQ(ierr);
  }
#else
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for complex");
#endif
  PetscPrintf(PetscObjectComm((PetscObject)snes),"Eigenvalues of J_%d - J_%d:\n",it,it-1);CHKERRQ(ierr);
  for (i=0;i<n;i++) {
    PetscPrintf(PetscObjectComm((PetscObject)snes),"%5d: %20.5g + %20.5gi\n",i,(double)eigr[i],(double)eigi[i]);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(dJdense,&a);CHKERRQ(ierr);
  ierr = MatDestroy(&dJ);CHKERRQ(ierr);
  ierr = MatDestroy(&dJdense);CHKERRQ(ierr);
  ierr = PetscFree(eigr);CHKERRQ(ierr);
  ierr = PetscFree(eigi);CHKERRQ(ierr);
  ierr = PetscFree(work);CHKERRQ(ierr);
  PetscFunctionReturn(0);
#endif
}
Exemple #8
0
/*
   DenseTridiagonal - Solves a real tridiagonal Hermitian Eigenvalue Problem.

   Input Parameters:
+  n   - dimension of the eigenproblem
.  D   - pointer to the array containing the diagonal elements
-  E   - pointer to the array containing the off-diagonal elements

   Output Parameters:
+  w  - pointer to the array to store the computed eigenvalues
-  V  - pointer to the array to store the eigenvectors

   Notes:
   If V is NULL then the eigenvectors are not computed.

   This routine use LAPACK routines xSTEVR.
*/
static PetscErrorCode DenseTridiagonal(PetscInt n_,PetscReal *D,PetscReal *E,PetscReal *w,PetscScalar *V)
{
#if defined(SLEPC_MISSING_LAPACK_STEVR)
  PetscFunctionBegin;
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"STEVR - Lapack routine is unavailable");
#else
  PetscErrorCode ierr;
  PetscReal      abstol = 0.0,vl,vu,*work;
  PetscBLASInt   il,iu,m,*isuppz,n,lwork,*iwork,liwork,info;
  const char     *jobz;
#if defined(PETSC_USE_COMPLEX)
  PetscInt       i,j;
  PetscReal      *VV;
#endif

  PetscFunctionBegin;
  ierr = PetscBLASIntCast(n_,&n);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(20*n_,&lwork);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(10*n_,&liwork);CHKERRQ(ierr);
  if (V) {
    jobz = "V";
#if defined(PETSC_USE_COMPLEX)
    ierr = PetscMalloc1(n*n,&VV);CHKERRQ(ierr);
#endif
  } else jobz = "N";
  ierr = PetscMalloc3(2*n,&isuppz,lwork,&work,liwork,&iwork);CHKERRQ(ierr);
  ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
  PetscStackCallBLAS("LAPACKstevr",LAPACKstevr_(jobz,"A",&n,D,E,&vl,&vu,&il,&iu,&abstol,&m,w,VV,&n,isuppz,work,&lwork,iwork,&liwork,&info));
#else
  PetscStackCallBLAS("LAPACKstevr",LAPACKstevr_(jobz,"A",&n,D,E,&vl,&vu,&il,&iu,&abstol,&m,w,V,&n,isuppz,work,&lwork,iwork,&liwork,&info));
#endif
  ierr = PetscFPTrapPop();CHKERRQ(ierr);
  if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack DSTEVR %d",info);
#if defined(PETSC_USE_COMPLEX)
  if (V) {
    for (i=0;i<n;i++)
      for (j=0;j<n;j++)
        V[i*n+j] = VV[i*n+j];
    ierr = PetscFree(VV);CHKERRQ(ierr);
  }
#endif
  ierr = PetscFree3(isuppz,work,iwork);CHKERRQ(ierr);
  PetscFunctionReturn(0);
#endif
}
Exemple #9
0
/**************************************xxt.c***********************************/
static PetscErrorCode do_xxt_solve(xxt_ADT xxt_handle,  PetscScalar *uc)
{
  PetscInt       off, len, *iptr;
  PetscInt       level        = xxt_handle->level;
  PetscInt       n            = xxt_handle->info->n;
  PetscInt       m            = xxt_handle->info->m;
  PetscInt       *stages      = xxt_handle->info->stages;
  PetscInt       *col_indices = xxt_handle->info->col_indices;
  PetscScalar    *x_ptr, *uu_ptr;
  PetscScalar    *solve_uu = xxt_handle->info->solve_uu;
  PetscScalar    *solve_w  = xxt_handle->info->solve_w;
  PetscScalar    *x        = xxt_handle->info->x;
  PetscBLASInt   i1        = 1,dlen;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  uu_ptr=solve_uu;
  PCTFS_rvec_zero(uu_ptr,m);

  /* x  = X.Y^T.b */
  /* uu = Y^T.b */
  for (x_ptr=x,iptr=col_indices; *iptr!=-1; x_ptr+=len) {
    off       =*iptr++;
    len       =*iptr++;
    ierr      = PetscBLASIntCast(len,&dlen);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASdot",*uu_ptr++ = BLASdot_(&dlen,uc+off,&i1,x_ptr,&i1));
  }

  /* comunication of beta */
  uu_ptr=solve_uu;
  if (level) PCTFS_ssgl_radd(uu_ptr, solve_w, level, stages);

  PCTFS_rvec_zero(uc,n);

  /* x = X.uu */
  for (x_ptr=x,iptr=col_indices; *iptr!=-1; x_ptr+=len) {
    off  =*iptr++;
    len  =*iptr++;
    ierr = PetscBLASIntCast(len,&dlen);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&dlen,uu_ptr++,x_ptr,&i1,uc+off,&i1));
  }
  PetscFunctionReturn(0);
}
Exemple #10
0
PetscErrorCode DSCond_NHEP(DS ds,PetscReal *cond)
{
#if defined(PETSC_MISSING_LAPACK_GETRF) || defined(SLEPC_MISSING_LAPACK_GETRI) || defined(SLEPC_MISSING_LAPACK_LANGE) || defined(SLEPC_MISSING_LAPACK_LANHS)
  PetscFunctionBegin;
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF/GETRI/LANGE/LANHS - Lapack routines are unavailable");
#else
  PetscErrorCode ierr;
  PetscScalar    *work;
  PetscReal      *rwork;
  PetscBLASInt   *ipiv;
  PetscBLASInt   lwork,info,n,ld;
  PetscReal      hn,hin;
  PetscScalar    *A;

  PetscFunctionBegin;
  ierr = PetscBLASIntCast(ds->n,&n);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(ds->ld,&ld);CHKERRQ(ierr);
  lwork = 8*ld;
  ierr = DSAllocateWork_Private(ds,lwork,ld,ld);CHKERRQ(ierr);
  work  = ds->work;
  rwork = ds->rwork;
  ipiv  = ds->iwork;

  /* use workspace matrix W to avoid overwriting A */
  ierr = DSAllocateMat_Private(ds,DS_MAT_W);CHKERRQ(ierr);
  A = ds->mat[DS_MAT_W];
  ierr = PetscMemcpy(A,ds->mat[DS_MAT_A],sizeof(PetscScalar)*ds->ld*ds->ld);CHKERRQ(ierr);

  /* norm of A */
  if (ds->state<DS_STATE_INTERMEDIATE) hn = LAPACKlange_("I",&n,&n,A,&ld,rwork);
  else hn = LAPACKlanhs_("I",&n,A,&ld,rwork);

  /* norm of inv(A) */
  PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,A,&ld,ipiv,&info));
  if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGETRF %d",info);
  PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&n,A,&ld,ipiv,work,&lwork,&info));
  if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGETRI %d",info);
  hin = LAPACKlange_("I",&n,&n,A,&ld,rwork);

  *cond = hn*hin;
  PetscFunctionReturn(0);
#endif
}
Exemple #11
0
PetscErrorCode VecNorm_Seq(Vec xin,NormType type,PetscReal *z)
{
  const PetscScalar *xx;
  PetscErrorCode    ierr;
  PetscInt          n = xin->map->n;
  PetscBLASInt      one = 1, bn;

  PetscFunctionBegin;
  ierr = PetscBLASIntCast(n,&bn);CHKERRQ(ierr);
  if (type == NORM_2 || type == NORM_FROBENIUS) {
    ierr = VecGetArrayRead(xin,&xx);CHKERRQ(ierr);
#if defined(PETSC_USE_REAL___FP16)
    *z   = BLASnrm2_(&bn,xx,&one);
#else
    *z   = PetscRealPart(BLASdot_(&bn,xx,&one,xx,&one));
    *z   = PetscSqrtReal(*z);
#endif
    ierr = VecRestoreArrayRead(xin,&xx);CHKERRQ(ierr);
    ierr = PetscLogFlops(PetscMax(2.0*n-1,0.0));CHKERRQ(ierr);
  } else if (type == NORM_INFINITY) {
    PetscInt  i;
    PetscReal max = 0.0,tmp;

    ierr = VecGetArrayRead(xin,&xx);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      if ((tmp = PetscAbsScalar(*xx)) > max) max = tmp;
      /* check special case of tmp == NaN */
      if (tmp != tmp) {max = tmp; break;}
      xx++;
    }
    ierr = VecRestoreArrayRead(xin,&xx);CHKERRQ(ierr);
    *z   = max;
  } else if (type == NORM_1) {
#if defined(PETSC_USE_COMPLEX)
    PetscReal tmp = 0.0;
    PetscInt    i;
#endif
    ierr = VecGetArrayRead(xin,&xx);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
    /* BLASasum() returns the nonstandard 1 norm of the 1 norm of the complex entries so we provide a custom loop instead */
    for (i=0; i<n; i++) {
      tmp += PetscAbsScalar(xx[i]);
    }
    *z = tmp;
#else
    PetscStackCallBLAS("BLASasum",*z   = BLASasum_(&bn,xx,&one));
#endif
    ierr = VecRestoreArrayRead(xin,&xx);CHKERRQ(ierr);
    ierr = PetscLogFlops(PetscMax(n-1.0,0.0));CHKERRQ(ierr);
  } else if (type == NORM_1_AND_2) {
    ierr = VecNorm_Seq(xin,NORM_1,z);CHKERRQ(ierr);
    ierr = VecNorm_Seq(xin,NORM_2,z+1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
Exemple #12
0
PetscErrorCode DSVectors_NHEP_Eigen_All(DS ds,PetscBool left)
{
#if defined(SLEPC_MISSING_LAPACK_TREVC)
  PetscFunctionBegin;
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TREVC - Lapack routine is unavailable");
#else
  PetscErrorCode ierr;
  PetscBLASInt   n,ld,mout,info;
  PetscScalar    *X,*Y,*A = ds->mat[DS_MAT_A];
  const char     *side,*back;

  PetscFunctionBegin;
  ierr = PetscBLASIntCast(ds->n,&n);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(ds->ld,&ld);CHKERRQ(ierr);
  if (left) {
    X = NULL;
    Y = ds->mat[DS_MAT_Y];
    side = "L";
  } else {
    X = ds->mat[DS_MAT_X];
    Y = NULL;
    side = "R";
  }
  if (ds->state>=DS_STATE_CONDENSED) {
    /* DSSolve() has been called, backtransform with matrix Q */
    back = "B";
    ierr = PetscMemcpy(left?Y:X,ds->mat[DS_MAT_Q],ld*ld*sizeof(PetscScalar));CHKERRQ(ierr);
  } else back = "A";
#if !defined(PETSC_USE_COMPLEX)
  ierr = DSAllocateWork_Private(ds,3*ld,0,0);CHKERRQ(ierr);
  PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(side,back,NULL,&n,A,&ld,Y,&ld,X,&ld,&n,&mout,ds->work,&info));
#else
  ierr = DSAllocateWork_Private(ds,2*ld,ld,0);CHKERRQ(ierr);
  PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(side,back,NULL,&n,A,&ld,Y,&ld,X,&ld,&n,&mout,ds->work,ds->rwork,&info));
#endif
  if (info) SETERRQ1(PetscObjectComm((PetscObject)ds),PETSC_ERR_LIB,"Error in Lapack xTREVC %i",info);
  PetscFunctionReturn(0);
#endif
}
PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
{
  Mat_MPIDense   *A     = (Mat_MPIDense*)inA->data;
  Mat_SeqDense   *a     = (Mat_SeqDense*)A->A->data;
  PetscScalar    oalpha = alpha;
  PetscErrorCode ierr;
  PetscBLASInt   one = 1,nz;

  PetscFunctionBegin;
  ierr = PetscBLASIntCast(inA->rmap->n*inA->cmap->N,&nz);CHKERRQ(ierr);
  PetscStackCallBLAS("BLASscal",BLASscal_(&nz,&oalpha,a->v,&one));
  ierr = PetscLogFlops(nz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Exemple #14
0
PetscErrorCode VecSwap_Seq(Vec xin,Vec yin)
{
  PetscScalar    *ya, *xa;
  PetscErrorCode ierr;
  PetscBLASInt   one = 1,bn;

  PetscFunctionBegin;
  if (xin != yin) {
    ierr = PetscBLASIntCast(xin->map->n,&bn);CHKERRQ(ierr);
    ierr = VecGetArray(xin,&xa);CHKERRQ(ierr);
    ierr = VecGetArray(yin,&ya);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASswap",BLASswap_(&bn,xa,&one,ya,&one));
    ierr = VecRestoreArray(xin,&xa);CHKERRQ(ierr);
    ierr = VecRestoreArray(yin,&ya);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
Exemple #15
0
/*
     Matrix free operation of 1d Laplacian and Grad for GLL spectral elements
*/
PetscErrorCode MatMult_Laplacian(Mat A,Vec x,Vec y)
{
  AppCtx            *appctx;
  PetscErrorCode    ierr;
  PetscReal         **temp,vv;
  PetscInt          i,j,xs,xn;
  Vec               xlocal,ylocal;
  const PetscScalar *xl;
  PetscScalar       *yl;
  PetscBLASInt      _One = 1,n;
  PetscScalar       _DOne = 1;  

  ierr = MatShellGetContext(A,&appctx);CHKERRQ(ierr);
  ierr = DMGetLocalVector(appctx->da,&xlocal);CHKERRQ(ierr);
  ierr = DMGlobalToLocalBegin(appctx->da,x,INSERT_VALUES,xlocal);CHKERRQ(ierr);
  ierr = DMGlobalToLocalEnd(appctx->da,x,INSERT_VALUES,xlocal);CHKERRQ(ierr);
  ierr = DMGetLocalVector(appctx->da,&ylocal);CHKERRQ(ierr);
  ierr = VecSet(ylocal,0.0);CHKERRQ(ierr);
  ierr = PetscGLLElementLaplacianCreate(&appctx->SEMop.gll,&temp);CHKERRQ(ierr);
  for (i=0; i<appctx->param.N; i++) {
    vv =-appctx->param.mu*2.0/appctx->param.Le;
    for (j=0; j<appctx->param.N; j++) temp[i][j]=temp[i][j]*vv;
  }
  ierr = DMDAVecGetArrayRead(appctx->da,xlocal,(void*)&xl);CHKERRQ(ierr);
  ierr = DMDAVecGetArray(appctx->da,ylocal,&yl);CHKERRQ(ierr);
  ierr = DMDAGetCorners(appctx->da,&xs,NULL,NULL,&xn,NULL,NULL);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(appctx->param.N,&n);CHKERRQ(ierr);
  for (j=xs; j<xs+xn; j += appctx->param.N-1) {
    PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&_DOne,&temp[0][0],&n,&xl[j],&_One,&_DOne,&yl[j],&_One));
  }
  ierr = DMDAVecRestoreArrayRead(appctx->da,xlocal,(void*)&xl);CHKERRQ(ierr);
  ierr = DMDAVecRestoreArray(appctx->da,ylocal,&yl);CHKERRQ(ierr);
  ierr = PetscGLLElementLaplacianDestroy(&appctx->SEMop.gll,&temp);CHKERRQ(ierr);
  ierr = VecSet(y,0.0);CHKERRQ(ierr);
  ierr = DMLocalToGlobalBegin(appctx->da,ylocal,ADD_VALUES,y);CHKERRQ(ierr);
  ierr = DMLocalToGlobalEnd(appctx->da,ylocal,ADD_VALUES,y);CHKERRQ(ierr);
  ierr = DMRestoreLocalVector(appctx->da,&xlocal);CHKERRQ(ierr);
  ierr = DMRestoreLocalVector(appctx->da,&ylocal);CHKERRQ(ierr);
  ierr = VecPointwiseDivide(y,y,appctx->SEMop.mass);CHKERRQ(ierr);
  return 0;
}
Exemple #16
0
PetscErrorCode DSUpdateExtraRow_NHEP(DS ds)
{
  PetscErrorCode ierr;
  PetscInt       i;
  PetscBLASInt   n,ld,incx=1;
  PetscScalar    *A,*Q,*x,*y,one=1.0,zero=0.0;

  PetscFunctionBegin;
  ierr = PetscBLASIntCast(ds->n,&n);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(ds->ld,&ld);CHKERRQ(ierr);
  A  = ds->mat[DS_MAT_A];
  Q  = ds->mat[DS_MAT_Q];
  ierr = DSAllocateWork_Private(ds,2*ld,0,0);CHKERRQ(ierr);
  x = ds->work;
  y = ds->work+ld;
  for (i=0;i<n;i++) x[i] = PetscConj(A[n+i*ld]);
  PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&n,&one,Q,&ld,x,&incx,&zero,y,&incx));
  for (i=0;i<n;i++) A[n+i*ld] = PetscConj(y[i]);
  ds->k = n;
  PetscFunctionReturn(0);
}
Exemple #17
0
static PetscErrorCode PCBDDCMatTransposeMatSolve_SeqDense(Mat A,Mat B,Mat X)
{
  Mat_SeqDense      *mat = (Mat_SeqDense*)A->data;
  PetscErrorCode    ierr;
  const PetscScalar *b;
  PetscScalar       *x;
  PetscInt          n;
  PetscBLASInt      nrhs,info,m;
  PetscBool         flg;

  PetscFunctionBegin;
  ierr = PetscBLASIntCast(A->rmap->n,&m);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix");
  ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");

  ierr = MatGetSize(B,NULL,&n);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(n,&nrhs);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(B,&b);CHKERRQ(ierr);
  ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(x,b,m*nrhs*sizeof(PetscScalar));CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayRead(B,&b);CHKERRQ(ierr);

  if (A->factortype == MAT_FACTOR_LU) {
#if defined(PETSC_MISSING_LAPACK_GETRS)
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRS - Lapack routine is unavailable.");
#else
    PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("T",&m,&nrhs,mat->v,&mat->lda,mat->pivots,x,&m,&info));
    if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"GETRS - Bad solve");
#endif
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only LU factor supported");

  ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr);
  ierr = PetscLogFlops(nrhs*(2.0*m*m - m));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Exemple #18
0
PetscErrorCode DSTranslateHarmonic_NHEP(DS ds,PetscScalar tau,PetscReal beta,PetscBool recover,PetscScalar *gin,PetscReal *gamma)
{
#if defined(PETSC_MISSING_LAPACK_GETRF) || defined(PETSC_MISSING_LAPACK_GETRS)
  PetscFunctionBegin;
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF/GETRS - Lapack routines are unavailable");
#else
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscBLASInt   *ipiv,info,n,ld,one=1,ncol;
  PetscScalar    *A,*B,*Q,*g=gin,*ghat;
  PetscScalar    done=1.0,dmone=-1.0,dzero=0.0;
  PetscReal      gnorm;

  PetscFunctionBegin;
  ierr = PetscBLASIntCast(ds->n,&n);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(ds->ld,&ld);CHKERRQ(ierr);
  A  = ds->mat[DS_MAT_A];

  if (!recover) {

    ierr = DSAllocateWork_Private(ds,0,0,ld);CHKERRQ(ierr);
    ipiv = ds->iwork;
    if (!g) {
      ierr = DSAllocateWork_Private(ds,ld,0,0);CHKERRQ(ierr);
      g = ds->work;
    }
    /* use workspace matrix W to factor A-tau*eye(n) */
    ierr = DSAllocateMat_Private(ds,DS_MAT_W);CHKERRQ(ierr);
    B = ds->mat[DS_MAT_W];
    ierr = PetscMemcpy(B,A,sizeof(PetscScalar)*ld*ld);CHKERRQ(ierr);

    /* Vector g initialy stores b = beta*e_n^T */
    ierr = PetscMemzero(g,n*sizeof(PetscScalar));CHKERRQ(ierr);
    g[n-1] = beta;

    /* g = (A-tau*eye(n))'\b */
    for (i=0;i<n;i++)
      B[i+i*ld] -= tau;
    PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,B,&ld,ipiv,&info));
    if (info<0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Bad argument to LU factorization");
    if (info>0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Bad LU factorization");
    ierr = PetscLogFlops(2.0*n*n*n/3.0);CHKERRQ(ierr);
    PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("C",&n,&one,B,&ld,ipiv,g,&ld,&info));
    if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"GETRS - Bad solve");
    ierr = PetscLogFlops(2.0*n*n-n);CHKERRQ(ierr);

    /* A = A + g*b' */
    for (i=0;i<n;i++)
      A[i+(n-1)*ld] += g[i]*beta;

  } else { /* recover */

    PetscValidPointer(g,6);
    ierr = DSAllocateWork_Private(ds,ld,0,0);CHKERRQ(ierr);
    ghat = ds->work;
    Q    = ds->mat[DS_MAT_Q];

    /* g^ = -Q(:,idx)'*g */
    ierr = PetscBLASIntCast(ds->l+ds->k,&ncol);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&ncol,&dmone,Q,&ld,g,&one,&dzero,ghat,&one));

    /* A = A + g^*b' */
    for (i=0;i<ds->l+ds->k;i++)
      for (j=ds->l;j<ds->l+ds->k;j++)
        A[i+j*ld] += ghat[i]*Q[n-1+j*ld]*beta;

    /* g~ = (I-Q(:,idx)*Q(:,idx)')*g = g+Q(:,idx)*g^ */
    PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&ncol,&done,Q,&ld,ghat,&one,&done,g,&one));
  }

  /* Compute gamma factor */
  if (gamma) {
    gnorm = 0.0;
    for (i=0;i<n;i++)
      gnorm = gnorm + PetscRealPart(g[i]*PetscConj(g[i]));
    *gamma = PetscSqrtReal(1.0+gnorm);
  }
  PetscFunctionReturn(0);
#endif
}
Exemple #19
0
/*@
   KSPComputeEigenvaluesExplicitly - Computes all of the eigenvalues of the
   preconditioned operator using LAPACK.

   Collective on KSP

   Input Parameter:
+  ksp - iterative context obtained from KSPCreate()
-  n - size of arrays r and c

   Output Parameters:
+  r - real part of computed eigenvalues, provided by user with a dimension at least of n
-  c - complex part of computed eigenvalues, provided by user with a dimension at least of n

   Notes:
   This approach is very slow but will generally provide accurate eigenvalue
   estimates.  This routine explicitly forms a dense matrix representing
   the preconditioned operator, and thus will run only for relatively small
   problems, say n < 500.

   Many users may just want to use the monitoring routine
   KSPMonitorSingularValue() (which can be set with option -ksp_monitor_singular_value)
   to print the singular values at each iteration of the linear solve.

   The preconditoner operator, rhs vector, solution vectors should be
   set before this routine is called. i.e use KSPSetOperators(),KSPSolve() or
   KSPSetOperators()

   Level: advanced

.keywords: KSP, compute, eigenvalues, explicitly

.seealso: KSPComputeEigenvalues(), KSPMonitorSingularValue(), KSPComputeExtremeSingularValues(), KSPSetOperators(), KSPSolve()
@*/
PetscErrorCode  KSPComputeEigenvaluesExplicitly(KSP ksp,PetscInt nmax,PetscReal r[],PetscReal c[])
{
  Mat               BA;
  PetscErrorCode    ierr;
  PetscMPIInt       size,rank;
  MPI_Comm          comm;
  PetscScalar       *array;
  Mat               A;
  PetscInt          m,row,nz,i,n,dummy;
  const PetscInt    *cols;
  const PetscScalar *vals;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)ksp,&comm);CHKERRQ(ierr);
  ierr = KSPComputeExplicitOperator(ksp,&BA);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = MatGetSize(BA,&n,&n);CHKERRQ(ierr);
  if (size > 1) { /* assemble matrix on first processor */
    ierr = MatCreate(PetscObjectComm((PetscObject)ksp),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,n,n,n,n);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,n,n);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)BA,(PetscObject)A);CHKERRQ(ierr);

    ierr = MatGetOwnershipRange(BA,&row,&dummy);CHKERRQ(ierr);
    ierr = MatGetLocalSize(BA,&m,&dummy);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      ierr = MatGetRow(BA,row,&nz,&cols,&vals);CHKERRQ(ierr);
      ierr = MatSetValues(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(BA,row,&nz,&cols,&vals);CHKERRQ(ierr);
      row++;
    }

    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatDenseGetArray(A,&array);CHKERRQ(ierr);
  } else {
    ierr = MatDenseGetArray(BA,&array);CHKERRQ(ierr);
  }

#if defined(PETSC_HAVE_ESSL)
  /* ESSL has a different calling sequence for dgeev() and zgeev() than standard LAPACK */
  if (!rank) {
    PetscScalar  sdummy,*cwork;
    PetscReal    *work,*realpart;
    PetscBLASInt clen,idummy,lwork,bn,zero = 0;
    PetscInt     *perm;

#if !defined(PETSC_USE_COMPLEX)
    clen = n;
#else
    clen = 2*n;
#endif
    ierr   = PetscMalloc1(clen,&cwork);CHKERRQ(ierr);
    idummy = -1;                /* unused */
    ierr   = PetscBLASIntCast(n,&bn);CHKERRQ(ierr);
    lwork  = 5*n;
    ierr   = PetscMalloc1(lwork,&work);CHKERRQ(ierr);
    ierr   = PetscMalloc1(n,&realpart);CHKERRQ(ierr);
    ierr   = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
    PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_(&zero,array,&bn,cwork,&sdummy,&idummy,&idummy,&bn,work,&lwork));
    ierr = PetscFPTrapPop();CHKERRQ(ierr);
    ierr = PetscFree(work);CHKERRQ(ierr);

    /* For now we stick with the convention of storing the real and imaginary
       components of evalues separately.  But is this what we really want? */
    ierr = PetscMalloc1(n,&perm);CHKERRQ(ierr);

#if !defined(PETSC_USE_COMPLEX)
    for (i=0; i<n; i++) {
      realpart[i] = cwork[2*i];
      perm[i]     = i;
    }
    ierr = PetscSortRealWithPermutation(n,realpart,perm);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      r[i] = cwork[2*perm[i]];
      c[i] = cwork[2*perm[i]+1];
    }
#else
    for (i=0; i<n; i++) {
      realpart[i] = PetscRealPart(cwork[i]);
      perm[i]     = i;
    }
    ierr = PetscSortRealWithPermutation(n,realpart,perm);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      r[i] = PetscRealPart(cwork[perm[i]]);
      c[i] = PetscImaginaryPart(cwork[perm[i]]);
    }
#endif
    ierr = PetscFree(perm);CHKERRQ(ierr);
    ierr = PetscFree(realpart);CHKERRQ(ierr);
    ierr = PetscFree(cwork);CHKERRQ(ierr);
  }
#elif !defined(PETSC_USE_COMPLEX)
  if (!rank) {
    PetscScalar  *work;
    PetscReal    *realpart,*imagpart;
    PetscBLASInt idummy,lwork;
    PetscInt     *perm;

    idummy   = n;
    lwork    = 5*n;
    ierr     = PetscMalloc2(n,&realpart,n,&imagpart);CHKERRQ(ierr);
    ierr     = PetscMalloc1(5*n,&work);CHKERRQ(ierr);
#if defined(PETSC_MISSING_LAPACK_GEEV)
    SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GEEV - Lapack routine is unavailable\nNot able to provide eigen values.");
#else
    {
      PetscBLASInt lierr;
      PetscScalar  sdummy;
      PetscBLASInt bn;

      ierr = PetscBLASIntCast(n,&bn);CHKERRQ(ierr);
      ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
      PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","N",&bn,array,&bn,realpart,imagpart,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,&lierr));
      if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine %d",(int)lierr);
      ierr = PetscFPTrapPop();CHKERRQ(ierr);
    }
#endif
    ierr = PetscFree(work);CHKERRQ(ierr);
    ierr = PetscMalloc1(n,&perm);CHKERRQ(ierr);

    for (i=0; i<n; i++)  perm[i] = i;
    ierr = PetscSortRealWithPermutation(n,realpart,perm);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      r[i] = realpart[perm[i]];
      c[i] = imagpart[perm[i]];
    }
    ierr = PetscFree(perm);CHKERRQ(ierr);
    ierr = PetscFree2(realpart,imagpart);CHKERRQ(ierr);
  }
#else
  if (!rank) {
    PetscScalar  *work,*eigs;
    PetscReal    *rwork;
    PetscBLASInt idummy,lwork;
    PetscInt     *perm;

    idummy = n;
    lwork  = 5*n;
    ierr   = PetscMalloc1(5*n,&work);CHKERRQ(ierr);
    ierr   = PetscMalloc1(2*n,&rwork);CHKERRQ(ierr);
    ierr   = PetscMalloc1(n,&eigs);CHKERRQ(ierr);
#if defined(PETSC_MISSING_LAPACK_GEEV)
    SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GEEV - Lapack routine is unavailable\nNot able to provide eigen values.");
#else
    {
      PetscBLASInt lierr;
      PetscScalar  sdummy;
      PetscBLASInt nb;
      ierr = PetscBLASIntCast(n,&nb);CHKERRQ(ierr);
      ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
      PetscStackCallBLAS("LAPACKgeev",LAPACKgeev_("N","N",&nb,array,&nb,eigs,&sdummy,&idummy,&sdummy,&idummy,work,&lwork,rwork,&lierr));
      if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in LAPACK routine %d",(int)lierr);
      ierr = PetscFPTrapPop();CHKERRQ(ierr);
    }
#endif
    ierr = PetscFree(work);CHKERRQ(ierr);
    ierr = PetscFree(rwork);CHKERRQ(ierr);
    ierr = PetscMalloc1(n,&perm);CHKERRQ(ierr);
    for (i=0; i<n; i++) perm[i] = i;
    for (i=0; i<n; i++) r[i]    = PetscRealPart(eigs[i]);
    ierr = PetscSortRealWithPermutation(n,r,perm);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      r[i] = PetscRealPart(eigs[perm[i]]);
      c[i] = PetscImaginaryPart(eigs[perm[i]]);
    }
    ierr = PetscFree(perm);CHKERRQ(ierr);
    ierr = PetscFree(eigs);CHKERRQ(ierr);
  }
#endif
  if (size > 1) {
    ierr = MatDenseRestoreArray(A,&array);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  } else {
    ierr = MatDenseRestoreArray(BA,&array);CHKERRQ(ierr);
  }
  ierr = MatDestroy(&BA);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Exemple #20
0
static PetscErrorCode KSPAGMRESBuildSoln(KSP ksp,PetscInt it)
{
  KSP_AGMRES     *agmres = (KSP_AGMRES*)ksp->data;
  PetscErrorCode ierr;
  PetscInt       max_k = agmres->max_k;       /* Size of the non-augmented Krylov basis */
  PetscInt       i, j;
  PetscInt       r = agmres->r;           /* current number of augmented eigenvectors */
  PetscBLASInt   KspSize;
  PetscBLASInt   lC;
  PetscBLASInt   N;
  PetscBLASInt   ldH = N + 1;
  PetscBLASInt   lwork;
  PetscBLASInt   info, nrhs = 1;

  PetscFunctionBegin;
  ierr = PetscBLASIntCast(KSPSIZE,&KspSize);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(4 * (KspSize+1),&lwork);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(KspSize+1,&lC);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(MAXKSPSIZE + 1,&N);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(N + 1,&ldH);CHKERRQ(ierr);
  /* Save a copy of the Hessenberg matrix */
  for (j = 0; j < N-1; j++) {
    for (i = 0; i < N; i++) {
      *HS(i,j) = *H(i,j);
    }
  }
  /* QR factorize the Hessenberg matrix */
#if defined(PETSC_MISSING_LAPACK_GEQRF)
  SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GEQRF - Lapack routine is unavailable.");
#else
  PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&lC, &KspSize, agmres->hh_origin, &ldH, agmres->tau, agmres->work, &lwork, &info));
  if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XGEQRF INFO=%d", info);
#endif
  /* Update the right hand side of the least square problem */
  ierr = PetscMemzero(agmres->nrs, N*sizeof(PetscScalar));CHKERRQ(ierr);

  agmres->nrs[0] = ksp->rnorm;
#if defined(PETSC_MISSING_LAPACK_ORMQR)
  SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GEQRF - Lapack routine is unavailable.");
#else
  PetscStackCallBLAS("LAPACKormqr",LAPACKormqr_("L", "T", &lC, &nrhs, &KspSize, agmres->hh_origin, &ldH, agmres->tau, agmres->nrs, &N, agmres->work, &lwork, &info));
  if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XORMQR INFO=%d",info);
#endif
  ksp->rnorm = PetscAbsScalar(agmres->nrs[KspSize]);
  /* solve the least-square problem */
#if defined(PETSC_MISSING_LAPACK_TRTRS)
  SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"TRTRS - Lapack routine is unavailable.");
#else
  PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U", "N", "N", &KspSize, &nrhs, agmres->hh_origin, &ldH, agmres->nrs, &N, &info));
  if (info) SETERRQ1(PetscObjectComm((PetscObject)ksp), PETSC_ERR_LIB,"Error in LAPACK routine XTRTRS INFO=%d",info);
#endif
  /* Accumulate the correction to the solution of the preconditioned problem in VEC_TMP */
  ierr = VecZeroEntries(VEC_TMP);CHKERRQ(ierr);
  ierr = VecMAXPY(VEC_TMP, max_k, agmres->nrs, &VEC_V(0));CHKERRQ(ierr);
  if (!agmres->DeflPrecond) { ierr = VecMAXPY(VEC_TMP, r, &agmres->nrs[max_k], agmres->U);CHKERRQ(ierr); }

  if ((ksp->pc_side == PC_RIGHT) && agmres->r && agmres->DeflPrecond) {
    ierr = KSPDGMRESApplyDeflation_DGMRES(ksp, VEC_TMP, VEC_TMP_MATOP);CHKERRQ(ierr);
    ierr = VecCopy(VEC_TMP_MATOP, VEC_TMP);CHKERRQ(ierr);
  }
  ierr = KSPUnwindPreconditioner(ksp, VEC_TMP, VEC_TMP_MATOP);CHKERRQ(ierr);
  /* add the solution to the previous one */
  ierr = VecAXPY(ksp->vec_sol, 1.0, VEC_TMP);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Exemple #21
0
static PetscErrorCode  KSPSolve_BCGSL(KSP ksp)
{
  KSP_BCGSL      *bcgsl = (KSP_BCGSL*) ksp->data;
  PetscScalar    alpha, beta, omega, sigma;
  PetscScalar    rho0, rho1;
  PetscReal      kappa0, kappaA, kappa1;
  PetscReal      ghat;
  PetscReal      zeta, zeta0, rnmax_computed, rnmax_true, nrm0;
  PetscBool      bUpdateX;
  PetscInt       maxit;
  PetscInt       h, i, j, k, vi, ell;
  PetscBLASInt   ldMZ,bierr;
  PetscScalar    utb;
  PetscReal      max_s, pinv_tol;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* set up temporary vectors */
  vi         = 0;
  ell        = bcgsl->ell;
  bcgsl->vB  = ksp->work[vi]; vi++;
  bcgsl->vRt = ksp->work[vi]; vi++;
  bcgsl->vTm = ksp->work[vi]; vi++;
  bcgsl->vvR = ksp->work+vi; vi += ell+1;
  bcgsl->vvU = ksp->work+vi; vi += ell+1;
  bcgsl->vXr = ksp->work[vi]; vi++;
  ierr       = PetscBLASIntCast(ell+1,&ldMZ);CHKERRQ(ierr);

  /* Prime the iterative solver */
  ierr           = KSPInitialResidual(ksp, VX, VTM, VB, VVR[0], ksp->vec_rhs);CHKERRQ(ierr);
  ierr           = VecNorm(VVR[0], NORM_2, &zeta0);CHKERRQ(ierr);
  rnmax_computed = zeta0;
  rnmax_true     = zeta0;

  ierr = (*ksp->converged)(ksp, 0, zeta0, &ksp->reason, ksp->cnvP);CHKERRQ(ierr);
  if (ksp->reason) {
    ierr       = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
    ksp->its   = 0;
    ksp->rnorm = zeta0;
    ierr       = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  ierr  = VecSet(VVU[0],0.0);CHKERRQ(ierr);
  alpha = 0.;
  rho0  = omega = 1;

  if (bcgsl->delta>0.0) {
    ierr = VecCopy(VX, VXR);CHKERRQ(ierr);
    ierr = VecSet(VX,0.0);CHKERRQ(ierr);
    ierr = VecCopy(VVR[0], VB);CHKERRQ(ierr);
  } else {
    ierr = VecCopy(ksp->vec_rhs, VB);CHKERRQ(ierr);
  }

  /* Life goes on */
  ierr = VecCopy(VVR[0], VRT);CHKERRQ(ierr);
  zeta = zeta0;

  ierr = KSPGetTolerances(ksp, NULL, NULL, NULL, &maxit);CHKERRQ(ierr);

  for (k=0; k<maxit; k += bcgsl->ell) {
    ksp->its   = k;
    ksp->rnorm = zeta;

    ierr = KSPLogResidualHistory(ksp, zeta);CHKERRQ(ierr);
    ierr = KSPMonitor(ksp, ksp->its, zeta);CHKERRQ(ierr);

    ierr = (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);CHKERRQ(ierr);
    if (ksp->reason < 0) PetscFunctionReturn(0);
    else if (ksp->reason) break;

    /* BiCG part */
    rho0 = -omega*rho0;
    nrm0 = zeta;
    for (j=0; j<bcgsl->ell; j++) {
      /* rho1 <- r_j' * r_tilde */
      ierr = VecDot(VVR[j], VRT, &rho1);CHKERRQ(ierr);
      if (rho1 == 0.0) {
        ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
        PetscFunctionReturn(0);
      }
      beta = alpha*(rho1/rho0);
      rho0 = rho1;
      for (i=0; i<=j; i++) {
        /* u_i <- r_i - beta*u_i */
        ierr = VecAYPX(VVU[i], -beta, VVR[i]);CHKERRQ(ierr);
      }
      /* u_{j+1} <- inv(K)*A*u_j */
      ierr = KSP_PCApplyBAorAB(ksp, VVU[j], VVU[j+1], VTM);CHKERRQ(ierr);

      ierr = VecDot(VVU[j+1], VRT, &sigma);CHKERRQ(ierr);
      if (sigma == 0.0) {
        ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
        PetscFunctionReturn(0);
      }
      alpha = rho1/sigma;

      /* x <- x + alpha*u_0 */
      ierr = VecAXPY(VX, alpha, VVU[0]);CHKERRQ(ierr);

      for (i=0; i<=j; i++) {
        /* r_i <- r_i - alpha*u_{i+1} */
        ierr = VecAXPY(VVR[i], -alpha, VVU[i+1]);CHKERRQ(ierr);
      }

      /* r_{j+1} <- inv(K)*A*r_j */
      ierr = KSP_PCApplyBAorAB(ksp, VVR[j], VVR[j+1], VTM);CHKERRQ(ierr);

      ierr = VecNorm(VVR[0], NORM_2, &nrm0);CHKERRQ(ierr);
      if (bcgsl->delta>0.0) {
        if (rnmax_computed<nrm0) rnmax_computed = nrm0;
        if (rnmax_true<nrm0) rnmax_true = nrm0;
      }

      /* NEW: check for early exit */
      ierr = (*ksp->converged)(ksp, k+j, nrm0, &ksp->reason, ksp->cnvP);CHKERRQ(ierr);
      if (ksp->reason) {
        ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr);

        ksp->its   = k+j;
        ksp->rnorm = nrm0;

        ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
        if (ksp->reason < 0) PetscFunctionReturn(0);
      }
    }

    /* Polynomial part */
    for (i = 0; i <= bcgsl->ell; ++i) {
      ierr = VecMDot(VVR[i], i+1, VVR, &MZa[i*ldMZ]);CHKERRQ(ierr);
    }
    /* Symmetrize MZa */
    for (i = 0; i <= bcgsl->ell; ++i) {
      for (j = i+1; j <= bcgsl->ell; ++j) {
        MZa[i*ldMZ+j] = MZa[j*ldMZ+i] = PetscConj(MZa[j*ldMZ+i]);
      }
    }
    /* Copy MZa to MZb */
    ierr = PetscMemcpy(MZb,MZa,ldMZ*ldMZ*sizeof(PetscScalar));CHKERRQ(ierr);

    if (!bcgsl->bConvex || bcgsl->ell==1) {
      PetscBLASInt ione = 1,bell;
      ierr = PetscBLASIntCast(bcgsl->ell,&bell);CHKERRQ(ierr);

      AY0c[0] = -1;
      if (bcgsl->pinv) {
#if defined(PETSC_MISSING_LAPACK_GESVD)
        SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESVD - Lapack routine is unavailable.");
#else
#  if defined(PETSC_USE_COMPLEX)
        PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","A",&bell,&bell,&MZa[1+ldMZ],&ldMZ,bcgsl->s,bcgsl->u,&bell,bcgsl->v,&bell,bcgsl->work,&bcgsl->lwork,bcgsl->realwork,&bierr));
#  else
        PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","A",&bell,&bell,&MZa[1+ldMZ],&ldMZ,bcgsl->s,bcgsl->u,&bell,bcgsl->v,&bell,bcgsl->work,&bcgsl->lwork,&bierr));
#  endif
#endif
        if (bierr!=0) {
          ksp->reason = KSP_DIVERGED_BREAKDOWN;
          PetscFunctionReturn(0);
        }
        /* Apply pseudo-inverse */
        max_s = bcgsl->s[0];
        for (i=1; i<bell; i++) {
          if (bcgsl->s[i] > max_s) {
            max_s = bcgsl->s[i];
          }
        }
        /* tolerance is hardwired to bell*max(s)*PETSC_MACHINE_EPSILON */
        pinv_tol = bell*max_s*PETSC_MACHINE_EPSILON;
        ierr = PetscMemzero(&AY0c[1],bell*sizeof(PetscScalar));CHKERRQ(ierr);
        for (i=0; i<bell; i++) {
          if (bcgsl->s[i] >= pinv_tol) {
            utb=0.;
            for (j=0; j<bell; j++) {
              utb += MZb[1+j]*bcgsl->u[i*bell+j];
            }

            for (j=0; j<bell; j++) {
              AY0c[1+j] += utb/bcgsl->s[i]*bcgsl->v[j*bell+i];
            }
          }
        }
      } else {
#if defined(PETSC_MISSING_LAPACK_POTRF)
        SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"POTRF - Lapack routine is unavailable.");
#else
        PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("Lower", &bell, &MZa[1+ldMZ], &ldMZ, &bierr));
#endif
        if (bierr!=0) {
          ksp->reason = KSP_DIVERGED_BREAKDOWN;
          PetscFunctionReturn(0);
        }
        ierr = PetscMemcpy(&AY0c[1],&MZb[1],bcgsl->ell*sizeof(PetscScalar));CHKERRQ(ierr);
        PetscStackCallBLAS("LAPACKpotrs",LAPACKpotrs_("Lower", &bell, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr));
      }
    } else {
      PetscBLASInt ione = 1;
      PetscScalar  aone = 1.0, azero = 0.0;
      PetscBLASInt neqs;
      ierr = PetscBLASIntCast(bcgsl->ell-1,&neqs);CHKERRQ(ierr);

#if defined(PETSC_MISSING_LAPACK_POTRF)
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"POTRF - Lapack routine is unavailable.");
#else
      PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("Lower", &neqs, &MZa[1+ldMZ], &ldMZ, &bierr));
#endif
      if (bierr!=0) {
        ksp->reason = KSP_DIVERGED_BREAKDOWN;
        PetscFunctionReturn(0);
      }
      ierr = PetscMemcpy(&AY0c[1],&MZb[1],(bcgsl->ell-1)*sizeof(PetscScalar));CHKERRQ(ierr);
      PetscStackCallBLAS("LAPACKpotrs",LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr));
      AY0c[0]          = -1;
      AY0c[bcgsl->ell] = 0.;

      ierr = PetscMemcpy(&AYlc[1],&MZb[1+ldMZ*(bcgsl->ell)],(bcgsl->ell-1)*sizeof(PetscScalar));CHKERRQ(ierr);
      PetscStackCallBLAS("LAPACKpotrs",LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AYlc[1], &ldMZ, &bierr));

      AYlc[0]          = 0.;
      AYlc[bcgsl->ell] = -1;

      PetscStackCallBLAS("BLASgemv",BLASgemv_("NoTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AY0c, &ione, &azero, AYtc, &ione));

      kappa0 = PetscRealPart(BLASdot_(&ldMZ, AY0c, &ione, AYtc, &ione));

      /* round-off can cause negative kappa's */
      if (kappa0<0) kappa0 = -kappa0;
      kappa0 = PetscSqrtReal(kappa0);

      kappaA = PetscRealPart(BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione));

      PetscStackCallBLAS("BLASgemv",BLASgemv_("noTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AYlc, &ione, &azero, AYtc, &ione));

      kappa1 = PetscRealPart(BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione));

      if (kappa1<0) kappa1 = -kappa1;
      kappa1 = PetscSqrtReal(kappa1);

      if (kappa0!=0.0 && kappa1!=0.0) {
        if (kappaA<0.7*kappa0*kappa1) {
          ghat = (kappaA<0.0) ?  -0.7*kappa0/kappa1 : 0.7*kappa0/kappa1;
        } else {
          ghat = kappaA/(kappa1*kappa1);
        }
        for (i=0; i<=bcgsl->ell; i++) {
          AY0c[i] = AY0c[i] - ghat* AYlc[i];
        }
      }
    }

    omega = AY0c[bcgsl->ell];
    for (h=bcgsl->ell; h>0 && omega==0.0; h--) omega = AY0c[h];
    if (omega==0.0) {
      ksp->reason = KSP_DIVERGED_BREAKDOWN;
      PetscFunctionReturn(0);
    }


    ierr = VecMAXPY(VX, bcgsl->ell,AY0c+1, VVR);CHKERRQ(ierr);
    for (i=1; i<=bcgsl->ell; i++) AY0c[i] *= -1.0;
    ierr = VecMAXPY(VVU[0], bcgsl->ell,AY0c+1, VVU+1);CHKERRQ(ierr);
    ierr = VecMAXPY(VVR[0], bcgsl->ell,AY0c+1, VVR+1);CHKERRQ(ierr);
    for (i=1; i<=bcgsl->ell; i++) AY0c[i] *= -1.0;
    ierr = VecNorm(VVR[0], NORM_2, &zeta);CHKERRQ(ierr);

    /* Accurate Update */
    if (bcgsl->delta>0.0) {
      if (rnmax_computed<zeta) rnmax_computed = zeta;
      if (rnmax_true<zeta) rnmax_true = zeta;

      bUpdateX = (PetscBool) (zeta<bcgsl->delta*zeta0 && zeta0<=rnmax_computed);
      if ((zeta<bcgsl->delta*rnmax_true && zeta0<=rnmax_true) || bUpdateX) {
        /* r0 <- b-inv(K)*A*X */
        ierr       = KSP_PCApplyBAorAB(ksp, VX, VVR[0], VTM);CHKERRQ(ierr);
        ierr       = VecAYPX(VVR[0], -1.0, VB);CHKERRQ(ierr);
        rnmax_true = zeta;

        if (bUpdateX) {
          ierr           = VecAXPY(VXR,1.0,VX);CHKERRQ(ierr);
          ierr           = VecSet(VX,0.0);CHKERRQ(ierr);
          ierr           = VecCopy(VVR[0], VB);CHKERRQ(ierr);
          rnmax_computed = zeta;
        }
      }
    }
  }
  if (bcgsl->delta>0.0) {
    ierr = VecAXPY(VX,1.0,VXR);CHKERRQ(ierr);
  }

  ierr = (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);CHKERRQ(ierr);
  if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
  PetscFunctionReturn(0);
}
Exemple #22
0
static PetscErrorCode triangulateAndFormProl(IS  selected_2, /* list of selected local ID, includes selected ghosts */
                                             const PetscInt data_stride,
                                             const PetscReal coords[], /* column vector of local coordinates w/ ghosts */
                                             const PetscInt nselected_1, /* list of selected local ID, includes selected ghosts */
                                             const PetscInt clid_lid_1[],
                                             const PetscCoarsenData *agg_lists_1, /* selected_1 vertices of aggregate unselected vertices */
                                             const PetscInt crsGID[],
                                             const PetscInt bs,
                                             Mat a_Prol, /* prolongation operator (output) */
                                             PetscReal *a_worst_best) /* measure of worst missed fine vertex, 0 is no misses */
{
#if defined(PETSC_HAVE_TRIANGLE)
  PetscErrorCode       ierr;
  PetscInt             jj,tid,tt,idx,nselected_2;
  struct triangulateio in,mid;
  const PetscInt       *selected_idx_2;
  PetscMPIInt          rank,size;
  PetscInt             Istart,Iend,nFineLoc,myFine0;
  int                  kk,nPlotPts,sid;
  MPI_Comm             comm;
  PetscReal            tm;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)a_Prol,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = ISGetSize(selected_2, &nselected_2);CHKERRQ(ierr);
  if (nselected_2 == 1 || nselected_2 == 2) { /* 0 happens on idle processors */
    *a_worst_best = 100.0; /* this will cause a stop, but not globalized (should not happen) */
  } else *a_worst_best = 0.0;
  ierr = MPI_Allreduce(a_worst_best, &tm, 1, MPIU_REAL, MPIU_MAX, comm);CHKERRQ(ierr);
  if (tm > 0.0) {
    *a_worst_best = 100.0;
    PetscFunctionReturn(0);
  }
  ierr     = MatGetOwnershipRange(a_Prol, &Istart, &Iend);CHKERRQ(ierr);
  nFineLoc = (Iend-Istart)/bs; myFine0 = Istart/bs;
  nPlotPts = nFineLoc; /* locals */
  /* traingle */
  /* Define input points - in*/
  in.numberofpoints          = nselected_2;
  in.numberofpointattributes = 0;
  /* get nselected points */
  ierr = PetscMalloc1(2*(nselected_2), &in.pointlist);CHKERRQ(ierr);
  ierr = ISGetIndices(selected_2, &selected_idx_2);CHKERRQ(ierr);

  for (kk=0,sid=0; kk<nselected_2; kk++,sid += 2) {
    PetscInt lid = selected_idx_2[kk];
    in.pointlist[sid]   = coords[lid];
    in.pointlist[sid+1] = coords[data_stride + lid];
    if (lid>=nFineLoc) nPlotPts++;
  }
  if (sid != 2*nselected_2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sid %D != 2*nselected_2 %D",sid,nselected_2);

  in.numberofsegments      = 0;
  in.numberofedges         = 0;
  in.numberofholes         = 0;
  in.numberofregions       = 0;
  in.trianglelist          = 0;
  in.segmentmarkerlist     = 0;
  in.pointattributelist    = 0;
  in.pointmarkerlist       = 0;
  in.triangleattributelist = 0;
  in.trianglearealist      = 0;
  in.segmentlist           = 0;
  in.holelist              = 0;
  in.regionlist            = 0;
  in.edgelist              = 0;
  in.edgemarkerlist        = 0;
  in.normlist              = 0;

  /* triangulate */
  mid.pointlist = 0;            /* Not needed if -N switch used. */
  /* Not needed if -N switch used or number of point attributes is zero: */
  mid.pointattributelist = 0;
  mid.pointmarkerlist    = 0; /* Not needed if -N or -B switch used. */
  mid.trianglelist       = 0;    /* Not needed if -E switch used. */
  /* Not needed if -E switch used or number of triangle attributes is zero: */
  mid.triangleattributelist = 0;
  mid.neighborlist          = 0; /* Needed only if -n switch used. */
  /* Needed only if segments are output (-p or -c) and -P not used: */
  mid.segmentlist = 0;
  /* Needed only if segments are output (-p or -c) and -P and -B not used: */
  mid.segmentmarkerlist = 0;
  mid.edgelist          = 0;    /* Needed only if -e switch used. */
  mid.edgemarkerlist    = 0; /* Needed if -e used and -B not used. */
  mid.numberoftriangles = 0;

  /* Triangulate the points.  Switches are chosen to read and write a  */
  /*   PSLG (p), preserve the convex hull (c), number everything from  */
  /*   zero (z), assign a regional attribute to each element (A), and  */
  /*   produce an edge list (e), a Voronoi diagram (v), and a triangle */
  /*   neighbor list (n).                                            */
  if (nselected_2 != 0) { /* inactive processor */
    char args[] = "npczQ"; /* c is needed ? */
    triangulate(args, &in, &mid, (struct triangulateio*) NULL);
    /* output .poly files for 'showme' */
    if (!PETSC_TRUE) {
      static int level = 1;
      FILE       *file; char fname[32];

      sprintf(fname,"C%d_%d.poly",level,rank); file = fopen(fname, "w");
      /*First line: <# of vertices> <dimension (must be 2)> <# of attributes> <# of boundary markers (0 or 1)>*/
      fprintf(file, "%d  %d  %d  %d\n",in.numberofpoints,2,0,0);
      /*Following lines: <vertex #> <x> <y> */
      for (kk=0,sid=0; kk<in.numberofpoints; kk++,sid += 2) {
        fprintf(file, "%d %e %e\n",kk,in.pointlist[sid],in.pointlist[sid+1]);
      }
      /*One line: <# of segments> <# of boundary markers (0 or 1)> */
      fprintf(file, "%d  %d\n",0,0);
      /*Following lines: <segment #> <endpoint> <endpoint> [boundary marker] */
      /* One line: <# of holes> */
      fprintf(file, "%d\n",0);
      /* Following lines: <hole #> <x> <y> */
      /* Optional line: <# of regional attributes and/or area constraints> */
      /* Optional following lines: <region #> <x> <y> <attribute> <maximum area> */
      fclose(file);

      /* elems */
      sprintf(fname,"C%d_%d.ele",level,rank); file = fopen(fname, "w");
      /* First line: <# of triangles> <nodes per triangle> <# of attributes> */
      fprintf(file, "%d %d %d\n",mid.numberoftriangles,3,0);
      /* Remaining lines: <triangle #> <node> <node> <node> ... [attributes] */
      for (kk=0,sid=0; kk<mid.numberoftriangles; kk++,sid += 3) {
        fprintf(file, "%d %d %d %d\n",kk,mid.trianglelist[sid],mid.trianglelist[sid+1],mid.trianglelist[sid+2]);
      }
      fclose(file);

      sprintf(fname,"C%d_%d.node",level,rank); file = fopen(fname, "w");
      /* First line: <# of vertices> <dimension (must be 2)> <# of attributes> <# of boundary markers (0 or 1)> */
      /* fprintf(file, "%d  %d  %d  %d\n",in.numberofpoints,2,0,0); */
      fprintf(file, "%d  %d  %d  %d\n",nPlotPts,2,0,0);
      /*Following lines: <vertex #> <x> <y> */
      for (kk=0,sid=0; kk<in.numberofpoints; kk++,sid+=2) {
        fprintf(file, "%d %e %e\n",kk,in.pointlist[sid],in.pointlist[sid+1]);
      }

      sid /= 2;
      for (jj=0; jj<nFineLoc; jj++) {
        PetscBool sel = PETSC_TRUE;
        for (kk=0; kk<nselected_2 && sel; kk++) {
          PetscInt lid = selected_idx_2[kk];
          if (lid == jj) sel = PETSC_FALSE;
        }
        if (sel) fprintf(file, "%d %e %e\n",sid++,coords[jj],coords[data_stride + jj]);
      }
      fclose(file);
      if (sid != nPlotPts) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sid %D != nPlotPts %D",sid,nPlotPts);
      level++;
    }
  }
#if defined PETSC_GAMG_USE_LOG
  ierr = PetscLogEventBegin(petsc_gamg_setup_events[FIND_V],0,0,0,0);CHKERRQ(ierr);
#endif
  { /* form P - setup some maps */
    PetscInt clid,mm,*nTri,*node_tri;

    ierr = PetscMalloc1(nselected_2, &node_tri);CHKERRQ(ierr);
    ierr = PetscMalloc1(nselected_2, &nTri);CHKERRQ(ierr);

    /* need list of triangles on node */
    for (kk=0; kk<nselected_2; kk++) nTri[kk] = 0;
    for (tid=0,kk=0; tid<mid.numberoftriangles; tid++) {
      for (jj=0; jj<3; jj++) {
        PetscInt cid = mid.trianglelist[kk++];
        if (nTri[cid] == 0) node_tri[cid] = tid;
        nTri[cid]++;
      }
    }
#define EPS 1.e-12
    /* find points and set prolongation */
    for (mm = clid = 0; mm < nFineLoc; mm++) {
      PetscBool ise;
      ierr = PetscCDEmptyAt(agg_lists_1,mm,&ise);CHKERRQ(ierr);
      if (!ise) {
        const PetscInt lid = mm;
        /* for (clid_iterator=0;clid_iterator<nselected_1;clid_iterator++) { */
        PetscScalar  AA[3][3];
        PetscBLASInt N=3,NRHS=1,LDA=3,IPIV[3],LDB=3,INFO;
        PetscCDPos   pos;
        ierr = PetscCDGetHeadPos(agg_lists_1,lid,&pos);CHKERRQ(ierr);
        while (pos) {
          PetscInt flid;
          ierr = PetscLLNGetID(pos, &flid);CHKERRQ(ierr);
          ierr = PetscCDGetNextPos(agg_lists_1,lid,&pos);CHKERRQ(ierr);

          if (flid < nFineLoc) {  /* could be a ghost */
            PetscInt       bestTID = -1; PetscReal best_alpha = 1.e10;
            const PetscInt fgid    = flid + myFine0;
            /* compute shape function for gid */
            const PetscReal fcoord[3] = {coords[flid],coords[data_stride+flid],1.0};
            PetscBool       haveit    =PETSC_FALSE; PetscScalar alpha[3]; PetscInt clids[3];

            /* look for it */
            for (tid = node_tri[clid], jj=0;
                 jj < 5 && !haveit && tid != -1;
                 jj++) {
              for (tt=0; tt<3; tt++) {
                PetscInt cid2 = mid.trianglelist[3*tid + tt];
                PetscInt lid2 = selected_idx_2[cid2];
                AA[tt][0] = coords[lid2]; AA[tt][1] = coords[data_stride + lid2]; AA[tt][2] = 1.0;
                clids[tt] = cid2; /* store for interp */
              }

              for (tt=0; tt<3; tt++) alpha[tt] = (PetscScalar)fcoord[tt];

              /* SUBROUTINE DGESV(N, NRHS, A, LDA, IPIV, B, LDB, INFO) */
              PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&N, &NRHS, (PetscScalar*)AA, &LDA, IPIV, alpha, &LDB, &INFO));
              {
                PetscBool have=PETSC_TRUE;  PetscReal lowest=1.e10;
                for (tt = 0, idx = 0; tt < 3; tt++) {
                  if (PetscRealPart(alpha[tt]) > (1.0+EPS) || PetscRealPart(alpha[tt]) < -EPS) have = PETSC_FALSE;
                  if (PetscRealPart(alpha[tt]) < lowest) {
                    lowest = PetscRealPart(alpha[tt]);
                    idx    = tt;
                  }
                }
                haveit = have;
              }
              tid = mid.neighborlist[3*tid + idx];
            }

            if (!haveit) {
              /* brute force */
              for (tid=0; tid<mid.numberoftriangles && !haveit; tid++) {
                for (tt=0; tt<3; tt++) {
                  PetscInt cid2 = mid.trianglelist[3*tid + tt];
                  PetscInt lid2 = selected_idx_2[cid2];
                  AA[tt][0] = coords[lid2]; AA[tt][1] = coords[data_stride + lid2]; AA[tt][2] = 1.0;
                  clids[tt] = cid2; /* store for interp */
                }
                for (tt=0; tt<3; tt++) alpha[tt] = fcoord[tt];
                /* SUBROUTINE DGESV(N, NRHS, A, LDA, IPIV, B, LDB, INFO) */
                PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&N, &NRHS, (PetscScalar*)AA, &LDA, IPIV, alpha, &LDB, &INFO));
                {
                  PetscBool have=PETSC_TRUE;  PetscReal worst=0.0, v;
                  for (tt=0; tt<3 && have; tt++) {
                    if (PetscRealPart(alpha[tt]) > 1.0+EPS || PetscRealPart(alpha[tt]) < -EPS) have=PETSC_FALSE;
                    if ((v=PetscAbs(PetscRealPart(alpha[tt])-0.5)) > worst) worst = v;
                  }
                  if (worst < best_alpha) {
                    best_alpha = worst; bestTID = tid;
                  }
                  haveit = have;
                }
              }
            }
            if (!haveit) {
              if (best_alpha > *a_worst_best) *a_worst_best = best_alpha;
              /* use best one */
              for (tt=0; tt<3; tt++) {
                PetscInt cid2 = mid.trianglelist[3*bestTID + tt];
                PetscInt lid2 = selected_idx_2[cid2];
                AA[tt][0] = coords[lid2]; AA[tt][1] = coords[data_stride + lid2]; AA[tt][2] = 1.0;
                clids[tt] = cid2; /* store for interp */
              }
              for (tt=0; tt<3; tt++) alpha[tt] = fcoord[tt];
              /* SUBROUTINE DGESV(N, NRHS, A, LDA, IPIV, B, LDB, INFO) */
              PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&N, &NRHS, (PetscScalar*)AA, &LDA, IPIV, alpha, &LDB, &INFO));
            }

            /* put in row of P */
            for (idx=0; idx<3; idx++) {
              PetscScalar shp = alpha[idx];
              if (PetscAbs(PetscRealPart(shp)) > 1.e-6) {
                PetscInt cgid = crsGID[clids[idx]];
                PetscInt jj   = cgid*bs, ii = fgid*bs; /* need to gloalize */
                for (tt=0; tt < bs; tt++, ii++, jj++) {
                  ierr = MatSetValues(a_Prol,1,&ii,1,&jj,&shp,INSERT_VALUES);CHKERRQ(ierr);
                }
              }
            }
          }
        } /* aggregates iterations */
        clid++;
      } /* a coarse agg */
    } /* for all fine nodes */

    ierr = ISRestoreIndices(selected_2, &selected_idx_2);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(a_Prol,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(a_Prol,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

    ierr = PetscFree(node_tri);CHKERRQ(ierr);
    ierr = PetscFree(nTri);CHKERRQ(ierr);
  }
#if defined PETSC_GAMG_USE_LOG
  ierr = PetscLogEventEnd(petsc_gamg_setup_events[FIND_V],0,0,0,0);CHKERRQ(ierr);
#endif
  free(mid.trianglelist);
  free(mid.neighborlist);
  ierr = PetscFree(in.pointlist);CHKERRQ(ierr);
  PetscFunctionReturn(0);
#else
  SETERRQ(PetscObjectComm((PetscObject)a_Prol),PETSC_ERR_PLIB,"configure with TRIANGLE to use geometric MG");
#endif
}
Exemple #23
0
/*
c     ***********
c
c     Subroutine dgqt
c
c     Given an n by n symmetric matrix A, an n-vector b, and a
c     positive number delta, this subroutine determines a vector
c     x which approximately minimizes the quadratic function
c
c           f(x) = (1/2)*x'*A*x + b'*x
c
c     subject to the Euclidean norm constraint
c
c           norm(x) <= delta.
c
c     This subroutine computes an approximation x and a Lagrange
c     multiplier par such that either par is zero and
c
c            norm(x) <= (1+rtol)*delta,
c
c     or par is positive and
c
c            abs(norm(x) - delta) <= rtol*delta.
c
c     If xsol is the solution to the problem, the approximation x
c     satisfies
c
c            f(x) <= ((1 - rtol)**2)*f(xsol)
c
c     The subroutine statement is
c
c       subroutine dgqt(n,a,lda,b,delta,rtol,atol,itmax,
c                        par,f,x,info,z,wa1,wa2)
c
c     where
c
c       n is an integer variable.
c         On entry n is the order of A.
c         On exit n is unchanged.
c
c       a is a double precision array of dimension (lda,n).
c         On entry the full upper triangle of a must contain the
c            full upper triangle of the symmetric matrix A.
c         On exit the array contains the matrix A.
c
c       lda is an integer variable.
c         On entry lda is the leading dimension of the array a.
c         On exit lda is unchanged.
c
c       b is an double precision array of dimension n.
c         On entry b specifies the linear term in the quadratic.
c         On exit b is unchanged.
c
c       delta is a double precision variable.
c         On entry delta is a bound on the Euclidean norm of x.
c         On exit delta is unchanged.
c
c       rtol is a double precision variable.
c         On entry rtol is the relative accuracy desired in the
c            solution. Convergence occurs if
c
c              f(x) <= ((1 - rtol)**2)*f(xsol)
c
c         On exit rtol is unchanged.
c
c       atol is a double precision variable.
c         On entry atol is the absolute accuracy desired in the
c            solution. Convergence occurs when
c
c              norm(x) <= (1 + rtol)*delta
c
c              max(-f(x),-f(xsol)) <= atol
c
c         On exit atol is unchanged.
c
c       itmax is an integer variable.
c         On entry itmax specifies the maximum number of iterations.
c         On exit itmax is unchanged.
c
c       par is a double precision variable.
c         On entry par is an initial estimate of the Lagrange
c            multiplier for the constraint norm(x) <= delta.
c         On exit par contains the final estimate of the multiplier.
c
c       f is a double precision variable.
c         On entry f need not be specified.
c         On exit f is set to f(x) at the output x.
c
c       x is a double precision array of dimension n.
c         On entry x need not be specified.
c         On exit x is set to the final estimate of the solution.
c
c       info is an integer variable.
c         On entry info need not be specified.
c         On exit info is set as follows:
c
c            info = 1  The function value f(x) has the relative
c                      accuracy specified by rtol.
c
c            info = 2  The function value f(x) has the absolute
c                      accuracy specified by atol.
c
c            info = 3  Rounding errors prevent further progress.
c                      On exit x is the best available approximation.
c
c            info = 4  Failure to converge after itmax iterations.
c                      On exit x is the best available approximation.
c
c       z is a double precision work array of dimension n.
c
c       wa1 is a double precision work array of dimension n.
c
c       wa2 is a double precision work array of dimension n.
c
c     Subprograms called
c
c       MINPACK-2  ......  destsv
c
c       LAPACK  .........  dpotrf
c
c       Level 1 BLAS  ...  daxpy, dcopy, ddot, dnrm2, dscal
c
c       Level 2 BLAS  ...  dtrmv, dtrsv
c
c     MINPACK-2 Project. October 1993.
c     Argonne National Laboratory and University of Minnesota.
c     Brett M. Averick, Richard Carter, and Jorge J. More'
c
c     ***********
*/
PetscErrorCode gqt(PetscInt n, PetscReal *a, PetscInt lda, PetscReal *b,
                   PetscReal delta, PetscReal rtol, PetscReal atol,
                   PetscInt itmax, PetscReal *retpar, PetscReal *retf,
                   PetscReal *x, PetscInt *retinfo, PetscInt *retits,
                   PetscReal *z, PetscReal *wa1, PetscReal *wa2)
{
  PetscErrorCode ierr;
  PetscReal      f=0.0,p001=0.001,p5=0.5,minusone=-1,delta2=delta*delta;
  PetscInt       iter, j, rednc,info;
  PetscBLASInt   indef;
  PetscBLASInt   blas1=1, blasn=n, iblas, blaslda = lda,blasldap1=lda+1,blasinfo;
  PetscReal      alpha, anorm, bnorm, parc, parf, parl, pars, par=*retpar,paru, prod, rxnorm, rznorm=0.0, temp, xnorm;

  PetscFunctionBegin;
  parf = 0.0;
  xnorm = 0.0;
  rxnorm = 0.0;
  rednc = 0;
  for (j=0; j<n; j++) {
    x[j] = 0.0;
    z[j] = 0.0;
  }

  /* Copy the diagonal and save A in its lower triangle */
  PetscStackCallBLAS("BLAScopy",BLAScopy_(&blasn,a,&blasldap1, wa1, &blas1));
  for (j=0;j<n-1;j++) {
    iblas = n - j - 1;
    PetscStackCallBLAS("BLAScopy",BLAScopy_(&iblas,&a[j + lda*(j+1)], &blaslda, &a[j+1 + lda*j], &blas1));
  }

  /* Calculate the l1-norm of A, the Gershgorin row sums, and the
   l2-norm of b */
  anorm = 0.0;
  for (j=0;j<n;j++) {
    wa2[j] = BLASasum_(&blasn, &a[0 + lda*j], &blas1);
    CHKMEMQ;
    anorm = PetscMax(anorm,wa2[j]);
  }
  for (j=0;j<n;j++) {
    wa2[j] = wa2[j] - PetscAbs(wa1[j]);
  }
  bnorm = BLASnrm2_(&blasn,b,&blas1);
  CHKMEMQ;
  /* Calculate a lower bound, pars, for the domain of the problem.
   Also calculate an upper bound, paru, and a lower bound, parl,
   for the Lagrange multiplier. */
  pars = parl = paru = -anorm;
  for (j=0;j<n;j++) {
    pars = PetscMax(pars, -wa1[j]);
    parl = PetscMax(parl, wa1[j] + wa2[j]);
    paru = PetscMax(paru, -wa1[j] + wa2[j]);
  }
  parl = PetscMax(bnorm/delta - parl,pars);
  parl = PetscMax(0.0,parl);
  paru = PetscMax(0.0, bnorm/delta + paru);

  /* If the input par lies outside of the interval (parl, paru),
   set par to the closer endpoint. */

  par = PetscMax(par,parl);
  par = PetscMin(par,paru);

  /* Special case: parl == paru */
  paru = PetscMax(paru, (1.0 + rtol)*parl);

  /* Beginning of an iteration */

  info = 0;
  for (iter=1;iter<=itmax;iter++) {
    /* Safeguard par */
    if (par <= pars && paru > 0) {
      par = PetscMax(p001, PetscSqrtScalar(parl/paru)) * paru;
    }

    /* Copy the lower triangle of A into its upper triangle and
     compute A + par*I */

    for (j=0;j<n-1;j++) {
      iblas = n - j - 1;
      PetscStackCallBLAS("BLAScopy",BLAScopy_(&iblas,&a[j+1 + j*lda], &blas1,&a[j + (j+1)*lda], &blaslda));
    }
    for (j=0;j<n;j++) {
      a[j + j*lda] = wa1[j] + par;
    }

    /* Attempt the Cholesky factorization of A without referencing
     the lower triangular part. */
    PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("U",&blasn,a,&blaslda,&indef));

    /* Case 1: A + par*I is pos. def. */
    if (indef == 0) {

      /* Compute an approximate solution x and save the
       last value of par with A + par*I pos. def. */

      parf = par;
      PetscStackCallBLAS("BLAScopy",BLAScopy_(&blasn, b, &blas1, wa2, &blas1));
      PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&blasn,&blas1,a,&blaslda,wa2,&blasn,&blasinfo));
      rxnorm = BLASnrm2_(&blasn, wa2, &blas1);
      PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","N","N",&blasn,&blas1,a,&blaslda,wa2,&blasn,&blasinfo));
      PetscStackCallBLAS("BLAScopy",BLAScopy_(&blasn, wa2, &blas1, x, &blas1));
      PetscStackCallBLAS("BLASscal",BLASscal_(&blasn, &minusone, x, &blas1));
      xnorm = BLASnrm2_(&blasn, x, &blas1);
      CHKMEMQ;

      /* Test for convergence */
      if (PetscAbs(xnorm - delta) <= rtol*delta ||
          (par == 0  && xnorm <= (1.0+rtol)*delta)) {
        info = 1;
      }

      /* Compute a direction of negative curvature and use this
       information to improve pars. */

      iblas=blasn*blasn;

      ierr = estsv(n,a,lda,&rznorm,z);CHKERRQ(ierr);
      CHKMEMQ;
      pars = PetscMax(pars, par-rznorm*rznorm);

      /* Compute a negative curvature solution of the form
       x + alpha*z,  where norm(x+alpha*z)==delta */

      rednc = 0;
      if (xnorm < delta) {
        /* Compute alpha */
        prod = BLASdot_(&blasn, z, &blas1, x, &blas1) / delta;
        temp = (delta - xnorm)*((delta + xnorm)/delta);
        alpha = temp/(PetscAbs(prod) + PetscSqrtScalar(prod*prod + temp/delta));
        if (prod >= 0) alpha = PetscAbs(alpha);
        else alpha =-PetscAbs(alpha);

                /* Test to decide if the negative curvature step
                   produces a larger reduction than with z=0 */
        rznorm = PetscAbs(alpha) * rznorm;
        if ((rznorm*rznorm + par*xnorm*xnorm)/(delta2) <= par) {
          rednc = 1;
        }
        /* Test for convergence */
        if (p5 * rznorm*rznorm / delta2 <= rtol*(1.0-p5*rtol)*(par + rxnorm*rxnorm/delta2)) {
          info = 1;
        } else if (info == 0 && (p5*(par + rxnorm*rxnorm/delta2) <= atol/delta2)) {
          info = 2;
        }
      }

      /* Compute the Newton correction parc to par. */
      if (xnorm == 0) {
        parc = -par;
      } else {
        PetscStackCallBLAS("BLAScopy",BLAScopy_(&blasn, x, &blas1, wa2, &blas1));
        temp = 1.0/xnorm;
        PetscStackCallBLAS("BLASscal",BLASscal_(&blasn, &temp, wa2, &blas1));
        PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&blasn, &blas1, a, &blaslda, wa2, &blasn, &blasinfo));
        temp = BLASnrm2_(&blasn, wa2, &blas1);
        parc = (xnorm - delta)/(delta*temp*temp);
      }

      /* update parl or paru */
      if (xnorm > delta) {
        parl = PetscMax(parl, par);
      } else if (xnorm < delta) {
        paru = PetscMin(paru, par);
      }
    } else {
      /* Case 2: A + par*I is not pos. def. */

      /* Use the rank information from the Cholesky
       decomposition to update par. */

      if (indef > 1) {
        /* Restore column indef to A + par*I. */
        iblas = indef - 1;
        PetscStackCallBLAS("BLAScopy",BLAScopy_(&iblas,&a[indef-1 + 0*lda],&blaslda,&a[0 + (indef-1)*lda],&blas1));
        a[indef-1 + (indef-1)*lda] = wa1[indef-1] + par;

                /* compute parc. */
        PetscStackCallBLAS("BLAScopy",BLAScopy_(&iblas,&a[0 + (indef-1)*lda], &blas1, wa2, &blas1));
        PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&iblas,&blas1,a,&blaslda,wa2,&blasn,&blasinfo));
        PetscStackCallBLAS("BLAScopy",BLAScopy_(&iblas,wa2,&blas1,&a[0 + (indef-1)*lda],&blas1));
        temp = BLASnrm2_(&iblas,&a[0 + (indef-1)*lda],&blas1);
        CHKMEMQ;
        a[indef-1 + (indef-1)*lda] -= temp*temp;
        PetscStackCallBLAS("LAPACKtrtr",LAPACKtrtrs_("U","N","N",&iblas,&blas1,a,&blaslda,wa2,&blasn,&blasinfo));
      }

      wa2[indef-1] = -1.0;
      iblas = indef;
      temp = BLASnrm2_(&iblas,wa2,&blas1);
      parc = - a[indef-1 + (indef-1)*lda]/(temp*temp);
      pars = PetscMax(pars,par+parc);

      /* If necessary, increase paru slightly.
       This is needed because in some exceptional situations
       paru is the optimal value of par. */

      paru = PetscMax(paru, (1.0+rtol)*pars);
    }

    /* Use pars to update parl */
    parl = PetscMax(parl,pars);

    /* Test for converged. */
    if (info == 0) {
      if (iter == itmax) info=4;
      if (paru <= (1.0+p5*rtol)*pars) info=3;
      if (paru == 0.0) info = 2;
    }

    /* If exiting, store the best approximation and restore
     the upper triangle of A. */

    if (info != 0) {
      /* Compute the best current estimates for x and f. */
      par = parf;
      f = -p5 * (rxnorm*rxnorm + par*xnorm*xnorm);
      if (rednc) {
        f = -p5 * (rxnorm*rxnorm + par*delta*delta - rznorm*rznorm);
        PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&blasn, &alpha, z, &blas1, x, &blas1));
      }
      /* Restore the upper triangle of A */
      for (j = 0; j<n; j++) {
        iblas = n - j - 1;
        PetscStackCallBLAS("BLAScopy",BLAScopy_(&iblas,&a[j+1 + j*lda],&blas1, &a[j + (j+1)*lda],&blaslda));
      }
      iblas = lda+1;
      PetscStackCallBLAS("BLAScopy",BLAScopy_(&blasn,wa1,&blas1,a,&iblas));
      break;
    }
    par = PetscMax(parl,par+parc);
  }
  *retpar = par;
  *retf = f;
  *retinfo = info;
  *retits = iter;
  CHKMEMQ;
  PetscFunctionReturn(0);
}
PetscErrorCode BDC_dlaed3m_(const char *jobz,const char *defl,PetscBLASInt k,PetscBLASInt n,
        PetscBLASInt n1,PetscReal *d,PetscReal *q,PetscBLASInt ldq,
        PetscReal rho,PetscReal *dlamda,PetscReal *q2,PetscBLASInt *indx, 
        PetscBLASInt *ctot,PetscReal *w,PetscReal *s,PetscBLASInt *info,
        PetscBLASInt jobz_len,PetscBLASInt defl_len)
{
/*  -- Routine written in LAPACK version 3.0 style -- */
/* *************************************************** */
/*     Written by */
/*     Michael Moldaschl and Wilfried Gansterer */
/*     University of Vienna */
/*     last modification: March 16, 2014 */

/*     Small adaptations of original code written by */
/*     Wilfried Gansterer and Bob Ward, */
/*     Department of Computer Science, University of Tennessee */
/*     see http://dx.doi.org/10.1137/S1064827501399432 */
/* *************************************************** */

/*  Purpose */
/*  ======= */

/*  DLAED3M finds the roots of the secular equation, as defined by the */
/*  values in D, W, and RHO, between 1 and K.  It makes the */
/*  appropriate calls to DLAED4 and then updates the eigenvectors by */
/*  multiplying the matrix of eigenvectors of the pair of eigensystems */
/*  being combined by the matrix of eigenvectors of the K-by-K system */
/*  which is solved here. */

/*  This code makes very mild assumptions about floating point */
/*  arithmetic. It will work on machines with a guard digit in */
/*  add/subtract, or on those binary machines without guard digits */
/*  which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. */
/*  It could conceivably fail on hexadecimal or decimal machines */
/*  without guard digits, but we know of none. */

/*  Arguments */
/*  ========= */

/*  JOBZ    (input) CHARACTER*1 */
/*          = 'N':  Do not accumulate eigenvectors (not implemented); */
/*          = 'D':  Do accumulate eigenvectors in the divide-and-conquer */
/*                  process. */

/*  DEFL    (input) CHARACTER*1 */
/*          = '0':  No deflation happened in DSRTDF */
/*          = '1':  Some deflation happened in DSRTDF (and therefore some */
/*                  Givens rotations need to be applied to the computed */
/*                  eigenvector matrix Q) */

/*  K       (input) INTEGER */
/*          The number of terms in the rational function to be solved by */
/*          DLAED4. 0 <= K <= N. */

/*  N       (input) INTEGER */
/*          The number of rows and columns in the Q matrix. */
/*          N >= K (deflation may result in N>K). */

/*  N1      (input) INTEGER */
/*          The location of the last eigenvalue in the leading submatrix. */
/*          min(1,N) <= N1 <= max(1,N-1). */

/*  D       (output) DOUBLE PRECISION array, dimension (N) */
/*          D(I) contains the updated eigenvalues for */
/*          1 <= I <= K. */

/*  Q       (output) DOUBLE PRECISION array, dimension (LDQ,N) */
/*          Initially the first K columns are used as workspace. */
/*          On output the columns 1 to K contain */
/*          the updated eigenvectors. */

/*  LDQ     (input) INTEGER */
/*          The leading dimension of the array Q.  LDQ >= max(1,N). */

/*  RHO     (input) DOUBLE PRECISION */
/*          The value of the parameter in the rank one update equation. */
/*          RHO >= 0 required. */

/*  DLAMDA  (input/output) DOUBLE PRECISION array, dimension (K) */
/*          The first K elements of this array contain the old roots */
/*          of the deflated updating problem.  These are the poles */
/*          of the secular equation. May be changed on output by */
/*          having lowest order bit set to zero on Cray X-MP, Cray Y-MP, */
/*          Cray-2, or Cray C-90, as described above. */

/*  Q2      (input) DOUBLE PRECISION array, dimension (LDQ2, N) */
/*          The first K columns of this matrix contain the non-deflated */
/*          eigenvectors for the split problem. */

/*  INDX    (input) INTEGER array, dimension (N) */
/*          The permutation used to arrange the columns of the deflated */
/*          Q matrix into three groups (see DLAED2). */
/*          The rows of the eigenvectors found by DLAED4 must be likewise */
/*          permuted before the matrix multiply can take place. */

/*  CTOT    (input) INTEGER array, dimension (4) */
/*          A count of the total number of the various types of columns */
/*          in Q, as described in INDX.  The fourth column type is any */
/*          column which has been deflated. */

/*  W       (input/output) DOUBLE PRECISION array, dimension (K) */
/*          The first K elements of this array contain the components */
/*          of the deflation-adjusted updating vector. Destroyed on */
/*          output. */

/*  S       (workspace) DOUBLE PRECISION array, dimension */
/*          ( MAX(CTOT(1)+CTOT(2),CTOT(2)+CTOT(3)) + 1 )*K */
/*          Will contain parts of the eigenvectors of the repaired matrix */
/*          which will be multiplied by the previously accumulated */
/*          eigenvectors to update the system. This array is a major */
/*          source of workspace requirements ! */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit. */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value. */
/*          > 0:  if INFO = i, eigenpair i was not computed successfully */

/*  Further Details */
/*  =============== */

/*  Based on code written by */
/*     Wilfried Gansterer and Bob Ward, */
/*     Department of Computer Science, University of Tennessee */
/*  Based on the design of the LAPACK code DLAED3 with small modifications */
/*  (Note that in contrast to the original DLAED3, this routine */
/*  DOES NOT require that N1 <= N/2) */

/*  Based on contributions by */
/*     Jeff Rutter, Computer Science Division, University of California */
/*     at Berkeley, USA */
/*  Modified by Francoise Tisseur, University of Tennessee. */

/*  ===================================================================== */

#if defined(SLEPC_MISSING_LAPACK_LAED4) || defined(SLEPC_MISSING_LAPACK_LACPY) || defined(SLEPC_MISSING_LAPACK_LASET)
  PetscFunctionBegin;
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAED4/LACPY/LASET - Lapack routine is unavailable");
#else
  PetscReal    temp, done = 1.0, dzero = 0.0;
  PetscBLASInt i, j, n2, n12, ii, n23, iq2, i1, one=1;

  PetscFunctionBegin;
  *info = 0;

  if (k < 0) {
    *info = -3;
  } else if (n < k) {
    *info = -4;
  } else if (n1 < PetscMin(1,n) || n1 > PetscMax(1,n)) {
    *info = -5;
  } else if (ldq < PetscMax(1,n)) {
    *info = -8;
  } else if (rho < 0.) {
    *info = -9;
  }
  if (*info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong argument %d in DLAED3M",-(*info));

  /* Quick return if possible */

  if (k == 0) PetscFunctionReturn(0);

  /* Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can */
  /* be computed with high relative accuracy (barring over/underflow). */
  /* This is a problem on machines without a guard digit in */
  /* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). */
  /* The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), */
  /* which on any of these machines zeros out the bottommost */
  /* bit of DLAMDA(I) if it is 1; this makes the subsequent */
  /* subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation */
  /* occurs. On binary machines with a guard digit (almost all */
  /* machines) it does not change DLAMDA(I) at all. On hexadecimal */
  /* and decimal machines with a guard digit, it slightly */
  /* changes the bottommost bits of DLAMDA(I). It does not account */
  /* for hexadecimal or decimal machines without guard digits */
  /* (we know of none). We use a subroutine call to compute */
  /* 2*DLAMBDA(I) to prevent optimizing compilers from eliminating */
  /* this code. */

  for (i = 0; i < k; ++i) {
    dlamda[i] = LAPACKlamc3_(&dlamda[i], &dlamda[i]) - dlamda[i];
  }

  for (j = 1; j <= k; ++j) {

    /* ....calling DLAED4 for eigenpair J.... */

    PetscStackCallBLAS("LAPACKlaed4",LAPACKlaed4_(&k, &j, dlamda, w, &q[(j-1)*ldq], &rho, &d[j-1], info));
    if (*info) SETERRQ3(PETSC_COMM_SELF,1,"Error in dlaed4, info = %d, failed when computing D(%d)=%g",*info,j,d[j-1]);

    if (j < k) {

      /* If the zero finder terminated properly, but the computed */
      /* eigenvalues are not ordered, issue an error statement */
      /* but continue computation. */

      if (dlamda[j-1] >= dlamda[j]) SETERRQ2(PETSC_COMM_SELF,1,"DLAMDA(%d) is greater or equal than DLAMDA(%d)", j, j+1);
      if (d[j-1] < dlamda[j-1] || d[j-1] > dlamda[j]) SETERRQ6(PETSC_COMM_SELF,1,"DLAMDA(%d) = %g D(%d) = %g DLAMDA(%d) = %g", j, dlamda[j-1], j, d[j-1], j+1, dlamda[j]);
    }
  }

  if (k == 1) goto L110;

  if (k == 2) {

    /* permute the components of Q(:,J) (the information returned by DLAED4 */
    /* necessary to construct the eigenvectors) according to the permutation */
    /* stored in INDX, resulting from deflation */

    for (j = 0; j < k; ++j) {
      w[0] = q[0+j*ldq];
      w[1] = q[1+j*ldq];
      ii = indx[0];
      q[0+j*ldq] = w[ii-1];
      ii = indx[1];
      q[1+j*ldq] = w[ii-1];
    }
    goto L110;
  }

  /* ....K.GE.3.... */
  /* Compute updated W (used for computing the eigenvectors corresponding */
  /* to the previously computed eigenvalues). */

  PetscStackCallBLAS("BLAScopy",BLAScopy_(&k, w, &one, s, &one));

  /* Initialize W(I) = Q(I,I) */

  i1 = ldq + 1;
  PetscStackCallBLAS("BLAScopy",BLAScopy_(&k, q, &i1, w, &one));
  for (j = 0; j < k; ++j) {
    for (i = 0; i < j; ++i) {
      w[i] *= q[i+j*ldq] / (dlamda[i] - dlamda[j]);
    }
    for (i = j + 1; i < k; ++i) {
      w[i] *= q[i+j*ldq] / (dlamda[i] - dlamda[j]);
    }
  }
  for (i = 0; i < k; ++i) {
    temp = PetscSqrtReal(-w[i]);
    if (temp<0) temp = -temp;
    w[i] =  (s[i] >= 0) ? temp : -temp;
  }

  /* Compute eigenvectors of the modified rank-1 modification (using the */
  /* vector W). */

  for (j = 0; j < k; ++j) {
    for (i = 0; i < k; ++i) {
      s[i] = w[i] / q[i+j*ldq];
    }
    temp = BLASnrm2_(&k, s, &one);
    for (i = 0; i < k; ++i) {

      /* apply the permutation resulting from deflation as stored */
      /* in INDX */

      ii = indx[i];
      q[i+j*ldq] = s[ii-1] / temp;
    }
  }

/* ************************************************************************** */

  /* ....updating the eigenvectors.... */

L110:

  n2 = n - n1;
  n12 = ctot[0] + ctot[1];
  n23 = ctot[1] + ctot[2];
  if (*(unsigned char *)jobz == 'D') {

    /* Compute the updated eigenvectors. (NOTE that every call of */
    /* DGEMM requires three DISTINCT arrays) */

    /* copy Q( CTOT(1)+1:K,1:K ) to S */

    PetscStackCallBLAS("LAPACKlacpy",LAPACKlacpy_("A", &n23, &k, &q[ctot[0]], &ldq, s, &n23));
    iq2 = n1 * n12 + 1;

    if (n23 != 0) {

      /* multiply the second part of Q2 (the eigenvectors of the */
      /* lower block) with S and write the result into the lower part of */
      /* Q, i.e., Q( N1+1:N,1:K ) */

      PetscStackCallBLAS("BLASgemm",BLASgemm_("N", "N", &n2, &k, &n23, &done,
                  &q2[iq2-1], &n2, s, &n23, &dzero, &q[n1], &ldq));
    } else {
      PetscStackCallBLAS("LAPACKlaset",LAPACKlaset_("A", &n2, &k, &dzero, &dzero, &q[n1], &ldq));
    }

    /* copy Q( 1:CTOT(1)+CTOT(2),1:K ) to S */

    PetscStackCallBLAS("LAPACKlacpy",LAPACKlacpy_("A", &n12, &k, q, &ldq, s, &n12));

    if (n12 != 0) {

      /* multiply the first part of Q2 (the eigenvectors of the */
      /* upper block) with S and write the result into the upper part of */
      /* Q, i.e., Q( 1:N1,1:K ) */

      PetscStackCallBLAS("BLASgemm",BLASgemm_("N", "N", &n1, &k, &n12, &done,
                  q2, &n1, s, &n12, &dzero, q, &ldq));
    } else {
      PetscStackCallBLAS("LAPACKlaset",LAPACKlaset_("A", &n1, &k, &dzero, &dzero, q, &ldq));
    }
  }
  PetscFunctionReturn(0);
#endif
}
Exemple #25
0
static PetscErrorCode estsv(PetscInt n, PetscReal *r, PetscInt ldr, PetscReal *svmin, PetscReal *z)
{
  PetscBLASInt blas1=1, blasn=n, blasnmi, blasj, blasldr = ldr;
  PetscInt     i,j;
  PetscReal    e,temp,w,wm,ynorm,znorm,s,sm;

  PetscFunctionBegin;
  for (i=0;i<n;i++) {
    z[i]=0.0;
  }
  e = PetscAbs(r[0]);
  if (e == 0.0) {
    *svmin = 0.0;
    z[0] = 1.0;
  } else {
    /* Solve R'*y = e */
    for (i=0;i<n;i++) {
      /* Scale y. The scaling factor (0.01) reduces the number of scalings */
      if (z[i] >= 0.0) e =-PetscAbs(e);
      else             e = PetscAbs(e);

      if (PetscAbs(e - z[i]) > PetscAbs(r[i + ldr*i])) {
        temp = PetscMin(0.01,PetscAbs(r[i + ldr*i]))/PetscAbs(e-z[i]);
        PetscStackCallBLAS("BLASscal",BLASscal_(&blasn, &temp, z, &blas1));
        e = temp*e;
      }

      /* Determine the two possible choices of y[i] */
      if (r[i + ldr*i] == 0.0) {
        w = wm = 1.0;
      } else {
        w = (e - z[i]) / r[i + ldr*i];
        wm = - (e + z[i]) / r[i + ldr*i];
      }

      /*  Chose y[i] based on the predicted value of y[j] for j>i */
      s = PetscAbs(e - z[i]);
      sm = PetscAbs(e + z[i]);
      for (j=i+1;j<n;j++) {
        sm += PetscAbs(z[j] + wm * r[i + ldr*j]);
      }
      if (i < n-1) {
        blasnmi = n-i-1;
        PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&blasnmi, &w, &r[i + ldr*(i+1)], &blasldr, &z[i+1], &blas1));
        s += BLASasum_(&blasnmi, &z[i+1], &blas1);
      }
      if (s < sm) {
        temp = wm - w;
        w = wm;
        if (i < n-1) {
          PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&blasnmi, &temp, &r[i + ldr*(i+1)], &blasldr, &z[i+1], &blas1));
        }
      }
      z[i] = w;
    }

    ynorm = BLASnrm2_(&blasn, z, &blas1);

    /* Solve R*z = y */
    for (j=n-1; j>=0; j--) {
      /* Scale z */
      if (PetscAbs(z[j]) > PetscAbs(r[j + ldr*j])) {
        temp = PetscMin(0.01, PetscAbs(r[j + ldr*j] / z[j]));
        PetscStackCallBLAS("BLASscal",BLASscal_(&blasn, &temp, z, &blas1));
        ynorm *=temp;
      }
      if (r[j + ldr*j] == 0) {
        z[j] = 1.0;
      } else {
        z[j] = z[j] / r[j + ldr*j];
      }
      temp = -z[j];
      blasj=j;
      PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&blasj,&temp,&r[0+ldr*j],&blas1,z,&blas1));
    }

    /* Compute svmin and normalize z */
    znorm = 1.0 / BLASnrm2_(&blasn, z, &blas1);
    *svmin = ynorm*znorm;
    PetscStackCallBLAS("BLASscal",BLASscal_(&blasn, &znorm, z, &blas1));
  }
  PetscFunctionReturn(0);
}
Exemple #26
0
/**************************************xxt.c***********************************/
static PetscInt xxt_generate(xxt_ADT xxt_handle)
{
  PetscInt       i,j,k,idex;
  PetscInt       dim, col;
  PetscScalar    *u, *uu, *v, *z, *w, alpha, alpha_w;
  PetscInt       *segs;
  PetscInt       op[] = {GL_ADD,0};
  PetscInt       off, len;
  PetscScalar    *x_ptr;
  PetscInt       *iptr, flag;
  PetscInt       start =0, end, work;
  PetscInt       op2[] = {GL_MIN,0};
  PCTFS_gs_ADT   PCTFS_gs_handle;
  PetscInt       *nsep, *lnsep, *fo;
  PetscInt       a_n            =xxt_handle->mvi->n;
  PetscInt       a_m            =xxt_handle->mvi->m;
  PetscInt       *a_local2global=xxt_handle->mvi->local2global;
  PetscInt       level;
  PetscInt       xxt_nnz=0, xxt_max_nnz=0;
  PetscInt       n, m;
  PetscInt       *col_sz, *col_indices, *stages;
  PetscScalar    **col_vals, *x;
  PetscInt       n_global;
  PetscInt       xxt_zero_nnz  =0;
  PetscInt       xxt_zero_nnz_0=0;
  PetscBLASInt   i1            = 1,dlen;
  PetscScalar    dm1           = -1.0;
  PetscErrorCode ierr;

  n               = xxt_handle->mvi->n;
  nsep            = xxt_handle->info->nsep;
  lnsep           = xxt_handle->info->lnsep;
  fo              = xxt_handle->info->fo;
  end             = lnsep[0];
  level           = xxt_handle->level;
  PCTFS_gs_handle = xxt_handle->mvi->PCTFS_gs_handle;

  /* is there a null space? */
  /* LATER add in ability to detect null space by checking alpha */
  for (i=0, j=0; i<=level; i++) j+=nsep[i];

  m = j-xxt_handle->ns;
  if (m!=j) {
    ierr = PetscPrintf(PETSC_COMM_WORLD,"xxt_generate() :: null space exists %D %D %D\n",m,j,xxt_handle->ns);CHKERRQ(ierr);
  }

  /* get and initialize storage for x local         */
  /* note that x local is nxm and stored by columns */
  col_sz      = (PetscInt*) malloc(m*sizeof(PetscInt));
  col_indices = (PetscInt*) malloc((2*m+1)*sizeof(PetscInt));
  col_vals    = (PetscScalar**) malloc(m*sizeof(PetscScalar*));
  for (i=j=0; i<m; i++, j+=2) {
    col_indices[j]=col_indices[j+1]=col_sz[i]=-1;
    col_vals[i]   = NULL;
  }
  col_indices[j]=-1;

  /* size of separators for each sub-hc working from bottom of tree to top */
  /* this looks like nsep[]=segments */
  stages = (PetscInt*) malloc((level+1)*sizeof(PetscInt));
  segs   = (PetscInt*) malloc((level+1)*sizeof(PetscInt));
  PCTFS_ivec_zero(stages,level+1);
  PCTFS_ivec_copy(segs,nsep,level+1);
  for (i=0; i<level; i++) segs[i+1] += segs[i];
  stages[0] = segs[0];

  /* temporary vectors  */
  u  = (PetscScalar*) malloc(n*sizeof(PetscScalar));
  z  = (PetscScalar*) malloc(n*sizeof(PetscScalar));
  v  = (PetscScalar*) malloc(a_m*sizeof(PetscScalar));
  uu = (PetscScalar*) malloc(m*sizeof(PetscScalar));
  w  = (PetscScalar*) malloc(m*sizeof(PetscScalar));

  /* extra nnz due to replication of vertices across separators */
  for (i=1, j=0; i<=level; i++) j+=nsep[i];

  /* storage for sparse x values */
  n_global    = xxt_handle->info->n_global;
  xxt_max_nnz = (PetscInt)(2.5*PetscPowReal(1.0*n_global,1.6667) + j*n/2)/PCTFS_num_nodes;
  x           = (PetscScalar*) malloc(xxt_max_nnz*sizeof(PetscScalar));
  xxt_nnz     = 0;

  /* LATER - can embed next sep to fire in gs */
  /* time to make the donuts - generate X factor */
  for (dim=i=j=0; i<m; i++) {
    /* time to move to the next level? */
    while (i==segs[dim]) {
      if (dim==level) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"dim about to exceed level\n");
      stages[dim++]=i;
      end         +=lnsep[dim];
    }
    stages[dim]=i;

    /* which column are we firing? */
    /* i.e. set v_l */
    /* use new seps and do global min across hc to determine which one to fire */
    (start<end) ? (col=fo[start]) : (col=INT_MAX);
    PCTFS_giop_hc(&col,&work,1,op2,dim);

    /* shouldn't need this */
    if (col==INT_MAX) {
      ierr = PetscInfo(0,"hey ... col==INT_MAX??\n");CHKERRQ(ierr);
      continue;
    }

    /* do I own it? I should */
    PCTFS_rvec_zero(v,a_m);
    if (col==fo[start]) {
      start++;
      idex=PCTFS_ivec_linear_search(col, a_local2global, a_n);
      if (idex!=-1) {
        v[idex] = 1.0; j++;
      } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"NOT FOUND!\n");
    } else {
      idex=PCTFS_ivec_linear_search(col, a_local2global, a_m);
      if (idex!=-1) v[idex] = 1.0;
    }

    /* perform u = A.v_l */
    PCTFS_rvec_zero(u,n);
    do_matvec(xxt_handle->mvi,v,u);

    /* uu =  X^T.u_l (local portion) */
    /* technically only need to zero out first i entries */
    /* later turn this into an XXT_solve call ? */
    PCTFS_rvec_zero(uu,m);
    x_ptr=x;
    iptr = col_indices;
    for (k=0; k<i; k++) {
      off   = *iptr++;
      len   = *iptr++;
      ierr  = PetscBLASIntCast(len,&dlen);CHKERRQ(ierr);
      PetscStackCallBLAS("BLASdot",uu[k] = BLASdot_(&dlen,u+off,&i1,x_ptr,&i1));
      x_ptr+=len;
    }


    /* uu = X^T.u_l (comm portion) */
    PCTFS_ssgl_radd  (uu, w, dim, stages);

    /* z = X.uu */
    PCTFS_rvec_zero(z,n);
    x_ptr=x;
    iptr = col_indices;
    for (k=0; k<i; k++) {
      off  = *iptr++;
      len  = *iptr++;
      ierr = PetscBLASIntCast(len,&dlen);CHKERRQ(ierr);
      PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&dlen,&uu[k],x_ptr,&i1,z+off,&i1));
      x_ptr+=len;
    }

    /* compute v_l = v_l - z */
    PCTFS_rvec_zero(v+a_n,a_m-a_n);
    ierr = PetscBLASIntCast(n,&dlen);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&dlen,&dm1,z,&i1,v,&i1));

    /* compute u_l = A.v_l */
    if (a_n!=a_m) PCTFS_gs_gop_hc(PCTFS_gs_handle,v,"+\0",dim);
    PCTFS_rvec_zero(u,n);
    do_matvec(xxt_handle->mvi,v,u);

    /* compute sqrt(alpha) = sqrt(v_l^T.u_l) - local portion */
    ierr  = PetscBLASIntCast(n,&dlen);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASdot",alpha = BLASdot_(&dlen,u,&i1,v,&i1));
    /* compute sqrt(alpha) = sqrt(v_l^T.u_l) - comm portion */
    PCTFS_grop_hc(&alpha, &alpha_w, 1, op, dim);

    alpha = (PetscScalar) PetscSqrtReal((PetscReal)alpha);

    /* check for small alpha                             */
    /* LATER use this to detect and determine null space */
    if (PetscAbsScalar(alpha)<1.0e-14) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"bad alpha! %g\n",alpha);

    /* compute v_l = v_l/sqrt(alpha) */
    PCTFS_rvec_scale(v,1.0/alpha,n);

    /* add newly generated column, v_l, to X */
    flag = 1;
    off=len=0;
    for (k=0; k<n; k++) {
      if (v[k]!=0.0) {
        len=k;
        if (flag) { off=k; flag=0; }
      }
    }

    len -= (off-1);

    if (len>0) {
      if ((xxt_nnz+len)>xxt_max_nnz) {
        ierr         = PetscInfo(0,"increasing space for X by 2x!\n");CHKERRQ(ierr);
        xxt_max_nnz *= 2;
        x_ptr        = (PetscScalar*) malloc(xxt_max_nnz*sizeof(PetscScalar));
        PCTFS_rvec_copy(x_ptr,x,xxt_nnz);
        free(x);
        x     = x_ptr;
        x_ptr+=xxt_nnz;
      }
      xxt_nnz += len;
      PCTFS_rvec_copy(x_ptr,v+off,len);

      /* keep track of number of zeros */
      if (dim) {
        for (k=0; k<len; k++) {
          if (x_ptr[k]==0.0) xxt_zero_nnz++;
        }
      } else {
        for (k=0; k<len; k++) {
          if (x_ptr[k]==0.0) xxt_zero_nnz_0++;
        }
      }
      col_indices[2*i] = off;
      col_sz[i] = col_indices[2*i+1] = len;
      col_vals[i] = x_ptr;
    }
    else {
      col_indices[2*i] = 0;
      col_sz[i]        = col_indices[2*i+1] = 0;
      col_vals[i]      = x_ptr;
    }
  }

  /* close off stages for execution phase */
  while (dim!=level) {
    stages[dim++] = i;
    ierr          = PetscInfo2(0,"disconnected!!! dim(%D)!=level(%D)\n",dim,level);CHKERRQ(ierr);
  }
  stages[dim]=i;

  xxt_handle->info->n              = xxt_handle->mvi->n;
  xxt_handle->info->m              = m;
  xxt_handle->info->nnz            = xxt_nnz;
  xxt_handle->info->max_nnz        = xxt_max_nnz;
  xxt_handle->info->msg_buf_sz     = stages[level]-stages[0];
  xxt_handle->info->solve_uu       = (PetscScalar*) malloc(m*sizeof(PetscScalar));
  xxt_handle->info->solve_w        = (PetscScalar*) malloc(m*sizeof(PetscScalar));
  xxt_handle->info->x              = x;
  xxt_handle->info->col_vals       = col_vals;
  xxt_handle->info->col_sz         = col_sz;
  xxt_handle->info->col_indices    = col_indices;
  xxt_handle->info->stages         = stages;
  xxt_handle->info->nsolves        = 0;
  xxt_handle->info->tot_solve_time = 0.0;

  free(segs);
  free(u);
  free(v);
  free(uu);
  free(z);
  free(w);

  return(0);
}
Exemple #27
0
PetscErrorCode DSVectors_NHEP_Eigen_Some(DS ds,PetscInt *k,PetscReal *rnorm,PetscBool left)
{
#if defined(SLEPC_MISSING_LAPACK_TREVC)
  PetscFunctionBegin;
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TREVC - Lapack routine is unavailable");
#else
  PetscErrorCode ierr;
  PetscInt       i;
  PetscBLASInt   mm=1,mout,info,ld,n,inc = 1;
  PetscScalar    tmp,done=1.0,zero=0.0;
  PetscReal      norm;
  PetscBool      iscomplex = PETSC_FALSE;
  PetscBLASInt   *select;
  PetscScalar    *A = ds->mat[DS_MAT_A];
  PetscScalar    *Q = ds->mat[DS_MAT_Q];
  PetscScalar    *X = ds->mat[left?DS_MAT_Y:DS_MAT_X];
  PetscScalar    *Y;

  PetscFunctionBegin;
  ierr = PetscBLASIntCast(ds->n,&n);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(ds->ld,&ld);CHKERRQ(ierr);
  ierr = DSAllocateWork_Private(ds,0,0,ld);CHKERRQ(ierr);
  select = ds->iwork;
  for (i=0;i<n;i++) select[i] = (PetscBLASInt)PETSC_FALSE;

  /* Compute k-th eigenvector Y of A */
  Y = X+(*k)*ld;
  select[*k] = (PetscBLASInt)PETSC_TRUE;
#if !defined(PETSC_USE_COMPLEX)
  if ((*k)<n-1 && A[(*k)+1+(*k)*ld]!=0.0) iscomplex = PETSC_TRUE;
  mm = iscomplex? 2: 1;
  if (iscomplex) select[(*k)+1] = (PetscBLASInt)PETSC_TRUE;
  ierr = DSAllocateWork_Private(ds,3*ld,0,0);CHKERRQ(ierr);
  PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(left?"L":"R","S",select,&n,A,&ld,Y,&ld,Y,&ld,&mm,&mout,ds->work,&info));
#else
  ierr = DSAllocateWork_Private(ds,2*ld,ld,0);CHKERRQ(ierr);
  PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(left?"L":"R","S",select,&n,A,&ld,Y,&ld,Y,&ld,&mm,&mout,ds->work,ds->rwork,&info));
#endif
  if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xTREVC %d",info);
  if (mout != mm) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Inconsistent arguments");

  /* accumulate and normalize eigenvectors */
  if (ds->state>=DS_STATE_CONDENSED) {
    ierr = PetscMemcpy(ds->work,Y,mout*ld*sizeof(PetscScalar));CHKERRQ(ierr);
    PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&done,Q,&ld,ds->work,&inc,&zero,Y,&inc));
#if !defined(PETSC_USE_COMPLEX)
    if (iscomplex) PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&done,Q,&ld,ds->work+ld,&inc,&zero,Y+ld,&inc));
#endif
    norm = BLASnrm2_(&n,Y,&inc);
#if !defined(PETSC_USE_COMPLEX)
    if (iscomplex) {
      tmp = BLASnrm2_(&n,Y+ld,&inc);
      norm = SlepcAbsEigenvalue(norm,tmp);
    }
#endif
    tmp = 1.0 / norm;
    PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Y,&inc));
#if !defined(PETSC_USE_COMPLEX)
    if (iscomplex) PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Y+ld,&inc));
#endif
  }

  /* set output arguments */
  if (iscomplex) (*k)++;
  if (rnorm) {
    if (iscomplex) *rnorm = SlepcAbsEigenvalue(Y[n-1],Y[n-1+ld]);
    else *rnorm = PetscAbsScalar(Y[n-1]);
  }
  PetscFunctionReturn(0);
#endif
}
Exemple #28
0
PetscErrorCode DSSort_NHEP_Total(DS ds,PetscScalar *wr,PetscScalar *wi)
{
#if defined(SLEPC_MISSING_LAPACK_TREXC)
  PetscFunctionBegin;
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TREXC - Lapack routine is unavailable");
#else
  PetscErrorCode ierr;
  PetscScalar    re;
  PetscInt       i,j,pos,result;
  PetscBLASInt   ifst,ilst,info,n,ld;
  PetscScalar    *T = ds->mat[DS_MAT_A];
  PetscScalar    *Q = ds->mat[DS_MAT_Q];
#if !defined(PETSC_USE_COMPLEX)
  PetscScalar    *work,im;
#endif

  PetscFunctionBegin;
  ierr = PetscBLASIntCast(ds->n,&n);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(ds->ld,&ld);CHKERRQ(ierr);
#if !defined(PETSC_USE_COMPLEX)
  ierr = DSAllocateWork_Private(ds,ld,0,0);CHKERRQ(ierr);
  work = ds->work;
#endif
  /* selection sort */
  for (i=ds->l;i<n-1;i++) {
    re = wr[i];
#if !defined(PETSC_USE_COMPLEX)
    im = wi[i];
#endif
    pos = 0;
    j=i+1; /* j points to the next eigenvalue */
#if !defined(PETSC_USE_COMPLEX)
    if (im != 0) j=i+2;
#endif
    /* find minimum eigenvalue */
    for (;j<n;j++) {
#if !defined(PETSC_USE_COMPLEX)
      ierr = SlepcSCCompare(ds->sc,re,im,wr[j],wi[j],&result);CHKERRQ(ierr);
#else
      ierr = SlepcSCCompare(ds->sc,re,0.0,wr[j],0.0,&result);CHKERRQ(ierr);
#endif
      if (result > 0) {
        re = wr[j];
#if !defined(PETSC_USE_COMPLEX)
        im = wi[j];
#endif
        pos = j;
      }
#if !defined(PETSC_USE_COMPLEX)
      if (wi[j] != 0) j++;
#endif
    }
    if (pos) {
      /* interchange blocks */
      ierr = PetscBLASIntCast(pos+1,&ifst);CHKERRQ(ierr);
      ierr = PetscBLASIntCast(i+1,&ilst);CHKERRQ(ierr);
#if !defined(PETSC_USE_COMPLEX)
      PetscStackCallBLAS("LAPACKtrexc",LAPACKtrexc_("V",&n,T,&ld,Q,&ld,&ifst,&ilst,work,&info));
#else
      PetscStackCallBLAS("LAPACKtrexc",LAPACKtrexc_("V",&n,T,&ld,Q,&ld,&ifst,&ilst,&info));
#endif
      if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xTREXC %d",info);
      /* recover original eigenvalues from T matrix */
      for (j=i;j<n;j++) {
        wr[j] = T[j+j*ld];
#if !defined(PETSC_USE_COMPLEX)
        if (j<n-1 && T[j+1+j*ld] != 0.0) {
          /* complex conjugate eigenvalue */
          wi[j] = PetscSqrtReal(PetscAbsReal(T[j+1+j*ld])) *
                  PetscSqrtReal(PetscAbsReal(T[j+(j+1)*ld]));
          wr[j+1] = wr[j];
          wi[j+1] = -wi[j];
          j++;
        } else {
          wi[j] = 0.0;
        }
#endif
      }
    }
#if !defined(PETSC_USE_COMPLEX)
    if (wi[i] != 0) i++;
#endif
  }
  PetscFunctionReturn(0);
#endif
}
Exemple #29
0
PetscErrorCode DSFunction_EXP_NHEP_PADE(DS ds)
{
#if defined(PETSC_MISSING_LAPACK_GESV) || defined(SLEPC_MISSING_LAPACK_LANGE)
  PetscFunctionBegin;
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESV/LANGE - Lapack routines are unavailable");
#else
  PetscErrorCode ierr;
  PetscBLASInt   n,ld,ld2,*ipiv,info,inc=1;
  PetscInt       j,k,odd;
  const PetscInt p=MAX_PADE;
  PetscReal      c[MAX_PADE+1],s;
  PetscScalar    scale,mone=-1.0,one=1.0,two=2.0,zero=0.0;
  PetscScalar    *A,*A2,*Q,*P,*W,*aux;

  PetscFunctionBegin;
  ierr = PetscBLASIntCast(ds->n,&n);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(ds->ld,&ld);CHKERRQ(ierr);
  ld2 = ld*ld;
  ierr = DSAllocateWork_Private(ds,0,ld,ld);CHKERRQ(ierr);
  ipiv = ds->iwork;
  if (!ds->mat[DS_MAT_W]) { ierr = DSAllocateMat_Private(ds,DS_MAT_W);CHKERRQ(ierr); }
  if (!ds->mat[DS_MAT_Z]) { ierr = DSAllocateMat_Private(ds,DS_MAT_Z);CHKERRQ(ierr); }
  A  = ds->mat[DS_MAT_A];
  A2 = ds->mat[DS_MAT_Z];
  Q  = ds->mat[DS_MAT_Q];
  P  = ds->mat[DS_MAT_F];
  W  = ds->mat[DS_MAT_W];

  /* Pade' coefficients */
  c[0] = 1.0;
  for (k=1;k<=p;k++) {
    c[k] = c[k-1]*(p+1-k)/(k*(2*p+1-k));
  }

  /* Scaling */
  s = LAPACKlange_("I",&n,&n,A,&ld,ds->rwork);
  if (s>0.5) {
    s = PetscMax(0,(int)(PetscLogReal(s)/PetscLogReal(2.0)) + 2);
    scale = PetscPowReal(2.0,(-1)*s);
    PetscStackCallBLAS("BLASscal",BLASscal_(&ld2,&scale,A,&inc));
  }

  /* Horner evaluation */
  PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,A,&ld,A,&ld,&zero,A2,&ld));
  ierr = PetscMemzero(Q,ld*ld*sizeof(PetscScalar));CHKERRQ(ierr);
  ierr = PetscMemzero(P,ld*ld*sizeof(PetscScalar));CHKERRQ(ierr);
  for (j=0;j<n;j++) {
    Q[j+j*ld] = c[p];
    P[j+j*ld] = c[p-1];
  }

  odd = 1;
  for (k=p-1;k>0;k--) {
    if (odd==1) {
      PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,Q,&ld,A2,&ld,&zero,W,&ld));
      aux = Q;
      Q = W;
      W = aux;
      for (j=0;j<n;j++)
        Q[j+j*ld] = Q[j+j*ld] + c[k-1];
    } else {
      PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,P,&ld,A2,&ld,&zero,W,&ld));
      aux = P;
      P = W;
      W = aux;
      for (j=0;j<n;j++)
        P[j+j*ld] = P[j+j*ld] + c[k-1];
    }
    odd = 1-odd;
  }
  if (odd==1) {
    PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,Q,&ld,A,&ld,&zero,W,&ld));
    aux = Q;
    Q = W;
    W = aux;
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&ld2,&mone,P,&inc,Q,&inc));
    PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&n,Q,&ld,ipiv,P,&ld,&info));
    PetscStackCallBLAS("BLASscal",BLASscal_(&ld2,&two,P,&inc));
    for (j=0;j<n;j++)
      P[j+j*ld] = P[j+j*ld] + 1.0;
    PetscStackCallBLAS("BLASscal",BLASscal_(&ld2,&mone,P,&inc));
  } else {
    PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,P,&ld,A,&ld,&zero,W,&ld));
    aux = P;
    P = W;
    W = aux;
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&ld2,&mone,P,&inc,Q,&inc));
    PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&n,Q,&ld,ipiv,P,&ld,&info));
    PetscStackCallBLAS("BLASscal",BLASscal_(&ld2,&two,P,&inc));
    for (j=0;j<n;j++)
      P[j+j*ld] = P[j+j*ld] + 1.0;
  }

  for (k=1;k<=s;k++) {
    PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,P,&ld,P,&ld,&zero,W,&ld));
    ierr = PetscMemcpy(P,W,ld2*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  if (P!=ds->mat[DS_MAT_F]) {
    ierr = PetscMemcpy(ds->mat[DS_MAT_F],P,ld2*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
#endif
}
Exemple #30
0
PetscErrorCode DSSolve_NHEP(DS ds,PetscScalar *wr,PetscScalar *wi)
{
#if defined(SLEPC_MISSING_LAPACK_GEHRD) || defined(SLEPC_MISSING_LAPACK_ORGHR) || defined(PETSC_MISSING_LAPACK_HSEQR)
  PetscFunctionBegin;
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GEHRD/ORGHR/HSEQR - Lapack routines are unavailable");
#else
  PetscErrorCode ierr;
  PetscScalar    *work,*tau;
  PetscInt       i,j;
  PetscBLASInt   ilo,lwork,info,n,ld;
  PetscScalar    *A = ds->mat[DS_MAT_A];
  PetscScalar    *Q = ds->mat[DS_MAT_Q];

  PetscFunctionBegin;
#if !defined(PETSC_USE_COMPLEX)
  PetscValidPointer(wi,3);
#endif
  ierr = PetscBLASIntCast(ds->n,&n);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(ds->ld,&ld);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(ds->l+1,&ilo);CHKERRQ(ierr);
  ierr = DSAllocateWork_Private(ds,ld+ld*ld,0,0);CHKERRQ(ierr);
  tau  = ds->work;
  work = ds->work+ld;
  lwork = ld*ld;

  /* initialize orthogonal matrix */
  ierr = PetscMemzero(Q,ld*ld*sizeof(PetscScalar));CHKERRQ(ierr);
  for (i=0;i<n;i++)
    Q[i+i*ld] = 1.0;
  if (n==1) { /* quick return */
    wr[0] = A[0];
    wi[0] = 0.0;
    PetscFunctionReturn(0);
  }

  /* reduce to upper Hessenberg form */
  if (ds->state<DS_STATE_INTERMEDIATE) {
    PetscStackCallBLAS("LAPACKgehrd",LAPACKgehrd_(&n,&ilo,&n,A,&ld,tau,work,&lwork,&info));
    if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGEHRD %d",info);
    for (j=0;j<n-1;j++) {
      for (i=j+2;i<n;i++) {
        Q[i+j*ld] = A[i+j*ld];
        A[i+j*ld] = 0.0;
      }
    }
    PetscStackCallBLAS("LAPACKorghr",LAPACKorghr_(&n,&ilo,&n,Q,&ld,tau,work,&lwork,&info));
    if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xORGHR %d",info);
  }

  /* compute the (real) Schur form */
#if !defined(PETSC_USE_COMPLEX)
  PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S","V",&n,&ilo,&n,A,&ld,wr,wi,Q,&ld,work,&lwork,&info));
  for (j=0;j<ds->l;j++) {
    if (j==n-1 || A[j+1+j*ld] == 0.0) {
      /* real eigenvalue */
      wr[j] = A[j+j*ld];
      wi[j] = 0.0;
    } else {
      /* complex eigenvalue */
      wr[j] = A[j+j*ld];
      wr[j+1] = A[j+j*ld];
      wi[j] = PetscSqrtReal(PetscAbsReal(A[j+1+j*ld])) *
              PetscSqrtReal(PetscAbsReal(A[j+(j+1)*ld]));
      wi[j+1] = -wi[j];
      j++;
    }
  }
#else
  PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S","V",&n,&ilo,&n,A,&ld,wr,Q,&ld,work,&lwork,&info));
  if (wi) for (i=ds->l;i<n;i++) wi[i] = 0.0;
#endif
  if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xHSEQR %d",info);
  PetscFunctionReturn(0);
#endif
}