Exemple #1
0
PetscErrorCode SNESMonitorJacUpdateSpectrum(SNES snes,PetscInt it,PetscReal fnorm,void *ctx)
{
#if defined(PETSC_MISSING_LAPACK_GEEV)
  SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_SUP,"GEEV - Lapack routine is unavailable\nNot able to provide eigen values.");
#elif defined(PETSC_HAVE_ESSL)
  SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_SUP,"GEEV - No support for ESSL Lapack Routines");
#else
  Vec            X;
  Mat            J,dJ,dJdense;
  PetscErrorCode ierr;
  PetscErrorCode (*func)(SNES,Vec,Mat*,Mat*,MatStructure*,void*);
  PetscInt       n,i;
  PetscBLASInt   nb,lwork;
  PetscReal      *eigr,*eigi;
  MatStructure   flg = DIFFERENT_NONZERO_PATTERN;
  PetscScalar    *work;
  PetscScalar    *a;

  PetscFunctionBegin;
  if (it == 0) PetscFunctionReturn(0);
  /* create the difference between the current update and the current jacobian */
  ierr = SNESGetSolution(snes,&X);CHKERRQ(ierr);
  ierr = SNESGetJacobian(snes,&J,NULL,&func,NULL);CHKERRQ(ierr);
  ierr = MatDuplicate(J,MAT_COPY_VALUES,&dJ);CHKERRQ(ierr);
  ierr = SNESComputeJacobian(snes,X,&dJ,&dJ,&flg);CHKERRQ(ierr);
  ierr = MatAXPY(dJ,-1.0,J,SAME_NONZERO_PATTERN);CHKERRQ(ierr);

  /* compute the spectrum directly */
  ierr  = MatConvert(dJ,MATSEQDENSE,MAT_INITIAL_MATRIX,&dJdense);CHKERRQ(ierr);
  ierr  = MatGetSize(dJ,&n,NULL);CHKERRQ(ierr);
  ierr  = PetscBLASIntCast(n,&nb);CHKERRQ(ierr);
  lwork = 3*nb;
  ierr  = PetscMalloc(n*sizeof(PetscReal),&eigr);CHKERRQ(ierr);
  ierr  = PetscMalloc(n*sizeof(PetscReal),&eigi);CHKERRQ(ierr);
  ierr  = PetscMalloc(lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr);
  ierr  = MatDenseGetArray(dJdense,&a);CHKERRQ(ierr);
#if !defined(PETSC_USE_COMPLEX)
  {
    PetscBLASInt lierr;
    ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
    PetscStackCall("LAPACKgeev",LAPACKgeev_("N","N",&nb,a,&nb,eigr,eigi,NULL,&nb,NULL,&nb,work,&lwork,&lierr));
    if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"geev() error %d",lierr);
    ierr = PetscFPTrapPop();CHKERRQ(ierr);
  }
#else
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for complex");
#endif
  PetscPrintf(PetscObjectComm((PetscObject)snes),"Eigenvalues of J_%d - J_%d:\n",it,it-1);CHKERRQ(ierr);
  for (i=0;i<n;i++) {
    PetscPrintf(PetscObjectComm((PetscObject)snes),"%5d: %20.5g + %20.5gi\n",i,eigr[i],eigi[i]);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(dJdense,&a);CHKERRQ(ierr);
  ierr = MatDestroy(&dJ);CHKERRQ(ierr);
  ierr = MatDestroy(&dJdense);CHKERRQ(ierr);
  ierr = PetscFree(eigr);CHKERRQ(ierr);
  ierr = PetscFree(eigi);CHKERRQ(ierr);
  ierr = PetscFree(work);CHKERRQ(ierr);
  PetscFunctionReturn(0);
#endif
}
Exemple #2
0
/*@
   PetscDTGaussQuadrature - create Gauss quadrature

   Not Collective

   Input Arguments:
+  npoints - number of points
.  a - left end of interval (often-1)
-  b - right end of interval (often +1)

   Output Arguments:
+  x - quadrature points
-  w - quadrature weights

   Level: intermediate

   References:
   Golub and Welsch, Calculation of Quadrature Rules, Math. Comp. 23(106), 221--230, 1969.

.seealso: PetscDTLegendreEval()
@*/
PetscErrorCode PetscDTGaussQuadrature(PetscInt npoints,PetscReal a,PetscReal b,PetscReal *x,PetscReal *w)
{
  PetscErrorCode ierr;
  PetscInt       i;
  PetscReal      *work;
  PetscScalar    *Z;
  PetscBLASInt   N,LDZ,info;

  PetscFunctionBegin;
  /* Set up the Golub-Welsch system */
  for (i=0; i<npoints; i++) {
    x[i] = 0;                   /* diagonal is 0 */
    if (i) w[i-1] = 0.5 / PetscSqrtReal(1 - 1./PetscSqr(2*i));
  }
  ierr = PetscRealView(npoints-1,w,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
  ierr = PetscMalloc2(npoints*npoints,PetscScalar,&Z,PetscMax(1,2*npoints-2),PetscReal,&work);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(npoints,&N);CHKERRQ(ierr);
  LDZ  = N;
  ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
  PetscStackCall("LAPACKsteqr",LAPACKsteqr_("I",&N,x,w,Z,&LDZ,work,&info));
  ierr = PetscFPTrapPop();CHKERRQ(ierr);
  if (info) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"xSTEQR error");

  for (i=0; i<(npoints+1)/2; i++) {
    PetscReal y = 0.5 * (-x[i] + x[npoints-i-1]); /* enforces symmetry */
    x[i]           = (a+b)/2 - y*(b-a)/2;
    x[npoints-i-1] = (a+b)/2 + y*(b-a)/2;

    w[i] = w[npoints-1-i] = (b-a)*PetscSqr(0.5*PetscAbsScalar(Z[i*npoints] + Z[(npoints-i-1)*npoints]));
  }
  ierr = PetscFree2(Z,work);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Exemple #3
0
/**************************************xyt.c***********************************/
static PetscErrorCode do_xyt_solve(xyt_ADT xyt_handle,  PetscScalar *uc)
{
  PetscInt       off, len, *iptr;
  PetscInt       level        =xyt_handle->level;
  PetscInt       n            =xyt_handle->info->n;
  PetscInt       m            =xyt_handle->info->m;
  PetscInt       *stages      =xyt_handle->info->stages;
  PetscInt       *xcol_indices=xyt_handle->info->xcol_indices;
  PetscInt       *ycol_indices=xyt_handle->info->ycol_indices;
  PetscScalar    *x_ptr, *y_ptr, *uu_ptr;
  PetscScalar    *solve_uu=xyt_handle->info->solve_uu;
  PetscScalar    *solve_w =xyt_handle->info->solve_w;
  PetscScalar    *x       =xyt_handle->info->x;
  PetscScalar    *y       =xyt_handle->info->y;
  PetscBLASInt   i1       = 1,dlen;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  uu_ptr=solve_uu;
  PCTFS_rvec_zero(uu_ptr,m);

  /* x  = X.Y^T.b */
  /* uu = Y^T.b */
  for (y_ptr=y,iptr=ycol_indices; *iptr!=-1; y_ptr+=len)
  {
    off       =*iptr++;
    len       =*iptr++;
    ierr      = PetscBLASIntCast(len,&dlen);CHKERRQ(ierr);
    PetscStackCall("BLASdot",*uu_ptr++ = BLASdot_(&dlen,uc+off,&i1,y_ptr,&i1));
  }

  /* comunication of beta */
  uu_ptr=solve_uu;
  if (level) PCTFS_ssgl_radd(uu_ptr, solve_w, level, stages);
  PCTFS_rvec_zero(uc,n);

  /* x = X.uu */
  for (x_ptr=x,iptr=xcol_indices; *iptr!=-1; x_ptr+=len) {
    off  =*iptr++;
    len  =*iptr++;
    ierr = PetscBLASIntCast(len,&dlen);CHKERRQ(ierr);
    PetscStackCall("BLASaxpy",BLASaxpy_(&dlen,uu_ptr++,x_ptr,&i1,uc+off,&i1));
  }
  PetscFunctionReturn(0);
}
Exemple #4
0
PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)F->spptr;
  PetscInt         M   = A->rmap->N,N=A->cmap->N;

  PetscFunctionBegin;
  /* Initialize the SuperLU process grid. */
  PetscStackCall("SuperLU_DIST:superlu_gridinit",superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid));

  /* Initialize ScalePermstruct and LUstruct. */
  PetscStackCall("SuperLU_DIST:ScalePermstructInit",ScalePermstructInit(M, N, &lu->ScalePermstruct));
  PetscStackCall("SuperLU_DIST:LUstructInit",LUstructInit(M, N, &lu->LUstruct));
  F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST;
  F->ops->solve           = MatSolve_SuperLU_DIST;
  F->ops->matsolve        = MatMatSolve_SuperLU_DIST;
  lu->CleanUpSuperLU_Dist = PETSC_TRUE;
  PetscFunctionReturn(0);
}
Exemple #5
0
PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
{
  Mat_MPIDense   *A     = (Mat_MPIDense*)inA->data;
  Mat_SeqDense   *a     = (Mat_SeqDense*)A->A->data;
  PetscScalar    oalpha = alpha;
  PetscErrorCode ierr;
  PetscBLASInt   one = 1,nz;

  PetscFunctionBegin;
  ierr = PetscBLASIntCast(inA->rmap->n*inA->cmap->N,&nz);CHKERRQ(ierr);
  PetscStackCall("BLASscal",BLASscal_(&nz,&oalpha,a->v,&one));
  ierr = PetscLogFlops(nz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Exemple #6
0
PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
{
  PetscErrorCode   ierr;
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
  PetscBool        flg;

  PetscFunctionBegin;
  if (lu && lu->CleanUpSuperLU_Dist) {
    /* Deallocate SuperLU_DIST storage */
    if (lu->MatInputMode == GLOBAL) {
      PetscStackCall("SuperLU_DIST:Destroy_CompCol_Matrix_dist",Destroy_CompCol_Matrix_dist(&lu->A_sup));
    } else {
      PetscStackCall("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist",Destroy_CompRowLoc_Matrix_dist(&lu->A_sup));
      if (lu->options.SolveInitialized) {
#if defined(PETSC_USE_COMPLEX)
        PetscStackCall("SuperLU_DIST:zSolveFinalize",zSolveFinalize(&lu->options, &lu->SOLVEstruct));
#else
        PetscStackCall("SuperLU_DIST:dSolveFinalize",dSolveFinalize(&lu->options, &lu->SOLVEstruct));
#endif
      }
    }
    PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct));
    PetscStackCall("SuperLU_DIST:ScalePermstructFree",ScalePermstructFree(&lu->ScalePermstruct));
    PetscStackCall("SuperLU_DIST:LUstructFree",LUstructFree(&lu->LUstruct));

    /* Release the SuperLU_DIST process grid. */
    PetscStackCall("SuperLU_DIST:superlu_gridexit",superlu_gridexit(&lu->grid));
    ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr);
  }
  ierr = PetscFree(A->spptr);CHKERRQ(ierr);

  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
  } else {
    ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
Exemple #7
0
PetscErrorCode SNESNGMRESFormCombinedSolution_Private(SNES snes,PetscInt l,Vec XM,Vec FM,PetscReal fMnorm,Vec X,Vec XA,Vec FA)
{
  SNES_NGMRES    *ngmres = (SNES_NGMRES*) snes->data;
  PetscInt       i,j;
  Vec            *Fdot      = ngmres->Fdot;
  Vec            *Xdot      = ngmres->Xdot;
  PetscScalar    *beta      = ngmres->beta;
  PetscScalar    *xi        = ngmres->xi;
  PetscScalar    alph_total = 0.;
  PetscErrorCode ierr;
  PetscReal      nu;
  Vec            Y = snes->work[2];
  PetscBool      changed_y,changed_w;

  PetscFunctionBegin;
  nu = fMnorm*fMnorm;

  /* construct the right hand side and xi factors */
  ierr = VecMDot(FM,l,Fdot,xi);CHKERRQ(ierr);
  for (i = 0; i < l; i++) beta[i] = nu - xi[i];

  /* construct h */
  for (j = 0; j < l; j++) {
    for (i = 0; i < l; i++) {
      H(i,j) = Q(i,j)-xi[i]-xi[j]+nu;
    }
  }
  if (l == 1) {
    /* simply set alpha[0] = beta[0] / H[0, 0] */
    if (H(0,0) != 0.) beta[0] = beta[0]/H(0,0);
    else beta[0] = 0.;
  } else {
#if defined(PETSC_MISSING_LAPACK_GELSS)
    SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_SUP,"NGMRES with LS requires the LAPACK GELSS routine.");
#else
    ierr          = PetscBLASIntCast(l,&ngmres->m);CHKERRQ(ierr);
    ierr          = PetscBLASIntCast(l,&ngmres->n);CHKERRQ(ierr);
    ngmres->info  = 0;
    ngmres->rcond = -1.;
    ierr          = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
    PetscStackCall("LAPACKgelss",LAPACKgelss_(&ngmres->m,&ngmres->n,&ngmres->nrhs,ngmres->h,&ngmres->lda,ngmres->beta,&ngmres->ldb,ngmres->s,&ngmres->rcond,&ngmres->rank,ngmres->work,&ngmres->lwork,ngmres->rwork,&ngmres->info));
#else
    PetscStackCall("LAPACKgelss",LAPACKgelss_(&ngmres->m,&ngmres->n,&ngmres->nrhs,ngmres->h,&ngmres->lda,ngmres->beta,&ngmres->ldb,ngmres->s,&ngmres->rcond,&ngmres->rank,ngmres->work,&ngmres->lwork,&ngmres->info));
#endif
    ierr = PetscFPTrapPop();CHKERRQ(ierr);
    if (ngmres->info < 0) SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_LIB,"Bad argument to GELSS");
    if (ngmres->info > 0) SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_LIB,"SVD failed to converge");
#endif
  }
  for (i=0; i<l; i++) {
    if (PetscIsInfOrNanScalar(beta[i])) SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_LIB,"SVD generated inconsistent output");
  }
  alph_total = 0.;
  for (i = 0; i < l; i++) alph_total += beta[i];

  ierr = VecCopy(XM,XA);CHKERRQ(ierr);
  ierr = VecScale(XA,1.-alph_total);CHKERRQ(ierr);
  ierr = VecMAXPY(XA,l,beta,Xdot);CHKERRQ(ierr);
  /* check the validity of the step */
  ierr = VecCopy(XA,Y);CHKERRQ(ierr);
  ierr = VecAXPY(Y,-1.0,X);CHKERRQ(ierr);
  ierr = SNESLineSearchPostCheck(snes->linesearch,X,Y,XA,&changed_y,&changed_w);CHKERRQ(ierr);
  if (!ngmres->approxfunc) {ierr = SNESComputeFunction(snes,XA,FA);CHKERRQ(ierr);}
  else {
    ierr = VecCopy(FM,FA);CHKERRQ(ierr);
    ierr = VecScale(FA,1.-alph_total);CHKERRQ(ierr);
    ierr = VecMAXPY(FA,l,beta,Fdot);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
/* approximately solve the overdetermined system:

 2*F(x_i)\cdot F(\x_j)\alpha_i = 0
 \alpha_i                      = 1

 Which minimizes the L2 norm of the linearization of:
 ||F(\sum_i \alpha_i*x_i)||^2

 With the constraint that \sum_i\alpha_i = 1
 Where x_i is the solution from the ith subsolver.
 */
static PetscErrorCode SNESCompositeApply_AdditiveOptimal(SNES snes,Vec X,Vec B,Vec F,PetscReal *fnorm)
{
  PetscErrorCode      ierr;
  SNES_Composite      *jac = (SNES_Composite*)snes->data;
  SNES_CompositeLink  next = jac->head;
  Vec                 *Xes = jac->Xes,*Fes = jac->Fes;
  PetscInt            i,j;
  PetscScalar         tot,total,ftf;
  PetscReal           min_fnorm;
  PetscInt            min_i;
  SNESConvergedReason reason;

  PetscFunctionBegin;
  if (!next) SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_ARG_WRONGSTATE,"No composite SNESes supplied via SNESCompositeAddSNES() or -snes_composite_sneses");

  if (snes->normschedule == SNES_NORM_ALWAYS) {
    next = jac->head;
    ierr = SNESSetInitialFunction(next->snes,F);CHKERRQ(ierr);
    while (next->next) {
      next = next->next;
      ierr = SNESSetInitialFunction(next->snes,F);CHKERRQ(ierr);
    }
  }

  next = jac->head;
  i = 0;
  ierr = VecCopy(X,Xes[i]);CHKERRQ(ierr);
  ierr = SNESSolve(next->snes,B,Xes[i]);CHKERRQ(ierr);
  ierr = SNESGetConvergedReason(next->snes,&reason);CHKERRQ(ierr);
  if (reason < 0 && reason != SNES_DIVERGED_MAX_IT) {
    jac->innerFailures++;
    if (jac->innerFailures >= snes->maxFailures) {
      snes->reason = SNES_DIVERGED_INNER;
      PetscFunctionReturn(0);
    }
  }
  while (next->next) {
    i++;
    next = next->next;
    ierr = VecCopy(X,Xes[i]);CHKERRQ(ierr);
    ierr = SNESSolve(next->snes,B,Xes[i]);CHKERRQ(ierr);
    ierr = SNESGetConvergedReason(next->snes,&reason);CHKERRQ(ierr);
    if (reason < 0 && reason != SNES_DIVERGED_MAX_IT) {
      jac->innerFailures++;
      if (jac->innerFailures >= snes->maxFailures) {
        snes->reason = SNES_DIVERGED_INNER;
        PetscFunctionReturn(0);
      }
    }
  }

  /* all the solutions are collected; combine optimally */
  for (i=0;i<jac->n;i++) {
    for (j=0;j<i+1;j++) {
      ierr = VecDotBegin(Fes[i],Fes[j],&jac->h[i + j*jac->n]);CHKERRQ(ierr);
    }
    ierr = VecDotBegin(Fes[i],F,&jac->g[i]);CHKERRQ(ierr);
  }

  for (i=0;i<jac->n;i++) {
    for (j=0;j<i+1;j++) {
      ierr = VecDotEnd(Fes[i],Fes[j],&jac->h[i + j*jac->n]);CHKERRQ(ierr);
      if (i == j) jac->fnorms[i] = PetscSqrtReal(PetscRealPart(jac->h[i + j*jac->n]));
    }
    ierr = VecDotEnd(Fes[i],F,&jac->g[i]);CHKERRQ(ierr);
  }

  ftf = (*fnorm)*(*fnorm);

  for (i=0; i<jac->n; i++) {
    for (j=i+1;j<jac->n;j++) {
      jac->h[i + j*jac->n] = jac->h[j + i*jac->n];
    }
  }

  for (i=0; i<jac->n; i++) {
    for (j=0; j<jac->n; j++) {
      jac->h[i + j*jac->n] = jac->h[i + j*jac->n] - jac->g[j] - jac->g[i] + ftf;
    }
    jac->beta[i] = ftf - jac->g[i];
  }

#if defined(PETSC_MISSING_LAPACK_GELSS)
  SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_SUP,"SNESCOMPOSITE with ADDITIVEOPTIMAL requires the LAPACK GELSS routine.");
#else
  jac->info  = 0;
  jac->rcond = -1.;
  ierr          = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
  PetscStackCall("LAPACKgelss",LAPACKgelss_(&jac->n,&jac->n,&jac->nrhs,jac->h,&jac->lda,jac->beta,&jac->lda,jac->s,&jac->rcond,&jac->rank,jac->work,&jac->lwork,jac->rwork,&jac->info));
#else
  PetscStackCall("LAPACKgelss",LAPACKgelss_(&jac->n,&jac->n,&jac->nrhs,jac->h,&jac->lda,jac->beta,&jac->lda,jac->s,&jac->rcond,&jac->rank,jac->work,&jac->lwork,&jac->info));
#endif
  ierr = PetscFPTrapPop();CHKERRQ(ierr);
  if (jac->info < 0) SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_LIB,"Bad argument to GELSS");
  if (jac->info > 0) SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_LIB,"SVD failed to converge");
#endif
  tot = 0.;
  total = 0.;
  for (i=0; i<jac->n; i++) {
    if (snes->errorifnotconverged && PetscIsInfOrNanScalar(jac->beta[i])) SETERRQ(PetscObjectComm((PetscObject)snes),PETSC_ERR_LIB,"SVD generated inconsistent output");
    ierr = PetscInfo2(snes,"%D: %g\n",i,(double)PetscRealPart(jac->beta[i]));CHKERRQ(ierr);
    tot += jac->beta[i];
    total += PetscAbsScalar(jac->beta[i]);
  }
  ierr = VecScale(X,(1. - tot));CHKERRQ(ierr);
  ierr = VecMAXPY(X,jac->n,jac->beta,Xes);CHKERRQ(ierr);
  ierr = SNESComputeFunction(snes,X,F);CHKERRQ(ierr);

  if (snes->xl && snes->xu) {
    ierr = SNESVIComputeInactiveSetFnorm(snes, F, X, fnorm);CHKERRQ(ierr);
  } else {
    ierr = VecNorm(F, NORM_2, fnorm);CHKERRQ(ierr);
  }

  /* take the minimum-normed candidate if it beats the combination by a factor of rtol or the combination has stagnated */
  min_fnorm = jac->fnorms[0];
  min_i     = 0;
  for (i=0; i<jac->n; i++) {
    if (jac->fnorms[i] < min_fnorm) {
      min_fnorm = jac->fnorms[i];
      min_i     = i;
    }
  }

  /* stagnation or divergence restart to the solution of the solver that failed the least */
  if (PetscRealPart(total) < jac->stol || min_fnorm*jac->rtol < *fnorm) {
    ierr = VecCopy(jac->Xes[min_i],X);CHKERRQ(ierr);
    ierr = VecCopy(jac->Fes[min_i],F);CHKERRQ(ierr);
    *fnorm = min_fnorm;
  }
  PetscFunctionReturn(0);
}
Exemple #9
0
/**************************************xyt.c***********************************/
static PetscInt xyt_generate(xyt_ADT xyt_handle)
{
  PetscInt       i,j,k,idx;
  PetscInt       dim, col;
  PetscScalar    *u, *uu, *v, *z, *w, alpha, alpha_w;
  PetscInt       *segs;
  PetscInt       op[] = {GL_ADD,0};
  PetscInt       off, len;
  PetscScalar    *x_ptr, *y_ptr;
  PetscInt       *iptr, flag;
  PetscInt       start =0, end, work;
  PetscInt       op2[] = {GL_MIN,0};
  PCTFS_gs_ADT   PCTFS_gs_handle;
  PetscInt       *nsep, *lnsep, *fo;
  PetscInt       a_n            =xyt_handle->mvi->n;
  PetscInt       a_m            =xyt_handle->mvi->m;
  PetscInt       *a_local2global=xyt_handle->mvi->local2global;
  PetscInt       level;
  PetscInt       n, m;
  PetscInt       *xcol_sz, *xcol_indices, *stages;
  PetscScalar    **xcol_vals, *x;
  PetscInt       *ycol_sz, *ycol_indices;
  PetscScalar    **ycol_vals, *y;
  PetscInt       n_global;
  PetscInt       xt_nnz       =0, xt_max_nnz=0;
  PetscInt       yt_nnz       =0, yt_max_nnz=0;
  PetscInt       xt_zero_nnz  =0;
  PetscInt       xt_zero_nnz_0=0;
  PetscInt       yt_zero_nnz  =0;
  PetscInt       yt_zero_nnz_0=0;
  PetscBLASInt   i1           = 1,dlen;
  PetscScalar    dm1          = -1.0;
  PetscErrorCode ierr;

  n              =xyt_handle->mvi->n;
  nsep           =xyt_handle->info->nsep;
  lnsep          =xyt_handle->info->lnsep;
  fo             =xyt_handle->info->fo;
  end            =lnsep[0];
  level          =xyt_handle->level;
  PCTFS_gs_handle=xyt_handle->mvi->PCTFS_gs_handle;

  /* is there a null space? */
  /* LATER add in ability to detect null space by checking alpha */
  for (i=0, j=0; i<=level; i++) j+=nsep[i];

  m = j-xyt_handle->ns;
  if (m!=j) {
    ierr = PetscPrintf(PETSC_COMM_WORLD,"xyt_generate() :: null space exists %D %D %D\n",m,j,xyt_handle->ns);CHKERRQ(ierr);
  }

  ierr = PetscInfo2(0,"xyt_generate() :: X(%D,%D)\n",n,m);CHKERRQ(ierr);

  /* get and initialize storage for x local         */
  /* note that x local is nxm and stored by columns */
  xcol_sz      = (PetscInt*) malloc(m*sizeof(PetscInt));
  xcol_indices = (PetscInt*) malloc((2*m+1)*sizeof(PetscInt));
  xcol_vals    = (PetscScalar**) malloc(m*sizeof(PetscScalar*));
  for (i=j=0; i<m; i++, j+=2) {
    xcol_indices[j]=xcol_indices[j+1]=xcol_sz[i]=-1;
    xcol_vals[i]   = NULL;
  }
  xcol_indices[j]=-1;

  /* get and initialize storage for y local         */
  /* note that y local is nxm and stored by columns */
  ycol_sz      = (PetscInt*) malloc(m*sizeof(PetscInt));
  ycol_indices = (PetscInt*) malloc((2*m+1)*sizeof(PetscInt));
  ycol_vals    = (PetscScalar**) malloc(m*sizeof(PetscScalar*));
  for (i=j=0; i<m; i++, j+=2) {
    ycol_indices[j]=ycol_indices[j+1]=ycol_sz[i]=-1;
    ycol_vals[i]   = NULL;
  }
  ycol_indices[j]=-1;

  /* size of separators for each sub-hc working from bottom of tree to top */
  /* this looks like nsep[]=segments */
  stages = (PetscInt*) malloc((level+1)*sizeof(PetscInt));
  segs   = (PetscInt*) malloc((level+1)*sizeof(PetscInt));
  PCTFS_ivec_zero(stages,level+1);
  PCTFS_ivec_copy(segs,nsep,level+1);
  for (i=0; i<level; i++) segs[i+1] += segs[i];
  stages[0] = segs[0];

  /* temporary vectors  */
  u  = (PetscScalar*) malloc(n*sizeof(PetscScalar));
  z  = (PetscScalar*) malloc(n*sizeof(PetscScalar));
  v  = (PetscScalar*) malloc(a_m*sizeof(PetscScalar));
  uu = (PetscScalar*) malloc(m*sizeof(PetscScalar));
  w  = (PetscScalar*) malloc(m*sizeof(PetscScalar));

  /* extra nnz due to replication of vertices across separators */
  for (i=1, j=0; i<=level; i++) j+=nsep[i];

  /* storage for sparse x values */
  n_global   = xyt_handle->info->n_global;
  xt_max_nnz = yt_max_nnz = (PetscInt)(2.5*PetscPowReal(1.0*n_global,1.6667) + j*n/2)/PCTFS_num_nodes;
  x          = (PetscScalar*) malloc(xt_max_nnz*sizeof(PetscScalar));
  y          = (PetscScalar*) malloc(yt_max_nnz*sizeof(PetscScalar));

  /* LATER - can embed next sep to fire in gs */
  /* time to make the donuts - generate X factor */
  for (dim=i=j=0; i<m; i++) {
    /* time to move to the next level? */
    while (i==segs[dim]) {
      if (dim==level) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"dim about to exceed level\n");
      stages[dim++]=i;
      end         +=lnsep[dim];
    }
    stages[dim]=i;

    /* which column are we firing? */
    /* i.e. set v_l */
    /* use new seps and do global min across hc to determine which one to fire */
    (start<end) ? (col=fo[start]) : (col=INT_MAX);
    PCTFS_giop_hc(&col,&work,1,op2,dim);

    /* shouldn't need this */
    if (col==INT_MAX) {
      ierr = PetscInfo(0,"hey ... col==INT_MAX??\n");CHKERRQ(ierr);
      continue;
    }

    /* do I own it? I should */
    PCTFS_rvec_zero(v,a_m);
    if (col==fo[start]) {
      start++;
      idx=PCTFS_ivec_linear_search(col, a_local2global, a_n);
      if (idx!=-1) {
        v[idx] = 1.0;
        j++;
      } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"NOT FOUND!\n");
    } else {
      idx=PCTFS_ivec_linear_search(col, a_local2global, a_m);
      if (idx!=-1) v[idx] = 1.0;
    }

    /* perform u = A.v_l */
    PCTFS_rvec_zero(u,n);
    do_matvec(xyt_handle->mvi,v,u);

    /* uu =  X^T.u_l (local portion) */
    /* technically only need to zero out first i entries */
    /* later turn this into an XYT_solve call ? */
    PCTFS_rvec_zero(uu,m);
    y_ptr=y;
    iptr = ycol_indices;
    for (k=0; k<i; k++) {
      off   = *iptr++;
      len   = *iptr++;
      ierr  = PetscBLASIntCast(len,&dlen);CHKERRQ(ierr);
      PetscStackCall("BLASdot",uu[k] = BLASdot_(&dlen,u+off,&i1,y_ptr,&i1));
      y_ptr+=len;
    }

    /* uu = X^T.u_l (comm portion) */
    PCTFS_ssgl_radd  (uu, w, dim, stages);

    /* z = X.uu */
    PCTFS_rvec_zero(z,n);
    x_ptr=x;
    iptr = xcol_indices;
    for (k=0; k<i; k++) {
      off  = *iptr++;
      len  = *iptr++;
      ierr = PetscBLASIntCast(len,&dlen);CHKERRQ(ierr);
      PetscStackCall("BLASaxpy",BLASaxpy_(&dlen,&uu[k],x_ptr,&i1,z+off,&i1));
      x_ptr+=len;
    }

    /* compute v_l = v_l - z */
    PCTFS_rvec_zero(v+a_n,a_m-a_n);
    ierr = PetscBLASIntCast(n,&dlen);CHKERRQ(ierr);
    PetscStackCall("BLASaxpy",BLASaxpy_(&dlen,&dm1,z,&i1,v,&i1));

    /* compute u_l = A.v_l */
    if (a_n!=a_m) PCTFS_gs_gop_hc(PCTFS_gs_handle,v,"+\0",dim);
    PCTFS_rvec_zero(u,n);
    do_matvec(xyt_handle->mvi,v,u);

    /* compute sqrt(alpha) = sqrt(u_l^T.u_l) - local portion */
    ierr  = PetscBLASIntCast(n,&dlen);CHKERRQ(ierr);
    PetscStackCall("BLASdot",alpha = BLASdot_(&dlen,u,&i1,u,&i1));
    /* compute sqrt(alpha) = sqrt(u_l^T.u_l) - comm portion */
    PCTFS_grop_hc(&alpha, &alpha_w, 1, op, dim);

    alpha = (PetscScalar) PetscSqrtReal((PetscReal)alpha);

    /* check for small alpha                             */
    /* LATER use this to detect and determine null space */
    if (fabs(alpha)<1.0e-14) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"bad alpha! %g\n",alpha);

    /* compute v_l = v_l/sqrt(alpha) */
    PCTFS_rvec_scale(v,1.0/alpha,n);
    PCTFS_rvec_scale(u,1.0/alpha,n);

    /* add newly generated column, v_l, to X */
    flag = 1;
    off  =len=0;
    for (k=0; k<n; k++) {
      if (v[k]!=0.0) {
        len=k;
        if (flag) {off=k; flag=0;}
      }
    }

    len -= (off-1);

    if (len>0) {
      if ((xt_nnz+len)>xt_max_nnz) {
        ierr        = PetscInfo(0,"increasing space for X by 2x!\n");CHKERRQ(ierr);
        xt_max_nnz *= 2;
        x_ptr       = (PetscScalar*) malloc(xt_max_nnz*sizeof(PetscScalar));
        PCTFS_rvec_copy(x_ptr,x,xt_nnz);
        free(x);
        x     = x_ptr;
        x_ptr+=xt_nnz;
      }
      xt_nnz += len;
      PCTFS_rvec_copy(x_ptr,v+off,len);

      /* keep track of number of zeros */
      if (dim) {
        for (k=0; k<len; k++) {
          if (x_ptr[k]==0.0) xt_zero_nnz++;
        }
      } else {
        for (k=0; k<len; k++) {
          if (x_ptr[k]==0.0) xt_zero_nnz_0++;
        }
      }
      xcol_indices[2*i] = off;
      xcol_sz[i]        = xcol_indices[2*i+1] = len;
      xcol_vals[i]      = x_ptr;
    } else {
      xcol_indices[2*i] = 0;
      xcol_sz[i]        = xcol_indices[2*i+1] = 0;
      xcol_vals[i]      = x_ptr;
    }


    /* add newly generated column, u_l, to Y */
    flag = 1;
    off  =len=0;
    for (k=0; k<n; k++) {
      if (u[k]!=0.0) {
        len=k;
        if (flag) { off=k; flag=0; }
      }
    }

    len -= (off-1);

    if (len>0) {
      if ((yt_nnz+len)>yt_max_nnz) {
        ierr        = PetscInfo(0,"increasing space for Y by 2x!\n");CHKERRQ(ierr);
        yt_max_nnz *= 2;
        y_ptr       = (PetscScalar*) malloc(yt_max_nnz*sizeof(PetscScalar));
        PCTFS_rvec_copy(y_ptr,y,yt_nnz);
        free(y);
        y     = y_ptr;
        y_ptr+=yt_nnz;
      }
      yt_nnz += len;
      PCTFS_rvec_copy(y_ptr,u+off,len);

      /* keep track of number of zeros */
      if (dim) {
        for (k=0; k<len; k++) {
          if (y_ptr[k]==0.0) yt_zero_nnz++;
        }
      } else {
        for (k=0; k<len; k++) {
          if (y_ptr[k]==0.0) yt_zero_nnz_0++;
        }
      }
      ycol_indices[2*i] = off;
      ycol_sz[i]        = ycol_indices[2*i+1] = len;
      ycol_vals[i]      = y_ptr;
    } else {
      ycol_indices[2*i] = 0;
      ycol_sz[i]        = ycol_indices[2*i+1] = 0;
      ycol_vals[i]      = y_ptr;
    }
  }

  /* close off stages for execution phase */
  while (dim!=level) {
    stages[dim++]=i;
    ierr         = PetscInfo2(0,"disconnected!!! dim(%D)!=level(%D)\n",dim,level);CHKERRQ(ierr);
  }
  stages[dim]=i;

  xyt_handle->info->n           =xyt_handle->mvi->n;
  xyt_handle->info->m           =m;
  xyt_handle->info->nnz         =xt_nnz + yt_nnz;
  xyt_handle->info->max_nnz     =xt_max_nnz + yt_max_nnz;
  xyt_handle->info->msg_buf_sz  =stages[level]-stages[0];
  xyt_handle->info->solve_uu    = (PetscScalar*) malloc(m*sizeof(PetscScalar));
  xyt_handle->info->solve_w     = (PetscScalar*) malloc(m*sizeof(PetscScalar));
  xyt_handle->info->x           =x;
  xyt_handle->info->xcol_vals   =xcol_vals;
  xyt_handle->info->xcol_sz     =xcol_sz;
  xyt_handle->info->xcol_indices=xcol_indices;
  xyt_handle->info->stages      =stages;
  xyt_handle->info->y           =y;
  xyt_handle->info->ycol_vals   =ycol_vals;
  xyt_handle->info->ycol_sz     =ycol_sz;
  xyt_handle->info->ycol_indices=ycol_indices;

  free(segs);
  free(u);
  free(v);
  free(uu);
  free(z);
  free(w);

  return(0);
}
Exemple #10
0
static PetscErrorCode  KSPSolve_BCGSL(KSP ksp)
{
  KSP_BCGSL      *bcgsl = (KSP_BCGSL*) ksp->data;
  PetscScalar    alpha, beta, omega, sigma;
  PetscScalar    rho0, rho1;
  PetscReal      kappa0, kappaA, kappa1;
  PetscReal      ghat;
  PetscReal      zeta, zeta0, rnmax_computed, rnmax_true, nrm0;
  PetscBool      bUpdateX;
  PetscInt       maxit;
  PetscInt       h, i, j, k, vi, ell;
  PetscBLASInt   ldMZ,bierr;
  PetscScalar    utb;
  PetscReal      max_s, pinv_tol;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* set up temporary vectors */
  vi         = 0;
  ell        = bcgsl->ell;
  bcgsl->vB  = ksp->work[vi]; vi++;
  bcgsl->vRt = ksp->work[vi]; vi++;
  bcgsl->vTm = ksp->work[vi]; vi++;
  bcgsl->vvR = ksp->work+vi; vi += ell+1;
  bcgsl->vvU = ksp->work+vi; vi += ell+1;
  bcgsl->vXr = ksp->work[vi]; vi++;
  ierr       = PetscBLASIntCast(ell+1,&ldMZ);CHKERRQ(ierr);

  /* Prime the iterative solver */
  ierr           = KSPInitialResidual(ksp, VX, VTM, VB, VVR[0], ksp->vec_rhs);CHKERRQ(ierr);
  ierr           = VecNorm(VVR[0], NORM_2, &zeta0);CHKERRQ(ierr);
  rnmax_computed = zeta0;
  rnmax_true     = zeta0;

  ierr = (*ksp->converged)(ksp, 0, zeta0, &ksp->reason, ksp->cnvP);CHKERRQ(ierr);
  if (ksp->reason) {
    ierr       = PetscObjectAMSTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
    ksp->its   = 0;
    ksp->rnorm = zeta0;
    ierr       = PetscObjectAMSGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  ierr  = VecSet(VVU[0],0.0);CHKERRQ(ierr);
  alpha = 0.;
  rho0  = omega = 1;

  if (bcgsl->delta>0.0) {
    ierr = VecCopy(VX, VXR);CHKERRQ(ierr);
    ierr = VecSet(VX,0.0);CHKERRQ(ierr);
    ierr = VecCopy(VVR[0], VB);CHKERRQ(ierr);
  } else {
    ierr = VecCopy(ksp->vec_rhs, VB);CHKERRQ(ierr);
  }

  /* Life goes on */
  ierr = VecCopy(VVR[0], VRT);CHKERRQ(ierr);
  zeta = zeta0;

  ierr = KSPGetTolerances(ksp, NULL, NULL, NULL, &maxit);CHKERRQ(ierr);

  for (k=0; k<maxit; k += bcgsl->ell) {
    ksp->its   = k;
    ksp->rnorm = zeta;

    ierr = KSPLogResidualHistory(ksp, zeta);CHKERRQ(ierr);
    ierr = KSPMonitor(ksp, ksp->its, zeta);CHKERRQ(ierr);

    ierr = (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);CHKERRQ(ierr);
    if (ksp->reason < 0) PetscFunctionReturn(0);
    else if (ksp->reason) break;

    /* BiCG part */
    rho0 = -omega*rho0;
    nrm0 = zeta;
    for (j=0; j<bcgsl->ell; j++) {
      /* rho1 <- r_j' * r_tilde */
      ierr = VecDot(VVR[j], VRT, &rho1);CHKERRQ(ierr);
      if (rho1 == 0.0) {
        ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
        PetscFunctionReturn(0);
      }
      beta = alpha*(rho1/rho0);
      rho0 = rho1;
      for (i=0; i<=j; i++) {
        /* u_i <- r_i - beta*u_i */
        ierr = VecAYPX(VVU[i], -beta, VVR[i]);CHKERRQ(ierr);
      }
      /* u_{j+1} <- inv(K)*A*u_j */
      ierr = KSP_PCApplyBAorAB(ksp, VVU[j], VVU[j+1], VTM);CHKERRQ(ierr);

      ierr = VecDot(VVU[j+1], VRT, &sigma);CHKERRQ(ierr);
      if (sigma == 0.0) {
        ksp->reason = KSP_DIVERGED_BREAKDOWN_BICG;
        PetscFunctionReturn(0);
      }
      alpha = rho1/sigma;

      /* x <- x + alpha*u_0 */
      ierr = VecAXPY(VX, alpha, VVU[0]);CHKERRQ(ierr);

      for (i=0; i<=j; i++) {
        /* r_i <- r_i - alpha*u_{i+1} */
        ierr = VecAXPY(VVR[i], -alpha, VVU[i+1]);CHKERRQ(ierr);
      }

      /* r_{j+1} <- inv(K)*A*r_j */
      ierr = KSP_PCApplyBAorAB(ksp, VVR[j], VVR[j+1], VTM);CHKERRQ(ierr);

      ierr = VecNorm(VVR[0], NORM_2, &nrm0);CHKERRQ(ierr);
      if (bcgsl->delta>0.0) {
        if (rnmax_computed<nrm0) rnmax_computed = nrm0;
        if (rnmax_true<nrm0) rnmax_true = nrm0;
      }

      /* NEW: check for early exit */
      ierr = (*ksp->converged)(ksp, k+j, nrm0, &ksp->reason, ksp->cnvP);CHKERRQ(ierr);
      if (ksp->reason) {
        ierr = PetscObjectAMSTakeAccess((PetscObject)ksp);CHKERRQ(ierr);

        ksp->its   = k+j;
        ksp->rnorm = nrm0;

        ierr = PetscObjectAMSGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
        if (ksp->reason < 0) PetscFunctionReturn(0);
      }
    }

    /* Polynomial part */
    for (i = 0; i <= bcgsl->ell; ++i) {
      ierr = VecMDot(VVR[i], i+1, VVR, &MZa[i*ldMZ]);CHKERRQ(ierr);
    }
    /* Symmetrize MZa */
    for (i = 0; i <= bcgsl->ell; ++i) {
      for (j = i+1; j <= bcgsl->ell; ++j) {
        MZa[i*ldMZ+j] = MZa[j*ldMZ+i] = PetscConj(MZa[j*ldMZ+i]);
      }
    }
    /* Copy MZa to MZb */
    ierr = PetscMemcpy(MZb,MZa,ldMZ*ldMZ*sizeof(PetscScalar));CHKERRQ(ierr);

    if (!bcgsl->bConvex || bcgsl->ell==1) {
      PetscBLASInt ione = 1,bell;
      ierr = PetscBLASIntCast(bcgsl->ell,&bell);CHKERRQ(ierr);

      AY0c[0] = -1;
      if (bcgsl->pinv) {
#if defined(PETSC_MISSING_LAPACK_GESVD)
        SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESVD - Lapack routine is unavailable.");
#else
#  if defined(PETSC_USE_COMPLEX)
        PetscStackCall("LAPACKgesvd",LAPACKgesvd_("A","A",&bell,&bell,&MZa[1+ldMZ],&ldMZ,bcgsl->s,bcgsl->u,&bell,bcgsl->v,&bell,bcgsl->work,&bcgsl->lwork,bcgsl->realwork,&bierr));
#  else
        PetscStackCall("LAPACKgesvd",LAPACKgesvd_("A","A",&bell,&bell,&MZa[1+ldMZ],&ldMZ,bcgsl->s,bcgsl->u,&bell,bcgsl->v,&bell,bcgsl->work,&bcgsl->lwork,&bierr));
#  endif
#endif
        if (bierr!=0) {
          ksp->reason = KSP_DIVERGED_BREAKDOWN;
          PetscFunctionReturn(0);
        }
        /* Apply pseudo-inverse */
        max_s = bcgsl->s[0];
        for (i=1; i<bell; i++) {
          if (bcgsl->s[i] > max_s) {
            max_s = bcgsl->s[i];
          }
        }
        /* tolerance is hardwired to bell*max(s)*PETSC_MACHINE_EPSILON */
        pinv_tol = bell*max_s*PETSC_MACHINE_EPSILON;
        ierr = PetscMemzero(&AY0c[1],bell*sizeof(PetscScalar));CHKERRQ(ierr);
        for (i=0; i<bell; i++) {
          if (bcgsl->s[i] >= pinv_tol) {
            utb=0.;
            for (j=0; j<bell; j++) {
              utb += MZb[1+j]*bcgsl->u[i*bell+j];
            }

            for (j=0; j<bell; j++) {
              AY0c[1+j] += utb/bcgsl->s[i]*bcgsl->v[j*bell+i];
            }
          }
        }
      } else {
#if defined(PETSC_MISSING_LAPACK_POTRF)
        SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"POTRF - Lapack routine is unavailable.");
#else
        PetscStackCall("LAPACKpotrf",LAPACKpotrf_("Lower", &bell, &MZa[1+ldMZ], &ldMZ, &bierr));
#endif
        if (bierr!=0) {
          ksp->reason = KSP_DIVERGED_BREAKDOWN;
          PetscFunctionReturn(0);
        }
        ierr = PetscMemcpy(&AY0c[1],&MZb[1],bcgsl->ell*sizeof(PetscScalar));CHKERRQ(ierr);
        PetscStackCall("LAPACKpotrs",LAPACKpotrs_("Lower", &bell, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr));
      }
    } else {
      PetscBLASInt ione = 1;
      PetscScalar  aone = 1.0, azero = 0.0;
      PetscBLASInt neqs;
      ierr = PetscBLASIntCast(bcgsl->ell-1,&neqs);CHKERRQ(ierr);

#if defined(PETSC_MISSING_LAPACK_POTRF)
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"POTRF - Lapack routine is unavailable.");
#else
      PetscStackCall("LAPACKpotrf",LAPACKpotrf_("Lower", &neqs, &MZa[1+ldMZ], &ldMZ, &bierr));
#endif
      if (bierr!=0) {
        ksp->reason = KSP_DIVERGED_BREAKDOWN;
        PetscFunctionReturn(0);
      }
      ierr = PetscMemcpy(&AY0c[1],&MZb[1],(bcgsl->ell-1)*sizeof(PetscScalar));CHKERRQ(ierr);
      PetscStackCall("LAPACKpotrs",LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AY0c[1], &ldMZ, &bierr));
      AY0c[0]          = -1;
      AY0c[bcgsl->ell] = 0.;

      ierr = PetscMemcpy(&AYlc[1],&MZb[1+ldMZ*(bcgsl->ell)],(bcgsl->ell-1)*sizeof(PetscScalar));CHKERRQ(ierr);
      PetscStackCall("LAPACKpotrs",LAPACKpotrs_("Lower", &neqs, &ione, &MZa[1+ldMZ], &ldMZ, &AYlc[1], &ldMZ, &bierr));

      AYlc[0]          = 0.;
      AYlc[bcgsl->ell] = -1;

      PetscStackCall("BLASgemv",BLASgemv_("NoTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AY0c, &ione, &azero, AYtc, &ione));

      kappa0 = PetscRealPart(BLASdot_(&ldMZ, AY0c, &ione, AYtc, &ione));

      /* round-off can cause negative kappa's */
      if (kappa0<0) kappa0 = -kappa0;
      kappa0 = PetscSqrtReal(kappa0);

      kappaA = PetscRealPart(BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione));

      PetscStackCall("BLASgemv",BLASgemv_("noTr", &ldMZ, &ldMZ, &aone, MZb, &ldMZ, AYlc, &ione, &azero, AYtc, &ione));

      kappa1 = PetscRealPart(BLASdot_(&ldMZ, AYlc, &ione, AYtc, &ione));

      if (kappa1<0) kappa1 = -kappa1;
      kappa1 = PetscSqrtReal(kappa1);

      if (kappa0!=0.0 && kappa1!=0.0) {
        if (kappaA<0.7*kappa0*kappa1) {
          ghat = (kappaA<0.0) ?  -0.7*kappa0/kappa1 : 0.7*kappa0/kappa1;
        } else {
          ghat = kappaA/(kappa1*kappa1);
        }
        for (i=0; i<=bcgsl->ell; i++) {
          AY0c[i] = AY0c[i] - ghat* AYlc[i];
        }
      }
    }

    omega = AY0c[bcgsl->ell];
    for (h=bcgsl->ell; h>0 && omega==0.0; h--) omega = AY0c[h];
    if (omega==0.0) {
      ksp->reason = KSP_DIVERGED_BREAKDOWN;
      PetscFunctionReturn(0);
    }


    ierr = VecMAXPY(VX, bcgsl->ell,AY0c+1, VVR);CHKERRQ(ierr);
    for (i=1; i<=bcgsl->ell; i++) AY0c[i] *= -1.0;
    ierr = VecMAXPY(VVU[0], bcgsl->ell,AY0c+1, VVU+1);CHKERRQ(ierr);
    ierr = VecMAXPY(VVR[0], bcgsl->ell,AY0c+1, VVR+1);CHKERRQ(ierr);
    for (i=1; i<=bcgsl->ell; i++) AY0c[i] *= -1.0;
    ierr = VecNorm(VVR[0], NORM_2, &zeta);CHKERRQ(ierr);

    /* Accurate Update */
    if (bcgsl->delta>0.0) {
      if (rnmax_computed<zeta) rnmax_computed = zeta;
      if (rnmax_true<zeta) rnmax_true = zeta;

      bUpdateX = (PetscBool) (zeta<bcgsl->delta*zeta0 && zeta0<=rnmax_computed);
      if ((zeta<bcgsl->delta*rnmax_true && zeta0<=rnmax_true) || bUpdateX) {
        /* r0 <- b-inv(K)*A*X */
        ierr       = KSP_PCApplyBAorAB(ksp, VX, VVR[0], VTM);CHKERRQ(ierr);
        ierr       = VecAYPX(VVR[0], -1.0, VB);CHKERRQ(ierr);
        rnmax_true = zeta;

        if (bUpdateX) {
          ierr           = VecAXPY(VXR,1.0,VX);CHKERRQ(ierr);
          ierr           = VecSet(VX,0.0);CHKERRQ(ierr);
          ierr           = VecCopy(VVR[0], VB);CHKERRQ(ierr);
          rnmax_computed = zeta;
        }
      }
    }
  }
  if (bcgsl->delta>0.0) {
    ierr = VecAXPY(VX,1.0,VXR);CHKERRQ(ierr);
  }

  ierr = (*ksp->converged)(ksp, k, zeta, &ksp->reason, ksp->cnvP);CHKERRQ(ierr);
  if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
  PetscFunctionReturn(0);
}
Exemple #11
0
static PetscErrorCode PCSetUp_SVD(PC pc)
{
#if defined(PETSC_MISSING_LAPACK_GESVD)
  SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"GESVD - Lapack routine is unavailable\nNot able to provide singular value estimates.");
#else
  PC_SVD         *jac = (PC_SVD*)pc->data;
  PetscErrorCode ierr;
  PetscScalar    *a,*u,*v,*d,*work;
  PetscBLASInt   nb,lwork;
  PetscInt       i,n;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatDestroy(&jac->A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)pc->pmat)->comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    Mat      redmat;
    PetscInt M;
    ierr = MatGetSize(pc->pmat,&M,NULL);CHKERRQ(ierr);
    ierr = MatGetRedundantMatrix(pc->pmat,size,PETSC_COMM_SELF,M,MAT_INITIAL_MATRIX,&redmat);CHKERRQ(ierr);
    ierr = MatConvert(redmat,MATSEQDENSE,MAT_INITIAL_MATRIX,&jac->A);CHKERRQ(ierr);
    ierr = MatDestroy(&redmat);CHKERRQ(ierr);
  } else {
    ierr = MatConvert(pc->pmat,MATSEQDENSE,MAT_INITIAL_MATRIX,&jac->A);CHKERRQ(ierr);
  }
  if (!jac->diag) {    /* assume square matrices */
    ierr = MatGetVecs(jac->A,&jac->diag,&jac->work);CHKERRQ(ierr);
  }
  if (!jac->U) {
    ierr = MatDuplicate(jac->A,MAT_DO_NOT_COPY_VALUES,&jac->U);CHKERRQ(ierr);
    ierr = MatDuplicate(jac->A,MAT_DO_NOT_COPY_VALUES,&jac->Vt);CHKERRQ(ierr);
  }
  ierr  = MatGetSize(pc->pmat,&n,NULL);CHKERRQ(ierr);
  ierr  = PetscBLASIntCast(n,&nb);CHKERRQ(ierr);
  lwork = 5*nb;
  ierr  = PetscMalloc(lwork*sizeof(PetscScalar),&work);CHKERRQ(ierr);
  ierr  = MatDenseGetArray(jac->A,&a);CHKERRQ(ierr);
  ierr  = MatDenseGetArray(jac->U,&u);CHKERRQ(ierr);
  ierr  = MatDenseGetArray(jac->Vt,&v);CHKERRQ(ierr);
  ierr  = VecGetArray(jac->diag,&d);CHKERRQ(ierr);
#if !defined(PETSC_USE_COMPLEX)
  {
    PetscBLASInt lierr;
    ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
    PetscStackCall("LAPACKgesvd",LAPACKgesvd_("A","A",&nb,&nb,a,&nb,d,u,&nb,v,&nb,work,&lwork,&lierr));
    if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"gesv() error %d",lierr);
    ierr = PetscFPTrapPop();CHKERRQ(ierr);
  }
#else
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for complex");
#endif
  ierr = MatDenseRestoreArray(jac->A,&a);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(jac->U,&u);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(jac->Vt,&v);CHKERRQ(ierr);
  for (i=n-1; i>=0; i--) if (PetscRealPart(d[i]) > jac->zerosing) break;
  jac->nzero = n-1-i;
  if (jac->monitor) {
    ierr = PetscViewerASCIIAddTab(jac->monitor,((PetscObject)pc)->tablevel);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(jac->monitor,"    SVD: condition number %14.12e, %D of %D singular values are (nearly) zero\n",(double)PetscRealPart(d[0]/d[n-1]),jac->nzero,n);CHKERRQ(ierr);
    if (n >= 10) {              /* print 5 smallest and 5 largest */
      ierr = PetscViewerASCIIPrintf(jac->monitor,"    SVD: smallest singular values: %14.12e %14.12e %14.12e %14.12e %14.12e\n",(double)PetscRealPart(d[n-1]),(double)PetscRealPart(d[n-2]),(double)PetscRealPart(d[n-3]),(double)PetscRealPart(d[n-4]),(double)PetscRealPart(d[n-5]));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(jac->monitor,"    SVD: largest singular values : %14.12e %14.12e %14.12e %14.12e %14.12e\n",(double)PetscRealPart(d[4]),(double)PetscRealPart(d[3]),(double)PetscRealPart(d[2]),(double)PetscRealPart(d[1]),(double)PetscRealPart(d[0]));CHKERRQ(ierr);
    } else {                    /* print all singular values */
      char     buf[256],*p;
      size_t   left = sizeof(buf),used;
      PetscInt thisline;
      for (p=buf,i=n-1,thisline=1; i>=0; i--,thisline++) {
        ierr  = PetscSNPrintfCount(p,left," %14.12e",&used,(double)PetscRealPart(d[i]));CHKERRQ(ierr);
        left -= used;
        p    += used;
        if (thisline > 4 || i==0) {
          ierr     = PetscViewerASCIIPrintf(jac->monitor,"    SVD: singular values:%s\n",buf);CHKERRQ(ierr);
          p        = buf;
          thisline = 0;
        }
      }
    }
    ierr = PetscViewerASCIISubtractTab(jac->monitor,((PetscObject)pc)->tablevel);CHKERRQ(ierr);
  }
  ierr = PetscInfo2(pc,"Largest and smallest singular values %14.12e %14.12e\n",(double)PetscRealPart(d[0]),(double)PetscRealPart(d[n-1]));CHKERRQ(ierr);
  for (i=0; i<n-jac->nzero; i++) d[i] = 1.0/d[i];
  for (; i<n; i++) d[i] = 0.0;
  if (jac->essrank > 0) for (i=0; i<n-jac->nzero-jac->essrank; i++) d[i] = 0.0; /* Skip all but essrank eigenvalues */
  ierr = PetscInfo1(pc,"Number of zero or nearly singular values %D\n",jac->nzero);CHKERRQ(ierr);
  ierr = VecRestoreArray(jac->diag,&d);CHKERRQ(ierr);
#if defined(foo)
  {
    PetscViewer viewer;
    ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,"joe",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
    ierr = MatView(jac->A,viewer);CHKERRQ(ierr);
    ierr = MatView(jac->U,viewer);CHKERRQ(ierr);
    ierr = MatView(jac->Vt,viewer);CHKERRQ(ierr);
    ierr = VecView(jac->diag,viewer);CHKERRQ(ierr);
    ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
  }
#endif
  ierr = PetscFree(work);CHKERRQ(ierr);
  PetscFunctionReturn(0);
#endif
}
Exemple #12
0
PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F,Mat A,const MatFactorInfo *info)
{
  Mat              *tseq,A_seq = NULL;
  Mat_SeqAIJ       *aa,*bb;
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(F)->spptr;
  PetscErrorCode   ierr;
  PetscInt         M=A->rmap->N,N=A->cmap->N,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
                   m=A->rmap->n, colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
  int              sinfo;   /* SuperLU_Dist info flag is always an int even with long long indices */
  PetscMPIInt      size;
  SuperLUStat_t    stat;
  double           *berr=0;
  IS               isrow;
  Mat              F_diag=NULL;
#if defined(PETSC_USE_COMPLEX)
  doublecomplex    *av, *bv;
#else
  double           *av, *bv;
#endif

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);

  if (lu->MatInputMode == GLOBAL) { /* global mat input */
    if (size > 1) { /* convert mpi A to seq mat A */
      ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr);
      ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr);
      ierr = ISDestroy(&isrow);CHKERRQ(ierr);

      A_seq = *tseq;
      ierr  = PetscFree(tseq);CHKERRQ(ierr);
      aa    = (Mat_SeqAIJ*)A_seq->data;
    } else {
      PetscBool flg;
      ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
      if (flg) {
        Mat_MPIAIJ *At = (Mat_MPIAIJ*)A->data;
        A = At->A;
      }
      aa =  (Mat_SeqAIJ*)A->data;
    }

    /* Convert Petsc NR matrix to SuperLU_DIST NC.
       Note: memories of lu->val, col and row are allocated by CompRow_to_CompCol_dist()! */
    if (lu->options.Fact != DOFACT) {/* successive numeric factorization, sparsity pattern is reused. */
      PetscStackCall("SuperLU_DIST:Destroy_CompCol_Matrix_dist",Destroy_CompCol_Matrix_dist(&lu->A_sup));
      if (lu->FactPattern == SamePattern_SameRowPerm) {
        lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
      } else { /* lu->FactPattern == SamePattern */
        PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(N, &lu->grid, &lu->LUstruct));
        lu->options.Fact = SamePattern;
      }
    }
#if defined(PETSC_USE_COMPLEX)
    PetscStackCall("SuperLU_DIST:zCompRow_to_CompCol_dist",zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,(int_t*)aa->j,(int_t*)aa->i,&lu->val,&lu->col, &lu->row));
#else
    PetscStackCall("SuperLU_DIST:dCompRow_to_CompCol_dist",dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,(int_t*)aa->j,(int_t*)aa->i,&lu->val, &lu->col, &lu->row));
#endif

    /* Create compressed column matrix A_sup. */
#if defined(PETSC_USE_COMPLEX)
    PetscStackCall("SuperLU_DIST:zCreate_CompCol_Matrix_dist",zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE));
#else
    PetscStackCall("SuperLU_DIST:dCreate_CompCol_Matrix_dist",dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE));
#endif
  } else { /* distributed mat input */
    Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
    aa=(Mat_SeqAIJ*)(mat->A)->data;
    bb=(Mat_SeqAIJ*)(mat->B)->data;
    ai=aa->i; aj=aa->j;
    bi=bb->i; bj=bb->j;
#if defined(PETSC_USE_COMPLEX)
    av=(doublecomplex*)aa->a;
    bv=(doublecomplex*)bb->a;
#else
    av=aa->a;
    bv=bb->a;
#endif
    rstart = A->rmap->rstart;
    nz     = aa->nz + bb->nz;
    garray = mat->garray;

    if (lu->options.Fact == DOFACT) { /* first numeric factorization */
#if defined(PETSC_USE_COMPLEX)
      PetscStackCall("SuperLU_DIST:zallocateA_dist",zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row));
#else
      PetscStackCall("SuperLU_DIST:dallocateA_dist",dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row));
#endif
    } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
      /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup); */ /* this leads to crash! However, see SuperLU_DIST_2.5/EXAMPLE/pzdrive2.c */
      if (lu->FactPattern == SamePattern_SameRowPerm) {
        lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
      } else {
        PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(N, &lu->grid, &lu->LUstruct)); /* Deallocate storage associated with the L and U matrices. */
        lu->options.Fact = SamePattern;
      }
    }
    nz = 0;
    for (i=0; i<m; i++) {
      lu->row[i] = nz;
      countA     = ai[i+1] - ai[i];
      countB     = bi[i+1] - bi[i];
      ajj        = aj + ai[i]; /* ptr to the beginning of this row */
      bjj        = bj + bi[i];

      /* B part, smaller col index */
      colA_start = rstart + ajj[0]; /* the smallest global col index of A */
      jB         = 0;
      for (j=0; j<countB; j++) {
        jcol = garray[bjj[j]];
        if (jcol > colA_start) {
          jB = j;
          break;
        }
        lu->col[nz]   = jcol;
        lu->val[nz++] = *bv++;
        if (j==countB-1) jB = countB;
      }

      /* A part */
      for (j=0; j<countA; j++) {
        lu->col[nz]   = rstart + ajj[j];
        lu->val[nz++] = *av++;
      }

      /* B part, larger col index */
      for (j=jB; j<countB; j++) {
        lu->col[nz]   = garray[bjj[j]];
        lu->val[nz++] = *bv++;
      }
    }
    lu->row[m] = nz;
#if defined(PETSC_USE_COMPLEX)
    PetscStackCall("SuperLU_DIST:zCreate_CompRowLoc_Matrix_dist",zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE));
#else
    PetscStackCall("SuperLU_DIST:dCreate_CompRowLoc_Matrix_dist",dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE));
#endif
  }

  /* Factor the matrix. */
  PetscStackCall("SuperLU_DIST:PStatInit",PStatInit(&stat));   /* Initialize the statistics variables. */
  if (lu->MatInputMode == GLOBAL) { /* global mat input */
#if defined(PETSC_USE_COMPLEX)
    PetscStackCall("SuperLU_DIST:pzgssvx_ABglobal",pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,&lu->grid, &lu->LUstruct, berr, &stat, &sinfo));
#else
    PetscStackCall("SuperLU_DIST:pdgssvx_ABglobal",pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,&lu->grid, &lu->LUstruct, berr, &stat, &sinfo));
#endif
  } else { /* distributed mat input */
#if defined(PETSC_USE_COMPLEX)
    PetscStackCall("SuperLU_DIST:pzgssvx",pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, m, 0, &lu->grid,&lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo));
    if (sinfo) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
#else
    PetscStackCall("SuperLU_DIST:pdgssvx",pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, m, 0, &lu->grid,&lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo));
    if (sinfo) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
#endif
  }

  if (lu->MatInputMode == GLOBAL && size > 1) {
    ierr = MatDestroy(&A_seq);CHKERRQ(ierr);
  }

  if (lu->options.PrintStat) {
    PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
  }
  PStatFree(&stat);
  if (size > 1) {
    F_diag            = ((Mat_MPIAIJ*)(F)->data)->A;
    F_diag->assembled = PETSC_TRUE;
  }
  (F)->assembled    = PETSC_TRUE;
  (F)->preallocated = PETSC_TRUE;
  lu->options.Fact  = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */
  PetscFunctionReturn(0);
}
Exemple #13
0
PetscErrorCode MatMatSolve_SuperLU_DIST(Mat A,Mat B_mpi,Mat X)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
  PetscErrorCode   ierr;
  PetscMPIInt      size;
  PetscInt         M=A->rmap->N,m=A->rmap->n,nrhs;
  SuperLUStat_t    stat;
  double           berr[1];
  PetscScalar      *bptr;
  int              info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */
  PetscBool        flg;

  PetscFunctionBegin;
  if (lu->options.Fact != FACTORED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact mush equal FACTORED");
  ierr = PetscObjectTypeCompareAny((PetscObject)B_mpi,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix");
  ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");

  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  if (size > 1 && lu->MatInputMode == GLOBAL) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatInputMode=GLOBAL for nproc %d>1 is not supported",size);
  /* size==1 or distributed mat input */
  if (lu->options.SolveInitialized && !lu->matmatsolve_iscalled) {
    /* communication pattern of SOLVEstruct is unlikely created for matmatsolve,
       thus destroy it and create a new SOLVEstruct.
       Otherwise it may result in memory corruption or incorrect solution
       See src/mat/examples/tests/ex125.c */
#if defined(PETSC_USE_COMPLEX)
    PetscStackCall("SuperLU_DIST:zSolveFinalize",zSolveFinalize(&lu->options, &lu->SOLVEstruct));
#else
    PetscStackCall("SuperLU_DIST:dSolveFinalize",dSolveFinalize(&lu->options, &lu->SOLVEstruct));
#endif
    lu->options.SolveInitialized = NO;
  }
  ierr = MatCopy(B_mpi,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);

  ierr = MatGetSize(B_mpi,NULL,&nrhs);CHKERRQ(ierr);

  PetscStackCall("SuperLU_DIST:PStatInit",PStatInit(&stat));        /* Initialize the statistics variables. */
  ierr = MatDenseGetArray(X,&bptr);CHKERRQ(ierr);
  if (lu->MatInputMode == GLOBAL) { /* size == 1 */
#if defined(PETSC_USE_COMPLEX)
    PetscStackCall("SuperLU_DIST:pzgssvx_ABglobal",pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, M, nrhs,&lu->grid, &lu->LUstruct, berr, &stat, &info));
#else
    PetscStackCall("SuperLU_DIST:pdgssvx_ABglobal",pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, M, nrhs, &lu->grid, &lu->LUstruct, berr, &stat, &info));
#endif
  } else { /* distributed mat input */
#if defined(PETSC_USE_COMPLEX)
    PetscStackCall("SuperLU_DIST:pzgssvx",pzgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,(doublecomplex*)bptr,m,nrhs,&lu->grid, &lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info));
#else
    PetscStackCall("SuperLU_DIST:pdgssvx",pdgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,bptr,m,nrhs,&lu->grid,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info));
#endif
  }
  if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
  ierr = MatDenseRestoreArray(X,&bptr);CHKERRQ(ierr);

  if (lu->options.PrintStat) PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */
  PetscStackCall("SuperLU_DIST:PStatFree",PStatFree(&stat));
  lu->matsolve_iscalled    = PETSC_FALSE;
  lu->matmatsolve_iscalled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
Exemple #14
0
PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
  PetscErrorCode   ierr;
  PetscMPIInt      size;
  PetscInt         m=A->rmap->n,M=A->rmap->N,N=A->cmap->N;
  SuperLUStat_t    stat;
  double           berr[1];
  PetscScalar      *bptr;
  PetscInt         nrhs=1;
  Vec              x_seq;
  IS               iden;
  VecScatter       scat;
  int              info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */
  static PetscBool cite = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscCitationsRegister("@article{lidemmel03,\n  author = {Xiaoye S. Li and James W. Demmel},\n  title = {{SuperLU_DIST}: A Scalable Distributed-Memory Sparse Direct\n           Solver for Unsymmetric Linear Systems},\n  journal = {ACM Trans. Mathematical Software},\n  volume = {29},\n  number = {2},\n  pages = {110-140},\n  year = 2003\n}\n",&cite);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  if (size > 1 && lu->MatInputMode == GLOBAL) {
    /* global mat input, convert b to x_seq */
    ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
    ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
    ierr = ISDestroy(&iden);CHKERRQ(ierr);

    ierr = VecScatterBegin(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
  } else { /* size==1 || distributed mat input */
    if (lu->options.SolveInitialized && !lu->matsolve_iscalled) {
      /* see comments in MatMatSolve() */
#if defined(PETSC_USE_COMPLEX)
      PetscStackCall("SuperLU_DIST:zSolveFinalize",zSolveFinalize(&lu->options, &lu->SOLVEstruct));
#else
      PetscStackCall("SuperLU_DIST:dSolveFinalize",dSolveFinalize(&lu->options, &lu->SOLVEstruct));
#endif
      lu->options.SolveInitialized = NO;
    }
    ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
    ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
  }

  if (lu->options.Fact != FACTORED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact mush equal FACTORED");

  PetscStackCall("SuperLU_DIST:PStatInit",PStatInit(&stat));        /* Initialize the statistics variables. */
  if (lu->MatInputMode == GLOBAL) {
#if defined(PETSC_USE_COMPLEX)
    PetscStackCall("SuperLU_DIST:pzgssvx_ABglobal",pzgssvx_ABglobal(&lu->options,&lu->A_sup,&lu->ScalePermstruct,(doublecomplex*)bptr,M,nrhs,&lu->grid,&lu->LUstruct,berr,&stat,&info));
#else
    PetscStackCall("SuperLU_DIST:pdgssvx_ABglobal",pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr,M,nrhs,&lu->grid,&lu->LUstruct,berr,&stat,&info));
#endif
  } else { /* distributed mat input */
#if defined(PETSC_USE_COMPLEX)
    PetscStackCall("SuperLU_DIST:pzgssvx",pzgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,(doublecomplex*)bptr,m,nrhs,&lu->grid,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info));
#else
    PetscStackCall("SuperLU_DIST:pdgssvx",pdgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,bptr,m,nrhs,&lu->grid,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info));
#endif
  }
  if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);

  if (lu->options.PrintStat) PStatPrint(&lu->options, &stat, &lu->grid);      /* Print the statistics. */
  PetscStackCall("SuperLU_DIST:PStatFree",PStatFree(&stat));

  if (size > 1 && lu->MatInputMode == GLOBAL) {
    /* convert seq x to mpi x */
    ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
    ierr = VecScatterBegin(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    ierr = VecScatterEnd(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    ierr = VecScatterDestroy(&scat);CHKERRQ(ierr);
    ierr = VecDestroy(&x_seq);CHKERRQ(ierr);
  } else {
    ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);

    lu->matsolve_iscalled    = PETSC_TRUE;
    lu->matmatsolve_iscalled = PETSC_FALSE;
  }
  PetscFunctionReturn(0);
}
int main(int argc, char **args)
{
  Mat            A, L;
  AppCtx         ctx;
  PetscViewer    viewer;
  PetscErrorCode ierr;

  ierr = PetscInitialize(&argc, &args, (char *) 0, help);CHKERRQ(ierr);
  ierr = ProcessOptions(&ctx);CHKERRQ(ierr);
  /* Load matrix */
  ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD, ctx.matFilename, FILE_MODE_READ, &viewer);CHKERRQ(ierr);
  ierr = MatCreate(PETSC_COMM_WORLD, &A);CHKERRQ(ierr);
  ierr = MatLoad(A, viewer);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
  /* Make graph Laplacian from matrix */
  ierr = MatLaplacian(A, 1.0e-12, &L);CHKERRQ(ierr);
  /* Check Laplacian */
  PetscReal norm;
  Vec       x, y;

  ierr = MatGetVecs(L, &x, NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(x, &y);CHKERRQ(ierr);
  ierr = VecSet(x, 1.0);CHKERRQ(ierr);
  ierr = MatMult(L, x, y);CHKERRQ(ierr);
  ierr = VecNorm(y, NORM_INFINITY, &norm);CHKERRQ(ierr);
  if (norm > 1.0e-10) SETERRQ(PetscObjectComm((PetscObject) y), PETSC_ERR_PLIB, "Invalid graph Laplacian");
  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&y);CHKERRQ(ierr);
  /* Compute Fiedler vector, and perhaps more vectors */
  Mat          LD;
  PetscScalar *a, *realpart, *imagpart, *eigvec, *work, sdummy;
  PetscBLASInt bn, bN, lwork, lierr, idummy;
  PetscInt     n, i;

  ierr = MatConvert(L, MATDENSE, MAT_INITIAL_MATRIX, &LD);CHKERRQ(ierr);
  ierr = MatGetLocalSize(LD, &n, NULL);CHKERRQ(ierr);
  ierr = MatDenseGetArray(LD, &a);CHKERRQ(ierr);

  ierr = PetscBLASIntCast(n, &bn);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(n, &bN);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(5*n,&lwork);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(1,&idummy);CHKERRQ(ierr);
  ierr = PetscMalloc4(n,PetscScalar,&realpart,n,PetscScalar,&imagpart,n*n,PetscScalar,&eigvec,lwork,PetscScalar,&work);CHKERRQ(ierr);
  ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr);
  PetscStackCall("LAPACKgeev", LAPACKgeev_("N","V",&bn,a,&bN,realpart,imagpart,&sdummy,&idummy,eigvec,&bN,work,&lwork,&lierr));
  if (lierr) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in LAPACK routine %d", (int) lierr);
  ierr = PetscFPTrapPop();CHKERRQ(ierr);
  PetscReal *r, *c;
  PetscInt  *perm;

  ierr = PetscMalloc3(n,PetscInt,&perm,n,PetscReal,&r,n,PetscReal,&c);CHKERRQ(ierr);
  for (i = 0; i < n; ++i) perm[i] = i;
  ierr = PetscSortRealWithPermutation(n,realpart,perm);CHKERRQ(ierr);
  for (i = 0; i < n; ++i) {
    r[i] = realpart[perm[i]];
    c[i] = imagpart[perm[i]];
  }
  for (i = 0; i < n; ++i) {
    realpart[i] = r[i];
    imagpart[i] = c[i];
  }
  /* Output spectrum */
  if (ctx.showSpectrum) {
    ierr = PetscPrintf(PETSC_COMM_SELF, "Spectrum\n");CHKERRQ(ierr);
    for (i = 0; i < n; ++i) {ierr = PetscPrintf(PETSC_COMM_SELF, "%d: Real %g Imag %g\n", i, realpart[i], imagpart[i]);CHKERRQ(ierr);}
  }
  /* Check lowest eigenvalue and eigenvector */
  PetscInt evInd = perm[0];

  if ((realpart[0] > 1.0e-12) || (imagpart[0] > 1.0e-12)) SETERRQ(PetscObjectComm((PetscObject) L), PETSC_ERR_PLIB, "Graph Laplacian must have lowest eigenvalue 0");
  for (i = 0; i < n; ++i) {
    if (fabs(eigvec[evInd*n+i] - eigvec[evInd*n+0]) > 1.0e-10) SETERRQ3(PetscObjectComm((PetscObject) L), PETSC_ERR_PLIB, "Graph Laplacian must have constant lowest eigenvector ev_%d %g != ev_0 %g", i, eigvec[evInd*n+i], eigvec[evInd*n+0]);
  }
  /* Output Fiedler vector */
  evInd = perm[1];
  if (ctx.showFiedler) {
    ierr = PetscPrintf(PETSC_COMM_SELF, "Fiedler vector, Re{ev} %g\n", realpart[1]);CHKERRQ(ierr);
    for (i = 0; i < n; ++i) {ierr = PetscPrintf(PETSC_COMM_SELF, "%d: %g\n", i, eigvec[evInd*n+i]);CHKERRQ(ierr);}
  }
  /* Construct Fiedler partition */
  IS        fIS, fIS2;
  PetscInt *fperm, *fperm2, pos, neg, posSize = 0;

  ierr = PetscMalloc(n * sizeof(PetscInt), &fperm);CHKERRQ(ierr);
  for (i = 0; i < n; ++i) {
    if (eigvec[evInd*n+i] > 0.0) ++posSize;
  }

  ierr = PetscMalloc(n * sizeof(PetscInt), &fperm2);CHKERRQ(ierr);
  for (i = 0; i < n; ++i) fperm[i] = i;
  ierr = PetscSortRealWithPermutation(n, &eigvec[evInd*n], fperm);CHKERRQ(ierr);
  for (i = 0; i < n; ++i) fperm2[n-1-i] = fperm[i];

  for (i = 0, pos = 0, neg = posSize; i < n; ++i) {
    if (eigvec[evInd*n+i] > 0.0) fperm[pos++] = i;
    else                         fperm[neg++] = i;
  }

  ierr = ISCreateGeneral(PetscObjectComm((PetscObject) L), n, fperm, PETSC_OWN_POINTER, &fIS);CHKERRQ(ierr);
  ierr = ISSetPermutation(fIS);CHKERRQ(ierr);
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject) L), n, fperm2, PETSC_OWN_POINTER, &fIS2);CHKERRQ(ierr);
  ierr = ISSetPermutation(fIS2);CHKERRQ(ierr);

  ierr = PetscFree3(perm,r,c);CHKERRQ(ierr);
  ierr = PetscFree4(realpart,imagpart,eigvec,work);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(LD, &a);CHKERRQ(ierr);
  ierr = MatDestroy(&LD);CHKERRQ(ierr);
  ierr = MatDestroy(&L);CHKERRQ(ierr);
  /* Permute matrix */
  Mat AR, AR2;

  ierr = MatPermute(A, fIS, fIS, &AR);CHKERRQ(ierr);
  ierr = MatView(A,  PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr);
  ierr = MatView(AR, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr);
  ierr = ISDestroy(&fIS);CHKERRQ(ierr);

  ierr = MatPermute(A, fIS2, fIS2, &AR2);CHKERRQ(ierr);
  ierr = MatView(AR2, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr);
  ierr = ISDestroy(&fIS2);CHKERRQ(ierr);
  ierr = MatDestroy(&AR);CHKERRQ(ierr);
  AR   = AR2;
  /* Extract blocks and reorder */
  Mat               AP, AN, APR, ANR;
  IS                ispos, isneg, rpermpos, cpermpos, rpermneg, cpermneg;
  PetscInt          bw, bwr;

  ierr = ISCreateStride(PETSC_COMM_SELF, posSize, 0, 1, &ispos);CHKERRQ(ierr);
  ierr = ISCreateStride(PETSC_COMM_SELF, n - posSize, posSize, 1, &isneg);CHKERRQ(ierr);
  ierr = MatGetSubMatrix(AR, ispos, ispos, MAT_INITIAL_MATRIX, &AP);CHKERRQ(ierr);
  ierr = MatGetSubMatrix(AR, isneg, isneg, MAT_INITIAL_MATRIX, &AN);CHKERRQ(ierr);
  ierr = ISDestroy(&ispos);CHKERRQ(ierr);
  ierr = ISDestroy(&isneg);CHKERRQ(ierr);
  ierr = MatGetOrdering(AP, ctx.matOrdtype, &rpermpos, &cpermpos);CHKERRQ(ierr);
  ierr = MatGetOrdering(AN, ctx.matOrdtype, &rpermneg, &cpermneg);CHKERRQ(ierr);
  ierr = MatPermute(AP, rpermpos, cpermpos, &APR);CHKERRQ(ierr);
  ierr = MatComputeBandwidth(AP, 0.0, &bw);CHKERRQ(ierr);
  ierr = MatComputeBandwidth(APR, 0.0, &bwr);CHKERRQ(ierr);
  ierr = PetscPrintf(PETSC_COMM_WORLD, "Reduced positive bandwidth from %d to %d\n", bw, bwr);CHKERRQ(ierr);
  ierr = MatPermute(AN, rpermneg, cpermneg, &ANR);CHKERRQ(ierr);
  ierr = MatComputeBandwidth(AN, 0.0, &bw);CHKERRQ(ierr);
  ierr = MatComputeBandwidth(ANR, 0.0, &bwr);CHKERRQ(ierr);
  ierr = PetscPrintf(PETSC_COMM_WORLD, "Reduced negative bandwidth from %d to %d\n", bw, bwr);CHKERRQ(ierr);
  ierr = MatView(AP,  PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr);
  ierr = MatView(APR, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr);
  ierr = MatView(AN,  PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr);
  ierr = MatView(ANR, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr);
  /* Reorder original matrix */
  Mat             ARR;
  IS              rperm, cperm;
  PetscInt       *idx;
  const PetscInt *cidx;

  ierr = PetscMalloc(n * sizeof(PetscInt), &idx);CHKERRQ(ierr);
  ierr = ISGetIndices(rpermpos, &cidx);CHKERRQ(ierr);
  for (i = 0; i < posSize; ++i) idx[i] = cidx[i];
  ierr = ISRestoreIndices(rpermpos, &cidx);CHKERRQ(ierr);
  ierr = ISGetIndices(rpermneg, &cidx);CHKERRQ(ierr);
  for (i = posSize; i < n; ++i) idx[i] = cidx[i-posSize] + posSize;
  ierr = ISRestoreIndices(rpermneg, &cidx);CHKERRQ(ierr);
  ierr = ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, &rperm);CHKERRQ(ierr);
  ierr = ISSetPermutation(rperm);CHKERRQ(ierr);
  ierr = PetscMalloc(n * sizeof(PetscInt), &idx);CHKERRQ(ierr);
  ierr = ISGetIndices(cpermpos, &cidx);CHKERRQ(ierr);
  for (i = 0; i < posSize; ++i) idx[i] = cidx[i];
  ierr = ISRestoreIndices(cpermpos, &cidx);CHKERRQ(ierr);
  ierr = ISGetIndices(cpermneg, &cidx);CHKERRQ(ierr);
  for (i = posSize; i < n; ++i) idx[i] = cidx[i-posSize] + posSize;
  ierr = ISRestoreIndices(cpermneg, &cidx);CHKERRQ(ierr);
  ierr = ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, &cperm);CHKERRQ(ierr);
  ierr = ISSetPermutation(cperm);CHKERRQ(ierr);
  ierr = MatPermute(AR, rperm, cperm, &ARR);CHKERRQ(ierr);
  ierr = MatView(ARR, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr);
  ierr = ISDestroy(&rperm);CHKERRQ(ierr);
  ierr = ISDestroy(&cperm);CHKERRQ(ierr);
  ierr = ISDestroy(&rpermpos);CHKERRQ(ierr);
  ierr = ISDestroy(&cpermpos);CHKERRQ(ierr);
  ierr = ISDestroy(&rpermneg);CHKERRQ(ierr);
  ierr = ISDestroy(&cpermneg);CHKERRQ(ierr);
  ierr = MatDestroy(&AP);CHKERRQ(ierr);
  ierr = MatDestroy(&AN);CHKERRQ(ierr);
  ierr = MatDestroy(&APR);CHKERRQ(ierr);
  ierr = MatDestroy(&ANR);CHKERRQ(ierr);
  /* Compare bands */
  Mat B, BR;

  ierr = MatCreateSubMatrixBanded(A,   50, 0.95, &B);CHKERRQ(ierr);
  ierr = MatCreateSubMatrixBanded(ARR, 50, 0.95, &BR);CHKERRQ(ierr);
  ierr = MatView(B,  PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr);
  ierr = MatView(BR, PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = MatDestroy(&BR);CHKERRQ(ierr);
  /* Cleanup */
  ierr = MatDestroy(&ARR);CHKERRQ(ierr);
  ierr = MatDestroy(&AR);CHKERRQ(ierr);
  ierr = MatDestroy(&A);CHKERRQ(ierr);
  ierr = PetscFinalize();
  return 0;
}