Beispiel #1
0
/*@C
    PetscMemoryView - Shows the amount of memory currently being used
        in a communicator.

    Collective on PetscViewer

    Input Parameter:
+    viewer - the viewer that defines the communicator
-    message - string printed before values

    Options Database:
+    -malloc - have PETSc track how much memory it has allocated
-    -memory_view - during PetscFinalize() have this routine called

    Level: intermediate

    Concepts: memory usage

.seealso: PetscMallocDump(), PetscMemoryGetCurrentUsage(), PetscMemorySetGetMaximumUsage()
 @*/
PetscErrorCode  PetscMemoryView(PetscViewer viewer,const char message[])
{
  PetscLogDouble allocated,allocatedmax,resident,residentmax,gallocated,gallocatedmax,gresident,gresidentmax,maxgallocated,maxgallocatedmax,maxgresident,maxgresidentmax;
  PetscLogDouble mingallocated,mingallocatedmax,mingresident,mingresidentmax;
  PetscErrorCode ierr;
  MPI_Comm       comm;

  PetscFunctionBegin;
  if (!viewer) viewer = PETSC_VIEWER_STDOUT_WORLD;
  ierr = PetscMallocGetCurrentUsage(&allocated);CHKERRQ(ierr);
  ierr = PetscMallocGetMaximumUsage(&allocatedmax);CHKERRQ(ierr);
  ierr = PetscMemoryGetCurrentUsage(&resident);CHKERRQ(ierr);
  ierr = PetscMemoryGetMaximumUsage(&residentmax);CHKERRQ(ierr);
  if (residentmax > 0) residentmax = PetscMax(resident,residentmax);
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer,message);CHKERRQ(ierr);
  if (resident && residentmax && allocated) {
    ierr = MPI_Reduce(&residentmax,&gresidentmax,1,MPIU_PETSCLOGDOUBLE,MPI_SUM,0,comm);CHKERRQ(ierr);
    ierr = MPI_Reduce(&residentmax,&maxgresidentmax,1,MPIU_PETSCLOGDOUBLE,MPI_MAX,0,comm);CHKERRQ(ierr);
    ierr = MPI_Reduce(&residentmax,&mingresidentmax,1,MPIU_PETSCLOGDOUBLE,MPI_MIN,0,comm);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"Maximum (over computational time) process memory:        total %5.4e max %5.4e min %5.4e\n",gresidentmax,maxgresidentmax,mingresidentmax);CHKERRQ(ierr);
    ierr = MPI_Reduce(&resident,&gresident,1,MPIU_PETSCLOGDOUBLE,MPI_SUM,0,comm);CHKERRQ(ierr);
    ierr = MPI_Reduce(&resident,&maxgresident,1,MPIU_PETSCLOGDOUBLE,MPI_MAX,0,comm);CHKERRQ(ierr);
    ierr = MPI_Reduce(&resident,&mingresident,1,MPIU_PETSCLOGDOUBLE,MPI_MIN,0,comm);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"Current process memory:                                  total %5.4e max %5.4e min %5.4e\n",gresident,maxgresident,mingresident);CHKERRQ(ierr);
    ierr = MPI_Reduce(&allocatedmax,&gallocatedmax,1,MPIU_PETSCLOGDOUBLE,MPI_SUM,0,comm);CHKERRQ(ierr);
    ierr = MPI_Reduce(&allocatedmax,&maxgallocatedmax,1,MPIU_PETSCLOGDOUBLE,MPI_MAX,0,comm);CHKERRQ(ierr);
    ierr = MPI_Reduce(&allocatedmax,&mingallocatedmax,1,MPIU_PETSCLOGDOUBLE,MPI_MIN,0,comm);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"Maximum (over computational time) space PetscMalloc()ed: total %5.4e max %5.4e min %5.4e\n",gallocatedmax,maxgallocatedmax,mingallocatedmax);CHKERRQ(ierr);
    ierr = MPI_Reduce(&allocated,&gallocated,1,MPIU_PETSCLOGDOUBLE,MPI_SUM,0,comm);CHKERRQ(ierr);
    ierr = MPI_Reduce(&allocated,&maxgallocated,1,MPIU_PETSCLOGDOUBLE,MPI_MAX,0,comm);CHKERRQ(ierr);
    ierr = MPI_Reduce(&allocated,&mingallocated,1,MPIU_PETSCLOGDOUBLE,MPI_MIN,0,comm);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"Current space PetscMalloc()ed:                           total %5.4e max %5.4e min %5.4e\n",gallocated,maxgallocated,mingallocated);CHKERRQ(ierr);
  } else if (resident && residentmax) {
    ierr = MPI_Reduce(&residentmax,&gresidentmax,1,MPIU_PETSCLOGDOUBLE,MPI_SUM,0,comm);CHKERRQ(ierr);
    ierr = MPI_Reduce(&residentmax,&maxgresidentmax,1,MPIU_PETSCLOGDOUBLE,MPI_MAX,0,comm);CHKERRQ(ierr);
    ierr = MPI_Reduce(&residentmax,&mingresidentmax,1,MPIU_PETSCLOGDOUBLE,MPI_MIN,0,comm);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"Maximum (over computational time) process memory:        total %5.4e max %5.4e min %5.4e\n",gresidentmax,maxgresidentmax,mingresidentmax);CHKERRQ(ierr);
    ierr = MPI_Reduce(&resident,&gresident,1,MPIU_PETSCLOGDOUBLE,MPI_SUM,0,comm);CHKERRQ(ierr);
    ierr = MPI_Reduce(&resident,&maxgresident,1,MPIU_PETSCLOGDOUBLE,MPI_MAX,0,comm);CHKERRQ(ierr);
    ierr = MPI_Reduce(&resident,&mingresident,1,MPIU_PETSCLOGDOUBLE,MPI_MIN,0,comm);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"Current process memory:                                  total %5.4e max %5.4e min %5.4e\n",gresident,maxgresident,mingresident);CHKERRQ(ierr);
  } else if (resident && allocated) {
    ierr = MPI_Reduce(&resident,&gresident,1,MPIU_PETSCLOGDOUBLE,MPI_SUM,0,comm);CHKERRQ(ierr);
    ierr = MPI_Reduce(&resident,&maxgresident,1,MPIU_PETSCLOGDOUBLE,MPI_MAX,0,comm);CHKERRQ(ierr);
    ierr = MPI_Reduce(&resident,&mingresident,1,MPIU_PETSCLOGDOUBLE,MPI_MIN,0,comm);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"Current process memory:                                  total %5.4e max %5.4e min %5.4e\n",gresident,maxgresident,mingresident);CHKERRQ(ierr);
    ierr = MPI_Reduce(&allocated,&gallocated,1,MPIU_PETSCLOGDOUBLE,MPI_SUM,0,comm);CHKERRQ(ierr);
    ierr = MPI_Reduce(&allocated,&maxgallocated,1,MPIU_PETSCLOGDOUBLE,MPI_MAX,0,comm);CHKERRQ(ierr);
    ierr = MPI_Reduce(&allocated,&mingallocated,1,MPIU_PETSCLOGDOUBLE,MPI_MIN,0,comm);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"Current space PetscMalloc()ed:                           total %5.4e max %5.4e min %5.4e\n",gallocated,maxgallocated,mingallocated);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"Run with -memory_view to get maximum memory usage\n");CHKERRQ(ierr);
  } else if (allocated) {
    ierr = MPI_Reduce(&allocated,&gallocated,1,MPIU_PETSCLOGDOUBLE,MPI_SUM,0,comm);CHKERRQ(ierr);
    ierr = MPI_Reduce(&allocated,&maxgallocated,1,MPIU_PETSCLOGDOUBLE,MPI_MAX,0,comm);CHKERRQ(ierr);
    ierr = MPI_Reduce(&allocated,&mingallocated,1,MPIU_PETSCLOGDOUBLE,MPI_MIN,0,comm);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"Current space PetscMalloc()ed:                           total %5.4e max %5.4e min %5.4e\n",gallocated,maxgallocated,mingallocated);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"Run with -memory_view to get maximum memory usage\n");CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"OS cannot compute process memory\n");CHKERRQ(ierr);
  } else {
    ierr = PetscViewerASCIIPrintf(viewer,"Run with -malloc to get statistics on PetscMalloc() calls\nOS cannot compute process memory\n");CHKERRQ(ierr);
  }
  ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Beispiel #2
0
static PetscErrorCode TaoSolve_NTR(Tao tao)
{
  TAO_NTR            *tr = (TAO_NTR *)tao->data;
  PC                 pc;
  KSPConvergedReason ksp_reason;
  TaoConvergedReason reason;
  PetscReal          fmin, ftrial, prered, actred, kappa, sigma, beta;
  PetscReal          tau, tau_1, tau_2, tau_max, tau_min, max_radius;
  PetscReal          f, gnorm;

  PetscReal          delta;
  PetscReal          norm_d;
  PetscErrorCode     ierr;
  PetscInt           iter = 0;
  PetscInt           bfgsUpdates = 0;
  PetscInt           needH;

  PetscInt           i_max = 5;
  PetscInt           j_max = 1;
  PetscInt           i, j, N, n, its;

  PetscFunctionBegin;
  if (tao->XL || tao->XU || tao->ops->computebounds) {
    ierr = PetscPrintf(((PetscObject)tao)->comm,"WARNING: Variable bounds have been set but will be ignored by ntr algorithm\n");CHKERRQ(ierr);
  }

  tao->trust = tao->trust0;

  /* Modify the radius if it is too large or small */
  tao->trust = PetscMax(tao->trust, tr->min_radius);
  tao->trust = PetscMin(tao->trust, tr->max_radius);


  if (NTR_PC_BFGS == tr->pc_type && !tr->M) {
    ierr = VecGetLocalSize(tao->solution,&n);CHKERRQ(ierr);
    ierr = VecGetSize(tao->solution,&N);CHKERRQ(ierr);
    ierr = MatCreateLMVM(((PetscObject)tao)->comm,n,N,&tr->M);CHKERRQ(ierr);
    ierr = MatLMVMAllocateVectors(tr->M,tao->solution);CHKERRQ(ierr);
  }

  /* Check convergence criteria */
  ierr = TaoComputeObjectiveAndGradient(tao, tao->solution, &f, tao->gradient);CHKERRQ(ierr);
  ierr = VecNorm(tao->gradient,NORM_2,&gnorm);CHKERRQ(ierr);
  if (PetscIsInfOrNanReal(f) || PetscIsInfOrNanReal(gnorm)) SETERRQ(PETSC_COMM_SELF,1, "User provided compute function generated Inf or NaN");
  needH = 1;

  ierr = TaoMonitor(tao, iter, f, gnorm, 0.0, 1.0, &reason);CHKERRQ(ierr);
  if (reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0);

  /* Create vectors for the limited memory preconditioner */
  if ((NTR_PC_BFGS == tr->pc_type) &&
      (BFGS_SCALE_BFGS != tr->bfgs_scale_type)) {
    if (!tr->Diag) {
        ierr = VecDuplicate(tao->solution, &tr->Diag);CHKERRQ(ierr);
    }
  }

  switch(tr->ksp_type) {
  case NTR_KSP_NASH:
    ierr = KSPSetType(tao->ksp, KSPNASH);CHKERRQ(ierr);
    if (tao->ksp->ops->setfromoptions) {
      (*tao->ksp->ops->setfromoptions)(tao->ksp);
    }
    break;

  case NTR_KSP_STCG:
    ierr = KSPSetType(tao->ksp, KSPSTCG);CHKERRQ(ierr);
    if (tao->ksp->ops->setfromoptions) {
      (*tao->ksp->ops->setfromoptions)(tao->ksp);
    }
    break;

  default:
    ierr = KSPSetType(tao->ksp, KSPGLTR);CHKERRQ(ierr);
    if (tao->ksp->ops->setfromoptions) {
      (*tao->ksp->ops->setfromoptions)(tao->ksp);
    }
    break;
  }

  /*  Modify the preconditioner to use the bfgs approximation */
  ierr = KSPGetPC(tao->ksp, &pc);CHKERRQ(ierr);
  switch(tr->pc_type) {
  case NTR_PC_NONE:
    ierr = PCSetType(pc, PCNONE);CHKERRQ(ierr);
    if (pc->ops->setfromoptions) {
      (*pc->ops->setfromoptions)(pc);
    }
    break;

  case NTR_PC_AHESS:
    ierr = PCSetType(pc, PCJACOBI);CHKERRQ(ierr);
    if (pc->ops->setfromoptions) {
      (*pc->ops->setfromoptions)(pc);
    }
    ierr = PCJacobiSetUseAbs(pc);CHKERRQ(ierr);
    break;

  case NTR_PC_BFGS:
    ierr = PCSetType(pc, PCSHELL);CHKERRQ(ierr);
    if (pc->ops->setfromoptions) {
      (*pc->ops->setfromoptions)(pc);
    }
    ierr = PCShellSetName(pc, "bfgs");CHKERRQ(ierr);
    ierr = PCShellSetContext(pc, tr->M);CHKERRQ(ierr);
    ierr = PCShellSetApply(pc, MatLMVMSolveShell);CHKERRQ(ierr);
    break;

  default:
    /*  Use the pc method set by pc_type */
    break;
  }

  /*  Initialize trust-region radius */
  switch(tr->init_type) {
  case NTR_INIT_CONSTANT:
    /*  Use the initial radius specified */
    break;

  case NTR_INIT_INTERPOLATION:
    /*  Use the initial radius specified */
    max_radius = 0.0;

    for (j = 0; j < j_max; ++j) {
      fmin = f;
      sigma = 0.0;

      if (needH) {
        ierr = TaoComputeHessian(tao,tao->solution,tao->hessian,tao->hessian_pre);CHKERRQ(ierr);
        needH = 0;
      }

      for (i = 0; i < i_max; ++i) {

        ierr = VecCopy(tao->solution, tr->W);CHKERRQ(ierr);
        ierr = VecAXPY(tr->W, -tao->trust/gnorm, tao->gradient);CHKERRQ(ierr);
        ierr = TaoComputeObjective(tao, tr->W, &ftrial);CHKERRQ(ierr);

        if (PetscIsInfOrNanReal(ftrial)) {
          tau = tr->gamma1_i;
        }
        else {
          if (ftrial < fmin) {
            fmin = ftrial;
            sigma = -tao->trust / gnorm;
          }

          ierr = MatMult(tao->hessian, tao->gradient, tao->stepdirection);CHKERRQ(ierr);
          ierr = VecDot(tao->gradient, tao->stepdirection, &prered);CHKERRQ(ierr);

          prered = tao->trust * (gnorm - 0.5 * tao->trust * prered / (gnorm * gnorm));
          actred = f - ftrial;
          if ((PetscAbsScalar(actred) <= tr->epsilon) &&
              (PetscAbsScalar(prered) <= tr->epsilon)) {
            kappa = 1.0;
          }
          else {
            kappa = actred / prered;
          }

          tau_1 = tr->theta_i * gnorm * tao->trust / (tr->theta_i * gnorm * tao->trust + (1.0 - tr->theta_i) * prered - actred);
          tau_2 = tr->theta_i * gnorm * tao->trust / (tr->theta_i * gnorm * tao->trust - (1.0 + tr->theta_i) * prered + actred);
          tau_min = PetscMin(tau_1, tau_2);
          tau_max = PetscMax(tau_1, tau_2);

          if (PetscAbsScalar(kappa - 1.0) <= tr->mu1_i) {
            /*  Great agreement */
            max_radius = PetscMax(max_radius, tao->trust);

            if (tau_max < 1.0) {
              tau = tr->gamma3_i;
            }
            else if (tau_max > tr->gamma4_i) {
              tau = tr->gamma4_i;
            }
            else {
              tau = tau_max;
            }
          }
          else if (PetscAbsScalar(kappa - 1.0) <= tr->mu2_i) {
            /*  Good agreement */
            max_radius = PetscMax(max_radius, tao->trust);

            if (tau_max < tr->gamma2_i) {
              tau = tr->gamma2_i;
            }
            else if (tau_max > tr->gamma3_i) {
              tau = tr->gamma3_i;
            }
            else {
              tau = tau_max;
            }
          }
          else {
            /*  Not good agreement */
            if (tau_min > 1.0) {
              tau = tr->gamma2_i;
            }
            else if (tau_max < tr->gamma1_i) {
              tau = tr->gamma1_i;
            }
            else if ((tau_min < tr->gamma1_i) && (tau_max >= 1.0)) {
              tau = tr->gamma1_i;
            }
            else if ((tau_1 >= tr->gamma1_i) && (tau_1 < 1.0) &&
                     ((tau_2 < tr->gamma1_i) || (tau_2 >= 1.0))) {
              tau = tau_1;
            }
            else if ((tau_2 >= tr->gamma1_i) && (tau_2 < 1.0) &&
                     ((tau_1 < tr->gamma1_i) || (tau_2 >= 1.0))) {
              tau = tau_2;
            }
            else {
              tau = tau_max;
            }
          }
        }
        tao->trust = tau * tao->trust;
      }

      if (fmin < f) {
        f = fmin;
        ierr = VecAXPY(tao->solution, sigma, tao->gradient);CHKERRQ(ierr);
        ierr = TaoComputeGradient(tao,tao->solution, tao->gradient);CHKERRQ(ierr);

        ierr = VecNorm(tao->gradient, NORM_2, &gnorm);CHKERRQ(ierr);

        if (PetscIsInfOrNanReal(f) || PetscIsInfOrNanReal(gnorm)) SETERRQ(PETSC_COMM_SELF,1, "User provided compute function generated Inf or NaN");
        needH = 1;

        ierr = TaoMonitor(tao, iter, f, gnorm, 0.0, 1.0, &reason);CHKERRQ(ierr);
        if (reason != TAO_CONTINUE_ITERATING) {
          PetscFunctionReturn(0);
        }
      }
    }
    tao->trust = PetscMax(tao->trust, max_radius);

    /*  Modify the radius if it is too large or small */
    tao->trust = PetscMax(tao->trust, tr->min_radius);
    tao->trust = PetscMin(tao->trust, tr->max_radius);
    break;

  default:
    /*  Norm of the first direction will initialize radius */
    tao->trust = 0.0;
    break;
  }

  /* Set initial scaling for the BFGS preconditioner
     This step is done after computing the initial trust-region radius
     since the function value may have decreased */
  if (NTR_PC_BFGS == tr->pc_type) {
    if (f != 0.0) {
      delta = 2.0 * PetscAbsScalar(f) / (gnorm*gnorm);
    }
    else {
      delta = 2.0 / (gnorm*gnorm);
    }
    ierr = MatLMVMSetDelta(tr->M,delta);CHKERRQ(ierr);
  }

  /* Have not converged; continue with Newton method */
  while (reason == TAO_CONTINUE_ITERATING) {
    ++iter;

    /* Compute the Hessian */
    if (needH) {
      ierr = TaoComputeHessian(tao,tao->solution,tao->hessian,tao->hessian_pre);CHKERRQ(ierr);
      needH = 0;
    }

    if (NTR_PC_BFGS == tr->pc_type) {
      if (BFGS_SCALE_AHESS == tr->bfgs_scale_type) {
        /* Obtain diagonal for the bfgs preconditioner */
        ierr = MatGetDiagonal(tao->hessian, tr->Diag);CHKERRQ(ierr);
        ierr = VecAbs(tr->Diag);CHKERRQ(ierr);
        ierr = VecReciprocal(tr->Diag);CHKERRQ(ierr);
        ierr = MatLMVMSetScale(tr->M,tr->Diag);CHKERRQ(ierr);
      }

      /* Update the limited memory preconditioner */
      ierr = MatLMVMUpdate(tr->M, tao->solution, tao->gradient);CHKERRQ(ierr);
      ++bfgsUpdates;
    }

    while (reason == TAO_CONTINUE_ITERATING) {
      ierr = KSPSetOperators(tao->ksp, tao->hessian, tao->hessian_pre);CHKERRQ(ierr);

      /* Solve the trust region subproblem */
      if (NTR_KSP_NASH == tr->ksp_type) {
        ierr = KSPNASHSetRadius(tao->ksp,tao->trust);CHKERRQ(ierr);
        ierr = KSPSolve(tao->ksp, tao->gradient, tao->stepdirection);CHKERRQ(ierr);
        ierr = KSPGetIterationNumber(tao->ksp,&its);CHKERRQ(ierr);
        tao->ksp_its+=its;
        ierr = KSPNASHGetNormD(tao->ksp, &norm_d);CHKERRQ(ierr);
      } else if (NTR_KSP_STCG == tr->ksp_type) {
        ierr = KSPSTCGSetRadius(tao->ksp,tao->trust);CHKERRQ(ierr);
        ierr = KSPSolve(tao->ksp, tao->gradient, tao->stepdirection);CHKERRQ(ierr);
        ierr = KSPGetIterationNumber(tao->ksp,&its);CHKERRQ(ierr);
        tao->ksp_its+=its;
        ierr = KSPSTCGGetNormD(tao->ksp, &norm_d);CHKERRQ(ierr);
      } else { /* NTR_KSP_GLTR */
        ierr = KSPGLTRSetRadius(tao->ksp,tao->trust);CHKERRQ(ierr);
        ierr = KSPSolve(tao->ksp, tao->gradient, tao->stepdirection);CHKERRQ(ierr);
        ierr = KSPGetIterationNumber(tao->ksp,&its);CHKERRQ(ierr);
        tao->ksp_its+=its;
        ierr = KSPGLTRGetNormD(tao->ksp, &norm_d);CHKERRQ(ierr);
      }

      if (0.0 == tao->trust) {
        /* Radius was uninitialized; use the norm of the direction */
        if (norm_d > 0.0) {
          tao->trust = norm_d;

          /* Modify the radius if it is too large or small */
          tao->trust = PetscMax(tao->trust, tr->min_radius);
          tao->trust = PetscMin(tao->trust, tr->max_radius);
        }
        else {
          /* The direction was bad; set radius to default value and re-solve
             the trust-region subproblem to get a direction */
          tao->trust = tao->trust0;

          /* Modify the radius if it is too large or small */
          tao->trust = PetscMax(tao->trust, tr->min_radius);
          tao->trust = PetscMin(tao->trust, tr->max_radius);

          if (NTR_KSP_NASH == tr->ksp_type) {
            ierr = KSPNASHSetRadius(tao->ksp,tao->trust);CHKERRQ(ierr);
            ierr = KSPSolve(tao->ksp, tao->gradient, tao->stepdirection);CHKERRQ(ierr);
            ierr = KSPGetIterationNumber(tao->ksp,&its);CHKERRQ(ierr);
            tao->ksp_its+=its;
            ierr = KSPNASHGetNormD(tao->ksp, &norm_d);CHKERRQ(ierr);
          } else if (NTR_KSP_STCG == tr->ksp_type) {
            ierr = KSPSTCGSetRadius(tao->ksp,tao->trust);CHKERRQ(ierr);
            ierr = KSPSolve(tao->ksp, tao->gradient, tao->stepdirection);CHKERRQ(ierr);
            ierr = KSPGetIterationNumber(tao->ksp,&its);CHKERRQ(ierr);
            tao->ksp_its+=its;
            ierr = KSPSTCGGetNormD(tao->ksp, &norm_d);CHKERRQ(ierr);
          } else { /* NTR_KSP_GLTR */
            ierr = KSPGLTRSetRadius(tao->ksp,tao->trust);CHKERRQ(ierr);
            ierr = KSPSolve(tao->ksp, tao->gradient, tao->stepdirection);CHKERRQ(ierr);
            ierr = KSPGetIterationNumber(tao->ksp,&its);CHKERRQ(ierr);
            tao->ksp_its+=its;
            ierr = KSPGLTRGetNormD(tao->ksp, &norm_d);CHKERRQ(ierr);
          }

          if (norm_d == 0.0) SETERRQ(PETSC_COMM_SELF,1, "Initial direction zero");
        }
      }
      ierr = VecScale(tao->stepdirection, -1.0);CHKERRQ(ierr);
      ierr = KSPGetConvergedReason(tao->ksp, &ksp_reason);CHKERRQ(ierr);
      if ((KSP_DIVERGED_INDEFINITE_PC == ksp_reason) &&
          (NTR_PC_BFGS == tr->pc_type) && (bfgsUpdates > 1)) {
        /* Preconditioner is numerically indefinite; reset the
           approximate if using BFGS preconditioning. */

        if (f != 0.0) {
          delta = 2.0 * PetscAbsScalar(f) / (gnorm*gnorm);
        }
        else {
          delta = 2.0 / (gnorm*gnorm);
        }
        ierr = MatLMVMSetDelta(tr->M, delta);CHKERRQ(ierr);
        ierr = MatLMVMReset(tr->M);CHKERRQ(ierr);
        ierr = MatLMVMUpdate(tr->M, tao->solution, tao->gradient);CHKERRQ(ierr);
        bfgsUpdates = 1;
      }

      if (NTR_UPDATE_REDUCTION == tr->update_type) {
        /* Get predicted reduction */
        if (NTR_KSP_NASH == tr->ksp_type) {
          ierr = KSPNASHGetObjFcn(tao->ksp,&prered);CHKERRQ(ierr);
        } else if (NTR_KSP_STCG == tr->ksp_type) {
          ierr = KSPSTCGGetObjFcn(tao->ksp,&prered);CHKERRQ(ierr);
        } else { /* gltr */
          ierr = KSPGLTRGetObjFcn(tao->ksp,&prered);CHKERRQ(ierr);
        }

        if (prered >= 0.0) {
          /* The predicted reduction has the wrong sign.  This cannot
             happen in infinite precision arithmetic.  Step should
             be rejected! */
          tao->trust = tr->alpha1 * PetscMin(tao->trust, norm_d);
        }
        else {
          /* Compute trial step and function value */
          ierr = VecCopy(tao->solution,tr->W);CHKERRQ(ierr);
          ierr = VecAXPY(tr->W, 1.0, tao->stepdirection);CHKERRQ(ierr);
          ierr = TaoComputeObjective(tao, tr->W, &ftrial);CHKERRQ(ierr);

          if (PetscIsInfOrNanReal(ftrial)) {
            tao->trust = tr->alpha1 * PetscMin(tao->trust, norm_d);
          } else {
            /* Compute and actual reduction */
            actred = f - ftrial;
            prered = -prered;
            if ((PetscAbsScalar(actred) <= tr->epsilon) &&
                (PetscAbsScalar(prered) <= tr->epsilon)) {
              kappa = 1.0;
            }
            else {
              kappa = actred / prered;
            }

            /* Accept or reject the step and update radius */
            if (kappa < tr->eta1) {
              /* Reject the step */
              tao->trust = tr->alpha1 * PetscMin(tao->trust, norm_d);
            }
            else {
              /* Accept the step */
              if (kappa < tr->eta2) {
                /* Marginal bad step */
                tao->trust = tr->alpha2 * PetscMin(tao->trust, norm_d);
              }
              else if (kappa < tr->eta3) {
                /* Reasonable step */
                tao->trust = tr->alpha3 * tao->trust;
              }
              else if (kappa < tr->eta4) {
                /* Good step */
                tao->trust = PetscMax(tr->alpha4 * norm_d, tao->trust);
              }
              else {
                /* Very good step */
                tao->trust = PetscMax(tr->alpha5 * norm_d, tao->trust);
              }
              break;
            }
          }
        }
      }
      else {
        /* Get predicted reduction */
        if (NTR_KSP_NASH == tr->ksp_type) {
          ierr = KSPNASHGetObjFcn(tao->ksp,&prered);CHKERRQ(ierr);
        } else if (NTR_KSP_STCG == tr->ksp_type) {
          ierr = KSPSTCGGetObjFcn(tao->ksp,&prered);CHKERRQ(ierr);
        } else { /* gltr */
          ierr = KSPGLTRGetObjFcn(tao->ksp,&prered);CHKERRQ(ierr);
        }

        if (prered >= 0.0) {
          /* The predicted reduction has the wrong sign.  This cannot
             happen in infinite precision arithmetic.  Step should
             be rejected! */
          tao->trust = tr->gamma1 * PetscMin(tao->trust, norm_d);
        }
        else {
          ierr = VecCopy(tao->solution, tr->W);CHKERRQ(ierr);
          ierr = VecAXPY(tr->W, 1.0, tao->stepdirection);CHKERRQ(ierr);
          ierr = TaoComputeObjective(tao, tr->W, &ftrial);CHKERRQ(ierr);
          if (PetscIsInfOrNanReal(ftrial)) {
            tao->trust = tr->gamma1 * PetscMin(tao->trust, norm_d);
          }
          else {
            ierr = VecDot(tao->gradient, tao->stepdirection, &beta);CHKERRQ(ierr);
            actred = f - ftrial;
            prered = -prered;
            if ((PetscAbsScalar(actred) <= tr->epsilon) &&
                (PetscAbsScalar(prered) <= tr->epsilon)) {
              kappa = 1.0;
            }
            else {
              kappa = actred / prered;
            }

            tau_1 = tr->theta * beta / (tr->theta * beta - (1.0 - tr->theta) * prered + actred);
            tau_2 = tr->theta * beta / (tr->theta * beta + (1.0 + tr->theta) * prered - actred);
            tau_min = PetscMin(tau_1, tau_2);
            tau_max = PetscMax(tau_1, tau_2);

            if (kappa >= 1.0 - tr->mu1) {
              /* Great agreement; accept step and update radius */
              if (tau_max < 1.0) {
                tao->trust = PetscMax(tao->trust, tr->gamma3 * norm_d);
              }
              else if (tau_max > tr->gamma4) {
                tao->trust = PetscMax(tao->trust, tr->gamma4 * norm_d);
              }
              else {
                tao->trust = PetscMax(tao->trust, tau_max * norm_d);
              }
              break;
            }
            else if (kappa >= 1.0 - tr->mu2) {
              /* Good agreement */

              if (tau_max < tr->gamma2) {
                tao->trust = tr->gamma2 * PetscMin(tao->trust, norm_d);
              }
              else if (tau_max > tr->gamma3) {
                tao->trust = PetscMax(tao->trust, tr->gamma3 * norm_d);
              }
              else if (tau_max < 1.0) {
                tao->trust = tau_max * PetscMin(tao->trust, norm_d);
              }
              else {
                tao->trust = PetscMax(tao->trust, tau_max * norm_d);
              }
              break;
            }
            else {
              /* Not good agreement */
              if (tau_min > 1.0) {
                tao->trust = tr->gamma2 * PetscMin(tao->trust, norm_d);
              }
              else if (tau_max < tr->gamma1) {
                tao->trust = tr->gamma1 * PetscMin(tao->trust, norm_d);
              }
              else if ((tau_min < tr->gamma1) && (tau_max >= 1.0)) {
                tao->trust = tr->gamma1 * PetscMin(tao->trust, norm_d);
              }
              else if ((tau_1 >= tr->gamma1) && (tau_1 < 1.0) &&
                       ((tau_2 < tr->gamma1) || (tau_2 >= 1.0))) {
                tao->trust = tau_1 * PetscMin(tao->trust, norm_d);
              }
              else if ((tau_2 >= tr->gamma1) && (tau_2 < 1.0) &&
                       ((tau_1 < tr->gamma1) || (tau_2 >= 1.0))) {
                tao->trust = tau_2 * PetscMin(tao->trust, norm_d);
              }
              else {
                tao->trust = tau_max * PetscMin(tao->trust, norm_d);
              }
            }
          }
        }
      }

      /* The step computed was not good and the radius was decreased.
         Monitor the radius to terminate. */
      ierr = TaoMonitor(tao, iter, f, gnorm, 0.0, tao->trust, &reason);CHKERRQ(ierr);
    }

    /* The radius may have been increased; modify if it is too large */
    tao->trust = PetscMin(tao->trust, tr->max_radius);

    if (reason == TAO_CONTINUE_ITERATING) {
      ierr = VecCopy(tr->W, tao->solution);CHKERRQ(ierr);
      f = ftrial;
      ierr = TaoComputeGradient(tao, tao->solution, tao->gradient);
      ierr = VecNorm(tao->gradient, NORM_2, &gnorm);CHKERRQ(ierr);
      if (PetscIsInfOrNanReal(f) || PetscIsInfOrNanReal(gnorm)) SETERRQ(PETSC_COMM_SELF,1, "User provided compute function generated Inf or NaN");
      needH = 1;
      ierr = TaoMonitor(tao, iter, f, gnorm, 0.0, tao->trust, &reason);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}
static PetscErrorCode KSPPGMRESCycle(PetscInt *itcount,KSP ksp)
{
  KSP_PGMRES     *pgmres = (KSP_PGMRES*)(ksp->data);
  PetscReal      res_norm,res,newnorm;
  PetscErrorCode ierr;
  PetscInt       it     = 0,j,k;
  PetscBool      hapend = PETSC_FALSE;

  PetscFunctionBegin;
  if (itcount) *itcount = 0;
  ierr   = VecNormalize(VEC_VV(0),&res_norm);CHKERRQ(ierr);
  res    = res_norm;
  *RS(0) = res_norm;

  /* check for the convergence */
  ierr       = PetscObjectAMSTakeAccess((PetscObject)ksp);CHKERRQ(ierr);
  ksp->rnorm = res;
  ierr       = PetscObjectAMSGrantAccess((PetscObject)ksp);CHKERRQ(ierr);
  pgmres->it = it-2;
  ierr = KSPLogResidualHistory(ksp,res);CHKERRQ(ierr);
  ierr = KSPMonitor(ksp,ksp->its,res);CHKERRQ(ierr);
  if (!res) {
    ksp->reason = KSP_CONVERGED_ATOL;
    ierr        = PetscInfo(ksp,"Converged due to zero residual norm on entry\n");CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  ierr = (*ksp->converged)(ksp,ksp->its,res,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
  for (; !ksp->reason; it++) {
    Vec Zcur,Znext;
    if (pgmres->vv_allocated <= it + VEC_OFFSET + 1) {
      ierr = KSPGMRESGetNewVectors(ksp,it+1);CHKERRQ(ierr);
    }
    /* VEC_VV(it-1) is orthogonal, it will be normalized once the VecNorm arrives. */
    Zcur  = VEC_VV(it);         /* Zcur is not yet orthogonal, but the VecMDot to orthogonalize it has been started. */
    Znext = VEC_VV(it+1);       /* This iteration will compute Znext, update with a deferred correction once we know how
                                 * Zcur relates to the previous vectors, and start the reduction to orthogonalize it. */

    if (it < pgmres->max_k+1 && ksp->its+1 < PetscMax(2,ksp->max_it)) { /* We don't know whether what we have computed is enough, so apply the matrix. */
      ierr = KSP_PCApplyBAorAB(ksp,Zcur,Znext,VEC_TEMP_MATOP);CHKERRQ(ierr);
    }

    if (it > 1) {               /* Complete the pending reduction */
      ierr           = VecNormEnd(VEC_VV(it-1),NORM_2,&newnorm);CHKERRQ(ierr);
      *HH(it-1,it-2) = newnorm;
    }
    if (it > 0) {               /* Finish the reduction computing the latest column of H */
      ierr = VecMDotEnd(Zcur,it,&(VEC_VV(0)),HH(0,it-1));CHKERRQ(ierr);
    }

    if (it > 1) {
      /* normalize the base vector from two iterations ago, basis is complete up to here */
      ierr = VecScale(VEC_VV(it-1),1./ *HH(it-1,it-2));CHKERRQ(ierr);

      ierr       = KSPPGMRESUpdateHessenberg(ksp,it-2,&hapend,&res);CHKERRQ(ierr);
      pgmres->it = it-2;
      ksp->its++;
      ksp->rnorm = res;

      ierr = (*ksp->converged)(ksp,ksp->its,res,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
      if (it < pgmres->max_k+1 || ksp->reason || ksp->its == ksp->max_it) {  /* Monitor if we are done or still iterating, but not before a restart. */
        ierr = KSPLogResidualHistory(ksp,res);CHKERRQ(ierr);
        ierr = KSPMonitor(ksp,ksp->its,res);CHKERRQ(ierr);
      }
      if (ksp->reason) break;
      /* Catch error in happy breakdown and signal convergence and break from loop */
      if (hapend) {
        if (ksp->errorifnotconverged) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"You reached the happy break down, but convergence was not indicated. Residual norm = %G",res);
        else {
          ksp->reason = KSP_DIVERGED_BREAKDOWN;
          break;
        }
      }

      if (!(it < pgmres->max_k+1 && ksp->its < ksp->max_it)) break;

      /* The it-2 column of H was not scaled when we computed Zcur, apply correction */
      ierr = VecScale(Zcur,1./ *HH(it-1,it-2));CHKERRQ(ierr);
      /* And Znext computed in this iteration was computed using the under-scaled Zcur */
      ierr = VecScale(Znext,1./ *HH(it-1,it-2));CHKERRQ(ierr);

      /* In the previous iteration, we projected an unnormalized Zcur against the Krylov basis, so we need to fix the column of H resulting from that projection. */
      for (k=0; k<it; k++) *HH(k,it-1) /= *HH(it-1,it-2);
      /* When Zcur was projected against the Krylov basis, VV(it-1) was still not normalized, so fix that too. This
       * column is complete except for HH(it,it-1) which we won't know until the next iteration. */
      *HH(it-1,it-1) /= *HH(it-1,it-2);
    }

    if (it > 0) {
      PetscScalar *work;
      if (!pgmres->orthogwork) {ierr = PetscMalloc((pgmres->max_k + 2)*sizeof(PetscScalar),&pgmres->orthogwork);CHKERRQ(ierr);}
      work = pgmres->orthogwork;
      /* Apply correction computed by the VecMDot in the last iteration to Znext. The original form is
       *
       *   Znext -= sum_{j=0}^{i-1} Z[j+1] * H[j,i-1]
       *
       * where
       *
       *   Z[j] = sum_{k=0}^j V[k] * H[k,j-1]
       *
       * substituting
       *
       *   Znext -= sum_{j=0}^{i-1} sum_{k=0}^{j+1} V[k] * H[k,j] * H[j,i-1]
       *
       * rearranging the iteration space from row-column to column-row
       *
       *   Znext -= sum_{k=0}^i sum_{j=k-1}^{i-1} V[k] * H[k,j] * H[j,i-1]
       *
       * Note that column it-1 of HH is correct. For all previous columns, we must look at HES because HH has already
       * been transformed to upper triangular form.
       */
      for (k=0; k<it+1; k++) {
        work[k] = 0;
        for (j=PetscMax(0,k-1); j<it-1; j++) work[k] -= *HES(k,j) * *HH(j,it-1);
      }
      ierr = VecMAXPY(Znext,it+1,work,&VEC_VV(0));CHKERRQ(ierr);
      ierr = VecAXPY(Znext,-*HH(it-1,it-1),Zcur);CHKERRQ(ierr);

      /* Orthogonalize Zcur against existing basis vectors. */
      for (k=0; k<it; k++) work[k] = -*HH(k,it-1);
      ierr = VecMAXPY(Zcur,it,work,&VEC_VV(0));CHKERRQ(ierr);
      /* Zcur is now orthogonal, and will be referred to as VEC_VV(it) again, though it is still not normalized. */
      /* Begin computing the norm of the new vector, will be normalized after the MatMult in the next iteration. */
      ierr = VecNormBegin(VEC_VV(it),NORM_2,&newnorm);CHKERRQ(ierr);
    }

    /* Compute column of H (to the diagonal, but not the subdiagonal) to be able to orthogonalize the newest vector. */
    ierr = VecMDotBegin(Znext,it+1,&VEC_VV(0),HH(0,it));CHKERRQ(ierr);

    /* Start an asynchronous split-mode reduction, the result of the MDot and Norm will be collected on the next iteration. */
    ierr = PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)Znext));CHKERRQ(ierr);
  }

  if (itcount) *itcount = it-1; /* Number of iterations actually completed. */

  /*
    Down here we have to solve for the "best" coefficients of the Krylov
    columns, add the solution values together, and possibly unwind the
    preconditioning from the solution
   */
  /* Form the solution (or the solution so far) */
  ierr = KSPPGMRESBuildSoln(RS(0),ksp->vec_sol,ksp->vec_sol,ksp,it-2);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Beispiel #4
0
/*
   IFunction - Evaluates nonlinear function, F(U).

   Input Parameters:
.  ts - the TS context
.  U - input vector
.  ptr - optional user-defined context, as set by SNESSetFunction()

   Output Parameter:
.  F - function vector
 */
PetscErrorCode IFunction(TS ts,PetscReal ftime,Vec U,Vec Udot,Vec F,void *ptr)
{
  AppCtx         *appctx = (AppCtx*)ptr;
  DM             da;
  PetscErrorCode ierr;
  PetscInt       i,Mx,xs,xm;
  PetscReal      hx,sx;
  PetscScalar    rho,c,rhoxx,cxx,cx,rhox,kcxrhox;
  Field          *u,*f,*udot;
  Vec            localU;

  PetscFunctionBegin;
  ierr = TSGetDM(ts,&da);CHKERRQ(ierr);
  ierr = DMGetLocalVector(da,&localU);CHKERRQ(ierr);
  ierr = DMDAGetInfo(da,PETSC_IGNORE,&Mx,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE);CHKERRQ(ierr);

  hx = 1.0/(PetscReal)(Mx-1); sx = 1.0/(hx*hx);

  /*
     Scatter ghost points to local vector,using the 2-step process
        DMGlobalToLocalBegin(),DMGlobalToLocalEnd().
     By placing code between these two statements, computations can be
     done while messages are in transition.
  */
  ierr = DMGlobalToLocalBegin(da,U,INSERT_VALUES,localU);CHKERRQ(ierr);
  ierr = DMGlobalToLocalEnd(da,U,INSERT_VALUES,localU);CHKERRQ(ierr);

  /*
     Get pointers to vector data
  */
  ierr = DMDAVecGetArrayRead(da,localU,&u);CHKERRQ(ierr);
  ierr = DMDAVecGetArrayRead(da,Udot,&udot);CHKERRQ(ierr);
  ierr = DMDAVecGetArray(da,F,&f);CHKERRQ(ierr);

  /*
     Get local grid boundaries
  */
  ierr = DMDAGetCorners(da,&xs,NULL,NULL,&xm,NULL,NULL);CHKERRQ(ierr);

  if (!xs) {
    f[0].rho = udot[0].rho; /* u[0].rho - 0.0; */
    f[0].c   = udot[0].c; /* u[0].c   - 1.0; */
    xs++;
    xm--;
  }
  if (xs+xm == Mx) {
    f[Mx-1].rho = udot[Mx-1].rho; /* u[Mx-1].rho - 1.0; */
    f[Mx-1].c   = udot[Mx-1].c;  /* u[Mx-1].c   - 0.0;  */
    xm--;
  }

  /*
     Compute function over the locally owned part of the grid
  */
  for (i=xs; i<xs+xm; i++) {
    rho   = u[i].rho;
    rhoxx = (-2.0*rho + u[i-1].rho + u[i+1].rho)*sx;
    c     = u[i].c;
    cxx   = (-2.0*c + u[i-1].c + u[i+1].c)*sx;

    if (!appctx->upwind) {
      rhox    = .5*(u[i+1].rho - u[i-1].rho)/hx;
      cx      = .5*(u[i+1].c - u[i-1].c)/hx;
      kcxrhox = appctx->kappa*(cxx*rho + cx*rhox);
    } else {
      kcxrhox = appctx->kappa*((u[i+1].c - u[i].c)*u[i+1].rho - (u[i].c - u[i-1].c)*u[i].rho)*sx;
    }

    f[i].rho = udot[i].rho - appctx->epsilon*rhoxx + kcxrhox  - appctx->mu*PetscAbsScalar(rho)*(1.0 - rho)*PetscMax(0,PetscRealPart(c - appctx->cstar)) + appctx->beta*rho;
    f[i].c   = udot[i].c - appctx->delta*cxx + appctx->lambda*c + appctx->alpha*rho*c/(appctx->gamma + c);
  }

  /*
     Restore vectors
  */
  ierr = DMDAVecRestoreArrayRead(da,localU,&u);CHKERRQ(ierr);
  ierr = DMDAVecRestoreArrayRead(da,Udot,&udot);CHKERRQ(ierr);
  ierr = DMDAVecRestoreArray(da,F,&f);CHKERRQ(ierr);
  ierr = DMRestoreLocalVector(da,&localU);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Beispiel #5
0
int main(int argc,char **args)
{
  MatType         mtype = MATMPIAIJ; /* matrix format */
  Mat             A,B;               /* matrix */
  PetscViewer     fd;                /* viewer */
  char            file[PETSC_MAX_PATH_LEN];         /* input file name */
  PetscBool       flg,viewMats,viewIS,viewVecs;
  PetscInt        ierr,*nlocal,m,n;
  PetscMPIInt     rank,size;
  MatPartitioning part;
  IS              is,isn;
  Vec             xin, xout;
  VecScatter      scat;

  PetscInitialize(&argc,&args,(char*)0,help);
  ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
  ierr = PetscOptionsHasName(NULL, "-view_mats", &viewMats);CHKERRQ(ierr);
  ierr = PetscOptionsHasName(NULL, "-view_is", &viewIS);CHKERRQ(ierr);
  ierr = PetscOptionsHasName(NULL, "-view_vecs", &viewVecs);CHKERRQ(ierr);

  /*
     Determine file from which we read the matrix
  */
  ierr = PetscOptionsGetString(NULL,"-f",file,PETSC_MAX_PATH_LEN,&flg);CHKERRQ(ierr);

  /*
       Open binary file.  Note that we use FILE_MODE_READ to indicate
       reading from this file.
  */
  ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,file,FILE_MODE_READ,&fd);CHKERRQ(ierr);

  /*
      Load the matrix and vector; then destroy the viewer.
  */
  ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
  ierr = MatSetType(A,mtype);CHKERRQ(ierr);
  ierr = MatLoad(A,fd);CHKERRQ(ierr);
  ierr = VecCreate(PETSC_COMM_WORLD,&xin);CHKERRQ(ierr);
  ierr = VecLoad(xin,fd);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&fd);CHKERRQ(ierr);
  if (viewMats) {
    if (!rank) printf("Original matrix:\n");
    ierr = MatView(A,PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr);
  }
  if (viewVecs) {
    if (!rank) printf("Original vector:\n");
    ierr = VecView(xin,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
  }

  /* Partition the graph of the matrix */
  ierr = MatPartitioningCreate(PETSC_COMM_WORLD,&part);CHKERRQ(ierr);
  ierr = MatPartitioningSetAdjacency(part,A);CHKERRQ(ierr);
  ierr = MatPartitioningSetFromOptions(part);CHKERRQ(ierr);

  /* get new processor owner number of each vertex */
  ierr = MatPartitioningApply(part,&is);CHKERRQ(ierr);
  if (viewIS) {
    if (!rank) printf("IS1 - new processor ownership:\n");
    ierr = ISView(is,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
  }

  /* get new global number of each old global number */
  ierr = ISPartitioningToNumbering(is,&isn);CHKERRQ(ierr);
  if (viewIS) {
    if (!rank) printf("IS2 - new global numbering:\n");
    ierr = ISView(isn,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
  }

  /* get number of new vertices for each processor */
  ierr = PetscMalloc(size*sizeof(PetscInt),&nlocal);CHKERRQ(ierr);
  ierr = ISPartitioningCount(is,size,nlocal);CHKERRQ(ierr);
  ierr = ISDestroy(&is);CHKERRQ(ierr);

  /* get old global number of each new global number */
  ierr = ISInvertPermutation(isn,nlocal[rank],&is);CHKERRQ(ierr);
  ierr = PetscFree(nlocal);CHKERRQ(ierr);
  ierr = ISDestroy(&isn);CHKERRQ(ierr);
  ierr = MatPartitioningDestroy(&part);CHKERRQ(ierr);
  if (viewIS) {
    if (!rank) printf("IS3=inv(IS2) - old global number of each new global number:\n");
    ierr = ISView(is,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
  }

  /* move the matrix rows to the new processes they have been assigned to by the permutation */
  ierr = ISSort(is);CHKERRQ(ierr);
  ierr = MatGetSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
  ierr = MatDestroy(&A);CHKERRQ(ierr);

  /* move the vector rows to the new processes they have been assigned to */
  ierr = MatGetLocalSize(B,&m,&n);CHKERRQ(ierr);
  ierr = VecCreateMPI(PETSC_COMM_WORLD,m,PETSC_DECIDE,&xout);CHKERRQ(ierr);
  ierr = VecScatterCreate(xin,is,xout,NULL,&scat);CHKERRQ(ierr);
  ierr = VecScatterBegin(scat,xin,xout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(scat,xin,xout,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&scat);CHKERRQ(ierr);
  ierr = ISDestroy(&is);CHKERRQ(ierr);
  if (viewMats) {
    if (!rank) printf("Partitioned matrix:\n");
    ierr = MatView(B,PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr);
  }
  if (viewVecs) {
    if (!rank) printf("Mapped vector:\n");
    ierr = VecView(xout,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
  }

  {
    PetscInt          rstart,i,*nzd,*nzo,nzl,nzmax = 0,*ncols,nrow,j;
    Mat               J;
    const PetscInt    *cols;
    const PetscScalar *vals;
    PetscScalar       *nvals;

    ierr = MatGetOwnershipRange(B,&rstart,NULL);CHKERRQ(ierr);
    ierr = PetscMalloc(2*m*sizeof(PetscInt),&nzd);CHKERRQ(ierr);
    ierr = PetscMemzero(nzd,2*m*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMalloc(2*m*sizeof(PetscInt),&nzo);CHKERRQ(ierr);
    ierr = PetscMemzero(nzo,2*m*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      ierr = MatGetRow(B,i+rstart,&nzl,&cols,NULL);CHKERRQ(ierr);
      for (j=0; j<nzl; j++) {
        if (cols[j] >= rstart && cols[j] < rstart+n) {
          nzd[2*i] += 2;
          nzd[2*i+1] += 2;
        } else {
          nzo[2*i] += 2;
          nzo[2*i+1] += 2;
        }
      }
      nzmax = PetscMax(nzmax,nzd[2*i]+nzo[2*i]);
      ierr  = MatRestoreRow(B,i+rstart,&nzl,&cols,NULL);CHKERRQ(ierr);
    }
    ierr = MatCreateAIJ(PETSC_COMM_WORLD,2*m,2*m,PETSC_DECIDE,PETSC_DECIDE,0,nzd,0,nzo,&J);CHKERRQ(ierr);
    ierr = PetscInfo(0,"Created empty Jacobian matrix\n");CHKERRQ(ierr);
    ierr = PetscFree(nzd);CHKERRQ(ierr);
    ierr = PetscFree(nzo);CHKERRQ(ierr);
    ierr = PetscMalloc2(nzmax,PetscInt,&ncols,nzmax,PetscScalar,&nvals);CHKERRQ(ierr);
    ierr = PetscMemzero(nvals,nzmax*sizeof(PetscScalar));CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      ierr = MatGetRow(B,i+rstart,&nzl,&cols,&vals);CHKERRQ(ierr);
      for (j=0; j<nzl; j++) {
        ncols[2*j]   = 2*cols[j];
        ncols[2*j+1] = 2*cols[j]+1;
      }
      nrow = 2*(i+rstart);
      ierr = MatSetValues(J,1,&nrow,2*nzl,ncols,nvals,INSERT_VALUES);CHKERRQ(ierr);
      nrow = 2*(i+rstart) + 1;
      ierr = MatSetValues(J,1,&nrow,2*nzl,ncols,nvals,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatRestoreRow(B,i+rstart,&nzl,&cols,&vals);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    if (viewMats) {
      if (!rank) printf("Jacobian matrix structure:\n");
      ierr = MatView(J,PETSC_VIEWER_DRAW_WORLD);CHKERRQ(ierr);
    }
    ierr = MatDestroy(&J);CHKERRQ(ierr);
    ierr = PetscFree2(ncols,nvals);CHKERRQ(ierr);
  }

  /*
       Free work space.  All PETSc objects should be destroyed when they
       are no longer needed.
  */
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = VecDestroy(&xin);CHKERRQ(ierr);
  ierr = VecDestroy(&xout);CHKERRQ(ierr);
  ierr = PetscFinalize();
  return 0;
}
Beispiel #6
0
/*  data1, odata1 and odata2 are packed in the format (for communication):
       data[0]          = is_max, no of is
       data[1]          = size of is[0]
        ...
       data[is_max]     = size of is[is_max-1]
       data[is_max + 1] = data(is[0])
        ...
       data[is_max+1+sum(size of is[k]), k=0,...,i-1] = data(is[i])
        ...
   data2 is packed in the format (for creating output is[]):
       data[0]          = is_max, no of is
       data[1]          = size of is[0]
        ...
       data[is_max]     = size of is[is_max-1]
       data[is_max + 1] = data(is[0])
        ...
       data[is_max + 1 + Mbs*i) = data(is[i])
        ...
*/
static PetscErrorCode MatIncreaseOverlap_MPISBAIJ_Once(Mat C,PetscInt is_max,IS is[])
{
    Mat_MPISBAIJ   *c = (Mat_MPISBAIJ*)C->data;
    PetscErrorCode ierr;
    PetscMPIInt    size,rank,tag1,tag2,*len_s,nrqr,nrqs,*id_r1,*len_r1,flag,len,*iwork;
    const PetscInt *idx_i;
    PetscInt       idx,isz,col,*n,*data1,**data1_start,*data2,*data2_i,*data,*data_i;
    PetscInt       Mbs,i,j,k,*odata1,*odata2;
    PetscInt       proc_id,**odata2_ptr,*ctable=0,*btable,len_max,len_est;
    PetscInt       proc_end=0,len_unused,nodata2;
    PetscInt       ois_max; /* max no of is[] in each of processor */
    char           *t_p;
    MPI_Comm       comm;
    MPI_Request    *s_waits1,*s_waits2,r_req;
    MPI_Status     *s_status,r_status;
    PetscBT        *table;  /* mark indices of this processor's is[] */
    PetscBT        table_i;
    PetscBT        otable; /* mark indices of other processors' is[] */
    PetscInt       bs=C->rmap->bs,Bn = c->B->cmap->n,Bnbs = Bn/bs,*Bowners;
    IS             garray_local,garray_gl;

    PetscFunctionBegin;
    ierr = PetscObjectGetComm((PetscObject)C,&comm);
    CHKERRQ(ierr);
    size = c->size;
    rank = c->rank;
    Mbs  = c->Mbs;

    ierr = PetscObjectGetNewTag((PetscObject)C,&tag1);
    CHKERRQ(ierr);
    ierr = PetscObjectGetNewTag((PetscObject)C,&tag2);
    CHKERRQ(ierr);

    /* create tables used in
       step 1: table[i] - mark c->garray of proc [i]
       step 3: table[i] - mark indices of is[i] when whose=MINE
               table[0] - mark incideces of is[] when whose=OTHER */
    len  = PetscMax(is_max, size);
    CHKERRQ(ierr);
    ierr = PetscMalloc2(len,&table,(Mbs/PETSC_BITS_PER_BYTE+1)*len,&t_p);
    CHKERRQ(ierr);
    for (i=0; i<len; i++) {
        table[i] = t_p  + (Mbs/PETSC_BITS_PER_BYTE+1)*i;
    }

    ierr = MPIU_Allreduce(&is_max,&ois_max,1,MPIU_INT,MPI_MAX,comm);
    CHKERRQ(ierr);

    /* 1. Send this processor's is[] to other processors */
    /*---------------------------------------------------*/
    /* allocate spaces */
    ierr = PetscMalloc1(is_max,&n);
    CHKERRQ(ierr);
    len  = 0;
    for (i=0; i<is_max; i++) {
        ierr = ISGetLocalSize(is[i],&n[i]);
        CHKERRQ(ierr);
        len += n[i];
    }
    if (!len) {
        is_max = 0;
    } else {
        len += 1 + is_max; /* max length of data1 for one processor */
    }


    ierr = PetscMalloc1(size*len+1,&data1);
    CHKERRQ(ierr);
    ierr = PetscMalloc1(size,&data1_start);
    CHKERRQ(ierr);
    for (i=0; i<size; i++) data1_start[i] = data1 + i*len;

    ierr = PetscMalloc4(size,&len_s,size,&btable,size,&iwork,size+1,&Bowners);
    CHKERRQ(ierr);

    /* gather c->garray from all processors */
    ierr = ISCreateGeneral(comm,Bnbs,c->garray,PETSC_COPY_VALUES,&garray_local);
    CHKERRQ(ierr);
    ierr = ISAllGather(garray_local, &garray_gl);
    CHKERRQ(ierr);
    ierr = ISDestroy(&garray_local);
    CHKERRQ(ierr);
    ierr = MPI_Allgather(&Bnbs,1,MPIU_INT,Bowners+1,1,MPIU_INT,comm);
    CHKERRQ(ierr);

    Bowners[0] = 0;
    for (i=0; i<size; i++) Bowners[i+1] += Bowners[i];

    if (is_max) {
        /* hash table ctable which maps c->row to proc_id) */
        ierr = PetscMalloc1(Mbs,&ctable);
        CHKERRQ(ierr);
        for (proc_id=0,j=0; proc_id<size; proc_id++) {
            for (; j<C->rmap->range[proc_id+1]/bs; j++) ctable[j] = proc_id;
        }

        /* hash tables marking c->garray */
        ierr = ISGetIndices(garray_gl,&idx_i);
        CHKERRQ(ierr);
        for (i=0; i<size; i++) {
            table_i = table[i];
            ierr    = PetscBTMemzero(Mbs,table_i);
            CHKERRQ(ierr);
            for (j = Bowners[i]; j<Bowners[i+1]; j++) { /* go through B cols of proc[i]*/
                ierr = PetscBTSet(table_i,idx_i[j]);
                CHKERRQ(ierr);
            }
        }
        ierr = ISRestoreIndices(garray_gl,&idx_i);
        CHKERRQ(ierr);
    }  /* if (is_max) */
    ierr = ISDestroy(&garray_gl);
    CHKERRQ(ierr);

    /* evaluate communication - mesg to who, length, and buffer space */
    for (i=0; i<size; i++) len_s[i] = 0;

    /* header of data1 */
    for (proc_id=0; proc_id<size; proc_id++) {
        iwork[proc_id]        = 0;
        *data1_start[proc_id] = is_max;
        data1_start[proc_id]++;
        for (j=0; j<is_max; j++) {
            if (proc_id == rank) {
                *data1_start[proc_id] = n[j];
            } else {
                *data1_start[proc_id] = 0;
            }
            data1_start[proc_id]++;
        }
    }

    for (i=0; i<is_max; i++) {
        ierr = ISGetIndices(is[i],&idx_i);
        CHKERRQ(ierr);
        for (j=0; j<n[i]; j++) {
            idx                = idx_i[j];
            *data1_start[rank] = idx;
            data1_start[rank]++; /* for local proccessing */
            proc_end           = ctable[idx];
            for (proc_id=0; proc_id<=proc_end; proc_id++) {  /* for others to process */
                if (proc_id == rank) continue; /* done before this loop */
                if (proc_id < proc_end && !PetscBTLookup(table[proc_id],idx)) continue; /* no need for sending idx to [proc_id] */
                *data1_start[proc_id] = idx;
                data1_start[proc_id]++;
                len_s[proc_id]++;
            }
        }
        /* update header data */
        for (proc_id=0; proc_id<size; proc_id++) {
            if (proc_id== rank) continue;
            *(data1 + proc_id*len + 1 + i) = len_s[proc_id] - iwork[proc_id];
            iwork[proc_id]                 = len_s[proc_id];
        }
        ierr = ISRestoreIndices(is[i],&idx_i);
        CHKERRQ(ierr);
    }

    nrqs = 0;
    nrqr = 0;
    for (i=0; i<size; i++) {
        data1_start[i] = data1 + i*len;
        if (len_s[i]) {
            nrqs++;
            len_s[i] += 1 + is_max; /* add no. of header msg */
        }
    }

    for (i=0; i<is_max; i++) {
        ierr = ISDestroy(&is[i]);
        CHKERRQ(ierr);
    }
    ierr = PetscFree(n);
    CHKERRQ(ierr);
    ierr = PetscFree(ctable);
    CHKERRQ(ierr);

    /* Determine the number of messages to expect, their lengths, from from-ids */
    ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&nrqr);
    CHKERRQ(ierr);
    ierr = PetscGatherMessageLengths(comm,nrqs,nrqr,len_s,&id_r1,&len_r1);
    CHKERRQ(ierr);

    /*  Now  post the sends */
    ierr = PetscMalloc2(size,&s_waits1,size,&s_waits2);
    CHKERRQ(ierr);
    k    = 0;
    for (proc_id=0; proc_id<size; proc_id++) {  /* send data1 to processor [proc_id] */
        if (len_s[proc_id]) {
            ierr = MPI_Isend(data1_start[proc_id],len_s[proc_id],MPIU_INT,proc_id,tag1,comm,s_waits1+k);
            CHKERRQ(ierr);
            k++;
        }
    }

    /* 2. Receive other's is[] and process. Then send back */
    /*-----------------------------------------------------*/
    len = 0;
    for (i=0; i<nrqr; i++) {
        if (len_r1[i] > len) len = len_r1[i];
    }
    ierr = PetscFree(len_r1);
    CHKERRQ(ierr);
    ierr = PetscFree(id_r1);
    CHKERRQ(ierr);

    for (proc_id=0; proc_id<size; proc_id++) len_s[proc_id] = iwork[proc_id] = 0;

    ierr = PetscMalloc1(len+1,&odata1);
    CHKERRQ(ierr);
    ierr = PetscMalloc1(size,&odata2_ptr);
    CHKERRQ(ierr);
    ierr = PetscBTCreate(Mbs,&otable);
    CHKERRQ(ierr);

    len_max = ois_max*(Mbs+1); /* max space storing all is[] for each receive */
    len_est = 2*len_max;       /* estimated space of storing is[] for all receiving messages */
    ierr    = PetscMalloc1(len_est+1,&odata2);
    CHKERRQ(ierr);
    nodata2 = 0;               /* nodata2+1: num of PetscMalloc(,&odata2_ptr[]) called */

    odata2_ptr[nodata2] = odata2;

    len_unused = len_est; /* unused space in the array odata2_ptr[nodata2]-- needs to be >= len_max  */

    k = 0;
    while (k < nrqr) {
        /* Receive messages */
        ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag1,comm,&flag,&r_status);
        CHKERRQ(ierr);
        if (flag) {
            ierr    = MPI_Get_count(&r_status,MPIU_INT,&len);
            CHKERRQ(ierr);
            proc_id = r_status.MPI_SOURCE;
            ierr    = MPI_Irecv(odata1,len,MPIU_INT,proc_id,r_status.MPI_TAG,comm,&r_req);
            CHKERRQ(ierr);
            ierr    = MPI_Wait(&r_req,&r_status);
            CHKERRQ(ierr);

            /*  Process messages */
            /*  make sure there is enough unused space in odata2 array */
            if (len_unused < len_max) { /* allocate more space for odata2 */
                ierr = PetscMalloc1(len_est+1,&odata2);
                CHKERRQ(ierr);

                odata2_ptr[++nodata2] = odata2;

                len_unused = len_est;
            }

            ierr = MatIncreaseOverlap_MPISBAIJ_Local(C,odata1,OTHER,odata2,&otable);
            CHKERRQ(ierr);
            len  = 1 + odata2[0];
            for (i=0; i<odata2[0]; i++) len += odata2[1 + i];

            /* Send messages back */
            ierr = MPI_Isend(odata2,len,MPIU_INT,proc_id,tag2,comm,s_waits2+k);
            CHKERRQ(ierr);
            k++;
            odata2        += len;
            len_unused    -= len;
            len_s[proc_id] = len; /* num of messages sending back to [proc_id] by this proc */
        }
    }
    ierr = PetscFree(odata1);
    CHKERRQ(ierr);
    ierr = PetscBTDestroy(&otable);
    CHKERRQ(ierr);

    /* 3. Do local work on this processor's is[] */
    /*-------------------------------------------*/
    /* make sure there is enough unused space in odata2(=data) array */
    len_max = is_max*(Mbs+1); /* max space storing all is[] for this processor */
    if (len_unused < len_max) { /* allocate more space for odata2 */
        ierr = PetscMalloc1(len_est+1,&odata2);
        CHKERRQ(ierr);

        odata2_ptr[++nodata2] = odata2;
    }

    data = odata2;
    ierr = MatIncreaseOverlap_MPISBAIJ_Local(C,data1_start[rank],MINE,data,table);
    CHKERRQ(ierr);
    ierr = PetscFree(data1_start);
    CHKERRQ(ierr);

    /* 4. Receive work done on other processors, then merge */
    /*------------------------------------------------------*/
    /* get max number of messages that this processor expects to recv */
    ierr = MPIU_Allreduce(len_s,iwork,size,MPI_INT,MPI_MAX,comm);
    CHKERRQ(ierr);
    ierr = PetscMalloc1(iwork[rank]+1,&data2);
    CHKERRQ(ierr);
    ierr = PetscFree4(len_s,btable,iwork,Bowners);
    CHKERRQ(ierr);

    k = 0;
    while (k < nrqs) {
        /* Receive messages */
        ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag2,comm,&flag,&r_status);
        CHKERRQ(ierr);
        if (flag) {
            ierr = MPI_Get_count(&r_status,MPIU_INT,&len);
            CHKERRQ(ierr);

            proc_id = r_status.MPI_SOURCE;

            ierr = MPI_Irecv(data2,len,MPIU_INT,proc_id,r_status.MPI_TAG,comm,&r_req);
            CHKERRQ(ierr);
            ierr = MPI_Wait(&r_req,&r_status);
            CHKERRQ(ierr);
            if (len > 1+is_max) { /* Add data2 into data */
                data2_i = data2 + 1 + is_max;
                for (i=0; i<is_max; i++) {
                    table_i = table[i];
                    data_i  = data + 1 + is_max + Mbs*i;
                    isz     = data[1+i];
                    for (j=0; j<data2[1+i]; j++) {
                        col = data2_i[j];
                        if (!PetscBTLookupSet(table_i,col)) data_i[isz++] = col;
                    }
                    data[1+i] = isz;
                    if (i < is_max - 1) data2_i += data2[1+i];
                }
            }
            k++;
        }
    }
    ierr = PetscFree(data2);
    CHKERRQ(ierr);
    ierr = PetscFree2(table,t_p);
    CHKERRQ(ierr);

    /* phase 1 sends are complete */
    ierr = PetscMalloc1(size,&s_status);
    CHKERRQ(ierr);
    if (nrqs) {
        ierr = MPI_Waitall(nrqs,s_waits1,s_status);
        CHKERRQ(ierr);
    }
    ierr = PetscFree(data1);
    CHKERRQ(ierr);

    /* phase 2 sends are complete */
    if (nrqr) {
        ierr = MPI_Waitall(nrqr,s_waits2,s_status);
        CHKERRQ(ierr);
    }
    ierr = PetscFree2(s_waits1,s_waits2);
    CHKERRQ(ierr);
    ierr = PetscFree(s_status);
    CHKERRQ(ierr);

    /* 5. Create new is[] */
    /*--------------------*/
    for (i=0; i<is_max; i++) {
        data_i = data + 1 + is_max + Mbs*i;
        ierr   = ISCreateGeneral(PETSC_COMM_SELF,data[1+i],data_i,PETSC_COPY_VALUES,is+i);
        CHKERRQ(ierr);
    }
    for (k=0; k<=nodata2; k++) {
        ierr = PetscFree(odata2_ptr[k]);
        CHKERRQ(ierr);
    }
    ierr = PetscFree(odata2_ptr);
    CHKERRQ(ierr);
    PetscFunctionReturn(0);
}
Beispiel #7
0
PetscErrorCode NEPSolve_RII(NEP nep)
{
  PetscErrorCode     ierr;
  Mat                T=nep->function,Tp=nep->jacobian,Tsigma;
  Vec                u,r=nep->work[0],delta=nep->work[1];
  PetscScalar        lambda,a1,a2;
  PetscReal          relerr;
  PetscBool          hascopy;
  KSPConvergedReason kspreason;

  PetscFunctionBegin;
  /* get initial approximation of eigenvalue and eigenvector */
  ierr = NEPGetDefaultShift(nep,&lambda);CHKERRQ(ierr);
  if (!nep->nini) {
    ierr = BVSetRandomColumn(nep->V,0,nep->rand);CHKERRQ(ierr);
  }
  ierr = BVGetColumn(nep->V,0,&u);CHKERRQ(ierr);

  /* correct eigenvalue approximation: lambda = lambda - (u'*T*u)/(u'*Tp*u) */
  ierr = NEPComputeFunction(nep,lambda,T,T);CHKERRQ(ierr);
  ierr = MatMult(T,u,r);CHKERRQ(ierr);
  ierr = VecDot(u,r,&a1);CHKERRQ(ierr);
  ierr = NEPApplyJacobian(nep,lambda,u,delta,r,Tp);CHKERRQ(ierr);
  ierr = VecDot(u,r,&a2);CHKERRQ(ierr);
  lambda = lambda - a1/a2;

  /* prepare linear solver */
  ierr = MatDuplicate(T,MAT_COPY_VALUES,&Tsigma);CHKERRQ(ierr);
  ierr = KSPSetOperators(nep->ksp,Tsigma,Tsigma);CHKERRQ(ierr);

  /* Restart loop */
  while (nep->reason == NEP_CONVERGED_ITERATING) {
    nep->its++;

    /* update preconditioner and set adaptive tolerance */
    if (nep->lag && !(nep->its%nep->lag) && nep->its>2*nep->lag && relerr<1e-2) {
      ierr = MatHasOperation(T,MATOP_COPY,&hascopy);CHKERRQ(ierr);
      if (hascopy) {
        ierr = MatCopy(T,Tsigma,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
      } else {
        ierr = MatDestroy(&Tsigma);CHKERRQ(ierr);
        ierr = MatDuplicate(T,MAT_COPY_VALUES,&Tsigma);CHKERRQ(ierr);
      }
      ierr = KSPSetOperators(nep->ksp,Tsigma,Tsigma);CHKERRQ(ierr);
    }
    if (!nep->cctol) {
      nep->ktol = PetscMax(nep->ktol/2.0,PETSC_MACHINE_EPSILON*10.0);
      ierr = KSPSetTolerances(nep->ksp,nep->ktol,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT);CHKERRQ(ierr);
    }

    /* form residual,  r = T(lambda)*u */
    ierr = NEPApplyFunction(nep,lambda,u,delta,r,T,T);CHKERRQ(ierr);

    /* convergence test */
    ierr = VecNorm(r,NORM_2,&relerr);CHKERRQ(ierr);
    nep->errest[nep->nconv] = relerr;
    nep->eigr[nep->nconv] = lambda;
    if (relerr<=nep->rtol) {
      nep->nconv = nep->nconv + 1;
      nep->reason = NEP_CONVERGED_FNORM_RELATIVE;
    }
    ierr = NEPMonitor(nep,nep->its,nep->nconv,nep->eigr,nep->errest,1);CHKERRQ(ierr);

    if (!nep->nconv) {
      /* eigenvector correction: delta = T(sigma)\r */
      ierr = NEP_KSPSolve(nep,r,delta);CHKERRQ(ierr);
      ierr = KSPGetConvergedReason(nep->ksp,&kspreason);CHKERRQ(ierr);
      if (kspreason<0) {
        ierr = PetscInfo1(nep,"iter=%D, linear solve failed, stopping solve\n",nep->its);CHKERRQ(ierr);
        nep->reason = NEP_DIVERGED_LINEAR_SOLVE;
        break;
      }

      /* update eigenvector: u = u - delta */
      ierr = VecAXPY(u,-1.0,delta);CHKERRQ(ierr);

      /* normalize eigenvector */
      ierr = VecNormalize(u,NULL);CHKERRQ(ierr);

      /* correct eigenvalue: lambda = lambda - (u'*T*u)/(u'*Tp*u) */
      ierr = NEPApplyFunction(nep,lambda,u,delta,r,T,T);CHKERRQ(ierr);
      ierr = VecDot(u,r,&a1);CHKERRQ(ierr);
      ierr = NEPApplyJacobian(nep,lambda,u,delta,r,Tp);CHKERRQ(ierr);
      ierr = VecDot(u,r,&a2);CHKERRQ(ierr);
      lambda = lambda - a1/a2;
    }
    if (nep->its >= nep->max_it) nep->reason = NEP_DIVERGED_MAX_IT;
  }
  ierr = MatDestroy(&Tsigma);CHKERRQ(ierr);
  ierr = BVRestoreColumn(nep->V,0,&u);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*@
  MatCreateSubMatrixBanded - Extract the banded subset B of A such that ||Vec(B)||_1 >= frac ||Vec(A)||_1

  Input Parameters:
+ A   - The matrix
. kmax - The maximum half-bandwidth, so 2k+1 diagonals may be extracted
- frac - The norm fraction for the extracted band

  Output Parameters:
. B - The banded submatrix

  Level: intermediate

.seealso: MatChop()
 @*/
PetscErrorCode MatCreateSubMatrixBanded(Mat A, PetscInt kmax, PetscReal frac, Mat *B)
{
  Vec            weight;
  PetscScalar   *w, *newVals;
  PetscReal      normA = 0.0, normB = 0.0;
  PetscInt       rStart, rEnd, r;
  PetscInt      *dnnz, *onnz, *newCols;
  PetscInt       m, n, M, N, k, maxcols = 0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Create weight vector */
  ierr = MatGetVecs(A, NULL, &weight);CHKERRQ(ierr);
  ierr = VecSet(weight, 0.0);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A, &rStart, &rEnd);CHKERRQ(ierr);
  ierr = VecGetArray(weight, &w);CHKERRQ(ierr);
  for (r = rStart; r < rEnd; ++r) {
    const PetscScalar *vals;
    const PetscInt    *cols;
    PetscInt           ncols, c;

    ierr = MatGetRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr);
    for (c = 0; c < ncols; ++c) {
      w[abs(r - cols[c])] += PetscAbsScalar(vals[c]);
      normA += PetscAbsScalar(vals[c]);
    }
    ierr = MatRestoreRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(weight, &w);CHKERRQ(ierr);
  /* Determine bandwidth */
  ierr = PetscPrintf(PETSC_COMM_WORLD, "||Vec(A)||_1: %g\n", normA);CHKERRQ(ierr);
  ierr = VecGetArray(weight, &w);CHKERRQ(ierr);
  for (k = 0; k < kmax; ++k) {
    normB += w[k];
    if (normB >= frac*normA) break;
  }
  ierr = VecRestoreArray(weight, &w);CHKERRQ(ierr);
  ierr = VecDestroy(&weight);CHKERRQ(ierr);
  ierr = PetscPrintf(PETSC_COMM_WORLD, "Bandwidth of %d%% band: %d frac: %g\n", (PetscInt) (frac*100), k, normB/normA);CHKERRQ(ierr);
  /* Extract band */
  ierr = MatCreate(PetscObjectComm((PetscObject) A), B);CHKERRQ(ierr);
  ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
  ierr = MatSetSizes(*B, m, n, M, N);CHKERRQ(ierr);
  ierr = PetscMalloc2(m,PetscInt,&dnnz,m,PetscInt,&onnz);CHKERRQ(ierr);
  for (r = rStart; r < rEnd; ++r) {
    const PetscScalar *vals;
    const PetscInt    *cols;
    PetscInt           ncols, c;

    dnnz[r-rStart] = onnz[r-rStart] = 0;
    ierr = MatGetRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr);
    for (c = 0; c < ncols; ++c) {
      if (abs(cols[c] - r) > k) continue;
      if ((cols[c] >= rStart) && (cols[c] < rEnd)) ++dnnz[r-rStart];
      else                                         ++onnz[r-rStart];
    }
    maxcols = PetscMax(ncols, maxcols);
    ierr = MatRestoreRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr);
  }
  ierr = MatSetFromOptions(*B);CHKERRQ(ierr);
  ierr = MatXAIJSetPreallocation(*B, 1, dnnz, onnz, NULL, NULL);CHKERRQ(ierr);
  ierr = MatSetUp(*B);CHKERRQ(ierr);
  ierr = PetscMalloc2(maxcols,PetscInt,&newCols,maxcols,PetscScalar,&newVals);CHKERRQ(ierr);
  for (r = rStart; r < rEnd; ++r) {
    const PetscScalar *vals;
    const PetscInt    *cols;
    PetscInt           ncols, newcols, c;

    ierr = MatGetRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr);
    for (c = 0, newcols = 0; c < ncols; ++c) {
      if (abs(cols[c] - r) > k) continue;
      newCols[newcols] = cols[c];
      newVals[newcols] = vals[c];
      ++newcols;
      if (newcols > maxcols) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Overran work space");
    }
    ierr = MatRestoreRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr);
    ierr = MatSetValues(*B, 1, &r, newcols, newCols, newVals, INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscFree2(newCols, newVals);CHKERRQ(ierr);
  ierr = PetscFree2(dnnz, onnz);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*B, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*B, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*@
  MatLaplacian - Form the matrix Laplacian, with all values in the matrix less than the tolerance set to zero

  Input Parameters:
+ A   - The matrix
- tol - The zero tolerance

  Output Parameters:
. L - The graph Laplacian matrix

  Level: intermediate

.seealso: MatChop()
 @*/
PetscErrorCode MatLaplacian(Mat A, PetscReal tol, Mat *L)
{
  PetscScalar   *newVals;
  PetscInt      *newCols;
  PetscInt       rStart, rEnd, r, colMax = 0;
  PetscInt      *dnnz, *onnz;
  PetscInt       m, n, M, N;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(PetscObjectComm((PetscObject) A), L);CHKERRQ(ierr);
  ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
  ierr = MatSetSizes(*L, m, n, M, N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A, &rStart, &rEnd);CHKERRQ(ierr);
  ierr = PetscMalloc2(m,PetscInt,&dnnz,m,PetscInt,&onnz);CHKERRQ(ierr);
  for (r = rStart; r < rEnd; ++r) {
    const PetscScalar *vals;
    const PetscInt    *cols;
    PetscInt           ncols, newcols, c;
    PetscBool          hasdiag = PETSC_FALSE;

    dnnz[r-rStart] = onnz[r-rStart] = 0;
    ierr = MatGetRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr);
    for (c = 0, newcols = 0; c < ncols; ++c) {
      if (cols[c] == r) {
        ++newcols;
        hasdiag = PETSC_TRUE;
        ++dnnz[r-rStart];
      } else if (PetscAbsScalar(vals[c]) >= tol) {
        if ((cols[c] >= rStart) && (cols[c] < rEnd)) ++dnnz[r-rStart];
        else                                         ++onnz[r-rStart];
        ++newcols;
      }
    }
    if (!hasdiag) {++newcols; ++dnnz[r-rStart];}
    colMax = PetscMax(colMax, newcols);CHKERRQ(ierr);
    ierr = MatRestoreRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr);
  }
  ierr = MatSetFromOptions(*L);CHKERRQ(ierr);
  ierr = MatXAIJSetPreallocation(*L, 1, dnnz, onnz, NULL, NULL);CHKERRQ(ierr);
  ierr = MatSetUp(*L);CHKERRQ(ierr);
  ierr = PetscMalloc2(colMax,PetscInt,&newCols,colMax,PetscScalar,&newVals);CHKERRQ(ierr);
  for (r = rStart; r < rEnd; ++r) {
    const PetscScalar *vals;
    const PetscInt    *cols;
    PetscInt           ncols, newcols, c;
    PetscBool          hasdiag = PETSC_FALSE;

    ierr = MatGetRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr);
    for (c = 0, newcols = 0; c < ncols; ++c) {
      if (cols[c] == r) {
        newCols[newcols] = cols[c];
        newVals[newcols] = dnnz[r-rStart]+onnz[r-rStart]-1;
        ++newcols;
        hasdiag = PETSC_TRUE;
      } else if (PetscAbsScalar(vals[c]) >= tol) {
        newCols[newcols] = cols[c];
        newVals[newcols] = -1.0;
        ++newcols;
      }
      if (newcols > colMax) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Overran work space");
    }
    if (!hasdiag) {
      newCols[newcols] = r;
      newVals[newcols] = dnnz[r-rStart]+onnz[r-rStart]-1;
      ++newcols;
    }
    ierr = MatRestoreRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr);
    ierr = MatSetValues(*L, 1, &r, newcols, newCols, newVals, INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscFree2(dnnz,onnz);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*L, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*L, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree2(newCols,newVals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
PetscErrorCode SVDSolve_TRLanczos(SVD svd)
{
  PetscErrorCode ierr;
  SVD_TRLANCZOS  *lanczos = (SVD_TRLANCZOS*)svd->data;
  PetscReal      *alpha,*beta,lastbeta,norm;
  PetscScalar    *Q,*swork=NULL,*w;
  PetscInt       i,k,l,nv,ld;
  Mat            U,VT;
  PetscBool      conv;
  BVOrthogType   orthog;

  PetscFunctionBegin;
  /* allocate working space */
  ierr = DSGetLeadingDimension(svd->ds,&ld);CHKERRQ(ierr);
  ierr = BVGetOrthogonalization(svd->V,&orthog,NULL,NULL);CHKERRQ(ierr);
  ierr = PetscMalloc1(ld,&w);CHKERRQ(ierr);
  if (lanczos->oneside && orthog == BV_ORTHOG_CGS) {
    ierr = PetscMalloc1(svd->ncv+1,&swork);CHKERRQ(ierr);
  }

  /* normalize start vector */
  if (!svd->nini) {
    ierr = BVSetRandomColumn(svd->V,0,svd->rand);CHKERRQ(ierr);
    ierr = BVNormColumn(svd->V,0,NORM_2,&norm);CHKERRQ(ierr);
    ierr = BVScaleColumn(svd->V,0,1.0/norm);CHKERRQ(ierr);
  }

  l = 0;
  while (svd->reason == SVD_CONVERGED_ITERATING) {
    svd->its++;

    /* inner loop */
    nv = PetscMin(svd->nconv+svd->mpd,svd->ncv);
    ierr = BVSetActiveColumns(svd->V,svd->nconv,nv);CHKERRQ(ierr);
    ierr = BVSetActiveColumns(svd->U,svd->nconv,nv);CHKERRQ(ierr);
    ierr = DSGetArrayReal(svd->ds,DS_MAT_T,&alpha);CHKERRQ(ierr);
    beta = alpha + ld;
    if (lanczos->oneside) {
      if (orthog == BV_ORTHOG_MGS) {
        ierr = SVDOneSideTRLanczosMGS(svd,alpha,beta,svd->V,svd->U,svd->nconv,l,nv);CHKERRQ(ierr);
      } else {
        ierr = SVDOneSideTRLanczosCGS(svd,alpha,beta,svd->V,svd->U,svd->nconv,l,nv,swork);CHKERRQ(ierr);
      }
    } else {
      ierr = SVDTwoSideLanczos(svd,alpha,beta,svd->V,svd->U,svd->nconv+l,nv);CHKERRQ(ierr);
    }
    lastbeta = beta[nv-1];
    ierr = DSRestoreArrayReal(svd->ds,DS_MAT_T,&alpha);CHKERRQ(ierr);
    ierr = BVScaleColumn(svd->V,nv,1.0/lastbeta);CHKERRQ(ierr);

    /* compute SVD of general matrix */
    ierr = DSSetDimensions(svd->ds,nv,nv,svd->nconv,svd->nconv+l);CHKERRQ(ierr);
    if (l==0) {
      ierr = DSSetState(svd->ds,DS_STATE_INTERMEDIATE);CHKERRQ(ierr);
    } else {
      ierr = DSSetState(svd->ds,DS_STATE_RAW);CHKERRQ(ierr);
    }
    ierr = DSSolve(svd->ds,w,NULL);CHKERRQ(ierr);
    ierr = DSSort(svd->ds,w,NULL,NULL,NULL,NULL);CHKERRQ(ierr);

    /* compute error estimates */
    k = 0;
    conv = PETSC_TRUE;
    ierr = DSGetArray(svd->ds,DS_MAT_U,&Q);CHKERRQ(ierr);
    ierr = DSGetArrayReal(svd->ds,DS_MAT_T,&alpha);CHKERRQ(ierr);
    beta = alpha + ld;
    for (i=svd->nconv;i<nv;i++) {
      svd->sigma[i] = PetscRealPart(w[i]);
      beta[i] = PetscRealPart(Q[nv-1+i*ld])*lastbeta;
      svd->errest[i] = PetscAbsScalar(beta[i]);
      if (svd->sigma[i] > svd->tol) svd->errest[i] /= svd->sigma[i];
      if (conv) {
        if (svd->errest[i] < svd->tol) k++;
        else conv = PETSC_FALSE;
      }
    }
    ierr = DSRestoreArrayReal(svd->ds,DS_MAT_T,&alpha);CHKERRQ(ierr);
    ierr = DSRestoreArray(svd->ds,DS_MAT_U,&Q);CHKERRQ(ierr);

    /* check convergence and update l */
    if (svd->its >= svd->max_it) svd->reason = SVD_DIVERGED_ITS;
    if (svd->nconv+k >= svd->nsv) svd->reason = SVD_CONVERGED_TOL;
    if (svd->reason != SVD_CONVERGED_ITERATING) l = 0;
    else l = PetscMax((nv-svd->nconv-k)/2,0);

    /* compute converged singular vectors and restart vectors */
    ierr = DSGetMat(svd->ds,DS_MAT_VT,&VT);CHKERRQ(ierr);
    ierr = BVMultInPlaceTranspose(svd->V,VT,svd->nconv,svd->nconv+k+l);CHKERRQ(ierr);
    ierr = MatDestroy(&VT);CHKERRQ(ierr);
    ierr = DSGetMat(svd->ds,DS_MAT_U,&U);CHKERRQ(ierr);
    ierr = BVMultInPlace(svd->U,U,svd->nconv,svd->nconv+k+l);CHKERRQ(ierr);
    ierr = MatDestroy(&U);CHKERRQ(ierr);

    /* copy the last vector to be the next initial vector */
    if (svd->reason == SVD_CONVERGED_ITERATING) {
      ierr = BVCopyColumn(svd->V,nv,svd->nconv+k+l);CHKERRQ(ierr);
    }

    svd->nconv += k;
    ierr = SVDMonitor(svd,svd->its,svd->nconv,svd->sigma,svd->errest,nv);CHKERRQ(ierr);
  }

  /* orthonormalize U columns in one side method */
  if (lanczos->oneside) {
    for (i=0;i<svd->nconv;i++) {
      ierr = BVOrthogonalizeColumn(svd->U,i,NULL,&norm,NULL);CHKERRQ(ierr);
      ierr = BVScaleColumn(svd->U,i,1.0/norm);CHKERRQ(ierr);
    }
  }

  /* free working space */
  ierr = PetscFree(w);CHKERRQ(ierr);
  if (swork) { ierr = PetscFree(swork);CHKERRQ(ierr); }
  PetscFunctionReturn(0);
}
Beispiel #11
0
/*
  FormFunctionLocal - Form the local residual F from the local input X

  Input Parameters:
+ dm - The mesh
. X  - Local input vector
- user - The user context

  Output Parameter:
. F  - Local output vector

  Note:
  We form the residual one batch of elements at a time. This allows us to offload work onto an accelerator,
  like a GPU, or vectorize on a multicore machine.

.seealso: FormJacobianLocal()
*/
PetscErrorCode FormFunctionLocal(DM dm, Vec X, Vec F, AppCtx *user)
{
  const PetscInt debug = user->debug;
  const PetscInt dim   = user->dim;
  PetscReal      *coords, *v0, *J, *invJ, *detJ;
  PetscScalar    *elemVec, *u;
  PetscInt       cellDof = 0;
  PetscInt       maxQuad = 0;
  PetscInt       jacSize = dim*dim;
  PetscInt       numCells, cStart, cEnd, c, field, d;
  PetscErrorCode ierr;

  PetscFunctionBeginUser;
  ierr = PetscLogEventBegin(user->residualEvent,0,0,0,0);CHKERRQ(ierr);
  ierr = VecSet(F, 0.0);CHKERRQ(ierr);
  ierr = DMDAGetHeightStratum(dm, 0, &cStart, &cEnd);CHKERRQ(ierr);

  numCells = cEnd - cStart;
  for (field = 0; field < numFields; ++field) {
    cellDof += user->q[field].numBasisFuncs*user->q[field].numComponents;
    maxQuad  = PetscMax(maxQuad, user->q[field].numQuadPoints);
  }
  for (d = 0; d < dim; ++d) jacSize *= maxQuad;
  ierr = PetscMalloc3(dim,&coords,dim,&v0,jacSize,&J);CHKERRQ(ierr);
  ierr = PetscMalloc4(numCells*cellDof,&u,numCells*jacSize,&invJ,numCells*maxQuad,&detJ,numCells*cellDof,&elemVec);CHKERRQ(ierr);
  for (c = cStart; c < cEnd; ++c) {
    PetscScalar *x = NULL;
    PetscInt     i;

    ierr = DMDAComputeCellGeometry(dm, c, &user->q[0], v0, J, &invJ[c*jacSize], &detJ[c]);CHKERRQ(ierr);
    if (detJ[c] <= 0.0) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Invalid determinant %g for element %d", detJ[c], c);
    ierr = DMDAVecGetClosure(dm, NULL, X, c, &x);CHKERRQ(ierr);

    for (i = 0; i < cellDof; ++i) u[c*cellDof+i] = x[i];
  }
  for (field = 0; field < numFields; ++field) {
    const PetscInt numQuadPoints = user->q[field].numQuadPoints;
    const PetscInt numBasisFuncs = user->q[field].numBasisFuncs;
    void           (*f0)(PetscScalar u[], const PetscScalar gradU[], PetscScalar f0[]) = user->f0Funcs[field];
    void           (*f1)(PetscScalar u[], const PetscScalar gradU[], PetscScalar f1[]) = user->f1Funcs[field];
    /* Conforming batches */
    PetscInt blockSize  = numBasisFuncs*numQuadPoints;
    PetscInt numBlocks  = 1;
    PetscInt batchSize  = numBlocks * blockSize;
    PetscInt numBatches = user->numBatches;
    PetscInt numChunks  = numCells / (numBatches*batchSize);
    ierr = IntegrateResidualBatchCPU(numChunks*numBatches*batchSize, numFields, field, u, invJ, detJ, user->q, f0, f1, elemVec, user);CHKERRQ(ierr);
    /* Remainder */
    PetscInt numRemainder = numCells % (numBatches * batchSize);
    PetscInt offset       = numCells - numRemainder;
    ierr = IntegrateResidualBatchCPU(numRemainder, numFields, field, &u[offset*cellDof], &invJ[offset*dim*dim], &detJ[offset],
                                     user->q, f0, f1, &elemVec[offset*cellDof], user);CHKERRQ(ierr);
  }
  for (c = cStart; c < cEnd; ++c) {
    if (debug) {ierr = DMPrintCellVector(c, "Residual", cellDof, &elemVec[c*cellDof]);CHKERRQ(ierr);}
    ierr = DMDAVecSetClosure(dm, NULL, F, c, &elemVec[c*cellDof], ADD_VALUES);CHKERRQ(ierr);
  }
  ierr = PetscFree4(u,invJ,detJ,elemVec);CHKERRQ(ierr);
  ierr = PetscFree3(coords,v0,J);CHKERRQ(ierr);
  if (user->showResidual) {
    PetscInt p;

    ierr = PetscPrintf(PETSC_COMM_WORLD, "Residual:\n");CHKERRQ(ierr);
    for (p = 0; p < user->numProcs; ++p) {
      if (p == user->rank) {
        Vec f;

        ierr = VecDuplicate(F, &f);CHKERRQ(ierr);
        ierr = VecCopy(F, f);CHKERRQ(ierr);
        ierr = VecChop(f, 1.0e-10);CHKERRQ(ierr);
        ierr = VecView(f, PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
        ierr = VecDestroy(&f);CHKERRQ(ierr);
      }
      ierr = PetscBarrier((PetscObject) dm);CHKERRQ(ierr);
    }
  }
  ierr = PetscLogEventEnd(user->residualEvent,0,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Beispiel #12
0
/*
  DMPlexGetRawFaces_Internal - Gets groups of vertices that correspond to faces for the given cone
*/
PetscErrorCode DMPlexGetRawFaces_Internal(DM dm, PetscInt dim, PetscInt coneSize, const PetscInt cone[], PetscInt *numFaces, PetscInt *faceSize, const PetscInt *faces[])
{
  PetscInt       *facesTmp;
  PetscInt        maxConeSize, maxSupportSize;
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
  ierr = DMPlexGetMaxSizes(dm, &maxConeSize, &maxSupportSize);CHKERRQ(ierr);
  if (faces) {ierr = DMGetWorkArray(dm, PetscSqr(PetscMax(maxConeSize, maxSupportSize)), PETSC_INT, &facesTmp);CHKERRQ(ierr);}
  switch (dim) {
  case 1:
    switch (coneSize) {
    case 2:
      if (faces) {
        facesTmp[0] = cone[0]; facesTmp[1] = cone[1];
        *faces = facesTmp;
      }
      if (numFaces) *numFaces         = 2;
      if (faceSize) *faceSize         = 1;
      break;
    default:
      SETERRQ2(PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Cone size %D not supported for dimension %D", coneSize, dim);
    }
    break;
  case 2:
    switch (coneSize) {
    case 3:
      if (faces) {
        facesTmp[0] = cone[0]; facesTmp[1] = cone[1];
        facesTmp[2] = cone[1]; facesTmp[3] = cone[2];
        facesTmp[4] = cone[2]; facesTmp[5] = cone[0];
        *faces = facesTmp;
      }
      if (numFaces) *numFaces         = 3;
      if (faceSize) *faceSize         = 2;
      break;
    case 4:
      /* Vertices follow right hand rule */
      if (faces) {
        facesTmp[0] = cone[0]; facesTmp[1] = cone[1];
        facesTmp[2] = cone[1]; facesTmp[3] = cone[2];
        facesTmp[4] = cone[2]; facesTmp[5] = cone[3];
        facesTmp[6] = cone[3]; facesTmp[7] = cone[0];
        *faces = facesTmp;
      }
      if (numFaces) *numFaces         = 4;
      if (faceSize) *faceSize         = 2;
      break;
    default:
      SETERRQ2(PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Cone size %D not supported for dimension %D", coneSize, dim);
    }
    break;
  case 3:
    switch (coneSize) {
    case 3:
      if (faces) {
        facesTmp[0] = cone[0]; facesTmp[1] = cone[1];
        facesTmp[2] = cone[1]; facesTmp[3] = cone[2];
        facesTmp[4] = cone[2]; facesTmp[5] = cone[0];
        *faces = facesTmp;
      }
      if (numFaces) *numFaces         = 3;
      if (faceSize) *faceSize         = 2;
      break;
    case 4:
      /* Vertices of first face follow right hand rule and normal points away from last vertex */
      if (faces) {
        facesTmp[0] = cone[0]; facesTmp[1]  = cone[1]; facesTmp[2]  = cone[2];
        facesTmp[3] = cone[0]; facesTmp[4]  = cone[3]; facesTmp[5]  = cone[1];
        facesTmp[6] = cone[0]; facesTmp[7]  = cone[2]; facesTmp[8]  = cone[3];
        facesTmp[9] = cone[2]; facesTmp[10] = cone[1]; facesTmp[11] = cone[3];
        *faces = facesTmp;
      }
      if (numFaces) *numFaces         = 4;
      if (faceSize) *faceSize         = 3;
      break;
    case 8:
      if (faces) {
        facesTmp[0]  = cone[0]; facesTmp[1]  = cone[1]; facesTmp[2]  = cone[2]; facesTmp[3]  = cone[3]; /* Bottom */
        facesTmp[4]  = cone[4]; facesTmp[5]  = cone[5]; facesTmp[6]  = cone[6]; facesTmp[7]  = cone[7]; /* Top */
        facesTmp[8]  = cone[0]; facesTmp[9]  = cone[3]; facesTmp[10] = cone[5]; facesTmp[11] = cone[4]; /* Front */
        facesTmp[12] = cone[2]; facesTmp[13] = cone[1]; facesTmp[14] = cone[7]; facesTmp[15] = cone[6]; /* Back */
        facesTmp[16] = cone[3]; facesTmp[17] = cone[2]; facesTmp[18] = cone[6]; facesTmp[19] = cone[5]; /* Right */
        facesTmp[20] = cone[0]; facesTmp[21] = cone[4]; facesTmp[22] = cone[7]; facesTmp[23] = cone[1]; /* Left */
        *faces = facesTmp;
      }
      if (numFaces) *numFaces         = 6;
      if (faceSize) *faceSize         = 4;
      break;
    default:
      SETERRQ2(PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Cone size %D not supported for dimension %D", coneSize, dim);
    }
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)dm), PETSC_ERR_ARG_OUTOFRANGE, "Dimension %D not supported", dim);
  }
  PetscFunctionReturn(0);
}
Beispiel #13
0
/* This interpolates faces for cells at some stratum */
static PetscErrorCode DMPlexInterpolateFaces_Internal(DM dm, PetscInt cellDepth, DM idm)
{
  DMLabel        subpointMap;
  PetscHashIJKL  faceTable;
  PetscInt      *pStart, *pEnd;
  PetscInt       cellDim, depth, faceDepth = cellDepth, numPoints = 0, faceSizeAll = 0, face, c, d;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = DMPlexGetDimension(dm, &cellDim);CHKERRQ(ierr);
  /* HACK: I need a better way to determine face dimension, or an alternative to GetFaces() */
  ierr = DMPlexGetSubpointMap(dm, &subpointMap);CHKERRQ(ierr);
  if (subpointMap) ++cellDim;
  ierr = DMPlexGetDepth(dm, &depth);CHKERRQ(ierr);
  ++depth;
  ++cellDepth;
  cellDim -= depth - cellDepth;
  ierr = PetscMalloc2(depth+1,&pStart,depth+1,&pEnd);CHKERRQ(ierr);
  for (d = depth-1; d >= faceDepth; --d) {
    ierr = DMPlexGetDepthStratum(dm, d, &pStart[d+1], &pEnd[d+1]);CHKERRQ(ierr);
  }
  ierr = DMPlexGetDepthStratum(dm, -1, NULL, &pStart[faceDepth]);CHKERRQ(ierr);
  pEnd[faceDepth] = pStart[faceDepth];
  for (d = faceDepth-1; d >= 0; --d) {
    ierr = DMPlexGetDepthStratum(dm, d, &pStart[d], &pEnd[d]);CHKERRQ(ierr);
  }
  if (pEnd[cellDepth] > pStart[cellDepth]) {ierr = DMPlexGetFaces_Internal(dm, cellDim, pStart[cellDepth], NULL, &faceSizeAll, NULL);CHKERRQ(ierr);}
  if (faceSizeAll > 4) SETERRQ1(PetscObjectComm((PetscObject) dm), PETSC_ERR_ARG_WRONG, "Do not support interpolation of meshes with faces of %D vertices", faceSizeAll);
  ierr = PetscHashIJKLCreate(&faceTable);CHKERRQ(ierr);
  for (c = pStart[cellDepth], face = pStart[faceDepth]; c < pEnd[cellDepth]; ++c) {
    const PetscInt *cellFaces;
    PetscInt        numCellFaces, faceSize, cf;

    ierr = DMPlexGetFaces_Internal(dm, cellDim, c, &numCellFaces, &faceSize, &cellFaces);CHKERRQ(ierr);
    if (faceSize != faceSizeAll) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Inconsistent face for cell %D of size %D != %D", c, faceSize, faceSizeAll);
    for (cf = 0; cf < numCellFaces; ++cf) {
      const PetscInt   *cellFace = &cellFaces[cf*faceSize];
      PetscHashIJKLKey  key;
      PetscHashIJKLIter missing, iter;

      if (faceSize == 2) {
        key.i = PetscMin(cellFace[0], cellFace[1]);
        key.j = PetscMax(cellFace[0], cellFace[1]);
        key.k = 0;
        key.l = 0;
      } else {
        key.i = cellFace[0]; key.j = cellFace[1]; key.k = cellFace[2]; key.l = faceSize > 3 ? cellFace[3] : 0;
        ierr = PetscSortInt(faceSize, (PetscInt *) &key);
      }
      ierr = PetscHashIJKLPut(faceTable, key, &missing, &iter);CHKERRQ(ierr);
      if (missing) {ierr = PetscHashIJKLSet(faceTable, iter, face++);CHKERRQ(ierr);}
    }
    ierr = DMPlexRestoreFaces_Internal(dm, cellDim, c, &numCellFaces, &faceSize, &cellFaces);CHKERRQ(ierr);
  }
  pEnd[faceDepth] = face;
  ierr = PetscHashIJKLDestroy(&faceTable);CHKERRQ(ierr);
  /* Count new points */
  for (d = 0; d <= depth; ++d) {
    numPoints += pEnd[d]-pStart[d];
  }
  ierr = DMPlexSetChart(idm, 0, numPoints);CHKERRQ(ierr);
  /* Set cone sizes */
  for (d = 0; d <= depth; ++d) {
    PetscInt coneSize, p;

    if (d == faceDepth) {
      for (p = pStart[d]; p < pEnd[d]; ++p) {
        /* I see no way to do this if we admit faces of different shapes */
        ierr = DMPlexSetConeSize(idm, p, faceSizeAll);CHKERRQ(ierr);
      }
    } else if (d == cellDepth) {
      for (p = pStart[d]; p < pEnd[d]; ++p) {
        /* Number of cell faces may be different from number of cell vertices*/
        ierr = DMPlexGetFaces_Internal(dm, cellDim, p, &coneSize, NULL, NULL);CHKERRQ(ierr);
        ierr = DMPlexSetConeSize(idm, p, coneSize);CHKERRQ(ierr);
      }
    } else {
      for (p = pStart[d]; p < pEnd[d]; ++p) {
        ierr = DMPlexGetConeSize(dm, p, &coneSize);CHKERRQ(ierr);
        ierr = DMPlexSetConeSize(idm, p, coneSize);CHKERRQ(ierr);
      }
    }
  }
  ierr = DMSetUp(idm);CHKERRQ(ierr);
  /* Get face cones from subsets of cell vertices */
  if (faceSizeAll > 4) SETERRQ1(PetscObjectComm((PetscObject) dm), PETSC_ERR_ARG_WRONG, "Do not support interpolation of meshes with faces of %D vertices", faceSizeAll);
  ierr = PetscHashIJKLCreate(&faceTable);CHKERRQ(ierr);
  for (d = depth; d > cellDepth; --d) {
    const PetscInt *cone;
    PetscInt        p;

    for (p = pStart[d]; p < pEnd[d]; ++p) {
      ierr = DMPlexGetCone(dm, p, &cone);CHKERRQ(ierr);
      ierr = DMPlexSetCone(idm, p, cone);CHKERRQ(ierr);
      ierr = DMPlexGetConeOrientation(dm, p, &cone);CHKERRQ(ierr);
      ierr = DMPlexSetConeOrientation(idm, p, cone);CHKERRQ(ierr);
    }
  }
  for (c = pStart[cellDepth], face = pStart[faceDepth]; c < pEnd[cellDepth]; ++c) {
    const PetscInt *cellFaces;
    PetscInt        numCellFaces, faceSize, cf;

    ierr = DMPlexGetFaces_Internal(dm, cellDim, c, &numCellFaces, &faceSize, &cellFaces);CHKERRQ(ierr);
    if (faceSize != faceSizeAll) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Inconsistent face for cell %D of size %D != %D", c, faceSize, faceSizeAll);
    for (cf = 0; cf < numCellFaces; ++cf) {
      const PetscInt  *cellFace = &cellFaces[cf*faceSize];
      PetscHashIJKLKey key;
      PetscHashIJKLIter missing, iter;

      if (faceSize == 2) {
        key.i = PetscMin(cellFace[0], cellFace[1]);
        key.j = PetscMax(cellFace[0], cellFace[1]);
        key.k = 0;
        key.l = 0;
      } else {
        key.i = cellFace[0]; key.j = cellFace[1]; key.k = cellFace[2]; key.l = faceSize > 3 ? cellFace[3] : 0;
        ierr = PetscSortInt(faceSize, (PetscInt *) &key);
      }
      ierr = PetscHashIJKLPut(faceTable, key, &missing, &iter);CHKERRQ(ierr);
      if (missing) {
        ierr = DMPlexSetCone(idm, face, cellFace);CHKERRQ(ierr);
        ierr = PetscHashIJKLSet(faceTable, iter, face);CHKERRQ(ierr);
        ierr = DMPlexInsertCone(idm, c, cf, face++);CHKERRQ(ierr);
      } else {
        const PetscInt *cone;
        PetscInt        coneSize, ornt, i, j, f;

        ierr = PetscHashIJKLGet(faceTable, iter, &f);CHKERRQ(ierr);
        ierr = DMPlexInsertCone(idm, c, cf, f);CHKERRQ(ierr);
        /* Orient face: Do not allow reverse orientation at the first vertex */
        ierr = DMPlexGetConeSize(idm, f, &coneSize);CHKERRQ(ierr);
        ierr = DMPlexGetCone(idm, f, &cone);CHKERRQ(ierr);
        if (coneSize != faceSize) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of face vertices %D for face %D should be %D", coneSize, f, faceSize);
        /* - First find the initial vertex */
        for (i = 0; i < faceSize; ++i) if (cellFace[0] == cone[i]) break;
        /* - Try forward comparison */
        for (j = 0; j < faceSize; ++j) if (cellFace[j] != cone[(i+j)%faceSize]) break;
        if (j == faceSize) {
          if ((faceSize == 2) && (i == 1)) ornt = -2;
          else                             ornt = i;
        } else {
          /* - Try backward comparison */
          for (j = 0; j < faceSize; ++j) if (cellFace[j] != cone[(i+faceSize-j)%faceSize]) break;
          if (j == faceSize) {
            if (i == 0) ornt = -faceSize;
            else        ornt = -i;
          } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Could not determine face orientation");
        }
        ierr = DMPlexInsertConeOrientation(idm, c, cf, ornt);CHKERRQ(ierr);
      }
    }
    ierr = DMPlexRestoreFaces_Internal(dm, cellDim, c, &numCellFaces, &faceSize, &cellFaces);CHKERRQ(ierr);
  }
  if (face != pEnd[faceDepth]) SETERRQ2(PetscObjectComm((PetscObject) dm), PETSC_ERR_PLIB, "Invalid number of faces %D should be %D", face-pStart[faceDepth], pEnd[faceDepth]-pStart[faceDepth]);
  ierr = PetscFree2(pStart,pEnd);CHKERRQ(ierr);
  ierr = PetscHashIJKLDestroy(&faceTable);CHKERRQ(ierr);
  ierr = PetscFree2(pStart,pEnd);CHKERRQ(ierr);
  ierr = DMPlexSymmetrize(idm);CHKERRQ(ierr);
  ierr = DMPlexStratify(idm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Beispiel #14
0
PetscErrorCode  DMSetUp_ADDA(DM dm)
{
  PetscErrorCode ierr;
  PetscInt       s=1; /* stencil width, fixed to 1 at the moment */
  PetscMPIInt    rank,size;
  PetscInt       i;
  PetscInt       procsleft;
  PetscInt       procsdimi;
  PetscInt       ranki;
  PetscInt       rpq;
  DM_ADDA        *dd = (DM_ADDA*)dm->data;
  MPI_Comm       comm;
  PetscInt       *nodes,*procs,dim,dof;
  PetscBool      *periodic;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)dm,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 
  procs = dd->procs;
  nodes = dd->nodes;
  dim   = dd->dim;
  dof   = dd->dof;
  periodic = dd->periodic;

  /* check for validity */
  procsleft = 1;
  for(i=0; i<dim; i++) {
    if (nodes[i] < procs[i]) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Partition in direction %d is too fine! %D nodes, %D processors", i, nodes[i], procs[i]);
    procsleft *= procs[i];
  }
  if (procsleft != size) SETERRQ(comm,PETSC_ERR_PLIB, "Created or was provided with inconsistent distribution of processors");

  
  /* find out local region */
  ierr = PetscMalloc(dim*sizeof(PetscInt), &(dd->lcs));CHKERRQ(ierr);
  ierr = PetscMalloc(dim*sizeof(PetscInt), &(dd->lce));CHKERRQ(ierr);
  procsdimi=size;
  ranki=rank;
  for(i=0; i<dim; i++) {
    /* What is the number of processor for dimensions i+1, ..., dim-1? */
    procsdimi /= procs[i];
    /* these are all nodes that come before our region */
    rpq = ranki / procsdimi;
    dd->lcs[i] = rpq * (nodes[i]/procs[i]);
    if( rpq + 1 < procs[i] ) {
      dd->lce[i] = (rpq + 1) * (nodes[i]/procs[i]);
    } else {
      /* last one gets all the rest */
      dd->lce[i] = nodes[i];
    }
    ranki = ranki - rpq*procsdimi;
  }
  
  /* compute local size */
  dd->lsize=1;
  for(i=0; i<dim; i++) {
    dd->lsize *= (dd->lce[i]-dd->lcs[i]);
  }
  dd->lsize *= dof;

  /* find out ghost points */
  ierr = PetscMalloc(dim*sizeof(PetscInt), &(dd->lgs));CHKERRQ(ierr);
  ierr = PetscMalloc(dim*sizeof(PetscInt), &(dd->lge));CHKERRQ(ierr);
  for(i=0; i<dim; i++) {
    if( periodic[i] ) {
      dd->lgs[i] = dd->lcs[i] - s;
      dd->lge[i] = dd->lce[i] + s;
    } else {
      dd->lgs[i] = PetscMax(dd->lcs[i] - s, 0);
      dd->lge[i] = PetscMin(dd->lce[i] + s, nodes[i]);
    }
  }
  
  /* compute local size with ghost points */
  dd->lgsize=1;
  for(i=0; i<dim; i++) {
    dd->lgsize *= (dd->lge[i]-dd->lgs[i]);
  }
  dd->lgsize *= dof;

  /* create global and local prototype vector */
  ierr = VecCreateMPIWithArray(comm,dd->dof,dd->lsize,PETSC_DECIDE,0,&(dd->global));CHKERRQ(ierr);
#if ADDA_NEEDS_LOCAL_VECTOR
  /* local includes ghost points */
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,dof,dd->lgsize,0,&(dd->local));CHKERRQ(ierr);
#endif

  ierr = PetscMalloc(dim*sizeof(PetscInt), &(dd->refine));CHKERRQ(ierr);
  for(i=0; i<dim; i++) dd->refine[i] = 3;
  dd->dofrefine = 1;
  PetscFunctionReturn(0);
}
Beispiel #15
0
static PetscErrorCode TaoLineSearchApply_MT(TaoLineSearch ls, Vec x, PetscReal *f, Vec g, Vec s)
{
  PetscErrorCode   ierr;
  TaoLineSearch_MT *mt;

  PetscReal        xtrapf = 4.0;
  PetscReal        finit, width, width1, dginit, fm, fxm, fym, dgm, dgxm, dgym;
  PetscReal        dgx, dgy, dg, dg2, fx, fy, stx, sty, dgtest;
  PetscReal        ftest1=0.0, ftest2=0.0;
  PetscInt         i, stage1,n1,n2,nn1,nn2;
  PetscReal        bstepmin1, bstepmin2, bstepmax;
  PetscBool        g_computed=PETSC_FALSE; /* to prevent extra gradient computation */

  PetscFunctionBegin;
  PetscValidHeaderSpecific(ls,TAOLINESEARCH_CLASSID,1);
  PetscValidHeaderSpecific(x,VEC_CLASSID,2);
  PetscValidScalarPointer(f,3);
  PetscValidHeaderSpecific(g,VEC_CLASSID,4);
  PetscValidHeaderSpecific(s,VEC_CLASSID,5);

  /* comm,type,size checks are done in interface TaoLineSearchApply */
  mt = (TaoLineSearch_MT*)(ls->data);
  ls->reason = TAOLINESEARCH_CONTINUE_ITERATING;

  /* Check work vector */
  if (!mt->work) {
    ierr = VecDuplicate(x,&mt->work);CHKERRQ(ierr);
    mt->x = x;
    ierr = PetscObjectReference((PetscObject)mt->x);CHKERRQ(ierr);
  } else if (x != mt->x) {
    ierr = VecDestroy(&mt->work);CHKERRQ(ierr);
    ierr = VecDuplicate(x,&mt->work);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)mt->x);CHKERRQ(ierr);
    mt->x = x;
    ierr = PetscObjectReference((PetscObject)mt->x);CHKERRQ(ierr);
  }

  if (ls->bounded) {
    /* Compute step length needed to make all variables equal a bound */
    /* Compute the smallest steplength that will make one nonbinding variable
     equal the bound */
    ierr = VecGetLocalSize(ls->upper,&n1);CHKERRQ(ierr);
    ierr = VecGetLocalSize(mt->x, &n2);CHKERRQ(ierr);
    ierr = VecGetSize(ls->upper,&nn1);CHKERRQ(ierr);
    ierr = VecGetSize(mt->x,&nn2);CHKERRQ(ierr);
    if (n1 != n2 || nn1 != nn2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Variable vector not compatible with bounds vector");
    ierr = VecScale(s,-1.0);CHKERRQ(ierr);
    ierr = VecBoundGradientProjection(s,x,ls->lower,ls->upper,s);CHKERRQ(ierr);
    ierr = VecScale(s,-1.0);CHKERRQ(ierr);
    ierr = VecStepBoundInfo(x,s,ls->lower,ls->upper,&bstepmin1,&bstepmin2,&bstepmax);CHKERRQ(ierr);
    ls->stepmax = PetscMin(bstepmax,1.0e15);
  }

  ierr = VecDot(g,s,&dginit);CHKERRQ(ierr);
  if (PetscIsInfOrNanReal(dginit)) {
    ierr = PetscInfo1(ls,"Initial Line Search step * g is Inf or Nan (%g)\n",(double)dginit);CHKERRQ(ierr);
    ls->reason=TAOLINESEARCH_FAILED_INFORNAN;
    PetscFunctionReturn(0);
  }
  if (dginit >= 0.0) {
    ierr = PetscInfo1(ls,"Initial Line Search step * g is not descent direction (%g)\n",(double)dginit);CHKERRQ(ierr);
    ls->reason = TAOLINESEARCH_FAILED_ASCENT;
    PetscFunctionReturn(0);
  }


  /* Initialization */
  mt->bracket = 0;
  stage1 = 1;
  finit = *f;
  dgtest = ls->ftol * dginit;
  width = ls->stepmax - ls->stepmin;
  width1 = width * 2.0;
  ierr = VecCopy(x,mt->work);CHKERRQ(ierr);
  /* Variable dictionary:
   stx, fx, dgx - the step, function, and derivative at the best step
   sty, fy, dgy - the step, function, and derivative at the other endpoint
   of the interval of uncertainty
   step, f, dg - the step, function, and derivative at the current step */

  stx = 0.0;
  fx  = finit;
  dgx = dginit;
  sty = 0.0;
  fy  = finit;
  dgy = dginit;

  ls->step=ls->initstep;
  for (i=0; i< ls->max_funcs; i++) {
    /* Set min and max steps to correspond to the interval of uncertainty */
    if (mt->bracket) {
      ls->stepmin = PetscMin(stx,sty);
      ls->stepmax = PetscMax(stx,sty);
    } else {
      ls->stepmin = stx;
      ls->stepmax = ls->step + xtrapf * (ls->step - stx);
    }

    /* Force the step to be within the bounds */
    ls->step = PetscMax(ls->step,ls->stepmin);
    ls->step = PetscMin(ls->step,ls->stepmax);

    /* If an unusual termination is to occur, then let step be the lowest
     point obtained thus far */
    if ((stx!=0) && (((mt->bracket) && (ls->step <= ls->stepmin || ls->step >= ls->stepmax)) || ((mt->bracket) && (ls->stepmax - ls->stepmin <= ls->rtol * ls->stepmax)) ||
                     ((ls->nfeval+ls->nfgeval) >= ls->max_funcs - 1) || (mt->infoc == 0))) {
      ls->step = stx;
    }

    ierr = VecCopy(x,mt->work);CHKERRQ(ierr);
    ierr = VecAXPY(mt->work,ls->step,s);CHKERRQ(ierr);   /* W = X + step*S */

    if (ls->bounded) {
      ierr = VecMedian(ls->lower, mt->work, ls->upper, mt->work);CHKERRQ(ierr);
    }
    if (ls->usegts) {
      ierr = TaoLineSearchComputeObjectiveAndGTS(ls,mt->work,f,&dg);CHKERRQ(ierr);
      g_computed=PETSC_FALSE;
    } else {
      ierr = TaoLineSearchComputeObjectiveAndGradient(ls,mt->work,f,g);CHKERRQ(ierr);
      g_computed=PETSC_TRUE;
      if (ls->bounded) {
        ierr = VecDot(g,x,&dg);CHKERRQ(ierr);
        ierr = VecDot(g,mt->work,&dg2);CHKERRQ(ierr);
        dg = (dg2 - dg)/ls->step;
      } else {
        ierr = VecDot(g,s,&dg);CHKERRQ(ierr);
      }
    }

    if (0 == i) {
      ls->f_fullstep=*f;
    }

    if (PetscIsInfOrNanReal(*f) || PetscIsInfOrNanReal(dg)) {
      /* User provided compute function generated Not-a-Number, assume
       domain violation and set function value and directional
       derivative to infinity. */
      *f = PETSC_INFINITY;
      dg = PETSC_INFINITY;
    }

    ftest1 = finit + ls->step * dgtest;
    if (ls->bounded) {
      ftest2 = finit + ls->step * dgtest * ls->ftol;
    }
    /* Convergence testing */
    if (((*f - ftest1 <= 1.0e-10 * PetscAbsReal(finit)) &&  (PetscAbsReal(dg) + ls->gtol*dginit <= 0.0))) {
      ierr = PetscInfo(ls, "Line search success: Sufficient decrease and directional deriv conditions hold\n");CHKERRQ(ierr);
      ls->reason = TAOLINESEARCH_SUCCESS;
      break;
    }

    /* Check Armijo if beyond the first breakpoint */
    if (ls->bounded && (*f <= ftest2) && (ls->step >= bstepmin2)) {
      ierr = PetscInfo(ls,"Line search success: Sufficient decrease.\n");CHKERRQ(ierr);
      ls->reason = TAOLINESEARCH_SUCCESS;
      break;
    }

    /* Checks for bad cases */
    if (((mt->bracket) && (ls->step <= ls->stepmin||ls->step >= ls->stepmax)) || (!mt->infoc)) {
      ierr = PetscInfo(ls,"Rounding errors may prevent further progress.  May not be a step satisfying\n");CHKERRQ(ierr);
      ierr = PetscInfo(ls,"sufficient decrease and curvature conditions. Tolerances may be too small.\n");CHKERRQ(ierr);
      ls->reason = TAOLINESEARCH_HALTED_OTHER;
      break;
    }
    if ((ls->step == ls->stepmax) && (*f <= ftest1) && (dg <= dgtest)) {
      ierr = PetscInfo1(ls,"Step is at the upper bound, stepmax (%g)\n",(double)ls->stepmax);CHKERRQ(ierr);
      ls->reason = TAOLINESEARCH_HALTED_UPPERBOUND;
      break;
    }
    if ((ls->step == ls->stepmin) && (*f >= ftest1) && (dg >= dgtest)) {
      ierr = PetscInfo1(ls,"Step is at the lower bound, stepmin (%g)\n",(double)ls->stepmin);CHKERRQ(ierr);
      ls->reason = TAOLINESEARCH_HALTED_LOWERBOUND;
      break;
    }
    if ((mt->bracket) && (ls->stepmax - ls->stepmin <= ls->rtol*ls->stepmax)){
      ierr = PetscInfo1(ls,"Relative width of interval of uncertainty is at most rtol (%g)\n",(double)ls->rtol);CHKERRQ(ierr);
      ls->reason = TAOLINESEARCH_HALTED_RTOL;
      break;
    }

    /* In the first stage, we seek a step for which the modified function
     has a nonpositive value and nonnegative derivative */
    if ((stage1) && (*f <= ftest1) && (dg >= dginit * PetscMin(ls->ftol, ls->gtol))) {
      stage1 = 0;
    }

    /* A modified function is used to predict the step only if we
     have not obtained a step for which the modified function has a
     nonpositive function value and nonnegative derivative, and if a
     lower function value has been obtained but the decrease is not
     sufficient */

    if ((stage1) && (*f <= fx) && (*f > ftest1)) {
      fm   = *f - ls->step * dgtest;    /* Define modified function */
      fxm  = fx - stx * dgtest;         /* and derivatives */
      fym  = fy - sty * dgtest;
      dgm  = dg - dgtest;
      dgxm = dgx - dgtest;
      dgym = dgy - dgtest;

      /* if (dgxm * (ls->step - stx) >= 0.0) */
      /* Update the interval of uncertainty and compute the new step */
      ierr = Tao_mcstep(ls,&stx,&fxm,&dgxm,&sty,&fym,&dgym,&ls->step,&fm,&dgm);CHKERRQ(ierr);

      fx  = fxm + stx * dgtest; /* Reset the function and */
      fy  = fym + sty * dgtest; /* gradient values */
      dgx = dgxm + dgtest;
      dgy = dgym + dgtest;
    } else {
      /* Update the interval of uncertainty and compute the new step */
      ierr = Tao_mcstep(ls,&stx,&fx,&dgx,&sty,&fy,&dgy,&ls->step,f,&dg);CHKERRQ(ierr);
    }

    /* Force a sufficient decrease in the interval of uncertainty */
    if (mt->bracket) {
      if (PetscAbsReal(sty - stx) >= 0.66 * width1) ls->step = stx + 0.5*(sty - stx);
      width1 = width;
      width = PetscAbsReal(sty - stx);
    }
  }
  if ((ls->nfeval+ls->nfgeval) > ls->max_funcs) {
    ierr = PetscInfo2(ls,"Number of line search function evals (%D) > maximum (%D)\n",(ls->nfeval+ls->nfgeval),ls->max_funcs);CHKERRQ(ierr);
    ls->reason = TAOLINESEARCH_HALTED_MAXFCN;
  }

  /* Finish computations */
  ierr = PetscInfo2(ls,"%D function evals in line search, step = %g\n",(ls->nfeval+ls->nfgeval),(double)ls->step);CHKERRQ(ierr);

  /* Set new solution vector and compute gradient if needed */
  ierr = VecCopy(mt->work,x);CHKERRQ(ierr);
  if (!g_computed) {
    ierr = TaoLineSearchComputeGradient(ls,mt->work,g);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
Beispiel #16
0
int main(int argc,char **argv)
{
  TS                ts;         /* time integrator */
  SNES              snes;       /* nonlinear solver */
  SNESLineSearch    linesearch; /* line search */
  Vec               X;          /* solution, residual vectors */
  Mat               J;          /* Jacobian matrix */
  PetscInt          steps,maxsteps,mx;
  PetscErrorCode    ierr;
  DM                da;
  PetscReal         ftime,dt;
  struct _User      user;       /* user-defined work context */
  TSConvergedReason reason;

  PetscInitialize(&argc,&argv,(char*)0,help);

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Create distributed array (DMDA) to manage parallel grid and vectors
  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  ierr = DMDACreate1d(PETSC_COMM_WORLD,DMDA_BOUNDARY_NONE,-11,2,2,NULL,&da);CHKERRQ(ierr);

  /*  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Extract global vectors from DMDA;
   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  ierr = DMCreateGlobalVector(da,&X);CHKERRQ(ierr);

  /* Initialize user application context */
  ierr = PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"Advection-reaction options","");
  {
    user.a[0] = 1;           ierr = PetscOptionsReal("-a0","Advection rate 0","",user.a[0],&user.a[0],NULL);CHKERRQ(ierr);
    user.a[1] = 0;           ierr = PetscOptionsReal("-a1","Advection rate 1","",user.a[1],&user.a[1],NULL);CHKERRQ(ierr);
    user.k[0] = 1e6;         ierr = PetscOptionsReal("-k0","Reaction rate 0","",user.k[0],&user.k[0],NULL);CHKERRQ(ierr);
    user.k[1] = 2*user.k[0]; ierr = PetscOptionsReal("-k1","Reaction rate 1","",user.k[1],&user.k[1],NULL);CHKERRQ(ierr);
    user.s[0] = 0;           ierr = PetscOptionsReal("-s0","Source 0","",user.s[0],&user.s[0],NULL);CHKERRQ(ierr);
    user.s[1] = 1;           ierr = PetscOptionsReal("-s1","Source 1","",user.s[1],&user.s[1],NULL);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Create timestepping solver context
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  ierr = TSCreate(PETSC_COMM_WORLD,&ts);CHKERRQ(ierr);
  ierr = TSSetDM(ts,da);CHKERRQ(ierr);
  ierr = TSSetType(ts,TSARKIMEX);CHKERRQ(ierr);
  ierr = TSSetRHSFunction(ts,NULL,FormRHSFunction,&user);CHKERRQ(ierr);
  ierr = TSSetIFunction(ts,NULL,FormIFunction,&user);CHKERRQ(ierr);
  ierr = DMSetMatType(da,MATAIJ);CHKERRQ(ierr);
  ierr = DMCreateMatrix(da,&J);CHKERRQ(ierr);
  ierr = TSSetIJacobian(ts,J,J,FormIJacobian,&user);CHKERRQ(ierr);

  /* A line search in the nonlinear solve can fail due to ill-conditioning unless an absolute tolerance is set. Since
   * this problem is linear, we deactivate the line search. For a linear problem, it is usually recommended to also use
   * SNESSetType(snes,SNESKSPONLY). */
  ierr = TSGetSNES(ts,&snes);CHKERRQ(ierr);
  ierr = SNESGetLineSearch(snes,&linesearch);CHKERRQ(ierr);
  ierr = SNESLineSearchSetType(linesearch,SNESLINESEARCHBASIC);CHKERRQ(ierr);

  ftime    = 1.0;
  maxsteps = 10000;
  ierr     = TSSetDuration(ts,maxsteps,ftime);CHKERRQ(ierr);

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Set initial conditions
   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  ierr = FormInitialSolution(ts,X,&user);CHKERRQ(ierr);
  ierr = TSSetSolution(ts,X);CHKERRQ(ierr);
  ierr = VecGetSize(X,&mx);CHKERRQ(ierr);
  dt   = .1 * PetscMax(user.a[0],user.a[1]) / mx; /* Advective CFL, I don't know why it needs so much safety factor. */
  ierr = TSSetInitialTimeStep(ts,0.0,dt);CHKERRQ(ierr);

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Set runtime options
   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  ierr = TSSetFromOptions(ts);CHKERRQ(ierr);

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Solve nonlinear system
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  ierr = TSSolve(ts,X);CHKERRQ(ierr);
  ierr = TSGetSolveTime(ts,&ftime);CHKERRQ(ierr);
  ierr = TSGetTimeStepNumber(ts,&steps);CHKERRQ(ierr);
  ierr = TSGetConvergedReason(ts,&reason);CHKERRQ(ierr);
  ierr = PetscPrintf(PETSC_COMM_WORLD,"%s at time %G after %D steps\n",TSConvergedReasons[reason],ftime,steps);CHKERRQ(ierr);

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
     Free work space.
   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  ierr = MatDestroy(&J);CHKERRQ(ierr);
  ierr = VecDestroy(&X);CHKERRQ(ierr);
  ierr = TSDestroy(&ts);CHKERRQ(ierr);
  ierr = DMDestroy(&da);CHKERRQ(ierr);
  ierr = PetscFinalize();
  return 0;
}
Beispiel #17
0
/* @ TaoApply_Armijo - This routine performs a linesearch. It
   backtracks until the (nonmonotone) Armijo conditions are satisfied.

   Input Parameters:
+  tao - Tao context
.  X - current iterate (on output X contains new iterate, X + step*S)
.  S - search direction
.  f - merit function evaluated at X
.  G - gradient of merit function evaluated at X
.  W - work vector
-  step - initial estimate of step length

   Output parameters:
+  f - merit function evaluated at new iterate, X + step*S
.  G - gradient of merit function evaluated at new iterate, X + step*S
.  X - new iterate
-  step - final step length

@ */
static PetscErrorCode TaoLineSearchApply_Armijo(TaoLineSearch ls, Vec x, PetscReal *f, Vec g, Vec s)
{
  TaoLineSearch_ARMIJO *armP = (TaoLineSearch_ARMIJO *)ls->data;
  PetscErrorCode       ierr;
  PetscInt             i;
  PetscReal            fact, ref, gdx;
  PetscInt             idx;
  PetscBool            g_computed=PETSC_FALSE; /* to prevent extra gradient computation */

  PetscFunctionBegin;

  ls->reason = TAOLINESEARCH_CONTINUE_ITERATING;
  if (!armP->work) {
    ierr = VecDuplicate(x,&armP->work);CHKERRQ(ierr);
    armP->x = x;
    ierr = PetscObjectReference((PetscObject)armP->x);CHKERRQ(ierr);
  } else if (x != armP->x) {
    /* If x has changed, then recreate work */
    ierr = VecDestroy(&armP->work);CHKERRQ(ierr);
    ierr = VecDuplicate(x,&armP->work);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)armP->x);CHKERRQ(ierr);
    armP->x = x;
    ierr = PetscObjectReference((PetscObject)armP->x);CHKERRQ(ierr);
  }

  /* Check linesearch parameters */
  if (armP->alpha < 1) {
    ierr = PetscInfo1(ls,"Armijo line search error: alpha (%g) < 1\n", (double)armP->alpha);CHKERRQ(ierr);
    ls->reason=TAOLINESEARCH_FAILED_BADPARAMETER;
  } else if ((armP->beta <= 0) || (armP->beta >= 1)) {
    ierr = PetscInfo1(ls,"Armijo line search error: beta (%g) invalid\n", (double)armP->beta);CHKERRQ(ierr);
    ls->reason=TAOLINESEARCH_FAILED_BADPARAMETER;
  } else if ((armP->beta_inf <= 0) || (armP->beta_inf >= 1)) {
    ierr = PetscInfo1(ls,"Armijo line search error: beta_inf (%g) invalid\n", (double)armP->beta_inf);CHKERRQ(ierr);
    ls->reason=TAOLINESEARCH_FAILED_BADPARAMETER;
  } else if ((armP->sigma <= 0) || (armP->sigma >= 0.5)) {
    ierr = PetscInfo1(ls,"Armijo line search error: sigma (%g) invalid\n", (double)armP->sigma);CHKERRQ(ierr);
    ls->reason=TAOLINESEARCH_FAILED_BADPARAMETER;
  } else if (armP->memorySize < 1) {
    ierr = PetscInfo1(ls,"Armijo line search error: memory_size (%D) < 1\n", armP->memorySize);CHKERRQ(ierr);
    ls->reason=TAOLINESEARCH_FAILED_BADPARAMETER;
  } else if ((armP->referencePolicy != REFERENCE_MAX) && (armP->referencePolicy != REFERENCE_AVE) && (armP->referencePolicy != REFERENCE_MEAN)) {
    ierr = PetscInfo(ls,"Armijo line search error: reference_policy invalid\n");CHKERRQ(ierr);
    ls->reason=TAOLINESEARCH_FAILED_BADPARAMETER;
  } else if ((armP->replacementPolicy != REPLACE_FIFO) && (armP->replacementPolicy != REPLACE_MRU)) {
    ierr = PetscInfo(ls,"Armijo line search error: replacement_policy invalid\n");CHKERRQ(ierr);
    ls->reason=TAOLINESEARCH_FAILED_BADPARAMETER;
  } else if (PetscIsInfOrNanReal(*f)) {
    ierr = PetscInfo(ls,"Armijo line search error: initial function inf or nan\n");CHKERRQ(ierr);
    ls->reason=TAOLINESEARCH_FAILED_BADPARAMETER;
  }

  if (ls->reason != TAOLINESEARCH_CONTINUE_ITERATING) {
    PetscFunctionReturn(0);
  }

  /* Check to see of the memory has been allocated.  If not, allocate
     the historical array and populate it with the initial function
     values. */
  if (!armP->memory) {
    ierr = PetscMalloc1(armP->memorySize, &armP->memory );CHKERRQ(ierr);
  }

  if (!armP->memorySetup) {
    for (i = 0; i < armP->memorySize; i++) {
      armP->memory[i] = armP->alpha*(*f);
    }

    armP->current = 0;
    armP->lastReference = armP->memory[0];
    armP->memorySetup=PETSC_TRUE;
  }

  /* Calculate reference value (MAX) */
  ref = armP->memory[0];
  idx = 0;

  for (i = 1; i < armP->memorySize; i++) {
    if (armP->memory[i] > ref) {
      ref = armP->memory[i];
      idx = i;
    }
  }

  if (armP->referencePolicy == REFERENCE_AVE) {
    ref = 0;
    for (i = 0; i < armP->memorySize; i++) {
      ref += armP->memory[i];
    }
    ref = ref / armP->memorySize;
    ref = PetscMax(ref, armP->memory[armP->current]);
  } else if (armP->referencePolicy == REFERENCE_MEAN) {
    ref = PetscMin(ref, 0.5*(armP->lastReference + armP->memory[armP->current]));
  }
  ierr = VecDot(g,s,&gdx);CHKERRQ(ierr);

  if (PetscIsInfOrNanReal(gdx)) {
    ierr = PetscInfo1(ls,"Initial Line Search step * g is Inf or Nan (%g)\n",(double)gdx);CHKERRQ(ierr);
    ls->reason=TAOLINESEARCH_FAILED_INFORNAN;
    PetscFunctionReturn(0);
  }
  if (gdx >= 0.0) {
    ierr = PetscInfo1(ls,"Initial Line Search step is not descent direction (g's=%g)\n",(double)gdx);CHKERRQ(ierr);
    ls->reason = TAOLINESEARCH_FAILED_ASCENT;
    PetscFunctionReturn(0);
  }

  if (armP->nondescending) {
    fact = armP->sigma;
  } else {
    fact = armP->sigma * gdx;
  }
  ls->step = ls->initstep;
  while (ls->step >= ls->stepmin && (ls->nfeval+ls->nfgeval) < ls->max_funcs) {
    /* Calculate iterate */
    ierr = VecCopy(x,armP->work);CHKERRQ(ierr);
    ierr = VecAXPY(armP->work,ls->step,s);CHKERRQ(ierr);
    if (ls->bounded) {
      ierr = VecMedian(ls->lower,armP->work,ls->upper,armP->work);CHKERRQ(ierr);
    }

    /* Calculate function at new iterate */
    if (ls->hasobjective) {
      ierr = TaoLineSearchComputeObjective(ls,armP->work,f);CHKERRQ(ierr);
      g_computed=PETSC_FALSE;
    } else if (ls->usegts) {
      ierr = TaoLineSearchComputeObjectiveAndGTS(ls,armP->work,f,&gdx);CHKERRQ(ierr);
      g_computed=PETSC_FALSE;
    } else {
      ierr = TaoLineSearchComputeObjectiveAndGradient(ls,armP->work,f,g);CHKERRQ(ierr);
      g_computed=PETSC_TRUE;
    }
    if (ls->step == ls->initstep) {
      ls->f_fullstep = *f;
    }

    if (PetscIsInfOrNanReal(*f)) {
      ls->step *= armP->beta_inf;
    } else {
      /* Check descent condition */
      if (armP->nondescending && *f <= ref - ls->step*fact*ref)
        break;
      if (!armP->nondescending && *f <= ref + ls->step*fact) {
        break;
      }

      ls->step *= armP->beta;
    }
  }

  /* Check termination */
  if (PetscIsInfOrNanReal(*f)) {
    ierr = PetscInfo(ls, "Function is inf or nan.\n");CHKERRQ(ierr);
    ls->reason = TAOLINESEARCH_FAILED_INFORNAN;
  } else if (ls->step < ls->stepmin) {
    ierr = PetscInfo(ls, "Step length is below tolerance.\n");CHKERRQ(ierr);
    ls->reason = TAOLINESEARCH_HALTED_RTOL;
  } else if ((ls->nfeval+ls->nfgeval) >= ls->max_funcs) {
    ierr = PetscInfo2(ls, "Number of line search function evals (%D) > maximum allowed (%D)\n",ls->nfeval+ls->nfgeval, ls->max_funcs);CHKERRQ(ierr);
    ls->reason = TAOLINESEARCH_HALTED_MAXFCN;
  }
  if (ls->reason) {
    PetscFunctionReturn(0);
  }

  /* Successful termination, update memory */
  ls->reason = TAOLINESEARCH_SUCCESS;
  armP->lastReference = ref;
  if (armP->replacementPolicy == REPLACE_FIFO) {
    armP->memory[armP->current++] = *f;
    if (armP->current >= armP->memorySize) {
      armP->current = 0;
    }
  } else {
    armP->current = idx;
    armP->memory[idx] = *f;
  }

  /* Update iterate and compute gradient */
  ierr = VecCopy(armP->work,x);CHKERRQ(ierr);
  if (!g_computed) {
    ierr = TaoLineSearchComputeGradient(ls, x, g);CHKERRQ(ierr);
  }
  ierr = PetscInfo2(ls, "%D function evals in line search, step = %g\n",ls->nfeval, (double)ls->step);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Beispiel #18
0
// Approximates continuous L^\infty and L^2 norms of error, normalized by L^\infty and L^2 norms of analytic solution
PetscErrorCode OpIntegrateNorms(Op op,DM dm,Vec U,PetscReal *normInfty,PetscReal *normL2) {
  PetscErrorCode ierr;
  Vec X,Uloc;
  DM dmx;
  const PetscScalar *x,*u;
  const PetscReal *B,*D,*w3;
  PetscReal L[3];
  struct {PetscReal error,u;} sumInfty={},sum2={};
  PetscInt nelem,ne = op->ne,P,Q,P3,Q3;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(OP_IntegrateNorms,dm,U,0,0);CHKERRQ(ierr);
  ierr = DMFEGetTensorEval(dm,&P,&Q,&B,&D,NULL,NULL,&w3);CHKERRQ(ierr);
  P3 = P*P*P;
  Q3 = Q*Q*Q;

  ierr = DMFEGetUniformCoordinates(dm,L);CHKERRQ(ierr);
  ierr = DMGetCoordinateDM(dm,&dmx);CHKERRQ(ierr);
  ierr = DMGetCoordinatesLocal(dm,&X);CHKERRQ(ierr);
  ierr = DMGetLocalVector(dm,&Uloc);CHKERRQ(ierr);
  ierr = DMGlobalToLocalBegin(dm,U,INSERT_VALUES,Uloc);CHKERRQ(ierr);
  ierr = DMGlobalToLocalEnd(dm,U,INSERT_VALUES,Uloc);CHKERRQ(ierr);
  ierr = DMFEGetNumElements(dm,&nelem);CHKERRQ(ierr);
  ierr = VecGetArrayRead(X,&x);CHKERRQ(ierr);
  ierr = VecGetArrayRead(Uloc,&u);CHKERRQ(ierr);

  for (PetscInt e=0; e<nelem; e+=ne) {
    PetscScalar ue[op->dof*P3*ne]_align,uq[op->dof][Q3][ne]_align,xe[3*P3*ne]_align,xq[3][Q3][ne]_align,dx[3][3][Q3][ne]_align,wdxdet[Q3][ne]_align;

    ierr = DMFEExtractElements(dmx,x,e,ne,xe);CHKERRQ(ierr);
    ierr = PetscMemzero(xq,sizeof xq);CHKERRQ(ierr);
    ierr = TensorContract(op->Tensor3,B,B,B,TENSOR_EVAL,xe,xq[0][0]);CHKERRQ(ierr);
    ierr = PetscMemzero(dx,sizeof dx);CHKERRQ(ierr);
    ierr = TensorContract(op->Tensor3,D,B,B,TENSOR_EVAL,xe,dx[0][0][0]);CHKERRQ(ierr);
    ierr = TensorContract(op->Tensor3,B,D,B,TENSOR_EVAL,xe,dx[1][0][0]);CHKERRQ(ierr);
    ierr = TensorContract(op->Tensor3,B,B,D,TENSOR_EVAL,xe,dx[2][0][0]);CHKERRQ(ierr);
    ierr = PointwiseJacobianInvert(ne,Q3,w3,dx,wdxdet);CHKERRQ(ierr);

    ierr = DMFEExtractElements(dm,u,e,ne,ue);CHKERRQ(ierr);
    ierr = PetscMemzero(uq,sizeof uq);CHKERRQ(ierr);
    ierr = TensorContract(op->TensorDOF,B,B,B,TENSOR_EVAL,ue,uq[0][0]);CHKERRQ(ierr);

    for (PetscInt i=0; i<Q3; i++) {
      for (PetscInt l=0; l<ne; l++) {
        PetscReal xx[] = {xq[0][i][l],xq[1][i][l],xq[2][i][l]};
        PetscScalar uql[op->dof],fql[op->dof];
        ierr = (op->PointwiseSolution)(op,xx,L,uql);CHKERRQ(ierr);
        ierr = (op->PointwiseForcing)(op,xx,L,fql);CHKERRQ(ierr);
        for (PetscInt d=0; d<op->dof; d++) {
          PetscReal error = uq[d][i][l] - uql[d];
          sumInfty.error = PetscMax(sumInfty.error,PetscAbs(error));
          sumInfty.u     = PetscMax(sumInfty.u    ,PetscAbs(uql[d]));
          sum2.error    += PetscSqr(error) * wdxdet[i][l];
          sum2.u        += PetscSqr(uql[d]) * wdxdet[i][l];
        }
      }
    }
  }
  ierr = VecRestoreArrayRead(X,&x);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(Uloc,&u);CHKERRQ(ierr);
  ierr = DMRestoreLocalVector(dm,&Uloc);CHKERRQ(ierr);
  ierr = MPI_Allreduce(MPI_IN_PLACE,(void*)&sumInfty,2,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)dm));CHKERRQ(ierr);
  ierr = MPI_Allreduce(MPI_IN_PLACE,(void*)&sum2,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)dm));CHKERRQ(ierr);
  *normInfty = sumInfty.error/sumInfty.u;
  *normL2    = PetscSqrtReal(sum2.error)/PetscSqrtReal(sum2.u);
  ierr = PetscLogEventEnd(OP_IntegrateNorms,dm,U,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
static PetscErrorCode  SNESLineSearchApply_NLEQERR(SNESLineSearch linesearch)
{
  PetscBool              changed_y,changed_w;
  PetscErrorCode         ierr;
  Vec                    X,F,Y,W,G;
  SNES                   snes;
  PetscReal              fnorm, xnorm, ynorm, gnorm, wnorm;
  PetscReal              lambda, minlambda, stol;
  PetscViewer            monitor;
  PetscInt               max_its, count, snes_iteration;
  PetscReal              theta, mudash, lambdadash;
  SNESLineSearch_NLEQERR *nleqerr = (SNESLineSearch_NLEQERR*)linesearch->data;
  KSPConvergedReason     kspreason;

  PetscFunctionBegin;
  ierr = PetscCitationsRegister(NLEQERR_citation, &NLEQERR_cited);CHKERRQ(ierr);

  ierr = SNESLineSearchGetVecs(linesearch, &X, &F, &Y, &W, &G);CHKERRQ(ierr);
  ierr = SNESLineSearchGetNorms(linesearch, &xnorm, &fnorm, &ynorm);CHKERRQ(ierr);
  ierr = SNESLineSearchGetLambda(linesearch, &lambda);CHKERRQ(ierr);
  ierr = SNESLineSearchGetSNES(linesearch, &snes);CHKERRQ(ierr);
  ierr = SNESLineSearchGetDefaultMonitor(linesearch, &monitor);CHKERRQ(ierr);
  ierr = SNESLineSearchGetTolerances(linesearch,&minlambda,NULL,NULL,NULL,NULL,&max_its);CHKERRQ(ierr);
  ierr = SNESGetTolerances(snes,NULL,NULL,&stol,NULL,NULL);CHKERRQ(ierr);

  /* reset the state of the Lipschitz estimates */
  ierr = SNESGetIterationNumber(snes, &snes_iteration);CHKERRQ(ierr);
  if (!snes_iteration) {
    ierr = SNESLineSearchReset_NLEQERR(linesearch);CHKERRQ(ierr);
  }

  /* precheck */
  ierr = SNESLineSearchPreCheck(linesearch,X,Y,&changed_y);CHKERRQ(ierr);
  ierr = SNESLineSearchSetReason(linesearch, SNES_LINESEARCH_SUCCEEDED);CHKERRQ(ierr);

  ierr = VecNormBegin(Y, NORM_2, &ynorm);CHKERRQ(ierr);
  ierr = VecNormBegin(X, NORM_2, &xnorm);CHKERRQ(ierr);
  ierr = VecNormEnd(Y, NORM_2, &ynorm);CHKERRQ(ierr);
  ierr = VecNormEnd(X, NORM_2, &xnorm);CHKERRQ(ierr);

  /* Note: Y is *minus* the Newton step. For whatever reason PETSc doesn't solve with the minus on  the RHS. */

  if (ynorm == 0.0) {
    if (monitor) {
      ierr = PetscViewerASCIIAddTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(monitor,"    Line search: Initial direction and size is 0\n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISubtractTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr);
    }
    ierr = VecCopy(X,W);CHKERRQ(ierr);
    ierr = VecCopy(F,G);CHKERRQ(ierr);
    ierr = SNESLineSearchSetNorms(linesearch,xnorm,fnorm,ynorm);CHKERRQ(ierr);
    ierr = SNESLineSearchSetReason(linesearch, SNES_LINESEARCH_FAILED_REDUCT);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* At this point, we've solved the Newton system for delta_x, and we assume that
     its norm is greater than the solution tolerance (otherwise we wouldn't be in
     here). So let's go ahead and estimate the Lipschitz constant. 

     W contains bar_delta_x_prev at this point. */

  if (monitor) {
    ierr = PetscViewerASCIIAddTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(monitor,"    Line search: norm of Newton step: %14.12e\n", (double) ynorm);CHKERRQ(ierr);
    ierr = PetscViewerASCIISubtractTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr);
  }

  /* this needs information from a previous iteration, so can't do it on the first one */
  if (nleqerr->norm_delta_x_prev > 0 && nleqerr->norm_bar_delta_x_prev > 0) {
    ierr = VecWAXPY(G, +1.0, Y, W);CHKERRQ(ierr); /* bar_delta_x - delta_x; +1 because Y is -delta_x */
    ierr = VecNormBegin(G, NORM_2, &gnorm);CHKERRQ(ierr);
    ierr = VecNormEnd(G, NORM_2, &gnorm);CHKERRQ(ierr);

    nleqerr->mu_curr = nleqerr->lambda_prev * (nleqerr->norm_delta_x_prev * nleqerr->norm_bar_delta_x_prev) / (gnorm * ynorm);
    lambda = PetscMin(1.0, nleqerr->mu_curr);

    if (monitor) {
      ierr = PetscViewerASCIIAddTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(monitor,"    Line search: Lipschitz estimate: %14.12e; lambda: %14.12e\n", (double) nleqerr->mu_curr, (double) lambda);CHKERRQ(ierr);
      ierr = PetscViewerASCIISubtractTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr);
    }
  } else {
    lambda = linesearch->damping;
  }

  /* The main while loop of the algorithm. 
     At the end of this while loop, G should have the accepted new X in it. */

  count = 0;
  while (PETSC_TRUE) {
    if (monitor) {
      ierr = PetscViewerASCIIAddTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(monitor,"    Line search: entering iteration with lambda: %14.12e\n", lambda);CHKERRQ(ierr);
      ierr = PetscViewerASCIISubtractTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr);
    }

    /* Check that we haven't performed too many iterations */
    count += 1;
    if (count >= max_its) {
      if (monitor) {
        ierr = PetscViewerASCIIAddTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(monitor,"    Line search: maximum iterations reached\n");CHKERRQ(ierr);
        ierr = PetscViewerASCIISubtractTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr);
      }
      ierr = SNESLineSearchSetReason(linesearch, SNES_LINESEARCH_FAILED_REDUCT);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }

    /* Now comes the Regularity Test. */
    if (lambda <= minlambda) {
      /* This isn't what is suggested by Deuflhard, but it works better in my experience */
      if (monitor) {
        ierr = PetscViewerASCIIAddTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(monitor,"    Line search: lambda has reached lambdamin, taking full Newton step\n");CHKERRQ(ierr);
        ierr = PetscViewerASCIISubtractTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr);
      }
      lambda = 1.0;
      ierr = VecWAXPY(G, -lambda, Y, X);CHKERRQ(ierr);

      /* and clean up the state for next time */
      ierr = SNESLineSearchReset_NLEQERR(linesearch);CHKERRQ(ierr);
      /*
         The clang static analyzer detected a problem here; once the loop is broken the values
         nleqerr->norm_delta_x_prev     = ynorm;
         nleqerr->norm_bar_delta_x_prev = wnorm;
         are set, but wnorm has not even been computed.
         I don't know if this is the correct fix but by setting ynorm and wnorm to -1.0 at
         least the linesearch object is kept in the state set by the SNESLineSearchReset_NLEQERR() call above
      */
      ynorm = wnorm = -1.0;
      break;
    }

    /* Compute new trial iterate */
    ierr = VecWAXPY(W, -lambda, Y, X);CHKERRQ(ierr);
    ierr = SNESComputeFunction(snes, W, G);CHKERRQ(ierr);

    /* Solve linear system for bar_delta_x_curr: old Jacobian, new RHS. Note absence of minus sign, compared to Deuflhard, in keeping with PETSc convention */
    ierr = KSPSolve(snes->ksp, G, W);CHKERRQ(ierr);
    ierr = KSPGetConvergedReason(snes->ksp, &kspreason);CHKERRQ(ierr);
    if (kspreason < 0) {
      ierr = PetscInfo(snes,"Solution for \\bar{delta x}^{k+1} failed.");CHKERRQ(ierr);
    }

    /* W now contains -bar_delta_x_curr. */

    ierr = VecNorm(W, NORM_2, &wnorm);CHKERRQ(ierr);
    if (monitor) {
      ierr = PetscViewerASCIIAddTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(monitor,"    Line search: norm of simplified Newton update: %14.12e\n", (double) wnorm);CHKERRQ(ierr);
      ierr = PetscViewerASCIISubtractTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr);
    }

    /* compute the monitoring quantities theta and mudash. */

    theta = wnorm / ynorm;

    ierr = VecWAXPY(G, -(1.0 - lambda), Y, W);CHKERRQ(ierr);
    ierr = VecNorm(G, NORM_2, &gnorm);CHKERRQ(ierr);

    mudash = (0.5 * ynorm * lambda * lambda) / gnorm;

    /* Check for termination of the linesearch */
    if (theta >= 1.0) {
      /* need to go around again with smaller lambda */
      if (monitor) {
        ierr = PetscViewerASCIIAddTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(monitor,"    Line search: monotonicity check failed, ratio: %14.12e\n", (double) theta);CHKERRQ(ierr);
        ierr = PetscViewerASCIISubtractTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr);
      }
      lambda = PetscMin(mudash, 0.5 * lambda);
      lambda = PetscMax(lambda, minlambda);
      /* continue through the loop, i.e. go back to regularity test */
    } else {
      /* linesearch terminated */
      lambdadash = PetscMin(1.0, mudash);

      if (lambdadash == 1.0 && lambda == 1.0 && wnorm <= stol) {
        /* store the updated state, X - Y - W, in G:
           I need to keep W for the next linesearch */
        ierr = VecCopy(X, G);CHKERRQ(ierr);
        ierr = VecAXPY(G, -1.0, Y);CHKERRQ(ierr);
        ierr = VecAXPY(G, -1.0, W);CHKERRQ(ierr);
        break;
      }

      /* Deuflhard suggests to add the following:
      else if (lambdadash >= 4.0 * lambda) {
        lambda = lambdadash;
      }
      to continue through the loop, i.e. go back to regularity test.
      I deliberately exclude this, as I have practical experience of this
      getting stuck in infinite loops (on e.g. an Allen--Cahn problem). */

      else {
        /* accept iterate without adding on, i.e. don't use bar_delta_x;
           again, I need to keep W for the next linesearch */
        ierr = VecWAXPY(G, -lambda, Y, X);CHKERRQ(ierr);
        break;
      }
    }
  }

  if (linesearch->ops->viproject) {
    ierr = (*linesearch->ops->viproject)(snes, G);CHKERRQ(ierr);
  }

  /* W currently contains -bar_delta_u. Scale it so that it contains bar_delta_u. */
  ierr = VecScale(W, -1.0);CHKERRQ(ierr);

  /* postcheck */
  ierr = SNESLineSearchPostCheck(linesearch,X,Y,G,&changed_y,&changed_w);CHKERRQ(ierr);
  if (changed_y || changed_w) {
    ierr = SNESLineSearchSetReason(linesearch, SNES_LINESEARCH_FAILED_USER);CHKERRQ(ierr);
    ierr = PetscInfo(snes,"Changing the search direction here doesn't make sense.\n");CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* copy the solution and information from this iteration over */
  nleqerr->norm_delta_x_prev     = ynorm;
  nleqerr->norm_bar_delta_x_prev = wnorm;
  nleqerr->lambda_prev           = lambda;

  ierr = VecCopy(G, X);CHKERRQ(ierr);
  ierr = SNESComputeFunction(snes, X, F);CHKERRQ(ierr);
  ierr = VecNorm(X, NORM_2, &xnorm);CHKERRQ(ierr);
  ierr = VecNorm(F, NORM_2, &fnorm);CHKERRQ(ierr);
  ierr = SNESLineSearchSetLambda(linesearch, lambda);CHKERRQ(ierr);
  ierr = SNESLineSearchSetNorms(linesearch, xnorm, fnorm, (ynorm < 0 ? PETSC_INFINITY : ynorm));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Beispiel #20
0
PetscErrorCode PCBDDCGraphComputeConnectedComponents(PCBDDCGraph graph)
{
  PetscBool      adapt_interface_reduced;
  MPI_Comm       interface_comm;
  PetscMPIInt    size;
  PetscInt       i;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* compute connected components locally */
  ierr = PetscObjectGetComm((PetscObject)(graph->l2gmap),&interface_comm);CHKERRQ(ierr);
  ierr = PCBDDCGraphComputeConnectedComponentsLocal(graph);CHKERRQ(ierr);
  /* check consistency of connected components among neighbouring subdomains -> it adapt them in case it is needed */
  ierr = MPI_Comm_size(interface_comm,&size);CHKERRQ(ierr);
  adapt_interface_reduced = PETSC_FALSE;
  if (size > 1) {
    PetscInt i;
    PetscBool adapt_interface = PETSC_FALSE;
    for (i=0;i<graph->n_subsets;i++) {
      /* We are not sure that on a given subset of the local interface,
         with two connected components, the latters be the same among sharing subdomains */
      if (graph->subset_ncc[i] > 1) {
        adapt_interface = PETSC_TRUE;
        break;
      }
    }
    ierr = MPIU_Allreduce(&adapt_interface,&adapt_interface_reduced,1,MPIU_BOOL,MPI_LOR,interface_comm);CHKERRQ(ierr);
  }

  if (graph->n_subsets && adapt_interface_reduced) {
    PetscBT     subset_cc_adapt;
    MPI_Request *send_requests,*recv_requests;
    PetscInt    *send_buffer,*recv_buffer;
    PetscInt    sum_requests,start_of_recv,start_of_send;
    PetscInt    *cum_recv_counts;
    PetscInt    *labels;
    PetscInt    ncc,cum_queue,mss,mns,j,k,s;
    PetscInt    **refine_buffer=NULL,*private_labels = NULL;

    ierr = PetscMalloc1(graph->nvtxs,&labels);CHKERRQ(ierr);
    ierr = PetscMemzero(labels,graph->nvtxs*sizeof(*labels));CHKERRQ(ierr);
    for (i=0;i<graph->ncc;i++)
      for (j=graph->cptr[i];j<graph->cptr[i+1];j++)
        labels[graph->queue[j]] = i;

    /* allocate some space */
    ierr = PetscMalloc1(graph->n_subsets+1,&cum_recv_counts);CHKERRQ(ierr);
    ierr = PetscMemzero(cum_recv_counts,(graph->n_subsets+1)*sizeof(*cum_recv_counts));CHKERRQ(ierr);

    /* first count how many neighbours per connected component I will receive from */
    cum_recv_counts[0] = 0;
    for (i=0;i<graph->n_subsets;i++) cum_recv_counts[i+1] = cum_recv_counts[i]+graph->count[graph->subset_idxs[i][0]];
    ierr = PetscMalloc1(cum_recv_counts[graph->n_subsets],&recv_buffer);CHKERRQ(ierr);
    ierr = PetscMalloc2(cum_recv_counts[graph->n_subsets],&send_requests,cum_recv_counts[graph->n_subsets],&recv_requests);CHKERRQ(ierr);
    for (i=0;i<cum_recv_counts[graph->n_subsets];i++) {
      send_requests[i] = MPI_REQUEST_NULL;
      recv_requests[i] = MPI_REQUEST_NULL;
    }

    /* exchange with my neighbours the number of my connected components on the subset of interface */
    sum_requests = 0;
    for (i=0;i<graph->n_subsets;i++) {
      PetscMPIInt neigh,tag;
      PetscInt    count,*neighs;

      count = graph->count[graph->subset_idxs[i][0]];
      neighs = graph->neighbours_set[graph->subset_idxs[i][0]];
      ierr = PetscMPIIntCast(2*graph->subset_ref_node[i],&tag);CHKERRQ(ierr);
      for (k=0;k<count;k++) {
        ierr = PetscMPIIntCast(neighs[k],&neigh);CHKERRQ(ierr);
        ierr = MPI_Isend(&graph->subset_ncc[i],1,MPIU_INT,neigh,tag,interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr);
        ierr = MPI_Irecv(&recv_buffer[sum_requests],1,MPIU_INT,neigh,tag,interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr);
        sum_requests++;
      }
    }
    ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr);

    /* determine the subsets I have to adapt (those having more than 1 cc) */
    ierr = PetscBTCreate(graph->n_subsets,&subset_cc_adapt);CHKERRQ(ierr);
    ierr = PetscBTMemzero(graph->n_subsets,subset_cc_adapt);CHKERRQ(ierr);
    for (i=0;i<graph->n_subsets;i++) {
      if (graph->subset_ncc[i] > 1) {
        ierr = PetscBTSet(subset_cc_adapt,i);CHKERRQ(ierr);
        continue;
      }
      for (j=cum_recv_counts[i];j<cum_recv_counts[i+1];j++){
         if (recv_buffer[j] > 1) {
          ierr = PetscBTSet(subset_cc_adapt,i);CHKERRQ(ierr);
          break;
        }
      }
    }
    ierr = PetscFree(recv_buffer);CHKERRQ(ierr);

    /* determine send/recv buffers sizes */
    j = 0;
    mss = 0;
    for (i=0;i<graph->n_subsets;i++) {
      if (PetscBTLookup(subset_cc_adapt,i)) {
        j += graph->subset_size[i];
        mss = PetscMax(graph->subset_size[i],mss);
      }
    }
    k = 0;
    mns = 0;
    for (i=0;i<graph->n_subsets;i++) {
      if (PetscBTLookup(subset_cc_adapt,i)) {
        k += (cum_recv_counts[i+1]-cum_recv_counts[i])*graph->subset_size[i];
        mns = PetscMax(cum_recv_counts[i+1]-cum_recv_counts[i],mns);
      }
    }
    ierr = PetscMalloc2(j,&send_buffer,k,&recv_buffer);CHKERRQ(ierr);

    /* fill send buffer (order matters: subset_idxs ordered by global ordering) */
    j = 0;
    for (i=0;i<graph->n_subsets;i++)
      if (PetscBTLookup(subset_cc_adapt,i))
        for (k=0;k<graph->subset_size[i];k++)
          send_buffer[j++] = labels[graph->subset_idxs[i][k]];

    /* now exchange the data */
    start_of_recv = 0;
    start_of_send = 0;
    sum_requests = 0;
    for (i=0;i<graph->n_subsets;i++) {
      if (PetscBTLookup(subset_cc_adapt,i)) {
        PetscMPIInt neigh,tag;
        PetscInt    size_of_send = graph->subset_size[i];

        j = graph->subset_idxs[i][0];
        ierr = PetscMPIIntCast(2*graph->subset_ref_node[i]+1,&tag);CHKERRQ(ierr);
        for (k=0;k<graph->count[j];k++) {
          ierr = PetscMPIIntCast(graph->neighbours_set[j][k],&neigh);CHKERRQ(ierr);
          ierr = MPI_Isend(&send_buffer[start_of_send],size_of_send,MPIU_INT,neigh,tag,interface_comm,&send_requests[sum_requests]);CHKERRQ(ierr);
          ierr = MPI_Irecv(&recv_buffer[start_of_recv],size_of_send,MPIU_INT,neigh,tag,interface_comm,&recv_requests[sum_requests]);CHKERRQ(ierr);
          start_of_recv += size_of_send;
          sum_requests++;
        }
        start_of_send += size_of_send;
      }
    }
    ierr = MPI_Waitall(sum_requests,recv_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr);

    /* refine connected components */
    start_of_recv = 0;
    /* allocate some temporary space */
    if (mss) {
      ierr = PetscMalloc1(mss,&refine_buffer);CHKERRQ(ierr);
      ierr = PetscMalloc2(mss*(mns+1),&refine_buffer[0],mss,&private_labels);CHKERRQ(ierr);
    }
    ncc = 0;
    cum_queue = 0;
    graph->cptr[0] = 0;
    for (i=0;i<graph->n_subsets;i++) {
      if (PetscBTLookup(subset_cc_adapt,i)) {
        PetscInt subset_counter = 0;
        PetscInt sharingprocs = cum_recv_counts[i+1]-cum_recv_counts[i]+1; /* count myself */
        PetscInt buffer_size = graph->subset_size[i];

        /* compute pointers */
        for (j=1;j<buffer_size;j++) refine_buffer[j] = refine_buffer[j-1] + sharingprocs;
        /* analyze contributions from subdomains that share the i-th subset
           The stricture of refine_buffer is suitable to find intersections of ccs among sharingprocs.
           supposing the current subset is shared by 3 processes and has dimension 5 with global dofs 0,1,2,3,4 (local 0,4,3,1,2)
           sharing procs connected components:
             neigh 0: [0 1 4], [2 3], labels [4,7]  (2 connected components)
             neigh 1: [0 1], [2 3 4], labels [3 2]  (2 connected components)
             neigh 2: [0 4], [1], [2 3], labels [1 5 6] (3 connected components)
           refine_buffer will be filled as:
             [ 4, 3, 1;
               4, 2, 1;
               7, 2, 6;
               4, 3, 5;
               7, 2, 6; ];
           The connected components in local ordering are [0], [1], [2 3], [4] */
        /* fill temp_buffer */
        for (k=0;k<buffer_size;k++) refine_buffer[k][0] = labels[graph->subset_idxs[i][k]];
        for (j=0;j<sharingprocs-1;j++) {
          for (k=0;k<buffer_size;k++) refine_buffer[k][j+1] = recv_buffer[start_of_recv+k];
          start_of_recv += buffer_size;
        }
        ierr = PetscMemzero(private_labels,buffer_size*sizeof(PetscInt));CHKERRQ(ierr);
        for (j=0;j<buffer_size;j++) {
          if (!private_labels[j]) { /* found a new cc  */
            PetscBool same_set;

            graph->cptr[ncc] = cum_queue;
            ncc++;
            subset_counter++;
            private_labels[j] = subset_counter;
            graph->queue[cum_queue++] = graph->subset_idxs[i][j];
            for (k=j+1;k<buffer_size;k++) { /* check for other nodes in new cc */
              same_set = PETSC_TRUE;
              for (s=0;s<sharingprocs;s++) {
                if (refine_buffer[j][s] != refine_buffer[k][s]) {
                  same_set = PETSC_FALSE;
                  break;
                }
              }
              if (same_set) {
                private_labels[k] = subset_counter;
                graph->queue[cum_queue++] = graph->subset_idxs[i][k];
              }
            }
          }
        }
        graph->cptr[ncc] = cum_queue;
        graph->subset_ncc[i] = subset_counter;
        graph->queue_sorted = PETSC_FALSE;
      } else { /* this subset does not need to be adapted */
        ierr = PetscMemcpy(graph->queue+cum_queue,graph->subset_idxs[i],graph->subset_size[i]*sizeof(PetscInt));CHKERRQ(ierr);
        ncc++;
        cum_queue += graph->subset_size[i];
        graph->cptr[ncc] = cum_queue;
      }
    }
    graph->cptr[ncc] = cum_queue;
    graph->ncc = ncc;
    if (mss) {
      ierr = PetscFree2(refine_buffer[0],private_labels);CHKERRQ(ierr);
      ierr = PetscFree(refine_buffer);CHKERRQ(ierr);
    }
    ierr = PetscFree(labels);CHKERRQ(ierr);
    ierr = MPI_Waitall(sum_requests,send_requests,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    ierr = PetscFree2(send_requests,recv_requests);CHKERRQ(ierr);
    ierr = PetscFree2(send_buffer,recv_buffer);CHKERRQ(ierr);
    ierr = PetscFree(cum_recv_counts);CHKERRQ(ierr);
    ierr = PetscBTDestroy(&subset_cc_adapt);CHKERRQ(ierr);
  }

  /* Determine if we are in 2D or 3D */
  if (!graph->twodimset) {
    PetscBool twodim = PETSC_TRUE;
    for (i=0;i<graph->ncc;i++) {
      PetscInt repdof = graph->queue[graph->cptr[i]];
      PetscInt ccsize = graph->cptr[i+1]-graph->cptr[i];
      if (graph->count[repdof] > 1 && ccsize > graph->custom_minimal_size) {
        twodim = PETSC_FALSE;
        break;
      }
    }
    ierr = MPIU_Allreduce(&twodim,&graph->twodim,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)graph->l2gmap));CHKERRQ(ierr);
    graph->twodimset = PETSC_TRUE;
  }
  PetscFunctionReturn(0);
}
Beispiel #21
0
PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
    Mat            B;
    Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
    PetscErrorCode ierr;
    PetscInt       bs = A->rmap->bs,*ai = a->i,*aj = a->j,n = A->rmap->N/bs,i,j,k;
    PetscInt       *rowlengths,*rows,*cols,maxlen = 0,ncols;
    MatScalar      *aa = a->a;

    PetscFunctionBegin;
    ierr = PetscMalloc1(n*bs,&rowlengths);
    CHKERRQ(ierr);
    for (i=0; i<n; i++) {
        maxlen = PetscMax(maxlen,(ai[i+1] - ai[i]));
        for (j=0; j<bs; j++) {
            rowlengths[i*bs+j] = bs*(ai[i+1] - ai[i]);
        }
    }
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);
    CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
    CHKERRQ(ierr);
    ierr = MatSetType(B,MATSEQAIJ);
    CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(B,0,rowlengths);
    CHKERRQ(ierr);
    ierr = MatSetOption(B,MAT_ROW_ORIENTED,PETSC_FALSE);
    CHKERRQ(ierr);
    ierr = PetscFree(rowlengths);
    CHKERRQ(ierr);

    ierr = PetscMalloc1(bs,&rows);
    CHKERRQ(ierr);
    ierr = PetscMalloc1(bs*maxlen,&cols);
    CHKERRQ(ierr);
    for (i=0; i<n; i++) {
        for (j=0; j<bs; j++) {
            rows[j] = i*bs+j;
        }
        ncols = ai[i+1] - ai[i];
        for (k=0; k<ncols; k++) {
            for (j=0; j<bs; j++) {
                cols[k*bs+j] = bs*(*aj) + j;
            }
            aj++;
        }
        ierr = MatSetValues(B,bs,rows,bs*ncols,cols,aa,INSERT_VALUES);
        CHKERRQ(ierr);
        aa  += ncols*bs*bs;
    }
    ierr = PetscFree(cols);
    CHKERRQ(ierr);
    ierr = PetscFree(rows);
    CHKERRQ(ierr);
    ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
    CHKERRQ(ierr);
    ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
    CHKERRQ(ierr);

    B->rmap->bs = A->rmap->bs;

    if (reuse == MAT_REUSE_MATRIX) {
        ierr = MatHeaderReplace(A,B);
        CHKERRQ(ierr);
    } else {
        *newmat = B;
    }
    PetscFunctionReturn(0);
}
Beispiel #22
0
static PetscErrorCode TestCellShape(DM dm)
{
  PetscMPIInt    rank;
  PetscInt       dim, c, cStart, cEnd, count = 0;
  ex1_stats_t    stats, globalStats;
  PetscReal      *J, *invJ, min = 0, max = 0, mean = 0, stdev = 0;
  MPI_Comm       comm = PetscObjectComm((PetscObject)dm);
  DM             dmCoarse;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  stats.min = PETSC_MAX_REAL;
  stats.max = PETSC_MIN_REAL;
  stats.sum = stats.squaresum = 0.;
  stats.count = 0;

  ierr = DMGetDimension(dm,&dim);CHKERRQ(ierr);

  ierr = PetscMalloc2(dim * dim, &J, dim * dim, &invJ);CHKERRQ(ierr);

  ierr = DMPlexGetHeightStratum(dm,0,&cStart,&cEnd);CHKERRQ(ierr);
  for (c = cStart; c < cEnd; c++) {
    PetscInt  i;
    PetscReal frobJ = 0., frobInvJ = 0., cond2, cond, detJ;

    ierr = DMPlexComputeCellGeometryAffineFEM(dm,c,NULL,J,invJ,&detJ);CHKERRQ(ierr);

    for (i = 0; i < dim * dim; i++) {
      frobJ += J[i] * J[i];
      frobInvJ += invJ[i] * invJ[i];
    }
    cond2 = frobJ * frobInvJ;
    cond  = PetscSqrtReal(cond2);

    stats.min = PetscMin(stats.min,cond);
    stats.max = PetscMax(stats.max,cond);
    stats.sum += cond;
    stats.squaresum += cond2;
    stats.count++;
  }

  {
    PetscMPIInt    blockLengths[2] = {4,1};
    MPI_Aint       blockOffsets[2] = {offsetof(ex1_stats_t,min),offsetof(ex1_stats_t,count)};
    MPI_Datatype   blockTypes[2]   = {MPIU_REAL,MPIU_INT}, statType;
    MPI_Op         statReduce;

    ierr = MPI_Type_create_struct(2,blockLengths,blockOffsets,blockTypes,&statType);CHKERRQ(ierr);
    ierr = MPI_Type_commit(&statType);CHKERRQ(ierr);
    ierr = MPI_Op_create(ex1_stats_reduce, PETSC_TRUE, &statReduce);CHKERRQ(ierr);
    ierr = MPI_Reduce(&stats,&globalStats,1,statType,statReduce,0,comm);CHKERRQ(ierr);
    ierr = MPI_Op_free(&statReduce);CHKERRQ(ierr);
    ierr = MPI_Type_free(&statType);CHKERRQ(ierr);
  }

  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    count = globalStats.count;
    min = globalStats.min;
    max = globalStats.max;
    mean = globalStats.sum / globalStats.count;
    stdev = PetscSqrtReal(globalStats.squaresum / globalStats.count - mean * mean);
  }
  ierr = PetscPrintf(comm,"Mesh with %d cells, shape condition numbers: min = %g, max = %g, mean = %g, stddev = %g\n", count, (double) min, (double) max, (double) mean, (double) stdev);

  ierr = PetscFree2(J,invJ);CHKERRQ(ierr);

  ierr = DMPlexGetCoarseDM(dm,&dmCoarse);CHKERRQ(ierr);
  if (dmCoarse) {
    ierr = TestCellShape(dmCoarse);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
Beispiel #23
0
PetscErrorCode EPSSetUp_XD(EPS eps)
{
  PetscErrorCode ierr;
  EPS_DAVIDSON   *data = (EPS_DAVIDSON*)eps->data;
  dvdDashboard   *dvd = &data->ddb;
  dvdBlackboard  b;
  PetscInt       min_size_V,plusk,bs,initv,i,cX_in_proj,cX_in_impr,nmat;
  Mat            A,B;
  KSP            ksp;
  PetscBool      t,ipB,ispositive,dynamic;
  HarmType_t     harm;
  InitType_t     init;
  PetscReal      fix;
  PetscScalar    target;

  PetscFunctionBegin;
  /* Setup EPS options and get the problem specification */
  ierr = EPSXDGetBlockSize_XD(eps,&bs);CHKERRQ(ierr);
  if (bs <= 0) bs = 1;
  if (eps->ncv) {
    if (eps->ncv<eps->nev) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"The value of ncv must be at least nev");
  } else if (eps->mpd) eps->ncv = eps->mpd + eps->nev + bs;
  else if (eps->nev<500) eps->ncv = PetscMin(eps->n-bs,PetscMax(2*eps->nev,eps->nev+15))+bs;
  else eps->ncv = PetscMin(eps->n-bs,eps->nev+500)+bs;
  if (!eps->mpd) eps->mpd = eps->ncv;
  if (eps->mpd > eps->ncv) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"The mpd has to be less or equal than ncv");
  if (eps->mpd < 2) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"The mpd has to be greater than 2");
  if (!eps->max_it) eps->max_it = PetscMax(100*eps->ncv,2*eps->n);
  if (!eps->which) eps->which = EPS_LARGEST_MAGNITUDE;
  if (eps->ishermitian && (eps->which==EPS_LARGEST_IMAGINARY || eps->which==EPS_SMALLEST_IMAGINARY)) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"Wrong value of eps->which");
  if (!(eps->nev + bs <= eps->ncv)) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"The ncv has to be greater than nev plus blocksize");
  if (eps->trueres) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"-eps_true_residual is temporally disable in this solver.");

  ierr = EPSXDGetRestart_XD(eps,&min_size_V,&plusk);CHKERRQ(ierr);
  if (!min_size_V) min_size_V = PetscMin(PetscMax(bs,5),eps->mpd/2);
  if (!(min_size_V+bs <= eps->mpd)) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"The value of minv must be less than mpd minus blocksize");
  ierr = EPSXDGetInitialSize_XD(eps,&initv);CHKERRQ(ierr);
  if (eps->mpd < initv) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"The initv has to be less or equal than mpd");

  /* Set STPrecond as the default ST */
  if (!((PetscObject)eps->st)->type_name) {
    ierr = STSetType(eps->st,STPRECOND);CHKERRQ(ierr);
  }
  ierr = STPrecondSetKSPHasMat(eps->st,PETSC_FALSE);CHKERRQ(ierr);

  /* Change the default sigma to inf if necessary */
  if (eps->which == EPS_LARGEST_MAGNITUDE || eps->which == EPS_LARGEST_REAL ||
      eps->which == EPS_LARGEST_IMAGINARY) {
    ierr = STSetDefaultShift(eps->st,PETSC_MAX_REAL);CHKERRQ(ierr);
  }

  /* Davidson solvers only support STPRECOND */
  ierr = STSetUp(eps->st);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)eps->st,STPRECOND,&t);CHKERRQ(ierr);
  if (!t) SETERRQ1(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"%s only works with precond spectral transformation",
    ((PetscObject)eps)->type_name);

  /* Setup problem specification in dvd */
  ierr = STGetNumMatrices(eps->st,&nmat);CHKERRQ(ierr);
  ierr = STGetOperators(eps->st,0,&A);CHKERRQ(ierr);
  if (nmat>1) { ierr = STGetOperators(eps->st,1,&B);CHKERRQ(ierr); }
  ierr = EPSReset_XD(eps);CHKERRQ(ierr);
  ierr = PetscMemzero(dvd,sizeof(dvdDashboard));CHKERRQ(ierr);
  dvd->A = A; dvd->B = eps->isgeneralized? B : NULL;
  ispositive = eps->ispositive;
  dvd->sA = DVD_MAT_IMPLICIT |
            (eps->ishermitian? DVD_MAT_HERMITIAN : 0) |
            ((ispositive && !eps->isgeneralized) ? DVD_MAT_POS_DEF : 0);
  /* Asume -eps_hermitian means hermitian-definite in generalized problems */
  if (!ispositive && !eps->isgeneralized && eps->ishermitian) ispositive = PETSC_TRUE;
  if (!eps->isgeneralized) dvd->sB = DVD_MAT_IMPLICIT | DVD_MAT_HERMITIAN | DVD_MAT_IDENTITY | DVD_MAT_UNITARY | DVD_MAT_POS_DEF;
  else dvd->sB = DVD_MAT_IMPLICIT | (eps->ishermitian? DVD_MAT_HERMITIAN : 0) | (ispositive? DVD_MAT_POS_DEF : 0);
  ipB = (dvd->B && data->ipB && DVD_IS(dvd->sB,DVD_MAT_HERMITIAN))?PETSC_TRUE:PETSC_FALSE;
  if (data->ipB && !ipB) data->ipB = PETSC_FALSE;
  dvd->correctXnorm = ipB;
  dvd->sEP = ((!eps->isgeneralized || (eps->isgeneralized && ipB))? DVD_EP_STD : 0) |
             (ispositive? DVD_EP_HERMITIAN : 0) |
             ((eps->problem_type == EPS_GHIEP && ipB) ? DVD_EP_INDEFINITE : 0);
  dvd->nev = eps->nev;
  dvd->which = eps->which;
  dvd->withTarget = PETSC_TRUE;
  switch (eps->which) {
    case EPS_TARGET_MAGNITUDE:
    case EPS_TARGET_IMAGINARY:
      dvd->target[0] = target = eps->target; dvd->target[1] = 1.0;
      break;
    case EPS_TARGET_REAL:
      dvd->target[0] = PetscRealPart(target = eps->target); dvd->target[1] = 1.0;
      break;
    case EPS_LARGEST_REAL:
    case EPS_LARGEST_MAGNITUDE:
    case EPS_LARGEST_IMAGINARY: /* TODO: think about this case */
      dvd->target[0] = 1.0; dvd->target[1] = target = 0.0;
      break;
    case EPS_SMALLEST_MAGNITUDE:
    case EPS_SMALLEST_REAL:
    case EPS_SMALLEST_IMAGINARY: /* TODO: think about this case */
      dvd->target[0] = target = 0.0; dvd->target[1] = 1.0;
      break;
    case EPS_WHICH_USER:
      ierr = STGetShift(eps->st,&target);CHKERRQ(ierr);
      dvd->target[0] = target; dvd->target[1] = 1.0;
      break;
    case EPS_ALL:
      SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"Unsupported option: which == EPS_ALL");
      break;
    default:
      SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"Unsupported value of option 'which'");
  }
  dvd->tol = eps->tol==PETSC_DEFAULT?SLEPC_DEFAULT_TOL:eps->tol;
  dvd->eps = eps;

  /* Setup the extraction technique */
  if (!eps->extraction) {
    if (ipB || ispositive) eps->extraction = EPS_RITZ;
    else {
      switch (eps->which) {
        case EPS_TARGET_REAL:
        case EPS_TARGET_MAGNITUDE:
        case EPS_TARGET_IMAGINARY:
        case EPS_SMALLEST_MAGNITUDE:
        case EPS_SMALLEST_REAL:
        case EPS_SMALLEST_IMAGINARY:
          eps->extraction = EPS_HARMONIC;
          break;
        case EPS_LARGEST_REAL:
        case EPS_LARGEST_MAGNITUDE:
        case EPS_LARGEST_IMAGINARY:
          eps->extraction = EPS_HARMONIC_LARGEST;
          break;
        default:
          eps->extraction = EPS_RITZ;
      }
    }
  }
  switch (eps->extraction) {
    case EPS_RITZ:              harm = DVD_HARM_NONE; break;
    case EPS_HARMONIC:          harm = DVD_HARM_RR; break;
    case EPS_HARMONIC_RELATIVE: harm = DVD_HARM_RRR; break;
    case EPS_HARMONIC_RIGHT:    harm = DVD_HARM_REIGS; break;
    case EPS_HARMONIC_LARGEST:  harm = DVD_HARM_LEIGS; break;
    default: SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"Unsupported extraction type");
  }

  /* Setup the type of starting subspace */
  ierr = EPSXDGetKrylovStart_XD(eps,&t);CHKERRQ(ierr);
  init = (!t)? DVD_INITV_CLASSIC : DVD_INITV_KRYLOV;

  /* Setup the presence of converged vectors in the projected problem and in the projector */
  ierr = EPSXDGetWindowSizes_XD(eps,&cX_in_impr,&cX_in_proj);CHKERRQ(ierr);
  if (cX_in_impr>0) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"The option pwindow is temporally disable in this solver.");
  if (cX_in_proj>0) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"The option qwindow is temporally disable in this solver.");
  if (min_size_V <= cX_in_proj) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"minv has to be greater than qwindow");
  if (bs > 1 && cX_in_impr > 0) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"Unsupported option: pwindow > 0 and bs > 1");

  /* Get the fix parameter */
  ierr = EPSXDGetFix_XD(eps,&fix);CHKERRQ(ierr);

  /* Get whether the stopping criterion is used */
  ierr = EPSJDGetConstCorrectionTol_JD(eps,&dynamic);CHKERRQ(ierr);

  /* Preconfigure dvd */
  ierr = STGetKSP(eps->st,&ksp);CHKERRQ(ierr);
  ierr = dvd_schm_basic_preconf(dvd,&b,eps->mpd,min_size_V,bs,
                                initv,
                                PetscAbs(eps->nini),
                                plusk,harm,
                                ksp,init,eps->trackall,
                                data->ipB,cX_in_proj,cX_in_impr,
                                data->scheme);CHKERRQ(ierr);

  /* Allocate memory */
  ierr = EPSAllocateSolution(eps,0);CHKERRQ(ierr);

  /* Setup orthogonalization */
  ierr = EPS_SetInnerProduct(eps);CHKERRQ(ierr);
  if (!(ipB && dvd->B)) {
    ierr = BVSetMatrix(eps->V,NULL,PETSC_FALSE);CHKERRQ(ierr);
  }

  for (i=0;i<eps->ncv;i++) eps->perm[i] = i;

  /* Configure dvd for a basic GD */
  ierr = dvd_schm_basic_conf(dvd,&b,eps->mpd,min_size_V,bs,
                             initv,
                             PetscAbs(eps->nini),plusk,
                             harm,dvd->withTarget,
                             target,ksp,
                             fix,init,eps->trackall,
                             data->ipB,cX_in_proj,cX_in_impr,dynamic,
                             data->scheme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Beispiel #24
0
/*@
  DMPlexOrient - Give a consistent orientation to the input mesh

  Input Parameters:
. dm - The DM

  Note: The orientation data for the DM are change in-place.
$ This routine will fail for non-orientable surfaces, such as the Moebius strip.

  Level: advanced

.seealso: DMCreate(), DMPLEX
@*/
PetscErrorCode DMPlexOrient(DM dm)
{
  MPI_Comm           comm;
  PetscSF            sf;
  const PetscInt    *lpoints;
  const PetscSFNode *rpoints;
  PetscSFNode       *rorntComp = NULL, *lorntComp = NULL;
  PetscInt          *numNeighbors, **neighbors;
  PetscSFNode       *nrankComp;
  PetscBool         *match, *flipped;
  PetscBT            seenCells, flippedCells, seenFaces;
  PetscInt          *faceFIFO, fTop, fBottom, *cellComp, *faceComp;
  PetscInt           numLeaves, numRoots, dim, h, cStart, cEnd, c, cell, fStart, fEnd, face, off, totNeighbors = 0;
  PetscMPIInt        rank, size, numComponents, comp = 0;
  PetscBool          flg, flg2;
  PetscViewer        viewer = NULL, selfviewer = NULL;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject) dm, &comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
  ierr = PetscOptionsHasName(((PetscObject) dm)->options,((PetscObject) dm)->prefix, "-orientation_view", &flg);CHKERRQ(ierr);
  ierr = PetscOptionsHasName(((PetscObject) dm)->options,((PetscObject) dm)->prefix, "-orientation_view_synchronized", &flg2);CHKERRQ(ierr);
  ierr = DMGetPointSF(dm, &sf);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sf, &numRoots, &numLeaves, &lpoints, &rpoints);CHKERRQ(ierr);
  /* Truth Table
     mismatch    flips   do action   mismatch   flipA ^ flipB   action
         F       0 flips     no         F             F           F
         F       1 flip      yes        F             T           T
         F       2 flips     no         T             F           T
         T       0 flips     yes        T             T           F
         T       1 flip      no
         T       2 flips     yes
  */
  ierr = DMGetDimension(dm, &dim);CHKERRQ(ierr);
  ierr = DMPlexGetVTKCellHeight(dm, &h);CHKERRQ(ierr);
  ierr = DMPlexGetHeightStratum(dm, h,   &cStart, &cEnd);CHKERRQ(ierr);
  ierr = DMPlexGetHeightStratum(dm, h+1, &fStart, &fEnd);CHKERRQ(ierr);
  ierr = PetscBTCreate(cEnd - cStart, &seenCells);CHKERRQ(ierr);
  ierr = PetscBTMemzero(cEnd - cStart, seenCells);CHKERRQ(ierr);
  ierr = PetscBTCreate(cEnd - cStart, &flippedCells);CHKERRQ(ierr);
  ierr = PetscBTMemzero(cEnd - cStart, flippedCells);CHKERRQ(ierr);
  ierr = PetscBTCreate(fEnd - fStart, &seenFaces);CHKERRQ(ierr);
  ierr = PetscBTMemzero(fEnd - fStart, seenFaces);CHKERRQ(ierr);
  ierr = PetscCalloc3(fEnd - fStart, &faceFIFO, cEnd-cStart, &cellComp, fEnd-fStart, &faceComp);CHKERRQ(ierr);
  /*
   OLD STYLE
   - Add an integer array over cells and faces (component) for connected component number
   Foreach component
     - Mark the initial cell as seen
     - Process component as usual
     - Set component for all seenCells
     - Wipe seenCells and seenFaces (flippedCells can stay)
   - Generate parallel adjacency for component using SF and seenFaces
   - Collect numComponents adj data from each proc to 0
   - Build same serial graph
   - Use same solver
   - Use Scatterv to to send back flipped flags for each component
   - Negate flippedCells by component

   NEW STYLE
   - Create the adj on each process
   - Bootstrap to complete graph on proc 0
  */
  /* Loop over components */
  for (cell = cStart; cell < cEnd; ++cell) cellComp[cell-cStart] = -1;
  do {
    /* Look for first unmarked cell */
    for (cell = cStart; cell < cEnd; ++cell) if (cellComp[cell-cStart] < 0) break;
    if (cell >= cEnd) break;
    /* Initialize FIFO with first cell in component */
    {
      const PetscInt *cone;
      PetscInt        coneSize;

      fTop = fBottom = 0;
      ierr = DMPlexGetConeSize(dm, cell, &coneSize);CHKERRQ(ierr);
      ierr = DMPlexGetCone(dm, cell, &cone);CHKERRQ(ierr);
      for (c = 0; c < coneSize; ++c) {
        faceFIFO[fBottom++] = cone[c];
        ierr = PetscBTSet(seenFaces, cone[c]-fStart);CHKERRQ(ierr);
      }
      ierr = PetscBTSet(seenCells, cell-cStart);CHKERRQ(ierr);
    }
    /* Consider each face in FIFO */
    while (fTop < fBottom) {
      ierr = DMPlexCheckFace_Internal(dm, faceFIFO, &fTop, &fBottom, cStart, fStart, fEnd, seenCells, flippedCells, seenFaces);CHKERRQ(ierr);
    }
    /* Set component for cells and faces */
    for (cell = 0; cell < cEnd-cStart; ++cell) {
      if (PetscBTLookup(seenCells, cell)) cellComp[cell] = comp;
    }
    for (face = 0; face < fEnd-fStart; ++face) {
      if (PetscBTLookup(seenFaces, face)) faceComp[face] = comp;
    }
    /* Wipe seenCells and seenFaces for next component */
    ierr = PetscBTMemzero(fEnd - fStart, seenFaces);CHKERRQ(ierr);
    ierr = PetscBTMemzero(cEnd - cStart, seenCells);CHKERRQ(ierr);
    ++comp;
  } while (1);
  numComponents = comp;
  if (flg) {
    PetscViewer v;

    ierr = PetscViewerASCIIGetStdout(comm, &v);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushSynchronized(v);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(v, "[%d]BT for serial flipped cells:\n", rank);CHKERRQ(ierr);
    ierr = PetscBTView(cEnd-cStart, flippedCells, v);CHKERRQ(ierr);
    ierr = PetscViewerFlush(v);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPopSynchronized(v);CHKERRQ(ierr);
  }
  /* Now all subdomains are oriented, but we need a consistent parallel orientation */
  if (numLeaves >= 0) {
    /* Store orientations of boundary faces*/
    ierr = PetscCalloc2(numRoots,&rorntComp,numRoots,&lorntComp);CHKERRQ(ierr);
    for (face = fStart; face < fEnd; ++face) {
      const PetscInt *cone, *support, *ornt;
      PetscInt        coneSize, supportSize;

      ierr = DMPlexGetSupportSize(dm, face, &supportSize);CHKERRQ(ierr);
      if (supportSize != 1) continue;
      ierr = DMPlexGetSupport(dm, face, &support);CHKERRQ(ierr);

      ierr = DMPlexGetCone(dm, support[0], &cone);CHKERRQ(ierr);
      ierr = DMPlexGetConeSize(dm, support[0], &coneSize);CHKERRQ(ierr);
      ierr = DMPlexGetConeOrientation(dm, support[0], &ornt);CHKERRQ(ierr);
      for (c = 0; c < coneSize; ++c) if (cone[c] == face) break;
      if (dim == 1) {
        /* Use cone position instead, shifted to -1 or 1 */
        if (PetscBTLookup(flippedCells, support[0]-cStart)) rorntComp[face].rank = 1-c*2;
        else                                                rorntComp[face].rank = c*2-1;
      } else {
        if (PetscBTLookup(flippedCells, support[0]-cStart)) rorntComp[face].rank = ornt[c] < 0 ? -1 :  1;
        else                                                rorntComp[face].rank = ornt[c] < 0 ?  1 : -1;
      }
      rorntComp[face].index = faceComp[face-fStart];
    }
    /* Communicate boundary edge orientations */
    ierr = PetscSFBcastBegin(sf, MPIU_2INT, rorntComp, lorntComp);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(sf, MPIU_2INT, rorntComp, lorntComp);CHKERRQ(ierr);
  }
  /* Get process adjacency */
  ierr = PetscMalloc2(numComponents, &numNeighbors, numComponents, &neighbors);CHKERRQ(ierr);
  viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)dm));
  if (flg2) {ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);}
  ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&selfviewer);CHKERRQ(ierr);
  for (comp = 0; comp < numComponents; ++comp) {
    PetscInt  l, n;

    numNeighbors[comp] = 0;
    ierr = PetscMalloc1(PetscMax(numLeaves, 0), &neighbors[comp]);CHKERRQ(ierr);
    /* I know this is p^2 time in general, but for bounded degree its alright */
    for (l = 0; l < numLeaves; ++l) {
      const PetscInt face = lpoints[l];

      /* Find a representative face (edge) separating pairs of procs */
      if ((face >= fStart) && (face < fEnd) && (faceComp[face-fStart] == comp)) {
        const PetscInt rrank = rpoints[l].rank;
        const PetscInt rcomp = lorntComp[face].index;

        for (n = 0; n < numNeighbors[comp]; ++n) if ((rrank == rpoints[neighbors[comp][n]].rank) && (rcomp == lorntComp[lpoints[neighbors[comp][n]]].index)) break;
        if (n >= numNeighbors[comp]) {
          PetscInt supportSize;

          ierr = DMPlexGetSupportSize(dm, face, &supportSize);CHKERRQ(ierr);
          if (supportSize != 1) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Boundary faces should see one cell, not %d", supportSize);
          if (flg) {ierr = PetscViewerASCIIPrintf(selfviewer, "[%d]: component %d, Found representative leaf %d (face %d) connecting to face %d on (%d, %d) with orientation %d\n", rank, comp, l, face, rpoints[l].index, rrank, rcomp, lorntComp[face].rank);CHKERRQ(ierr);}
          neighbors[comp][numNeighbors[comp]++] = l;
        }
      }
    }
    totNeighbors += numNeighbors[comp];
  }
  ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&selfviewer);CHKERRQ(ierr);
  ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
  if (flg2) {ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);}
  ierr = PetscMalloc2(totNeighbors, &nrankComp, totNeighbors, &match);CHKERRQ(ierr);
  for (comp = 0, off = 0; comp < numComponents; ++comp) {
    PetscInt n;

    for (n = 0; n < numNeighbors[comp]; ++n, ++off) {
      const PetscInt face = lpoints[neighbors[comp][n]];
      const PetscInt o    = rorntComp[face].rank*lorntComp[face].rank;

      if      (o < 0) match[off] = PETSC_TRUE;
      else if (o > 0) match[off] = PETSC_FALSE;
      else SETERRQ5(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid face %d (%d, %d) neighbor: %d comp: %d", face, rorntComp[face], lorntComp[face], neighbors[comp][n], comp);
      nrankComp[off].rank  = rpoints[neighbors[comp][n]].rank;
      nrankComp[off].index = lorntComp[lpoints[neighbors[comp][n]]].index;
    }
    ierr = PetscFree(neighbors[comp]);CHKERRQ(ierr);
  }
  /* Collect the graph on 0 */
  if (numLeaves >= 0) {
    Mat          G;
    PetscBT      seenProcs, flippedProcs;
    PetscInt    *procFIFO, pTop, pBottom;
    PetscInt    *N   = NULL, *Noff;
    PetscSFNode *adj = NULL;
    PetscBool   *val = NULL;
    PetscMPIInt *recvcounts = NULL, *displs = NULL, *Nc, p, o;
    PetscMPIInt  size = 0;

    ierr = PetscCalloc1(numComponents, &flipped);CHKERRQ(ierr);
    if (!rank) {ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);}
    ierr = PetscCalloc4(size, &recvcounts, size+1, &displs, size, &Nc, size+1, &Noff);CHKERRQ(ierr);
    ierr = MPI_Gather(&numComponents, 1, MPI_INT, Nc, 1, MPI_INT, 0, comm);CHKERRQ(ierr);
    for (p = 0; p < size; ++p) {
      displs[p+1] = displs[p] + Nc[p];
    }
    if (!rank) {ierr = PetscMalloc1(displs[size],&N);CHKERRQ(ierr);}
    ierr = MPI_Gatherv(numNeighbors, numComponents, MPIU_INT, N, Nc, displs, MPIU_INT, 0, comm);CHKERRQ(ierr);
    for (p = 0, o = 0; p < size; ++p) {
      recvcounts[p] = 0;
      for (c = 0; c < Nc[p]; ++c, ++o) recvcounts[p] += N[o];
      displs[p+1] = displs[p] + recvcounts[p];
    }
    if (!rank) {ierr = PetscMalloc2(displs[size], &adj, displs[size], &val);CHKERRQ(ierr);}
    ierr = MPI_Gatherv(nrankComp, totNeighbors, MPIU_2INT, adj, recvcounts, displs, MPIU_2INT, 0, comm);CHKERRQ(ierr);
    ierr = MPI_Gatherv(match, totNeighbors, MPIU_BOOL, val, recvcounts, displs, MPIU_BOOL, 0, comm);CHKERRQ(ierr);
    ierr = PetscFree2(numNeighbors, neighbors);CHKERRQ(ierr);
    if (!rank) {
      for (p = 1; p <= size; ++p) {Noff[p] = Noff[p-1] + Nc[p-1];}
      if (flg) {
        PetscInt n;

        for (p = 0, off = 0; p < size; ++p) {
          for (c = 0; c < Nc[p]; ++c) {
            ierr = PetscPrintf(PETSC_COMM_SELF, "Proc %d Comp %d:\n", p, c);CHKERRQ(ierr);
            for (n = 0; n < N[Noff[p]+c]; ++n, ++off) {
              ierr = PetscPrintf(PETSC_COMM_SELF, "  edge (%d, %d) (%d):\n", adj[off].rank, adj[off].index, val[off]);CHKERRQ(ierr);
            }
          }
        }
      }
      /* Symmetrize the graph */
      ierr = MatCreate(PETSC_COMM_SELF, &G);CHKERRQ(ierr);
      ierr = MatSetSizes(G, Noff[size], Noff[size], Noff[size], Noff[size]);CHKERRQ(ierr);
      ierr = MatSetUp(G);CHKERRQ(ierr);
      for (p = 0, off = 0; p < size; ++p) {
        for (c = 0; c < Nc[p]; ++c) {
          const PetscInt r = Noff[p]+c;
          PetscInt       n;

          for (n = 0; n < N[r]; ++n, ++off) {
            const PetscInt    q = Noff[adj[off].rank] + adj[off].index;
            const PetscScalar o = val[off] ? 1.0 : 0.0;

            ierr = MatSetValues(G, 1, &r, 1, &q, &o, INSERT_VALUES);CHKERRQ(ierr);
            ierr = MatSetValues(G, 1, &q, 1, &r, &o, INSERT_VALUES);CHKERRQ(ierr);
          }
        }
      }
      ierr = MatAssemblyBegin(G, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(G, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

      ierr = PetscBTCreate(Noff[size], &seenProcs);CHKERRQ(ierr);
      ierr = PetscBTMemzero(Noff[size], seenProcs);CHKERRQ(ierr);
      ierr = PetscBTCreate(Noff[size], &flippedProcs);CHKERRQ(ierr);
      ierr = PetscBTMemzero(Noff[size], flippedProcs);CHKERRQ(ierr);
      ierr = PetscMalloc1(Noff[size], &procFIFO);CHKERRQ(ierr);
      pTop = pBottom = 0;
      for (p = 0; p < Noff[size]; ++p) {
        if (PetscBTLookup(seenProcs, p)) continue;
        /* Initialize FIFO with next proc */
        procFIFO[pBottom++] = p;
        ierr = PetscBTSet(seenProcs, p);CHKERRQ(ierr);
        /* Consider each proc in FIFO */
        while (pTop < pBottom) {
          const PetscScalar *ornt;
          const PetscInt    *neighbors;
          PetscInt           proc, nproc, seen, flippedA, flippedB, mismatch, numNeighbors, n;

          proc     = procFIFO[pTop++];
          flippedA = PetscBTLookup(flippedProcs, proc) ? 1 : 0;
          ierr = MatGetRow(G, proc, &numNeighbors, &neighbors, &ornt);CHKERRQ(ierr);
          /* Loop over neighboring procs */
          for (n = 0; n < numNeighbors; ++n) {
            nproc    = neighbors[n];
            mismatch = PetscRealPart(ornt[n]) > 0.5 ? 0 : 1;
            seen     = PetscBTLookup(seenProcs, nproc);
            flippedB = PetscBTLookup(flippedProcs, nproc) ? 1 : 0;

            if (mismatch ^ (flippedA ^ flippedB)) {
              if (seen) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Previously seen procs %d and %d do not match: Fault mesh is non-orientable", proc, nproc);
              if (!flippedB) {
                ierr = PetscBTSet(flippedProcs, nproc);CHKERRQ(ierr);
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Inconsistent mesh orientation: Fault mesh is non-orientable");
            } else if (mismatch && flippedA && flippedB) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Attempt to flip already flipped cell: Fault mesh is non-orientable");
            if (!seen) {
              procFIFO[pBottom++] = nproc;
              ierr = PetscBTSet(seenProcs, nproc);CHKERRQ(ierr);
            }
          }
        }
      }
      ierr = PetscFree(procFIFO);CHKERRQ(ierr);
      ierr = MatDestroy(&G);CHKERRQ(ierr);
      ierr = PetscFree2(adj, val);CHKERRQ(ierr);
      ierr = PetscBTDestroy(&seenProcs);CHKERRQ(ierr);
    }
    /* Scatter flip flags */
    {
      PetscBool *flips = NULL;

      if (!rank) {
        ierr = PetscMalloc1(Noff[size], &flips);CHKERRQ(ierr);
        for (p = 0; p < Noff[size]; ++p) {
          flips[p] = PetscBTLookup(flippedProcs, p) ? PETSC_TRUE : PETSC_FALSE;
          if (flg && flips[p]) {ierr = PetscPrintf(comm, "Flipping Proc+Comp %d:\n", p);CHKERRQ(ierr);}
        }
        for (p = 0; p < size; ++p) {
          displs[p+1] = displs[p] + Nc[p];
        }
      }
      ierr = MPI_Scatterv(flips, Nc, displs, MPIU_BOOL, flipped, numComponents, MPIU_BOOL, 0, comm);CHKERRQ(ierr);
      ierr = PetscFree(flips);CHKERRQ(ierr);
    }
    if (!rank) {ierr = PetscBTDestroy(&flippedProcs);CHKERRQ(ierr);}
    ierr = PetscFree(N);CHKERRQ(ierr);
    ierr = PetscFree4(recvcounts, displs, Nc, Noff);CHKERRQ(ierr);
    ierr = PetscFree2(nrankComp, match);CHKERRQ(ierr);

    /* Decide whether to flip cells in each component */
    for (c = 0; c < cEnd-cStart; ++c) {if (flipped[cellComp[c]]) {ierr = PetscBTNegate(flippedCells, c);CHKERRQ(ierr);}}
    ierr = PetscFree(flipped);CHKERRQ(ierr);
  }
  if (flg) {
    PetscViewer v;

    ierr = PetscViewerASCIIGetStdout(comm, &v);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPushSynchronized(v);CHKERRQ(ierr);
    ierr = PetscViewerASCIISynchronizedPrintf(v, "[%d]BT for parallel flipped cells:\n", rank);CHKERRQ(ierr);
    ierr = PetscBTView(cEnd-cStart, flippedCells, v);CHKERRQ(ierr);
    ierr = PetscViewerFlush(v);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPopSynchronized(v);CHKERRQ(ierr);
  }
  /* Reverse flipped cells in the mesh */
  for (c = cStart; c < cEnd; ++c) {
    if (PetscBTLookup(flippedCells, c-cStart)) {
      ierr = DMPlexReverseCell(dm, c);CHKERRQ(ierr);
    }
  }
  ierr = PetscBTDestroy(&seenCells);CHKERRQ(ierr);
  ierr = PetscBTDestroy(&flippedCells);CHKERRQ(ierr);
  ierr = PetscBTDestroy(&seenFaces);CHKERRQ(ierr);
  ierr = PetscFree2(numNeighbors, neighbors);CHKERRQ(ierr);
  ierr = PetscFree2(rorntComp, lorntComp);CHKERRQ(ierr);
  ierr = PetscFree3(faceFIFO, cellComp, faceComp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Beispiel #25
0
/*@C
  MatCreateLMVM - Creates a limited memory matrix for lmvm algorithms.

  Collective on A

  Input Parameters:
+ comm - MPI Communicator
. n - local size of vectors
- N - global size of vectors

  Output Parameters:
. A - New LMVM matrix

  Level: developer

@*/
extern PetscErrorCode MatCreateLMVM(MPI_Comm comm, PetscInt n, PetscInt N, Mat *A)
{
  MatLMVMCtx     *ctx;
  PetscErrorCode ierr;
  PetscInt       nhistory;

  PetscFunctionBegin;
  /*  create data structure and populate with default values */
  ierr = PetscNew(&ctx);CHKERRQ(ierr);
  ctx->lm=5;
  ctx->eps=0.0;
  ctx->limitType=MatLMVM_Limit_None;
  ctx->scaleType=MatLMVM_Scale_Broyden;
  ctx->rScaleType = MatLMVM_Rescale_Scalar;
  ctx->s_alpha = 1.0;
  ctx->r_alpha = 1.0;
  ctx->r_beta = 0.5;
  ctx->mu = 1.0;
  ctx->nu = 100.0;
  
  ctx->phi = 0.125;
  
  ctx->scalar_history = 1;
  ctx->rescale_history = 1;
  
  ctx->delta_min = 1e-7;
  ctx->delta_max = 100.0;

  /*  Begin configuration */
  ierr = PetscOptionsInt("-tao_lmm_vectors", "vectors to use for approximation", "", ctx->lm, &ctx->lm, 0);CHKERRQ(ierr);
  ierr = PetscOptionsReal("-tao_lmm_limit_mu", "mu limiting factor", "", ctx->mu, &ctx->mu, 0);CHKERRQ(ierr);
  ierr = PetscOptionsReal("-tao_lmm_limit_nu", "nu limiting factor", "", ctx->nu, &ctx->nu, 0);CHKERRQ(ierr);
  ierr = PetscOptionsReal("-tao_lmm_broyden_phi", "phi factor for Broyden scaling", "", ctx->phi, &ctx->phi, 0);CHKERRQ(ierr);
  ierr = PetscOptionsReal("-tao_lmm_scalar_alpha", "alpha factor for scalar scaling", "",ctx->s_alpha, &ctx->s_alpha, 0);CHKERRQ(ierr);
  ierr = PetscOptionsReal("-tao_lmm_rescale_alpha", "alpha factor for rescaling diagonal", "", ctx->r_alpha, &ctx->r_alpha, 0);CHKERRQ(ierr);
  ierr = PetscOptionsReal("-tao_lmm_rescale_beta", "beta factor for rescaling diagonal", "", ctx->r_beta, &ctx->r_beta, 0);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-tao_lmm_scalar_history", "amount of history for scalar scaling", "", ctx->scalar_history, &ctx->scalar_history, 0);CHKERRQ(ierr);
  ierr = PetscOptionsInt("-tao_lmm_rescale_history", "amount of history for rescaling diagonal", "", ctx->rescale_history, &ctx->rescale_history, 0);CHKERRQ(ierr);
  ierr = PetscOptionsReal("-tao_lmm_eps", "rejection tolerance", "", ctx->eps, &ctx->eps, 0);CHKERRQ(ierr);
  ierr = PetscOptionsEList("-tao_lmm_scale_type", "scale type", "", Scale_Table, MatLMVM_Scale_Types, Scale_Table[ctx->scaleType], &ctx->scaleType, 0);CHKERRQ(ierr);
  ierr = PetscOptionsEList("-tao_lmm_rescale_type", "rescale type", "", Rescale_Table, MatLMVM_Rescale_Types, Rescale_Table[ctx->rScaleType], &ctx->rScaleType, 0);CHKERRQ(ierr);
  ierr = PetscOptionsEList("-tao_lmm_limit_type", "limit type", "", Limit_Table, MatLMVM_Limit_Types, Limit_Table[ctx->limitType], &ctx->limitType, 0);CHKERRQ(ierr);
  ierr = PetscOptionsReal("-tao_lmm_delta_min", "minimum delta value", "", ctx->delta_min, &ctx->delta_min, 0);CHKERRQ(ierr);
  ierr = PetscOptionsReal("-tao_lmm_delta_max", "maximum delta value", "", ctx->delta_max, &ctx->delta_max, 0);CHKERRQ(ierr);
  
  /*  Complete configuration */
  ctx->rescale_history = PetscMin(ctx->rescale_history, ctx->lm);
  ierr = PetscMalloc1(ctx->lm+1,&ctx->rho);CHKERRQ(ierr);
  ierr = PetscMalloc1(ctx->lm+1,&ctx->beta);CHKERRQ(ierr);

  nhistory = PetscMax(ctx->scalar_history,1);
  ierr = PetscMalloc1(nhistory,&ctx->yy_history);CHKERRQ(ierr);
  ierr = PetscMalloc1(nhistory,&ctx->ys_history);CHKERRQ(ierr);
  ierr = PetscMalloc1(nhistory,&ctx->ss_history);CHKERRQ(ierr);

  nhistory = PetscMax(ctx->rescale_history,1);
  ierr = PetscMalloc1(nhistory,&ctx->yy_rhistory);CHKERRQ(ierr);
  ierr = PetscMalloc1(nhistory,&ctx->ys_rhistory);CHKERRQ(ierr);
  ierr = PetscMalloc1(nhistory,&ctx->ss_rhistory);CHKERRQ(ierr);

  /*  Finish initializations */
  ctx->lmnow = 0;
  ctx->iter = 0;
  ctx->nupdates = 0;
  ctx->nrejects = 0;
  ctx->delta = 1.0;

  ctx->Gprev = 0;
  ctx->Xprev = 0;

  ctx->scale = 0;
  ctx->useScale = PETSC_FALSE;

  ctx->H0 = 0;
  ctx->useDefaultH0=PETSC_TRUE;

  ierr = MatCreateShell(comm, n, n, N, N, ctx, A);CHKERRQ(ierr);
  ierr = MatShellSetOperation(*A,MATOP_DESTROY,(void(*)(void))MatDestroy_LMVM);CHKERRQ(ierr);
  ierr = MatShellSetOperation(*A,MATOP_VIEW,(void(*)(void))MatView_LMVM);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
Beispiel #26
0
static PetscErrorCode TaoSolve_TRON(Tao tao)
{
  TAO_TRON                     *tron = (TAO_TRON *)tao->data;
  PetscErrorCode               ierr;
  PetscInt                     iter=0,its;
  TaoConvergedReason           reason = TAO_CONTINUE_ITERATING;
  TaoLineSearchConvergedReason ls_reason = TAOLINESEARCH_CONTINUE_ITERATING;
  PetscReal                    prered,actred,delta,f,f_new,rhok,gdx,xdiff,stepsize;

  PetscFunctionBegin;
  tron->pgstepsize=1.0;
  tao->trust = tao->trust0;
  /*   Project the current point onto the feasible set */
  ierr = TaoComputeVariableBounds(tao);CHKERRQ(ierr);
  ierr = VecMedian(tao->XL,tao->solution,tao->XU,tao->solution);CHKERRQ(ierr);
  ierr = TaoLineSearchSetVariableBounds(tao->linesearch,tao->XL,tao->XU);CHKERRQ(ierr);

  ierr = TaoComputeObjectiveAndGradient(tao,tao->solution,&tron->f,tao->gradient);CHKERRQ(ierr);
  ierr = ISDestroy(&tron->Free_Local);CHKERRQ(ierr);

  ierr = VecWhichBetween(tao->XL,tao->solution,tao->XU,&tron->Free_Local);CHKERRQ(ierr);

  /* Project the gradient and calculate the norm */
  ierr = VecBoundGradientProjection(tao->gradient,tao->solution, tao->XL, tao->XU, tao->gradient);CHKERRQ(ierr);
  ierr = VecNorm(tao->gradient,NORM_2,&tron->gnorm);CHKERRQ(ierr);

  if (PetscIsInfOrNanReal(tron->f) || PetscIsInfOrNanReal(tron->gnorm)) SETERRQ(PETSC_COMM_SELF,1, "User provided compute function generated Inf pr NaN");
  if (tao->trust <= 0) {
    tao->trust=PetscMax(tron->gnorm*tron->gnorm,1.0);
  }

  tron->stepsize=tao->trust;
  ierr = TaoMonitor(tao, iter, tron->f, tron->gnorm, 0.0, tron->stepsize, &reason);CHKERRQ(ierr);
  while (reason==TAO_CONTINUE_ITERATING){

    ierr = TronGradientProjections(tao,tron);CHKERRQ(ierr);
    f=tron->f; delta=tao->trust;
    tron->n_free_last = tron->n_free;
    ierr = TaoComputeHessian(tao,tao->solution,tao->hessian,tao->hessian_pre);CHKERRQ(ierr);

    ierr = ISGetSize(tron->Free_Local, &tron->n_free);CHKERRQ(ierr);

    /* If no free variables */
    if (tron->n_free == 0) {
      actred=0;
      PetscInfo(tao,"No free variables in tron iteration.");
      break;

    }
    /* use free_local to mask/submat gradient, hessian, stepdirection */
    ierr = TaoVecGetSubVec(tao->gradient,tron->Free_Local,tao->subset_type,0.0,&tron->R);CHKERRQ(ierr);
    ierr = TaoVecGetSubVec(tao->gradient,tron->Free_Local,tao->subset_type,0.0,&tron->DXFree);CHKERRQ(ierr);
    ierr = VecSet(tron->DXFree,0.0);CHKERRQ(ierr);
    ierr = VecScale(tron->R, -1.0);CHKERRQ(ierr);
    ierr = TaoMatGetSubMat(tao->hessian, tron->Free_Local, tron->diag, tao->subset_type, &tron->H_sub);CHKERRQ(ierr);
    if (tao->hessian == tao->hessian_pre) {
      ierr = MatDestroy(&tron->Hpre_sub);CHKERRQ(ierr);
      ierr = PetscObjectReference((PetscObject)(tron->H_sub));CHKERRQ(ierr);
      tron->Hpre_sub = tron->H_sub;
    } else {
      ierr = TaoMatGetSubMat(tao->hessian_pre, tron->Free_Local, tron->diag, tao->subset_type,&tron->Hpre_sub);CHKERRQ(ierr);
    }
    ierr = KSPReset(tao->ksp);CHKERRQ(ierr);
    ierr = KSPSetOperators(tao->ksp, tron->H_sub, tron->Hpre_sub);CHKERRQ(ierr);
    while (1) {

      /* Approximately solve the reduced linear system */
      ierr = KSPSTCGSetRadius(tao->ksp,delta);CHKERRQ(ierr);

      ierr = KSPSolve(tao->ksp, tron->R, tron->DXFree);CHKERRQ(ierr);
      ierr = KSPGetIterationNumber(tao->ksp,&its);CHKERRQ(ierr);
      tao->ksp_its+=its;
      ierr = VecSet(tao->stepdirection,0.0);CHKERRQ(ierr);

      /* Add dxfree matrix to compute step direction vector */
      ierr = VecISAXPY(tao->stepdirection,tron->Free_Local,1.0,tron->DXFree);CHKERRQ(ierr);
      if (0) {
        PetscReal rhs,stepnorm;
        ierr = VecNorm(tron->R,NORM_2,&rhs);CHKERRQ(ierr);
        ierr = VecNorm(tron->DXFree,NORM_2,&stepnorm);CHKERRQ(ierr);
        ierr = PetscPrintf(PETSC_COMM_WORLD,"|rhs|=%g\t|s|=%g\n",(double)rhs,(double)stepnorm);CHKERRQ(ierr);
      }


      ierr = VecDot(tao->gradient, tao->stepdirection, &gdx);CHKERRQ(ierr);
      ierr = PetscInfo1(tao,"Expected decrease in function value: %14.12e\n",(double)gdx);CHKERRQ(ierr);

      ierr = VecCopy(tao->solution, tron->X_New);CHKERRQ(ierr);
      ierr = VecCopy(tao->gradient, tron->G_New);CHKERRQ(ierr);

      stepsize=1.0;f_new=f;

      ierr = TaoLineSearchSetInitialStepLength(tao->linesearch,1.0);CHKERRQ(ierr);
      ierr = TaoLineSearchApply(tao->linesearch, tron->X_New, &f_new, tron->G_New, tao->stepdirection,&stepsize,&ls_reason);CHKERRQ(ierr);CHKERRQ(ierr);
      ierr = TaoAddLineSearchCounts(tao);CHKERRQ(ierr);

      ierr = MatMult(tao->hessian, tao->stepdirection, tron->Work);CHKERRQ(ierr);
      ierr = VecAYPX(tron->Work, 0.5, tao->gradient);CHKERRQ(ierr);
      ierr = VecDot(tao->stepdirection, tron->Work, &prered);CHKERRQ(ierr);
      actred = f_new - f;
      if (actred<0) {
        rhok=PetscAbs(-actred/prered);
      } else {
        rhok=0.0;
      }

      /* Compare actual improvement to the quadratic model */
      if (rhok > tron->eta1) { /* Accept the point */
        /* d = x_new - x */
        ierr = VecCopy(tron->X_New, tao->stepdirection);CHKERRQ(ierr);
        ierr = VecAXPY(tao->stepdirection, -1.0, tao->solution);CHKERRQ(ierr);

        ierr = VecNorm(tao->stepdirection, NORM_2, &xdiff);CHKERRQ(ierr);
        xdiff *= stepsize;

        /* Adjust trust region size */
        if (rhok < tron->eta2 ){
          delta = PetscMin(xdiff,delta)*tron->sigma1;
        } else if (rhok > tron->eta4 ){
          delta= PetscMin(xdiff,delta)*tron->sigma3;
        } else if (rhok > tron->eta3 ){
          delta=PetscMin(xdiff,delta)*tron->sigma2;
        }
        ierr = VecBoundGradientProjection(tron->G_New,tron->X_New, tao->XL, tao->XU, tao->gradient);CHKERRQ(ierr);
        if (tron->Free_Local) {
          ierr = ISDestroy(&tron->Free_Local);CHKERRQ(ierr);
        }
        ierr = VecWhichBetween(tao->XL, tron->X_New, tao->XU, &tron->Free_Local);CHKERRQ(ierr);
        f=f_new;
        ierr = VecNorm(tao->gradient,NORM_2,&tron->gnorm);CHKERRQ(ierr);
        ierr = VecCopy(tron->X_New, tao->solution);CHKERRQ(ierr);
        ierr = VecCopy(tron->G_New, tao->gradient);CHKERRQ(ierr);
        break;
      }
      else if (delta <= 1e-30) {
        break;
      }
      else {
        delta /= 4.0;
      }
    } /* end linear solve loop */


    tron->f=f; tron->actred=actred; tao->trust=delta;
    iter++;
    ierr = TaoMonitor(tao, iter, tron->f, tron->gnorm, 0.0, delta, &reason);CHKERRQ(ierr);
  }  /* END MAIN LOOP  */

  PetscFunctionReturn(0);
}
Beispiel #27
0
static PetscErrorCode KSPSolve_PIPEFCG_cycle(KSP ksp)
{
  PetscErrorCode ierr;
  PetscInt       i,j,k,idx,kdx,mi;
  KSP_PIPEFCG    *pipefcg;
  PetscScalar    alpha=0.0,gamma,*betas,*dots;
  PetscReal      dp=0.0, delta,*eta,*etas;
  Vec            B,R,Z,X,Qcurr,W,ZETAcurr,M,N,Pcurr,Scurr,*redux;
  Mat            Amat,Pmat;

  PetscFunctionBegin;

  /* We have not checked these routines for use with complex numbers. The inner products
     are likely not defined correctly for that case */
#if (defined(PETSC_USE_COMPLEX) && !defined(PETSC_SKIP_COMPLEX))
  SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"PIPEFGMRES has not been implemented for use with complex scalars");
#endif

#define VecXDot(x,y,a)         (((pipefcg->type) == (KSP_CG_HERMITIAN)) ? VecDot       (x,y,a)   : VecTDot       (x,y,a))
#define VecXDotBegin(x,y,a)    (((pipefcg->type) == (KSP_CG_HERMITIAN)) ? VecDotBegin  (x,y,a)   : VecTDotBegin  (x,y,a))
#define VecXDotEnd(x,y,a)      (((pipefcg->type) == (KSP_CG_HERMITIAN)) ? VecDotEnd    (x,y,a)   : VecTDotEnd    (x,y,a))
#define VecMXDot(x,n,y,a)      (((pipefcg->type) == (KSP_CG_HERMITIAN)) ? VecMDot      (x,n,y,a) : VecMTDot      (x,n,y,a))
#define VecMXDotBegin(x,n,y,a) (((pipefcg->type) == (KSP_CG_HERMITIAN)) ? VecMDotBegin (x,n,y,a) : VecMTDotBegin (x,n,y,a))
#define VecMXDotEnd(x,n,y,a)   (((pipefcg->type) == (KSP_CG_HERMITIAN)) ? VecMDotEnd   (x,n,y,a) : VecMTDotEnd   (x,n,y,a))

  pipefcg       = (KSP_PIPEFCG*)ksp->data;
  X             = ksp->vec_sol;
  B             = ksp->vec_rhs;
  R             = ksp->work[0];
  Z             = ksp->work[1];
  W             = ksp->work[2];
  M             = ksp->work[3];
  N             = ksp->work[4];

  redux = pipefcg->redux;
  dots  = pipefcg->dots;
  etas  = pipefcg->etas;
  betas = dots;        /* dots takes the result of all dot products of which the betas are a subset */

  ierr = PCGetOperators(ksp->pc,&Amat,&Pmat);CHKERRQ(ierr);

  /* Compute cycle initial residual */
  ierr = KSP_MatMult(ksp,Amat,X,R);CHKERRQ(ierr);
  ierr = VecAYPX(R,-1.0,B);CHKERRQ(ierr);                   /* r <- b - Ax */
  ierr = KSP_PCApply(ksp,R,Z);CHKERRQ(ierr);                /* z <- Br     */

  Pcurr = pipefcg->Pvecs[0];
  Scurr = pipefcg->Svecs[0];
  Qcurr = pipefcg->Qvecs[0];
  ZETAcurr = pipefcg->ZETAvecs[0];
  ierr  = VecCopy(Z,Pcurr);CHKERRQ(ierr);
  ierr  = KSP_MatMult(ksp,Amat,Pcurr,Scurr);CHKERRQ(ierr);  /* S = Ap     */
  ierr  = VecCopy(Scurr,W);CHKERRQ(ierr);                   /* w = s = Az */

  /* Initial state of pipelining intermediates */
  redux[0] = R;
  redux[1] = W;
  ierr     = VecMXDotBegin(Z,2,redux,dots);CHKERRQ(ierr);
  ierr     = PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)Z));CHKERRQ(ierr); /* perform asynchronous reduction */
  ierr     = KSP_PCApply(ksp,W,M);CHKERRQ(ierr);            /* m = B(w) */
  ierr     = KSP_MatMult(ksp,Amat,M,N);CHKERRQ(ierr);       /* n = Am   */
  ierr     = VecCopy(M,Qcurr);CHKERRQ(ierr);                /* q = m    */
  ierr     = VecCopy(N,ZETAcurr);CHKERRQ(ierr);             /* zeta = n */
  ierr     = VecMXDotEnd(Z,2,redux,dots);CHKERRQ(ierr);
  gamma    = dots[0];
  delta    = PetscRealPart(dots[1]);
  etas[0]  = delta;
  alpha    = gamma/delta;

  i = 0;
  do {
    ksp->its++;

    /* Update X, R, Z, W */
    ierr = VecAXPY(X,+alpha,Pcurr);CHKERRQ(ierr);           /* x <- x + alpha * pi    */
    ierr = VecAXPY(R,-alpha,Scurr);CHKERRQ(ierr);           /* r <- r - alpha * si    */
    ierr = VecAXPY(Z,-alpha,Qcurr);CHKERRQ(ierr);           /* z <- z - alpha * qi    */
    ierr = VecAXPY(W,-alpha,ZETAcurr);CHKERRQ(ierr);        /* w <- w - alpha * zetai */

    /* Compute norm for convergence check */
    switch (ksp->normtype) {
      case KSP_NORM_PRECONDITIONED:
        ierr = VecNorm(Z,NORM_2,&dp);CHKERRQ(ierr);         /* dp <- sqrt(z'*z) = sqrt(e'*A'*B'*B*A*e) */
        break;
      case KSP_NORM_UNPRECONDITIONED:
        ierr = VecNorm(R,NORM_2,&dp);CHKERRQ(ierr);         /* dp <- sqrt(r'*r) = sqrt(e'*A'*A*e)      */
        break;
      case KSP_NORM_NATURAL:
        dp = PetscSqrtReal(PetscAbsScalar(gamma));          /* dp <- sqrt(r'*z) = sqrt(e'*A'*B*A*e)    */
        break;
      case KSP_NORM_NONE:
        dp = 0.0;
        break;
      default: SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"%s",KSPNormTypes[ksp->normtype]);
    }

    /* Check for convergence */
    ksp->rnorm = dp;
    KSPLogResidualHistory(ksp,dp);CHKERRQ(ierr);
    ierr = KSPMonitor(ksp,ksp->its,dp);CHKERRQ(ierr);
    ierr = (*ksp->converged)(ksp,ksp->its+1,dp,&ksp->reason,ksp->cnvP);CHKERRQ(ierr);
    if (ksp->reason) break;

    /* Computations of current iteration done */
    ++i;

    /* If needbe, allocate a new chunk of vectors in P and C */
    ierr = KSPAllocateVectors_PIPEFCG(ksp,i+1,pipefcg->vecb);CHKERRQ(ierr);

    /* Note that we wrap around and start clobbering old vectors */
    idx = i % (pipefcg->mmax+1);
    Pcurr    = pipefcg->Pvecs[idx];
    Scurr    = pipefcg->Svecs[idx];
    Qcurr    = pipefcg->Qvecs[idx];
    ZETAcurr = pipefcg->ZETAvecs[idx];
    eta      = pipefcg->etas+idx;

    /* number of old directions to orthogonalize against */
    switch(pipefcg->truncstrat){
      case KSP_FCD_TRUNC_TYPE_STANDARD:
        mi = pipefcg->mmax;
        break;
      case KSP_FCD_TRUNC_TYPE_NOTAY:
        mi = ((i-1) % pipefcg->mmax)+1;
        break;
      default:
        SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unrecognized Truncation Strategy");
    }

    /* Pick old p,s,q,zeta in a way suitable for VecMDot */
    ierr = VecCopy(Z,Pcurr);CHKERRQ(ierr);
    for(k=PetscMax(0,i-mi),j=0;k<i;++j,++k){
      kdx = k % (pipefcg->mmax+1);
      pipefcg->Pold[j]    = pipefcg->Pvecs[kdx];
      pipefcg->Sold[j]    = pipefcg->Svecs[kdx];
      pipefcg->Qold[j]    = pipefcg->Qvecs[kdx];
      pipefcg->ZETAold[j] = pipefcg->ZETAvecs[kdx];
      redux[j]            = pipefcg->Svecs[kdx];
    }
    redux[j]   = R;   /* If the above loop is not executed redux contains only R => all beta_k = 0, only gamma, delta != 0 */
    redux[j+1] = W;

    ierr = VecMXDotBegin(Z,j+2,redux,betas);CHKERRQ(ierr);  /* Start split reductions for beta_k = (z,s_k), gamma = (z,r), delta = (z,w) */
    ierr = PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)Z));CHKERRQ(ierr); /* perform asynchronous reduction */
    ierr = VecWAXPY(N,-1.0,R,W);CHKERRQ(ierr);              /* m = u + B(w-r): (a) ntmp = w-r              */
    ierr = KSP_PCApply(ksp,N,M);CHKERRQ(ierr);              /* m = u + B(w-r): (b) mtmp = B(ntmp) = B(w-r) */
    ierr = VecAXPY(M,1.0,Z);CHKERRQ(ierr);                  /* m = u + B(w-r): (c) m = z + mtmp            */
    ierr = KSP_MatMult(ksp,Amat,M,N);CHKERRQ(ierr);         /* n = Am                                      */
    ierr = VecMXDotEnd(Z,j+2,redux,betas);CHKERRQ(ierr);    /* Finish split reductions */
    gamma = betas[j];
    delta = PetscRealPart(betas[j+1]);

    *eta = 0.;
    for(k=PetscMax(0,i-mi),j=0;k<i;++j,++k){
      kdx = k % (pipefcg->mmax+1);
      betas[j] /= -etas[kdx];                               /* betak  /= etak */
      *eta -= ((PetscReal)(PetscAbsScalar(betas[j])*PetscAbsScalar(betas[j]))) * etas[kdx];
                                                            /* etaitmp = -betaik^2 * etak */
    }
    *eta += delta;                                          /* etai    = delta -betaik^2 * etak */
    if(*eta < 0.) {
      pipefcg->norm_breakdown = PETSC_TRUE;
      ierr = PetscInfo1(ksp,"Restart due to square root breakdown at it = \n",ksp->its);CHKERRQ(ierr);
      break;
    } else {
      alpha= gamma/(*eta);                                  /* alpha = gamma/etai */
    }

    /* project out stored search directions using classical G-S */
    ierr = VecCopy(Z,Pcurr);CHKERRQ(ierr);
    ierr = VecCopy(W,Scurr);CHKERRQ(ierr);
    ierr = VecCopy(M,Qcurr);CHKERRQ(ierr);
    ierr = VecCopy(N,ZETAcurr);CHKERRQ(ierr);
    ierr = VecMAXPY(Pcurr   ,j,betas,pipefcg->Pold);CHKERRQ(ierr);    /* pi    <- ui - sum_k beta_k p_k    */
    ierr = VecMAXPY(Scurr   ,j,betas,pipefcg->Sold);CHKERRQ(ierr);    /* si    <- wi - sum_k beta_k s_k    */
    ierr = VecMAXPY(Qcurr   ,j,betas,pipefcg->Qold);CHKERRQ(ierr);    /* qi    <- m  - sum_k beta_k q_k    */
    ierr = VecMAXPY(ZETAcurr,j,betas,pipefcg->ZETAold);CHKERRQ(ierr); /* zetai <- n  - sum_k beta_k zeta_k */

  } while (ksp->its < ksp->max_it);
  PetscFunctionReturn(0);
}
Beispiel #28
0
static PetscErrorCode Tao_mcstep(TaoLineSearch ls,PetscReal *stx,PetscReal *fx,PetscReal *dx,PetscReal *sty,PetscReal *fy,PetscReal *dy,PetscReal *stp,PetscReal *fp,PetscReal *dp)
{
  TaoLineSearch_MT *mtP = (TaoLineSearch_MT *) ls->data;
  PetscReal        gamma1, p, q, r, s, sgnd, stpc, stpf, stpq, theta;
  PetscInt         bound;

  PetscFunctionBegin;
  /* Check the input parameters for errors */
  mtP->infoc = 0;
  if (mtP->bracket && (*stp <= PetscMin(*stx,*sty) || (*stp >= PetscMax(*stx,*sty)))) SETERRQ(PETSC_COMM_SELF,1,"bad stp in bracket");
  if (*dx * (*stp-*stx) >= 0.0) SETERRQ(PETSC_COMM_SELF,1,"dx * (stp-stx) >= 0.0");
  if (ls->stepmax < ls->stepmin) SETERRQ(PETSC_COMM_SELF,1,"stepmax > stepmin");

  /* Determine if the derivatives have opposite sign */
  sgnd = *dp * (*dx / PetscAbsReal(*dx));

  if (*fp > *fx) {
    /* Case 1: a higher function value.
     The minimum is bracketed. If the cubic step is closer
     to stx than the quadratic step, the cubic step is taken,
     else the average of the cubic and quadratic steps is taken. */

    mtP->infoc = 1;
    bound = 1;
    theta = 3 * (*fx - *fp) / (*stp - *stx) + *dx + *dp;
    s = PetscMax(PetscAbsReal(theta),PetscAbsReal(*dx));
    s = PetscMax(s,PetscAbsReal(*dp));
    gamma1 = s*PetscSqrtScalar(PetscPowScalar(theta/s,2.0) - (*dx/s)*(*dp/s));
    if (*stp < *stx) gamma1 = -gamma1;
    /* Can p be 0?  Check */
    p = (gamma1 - *dx) + theta;
    q = ((gamma1 - *dx) + gamma1) + *dp;
    r = p/q;
    stpc = *stx + r*(*stp - *stx);
    stpq = *stx + ((*dx/((*fx-*fp)/(*stp-*stx)+*dx))*0.5) * (*stp - *stx);

    if (PetscAbsReal(stpc-*stx) < PetscAbsReal(stpq-*stx)) {
      stpf = stpc;
    } else {
      stpf = stpc + 0.5*(stpq - stpc);
    }
    mtP->bracket = 1;
  } else if (sgnd < 0.0) {
    /* Case 2: A lower function value and derivatives of
     opposite sign. The minimum is bracketed. If the cubic
     step is closer to stx than the quadratic (secant) step,
     the cubic step is taken, else the quadratic step is taken. */

    mtP->infoc = 2;
    bound = 0;
    theta = 3*(*fx - *fp)/(*stp - *stx) + *dx + *dp;
    s = PetscMax(PetscAbsReal(theta),PetscAbsReal(*dx));
    s = PetscMax(s,PetscAbsReal(*dp));
    gamma1 = s*PetscSqrtScalar(PetscPowScalar(theta/s,2.0) - (*dx/s)*(*dp/s));
    if (*stp > *stx) gamma1 = -gamma1;
    p = (gamma1 - *dp) + theta;
    q = ((gamma1 - *dp) + gamma1) + *dx;
    r = p/q;
    stpc = *stp + r*(*stx - *stp);
    stpq = *stp + (*dp/(*dp-*dx))*(*stx - *stp);

    if (PetscAbsReal(stpc-*stp) > PetscAbsReal(stpq-*stp)) {
      stpf = stpc;
    } else {
      stpf = stpq;
    }
    mtP->bracket = 1;
  } else if (PetscAbsReal(*dp) < PetscAbsReal(*dx)) {
    /* Case 3: A lower function value, derivatives of the
     same sign, and the magnitude of the derivative decreases.
     The cubic step is only used if the cubic tends to infinity
     in the direction of the step or if the minimum of the cubic
     is beyond stp. Otherwise the cubic step is defined to be
     either stepmin or stepmax. The quadratic (secant) step is also
     computed and if the minimum is bracketed then the step
     closest to stx is taken, else the step farthest away is taken. */

    mtP->infoc = 3;
    bound = 1;
    theta = 3*(*fx - *fp)/(*stp - *stx) + *dx + *dp;
    s = PetscMax(PetscAbsReal(theta),PetscAbsReal(*dx));
    s = PetscMax(s,PetscAbsReal(*dp));

    /* The case gamma1 = 0 only arises if the cubic does not tend
       to infinity in the direction of the step. */
    gamma1 = s*PetscSqrtScalar(PetscMax(0.0,PetscPowScalar(theta/s,2.0) - (*dx/s)*(*dp/s)));
    if (*stp > *stx) gamma1 = -gamma1;
    p = (gamma1 - *dp) + theta;
    q = (gamma1 + (*dx - *dp)) + gamma1;
    r = p/q;
    if (r < 0.0 && gamma1 != 0.0) stpc = *stp + r*(*stx - *stp);
    else if (*stp > *stx)        stpc = ls->stepmax;
    else                         stpc = ls->stepmin;
    stpq = *stp + (*dp/(*dp-*dx)) * (*stx - *stp);

    if (mtP->bracket) {
      if (PetscAbsReal(*stp-stpc) < PetscAbsReal(*stp-stpq)) {
        stpf = stpc;
      } else {
        stpf = stpq;
      }
    } else {
      if (PetscAbsReal(*stp-stpc) > PetscAbsReal(*stp-stpq)) {
        stpf = stpc;
      } else {
        stpf = stpq;
      }
    }
  } else {
    /* Case 4: A lower function value, derivatives of the
       same sign, and the magnitude of the derivative does
       not decrease. If the minimum is not bracketed, the step
       is either stpmin or stpmax, else the cubic step is taken. */

    mtP->infoc = 4;
    bound = 0;
    if (mtP->bracket) {
      theta = 3*(*fp - *fy)/(*sty - *stp) + *dy + *dp;
      s = PetscMax(PetscAbsReal(theta),PetscAbsReal(*dy));
      s = PetscMax(s,PetscAbsReal(*dp));
      gamma1 = s*PetscSqrtScalar(PetscPowScalar(theta/s,2.0) - (*dy/s)*(*dp/s));
      if (*stp > *sty) gamma1 = -gamma1;
      p = (gamma1 - *dp) + theta;
      q = ((gamma1 - *dp) + gamma1) + *dy;
      r = p/q;
      stpc = *stp + r*(*sty - *stp);
      stpf = stpc;
    } else if (*stp > *stx) {
      stpf = ls->stepmax;
    } else {
      stpf = ls->stepmin;
    }
  }

  /* Update the interval of uncertainty.  This update does not
     depend on the new step or the case analysis above. */

  if (*fp > *fx) {
    *sty = *stp;
    *fy = *fp;
    *dy = *dp;
  } else {
    if (sgnd < 0.0) {
      *sty = *stx;
      *fy = *fx;
      *dy = *dx;
    }
    *stx = *stp;
    *fx = *fp;
    *dx = *dp;
  }

  /* Compute the new step and safeguard it. */
  stpf = PetscMin(ls->stepmax,stpf);
  stpf = PetscMax(ls->stepmin,stpf);
  *stp = stpf;
  if (mtP->bracket && bound) {
    if (*sty > *stx) {
      *stp = PetscMin(*stx+0.66*(*sty-*stx),*stp);
    } else {
      *stp = PetscMax(*stx+0.66*(*sty-*stx),*stp);
    }
  }
  PetscFunctionReturn(0);
}
Beispiel #29
0
int main(int argc,char **args)
{
  User           user;
  Mat            A,S;
  PetscScalar    *data,diag = 1.3;
  PetscReal      tol = PETSC_SMALL;
  PetscInt       i,j,m = PETSC_DECIDE,n = PETSC_DECIDE,M = 17,N = 15,s1,s2;
  PetscInt       test, ntest = 2;
  PetscMPIInt    rank,size;
  PetscBool      nc = PETSC_FALSE, cong;
  PetscBool      ronl = PETSC_TRUE;
  PetscBool      randomize = PETSC_FALSE;
  PetscBool      keep = PETSC_FALSE;
  PetscBool      testzerorows = PETSC_TRUE, testdiagscale = PETSC_TRUE, testgetdiag = PETSC_TRUE;
  PetscBool      testshift = PETSC_TRUE, testscale = PETSC_TRUE, testdup = PETSC_TRUE, testreset = PETSC_TRUE;
  PetscErrorCode ierr;

  ierr = PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr;
  ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
  ierr = PetscOptionsGetInt(NULL,NULL,"-M",&M,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsGetInt(NULL,NULL,"-N",&N,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsGetInt(NULL,NULL,"-ml",&m,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsGetInt(NULL,NULL,"-nl",&n,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsGetBool(NULL,NULL,"-square_nc",&nc,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsGetBool(NULL,NULL,"-rows_only",&ronl,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsGetBool(NULL,NULL,"-randomize",&randomize,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsGetBool(NULL,NULL,"-test_zerorows",&testzerorows,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsGetBool(NULL,NULL,"-test_diagscale",&testdiagscale,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsGetBool(NULL,NULL,"-test_getdiag",&testgetdiag,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsGetBool(NULL,NULL,"-test_shift",&testshift,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsGetBool(NULL,NULL,"-test_scale",&testscale,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsGetBool(NULL,NULL,"-test_dup",&testdup,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsGetBool(NULL,NULL,"-test_reset",&testreset,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsGetInt(NULL,NULL,"-loop",&ntest,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsGetReal(NULL,NULL,"-tol",&tol,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsGetScalar(NULL,NULL,"-diag",&diag,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsGetBool(NULL,NULL,"-keep",&keep,NULL);CHKERRQ(ierr);
  /* This tests square matrices with different row/col layout */
  if (nc && size > 1) {
    M = PetscMax(PetscMax(N,M),1);
    N = M;
    m = n = 0;
    if (rank == 0) { m = M-1; n = 1; }
    else if (rank == 1) { m = 1; n = N-1; }
  }
  ierr = MatCreateDense(PETSC_COMM_WORLD,m,n,M,N,NULL,&A);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = MatGetSize(A,&M,&N);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&s1,NULL);CHKERRQ(ierr);
  s2   = 1;
  while (s2 < M) s2 *= 10;
  ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr);
  for (j = 0; j < N; j++) {
    for (i = 0; i < m; i++) {
      data[j*m + i] = s2*j + i + s1 + 1;
    }
  }
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatConvert(A,MATAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);
  ierr = MatSetOption(A,MAT_KEEP_NONZERO_PATTERN,keep);CHKERRQ(ierr);
  ierr = PetscObjectSetName((PetscObject)A,"initial");CHKERRQ(ierr);
  ierr = MatViewFromOptions(A,NULL,"-view_mat");CHKERRQ(ierr);

  ierr = PetscNew(&user);CHKERRQ(ierr);
  ierr = MatCreateShell(PETSC_COMM_WORLD,m,n,M,N,user,&S);CHKERRQ(ierr);
  ierr = MatShellSetOperation(S,MATOP_MULT,(void (*)(void))MatMult_User);CHKERRQ(ierr);
  ierr = MatShellSetOperation(S,MATOP_MULT_TRANSPOSE,(void (*)(void))MatMultTranspose_User);CHKERRQ(ierr);
  if (cong) {
    ierr = MatShellSetOperation(S,MATOP_GET_DIAGONAL,(void (*)(void))MatGetDiagonal_User);CHKERRQ(ierr);
  }
  ierr = MatDuplicate(A,MAT_COPY_VALUES,&user->B);CHKERRQ(ierr);

  /* Square and rows only scaling */
  ronl = cong ? ronl : PETSC_TRUE;

  for (test = 0; test < ntest; test++) {
    PetscReal err;

    if (testzerorows) {
      Mat       ST,B,C,BT,BTT;
      IS        zr;
      Vec       x = NULL, b1 = NULL, b2 = NULL;
      PetscInt  *idxs = NULL, nr = 0;

      if (rank == (test%size)) {
        nr = 1;
        ierr = PetscMalloc1(nr,&idxs);CHKERRQ(ierr);
        if (test%2) {
          idxs[0] = (2*M - 1 - test/2)%M;
        } else {
          idxs[0] = (test/2)%M;
        }
        idxs[0] = PetscMax(idxs[0],0);
      }
      ierr = ISCreateGeneral(PETSC_COMM_WORLD,nr,idxs,PETSC_OWN_POINTER,&zr);CHKERRQ(ierr);
      ierr = PetscObjectSetName((PetscObject)zr,"ZR");CHKERRQ(ierr);
      ierr = ISViewFromOptions(zr,NULL,"-view_is");CHKERRQ(ierr);
      ierr = MatCreateVecs(A,&x,&b1);CHKERRQ(ierr);
      if (randomize) {
        ierr = VecSetRandom(x,NULL);CHKERRQ(ierr);
        ierr = VecSetRandom(b1,NULL);CHKERRQ(ierr);
      } else {
        ierr = VecSet(x,11.4);CHKERRQ(ierr);
        ierr = VecSet(b1,-14.2);CHKERRQ(ierr);
      }
      ierr = VecDuplicate(b1,&b2);CHKERRQ(ierr);
      ierr = VecCopy(b1,b2);CHKERRQ(ierr);
      ierr = PetscObjectSetName((PetscObject)b1,"A_B1");CHKERRQ(ierr);
      ierr = PetscObjectSetName((PetscObject)b2,"A_B2");CHKERRQ(ierr);
      if (size > 1 && !cong) { /* MATMPIAIJ ZeroRows and ZeroRowsColumns are buggy in this case */
        ierr = VecDestroy(&b1);CHKERRQ(ierr);
      }
      if (ronl) {
        ierr = MatZeroRowsIS(A,zr,diag,x,b1);CHKERRQ(ierr);
        ierr = MatZeroRowsIS(S,zr,diag,x,b2);CHKERRQ(ierr);
      } else {
        ierr = MatZeroRowsColumnsIS(A,zr,diag,x,b1);CHKERRQ(ierr);
        ierr = MatZeroRowsColumnsIS(S,zr,diag,x,b2);CHKERRQ(ierr);
        ierr = ISDestroy(&zr);CHKERRQ(ierr);
        /* Mix zerorows and zerorowscols */
        nr   = 0;
        idxs = NULL;
        if (!rank) {
          nr   = 1;
          ierr = PetscMalloc1(nr,&idxs);CHKERRQ(ierr);
          if (test%2) {
            idxs[0] = (3*M - 2 - test/2)%M;
          } else {
            idxs[0] = (test/2+1)%M;
          }
          idxs[0] = PetscMax(idxs[0],0);
        }
        ierr = ISCreateGeneral(PETSC_COMM_WORLD,nr,idxs,PETSC_OWN_POINTER,&zr);CHKERRQ(ierr);
        ierr = PetscObjectSetName((PetscObject)zr,"ZR2");CHKERRQ(ierr);
        ierr = ISViewFromOptions(zr,NULL,"-view_is");CHKERRQ(ierr);
        ierr = MatZeroRowsIS(A,zr,diag*2.0+PETSC_SMALL,NULL,NULL);CHKERRQ(ierr);
        ierr = MatZeroRowsIS(S,zr,diag*2.0+PETSC_SMALL,NULL,NULL);CHKERRQ(ierr);
      }
      ierr = ISDestroy(&zr);CHKERRQ(ierr);

      if (b1) {
        Vec b;

        ierr = VecViewFromOptions(b1,NULL,"-view_b");CHKERRQ(ierr);
        ierr = VecViewFromOptions(b2,NULL,"-view_b");CHKERRQ(ierr);
        ierr = VecDuplicate(b1,&b);CHKERRQ(ierr);
        ierr = VecCopy(b1,b);CHKERRQ(ierr);
        ierr = VecAXPY(b,-1.0,b2);CHKERRQ(ierr);
        ierr = VecNorm(b,NORM_INFINITY,&err);CHKERRQ(ierr);
        if (err >= tol) {
          ierr = PetscPrintf(PETSC_COMM_WORLD,"[test %D] Error b %g\n",test,(double)err);CHKERRQ(ierr);
        }
        ierr = VecDestroy(&b);CHKERRQ(ierr);
      }
      ierr = VecDestroy(&b1);CHKERRQ(ierr);
      ierr = VecDestroy(&b2);CHKERRQ(ierr);
      ierr = VecDestroy(&x);CHKERRQ(ierr);
      ierr = MatConvert(S,MATDENSE,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);

      ierr = MatCreateTranspose(S,&ST);CHKERRQ(ierr);
      ierr = MatComputeOperator(ST,MATDENSE,&BT);CHKERRQ(ierr);
      ierr = MatTranspose(BT,MAT_INITIAL_MATRIX,&BTT);CHKERRQ(ierr);
      ierr = PetscObjectSetName((PetscObject)B,"S");CHKERRQ(ierr);
      ierr = PetscObjectSetName((PetscObject)BTT,"STT");CHKERRQ(ierr);
      ierr = MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
      ierr = PetscObjectSetName((PetscObject)C,"A");CHKERRQ(ierr);

      ierr = MatViewFromOptions(C,NULL,"-view_mat");CHKERRQ(ierr);
      ierr = MatViewFromOptions(B,NULL,"-view_mat");CHKERRQ(ierr);
      ierr = MatViewFromOptions(BTT,NULL,"-view_mat");CHKERRQ(ierr);

      ierr = MatAXPY(C,-1.0,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
      ierr = MatNorm(C,NORM_FROBENIUS,&err);CHKERRQ(ierr);
      if (err >= tol) {
        ierr = PetscPrintf(PETSC_COMM_WORLD,"[test %D] Error mat mult %g\n",test,(double)err);CHKERRQ(ierr);
      }

      ierr = MatConvert(A,MATDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
      ierr = MatAXPY(C,-1.0,BTT,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
      ierr = MatNorm(C,NORM_FROBENIUS,&err);CHKERRQ(ierr);
      if (err >= tol) {
        ierr = PetscPrintf(PETSC_COMM_WORLD,"[test %D] Error mat mult transpose %g\n",test,(double)err);CHKERRQ(ierr);
      }

      ierr = MatDestroy(&ST);CHKERRQ(ierr);
      ierr = MatDestroy(&BTT);CHKERRQ(ierr);
      ierr = MatDestroy(&BT);CHKERRQ(ierr);
      ierr = MatDestroy(&B);CHKERRQ(ierr);
      ierr = MatDestroy(&C);CHKERRQ(ierr);
    }
    if (testdiagscale) { /* MatDiagonalScale() */
      Vec vr,vl;

      ierr = MatCreateVecs(A,&vr,&vl);CHKERRQ(ierr);
      if (randomize) {
        ierr = VecSetRandom(vr,NULL);CHKERRQ(ierr);
        ierr = VecSetRandom(vl,NULL);CHKERRQ(ierr);
      } else {
        ierr = VecSet(vr,test%2 ? 0.15 : 1.0/0.15);CHKERRQ(ierr);
        ierr = VecSet(vl,test%2 ? -1.2 : 1.0/-1.2);CHKERRQ(ierr);
      }
      ierr = MatDiagonalScale(A,vl,vr);CHKERRQ(ierr);
      ierr = MatDiagonalScale(S,vl,vr);CHKERRQ(ierr);
      ierr = VecDestroy(&vr);CHKERRQ(ierr);
      ierr = VecDestroy(&vl);CHKERRQ(ierr);
    }

    if (testscale) { /* MatScale() */
      ierr = MatScale(A,test%2 ? 1.4 : 1.0/1.4);CHKERRQ(ierr);
      ierr = MatScale(S,test%2 ? 1.4 : 1.0/1.4);CHKERRQ(ierr);
    }

    if (testshift && cong) { /* MatShift() : MATSHELL shift is broken when row/cols layout are not congruent and left/right scaling have been applied */
      ierr = MatShift(A,test%2 ? -77.5 : 77.5);CHKERRQ(ierr);
      ierr = MatShift(S,test%2 ? -77.5 : 77.5);CHKERRQ(ierr);
    }

    if (testgetdiag && cong) { /* MatGetDiagonal() */
      Vec dA,dS;

      ierr = MatCreateVecs(A,&dA,NULL);CHKERRQ(ierr);
      ierr = MatCreateVecs(S,&dS,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(A,dA);CHKERRQ(ierr);
      ierr = MatGetDiagonal(S,dS);CHKERRQ(ierr);
      ierr = VecAXPY(dA,-1.0,dS);CHKERRQ(ierr);
      ierr = VecNorm(dA,NORM_INFINITY,&err);CHKERRQ(ierr);
      if (err >= tol) {
        ierr = PetscPrintf(PETSC_COMM_WORLD,"[test %D] Error diag %g\n",test,(double)err);CHKERRQ(ierr);
      }
      ierr = VecDestroy(&dA);CHKERRQ(ierr);
      ierr = VecDestroy(&dS);CHKERRQ(ierr);
    }

    if (testdup && !test) {
      Mat A2, S2;

      ierr = MatDuplicate(A,MAT_COPY_VALUES,&A2);CHKERRQ(ierr);
      ierr = MatDuplicate(S,MAT_COPY_VALUES,&S2);CHKERRQ(ierr);
      ierr = MatDestroy(&A);CHKERRQ(ierr);
      ierr = MatDestroy(&S);CHKERRQ(ierr);
      A = A2;
      S = S2;
    }

    if (testreset && (ntest == 1 || test == ntest-2)) {
      /* reset MATSHELL */
      ierr = MatAssemblyBegin(S,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(S,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      /* reset A */
      ierr = MatCopy(user->B,A,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
    }
  }

  ierr = MatDestroy(&A);CHKERRQ(ierr);
  ierr = MatDestroy(&user->B);CHKERRQ(ierr);
  ierr = MatDestroy(&S);CHKERRQ(ierr);
  ierr = PetscFree(user);CHKERRQ(ierr);
  ierr = PetscFinalize();
  return ierr;
}
Beispiel #30
0
int main(int argc,char **argv)
{
  TS             ts;                 /* timestepping context */
  Vec            x,r;               /* solution, residual vectors */
  Mat            J;                  /* Jacobian matrix */
  AppCtx         user;               /* user-defined work context */
  PetscInt       its,N;                /* iterations for convergence */
  PetscErrorCode ierr; 
  PetscReal      param_max = 6.81,param_min = 0.,dt;
  PetscReal      ftime;
  PetscMPIInt    size;

  PetscInitialize(&argc,&argv,PETSC_NULL,help);
  ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);
  if (size != 1) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This is a uniprocessor example only");

  user.mx        = 4;
  user.my        = 4;
  user.param     = 6.0;
  
  /*
     Allow user to set the grid dimensions and nonlinearity parameter at run-time
  */
  PetscOptionsGetInt(PETSC_NULL,"-mx",&user.mx,PETSC_NULL);
  PetscOptionsGetInt(PETSC_NULL,"-my",&user.my,PETSC_NULL);
  PetscOptionsGetReal(PETSC_NULL,"-param",&user.param,PETSC_NULL);
  if (user.param >= param_max || user.param <= param_min) SETERRQ(PETSC_COMM_SELF,1,"Parameter is out of range");
  dt = .5/PetscMax(user.mx,user.my);
  ierr = PetscOptionsGetReal(PETSC_NULL,"-dt",&dt,PETSC_NULL);CHKERRQ(ierr);
  N          = user.mx*user.my;
  
  /* 
      Create vectors to hold the solution and function value
  */
  ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x);CHKERRQ(ierr);
  ierr = VecDuplicate(x,&r);CHKERRQ(ierr);

  /*
    Create matrix to hold Jacobian. Preallocate 5 nonzeros per row
    in the sparse matrix. Note that this is not the optimal strategy; see
    the Performance chapter of the users manual for information on 
    preallocating memory in sparse matrices.
  */
  ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,N,N,5,0,&J);CHKERRQ(ierr);

  /* 
     Create timestepper context 
  */
  ierr = TSCreate(PETSC_COMM_WORLD,&ts);CHKERRQ(ierr);
  ierr = TSSetProblemType(ts,TS_NONLINEAR);CHKERRQ(ierr);

  /*
     Tell the timestepper context where to compute solutions
  */
  ierr = TSSetSolution(ts,x);CHKERRQ(ierr);

  /*
     Provide the call-back for the nonlinear function we are 
     evaluating. Thus whenever the timestepping routines need the
     function they will call this routine. Note the final argument
     is the application context used by the call-back functions.
  */
  ierr = TSSetRHSFunction(ts,PETSC_NULL,FormFunction,&user);CHKERRQ(ierr);

  /*
     Set the Jacobian matrix and the function used to compute 
     Jacobians.
  */
  ierr = TSSetRHSJacobian(ts,J,J,FormJacobian,&user);CHKERRQ(ierr);

  /*
       For the initial guess for the problem
  */
  ierr = FormInitialGuess(x,&user);

  /*
       This indicates that we are using pseudo timestepping to 
     find a steady state solution to the nonlinear problem.
  */
  ierr = TSSetType(ts,TSPSEUDO);CHKERRQ(ierr);

  /*
       Set the initial time to start at (this is arbitrary for 
     steady state problems; and the initial timestep given above
  */
  ierr = TSSetInitialTimeStep(ts,0.0,dt);CHKERRQ(ierr);

  /*
      Set a large number of timesteps and final duration time
     to insure convergence to steady state.
  */
  ierr = TSSetDuration(ts,1000,1.e12);

  /*
      Use the default strategy for increasing the timestep
  */
  ierr = TSPseudoSetTimeStep(ts,TSPseudoDefaultTimeStep,0);CHKERRQ(ierr);

  /*
      Set any additional options from the options database. This
     includes all options for the nonlinear and linear solvers used
     internally the the timestepping routines.
  */
  ierr = TSSetFromOptions(ts);CHKERRQ(ierr);

  ierr = TSSetUp(ts);CHKERRQ(ierr);

  /*
      Perform the solve. This is where the timestepping takes place.
  */
  ierr = TSSolve(ts,x,&ftime);CHKERRQ(ierr);

  /*
      Get the number of steps
  */
  ierr = TSGetTimeStepNumber(ts,&its);CHKERRQ(ierr);

  printf("Number of pseudo timesteps = %d final time %4.2e\n",(int)its,ftime);

  /* 
     Free the data structures constructed above
  */
  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&r);CHKERRQ(ierr);
  ierr = MatDestroy(&J);CHKERRQ(ierr);
  ierr = TSDestroy(&ts);CHKERRQ(ierr);
  ierr = PetscFinalize();

  return 0;
}