Example #1
File: bntr.c Project: petsc/petsc
PetscErrorCode TaoSolve_BNTR(Tao tao)
{
  PetscErrorCode               ierr;
  TAO_BNK                      *bnk = (TAO_BNK *)tao->data;
  KSPConvergedReason           ksp_reason;

  PetscReal                    oldTrust, prered, actred, steplen, resnorm;
  PetscBool                    cgTerminate, needH = PETSC_TRUE, stepAccepted, shift = PETSC_FALSE;
  PetscInt                     stepType, nDiff;
  
  PetscFunctionBegin;
  /* Initialize the preconditioner, KSP solver and trust radius/line search */
  tao->reason = TAO_CONTINUE_ITERATING;
  ierr = TaoBNKInitialize(tao, bnk->init_type, &needH);CHKERRQ(ierr);
  if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0);

  /* Have not converged; continue with Newton method */
  while (tao->reason == TAO_CONTINUE_ITERATING) {
    /* Call general purpose update function */
    if (tao->ops->update) {
      ierr = (*tao->ops->update)(tao, tao->niter, tao->user_update);CHKERRQ(ierr);
    }
    ++tao->niter;
    
    if (needH && bnk->inactive_idx) { 
      /* Take BNCG steps (if enabled) to trade off Hessian evaluations for more gradient evaluations */
      ierr = TaoBNKTakeCGSteps(tao, &cgTerminate);CHKERRQ(ierr);
      if (cgTerminate) {
        tao->reason = bnk->bncg->reason;
        PetscFunctionReturn(0);
      }
      /* Compute the Hessian and update the BFGS preconditioner at the new iterate */
      ierr = (*bnk->computehessian)(tao);CHKERRQ(ierr);
      needH = PETSC_FALSE;
    }
    
    /* Store current solution before it changes */
    bnk->fold = bnk->f;
    ierr = VecCopy(tao->solution, bnk->Xold);CHKERRQ(ierr);
    ierr = VecCopy(tao->gradient, bnk->Gold);CHKERRQ(ierr);
    ierr = VecCopy(bnk->unprojected_gradient, bnk->unprojected_gradient_old);CHKERRQ(ierr);
    
    /* Enter into trust region loops */
    stepAccepted = PETSC_FALSE;
    while (!stepAccepted && tao->reason == TAO_CONTINUE_ITERATING) {
      tao->ksp_its=0;
      
      /* Use the common BNK kernel to compute the Newton step (for inactive variables only) */
      ierr = (*bnk->computestep)(tao, shift, &ksp_reason, &stepType);CHKERRQ(ierr);

      /* Temporarily accept the step and project it into the bounds */
      ierr = VecAXPY(tao->solution, 1.0, tao->stepdirection);CHKERRQ(ierr);
      ierr = TaoBoundSolution(tao->solution, tao->XL,tao->XU, 0.0, &nDiff, tao->solution);CHKERRQ(ierr);

      /* Check if the projection changed the step direction */
      if (nDiff > 0) {
        /* Projection changed the step, so we have to recompute the step and 
           the predicted reduction. Leave the trust radius unchanged. */
        ierr = VecCopy(tao->solution, tao->stepdirection);CHKERRQ(ierr);
        ierr = VecAXPY(tao->stepdirection, -1.0, bnk->Xold);CHKERRQ(ierr);
        ierr = TaoBNKRecomputePred(tao, tao->stepdirection, &prered);CHKERRQ(ierr);
      } else {
        /* Step did not change, so we can just recover the pre-computed prediction */
        ierr = KSPCGGetObjFcn(tao->ksp, &prered);CHKERRQ(ierr);
      }
      prered = -prered;

      /* Compute the actual reduction and update the trust radius */
      ierr = TaoComputeObjective(tao, tao->solution, &bnk->f);CHKERRQ(ierr);
      if (PetscIsInfOrNanReal(bnk->f)) SETERRQ(PETSC_COMM_SELF,1, "User provided compute function generated Inf or NaN");
      actred = bnk->fold - bnk->f;
      oldTrust = tao->trust;
      ierr = TaoBNKUpdateTrustRadius(tao, prered, actred, bnk->update_type, stepType, &stepAccepted);CHKERRQ(ierr);

      if (stepAccepted) {
        /* Step is good, evaluate the gradient and flip the need-Hessian switch */
        steplen = 1.0;
        needH = PETSC_TRUE;
        ++bnk->newt;
        ierr = TaoComputeGradient(tao, tao->solution, bnk->unprojected_gradient);CHKERRQ(ierr);
        ierr = TaoBNKEstimateActiveSet(tao, bnk->as_type);CHKERRQ(ierr);
        ierr = VecCopy(bnk->unprojected_gradient, tao->gradient);CHKERRQ(ierr);
        ierr = VecISSet(tao->gradient, bnk->active_idx, 0.0);CHKERRQ(ierr);
        ierr = TaoGradientNorm(tao, tao->gradient, NORM_2, &bnk->gnorm);CHKERRQ(ierr);
      } else {
        /* Step is bad, revert to the old solution and re-solve with a new radius */
        steplen = 0.0;
        needH = PETSC_FALSE;
        bnk->f = bnk->fold;
        ierr = VecCopy(bnk->Xold, tao->solution);CHKERRQ(ierr);
        ierr = VecCopy(bnk->Gold, tao->gradient);CHKERRQ(ierr);
        ierr = VecCopy(bnk->unprojected_gradient_old, bnk->unprojected_gradient);CHKERRQ(ierr);
        if (oldTrust == tao->trust) {
          /* Can't change the radius anymore so just terminate */
          tao->reason = TAO_DIVERGED_TR_REDUCTION;
        }
      }

      /*  Check for termination */
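      /* VecFischer evaluates the Fischer-Burmeister function componentwise over the bounds;
         its norm serves as the residual for the bound-constrained optimality conditions */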
      ierr = VecFischer(tao->solution, bnk->unprojected_gradient, tao->XL, tao->XU, bnk->W);CHKERRQ(ierr);
      ierr = VecNorm(bnk->W, NORM_2, &resnorm);CHKERRQ(ierr);
      if (PetscIsInfOrNanReal(resnorm)) SETERRQ(PETSC_COMM_SELF,1, "User provided compute function generated Inf or NaN");
      ierr = TaoLogConvergenceHistory(tao, bnk->f, resnorm, 0.0, tao->ksp_its);CHKERRQ(ierr);
      ierr = TaoMonitor(tao, tao->niter, bnk->f, resnorm, 0.0, steplen);CHKERRQ(ierr);
      ierr = (*tao->ops->convergencetest)(tao, tao->cnvP);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}
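For context, TaoSolve_BNTR is not called directly; an application drives it through the public Tao interface. The outline below is a minimal, illustrative sketch in the same ierr/CHKERRQ style as the examples above. FormFunctionGradient, FormHessian, the user context, and the vectors/matrix passed in are hypothetical application-side names, and the setter names follow the CHKERRQ-era Tao API (newer PETSc releases rename some of them).

#include <petsctao.h>

/* User-supplied callbacks (hypothetical names), defined elsewhere by the application */
extern PetscErrorCode FormFunctionGradient(Tao, Vec, PetscReal*, Vec, void*);
extern PetscErrorCode FormHessian(Tao, Vec, Mat, Mat, void*);

/* Sketch of a driver: x is the initial guess, xl/xu the bounds, H the Hessian matrix */
static PetscErrorCode RunBNTR(Vec x, Vec xl, Vec xu, Mat H, void *user)
{
  Tao            tao;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = TaoCreate(PETSC_COMM_WORLD, &tao);CHKERRQ(ierr);
  ierr = TaoSetType(tao, TAOBNTR);CHKERRQ(ierr);                   /* bounded Newton trust region */
  ierr = TaoSetInitialVector(tao, x);CHKERRQ(ierr);                /* initial guess */
  ierr = TaoSetVariableBounds(tao, xl, xu);CHKERRQ(ierr);          /* box constraints */
  ierr = TaoSetObjectiveAndGradientRoutine(tao, FormFunctionGradient, user);CHKERRQ(ierr);
  ierr = TaoSetHessianRoutine(tao, H, H, FormHessian, user);CHKERRQ(ierr);
  ierr = TaoSetFromOptions(tao);CHKERRQ(ierr);                     /* e.g. -tao_monitor, -tao_gatol */
  ierr = TaoSolve(tao);CHKERRQ(ierr);                              /* dispatches to TaoSolve_BNTR */
  ierr = TaoDestroy(&tao);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}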
Example #2
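/* TAOBMRM: bundle method for regularized risk minimization. Each iteration gathers the
   subgradient to rank 0, extends a cutting-plane model of the empirical risk, and solves
   a small dual QP there before scattering the new iterate back to all processes. */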
static PetscErrorCode TaoSolve_BMRM(Tao tao)
{
  PetscErrorCode     ierr;
  TAO_DF             df;
  TAO_BMRM           *bmrm = (TAO_BMRM*)tao->data;

  /* Values and pointers to parts of the optimization problem */
  PetscReal          f = 0.0;
  Vec                W = tao->solution;
  Vec                G = tao->gradient;
  PetscReal          lambda;
  PetscReal          bt;
  Vec_Chain          grad_list, *tail_glist, *pgrad;
  PetscInt           i;
  PetscMPIInt        rank;

  /* Used in converged criteria check */
  PetscReal          reg;
  PetscReal          jtwt = 0.0, max_jtwt, pre_epsilon, epsilon, jw, min_jw;
  PetscReal          innerSolverTol;
  MPI_Comm           comm;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)tao,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
  lambda = bmrm->lambda;

  /* Check Stopping Condition */
  tao->step = 1.0;
  max_jtwt = -BMRM_INFTY;
  min_jw = BMRM_INFTY;
  innerSolverTol = 1.0;
  epsilon = 0.0;

  if (!rank) {
    ierr = init_df_solver(&df);CHKERRQ(ierr);
    grad_list.next = NULL;
    tail_glist = &grad_list;
  }

  df.tol = 1e-6;
  tao->reason = TAO_CONTINUE_ITERATING;

  /*-----------------Algorithm Begins------------------------*/
  /* make the scatter */
  ierr = VecScatterCreateToZero(W, &bmrm->scatter, &bmrm->local_w);CHKERRQ(ierr);
  ierr = VecAssemblyBegin(bmrm->local_w);CHKERRQ(ierr);
  ierr = VecAssemblyEnd(bmrm->local_w);CHKERRQ(ierr);

  /* NOTE: In application pass the sub-gradient of Remp(W) */
  ierr = TaoComputeObjectiveAndGradient(tao, W, &f, G);CHKERRQ(ierr);
  ierr = TaoLogConvergenceHistory(tao,f,1.0,0.0,tao->ksp_its);CHKERRQ(ierr);
  ierr = TaoMonitor(tao,tao->niter,f,1.0,0.0,tao->step);CHKERRQ(ierr);
  ierr = (*tao->ops->convergencetest)(tao,tao->cnvP);CHKERRQ(ierr);
  
  while (tao->reason == TAO_CONTINUE_ITERATING) {
    /* Call general purpose update function */
    if (tao->ops->update) {
      ierr = (*tao->ops->update)(tao, tao->niter, tao->user_update);CHKERRQ(ierr);
    }
    
    /* compute bt = Remp(Wt-1) - <Wt-1, At> */
    ierr = VecDot(W, G, &bt);CHKERRQ(ierr);
    bt = f - bt;

    /* First gather the gradient to the master node */
    ierr = VecScatterBegin(bmrm->scatter, G, bmrm->local_w, INSERT_VALUES, SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(bmrm->scatter, G, bmrm->local_w, INSERT_VALUES, SCATTER_FORWARD);CHKERRQ(ierr);

    /* Bring up the inner solver */
    if (!rank) {
      ierr = ensure_df_space(tao->niter+1, &df);CHKERRQ(ierr);
      ierr = make_grad_node(bmrm->local_w, &pgrad);CHKERRQ(ierr);
      tail_glist->next = pgrad;
      tail_glist = pgrad;

      df.a[tao->niter] = 1.0;
      df.f[tao->niter] = -bt;
      df.u[tao->niter] = 1.0;
      df.l[tao->niter] = 0.0;

      /* set up the Q */
      pgrad = grad_list.next;
      for (i=0; i<=tao->niter; i++) {
        if (!pgrad) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Assert that there are at least tao->niter+1 pgrad available");
        ierr = VecDot(pgrad->V, bmrm->local_w, &reg);CHKERRQ(ierr);
        df.Q[i][tao->niter] = df.Q[tao->niter][i] = reg / lambda;
        pgrad = pgrad->next;
      }

      if (tao->niter > 0) {
        df.x[tao->niter] = 0.0;
        ierr = solve(&df);CHKERRQ(ierr);
      } else
        df.x[0] = 1.0;

      /* now computing Jt*(alpha_t) which should be = Jt(wt) to check convergence */
      jtwt = 0.0;
      ierr = VecSet(bmrm->local_w, 0.0);CHKERRQ(ierr);
      pgrad = grad_list.next;
      for (i=0; i<=tao->niter; i++) {
        jtwt -= df.x[i] * df.f[i];
        ierr = VecAXPY(bmrm->local_w, -df.x[i] / lambda, pgrad->V);CHKERRQ(ierr);
        pgrad = pgrad->next;
      }

      ierr = VecNorm(bmrm->local_w, NORM_2, &reg);CHKERRQ(ierr);
      reg = 0.5*lambda*reg*reg;
      jtwt -= reg;
    } /* end if rank == 0 */

    /* scatter the new W to all nodes */
    ierr = VecScatterBegin(bmrm->scatter,bmrm->local_w,W,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    ierr = VecScatterEnd(bmrm->scatter,bmrm->local_w,W,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

    ierr = TaoComputeObjectiveAndGradient(tao, W, &f, G);CHKERRQ(ierr);

    ierr = MPI_Bcast(&jtwt,1,MPIU_REAL,0,comm);CHKERRQ(ierr);
    ierr = MPI_Bcast(&reg,1,MPIU_REAL,0,comm);CHKERRQ(ierr);

    jw = reg + f;                                       /* J(w) = regularizer + Remp(w) */
    if (jw < min_jw) min_jw = jw;
    if (jtwt > max_jtwt) max_jtwt = jtwt;

    pre_epsilon = epsilon;
    epsilon = min_jw - jtwt;

    if (!rank) {
      if (innerSolverTol > epsilon) innerSolverTol = epsilon;
      else if (innerSolverTol < 1e-7) innerSolverTol = 1e-7;

      /* if the annealing doesn't work well, lower the inner solver tolerance */
      if (pre_epsilon < epsilon) innerSolverTol *= 0.2;

      df.tol = innerSolverTol*0.5;
    }

    tao->niter++;
    ierr = TaoLogConvergenceHistory(tao,min_jw,epsilon,0.0,tao->ksp_its);CHKERRQ(ierr);
    ierr = TaoMonitor(tao,tao->niter,min_jw,epsilon,0.0,tao->step);CHKERRQ(ierr);
    ierr = (*tao->ops->convergencetest)(tao,tao->cnvP);CHKERRQ(ierr);
  }

  /* free all the memory */
  if (!rank) {
    ierr = destroy_grad_list(&grad_list);CHKERRQ(ierr);
    ierr = destroy_df_solver(&df);CHKERRQ(ierr);
  }

  ierr = VecDestroy(&bmrm->local_w);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&bmrm->scatter);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
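Both examples invoke an optional general-purpose update hook, (*tao->ops->update), at the top of each iteration. The sketch below shows how such a hook might be registered, assuming the three-argument callback signature implied by Example #1's call (tao, tao->niter, tao->user_update) and the TaoSetUpdate registration routine; MyUpdate and its body are hypothetical.

#include <petsctao.h>

/* Hypothetical per-iteration hook; the signature follows the call in Example #1 */
static PetscErrorCode MyUpdate(Tao tao, PetscInt iter, void *ctx)
{
  PetscErrorCode ierr;
  PetscReal      f;

  PetscFunctionBegin;
  /* Query the current objective value and report it */
  ierr = TaoGetSolutionStatus(tao, NULL, &f, NULL, NULL, NULL, NULL);CHKERRQ(ierr);
  ierr = PetscPrintf(PetscObjectComm((PetscObject)tao), "update at iter %D: f = %g\n", iter, (double)f);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Registration (in the application's setup code, before TaoSolve; the context may be NULL):
     ierr = TaoSetUpdate(tao, MyUpdate, NULL);CHKERRQ(ierr);
*/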