PetscErrorCode TaoSolve_BNTR(Tao tao)
{
  PetscErrorCode     ierr;
  TAO_BNK            *bnk = (TAO_BNK *)tao->data;
  KSPConvergedReason ksp_reason;

  PetscReal          oldTrust, prered, actred, steplen, resnorm;
  PetscBool          cgTerminate, needH = PETSC_TRUE, stepAccepted, shift = PETSC_FALSE;
  PetscInt           stepType, nDiff;

  PetscFunctionBegin;
  /* Initialize the preconditioner, KSP solver and trust radius/line search */
  tao->reason = TAO_CONTINUE_ITERATING;
  ierr = TaoBNKInitialize(tao, bnk->init_type, &needH);CHKERRQ(ierr);
  if (tao->reason != TAO_CONTINUE_ITERATING) PetscFunctionReturn(0);

  /* Have not converged; continue with Newton method */
  while (tao->reason == TAO_CONTINUE_ITERATING) {
    /* Call general purpose update function */
    if (tao->ops->update) {
      ierr = (*tao->ops->update)(tao, tao->niter, tao->user_update);CHKERRQ(ierr);
    }
    ++tao->niter;

    if (needH && bnk->inactive_idx) {
      /* Take BNCG steps (if enabled) to trade-off Hessian evaluations for more gradient evaluations */
      ierr = TaoBNKTakeCGSteps(tao, &cgTerminate);CHKERRQ(ierr);
      if (cgTerminate) {
        tao->reason = bnk->bncg->reason;
        PetscFunctionReturn(0);
      }
      /* Compute the Hessian and update the BFGS preconditioner at the new iterate */
      ierr = (*bnk->computehessian)(tao);CHKERRQ(ierr);
      needH = PETSC_FALSE;
    }

    /* Store current solution before it changes */
    bnk->fold = bnk->f;
    ierr = VecCopy(tao->solution, bnk->Xold);CHKERRQ(ierr);
    ierr = VecCopy(tao->gradient, bnk->Gold);CHKERRQ(ierr);
    ierr = VecCopy(bnk->unprojected_gradient, bnk->unprojected_gradient_old);CHKERRQ(ierr);

    /* Enter into trust region loops */
    stepAccepted = PETSC_FALSE;
    while (!stepAccepted && tao->reason == TAO_CONTINUE_ITERATING) {
      tao->ksp_its = 0;

      /* Use the common BNK kernel to compute the Newton step (for inactive variables only) */
      ierr = (*bnk->computestep)(tao, shift, &ksp_reason, &stepType);CHKERRQ(ierr);

      /* Temporarily accept the step and project it into the bounds */
      ierr = VecAXPY(tao->solution, 1.0, tao->stepdirection);CHKERRQ(ierr);
      ierr = TaoBoundSolution(tao->solution, tao->XL, tao->XU, 0.0, &nDiff, tao->solution);CHKERRQ(ierr);

      /* Check if the projection changed the step direction */
      if (nDiff > 0) {
        /* Projection changed the step, so we have to recompute the step and
           the predicted reduction. Leave the trust radius unchanged. */
        ierr = VecCopy(tao->solution, tao->stepdirection);CHKERRQ(ierr);
        ierr = VecAXPY(tao->stepdirection, -1.0, bnk->Xold);CHKERRQ(ierr);
        ierr = TaoBNKRecomputePred(tao, tao->stepdirection, &prered);CHKERRQ(ierr);
      } else {
        /* Step did not change, so we can just recover the pre-computed prediction */
        ierr = KSPCGGetObjFcn(tao->ksp, &prered);CHKERRQ(ierr);
      }
      prered = -prered;

      /* Compute the actual reduction and update the trust radius */
      ierr = TaoComputeObjective(tao, tao->solution, &bnk->f);CHKERRQ(ierr);
      if (PetscIsInfOrNanReal(bnk->f)) SETERRQ(PETSC_COMM_SELF,1, "User provided compute function generated Inf or NaN");
      actred = bnk->fold - bnk->f;
      oldTrust = tao->trust;
      ierr = TaoBNKUpdateTrustRadius(tao, prered, actred, bnk->update_type, stepType, &stepAccepted);CHKERRQ(ierr);

      if (stepAccepted) {
        /* Step is good, evaluate the gradient and flip the need-Hessian switch */
        steplen = 1.0;
        needH = PETSC_TRUE;
        ++bnk->newt;
        ierr = TaoComputeGradient(tao, tao->solution, bnk->unprojected_gradient);CHKERRQ(ierr);
        ierr = TaoBNKEstimateActiveSet(tao, bnk->as_type);CHKERRQ(ierr);
        ierr = VecCopy(bnk->unprojected_gradient, tao->gradient);CHKERRQ(ierr);
        ierr = VecISSet(tao->gradient, bnk->active_idx, 0.0);CHKERRQ(ierr);
        ierr = TaoGradientNorm(tao, tao->gradient, NORM_2, &bnk->gnorm);CHKERRQ(ierr);
      } else {
        /* Step is bad, revert old solution and re-solve with new radius */
        steplen = 0.0;
        needH = PETSC_FALSE;
        bnk->f = bnk->fold;
        ierr = VecCopy(bnk->Xold, tao->solution);CHKERRQ(ierr);
        ierr = VecCopy(bnk->Gold, tao->gradient);CHKERRQ(ierr);
        ierr = VecCopy(bnk->unprojected_gradient_old, bnk->unprojected_gradient);CHKERRQ(ierr);
        if (oldTrust == tao->trust) {
          /* Can't change the radius anymore so just terminate */
          tao->reason = TAO_DIVERGED_TR_REDUCTION;
        }
      }

      /* Check for termination */
      ierr = VecFischer(tao->solution, bnk->unprojected_gradient, tao->XL, tao->XU, bnk->W);CHKERRQ(ierr);
      ierr = VecNorm(bnk->W, NORM_2, &resnorm);CHKERRQ(ierr);
      if (PetscIsInfOrNanReal(resnorm)) SETERRQ(PETSC_COMM_SELF,1, "User provided compute function generated Inf or NaN");
      ierr = TaoLogConvergenceHistory(tao, bnk->f, resnorm, 0.0, tao->ksp_its);CHKERRQ(ierr);
      ierr = TaoMonitor(tao, tao->niter, bnk->f, resnorm, 0.0, steplen);CHKERRQ(ierr);
      ierr = (*tao->ops->convergencetest)(tao, tao->cnvP);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}
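/*
  A minimal driver sketch showing how the BNTR solver above would be invoked
  through the public Tao API.  This is not part of the solver source:
  FormFunctionGradient, FormHessian, and the vectors x/xl/xu and matrix H are
  assumed user-side names, and the calls follow the same pre-3.18 ierr/CHKERRQ
  style used in this file.
*/
extern PetscErrorCode FormFunctionGradient(Tao,Vec,PetscReal*,Vec,void*);  /* hypothetical user callback */
extern PetscErrorCode FormHessian(Tao,Vec,Mat,Mat,void*);                  /* hypothetical user callback */

static PetscErrorCode RunBNTRSketch(MPI_Comm comm, Vec x, Vec xl, Vec xu, Mat H, void *user)
{
  PetscErrorCode ierr;
  Tao            tao;

  PetscFunctionBegin;
  ierr = TaoCreate(comm, &tao);CHKERRQ(ierr);
  ierr = TaoSetType(tao, TAOBNTR);CHKERRQ(ierr);                /* bounded Newton trust region */
  ierr = TaoSetInitialVector(tao, x);CHKERRQ(ierr);
  ierr = TaoSetVariableBounds(tao, xl, xu);CHKERRQ(ierr);       /* bounds drive the active-set logic above */
  ierr = TaoSetObjectiveAndGradientRoutine(tao, FormFunctionGradient, user);CHKERRQ(ierr);
  ierr = TaoSetHessianRoutine(tao, H, H, FormHessian, user);CHKERRQ(ierr);
  ierr = TaoSetFromOptions(tao);CHKERRQ(ierr);                  /* e.g. -tao_monitor and trust-region options */
  ierr = TaoSolve(tao);CHKERRQ(ierr);
  ierr = TaoDestroy(&tao);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}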
static PetscErrorCode TaoSolve_BMRM(Tao tao)
{
  PetscErrorCode ierr;
  TAO_DF         df;
  TAO_BMRM       *bmrm = (TAO_BMRM*)tao->data;

  /* Values and pointers to parts of the optimization problem */
  PetscReal      f = 0.0;
  Vec            W = tao->solution;
  Vec            G = tao->gradient;
  PetscReal      lambda;
  PetscReal      bt;
  Vec_Chain      grad_list, *tail_glist, *pgrad;
  PetscInt       i;
  PetscMPIInt    rank;

  /* Used in converged criteria check */
  PetscReal      reg;
  PetscReal      jtwt = 0.0, max_jtwt, pre_epsilon, epsilon, jw, min_jw;
  PetscReal      innerSolverTol;
  MPI_Comm       comm;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)tao,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
  lambda = bmrm->lambda;

  /* Check Stopping Condition */
  tao->step = 1.0;
  max_jtwt = -BMRM_INFTY;
  min_jw = BMRM_INFTY;
  innerSolverTol = 1.0;
  epsilon = 0.0;

  if (!rank) {
    ierr = init_df_solver(&df);CHKERRQ(ierr);
    grad_list.next = NULL;
    tail_glist = &grad_list;
  }

  df.tol = 1e-6;
  tao->reason = TAO_CONTINUE_ITERATING;

  /*-----------------Algorithm Begins------------------------*/
  /* make the scatter */
  ierr = VecScatterCreateToZero(W, &bmrm->scatter, &bmrm->local_w);CHKERRQ(ierr);
  ierr = VecAssemblyBegin(bmrm->local_w);CHKERRQ(ierr);
  ierr = VecAssemblyEnd(bmrm->local_w);CHKERRQ(ierr);

  /* NOTE: In application pass the sub-gradient of Remp(W) */
  ierr = TaoComputeObjectiveAndGradient(tao, W, &f, G);CHKERRQ(ierr);
  ierr = TaoLogConvergenceHistory(tao,f,1.0,0.0,tao->ksp_its);CHKERRQ(ierr);
  ierr = TaoMonitor(tao,tao->niter,f,1.0,0.0,tao->step);CHKERRQ(ierr);
  ierr = (*tao->ops->convergencetest)(tao,tao->cnvP);CHKERRQ(ierr);

  while (tao->reason == TAO_CONTINUE_ITERATING) {
    /* Call general purpose update function */
    if (tao->ops->update) {
      ierr = (*tao->ops->update)(tao, tao->niter, tao->user_update);CHKERRQ(ierr);
    }

    /* compute bt = Remp(Wt-1) - <Wt-1, At> */
    ierr = VecDot(W, G, &bt);CHKERRQ(ierr);
    bt = f - bt;

    /* First gather the gradient to the master node */
    ierr = VecScatterBegin(bmrm->scatter, G, bmrm->local_w, INSERT_VALUES, SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(bmrm->scatter, G, bmrm->local_w, INSERT_VALUES, SCATTER_FORWARD);CHKERRQ(ierr);

    /* Bring up the inner solver */
    if (!rank) {
      ierr = ensure_df_space(tao->niter+1, &df);CHKERRQ(ierr);
      ierr = make_grad_node(bmrm->local_w, &pgrad);CHKERRQ(ierr);
      tail_glist->next = pgrad;
      tail_glist = pgrad;

      df.a[tao->niter] = 1.0;
      df.f[tao->niter] = -bt;
      df.u[tao->niter] = 1.0;
      df.l[tao->niter] = 0.0;

      /* set up the Q */
      pgrad = grad_list.next;
      for (i=0; i<=tao->niter; i++) {
        if (!pgrad) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Assert that there are at least tao->niter+1 pgrad available");
        ierr = VecDot(pgrad->V, bmrm->local_w, &reg);CHKERRQ(ierr);
        df.Q[i][tao->niter] = df.Q[tao->niter][i] = reg / lambda;
        pgrad = pgrad->next;
      }

      if (tao->niter > 0) {
        df.x[tao->niter] = 0.0;
        ierr = solve(&df);CHKERRQ(ierr);
      } else df.x[0] = 1.0;

      /* now computing Jt*(alpha_t) which should be = Jt(wt) to check convergence */
      jtwt = 0.0;
      ierr = VecSet(bmrm->local_w, 0.0);CHKERRQ(ierr);
      pgrad = grad_list.next;
      for (i=0; i<=tao->niter; i++) {
        jtwt -= df.x[i] * df.f[i];
        ierr = VecAXPY(bmrm->local_w, -df.x[i] / lambda, pgrad->V);CHKERRQ(ierr);
        pgrad = pgrad->next;
      }

      ierr = VecNorm(bmrm->local_w, NORM_2, &reg);CHKERRQ(ierr);
      reg = 0.5*lambda*reg*reg;
      jtwt -= reg;
    } /* end if rank == 0 */

    /* scatter the new W to all nodes */
    ierr = VecScatterBegin(bmrm->scatter,bmrm->local_w,W,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    ierr = VecScatterEnd(bmrm->scatter,bmrm->local_w,W,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

    ierr = TaoComputeObjectiveAndGradient(tao, W, &f, G);CHKERRQ(ierr);

    ierr = MPI_Bcast(&jtwt,1,MPIU_REAL,0,comm);CHKERRQ(ierr);
    ierr = MPI_Bcast(&reg,1,MPIU_REAL,0,comm);CHKERRQ(ierr);

    jw = reg + f;                /* J(w) = regularizer + Remp(w) */
    if (jw < min_jw) min_jw = jw;
    if (jtwt > max_jtwt) max_jtwt = jtwt;

    pre_epsilon = epsilon;
    epsilon = min_jw - jtwt;

    if (!rank) {
      if (innerSolverTol > epsilon) innerSolverTol = epsilon;
      else if (innerSolverTol < 1e-7) innerSolverTol = 1e-7;

      /* if the annealing doesn't work well, lower the inner solver tolerance */
      if (pre_epsilon < epsilon) innerSolverTol *= 0.2;

      df.tol = innerSolverTol*0.5;
    }

    tao->niter++;
    ierr = TaoLogConvergenceHistory(tao,min_jw,epsilon,0.0,tao->ksp_its);CHKERRQ(ierr);
    ierr = TaoMonitor(tao,tao->niter,min_jw,epsilon,0.0,tao->step);CHKERRQ(ierr);
    ierr = (*tao->ops->convergencetest)(tao,tao->cnvP);CHKERRQ(ierr);
  }

  /* free all the memory */
  if (!rank) {
    ierr = destroy_grad_list(&grad_list);CHKERRQ(ierr);
    ierr = destroy_df_solver(&df);CHKERRQ(ierr);
  }

  ierr = VecDestroy(&bmrm->local_w);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&bmrm->scatter);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
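/*
  A matching driver sketch for the BMRM solver above.  BMRM is unconstrained
  and needs only the empirical risk Remp(w) and its (sub)gradient from the
  user; the quadratic regularizer (lambda/2)||w||^2 is handled internally via
  bmrm->lambda.  FormRempGradient and the vector w are hypothetical user-side
  names, and -tao_bmrm_lambda is the assumed options-database name for the
  regularization weight.
*/
extern PetscErrorCode FormRempGradient(Tao,Vec,PetscReal*,Vec,void*);  /* hypothetical user callback */

static PetscErrorCode RunBMRMSketch(MPI_Comm comm, Vec w, void *user)
{
  PetscErrorCode ierr;
  Tao            tao;

  PetscFunctionBegin;
  ierr = TaoCreate(comm, &tao);CHKERRQ(ierr);
  ierr = TaoSetType(tao, TAOBMRM);CHKERRQ(ierr);          /* bundle method for regularized risk minimization */
  ierr = TaoSetInitialVector(tao, w);CHKERRQ(ierr);
  ierr = TaoSetObjectiveAndGradientRoutine(tao, FormRempGradient, user);CHKERRQ(ierr);
  ierr = TaoSetFromOptions(tao);CHKERRQ(ierr);            /* e.g. -tao_bmrm_lambda <weight>, -tao_monitor */
  ierr = TaoSolve(tao);CHKERRQ(ierr);
  ierr = TaoDestroy(&tao);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}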