PetscErrorCode SNESSolve_Multiblock(SNES snes) { SNES_Multiblock *mb = (SNES_Multiblock *) snes->data; Vec X, Y, F; PetscReal fnorm; PetscInt maxits, i; PetscErrorCode ierr; PetscFunctionBegin; snes->reason = SNES_CONVERGED_ITERATING; maxits = snes->max_its; /* maximum number of iterations */ X = snes->vec_sol; /* X^n */ Y = snes->vec_sol_update; /* \tilde X */ F = snes->vec_func; /* residual vector */ ierr = VecSetBlockSize(X, mb->bs);CHKERRQ(ierr); ierr = VecSetBlockSize(Y, mb->bs);CHKERRQ(ierr); ierr = VecSetBlockSize(F, mb->bs);CHKERRQ(ierr); ierr = PetscObjectTakeAccess(snes);CHKERRQ(ierr); snes->iter = 0; snes->norm = 0.; ierr = PetscObjectGrantAccess(snes);CHKERRQ(ierr); if (!snes->vec_func_init_set){ ierr = SNESComputeFunction(snes, X, F);CHKERRQ(ierr); if (snes->domainerror) { snes->reason = SNES_DIVERGED_FUNCTION_DOMAIN; PetscFunctionReturn(0); } } else { snes->vec_func_init_set = PETSC_FALSE; } if (!snes->norm_init_set) { ierr = VecNorm(F, NORM_2, &fnorm);CHKERRQ(ierr); /* fnorm <- ||F|| */ if (PetscIsInfOrNanReal(fnorm)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_FP, "Infinite or not-a-number generated in norm"); } else { fnorm = snes->norm_init; snes->norm_init_set = PETSC_FALSE; } ierr = PetscObjectTakeAccess(snes);CHKERRQ(ierr); snes->norm = fnorm; ierr = PetscObjectGrantAccess(snes);CHKERRQ(ierr); SNESLogConvHistory(snes,fnorm,0); ierr = SNESMonitor(snes,0,fnorm);CHKERRQ(ierr); /* set parameter for default relative tolerance convergence test */ snes->ttol = fnorm*snes->rtol; /* test convergence */ ierr = (*snes->ops->converged)(snes,0,0.0,0.0,fnorm,&snes->reason,snes->cnvP);CHKERRQ(ierr); if (snes->reason) PetscFunctionReturn(0); for (i = 0; i < maxits; i++) { /* Call general purpose update function */ if (snes->ops->update) { ierr = (*snes->ops->update)(snes, snes->iter);CHKERRQ(ierr); } /* Compute X^{new} from subsolves */ if (mb->type == PC_COMPOSITE_ADDITIVE) { BlockDesc blocks = mb->blocks; if (mb->defaultblocks) { /*TODO: Make an array of Vecs for this */ /*ierr = VecStrideGatherAll(X, mb->x, INSERT_VALUES);CHKERRQ(ierr);*/ while (blocks) { ierr = SNESSolve(blocks->snes, PETSC_NULL, blocks->x);CHKERRQ(ierr); blocks = blocks->next; } /*ierr = VecStrideScatterAll(mb->x, X, INSERT_VALUES);CHKERRQ(ierr);*/ } else { while (blocks) { ierr = VecScatterBegin(blocks->sctx, X, blocks->x, INSERT_VALUES, SCATTER_FORWARD);CHKERRQ(ierr); ierr = VecScatterEnd(blocks->sctx, X, blocks->x, INSERT_VALUES, SCATTER_FORWARD);CHKERRQ(ierr); ierr = SNESSolve(blocks->snes, PETSC_NULL, blocks->x);CHKERRQ(ierr); ierr = VecScatterBegin(blocks->sctx, blocks->x, X, INSERT_VALUES, SCATTER_REVERSE);CHKERRQ(ierr); ierr = VecScatterEnd(blocks->sctx, blocks->x, X, INSERT_VALUES, SCATTER_REVERSE);CHKERRQ(ierr); blocks = blocks->next; } } } else { SETERRQ1(((PetscObject) snes)->comm, PETSC_ERR_SUP, "Unsupported or unknown composition", (int) mb->type); } CHKMEMQ; /* Compute F(X^{new}) */ ierr = SNESComputeFunction(snes, X, F);CHKERRQ(ierr); ierr = VecNorm(F, NORM_2, &fnorm);CHKERRQ(ierr); if (PetscIsInfOrNanReal(fnorm)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FP,"Infinite or not-a-number generated norm"); if (snes->nfuncs >= snes->max_funcs) { snes->reason = SNES_DIVERGED_FUNCTION_COUNT; break; } if (snes->domainerror) { snes->reason = SNES_DIVERGED_FUNCTION_DOMAIN; PetscFunctionReturn(0); } /* Monitor convergence */ ierr = PetscObjectTakeAccess(snes);CHKERRQ(ierr); snes->iter = i+1; snes->norm = fnorm; ierr = PetscObjectGrantAccess(snes);CHKERRQ(ierr); SNESLogConvHistory(snes,snes->norm,0); ierr = SNESMonitor(snes,snes->iter,snes->norm);CHKERRQ(ierr); /* Test for convergence */ ierr = (*snes->ops->converged)(snes,snes->iter,0.0,0.0,fnorm,&snes->reason,snes->cnvP);CHKERRQ(ierr); if (snes->reason) break; } if (i == maxits) { ierr = PetscInfo1(snes, "Maximum number of iterations has been reached: %D\n", maxits);CHKERRQ(ierr); if (!snes->reason) snes->reason = SNES_DIVERGED_MAX_IT; } PetscFunctionReturn(0); }
/*@C PetscHTTPSRequest - Send a request to an HTTPS server Input Parameters: + type - either "POST" or "GET" . url - URL of request host/path . header - additional header information, may be NULL . ctype - data type of body, for example application/json . body - data to send to server . ssl - obtained with PetscHTTPSConnect() - buffsize - size of buffer Output Parameter: . buff - everything returned from server Level: advanced .seealso: PetscHTTPRequest(), PetscHTTPSConnect(), PetscSSLInitializeContext(), PetscSSLDestroyContext(), PetscPullJSONValue() @*/ PetscErrorCode PetscHTTPSRequest(const char type[],const char url[],const char header[],const char ctype[],const char body[],SSL *ssl,char buff[],size_t buffsize) { char *request; int r; size_t request_len,len; PetscErrorCode ierr; PetscBool foundbody = PETSC_FALSE; PetscFunctionBegin; ierr = PetscHTTPBuildRequest(type,url,header,ctype,body,&request);CHKERRQ(ierr); ierr = PetscStrlen(request,&request_len);CHKERRQ(ierr); r = SSL_write(ssl,request,(int)request_len); switch (SSL_get_error(ssl,r)){ case SSL_ERROR_NONE: if (request_len != (size_t)r) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Incomplete write to SSL socket"); break; default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"SSL socket write problem"); } /* Now read the server's response, globus sends it in two chunks hence must read a second time if needed */ ierr = PetscMemzero(buff,buffsize);CHKERRQ(ierr); len = 0; foundbody = PETSC_FALSE; do { char *clen; int cl; size_t nlen; r = SSL_read(ssl,buff+len,(int)buffsize); len += r; switch (SSL_get_error(ssl,r)){ case SSL_ERROR_NONE: break; case SSL_ERROR_ZERO_RETURN: foundbody = PETSC_TRUE; SSL_shutdown(ssl); break; case SSL_ERROR_SYSCALL: foundbody = PETSC_TRUE; break; default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"SSL read problem"); } ierr = PetscStrstr(buff,"Content-Length: ",&clen);CHKERRQ(ierr); if (clen) { clen += 15; sscanf(clen,"%d",&cl); if (!cl) foundbody = PETSC_TRUE; else { ierr = PetscStrstr(buff,"\r\n\r\n",&clen);CHKERRQ(ierr); if (clen) { ierr = PetscStrlen(clen,&nlen);CHKERRQ(ierr); if (nlen-4 == (size_t) cl) foundbody = PETSC_TRUE; } } } else { /* if no content length than must leave because you don't know if you can read again */ foundbody = PETSC_TRUE; } } while (!foundbody); ierr = PetscInfo1(NULL,"HTTPS result follows: \n%s\n",buff);CHKERRQ(ierr); SSL_free(ssl); ierr = PetscFree(request);CHKERRQ(ierr); PetscFunctionReturn(0); }
/* @ TaoApply_Armijo - This routine performs a linesearch. It backtracks until the (nonmonotone) Armijo conditions are satisfied. Input Parameters: + tao - Tao context . X - current iterate (on output X contains new iterate, X + step*S) . S - search direction . f - merit function evaluated at X . G - gradient of merit function evaluated at X . W - work vector - step - initial estimate of step length Output parameters: + f - merit function evaluated at new iterate, X + step*S . G - gradient of merit function evaluated at new iterate, X + step*S . X - new iterate - step - final step length @ */ static PetscErrorCode TaoLineSearchApply_Armijo(TaoLineSearch ls, Vec x, PetscReal *f, Vec g, Vec s) { TaoLineSearch_ARMIJO *armP = (TaoLineSearch_ARMIJO *)ls->data; PetscErrorCode ierr; PetscInt i; PetscReal fact, ref, gdx; PetscInt idx; PetscBool g_computed=PETSC_FALSE; /* to prevent extra gradient computation */ PetscFunctionBegin; ls->reason = TAOLINESEARCH_CONTINUE_ITERATING; if (!armP->work) { ierr = VecDuplicate(x,&armP->work);CHKERRQ(ierr); armP->x = x; ierr = PetscObjectReference((PetscObject)armP->x);CHKERRQ(ierr); } else if (x != armP->x) { /* If x has changed, then recreate work */ ierr = VecDestroy(&armP->work);CHKERRQ(ierr); ierr = VecDuplicate(x,&armP->work);CHKERRQ(ierr); ierr = PetscObjectDereference((PetscObject)armP->x);CHKERRQ(ierr); armP->x = x; ierr = PetscObjectReference((PetscObject)armP->x);CHKERRQ(ierr); } /* Check linesearch parameters */ if (armP->alpha < 1) { ierr = PetscInfo1(ls,"Armijo line search error: alpha (%g) < 1\n", (double)armP->alpha);CHKERRQ(ierr); ls->reason=TAOLINESEARCH_FAILED_BADPARAMETER; } else if ((armP->beta <= 0) || (armP->beta >= 1)) { ierr = PetscInfo1(ls,"Armijo line search error: beta (%g) invalid\n", (double)armP->beta);CHKERRQ(ierr); ls->reason=TAOLINESEARCH_FAILED_BADPARAMETER; } else if ((armP->beta_inf <= 0) || (armP->beta_inf >= 1)) { ierr = PetscInfo1(ls,"Armijo line search error: beta_inf (%g) invalid\n", (double)armP->beta_inf);CHKERRQ(ierr); ls->reason=TAOLINESEARCH_FAILED_BADPARAMETER; } else if ((armP->sigma <= 0) || (armP->sigma >= 0.5)) { ierr = PetscInfo1(ls,"Armijo line search error: sigma (%g) invalid\n", (double)armP->sigma);CHKERRQ(ierr); ls->reason=TAOLINESEARCH_FAILED_BADPARAMETER; } else if (armP->memorySize < 1) { ierr = PetscInfo1(ls,"Armijo line search error: memory_size (%D) < 1\n", armP->memorySize);CHKERRQ(ierr); ls->reason=TAOLINESEARCH_FAILED_BADPARAMETER; } else if ((armP->referencePolicy != REFERENCE_MAX) && (armP->referencePolicy != REFERENCE_AVE) && (armP->referencePolicy != REFERENCE_MEAN)) { ierr = PetscInfo(ls,"Armijo line search error: reference_policy invalid\n");CHKERRQ(ierr); ls->reason=TAOLINESEARCH_FAILED_BADPARAMETER; } else if ((armP->replacementPolicy != REPLACE_FIFO) && (armP->replacementPolicy != REPLACE_MRU)) { ierr = PetscInfo(ls,"Armijo line search error: replacement_policy invalid\n");CHKERRQ(ierr); ls->reason=TAOLINESEARCH_FAILED_BADPARAMETER; } else if (PetscIsInfOrNanReal(*f)) { ierr = PetscInfo(ls,"Armijo line search error: initial function inf or nan\n");CHKERRQ(ierr); ls->reason=TAOLINESEARCH_FAILED_BADPARAMETER; } if (ls->reason != TAOLINESEARCH_CONTINUE_ITERATING) { PetscFunctionReturn(0); } /* Check to see of the memory has been allocated. If not, allocate the historical array and populate it with the initial function values. */ if (!armP->memory) { ierr = PetscMalloc1(armP->memorySize, &armP->memory );CHKERRQ(ierr); } if (!armP->memorySetup) { for (i = 0; i < armP->memorySize; i++) { armP->memory[i] = armP->alpha*(*f); } armP->current = 0; armP->lastReference = armP->memory[0]; armP->memorySetup=PETSC_TRUE; } /* Calculate reference value (MAX) */ ref = armP->memory[0]; idx = 0; for (i = 1; i < armP->memorySize; i++) { if (armP->memory[i] > ref) { ref = armP->memory[i]; idx = i; } } if (armP->referencePolicy == REFERENCE_AVE) { ref = 0; for (i = 0; i < armP->memorySize; i++) { ref += armP->memory[i]; } ref = ref / armP->memorySize; ref = PetscMax(ref, armP->memory[armP->current]); } else if (armP->referencePolicy == REFERENCE_MEAN) { ref = PetscMin(ref, 0.5*(armP->lastReference + armP->memory[armP->current])); } ierr = VecDot(g,s,&gdx);CHKERRQ(ierr); if (PetscIsInfOrNanReal(gdx)) { ierr = PetscInfo1(ls,"Initial Line Search step * g is Inf or Nan (%g)\n",(double)gdx);CHKERRQ(ierr); ls->reason=TAOLINESEARCH_FAILED_INFORNAN; PetscFunctionReturn(0); } if (gdx >= 0.0) { ierr = PetscInfo1(ls,"Initial Line Search step is not descent direction (g's=%g)\n",(double)gdx);CHKERRQ(ierr); ls->reason = TAOLINESEARCH_FAILED_ASCENT; PetscFunctionReturn(0); } if (armP->nondescending) { fact = armP->sigma; } else { fact = armP->sigma * gdx; } ls->step = ls->initstep; while (ls->step >= ls->stepmin && (ls->nfeval+ls->nfgeval) < ls->max_funcs) { /* Calculate iterate */ ierr = VecCopy(x,armP->work);CHKERRQ(ierr); ierr = VecAXPY(armP->work,ls->step,s);CHKERRQ(ierr); if (ls->bounded) { ierr = VecMedian(ls->lower,armP->work,ls->upper,armP->work);CHKERRQ(ierr); } /* Calculate function at new iterate */ if (ls->hasobjective) { ierr = TaoLineSearchComputeObjective(ls,armP->work,f);CHKERRQ(ierr); g_computed=PETSC_FALSE; } else if (ls->usegts) { ierr = TaoLineSearchComputeObjectiveAndGTS(ls,armP->work,f,&gdx);CHKERRQ(ierr); g_computed=PETSC_FALSE; } else { ierr = TaoLineSearchComputeObjectiveAndGradient(ls,armP->work,f,g);CHKERRQ(ierr); g_computed=PETSC_TRUE; } if (ls->step == ls->initstep) { ls->f_fullstep = *f; } if (PetscIsInfOrNanReal(*f)) { ls->step *= armP->beta_inf; } else { /* Check descent condition */ if (armP->nondescending && *f <= ref - ls->step*fact*ref) break; if (!armP->nondescending && *f <= ref + ls->step*fact) { break; } ls->step *= armP->beta; } } /* Check termination */ if (PetscIsInfOrNanReal(*f)) { ierr = PetscInfo(ls, "Function is inf or nan.\n");CHKERRQ(ierr); ls->reason = TAOLINESEARCH_FAILED_INFORNAN; } else if (ls->step < ls->stepmin) { ierr = PetscInfo(ls, "Step length is below tolerance.\n");CHKERRQ(ierr); ls->reason = TAOLINESEARCH_HALTED_RTOL; } else if ((ls->nfeval+ls->nfgeval) >= ls->max_funcs) { ierr = PetscInfo2(ls, "Number of line search function evals (%D) > maximum allowed (%D)\n",ls->nfeval+ls->nfgeval, ls->max_funcs);CHKERRQ(ierr); ls->reason = TAOLINESEARCH_HALTED_MAXFCN; } if (ls->reason) { PetscFunctionReturn(0); } /* Successful termination, update memory */ ls->reason = TAOLINESEARCH_SUCCESS; armP->lastReference = ref; if (armP->replacementPolicy == REPLACE_FIFO) { armP->memory[armP->current++] = *f; if (armP->current >= armP->memorySize) { armP->current = 0; } } else { armP->current = idx; armP->memory[idx] = *f; } /* Update iterate and compute gradient */ ierr = VecCopy(armP->work,x);CHKERRQ(ierr); if (!g_computed) { ierr = TaoLineSearchComputeGradient(ls, x, g);CHKERRQ(ierr); } ierr = PetscInfo2(ls, "%D function evals in line search, step = %g\n",ls->nfeval, (double)ls->step);CHKERRQ(ierr); PetscFunctionReturn(0); }
static PetscErrorCode SNESLineSearchApply_BT(SNESLineSearch linesearch) { PetscBool changed_y,changed_w; PetscErrorCode ierr; Vec X,F,Y,W,G; SNES snes; PetscReal fnorm, xnorm, ynorm, gnorm; PetscReal lambda,lambdatemp,lambdaprev,minlambda,maxstep,initslope,alpha,stol; PetscReal t1,t2,a,b,d; PetscReal f; PetscReal g,gprev; PetscBool domainerror; PetscViewer monitor; PetscInt max_its,count; SNESLineSearch_BT *bt; Mat jac; PetscErrorCode (*objective)(SNES,Vec,PetscReal*,void*); PetscFunctionBegin; ierr = SNESLineSearchGetVecs(linesearch, &X, &F, &Y, &W, &G);CHKERRQ(ierr); ierr = SNESLineSearchGetNorms(linesearch, &xnorm, &fnorm, &ynorm);CHKERRQ(ierr); ierr = SNESLineSearchGetLambda(linesearch, &lambda);CHKERRQ(ierr); ierr = SNESLineSearchGetSNES(linesearch, &snes);CHKERRQ(ierr); ierr = SNESLineSearchGetMonitor(linesearch, &monitor);CHKERRQ(ierr); ierr = SNESLineSearchGetTolerances(linesearch,&minlambda,&maxstep,NULL,NULL,NULL,&max_its);CHKERRQ(ierr); ierr = SNESGetTolerances(snes,NULL,NULL,&stol,NULL,NULL);CHKERRQ(ierr); ierr = SNESGetObjective(snes,&objective,NULL);CHKERRQ(ierr); bt = (SNESLineSearch_BT*)linesearch->data; alpha = bt->alpha; ierr = SNESGetJacobian(snes, &jac, NULL, NULL, NULL);CHKERRQ(ierr); if (!jac && !objective) SETERRQ(PetscObjectComm((PetscObject)linesearch), PETSC_ERR_USER, "SNESLineSearchBT requires a Jacobian matrix"); /* precheck */ ierr = SNESLineSearchPreCheck(linesearch,X,Y,&changed_y);CHKERRQ(ierr); ierr = SNESLineSearchSetSuccess(linesearch, PETSC_TRUE);CHKERRQ(ierr); ierr = VecNormBegin(Y, NORM_2, &ynorm);CHKERRQ(ierr); ierr = VecNormBegin(X, NORM_2, &xnorm);CHKERRQ(ierr); ierr = VecNormEnd(Y, NORM_2, &ynorm);CHKERRQ(ierr); ierr = VecNormEnd(X, NORM_2, &xnorm);CHKERRQ(ierr); if (ynorm == 0.0) { if (monitor) { ierr = PetscViewerASCIIAddTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(monitor," Line search: Initial direction and size is 0\n");CHKERRQ(ierr); ierr = PetscViewerASCIISubtractTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); } ierr = VecCopy(X,W);CHKERRQ(ierr); ierr = VecCopy(F,G);CHKERRQ(ierr); ierr = SNESLineSearchSetNorms(linesearch,xnorm,fnorm,ynorm);CHKERRQ(ierr); ierr = SNESLineSearchSetSuccess(linesearch, PETSC_FALSE);CHKERRQ(ierr); PetscFunctionReturn(0); } if (ynorm > maxstep) { /* Step too big, so scale back */ if (monitor) { ierr = PetscViewerASCIIAddTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(monitor," Line search: Scaling step by %14.12e old ynorm %14.12e\n", (double)(maxstep/ynorm),(double)ynorm);CHKERRQ(ierr); ierr = PetscViewerASCIISubtractTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); } ierr = VecScale(Y,maxstep/(ynorm));CHKERRQ(ierr); ynorm = maxstep; } /* if the SNES has an objective set, use that instead of the function value */ if (objective) { ierr = SNESComputeObjective(snes,X,&f);CHKERRQ(ierr); } else { f = fnorm*fnorm; } /* compute the initial slope */ if (objective) { /* slope comes from the function (assumed to be the gradient of the objective */ ierr = VecDotRealPart(Y,F,&initslope);CHKERRQ(ierr); } else { /* slope comes from the normal equations */ ierr = MatMult(jac,Y,W);CHKERRQ(ierr); ierr = VecDotRealPart(F,W,&initslope);CHKERRQ(ierr); if (initslope > 0.0) initslope = -initslope; if (initslope == 0.0) initslope = -1.0; } ierr = VecWAXPY(W,-lambda,Y,X);CHKERRQ(ierr); if (linesearch->ops->viproject) { ierr = (*linesearch->ops->viproject)(snes, W);CHKERRQ(ierr); } if (snes->nfuncs >= snes->max_funcs) { ierr = PetscInfo(snes,"Exceeded maximum function evaluations, while checking full step length!\n");CHKERRQ(ierr); snes->reason = SNES_DIVERGED_FUNCTION_COUNT; ierr = SNESLineSearchSetSuccess(linesearch, PETSC_FALSE);CHKERRQ(ierr); PetscFunctionReturn(0); } if (objective) { ierr = SNESComputeObjective(snes,W,&g);CHKERRQ(ierr); } else { ierr = (*linesearch->ops->snesfunc)(snes,W,G);CHKERRQ(ierr); ierr = SNESGetFunctionDomainError(snes, &domainerror);CHKERRQ(ierr); if (domainerror) { ierr = SNESLineSearchSetSuccess(linesearch, PETSC_FALSE);CHKERRQ(ierr); PetscFunctionReturn(0); } if (linesearch->ops->vinorm) { gnorm = fnorm; ierr = (*linesearch->ops->vinorm)(snes, G, W, &gnorm);CHKERRQ(ierr); } else { ierr = VecNorm(G,NORM_2,&gnorm);CHKERRQ(ierr); } g = PetscSqr(gnorm); } if (PetscIsInfOrNanReal(g)) { ierr = SNESLineSearchSetSuccess(linesearch, PETSC_FALSE);CHKERRQ(ierr); ierr = PetscInfo(monitor,"Aborted due to Nan or Inf in function evaluation\n");CHKERRQ(ierr); PetscFunctionReturn(0); } if (!objective) { ierr = PetscInfo2(snes,"Initial fnorm %14.12e gnorm %14.12e\n", (double)fnorm, (double)gnorm);CHKERRQ(ierr); } if (.5*g <= .5*f + lambda*alpha*initslope) { /* Sufficient reduction or step tolerance convergence */ if (monitor) { ierr = PetscViewerASCIIAddTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); if (!objective) { ierr = PetscViewerASCIIPrintf(monitor," Line search: Using full step: fnorm %14.12e gnorm %14.12e\n", (double)fnorm, (double)gnorm);CHKERRQ(ierr); } else { ierr = PetscViewerASCIIPrintf(monitor," Line search: Using full step: obj0 %14.12e obj %14.12e\n", (double)f, (double)g);CHKERRQ(ierr); } ierr = PetscViewerASCIISubtractTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); } } else { /* Since the full step didn't work and the step is tiny, quit */ if (stol*xnorm > ynorm) { ierr = SNESLineSearchSetSuccess(linesearch, PETSC_FALSE);CHKERRQ(ierr); ierr = PetscInfo2(monitor,"Aborted due to ynorm < stol*xnorm (%14.12e < %14.12e) and inadequate full step.\n",(double)ynorm,(double)stol*xnorm);CHKERRQ(ierr); PetscFunctionReturn(0); } /* Fit points with quadratic */ lambdatemp = -initslope/(g - f - 2.0*lambda*initslope); lambdaprev = lambda; gprev = g; if (lambdatemp > .5*lambda) lambdatemp = .5*lambda; if (lambdatemp <= .1*lambda) lambda = .1*lambda; else lambda = lambdatemp; ierr = VecWAXPY(W,-lambda,Y,X);CHKERRQ(ierr); if (linesearch->ops->viproject) { ierr = (*linesearch->ops->viproject)(snes, W);CHKERRQ(ierr); } if (snes->nfuncs >= snes->max_funcs) { ierr = PetscInfo1(snes,"Exceeded maximum function evaluations, while attempting quadratic backtracking! %D \n",snes->nfuncs);CHKERRQ(ierr); snes->reason = SNES_DIVERGED_FUNCTION_COUNT; ierr = SNESLineSearchSetSuccess(linesearch, PETSC_FALSE);CHKERRQ(ierr); PetscFunctionReturn(0); } if (objective) { ierr = SNESComputeObjective(snes,W,&g);CHKERRQ(ierr); } else { ierr = (*linesearch->ops->snesfunc)(snes,W,G);CHKERRQ(ierr); ierr = SNESGetFunctionDomainError(snes, &domainerror);CHKERRQ(ierr); if (domainerror) PetscFunctionReturn(0); if (linesearch->ops->vinorm) { gnorm = fnorm; ierr = (*linesearch->ops->vinorm)(snes, G, W, &gnorm);CHKERRQ(ierr); } else { ierr = VecNorm(G,NORM_2,&gnorm);CHKERRQ(ierr); } g = PetscSqr(gnorm); } if (PetscIsInfOrNanReal(g)) { ierr = SNESLineSearchSetSuccess(linesearch, PETSC_FALSE);CHKERRQ(ierr); ierr = PetscInfo(monitor,"Aborted due to Nan or Inf in function evaluation\n");CHKERRQ(ierr); PetscFunctionReturn(0); } if (monitor) { ierr = PetscViewerASCIIAddTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); if (!objective) { ierr = PetscViewerASCIIPrintf(monitor," Line search: gnorm after quadratic fit %14.12e\n",(double)gnorm);CHKERRQ(ierr); } else { ierr = PetscViewerASCIIPrintf(monitor," Line search: obj after quadratic fit %14.12e\n",(double)g);CHKERRQ(ierr); } ierr = PetscViewerASCIISubtractTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); } if (.5*g < .5*f + lambda*alpha*initslope) { /* sufficient reduction */ if (monitor) { ierr = PetscViewerASCIIAddTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(monitor," Line search: Quadratically determined step, lambda=%18.16e\n",(double)lambda);CHKERRQ(ierr); ierr = PetscViewerASCIISubtractTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); } } else { /* Fit points with cubic */ for (count = 0; count < max_its; count++) { if (lambda <= minlambda) { if (monitor) { ierr = PetscViewerASCIIAddTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(monitor," Line search: unable to find good step length! After %D tries \n",count);CHKERRQ(ierr); if (!objective) { ierr = PetscViewerASCIIPrintf(monitor, " Line search: fnorm=%18.16e, gnorm=%18.16e, ynorm=%18.16e, minlambda=%18.16e, lambda=%18.16e, initial slope=%18.16e\n", (double)fnorm, (double)gnorm, (double)ynorm, (double)minlambda, (double)lambda, (double)initslope);CHKERRQ(ierr); } else { ierr = PetscViewerASCIIPrintf(monitor, " Line search: obj(0)=%18.16e, obj=%18.16e, ynorm=%18.16e, minlambda=%18.16e, lambda=%18.16e, initial slope=%18.16e\n", (double)f, (double)g, (double)ynorm, (double)minlambda, (double)lambda, (double)initslope);CHKERRQ(ierr); } ierr = PetscViewerASCIISubtractTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); } ierr = SNESLineSearchSetSuccess(linesearch, PETSC_FALSE);CHKERRQ(ierr); PetscFunctionReturn(0); } if (linesearch->order == SNES_LINESEARCH_ORDER_CUBIC) { t1 = .5*(g - f) - lambda*initslope; t2 = .5*(gprev - f) - lambdaprev*initslope; a = (t1/(lambda*lambda) - t2/(lambdaprev*lambdaprev))/(lambda-lambdaprev); b = (-lambdaprev*t1/(lambda*lambda) + lambda*t2/(lambdaprev*lambdaprev))/(lambda-lambdaprev); d = b*b - 3*a*initslope; if (d < 0.0) d = 0.0; if (a == 0.0) lambdatemp = -initslope/(2.0*b); else lambdatemp = (-b + PetscSqrtReal(d))/(3.0*a); } else if (linesearch->order == SNES_LINESEARCH_ORDER_QUADRATIC) { lambdatemp = -initslope/(g - f - 2.0*initslope); } else SETERRQ(PetscObjectComm((PetscObject)linesearch), PETSC_ERR_SUP, "unsupported line search order for type bt"); lambdaprev = lambda; gprev = g; if (lambdatemp > .5*lambda) lambdatemp = .5*lambda; if (lambdatemp <= .1*lambda) lambda = .1*lambda; else lambda = lambdatemp; ierr = VecWAXPY(W,-lambda,Y,X);CHKERRQ(ierr); if (linesearch->ops->viproject) { ierr = (*linesearch->ops->viproject)(snes,W);CHKERRQ(ierr); } if (snes->nfuncs >= snes->max_funcs) { ierr = PetscInfo1(snes,"Exceeded maximum function evaluations, while looking for good step length! %D \n",count);CHKERRQ(ierr); if (!objective) { ierr = PetscInfo5(snes,"fnorm=%18.16e, gnorm=%18.16e, ynorm=%18.16e, lambda=%18.16e, initial slope=%18.16e\n", (double)fnorm,(double)gnorm,(double)ynorm,(double)lambda,(double)initslope);CHKERRQ(ierr); } ierr = SNESLineSearchSetSuccess(linesearch, PETSC_FALSE);CHKERRQ(ierr); snes->reason = SNES_DIVERGED_FUNCTION_COUNT; PetscFunctionReturn(0); } if (objective) { ierr = SNESComputeObjective(snes,W,&g);CHKERRQ(ierr); } else { ierr = (*linesearch->ops->snesfunc)(snes,W,G);CHKERRQ(ierr); ierr = SNESGetFunctionDomainError(snes, &domainerror);CHKERRQ(ierr); if (domainerror) { ierr = SNESLineSearchSetSuccess(linesearch, PETSC_FALSE);CHKERRQ(ierr); PetscFunctionReturn(0); } if (linesearch->ops->vinorm) { gnorm = fnorm; ierr = (*linesearch->ops->vinorm)(snes, G, W, &gnorm);CHKERRQ(ierr); } else { ierr = VecNorm(G,NORM_2,&gnorm);CHKERRQ(ierr); } g = PetscSqr(gnorm); } if (PetscIsInfOrNanReal(gnorm)) { ierr = SNESLineSearchSetSuccess(linesearch, PETSC_FALSE);CHKERRQ(ierr); ierr = PetscInfo(monitor,"Aborted due to Nan or Inf in function evaluation\n");CHKERRQ(ierr); PetscFunctionReturn(0); } if (.5*g < .5*f + lambda*alpha*initslope) { /* is reduction enough? */ if (monitor) { ierr = PetscViewerASCIIAddTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); if (!objective) { if (linesearch->order == SNES_LINESEARCH_ORDER_CUBIC) { ierr = PetscViewerASCIIPrintf(monitor," Line search: Cubically determined step, current gnorm %14.12e lambda=%18.16e\n",(double)gnorm,(double)lambda);CHKERRQ(ierr); } else { ierr = PetscViewerASCIIPrintf(monitor," Line search: Quadratically determined step, current gnorm %14.12e lambda=%18.16e\n",(double)gnorm,(double)lambda);CHKERRQ(ierr); } ierr = PetscViewerASCIISubtractTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); } else { if (linesearch->order == SNES_LINESEARCH_ORDER_CUBIC) { ierr = PetscViewerASCIIPrintf(monitor," Line search: Cubically determined step, obj %14.12e lambda=%18.16e\n",(double)g,(double)lambda);CHKERRQ(ierr); } else { ierr = PetscViewerASCIIPrintf(monitor," Line search: Quadratically determined step, obj %14.12e lambda=%18.16e\n",(double)g,(double)lambda);CHKERRQ(ierr); } ierr = PetscViewerASCIISubtractTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); } } break; } else if (monitor) { ierr = PetscViewerASCIIAddTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); if (!objective) { if (linesearch->order == SNES_LINESEARCH_ORDER_CUBIC) { ierr = PetscViewerASCIIPrintf(monitor," Line search: Cubic step no good, shrinking lambda, current gnorm %12.12e lambda=%18.16e\n",(double)gnorm,(double)lambda);CHKERRQ(ierr); } else { ierr = PetscViewerASCIIPrintf(monitor," Line search: Quadratic step no good, shrinking lambda, current gnorm %12.12e lambda=%18.16e\n",(double)gnorm,(double)lambda);CHKERRQ(ierr); } ierr = PetscViewerASCIISubtractTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); } else { if (linesearch->order == SNES_LINESEARCH_ORDER_CUBIC) { ierr = PetscViewerASCIIPrintf(monitor," Line search: Cubic step no good, shrinking lambda, obj %12.12e lambda=%18.16e\n",(double)g,(double)lambda);CHKERRQ(ierr); } else { ierr = PetscViewerASCIIPrintf(monitor," Line search: Quadratic step no good, shrinking lambda, obj %12.12e lambda=%18.16e\n",(double)g,(double)lambda);CHKERRQ(ierr); } ierr = PetscViewerASCIISubtractTab(monitor,((PetscObject)linesearch)->tablevel);CHKERRQ(ierr); } } } } } /* postcheck */ ierr = SNESLineSearchPostCheck(linesearch,X,Y,W,&changed_y,&changed_w);CHKERRQ(ierr); if (changed_y) { ierr = VecWAXPY(W,-lambda,Y,X);CHKERRQ(ierr); if (linesearch->ops->viproject) { ierr = (*linesearch->ops->viproject)(snes, W);CHKERRQ(ierr); } } if (changed_y || changed_w || objective) { /* recompute the function norm if the step has changed or the objective isn't the norm */ ierr = (*linesearch->ops->snesfunc)(snes,W,G);CHKERRQ(ierr); ierr = SNESGetFunctionDomainError(snes, &domainerror);CHKERRQ(ierr); if (domainerror) { ierr = SNESLineSearchSetSuccess(linesearch, PETSC_FALSE);CHKERRQ(ierr); PetscFunctionReturn(0); } if (linesearch->ops->vinorm) { gnorm = fnorm; ierr = (*linesearch->ops->vinorm)(snes, G, W, &gnorm);CHKERRQ(ierr); } else { ierr = VecNorm(G,NORM_2,&gnorm);CHKERRQ(ierr); } ierr = VecNorm(Y,NORM_2,&ynorm);CHKERRQ(ierr); if (PetscIsInfOrNanReal(gnorm)) { ierr = SNESLineSearchSetSuccess(linesearch, PETSC_FALSE);CHKERRQ(ierr); ierr = PetscInfo(monitor,"Aborted due to Nan or Inf in function evaluation\n");CHKERRQ(ierr); PetscFunctionReturn(0); } } /* copy the solution over */ ierr = VecCopy(W, X);CHKERRQ(ierr); ierr = VecCopy(G, F);CHKERRQ(ierr); ierr = VecNorm(X, NORM_2, &xnorm);CHKERRQ(ierr); ierr = SNESLineSearchSetLambda(linesearch, lambda);CHKERRQ(ierr); ierr = SNESLineSearchSetNorms(linesearch, xnorm, gnorm, ynorm);CHKERRQ(ierr); PetscFunctionReturn(0); }
static PetscErrorCode TaoSolve_TRON(Tao tao) { TAO_TRON *tron = (TAO_TRON *)tao->data; PetscErrorCode ierr; PetscInt iter=0,its; TaoConvergedReason reason = TAO_CONTINUE_ITERATING; TaoLineSearchConvergedReason ls_reason = TAOLINESEARCH_CONTINUE_ITERATING; PetscReal prered,actred,delta,f,f_new,rhok,gdx,xdiff,stepsize; PetscFunctionBegin; tron->pgstepsize=1.0; tao->trust = tao->trust0; /* Project the current point onto the feasible set */ ierr = TaoComputeVariableBounds(tao);CHKERRQ(ierr); ierr = VecMedian(tao->XL,tao->solution,tao->XU,tao->solution);CHKERRQ(ierr); ierr = TaoLineSearchSetVariableBounds(tao->linesearch,tao->XL,tao->XU);CHKERRQ(ierr); ierr = TaoComputeObjectiveAndGradient(tao,tao->solution,&tron->f,tao->gradient);CHKERRQ(ierr); ierr = ISDestroy(&tron->Free_Local);CHKERRQ(ierr); ierr = VecWhichBetween(tao->XL,tao->solution,tao->XU,&tron->Free_Local);CHKERRQ(ierr); /* Project the gradient and calculate the norm */ ierr = VecBoundGradientProjection(tao->gradient,tao->solution, tao->XL, tao->XU, tao->gradient);CHKERRQ(ierr); ierr = VecNorm(tao->gradient,NORM_2,&tron->gnorm);CHKERRQ(ierr); if (PetscIsInfOrNanReal(tron->f) || PetscIsInfOrNanReal(tron->gnorm)) SETERRQ(PETSC_COMM_SELF,1, "User provided compute function generated Inf pr NaN"); if (tao->trust <= 0) { tao->trust=PetscMax(tron->gnorm*tron->gnorm,1.0); } tron->stepsize=tao->trust; ierr = TaoMonitor(tao, iter, tron->f, tron->gnorm, 0.0, tron->stepsize, &reason);CHKERRQ(ierr); while (reason==TAO_CONTINUE_ITERATING){ tao->ksp_its=0; ierr = TronGradientProjections(tao,tron);CHKERRQ(ierr); f=tron->f; delta=tao->trust; tron->n_free_last = tron->n_free; ierr = TaoComputeHessian(tao,tao->solution,tao->hessian,tao->hessian_pre);CHKERRQ(ierr); ierr = ISGetSize(tron->Free_Local, &tron->n_free);CHKERRQ(ierr); /* If no free variables */ if (tron->n_free == 0) { actred=0; ierr = PetscInfo(tao,"No free variables in tron iteration.\n");CHKERRQ(ierr); break; } /* use free_local to mask/submat gradient, hessian, stepdirection */ ierr = TaoVecGetSubVec(tao->gradient,tron->Free_Local,tao->subset_type,0.0,&tron->R);CHKERRQ(ierr); ierr = TaoVecGetSubVec(tao->gradient,tron->Free_Local,tao->subset_type,0.0,&tron->DXFree);CHKERRQ(ierr); ierr = VecSet(tron->DXFree,0.0);CHKERRQ(ierr); ierr = VecScale(tron->R, -1.0);CHKERRQ(ierr); ierr = TaoMatGetSubMat(tao->hessian, tron->Free_Local, tron->diag, tao->subset_type, &tron->H_sub);CHKERRQ(ierr); if (tao->hessian == tao->hessian_pre) { ierr = MatDestroy(&tron->Hpre_sub);CHKERRQ(ierr); ierr = PetscObjectReference((PetscObject)(tron->H_sub));CHKERRQ(ierr); tron->Hpre_sub = tron->H_sub; } else { ierr = TaoMatGetSubMat(tao->hessian_pre, tron->Free_Local, tron->diag, tao->subset_type,&tron->Hpre_sub);CHKERRQ(ierr); } ierr = KSPReset(tao->ksp);CHKERRQ(ierr); ierr = KSPSetOperators(tao->ksp, tron->H_sub, tron->Hpre_sub);CHKERRQ(ierr); while (1) { /* Approximately solve the reduced linear system */ ierr = KSPSTCGSetRadius(tao->ksp,delta);CHKERRQ(ierr); ierr = KSPSolve(tao->ksp, tron->R, tron->DXFree);CHKERRQ(ierr); ierr = KSPGetIterationNumber(tao->ksp,&its);CHKERRQ(ierr); tao->ksp_its+=its; tao->ksp_tot_its+=its; ierr = VecSet(tao->stepdirection,0.0);CHKERRQ(ierr); /* Add dxfree matrix to compute step direction vector */ ierr = VecISAXPY(tao->stepdirection,tron->Free_Local,1.0,tron->DXFree);CHKERRQ(ierr); if (0) { PetscReal rhs,stepnorm; ierr = VecNorm(tron->R,NORM_2,&rhs);CHKERRQ(ierr); ierr = VecNorm(tron->DXFree,NORM_2,&stepnorm);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"|rhs|=%g\t|s|=%g\n",(double)rhs,(double)stepnorm);CHKERRQ(ierr); } ierr = VecDot(tao->gradient, tao->stepdirection, &gdx);CHKERRQ(ierr); ierr = PetscInfo1(tao,"Expected decrease in function value: %14.12e\n",(double)gdx);CHKERRQ(ierr); ierr = VecCopy(tao->solution, tron->X_New);CHKERRQ(ierr); ierr = VecCopy(tao->gradient, tron->G_New);CHKERRQ(ierr); stepsize=1.0;f_new=f; ierr = TaoLineSearchSetInitialStepLength(tao->linesearch,1.0);CHKERRQ(ierr); ierr = TaoLineSearchApply(tao->linesearch, tron->X_New, &f_new, tron->G_New, tao->stepdirection,&stepsize,&ls_reason);CHKERRQ(ierr);CHKERRQ(ierr); ierr = TaoAddLineSearchCounts(tao);CHKERRQ(ierr); ierr = MatMult(tao->hessian, tao->stepdirection, tron->Work);CHKERRQ(ierr); ierr = VecAYPX(tron->Work, 0.5, tao->gradient);CHKERRQ(ierr); ierr = VecDot(tao->stepdirection, tron->Work, &prered);CHKERRQ(ierr); actred = f_new - f; if (actred<0) { rhok=PetscAbs(-actred/prered); } else { rhok=0.0; } /* Compare actual improvement to the quadratic model */ if (rhok > tron->eta1) { /* Accept the point */ /* d = x_new - x */ ierr = VecCopy(tron->X_New, tao->stepdirection);CHKERRQ(ierr); ierr = VecAXPY(tao->stepdirection, -1.0, tao->solution);CHKERRQ(ierr); ierr = VecNorm(tao->stepdirection, NORM_2, &xdiff);CHKERRQ(ierr); xdiff *= stepsize; /* Adjust trust region size */ if (rhok < tron->eta2 ){ delta = PetscMin(xdiff,delta)*tron->sigma1; } else if (rhok > tron->eta4 ){ delta= PetscMin(xdiff,delta)*tron->sigma3; } else if (rhok > tron->eta3 ){ delta=PetscMin(xdiff,delta)*tron->sigma2; } ierr = VecBoundGradientProjection(tron->G_New,tron->X_New, tao->XL, tao->XU, tao->gradient);CHKERRQ(ierr); if (tron->Free_Local) { ierr = ISDestroy(&tron->Free_Local);CHKERRQ(ierr); } ierr = VecWhichBetween(tao->XL, tron->X_New, tao->XU, &tron->Free_Local);CHKERRQ(ierr); f=f_new; ierr = VecNorm(tao->gradient,NORM_2,&tron->gnorm);CHKERRQ(ierr); ierr = VecCopy(tron->X_New, tao->solution);CHKERRQ(ierr); ierr = VecCopy(tron->G_New, tao->gradient);CHKERRQ(ierr); break; } else if (delta <= 1e-30) { break; } else { delta /= 4.0; } } /* end linear solve loop */ tron->f=f; tron->actred=actred; tao->trust=delta; iter++; ierr = TaoMonitor(tao, iter, tron->f, tron->gnorm, 0.0, delta, &reason);CHKERRQ(ierr); } /* END MAIN LOOP */ PetscFunctionReturn(0); }
/* SNESSolve_NEWTONLS - Solves a nonlinear system with a truncated Newton method with a line search. Input Parameters: . snes - the SNES context Output Parameter: . outits - number of iterations until termination Application Interface Routine: SNESSolve() Notes: This implements essentially a truncated Newton method with a line search. By default a cubic backtracking line search is employed, as described in the text "Numerical Methods for Unconstrained Optimization and Nonlinear Equations" by Dennis and Schnabel. */ PetscErrorCode SNESSolve_NEWTONLS(SNES snes) { PetscErrorCode ierr; PetscInt maxits,i,lits; SNESLineSearchReason lssucceed; PetscReal fnorm,gnorm,xnorm,ynorm; Vec Y,X,F; SNESLineSearch linesearch; SNESConvergedReason reason; PetscFunctionBegin; if (snes->xl || snes->xu || snes->ops->computevariablebounds) SETERRQ1(PetscObjectComm((PetscObject)snes),PETSC_ERR_ARG_WRONGSTATE, "SNES solver %s does not support bounds", ((PetscObject)snes)->type_name); snes->numFailures = 0; snes->numLinearSolveFailures = 0; snes->reason = SNES_CONVERGED_ITERATING; maxits = snes->max_its; /* maximum number of iterations */ X = snes->vec_sol; /* solution vector */ F = snes->vec_func; /* residual vector */ Y = snes->vec_sol_update; /* newton step */ ierr = PetscObjectSAWsTakeAccess((PetscObject)snes);CHKERRQ(ierr); snes->iter = 0; snes->norm = 0.0; ierr = PetscObjectSAWsGrantAccess((PetscObject)snes);CHKERRQ(ierr); ierr = SNESGetLineSearch(snes, &linesearch);CHKERRQ(ierr); /* compute the preconditioned function first in the case of left preconditioning with preconditioned function */ if (snes->pc && snes->pcside == PC_LEFT && snes->functype == SNES_FUNCTION_PRECONDITIONED) { ierr = SNESApplyNPC(snes,X,NULL,F);CHKERRQ(ierr); ierr = SNESGetConvergedReason(snes->pc,&reason);CHKERRQ(ierr); if (reason < 0 && reason != SNES_DIVERGED_MAX_IT) { snes->reason = SNES_DIVERGED_INNER; PetscFunctionReturn(0); } ierr = VecNormBegin(F,NORM_2,&fnorm);CHKERRQ(ierr); ierr = VecNormEnd(F,NORM_2,&fnorm);CHKERRQ(ierr); } else { if (!snes->vec_func_init_set) { ierr = SNESComputeFunction(snes,X,F);CHKERRQ(ierr); } else snes->vec_func_init_set = PETSC_FALSE; } ierr = VecNorm(F,NORM_2,&fnorm);CHKERRQ(ierr); /* fnorm <- ||F|| */ SNESCheckFunctionNorm(snes,fnorm); ierr = PetscObjectSAWsTakeAccess((PetscObject)snes);CHKERRQ(ierr); snes->norm = fnorm; ierr = PetscObjectSAWsGrantAccess((PetscObject)snes);CHKERRQ(ierr); ierr = SNESLogConvergenceHistory(snes,fnorm,0);CHKERRQ(ierr); ierr = SNESMonitor(snes,0,fnorm);CHKERRQ(ierr); /* test convergence */ ierr = (*snes->ops->converged)(snes,0,0.0,0.0,fnorm,&snes->reason,snes->cnvP);CHKERRQ(ierr); if (snes->reason) PetscFunctionReturn(0); for (i=0; i<maxits; i++) { /* Call general purpose update function */ if (snes->ops->update) { ierr = (*snes->ops->update)(snes, snes->iter);CHKERRQ(ierr); } /* apply the nonlinear preconditioner */ if (snes->pc) { if (snes->pcside == PC_RIGHT) { ierr = SNESSetInitialFunction(snes->pc, F);CHKERRQ(ierr); ierr = PetscLogEventBegin(SNES_NPCSolve,snes->pc,X,snes->vec_rhs,0);CHKERRQ(ierr); ierr = SNESSolve(snes->pc, snes->vec_rhs, X);CHKERRQ(ierr); ierr = PetscLogEventEnd(SNES_NPCSolve,snes->pc,X,snes->vec_rhs,0);CHKERRQ(ierr); ierr = SNESGetConvergedReason(snes->pc,&reason);CHKERRQ(ierr); if (reason < 0 && reason != SNES_DIVERGED_MAX_IT) { snes->reason = SNES_DIVERGED_INNER; PetscFunctionReturn(0); } ierr = SNESGetNPCFunction(snes,F,&fnorm);CHKERRQ(ierr); } else if (snes->pcside == PC_LEFT && snes->functype == SNES_FUNCTION_UNPRECONDITIONED) { ierr = SNESApplyNPC(snes,X,F,F);CHKERRQ(ierr); ierr = SNESGetConvergedReason(snes->pc,&reason);CHKERRQ(ierr); if (reason < 0 && reason != SNES_DIVERGED_MAX_IT) { snes->reason = SNES_DIVERGED_INNER; PetscFunctionReturn(0); } } } /* Solve J Y = F, where J is Jacobian matrix */ ierr = SNESComputeJacobian(snes,X,snes->jacobian,snes->jacobian_pre);CHKERRQ(ierr); ierr = KSPSetOperators(snes->ksp,snes->jacobian,snes->jacobian_pre);CHKERRQ(ierr); ierr = KSPSolve(snes->ksp,F,Y);CHKERRQ(ierr); SNESCheckKSPSolve(snes); ierr = KSPGetIterationNumber(snes->ksp,&lits);CHKERRQ(ierr); snes->linear_its += lits; ierr = PetscInfo2(snes,"iter=%D, linear solve iterations=%D\n",snes->iter,lits);CHKERRQ(ierr); if (PetscLogPrintInfo) { ierr = SNESNEWTONLSCheckResidual_Private(snes,snes->jacobian,F,Y);CHKERRQ(ierr); } /* Compute a (scaled) negative update in the line search routine: X <- X - lambda*Y and evaluate F = function(X) (depends on the line search). */ gnorm = fnorm; ierr = SNESLineSearchApply(linesearch, X, F, &fnorm, Y);CHKERRQ(ierr); ierr = SNESLineSearchGetReason(linesearch, &lssucceed);CHKERRQ(ierr); ierr = SNESLineSearchGetNorms(linesearch, &xnorm, &fnorm, &ynorm);CHKERRQ(ierr); ierr = PetscInfo4(snes,"fnorm=%18.16e, gnorm=%18.16e, ynorm=%18.16e, lssucceed=%d\n",(double)gnorm,(double)fnorm,(double)ynorm,(int)lssucceed);CHKERRQ(ierr); if (snes->reason == SNES_DIVERGED_FUNCTION_COUNT) break; SNESCheckFunctionNorm(snes,fnorm); if (lssucceed) { if (snes->stol*xnorm > ynorm) { snes->reason = SNES_CONVERGED_SNORM_RELATIVE; PetscFunctionReturn(0); } if (++snes->numFailures >= snes->maxFailures) { PetscBool ismin; snes->reason = SNES_DIVERGED_LINE_SEARCH; ierr = SNESNEWTONLSCheckLocalMin_Private(snes,snes->jacobian,F,fnorm,&ismin);CHKERRQ(ierr); if (ismin) snes->reason = SNES_DIVERGED_LOCAL_MIN; break; } } /* Monitor convergence */ ierr = PetscObjectSAWsTakeAccess((PetscObject)snes);CHKERRQ(ierr); snes->iter = i+1; snes->norm = fnorm; ierr = PetscObjectSAWsGrantAccess((PetscObject)snes);CHKERRQ(ierr); ierr = SNESLogConvergenceHistory(snes,snes->norm,lits);CHKERRQ(ierr); ierr = SNESMonitor(snes,snes->iter,snes->norm);CHKERRQ(ierr); /* Test for convergence */ ierr = (*snes->ops->converged)(snes,snes->iter,xnorm,ynorm,fnorm,&snes->reason,snes->cnvP);CHKERRQ(ierr); if (snes->reason) break; } if (i == maxits) { ierr = PetscInfo1(snes,"Maximum number of iterations has been reached: %D\n",maxits);CHKERRQ(ierr); if (!snes->reason) snes->reason = SNES_DIVERGED_MAX_IT; } PetscFunctionReturn(0); }
static PetscErrorCode KSPCGSolve_NASH(KSP ksp) { #if defined(PETSC_USE_COMPLEX) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP, "NASH is not available for complex systems"); #else KSPCG_NASH *cg = (KSPCG_NASH*)ksp->data; PetscErrorCode ierr; Mat Qmat, Mmat; Vec r, z, p, d; PC pc; PetscReal norm_r, norm_d, norm_dp1, norm_p, dMp; PetscReal alpha, beta, kappa, rz, rzm1; PetscReal rr, r2, step; PetscInt max_cg_its; PetscBool diagonalscale; PetscFunctionBegin; /***************************************************************************/ /* Check the arguments and parameters. */ /***************************************************************************/ ierr = PCGetDiagonalScale(ksp->pc, &diagonalscale);CHKERRQ(ierr); if (diagonalscale) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP, "Krylov method %s does not support diagonal scaling", ((PetscObject)ksp)->type_name); if (cg->radius < 0.0) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_OUTOFRANGE, "Input error: radius < 0"); /***************************************************************************/ /* Get the workspace vectors and initialize variables */ /***************************************************************************/ r2 = cg->radius * cg->radius; r = ksp->work[0]; z = ksp->work[1]; p = ksp->work[2]; d = ksp->vec_sol; pc = ksp->pc; ierr = PCGetOperators(pc, &Qmat, &Mmat);CHKERRQ(ierr); ierr = VecGetSize(d, &max_cg_its);CHKERRQ(ierr); max_cg_its = PetscMin(max_cg_its, ksp->max_it); ksp->its = 0; /***************************************************************************/ /* Initialize objective function and direction. */ /***************************************************************************/ cg->o_fcn = 0.0; ierr = VecSet(d, 0.0);CHKERRQ(ierr); /* d = 0 */ cg->norm_d = 0.0; /***************************************************************************/ /* Begin the conjugate gradient method. Check the right-hand side for */ /* numerical problems. The check for not-a-number and infinite values */ /* need be performed only once. */ /***************************************************************************/ ierr = VecCopy(ksp->vec_rhs, r);CHKERRQ(ierr); /* r = -grad */ ierr = VecDot(r, r, &rr);CHKERRQ(ierr); /* rr = r^T r */ KSPCheckDot(ksp,rr); /***************************************************************************/ /* Check the preconditioner for numerical problems and for positive */ /* definiteness. The check for not-a-number and infinite values need be */ /* performed only once. */ /***************************************************************************/ ierr = KSP_PCApply(ksp, r, z);CHKERRQ(ierr); /* z = inv(M) r */ ierr = VecDot(r, z, &rz);CHKERRQ(ierr); /* rz = r^T inv(M) r */ if (PetscIsInfOrNanScalar(rz)) { /*************************************************************************/ /* The preconditioner contains not-a-number or an infinite value. */ /* Return the gradient direction intersected with the trust region. */ /*************************************************************************/ ksp->reason = KSP_DIVERGED_NANORINF; ierr = PetscInfo1(ksp, "KSPCGSolve_NASH: bad preconditioner: rz=%g\n", (double)rz);CHKERRQ(ierr); if (cg->radius) { if (r2 >= rr) { alpha = 1.0; cg->norm_d = PetscSqrtReal(rr); } else { alpha = PetscSqrtReal(r2 / rr); cg->norm_d = cg->radius; } ierr = VecAXPY(d, alpha, r);CHKERRQ(ierr); /* d = d + alpha r */ /***********************************************************************/ /* Compute objective function. */ /***********************************************************************/ ierr = KSP_MatMult(ksp, Qmat, d, z);CHKERRQ(ierr); ierr = VecAYPX(z, -0.5, ksp->vec_rhs);CHKERRQ(ierr); ierr = VecDot(d, z, &cg->o_fcn);CHKERRQ(ierr); cg->o_fcn = -cg->o_fcn; ++ksp->its; } PetscFunctionReturn(0); } if (rz < 0.0) { /*************************************************************************/ /* The preconditioner is indefinite. Because this is the first */ /* and we do not have a direction yet, we use the gradient step. Note */ /* that we cannot use the preconditioned norm when computing the step */ /* because the matrix is indefinite. */ /*************************************************************************/ ksp->reason = KSP_DIVERGED_INDEFINITE_PC; ierr = PetscInfo1(ksp, "KSPCGSolve_NASH: indefinite preconditioner: rz=%g\n", (double)rz);CHKERRQ(ierr); if (cg->radius) { if (r2 >= rr) { alpha = 1.0; cg->norm_d = PetscSqrtReal(rr); } else { alpha = PetscSqrtReal(r2 / rr); cg->norm_d = cg->radius; } ierr = VecAXPY(d, alpha, r);CHKERRQ(ierr); /* d = d + alpha r */ /***********************************************************************/ /* Compute objective function. */ /***********************************************************************/ ierr = KSP_MatMult(ksp, Qmat, d, z);CHKERRQ(ierr); ierr = VecAYPX(z, -0.5, ksp->vec_rhs);CHKERRQ(ierr); ierr = VecDot(d, z, &cg->o_fcn);CHKERRQ(ierr); cg->o_fcn = -cg->o_fcn; ++ksp->its; } PetscFunctionReturn(0); } /***************************************************************************/ /* As far as we know, the preconditioner is positive semidefinite. */ /* Compute and log the residual. Check convergence because this */ /* initializes things, but do not terminate until at least one conjugate */ /* gradient iteration has been performed. */ /***************************************************************************/ switch (ksp->normtype) { case KSP_NORM_PRECONDITIONED: ierr = VecNorm(z, NORM_2, &norm_r);CHKERRQ(ierr); /* norm_r = |z| */ break; case KSP_NORM_UNPRECONDITIONED: norm_r = PetscSqrtReal(rr); /* norm_r = |r| */ break; case KSP_NORM_NATURAL: norm_r = PetscSqrtReal(rz); /* norm_r = |r|_M */ break; default: norm_r = 0.0; break; } ierr = KSPLogResidualHistory(ksp, norm_r);CHKERRQ(ierr); ierr = KSPMonitor(ksp, ksp->its, norm_r);CHKERRQ(ierr); ksp->rnorm = norm_r; ierr = (*ksp->converged)(ksp, ksp->its, norm_r, &ksp->reason, ksp->cnvP);CHKERRQ(ierr); /***************************************************************************/ /* Compute the first direction and update the iteration. */ /***************************************************************************/ ierr = VecCopy(z, p);CHKERRQ(ierr); /* p = z */ ierr = KSP_MatMult(ksp, Qmat, p, z);CHKERRQ(ierr); /* z = Q * p */ ++ksp->its; /***************************************************************************/ /* Check the matrix for numerical problems. */ /***************************************************************************/ ierr = VecDot(p, z, &kappa);CHKERRQ(ierr); /* kappa = p^T Q p */ if (PetscIsInfOrNanScalar(kappa)) { /*************************************************************************/ /* The matrix produced not-a-number or an infinite value. In this case, */ /* we must stop and use the gradient direction. This condition need */ /* only be checked once. */ /*************************************************************************/ ksp->reason = KSP_DIVERGED_NANORINF; ierr = PetscInfo1(ksp, "KSPCGSolve_NASH: bad matrix: kappa=%g\n", (double)kappa);CHKERRQ(ierr); if (cg->radius) { if (r2 >= rr) { alpha = 1.0; cg->norm_d = PetscSqrtReal(rr); } else { alpha = PetscSqrtReal(r2 / rr); cg->norm_d = cg->radius; } ierr = VecAXPY(d, alpha, r);CHKERRQ(ierr); /* d = d + alpha r */ /***********************************************************************/ /* Compute objective function. */ /***********************************************************************/ ierr = KSP_MatMult(ksp, Qmat, d, z);CHKERRQ(ierr); ierr = VecAYPX(z, -0.5, ksp->vec_rhs);CHKERRQ(ierr); ierr = VecDot(d, z, &cg->o_fcn);CHKERRQ(ierr); cg->o_fcn = -cg->o_fcn; ++ksp->its; } PetscFunctionReturn(0); } /***************************************************************************/ /* Initialize variables for calculating the norm of the direction. */ /***************************************************************************/ dMp = 0.0; norm_d = 0.0; switch (cg->dtype) { case NASH_PRECONDITIONED_DIRECTION: norm_p = rz; break; default: ierr = VecDot(p, p, &norm_p);CHKERRQ(ierr); break; } /***************************************************************************/ /* Check for negative curvature. */ /***************************************************************************/ if (kappa <= 0.0) { /*************************************************************************/ /* In this case, the matrix is indefinite and we have encountered a */ /* direction of negative curvature. Because negative curvature occurs */ /* during the first step, we must follow a direction. */ /*************************************************************************/ ksp->reason = KSP_CONVERGED_CG_NEG_CURVE; ierr = PetscInfo1(ksp, "KSPCGSolve_NASH: negative curvature: kappa=%g\n", (double)kappa);CHKERRQ(ierr); if (cg->radius && norm_p > 0.0) { /***********************************************************************/ /* Follow direction of negative curvature to the boundary of the */ /* trust region. */ /***********************************************************************/ step = PetscSqrtReal(r2 / norm_p); cg->norm_d = cg->radius; ierr = VecAXPY(d, step, p);CHKERRQ(ierr); /* d = d + step p */ /***********************************************************************/ /* Update objective function. */ /***********************************************************************/ cg->o_fcn += step * (0.5 * step * kappa - rz); } else if (cg->radius) { /***********************************************************************/ /* The norm of the preconditioned direction is zero; use the gradient */ /* step. */ /***********************************************************************/ if (r2 >= rr) { alpha = 1.0; cg->norm_d = PetscSqrtReal(rr); } else { alpha = PetscSqrtReal(r2 / rr); cg->norm_d = cg->radius; } ierr = VecAXPY(d, alpha, r);CHKERRQ(ierr); /* d = d + alpha r */ /***********************************************************************/ /* Compute objective function. */ /***********************************************************************/ ierr = KSP_MatMult(ksp, Qmat, d, z);CHKERRQ(ierr); ierr = VecAYPX(z, -0.5, ksp->vec_rhs);CHKERRQ(ierr); ierr = VecDot(d, z, &cg->o_fcn);CHKERRQ(ierr); cg->o_fcn = -cg->o_fcn; ++ksp->its; } PetscFunctionReturn(0); } /***************************************************************************/ /* Run the conjugate gradient method until either the problem is solved, */ /* we encounter the boundary of the trust region, or the conjugate */ /* gradient method breaks down. */ /***************************************************************************/ while (1) { /*************************************************************************/ /* Know that kappa is nonzero, because we have not broken down, so we */ /* can compute the steplength. */ /*************************************************************************/ alpha = rz / kappa; /*************************************************************************/ /* Compute the steplength and check for intersection with the trust */ /* region. */ /*************************************************************************/ norm_dp1 = norm_d + alpha*(2.0*dMp + alpha*norm_p); if (cg->radius && norm_dp1 >= r2) { /***********************************************************************/ /* In this case, the matrix is positive definite as far as we know. */ /* However, the full step goes beyond the trust region. */ /***********************************************************************/ ksp->reason = KSP_CONVERGED_CG_CONSTRAINED; ierr = PetscInfo1(ksp, "KSPCGSolve_NASH: constrained step: radius=%g\n", (double)cg->radius);CHKERRQ(ierr); if (norm_p > 0.0) { /*********************************************************************/ /* Follow the direction to the boundary of the trust region. */ /*********************************************************************/ step = (PetscSqrtReal(dMp*dMp+norm_p*(r2-norm_d))-dMp)/norm_p; cg->norm_d = cg->radius; ierr = VecAXPY(d, step, p);CHKERRQ(ierr); /* d = d + step p */ /*********************************************************************/ /* Update objective function. */ /*********************************************************************/ cg->o_fcn += step * (0.5 * step * kappa - rz); } else { /*********************************************************************/ /* The norm of the direction is zero; there is nothing to follow. */ /*********************************************************************/ } break; } /*************************************************************************/ /* Now we can update the direction and residual. */ /*************************************************************************/ ierr = VecAXPY(d, alpha, p);CHKERRQ(ierr); /* d = d + alpha p */ ierr = VecAXPY(r, -alpha, z);CHKERRQ(ierr); /* r = r - alpha Q p */ ierr = KSP_PCApply(ksp, r, z);CHKERRQ(ierr); /* z = inv(M) r */ switch (cg->dtype) { case NASH_PRECONDITIONED_DIRECTION: norm_d = norm_dp1; break; default: ierr = VecDot(d, d, &norm_d);CHKERRQ(ierr); break; } cg->norm_d = PetscSqrtReal(norm_d); /*************************************************************************/ /* Update objective function. */ /*************************************************************************/ cg->o_fcn -= 0.5 * alpha * rz; /*************************************************************************/ /* Check that the preconditioner appears positive semidefinite. */ /*************************************************************************/ rzm1 = rz; ierr = VecDot(r, z, &rz);CHKERRQ(ierr); /* rz = r^T z */ if (rz < 0.0) { /***********************************************************************/ /* The preconditioner is indefinite. */ /***********************************************************************/ ksp->reason = KSP_DIVERGED_INDEFINITE_PC; ierr = PetscInfo1(ksp, "KSPCGSolve_NASH: cg indefinite preconditioner: rz=%g\n", (double)rz);CHKERRQ(ierr); break; } /*************************************************************************/ /* As far as we know, the preconditioner is positive semidefinite. */ /* Compute the residual and check for convergence. */ /*************************************************************************/ switch (ksp->normtype) { case KSP_NORM_PRECONDITIONED: ierr = VecNorm(z, NORM_2, &norm_r);CHKERRQ(ierr); /* norm_r = |z| */ break; case KSP_NORM_UNPRECONDITIONED: ierr = VecNorm(r, NORM_2, &norm_r);CHKERRQ(ierr); /* norm_r = |r| */ break; case KSP_NORM_NATURAL: norm_r = PetscSqrtReal(rz); /* norm_r = |r|_M */ break; default: norm_r = 0.; break; } ierr = KSPLogResidualHistory(ksp, norm_r);CHKERRQ(ierr); ierr = KSPMonitor(ksp, ksp->its, norm_r);CHKERRQ(ierr); ksp->rnorm = norm_r; ierr = (*ksp->converged)(ksp, ksp->its, norm_r, &ksp->reason, ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) { /***********************************************************************/ /* The method has converged. */ /***********************************************************************/ ierr = PetscInfo2(ksp, "KSPCGSolve_NASH: truncated step: rnorm=%g, radius=%g\n", (double)norm_r, (double)cg->radius);CHKERRQ(ierr); break; } /*************************************************************************/ /* We have not converged yet. Check for breakdown. */ /*************************************************************************/ beta = rz / rzm1; if (PetscAbsReal(beta) <= 0.0) { /***********************************************************************/ /* Conjugate gradients has broken down. */ /***********************************************************************/ ksp->reason = KSP_DIVERGED_BREAKDOWN; ierr = PetscInfo1(ksp, "KSPCGSolve_NASH: breakdown: beta=%g\n", (double)beta);CHKERRQ(ierr); break; } /*************************************************************************/ /* Check iteration limit. */ /*************************************************************************/ if (ksp->its >= max_cg_its) { ksp->reason = KSP_DIVERGED_ITS; ierr = PetscInfo1(ksp, "KSPCGSolve_NASH: iterlim: its=%D\n", ksp->its);CHKERRQ(ierr); break; } /*************************************************************************/ /* Update p and the norms. */ /*************************************************************************/ ierr = VecAYPX(p, beta, z);CHKERRQ(ierr); /* p = z + beta p */ switch (cg->dtype) { case NASH_PRECONDITIONED_DIRECTION: dMp = beta*(dMp + alpha*norm_p); norm_p = beta*(rzm1 + beta*norm_p); break; default: ierr = VecDot(d, p, &dMp);CHKERRQ(ierr); ierr = VecDot(p, p, &norm_p);CHKERRQ(ierr); break; } /*************************************************************************/ /* Compute the new direction and update the iteration. */ /*************************************************************************/ ierr = KSP_MatMult(ksp, Qmat, p, z);CHKERRQ(ierr); /* z = Q * p */ ierr = VecDot(p, z, &kappa);CHKERRQ(ierr); /* kappa = p^T Q p */ ++ksp->its; /*************************************************************************/ /* Check for negative curvature. */ /*************************************************************************/ if (kappa <= 0.0) { /***********************************************************************/ /* In this case, the matrix is indefinite and we have encountered */ /* a direction of negative curvature. Stop at the base. */ /***********************************************************************/ ksp->reason = KSP_CONVERGED_CG_NEG_CURVE; ierr = PetscInfo1(ksp, "KSPCGSolve_NASH: negative curvature: kappa=%g\n", (double)kappa);CHKERRQ(ierr); break; } } PetscFunctionReturn(0); #endif }
/*@C PetscFileRetrieve - Obtains a library from a URL or compressed and copies into local disk space as uncompressed. Collective on MPI_Comm Input Parameter: + comm - processors accessing the library . libname - name of library, including entire URL (with or without .gz) - llen - length of llibname Output Parameter: + llibname - name of local copy of library - found - if found and retrieved the file Level: developer @*/ PetscErrorCode PETSC_DLLEXPORT PetscFileRetrieve(MPI_Comm comm,const char libname[],char llibname[],size_t llen,PetscTruth *found) { char buf[1024],tmpdir[PETSC_MAX_PATH_LEN],urlget[PETSC_MAX_PATH_LEN],*par; const char *pdir; FILE *fp; PetscErrorCode ierr; int i; PetscMPIInt rank; size_t len = 0; PetscTruth flg1,flg2,flg3,sharedtmp,exists; PetscFunctionBegin; *found = PETSC_FALSE; /* if file does not have an ftp:// or http:// or .gz then need not process file */ ierr = PetscStrstr(libname,".gz",&par);CHKERRQ(ierr); if (par) {ierr = PetscStrlen(par,&len);CHKERRQ(ierr);} ierr = PetscStrncmp(libname,"ftp://",6,&flg1);CHKERRQ(ierr); ierr = PetscStrncmp(libname,"http://",7,&flg2);CHKERRQ(ierr); ierr = PetscStrncmp(libname,"file://",7,&flg3);CHKERRQ(ierr); if (!flg1 && !flg2 && !flg3 && (!par || len != 3)) { ierr = PetscStrncpy(llibname,libname,llen);CHKERRQ(ierr); ierr = PetscTestFile(libname,'r',found);CHKERRQ(ierr); if (*found) { ierr = PetscInfo1(PETSC_NULL,"Found file %s\n",libname); } else { ierr = PetscInfo1(PETSC_NULL,"Did not find file %s\n",libname); } PetscFunctionReturn(0); } /* Determine if all processors share a common /tmp */ ierr = PetscSharedTmp(comm,&sharedtmp);CHKERRQ(ierr); ierr = PetscOptionsGetenv(comm,"PETSC_TMP",tmpdir,PETSC_MAX_PATH_LEN,&flg1);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); if (!rank || !sharedtmp) { /* Construct the script to get URL file */ ierr = PetscGetPetscDir(&pdir);CHKERRQ(ierr); ierr = PetscStrcpy(urlget,pdir);CHKERRQ(ierr); ierr = PetscStrcat(urlget,"/bin/urlget");CHKERRQ(ierr); ierr = PetscTestFile(urlget,'r',&exists);CHKERRQ(ierr); if (!exists) { ierr = PetscTestFile("urlget",'r',&exists);CHKERRQ(ierr); if (!exists) { SETERRQ1(PETSC_ERR_PLIB,"Cannot locate PETSc script urlget in %s or current directory",urlget); } ierr = PetscStrcpy(urlget,"urlget");CHKERRQ(ierr); } ierr = PetscStrcat(urlget," ");CHKERRQ(ierr); /* are we using an alternative /tmp? */ if (flg1) { ierr = PetscStrcat(urlget,"-tmp ");CHKERRQ(ierr); ierr = PetscStrcat(urlget,tmpdir);CHKERRQ(ierr); ierr = PetscStrcat(urlget," ");CHKERRQ(ierr); } ierr = PetscStrcat(urlget,libname);CHKERRQ(ierr); ierr = PetscStrcat(urlget," 2>&1 ");CHKERRQ(ierr); #if defined(PETSC_HAVE_POPEN) ierr = PetscPOpen(PETSC_COMM_SELF,PETSC_NULL,urlget,"r",&fp);CHKERRQ(ierr); #else SETERRQ(PETSC_ERR_SUP_SYS,"Cannot run external programs on this machine"); #endif if (!fgets(buf,1024,fp)) { SETERRQ1(PETSC_ERR_PLIB,"No output from ${PETSC_DIR}/bin/urlget in getting file %s",libname); } ierr = PetscInfo1(0,"Message back from urlget: %s\n",buf);CHKERRQ(ierr); ierr = PetscStrncmp(buf,"Error",5,&flg1);CHKERRQ(ierr); ierr = PetscStrncmp(buf,"Traceback",9,&flg2);CHKERRQ(ierr); #if defined(PETSC_HAVE_POPEN) ierr = PetscPClose(PETSC_COMM_SELF,fp);CHKERRQ(ierr); #endif if (flg1 || flg2) { *found = PETSC_FALSE; } else { *found = PETSC_TRUE; /* Check for \n and make it 0 */ for (i=0; i<1024; i++) { if (buf[i] == '\n') { buf[i] = 0; break; } } ierr = PetscStrncpy(llibname,buf,llen);CHKERRQ(ierr); } } if (sharedtmp) { /* send library name to all processors */ ierr = MPI_Bcast(found,1,MPI_INT,0,comm);CHKERRQ(ierr); if (*found) { ierr = MPI_Bcast(llibname,llen,MPI_CHAR,0,comm);CHKERRQ(ierr); ierr = MPI_Bcast(found,1,MPI_INT,0,comm);CHKERRQ(ierr); } } PetscFunctionReturn(0); }
PetscErrorCode SNESSolve_NCG(SNES snes) { SNES_NCG *ncg = (SNES_NCG*)snes->data; Vec X,dX,lX,F,dXold; PetscReal fnorm, ynorm, xnorm, beta = 0.0; PetscScalar dXdotdX, dXolddotdXold, dXdotdXold, lXdotdX, lXdotdXold; PetscInt maxits, i; PetscErrorCode ierr; PetscBool lsSuccess = PETSC_TRUE; SNESLineSearch linesearch; SNESConvergedReason reason; PetscFunctionBegin; snes->reason = SNES_CONVERGED_ITERATING; maxits = snes->max_its; /* maximum number of iterations */ X = snes->vec_sol; /* X^n */ dXold = snes->work[0]; /* The previous iterate of X */ dX = snes->work[1]; /* the preconditioned direction */ lX = snes->vec_sol_update; /* the conjugate direction */ F = snes->vec_func; /* residual vector */ ierr = SNESGetLineSearch(snes, &linesearch);CHKERRQ(ierr); ierr = PetscObjectAMSTakeAccess((PetscObject)snes);CHKERRQ(ierr); snes->iter = 0; snes->norm = 0.; ierr = PetscObjectAMSGrantAccess((PetscObject)snes);CHKERRQ(ierr); /* compute the initial function and preconditioned update dX */ if (snes->pc && snes->functype == SNES_FUNCTION_PRECONDITIONED) { ierr = SNESApplyPC(snes,X,NULL,NULL,dX);CHKERRQ(ierr); ierr = SNESGetConvergedReason(snes->pc,&reason);CHKERRQ(ierr); if (reason < 0 && reason != SNES_DIVERGED_MAX_IT) { snes->reason = SNES_DIVERGED_INNER; PetscFunctionReturn(0); } ierr = VecCopy(dX,F);CHKERRQ(ierr); ierr = VecNorm(F,NORM_2,&fnorm);CHKERRQ(ierr); } else { if (!snes->vec_func_init_set) { ierr = SNESComputeFunction(snes,X,F);CHKERRQ(ierr); if (snes->domainerror) { snes->reason = SNES_DIVERGED_FUNCTION_DOMAIN; PetscFunctionReturn(0); } } else snes->vec_func_init_set = PETSC_FALSE; /* convergence test */ ierr = VecNorm(F,NORM_2,&fnorm);CHKERRQ(ierr); if (PetscIsInfOrNanReal(fnorm)) { snes->reason = SNES_DIVERGED_FNORM_NAN; PetscFunctionReturn(0); } ierr = VecCopy(F,dX);CHKERRQ(ierr); } if (snes->pc) { if (snes->functype == SNES_FUNCTION_UNPRECONDITIONED) { ierr = SNESApplyPC(snes,X,F,&fnorm,dX);CHKERRQ(ierr); ierr = SNESGetConvergedReason(snes->pc,&reason);CHKERRQ(ierr); if (reason < 0 && reason != SNES_DIVERGED_MAX_IT) { snes->reason = SNES_DIVERGED_INNER; PetscFunctionReturn(0); } } } ierr = VecCopy(dX,lX);CHKERRQ(ierr); ierr = VecDot(dX, dX, &dXdotdX);CHKERRQ(ierr); ierr = PetscObjectAMSTakeAccess((PetscObject)snes);CHKERRQ(ierr); snes->norm = fnorm; ierr = PetscObjectAMSGrantAccess((PetscObject)snes);CHKERRQ(ierr); ierr = SNESLogConvergenceHistory(snes,fnorm,0);CHKERRQ(ierr); ierr = SNESMonitor(snes,0,fnorm);CHKERRQ(ierr); /* test convergence */ ierr = (*snes->ops->converged)(snes,0,0.0,0.0,fnorm,&snes->reason,snes->cnvP);CHKERRQ(ierr); if (snes->reason) PetscFunctionReturn(0); /* Call general purpose update function */ if (snes->ops->update) { ierr = (*snes->ops->update)(snes, snes->iter);CHKERRQ(ierr); } /* first update -- just use the (preconditioned) residual direction for the initial conjugate direction */ for (i = 1; i < maxits + 1; i++) { lsSuccess = PETSC_TRUE; /* some update types require the old update direction or conjugate direction */ if (ncg->type != SNES_NCG_FR) { ierr = VecCopy(dX, dXold);CHKERRQ(ierr); } ierr = SNESLineSearchApply(linesearch,X,F,&fnorm,lX);CHKERRQ(ierr); ierr = SNESLineSearchGetSuccess(linesearch, &lsSuccess);CHKERRQ(ierr); if (!lsSuccess) { if (++snes->numFailures >= snes->maxFailures) { snes->reason = SNES_DIVERGED_LINE_SEARCH; PetscFunctionReturn(0); } } if (snes->nfuncs >= snes->max_funcs) { snes->reason = SNES_DIVERGED_FUNCTION_COUNT; PetscFunctionReturn(0); } if (snes->domainerror) { snes->reason = SNES_DIVERGED_FUNCTION_DOMAIN; PetscFunctionReturn(0); } ierr = SNESLineSearchGetNorms(linesearch, &xnorm, &fnorm, &ynorm);CHKERRQ(ierr); /* Monitor convergence */ ierr = PetscObjectAMSTakeAccess((PetscObject)snes);CHKERRQ(ierr); snes->iter = i; snes->norm = fnorm; ierr = PetscObjectAMSGrantAccess((PetscObject)snes);CHKERRQ(ierr); ierr = SNESLogConvergenceHistory(snes,snes->norm,0);CHKERRQ(ierr); ierr = SNESMonitor(snes,snes->iter,snes->norm);CHKERRQ(ierr); /* Test for convergence */ ierr = (*snes->ops->converged)(snes,snes->iter,xnorm,ynorm,fnorm,&snes->reason,snes->cnvP);CHKERRQ(ierr); if (snes->reason) PetscFunctionReturn(0); /* Call general purpose update function */ if (snes->ops->update) { ierr = (*snes->ops->update)(snes, snes->iter);CHKERRQ(ierr); } if (snes->pc) { if (snes->functype == SNES_FUNCTION_PRECONDITIONED) { ierr = SNESApplyPC(snes,X,NULL,NULL,dX);CHKERRQ(ierr); ierr = SNESGetConvergedReason(snes->pc,&reason);CHKERRQ(ierr); if (reason < 0 && reason != SNES_DIVERGED_MAX_IT) { snes->reason = SNES_DIVERGED_INNER; PetscFunctionReturn(0); } ierr = VecCopy(dX,F);CHKERRQ(ierr); } else { ierr = SNESApplyPC(snes,X,F,&fnorm,dX);CHKERRQ(ierr); ierr = SNESGetConvergedReason(snes->pc,&reason);CHKERRQ(ierr); if (reason < 0 && reason != SNES_DIVERGED_MAX_IT) { snes->reason = SNES_DIVERGED_INNER; PetscFunctionReturn(0); } } } else { ierr = VecCopy(F,dX);CHKERRQ(ierr); } /* compute the conjugate direction lX = dX + beta*lX with beta = ((dX, dX) / (dX_old, dX_old) (Fletcher-Reeves update)*/ switch (ncg->type) { case SNES_NCG_FR: /* Fletcher-Reeves */ dXolddotdXold= dXdotdX; ierr = VecDot(dX, dX, &dXdotdX);CHKERRQ(ierr); beta = PetscRealPart(dXdotdX / dXolddotdXold); break; case SNES_NCG_PRP: /* Polak-Ribiere-Poylak */ dXolddotdXold= dXdotdX; ierr = VecDotBegin(dX, dX, &dXdotdXold);CHKERRQ(ierr); ierr = VecDotBegin(dXold, dX, &dXdotdXold);CHKERRQ(ierr); ierr = VecDotEnd(dX, dX, &dXdotdX);CHKERRQ(ierr); ierr = VecDotEnd(dXold, dX, &dXdotdXold);CHKERRQ(ierr); beta = PetscRealPart(((dXdotdX - dXdotdXold) / dXolddotdXold)); if (beta < 0.0) beta = 0.0; /* restart */ break; case SNES_NCG_HS: /* Hestenes-Stiefel */ ierr = VecDotBegin(dX, dX, &dXdotdX);CHKERRQ(ierr); ierr = VecDotBegin(dX, dXold, &dXdotdXold);CHKERRQ(ierr); ierr = VecDotBegin(lX, dX, &lXdotdX);CHKERRQ(ierr); ierr = VecDotBegin(lX, dXold, &lXdotdXold);CHKERRQ(ierr); ierr = VecDotEnd(dX, dX, &dXdotdX);CHKERRQ(ierr); ierr = VecDotEnd(dX, dXold, &dXdotdXold);CHKERRQ(ierr); ierr = VecDotEnd(lX, dX, &lXdotdX);CHKERRQ(ierr); ierr = VecDotEnd(lX, dXold, &lXdotdXold);CHKERRQ(ierr); beta = PetscRealPart((dXdotdX - dXdotdXold) / (lXdotdX - lXdotdXold)); break; case SNES_NCG_DY: /* Dai-Yuan */ ierr = VecDotBegin(dX, dX, &dXdotdX);CHKERRQ(ierr); ierr = VecDotBegin(lX, dX, &lXdotdX);CHKERRQ(ierr); ierr = VecDotBegin(lX, dXold, &lXdotdXold);CHKERRQ(ierr); ierr = VecDotEnd(dX, dX, &dXdotdX);CHKERRQ(ierr); ierr = VecDotEnd(lX, dX, &lXdotdX);CHKERRQ(ierr); ierr = VecDotEnd(lX, dXold, &lXdotdXold);CHKERRQ(ierr); beta = PetscRealPart(dXdotdX / (lXdotdXold - lXdotdX));CHKERRQ(ierr); break; case SNES_NCG_CD: /* Conjugate Descent */ ierr = VecDotBegin(dX, dX, &dXdotdX);CHKERRQ(ierr); ierr = VecDotBegin(lX, dXold, &lXdotdXold);CHKERRQ(ierr); ierr = VecDotEnd(dX, dX, &dXdotdX);CHKERRQ(ierr); ierr = VecDotEnd(lX, dXold, &lXdotdXold);CHKERRQ(ierr); beta = PetscRealPart(dXdotdX / lXdotdXold);CHKERRQ(ierr); break; } if (ncg->monitor) { ierr = PetscViewerASCIIPrintf(ncg->monitor, "beta = %e\n", beta);CHKERRQ(ierr); } ierr = VecAYPX(lX, beta, dX);CHKERRQ(ierr); } ierr = PetscInfo1(snes, "Maximum number of iterations has been reached: %D\n", maxits);CHKERRQ(ierr); if (!snes->reason) snes->reason = SNES_DIVERGED_MAX_IT; PetscFunctionReturn(0); }
PETSC_INTERN PetscErrorCode PetscLINPACKgefa(MatScalar *a,PetscInt n,PetscInt *ipvt,PetscBool allowzeropivot,PetscBool *zeropivotdetected) { PetscInt i__2,i__3,kp1,nm1,j,k,l,ll,kn,knp1,jn1; MatScalar t,*ax,*ay,*aa; MatReal tmp,max; PetscFunctionBegin; if (zeropivotdetected) *zeropivotdetected = PETSC_FALSE; /* Parameter adjustments */ --ipvt; a -= n + 1; /* Function Body */ nm1 = n - 1; for (k = 1; k <= nm1; ++k) { kp1 = k + 1; kn = k*n; knp1 = k*n + k; /* find l = pivot index */ i__2 = n - k + 1; aa = &a[knp1]; max = PetscAbsScalar(aa[0]); l = 1; for (ll=1; ll<i__2; ll++) { tmp = PetscAbsScalar(aa[ll]); if (tmp > max) { max = tmp; l = ll+1;} } l += k - 1; ipvt[k] = l; if (a[l + kn] == 0.0) { if (allowzeropivot) { PetscErrorCode ierr; ierr = PetscInfo1(NULL,"Zero pivot, row %D\n",k-1);CHKERRQ(ierr); if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE; } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",k-1); } /* interchange if necessary */ if (l != k) { t = a[l + kn]; a[l + kn] = a[knp1]; a[knp1] = t; } /* compute multipliers */ t = -1. / a[knp1]; i__2 = n - k; aa = &a[1 + knp1]; for (ll=0; ll<i__2; ll++) aa[ll] *= t; /* row elimination with column indexing */ ax = aa; for (j = kp1; j <= n; ++j) { jn1 = j*n; t = a[l + jn1]; if (l != k) { a[l + jn1] = a[k + jn1]; a[k + jn1] = t; } i__3 = n - k; ay = &a[1+k+jn1]; for (ll=0; ll<i__3; ll++) ay[ll] += t*ax[ll]; } } ipvt[n] = n; if (a[n + n * n] == 0.0) { if (allowzeropivot) { PetscErrorCode ierr; ierr = PetscInfo1(NULL,"Zero pivot, row %D\n",n-1);CHKERRQ(ierr); if (zeropivotdetected) *zeropivotdetected = PETSC_TRUE; } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",n-1); } PetscFunctionReturn(0); }
static PetscErrorCode KSPPIPEFGMRESCycle(PetscInt *itcount,KSP ksp) { KSP_PIPEFGMRES *pipefgmres = (KSP_PIPEFGMRES*)(ksp->data); PetscReal res_norm; PetscReal hapbnd,tt; PetscScalar *hh,*hes,*lhh,shift = pipefgmres->shift; PetscBool hapend = PETSC_FALSE; /* indicates happy breakdown ending */ PetscErrorCode ierr; PetscInt loc_it; /* local count of # of dir. in Krylov space */ PetscInt max_k = pipefgmres->max_k; /* max # of directions Krylov space */ PetscInt i,j,k; Mat Amat,Pmat; Vec Q,W; /* Pipelining vectors */ Vec *redux = pipefgmres->redux; /* workspace for single reduction */ PetscFunctionBegin; if (itcount) *itcount = 0; /* Assign simpler names to these vectors, allocated as pipelining workspace */ Q = VEC_Q; W = VEC_W; /* Allocate memory for orthogonalization work (freed in the GMRES Destroy routine)*/ /* Note that we add an extra value here to allow for a single reduction */ if (!pipefgmres->orthogwork) { ierr = PetscMalloc1(pipefgmres->max_k + 2 ,&pipefgmres->orthogwork);CHKERRQ(ierr); } lhh = pipefgmres->orthogwork; /* Number of pseudo iterations since last restart is the number of prestart directions */ loc_it = 0; /* note: (pipefgmres->it) is always set one less than (loc_it) It is used in KSPBUILDSolution_PIPEFGMRES, where it is passed to KSPPIPEFGMRESBuildSoln. Note that when KSPPIPEFGMRESBuildSoln is called from this function, (loc_it -1) is passed, so the two are equivalent */ pipefgmres->it = (loc_it - 1); /* initial residual is in VEC_VV(0) - compute its norm*/ ierr = VecNorm(VEC_VV(0),NORM_2,&res_norm);CHKERRQ(ierr); /* first entry in right-hand-side of hessenberg system is just the initial residual norm */ *RS(0) = res_norm; ksp->rnorm = res_norm; ierr = KSPLogResidualHistory(ksp,res_norm);CHKERRQ(ierr); ierr = KSPMonitor(ksp,ksp->its,res_norm);CHKERRQ(ierr); /* check for the convergence - maybe the current guess is good enough */ ierr = (*ksp->converged)(ksp,ksp->its,res_norm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) { if (itcount) *itcount = 0; PetscFunctionReturn(0); } /* scale VEC_VV (the initial residual) */ ierr = VecScale(VEC_VV(0),1.0/res_norm);CHKERRQ(ierr); /* Fill the pipeline */ ierr = KSP_PCApply(ksp,VEC_VV(loc_it),PREVEC(loc_it));CHKERRQ(ierr); ierr = PCGetOperators(ksp->pc,&Amat,&Pmat);CHKERRQ(ierr); ierr = KSP_MatMult(ksp,Amat,PREVEC(loc_it),ZVEC(loc_it));CHKERRQ(ierr); ierr = VecAXPY(ZVEC(loc_it),-shift,VEC_VV(loc_it));CHKERRQ(ierr); /* Note shift */ /* MAIN ITERATION LOOP BEGINNING*/ /* keep iterating until we have converged OR generated the max number of directions OR reached the max number of iterations for the method */ while (!ksp->reason && loc_it < max_k && ksp->its < ksp->max_it) { if (loc_it) { ierr = KSPLogResidualHistory(ksp,res_norm);CHKERRQ(ierr); ierr = KSPMonitor(ksp,ksp->its,res_norm);CHKERRQ(ierr); } pipefgmres->it = (loc_it - 1); /* see if more space is needed for work vectors */ if (pipefgmres->vv_allocated <= loc_it + VEC_OFFSET + 1) { ierr = KSPPIPEFGMRESGetNewVectors(ksp,loc_it+1);CHKERRQ(ierr); /* (loc_it+1) is passed in as number of the first vector that should be allocated */ } /* Note that these inner products are with "Z" now, so in particular, lhh[loc_it] is the 'barred' or 'shifted' value, not the value from the equivalent FGMRES run (even in exact arithmetic) That is, the H we need for the Arnoldi relation is different from the coefficients we use in the orthogonalization process,because of the shift */ /* Do some local twiddling to allow for a single reduction */ for(i=0;i<loc_it+1;i++){ redux[i] = VEC_VV(i); } redux[loc_it+1] = ZVEC(loc_it); /* note the extra dot product which ends up in lh[loc_it+1], which computes ||z||^2 */ ierr = VecMDotBegin(ZVEC(loc_it),loc_it+2,redux,lhh);CHKERRQ(ierr); /* Start the split reduction (This actually calls the MPI_Iallreduce, otherwise, the reduction is simply delayed until the "end" call)*/ ierr = PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)ZVEC(loc_it)));CHKERRQ(ierr); /* The work to be overlapped with the inner products follows. This is application of the preconditioner and the operator to compute intermediate quantites which will be combined (locally) with the results of the inner products. */ ierr = KSP_PCApply(ksp,ZVEC(loc_it),Q);CHKERRQ(ierr); ierr = PCGetOperators(ksp->pc,&Amat,&Pmat);CHKERRQ(ierr); ierr = KSP_MatMult(ksp,Amat,Q,W);CHKERRQ(ierr); /* Compute inner products of the new direction with previous directions, and the norm of the to-be-orthogonalized direction "Z". This information is enough to build the required entries of H. The inner product with VEC_VV(it_loc) is *different* than in the standard FGMRES and need to be dealt with specially. That is, for standard FGMRES the orthogonalization coefficients are the same as the coefficients used in the Arnoldi relation to reconstruct, but here this is not true (albeit only for the one entry of H which we "unshift" below. */ /* Finish the dot product, retrieving the extra entry */ ierr = VecMDotEnd(ZVEC(loc_it),loc_it+2,redux,lhh);CHKERRQ(ierr); tt = PetscRealPart(lhh[loc_it+1]); /* Hessenberg entries, and entries for (naive) classical Graham-Schmidt Note that the Hessenberg entries require a shift, as these are for the relation AU = VH, which is wrt unshifted basis vectors */ hh = HH(0,loc_it); hes=HES(0,loc_it); for (j=0; j<loc_it; j++) { hh[j] = lhh[j]; hes[j] = lhh[j]; } hh[loc_it] = lhh[loc_it] + shift; hes[loc_it] = lhh[loc_it] + shift; /* we delay applying the shift here */ for (j=0; j<=loc_it; j++) { lhh[j] = -lhh[j]; /* flip sign */ } /* Compute the norm of the un-normalized new direction using the rearranged formula Note that these are shifted ("barred") quantities */ for(k=0;k<=loc_it;k++) tt -= ((PetscReal)(PetscAbsScalar(lhh[k]) * PetscAbsScalar(lhh[k]))); if (tt < 0.0) { /* If we detect square root breakdown in the norm, we must restart the algorithm. Here this means we simply break the current loop and reconstruct the solution using the basis we have computed thus far. Note that by breaking immediately, we do not update the iteration count, so computation done in this iteration should be disregarded. */ ierr = PetscInfo1(ksp,"Restart due to square root breakdown at it = \n",ksp->its);CHKERRQ(ierr); break; } else { tt = PetscSqrtReal(tt); } /* new entry in hessenburg is the 2-norm of our new direction */ hh[loc_it+1] = tt; hes[loc_it+1] = tt; /* The recurred computation for the new direction The division by tt is delayed to the happy breakdown check later Note placement BEFORE the unshift */ ierr = VecCopy(ZVEC(loc_it),VEC_VV(loc_it+1));CHKERRQ(ierr); ierr = VecMAXPY(VEC_VV(loc_it+1),loc_it+1,lhh,&VEC_VV(0));CHKERRQ(ierr); /* (VEC_VV(loc_it+1) is not normalized yet) */ /* The recurred computation for the preconditioned vector (u) */ ierr = VecCopy(Q,PREVEC(loc_it+1));CHKERRQ(ierr); ierr = VecMAXPY(PREVEC(loc_it+1),loc_it+1,lhh,&PREVEC(0));CHKERRQ(ierr); ierr = VecScale(PREVEC(loc_it+1),1.0/tt);CHKERRQ(ierr); /* Unshift an entry in the GS coefficients ("removing the bar") */ lhh[loc_it] -= shift; /* The recurred computation for z (Au) Note placement AFTER the "unshift" */ ierr = VecCopy(W,ZVEC(loc_it+1));CHKERRQ(ierr); ierr = VecMAXPY(ZVEC(loc_it+1),loc_it+1,lhh,&ZVEC(0));CHKERRQ(ierr); ierr = VecScale(ZVEC(loc_it+1),1.0/tt);CHKERRQ(ierr); /* Happy Breakdown Check */ hapbnd = PetscAbsScalar((tt) / *RS(loc_it)); /* RS(loc_it) contains the res_norm from the last iteration */ hapbnd = PetscMin(pipefgmres->haptol,hapbnd); if (tt > hapbnd) { /* scale new direction by its norm */ ierr = VecScale(VEC_VV(loc_it+1),1.0/tt);CHKERRQ(ierr); } else { /* This happens when the solution is exactly reached. */ /* So there is no new direction... */ ierr = VecSet(VEC_TEMP,0.0);CHKERRQ(ierr); /* set VEC_TEMP to 0 */ hapend = PETSC_TRUE; } /* note that for pipefgmres we could get HES(loc_it+1, loc_it) = 0 and the current solution would not be exact if HES was singular. Note that HH non-singular implies that HES is not singular, and HES is guaranteed to be nonsingular when PREVECS are linearly independent and A is nonsingular (in GMRES, the nonsingularity of A implies the nonsingularity of HES). So we should really add a check to verify that HES is nonsingular.*/ /* Note that to be thorough, in debug mode, one could call a LAPACK routine here to check that the hessenberg matrix is indeed non-singular (since FGMRES does not guarantee this) */ /* Now apply rotations to new col of hessenberg (and right side of system), calculate new rotation, and get new residual norm at the same time*/ ierr = KSPPIPEFGMRESUpdateHessenberg(ksp,loc_it,&hapend,&res_norm);CHKERRQ(ierr); if (ksp->reason) break; loc_it++; pipefgmres->it = (loc_it-1); /* Add this here in case it has converged */ ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr); ksp->its++; ksp->rnorm = res_norm; ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp,ksp->its,res_norm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); /* Catch error in happy breakdown and signal convergence and break from loop */ if (hapend) { if (!ksp->reason) { if (ksp->errorifnotconverged) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"You reached the happy break down, but convergence was not indicated. Residual norm = %g",(double)res_norm); else { ksp->reason = KSP_DIVERGED_BREAKDOWN; break; } } } } /* END OF ITERATION LOOP */ ierr = KSPLogResidualHistory(ksp,res_norm);CHKERRQ(ierr); /* Monitor if we know that we will not return for a restart */ if (loc_it && (ksp->reason || ksp->its >= ksp->max_it)) { ierr = KSPMonitor(ksp,ksp->its,res_norm);CHKERRQ(ierr); } if (itcount) *itcount = loc_it; /* Down here we have to solve for the "best" coefficients of the Krylov columns, add the solution values together, and possibly unwind the preconditioning from the solution */ /* Form the solution (or the solution so far) */ /* Note: must pass in (loc_it-1) for iteration count so that KSPPIPEGMRESIIBuildSoln properly navigates */ ierr = KSPPIPEFGMRESBuildSoln(RS(0),ksp->vec_sol,ksp->vec_sol,ksp,loc_it-1);CHKERRQ(ierr); PetscFunctionReturn(0); }
PetscErrorCode KSPSolve_QCG(KSP ksp) { /* Correpondence with documentation above: B = g = gradient, X = s = step Note: This is not coded correctly for complex arithmetic! */ KSP_QCG *pcgP = (KSP_QCG*)ksp->data; MatStructure pflag; Mat Amat,Pmat; Vec W,WA,WA2,R,P,ASP,BS,X,B; PetscScalar scal,beta,rntrn,step; PetscReal q1,q2,xnorm,step1,step2,rnrm,btx,xtax; PetscReal ptasp,rtr,wtasp,bstp; PetscReal dzero = 0.0,bsnrm; PetscErrorCode ierr; PetscInt i,maxit; PC pc = ksp->pc; PCSide side; PetscBool diagonalscale; PetscFunctionBegin; ierr = PCGetDiagonalScale(ksp->pc,&diagonalscale);CHKERRQ(ierr); if (diagonalscale) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Krylov method %s does not support diagonal scaling",((PetscObject)ksp)->type_name); if (ksp->transpose_solve) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Currently does not support transpose solve"); ksp->its = 0; maxit = ksp->max_it; WA = ksp->work[0]; R = ksp->work[1]; P = ksp->work[2]; ASP = ksp->work[3]; BS = ksp->work[4]; W = ksp->work[5]; WA2 = ksp->work[6]; X = ksp->vec_sol; B = ksp->vec_rhs; if (pcgP->delta <= dzero) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_OUTOFRANGE,"Input error: delta <= 0"); ierr = KSPGetPCSide(ksp,&side);CHKERRQ(ierr); if (side != PC_SYMMETRIC) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_ARG_OUTOFRANGE,"Requires symmetric preconditioner!"); /* Initialize variables */ ierr = VecSet(W,0.0);CHKERRQ(ierr); /* W = 0 */ ierr = VecSet(X,0.0);CHKERRQ(ierr); /* X = 0 */ ierr = PCGetOperators(pc,&Amat,&Pmat,&pflag);CHKERRQ(ierr); /* Compute: BS = D^{-1} B */ ierr = PCApplySymmetricLeft(pc,B,BS);CHKERRQ(ierr); ierr = VecNorm(BS,NORM_2,&bsnrm);CHKERRQ(ierr); ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr); ksp->its = 0; ksp->rnorm = bsnrm; ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr); ierr = KSPLogResidualHistory(ksp,bsnrm);CHKERRQ(ierr); ierr = KSPMonitor(ksp,0,bsnrm);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp,0,bsnrm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) PetscFunctionReturn(0); /* Compute the initial scaled direction and scaled residual */ ierr = VecCopy(BS,R);CHKERRQ(ierr); ierr = VecScale(R,-1.0);CHKERRQ(ierr); ierr = VecCopy(R,P);CHKERRQ(ierr); ierr = VecDotRealPart(R,R,&rtr);CHKERRQ(ierr); for (i=0; i<=maxit; i++) { ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr); ksp->its++; ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr); /* Compute: asp = D^{-T}*A*D^{-1}*p */ ierr = PCApplySymmetricRight(pc,P,WA);CHKERRQ(ierr); ierr = MatMult(Amat,WA,WA2);CHKERRQ(ierr); ierr = PCApplySymmetricLeft(pc,WA2,ASP);CHKERRQ(ierr); /* Check for negative curvature */ ierr = VecDotRealPart(P,ASP,&ptasp);CHKERRQ(ierr); if (ptasp <= dzero) { /* Scaled negative curvature direction: Compute a step so that ||w + step*p|| = delta and QS(w + step*p) is least */ if (!i) { ierr = VecCopy(P,X);CHKERRQ(ierr); ierr = VecNorm(X,NORM_2,&xnorm);CHKERRQ(ierr); scal = pcgP->delta / xnorm; ierr = VecScale(X,scal);CHKERRQ(ierr); } else { /* Compute roots of quadratic */ ierr = KSPQCGQuadraticRoots(W,P,pcgP->delta,&step1,&step2);CHKERRQ(ierr); ierr = VecDotRealPart(W,ASP,&wtasp);CHKERRQ(ierr); ierr = VecDotRealPart(BS,P,&bstp);CHKERRQ(ierr); ierr = VecCopy(W,X);CHKERRQ(ierr); q1 = step1*(bstp + wtasp + .5*step1*ptasp); q2 = step2*(bstp + wtasp + .5*step2*ptasp); if (q1 <= q2) { ierr = VecAXPY(X,step1,P);CHKERRQ(ierr); } else { ierr = VecAXPY(X,step2,P);CHKERRQ(ierr); } } pcgP->ltsnrm = pcgP->delta; /* convergence in direction of */ ksp->reason = KSP_CONVERGED_CG_NEG_CURVE; /* negative curvature */ if (!i) { ierr = PetscInfo1(ksp,"negative curvature: delta=%G\n",pcgP->delta);CHKERRQ(ierr); } else { ierr = PetscInfo3(ksp,"negative curvature: step1=%G, step2=%G, delta=%G\n",step1,step2,pcgP->delta);CHKERRQ(ierr); } } else { /* Compute step along p */ step = rtr/ptasp; ierr = VecCopy(W,X);CHKERRQ(ierr); /* x = w */ ierr = VecAXPY(X,step,P);CHKERRQ(ierr); /* x <- step*p + x */ ierr = VecNorm(X,NORM_2,&pcgP->ltsnrm);CHKERRQ(ierr); if (pcgP->ltsnrm > pcgP->delta) { /* Since the trial iterate is outside the trust region, evaluate a constrained step along p so that ||w + step*p|| = delta The positive step is always better in this case. */ if (!i) { scal = pcgP->delta / pcgP->ltsnrm; ierr = VecScale(X,scal);CHKERRQ(ierr); } else { /* Compute roots of quadratic */ ierr = KSPQCGQuadraticRoots(W,P,pcgP->delta,&step1,&step2);CHKERRQ(ierr); ierr = VecCopy(W,X);CHKERRQ(ierr); ierr = VecAXPY(X,step1,P);CHKERRQ(ierr); /* x <- step1*p + x */ } pcgP->ltsnrm = pcgP->delta; ksp->reason = KSP_CONVERGED_CG_CONSTRAINED; /* convergence along constrained step */ if (!i) { ierr = PetscInfo1(ksp,"constrained step: delta=%G\n",pcgP->delta);CHKERRQ(ierr); } else { ierr = PetscInfo3(ksp,"constrained step: step1=%G, step2=%G, delta=%G\n",step1,step2,pcgP->delta);CHKERRQ(ierr); } } else { /* Evaluate the current step */ ierr = VecCopy(X,W);CHKERRQ(ierr); /* update interior iterate */ ierr = VecAXPY(R,-step,ASP);CHKERRQ(ierr); /* r <- -step*asp + r */ ierr = VecNorm(R,NORM_2,&rnrm);CHKERRQ(ierr); ierr = PetscObjectSAWsTakeAccess((PetscObject)ksp);CHKERRQ(ierr); ksp->rnorm = rnrm; ierr = PetscObjectSAWsGrantAccess((PetscObject)ksp);CHKERRQ(ierr); ierr = KSPLogResidualHistory(ksp,rnrm);CHKERRQ(ierr); ierr = KSPMonitor(ksp,i+1,rnrm);CHKERRQ(ierr); ierr = (*ksp->converged)(ksp,i+1,rnrm,&ksp->reason,ksp->cnvP);CHKERRQ(ierr); if (ksp->reason) { /* convergence for */ ierr = PetscInfo3(ksp,"truncated step: step=%G, rnrm=%G, delta=%G\n",PetscRealPart(step),rnrm,pcgP->delta);CHKERRQ(ierr); } } } if (ksp->reason) break; /* Convergence has been attained */ else { /* Compute a new AS-orthogonal direction */ ierr = VecDot(R,R,&rntrn);CHKERRQ(ierr); beta = rntrn/rtr; ierr = VecAYPX(P,beta,R);CHKERRQ(ierr); /* p <- r + beta*p */ rtr = PetscRealPart(rntrn); } } if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS; /* Unscale x */ ierr = VecCopy(X,WA2);CHKERRQ(ierr); ierr = PCApplySymmetricRight(pc,WA2,X);CHKERRQ(ierr); ierr = MatMult(Amat,X,WA);CHKERRQ(ierr); ierr = VecDotRealPart(B,X,&btx);CHKERRQ(ierr); ierr = VecDotRealPart(X,WA,&xtax);CHKERRQ(ierr); pcgP->quadratic = btx + .5*xtax; PetscFunctionReturn(0); }
/*@C PetscInitialize - Initializes the PETSc database and MPI. PetscInitialize() calls MPI_Init() if that has yet to be called, so this routine should always be called near the beginning of your program -- usually the very first line! Collective on MPI_COMM_WORLD or PETSC_COMM_WORLD if it has been set Input Parameters: + argc - count of number of command line arguments . args - the command line arguments . file - [optional] PETSc database file, also checks ~username/.petscrc and .petscrc use NULL to not check for code specific file. Use -skip_petscrc in the code specific file to skip the .petscrc files - help - [optional] Help message to print, use NULL for no message If you wish PETSc code to run ONLY on a subcommunicator of MPI_COMM_WORLD, create that communicator first and assign it to PETSC_COMM_WORLD BEFORE calling PetscInitialize(). Thus if you are running a four process job and two processes will run PETSc and have PetscInitialize() and PetscFinalize() and two process will not, then do this. If ALL processes in the job are using PetscInitialize() and PetscFinalize() then you don't need to do this, even if different subcommunicators of the job are doing different things with PETSc. Options Database Keys: + -start_in_debugger [noxterm,dbx,xdb,gdb,...] - Starts program in debugger . -on_error_attach_debugger [noxterm,dbx,xdb,gdb,...] - Starts debugger when error detected . -on_error_emacs <machinename> causes emacsclient to jump to error file . -on_error_abort calls abort() when error detected (no traceback) . -on_error_mpiabort calls MPI_abort() when error detected . -error_output_stderr prints error messages to stderr instead of the default stdout . -error_output_none does not print the error messages (but handles errors in the same way as if this was not called) . -debugger_nodes [node1,node2,...] - Indicates nodes to start in debugger . -debugger_pause [sleeptime] (in seconds) - Pauses debugger . -stop_for_debugger - Print message on how to attach debugger manually to process and wait (-debugger_pause) seconds for attachment . -malloc - Indicates use of PETSc error-checking malloc (on by default for debug version of libraries) . -malloc no - Indicates not to use error-checking malloc . -malloc_debug - check for memory corruption at EVERY malloc or free . -malloc_test - like -malloc_dump -malloc_debug, but only active for debugging builds . -fp_trap - Stops on floating point exceptions (Note that on the IBM RS6000 this slows code by at least a factor of 10.) . -no_signal_handler - Indicates not to trap error signals . -shared_tmp - indicates /tmp directory is shared by all processors . -not_shared_tmp - each processor has own /tmp . -tmp - alternative name of /tmp directory . -get_total_flops - returns total flops done by all processors . -memory_info - Print memory usage at end of run - -server <port> - start PETSc webserver (default port is 8080) Options Database Keys for Profiling: See the <a href="../../docs/manual.pdf#nameddest=ch_profiling">profiling chapter of the users manual</a> for details. + -info <optional filename> - Prints verbose information to the screen . -info_exclude <null,vec,mat,pc,ksp,snes,ts> - Excludes some of the verbose messages . -log_sync - Log the synchronization in scatters, inner products and norms . -log_trace [filename] - Print traces of all PETSc calls to the screen (useful to determine where a program hangs without running in the debugger). See PetscLogTraceBegin(). . -log_summary [filename] - Prints summary of flop and timing information to screen. If the filename is specified the summary is written to the file. See PetscLogView(). . -log_summary_python [filename] - Prints data on of flop and timing usage to a file or screen. See PetscLogPrintSViewPython(). . -log_all [filename] - Logs extensive profiling information See PetscLogDump(). . -log [filename] - Logs basic profiline information See PetscLogDump(). - -log_mpe [filename] - Creates a logfile viewable by the utility Jumpshot (in MPICH distribution) Only one of -log_trace, -log_summary, -log_all, -log, or -log_mpe may be used at a time Environmental Variables: + PETSC_TMP - alternative tmp directory . PETSC_SHARED_TMP - tmp is shared by all processes . PETSC_NOT_SHARED_TMP - each process has its own private tmp . PETSC_VIEWER_SOCKET_PORT - socket number to use for socket viewer - PETSC_VIEWER_SOCKET_MACHINE - machine to use for socket viewer to connect to Level: beginner Notes: If for some reason you must call MPI_Init() separately, call it before PetscInitialize(). Fortran Version: In Fortran this routine has the format $ call PetscInitialize(file,ierr) + ierr - error return code - file - [optional] PETSc database file, also checks ~username/.petscrc and .petscrc use NULL_CHARACTER to not check for code specific file. Use -skip_petscrc in the code specific file to skip the .petscrc files Important Fortran Note: In Fortran, you MUST use NULL_CHARACTER to indicate a null character string; you CANNOT just use NULL as in the C version. See the <a href="../../docs/manual.pdf">users manual</a> for details. If your main program is C but you call Fortran code that also uses PETSc you need to call PetscInitializeFortran() soon after calling PetscInitialize(). Concepts: initializing PETSc .seealso: PetscFinalize(), PetscInitializeFortran(), PetscGetArgs(), PetscInitializeNoArguments() @*/ PetscErrorCode PetscInitialize(int *argc,char ***args,const char file[],const char help[]) { PetscErrorCode ierr; PetscMPIInt flag, size; PetscInt nodesize; PetscBool flg; char hostname[256]; PetscFunctionBegin; if (PetscInitializeCalled) PetscFunctionReturn(0); /* these must be initialized in a routine, not as a constant declaration*/ PETSC_STDOUT = stdout; PETSC_STDERR = stderr; ierr = PetscOptionsCreate();CHKERRQ(ierr); /* We initialize the program name here (before MPI_Init()) because MPICH has a bug in it that it sets args[0] on all processors to be args[0] on the first processor. */ if (argc && *argc) { ierr = PetscSetProgramName(**args);CHKERRQ(ierr); } else { ierr = PetscSetProgramName("Unknown Name");CHKERRQ(ierr); } ierr = MPI_Initialized(&flag);CHKERRQ(ierr); if (!flag) { if (PETSC_COMM_WORLD != MPI_COMM_NULL) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"You cannot set PETSC_COMM_WORLD if you have not initialized MPI first"); #if defined(PETSC_HAVE_MPI_INIT_THREAD) { PetscMPIInt provided; ierr = MPI_Init_thread(argc,args,MPI_THREAD_FUNNELED,&provided);CHKERRQ(ierr); } #else ierr = MPI_Init(argc,args);CHKERRQ(ierr); #endif PetscBeganMPI = PETSC_TRUE; } if (argc && args) { PetscGlobalArgc = *argc; PetscGlobalArgs = *args; } PetscFinalizeCalled = PETSC_FALSE; if (PETSC_COMM_WORLD == MPI_COMM_NULL) PETSC_COMM_WORLD = MPI_COMM_WORLD; ierr = MPI_Comm_set_errhandler(PETSC_COMM_WORLD,MPI_ERRORS_RETURN);CHKERRQ(ierr); /* Done after init due to a bug in MPICH-GM? */ ierr = PetscErrorPrintfInitialize();CHKERRQ(ierr); ierr = MPI_Comm_rank(MPI_COMM_WORLD,&PetscGlobalRank);CHKERRQ(ierr); ierr = MPI_Comm_size(MPI_COMM_WORLD,&PetscGlobalSize);CHKERRQ(ierr); MPIU_BOOL = MPI_INT; MPIU_ENUM = MPI_INT; /* Initialized the global complex variable; this is because with shared libraries the constructors for global variables are not called; at least on IRIX. */ #if defined(PETSC_HAVE_COMPLEX) { #if defined(PETSC_CLANGUAGE_CXX) PetscComplex ic(0.0,1.0); PETSC_i = ic; #elif defined(PETSC_CLANGUAGE_C) PETSC_i = _Complex_I; #endif } #if !defined(PETSC_HAVE_MPI_C_DOUBLE_COMPLEX) ierr = MPI_Type_contiguous(2,MPI_DOUBLE,&MPIU_C_DOUBLE_COMPLEX);CHKERRQ(ierr); ierr = MPI_Type_commit(&MPIU_C_DOUBLE_COMPLEX);CHKERRQ(ierr); ierr = MPI_Type_contiguous(2,MPI_FLOAT,&MPIU_C_COMPLEX);CHKERRQ(ierr); ierr = MPI_Type_commit(&MPIU_C_COMPLEX);CHKERRQ(ierr); #endif #endif /* PETSC_HAVE_COMPLEX */ /* Create the PETSc MPI reduction operator that sums of the first half of the entries and maxes the second half. */ ierr = MPI_Op_create(PetscMaxSum_Local,1,&PetscMaxSum_Op);CHKERRQ(ierr); #if defined(PETSC_USE_REAL___FLOAT128) ierr = MPI_Type_contiguous(2,MPI_DOUBLE,&MPIU___FLOAT128);CHKERRQ(ierr); ierr = MPI_Type_commit(&MPIU___FLOAT128);CHKERRQ(ierr); #if defined(PETSC_HAVE_COMPLEX) ierr = MPI_Type_contiguous(4,MPI_DOUBLE,&MPIU___COMPLEX128);CHKERRQ(ierr); ierr = MPI_Type_commit(&MPIU___COMPLEX128);CHKERRQ(ierr); #endif ierr = MPI_Op_create(PetscMax_Local,1,&MPIU_MAX);CHKERRQ(ierr); ierr = MPI_Op_create(PetscMin_Local,1,&MPIU_MIN);CHKERRQ(ierr); #endif #if (defined(PETSC_HAVE_COMPLEX) && !defined(PETSC_HAVE_MPI_C_DOUBLE_COMPLEX)) || defined(PETSC_USE_REAL___FLOAT128) ierr = MPI_Op_create(PetscSum_Local,1,&MPIU_SUM);CHKERRQ(ierr); #endif ierr = MPI_Type_contiguous(2,MPIU_SCALAR,&MPIU_2SCALAR);CHKERRQ(ierr); ierr = MPI_Type_commit(&MPIU_2SCALAR);CHKERRQ(ierr); ierr = MPI_Op_create(PetscADMax_Local,1,&PetscADMax_Op);CHKERRQ(ierr); ierr = MPI_Op_create(PetscADMin_Local,1,&PetscADMin_Op);CHKERRQ(ierr); #if defined(PETSC_USE_64BIT_INDICES) || !defined(MPI_2INT) ierr = MPI_Type_contiguous(2,MPIU_INT,&MPIU_2INT);CHKERRQ(ierr); ierr = MPI_Type_commit(&MPIU_2INT);CHKERRQ(ierr); #endif /* Attributes to be set on PETSc communicators */ ierr = MPI_Keyval_create(MPI_NULL_COPY_FN,Petsc_DelCounter,&Petsc_Counter_keyval,(void*)0);CHKERRQ(ierr); ierr = MPI_Keyval_create(MPI_NULL_COPY_FN,Petsc_DelComm,&Petsc_InnerComm_keyval,(void*)0);CHKERRQ(ierr); ierr = MPI_Keyval_create(MPI_NULL_COPY_FN,Petsc_DelComm,&Petsc_OuterComm_keyval,(void*)0);CHKERRQ(ierr); /* Build the options database */ ierr = PetscOptionsInsert(argc,args,file);CHKERRQ(ierr); /* Print main application help message */ ierr = PetscOptionsHasName(NULL,"-help",&flg);CHKERRQ(ierr); if (help && flg) { ierr = PetscPrintf(PETSC_COMM_WORLD,help);CHKERRQ(ierr); } ierr = PetscOptionsCheckInitial_Private();CHKERRQ(ierr); /* SHOULD PUT IN GUARDS: Make sure logging is initialized, even if we do not print it out */ #if defined(PETSC_USE_LOG) ierr = PetscLogBegin_Private();CHKERRQ(ierr); #endif /* Load the dynamic libraries (on machines that support them), this registers all the solvers etc. (On non-dynamic machines this initializes the PetscDraw and PetscViewer classes) */ ierr = PetscInitialize_DynamicLibraries();CHKERRQ(ierr); ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr); ierr = PetscInfo1(0,"PETSc successfully started: number of processors = %d\n",size);CHKERRQ(ierr); ierr = PetscGetHostName(hostname,256);CHKERRQ(ierr); ierr = PetscInfo1(0,"Running on machine: %s\n",hostname);CHKERRQ(ierr); ierr = PetscOptionsCheckInitial_Components();CHKERRQ(ierr); /* Check the options database for options related to the options database itself */ ierr = PetscOptionsSetFromOptions();CHKERRQ(ierr); #if defined(PETSC_USE_PETSC_MPI_EXTERNAL32) /* Tell MPI about our own data representation converter, this would/should be used if extern32 is not supported by the MPI Currently not used because it is not supported by MPICH. */ #if !defined(PETSC_WORDS_BIGENDIAN) ierr = MPI_Register_datarep((char*)"petsc",PetscDataRep_read_conv_fn,PetscDataRep_write_conv_fn,PetscDataRep_extent_fn,NULL);CHKERRQ(ierr); #endif #endif ierr = PetscOptionsGetInt(NULL,"-hmpi_spawn_size",&nodesize,&flg);CHKERRQ(ierr); if (flg) { #if defined(PETSC_HAVE_MPI_COMM_SPAWN) ierr = PetscHMPISpawn((PetscMPIInt) nodesize);CHKERRQ(ierr); /* worker nodes never return from here; they go directly to PetscEnd() */ #else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PETSc built without MPI 2 (MPI_Comm_spawn) support, use -hmpi_merge_size instead"); #endif } else { ierr = PetscOptionsGetInt(NULL,"-hmpi_merge_size",&nodesize,&flg);CHKERRQ(ierr); if (flg) { ierr = PetscHMPIMerge((PetscMPIInt) nodesize,NULL,NULL);CHKERRQ(ierr); if (PetscHMPIWorker) { /* if worker then never enter user code */ PetscInitializeCalled = PETSC_TRUE; PetscEnd(); } } } #if defined(PETSC_HAVE_CUDA) { PetscMPIInt p; for (p = 0; p < PetscGlobalSize; ++p) { if (p == PetscGlobalRank) cublasInit(); ierr = MPI_Barrier(PETSC_COMM_WORLD);CHKERRQ(ierr); } } #endif ierr = PetscOptionsHasName(NULL,"-python",&flg);CHKERRQ(ierr); if (flg) { PetscInitializeCalled = PETSC_TRUE; ierr = PetscPythonInitialize(NULL,NULL);CHKERRQ(ierr); } ierr = PetscThreadCommInitializePackage();CHKERRQ(ierr); /* Setup building of stack frames for all function calls */ #if defined(PETSC_USE_DEBUG) PetscThreadLocalRegister((PetscThreadKey*)&petscstack); /* Creates petscstack_key if needed */ ierr = PetscStackCreate();CHKERRQ(ierr); #endif #if defined(PETSC_SERIALIZE_FUNCTIONS) ierr = PetscFPTCreate(10000);CHKERRQ(ierr); #endif /* Once we are completedly initialized then we can set this variables */ PetscInitializeCalled = PETSC_TRUE; PetscFunctionReturn(0); }
PetscErrorCode PCGAMGProlongator_GEO(PC pc,Mat Amat,Mat Gmat,PetscCoarsenData *agg_lists,Mat *a_P_out) { PC_MG *mg = (PC_MG*)pc->data; PC_GAMG *pc_gamg = (PC_GAMG*)mg->innerctx; const PetscInt dim = pc_gamg->data_cell_cols, data_cols = pc_gamg->data_cell_cols; PetscErrorCode ierr; PetscInt Istart,Iend,nloc,my0,jj,kk,ncols,nLocalSelected,bs,*clid_flid; Mat Prol; PetscMPIInt rank, size; MPI_Comm comm; IS selected_2,selected_1; const PetscInt *selected_idx; MatType mtype; PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); ierr = PetscLogEventBegin(PC_GAMGProlongator_GEO,0,0,0,0);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); ierr = MatGetOwnershipRange(Amat, &Istart, &Iend);CHKERRQ(ierr); ierr = MatGetBlockSize(Amat, &bs);CHKERRQ(ierr); nloc = (Iend-Istart)/bs; my0 = Istart/bs; if ((Iend-Istart) % bs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"(Iend %D - Istart %D) % bs %D",Iend,Istart,bs); /* get 'nLocalSelected' */ ierr = PetscCDGetMIS(agg_lists, &selected_1);CHKERRQ(ierr); ierr = ISGetSize(selected_1, &jj);CHKERRQ(ierr); ierr = PetscMalloc1(jj, &clid_flid);CHKERRQ(ierr); ierr = ISGetIndices(selected_1, &selected_idx);CHKERRQ(ierr); for (kk=0,nLocalSelected=0; kk<jj; kk++) { PetscInt lid = selected_idx[kk]; if (lid<nloc) { ierr = MatGetRow(Gmat,lid+my0,&ncols,0,0);CHKERRQ(ierr); if (ncols>1) clid_flid[nLocalSelected++] = lid; /* fiter out singletons */ ierr = MatRestoreRow(Gmat,lid+my0,&ncols,0,0);CHKERRQ(ierr); } } ierr = ISRestoreIndices(selected_1, &selected_idx);CHKERRQ(ierr); ierr = ISDestroy(&selected_1);CHKERRQ(ierr); /* this is selected_1 in serial */ /* create prolongator matrix */ ierr = MatGetType(Amat,&mtype);CHKERRQ(ierr); ierr = MatCreate(comm, &Prol);CHKERRQ(ierr); ierr = MatSetSizes(Prol,nloc*bs,nLocalSelected*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); ierr = MatSetBlockSizes(Prol, bs, bs);CHKERRQ(ierr); ierr = MatSetType(Prol, mtype);CHKERRQ(ierr); ierr = MatSeqAIJSetPreallocation(Prol,3*data_cols,NULL);CHKERRQ(ierr); ierr = MatMPIAIJSetPreallocation(Prol,3*data_cols,NULL,3*data_cols,NULL);CHKERRQ(ierr); /* can get all points "removed" - but not on geomg */ ierr = MatGetSize(Prol, &kk, &jj);CHKERRQ(ierr); if (!jj) { ierr = PetscInfo(pc,"ERROE: no selected points on coarse grid\n");CHKERRQ(ierr); ierr = PetscFree(clid_flid);CHKERRQ(ierr); ierr = MatDestroy(&Prol);CHKERRQ(ierr); *a_P_out = NULL; /* out */ PetscFunctionReturn(0); } { PetscReal *coords; PetscInt data_stride; PetscInt *crsGID = NULL; Mat Gmat2; if (dim != data_cols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"dim %D != data_cols %D",dim,data_cols); /* grow ghost data for better coarse grid cover of fine grid */ #if defined PETSC_GAMG_USE_LOG ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET5],0,0,0,0);CHKERRQ(ierr); #endif /* messy method, squares graph and gets some data */ ierr = getGIDsOnSquareGraph(nLocalSelected, clid_flid, Gmat, &selected_2, &Gmat2, &crsGID);CHKERRQ(ierr); /* llist is now not valid wrt squared graph, but will work as iterator in 'triangulateAndFormProl' */ #if defined PETSC_GAMG_USE_LOG ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET5],0,0,0,0);CHKERRQ(ierr); #endif /* create global vector of coorindates in 'coords' */ if (size > 1) { ierr = PCGAMGGetDataWithGhosts(Gmat2, dim, pc_gamg->data, &data_stride, &coords);CHKERRQ(ierr); } else { coords = (PetscReal*)pc_gamg->data; data_stride = pc_gamg->data_sz/pc_gamg->data_cell_cols; } ierr = MatDestroy(&Gmat2);CHKERRQ(ierr); /* triangulate */ if (dim == 2) { PetscReal metric,tm; #if defined PETSC_GAMG_USE_LOG ierr = PetscLogEventBegin(petsc_gamg_setup_events[SET6],0,0,0,0);CHKERRQ(ierr); #endif ierr = triangulateAndFormProl(selected_2, data_stride, coords,nLocalSelected, clid_flid, agg_lists, crsGID, bs, Prol, &metric);CHKERRQ(ierr); #if defined PETSC_GAMG_USE_LOG ierr = PetscLogEventEnd(petsc_gamg_setup_events[SET6],0,0,0,0);CHKERRQ(ierr); #endif ierr = PetscFree(crsGID);CHKERRQ(ierr); /* clean up and create coordinates for coarse grid (output) */ if (size > 1) ierr = PetscFree(coords);CHKERRQ(ierr); ierr = MPI_Allreduce(&metric, &tm, 1, MPIU_REAL, MPIU_MAX, comm);CHKERRQ(ierr); if (tm > 1.) { /* needs to be globalized - should not happen */ ierr = PetscInfo1(pc," failed metric for coarse grid %e\n",(double)tm);CHKERRQ(ierr); ierr = MatDestroy(&Prol);CHKERRQ(ierr); } else if (metric > .0) { ierr = PetscInfo1(pc,"worst metric for coarse grid = %e\n",(double)metric);CHKERRQ(ierr); } } else SETERRQ(comm,PETSC_ERR_PLIB,"3D not implemented for 'geo' AMG"); { /* create next coords - output */ PetscReal *crs_crds; ierr = PetscMalloc1(dim*nLocalSelected, &crs_crds);CHKERRQ(ierr); for (kk=0; kk<nLocalSelected; kk++) { /* grab local select nodes to promote - output */ PetscInt lid = clid_flid[kk]; for (jj=0; jj<dim; jj++) crs_crds[jj*nLocalSelected + kk] = pc_gamg->data[jj*nloc + lid]; } ierr = PetscFree(pc_gamg->data);CHKERRQ(ierr); pc_gamg->data = crs_crds; /* out */ pc_gamg->data_sz = dim*nLocalSelected; } ierr = ISDestroy(&selected_2);CHKERRQ(ierr); } *a_P_out = Prol; /* out */ ierr = PetscFree(clid_flid);CHKERRQ(ierr); ierr = PetscLogEventEnd(PC_GAMGProlongator_GEO,0,0,0,0);CHKERRQ(ierr); PetscFunctionReturn(0); }
static PetscErrorCode SNESSolve_QN(SNES snes) { PetscErrorCode ierr; SNES_QN *qn = (SNES_QN*) snes->data; Vec X,Xold; Vec F,W; Vec Y,D,Dold; PetscInt i, i_r; PetscReal fnorm,xnorm,ynorm,gnorm; SNESLineSearchReason lssucceed; PetscBool powell,periodic; PetscScalar DolddotD,DolddotDold; SNESConvergedReason reason; /* basically just a regular newton's method except for the application of the Jacobian */ PetscFunctionBegin; if (snes->xl || snes->xu || snes->ops->computevariablebounds) SETERRQ1(PetscObjectComm((PetscObject)snes),PETSC_ERR_ARG_WRONGSTATE, "SNES solver %s does not support bounds", ((PetscObject)snes)->type_name); ierr = PetscCitationsRegister(SNESCitation,&SNEScite);CHKERRQ(ierr); F = snes->vec_func; /* residual vector */ Y = snes->vec_sol_update; /* search direction generated by J^-1D*/ W = snes->work[3]; X = snes->vec_sol; /* solution vector */ Xold = snes->work[0]; /* directions generated by the preconditioned problem with F_pre = F or x - M(x, b) */ D = snes->work[1]; Dold = snes->work[2]; snes->reason = SNES_CONVERGED_ITERATING; ierr = PetscObjectSAWsTakeAccess((PetscObject)snes);CHKERRQ(ierr); snes->iter = 0; snes->norm = 0.; ierr = PetscObjectSAWsGrantAccess((PetscObject)snes);CHKERRQ(ierr); if (snes->pc && snes->pcside == PC_LEFT && snes->functype == SNES_FUNCTION_PRECONDITIONED) { ierr = SNESApplyNPC(snes,X,NULL,F);CHKERRQ(ierr); ierr = SNESGetConvergedReason(snes->pc,&reason);CHKERRQ(ierr); if (reason < 0 && reason != SNES_DIVERGED_MAX_IT) { snes->reason = SNES_DIVERGED_INNER; PetscFunctionReturn(0); } ierr = VecNorm(F,NORM_2,&fnorm);CHKERRQ(ierr); } else { if (!snes->vec_func_init_set) { ierr = SNESComputeFunction(snes,X,F);CHKERRQ(ierr); } else snes->vec_func_init_set = PETSC_FALSE; ierr = VecNorm(F,NORM_2,&fnorm);CHKERRQ(ierr); SNESCheckFunctionNorm(snes,fnorm); } if (snes->pc && snes->pcside == PC_LEFT && snes->functype == SNES_FUNCTION_UNPRECONDITIONED) { ierr = SNESApplyNPC(snes,X,F,D);CHKERRQ(ierr); ierr = SNESGetConvergedReason(snes->pc,&reason);CHKERRQ(ierr); if (reason < 0 && reason != SNES_DIVERGED_MAX_IT) { snes->reason = SNES_DIVERGED_INNER; PetscFunctionReturn(0); } } else { ierr = VecCopy(F,D);CHKERRQ(ierr); } ierr = PetscObjectSAWsTakeAccess((PetscObject)snes);CHKERRQ(ierr); snes->norm = fnorm; ierr = PetscObjectSAWsGrantAccess((PetscObject)snes);CHKERRQ(ierr); ierr = SNESLogConvergenceHistory(snes,fnorm,0);CHKERRQ(ierr); ierr = SNESMonitor(snes,0,fnorm);CHKERRQ(ierr); /* test convergence */ ierr = (*snes->ops->converged)(snes,0,0.0,0.0,fnorm,&snes->reason,snes->cnvP);CHKERRQ(ierr); if (snes->reason) PetscFunctionReturn(0); if (snes->pc && snes->pcside == PC_RIGHT) { ierr = PetscLogEventBegin(SNES_NPCSolve,snes->pc,X,0,0);CHKERRQ(ierr); ierr = SNESSolve(snes->pc,snes->vec_rhs,X);CHKERRQ(ierr); ierr = PetscLogEventEnd(SNES_NPCSolve,snes->pc,X,0,0);CHKERRQ(ierr); ierr = SNESGetConvergedReason(snes->pc,&reason);CHKERRQ(ierr); if (reason < 0 && reason != SNES_DIVERGED_MAX_IT) { snes->reason = SNES_DIVERGED_INNER; PetscFunctionReturn(0); } ierr = SNESGetNPCFunction(snes,F,&fnorm);CHKERRQ(ierr); ierr = VecCopy(F,D);CHKERRQ(ierr); } /* scale the initial update */ if (qn->scale_type == SNES_QN_SCALE_JACOBIAN) { ierr = SNESComputeJacobian(snes,X,snes->jacobian,snes->jacobian_pre);CHKERRQ(ierr); ierr = KSPSetOperators(snes->ksp,snes->jacobian,snes->jacobian_pre);CHKERRQ(ierr); } for (i = 0, i_r = 0; i < snes->max_its; i++, i_r++) { if (qn->scale_type == SNES_QN_SCALE_SHANNO && i_r > 0) { PetscScalar ff,xf; ierr = VecCopy(Dold,Y);CHKERRQ(ierr); ierr = VecCopy(Xold,W);CHKERRQ(ierr); ierr = VecAXPY(Y,-1.0,D);CHKERRQ(ierr); ierr = VecAXPY(W,-1.0,X);CHKERRQ(ierr); ierr = VecDotBegin(Y,Y,&ff);CHKERRQ(ierr); ierr = VecDotBegin(W,Y,&xf);CHKERRQ(ierr); ierr = VecDotEnd(Y,Y,&ff);CHKERRQ(ierr); ierr = VecDotEnd(W,Y,&xf);CHKERRQ(ierr); qn->scaling = PetscRealPart(xf)/PetscRealPart(ff); if (qn->monitor) { ierr = PetscViewerASCIIAddTab(qn->monitor,((PetscObject)snes)->tablevel+2);CHKERRQ(ierr); ierr = PetscViewerASCIIPrintf(qn->monitor, "Shanno scaling %D %g\n", i,(double)qn->scaling);CHKERRQ(ierr); ierr = PetscViewerASCIISubtractTab(qn->monitor,((PetscObject)snes)->tablevel+2);CHKERRQ(ierr); } } switch (qn->type) { case SNES_QN_BADBROYDEN: ierr = SNESQNApply_BadBroyden(snes,i_r,Y,X,Xold,D,Dold);CHKERRQ(ierr); break; case SNES_QN_BROYDEN: ierr = SNESQNApply_Broyden(snes,i_r,Y,X,Xold,D);CHKERRQ(ierr); break; case SNES_QN_LBFGS: SNESQNApply_LBFGS(snes,i_r,Y,X,Xold,D,Dold);CHKERRQ(ierr); break; } /* line search for lambda */ ynorm = 1; gnorm = fnorm; ierr = VecCopy(D, Dold);CHKERRQ(ierr); ierr = VecCopy(X, Xold);CHKERRQ(ierr); ierr = SNESLineSearchApply(snes->linesearch, X, F, &fnorm, Y);CHKERRQ(ierr); if (snes->reason == SNES_DIVERGED_FUNCTION_COUNT) break; ierr = SNESLineSearchGetReason(snes->linesearch, &lssucceed);CHKERRQ(ierr); ierr = SNESLineSearchGetNorms(snes->linesearch, &xnorm, &fnorm, &ynorm);CHKERRQ(ierr); if (lssucceed) { if (++snes->numFailures >= snes->maxFailures) { snes->reason = SNES_DIVERGED_LINE_SEARCH; break; } } if (qn->scale_type == SNES_QN_SCALE_LINESEARCH) { ierr = SNESLineSearchGetLambda(snes->linesearch, &qn->scaling);CHKERRQ(ierr); } /* convergence monitoring */ ierr = PetscInfo4(snes,"fnorm=%18.16e, gnorm=%18.16e, ynorm=%18.16e, lssucceed=%d\n",(double)fnorm,(double)gnorm,(double)ynorm,(int)lssucceed);CHKERRQ(ierr); if (snes->pc && snes->pcside == PC_RIGHT) { ierr = PetscLogEventBegin(SNES_NPCSolve,snes->pc,X,0,0);CHKERRQ(ierr); ierr = SNESSolve(snes->pc,snes->vec_rhs,X);CHKERRQ(ierr); ierr = PetscLogEventEnd(SNES_NPCSolve,snes->pc,X,0,0);CHKERRQ(ierr); ierr = SNESGetConvergedReason(snes->pc,&reason);CHKERRQ(ierr); if (reason < 0 && reason != SNES_DIVERGED_MAX_IT) { snes->reason = SNES_DIVERGED_INNER; PetscFunctionReturn(0); } ierr = SNESGetNPCFunction(snes,F,&fnorm);CHKERRQ(ierr); } ierr = SNESSetIterationNumber(snes, i+1);CHKERRQ(ierr); snes->norm = fnorm; ierr = SNESLogConvergenceHistory(snes,snes->norm,snes->iter);CHKERRQ(ierr); ierr = SNESMonitor(snes,snes->iter,snes->norm);CHKERRQ(ierr); /* set parameter for default relative tolerance convergence test */ ierr = (*snes->ops->converged)(snes,snes->iter,xnorm,ynorm,fnorm,&snes->reason,snes->cnvP);CHKERRQ(ierr); if (snes->reason) PetscFunctionReturn(0); if (snes->pc && snes->pcside == PC_LEFT && snes->functype == SNES_FUNCTION_UNPRECONDITIONED) { ierr = SNESApplyNPC(snes,X,F,D);CHKERRQ(ierr); ierr = SNESGetConvergedReason(snes->pc,&reason);CHKERRQ(ierr); if (reason < 0 && reason != SNES_DIVERGED_MAX_IT) { snes->reason = SNES_DIVERGED_INNER; PetscFunctionReturn(0); } } else { ierr = VecCopy(F, D);CHKERRQ(ierr); } powell = PETSC_FALSE; if (qn->restart_type == SNES_QN_RESTART_POWELL && i_r > 1) { /* check restart by Powell's Criterion: |F^T H_0 Fold| > powell_gamma * |Fold^T H_0 Fold| */ if (qn->scale_type == SNES_QN_SCALE_JACOBIAN) { ierr = MatMult(snes->jacobian_pre,Dold,W);CHKERRQ(ierr); } else { ierr = VecCopy(Dold,W);CHKERRQ(ierr); } ierr = VecDotBegin(W, Dold, &DolddotDold);CHKERRQ(ierr); ierr = VecDotBegin(W, D, &DolddotD);CHKERRQ(ierr); ierr = VecDotEnd(W, Dold, &DolddotDold);CHKERRQ(ierr); ierr = VecDotEnd(W, D, &DolddotD);CHKERRQ(ierr); if (PetscAbs(PetscRealPart(DolddotD)) > qn->powell_gamma*PetscAbs(PetscRealPart(DolddotDold))) powell = PETSC_TRUE; } periodic = PETSC_FALSE; if (qn->restart_type == SNES_QN_RESTART_PERIODIC) { if (i_r>qn->m-1) periodic = PETSC_TRUE; } /* restart if either powell or periodic restart is satisfied. */ if (powell || periodic) { if (qn->monitor) { ierr = PetscViewerASCIIAddTab(qn->monitor,((PetscObject)snes)->tablevel+2);CHKERRQ(ierr); if (powell) { ierr = PetscViewerASCIIPrintf(qn->monitor, "Powell restart! |%14.12e| > %6.4f*|%14.12e| i_r = %D\n", (double)PetscRealPart(DolddotD), (double)qn->powell_gamma, (double)PetscRealPart(DolddotDold),i_r);CHKERRQ(ierr); } else { ierr = PetscViewerASCIIPrintf(qn->monitor, "Periodic restart! i_r = %D\n", i_r);CHKERRQ(ierr); } ierr = PetscViewerASCIISubtractTab(qn->monitor,((PetscObject)snes)->tablevel+2);CHKERRQ(ierr); } i_r = -1; /* general purpose update */ if (snes->ops->update) { ierr = (*snes->ops->update)(snes, snes->iter);CHKERRQ(ierr); } if (qn->scale_type == SNES_QN_SCALE_JACOBIAN) { ierr = SNESComputeJacobian(snes,X,snes->jacobian,snes->jacobian_pre);CHKERRQ(ierr); } } /* general purpose update */ if (snes->ops->update) { ierr = (*snes->ops->update)(snes, snes->iter);CHKERRQ(ierr); } } if (i == snes->max_its) { ierr = PetscInfo1(snes, "Maximum number of iterations has been reached: %D\n", snes->max_its);CHKERRQ(ierr); if (!snes->reason) snes->reason = SNES_DIVERGED_MAX_IT; } PetscFunctionReturn(0); }
/*@C PetscSharedWorkingDirectory - Determines if all processors in a communicator share a working directory or have different ones. Collective on MPI_Comm Input Parameters: . comm - MPI_Communicator that may share working directory Output Parameters: . shared - PETSC_TRUE or PETSC_FALSE Options Database Keys: + -shared_working_directory . -not_shared_working_directory Environmental Variables: + PETSC_SHARED_WORKING_DIRECTORY . PETSC_NOT_SHARED_WORKING_DIRECTORY Level: developer Notes: Stores the status as a MPI attribute so it does not have to be redetermined each time. Assumes that all processors in a communicator either 1) have a common working directory or 2) each has a separate working directory eventually we can write a fancier one that determines which processors share a common working directory. This will be very slow on runs with a large number of processors since it requires O(p*p) file opens. @*/ PetscErrorCode PETSC_DLLEXPORT PetscSharedWorkingDirectory(MPI_Comm comm,PetscTruth *shared) { PetscErrorCode ierr; PetscMPIInt size,rank,*tagvalp,sum,cnt,i; PetscTruth flg,iflg; FILE *fd; static PetscMPIInt Petsc_WD_keyval = MPI_KEYVAL_INVALID; int err; PetscFunctionBegin; ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); if (size == 1) { *shared = PETSC_TRUE; PetscFunctionReturn(0); } ierr = PetscOptionsGetenv(comm,"PETSC_SHARED_WORKING_DIRECTORY",PETSC_NULL,0,&flg);CHKERRQ(ierr); if (flg) { *shared = PETSC_TRUE; PetscFunctionReturn(0); } ierr = PetscOptionsGetenv(comm,"PETSC_NOT_SHARED_WORKING_DIRECTORY",PETSC_NULL,0,&flg);CHKERRQ(ierr); if (flg) { *shared = PETSC_FALSE; PetscFunctionReturn(0); } if (Petsc_WD_keyval == MPI_KEYVAL_INVALID) { ierr = MPI_Keyval_create(MPI_NULL_COPY_FN,Petsc_DelTmpShared,&Petsc_WD_keyval,0);CHKERRQ(ierr); } ierr = MPI_Attr_get(comm,Petsc_WD_keyval,(void**)&tagvalp,(int*)&iflg);CHKERRQ(ierr); if (!iflg) { char filename[PETSC_MAX_PATH_LEN]; /* This communicator does not yet have a shared attribute */ ierr = PetscMalloc(sizeof(PetscMPIInt),&tagvalp);CHKERRQ(ierr); ierr = MPI_Attr_put(comm,Petsc_WD_keyval,tagvalp);CHKERRQ(ierr); ierr = PetscGetWorkingDirectory(filename,240);CHKERRQ(ierr); ierr = PetscStrcat(filename,"/petsctestshared");CHKERRQ(ierr); ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); /* each processor creates a file and all the later ones check */ /* this makes sure no subset of processors is shared */ *shared = PETSC_FALSE; for (i=0; i<size-1; i++) { if (rank == i) { fd = fopen(filename,"w"); if (!fd) SETERRQ1(PETSC_ERR_FILE_OPEN,"Unable to open test file %s",filename); err = fclose(fd); if (err) SETERRQ(PETSC_ERR_SYS,"fclose() failed on file"); } ierr = MPI_Barrier(comm);CHKERRQ(ierr); if (rank >= i) { fd = fopen(filename,"r"); if (fd) cnt = 1; else cnt = 0; if (fd) { err = fclose(fd); if (err) SETERRQ(PETSC_ERR_SYS,"fclose() failed on file"); } } else { cnt = 0; } ierr = MPI_Allreduce(&cnt,&sum,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); if (rank == i) { unlink(filename); } if (sum == size) { *shared = PETSC_TRUE; break; } else if (sum != 1) { SETERRQ(PETSC_ERR_SUP_SYS,"Subset of processes share working directory"); } } *tagvalp = (int)*shared; } else { *shared = (PetscTruth) *tagvalp; } ierr = PetscInfo1(0,"processors %s working directory\n",(*shared) ? "shared" : "do NOT share");CHKERRQ(ierr); PetscFunctionReturn(0); }
PetscErrorCode SNESSolve_FAS(SNES snes) { PetscErrorCode ierr; PetscInt i, maxits; Vec X, F; PetscReal fnorm; SNES_FAS *fas = (SNES_FAS *)snes->data,*ffas; DM dm; PetscBool isFine; PetscFunctionBegin; maxits = snes->max_its; /* maximum number of iterations */ snes->reason = SNES_CONVERGED_ITERATING; X = snes->vec_sol; F = snes->vec_func; ierr = SNESFASCycleIsFine(snes, &isFine);CHKERRQ(ierr); /*norm setup */ ierr = PetscObjectTakeAccess(snes);CHKERRQ(ierr); snes->iter = 0; snes->norm = 0.; ierr = PetscObjectGrantAccess(snes);CHKERRQ(ierr); if (!snes->vec_func_init_set) { ierr = SNESComputeFunction(snes,X,F);CHKERRQ(ierr); if (snes->domainerror) { snes->reason = SNES_DIVERGED_FUNCTION_DOMAIN; PetscFunctionReturn(0); } } else { snes->vec_func_init_set = PETSC_FALSE; } if (!snes->norm_init_set) { ierr = VecNorm(F, NORM_2, &fnorm);CHKERRQ(ierr); /* fnorm <- ||F|| */ if (PetscIsInfOrNanReal(fnorm)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FP,"Infinite or not-a-number generated in norm"); ierr = PetscObjectTakeAccess(snes);CHKERRQ(ierr); } else { fnorm = snes->norm_init; snes->norm_init_set = PETSC_FALSE; } snes->norm = fnorm; ierr = PetscObjectGrantAccess(snes);CHKERRQ(ierr); SNESLogConvHistory(snes,fnorm,0); ierr = SNESMonitor(snes,0,fnorm);CHKERRQ(ierr); /* set parameter for default relative tolerance convergence test */ snes->ttol = fnorm*snes->rtol; /* test convergence */ ierr = (*snes->ops->converged)(snes,0,0.0,0.0,fnorm,&snes->reason,snes->cnvP);CHKERRQ(ierr); if (snes->reason) PetscFunctionReturn(0); if (isFine) { /* propagate scale-dependent data up the hierarchy */ ierr = SNESGetDM(snes,&dm);CHKERRQ(ierr); for (ffas=fas; ffas->next; ffas=(SNES_FAS*)ffas->next->data) { DM dmcoarse; ierr = SNESGetDM(ffas->next,&dmcoarse);CHKERRQ(ierr); ierr = DMRestrict(dm,ffas->restrct,ffas->rscale,ffas->inject,dmcoarse);CHKERRQ(ierr); dm = dmcoarse; } } for (i = 0; i < maxits; i++) { /* Call general purpose update function */ if (snes->ops->update) { ierr = (*snes->ops->update)(snes, snes->iter);CHKERRQ(ierr); } if (fas->fastype == SNES_FAS_MULTIPLICATIVE) { ierr = SNESFASCycle_Multiplicative(snes, X);CHKERRQ(ierr); } else { ierr = SNESFASCycle_Additive(snes, X);CHKERRQ(ierr); } /* check for FAS cycle divergence */ if (snes->reason != SNES_CONVERGED_ITERATING) { PetscFunctionReturn(0); } /* Monitor convergence */ ierr = PetscObjectTakeAccess(snes);CHKERRQ(ierr); snes->iter = i+1; ierr = PetscObjectGrantAccess(snes);CHKERRQ(ierr); SNESLogConvHistory(snes,snes->norm,0); ierr = SNESMonitor(snes,snes->iter,snes->norm);CHKERRQ(ierr); /* Test for convergence */ if (isFine) { ierr = (*snes->ops->converged)(snes,snes->iter,0.0,0.0,snes->norm,&snes->reason,snes->cnvP);CHKERRQ(ierr); if (snes->reason) break; } } if (i == maxits) { ierr = PetscInfo1(snes, "Maximum number of iterations has been reached: %D\n", maxits);CHKERRQ(ierr); if (!snes->reason) snes->reason = SNES_DIVERGED_MAX_IT; } PetscFunctionReturn(0); }
PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) { Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b; PetscInt n =a->mbs,bs = A->rmap->bs,bs2=a->bs2; PetscBool row_identity,col_identity,both_identity; IS isicol; PetscErrorCode ierr; const PetscInt *r,*ic; PetscInt i,*ai=a->i,*aj=a->j; PetscInt *bi,*bj,*ajtmp; PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; PetscReal f; PetscInt nlnk,*lnk,k,**bi_ptr; PetscFreeSpaceList free_space=NULL,current_space=NULL; PetscBT lnkbt; PetscBool missing; PetscFunctionBegin; if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); if (bs>1) { /* check shifttype */ if (info->shifttype == MAT_SHIFT_NONZERO || info->shifttype == MAT_SHIFT_POSITIVE_DEFINITE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only MAT_SHIFT_NONE and MAT_SHIFT_INBLOCKS are supported for BAIJ matrix"); } ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ ierr = PetscMalloc1(n+1,&bi);CHKERRQ(ierr); ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr); bi[0] = bdiag[0] = 0; /* linked list for storing column indices of the active row */ nlnk = n + 1; ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); ierr = PetscMalloc2(n+1,&bi_ptr,n+1,&im);CHKERRQ(ierr); /* initial FreeSpace size is f*(ai[n]+1) */ f = info->fill; ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr); current_space = free_space; for (i=0; i<n; i++) { /* copy previous fill into linked list */ nzi = 0; nnz = ai[r[i]+1] - ai[r[i]]; ajtmp = aj + ai[r[i]]; ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); nzi += nlnk; /* add pivot rows into linked list */ row = lnk[n]; while (row < i) { nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); nzi += nlnk; row = lnk[row]; } bi[i+1] = bi[i] + nzi; im[i] = nzi; /* mark bdiag */ nzbd = 0; nnz = nzi; k = lnk[n]; while (nnz-- && k < i) { nzbd++; k = lnk[k]; } bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ /* if free space is not available, make more free space */ if (current_space->local_remaining<nzi) { nnz = PetscIntMultTruncate(2,PetscIntMultTruncate(n - i,nzi)); /* estimated and max additional space needed */ ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); reallocs++; } /* copy data into free space, then initialize lnk */ ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); bi_ptr[i] = current_space->array; current_space->array += nzi; current_space->local_used += nzi; current_space->local_remaining -= nzi; } ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr); ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); /* put together the new matrix */ ierr = MatSeqBAIJSetPreallocation(B,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr); b = (Mat_SeqBAIJ*)(B)->data; b->free_a = PETSC_TRUE; b->free_ij = PETSC_TRUE; b->singlemalloc = PETSC_FALSE; ierr = PetscMalloc1((bdiag[0]+1)*bs2,&b->a);CHKERRQ(ierr); b->j = bj; b->i = bi; b->diag = bdiag; b->free_diag = PETSC_TRUE; b->ilen = 0; b->imax = 0; b->row = isrow; b->col = iscol; b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); b->icol = isicol; ierr = PetscMalloc1(bs*n+bs,&b->solve_work);CHKERRQ(ierr); ierr = PetscLogObjectMemory((PetscObject)B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));CHKERRQ(ierr); b->maxnz = b->nz = bdiag[0]+1; B->factortype = MAT_FACTOR_LU; B->info.factor_mallocs = reallocs; B->info.fill_ratio_given = f; if (ai[n] != 0) { B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); } else { B->info.fill_ratio_needed = 0.0; } #if defined(PETSC_USE_INFO) if (ai[n] != 0) { PetscReal af = B->info.fill_ratio_needed; ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);CHKERRQ(ierr); ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); } else { ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); } #endif ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); both_identity = (PetscBool) (row_identity && col_identity); ierr = MatSeqBAIJSetNumericFactorization(B,both_identity);CHKERRQ(ierr); PetscFunctionReturn(0); }
static PetscErrorCode MatStashScatterBegin_Ref(Mat mat,MatStash *stash,PetscInt *owners) { PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; PetscInt size=stash->size,nsends; PetscErrorCode ierr; PetscInt count,*sindices,**rindices,i,j,idx,lastidx,l; PetscScalar **rvalues,*svalues; MPI_Comm comm = stash->comm; MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2; PetscMPIInt *sizes,*nlengths,nreceives; PetscInt *sp_idx,*sp_idy; PetscScalar *sp_val; PetscMatStashSpace space,space_next; PetscFunctionBegin; { /* make sure all processors are either in INSERTMODE or ADDMODE */ InsertMode addv; ierr = MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); mat->insertmode = addv; /* in case this processor had no cache */ } bs2 = stash->bs*stash->bs; /* first count number of contributors to each processor */ ierr = PetscCalloc1(size,&sizes);CHKERRQ(ierr); ierr = PetscCalloc1(size,&nlengths);CHKERRQ(ierr); ierr = PetscMalloc1(stash->n+1,&owner);CHKERRQ(ierr); i = j = 0; lastidx = -1; space = stash->space_head; while (space) { space_next = space->next; sp_idx = space->idx; for (l=0; l<space->local_used; l++) { /* if indices are NOT locally sorted, need to start search at the beginning */ if (lastidx > (idx = sp_idx[l])) j = 0; lastidx = idx; for (; j<size; j++) { if (idx >= owners[j] && idx < owners[j+1]) { nlengths[j]++; owner[i] = j; break; } } i++; } space = space_next; } /* Now check what procs get messages - and compute nsends. */ for (i=0, nsends=0; i<size; i++) { if (nlengths[i]) { sizes[i] = 1; nsends++; } } {PetscMPIInt *onodes,*olengths; /* Determine the number of messages to expect, their lengths, from from-ids */ ierr = PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);CHKERRQ(ierr); ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr); /* since clubbing row,col - lengths are multiplied by 2 */ for (i=0; i<nreceives; i++) olengths[i] *=2; ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr); /* values are size 'bs2' lengths (and remove earlier factor 2 */ for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2; ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr); ierr = PetscFree(onodes);CHKERRQ(ierr); ierr = PetscFree(olengths);CHKERRQ(ierr);} /* do sends: 1) starts[i] gives the starting index in svalues for stuff going to the ith processor */ ierr = PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);CHKERRQ(ierr); ierr = PetscMalloc1(2*nsends,&send_waits);CHKERRQ(ierr); ierr = PetscMalloc2(size,&startv,size,&starti);CHKERRQ(ierr); /* use 2 sends the first with all_a, the next with all_i and all_j */ startv[0] = 0; starti[0] = 0; for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1]; starti[i] = starti[i-1] + 2*nlengths[i-1]; } i = 0; space = stash->space_head; while (space) { space_next = space->next; sp_idx = space->idx; sp_idy = space->idy; sp_val = space->val; for (l=0; l<space->local_used; l++) { j = owner[i]; if (bs2 == 1) { svalues[startv[j]] = sp_val[l]; } else { PetscInt k; PetscScalar *buf1,*buf2; buf1 = svalues+bs2*startv[j]; buf2 = space->val + bs2*l; for (k=0; k<bs2; k++) buf1[k] = buf2[k]; } sindices[starti[j]] = sp_idx[l]; sindices[starti[j]+nlengths[j]] = sp_idy[l]; startv[j]++; starti[j]++; i++; } space = space_next; } startv[0] = 0; for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1]; for (i=0,count=0; i<size; i++) { if (sizes[i]) { ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr); ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr); } } #if defined(PETSC_USE_INFO) ierr = PetscInfo1(NULL,"No of messages: %d \n",nsends);CHKERRQ(ierr); for (i=0; i<size; i++) { if (sizes[i]) { ierr = PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); } } #endif ierr = PetscFree(nlengths);CHKERRQ(ierr); ierr = PetscFree(owner);CHKERRQ(ierr); ierr = PetscFree2(startv,starti);CHKERRQ(ierr); ierr = PetscFree(sizes);CHKERRQ(ierr); /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ ierr = PetscMalloc1(2*nreceives,&recv_waits);CHKERRQ(ierr); for (i=0; i<nreceives; i++) { recv_waits[2*i] = recv_waits1[i]; recv_waits[2*i+1] = recv_waits2[i]; } stash->recv_waits = recv_waits; ierr = PetscFree(recv_waits1);CHKERRQ(ierr); ierr = PetscFree(recv_waits2);CHKERRQ(ierr); stash->svalues = svalues; stash->sindices = sindices; stash->rvalues = rvalues; stash->rindices = rindices; stash->send_waits = send_waits; stash->nsends = nsends; stash->nrecvs = nreceives; stash->reproduce_count = 0; PetscFunctionReturn(0); }