PETSC_STATIC_INLINE void PetscGammaCorrect(double *r,double *g,double *b) { PetscReal igamma = 1/Gamma; *r = (double)PetscPowReal((PetscReal)*r,igamma); *g = (double)PetscPowReal((PetscReal)*g,igamma); *b = (double)PetscPowReal((PetscReal)*b,igamma); }
PetscErrorCode PetscDrawUtilitySetCmap(const char colormap[],int mapsize,unsigned char R[],unsigned char G[],unsigned char B[]) { int i,j; const char *cmap_name_list[sizeof(PetscDrawCmapTable)/sizeof(PetscDrawCmapTable[0])]; PetscInt id = 0, count = (PetscInt)(sizeof(cmap_name_list)/sizeof(char*)); PetscBool reverse = PETSC_FALSE, brighten = PETSC_FALSE; PetscReal beta = 0; PetscErrorCode ierr; PetscFunctionBegin; for (i=0; i<count; i++) cmap_name_list[i] = PetscDrawCmapTable[i].name; if (colormap && colormap[0]) { PetscBool match = PETSC_FALSE; for (id=0; !match && id<count; id++) {ierr = PetscStrcasecmp(colormap,cmap_name_list[id],&match);CHKERRQ(ierr);} if (!match) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Colormap '%s' not found",colormap); } ierr = PetscOptionsGetEList(NULL,NULL,"-draw_cmap",cmap_name_list,count,&id,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetBool(NULL,NULL,"-draw_cmap_reverse",&reverse,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetReal(NULL,NULL,"-draw_cmap_brighten",&beta,&brighten);CHKERRQ(ierr); if (brighten && (beta <= (PetscReal)-1 || beta >= (PetscReal)+1)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"brighten parameter %g must be in the range (-1,1)",(double)beta); if (PetscDrawCmapTable[id].cmap) { ierr = PetscDrawCmapTable[id].cmap(mapsize,R,G,B);CHKERRQ(ierr); } else { const unsigned char (*rgb)[3] = PetscDrawCmapTable[id].data; if (mapsize != 256-PETSC_DRAW_BASIC_COLORS) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Colormap '%s' with size %d not supported",cmap_name_list[id],mapsize); for (i=0; i<mapsize; i++) {R[i] = rgb[i][0]; G[i] = rgb[i][1]; B[i] = rgb[i][2];} } if (reverse) { i = 0; j = mapsize-1; while(i < j) { #define SWAP(a,i,j) do { unsigned char t = a[i]; a[i] = a[j]; a[j] = t; } while (0) SWAP(R,i,j); SWAP(G,i,j); SWAP(B,i,j); #undef SWAP i++; j--; } } if (brighten) { PetscReal gamma = (beta > 0.0) ? (1 - beta) : (1 / (1 + beta)); for (i=0; i<mapsize; i++) { PetscReal r = PetscPowReal((PetscReal)R[i]/255,gamma); PetscReal g = PetscPowReal((PetscReal)G[i]/255,gamma); PetscReal b = PetscPowReal((PetscReal)B[i]/255,gamma); R[i] = (unsigned char)(255*PetscMin(r,(PetscReal)1.0)); G[i] = (unsigned char)(255*PetscMin(g,(PetscReal)1.0)); B[i] = (unsigned char)(255*PetscMin(b,(PetscReal)1.0)); } } PetscFunctionReturn(0); }
PetscErrorCode GetExactEigenvalues(PetscInt M,PetscInt N,PetscInt P,PetscInt nconv,PetscReal *exact) { PetscInt n,i,j,k,l; PetscReal *evals,ax,ay,az,sx,sy,sz; PetscErrorCode ierr; PetscFunctionBeginUser; ax = PETSC_PI/2/(M+1); ay = PETSC_PI/2/(N+1); az = PETSC_PI/2/(P+1); n = PetscCeilReal(PetscPowReal(nconv,0.33333)+1); ierr = PetscMalloc1(n*n*n,&evals);CHKERRQ(ierr); l = 0; for (i=1;i<=n;i++) { sx = PetscSinReal(ax*i); for (j=1;j<=n;j++) { sy = PetscSinReal(ay*j); for (k=1;k<=n;k++) { sz = PetscSinReal(az*k); evals[l++] = 4.0*(sx*sx+sy*sy+sz*sz); } } } ierr = PetscSortReal(n*n*n,evals);CHKERRQ(ierr); for (i=0;i<nconv;i++) exact[i] = evals[i]; ierr = PetscFree(evals);CHKERRQ(ierr); PetscFunctionReturn(0); }
PetscScalar k1(AppCtx *ctx,PetscReal t) { PetscReal th = t/3600.0; PetscReal barth = th - 24.0*floor(th/24.0); if (((((PetscInt)th) % 24) < 4) || ((((PetscInt)th) % 24) >= 20)) return(1.0e-40); else return(ctx->k1*PetscExpReal(7.0*PetscPowReal(PetscSinReal(.0625*PETSC_PI*(barth - 4.0)),.2))); }
static PetscErrorCode TSAdaptChoose_Basic(TSAdapt adapt,TS ts,PetscReal h,PetscInt *next_sc,PetscReal *next_h,PetscBool *accept,PetscReal *wlte) { TSAdapt_Basic *basic = (TSAdapt_Basic*)adapt->data; PetscInt order = PETSC_DECIDE; PetscReal enorm = -1; PetscReal safety = basic->safety; PetscReal hfac_lte,h_lte; PetscErrorCode ierr; PetscFunctionBegin; *next_sc = 0; /* Reuse the same order scheme */ if (ts->ops->evaluatewlte) { ierr = TSEvaluateWLTE(ts,adapt->wnormtype,&order,&enorm);CHKERRQ(ierr); if (enorm >= 0 && order < 1) SETERRQ1(PetscObjectComm((PetscObject)adapt),PETSC_ERR_ARG_OUTOFRANGE,"Computed error order %D must be positive",order); } else if (ts->ops->evaluatestep) { if (adapt->candidates.n < 1) SETERRQ(PetscObjectComm((PetscObject)adapt),PETSC_ERR_ARG_WRONGSTATE,"No candidate has been registered"); if (!adapt->candidates.inuse_set) SETERRQ1(PetscObjectComm((PetscObject)adapt),PETSC_ERR_ARG_WRONGSTATE,"The current in-use scheme is not among the %D candidates",adapt->candidates.n); if (!basic->Y) {ierr = VecDuplicate(ts->vec_sol,&basic->Y);CHKERRQ(ierr);} order = adapt->candidates.order[0]; ierr = TSEvaluateStep(ts,order-1,basic->Y,NULL);CHKERRQ(ierr); ierr = TSErrorWeightedNorm(ts,ts->vec_sol,basic->Y,adapt->wnormtype,&enorm);CHKERRQ(ierr); } if (enorm < 0) { *accept = PETSC_TRUE; *next_h = h; /* Reuse the old step */ *wlte = -1; /* Weighted local truncation error was not evaluated */ PetscFunctionReturn(0); } /* Determine whether the step is accepted of rejected */ if (enorm > 1) { if (!*accept) safety *= basic->reject_safety; /* The last attempt also failed, shorten more aggressively */ if (h < (1 + PETSC_SQRT_MACHINE_EPSILON)*adapt->dt_min) { ierr = PetscInfo2(adapt,"Estimated scaled local truncation error %g, accepting because step size %g is at minimum\n",(double)enorm,(double)h);CHKERRQ(ierr); *accept = PETSC_TRUE; } else if (basic->always_accept) { ierr = PetscInfo2(adapt,"Estimated scaled local truncation error %g, accepting step of size %g because always_accept is set\n",(double)enorm,(double)h);CHKERRQ(ierr); *accept = PETSC_TRUE; } else { ierr = PetscInfo2(adapt,"Estimated scaled local truncation error %g, rejecting step of size %g\n",(double)enorm,(double)h);CHKERRQ(ierr); *accept = PETSC_FALSE; } } else { ierr = PetscInfo2(adapt,"Estimated scaled local truncation error %g, accepting step of size %g\n",(double)enorm,(double)h);CHKERRQ(ierr); *accept = PETSC_TRUE; } /* The optimal new step based purely on local truncation error for this step. */ if (enorm > 0) hfac_lte = safety * PetscPowReal(enorm,((PetscReal)-1)/order); else hfac_lte = safety * PETSC_INFINITY; h_lte = h * PetscClipInterval(hfac_lte,basic->clip[0],basic->clip[1]); *next_h = PetscClipInterval(h_lte,adapt->dt_min,adapt->dt_max); *wlte = enorm; PetscFunctionReturn(0); }
PetscErrorCode DMADDASetParameters(DM dm,PetscInt dim, PetscInt *nodes,PetscInt *procs,PetscInt dof,PetscBool *periodic) { PetscErrorCode ierr; PetscMPIInt rank,size; MPI_Comm comm; PetscInt i; PetscInt nodes_total; PetscInt nodesleft; PetscInt procsleft; DM_ADDA *dd = (DM_ADDA*)dm->data; PetscFunctionBegin; ierr = PetscObjectGetComm((PetscObject)dm,&comm);CHKERRQ(ierr); ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); /* total number of nodes */ nodes_total = 1; for (i=0; i<dim; i++) nodes_total *= nodes[i]; dd->dim = dim; dd->dof = dof; dd->periodic = periodic; ierr = PetscMalloc(dim*sizeof(PetscInt), &(dd->nodes));CHKERRQ(ierr); ierr = PetscMemcpy(dd->nodes, nodes, dim*sizeof(PetscInt));CHKERRQ(ierr); /* procs */ ierr = PetscMalloc(dim*sizeof(PetscInt), &(dd->procs));CHKERRQ(ierr); /* create distribution of nodes to processors */ if (procs == NULL) { procs = dd->procs; nodesleft = nodes_total; procsleft = size; /* figure out a good way to split the array to several processors */ for (i=0; i<dim; i++) { if (i==dim-1) { procs[i] = procsleft; } else { /* calculate best partition */ procs[i] = (PetscInt)(((PetscReal) nodes[i])*PetscPowReal(((PetscReal) procsleft)/((PetscReal) nodesleft),1./((PetscReal)(dim-i)))+0.5); if (procs[i]<1) procs[i]=1; while (procs[i] > 0) { if (procsleft % procs[i]) procs[i]--; else break; } nodesleft /= nodes[i]; procsleft /= procs[i]; } } } else { /* user provided the number of processors */ ierr = PetscMemcpy(dd->procs, procs, dim*sizeof(PetscInt));CHKERRQ(ierr); } PetscFunctionReturn(0); }
PetscErrorCode MatCreate_LMVM(Mat B) { Mat_LMVM *lmvm; PetscErrorCode ierr; PetscFunctionBegin; ierr = PetscNewLog(B, &lmvm);CHKERRQ(ierr); B->data = (void*)lmvm; lmvm->m = 5; lmvm->k = -1; lmvm->nupdates = 0; lmvm->nrejects = 0; lmvm->nresets = 0; lmvm->ksp_max_it = 20; lmvm->ksp_rtol = 0.0; lmvm->ksp_atol = 0.0; lmvm->shift = 0.0; lmvm->eps = PetscPowReal(PETSC_MACHINE_EPSILON, 2.0/3.0); lmvm->allocated = PETSC_FALSE; lmvm->prev_set = PETSC_FALSE; lmvm->user_scale = PETSC_FALSE; lmvm->user_pc = PETSC_FALSE; lmvm->user_ksp = PETSC_FALSE; lmvm->square = PETSC_FALSE; B->ops->destroy = MatDestroy_LMVM; B->ops->setfromoptions = MatSetFromOptions_LMVM; B->ops->view = MatView_LMVM; B->ops->setup = MatSetUp_LMVM; B->ops->getvecs = MatGetVecs_LMVM; B->ops->shift = MatShift_LMVM; B->ops->duplicate = MatDuplicate_LMVM; B->ops->mult = MatMult_LMVM; B->ops->multadd = MatMultAdd_LMVM; B->ops->copy = MatCopy_LMVM; lmvm->ops->update = MatUpdate_LMVM; lmvm->ops->allocate = MatAllocate_LMVM; lmvm->ops->reset = MatReset_LMVM; ierr = KSPCreate(PetscObjectComm((PetscObject)B), &lmvm->J0ksp);CHKERRQ(ierr); ierr = PetscObjectIncrementTabLevel((PetscObject)lmvm->J0ksp, (PetscObject)B, 1);CHKERRQ(ierr); ierr = KSPSetOptionsPrefix(lmvm->J0ksp, "mat_lmvm_");CHKERRQ(ierr); ierr = KSPSetType(lmvm->J0ksp, KSPGMRES);CHKERRQ(ierr); ierr = KSPSetTolerances(lmvm->J0ksp, lmvm->ksp_rtol, lmvm->ksp_atol, PETSC_DEFAULT, lmvm->ksp_max_it);CHKERRQ(ierr); PetscFunctionReturn(0); }
static PetscErrorCode PetscDTGaussJacobiQuadrature1D_Internal(PetscInt npoints, PetscReal a, PetscReal b, PetscReal *x, PetscReal *w) { PetscInt maxIter = 100; PetscReal eps = 1.0e-8; PetscReal a1, a2, a3, a4, a5, a6; PetscInt k; PetscErrorCode ierr; PetscFunctionBegin; a1 = PetscPowReal(2.0, a+b+1); #if defined(PETSC_HAVE_TGAMMA) a2 = PetscTGamma(a + npoints + 1); a3 = PetscTGamma(b + npoints + 1); a4 = PetscTGamma(a + b + npoints + 1); #else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"tgamma() - math routine is unavailable."); #endif ierr = PetscDTFactorial_Internal(npoints, &a5);CHKERRQ(ierr); a6 = a1 * a2 * a3 / a4 / a5; /* Computes the m roots of P_{m}^{a,b} on [-1,1] by Newton's method with Chebyshev points as initial guesses. Algorithm implemented from the pseudocode given by Karniadakis and Sherwin and Python in FIAT */ for (k = 0; k < npoints; ++k) { PetscReal r = -PetscCosReal((2.0*k + 1.0) * PETSC_PI / (2.0 * npoints)), dP; PetscInt j; if (k > 0) r = 0.5 * (r + x[k-1]); for (j = 0; j < maxIter; ++j) { PetscReal s = 0.0, delta, f, fp; PetscInt i; for (i = 0; i < k; ++i) s = s + 1.0 / (r - x[i]); ierr = PetscDTComputeJacobi(a, b, npoints, r, &f);CHKERRQ(ierr); ierr = PetscDTComputeJacobiDerivative(a, b, npoints, r, &fp);CHKERRQ(ierr); delta = f / (fp - f * s); r = r - delta; if (PetscAbsReal(delta) < eps) break; } x[k] = r; ierr = PetscDTComputeJacobiDerivative(a, b, npoints, x[k], &dP);CHKERRQ(ierr); w[k] = a6 / (1.0 - PetscSqr(x[k])) / PetscSqr(dP); } PetscFunctionReturn(0); }
/* ------------------------------------------------------------------- */ PetscErrorCode InitialConditions(DM da,Vec U) { PetscErrorCode ierr; PetscInt i,j,xs,ys,xm,ym,Mx,My; Field **u; PetscReal hx,hy,x,y; PetscFunctionBegin; ierr = DMDAGetInfo(da,PETSC_IGNORE,&Mx,&My,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE);CHKERRQ(ierr); hx = 2.5/(PetscReal)(Mx); hy = 2.5/(PetscReal)(My); /* Get pointers to vector data */ ierr = DMDAVecGetArray(da,U,&u);CHKERRQ(ierr); /* Get local grid boundaries */ ierr = DMDAGetCorners(da,&xs,&ys,NULL,&xm,&ym,NULL);CHKERRQ(ierr); /* Compute function over the locally owned part of the grid */ for (j=ys; j<ys+ym; j++) { y = j*hy; for (i=xs; i<xs+xm; i++) { x = i*hx; if ((1.0 <= x) && (x <= 1.5) && (1.0 <= y) && (y <= 1.5)) u[j][i].v = .25*PetscPowReal(PetscSinReal(4.0*PETSC_PI*x),2.0)*PetscPowReal(PetscSinReal(4.0*PETSC_PI*y),2.0); else u[j][i].v = 0.0; u[j][i].u = 1.0 - 2.0*u[j][i].v; } } /* Restore vectors */ ierr = DMDAVecRestoreArray(da,U,&u);CHKERRQ(ierr); PetscFunctionReturn(0); }
static PetscErrorCode TSGLLEAdaptChoose_Both(TSGLLEAdapt adapt,PetscInt n,const PetscInt orders[],const PetscReal errors[],const PetscReal cost[],PetscInt cur,PetscReal h,PetscReal tleft,PetscInt *next_sc,PetscReal *next_h,PetscBool *finish) { TSGLLEAdapt_Both *both = (TSGLLEAdapt_Both*)adapt->data; PetscErrorCode ierr; PetscReal dec = 0.2,inc = 5.0,safe = 0.9; struct {PetscInt id; PetscReal h,eff;} best={-1,0,0},trial={-1,0,0},current={-1,0,0}; PetscInt i; PetscFunctionBegin; for (i=0; i<n; i++) { PetscReal optimal; trial.id = i; optimal = PetscPowReal((PetscReal)errors[i],(PetscReal)-1./(safe*orders[i])); trial.h = h*optimal; trial.eff = trial.h/cost[i]; if (trial.eff > best.eff) {ierr = PetscMemcpy(&best,&trial,sizeof(trial));CHKERRQ(ierr);} if (i == cur) {ierr = PetscMemcpy(¤t,&trial,sizeof(trial));CHKERRQ(ierr);} } /* Only switch orders if the scheme offers significant benefits over the current one. When the scheme is not changing, only change step size if it offers significant benefits. */ if (best.eff < 1.2*current.eff || both->count_at_order < orders[cur]+2) { PetscReal last_desired_h; *next_sc = current.id; last_desired_h = both->desired_h; both->desired_h = PetscMax(h*dec,PetscMin(h*inc,current.h)); *next_h = (both->count_at_order > 0) ? PetscSqrtReal(last_desired_h * both->desired_h) : both->desired_h; both->count_at_order++; } else { PetscReal rat = cost[best.id]/cost[cur]; *next_sc = best.id; *next_h = PetscMax(h*rat*dec,PetscMin(h*rat*inc,best.h)); both->count_at_order = 0; both->desired_h = best.h; } if (*next_h > tleft) { *finish = PETSC_TRUE; *next_h = tleft; } else *finish = PETSC_FALSE; PetscFunctionReturn(0); }
/* PEPComputeScaleFactor - compute sfactor as described in [Betcke 2008]. */ PetscErrorCode PEPComputeScaleFactor(PEP pep) { PetscErrorCode ierr; PetscBool has0,has1,flg; PetscReal norm0,norm1; Mat T[2]; PEPBasis basis; PetscFunctionBegin; if (pep->scale==PEP_SCALE_NONE || pep->scale==PEP_SCALE_DIAGONAL) { /* no scalar scaling */ pep->sfactor = 1.0; PetscFunctionReturn(0); } if (pep->sfactor_set) PetscFunctionReturn(0); /* user provided value */ ierr = PEPGetBasis(pep,&basis);CHKERRQ(ierr); if (basis==PEP_BASIS_MONOMIAL) { ierr = STGetTransform(pep->st,&flg);CHKERRQ(ierr); if (flg) { ierr = STGetTOperators(pep->st,0,&T[0]);CHKERRQ(ierr); ierr = STGetTOperators(pep->st,pep->nmat-1,&T[1]);CHKERRQ(ierr); } else { T[0] = pep->A[0]; T[1] = pep->A[pep->nmat-1]; } if (pep->nmat>2) { ierr = MatHasOperation(T[0],MATOP_NORM,&has0);CHKERRQ(ierr); ierr = MatHasOperation(T[1],MATOP_NORM,&has1);CHKERRQ(ierr); if (has0 && has1) { ierr = MatNorm(T[0],NORM_INFINITY,&norm0);CHKERRQ(ierr); ierr = MatNorm(T[1],NORM_INFINITY,&norm1);CHKERRQ(ierr); pep->sfactor = PetscPowReal(norm0/norm1,1.0/(pep->nmat-1)); } else { pep->sfactor = 1.0; } } } else pep->sfactor = 1.0; PetscFunctionReturn(0); }
static PetscErrorCode TSGLLEAdaptChoose_Size(TSGLLEAdapt adapt,PetscInt n,const PetscInt orders[],const PetscReal errors[],const PetscReal cost[],PetscInt cur,PetscReal h,PetscReal tleft,PetscInt *next_sc,PetscReal *next_h,PetscBool *finish) { TSGLLEAdapt_Size *sz = (TSGLLEAdapt_Size*)adapt->data; PetscReal dec = 0.2,inc = 5.0,safe = 0.9,optimal,last_desired_h; PetscFunctionBegin; *next_sc = cur; optimal = PetscPowReal((PetscReal)errors[cur],(PetscReal)-1./(safe*orders[cur])); /* Step sizes oscillate when there is no smoothing. Here we use a geometric mean of the current step size and the * one that would have been taken (without smoothing) on the last step. */ last_desired_h = sz->desired_h; sz->desired_h = h*PetscMax(dec,PetscMin(inc,optimal)); /* Trim to [dec,inc] */ /* Normally only happens on the first step */ if (last_desired_h > 1e-14) *next_h = PetscSqrtReal(last_desired_h * sz->desired_h); else *next_h = sz->desired_h; if (*next_h > tleft) { *finish = PETSC_TRUE; *next_h = tleft; } else *finish = PETSC_FALSE; PetscFunctionReturn(0); }
// Find the squarest grid, with best[0] <= best[1] <= best[2]. PetscErrorCode ProcessGridFindSquarest(PetscMPIInt nranks,PetscInt best[3]) { PetscMPIInt target,s,a,b,c; PetscFunctionBegin; target = PetscCeilReal(PetscPowReal(nranks,1./3)); if (target*target*target > nranks) target--; // if ceil was overzealous for (a=target; a>=1; a--) { if (nranks%a) continue; // Not a candidate factor s = PetscCeilReal(PetscSqrtReal(nranks/a)); if (s*s > nranks/a) s--; // If our ceil was overzealous for (b=s; b>=a; b--) { if (nranks/a % b == 0) { // The first proper divisor is the one I want c = nranks/a/b; best[0] = a; best[1] = b; best[2] = c; PetscFunctionReturn(0); } } } SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_ARG_INCOMP,"Could not find squarest grid"); PetscFunctionReturn(0); }
PetscErrorCode PetscExp10(PetscReal d,PetscReal *result) { PetscFunctionBegin; *result = PetscPowReal((PetscReal)10.0,d); PetscFunctionReturn(0); }
/**************************************xxt.c***********************************/ PetscInt XXT_stats(xxt_ADT xxt_handle) { PetscInt op[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_ADD,GL_MIN,GL_MAX,GL_ADD}; PetscInt fop[] = {NON_UNIFORM,GL_MIN,GL_MAX,GL_ADD}; PetscInt vals[9], work[9]; PetscScalar fvals[3], fwork[3]; PetscErrorCode ierr; PCTFS_comm_init(); check_handle(xxt_handle); /* if factorization not done there are no stats */ if (!xxt_handle->info||!xxt_handle->mvi) { if (!PCTFS_my_id) { ierr = PetscPrintf(PETSC_COMM_WORLD,"XXT_stats() :: no stats available!\n");CHKERRQ(ierr); } return 1; } vals[0]=vals[1]=vals[2]=xxt_handle->info->nnz; vals[3]=vals[4]=vals[5]=xxt_handle->mvi->n; vals[6]=vals[7]=vals[8]=xxt_handle->info->msg_buf_sz; PCTFS_giop(vals,work,sizeof(op)/sizeof(op[0])-1,op); fvals[0]=fvals[1]=fvals[2] =xxt_handle->info->tot_solve_time/xxt_handle->info->nsolves++; PCTFS_grop(fvals,fwork,sizeof(fop)/sizeof(fop[0])-1,fop); if (!PCTFS_my_id) { ierr = PetscPrintf(PETSC_COMM_WORLD,"%D :: min xxt_nnz=%D\n",PCTFS_my_id,vals[0]);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"%D :: max xxt_nnz=%D\n",PCTFS_my_id,vals[1]);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"%D :: avg xxt_nnz=%g\n",PCTFS_my_id,1.0*vals[2]/PCTFS_num_nodes);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"%D :: tot xxt_nnz=%D\n",PCTFS_my_id,vals[2]);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"%D :: xxt C(2d) =%g\n",PCTFS_my_id,vals[2]/(PetscPowReal(1.0*vals[5],1.5)));CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"%D :: xxt C(3d) =%g\n",PCTFS_my_id,vals[2]/(PetscPowReal(1.0*vals[5],1.6667)));CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"%D :: min xxt_n =%D\n",PCTFS_my_id,vals[3]);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"%D :: max xxt_n =%D\n",PCTFS_my_id,vals[4]);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"%D :: avg xxt_n =%g\n",PCTFS_my_id,1.0*vals[5]/PCTFS_num_nodes);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"%D :: tot xxt_n =%D\n",PCTFS_my_id,vals[5]);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"%D :: min xxt_buf=%D\n",PCTFS_my_id,vals[6]);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"%D :: max xxt_buf=%D\n",PCTFS_my_id,vals[7]);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"%D :: avg xxt_buf=%g\n",PCTFS_my_id,1.0*vals[8]/PCTFS_num_nodes);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"%D :: min xxt_slv=%g\n",PCTFS_my_id,fvals[0]);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"%D :: max xxt_slv=%g\n",PCTFS_my_id,fvals[1]);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"%D :: avg xxt_slv=%g\n",PCTFS_my_id,fvals[2]/PCTFS_num_nodes);CHKERRQ(ierr); } return(0); }
/* PEPBuildDiagonalScaling - compute two diagonal matrices to be applied for balancing in polynomial eigenproblems. */ PetscErrorCode PEPBuildDiagonalScaling(PEP pep) { PetscErrorCode ierr; PetscInt it,i,j,k,nmat,nr,e,nz,lst,lend,nc=0,*cols,emax,emin,emaxl,eminl; const PetscInt *cidx,*ridx; Mat M,*T,A; PetscMPIInt n; PetscBool cont=PETSC_TRUE,flg=PETSC_FALSE; PetscScalar *array,*Dr,*Dl,t; PetscReal l2,d,*rsum,*aux,*csum,w=1.0; MatStructure str; MatInfo info; PetscFunctionBegin; l2 = 2*PetscLogReal(2.0); nmat = pep->nmat; ierr = PetscMPIIntCast(pep->n,&n); ierr = STGetMatStructure(pep->st,&str);CHKERRQ(ierr); ierr = PetscMalloc1(nmat,&T);CHKERRQ(ierr); for (k=0;k<nmat;k++) { ierr = STGetTOperators(pep->st,k,&T[k]);CHKERRQ(ierr); } /* Form local auxiliar matrix M */ ierr = PetscObjectTypeCompareAny((PetscObject)T[0],&cont,MATMPIAIJ,MATSEQAIJ);CHKERRQ(ierr); if (!cont) SETERRQ(PetscObjectComm((PetscObject)T[0]),PETSC_ERR_SUP,"Only for MPIAIJ or SEQAIJ matrix types"); ierr = PetscObjectTypeCompare((PetscObject)T[0],MATMPIAIJ,&cont);CHKERRQ(ierr); if (cont) { ierr = MatMPIAIJGetLocalMat(T[0],MAT_INITIAL_MATRIX,&M);CHKERRQ(ierr); flg = PETSC_TRUE; } else { ierr = MatDuplicate(T[0],MAT_COPY_VALUES,&M);CHKERRQ(ierr); } ierr = MatGetInfo(M,MAT_LOCAL,&info);CHKERRQ(ierr); nz = info.nz_used; ierr = MatSeqAIJGetArray(M,&array);CHKERRQ(ierr); for (i=0;i<nz;i++) { t = PetscAbsScalar(array[i]); array[i] = t*t; } ierr = MatSeqAIJRestoreArray(M,&array);CHKERRQ(ierr); for (k=1;k<nmat;k++) { if (flg) { ierr = MatMPIAIJGetLocalMat(T[k],MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); } else { if (str==SAME_NONZERO_PATTERN) { ierr = MatCopy(T[k],A,SAME_NONZERO_PATTERN);CHKERRQ(ierr); } else { ierr = MatDuplicate(T[k],MAT_COPY_VALUES,&A);CHKERRQ(ierr); } } ierr = MatGetInfo(A,MAT_LOCAL,&info);CHKERRQ(ierr); nz = info.nz_used; ierr = MatSeqAIJGetArray(A,&array);CHKERRQ(ierr); for (i=0;i<nz;i++) { t = PetscAbsScalar(array[i]); array[i] = t*t; } ierr = MatSeqAIJRestoreArray(A,&array);CHKERRQ(ierr); w *= pep->slambda*pep->slambda*pep->sfactor; ierr = MatAXPY(M,w,A,str);CHKERRQ(ierr); if (flg || str!=SAME_NONZERO_PATTERN || k==nmat-2) { ierr = MatDestroy(&A);CHKERRQ(ierr); } } ierr = MatGetRowIJ(M,0,PETSC_FALSE,PETSC_FALSE,&nr,&ridx,&cidx,&cont);CHKERRQ(ierr); if (!cont) SETERRQ(PetscObjectComm((PetscObject)T[0]), PETSC_ERR_SUP,"It is not possible to compute scaling diagonals for these PEP matrices"); ierr = MatGetInfo(M,MAT_LOCAL,&info);CHKERRQ(ierr); nz = info.nz_used; ierr = VecGetOwnershipRange(pep->Dl,&lst,&lend);CHKERRQ(ierr); ierr = PetscMalloc4(nr,&rsum,pep->n,&csum,pep->n,&aux,PetscMin(pep->n-lend+lst,nz),&cols);CHKERRQ(ierr); ierr = VecSet(pep->Dr,1.0);CHKERRQ(ierr); ierr = VecSet(pep->Dl,1.0);CHKERRQ(ierr); ierr = VecGetArray(pep->Dl,&Dl);CHKERRQ(ierr); ierr = VecGetArray(pep->Dr,&Dr);CHKERRQ(ierr); ierr = MatSeqAIJGetArray(M,&array);CHKERRQ(ierr); ierr = PetscMemzero(aux,pep->n*sizeof(PetscReal));CHKERRQ(ierr); for (j=0;j<nz;j++) { /* Search non-zero columns outsize lst-lend */ if (aux[cidx[j]]==0 && (cidx[j]<lst || lend<=cidx[j])) cols[nc++] = cidx[j]; /* Local column sums */ aux[cidx[j]] += PetscAbsScalar(array[j]); } for (it=0;it<pep->sits && cont;it++) { emaxl = 0; eminl = 0; /* Column sum */ if (it>0) { /* it=0 has been already done*/ ierr = MatSeqAIJGetArray(M,&array);CHKERRQ(ierr); ierr = PetscMemzero(aux,pep->n*sizeof(PetscReal));CHKERRQ(ierr); for (j=0;j<nz;j++) aux[cidx[j]] += PetscAbsScalar(array[j]); ierr = MatSeqAIJRestoreArray(M,&array);CHKERRQ(ierr); } ierr = MPI_Allreduce(aux,csum,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)pep->Dr)); /* Update Dr */ for (j=lst;j<lend;j++) { d = PetscLogReal(csum[j])/l2; e = -(PetscInt)((d < 0)?(d-0.5):(d+0.5)); d = PetscPowReal(2.0,e); Dr[j-lst] *= d; aux[j] = d*d; emaxl = PetscMax(emaxl,e); eminl = PetscMin(eminl,e); } for (j=0;j<nc;j++) { d = PetscLogReal(csum[cols[j]])/l2; e = -(PetscInt)((d < 0)?(d-0.5):(d+0.5)); d = PetscPowReal(2.0,e); aux[cols[j]] = d*d; emaxl = PetscMax(emaxl,e); eminl = PetscMin(eminl,e); } /* Scale M */ ierr = MatSeqAIJGetArray(M,&array);CHKERRQ(ierr); for (j=0;j<nz;j++) { array[j] *= aux[cidx[j]]; } ierr = MatSeqAIJRestoreArray(M,&array);CHKERRQ(ierr); /* Row sum */ ierr = PetscMemzero(rsum,nr*sizeof(PetscReal));CHKERRQ(ierr); ierr = MatSeqAIJGetArray(M,&array);CHKERRQ(ierr); for (i=0;i<nr;i++) { for (j=ridx[i];j<ridx[i+1];j++) rsum[i] += PetscAbsScalar(array[j]); /* Update Dl */ d = PetscLogReal(rsum[i])/l2; e = -(PetscInt)((d < 0)?(d-0.5):(d+0.5)); d = PetscPowReal(2.0,e); Dl[i] *= d; /* Scale M */ for (j=ridx[i];j<ridx[i+1];j++) array[j] *= d*d; emaxl = PetscMax(emaxl,e); eminl = PetscMin(eminl,e); } ierr = MatSeqAIJRestoreArray(M,&array);CHKERRQ(ierr); /* Compute global max and min */ ierr = MPI_Allreduce(&emaxl,&emax,1,MPIU_INT,MPIU_MAX,PetscObjectComm((PetscObject)pep->Dl)); ierr = MPI_Allreduce(&eminl,&emin,1,MPIU_INT,MPIU_MIN,PetscObjectComm((PetscObject)pep->Dl)); if (emax<=emin+2) cont = PETSC_FALSE; } ierr = VecRestoreArray(pep->Dr,&Dr);CHKERRQ(ierr); ierr = VecRestoreArray(pep->Dl,&Dl);CHKERRQ(ierr); /* Free memory*/ ierr = MatDestroy(&M);CHKERRQ(ierr); ierr = PetscFree4(rsum,csum,aux,cols);CHKERRQ(ierr); ierr = PetscFree(T);CHKERRQ(ierr); PetscFunctionReturn(0); }
static PetscErrorCode TaoSolve_SSFLS(Tao tao) { TAO_SSLS *ssls = (TAO_SSLS *)tao->data; PetscReal psi, ndpsi, normd, innerd, t=0; PetscReal delta, rho; TaoConvergedReason reason; TaoLineSearchConvergedReason ls_reason; PetscErrorCode ierr; PetscFunctionBegin; /* Assume that Setup has been called! Set the structure for the Jacobian and create a linear solver. */ delta = ssls->delta; rho = ssls->rho; ierr = TaoComputeVariableBounds(tao);CHKERRQ(ierr); /* Project solution inside bounds */ ierr = VecMedian(tao->XL,tao->solution,tao->XU,tao->solution);CHKERRQ(ierr); ierr = TaoLineSearchSetObjectiveAndGradientRoutine(tao->linesearch,Tao_SSLS_FunctionGradient,tao);CHKERRQ(ierr); ierr = TaoLineSearchSetObjectiveRoutine(tao->linesearch,Tao_SSLS_Function,tao);CHKERRQ(ierr); /* Calculate the function value and fischer function value at the current iterate */ ierr = TaoLineSearchComputeObjectiveAndGradient(tao->linesearch,tao->solution,&psi,ssls->dpsi);CHKERRQ(ierr); ierr = VecNorm(ssls->dpsi,NORM_2,&ndpsi);CHKERRQ(ierr); while (PETSC_TRUE) { ierr=PetscInfo3(tao, "iter: %D, merit: %g, ndpsi: %g\n",tao->niter, (double)ssls->merit, (double)ndpsi);CHKERRQ(ierr); /* Check the termination criteria */ ierr = TaoMonitor(tao,tao->niter,ssls->merit,ndpsi,0.0,t,&reason);CHKERRQ(ierr); if (reason!=TAO_CONTINUE_ITERATING) break; tao->niter++; /* Calculate direction. (Really negative of newton direction. Therefore, rest of the code uses -d.) */ ierr = KSPSetOperators(tao->ksp,tao->jacobian,tao->jacobian_pre);CHKERRQ(ierr); ierr = KSPSolve(tao->ksp,ssls->ff,tao->stepdirection);CHKERRQ(ierr); ierr = KSPGetIterationNumber(tao->ksp,&tao->ksp_its);CHKERRQ(ierr); tao->ksp_tot_its+=tao->ksp_its; ierr = VecCopy(tao->stepdirection,ssls->w);CHKERRQ(ierr); ierr = VecScale(ssls->w,-1.0);CHKERRQ(ierr); ierr = VecBoundGradientProjection(ssls->w,tao->solution,tao->XL,tao->XU,ssls->w);CHKERRQ(ierr); ierr = VecNorm(ssls->w,NORM_2,&normd);CHKERRQ(ierr); ierr = VecDot(ssls->w,ssls->dpsi,&innerd);CHKERRQ(ierr); /* Make sure that we have a descent direction */ if (innerd >= -delta*PetscPowReal(normd, rho)) { ierr = PetscInfo(tao, "newton direction not descent\n");CHKERRQ(ierr); ierr = VecCopy(ssls->dpsi,tao->stepdirection);CHKERRQ(ierr); ierr = VecDot(ssls->w,ssls->dpsi,&innerd);CHKERRQ(ierr); } ierr = VecScale(tao->stepdirection, -1.0);CHKERRQ(ierr); innerd = -innerd; ierr = TaoLineSearchSetInitialStepLength(tao->linesearch,1.0);CHKERRQ(ierr); ierr = TaoLineSearchApply(tao->linesearch,tao->solution,&psi,ssls->dpsi,tao->stepdirection,&t,&ls_reason);CHKERRQ(ierr); ierr = VecNorm(ssls->dpsi,NORM_2,&ndpsi);CHKERRQ(ierr); } PetscFunctionReturn(0); }
/* EPSPartialLanczos - Partial reorthogonalization. */ static PetscErrorCode EPSPartialLanczos(EPS eps,PetscReal *alpha,PetscReal *beta,PetscInt k,PetscInt *M,PetscBool *breakdown,PetscReal anorm) { PetscErrorCode ierr; EPS_LANCZOS *lanczos = (EPS_LANCZOS*)eps->data; PetscInt i,j,m = *M; Vec vj,vj1; PetscReal norm,*omega,lomega[100],*omega_old,lomega_old[100],eps1,delta,eta; PetscBool *which,lwhich[100],*which2,lwhich2[100]; PetscBool reorth = PETSC_FALSE,force_reorth = PETSC_FALSE; PetscBool fro = PETSC_FALSE,estimate_anorm = PETSC_FALSE; PetscScalar *hwork,lhwork[100]; PetscFunctionBegin; if (m>100) { ierr = PetscMalloc5(m,&omega,m,&omega_old,m,&which,m,&which2,m,&hwork);CHKERRQ(ierr); } else { omega = lomega; omega_old = lomega_old; which = lwhich; which2 = lwhich2; hwork = lhwork; } eps1 = PetscSqrtReal((PetscReal)eps->n)*PETSC_MACHINE_EPSILON/2; delta = PETSC_SQRT_MACHINE_EPSILON/PetscSqrtReal((PetscReal)eps->ncv); eta = PetscPowReal(PETSC_MACHINE_EPSILON,3.0/4.0)/PetscSqrtReal((PetscReal)eps->ncv); if (anorm < 0.0) { anorm = 1.0; estimate_anorm = PETSC_TRUE; } for (i=0;i<m-k;i++) omega[i] = omega_old[i] = 0.0; for (i=0;i<k;i++) which[i] = PETSC_TRUE; ierr = BVSetActiveColumns(eps->V,0,m);CHKERRQ(ierr); for (j=k;j<m;j++) { ierr = BVGetColumn(eps->V,j,&vj);CHKERRQ(ierr); ierr = BVGetColumn(eps->V,j+1,&vj1);CHKERRQ(ierr); ierr = STApply(eps->st,vj,vj1);CHKERRQ(ierr); ierr = BVRestoreColumn(eps->V,j,&vj);CHKERRQ(ierr); ierr = BVRestoreColumn(eps->V,j+1,&vj1);CHKERRQ(ierr); if (fro) { /* Lanczos step with full reorthogonalization */ ierr = BVOrthogonalizeColumn(eps->V,j+1,hwork,&norm,breakdown);CHKERRQ(ierr); alpha[j] = PetscRealPart(hwork[j]); } else { /* Lanczos step */ which[j] = PETSC_TRUE; if (j-2>=k) which[j-2] = PETSC_FALSE; ierr = BVOrthogonalizeSomeColumn(eps->V,j+1,which,hwork,&norm,breakdown);CHKERRQ(ierr); alpha[j] = PetscRealPart(hwork[j]); beta[j] = norm; /* Estimate ||A|| if needed */ if (estimate_anorm) { if (j>k) anorm = PetscMax(anorm,PetscAbsReal(alpha[j])+norm+beta[j-1]); else anorm = PetscMax(anorm,PetscAbsReal(alpha[j])+norm); } /* Check if reorthogonalization is needed */ reorth = PETSC_FALSE; if (j>k) { update_omega(omega,omega_old,j,alpha,beta-1,eps1,anorm); for (i=0;i<j-k;i++) { if (PetscAbsScalar(omega[i]) > delta) reorth = PETSC_TRUE; } } if (reorth || force_reorth) { for (i=0;i<k;i++) which2[i] = PETSC_FALSE; for (i=k;i<=j;i++) which2[i] = PETSC_TRUE; if (lanczos->reorthog == EPS_LANCZOS_REORTHOG_PERIODIC) { /* Periodic reorthogonalization */ if (force_reorth) force_reorth = PETSC_FALSE; else force_reorth = PETSC_TRUE; for (i=0;i<j-k;i++) omega[i] = eps1; } else { /* Partial reorthogonalization */ if (force_reorth) force_reorth = PETSC_FALSE; else { force_reorth = PETSC_TRUE; compute_int(which2+k,omega,j-k,delta,eta); for (i=0;i<j-k;i++) { if (which2[i+k]) omega[i] = eps1; } } } ierr = BVOrthogonalizeSomeColumn(eps->V,j+1,which2,hwork,&norm,breakdown);CHKERRQ(ierr); } } if (*breakdown || norm < eps->n*anorm*PETSC_MACHINE_EPSILON) { *M = j+1; break; } if (!fro && norm*delta < anorm*eps1) { fro = PETSC_TRUE; ierr = PetscInfo1(eps,"Switching to full reorthogonalization at iteration %D\n",eps->its);CHKERRQ(ierr); } beta[j] = norm; ierr = BVScaleColumn(eps->V,j+1,1.0/norm);CHKERRQ(ierr); } if (m>100) { ierr = PetscFree5(omega,omega_old,which,which2,hwork);CHKERRQ(ierr); } PetscFunctionReturn(0); }
PetscErrorCode DSFunction_EXP_NHEP_PADE(DS ds) { #if defined(PETSC_MISSING_LAPACK_GESV) || defined(SLEPC_MISSING_LAPACK_LANGE) PetscFunctionBegin; SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESV/LANGE - Lapack routines are unavailable"); #else PetscErrorCode ierr; PetscBLASInt n,ld,ld2,*ipiv,info,inc=1; PetscInt j,k,odd; const PetscInt p=MAX_PADE; PetscReal c[MAX_PADE+1],s; PetscScalar scale,mone=-1.0,one=1.0,two=2.0,zero=0.0; PetscScalar *A,*A2,*Q,*P,*W,*aux; PetscFunctionBegin; ierr = PetscBLASIntCast(ds->n,&n);CHKERRQ(ierr); ierr = PetscBLASIntCast(ds->ld,&ld);CHKERRQ(ierr); ld2 = ld*ld; ierr = DSAllocateWork_Private(ds,0,ld,ld);CHKERRQ(ierr); ipiv = ds->iwork; if (!ds->mat[DS_MAT_W]) { ierr = DSAllocateMat_Private(ds,DS_MAT_W);CHKERRQ(ierr); } if (!ds->mat[DS_MAT_Z]) { ierr = DSAllocateMat_Private(ds,DS_MAT_Z);CHKERRQ(ierr); } A = ds->mat[DS_MAT_A]; A2 = ds->mat[DS_MAT_Z]; Q = ds->mat[DS_MAT_Q]; P = ds->mat[DS_MAT_F]; W = ds->mat[DS_MAT_W]; /* Pade' coefficients */ c[0] = 1.0; for (k=1;k<=p;k++) { c[k] = c[k-1]*(p+1-k)/(k*(2*p+1-k)); } /* Scaling */ s = LAPACKlange_("I",&n,&n,A,&ld,ds->rwork); if (s>0.5) { s = PetscMax(0,(int)(PetscLogReal(s)/PetscLogReal(2.0)) + 2); scale = PetscPowReal(2.0,(-1)*s); PetscStackCallBLAS("BLASscal",BLASscal_(&ld2,&scale,A,&inc)); } /* Horner evaluation */ PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,A,&ld,A,&ld,&zero,A2,&ld)); ierr = PetscMemzero(Q,ld*ld*sizeof(PetscScalar));CHKERRQ(ierr); ierr = PetscMemzero(P,ld*ld*sizeof(PetscScalar));CHKERRQ(ierr); for (j=0;j<n;j++) { Q[j+j*ld] = c[p]; P[j+j*ld] = c[p-1]; } odd = 1; for (k=p-1;k>0;k--) { if (odd==1) { PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,Q,&ld,A2,&ld,&zero,W,&ld)); aux = Q; Q = W; W = aux; for (j=0;j<n;j++) Q[j+j*ld] = Q[j+j*ld] + c[k-1]; } else { PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,P,&ld,A2,&ld,&zero,W,&ld)); aux = P; P = W; W = aux; for (j=0;j<n;j++) P[j+j*ld] = P[j+j*ld] + c[k-1]; } odd = 1-odd; } if (odd==1) { PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,Q,&ld,A,&ld,&zero,W,&ld)); aux = Q; Q = W; W = aux; PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&ld2,&mone,P,&inc,Q,&inc)); PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&n,Q,&ld,ipiv,P,&ld,&info)); PetscStackCallBLAS("BLASscal",BLASscal_(&ld2,&two,P,&inc)); for (j=0;j<n;j++) P[j+j*ld] = P[j+j*ld] + 1.0; PetscStackCallBLAS("BLASscal",BLASscal_(&ld2,&mone,P,&inc)); } else { PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,P,&ld,A,&ld,&zero,W,&ld)); aux = P; P = W; W = aux; PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&ld2,&mone,P,&inc,Q,&inc)); PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&n,Q,&ld,ipiv,P,&ld,&info)); PetscStackCallBLAS("BLASscal",BLASscal_(&ld2,&two,P,&inc)); for (j=0;j<n;j++) P[j+j*ld] = P[j+j*ld] + 1.0; } for (k=1;k<=s;k++) { PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,P,&ld,P,&ld,&zero,W,&ld)); ierr = PetscMemcpy(P,W,ld2*sizeof(PetscScalar));CHKERRQ(ierr); } if (P!=ds->mat[DS_MAT_F]) { ierr = PetscMemcpy(ds->mat[DS_MAT_F],P,ld2*sizeof(PetscScalar));CHKERRQ(ierr); } PetscFunctionReturn(0); #endif }
/**************************************xyt.c***********************************/ static PetscInt xyt_generate(xyt_ADT xyt_handle) { PetscInt i,j,k,idx; PetscInt dim, col; PetscScalar *u, *uu, *v, *z, *w, alpha, alpha_w; PetscInt *segs; PetscInt op[] = {GL_ADD,0}; PetscInt off, len; PetscScalar *x_ptr, *y_ptr; PetscInt *iptr, flag; PetscInt start =0, end, work; PetscInt op2[] = {GL_MIN,0}; PCTFS_gs_ADT PCTFS_gs_handle; PetscInt *nsep, *lnsep, *fo; PetscInt a_n =xyt_handle->mvi->n; PetscInt a_m =xyt_handle->mvi->m; PetscInt *a_local2global=xyt_handle->mvi->local2global; PetscInt level; PetscInt n, m; PetscInt *xcol_sz, *xcol_indices, *stages; PetscScalar **xcol_vals, *x; PetscInt *ycol_sz, *ycol_indices; PetscScalar **ycol_vals, *y; PetscInt n_global; PetscInt xt_nnz =0, xt_max_nnz=0; PetscInt yt_nnz =0, yt_max_nnz=0; PetscInt xt_zero_nnz =0; PetscInt xt_zero_nnz_0=0; PetscInt yt_zero_nnz =0; PetscInt yt_zero_nnz_0=0; PetscBLASInt i1 = 1,dlen; PetscScalar dm1 = -1.0; PetscErrorCode ierr; n =xyt_handle->mvi->n; nsep =xyt_handle->info->nsep; lnsep =xyt_handle->info->lnsep; fo =xyt_handle->info->fo; end =lnsep[0]; level =xyt_handle->level; PCTFS_gs_handle=xyt_handle->mvi->PCTFS_gs_handle; /* is there a null space? */ /* LATER add in ability to detect null space by checking alpha */ for (i=0, j=0; i<=level; i++) j+=nsep[i]; m = j-xyt_handle->ns; if (m!=j) { ierr = PetscPrintf(PETSC_COMM_WORLD,"xyt_generate() :: null space exists %D %D %D\n",m,j,xyt_handle->ns);CHKERRQ(ierr); } ierr = PetscInfo2(0,"xyt_generate() :: X(%D,%D)\n",n,m);CHKERRQ(ierr); /* get and initialize storage for x local */ /* note that x local is nxm and stored by columns */ xcol_sz = (PetscInt*) malloc(m*sizeof(PetscInt)); xcol_indices = (PetscInt*) malloc((2*m+1)*sizeof(PetscInt)); xcol_vals = (PetscScalar**) malloc(m*sizeof(PetscScalar*)); for (i=j=0; i<m; i++, j+=2) { xcol_indices[j]=xcol_indices[j+1]=xcol_sz[i]=-1; xcol_vals[i] = NULL; } xcol_indices[j]=-1; /* get and initialize storage for y local */ /* note that y local is nxm and stored by columns */ ycol_sz = (PetscInt*) malloc(m*sizeof(PetscInt)); ycol_indices = (PetscInt*) malloc((2*m+1)*sizeof(PetscInt)); ycol_vals = (PetscScalar**) malloc(m*sizeof(PetscScalar*)); for (i=j=0; i<m; i++, j+=2) { ycol_indices[j]=ycol_indices[j+1]=ycol_sz[i]=-1; ycol_vals[i] = NULL; } ycol_indices[j]=-1; /* size of separators for each sub-hc working from bottom of tree to top */ /* this looks like nsep[]=segments */ stages = (PetscInt*) malloc((level+1)*sizeof(PetscInt)); segs = (PetscInt*) malloc((level+1)*sizeof(PetscInt)); PCTFS_ivec_zero(stages,level+1); PCTFS_ivec_copy(segs,nsep,level+1); for (i=0; i<level; i++) segs[i+1] += segs[i]; stages[0] = segs[0]; /* temporary vectors */ u = (PetscScalar*) malloc(n*sizeof(PetscScalar)); z = (PetscScalar*) malloc(n*sizeof(PetscScalar)); v = (PetscScalar*) malloc(a_m*sizeof(PetscScalar)); uu = (PetscScalar*) malloc(m*sizeof(PetscScalar)); w = (PetscScalar*) malloc(m*sizeof(PetscScalar)); /* extra nnz due to replication of vertices across separators */ for (i=1, j=0; i<=level; i++) j+=nsep[i]; /* storage for sparse x values */ n_global = xyt_handle->info->n_global; xt_max_nnz = yt_max_nnz = (PetscInt)(2.5*PetscPowReal(1.0*n_global,1.6667) + j*n/2)/PCTFS_num_nodes; x = (PetscScalar*) malloc(xt_max_nnz*sizeof(PetscScalar)); y = (PetscScalar*) malloc(yt_max_nnz*sizeof(PetscScalar)); /* LATER - can embed next sep to fire in gs */ /* time to make the donuts - generate X factor */ for (dim=i=j=0; i<m; i++) { /* time to move to the next level? */ while (i==segs[dim]) { if (dim==level) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"dim about to exceed level\n"); stages[dim++]=i; end +=lnsep[dim]; } stages[dim]=i; /* which column are we firing? */ /* i.e. set v_l */ /* use new seps and do global min across hc to determine which one to fire */ (start<end) ? (col=fo[start]) : (col=INT_MAX); PCTFS_giop_hc(&col,&work,1,op2,dim); /* shouldn't need this */ if (col==INT_MAX) { ierr = PetscInfo(0,"hey ... col==INT_MAX??\n");CHKERRQ(ierr); continue; } /* do I own it? I should */ PCTFS_rvec_zero(v,a_m); if (col==fo[start]) { start++; idx=PCTFS_ivec_linear_search(col, a_local2global, a_n); if (idx!=-1) { v[idx] = 1.0; j++; } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"NOT FOUND!\n"); } else { idx=PCTFS_ivec_linear_search(col, a_local2global, a_m); if (idx!=-1) v[idx] = 1.0; } /* perform u = A.v_l */ PCTFS_rvec_zero(u,n); do_matvec(xyt_handle->mvi,v,u); /* uu = X^T.u_l (local portion) */ /* technically only need to zero out first i entries */ /* later turn this into an XYT_solve call ? */ PCTFS_rvec_zero(uu,m); y_ptr=y; iptr = ycol_indices; for (k=0; k<i; k++) { off = *iptr++; len = *iptr++; ierr = PetscBLASIntCast(len,&dlen);CHKERRQ(ierr); PetscStackCall("BLASdot",uu[k] = BLASdot_(&dlen,u+off,&i1,y_ptr,&i1)); y_ptr+=len; } /* uu = X^T.u_l (comm portion) */ PCTFS_ssgl_radd (uu, w, dim, stages); /* z = X.uu */ PCTFS_rvec_zero(z,n); x_ptr=x; iptr = xcol_indices; for (k=0; k<i; k++) { off = *iptr++; len = *iptr++; ierr = PetscBLASIntCast(len,&dlen);CHKERRQ(ierr); PetscStackCall("BLASaxpy",BLASaxpy_(&dlen,&uu[k],x_ptr,&i1,z+off,&i1)); x_ptr+=len; } /* compute v_l = v_l - z */ PCTFS_rvec_zero(v+a_n,a_m-a_n); ierr = PetscBLASIntCast(n,&dlen);CHKERRQ(ierr); PetscStackCall("BLASaxpy",BLASaxpy_(&dlen,&dm1,z,&i1,v,&i1)); /* compute u_l = A.v_l */ if (a_n!=a_m) PCTFS_gs_gop_hc(PCTFS_gs_handle,v,"+\0",dim); PCTFS_rvec_zero(u,n); do_matvec(xyt_handle->mvi,v,u); /* compute sqrt(alpha) = sqrt(u_l^T.u_l) - local portion */ ierr = PetscBLASIntCast(n,&dlen);CHKERRQ(ierr); PetscStackCall("BLASdot",alpha = BLASdot_(&dlen,u,&i1,u,&i1)); /* compute sqrt(alpha) = sqrt(u_l^T.u_l) - comm portion */ PCTFS_grop_hc(&alpha, &alpha_w, 1, op, dim); alpha = (PetscScalar) PetscSqrtReal((PetscReal)alpha); /* check for small alpha */ /* LATER use this to detect and determine null space */ if (fabs(alpha)<1.0e-14) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"bad alpha! %g\n",alpha); /* compute v_l = v_l/sqrt(alpha) */ PCTFS_rvec_scale(v,1.0/alpha,n); PCTFS_rvec_scale(u,1.0/alpha,n); /* add newly generated column, v_l, to X */ flag = 1; off =len=0; for (k=0; k<n; k++) { if (v[k]!=0.0) { len=k; if (flag) {off=k; flag=0;} } } len -= (off-1); if (len>0) { if ((xt_nnz+len)>xt_max_nnz) { ierr = PetscInfo(0,"increasing space for X by 2x!\n");CHKERRQ(ierr); xt_max_nnz *= 2; x_ptr = (PetscScalar*) malloc(xt_max_nnz*sizeof(PetscScalar)); PCTFS_rvec_copy(x_ptr,x,xt_nnz); free(x); x = x_ptr; x_ptr+=xt_nnz; } xt_nnz += len; PCTFS_rvec_copy(x_ptr,v+off,len); /* keep track of number of zeros */ if (dim) { for (k=0; k<len; k++) { if (x_ptr[k]==0.0) xt_zero_nnz++; } } else { for (k=0; k<len; k++) { if (x_ptr[k]==0.0) xt_zero_nnz_0++; } } xcol_indices[2*i] = off; xcol_sz[i] = xcol_indices[2*i+1] = len; xcol_vals[i] = x_ptr; } else { xcol_indices[2*i] = 0; xcol_sz[i] = xcol_indices[2*i+1] = 0; xcol_vals[i] = x_ptr; } /* add newly generated column, u_l, to Y */ flag = 1; off =len=0; for (k=0; k<n; k++) { if (u[k]!=0.0) { len=k; if (flag) { off=k; flag=0; } } } len -= (off-1); if (len>0) { if ((yt_nnz+len)>yt_max_nnz) { ierr = PetscInfo(0,"increasing space for Y by 2x!\n");CHKERRQ(ierr); yt_max_nnz *= 2; y_ptr = (PetscScalar*) malloc(yt_max_nnz*sizeof(PetscScalar)); PCTFS_rvec_copy(y_ptr,y,yt_nnz); free(y); y = y_ptr; y_ptr+=yt_nnz; } yt_nnz += len; PCTFS_rvec_copy(y_ptr,u+off,len); /* keep track of number of zeros */ if (dim) { for (k=0; k<len; k++) { if (y_ptr[k]==0.0) yt_zero_nnz++; } } else { for (k=0; k<len; k++) { if (y_ptr[k]==0.0) yt_zero_nnz_0++; } } ycol_indices[2*i] = off; ycol_sz[i] = ycol_indices[2*i+1] = len; ycol_vals[i] = y_ptr; } else { ycol_indices[2*i] = 0; ycol_sz[i] = ycol_indices[2*i+1] = 0; ycol_vals[i] = y_ptr; } } /* close off stages for execution phase */ while (dim!=level) { stages[dim++]=i; ierr = PetscInfo2(0,"disconnected!!! dim(%D)!=level(%D)\n",dim,level);CHKERRQ(ierr); } stages[dim]=i; xyt_handle->info->n =xyt_handle->mvi->n; xyt_handle->info->m =m; xyt_handle->info->nnz =xt_nnz + yt_nnz; xyt_handle->info->max_nnz =xt_max_nnz + yt_max_nnz; xyt_handle->info->msg_buf_sz =stages[level]-stages[0]; xyt_handle->info->solve_uu = (PetscScalar*) malloc(m*sizeof(PetscScalar)); xyt_handle->info->solve_w = (PetscScalar*) malloc(m*sizeof(PetscScalar)); xyt_handle->info->x =x; xyt_handle->info->xcol_vals =xcol_vals; xyt_handle->info->xcol_sz =xcol_sz; xyt_handle->info->xcol_indices=xcol_indices; xyt_handle->info->stages =stages; xyt_handle->info->y =y; xyt_handle->info->ycol_vals =ycol_vals; xyt_handle->info->ycol_sz =ycol_sz; xyt_handle->info->ycol_indices=ycol_indices; free(segs); free(u); free(v); free(uu); free(z); free(w); return(0); }
PetscReal findDistance(PetscReal x1, PetscReal x2, PetscReal y1, PetscReal y2) { return PetscSqrtReal(PetscPowReal(x2-x1,2.0) + PetscPowReal(y2-y1,2.0)); }
PetscErrorCode AssembleSystem(Mat A, Vec b, PetscScalar soft_alpha, PetscScalar x_r, PetscScalar y_r, PetscScalar z_r, PetscScalar r, PetscInt ne, PetscMPIInt npe, PetscMPIInt rank, PetscInt nn, PetscInt m) { PetscErrorCode ierr; PetscReal h = 1.0 / ne; PetscScalar DD[24][24], DD2[24][24]; PetscScalar DD1[24][24]; const PetscInt NP = (PetscInt)(PetscPowReal((PetscReal)npe, 1.0 / 3.0) + 0.5); const PetscInt ipx = rank % NP, ipy = (rank % (NP * NP)) / NP, ipz = rank / (NP * NP); const PetscInt Ni0 = ipx * (nn / NP), Nj0 = ipy * (nn / NP), Nk0 = ipz * (nn / NP); const PetscInt Ni1 = Ni0 + (m > 0 ? (nn / NP) : 0), Nj1 = Nj0 + (nn / NP), Nk1 = Nk0 + (nn / NP); const PetscInt NN = nn / NP, id0 = ipz * nn * nn * NN + ipy * nn * NN * NN + ipx * NN * NN * NN; PetscScalar vv[24], v2[24]; PetscInt i, j, k; { ierr = elem_3d_elast_v_25((PetscScalar*)DD1); CHKERRQ(ierr); for (i = 0; i < 24; i++) { for (j = 0; j < 24; j++) { if (i < 12 || j < 12) { if (i == j) DD2[i][j] = 0.1 * DD1[i][j]; else DD2[i][j] = 0.0; } else DD2[i][j] = DD1[i][j]; } } for (i = 0; i < 24; i++) { if (i % 3 == 0) vv[i] = h * h; else if (i % 3 == 1) vv[i] = 2.0 * h * h; else vv[i] = 0.0; } for (i = 0; i < 24; i++) { if (i % 3 == 0 && i >= 12) v2[i] = h * h; else if (i % 3 == 1 && i >= 12) v2[i] = 2.0 * h * h; else v2[i] = 0.0; } } ierr = MatZeroEntries(A); CHKERRQ(ierr); ierr = VecZeroEntries(b); CHKERRQ(ierr); PetscInt ii, jj, kk; for (i = Ni0, ii = 0; i < Ni1; i++, ii++) { for (j = Nj0, jj = 0; j < Nj1; j++, jj++) { for (k = Nk0, kk = 0; k < Nk1; k++, kk++) { PetscReal x = h * (PetscReal)i; PetscReal y = h * (PetscReal)j; PetscReal z = h * (PetscReal)k; PetscInt id = id0 + ii + NN * jj + NN * NN * kk; if (i < ne && j < ne && k < ne) { PetscReal radius = PetscSqrtReal((x - 0.5 + h / 2) * (x - 0.5 + h / 2) + (y - 0.5 + h / 2) * (y - 0.5 + h / 2) + (z - 0.5 + h / 2) * (z - 0.5 + h / 2)); PetscReal alpha = 1.0; PetscInt jx, ix, idx[8]; idx[0] = id; idx[1] = id + 1; idx[2] = id + NN + 1; idx[3] = id + NN; idx[4] = id + NN * NN; idx[5] = id + 1 + NN * NN; idx[6] = id + NN + 1 + NN * NN; idx[7] = id + NN + NN * NN; if (i == Ni1 - 1 && Ni1 != nn) { idx[1] += NN * (NN * NN - 1); idx[2] += NN * (NN * NN - 1); idx[5] += NN * (NN * NN - 1); idx[6] += NN * (NN * NN - 1); } if (j == Nj1 - 1 && Nj1 != nn) { idx[2] += NN * NN * (nn - 1); idx[3] += NN * NN * (nn - 1); idx[6] += NN * NN * (nn - 1); idx[7] += NN * NN * (nn - 1); } if (k == Nk1 - 1 && Nk1 != nn) { idx[4] += NN * (nn * nn - NN * NN); idx[5] += NN * (nn * nn - NN * NN); idx[6] += NN * (nn * nn - NN * NN); idx[7] += NN * (nn * nn - NN * NN); } if (radius < r) alpha = soft_alpha; for (ix = 0; ix < 24; ix++) { for (jx = 0; jx < 24; jx++) DD[ix][jx] = alpha * DD1[ix][jx]; } if (k > 0) { ierr = MatSetValuesBlocked(A, 8, idx, 8, idx, (const PetscScalar*)DD, ADD_VALUES); CHKERRQ(ierr); ierr = VecSetValuesBlocked(b, 8, idx, (const PetscScalar*)vv, ADD_VALUES); CHKERRQ(ierr); } else { for (ix = 0; ix < 24; ix++) { for (jx = 0; jx < 24; jx++) DD[ix][jx] = alpha * DD2[ix][jx]; } ierr = MatSetValuesBlocked(A, 8, idx, 8, idx, (const PetscScalar*)DD, ADD_VALUES); CHKERRQ(ierr); ierr = VecSetValuesBlocked(b, 8, idx, (const PetscScalar*)v2, ADD_VALUES); CHKERRQ(ierr); } } } } } ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY); CHKERRQ(ierr); ierr = VecAssemblyBegin(b); CHKERRQ(ierr); ierr = VecAssemblyEnd(b); CHKERRQ(ierr); PetscFunctionReturn(0); }
PetscErrorCode DMDASubDomainDA_Private(DM dm, PetscInt *nlocal, DM **sdm) { DM *da; PetscInt dim,size,i,j,k,idx; PetscErrorCode ierr; DMDALocalInfo info; PetscInt xsize,ysize,zsize; PetscInt xo,yo,zo; PetscInt xs,ys,zs; PetscInt xm=1,ym=1,zm=1; PetscInt xol,yol,zol; PetscInt m=1,n=1,p=1; PetscInt M,N,P; PetscInt pm,mtmp; PetscFunctionBegin; ierr = DMDAGetLocalInfo(dm,&info);CHKERRQ(ierr); ierr = DMDAGetOverlap(dm,&xol,&yol,&zol);CHKERRQ(ierr); ierr = DMDAGetNumLocalSubDomains(dm,&size);CHKERRQ(ierr); ierr = PetscMalloc1(size,&da);CHKERRQ(ierr); if (nlocal) *nlocal = size; dim = info.dim; M = info.xm; N = info.ym; P = info.zm; if (dim == 1) { m = size; } else if (dim == 2) { m = (PetscInt)(0.5 + PetscSqrtReal(((PetscReal)M)*((PetscReal)size)/((PetscReal)N))); while (m > 0) { n = size/m; if (m*n*p == size) break; m--; } } else if (dim == 3) { n = (PetscInt)(0.5 + PetscPowReal(((PetscReal)N*N)*((PetscReal)size)/((PetscReal)P*M),(PetscReal)(1./3.))); if (!n) n = 1; while (n > 0) { pm = size/n; if (n*pm == size) break; n--; } if (!n) n = 1; m = (PetscInt)(0.5 + PetscSqrtReal(((PetscReal)M)*((PetscReal)size)/((PetscReal)P*n))); if (!m) m = 1; while (m > 0) { p = size/(m*n); if (m*n*p == size) break; m--; } if (M > P && m < p) {mtmp = m; m = p; p = mtmp;} } zs = info.zs; idx = 0; for (k = 0; k < p; k++) { ys = info.ys; for (j = 0; j < n; j++) { xs = info.xs; for (i = 0; i < m; i++) { if (dim == 1) { xm = M/m + ((M % m) > i); } else if (dim == 2) { xm = M/m + ((M % m) > i); ym = N/n + ((N % n) > j); } else if (dim == 3) { xm = M/m + ((M % m) > i); ym = N/n + ((N % n) > j); zm = P/p + ((P % p) > k); } xsize = xm; ysize = ym; zsize = zm; xo = xs; yo = ys; zo = zs; ierr = DMDACreate(PETSC_COMM_SELF,&(da[idx]));CHKERRQ(ierr); ierr = DMSetOptionsPrefix(da[idx],"sub_");CHKERRQ(ierr); ierr = DMSetDimension(da[idx], info.dim);CHKERRQ(ierr); ierr = DMDASetDof(da[idx], info.dof);CHKERRQ(ierr); ierr = DMDASetStencilType(da[idx],info.st);CHKERRQ(ierr); ierr = DMDASetStencilWidth(da[idx],info.sw);CHKERRQ(ierr); if (info.bx == DM_BOUNDARY_PERIODIC || (xs != 0)) { xsize += xol; xo -= xol; } if (info.by == DM_BOUNDARY_PERIODIC || (ys != 0)) { ysize += yol; yo -= yol; } if (info.bz == DM_BOUNDARY_PERIODIC || (zs != 0)) { zsize += zol; zo -= zol; } if (info.bx == DM_BOUNDARY_PERIODIC || (xs+xm != info.mx)) xsize += xol; if (info.by == DM_BOUNDARY_PERIODIC || (ys+ym != info.my)) ysize += yol; if (info.bz == DM_BOUNDARY_PERIODIC || (zs+zm != info.mz)) zsize += zol; if (info.bx != DM_BOUNDARY_PERIODIC) { if (xo < 0) { xsize += xo; xo = 0; } if (xo+xsize > info.mx-1) { xsize -= xo+xsize - info.mx; } } if (info.by != DM_BOUNDARY_PERIODIC) { if (yo < 0) { ysize += yo; yo = 0; } if (yo+ysize > info.my-1) { ysize -= yo+ysize - info.my; } } if (info.bz != DM_BOUNDARY_PERIODIC) { if (zo < 0) { zsize += zo; zo = 0; } if (zo+zsize > info.mz-1) { zsize -= zo+zsize - info.mz; } } ierr = DMDASetSizes(da[idx], xsize, ysize, zsize);CHKERRQ(ierr); ierr = DMDASetNumProcs(da[idx], 1, 1, 1);CHKERRQ(ierr); ierr = DMDASetBoundaryType(da[idx], DM_BOUNDARY_GHOSTED, DM_BOUNDARY_GHOSTED, DM_BOUNDARY_GHOSTED);CHKERRQ(ierr); /* set up as a block instead */ ierr = DMSetUp(da[idx]);CHKERRQ(ierr); /* nonoverlapping region */ ierr = DMDASetNonOverlappingRegion(da[idx],xs,ys,zs,xm,ym,zm);CHKERRQ(ierr); /* this alters the behavior of DMDAGetInfo, DMDAGetLocalInfo, DMDAGetCorners, and DMDAGetGhostedCorners and should be used with care */ ierr = DMDASetOffset(da[idx],xo,yo,zo,info.mx,info.my,info.mz);CHKERRQ(ierr); xs += xm; idx++; } ys += ym; } zs += zm; } *sdm = da; PetscFunctionReturn(0); }
int main(int argc, char** argv) { PC pc; PetscErrorCode ierr; PetscInt m, nn, M, j, k, ne = 4; PetscReal* coords; Vec x, rhs; Mat A; KSP ksp; PetscMPIInt npe, rank; PetscInitialize(&argc, &argv, NULL, NULL); ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank); CHKERRQ(ierr); ierr = MPI_Comm_size(PETSC_COMM_WORLD, &npe); CHKERRQ(ierr); ierr = PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "Linear elasticity in 3D", ""); { char nestring[256]; ierr = PetscSNPrintf(nestring, sizeof nestring, "number of elements in each direction, ne+1 must be a multiple of %D (sizes^{1/3})", (PetscInt)(PetscPowReal((PetscReal)npe, 1.0 / 3.0) + 0.5)); ierr = PetscOptionsInt("-ne", nestring, "", ne, &ne, NULL); } ierr = PetscOptionsEnd(); CHKERRQ(ierr); const HpddmOption* const opt = HpddmOptionGet(); { HpddmOptionParse(opt, argc, argv, rank == 0); if (rank) HpddmOptionRemove(opt, "verbosity"); } nn = ne + 1; M = 3 * nn * nn * nn; if (npe == 2) { if (rank == 1) m = 0; else m = nn * nn * nn; npe = 1; } else { m = nn * nn * nn / npe; if (rank == npe - 1) m = nn * nn * nn - (npe - 1) * m; } m *= 3; ierr = KSPCreate(PETSC_COMM_WORLD, &ksp); CHKERRQ(ierr); ierr = KSPSetFromOptions(ksp); CHKERRQ(ierr); int i; { PetscInt Istart, Iend, jj, ic; const PetscInt NP = (PetscInt)(PetscPowReal((PetscReal)npe, 1.0 / 3.0) + 0.5); const PetscInt ipx = rank % NP, ipy = (rank % (NP * NP)) / NP, ipz = rank / (NP * NP); const PetscInt Ni0 = ipx * (nn / NP), Nj0 = ipy * (nn / NP), Nk0 = ipz * (nn / NP); const PetscInt Ni1 = Ni0 + (m > 0 ? (nn / NP) : 0), Nj1 = Nj0 + (nn / NP), Nk1 = Nk0 + (nn / NP); PetscInt *d_nnz, *o_nnz, osz[4] = {0, 9, 15, 19}, nbc; if (npe != NP * NP * NP) SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "npe=%d: npe^{1/3} must be integer", npe); if (nn != NP * (nn / NP)) SETERRQ1(PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "-ne %d: (ne+1)%(npe^{1/3}) must equal zero", ne); ierr = PetscMalloc1(m + 1, &d_nnz); CHKERRQ(ierr); ierr = PetscMalloc1(m + 1, &o_nnz); CHKERRQ(ierr); for (i = Ni0, ic = 0; i < Ni1; i++) { for (j = Nj0; j < Nj1; j++) { for (k = Nk0; k < Nk1; k++) { nbc = 0; if (i == Ni0 || i == Ni1 - 1) nbc++; if (j == Nj0 || j == Nj1 - 1) nbc++; if (k == Nk0 || k == Nk1 - 1) nbc++; for (jj = 0; jj < 3; jj++, ic++) { d_nnz[ic] = 3 * (27 - osz[nbc]); o_nnz[ic] = 3 * osz[nbc]; } } } } if (ic != m) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_PLIB, "ic %D does not equal m %D", ic, m); ierr = MatCreate(PETSC_COMM_WORLD, &A); CHKERRQ(ierr); ierr = MatSetSizes(A, m, m, M, M); CHKERRQ(ierr); ierr = MatSetBlockSize(A, 3); CHKERRQ(ierr); ierr = MatSetType(A, MATAIJ); CHKERRQ(ierr); ierr = MatSeqAIJSetPreallocation(A, 0, d_nnz); CHKERRQ(ierr); ierr = MatMPIAIJSetPreallocation(A, 0, d_nnz, 0, o_nnz); CHKERRQ(ierr); ierr = PetscFree(d_nnz); CHKERRQ(ierr); ierr = PetscFree(o_nnz); CHKERRQ(ierr); ierr = MatGetOwnershipRange(A, &Istart, &Iend); CHKERRQ(ierr); if (m != Iend - Istart) SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_PLIB, "m %D does not equal Iend %D - Istart %D", m, Iend, Istart); ierr = VecCreate(PETSC_COMM_WORLD, &x); CHKERRQ(ierr); ierr = VecSetSizes(x, m, M); CHKERRQ(ierr); ierr = VecSetBlockSize(x, 3); CHKERRQ(ierr); ierr = VecSetFromOptions(x); CHKERRQ(ierr); ierr = VecDuplicate(x, &rhs); CHKERRQ(ierr); ierr = PetscMalloc1(m + 1, &coords); CHKERRQ(ierr); coords[m] = -99.0; PetscReal h = 1.0 / ne; for (i = Ni0, ic = 0; i < Ni1; i++) { for (j = Nj0; j < Nj1; j++) { for (k = Nk0; k < Nk1; k++, ic++) { coords[3 * ic] = h * (PetscReal)i; coords[3 * ic + 1] = h * (PetscReal)j; coords[3 * ic + 2] = h * (PetscReal)k; } } } } PetscReal s_r[SIZE_ARRAY_R] = {30, 0.1, 20, 10}; PetscReal x_r[SIZE_ARRAY_R] = {0.5, 0.4, 0.4, 0.4}; PetscReal y_r[SIZE_ARRAY_R] = {0.5, 0.5, 0.4, 0.4}; PetscReal z_r[SIZE_ARRAY_R] = {0.5, 0.45, 0.4, 0.35}; PetscReal r[SIZE_ARRAY_R] = {0.5, 0.5, 0.4, 0.4}; AssembleSystem(A, rhs, s_r[0], x_r[0], y_r[0], z_r[0], r[0], ne, npe, rank, nn, m); ierr = KSPSetOperators(ksp, A, A); CHKERRQ(ierr); MatNullSpace matnull; Vec vec_coords; PetscScalar* c; ierr = VecCreate(MPI_COMM_WORLD, &vec_coords); CHKERRQ(ierr); ierr = VecSetBlockSize(vec_coords, 3); CHKERRQ(ierr); ierr = VecSetSizes(vec_coords, m, PETSC_DECIDE); CHKERRQ(ierr); ierr = VecSetUp(vec_coords); CHKERRQ(ierr); ierr = VecGetArray(vec_coords, &c); CHKERRQ(ierr); for (i = 0; i < m; i++) c[i] = coords[i]; ierr = VecRestoreArray(vec_coords, &c); CHKERRQ(ierr); ierr = MatNullSpaceCreateRigidBody(vec_coords, &matnull); CHKERRQ(ierr); ierr = MatSetNearNullSpace(A, matnull); CHKERRQ(ierr); ierr = MatNullSpaceDestroy(&matnull); CHKERRQ(ierr); ierr = VecDestroy(&vec_coords); CHKERRQ(ierr); ierr = KSPSetInitialGuessNonzero(ksp, PETSC_TRUE); CHKERRQ(ierr); MPI_Barrier(PETSC_COMM_WORLD); double time = MPI_Wtime(); ierr = KSPSetUp(ksp); CHKERRQ(ierr); MPI_Barrier(PETSC_COMM_WORLD); time = MPI_Wtime() - time; ierr = PetscPrintf(PETSC_COMM_WORLD, "--- PC setup = %f\n", time); CHKERRQ(ierr); float t_time[SIZE_ARRAY_R]; int t_its[SIZE_ARRAY_R]; { { ierr = KSPSolve(ksp, rhs, x); CHKERRQ(ierr); ierr = KSPReset(ksp); CHKERRQ(ierr); ierr = KSPSetOperators(ksp, A, A); CHKERRQ(ierr); ierr = KSPSetInitialGuessNonzero(ksp, PETSC_TRUE); CHKERRQ(ierr); ierr = KSPSetUp(ksp); CHKERRQ(ierr); } for (i = 0; i < SIZE_ARRAY_R; ++i) { ierr = VecZeroEntries(x); CHKERRQ(ierr); MPI_Barrier(PETSC_COMM_WORLD); time = MPI_Wtime(); ierr = KSPSolve(ksp, rhs, x); CHKERRQ(ierr); MPI_Barrier(PETSC_COMM_WORLD); t_time[i] = MPI_Wtime() - time; PetscInt its; ierr = KSPGetIterationNumber(ksp, &its); CHKERRQ(ierr); t_its[i] = its; ierr = ComputeError(A, rhs, x); CHKERRQ(ierr); if (i == (SIZE_ARRAY_R - 1)) AssembleSystem(A, rhs, s_r[0], x_r[0], y_r[0], z_r[0], r[0], ne, npe, rank, nn, m); else AssembleSystem(A, rhs, s_r[i + 1], x_r[i + 1], y_r[i + 1], z_r[i + 1], r[i + 1], ne, npe, rank, nn, m); ierr = KSPSetOperators(ksp, A, A); CHKERRQ(ierr); ierr = KSPSetUp(ksp); CHKERRQ(ierr); } for (i = 0; i < SIZE_ARRAY_R; ++i) { ierr = PetscPrintf(PETSC_COMM_WORLD, "%d\t%d\t%f\n", i + 1, t_its[i], t_time[i]); CHKERRQ(ierr); if (i > 0) { t_its[0] += t_its[i]; t_time[0] += t_time[i]; } } if (SIZE_ARRAY_R > 1) { ierr = PetscPrintf(PETSC_COMM_WORLD, "------------------------\n\t%d\t%f\n", t_its[0], t_time[0]); CHKERRQ(ierr); } } { ierr = KSPGetPC(ksp, &pc); CHKERRQ(ierr); HpddmCustomOperator H; H._A = A; H._M = pc; H._mv = mv; H._precond = precond; H._b = rhs; H._x = x; int n; MatGetLocalSize(A, &n, NULL); { ierr = VecZeroEntries(x); K* pt_rhs; K* pt_x; VecGetArray(rhs, &pt_rhs); VecGetArray(x, &pt_x); int previous = HpddmOptionVal(opt, "verbosity"); if (previous > 0) HpddmOptionRemove(opt, "verbosity"); HpddmCustomOperatorSolve(&H, n, H._mv, H._precond, pt_rhs, pt_x, 1, &PETSC_COMM_WORLD); if (previous > 0) { char buffer[20]; snprintf(buffer, 20, "%d", previous); char* concat = malloc(strlen("-hpddm_verbosity ") + strlen(buffer) + 1); strcpy(concat, "-hpddm_verbosity "); strcat(concat, buffer); HpddmOptionParseString(opt, concat); free(concat); } VecRestoreArray(x, &pt_x); VecRestoreArray(rhs, &pt_rhs); previous = HpddmOptionVal(opt, "krylov_method"); if(previous == 4 || previous == 5) HpddmDestroyRecycling(); ierr = KSPReset(ksp); CHKERRQ(ierr); ierr = KSPSetOperators(ksp, A, A); CHKERRQ(ierr); ierr = KSPSetInitialGuessNonzero(ksp, PETSC_TRUE); CHKERRQ(ierr); ierr = KSPSetUp(ksp); CHKERRQ(ierr); } for (i = 0; i < SIZE_ARRAY_R; ++i) { ierr = VecZeroEntries(x); CHKERRQ(ierr); K* pt_rhs; K* pt_x; VecGetArray(rhs, &pt_rhs); VecGetArray(x, &pt_x); MPI_Barrier(PETSC_COMM_WORLD); time = MPI_Wtime(); t_its[i] = HpddmCustomOperatorSolve(&H, n, H._mv, H._precond, pt_rhs, pt_x, 1, &PETSC_COMM_WORLD); MPI_Barrier(PETSC_COMM_WORLD); t_time[i] = MPI_Wtime() - time; VecRestoreArray(x, &pt_x); VecRestoreArray(rhs, &pt_rhs); ierr = ComputeError(A, rhs, x); CHKERRQ(ierr); if (i != (SIZE_ARRAY_R - 1)) { AssembleSystem(A, rhs, s_r[i + 1], x_r[i + 1], y_r[i + 1], z_r[i + 1], r[i + 1], ne, npe, rank, nn, m); ierr = KSPSetOperators(ksp, A, A); CHKERRQ(ierr); ierr = KSPSetUp(ksp); CHKERRQ(ierr); } } for (i = 0; i < SIZE_ARRAY_R; ++i) { ierr = PetscPrintf(PETSC_COMM_WORLD, "%d\t%d\t%f\n", i + 1, t_its[i], t_time[i]); CHKERRQ(ierr); if (i > 0) { t_its[0] += t_its[i]; t_time[0] += t_time[i]; } } if (SIZE_ARRAY_R > 1) { ierr = PetscPrintf(PETSC_COMM_WORLD, "------------------------\n\t%d\t%f\n", t_its[0], t_time[0]); CHKERRQ(ierr); } } ierr = KSPDestroy(&ksp); CHKERRQ(ierr); ierr = VecDestroy(&x); CHKERRQ(ierr); ierr = VecDestroy(&rhs); CHKERRQ(ierr); ierr = MatDestroy(&A); CHKERRQ(ierr); ierr = PetscFree(coords); CHKERRQ(ierr); ierr = PetscFinalize(); return 0; }