/* AppCtxInitialize - Sets initial values for the application context parameters Input: ptr - void user-defined application context Output: ptr - user-defined application context with the default or user-provided parameters */ static int AppCtxInitialize(void *ptr) { AppCtx *user = (AppCtx*)ptr; PetscTruth flg; int info; /* Specify default parameters */ user->mx = user->my = 11; user->b = -0.5; user->t = 0.5; user->l = -0.5; user->r = 0.5; user->fx=0.5; user->fy=0.5; user->bheight=0.0; /* Check for command line arguments that override defaults */ info = PetscOptionsGetInt(TAO_NULL, "-mx", &user->mx, &flg); CHKERRQ(info); info = PetscOptionsGetInt(TAO_NULL, "-my", &user->my, &flg); CHKERRQ(info); info = PetscOptionsGetReal(TAO_NULL, "-bottom", &user->b, &flg); CHKERRQ(info); info = PetscOptionsGetReal(TAO_NULL, "-top", &user->t, &flg); CHKERRQ(info); info = PetscOptionsGetReal(TAO_NULL, "-left", &user->l, &flg); CHKERRQ(info); info = PetscOptionsGetReal(TAO_NULL, "-right", &user->r, &flg); CHKERRQ(info); info = PetscOptionsGetReal(PETSC_NULL,"-bmx",&user->fx,&flg); CHKERRQ(info); info = PetscOptionsGetReal(PETSC_NULL,"-bmy",&user->fy,&flg); CHKERRQ(info); info = PetscOptionsGetReal(PETSC_NULL,"-bheight",&user->bheight,&flg); CHKERRQ(info); user->hx = (user->r - user->l) / (user->mx - 1); user->hy = (user->t - user->b) / (user->my - 1); user->area = 0.5 * user->hx * user->hy; info = PetscLogFlops(8); CHKERRQ(info); return 0; } /* AppCtxInitialize */
/*
  PCApply_PBJacobi_7 - Applies the point-block Jacobi preconditioner for
  block size 7.

  For each of the jac->mbs blocks, the 7x7 block stored in jac->diag is
  multiplied by the matching 7 entries of x and the product is written to y.
  Each block occupies 49 consecutive entries, addressed so that
  diag[r + 7*c] multiplies input component c into output component r.
  (Presumably these are the already inverted diagonal blocks, prepared
  elsewhere - confirm against the setup routine.)

  Input Parameters:
    pc - preconditioner context; pc->data is a PC_PBJacobi
    x  - input vector

  Output Parameter:
    y  - result vector

  Returns 0 on success, or the PETSc error code of the failing call.
*/
static PetscErrorCode PCApply_PBJacobi_7(PC pc,Vec x,Vec y)
{
  PC_PBJacobi     *jac = (PC_PBJacobi*)pc->data;
  PetscErrorCode  ierr;
  PetscInt        i,m = jac->mbs;
  const MatScalar *diag = jac->diag;
  PetscScalar     x0,x1,x2,x3,x4,x5,x6,*xx,*yy;

  PetscFunctionBegin;
  ierr = VecGetArray(x,&xx);CHKERRQ(ierr);
  ierr = VecGetArray(y,&yy);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    /* load the whole input block before writing any output */
    x0 = xx[7*i];   x1 = xx[7*i+1]; x2 = xx[7*i+2]; x3 = xx[7*i+3];
    x4 = xx[7*i+4]; x5 = xx[7*i+5]; x6 = xx[7*i+6];

    yy[7*i]   = diag[0]*x0 + diag[7]*x1  + diag[14]*x2 + diag[21]*x3 + diag[28]*x4 + diag[35]*x5 + diag[42]*x6;
    yy[7*i+1] = diag[1]*x0 + diag[8]*x1  + diag[15]*x2 + diag[22]*x3 + diag[29]*x4 + diag[36]*x5 + diag[43]*x6;
    yy[7*i+2] = diag[2]*x0 + diag[9]*x1  + diag[16]*x2 + diag[23]*x3 + diag[30]*x4 + diag[37]*x5 + diag[44]*x6;
    yy[7*i+3] = diag[3]*x0 + diag[10]*x1 + diag[17]*x2 + diag[24]*x3 + diag[31]*x4 + diag[38]*x5 + diag[45]*x6;
    yy[7*i+4] = diag[4]*x0 + diag[11]*x1 + diag[18]*x2 + diag[25]*x3 + diag[32]*x4 + diag[39]*x5 + diag[46]*x6;
    yy[7*i+5] = diag[5]*x0 + diag[12]*x1 + diag[19]*x2 + diag[26]*x3 + diag[33]*x4 + diag[40]*x5 + diag[47]*x6;
    yy[7*i+6] = diag[6]*x0 + diag[13]*x1 + diag[20]*x2 + diag[27]*x3 + diag[34]*x4 + diag[41]*x5 + diag[48]*x6;

    diag += 49;   /* advance to the next 7x7 block */
  }
  ierr = VecRestoreArray(x,&xx);CHKERRQ(ierr);
  ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr);
  /* 49 multiplies + 42 additions per block (2*bs*bs - bs with bs = 7) */
  ierr = PetscLogFlops(91.0*m);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
  MatSolve_SeqSBSTRM_5_NaturalOrdering_inplace - Solves with a factored
  SBSTRM matrix of block size 5 in natural ordering.

  The right-hand side bb is copied into xx, and the forward and backward
  substitution helpers then work in place on that copy using the streamed
  values in sbstrm->as.

  Input Parameters:
    A  - factored matrix (A->data is Mat_SeqSBAIJ, A->spptr is Mat_SeqSBSTRM)
    bb - right-hand side

  Output Parameter:
    xx - solution

  Returns 0 on success, or the PETSc error code of the failing call.
*/
PetscErrorCode MatSolve_SeqSBSTRM_5_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
{
  Mat_SeqSBAIJ   *sbaij  = (Mat_SeqSBAIJ*)A->data;
  PetscInt       nblk    = sbaij->mbs;
  PetscInt       *rowptr = sbaij->i;
  PetscInt       *colidx = sbaij->j;
  PetscInt       bs      = A->rmap->bs;
  PetscInt       bs2     = sbaij->bs2;
  PetscScalar    *sol,*rhs;
  PetscErrorCode ierr;
  Mat_SeqSBSTRM  *strm   = (Mat_SeqSBSTRM*)A->spptr;
  MatScalar      *vals   = strm->as;

  PetscFunctionBegin;
  ierr = VecGetArray(bb,&rhs);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&sol);CHKERRQ(ierr);

  /* sol <- rhs, so both substitutions can overwrite sol in place */
  ierr = PetscMemcpy(sol,rhs,5*nblk*sizeof(PetscScalar));CHKERRQ(ierr);

  /* solve U^T * D * y = b by forward substitution */
  ierr = MatForwardSolve_SeqSBSTRM_5_NaturalOrdering(rowptr,colidx,vals,nblk,sol);CHKERRQ(ierr);

  /* solve U*x = y by back substitution */
  ierr = MatBackwardSolve_SeqSBSTRM_5_NaturalOrdering(rowptr,colidx,vals,nblk,sol);CHKERRQ(ierr);

  ierr = VecRestoreArray(bb,&rhs);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&sol);CHKERRQ(ierr);
  ierr = PetscLogFlops(4.0*bs2*sbaij->nz - (bs+2.0*bs2)*nblk);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
  PCApply_PBJacobi_5 - Applies the point-block Jacobi preconditioner for
  block size 5.

  Each of the jac->mbs blocks is a 5x5 matrix held in 25 consecutive entries
  of jac->diag, addressed so that entry [r + 5*c] multiplies input component
  c into output component r. The block is multiplied by the matching five
  entries of x and the product stored in y.

  Input Parameters:
    pc - preconditioner context; pc->data is a PC_PBJacobi
    x  - input vector

  Output Parameter:
    y  - result vector

  Returns 0 on success, or the PETSc error code of the failing call.
*/
static PetscErrorCode PCApply_PBJacobi_5(PC pc,Vec x,Vec y)
{
  PC_PBJacobi     *jac = (PC_PBJacobi*)pc->data;
  PetscErrorCode  ierr;
  PetscInt        blk,nblk = jac->mbs;
  const MatScalar *D;
  PetscScalar     a0,a1,a2,a3,a4,*xarr,*yarr,*xin,*yout;

  PetscFunctionBegin;
  ierr = VecGetArray(x,&xarr);CHKERRQ(ierr);
  ierr = VecGetArray(y,&yarr);CHKERRQ(ierr);
  for (blk=0; blk<nblk; blk++) {
    D    = jac->diag + 25*blk;
    xin  = xarr + 5*blk;
    yout = yarr + 5*blk;

    /* read the full input block before any output entry is written */
    a0 = xin[0];
    a1 = xin[1];
    a2 = xin[2];
    a3 = xin[3];
    a4 = xin[4];

    yout[0] = D[0]*a0 + D[5]*a1 + D[10]*a2 + D[15]*a3 + D[20]*a4;
    yout[1] = D[1]*a0 + D[6]*a1 + D[11]*a2 + D[16]*a3 + D[21]*a4;
    yout[2] = D[2]*a0 + D[7]*a1 + D[12]*a2 + D[17]*a3 + D[22]*a4;
    yout[3] = D[3]*a0 + D[8]*a1 + D[13]*a2 + D[18]*a3 + D[23]*a4;
    yout[4] = D[4]*a0 + D[9]*a1 + D[14]*a2 + D[19]*a3 + D[24]*a4;
  }
  ierr = VecRestoreArray(x,&xarr);CHKERRQ(ierr);
  ierr = VecRestoreArray(y,&yarr);CHKERRQ(ierr);
  /* 25 multiplies + 20 additions per block */
  ierr = PetscLogFlops(45.0*nblk);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
  ComputeFunction - Evaluates the nonlinear function F(x) on the locally
  owned part of a 2D grid.

  Input Parameters:
    user - user-defined application context (provides da, localX, mx, my, param)
    X    - input vector

  Output Parameter:
    F    - function vector

  Points on the global boundary get F = x. Interior points get the scaled
  5-point difference of x minus sc*exp(x), with sc = hx*hy*param.

  NOTE(review): f (the array of F) is addressed with the same ghosted-local
  index as x; confirm that F's local layout matches whenever ghost points
  exist.
*/
PetscErrorCode ComputeFunction(AppCtx *user,Vec X,Vec F)
{
  PetscErrorCode ierr;
  PetscInt       i,j,row,mx,my,xs,ys,xm,ym,gxs,gys,gxm,gym;
  PetscReal      two = 2.0,one = 1.0,lambda,hx,hy,hxdhy,hydhx,sc;
  PetscScalar    u,uxx,uyy,*x,*f;
  Vec            localX = user->localX;

  mx     = user->mx;
  my     = user->my;
  lambda = user->param;
  hx     = one/(PetscReal)(mx-1);
  hy     = one/(PetscReal)(my-1);
  sc     = hx*hy*lambda;
  hxdhy  = hx/hy;
  hydhx  = hy/hx;

  /*
     Fill the ghosted local vector from the global one. The transfer is split
     into Begin/End so that work could be overlapped with communication; no
     work is placed between the two calls here.
  */
  ierr = DMGlobalToLocalBegin(user->da,X,INSERT_VALUES,localX);CHKERRQ(ierr);
  ierr = DMGlobalToLocalEnd(user->da,X,INSERT_VALUES,localX);CHKERRQ(ierr);

  /* Raw access to the vector data */
  ierr = VecGetArray(localX,&x);CHKERRQ(ierr);
  ierr = VecGetArray(F,&f);CHKERRQ(ierr);

  /* Owned and ghosted index ranges of this process */
  ierr = DMDAGetCorners(user->da,&xs,&ys,NULL,&xm,&ym,NULL);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(user->da,&gxs,&gys,NULL,&gxm,&gym,NULL);CHKERRQ(ierr);

  /* Sweep the locally owned points */
  for (j=ys; j<ys+ym; j++) {
    for (i=xs; i<xs+xm; i++) {
      /* position of (i,j) inside the ghosted local array */
      row = (j - gys)*gxm + (i - gxs);
      if (i == 0 || j == 0 || i == mx-1 || j == my-1) {
        f[row] = x[row];
      } else {
        u      = x[row];
        uxx    = (two*u - x[row-1] - x[row+1])*hydhx;
        uyy    = (two*u - x[row-gxm] - x[row+gxm])*hxdhy;
        f[row] = uxx + uyy - sc*PetscExpScalar(u);
      }
    }
  }

  /* Hand the arrays back */
  ierr = VecRestoreArray(localX,&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(F,&f);CHKERRQ(ierr);
  ierr = PetscLogFlops(11.0*ym*xm);CHKERRQ(ierr);
  return 0;
}
/*
  MatMultTranspose_SeqBSTRM_5 - Computes zz = A^T * xx for a streamed block
  matrix with block size 5.

  The values live in sbstrm->as as five consecutive streams, each of length
  5*(number of blocks). Stream k supplies the five coefficients that multiply
  component k of the current row's input block. zz is zeroed first and every
  block then accumulates into the five entries of zz selected by its column
  index.

  Input Parameters:
    A  - matrix (A->data is Mat_SeqBAIJ, A->spptr is Mat_SeqBSTRM)
    xx - input vector

  Output Parameter:
    zz - result vector

  Returns 0 on success, or the PETSc error code of the failing call.
*/
PetscErrorCode MatMultTranspose_SeqBSTRM_5(Mat A,Vec xx,Vec zz)
{
  Mat_SeqBAIJ     *baij = (Mat_SeqBAIJ*)A->data;
  Mat_SeqBSTRM    *strm = (Mat_SeqBSTRM*)A->spptr;
  PetscScalar     zero = 0.0;
  PetscScalar     *zarr = 0;
  PetscScalar     *xarr,*xblk;
  const MatScalar *s1,*s2,*s3,*s4,*s5;
  PetscScalar     a1,a2,a3,a4,a5;
  PetscErrorCode  ierr;
  PetscInt        nbrows = baij->mbs,*colidx = baij->j,*rowptr = baij->i;
  PetscInt        row,k,nblk,*cols,off;
  PetscInt        nonempty = 0;
  PetscInt        stride;

  PetscFunctionBegin;
  ierr = VecSet(zz,zero);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&xarr);CHKERRQ(ierr);
  ierr = VecGetArray(zz,&zarr);CHKERRQ(ierr);

  /* start of each of the five value streams */
  stride = 5*(rowptr[nbrows]-rowptr[0]);
  s1     = strm->as;
  s2     = s1 + stride;
  s3     = s2 + stride;
  s4     = s3 + stride;
  s5     = s4 + stride;

  for (row=0; row<nbrows; row++) {
    nblk      = rowptr[row+1] - rowptr[row];
    nonempty += (nblk>0);

    xblk = xarr + 5*row;
    a1   = xblk[0];
    a2   = xblk[1];
    a3   = xblk[2];
    a4   = xblk[3];
    a5   = xblk[4];

    cols = colidx + rowptr[row];

    PetscPrefetchBlock(cols+nblk,nblk,0,PETSC_PREFETCH_HINT_NTA);   /* Indices for the next row (assumes same size as this one) */
    PetscPrefetchBlock(s1+5*nblk,5*nblk,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
    PetscPrefetchBlock(s2+5*nblk,5*nblk,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
    PetscPrefetchBlock(s3+5*nblk,5*nblk,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
    PetscPrefetchBlock(s4+5*nblk,5*nblk,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */
    PetscPrefetchBlock(s5+5*nblk,5*nblk,0,PETSC_PREFETCH_HINT_NTA); /* Entries for the next row */

    for (k=0; k<nblk; k++) {
      off = cols[k]*5;
      zarr[off]   += s1[0]*a1 + s2[0]*a2 + s3[0]*a3 + s4[0]*a4 + s5[0]*a5;
      zarr[off+1] += s1[1]*a1 + s2[1]*a2 + s3[1]*a3 + s4[1]*a4 + s5[1]*a5;
      zarr[off+2] += s1[2]*a1 + s2[2]*a2 + s3[2]*a3 + s4[2]*a4 + s5[2]*a5;
      zarr[off+3] += s1[3]*a1 + s2[3]*a2 + s3[3]*a3 + s4[3]*a4 + s5[3]*a5;
      zarr[off+4] += s1[4]*a1 + s2[4]*a2 + s3[4]*a3 + s4[4]*a4 + s5[4]*a5;
      s1 += 5;
      s2 += 5;
      s3 += 5;
      s4 += 5;
      s5 += 5;
    }
  }

  ierr = VecRestoreArray(xx,&xarr);CHKERRQ(ierr);
  ierr = VecRestoreArray(zz,&zarr);CHKERRQ(ierr);
  ierr = PetscLogFlops(50.0*baij->nz - 5.0*nonempty);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
  FormFunction - Evaluates the objective function f(X).

  Input Parameters:
    tao - the Tao context (not used in the body)
    X   - the input vector, read as an nx-by-ny grid in row-major order
          (entry (i,j) is x[nx*j + i])
    ptr - user-defined context, cast to AppCtx* (provides hx, hy, param, mx, my)

  Output Parameter:
    f   - the newly evaluated function value

  The value is accumulated in two sweeps, one over the "lower" and one over
  the "upper" triangular elements. Values outside the grid are taken as zero.
  The result is area*(0.5*fquad + flin) with area = 0.5*hx*hy.
*/
PetscErrorCode FormFunction(Tao tao,Vec X,PetscReal *f,void *ptr)
{
  AppCtx         *user = (AppCtx *) ptr;
  PetscReal      hx = user->hx, hy = user->hy, area, three = 3.0, p5 = 0.5;
  PetscReal      zero = 0.0, vb, vl, vr, vt, dvdx, dvdy, flin = 0.0, fquad = 0.0;
  PetscReal      v, cdiv3 = user->param/three;
  PetscReal      *x;
  PetscErrorCode ierr;
  PetscInt       nx = user->mx, ny = user->my, i, j, k;

  /* Get pointer to vector data */
  ierr = VecGetArray(X,&x);CHKERRQ(ierr);

  /* Compute function contributions over the lower triangular elements */
  for (j=-1; j<ny; j++) {
    for (i=-1; i<nx; i++) {
      k  = nx*j + i;
      v  = zero;
      vr = zero;
      vt = zero;
      /* only read entries that lie inside the grid */
      if (i >= 0 && j >= 0)    v  = x[k];
      if (i < nx-1 && j > -1)  vr = x[k+1];
      if (i > -1 && j < ny-1)  vt = x[k+nx];
      dvdx   = (vr-v)/hx;
      dvdy   = (vt-v)/hy;
      fquad += dvdx*dvdx + dvdy*dvdy;
      flin  -= cdiv3*(v+vr+vt);
    }
  }

  /* Compute function contributions over the upper triangular elements */
  for (j=0; j<=ny; j++) {
    for (i=0; i<=nx; i++) {
      k  = nx*j + i;
      vb = zero;
      vl = zero;
      v  = zero;
      if (i < nx && j > 0)   vb = x[k-nx];
      if (i > 0 && j < ny)   vl = x[k-1];
      if (i < nx && j < ny)  v  = x[k];
      dvdx  = (v-vl)/hx;
      dvdy  = (v-vb)/hy;
      fquad = fquad + dvdx*dvdx + dvdy*dvdy;
      flin  = flin - cdiv3*(vb+vl+v);
    }
  }

  /* Restore vector */
  ierr = VecRestoreArray(X,&x);CHKERRQ(ierr);

  /* Scale the function */
  area = p5*hx*hy;
  *f   = area*(p5*fquad+flin);

  /* form the count in floating point: nx*ny*24 in PetscInt can overflow on large grids */
  ierr = PetscLogFlops(24.0*nx*ny);CHKERRQ(ierr);
  return 0;
}
/*
  MatMult_AIJCRL - Computes yy = A * xx for the CRL storage format.

  Shared by both sequential and parallel versions of CRL matrix: MATMPIAIJCRL
  and MATSEQAIJCRL - the scatter is used only in the parallel version.

  The entries are stored as rmax "columns" of length m: acols[c*m + r] is the
  c-th stored entry of row r and icols[c*m + r] is the index into x it
  multiplies. The first column initialises y and the remaining ones
  accumulate into it.

  Input Parameters:
    A  - matrix; A->spptr is a Mat_AIJCRL
    xx - input vector

  Output Parameter:
    yy - result vector

  Returns 0 on success, or the PETSc error code of the failing call.
*/
PetscErrorCode MatMult_AIJCRL(Mat A,Vec xx,Vec yy)
{
  Mat_AIJCRL     *aijcrl = (Mat_AIJCRL*) A->spptr;
  PetscInt       m       = aijcrl->m;  /* Number of rows in the matrix. */
  PetscInt       rmax    = aijcrl->rmax,*icols = aijcrl->icols;
  PetscScalar    *acols  = aijcrl->acols;
  PetscErrorCode ierr;
  PetscScalar    *x,*y;
#if !defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL)
  PetscInt       i,j,ii;
#endif

  /* the pragma must name objects declared in this function: x, y and acols */
#if defined(PETSC_HAVE_PRAGMA_DISJOINT)
#pragma disjoint(*x,*y,*acols)
#endif

  PetscFunctionBegin;
  if (aijcrl->xscat) {
    ierr = VecCopy(xx,aijcrl->xwork);CHKERRQ(ierr);
    /* get remote values needed for local part of multiply */
    ierr = VecScatterBegin(aijcrl->xscat,xx,aijcrl->fwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(aijcrl->xscat,xx,aijcrl->fwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    /* from here on multiply against the work vector instead of the caller's xx */
    xx = aijcrl->xwork;
  }

  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArray(yy,&y);CHKERRQ(ierr);

#if defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL)
  fortranmultcrl_(&m,&rmax,x,y,icols,acols);
#else

  /* first column */
  for (j=0; j<m; j++) y[j] = acols[j]*x[icols[j]];

  /* other columns */
#if defined(PETSC_HAVE_CRAY_VECTOR)
#pragma _CRI preferstream
#endif
  for (i=1; i<rmax; i++) {
    ii = i*m;
#if defined(PETSC_HAVE_CRAY_VECTOR)
#pragma _CRI prefervector
#endif
    for (j=0; j<m; j++) y[j] = y[j] + acols[ii+j]*x[icols[ii+j]];
  }
#if defined(PETSC_HAVE_CRAY_VECTOR)
#pragma _CRI ivdep
#endif

#endif
  ierr = PetscLogFlops(2.0*aijcrl->nz - m);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
  VecNorm_MPI - Computes a norm of a parallel vector by combining local
  contributions with MPI_Allreduce over the vector's communicator.

  Input Parameters:
    xin  - the vector (xin->data is a Vec_MPI)
    type - NORM_1, NORM_2, NORM_FROBENIUS, NORM_INFINITY or NORM_1_AND_2

  Output Parameter:
    z    - the norm; for NORM_1_AND_2 two values, z[0] the 1-norm and z[1]
           the 2-norm

  For any other value of type nothing is written to z.
  Returns 0 on success, or the error code of the failing call.
*/
PetscErrorCode VecNorm_MPI(Vec xin,NormType type,PetscReal *z)
{
  Vec_MPI        *v = (Vec_MPI*)xin->data;
  PetscReal      gsum,lwork = 0.0;
  PetscScalar    *a = v->array;
  PetscErrorCode ierr;
  PetscInt       nloc = xin->map->n;

  PetscFunctionBegin;
  if (type == NORM_2 || type == NORM_FROBENIUS) {
    /* local sum of squares, by whichever kernel the build selected */
#if defined(PETSC_HAVE_SLOW_BLAS_NORM2)
#if defined(PETSC_USE_FORTRAN_KERNEL_NORM)
    fortrannormsqr_(a,&nloc,&lwork);
#elif defined(PETSC_USE_UNROLLED_NORM)
    /* peel off nloc mod 4 leading entries; the cases fall through on purpose */
    switch (nloc & 0x3) {
    case 3:
      lwork += PetscRealPart(a[0]*PetscConj(a[0])); a++;
      /* fall through */
    case 2:
      lwork += PetscRealPart(a[0]*PetscConj(a[0])); a++;
      /* fall through */
    case 1:
      lwork += PetscRealPart(a[0]*PetscConj(a[0])); a++;
      nloc -= 4;
    }
    /* remaining entries, four per iteration */
    while (nloc>0) {
      lwork += PetscRealPart(a[0]*PetscConj(a[0])+a[1]*PetscConj(a[1])+
                             a[2]*PetscConj(a[2])+a[3]*PetscConj(a[3]));
      a    += 4;
      nloc -= 4;
    }
#else
    {
      PetscInt k;
      for (k=0; k<nloc; k++) lwork += PetscRealPart((a[k])*(PetscConj(a[k])));
    }
#endif
#else
    {
      PetscBLASInt inc = 1,bn = PetscBLASIntCast(nloc);
      lwork  = BLASnrm2_(&bn,a,&inc);
      lwork *= lwork;   /* square it so the contributions can be summed */
    }
#endif
    ierr = MPI_Allreduce(&lwork,&gsum,1,MPIU_REAL,MPI_SUM,((PetscObject)xin)->comm);CHKERRQ(ierr);
    *z   = sqrt(gsum);
    ierr = PetscLogFlops(2.0*xin->map->n);CHKERRQ(ierr);
  } else if (type == NORM_1) {
    /* local 1-norm, then the global sum */
    ierr = VecNorm_Seq(xin,NORM_1,&lwork);CHKERRQ(ierr);
    ierr = MPI_Allreduce(&lwork,z,1,MPIU_REAL,MPI_SUM,((PetscObject)xin)->comm);CHKERRQ(ierr);
  } else if (type == NORM_INFINITY) {
    /* local max, then the global max */
    ierr = VecNorm_Seq(xin,NORM_INFINITY,&lwork);CHKERRQ(ierr);
    ierr = MPI_Allreduce(&lwork,z,1,MPIU_REAL,MPI_MAX,((PetscObject)xin)->comm);CHKERRQ(ierr);
  } else if (type == NORM_1_AND_2) {
    PetscReal pair[2];

    ierr    = VecNorm_Seq(xin,NORM_1,pair);CHKERRQ(ierr);
    ierr    = VecNorm_Seq(xin,NORM_2,pair+1);CHKERRQ(ierr);
    pair[1] = pair[1]*pair[1];   /* reduce the squared 2-norm, take the root afterwards */
    ierr    = MPI_Allreduce(pair,z,2,MPIU_REAL,MPI_SUM,((PetscObject)xin)->comm);CHKERRQ(ierr);
    z[1]    = sqrt(z[1]);
  }
  PetscFunctionReturn(0);
}
/*
  VecAYPX_Seq - Computes y = x + alpha*y on a sequential vector.

  Input Parameters:
    yin   - vector y, overwritten with the result
    alpha - scalar multiplier applied to y
    xin   - vector x

  Special cases:
    alpha == 0  : y = x, done by VecCopy
    alpha == 1  : y = y + x, done by VecAXPY_Seq
    alpha == -1 : y = x - y, with no multiplication (1 flop per entry)
  All other values use the general kernel (2 flops per entry).

  Returns 0 on success, or the PETSc error code of the failing call.
*/
PetscErrorCode VecAYPX_Seq(Vec yin,PetscScalar alpha,Vec xin)
{
  PetscErrorCode    ierr;
  PetscInt          len = yin->map->n;
  PetscScalar       *y;
  const PetscScalar *x;

  PetscFunctionBegin;
  /* the two cases that are delegated entirely */
  if (alpha == (PetscScalar)0.0) {
    ierr = VecCopy(xin,yin);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  if (alpha == (PetscScalar)1.0) {
    ierr = VecAXPY_Seq(yin,alpha,xin);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  ierr = VecGetArrayRead(xin,&x);CHKERRQ(ierr);
  ierr = VecGetArray(yin,&y);CHKERRQ(ierr);
  if (alpha == (PetscScalar)-1.0) {
    PetscInt k;
    for (k=0; k<len; k++) {
      y[k] = x[k] - y[k];
    }
  } else {
#if defined(PETSC_USE_FORTRAN_KERNEL_AYPX)
    {
      PetscScalar a = alpha;
      fortranaypx_(&len,&a,x,y);
    }
#else
    {
      PetscInt k;
      for (k=0; k<len; k++) {
        y[k] = x[k] + alpha*y[k];
      }
    }
#endif
  }
  ierr = VecRestoreArrayRead(xin,&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(yin,&y);CHKERRQ(ierr);

  if (alpha == (PetscScalar)-1.0) {
    ierr = PetscLogFlops(1.0*len);CHKERRQ(ierr);
  } else {
    ierr = PetscLogFlops(2.0*len);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
/*
  RHSFunction - Evaluates the right-hand-side function F(u).

  Input Parameters:
    ts    - the TS context; its DM supplies the grid
    ftime - current time (not used in the body)
    U     - input vector
    ptr   - optional user-defined context (not used in the body)

  Output Parameter:
    F     - function vector

  Points on the global boundary get F = u. Interior points get the 5-point
  finite-difference Laplacian of u on a uniform grid over the unit square.
*/
PetscErrorCode RHSFunction(TS ts,PetscReal ftime,Vec U,Vec F,void *ptr)
{
  /* PETSC_UNUSED AppCtx *user=(AppCtx*)ptr; */
  DM             da;
  PetscErrorCode ierr;
  PetscInt       i,j,Mx,My,xs,ys,xm,ym;
  PetscReal      two = 2.0,hx,hy,sx,sy;
  PetscScalar    u,uxx,uyy,**uarray,**f;
  Vec            localU;

  PetscFunctionBeginUser;
  ierr = TSGetDM(ts,&da);CHKERRQ(ierr);
  ierr = DMGetLocalVector(da,&localU);CHKERRQ(ierr);
  /* Mx and My feed the divisions below, so a failed query must not go unnoticed */
  ierr = DMDAGetInfo(da,PETSC_IGNORE,&Mx,&My,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,
                     PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE,PETSC_IGNORE);CHKERRQ(ierr);

  hx = 1.0/(PetscReal)(Mx-1); sx = 1.0/(hx*hx);
  hy = 1.0/(PetscReal)(My-1); sy = 1.0/(hy*hy);

  /*
     Scatter ghost points to local vector,using the 2-step process
        DMGlobalToLocalBegin(),DMGlobalToLocalEnd().
     By placing code between these two statements, computations can be
     done while messages are in transition.
  */
  ierr = DMGlobalToLocalBegin(da,U,INSERT_VALUES,localU);CHKERRQ(ierr);
  ierr = DMGlobalToLocalEnd(da,U,INSERT_VALUES,localU);CHKERRQ(ierr);

  /* Get pointers to vector data */
  ierr = DMDAVecGetArray(da,localU,&uarray);CHKERRQ(ierr);
  ierr = DMDAVecGetArray(da,F,&f);CHKERRQ(ierr);

  /* Get local grid boundaries */
  ierr = DMDAGetCorners(da,&xs,&ys,NULL,&xm,&ym,NULL);CHKERRQ(ierr);

  /* Compute function over the locally owned part of the grid */
  for (j=ys; j<ys+ym; j++) {
    for (i=xs; i<xs+xm; i++) {
      if (i == 0 || j == 0 || i == Mx-1 || j == My-1) {
        f[j][i] = uarray[j][i];
        continue;
      }
      u       = uarray[j][i];
      uxx     = (-two*u + uarray[j][i-1] + uarray[j][i+1])*sx;
      uyy     = (-two*u + uarray[j-1][i] + uarray[j+1][i])*sy;
      f[j][i] = uxx + uyy;
    }
  }

  /* Restore vectors */
  ierr = DMDAVecRestoreArray(da,localU,&uarray);CHKERRQ(ierr);
  ierr = DMDAVecRestoreArray(da,F,&f);CHKERRQ(ierr);
  ierr = DMRestoreLocalVector(da,&localU);CHKERRQ(ierr);
  ierr = PetscLogFlops(11.0*ym*xm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
  MatMult_SeqBSTRM_4 - Computes zz = A * xx for a streamed block matrix with
  block size 4.

  The values live in bstrm->as as four consecutive streams, each of length
  4*(number of blocks). Stream r holds, for every block, the four
  coefficients that produce component r of the output block.

  When compressed-row storage is in use, only the rows listed in
  a->compressedrow are visited. zz is therefore zeroed first so that the
  skipped rows, which hold no blocks, yield zero rather than stale data.

  Input Parameters:
    A  - matrix (A->data is Mat_SeqBAIJ, A->spptr is Mat_SeqBSTRM)
    xx - input vector

  Output Parameter:
    zz - result vector

  Returns 0 on success, or the PETSc error code of the failing call.
*/
PetscErrorCode MatMult_SeqBSTRM_4(Mat A,Vec xx,Vec zz)
{
  Mat_SeqBAIJ       *a     = (Mat_SeqBAIJ*)A->data;
  Mat_SeqBSTRM      *bstrm = (Mat_SeqBSTRM*)A->spptr;
  PetscScalar       *z = 0,sum1,sum2,sum3,sum4,x1,x2,x3,x4,*zarray;
  const PetscScalar *x,*xb;
  const MatScalar   *v1, *v2, *v3, *v4;
  PetscErrorCode    ierr;
  PetscInt          mbs,i,*idx,*ii,j,n,*ridx=NULL,nonzerorow=0;
  PetscBool         usecprow=a->compressedrow.use;
  PetscInt          slen;

  PetscFunctionBegin;
  ierr = VecGetArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
  ierr = VecGetArray(zz,&zarray);CHKERRQ(ierr);

  idx = a->j;
  if (usecprow) {
    mbs  = a->compressedrow.nrows;
    ii   = a->compressedrow.i;
    ridx = a->compressedrow.rindex;
    /* rows absent from the compressed list are never written below */
    for (i=0; i<4*a->mbs; i++) zarray[i] = 0.0;
  } else {
    mbs = a->mbs;
    ii  = a->i;
    z   = zarray;
  }

  /* start of each of the four value streams */
  slen = 4*(ii[mbs]-ii[0]);
  v1   = bstrm->as;
  v2   = v1 + slen;
  v3   = v2 + slen;
  v4   = v3 + slen;

  for (i=0; i<mbs; i++) {
    n    = ii[1] - ii[0];
    ii++;
    sum1 = 0.0; sum2 = 0.0; sum3 = 0.0; sum4 = 0.0;

    nonzerorow += (n>0);
    for (j=0; j<n; j++) {
      xb    = x + 4*(*idx++);
      x1    = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
      sum1 += v1[0]*x1 + v1[1]*x2 + v1[2]*x3 + v1[3]*x4;
      sum2 += v2[0]*x1 + v2[1]*x2 + v2[2]*x3 + v2[3]*x4;
      sum3 += v3[0]*x1 + v3[1]*x2 + v3[2]*x3 + v3[3]*x4;
      sum4 += v4[0]*x1 + v4[1]*x2 + v4[2]*x3 + v4[3]*x4;
      v1   += 4; v2 += 4; v3 += 4; v4 += 4;
    }
    /* compressed rows map back to their true block row through ridx */
    if (usecprow) z = zarray + 4*ridx[i];
    z[0] = sum1; z[1] = sum2; z[2] = sum3; z[3] = sum4;
    if (!usecprow) z += 4;
  }
  ierr = VecRestoreArray(xx,(PetscScalar**)&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(zz,&zarray);CHKERRQ(ierr);
  /* floating-point count: 32*nz in PetscInt can overflow for very large matrices */
  ierr = PetscLogFlops(32.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
  Evaluates FU = Gradient(L(w,u,lambda)).

  This local function acts on the ghosted version of U (accessed via
  DMCompositeGetLocalVectors() and DMCompositeScatter()) BUT the global,
  nonghosted version of FU (via DMCompositeGetAccess()).

  Input Parameters:
    snes - the SNES context (not used in the body)
    U    - packed input vector; its DM is the composite of (red, da)
    ctx  - user context (not used in the body)

  Output Parameter:
    FU   - packed function vector
*/
PetscErrorCode ComputeFunction(SNES snes,Vec U,Vec FU,void *ctx)
{
  PetscErrorCode ierr;
  PetscInt       first,count,k,npts;
  ULambda        *ul,*ful;
  PetscScalar    d,h,*w,*fw;
  Vec            vw,vfw,vul,vful;
  DM             packer,red,da;

  PetscFunctionBeginUser;
  ierr = VecGetDM(U, &packer);CHKERRQ(ierr);
  ierr = DMCompositeGetEntries(packer,&red,&da);CHKERRQ(ierr);

  /* ghosted local copies of the input pieces, direct access to the output pieces */
  ierr = DMCompositeGetLocalVectors(packer,&vw,&vul);CHKERRQ(ierr);
  ierr = DMCompositeScatter(packer,U,vw,vul);CHKERRQ(ierr);
  ierr = DMCompositeGetAccess(packer,FU,&vfw,&vful);CHKERRQ(ierr);

  ierr = DMDAGetCorners(da,&first,NULL,NULL,&count,NULL,NULL);CHKERRQ(ierr);
  ierr = DMDAGetInfo(da,0,&npts,0,0,0,0,0,0,0,0,0,0,0);CHKERRQ(ierr);

  ierr = VecGetArray(vw,&w);CHKERRQ(ierr);
  ierr = VecGetArray(vfw,&fw);CHKERRQ(ierr);
  ierr = DMDAVecGetArray(da,vul,&ul);CHKERRQ(ierr);
  ierr = DMDAVecGetArray(da,vful,&ful);CHKERRQ(ierr);

  d = npts-1.0;
  h = 1.0/d;

  /* derivative of L() w.r.t. w */
  if (first == 0) { /* only first processor computes this */
    fw[0] = -2.0*d*ul[0].lambda;
  }

  /* derivative of L() w.r.t. u; the order of the tests matters on very small grids */
  for (k=first; k<first+count; k++) {
    if (k == 0) {
      ful[0].lambda = h*ul[0].u + 2.*d*ul[0].lambda - d*ul[1].lambda;
    } else if (k == 1) {
      ful[1].lambda = 2.*h*ul[1].u + 2.*d*ul[1].lambda - d*ul[2].lambda;
    } else if (k == npts-1) {
      ful[npts-1].lambda = h*ul[npts-1].u + 2.*d*ul[npts-1].lambda - d*ul[npts-2].lambda;
    } else if (k == npts-2) {
      ful[npts-2].lambda = 2.*h*ul[npts-2].u + 2.*d*ul[npts-2].lambda - d*ul[npts-3].lambda;
    } else {
      ful[k].lambda = 2.*h*ul[k].u - d*(ul[k+1].lambda - 2.0*ul[k].lambda + ul[k-1].lambda);
    }
  }

  /* derivative of L() w.r.t. lambda */
  for (k=first; k<first+count; k++) {
    if (k == 0) {
      ful[0].u = 2.0*d*(ul[0].u - w[0]);
    } else if (k == npts-1) {
      ful[npts-1].u = 2.0*d*ul[npts-1].u;
    } else {
      ful[k].u = -(d*(ul[k+1].u - 2.0*ul[k].u + ul[k-1].u) - 2.0*h);
    }
  }

  ierr = VecRestoreArray(vw,&w);CHKERRQ(ierr);
  ierr = VecRestoreArray(vfw,&fw);CHKERRQ(ierr);
  ierr = DMDAVecRestoreArray(da,vul,&ul);CHKERRQ(ierr);
  ierr = DMDAVecRestoreArray(da,vful,&ful);CHKERRQ(ierr);
  ierr = DMCompositeRestoreLocalVectors(packer,&vw,&vul);CHKERRQ(ierr);
  ierr = DMCompositeRestoreAccess(packer,FU,&vfw,&vful);CHKERRQ(ierr);
  ierr = PetscLogFlops(13.0*npts);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
  MSA_InitialPoint - Calculates the initial guess in one of three ways.

  Input Parameters:
    user - user-defined application context (provides da, mx, my and the
           boundary arrays bottom, top, left, right)
    X    - vector for initial guess

  Output Parameters:
    X    - newly computed initial guess

  Selection:
    -start <value>  : every entry of X is set to <value>
    -random <k>, k>0: X is filled with random values k times and then
                      shifted by -0.5
    otherwise       : each entry is a weighted average of the boundary values

  Returns 0 on success; a failing call returns early through CHKERRQ.
*/
static int MSA_InitialPoint(AppCtx * user, Vec X)
{
  int        info;
  PetscInt   start2=-1,i,j;
  PetscReal  start1=0;
  PetscTruth flg1,flg2;

  info = PetscOptionsGetReal(PETSC_NULL,"-start",&start1,&flg1); CHKERRQ(info);
  info = PetscOptionsGetInt(PETSC_NULL,"-random",&start2,&flg2); CHKERRQ(info);

  if (flg1){ /* Constant start vector with the value given by -start */

    info = VecSet(X, start1); CHKERRQ(info);

  } else if (flg2 && start2>0){ /* Try a random start between -0.5 and 0.5 */

    PetscRandom rctx;  PetscScalar np5=-0.5;

    info = PetscRandomCreate(PETSC_COMM_WORLD,&rctx); CHKERRQ(info);
    for (i=0; i<start2; i++){
      info = VecSetRandom(X, rctx); CHKERRQ(info);
    }
    info = PetscRandomDestroy(rctx); CHKERRQ(info);
    info = VecShift(X, np5); CHKERRQ(info);

  } else { /* Take an average of the boundary conditions */

    PetscInt    xs,xm,ys,ym;
    PetscInt    mx=user->mx,my=user->my;
    PetscScalar **x;

    /* Get local mesh boundaries */
    info = DAGetCorners(user->da,&xs,&ys,PETSC_NULL,&xm,&ym,PETSC_NULL); CHKERRQ(info);

    /* Get pointers to vector data; x is dereferenced below, so a failure must stop here */
    info = DAVecGetArray(user->da,X,(void**)&x); CHKERRQ(info);

    /* Perform local computations; the boundary arrays are offset by one relative to the owned range */
    for (j=ys; j<ys+ym; j++){
      for (i=xs; i< xs+xm; i++){
        x[j][i] = ( ((j+1)*user->bottom[i-xs+1]+(my-j+1)*user->top[i-xs+1])/(my+2)+
                    ((i+1)*user->left[j-ys+1]+(mx-i+1)*user->right[j-ys+1])/(mx+2))/2.0;
      }
    }

    /* Restore vectors */
    info = DAVecRestoreArray(user->da,X,(void**)&x); CHKERRQ(info);
    info = PetscLogFlops(9*xm*ym); CHKERRQ(info);

  }
  return 0;
}
/*
  VecPointwiseMax_Seq - Runs VecPointwiseMax_kernel on (win, xin, yin)
  through the thread communicator attached to win, then logs one flop per
  local entry of win.

  Input Parameters:
    xin, yin - the vectors handed to the kernel as inputs

  Output Parameter:
    win      - the vector handed to the kernel as its first argument

  Returns 0 on success, or the PETSc error code of the failing call.
*/
PetscErrorCode VecPointwiseMax_Seq(Vec win,Vec xin,Vec yin)
{
  PetscErrorCode err;

  PetscFunctionBegin;
  err = PetscThreadCommRunKernel3(PetscObjectComm((PetscObject)win),
                                  (PetscThreadKernel)VecPointwiseMax_kernel,
                                  win,xin,yin);
  CHKERRQ(err);

  err = PetscLogFlops(win->map->n);
  CHKERRQ(err);
  PetscFunctionReturn(0);
}
/*
  MatSolve_SeqBAIJ_1_NaturalOrdering_inplace - Solves A x = b with a factored
  BAIJ matrix of block size 1.

  Special case where the matrix was ILU(0) factored in the natural ordering.
  This eliminates the need for the column and row permutation.

  Row i keeps its strictly lower entries in [ai[i], diag[i]), the diagonal
  entry at diag[i] and its strictly upper entries in (diag[i], ai[i+1]). The
  backward sweep multiplies by the stored diagonal value, so that value is
  presumably the inverse of the pivot - confirm against the factorization.

  Input Parameters:
    A  - factored matrix (A->data is Mat_SeqBAIJ)
    bb - right-hand side

  Output Parameter:
    xx - solution

  Returns 0 on success, or the PETSc error code of the failing call.
*/
PetscErrorCode MatSolve_SeqBAIJ_1_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
  const PetscInt    n  = a->mbs,*vi,*ai=a->i,*aj=a->j,*diag=a->diag;
  PetscErrorCode    ierr;
  const MatScalar   *aa=a->a,*v;
  PetscScalar       *x;
  const PetscScalar *b;
  PetscScalar       s1,x1;
  PetscInt          jdx,idt,idx,nz,i;

  PetscFunctionBegin;
  ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);

  /* forward solve the lower triangular */
  idx = 0;
  /* the first row has no lower part; skip it entirely for an empty matrix */
  if (n > 0) x[0] = b[0];
  for (i=1; i<n; i++) {
    v    = aa      + ai[i];
    vi   = aj      + ai[i];
    nz   = diag[i] - ai[i];
    idx += 1;
    s1   = b[idx];
    while (nz--) {
      jdx = *vi++;
      x1  = x[jdx];
      s1 -= v[0]*x1;
      v  += 1;
    }
    x[idx] = s1;
  }

  /* backward solve the upper triangular */
  for (i=n-1; i>=0; i--) {
    v   = aa + diag[i] + 1;
    vi  = aj + diag[i] + 1;
    nz  = ai[i+1] - diag[i] - 1;
    idt = i;
    s1  = x[idt];
    while (nz--) {
      idx = *vi++;
      x1  = x[idx];
      s1 -= v[0]*x1;
      v  += 1;
    }
    /* scale by the stored diagonal entry */
    v      = aa + diag[i];
    x[idt] = v[0]*s1;
  }
  ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscLogFlops(2.0*(a->nz) - A->cmap->n);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
  EvaluateFunction - Fills F with one simulated value per observation.

  Input Parameters:
    tao - the Tao context (not used in the body)
    X   - parameter vector passed to every simulation
    ptr - user context, cast to AppCtx* (user->size selects the mode)

  Output Parameter:
    F   - vector with NOBSERVATIONS entries

  With user->size == 1 the simulations run here, one after another.
  Otherwise this process acts as the master: it receives one value at a time
  from any worker, stores it at the index carried in the message tag, and
  answers with the next task or with an idle message once all tasks are
  handed out. The loop ends when every task has reported back and every
  worker (user->size-1 of them) has checked in with IDLE_TAG.

  Returns 0 on success, or the error code of the failing call.
*/
PetscErrorCode EvaluateFunction(Tao tao, Vec X, Vec F, void *ptr)
{
  AppCtx         *user = (AppCtx *)ptr;
  PetscInt       i;
  PetscReal      *x,*f;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecGetArray(X,&x);CHKERRQ(ierr);
  ierr = VecGetArray(F,&f);CHKERRQ(ierr);
  if (user->size == 1) {
    /* Single processor */
    for (i=0;i<NOBSERVATIONS;i++) {
      ierr = RunSimulation(x,i,&f[i],user);CHKERRQ(ierr);
    }
  } else {
    /* Multiprocessor master */
    PetscMPIInt tag;
    PetscInt    finishedtasks,next_task,checkedin;
    PetscReal   f_i;
    MPI_Status  status;

    next_task=0;
    finishedtasks=0;
    checkedin=0;

    while(finishedtasks < NOBSERVATIONS || checkedin < user->size-1) {
      ierr = MPI_Recv(&f_i,1,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PETSC_COMM_WORLD,&status);CHKERRQ(ierr);
      if (status.MPI_TAG == IDLE_TAG) {
        checkedin++;
      } else {
        /* the tag of a result message is the index of the observation */
        tag = status.MPI_TAG;
        f[tag] = (PetscReal)f_i;
        finishedtasks++;
      }

      if (next_task<NOBSERVATIONS) {
        ierr = MPI_Send(x,NPARAMETERS,MPIU_REAL,status.MPI_SOURCE,next_task,PETSC_COMM_WORLD);CHKERRQ(ierr);
        next_task++;
      } else {
        /* Send idle message */
        ierr = MPI_Send(x,NPARAMETERS,MPIU_REAL,status.MPI_SOURCE,IDLE_TAG,PETSC_COMM_WORLD);CHKERRQ(ierr);
      }
    }
  }
  ierr = VecRestoreArray(X,&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(F,&f);CHKERRQ(ierr);
  /* checked like every other PETSc call in this routine */
  ierr = PetscLogFlops(6*NOBSERVATIONS);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
  MatScale_MPIDense - Multiplies the locally stored values of a parallel
  dense matrix by alpha with a single BLAS scal call.

  Input Parameters:
    inA   - the matrix (inA->data is Mat_MPIDense, whose A member is a
            sequential dense matrix)
    alpha - the scale factor

  The call covers inA->rmap->n * inA->cmap->N consecutive values starting at
  the local matrix's value array.

  Returns 0 on success, or the PETSc error code of the failing call.
*/
PetscErrorCode MatScale_MPIDense(Mat inA,PetscScalar alpha)
{
  Mat_MPIDense   *mpi   = (Mat_MPIDense*)inA->data;
  Mat_SeqDense   *local = (Mat_SeqDense*)mpi->A->data;
  PetscScalar    factor = alpha;   /* addressable copy for the BLAS interface */
  PetscErrorCode ierr;
  PetscBLASInt   stride = 1;
  PetscBLASInt   count  = PetscBLASIntCast(inA->rmap->n*inA->cmap->N);

  PetscFunctionBegin;
  BLASscal_(&count,&factor,local->v,&stride);
  ierr = PetscLogFlops(count);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
  MatMult_SeqAIJ_DXML - Computes y = A * x for a sequential AIJ matrix by
  calling the external dmatvec_genr_ routine on the raw arrays.

  Input Parameters:
    A - the matrix (A->data is Mat_SeqAIJ; a->a, a->i, a->j and a->nz are
        passed through)
    x - input vector

  Output Parameter:
    y - result vector

  Returns 0 on success, or the PETSc error code of the failing call.
*/
static int MatMult_SeqAIJ_DXML(Mat A,Vec x,Vec y)
{
  Mat_SeqAIJ  *a = (Mat_SeqAIJ*)A->data;
  PetscScalar *xx,*yy;
  int         ierr,zero = 0;

  PetscFunctionBegin;
  ierr = VecGetArray(x,&xx);CHKERRQ(ierr);
  ierr = VecGetArray(y,&yy);CHKERRQ(ierr);
  dmatvec_genr_(&zero,a->a,a->i,a->j,&a->nz,0,xx,yy,&A->rmap->n);
  /* every VecGetArray needs its matching restore once the arrays are no longer used */
  ierr = VecRestoreArray(x,&xx);CHKERRQ(ierr);
  ierr = VecRestoreArray(y,&yy);CHKERRQ(ierr);
  ierr = PetscLogFlops(2.0*a->nz - A->rmap->n);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
  FormIFunctionLocal - Evaluates the implicit function on the local patch.

  Input Parameters:
    info  - DMDA local info (owned range, global sizes, da)
    ptime - current time (not used in the body)
    x     - local solution array
    xt    - local time-derivative array
    ctx   - user context (not used in the body)

  Output Parameter:
    f     - local function array

  Every owned point gets f = xt*hx*hy. Points strictly inside the grid
  additionally get c*(uxx + uyy), where c comes from the named global vector
  "coefficient" of info->da and uxx, uyy are the scaled second differences
  of x on a uniform grid over a domain of side 10.
*/
PetscErrorCode FormIFunctionLocal(DMDALocalInfo *info,PetscScalar ptime,PetscScalar **x,PetscScalar **xt,PetscScalar **f,void *ctx)
{
  PetscErrorCode ierr;
  PetscInt       ix,jy;
  PetscReal      hx,hy,hxdhy,hydhx,scale;
  PetscScalar    u,uxx,uyy;
  Vec            C;
  PetscScalar    **c;

  PetscFunctionBeginUser;
  ierr = DMGetNamedGlobalVector(info->da,"coefficient",&C);CHKERRQ(ierr);
  ierr = DMDAVecGetArray(info->da,C,&c);CHKERRQ(ierr);

  hx    = 10.0/((PetscReal)(info->mx-1));
  hy    = 10.0/((PetscReal)(info->my-1));
  hxdhy = hx/hy;
  hydhx = hy/hx;
  scale = hx*hy;

  for (jy=info->ys; jy<info->ys+info->ym; jy++) {
    for (ix=info->xs; ix<info->xs+info->xm; ix++) {
      f[jy][ix] = xt[jy][ix]*scale;

      /*
         Boundary points keep only the time-derivative term. The one-sided
         difference terms are disabled in this routine:
           i == 0    : f += (x[j][i] - x[j][i+1])*dhx
           i == mx-1 : f += (x[j][i] - x[j][i-1])*dhx
           j == 0    : f += (x[j][i] - x[j+1][i])*dhy
           j == my-1 : f += (x[j][i] - x[j-1][i])*dhy
         with dhx = 1/hx and dhy = 1/hy.
      */
      if (ix == 0 || ix == info->mx-1 || jy == 0 || jy == info->my-1) continue;

      u          = x[jy][ix];
      uyy        = (2.0*u - x[jy-1][ix] - x[jy+1][ix])*hxdhy;
      uxx        = (2.0*u - x[jy][ix-1] - x[jy][ix+1])*hydhx;
      f[jy][ix] += c[jy][ix]*(uxx + uyy);
    }
  }

  ierr = PetscLogFlops(11.*info->ym*info->xm);CHKERRQ(ierr);
  ierr = DMDAVecRestoreArray(info->da,C,&c);CHKERRQ(ierr);
  ierr = DMRestoreNamedGlobalVector(info->da,"coefficient",&C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
  FormFunctionLocalMMS2 - Evaluates nonlinear function, F(x) on local process
  patch, with a forcing term built from sin(pi*x)*sin(pi*y).

  Input Parameters:
    info - DMDA local info (owned range, global sizes, da)
    vx   - local solution array
    user - application context (provides param)

  Output Parameter:
    f    - local function array

  Boundary points get 2*(hy/hx + hx/hy)*u. At interior points, neighbours
  that lie on the boundary are replaced by zero in the difference stencil.
*/
PetscErrorCode FormFunctionLocalMMS2(DMDALocalInfo *info,PetscScalar **vx,PetscScalar **f,AppCtx *user)
{
  PetscErrorCode ierr;
  PetscInt       ix,jy;
  PetscReal      lambda,hx,hy,hxdhy,hydhx;
  PetscScalar    u,ue,uw,un,us,uxx,uyy;
  PetscReal      x,y,sinx,siny;
  DM             coordDA;
  Vec            coordinates;
  DMDACoor2d     **coords;

  PetscFunctionBeginUser;
  lambda = user->param;
  hx     = 1.0/(PetscReal)(info->mx-1);
  hy     = 1.0/(PetscReal)(info->my-1);
  hxdhy  = hx/hy;
  hydhx  = hy/hx;

  /* Coordinates of the grid points */
  ierr = DMGetCoordinateDM(info->da, &coordDA);CHKERRQ(ierr);
  ierr = DMGetCoordinates(info->da, &coordinates);CHKERRQ(ierr);
  ierr = DMDAVecGetArray(coordDA, coordinates, &coords);CHKERRQ(ierr);

  /* Sweep the locally owned points */
  for (jy=info->ys; jy<info->ys+info->ym; jy++) {
    for (ix=info->xs; ix<info->xs+info->xm; ix++) {
      if (ix == 0 || jy == 0 || ix == info->mx-1 || jy == info->my-1) {
        f[jy][ix] = 2.0*(hydhx+hxdhy)*vx[jy][ix];
        continue;
      }

      x    = PetscRealPart(coords[jy][ix].x);
      y    = PetscRealPart(coords[jy][ix].y);
      sinx = PetscSinReal(PETSC_PI*x);
      siny = PetscSinReal(PETSC_PI*y);

      u  = vx[jy][ix];
      uw = vx[jy][ix-1];
      ue = vx[jy][ix+1];
      un = vx[jy-1][ix];
      us = vx[jy+1][ix];

      /* neighbours sitting on the boundary contribute zero */
      if (ix-1 == 0)          uw = 0.;
      if (ix+1 == info->mx-1) ue = 0.;
      if (jy-1 == 0)          un = 0.;
      if (jy+1 == info->my-1) us = 0.;

      uxx       = (2.0*u - uw - ue)*hydhx;
      uyy       = (2.0*u - un - us)*hxdhy;
      f[jy][ix] = uxx + uyy - hx*hy*(lambda*PetscExpScalar(u) + 2*PetscSqr(PETSC_PI)*sinx*siny - lambda*exp(sinx*siny));
    }
  }

  ierr = DMDAVecRestoreArray(coordDA, coordinates, &coords);CHKERRQ(ierr);
  ierr = PetscLogFlops(11.0*info->ym*info->xm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
  FormFunction1 - Evaluates the nonlinear function F(x) on a 3D grid.

  Input Parameters:
    snes - the SNES context (not used in the body)
    X    - input vector
    ptr  - user context, cast to AppCtx* (provides da, localX, localF, mx,
           my, mz, param)

  Output Parameter:
    F    - function vector

  The work is done on the ghosted local vectors: X is scattered into localX,
  the result is written to localF, and localF is then copied into F. Points
  on the global boundary get F = x. Interior points get the scaled 7-point
  difference of x minus sc*exp(x), with sc = Hx*Hy*Hz*param.
*/
PetscErrorCode FormFunction1(SNES snes,Vec X,Vec F,void *ptr)
{
  AppCtx         *user = (AppCtx*)ptr;
  PetscErrorCode ierr;
  PetscInt       i,j,k,loc,mx,my,mz,xs,ys,zs,xm,ym,zm,Xs,Ys,Zs,Xm,Ym,Zm,base1,base2;
  PetscReal      two = 2.0,one = 1.0,lambda,Hx,Hy,Hz,HxHzdHy,HyHzdHx,HxHydHz;
  PetscScalar    u,uxx,uyy,sc,*x,*f,uzz;
  Vec            localX = user->localX,localF = user->localF;

  mx      = user->mx; my = user->my; mz = user->mz; lambda = user->param;
  Hx      = one / (PetscReal)(mx-1);
  Hy      = one / (PetscReal)(my-1);
  Hz      = one / (PetscReal)(mz-1);
  sc      = Hx*Hy*Hz*lambda;
  HxHzdHy = Hx*Hz/Hy;
  HyHzdHx = Hy*Hz/Hx;
  HxHydHz = Hx*Hy/Hz;

  /* a failed scatter would leave localX stale, so both halves are checked */
  ierr = DAGlobalToLocalBegin(user->da,X,INSERT_VALUES,localX);CHKERRQ(ierr);
  ierr = DAGlobalToLocalEnd(user->da,X,INSERT_VALUES,localX);CHKERRQ(ierr);
  ierr = VecGetArray(localX,&x);CHKERRQ(ierr);
  ierr = VecGetArray(localF,&f);CHKERRQ(ierr);

  /* owned range (lower case) and ghosted range (upper case) of this process */
  ierr = DAGetCorners(user->da,&xs,&ys,&zs,&xm,&ym,&zm);CHKERRQ(ierr);
  ierr = DAGetGhostCorners(user->da,&Xs,&Ys,&Zs,&Xm,&Ym,&Zm);CHKERRQ(ierr);

  for (k=zs; k<zs+zm; k++) {
    base1 = (Xm*Ym)*(k-Zs);
    for (j=ys; j<ys+ym; j++) {
      base2 = base1 + (j-Ys)*Xm;
      for (i=xs; i<xs+xm; i++) {
        /* position of (i,j,k) inside the ghosted local arrays */
        loc = base2 + (i-Xs);
        if (i == 0 || j == 0 || k== 0 || i == mx-1 || j == my-1 || k == mz-1) {
          f[loc] = x[loc];
        } else {
          u      = x[loc];
          uxx    = (two*u - x[loc-1] - x[loc+1])*HyHzdHx;
          uyy    = (two*u - x[loc-Xm] - x[loc+Xm])*HxHzdHy;
          uzz    = (two*u - x[loc-Xm*Ym] - x[loc+Xm*Ym])*HxHydHz;
          f[loc] = uxx + uyy + uzz - sc*PetscExpScalar(u);
        }
      }
    }
  }
  ierr = VecRestoreArray(localX,&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(localF,&f);CHKERRQ(ierr);
  /* stick values into global vector */
  ierr = DALocalToGlobal(user->da,localF,INSERT_VALUES,F);CHKERRQ(ierr);
  ierr = PetscLogFlops(11.0*ym*xm*zm);CHKERRQ(ierr);
  return 0;
}
/*
  MatSolveTranspose_SeqBAIJ_1_NaturalOrdering - Solves A^T x = b with a
  factored BAIJ matrix of block size 1 in natural ordering.

  The right-hand side is copied into the work array a->solve_work, both
  sweeps update that array in place, and the result is copied to xx.

  Input Parameters:
    A  - factored matrix (A->data is Mat_SeqBAIJ)
    bb - right-hand side

  Output Parameter:
    xx - solution

  Returns 0 on success, or the PETSc error code of the failing call.
*/
PetscErrorCode MatSolveTranspose_SeqBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
{
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)A->data;
  PetscErrorCode    ierr;
  const PetscInt    *dptr = baij->diag,*rowptr = baij->i,*colidx = baij->j,*cols;
  PetscInt          row,nrows = baij->mbs,k;
  PetscInt          cnt;
  PetscScalar       *sol,*work,piv;
  const MatScalar   *vals = baij->a,*rv;
  const PetscScalar *rhs;

  PetscFunctionBegin;
  ierr = VecGetArrayRead(bb,&rhs);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&sol);CHKERRQ(ierr);
  work = baij->solve_work;

  /* natural ordering: the right-hand side is copied straight into the work array */
  for (row=0; row<nrows; row++) work[row] = rhs[row];

  /* forward solve the U^T */
  for (row=0; row<nrows; row++) {
    rv   = vals   + dptr[row+1] + 1;
    cols = colidx + dptr[row+1] + 1;
    cnt  = dptr[row] - dptr[row+1] - 1;

    piv  = work[row];
    piv *= rv[cnt];  /* multiply by inverse of diagonal entry */
    for (k=0; k<cnt; k++) work[cols[k]] -= piv*rv[k];
    work[row] = piv;
  }

  /* backward solve the L^T */
  for (row=nrows-1; row>=0; row--) {
    rv   = vals   + rowptr[row];
    cols = colidx + rowptr[row];
    cnt  = rowptr[row+1] - rowptr[row];

    piv = work[row];
    for (k=0; k<cnt; k++) work[cols[k]] -= piv*rv[k];
  }

  /* natural ordering: the work array is copied straight into the solution */
  for (row=0; row<nrows; row++) sol[row] = work[row];

  ierr = VecRestoreArrayRead(bb,&rhs);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&sol);CHKERRQ(ierr);
  ierr = PetscLogFlops(2.0*baij->nz-A->cmap->n);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
   FormObjectiveLocal - Evaluates the objective on the local process patch and
   sums the per-process values into *obj.

   Input Parameters:
.  info - local grid description; mx,my (global sizes), xs,ys,xm,ym (owned
          range) and da (the DMDA, used to obtain the communicator) are read
.  x    - local solution values, indexed x[j][i]
.  user - application context; user->param is read as lambda

   Output Parameter:
.  obj  - the objective, reduced with MPIU_SUM over the DMDA's communicator

   Notes:
   Boundary points contribute (hydhx + hxdhy)*x*x.  At interior points any
   neighbour that lies on the global boundary is treated as zero.
*/
PetscErrorCode FormObjectiveLocal(DMDALocalInfo *info,PetscScalar **x,PetscReal *obj,AppCtx *user)
{
  PetscErrorCode ierr;
  PetscInt       i,j;
  PetscReal      lambda,hx,hy,hxdhy,hydhx,sc,lobj=0;
  PetscScalar    u,ue,uw,un,us,uxux,uyuy;
  MPI_Comm       comm;

  PetscFunctionBeginUser;
  *obj   = 0;
  /* DMDALocalInfo is a plain struct, not a PetscObject: take the communicator
     from the DMDA it describes rather than casting the struct pointer */
  ierr   = PetscObjectGetComm((PetscObject)info->da,&comm);CHKERRQ(ierr);
  lambda = user->param;
  hx     = 1.0/(PetscReal)(info->mx-1);
  hy     = 1.0/(PetscReal)(info->my-1);
  sc     = hx*hy*lambda;
  hxdhy  = hx/hy;
  hydhx  = hy/hx;

  /* Compute function over the locally owned part of the grid */
  for (j=info->ys; j<info->ys+info->ym; j++) {
    for (i=info->xs; i<info->xs+info->xm; i++) {
      if (i == 0 || j == 0 || i == info->mx-1 || j == info->my-1) {
        lobj += PetscRealPart((hydhx + hxdhy)*x[j][i]*x[j][i]);
      } else {
        u  = x[j][i];
        uw = x[j][i-1];
        ue = x[j][i+1];
        un = x[j-1][i];
        us = x[j+1][i];

        /* neighbours sitting on the global boundary are replaced by zero */
        if (i-1 == 0) uw = 0.;
        if (i+1 == info->mx-1) ue = 0.;
        if (j-1 == 0) un = 0.;
        if (j+1 == info->my-1) us = 0.;

        /* F[u] = 1/2\int_{\omega}\nabla^2u(x)*u(x)*dx */
        uxux = u*(2.*u - ue - uw)*hydhx;
        uyuy = u*(2.*u - un - us)*hxdhy;

        lobj += PetscRealPart(0.5*(uxux + uyuy) - sc*PetscExpScalar(u));
      }
    }
  }
  ierr = PetscLogFlops(12.0*info->ym*info->xm);CHKERRQ(ierr);
  ierr = MPI_Allreduce(&lobj,obj,1,MPIU_REAL,MPIU_SUM,comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
   FormHessian - Fills the Hessian matrix H from the current point X.

   Input Parameters:
.  tao  - the Tao context (not referenced in the body)
.  X    - input vector
.  Hpre - not referenced in the body
.  ptr  - user-defined context, cast to AppCtx*; alpha, n and chained are read

   Output Parameter:
.  H    - Hessian matrix, assembled on return

   Notes:
   When user->chained is set, one 2x2 block per consecutive pair (i,i+1) is
   accumulated with ADD_VALUES; otherwise one 2x2 block per disjoint pair
   (2i,2i+1) is written with INSERT_VALUES.

   NOTE(review): for a chained term alpha*(x[i+1]-x[i]^2)^2 + (1-x[i])^2 the
   second derivatives would be 2 + 2*alpha*(4*x[i]^2 - 2*t), -4*alpha*x[i] and
   2*alpha; the [0][0] and [1][1] entries below differ from that.  The
   objective is not visible here - confirm against it before changing.
*/
PetscErrorCode FormHessian(Tao tao,Vec X,Mat H, Mat Hpre, void *ptr)
{
  AppCtx            *user = (AppCtx*)ptr;
  PetscErrorCode    ierr;
  PetscInt          k, idx[2];
  PetscReal         alpha=user->alpha;
  PetscReal         blk[2][2];
  const PetscScalar *xv;
  PetscBool         isAssembled;

  PetscFunctionBeginUser;
  /* Clear old entries when the matrix has been assembled before */
  ierr = MatAssembled(H,&isAssembled);CHKERRQ(ierr);
  if (isAssembled) {
    ierr = MatZeroEntries(H); CHKERRQ(ierr);
  }

  /* Read-only access to the current point */
  ierr = VecGetArrayRead(X,&xv);CHKERRQ(ierr);

  if (!user->chained) {
    /* independent 2x2 blocks on the pairs (2k, 2k+1) */
    for (k=0; k<user->n/2; k++) {
      blk[1][1] = 2*alpha;
      blk[0][0] = -4*alpha*(xv[2*k+1]-3*xv[2*k]*xv[2*k]) + 2;
      blk[0][1] = -4.0*alpha*xv[2*k];
      blk[1][0] = blk[0][1];
      idx[0]    = 2*k;
      idx[1]    = 2*k+1;
      ierr = MatSetValues(H,2,idx,2,idx,&blk[0][0],INSERT_VALUES);CHKERRQ(ierr);
    }
  } else {
    /* overlapping 2x2 blocks on the pairs (k, k+1), summed into H */
    ierr = MatZeroEntries(H);CHKERRQ(ierr);
    for (k=0; k<user->n-1; k++) {
      PetscScalar t1 = xv[k+1] - xv[k]*xv[k];

      blk[0][0] = 2 + 2*alpha*(t1*(-2) - 2*xv[k]);
      blk[0][1] = 2*alpha*(-2*xv[k]);
      blk[1][0] = 2*alpha*(-2*xv[k]);
      blk[1][1] = 2*alpha*t1;
      idx[0]    = k;
      idx[1]    = k+1;
      ierr = MatSetValues(H,2,idx,2,idx,&blk[0][0],ADD_VALUES);CHKERRQ(ierr);
    }
  }
  ierr = VecRestoreArrayRead(X,&xv);CHKERRQ(ierr);

  /* Two-step assembly of the finished matrix */
  ierr = MatAssemblyBegin(H,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(H,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscLogFlops(9.0*user->n/2.0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
   FormFunction - Evaluates the nonlinear residual F(X) on the fine 2D grid.

   Input Parameters:
.  snes - the SNES context (not referenced in the body)
.  X    - input global vector
.  ptr  - user-defined context, cast to AppCtx*; param and the members of
          user->fine (mx, my, da, localX, localF) are read

   Output Parameter:
.  F    - global vector that receives the residual

   Notes:
   Interior points get the scaled 5-point second difference minus sc*exp(u).
   Points interior in exactly one direction get .5*two*(hydhx+hxdhy)*x, and
   the remaining points (interior in neither direction) get
   .25*two*(hydhx+hxdhy)*x.

   NOTE(review): exp() is the real libm function, so this presumably assumes a
   real-valued PetscScalar - confirm before building with complex scalars.
*/
PetscErrorCode FormFunction(SNES snes,Vec X,Vec F,void *ptr)
{
  AppCtx         *user = (AppCtx *) ptr;
  PetscInt       i, j, row, mx, my, xs, ys, xm, ym, Xs, Ys, Xm, Ym;
  PetscInt       xin, yin;
  PetscErrorCode ierr;
  double         two = 2.0, one = 1.0, lambda, hx, hy, hxdhy, hydhx, sc;
  double         edgeCoef, cornerCoef;
  PetscScalar    u, uxx, uyy, *x, *f;
  Vec            localX = user->fine.localX, localF = user->fine.localF;

  mx         = user->fine.mx;
  my         = user->fine.my;
  lambda     = user->param;
  hx         = one/(double)(mx-1);
  hy         = one/(double)(my-1);
  sc         = hx*hy*lambda;
  hxdhy      = hx/hy;
  hydhx      = hy/hx;
  edgeCoef   = .5*two*(hydhx + hxdhy);
  cornerCoef = .25*two*(hydhx + hxdhy);

  /* Bring the global input into the local vector */
  ierr = DMGlobalToLocalBegin(user->fine.da,X,INSERT_VALUES,localX);CHKERRQ(ierr);
  ierr = DMGlobalToLocalEnd(user->fine.da,X,INSERT_VALUES,localX);CHKERRQ(ierr);

  /* Owned range (xs,ys,xm,ym) and ghosted range (Xs,Ys,Xm,Ym) */
  ierr = DMDAGetCorners(user->fine.da,&xs,&ys,0,&xm,&ym,0);CHKERRQ(ierr);
  ierr = DMDAGetGhostCorners(user->fine.da,&Xs,&Ys,0,&Xm,&Ym,0);CHKERRQ(ierr);

  ierr = VecGetArray(localX,&x);CHKERRQ(ierr);
  ierr = VecGetArray(localF,&f);CHKERRQ(ierr);

  for (j=ys; j<ys+ym; j++) {
    yin = (j > 0 && j < my-1);
    for (i=xs; i<xs+xm; i++) {
      /* position of (i,j) inside the ghosted local array */
      row = (j - Ys)*Xm + (i - Xs);
      xin = (i > 0 && i < mx-1);
      if (xin && yin) {
        u      = x[row];
        uxx    = (two*u - x[row-1] - x[row+1])*hydhx;
        uyy    = (two*u - x[row-Xm] - x[row+Xm])*hxdhy;
        f[row] = uxx + uyy - sc*exp(u);
      } else if (xin || yin) {
        f[row] = edgeCoef*x[row];
      } else {
        f[row] = cornerCoef*x[row];
      }
    }
  }

  ierr = VecRestoreArray(localX,&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(localF,&f);CHKERRQ(ierr);

  /* Move the local result into the global output vector */
  ierr = DMLocalToGlobalBegin(user->fine.da,localF,INSERT_VALUES,F);CHKERRQ(ierr);
  ierr = DMLocalToGlobalEnd(user->fine.da,localF,INSERT_VALUES,F);CHKERRQ(ierr);
  ierr = PetscLogFlops(11.0*ym*xm);CHKERRQ(ierr);
  return 0;
}
/*
   MatMultTranspose_SeqBSTRM_4 - Accumulates the product of the 4x4-block
   matrix with xx into zz, scattering each block row's contribution to the
   column blocks it touches.

   Input Parameters:
.  A  - the matrix; A->data is read as Mat_SeqBAIJ (mbs, i, j, nz) and
        A->spptr as Mat_SeqBSTRM (as)
.  xx - input vector

   Output Parameter:
.  zz - result vector; zeroed first, then accumulated into

   Notes:
   sbstrm->as is walked as four consecutive streams of length
   slen = 4*(ai[mbs]-ai[0]); each block consumes four values from every stream.
   NOTE(review): this is the transpose product provided stream k holds row k of
   each block - confirm against the routine that builds sbstrm->as.
*/
PetscErrorCode MatMultTranspose_SeqBSTRM_4(Mat A,Vec xx,Vec zz)
{
  Mat_SeqBAIJ    *a      = (Mat_SeqBAIJ*)A->data;
  Mat_SeqBSTRM   *sbstrm = (Mat_SeqBSTRM*)A->spptr;
  PetscScalar    zero = 0.0;
  PetscScalar    x1,x2,x3,x4;
  PetscScalar    *x, *xb, *z;
  MatScalar      *v1, *v2, *v3, *v4;
  PetscErrorCode ierr;
  PetscInt       mbs =a->mbs,i,*aj=a->j,*ai=a->i,n,*ib,cval,j;
  PetscInt       nonzerorow=0;
  PetscInt       slen;

  PetscFunctionBegin;
  ierr = VecSet(zz,zero);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArray(zz,&z);CHKERRQ(ierr);

  /* split the value array into its four streams */
  slen = 4*(ai[mbs]-ai[0]);
  v1   = sbstrm->as;
  v2   = v1 + slen;
  v3   = v2 + slen;
  v4   = v3 + slen;
  xb   = x;

  for (i=0; i<mbs; i++) {
    n           = ai[i+1] - ai[i];
    x1          = xb[0]; x2 = xb[1]; x3 = xb[2]; x4 = xb[3];
    xb         += 4;
    nonzerorow += (n>0);
    ib          = aj + ai[i];

    for (j=0; j<n; j++) {
      /* first scalar index of the column block this entry updates */
      cval       = ib[j]*4;
      z[cval]   += v1[0]*x1 + v2[0]*x2 + v3[0]*x3 + v4[0]*x4;
      z[cval+1] += v1[1]*x1 + v2[1]*x2 + v3[1]*x3 + v4[1]*x4;
      z[cval+2] += v1[2]*x1 + v2[2]*x2 + v3[2]*x3 + v4[2]*x4;
      z[cval+3] += v1[3]*x1 + v2[3]*x2 + v3[3]*x3 + v4[3]*x4;
      v1 += 4; v2 += 4; v3 += 4; v4 += 4;
    }
  }
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(zz,&z);CHKERRQ(ierr);
  /* form the count in floating point so a large nz cannot overflow PetscInt */
  ierr = PetscLogFlops(32.0*a->nz - 4.0*nonzerorow);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/* FormJacobianLocal - Evaluates Jacobian matrix on local process patch */ PetscErrorCode FormJacobianLocal(DMDALocalInfo *info,PetscScalar **x,Mat A,Mat jac, MatStructure *str,ObsCtx *user) { PetscErrorCode ierr; PetscInt i,j; MatStencil col[5],row; PetscReal v[5],dx,dy,oxx,oyy; PetscFunctionBeginUser; dx = 4.0 / (PetscReal)(info->mx-1); dy = 4.0 / (PetscReal)(info->my-1); oxx = 1.0 / (dx * dx); oyy = 1.0 / (dy * dy); for (j=info->ys; j<info->ys+info->ym; j++) { for (i=info->xs; i<info->xs+info->xm; i++) { row.j = j; row.i = i; if (i == 0 || j == 0 || i == info->mx-1 || j == info->my-1) { /* boundary */ v[0] = 1.0; ierr = MatSetValuesStencil(jac,1,&row,1,&row,v,INSERT_VALUES);CHKERRQ(ierr); } else { /* interior grid points */ v[0] = -oyy; col[0].j = j - 1; col[0].i = i; v[1] = -oxx; col[1].j = j; col[1].i = i - 1; v[2] = 2.0 * (oxx + oyy); col[2].j = j; col[2].i = i; v[3] = -oxx; col[3].j = j; col[3].i = i + 1; v[4] = -oyy; col[4].j = j + 1; col[4].i = i; ierr = MatSetValuesStencil(jac,1,&row,5,col,v,INSERT_VALUES);CHKERRQ(ierr); } } } /* Assemble matrix, using the 2-step process: */ ierr = MatAssemblyBegin(jac,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(jac,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); if (A != jac) { ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); } *str = SAME_NONZERO_PATTERN; /* Tell the matrix we will never add a new nonzero location to the matrix. If we do, it will generate an error. */ ierr = MatSetOption(jac,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); ierr = PetscLogFlops(2.0*info->ym*info->xm);CHKERRQ(ierr); PetscFunctionReturn(0); }
/*
   FluidFieldDivergence_3D - Computes a forward-difference divergence of the
   field (US, VS, WS) and stores it in DIV.

   Input Parameters:
.  info - local grid description; xs,ys,zs,xm,ym,zm, mx,my,mz and da are read
.  d    - grid spacings; d.x, d.y and d.z are used as divisors
.  US   - first field component
.  VS   - second field component
.  WS   - third field component

   Output Parameter:
.  DIV  - receives (u[i+1]-u[i])/d.x + (v[j+1]-v[j])/d.y + (w[k+1]-w[k])/d.z

   Notes:
   In each direction the owned range is clipped: a range that starts at global
   index 0 starts at 2 instead, and a range that reaches the global size m
   stops before m-3.  Points outside the clipped range are left untouched.
*/
PetscErrorCode FluidFieldDivergence_3D( DALocalInfo info, Coor d, Vec US, Vec VS, Vec WS, Vec DIV )
{
  int            i,j,k;
  PetscReal      xe, xs, ye, ys, ze, zs;
  PetscReal      nx, ny, nz;
  PetscReal      ***u, ***v, ***w, ***div;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  xs = info.xs;
  xs = xs == 0 ? 2 : xs;
  xe = info.xs+info.xm;
  xe = xe == info.mx ? info.mx-3 : xe;
  ys = info.ys;
  ys = ys == 0 ? 2 : ys;
  ye = info.ys+info.ym;
  ye = ye == info.my ? info.my-3 : ye;
  zs = info.zs;
  zs = zs == 0 ? 2 : zs;
  ze = info.zs+info.zm;
  ze = ze == info.mz ? info.mz-3 : ze;

  ierr = DAVecGetArray(info.da,US,&u); CHKERRQ(ierr);
  ierr = DAVecGetArray(info.da,VS,&v); CHKERRQ(ierr);
  ierr = DAVecGetArray(info.da,WS,&w); CHKERRQ(ierr);
  ierr = DAVecGetArray(info.da,DIV,&div); CHKERRQ(ierr);

  for( k = zs; k < ze; ++k) {
    for (j = ys; j < ye; ++j) {
      for (i = xs; i < xe; ++i) {
        div[k][j][i] = (u[k][j][i+1] - u[k][j][i]) / d.x +
                       (v[k][j+1][i] - v[k][j][i]) / d.y +
                       (w[k+1][j][i] - w[k][j][i]) / d.z;
      }
    }
  }

  ierr = DAVecRestoreArray(info.da,US,&u); CHKERRQ(ierr);
  ierr = DAVecRestoreArray(info.da,VS,&v); CHKERRQ(ierr);
  ierr = DAVecRestoreArray(info.da,WS,&w); CHKERRQ(ierr);
  ierr = DAVecRestoreArray(info.da,DIV,&div); CHKERRQ(ierr);

  /* A clipped range can be empty (end before start); count it as zero points
     so the logged flops match the work actually done and are never negative */
  nx = xe > xs ? xe - xs : 0.0;
  ny = ye > ys ? ye - ys : 0.0;
  nz = ze > zs ? ze - zs : 0.0;
  ierr = PetscLogFlops(8*nx*ny*nz); CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
/*
   MatSolve_SeqBAIJ_1_NaturalOrdering - Forward and backward triangular solves
   for a factored SeqBAIJ matrix with block size 1, using no row/column
   permutation.

   Input Parameters:
.  A  - the matrix; A->data is read as Mat_SeqBAIJ (fields mbs, i, j, a, diag, nz)
.  bb - right-hand side vector (read only)

   Output Parameter:
.  xx - solution vector

   Notes:
   Returns immediately when the matrix has no rows.  The lower-triangular
   entries are consumed row by row starting at the beginning of the value
   array.  For row r the upper-triangular entries live in
   [diag[r+1]+1, diag[r]) and the entry at diag[r] is used as a multiplier
   (the stored inverse of the diagonal).
*/
PetscErrorCode MatSolve_SeqBAIJ_1_NaturalOrdering(Mat A,Vec bb,Vec xx)
{
  Mat_SeqBAIJ       *baij   = (Mat_SeqBAIJ*)A->data;
  PetscErrorCode    ierr;
  const PetscInt    nrows   = baij->mbs;
  const PetscInt    *rowptr = baij->i,*colidx = baij->j,*dptr = baij->diag;
  const PetscInt    *cols;
  const MatScalar   *vals   = baij->a,*coef;
  const PetscScalar *rhs;
  PetscScalar       *sol,acc;
  PetscInt          row,cnt;

  PetscFunctionBegin;
  if (nrows == 0) {
    PetscFunctionReturn(0);
  }

  ierr = VecGetArrayRead(bb,&rhs);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&sol);CHKERRQ(ierr);

  /* forward sweep over the lower triangle; row 0 needs no update */
  sol[0] = rhs[0];
  coef   = vals;
  cols   = colidx;
  for (row=1; row<nrows; row++) {
    cnt = rowptr[row+1] - rowptr[row];
    acc = rhs[row];
    PetscSparseDenseMinusDot(acc,sol,coef,cols,cnt);
    sol[row] = acc;
    coef    += cnt;
    cols    += cnt;
  }

  /* backward sweep over the upper triangle, last row first */
  row = nrows;
  while (row-- > 0) {
    coef = vals + dptr[row+1] + 1;
    cols = colidx + dptr[row+1] + 1;
    cnt  = dptr[row] - dptr[row+1] - 1;
    acc  = sol[row];
    PetscSparseDenseMinusDot(acc,sol,coef,cols,cnt);
    sol[row] = acc*coef[cnt]; /* coef[cnt] is the entry at dptr[row] */
  }

  ierr = PetscLogFlops(2.0*baij->nz - A->cmap->n);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(bb,&rhs);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&sol);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}