PetscErrorCode FieldVar::ConstructGrid(const PetscInt &NumNodes) { /*************************************************************/ /****** Discretize in space and construct a grid tensor *****/ /***********************************************************/ PetscFunctionBegin; PetscErrorCode ierr=0; if(FieldVar::Grid.size() > 0) FieldVar::Grid.clear(); FieldVar::Grid.resize(NumNodes); // This is a 3 * NumNodes_x * NumNodes_y * NumNodes_z tensor for(auto i = 0; i < FieldVar::Grid.size(); i++) FieldVar::Grid[i].resize(FieldVar::Dim); PetscInt dim_x = 0, dim_y = 1, dim_z = 2; PetscScalar hx, hy, hz; for(auto i = 1; i < FieldVar::NumNodes_x + 1; i++) for(auto j = 1; j < FieldVar::NumNodes_y + 1; j++) for(auto k = 1; k < FieldVar::NumNodes_z + 1; k++) { hx = PetscScalar(i - 1) / (FieldVar::NumNodes_x - 1.0) * (FieldVar::Box[dim_x+FieldVar::Dim] - FieldVar::Box[dim_x]) + FieldVar::Box[dim_x]; hy = PetscScalar(j - 1) / (FieldVar::NumNodes_y - 1.0) * (FieldVar::Box[dim_y+FieldVar::Dim] - FieldVar::Box[dim_y]) + FieldVar::Box[dim_y]; hz = PetscScalar(k - 1) / (FieldVar::NumNodes_z - 1.0) * (FieldVar::Box[dim_z+FieldVar::Dim] - FieldVar::Box[dim_z]) + FieldVar::Box[dim_z]; if(FieldVar::GridID[i][j][k] >= 0) { FieldVar::Grid[FieldVar::GridID[i][j][k]][dim_x] = hx; FieldVar::Grid[FieldVar::GridID[i][j][k]][dim_y] = hy; FieldVar::Grid[FieldVar::GridID[i][j][k]][dim_z] = hz; std::cout << "Grid pos = " << hx << " , " << hy << " , " << hz << std::endl; } } PetscFunctionReturn(ierr); }
static inline PetscErrorCode TensorContract_FMA(PetscInt dof,PetscInt P,PetscInt Q,const PetscReal Rf[],const PetscReal Sf[],const PetscReal Tf[],TensorMode tmode,const PetscScalar xx[],PetscScalar yy[]) { PetscFunctionBegin; if (tmode == TENSOR_TRANSPOSE) {PetscInt tmp = Q; Q = P; P = tmp;} { PetscReal R[Q][P],S[Q][P],T[Q][P]; const PetscScalar (*x)[P*P*P][NE] = (const PetscScalar(*)[P*P*P][NE])xx; PetscScalar (*y)[P*P*P][NE] = (PetscScalar(*)[Q*Q*Q][NE])yy; PetscScalar u[dof][Q*P*P][NE]_align,v[dof][Q*Q*P][NE]_align; for (PetscInt i=0; i<Q; i++) { for (PetscInt j=0; j<P; j++) { R[i][j] = tmode == TENSOR_EVAL ? Rf[i*P+j] : Rf[j*Q+i]; S[i][j] = tmode == TENSOR_EVAL ? Sf[i*P+j] : Sf[j*Q+i]; T[i][j] = tmode == TENSOR_EVAL ? Tf[i*P+j] : Tf[j*Q+i]; } } // u[l,a,j,k] = R[a,i] x[l,i,j,k] for (PetscInt l=0; l<dof; l++) { for (PetscInt a=0; a<Q; a++) { __m256d r[P]; for (PetscInt i=0; i<P; i++) r[i] = _mm256_set1_pd(R[a][i]); for (PetscInt jk=0; jk<P*P; jk++) { __m256d u_lajk = _mm256_setzero_pd(); for (PetscInt i=0; i<P; i++) { u_lajk = _mm256_fmadd_pd(r[i],_mm256_load_pd(x[l][i*P*P+jk]),u_lajk); } _mm256_store_pd(u[l][a*P*P+jk],u_lajk); } } } // v[l,a,b,k] = S[b,j] u[l,a,j,k] for (PetscInt l=0; l<dof; l++) { for (PetscInt b=0; b<Q; b++) { __m256d s[P]; for (int j=0; j<P; j++) s[j] = _mm256_set1_pd(S[b][j]); for (PetscInt a=0; a<Q; a++) { for (PetscInt k=0; k<P; k++) { __m256d v_labk = _mm256_setzero_pd(); for (PetscInt j=0; j<P; j++) { v_labk = _mm256_fmadd_pd(s[j],_mm256_load_pd(u[l][(a*P+j)*P+k]),v_labk); } _mm256_store_pd(v[l][(a*Q+b)*P+k],v_labk); } } } } // y[l,a,b,c] = T[c,k] v[l,a,b,k] for (PetscInt l=0; l<dof; l++) { for (PetscInt c=0; c<Q; c++) { __m256d t[P]; for (int k=0; k<P; k++) t[k] = _mm256_set1_pd(T[c][k]); for (PetscInt ab=0; ab<Q*Q; ab++) { __m256d y_labc = _mm256_load_pd(y[l][ab*Q+c]); for (PetscInt k=0; k<P; k++) { // for (PetscInt e=0; e<NE; e++) y[l][ab*Q+c][e] += T[c][k] * v[l][ab*P+k][e]; y_labc = _mm256_fmadd_pd(t[k],_mm256_load_pd(v[l][ab*P+k]),y_labc); } _mm256_store_pd(y[l][ab*Q+c],y_labc); } } } PetscLogFlops(dof*(Q*P*P*P+Q*Q*P*P+Q*Q*Q*P)*NE*2); } PetscFunctionReturn(0); }
int main(int argc, char **argv) { PetscErrorCode ierr; PetscInt n=10000,its,dfid=1; Vec x,b,u; Mat A; KSP ksp; PC pc,pcnoise; PCNoise_Ctx ctx={0,NULL}; PetscReal eta=0.1,norm; PetscScalar(*diagfunc)(PetscInt,PetscInt); ierr = PetscInitialize(&argc,&argv,(char*)0,help);CHKERRQ(ierr); /* Process command line options */ ierr = PetscOptionsGetInt(NULL,"-n",&n,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetReal(NULL,"-eta",&eta,NULL);CHKERRQ(ierr); ierr = PetscOptionsGetInt(NULL,"-diagfunc",&dfid,NULL);CHKERRQ(ierr); switch(dfid){ case 1: diagfunc = diagFunc1; break; case 2: diagfunc = diagFunc2; break; case 3: diagfunc = diagFunc3; break; default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Unrecognized diagfunc option"); } /* Create a diagonal matrix with a given distribution of diagonal elements */ ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr); ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr); ierr = MatSetFromOptions(A);CHKERRQ(ierr); ierr = MatSetUp(A);CHKERRQ(ierr); ierr = AssembleDiagonalMatrix(A,diagfunc);CHKERRQ(ierr); /* Allocate vectors and manufacture an exact solution and rhs */ ierr = MatCreateVecs(A,&x,NULL);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject)x,"Computed Solution");CHKERRQ(ierr); ierr = MatCreateVecs(A,&b,NULL);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject)b,"RHS");CHKERRQ(ierr); ierr = MatCreateVecs(A,&u,NULL);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject)u,"Reference Solution");CHKERRQ(ierr); ierr = VecSet(u,1.0);CHKERRQ(ierr); ierr = MatMult(A,u,b);CHKERRQ(ierr); /* Create a KSP object */ ierr = KSPCreate(PETSC_COMM_WORLD,&ksp);CHKERRQ(ierr); ierr = KSPSetOperators(ksp,A,A);CHKERRQ(ierr); /* Set up a composite preconditioner */ ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr); ierr = PCSetType(pc,PCCOMPOSITE);CHKERRQ(ierr); /* default composite with single Identity PC */ ierr = PCCompositeSetType(pc,PC_COMPOSITE_ADDITIVE);CHKERRQ(ierr); ierr = PCCompositeAddPC(pc,PCNONE);CHKERRQ(ierr); if(eta > 0){ ierr = PCCompositeAddPC(pc,PCSHELL);CHKERRQ(ierr); ierr = PCCompositeGetPC(pc,1,&pcnoise);CHKERRQ(ierr); ctx.eta = eta; ierr = PCShellSetContext(pcnoise,&ctx);CHKERRQ(ierr); ierr = PCShellSetApply(pcnoise,PCApply_Noise);CHKERRQ(ierr); ierr = PCShellSetSetUp(pcnoise,PCSetup_Noise);CHKERRQ(ierr); ierr = PCShellSetDestroy(pcnoise,PCDestroy_Noise);CHKERRQ(ierr); ierr = PCShellSetName(pcnoise,"Noise PC");CHKERRQ(ierr); } /* Set KSP from options (this can override the PC just defined) */ ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr); /* Solve */ ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr); /* Compute error */ ierr = VecAXPY(x,-1.0,u);CHKERRQ(ierr); ierr = PetscObjectSetName((PetscObject)x,"Error");CHKERRQ(ierr); ierr = VecNorm(x,NORM_2,&norm);CHKERRQ(ierr); ierr = KSPGetIterationNumber(ksp,&its);CHKERRQ(ierr); ierr = PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g, Iterations %D\n",(double)norm,its);CHKERRQ(ierr); /* Destroy objects and finalize */ ierr = KSPDestroy(&ksp);CHKERRQ(ierr); ierr = MatDestroy(&A);CHKERRQ(ierr); ierr = VecDestroy(&x);CHKERRQ(ierr); ierr = VecDestroy(&b);CHKERRQ(ierr); ierr = VecDestroy(&u);CHKERRQ(ierr); PetscFinalize(); return 0; }