KOKKOS_INLINE_FUNCTION
void StokesFOImplicitThicknessUpdateResid<EvalT, Traits>::
operator() (const StokesFOImplicitThicknessUpdateResid_Tag& tag, const int& cell) const
{
  // Per-cell kernel.  For the given cell it forms
  //   Residual(:,0..1) = InputResidual(:,0..1) + rho*g * sum_qp (d(dH)/dx, d(dH)/dy) * wBF
  // and copies component 2 through unchanged when numVecDims == 3.
  // NOTE(review): `res` is not declared in this function; if it is a scratch
  // buffer shared by cells that execute concurrently, the accumulation below
  // would race -- confirm how it is allocated.
  const double rhoTimesG = rho*g;

  // Reset the per-node accumulator for both components.
  for (int n = 0; n < numNodes; ++n) {
    res(n,0) = 0.0;
    res(n,1) = 0.0;
  }

  for (int q = 0; q < numQPs; ++q) {
    // Gradient of the nodal field dH at this quadrature point, built from the
    // basis gradients (an earlier variant read Ugrad(cell,qp,2,0) and
    // Ugrad(cell,qp,2,1) instead).
    ScalarT gradX = 0;
    ScalarT gradY = 0;
    for (int n = 0; n < numNodes; ++n) {
      gradX += dH(cell,n) * gradBF(cell,n, q,0);
      gradY += dH(cell,n) * gradBF(cell,n, q,1);
    }

    // Test the scaled gradient against the weighted basis functions.
    for (int n = 0; n < numNodes; ++n) {
      res(n,0) += rhoTimesG*gradX*wBF(cell,n,q);
      res(n,1) += rhoTimesG*gradY*wBF(cell,n,q);
    }
  }

  // Add the update to the incoming residual.
  for (int n = 0; n < numNodes; ++n) {
    Residual(cell,n,0) = InputResidual(cell,n,0)+res(n,0);
    Residual(cell,n,1) = InputResidual(cell,n,1)+res(n,1);
    if (numVecDims==3) {
      Residual(cell,n,2) = InputResidual(cell,n,2);
    }
  }
}
void GPAMResid<EvalT, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  // Assembles the residual for every cell in the workset:
  //   diffusion term (always), transient term (when enabled), and a
  //   convection term with velocity u (when convectionTerm is set).
  typedef Intrepid2::FunctionSpaceTools FST;

  // Set Residual to 0, then add the diffusion term: Cgrad . wGradBF.
  for (std::size_t c = 0; c < workset.numCells; ++c) {
    for (std::size_t n = 0; n < numNodes; ++n) {
      for (std::size_t eq = 0; eq < vecDim; ++eq) {
        Residual(c,n,eq) = 0.0;
      }
      for (std::size_t q = 0; q < numQPs; ++q) {
        for (std::size_t eq = 0; eq < vecDim; ++eq) {
          for (std::size_t d = 0; d < numDims; ++d) {
            Residual(c,n,eq) += Cgrad(c, q, eq, d) * wGradBF(c, n, q, d);
          }
        }
      }
    }
  }

  // Transient term: CDot * wBF.
  // Both flags should always be true if transient is enabled.
  if (workset.transientTerms && enableTransient) {
    for (std::size_t c = 0; c < workset.numCells; ++c) {
      for (std::size_t n = 0; n < numNodes; ++n) {
        for (std::size_t q = 0; q < numQPs; ++q) {
          for (std::size_t eq = 0; eq < vecDim; ++eq) {
            Residual(c,n,eq) += CDot(c, q, eq) * wBF(c, n, q);
          }
        }
      }
    }
  }

  // Convection term: (u . Cgrad) * wBF.
  if (convectionTerm) {
    for (std::size_t c = 0; c < workset.numCells; ++c) {
      for (std::size_t n = 0; n < numNodes; ++n) {
        for (std::size_t q = 0; q < numQPs; ++q) {
          for (std::size_t eq = 0; eq < vecDim; ++eq) {
            for (std::size_t d = 0; d < numDims; ++d) {
              Residual(c,n,eq) += u[d]*Cgrad(c, q, eq, d) * wBF(c, n, q);
            }
          }
        }
      }
    }
  }
}
void XZHydrostatic_VelResid<EvalT, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  // Serial path: fill the velocity residual for every (cell, node, level, dim).
  // Kokkos path: dispatch the same work through operator() via parallel_for.
#ifndef ALBANY_KOKKOS_UNDER_DEVELOPMENT
  for (int c = 0; c < workset.numCells; ++c) {
    for (int n = 0; n < numNodes; ++n) {
      for (int lev = 0; lev < numLevels; ++lev) {
        for (int d = 0; d < numDims; ++d) {
          // The non-viscous terms are evaluated only at the quadrature point
          // whose index equals the node index.
          // NOTE(review): this presumes nodes and quadrature points are
          // collocated (numQPs >= numNodes) -- confirm.
          const int nodeQp = n;
          Residual(c,n,lev,d) = ( keGrad(c,nodeQp,lev,d) + PhiGrad(c,nodeQp,lev,d) )*wBF(c,n,nodeQp)
                              + ( pGrad (c,nodeQp,lev,d)/density(c,nodeQp,lev) )    *wBF(c,n,nodeQp)
                              + etadotdVelx(c,nodeQp,lev,d)                         *wBF(c,n,nodeQp)
                              + uDot(c,nodeQp,lev,d)                                *wBF(c,n,nodeQp);

          // The viscous term is integrated over all quadrature points.
          for (int q = 0; q < numQPs; ++q) {
            Residual(c,n,lev,d) += viscosity * DVelx(c,q,lev,d) * wGradBF(c,n,q,d);
          }
        }
      }
    }
  }
#else
  Kokkos::parallel_for(XZHydrostatic_VelResid_Policy(0,workset.numCells),*this);
#endif
}
void XScalarAdvectionResid<EvalT, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  // Residual of a scalar advection equation: XDot*wBF + vel * sum_j XGrad_j * wBF.
  // Only the rank-2 (cell,node) layout is implemented; any other rank throws
  // the first time the innermost loop body is reached.

  // Per-level advection speed: (level+1)*Re.  Only vel[0] is used below.
  std::vector<ScalarT> vel(numLevels);
  for (int lev = 0; lev < numLevels; ++lev) {
    vel[lev] = (lev+1)*Re;
  }

  // Zero the residual.
  for (int c = 0; c < workset.numCells; ++c) {
    for (int n = 0; n < numNodes; ++n) {
      Residual(c, n) = 0.0;
    }
  }

  for (int c = 0; c < workset.numCells; ++c) {
    for (int q = 0; q < numQPs; ++q) {
      for (int n = 0; n < numNodes; ++n) {
        if (numRank == 2) {
          // Transient term.
          Residual(c,n) += XDot(c,q)*wBF(c,n,q);
          // Advection term.
          for (int d = 0; d < numDims; ++d) {
            Residual(c,n) += vel[0] * XGrad(c,q,d)*wBF(c,n,q);
          }
        }
        else {
          TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "no impl");
          //Irina TOFIX
          /*
          for (int level=0; level < numLevels; ++level) {
            Residual(cell,node,level) += XDot(cell,qp,level)*wBF(cell,node,qp);
            for (int j=0; j < numDims; ++j)
              Residual(cell,node,level) += vel[level] * XGrad(cell,qp,level,j)*wBF(cell,node,qp);
          }
          */
        }
      }
    }
  }
}
void XZHydrostaticResid<EvalT, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  // Residual(cell,node,level) = sum_qp [ rhoDot + vel[level] * sum_j rhoGrad_j ] * wBF

  // Per-level advection speed: (level+1)*Re.
  std::vector<ScalarT> vel(numLevels);
  for (int lev = 0; lev < numLevels; ++lev) {
    vel[lev] = (lev+1)*Re;
  }

  // Zero the whole residual through its flat index.
  for (int flat = 0; flat < Residual.size(); ++flat) {
    Residual(flat) = 0.0;
  }

  for (int c = 0; c < workset.numCells; ++c) {
    for (int q = 0; q < numQPs; ++q) {
      for (int n = 0; n < numNodes; ++n) {
        for (int lev = 0; lev < numLevels; ++lev) {
          // Transient term
          Residual(c,n,lev) += rhoDot(c,q,lev)*wBF(c,n,q);
          // Advection term
          for (int d = 0; d < numDims; ++d) {
            Residual(c,n,lev) += vel[lev]*rhoGrad(c,q,lev,d)*wBF(c,n,q);
          }
        }
      }
    }
  }
}
void StokesFOImplicitThicknessUpdateResid<EvalT, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  // For each cell, writes rho*g * grad(H - H0) tested against wBF into
  // Residual components 0 and 1.  Every other entry keeps the zero set by the
  // initial deep_copy.
  typedef Intrepid::FunctionSpaceTools FST;

  // Initialize residual to 0.0
  Kokkos::deep_copy(Residual.get_kokkos_view(), ScalarT(0.0));

  // Per-cell scratch accumulator (only columns 0 and 1 are used below).
  Intrepid::FieldContainer<ScalarT> res(numNodes,3);

  const double rhoTimesG = rho*g;

  for (std::size_t c = 0; c < workset.numCells; ++c) {
    // Clear the scratch buffer through its flat index.
    for (int flat = 0; flat < res.size(); flat++) {
      res(flat) = 0.0;
    }

    for (std::size_t q = 0; q < numQPs; ++q) {
      // Gradient of (H - H0) at this quadrature point (an earlier variant read
      // Ugrad(cell,qp,2,0) and Ugrad(cell,qp,2,1) instead).
      ScalarT gradX = 0;
      ScalarT gradY = 0;
      for (std::size_t n = 0; n < numNodes; ++n) {
        gradX += (H(c,n)-H0(c,n)) * gradBF(c,n, q,0);
        gradY += (H(c,n)-H0(c,n)) * gradBF(c,n, q,1);
      }

      for (std::size_t n = 0; n < numNodes; ++n) {
        res(n,0) += rhoTimesG*gradX*wBF(c,n,q);
        res(n,1) += rhoTimesG*gradY*wBF(c,n,q);
      }
    }

    // Publish the accumulated values for this cell.
    for (std::size_t n = 0; n < numNodes; ++n) {
      Residual(c,n,0) = res(n,0);
      Residual(c,n,1) = res(n,1);
    }
  }
}
void TLElasResid<EvalT, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  // Residual(cell,node,i) = sum_qp sum_j T(cell,qp,i,j) * wGradBF(cell,node,qp,j)
  // where T is `stress` itself for the "Neohookean AD" model, and otherwise
  // P = stress * (J * F^{-T}) computed from the deformation gradient.

  // Side effect retained from the original: changes the precision of std::cout.
  std::cout.precision(15);

  typedef Intrepid2::FunctionSpaceTools<PHX::Device> FST;
  typedef Intrepid2::RealSpaceTools<PHX::Device> RST;

  if (matModel == "Neohookean AD") {
    // using AD gives us P directly, we don't need to transform it
    for (int c = 0; c < workset.numCells; ++c) {
      for (int n = 0; n < numNodes; ++n) {
        for (int d = 0; d < numDims; d++) {
          Residual(c,n,d) = 0.0;
        }
        for (int q = 0; q < numQPs; ++q) {
          for (int row = 0; row < numDims; row++) {
            for (int col = 0; col < numDims; col++) {
              Residual(c,n,row) += stress(c, q, row, col) * wGradBF(c, n, q, col);
            }
          }
        }
      }
    }
  }
  else {
    // Build P = stress * (J * F^{-T}).
    RST::inverse(F_inv, defgrad.get_view());
    RST::transpose(F_invT, F_inv);
    FST::scalarMultiplyDataData(JF_invT, J.get_view(), F_invT);
    FST::tensorMultiplyDataData(P, stress.get_view(), JF_invT);

    for (int c = 0; c < workset.numCells; ++c) {
      for (int n = 0; n < numNodes; ++n) {
        for (int d = 0; d < numDims; d++) {
          Residual(c,n,d) = 0.0;
        }
        for (int q = 0; q < numQPs; ++q) {
          for (int row = 0; row < numDims; row++) {
            for (int col = 0; col < numDims; col++) {
              Residual(c,n,row) += P(c, q, row, col) * wGradBF(c, n, q, col);
            }
          }
        }
      }
    }
  }

  /** // Gravity term used for load stepping (disabled)
  for (int cell=0; cell < workset.numCells; ++cell) {
    for (int node=0; node < numNodes; ++node) {
      for (int qp=0; qp < numQPs; ++qp) {
        Residual(cell,node,2) += zGrav * wBF(cell, node, qp);
      }
    }
  }
  **/
}
void FEM::
multigrid( double* vector_v, double* vector_b, double* vector_res, const int level, double localstiffness[][4], double localmass[][4], double vector_bc[])
{
  // One recursive multigrid cycle on a grid with 2^level cells.
  //   level <= 1 : 1000 Gauss-Seidel sweeps on the current grid.
  //   level  > 1 : 2 pre-smoothing sweeps, residual, restriction, recursive
  //                call one level down, prolongation, correction,
  //                2 post-smoothing sweeps.
  // Data for the next level is addressed at an offset of (cells+1) entries in
  // the vectors and `cells` rows in the local matrices.
  const int smoothingSweeps = 2;
  const int coarsestSweeps  = 1000;

  // Boundary values handed to the recursive call.
  double zero_bc[2] = {0.0, 0.0};

  if (!(level > 1)) {
    for (int sweep = 0; sweep < coarsestSweeps; ++sweep) {
      GSIteration(vector_v, vector_b, (1<<level), localstiffness, localmass, vector_bc);
    }
    return;
  }

  int cells = (1<<level);
  const int nextOffset = cells + 1;

  // Pre-smoothing.
  for (int sweep = 0; sweep < smoothingSweeps; ++sweep) {
    GSIteration(vector_v, vector_b, cells, localstiffness, localmass, vector_bc);
  }

  // Residual on this level, restricted into the next level's right-hand side.
  Residual(vector_v, vector_b, vector_res, cells, localstiffness, localmass, vector_bc);
  Restriction(vector_res, vector_b + nextOffset, level);

  // Recurse with zero boundary values.
  multigrid(vector_v + nextOffset,
            vector_b + nextOffset,
            vector_res + nextOffset,
            level-1,
            localstiffness + cells,
            localmass + cells,
            zero_bc);

  // Bring the next level's result back and apply it.
  Prolongation(vector_res, vector_v + nextOffset, level);
  Correction(vector_v, vector_res, cells);

  // Post-smoothing.
  for (int sweep = 0; sweep < smoothingSweeps; ++sweep) {
    GSIteration(vector_v, vector_b, cells, localstiffness, localmass, vector_bc);
  }
}
void StokesL1L2Resid<EvalT, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  // Fills Residual components 0 and 1 from the viscosity muLandIce, the strain
  // rates epsilonXX / epsilonYY / epsilonXY and the body force.  All vecDim
  // components are zeroed first; components >= 2 stay zero.
  for (std::size_t c = 0; c < workset.numCells; ++c) {
    for (std::size_t n = 0; n < numNodes; ++n) {
      for (std::size_t eq = 0; eq < vecDim; eq++) {
        Residual(c,n,eq) = 0.0;
      }

      for (std::size_t q = 0; q < numQPs; ++q) {
        // x-momentum
        Residual(c,n,0) += 2.0*muLandIce(c,q)*((2.0*epsilonXX(c,q) + epsilonYY(c,q))*wGradBF(c,n,q,0) +
                                               epsilonXY(c,q)*wGradBF(c,n,q,1))
                         + force(c,q,0)*wBF(c,n,q);
        // y-momentum
        Residual(c,n,1) += 2.0*muLandIce(c,q)*(epsilonXY(c,q)*wGradBF(c,n,q,0) +
                                               (epsilonXX(c,q) + 2.0*epsilonYY(c,q))*wGradBF(c,n,q,1))
                         + force(c,q,1)*wBF(c,n,q);
      }
    }
  }
}
KOKKOS_INLINE_FUNCTION
void XZHydrostatic_VelResid<EvalT, Traits>::
operator() (const XZHydrostatic_VelResid_Tag& tag, const int& cell) const
{
  // Per-cell kernel: fills Residual(cell, node, level, dim) for every node,
  // level and dimension of the given cell.
  for (int n = 0; n < numNodes; ++n) {
    for (int lev = 0; lev < numLevels; ++lev) {
      for (int d = 0; d < numDims; ++d) {
        // The non-viscous terms are evaluated only at the quadrature point
        // whose index equals the node index.
        // NOTE(review): this presumes nodes and quadrature points are
        // collocated (numQPs >= numNodes) -- confirm.
        const int nodeQp = n;
        Residual(cell,n,lev,d) = ( keGrad(cell,nodeQp,lev,d) + PhiGrad(cell,nodeQp,lev,d) )*wBF(cell,n,nodeQp)
                               + ( pGrad (cell,nodeQp,lev,d)/density(cell,nodeQp,lev) )    *wBF(cell,n,nodeQp)
                               + etadotdVelx(cell,nodeQp,lev,d)                            *wBF(cell,n,nodeQp)
                               + uDot(cell,nodeQp,lev,d)                                   *wBF(cell,n,nodeQp);

        // The viscous term is integrated over all quadrature points.
        for (int q = 0; q < numQPs; ++q) {
          Residual(cell,n,lev,d) += viscosity * DVelx(cell,q,lev,d) * wGradBF(cell,n,q,d);
        }
      }
    }
  }
}
void StokesFOImplicitThicknessUpdateResid<EvalT, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  // Serial path: for each cell,
  //   Residual(:,0..1) = InputResidual(:,0..1) + rho*g * grad(dH) tested against wBF,
  // with component 2 copied through when numVecDims == 3.
  // Kokkos path: the same work is dispatched through operator().
#ifndef ALBANY_KOKKOS_UNDER_DEVELOPMENT
  typedef Intrepid2::FunctionSpaceTools FST;

  // Per-cell scratch accumulator, one column per updated component.
  Intrepid2::FieldContainer_Kokkos<ScalarT, PHX::Layout, PHX::Device> res(numNodes,2);

  const double rhoTimesG = rho*g;

  for (std::size_t c = 0; c < workset.numCells; ++c) {
    // Initialize the accumulator for this cell.
    res.initialize();

    for (std::size_t q = 0; q < numQPs; ++q) {
      // Gradient of dH at this quadrature point (an earlier variant read
      // Ugrad(cell,qp,2,0) and Ugrad(cell,qp,2,1) instead).
      ScalarT gradX = 0;
      ScalarT gradY = 0;
      for (std::size_t n = 0; n < numNodes; ++n) {
        gradX += dH(c,n) * gradBF(c,n, q,0);
        gradY += dH(c,n) * gradBF(c,n, q,1);
      }

      for (std::size_t n = 0; n < numNodes; ++n) {
        res(n,0) += rhoTimesG*gradX*wBF(c,n,q);
        res(n,1) += rhoTimesG*gradY*wBF(c,n,q);
      }
    }

    // Add the update to the incoming residual.
    for (std::size_t n = 0; n < numNodes; ++n) {
      Residual(c,n,0) = InputResidual(c,n,0)+res(n,0);
      Residual(c,n,1) = InputResidual(c,n,1)+res(n,1);
      if (numVecDims==3) {
        Residual(c,n,2) = InputResidual(c,n,2);
      }
    }
  }
#else
  Kokkos::parallel_for(StokesFOImplicitThicknessUpdateResid_Policy(0,workset.numCells),*this);
#endif
}
void XZHydrostatic_TemperatureResid<EvalT, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  // Residual(cell,node,level) = sum_qp [ velx . temperatureGrad + temperatureSrc
  //                                      - omega + etadotdT + temperatureDot ] * wBF
  PHAL::set(Residual, 0.0);

  for (int c = 0; c < workset.numCells; ++c) {
    for (int n = 0; n < numNodes; ++n) {
      for (int lev = 0; lev < numLevels; ++lev) {
        for (int q = 0; q < numQPs; ++q) {
          // Horizontal advection: only this statement is summed over dim.
          for (int d = 0; d < numDims; ++d) {
            Residual(c,n,lev) += velx(c,q,lev,d)*temperatureGrad(c,q,lev,d)*wBF(c,n,q);
          }

          // Remaining terms, added once per quadrature point.
          Residual(c,n,lev) += temperatureSrc(c,q,lev) *wBF(c,n,q);
          Residual(c,n,lev) -= omega(c,q,lev)          *wBF(c,n,q);
          Residual(c,n,lev) += etadotdT(c,q,lev)       *wBF(c,n,q);
          Residual(c,n,lev) += temperatureDot(c,q,lev) *wBF(c,n,q);
        }
      }
    }
  }
}
// Work-in-progress residual for a four-equation system stored in
// Residual(cell,node,0..3): transient and advection terms for rho, temp, qv
// and qc, plus a condensation source C coupling equations 1-3.
//
// NOTE(review): this block cannot compile as written; the code is left
// untouched because the intended physics cannot be recovered from here:
//   * `L` and `cp` are assigned without a declaration or a terminating ';'.
//   * `qvs`, `C`, `Tv`, `T` and `qv` are never declared.
//   * `^` is bitwise XOR in C++, not exponentiation, and is invalid on
//     floating-point operands.
//   * `tempGrad` is called with two indices here and three in the advection loop.
//   * `max` and `exp` are unqualified.
void XZScalarAdvectionResid<EvalT, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  // Constants
  L = 2.5e6 // Latent Heat J/kg
  cp = 1004.64 // Specific Heat J/kg/K

  // Advection velocity; component 0 is set per quadrature point below.
  std::vector<ScalarT> vel(2);

  // Zero the whole residual through its flat index.
  for (std::size_t i=0; i < Residual.size(); ++i) Residual(i)=0.0;

  for (std::size_t cell=0; cell < workset.numCells; ++cell) {
    for (std::size_t qp=0; qp < numQPs; ++qp) {
      // vel[0] is Re where coordinate 1 exceeds 5.0 and zero elsewhere;
      // vel[1] is always zero.
      if (coordVec(cell,qp,1) > 5.0) vel[0] = Re;
      else vel[0] = 0.0;
      vel[1] = 0.0;

      for (std::size_t node=0; node < numNodes; ++node) {
        // Transient Term
        // Residual(cell,node) += rhoDot(cell,qp)*wBF(cell,node,qp);
        Residual(cell,node,0) += rhoDot(cell,qp)*wBF(cell,node,qp);
        Residual(cell,node,1) += tempDot(cell,qp)*wBF(cell,node,qp);
        Residual(cell,node,2) += qvDot(cell,qp)*wBF(cell,node,qp);
        Residual(cell,node,3) += qcDot(cell,qp)*wBF(cell,node,qp);

        // Compute saturation mixing ratio for condensation rate with Tetens' formula
        // Saturation vapor pressure, temp in Celsius, equation valid over [-35,35] with a 3% error
        // NOTE(review): the formula below reads rhoDot, tempGrad, tempDot and
        // qvDot; presumably the undifferentiated pressure, temperature and
        // mixing-ratio fields were intended -- confirm.
        qvs = 3.8 / rhoDot(cell,qp) * exp(17.27 * (tempGrad(cell,qp) - 273.)/(tempGrad(cell,qp) - 36.));
        // NOTE(review): `(...)^2.` looks like an intended square, and the
        // parenthesisation of `cp*tempDot(cell,qp)-36.` looks suspect -- confirm.
        C = max( (qvDot(cell,qp) - qvs)/( 1. + qvs*((4093.*L)/(cp*tempDot(cell,qp)-36.)^2.) ) , -qcDot(cell,qp) );
        // Tv is assigned here but not read anywhere else in this function.
        Tv = T * (1 + 0.6*qv);

        // Advection Term
        // NOTE(review): the C source terms sit inside the loop over j, so they
        // are added numDims times and without a wBF weight -- confirm intent.
        for (std::size_t j=0; j < numDims; ++j) {
          Residual(cell,node,0) += vel[j]*rhoGrad(cell,qp,j)*wBF(cell,node,qp);
          Residual(cell,node,1) += vel[j]*tempGrad(cell,qp,j)*wBF(cell,node,qp)+L/cp*C;
          Residual(cell,node,2) += vel[j]*qvGrad(cell,qp,j)*wBF(cell,node,qp)-C;
          Residual(cell,node,3) += vel[j]*qcGrad(cell,qp,j)*wBF(cell,node,qp)+C;
        }
      }
    }
  }
}
/// Trains the forest on the given instance pool.
///
/// Buckets the pool, then fits m_pconfig->TreeNum trees one after another,
/// calling Residual() after each tree.  Optionally learns new instances
/// (m_pconfig->IsLearnNewInstances), then saves the result.
///
/// @param pInstancepool  training instances; must not be NULL.
/// @return 0 on success, -1 on any failure (the failure is logged).
int GradientBoostingForest::Fit(InstancePool * pInstancepool)
{
    // Validate before touching the pool: the original dereferenced the pointer
    // (MakeBucket) before this check, which crashed on NULL input.
    if(NULL == pInstancepool)
    {
        Comm::LogErr("GradientBoostingForest::Fit pInstancepool is NULL");
        return -1;
    }
    m_pInstancePool = pInstancepool;
    m_pInstancePool->MakeBucket();

    int ret = -1;
    for(int i=0;i<m_pconfig->TreeNum;i++)
    {
        DecisionTree * pTree = new DecisionTree(m_pconfig);
        ret = pTree->Fit(m_pInstancePool);
        // printf("i = %d Fited pTree->FitError = %f\n",i,pTree->FitError());
        if(ret != 0)
        {
            Comm::LogErr("GradientBoostingForest::Fit fail! tree i = %d Fit fail!",i);
            // The tree has not been handed to m_Forest yet, so release it here
            // instead of leaking it.
            delete pTree;
            return -1;
        }
        // From here on m_Forest holds the pointer.
        m_Forest.push_back(pTree);

        if(m_pconfig->LogLevel >= 2)printf("i = %d FitError = %f TestError = %f\n",i,FitError(),TestError());

        ret = Residual();
        printf("i = %d Residualed\n",i);
        if(ret != 0)
        {
            Comm::LogErr("GradientBoostingForest::Fit fail! Residual fail!");
            return -1;
        }
    }

    if(m_pconfig->IsLearnNewInstances)
    {
        ret = LearnNewInstance();
        if(ret != 0)
        {
            Comm::LogErr("GradientBoostingForest::Fit fail! LearnNewInstance fail!");
            return -1;
        }
    }

    ret = SaveResult();
    if(ret != 0)
    {
        Comm::LogErr("GradientBoostingForest::Fit fail ! SaveResult fail!");
        return -1;
    }

    if(m_pconfig->LogLevel >= 2)FeatureStat();
    return 0;
}
void ReactDiffSystemResid<EvalT, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  // Residual of a three-equation reaction-diffusion system.  For equation k
  // (diffusivity mu_k, reaction coefficients reactCoeff_k[0..2], forcing
  // forces[k]) the quadrature sum is
  //   mu_k * grad(u_k) . wGradBF - sum_m reactCoeff_k[m] * u_m * wBF - forces[k] * wBF.
  // The gradient is always taken over three spatial components.
  typedef Intrepid2::FunctionSpaceTools<PHX::Device> FST;

  for (std::size_t c = 0; c < workset.numCells; ++c) {
    for (std::size_t n = 0; n < numNodes; ++n) {
      for (std::size_t eq = 0; eq < vecDim; eq++) {
        Residual(c,n,eq) = 0.0;
      }

      for (std::size_t q = 0; q < numQPs; ++q) {
        //- mu0*delta(u0) + a0*u0 + a1*u1 + a2*u2 = f0
        Residual(c,n,0) += mu0*UGrad(c,q,0,0)*wGradBF(c,n,q,0)
                         + mu0*UGrad(c,q,0,1)*wGradBF(c,n,q,1)
                         + mu0*UGrad(c,q,0,2)*wGradBF(c,n,q,2)
                         - reactCoeff0[0]*U(c,q,0)*wBF(c,n,q)
                         - reactCoeff0[1]*U(c,q,1)*wBF(c,n,q)
                         - reactCoeff0[2]*U(c,q,2)*wBF(c,n,q)
                         - forces[0]*wBF(c,n,q);

        //- mu1*delta(u1) + b0*u0 + b1*u1 + b2*u2 = f1
        Residual(c,n,1) += mu1*UGrad(c,q,1,0)*wGradBF(c,n,q,0)
                         + mu1*UGrad(c,q,1,1)*wGradBF(c,n,q,1)
                         + mu1*UGrad(c,q,1,2)*wGradBF(c,n,q,2)
                         - reactCoeff1[0]*U(c,q,0)*wBF(c,n,q)
                         - reactCoeff1[1]*U(c,q,1)*wBF(c,n,q)
                         - reactCoeff1[2]*U(c,q,2)*wBF(c,n,q)
                         - forces[1]*wBF(c,n,q);

        //- mu2*delta(u2) + c0*u0 + c1*u1 + c2*u2 = f2
        Residual(c,n,2) += mu2*UGrad(c,q,2,0)*wGradBF(c,n,q,0)
                         + mu2*UGrad(c,q,2,1)*wGradBF(c,n,q,1)
                         + mu2*UGrad(c,q,2,2)*wGradBF(c,n,q,2)
                         - reactCoeff2[0]*U(c,q,0)*wBF(c,n,q)
                         - reactCoeff2[1]*U(c,q,1)*wBF(c,n,q)
                         - reactCoeff2[2]*U(c,q,2)*wBF(c,n,q)
                         - forces[2]*wBF(c,n,q);
      }
    }
  }
}
void XZScalarAdvectionResid<EvalT, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  // Residual(cell,node) = sum_qp [ rhoDot + sum_j vel[j] * rhoGrad_j ] * wBF,
  // where vel[0] is Re at quadrature points whose coordinate 1 exceeds 0.5 and
  // zero elsewhere, and vel[1] is always zero.
  std::vector<ScalarT> vel(2);

  // Zero the whole residual through its flat index.
  for (std::size_t flat = 0; flat < Residual.size(); ++flat) {
    Residual(flat) = 0.0;
  }

  for (std::size_t c = 0; c < workset.numCells; ++c) {
    for (std::size_t q = 0; q < numQPs; ++q) {
      // Piecewise-constant velocity profile.
      if (coordVec(c,q,1) > 0.5) {
        vel[0] = Re;
      }
      else {
        vel[0] = 0.0;
      }
      vel[1] = 0.0;

      for (std::size_t n = 0; n < numNodes; ++n) {
        // Transient Term
        Residual(c,n) += rhoDot(c,q)*wBF(c,n,q);
        // Advection Term
        for (std::size_t d = 0; d < numDims; ++d) {
          Residual(c,n) += vel[d]*rhoGrad(c,q,d)*wBF(c,n,q);
        }
      }
    }
  }
}
int main(int argc, char *argv[]) { int ierr = 0, i, j, k; bool debug = false; #ifdef EPETRA_MPI MPI_Init(&argc,&argv); Epetra_MpiComm Comm( MPI_COMM_WORLD ); #else Epetra_SerialComm Comm; #endif bool verbose = false; // Check if we should print results to standard out if (argc>1) if (argv[1][0]=='-' && argv[1][1]=='v') verbose = true; if (verbose && Comm.MyPID()==0) cout << Epetra_Version() << endl << endl; int rank = Comm.MyPID(); // char tmp; // if (rank==0) cout << "Press any key to continue..."<< endl; // if (rank==0) cin >> tmp; // Comm.Barrier(); Comm.SetTracebackMode(0); // This should shut down any error traceback reporting if (verbose) cout << Comm <<endl; // bool verbose1 = verbose; // Redefine verbose to only print on PE 0 if (verbose && rank!=0) verbose = false; int N = 20; int NRHS = 4; double * A = new double[N*N]; double * A1 = new double[N*N]; double * X = new double[(N+1)*NRHS]; double * X1 = new double[(N+1)*NRHS]; int LDX = N+1; int LDX1 = N+1; double * B = new double[N*NRHS]; double * B1 = new double[N*NRHS]; int LDB = N; int LDB1 = N; int LDA = N; int LDA1 = LDA; double OneNorm1; bool Transpose = false; Epetra_SerialDenseSolver solver; Epetra_SerialDenseMatrix * Matrix; for (int kk=0; kk<2; kk++) { for (i=1; i<=N; i++) { GenerateHilbert(A, LDA, i); OneNorm1 = 0.0; for (j=1; j<=i; j++) OneNorm1 += 1.0/((double) j); // 1-Norm = 1 + 1/2 + ...+1/n if (kk==0) { Matrix = new Epetra_SerialDenseMatrix(View, A, LDA, i, i); LDA1 = LDA; } else { Matrix = new Epetra_SerialDenseMatrix(Copy, A, LDA, i, i); LDA1 = i; } GenerateHilbert(A1, LDA1, i); if (kk==1) { solver.FactorWithEquilibration(true); solver.SolveWithTranspose(true); Transpose = true; solver.SolveToRefinedSolution(true); } for (k=0; k<NRHS; k++) for (j=0; j<i; j++) { B[j+k*LDB] = 1.0/((double) (k+3)*(j+3)); B1[j+k*LDB1] = B[j+k*LDB1]; } Epetra_SerialDenseMatrix Epetra_B(View, B, LDB, i, NRHS); Epetra_SerialDenseMatrix Epetra_X(View, X, LDX, i, NRHS); solver.SetMatrix(*Matrix); 
solver.SetVectors(Epetra_X, Epetra_B); ierr = check(solver, A1, LDA1, i, NRHS, OneNorm1, B1, LDB1, X1, LDX1, Transpose, verbose); assert (ierr>-1); delete Matrix; if (ierr!=0) { if (verbose) cout << "Factorization failed due to bad conditioning. This is normal if RCOND is small." << endl; break; } } } delete [] A; delete [] A1; delete [] X; delete [] X1; delete [] B; delete [] B1; ///////////////////////////////////////////////////////////////////// // Now test norms and scaling functions ///////////////////////////////////////////////////////////////////// Epetra_SerialDenseMatrix D; double ScalarA = 2.0; int DM = 10; int DN = 8; D.Shape(DM, DN); for (j=0; j<DN; j++) for (i=0; i<DM; i++) D[j][i] = (double) (1+i+j*DM) ; //cout << D << endl; double NormInfD_ref = (double)(DM*(DN*(DN+1))/2); double NormOneD_ref = (double)((DM*DN*(DM*DN+1))/2 - (DM*(DN-1)*(DM*(DN-1)+1))/2 ); double NormInfD = D.NormInf(); double NormOneD = D.NormOne(); if (verbose) { cout << " *** Before scaling *** " << endl << " Computed one-norm of test matrix = " << NormOneD << endl << " Expected one-norm = " << NormOneD_ref << endl << " Computed inf-norm of test matrix = " << NormInfD << endl << " Expected inf-norm = " << NormInfD_ref << endl; } D.Scale(ScalarA); // Scale entire D matrix by this value NormInfD = D.NormInf(); NormOneD = D.NormOne(); if (verbose) { cout << " *** After scaling *** " << endl << " Computed one-norm of test matrix = " << NormOneD << endl << " Expected one-norm = " << NormOneD_ref*ScalarA << endl << " Computed inf-norm of test matrix = " << NormInfD << endl << " Expected inf-norm = " << NormInfD_ref*ScalarA << endl; } ///////////////////////////////////////////////////////////////////// // Now test that A.Multiply(false, x, y) produces the same result // as y.Multiply('N','N', 1.0, A, x, 0.0). 
///////////////////////////////////////////////////////////////////// N = 10; int M = 10; LDA = N; Epetra_SerialDenseMatrix smallA(N, M, false); Epetra_SerialDenseMatrix x(N, 1, false); Epetra_SerialDenseMatrix y1(N, 1, false); Epetra_SerialDenseMatrix y2(N, 1, false); for(i=0; i<N; ++i) { for(j=0; j<M; ++j) { smallA(i,j) = 1.0*i+2.0*j+1.0; } x(i,0) = 1.0; y1(i,0) = 0.0; y2(i,0) = 0.0; } //quick check of operator== if (x == y1) { if (verbose) cout << "err in Epetra_SerialDenseMatrix::operator==, " << "erroneously returned true." << std::endl; return(-1); } //quick check of operator!= if (x != x) { if (verbose) cout << "err in Epetra_SerialDenseMatrix::operator==, " << "erroneously returned true." << std::endl; return(-1); } int err1 = smallA.Multiply(false, x, y1); int err2 = y2.Multiply('N','N', 1.0, smallA, x, 0.0); if (err1 != 0 || err2 != 0) { if (verbose) cout << "err in Epetra_SerialDenseMatrix::Multiply"<<endl; return(err1+err2); } for(i=0; i<N; ++i) { if (y1(i,0) != y2(i,0)) { if (verbose) cout << "different versions of Multiply don't match."<<endl; return(-99); } } ///////////////////////////////////////////////////////////////////// // Now test for larger system, both correctness and performance. 
///////////////////////////////////////////////////////////////////// N = 2000; NRHS = 5; LDA = N; LDB = N; LDX = N; if (verbose) cout << "\n\nComputing factor of an " << N << " x " << N << " general matrix...Please wait.\n\n" << endl; // Define A and X A = new double[LDA*N]; X = new double[LDB*NRHS]; for (j=0; j<N; j++) { for (k=0; k<NRHS; k++) X[j+k*LDX] = 1.0/((double) (j+5+k)); for (i=0; i<N; i++) { if (i==((j+2)%N)) A[i+j*LDA] = 100.0 + i; else A[i+j*LDA] = -11.0/((double) (i+5)*(j+2)); } } // Define Epetra_SerialDenseMatrix object Epetra_SerialDenseMatrix BigMatrix(Copy, A, LDA, N, N); Epetra_SerialDenseMatrix OrigBigMatrix(View, A, LDA, N, N); Epetra_SerialDenseSolver BigSolver; BigSolver.FactorWithEquilibration(true); BigSolver.SetMatrix(BigMatrix); // Time factorization Epetra_Flops counter; BigSolver.SetFlopCounter(counter); Epetra_Time Timer(Comm); double tstart = Timer.ElapsedTime(); ierr = BigSolver.Factor(); if (ierr!=0 && verbose) cout << "Error in factorization = "<<ierr<< endl; assert(ierr==0); double time = Timer.ElapsedTime() - tstart; double FLOPS = counter.Flops(); double MFLOPS = FLOPS/time/1000000.0; if (verbose) cout << "MFLOPS for Factorization = " << MFLOPS << endl; // Define Left hand side and right hand side Epetra_SerialDenseMatrix LHS(View, X, LDX, N, NRHS); Epetra_SerialDenseMatrix RHS; RHS.Shape(N,NRHS); // Allocate RHS // Compute RHS from A and X Epetra_Flops RHS_counter; RHS.SetFlopCounter(RHS_counter); tstart = Timer.ElapsedTime(); RHS.Multiply('N', 'N', 1.0, OrigBigMatrix, LHS, 0.0); time = Timer.ElapsedTime() - tstart; Epetra_SerialDenseMatrix OrigRHS = RHS; FLOPS = RHS_counter.Flops(); MFLOPS = FLOPS/time/1000000.0; if (verbose) cout << "MFLOPS to build RHS (NRHS = " << NRHS <<") = " << MFLOPS << endl; // Set LHS and RHS and solve BigSolver.SetVectors(LHS, RHS); tstart = Timer.ElapsedTime(); ierr = BigSolver.Solve(); if (ierr==1 && verbose) cout << "LAPACK guidelines suggest this matrix might benefit from equilibration." 
<< endl; else if (ierr!=0 && verbose) cout << "Error in solve = "<<ierr<< endl; assert(ierr>=0); time = Timer.ElapsedTime() - tstart; FLOPS = BigSolver.Flops(); MFLOPS = FLOPS/time/1000000.0; if (verbose) cout << "MFLOPS for Solve (NRHS = " << NRHS <<") = " << MFLOPS << endl; double * resid = new double[NRHS]; bool OK = Residual(N, NRHS, A, LDA, BigSolver.Transpose(), BigSolver.X(), BigSolver.LDX(), OrigRHS.A(), OrigRHS.LDA(), resid); if (verbose) { if (!OK) cout << "************* Residual do not meet tolerance *************" << endl; for (i=0; i<NRHS; i++) cout << "Residual[" << i <<"] = "<< resid[i] << endl; cout << endl; } // Solve again using the Epetra_SerialDenseVector class for LHS and RHS Epetra_SerialDenseVector X2; Epetra_SerialDenseVector B2; X2.Size(BigMatrix.N()); B2.Size(BigMatrix.M()); int length = BigMatrix.N(); {for (int kk=0; kk<length; kk++) X2[kk] = ((double ) kk)/ ((double) length);} // Define entries of X2 RHS_counter.ResetFlops(); B2.SetFlopCounter(RHS_counter); tstart = Timer.ElapsedTime(); B2.Multiply('N', 'N', 1.0, OrigBigMatrix, X2, 0.0); // Define B2 = A*X2 time = Timer.ElapsedTime() - tstart; Epetra_SerialDenseVector OrigB2 = B2; FLOPS = RHS_counter.Flops(); MFLOPS = FLOPS/time/1000000.0; if (verbose) cout << "MFLOPS to build single RHS = " << MFLOPS << endl; // Set LHS and RHS and solve BigSolver.SetVectors(X2, B2); tstart = Timer.ElapsedTime(); ierr = BigSolver.Solve(); time = Timer.ElapsedTime() - tstart; if (ierr==1 && verbose) cout << "LAPACK guidelines suggest this matrix might benefit from equilibration." 
<< endl; else if (ierr!=0 && verbose) cout << "Error in solve = "<<ierr<< endl; assert(ierr>=0); FLOPS = counter.Flops(); MFLOPS = FLOPS/time/1000000.0; if (verbose) cout << "MFLOPS to solve single RHS = " << MFLOPS << endl; OK = Residual(N, 1, A, LDA, BigSolver.Transpose(), BigSolver.X(), BigSolver.LDX(), OrigB2.A(), OrigB2.LDA(), resid); if (verbose) { if (!OK) cout << "************* Residual do not meet tolerance *************" << endl; cout << "Residual = "<< resid[0] << endl; } delete [] resid; delete [] A; delete [] X; /////////////////////////////////////////////////// // Now test default constructor and index operators /////////////////////////////////////////////////// N = 5; Epetra_SerialDenseMatrix C; // Implicit call to default constructor, should not need to call destructor C.Shape(5,5); // Make it 5 by 5 double * C1 = new double[N*N]; GenerateHilbert(C1, N, N); // Generate Hilber matrix C1[1+2*N] = 1000.0; // Make matrix nonsymmetric // Fill values of C with Hilbert values for (i=0; i<N; i++) for (j=0; j<N; j++) C(i,j) = C1[i+j*N]; // Test if values are correctly written and read for (i=0; i<N; i++) for (j=0; j<N; j++) { assert(C(i,j) == C1[i+j*N]); assert(C(i,j) == C[j][i]); } if (verbose) cout << "Default constructor and index operator check OK. Values of Hilbert matrix = " << endl << C << endl << "Values should be 1/(i+j+1), except value (1,2) should be 1000" << endl; delete [] C1; // now test sized/shaped constructor Epetra_SerialDenseMatrix shapedMatrix(10, 12); assert(shapedMatrix.M() == 10); assert(shapedMatrix.N() == 12); for(i = 0; i < 10; i++) for(j = 0; j < 12; j++) assert(shapedMatrix(i, j) == 0.0); Epetra_SerialDenseVector sizedVector(20); assert(sizedVector.Length() == 20); for(i = 0; i < 20; i++) assert(sizedVector(i) == 0.0); if (verbose) cout << "Shaped/sized constructors check OK." 
<< endl; // test Copy/View mode in op= and cpy ctr int temperr = 0; temperr = matrixAssignment(verbose, debug); if(verbose && temperr == 0) cout << "Operator = checked OK." << endl; EPETRA_TEST_ERR(temperr, ierr); temperr = matrixCpyCtr(verbose, debug); if(verbose && temperr == 0) cout << "Copy ctr checked OK." << endl; EPETRA_TEST_ERR(temperr, ierr); // Test some vector methods Epetra_SerialDenseVector v1(3); v1[0] = 1.0; v1[1] = 3.0; v1[2] = 2.0; Epetra_SerialDenseVector v2(3); v2[0] = 2.0; v2[1] = 1.0; v2[2] = -2.0; temperr = 0; if (v1.Norm1()!=6.0) temperr++; if (fabs(sqrt(14.0)-v1.Norm2())>1.0e-6) temperr++; if (v1.NormInf()!=3.0) temperr++; if(verbose && temperr == 0) cout << "Vector Norms checked OK." << endl; temperr = 0; if (v1.Dot(v2)!=1.0) temperr++; if(verbose && temperr == 0) cout << "Vector Dot product checked OK." << endl; #ifdef EPETRA_MPI MPI_Finalize() ; #endif /* end main */ return ierr ; }
int check(Epetra_SerialDenseSolver &solver, double * A1, int LDA1, int N1, int NRHS1, double OneNorm1, double * B1, int LDB1, double * X1, int LDX1, bool Transpose, bool verbose) { int i; bool OK; // Test query functions int M= solver.M(); if (verbose) cout << "\n\nNumber of Rows = " << M << endl<< endl; assert(M==N1); int N= solver.N(); if (verbose) cout << "\n\nNumber of Equations = " << N << endl<< endl; assert(N==N1); int LDA = solver.LDA(); if (verbose) cout << "\n\nLDA = " << LDA << endl<< endl; assert(LDA==LDA1); int LDB = solver.LDB(); if (verbose) cout << "\n\nLDB = " << LDB << endl<< endl; assert(LDB==LDB1); int LDX = solver.LDX(); if (verbose) cout << "\n\nLDX = " << LDX << endl<< endl; assert(LDX==LDX1); int NRHS = solver.NRHS(); if (verbose) cout << "\n\nNRHS = " << NRHS << endl<< endl; assert(NRHS==NRHS1); assert(solver.ANORM()==-1.0); assert(solver.RCOND()==-1.0); if (!solver.A_Equilibrated() && !solver.B_Equilibrated()) { assert(solver.ROWCND()==-1.0); assert(solver.COLCND()==-1.0); assert(solver.AMAX()==-1.0); } // Other binary tests assert(!solver.Factored()); assert(solver.Transpose()==Transpose); assert(!solver.SolutionErrorsEstimated()); assert(!solver.Inverted()); assert(!solver.ReciprocalConditionEstimated()); assert(!solver.Solved()); assert(!solver.SolutionRefined()); int ierr = solver.Factor(); assert(ierr>-1); if (ierr!=0) return(ierr); // Factorization failed due to poor conditioning. double rcond; ierr = solver.ReciprocalConditionEstimate(rcond); assert(ierr==0); if (verbose) { double rcond1 = 1.0/exp(3.5*((double)N)); if (N==1) rcond1 = 1.0; cout << "\n\nRCOND = "<< rcond << " should be approx = " << rcond1 << endl << endl; } ierr = solver.Solve(); assert(ierr>-1); if (ierr!=0 && verbose) cout << "LAPACK rules suggest system should be equilibrated." 
<< endl; assert(solver.Factored()); assert(solver.Transpose()==Transpose); assert(solver.ReciprocalConditionEstimated()); assert(solver.Solved()); if (solver.SolutionErrorsEstimated()) { if (verbose) { cout << "\n\nFERR[0] = "<< solver.FERR()[0] << endl; cout << "\n\nBERR[0] = "<< solver.BERR()[0] << endl<< endl; } } double * resid = new double[NRHS]; OK = Residual(N, NRHS, A1, LDA1, solver.Transpose(), solver.X(), solver.LDX(), B1, LDB1, resid); if (verbose) { if (!OK) cout << "************* Residual do not meet tolerance *************" << endl; /* if (solver.A_Equilibrated()) { double * R = solver.R(); double * C = solver.C(); for (i=0; i<solver.M(); i++) cout << "R[" << i <<"] = "<< R[i] << endl; for (i=0; i<solver.N(); i++) cout << "C[" << i <<"] = "<< C[i] << endl; } */ cout << "\n\nResiduals using factorization to solve" << endl; for (i=0; i<NRHS; i++) cout << "Residual[" << i <<"] = "<< resid[i] << endl; cout << endl; } ierr = solver.Invert(); assert(ierr>-1); assert(solver.Inverted()); assert(!solver.Factored()); assert(solver.Transpose()==Transpose); Epetra_SerialDenseMatrix RHS1(Copy, B1, LDB1, N, NRHS); Epetra_SerialDenseMatrix LHS1(Copy, X1, LDX1, N, NRHS); assert(solver.SetVectors(LHS1, RHS1)==0); assert(!solver.Solved()); assert(solver.Solve()>-1); OK = Residual(N, NRHS, A1, LDA1, solver.Transpose(), solver.X(), solver.LDX(), B1, LDB1, resid); if (verbose) { if (!OK) cout << "************* Residual do not meet tolerance *************" << endl; cout << "Residuals using inverse to solve" << endl; for (i=0; i<NRHS; i++) cout << "Residual[" << i <<"] = "<< resid[i] << endl; cout << endl; } delete [] resid; return(0); }
// Assembles the element residual for the current workset.
//
// The whole Residual field is cleared first; afterwards exactly one of four
// branches accumulates into it, selected by the spatial dimension (numDims)
// and the equation type (eqn_type):
//   - FELIX,   3D : components 0 and 1, stress terms then force terms
//   - POISSON, 3D : component 0 only
//   - FELIX,   2D : components 0 and 1
//   - POISSON, 2D : component 0 only
// For any other eqn_type the residual is left at zero.
void StokesFOResid<EvalT, Traits>::
evaluateFields(typename Traits::EvalData workset)
{
  typedef Intrepid::FunctionSpaceTools FST;

  // Every branch below accumulates with +=, so start from zero.
  for (std::size_t entry = 0; entry < Residual.size(); ++entry)
    Residual(entry) = 0.0;

  const bool is3D = (numDims == 3);

  if (is3D && eqn_type == FELIX) {
    for (std::size_t c = 0; c < workset.numCells; ++c) {
      // Stress contribution: all quadrature points are accumulated before
      // the force contribution is added.
      for (std::size_t q = 0; q < numQPs; ++q) {
        ScalarT& visc = muFELIX(c,q);
        ScalarT sxx = 2.0*visc*(2.0*Ugrad(c,q,0,0) + Ugrad(c,q,1,1));
        ScalarT syy = 2.0*visc*(2.0*Ugrad(c,q,1,1) + Ugrad(c,q,0,0));
        ScalarT sxy = visc*(Ugrad(c,q,1,0)+ Ugrad(c,q,0,1));
        ScalarT sxz = visc*Ugrad(c,q,0,2);
        ScalarT syz = visc*Ugrad(c,q,1,2);
        for (std::size_t n = 0; n < numNodes; ++n) {
          Residual(c,n,0) += sxx*wGradBF(c,n,q,0) +
                             sxy*wGradBF(c,n,q,1) +
                             sxz*wGradBF(c,n,q,2);
          Residual(c,n,1) += sxy*wGradBF(c,n,q,0) +
                             syy*wGradBF(c,n,q,1) +
                             syz*wGradBF(c,n,q,2);
        }
      }
      // Force contribution.
      for (std::size_t q = 0; q < numQPs; ++q) {
        ScalarT& fx = force(c,q,0);
        ScalarT& fy = force(c,q,1);
        for (std::size_t n = 0; n < numNodes; ++n) {
          Residual(c,n,0) += fx*wBF(c,n,q);
          Residual(c,n,1) += fy*wBF(c,n,q);
        }
      }
    }
  }
  else if (is3D && eqn_type == POISSON) {
    // Laplace (Poisson) operator
    for (std::size_t c = 0; c < workset.numCells; ++c) {
      for (std::size_t n = 0; n < numNodes; ++n) {
        for (std::size_t q = 0; q < numQPs; ++q) {
          Residual(c,n,0) += Ugrad(c,q,0,0)*wGradBF(c,n,q,0) +
                             Ugrad(c,q,0,1)*wGradBF(c,n,q,1) +
                             Ugrad(c,q,0,2)*wGradBF(c,n,q,2) +
                             force(c,q,0)*wBF(c,n,q);
        }
      }
    }
  }
  else if (!is3D && eqn_type == FELIX) {
    for (std::size_t c = 0; c < workset.numCells; ++c) {
      for (std::size_t n = 0; n < numNodes; ++n) {
        for (std::size_t q = 0; q < numQPs; ++q) {
          Residual(c,n,0) += 2.0*muFELIX(c,q)*((2.0*Ugrad(c,q,0,0) + Ugrad(c,q,1,1))*wGradBF(c,n,q,0) +
                             0.5*(Ugrad(c,q,0,1) + Ugrad(c,q,1,0))*wGradBF(c,n,q,1)) +
                             force(c,q,0)*wBF(c,n,q);
          Residual(c,n,1) += 2.0*muFELIX(c,q)*(0.5*(Ugrad(c,q,0,1) + Ugrad(c,q,1,0))*wGradBF(c,n,q,0) +
                             (Ugrad(c,q,0,0) + 2.0*Ugrad(c,q,1,1))*wGradBF(c,n,q,1)) +
                             force(c,q,1)*wBF(c,n,q);
        }
      }
    }
  }
  else if (!is3D && eqn_type == POISSON) {
    // Laplace (Poisson) operator
    for (std::size_t c = 0; c < workset.numCells; ++c) {
      for (std::size_t n = 0; n < numNodes; ++n) {
        for (std::size_t q = 0; q < numQPs; ++q) {
          Residual(c,n,0) += Ugrad(c,q,0,0)*wGradBF(c,n,q,0) +
                             Ugrad(c,q,0,1)*wGradBF(c,n,q,1) +
                             force(c,q,0)*wBF(c,n,q);
        }
      }
    }
  }
}
//-------------------------------------------------------------------------------- // SolveConstitutiveEquations // - Solves the constitutive equation given the applied strain rate; i.e., // find the stress state that is compatible with the strain rate defined. // // Applying Newton Raphson to solve the the stress state given the strain state of // the system. // //-------------------------------------------------------------------------------- EigenRep SolveConstitutiveEquations( const EigenRep & InitialStressState, // initial guess, either from Sach's or previous iteration const vector<EigenRep> & SchmidtTensors, const vector<Float> & CRSS, const vector<Float> & GammaDotBase, // reference shear rate const vector<int> & RateSensitivity, const EigenRep & StrainRate, // Current strain rate - constant from caller Float EpsilonConvergence, Float MaxResolvedStress, int MaxNumIterations ) { const Float Coeff = 0.2; // global fudge factor EigenRep CurrentStressState = InitialStressState; int RemainingIterations = MaxNumIterations; EigenRep NewStressState = InitialStressState; EigenRep SavedState(0, 0, 0, 0, 0); // used to return to old state while( RemainingIterations > 0 ) { bool AdmissibleStartPointFound = false; std::vector<Float> RSS( SchmidtTensors.size(), 0 ); // This is really RSS / tau do // refresh critical resolved shear stress. 
// check to see if it is outside of the yield // surface, and therefore inadmissible { AdmissibleStartPointFound = true; for( int i = 0; i < SchmidtTensors.size(); i ++ ) { RSS[i] = InnerProduct( SchmidtTensors[i], CurrentStressState) / CRSS[i]; if( std::fabs( RSS[i] ) < 1e-10 ) RSS[i] = 0; if( std::fabs( RSS[i] ) > MaxResolvedStress ) AdmissibleStartPointFound = false; } if( !AdmissibleStartPointFound ) CurrentStressState = SavedState + ( CurrentStressState - SavedState ) * Coeff; RemainingIterations --; if( RemainingIterations < 0 ) return NewStressState; } while ( !AdmissibleStartPointFound ); std::vector<Float> GammaDot( SchmidtTensors.size(), 0 ); // This is really RSS / tau for( int i = 0; i < SchmidtTensors.size(); i ++ ) { if( RateSensitivity[i] -1 > 0 ) GammaDot[i] = GammaDotBase[i] * std::pow( std::fabs( RSS[i] ), static_cast<int>( RateSensitivity[i] - 1 ) ); else GammaDot[i] = GammaDotBase[i]; } // Construct residual vector R(Sigma), where Sigma is stress. R(Sigma) is a 5d vector EigenRep Residual( 0, 0, 0, 0, 0 ); // current estimate SMatrix5x5 ResidualJacobian; ResidualJacobian.SetZero(); for( int i = 0; i < SchmidtTensors.size(); i ++ ) { Residual += SchmidtTensors[i] * GammaDot[i] * RSS[i]; // Residual = StrainRate - sum_{k} [ m^{k}_i * |r_ss/ crss|^n ] // Construct F', or the Jacobian ResidualJacobian -= OuterProduct( SchmidtTensors[i], SchmidtTensors[i] ) * RateSensitivity[i] * GammaDot[i] / CRSS[i]; } Residual = Residual - StrainRate ; // need the negative residual, instead of E - R SavedState = CurrentStressState; EigenRep Delta_Stress = LU_Solver( ResidualJacobian, Residual ); // <----------- Need to hangle error from this NewStressState = CurrentStressState + Delta_Stress; Float RelativeError = static_cast<Float>(2) * Delta_Stress.Norm() / ( NewStressState + CurrentStressState ).Norm(); CurrentStressState = NewStressState; if( RelativeError < EpsilonConvergence ) { break; } } // end while return NewStressState; }
/** * @function main */ int main( int argc, char* argv[] ) { if( argc < 3 ) { printf("Error. Give me two files with 2D and 3D Points \n"); return 1; } srand ( time(NULL) ); /** Read the point files */ readPoints2D( argv[1], points2D ); readPoints3D( argv[2], points3D ); /** Normalize them */ normalizePoints2D( points2D, T2, normPoints2D ); normalizePoints3D( points3D, T3, normPoints3D ); int N = points2D.size(); double sum_res; Eigen::MatrixXd M; int num_trials = 10; int k; int t; //-- 1. k = 8 t = 5 k = 8; t = 5; for( unsigned int i = 0; i < num_trials; i++ ) { solveQuestion3( k, t, N, points2D, points3D, T2, T3, sum_res, M ); storedResidual.push_back( sum_res ); storedM.push_back( M ); storedK.push_back( k ); } //-- 2. k = 12 t = 5 k = 12; t = 5; for( unsigned int i = 0; i < num_trials; i++ ) { solveQuestion3( k, t, N, points2D, points3D, T2, T3, sum_res, M ); storedResidual.push_back( sum_res ); storedM.push_back( M ); storedK.push_back( k ); } //-- 3. k = 15 t = 5 k = 15; t = 5; for( unsigned int i = 0; i < num_trials; i++ ) { solveQuestion3( k, t, N, points2D, points3D, T2, T3, sum_res, M ); storedResidual.push_back( sum_res ); storedM.push_back( M ); storedK.push_back( k ); } //-- Display and find min double minResidual = DBL_MAX; int minInd = -1; for( unsigned int i = 0; i < storedM.size(); i++ ) { printf("Trial [%d] -- K: %d Residual: %f \n", i, storedK[i], storedResidual[i]); if( storedResidual[i] < minResidual ) { minResidual = storedResidual[i]; minInd = i; } } std::cout<< " Min residual found with k: " << storedK[minInd] << std::endl; std::cout<< " Min residual value: " << storedResidual[minInd] << std::endl; std::cout<< " Matrix M: \n" << storedM[minInd] << std::endl; std::cout<< " Matrix T2: \n" << T2 << std::endl; std::cout<< " Matrix T3: \n" << T3 << std::endl; /** Check residual */ std::vector<Eigen::VectorXd> residual; Residual( storedM[minInd], points2D, points3D, normPoints2D, normPoints3D, T2, residual ); return 0; }
/** * @function solveQuestion3 */ void solveQuestion3( int k, int t, int N, std::vector<Eigen::VectorXi> _points2D, std::vector<Eigen::VectorXd> _points3D, Eigen::MatrixXd _T2, Eigen::MatrixXd _T3, double &_sum_res, Eigen::MatrixXd &_M ) { std::vector<Eigen::VectorXi> pointsK2D; std::vector<Eigen::VectorXd> pointsK3D; std::vector<Eigen::VectorXi> pointsTest2D; std::vector<Eigen::VectorXd> pointsTest3D; std::vector<Eigen::VectorXd> normPointsK2D; std::vector<Eigen::VectorXd> normPointsK3D; std::vector<Eigen::VectorXd> normPointsTest2D; std::vector<Eigen::VectorXd> normPointsTest3D; Eigen::MatrixXd M_SVD; /** Pick random points */ std::vector<int> randomK; std::vector<int> test; pickRandom( k, randomK, t, test, N ); printf(" K: "); for( unsigned int i = 0; i < k; i++ ) { printf(" %d ", randomK[i] ); } printf("\n test: "); for( unsigned int i = 0; i < t; i++ ) { printf(" %d ", test[i] ); } printf("\n"); pointsK2D.resize(0); pointsK3D.resize(0); pointsTest2D; pointsTest3D; normPointsK2D.resize(0); normPointsK3D.resize(0); normPointsTest2D.resize(0); normPointsTest3D.resize(0); /** Separate the sets */ for( unsigned int i = 0; i < k; i++ ) { pointsK2D.push_back( _points2D[ randomK[i] ] ); pointsK3D.push_back( _points3D[ randomK[i] ] ); } for( unsigned int i = 0; i < t; i++ ) { pointsTest2D.push_back( _points2D[ test[i] ] ); pointsTest3D.push_back( _points3D[ test[i] ] ); } /** Normalize the 2D points with T2 */ applyNorm2D( pointsK2D, _T2, normPointsK2D ); applyNorm3D( pointsK3D, _T3, normPointsK3D ); /** Calculate M from k points */ calculateM_SVD( normPointsK2D, normPointsK3D, M_SVD ); /// Residual applyNorm2D( pointsTest2D, _T2, normPointsTest2D ); applyNorm3D( pointsTest3D, _T3, normPointsTest3D ); std::vector<Eigen::VectorXd> residual; Residual( M_SVD, pointsTest2D, pointsTest3D, normPointsTest2D, normPointsTest3D, T2, residual ); /** Output: M and Residual */ _sum_res = 0; for( unsigned int i = 0; i < residual.size(); i++ ) { _sum_res += residual[i](2); } _sum_res 
/= residual.size(); _M = M_SVD; }
int main(int argc, char *argv[]) { int ierr = 0, i, j, k; #ifdef EPETRA_MPI MPI_Init(&argc,&argv); Epetra_MpiComm Comm( MPI_COMM_WORLD ); #else Epetra_SerialComm Comm; #endif bool verbose = false; // Check if we should print results to standard out if (argc>1) if (argv[1][0]=='-' && argv[1][1]=='v') verbose = true; if(verbose && Comm.MyPID()==0) std::cout << Epetra_Version() << std::endl << std::endl; int rank = Comm.MyPID(); // char tmp; // if (rank==0) std::cout << "Press any key to continue..."<< std::endl; // if (rank==0) cin >> tmp; // Comm.Barrier(); Comm.SetTracebackMode(0); // This should shut down any error traceback reporting if (verbose) std::cout << Comm << std::endl; // bool verbose1 = verbose; // Redefine verbose to only print on PE 0 if (verbose && rank!=0) verbose = false; int N = 20; int NRHS = 4; double * A = new double[N*N]; double * A1 = new double[N*N]; double * X = new double[(N+1)*NRHS]; double * X1 = new double[(N+1)*NRHS]; int LDX = N+1; int LDX1 = N+1; double * B = new double[N*NRHS]; double * B1 = new double[N*NRHS]; int LDB = N; int LDB1 = N; int LDA = N; int LDA1 = LDA; double OneNorm1; bool Upper = false; Epetra_SerialSpdDenseSolver solver; Epetra_SerialSymDenseMatrix * Matrix; for (int kk=0; kk<2; kk++) { for (i=1; i<=N; i++) { GenerateHilbert(A, LDA, i); OneNorm1 = 0.0; for (j=1; j<=i; j++) OneNorm1 += 1.0/((double) j); // 1-Norm = 1 + 1/2 + ...+1/n if (kk==0) { Matrix = new Epetra_SerialSymDenseMatrix(View, A, LDA, i); LDA1 = LDA; } else { Matrix = new Epetra_SerialSymDenseMatrix(Copy, A, LDA, i); LDA1 = i; } GenerateHilbert(A1, LDA1, i); if (kk==1) { solver.FactorWithEquilibration(true); Matrix->SetUpper(); Upper = true; solver.SolveToRefinedSolution(false); } for (k=0; k<NRHS; k++) for (j=0; j<i; j++) { B[j+k*LDB] = 1.0/((double) (k+3)*(j+3)); B1[j+k*LDB1] = B[j+k*LDB1]; } Epetra_SerialDenseMatrix Epetra_B(View, B, LDB, i, NRHS); Epetra_SerialDenseMatrix Epetra_X(View, X, LDX, i, NRHS); solver.SetMatrix(*Matrix); 
solver.SetVectors(Epetra_X, Epetra_B); ierr = check(solver, A1, LDA1, i, NRHS, OneNorm1, B1, LDB1, X1, LDX1, Upper, verbose); assert (ierr>-1); delete Matrix; if (ierr!=0) { if (verbose) std::cout << "Factorization failed due to bad conditioning. This is normal if SCOND is small." << std::endl; break; } } } delete [] A; delete [] A1; delete [] X; delete [] X1; delete [] B; delete [] B1; ///////////////////////////////////////////////////////////////////// // Now test norms and scaling functions ///////////////////////////////////////////////////////////////////// Epetra_SerialSymDenseMatrix D; double ScalarA = 2.0; int DM = 10; int DN = 10; D.Shape(DM); for (j=0; j<DN; j++) for (i=0; i<DM; i++) D[j][i] = (double) (1+i+j*DM) ; //std::cout << D << std::endl; double NormInfD_ref = (double)(DM*(DN*(DN+1))/2); double NormOneD_ref = NormInfD_ref; double NormInfD = D.NormInf(); double NormOneD = D.NormOne(); if (verbose) { std::cout << " *** Before scaling *** " << std::endl << " Computed one-norm of test matrix = " << NormOneD << std::endl << " Expected one-norm = " << NormOneD_ref << std::endl << " Computed inf-norm of test matrix = " << NormInfD << std::endl << " Expected inf-norm = " << NormInfD_ref << std::endl; } D.Scale(ScalarA); // Scale entire D matrix by this value //std::cout << D << std::endl; NormInfD = D.NormInf(); NormOneD = D.NormOne(); if (verbose) { std::cout << " *** After scaling *** " << std::endl << " Computed one-norm of test matrix = " << NormOneD << std::endl << " Expected one-norm = " << NormOneD_ref*ScalarA << std::endl << " Computed inf-norm of test matrix = " << NormInfD << std::endl << " Expected inf-norm = " << NormInfD_ref*ScalarA << std::endl; } ///////////////////////////////////////////////////////////////////// // Now test for larger system, both correctness and performance. 
///////////////////////////////////////////////////////////////////// N = 2000; NRHS = 5; LDA = N; LDB = N; LDX = N; if (verbose) std::cout << "\n\nComputing factor of an " << N << " x " << N << " SPD matrix...Please wait.\n\n" << std::endl; // Define A and X A = new double[LDA*N]; X = new double[LDB*NRHS]; for (j=0; j<N; j++) { for (k=0; k<NRHS; k++) X[j+k*LDX] = 1.0/((double) (j+5+k)); for (i=0; i<N; i++) { if (i==j) A[i+j*LDA] = 100.0 + i; else A[i+j*LDA] = -1.0/((double) (i+10)*(j+10)); } } // Define Epetra_SerialDenseMatrix object Epetra_SerialSymDenseMatrix BigMatrix(Copy, A, LDA, N); Epetra_SerialSymDenseMatrix OrigBigMatrix(View, A, LDA, N); Epetra_SerialSpdDenseSolver BigSolver; BigSolver.FactorWithEquilibration(true); BigSolver.SetMatrix(BigMatrix); // Time factorization Epetra_Flops counter; BigSolver.SetFlopCounter(counter); Epetra_Time Timer(Comm); double tstart = Timer.ElapsedTime(); ierr = BigSolver.Factor(); if (ierr!=0 && verbose) std::cout << "Error in factorization = "<<ierr<< std::endl; assert(ierr==0); double time = Timer.ElapsedTime() - tstart; double FLOPS = counter.Flops(); double MFLOPS = FLOPS/time/1000000.0; if (verbose) std::cout << "MFLOPS for Factorization = " << MFLOPS << std::endl; // Define Left hand side and right hand side Epetra_SerialDenseMatrix LHS(View, X, LDX, N, NRHS); Epetra_SerialDenseMatrix RHS; RHS.Shape(N,NRHS); // Allocate RHS // Compute RHS from A and X Epetra_Flops RHS_counter; RHS.SetFlopCounter(RHS_counter); tstart = Timer.ElapsedTime(); RHS.Multiply('L', 1.0, OrigBigMatrix, LHS, 0.0); // Symmetric Matrix-multiply time = Timer.ElapsedTime() - tstart; Epetra_SerialDenseMatrix OrigRHS = RHS; FLOPS = RHS_counter.Flops(); MFLOPS = FLOPS/time/1000000.0; if (verbose) std::cout << "MFLOPS to build RHS (NRHS = " << NRHS <<") = " << MFLOPS << std::endl; // Set LHS and RHS and solve BigSolver.SetVectors(LHS, RHS); tstart = Timer.ElapsedTime(); ierr = BigSolver.Solve(); if (ierr==1 && verbose) std::cout << "LAPACK guidelines 
suggest this matrix might benefit from equilibration." << std::endl; else if (ierr!=0 && verbose) std::cout << "Error in solve = "<<ierr<< std::endl; assert(ierr>=0); time = Timer.ElapsedTime() - tstart; FLOPS = BigSolver.Flops(); MFLOPS = FLOPS/time/1000000.0; if (verbose) std::cout << "MFLOPS for Solve (NRHS = " << NRHS <<") = " << MFLOPS << std::endl; double * resid = new double[NRHS]; bool OK = Residual(N, NRHS, A, LDA, BigSolver.X(), BigSolver.LDX(), OrigRHS.A(), OrigRHS.LDA(), resid); if (verbose) { if (!OK) std::cout << "************* Residual do not meet tolerance *************" << std::endl; for (i=0; i<NRHS; i++) std::cout << "Residual[" << i <<"] = "<< resid[i] << std::endl; std::cout << std::endl; } // Solve again using the Epetra_SerialDenseVector class for LHS and RHS Epetra_SerialDenseVector X2; Epetra_SerialDenseVector B2; X2.Size(BigMatrix.N()); B2.Size(BigMatrix.M()); int length = BigMatrix.N(); {for (int kk=0; kk<length; kk++) X2[kk] = ((double ) kk)/ ((double) length);} // Define entries of X2 RHS_counter.ResetFlops(); B2.SetFlopCounter(RHS_counter); tstart = Timer.ElapsedTime(); B2.Multiply('N', 'N', 1.0, OrigBigMatrix, X2, 0.0); // Define B2 = A*X2 time = Timer.ElapsedTime() - tstart; Epetra_SerialDenseVector OrigB2 = B2; FLOPS = RHS_counter.Flops(); MFLOPS = FLOPS/time/1000000.0; if (verbose) std::cout << "MFLOPS to build single RHS = " << MFLOPS << std::endl; // Set LHS and RHS and solve BigSolver.SetVectors(X2, B2); tstart = Timer.ElapsedTime(); ierr = BigSolver.Solve(); time = Timer.ElapsedTime() - tstart; if (ierr==1 && verbose) std::cout << "LAPACK guidelines suggest this matrix might benefit from equilibration." 
<< std::endl; else if (ierr!=0 && verbose) std::cout << "Error in solve = "<<ierr<< std::endl; assert(ierr>=0); FLOPS = counter.Flops(); MFLOPS = FLOPS/time/1000000.0; if (verbose) std::cout << "MFLOPS to solve single RHS = " << MFLOPS << std::endl; OK = Residual(N, 1, A, LDA, BigSolver.X(), BigSolver.LDX(), OrigB2.A(), OrigB2.LDA(), resid); if (verbose) { if (!OK) std::cout << "************* Residual do not meet tolerance *************" << std::endl; std::cout << "Residual = "<< resid[0] << std::endl; } delete [] resid; delete [] A; delete [] X; /////////////////////////////////////////////////// // Now test default constructor and index operators /////////////////////////////////////////////////// N = 5; Epetra_SerialSymDenseMatrix C; // Implicit call to default constructor, should not need to call destructor C.Shape(5); // Make it 5 by 5 double * C1 = new double[N*N]; GenerateHilbert(C1, N, N); // Generate Hilber matrix C1[1+2*N] = 1000.0; // Make matrix nonsymmetric // Fill values of C with Hilbert values for (i=0; i<N; i++) for (j=0; j<N; j++) C(i,j) = C1[i+j*N]; // Test if values are correctly written and read for (i=0; i<N; i++) for (j=0; j<N; j++) { assert(C(i,j) == C1[i+j*N]); assert(C(i,j) == C[j][i]); } if (verbose) std::cout << "Default constructor and index operator check OK. Values of Hilbert matrix = " << std::endl << C << std::endl << "Values should be 1/(i+j+1), except value (1,2) should be 1000" << std::endl; delete [] C1; #ifdef EPETRA_MPI MPI_Finalize() ; #endif /* end main */ return ierr ; }