// DPOSV uses Cholesky factorization A=U^T*U, A=L*L^T // to compute the solution to a real system of linear // equations A*X=B, where A is a square, (N,N) symmetric // positive definite matrix and X and B are (N,NRHS). // // If the system is over or under-determined, // (i.e. A is not square), then pass the problem // to the Least-squares solver (DGELSS) below. //--------------------------------------------------------- void umSOLVE_CH(const DMat& mat, const DMat& B, DMat& X) //--------------------------------------------------------- { if (!mat.ok()) {umWARNING("umSOLVE_CH()", "system is empty"); return;} if (!mat.is_square()) { umSOLVE_LS(mat, B, X); // return a least-squares solution. return; } DMat A(mat); // Work with a copy of input array. X = B; // initialize solution with rhs int rows=A.num_rows(), LDA=A.num_rows(), cols=A.num_cols(); int LDB=X.num_rows(), NRHS=X.num_cols(), info=0; assert(LDB >= rows); // enough space for solutions? // Solve the system. POSV('U', rows, NRHS, A.data(), LDA, X.data(), LDB, info); if (info < 0) { X = 0.0; umERROR("umSOLVE_CH(A,B, X)", "Error in input argument (%d)\nNo solution computed.", -info); } else if (info > 0) { X = 0.0; umERROR("umSOLVE_CH(A,B, X)", "\nINFO = %d. The leading minor of order %d of A" "\nis not positive definite, so the factorization" "\ncould not be completed. No solution computed.", info, info); } }
// DGESV uses the LU factorization to compute solution // to a real system of linear equations, A * X = B, // where A is square (N,N) and X, B are (N,NRHS). // // If the system is over or under-determined, // (i.e. A is not square), then pass the problem // to the Least-squares solver (DGELSS) below. //--------------------------------------------------------- void umSOLVE(const DMat& mat, const DMat& B, DMat& X) //--------------------------------------------------------- { if (!mat.ok()) {umWARNING("umSOLVE()", "system is empty"); return;} if (!mat.is_square()) { umSOLVE_LS(mat, B, X); // return a least-squares solution. return; } DMat A(mat); // work with copy of input X = B; // initialize result with RHS int rows=A.num_rows(), LDA=A.num_rows(), cols=A.num_cols(); int LDB=B.num_rows(), NRHS=B.num_cols(), info=0; if (rows<1) {umWARNING("umSOLVE()", "system is empty"); return;} IVec ipiv(rows); // Solve the system. GESV(rows, NRHS, A.data(), LDA, ipiv.data(), X.data(), LDB, info); if (info < 0) { X = 0.0; umERROR("umSOLVE(A,B, X)", "Error in input argument (%d)\nNo solution computed.", -info); } else if (info > 0) { X = 0.0; umERROR("umSOLVE(A,B, X)", "\nINFO = %d. U(%d,%d) was exactly zero." "\nThe factorization has been completed, but the factor U is " "\nexactly singular, so the solution could not be computed.", info, info, info); } }
//--------------------------------------------------------- void umAxB(const DMat& A, const DMat& B, DMat& C) //--------------------------------------------------------- { //------------------------- // C = A * B //------------------------- // A = op(A) is (M,K) // B = op(B) is (K,N) // C is (M,N) //------------------------- int M=A.num_rows(), K=A.num_cols(), N=B.num_cols(); int LDA=M, LDB=K, LDC=M; double one=1.0, zero=0.0; if (B.num_rows() != K) { umERROR("umAxB(A,B,C)", "wrong dimensions"); } C.resize(M,N); GEMM ('N','N',M,N,K, one,A.data(),LDA, B.data(),LDB, zero,C.data(),LDC); }
//--------------------------------------------------------- bool chol_solve(const DMat& ch, const DMat& B, DMat& X) //--------------------------------------------------------- { // Solve a set of linear systems using Cholesky-factored // symmetric positive-definite matrix, A = U^T U. if (FACT_CHOL != ch.get_factmode()) {umERROR("chol_solve(ch,B,X)", "matrix is not factored.");} int M =ch.num_rows(), lda=ch.num_rows(); int ldb=B.num_rows(), nrhs=B.num_cols(); assert(ldb == M); char uplo = 'U'; int info=0; double* ch_data = const_cast<double*>(ch.data()); X = B; // overwrite X with RHS's, then solutions POTRS (uplo, M, nrhs, ch_data, lda, X.data(), ldb, info); if (info) { umERROR("chol_solve(ch,B,X)", "dpotrs reports: info = %d", info); } return true; }
// tex "table" output //--------------------------------------------------------- void textable ( string& capt, FILE* fid, string* titles, DMat& entries, string* form ) //--------------------------------------------------------- { int Nrows=entries.num_rows(), Ncols=entries.num_cols(), n=0,m=0; fprintf(fid, "\\begin{table} \n"); fprintf(fid, "\\caption{%s} \n", capt.c_str()); fprintf(fid, "\\begin{center} \n"); fprintf(fid, "\\begin{tabular}{|"); for (n=1; n<=Ncols; ++n) { fprintf(fid, "c|"); if (2==n) { fprintf(fid, "|"); } } fprintf(fid, "} \\hline \n "); for (n=1; n<=(Ncols-1); ++n) { fprintf(fid, "%s & ", titles[n].c_str()); } fprintf(fid, " %s \\\\ \\hline \n ", titles[Ncols].c_str()); for (m=1; m<=Nrows; ++m) { for (n=1; n<=(Ncols-1); ++n) { fprintf(fid, form[n].c_str(), entries(m,n)); fprintf(fid, " & "); } if (m<Nrows) { fprintf(fid, form[Ncols].c_str(), entries(m,Ncols)); fprintf(fid, " \\\\ \n "); } else { fprintf(fid, form[Ncols].c_str(), entries(m,Ncols)); fprintf(fid, " \\\\ \\hline \n "); } } fprintf(fid, "\\end{tabular} \n"); fprintf(fid, "\\end{center} \n"); fprintf(fid, "\\end{table} \n"); }
// DGELSS computes minimum norm solution to a real linear // least squares problem: Minimize 2-norm(| b - A*x |). // using the singular value decomposition (SVD) of A. // A is an M-by-N matrix which may be rank-deficient. //--------------------------------------------------------- void umSOLVE_LS(const DMat& mat, const DMat& B, DMat& X) //--------------------------------------------------------- { if (!mat.ok()) {umWARNING("umSOLVE_LS()", "system is empty"); return;} DMat A(mat); // work with copy of input. int rows=A.num_rows(), cols=A.num_cols(), mmn=A.min_mn(); int LDB=A.max_mn(), NRHS=B.num_cols(); if (rows!=B.num_rows()) {umERROR("umSOLVE_LS(A,B)", "Inconsistant matrix sizes.");} DVec s(mmn); // allocate array for singular values // X must be big enough to store various results. // Resize X so that its leading dimension = max(M,N), // then load the set of right hand sides. X.resize(LDB,NRHS, true, 0.0); for (int j=1; j<=NRHS; ++j) // loop across colums for (int i=1; i<=rows; ++i) // loop down rows X(i,j) = B(i,j); // RCOND is used to determine the effective rank of A. // Singular values S(i) <= RCOND*S(1) are treated as zero. // If RCOND < 0, machine precision is used instead. //double rcond = 1.0 / 1.0e16; double rcond = -1.0; // NBN: ACML does not use the work vector. int mnLo=A.min_mn(), mnHi=A.max_mn(), rank=1, info=1; int lwork = 10*mnLo + std::max(2*mnLo, std::max(mnHi, NRHS)); DVec work(lwork); // Solve the system GELSS (rows, cols, NRHS, A.data(), rows, X.data(), LDB, s.data(), rcond, rank, work.data(), lwork, info); //--------------------------------------------- // Report: //--------------------------------------------- if (info == 0) { umLOG(1, "umSOLVE_LS reports successful LS-solution." "\nRCOND = %0.6e, " "\nOptimal length of work array was %d\n", rcond, lwork); } else { if (info < 0) { X = 0.0; umERROR("umSOLVE_LS(DMat&, DMat&)", "Error in input argument (%d)\nNo solution or error bounds computed.", -info); } else if (info > 0) { X = 0.0; umERROR("umSOLVE_LS(DMat&, DMat&)", "\nThe algorithm for computing the SVD failed to converge.\n" "\n%d off-diagonal elements of an intermediate " "\nbidiagonal form did not converge to zero.\n " "\nRCOND = %0.6e, " "\nOptimal length of work array was %d.\n", info, rcond, lwork); } } }
//--------------------------------------------------------- void NDG2D::OutputSampleXYZ ( int sample_N, DMat &newX, DMat &newY, DMat &newZ, // e.g. triangles on a sphere const DMat &FData, // old field data DMat &newFData, // new field data int zfield // if>0, use as z-elevation ) //--------------------------------------------------------- { DVec newR, newS, newT; DMat newVDM; int newNpts = 0; // Triangles OutputSampleNodes2D(sample_N, newR, newS); newNpts = newR.size(); newVDM = Vandermonde2D(this->N, newR, newS); const DMat& oldV = this->V; DMat oldtonew(newNpts, this->Np, "OldToNew"); oldtonew = trans(trans(oldV) | trans(newVDM)); //----------------------------------- // interpolate the field data //----------------------------------- int Nfields = FData.num_cols(); newFData.resize(newNpts*this->K, Nfields); //DVec scales(Nfields); // For each field, use tOldF to wrap field i. // Use tNewF to load the interpolated field // directly into column i of the output array. DMat tOldF, tNewF; for (int i=1; i<=Nfields; ++i) { tOldF.borrow(this->Np, this->K, (double*) FData.pCol(i)); tNewF.borrow(newNpts, this->K, (double*)newFData.pCol(i)); tNewF = oldtonew * tOldF; //scales(i) = tNewF.max_col_val_abs(i); } //----------------------------------- // interpolate the vertices //----------------------------------- newX = oldtonew * this->x; newY = oldtonew * this->y; if (this->bCoord3D) { newZ = oldtonew * this->z; } else { if (zfield>=1 && zfield<=Nfields) { // use field data for z-height newZ.load(newNpts, K, newFData.pCol(Nfields)); } else { // set z-data to 0.0 newZ.resize(newNpts, K, true, 0.0); } } }
//--------------------------------------------------------- void NDG2D::OutputVTK(const DMat& FData, int order, int zfield) //--------------------------------------------------------- { static int count = 0; string output_dir = "."; // The caller loads each field of interest into FData, // storing (Np*K) scalars per column. // // For high (or low) resolution output, the user can // specify an arbitrary order of interpolation for // exporting the fields. Thus while a simulation may // use N=3, we can export the solution fields with // high-order, regularized elements (e.g. with N=12). string buf = umOFORM("%s/sim_N%02d_%04d.vtk", output_dir.c_str(), order, ++count); FILE *fp = fopen(buf.c_str(), "w"); if (!fp) { umLOG(1, "Could no open %s for output!\n", buf.c_str()); return; } // Set flags and totals int Output_N = std::max(2, order); int Ncells=0, Npts=0; Ncells = OutputSampleNelmt2D(Output_N); Npts = OutputSampleNpts2D (Output_N); // set totals for Vtk output int vtkTotalPoints = this->K * Npts; int vtkTotalCells = this->K * Ncells; int vtkTotalConns = (this->EToV.num_cols()+1) * this->K * Ncells; //------------------------------------- // 1. Write the VTK header details //------------------------------------- fprintf(fp, "# vtk DataFile Version 2"); fprintf(fp, "\nNuDG++ 2D simulation"); fprintf(fp, "\nASCII"); fprintf(fp, "\nDATASET UNSTRUCTURED_GRID\n"); fprintf(fp, "\nPOINTS %d double", vtkTotalPoints); int newNpts=0; //------------------------------------- // 2. Write the vertex data //------------------------------------- DMat newX, newY, newZ, newFData; // Build new {X,Y,Z} vertices that regularize the // elements, then interpolate solution fields onto // this new set of elements: OutputSampleXYZ(Output_N, newX, newY, newZ, FData, newFData, zfield); //double maxF1 = newFData.max_col_val_abs(1), scaleF=1.0; //if (maxF1 != 0.0) { scaleF = 1.0/maxF1; } newNpts = newX.num_rows(); if (zfield>0) { // write 2D vertex data, with z-elevation for (int k=1; k<=this->K; ++k) { for (int n=1; n<=newNpts; ++n) { // use exponential format to allow for // arbitrary (astro, nano) magnitudes: fprintf(fp, "\n%20.12e %20.12e %20.12e", newX(n, k), newY(n, k), newZ(n,k)); //*scaleF); } } } else { // write 2D vertex data to file for (int k=1; k<=this->K; ++k) { for (int n=1; n<=newNpts; ++n) { // use exponential format to allow for // arbitrary (astro, nano) magnitudes: fprintf(fp, "\n%20.12e %20.12e 0.0", newX(n, k), newY(n, k)); } } } //------------------------------------- // 3. Write the element connectivity //------------------------------------- IMat newELMT; // Number of indices required to define connectivity fprintf(fp, "\n\nCELLS %d %d", vtkTotalCells, vtkTotalConns); // build regularized tri elements at selected order OutputSampleELMT2D(Output_N, newELMT); newNpts = OutputSampleNpts2D(Output_N); int newNTri = newELMT.num_rows(); int newNVert = newELMT.num_cols(); // write element connectivity to file for (int k=0; k<this->K; ++k) { int nodesk = k*newNpts; for (int n=1; n<=newNTri; ++n) { fprintf(fp, "\n%d", newNVert); for (int i=1; i<=newNVert; ++i) { fprintf(fp, " %5d", nodesk+newELMT(n, i)); } } } //------------------------------------- // 4. Write the cell types //------------------------------------- // For each element (cell) write a single integer // identifying the cell type. The integer should // correspond to the enumeration in the vtk file: // /VTK/Filtering/vtkCellType.h fprintf(fp, "\n\nCELL_TYPES %d", vtkTotalCells); for (int k=0; k<this->K; ++k) { fprintf(fp, "\n"); for (int i=1; i<=Ncells; ++i) { fprintf(fp, "5 "); // 5:VTK_TRIANGLE if (! (i%10)) fprintf(fp, "\n"); } } //------------------------------------- // 5. Write the scalar "vtkPointData" //------------------------------------- fprintf(fp, "\n\nPOINT_DATA %d", vtkTotalPoints); // For each field, write POINT DATA for each point // in the vtkUnstructuredGrid. int Nfields = FData.num_cols(); for (int fld=1; fld<=Nfields; ++fld) { fprintf(fp, "\nSCALARS field%d double 1", fld); fprintf(fp, "\nLOOKUP_TABLE default"); // Write the scalar data, using exponential format // to allow for arbitrary (astro, nano) magnitudes: for (int n=1; n<=newFData.num_rows(); ++n) { fprintf(fp, "\n%20.12e ", newFData(n, fld)); } } // add final newline to output fprintf(fp, "\n"); fclose(fp); }