// called from package MatrixModels's R code: SEXP dgCMatrix_cholsol(SEXP x, SEXP y) { /* Solve Sparse Least Squares X %*% beta ~= y with dense RHS y, * where X = t(x) i.e. we pass x = t(X) as argument, * via "Cholesky(X'X)" .. well not really: * cholmod_factorize("x", ..) finds L in X'X = L'L directly */ CHM_SP cx = AS_CHM_SP(x); /* FIXME: extend this to work in multivariate case, i.e. y a matrix with > 1 column ! */ CHM_DN cy = AS_CHM_DN(coerceVector(y, REALSXP)), rhs, cAns, resid; CHM_FR L; int n = cx->ncol;/* #{obs.} {x = t(X) !} */ double one[] = {1,0}, zero[] = {0,0}, neg1[] = {-1,0}; const char *nms[] = {"L", "coef", "Xty", "resid", ""}; SEXP ans = PROTECT(Rf_mkNamed(VECSXP, nms)); R_CheckStack(); if (n < cx->nrow || n <= 0) error(_("dgCMatrix_cholsol requires a 'short, wide' rectangular matrix")); if (cy->nrow != n) error(_("Dimensions of system to be solved are inconsistent")); rhs = cholmod_allocate_dense(cx->nrow, 1, cx->nrow, CHOLMOD_REAL, &c); /* cholmod_sdmult(A, transp, alpha, beta, X, Y, &c): * Y := alpha*(A*X) + beta*Y or alpha*(A'*X) + beta*Y ; * here: rhs := 1 * x %*% y + 0 = x %*% y = X'y */ if (!(cholmod_sdmult(cx, 0 /* trans */, one, zero, cy, rhs, &c))) error(_("cholmod_sdmult error (rhs)")); L = cholmod_analyze(cx, &c); if (!cholmod_factorize(cx, L, &c)) error(_("cholmod_factorize failed: status %d, minor %d from ncol %d"), c.status, L->minor, L->n); /* FIXME: Do this in stages so an "effects" vector can be calculated */ if (!(cAns = cholmod_solve(CHOLMOD_A, L, rhs, &c))) error(_("cholmod_solve (CHOLMOD_A) failed: status %d, minor %d from ncol %d"), c.status, L->minor, L->n); /* L : */ SET_VECTOR_ELT(ans, 0, chm_factor_to_SEXP(L, 0)); /* coef : */ SET_VECTOR_ELT(ans, 1, allocVector(REALSXP, cx->nrow)); Memcpy(REAL(VECTOR_ELT(ans, 1)), (double*)(cAns->x), cx->nrow); /* X'y : */ /* FIXME: Change this when the "effects" vector is available */ SET_VECTOR_ELT(ans, 2, allocVector(REALSXP, cx->nrow)); Memcpy(REAL(VECTOR_ELT(ans, 2)), 
(double*)(rhs->x), cx->nrow); /* resid := y */ resid = cholmod_copy_dense(cy, &c); /* cholmod_sdmult(A, transp, alp, bet, X, Y, &c): * Y := alp*(A*X) + bet*Y or alp*(A'*X) + beta*Y ; * here: resid := -1 * x' %*% coef + 1 * y = y - X %*% coef */ if (!(cholmod_sdmult(cx, 1/* trans */, neg1, one, cAns, resid, &c))) error(_("cholmod_sdmult error (resid)")); /* FIXME: for multivariate case, i.e. resid *matrix* with > 1 column ! */ SET_VECTOR_ELT(ans, 3, allocVector(REALSXP, n)); Memcpy(REAL(VECTOR_ELT(ans, 3)), (double*)(resid->x), n); cholmod_free_factor(&L, &c); cholmod_free_dense(&rhs, &c); cholmod_free_dense(&cAns, &c); UNPROTECT(1); return ans; }
/**
 * Product of a sparse matrix 'a' with a matrix 'b', computed by
 * cholmod_sdmult() without transposing 'a'.
 *
 * 'b' is converted with mMatrix_as_dgeMatrix() first.  The dimnames list
 * handed to the result holds element 0 of a's DimNames slot and element 1
 * of the converted b's DimNames slot.
 *
 * @param a sparse left operand (accessed through AS_CHM_SP).
 * @param b right operand.
 * @return  whatever chm_dense_to_SEXP() builds from the product.
 */
SEXP Csparse_dense_prod(SEXP a, SEXP b)
{
    double alpha[] = {1, 0}, beta[] = {0, 0};
    int n_protected = 2; /* b_dge and dimnms */
    CHM_SP lhs = AS_CHM_SP(a);
    SEXP b_dge = PROTECT(mMatrix_as_dgeMatrix(b));
    CHM_DN rhs = AS_CHM_DN(b_dge);
    CHM_DN prod = cholmod_allocate_dense(lhs->nrow, rhs->ncol, lhs->nrow,
                                         rhs->xtype, &c);
    SEXP dimnms = PROTECT(allocVector(VECSXP, 2));
    SEXP a_dimnms, b_dimnms;

    R_CheckStack();

    /* Tim Davis, please FIXME: as of 2010-11 cholmod_sdmult() *fails* for a
     * pattern matrix 'a', and cholmod_l_copy(.., CHOLMOD_REAL, ..) fails to
     * produce a CHOLMOD_REAL result, so the coercion is done with our own
     * Matrix-classes code instead. */
    if (lhs->xtype == CHOLMOD_PATTERN) {
        SEXP a_dbl = PROTECT(nz2Csparse(a, x_double));
        n_protected++;
        lhs = AS_CHM_SP(a_dbl);
    }

    cholmod_sdmult(lhs, 0, alpha, beta, rhs, prod, &c);

    /* establish dimnames */
    a_dimnms = GET_SLOT(a, Matrix_DimNamesSym);
    SET_VECTOR_ELT(dimnms, 0, duplicate(VECTOR_ELT(a_dimnms, 0)));
    b_dimnms = GET_SLOT(b_dge, Matrix_DimNamesSym);
    SET_VECTOR_ELT(dimnms, 1, duplicate(VECTOR_ELT(b_dimnms, 1)));

    UNPROTECT(n_protected);
    return chm_dense_to_SEXP(prod, 1, 0, dimnms);
}
/* Uses factorization to solve. */ void ClpCholeskyUfl::solve (double * region) { cholmod_dense *x, *b; b = cholmod_allocate_dense (numberRows_, 1, numberRows_, CHOLMOD_REAL, c_) ; CoinMemcpyN(region, numberRows_, (double *) b->x); x = cholmod_solve (CHOLMOD_A, L_, b, c_) ; CoinMemcpyN((double *) x->x, numberRows_, region); cholmod_free_dense (&x, c_) ; cholmod_free_dense (&b, c_) ; }
int CholeskyFactorization::solve(Matrix& rhs, Matrix& solution) { if (m_matrix_type == Matrix::MATRIX_SPARSE) { cholmod_dense *x; /* cast the RHS as a cholmod_dense b = rhs */ if (rhs.m_type == Matrix::MATRIX_DENSE) { cholmod_dense *b; b = cholmod_allocate_dense(rhs.m_nrows, rhs.m_ncols, rhs.m_nrows, CHOLMOD_REAL, Matrix::cholmod_handle()); b->x = rhs.m_data; /* Solve - rhs is dense*/ x = cholmod_solve(CHOLMOD_A, m_factor, b, Matrix::cholmod_handle()); solution = Matrix(rhs.m_nrows, rhs.m_ncols); solution.m_delete_data = false; memcpy(solution.m_data, static_cast<double*> (x->x), rhs.m_nrows * rhs.m_ncols * sizeof (double)); cholmod_free_dense(&x, Matrix::cholmod_handle()); } else if (rhs.m_type == Matrix::MATRIX_SPARSE) { // still untested! cholmod_sparse * rhs_sparse; if (rhs.m_sparse == NULL) { const_cast<Matrix&> (rhs)._createSparse(); } rhs_sparse = rhs.m_sparse; cholmod_sparse * result = cholmod_spsolve(CHOLMOD_LDLt, m_factor, rhs_sparse, Matrix::cholmod_handle()); solution = Matrix(rhs.m_nrows, rhs.m_ncols, Matrix::MATRIX_SPARSE); solution.m_sparse = result; solution._createTriplet(); } else { throw std::logic_error("Not supported"); } return ForBESUtils::STATUS_OK; } else { /* the matrix to be factorized is not sparse */ int info = ForBESUtils::STATUS_UNDEFINED_FUNCTION; solution = Matrix(rhs); if (m_matrix_type == Matrix::MATRIX_DENSE) { info = LAPACKE_dpotrs(LAPACK_COL_MAJOR, 'L', m_matrix_nrows, rhs.m_ncols, m_L, m_matrix_nrows, solution.m_data, m_matrix_nrows); } else if (m_matrix_type == Matrix::MATRIX_SYMMETRIC) { info = LAPACKE_dpptrs(LAPACK_COL_MAJOR, 'L', m_matrix_nrows, rhs.m_ncols, m_L, solution.m_data, m_matrix_nrows); } else { throw std::invalid_argument("This matrix type is not supported - only DENSE, SPARSE and SYMMETRIC are supported"); } return info; } }
/**
 * Cross product of a sparse matrix 'a' with a matrix 'b', computed by
 * cholmod_sdmult() with its transpose flag set to 1 for 'a'.
 *
 * 'b' is converted with mMatrix_as_dgeMatrix() first.  The dimnames list
 * handed to the result holds element 1 of a's DimNames slot and element 1
 * of the converted b's DimNames slot.
 *
 * @param a sparse left operand (accessed through AS_CHM_SP).
 * @param b right operand.
 * @return  whatever chm_dense_to_SEXP() builds from the product.
 */
SEXP Csparse_dense_crossprod(SEXP a, SEXP b)
{
    double alpha[] = {1, 0}, beta[] = {0, 0};
    int n_protected = 2; /* b_dge and dimnms */
    CHM_SP lhs = AS_CHM_SP(a);
    SEXP b_dge = PROTECT(mMatrix_as_dgeMatrix(b));
    CHM_DN rhs = AS_CHM_DN(b_dge);
    CHM_DN prod = cholmod_allocate_dense(lhs->ncol, rhs->ncol, lhs->ncol,
                                         rhs->xtype, &c);
    SEXP dimnms = PROTECT(allocVector(VECSXP, 2));
    SEXP a_dimnms, b_dimnms;

    R_CheckStack();

    /* A pattern matrix gets the same treatment as in Csparse_dense_prod():
     * coerce it to a matrix with double entries before multiplying. */
    if (lhs->xtype == CHOLMOD_PATTERN) {
        SEXP a_dbl = PROTECT(nz2Csparse(a, x_double));
        n_protected++;
        lhs = AS_CHM_SP(a_dbl);
    }

    cholmod_sdmult(lhs, 1, alpha, beta, rhs, prod, &c);

    /* establish dimnames */
    a_dimnms = GET_SLOT(a, Matrix_DimNamesSym);
    SET_VECTOR_ELT(dimnms, 0, duplicate(VECTOR_ELT(a_dimnms, 1)));
    b_dimnms = GET_SLOT(b_dge, Matrix_DimNamesSym);
    SET_VECTOR_ELT(dimnms, 1, duplicate(VECTOR_ELT(b_dimnms, 1)));

    UNPROTECT(n_protected);
    return chm_dense_to_SEXP(prod, 1, 0, dimnms);
}
/* CHOLMOD demo driver.
 *
 * Reads a sparse matrix A from the file named by argv [1] (or from stdin when
 * no argument is given), builds a right-hand side B, then analyzes,
 * factorizes and solves.  If A is unsymmetric (stype == 0) the matrix
 * A*A'+beta*I is factorized, otherwise A itself.  The residual is computed,
 * one step of iterative refinement is done in the real symmetric case, and
 * statistics about orderings, sizes, timings and residuals are printed.
 *
 * Errors are reported through my_handler, installed as the CHOLMOD error
 * handler, so return values are not checked here.
 *
 * Returns 0.
 */
int main (int argc, char **argv)
{
    double resid, t, ta, tf, ts, tot, bnorm, xnorm, anorm, rnorm, fl, anz,
        axbnorm, rnorm2, resid2 ;
    FILE *f ;
    cholmod_sparse *A ;
    cholmod_dense *X, *B, *W, *R ;
    double one [2], zero [2], minusone [2], beta [2], xlnz ;
    cholmod_common Common, *cm ;
    cholmod_factor *L ;
    double *Bx, *Rx, *Xx ;
    int i, n, isize, xsize, ordering, xtype, s, ss, lnz ;

    /* ---------------------------------------------------------------------- */
    /* get the file containing the input matrix */
    /* ---------------------------------------------------------------------- */

    ff = NULL ;
    if (argc > 1)
    {
        if ((f = fopen (argv [1], "r")) == NULL)
        {
            my_handler (CHOLMOD_INVALID, __FILE__, __LINE__,
                "unable to open file") ;
        }
        ff = f ;
    }
    else
    {
        f = stdin ;
    }

    /* ---------------------------------------------------------------------- */
    /* start CHOLMOD and set parameters */
    /* ---------------------------------------------------------------------- */

    cm = &Common ;
    cholmod_start (cm) ;

    /* use default parameter settings, except for the error handler.  This
     * demo program terminates if an error occurs (out of memory, not positive
     * definite, ...).  It makes the demo program simpler (no need to check
     * CHOLMOD error conditions).  This non-default parameter setting has no
     * effect on performance. */
    cm->error_handler = my_handler ;

    /* Note that CHOLMOD will do a supernodal LL' or a simplicial LDL' by
     * default, automatically selecting the latter if flop/nnz(L) < 40. */

    /* ---------------------------------------------------------------------- */
    /* create basic scalars */
    /* ---------------------------------------------------------------------- */

    zero [0] = 0 ;
    zero [1] = 0 ;
    one [0] = 1 ;
    one [1] = 0 ;
    minusone [0] = -1 ;
    minusone [1] = 0 ;
    beta [0] = 1e-6 ;
    beta [1] = 0 ;

    /* ---------------------------------------------------------------------- */
    /* read in a matrix */
    /* ---------------------------------------------------------------------- */

    printf ("\n---------------------------------- cholmod_demo:\n") ;
    A = cholmod_read_sparse (f, cm) ;
    if (ff != NULL)
    {
        /* ff is declared outside this function; clear it once the stream is
         * closed so that no other code can close the same FILE a second
         * time. */
        fclose (ff) ;
        ff = NULL ;
    }
    anorm = cholmod_norm_sparse (A, 0, cm) ;
    xtype = A->xtype ;
    printf ("norm (A,inf) = %g\n", anorm) ;
    printf ("norm (A,1) = %g\n", cholmod_norm_sparse (A, 1, cm)) ;
    cholmod_print_sparse (A, "A", cm) ;

    if (A->nrow > A->ncol)
    {
        /* Transpose A so that A'A+beta*I will be factorized instead */
        cholmod_sparse *C = cholmod_transpose (A, 2, cm) ;
        cholmod_free_sparse (&A, cm) ;
        A = C ;
        printf ("transposing input matrix\n") ;
    }

    /* ---------------------------------------------------------------------- */
    /* create an arbitrary right-hand-side */
    /* ---------------------------------------------------------------------- */

    n = A->nrow ;
    B = cholmod_zeros (n, 1, xtype, cm) ;
    Bx = B->x ;

#if GHS
    {
        /* b = A*ones(n,1), used by Gould, Hu, and Scott in their experiments */
        cholmod_dense *X0 ;
        X0 = cholmod_ones (A->ncol, 1, xtype, cm) ;
        cholmod_sdmult (A, 0, one, zero, X0, B, cm) ;
        cholmod_free_dense (&X0, cm) ;
    }
#else
    if (xtype == CHOLMOD_REAL)
    {
        /* real case */
        for (i = 0 ; i < n ; i++)
        {
            double x = n ;
            Bx [i] = 1 + i / x ;
        }
    }
    else
    {
        /* complex case */
        for (i = 0 ; i < n ; i++)
        {
            double x = n ;
            Bx [2*i ] = 1 + i / x ; /* real part of B(i) */
            Bx [2*i+1] = (x/2 - i) / (3*x) ; /* imag part of B(i) */
        }
    }
#endif

    cholmod_print_dense (B, "B", cm) ;
    bnorm = cholmod_norm_dense (B, 0, cm) ; /* max norm */
    printf ("bnorm %g\n", bnorm) ;

    /* ---------------------------------------------------------------------- */
    /* analyze, factorize, and solve */
    /* ---------------------------------------------------------------------- */

    t = CPUTIME ;
    L = cholmod_analyze (A, cm) ;
    ta = CPUTIME - t ;
    ta = MAX (ta, 0) ;

    printf ("Analyze: flop %g lnz %g\n", cm->fl, cm->lnz) ;

    if (A->stype == 0)
    {
        printf ("Factorizing A*A'+beta*I\n") ;
        t = CPUTIME ;
        cholmod_factorize_p (A, beta, NULL, 0, L, cm) ;
        tf = CPUTIME - t ;
        tf = MAX (tf, 0) ;
    }
    else
    {
        printf ("Factorizing A\n") ;
        t = CPUTIME ;
        cholmod_factorize (A, L, cm) ;
        tf = CPUTIME - t ;
        tf = MAX (tf, 0) ;
    }

    t = CPUTIME ;
    X = cholmod_solve (CHOLMOD_A, L, B, cm) ;
    ts = CPUTIME - t ;
    ts = MAX (ts, 0) ;
    tot = ta + tf + ts ;

    /* ---------------------------------------------------------------------- */
    /* compute the residual */
    /* ---------------------------------------------------------------------- */

    if (A->stype == 0)
    {
        /* (AA'+beta*I)x=b is the linear system that was solved */
        /* W = A'*X */
        W = cholmod_allocate_dense (A->ncol, 1, A->ncol, xtype, cm) ;
        cholmod_sdmult (A, 2, one, zero, X, W, cm) ;
        /* R = B - beta*X */
        R = cholmod_zeros (n, 1, xtype, cm) ;
        Rx = R->x ;
        Xx = X->x ;
        if (xtype == CHOLMOD_REAL)
        {
            for (i = 0 ; i < n ; i++)
            {
                Rx [i] = Bx [i] - beta [0] * Xx [i] ;
            }
        }
        else
        {
            /* complex case */
            for (i = 0 ; i < n ; i++)
            {
                Rx [2*i ] = Bx [2*i ] - beta [0] * Xx [2*i ] ;
                Rx [2*i+1] = Bx [2*i+1] - beta [0] * Xx [2*i+1] ;
            }
        }
        /* R = A*W - R */
        cholmod_sdmult (A, 0, one, minusone, W, R, cm) ;
        cholmod_free_dense (&W, cm) ;
    }
    else
    {
        /* Ax=b was factorized and solved, R = B-A*X */
        R = cholmod_copy_dense (B, cm) ;
        cholmod_sdmult (A, 0, minusone, one, X, R, cm) ;
    }
    rnorm = cholmod_norm_dense (R, 0, cm) ; /* max abs. entry */
    xnorm = cholmod_norm_dense (X, 0, cm) ; /* max abs. entry */

    /* the (n == 0) term keeps the denominator nonzero for an empty system */
    axbnorm = (anorm * xnorm + bnorm + ((n == 0) ? 1 : 0)) ;
    resid = rnorm / axbnorm ;

    /* ---------------------------------------------------------------------- */
    /* iterative refinement (real symmetric case only) */
    /* ---------------------------------------------------------------------- */

    resid2 = -1 ; /* stays negative when no refinement is done */
    if (A->stype != 0 && A->xtype == CHOLMOD_REAL)
    {
        cholmod_dense *R2 ;

        /* R2 = A\(B-A*X) */
        R2 = cholmod_solve (CHOLMOD_A, L, R, cm) ;
        /* compute X = X + A\(B-A*X) */
        Xx = X->x ;
        Rx = R2->x ;
        for (i = 0 ; i < n ; i++)
        {
            Xx [i] = Xx [i] + Rx [i] ;
        }
        cholmod_free_dense (&R2, cm) ;
        cholmod_free_dense (&R, cm) ;

        /* compute the new residual, R = B-A*X */
        R = cholmod_copy_dense (B, cm) ;
        cholmod_sdmult (A, 0, minusone, one, X, R, cm) ;
        rnorm2 = cholmod_norm_dense (R, 0, cm) ;
        resid2 = rnorm2 / axbnorm ;
    }

    cholmod_free_dense (&R, cm) ;

    /* ---------------------------------------------------------------------- */
    /* print results */
    /* ---------------------------------------------------------------------- */

    cholmod_print_factor (L, "L", cm) ;

    /* determine the # of integers's and reals's in L.  See cholmod_free */
    if (L->is_super)
    {
        s = L->nsuper + 1 ;
        xsize = L->xsize ;
        ss = L->ssize ;
        isize =
            n /* L->Perm */
            + n /* L->ColCount, nz in each column of 'pure' L */
            + s /* L->pi, column pointers for L->s */
            + s /* L->px, column pointers for L->x */
            + s /* L->super, starting column index of each supernode */
            + ss ; /* L->s, the pattern of the supernodes */
    }
    else
    {
        /* this space can increase if you change parameters to their non-
         * default values (cm->final_pack, for example). */
        lnz = L->nzmax ;
        xsize = lnz ;
        isize =
            n /* L->Perm */
            + n /* L->ColCount, nz in each column of 'pure' L */
            + n+1 /* L->p, column pointers */
            + lnz /* L->i, integer row indices */
            + n /* L->nz, nz in each column of L */
            + n+2 /* L->next, link list */
            + n+2 ; /* L->prev, link list */
    }

    anz = cm->anz ;
    for (i = 0 ; i < CHOLMOD_MAXMETHODS ; i++)
    {
        fl = cm->method [i].fl ;
        xlnz = cm->method [i].lnz ;
        /* the statistics of each method are reset after being read */
        cm->method [i].fl = -1 ;
        cm->method [i].lnz = -1 ;
        ordering = cm->method [i].ordering ;
        if (fl >= 0)
        {
            printf ("Ordering: ") ;
            if (ordering == CHOLMOD_POSTORDERED) printf ("postordered ") ;
            if (ordering == CHOLMOD_NATURAL) printf ("natural ") ;
            if (ordering == CHOLMOD_GIVEN) printf ("user ") ;
            if (ordering == CHOLMOD_AMD) printf ("AMD ") ;
            if (ordering == CHOLMOD_METIS) printf ("METIS ") ;
            if (ordering == CHOLMOD_NESDIS) printf ("NESDIS ") ;
            if (xlnz > 0)
            {
                printf ("fl/lnz %10.1f", fl / xlnz) ;
            }
            if (anz > 0)
            {
                printf (" lnz/anz %10.1f", xlnz / anz) ;
            }
            printf ("\n") ;
        }
    }

    printf ("ints in L: %d, doubles in L: %d\n", isize, xsize) ;
    printf ("factor flops %g nnz(L) %15.0f (w/no amalgamation)\n",
        cm->fl, cm->lnz) ;
    /* NOTE(review): the two labels below look swapped relative to the
     * stype == 0 => "Factorizing A*A'+beta*I" branch above - confirm before
     * relying on them. */
    if (A->stype == 0)
    {
        printf ("nnz(A): %15.0f\n", cm->anz) ;
    }
    else
    {
        printf ("nnz(A*A'): %15.0f\n", cm->anz) ;
    }
    if (cm->lnz > 0)
    {
        printf ("flops / nnz(L): %8.1f\n", cm->fl / cm->lnz) ;
    }
    if (anz > 0)
    {
        printf ("nnz(L) / nnz(A): %8.1f\n", cm->lnz / cm->anz) ;
    }
    printf ("analyze cputime: %12.4f\n", ta) ;
    printf ("factor cputime: %12.4f mflop: %8.1f\n", tf,
        (tf == 0) ? 0 : (1e-6*cm->fl / tf)) ;
    printf ("solve cputime: %12.4f mflop: %8.1f\n", ts,
        (ts == 0) ? 0 : (1e-6*4*cm->lnz / ts)) ;
    printf ("overall cputime: %12.4f mflop: %8.1f\n",
        tot, (tot == 0) ? 0 : (1e-6 * (cm->fl + 4 * cm->lnz) / tot)) ;
    printf ("peak memory usage: %12.0f (MB)\n",
        (double) (cm->memory_usage) / 1048576.) ;
    printf ("residual %8.1e (|Ax-b|/(|A||x|+|b|))\n", resid) ;
    if (resid2 >= 0)
    {
        printf ("residual %8.1e (|Ax-b|/(|A||x|+|b|))"
            " after iterative refinement\n", resid2) ;
    }
    printf ("rcond %8.1e\n\n", cholmod_rcond (L, cm)) ;
    cholmod_free_factor (&L, cm) ;
    cholmod_free_dense (&X, cm) ;

    /* ---------------------------------------------------------------------- */
    /* free matrices and finish CHOLMOD */
    /* ---------------------------------------------------------------------- */

    cholmod_free_sparse (&A, cm) ;
    cholmod_free_dense (&B, cm) ;
    cholmod_finish (cm) ;
    return (0) ;
}
/// Thin forwarding wrapper: passes every argument unchanged to
/// cholmod_allocate_dense() and returns whatever that call returns.
static cholmod_dense* allocate_dense(size_t nrow, size_t ncol, size_t d,
                                     int xtype, cholmod_common* c)
{
    cholmod_dense* const matrix = cholmod_allocate_dense(nrow, ncol, d, xtype, c);
    return matrix;
}
/* CHOLMOD demo driver.
 *
 * Reads a sparse matrix A from the file named by argv [1] (or from stdin when
 * no argument is given), builds a right-hand side B, then analyzes and
 * factorizes (A*A'+beta*I if A is unsymmetric, A itself otherwise).  The
 * system is then solved in four ways:
 *   method 0: a single cholmod_solve,
 *   method 1: NTRIALS calls of cholmod_solve,
 *   method 2: NTRIALS calls of cholmod_solve2 with reused workspace,
 *   method 3: cholmod_solve2 with a sparse Bset, timings logged to timelog.m.
 * Residuals are computed for methods 0 to 2, one step of iterative refinement
 * is done in the real symmetric case, and statistics are printed.
 *
 * Errors are reported through my_handler, installed as the CHOLMOD error
 * handler, so return values are not checked here.
 *
 * Returns 0.
 */
int main (int argc, char **argv)
{
    double resid [4], t, ta, tf, ts [3], tot, bnorm, xnorm, anorm, rnorm, fl,
        anz, axbnorm, rnorm2, resid2, rcond ;
    FILE *f ;
    cholmod_sparse *A ;
    /* R starts out NULL so that it can be freed unconditionally before each
     * residual computation in the solve loop below */
    cholmod_dense *X = NULL, *B, *W, *R = NULL ;
    double one [2], zero [2], minusone [2], beta [2], xlnz ;
    cholmod_common Common, *cm ;
    cholmod_factor *L ;
    double *Bx, *Rx, *Xx ;
    int i, n, isize, xsize, ordering, xtype, s, ss, lnz ;
    int trial, method, L_is_super ;
    int ver [3] ;

    ts[0] = 0.;
    ts[1] = 0.;
    ts[2] = 0.;

    /* ---------------------------------------------------------------------- */
    /* get the file containing the input matrix */
    /* ---------------------------------------------------------------------- */

    ff = NULL ;
    if (argc > 1)
    {
        if ((f = fopen (argv [1], "r")) == NULL)
        {
            my_handler (CHOLMOD_INVALID, __FILE__, __LINE__,
                "unable to open file") ;
        }
        ff = f ;
    }
    else
    {
        f = stdin ;
    }

    /* ---------------------------------------------------------------------- */
    /* start CHOLMOD and set parameters */
    /* ---------------------------------------------------------------------- */

    cm = &Common ;
    cholmod_start (cm) ;
    CHOLMOD_FUNCTION_DEFAULTS (cm) ; /* just for testing (not required) */

    /* use default parameter settings, except for the error handler.  This
     * demo program terminates if an error occurs (out of memory, not positive
     * definite, ...).  It makes the demo program simpler (no need to check
     * CHOLMOD error conditions).  This non-default parameter setting has no
     * effect on performance. */
    cm->error_handler = my_handler ;

    /* Note that CHOLMOD will do a supernodal LL' or a simplicial LDL' by
     * default, automatically selecting the latter if flop/nnz(L) < 40. */

    /* ---------------------------------------------------------------------- */
    /* create basic scalars */
    /* ---------------------------------------------------------------------- */

    zero [0] = 0 ;
    zero [1] = 0 ;
    one [0] = 1 ;
    one [1] = 0 ;
    minusone [0] = -1 ;
    minusone [1] = 0 ;
    beta [0] = 1e-6 ;
    beta [1] = 0 ;

    /* ---------------------------------------------------------------------- */
    /* read in a matrix */
    /* ---------------------------------------------------------------------- */

    printf ("\n---------------------------------- cholmod_demo:\n") ;
    cholmod_version (ver) ;
    printf ("cholmod version %d.%d.%d\n", ver [0], ver [1], ver [2]) ;
    SuiteSparse_version (ver) ;
    printf ("SuiteSparse version %d.%d.%d\n", ver [0], ver [1], ver [2]) ;
    A = cholmod_read_sparse (f, cm) ;
    if (ff != NULL)
    {
        fclose (ff) ;
        ff = NULL ;
    }
    anorm = cholmod_norm_sparse (A, 0, cm) ;
    xtype = A->xtype ;
    printf ("norm (A,inf) = %g\n", anorm) ;
    printf ("norm (A,1) = %g\n", cholmod_norm_sparse (A, 1, cm)) ;
    cholmod_print_sparse (A, "A", cm) ;

    if (A->nrow > A->ncol)
    {
        /* Transpose A so that A'A+beta*I will be factorized instead */
        cholmod_sparse *C = cholmod_transpose (A, 2, cm) ;
        cholmod_free_sparse (&A, cm) ;
        A = C ;
        printf ("transposing input matrix\n") ;
    }

    /* ---------------------------------------------------------------------- */
    /* create an arbitrary right-hand-side */
    /* ---------------------------------------------------------------------- */

    n = A->nrow ;
    B = cholmod_zeros (n, 1, xtype, cm) ;
    Bx = B->x ;

#if GHS
    {
        /* b = A*ones(n,1), used by Gould, Hu, and Scott in their experiments */
        cholmod_dense *X0 ;
        X0 = cholmod_ones (A->ncol, 1, xtype, cm) ;
        cholmod_sdmult (A, 0, one, zero, X0, B, cm) ;
        cholmod_free_dense (&X0, cm) ;
    }
#else
    if (xtype == CHOLMOD_REAL)
    {
        /* real case */
        for (i = 0 ; i < n ; i++)
        {
            double x = n ;
            Bx [i] = 1 + i / x ;
        }
    }
    else
    {
        /* complex case */
        for (i = 0 ; i < n ; i++)
        {
            double x = n ;
            Bx [2*i ] = 1 + i / x ; /* real part of B(i) */
            Bx [2*i+1] = (x/2 - i) / (3*x) ; /* imag part of B(i) */
        }
    }
#endif

    cholmod_print_dense (B, "B", cm) ;
    bnorm = cholmod_norm_dense (B, 0, cm) ; /* max norm */
    printf ("bnorm %g\n", bnorm) ;

    /* ---------------------------------------------------------------------- */
    /* analyze and factorize */
    /* ---------------------------------------------------------------------- */

    t = CPUTIME ;
    L = cholmod_analyze (A, cm) ;
    ta = CPUTIME - t ;
    ta = MAX (ta, 0) ;

    printf ("Analyze: flop %g lnz %g\n", cm->fl, cm->lnz) ;

    if (A->stype == 0)
    {
        printf ("Factorizing A*A'+beta*I\n") ;
        t = CPUTIME ;
        cholmod_factorize_p (A, beta, NULL, 0, L, cm) ;
        tf = CPUTIME - t ;
        tf = MAX (tf, 0) ;
    }
    else
    {
        printf ("Factorizing A\n") ;
        t = CPUTIME ;
        cholmod_factorize (A, L, cm) ;
        tf = CPUTIME - t ;
        tf = MAX (tf, 0) ;
    }

    cholmod_print_factor (L, "L", cm) ;

    /* determine the # of integers's and reals's in L.  See cholmod_free */
    if (L->is_super)
    {
        s = L->nsuper + 1 ;
        xsize = L->xsize ;
        ss = L->ssize ;
        isize =
            n /* L->Perm */
            + n /* L->ColCount, nz in each column of 'pure' L */
            + s /* L->pi, column pointers for L->s */
            + s /* L->px, column pointers for L->x */
            + s /* L->super, starting column index of each supernode */
            + ss ; /* L->s, the pattern of the supernodes */
    }
    else
    {
        /* this space can increase if you change parameters to their non-
         * default values (cm->final_pack, for example). */
        lnz = L->nzmax ;
        xsize = lnz ;
        isize =
            n /* L->Perm */
            + n /* L->ColCount, nz in each column of 'pure' L */
            + n+1 /* L->p, column pointers */
            + lnz /* L->i, integer row indices */
            + n /* L->nz, nz in each column of L */
            + n+2 /* L->next, link list */
            + n+2 ; /* L->prev, link list */
    }

    /* solve with Bset will change L from simplicial to supernodal */
    rcond = cholmod_rcond (L, cm) ;
    L_is_super = L->is_super ;

    /* ---------------------------------------------------------------------- */
    /* solve */
    /* ---------------------------------------------------------------------- */

    for (method = 0 ; method <= 3 ; method++)
    {
        double x = n ;

        if (method == 0)
        {
            /* basic solve, just once */
            t = CPUTIME ;
            X = cholmod_solve (CHOLMOD_A, L, B, cm) ;
            ts [0] = CPUTIME - t ;
            ts [0] = MAX (ts [0], 0) ;
        }
        else if (method == 1)
        {
            /* basic solve, many times, but keep the last one */
            t = CPUTIME ;
            for (trial = 0 ; trial < NTRIALS ; trial++)
            {
                cholmod_free_dense (&X, cm) ;
                Bx [0] = 1 + trial / x ; /* tweak B each iteration */
                X = cholmod_solve (CHOLMOD_A, L, B, cm) ;
            }
            ts [1] = CPUTIME - t ;
            ts [1] = MAX (ts [1], 0) / NTRIALS ;
        }
        else if (method == 2)
        {
            /* solve with reused workspace */
            cholmod_dense *Ywork = NULL, *Ework = NULL ;
            cholmod_free_dense (&X, cm) ;

            t = CPUTIME ;
            for (trial = 0 ; trial < NTRIALS ; trial++)
            {
                Bx [0] = 1 + trial / x ; /* tweak B each iteration */
                cholmod_solve2 (CHOLMOD_A, L, B, NULL, &X, NULL,
                    &Ywork, &Ework, cm) ;
            }
            cholmod_free_dense (&Ywork, cm) ;
            cholmod_free_dense (&Ework, cm) ;
            ts [2] = CPUTIME - t ;
            ts [2] = MAX (ts [2], 0) / NTRIALS ;
        }
        else
        {
            /* solve with reused workspace and sparse Bset */
            cholmod_dense *Ywork = NULL, *Ework = NULL ;
            cholmod_dense *X2 = NULL, *B2 = NULL ;
            cholmod_sparse *Bset, *Xset = NULL ;
            int *Bsetp, *Bseti, *Xsetp, *Xseti, xlen, j, k, *Lnz ;
            double *X1x, *X2x, *B2x, err ;
            FILE *timelog = fopen ("timelog.m", "w") ;
            if (timelog) fprintf (timelog, "results = [\n") ;

            B2 = cholmod_zeros (n, 1, xtype, cm) ;
            B2x = B2->x ;

            Bset = cholmod_allocate_sparse (n, 1, 1, FALSE, TRUE, 0,
                CHOLMOD_PATTERN, cm) ;
            Bsetp = Bset->p ;
            Bseti = Bset->i ;
            Bsetp [0] = 0 ; /* nnz(B) is 1 (it can be anything) */
            Bsetp [1] = 1 ;
            resid [3] = 0 ;

            for (i = 0 ; i < MIN (100,n) ; i++)
            {
                /* B (i) is nonzero, all other entries are ignored
                   (implied to be zero) */
                Bseti [0] = i ;
                if (xtype == CHOLMOD_REAL)
                {
                    B2x [i] = 3.1 * i + 0.9 ;
                }
                else
                {
                    B2x [2*i ] = i + 0.042 ;
                    B2x [2*i+1] = i - 92.7 ;
                }

                /* first get the entire solution, to compare against */
                cholmod_solve2 (CHOLMOD_A, L, B2, NULL, &X, NULL,
                    &Ywork, &Ework, cm) ;

                /* now get the sparse solutions; this will change L from
                   supernodal to simplicial */

                if (i == 0)
                {
                    /* first solve can be slower because it has to allocate
                       space for X2, Xset, etc, and change L.
                       So don't time it */
                    cholmod_solve2 (CHOLMOD_A, L, B2, Bset, &X2, &Xset,
                        &Ywork, &Ework, cm) ;
                }

                t = CPUTIME ;
                for (trial = 0 ; trial < NTRIALS ; trial++)
                {
                    /* solve Ax=b but only to get x(i).
                       b is all zero except for b(i).
                       This takes O(xlen) time */
                    cholmod_solve2 (CHOLMOD_A, L, B2, Bset, &X2, &Xset,
                        &Ywork, &Ework, cm) ;
                }
                t = CPUTIME - t ;
                t = MAX (t, 0) / NTRIALS ;

                /* check the solution and log the time */
                Xsetp = Xset->p ;
                Xseti = Xset->i ;
                xlen = Xsetp [1] ;
                X1x = X->x ;
                X2x = X2->x ;
                Lnz = L->nz ;

                /*
                printf ("\ni %d xlen %d (%p %p)\n", i, xlen, X1x, X2x) ;
                */

                if (xtype == CHOLMOD_REAL)
                {
                    fl = 2 * xlen ;
                    for (k = 0 ; k < xlen ; k++)
                    {
                        j = Xseti [k] ;
                        fl += 4 * Lnz [j] ;
                        err = X1x [j] - X2x [j] ;
                        err = ABS (err) ;
                        resid [3] = MAX (resid [3], err) ;
                    }
                }
                else
                {
                    fl = 16 * xlen ;
                    for (k = 0 ; k < xlen ; k++)
                    {
                        j = Xseti [k] ;
                        fl += 16 * Lnz [j] ;
                        err = X1x [2*j ] - X2x [2*j ] ;
                        err = ABS (err) ;
                        resid [3] = MAX (resid [3], err) ;
                        err = X1x [2*j+1] - X2x [2*j+1] ;
                        err = ABS (err) ;
                        resid [3] = MAX (resid [3], err) ;
                    }
                }
                if (timelog) fprintf (timelog, "%g %g %g %g\n",
                    (double) i, (double) xlen, fl, t);

                /* clear B for the next test */
                if (xtype == CHOLMOD_REAL)
                {
                    B2x [i] = 0 ;
                }
                else
                {
                    B2x [2*i ] = 0 ;
                    B2x [2*i+1] = 0 ;
                }
            }

            if (timelog)
            {
                fprintf (timelog, "] ; resid = %g ;\n", resid [3]) ;
                fprintf (timelog, "lnz = %g ;\n", cm->lnz) ;
                fprintf (timelog, "t = %g ; %% dense solve time\n", ts [2]) ;
                fclose (timelog) ;
            }

            resid [3] = resid [3] / cholmod_norm_dense (X, 1, cm) ;

            cholmod_free_dense (&Ywork, cm) ;
            cholmod_free_dense (&Ework, cm) ;
            cholmod_free_dense (&X2, cm) ;
            cholmod_free_dense (&B2, cm) ;
            cholmod_free_sparse (&Xset, cm) ;
            cholmod_free_sparse (&Bset, cm) ;
        }

        /* ------------------------------------------------------------------ */
        /* compute the residual */
        /* ------------------------------------------------------------------ */

        if (method < 3)
        {
            /* release the residual left over from the previous method; only
             * the last one (method 2) is needed after this loop */
            cholmod_free_dense (&R, cm) ;

            if (A->stype == 0)
            {
                /* (AA'+beta*I)x=b is the linear system that was solved */
                /* W = A'*X */
                W = cholmod_allocate_dense (A->ncol, 1, A->ncol, xtype, cm) ;
                cholmod_sdmult (A, 2, one, zero, X, W, cm) ;
                /* R = B - beta*X */
                R = cholmod_zeros (n, 1, xtype, cm) ;
                Rx = R->x ;
                Xx = X->x ;
                if (xtype == CHOLMOD_REAL)
                {
                    for (i = 0 ; i < n ; i++)
                    {
                        Rx [i] = Bx [i] - beta [0] * Xx [i] ;
                    }
                }
                else
                {
                    /* complex case */
                    for (i = 0 ; i < n ; i++)
                    {
                        Rx [2*i ] = Bx [2*i ] - beta [0] * Xx [2*i ] ;
                        Rx [2*i+1] = Bx [2*i+1] - beta [0] * Xx [2*i+1] ;
                    }
                }
                /* R = A*W - R */
                cholmod_sdmult (A, 0, one, minusone, W, R, cm) ;
                cholmod_free_dense (&W, cm) ;
            }
            else
            {
                /* Ax=b was factorized and solved, R = B-A*X */
                R = cholmod_copy_dense (B, cm) ;
                cholmod_sdmult (A, 0, minusone, one, X, R, cm) ;
            }
            rnorm = cholmod_norm_dense (R, 0, cm) ; /* max abs. entry */
            xnorm = cholmod_norm_dense (X, 0, cm) ; /* max abs. entry */

            /* the (n == 0) term keeps the denominator nonzero for an empty
             * system */
            axbnorm = (anorm * xnorm + bnorm + ((n == 0) ? 1 : 0)) ;
            resid [method] = rnorm / axbnorm ;
        }
    }

    tot = ta + tf + ts [0] ;

    /* ---------------------------------------------------------------------- */
    /* iterative refinement (real symmetric case only) */
    /* ---------------------------------------------------------------------- */

    resid2 = -1 ; /* stays negative when no refinement is done */
    if (A->stype != 0 && A->xtype == CHOLMOD_REAL)
    {
        cholmod_dense *R2 ;

        /* R2 = A\(B-A*X) */
        R2 = cholmod_solve (CHOLMOD_A, L, R, cm) ;
        /* compute X = X + A\(B-A*X) */
        Xx = X->x ;
        Rx = R2->x ;
        for (i = 0 ; i < n ; i++)
        {
            Xx [i] = Xx [i] + Rx [i] ;
        }
        cholmod_free_dense (&R2, cm) ;
        cholmod_free_dense (&R, cm) ;

        /* compute the new residual, R = B-A*X */
        R = cholmod_copy_dense (B, cm) ;
        cholmod_sdmult (A, 0, minusone, one, X, R, cm) ;
        rnorm2 = cholmod_norm_dense (R, 0, cm) ;
        resid2 = rnorm2 / axbnorm ;
    }

    cholmod_free_dense (&R, cm) ;

    /* ---------------------------------------------------------------------- */
    /* print results */
    /* ---------------------------------------------------------------------- */

    anz = cm->anz ;
    for (i = 0 ; i < CHOLMOD_MAXMETHODS ; i++)
    {
        fl = cm->method [i].fl ;
        xlnz = cm->method [i].lnz ;
        /* the statistics of each method are reset after being read */
        cm->method [i].fl = -1 ;
        cm->method [i].lnz = -1 ;
        ordering = cm->method [i].ordering ;
        if (fl >= 0)
        {
            printf ("Ordering: ") ;
            if (ordering == CHOLMOD_POSTORDERED) printf ("postordered ") ;
            if (ordering == CHOLMOD_NATURAL) printf ("natural ") ;
            if (ordering == CHOLMOD_GIVEN) printf ("user ") ;
            if (ordering == CHOLMOD_AMD) printf ("AMD ") ;
            if (ordering == CHOLMOD_METIS) printf ("METIS ") ;
            if (ordering == CHOLMOD_NESDIS) printf ("NESDIS ") ;
            if (xlnz > 0)
            {
                printf ("fl/lnz %10.1f", fl / xlnz) ;
            }
            if (anz > 0)
            {
                printf (" lnz/anz %10.1f", xlnz / anz) ;
            }
            printf ("\n") ;
        }
    }

    printf ("ints in L: %15.0f, doubles in L: %15.0f\n",
        (double) isize, (double) xsize) ;
    printf ("factor flops %g nnz(L) %15.0f (w/no amalgamation)\n",
        cm->fl, cm->lnz) ;
    if (A->stype == 0)
    {
        printf ("nnz(A): %15.0f\n", cm->anz) ;
    }
    else
    {
        printf ("nnz(A*A'): %15.0f\n", cm->anz) ;
    }
    if (cm->lnz > 0)
    {
        printf ("flops / nnz(L): %8.1f\n", cm->fl / cm->lnz) ;
    }
    if (anz > 0)
    {
        printf ("nnz(L) / nnz(A): %8.1f\n", cm->lnz / cm->anz) ;
    }
    printf ("analyze cputime: %12.4f\n", ta) ;
    printf ("factor cputime: %12.4f mflop: %8.1f\n", tf,
        (tf == 0) ? 0 : (1e-6*cm->fl / tf)) ;
    printf ("solve cputime: %12.4f mflop: %8.1f\n", ts [0],
        (ts [0] == 0) ? 0 : (1e-6*4*cm->lnz / ts [0])) ;
    printf ("overall cputime: %12.4f mflop: %8.1f\n",
        tot, (tot == 0) ? 0 : (1e-6 * (cm->fl + 4 * cm->lnz) / tot)) ;
    printf ("solve cputime: %12.4f mflop: %8.1f (%d trials)\n", ts [1],
        (ts [1] == 0) ? 0 : (1e-6*4*cm->lnz / ts [1]), NTRIALS) ;
    printf ("solve2 cputime: %12.4f mflop: %8.1f (%d trials)\n", ts [2],
        (ts [2] == 0) ? 0 : (1e-6*4*cm->lnz / ts [2]), NTRIALS) ;
    printf ("peak memory usage: %12.0f (MB)\n",
        (double) (cm->memory_usage) / 1048576.) ;
    printf ("residual (|Ax-b|/(|A||x|+|b|)): ") ;
    for (method = 0 ; method <= 3 ; method++)
    {
        printf ("%8.2e ", resid [method]) ;
    }
    printf ("\n") ;
    if (resid2 >= 0)
    {
        printf ("residual %8.1e (|Ax-b|/(|A||x|+|b|))"
            " after iterative refinement\n", resid2) ;
    }
    printf ("rcond %8.1e\n\n", rcond) ;

    if (L_is_super)
    {
        cholmod_gpu_stats (cm) ;
    }

    cholmod_free_factor (&L, cm) ;
    cholmod_free_dense (&X, cm) ;

    /* ---------------------------------------------------------------------- */
    /* free matrices and finish CHOLMOD */
    /* ---------------------------------------------------------------------- */

    cholmod_free_sparse (&A, cm) ;
    cholmod_free_dense (&B, cm) ;
    cholmod_finish (cm) ;
    return (0) ;
}
/// Builds a deformed copy of the target mesh T0 from per-face source
/// gradients.
///
/// A sparse system G is assembled with three rows per face (filled from
/// T0grad) followed by one row per stationary vertex.  Its first
/// 3 * nfaces rows are weighted by Ds, and the normal equations
/// (Gt*D*G) x = Gt*D*c are solved with CHOLMOD, one right-hand-side column
/// per coordinate.  The solution replaces the vertex coordinates in a copy
/// of T0.
///
/// Side effect: the assembled matrix is written to "A.txt" on every call.
///
/// @param S1grad per-face 3x3 gradients; indexed once for every face of S0.
/// @return a copy of T0 whose vertices are replaced by the solution.
/// @throws const char* ("S0 or T0 not set.") unless both S0set and T0set hold.
BasicMesh MeshTransferer::transfer(const vector<PhGUtils::Matrix3x3d> &S1grad)
{
    if (!S0set || !T0set) {
        throw "S0 or T0 not set.";
    }

    auto &srcGrad = S1grad;
    auto &tgtGrad = T0grad;

    const int faceCount = S0.faces.nrow;
    const int vertCount = S0.verts.nrow;
    const int fixedCount = stationary_vertices.size();

    // Three rows per face, then one row per stationary vertex.
    const int gradRows = faceCount * 3;
    const int totalRows = gradRows + fixedCount;
    const int totalCols = vertCount;
    const int fixedTerms = stationary_vertices.size();
    const int totalTerms = faceCount * 9 + fixedTerms;

    SparseMatrix A(totalRows, totalCols, totalTerms);

    // Deformation-gradient part: row 3*face + r carries entries
    // 3*r .. 3*r+2 of that face's T0grad matrix, placed in the columns of
    // the face's three vertex indices.
    for (int face = 0; face < faceCount; ++face) {
        int *f = S0.faces.rowptr(face);
        auto Ti = tgtGrad[face];
        for (int r = 0; r < 3; ++r) {
            const int row = 3 * face + r;
            for (int k = 0; k < 3; ++k) {
                A.append(row, f[k], Ti(3 * r + k));
            }
        }
    }

    // Lower part of A: a single 1 per stationary vertex.
    for (int j = 0; j < fixedCount; ++j) {
        A.append(gradRows + j, stationary_vertices[j], 1);
    }

    // Dump the assembled matrix to disk; the stream closes at end of scope.
    {
        ofstream dump("A.txt");
        dump << A;
    }

    // Right-hand side, one column per coordinate: the source gradients
    // first, then the T0 coordinates of the stationary vertices.
    DenseMatrix c(totalRows, 3);
    for (int col = 0; col < 3; ++col) {
        for (int face = 0; face < faceCount; ++face) {
            auto &Sj = srcGrad[face];
            for (int r = 0; r < 3; ++r) {
                c(3 * face + r, col) = Sj(r, col);
            }
        }
    }
    for (int col = 0; col < 3; ++col) {
        for (int j = 0; j < fixedCount; ++j) {
            auto vj = T0.verts.rowptr(stationary_vertices[j]);
            c(gradRows + j, col) = vj[col];
        }
    }

    cholmod_sparse *G = A.to_sparse();
    cholmod_sparse *Gt = cholmod_transpose(G, 2, global::cm);

    // Gt*D: scale the first gradRows columns of Gt by the weight Ds of the
    // face they belong to (three columns per face).
    double *GtValues = static_cast<double*>(Gt->x);
    const int *GtColStart = static_cast<const int*>(Gt->p);
    for (int col = 0; col < gradRows; ++col) {
        const int face = col / 3;
        const int last = GtColStart[col + 1];
        for (int p = GtColStart[col]; p < last; ++p) {
            GtValues[p] *= Ds(face);
        }
    }

    // Gt*D*G, then flagged with stype = 1.
    cholmod_sparse *GtDG = cholmod_ssmult(Gt, G, 0, 1, 1, global::cm);
    GtDG->stype = 1;

    // Gt*D*c
    cholmod_dense *GtDc = cholmod_allocate_dense(totalCols, 3, totalCols, CHOLMOD_REAL, global::cm);
    double scaleProduct[2] = {1, 0};
    double scaleTarget[2] = {0, 0};
    // NOTE(review): the pointer returned by to_dense() is not released in
    // this function - confirm who owns it.
    cholmod_sdmult(Gt, 0, scaleProduct, scaleTarget, c.to_dense(), GtDc, global::cm);

    // Solve (Gt*D*G) x = Gt*D*c.
    cholmod_factor *factor = cholmod_analyze(GtDG, global::cm);
    cholmod_factorize(GtDG, factor, global::cm);
    cholmod_dense *x = cholmod_solve(CHOLMOD_A, factor, GtDc, global::cm);

    // Start from a copy of T0 and overwrite its vertices; column k of the
    // solution starts at offset k * vertCount.
    BasicMesh Td = T0;
    const double *coords = static_cast<const double*>(x->x);
    for (int v = 0; v < vertCount; ++v) {
        Td.verts(v, 0) = coords[v];
        Td.verts(v, 1) = coords[v + vertCount];
        Td.verts(v, 2) = coords[v + vertCount * 2];
    }

    // Release every CHOLMOD object created above.
    cholmod_free_sparse(&G, global::cm);
    cholmod_free_sparse(&Gt, global::cm);
    cholmod_free_sparse(&GtDG, global::cm);
    cholmod_free_dense(&GtDc, global::cm);
    cholmod_free_factor(&factor, global::cm);
    cholmod_free_dense(&x, global::cm);

    return Td;
}