Пример #1
0
// called from package MatrixModels's R code:
SEXP dgCMatrix_cholsol(SEXP x, SEXP y)
{
    /* Solve Sparse Least Squares X %*% beta ~= y  with dense RHS y,
     * where X = t(x) i.e. we pass  x = t(X)  as argument,
     * via  "Cholesky(X'X)" .. well not really:
     * cholmod_factorize("x", ..) finds L in  X'X = L'L directly */
    CHM_SP cx = AS_CHM_SP(x);
    /* FIXME: extend this to work in multivariate case, i.e. y a matrix with > 1 column ! */
    CHM_DN cy = AS_CHM_DN(coerceVector(y, REALSXP)), rhs, cAns, resid;
    CHM_FR L;
    int n = cx->ncol;/* #{obs.} {x = t(X) !} */
    double one[] = {1,0}, zero[] = {0,0}, neg1[] = {-1,0};
    const char *nms[] = {"L", "coef", "Xty", "resid", ""};
    SEXP ans = PROTECT(Rf_mkNamed(VECSXP, nms));
    R_CheckStack();

    if (n < cx->nrow || n <= 0)
	error(_("dgCMatrix_cholsol requires a 'short, wide' rectangular matrix"));
    if (cy->nrow != n)
	error(_("Dimensions of system to be solved are inconsistent"));
    rhs = cholmod_allocate_dense(cx->nrow, 1, cx->nrow, CHOLMOD_REAL, &c);
    /* cholmod_sdmult(A, transp, alpha, beta, X, Y, &c):
     *		Y := alpha*(A*X) + beta*Y or alpha*(A'*X) + beta*Y ;
     * here: rhs := 1 * x %*% y + 0 =  x %*% y =  X'y  */
    if (!(cholmod_sdmult(cx, 0 /* trans */, one, zero, cy, rhs, &c)))
	error(_("cholmod_sdmult error (rhs)"));
    L = cholmod_analyze(cx, &c);
    if (!cholmod_factorize(cx, L, &c))
	error(_("cholmod_factorize failed: status %d, minor %d from ncol %d"),
	      c.status, L->minor, L->n);
/* FIXME: Do this in stages so an "effects" vector can be calculated */
    if (!(cAns = cholmod_solve(CHOLMOD_A, L, rhs, &c)))
	error(_("cholmod_solve (CHOLMOD_A) failed: status %d, minor %d from ncol %d"),
	      c.status, L->minor, L->n);
    /* L : */
    SET_VECTOR_ELT(ans, 0, chm_factor_to_SEXP(L, 0));
    /* coef : */
    SET_VECTOR_ELT(ans, 1, allocVector(REALSXP, cx->nrow));
    Memcpy(REAL(VECTOR_ELT(ans, 1)), (double*)(cAns->x), cx->nrow);
    /* X'y : */
/* FIXME: Change this when the "effects" vector is available */
    SET_VECTOR_ELT(ans, 2, allocVector(REALSXP, cx->nrow));
    Memcpy(REAL(VECTOR_ELT(ans, 2)), (double*)(rhs->x), cx->nrow);
    /* resid := y */
    resid = cholmod_copy_dense(cy, &c);
    /* cholmod_sdmult(A, transp, alp, bet, X, Y, &c):
     *		Y := alp*(A*X) + bet*Y or alp*(A'*X) + beta*Y ;
     * here: resid := -1 * x' %*% coef + 1 * y = y - X %*% coef  */
    if (!(cholmod_sdmult(cx, 1/* trans */, neg1, one, cAns, resid, &c)))
	error(_("cholmod_sdmult error (resid)"));
    /* FIXME: for multivariate case, i.e. resid  *matrix* with > 1 column ! */
    SET_VECTOR_ELT(ans, 3, allocVector(REALSXP, n));
    Memcpy(REAL(VECTOR_ELT(ans, 3)), (double*)(resid->x), n);

    cholmod_free_factor(&L, &c);
    cholmod_free_dense(&rhs, &c);
    cholmod_free_dense(&cAns, &c);
    UNPROTECT(1);
    return ans;
}
Пример #2
0
SEXP Csparse_dense_prod(SEXP a, SEXP b)
{
    CHM_SP cha = AS_CHM_SP(a);
    SEXP b_M = PROTECT(mMatrix_as_dgeMatrix(b));
    CHM_DN chb = AS_CHM_DN(b_M);
    CHM_DN chc = cholmod_allocate_dense(cha->nrow, chb->ncol, cha->nrow,
					chb->xtype, &c);
    SEXP dn = PROTECT(allocVector(VECSXP, 2));
    double one[] = {1,0}, zero[] = {0,0};
    int nprot = 2;
    R_CheckStack();
    /* Tim Davis, please FIXME:  currently (2010-11) *fails* when  a  is a pattern matrix:*/
    if(cha->xtype == CHOLMOD_PATTERN) {
	/* warning(_("Csparse_dense_prod(): cholmod_sdmult() not yet implemented for pattern./ ngCMatrix" */
	/* 	  " --> slightly inefficient coercion")); */

	// This *fails* to produce a CHOLMOD_REAL ..
	// CHM_SP chd = cholmod_l_copy(cha, cha->stype, CHOLMOD_REAL, &c);
	// --> use our Matrix-classes
	SEXP da = PROTECT(nz2Csparse(a, x_double)); nprot++;
	cha = AS_CHM_SP(da);
    }
    cholmod_sdmult(cha, 0, one, zero, chb, chc, &c);
    SET_VECTOR_ELT(dn, 0,	/* establish dimnames */
		   duplicate(VECTOR_ELT(GET_SLOT(a, Matrix_DimNamesSym), 0)));
    SET_VECTOR_ELT(dn, 1,
		   duplicate(VECTOR_ELT(GET_SLOT(b_M, Matrix_DimNamesSym), 1)));
    UNPROTECT(nprot);
    return chm_dense_to_SEXP(chc, 1, 0, dn);
}
Пример #3
0
/* Uses factorization to solve. */
void
ClpCholeskyUfl::solve (double * region)
{
     cholmod_dense *x, *b;
     b = cholmod_allocate_dense (numberRows_, 1, numberRows_, CHOLMOD_REAL, c_) ;
     CoinMemcpyN(region, numberRows_, (double *) b->x);
     x = cholmod_solve (CHOLMOD_A, L_, b, c_) ;
     CoinMemcpyN((double *) x->x, numberRows_, region);
     cholmod_free_dense (&x, c_) ;
     cholmod_free_dense (&b, c_) ;
}
int CholeskyFactorization::solve(Matrix& rhs, Matrix& solution) {
    if (m_matrix_type == Matrix::MATRIX_SPARSE) {
        cholmod_dense *x;

        /* cast the RHS as a cholmod_dense b = rhs */
        if (rhs.m_type == Matrix::MATRIX_DENSE) {

            cholmod_dense *b;
            b = cholmod_allocate_dense(rhs.m_nrows, rhs.m_ncols, rhs.m_nrows, CHOLMOD_REAL, Matrix::cholmod_handle());
            b->x = rhs.m_data;

            /* Solve - rhs is dense*/
            x = cholmod_solve(CHOLMOD_A, m_factor, b, Matrix::cholmod_handle());
            solution = Matrix(rhs.m_nrows, rhs.m_ncols);
            solution.m_delete_data = false;
            memcpy(solution.m_data, static_cast<double*> (x->x), rhs.m_nrows * rhs.m_ncols * sizeof (double));
            cholmod_free_dense(&x, Matrix::cholmod_handle());

        } else if (rhs.m_type == Matrix::MATRIX_SPARSE) {
            // still untested!
            cholmod_sparse * rhs_sparse;
            if (rhs.m_sparse == NULL) {
                const_cast<Matrix&> (rhs)._createSparse();
            }
            rhs_sparse = rhs.m_sparse;
            cholmod_sparse * result = cholmod_spsolve(CHOLMOD_LDLt, m_factor, rhs_sparse, Matrix::cholmod_handle());
            solution = Matrix(rhs.m_nrows, rhs.m_ncols, Matrix::MATRIX_SPARSE);
            solution.m_sparse = result;
            solution._createTriplet();
        } else {
            throw std::logic_error("Not supported");
        }
        return ForBESUtils::STATUS_OK;
    } else { /* the matrix to be factorized is not sparse */
        int info = ForBESUtils::STATUS_UNDEFINED_FUNCTION;
        solution = Matrix(rhs);
        if (m_matrix_type == Matrix::MATRIX_DENSE) {
            info = LAPACKE_dpotrs(LAPACK_COL_MAJOR, 'L', m_matrix_nrows, rhs.m_ncols, m_L, m_matrix_nrows, solution.m_data, m_matrix_nrows);
        } else if (m_matrix_type == Matrix::MATRIX_SYMMETRIC) {
            info = LAPACKE_dpptrs(LAPACK_COL_MAJOR, 'L', m_matrix_nrows, rhs.m_ncols, m_L, solution.m_data, m_matrix_nrows);
        } else {
            throw std::invalid_argument("This matrix type is not supported - only DENSE, SPARSE and SYMMETRIC are supported");
        }
        return info;
    }
}
Пример #5
0
SEXP Csparse_dense_crossprod(SEXP a, SEXP b)
{
    CHM_SP cha = AS_CHM_SP(a);
    SEXP b_M = PROTECT(mMatrix_as_dgeMatrix(b));
    CHM_DN chb = AS_CHM_DN(b_M);
    CHM_DN chc = cholmod_allocate_dense(cha->ncol, chb->ncol, cha->ncol,
					chb->xtype, &c);
    SEXP dn = PROTECT(allocVector(VECSXP, 2)); int nprot = 2;
    double one[] = {1,0}, zero[] = {0,0};
    R_CheckStack();
    // -- see Csparse_dense_prod() above :
    if(cha->xtype == CHOLMOD_PATTERN) {
	SEXP da = PROTECT(nz2Csparse(a, x_double)); nprot++;
	cha = AS_CHM_SP(da);
    }
    cholmod_sdmult(cha, 1, one, zero, chb, chc, &c);
    SET_VECTOR_ELT(dn, 0,	/* establish dimnames */
		   duplicate(VECTOR_ELT(GET_SLOT(a, Matrix_DimNamesSym), 1)));
    SET_VECTOR_ELT(dn, 1,
		   duplicate(VECTOR_ELT(GET_SLOT(b_M, Matrix_DimNamesSym), 1)));
    UNPROTECT(nprot);
    return chm_dense_to_SEXP(chc, 1, 0, dn);
}
Пример #6
0
int main (int argc, char **argv)
{
    double resid, t, ta, tf, ts, tot, bnorm, xnorm, anorm, rnorm, fl, anz, 
	axbnorm, rnorm2, resid2 ;
    FILE *f ;
    cholmod_sparse *A ;
    cholmod_dense *X, *B, *W, *R ;
    double one [2], zero [2], minusone [2], beta [2], xlnz ;
    cholmod_common Common, *cm ;
    cholmod_factor *L ;
    double *Bx, *Rx, *Xx ;
    int i, n, isize, xsize, ordering, xtype, s, ss, lnz ;

    /* ---------------------------------------------------------------------- */
    /* get the file containing the input matrix */
    /* ---------------------------------------------------------------------- */

    ff = NULL ;
    if (argc > 1)
    {
	if ((f = fopen (argv [1], "r")) == NULL)
	{
	    my_handler (CHOLMOD_INVALID, __FILE__, __LINE__,
		    "unable to open file") ;
	}
	ff = f ;
    }
    else
    {
	f = stdin ;
    }

    /* ---------------------------------------------------------------------- */
    /* start CHOLMOD and set parameters */
    /* ---------------------------------------------------------------------- */

    cm = &Common ;
    cholmod_start (cm) ;

    /* use default parameter settings, except for the error handler.  This
     * demo program terminates if an error occurs (out of memory, not positive
     * definite, ...).  It makes the demo program simpler (no need to check
     * CHOLMOD error conditions).  This non-default parameter setting has no
     * effect on performance. */
    cm->error_handler = my_handler ;

    /* Note that CHOLMOD will do a supernodal LL' or a simplicial LDL' by
     * default, automatically selecting the latter if flop/nnz(L) < 40. */

    /* ---------------------------------------------------------------------- */
    /* create basic scalars */
    /* ---------------------------------------------------------------------- */

    zero [0] = 0 ;
    zero [1] = 0 ;
    one [0] = 1 ;
    one [1] = 0 ;
    minusone [0] = -1 ;
    minusone [1] = 0 ;
    beta [0] = 1e-6 ;
    beta [1] = 0 ;

    /* ---------------------------------------------------------------------- */
    /* read in a matrix */
    /* ---------------------------------------------------------------------- */

    printf ("\n---------------------------------- cholmod_demo:\n") ;
    A = cholmod_read_sparse (f, cm) ;
    if (ff != NULL) fclose (ff) ;
    anorm = cholmod_norm_sparse (A, 0, cm) ;
    xtype = A->xtype ;
    printf ("norm (A,inf) = %g\n", anorm) ;
    printf ("norm (A,1)   = %g\n", cholmod_norm_sparse (A, 1, cm)) ;
    cholmod_print_sparse (A, "A", cm) ;

    if (A->nrow > A->ncol)
    {
	/* Transpose A so that A'A+beta*I will be factorized instead */
	cholmod_sparse *C = cholmod_transpose (A, 2, cm) ;
	cholmod_free_sparse (&A, cm) ;
	A = C ;
	printf ("transposing input matrix\n") ;
    }

    /* ---------------------------------------------------------------------- */
    /* create an arbitrary right-hand-side */
    /* ---------------------------------------------------------------------- */

    n = A->nrow ;
    B = cholmod_zeros (n, 1, xtype, cm) ;
    Bx = B->x ;

#if GHS
    {
	/* b = A*ones(n,1), used by Gould, Hu, and Scott in their experiments */
	cholmod_dense *X0 ;
	X0 = cholmod_ones (A->ncol, 1, xtype, cm) ;
	cholmod_sdmult (A, 0, one, zero, X0, B, cm) ;
	cholmod_free_dense (&X0, cm) ;
    }
#else
    if (xtype == CHOLMOD_REAL)
    {
	/* real case */
	for (i = 0 ; i < n ; i++)
	{
	    double x = n ;
	    Bx [i] = 1 + i / x ;
	}
    }
    else
    {
	/* complex case */
	for (i = 0 ; i < n ; i++)
	{
	    double x = n ;
	    Bx [2*i  ] = 1 + i / x ;		/* real part of B(i) */
	    Bx [2*i+1] = (x/2 - i) / (3*x) ;	/* imag part of B(i) */
	}
    }
#endif

    cholmod_print_dense (B, "B", cm) ;
    bnorm = cholmod_norm_dense (B, 0, cm) ;	/* max norm */
    printf ("bnorm %g\n", bnorm) ;

    /* ---------------------------------------------------------------------- */
    /* analyze, factorize, and solve */
    /* ---------------------------------------------------------------------- */

    t = CPUTIME ;
    L = cholmod_analyze (A, cm) ;
    ta = CPUTIME - t ;
    ta = MAX (ta, 0) ;

    printf ("Analyze: flop %g lnz %g\n", cm->fl, cm->lnz) ;

    if (A->stype == 0)
    {
	printf ("Factorizing A*A'+beta*I\n") ;
	t = CPUTIME ;
	cholmod_factorize_p (A, beta, NULL, 0, L, cm) ;
	tf = CPUTIME - t ;
	tf = MAX (tf, 0) ;
    }
    else
    {
	printf ("Factorizing A\n") ;
	t = CPUTIME ;
	cholmod_factorize (A, L, cm) ;
	tf = CPUTIME - t ;
	tf = MAX (tf, 0) ;
    }

    t = CPUTIME ;

    X = cholmod_solve (CHOLMOD_A, L, B, cm) ;
    ts = CPUTIME - t ;
    ts = MAX (ts, 0) ;
    tot = ta + tf + ts ;

    /* ---------------------------------------------------------------------- */
    /* compute the residual */
    /* ---------------------------------------------------------------------- */

    if (A->stype == 0)
    {
	/* (AA'+beta*I)x=b is the linear system that was solved */
	/* W = A'*X */
	W = cholmod_allocate_dense (A->ncol, 1, A->ncol, xtype, cm) ;
	cholmod_sdmult (A, 2, one, zero, X, W, cm) ;
	/* R = B - beta*X */
	R = cholmod_zeros (n, 1, xtype, cm) ;
	Rx = R->x ;
	Xx = X->x ;
	if (xtype == CHOLMOD_REAL)
	{
	    for (i = 0 ; i < n ; i++)
	    {
		Rx [i] = Bx [i] - beta [0] * Xx [i] ;
	    }
	}
	else
	{
	    /* complex case */
	    for (i = 0 ; i < n ; i++)
	    {
		Rx [2*i  ] = Bx [2*i  ] - beta [0] * Xx [2*i  ] ;
		Rx [2*i+1] = Bx [2*i+1] - beta [0] * Xx [2*i+1] ;
	    }
	}
	/* R = A*W - R */
	cholmod_sdmult (A, 0, one, minusone, W, R, cm) ;
	cholmod_free_dense (&W, cm) ;
    }
    else
    {
	/* Ax=b was factorized and solved, R = B-A*X */
	R = cholmod_copy_dense (B, cm) ;
	cholmod_sdmult (A, 0, minusone, one, X, R, cm) ;
    }
    rnorm = cholmod_norm_dense (R, 0, cm) ;	    /* max abs. entry */
    xnorm = cholmod_norm_dense (X, 0, cm) ;	    /* max abs. entry */

    axbnorm = (anorm * xnorm + bnorm + ((n == 0) ? 1 : 0)) ;
    resid = rnorm / axbnorm ;

    /* ---------------------------------------------------------------------- */
    /* iterative refinement (real symmetric case only) */
    /* ---------------------------------------------------------------------- */

    resid2 = -1 ;
    if (A->stype != 0 && A->xtype == CHOLMOD_REAL)
    {
	cholmod_dense *R2 ;

	/* R2 = A\(B-A*X) */
	R2 = cholmod_solve (CHOLMOD_A, L, R, cm) ;
	/* compute X = X + A\(B-A*X) */
	Xx = X->x ;
	Rx = R2->x ;
	for (i = 0 ; i < n ; i++)
	{
	    Xx [i] = Xx [i] + Rx [i] ;
	}
	cholmod_free_dense (&R2, cm) ;
	cholmod_free_dense (&R, cm) ;

	/* compute the new residual, R = B-A*X */
	R = cholmod_copy_dense (B, cm) ;
	cholmod_sdmult (A, 0, minusone, one, X, R, cm) ;
	rnorm2 = cholmod_norm_dense (R, 0, cm) ;
	resid2 = rnorm2 / axbnorm ;
    }

    cholmod_free_dense (&R, cm) ;

    /* ---------------------------------------------------------------------- */
    /* print results */
    /* ---------------------------------------------------------------------- */

    cholmod_print_factor (L, "L", cm) ;

    /* determine the # of integers's and reals's in L.  See cholmod_free */
    if (L->is_super)
    {
	s = L->nsuper + 1 ;
	xsize = L->xsize ;
	ss = L->ssize ;
	isize =
	    n	/* L->Perm */
	    + n	/* L->ColCount, nz in each column of 'pure' L */
	    + s	/* L->pi, column pointers for L->s */
	    + s	/* L->px, column pointers for L->x */
	    + s	/* L->super, starting column index of each supernode */
	    + ss ;	/* L->s, the pattern of the supernodes */
    }
    else
    {
	/* this space can increase if you change parameters to their non-
	 * default values (cm->final_pack, for example). */
	lnz = L->nzmax ;
	xsize = lnz ;
	isize =
	    n	/* L->Perm */
	    + n	/* L->ColCount, nz in each column of 'pure' L */
	    + n+1	/* L->p, column pointers */
	    + lnz	/* L->i, integer row indices */
	    + n	/* L->nz, nz in each column of L */
	    + n+2	/* L->next, link list */
	    + n+2 ;	/* L->prev, link list */
    }

    anz = cm->anz ;
    for (i = 0 ; i < CHOLMOD_MAXMETHODS ; i++)
    {
	fl = cm->method [i].fl ;
	xlnz = cm->method [i].lnz ;
	cm->method [i].fl = -1 ;
	cm->method [i].lnz = -1 ;
	ordering = cm->method [i].ordering ;
	if (fl >= 0)
	{
	    printf ("Ordering: ") ;
	    if (ordering == CHOLMOD_POSTORDERED) printf ("postordered ") ;
	    if (ordering == CHOLMOD_NATURAL)     printf ("natural ") ;
	    if (ordering == CHOLMOD_GIVEN)	     printf ("user    ") ;
	    if (ordering == CHOLMOD_AMD)	     printf ("AMD     ") ;
	    if (ordering == CHOLMOD_METIS)	     printf ("METIS   ") ;
	    if (ordering == CHOLMOD_NESDIS)      printf ("NESDIS  ") ;
	    if (xlnz > 0)
	    {
		printf ("fl/lnz %10.1f", fl / xlnz) ;
	    }
	    if (anz > 0)
	    {
		printf ("  lnz/anz %10.1f", xlnz / anz) ;
	    }
	    printf ("\n") ;
	}
    }

    printf ("ints in L: %d, doubles in L: %d\n", isize, xsize) ;
    printf ("factor flops %g nnz(L) %15.0f (w/no amalgamation)\n",
	    cm->fl, cm->lnz) ;
    if (A->stype == 0)
    {
	printf ("nnz(A):    %15.0f\n", cm->anz) ;
    }
    else
    {
	printf ("nnz(A*A'): %15.0f\n", cm->anz) ;
    }
    if (cm->lnz > 0)
    {
	printf ("flops / nnz(L):  %8.1f\n", cm->fl / cm->lnz) ;
    }
    if (anz > 0)
    {
	printf ("nnz(L) / nnz(A): %8.1f\n", cm->lnz / cm->anz) ;
    }
    printf ("analyze cputime:  %12.4f\n", ta) ;
    printf ("factor  cputime:   %12.4f mflop: %8.1f\n", tf,
	(tf == 0) ? 0 : (1e-6*cm->fl / tf)) ;
    printf ("solve   cputime:   %12.4f mflop: %8.1f\n", ts,
	(ts == 0) ? 0 : (1e-6*4*cm->lnz / ts)) ;
    printf ("overall cputime:   %12.4f mflop: %8.1f\n", 
	    tot, (tot == 0) ? 0 : (1e-6 * (cm->fl + 4 * cm->lnz) / tot)) ;
    printf ("peak memory usage: %12.0f (MB)\n",
	    (double) (cm->memory_usage) / 1048576.) ;
    printf ("residual %8.1e (|Ax-b|/(|A||x|+|b|))\n", resid) ;
    if (resid2 >= 0)
    {
	printf ("residual %8.1e (|Ax-b|/(|A||x|+|b|))"
		" after iterative refinement\n", resid2) ;
    }
    printf ("rcond    %8.1e\n\n", cholmod_rcond (L, cm)) ;
    cholmod_free_factor (&L, cm) ;
    cholmod_free_dense (&X, cm) ;

    /* ---------------------------------------------------------------------- */
    /* free matrices and finish CHOLMOD */
    /* ---------------------------------------------------------------------- */

    cholmod_free_sparse (&A, cm) ;
    cholmod_free_dense (&B, cm) ;
    cholmod_finish (cm) ;
    return (0) ;
}
Пример #7
0
 static cholmod_dense* allocate_dense(size_t nrow, size_t ncol, size_t d,
     int xtype, cholmod_common* c) {
   return cholmod_allocate_dense(nrow, ncol, d, xtype, c);
 }
int main (int argc, char **argv)
{
    double resid [4], t, ta, tf, ts [3], tot, bnorm, xnorm, anorm, rnorm, fl,
        anz, axbnorm, rnorm2, resid2, rcond ;
    FILE *f ;
    cholmod_sparse *A ;
    cholmod_dense *X = NULL, *B, *W, *R ;
    double one [2], zero [2], minusone [2], beta [2], xlnz ;
    cholmod_common Common, *cm ;
    cholmod_factor *L ;
    double *Bx, *Rx, *Xx ;
    int i, n, isize, xsize, ordering, xtype, s, ss, lnz ;
    int trial, method, L_is_super ;
    int ver [3] ;

    ts[0] = 0.;
    ts[1] = 0.;
    ts[2] = 0.;

    /* ---------------------------------------------------------------------- */
    /* get the file containing the input matrix */
    /* ---------------------------------------------------------------------- */

    ff = NULL ;
    if (argc > 1)
    {
	if ((f = fopen (argv [1], "r")) == NULL)
	{
	    my_handler (CHOLMOD_INVALID, __FILE__, __LINE__,
		    "unable to open file") ;
	}
	ff = f ;
    }
    else
    {
	f = stdin ;
    }

    /* ---------------------------------------------------------------------- */
    /* start CHOLMOD and set parameters */
    /* ---------------------------------------------------------------------- */

    cm = &Common ;
    cholmod_start (cm) ;
    CHOLMOD_FUNCTION_DEFAULTS (cm) ;    /* just for testing (not required) */

    /* use default parameter settings, except for the error handler.  This
     * demo program terminates if an error occurs (out of memory, not positive
     * definite, ...).  It makes the demo program simpler (no need to check
     * CHOLMOD error conditions).  This non-default parameter setting has no
     * effect on performance. */
    cm->error_handler = my_handler ;

    /* Note that CHOLMOD will do a supernodal LL' or a simplicial LDL' by
     * default, automatically selecting the latter if flop/nnz(L) < 40. */

    /* ---------------------------------------------------------------------- */
    /* create basic scalars */
    /* ---------------------------------------------------------------------- */

    zero [0] = 0 ;
    zero [1] = 0 ;
    one [0] = 1 ;
    one [1] = 0 ;
    minusone [0] = -1 ;
    minusone [1] = 0 ;
    beta [0] = 1e-6 ;
    beta [1] = 0 ;

    /* ---------------------------------------------------------------------- */
    /* read in a matrix */
    /* ---------------------------------------------------------------------- */

    printf ("\n---------------------------------- cholmod_demo:\n") ;
    cholmod_version (ver) ;
    printf ("cholmod version %d.%d.%d\n", ver [0], ver [1], ver [2]) ;
    SuiteSparse_version (ver) ;
    printf ("SuiteSparse version %d.%d.%d\n", ver [0], ver [1], ver [2]) ;
    A = cholmod_read_sparse (f, cm) ;
    if (ff != NULL)
    {
        fclose (ff) ;
        ff = NULL ;
    }
    anorm = cholmod_norm_sparse (A, 0, cm) ;
    xtype = A->xtype ;
    printf ("norm (A,inf) = %g\n", anorm) ;
    printf ("norm (A,1)   = %g\n", cholmod_norm_sparse (A, 1, cm)) ;
    cholmod_print_sparse (A, "A", cm) ;

    if (A->nrow > A->ncol)
    {
	/* Transpose A so that A'A+beta*I will be factorized instead */
	cholmod_sparse *C = cholmod_transpose (A, 2, cm) ;
	cholmod_free_sparse (&A, cm) ;
	A = C ;
	printf ("transposing input matrix\n") ;
    }

    /* ---------------------------------------------------------------------- */
    /* create an arbitrary right-hand-side */
    /* ---------------------------------------------------------------------- */

    n = A->nrow ;
    B = cholmod_zeros (n, 1, xtype, cm) ;
    Bx = B->x ;

#if GHS
    {
	/* b = A*ones(n,1), used by Gould, Hu, and Scott in their experiments */
	cholmod_dense *X0 ;
	X0 = cholmod_ones (A->ncol, 1, xtype, cm) ;
	cholmod_sdmult (A, 0, one, zero, X0, B, cm) ;
	cholmod_free_dense (&X0, cm) ;
    }
#else
    if (xtype == CHOLMOD_REAL)
    {
	/* real case */
	for (i = 0 ; i < n ; i++)
	{
	    double x = n ;
	    Bx [i] = 1 + i / x ;
	}
    }
    else
    {
	/* complex case */
	for (i = 0 ; i < n ; i++)
	{
	    double x = n ;
	    Bx [2*i  ] = 1 + i / x ;		/* real part of B(i) */
	    Bx [2*i+1] = (x/2 - i) / (3*x) ;	/* imag part of B(i) */
	}
    }
#endif

    cholmod_print_dense (B, "B", cm) ;
    bnorm = cholmod_norm_dense (B, 0, cm) ;	/* max norm */
    printf ("bnorm %g\n", bnorm) ;

    /* ---------------------------------------------------------------------- */
    /* analyze and factorize */
    /* ---------------------------------------------------------------------- */

    t = CPUTIME ;
    L = cholmod_analyze (A, cm) ;
    ta = CPUTIME - t ;
    ta = MAX (ta, 0) ;

    printf ("Analyze: flop %g lnz %g\n", cm->fl, cm->lnz) ;

    if (A->stype == 0)
    {
	printf ("Factorizing A*A'+beta*I\n") ;
	t = CPUTIME ;
	cholmod_factorize_p (A, beta, NULL, 0, L, cm) ;
	tf = CPUTIME - t ;
	tf = MAX (tf, 0) ;
    }
    else
    {
	printf ("Factorizing A\n") ;
	t = CPUTIME ;
	cholmod_factorize (A, L, cm) ;
	tf = CPUTIME - t ;
	tf = MAX (tf, 0) ;
    }

    cholmod_print_factor (L, "L", cm) ;

    /* determine the # of integers's and reals's in L.  See cholmod_free */
    if (L->is_super)
    {
	s = L->nsuper + 1 ;
	xsize = L->xsize ;
	ss = L->ssize ;
	isize =
	    n	/* L->Perm */
	    + n	/* L->ColCount, nz in each column of 'pure' L */
	    + s	/* L->pi, column pointers for L->s */
	    + s	/* L->px, column pointers for L->x */
	    + s	/* L->super, starting column index of each supernode */
	    + ss ;	/* L->s, the pattern of the supernodes */
    }
    else
    {
	/* this space can increase if you change parameters to their non-
	 * default values (cm->final_pack, for example). */
	lnz = L->nzmax ;
	xsize = lnz ;
	isize =
	    n	/* L->Perm */
	    + n	/* L->ColCount, nz in each column of 'pure' L */
	    + n+1	/* L->p, column pointers */
	    + lnz	/* L->i, integer row indices */
	    + n	/* L->nz, nz in each column of L */
	    + n+2	/* L->next, link list */
	    + n+2 ;	/* L->prev, link list */
    }

    /* solve with Bset will change L from simplicial to supernodal */
    rcond = cholmod_rcond (L, cm) ;
    L_is_super = L->is_super ;

    /* ---------------------------------------------------------------------- */
    /* solve */
    /* ---------------------------------------------------------------------- */

    for (method = 0 ; method <= 3 ; method++)
    {
        double x = n ;

        if (method == 0)
        {
            /* basic solve, just once */
            t = CPUTIME ;
            X = cholmod_solve (CHOLMOD_A, L, B, cm) ;
            ts [0] = CPUTIME - t ;
            ts [0] = MAX (ts [0], 0) ;
        }
        else if (method == 1)
        {
            /* basic solve, many times, but keep the last one */
            t = CPUTIME ;
            for (trial = 0 ; trial < NTRIALS ; trial++)
            {
                cholmod_free_dense (&X, cm) ;
                Bx [0] = 1 + trial / x ;        /* tweak B each iteration */
                X = cholmod_solve (CHOLMOD_A, L, B, cm) ;
            }
            ts [1] = CPUTIME - t ;
            ts [1] = MAX (ts [1], 0) / NTRIALS ;
        }
        else if (method == 2)
        {
            /* solve with reused workspace */
            cholmod_dense *Ywork = NULL, *Ework = NULL ;
            cholmod_free_dense (&X, cm) ;

            t = CPUTIME ;
            for (trial = 0 ; trial < NTRIALS ; trial++)
            {
                Bx [0] = 1 + trial / x ;        /* tweak B each iteration */
                cholmod_solve2 (CHOLMOD_A, L, B, NULL, &X, NULL,
                    &Ywork, &Ework, cm) ;
            }
            cholmod_free_dense (&Ywork, cm) ;
            cholmod_free_dense (&Ework, cm) ;
            ts [2] = CPUTIME - t ;
            ts [2] = MAX (ts [2], 0) / NTRIALS ;
        }
        else
        {
            /* solve with reused workspace and sparse Bset */
            cholmod_dense *Ywork = NULL, *Ework = NULL ;
            cholmod_dense *X2 = NULL, *B2 = NULL ;
            cholmod_sparse *Bset, *Xset = NULL ;
            int *Bsetp, *Bseti, *Xsetp, *Xseti, xlen, j, k, *Lnz ;
            double *X1x, *X2x, *B2x, err ;
            FILE *timelog = fopen ("timelog.m", "w") ;
            if (timelog) fprintf (timelog, "results = [\n") ;

            B2 = cholmod_zeros (n, 1, xtype, cm) ;
            B2x = B2->x ;

            Bset = cholmod_allocate_sparse (n, 1, 1, FALSE, TRUE, 0,
                CHOLMOD_PATTERN, cm) ;
            Bsetp = Bset->p ;
            Bseti = Bset->i ;
            Bsetp [0] = 0 ;     /* nnz(B) is 1 (it can be anything) */
            Bsetp [1] = 1 ;
            resid [3] = 0 ;

            for (i = 0 ; i < MIN (100,n) ; i++)
            {
                /* B (i) is nonzero, all other entries are ignored
                   (implied to be zero) */
                Bseti [0] = i ;
                if (xtype == CHOLMOD_REAL)
                {
                    B2x [i] = 3.1 * i + 0.9 ;
                }
                else
                {
                    B2x [2*i  ] = i + 0.042 ;
                    B2x [2*i+1] = i - 92.7 ;
                }

                /* first get the entire solution, to compare against */
                cholmod_solve2 (CHOLMOD_A, L, B2, NULL, &X, NULL,
                    &Ywork, &Ework, cm) ;

                /* now get the sparse solutions; this will change L from
                   supernodal to simplicial */

                if (i == 0)
                {
                    /* first solve can be slower because it has to allocate
                       space for X2, Xset, etc, and change L.
                       So don't time it */
                    cholmod_solve2 (CHOLMOD_A, L, B2, Bset, &X2, &Xset,
                        &Ywork, &Ework, cm) ;
                }

                t = CPUTIME ;
                for (trial = 0 ; trial < NTRIALS ; trial++)
                {
                    /* solve Ax=b but only to get x(i).
                       b is all zero except for b(i).
                       This takes O(xlen) time */
                    cholmod_solve2 (CHOLMOD_A, L, B2, Bset, &X2, &Xset,
                        &Ywork, &Ework, cm) ;
                }
                t = CPUTIME - t ;
                t = MAX (t, 0) / NTRIALS ;

                /* check the solution and log the time */
                Xsetp = Xset->p ;
                Xseti = Xset->i ;
                xlen = Xsetp [1] ;
                X1x = X->x ;
                X2x = X2->x ;
                Lnz = L->nz ;

                /*
                printf ("\ni %d xlen %d  (%p %p)\n", i, xlen, X1x, X2x) ;
                */

                if (xtype == CHOLMOD_REAL)
                {
                    fl = 2 * xlen ;
                    for (k = 0 ; k < xlen ; k++)
                    {
                        j = Xseti [k] ;
                        fl += 4 * Lnz [j] ;
                        err = X1x [j] - X2x [j] ;
                        err = ABS (err) ;
                        resid [3] = MAX (resid [3], err) ;
                    }
                }
                else
                {
                    fl = 16 * xlen ;
                    for (k = 0 ; k < xlen ; k++)
                    {
                        j = Xseti [k] ;
                        fl += 16 * Lnz [j] ;
                        err = X1x [2*j  ] - X2x [2*j  ] ;
                        err = ABS (err) ;
                        resid [3] = MAX (resid [3], err) ;
                        err = X1x [2*j+1] - X2x [2*j+1] ;
                        err = ABS (err) ;
                        resid [3] = MAX (resid [3], err) ;
                    }
                }
                if (timelog) fprintf (timelog, "%g %g %g %g\n",
                    (double) i, (double) xlen, fl, t);

                /* clear B for the next test */
                if (xtype == CHOLMOD_REAL)
                {
                    B2x [i] = 0 ;
                }
                else
                {
                    B2x [2*i  ] = 0 ;
                    B2x [2*i+1] = 0 ;
                }

            }

            if (timelog)
            {
                fprintf (timelog, "] ; resid = %g ;\n", resid [3]) ;
                fprintf (timelog, "lnz = %g ;\n", cm->lnz) ;
                fprintf (timelog, "t = %g ;   %% dense solve time\n", ts [2]) ;
                fclose (timelog) ;
            }

            resid [3] = resid [3] / cholmod_norm_dense (X, 1, cm) ;

            cholmod_free_dense (&Ywork, cm) ;
            cholmod_free_dense (&Ework, cm) ;
            cholmod_free_dense (&X2, cm) ;
            cholmod_free_dense (&B2, cm) ;
            cholmod_free_sparse (&Xset, cm) ;
            cholmod_free_sparse (&Bset, cm) ;
        }

        /* ------------------------------------------------------------------ */
        /* compute the residual */
        /* ------------------------------------------------------------------ */

        if (method < 3)
        {

            if (A->stype == 0)
            {
                /* (AA'+beta*I)x=b is the linear system that was solved */
                /* W = A'*X */
                W = cholmod_allocate_dense (A->ncol, 1, A->ncol, xtype, cm) ;
                cholmod_sdmult (A, 2, one, zero, X, W, cm) ;
                /* R = B - beta*X */
                R = cholmod_zeros (n, 1, xtype, cm) ;
                Rx = R->x ;
                Xx = X->x ;
                if (xtype == CHOLMOD_REAL)
                {
                    for (i = 0 ; i < n ; i++)
                    {
                        Rx [i] = Bx [i] - beta [0] * Xx [i] ;
                    }
                }
                else
                {
                    /* complex case */
                    for (i = 0 ; i < n ; i++)
                    {
                        Rx [2*i  ] = Bx [2*i  ] - beta [0] * Xx [2*i  ] ;
                        Rx [2*i+1] = Bx [2*i+1] - beta [0] * Xx [2*i+1] ;
                    }
                }
                /* R = A*W - R */
                cholmod_sdmult (A, 0, one, minusone, W, R, cm) ;
                cholmod_free_dense (&W, cm) ;
            }
            else
            {
                /* Ax=b was factorized and solved, R = B-A*X */
                R = cholmod_copy_dense (B, cm) ;
                cholmod_sdmult (A, 0, minusone, one, X, R, cm) ;
            }
            rnorm = cholmod_norm_dense (R, 0, cm) ;	    /* max abs. entry */
            xnorm = cholmod_norm_dense (X, 0, cm) ;	    /* max abs. entry */

            axbnorm = (anorm * xnorm + bnorm + ((n == 0) ? 1 : 0)) ;
            resid [method] = rnorm / axbnorm ;
        }
    }

    tot = ta + tf + ts [0] ;

    /* ---------------------------------------------------------------------- */
    /* iterative refinement (real symmetric case only) */
    /* ---------------------------------------------------------------------- */

    resid2 = -1 ;
    if (A->stype != 0 && A->xtype == CHOLMOD_REAL)
    {
	cholmod_dense *R2 ;

	/* R2 = A\(B-A*X) */
	R2 = cholmod_solve (CHOLMOD_A, L, R, cm) ;
	/* compute X = X + A\(B-A*X) */
	Xx = X->x ;
	Rx = R2->x ;
	for (i = 0 ; i < n ; i++)
	{
	    Xx [i] = Xx [i] + Rx [i] ;
	}
	cholmod_free_dense (&R2, cm) ;
	cholmod_free_dense (&R, cm) ;

	/* compute the new residual, R = B-A*X */
	R = cholmod_copy_dense (B, cm) ;
	cholmod_sdmult (A, 0, minusone, one, X, R, cm) ;
	rnorm2 = cholmod_norm_dense (R, 0, cm) ;
	resid2 = rnorm2 / axbnorm ;
    }

    cholmod_free_dense (&R, cm) ;

    /* ---------------------------------------------------------------------- */
    /* print results */
    /* ---------------------------------------------------------------------- */

    anz = cm->anz ;
    for (i = 0 ; i < CHOLMOD_MAXMETHODS ; i++)
    {
	fl = cm->method [i].fl ;
	xlnz = cm->method [i].lnz ;
	cm->method [i].fl = -1 ;
	cm->method [i].lnz = -1 ;
	ordering = cm->method [i].ordering ;
	if (fl >= 0)
	{
	    printf ("Ordering: ") ;
	    if (ordering == CHOLMOD_POSTORDERED) printf ("postordered ") ;
	    if (ordering == CHOLMOD_NATURAL)     printf ("natural ") ;
	    if (ordering == CHOLMOD_GIVEN)	     printf ("user    ") ;
	    if (ordering == CHOLMOD_AMD)	     printf ("AMD     ") ;
	    if (ordering == CHOLMOD_METIS)	     printf ("METIS   ") ;
	    if (ordering == CHOLMOD_NESDIS)      printf ("NESDIS  ") ;
	    if (xlnz > 0)
	    {
		printf ("fl/lnz %10.1f", fl / xlnz) ;
	    }
	    if (anz > 0)
	    {
		printf ("  lnz/anz %10.1f", xlnz / anz) ;
	    }
	    printf ("\n") ;
	}
    }

    printf ("ints in L: %15.0f, doubles in L: %15.0f\n",
        (double) isize, (double) xsize) ;
    printf ("factor flops %g nnz(L) %15.0f (w/no amalgamation)\n",
	    cm->fl, cm->lnz) ;
    if (A->stype == 0)
    {
	printf ("nnz(A):    %15.0f\n", cm->anz) ;
    }
    else
    {
	printf ("nnz(A*A'): %15.0f\n", cm->anz) ;
    }
    if (cm->lnz > 0)
    {
	printf ("flops / nnz(L):  %8.1f\n", cm->fl / cm->lnz) ;
    }
    if (anz > 0)
    {
	printf ("nnz(L) / nnz(A): %8.1f\n", cm->lnz / cm->anz) ;
    }
    printf ("analyze cputime:  %12.4f\n", ta) ;
    printf ("factor  cputime:   %12.4f mflop: %8.1f\n", tf,
	(tf == 0) ? 0 : (1e-6*cm->fl / tf)) ;
    printf ("solve   cputime:   %12.4f mflop: %8.1f\n", ts [0],
	(ts [0] == 0) ? 0 : (1e-6*4*cm->lnz / ts [0])) ;
    printf ("overall cputime:   %12.4f mflop: %8.1f\n", 
	    tot, (tot == 0) ? 0 : (1e-6 * (cm->fl + 4 * cm->lnz) / tot)) ;
    printf ("solve   cputime:   %12.4f mflop: %8.1f (%d trials)\n", ts [1],
	(ts [1] == 0) ? 0 : (1e-6*4*cm->lnz / ts [1]), NTRIALS) ;
    printf ("solve2  cputime:   %12.4f mflop: %8.1f (%d trials)\n", ts [2],
	(ts [2] == 0) ? 0 : (1e-6*4*cm->lnz / ts [2]), NTRIALS) ;
    printf ("peak memory usage: %12.0f (MB)\n",
	    (double) (cm->memory_usage) / 1048576.) ;
    printf ("residual (|Ax-b|/(|A||x|+|b|)): ") ;
    for (method = 0 ; method <= 3 ; method++)
    {
        printf ("%8.2e ", resid [method]) ;
    }
    printf ("\n") ;
    if (resid2 >= 0)
    {
	printf ("residual %8.1e (|Ax-b|/(|A||x|+|b|))"
		" after iterative refinement\n", resid2) ;
    }

    printf ("rcond    %8.1e\n\n", rcond) ;

    if (L_is_super)
    {
        cholmod_gpu_stats (cm) ;
    }

    cholmod_free_factor (&L, cm) ;
    cholmod_free_dense (&X, cm) ;

    /* ---------------------------------------------------------------------- */
    /* free matrices and finish CHOLMOD */
    /* ---------------------------------------------------------------------- */

    cholmod_free_sparse (&A, cm) ;
    cholmod_free_dense (&B, cm) ;
    cholmod_finish (cm) ;
    
    return (0) ;
}
BasicMesh MeshTransferer::transfer(const vector<PhGUtils::Matrix3x3d> &S1grad)
{
  if( !(S0set && T0set) ) {
    throw "S0 or T0 not set.";
  }

  auto &S = S1grad;
  auto &T = T0grad;

  int nfaces = S0.faces.nrow;
  int nverts = S0.verts.nrow;

  // assemble sparse matrix A
  int nrowsA = nfaces * 3;
  int nsv = stationary_vertices.size();
  int nrowsC = nsv;
  int nrows = nrowsA + nrowsC;
  int ncols = nverts;
  int ntermsA = nfaces*9;
  int ntermsC = stationary_vertices.size();
  int nterms = ntermsA + ntermsC;
  SparseMatrix A(nrows, ncols, nterms);
  // fill in the deformation gradient part
  for(int i=0, ioffset=0;i<nfaces;++i) {
    /*
     * Ai:
     *     1 2 3 4 5 ... nfaces*3
     *     1 2 3 4 5 ... nfaces*3
     *     1 2 3 4 5 ... nfaces*3
     * Ai = reshape(Ai, 1, nfaces*9)
     *
     * Aj = reshape(repmat(S0.faces', 3, 1), 1, nfaces*9)
     * Av = reshape(cell2mat(T)', 1, nfaces*9)
     */
    int *f = S0.faces.rowptr(i);

    auto Ti = T[i];

    A.append(ioffset, f[0], Ti(0));
    A.append(ioffset, f[1], Ti(1));
    A.append(ioffset, f[2], Ti(2));
    ++ioffset;

    A.append(ioffset, f[0], Ti(3));
    A.append(ioffset, f[1], Ti(4));
    A.append(ioffset, f[2], Ti(5));
    ++ioffset;

    A.append(ioffset, f[0], Ti(6));
    A.append(ioffset, f[1], Ti(7));
    A.append(ioffset, f[2], Ti(8));
    ++ioffset;
  }

  // fill in the lower part of A, stationary vertices part
  for(int i=0;i<nsv;++i) {
    A.append(nrowsA+i, stationary_vertices[i], 1);
  }

  ofstream fA("A.txt");
  fA<<A;
  fA.close();

  // fill in c matrix
  DenseMatrix c(nrows, 3);
  for(int i=0;i<3;++i) {
    for(int j=0, joffset=0;j<nfaces;++j) {
      auto &Sj = S[j];
      c(joffset, i) = Sj(0, i); ++joffset;
      c(joffset, i) = Sj(1, i); ++joffset;
      c(joffset, i) = Sj(2, i); ++joffset;
    }
  }
  for(int i=0;i<3;++i) {
    for(int j=0, joffset=nrowsA;j<nsv;++j,++joffset) {
      auto vj = T0.verts.rowptr(stationary_vertices[j]);
      c(joffset, i) = vj[i];
    }
  }

  cholmod_sparse *G = A.to_sparse();
  cholmod_sparse *Gt = cholmod_transpose(G, 2, global::cm);

  // compute GtD
  // just multiply Dsi to corresponding elemenets
  double *Gtx = (double*)Gt->x;
  const int* Gtp = (const int*)(Gt->p);
  for(int i=0;i<nrowsA;++i) {
    int fidx = i/3;
    for(int j=Gtp[i];j<Gtp[i+1];++j) {
      Gtx[j] *= Ds(fidx);
    }
  }

  // compute GtDG
  cholmod_sparse *GtDG = cholmod_ssmult(Gt, G, 0, 1, 1, global::cm);
  GtDG->stype = 1;

  // compute GtD * c
  cholmod_dense *GtDc = cholmod_allocate_dense(ncols, 3, ncols, CHOLMOD_REAL, global::cm);
  double alpha[2] = {1, 0}; double beta[2] = {0, 0};
  cholmod_sdmult(Gt, 0, alpha, beta, c.to_dense(), GtDc, global::cm);

  // solve for GtDG \ GtDc
  cholmod_factor *L = cholmod_analyze(GtDG, global::cm);
  cholmod_factorize(GtDG, L, global::cm);
  cholmod_dense *x = cholmod_solve(CHOLMOD_A, L, GtDc, global::cm);

  // make a copy of T0
  BasicMesh Td = T0;

  // change the vertices with x
  double *Vx = (double*)x->x;
  for(int i=0;i<nverts;++i) {
    Td.verts(i, 0) = Vx[i];
    Td.verts(i, 1) = Vx[i+nverts];
    Td.verts(i, 2) = Vx[i+nverts*2];
  }

  // release memory
  cholmod_free_sparse(&G, global::cm);
  cholmod_free_sparse(&Gt, global::cm);
  cholmod_free_sparse(&GtDG, global::cm);
  cholmod_free_dense(&GtDc, global::cm);
  cholmod_free_factor(&L, global::cm);
  cholmod_free_dense(&x, global::cm);

  return Td;
}