/**
 * Sparse-times-dense matrix product, a %*% b.
 *
 * @param a  sparse matrix object; read through AS_CHM_SP().
 * @param b  matrix object; first coerced with mMatrix_as_dgeMatrix().
 * @return   the product converted by chm_dense_to_SEXP(); its dimnames are
 *           the row names of `a` and the column names of the coerced `b`.
 *
 * cholmod_l_sdmult() is called with transpose = 0, alpha = 1 and beta = 0.
 * The AS_CHM_* macros are deliberately kept inside this function body and
 * followed by R_CheckStack(), exactly as in the original layout.
 */
SEXP Csparse_dense_prod(SEXP a, SEXP b)
{
    double alpha[] = {1, 0}, beta[] = {0, 0};
    int n_protected = 0;
    CHM_SP lhs;
    CHM_DN rhs, prod;
    SEXP b_dge, dimnames;

    lhs = AS_CHM_SP(a);
    b_dge = PROTECT(mMatrix_as_dgeMatrix(b));
    n_protected++;
    rhs = AS_CHM_DN(b_dge);

    /* result is nrow(a) x ncol(b), with leading dimension nrow(a) */
    prod = cholmod_l_allocate_dense(lhs->nrow, rhs->ncol, lhs->nrow,
				    rhs->xtype, &c);
    dimnames = PROTECT(allocVector(VECSXP, 2));
    n_protected++;
    R_CheckStack();

    /* Historical note kept from the original (2010-11): cholmod_sdmult()
     * failed for pattern matrices and cholmod_l_copy() did not produce a
     * CHOLMOD_REAL copy, so a pattern `a` is coerced through the Matrix
     * classes instead. */
    if (lhs->xtype == CHOLMOD_PATTERN) {
	SEXP a_dbl = PROTECT(nz2Csparse(a, x_double));
	n_protected++;
	lhs = AS_CHM_SP(a_dbl);
    }

    cholmod_l_sdmult(lhs, 0, alpha, beta, rhs, prod, &c);

    /* establish dimnames: rows from a, columns from b */
    SET_VECTOR_ELT(dimnames, 0,
		   duplicate(VECTOR_ELT(GET_SLOT(a, Matrix_DimNamesSym), 0)));
    SET_VECTOR_ELT(dimnames, 1,
		   duplicate(VECTOR_ELT(GET_SLOT(b_dge, Matrix_DimNamesSym), 1)));

    UNPROTECT(n_protected);
    return chm_dense_to_SEXP(prod, 1, 0, dimnames);
}
int main (int argc, char **argv) { cholmod_common Common, *cc ; cholmod_sparse *A ; cholmod_dense *X, *B, *Residual ; double rnorm, one [2] = {1,0}, minusone [2] = {-1,0} ; int mtype ; // start CHOLMOD cc = &Common ; cholmod_l_start (cc) ; // load A A = (cholmod_sparse *) cholmod_l_read_matrix (stdin, 1, &mtype, cc) ; // B = ones (size (A,1),1) B = cholmod_l_ones (A->nrow, 1, A->xtype, cc) ; // X = A\B X = SuiteSparseQR <double> (A, B, cc) ; // rnorm = norm (B-A*X) Residual = cholmod_l_copy_dense (B, cc) ; cholmod_l_sdmult (A, 0, minusone, one, X, Residual, cc) ; rnorm = cholmod_l_norm_dense (Residual, 2, cc) ; printf ("2-norm of residual: %8.1e\n", rnorm) ; printf ("rank %ld\n", cc->SPQR_istat [4]) ; // free everything and finish CHOLMOD cholmod_l_free_dense (&Residual, cc) ; cholmod_l_free_sparse (&A, cc) ; cholmod_l_free_dense (&X, cc) ; cholmod_l_free_dense (&B, cc) ; cholmod_l_finish (cc) ; return (0) ;
// Print residual statistics for a computed solution X of A*X = B:
//   - norm (A*X-B), divided by norm (A,1) * norm (X) when m <= n and both
//     norms are positive (i.e. not for least-squares systems),
//   - the rank estimate read from cc->SPQR_istat [4],
//   - norm (A'*(A*X-B)), divided by norm (A,1) when that is positive.
//
// A, X and B are only read.  If a workspace vector cannot be created, a
// message is printed instead of the statistics.
void check_residual
(
    cholmod_sparse *A,
    cholmod_dense *X,
    cholmod_dense *B,
    cholmod_common *cc
)
{
    Long m = A->nrow ;
    Long n = A->ncol ;
    Long rnk ;
    double rnorm, anorm, xnorm, atrnorm ;
    double one [2] = {1,0}, minusone [2] = {-1,0}, zero [2] = {0,0} ;
    cholmod_dense *r, *atr = NULL ;

    // get the rank(A) estimate
    rnk = cc->SPQR_istat [4] ;

    // anorm = norm (A,1) ;
    anorm = cholmod_l_norm_sparse (A, 1, cc) ;

    // r = B, then r = A*X-r below.  Check r before reading r->xtype.
    r = cholmod_l_copy_dense (B, cc) ;
    if (r != NULL)
    {
        atr = cholmod_l_zeros (n, 1, r->xtype, cc) ;    // atr = zeros (n,1)
    }
    if (r == NULL || atr == NULL)
    {
        printf ("check_residual: unable to allocate workspace\n") ;
        cholmod_l_free_dense (&r, cc) ;
        cholmod_l_free_dense (&atr, cc) ;
        return ;
    }

    // rnorm = norm (A*X-B)
    cholmod_l_sdmult (A, 0, one, minusone, X, r, cc) ;
    rnorm = cholmod_l_norm_dense (r, 2, cc) ;

    // xnorm = norm (X)
    xnorm = cholmod_l_norm_dense (X, 2, cc) ;

    // atrnorm = norm (A'*r)
    cholmod_l_sdmult (A, 1, one, zero, r, atr, cc) ;    // atr = A'*r
    atrnorm = cholmod_l_norm_dense (atr, 2, cc) ;       // atrnorm = norm (atr)
    if (anorm > 0)
    {
        atrnorm /= anorm ;
    }

    if (m <= n && anorm > 0 && xnorm > 0)
    {
        // find the relative residual, except for least-squares systems
        rnorm /= (anorm * xnorm) ;
    }
    printf ("relative norm(Ax-b): %8.1e rank: %6ld "
        "rel. norm(A'(Ax-b)) %8.1e\n", rnorm, rnk, atrnorm) ;

    cholmod_l_free_dense (&r, cc) ;
    cholmod_l_free_dense (&atr, cc) ;
}
/**
 * Sparse-times-dense cross product, t(a) %*% b.
 *
 * @param a  sparse matrix object; read through AS_CHM_SP().
 * @param b  matrix object; first coerced with mMatrix_as_dgeMatrix().
 * @return   the product converted by chm_dense_to_SEXP(); its dimnames are
 *           the column names of `a` and the column names of the coerced `b`.
 *
 * cholmod_l_sdmult() is called with transpose = 1, alpha = 1 and beta = 0.
 * The AS_CHM_* macros are deliberately kept inside this function body and
 * followed by R_CheckStack(), exactly as in the original layout.
 */
SEXP Csparse_dense_crossprod(SEXP a, SEXP b)
{
    double alpha[] = {1, 0}, beta[] = {0, 0};
    int n_protected = 0;
    CHM_SP lhs;
    CHM_DN rhs, prod;
    SEXP b_dge, dimnames;

    lhs = AS_CHM_SP(a);
    b_dge = PROTECT(mMatrix_as_dgeMatrix(b));
    n_protected++;
    rhs = AS_CHM_DN(b_dge);

    /* result is ncol(a) x ncol(b), with leading dimension ncol(a) */
    prod = cholmod_l_allocate_dense(lhs->ncol, rhs->ncol, lhs->ncol,
				    rhs->xtype, &c);
    dimnames = PROTECT(allocVector(VECSXP, 2));
    n_protected++;
    R_CheckStack();

    /* a pattern matrix is first coerced to a double-valued sparse matrix,
     * the same way Csparse_dense_prod() does it */
    if (lhs->xtype == CHOLMOD_PATTERN) {
	SEXP a_dbl = PROTECT(nz2Csparse(a, x_double));
	n_protected++;
	lhs = AS_CHM_SP(a_dbl);
    }

    cholmod_l_sdmult(lhs, 1, alpha, beta, rhs, prod, &c);

    /* establish dimnames: columns of a become rows of the result */
    SET_VECTOR_ELT(dimnames, 0,
		   duplicate(VECTOR_ELT(GET_SLOT(a, Matrix_DimNamesSym), 1)));
    SET_VECTOR_ELT(dimnames, 1,
		   duplicate(VECTOR_ELT(GET_SLOT(b_dge, Matrix_DimNamesSym), 1)));

    UNPROTECT(n_protected);
    return chm_dense_to_SEXP(prod, 1, 0, dimnames);
}
int main() { /* Time Recording Variables */ int now_s, now_u; struct rusage usage; double time_now, time_past; /* x, y, z and global indices */ int i, j, k, m, n, nodes; /* Connectivity enties */ int m_above, m_below, m_left, m_right, m_front, m_back; double c_above, c_below, c_left, c_right, c_front, c_back, c_self; /* Creat cholmod object */ cholmod_common Common, *cc; /* Compress column form sparse matrix */ cholmod_sparse *S, *ST; /* forcing function on input; voltages after solving */ cholmod_dense *b, *v0, *r; size_t *Si, *Sp, *Snz, *bi, *bj; double *Sx, *bx, *v0x; /* store the sparse matrix */ FILE *store; char filename[100]; /* start CHOLMOD */ cc = &Common; cholmod_l_start(cc); /* initialize timing variables*/ getrusage(0, &usage); now_s = usage.ru_utime.tv_sec; now_u = usage.ru_utime.tv_usec; time_past = now_s + now_u / 1.e6; time_now = time_past; /* total nodes in the grid */ nodes = IMAX * JMAX * KMAX; /* Allocate space for connectivity matrix and forcing vector. */ S = cholmod_l_allocate_sparse(nodes, nodes, 7 * nodes, 0, 0, 0, CHOLMOD_REAL, cc); b = cholmod_l_allocate_dense(nodes, 1, nodes, CHOLMOD_REAL, cc); bx = b->x; v0 = cholmod_l_allocate_dense(nodes, 1, nodes, CHOLMOD_REAL, cc); v0x = v0->x; /*================================================================*/ /*=============== make connectivity matrix ======================*/ /*================================================================*/ Si = (size_t *) S->i; Sp = (size_t *) S->p; Sx = (double *) S->x; Snz = (size_t *) S->nz; n = 0; Sp[0] = 0; for (k = 0; k < KMAX; k++) { for (j = 0; j < JMAX; j++) { for (i = 0; i < IMAX; i++) { /* Global index in x-fastest order*/ m = k * IMAX * JMAX + j * IMAX + i; Sp[m + 1] = Sp[m]; v0x[m] = (k + 1.0) / (KMAX + 1.0); /* Six coef.s of every node */ c_below = (mepr(i - 0.5, j - 0.5, k - 0.5) + mepr(i + 0.5, j - 0.5, k - 0.5) + mepr(i - 0.5, j + 0.5, k - 0.5) + mepr(i + 0.5, j + 0.5, k - 0.5)) / 4.0; c_front = (mepr(i - 0.5, j - 0.5, k - 0.5) + mepr(i + 
0.5, j - 0.5, k - 0.5) + mepr(i - 0.5, j - 0.5, k + 0.5) + mepr(i + 0.5, j - 0.5, k + 0.5)) / 4.0; c_left = (mepr(i - 0.5, j + 0.5, k - 0.5) + mepr(i - 0.5, j - 0.5, k - 0.5) + mepr(i - 0.5, j + 0.5, k + 0.5) + mepr(i - 0.5, j - 0.5, k + 0.5)) / 4.0; c_right = (mepr(i + 0.5, j + 0.5, k - 0.5) + mepr(i + 0.5, j - 0.5, k - 0.5) + mepr(i + 0.5, j + 0.5, k + 0.5) + mepr(i + 0.5, j - 0.5, k + 0.5)) / 4.0; c_back = (mepr(i - 0.5, j + 0.5, k - 0.5) + mepr(i + 0.5, j + 0.5, k - 0.5) + mepr(i - 0.5, j + 0.5, k + 0.5) + mepr(i + 0.5, j + 0.5, k + 0.5)) / 4.0; c_above = (mepr(i - 0.5, j - 0.5, k + 0.5) + mepr(i + 0.5, j - 0.5, k + 0.5) + mepr(i - 0.5, j + 0.5, k + 0.5) + mepr(i + 0.5, j + 0.5, k + 0.5))/4.0; /* Self term. */ c_self = -(c_above + c_below + c_left + c_right + c_front + c_back); Si[n] = m; Sx[n] = c_self; n++; Sp[m + 1]++; /* Node below. Ensure not on bottom face */ if (k != 0) { m_below = m - IMAX * JMAX; Si[n] = m_below; Sx[n] = c_below; n++; Sp[m + 1]++; } else { bx[m] = -c_below * VBOT; } /* Node front. Ensure not on front face. */ if (j != 0) { m_front = m - IMAX; } else { m_front = m + (JMAX - 1) * IMAX; } Si[n] = m_front; Sx[n] = c_front; n++; Sp[m + 1]++; /* Node to left. Ensure not on left face. */ if (i != 0) { m_left = m - 1; } else { m_left = m + IMAX -1; } Si[n] = m_left; Sx[n] = c_left; n++; Sp[m + 1]++; /* Node to right. Ensure not on right face. */ if (i != IMAX - 1) { m_right = m + 1; } else{ m_right = m - IMAX + 1; } Si[n] = m_right; Sx[n] = c_right; n++; Sp[m + 1]++; /* Node back. Ensure not on back face. */ if (j != JMAX - 1) { m_back = m + IMAX; } else { m_back = m - (JMAX - 1) * IMAX; } Si[n] = m_back; Sx[n] = c_back; n++; Sp[m + 1]++; /* Node top. 
Ensure not on top face */ if (k != KMAX - 1) { m_above = m + IMAX * JMAX; Si[n] = m_above; Sx[n] = c_above; n++; Sp[m + 1]++; } else { bx[m] = -c_above * VTOP; } Snz[m] = Sp[m + 1] - Sp[m]; } } } /*====================================================================*/ /*===================Done creating connectivity matrix.===============*/ /*====================================================================*/ /* report time*/ getrusage(0, &usage); now_s = usage.ru_utime.tv_sec; now_u = usage.ru_utime.tv_usec; time_now = now_s + now_u / 1.e6; printf("\nFinished creating connectivity matrix\n" " Incremental time %f\n" " Running time %f\n", time_now - time_past, time_now); time_past = time_now; /* Print all three matrixes */ cholmod_l_print_sparse(S, "S", cc); cholmod_l_print_dense(b, "b", cc); cholmod_l_print_dense(v0, "v0", cc); /* Allocate residual vector */ r = cholmod_l_zeros(nodes, 1, CHOLMOD_REAL, cc); /* The Preconditioned Conjugate Gradient Method */ /* Calculate the first residual value */ double one[2], zero[2], minusone[2]; zero[0] = 0.0; zero[1] = 0.0; one[0] = 1.0; one[1] = 0.0; minusone[0] = -1.0; minusone[1] = 0.0; cholmod_l_copy_dense2(b, r, cc); cholmod_l_sdmult(S, 0, minusone, one, v0, r, cc); printf("Initial 2-norm = %f\n", cholmod_l_norm_dense(r, 2, cc)); printf("Initial 1-norm = %f\n", cholmod_l_norm_dense(r, 1, cc)); printf("Initial 0-norm = %f\n", cholmod_l_norm_dense(r, 0, cc)); /* The iteration */ double rho1, rho0, beta = 0.0, alpha = 0.0; cholmod_dense *p1, *p0, *q; double *p1x, *p0x, *qx, *rx; /* p1 = cholmod_l_allocate_dense(nodes, 1, nodes, CHOLMOD_REAL, cc); p0 = cholmod_l_allocate_dense(nodes, 1, nodes, CHOLMOD_REAL, cc); q = cholmod_l_allocate_dense(nodes, 1, nodes, CHOLMOD_REAL, cc); */ p1 = cholmod_l_zeros(nodes, 1, CHOLMOD_REAL, cc); p0 = cholmod_l_zeros(nodes, 1, CHOLMOD_REAL, cc); q = cholmod_l_zeros(nodes, 1, CHOLMOD_REAL, cc); p1x = (double *) p1->x; p0x = (double *) p0->x; qx = (double *) q->x; rx = (double *) r->x; int 
iter; for (iter = 0; iter < MAX_ITER; iter++) { if (cholmod_l_norm_dense(r, 0, cc) < 1e-10) break; rho1 = cholmod_l_norm_dense(r, 2, cc); rho1 = rho1 * rho1; if (iter == 0) { cholmod_l_copy_dense2(r, p1, cc); } else { beta = rho1 / rho0; for (i = 0; i < nodes; i++) { p1x[i] = rx[i] + beta * p0x[i]; } } cholmod_l_sdmult(S, 0, one, zero, p1, q, cc); alpha = 0; for (i = 0; i < nodes; i++) { alpha += p1x[i] * qx[i]; } alpha = rho1 / alpha; for (i = 0; i < nodes; i++) { v0x[i] += alpha * p1x[i]; rx[i] -= alpha * qx[i]; } /* printf("iter = %d\n: rho1 = %f, rho0 = %f, alpha = %f, beta = %f\n", iter, rho1, rho0, alpha, beta); */ cholmod_l_copy_dense2(p1, p0, cc); rho0 = rho1; } cholmod_l_copy_dense2(b, r, cc); cholmod_l_sdmult(S, 0, minusone, one, v0, r, cc); printf("After %d iterations:\n", iter); printf("Final 2-norm: %f\n", cholmod_l_norm_dense(r, 2, cc)); printf("Final 1-norm: %f\n", cholmod_l_norm_dense(r, 1, cc)); printf("Final 0-norm: %f\n", cholmod_l_norm_dense(r, 0, cc)); /* sort sparse matrix and store it cholmod_l_sort(S, cc); S->stype = 0; */ /* Check if S is symmetric ST = cholmod_l_transpose(S, 2, cc); store = fopen("LpT.dat", "w"); cholmod_l_write_sparse(store, ST, NULL, NULL, cc); fclose(store); cholmod_l_free_sparse(&ST, cc); */ /* store the sparse matrix */ /* store = fopen("Laplace.dat", "w"); cholmod_l_write_sparse(store, S, NULL, NULL, cc); fclose(store); cholmod_l_free_sparse(&S, cc); */ /* store the force vector */ /* store = fopen("Force.dat", "w"); cholmod_l_write_dense(store, b, NULL, cc); fclose(store); cholmod_l_free_dense(&b,cc); */ /* store the guess vector */ store = fopen("Voltage.dat", "w"); cholmod_l_write_dense(store, v0, NULL, cc); fclose(store); cholmod_l_free_dense(&v0, cc); /* store the residual vector */ store = fopen("Residual.dat", "w"); cholmod_l_write_dense(store, r, NULL, cc); fclose(store); cholmod_l_free_dense(&r, cc); /* report time*/ getrusage(0, &usage); now_s = usage.ru_utime.tv_sec; now_u = usage.ru_utime.tv_usec; 
time_now = now_s + now_u / 1.e6; printf("\nFinished writing matrix\n" " Incremental time %f\n" " Running time %f\n", time_now - time_past, time_now); cholmod_l_finish(cc); return 0; }
/*
 * MATLAB gateway:  C = sdmult (S,F,transpose)
 *
 * Computes C = S*F, or C = S'*F when a third argument is given and is
 * nonzero.  S must be sparse and F must be full.  The inner dimensions are
 * validated before the sparse/full check, so a call that violates both
 * reports "invalid inner dimensions".
 */
void mexFunction
(
    int nargout,
    mxArray *pargout [ ],
    int nargin,
    const mxArray *pargin [ ]
)
{
    double dummy = 0 ;
    double alpha [2] = {1,0}, beta [2] = {0,0} ;
    cholmod_common Common, *cm ;
    cholmod_sparse Smatrix, *S ;
    cholmod_dense Fmatrix, *F, *C ;
    Long srow, scol, frow, fcol, crow, inner, transpose ;

    /* start CHOLMOD and set parameters */
    cm = &Common ;
    cholmod_l_start (cm) ;
    sputil_config (SPUMONI, cm) ;

    /* check the argument counts */
    if (nargin < 2 || nargin > 3 || nargout > 1)
    {
        mexErrMsgTxt ("Usage: C = sdmult (S,F,transpose)") ;
    }

    /* dimensions of both operands */
    srow = mxGetM (pargin [0]) ;
    scol = mxGetN (pargin [0]) ;
    frow = mxGetM (pargin [1]) ;
    fcol = mxGetN (pargin [1]) ;

    /* transpose only when a third, nonzero argument is present; the scalar
     * is not read at all when just two arguments were passed */
    transpose = (nargin != 2) && (mxGetScalar (pargin [2]) != 0) ;

    /* rows of F must match the inner dimension of S or S' */
    inner = transpose ? srow : scol ;
    if (frow != inner)
    {
        mexErrMsgTxt ("invalid inner dimensions") ;
    }

    if (mxIsSparse (pargin [1]) || !mxIsSparse (pargin [0]))
    {
        mexErrMsgTxt ("sdmult (S,F): S must be sparse, F must be full") ;
    }

    /* get S and F */
    S = sputil_get_sparse (pargin [0], &Smatrix, &dummy, 0) ;
    F = sputil_get_dense (pargin [1], &Fmatrix, &dummy) ;

    /* C = S*F or S'*F */
    crow = transpose ? scol : srow ;
    C = cholmod_l_allocate_dense (crow, fcol, crow, F->xtype, cm) ;
    cholmod_l_sdmult (S, transpose, alpha, beta, F, C, cm) ;
    pargout [0] = sputil_put_dense (&C, cm) ;

    /* finish CHOLMOD, then print the common object */
    cholmod_l_finish (cm) ;
    cholmod_l_print_common (" ", cm) ;
}
int main (int argc, char **argv) { cholmod_sparse *A ; cholmod_dense *X, *B, *r, *atr ; double anorm, xnorm, rnorm, one [2] = {1,0}, minusone [2] = {-1,0}, t ; double zero [2] = {0,0}, atrnorm ; int mtype ; long m, n, rnk ; size_t total_mem, available_mem ; // start CHOLMOD cholmod_common *cc, Common ; cc = &Common ; cholmod_l_start (cc) ; // warmup the GPU. This can take some time, but only needs // to be done once cc->useGPU = true ; t = SuiteSparse_time ( ) ; cholmod_l_gpu_memorysize (&total_mem, &available_mem, cc) ; cc->gpuMemorySize = available_mem ; t = SuiteSparse_time ( ) - t ; if (cc->gpuMemorySize <= 1) { printf ("no GPU available\n") ; } printf ("available GPU memory: %g MB, warmup time: %g\n", (double) (cc->gpuMemorySize) / (1024 * 1024), t) ; // A = mread (stdin) ; read in the sparse matrix A const char *filename = (argc < 2 ? "Problems/2.mtx" : argv[1]); FILE *file = fopen(filename, "r"); A = (cholmod_sparse *) cholmod_l_read_matrix (file, 1, &mtype, cc) ; fclose(file); if (mtype != CHOLMOD_SPARSE) { printf ("input matrix must be sparse\n") ; exit (1) ; } // [m n] = size (A) ; m = A->nrow ; n = A->ncol ; long ordering = (argc < 3 ? 
SPQR_ORDERING_DEFAULT : atoi(argv[2])); #if 1 printf ("Matrix %6ld-by-%-6ld nnz: %6ld\n", m, n, cholmod_l_nnz (A, cc)) ; #endif // anorm = norm (A,1) ; anorm = cholmod_l_norm_sparse (A, 1, cc) ; // B = ones (m,1), a dense right-hand-side of the same type as A B = cholmod_l_ones (m, 1, A->xtype, cc) ; // X = A\B ; with default ordering and default column 2-norm tolerance if (A->xtype == CHOLMOD_REAL) { // A, X, and B are all real X = SuiteSparseQR <double>(ordering, SPQR_NO_TOL, A, B, cc) ; } else { #if SUPPORTS_COMPLEX // A, X, and B are all complex X = SuiteSparseQR < std::complex<double> > (SPQR_ORDERING_DEFAULT, SPQR_NO_TOL, A, B, cc) ; #else printf("Code doesn't support std::complex<?> types.\n"); #endif } // get the rank(A) estimate rnk = cc->SPQR_istat [4] ; // compute the residual r, and A'*r, and their norms r = cholmod_l_copy_dense (B, cc) ; // r = B cholmod_l_sdmult (A, 0, one, minusone, X, r, cc) ; // r = A*X-r = A*x-b rnorm = cholmod_l_norm_dense (r, 2, cc) ; // rnorm = norm (r) atr = cholmod_l_zeros (n, 1, CHOLMOD_REAL, cc) ; // atr = zeros (n,1) cholmod_l_sdmult (A, 1, one, zero, r, atr, cc) ; // atr = A'*r atrnorm = cholmod_l_norm_dense (atr, 2, cc) ; // atrnorm = norm (atr) // xnorm = norm (X) xnorm = cholmod_l_norm_dense (X, 2, cc) ; // write out X to a file FILE *f = fopen ("X.mtx", "w") ; cholmod_l_write_dense (f, X, NULL, cc) ; fclose (f) ; if (m <= n && anorm > 0 && xnorm > 0) { // find the relative residual, except for least-squares systems rnorm /= (anorm * xnorm) ; } printf ("\nnorm(Ax-b): %8.1e\n", rnorm) ; printf ("norm(A'(Ax-b)) %8.1e rank: %ld of %ld\n", atrnorm, rnk, (m < n) ? m:n) ; /* Write an info file. 
*/ FILE *info = fopen("gpu_results.txt", "w"); fprintf(info, "%ld\n", cc->SPQR_istat[7]); // ordering method fprintf(info, "%ld\n", cc->memory_usage); // memory usage (bytes) fprintf(info, "%30.16e\n", cc->SPQR_flopcount); // flop count fprintf(info, "%lf\n", cc->SPQR_analyze_time); // analyze time fprintf(info, "%lf\n", cc->SPQR_factorize_time); // factorize time fprintf(info, "-1\n") ; // cpu memory (bytes) fprintf(info, "-1\n") ; // gpu memory (bytes) fprintf(info, "%32.16e\n", rnorm); // residual fprintf(info, "%ld\n", cholmod_l_nnz (A, cc)); // nnz(A) fprintf(info, "%ld\n", cc->SPQR_istat [0]); // nnz(R) fprintf(info, "%ld\n", cc->SPQR_istat [2]); // # of frontal matrices fprintf(info, "%ld\n", cc->SPQR_istat [3]); // ntasks, for now fprintf(info, "%lf\n", cc->gpuKernelTime); // kernel time (ms) fprintf(info, "%ld\n", cc->gpuFlops); // "actual" gpu flops fprintf(info, "%d\n", cc->gpuNumKernelLaunches); // # of kernel launches fprintf(info, "%32.16e\n", atrnorm) ; // norm (A'*(Ax-b)) fclose(info); // free everything cholmod_l_free_dense (&r, cc) ; cholmod_l_free_dense (&atr, cc) ; cholmod_l_free_sparse (&A, cc) ; cholmod_l_free_dense (&X, cc) ; cholmod_l_free_dense (&B, cc) ; cholmod_l_finish (cc) ; return (0) ; }