static void _get_local_tolerance(const cs_real_t vtx_coords[], double vtx_tolerance[], const cs_int_t n_faces, const cs_int_t face_vtx_idx[], const cs_int_t face_vtx_lst[], double fraction) { cs_lnum_t j, k, start, end, face_id, vtx_id1, vtx_id2; cs_real_t length, tolerance; cs_real_t a[3], b[3]; for (face_id = 0; face_id < n_faces; face_id++) { start = face_vtx_idx[face_id]; end = face_vtx_idx[face_id + 1]; /* Loop on the vertices of the face */ for (j = start; j < end - 1; j++) { vtx_id1 = face_vtx_lst[j]; vtx_id2 = face_vtx_lst[j+1]; for (k = 0; k < 3; k++) { a[k] = vtx_coords[3*vtx_id1 + k]; b[k] = vtx_coords[3*vtx_id2 + k]; } length = _compute_distance(a, b); tolerance = length * fraction; vtx_tolerance[vtx_id1] = CS_MIN(vtx_tolerance[vtx_id1], tolerance); vtx_tolerance[vtx_id2] = CS_MIN(vtx_tolerance[vtx_id2], tolerance); } /* Case end - start */ vtx_id1 = face_vtx_lst[end-1]; vtx_id2 = face_vtx_lst[start]; for (k = 0; k < 3; k++) { a[k] = vtx_coords[3*vtx_id1 + k]; b[k] = vtx_coords[3*vtx_id2 + k]; } length = _compute_distance(a, b); tolerance = length * fraction; vtx_tolerance[vtx_id1] = CS_MIN(vtx_tolerance[vtx_id1], tolerance); vtx_tolerance[vtx_id2] = CS_MIN(vtx_tolerance[vtx_id2], tolerance); } /* End of loop on faces */ }
/* sparse Cholesky update/downdate, L*L' + sigma*w*w' (sigma = +1 or -1) */ int cs_updown (cs *L, int sigma, const cs *C, const int *parent) { int n, p, f, j, *Lp, *Li, *Cp, *Ci ; double *Lx, *Cx, alpha, beta = 1, delta, gamma, w1, w2, *w, beta2 = 1 ; if (!CS_CSC (L) || !CS_CSC (C) || !parent) return (0) ; /* check inputs */ Lp = L->p ; Li = L->i ; Lx = L->x ; n = L->n ; Cp = C->p ; Ci = C->i ; Cx = C->x ; if ((p = Cp [0]) >= Cp [1]) return (1) ; /* return if C empty */ w = cs_malloc (n, sizeof (double)) ; /* get workspace */ if (!w) return (0) ; /* out of memory */ f = Ci [p] ; for ( ; p < Cp [1] ; p++) f = CS_MIN (f, Ci [p]) ; /* f = min (find (C)) */ for (j = f ; j != -1 ; j = parent [j]) w [j] = 0 ; /* clear workspace w */ for (p = Cp [0] ; p < Cp [1] ; p++) w [Ci [p]] = Cx [p] ; /* w = C */ for (j = f ; j != -1 ; j = parent [j]) /* walk path f up to root */ { p = Lp [j] ; alpha = w [j] / Lx [p] ; /* alpha = w(j) / L(j,j) */ beta2 = beta*beta + sigma*alpha*alpha ; if (beta2 <= 0) break ; /* not positive definite */ beta2 = sqrt (beta2) ; delta = (sigma > 0) ? (beta / beta2) : (beta2 / beta) ; gamma = sigma * alpha / (beta2 * beta) ; Lx [p] = delta * Lx [p] + ((sigma > 0) ? (gamma * w [j]) : 0) ; beta = beta2 ; for (p++ ; p < Lp [j+1] ; p++) { w1 = w [Li [p]] ; w [Li [p]] = w2 = w1 - alpha * Lx [p] ; Lx [p] = delta * Lx [p] + gamma * ((sigma > 0) ? w1 : w2) ; } } cs_free (w) ; return (beta2 > 0) ; }
static void _compute_minmax(cs_int_t n_vals, const cs_real_t var[], cs_real_t *min, cs_real_t *max) { cs_int_t i; cs_real_t _min = DBL_MAX, _max = -DBL_MAX; for (i = 0; i < n_vals; i++) { _min = CS_MIN(_min, var[i]); _max = CS_MAX(_max, var[i]); } #if defined(HAVE_MPI) if (cs_glob_n_ranks > 1) { MPI_Allreduce(&_min, min, 1, CS_MPI_REAL, MPI_MIN, cs_glob_mpi_comm); MPI_Allreduce(&_max, max, 1, CS_MPI_REAL, MPI_MAX, cs_glob_mpi_comm); } #endif if (cs_glob_n_ranks == 1) { *min = _min; *max = _max; } }
/* cs_lusol: solve A*x=b using a sparse LU factorization */ void mexFunction ( int nargout, mxArray *pargout [ ], int nargin, const mxArray *pargin [ ] ) { double tol ; CS_INT order ; if (nargout > 1 || nargin < 2 || nargin > 4) { mexErrMsgTxt ("Usage: x = cs_lusol(A,b,order,tol)") ; } order = (nargin < 3) ? 2 : mxGetScalar (pargin [2]) ; order = CS_MAX (order, 0) ; order = CS_MIN (order, 3) ; if (nargin == 2) { tol = 1 ; /* normal partial pivoting */ } else if (nargin == 3) { tol = (order == 1) ? 0.001 : 1 ; /* tol = 0.001 for amd(A+A') */ } else { tol = mxGetScalar (pargin [3]) ; } if (mxIsComplex (pargin [0]) || mxIsComplex (pargin [1])) { #ifndef NCOMPLEX cs_cl *A, Amatrix ; cs_complex_t *x ; A = cs_cl_mex_get_sparse (&Amatrix, 1, pargin [0]) ; /* get A */ x = cs_cl_mex_get_double (A->n, pargin [1]) ; /* x = b */ if (!cs_cl_lusol (order, A, x, tol)) /* x = A\x */ { mexErrMsgTxt ("failed (singular or out of memory)") ; } cs_cl_free (A->x) ; /* complex copy no longer needed */ pargout [0] = cs_cl_mex_put_double (A->n, x) ; /* return x */ #else mexErrMsgTxt ("complex matrices not supported") ; #endif } else { cs_dl *A, Amatrix ; double *x, *b ; A = cs_dl_mex_get_sparse (&Amatrix, 1, 1, pargin [0]) ; /* get A */ b = cs_dl_mex_get_double (A->n, pargin [1]) ; /* get b */ x = cs_dl_mex_put_double (A->n, b, &(pargout [0])) ; /* x = b */ if (!cs_dl_lusol (order, A, x, tol)) /* x = A\x */ { mexErrMsgTxt ("failed (singular or out of memory)") ; } } }
static void init_ata (cs *AT, const int *post, int *w, int **head, int **next) { int i, k, p, m = AT->n, n = AT->m, *ATp = AT->p, *ATi = AT->i ; *head = w+4*n, *next = w+5*n+1 ; for (k = 0 ; k < n ; k++) w [post [k]] = k ; /* invert post */ for (i = 0 ; i < m ; i++) { for (k = n, p = ATp[i] ; p < ATp[i+1] ; p++) k = CS_MIN (k, w [ATi[p]]); (*next) [i] = (*head) [k] ; /* place row i in linked list k */ (*head) [k] = i ; } }
/* cs_qrsol: solve least squares or underdetermined problem */ void mexFunction ( int nargout, mxArray *pargout [ ], int nargin, const mxArray *pargin [ ] ) { CS_INT k, order ; if (nargout > 1 || nargin < 2 || nargin > 3) { mexErrMsgTxt ("Usage: x = cs_qrsol(A,b,order)") ; } order = (nargin < 3) ? 3 : mxGetScalar (pargin [2]) ; order = CS_MAX (order, 0) ; order = CS_MIN (order, 3) ; if (mxIsComplex (pargin [0]) || mxIsComplex (pargin [1])) { #ifndef NCOMPLEX cs_cl *A, Amatrix ; cs_complex_t *x, *b ; A = cs_cl_mex_get_sparse (&Amatrix, 0, pargin [0]) ; /* get A */ b = cs_cl_mex_get_double (A->m, pargin [1]) ; /* get b */ x = cs_dl_calloc (CS_MAX (A->m, A->n), sizeof (cs_complex_t)) ; for (k = 0 ; k < A->m ; k++) x [k] = b [k] ; /* x = b */ cs_free (b) ; if (!cs_cl_qrsol (order, A, x)) /* x = A\x */ { mexErrMsgTxt ("QR solve failed") ; } pargout [0] = cs_cl_mex_put_double (A->n, x) ; /* return x */ #else mexErrMsgTxt ("complex matrices not supported") ; #endif } else { cs_dl *A, Amatrix ; double *x, *b ; A = cs_dl_mex_get_sparse (&Amatrix, 0, 1, pargin [0]) ; /* get A */ b = cs_dl_mex_get_double (A->m, pargin [1]) ; /* get b */ x = cs_dl_calloc (CS_MAX (A->m, A->n), sizeof (double)) ; /* x = b */ for (k = 0 ; k < A->m ; k++) x [k] = b [k] ; if (!cs_dl_qrsol (order, A, x)) /* x = A\x */ { mexErrMsgTxt ("QR solve failed") ; } cs_dl_mex_put_double (A->n, x, &(pargout [0])) ; /* return x */ cs_free (x) ; } }
/* find a maximum transveral */ int *cs_maxtrans (const cs *A, int seed) /*[jmatch [0..m-1]; imatch [0..n-1]]*/ { int i, j, k, n, m, p, n2 = 0, m2 = 0, *Ap, *jimatch, *w, *cheap, *js, *is, *ps, *Ai, *Cp, *jmatch, *imatch, *q ; cs *C ; if (!CS_CSC (A)) return (NULL) ; /* check inputs */ n = A->n ; m = A->m ; Ap = A->p ; Ai = A->i ; w = jimatch = cs_calloc (m+n, sizeof (int)) ; /* allocate result */ if (!jimatch) return (NULL) ; for (k = 0, j = 0 ; j < n ; j++) /* count nonempty rows and columns */ { n2 += (Ap [j] < Ap [j+1]) ; for (p = Ap [j] ; p < Ap [j+1] ; p++) { w [Ai [p]] = 1 ; k += (j == Ai [p]) ; /* count entries already on diagonal */ } } if (k == CS_MIN (m,n)) /* quick return if diagonal zero-free */ { jmatch = jimatch ; imatch = jimatch + m ; for (i = 0 ; i < k ; i++) jmatch [i] = i ; for ( ; i < m ; i++) jmatch [i] = -1 ; for (j = 0 ; j < k ; j++) imatch [j] = j ; for ( ; j < n ; j++) imatch [j] = -1 ; return (cs_idone (jimatch, NULL, NULL, 1)) ; } for (i = 0 ; i < m ; i++) m2 += w [i] ; C = (m2 < n2) ? cs_transpose (A,0) : ((cs *) A) ; /* transpose if needed */ if (!C) return (cs_idone (jimatch, (m2 < n2) ? C : NULL, NULL, 0)) ; n = C->n ; m = C->m ; Cp = C->p ; jmatch = (m2 < n2) ? jimatch + n : jimatch ; imatch = (m2 < n2) ? jimatch : jimatch + m ; w = cs_malloc (5*n, sizeof (int)) ; /* get workspace */ if (!w) return (cs_idone (jimatch, (m2 < n2) ? C : NULL, w, 0)) ; cheap = w + n ; js = w + 2*n ; is = w + 3*n ; ps = w + 4*n ; for (j = 0 ; j < n ; j++) cheap [j] = Cp [j] ; /* for cheap assignment */ for (j = 0 ; j < n ; j++) w [j] = -1 ; /* all columns unflagged */ for (i = 0 ; i < m ; i++) jmatch [i] = -1 ; /* nothing matched yet */ q = cs_randperm (n, seed) ; /* q = random permutation */ for (k = 0 ; k < n ; k++) /* augment, starting at column q[k] */ { cs_augment (q ? q [k]: k, C, jmatch, cheap, w, js, is, ps) ; } cs_free (q) ; for (j = 0 ; j < n ; j++) imatch [j] = -1 ; /* find row match */ for (i = 0 ; i < m ; i++) if (jmatch [i] >= 0) imatch [jmatch [i]] = i ; return (cs_idone (jimatch, (m2 < n2) ? C : NULL, w, 1)) ; }
/* sparse Cholesky update/downdate, L*L' + sigma*w*w' (sigma = +1 or -1) */ CS_INT cs_updown (cs *L, CS_INT sigma, const cs *C, const CS_INT *parent) { CS_INT n, p, f, j, *Lp, *Li, *Cp, *Ci ; CS_ENTRY *Lx, *Cx, alpha, gamma, w1, w2, *w ; double beta = 1, beta2 = 1, delta ; #ifdef CS_COMPLEX cs_complex_t phase ; #endif if (!CS_CSC (L) || !CS_CSC (C) || !parent) return (0) ; /* check inputs */ Lp = L->p ; Li = L->i ; Lx = L->x ; n = L->n ; Cp = C->p ; Ci = C->i ; Cx = C->x ; if ((p = Cp [0]) >= Cp [1]) return (1) ; /* return if C empty */ w = cs_malloc (n, sizeof (CS_ENTRY)) ; /* get workspace */ if (!w) return (0) ; /* out of memory */ f = Ci [p] ; for ( ; p < Cp [1] ; p++) f = CS_MIN (f, Ci [p]) ; /* f = min (find (C)) */ for (j = f ; j != -1 ; j = parent [j]) w [j] = 0 ; /* clear workspace w */ for (p = Cp [0] ; p < Cp [1] ; p++) w [Ci [p]] = Cx [p] ; /* w = C */ for (j = f ; j != -1 ; j = parent [j]) /* walk path f up to root */ { p = Lp [j] ; alpha = w [j] / Lx [p] ; /* alpha = w(j) / L(j,j) */ beta2 = beta*beta + sigma*alpha*CS_CONJ(alpha) ; if (beta2 <= 0) break ; /* not positive definite */ beta2 = sqrt (beta2) ; delta = (sigma > 0) ? (beta / beta2) : (beta2 / beta) ; gamma = sigma * CS_CONJ(alpha) / (beta2 * beta) ; Lx [p] = delta * Lx [p] + ((sigma > 0) ? (gamma * w [j]) : 0) ; beta = beta2 ; #ifdef CS_COMPLEX phase = CS_ABS (Lx [p]) / Lx [p] ; /* phase = abs(L(j,j))/L(j,j)*/ Lx [p] *= phase ; /* L(j,j) = L(j,j) * phase */ #endif for (p++ ; p < Lp [j+1] ; p++) { w1 = w [Li [p]] ; w [Li [p]] = w2 = w1 - alpha * Lx [p] ; Lx [p] = delta * Lx [p] + gamma * ((sigma > 0) ? w1 : w2) ; #ifdef CS_COMPLEX Lx [p] *= phase ; /* L(i,j) = L(i,j) * phase */ #endif } } cs_free (w) ; return (beta2 > 0) ; }
cs *cs_symperm(const cs *A, const int *pinv, int values) { int i, j, p, q, i2, j2, n, *Ap, *Ai, *Cp, *Ci, *w; double *Cx, *Ax; cs *C; if (!CS_CSC (A)) return (NULL); /* check inputs */ n = A->n; Ap = A->p; Ai = A->i; Ax = A->x; C = cs_spalloc(n, n, Ap[n], values && (Ax != NULL), 0); /* alloc result*/ w = (int *) cs_calloc(n, sizeof(int)); /* get workspace */ if (!C || !w) return (cs_done(C, w, NULL, 0)); /* out of memory */ Cp = C->p; Ci = C->i; Cx = C->x; for (j = 0; j < n; j++) /* count entries in each column of C */ { j2 = pinv ? pinv[j] : j; /* column j of A is column j2 of C */ for (p = Ap[j]; p < Ap[j + 1]; p++) { i = Ai[p]; if (i > j) continue; /* skip lower triangular part of A */ i2 = pinv ? pinv[i] : i; /* row i of A is row i2 of C */ w[CS_MAX (i2, j2)]++; /* column count of C */ } } cs_cumsum(Cp, w, n); /* compute column pointers of C */ for (j = 0; j < n; j++) { j2 = pinv ? pinv[j] : j; /* column j of A is column j2 of C */ for (p = Ap[j]; p < Ap[j + 1]; p++) { i = Ai[p]; if (i > j) continue; /* skip lower triangular part of A*/ i2 = pinv ? pinv[i] : i; /* row i of A is row i2 of C */ Ci[q = w[CS_MAX (i2, j2)]++] = CS_MIN (i2, j2); if (Cx) Cx[q] = Ax[p]; } } return (cs_done(C, w, NULL, 1)); /* success; free workspace, return C */ }
void mexFunction ( int nargout, mxArray *pargout [ ], int nargin, const mxArray *pargin [ ] ) { cs Amatrix, *A ; int m, n, mn, m2, n2, k, s, j, ij, sj, si, p, *Ap, *Ai ; double aij, *S, *Ax ; if (nargout > 1 || nargin < 1 || nargin > 2) { mexErrMsgTxt ("Usage: S = cs_thumb(A,k)") ; } A = cs_mex_get_sparse (&Amatrix, 0, 1, pargin [0]) ; /* get A */ m = A->m ; n = A->n ; mn = CS_MAX (m,n) ; k = (nargin == 1) ? 256 : mxGetScalar (pargin [1]) ; /* get k */ /* s = size of each submatrix; A(1:s,1:s) maps to S(1,1) */ s = (mn < k) ? 1 : (int) ceil ((double) mn / (double) k) ; m2 = (int) ceil ((double) m / (double) s) ; n2 = (int) ceil ((double) n / (double) s) ; /* create S */ pargout [0] = mxCreateDoubleMatrix (m2, n2, mxREAL) ; S = mxGetPr (pargout [0]) ; Ap = A->p ; Ai = A->i ; Ax = A->x ; for (j = 0 ; j < n ; j++) { sj = j/s ; for (p = Ap [j] ; p < Ap [j+1] ; p++) { si = Ai [p] / s ; ij = INDEX (si,sj,m2) ; aij = fabs (Ax [p]) ; if (ISNAN (aij)) aij = BIG_VALUE ; aij = CS_MIN (BIG_VALUE, aij) ; S [ij] = CS_MAX (S [ij], aij) ; } } }
/* symbolic ordering and analysis for LU */ css *csr_sqr (int order, const csr *A ) { int n, ok = 1; css *S ; if (!CS_CSC (A)) return (NULL) ; /* check inputs */ n = A->n ; S = (css*)calloc(1, sizeof (css)) ; /* allocate result S */ if (!S) return (NULL) ; /* out of memory */ S->q = csr_amd (order, A) ; /* fill-reducing ordering */ if (!S->q) { printf(" csr_sqr error no permutation\n"); } if (order && !S->q) return (csr_sfree (S)) ; /* LU factorization */ S->unz = (double) CS_MIN(4*(A->p [n]) + n, n * n ); S->lnz = S->unz ; /* guess nnz(L) and nnz(U) */ return (ok ? S : csr_sfree (S)) ; /* return result S */ }
/* cs_amd: approximate minimum degree ordering */ void mexFunction ( int nargout, mxArray *pargout [ ], int nargin, const mxArray *pargin [ ] ) { cs Amatrix, *A ; int *P, order ; if (nargout > 1 || nargin < 1 || nargin > 2) { mexErrMsgTxt ("Usage: p = cs_amd(A,order)") ; } A = cs_mex_get_sparse (&Amatrix, 0, 0, pargin [0]) ; /* get A */ order = (nargin > 1) ? mxGetScalar (pargin [1]) : 1 ; /* get ordering */ order = CS_MAX (order, 1) ; order = CS_MIN (order, 3) ; P = cs_amd (order, A) ; /* min. degree ordering */ pargout [0] = cs_mex_put_int (P, A->n, 1, 1) ; /* return P */ }
/* cs_lusol: solve A*x=b using a sparse LU factorization */ void mexFunction ( int nargout, mxArray *pargout [ ], int nargin, const mxArray *pargin [ ] ) { cs *A, Amatrix ; csi order ; double *x, *b, tol ; if (nargout > 1 || nargin < 2 || nargin > 4) { mexErrMsgTxt ("Usage: x = cs_lusol(A,b,order,tol)") ; } A = cs_mex_get_sparse (&Amatrix, 1, 1, pargin [0]) ; /* get A */ b = cs_mex_get_double (A->n, pargin [1]) ; /* get b */ x = cs_mex_put_double (A->n, b, &(pargout [0])) ; /* x = b */ order = (nargin < 3) ? 2 : mxGetScalar (pargin [2]) ; order = CS_MAX (order, 0) ; order = CS_MIN (order, 3) ; if (nargin == 2) { tol = 1 ; /* normal partial pivoting */ } else if (nargin == 3) { tol = (order == 1) ? 0.001 : 1 ; /* tol = 0.001 for amd(A+A') */ } else { tol = mxGetScalar (pargin [3]) ; } if (!cs_lusol (order, A, x, tol)) /* x = A\x */ { mexErrMsgTxt ("LU factorization failed (singular or out of memory)") ; } }
/* p = amd(A+A') if symmetric is true, or amd(A'A) otherwise */ CS_INT *cs_amd (CS_INT order, const cs *A) /* order 0:natural, 1:Chol, 2:LU, 3:QR */ { cs *C, *A2, *AT ; CS_INT *Cp, *Ci, *last, *W, *len, *nv, *next, *P, *head, *elen, *degree, *w, *hhead, *ATp, *ATi, d, dk, dext, lemax = 0, e, elenk, eln, i, j, k, k1, k2, k3, jlast, ln, dense, nzmax, mindeg = 0, nvi, nvj, nvk, mark, wnvi, ok, cnz, nel = 0, p, p1, p2, p3, p4, pj, pk, pk1, pk2, pn, q, n, m, t ; unsigned CS_INT h ; /* --- Construct matrix C ----------------------------------------------- */ if (!CS_CSC (A) || order <= 0 || order > 3) return (NULL) ; /* check */ AT = cs_transpose (A, 0) ; /* compute A' */ if (!AT) return (NULL) ; m = A->m ; n = A->n ; dense = CS_MAX (16, 10 * sqrt ((double) n)) ; /* find dense threshold */ dense = CS_MIN (n-2, dense) ; if (order == 1 && n == m) { C = cs_add (A, AT, 0, 0) ; /* C = A+A' */ } else if (order == 2) { ATp = AT->p ; /* drop dense columns from AT */ ATi = AT->i ; for (p2 = 0, j = 0 ; j < m ; j++) { p = ATp [j] ; /* column j of AT starts here */ ATp [j] = p2 ; /* new column j starts here */ if (ATp [j+1] - p > dense) continue ; /* skip dense col j */ for ( ; p < ATp [j+1] ; p++) ATi [p2++] = ATi [p] ; } ATp [m] = p2 ; /* finalize AT */ A2 = cs_transpose (AT, 0) ; /* A2 = AT' */ C = A2 ? cs_multiply (AT, A2) : NULL ; /* C=A'*A with no dense rows */ cs_spfree (A2) ; } else { C = cs_multiply (AT, A) ; /* C=A'*A */ } cs_spfree (AT) ; if (!C) return (NULL) ; cs_fkeep (C, &cs_diag, NULL) ; /* drop diagonal entries */ Cp = C->p ; cnz = Cp [n] ; P = cs_malloc (n+1, sizeof (CS_INT)) ; /* allocate result */ W = cs_malloc (8*(n+1), sizeof (CS_INT)) ; /* get workspace */ t = cnz + cnz/5 + 2*n ; /* add elbow room to C */ if (!P || !W || !cs_sprealloc (C, t)) return (cs_idone (P, C, W, 0)) ; len = W ; nv = W + (n+1) ; next = W + 2*(n+1) ; head = W + 3*(n+1) ; elen = W + 4*(n+1) ; degree = W + 5*(n+1) ; w = W + 6*(n+1) ; hhead = W + 7*(n+1) ; last = P ; /* use P as workspace for last */ /* --- Initialize quotient graph ---------------------------------------- */ for (k = 0 ; k < n ; k++) len [k] = Cp [k+1] - Cp [k] ; len [n] = 0 ; nzmax = C->nzmax ; Ci = C->i ; for (i = 0 ; i <= n ; i++) { head [i] = -1 ; /* degree list i is empty */ last [i] = -1 ; next [i] = -1 ; hhead [i] = -1 ; /* hash list i is empty */ nv [i] = 1 ; /* node i is just one node */ w [i] = 1 ; /* node i is alive */ elen [i] = 0 ; /* Ek of node i is empty */ degree [i] = len [i] ; /* degree of node i */ } mark = cs_wclear (0, 0, w, n) ; /* clear w */ elen [n] = -2 ; /* n is a dead element */ Cp [n] = -1 ; /* n is a root of assembly tree */ w [n] = 0 ; /* n is a dead element */ /* --- Initialize degree lists ------------------------------------------ */ for (i = 0 ; i < n ; i++) { d = degree [i] ; if (d == 0) /* node i is empty */ { elen [i] = -2 ; /* element i is dead */ nel++ ; Cp [i] = -1 ; /* i is a root of assembly tree */ w [i] = 0 ; } else if (d > dense) /* node i is dense */ { nv [i] = 0 ; /* absorb i into element n */ elen [i] = -1 ; /* node i is dead */ nel++ ; Cp [i] = CS_FLIP (n) ; nv [n]++ ; } else { if (head [d] != -1) last [head [d]] = i ; next [i] = head [d] ; /* put node i in degree list d */ head [d] = i ; } } while (nel < n) /* while (selecting pivots) do */ { /* --- Select node of minimum approximate degree -------------------- */ for (k = -1 ; mindeg < n && (k = head [mindeg]) == -1 ; mindeg++) ; if (next [k] != -1) last [next [k]] = -1 ; head [mindeg] = next [k] ; /* remove k from degree list */ elenk = elen [k] ; /* elenk = |Ek| */ nvk = nv [k] ; /* # of nodes k represents */ nel += nvk ; /* nv[k] nodes of A eliminated */ /* --- Garbage collection ------------------------------------------- */ if (elenk > 0 && cnz + mindeg >= nzmax) { for (j = 0 ; j < n ; j++) { if ((p = Cp [j]) >= 0) /* j is a live node or element */ { Cp [j] = Ci [p] ; /* save first entry of object */ Ci [p] = CS_FLIP (j) ; /* first entry is now CS_FLIP(j) */ } } for (q = 0, p = 0 ; p < cnz ; ) /* scan all of memory */ { if ((j = CS_FLIP (Ci [p++])) >= 0) /* found object j */ { Ci [q] = Cp [j] ; /* restore first entry of object */ Cp [j] = q++ ; /* new pointer to object j */ for (k3 = 0 ; k3 < len [j]-1 ; k3++) Ci [q++] = Ci [p++] ; } } cnz = q ; /* Ci [cnz...nzmax-1] now free */ } /* --- Construct new element ---------------------------------------- */ dk = 0 ; nv [k] = -nvk ; /* flag k as in Lk */ p = Cp [k] ; pk1 = (elenk == 0) ? p : cnz ; /* do in place if elen[k] == 0 */ pk2 = pk1 ; for (k1 = 1 ; k1 <= elenk + 1 ; k1++) { if (k1 > elenk) { e = k ; /* search the nodes in k */ pj = p ; /* list of nodes starts at Ci[pj]*/ ln = len [k] - elenk ; /* length of list of nodes in k */ } else { e = Ci [p++] ; /* search the nodes in e */ pj = Cp [e] ; ln = len [e] ; /* length of list of nodes in e */ } for (k2 = 1 ; k2 <= ln ; k2++) { i = Ci [pj++] ; if ((nvi = nv [i]) <= 0) continue ; /* node i dead, or seen */ dk += nvi ; /* degree[Lk] += size of node i */ nv [i] = -nvi ; /* negate nv[i] to denote i in Lk*/ Ci [pk2++] = i ; /* place i in Lk */ if (next [i] != -1) last [next [i]] = last [i] ; if (last [i] != -1) /* remove i from degree list */ { next [last [i]] = next [i] ; } else { head [degree [i]] = next [i] ; } } if (e != k) { Cp [e] = CS_FLIP (k) ; /* absorb e into k */ w [e] = 0 ; /* e is now a dead element */ } } if (elenk != 0) cnz = pk2 ; /* Ci [cnz...nzmax] is free */ degree [k] = dk ; /* external degree of k - |Lk\i| */ Cp [k] = pk1 ; /* element k is in Ci[pk1..pk2-1] */ len [k] = pk2 - pk1 ; elen [k] = -2 ; /* k is now an element */ /* --- Find set differences ----------------------------------------- */ mark = cs_wclear (mark, lemax, w, n) ; /* clear w if necessary */ for (pk = pk1 ; pk < pk2 ; pk++) /* scan 1: find |Le\Lk| */ { i = Ci [pk] ; if ((eln = elen [i]) <= 0) continue ;/* skip if elen[i] empty */ nvi = -nv [i] ; /* nv [i] was negated */ wnvi = mark - nvi ; for (p = Cp [i] ; p <= Cp [i] + eln - 1 ; p++) /* scan Ei */ { e = Ci [p] ; if (w [e] >= mark) { w [e] -= nvi ; /* decrement |Le\Lk| */ } else if (w [e] != 0) /* ensure e is a live element */ { w [e] = degree [e] + wnvi ; /* 1st time e seen in scan 1 */ } } } /* --- Degree update ------------------------------------------------ */ for (pk = pk1 ; pk < pk2 ; pk++) /* scan2: degree update */ { i = Ci [pk] ; /* consider node i in Lk */ p1 = Cp [i] ; p2 = p1 + elen [i] - 1 ; pn = p1 ; for (h = 0, d = 0, p = p1 ; p <= p2 ; p++) /* scan Ei */ { e = Ci [p] ; if (w [e] != 0) /* e is an unabsorbed element */ { dext = w [e] - mark ; /* dext = |Le\Lk| */ if (dext > 0) { d += dext ; /* sum up the set differences */ Ci [pn++] = e ; /* keep e in Ei */ h += e ; /* compute the hash of node i */ } else { Cp [e] = CS_FLIP (k) ; /* aggressive absorb. e->k */ w [e] = 0 ; /* e is a dead element */ } } } elen [i] = pn - p1 + 1 ; /* elen[i] = |Ei| */ p3 = pn ; p4 = p1 + len [i] ; for (p = p2 + 1 ; p < p4 ; p++) /* prune edges in Ai */ { j = Ci [p] ; if ((nvj = nv [j]) <= 0) continue ; /* node j dead or in Lk */ d += nvj ; /* degree(i) += |j| */ Ci [pn++] = j ; /* place j in node list of i */ h += j ; /* compute hash for node i */ } if (d == 0) /* check for mass elimination */ { Cp [i] = CS_FLIP (k) ; /* absorb i into k */ nvi = -nv [i] ; dk -= nvi ; /* |Lk| -= |i| */ nvk += nvi ; /* |k| += nv[i] */ nel += nvi ; nv [i] = 0 ; elen [i] = -1 ; /* node i is dead */ } else { degree [i] = CS_MIN (degree [i], d) ; /* update degree(i) */ Ci [pn] = Ci [p3] ; /* move first node to end */ Ci [p3] = Ci [p1] ; /* move 1st el. to end of Ei */ Ci [p1] = k ; /* add k as 1st element in of Ei */ len [i] = pn - p1 + 1 ; /* new len of adj. list of node i */ h %= n ; /* finalize hash of i */ next [i] = hhead [h] ; /* place i in hash bucket */ hhead [h] = i ; last [i] = h ; /* save hash of i in last[i] */ } } /* scan2 is done */ degree [k] = dk ; /* finalize |Lk| */ lemax = CS_MAX (lemax, dk) ; mark = cs_wclear (mark+lemax, lemax, w, n) ; /* clear w */ /* --- Supernode detection ------------------------------------------ */ for (pk = pk1 ; pk < pk2 ; pk++) { i = Ci [pk] ; if (nv [i] >= 0) continue ; /* skip if i is dead */ h = last [i] ; /* scan hash bucket of node i */ i = hhead [h] ; hhead [h] = -1 ; /* hash bucket will be empty */ for ( ; i != -1 && next [i] != -1 ; i = next [i], mark++) { ln = len [i] ; eln = elen [i] ; for (p = Cp [i]+1 ; p <= Cp [i] + ln-1 ; p++) w [Ci [p]] = mark; jlast = i ; for (j = next [i] ; j != -1 ; ) /* compare i with all j */ { ok = (len [j] == ln) && (elen [j] == eln) ; for (p = Cp [j] + 1 ; ok && p <= Cp [j] + ln - 1 ; p++) { if (w [Ci [p]] != mark) ok = 0 ; /* compare i and j*/ } if (ok) /* i and j are identical */ { Cp [j] = CS_FLIP (i) ; /* absorb j into i */ nv [i] += nv [j] ; nv [j] = 0 ; elen [j] = -1 ; /* node j is dead */ j = next [j] ; /* delete j from hash bucket */ next [jlast] = j ; } else { jlast = j ; /* j and i are different */ j = next [j] ; } } } } /* --- Finalize new element------------------------------------------ */ for (p = pk1, pk = pk1 ; pk < pk2 ; pk++) /* finalize Lk */ { i = Ci [pk] ; if ((nvi = -nv [i]) <= 0) continue ;/* skip if i is dead */ nv [i] = nvi ; /* restore nv[i] */ d = degree [i] + dk - nvi ; /* compute external degree(i) */ d = CS_MIN (d, n - nel - nvi) ; if (head [d] != -1) last [head [d]] = i ; next [i] = head [d] ; /* put i back in degree list */ last [i] = -1 ; head [d] = i ; mindeg = CS_MIN (mindeg, d) ; /* find new minimum degree */ degree [i] = d ; Ci [p++] = i ; /* place i in Lk */ } nv [k] = nvk ; /* # nodes absorbed into k */ if ((len [k] = p-pk1) == 0) /* length of adj list of element k*/ { Cp [k] = -1 ; /* k is a root of the tree */ w [k] = 0 ; /* k is now a dead element */ } if (elenk != 0) cnz = p ; /* free unused space in Lk */ } /* --- Postordering ----------------------------------------------------- */ for (i = 0 ; i < n ; i++) Cp [i] = CS_FLIP (Cp [i]) ;/* fix assembly tree */ for (j = 0 ; j <= n ; j++) head [j] = -1 ; for (j = n ; j >= 0 ; j--) /* place unordered nodes in lists */ { if (nv [j] > 0) continue ; /* skip if j is an element */ next [j] = head [Cp [j]] ; /* place j in list of its parent */ head [Cp [j]] = j ; } for (e = n ; e >= 0 ; e--) /* place elements in lists */ { if (nv [e] <= 0) continue ; /* skip unless e is an element */ if (Cp [e] != -1) { next [e] = head [Cp [e]] ; /* place e in list of its parent */ head [Cp [e]] = e ; } } for (k = 0, i = 0 ; i <= n ; i++) /* postorder the assembly tree */ { if (Cp [i] == -1) k = cs_tdfs (i, k, head, next, P, w) ; } return (cs_idone (P, C, W, 1)) ; }
static cs_real_t _unwarping_mvt(cs_mesh_t *mesh, cs_real_t *i_face_norm, cs_real_t *b_face_norm, cs_real_t *i_face_cog, cs_real_t *b_face_cog, cs_real_t *loc_vtx_mvt, cs_real_t *i_face_warp, cs_real_t *b_face_warp, cs_real_t *vtx_tolerance, double frac) { cs_lnum_t face_id, i; int coord_id; cs_lnum_t start_id, end_id, vtx; cs_real_t lambda; cs_real_t max_vtxtol = 0.; cs_real_t maxwarp = 0.; for (face_id = 0; face_id < mesh->n_i_faces; face_id++) if (maxwarp < i_face_warp[face_id]) maxwarp = i_face_warp[face_id]; for (face_id = 0; face_id < mesh->n_b_faces; face_id++) if (maxwarp < b_face_warp[face_id]) maxwarp = b_face_warp[face_id]; for (i = 0; i < mesh->n_vertices*3; i++) loc_vtx_mvt[i] = 0.0; for (i = 0; i < mesh->n_vertices; i++) if (vtx_tolerance[i] > max_vtxtol) max_vtxtol = vtx_tolerance[i]; #if defined(HAVE_MPI) if (cs_glob_n_ranks > 1) { cs_real_t maxpar[2]; cs_real_t _maxpar[2]; maxpar[0] = maxwarp; maxpar[1] = max_vtxtol; MPI_Allreduce(maxpar, _maxpar, 2, CS_MPI_REAL, MPI_MAX, cs_glob_mpi_comm); maxwarp = _maxpar[0]; max_vtxtol = _maxpar[1]; } #endif for (face_id = 0; face_id < mesh->n_b_faces; face_id++) { start_id = mesh->b_face_vtx_idx[face_id]; end_id = mesh->b_face_vtx_idx[face_id + 1]; for (i = start_id; i < end_id; i++) { vtx = mesh->b_face_vtx_lst[i]; lambda = 0.0; for (coord_id = 0; coord_id < 3; coord_id++) lambda += (mesh->vtx_coord[3*vtx + coord_id] - b_face_cog[3*face_id + coord_id]) * b_face_norm[3*face_id + coord_id]; for (coord_id = 0; coord_id < 3; coord_id++) { loc_vtx_mvt[vtx*3 + coord_id] -= lambda * b_face_norm[3*face_id + coord_id] * UNWARPING_MVT * (b_face_warp[face_id]/maxwarp) * (vtx_tolerance[vtx]/(max_vtxtol*frac)); } } } for (face_id = 0; face_id < mesh->n_i_faces; face_id++) { if (mesh->i_face_cells[face_id][0] < mesh->n_cells) { start_id = mesh->i_face_vtx_idx[face_id]; end_id = mesh->i_face_vtx_idx[face_id + 1]; for (i = start_id; i < end_id; i++) { vtx = mesh->i_face_vtx_lst[i]; lambda = 0.0; for (coord_id = 0; coord_id < 3; coord_id++) lambda += (mesh->vtx_coord[3*vtx + coord_id] - i_face_cog[3*face_id + coord_id]) * i_face_norm[3*face_id + coord_id]; for (coord_id = 0; coord_id < 3; coord_id++) { loc_vtx_mvt[vtx*3 + coord_id] -= lambda * i_face_norm[3*face_id + coord_id] * UNWARPING_MVT * (i_face_warp[face_id]/maxwarp) * (vtx_tolerance[vtx]/(max_vtxtol*frac)); } } } } if (mesh->vtx_interfaces != NULL) { /* Parallel or periodic treatment */ cs_interface_set_sum(mesh->vtx_interfaces, mesh->n_vertices, 3, true, CS_REAL_TYPE, loc_vtx_mvt); } for (i = 0; i < mesh->n_vertices; i++) for (coord_id = 0; coord_id < 3; coord_id++) loc_vtx_mvt[3*i + coord_id] = CS_MIN(loc_vtx_mvt[3*i + coord_id], vtx_tolerance[i]); return maxwarp; }
static void _get_global_tolerance(cs_mesh_t *mesh, cs_real_t *vtx_tolerance) { cs_int_t i, rank, vtx_id, block_size, shift; cs_gnum_t first_vtx_gnum; cs_lnum_t n_vertices = mesh->n_vertices; double *g_vtx_tolerance = NULL, *send_list = NULL, *recv_list = NULL; cs_int_t *send_count = NULL, *recv_count = NULL; cs_int_t *send_shift = NULL, *recv_shift = NULL; cs_gnum_t *send_glist = NULL, *recv_glist = NULL; cs_gnum_t n_g_vertices = mesh->n_g_vertices; const cs_gnum_t *io_gnum = mesh->global_vtx_num; MPI_Comm mpi_comm = cs_glob_mpi_comm; const int local_rank = CS_MAX(cs_glob_rank_id, 0); const int n_ranks = cs_glob_n_ranks; /* Define a fvm_io_num_t structure on vertices */ block_size = n_g_vertices / n_ranks; if (n_g_vertices % n_ranks > 0) block_size += 1; /* Count the number of vertices to send to each rank */ /* ------------------------------------------------- */ BFT_MALLOC(send_count, n_ranks, int); BFT_MALLOC(recv_count, n_ranks, int); BFT_MALLOC(send_shift, n_ranks + 1, int); BFT_MALLOC(recv_shift, n_ranks + 1, int); send_shift[0] = 0; recv_shift[0] = 0; for (rank = 0; rank < n_ranks; rank++) send_count[rank] = 0; for (i = 0; i < n_vertices; i++) { rank = (io_gnum[i] - 1)/block_size; send_count[rank] += 1; } MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, mpi_comm); for (rank = 0; rank < n_ranks; rank++) { send_shift[rank + 1] = send_shift[rank] + send_count[rank]; recv_shift[rank + 1] = recv_shift[rank] + recv_count[rank]; } assert(send_shift[n_ranks] == n_vertices); /* Send the global numbering for each vertex */ /* ----------------------------------------- */ BFT_MALLOC(send_glist, n_vertices, cs_gnum_t); BFT_MALLOC(recv_glist, recv_shift[n_ranks], cs_gnum_t); for (rank = 0; rank < n_ranks; rank++) send_count[rank] = 0; for (i = 0; i < n_vertices; i++) { rank = (io_gnum[i] - 1)/block_size; shift = send_shift[rank] + send_count[rank]; send_count[rank] += 1; send_glist[shift] = io_gnum[i]; } MPI_Alltoallv(send_glist, send_count, send_shift, CS_MPI_GNUM, recv_glist, recv_count, recv_shift, CS_MPI_GNUM, mpi_comm); /* Send the vertex tolerance for each vertex */ /* ----------------------------------------- */ BFT_MALLOC(send_list, n_vertices, double); BFT_MALLOC(recv_list, recv_shift[n_ranks], double); for (rank = 0; rank < n_ranks; rank++) send_count[rank] = 0; for (i = 0; i < n_vertices; i++) { rank = (io_gnum[i] - 1)/block_size; shift = send_shift[rank] + send_count[rank]; send_count[rank] += 1; send_list[shift] = vtx_tolerance[i]; } MPI_Alltoallv(send_list, send_count, send_shift, MPI_DOUBLE, recv_list, recv_count, recv_shift, MPI_DOUBLE, mpi_comm); /* Define the global tolerance array */ BFT_MALLOC(g_vtx_tolerance, block_size, double); for (i = 0; i < block_size; i++) g_vtx_tolerance[i] = DBL_MAX; first_vtx_gnum = block_size * local_rank + 1; for (i = 0; i < recv_shift[n_ranks]; i++) { vtx_id = recv_glist[i] - first_vtx_gnum; g_vtx_tolerance[vtx_id] = CS_MIN(g_vtx_tolerance[vtx_id], recv_list[i]); } /* Replace local vertex tolerance by the new computed global tolerance */ for (i = 0; i < recv_shift[n_ranks]; i++) { vtx_id = recv_glist[i] - first_vtx_gnum; recv_list[i] = g_vtx_tolerance[vtx_id]; } MPI_Alltoallv(recv_list, recv_count, recv_shift, MPI_DOUBLE, send_list, send_count, send_shift, MPI_DOUBLE, mpi_comm); for (rank = 0; rank < n_ranks; rank++) send_count[rank] = 0; for (i = 0; i < n_vertices; i++) { rank = (io_gnum[i] - 1)/block_size; shift = send_shift[rank] + send_count[rank]; send_count[rank] += 1; vtx_tolerance[i] = send_list[shift]; } /* Free memory */ BFT_FREE(recv_glist); BFT_FREE(send_glist); BFT_FREE(send_list); BFT_FREE(recv_list); BFT_FREE(recv_count); BFT_FREE(send_count); BFT_FREE(recv_shift); BFT_FREE(send_shift); BFT_FREE(g_vtx_tolerance); }