/*
 * Build a compressed-column copy of a matrix supplied in compressed-row form.
 *
 *   m, n     number of rows / columns of the matrix
 *   nnz      number of stored entries
 *   a        stored values, row by row (nnz entries)
 *   colind   column index of every stored value
 *   rowptr   rowptr[i] .. rowptr[i+1]-1 are the positions of row i
 *   at       (out) newly allocated value array of the column-wise copy
 *   rowind   (out) newly allocated row index of every value in *at
 *   colptr   (out) newly allocated array of n+1 column start positions
 *
 * The three output arrays are allocated here and handed to the caller.
 */
void
zCompRow_to_CompCol(int m, int n, int nnz,
                    doublecomplex *a, int *colind, int *rowptr,
                    doublecomplex **at, int **rowind, int **colptr)
{
    int row, col, pos, dest, count;
    int *next;              /* per column: entry count, then next free slot */
    int *col_start;
    int *row_of;
    doublecomplex *values;

    /* Storage for the second copy of the matrix plus one scratch array. */
    *at     = (doublecomplex *) doublecomplexMalloc(nnz);
    *rowind = (int *) intMalloc(nnz);
    *colptr = (int *) intMalloc(n+1);
    next    = (int *) intCalloc(n);

    values    = *at;
    row_of    = *rowind;
    col_start = *colptr;

    /* Pass 1: how many entries does each column hold? */
    for (row = 0; row < m; ++row) {
        pos = rowptr[row];
        while (pos < rowptr[row+1]) {
            next[colind[pos]] += 1;
            ++pos;
        }
    }

    /* Prefix sums give the column starts; next[] becomes the fill cursor. */
    col_start[0] = 0;
    for (col = 0; col < n; ++col) {
        count = next[col];
        next[col] = col_start[col];
        col_start[col+1] = col_start[col] + count;
    }

    /* Pass 2: drop every entry into the next free slot of its column. */
    for (row = 0; row < m; ++row) {
        for (pos = rowptr[row]; pos < rowptr[row+1]; ++pos) {
            col = colind[pos];
            dest = next[col];
            next[col] = dest + 1;
            row_of[dest] = row;
            values[dest] = a[pos];
        }
    }

    SUPERLU_FREE(next);
}
pdgstrf_threadarg_t * pdgstrf_thread_init(SuperMatrix *A, SuperMatrix *L, SuperMatrix *U, pdgstrf_options_t *pdgstrf_options, pxgstrf_shared_t *pxgstrf_shared, Gstat_t *Gstat, int *info) { /* * -- SuperLU MT routine (version 1.0) -- * Univ. of California Berkeley, Xerox Palo Alto Research Center, * and Lawrence Berkeley National Lab. * August 15, 1997 * * Purpose * ======= * * pdgstrf_thread_init() initializes the parallel data structures * for the multithreaded routine pdgstrf_thread(). * * Arguments * ========= * * A (input) SuperMatrix* * Original matrix A, permutated by columns, of dimension * (A->nrow, A->ncol). The type of A can be: * Stype = NCP; Dtype = _D; Mtype = GE. * * L (input) SuperMatrix* * If pdgstrf_options->refact = YES, then use the existing * storage in L to perform LU factorization; * Otherwise, L is not accessed. L has types: * Stype = SCP, Dtype = _D, Mtype = TRLU. * * U (input) SuperMatrix* * If pdgstrf_options->refact = YES, then use the existing * storage in U to perform LU factorization; * Otherwise, U is not accessed. U has types: * Stype = NCP, Dtype = _D, Mtype = TRU. * * pdgstrf_options (input) pdgstrf_options_t* * The structure contains the parameters to control how the * factorization is performed; * See pdgstrf_options_t structure defined in pdsp_defs.h. * * pxgstrf_shared (output) pxgstrf_shared_t* * The structure contains the shared task queue and the * synchronization variables for parallel factorization. * See pxgstrf_shared_t structure defined in pdsp_defs.h. * * Gstat (output) Gstat_t* * Record all the statistics about the factorization; * See Gstat_t structure defined in util.h. * * info (output) int* * = 0: successful exit * > 0: if pdgstrf_options->lwork = -1, info returns the estimated * amount of memory (in bytes) required; * Otherwise, it returns the number of bytes allocated when * memory allocation failure occurred, plus A->ncol. * */ static GlobalLU_t Glu; /* persistent to support repeated factors. 
*/ pdgstrf_threadarg_t *pdgstrf_threadarg; register int n, i, nprocs; NCPformat *Astore; int *perm_c; int *perm_r; int *inv_perm_c; /* inverse of perm_c */ int *inv_perm_r; /* inverse of perm_r */ int *xprune; /* points to locations in subscript vector lsub[*]. For column i, xprune[i] denotes the point where structural pruning begins. I.e. only xlsub[i],..,xprune[i]-1 need to be traversed for symbolic factorization. */ int *ispruned;/* flag to indicate whether column j is pruned */ int nzlumax; pxgstrf_relax_t *pxgstrf_relax; nprocs = pdgstrf_options->nprocs; perm_c = pdgstrf_options->perm_c; perm_r = pdgstrf_options->perm_r; n = A->ncol; Astore = A->Store; inv_perm_r = (int *) intMalloc(n); inv_perm_c = (int *) intMalloc(n); xprune = (int *) intMalloc(n); ispruned = (int *) intCalloc(n); /* Pack shared data objects to each process. */ pxgstrf_shared->inv_perm_r = inv_perm_r; pxgstrf_shared->inv_perm_c = inv_perm_c; pxgstrf_shared->xprune = xprune; pxgstrf_shared->ispruned = ispruned; pxgstrf_shared->A = A; pxgstrf_shared->Glu = &Glu; pxgstrf_shared->Gstat = Gstat; pxgstrf_shared->info = info; if ( pdgstrf_options->usepr ) { /* Compute the inverse of perm_r */ for (i = 0; i < n; ++i) inv_perm_r[perm_r[i]] = i; } for (i = 0; i < n; ++i) inv_perm_c[perm_c[i]] = i; /* Initialization. */ Glu.nsuper = -1; Glu.nextl = 0; Glu.nextu = 0; Glu.nextlu = 0; ifill(perm_r, n, EMPTY); /* Identify relaxed supernodes at the bottom of the etree. */ pxgstrf_relax = (pxgstrf_relax_t *) SUPERLU_MALLOC((n+2) * sizeof(pxgstrf_relax_t)); pxgstrf_relax_snode(n, pdgstrf_options, pxgstrf_relax); /* Initialize mutex variables, task queue, determine panels. */ ParallelInit(n, pxgstrf_relax, pdgstrf_options, pxgstrf_shared); /* Set up memory image in lusup[*]. 
*/ nzlumax = PresetMap(n, A, pxgstrf_relax, pdgstrf_options, &Glu); if ( pdgstrf_options->refact == NO ) Glu.nzlumax = nzlumax; SUPERLU_FREE (pxgstrf_relax); /* Allocate global storage common to all the factor routines */ *info = pdgstrf_MemInit(n, Astore->nnz, pdgstrf_options, L, U, &Glu); if ( *info ) return NULL; /* Prepare arguments to all threads. */ pdgstrf_threadarg = (pdgstrf_threadarg_t *) SUPERLU_MALLOC(nprocs * sizeof(pdgstrf_threadarg_t)); for (i = 0; i < nprocs; ++i) { pdgstrf_threadarg[i].pnum = i; pdgstrf_threadarg[i].info = 0; pdgstrf_threadarg[i].pdgstrf_options = pdgstrf_options; pdgstrf_threadarg[i].pxgstrf_shared = pxgstrf_shared; } #if ( DEBUGlevel==1 ) printf("** pdgstrf_thread_init() called\n"); #endif return (pdgstrf_threadarg); }
/*
 * ParallelInit() -- set up the synchronization objects, the task queue and
 * the panel partition shared by the factorization threads.
 *
 *   n                number of columns; index n is used as a dummy root
 *   pxgstrf_relax    relaxed supernode table; entries are read starting at
 *                    index 1 (fcol / size), and the entry following the
 *                    last supernode is expected to carry fcol == n
 *                    -- TODO confirm against the routine that fills it
 *   pdgstrf_options  supplies etree, panel_size, relax (and nprocs for the
 *                    diagnostic print)
 *   pxgstrf_shared   receives lu_locks, spin_locks, pan_status, fb_cols,
 *                    num_splits, tasks_remain and the initialized taskq;
 *                    pxgstrf_shared->Gstat must already be set because its
 *                    panel_histo array is cleared and updated here
 *
 * Returns 0.  If queue_init() reports an error the routine prints it and
 * calls ABORT().
 */
int
ParallelInit(int n, pxgstrf_relax_t *pxgstrf_relax,
             pdgstrf_options_t *pdgstrf_options,
             pxgstrf_shared_t *pxgstrf_shared)
{
    int *etree = pdgstrf_options->etree;
    int w, dad, ukids, i, j, k, rs, panel_size, relax;
    /* P is the split threshold factor.  It is initialized here because the
       MACH==SUN diagnostic below prints it before the panel loop runs. */
    int P = 12, w_top, do_split = 0;
    panel_t panel_type;
    int *panel_histo = pxgstrf_shared->Gstat->panel_histo;
    int nthr, concurrency, info;

#if ( MACH==SUN )
    int sync_type = USYNC_THREAD;

    /* Set concurrency level. */
    nthr = sysconf(_SC_NPROCESSORS_ONLN);
    thr_setconcurrency(nthr);            /* number of LWPs */
    concurrency = thr_getconcurrency();

#if ( PRNTlevel==1 )
    printf(".. CPUs %d, concurrency (#LWP) %d, P %d\n",
           nthr, concurrency, P);
#endif

    /* Initialize mutex variables. */
    pxgstrf_shared->lu_locks = (mutex_t *)
        SUPERLU_MALLOC(NO_GLU_LOCKS * sizeof(mutex_t));
    for (i = 0; i < NO_GLU_LOCKS; ++i)
        mutex_init(&pxgstrf_shared->lu_locks[i], sync_type, 0);

#elif ( MACH==DEC || MACH==PTHREAD )
    pxgstrf_shared->lu_locks = (pthread_mutex_t *)
        SUPERLU_MALLOC(NO_GLU_LOCKS * sizeof(pthread_mutex_t));
    for (i = 0; i < NO_GLU_LOCKS; ++i)
        pthread_mutex_init(&pxgstrf_shared->lu_locks[i], NULL);
#else
    /* Other platforms: the lock array is only allocated here. */
    pxgstrf_shared->lu_locks = (mutex_t *)
        SUPERLU_MALLOC(NO_GLU_LOCKS * sizeof(mutex_t));
#endif

#if ( PRNTlevel==1 )
    printf(".. ParallelInit() ... nprocs %2d\n", pdgstrf_options->nprocs);
#endif

    pxgstrf_shared->spin_locks = intCalloc(n);
    pxgstrf_shared->pan_status =
        (pan_status_t *) SUPERLU_MALLOC((n+1)*sizeof(pan_status_t));
    pxgstrf_shared->fb_cols    = intMalloc(n+1);

    panel_size = pdgstrf_options->panel_size;
    relax = pdgstrf_options->relax;

    /* Clear the histogram slots 0 .. MAX(panel_size, relax). */
    w = MAX(panel_size, relax) + 1;
    for (i = 0; i < w; ++i) panel_histo[i] = 0;
    pxgstrf_shared->num_splits = 0;

    if ( (info = queue_init(&pxgstrf_shared->taskq, n)) ) {
        fprintf(stderr, "ParallelInit(): %d\n", info);
        ABORT("queue_init fails.");
    }

    /* Count children of each node in the etree (node n is the root). */
    for (i = 0; i <= n; ++i) pxgstrf_shared->pan_status[i].ukids = 0;
    for (i = 0; i < n; ++i) {
        dad = etree[i];
        ++pxgstrf_shared->pan_status[dad].ukids;
    }

    /* Find the panel partitions and initialize each panel's status. */

#ifdef PROFILE
    num_panels = 0;
#endif

    pxgstrf_shared->tasks_remain = 0;
    rs = 1;
    w_top = panel_size/2;
    if ( w_top == 0 ) w_top = 1;

    for (i = 0; i < n; ) {
        if ( pxgstrf_relax[rs].fcol == i ) {
            /* Column i starts the next relaxed supernode: take it whole. */
            w = pxgstrf_relax[rs++].size;
            panel_type = RELAXED_SNODE;
            pxgstrf_shared->pan_status[i].state = CANGO;
        } else {
            /* Regular panel: never run into the next relaxed supernode. */
            w = MIN(panel_size, pxgstrf_relax[rs].fcol - i);

#ifdef SPLIT_TOP
            /* Once fewer than panel_size * P columns remain, cap the
               width of every further regular panel at w_top. */
            if ( !do_split ) {
                if ( (n-i) < panel_size * P ) do_split = 1;
            }
            if ( do_split && w > w_top ) { /* split large panel */
                w = w_top;
                ++pxgstrf_shared->num_splits;
            }
#endif
            for (j = i+1; j < i + w; ++j)
                /* Do not allow panel to cross a branch point in the etree. */
                if ( pxgstrf_shared->pan_status[j].ukids > 1 ) break;
            w = j - i;    /* j should start a new panel */
            panel_type = REGULAR_PANEL;
            pxgstrf_shared->pan_status[i].state = UNREADY;
#ifdef DOMAINS
            if ( in_domain[i] == TREE_DOMAIN ) panel_type = TREE_DOMAIN;
#endif
        }

        if ( panel_type == REGULAR_PANEL ) {
            ++pxgstrf_shared->tasks_remain;
        }

        /* Non-leading columns get size 0, -1, -2, ... (minus their distance
           from the leading column); the leading column is overwritten with
           the panel width right after the loop. */
        ukids = k = 0;
        for (j = i; j < i + w; ++j) {
            pxgstrf_shared->pan_status[j].size = k--;
            pxgstrf_shared->pan_status[j].type = panel_type;
            ukids += pxgstrf_shared->pan_status[j].ukids;
        }
        pxgstrf_shared->pan_status[i].size = w; /* leading column */
        /* only count those kids outside the panel */
        pxgstrf_shared->pan_status[i].ukids = ukids - (w-1);
        panel_histo[w]++;

#ifdef PROFILE
        panstat[i].size = w;
        ++num_panels;
#endif

        pxgstrf_shared->fb_cols[i] = i;
        i += w;
    } /* for i ... */

    /* Dummy root */
    pxgstrf_shared->pan_status[n].size = 1;
    pxgstrf_shared->pan_status[n].state = UNREADY;

#if ( PRNTlevel==1 )
    printf(".. Split: P %d, #nondomain panels %d\n", P, pxgstrf_shared->tasks_remain);
#endif
#ifdef DOMAINS
    EnqueueDomains(&pxgstrf_shared->taskq, list_head, pxgstrf_shared);
#else
    EnqueueRelaxSnode(&pxgstrf_shared->taskq, n, pxgstrf_relax, pxgstrf_shared);
#endif
#if ( PRNTlevel==1 )
    printf(".. # tasks %d\n", pxgstrf_shared->tasks_remain);
    fflush(stdout);
#endif

#ifdef PREDICT_OPT
    /* Set up structure describing children */
    for (i = 0; i <= n; cp_firstkid[i++] = EMPTY);
    for (i = n-1; i >= 0; i--) {
        dad = etree[i];
        cp_nextkid[i] = cp_firstkid[dad];
        cp_firstkid[dad] = i;
    }
#endif

    return 0;
} /* ParallelInit */
int qrnzcnt(int neqns, int adjlen, int *xadj, int *adjncy, int *zfdperm, int *perm, int *invp, int *etpar, int *colcnt_h, int *nlnz, int *part_super_ata, int *part_super_h) { /* o 5/20/95 Xiaoye S. Li: Translated from fcnthn.f using f2c; Modified to use 0-based indexing in C; Initialize xsup = 0 as suggested by B. Peyton to handle singletons. o 5/24/95 Xiaoye S. Li: Modified to compute row/column counts of R in QR factorization 1. Compute row counts of A, and f(i) in a separate pass def 2. Re-define hadj[k] === U { j | j in Struct(A_i*), j>k} i:f(i)==k Record supernode partition in part_super_ata[*] of size neqns: part_super_ata[k] = size of the supernode beginning at column k; = 0, elsewhere. o 1/16/96 Xiaoye S. Li: Modified to incorporate row/column counts of the Householder Matrix H in the QR factorization A --> H , R. Record supernode partition in part_super_h[*] of size neqns: part_super_h[k] = size of the supernode beginning at column k; = 0, elsewhere. *********************************************************************** Version: 0.3 Last modified: January 12, 1995 Authors: Esmond G. Ng and Barry W. Peyton Mathematical Sciences Section, Oak Ridge National Laboratoy *********************************************************************** ************** FCNTHN ..... FIND NONZERO COUNTS *************** *********************************************************************** PURPOSE: THIS SUBROUTINE DETERMINES THE ROW COUNTS AND COLUMN COUNTS IN THE CHOLESKY FACTOR. IT USES A DISJOINT SET UNION ALGORITHM. TECHNIQUES: 1) SUPERNODE DETECTION. 2) PATH HALVING. 3) NO UNION BY RANK. NOTES: 1) ASSUMES A POSTORDERING OF THE ELIMINATION TREE. INPUT PARAMETERS: (I) NEQNS - NUMBER OF EQUATIONS. (I) ADJLEN - LENGTH OF ADJACENCY STRUCTURE. (I) XADJ(*) - ARRAY OF LENGTH NEQNS+1, CONTAINING POINTERS TO THE ADJACENCY STRUCTURE. (I) ADJNCY(*) - ARRAY OF LENGTH ADJLEN, CONTAINING THE ADJACENCY STRUCTURE. 
(I) ZFDPERM(*) - THE ROW PERMUTATION VECTOR THAT PERMUTES THE MATRIX TO HAVE ZERO-FREE DIAGONAL. ZFDPERM(I) = J MEANS ROW I OF THE ORIGINAL MATRIX IS IN ROW J OF THE PERMUTED MATRIX. (I) PERM(*) - ARRAY OF LENGTH NEQNS, CONTAINING THE POSTORDERING. (I) INVP(*) - ARRAY OF LENGTH NEQNS, CONTAINING THE INVERSE OF THE POSTORDERING. (I) ETPAR(*) - ARRAY OF LENGTH NEQNS, CONTAINING THE ELIMINATION TREE OF THE POSTORDERED MATRIX. OUTPUT PARAMETERS: (I) ROWCNT(*) - ARRAY OF LENGTH NEQNS, CONTAINING THE NUMBER OF NONZEROS IN EACH ROW OF THE FACTOR, INCLUDING THE DIAGONAL ENTRY. (I) COLCNT(*) - ARRAY OF LENGTH NEQNS, CONTAINING THE NUMBER OF NONZEROS IN EACH COLUMN OF THE FACTOR, INCLUDING THE DIAGONAL ENTRY. (I) NLNZ - NUMBER OF NONZEROS IN THE FACTOR, INCLUDING THE DIAGONAL ENTRIES. (I) PART_SUPER_ATA SUPERNODE PARTITION IN THE CHOLESKY FACTOR OF A'A. (I) PART_SUPER_H SUPERNODE PARTITION IN THE HOUSEHOLDER MATRIX H. WORK PARAMETERS: (I) SET(*) - ARRAY OF LENGTH NEQNS USED TO MAINTAIN THE DISJOINT SETS (I.E., SUBTREES). (I) PRVLF(*) - ARRAY OF LENGTH NEQNS USED TO RECORD THE PREVIOUS LEAF OF EACH ROW SUBTREE. (I) LEVEL(*) - ARRAY OF LENGTH NEQNS+1 CONTAINING THE LEVEL (DISTANCE FROM THE ROOT). (I) WEIGHT(*) - ARRAY OF LENGTH NEQNS+1 CONTAINING WEIGHTS USED TO COMPUTE COLUMN COUNTS. (I) FDESC(*) - ARRAY OF LENGTH NEQNS+1 CONTAINING THE FIRST (I.E., LOWEST-NUMBERED) DESCENDANT. (I) NCHILD(*) - ARRAY OF LENGTH NEQNS+1 CONTAINING THE NUMBER OF CHILDREN. (I) PRVNBR(*) - ARRAY OF LENGTH NEQNS USED TO RECORD THE PREVIOUS ``LOWER NEIGHBOR'' OF EACH NODE. FIRST CREATED ON APRIL 12, 1990. LAST UPDATED ON JANUARY 12, 1995. 
*********************************************************************** */ /* Local variables */ int temp, last1, last2, i, j, k, lflag, pleaf, hinbr, jstop, jstrt, ifdesc, oldnbr, parent, lownbr, lca; int xsup; /* the ongoing supernode */ int *set, *prvlf, *level, *weight, *fdesc, *nchild, *prvnbr; int *fnz; /* first nonzero column subscript in each row */ int *marker; /* used to remove duplicate indices */ int *fnz_hadj; /* higher-numbered neighbors of the first nonzero (higher adjacency set of A'A) */ int *hadj_begin; /* pointers to the fnz_hadj[] structure */ int *hadj_end; /* pointers to the fnz_hadj[] structure */ /* Locally malloc'd room for QR purpose */ /* ---------------------------------------------------------- FIRST set is defined as first[j] := { i : f[i] = j } , which is a collection of disjoint sets of integers between 0 and n-1. ---------------------------------------------------------- */ int *first; /* header pointing to FIRST set */ int *firstset; /* linked list to describe FIRST set */ int *weight_h; /* weights for H */ int *rowcnt; /* row colunts for Lc */ int *colcnt; /* column colunts for Lc */ int *rowcnt_h; /* row colunts for H */ int nsuper; /* total number of fundamental supernodes in Lc */ int nhnz; set = intMalloc(neqns); prvlf = intMalloc(neqns); level = intMalloc(neqns + 1); /* length n+1 */ weight = intMalloc(neqns + 1); /* length n+1 */ fdesc = intMalloc(neqns + 1); /* length n+1 */ nchild = intMalloc(neqns + 1); /* length n+1 */ prvnbr = intMalloc(neqns); fnz_hadj = intMalloc(adjlen + 2*neqns + 1); hadj_begin = fnz_hadj + adjlen; /* neqns+1 */ hadj_end = hadj_begin + neqns + 1; /* neqns */ fnz = set; /* aliasing for the time being */ marker = prvlf; /* " " " */ first = intMalloc(neqns); firstset = intMalloc(neqns); weight_h = intCalloc(neqns + 1); /* length n+1 */ rowcnt_h = intMalloc(neqns); rowcnt = intMalloc(neqns); colcnt = intMalloc(neqns); /* ------------------------------------------------------- * Compute fnz[*], first[*], 
nchild[*] and row counts of A. * Also find supernodes in H. * * Note that the structure of each row of H forms a simple path in * the etree between fnz[i] and i (George, Liu & Ng (1988)). * The "first vertices" of the supernodes in H are characterized * by the following conditions: * 1) first nonzero in each row of A, i.e., fnz(i); * or 2) nchild >= 2; * ------------------------------------------------------- */ for (k = 0; k < neqns; ++k) { fnz[k] = first[k] = marker[k] = EMPTY; rowcnt[k] = part_super_ata[k] = 0; part_super_h[k] = 0; nchild[k] = 0; } nchild[ROOT] = 0; xsup = 0; for (k = 0; k < neqns; ++k) { parent = etpar[k]; ++nchild[parent]; if ( k != 0 && nchild[k] >= 2 ) { part_super_h[xsup] = k - xsup; xsup = k; } oldnbr = perm[k]; for (j = xadj[oldnbr]; j < xadj[oldnbr+1]; ++j) { /* * Renumber vertices of G(A) by postorder */ /* i = invp[zfdperm[adjncy[j]]];*/ i = zfdperm[adjncy[j]]; ++rowcnt[i]; if (fnz[i] == EMPTY) { /* * Build linked list to describe FIRST sets */ fnz[i] = k; firstset[i] = first[k]; first[k] = i; if ( k != 0 && xsup != k ) { part_super_h[xsup] = k - xsup; xsup = k; } } } } part_super_h[xsup] = neqns - xsup; #ifdef CHK_NZCNT printf("%8s%8s%8s\n", "k", "fnz", "first"); for (k = 0; k < neqns; ++k) printf("%8d%8d%8d\n", k, fnz[k], first[k]); #endif /* Set up fnz_hadj[*] structure. 
*/ hadj_begin[0] = 0; for (k = 0; k < neqns; ++k) { temp = 0; oldnbr = perm[k]; hadj_end[k] = hadj_begin[k]; for (j = xadj[oldnbr]; j < xadj[oldnbr+1]; ++j) { /* hinbr = invp[zfdperm[adjncy[j]]];*/ hinbr = zfdperm[adjncy[j]]; jstrt = fnz[hinbr]; /* first nonzero must be <= k */ if ( jstrt != k && marker[jstrt] < k ) { /* ---------------------------------- filtering k itself and duplicates ---------------------------------- */ fnz_hadj[hadj_end[jstrt]] = k; ++hadj_end[jstrt]; marker[jstrt] = k; } if ( jstrt == k ) temp += rowcnt[hinbr]; } hadj_begin[k+1] = hadj_begin[k] + temp; } #ifdef CHK_NZCNT printf("%8s%8s\n", "k", "hadj"); for (k = 0; k < neqns; ++k) { printf("%8d", k); for (j = hadj_begin[k]; j < hadj_end[k]; ++j) printf("%8d", fnz_hadj[j]); printf("\n"); } #endif /* -------------------------------------------------- COMPUTE LEVEL(*), FDESC(*), NCHILD(*). INITIALIZE ROWCNT(*), COLCNT(*), SET(*), PRVLF(*), WEIGHT(*), PRVNBR(*). -------------------------------------------------- */ level[ROOT] = 0; for (k = neqns-1; k >= 0; --k) { rowcnt[k] = 1; colcnt[k] = 0; set[k] = k; prvlf[k] = EMPTY; level[k] = level[etpar[k]] + 1; weight[k] = 1; fdesc[k] = k; prvnbr[k] = EMPTY; } fdesc[ROOT] = EMPTY; for (k = 0; k < neqns; ++k) { parent = etpar[k]; weight[parent] = 0; colcnt_h[k] = 0; ifdesc = fdesc[k]; if (ifdesc < fdesc[parent]) { fdesc[parent] = ifdesc; } } xsup = 0; /* BUG FIX */ nsuper = 0; /* ------------------------------------ FOR EACH ``LOW NEIGHBOR'' LOWNBR ... ------------------------------------ */ for (lownbr = 0; lownbr < neqns; ++lownbr) { for (i = first[lownbr]; i != EMPTY; i = firstset[i]) { rowcnt_h[i] = 1 + ( level[lownbr] - level[i] ); ++weight_h[lownbr]; parent = etpar[i]; --weight_h[parent]; } lflag = 0; ifdesc = fdesc[lownbr]; jstrt = hadj_begin[lownbr]; jstop = hadj_end[lownbr]; /* ----------------------------------------------- FOR EACH ``HIGH NEIGHBOR'', HINBR OF LOWNBR ... 
----------------------------------------------- */ for (j = jstrt; j < jstop; ++j) { hinbr = fnz_hadj[j]; if (hinbr > lownbr) { if (ifdesc > prvnbr[hinbr]) { /* ------------------------- INCREMENT WEIGHT(LOWNBR). ------------------------- */ ++weight[lownbr]; pleaf = prvlf[hinbr]; /* ----------------------------------------- IF HINBR HAS NO PREVIOUS ``LOW NEIGHBOR'' THEN ... ----------------------------------------- */ if (pleaf == EMPTY) { /* ----------------------------------------- ... ACCUMULATE LOWNBR-->HINBR PATH LENGTH IN ROWCNT(HINBR). ----------------------------------------- */ rowcnt[hinbr] = rowcnt[hinbr] + level[lownbr] - level[hinbr]; } else { /* ----------------------------------------- ... OTHERWISE, LCA <-- FIND(PLEAF), WHICH IS THE LEAST COMMON ANCESTOR OF PLEAF AND LOWNBR. (PATH HALVING.) ----------------------------------------- */ last1 = pleaf; last2 = set[last1]; lca = set[last2]; while ( lca != last2 ) { set[last1] = lca; last1 = lca; last2 = set[last1]; lca = set[last2]; } /* ------------------------------------- ACCUMULATE PLEAF-->LCA PATH LENGTH IN ROWCNT(HINBR). DECREMENT WEIGHT(LCA). ------------------------------------- */ rowcnt[hinbr] = rowcnt[hinbr] + level[lownbr] - level[lca]; --weight[lca]; } /* ---------------------------------------------- LOWNBR NOW BECOMES ``PREVIOUS LEAF'' OF HINBR. ---------------------------------------------- */ prvlf[hinbr] = lownbr; lflag = 1; } /* -------------------------------------------------- LOWNBR NOW BECOMES ``PREVIOUS NEIGHBOR'' OF HINBR. -------------------------------------------------- */ prvnbr[hinbr] = lownbr; } } /* for j ... */ /* ---------------------------------------------------- DECREMENT WEIGHT ( PARENT(LOWNBR) ). SET ( P(LOWNBR) ) <-- SET ( P(LOWNBR) ) + SET(XSUP). 
---------------------------------------------------- */ parent = etpar[lownbr]; --weight[parent]; if (lflag == 1 || nchild[lownbr] >= 2) { /* lownbr is detected as the beginning of the new supernode */ if ( lownbr != 0 ) part_super_ata[xsup] = lownbr - xsup; ++nsuper; xsup = lownbr; } else { if ( parent == ROOT && ifdesc == lownbr ) { /* lownbr is a singleton, and begins a new supernode but is not detected as doing so -- BUG FIX */ part_super_ata[lownbr] = 1; ++nsuper; xsup = lownbr; } } set[xsup] = parent; } /* for lownbr ... */ /* --------------------------------------------------------- USE WEIGHTS TO COMPUTE COLUMN (AND TOTAL) NONZERO COUNTS. --------------------------------------------------------- */ *nlnz = nhnz = 0; for (k = 0; k < neqns; ++k) { /* for R */ temp = colcnt[k] + weight[k]; colcnt[k] = temp; *nlnz += temp; parent = etpar[k]; if (parent != ROOT) { colcnt[parent] += temp; } /* for H */ temp = colcnt_h[k] + weight_h[k]; colcnt_h[k] = temp; nhnz += temp; if (parent != ROOT) { colcnt_h[parent] += temp; } } part_super_ata[xsup] = neqns - xsup; /* Fix the supernode partition in H. */ free (set); free (prvlf); free (level); free (weight); free (fdesc); free (nchild); free (prvnbr); free (fnz_hadj); free (first); free (firstset); free (weight_h); free (rowcnt_h); free (rowcnt); free (colcnt); #if ( PRNTlevel==1 ) printf(".. qrnzcnt() nlnz %d, nhnz %d, nlnz/nhnz %.2f\n", *nlnz, nhnz, (float) *nlnz/nhnz); #endif #if ( DEBUGlevel>=2 ) print_int_vec("part_super_h", neqns, part_super_h); #endif return 0; } /* qrnzcnt_ */
void psgstrf_relax_snode( const int n, /* number of columns in the matrix */ //psgstrf_options_t *psgstrf_options, superlumt_options_t *psgstrf_options, //sj pxgstrf_relax_t *pxgstrf_relax /* relaxed s-nodes */ ) { /* * -- SuperLU MT routine (version 2.0) -- * Lawrence Berkeley National Lab, Univ. of California Berkeley, * and Xerox Palo Alto Research Center. * September 10, 2007 * * Purpose * ======= * psgstrf_relax_snode() identifes the initial relaxed supernodes, * assuming that the matrix has been reordered according to the postorder * of the etree. * */ register int j, parent, rs; register int fcol; /* beginning of a snode */ int *desc; /* no of descendants of each etree node. */ int *etree = psgstrf_options->etree; /* column elimination tree */ int relax = psgstrf_options->relax; /* maximum no of columns allowed in a relaxed s-node */ desc = intCalloc(n+1); /* Compute the number of descendants of each node in the etree */ for (j = 0; j < n; j++) { parent = etree[j]; desc[parent] += desc[j] + 1; } rs = 1; /* Identify the relaxed supernodes by postorder traversal of the etree. */ for (j = 0; j < n; ) { parent = etree[j]; fcol = j; while ( parent != n && desc[parent] < relax ) { j = parent; parent = etree[j]; } /* found a supernode with j being the last column. */ pxgstrf_relax[rs].fcol = fcol; pxgstrf_relax[rs].size = j - fcol + 1; #ifdef DOMAINS for (i = fcol; i <= j; ++i) in_domain[i] = RELAXED_SNODE; #endif j++; rs++; /* Search for a new leaf */ while ( desc[j] != 0 && j < n ) j++; } pxgstrf_relax[rs].fcol = n; pxgstrf_relax[0].size = rs-1; /* number of relaxed supernodes */ #if (PRNTlevel==1) printf(".. No of relaxed s-nodes %d\n", pxgstrf_relax[0].size); #endif SUPERLU_FREE (desc); }