Esempio n. 1
0
/*
 * Build a compressed-column copy of a matrix that is given in compressed-row
 * form (values a[], column indices colind[], row pointers rowptr[]).
 *
 * m, n     number of rows and columns of the matrix
 * nnz      number of stored entries
 * at, rowind, colptr
 *          (output) freshly allocated value array (nnz entries), row-index
 *          array (nnz entries) and column-pointer array (n+1 entries) of the
 *          column-oriented copy.  They are not released by this routine.
 */
void
zCompRow_to_CompCol(int m, int n, int nnz, 
		    doublecomplex *a, int *colind, int *rowptr,
		    doublecomplex **at, int **rowind, int **colptr)
{
    int row, k, c, start, stop, dest;
    doublecomplex *val;
    int *ridx, *cptr;
    int *next;    /* per-column entry count, later the next free slot */

    /* Storage for the column-oriented copy, plus a zeroed work array. */
    *at = val = (doublecomplex *) doublecomplexMalloc(nnz);
    *rowind = ridx = (int *) intMalloc(nnz);
    *colptr = cptr = (int *) intMalloc(n+1);
    next = (int *) intCalloc(n);

    /* Pass 1: count how many entries land in each column. */
    for (row = 0; row < m; ++row) {
	stop = rowptr[row+1];
	for (k = rowptr[row]; k < stop; ++k) {
	    c = colind[k];
	    next[c] += 1;
	}
    }

    /* Pass 2: turn the counts into column start offsets; next[c] becomes
       the position at which the next entry of column c will be written. */
    cptr[0] = 0;
    for (c = 0; c < n; ++c) {
	start = cptr[c];
	cptr[c+1] = start + next[c];
	next[c] = start;
    }

    /* Pass 3: scatter every entry into its column, row by row, so the row
       indices inside each column come out in increasing order. */
    for (row = 0; row < m; ++row) {
	stop = rowptr[row+1];
	for (k = rowptr[row]; k < stop; ++k) {
	    c = colind[k];
	    dest = next[c]++;
	    ridx[dest] = row;
	    val[dest] = a[k];
	}
    }

    SUPERLU_FREE(next);
}
Esempio n. 2
0
/*
 * -- SuperLU MT routine (version 1.0) --
 * Univ. of California Berkeley, Xerox Palo Alto Research Center,
 * and Lawrence Berkeley National Lab.
 * August 15, 1997
 *
 * Purpose
 * =======
 *
 * pdgstrf_thread_init() sets up the parallel data structures used by the
 * multithreaded routine pdgstrf_thread() and builds one argument record
 * per thread.
 *
 * Arguments
 * =========
 *
 * A        (input) SuperMatrix*
 *          Original matrix A, permuted by columns, of dimension
 *          (A->nrow, A->ncol). The type of A can be:
 *          Stype = NCP; Dtype = _D; Mtype = GE.
 *
 * L        (input) SuperMatrix*
 *          If pdgstrf_options->refact = YES, the existing storage in L is
 *          used for the LU factorization; otherwise L is not accessed.
 *          L has types: Stype = SCP, Dtype = _D, Mtype = TRLU.
 *
 * U        (input) SuperMatrix*
 *          If pdgstrf_options->refact = YES, the existing storage in U is
 *          used for the LU factorization; otherwise U is not accessed.
 *          U has types: Stype = NCP, Dtype = _D, Mtype = TRU.
 *
 * pdgstrf_options (input) pdgstrf_options_t*
 *          Parameters controlling how the factorization is performed;
 *          see the pdgstrf_options_t structure defined in pdsp_defs.h.
 *          Note that perm_r[] is overwritten with EMPTY by this routine.
 *
 * pxgstrf_shared (output) pxgstrf_shared_t*
 *          Receives the shared task queue and the synchronization
 *          variables for parallel factorization;
 *          see the pxgstrf_shared_t structure defined in pdsp_defs.h.
 *
 * Gstat    (output) Gstat_t*
 *          Records all the statistics about the factorization;
 *          see the Gstat_t structure defined in util.h.
 *
 * info     (output) int*
 *          = 0: successful exit
 *          > 0: if pdgstrf_options->lwork = -1, info returns the estimated
 *               amount of memory (in bytes) required;
 *               otherwise, it returns the number of bytes allocated when
 *               memory allocation failure occurred, plus A->ncol.
 *
 * Return value
 * ============
 *
 * An array of pdgstrf_options->nprocs thread argument records, or NULL
 * when pdgstrf_MemInit() leaves a nonzero value in *info.
 */
pdgstrf_threadarg_t *
pdgstrf_thread_init(SuperMatrix *A, SuperMatrix *L, SuperMatrix *U,
		    pdgstrf_options_t *pdgstrf_options, 
		    pxgstrf_shared_t *pxgstrf_shared,
		    Gstat_t *Gstat, int *info)
{
    static GlobalLU_t Glu; /* persistent to support repeated factors. */
    pdgstrf_threadarg_t *args;
    pdgstrf_threadarg_t *arg;
    pxgstrf_relax_t *relax_snodes;
    NCPformat *Astore;
    int  *col_perm, *row_perm;
    int  *inv_col, *inv_row;   /* inverses of the two permutations */
    int  nthreads, ncol, k, lusup_size;

    nthreads = pdgstrf_options->nprocs;
    col_perm = pdgstrf_options->perm_c;
    row_perm = pdgstrf_options->perm_r;
    ncol     = A->ncol;
    Astore   = A->Store;

    /* Per-column work arrays; each one is published to the threads through
       pxgstrf_shared as soon as it has been allocated.
       xprune[i] marks where structural pruning begins in lsub[*] for
       column i, i.e. only xlsub[i],..,xprune[i]-1 need to be traversed
       for symbolic factorization.  ispruned[] flags pruned columns and
       starts out all zero. */
    pxgstrf_shared->inv_perm_r = inv_row = (int *) intMalloc(ncol);
    pxgstrf_shared->inv_perm_c = inv_col = (int *) intMalloc(ncol);
    pxgstrf_shared->xprune     = (int *) intMalloc(ncol);
    pxgstrf_shared->ispruned   = (int *) intCalloc(ncol);

    /* Remaining shared handles. */
    pxgstrf_shared->A     = A;
    pxgstrf_shared->Glu   = &Glu;
    pxgstrf_shared->Gstat = Gstat;
    pxgstrf_shared->info  = info;

    /* Start with empty L/U structures. */
    Glu.nsuper = -1;
    Glu.nextl  = 0;
    Glu.nextu  = 0;
    Glu.nextlu = 0;

    /* Invert the permutations.  The row permutation is only inverted when
       the caller asked for it to be used (usepr); this has to happen
       before perm_r[] is wiped just below. */
    if ( pdgstrf_options->usepr ) {
	for (k = 0; k < ncol; ++k) {
	    inv_row[row_perm[k]] = k;
	}
    }
    for (k = 0; k < ncol; ++k) {
	inv_col[col_perm[k]] = k;
    }
    ifill(row_perm, ncol, EMPTY);

    /* Identify relaxed supernodes at the bottom of the etree. */
    relax_snodes = (pxgstrf_relax_t *)
        SUPERLU_MALLOC((ncol+2) * sizeof(pxgstrf_relax_t));
    pxgstrf_relax_snode(ncol, pdgstrf_options, relax_snodes);

    /* Initialize mutex variables, task queue, determine panels. */
    ParallelInit(ncol, relax_snodes, pdgstrf_options, pxgstrf_shared);

    /* Set up memory image in lusup[*]; the size it reports is only
       adopted when this is not a refactorization. */
    lusup_size = PresetMap(ncol, A, relax_snodes, pdgstrf_options, &Glu);
    if ( pdgstrf_options->refact == NO ) {
	Glu.nzlumax = lusup_size;
    }

    SUPERLU_FREE (relax_snodes);

    /* Allocate global storage common to all the factor routines */
    *info = pdgstrf_MemInit(ncol, Astore->nnz, pdgstrf_options, L, U, &Glu);
    if ( *info != 0 ) {
	return NULL;
    }

    /* Prepare arguments to all threads. */
    args = (pdgstrf_threadarg_t *) 
        SUPERLU_MALLOC(nthreads * sizeof(pdgstrf_threadarg_t));
    for (k = 0, arg = args; k < nthreads; ++k, ++arg) {
	arg->pnum = k;
	arg->info = 0;
	arg->pdgstrf_options = pdgstrf_options;
	arg->pxgstrf_shared = pxgstrf_shared;
    }

#if ( DEBUGlevel==1 )
    printf("** pdgstrf_thread_init() called\n");
#endif

    return args;
}
Esempio n. 3
0
/*
 * ParallelInit() prepares the state shared by all factorization threads.
 *
 * It
 *   - allocates the NO_GLU_LOCKS mutexes in pxgstrf_shared->lu_locks (and
 *     initializes them on the SUN, DEC and PTHREAD builds; the fallback
 *     build only allocates them);
 *   - allocates spin_locks[] (n entries, zeroed), pan_status[] (n+1
 *     entries) and fb_cols[] (n+1 entries);
 *   - clears the first MAX(panel_size, relax)+1 entries of the panel
 *     histogram and initializes the task queue (aborting on failure);
 *   - counts the children of every etree node;
 *   - partitions columns 0..n-1 into panels: relaxed supernodes taken from
 *     pxgstrf_relax[] and regular panels of at most panel_size columns
 *     that never cross a branch point of the etree;
 *   - enqueues the initial tasks.
 *
 * Arguments
 *   n                number of columns
 *   pxgstrf_relax    relaxed supernodes; entries are read from index 1 on,
 *                    and the loop relies on an entry whose fcol is not a
 *                    valid column to terminate the list
 *   pdgstrf_options  supplies etree, panel_size, relax (and nprocs for the
 *                    diagnostic output)
 *   pxgstrf_shared   (output) receives everything set up here
 *
 * Always returns 0.
 */
int
ParallelInit(int n, pxgstrf_relax_t *pxgstrf_relax, 
	     pdgstrf_options_t *pdgstrf_options, 
	     pxgstrf_shared_t *pxgstrf_shared)
{
    int      *etree = pdgstrf_options->etree;
    register int w, dad, ukids, i, j, k, rs, panel_size, relax;
    /* P is given its value here, not next to the partitioning loop, because
       the MACH==SUN diagnostic below prints it; it used to be read there
       while still uninitialized.  With SPLIT_TOP, panels are split once
       fewer than panel_size * P columns remain. */
    register int P = 12, w_top, do_split = 0;
    panel_t panel_type;
    int      *panel_histo = pxgstrf_shared->Gstat->panel_histo;
    register int nthr, concurrency, info;

#if ( MACH==SUN )
    register int sync_type = USYNC_THREAD;
    
    /* Set concurrency level. */
    nthr = sysconf(_SC_NPROCESSORS_ONLN);
    thr_setconcurrency(nthr);            /* number of LWPs */
    concurrency = thr_getconcurrency();

#if ( PRNTlevel==1 )    
    printf(".. CPUs %d, concurrency (#LWP) %d, P %d\n",
	   nthr, concurrency, P);
#endif

    /* Initialize mutex variables. */
    pxgstrf_shared->lu_locks = (mutex_t *) 
        SUPERLU_MALLOC(NO_GLU_LOCKS * sizeof(mutex_t));
    for (i = 0; i < NO_GLU_LOCKS; ++i)
	mutex_init(&pxgstrf_shared->lu_locks[i], sync_type, 0);

#elif ( MACH==DEC || MACH==PTHREAD )
    pxgstrf_shared->lu_locks = (pthread_mutex_t *) 
        SUPERLU_MALLOC(NO_GLU_LOCKS * sizeof(pthread_mutex_t));
    for (i = 0; i < NO_GLU_LOCKS; ++i)
	pthread_mutex_init(&pxgstrf_shared->lu_locks[i], NULL);
#else
    /* NOTE(review): on this path the locks are allocated but never
       initialized -- confirm that the platform code does not need it. */
    pxgstrf_shared->lu_locks = (mutex_t *) SUPERLU_MALLOC(NO_GLU_LOCKS * sizeof(mutex_t));
#endif    
    
#if ( PRNTlevel==1 )
    printf(".. ParallelInit() ... nprocs %2d\n", pdgstrf_options->nprocs);
#endif

    pxgstrf_shared->spin_locks = intCalloc(n);
    pxgstrf_shared->pan_status = 
        (pan_status_t *) SUPERLU_MALLOC((n+1)*sizeof(pan_status_t));
    pxgstrf_shared->fb_cols    = intMalloc(n+1);

    /* Clear the histogram slots for every panel width that can occur. */
    panel_size = pdgstrf_options->panel_size;
    relax = pdgstrf_options->relax;
    w = MAX(panel_size, relax) + 1;
    for (i = 0; i < w; ++i) panel_histo[i] = 0;
    pxgstrf_shared->num_splits = 0;
    
    if ( (info = queue_init(&pxgstrf_shared->taskq, n)) ) {
	fprintf(stderr, "ParallelInit(): %d\n", info);
	ABORT("queue_init fails.");
    }

    /* Count children of each node in the etree.  Index n is included
       because pan_status[] has a slot for the dummy root. */
    for (i = 0; i <= n; ++i) pxgstrf_shared->pan_status[i].ukids = 0;
    for (i = 0; i < n; ++i) {
	dad = etree[i];
	++pxgstrf_shared->pan_status[dad].ukids;
    }

    
    /* Find the panel partitions and initialize each panel's status */

#ifdef PROFILE
    num_panels = 0;
#endif

    pxgstrf_shared->tasks_remain = 0;
    rs = 1;                      /* next relaxed supernode to consume */
    w_top = panel_size/2;        /* width used for split panels */
    if ( w_top == 0 ) w_top = 1;

    for (i = 0; i < n; ) {
	if ( pxgstrf_relax[rs].fcol == i ) {
	    /* Column i starts a relaxed supernode: take it as one panel. */
	    w = pxgstrf_relax[rs++].size;
	    panel_type = RELAXED_SNODE;
	    pxgstrf_shared->pan_status[i].state = CANGO;
	} else {
	    /* Regular panel: stop before the next relaxed supernode. */
	    w = MIN(panel_size, pxgstrf_relax[rs].fcol - i);
#ifdef SPLIT_TOP
	    if ( !do_split ) {
		if ( (n-i) < panel_size * P ) do_split = 1;
	    }
	    if ( do_split && w > w_top ) { /* split large panel */
		w = w_top;
		++pxgstrf_shared->num_splits;
	    }
#endif
	    for (j = i+1; j < i + w; ++j) 
		/* Do not allow panel to cross a branch point in the etree. */
		if ( pxgstrf_shared->pan_status[j].ukids > 1 ) break;
	    w = j - i;    /* j should start a new panel */
	    panel_type = REGULAR_PANEL;
	    pxgstrf_shared->pan_status[i].state = UNREADY;
#ifdef DOMAINS
	    if ( in_domain[i] == TREE_DOMAIN ) panel_type = TREE_DOMAIN;
#endif
	}

	if ( panel_type == REGULAR_PANEL ) {
	    ++pxgstrf_shared->tasks_remain;
	    /*printf("nondomain panel %6d -- %6d\n", i, i+w-1);
	    fflush(stdout);*/
	}

	/* Non-leading columns store 0 minus their distance to the leading
	   column in .size (-1, -2, ...); the leading column gets the panel
	   width just after the loop. */
	ukids = k = 0;
	for (j = i; j < i + w; ++j) {
	    pxgstrf_shared->pan_status[j].size = k--;
	    pxgstrf_shared->pan_status[j].type = panel_type;
	    ukids += pxgstrf_shared->pan_status[j].ukids;
	}
	pxgstrf_shared->pan_status[i].size = w; /* leading column */
	/* only count those kids outside the panel */
	pxgstrf_shared->pan_status[i].ukids = ukids - (w-1);
	panel_histo[w]++;
	
#ifdef PROFILE
	panstat[i].size = w;
	++num_panels;
#endif
	
	pxgstrf_shared->fb_cols[i] = i;
	i += w;
    } /* for i ... */
    
    /* Dummy root */
    pxgstrf_shared->pan_status[n].size = 1;
    pxgstrf_shared->pan_status[n].state = UNREADY;

#if ( PRNTlevel==1 )
    printf(".. Split: P %d, #nondomain panels %d\n", P, pxgstrf_shared->tasks_remain);
#endif
#ifdef DOMAINS
    EnqueueDomains(&pxgstrf_shared->taskq, list_head, pxgstrf_shared);
#else
    EnqueueRelaxSnode(&pxgstrf_shared->taskq, n, pxgstrf_relax, pxgstrf_shared);
#endif
#if ( PRNTlevel==1 )
    printf(".. # tasks %d\n", pxgstrf_shared->tasks_remain);
    fflush(stdout);
#endif

#ifdef PREDICT_OPT
    /* Set up structure describing children */
    for (i = 0; i <= n; cp_firstkid[i++] = EMPTY);
    for (i = n-1; i >= 0; i--) {
	dad = etree[i];
	cp_nextkid[i] = cp_firstkid[dad];
	cp_firstkid[dad] = i;
    }
#endif

    return 0;
} /* ParallelInit */
Esempio n. 4
0
int
qrnzcnt(int neqns, int adjlen, int *xadj, int *adjncy, int *zfdperm,
	int *perm, int *invp, int *etpar, int *colcnt_h,
	int *nlnz, int *part_super_ata, int *part_super_h)
{
/*
     Purpose (C version):
         Compute nonzero counts and supernode partitions for the QR
         factorization A --> H, R.  Column counts "for R" are accumulated
         in the local array colcnt[] and summed into *nlnz; column counts
         of the Householder matrix H are returned in colcnt_h[].
         The routine always returns 0.

     History:

     o 5/20/95 Xiaoye S. Li:
         Translated from fcnthn.f using f2c;
         Modified to use 0-based indexing in C;
         Initialize xsup = 0 as suggested by B. Peyton to handle singletons.

     o 5/24/95 Xiaoye S. Li:
         Modified to compute row/column counts of R in QR factorization
           1. Compute row counts of A, and f(i) in a separate pass
	                        def
	   2. Re-define hadj[k] ===   U    { j | j in Struct(A_i*), j>k}
	                          i:f(i)==k
	 Record supernode partition in part_super_ata[*] of size neqns:
	   part_super_ata[k] = size of the supernode beginning at column k;
 	                     = 0, elsewhere.

     o 1/16/96 Xiaoye S. Li:
         Modified to incorporate row/column counts of the Householder
	 Matrix H in the QR factorization A --> H , R.
	 
	 Record supernode partition in part_super_h[*] of size neqns:
	   part_super_h[k] = size of the supernode beginning at column k;
 	                   = 0, elsewhere.
	 
   ***********************************************************************   
     Version:        0.3   
     Last modified:  January 12, 1995   
     Authors:        Esmond G. Ng and Barry W. Peyton   

     Mathematical Sciences Section, Oak Ridge National Laboratory   

   ***********************************************************************   
   **************     FCNTHN  ..... FIND NONZERO COUNTS    ***************   
   ***********************************************************************   

     PURPOSE (of the original Fortran routine):   
         THIS SUBROUTINE DETERMINES THE ROW COUNTS AND COLUMN COUNTS IN   
         THE CHOLESKY FACTOR.  IT USES A DISJOINT SET UNION ALGORITHM.   

         TECHNIQUES:   
         1) SUPERNODE DETECTION.   
         2) PATH HALVING.   
         3) NO UNION BY RANK.   

     NOTES:   
         1) ASSUMES A POSTORDERING OF THE ELIMINATION TREE.   

     INPUT PARAMETERS:   
         (I) NEQNS       -   NUMBER OF EQUATIONS.   
         (I) ADJLEN      -   LENGTH OF ADJACENCY STRUCTURE.   
         (I) XADJ(*)     -   ARRAY OF LENGTH NEQNS+1, CONTAINING POINTERS   
                             TO THE ADJACENCY STRUCTURE.
         (I) ADJNCY(*)   -   ARRAY OF LENGTH ADJLEN, CONTAINING   
                             THE ADJACENCY STRUCTURE.
         (I) ZFDPERM(*)  -   THE ROW PERMUTATION VECTOR THAT PERMUTES THE
	                     MATRIX TO HAVE ZERO-FREE DIAGONAL.
			     ZFDPERM(I) = J MEANS ROW I OF THE ORIGINAL
			     MATRIX IS IN ROW J OF THE PERMUTED MATRIX.
         (I) PERM(*)     -   ARRAY OF LENGTH NEQNS, CONTAINING THE   
                             POSTORDERING.   
         (I) INVP(*)     -   ARRAY OF LENGTH NEQNS, CONTAINING THE   
                             INVERSE OF THE POSTORDERING.
                             (Not referenced by the active C code; it
                             only appears in commented-out statements.)
         (I) ETPAR(*)    -   ARRAY OF LENGTH NEQNS, CONTAINING THE   
                             ELIMINATION TREE OF THE POSTORDERED MATRIX.   

     OUTPUT PARAMETERS:   
         (I) COLCNT_H(*) -   ARRAY OF LENGTH NEQNS, RECEIVING THE COLUMN
                             COUNTS COMPUTED FOR H.
         (I) NLNZ        -   NUMBER OF NONZEROS IN THE FACTOR, INCLUDING   
                             THE DIAGONAL ENTRIES (sum of colcnt[]).
         (I) PART_SUPER_ATA  SUPERNODE PARTITION IN THE CHOLESKY FACTOR
	                     OF A'A.
	 (I) PART_SUPER_H    SUPERNODE PARTITION IN THE HOUSEHOLDER
	                     MATRIX H.

     LOCAL RESULT ARRAYS (parameters of the Fortran routine; allocated
     and released inside this C version, never returned to the caller):
         (I) ROWCNT(*)   -   ARRAY OF LENGTH NEQNS, CONTAINING THE NUMBER   
                             OF NONZEROS IN EACH ROW OF THE FACTOR,   
                             INCLUDING THE DIAGONAL ENTRY.   
         (I) COLCNT(*)   -   ARRAY OF LENGTH NEQNS, CONTAINING THE NUMBER   
                             OF NONZEROS IN EACH COLUMN OF THE FACTOR,   
                             INCLUDING THE DIAGONAL ENTRY.   

     WORK ARRAYS (also allocated and released inside this C version):   
         (I) SET(*)      -   ARRAY OF LENGTH NEQNS USED TO MAINTAIN THE   
                             DISJOINT SETS (I.E., SUBTREES).   
         (I) PRVLF(*)    -   ARRAY OF LENGTH NEQNS USED TO RECORD THE   
                             PREVIOUS LEAF OF EACH ROW SUBTREE.   
         (I) LEVEL(*)    -   ARRAY OF LENGTH NEQNS+1 CONTAINING THE LEVEL   
                             (DISTANCE FROM THE ROOT).   
         (I) WEIGHT(*)   -   ARRAY OF LENGTH NEQNS+1 CONTAINING WEIGHTS   
                             USED TO COMPUTE COLUMN COUNTS.   
         (I) FDESC(*)    -   ARRAY OF LENGTH NEQNS+1 CONTAINING THE   
                             FIRST (I.E., LOWEST-NUMBERED) DESCENDANT.   
         (I) NCHILD(*)   -   ARRAY OF LENGTH NEQNS+1 CONTAINING THE   
                             NUMBER OF CHILDREN.   
         (I) PRVNBR(*)   -   ARRAY OF LENGTH NEQNS USED TO RECORD THE   
                             PREVIOUS ``LOWER NEIGHBOR'' OF EACH NODE.   

     FIRST CREATED ON    APRIL 12, 1990.   
     LAST UPDATED ON     JANUARY 12, 1995.   

   ***********************************************************************   
*/

    /* Local variables */
    int  temp, last1, last2, i, j, k, lflag, pleaf, hinbr, jstop,
	 jstrt, ifdesc, oldnbr, parent, lownbr, lca;
    int  xsup;        /* the ongoing supernode */
    int  *set, *prvlf, *level, *weight, *fdesc, *nchild, *prvnbr;
    int  *fnz;        /* first nonzero column subscript in each row */
    int  *marker;     /* used to remove duplicate indices */
    int  *fnz_hadj;   /* higher-numbered neighbors of the first nonzero
			 (higher adjacency set of A'A) */
    int  *hadj_begin; /* pointers to the fnz_hadj[] structure */
    int  *hadj_end;   /* pointers to the fnz_hadj[] structure */

    /* Locally malloc'd room for QR purpose */

    /* ----------------------------------------------------------
       FIRST set is defined as first[j] := { i : f[i] = j } ,
       which is a collection of disjoint sets of integers between
       0 and n-1.
       ---------------------------------------------------------- */    
    int  *first;    /* header pointing to FIRST set */
    int  *firstset; /* linked list to describe FIRST set */
    int  *weight_h; /* weights for H */
    int  *rowcnt;   /* row counts for Lc */ 
    int  *colcnt;   /* column counts for Lc */ 
    int  *rowcnt_h; /* row counts for H (filled in below, but not read
		       again or returned) */ 
    int  nsuper;    /* total number of fundamental supernodes in Lc */
    int  nhnz;      /* sum of the column counts of H */
    
    set    = intMalloc(neqns);
    prvlf  = intMalloc(neqns);
    level  = intMalloc(neqns + 1);    /* length n+1 */
    weight = intMalloc(neqns + 1);    /* length n+1 */
    fdesc  = intMalloc(neqns + 1);    /* length n+1 */
    nchild = intMalloc(neqns + 1);    /* length n+1 */
    prvnbr = intMalloc(neqns);
    /* One allocation holds three arrays back to back: the adjacency data
       itself (adjlen), hadj_begin[] (neqns+1) and hadj_end[] (neqns). */
    fnz_hadj   = intMalloc(adjlen + 2*neqns + 1);
    hadj_begin = fnz_hadj + adjlen;        /* neqns+1 */
    hadj_end   = hadj_begin + neqns + 1;   /* neqns */
    /* fnz[] and marker[] borrow the storage of set[] and prvlf[]; they are
       last used while building fnz_hadj[], before set[] and prvlf[] are
       re-initialized for the disjoint-set phase. */
    fnz        = set;    /* aliasing for the time being */
    marker     = prvlf;  /*     "    "    "             */
    
    first    = intMalloc(neqns);
    firstset = intMalloc(neqns);
    weight_h = intCalloc(neqns + 1);  /* length n+1 */
    rowcnt_h = intMalloc(neqns);
    rowcnt   = intMalloc(neqns);
    colcnt   = intMalloc(neqns);
    
    /* -------------------------------------------------------
     * Compute fnz[*], first[*], nchild[*] and row counts of A.
     * Also find supernodes in H.
     *
     * Note that the structure of each row of H forms a simple path in
     * the etree between fnz[i] and i (George, Liu & Ng (1988)).
     * The "first vertices" of the supernodes in H are characterized
     * by the following conditions:
     *     1) first nonzero in each row of A, i.e., fnz(i);
     *  or 2) nchild >= 2;
     * ------------------------------------------------------- */
    for (k = 0; k < neqns; ++k) {
	fnz[k] = first[k] = marker[k] = EMPTY;
	rowcnt[k] = part_super_ata[k] = 0;
	part_super_h[k] = 0;
	nchild[k] = 0;
    }
    nchild[ROOT] = 0;
    xsup = 0;
    for (k = 0; k < neqns; ++k) {
	parent = etpar[k];
	++nchild[parent];
	/* Condition 2: a node with two or more children starts a new
	   supernode of H; close the running one at xsup. */
	if ( k != 0 && nchild[k] >= 2 ) {
	    part_super_h[xsup] = k - xsup;
	    xsup = k;
	}
	oldnbr = perm[k];    /* column k of the postordered matrix */
	for (j = xadj[oldnbr]; j < xadj[oldnbr+1]; ++j) {
	    /*
	     * Renumber vertices of G(A) by postorder
	     */
/*	    i = invp[zfdperm[adjncy[j]]];*/
	    i = zfdperm[adjncy[j]];
	    ++rowcnt[i];
	    if (fnz[i] == EMPTY) {
		/*
		 * Build linked list to describe FIRST sets
		 */
		fnz[i] = k;
		firstset[i] = first[k];
		first[k] = i;
		/* Condition 1: k is the first nonzero of some row. */
		if ( k != 0 && xsup != k ) {
		    part_super_h[xsup] = k - xsup;
		    xsup = k;
		}
	    }
	}
    }
    /* Close the last supernode of H. */
    part_super_h[xsup] = neqns - xsup;

#ifdef CHK_NZCNT
    printf("%8s%8s%8s\n", "k", "fnz", "first");
    for (k = 0; k < neqns; ++k)
	printf("%8d%8d%8d\n", k, fnz[k], first[k]);
#endif
    
    /* Set up fnz_hadj[*] structure.
       The list of k starts at hadj_begin[k]; its capacity is the sum of
       the row counts of all rows whose first nonzero is column k (temp),
       and hadj_end[] tracks how much of each list has been filled. */
    hadj_begin[0] = 0;
    for (k = 0; k < neqns; ++k) {
	temp = 0;
	oldnbr = perm[k];
	hadj_end[k] = hadj_begin[k];
	for (j = xadj[oldnbr]; j < xadj[oldnbr+1]; ++j) {
/*	    hinbr = invp[zfdperm[adjncy[j]]];*/
	    hinbr = zfdperm[adjncy[j]];
	    jstrt = fnz[hinbr];    /* first nonzero must be <= k */
	    if ( jstrt != k && marker[jstrt] < k ) {
		/* ----------------------------------
		   filtering k itself and duplicates
		   ---------------------------------- */
		fnz_hadj[hadj_end[jstrt]] = k;
		++hadj_end[jstrt];
		marker[jstrt] = k;
	    }
	    if ( jstrt == k ) temp += rowcnt[hinbr];
	}
	hadj_begin[k+1] = hadj_begin[k] + temp;
    }

#ifdef CHK_NZCNT
    printf("%8s%8s\n", "k", "hadj");
    for (k = 0; k < neqns; ++k) {
	printf("%8d", k);
	for (j = hadj_begin[k]; j < hadj_end[k]; ++j)
	    printf("%8d", fnz_hadj[j]);
	printf("\n");
    }
#endif
	
    /*   --------------------------------------------------   
         COMPUTE LEVEL(*), FDESC(*), NCHILD(*).   
         INITIALIZE ROWCNT(*), COLCNT(*),   
                    SET(*), PRVLF(*), WEIGHT(*), PRVNBR(*).   
         (From here on set[] and prvlf[] no longer serve as
         fnz[] and marker[].)
         --------------------------------------------------   */
    level[ROOT] = 0;
    /* Descending order, so level[etpar[k]] is already set when column k
       is reached (relies on etpar[k] being ROOT or greater than k). */
    for (k = neqns-1; k >= 0; --k) {
	rowcnt[k] = 1;
	colcnt[k] = 0;
	set[k] = k;
	prvlf[k] = EMPTY;
	level[k] = level[etpar[k]] + 1;
	weight[k] = 1;
	fdesc[k] = k;
	prvnbr[k] = EMPTY;
    }
    fdesc[ROOT] = EMPTY;
    for (k = 0; k < neqns; ++k) {
	parent = etpar[k];
	weight[parent] = 0;    /* nodes that have children start at 0 */
	colcnt_h[k] = 0;
	ifdesc = fdesc[k];
	if (ifdesc < fdesc[parent]) {
	    fdesc[parent] = ifdesc;
	}
    }

    xsup    = 0;      /* BUG FIX */
    nsuper = 0;
    
    /*   ------------------------------------   
         FOR EACH ``LOW NEIGHBOR'' LOWNBR ...   
         ------------------------------------ */
    for (lownbr = 0; lownbr < neqns; ++lownbr) {
	/* Every row i whose first nonzero is lownbr adds +1 to the H weight
	   of lownbr and -1 to that of the etree parent of i. */
	for (i = first[lownbr]; i != EMPTY; i = firstset[i]) {
	    rowcnt_h[i] = 1 + ( level[lownbr] - level[i] );
	    ++weight_h[lownbr];
	    parent = etpar[i];
	    --weight_h[parent];
	}
	
	lflag  = 0;    /* becomes 1 when lownbr is found to be a leaf of
			  some row subtree */
	ifdesc = fdesc[lownbr];
	jstrt  = hadj_begin[lownbr];
	jstop  = hadj_end[lownbr];
	/*   -----------------------------------------------   
             FOR EACH ``HIGH NEIGHBOR'', HINBR OF LOWNBR ...   
             ----------------------------------------------- */
	for (j = jstrt; j < jstop; ++j) {	    
	    hinbr = fnz_hadj[j];
	    if (hinbr > lownbr) {
                if (ifdesc > prvnbr[hinbr]) {
		    /*  -------------------------   
			INCREMENT WEIGHT(LOWNBR).   
			------------------------- */
		    ++weight[lownbr];
		    pleaf = prvlf[hinbr];
		    /*  -----------------------------------------   
			IF HINBR HAS NO PREVIOUS ``LOW NEIGHBOR'' THEN ...   
			----------------------------------------- */
		    if (pleaf == EMPTY) {
			/* -----------------------------------------   
			   ... ACCUMULATE LOWNBR-->HINBR PATH LENGTH   
			       IN ROWCNT(HINBR).   
			   ----------------------------------------- */
			rowcnt[hinbr] = rowcnt[hinbr] +
			                level[lownbr] - level[hinbr];
		    } else {
			/* -----------------------------------------   
			   ... OTHERWISE, LCA <-- FIND(PLEAF), WHICH   
                               IS THE LEAST COMMON ANCESTOR OF PLEAF   
                               AND LOWNBR. (PATH HALVING.)   
			   ----------------------------------------- */
			last1 = pleaf;
			last2 = set[last1];
			lca = set[last2];
			while ( lca != last2 ) {
			    set[last1] = lca;
			    last1 = lca;
			    last2 = set[last1];
			    lca = set[last2];
			}
			/* -------------------------------------   
			   ACCUMULATE PLEAF-->LCA PATH LENGTH IN   
			   ROWCNT(HINBR). DECREMENT WEIGHT(LCA).   
			   ------------------------------------- */
			rowcnt[hinbr] = rowcnt[hinbr] + 
					level[lownbr] - level[lca];
			--weight[lca];
		    }
		    /* ----------------------------------------------   
		       LOWNBR NOW BECOMES ``PREVIOUS LEAF'' OF HINBR.   
		       ---------------------------------------------- */
		    prvlf[hinbr] = lownbr;
		    lflag = 1;
        	}
		/* --------------------------------------------------   
		   LOWNBR NOW BECOMES ``PREVIOUS NEIGHBOR'' OF HINBR.   
		   -------------------------------------------------- */
		prvnbr[hinbr] = lownbr;
	    }
	} /* for j ... */
	
	/* ----------------------------------------------------   
	   DECREMENT WEIGHT ( PARENT(LOWNBR) ).   
	   SET ( P(LOWNBR) ) <-- SET ( P(LOWNBR) ) + SET(XSUP).   
	   ---------------------------------------------------- */
	parent = etpar[lownbr];
	--weight[parent];
	if (lflag == 1 || nchild[lownbr] >= 2) {
	    /* lownbr is detected as the beginning of the new supernode */
	    if ( lownbr != 0 ) part_super_ata[xsup] = lownbr - xsup;
	    ++nsuper;
	    xsup = lownbr;
	} else {
	    if ( parent == ROOT && ifdesc == lownbr ) {
		/* lownbr is a singleton, and begins a new supernode
		   but is not detected as doing so -- BUG FIX */
		part_super_ata[lownbr] = 1;
		++nsuper;
		xsup = lownbr;
	    }
	}
	set[xsup] = parent;
    } /* for lownbr ... */
    
    /* ---------------------------------------------------------   
       USE WEIGHTS TO COMPUTE COLUMN (AND TOTAL) NONZERO COUNTS.   
       Each column's final count is added to its parent's, so a
       count is the sum of the weights in the column's subtree.
       --------------------------------------------------------- */
    *nlnz = nhnz = 0;
    for (k = 0; k < neqns; ++k) {
	/* for R */
	temp = colcnt[k] + weight[k];
	colcnt[k] = temp;
	*nlnz += temp;
	parent = etpar[k];
	if (parent != ROOT) {
	    colcnt[parent] += temp;
	}

	/* for H */
	temp = colcnt_h[k] + weight_h[k];
	colcnt_h[k] = temp;
	nhnz += temp;
	if (parent != ROOT) {
	    colcnt_h[parent] += temp;	    
	}
    }
    /* Close the last supernode of the A'A partition. */
    part_super_ata[xsup] = neqns - xsup;

    /* Fix the supernode partition in H. */
    
    /* NOTE(review): these buffers come from intMalloc()/intCalloc() but are
       released with plain free(); confirm that those allocators are
       malloc-compatible, or use the project's matching free macro.
       hadj_begin[] and hadj_end[] live inside fnz_hadj and need no
       separate release. */
    free (set);
    free (prvlf);
    free (level);
    free (weight);
    free (fdesc);
    free (nchild);
    free (prvnbr);
    free (fnz_hadj);

    free (first);
    free (firstset);
    free (weight_h);
    free (rowcnt_h);
    free (rowcnt);
    free (colcnt);
    
#if ( PRNTlevel==1 )
    /* NOTE(review): the ratio divides by nhnz, which is 0 when neqns is 0. */
    printf(".. qrnzcnt() nlnz %d, nhnz %d, nlnz/nhnz %.2f\n", 
		*nlnz, nhnz, (float) *nlnz/nhnz);
#endif

#if ( DEBUGlevel>=2 )
    print_int_vec("part_super_h", neqns, part_super_h);
#endif    

    return 0;
    
} /* qrnzcnt */
void
psgstrf_relax_snode(
		    const int n, /* number of columns in the matrix */
		    superlumt_options_t *psgstrf_options,
		    pxgstrf_relax_t *pxgstrf_relax /* relaxed s-nodes */
		    )
{
/*
 * -- SuperLU MT routine (version 2.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley,
 * and Xerox Palo Alto Research Center.
 * September 10, 2007
 *
 * Purpose
 * =======
 *   psgstrf_relax_snode() identifies the initial relaxed supernodes, 
 *   assuming that the matrix has been reordered according to the postorder
 *   of the etree.
 *
 * Arguments
 * =========
 *   n                (input) number of columns in the matrix.
 *   psgstrf_options  (input) supplies etree[] (column elimination tree,
 *                    with n as the parent of a root) and relax (maximum
 *                    number of columns allowed in a relaxed supernode).
 *   pxgstrf_relax    (output) description of the relaxed supernodes:
 *                      [0].size      number of relaxed supernodes found;
 *                      [k].fcol/size first column and width of the k-th
 *                                    supernode, k = 1, 2, ...;
 *                      [last+1].fcol set to n to terminate the list.
 *                    Up to n supernodes can be found, so the array needs
 *                    room for n+2 entries.
 */ 
    register int j, parent, rs;
    register int fcol;	 /* beginning of a snode */
#ifdef DOMAINS
    register int i;	 /* column index for tagging in_domain[]; this
			    declaration was missing, so the DOMAINS build
			    could not compile */
#endif
    int *desc;  /* no of descendants of each etree node. */
    int *etree = psgstrf_options->etree; /* column elimination tree */
    int relax = psgstrf_options->relax; /* maximum no of columns allowed 
					   in a relaxed s-node */
    
    desc = intCalloc(n+1);

    /* Compute the number of descendants of each node in the etree.
       A single ascending pass suffices because, in postorder, every
       child is visited before its parent. */
    for (j = 0; j < n; j++) {
	parent = etree[j];
	desc[parent] += desc[j] + 1;
    }
    
    rs = 1;    /* slot 0 is reserved for the supernode count */
    
    /* Identify the relaxed supernodes by postorder traversal of the etree. */
    for (j = 0; j < n; ) { 
	/* j is a leaf here: climb towards the root for as long as the
	   ancestor still has fewer than `relax` descendants. */
	parent = etree[j];
	fcol = j;
	while ( parent != n && desc[parent] < relax ) {
	    j = parent;
	    parent = etree[j];
	}
	/* found a supernode with j being the last column. */
	pxgstrf_relax[rs].fcol = fcol;
	pxgstrf_relax[rs].size = j - fcol + 1;
#ifdef DOMAINS
	for (i = fcol; i <= j; ++i) in_domain[i] = RELAXED_SNODE;
#endif
	j++;    rs++;
	/* Search for a new leaf; the bound is tested before desc[j] is
	   read. */
	while ( j < n && desc[j] != 0 ) j++;
    }

    pxgstrf_relax[rs].fcol = n;
    pxgstrf_relax[0].size = rs-1; /* number of relaxed supernodes */

#if (PRNTlevel==1)
    printf(".. No of relaxed s-nodes %d\n", pxgstrf_relax[0].size);
#endif
    
    SUPERLU_FREE (desc);

}