Ejemplo n.º 1
0
/*
 * Count the total number of nonzeros in factors L and U,  and in the 
 * symmetrically reduced L. 
 *
 * Arguments
 * =========
 *   n       (in)  index of the entry of Glu->supno that holds the number
 *                 of the last supernode; nothing is counted if n <= 0.
 *   xprune  (in)  per-column end position of the pruned subscript list.
 *   nnzL    (out) nonzero count of L: for every supernode, the first
 *                 column contributes its full subscript length and each
 *                 following column one entry less.
 *   nnzU    (out) Glu->nextu plus, for every supernode, the entries of
 *                 the triangular block on and above its diagonal.
 *   Glu     (in)  global LU data structures (only read here).
 *
 * The statistics are only printed when PRNTlevel == 1.
 */
void
countnz(const int n, int *xprune, int *nnzL, int *nnzU, GlobalLU_t *Glu)
{
    register int nsuper, fsupc, i, j, nnzL0, jlen, irep;
    register int nnzsup = 0;
    /* NOTE: xsup / xsup_end are presumably referenced by the SUPER_REP()
       and SINGLETON() macros, so these locals must keep their names. */
    register int *xsup, *xsup_end, *xlsub, *xlsub_end, *supno;

    xsup      = Glu->xsup;
    xsup_end  = Glu->xsup_end;
    xlsub     = Glu->xlsub;
    xlsub_end = Glu->xlsub_end;
    supno     = Glu->supno;
    *nnzU     = Glu->nextu;
    nnzL0     = 0;
    *nnzL     = 0;

    /* Bail out before supno[n] is read: for n <= 0 that index is not a
       valid last-column entry (and is negative for n < 0). The outputs
       have already been initialised above. */
    if ( n <= 0 ) return;

    nsuper    = supno[n];

    /* 
     * For each supernode ...
     */
    for (i = 0; i <= nsuper; i++) {
        fsupc = xsup[i];
        jlen = xlsub_end[fsupc] - xlsub[fsupc];
        /* rows-by-columns footprint of the supernode */
        nnzsup += jlen * (xsup_end[i] - fsupc);

        for (j = fsupc; j < xsup_end[i]; j++) {
            *nnzL += jlen;
            *nnzU += j - fsupc + 1;
            jlen--;  /* next column is one entry shorter */
        }

        /* Size of the pruned list of the supernode's representative
           column; a singleton's list starts at xlsub_end[], otherwise
           at xlsub[]. */
        irep = SUPER_REP(i);
        if ( SINGLETON(supno[irep]) )
            nnzL0 += xprune[irep] - xlsub_end[irep];
        else 
            nnzL0 += xprune[irep] - xlsub[irep];
    }

#if ( PRNTlevel==1 )
    printf(".. # supernodes = %d\n", nsuper+1);
    printf(".. # edges in symm-reduced L = %d\n", nnzL0);
    if ( Glu->dynamic_snode_bound )
      printf(".. # NZ in LUSUP %d, dynamic bound %d, utilization %.2f\n",
             nnzsup, Glu->nextlu, (float)nnzsup/Glu->nextlu);
    else
      printf(".. # NNZ in LUSUP %d, static bound %d, utilization %.2f\n",
             nnzsup, Glu->nzlumax, (float)nnzsup/Glu->nzlumax);
#endif
}
Ejemplo n.º 2
0
void
psgstrf_panel_bmod(
                   const int  pnum, /* process number */
                   const int  m,    /* number of rows in the matrix */
                   const int  w,    /* current panel width */
                   const int  jcol, /* leading column of the current panel */
                   const int  bcol, /* first column of the farthest busy snode*/
                   int   *inv_perm_r,/* in; inverse of the row pivoting */
                   int   *etree,     /* in */
                   int   *nseg,      /* modified */
                   int   *segrep,    /* modified */
                   int   *repfnz,    /* modified, size n-by-w */
                   int   *panel_lsub,/* modified */
                   int   *w_lsub_end,/* modified */
                   int   *spa_marker,/* modified; size n-by-w */
                   float *dense, /* modified, size n-by-w */
                   float *tempv, /* working array - zeros on input/output */
                   pxgstrf_shared_t *pxgstrf_shared /* modified */
                   )
{
/*
 * -- SuperLU MT routine (version 2.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley,
 * and Xerox Palo Alto Research Center.
 * September 10, 2007
 *
 * Purpose
 * =======
 *
 *    Performs numeric block updates (sup-panel) in topological order.
 *    It features combined 1D and 2D blocking of the source updating s-node.
 *    It consists of two steps:
 *       (1) accumulates updates from "done" s-nodes.
 *       (2) accumulates updates from "busy" s-nodes.
 *
 *    Before entering this routine, the nonzeros of the original A in
 *    this panel were already copied into the SPA dense[n,w].
 *
 * Updated/Output arguments
 * ========================
 *    L[*,j:j+w-1] and U[*,j:j+w-1] are returned collectively in the
 *    m-by-w vector dense[*,w]. The locations of nonzeros in L[*,j:j+w-1]
 *    are given by lsub[*] and U[*,j:j+w-1] by (nseg,segrep,repfnz).
 *
 * Implementation notes
 * ====================
 *    o The blocking thresholds rowblk/colblk are fetched once through
 *      sp_ienv(4)/sp_ienv(5) and cached in function-local statics.
 *      NOTE(review): the "first" flag is not synchronized here; confirm
 *      that the first call cannot race between threads.
 *    o In step (2) the scalars j, k, ksub and kcol are reused as scratch
 *      variables; kcol is re-established from etree[krep] at the end of
 *      every iteration of the outer while-loop.
 *    o xsup_end is presumably read by the SUPER_REP() macro, so the local
 *      pointer names must not be changed.
 *
 */
    GlobalLU_t *Glu = pxgstrf_shared->Glu;  /* modified */
    Gstat_t *Gstat = pxgstrf_shared->Gstat; /* modified */
    register int j, k, ksub;
    register int fsupc, nsupc, nsupr, nrow;
    register int kcol, krep, ksupno, dadsupno;
    register int jj;          /* index through each column in the panel */
    int          *xsup, *xsup_end, *supno;
    int          *lsub, *xlsub, *xlsub_end;
    int          *repfnz_col; /* repfnz[] for a column in the panel */
    float       *dense_col;  /* dense[] for a column in the panel */
    int          *col_marker; /* each column of the spa_marker[*,w] */
    int          *col_lsub;   /* each column of the panel_lsub[*,w] */
    static   int first = 1, rowblk, colblk;

#ifdef PROFILE
    double   t1, t2; /* temporary time */
#endif
    
#ifdef PREDICT_OPT    
    register float pmod, max_child_eft = 0, sum_pmod = 0, min_desc_eft = 0;
    register float pmod_eft;
    register int   kid, ndesc = 0;
#endif
    
#if ( DEBUGlevel>=2 )
    int dbg_addr = 0*m;
#endif
    
    /* One-time lookup of the 2-D blocking thresholds. */
    if ( first ) {
        rowblk   = sp_ienv(4);
        colblk   = sp_ienv(5);
        first = 0;
    }
    
    xsup      = Glu->xsup;
    xsup_end  = Glu->xsup_end;
    supno     = Glu->supno;
    lsub      = Glu->lsub;
    xlsub     = Glu->xlsub;
    xlsub_end = Glu->xlsub_end;

#if ( DEBUGlevel>=2 )
    /*if (jcol >= LOCOL && jcol <= HICOL)
    check_panel_dfs_list(pnum, "begin", jcol, *nseg, segrep);*/
if (jcol == BADPAN)
    printf("(%d) Enter psgstrf_panel_bmod() jcol %d,BADCOL %d,dense_col[%d] %.10f\n",
           pnum, jcol, BADCOL, BADROW, dense[dbg_addr+BADROW]);
#endif    

    /* --------------------------------------------------------------------
       For each non-busy supernode segment of U[*,jcol] in topological order,
       perform sup-panel update.
       -------------------------------------------------------------------- */
    k = *nseg - 1;  /* segrep[] is walked from its last entry to its first */
    for (ksub = 0; ksub < *nseg; ++ksub) {
        /*
         * krep = representative of current k-th supernode
         * fsupc = first supernodal column
         * nsupc = no of columns in a supernode
         * nsupr = no of rows in a supernode
         */
        krep = segrep[k--];
        fsupc = xsup[supno[krep]];
        nsupc = krep - fsupc + 1;
        nsupr = xlsub_end[fsupc] - xlsub[fsupc];
        nrow = nsupr - nsupc;  /* rows below the supernode's diagonal block */

#ifdef PREDICT_OPT
        pmod = Gstat->procstat[pnum].fcops;
#endif
            
        /* Use the 2-D kernel only when the supernode is large enough in
           both dimensions; otherwise use the 1-D kernel. */
        if ( nsupc >= colblk && nrow >= rowblk ) {
            /* 2-D block update */
#ifdef GEMV2
            psgstrf_bmod2D_mv2(pnum, m, w, jcol, fsupc, krep, nsupc, nsupr, 
                               nrow, repfnz, panel_lsub, w_lsub_end, 
                               spa_marker, dense, tempv, Glu, Gstat);
#else
            psgstrf_bmod2D(pnum, m, w, jcol, fsupc, krep, nsupc, nsupr, nrow,
                           repfnz, panel_lsub, w_lsub_end, spa_marker,
                           dense, tempv, Glu, Gstat);
#endif
        } else {
            /* 1-D block update */
#ifdef GEMV2
            psgstrf_bmod1D_mv2(pnum, m, w, jcol, fsupc, krep, nsupc, nsupr,
                               nrow, repfnz, panel_lsub, w_lsub_end, 
                               spa_marker, dense, tempv, Glu, Gstat);
#else
            psgstrf_bmod1D(pnum, m, w, jcol, fsupc, krep, nsupc, nsupr, nrow,
                           repfnz, panel_lsub, w_lsub_end, spa_marker,
                           dense, tempv, Glu, Gstat);
#endif
        }
        
#ifdef PREDICT_OPT
        /* Flops spent on this update = difference of the flop counter. */
        pmod = Gstat->procstat[pnum].fcops - pmod;
        /* The panel status is read from pxgstrf_shared, consistently with
           the other PREDICT_OPT sections of this routine. A non-positive
           size is used as an offset back to another column. */
        kid = (pxgstrf_shared->pan_status[krep].size > 0) ?
            krep : (krep + pxgstrf_shared->pan_status[krep].size);
        desc_eft[ndesc].eft = cp_panel[kid].est + cp_panel[kid].pdiv;
        desc_eft[ndesc++].pmod = pmod;
#endif
        
#if ( DEBUGlevel>=2 )
if (jcol == BADPAN)
    printf("(%d) non-busy update: krep %d, repfnz %d, dense_col[%d] %.10e\n",
           pnum, krep, repfnz[dbg_addr+krep], BADROW, dense[dbg_addr+BADROW]);
#endif

    } /* for each updating supernode ... */
    
#if ( DEBUGlevel>=2 )
if (jcol == BADPAN)
    printf("(%d) After non-busy update: dense_col[%d] %.10e\n",
           pnum, BADROW, dense[dbg_addr+BADROW]);
#endif
    
    /* ---------------------------------------------------------------------
     * Now wait for the "busy" s-nodes to become "done" -- this amounts to
     * climbing up the e-tree along the path starting from "bcol".
     * Several points are worth noting:
     *
     *  (1) There are two possible relations between supernodes and panels
     *      along the path of the e-tree:
     *      o |s-node| < |panel|
     *        want to climb up the e-tree one column at a time in order
     *        to achieve more concurrency
     *      o |s-node| > |panel|
     *        want to climb up the e-tree one panel at a time; this
     *        processor is stalled anyway while waiting for the panel.
     *
     *  (2) Need to accommodate new fills, append them in panel_lsub[*,w].
     *      o use an n-by-w marker array, as part of the SPA (not scalable!)
     *
     *  (3) Symbolically, need to find out repfnz[S, w], for each (busy)
     *      supernode S.
     *      o use dense[inv_perm_r[kcol]], filter all zeros
     *      o detect the first nonzero in each segment
     *        (at this moment, the boundary of the busy supernode/segment
     *         S has already been identified)
     *
     * --------------------------------------------------------------------- */

    kcol = bcol;
    while ( kcol < jcol ) {
        /* Pointers to each column of the w-wide arrays. */
        repfnz_col = repfnz;
        dense_col = dense;
        col_marker = spa_marker;
        col_lsub = panel_lsub;

        /* Wait for the supernode, and collect wait-time statistics. */
        if ( pxgstrf_shared->spin_locks[kcol] ) {
#ifdef PROFILE
            TIC(t1);
#endif
            await( &pxgstrf_shared->spin_locks[kcol] );

#ifdef PROFILE
            TOC(t2, t1);
            Gstat->panstat[jcol].pipewaits++;
            Gstat->panstat[jcol].spintime += t2;
            Gstat->procstat[pnum].spintime += t2;
#ifdef DOPRINT
            PRINT_SPIN_TIME(1);
#endif
#endif          
        }
        
        /* Find leading column "fsupc" in the supernode that
           contains column "kcol" */
        ksupno = supno[kcol];
        fsupc = kcol;

#if ( DEBUGlevel>=2 )
        /*if (jcol >= LOCOL && jcol <= HICOL)    */
  if ( jcol==BADCOL )
    printf("(%d) psgstrf_panel_bmod[1] kcol %d, ksupno %d, fsupc %d\n",
           pnum, kcol, ksupno, fsupc);
#endif
        
        /* Wait for the whole supernode to become "done" --
           climb up e-tree one column at a time */
        do {
            krep = SUPER_REP( ksupno );
            kcol = etree[kcol];
            if ( kcol >= jcol ) break;  /* reached the current panel */
            if ( pxgstrf_shared->spin_locks[kcol] ) {
#ifdef PROFILE
                TIC(t1);
#endif
                await ( &pxgstrf_shared->spin_locks[kcol] );

#ifdef PROFILE
                TOC(t2, t1);
                Gstat->panstat[jcol].pipewaits++;
                Gstat->panstat[jcol].spintime += t2;
                Gstat->procstat[pnum].spintime += t2;
#ifdef DOPRINT
                PRINT_SPIN_TIME(2);
#endif
#endif          
            }

            dadsupno = supno[kcol];

#if ( DEBUGlevel>=2 )
            /*if (jcol >= LOCOL && jcol <= HICOL)*/
if ( jcol==BADCOL )
    printf("(%d) psgstrf_panel_bmod[2] krep %d, dad=kcol %d, dadsupno %d\n",
           pnum, krep, kcol, dadsupno);
#endif      

        } while ( dadsupno == ksupno );  /* parent still in the same s-node */

        /* Append the new segment into segrep[*]. After column_bmod(),
           copy_to_ucol() will use them. */
        segrep[*nseg] = krep;
        ++(*nseg);
        
        /* Determine repfnz[krep, w] for each column in the panel.
           The four column pointers advance by m per panel column. */
        for (jj = jcol; jj < jcol + w; ++jj, dense_col += m, 
               repfnz_col += m, col_marker += m, col_lsub += m) {
            /*
             * Note: relaxed supernode may not form a path on the e-tree,
             *       but its column numbers are contiguous.
             */
#ifdef SCATTER_FOUND
            /* Variant driven by the marker array: fills are appended only
               when a marked row is found in the segment. */
            for (kcol = fsupc; kcol <= krep; ++kcol) {
                if ( col_marker[inv_perm_r[kcol]] == jj ) {
                    repfnz_col[krep] = kcol;

                    /* Append new fills in panel_lsub[*,jj]. */
                    j = w_lsub_end[jj - jcol];
/*#pragma ivdep*/
                    for (k = xlsub[krep]; k < xlsub_end[krep]; ++k) {
                        ksub = lsub[k];
                        if ( col_marker[ksub] != jj ) {
                            col_marker[ksub] = jj;
                            col_lsub[j++] = ksub;
                        }
                    }
                    w_lsub_end[jj - jcol] = j;

                    break; /* found the leading nonzero in the segment */
                }
            }

#else
            for (kcol = fsupc; kcol <= krep; ++kcol) {
                if ( dense_col[inv_perm_r[kcol]] != 0.0 ) {
                    repfnz_col[krep] = kcol;
                    break; /* Found the leading nonzero in the U-segment */
                }
            }

            /* In this case, we always treat the L-subscripts of the 
               busy s-node [kcol : krep] as the new fills, even if the
               corresponding U-segment may be all zero. */

            /* Append new fills in panel_lsub[*,jj]. */
            j = w_lsub_end[jj - jcol];
/*#pragma ivdep*/
            for (k = xlsub[krep]; k < xlsub_end[krep]; ++k) {
                ksub = lsub[k];
                if ( col_marker[ksub] != jj ) {
                    col_marker[ksub] = jj;
                    col_lsub[j++] = ksub;
                }
            }
            w_lsub_end[jj - jcol] = j;
#endif

#if ( DEBUGlevel>=2 )
if (jj == BADCOL) {
printf("(%d) psgstrf_panel_bmod[fills]: jj %d, repfnz_col[%d] %d, inv_pr[%d] %d\n",
           pnum, jj, krep, repfnz_col[krep], fsupc, inv_perm_r[fsupc]);
printf("(%d) psgstrf_panel_bmod[fills] xlsub %d, xlsub_end %d, #lsub[%d] %d\n",
       pnum,xlsub[krep],xlsub_end[krep],krep, xlsub_end[krep]-xlsub[krep]);
}
#endif     
        } /* for jj ... */

#ifdef PREDICT_OPT
        pmod = Gstat->procstat[pnum].fcops;
#endif
        
        /* Perform sup-panel updates - use combined 1D + 2D updates. */
        nsupc = krep - fsupc + 1;
        nsupr = xlsub_end[fsupc] - xlsub[fsupc];
        nrow = nsupr - nsupc;
        if ( nsupc >= colblk && nrow >= rowblk ) {
            /* 2-D block update */
#ifdef GEMV2
            psgstrf_bmod2D_mv2(pnum, m, w, jcol, fsupc, krep, nsupc, nsupr,
                               nrow, repfnz, panel_lsub, w_lsub_end, 
                               spa_marker, dense, tempv, Glu, Gstat);
#else
            psgstrf_bmod2D(pnum, m, w, jcol, fsupc, krep, nsupc, nsupr, nrow,
                           repfnz, panel_lsub, w_lsub_end, spa_marker,
                           dense, tempv, Glu, Gstat);
#endif
        } else {
            /* 1-D block update */
#ifdef GEMV2
            psgstrf_bmod1D_mv2(pnum, m, w, jcol, fsupc, krep, nsupc, nsupr,
                               nrow, repfnz, panel_lsub, w_lsub_end, 
                               spa_marker, dense, tempv, Glu, Gstat);
#else
            psgstrf_bmod1D(pnum, m, w, jcol, fsupc, krep, nsupc, nsupr, nrow,
                           repfnz, panel_lsub, w_lsub_end, spa_marker,
                           dense, tempv, Glu, Gstat);
#endif
        }

#ifdef PREDICT_OPT
        pmod = Gstat->procstat[pnum].fcops - pmod;
        kid = (pxgstrf_shared->pan_status[krep].size > 0) ?
               krep : (krep + pxgstrf_shared->pan_status[krep].size);
        desc_eft[ndesc].eft = cp_panel[kid].est + cp_panel[kid].pdiv;
        desc_eft[ndesc++].pmod = pmod;
#endif
        
#if ( DEBUGlevel>=2 )
if (jcol == BADPAN)
    printf("(%d) After busy update: dense_col[%d] %.10f\n",
           pnum, BADROW, dense[dbg_addr+BADROW]);
#endif
        
        /* Go to the parent of "krep" */
        kcol = etree[krep];
        
    } /* while kcol < jcol ... */
    
#if ( DEBUGlevel>=2 )
    /*if (jcol >= LOCOL && jcol <= HICOL)*/
if ( jcol==BADCOL )
    check_panel_dfs_list(pnum, "after-busy", jcol, *nseg, segrep);
#endif

#ifdef PREDICT_OPT
    /* Sort the recorded descendant updates with numcomp, then fold them
       into a running finish-time estimate: each update starts no earlier
       than its own eft and adds its cost pmod. */
    qsort(desc_eft, ndesc, sizeof(desc_eft_t), (int(*)())numcomp);
    pmod_eft = 0;
    for (j = 0; j < ndesc; ++j) {
        pmod_eft = SUPERLU_MAX( pmod_eft, desc_eft[j].eft ) + desc_eft[j].pmod;
    }

    if ( ndesc == 0 ) {
        /* No modifications from descendants */
        pmod_eft = 0;
        for (j = cp_firstkid[jcol]; j != EMPTY; j = cp_nextkid[j]) {
            kid = (pxgstrf_shared->pan_status[j].size > 0) ? 
                        j : (j + pxgstrf_shared->pan_status[j].size);
            pmod_eft = SUPERLU_MAX( pmod_eft,
                                cp_panel[kid].est + cp_panel[kid].pdiv );
        }
    }
    
    cp_panel[jcol].est = pmod_eft;
    
#endif

}
Ejemplo n.º 3
0
int
pzgstrf_column_dfs(
                   const int  pnum,    /* process number */
                   const int  m,       /* number of rows in the matrix */
                   const int  jcol,    /* current column in the panel */
                   const int  fstcol,  /* first column in the panel */
                   int *perm_r,   /* row pivotings that are done so far */
                   int *ispruned, /* in */
                   int *col_lsub, /* the RHS vector to start the dfs */
                   int lsub_end,  /* size of col_lsub[] */
                   int *super_bnd,/* supernode partition by upper bound */
                   int *nseg,     /* modified - with new segments appended */
                   int *segrep,   /* modified - with new segments appended */
                   int *repfnz,   /* modified */
                   int *xprune,   /* modified */
                   int *marker2,  /* modified */
                   int *parent,   /* working array */
                   int *xplore,   /* working array */
                   pxgstrf_shared_t *pxgstrf_shared /* modified */
                   )
{
/*
 * -- SuperLU MT routine (version 2.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley,
 * and Xerox Palo Alto Research Center.
 * September 10, 2007
 *
 * Purpose
 * =======
 *   pzgstrf_column_dfs() performs a symbolic factorization on column jcol, 
 *   and detects whether column jcol belongs in the same supernode as jcol-1.
 *
 * Local parameters
 * ================
 *   A supernode representative is the last column of a supernode.
 *   The nonzeros in U[*,j] are segments that end at supernodal
 *   representatives. The routine returns a list of such supernodal 
 *   representatives in topological order of the dfs that generates them.
 *   The location of the first nonzero in each such supernodal segment
 *   (supernodal entry location) is also returned.
 *
 *   nseg: no of segments in current U[*,j]
 *   samesuper: samesuper=NO if column j does not belong in the same
 *              supernode as j-1. Otherwise, samesuper=YES.
 *
 *   marker2: A-row --> A-row/col (0/1)
 *   repfnz: SuperA-col --> PA-row
 *   parent: SuperA-col --> SuperA-col
 *   xplore: SuperA-col --> index to L-structure
 *
 *   The explicit DFS stack is kept in parent[] (the node to return to)
 *   and xplore[] (xplore[k] = resume position, xplore[m + k] = end
 *   position of k's adjacency list).
 *
 *   Column 0 has no predecessor, so it always starts a new supernode;
 *   none of the arrays is indexed with jcol-1 in that case.
 *
 *   NOTE: xsup / xsup_end are presumably read by the SUPER_REP(),
 *   SUPER_FSUPC() and SINGLETON() macros; keep these local names.
 *
 * Return value
 * ============
 *     0  success;
 *   > 0  number of bytes allocated when run out of space.
 *
 */
    GlobalLU_t *Glu = pxgstrf_shared->Glu; /* modified */
    Gstat_t *Gstat = pxgstrf_shared->Gstat; /* modified */
    register int jcolm1, jcolm1size, nextl, ifrom;
    register int k, krep, krow, kperm, samesuper, nsuper;
    register int no_lsub;
    int     fsupc;              /* first column in a supernode */
    int     myfnz;              /* first nonz column in a U-segment */
    int     chperm, chmark, chrep, kchild;
    int     xdfs, maxdfs, kpar;
    int     ito;                /* Used to compress row subscripts */
    int     mem_error;
    int     *xsup, *xsup_end, *supno, *lsub, *xlsub, *xlsub_end;
    static  int  first = 1, maxsuper;

    /* One-time lookup of the maximum supernode width. */
    if ( first ) {
        maxsuper = sp_ienv(3);
        first = 0;
    }

    /* Initialize pointers */
    xsup      = Glu->xsup;
    xsup_end  = Glu->xsup_end;
    supno     = Glu->supno;
    lsub      = Glu->lsub;
    xlsub     = Glu->xlsub;
    xlsub_end = Glu->xlsub_end;
    jcolm1    = jcol - 1;
    nextl     = lsub_end;
    no_lsub   = 0;
    samesuper = YES;

    /* Test whether the row structure of column jcol is contained
       in that of column jcol-1. */
    for (k = 0; k < lsub_end; ++k) {
        krow = col_lsub[k];
        if ( perm_r[krow] == EMPTY ) { /* krow is in L */
            ++no_lsub;
            if (marker2[krow] != jcolm1) 
                samesuper = NO; /* row subset test */
            marker2[krow] = jcol;
        }
    }

#if ( DEBUGlevel>=2 )
  if (jcol == BADCOL)
    printf("(%d) pzgstrf_column_dfs[1] %d, fstcol %d, lsub_end %d, no_lsub %d, samesuper? %d\n",
           pnum, jcol, fstcol, lsub_end, no_lsub, samesuper);
#endif
    
    /*
     * For each nonzero in A[fstcol:n,jcol] perform DFS ...
     */
    for (k = 0; k < lsub_end; ++k) {
        krow = col_lsub[k];
        
        /* if krow was visited before, go to the next nonzero
           (this also skips the L rows marked by the loop above) */
        if ( marker2[krow] == jcol ) continue;
        marker2[krow] = jcol;
        kperm = perm_r[krow];
#if ( DEBUGlevel>=3 )
  if (jcol == BADCOL)
    printf("(%d) pzgstrf_column_dfs[inner]: perm_r[krow=%d] %d\n", pnum, krow, kperm);
#endif
        
        /* Ignore the nonzeros in U corresponding to the busy columns
           during the panel DFS. */
        /*if ( lbusy[kperm] != fstcol ) {  xiaoye? */
        if ( kperm >= fstcol ) {
            /*
             * krow is in U: if its supernode representative krep
             * has been explored, update repfnz[*].
             */
            krep = SUPER_REP(supno[kperm]);
            myfnz = repfnz[krep];
            
#if ( DEBUGlevel>=3 )
  if (jcol == BADCOL)
    printf("(%d) pzgstrf_column_dfs[inner-U]: krep %d, myfnz %d, kperm %d\n",
           pnum, krep, myfnz, kperm);
#endif
            if ( myfnz != EMPTY ) {     /* Visited before */
                if ( myfnz > kperm ) repfnz[krep] = kperm;
                /* continue; */
            } else {
                /* Otherwise, perform dfs starting at krep */
                parent[krep] = EMPTY;
                repfnz[krep] = kperm;
                /* Select the adjacency range [xdfs, maxdfs) of krep:
                   the pruned list if one exists, else the part of the
                   supernode's subscript list past column krep. */
                if ( ispruned[krep] ) {
                    if ( SINGLETON( supno[krep] ) )
                        xdfs = xlsub_end[krep];
                    else xdfs = xlsub[krep];
                    maxdfs = xprune[krep];
#ifdef PROFILE
                    Gstat->procstat[pnum].pruned++;
#endif              
                } else {
                    fsupc = SUPER_FSUPC( supno[krep] );
                    xdfs = xlsub[fsupc] + krep-fsupc+1;
                    maxdfs = xlsub_end[fsupc];
#ifdef PROFILE
                    Gstat->procstat[pnum].unpruned++;
#endif              
                }
                
                do {
                    /* 
                     * For each unmarked kchild of krep ...
                     */
                    while ( xdfs < maxdfs ) {
                        
                        kchild = lsub[xdfs];
                        xdfs++;
                        chmark = marker2[kchild];
                        
                        if ( chmark != jcol ) { /* Not reached yet */
                            marker2[kchild] = jcol;
                            chperm = perm_r[kchild];
                            
                            if ( chperm == EMPTY ) {
                                /* kchild is in L: place it in L[*,k]. */
                                ++no_lsub;
                                col_lsub[nextl++] = kchild;
                                if (chmark != jcolm1) samesuper = NO;
                            } else {
                                /* kchild is in U: chrep = its supernode
                                 * representative. If its rep has 
                                 * been explored, update its repfnz[*].
                                 */
                                chrep = SUPER_REP( supno[chperm] );
                                myfnz = repfnz[chrep];
                                if ( myfnz != EMPTY ) { /* Visited before */
                                    if ( myfnz > chperm )
                                        repfnz[chrep] = chperm;
                                } else {
                                    /* Continue dfs at super-rep of kchild;
                                       save the resume range of krep. */
                                    xplore[krep] = xdfs;        
                                    xplore[m + krep] = maxdfs;  
                                    parent[chrep] = krep;
                                    krep = chrep; /* Go deeper down G(L^t) */
                                    repfnz[krep] = chperm;
                                    if ( ispruned[krep] ) {
                                        if ( SINGLETON( supno[krep] ) )
                                            xdfs = xlsub_end[krep];
                                        else xdfs = xlsub[krep];
                                        maxdfs = xprune[krep];
#ifdef PROFILE
                                        Gstat->procstat[pnum].pruned++;
#endif              
                                    } else {
                                        fsupc = SUPER_FSUPC( supno[krep] );
                                        xdfs = xlsub[fsupc] + krep-fsupc+1;
                                        maxdfs = xlsub_end[fsupc];
#ifdef PROFILE
                                        Gstat->procstat[pnum].unpruned++;
#endif              
                                    }
                                }
                            } /* else */
                        } /* if */
                    } /* while */
                    
                    /* krow has no more unexplored nbrs:
                     *    place supernode-rep krep in postorder DFS,
                     *    backtrack dfs to its parent.
                     */
                    segrep[*nseg] = krep;
                    ++(*nseg);
#if ( DEBUGlevel>=3 )
  if (jcol == BADCOL)
    printf("(%d) pzgstrf_column_dfs[inner-dfs] new nseg %d, repfnz[krep=%d] %d\n",
           pnum, *nseg, krep, repfnz[krep]);
#endif
                    kpar = parent[krep]; /* Pop from stack, mimic recursion */
                    if ( kpar == EMPTY ) break; /* dfs done */
                    krep = kpar;
                    xdfs = xplore[krep];
                    maxdfs = xplore[m + krep];
                } while ( kpar != EMPTY ); /* Do ... until empty stack */
                
            } /* else myfnz ... */
        } /* if kperm >= fstcol ... */
    } /* for each nonzero ... */
        
#if ( DEBUGlevel>=3 )
  if (jcol == BADCOL)
    printf("(%d) pzgstrf_column_dfs[2]: nextl %d, samesuper? %d\n",
           pnum, nextl, samesuper);
#endif

    /* assert(no_lsub == nextl - no_usub);*/

    /* ---------------------------------------------------------
       Check to see if j belongs in the same supernode as j-1.
       --------------------------------------------------------- */
    
    /* Column 0 has no column jcol-1: it must start a new supernode.
       Deciding this here keeps supno[], xlsub[] and xlsub_end[] from
       being indexed with -1 below. */
    if ( jcol == 0 ) samesuper = NO;

    if ( samesuper == YES ) {
        nsuper = supno[jcolm1];
        jcolm1size = xlsub_end[jcolm1] - xlsub[jcolm1];
#if ( DEBUGlevel>=3 )
  if (jcol == BADCOL)
    printf("(%d) pzgstrf_column_dfs[YES] jcol-1 %d, jcolm1size %d, supno[%d] %d\n",
           pnum, jcolm1, jcolm1size, jcolm1, nsuper);
#endif  
        if ( no_lsub != jcolm1size-1 )
            samesuper = NO;        /* Enforce T2 supernode */
        else {
            /* Make sure the number of columns in a supernode does not
               exceed threshold. */
            fsupc = xsup[nsuper];
            if ( jcol - fsupc >= maxsuper )
                samesuper = NO;
            else {
                /* start of a supernode in H (coarser partition) */
                if ( super_bnd[jcol] != 0 ) samesuper = NO;
            }
        }
    }
    
    /* If jcol starts a new supernode, allocate storage for 
     * the subscript set of both first and last column of
     * a previous supernode. (first for num values, last for pruning)
     */
    if ( samesuper == NO ) { /* starts a new supernode */
        nsuper = NewNsuper(pnum, pxgstrf_shared, &Glu->nsuper);
        xsup[nsuper] = jcol;
        
        /* Copy column jcol; also reserve space to store pruned graph */
        if ((mem_error = Glu_alloc(pnum, jcol, 2*no_lsub, LSUB, &ito, 
                                  pxgstrf_shared)))
            return mem_error;
        xlsub[jcol] = ito;
        lsub = Glu->lsub;  /* re-read the pointer after the allocation */
        for (ifrom = 0; ifrom < nextl; ++ifrom) {
            krow = col_lsub[ifrom];
            if ( perm_r[krow] == EMPTY ) /* Filter U-subscript */
                lsub[ito++] = krow;
        }
        k = ito;
        xlsub_end[jcol] = k;
        
        /* Make a copy in case it is a singleton supernode */
        for (ifrom = xlsub[jcol]; ifrom < ito; ++ifrom)
            lsub[k++] = lsub[ifrom];
        
    } else { /* Supernode of size > 1: overwrite column jcol-1 */
        /* fsupc was set by the same-supernode test above */
        k = xlsub_end[fsupc];
        xlsub[jcol] = k;
        xprune[fsupc] = k;
        for (ifrom = 0; ifrom < nextl; ++ifrom) {
            krow = col_lsub[ifrom];
            if ( perm_r[krow] == EMPTY ) /* Filter U-subscript */
                lsub[k++] = krow;
        }
        xlsub_end[jcol] = k;
    }

#if ( DEBUGlevel>=3 )
  if (jcol == BADCOL) {
    printf("(%d) pzgstrf_column_dfs[3]: %d in prev s-node %d? %d\n",
           pnum, jcol, fsupc, samesuper);
    PrintInt10("lsub", xlsub_end[jcol]-xlsub[jcol], &lsub[xlsub[jcol]]);
  }
#endif
    
    /* Tidy up the pointers before exit */
    xprune[jcol] = k;     /* upper bound for pruning */
    supno[jcol] = nsuper;
    xsup_end[nsuper] = jcol + 1;
    
    return 0;
}
Ejemplo n.º 4
0
void
pxgstrf_super_bnd_dfs(
                      const int  pnum, /* process number */
                      const int  m,    /* number of rows in the matrix */
                      const int  n,    /* number of columns in the matrix */
                      const int  jcol, /* first column of the H-supernode */
                      const int  w,    /* size of the H-supernode */
                      SuperMatrix *A,  /* original matrix */
                      int        *perm_r,   /* in */
                      int        *iperm_r,  /* in; inverse of perm_r */
                      int        *xprune,   /* in */
                      int        *ispruned, /* in */
                      int        *marker,   /* modified */
                      int        *parent,   /* working array */
                      int        *xplore,   /* working array */
                      pxgstrf_shared_t *pxgstrf_shared /* modified */         
                      )
{
/*
 * -- SuperLU MT routine (version 1.0) --
 * Univ. of California Berkeley, Xerox Palo Alto Research Center,
 * and Lawrence Berkeley National Lab.
 * August 15, 1997
 *
 * Purpose
 * =======
 *
 * Performs a symbolic structure prediction on a supernode in the Householder
 * matrix H, with jcol being the leading column.
 *
 * Method
 * ======
 *
 * For every column of the H-supernode a depth-first search is started
 * from each of its nonzero rows. Rows that are not yet pivoted
 * (perm_r[] == EMPTY) are counted once in nrow; pivoted rows lead to the
 * representative of their supernode, whose adjacency list is searched in
 * turn. A single tag value, found = n + jcol, is shared by all w columns,
 * so nrow is the size of the union of the rows reached. The result is
 * handed to DynamicSetMap() as nrow * w.
 *
 * The explicit DFS stack is kept in parent[] (node to return to) and
 * xplore[] (xplore[k] = resume position, xplore[m + k] = end position).
 *
 * NOTE: xsup / xsup_end are presumably read by the SUPER_REP(),
 * SUPER_FSUPC() and SINGLETON() macros; keep these local names.
 *
 */
    GlobalLU_t *Glu = pxgstrf_shared->Glu; /* modified */             
    register int krep, chperm, chrep, kchild;
    register int invp_rep; /* "krep" numbered in the original A */
    register int krow, kperm, xdfs, maxdfs, kpar;
    register int fsupc, k, jj, found;
    register int nrow;  /* union of the nonzero rows in a supernode */
    NCPformat    *Astore;
    int          *asub, *xa_begin, *xa_end;
    int          *xsup, *xsup_end, *supno, *lsub, *xlsub, *xlsub_end;

    /* Initialize pointers */
    xsup       = Glu->xsup;
    xsup_end   = Glu->xsup_end;
    supno      = Glu->supno;
    lsub       = Glu->lsub;
    xlsub      = Glu->xlsub;
    xlsub_end  = Glu->xlsub_end;
    Astore   = A->Store;
    asub     = Astore->rowind;
    xa_begin = Astore->colbeg;
    xa_end   = Astore->colend;
    nrow = 0;
    found = n + jcol;  /* "visited" tag used in marker[] for this call */

    /* For each column in the H-supernode */
    for (jj = jcol; jj < jcol + w; ++jj) {

      /* For each nonz in A[*,jj] do dfs */
      for (k = xa_begin[jj]; k < xa_end[jj]; ++k) {
          krow = asub[k];
          
          /* krow was visited before, go to the next nonzero. */
          if ( marker[krow] == found ) continue;
          
          /* For each unmarked nbr krow of jj ...   */
          kperm = perm_r[krow];
          
          if ( kperm == EMPTY ) { /* krow is in L */
              marker[krow] = found;
              ++nrow;
          } else {
              /* krow is in U: if its supernode-rep krep has been explored,
                               skip the search.       */
              krep = SUPER_REP( supno[kperm] );
              invp_rep = iperm_r[krep];
                  
              /* Perform dfs starting at krep */
              if ( marker[invp_rep] != found ) {
                  marker[invp_rep] = found;
                  parent[krep] = EMPTY;
                  /* Select the adjacency range [xdfs, maxdfs) of krep:
                     the pruned list if one exists, else the part of the
                     supernode's subscript list past column krep. */
                  if ( ispruned[krep] ) {
                      if ( SINGLETON( supno[krep] ) )
                          xdfs = xlsub_end[krep];
                      else xdfs = xlsub[krep];
                      maxdfs = xprune[krep];
                  } else {
                      fsupc = SUPER_FSUPC( supno[krep] );
                      xdfs = xlsub[fsupc] + krep-fsupc+1;
                      maxdfs = xlsub_end[fsupc];
                  }

                  do {
                      /* For each unmarked kchild of krep ... */
                      while ( xdfs < maxdfs ) {
                          kchild = lsub[xdfs];
                          xdfs++;
                          if (marker[kchild] != found) { /* Not reached yet */
                              chperm = perm_r[kchild];
                              
                              if ( chperm == EMPTY ) { /* kchild is in L */
                                  marker[kchild] = found;
                                  ++nrow;
                              } else {
                                  /* kchild is in U: 
                                   *   chrep = its supernode-rep. If its rep
                                   *   has been explored, skip the search.
                                   */
                                  chrep = SUPER_REP( supno[chperm] );
                                  invp_rep = iperm_r[chrep];
                                  
                                  /* Continue dfs at snode-rep of kchild */
                                  if ( marker[invp_rep] != found ) {
                                      marker[invp_rep] = found;
                                      /* Save the resume range of krep. */
                                      xplore[krep] = xdfs;
                                      xplore[m + krep] = maxdfs;
                                      parent[chrep] = krep;
                                      krep = chrep;/* Go deeper down G(L^t) */
                                      /* Both branches below set xdfs and
                                         maxdfs, so no preliminary
                                         assignment is needed here. */
                                      if ( ispruned[krep] ) {
                                          if ( SINGLETON( supno[krep] ) )
                                              xdfs = xlsub_end[krep];
                                          else xdfs = xlsub[krep];
                                          maxdfs = xprune[krep];
                                      } else {
                                          fsupc = SUPER_FSUPC(supno[krep]);
                                          xdfs = xlsub[fsupc] + krep-fsupc+1;
                                          maxdfs = xlsub_end[fsupc];
                                      }
                                  } /* if */
                              } /* else */
                          } /* if... */
                      } /* while xdfs < maxdfs */
                      
                      /* krep has no more unexplored nbrs:
                       *    backtrack dfs to its parent. No segment list
                       *    is recorded by this routine; only nrow is
                       *    accumulated.
                       */
                      kpar = parent[krep]; /* Pop stack, mimic recursion */
                      if ( kpar == EMPTY ) break; /* dfs done */
                      krep = kpar;
                      xdfs = xplore[krep];
                      maxdfs = xplore[m+krep];
                  } while ( kpar != EMPTY ); /* do-while - until empty stack */
              } /* if */
          } /* else */
      } /* for each nonz in A[*,jj] */
    } /* for jj ... */

    /* NOTE(review): presumably sets up the storage map of the supernode
       from the predicted size nrow * w -- confirm in DynamicSetMap(). */
    DynamicSetMap(pnum, jcol, nrow * w, pxgstrf_shared);
    
/*    for (i = 1; i < w; ++i) Glu->map_in_sup[jcol + i] = -i;*/
    
#if ( DEBUGlevel>=1 )
    printf("(%d) pxgstrf_super_bnd_dfs(): jcol= %d, w= %d, nrow= %d\n",
           pnum, jcol, w, nrow);
#endif
}
Ejemplo n.º 5
0
void
pdgstrf_panel_dfs(
		  const int  pnum,  /* process number */
		  const int  m,     /* number of rows in the matrix */
		  const int  w,     /* current panel width */
		  const int  jcol,  /* leading column of the current panel */
		  SuperMatrix *A,   /* original matrix */
		  int *perm_r, /* row pivotings that are done so far */
		  int *xprune, /* in */
		  int *ispruned,   /* in */
		  int *lbusy,      /* in; size n */
		  int *nseg,	   /* out */
		  int *panel_lsub, /* out */
		  int *w_lsub_end, /* out; values irrelevant on entry */
		  int *segrep,     /* out */
		  int *repfnz,     /* out */
		  int *marker,     /* modified */
		  int *spa_marker, /* modified; size n-by-w */
		  int        *parent,     /* working array */
		  int *xplore,     /* working array */
		  double *dense,      /* out; size n-by-w */
		  GlobalLU_t *Glu         /* modified */
		  )
{
/*
 * -- SuperLU MT routine (version 2.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley,
 * and Xerox Palo Alto Research Center.
 * September 10, 2007
 *
 * Purpose
 * =======
 *
 *   Performs a symbolic factorization on a panel of columns [jcol, jcol+w).
 *   It skips all those busy descendants that are worked on by other
 *   processors along the e-tree path.
 *
 * Notes
 * =====
 *
 * (1) panel_lsub[0:w*n-1]: temporary for the nonzero row indices below 
 *     the panel diagonal, which will be used later in the inner LU
 *     factorization. For the busy columns, some of the nonzeros in U
 *     may be mistakenly placed in this list, because "perm_r" is
 *     still "empty". Later, during dcolumn_dfs in the inner factorization,
 *     we must filter those nonzeros belonging in U.
 *
 * (2) A supernode representative is the last column of a supernode.
 *     The nonzeros in U[*,j] are segments that end at supernodal
 *     representatives.
 *
 * (3) The routine returns one list of the supernodal representatives
 *     in topological order of the DFS that generates them. This list is
 *     a superset of the topological order of each individual column within
 *     the panel. The location of the first nonzero in each supernodal
 *     segment (supernodal entry location) is also returned. Each column
 *     has a separate list for this purpose.
 *
 * (4) Two marker arrays are used to facilitate dfs:
 *     marker[i] == jj, if i was visited during dfs of current column jj;
 *     marker1[i] == jcol, if i was visited by earlier columns in this panel;
 *
 * (5) The dfs stack is the combination of xplore[2*m] and parent[m]:
 *     xplore[k]     - pointer to k's adjancency list where search begins
 *     xplore[m + k] - pointer to k's adjancency list where search ends
 *
 * (6) Array mappings
 *     marker: A-row --> A-row/col (0/1)
 *     repfnz: SuperA-col --> PA-row
 *     parent: SuperA-col --> SuperA-col
 *     xplore: SuperA-col --> index to L-structure
 *
 */
    NCPformat *Astore;
    double    *a;
    int       *asub;
    int       *xa_begin, *xa_end;
    register int krep, chperm, chmark, chrep, kchild, myfnz;
    register int k, krow, kmark, kperm, fsupc;
    register int xdfs, maxdfs, kpar, jj, nextp;
    register int nextl_col;/* next open position in panel_lsub[*,jj] */
    int       *marker1;	   /* marker1[jj] == jcol if vertex jj was visited 
			      by a previous column within this panel.   */
    int       *repfnz_col; /* start of each column in the panel */
    double    *dense_col;  /* start of each column in the panel */
    int       *xsup, *xsup_end, *supno, *lsub, *xlsub, *xlsub_end;

    int       *col_marker; /* marker array of each column in the panel */

    /* Initialize pointers */
    xsup       = Glu->xsup;
    xsup_end   = Glu->xsup_end;
    supno      = Glu->supno;
    lsub       = Glu->lsub;
    xlsub      = Glu->xlsub;
    xlsub_end  = Glu->xlsub_end;
    Astore     = A->Store;
    a          = Astore->nzval;
    asub       = Astore->rowind;
    xa_begin   = Astore->colbeg;
    xa_end     = Astore->colend;
    marker1    = marker + m;
    repfnz_col = repfnz;
    dense_col  = dense;
    nextp      = 0;
    *nseg      = 0;

#if ( DEBUGlevel>=2 )
if (jcol == BADPAN)    
    printf("(%d) pdgstrf_panel_dfs[begin] jcol %d, w %d\n", pnum, jcol, w);
#endif
    
    /*
     * For each column in the panel ...
     */
    for (jj = jcol; jj < jcol + w; ++jj, nextp += m) {
	nextl_col = nextp;
	col_marker = &spa_marker[nextp];

	/*
	 * For each nonz in A[*,jj] perform dfs ...
	 */
	for (k = xa_begin[jj]; k < xa_end[jj]; ++k) {
	    krow = asub[k];
	    dense_col[krow] = a[k];
	    kmark = col_marker[krow];
	    
	    /* if krow was visited before, go to the next nonzero */
	    if ( kmark == jj ) continue;

	    /*
	     * For each unmarked nbr krow of jj ...
	     */
	    col_marker[krow] = jj;
	    kperm = perm_r[krow];
	    
	    if ( kperm == EMPTY ) {
		/* krow is in L: place it in structure of L[*,jj].
		 * NOTE: some entries in U may get here, because "perm_r"
		 *       is not yet available from a preceding busy column.
		 */
		panel_lsub[nextl_col++] = krow; /* krow is indexed into A */
	    } else {
		/* 
		 * krow is in U (0 <= kperm < jcol): if its supernode
		 * representative krep has been explored, update repfnz[*].
		 */
		if ( lbusy[kperm] == jcol ) { /* kperm is busy */
#if ( DEBUGlevel>=3 )
  if (jj == BADCOL)		    
    printf("(%d) pdgstrf_panel_dfs(%d) skip busy krow %d, kperm %d\n",
	   pnum, jj, krow, kperm);
#endif		    
		    continue;
		}

		/* Here, krep cannot possibly be "busy" */
		krep = SUPER_REP( supno[kperm] );
		myfnz = repfnz_col[krep];

#ifdef CHK_DFS
if (jj == BADCOL)		
    printf("(%d) pdgstrf_panel_dfs[1] %d, krep %d, fsupc %d, Pr[krow %d] %d, myfnz %d\n",
	   pnum, jj, krep, SUPER_FSUPC(supno[krep]), krow, kperm, myfnz);
#endif
		if ( myfnz != EMPTY ) {	/* Representative visited before */
		    if ( myfnz > kperm ) repfnz_col[krep] = kperm;
		    /* continue; */
		} else {
		    /* Otherwise, performs dfs starting from krep */
		    parent[krep] = EMPTY;
		    repfnz_col[krep] = kperm;
		    if ( ispruned[krep] ) {
			if ( SINGLETON( supno[krep] ) )
			    xdfs = xlsub_end[krep];
			else xdfs = xlsub[krep];
			maxdfs = xprune[krep];
#ifdef PROFILE
			/*Gstat->procstat[pnum].pruned++;*/
#endif		    
		    } else {
			fsupc = SUPER_FSUPC( supno[krep] );
			xdfs = xlsub[fsupc] + krep-fsupc+1;
			maxdfs = xlsub_end[fsupc];
#ifdef PROFILE
			/*Gstat->procstat[pnum].unpruned++;*/
#endif		    
		    }
#ifdef CHK_DFS
if (jj == BADCOL)		    
{
    register int i;
    printf("(%d) pdgstrf_panel_dfs[2] %d, ispruned[%d] %d, xdfs %d, maxdfs %d\n",
	   pnum, jj, krep, ispruned[krep], xdfs, maxdfs);
    /*for (i = xdfs; i < maxdfs; i++) printf("(%d) lsub-%d", pnum, lsub[i]);*/
    printf("\n");
}
#endif
		    do {
			while ( xdfs < maxdfs ) {
			    /* for each unmarked kchild of krep ... */
			    kchild = lsub[xdfs];
			    xdfs++;
			    chmark = col_marker[kchild];
			    
			    if ( chmark != jj ) { /* Not reached yet */
				col_marker[kchild] = jj;
				chperm = perm_r[kchild];
				
				if ( chperm == EMPTY ) {
				    /* kchild is in L: place it in L[*,j]. */
				    panel_lsub[nextl_col++] = kchild;
				} else {
				    /* kchild is in U (0 <= chperm < jcol): 
				     * chrep = its supernode-rep. If its rep
				     * has been explored, update its repfnz[*].
				     */

				    if ( lbusy[chperm] == jcol ) {
#ifdef DEBUG
if (jj == BADCOL)					
    printf("(%d) pdgstrf_panel_dfs(%d) skip busy kchild %d, chperm %d\n",
	   pnum, jj, kchild, chperm);
#endif		    
	                                     continue;
                                    }
				    
				    chrep = SUPER_REP( supno[chperm] );
				    myfnz = repfnz_col[chrep];
#ifdef DEBUG
if (jj == BADCOL)				    
    printf("(%d) pdgstrf_panel_dfs[3] %d, krep %d, Pr[kchild %d] %d, chrep %d, fsupc %d, myfnz %d\n",
	   pnum, jj, krep, kchild, chperm, chrep,
	   SUPER_FSUPC(supno[chrep]), myfnz);
#endif
				    if ( myfnz != EMPTY ) {/* Visited before */
					if ( myfnz > chperm )
					    repfnz_col[chrep] = chperm;
				    } else {
					/* Cont. dfs at snode-rep of kchild */
					xplore[krep] = xdfs;
					xplore[m + krep] = maxdfs;
					parent[chrep] = krep;
					krep = chrep; /* Go deeper down G(L) */
					repfnz_col[krep] = chperm;
					if ( ispruned[krep] ) {
					    if ( SINGLETON( supno[krep] ) )
						xdfs = xlsub_end[krep];
					    else xdfs = xlsub[krep];
					    maxdfs = xprune[krep];
#ifdef PROFILE
					    /*procstat[pnum].pruned++;*/
#endif		    
					} else {
					    fsupc = SUPER_FSUPC(supno[krep]);
					    xdfs = xlsub[fsupc] + krep-fsupc+1;
					    maxdfs = xlsub_end[fsupc];
#ifdef PROFILE
					    /*procstat[pnum].unpruned++;*/
#endif		    
					}
#ifdef CHK_DFS
if (jj == BADCOL)
    printf("(%d) pdgstrf_panel_dfs[4] %d, ispruned[%d] %d, xdfs %d, maxdfs %d\n",
	   pnum, jj, krep, ispruned[krep], xdfs, maxdfs);
#endif
					
				    } /* else */
				} /* else */
			      
			    } /* if... */
			    
			} /* while xdfs < maxdfs */
			
			/* krep has no more unexplored nbrs:
			 *    Place snode-rep krep in postorder DFS, if this 
			 *    segment is seen for the first time. (Note that
			 *    "repfnz[krep]" may change later.)
			 *    Backtrack dfs to its parent.
			 */
			if ( marker1[krep] != jcol ) {
			    segrep[*nseg] = krep;
			    ++(*nseg);
			    marker1[krep] = jcol;
#ifdef CHK_DFS
if (jj == BADCOL)			    
    printf("(%d) pdgstrf_panel_dfs(%d) repfnz[%d] %d added to top.list by jj %d\n",
	   pnum, jj, krep, repfnz_col[krep], jj);
#endif			    
			}
			
			kpar = parent[krep]; /* Pop stack, mimic recursion */
			if ( kpar == EMPTY ) break; /* dfs done */
			krep = kpar;
			xdfs = xplore[krep];
			maxdfs = xplore[m + krep];
			
#ifdef CHK_DFS
if (jj == BADCOL)			
{
    register int i;
    printf("(%d) pdgstrf_panel_dfs[5] pop stack: %d, krep %d, xdfs %d, maxdfs %d\n",
	   pnum, jj, krep, xdfs, maxdfs);
    /* for (i = xdfs; i < maxdfs; i++) printf("(%d) lsub-%d", pnum, lsub[i]);*/
    printf("\n");
}
#endif

		    } while ( kpar != EMPTY ); /* until empty stack */
		    
		} /* else: myfnz == EMPTY */
		
	    } /* else: kperm != EMPTY */
	    
	} /* for each nonzero in A[*,jj] */

#if ( DEBUGlevel>=3 )
if (jj == BADCOL) {
#define REPCOL 0    
    krep = REPCOL;
    printf("(%d) pdgstrf_panel_dfs(end) w_lsub_end[jj=%d] %d, repfnz_col[%d] %d\n",
	   pnum, jj, nextl_col - nextp, krep, repfnz_col[krep]);
    PrintInt10("lsub", nextl_col - nextp, &panel_lsub[nextp]);
}
#endif
	
	w_lsub_end[jj-jcol] = nextl_col - nextp;
	repfnz_col += m;
        dense_col += m;
	
    } /* for jj ... */

}