int
pcgstrf_column_bmod(
		    const int  pnum,   /* process number */
		    const int  jcol,   /* current column in the panel */
		    const int  fpanelc,/* first column in the panel */
		    const int  nseg,   /* number of s-nodes to update jcol */
		    int        *segrep,/* in */
		    int        *repfnz,/* in */
		    complex     *dense, /* modified */
		    complex     *tempv, /* working array */
		    pxgstrf_shared_t *pxgstrf_shared, /* modified */
		    Gstat_t *Gstat     /* modified */
		    )
{
/*
 * -- SuperLU MT routine (version 2.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley,
 * and Xerox Palo Alto Research Center.
 * September 10, 2007
 *
 * Purpose:
 * ========
 *    Performs numeric block updates (sup-col) in topological order.
 *    It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
 *    Special processing on the supernodal portion of L\U[*,j].
 *
 * Arguments:
 * ==========
 *    pnum     Index into Gstat->procstat[] for the flop counter; also
 *             forwarded to Glu_alloc().
 *    jcol     Column being updated.
 *    fpanelc  First column of the panel; segments are clipped so that
 *             only columns >= fpanelc contribute.
 *    nseg     Number of entries of segrep[] to process.
 *    segrep   Supernode representatives; walked from index nseg-1 down
 *             to 0.
 *    repfnz   repfnz[krep] is the first nonzero of the segment ending
 *             at krep.
 *    dense    SPA holding column jcol. Updated in place; every entry
 *             gathered into lusup[] is reset to zero.
 *    tempv    Scratch vector; every entry this routine writes is reset
 *             to zero before it returns.
 *    pxgstrf_shared  Supplies Glu (the L\U storage) and is forwarded to
 *             Glu_alloc().
 *    Gstat    procstat[pnum].fcops is incremented by the flop estimate.
 *
 * Return value:
 * =============
 *      0 - successful return
 *    > 0 - number of bytes allocated when run out of space
 *          (the nonzero value returned by Glu_alloc)
 *
 */
#if ( MACH==CRAY_PVP )
    _fcd ftcs1 = _cptofcd("L", strlen("L")),
         ftcs2 = _cptofcd("N", strlen("N")),
         ftcs3 = _cptofcd("U", strlen("U"));
#endif
    
#ifdef USE_VENDOR_BLAS    
    int         incx = 1, incy = 1;
    complex      alpha, beta;
#endif
    GlobalLU_t *Glu = pxgstrf_shared->Glu;   /* modified */
    
    /* krep = representative of current k-th supernode
     * fsupc = first supernodal column
     * nsupc = no of columns in supernode
     * nsupr = no of rows in supernode (used as leading dimension)
     * luptr = location of supernodal LU-block in storage
     * kfnz = first nonz in the k-th supernodal segment
     * no_zeros = no of leading zeros in a supernodal U-segment
     */
    complex	  ukj, ukj1, ukj2;
    register int lptr, kfnz, isub, irow, i, no_zeros;
    register int luptr, luptr1, luptr2;
    int          fsupc, nsupc, nsupr, segsze;
    int          nrow;	  /* No of rows in the matrix of matrix-vector */
    int          jsupno, k, ksub, krep, krep_ind, ksupno;
    int          ufirst, nextlu;
    int          fst_col; /* First column within small LU update */
    int          d_fsupc; /* Distance between the first column of the current
			     panel and the first column of the current snode.*/
    int          *xsup, *supno;
    int          *lsub, *xlsub, *xlsub_end;
    complex       *lusup;
    int          *xlusup, *xlusup_end;
    complex       *tempv1;
    int          mem_error;
    register float flopcnt;

    complex      zero = {0.0, 0.0};
    complex      one = {1.0, 0.0};
    complex      none = {-1.0, 0.0};
    complex      comp_temp, comp_temp1;

    xsup       = Glu->xsup;
    supno      = Glu->supno;
    lsub       = Glu->lsub;
    xlsub      = Glu->xlsub;
    xlsub_end  = Glu->xlsub_end;
    lusup      = Glu->lusup;
    xlusup     = Glu->xlusup;
    xlusup_end = Glu->xlusup_end;
    jsupno     = supno[jcol];

    /* 
     * For each nonz supernode segment of U[*,j] in topological order 
     * (segrep[] is consumed from its last entry towards its first).
     */
    k = nseg - 1;
    for (ksub = 0; ksub < nseg; ksub++) {

	krep = segrep[k];
	k--;
	ksupno = supno[krep];
#if ( DEBUGlevel>=2 )
if (jcol==BADCOL)
printf("(%d) pcgstrf_column_bmod[1]: %d, nseg %d, krep %d, jsupno %d, ksupno %d\n",
       pnum, jcol, nseg, krep, jsupno, ksupno);
#endif    
	if ( jsupno != ksupno ) { /* Outside the rectangular supernode */

	    fsupc = xsup[ksupno];
	    fst_col = SUPERLU_MAX ( fsupc, fpanelc );

  	    /* Distance from the current supernode to the current panel; 
	       d_fsupc=0 if fsupc >= fpanelc. */
  	    d_fsupc = fst_col - fsupc; 

	    luptr = xlusup[fst_col] + d_fsupc;
	    lptr = xlsub[fsupc] + d_fsupc;
	    kfnz = repfnz[krep];
	    kfnz = SUPERLU_MAX ( kfnz, fpanelc );
	    segsze = krep - kfnz + 1;
	    nsupc = krep - fst_col + 1;
	    nsupr = xlsub_end[fsupc] - xlsub[fsupc]; /* Leading dimension */
	    nrow = nsupr - d_fsupc - nsupc;
	    krep_ind = lptr + nsupc - 1;

	    /* Flop estimate: triangular solve plus matrix-vector product. */
	    flopcnt = segsze * (segsze - 1) + 2 * nrow * segsze;//sj
	    Gstat->procstat[pnum].fcops += flopcnt;

#if ( DEBUGlevel>=2 )
if (jcol==BADCOL)	    
printf("(%d) pcgstrf_column_bmod[2]: %d, krep %d, kfnz %d, segsze %d, d_fsupc %d,"
       "fsupc %d, nsupr %d, nsupc %d\n",
       pnum, jcol, krep, kfnz, segsze, d_fsupc, fsupc, nsupr, nsupc);

#endif

            /*
             * Case 1: Update U-segment of size 1 -- col-col update
             */
            if ( segsze == 1 ) {
                ukj = dense[lsub[krep_ind]];
                /* Skip to the first entry below the diagonal block in
                   the last column (krep) of the clipped supernode. */
                luptr += nsupr*(nsupc-1) + nsupc;

                for (i = lptr + nsupc; i < xlsub_end[fsupc]; ++i) {
                    irow = lsub[i];
                    cc_mult(&comp_temp, &ukj, &lusup[luptr]);
                    c_sub(&dense[irow], &dense[irow], &comp_temp);
                    luptr++;
                }

            } else if ( segsze <= 3 ) {
                ukj = dense[lsub[krep_ind]];
                /* luptr now addresses the diagonal entry of column krep;
                   luptr1 is the same row one column to the left. */
                luptr += nsupr*(nsupc-1) + nsupc-1;
                ukj1 = dense[lsub[krep_ind - 1]];
                luptr1 = luptr - nsupr;

                if ( segsze == 2 ) { /* Case 2: 2cols-col update */
                    cc_mult(&comp_temp, &ukj1, &lusup[luptr1]);
                    c_sub(&ukj, &ukj, &comp_temp);
                    dense[lsub[krep_ind]] = ukj;
                    for (i = lptr + nsupc; i < xlsub_end[fsupc]; ++i) {
                        irow = lsub[i];
                        luptr++;
                        luptr1++;
                        cc_mult(&comp_temp, &ukj, &lusup[luptr]);
                        cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
                        c_add(&comp_temp, &comp_temp, &comp_temp1);
                        c_sub(&dense[irow], &dense[irow], &comp_temp);
                    }
                } else { /* Case 3: 3cols-col update */
                    ukj2 = dense[lsub[krep_ind - 2]];
                    luptr2 = luptr1 - nsupr;
                    /* Forward-substitute the 3x3 unit-lower triangle by
                       hand: first ukj1, then ukj. */
                    cc_mult(&comp_temp, &ukj2, &lusup[luptr2-1]);
                    c_sub(&ukj1, &ukj1, &comp_temp);

                    cc_mult(&comp_temp, &ukj1, &lusup[luptr1]);
                    cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
                    c_add(&comp_temp, &comp_temp, &comp_temp1);
                    c_sub(&ukj, &ukj, &comp_temp);

                    dense[lsub[krep_ind]] = ukj;
                    dense[lsub[krep_ind-1]] = ukj1;
                    for (i = lptr + nsupc; i < xlsub_end[fsupc]; ++i) {
                        irow = lsub[i];
                        luptr++;
                        luptr1++;
                        luptr2++;
                        cc_mult(&comp_temp, &ukj, &lusup[luptr]);
                        cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
                        c_add(&comp_temp, &comp_temp, &comp_temp1);
                        cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
                        c_add(&comp_temp, &comp_temp, &comp_temp1);
                        c_sub(&dense[irow], &dense[irow], &comp_temp);
                    }
                }

	    } else {
	  	/*
		 * Case: sup-col update
		 * Perform a triangular solve and block update,
		 * then scatter the result of sup-col update to dense
		 */
		no_zeros = kfnz - fst_col;

	        /* Copy U[*,j] segment from dense[*] to tempv[*] */
	        isub = lptr + no_zeros;
	        for (i = 0; i < segsze; i++) {
	  	    irow = lsub[isub];
		    tempv[i] = dense[irow];
		    ++isub; 
	        }

	        /* Dense triangular solve -- start effective triangle */
		luptr += nsupr * no_zeros + no_zeros; 
#ifdef USE_VENDOR_BLAS
#if ( MACH==CRAY_PVP )
		CTRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], 
		       &nsupr, tempv, &incx );
#else
		ctrsv_( "L", "N", "U", &segsze, &lusup[luptr], 
		       &nsupr, tempv, &incx );
#endif
		
 		luptr += segsze;  /* Dense matrix-vector */
		tempv1 = &tempv[segsze];
		alpha = one;
		beta = zero;
#if ( MACH==CRAY_PVP )
		CGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr], 
		       &nsupr, tempv, &incx, &beta, tempv1, &incy );
#else
		cgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr], 
		       &nsupr, tempv, &incx, &beta, tempv1, &incy );
#endif
#else
		clsolve ( nsupr, segsze, &lusup[luptr], tempv );

 		luptr += segsze;  /* Dense matrix-vector */
		tempv1 = &tempv[segsze];
		cmatvec (nsupr, nrow , segsze, &lusup[luptr], tempv, tempv1);
#endif
                /* Scatter tempv[] into SPA dense[*] */
                isub = lptr + no_zeros;
                for (i = 0; i < segsze; i++) {
                    irow = lsub[isub];
                    dense[irow] = tempv[i]; /* Scatter */
                    tempv[i] = zero;        /* leave scratch clean */
                    isub++;
                }

		/* Scatter tempv1[] into SPA dense[*] */
		for (i = 0; i < nrow; i++) {
		    irow = lsub[isub];
                    c_sub(&dense[irow], &dense[irow], &tempv1[i]);
		    tempv1[i] = zero;
		    ++isub;
		}
	    } /* else segsze >= 4 */
	    
	} /* if jsupno ... */

    } /* for each segment... */

    
    /* ------------------------------------------
       Process the supernodal portion of L\U[*,j]
       ------------------------------------------ */
    
    fsupc = SUPER_FSUPC (jsupno);
    nsupr = xlsub_end[fsupc] - xlsub[fsupc];
    if ( (mem_error = Glu_alloc(pnum, jcol, nsupr, LUSUP, &nextlu, 
			       pxgstrf_shared)) )
	return mem_error;
    xlusup[jcol] = nextlu;
    lusup = Glu->lusup; /* re-read the pointer after the allocation call */
    
    /* Gather the nonzeros from SPA dense[*,j] into L\U[*,j] */
    for (isub = xlsub[fsupc]; isub < xlsub_end[fsupc]; ++isub) {
  	irow = lsub[isub];
	lusup[nextlu] = dense[irow];
	dense[irow] = zero;
#ifdef DEBUG
if (jcol == -1)
    printf("(%d) pcgstrf_column_bmod[lusup] jcol %d, irow %d, lusup (%.10e, %.10e)\n",
	   pnum, jcol, irow, lusup[nextlu].r, lusup[nextlu].i);
#endif	
	++nextlu;
    }
    xlusup_end[jcol] = nextlu; /* close L\U[*,jcol] */

#if ( DEBUGlevel>=2 )
/* NOTE(review): print_double_vec is handed complex data here -- confirm
   whether a complex-aware printer should be used instead. */
if (jcol == -1) {
    nrow = xlusup_end[jcol] - xlusup[jcol];
    print_double_vec("before sup-col update", nrow, &lsub[xlsub[fsupc]],
		     &lusup[xlusup[jcol]]);
}
#endif    
    
    /*
     * For more updates within the panel (also within the current supernode), 
     * should start from the first column of the panel, or the first column 
     * of the supernode, whichever is bigger. There are 2 cases:
     *    (1) fsupc < fpanelc,  then fst_col := fpanelc
     *    (2) fsupc >= fpanelc, then fst_col := fsupc
     */
    fst_col = SUPERLU_MAX ( fsupc, fpanelc );

    if ( fst_col < jcol ) {

  	/* distance between the current supernode and the current panel;
	   d_fsupc=0 if fsupc >= fpanelc. */
  	d_fsupc = fst_col - fsupc;

	lptr = xlsub[fsupc] + d_fsupc;
	luptr = xlusup[fst_col] + d_fsupc;
	nsupr = xlsub_end[fsupc] - xlsub[fsupc]; /* Leading dimension */
	nsupc = jcol - fst_col;	/* Excluding jcol */
	nrow = nsupr - d_fsupc - nsupc;

	/* points to the beginning of jcol in supernode L\U[*,jsupno] */
	ufirst = xlusup[jcol] + d_fsupc;	

#if ( DEBUGlevel>=2 )
if (jcol==BADCOL)
printf("(%d) pcgstrf_column_bmod[3] jcol %d, fsupc %d, nsupr %d, nsupc %d, nrow %d\n",
       pnum, jcol, fsupc, nsupr, nsupc, nrow);
#endif    

	flopcnt = nsupc * (nsupc - 1) + 2 * nrow * nsupc; //sj
	Gstat->procstat[pnum].fcops += flopcnt;

/*	ops[TRSV] += nsupc * (nsupc - 1);
	ops[GEMV] += 2 * nrow * nsupc;    */
	
#ifdef USE_VENDOR_BLAS
	alpha = none; beta = one; /* y := beta*y + alpha*A*x */
#if ( MACH==CRAY_PVP )
	CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr], 
	       &nsupr, &lusup[ufirst], &incx );
	CGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
	       &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
#else
	ctrsv_( "L", "N", "U", &nsupc, &lusup[luptr], 
	       &nsupr, &lusup[ufirst], &incx );
	cgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
	       &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
#endif
#else
	clsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );

	cmatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc],
		 &lusup[ufirst], tempv );
	
        /* Copy updates from tempv[*] into lusup[*] */
	isub = ufirst + nsupc;
	for (i = 0; i < nrow; i++) {
            c_sub(&lusup[isub], &lusup[isub], &tempv[i]);
            tempv[i] = zero;
	    ++isub;
	}
#endif
    } /* if fst_col < jcol ... */ 

    return 0;
}
void
pcgstrf_mark_busy_descends(int pnum, int jcol, int *etree, 
			   pxgstrf_shared_t *pxgstrf_shared,
			   int *bcol, int *lbusy)
{
/*
 * -- SuperLU MT routine (version 1.0) --
 * Univ. of California Berkeley, Xerox Palo Alto Research Center,
 * and Lawrence Berkeley National Lab.
 * August 15, 1997
 *
 * Purpose
 * =======
 *
 *   Record, in the processor-local array "lbusy", which columns were
 *   busy at the moment panel jcol started. Used for linear pipelining.
 *
 *   The busy set is the supernode associated with *bcol plus every
 *   e-tree ancestor met while walking from there up to (but excluding)
 *   jcol. Writing the snapshot into a local array keeps it stable no
 *   matter what other processors do afterwards.
 *
 * Arguments
 * =========
 *
 * pnum    (input) int
 *         Process number; only used by the debug trace.
 *
 * jcol    (input) int
 *         Current panel, with leading column jcol.
 *
 * etree   (input) int*
 *         Elimination tree parent pointers.
 *
 * pxgstrf_shared (input) pxgstrf_shared_t*
 *         Supplies pan_status[] and Glu (xsup, supno).
 *
 * bcol    (input/output) int*
 *         Farthest busy descendant of jcol.
 *         On entry, the first column of the farthest busy panel.
 *         On exit, it may have been moved to the first column of the
 *                  farthest busy supernode.
 *         Left untouched when *bcol >= jcol.
 *
 * lbusy   (input/output) int*
 *         Initially all -1; lbusy(r) = jcol means that r was busy
 *         at the beginning of computing jcol.
 *
 */
    GlobalLU_t *Glu = pxgstrf_shared->Glu;
    int *xsup;
    register int col, first, last, width;

    last = *bcol;

    /* No busy descendant in front of jcol: nothing to record. */
    if ( last >= jcol ) return;

    /* -----------------------------------------------------------
       Rather than wait for "bcol" to complete, pessimistically
       assume supno[bcol] == supno[bcol-1] and always treat the
       supernode containing "bcol-1" as busy.
       ----------------------------------------------------------- */
    if ( pxgstrf_shared->pan_status[last].type == RELAXED_SNODE ) {
	/* Relaxed supernode led by bcol: its columns are
	   [first, first + size). */
	first = last;
	width = pxgstrf_shared->pan_status[first].size;
	last += width;
    } else {
	/* Leading column of the supernode that holds column bcol-1. */
	xsup = Glu->xsup;
	first = SUPER_FSUPC ( Glu->supno[last-1] );
    }

    for (col = first; col < last; ++col)
	lbusy[col] = jcol;

#if ( DEBUGlevel>=1 )
if (jcol >= LOCOL && jcol <= HICOL)
    printf("(%d) mark_busy_descends[1] jcol %d, bcol_reg %d, fsupc %d\n",
           pnum, jcol, last, first);
#endif

    /* Climb the e-tree from the end of the busy range up to jcol,
       marking every column on the path. */
    for (col = last; col < jcol; col = etree[col])
	lbusy[col] = jcol;

    /* INVARIANT: *bcol must be the first column of the farthest
       busy supernode */
    *bcol = first;
}
int
pzgstrf_column_dfs(
		   const int  pnum,    /* process number */
		   const int  m,       /* number of rows in the matrix */
		   const int  jcol,    /* current column in the panel */
		   const int  fstcol,  /* first column in the panel */
		   int *perm_r,   /* row pivotings that are done so far */
		   int *ispruned, /* in */
		   int *col_lsub, /* the RHS vector to start the dfs */
		   int lsub_end,  /* size of col_lsub[] */
		   int *super_bnd,/* supernode partition by upper bound */
		   int *nseg,     /* modified - with new segments appended */
		   int *segrep,   /* modified - with new segments appended */
		   int *repfnz,   /* modified */
		   int *xprune,   /* modified */
		   int *marker2,  /* modified */
		   int *parent,   /* working array */
		   int *xplore,   /* working array */
		   pxgstrf_shared_t *pxgstrf_shared /* modified */
		   )
{
/*
 * -- SuperLU MT routine (version 2.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley,
 * and Xerox Palo Alto Research Center.
 * September 10, 2007
 *
 * Purpose
 * =======
 *   pzgstrf_column_dfs() performs a symbolic factorization on column jcol, 
 *   and detects whether column jcol belongs in the same supernode as jcol-1.
 *
 * Arguments
 * =========
 *   pnum       Process number; forwarded to NewNsuper()/Glu_alloc() and
 *              used for the PROFILE counters and debug traces.
 *   m          Offset into xplore[]: xplore[m + krep] stores the saved
 *              upper search bound for krep.
 *   jcol       Column being analysed.
 *   fstcol     First column of the panel; a row whose perm_r value is
 *              >= fstcol is treated as lying in U and starts a DFS.
 *   perm_r     perm_r[row] == EMPTY means the row is still in L.
 *   ispruned   Nonzero if the structure of that supernode rep is pruned.
 *   col_lsub   On entry holds lsub_end row indices; L rows discovered by
 *              the DFS are appended after them.
 *   lsub_end   Number of valid entries in col_lsub[] on entry.
 *   super_bnd  A nonzero super_bnd[jcol] forces jcol to start a new
 *              supernode.
 *   nseg       In/out count of entries in segrep[].
 *   segrep     Supernode reps are appended as each DFS subtree finishes.
 *   repfnz     repfnz[rep] is lowered to the smallest column reached in
 *              the segment ending at rep.
 *   xprune     xprune[jcol] (and xprune[fsupc] when jcol joins the
 *              previous supernode) are set on exit.
 *   marker2    marker2[row] is set to jcol for every row visited.
 *   parent     DFS stack links between supernode reps.
 *   xplore     Saved DFS positions (see m).
 *   pxgstrf_shared  Supplies Glu; forwarded to NewNsuper()/Glu_alloc().
 *
 * Local parameters
 * ================
 *   A supernode representative is the last column of a supernode.
 *   The nonzeros in U[*,j] are segments that end at supernodal
 *   representatives. The routine returns a list of such supernodal 
 *   representatives in topological order of the dfs that generates them.
 *   The location of the first nonzero in each such supernodal segment
 *   (supernodal entry location) is also returned.
 *
 *   nseg: no of segments in current U[*,j]
 *   samesuper: samesuper=NO if column j does not belong in the same
 *	        supernode as j-1. Otherwise, samesuper=YES.
 *
 *   marker2: A-row --> A-row/col (0/1)
 *   repfnz: SuperA-col --> PA-row
 *   parent: SuperA-col --> SuperA-col
 *   xplore: SuperA-col --> index to L-structure
 *
 * Return value
 * ============
 *     0  success;
 *   > 0  number of bytes allocated when run out of space.
 *
 */
    GlobalLU_t *Glu = pxgstrf_shared->Glu; /* modified */
    Gstat_t *Gstat = pxgstrf_shared->Gstat; /* modified */
    register int jcolm1, jcolm1size, nextl, ifrom;
    register int k, krep, krow, kperm, samesuper, nsuper;
    register int no_lsub;
    int	    fsupc;		/* first column in a supernode */
    int     myfnz;		/* first nonz column in a U-segment */
    int	    chperm, chmark, chrep, kchild;
    int     xdfs, maxdfs, kpar;
    int     ito;	        /* Used to compress row subscripts */
    int     mem_error;
    int     *xsup, *xsup_end, *supno, *lsub, *xlsub, *xlsub_end;
    static  int  first = 1, maxsuper;

    /* Cache the maximum supernode size on the first call. */
    if ( first ) {
	maxsuper = sp_ienv(3);
	first = 0;
    }

    /* Initialize pointers */
    xsup      = Glu->xsup;
    xsup_end  = Glu->xsup_end;
    supno     = Glu->supno;
    lsub      = Glu->lsub;
    xlsub     = Glu->xlsub;
    xlsub_end = Glu->xlsub_end;
    jcolm1    = jcol - 1;
    nextl     = lsub_end;
    no_lsub   = 0;
    samesuper = YES;

    /* Test whether the row structure of column jcol is contained
       in that of column jcol-1. */
    for (k = 0; k < lsub_end; ++k) {
	krow = col_lsub[k];
	if ( perm_r[krow] == EMPTY ) { /* krow is in L */
	    ++no_lsub;
	    if (marker2[krow] != jcolm1) 
	        samesuper = NO; /* row subset test */
	    marker2[krow] = jcol;
	}
    }

#if ( DEBUGlevel>=2 )
  if (jcol == BADCOL)
    printf("(%d) pzgstrf_column_dfs[1] %d, fstcol %d, lsub_end %d, no_lsub %d, samesuper? %d\n",
	   pnum, jcol, fstcol, lsub_end, no_lsub, samesuper);
#endif
    
    /*
     * For each nonzero in A[fstcol:n,jcol] perform DFS ...
     */
    for (k = 0; k < lsub_end; ++k) {
	krow = col_lsub[k];
	
	/* if krow was visited before, go to the next nonzero */
	if ( marker2[krow] == jcol ) continue;
	marker2[krow] = jcol;
	kperm = perm_r[krow];
#if ( DEBUGlevel>=3 )
  if (jcol == BADCOL)
    printf("(%d) pzgstrf_column_dfs[inner]: perm_r[krow=%d] %d\n", pnum, krow, kperm);
#endif
	
	/* Ignore the nonzeros in U corresponding to the busy columns
	   during the panel DFS. */
	/*if ( lbusy[kperm] != fstcol ) {  xiaoye? */
	if ( kperm >= fstcol ) {
	    /*
	     * krow is in U: if its supernode representative krep
	     * has been explored, update repfnz[*].
	     */
	    krep = SUPER_REP(supno[kperm]);
	    myfnz = repfnz[krep];
	    
#if ( DEBUGlevel>=3 )
  if (jcol == BADCOL)
    printf("(%d) pzgstrf_column_dfs[inner-U]: krep %d, myfnz %d, kperm %d\n",
	   pnum, krep, myfnz, kperm);
#endif
	    if ( myfnz != EMPTY ) {	/* Visited before */
		if ( myfnz > kperm ) repfnz[krep] = kperm;
		/* continue; */
	    } else {
		/* Otherwise, perform dfs starting at krep */
		parent[krep] = EMPTY;
		repfnz[krep] = kperm;
		/* Select the slice of lsub[] to scan for krep: the pruned
		   range when available, else the rows of the supernode
		   below column krep. */
		if ( ispruned[krep] ) {
		    if ( SINGLETON( supno[krep] ) )
			xdfs = xlsub_end[krep];
		    else xdfs = xlsub[krep];
		    maxdfs = xprune[krep];
#ifdef PROFILE
		    Gstat->procstat[pnum].pruned++;
#endif		    
		} else {
		    fsupc = SUPER_FSUPC( supno[krep] );
		    xdfs = xlsub[fsupc] + krep-fsupc+1;
		    maxdfs = xlsub_end[fsupc];
#ifdef PROFILE
		    Gstat->procstat[pnum].unpruned++;
#endif		    
		}
		
		do {
		    /* 
		     * For each unmarked kchild of krep ...
		     */
		    while ( xdfs < maxdfs ) {
			
			kchild = lsub[xdfs];
			xdfs++;
			chmark = marker2[kchild];
			
			if ( chmark != jcol ) { /* Not reached yet */
			    marker2[kchild] = jcol;
			    chperm = perm_r[kchild];
			    
			    if ( chperm == EMPTY ) {
				/* kchild is in L: place it in L[*,k]. */
				++no_lsub;
				col_lsub[nextl++] = kchild;
				if (chmark != jcolm1) samesuper = NO;
			    } else {
				/* kchild is in U: chrep = its supernode
				 * representative. If its rep has 
				 * been explored, update its repfnz[*].
				 */
				chrep = SUPER_REP( supno[chperm] );
				myfnz = repfnz[chrep];
				if ( myfnz != EMPTY ) { /* Visited before */
				    if ( myfnz > chperm )
					repfnz[chrep] = chperm;
				} else {
				    /* Continue dfs at super-rep of kchild;
				       save the scan position of krep so it
				       can be resumed after backtracking. */
				    xplore[krep] = xdfs;	
				    xplore[m + krep] = maxdfs;	
				    parent[chrep] = krep;
				    krep = chrep; /* Go deeper down G(L^t) */
				    repfnz[krep] = chperm;
				    if ( ispruned[krep] ) {
					if ( SINGLETON( supno[krep] ) )
					    xdfs = xlsub_end[krep];
					else xdfs = xlsub[krep];
					maxdfs = xprune[krep];
#ifdef PROFILE
					Gstat->procstat[pnum].pruned++;
#endif		    
				    } else {
					fsupc = SUPER_FSUPC( supno[krep] );
					xdfs = xlsub[fsupc] + krep-fsupc+1;
					maxdfs = xlsub_end[fsupc];
#ifdef PROFILE
					Gstat->procstat[pnum].unpruned++;
#endif		    
				    }
				}
			    } /* else */
			} /* if */
		    } /* while */
		    
		    /* krow has no more unexplored nbrs:
		     *    place supernode-rep krep in postorder DFS,
		     *    backtrack dfs to its parent.
		     */
		    segrep[*nseg] = krep;
		    ++(*nseg);
#if ( DEBUGlevel>=3 )
  if (jcol == BADCOL)
    printf("(%d) pzgstrf_column_dfs[inner-dfs] new nseg %d, repfnz[krep=%d] %d\n",
	   pnum, *nseg, krep, repfnz[krep]);
#endif
		    kpar = parent[krep]; /* Pop from stack, mimic recursion */
		    if ( kpar == EMPTY ) break; /* dfs done */
		    krep = kpar;
		    xdfs = xplore[krep];
		    maxdfs = xplore[m + krep];
		} while ( kpar != EMPTY ); /* Do ... until empty stack */
		
	    } /* else myfnz ... */
	} /* if kperm >= fstcol ... */
    } /* for each nonzero ... */
	
#if ( DEBUGlevel>=3 )
  if (jcol == BADCOL)
    printf("(%d) pzgstrf_column_dfs[2]: nextl %d, samesuper? %d\n",
	   pnum, nextl, samesuper);
#endif

    /* assert(no_lsub == nextl - no_usub);*/

    /* ---------------------------------------------------------
       Check to see if j belongs in the same supernode as j-1.
       --------------------------------------------------------- */
    
    /* Column 0 has no predecessor, so it always opens a supernode.
       Without this guard the test below would index supno[], xlsub[]
       and xlsub_end[] at -1. */
    if ( jcol == 0 ) samesuper = NO;

    if ( samesuper == YES ) {
	nsuper = supno[jcolm1];
	jcolm1size = xlsub_end[jcolm1] - xlsub[jcolm1];
#if ( DEBUGlevel>=3 )
  if (jcol == BADCOL)
    printf("(%d) pzgstrf_column_dfs[YES] jcol-1 %d, jcolm1size %d, supno[%d] %d\n",
	   pnum, jcolm1, jcolm1size, jcolm1, nsuper);
#endif	
	if ( no_lsub != jcolm1size-1 )
	    samesuper = NO;        /* Enforce T2 supernode */
	else {
	    /* Make sure the number of columns in a supernode does not
	       exceed threshold. */
	    fsupc = xsup[nsuper];
	    if ( jcol - fsupc >= maxsuper )
		samesuper = NO;
	    else {
		/* start of a supernode in H (coarser partition) */
		if ( super_bnd[jcol] != 0 ) samesuper = NO;
	    }
	}
    }
    
    /* If jcol starts a new supernode, allocate storage for 
     * the subscript set of both first and last column of
     * a previous supernode. (first for num values, last for pruning)
     */
    if ( samesuper == NO ) { /* starts a new supernode */
	nsuper = NewNsuper(pnum, pxgstrf_shared, &Glu->nsuper);
	xsup[nsuper] = jcol;
	
	/* Copy column jcol; also reserve space to store pruned graph */
	if ((mem_error = Glu_alloc(pnum, jcol, 2*no_lsub, LSUB, &ito, 
				  pxgstrf_shared)))
	    return mem_error;
	xlsub[jcol] = ito;
	lsub = Glu->lsub; /* re-read the pointer after the allocation call */
	for (ifrom = 0; ifrom < nextl; ++ifrom) {
	    krow = col_lsub[ifrom];
	    if ( perm_r[krow] == EMPTY ) /* Filter U-subscript */
		lsub[ito++] = krow;
	}
	k = ito;
	xlsub_end[jcol] = k;
	
	/* Make a copy in case it is a singleton supernode */
	for (ifrom = xlsub[jcol]; ifrom < ito; ++ifrom)
	    lsub[k++] = lsub[ifrom];
	
    } else { /* Supernode of size > 1: overwrite column jcol-1 */
	k = xlsub_end[fsupc];
	xlsub[jcol] = k;
	xprune[fsupc] = k;
	for (ifrom = 0; ifrom < nextl; ++ifrom) {
	    krow = col_lsub[ifrom];
	    if ( perm_r[krow] == EMPTY ) /* Filter U-subscript */
		lsub[k++] = krow;
	}
	xlsub_end[jcol] = k;
    }

#if ( DEBUGlevel>=3 )
  if (jcol == BADCOL) {
    printf("(%d) pzgstrf_column_dfs[3]: %d in prev s-node %d? %d\n",
	   pnum, jcol, fsupc, samesuper);
    PrintInt10("lsub", xlsub_end[jcol]-xlsub[jcol], &lsub[xlsub[jcol]]);
  }
#endif
    
    /* Tidy up the pointers before exit */
    xprune[jcol] = k;     /* upper bound for pruning */
    supno[jcol] = nsuper;
    xsup_end[nsuper] = jcol + 1;
    
    return 0;
}
/* Example #4 */
void
pxgstrf_super_bnd_dfs(
		      const int  pnum, /* process number */
		      const int  m,    /* number of rows in the matrix */
		      const int  n,    /* number of columns in the matrix */
		      const int  jcol, /* first column of the H-supernode */
		      const int  w,    /* size of the H-supernode */
		      SuperMatrix *A,  /* original matrix */
		      int        *perm_r,   /* in */
		      int        *iperm_r,  /* in; inverse of perm_r */
		      int        *xprune,   /* in */
		      int        *ispruned, /* in */
		      int        *marker,   /* modified */
		      int        *parent,   /* working array */
		      int        *xplore,   /* working array */
		      pxgstrf_shared_t *pxgstrf_shared /* modified */	      
		      )
{
/*
 * -- SuperLU MT routine (version 1.0) --
 * Univ. of California Berkeley, Xerox Palo Alto Research Center,
 * and Lawrence Berkeley National Lab.
 * August 15, 1997
 *
 * Purpose
 * =======
 *
 * Performs a symbolic structure prediction on a supernode in the Householder
 * matrix H, with jcol being the leading column.
 *
 * For the w columns starting at jcol, a depth-first search counts the
 * distinct unpivoted rows (perm_r[row] == EMPTY) that are reachable;
 * that count times w is then handed to DynamicSetMap().
 *
 * marker[] entries equal to n + jcol flag rows already seen in this call.
 * xplore[krep] and xplore[m + krep] hold the saved scan position and
 * bound of a suspended DFS node; parent[] links the DFS stack.
 *
 */
    GlobalLU_t *Glu = pxgstrf_shared->Glu; /* modified */
    NCPformat  *Astore = A->Store;
    int        *asub     = Astore->rowind;
    int        *xa_begin = Astore->colbeg;
    int        *xa_end   = Astore->colend;
    /* xsup / xsup_end are kept under these exact names because the
       SUPER_REP / SUPER_FSUPC / SINGLETON macros are used below. */
    int        *xsup      = Glu->xsup;
    int        *xsup_end  = Glu->xsup_end;
    int        *supno     = Glu->supno;
    int        *lsub      = Glu->lsub;
    int        *xlsub     = Glu->xlsub;
    int        *xlsub_end = Glu->xlsub_end;

    register int tag = n + jcol; /* "visited in this call" stamp */
    register int rows = 0;       /* union of the nonzero rows in a supernode */
    register int col, nz;
    register int krow, kperm, krep, kpar;
    register int kchild, chperm, chrep;
    register int orig_rep;       /* supernode rep numbered in the original A */
    register int fsupc, xdfs, maxdfs;

    /* Visit every column of the H-supernode */
    for (col = jcol; col < jcol + w; ++col) {

	/* Start a dfs from each nonzero of A[*,col] */
	for (nz = xa_begin[col]; nz < xa_end[col]; ++nz) {
	    krow = asub[nz];

	    /* Already handled in this call */
	    if ( marker[krow] == tag ) continue;

	    kperm = perm_r[krow];

	    if ( kperm == EMPTY ) { /* krow is in L: count it once */
		marker[krow] = tag;
		++rows;
		continue;
	    }

	    /* krow is in U: search from its supernode rep unless that
	       rep has been explored already. */
	    krep = SUPER_REP( supno[kperm] );
	    orig_rep = iperm_r[krep];
	    if ( marker[orig_rep] == tag ) continue;

	    marker[orig_rep] = tag;
	    parent[krep] = EMPTY;
	    if ( ispruned[krep] ) {
		if ( SINGLETON( supno[krep] ) )
		    xdfs = xlsub_end[krep];
		else
		    xdfs = xlsub[krep];
		maxdfs = xprune[krep];
	    } else {
		fsupc = SUPER_FSUPC( supno[krep] );
		xdfs = xlsub[fsupc] + krep-fsupc+1;
		maxdfs = xlsub_end[fsupc];
	    }

	    for (;;) {
		/* Scan the remaining children of krep */
		while ( xdfs < maxdfs ) {
		    kchild = lsub[xdfs++];

		    if ( marker[kchild] == tag ) continue; /* reached before */

		    chperm = perm_r[kchild];
		    if ( chperm == EMPTY ) { /* kchild is in L */
			marker[kchild] = tag;
			++rows;
			continue;
		    }

		    /* kchild is in U: descend into its supernode rep
		       unless that rep has been explored already. */
		    chrep = SUPER_REP( supno[chperm] );
		    orig_rep = iperm_r[chrep];
		    if ( marker[orig_rep] == tag ) continue;

		    marker[orig_rep] = tag;
		    /* Suspend krep, remembering where to resume */
		    xplore[krep] = xdfs;
		    xplore[m + krep] = maxdfs;
		    parent[chrep] = krep;
		    krep = chrep; /* Go deeper down G(L^t) */
		    if ( ispruned[krep] ) {
			if ( SINGLETON( supno[krep] ) )
			    xdfs = xlsub_end[krep];
			else
			    xdfs = xlsub[krep];
			maxdfs = xprune[krep];
		    } else {
			fsupc = SUPER_FSUPC(supno[krep]);
			xdfs = xlsub[fsupc] + krep-fsupc+1;
			maxdfs = xlsub_end[fsupc];
		    }
		}

		/* krep is exhausted: pop back to its parent, or stop
		   when the stack is empty. */
		kpar = parent[krep];
		if ( kpar == EMPTY ) break;
		krep = kpar;
		xdfs = xplore[krep];
		maxdfs = xplore[m+krep];
	    }
	} /* for each nonz in A[*,col] */
    } /* for col ... */

    DynamicSetMap(pnum, jcol, rows * w, pxgstrf_shared);
    
/*    for (i = 1; i < w; ++i) Glu->map_in_sup[jcol + i] = -i;*/
    
#if ( DEBUGlevel>=1 )
    printf("(%d) pxgstrf_super_bnd_dfs(): jcol= %d, w= %d, nrow= %d\n",
	   pnum, jcol, w, rows);
#endif
}
Пример #5
0
void
pdgstrf_panel_dfs(
		  const int  pnum,  /* process number; only used in debug output */
		  const int  m,     /* number of rows in the matrix */
		  const int  w,     /* current panel width */
		  const int  jcol,  /* leading column of the current panel */
		  SuperMatrix *A,   /* original matrix */
		  int *perm_r, /* row pivotings that are done so far */
		  int *xprune, /* in; end of a pruned rep's adjacency list */
		  int *ispruned,   /* in; nonzero if the rep's list is pruned */
		  int *lbusy,      /* in; size n; lbusy[k] == jcol marks k busy */
		  int *nseg,	   /* out; number of entries stored in segrep */
		  int *panel_lsub, /* out; w columns, each with stride m */
		  int *w_lsub_end, /* out; values irrelevant on entry */
		  int *segrep,     /* out; supernode reps in DFS postorder */
		  int *repfnz,     /* out; w columns, each with stride m */
		  int *marker,     /* modified; only marker[m..] is touched here */
		  int *spa_marker, /* modified; size n-by-w */
		  int        *parent,     /* working array */
		  int *xplore,     /* working array */
		  double *dense,      /* out; size n-by-w */
		  GlobalLU_t *Glu         /* modified (this routine only reads it) */
		  )
{
/*
 * -- SuperLU MT routine (version 2.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley,
 * and Xerox Palo Alto Research Center.
 * September 10, 2007
 *
 * Purpose
 * =======
 *
 *   Performs a symbolic factorization on a panel of columns [jcol, jcol+w).
 *   It skips all those busy descendants that are worked on by other
 *   processors along the e-tree path.
 *
 *   For every column jj of the panel the routine
 *     - scatters the numerical values of A[*,jj] into dense[*,jj];
 *     - collects the rows that are not yet pivoted (perm_r[row] == EMPTY)
 *       into panel_lsub[*,jj] and stores their count in w_lsub_end[jj-jcol];
 *     - runs a depth-first search from every pivoted, non-busy row and
 *       records in repfnz[*,jj] the smallest pivot position seen for each
 *       supernode representative that was reached.
 *   Every representative reached by any column of the panel is appended to
 *   segrep exactly once, at the time the dfs finishes with it.
 *
 * Notes
 * =====
 *
 * (1) panel_lsub[0:w*n-1]: temporary for the nonzero row indices below 
 *     the panel diagonal, which will be used later in the inner LU
 *     factorization. For the busy columns, some of the nonzeros in U
 *     may be mistakenly placed in this list, because "perm_r" is
 *     still "empty". Later, during dcolumn_dfs in the inner factorization,
 *     we must filter those nonzeros belonging in U.
 *
 * (2) A supernode representative is the last column of a supernode.
 *     The nonzeros in U[*,j] are segments that end at supernodal
 *     representatives.
 *
 * (3) The routine returns one list of the supernodal representatives
 *     in topological order of the DFS that generates them. This list is
 *     a superset of the topological order of each individual column within
 *     the panel. The location of the first nonzero in each supernodal
 *     segment (supernodal entry location) is also returned. Each column
 *     has a separate list for this purpose.
 *
 * (4) Two marker arrays are used to facilitate dfs:
 *     col_marker[i] == jj, if row i was visited during dfs of the current
 *         column jj (col_marker is the slice of spa_marker that belongs
 *         to column jj);
 *     marker1[i] == jcol, if rep i was already placed in segrep by this
 *         panel (marker1 is marker + m);
 *
 * (5) The dfs stack is the combination of xplore[2*m] and parent[m]:
 *     xplore[k]     - pointer to k's adjacency list where search begins
 *     xplore[m + k] - pointer to k's adjacency list where search ends
 *
 * (6) Array mappings
 *     marker: A-row --> A-row/col (0/1)
 *     repfnz: SuperA-col --> PA-row
 *     parent: SuperA-col --> SuperA-col
 *     xplore: SuperA-col --> index to L-structure
 *
 * (7) repfnz entries are compared against EMPTY to decide whether a
 *     representative was already visited by the current column.
 *     NOTE(review): this assumes the caller hands in repfnz filled with
 *     EMPTY - confirm against the caller.
 *
 */
    NCPformat *Astore;
    double    *a;
    int       *asub;
    int       *xa_begin, *xa_end;
    register int krep, chperm, chmark, chrep, kchild, myfnz;
    register int k, krow, kmark, kperm, fsupc;
    register int xdfs, maxdfs, kpar, jj, nextp;
    register int nextl_col;/* next open position in panel_lsub[*,jj] */
    int       *marker1;	   /* marker1[k] == jcol if rep k has already been
			      appended to segrep while processing this panel. */
    int       *repfnz_col; /* start of each column in the panel */
    double    *dense_col;  /* start of each column in the panel */
    int       *xsup, *xsup_end, *supno, *lsub, *xlsub, *xlsub_end;
    /* NOTE(review): xsup and xsup_end are never named again below.
     * Presumably the SUPER_REP / SUPER_FSUPC / SINGLETON macros expand to
     * expressions that use these locals by name - confirm before renaming
     * or removing them.
     */

    int       *col_marker; /* marker array of each column in the panel */

    /* Initialize pointers */
    xsup       = Glu->xsup;
    xsup_end   = Glu->xsup_end;
    supno      = Glu->supno;
    lsub       = Glu->lsub;
    xlsub      = Glu->xlsub;
    xlsub_end  = Glu->xlsub_end;
    Astore     = A->Store;
    a          = Astore->nzval;
    asub       = Astore->rowind;
    xa_begin   = Astore->colbeg;
    xa_end     = Astore->colend;
    marker1    = marker + m;  /* second m-long part of the marker array */
    repfnz_col = repfnz;      /* column jcol; advanced by m per column */
    dense_col  = dense;       /* column jcol; advanced by m per column */
    nextp      = 0;           /* offset of column jj in the m-strided arrays */
    *nseg      = 0;

#if ( DEBUGlevel>=2 )
if (jcol == BADPAN)    
    printf("(%d) pdgstrf_panel_dfs[begin] jcol %d, w %d\n", pnum, jcol, w);
#endif
    
    /*
     * For each column in the panel ...
     * (nextp grows by m so that every column owns its own m-long slice
     *  of panel_lsub and spa_marker)
     */
    for (jj = jcol; jj < jcol + w; ++jj, nextp += m) {
	nextl_col = nextp;
	col_marker = &spa_marker[nextp];

	/*
	 * For each nonz in A[*,jj] perform dfs ...
	 */
	for (k = xa_begin[jj]; k < xa_end[jj]; ++k) {
	    krow = asub[k];
	    /* Scatter the value first: this happens for every nonzero,
	     * also for rows the dfs has already marked. */
	    dense_col[krow] = a[k];
	    kmark = col_marker[krow];
	    
	    /* if krow was visited before, go to the next nonzero */
	    if ( kmark == jj ) continue;

	    /*
	     * For each unmarked nbr krow of jj ...
	     */
	    col_marker[krow] = jj;
	    kperm = perm_r[krow];
	    
	    if ( kperm == EMPTY ) {
		/* krow is in L: place it in structure of L[*,jj].
		 * NOTE: some entries in U may get here, because "perm_r"
		 *       is not yet available from a preceding busy column.
		 */
		panel_lsub[nextl_col++] = krow; /* krow is indexed into A */
	    } else {
		/* 
		 * krow is in U (0 <= kperm < jcol): if its supernode
		 * representative krep has been explored, update repfnz[*].
		 */
		if ( lbusy[kperm] == jcol ) { /* kperm is busy */
#if ( DEBUGlevel>=3 )
  if (jj == BADCOL)		    
    printf("(%d) pdgstrf_panel_dfs(%d) skip busy krow %d, kperm %d\n",
	   pnum, jj, krow, kperm);
#endif		    
		    /* Skip it; krow stays marked for this column. */
		    continue;
		}

		/* Here, krep cannot possibly be "busy" */
		krep = SUPER_REP( supno[kperm] );
		myfnz = repfnz_col[krep];

#ifdef CHK_DFS
if (jj == BADCOL)		
    printf("(%d) pdgstrf_panel_dfs[1] %d, krep %d, fsupc %d, Pr[krow %d] %d, myfnz %d\n",
	   pnum, jj, krep, SUPER_FSUPC(supno[krep]), krow, kperm, myfnz);
#endif
		if ( myfnz != EMPTY ) {	/* Representative visited before */
		    /* Keep the smallest pivot position seen for this rep. */
		    if ( myfnz > kperm ) repfnz_col[krep] = kperm;
		    /* continue; */
		} else {
		    /* Otherwise, performs dfs starting from krep.
		     * parent == EMPTY marks krep as the bottom of the stack.
		     */
		    parent[krep] = EMPTY;
		    repfnz_col[krep] = kperm;
		    /* Select the range [xdfs, maxdfs) of lsub to scan:
		     * a pruned rep uses its own list up to xprune[krep]
		     * (a singleton supernode starts at xlsub_end[krep]);
		     * an unpruned rep uses the list stored at the first
		     * column of its supernode, starting krep-fsupc+1
		     * entries in.
		     */
		    if ( ispruned[krep] ) {
			if ( SINGLETON( supno[krep] ) )
			    xdfs = xlsub_end[krep];
			else xdfs = xlsub[krep];
			maxdfs = xprune[krep];
#ifdef PROFILE
			/*Gstat->procstat[pnum].pruned++;*/
#endif		    
		    } else {
			fsupc = SUPER_FSUPC( supno[krep] );
			xdfs = xlsub[fsupc] + krep-fsupc+1;
			maxdfs = xlsub_end[fsupc];
#ifdef PROFILE
			/*Gstat->procstat[pnum].unpruned++;*/
#endif		    
		    }
#ifdef CHK_DFS
if (jj == BADCOL)		    
{
    /* i is only needed by the commented-out loop below */
    register int i;
    printf("(%d) pdgstrf_panel_dfs[2] %d, ispruned[%d] %d, xdfs %d, maxdfs %d\n",
	   pnum, jj, krep, ispruned[krep], xdfs, maxdfs);
    /*for (i = xdfs; i < maxdfs; i++) printf("(%d) lsub-%d", pnum, lsub[i]);*/
    printf("\n");
}
#endif
		    /* Iterative dfs: one pass of the do-loop scans the
		     * remaining children of the rep on top of the stack. */
		    do {
			while ( xdfs < maxdfs ) {
			    /* for each unmarked kchild of krep ... */
			    kchild = lsub[xdfs];
			    xdfs++;
			    chmark = col_marker[kchild];
			    
			    if ( chmark != jj ) { /* Not reached yet */
				col_marker[kchild] = jj;
				chperm = perm_r[kchild];
				
				if ( chperm == EMPTY ) {
				    /* kchild is in L: place it in L[*,j]. */
				    panel_lsub[nextl_col++] = kchild;
				} else {
				    /* kchild is in U (0 <= chperm < jcol): 
				     * chrep = its supernode-rep. If its rep
				     * has been explored, update its repfnz[*].
				     */

				    /* Busy column: skip it and move on to
				     * the next child of krep. */
				    if ( lbusy[chperm] == jcol ) {
#ifdef DEBUG
if (jj == BADCOL)					
    printf("(%d) pdgstrf_panel_dfs(%d) skip busy kchild %d, chperm %d\n",
	   pnum, jj, kchild, chperm);
#endif		    
	                                     continue;
                                    }
				    
				    chrep = SUPER_REP( supno[chperm] );
				    myfnz = repfnz_col[chrep];
#ifdef DEBUG
if (jj == BADCOL)				    
    printf("(%d) pdgstrf_panel_dfs[3] %d, krep %d, Pr[kchild %d] %d, chrep %d, fsupc %d, myfnz %d\n",
	   pnum, jj, krep, kchild, chperm, chrep,
	   SUPER_FSUPC(supno[chrep]), myfnz);
#endif
				    if ( myfnz != EMPTY ) {/* Visited before */
					if ( myfnz > chperm )
					    repfnz_col[chrep] = chperm;
				    } else {
					/* Cont. dfs at snode-rep of kchild:
					 * save the scan position of krep
					 * (push), then descend into chrep. */
					xplore[krep] = xdfs;
					xplore[m + krep] = maxdfs;
					parent[chrep] = krep;
					krep = chrep; /* Go deeper down G(L) */
					repfnz_col[krep] = chperm;
					/* Same range selection as at the
					 * start of the dfs. */
					if ( ispruned[krep] ) {
					    if ( SINGLETON( supno[krep] ) )
						xdfs = xlsub_end[krep];
					    else xdfs = xlsub[krep];
					    maxdfs = xprune[krep];
#ifdef PROFILE
					    /*procstat[pnum].pruned++;*/
#endif		    
					} else {
					    fsupc = SUPER_FSUPC(supno[krep]);
					    xdfs = xlsub[fsupc] + krep-fsupc+1;
					    maxdfs = xlsub_end[fsupc];
#ifdef PROFILE
					    /*procstat[pnum].unpruned++;*/
#endif		    
					}
#ifdef CHK_DFS
if (jj == BADCOL)
    printf("(%d) pdgstrf_panel_dfs[4] %d, ispruned[%d] %d, xdfs %d, maxdfs %d\n",
	   pnum, jj, krep, ispruned[krep], xdfs, maxdfs);
#endif
					
				    } /* else */
				} /* else */
			      
			    } /* if... */
			    
			} /* while xdfs < maxdfs */
			
			/* krep has no more unexplored nbrs:
			 *    Place snode-rep krep in postorder DFS, if this 
			 *    segment is seen for the first time. (Note that
			 *    "repfnz[krep]" may change later.)
			 *    Backtrack dfs to its parent.
			 *    marker1 is stamped with jcol (not jj), so a rep
			 *    is listed once per panel, not once per column.
			 */
			if ( marker1[krep] != jcol ) {
			    segrep[*nseg] = krep;
			    ++(*nseg);
			    marker1[krep] = jcol;
#ifdef CHK_DFS
if (jj == BADCOL)			    
    printf("(%d) pdgstrf_panel_dfs(%d) repfnz[%d] %d added to top.list by jj %d\n",
	   pnum, jj, krep, repfnz_col[krep], jj);
#endif			    
			}
			
			kpar = parent[krep]; /* Pop stack, mimic recursion */
			if ( kpar == EMPTY ) break; /* dfs done */
			/* Resume the parent's scan where it was suspended. */
			krep = kpar;
			xdfs = xplore[krep];
			maxdfs = xplore[m + krep];
			
#ifdef CHK_DFS
if (jj == BADCOL)			
{
    /* i is only needed by the commented-out loop below */
    register int i;
    printf("(%d) pdgstrf_panel_dfs[5] pop stack: %d, krep %d, xdfs %d, maxdfs %d\n",
	   pnum, jj, krep, xdfs, maxdfs);
    /* for (i = xdfs; i < maxdfs; i++) printf("(%d) lsub-%d", pnum, lsub[i]);*/
    printf("\n");
}
#endif

		    /* The condition is always true when reached: the break
		     * above is the actual exit of the loop. */
		    } while ( kpar != EMPTY ); /* until empty stack */
		    
		} /* else: myfnz == EMPTY */
		
	    } /* else: kperm != EMPTY */
	    
	} /* for each nonzero in A[*,jj] */

#if ( DEBUGlevel>=3 )
if (jj == BADCOL) {
#define REPCOL 0    
    krep = REPCOL;
    printf("(%d) pdgstrf_panel_dfs(end) w_lsub_end[jj=%d] %d, repfnz_col[%d] %d\n",
	   pnum, jj, nextl_col - nextp, krep, repfnz_col[krep]);
    PrintInt10("lsub", nextl_col - nextp, &panel_lsub[nextp]);
}
#endif
	
	/* Number of rows stored in panel_lsub for column jj (a count
	 * relative to the start of the column, not an absolute index). */
	w_lsub_end[jj-jcol] = nextl_col - nextp;
	/* Advance the per-column views to the next column of the panel. */
	repfnz_col += m;
        dense_col += m;
	
    } /* for jj ... */

}