/*
 * Count the total number of nonzeros in factors L and U, and in the
 * symmetrically reduced L.
 *
 * Arguments
 * =========
 * n       (in)  number of columns.  supno[n] is read as the index of the
 *               last supernode, so supno[] must hold at least n+1 entries.
 * xprune  (in)  per-column end offsets of the pruned subscript lists.
 * nnzL    (out) for every supernode, the sum over its columns of the
 *               remaining column length (jlen, jlen-1, ...).
 * nnzU    (out) starts from Glu->nextu and adds, for the k-th column of
 *               every supernode (k = 1, 2, ...), k entries.
 * Glu     (in)  supplies xsup, xsup_end, xlsub, xlsub_end, supno, nextu.
 *
 * When n <= 0 the routine returns with *nnzL = 0 and *nnzU = Glu->nextu
 * without reading supno[] (reading supno[n] for a negative n would index
 * before the start of the array).
 */
void
countnz(const int n, int *xprune, int *nnzL, int *nnzU, GlobalLU_t *Glu)
{
    register int nsuper, fsupc, i, j, nnzL0, jlen, irep;
    register int nnzsup = 0;
    register int *xsup, *xsup_end, *xlsub, *xlsub_end, *supno;

    xsup      = Glu->xsup;
    xsup_end  = Glu->xsup_end;
    xlsub     = Glu->xlsub;
    xlsub_end = Glu->xlsub_end;
    supno     = Glu->supno;
    *nnzU     = Glu->nextu;
    nnzL0     = 0;
    *nnzL     = 0;

    /* Guard first: supno[n] must not be touched for an empty/invalid n. */
    if ( n <= 0 ) return;

    nsuper = supno[n];

    /*
     * For each supernode ...
     */
    for (i = 0; i <= nsuper; i++) {
        fsupc = xsup[i];
        jlen = xlsub_end[fsupc] - xlsub[fsupc];
        /* Dense storage of the supernode: (#rows) x (#columns). */
        nnzsup += jlen * (xsup_end[i] - fsupc);

        for (j = fsupc; j < xsup_end[i]; j++) {
            *nnzL += jlen;
            *nnzU += j - fsupc + 1;
            jlen--;
        }

        /* Edges kept in the symmetrically reduced L, taken from the
           supernode's representative column. */
        irep = SUPER_REP(i);
        if ( SINGLETON(supno[irep]) )
            nnzL0 += xprune[irep] - xlsub_end[irep];
        else
            nnzL0 += xprune[irep] - xlsub[irep];
    }

#if ( PRNTlevel==1 )
    printf(".. # supernodes = %d\n", nsuper+1);
    printf(".. # edges in symm-reduced L = %d\n", nnzL0);
    if ( Glu->dynamic_snode_bound )
        printf(".. # NZ in LUSUP %d, dynamic bound %d, utilization %.2f\n",
               nnzsup, Glu->nextlu, (float)nnzsup/Glu->nextlu);
    else
        printf(".. # NNZ in LUSUP %d, static bound %d, utilization %.2f\n",
               nnzsup, Glu->nzlumax, (float)nnzsup/Glu->nzlumax);
#endif
}
void
psgstrf_panel_bmod(
		   const int  pnum, /* process number */
		   const int  m,    /* number of rows in the matrix */
		   const int  w,    /* current panel width */
		   const int  jcol, /* leading column of the current panel */
		   const int  bcol, /* first column of the farthest busy snode*/
		   int   *inv_perm_r,/* in; inverse of the row pivoting */
		   int   *etree,     /* in */
		   int   *nseg,      /* modified */
		   int   *segrep,    /* modified */
		   int   *repfnz,    /* modified, size n-by-w */
		   int   *panel_lsub,/* modified */
		   int   *w_lsub_end,/* modified */
		   int   *spa_marker,/* modified; size n-by-w */
		   float *dense, /* modified, size n-by-w */
		   float *tempv, /* working array - zeros on input/output */
		   pxgstrf_shared_t *pxgstrf_shared /* modified */
		   )
{
/*
 * -- SuperLU MT routine (version 2.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley,
 * and Xerox Palo Alto Research Center.
 * September 10, 2007
 *
 * Purpose
 * =======
 *
 *    Performs numeric block updates (sup-panel) in topological order.
 *    It features combined 1D and 2D blocking of the source updating s-node.
 *    It consists of two steps:
 *       (1) accumulates updates from "done" s-nodes.
 *       (2) accumulates updates from "busy" s-nodes.
 *
 *    Before entering this routine, the nonzeros of the original A in
 *    this panel were already copied into the SPA dense[n,w].
 *
 * Updated/Output arguments
 * ========================
 *    L[*,j:j+w-1] and U[*,j:j+w-1] are returned collectively in the
 *    m-by-w vector dense[*,w]. The locations of nonzeros in L[*,j:j+w-1]
 *    are given by lsub[*] and U[*,j:j+w-1] by (nseg,segrep,repfnz).
 *
 * Notes for maintainers
 * =====================
 *    - Step (1) walks segrep[] from its last entry down to its first.
 *    - Step (2) starts at column bcol and follows etree[] upward while the
 *      current column is < jcol; every supernode met on the way is appended
 *      to segrep[] (and *nseg is incremented) before it is applied.
 *    - The per-column views (repfnz_col, dense_col, col_marker, col_lsub)
 *      advance by m entries per panel column.
 *    - The 2-D kernel is chosen when nsupc >= colblk && nrow >= rowblk,
 *      otherwise the 1-D kernel; GEMV2 selects the *_mv2 variants.
 *    - xsup_end is not referenced directly in this routine; presumably
 *      SUPER_REP() expands to an expression over it -- confirm before
 *      renaming or removing that local.
 *
 */
    GlobalLU_t *Glu = pxgstrf_shared->Glu;  /* modified */
    Gstat_t *Gstat = pxgstrf_shared->Gstat; /* modified */
    register int j, k, ksub;
    register int fsupc, nsupc, nsupr, nrow;
    register int kcol, krep, ksupno, dadsupno;
    register int jj;	      /* index through each column in the panel */
    int          *xsup, *xsup_end, *supno;
    int          *lsub, *xlsub, *xlsub_end;
    int          *repfnz_col; /* repfnz[] for a column in the panel */
    float       *dense_col;  /* dense[] for a column in the panel */
    int          *col_marker; /* each column of the spa_marker[*,w] */
    int          *col_lsub;   /* each column of the panel_lsub[*,w] */
    /* Blocking thresholds, fetched once from sp_ienv() and then cached.
       NOTE(review): this lazy initialisation of function-statics is not
       synchronised; confirm that concurrent first calls are benign. */
    static   int first = 1, rowblk, colblk;

#ifdef PROFILE
    double   t1, t2; /* temporary time */
#endif

#ifdef PREDICT_OPT
    register float pmod, max_child_eft = 0, sum_pmod = 0, min_desc_eft = 0;
    register float pmod_eft;
    register int   kid, ndesc = 0;
#endif

#if ( DEBUGlevel>=2 )
    int dbg_addr = 0*m;
#endif

    if ( first ) {
	rowblk   = sp_ienv(4);
	colblk   = sp_ienv(5);
	first = 0;
    }

    xsup      = Glu->xsup;
    xsup_end  = Glu->xsup_end;
    supno     = Glu->supno;
    lsub      = Glu->lsub;
    xlsub     = Glu->xlsub;
    xlsub_end = Glu->xlsub_end;

#if ( DEBUGlevel>=2 )
    /*if (jcol >= LOCOL && jcol <= HICOL)
    check_panel_dfs_list(pnum, "begin", jcol, *nseg, segrep);*/
if (jcol == BADPAN)
    printf("(%d) Enter psgstrf_panel_bmod() jcol %d,BADCOL %d,dense_col[%d] %.10f\n",
	   pnum, jcol, BADCOL, BADROW, dense[dbg_addr+BADROW]);
#endif

    /* --------------------------------------------------------------------
       For each non-busy supernode segment of U[*,jcol] in topological order,
       perform sup-panel update.
       -------------------------------------------------------------------- */
    k = *nseg - 1;	/* segrep[] is consumed from its last entry backwards */
    for (ksub = 0; ksub < *nseg; ++ksub) {
	/*
	 * krep = representative of current k-th supernode
	 * fsupc = first supernodal column
	 * nsupc = no of columns in a supernode
	 * nsupr = no of rows in a supernode
	 * nrow  = nsupr - nsupc, the rows left over after the nsupc columns
	 */
        krep = segrep[k--];
	fsupc = xsup[supno[krep]];
	nsupc = krep - fsupc + 1;
	nsupr = xlsub_end[fsupc] - xlsub[fsupc];
	nrow = nsupr - nsupc;

#ifdef PREDICT_OPT
	pmod = Gstat->procstat[pnum].fcops;
#endif

	if ( nsupc >= colblk && nrow >= rowblk ) {
	    /* 2-D block update */
#ifdef GEMV2
	    psgstrf_bmod2D_mv2(pnum, m, w, jcol, fsupc, krep, nsupc, nsupr,
			       nrow, repfnz, panel_lsub, w_lsub_end,
			       spa_marker, dense, tempv, Glu, Gstat);
#else
	    psgstrf_bmod2D(pnum, m, w, jcol, fsupc, krep, nsupc, nsupr, nrow,
			   repfnz, panel_lsub, w_lsub_end, spa_marker,
			   dense, tempv, Glu, Gstat);
#endif
	} else {
	    /* 1-D block update */
#ifdef GEMV2
	    psgstrf_bmod1D_mv2(pnum, m, w, jcol, fsupc, krep, nsupc, nsupr,
			       nrow, repfnz, panel_lsub, w_lsub_end,
			       spa_marker, dense, tempv, Glu, Gstat);
#else
	    psgstrf_bmod1D(pnum, m, w, jcol, fsupc, krep, nsupc, nsupr, nrow,
			   repfnz, panel_lsub, w_lsub_end, spa_marker,
			   dense, tempv, Glu, Gstat);
#endif
	}

#ifdef PREDICT_OPT
	/* NOTE(review): this path reads Glu->pan_status, while the busy-update
	   path and the final loop below read pxgstrf_shared->pan_status --
	   confirm which structure actually owns pan_status. */
	pmod = Gstat->procstat[pnum].fcops - pmod;
	kid = (Glu->pan_status[krep].size > 0) ?
	       krep : (krep + Glu->pan_status[krep].size);
	desc_eft[ndesc].eft = cp_panel[kid].est + cp_panel[kid].pdiv;
	desc_eft[ndesc++].pmod = pmod;
#endif

#if ( DEBUGlevel>=2 )
if (jcol == BADPAN)
    printf("(%d) non-busy update: krep %d, repfnz %d, dense_col[%d] %.10e\n",
	   pnum, krep, repfnz[dbg_addr+krep], BADROW, dense[dbg_addr+BADROW]);
#endif

    } /* for each updating supernode ... */

#if ( DEBUGlevel>=2 )
if (jcol == BADPAN)
    printf("(%d) After non-busy update: dense_col[%d] %.10e\n",
	   pnum, BADROW, dense[dbg_addr+BADROW]);
#endif

    /* ---------------------------------------------------------------------
     * Now wait for the "busy" s-nodes to become "done" -- this amounts to
     * climbing up the e-tree along the path starting from "bcol".
     * Several points are worth noting:
     *
     *  (1) There are two possible relations between supernodes and panels
     *      along the path of the e-tree:
     *      o |s-node| < |panel|
     *        want to climb up the e-tree one column at a time in order
     *        to achieve more concurrency
     *      o |s-node| > |panel|
     *        want to climb up the e-tree one panel at a time; this
     *        processor is stalled anyway while waiting for the panel.
     *
     *  (2) Need to accommodate new fills, append them in panel_lsub[*,w].
     *      o use an n-by-w marker array, as part of the SPA (not scalable!)
     *
     *  (3) Symbolically, need to find out repfnz[S, w], for each (busy)
     *      supernode S.
     *      o use dense[inv_perm_r[kcol]], filter all zeros
     *      o detect the first nonzero in each segment
     *        (at this moment, the boundary of the busy supernode/segment
     *         S has already been identified)
     *
     * --------------------------------------------------------------------- */

    kcol = bcol;
    while ( kcol < jcol ) {
        /* Pointers to each column of the w-wide arrays. */
	repfnz_col = repfnz;
	dense_col = dense;
	col_marker = spa_marker;
	col_lsub = panel_lsub;

	/* Wait for the supernode, and collect wait-time statistics.
	   await() is only entered when the lock word is currently nonzero. */
	if ( pxgstrf_shared->spin_locks[kcol] ) {
#ifdef PROFILE
	    TIC(t1);
#endif
	    await( &pxgstrf_shared->spin_locks[kcol] );

#ifdef PROFILE
	    TOC(t2, t1);
	    Gstat->panstat[jcol].pipewaits++;
	    Gstat->panstat[jcol].spintime += t2;
	    Gstat->procstat[pnum].spintime += t2;
#ifdef DOPRINT
	    PRINT_SPIN_TIME(1);
#endif
#endif
	}

        /* Find leading column "fsupc" in the supernode that
           contains column "kcol" */
	ksupno = supno[kcol];
	fsupc = kcol;

#if ( DEBUGlevel>=2 )
	/*if (jcol >= LOCOL && jcol <= HICOL)    */
  if ( jcol==BADCOL )
    printf("(%d) psgstrf_panel_bmod[1] kcol %d, ksupno %d, fsupc %d\n",
	   pnum, kcol, ksupno, fsupc);
#endif

	/* Wait for the whole supernode to become "done" --
	   climb up e-tree one column at a time.  The loop ends either when
	   the parent column reaches the current panel (kcol >= jcol) or when
	   the parent belongs to a different supernode. */
	do {
	    krep = SUPER_REP( ksupno );
	    kcol = etree[kcol];
	    if ( kcol >= jcol ) break;
	    if ( pxgstrf_shared->spin_locks[kcol] ) {
#ifdef PROFILE
		TIC(t1);
#endif
		await ( &pxgstrf_shared->spin_locks[kcol] );

#ifdef PROFILE
		TOC(t2, t1);
		Gstat->panstat[jcol].pipewaits++;
		Gstat->panstat[jcol].spintime += t2;
		Gstat->procstat[pnum].spintime += t2;
#ifdef DOPRINT
		PRINT_SPIN_TIME(2);
#endif
#endif
	    }

	    dadsupno = supno[kcol];

#if ( DEBUGlevel>=2 )
	    /*if (jcol >= LOCOL && jcol <= HICOL)*/
if ( jcol==BADCOL )
    printf("(%d) psgstrf_panel_bmod[2] krep %d, dad=kcol %d, dadsupno %d\n",
	   pnum, krep, kcol, dadsupno);
#endif

	} while ( dadsupno == ksupno );

	/* Append the new segment into segrep[*]. After column_bmod(),
	   copy_to_ucol() will use them. */
        segrep[*nseg] = krep;
        ++(*nseg);

	/* Determine repfnz[krep, w] for each column in the panel */
	for (jj = jcol; jj < jcol + w; ++jj, dense_col += m,
	       repfnz_col += m, col_marker += m, col_lsub += m) {
	    /*
	     * Note: relaxed supernode may not form a path on the e-tree,
	     *       but its column numbers are contiguous.
	     */
#ifdef SCATTER_FOUND
	    /* Variant driven by the marker array: fills are appended only
	       when a marked row is found inside [fsupc, krep]. */
 	    for (kcol = fsupc; kcol <= krep; ++kcol) {
		if ( col_marker[inv_perm_r[kcol]] == jj ) {
		    repfnz_col[krep] = kcol;

 		    /* Append new fills in panel_lsub[*,jj]. */
		    j = w_lsub_end[jj - jcol];
/*#pragma ivdep*/
		    for (k = xlsub[krep]; k < xlsub_end[krep]; ++k) {
			ksub = lsub[k];
			if ( col_marker[ksub] != jj ) {
			    col_marker[ksub] = jj;
			    col_lsub[j++] = ksub;
			}
		    }
		    w_lsub_end[jj - jcol] = j;

		    break; /* found the leading nonzero in the segment */
		}
	    }

#else
	    /* Variant driven by the numerical values in dense_col[]. */
	    for (kcol = fsupc; kcol <= krep; ++kcol) {
                if ( dense_col[inv_perm_r[kcol]] != 0.0 ) {
		    repfnz_col[krep] = kcol;
		    break; /* Found the leading nonzero in the U-segment */
		}
	    }

	    /* In this case, we always treat the L-subscripts of the
	       busy s-node [kcol : krep] as the new fills, even if the
	       corresponding U-segment may be all zero. */

	    /* Append new fills in panel_lsub[*,jj]. */
	    j = w_lsub_end[jj - jcol];
/*#pragma ivdep*/
	    for (k = xlsub[krep]; k < xlsub_end[krep]; ++k) {
	        ksub = lsub[k];
		if ( col_marker[ksub] != jj ) {
		    col_marker[ksub] = jj;
		    col_lsub[j++] = ksub;
		}
	    }
	    w_lsub_end[jj - jcol] = j;
#endif

#if ( DEBUGlevel>=2 )
if (jj == BADCOL) {
printf("(%d) psgstrf_panel_bmod[fills]: jj %d, repfnz_col[%d] %d, inv_pr[%d] %d\n",
       pnum, jj, krep, repfnz_col[krep], fsupc, inv_perm_r[fsupc]);
printf("(%d) psgstrf_panel_bmod[fills] xlsub %d, xlsub_end %d, #lsub[%d] %d\n",
       pnum,xlsub[krep],xlsub_end[krep],krep, xlsub_end[krep]-xlsub[krep]);
}
#endif
	} /* for jj ... */

#ifdef PREDICT_OPT
	pmod = Gstat->procstat[pnum].fcops;
#endif

	/* Perform sup-panel updates - use combined 1D + 2D updates. */
	nsupc = krep - fsupc + 1;
	nsupr = xlsub_end[fsupc] - xlsub[fsupc];
	nrow = nsupr - nsupc;
	if ( nsupc >= colblk && nrow >= rowblk ) {
	    /* 2-D block update */
#ifdef GEMV2
	    psgstrf_bmod2D_mv2(pnum, m, w, jcol, fsupc, krep, nsupc, nsupr,
			       nrow, repfnz, panel_lsub, w_lsub_end,
			       spa_marker, dense, tempv, Glu, Gstat);
#else
	    psgstrf_bmod2D(pnum, m, w, jcol, fsupc, krep, nsupc, nsupr, nrow,
			   repfnz, panel_lsub, w_lsub_end, spa_marker,
			   dense, tempv, Glu, Gstat);
#endif
	} else {
	    /* 1-D block update */
#ifdef GEMV2
	    psgstrf_bmod1D_mv2(pnum, m, w, jcol, fsupc, krep, nsupc, nsupr,
			       nrow, repfnz, panel_lsub, w_lsub_end,
			       spa_marker, dense, tempv, Glu, Gstat);
#else
	    psgstrf_bmod1D(pnum, m, w, jcol, fsupc, krep, nsupc, nsupr, nrow,
			   repfnz, panel_lsub, w_lsub_end, spa_marker,
			   dense, tempv, Glu, Gstat);
#endif
	}

#ifdef PREDICT_OPT
	pmod = Gstat->procstat[pnum].fcops - pmod;
	kid = (pxgstrf_shared->pan_status[krep].size > 0) ?
	       krep : (krep + pxgstrf_shared->pan_status[krep].size);
	desc_eft[ndesc].eft = cp_panel[kid].est + cp_panel[kid].pdiv;
	desc_eft[ndesc++].pmod = pmod;
#endif

#if ( DEBUGlevel>=2 )
if (jcol == BADPAN)
    printf("(%d) After busy update: dense_col[%d] %.10f\n",
	   pnum, BADROW, dense[dbg_addr+BADROW]);
#endif

	/* Go to the parent of "krep" */
	kcol = etree[krep];

    } /* while kcol < jcol ... */

#if ( DEBUGlevel>=2 )
    /*if (jcol >= LOCOL && jcol <= HICOL)*/
if ( jcol==BADCOL )
    check_panel_dfs_list(pnum, "after-busy", jcol, *nseg, segrep);
#endif

#ifdef PREDICT_OPT
    /* Fold the recorded descendant contributions, in the order produced by
       qsort() with numcomp, into the estimate stored in cp_panel[jcol].est. */
    qsort(desc_eft, ndesc, sizeof(desc_eft_t), (int(*)())numcomp);
    pmod_eft = 0;
    for (j = 0; j < ndesc; ++j) {
	pmod_eft = SUPERLU_MAX( pmod_eft, desc_eft[j].eft ) + desc_eft[j].pmod;
    }

    if ( ndesc == 0 ) {
	/* No modifications from descendants */
	pmod_eft = 0;
	for (j = cp_firstkid[jcol]; j != EMPTY; j = cp_nextkid[j]) {
	    kid = (pxgstrf_shared->pan_status[j].size > 0) ?
			j : (j + pxgstrf_shared->pan_status[j].size);
	    pmod_eft = SUPERLU_MAX( pmod_eft,
				   cp_panel[kid].est + cp_panel[kid].pdiv );
	}
    }

    cp_panel[jcol].est = pmod_eft;

#endif

}
int
pzgstrf_column_dfs(
		   const int  pnum,    /* process number */
		   const int  m,       /* number of rows in the matrix */
		   const int  jcol,    /* current column in the panel */
		   const int  fstcol,  /* first column in the panel */
		   int *perm_r,   /* row pivotings that are done so far */
		   int *ispruned, /* in */
		   int *col_lsub, /* the RHS vector to start the dfs */
		   int lsub_end,  /* size of col_lsub[] */
		   int *super_bnd,/* supernode partition by upper bound */
		   int *nseg,     /* modified - with new segments appended */
		   int *segrep,   /* modified - with new segments appended */
		   int *repfnz,   /* modified */
		   int *xprune,   /* modified */
		   int *marker2,  /* modified */
		   int *parent,   /* working array */
		   int *xplore,   /* working array */
		   pxgstrf_shared_t *pxgstrf_shared /* modified */
		   )
{
/*
 * -- SuperLU MT routine (version 2.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley,
 * and Xerox Palo Alto Research Center.
 * September 10, 2007
 *
 * Purpose
 * =======
 *    pzgstrf_column_dfs() performs a symbolic factorization on column jcol,
 *    and detects whether column jcol belongs in the same supernode as jcol-1.
 *
 * Local parameters
 * ================
 *    A supernode representative is the last column of a supernode.
 *    The nonzeros in U[*,j] are segments that end at supernodal
 *    representatives. The routine returns a list of such supernodal
 *    representatives in topological order of the dfs that generates them.
 *    The location of the first nonzero in each such supernodal segment
 *    (supernodal entry location) is also returned.
 *
 *    nseg: no of segments in current U[*,j]
 *    samesuper: samesuper=NO if column j does not belong in the same
 *	        supernode as j-1. Otherwise, samesuper=YES.
 *
 *    marker2: A-row --> A-row/col (0/1)
 *    repfnz: SuperA-col --> PA-row
 *    parent: SuperA-col --> SuperA-col
 *    xplore: SuperA-col --> index to L-structure
 *
 *    The dfs is iterative: parent[] holds the return link of each
 *    representative, and xplore[k] / xplore[m + k] hold the saved
 *    current / end positions in lsub[] for representative k.
 *
 *    Column 0 has no predecessor, so it always starts a new supernode;
 *    supno[], xlsub[] and xlsub_end[] are never indexed with jcol-1 == -1.
 *
 * Return value
 * ============
 *     0  success;
 *   !=0  the nonzero value returned by Glu_alloc() (described by the
 *        original author as the number of bytes allocated when running
 *        out of space).
 *
 */
    GlobalLU_t *Glu = pxgstrf_shared->Glu; /* modified */
    Gstat_t *Gstat = pxgstrf_shared->Gstat; /* modified */
    register int jcolm1, jcolm1size, nextl, ifrom;
    register int k, krep, krow, kperm, samesuper, nsuper;
    register int no_lsub;
    int	    fsupc;		/* first column in a supernode */
    int     myfnz;		/* first nonz column in a U-segment */
    int	    chperm, chmark, chrep, kchild;
    int     xdfs, maxdfs, kpar;
    int     ito;	        /* Used to compress row subscripts */
    int     mem_error;
    int     *xsup, *xsup_end, *supno, *lsub, *xlsub, *xlsub_end;
    /* Maximum supernode width, fetched once from sp_ienv(3) and cached. */
    static  int  first = 1, maxsuper;

    if ( first ) {
	maxsuper = sp_ienv(3);
	first = 0;
    }

    /* Initialize pointers */
    xsup      = Glu->xsup;
    xsup_end  = Glu->xsup_end;
    supno     = Glu->supno;
    lsub      = Glu->lsub;
    xlsub     = Glu->xlsub;
    xlsub_end = Glu->xlsub_end;
    jcolm1    = jcol - 1;
    nextl     = lsub_end;
    no_lsub   = 0;
    samesuper = YES;

    /* Test whether the row structure of column jcol is contained
       in that of column jcol-1. */
    for (k = 0; k < lsub_end; ++k) {
	krow = col_lsub[k];
	if ( perm_r[krow] == EMPTY ) { /* krow is in L */
	    ++no_lsub;
	    if (marker2[krow] != jcolm1)
	        samesuper = NO; /* row subset test */
	    marker2[krow] = jcol;
	}
    }

#if ( DEBUGlevel>=2 )
  if (jcol == BADCOL)
    printf("(%d) pzgstrf_column_dfs[1] %d, fstcol %d, lsub_end %d, no_lsub %d, samesuper? %d\n",
	   pnum, jcol, fstcol, lsub_end, no_lsub, samesuper);
#endif

    /*
     * For each nonzero in A[fstcol:n,jcol] perform DFS ...
     */
    for (k = 0; k < lsub_end; ++k) {
	krow = col_lsub[k];

	/* if krow was visited before, go to the next nonzero */
	if ( marker2[krow] == jcol ) continue;
	marker2[krow] = jcol;
	kperm = perm_r[krow];

#if ( DEBUGlevel>=3 )
  if (jcol == BADCOL)
    printf("(%d) pzgstrf_column_dfs[inner]: perm_r[krow=%d] %d\n", pnum, krow, kperm);
#endif

	/* Ignore the nonzeros in U corresponding to the busy columns
	   during the panel DFS. */
	/*if ( lbusy[kperm] != fstcol ) {  xiaoye? */
	if ( kperm >= fstcol ) {
	    /*
	     * krow is in U: if its supernode representative krep
	     * has been explored, update repfnz[*].
	     */
	    krep = SUPER_REP(supno[kperm]);
	    myfnz = repfnz[krep];

#if ( DEBUGlevel>=3 )
  if (jcol == BADCOL)
    printf("(%d) pzgstrf_column_dfs[inner-U]: krep %d, myfnz %d, kperm %d\n",
	   pnum, krep, myfnz, kperm);
#endif
	    if ( myfnz != EMPTY ) {	/* Visited before */
		if ( myfnz > kperm ) repfnz[krep] = kperm;
		/* continue; */
	    } else {
		/* Otherwise, perform dfs starting at krep */
		parent[krep] = EMPTY;
		repfnz[krep] = kperm;
		/* Select the adjacency range [xdfs, maxdfs) of krep in lsub[]:
		   the pruned list when available, otherwise the part of the
		   supernode's list that follows its first krep-fsupc+1
		   entries. */
		if ( ispruned[krep] ) {
		    if ( SINGLETON( supno[krep] ) )
			xdfs = xlsub_end[krep];
		    else xdfs = xlsub[krep];
		    maxdfs = xprune[krep];
#ifdef PROFILE
		    Gstat->procstat[pnum].pruned++;
#endif
		} else {
		    fsupc = SUPER_FSUPC( supno[krep] );
		    xdfs = xlsub[fsupc] + krep-fsupc+1;
		    maxdfs = xlsub_end[fsupc];
#ifdef PROFILE
		    Gstat->procstat[pnum].unpruned++;
#endif
		}

		do {
		    /*
		     * For each unmarked kchild of krep ...
		     */
		    while ( xdfs < maxdfs ) {

			kchild = lsub[xdfs];
			xdfs++;
			chmark = marker2[kchild];

			if ( chmark != jcol ) { /* Not reached yet */
			    marker2[kchild] = jcol;
			    chperm = perm_r[kchild];

			    if ( chperm == EMPTY ) {
				/* kchild is in L: place it in L[*,k]. */
				++no_lsub;
				col_lsub[nextl++] = kchild;
				if (chmark != jcolm1) samesuper = NO;
			    } else {
				/* kchild is in U: chrep = its supernode
				 * representative. If its rep has
				 * been explored, update its repfnz[*].
				 */
				chrep = SUPER_REP( supno[chperm] );
				myfnz = repfnz[chrep];
				if ( myfnz != EMPTY ) { /* Visited before */
				    if ( myfnz > chperm )
					repfnz[chrep] = chperm;
				} else {
				    /* Continue dfs at super-rep of kchild */
				    xplore[krep] = xdfs;
				    xplore[m + krep] = maxdfs;
				    parent[chrep] = krep;
				    krep = chrep; /* Go deeper down G(L^t) */
				    repfnz[krep] = chperm;
				    if ( ispruned[krep] ) {
					if ( SINGLETON( supno[krep] ) )
					    xdfs = xlsub_end[krep];
					else xdfs = xlsub[krep];
					maxdfs = xprune[krep];
#ifdef PROFILE
					Gstat->procstat[pnum].pruned++;
#endif
				    } else {
					fsupc = SUPER_FSUPC( supno[krep] );
					xdfs = xlsub[fsupc] + krep-fsupc+1;
					maxdfs = xlsub_end[fsupc];
#ifdef PROFILE
					Gstat->procstat[pnum].unpruned++;
#endif
				    }
				}
			    } /* else */
			} /* if */
		    } /* while */

		    /* krow has no more unexplored nbrs:
		     *    place supernode-rep krep in postorder DFS,
		     *    backtrack dfs to its parent.
		     */
		    segrep[*nseg] = krep;
		    ++(*nseg);

#if ( DEBUGlevel>=3 )
  if (jcol == BADCOL)
    printf("(%d) pzgstrf_column_dfs[inner-dfs] new nseg %d, repfnz[krep=%d] %d\n",
	   pnum, *nseg, krep, repfnz[krep]);
#endif
		    kpar = parent[krep]; /* Pop from stack, mimic recursion */
		    if ( kpar == EMPTY ) break; /* dfs done */
		    krep = kpar;
		    xdfs = xplore[krep];
		    maxdfs = xplore[m + krep];
		} while ( kpar != EMPTY ); /* Do ... until empty stack */

	    } /* else myfnz ... */

	} /* if kperm >= fstcol ... */

    } /* for each nonzero ... */

#if ( DEBUGlevel>=3 )
  if (jcol == BADCOL)
    printf("(%d) pzgstrf_column_dfs[2]: nextl %d, samesuper? %d\n",
	   pnum, nextl, samesuper);
#endif

    /* assert(no_lsub == nextl - no_usub);*/

    /* ---------------------------------------------------------
       Check to see if j belongs in the same supernode as j-1.
       --------------------------------------------------------- */

    /* Column 0 has no column jcol-1 to join.  Without this guard a
       surviving samesuper == YES would make the test below read
       supno[-1], xlsub[-1] and xlsub_end[-1]. */
    if ( jcol == 0 ) samesuper = NO;

    if ( samesuper == YES ) {
	nsuper = supno[jcolm1];
	jcolm1size = xlsub_end[jcolm1] - xlsub[jcolm1];

#if ( DEBUGlevel>=3 )
  if (jcol == BADCOL)
    printf("(%d) pzgstrf_column_dfs[YES] jcol-1 %d, jcolm1size %d, supno[%d] %d\n",
	   pnum, jcolm1, jcolm1size, jcolm1, nsuper);
#endif

	if ( no_lsub != jcolm1size-1 )
	    samesuper = NO;        /* Enforce T2 supernode */
	else {
	    /* Make sure the number of columns in a supernode does not
	       exceed threshold. */
	    fsupc = xsup[nsuper];
	    if ( jcol - fsupc >= maxsuper )
		samesuper = NO;
	    else {
		/* start of a supernode in H (coarser partition) */
		if ( super_bnd[jcol] != 0 ) samesuper = NO;
	    }
	}
    }

    /* If jcol starts a new supernode, allocate storage for
     * the subscript set of both first and last column of
     * a previous supernode. (first for num values, last for pruning)
     */
    if ( samesuper == NO ) { /* starts a new supernode */
	nsuper = NewNsuper(pnum, pxgstrf_shared, &Glu->nsuper);
	xsup[nsuper] = jcol;

	/* Copy column jcol; also reserve space to store pruned graph */
	if ((mem_error = Glu_alloc(pnum, jcol, 2*no_lsub, LSUB, &ito,
				  pxgstrf_shared)))
	    return mem_error;
	xlsub[jcol] = ito;
	lsub = Glu->lsub;	/* re-read: Glu_alloc() may have changed it */
	for (ifrom = 0; ifrom < nextl; ++ifrom) {
	    krow = col_lsub[ifrom];
	    if ( perm_r[krow] == EMPTY ) /* Filter U-subscript */
		lsub[ito++] = krow;
	}
	k = ito;
	xlsub_end[jcol] = k;

	/* Make a copy in case it is a singleton supernode */
	for (ifrom = xlsub[jcol]; ifrom < ito; ++ifrom)
	    lsub[k++] = lsub[ifrom];

    } else { /* Supernode of size > 1: overwrite column jcol-1 */
	k = xlsub_end[fsupc];
	xlsub[jcol] = k;
	xprune[fsupc] = k;
	for (ifrom = 0; ifrom < nextl; ++ifrom) {
	    krow = col_lsub[ifrom];
	    if ( perm_r[krow] == EMPTY ) /* Filter U-subscript */
		lsub[k++] = krow;
	}
	xlsub_end[jcol] = k;
    }

#if ( DEBUGlevel>=3 )
  if (jcol == BADCOL) {
    printf("(%d) pzgstrf_column_dfs[3]: %d in prev s-node %d? %d\n",
	   pnum, jcol, fsupc, samesuper);
    PrintInt10("lsub", xlsub_end[jcol]-xlsub[jcol], &lsub[xlsub[jcol]]);
  }
#endif

    /* Tidy up the pointers before exit */
    xprune[jcol] = k;     /* upper bound for pruning */
    supno[jcol] = nsuper;
    xsup_end[nsuper] = jcol + 1;

    return 0;
}
void
pxgstrf_super_bnd_dfs(
		      const int  pnum, /* process number */
		      const int  m,    /* number of rows in the matrix */
		      const int  n,    /* number of columns in the matrix */
		      const int  jcol, /* first column of the H-supernode */
		      const int  w,    /* size of the H-supernode */
		      SuperMatrix *A,  /* original matrix */
		      int        *perm_r,   /* in */
		      int        *iperm_r,  /* in; inverse of perm_r */
		      int        *xprune,   /* in */
		      int        *ispruned, /* in */
		      int        *marker,   /* modified */
		      int        *parent,   /* working array */
		      int        *xplore,   /* working array */
		      pxgstrf_shared_t *pxgstrf_shared /* modified */
		      )
{
/*
 * -- SuperLU MT routine (version 1.0) --
 * Univ. of California Berkeley, Xerox Palo Alto Research Center,
 * and Lawrence Berkeley National Lab.
 * August 15, 1997
 *
 * Purpose
 * =======
 *
 * Performs a symbolic structure prediction on a supernode in the Householder
 * matrix H, with jcol being the leading column.
 *
 * Method
 * ======
 * For every column jj in [jcol, jcol+w) a depth-first search is started from
 * each nonzero row of A[*,jj].  Rows with perm_r[] == EMPTY are counted once
 * in nrow; rows that already have a pivot are followed through their
 * supernode representative.  Visited vertices are stamped in marker[] with
 * the value n + jcol.  The dfs is iterative: parent[] stores the return
 * link, xplore[k] / xplore[m + k] the saved current / end positions in
 * lsub[] of representative k.
 *
 * On exit DynamicSetMap(pnum, jcol, nrow * w, pxgstrf_shared) is called.
 *
 * Note: xsup and xsup_end are not referenced directly below; presumably the
 * SUPER_REP / SUPER_FSUPC / SINGLETON macros expand to expressions over
 * these locals -- confirm before renaming or removing them.
 *
 */
    GlobalLU_t *Glu = pxgstrf_shared->Glu; /* modified */
    register int krep, chperm, chrep, kchild;
    register int invp_rep; /* "krep" numbered in the original A */
    register int krow, kperm, xdfs, maxdfs, kpar;
    register int fsupc, k, jj, found;
    register int nrow;  /* union of the nonzero rows in a supernode */
    NCPformat    *Astore;
    int          *asub, *xa_begin, *xa_end;
    int          *xsup, *xsup_end, *supno, *lsub, *xlsub, *xlsub_end;

    /* Initialize pointers */
    xsup      = Glu->xsup;
    xsup_end  = Glu->xsup_end;
    supno     = Glu->supno;
    lsub      = Glu->lsub;
    xlsub     = Glu->xlsub;
    xlsub_end = Glu->xlsub_end;
    Astore    = A->Store;
    asub      = Astore->rowind;
    xa_begin  = Astore->colbeg;
    xa_end    = Astore->colend;
    nrow      = 0;
    found     = n + jcol;	/* marker stamp used for this H-supernode */

    /* For each column in the H-supernode */
    for (jj = jcol; jj < jcol + w; ++jj) {

      /* For each nonz in A[*,jj] do dfs */
      for (k = xa_begin[jj]; k < xa_end[jj]; ++k) {
	  krow = asub[k];

	  /* krow was visited before, go to the next nonzero. */
	  if ( marker[krow] == found ) continue;

	  /* For each unmarked nbr krow of jj ... */
	  kperm = perm_r[krow];

	  if ( kperm == EMPTY ) { /* krow is in L */
	      marker[krow] = found;
	      ++nrow;
	  } else {
	      /* krow is in U: if its supernode-rep krep has been explored,
		               skip the search.       */
	      krep = SUPER_REP( supno[kperm] );
	      invp_rep = iperm_r[krep];

	      /* Perform dfs starting at krep */
	      if ( marker[invp_rep] != found ) {
		  marker[invp_rep] = found;
		  parent[krep] = EMPTY;
		  if ( ispruned[krep] ) {
		      if ( SINGLETON( supno[krep] ) )
			  xdfs = xlsub_end[krep];
		      else xdfs = xlsub[krep];
		      maxdfs = xprune[krep];
		  } else {
		      fsupc = SUPER_FSUPC( supno[krep] );
		      xdfs = xlsub[fsupc] + krep-fsupc+1;
		      maxdfs = xlsub_end[fsupc];
		  }

		  do {
		      /* For each unmarked kchild of krep ... */
		      while ( xdfs < maxdfs ) {
			  kchild = lsub[xdfs];
			  xdfs++;
			  if (marker[kchild] != found) { /* Not reached yet */
			      chperm = perm_r[kchild];

			      if ( chperm == EMPTY ) { /* kchild is in L */
				  marker[kchild] = found;
				  ++nrow;
			      } else {
				  /* kchild is in U:
				   *   chrep = its supernode-rep. If its rep
				   *   has been explored, skip the search.
				   */
				  chrep = SUPER_REP( supno[chperm] );
				  invp_rep = iperm_r[chrep];

				  /* Continue dfs at snode-rep of kchild */
				  if ( marker[invp_rep] != found ) {
				      marker[invp_rep] = found;
				      xplore[krep] = xdfs;
				      xplore[m + krep] = maxdfs;
				      parent[chrep] = krep;
				      krep = chrep;/* Go deeper down G(L^t) */
				      /* Both branches below set xdfs and
					 maxdfs, so no preliminary
					 assignment is needed here. */
				      if ( ispruned[krep] ) {
					  if ( SINGLETON( supno[krep] ) )
					      xdfs = xlsub_end[krep];
					  else xdfs = xlsub[krep];
					  maxdfs = xprune[krep];
				      } else {
					  fsupc = SUPER_FSUPC(supno[krep]);
					  xdfs = xlsub[fsupc] + krep-fsupc+1;
					  maxdfs = xlsub_end[fsupc];
				      }
				  } /* if */
			      } /* else */
			  } /* if... */
		      } /* while xdfs < maxdfs */

		      /* krep has no more unexplored nbrs:
		       *    backtrack dfs to its parent.  (Nothing is
		       *    recorded for krep here; only nrow is accumulated.)
		       */
		      kpar = parent[krep]; /* Pop stack, mimic recursion */
		      if ( kpar == EMPTY ) break; /* dfs done */
		      krep = kpar;
		      xdfs = xplore[krep];
		      maxdfs = xplore[m+krep];
		  } while ( kpar != EMPTY ); /* do-while - until empty stack */
	      } /* if */
	  } /* else */
      } /* for each nonz in A[*,jj] */
    } /* for jj ... */

    DynamicSetMap(pnum, jcol, nrow * w, pxgstrf_shared);

/*    for (i = 1; i < w; ++i) Glu->map_in_sup[jcol + i] = -i;*/

#if ( DEBUGlevel>=1 )
    printf("(%d) pxgstrf_super_bnd_dfs(): jcol= %d, w= %d, nrow= %d\n",
	   pnum, jcol, w, nrow);
#endif
}
void
pdgstrf_panel_dfs(
		  const int  pnum,  /* process number */
		  const int  m,     /* number of rows in the matrix */
		  const int  w,     /* current panel width */
		  const int  jcol,  /* leading column of the current panel */
		  SuperMatrix *A,   /* original matrix */
		  int *perm_r,      /* row pivotings that are done so far */
		  int *xprune,      /* in */
		  int *ispruned,    /* in */
		  int *lbusy,       /* in; size n */
		  int *nseg,	    /* out */
		  int *panel_lsub,  /* out */
		  int *w_lsub_end,  /* out; values irrelevant on entry */
		  int *segrep,      /* out */
		  int *repfnz,      /* out */
		  int *marker,      /* modified */
		  int *spa_marker,  /* modified; size n-by-w */
		  int *parent,      /* working array */
		  int *xplore,      /* working array */
		  double *dense,    /* out; size n-by-w */
		  GlobalLU_t *Glu   /* modified */
		  )
{
/*
 * -- SuperLU MT routine (version 2.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley,
 * and Xerox Palo Alto Research Center.
 * September 10, 2007
 *
 * Purpose
 * =======
 *
 *   Performs a symbolic factorization on a panel of columns [jcol, jcol+w).
 *   It skips all those busy descendants that are worked on by other
 *   processors along the e-tree path.
 *
 * Notes
 * =====
 *
 * (1) panel_lsub[0:w*n-1]: temporary for the nonzero row indices below
 *     the panel diagonal, which will be used later in the inner LU
 *     factorization. For the busy columns, some of the nonzeros in U
 *     may be mistakenly placed in this list, because "perm_r" is
 *     still "empty". Later, during dcolumn_dfs in the inner factorization,
 *     we must filter those nonzeros belonging in U.
 *
 * (2) A supernode representative is the last column of a supernode.
 *     The nonzeros in U[*,j] are segments that end at supernodal
 *     representatives.
 *
 * (3) The routine returns one list of the supernodal representatives
 *     in topological order of the DFS that generates them. This list is
 *     a superset of the topological order of each individual column within
 *     the panel. The location of the first nonzero in each supernodal
 *     segment (supernodal entry location) is also returned. Each column
 *     has a separate list for this purpose.
 *
 * (4) Two marker arrays are used to facilitate dfs:
 *     col_marker[i] == jj, if row i was visited during dfs of current
 *       column jj (col_marker is the m-long slice of spa_marker[] that
 *       belongs to column jj);
 *     marker1[i] == jcol, if representative i was already appended to
 *       segrep[] by a column of this panel (marker1 = marker + m).
 *
 * (5) The dfs stack is the combination of xplore[2*m] and parent[m]:
 *     xplore[k]     - pointer to k's adjacency list where search begins
 *     xplore[m + k] - pointer to k's adjacency list where search ends
 *
 * (6) Array mappings
 *     marker: A-row --> A-row/col (0/1)
 *     repfnz: SuperA-col --> PA-row
 *     parent: SuperA-col --> SuperA-col
 *     xplore: SuperA-col --> index to L-structure
 *
 * (7) The numerical values a[k] of each column jj are scattered into the
 *     jj-th m-long slice of dense[] while the structure is traversed.
 *
 * (8) xsup and xsup_end are not referenced directly below; presumably the
 *     SUPER_REP / SUPER_FSUPC / SINGLETON macros expand to expressions over
 *     these locals -- confirm before renaming or removing them.
 *
 */
    NCPformat *Astore;
    double    *a;
    int       *asub;
    int       *xa_begin, *xa_end;
    register int krep, chperm, chmark, chrep, kchild, myfnz;
    register int k, krow, kmark, kperm, fsupc;
    register int xdfs, maxdfs, kpar, jj, nextp;
    register int nextl_col;/* next open position in panel_lsub[*,jj] */
    int       *marker1;	   /* marker1[jj] == jcol if vertex jj was visited
			      by a previous column within this panel. */
    int       *repfnz_col; /* start of each column in the panel */
    double    *dense_col;  /* start of each column in the panel */
    int       *xsup, *xsup_end, *supno, *lsub, *xlsub, *xlsub_end;
    int       *col_marker; /* marker array of each column in the panel */

    /* Initialize pointers */
    xsup       = Glu->xsup;
    xsup_end   = Glu->xsup_end;
    supno      = Glu->supno;
    lsub       = Glu->lsub;
    xlsub      = Glu->xlsub;
    xlsub_end  = Glu->xlsub_end;
    Astore     = A->Store;
    a          = Astore->nzval;
    asub       = Astore->rowind;
    xa_begin   = Astore->colbeg;
    xa_end     = Astore->colend;
    marker1    = marker + m;
    repfnz_col = repfnz;
    dense_col  = dense;
    nextp      = 0;	/* offset of the current column in the m-by-w arrays */
    *nseg      = 0;

#if ( DEBUGlevel>=2 )
if (jcol == BADPAN)
    printf("(%d) pdgstrf_panel_dfs[begin] jcol %d, w %d\n", pnum, jcol, w);
#endif

    /*
     * For each column in the panel ...
     */
    for (jj = jcol; jj < jcol + w; ++jj, nextp += m) {
	nextl_col = nextp;
	col_marker = &spa_marker[nextp];

	/*
	 * For each nonz in A[*,jj] perform dfs ...
	 */
	for (k = xa_begin[jj]; k < xa_end[jj]; ++k) {
	    krow = asub[k];
	    dense_col[krow] = a[k];
	    kmark = col_marker[krow];

	    /* if krow was visited before, go to the next nonzero */
	    if ( kmark == jj ) continue;

	    /*
	     * For each unmarked nbr krow of jj ...
	     */
	    col_marker[krow] = jj;
	    kperm = perm_r[krow];

	    if ( kperm == EMPTY ) {
		/* krow is in L: place it in structure of L[*,jj].
		 * NOTE: some entries in U may get here, because "perm_r"
		 *       is not yet available from a preceding busy column.
		 */
		panel_lsub[nextl_col++] = krow; /* krow is indexed into A */
	    } else {
		/*
		 * krow is in U (0 <= kperm < jcol): if its supernode
		 * representative krep has been explored, update repfnz[*].
		 */
		if ( lbusy[kperm] == jcol ) { /* kperm is busy */
#if ( DEBUGlevel>=3 )
  if (jj == BADCOL)
    printf("(%d) pdgstrf_panel_dfs(%d) skip busy krow %d, kperm %d\n",
	   pnum, jj, krow, kperm);
#endif
		    continue;
		}

		/* Here, krep cannot possibly be "busy" */
		krep = SUPER_REP( supno[kperm] );
		myfnz = repfnz_col[krep];

#ifdef CHK_DFS
if (jj == BADCOL)
    printf("(%d) pdgstrf_panel_dfs[1] %d, krep %d, fsupc %d, Pr[krow %d] %d, myfnz %d\n",
	   pnum, jj, krep, SUPER_FSUPC(supno[krep]), krow, kperm, myfnz);
#endif
		if ( myfnz != EMPTY ) {	/* Representative visited before */
		    if ( myfnz > kperm ) repfnz_col[krep] = kperm;
		    /* continue; */
		} else {
		    /* Otherwise, performs dfs starting from krep */
		    parent[krep] = EMPTY;
		    repfnz_col[krep] = kperm;
		    /* Select the adjacency range [xdfs, maxdfs) of krep in
		       lsub[]: the pruned list when ispruned[krep] is set,
		       otherwise the supernode's list past its first
		       krep-fsupc+1 entries. */
		    if ( ispruned[krep] ) {
			if ( SINGLETON( supno[krep] ) )
			    xdfs = xlsub_end[krep];
			else xdfs = xlsub[krep];
			maxdfs = xprune[krep];
#ifdef PROFILE
			/*Gstat->procstat[pnum].pruned++;*/
#endif
		    } else {
			fsupc = SUPER_FSUPC( supno[krep] );
			xdfs = xlsub[fsupc] + krep-fsupc+1;
			maxdfs = xlsub_end[fsupc];
#ifdef PROFILE
			/*Gstat->procstat[pnum].unpruned++;*/
#endif
		    }

#ifdef CHK_DFS
if (jj == BADCOL)
{
    register int i;
    printf("(%d) pdgstrf_panel_dfs[2] %d, ispruned[%d] %d, xdfs %d, maxdfs %d\n",
	   pnum, jj, krep, ispruned[krep], xdfs, maxdfs);
    /*for (i = xdfs; i < maxdfs; i++) printf("(%d) lsub-%d", pnum, lsub[i]);*/
    printf("\n");
}
#endif
		    do {
			while ( xdfs < maxdfs ) {
			    /* for each unmarked kchild of krep ... */
			    kchild = lsub[xdfs];
			    xdfs++;
			    chmark = col_marker[kchild];

			    if ( chmark != jj ) { /* Not reached yet */
				col_marker[kchild] = jj;
				chperm = perm_r[kchild];

				if ( chperm == EMPTY ) {
				    /* kchild is in L: place it in L[*,j]. */
				    panel_lsub[nextl_col++] = kchild;
				} else {
				    /* kchild is in U (0 <= chperm < jcol):
				     *   chrep = its supernode-rep. If its rep
				     *   has been explored, update its repfnz[*].
				     */

				    /* A busy child is skipped; this "continue"
				       resumes the enclosing while loop.
				       NOTE(review): the debug output here is
				       guarded by DEBUG, not DEBUGlevel as
				       elsewhere in this routine. */
				    if ( lbusy[chperm] == jcol ) {
#ifdef DEBUG
if (jj == BADCOL)
    printf("(%d) pdgstrf_panel_dfs(%d) skip busy kchild %d, chperm %d\n",
	   pnum, jj, kchild, chperm);
#endif
					continue;
				    }

				    chrep = SUPER_REP( supno[chperm] );
				    myfnz = repfnz_col[chrep];

#ifdef DEBUG
if (jj == BADCOL)
    printf("(%d) pdgstrf_panel_dfs[3] %d, krep %d, Pr[kchild %d] %d, chrep %d, fsupc %d, myfnz %d\n",
	   pnum, jj, krep, kchild, chperm, chrep,
	   SUPER_FSUPC(supno[chrep]), myfnz);
#endif
				    if ( myfnz != EMPTY ) {/* Visited before */
					if ( myfnz > chperm )
					    repfnz_col[chrep] = chperm;
				    } else {
					/* Cont. dfs at snode-rep of kchild */
					xplore[krep] = xdfs;
					xplore[m + krep] = maxdfs;
					parent[chrep] = krep;
					krep = chrep; /* Go deeper down G(L) */
					repfnz_col[krep] = chperm;
					if ( ispruned[krep] ) {
					    if ( SINGLETON( supno[krep] ) )
						xdfs = xlsub_end[krep];
					    else xdfs = xlsub[krep];
					    maxdfs = xprune[krep];
#ifdef PROFILE
					    /*procstat[pnum].pruned++;*/
#endif
					} else {
					    fsupc = SUPER_FSUPC(supno[krep]);
					    xdfs = xlsub[fsupc] + krep-fsupc+1;
					    maxdfs = xlsub_end[fsupc];
#ifdef PROFILE
					    /*procstat[pnum].unpruned++;*/
#endif
					}
#ifdef CHK_DFS
if (jj == BADCOL)
    printf("(%d) pdgstrf_panel_dfs[4] %d, ispruned[%d] %d, xdfs %d, maxdfs %d\n",
	   pnum, jj, krep, ispruned[krep], xdfs, maxdfs);
#endif
				    } /* else */
				} /* else */

			    } /* if... */

			} /* while xdfs < maxdfs */

			/* krow has no more unexplored nbrs:
			 *    Place snode-rep krep in postorder DFS, if this
			 *    segment is seen for the first time. (Note that
			 *    "repfnz[krep]" may change later.)
			 *    Backtrack dfs to its parent.
			 */
			if ( marker1[krep] != jcol ) {
			    segrep[*nseg] = krep;
			    ++(*nseg);
			    marker1[krep] = jcol;
#ifdef CHK_DFS
if (jj == BADCOL)
    printf("(%d) pdgstrf_panel_dfs(%d) repfnz[%d] %d added to top.list by jj %d\n",
	   pnum, jj, krep, repfnz_col[krep], jj);
#endif
			}

			kpar = parent[krep]; /* Pop stack, mimic recursion */
			if ( kpar == EMPTY ) break; /* dfs done */
			krep = kpar;
			xdfs = xplore[krep];
			maxdfs = xplore[m + krep];

#ifdef CHK_DFS
if (jj == BADCOL)
{
    register int i;
    printf("(%d) pdgstrf_panel_dfs[5] pop stack: %d, krep %d, xdfs %d, maxdfs %d\n",
	   pnum, jj, krep, xdfs, maxdfs);
    /* for (i = xdfs; i < maxdfs; i++) printf("(%d) lsub-%d", pnum, lsub[i]);*/
    printf("\n");
}
#endif
		    } while ( kpar != EMPTY ); /* until empty stack */

		} /* else: myfnz == EMPTY */

	    } /* else: kperm != EMPTY */

	} /* for each nonzero in A[*,jj] */

#if ( DEBUGlevel>=3 )
if (jj == BADCOL) {
#define REPCOL 0
    krep = REPCOL;
    printf("(%d) pdgstrf_panel_dfs(end) w_lsub_end[jj=%d] %d, repfnz_col[%d] %d\n",
	   pnum, jj, nextl_col - nextp, krep, repfnz_col[krep]);
    PrintInt10("lsub", nextl_col - nextp, &panel_lsub[nextp]);
}
#endif

	/* Number of row indices collected for column jj. */
	w_lsub_end[jj-jcol] = nextl_col - nextp;
	repfnz_col += m;
        dense_col += m;

    } /* for jj ... */

}