int pcgstrf_column_bmod( const int pnum, /* process number */ const int jcol, /* current column in the panel */ const int fpanelc,/* first column in the panel */ const int nseg, /* number of s-nodes to update jcol */ int *segrep,/* in */ int *repfnz,/* in */ complex *dense, /* modified */ complex *tempv, /* working array */ pxgstrf_shared_t *pxgstrf_shared, /* modified */ Gstat_t *Gstat /* modified */ ) { /* * -- SuperLU MT routine (version 2.0) -- * Lawrence Berkeley National Lab, Univ. of California Berkeley, * and Xerox Palo Alto Research Center. * September 10, 2007 * * Purpose: * ======== * Performs numeric block updates (sup-col) in topological order. * It features: col-col, 2cols-col, 3cols-col, and sup-col updates. * Special processing on the supernodal portion of L\U[*,j]. * * Return value: * ============= * 0 - successful return * > 0 - number of bytes allocated when run out of space * */ #if ( MACH==CRAY_PVP ) _fcd ftcs1 = _cptofcd("L", strlen("L")), ftcs2 = _cptofcd("N", strlen("N")), ftcs3 = _cptofcd("U", strlen("U")); #endif #ifdef USE_VENDOR_BLAS int incx = 1, incy = 1; complex alpha, beta; #endif GlobalLU_t *Glu = pxgstrf_shared->Glu; /* modified */ /* krep = representative of current k-th supernode * fsupc = first supernodal column * nsupc = no of columns in supernode * nsupr = no of rows in supernode (used as leading dimension) * luptr = location of supernodal LU-block in storage * kfnz = first nonz in the k-th supernodal segment * no_zeros = no of leading zeros in a supernodal U-segment */ complex ukj, ukj1, ukj2; register int lptr, kfnz, isub, irow, i, no_zeros; register int luptr, luptr1, luptr2; int fsupc, nsupc, nsupr, segsze; int nrow; /* No of rows in the matrix of matrix-vector */ int jsupno, k, ksub, krep, krep_ind, ksupno; int ufirst, nextlu; int fst_col; /* First column within small LU update */ int d_fsupc; /* Distance between the first column of the current panel and the first column of the current snode.*/ int *xsup, *supno; int *lsub, *xlsub, *xlsub_end; complex *lusup; int *xlusup, *xlusup_end; complex *tempv1; int mem_error; register float flopcnt; complex zero = {0.0, 0.0}; complex one = {1.0, 0.0}; complex none = {-1.0, 0.0}; complex comp_temp, comp_temp1; xsup = Glu->xsup; supno = Glu->supno; lsub = Glu->lsub; xlsub = Glu->xlsub; xlsub_end = Glu->xlsub_end; lusup = Glu->lusup; xlusup = Glu->xlusup; xlusup_end = Glu->xlusup_end; jsupno = supno[jcol]; /* * For each nonz supernode segment of U[*,j] in topological order */ k = nseg - 1; for (ksub = 0; ksub < nseg; ksub++) { krep = segrep[k]; k--; ksupno = supno[krep]; #if ( DEBUGlvel>=2 ) if (jcol==BADCOL) printf("(%d) pcgstrf_column_bmod[1]: %d, nseg %d, krep %d, jsupno %d, ksupno %d\n", pnum, jcol, nseg, krep, jsupno, ksupno); #endif if ( jsupno != ksupno ) { /* Outside the rectangular supernode */ fsupc = xsup[ksupno]; fst_col = SUPERLU_MAX ( fsupc, fpanelc ); /* Distance from the current supernode to the current panel; d_fsupc=0 if fsupc >= fpanelc. */ d_fsupc = fst_col - fsupc; luptr = xlusup[fst_col] + d_fsupc; lptr = xlsub[fsupc] + d_fsupc; kfnz = repfnz[krep]; kfnz = SUPERLU_MAX ( kfnz, fpanelc ); segsze = krep - kfnz + 1; nsupc = krep - fst_col + 1; nsupr = xlsub_end[fsupc] - xlsub[fsupc]; /* Leading dimension */ nrow = nsupr - d_fsupc - nsupc; krep_ind = lptr + nsupc - 1; flopcnt = segsze * (segsze - 1) + 2 * nrow * segsze;//sj Gstat->procstat[pnum].fcops += flopcnt; #if ( DEBUGlevel>=2 ) if (jcol==BADCOL) printf("(%d) pcgstrf_column_bmod[2]: %d, krep %d, kfnz %d, segsze %d, d_fsupc %d,\ fsupc %d, nsupr %d, nsupc %d\n", pnum, jcol, krep, kfnz, segsze, d_fsupc, fsupc, nsupr, nsupc); #endif /* * Case 1: Update U-segment of size 1 -- col-col update */ if ( segsze == 1 ) { ukj = dense[lsub[krep_ind]]; luptr += nsupr*(nsupc-1) + nsupc; for (i = lptr + nsupc; i < xlsub_end[fsupc]; ++i) { irow = lsub[i]; cc_mult(&comp_temp, &ukj, &lusup[luptr]); c_sub(&dense[irow], &dense[irow], &comp_temp); luptr++; } } else if ( segsze <= 3 ) { ukj = dense[lsub[krep_ind]]; luptr += nsupr*(nsupc-1) + nsupc-1; ukj1 = dense[lsub[krep_ind - 1]]; luptr1 = luptr - nsupr; if ( segsze == 2 ) { /* Case 2: 2cols-col update */ cc_mult(&comp_temp, &ukj1, &lusup[luptr1]); c_sub(&ukj, &ukj, &comp_temp); dense[lsub[krep_ind]] = ukj; for (i = lptr + nsupc; i < xlsub_end[fsupc]; ++i) { irow = lsub[i]; luptr++; luptr1++; cc_mult(&comp_temp, &ukj, &lusup[luptr]); cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]); c_add(&comp_temp, &comp_temp, &comp_temp1); c_sub(&dense[irow], &dense[irow], &comp_temp); } } else { /* Case 3: 3cols-col update */ ukj2 = dense[lsub[krep_ind - 2]]; luptr2 = luptr1 - nsupr; cc_mult(&comp_temp, &ukj2, &lusup[luptr2-1]); c_sub(&ukj1, &ukj1, &comp_temp); cc_mult(&comp_temp, &ukj1, &lusup[luptr1]); cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]); c_add(&comp_temp, &comp_temp, &comp_temp1); c_sub(&ukj, &ukj, &comp_temp); dense[lsub[krep_ind]] = ukj; dense[lsub[krep_ind-1]] = ukj1; for (i = lptr + nsupc; i < xlsub_end[fsupc]; ++i) { irow = lsub[i]; luptr++; luptr1++; luptr2++; cc_mult(&comp_temp, &ukj, &lusup[luptr]); cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]); c_add(&comp_temp, &comp_temp, &comp_temp1); cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]); c_add(&comp_temp, &comp_temp, &comp_temp1); c_sub(&dense[irow], &dense[irow], &comp_temp); } } } else { /* * Case: sup-col update * Perform a triangular solve and block update, * then scatter the result of sup-col update to dense */ no_zeros = kfnz - fst_col; /* Copy U[*,j] segment from dense[*] to tempv[*] */ isub = lptr + no_zeros; for (i = 0; i < segsze; i++) { irow = lsub[isub]; tempv[i] = dense[irow]; ++isub; } /* Dense triangular solve -- start effective triangle */ luptr += nsupr * no_zeros + no_zeros; #ifdef USE_VENDOR_BLAS #if ( MACH==CRAY_PVP ) CTRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], &nsupr, tempv, &incx ); #else ctrsv_( "L", "N", "U", &segsze, &lusup[luptr], &nsupr, tempv, &incx ); #endif luptr += segsze; /* Dense matrix-vector */ tempv1 = &tempv[segsze]; alpha = one; beta = zero; #if ( MACH==CRAY_PVP ) CGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr], &nsupr, tempv, &incx, &beta, tempv1, &incy ); #else cgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr], &nsupr, tempv, &incx, &beta, tempv1, &incy ); #endif #else clsolve ( nsupr, segsze, &lusup[luptr], tempv ); luptr += segsze; /* Dense matrix-vector */ tempv1 = &tempv[segsze]; cmatvec (nsupr, nrow , segsze, &lusup[luptr], tempv, tempv1); #endif /* Scatter tempv[] into SPA dense[*] */ isub = lptr + no_zeros; for (i = 0; i < segsze; i++) { irow = lsub[isub]; dense[irow] = tempv[i]; /* Scatter */ tempv[i] = zero; isub++; } /* Scatter tempv1[] into SPA dense[*] */ for (i = 0; i < nrow; i++) { irow = lsub[isub]; c_sub(&dense[irow], &dense[irow], &tempv1[i]); tempv1[i] = zero; ++isub; } } /* else segsze >= 4 */ } /* if jsupno ... */ } /* for each segment... */ /* ------------------------------------------ Process the supernodal portion of L\U[*,j] ------------------------------------------ */ fsupc = SUPER_FSUPC (jsupno); nsupr = xlsub_end[fsupc] - xlsub[fsupc]; if ( (mem_error = Glu_alloc(pnum, jcol, nsupr, LUSUP, &nextlu, pxgstrf_shared)) ) return mem_error; xlusup[jcol] = nextlu; lusup = Glu->lusup; /* Gather the nonzeros from SPA dense[*,j] into L\U[*,j] */ for (isub = xlsub[fsupc]; isub < xlsub_end[fsupc]; ++isub) { irow = lsub[isub]; lusup[nextlu] = dense[irow]; dense[irow] = zero; #ifdef DEBUG if (jcol == -1) printf("(%d) pcgstrf_column_bmod[lusup] jcol %d, irow %d, lusup %.10e\n", pnum, jcol, irow, lusup[nextlu]); #endif ++nextlu; } xlusup_end[jcol] = nextlu; /* close L\U[*,jcol] */ #if ( DEBUGlevel>=2 ) if (jcol == -1) { nrow = xlusup_end[jcol] - xlusup[jcol]; print_double_vec("before sup-col update", nrow, &lsub[xlsub[fsupc]], &lusup[xlusup[jcol]]); } #endif /* * For more updates within the panel (also within the current supernode), * should start from the first column of the panel, or the first column * of the supernode, whichever is bigger. There are 2 cases: * (1) fsupc < fpanelc, then fst_col := fpanelc * (2) fsupc >= fpanelc, then fst_col := fsupc */ fst_col = SUPERLU_MAX ( fsupc, fpanelc ); if ( fst_col < jcol ) { /* distance between the current supernode and the current panel; d_fsupc=0 if fsupc >= fpanelc. */ d_fsupc = fst_col - fsupc; lptr = xlsub[fsupc] + d_fsupc; luptr = xlusup[fst_col] + d_fsupc; nsupr = xlsub_end[fsupc] - xlsub[fsupc]; /* Leading dimension */ nsupc = jcol - fst_col; /* Excluding jcol */ nrow = nsupr - d_fsupc - nsupc; /* points to the beginning of jcol in supernode L\U[*,jsupno] */ ufirst = xlusup[jcol] + d_fsupc; #if ( DEBUGlevel>=2 ) if (jcol==BADCOL) printf("(%d) pcgstrf_column_bmod[3] jcol %d, fsupc %d, nsupr %d, nsupc %d, nrow %d\n", pnum, jcol, fsupc, nsupr, nsupc, nrow); #endif flopcnt = nsupc * (nsupc - 1) + 2 * nrow * nsupc; //sj Gstat->procstat[pnum].fcops += flopcnt; /* ops[TRSV] += nsupc * (nsupc - 1); ops[GEMV] += 2 * nrow * nsupc; */ #ifdef USE_VENDOR_BLAS alpha = none; beta = one; /* y := beta*y + alpha*A*x */ #if ( MACH==CRAY_PVP ) CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr], &nsupr, &lusup[ufirst], &incx ); CGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr, &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy ); #else ctrsv_( "L", "N", "U", &nsupc, &lusup[luptr], &nsupr, &lusup[ufirst], &incx ); cgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr, &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy ); #endif #else clsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] ); cmatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc], &lusup[ufirst], tempv ); /* Copy updates from tempv[*] into lusup[*] */ isub = ufirst + nsupc; for (i = 0; i < nrow; i++) { c_sub(&lusup[isub], &lusup[isub], &tempv[i]); tempv[i] = zero; ++isub; } #endif } /* if fst_col < jcol ... */ return 0; }
int pzgstrf_column_dfs( const int pnum, /* process number */ const int m, /* number of rows in the matrix */ const int jcol, /* current column in the panel */ const int fstcol, /* first column in the panel */ int *perm_r, /* row pivotings that are done so far */ int *ispruned, /* in */ int *col_lsub, /* the RHS vector to start the dfs */ int lsub_end, /* size of col_lsub[] */ int *super_bnd,/* supernode partition by upper bound */ int *nseg, /* modified - with new segments appended */ int *segrep, /* modified - with new segments appended */ int *repfnz, /* modified */ int *xprune, /* modified */ int *marker2, /* modified */ int *parent, /* working array */ int *xplore, /* working array */ pxgstrf_shared_t *pxgstrf_shared /* modified */ ) { /* * -- SuperLU MT routine (version 2.0) -- * Lawrence Berkeley National Lab, Univ. of California Berkeley, * and Xerox Palo Alto Research Center. * September 10, 2007 * * Purpose * ======= * pzgstrf_column_dfs() performs a symbolic factorization on column jcol, * and detects whether column jcol belongs in the same supernode as jcol-1. * * Local parameters * ================ * A supernode representative is the last column of a supernode. * The nonzeros in U[*,j] are segments that end at supernodal * representatives. The routine returns a list of such supernodal * representatives in topological order of the dfs that generates them. * The location of the first nonzero in each such supernodal segment * (supernodal entry location) is also returned. * * nseg: no of segments in current U[*,j] * samesuper: samesuper=NO if column j does not belong in the same * supernode as j-1. Otherwise, samesuper=YES. * * marker2: A-row --> A-row/col (0/1) * repfnz: SuperA-col --> PA-row * parent: SuperA-col --> SuperA-col * xplore: SuperA-col --> index to L-structure * * Return value * ============ * 0 success; * > 0 number of bytes allocated when run out of space. * */ GlobalLU_t *Glu = pxgstrf_shared->Glu; /* modified */ Gstat_t *Gstat = pxgstrf_shared->Gstat; /* modified */ register int jcolm1, jcolm1size, nextl, ifrom; register int k, krep, krow, kperm, samesuper, nsuper; register int no_lsub; int fsupc; /* first column in a supernode */ int myfnz; /* first nonz column in a U-segment */ int chperm, chmark, chrep, kchild; int xdfs, maxdfs, kpar; int ito; /* Used to compress row subscripts */ int mem_error; int *xsup, *xsup_end, *supno, *lsub, *xlsub, *xlsub_end; static int first = 1, maxsuper; if ( first ) { maxsuper = sp_ienv(3); first = 0; } /* Initialize pointers */ xsup = Glu->xsup; xsup_end = Glu->xsup_end; supno = Glu->supno; lsub = Glu->lsub; xlsub = Glu->xlsub; xlsub_end = Glu->xlsub_end; jcolm1 = jcol - 1; nextl = lsub_end; no_lsub = 0; samesuper = YES; /* Test whether the row structure of column jcol is contained in that of column jcol-1. */ for (k = 0; k < lsub_end; ++k) { krow = col_lsub[k]; if ( perm_r[krow] == EMPTY ) { /* krow is in L */ ++no_lsub; if (marker2[krow] != jcolm1) samesuper = NO; /* row subset test */ marker2[krow] = jcol; } } #if ( DEBUGlevel>=2 ) if (jcol == BADCOL) printf("(%d) pzgstrf_column_dfs[1] %d, fstcol %d, lsub_end %d, no_lsub %d, samesuper? %d\n", pnum, jcol, fstcol, lsub_end, no_lsub, samesuper); #endif /* * For each nonzero in A[fstcol:n,jcol] perform DFS ... */ for (k = 0; k < lsub_end; ++k) { krow = col_lsub[k]; /* if krow was visited before, go to the next nonzero */ if ( marker2[krow] == jcol ) continue; marker2[krow] = jcol; kperm = perm_r[krow]; #if ( DEBUGlevel>=3 ) if (jcol == BADCOL) printf("(%d) pzgstrf_column_dfs[inner]: perm_r[krow=%d] %d\n", pnum, krow, kperm); #endif /* Ignore the nonzeros in U corresponding to the busy columns during the panel DFS. */ /*if ( lbusy[kperm] != fstcol ) { xiaoye? */ if ( kperm >= fstcol ) { /* * krow is in U: if its supernode representative krep * has been explored, update repfnz[*]. */ krep = SUPER_REP(supno[kperm]); myfnz = repfnz[krep]; #if ( DEBUGlevel>=3 ) if (jcol == BADCOL) printf("(%d) pzgstrf_column_dfs[inner-U]: krep %d, myfnz %d, kperm %d\n", pnum, krep, myfnz, kperm); #endif if ( myfnz != EMPTY ) { /* Visited before */ if ( myfnz > kperm ) repfnz[krep] = kperm; /* continue; */ } else { /* Otherwise, perform dfs starting at krep */ parent[krep] = EMPTY; repfnz[krep] = kperm; if ( ispruned[krep] ) { if ( SINGLETON( supno[krep] ) ) xdfs = xlsub_end[krep]; else xdfs = xlsub[krep]; maxdfs = xprune[krep]; #ifdef PROFILE Gstat->procstat[pnum].pruned++; #endif } else { fsupc = SUPER_FSUPC( supno[krep] ); xdfs = xlsub[fsupc] + krep-fsupc+1; maxdfs = xlsub_end[fsupc]; #ifdef PROFILE Gstat->procstat[pnum].unpruned++; #endif } do { /* * For each unmarked kchild of krep ... */ while ( xdfs < maxdfs ) { kchild = lsub[xdfs]; xdfs++; chmark = marker2[kchild]; if ( chmark != jcol ) { /* Not reached yet */ marker2[kchild] = jcol; chperm = perm_r[kchild]; if ( chperm == EMPTY ) { /* kchild is in L: place it in L[*,k]. */ ++no_lsub; col_lsub[nextl++] = kchild; if (chmark != jcolm1) samesuper = NO; } else { /* kchild is in U: chrep = its supernode * representative. If its rep has * been explored, update its repfnz[*]. */ chrep = SUPER_REP( supno[chperm] ); myfnz = repfnz[chrep]; if ( myfnz != EMPTY ) { /* Visited before */ if ( myfnz > chperm ) repfnz[chrep] = chperm; } else { /* Continue dfs at super-rep of kchild */ xplore[krep] = xdfs; xplore[m + krep] = maxdfs; parent[chrep] = krep; krep = chrep; /* Go deeper down G(L^t) */ repfnz[krep] = chperm; if ( ispruned[krep] ) { if ( SINGLETON( supno[krep] ) ) xdfs = xlsub_end[krep]; else xdfs = xlsub[krep]; maxdfs = xprune[krep]; #ifdef PROFILE Gstat->procstat[pnum].pruned++; #endif } else { fsupc = SUPER_FSUPC( supno[krep] ); xdfs = xlsub[fsupc] + krep-fsupc+1; maxdfs = xlsub_end[fsupc]; #ifdef PROFILE Gstat->procstat[pnum].unpruned++; #endif } } } /* else */ } /* if */ } /* while */ /* krow has no more unexplored nbrs: * place supernode-rep krep in postorder DFS, * backtrack dfs to its parent. */ segrep[*nseg] = krep; ++(*nseg); #if ( DEBUGlevel>=3 ) if (jcol == BADCOL) printf("(%d) pzgstrf_column_dfs[inner-dfs] new nseg %d, repfnz[krep=%d] %d\n", pnum, *nseg, krep, repfnz[krep]); #endif kpar = parent[krep]; /* Pop from stack, mimic recursion */ if ( kpar == EMPTY ) break; /* dfs done */ krep = kpar; xdfs = xplore[krep]; maxdfs = xplore[m + krep]; } while ( kpar != EMPTY ); /* Do ... until empty stack */ } /* else myfnz ... */ } /* if kperm >= fstcol ... */ } /* for each nonzero ... */ #if ( DEBUGlevel>=3 ) if (jcol == BADCOL) printf("(%d) pzgstrf_column_dfs[2]: nextl %d, samesuper? %d\n", pnum, nextl, samesuper); #endif /* assert(no_lsub == nextl - no_usub);*/ /* --------------------------------------------------------- Check to see if j belongs in the same supernode as j-1. --------------------------------------------------------- */ /* Does it matter if jcol == 0? - xiaoye */ if ( samesuper == YES ) { nsuper = supno[jcolm1]; jcolm1size = xlsub_end[jcolm1] - xlsub[jcolm1]; #if ( DEBUGlevel>=3 ) if (jcol == BADCOL) printf("(%d) pzgstrf_column_dfs[YES] jcol-1 %d, jcolm1size %d, supno[%d] %d\n", pnum, jcolm1, jcolm1size, jcolm1, nsuper); #endif if ( no_lsub != jcolm1size-1 ) samesuper = NO; /* Enforce T2 supernode */ else { /* Make sure the number of columns in a supernode does not exceed threshold. */ fsupc = xsup[nsuper]; if ( jcol - fsupc >= maxsuper ) samesuper = NO; else { /* start of a supernode in H (coarser partition) */ if ( super_bnd[jcol] != 0 ) samesuper = NO; } } } /* If jcol starts a new supernode, allocate storage for * the subscript set of both first and last column of * a previous supernode. (first for num values, last for pruning) */ if ( samesuper == NO ) { /* starts a new supernode */ nsuper = NewNsuper(pnum, pxgstrf_shared, &Glu->nsuper); xsup[nsuper] = jcol; /* Copy column jcol; also reserve space to store pruned graph */ if ((mem_error = Glu_alloc(pnum, jcol, 2*no_lsub, LSUB, &ito, pxgstrf_shared))) return mem_error; xlsub[jcol] = ito; lsub = Glu->lsub; for (ifrom = 0; ifrom < nextl; ++ifrom) { krow = col_lsub[ifrom]; if ( perm_r[krow] == EMPTY ) /* Filter U-subscript */ lsub[ito++] = krow; } k = ito; xlsub_end[jcol] = k; /* Make a copy in case it is a singleton supernode */ for (ifrom = xlsub[jcol]; ifrom < ito; ++ifrom) lsub[k++] = lsub[ifrom]; } else { /* Supernode of size > 1: overwrite column jcol-1 */ k = xlsub_end[fsupc]; xlsub[jcol] = k; xprune[fsupc] = k; for (ifrom = 0; ifrom < nextl; ++ifrom) { krow = col_lsub[ifrom]; if ( perm_r[krow] == EMPTY ) /* Filter U-subscript */ lsub[k++] = krow; } xlsub_end[jcol] = k; } #if ( DEBUGlevel>=3 ) if (jcol == BADCOL) { printf("(%d) pzgstrf_column_dfs[3]: %d in prev s-node %d? %d\n", pnum, jcol, fsupc, samesuper); PrintInt10("lsub", xlsub_end[jcol]-xlsub[jcol], &lsub[xlsub[jcol]]); } #endif /* Tidy up the pointers before exit */ xprune[jcol] = k; /* upper bound for pruning */ supno[jcol] = nsuper; xsup_end[nsuper] = jcol + 1; return 0; }
int pdgstrf_copy_to_ucol( const int pnum, /* process number */ const int jcol, /* current column */ const int nseg, /* number of U-segments */ const int *segrep, /* in */ const int *repfnz, /* in */ const int *perm_r, /* in */ double *dense, /* modified - reset to zero on exit */ pxgstrf_shared_t *pxgstrf_shared /* modified */ ) { /* * -- SuperLU MT routine (version 2.0) -- * Lawrence Berkeley National Lab, Univ. of California Berkeley, * and Xerox Palo Alto Research Center. * September 10, 2007 * * Gather the nonzeros from SPA dense[*,jcol] into global ucol[*]. */ register int ksub, krep, ksupno, i, k, kfnz, segsze; register int fsupc, isub, irow, jsupno, colsize; int nextu, mem_error; int *xsup, *supno, *lsub, *xlsub, *usub; double *ucol; GlobalLU_t *Glu = pxgstrf_shared->Glu; /* modified */ double zero = 0.0; xsup = Glu->xsup; supno = Glu->supno; lsub = Glu->lsub; xlsub = Glu->xlsub; jsupno = supno[jcol]; /* find the size of column jcol */ colsize = 0; k = nseg - 1; for (ksub = 0; ksub < nseg; ++ksub) { krep = segrep[k--]; ksupno = supno[krep]; if ( ksupno != jsupno ) { /* should go into ucol[] */ kfnz = repfnz[krep]; if ( kfnz != EMPTY ) /* nonzero U-segment */ colsize += krep - kfnz + 1;; } } /* for each segment... */ if ( (mem_error = Glu_alloc(pnum, jcol, colsize, UCOL, &nextu, pxgstrf_shared)) ) return mem_error; Glu->xusub[jcol] = nextu; ucol = Glu->ucol; usub = Glu->usub; /* Now, it does not have to be in topological order! */ k = nseg - 1; for (ksub = 0; ksub < nseg; ++ksub) { krep = segrep[k--]; ksupno = supno[krep]; if ( ksupno != jsupno ) { /* should go into ucol[] */ kfnz = repfnz[krep]; if ( kfnz != EMPTY ) { /* nonzero U-segment */ fsupc = xsup[ksupno]; isub = xlsub[fsupc] + kfnz - fsupc; segsze = krep - kfnz + 1; #pragma ivdep for (i = 0; i < segsze; i++) { irow = lsub[isub]; usub[nextu] = perm_r[irow]; ucol[nextu] = dense[irow]; dense[irow] = zero; #ifdef DEBUG if (jcol == EMPTY) printf("(%d) pcopy_to_ucol[]: jcol %d, krep %d, irow %d, ucol %.10e\n", ME, jcol, krep, irow, ucol[nextu]); #endif nextu++; isub++; } } } } /* for each segment... */ Glu->xusub_end[jcol] = nextu; /* close U[*,jcol] */ return 0; }
int psgstrf_snode_dfs( const int pnum, /* process number */ const int jcol, /* in - start of the supernode */ const int kcol, /* in - end of the supernode */ const int *asub, /* in */ const int *xa_begin, /* in */ const int *xa_end, /* in */ int *xprune, /* out */ int *marker, /* modified */ int *col_lsub, /* values are irrelevant on entry and on return */ pxgstrf_shared_t *pxgstrf_shared /* modified */ ) { /* * -- SuperLU MT routine (version 2.0) -- * Lawrence Berkeley National Lab, Univ. of California Berkeley, * and Xerox Palo Alto Research Center. * September 10, 2007 * * Purpose * ======= * psgstrf_snode_dfs() determines the union of the row structures of * those columns within the relaxed snode. * Note: The relaxed snodes are leaves of the supernodal etree, * therefore, the portion outside the rectangular supernode must be zero. * * Return value * ============ * 0 success; * >0 number of bytes allocated when run out of memory. * */ GlobalLU_t *Glu = pxgstrf_shared->Glu; register int i, k, ifrom, nextl, nsuper; int ito; int krow, kmark, mem_error; int *supno, *lsub, *xlsub, *xlsub_end; supno = Glu->supno; xlsub = Glu->xlsub; xlsub_end = Glu->xlsub_end; nsuper = NewNsuper(pnum, pxgstrf_shared, &Glu->nsuper); Glu->xsup[nsuper] = jcol; Glu->xsup_end[nsuper] = kcol + 1; nextl = 0; for (i = jcol; i <= kcol; i++) { /* for each nonzero in A[*,i] */ for (k = xa_begin[i]; k < xa_end[i]; k++) { krow = asub[k]; kmark = marker[krow]; if ( kmark != kcol ) { /* First time visit krow */ marker[krow] = kcol; col_lsub[nextl++] = krow; } } supno[i] = nsuper; } if ( (mem_error = Glu_alloc(pnum, jcol, 2*nextl, LSUB, &ito, pxgstrf_shared)) ) return mem_error; xlsub[jcol] = ito; lsub = Glu->lsub; for (ifrom = 0; ifrom < nextl; ++ifrom) lsub[ito++] = col_lsub[ifrom]; xlsub_end[jcol] = ito; return 0; }
int pcgstrf_factor_snode( const int pnum, /* process number */ const int jcol, SuperMatrix *A, const float diag_pivot_thresh, yes_no_t *usepr, int *perm_r, int *inv_perm_r, /* modified */ int *inv_perm_c, /* in - used to find diagonal of Pc*A*Pc' */ int *xprune, int *marker, int *col_lsub, /* values are irrevelant on entry and on return */ complex *dense, complex *tempv, pxgstrf_shared_t *pxgstrf_shared, int *info ) { /* * -- SuperLU MT routine (version 2.0) -- * Lawrence Berkeley National Lab, Univ. of California Berkeley, * and Xerox Palo Alto Research Center. * September 10, 2007 * * Purpose * ======= * * Factorize the artificial supernodes grouped at the bottom * of the etree. * */ GlobalLU_t *Glu = pxgstrf_shared->Glu; int singular; NCPformat *Astore; register int kcol, icol, k, jsupno, fsupc, nsupr; register int ifrom, ito; int nextu, nextlu; int pivrow; complex *a; int *asub, *xa_begin, *xa_end, *xusub, *xusub_end, *xsup, *supno, *xlusup, *lsub, *xlsub, *xlsub_end; lsub = Glu->lsub; xlsub = Glu->xlsub; xlsub_end = Glu->xlsub_end; xusub = Glu->xusub; xusub_end = Glu->xusub_end; xsup = Glu->xsup; supno = Glu->supno; xlusup = Glu->xlusup; singular = 0; Astore = A->Store; a = Astore->nzval; asub = Astore->rowind; xa_begin = Astore->colbeg; xa_end = Astore->colend; kcol = jcol + pxgstrf_shared->pan_status[jcol].size; /* Determine the union of the row structure of the supernode */ if ( (*info = pcgstrf_snode_dfs(pnum, jcol, kcol-1, asub, xa_begin, xa_end, xprune, marker, col_lsub, pxgstrf_shared)) ) return 0; /* * Factorize the relaxed supernode (jcol:kcol-1) */ nextu = Glu->nextu; /* xiaoye - race condition (no problem!) */ jsupno = supno[jcol]; fsupc = xsup[jsupno]; nsupr = xlsub_end[fsupc] - xlsub[fsupc]; if ( (*info = Glu_alloc(pnum, jcol, nsupr*(kcol-jcol), LUSUP, &nextlu, pxgstrf_shared)) ) return 0; for (icol = jcol; icol < kcol; icol++) { xusub[icol] = xusub_end[icol] = nextu; xlusup[icol] = nextlu; /* Scatter into SPA dense[*] */ for (k = xa_begin[icol]; k < xa_end[icol]; k++) dense[asub[k]] = a[k]; /* Numeric update within the supernode */ pcgstrf_snode_bmod(pnum, icol, jsupno, fsupc, dense, tempv, Glu, pxgstrf_shared->Gstat); if ( (*info = pcgstrf_pivotL (pnum, icol, diag_pivot_thresh, usepr, perm_r, inv_perm_r, inv_perm_c, &pivrow, Glu, pxgstrf_shared->Gstat)) ) if ( singular == 0 ) singular = *info; nextlu += nsupr; #if ( DEBUGlevel>= 2 ) if ( icol>=LOCOL && icol<=HICOL ) dprint_lu_col(pnum,"relax:",jcol,icol,kcol-jcol,pivrow,xprune,Glu); #endif } /* Store the row subscripts of kcol-1 for pruned graph */ k = ito = xlsub_end[jcol]; for (ifrom = xlsub[jcol]+kcol-jcol-1; ifrom < k; ++ifrom) lsub[ito++] = lsub[ifrom]; k = ito; xprune[kcol-1] = k; if (jcol < kcol-1) { /* not a singleton */ for (icol = jcol+1; icol < kcol; ++icol) xlsub_end[icol] = k; k = xlsub_end[jcol]; xprune[jcol] = k; for (icol = jcol+1; icol < kcol; ++icol) xlsub[icol] = k; } *info = singular; return 0; }