/*
 * ilu_csnode_dfs() - gather the union of the row indices found in columns
 * jcol..kcol of A into Glu->lsub (starting at xlsub[jcol]) and register
 * those columns as one new supernode.
 *
 * marker[] is stamped with kcol for every row that has been recorded, so
 * each row index is appended only once.
 *
 * Return value
 *   0   success;
 *   !=0 the value returned by cLUMemXpand() when growing lsub failed.
 */
int
ilu_csnode_dfs(
           const int  jcol,         /* in - start of the supernode */
           const int  kcol,         /* in - end of the supernode */
           const int  *asub,        /* in */
           const int  *xa_begin,    /* in */
           const int  *xa_end,      /* in */
           int        *marker,      /* modified */
           GlobalLU_t *Glu          /* modified */
           )
{
    int *xsup   = Glu->xsup;
    int *supno  = Glu->supno;
    int *lsub   = Glu->lsub;
    int *xlsub  = Glu->xlsub;
    int  nzlmax = Glu->nzlmax;
    int  snode, fill, col, pos, row, status;

    snode = ++supno[jcol];      /* Next available supernode number */
    fill  = xlsub[jcol];        /* next free slot in lsub */

    for (col = jcol; col <= kcol; col++) {
        /* For each nonzero in A[*,col] */
        for (pos = xa_begin[col]; pos < xa_end[col]; pos++) {
            row = asub[pos];
            if ( marker[row] == kcol ) continue;    /* already recorded */

            /* First visit of this row */
            marker[row] = kcol;
            lsub[fill++] = row;
            if ( fill < nzlmax ) continue;

            /* lsub is full: grow it and pick up the new base pointer */
            status = cLUMemXpand(jcol, fill, LSUB, &nzlmax, Glu);
            if ( status != 0 ) return (status);
            lsub = Glu->lsub;
        }
        supno[col] = snode;
    }

    /* Supernode > 1: the remaining columns point at the end of the list
       (the loop is empty when jcol == kcol) */
    for (col = jcol + 1; col <= kcol; col++)
        xlsub[col] = fill;

    xsup[snode + 1]  = kcol + 1;
    supno[kcol + 1]  = snode;
    xlsub[kcol + 1]  = fill;

    return 0;
}
/*
 * cgsitrf() - driver loop of the incomplete (dropping) supernodal
 * factorization of A into L and U.
 *
 * The columns are processed one "panel" at a time; a panel is either a
 * relaxed supernode (as reported in relax_end[]) or up to panel_size
 * consecutive columns.  After each supernode is finished its small rows
 * are dropped (ilu_cdrop_row), and each U column is filtered while being
 * copied (ilu_ccopy_to_ucol).
 *
 * Arguments (as used by this routine)
 *   options     in  - Fact, DiagPivotThresh, ILU_DropTol, ILU_FillTol,
 *                     ILU_FillFactor, ILU_DropRule, ILU_MILU,
 *                     ILU_MILU_Dim and SymmetricMode are read.
 *   A           in  - matrix whose Store is accessed as NCPformat.
 *   relax       in  - forwarded to the relaxed-supernode detection.
 *   panel_size  in  - upper bound on the panel width.
 *   etree       in  - forwarded to the relaxed-supernode detection.
 *   work,lwork  in  - forwarded to cLUMemInit().
 *   perm_c      in  - column permutation; its inverse is built locally.
 *   perm_r      i/o - read to build iperm_r when
 *                     Fact == SamePattern_SameRowPerm, then reset to EMPTY
 *                     and handed to the pivoting routine; when m > n the
 *                     entries still EMPTY at the end are numbered n, n+1, ...
 *   L, U        out - receive the factor storage held in Glu.
 *   stat        i/o - panel_histo, ops and expansions are updated.
 *   info        out - non-zero result of cLUMemInit() or of the helper that
 *                     aborted the factorization; otherwise a counter that is
 *                     incremented for every non-zero ilu_cpivotL() result
 *                     and increased by -(int)fill_tol whenever
 *                     ilu_cdrop_row() leaves fill_tol negative.
 *
 * On an early error exit inside the main loop the arrays allocated by this
 * routine are released before returning; the workspace obtained through
 * cLUMemInit() is left untouched, as before.
 */
void
cgsitrf(superlu_options_t *options, SuperMatrix *A, int relax, int panel_size,
        int *etree, void *work, int lwork, int *perm_c, int *perm_r,
        SuperMatrix *L, SuperMatrix *U, SuperLUStat_t *stat, int *info)
{
    /* Local working arrays */
    NCPformat *Astore;
    int       *iperm_r = NULL; /* inverse of perm_r; used when
                                  options->Fact == SamePattern_SameRowPerm */
    int       *iperm_c; /* inverse of perm_c */
    int       *swap, *iswap; /* swap is used to store the row permutation
                                during the factorization. Initially, it is set
                                to iperm_c (row indices of Pc*A*Pc').
                                iswap is the inverse of swap. After the
                                factorization, it is equal to perm_r. */
    int       *iwork;
    complex   *cwork;
    int       *segrep, *repfnz, *parent, *xplore;
    int       *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide SPA */
    int       *marker, *marker_relax;
    complex   *dense, *tempv;
    float     *stempv;
    int       *relax_end, *relax_fsupc;
    complex   *a;
    int       *asub;
    int       *xa_begin, *xa_end;
    int       *xsup, *supno;
    int       *xlsub, *xlusup, *xusub;
    int       nzlumax;
    float     *amax;
    complex   drop_sum;
    float     alpha, omega;  /* used in MILU, mimicking DRIC */
    static GlobalLU_t Glu; /* persistent to facilitate multiple factors. */
    float     *swork2;     /* used by the second dropping rule */

    /* Local scalars */
    fact_t    fact = options->Fact;
    double    diag_pivot_thresh = options->DiagPivotThresh;
    double    drop_tol = options->ILU_DropTol; /* tau */
    double    fill_ini = options->ILU_FillTol; /* tau^hat */
    double    gamma = options->ILU_FillFactor;
    int       drop_rule = options->ILU_DropRule;
    milu_t    milu = options->ILU_MILU;
    double    fill_tol;
    int       pivrow;   /* pivotal row number in the original matrix A */
    int       nseg1;    /* no of segments in U-column above panel row jcol */
    int       nseg;     /* no of segments in each U-column */
    register int jcol;
    register int kcol;  /* end column of a relaxed snode */
    register int icol;
    register int i, k, jj, new_next, iinfo;
    int       m, n, min_mn, jsupno, fsupc, nextlu, nextu;
    int       w_def;    /* upper bound on panel width */
    int       usepr, iperm_r_allocated = 0;
    int       nnzL, nnzU;
    int       *panel_histo = stat->panel_histo;
    flops_t   *ops = stat->ops;

    int       last_drop;/* the last column which the dropping rules applied */
    int       quota;
    int       nnzAj;    /* number of nonzeros in A(:,1:j) */
    int       nnzLj, nnzUj;
    double    tol_L = drop_tol, tol_U = drop_tol;
    complex   zero = {0.0, 0.0};

    /* Executable */
    iinfo    = 0;
    m        = A->nrow;
    n        = A->ncol;
    min_mn   = SUPERLU_MIN(m, n);
    Astore   = A->Store;
    a        = Astore->nzval;
    asub     = Astore->rowind;
    xa_begin = Astore->colbeg;
    xa_end   = Astore->colend;

    /* Allocate storage common to the factor routines */
    *info = cLUMemInit(fact, work, lwork, m, n, Astore->nnz, panel_size,
                       gamma, L, U, &Glu, &iwork, &cwork);
    if ( *info ) return;    /* nothing allocated locally yet */

    xsup    = Glu.xsup;
    supno   = Glu.supno;
    xlsub   = Glu.xlsub;
    xlusup  = Glu.xlusup;
    xusub   = Glu.xusub;

    SetIWork(m, n, panel_size, iwork, &segrep, &parent, &xplore,
             &repfnz, &panel_lsub, &marker_relax, &marker);
    cSetRWork(m, panel_size, cwork, &dense, &tempv);

    usepr = (fact == SamePattern_SameRowPerm);
    if ( usepr ) {
        /* Compute the inverse of perm_r */
        iperm_r = (int *) intMalloc(m);
        for (k = 0; k < m; ++k) iperm_r[perm_r[k]] = k;
        iperm_r_allocated = 1;
    }

    iperm_c = (int *) intMalloc(n);
    for (k = 0; k < n; ++k) iperm_c[perm_c[k]] = k;
    swap = (int *)intMalloc(n);
    for (k = 0; k < n; k++) swap[k] = iperm_c[k];
    iswap = (int *)intMalloc(n);
    for (k = 0; k < n; k++) iswap[k] = perm_c[k];
    amax = (float *) floatMalloc(panel_size);
    if (drop_rule & DROP_SECONDARY)
        swork2 = (float *)floatMalloc(n);
    else
        swork2 = NULL;

    nnzAj = 0;
    nnzLj = 0;
    nnzUj = 0;
    last_drop = SUPERLU_MAX(min_mn - 2 * sp_ienv(7), (int)(min_mn * 0.95));
    alpha = pow((double)n, -1.0 / options->ILU_MILU_Dim);

    /* Identify relaxed snodes */
    relax_end = (int *) intMalloc(n);
    relax_fsupc = (int *) intMalloc(n);
    if ( options->SymmetricMode == YES )
        ilu_heap_relax_snode(n, etree, relax, marker, relax_end, relax_fsupc);
    else
        ilu_relax_snode(n, etree, relax, marker, relax_end, relax_fsupc);

    ifill (perm_r, m, EMPTY);
    ifill (marker, m * NO_MARKER, EMPTY);
    supno[0] = -1;
    xsup[0]  = xlsub[0] = xusub[0] = xlusup[0] = 0;
    w_def    = panel_size;

    /* Mark the rows used by relaxed supernodes */
    ifill (marker_relax, m, EMPTY);
    i = mark_relax(m, relax_end, relax_fsupc, xa_begin, xa_end,
                   asub, marker_relax);
#if ( PRNTlevel >= 1)
    printf("%d relaxed supernodes.\n", i);
#endif

    /*
     * Work on one "panel" at a time. A panel is one of the following:
     *     (a) a relaxed supernode at the bottom of the etree, or
     *     (b) panel_size contiguous columns, defined by the user
     */
    for (jcol = 0; jcol < min_mn; ) {

        if ( relax_end[jcol] != EMPTY ) { /* start of a relaxed snode */
            kcol = relax_end[jcol];       /* end of the relaxed snode */
            panel_histo[kcol-jcol+1]++;

            /* Drop small rows in the previous supernode. */
            if (jcol > 0 && jcol < last_drop) {
                int first = xsup[supno[jcol - 1]];
                int last = jcol - 1;
                int row_quota;

                /* Compute the quota */
                if (drop_rule & DROP_PROWS)
                    row_quota = gamma * Astore->nnz / m * (m - first) / m
                            * (last - first + 1);
                else if (drop_rule & DROP_COLUMN) {
                    int c;
                    row_quota = 0;
                    for (c = first; c <= last; c++)
                        row_quota += xa_end[c] - xa_begin[c];
                    row_quota = gamma * row_quota * (m - first) / m;
                } else if (drop_rule & DROP_AREA)
                    row_quota = gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m)
                            - nnzLj;
                else
                    row_quota = m * n;
                fill_tol = pow(fill_ini, 1.0 - 0.5 * (first + last) / min_mn);

                /* Drop small rows */
                stempv = (float *) tempv;
                i = ilu_cdrop_row(options, first, last, tol_L, row_quota,
                                  &nnzLj, &fill_tol, &Glu, stempv, swork2, 0);
                /* Reset the parameters */
                if (drop_rule & DROP_DYNAMIC) {
                    if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m)
                             < nnzLj)
                        tol_L = SUPERLU_MIN(1.0, tol_L * 2.0);
                    else
                        tol_L = SUPERLU_MAX(drop_tol, tol_L * 0.5);
                }
                if (fill_tol < 0) iinfo -= (int)fill_tol;
#ifdef DEBUG
                num_drop_L += i * (last - first + 1);
#endif
            }

            /* --------------------------------------
             * Factorize the relaxed supernode(jcol:kcol)
             * -------------------------------------- */
            /* Determine the union of the row structure of the snode */
            if ( (*info = ilu_csnode_dfs(jcol, kcol, asub, xa_begin, xa_end,
                                         marker, &Glu)) != 0 )
                goto release_locals;

            nextu    = xusub[jcol];
            nextlu   = xlusup[jcol];
            jsupno   = supno[jcol];
            fsupc    = xsup[jsupno];
            new_next = nextlu + (xlsub[fsupc+1]-xlsub[fsupc])*(kcol-jcol+1);
            nzlumax = Glu.nzlumax;
            while ( new_next > nzlumax ) {
                if ((*info = cLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, &Glu)))
                    goto release_locals;
            }

            for (icol = jcol; icol <= kcol; icol++) {
                xusub[icol+1] = nextu;

                amax[0] = 0.0;
                /* Scatter into SPA dense[*] */
                for (k = xa_begin[icol]; k < xa_end[icol]; k++) {
                    register float tmp = c_abs1 (&a[k]);
                    if (tmp > amax[0]) amax[0] = tmp;
                    dense[asub[k]] = a[k];
                }
                nnzAj += xa_end[icol] - xa_begin[icol];
                if (amax[0] == 0.0) {
                    amax[0] = fill_ini;
#if ( PRNTlevel >= 1)
                    printf("Column %d is entirely zero!\n", icol);
                    fflush(stdout);
#endif
                }

                /* Numeric update within the snode */
                csnode_bmod(icol, jsupno, fsupc, dense, tempv, &Glu, stat);

                if (usepr) pivrow = iperm_r[icol];
                fill_tol = pow(fill_ini, 1.0 - (double)icol / (double)min_mn);
                if ( (*info = ilu_cpivotL(icol, diag_pivot_thresh, &usepr,
                                          perm_r, iperm_c[icol], swap, iswap,
                                          marker_relax, &pivrow,
                                          amax[0] * fill_tol, milu, zero,
                                          &Glu, stat)) ) {
                    iinfo++;
                    marker[pivrow] = kcol;
                }

            }

            jcol = kcol + 1;

        } else { /* Work on one panel of panel_size columns */

            /* Adjust panel_size so that a panel won't overlap with the next
             * relaxed snode.
             */
            panel_size = w_def;
            for (k = jcol + 1; k < SUPERLU_MIN(jcol+panel_size, min_mn); k++)
                if ( relax_end[k] != EMPTY ) {
                    panel_size = k - jcol;
                    break;
                }
            if ( k == min_mn ) panel_size = min_mn - jcol;
            panel_histo[panel_size]++;

            /* symbolic factor on a panel of columns */
            ilu_cpanel_dfs(m, panel_size, jcol, A, perm_r, &nseg1,
                           dense, amax, panel_lsub, segrep, repfnz,
                           marker, parent, xplore, &Glu);

            /* numeric sup-panel updates in topological order */
            cpanel_bmod(m, panel_size, jcol, nseg1, dense,
                        tempv, segrep, repfnz, &Glu, stat);

            /* Sparse LU within the panel, and below panel diagonal */
            for (jj = jcol; jj < jcol + panel_size; jj++) {

                k = (jj - jcol) * m; /* column index for w-wide arrays */

                nseg = nseg1;   /* Begin after all the panel segments */

                nnzAj += xa_end[jj] - xa_begin[jj];

                if ((*info = ilu_ccolumn_dfs(m, jj, perm_r, &nseg,
                                             &panel_lsub[k], segrep, &repfnz[k],
                                             marker, parent, xplore, &Glu)))
                    goto release_locals;

                /* Numeric updates */
                if ((*info = ccolumn_bmod(jj, (nseg - nseg1), &dense[k],
                                          tempv, &segrep[nseg1], &repfnz[k],
                                          jcol, &Glu, stat)) != 0)
                    goto release_locals;

                /* Make a fill-in position if the column is entirely zero */
                if (xlsub[jj + 1] == xlsub[jj]) {
                    int cand, row;
                    int nextl;
                    int nzlmax = Glu.nzlmax;
                    int *lsub = Glu.lsub;
                    int *marker2 = marker + 2 * m;

                    /* Allocate memory */
                    nextl = xlsub[jj] + 1;
                    if (nextl >= nzlmax) {
                        int error = cLUMemXpand(jj, nextl, LSUB, &nzlmax, &Glu);
                        if (error) { *info = error; goto release_locals; }
                        lsub = Glu.lsub;
                    }
                    xlsub[jj + 1]++;
                    assert(xlusup[jj]==xlusup[jj+1]);
                    xlusup[jj + 1]++;
                    Glu.lusup[xlusup[jj]] = zero;

                    /* Choose a row index (pivrow) for fill-in */
                    /* NOTE(review): if no candidate satisfies the test the
                       loop leaves cand == n and swap[n] is read - confirm
                       that a candidate always exists. */
                    for (cand = jj; cand < n; cand++)
                        if (marker_relax[swap[cand]] <= jj) break;
                    row = swap[cand];
                    marker2[row] = jj;
                    lsub[xlsub[jj]] = row;
#ifdef DEBUG
                    printf("Fill col %d.\n", jj);
                    fflush(stdout);
#endif
                }

                /* Compute the quota */
                if (drop_rule & DROP_PROWS)
                    quota = gamma * Astore->nnz / m * jj / m;
                else if (drop_rule & DROP_COLUMN)
                    quota = gamma * (xa_end[jj] - xa_begin[jj]) *
                            (jj + 1) / m;
                else if (drop_rule & DROP_AREA)
                    quota = gamma * 0.9 * nnzAj * 0.5 - nnzUj;
                else
                    quota = m;

                /* Copy the U-segments to ucol[*] and drop small entries */
                if ((*info = ilu_ccopy_to_ucol(jj, nseg, segrep, &repfnz[k],
                                               perm_r, &dense[k], drop_rule,
                                               milu, amax[jj - jcol] * tol_U,
                                               quota, &drop_sum, &nnzUj, &Glu,
                                               swork2)) != 0)
                    goto release_locals;

                /* Reset the dropping threshold if required */
                if (drop_rule & DROP_DYNAMIC) {
                    if (gamma * 0.9 * nnzAj * 0.5 < nnzLj)
                        tol_U = SUPERLU_MIN(1.0, tol_U * 2.0);
                    else
                        tol_U = SUPERLU_MAX(drop_tol, tol_U * 0.5);
                }

                /* Damp the compensation term whenever the dropped sum is
                   non-zero.  The test must be "either part non-zero": with
                   "both parts non-zero" a purely real (or purely imaginary)
                   sum was never rescaled. */
                if (drop_sum.r != 0.0 || drop_sum.i != 0.0) {
                    omega = SUPERLU_MIN(2.0*(1.0-alpha)/c_abs1(&drop_sum), 1.0);
                    cs_mult(&drop_sum, &drop_sum, omega);
                }
                if (usepr) pivrow = iperm_r[jj];
                fill_tol = pow(fill_ini, 1.0 - (double)jj / (double)min_mn);
                if ( (*info = ilu_cpivotL(jj, diag_pivot_thresh, &usepr, perm_r,
                                          iperm_c[jj], swap, iswap,
                                          marker_relax, &pivrow,
                                          amax[jj - jcol] * fill_tol, milu,
                                          drop_sum, &Glu, stat)) ) {
                    iinfo++;
                    marker[m + pivrow] = jj;
                    marker[2 * m + pivrow] = jj;
                }

                /* Reset repfnz[] for this column */
                resetrep_col (nseg, segrep, &repfnz[k]);

                /* Start a new supernode, drop the previous one */
                if (jj > 0 && supno[jj] > supno[jj - 1] && jj < last_drop) {
                    int first = xsup[supno[jj - 1]];
                    int last = jj - 1;
                    int row_quota;

                    /* Compute the quota */
                    if (drop_rule & DROP_PROWS)
                        row_quota = gamma * Astore->nnz / m * (m - first) / m
                                * (last - first + 1);
                    else if (drop_rule & DROP_COLUMN) {
                        int c;
                        row_quota = 0;
                        for (c = first; c <= last; c++)
                            row_quota += xa_end[c] - xa_begin[c];
                        row_quota = gamma * row_quota * (m - first) / m;
                    } else if (drop_rule & DROP_AREA)
                        row_quota = gamma * nnzAj * (1.0 - 0.5 * (last + 1.0)
                                / m) - nnzLj;
                    else
                        row_quota = m * n;
                    fill_tol = pow(fill_ini, 1.0 - 0.5 * (first + last) /
                            (double)min_mn);

                    /* Drop small rows */
                    stempv = (float *) tempv;
                    i = ilu_cdrop_row(options, first, last, tol_L, row_quota,
                                      &nnzLj, &fill_tol, &Glu, stempv, swork2,
                                      1);

                    /* Reset the parameters */
                    if (drop_rule & DROP_DYNAMIC) {
                        if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m)
                                < nnzLj)
                            tol_L = SUPERLU_MIN(1.0, tol_L * 2.0);
                        else
                            tol_L = SUPERLU_MAX(drop_tol, tol_L * 0.5);
                    }
                    if (fill_tol < 0) iinfo -= (int)fill_tol;
#ifdef DEBUG
                    num_drop_L += i * (last - first + 1);
#endif
                } /* if start a new supernode */

            } /* for */

            jcol += panel_size; /* Move to the next panel */

        } /* else */

    } /* for */

    *info = iinfo;

    if ( m > n ) {
        k = 0;
        for (i = 0; i < m; ++i)
            if ( perm_r[i] == EMPTY ) {
                perm_r[i] = n + k;
                ++k;
            }
    }

    ilu_countnz(min_mn, &nnzL, &nnzU, &Glu);
    fixupL(min_mn, perm_r, &Glu);

    cLUWorkFree(iwork, cwork, &Glu); /* Free work space and compress storage */

    if ( fact == SamePattern_SameRowPerm ) {
        /* L and U structures may have changed due to possibly different
           pivoting, even though the storage is available.
           There could also be memory expansions, so the array locations
           may have changed, */
        ((SCformat *)L->Store)->nnz = nnzL;
        ((SCformat *)L->Store)->nsuper = Glu.supno[n];
        ((SCformat *)L->Store)->nzval = Glu.lusup;
        ((SCformat *)L->Store)->nzval_colptr = Glu.xlusup;
        ((SCformat *)L->Store)->rowind = Glu.lsub;
        ((SCformat *)L->Store)->rowind_colptr = Glu.xlsub;
        ((NCformat *)U->Store)->nnz = nnzU;
        ((NCformat *)U->Store)->nzval = Glu.ucol;
        ((NCformat *)U->Store)->rowind = Glu.usub;
        ((NCformat *)U->Store)->colptr = Glu.xusub;
    } else {
        cCreate_SuperNode_Matrix(L, A->nrow, min_mn, nnzL, Glu.lusup,
                                 Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno,
                                 Glu.xsup, SLU_SC, SLU_C, SLU_TRLU);
        cCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU, Glu.ucol,
                               Glu.usub, Glu.xusub, SLU_NC, SLU_C, SLU_TRU);
    }

    ops[FACT] += ops[TRSV] + ops[GEMV];
    stat->expansions = --(Glu.num_expansions);

release_locals:
    /* Shared by the normal path and by every error exit taken after the
       local arrays were allocated. */
    if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r);
    SUPERLU_FREE (iperm_c);
    SUPERLU_FREE (relax_end);
    SUPERLU_FREE (swap);
    SUPERLU_FREE (iswap);
    SUPERLU_FREE (relax_fsupc);
    SUPERLU_FREE (amax);
    if ( swork2 ) SUPERLU_FREE (swork2);
}
/*
 * Purpose
 * =======
 *    csnode_dfs() - Determine the union of the row structures of those
 *    columns within the relaxed snode.
 *    Note: The relaxed snodes are leaves of the supernodal etree, therefore,
 *    the portion outside the rectangular supernode must be zero.
 *
 *    When the supernode has more than one column, a second copy of the
 *    collected subscripts is appended right after the first one (it is the
 *    copy used for pruning) and xprune[kcol] is set to its end.
 *
 * Return value
 * ============
 *     0   success;
 *    >0   number of bytes allocated when run out of memory.
 */
int
csnode_dfs (
           const int  jcol,         /* in - start of the supernode */
           const int  kcol,         /* in - end of the supernode */
           const int  *asub,        /* in */
           const int  *xa_begin,    /* in */
           const int  *xa_end,      /* in */
           int        *xprune,      /* out */
           int        *marker,      /* modified */
           GlobalLU_t *Glu          /* modified */
           )
{
    int *xsup   = Glu->xsup;
    int *supno  = Glu->supno;
    int *lsub   = Glu->lsub;
    int *xlsub  = Glu->xlsub;
    int  nzlmax = Glu->nzlmax;
    int  snode, fill, col, pos, row, status;
    int  src, dst, need;

    snode = ++supno[jcol];      /* Next available supernode number */
    fill  = xlsub[jcol];        /* next free slot in lsub */

    for (col = jcol; col <= kcol; col++) {
        /* For each nonzero in A[*,col] */
        for (pos = xa_begin[col]; pos < xa_end[col]; pos++) {
            row = asub[pos];
            if ( marker[row] == kcol ) continue;    /* already recorded */

            /* First visit of this row */
            marker[row] = kcol;
            lsub[fill++] = row;
            if ( fill < nzlmax ) continue;

            /* lsub is full: grow it and pick up the new base pointer */
            status = cLUMemXpand(jcol, fill, LSUB, &nzlmax, Glu);
            if ( status ) return (status);
            lsub = Glu->lsub;
        }
        supno[col] = snode;
    }

    /* Supernode > 1, then make a copy of the subscripts for pruning */
    if ( jcol < kcol ) {
        need = fill + (fill - xlsub[jcol]);
        while ( need > nzlmax ) {
            status = cLUMemXpand(jcol, fill, LSUB, &nzlmax, Glu);
            if ( status ) return (status);
            lsub = Glu->lsub;
        }

        /* Duplicate lsub[xlsub[jcol] .. fill-1] directly behind itself */
        dst = fill;
        for (src = xlsub[jcol]; src < fill; src++, dst++)
            lsub[dst] = lsub[src];

        for (col = jcol + 1; col <= kcol; col++)
            xlsub[col] = fill;

        fill = dst;
    }

    xsup[snode + 1]  = kcol + 1;
    supno[kcol + 1]  = snode;
    xprune[kcol]     = fill;
    xlsub[kcol + 1]  = fill;

    return 0;
}
void cgstrf (superlu_options_t *options, SuperMatrix *A, int relax, int panel_size, int *etree, void *work, int lwork, int *perm_c, int *perm_r, SuperMatrix *L, SuperMatrix *U, SuperLUStat_t *stat, int *info) { /* Local working arrays */ NCPformat *Astore; int *iperm_r = NULL; /* inverse of perm_r; used when options->Fact == SamePattern_SameRowPerm */ int *iperm_c; /* inverse of perm_c */ int *iwork; complex *cwork; int *segrep, *repfnz, *parent, *xplore; int *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide SPA */ int *xprune; int *marker; complex *dense, *tempv; int *relax_end; complex *a; int *asub; int *xa_begin, *xa_end; int *xsup, *supno; int *xlsub, *xlusup, *xusub; int nzlumax; float fill_ratio = sp_ienv(6); /* estimated fill ratio */ static GlobalLU_t Glu; /* persistent to facilitate multiple factors. */ /* Local scalars */ fact_t fact = options->Fact; double diag_pivot_thresh = options->DiagPivotThresh; int pivrow; /* pivotal row number in the original matrix A */ int nseg1; /* no of segments in U-column above panel row jcol */ int nseg; /* no of segments in each U-column */ register int jcol; register int kcol; /* end column of a relaxed snode */ register int icol; register int i, k, jj, new_next, iinfo; int m, n, min_mn, jsupno, fsupc, nextlu, nextu; int w_def; /* upper bound on panel width */ int usepr, iperm_r_allocated = 0; int nnzL, nnzU; int *panel_histo = stat->panel_histo; flops_t *ops = stat->ops; iinfo = 0; m = A->nrow; n = A->ncol; min_mn = SUPERLU_MIN(m, n); Astore = A->Store; a = Astore->nzval; asub = Astore->rowind; xa_begin = Astore->colbeg; xa_end = Astore->colend; /* Allocate storage common to the factor routines */ *info = cLUMemInit(fact, work, lwork, m, n, Astore->nnz, panel_size, fill_ratio, L, U, &Glu, &iwork, &cwork); if ( *info ) return; xsup = Glu.xsup; supno = Glu.supno; xlsub = Glu.xlsub; xlusup = Glu.xlusup; xusub = Glu.xusub; SetIWork(m, n, panel_size, iwork, &segrep, &parent, &xplore, &repfnz, &panel_lsub, &xprune, 
&marker); cSetRWork(m, panel_size, cwork, &dense, &tempv); usepr = (fact == SamePattern_SameRowPerm); if ( usepr ) { /* Compute the inverse of perm_r */ iperm_r = (int *) intMalloc(m); for (k = 0; k < m; ++k) iperm_r[perm_r[k]] = k; iperm_r_allocated = 1; } iperm_c = (int *) intMalloc(n); for (k = 0; k < n; ++k) iperm_c[perm_c[k]] = k; /* Identify relaxed snodes */ relax_end = (int *) intMalloc(n); if ( options->SymmetricMode == YES ) { heap_relax_snode(n, etree, relax, marker, relax_end); } else { relax_snode(n, etree, relax, marker, relax_end); } ifill (perm_r, m, EMPTY); ifill (marker, m * NO_MARKER, EMPTY); supno[0] = -1; xsup[0] = xlsub[0] = xusub[0] = xlusup[0] = 0; w_def = panel_size; /* * Work on one "panel" at a time. A panel is one of the following: * (a) a relaxed supernode at the bottom of the etree, or * (b) panel_size contiguous columns, defined by the user */ for (jcol = 0; jcol < min_mn; ) { if ( relax_end[jcol] != EMPTY ) { /* start of a relaxed snode */ kcol = relax_end[jcol]; /* end of the relaxed snode */ panel_histo[kcol-jcol+1]++; /* -------------------------------------- * Factorize the relaxed supernode(jcol:kcol) * -------------------------------------- */ /* Determine the union of the row structure of the snode */ if ( (*info = csnode_dfs(jcol, kcol, asub, xa_begin, xa_end, xprune, marker, &Glu)) != 0 ) return; nextu = xusub[jcol]; nextlu = xlusup[jcol]; jsupno = supno[jcol]; fsupc = xsup[jsupno]; new_next = nextlu + (xlsub[fsupc+1]-xlsub[fsupc])*(kcol-jcol+1); nzlumax = Glu.nzlumax; while ( new_next > nzlumax ) { if ( (*info = cLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, &Glu)) ) return; } for (icol = jcol; icol<= kcol; icol++) { xusub[icol+1] = nextu; /* Scatter into SPA dense[*] */ for (k = xa_begin[icol]; k < xa_end[icol]; k++) dense[asub[k]] = a[k]; /* Numeric update within the snode */ csnode_bmod(icol, jsupno, fsupc, dense, tempv, &Glu, stat); if ( (*info = cpivotL(icol, diag_pivot_thresh, &usepr, perm_r, iperm_r, iperm_c, &pivrow, 
&Glu, stat)) ) if ( iinfo == 0 ) iinfo = *info; #ifdef DEBUG cprint_lu_col("[1]: ", icol, pivrow, xprune, &Glu); #endif } jcol = icol; } else { /* Work on one panel of panel_size columns */ /* Adjust panel_size so that a panel won't overlap with the next * relaxed snode. */ panel_size = w_def; for (k = jcol + 1; k < SUPERLU_MIN(jcol+panel_size, min_mn); k++) if ( relax_end[k] != EMPTY ) { panel_size = k - jcol; break; } if ( k == min_mn ) panel_size = min_mn - jcol; panel_histo[panel_size]++; /* symbolic factor on a panel of columns */ cpanel_dfs(m, panel_size, jcol, A, perm_r, &nseg1, dense, panel_lsub, segrep, repfnz, xprune, marker, parent, xplore, &Glu); /* numeric sup-panel updates in topological order */ cpanel_bmod(m, panel_size, jcol, nseg1, dense, tempv, segrep, repfnz, &Glu, stat); /* Sparse LU within the panel, and below panel diagonal */ for ( jj = jcol; jj < jcol + panel_size; jj++) { k = (jj - jcol) * m; /* column index for w-wide arrays */ nseg = nseg1; /* Begin after all the panel segments */ if ((*info = ccolumn_dfs(m, jj, perm_r, &nseg, &panel_lsub[k], segrep, &repfnz[k], xprune, marker, parent, xplore, &Glu)) != 0) return; /* Numeric updates */ if ((*info = ccolumn_bmod(jj, (nseg - nseg1), &dense[k], tempv, &segrep[nseg1], &repfnz[k], jcol, &Glu, stat)) != 0) return; /* Copy the U-segments to ucol[*] */ if ((*info = ccopy_to_ucol(jj, nseg, segrep, &repfnz[k], perm_r, &dense[k], &Glu)) != 0) return; if ( (*info = cpivotL(jj, diag_pivot_thresh, &usepr, perm_r, iperm_r, iperm_c, &pivrow, &Glu, stat)) ) if ( iinfo == 0 ) iinfo = *info; /* Prune columns (0:jj-1) using column jj */ cpruneL(jj, perm_r, pivrow, nseg, segrep, &repfnz[k], xprune, &Glu); /* Reset repfnz[] for this column */ resetrep_col (nseg, segrep, &repfnz[k]); #ifdef DEBUG cprint_lu_col("[2]: ", jj, pivrow, xprune, &Glu); #endif } jcol += panel_size; /* Move to the next panel */ } /* else */ } /* for */ *info = iinfo; if ( m > n ) { k = 0; for (i = 0; i < m; ++i) if ( perm_r[i] == EMPTY 
) { perm_r[i] = n + k; ++k; } } countnz(min_mn, xprune, &nnzL, &nnzU, &Glu); fixupL(min_mn, perm_r, &Glu); cLUWorkFree(iwork, cwork, &Glu); /* Free work space and compress storage */ if ( fact == SamePattern_SameRowPerm ) { /* L and U structures may have changed due to possibly different pivoting, even though the storage is available. There could also be memory expansions, so the array locations may have changed, */ ((SCformat *)L->Store)->nnz = nnzL; ((SCformat *)L->Store)->nsuper = Glu.supno[n]; ((SCformat *)L->Store)->nzval = Glu.lusup; ((SCformat *)L->Store)->nzval_colptr = Glu.xlusup; ((SCformat *)L->Store)->rowind = Glu.lsub; ((SCformat *)L->Store)->rowind_colptr = Glu.xlsub; ((NCformat *)U->Store)->nnz = nnzU; ((NCformat *)U->Store)->nzval = Glu.ucol; ((NCformat *)U->Store)->rowind = Glu.usub; ((NCformat *)U->Store)->colptr = Glu.xusub; } else { cCreate_SuperNode_Matrix(L, A->nrow, min_mn, nnzL, Glu.lusup, Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno, Glu.xsup, SLU_SC, SLU_C, SLU_TRLU); cCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU, Glu.ucol, Glu.usub, Glu.xusub, SLU_NC, SLU_C, SLU_TRU); } ops[FACT] += ops[TRSV] + ops[GEMV]; stat->expansions = --(Glu.num_expansions); if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r); SUPERLU_FREE (iperm_c); SUPERLU_FREE (relax_end); }
/*! \brief
 *
 * <pre>
 * Purpose
 * =======
 *   CCOLUMN_DFS performs a symbolic factorization on column jcol and
 *   decides the supernode boundary.
 *
 *   No numeric values are touched; only the row indices in lsub_col[]
 *   (terminated by EMPTY) are used as starting points of the dfs.
 *
 *   A supernode representative is the last column of a supernode.
 *   The nonzeros in U[*,j] are segments that end at supernodal
 *   representatives.  The representatives reached by the dfs are appended
 *   to segrep[] in the order in which the dfs finishes them (topological
 *   order), and repfnz[] receives, per representative, the location of the
 *   first nonzero of its segment.
 *
 * Local variables
 * ===============
 *   nseg:     no of segments in current U[*,j]
 *   jsuper:   becomes EMPTY as soon as column j is found not to belong to
 *             the same supernode as j-1; otherwise it stays equal to nsuper.
 *
 *   marker2:  A-row --> A-row/col (0/1)
 *   repfnz:   SuperA-col --> PA-row
 *   parent:   SuperA-col --> SuperA-col
 *   xplore:   SuperA-col --> index to L-structure
 *
 * Return value
 * ============
 *     0  success;
 *   > 0  number of bytes allocated when run out of space.
 * </pre>
 */
int
ccolumn_dfs(
           const int  m,         /* in - number of rows in the matrix */
           const int  jcol,      /* in */
           int        *perm_r,   /* in */
           int        *nseg,     /* modified - with new segments appended */
           int        *lsub_col, /* in - defines the RHS vector to start the dfs */
           int        *segrep,   /* modified - with new segments appended */
           int        *repfnz,   /* modified */
           int        *xprune,   /* modified */
           int        *marker,   /* modified */
           int        *parent,   /* working array */
           int        *xplore,   /* working array */
           GlobalLU_t *Glu       /* modified */
           )
{
    int *xsup    = Glu->xsup;
    int *supno   = Glu->supno;
    int *lsub    = Glu->lsub;
    int *xlsub   = Glu->xlsub;
    int  nzlmax  = Glu->nzlmax;
    int  maxsuper = sp_ienv(3);     /* cap on the columns per supernode */
    int  jcolp1  = jcol + 1;
    int  jcolm1  = jcol - 1;
    int  nsuper  = supno[jcol];
    int  jsuper  = nsuper;
    int  nextl   = xlsub[jcol];
    int *marker2 = &marker[2*m];    /* Used for small panel LU */

    int  k, krow, kmark, kperm, krep;
    int  kchild, chmark, chperm, chrep;
    int  myfnz;                     /* First nonz column of a U-segment */
    int  xdfs, maxdfs, kpar;
    int  fsupc;                     /* First column of a snode */
    int  jptr, jm1ptr;
    int  ito, ifrom, istop;         /* Used to compress row subscripts */
    int  mem_error;

    /* For each nonzero in A[*,jcol] do dfs */
    for (k = 0; lsub_col[k] != EMPTY; k++) {

        krow = lsub_col[k];
        lsub_col[k] = EMPTY;
        kmark = marker2[krow];

        /* krow was visited before, go to the next nonz */
        if ( kmark == jcol ) continue;

        marker2[krow] = jcol;
        kperm = perm_r[krow];

        if ( kperm == EMPTY ) {
            /* krow is in L: place it in structure of L[*,jcol] */
            lsub[nextl++] = krow;   /* krow is indexed into A */
            if ( nextl >= nzlmax ) {
                mem_error = cLUMemXpand(jcol, nextl, LSUB, &nzlmax, Glu);
                if ( mem_error ) return (mem_error);
                lsub = Glu->lsub;
            }
            if ( kmark != jcolm1 ) jsuper = EMPTY; /* Row index subset testing */
            continue;
        }

        /* krow is in U: look at its supernode representative */
        krep = xsup[supno[kperm]+1] - 1;
        myfnz = repfnz[krep];

        if ( myfnz != EMPTY ) {     /* Visited before: only update repfnz */
            if ( myfnz > kperm ) repfnz[krep] = kperm;
            continue;
        }

        /* Otherwise, perform dfs starting at krep */
        parent[krep] = EMPTY;
        repfnz[krep] = kperm;
        xdfs = xlsub[krep];
        maxdfs = xprune[krep];

        for (;;) {
            /* For each unmarked kchild of krep */
            while ( xdfs < maxdfs ) {

                kchild = lsub[xdfs];
                xdfs++;
                chmark = marker2[kchild];

                if ( chmark == jcol ) continue;     /* reached already */

                marker2[kchild] = jcol;
                chperm = perm_r[kchild];

                if ( chperm == EMPTY ) {
                    /* Case kchild is in L: place it in L[*,k] */
                    lsub[nextl++] = kchild;
                    if ( nextl >= nzlmax ) {
                        mem_error = cLUMemXpand(jcol,nextl,LSUB,&nzlmax,Glu);
                        if ( mem_error ) return (mem_error);
                        lsub = Glu->lsub;
                    }
                    if ( chmark != jcolm1 ) jsuper = EMPTY;
                    continue;
                }

                /* Case kchild is in U:
                 *   chrep = its supernode-rep. If its rep has
                 *   been explored, update its repfnz[*]
                 */
                chrep = xsup[supno[chperm]+1] - 1;
                myfnz = repfnz[chrep];
                if ( myfnz != EMPTY ) {     /* Visited before */
                    if ( myfnz > chperm ) repfnz[chrep] = chperm;
                    continue;
                }

                /* Continue dfs at super-rep of kchild:
                   remember where to resume in krep, then descend */
                xplore[krep] = xdfs;
                parent[chrep] = krep;
                krep = chrep;               /* Go deeper down G(L^t) */
                repfnz[krep] = chperm;
                xdfs = xlsub[krep];
                maxdfs = xprune[krep];
            } /* while */

            /* krow has no more unexplored nbrs;
             *    place supernode-rep krep in postorder DFS.
             *    backtrack dfs to its parent
             */
            segrep[*nseg] = krep;
            ++(*nseg);
            kpar = parent[krep];    /* Pop from stack, mimic recursion */
            if ( kpar == EMPTY ) break;     /* dfs done */
            krep = kpar;
            xdfs = xplore[krep];
            maxdfs = xprune[krep];
        } /* Until empty stack */

    } /* for each nonzero ... */

    /* Check to see if j belongs in the same supernode as j-1 */
    if ( jcol == 0 ) {  /* Do nothing for column 0 */
        nsuper = supno[0] = 0;
    } else {
        fsupc = xsup[nsuper];
        jptr = xlsub[jcol];     /* Not compressed yet */
        jm1ptr = xlsub[jcolm1];

#ifdef T2_SUPER
        if ( (nextl-jptr != jptr-jm1ptr-1) ) jsuper = EMPTY;
#endif
        /* Make sure the number of columns in a supernode doesn't
           exceed threshold. */
        if ( jcol - fsupc >= maxsuper ) jsuper = EMPTY;

        /* If jcol starts a new supernode, reclaim storage space in
         * lsub from the previous supernode. Note we only store
         * the subscript set of the first and last columns of
         * a supernode. (first for num values, last for pruning)
         */
        if ( jsuper == EMPTY ) {    /* starts a new supernode */
            if ( (fsupc < jcolm1-1) ) {     /* >= 3 columns in nsuper */
#ifdef CHK_COMPRESS
                printf(" Compress lsub[] at super %d-%d\n", fsupc, jcolm1);
#endif
                ito = xlsub[fsupc+1];
                xlsub[jcolm1] = ito;
                istop = ito + jptr - jm1ptr;
                xprune[jcolm1] = istop;     /* Initialize xprune[jcol-1] */
                xlsub[jcol] = istop;
                for (ifrom = jm1ptr; ifrom < nextl; ++ifrom, ++ito)
                    lsub[ito] = lsub[ifrom];
                nextl = ito;                /* = istop + length(jcol) */
            }
            nsuper++;
            supno[jcol] = nsuper;
        } /* if a new supernode */

    } /* else: jcol > 0 */

    /* Tidy up the pointers before exit */
    xsup[nsuper+1] = jcolp1;
    supno[jcolp1]  = nsuper;
    xprune[jcol]   = nextl;     /* Initialize upper bound for pruning */
    xlsub[jcolp1]  = nextl;

    return 0;
}
/*
 * ccopy_to_ucol() - gather the U-segments of column jcol from the SPA
 * dense[*] into the global ucol[*]/usub[*] arrays.
 *
 * The segments listed in segrep[] are visited from the last entry to the
 * first.  A segment is copied when its representative lies in a supernode
 * other than that of jcol and repfnz[] is not EMPTY for it; every copied
 * dense[] entry is reset to zero.  xusub[jcol + 1] is set to the end of the
 * copied data.
 *
 * Return value
 *   0   success;
 *   !=0 the value returned by cLUMemXpand() when growing ucol/usub failed.
 */
int
ccopy_to_ucol(
              int        jcol,    /* in */
              int        nseg,    /* in */
              int        *segrep, /* in */
              int        *repfnz, /* in */
              int        *perm_r, /* in */
              complex    *dense,  /* modified - reset to zero on return */
              GlobalLU_t *Glu     /* modified */
              )
{
    int     *xsup   = Glu->xsup;
    int     *supno  = Glu->supno;
    int     *lsub   = Glu->lsub;
    int     *xlsub  = Glu->xlsub;
    complex *ucol   = Glu->ucol;
    int     *usub   = Glu->usub;
    int     *xusub  = Glu->xusub;
    int      nzumax = Glu->nzumax;
    complex  zero   = {0.0, 0.0};

    int jsupno = supno[jcol];
    int nextu  = xusub[jcol];
    int seg, rep, rep_snode, first_nz, first_col;
    int src, len, done, row, wanted, mem_error;

    for (seg = nseg - 1; seg >= 0; seg--) {
        rep = segrep[seg];
        rep_snode = supno[rep];
        if ( rep_snode == jsupno ) continue;    /* stays out of ucol[] */

        first_nz = repfnz[rep];
        if ( first_nz == EMPTY ) continue;      /* zero U-segment */

        /* Nonzero U-segment */
        first_col = xsup[rep_snode];
        src = xlsub[first_col] + first_nz - first_col;
        len = rep - first_nz + 1;

        wanted = nextu + len;
        while ( wanted > nzumax ) {
            mem_error = cLUMemXpand(jcol, nextu, UCOL, &nzumax, Glu);
            if ( mem_error ) return (mem_error);
            ucol = Glu->ucol;
            mem_error = cLUMemXpand(jcol, nextu, USUB, &nzumax, Glu);
            if ( mem_error ) return (mem_error);
            usub = Glu->usub;
            lsub = Glu->lsub;
        }

        for (done = 0; done < len; done++, src++, nextu++) {
            row = lsub[src];
            usub[nextu] = perm_r[row];
            ucol[nextu] = dense[row];
            dense[row] = zero;
        }
    } /* for each segment... */

    xusub[jcol + 1] = nextu;    /* Close U[*,jcol] */
    return 0;
}
/*
 * Gather the nonzero U-segments of column jcol from the SPA dense[*]
 * into the global ucol[]/usub[] arrays, applying the ILU dropping rules.
 *
 * First dropping rule: an entry is kept only if quota > 0 and its
 * c_abs1() magnitude is >= drop_tol; otherwise it is dropped.  When
 * drop_rule == NODROP, drop_tol is forced to -1.0 and quota to Glu->n.
 *
 * Second dropping rule (only if drop_rule has DROP_SECONDARY set and the
 * column holds more than quota entries): a threshold `tol` is obtained
 * either by interpolation between the largest and smallest kept
 * magnitudes (DROP_INTERP) or from sqselect() on the magnitudes, and
 * every entry with magnitude <= tol is removed by overwriting it with
 * the current last entry of the column.
 *
 * Dropped entries are accumulated into *sum according to `milu`:
 *   SMILU_1, SMILU_2 : complex sum of the dropped entries
 *                      (SMILU_2 then replaces it by c_abs1(sum));
 *   SMILU_3          : sum of the c_abs1() magnitudes (real part only);
 *   SILU / other     : nothing is accumulated.
 *
 * Every visited entry of dense[] is reset to zero.  *nnzUj is increased
 * by the number of entries finally stored for this column.
 *
 * Return value: 0 on success; otherwise the nonzero value returned by
 *               cLUMemXpand() when the U storage could not be expanded.
 */
int
ilu_ccopy_to_ucol(
              int        jcol,     /* in */
              int        nseg,     /* in */
              int        *segrep,  /* in */
              int        *repfnz,  /* in */
              int        *perm_r,  /* in */
              complex    *dense,   /* modified - reset to zero on return */
              int        drop_rule,/* in */
              milu_t     milu,     /* in */
              double     drop_tol, /* in */
              int        quota,    /* maximum nonzero entries allowed */
              complex    *sum,     /* out - the sum of dropped entries */
              int        *nnzUj,   /* in - out */
              GlobalLU_t *Glu,     /* modified */
              float      *work     /* working space with minimum size n,
                                    * used by the second dropping rule */
              )
{
    int       ksub, krep, ksupno;
    int       i, k, kfnz, segsze;
    int       fsupc, isub, irow;
    int       jsupno, nextu;
    int       new_next, mem_error;
    int       *xsup, *supno;
    int       *lsub, *xlsub;
    complex   *ucol;
    int       *usub, *xusub;
    int       nzumax;
    int       m; /* number of entries in the nonzero U-segments */
    register float d_max = 0.0, d_min = 1.0 / dlamch_("Safe minimum");
    register double tmp;
    complex zero = {0.0, 0.0};
    int i_1 = 1;

    xsup    = Glu->xsup;
    supno   = Glu->supno;
    lsub    = Glu->lsub;
    xlsub   = Glu->xlsub;
    ucol    = Glu->ucol;
    usub    = Glu->usub;
    xusub   = Glu->xusub;
    nzumax  = Glu->nzumax;

    *sum = zero;
    if (drop_rule == NODROP) {
        /* Disable both rules: every magnitude is >= -1 and the quota
         * is set to Glu->n. */
        drop_tol = -1.0;
        quota = Glu->n;
    }

    jsupno = supno[jcol];
    nextu  = xusub[jcol];
    k = nseg - 1;
    for (ksub = 0; ksub < nseg; ksub++) {
        krep = segrep[k--];
        ksupno = supno[krep];

        if ( ksupno != jsupno ) { /* Should go into ucol[] */
            kfnz = repfnz[krep];
            if ( kfnz != EMPTY ) { /* Nonzero U-segment */

                fsupc = xsup[ksupno];
                isub = xlsub[fsupc] + kfnz - fsupc;
                segsze = krep - kfnz + 1;

                new_next = nextu + segsze;
                while ( new_next > nzumax ) {
                    if ((mem_error = cLUMemXpand(jcol, nextu, UCOL, &nzumax,
                            Glu)) != 0)
                        return (mem_error);
                    ucol = Glu->ucol;
                    if ((mem_error = cLUMemXpand(jcol, nextu, USUB, &nzumax,
                            Glu)) != 0)
                        return (mem_error);
                    usub = Glu->usub;
                    lsub = Glu->lsub;
                }

                for (i = 0; i < segsze; i++) {
                    irow = lsub[isub++];
                    tmp = c_abs1(&dense[irow]);

                    /* first dropping rule */
                    if (quota > 0 && tmp >= drop_tol) {
                        /* Track the range of kept magnitudes for the
                         * interpolated secondary threshold. */
                        if (tmp > d_max) d_max = tmp;
                        if (tmp < d_min) d_min = tmp;
                        usub[nextu] = perm_r[irow];
                        ucol[nextu] = dense[irow];
                        nextu++;
                    } else {
                        switch (milu) {
                            case SMILU_1:
                            case SMILU_2:
                                c_add(sum, sum, &dense[irow]);
                                break;
                            case SMILU_3:
                                /* *sum += fabs(dense[irow]);*/
                                sum->r += tmp;
                                break;
                            case SILU:
                            default:
                                break;
                        }
#ifdef DEBUG
                        num_drop_U++;
#endif
                    }
                    dense[irow] = zero;
                }

            }

        }

    } /* for each segment... */

    xusub[jcol + 1] = nextu;      /* Close U[*,jcol] */
    m = xusub[jcol + 1] - xusub[jcol];

    /* second dropping rule */
    if (drop_rule & DROP_SECONDARY && m > quota) {
        register double tol = d_max;  /* quota <= 0: drop everything kept */
        register int m0 = xusub[jcol] + m - 1;  /* last entry of column */

        if (quota > 0) {
            if (drop_rule & DROP_INTERP) {
                /* Interpolate between the reciprocals of the largest
                 * and smallest kept magnitudes. */
                d_max = 1.0 / d_max; d_min = 1.0 / d_min;
                tol = 1.0 / (d_max + (d_min - d_max) * quota / m);
            } else {
                i_1 = xusub[jcol];
                for (i = 0; i < m; ++i, ++i_1) work[i] = c_abs1(&ucol[i_1]);
                tol = sqselect(m, work, quota);
            }
        }

        for (i = xusub[jcol]; i <= m0; ) {
            if (c_abs1(&ucol[i]) <= tol) {
                switch (milu) {
                    case SMILU_1:
                    case SMILU_2:
                        c_add(sum, sum, &ucol[i]);
                        break;
                    case SMILU_3:
                        /* Accumulate the magnitude of the entry being
                         * dropped here, not the stale `tmp` left over
                         * from the first-rule loop. */
                        sum->r += c_abs1(&ucol[i]);
                        break;
                    case SILU:
                    default:
                        break;
                }
                /* Remove entry i by moving the last entry into its slot;
                 * do not advance i so the moved entry is tested too. */
                ucol[i] = ucol[m0];
                usub[i] = usub[m0];
                m0--;
                m--;
#ifdef DEBUG
                num_drop_U++;
#endif
                xusub[jcol + 1]--;
                continue;
            }
            i++;
        }
    }

    if (milu == SMILU_2) {
        sum->r = c_abs1(sum); sum->i = 0.0;
    }
    if (milu == SMILU_3) sum->i = 0.0;

    *nnzUj += m;

    return 0;
}
/*
 * ccolumn_bmod -- numeric update of column jcol.
 *
 * Phase 1: for each entry of segrep[] (visited from index nseg-1 down to
 *          0) whose supernode differs from the one containing jcol, the
 *          corresponding U-segment held in dense[] is updated using the
 *          supernodal block in lusup[], and the product is subtracted
 *          from the rows of dense[] below the segment.
 * Phase 2: the rows of dense[] listed in lsub[] for jcol's supernode are
 *          copied into lusup[] (expanding it through cLUMemXpand() if
 *          required) and the copied entries of dense[] are zeroed.
 * Phase 3: if columns of the same supernode/panel precede jcol, a
 *          triangular solve and matrix-vector update are applied to the
 *          freshly stored column in lusup[].
 *
 * Although marked "in" below, dense[] is updated in place in phase 1 and
 * partly reset to zero in phase 2.  Entries of tempv[] that are used are
 * set back to zero before returning.  stat->ops[TRSV] and stat->ops[GEMV]
 * are incremented in phase 3 only.
 */
/* Return value:   0 - successful return
 *               > 0 - number of bytes allocated when run out of space
 */
int
ccolumn_bmod (
	     const int  jcol,	  /* in */
	     const int  nseg,	  /* in */
	     complex     *dense,	  /* in */
	     complex     *tempv,	  /* working array */
	     int        *segrep,  /* in */
	     int        *repfnz,  /* in */
	     int        fpanelc,  /* in -- first column in the current panel */
	     GlobalLU_t *Glu,     /* modified */
	     SuperLUStat_t *stat  /* output */
	     )
{
/*
 * Purpose:
 * ========
 *    Performs numeric block updates (sup-col) in topological order.
 *    It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
 *    Special processing on the supernodal portion of L\U[*,j]
 *
 */
#ifdef _CRAY
    _fcd ftcs1 = _cptofcd("L", strlen("L")),
         ftcs2 = _cptofcd("N", strlen("N")),
         ftcs3 = _cptofcd("U", strlen("U"));
#endif
    int         incx = 1, incy = 1;
    complex      alpha, beta;

    /* krep = representative of current k-th supernode
     * fsupc = first supernodal column
     * nsupc = no of columns in supernode
     * nsupr = no of rows in supernode (used as leading dimension)
     * luptr = location of supernodal LU-block in storage
     * kfnz = first nonz in the k-th supernodal segment
     * no_zeros = no of leading zeros in a supernodal U-segment
     */
    complex       ukj, ukj1, ukj2;
    int          luptr, luptr1, luptr2;
    int          fsupc, nsupc, nsupr, segsze;
    int          nrow;	  /* No of rows in the matrix of matrix-vector */
    int          jcolp1, jsupno, k, ksub, krep, krep_ind, ksupno;
    register int lptr, kfnz, isub, irow, i;
    register int no_zeros, new_next;
    int          ufirst, nextlu;
    int          fst_col; /* First column within small LU update */
    int          d_fsupc; /* Distance between the first column of the current
			     panel and the first column of the current snode. */
    int          *xsup, *supno;
    int          *lsub, *xlsub;
    complex       *lusup;
    int          *xlusup;
    int          nzlumax;
    complex       *tempv1;
    complex      zero = {0.0, 0.0};
    complex      one = {1.0, 0.0};
    complex      none = {-1.0, 0.0};
    complex	 comp_temp, comp_temp1;
    int          mem_error;
    flops_t      *ops = stat->ops;

    /* Local aliases of the global LU data structure. */
    xsup    = Glu->xsup;
    supno   = Glu->supno;
    lsub    = Glu->lsub;
    xlsub   = Glu->xlsub;
    lusup   = Glu->lusup;
    xlusup  = Glu->xlusup;
    nzlumax = Glu->nzlumax;
    jcolp1 = jcol + 1;
    jsupno = supno[jcol];

    /*
     * For each nonz supernode segment of U[*,j] in topological order
     */
    k = nseg - 1;
    for (ksub = 0; ksub < nseg; ksub++) {

	krep = segrep[k];
	k--;
	ksupno = supno[krep];
	if ( jsupno != ksupno ) { /* Outside the rectangular supernode */

	    fsupc = xsup[ksupno];
	    fst_col = SUPERLU_MAX ( fsupc, fpanelc );

	    /* Distance from the current supernode to the current panel;
	       d_fsupc=0 if fsupc > fpanelc. */
	    d_fsupc = fst_col - fsupc;

	    luptr = xlusup[fst_col] + d_fsupc;
	    lptr = xlsub[fsupc] + d_fsupc;

	    /* The segment is clipped so it never starts before the panel. */
	    kfnz = repfnz[krep];
	    kfnz = SUPERLU_MAX ( kfnz, fpanelc );

	    segsze = krep - kfnz + 1;
	    nsupc = krep - fst_col + 1;
	    nsupr = xlsub[fsupc+1] - xlsub[fsupc];	/* Leading dimension */
	    nrow = nsupr - d_fsupc - nsupc;
	    krep_ind = lptr + nsupc - 1; /* position of krep's row in lsub[] */

	    /*
	     * Case 1: Update U-segment of size 1 -- col-col update
	     */
	    if ( segsze == 1 ) {
		ukj = dense[lsub[krep_ind]];
		/* Move to the first entry below the diagonal block in the
		   last column (krep) of the supernode. */
		luptr += nsupr*(nsupc-1) + nsupc;

		for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
		    irow = lsub[i];
		    cc_mult(&comp_temp, &ukj, &lusup[luptr]);
		    c_sub(&dense[irow], &dense[irow], &comp_temp);
		    luptr++;
		}

	    } else if ( segsze <= 3 ) {
		ukj = dense[lsub[krep_ind]];
		/* luptr points at the diagonal entry of column krep;
		   luptr1 at the same row one column to the left. */
		luptr += nsupr*(nsupc-1) + nsupc-1;
		ukj1 = dense[lsub[krep_ind - 1]];
		luptr1 = luptr - nsupr;

		if ( segsze == 2 ) { /* Case 2: 2cols-col update */
		    cc_mult(&comp_temp, &ukj1, &lusup[luptr1]);
		    c_sub(&ukj, &ukj, &comp_temp);
		    dense[lsub[krep_ind]] = ukj;
		    /* Pointers are advanced before use, so the loop starts
		       at the first row below the diagonal. */
		    for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
			irow = lsub[i];
			luptr++;
			luptr1++;
			cc_mult(&comp_temp, &ukj, &lusup[luptr]);
			cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
			c_add(&comp_temp, &comp_temp, &comp_temp1);
			c_sub(&dense[irow], &dense[irow], &comp_temp);
		    }
		} else { /* Case 3: 3cols-col update */
		    ukj2 = dense[lsub[krep_ind - 2]];
		    luptr2 = luptr1 - nsupr;
		    /* Eliminate within the 3-entry segment first. */
		    cc_mult(&comp_temp, &ukj2, &lusup[luptr2-1]);
		    c_sub(&ukj1, &ukj1, &comp_temp);

		    cc_mult(&comp_temp, &ukj1, &lusup[luptr1]);
		    cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
		    c_add(&comp_temp, &comp_temp, &comp_temp1);
		    c_sub(&ukj, &ukj, &comp_temp);

		    dense[lsub[krep_ind]] = ukj;
		    dense[lsub[krep_ind-1]] = ukj1;
		    for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
			irow = lsub[i];
			luptr++;
			luptr1++;
			luptr2++;
			cc_mult(&comp_temp, &ukj, &lusup[luptr]);
			cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
			c_add(&comp_temp, &comp_temp, &comp_temp1);
			cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
			c_add(&comp_temp, &comp_temp, &comp_temp1);
			c_sub(&dense[irow], &dense[irow], &comp_temp);
		    }
		}

	    } else {
		/*
		 * Case: sup-col update
		 * Perform a triangular solve and block update,
		 * then scatter the result of sup-col update to dense
		 */

		no_zeros = kfnz - fst_col;

		/* Copy U[*,j] segment from dense[*] to tempv[*] */
		isub = lptr + no_zeros;
		for (i = 0; i < segsze; i++) {
		    irow = lsub[isub];
		    tempv[i] = dense[irow];
		    ++isub;
		}

		/* Dense triangular solve -- start effective triangle */
		luptr += nsupr * no_zeros + no_zeros;

#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
		CTRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr],
		       &nsupr, tempv, &incx );
#else
		ctrsv_( "L", "N", "U", &segsze, &lusup[luptr],
		       &nsupr, tempv, &incx );
#endif
		luptr += segsze;  /* Dense matrix-vector */
		tempv1 = &tempv[segsze];
		alpha = one;
		beta = zero;
#ifdef _CRAY
		CGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr],
		       &nsupr, tempv, &incx, &beta, tempv1, &incy );
#else
		cgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr],
		       &nsupr, tempv, &incx, &beta, tempv1, &incy );
#endif
#else
		clsolve ( nsupr, segsze, &lusup[luptr], tempv );

		luptr += segsze;  /* Dense matrix-vector */
		tempv1 = &tempv[segsze];
		cmatvec (nsupr, nrow , segsze, &lusup[luptr], tempv, tempv1);
#endif

		/* Scatter tempv[] into SPA dense[] as a temporary storage */
		isub = lptr + no_zeros;
		for (i = 0; i < segsze; i++) {
		    irow = lsub[isub];
		    dense[irow] = tempv[i];
		    tempv[i] = zero;
		    ++isub;
		}

		/* Scatter tempv1[] into SPA dense[] */
		/* isub continues from the end of the segment, i.e. the
		   rows below it in lsub[]. */
		for (i = 0; i < nrow; i++) {
		    irow = lsub[isub];
		    c_sub(&dense[irow], &dense[irow], &tempv1[i]);
		    tempv1[i] = zero;
		    ++isub;
		}
	    }

	} /* if jsupno ... */

    } /* for each segment... */

    /*
     *	Process the supernodal portion of L\U[*,j]
     */
    nextlu = xlusup[jcol];
    fsupc = xsup[jsupno];

    /* Copy the SPA dense into L\U[*,j] */
    new_next = nextlu + xlsub[fsupc+1] - xlsub[fsupc];
    while ( new_next > nzlumax ) {
	/* Nonzero return from cLUMemXpand is propagated to the caller. */
	if (mem_error = cLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, Glu))
	    return (mem_error);
	lusup = Glu->lusup;
	lsub = Glu->lsub;
    }

    for (isub = xlsub[fsupc]; isub < xlsub[fsupc+1]; isub++) {
	irow = lsub[isub];
	lusup[nextlu] = dense[irow];
	dense[irow] = zero;
	++nextlu;
    }

    xlusup[jcolp1] = nextlu;	/* Close L\U[*,jcol] */

    /* For more updates within the panel (also within the current supernode),
     * should start from the first column of the panel, or the first column
     * of the supernode, whichever is bigger. There are 2 cases:
     *    1) fsupc < fpanelc, then fst_col := fpanelc
     *    2) fsupc >= fpanelc, then fst_col := fsupc
     */
    fst_col = SUPERLU_MAX ( fsupc, fpanelc );

    if ( fst_col < jcol ) {

	/* Distance between the current supernode and the current panel.
	   d_fsupc=0 if fsupc >= fpanelc. */
	d_fsupc = fst_col - fsupc;

	lptr = xlsub[fsupc] + d_fsupc;
	luptr = xlusup[fst_col] + d_fsupc;
	nsupr = xlsub[fsupc+1] - xlsub[fsupc];	/* Leading dimension */
	nsupc = jcol - fst_col; /* Excluding jcol */
	nrow = nsupr - d_fsupc - nsupc;

	/* Points to the beginning of jcol in snode L\U(jsupno) */
	ufirst = xlusup[jcol] + d_fsupc;

	/* Operation counters for the solve and matrix-vector product below. */
	ops[TRSV] += 4 * nsupc * (nsupc - 1);
	ops[GEMV] += 8 * nrow * nsupc;

#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
	CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr],
	       &nsupr, &lusup[ufirst], &incx );
#else
	ctrsv_( "L", "N", "U", &nsupc, &lusup[luptr],
	       &nsupr, &lusup[ufirst], &incx );
#endif

	alpha = none; beta = one; /* y := beta*y + alpha*A*x */

#ifdef _CRAY
	CGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
	       &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
#else
	cgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
	       &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
#endif
#else
	clsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );

	cmatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc],
		  &lusup[ufirst], tempv );

	/* Copy updates from tempv[*] into lusup[*] */
	isub = ufirst + nsupc;
	for (i = 0; i < nrow; i++) {
	    c_sub(&lusup[isub], &lusup[isub], &tempv[i]);
	    tempv[i] = zero;
	    ++isub;
	}

#endif

    } /* if fst_col < jcol ... */

    return 0;
}