/*! \brief Performs one of the matrix-vector operations y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y * * <pre> * Purpose * ======= * * sp_zgemv() performs one of the matrix-vector operations * y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, * where alpha and beta are scalars, x and y are vectors and A is a * sparse A->nrow by A->ncol matrix. * * Parameters * ========== * * TRANS - (input) char* * On entry, TRANS specifies the operation to be performed as * follows: * TRANS = 'N' or 'n' y := alpha*A*x + beta*y. * TRANS = 'T' or 't' y := alpha*A'*x + beta*y. * TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. * * ALPHA - (input) doublecomplex * On entry, ALPHA specifies the scalar alpha. * * A - (input) SuperMatrix* * Before entry, the leading m by n part of the array A must * contain the matrix of coefficients. * * X - (input) doublecomplex*, array of DIMENSION at least * ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' * and at least * ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. * Before entry, the incremented array X must contain the * vector x. * * INCX - (input) int * On entry, INCX specifies the increment for the elements of * X. INCX must not be zero. * * BETA - (input) doublecomplex * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then Y need not be set on input. * * Y - (output) doublecomplex*, array of DIMENSION at least * ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' * and at least * ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. * Before entry with BETA non-zero, the incremented array Y * must contain the vector y. On exit, Y is overwritten by the * updated vector y. * * INCY - (input) int * On entry, INCY specifies the increment for the elements of * Y. INCY must not be zero. * * ==== Sparse Level 2 Blas routine. * </pre> */ int sp_zgemv(char *trans, doublecomplex alpha, SuperMatrix *A, doublecomplex *x, int incx, doublecomplex beta, doublecomplex *y, int incy) { /* Local variables */ NCformat *Astore; doublecomplex *Aval; int info; doublecomplex temp, temp1; int lenx, leny, i, j, irow; int iy, jx, jy, kx, ky; int notran; doublecomplex comp_zero = {0.0, 0.0}; doublecomplex comp_one = {1.0, 0.0}; notran = lsame_(trans, "N"); Astore = A->Store; Aval = Astore->nzval; /* Test the input parameters */ info = 0; if ( !notran && !lsame_(trans, "T") && !lsame_(trans, "C")) info = 1; else if ( A->nrow < 0 || A->ncol < 0 ) info = 3; else if (incx == 0) info = 5; else if (incy == 0) info = 8; if (info != 0) { xerbla_("sp_zgemv ", &info); return 0; } /* Quick return if possible. */ if (A->nrow == 0 || A->ncol == 0 || z_eq(&alpha, &comp_zero) && z_eq(&beta, &comp_one)) return 0; /* Set LENX and LENY, the lengths of the vectors x and y, and set up the start points in X and Y. */ if (lsame_(trans, "N")) { lenx = A->ncol; leny = A->nrow; } else { lenx = A->nrow; leny = A->ncol; } if (incx > 0) kx = 0; else kx = - (lenx - 1) * incx; if (incy > 0) ky = 0; else ky = - (leny - 1) * incy; /* Start the operations. In this version the elements of A are accessed sequentially with one pass through A. */ /* First form y := beta*y. */ if ( !z_eq(&beta, &comp_one) ) { if (incy == 1) { if ( z_eq(&beta, &comp_zero) ) for (i = 0; i < leny; ++i) y[i] = comp_zero; else for (i = 0; i < leny; ++i) zz_mult(&y[i], &beta, &y[i]); } else { iy = ky; if ( z_eq(&beta, &comp_zero) ) for (i = 0; i < leny; ++i) { y[iy] = comp_zero; iy += incy; } else for (i = 0; i < leny; ++i) { zz_mult(&y[iy], &beta, &y[iy]); iy += incy; } } } if ( z_eq(&alpha, &comp_zero) ) return 0; if ( notran ) { /* Form y := alpha*A*x + y. */ jx = kx; if (incy == 1) { for (j = 0; j < A->ncol; ++j) { if ( !z_eq(&x[jx], &comp_zero) ) { zz_mult(&temp, &alpha, &x[jx]); for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) { irow = Astore->rowind[i]; zz_mult(&temp1, &temp, &Aval[i]); z_add(&y[irow], &y[irow], &temp1); } } jx += incx; } } else { ABORT("Not implemented."); } } else { /* Form y := alpha*A'*x + y. */ jy = ky; if (incx == 1) { for (j = 0; j < A->ncol; ++j) { temp = comp_zero; for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) { irow = Astore->rowind[i]; zz_mult(&temp1, &Aval[i], &x[irow]); z_add(&temp, &temp, &temp1); } zz_mult(&temp1, &alpha, &temp); z_add(&y[jy], &y[jy], &temp1); jy += incy; } } else { ABORT("Not implemented."); } } return 0; } /* sp_zgemv */
int sp_zgemv(char *trans, doublecomplex alpha, SuperMatrix *A, doublecomplex *x, int incx, doublecomplex beta, doublecomplex *y, int incy) { /* Purpose ======= sp_zgemv() performs one of the matrix-vector operations y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, where alpha and beta are scalars, x and y are vectors and A is a sparse A->nrow by A->ncol matrix. Parameters ========== TRANS - (input) char* On entry, TRANS specifies the operation to be performed as follows: TRANS = 'N' or 'n' y := alpha*A*x + beta*y. TRANS = 'T' or 't' y := alpha*A'*x + beta*y. TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. ALPHA - (input) doublecomplex On entry, ALPHA specifies the scalar alpha. A - (input) SuperMatrix* Before entry, the leading m by n part of the array A must contain the matrix of coefficients. X - (input) doublecomplex*, array of DIMENSION at least ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' and at least ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. Before entry, the incremented array X must contain the vector x. INCX - (input) int On entry, INCX specifies the increment for the elements of X. INCX must not be zero. BETA - (input) doublecomplex On entry, BETA specifies the scalar beta. When BETA is supplied as zero then Y need not be set on input. Y - (output) doublecomplex*, array of DIMENSION at least ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' and at least ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. Before entry with BETA non-zero, the incremented array Y must contain the vector y. On exit, Y is overwritten by the updated vector y. INCY - (input) int On entry, INCY specifies the increment for the elements of Y. INCY must not be zero. ==== Sparse Level 2 Blas routine. */ /* Local variables */ NCformat *Astore; doublecomplex *Aval; int info; doublecomplex temp, temp1; int lenx, leny, i, j, irow; int iy, jx, jy, kx, ky; int notran; doublecomplex comp_zero = {0.0, 0.0}; doublecomplex comp_one = {1.0, 0.0}; notran = lsame_(trans, "N"); Astore = A->Store; Aval = Astore->nzval; /* Test the input parameters */ info = 0; if ( !notran && !lsame_(trans, "T") && !lsame_(trans, "C")) info = 1; else if ( A->nrow < 0 || A->ncol < 0 ) info = 3; else if (incx == 0) info = 5; else if (incy == 0) info = 8; if (info != 0) { xerbla_("sp_zgemv ", &info); return 0; } /* Quick return if possible. */ if (A->nrow == 0 || A->ncol == 0 || z_eq(&alpha, &comp_zero) && z_eq(&beta, &comp_one)) return 0; /* Set LENX and LENY, the lengths of the vectors x and y, and set up the start points in X and Y. */ if (lsame_(trans, "N")) { lenx = A->ncol; leny = A->nrow; } else { lenx = A->nrow; leny = A->ncol; } if (incx > 0) kx = 0; else kx = - (lenx - 1) * incx; if (incy > 0) ky = 0; else ky = - (leny - 1) * incy; /* Start the operations. In this version the elements of A are accessed sequentially with one pass through A. */ /* First form y := beta*y. */ if ( !z_eq(&beta, &comp_one) ) { if (incy == 1) { if ( z_eq(&beta, &comp_zero) ) for (i = 0; i < leny; ++i) y[i] = comp_zero; else for (i = 0; i < leny; ++i) zz_mult(&y[i], &beta, &y[i]); } else { iy = ky; if ( z_eq(&beta, &comp_zero) ) for (i = 0; i < leny; ++i) { y[iy] = comp_zero; iy += incy; } else for (i = 0; i < leny; ++i) { zz_mult(&y[iy], &beta, &y[iy]); iy += incy; } } } if ( z_eq(&alpha, &comp_zero) ) return 0; if ( notran ) { /* Form y := alpha*A*x + y. */ jx = kx; if (incy == 1) { for (j = 0; j < A->ncol; ++j) { if ( !z_eq(&x[jx], &comp_zero) ) { zz_mult(&temp, &alpha, &x[jx]); for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) { irow = Astore->rowind[i]; zz_mult(&temp1, &temp, &Aval[i]); z_add(&y[irow], &y[irow], &temp1); } } jx += incx; } } else { ABORT("Not implemented."); } } else { /* Form y := alpha*A'*x + y. */ jy = ky; if (incx == 1) { for (j = 0; j < A->ncol; ++j) { temp = comp_zero; for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) { irow = Astore->rowind[i]; zz_mult(&temp1, &Aval[i], &x[irow]); z_add(&temp, &temp, &temp1); } zz_mult(&temp1, &alpha, &temp); z_add(&y[jy], &y[jy], &temp1); jy += incy; } } else { ABORT("Not implemented."); } } return 0; } /* sp_zgemv */
/*! \brief * <pre> * Purpose * ======= * ilu_zdrop_row() - Drop some small rows from the previous * supernode (L-part only). * </pre> */ int ilu_zdrop_row( superlu_options_t *options, /* options */ int first, /* index of the first column in the supernode */ int last, /* index of the last column in the supernode */ double drop_tol, /* dropping parameter */ int quota, /* maximum nonzero entries allowed */ int *nnzLj, /* in/out number of nonzeros in L(:, 1:last) */ double *fill_tol, /* in/out - on exit, fill_tol=-num_zero_pivots, * does not change if options->ILU_MILU != SMILU1 */ GlobalLU_t *Glu, /* modified */ double dwork[], /* working space * the length of dwork[] should be no less than * the number of rows in the supernode */ double dwork2[], /* working space with the same size as dwork[], * used only by the second dropping rule */ int lastc /* if lastc == 0, there is nothing after the * working supernode [first:last]; * if lastc == 1, there is one more column after * the working supernode. */ ) { register int i, j, k, m1; register int nzlc; /* number of nonzeros in column last+1 */ register int xlusup_first, xlsub_first; int m, n; /* m x n is the size of the supernode */ int r = 0; /* number of dropped rows */ register double *temp; register doublecomplex *lusup = Glu->lusup; register int *lsub = Glu->lsub; register int *xlsub = Glu->xlsub; register int *xlusup = Glu->xlusup; register double d_max = 0.0, d_min = 1.0; int drop_rule = options->ILU_DropRule; milu_t milu = options->ILU_MILU; norm_t nrm = options->ILU_Norm; doublecomplex zero = {0.0, 0.0}; doublecomplex one = {1.0, 0.0}; doublecomplex none = {-1.0, 0.0}; int i_1 = 1; int inc_diag; /* inc_diag = m + 1 */ int nzp = 0; /* number of zero pivots */ double alpha = pow((double)(Glu->n), -1.0 / options->ILU_MILU_Dim); xlusup_first = xlusup[first]; xlsub_first = xlsub[first]; m = xlusup[first + 1] - xlusup_first; n = last - first + 1; m1 = m - 1; inc_diag = m + 1; nzlc = lastc ? (xlusup[last + 2] - xlusup[last + 1]) : 0; temp = dwork - n; /* Quick return if nothing to do. */ if (m == 0 || m == n || drop_rule == NODROP) { *nnzLj += m * n; return 0; } /* basic dropping: ILU(tau) */ for (i = n; i <= m1; ) { /* the average abs value of ith row */ switch (nrm) { case ONE_NORM: temp[i] = dzasum_(&n, &lusup[xlusup_first + i], &m) / (double)n; break; case TWO_NORM: temp[i] = dznrm2_(&n, &lusup[xlusup_first + i], &m) / sqrt((double)n); break; case INF_NORM: default: k = izamax_(&n, &lusup[xlusup_first + i], &m) - 1; temp[i] = z_abs1(&lusup[xlusup_first + i + m * k]); break; } /* drop small entries due to drop_tol */ if (drop_rule & DROP_BASIC && temp[i] < drop_tol) { r++; /* drop the current row and move the last undropped row here */ if (r > 1) /* add to last row */ { /* accumulate the sum (for MILU) */ switch (milu) { case SMILU_1: case SMILU_2: zaxpy_(&n, &one, &lusup[xlusup_first + i], &m, &lusup[xlusup_first + m - 1], &m); break; case SMILU_3: for (j = 0; j < n; j++) lusup[xlusup_first + (m - 1) + j * m].r += z_abs1(&lusup[xlusup_first + i + j * m]); break; case SILU: default: break; } zcopy_(&n, &lusup[xlusup_first + m1], &m, &lusup[xlusup_first + i], &m); } /* if (r > 1) */ else /* move to last row */ { zswap_(&n, &lusup[xlusup_first + m1], &m, &lusup[xlusup_first + i], &m); if (milu == SMILU_3) for (j = 0; j < n; j++) { lusup[xlusup_first + m1 + j * m].r = z_abs1(&lusup[xlusup_first + m1 + j * m]); lusup[xlusup_first + m1 + j * m].i = 0.0; } } lsub[xlsub_first + i] = lsub[xlsub_first + m1]; m1--; continue; } /* if dropping */ else { if (temp[i] > d_max) d_max = temp[i]; if (temp[i] < d_min) d_min = temp[i]; } i++; } /* for */ /* Secondary dropping: drop more rows according to the quota. */ quota = ceil((double)quota / (double)n); if (drop_rule & DROP_SECONDARY && m - r > quota) { register double tol = d_max; /* Calculate the second dropping tolerance */ if (quota > n) { if (drop_rule & DROP_INTERP) /* by interpolation */ { d_max = 1.0 / d_max; d_min = 1.0 / d_min; tol = 1.0 / (d_max + (d_min - d_max) * quota / (m - n - r)); } else /* by quick select */ { int len = m1 - n + 1; dcopy_(&len, dwork, &i_1, dwork2, &i_1); tol = dqselect(len, dwork2, quota - n); #if 0 register int *itemp = iwork - n; A = temp; for (i = n; i <= m1; i++) itemp[i] = i; qsort(iwork, m1 - n + 1, sizeof(int), _compare_); tol = temp[itemp[quota]]; #endif } } for (i = n; i <= m1; ) { if (temp[i] <= tol) { register int j; r++; /* drop the current row and move the last undropped row here */ if (r > 1) /* add to last row */ { /* accumulate the sum (for MILU) */ switch (milu) { case SMILU_1: case SMILU_2: zaxpy_(&n, &one, &lusup[xlusup_first + i], &m, &lusup[xlusup_first + m - 1], &m); break; case SMILU_3: for (j = 0; j < n; j++) lusup[xlusup_first + (m - 1) + j * m].r += z_abs1(&lusup[xlusup_first + i + j * m]); break; case SILU: default: break; } zcopy_(&n, &lusup[xlusup_first + m1], &m, &lusup[xlusup_first + i], &m); } /* if (r > 1) */ else /* move to last row */ { zswap_(&n, &lusup[xlusup_first + m1], &m, &lusup[xlusup_first + i], &m); if (milu == SMILU_3) for (j = 0; j < n; j++) { lusup[xlusup_first + m1 + j * m].r = z_abs1(&lusup[xlusup_first + m1 + j * m]); lusup[xlusup_first + m1 + j * m].i = 0.0; } } lsub[xlsub_first + i] = lsub[xlsub_first + m1]; m1--; temp[i] = temp[m1]; continue; } i++; } /* for */ } /* if secondary dropping */ for (i = n; i < m; i++) temp[i] = 0.0; if (r == 0) { *nnzLj += m * n; return 0; } /* add dropped entries to the diagnal */ if (milu != SILU) { register int j; doublecomplex t; double omega; for (j = 0; j < n; j++) { t = lusup[xlusup_first + (m - 1) + j * m]; if (t.r == 0.0 && t.i == 0.0) continue; omega = SUPERLU_MIN(2.0 * (1.0 - alpha) / z_abs1(&t), 1.0); zd_mult(&t, &t, omega); switch (milu) { case SMILU_1: if ( !(z_eq(&t, &none)) ) { z_add(&t, &t, &one); zz_mult(&lusup[xlusup_first + j * inc_diag], &lusup[xlusup_first + j * inc_diag], &t); } else { zd_mult( &lusup[xlusup_first + j * inc_diag], &lusup[xlusup_first + j * inc_diag], *fill_tol); #ifdef DEBUG printf("[1] ZERO PIVOT: FILL col %d.\n", first + j); fflush(stdout); #endif nzp++; } break; case SMILU_2: zd_mult(&lusup[xlusup_first + j * inc_diag], &lusup[xlusup_first + j * inc_diag], 1.0 + z_abs1(&t)); break; case SMILU_3: z_add(&t, &t, &one); zz_mult(&lusup[xlusup_first + j * inc_diag], &lusup[xlusup_first + j * inc_diag], &t); break; case SILU: default: break; } } if (nzp > 0) *fill_tol = -nzp; } /* Remove dropped entries from the memory and fix the pointers. */ m1 = m - r; for (j = 1; j < n; j++) { register int tmp1, tmp2; tmp1 = xlusup_first + j * m1; tmp2 = xlusup_first + j * m; for (i = 0; i < m1; i++) lusup[i + tmp1] = lusup[i + tmp2]; } for (i = 0; i < nzlc; i++) lusup[xlusup_first + i + n * m1] = lusup[xlusup_first + i + n * m]; for (i = 0; i < nzlc; i++) lsub[xlsub[last + 1] - r + i] = lsub[xlsub[last + 1] + i]; for (i = first + 1; i <= last + 1; i++) { xlusup[i] -= r * (i - first); xlsub[i] -= r; } if (lastc) { xlusup[last + 2] -= r * n; xlsub[last + 2] -= r; } *nnzLj += (m - r) * n; return r; }