Beispiel #1
0
/*! \brief Performs one of the matrix-vector operations y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y
 *
 * <pre>
 *   Purpose
 *   =======
 *
 *   sp_zgemv()  performs one of the matrix-vector operations
 *      y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,
 *   where alpha and beta are scalars, x and y are vectors and A is a
 *   sparse A->nrow by A->ncol matrix.
 *
 *   Parameters
 *   ==========
 *
 *   TRANS  - (input) char*
 *            On entry, TRANS specifies the operation to be performed as
 *            follows:
 *               TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.
 *               TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.
 *               TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y.
 *
 *   ALPHA  - (input) doublecomplex
 *            On entry, ALPHA specifies the scalar alpha.
 *
 *   A      - (input) SuperMatrix*
 *            Before entry, the leading m by n part of the array A must
 *            contain the matrix of coefficients.
 *
 *   X      - (input) doublecomplex*, array of DIMENSION at least
 *            ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
 *           and at least
 *            ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
 *            Before entry, the incremented array X must contain the
 *            vector x.
 *
 *   INCX   - (input) int
 *            On entry, INCX specifies the increment for the elements of
 *            X. INCX must not be zero.
 *
 *   BETA   - (input) doublecomplex
 *            On entry, BETA specifies the scalar beta. When BETA is
 *            supplied as zero then Y need not be set on input.
 *
 *   Y      - (output) doublecomplex*,  array of DIMENSION at least
 *            ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
 *            and at least
 *            ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
 *            Before entry with BETA non-zero, the incremented array Y
 *            must contain the vector y. On exit, Y is overwritten by the
 *            updated vector y.
 *
 *   INCY   - (input) int
 *            On entry, INCY specifies the increment for the elements of
 *            Y. INCY must not be zero.
 *
 *    ==== Sparse Level 2 Blas routine.
 * </pre>
*/
int
sp_zgemv(char *trans, doublecomplex alpha, SuperMatrix *A, doublecomplex *x,
         int incx, doublecomplex beta, doublecomplex *y, int incy)
{

    /* Local variables */
    NCformat *Astore;
    doublecomplex   *Aval;
    int info;
    doublecomplex temp, temp1;
    int lenx, leny, i, j, irow;
    int iy, jx, jy, kx, ky;
    int notran;
    doublecomplex comp_zero = {0.0, 0.0};
    doublecomplex comp_one = {1.0, 0.0};

    notran = lsame_(trans, "N");
    Astore = A->Store;
    Aval = Astore->nzval;

    /* Test the input parameters */
    info = 0;
    if ( !notran && !lsame_(trans, "T") && !lsame_(trans, "C")) info = 1;
    else if ( A->nrow < 0 || A->ncol < 0 ) info = 3;
    else if (incx == 0) info = 5;
    else if (incy == 0) info = 8;
    if (info != 0) {
        xerbla_("sp_zgemv ", &info);
        return 0;
    }

    /* Quick return if possible. */
    if (A->nrow == 0 || A->ncol == 0 ||
        z_eq(&alpha, &comp_zero) &&
        z_eq(&beta, &comp_one))
        return 0;


    /* Set  LENX  and  LENY, the lengths of the vectors x and y, and set
       up the start points in  X  and  Y. */
    if (lsame_(trans, "N")) {
        lenx = A->ncol;
        leny = A->nrow;
    } else {
        lenx = A->nrow;
        leny = A->ncol;
    }
    if (incx > 0) kx = 0;
    else kx =  - (lenx - 1) * incx;
    if (incy > 0) ky = 0;
    else ky =  - (leny - 1) * incy;

    /* Start the operations. In this version the elements of A are
       accessed sequentially with one pass through A. */
    /* First form  y := beta*y. */
    if ( !z_eq(&beta, &comp_one) ) {
        if (incy == 1) {
            if ( z_eq(&beta, &comp_zero) )
                for (i = 0; i < leny; ++i) y[i] = comp_zero;
            else
                for (i = 0; i < leny; ++i)
                  zz_mult(&y[i], &beta, &y[i]);
        } else {
            iy = ky;
            if ( z_eq(&beta, &comp_zero) )
                for (i = 0; i < leny; ++i) {
                    y[iy] = comp_zero;
                    iy += incy;
                }
            else
                for (i = 0; i < leny; ++i) {
                    zz_mult(&y[iy], &beta, &y[iy]);
                    iy += incy;
                }
        }
    }

    if ( z_eq(&alpha, &comp_zero) ) return 0;

    if ( notran ) {
        /* Form  y := alpha*A*x + y. */
        jx = kx;
        if (incy == 1) {
            for (j = 0; j < A->ncol; ++j) {
                if ( !z_eq(&x[jx], &comp_zero) ) {
                    zz_mult(&temp, &alpha, &x[jx]);
                    for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
                        irow = Astore->rowind[i];
                        zz_mult(&temp1, &temp,  &Aval[i]);
                        z_add(&y[irow], &y[irow], &temp1);
                    }
                }
                jx += incx;
            }
        } else {
            ABORT("Not implemented.");
        }
    } else {
        /* Form  y := alpha*A'*x + y. */
        jy = ky;
        if (incx == 1) {
            for (j = 0; j < A->ncol; ++j) {
                temp = comp_zero;
                for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
                    irow = Astore->rowind[i];
                    zz_mult(&temp1, &Aval[i], &x[irow]);
                    z_add(&temp, &temp, &temp1);
                }
                zz_mult(&temp1, &alpha, &temp);
                z_add(&y[jy], &y[jy], &temp1);
                jy += incy;
            }
        } else {
            ABORT("Not implemented.");
        }
    }
    return 0;
} /* sp_zgemv */
int
sp_zgemv(char *trans, doublecomplex alpha, SuperMatrix *A, doublecomplex *x, 
	 int incx, doublecomplex beta, doublecomplex *y, int incy)
{
/*  Purpose   
    =======   

    sp_zgemv()  performs one of the matrix-vector operations   
       y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   
    where alpha and beta are scalars, x and y are vectors and A is a
    sparse A->nrow by A->ncol matrix.   

    Parameters   
    ==========   

    TRANS  - (input) char*
             On entry, TRANS specifies the operation to be performed as   
             follows:   
                TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.   
                TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.   
                TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y.   

    ALPHA  - (input) doublecomplex
             On entry, ALPHA specifies the scalar alpha.   

    A      - (input) SuperMatrix*
             Before entry, the leading m by n part of the array A must   
             contain the matrix of coefficients.   

    X      - (input) doublecomplex*, array of DIMENSION at least   
             ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'   
             and at least   
             ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.   
             Before entry, the incremented array X must contain the   
             vector x.   

    INCX   - (input) int
             On entry, INCX specifies the increment for the elements of   
             X. INCX must not be zero.   

    BETA   - (input) doublecomplex
             On entry, BETA specifies the scalar beta. When BETA is   
             supplied as zero then Y need not be set on input.   

    Y      - (output) doublecomplex*,  array of DIMENSION at least   
             ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'   
             and at least   
             ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.   
             Before entry with BETA non-zero, the incremented array Y   
             must contain the vector y. On exit, Y is overwritten by the 
             updated vector y.
	     
    INCY   - (input) int
             On entry, INCY specifies the increment for the elements of   
             Y. INCY must not be zero.   

    ==== Sparse Level 2 Blas routine.   
*/

    /* Local variables */
    NCformat *Astore;
    doublecomplex   *Aval;
    int info;
    doublecomplex temp, temp1;
    int lenx, leny, i, j, irow;
    int iy, jx, jy, kx, ky;
    int notran;
    doublecomplex comp_zero = {0.0, 0.0};
    doublecomplex comp_one = {1.0, 0.0};

    notran = lsame_(trans, "N");
    Astore = A->Store;
    Aval = Astore->nzval;
    
    /* Test the input parameters */
    info = 0;
    if ( !notran && !lsame_(trans, "T") && !lsame_(trans, "C")) info = 1;
    else if ( A->nrow < 0 || A->ncol < 0 ) info = 3;
    else if (incx == 0) info = 5;
    else if (incy == 0)	info = 8;
    if (info != 0) {
	xerbla_("sp_zgemv ", &info);
	return 0;
    }

    /* Quick return if possible. */
    if (A->nrow == 0 || A->ncol == 0 || 
	z_eq(&alpha, &comp_zero) && 
	z_eq(&beta, &comp_one))
	return 0;


    /* Set  LENX  and  LENY, the lengths of the vectors x and y, and set 
       up the start points in  X  and  Y. */
    if (lsame_(trans, "N")) {
	lenx = A->ncol;
	leny = A->nrow;
    } else {
	lenx = A->nrow;
	leny = A->ncol;
    }
    if (incx > 0) kx = 0;
    else kx =  - (lenx - 1) * incx;
    if (incy > 0) ky = 0;
    else ky =  - (leny - 1) * incy;

    /* Start the operations. In this version the elements of A are   
       accessed sequentially with one pass through A. */
    /* First form  y := beta*y. */
    if ( !z_eq(&beta, &comp_one) ) {
	if (incy == 1) {
	    if ( z_eq(&beta, &comp_zero) )
		for (i = 0; i < leny; ++i) y[i] = comp_zero;
	    else
		for (i = 0; i < leny; ++i) 
		  zz_mult(&y[i], &beta, &y[i]);
	} else {
	    iy = ky;
	    if ( z_eq(&beta, &comp_zero) )
		for (i = 0; i < leny; ++i) {
		    y[iy] = comp_zero;
		    iy += incy;
		}
	    else
		for (i = 0; i < leny; ++i) {
		    zz_mult(&y[iy], &beta, &y[iy]);
		    iy += incy;
		}
	}
    }
    
    if ( z_eq(&alpha, &comp_zero) ) return 0;

    if ( notran ) {
	/* Form  y := alpha*A*x + y. */
	jx = kx;
	if (incy == 1) {
	    for (j = 0; j < A->ncol; ++j) {
		if ( !z_eq(&x[jx], &comp_zero) ) {
		    zz_mult(&temp, &alpha, &x[jx]);
		    for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
			irow = Astore->rowind[i];
			zz_mult(&temp1, &temp,  &Aval[i]);
			z_add(&y[irow], &y[irow], &temp1);
		    }
		}
		jx += incx;
	    }
	} else {
	    ABORT("Not implemented.");
	}
    } else {
	/* Form  y := alpha*A'*x + y. */
	jy = ky;
	if (incx == 1) {
	    for (j = 0; j < A->ncol; ++j) {
		temp = comp_zero;
		for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
		    irow = Astore->rowind[i];
		    zz_mult(&temp1, &Aval[i], &x[irow]);
		    z_add(&temp, &temp, &temp1);
		}
		zz_mult(&temp1, &alpha, &temp);
		z_add(&y[jy], &y[jy], &temp1);
		jy += incy;
	    }
	} else {
	    ABORT("Not implemented.");
	}
    }
    return 0;    
} /* sp_zgemv */
Beispiel #3
0
/*! \brief
 * <pre>
 * Purpose
 * =======
 *    ilu_zdrop_row() - Drop some small rows from the previous
 *    supernode (L-part only).
 * </pre>
 */
int ilu_zdrop_row(
        superlu_options_t *options, /* options */
        int    first,       /* index of the first column in the supernode */
        int    last,        /* index of the last column in the supernode */
        double drop_tol,    /* dropping parameter */
        int    quota,       /* maximum nonzero entries allowed */
        int    *nnzLj,      /* in/out number of nonzeros in L(:, 1:last) */
        double *fill_tol,   /* in/out - on exit, fill_tol=-num_zero_pivots,
                             * does not change if options->ILU_MILU != SMILU1 */
        GlobalLU_t *Glu,    /* modified */
        double dwork[],   /* working space
                             * the length of dwork[] should be no less than
                             * the number of rows in the supernode */
        double dwork2[], /* working space with the same size as dwork[],
                             * used only by the second dropping rule */
        int    lastc        /* if lastc == 0, there is nothing after the
                             * working supernode [first:last];
                             * if lastc == 1, there is one more column after
                             * the working supernode. */ )
{
    register int i, j, k, m1;
    register int nzlc; /* number of nonzeros in column last+1 */
    register int xlusup_first, xlsub_first;
    int m, n; /* m x n is the size of the supernode */
    int r = 0; /* number of dropped rows */
    register double *temp;
    register doublecomplex *lusup = Glu->lusup;
    register int *lsub = Glu->lsub;
    register int *xlsub = Glu->xlsub;
    register int *xlusup = Glu->xlusup;
    register double d_max = 0.0, d_min = 1.0;
    int    drop_rule = options->ILU_DropRule;
    milu_t milu = options->ILU_MILU;
    norm_t nrm = options->ILU_Norm;
    doublecomplex zero = {0.0, 0.0};
    doublecomplex one = {1.0, 0.0};
    doublecomplex none = {-1.0, 0.0};
    int i_1 = 1;
    int inc_diag; /* inc_diag = m + 1 */
    int nzp = 0;  /* number of zero pivots */
    double alpha = pow((double)(Glu->n), -1.0 / options->ILU_MILU_Dim);

    xlusup_first = xlusup[first];
    xlsub_first = xlsub[first];
    m = xlusup[first + 1] - xlusup_first;
    n = last - first + 1;
    m1 = m - 1;
    inc_diag = m + 1;
    nzlc = lastc ? (xlusup[last + 2] - xlusup[last + 1]) : 0;
    temp = dwork - n;

    /* Quick return if nothing to do. */
    if (m == 0 || m == n || drop_rule == NODROP)
    {
        *nnzLj += m * n;
        return 0;
    }

    /* basic dropping: ILU(tau) */
    for (i = n; i <= m1; )
    {
        /* the average abs value of ith row */
        switch (nrm)
        {
            case ONE_NORM:
                temp[i] = dzasum_(&n, &lusup[xlusup_first + i], &m) / (double)n;
                break;
            case TWO_NORM:
                temp[i] = dznrm2_(&n, &lusup[xlusup_first + i], &m)
                    / sqrt((double)n);
                break;
            case INF_NORM:
            default:
                k = izamax_(&n, &lusup[xlusup_first + i], &m) - 1;
                temp[i] = z_abs1(&lusup[xlusup_first + i + m * k]);
                break;
        }

        /* drop small entries due to drop_tol */
        if (drop_rule & DROP_BASIC && temp[i] < drop_tol)
        {
            r++;
            /* drop the current row and move the last undropped row here */
            if (r > 1) /* add to last row */
            {
                /* accumulate the sum (for MILU) */
                switch (milu)
                {
                    case SMILU_1:
                    case SMILU_2:
                        zaxpy_(&n, &one, &lusup[xlusup_first + i], &m,
                                &lusup[xlusup_first + m - 1], &m);
                        break;
                    case SMILU_3:
                        for (j = 0; j < n; j++)
                            lusup[xlusup_first + (m - 1) + j * m].r +=
                                    z_abs1(&lusup[xlusup_first + i + j * m]);
                        break;
                    case SILU:
                    default:
                        break;
                }
                zcopy_(&n, &lusup[xlusup_first + m1], &m,
                       &lusup[xlusup_first + i], &m);
            } /* if (r > 1) */
            else /* move to last row */
            {
                zswap_(&n, &lusup[xlusup_first + m1], &m,
                        &lusup[xlusup_first + i], &m);
                if (milu == SMILU_3)
                    for (j = 0; j < n; j++) {
                        lusup[xlusup_first + m1 + j * m].r =
                                z_abs1(&lusup[xlusup_first + m1 + j * m]);
                        lusup[xlusup_first + m1 + j * m].i = 0.0;
                    }
            }
            lsub[xlsub_first + i] = lsub[xlsub_first + m1];
            m1--;
            continue;
        } /* if dropping */
        else
        {
            if (temp[i] > d_max) d_max = temp[i];
            if (temp[i] < d_min) d_min = temp[i];
        }
        i++;
    } /* for */

    /* Secondary dropping: drop more rows according to the quota. */
    quota = ceil((double)quota / (double)n);
    if (drop_rule & DROP_SECONDARY && m - r > quota)
    {
        register double tol = d_max;

        /* Calculate the second dropping tolerance */
        if (quota > n)
        {
            if (drop_rule & DROP_INTERP) /* by interpolation */
            {
                d_max = 1.0 / d_max; d_min = 1.0 / d_min;
                tol = 1.0 / (d_max + (d_min - d_max) * quota / (m - n - r));
            }
            else /* by quick select */
            {
                int len = m1 - n + 1;
                dcopy_(&len, dwork, &i_1, dwork2, &i_1);
                tol = dqselect(len, dwork2, quota - n);
#if 0
                register int *itemp = iwork - n;
                A = temp;
                for (i = n; i <= m1; i++) itemp[i] = i;
                qsort(iwork, m1 - n + 1, sizeof(int), _compare_);
                tol = temp[itemp[quota]];
#endif
            }
        }

        for (i = n; i <= m1; )
        {
            if (temp[i] <= tol)
            {
                register int j;
                r++;
                /* drop the current row and move the last undropped row here */
                if (r > 1) /* add to last row */
                {
                    /* accumulate the sum (for MILU) */
                    switch (milu)
                    {
                        case SMILU_1:
                        case SMILU_2:
                            zaxpy_(&n, &one, &lusup[xlusup_first + i], &m,
                                    &lusup[xlusup_first + m - 1], &m);
                            break;
                        case SMILU_3:
                            for (j = 0; j < n; j++)
                                lusup[xlusup_first + (m - 1) + j * m].r +=
                                  z_abs1(&lusup[xlusup_first + i + j * m]);
                            break;
                        case SILU:
                        default:
                            break;
                    }
                    zcopy_(&n, &lusup[xlusup_first + m1], &m,
                            &lusup[xlusup_first + i], &m);
                } /* if (r > 1) */
                else /* move to last row */
                {
                    zswap_(&n, &lusup[xlusup_first + m1], &m,
                            &lusup[xlusup_first + i], &m);
                    if (milu == SMILU_3)
                        for (j = 0; j < n; j++) {
                            lusup[xlusup_first + m1 + j * m].r =
                                    z_abs1(&lusup[xlusup_first + m1 + j * m]);
                            lusup[xlusup_first + m1 + j * m].i = 0.0;
                        }
                }
                lsub[xlsub_first + i] = lsub[xlsub_first + m1];
                m1--;
                temp[i] = temp[m1];

                continue;
            }
            i++;

        } /* for */

    } /* if secondary dropping */

    for (i = n; i < m; i++) temp[i] = 0.0;

    if (r == 0)
    {
        *nnzLj += m * n;
        return 0;
    }

    /* add dropped entries to the diagnal */
    if (milu != SILU)
    {
        register int j;
        doublecomplex t;
        double omega;
        for (j = 0; j < n; j++)
        {
            t = lusup[xlusup_first + (m - 1) + j * m];
            if (t.r == 0.0 && t.i == 0.0) continue;
            omega = SUPERLU_MIN(2.0 * (1.0 - alpha) / z_abs1(&t), 1.0);
            zd_mult(&t, &t, omega);

            switch (milu)
            {
                case SMILU_1:
                    if ( !(z_eq(&t, &none)) ) {
                        z_add(&t, &t, &one);
                        zz_mult(&lusup[xlusup_first + j * inc_diag],
                                          &lusup[xlusup_first + j * inc_diag],
                                          &t);
                    }
                    else
                    {
                        zd_mult(
                                &lusup[xlusup_first + j * inc_diag],
                                &lusup[xlusup_first + j * inc_diag],
                                *fill_tol);
#ifdef DEBUG
                        printf("[1] ZERO PIVOT: FILL col %d.\n", first + j);
                        fflush(stdout);
#endif
                        nzp++;
                    }
                    break;
                case SMILU_2:
                    zd_mult(&lusup[xlusup_first + j * inc_diag],
                                          &lusup[xlusup_first + j * inc_diag],
                                          1.0 + z_abs1(&t));
                    break;
                case SMILU_3:
                    z_add(&t, &t, &one);
                    zz_mult(&lusup[xlusup_first + j * inc_diag],
                                      &lusup[xlusup_first + j * inc_diag],
                                      &t);
                    break;
                case SILU:
                default:
                    break;
            }
        }
        if (nzp > 0) *fill_tol = -nzp;
    }

    /* Remove dropped entries from the memory and fix the pointers. */
    m1 = m - r;
    for (j = 1; j < n; j++)
    {
        register int tmp1, tmp2;
        tmp1 = xlusup_first + j * m1;
        tmp2 = xlusup_first + j * m;
        for (i = 0; i < m1; i++)
            lusup[i + tmp1] = lusup[i + tmp2];
    }
    for (i = 0; i < nzlc; i++)
        lusup[xlusup_first + i + n * m1] = lusup[xlusup_first + i + n * m];
    for (i = 0; i < nzlc; i++)
        lsub[xlsub[last + 1] - r + i] = lsub[xlsub[last + 1] + i];
    for (i = first + 1; i <= last + 1; i++)
    {
        xlusup[i] -= r * (i - first);
        xlsub[i] -= r;
    }
    if (lastc)
    {
        xlusup[last + 2] -= r * n;
        xlsub[last + 2] -= r;
    }

    *nnzLj += (m - r) * n;
    return r;
}