Example #1
0
int
ilu_zcopy_to_ucol(
	      int	 jcol,	   /* in */
	      int	 nseg,	   /* in */
	      int	 *segrep,  /* in */
	      int	 *repfnz,  /* in */
	      int	 *perm_r,  /* in */
	      doublecomplex	 *dense,   /* modified - reset to zero on return */
	      int  	 drop_rule,/* in */
	      milu_t	 milu,	   /* in */
	      double	 drop_tol, /* in */
	      int	 quota,    /* maximum nonzero entries allowed */
	      doublecomplex	 *sum,	   /* out - the sum of dropped entries */
	      int	 *nnzUj,   /* in - out */
	      GlobalLU_t *Glu,	   /* modified */
	      double	 *work	   /* working space with minimum size n,
				    * used by the second dropping rule */
	      )
{
/*
 * Gather from SPA dense[*] to global ucol[*].
 */
    int       ksub, krep, ksupno;
    int       i, k, kfnz, segsze;
    int       fsupc, isub, irow;
    int       jsupno, nextu;
    int       new_next, mem_error;
    int       *xsup, *supno;
    int       *lsub, *xlsub;
    doublecomplex    *ucol;
    int       *usub, *xusub;
    int       nzumax;
    int       m; /* number of entries in the nonzero U-segments */
    register double d_max = 0.0, d_min = 1.0 / dmach("Safe minimum");
    register double tmp;
    doublecomplex zero = {0.0, 0.0};
    int i_1 = 1;

    xsup    = Glu->xsup;
    supno   = Glu->supno;
    lsub    = Glu->lsub;
    xlsub   = Glu->xlsub;
    ucol    = (doublecomplex *) Glu->ucol;
    usub    = Glu->usub;
    xusub   = Glu->xusub;
    nzumax  = Glu->nzumax;

    *sum = zero;
    if (drop_rule == NODROP) {
	drop_tol = -1.0, quota = Glu->n;
    }

    jsupno = supno[jcol];
    nextu  = xusub[jcol];
    k = nseg - 1;
    for (ksub = 0; ksub < nseg; ksub++) {
	krep = segrep[k--];
	ksupno = supno[krep];

	if ( ksupno != jsupno ) { /* Should go into ucol[] */
	    kfnz = repfnz[krep];
	    if ( kfnz != EMPTY ) {	/* Nonzero U-segment */

		fsupc = xsup[ksupno];
		isub = xlsub[fsupc] + kfnz - fsupc;
		segsze = krep - kfnz + 1;

		new_next = nextu + segsze;
		while ( new_next > nzumax ) {
		    if ((mem_error = zLUMemXpand(jcol, nextu, UCOL, &nzumax,
			    Glu)) != 0)
			return (mem_error);
		    ucol = Glu->ucol;
		    if ((mem_error = zLUMemXpand(jcol, nextu, USUB, &nzumax,
			    Glu)) != 0)
			return (mem_error);
		    usub = Glu->usub;
		    lsub = Glu->lsub;
		}

		for (i = 0; i < segsze; i++) {
		    irow = lsub[isub++];
         	    tmp = z_abs1(&dense[irow]);

		    /* first dropping rule */
		    if (quota > 0 && tmp >= drop_tol) {
			if (tmp > d_max) d_max = tmp;
			if (tmp < d_min) d_min = tmp;
			usub[nextu] = perm_r[irow];
			ucol[nextu] = dense[irow];
			nextu++;
		    } else {
			switch (milu) {
			    case SMILU_1:
			    case SMILU_2:
                                z_add(sum, sum, &dense[irow]);
				break;
			    case SMILU_3:
				/* *sum += fabs(dense[irow]);*/
				sum->r += tmp;
				break;
			    case SILU:
			    default:
				break;
			}
#ifdef DEBUG
			num_drop_U++;
#endif
		    }
		    dense[irow] = zero;
		}

	    }

	}

    } /* for each segment... */

    xusub[jcol + 1] = nextu;	  /* Close U[*,jcol] */
    m = xusub[jcol + 1] - xusub[jcol];

    /* second dropping rule */
    if (drop_rule & DROP_SECONDARY && m > quota) {
	register double tol = d_max;
	register int m0 = xusub[jcol] + m - 1;

	if (quota > 0) {
	    if (drop_rule & DROP_INTERP) {
		d_max = 1.0 / d_max; d_min = 1.0 / d_min;
		tol = 1.0 / (d_max + (d_min - d_max) * quota / m);
	    } else {
                i_1 = xusub[jcol];
                for (i = 0; i < m; ++i, ++i_1) work[i] = z_abs1(&ucol[i_1]);
		tol = dqselect(m, work, quota);
#if 0
		A = &ucol[xusub[jcol]];
		for (i = 0; i < m; i++) work[i] = i;
		qsort(work, m, sizeof(int), _compare_);
		tol = fabs(usub[xusub[jcol] + work[quota]]);
#endif
	    }
	}
	for (i = xusub[jcol]; i <= m0; ) {
	    if (z_abs1(&ucol[i]) <= tol) {
		switch (milu) {
		    case SMILU_1:
		    case SMILU_2:
			z_add(sum, sum, &ucol[i]);
			break;
		    case SMILU_3:
			sum->r += tmp;
			break;
		    case SILU:
		    default:
			break;
		}
		ucol[i] = ucol[m0];
		usub[i] = usub[m0];
		m0--;
		m--;
#ifdef DEBUG
		num_drop_U++;
#endif
		xusub[jcol + 1]--;
		continue;
	    }
	    i++;
	}
    }

    if (milu == SMILU_2) {
        sum->r = z_abs1(sum); sum->i = 0.0;
    }
    if (milu == SMILU_3) sum->i = 0.0;

    *nnzUj += m;

    return 0;
}
Example #2
0
/*! \brief
 * <pre>
 * Purpose
 * =======
 *    ilu_zdrop_row() - Drop some small rows from the previous
 *    supernode (L-part only).
 * </pre>
 */
int ilu_zdrop_row(
        superlu_options_t *options, /* options */
        int    first,       /* index of the first column in the supernode */
        int    last,        /* index of the last column in the supernode */
        double drop_tol,    /* dropping parameter */
        int    quota,       /* maximum nonzero entries allowed */
        int    *nnzLj,      /* in/out number of nonzeros in L(:, 1:last) */
        double *fill_tol,   /* in/out - on exit, fill_tol=-num_zero_pivots,
                             * does not change if options->ILU_MILU != SMILU1 */
        GlobalLU_t *Glu,    /* modified */
        double dwork[],   /* working space
                             * the length of dwork[] should be no less than
                             * the number of rows in the supernode */
        double dwork2[], /* working space with the same size as dwork[],
                             * used only by the second dropping rule */
        int    lastc        /* if lastc == 0, there is nothing after the
                             * working supernode [first:last];
                             * if lastc == 1, there is one more column after
                             * the working supernode. */ )
{
    register int i, j, k, m1;
    register int nzlc; /* number of nonzeros in column last+1 */
    register int xlusup_first, xlsub_first;
    int m, n; /* m x n is the size of the supernode */
    int r = 0; /* number of dropped rows */
    register double *temp;
    register doublecomplex *lusup = Glu->lusup;
    register int *lsub = Glu->lsub;
    register int *xlsub = Glu->xlsub;
    register int *xlusup = Glu->xlusup;
    register double d_max = 0.0, d_min = 1.0;
    int    drop_rule = options->ILU_DropRule;
    milu_t milu = options->ILU_MILU;
    norm_t nrm = options->ILU_Norm;
    doublecomplex zero = {0.0, 0.0};
    doublecomplex one = {1.0, 0.0};
    doublecomplex none = {-1.0, 0.0};
    int i_1 = 1;
    int inc_diag; /* inc_diag = m + 1 */
    int nzp = 0;  /* number of zero pivots */
    double alpha = pow((double)(Glu->n), -1.0 / options->ILU_MILU_Dim);

    xlusup_first = xlusup[first];
    xlsub_first = xlsub[first];
    m = xlusup[first + 1] - xlusup_first;
    n = last - first + 1;
    m1 = m - 1;
    inc_diag = m + 1;
    nzlc = lastc ? (xlusup[last + 2] - xlusup[last + 1]) : 0;
    temp = dwork - n;

    /* Quick return if nothing to do. */
    if (m == 0 || m == n || drop_rule == NODROP)
    {
        *nnzLj += m * n;
        return 0;
    }

    /* basic dropping: ILU(tau) */
    for (i = n; i <= m1; )
    {
        /* the average abs value of ith row */
        switch (nrm)
        {
            case ONE_NORM:
                temp[i] = dzasum_(&n, &lusup[xlusup_first + i], &m) / (double)n;
                break;
            case TWO_NORM:
                temp[i] = dznrm2_(&n, &lusup[xlusup_first + i], &m)
                    / sqrt((double)n);
                break;
            case INF_NORM:
            default:
                k = izamax_(&n, &lusup[xlusup_first + i], &m) - 1;
                temp[i] = z_abs1(&lusup[xlusup_first + i + m * k]);
                break;
        }

        /* drop small entries due to drop_tol */
        if (drop_rule & DROP_BASIC && temp[i] < drop_tol)
        {
            r++;
            /* drop the current row and move the last undropped row here */
            if (r > 1) /* add to last row */
            {
                /* accumulate the sum (for MILU) */
                switch (milu)
                {
                    case SMILU_1:
                    case SMILU_2:
                        zaxpy_(&n, &one, &lusup[xlusup_first + i], &m,
                                &lusup[xlusup_first + m - 1], &m);
                        break;
                    case SMILU_3:
                        for (j = 0; j < n; j++)
                            lusup[xlusup_first + (m - 1) + j * m].r +=
                                    z_abs1(&lusup[xlusup_first + i + j * m]);
                        break;
                    case SILU:
                    default:
                        break;
                }
                zcopy_(&n, &lusup[xlusup_first + m1], &m,
                       &lusup[xlusup_first + i], &m);
            } /* if (r > 1) */
            else /* move to last row */
            {
                zswap_(&n, &lusup[xlusup_first + m1], &m,
                        &lusup[xlusup_first + i], &m);
                if (milu == SMILU_3)
                    for (j = 0; j < n; j++) {
                        lusup[xlusup_first + m1 + j * m].r =
                                z_abs1(&lusup[xlusup_first + m1 + j * m]);
                        lusup[xlusup_first + m1 + j * m].i = 0.0;
                    }
            }
            lsub[xlsub_first + i] = lsub[xlsub_first + m1];
            m1--;
            continue;
        } /* if dropping */
        else
        {
            if (temp[i] > d_max) d_max = temp[i];
            if (temp[i] < d_min) d_min = temp[i];
        }
        i++;
    } /* for */

    /* Secondary dropping: drop more rows according to the quota. */
    quota = ceil((double)quota / (double)n);
    if (drop_rule & DROP_SECONDARY && m - r > quota)
    {
        register double tol = d_max;

        /* Calculate the second dropping tolerance */
        if (quota > n)
        {
            if (drop_rule & DROP_INTERP) /* by interpolation */
            {
                d_max = 1.0 / d_max; d_min = 1.0 / d_min;
                tol = 1.0 / (d_max + (d_min - d_max) * quota / (m - n - r));
            }
            else /* by quick select */
            {
                int len = m1 - n + 1;
                dcopy_(&len, dwork, &i_1, dwork2, &i_1);
                tol = dqselect(len, dwork2, quota - n);
#if 0
                register int *itemp = iwork - n;
                A = temp;
                for (i = n; i <= m1; i++) itemp[i] = i;
                qsort(iwork, m1 - n + 1, sizeof(int), _compare_);
                tol = temp[itemp[quota]];
#endif
            }
        }

        for (i = n; i <= m1; )
        {
            if (temp[i] <= tol)
            {
                register int j;
                r++;
                /* drop the current row and move the last undropped row here */
                if (r > 1) /* add to last row */
                {
                    /* accumulate the sum (for MILU) */
                    switch (milu)
                    {
                        case SMILU_1:
                        case SMILU_2:
                            zaxpy_(&n, &one, &lusup[xlusup_first + i], &m,
                                    &lusup[xlusup_first + m - 1], &m);
                            break;
                        case SMILU_3:
                            for (j = 0; j < n; j++)
                                lusup[xlusup_first + (m - 1) + j * m].r +=
                                  z_abs1(&lusup[xlusup_first + i + j * m]);
                            break;
                        case SILU:
                        default:
                            break;
                    }
                    zcopy_(&n, &lusup[xlusup_first + m1], &m,
                            &lusup[xlusup_first + i], &m);
                } /* if (r > 1) */
                else /* move to last row */
                {
                    zswap_(&n, &lusup[xlusup_first + m1], &m,
                            &lusup[xlusup_first + i], &m);
                    if (milu == SMILU_3)
                        for (j = 0; j < n; j++) {
                            lusup[xlusup_first + m1 + j * m].r =
                                    z_abs1(&lusup[xlusup_first + m1 + j * m]);
                            lusup[xlusup_first + m1 + j * m].i = 0.0;
                        }
                }
                lsub[xlsub_first + i] = lsub[xlsub_first + m1];
                m1--;
                temp[i] = temp[m1];

                continue;
            }
            i++;

        } /* for */

    } /* if secondary dropping */

    for (i = n; i < m; i++) temp[i] = 0.0;

    if (r == 0)
    {
        *nnzLj += m * n;
        return 0;
    }

    /* add dropped entries to the diagnal */
    if (milu != SILU)
    {
        register int j;
        doublecomplex t;
        double omega;
        for (j = 0; j < n; j++)
        {
            t = lusup[xlusup_first + (m - 1) + j * m];
            if (t.r == 0.0 && t.i == 0.0) continue;
            omega = SUPERLU_MIN(2.0 * (1.0 - alpha) / z_abs1(&t), 1.0);
            zd_mult(&t, &t, omega);

            switch (milu)
            {
                case SMILU_1:
                    if ( !(z_eq(&t, &none)) ) {
                        z_add(&t, &t, &one);
                        zz_mult(&lusup[xlusup_first + j * inc_diag],
                                          &lusup[xlusup_first + j * inc_diag],
                                          &t);
                    }
                    else
                    {
                        zd_mult(
                                &lusup[xlusup_first + j * inc_diag],
                                &lusup[xlusup_first + j * inc_diag],
                                *fill_tol);
#ifdef DEBUG
                        printf("[1] ZERO PIVOT: FILL col %d.\n", first + j);
                        fflush(stdout);
#endif
                        nzp++;
                    }
                    break;
                case SMILU_2:
                    zd_mult(&lusup[xlusup_first + j * inc_diag],
                                          &lusup[xlusup_first + j * inc_diag],
                                          1.0 + z_abs1(&t));
                    break;
                case SMILU_3:
                    z_add(&t, &t, &one);
                    zz_mult(&lusup[xlusup_first + j * inc_diag],
                                      &lusup[xlusup_first + j * inc_diag],
                                      &t);
                    break;
                case SILU:
                default:
                    break;
            }
        }
        if (nzp > 0) *fill_tol = -nzp;
    }

    /* Remove dropped entries from the memory and fix the pointers. */
    m1 = m - r;
    for (j = 1; j < n; j++)
    {
        register int tmp1, tmp2;
        tmp1 = xlusup_first + j * m1;
        tmp2 = xlusup_first + j * m;
        for (i = 0; i < m1; i++)
            lusup[i + tmp1] = lusup[i + tmp2];
    }
    for (i = 0; i < nzlc; i++)
        lusup[xlusup_first + i + n * m1] = lusup[xlusup_first + i + n * m];
    for (i = 0; i < nzlc; i++)
        lsub[xlsub[last + 1] - r + i] = lsub[xlsub[last + 1] + i];
    for (i = first + 1; i <= last + 1; i++)
    {
        xlusup[i] -= r * (i - first);
        xlsub[i] -= r;
    }
    if (lastc)
    {
        xlusup[last + 2] -= r * n;
        xlsub[last + 2] -= r;
    }

    *nnzLj += (m - r) * n;
    return r;
}