コード例 #1
0
ファイル: zdiagonal.c プロジェクト: 1641731459/scipy
int zdominate(int n, NCformat *Astore)
/* make the matrix diagonally dominant */
{
    doublecomplex *nzval = (doublecomplex *)Astore->nzval;
    int *rowind = Astore->rowind;
    int *colptr = Astore->colptr;
    int nnz = colptr[n];
    int fill = 0;
    doublecomplex *nzval_new;
    int *rowind_new;
    int i, j, diag;
    double s;

    for (i = 0; i < n; i++)
    {
	diag = -1;
	for (j = colptr[i]; j < colptr[i + 1]; j++)
	    if (rowind[j] == i) diag = j;
	if (diag < 0) fill++;
    }
    if (fill)
    {
	nzval_new = doublecomplexMalloc(nnz + fill);
	rowind_new = intMalloc(nnz+ fill);
	fill = 0;
	for (i = 0; i < n; i++)
	{
	    s = 1e-6;
	    diag = -1;
	    for (j = colptr[i] - fill; j < colptr[i + 1]; j++)
	    {
		if ((rowind_new[j + fill] = rowind[j]) == i) diag = j;
                nzval_new[j + fill] = nzval[j];
		s += z_abs1(&nzval_new[j + fill]);
	    }
	    if (diag >= 0) {
		nzval_new[diag+fill].r = s * 3.0;
		nzval_new[diag+fill].i = 0.0;
	    } else {
		rowind_new[colptr[i + 1] + fill] = i;
		nzval_new[colptr[i + 1] + fill].r = s * 3.0;
		nzval_new[colptr[i + 1] + fill].i = 0.0;
		fill++;
	    }
	    colptr[i + 1] += fill;
	}
	Astore->nzval = nzval_new;
	Astore->rowind = rowind_new;
	SUPERLU_FREE(nzval);
	SUPERLU_FREE(rowind);
    }
    else
    {
	for (i = 0; i < n; i++)
	{
	    s = 1e-6;
	    diag = -1;
	    for (j = colptr[i]; j < colptr[i + 1]; j++)
	    {
		if (rowind[j] == i) diag = j;
		s += z_abs1(&nzval[j]);
	    }
	    nzval[diag].r = s * 3.0;
	    nzval[diag].i = 0.0;
	}
    }
    Astore->nnz += fill;
    return fill;
}
コード例 #2
0
ファイル: zlinsol.c プロジェクト: AmEv7Fam/opentoonz
main(int argc, char *argv[])
{
    SuperMatrix A;
    NCformat *Astore;
    doublecomplex   *a;
    int      *asub, *xa;
    int      *perm_c; /* column permutation vector */
    int      *perm_r; /* row permutations from partial pivoting */
    SuperMatrix L;      /* factor L */
    SCformat *Lstore;
    SuperMatrix U;      /* factor U */
    NCformat *Ustore;
    SuperMatrix B;
    int      nrhs, ldx, info, m, n, nnz;
    doublecomplex   *xact, *rhs;
    mem_usage_t   mem_usage;
    superlu_options_t options;
    SuperLUStat_t stat;
    
#if ( DEBUGlevel>=1 )
    CHECK_MALLOC("Enter main()");
#endif

    /* Set the default input options:
	options.Fact = DOFACT;
        options.Equil = YES;
    	options.ColPerm = COLAMD;
	options.DiagPivotThresh = 1.0;
    	options.Trans = NOTRANS;
    	options.IterRefine = NOREFINE;
    	options.SymmetricMode = NO;
    	options.PivotGrowth = NO;
    	options.ConditionNumber = NO;
    	options.PrintStat = YES;
     */
    set_default_options(&options);

    /* Read the matrix in Harwell-Boeing format. */
    zreadhb(&m, &n, &nnz, &a, &asub, &xa);

    zCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_Z, SLU_GE);
    Astore = A.Store;
    printf("Dimension %dx%d; # nonzeros %d\n", A.nrow, A.ncol, Astore->nnz);
    
    nrhs   = 1;
    if ( !(rhs = doublecomplexMalloc(m * nrhs)) ) ABORT("Malloc fails for rhs[].");
    zCreate_Dense_Matrix(&B, m, nrhs, rhs, m, SLU_DN, SLU_Z, SLU_GE);
    xact = doublecomplexMalloc(n * nrhs);
    ldx = n;
    zGenXtrue(n, nrhs, xact, ldx);
    zFillRHS(options.Trans, nrhs, xact, ldx, &A, &B);

    if ( !(perm_c = intMalloc(n)) ) ABORT("Malloc fails for perm_c[].");
    if ( !(perm_r = intMalloc(m)) ) ABORT("Malloc fails for perm_r[].");

    /* Initialize the statistics variables. */
    StatInit(&stat);
    
    zgssv(&options, &A, perm_c, perm_r, &L, &U, &B, &stat, &info);
    
    if ( info == 0 ) {

	/* This is how you could access the solution matrix. */
        doublecomplex *sol = (doublecomplex*) ((DNformat*) B.Store)->nzval; 

	 /* Compute the infinity norm of the error. */
	zinf_norm_error(nrhs, &B, xact);

	Lstore = (SCformat *) L.Store;
	Ustore = (NCformat *) U.Store;
    	printf("No of nonzeros in factor L = %d\n", Lstore->nnz);
    	printf("No of nonzeros in factor U = %d\n", Ustore->nnz);
    	printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n);
    	printf("FILL ratio = %.1f\n", (float)(Lstore->nnz + Ustore->nnz - n)/nnz);
	
	zQuerySpace(&L, &U, &mem_usage);
	printf("L\\U MB %.3f\ttotal MB needed %.3f\n",
	       mem_usage.for_lu/1e6, mem_usage.total_needed/1e6);
	
    } else {
	printf("zgssv() error returns INFO= %d\n", info);
	if ( info <= n ) { /* factorization completes */
	    zQuerySpace(&L, &U, &mem_usage);
	    printf("L\\U MB %.3f\ttotal MB needed %.3f\n",
		   mem_usage.for_lu/1e6, mem_usage.total_needed/1e6);
	}
    }

    if ( options.PrintStat ) StatPrint(&stat);
    StatFree(&stat);

    SUPERLU_FREE (rhs);
    SUPERLU_FREE (xact);
    SUPERLU_FREE (perm_r);
    SUPERLU_FREE (perm_c);
    Destroy_CompCol_Matrix(&A);
    Destroy_SuperMatrix_Store(&B);
    Destroy_SuperNode_Matrix(&L);
    Destroy_CompCol_Matrix(&U);

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC("Exit main()");
#endif
}
コード例 #3
0
ファイル: zgstrs.c プロジェクト: BijanZarif/oomph-lib
void
zgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U,
        int *perm_c, int *perm_r, SuperMatrix *B,
        SuperLUStat_t *stat, int *info)
{
/*
 * Purpose
 * =======
 *
 * ZGSTRS solves a system of linear equations A*X=B or A'*X=B
 * with A sparse and B dense, using the LU factorization computed by
 * ZGSTRF.
 *
 * See supermatrix.h for the definition of 'SuperMatrix' structure.
 *
 * Arguments
 * =========
 *
 * trans   (input) trans_t
 *          Specifies the form of the system of equations:
 *          = NOTRANS: A * X = B  (No transpose)
 *          = TRANS:   A'* X = B  (Transpose)
 *          = CONJ:    A**H * X = B  (Conjugate transpose)
 *
 * L       (input) SuperMatrix*
 *         The factor L from the factorization Pr*A*Pc=L*U as computed by
 *         zgstrf(). Use compressed row subscripts storage for supernodes,
 *         i.e., L has types: Stype = SLU_SC, Dtype = SLU_Z, Mtype = SLU_TRLU.
 *
 * U       (input) SuperMatrix*
 *         The factor U from the factorization Pr*A*Pc=L*U as computed by
 *         zgstrf(). Use column-wise storage scheme, i.e., U has types:
 *         Stype = SLU_NC, Dtype = SLU_Z, Mtype = SLU_TRU.
 *
 * perm_c  (input) int*, dimension (L->ncol)
 *	   Column permutation vector, which defines the 
 *         permutation matrix Pc; perm_c[i] = j means column i of A is 
 *         in position j in A*Pc.
 *
 * perm_r  (input) int*, dimension (L->nrow)
 *         Row permutation vector, which defines the permutation matrix Pr; 
 *         perm_r[i] = j means row i of A is in position j in Pr*A.
 *
 * B       (input/output) SuperMatrix*
 *         B has types: Stype = SLU_DN, Dtype = SLU_Z, Mtype = SLU_GE.
 *         On entry, the right hand side matrix.
 *         On exit, the solution matrix if info = 0;
 *
 * stat     (output) SuperLUStat_t*
 *          Record the statistics on runtime and floating-point operation count.
 *          See util.h for the definition of 'SuperLUStat_t'.
 *
 * info    (output) int*
 * 	   = 0: successful exit
 *	   < 0: if info = -i, the i-th argument had an illegal value
 *
 */
#ifdef _CRAY
    _fcd ftcs1, ftcs2, ftcs3, ftcs4;
#endif
    int      incx = 1, incy = 1;
#ifdef USE_VENDOR_BLAS
    doublecomplex   alpha = {1.0, 0.0}, beta = {1.0, 0.0};
    doublecomplex   *work_col;
#endif
    doublecomplex   temp_comp;
    DNformat *Bstore;
    doublecomplex   *Bmat;
    SCformat *Lstore;
    NCformat *Ustore;
    doublecomplex   *Lval, *Uval;
    int      fsupc, nrow, nsupr, nsupc, luptr, istart, irow;
    int      i, j, k, iptr, jcol, n, ldb, nrhs;
    doublecomplex   *work, *rhs_work, *soln;
    flops_t  solve_ops;
    void zprint_soln();

    /* Test input parameters ... */
    *info = 0;
    Bstore = B->Store;
    ldb = Bstore->lda;
    nrhs = B->ncol;
    if ( trans != NOTRANS && trans != TRANS && trans != CONJ ) *info = -1;
    else if ( L->nrow != L->ncol || L->nrow < 0 ||
	      L->Stype != SLU_SC || L->Dtype != SLU_Z || L->Mtype != SLU_TRLU )
	*info = -2;
    else if ( U->nrow != U->ncol || U->nrow < 0 ||
	      U->Stype != SLU_NC || U->Dtype != SLU_Z || U->Mtype != SLU_TRU )
	*info = -3;
    else if ( ldb < SUPERLU_MAX(0, L->nrow) ||
	      B->Stype != SLU_DN || B->Dtype != SLU_Z || B->Mtype != SLU_GE )
	*info = -6;
    if ( *info ) {
	i = -(*info);
	xerbla_("zgstrs", &i);
	return;
    }

    n = L->nrow;
    work = doublecomplexCalloc(n * nrhs);
    if ( !work ) ABORT("Malloc fails for local work[].");
    soln = doublecomplexMalloc(n);
    if ( !soln ) ABORT("Malloc fails for local soln[].");

    Bmat = Bstore->nzval;
    Lstore = L->Store;
    Lval = Lstore->nzval;
    Ustore = U->Store;
    Uval = Ustore->nzval;
    solve_ops = 0;
    
    if ( trans == NOTRANS ) {
	/* Permute right hand sides to form Pr*B */
	for (i = 0; i < nrhs; i++) {
	    rhs_work = &Bmat[i*ldb];
	    for (k = 0; k < n; k++) soln[perm_r[k]] = rhs_work[k];
	    for (k = 0; k < n; k++) rhs_work[k] = soln[k];
	}
	
	/* Forward solve PLy=Pb. */
	for (k = 0; k <= Lstore->nsuper; k++) {
	    fsupc = L_FST_SUPC(k);
	    istart = L_SUB_START(fsupc);
	    nsupr = L_SUB_START(fsupc+1) - istart;
	    nsupc = L_FST_SUPC(k+1) - fsupc;
	    nrow = nsupr - nsupc;

	    solve_ops += 4 * nsupc * (nsupc - 1) * nrhs;
	    solve_ops += 8 * nrow * nsupc * nrhs;
	    
	    if ( nsupc == 1 ) {
		for (j = 0; j < nrhs; j++) {
		    rhs_work = &Bmat[j*ldb];
	    	    luptr = L_NZ_START(fsupc);
		    for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); iptr++){
			irow = L_SUB(iptr);
			++luptr;
			zz_mult(&temp_comp, &rhs_work[fsupc], &Lval[luptr]);
			z_sub(&rhs_work[irow], &rhs_work[irow], &temp_comp);
		    }
		}
	    } else {
	    	luptr = L_NZ_START(fsupc);
#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
		ftcs1 = _cptofcd("L", strlen("L"));
		ftcs2 = _cptofcd("N", strlen("N"));
		ftcs3 = _cptofcd("U", strlen("U"));
		CTRSM( ftcs1, ftcs1, ftcs2, ftcs3, &nsupc, &nrhs, &alpha,
		       &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
		
		CGEMM( ftcs2, ftcs2, &nrow, &nrhs, &nsupc, &alpha, 
			&Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb, 
			&beta, &work[0], &n );
#else
		ztrsm_("L", "L", "N", "U", &nsupc, &nrhs, &alpha,
		       &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
		
		zgemm_( "N", "N", &nrow, &nrhs, &nsupc, &alpha, 
			&Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb, 
			&beta, &work[0], &n );
#endif
		for (j = 0; j < nrhs; j++) {
		    rhs_work = &Bmat[j*ldb];
		    work_col = &work[j*n];
		    iptr = istart + nsupc;
		    for (i = 0; i < nrow; i++) {
			irow = L_SUB(iptr);
			z_sub(&rhs_work[irow], &rhs_work[irow], &work_col[i]);
			work_col[i].r = 0.0;
	                work_col[i].i = 0.0;
			iptr++;
		    }
		}
#else		
		for (j = 0; j < nrhs; j++) {
		    rhs_work = &Bmat[j*ldb];
		    zlsolve (nsupr, nsupc, &Lval[luptr], &rhs_work[fsupc]);
		    zmatvec (nsupr, nrow, nsupc, &Lval[luptr+nsupc],
			    &rhs_work[fsupc], &work[0] );

		    iptr = istart + nsupc;
		    for (i = 0; i < nrow; i++) {
			irow = L_SUB(iptr);
			z_sub(&rhs_work[irow], &rhs_work[irow], &work[i]);
			work[i].r = 0.;
	                work[i].i = 0.;
			iptr++;
		    }
		}
#endif		    
	    } /* else ... */
	} /* for L-solve */

#ifdef DEBUG
  	printf("After L-solve: y=\n");
	zprint_soln(n, nrhs, Bmat);
#endif

	/*
	 * Back solve Ux=y.
	 */
	for (k = Lstore->nsuper; k >= 0; k--) {
	    fsupc = L_FST_SUPC(k);
	    istart = L_SUB_START(fsupc);
	    nsupr = L_SUB_START(fsupc+1) - istart;
	    nsupc = L_FST_SUPC(k+1) - fsupc;
	    luptr = L_NZ_START(fsupc);

	    solve_ops += 4 * nsupc * (nsupc + 1) * nrhs;

	    if ( nsupc == 1 ) {
		rhs_work = &Bmat[0];
		for (j = 0; j < nrhs; j++) {
		    z_div(&rhs_work[fsupc], &rhs_work[fsupc], &Lval[luptr]);
		    rhs_work += ldb;
		}
	    } else {
#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
		ftcs1 = _cptofcd("L", strlen("L"));
		ftcs2 = _cptofcd("U", strlen("U"));
		ftcs3 = _cptofcd("N", strlen("N"));
		CTRSM( ftcs1, ftcs2, ftcs3, ftcs3, &nsupc, &nrhs, &alpha,
		       &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
#else
		ztrsm_("L", "U", "N", "N", &nsupc, &nrhs, &alpha,
		       &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
#endif
#else		
		for (j = 0; j < nrhs; j++)
		    zusolve ( nsupr, nsupc, &Lval[luptr], &Bmat[fsupc+j*ldb] );
#endif		
	    }

	    for (j = 0; j < nrhs; ++j) {
		rhs_work = &Bmat[j*ldb];
		for (jcol = fsupc; jcol < fsupc + nsupc; jcol++) {
		    solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
		    for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++ ){
			irow = U_SUB(i);
			zz_mult(&temp_comp, &rhs_work[jcol], &Uval[i]);
			z_sub(&rhs_work[irow], &rhs_work[irow], &temp_comp);
		    }
		}
	    }
	    
	} /* for U-solve */

#ifdef DEBUG
  	printf("After U-solve: x=\n");
	zprint_soln(n, nrhs, Bmat);
#endif

	/* Compute the final solution X := Pc*X. */
	for (i = 0; i < nrhs; i++) {
	    rhs_work = &Bmat[i*ldb];
	    for (k = 0; k < n; k++) soln[k] = rhs_work[perm_c[k]];
	    for (k = 0; k < n; k++) rhs_work[k] = soln[k];
	}
	
        stat->ops[SOLVE] = solve_ops;

    } else { /* Solve A'*X=B or CONJ(A)*X=B */
	/* Permute right hand sides to form Pc'*B. */
	for (i = 0; i < nrhs; i++) {
	    rhs_work = &Bmat[i*ldb];
	    for (k = 0; k < n; k++) soln[perm_c[k]] = rhs_work[k];
	    for (k = 0; k < n; k++) rhs_work[k] = soln[k];
	}

	stat->ops[SOLVE] = 0;
        if (trans == TRANS) {
	    for (k = 0; k < nrhs; ++k) {
	        /* Multiply by inv(U'). */
	        sp_ztrsv("U", "T", "N", L, U, &Bmat[k*ldb], stat, info);
	    
	        /* Multiply by inv(L'). */
	        sp_ztrsv("L", "T", "U", L, U, &Bmat[k*ldb], stat, info);
	    }
         } else { /* trans == CONJ */
            for (k = 0; k < nrhs; ++k) {                
                /* Multiply by conj(inv(U')). */
                sp_ztrsv("U", "C", "N", L, U, &Bmat[k*ldb], stat, info);
                
                /* Multiply by conj(inv(L')). */
                sp_ztrsv("L", "C", "U", L, U, &Bmat[k*ldb], stat, info);
	    }
         }
	/* Compute the final solution X := Pr'*X (=inv(Pr)*X) */
	for (i = 0; i < nrhs; i++) {
	    rhs_work = &Bmat[i*ldb];
	    for (k = 0; k < n; k++) soln[k] = rhs_work[perm_r[k]];
	    for (k = 0; k < n; k++) rhs_work[k] = soln[k];
	}

    }

    SUPERLU_FREE(work);
    SUPERLU_FREE(soln);
}
コード例 #4
0
ファイル: zlinsolx.c プロジェクト: gilso/Packages
main(int argc, char *argv[])
{
    char           equed[1];
    yes_no_t       equil;
    trans_t        trans;
    SuperMatrix    A, L, U;
    SuperMatrix    B, X;
    NCformat       *Astore;
    NCformat       *Ustore;
    SCformat       *Lstore;
    doublecomplex         *a;
    int            *asub, *xa;
    int            *perm_r; /* row permutations from partial pivoting */
    int            *perm_c; /* column permutation vector */
    int            *etree;
    void           *work;
    int            info, lwork, nrhs, ldx;
    int            i, m, n, nnz;
    doublecomplex         *rhsb, *rhsx, *xact;
    double         *R, *C;
    double         *ferr, *berr;
    double         u, rpg, rcond;
    mem_usage_t    mem_usage;
    superlu_options_t options;
    SuperLUStat_t stat;
    extern void  parse_command_line();

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC("Enter main()");
#endif

    /* Defaults */
    lwork = 0;
    nrhs  = 1;
    equil = YES;	
    u     = 1.0;
    trans = NOTRANS;
    
    /* Set the default input options:
	options.Fact = DOFACT;
        options.Equil = YES;
    	options.ColPerm = COLAMD;
	options.DiagPivotThresh = 1.0;
    	options.Trans = NOTRANS;
    	options.IterRefine = NOREFINE;
    	options.SymmetricMode = NO;
    	options.PivotGrowth = NO;
    	options.ConditionNumber = NO;
    	options.PrintStat = YES;
    */
    set_default_options(&options);

    /* Can use command line input to modify the defaults. */
    parse_command_line(argc, argv, &lwork, &u, &equil, &trans);
    options.Equil = equil;
    options.DiagPivotThresh = u;
    options.Trans = trans;

    /* Add more functionalities that the defaults. */
    options.PivotGrowth = YES;    /* Compute reciprocal pivot growth */
    options.ConditionNumber = YES;/* Compute reciprocal condition number */
    options.IterRefine = DOUBLE;  /* Perform double-precision refinement */
    
    if ( lwork > 0 ) {
	work = SUPERLU_MALLOC(lwork);
	if ( !work ) {
	    ABORT("ZLINSOLX: cannot allocate work[]");
	}
    }

    /* Read matrix A from a file in Harwell-Boeing format.*/
    zreadhb(&m, &n, &nnz, &a, &asub, &xa);
    
    zCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_Z, SLU_GE);
    Astore = A.Store;
    printf("Dimension %dx%d; # nonzeros %d\n", A.nrow, A.ncol, Astore->nnz);
    
    if ( !(rhsb = doublecomplexMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsb[].");
    if ( !(rhsx = doublecomplexMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsx[].");
    zCreate_Dense_Matrix(&B, m, nrhs, rhsb, m, SLU_DN, SLU_Z, SLU_GE);
    zCreate_Dense_Matrix(&X, m, nrhs, rhsx, m, SLU_DN, SLU_Z, SLU_GE);
    xact = doublecomplexMalloc(n * nrhs);
    ldx = n;
    zGenXtrue(n, nrhs, xact, ldx);
    zFillRHS(trans, nrhs, xact, ldx, &A, &B);
    
    if ( !(etree = intMalloc(n)) ) ABORT("Malloc fails for etree[].");
    if ( !(perm_r = intMalloc(m)) ) ABORT("Malloc fails for perm_r[].");
    if ( !(perm_c = intMalloc(n)) ) ABORT("Malloc fails for perm_c[].");
    if ( !(R = (double *) SUPERLU_MALLOC(A.nrow * sizeof(double))) ) 
        ABORT("SUPERLU_MALLOC fails for R[].");
    if ( !(C = (double *) SUPERLU_MALLOC(A.ncol * sizeof(double))) )
        ABORT("SUPERLU_MALLOC fails for C[].");
    if ( !(ferr = (double *) SUPERLU_MALLOC(nrhs * sizeof(double))) )
        ABORT("SUPERLU_MALLOC fails for ferr[].");
    if ( !(berr = (double *) SUPERLU_MALLOC(nrhs * sizeof(double))) ) 
        ABORT("SUPERLU_MALLOC fails for berr[].");

    
    /* Initialize the statistics variables. */
    StatInit(&stat);
    
    /* Solve the system and compute the condition number
       and error bounds using dgssvx.      */
    
    zgssvx(&options, &A, perm_c, perm_r, etree, equed, R, C,
           &L, &U, work, lwork, &B, &X, &rpg, &rcond, ferr, berr,
           &mem_usage, &stat, &info);

    printf("zgssvx(): info %d\n", info);

    if ( info == 0 || info == n+1 ) {

        /* This is how you could access the solution matrix. */
        doublecomplex *sol = (doublecomplex*) ((DNformat*) X.Store)->nzval; 

	if ( options.PivotGrowth == YES )
            printf("Recip. pivot growth = %e\n", rpg);
	if ( options.ConditionNumber == YES )
	    printf("Recip. condition number = %e\n", rcond);
	if ( options.IterRefine != NOREFINE ) {
            printf("Iterative Refinement:\n");
	    printf("%8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR");
	    for (i = 0; i < nrhs; ++i)
	      printf("%8d%8d%16e%16e\n", i+1, stat.RefineSteps, ferr[i], berr[i]);
	}
        Lstore = (SCformat *) L.Store;
        Ustore = (NCformat *) U.Store;
	printf("No of nonzeros in factor L = %d\n", Lstore->nnz);
    	printf("No of nonzeros in factor U = %d\n", Ustore->nnz);
    	printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n);
    	printf("FILL ratio = %.1f\n", (float)(Lstore->nnz + Ustore->nnz - n)/nnz);

	printf("L\\U MB %.3f\ttotal MB needed %.3f\n",
	       mem_usage.for_lu/1e6, mem_usage.total_needed/1e6);
	     
	fflush(stdout);

    } else if ( info > 0 && lwork == -1 ) {
        printf("** Estimated memory: %d bytes\n", info - n);
    }

    if ( options.PrintStat ) StatPrint(&stat);
    StatFree(&stat);

    SUPERLU_FREE (rhsb);
    SUPERLU_FREE (rhsx);
    SUPERLU_FREE (xact);
    SUPERLU_FREE (etree);
    SUPERLU_FREE (perm_r);
    SUPERLU_FREE (perm_c);
    SUPERLU_FREE (R);
    SUPERLU_FREE (C);
    SUPERLU_FREE (ferr);
    SUPERLU_FREE (berr);
    Destroy_CompCol_Matrix(&A);
    Destroy_SuperMatrix_Store(&B);
    Destroy_SuperMatrix_Store(&X);
    if ( lwork >= 0 ) {
        Destroy_SuperNode_Matrix(&L);
        Destroy_CompCol_Matrix(&U);
    }

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC("Exit main()");
#endif
}
コード例 #5
0
ファイル: zgsrfs.c プロジェクト: AtomAleks/PyProp
/*! \brief
 *
 * <pre>
 *   Purpose   
 *   =======   
 *
 *   ZGSRFS improves the computed solution to a system of linear   
 *   equations and provides error bounds and backward error estimates for 
 *   the solution.   
 *
 *   If equilibration was performed, the system becomes:
 *           (diag(R)*A_original*diag(C)) * X = diag(R)*B_original.
 *
 *   See supermatrix.h for the definition of 'SuperMatrix' structure.
 *
 *   Arguments   
 *   =========   
 *
 * trans   (input) trans_t
 *          Specifies the form of the system of equations:
 *          = NOTRANS: A * X = B  (No transpose)
 *          = TRANS:   A'* X = B  (Transpose)
 *          = CONJ:    A**H * X = B  (Conjugate transpose)
 *   
 *   A       (input) SuperMatrix*
 *           The original matrix A in the system, or the scaled A if
 *           equilibration was done. The type of A can be:
 *           Stype = SLU_NC, Dtype = SLU_Z, Mtype = SLU_GE.
 *    
 *   L       (input) SuperMatrix*
 *	     The factor L from the factorization Pr*A*Pc=L*U. Use
 *           compressed row subscripts storage for supernodes, 
 *           i.e., L has types: Stype = SLU_SC, Dtype = SLU_Z, Mtype = SLU_TRLU.
 * 
 *   U       (input) SuperMatrix*
 *           The factor U from the factorization Pr*A*Pc=L*U as computed by
 *           zgstrf(). Use column-wise storage scheme, 
 *           i.e., U has types: Stype = SLU_NC, Dtype = SLU_Z, Mtype = SLU_TRU.
 *
 *   perm_c  (input) int*, dimension (A->ncol)
 *	     Column permutation vector, which defines the 
 *           permutation matrix Pc; perm_c[i] = j means column i of A is 
 *           in position j in A*Pc.
 *
 *   perm_r  (input) int*, dimension (A->nrow)
 *           Row permutation vector, which defines the permutation matrix Pr;
 *           perm_r[i] = j means row i of A is in position j in Pr*A.
 *
 *   equed   (input) Specifies the form of equilibration that was done.
 *           = 'N': No equilibration.
 *           = 'R': Row equilibration, i.e., A was premultiplied by diag(R).
 *           = 'C': Column equilibration, i.e., A was postmultiplied by
 *                  diag(C).
 *           = 'B': Both row and column equilibration, i.e., A was replaced 
 *                  by diag(R)*A*diag(C).
 *
 *   R       (input) double*, dimension (A->nrow)
 *           The row scale factors for A.
 *           If equed = 'R' or 'B', A is premultiplied by diag(R).
 *           If equed = 'N' or 'C', R is not accessed.
 * 
 *   C       (input) double*, dimension (A->ncol)
 *           The column scale factors for A.
 *           If equed = 'C' or 'B', A is postmultiplied by diag(C).
 *           If equed = 'N' or 'R', C is not accessed.
 *
 *   B       (input) SuperMatrix*
 *           B has types: Stype = SLU_DN, Dtype = SLU_Z, Mtype = SLU_GE.
 *           The right hand side matrix B.
 *           if equed = 'R' or 'B', B is premultiplied by diag(R).
 *
 *   X       (input/output) SuperMatrix*
 *           X has types: Stype = SLU_DN, Dtype = SLU_Z, Mtype = SLU_GE.
 *           On entry, the solution matrix X, as computed by zgstrs().
 *           On exit, the improved solution matrix X.
 *           if *equed = 'C' or 'B', X should be premultiplied by diag(C)
 *               in order to obtain the solution to the original system.
 *
 *   FERR    (output) double*, dimension (B->ncol)   
 *           The estimated forward error bound for each solution vector   
 *           X(j) (the j-th column of the solution matrix X).   
 *           If XTRUE is the true solution corresponding to X(j), FERR(j) 
 *           is an estimated upper bound for the magnitude of the largest 
 *           element in (X(j) - XTRUE) divided by the magnitude of the   
 *           largest element in X(j).  The estimate is as reliable as   
 *           the estimate for RCOND, and is almost always a slight   
 *           overestimate of the true error.
 *
 *   BERR    (output) double*, dimension (B->ncol)   
 *           The componentwise relative backward error of each solution   
 *           vector X(j) (i.e., the smallest relative change in   
 *           any element of A or B that makes X(j) an exact solution).
 *
 *   stat     (output) SuperLUStat_t*
 *            Record the statistics on runtime and floating-point operation count.
 *            See util.h for the definition of 'SuperLUStat_t'.
 *
 *   info    (output) int*   
 *           = 0:  successful exit   
 *            < 0:  if INFO = -i, the i-th argument had an illegal value   
 *
 *    Internal Parameters   
 *    ===================   
 *
 *    ITMAX is the maximum number of steps of iterative refinement.   
 *
 * </pre>
 */
void
zgsrfs(trans_t trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U,
       int *perm_c, int *perm_r, char *equed, double *R, double *C,
       SuperMatrix *B, SuperMatrix *X, double *ferr, double *berr,
       SuperLUStat_t *stat, int *info)
{


#define ITMAX 5
    
    /* Table of constant values */
    int    ione = 1;
    doublecomplex ndone = {-1., 0.};
    doublecomplex done = {1., 0.};
    
    /* Local variables */
    NCformat *Astore;
    doublecomplex   *Aval;
    SuperMatrix Bjcol;
    DNformat *Bstore, *Xstore, *Bjcol_store;
    doublecomplex   *Bmat, *Xmat, *Bptr, *Xptr;
    int      kase;
    double   safe1, safe2;
    int      i, j, k, irow, nz, count, notran, rowequ, colequ;
    int      ldb, ldx, nrhs;
    double   s, xk, lstres, eps, safmin;
    char     transc[1];
    trans_t  transt;
    doublecomplex   *work;
    double   *rwork;
    int      *iwork;
    extern double dlamch_(char *);
    extern int zlacon_(int *, doublecomplex *, doublecomplex *, double *, int *);
#ifdef _CRAY
    extern int CCOPY(int *, doublecomplex *, int *, doublecomplex *, int *);
    extern int CSAXPY(int *, doublecomplex *, doublecomplex *, int *, doublecomplex *, int *);
#else
    extern int zcopy_(int *, doublecomplex *, int *, doublecomplex *, int *);
    extern int zaxpy_(int *, doublecomplex *, doublecomplex *, int *, doublecomplex *, int *);
#endif

    Astore = A->Store;
    Aval   = Astore->nzval;
    Bstore = B->Store;
    Xstore = X->Store;
    Bmat   = Bstore->nzval;
    Xmat   = Xstore->nzval;
    ldb    = Bstore->lda;
    ldx    = Xstore->lda;
    nrhs   = B->ncol;
    
    /* Test the input parameters */
    *info = 0;
    notran = (trans == NOTRANS);
    if ( !notran && trans != TRANS && trans != CONJ ) *info = -1;
    else if ( A->nrow != A->ncol || A->nrow < 0 ||
	      A->Stype != SLU_NC || A->Dtype != SLU_Z || A->Mtype != SLU_GE )
	*info = -2;
    else if ( L->nrow != L->ncol || L->nrow < 0 ||
 	      L->Stype != SLU_SC || L->Dtype != SLU_Z || L->Mtype != SLU_TRLU )
	*info = -3;
    else if ( U->nrow != U->ncol || U->nrow < 0 ||
 	      U->Stype != SLU_NC || U->Dtype != SLU_Z || U->Mtype != SLU_TRU )
	*info = -4;
    else if ( ldb < SUPERLU_MAX(0, A->nrow) ||
 	      B->Stype != SLU_DN || B->Dtype != SLU_Z || B->Mtype != SLU_GE )
        *info = -10;
    else if ( ldx < SUPERLU_MAX(0, A->nrow) ||
 	      X->Stype != SLU_DN || X->Dtype != SLU_Z || X->Mtype != SLU_GE )
	*info = -11;
    if (*info != 0) {
	i = -(*info);
	xerbla_("zgsrfs", &i);
	return;
    }

    /* Quick return if possible */
    if ( A->nrow == 0 || nrhs == 0) {
	for (j = 0; j < nrhs; ++j) {
	    ferr[j] = 0.;
	    berr[j] = 0.;
	}
	return;
    }

    rowequ = lsame_(equed, "R") || lsame_(equed, "B");
    colequ = lsame_(equed, "C") || lsame_(equed, "B");
    
    /* Allocate working space */
    work = doublecomplexMalloc(2*A->nrow);
    rwork = (double *) SUPERLU_MALLOC( A->nrow * sizeof(double) );
    iwork = intMalloc(A->nrow);
    if ( !work || !rwork || !iwork ) 
        ABORT("Malloc fails for work/rwork/iwork.");
    
    if ( notran ) {
	*(unsigned char *)transc = 'N';
        transt = TRANS;
    } else {
	*(unsigned char *)transc = 'T';
	transt = NOTRANS;
    }

    /* NZ = maximum number of nonzero elements in each row of A, plus 1 */
    nz     = A->ncol + 1;
    eps    = dlamch_("Epsilon");
    safmin = dlamch_("Safe minimum");
    /* Set SAFE1 essentially to be the underflow threshold times the
       number of additions in each row. */
    safe1  = nz * safmin;
    safe2  = safe1 / eps;

    /* Compute the number of nonzeros in each row (or column) of A */
    for (i = 0; i < A->nrow; ++i) iwork[i] = 0;
    if ( notran ) {
	for (k = 0; k < A->ncol; ++k)
	    for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) 
		++iwork[Astore->rowind[i]];
    } else {
	for (k = 0; k < A->ncol; ++k)
	    iwork[k] = Astore->colptr[k+1] - Astore->colptr[k];
    }	

    /* Copy one column of RHS B into Bjcol. */
    Bjcol.Stype = B->Stype;
    Bjcol.Dtype = B->Dtype;
    Bjcol.Mtype = B->Mtype;
    Bjcol.nrow  = B->nrow;
    Bjcol.ncol  = 1;
    Bjcol.Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) );
    if ( !Bjcol.Store ) ABORT("SUPERLU_MALLOC fails for Bjcol.Store");
    Bjcol_store = Bjcol.Store;
    Bjcol_store->lda = ldb;
    Bjcol_store->nzval = work; /* address aliasing */
	
    /* Do for each right hand side ... */
    for (j = 0; j < nrhs; ++j) {
	count = 0;
	lstres = 3.;
	Bptr = &Bmat[j*ldb];
	Xptr = &Xmat[j*ldx];

	while (1) { /* Loop until stopping criterion is satisfied. */

	    /* Compute residual R = B - op(A) * X,   
	       where op(A) = A, A**T, or A**H, depending on TRANS. */
	    
#ifdef _CRAY
	    CCOPY(&A->nrow, Bptr, &ione, work, &ione);
#else
	    zcopy_(&A->nrow, Bptr, &ione, work, &ione);
#endif
	    sp_zgemv(transc, ndone, A, Xptr, ione, done, work, ione);

	    /* Compute componentwise relative backward error from formula 
	       max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )   
	       where abs(Z) is the componentwise absolute value of the matrix
	       or vector Z.  If the i-th component of the denominator is less
	       than SAFE2, then SAFE1 is added to the i-th component of the   
	       numerator before dividing. */

	    for (i = 0; i < A->nrow; ++i) rwork[i] = z_abs1( &Bptr[i] );
	    
	    /* Compute abs(op(A))*abs(X) + abs(B). */
	    if (notran) {
		for (k = 0; k < A->ncol; ++k) {
		    xk = z_abs1( &Xptr[k] );
		    for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i)
			rwork[Astore->rowind[i]] += z_abs1(&Aval[i]) * xk;
		}
	    } else {
		for (k = 0; k < A->ncol; ++k) {
		    s = 0.;
		    for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) {
			irow = Astore->rowind[i];
			s += z_abs1(&Aval[i]) * z_abs1(&Xptr[irow]);
		    }
		    rwork[k] += s;
		}
	    }
	    s = 0.;
	    for (i = 0; i < A->nrow; ++i) {
		if (rwork[i] > safe2) {
		    s = SUPERLU_MAX( s, z_abs1(&work[i]) / rwork[i] );
                } else if ( rwork[i] != 0.0 ) {
		    s = SUPERLU_MAX( s, (z_abs1(&work[i]) + safe1) / rwork[i] );
                }
                /* If rwork[i] is exactly 0.0, then we know the true 
                   residual also must be exactly 0.0. */
	    }
	    berr[j] = s;

	    /* Test stopping criterion. Continue iterating if   
	       1) The residual BERR(J) is larger than machine epsilon, and   
	       2) BERR(J) decreased by at least a factor of 2 during the   
	          last iteration, and   
	       3) At most ITMAX iterations tried. */

	    if (berr[j] > eps && berr[j] * 2. <= lstres && count < ITMAX) {
		/* Update solution and try again. */
		zgstrs (trans, L, U, perm_c, perm_r, &Bjcol, stat, info);
		
#ifdef _CRAY
		CAXPY(&A->nrow, &done, work, &ione,
		       &Xmat[j*ldx], &ione);
#else
		zaxpy_(&A->nrow, &done, work, &ione,
		       &Xmat[j*ldx], &ione);
#endif
		lstres = berr[j];
		++count;
	    } else {
		break;
	    }
        
	} /* end while */

	stat->RefineSteps = count;

	/* Bound error from formula:
	   norm(X - XTRUE) / norm(X) .le. FERR = norm( abs(inv(op(A)))*   
	   ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X)   
          where   
            norm(Z) is the magnitude of the largest component of Z   
            inv(op(A)) is the inverse of op(A)   
            abs(Z) is the componentwise absolute value of the matrix or
	       vector Z   
            NZ is the maximum number of nonzeros in any row of A, plus 1   
            EPS is machine epsilon   

          The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B))   
          is incremented by SAFE1 if the i-th component of   
          abs(op(A))*abs(X) + abs(B) is less than SAFE2.   

          Use ZLACON to estimate the infinity-norm of the matrix   
             inv(op(A)) * diag(W),   
          where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */
	
	for (i = 0; i < A->nrow; ++i) rwork[i] = z_abs1( &Bptr[i] );
	
	/* Compute abs(op(A))*abs(X) + abs(B). */
	if ( notran ) {
	    for (k = 0; k < A->ncol; ++k) {
		xk = z_abs1( &Xptr[k] );
		for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i)
		    rwork[Astore->rowind[i]] += z_abs1(&Aval[i]) * xk;
	    }
	} else {
	    for (k = 0; k < A->ncol; ++k) {
		s = 0.;
		for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) {
		    irow = Astore->rowind[i];
		    xk = z_abs1( &Xptr[irow] );
		    s += z_abs1(&Aval[i]) * xk;
		}
		rwork[k] += s;
	    }
	}
	
	for (i = 0; i < A->nrow; ++i)
	    if (rwork[i] > safe2)
		rwork[i] = z_abs(&work[i]) + (iwork[i]+1)*eps*rwork[i];
	    else
		rwork[i] = z_abs(&work[i])+(iwork[i]+1)*eps*rwork[i]+safe1;
	kase = 0;

	do {
	    zlacon_(&A->nrow, &work[A->nrow], work,
		    &ferr[j], &kase);
	    if (kase == 0) break;

	    if (kase == 1) {
		/* Multiply by diag(W)*inv(op(A)**T)*(diag(C) or diag(R)). */
		if ( notran && colequ )
		    for (i = 0; i < A->ncol; ++i) {
		        zd_mult(&work[i], &work[i], C[i]);
	            }
		else if ( !notran && rowequ )
		    for (i = 0; i < A->nrow; ++i) {
		        zd_mult(&work[i], &work[i], R[i]);
                    }

		zgstrs (transt, L, U, perm_c, perm_r, &Bjcol, stat, info);
		
		for (i = 0; i < A->nrow; ++i) {
		    zd_mult(&work[i], &work[i], rwork[i]);
	 	}
	    } else {
		/* Multiply by (diag(C) or diag(R))*inv(op(A))*diag(W). */
		for (i = 0; i < A->nrow; ++i) {
		    zd_mult(&work[i], &work[i], rwork[i]);
		}
		
		zgstrs (trans, L, U, perm_c, perm_r, &Bjcol, stat, info);
		
		if ( notran && colequ )
		    for (i = 0; i < A->ncol; ++i) {
		        zd_mult(&work[i], &work[i], C[i]);
		    }
		else if ( !notran && rowequ )
		    for (i = 0; i < A->ncol; ++i) {
		        zd_mult(&work[i], &work[i], R[i]);  
		    }
	    }
	    
	} while ( kase != 0 );

	/* Normalize error. */
	lstres = 0.;
 	if ( notran && colequ ) {
	    for (i = 0; i < A->nrow; ++i)
	    	lstres = SUPERLU_MAX( lstres, C[i] * z_abs1( &Xptr[i]) );
  	} else if ( !notran && rowequ ) {
	    for (i = 0; i < A->nrow; ++i)
	    	lstres = SUPERLU_MAX( lstres, R[i] * z_abs1( &Xptr[i]) );
	} else {
	    for (i = 0; i < A->nrow; ++i)
	    	lstres = SUPERLU_MAX( lstres, z_abs1( &Xptr[i]) );
	}
	if ( lstres != 0. )
	    ferr[j] /= lstres;

    } /* for each RHS j ... */
    
    SUPERLU_FREE(work);
    SUPERLU_FREE(rwork);
    SUPERLU_FREE(iwork);
    SUPERLU_FREE(Bjcol.Store);

    return;

} /* zgsrfs */
コード例 #6
0
ファイル: zlinsolx1.c プロジェクト: Amanotoko/fem
int main(int argc, char *argv[])
{
/*
 * Purpose
 * =======
 *
 * The driver program ZLINSOLX1.
 *
 * This example illustrates how to use ZGSSVX to solve systems with the same
 * A but different right-hand side.
 * In this case, we factorize A only once in the first call to DGSSVX,
 * and reuse the following data structures in the subsequent call to ZGSSVX:
 *     perm_c, perm_r, R, C, L, U.
 * 
 */
    char           equed[1];
    yes_no_t       equil;
    trans_t        trans;
    SuperMatrix    A, L, U;
    SuperMatrix    B, X;
    NCformat       *Astore;
    NCformat       *Ustore;
    SCformat       *Lstore;
    doublecomplex         *a;
    int            *asub, *xa;
    int            *perm_c; /* column permutation vector */
    int            *perm_r; /* row permutations from partial pivoting */
    int            *etree;
    void           *work;
    int            info, lwork, nrhs, ldx;
    int            i, m, n, nnz;
    doublecomplex         *rhsb, *rhsx, *xact;
    double         *R, *C;
    double         *ferr, *berr;
    double         u, rpg, rcond;
    mem_usage_t    mem_usage;
    superlu_options_t options;
    SuperLUStat_t stat;
    extern void    parse_command_line();

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC("Enter main()");
#endif

    /* Defaults */
    lwork = 0;
    nrhs  = 1;
    equil = YES;	
    u     = 1.0;
    trans = NOTRANS;

    /* Set the default values for options argument:
	options.Fact = DOFACT;
        options.Equil = YES;
    	options.ColPerm = COLAMD;
	options.DiagPivotThresh = 1.0;
    	options.Trans = NOTRANS;
    	options.IterRefine = NOREFINE;
    	options.SymmetricMode = NO;
    	options.PivotGrowth = NO;
    	options.ConditionNumber = NO;
    	options.PrintStat = YES;
    */
    set_default_options(&options);

    /* Can use command line input to modify the defaults. */
    parse_command_line(argc, argv, &lwork, &u, &equil, &trans);
    options.Equil = equil;
    options.DiagPivotThresh = u;
    options.Trans = trans;
    
    if ( lwork > 0 ) {
	work = SUPERLU_MALLOC(lwork);
	if ( !work ) {
	    ABORT("ZLINSOLX: cannot allocate work[]");
	}
    }

    /* Read matrix A from a file in Harwell-Boeing format.*/
    zreadhb(&m, &n, &nnz, &a, &asub, &xa);
    
    zCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_Z, SLU_GE);
    Astore = A.Store;
    printf("Dimension %dx%d; # nonzeros %d\n", A.nrow, A.ncol, Astore->nnz);
    
    if ( !(rhsb = doublecomplexMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsb[].");
    if ( !(rhsx = doublecomplexMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsx[].");
    zCreate_Dense_Matrix(&B, m, nrhs, rhsb, m, SLU_DN, SLU_Z, SLU_GE);
    zCreate_Dense_Matrix(&X, m, nrhs, rhsx, m, SLU_DN, SLU_Z, SLU_GE);
    xact = doublecomplexMalloc(n * nrhs);
    ldx = n;
    zGenXtrue(n, nrhs, xact, ldx);
    zFillRHS(trans, nrhs, xact, ldx, &A, &B);
    
    if ( !(etree = intMalloc(n)) ) ABORT("Malloc fails for etree[].");
    if ( !(perm_r = intMalloc(m)) ) ABORT("Malloc fails for perm_r[].");
    if ( !(perm_c = intMalloc(n)) ) ABORT("Malloc fails for perm_c[].");
    if ( !(R = (double *) SUPERLU_MALLOC(A.nrow * sizeof(double))) ) 
        ABORT("SUPERLU_MALLOC fails for R[].");
    if ( !(C = (double *) SUPERLU_MALLOC(A.ncol * sizeof(double))) )
        ABORT("SUPERLU_MALLOC fails for C[].");
    if ( !(ferr = (double *) SUPERLU_MALLOC(nrhs * sizeof(double))) )
        ABORT("SUPERLU_MALLOC fails for ferr[].");
    if ( !(berr = (double *) SUPERLU_MALLOC(nrhs * sizeof(double))) ) 
        ABORT("SUPERLU_MALLOC fails for berr[].");

    /* Initialize the statistics variables. */
    StatInit(&stat);
    
    /* ONLY PERFORM THE LU DECOMPOSITION */
    B.ncol = 0;  /* Indicate not to solve the system */
    zgssvx(&options, &A, perm_c, perm_r, etree, equed, R, C,
           &L, &U, work, lwork, &B, &X, &rpg, &rcond, ferr, berr,
           &mem_usage, &stat, &info);

    printf("LU factorization: zgssvx() returns info %d\n", info);

    if ( info == 0 || info == n+1 ) {

	if ( options.PivotGrowth ) printf("Recip. pivot growth = %e\n", rpg);
	if ( options.ConditionNumber )
	    printf("Recip. condition number = %e\n", rcond);
        Lstore = (SCformat *) L.Store;
        Ustore = (NCformat *) U.Store;
	printf("No of nonzeros in factor L = %d\n", Lstore->nnz);
    	printf("No of nonzeros in factor U = %d\n", Ustore->nnz);
    	printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n);
    	printf("FILL ratio = %.1f\n", (float)(Lstore->nnz + Ustore->nnz - n)/nnz);

	printf("L\\U MB %.3f\ttotal MB needed %.3f\n",
	       mem_usage.for_lu/1e6, mem_usage.total_needed/1e6);
	fflush(stdout);

    } else if ( info > 0 && lwork == -1 ) {
        printf("** Estimated memory: %d bytes\n", info - n);
    }

    if ( options.PrintStat ) StatPrint(&stat);
    StatFree(&stat);

    /* ------------------------------------------------------------
       NOW WE SOLVE THE LINEAR SYSTEM USING THE FACTORED FORM OF A.
       ------------------------------------------------------------*/
    options.Fact = FACTORED; /* Indicate the factored form of A is supplied. */
    B.ncol = nrhs;  /* Set the number of right-hand side */

    /* Initialize the statistics variables. */
    StatInit(&stat);

    zgssvx(&options, &A, perm_c, perm_r, etree, equed, R, C,
           &L, &U, work, lwork, &B, &X, &rpg, &rcond, ferr, berr,
           &mem_usage, &stat, &info);

    printf("Triangular solve: zgssvx() returns info %d\n", info);

    if ( info == 0 || info == n+1 ) {

        /* This is how you could access the solution matrix. */
        doublecomplex *sol = (doublecomplex*) ((DNformat*) X.Store)->nzval; 

	if ( options.IterRefine ) {
            printf("Iterative Refinement:\n");
	    printf("%8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR");
	    for (i = 0; i < nrhs; ++i)
	      printf("%8d%8d%16e%16e\n", i+1, stat.RefineSteps, ferr[i], berr[i]);
	}
	fflush(stdout);
    } else if ( info > 0 && lwork == -1 ) {
        printf("** Estimated memory: %d bytes\n", info - n);
    }

    if ( options.PrintStat ) StatPrint(&stat);
    StatFree(&stat);

    SUPERLU_FREE (rhsb);
    SUPERLU_FREE (rhsx);
    SUPERLU_FREE (xact);
    SUPERLU_FREE (etree);
    SUPERLU_FREE (perm_r);
    SUPERLU_FREE (perm_c);
    SUPERLU_FREE (R);
    SUPERLU_FREE (C);
    SUPERLU_FREE (ferr);
    SUPERLU_FREE (berr);
    Destroy_CompCol_Matrix(&A);
    Destroy_SuperMatrix_Store(&B);
    Destroy_SuperMatrix_Store(&X);
    if ( lwork == 0 ) {
        Destroy_SuperNode_Matrix(&L);
        Destroy_CompCol_Matrix(&U);
    } else if ( lwork > 0 ) {
        SUPERLU_FREE(work);
    }


#if ( DEBUGlevel>=1 )
    CHECK_MALLOC("Exit main()");
#endif
}
コード例 #7
0
main(int argc, char *argv[])
{
    SuperMatrix A;
    NCformat *Astore;
    doublecomplex   *a;
    int      *asub, *xa;
    int      *perm_r; /* row permutations from partial pivoting */
    int      *perm_c; /* column permutation vector */
    SuperMatrix L;      /* factor L */
    SCformat *Lstore;
    SuperMatrix U;      /* factor U */
    NCformat *Ustore;
    SuperMatrix B;
    int      nrhs, ldx, info, panel_size, m, n, nnz, permc_spec;
    char     trans[1];
    doublecomplex   *xact, *rhs;
    mem_usage_t   mem_usage;

    nrhs   = 1;
    *trans = 'N';
    
    zreadhb(&m, &n, &nnz, &a, &asub, &xa);

    zCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_Z, SLU_GE);
    Astore = A.Store;
    printf("Dimension %dx%d; # nonzeros %d\n", A.nrow, A.ncol, Astore->nnz);
    
    if ( !(rhs = doublecomplexMalloc(m * nrhs)) ) ABORT("Malloc fails for rhs[].");
    zCreate_Dense_Matrix(&B, m, nrhs, rhs, m, SLU_DN, SLU_Z, SLU_GE);
    xact = doublecomplexMalloc(n * nrhs);
    ldx = n;
    zGenXtrue(n, nrhs, xact, ldx);
    zFillRHS(trans, nrhs, xact, ldx, &A, &B);

    if ( !(perm_r = intMalloc(m)) ) ABORT("Malloc fails for perm_r[].");
    if ( !(perm_c = intMalloc(n)) ) ABORT("Malloc fails for perm_c[].");

    /*
     * Get column permutation vector perm_c[], according to permc_spec:
     *   permc_spec = 0: natural ordering 
     *   permc_spec = 1: minimum degree on structure of A'*A
     *   permc_spec = 2: minimum degree on structure of A'+A
     *   permc_spec = 3: approximate minimum degree for unsymmetric matrices
     */    	
    permc_spec = 1;
    get_perm_c(permc_spec, &A, perm_c);

    panel_size = sp_ienv(1);
    
    zgssv(&A, perm_c, perm_r, &L, &U, &B, &info);
    
    if ( info == 0 ) {

	zinf_norm_error(nrhs, &B, xact); /* Inf. norm of the error */

	Lstore = (SCformat *) L.Store;
	Ustore = (NCformat *) U.Store;
    	printf("No of nonzeros in factor L = %d\n", Lstore->nnz);
    	printf("No of nonzeros in factor U = %d\n", Ustore->nnz);
    	printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n);
	
	zQuerySpace(&L, &U, panel_size, &mem_usage);
	printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n",
	       mem_usage.for_lu/1e6, mem_usage.total_needed/1e6,
	       mem_usage.expansions);
	
    } else {
	printf("zgssv() error returns INFO= %d\n", info);
	if ( info <= n ) { /* factorization completes */
	    zQuerySpace(&L, &U, panel_size, &mem_usage);
	    printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n",
		   mem_usage.for_lu/1e6, mem_usage.total_needed/1e6,
		   mem_usage.expansions);
	}
    }

    SUPERLU_FREE (rhs);
    SUPERLU_FREE (xact);
    SUPERLU_FREE (perm_r);
    SUPERLU_FREE (perm_c);
    Destroy_CompCol_Matrix(&A);
    Destroy_SuperMatrix_Store(&B);
    Destroy_SuperNode_Matrix(&L);
    Destroy_CompCol_Matrix(&U);
}
コード例 #8
0
ファイル: zgstrs.c プロジェクト: drhansj/polymec-dev
void
zgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U,
        int *perm_c, int *perm_r, SuperMatrix *B,
        SuperLUStat_t *stat, int *info)
{

#ifdef _CRAY
    _fcd ftcs1, ftcs2, ftcs3, ftcs4;
#endif
    int      incx = 1, incy = 1;
#ifdef USE_VENDOR_BLAS
    doublecomplex   alpha = {1.0, 0.0}, beta = {1.0, 0.0};
    doublecomplex   *work_col;
#endif
    doublecomplex   temp_comp;
    DNformat *Bstore;
    doublecomplex   *Bmat;
    SCformat *Lstore;
    NCformat *Ustore;
    doublecomplex   *Lval, *Uval;
    int      fsupc, nrow, nsupr, nsupc, luptr, istart, irow;
    int      i, j, k, iptr, jcol, n, ldb, nrhs;
    doublecomplex   *work, *rhs_work, *soln;
    flops_t  solve_ops;
    void zprint_soln();

    /* Test input parameters ... */
    *info = 0;
    Bstore = B->Store;
    ldb = Bstore->lda;
    nrhs = B->ncol;
    if ( trans != NOTRANS && trans != TRANS && trans != CONJ ) *info = -1;
    else if ( L->nrow != L->ncol || L->nrow < 0 ||
	      L->Stype != SLU_SC || L->Dtype != SLU_Z || L->Mtype != SLU_TRLU )
	*info = -2;
    else if ( U->nrow != U->ncol || U->nrow < 0 ||
	      U->Stype != SLU_NC || U->Dtype != SLU_Z || U->Mtype != SLU_TRU )
	*info = -3;
    else if ( ldb < SUPERLU_MAX(0, L->nrow) ||
	      B->Stype != SLU_DN || B->Dtype != SLU_Z || B->Mtype != SLU_GE )
	*info = -6;
    if ( *info ) {
	i = -(*info);
	input_error("zgstrs", &i);
	return;
    }

    n = L->nrow;
    work = doublecomplexCalloc(n * nrhs);
    if ( !work ) ABORT("Malloc fails for local work[].");
    soln = doublecomplexMalloc(n);
    if ( !soln ) ABORT("Malloc fails for local soln[].");

    Bmat = Bstore->nzval;
    Lstore = L->Store;
    Lval = Lstore->nzval;
    Ustore = U->Store;
    Uval = Ustore->nzval;
    solve_ops = 0;
    
    if ( trans == NOTRANS ) {
	/* Permute right hand sides to form Pr*B */
	for (i = 0; i < nrhs; i++) {
	    rhs_work = &Bmat[i*ldb];
	    for (k = 0; k < n; k++) soln[perm_r[k]] = rhs_work[k];
	    for (k = 0; k < n; k++) rhs_work[k] = soln[k];
	}
	
	/* Forward solve PLy=Pb. */
	for (k = 0; k <= Lstore->nsuper; k++) {
	    fsupc = L_FST_SUPC(k);
	    istart = L_SUB_START(fsupc);
	    nsupr = L_SUB_START(fsupc+1) - istart;
	    nsupc = L_FST_SUPC(k+1) - fsupc;
	    nrow = nsupr - nsupc;

	    solve_ops += 4 * nsupc * (nsupc - 1) * nrhs;
	    solve_ops += 8 * nrow * nsupc * nrhs;
	    
	    if ( nsupc == 1 ) {
		for (j = 0; j < nrhs; j++) {
		    rhs_work = &Bmat[j*ldb];
	    	    luptr = L_NZ_START(fsupc);
		    for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); iptr++){
			irow = L_SUB(iptr);
			++luptr;
			zz_mult(&temp_comp, &rhs_work[fsupc], &Lval[luptr]);
			z_sub(&rhs_work[irow], &rhs_work[irow], &temp_comp);
		    }
		}
	    } else {
	    	luptr = L_NZ_START(fsupc);
#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
		ftcs1 = _cptofcd("L", strlen("L"));
		ftcs2 = _cptofcd("N", strlen("N"));
		ftcs3 = _cptofcd("U", strlen("U"));
		CTRSM( ftcs1, ftcs1, ftcs2, ftcs3, &nsupc, &nrhs, &alpha,
		       &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
		
		CGEMM( ftcs2, ftcs2, &nrow, &nrhs, &nsupc, &alpha, 
			&Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb, 
			&beta, &work[0], &n );
#else
		ztrsm_("L", "L", "N", "U", &nsupc, &nrhs, &alpha,
		       &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
		
		zgemm_( "N", "N", &nrow, &nrhs, &nsupc, &alpha, 
			&Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb, 
			&beta, &work[0], &n );
#endif
		for (j = 0; j < nrhs; j++) {
		    rhs_work = &Bmat[j*ldb];
		    work_col = &work[j*n];
		    iptr = istart + nsupc;
		    for (i = 0; i < nrow; i++) {
			irow = L_SUB(iptr);
			z_sub(&rhs_work[irow], &rhs_work[irow], &work_col[i]);
			work_col[i].r = 0.0;
	                work_col[i].i = 0.0;
			iptr++;
		    }
		}
#else		
		for (j = 0; j < nrhs; j++) {
		    rhs_work = &Bmat[j*ldb];
		    zlsolve (nsupr, nsupc, &Lval[luptr], &rhs_work[fsupc]);
		    zmatvec (nsupr, nrow, nsupc, &Lval[luptr+nsupc],
			    &rhs_work[fsupc], &work[0] );

		    iptr = istart + nsupc;
		    for (i = 0; i < nrow; i++) {
			irow = L_SUB(iptr);
			z_sub(&rhs_work[irow], &rhs_work[irow], &work[i]);
			work[i].r = 0.;
	                work[i].i = 0.;
			iptr++;
		    }
		}
#endif		    
	    } /* else ... */
	} /* for L-solve */

#ifdef DEBUG
  	printf("After L-solve: y=\n");
	zprint_soln(n, nrhs, Bmat);
#endif

	/*
	 * Back solve Ux=y.
	 */
	for (k = Lstore->nsuper; k >= 0; k--) {
	    fsupc = L_FST_SUPC(k);
	    istart = L_SUB_START(fsupc);
	    nsupr = L_SUB_START(fsupc+1) - istart;
	    nsupc = L_FST_SUPC(k+1) - fsupc;
	    luptr = L_NZ_START(fsupc);

	    solve_ops += 4 * nsupc * (nsupc + 1) * nrhs;

	    if ( nsupc == 1 ) {
		rhs_work = &Bmat[0];
		for (j = 0; j < nrhs; j++) {
		    z_div(&rhs_work[fsupc], &rhs_work[fsupc], &Lval[luptr]);
		    rhs_work += ldb;
		}
	    } else {
#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
		ftcs1 = _cptofcd("L", strlen("L"));
		ftcs2 = _cptofcd("U", strlen("U"));
		ftcs3 = _cptofcd("N", strlen("N"));
		CTRSM( ftcs1, ftcs2, ftcs3, ftcs3, &nsupc, &nrhs, &alpha,
		       &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
#else
		ztrsm_("L", "U", "N", "N", &nsupc, &nrhs, &alpha,
		       &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
#endif
#else		
		for (j = 0; j < nrhs; j++)
		    zusolve ( nsupr, nsupc, &Lval[luptr], &Bmat[fsupc+j*ldb] );
#endif		
	    }

	    for (j = 0; j < nrhs; ++j) {
		rhs_work = &Bmat[j*ldb];
		for (jcol = fsupc; jcol < fsupc + nsupc; jcol++) {
		    solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
		    for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++ ){
			irow = U_SUB(i);
			zz_mult(&temp_comp, &rhs_work[jcol], &Uval[i]);
			z_sub(&rhs_work[irow], &rhs_work[irow], &temp_comp);
		    }
		}
	    }
	    
	} /* for U-solve */

#ifdef DEBUG
  	printf("After U-solve: x=\n");
	zprint_soln(n, nrhs, Bmat);
#endif

	/* Compute the final solution X := Pc*X. */
	for (i = 0; i < nrhs; i++) {
	    rhs_work = &Bmat[i*ldb];
	    for (k = 0; k < n; k++) soln[k] = rhs_work[perm_c[k]];
	    for (k = 0; k < n; k++) rhs_work[k] = soln[k];
	}
	
        stat->ops[SOLVE] = solve_ops;

    } else { /* Solve A'*X=B or CONJ(A)*X=B */
	/* Permute right hand sides to form Pc'*B. */
	for (i = 0; i < nrhs; i++) {
	    rhs_work = &Bmat[i*ldb];
	    for (k = 0; k < n; k++) soln[perm_c[k]] = rhs_work[k];
	    for (k = 0; k < n; k++) rhs_work[k] = soln[k];
	}

	stat->ops[SOLVE] = 0;
        if (trans == TRANS) {
	    for (k = 0; k < nrhs; ++k) {
	        /* Multiply by inv(U'). */
	        sp_ztrsv("U", "T", "N", L, U, &Bmat[k*ldb], stat, info);
	    
	        /* Multiply by inv(L'). */
	        sp_ztrsv("L", "T", "U", L, U, &Bmat[k*ldb], stat, info);
	    }
         } else { /* trans == CONJ */
            for (k = 0; k < nrhs; ++k) {                
                /* Multiply by conj(inv(U')). */
                sp_ztrsv("U", "C", "N", L, U, &Bmat[k*ldb], stat, info);
                
                /* Multiply by conj(inv(L')). */
                sp_ztrsv("L", "C", "U", L, U, &Bmat[k*ldb], stat, info);
	    }
         }
	/* Compute the final solution X := Pr'*X (=inv(Pr)*X) */
	for (i = 0; i < nrhs; i++) {
	    rhs_work = &Bmat[i*ldb];
	    for (k = 0; k < n; k++) soln[k] = rhs_work[perm_r[k]];
	    for (k = 0; k < n; k++) rhs_work[k] = soln[k];
	}

    }

    SUPERLU_FREE(work);
    SUPERLU_FREE(soln);
}
コード例 #9
0
void
zgsrfs(char *trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U,
       int *perm_r, int *perm_c, char *equed, double *R, double *C,
       SuperMatrix *B, SuperMatrix *X, 
       double *ferr, double *berr, int *info)
{
/*
 *   Purpose   
 *   =======   
 *
 *   ZGSRFS improves the computed solution to a system of linear   
 *   equations and provides error bounds and backward error estimates for 
 *   the solution.   
 *
 *   If equilibration was performed, the system becomes:
 *           (diag(R)*A_original*diag(C)) * X = diag(R)*B_original.
 *
 *   See supermatrix.h for the definition of 'SuperMatrix' structure.
 *
 *   Arguments   
 *   =========   
 *
 *   trans   (input) char*
 *           Specifies the form of the system of equations:   
 *           = 'N':  A * X = B     (No transpose)   
 *           = 'T':  A**T * X = B  (Transpose)   
 *           = 'C':  A**H * X = B  (Conjugate transpose = Transpose)
 *   
 *   A       (input) SuperMatrix*
 *           The original matrix A in the system, or the scaled A if
 *           equilibration was done. The type of A can be:
 *           Stype = NC, Dtype = _Z, Mtype = GE.
 *    
 *   L       (input) SuperMatrix*
 *	     The factor L from the factorization Pr*A*Pc=L*U. Use
 *           compressed row subscripts storage for supernodes, 
 *           i.e., L has types: Stype = SC, Dtype = _Z, Mtype = TRLU.
 * 
 *   U       (input) SuperMatrix*
 *           The factor U from the factorization Pr*A*Pc=L*U as computed by
 *           zgstrf(). Use column-wise storage scheme, 
 *           i.e., U has types: Stype = NC, Dtype = _Z, Mtype = TRU.
 *
 *   perm_r  (input) int*, dimension (A->nrow)
 *           Row permutation vector, which defines the permutation matrix Pr;
 *           perm_r[i] = j means row i of A is in position j in Pr*A.
 *
 *   perm_c  (input) int*, dimension (A->ncol)
 *	     Column permutation vector, which defines the 
 *           permutation matrix Pc; perm_c[i] = j means column i of A is 
 *           in position j in A*Pc.
 *
 *   equed   (input) Specifies the form of equilibration that was done.
 *           = 'N': No equilibration.
 *           = 'R': Row equilibration, i.e., A was premultiplied by diag(R).
 *           = 'C': Column equilibration, i.e., A was postmultiplied by
 *                  diag(C).
 *           = 'B': Both row and column equilibration, i.e., A was replaced 
 *                  by diag(R)*A*diag(C).
 *
 *   R       (input) double*, dimension (A->nrow)
 *           The row scale factors for A.
 *           If equed = 'R' or 'B', A is premultiplied by diag(R).
 *           If equed = 'N' or 'C', R is not accessed.
 * 
 *   C       (input) double*, dimension (A->ncol)
 *           The column scale factors for A.
 *           If equed = 'C' or 'B', A is postmultiplied by diag(C).
 *           If equed = 'N' or 'R', C is not accessed.
 *
 *   B       (input) SuperMatrix*
 *           B has types: Stype = DN, Dtype = _Z, Mtype = GE.
 *           The right hand side matrix B.
 *           if equed = 'R' or 'B', B is premultiplied by diag(R).
 *
 *   X       (input/output) SuperMatrix*
 *           X has types: Stype = DN, Dtype = _Z, Mtype = GE.
 *           On entry, the solution matrix X, as computed by zgstrs().
 *           On exit, the improved solution matrix X.
 *           if *equed = 'C' or 'B', X should be premultiplied by diag(C)
 *               in order to obtain the solution to the original system.
 *
 *   FERR    (output) double*, dimension (B->ncol)   
 *           The estimated forward error bound for each solution vector   
 *           X(j) (the j-th column of the solution matrix X).   
 *           If XTRUE is the true solution corresponding to X(j), FERR(j) 
 *           is an estimated upper bound for the magnitude of the largest 
 *           element in (X(j) - XTRUE) divided by the magnitude of the   
 *           largest element in X(j).  The estimate is as reliable as   
 *           the estimate for RCOND, and is almost always a slight   
 *           overestimate of the true error.
 *
 *   BERR    (output) double*, dimension (B->ncol)   
 *           The componentwise relative backward error of each solution   
 *           vector X(j) (i.e., the smallest relative change in   
 *           any element of A or B that makes X(j) an exact solution).
 *
 *   info    (output) int*   
 *           = 0:  successful exit   
 *            < 0:  if INFO = -i, the i-th argument had an illegal value   
 *
 *    Internal Parameters   
 *    ===================   
 *
 *    ITMAX is the maximum number of steps of iterative refinement.   
 *
 */  

#define ITMAX 5
    
    /* Table of constant values */
    int    ione = 1;
    doublecomplex ndone = {-1., 0.};
    doublecomplex done = {1., 0.};
    
    /* Local variables */
    NCformat *Astore;
    doublecomplex   *Aval;
    SuperMatrix Bjcol;
    DNformat *Bstore, *Xstore, *Bjcol_store;
    doublecomplex   *Bmat, *Xmat, *Bptr, *Xptr;
    int      kase;
    double   safe1, safe2;
    int      i, j, k, irow, nz, count, notran, rowequ, colequ;
    int      ldb, ldx, nrhs;
    double   s, xk, lstres, eps, safmin;
    char     transt[1];
    doublecomplex   *work;
    double   *rwork;
    int      *iwork;
    extern double dlamch_(char *);
    extern int zlacon_(int *, doublecomplex *, doublecomplex *, double *, int *);
#ifdef _CRAY
    extern int CCOPY(int *, doublecomplex *, int *, doublecomplex *, int *);
    extern int CSAXPY(int *, doublecomplex *, doublecomplex *, int *, doublecomplex *, int *);
#else
    extern int zcopy_(int *, doublecomplex *, int *, doublecomplex *, int *);
    extern int zaxpy_(int *, doublecomplex *, doublecomplex *, int *, doublecomplex *, int *);
#endif

    Astore = A->Store;
    Aval   = Astore->nzval;
    Bstore = B->Store;
    Xstore = X->Store;
    Bmat   = Bstore->nzval;
    Xmat   = Xstore->nzval;
    ldb    = Bstore->lda;
    ldx    = Xstore->lda;
    nrhs   = B->ncol;
    
    /* Test the input parameters */
    *info = 0;
    notran = lsame_(trans, "N");
    if ( !notran && !lsame_(trans, "T") && !lsame_(trans, "C"))	*info = -1;
    else if ( A->nrow != A->ncol || A->nrow < 0 ||
	      A->Stype != NC || A->Dtype != _Z || A->Mtype != GE )
	*info = -2;
    else if ( L->nrow != L->ncol || L->nrow < 0 ||
 	      L->Stype != SC || L->Dtype != _Z || L->Mtype != TRLU )
	*info = -3;
    else if ( U->nrow != U->ncol || U->nrow < 0 ||
 	      U->Stype != NC || U->Dtype != _Z || U->Mtype != TRU )
	*info = -4;
    else if ( ldb < MAX(0, A->nrow) ||
 	      B->Stype != DN || B->Dtype != _Z || B->Mtype != GE )
        *info = -10;
    else if ( ldx < MAX(0, A->nrow) ||
 	      X->Stype != DN || X->Dtype != _Z || X->Mtype != GE )
	*info = -11;
    if (*info != 0) {
	i = -(*info);
	xerbla_("zgsrfs", &i);
	return;
    }

    /* Quick return if possible */
    if ( A->nrow == 0 || nrhs == 0) {
	for (j = 0; j < nrhs; ++j) {
	    ferr[j] = 0.;
	    berr[j] = 0.;
	}
	return;
    }

    rowequ = lsame_(equed, "R") || lsame_(equed, "B");
    colequ = lsame_(equed, "C") || lsame_(equed, "B");
    
    /* Allocate working space */
    work = doublecomplexMalloc(2*A->nrow);
    rwork = (double *) SUPERLU_MALLOC( A->nrow * sizeof(double) );
    iwork = intMalloc(A->nrow);
    if ( !work || !rwork || !iwork ) 
        ABORT("Malloc fails for work/rwork/iwork.");
    
    if ( notran ) {
	*(unsigned char *)transt = 'T';
    } else {
	*(unsigned char *)transt = 'N';
    }

    /* NZ = maximum number of nonzero elements in each row of A, plus 1 */
    nz     = A->ncol + 1;
    eps    = dlamch_("Epsilon");
    safmin = dlamch_("Safe minimum");
    safe1  = nz * safmin;
    safe2  = safe1 / eps;

    /* Compute the number of nonzeros in each row (or column) of A */
    for (i = 0; i < A->nrow; ++i) iwork[i] = 0;
    if ( notran ) {
	for (k = 0; k < A->ncol; ++k)
	    for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) 
		++iwork[Astore->rowind[i]];
    } else {
	for (k = 0; k < A->ncol; ++k)
	    iwork[k] = Astore->colptr[k+1] - Astore->colptr[k];
    }	

    /* Copy one column of RHS B into Bjcol. */
    Bjcol.Stype = B->Stype;
    Bjcol.Dtype = B->Dtype;
    Bjcol.Mtype = B->Mtype;
    Bjcol.nrow  = B->nrow;
    Bjcol.ncol  = 1;
    Bjcol.Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) );
    if ( !Bjcol.Store ) ABORT("SUPERLU_MALLOC fails for Bjcol.Store");
    Bjcol_store = Bjcol.Store;
    Bjcol_store->lda = ldb;
    Bjcol_store->nzval = work; /* address aliasing */
	
    /* Do for each right hand side ... */
    for (j = 0; j < nrhs; ++j) {
	count = 0;
	lstres = 3.;
	Bptr = &Bmat[j*ldb];
	Xptr = &Xmat[j*ldx];

	while (1) { /* Loop until stopping criterion is satisfied. */

	    /* Compute residual R = B - op(A) * X,   
	       where op(A) = A, A**T, or A**H, depending on TRANS. */
	    
#ifdef _CRAY
	    CCOPY(&A->nrow, Bptr, &ione, work, &ione);
#else
	    zcopy_(&A->nrow, Bptr, &ione, work, &ione);
#endif
	    sp_zgemv(trans, ndone, A, Xptr, ione, done, work, ione);

	    /* Compute componentwise relative backward error from formula 
	       max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )   
	       where abs(Z) is the componentwise absolute value of the matrix
	       or vector Z.  If the i-th component of the denominator is less
	       than SAFE2, then SAFE1 is added to the i-th component of the   
	       numerator and denominator before dividing. */

	    for (i = 0; i < A->nrow; ++i) rwork[i] = z_abs1( &Bptr[i] );
	    
	    /* Compute abs(op(A))*abs(X) + abs(B). */
	    if (notran) {
		for (k = 0; k < A->ncol; ++k) {
		    xk = z_abs1( &Xptr[k] );
		    for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i)
			rwork[Astore->rowind[i]] += z_abs1(&Aval[i]) * xk;
		}
	    } else {
		for (k = 0; k < A->ncol; ++k) {
		    s = 0.;
		    for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) {
			irow = Astore->rowind[i];
			s += z_abs1(&Aval[i]) * z_abs1(&Xptr[irow]);
		    }
		    rwork[k] += s;
		}
	    }
	    s = 0.;
	    for (i = 0; i < A->nrow; ++i) {
		if (rwork[i] > safe2)
		    s = MAX( s, z_abs1(&work[i]) / rwork[i] );
		else
		    s = MAX( s, (z_abs1(&work[i]) + safe1) / 
				(rwork[i] + safe1) );
	    }
	    berr[j] = s;

	    /* Test stopping criterion. Continue iterating if   
	       1) The residual BERR(J) is larger than machine epsilon, and   
	       2) BERR(J) decreased by at least a factor of 2 during the   
	          last iteration, and   
	       3) At most ITMAX iterations tried. */

	    if (berr[j] > eps && berr[j] * 2. <= lstres && count < ITMAX) {
		/* Update solution and try again. */
		zgstrs (trans, L, U, perm_r, perm_c, &Bjcol, info);
		
#ifdef _CRAY
		CAXPY(&A->nrow, &done, work, &ione,
		       &Xmat[j*ldx], &ione);
#else
		zaxpy_(&A->nrow, &done, work, &ione,
		       &Xmat[j*ldx], &ione);
#endif
		lstres = berr[j];
		++count;
	    } else {
		break;
	    }
        
	} /* end while */

	/* Bound error from formula:
	   norm(X - XTRUE) / norm(X) .le. FERR = norm( abs(inv(op(A)))*   
	   ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X)   
          where   
            norm(Z) is the magnitude of the largest component of Z   
            inv(op(A)) is the inverse of op(A)   
            abs(Z) is the componentwise absolute value of the matrix or
	       vector Z   
            NZ is the maximum number of nonzeros in any row of A, plus 1   
            EPS is machine epsilon   

          The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B))   
          is incremented by SAFE1 if the i-th component of   
          abs(op(A))*abs(X) + abs(B) is less than SAFE2.   

          Use ZLACON to estimate the infinity-norm of the matrix   
             inv(op(A)) * diag(W),   
          where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */
	
	for (i = 0; i < A->nrow; ++i) rwork[i] = z_abs1( &Bptr[i] );
	
	/* Compute abs(op(A))*abs(X) + abs(B). */
	if ( notran ) {
	    for (k = 0; k < A->ncol; ++k) {
		xk = z_abs1( &Xptr[k] );
		for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i)
		    rwork[Astore->rowind[i]] += z_abs1(&Aval[i]) * xk;
	    }
	} else {
	    for (k = 0; k < A->ncol; ++k) {
		s = 0.;
		for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) {
		    irow = Astore->rowind[i];
		    xk = z_abs1( &Xptr[irow] );
		    s += z_abs1(&Aval[i]) * xk;
		}
		rwork[k] += s;
	    }
	}
	
	for (i = 0; i < A->nrow; ++i)
	    if (rwork[i] > safe2)
		rwork[i] = z_abs(&work[i]) + (iwork[i]+1)*eps*rwork[i];
	    else
		rwork[i] = z_abs(&work[i])+(iwork[i]+1)*eps*rwork[i]+safe1;
	kase = 0;

	do {
	    zlacon_(&A->nrow, &work[A->nrow], work,
		    &ferr[j], &kase);
	    if (kase == 0) break;

	    if (kase == 1) {
		/* Multiply by diag(W)*inv(op(A)**T)*(diag(C) or diag(R)). */
		if ( notran && colequ )
		    for (i = 0; i < A->ncol; ++i) {
		        zd_mult(&work[i], &work[i], C[i]);
	            }
		else if ( !notran && rowequ )
		    for (i = 0; i < A->nrow; ++i) {
		        zd_mult(&work[i], &work[i], R[i]);
                    }

		zgstrs (transt, L, U, perm_r, perm_c, &Bjcol, info);
		
		for (i = 0; i < A->nrow; ++i) {
		    zd_mult(&work[i], &work[i], rwork[i]);
	 	}
	    } else {
		/* Multiply by (diag(C) or diag(R))*inv(op(A))*diag(W). */
		for (i = 0; i < A->nrow; ++i) {
		    zd_mult(&work[i], &work[i], rwork[i]);
		}
		
		zgstrs (trans, L, U, perm_r, perm_c, &Bjcol, info);
		
		if ( notran && colequ )
		    for (i = 0; i < A->ncol; ++i) {
		        zd_mult(&work[i], &work[i], C[i]);
		    }
		else if ( !notran && rowequ )
		    for (i = 0; i < A->ncol; ++i) {
		        zd_mult(&work[i], &work[i], R[i]);  
		    }
	    }
	    
	} while ( kase != 0 );

	/* Normalize error. */
	lstres = 0.;
 	if ( notran && colequ ) {
	    for (i = 0; i < A->nrow; ++i)
	    	lstres = MAX( lstres, C[i] * z_abs1( &Xptr[i]) );
  	} else if ( !notran && rowequ ) {
	    for (i = 0; i < A->nrow; ++i)
	    	lstres = MAX( lstres, R[i] * z_abs1( &Xptr[i]) );
	} else {
	    for (i = 0; i < A->nrow; ++i)
	    	lstres = MAX( lstres, z_abs1( &Xptr[i]) );
	}
	if ( lstres != 0. )
	    ferr[j] /= lstres;

    } /* for each RHS j ... */
    
    SUPERLU_FREE(work);
    SUPERLU_FREE(rwork);
    SUPERLU_FREE(iwork);
    SUPERLU_FREE(Bjcol.Store);

    return;

} /* zgsrfs */
コード例 #10
0
ファイル: zdrive.c プロジェクト: DarkOfTheMoon/HONEI
main(int argc, char *argv[])
{
/*
 * Purpose
 * =======
 *
 * ZDRIVE is the main test program for the DOUBLE COMPLEX linear
 * equation driver routines ZGSSV and ZGSSVX.
 *
 * The program is invoked by a shell script file -- ztest.csh.
 * The output from the tests are written into a file -- ztest.out.
 *
 * =====================================================================
 */
    doublecomplex         *a, *a_save;
    int            *asub, *asub_save;
    int            *xa, *xa_save;
    SuperMatrix  A, B, X, L, U;
    SuperMatrix  ASAV, AC;
    mem_usage_t    mem_usage;
    int            *perm_r; /* row permutation from partial pivoting */
    int            *perm_c, *pc_save; /* column permutation */
    int            *etree;
    doublecomplex  zero = {0.0, 0.0};
    double         *R, *C;
    double         *ferr, *berr;
    double         *rwork;
    doublecomplex          *wwork;
    void           *work;
    int            info, lwork, nrhs, panel_size, relax;
    int            m, n, nnz;
    doublecomplex         *xact;
    doublecomplex         *rhsb, *solx, *bsav;
    int            ldb, ldx;
    double         rpg, rcond;
    int            i, j, k1;
    double         rowcnd, colcnd, amax;
    int            maxsuper, rowblk, colblk;
    int            prefact, nofact, equil, iequed;
    int            nt, nrun, nfail, nerrs, imat, fimat, nimat;
    int            nfact, ifact, itran;
    int            kl, ku, mode, lda;
    int            zerot, izero, ioff;
    double         u;
    double         anorm, cndnum;
    doublecomplex         *Afull;
    double         result[NTESTS];
    superlu_options_t options;
    fact_t         fact;
    trans_t        trans;
    SuperLUStat_t  stat;
    static char    matrix_type[8];
    static char    equed[1], path[4], sym[1], dist[1];

    /* Fixed set of parameters */
    int            iseed[]  = {1988, 1989, 1990, 1991};
    static char    equeds[]  = {'N', 'R', 'C', 'B'};
    static fact_t  facts[] = {FACTORED, DOFACT, SamePattern,
                              SamePattern_SameRowPerm};
    static trans_t transs[]  = {NOTRANS, TRANS, CONJ};

    /* Some function prototypes */
    extern int zgst01(int, int, SuperMatrix *, SuperMatrix *,
                      SuperMatrix *, int *, int *, double *);
    extern int zgst02(trans_t, int, int, int, SuperMatrix *, doublecomplex *,
                      int, doublecomplex *, int, double *resid);
    extern int zgst04(int, int, doublecomplex *, int,
                      doublecomplex *, int, double rcond, double *resid);
    extern int zgst07(trans_t, int, int, SuperMatrix *, doublecomplex *, int,
                         doublecomplex *, int, doublecomplex *, int,
                         double *, double *, double *);
    extern int zlatb4_(char *, int *, int *, int *, char *, int *, int *,
                       double *, int *, double *, char *);
    extern int zlatms_(int *, int *, char *, int *, char *, double *d,
                       int *, double *, double *, int *, int *,
                       char *, doublecomplex *, int *, doublecomplex *, int *);
    extern int sp_zconvert(int, int, doublecomplex *, int, int, int,
                           doublecomplex *a, int *, int *, int *);


    /* Executable statements */

    strcpy(path, "ZGE");
    nrun  = 0;
    nfail = 0;
    nerrs = 0;

    /* Defaults */
    lwork      = 0;
    n          = 1;
    nrhs       = 1;
    panel_size = sp_ienv(1);
    relax      = sp_ienv(2);
    u          = 1.0;
    strcpy(matrix_type, "LA");
    parse_command_line(argc, argv, matrix_type, &n,
                       &panel_size, &relax, &nrhs, &maxsuper,
                       &rowblk, &colblk, &lwork, &u);
    if ( lwork > 0 ) {
        work = SUPERLU_MALLOC(lwork);
        if ( !work ) {
            fprintf(stderr, "expert: cannot allocate %d bytes\n", lwork);
            exit (-1);
        }
    }

    /* Set the default input options. */
    set_default_options(&options);
    options.DiagPivotThresh = u;
    options.PrintStat = NO;
    options.PivotGrowth = YES;
    options.ConditionNumber = YES;
    options.IterRefine = DOUBLE;

    if ( strcmp(matrix_type, "LA") == 0 ) {
        /* Test LAPACK matrix suite. */
        m = n;
        lda = SUPERLU_MAX(n, 1);
        nnz = n * n;        /* upper bound */
        fimat = 1;
        nimat = NTYPES;
        Afull = doublecomplexCalloc(lda * n);
        zallocateA(n, nnz, &a, &asub, &xa);
    } else {
        /* Read a sparse matrix */
        fimat = nimat = 0;
        zreadhb(&m, &n, &nnz, &a, &asub, &xa);
    }

    zallocateA(n, nnz, &a_save, &asub_save, &xa_save);
    rhsb = doublecomplexMalloc(m * nrhs);
    bsav = doublecomplexMalloc(m * nrhs);
    solx = doublecomplexMalloc(n * nrhs);
    ldb  = m;
    ldx  = n;
    zCreate_Dense_Matrix(&B, m, nrhs, rhsb, ldb, SLU_DN, SLU_Z, SLU_GE);
    zCreate_Dense_Matrix(&X, n, nrhs, solx, ldx, SLU_DN, SLU_Z, SLU_GE);
    xact = doublecomplexMalloc(n * nrhs);
    etree   = intMalloc(n);
    perm_r  = intMalloc(n);
    perm_c  = intMalloc(n);
    pc_save = intMalloc(n);
    R       = (double *) SUPERLU_MALLOC(m*sizeof(double));
    C       = (double *) SUPERLU_MALLOC(n*sizeof(double));
    ferr    = (double *) SUPERLU_MALLOC(nrhs*sizeof(double));
    berr    = (double *) SUPERLU_MALLOC(nrhs*sizeof(double));
    j = SUPERLU_MAX(m,n) * SUPERLU_MAX(4,nrhs);
    rwork   = (double *) SUPERLU_MALLOC(j*sizeof(double));
    for (i = 0; i < j; ++i) rwork[i] = 0.;
    if ( !R ) ABORT("SUPERLU_MALLOC fails for R");
    if ( !C ) ABORT("SUPERLU_MALLOC fails for C");
    if ( !ferr ) ABORT("SUPERLU_MALLOC fails for ferr");
    if ( !berr ) ABORT("SUPERLU_MALLOC fails for berr");
    if ( !rwork ) ABORT("SUPERLU_MALLOC fails for rwork");
    wwork   = doublecomplexCalloc( SUPERLU_MAX(m,n) * SUPERLU_MAX(4,nrhs) );

    for (i = 0; i < n; ++i) perm_c[i] = pc_save[i] = i;
    options.ColPerm = MY_PERMC;

    for (imat = fimat; imat <= nimat; ++imat) { /* All matrix types */

        if ( imat ) {

            /* Skip types 5, 6, or 7 if the matrix size is too small. */
            zerot = (imat >= 5 && imat <= 7);
            if ( zerot && n < imat-4 )
                continue;

            /* Set up parameters with ZLATB4 and generate a test matrix
               with ZLATMS.  */
            zlatb4_(path, &imat, &n, &n, sym, &kl, &ku, &anorm, &mode,
                    &cndnum, dist);

            zlatms_(&n, &n, dist, iseed, sym, &rwork[0], &mode, &cndnum,
                    &anorm, &kl, &ku, "No packing", Afull, &lda,
                    &wwork[0], &info);

            if ( info ) {
                printf(FMT3, "ZLATMS", info, izero, n, nrhs, imat, nfail);
                continue;
            }

            /* For types 5-7, zero one or more columns of the matrix
               to test that INFO is returned correctly.   */
            if ( zerot ) {
                if ( imat == 5 ) izero = 1;
                else if ( imat == 6 ) izero = n;
                else izero = n / 2 + 1;
                ioff = (izero - 1) * lda;
                if ( imat < 7 ) {
                    for (i = 0; i < n; ++i) Afull[ioff + i] = zero;
                } else {
                    for (j = 0; j < n - izero + 1; ++j)
                        for (i = 0; i < n; ++i)
                            Afull[ioff + i + j*lda] = zero;
                }
            } else {
                izero = 0;
            }

            /* Convert to sparse representation. */
            sp_zconvert(n, n, Afull, lda, kl, ku, a, asub, xa, &nnz);

        } else {
            izero = 0;
            zerot = 0;
        }

        zCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_Z, SLU_GE);

        /* Save a copy of matrix A in ASAV */
        zCreate_CompCol_Matrix(&ASAV, m, n, nnz, a_save, asub_save, xa_save,
                              SLU_NC, SLU_Z, SLU_GE);
        zCopy_CompCol_Matrix(&A, &ASAV);

        /* Form exact solution. */
        zGenXtrue(n, nrhs, xact, ldx);

        StatInit(&stat);

        for (iequed = 0; iequed < 4; ++iequed) {
            *equed = equeds[iequed];
            if (iequed == 0) nfact = 4;
            else nfact = 1; /* Only test factored, pre-equilibrated matrix */

            for (ifact = 0; ifact < nfact; ++ifact) {
                fact = facts[ifact];
                options.Fact = fact;

                for (equil = 0; equil < 2; ++equil) {
                    options.Equil = equil;
                    prefact   = ( options.Fact == FACTORED ||
                                  options.Fact == SamePattern_SameRowPerm );
                                /* Need a first factor */
                    nofact    = (options.Fact != FACTORED);  /* Not factored */

                    /* Restore the matrix A. */
                    zCopy_CompCol_Matrix(&ASAV, &A);

                    if ( zerot ) {
                        if ( prefact ) continue;
                    } else if ( options.Fact == FACTORED ) {
                        if ( equil || iequed ) {
                            /* Compute row and column scale factors to
                               equilibrate matrix A.    */
                            zgsequ(&A, R, C, &rowcnd, &colcnd, &amax, &info);

                            /* Force equilibration. */
                            if ( !info && n > 0 ) {
                                if ( lsame_(equed, "R") ) {
                                    rowcnd = 0.;
                                    colcnd = 1.;
                                } else if ( lsame_(equed, "C") ) {
                                    rowcnd = 1.;
                                    colcnd = 0.;
                                } else if ( lsame_(equed, "B") ) {
                                    rowcnd = 0.;
                                    colcnd = 0.;
                                }
                            }

                            /* Equilibrate the matrix. */
                            zlaqgs(&A, R, C, rowcnd, colcnd, amax, equed);
                        }
                    }

                    if ( prefact ) { /* Need a factor for the first time */

                        /* Save Fact option. */
                        fact = options.Fact;
                        options.Fact = DOFACT;

                        /* Preorder the matrix, obtain the column etree. */
                        sp_preorder(&options, &A, perm_c, etree, &AC);

                        /* Factor the matrix AC. */
                        zgstrf(&options, &AC, relax, panel_size,
                               etree, work, lwork, perm_c, perm_r, &L, &U,
                               &stat, &info);

                        if ( info ) {
                            printf("** First factor: info %d, equed %c\n",
                                   info, *equed);
                            if ( lwork == -1 ) {
                                printf("** Estimated memory: %d bytes\n",
                                        info - n);
                                exit(0);
                            }
                        }

                        Destroy_CompCol_Permuted(&AC);

                        /* Restore Fact option. */
                        options.Fact = fact;
                    } /* if .. first time factor */

                    for (itran = 0; itran < NTRAN; ++itran) {
                        trans = transs[itran];
                        options.Trans = trans;

                        /* Restore the matrix A. */
                        zCopy_CompCol_Matrix(&ASAV, &A);

                        /* Set the right hand side. */
                        zFillRHS(trans, nrhs, xact, ldx, &A, &B);
                        zCopy_Dense_Matrix(m, nrhs, rhsb, ldb, bsav, ldb);

                        /*----------------
                         * Test zgssv
                         *----------------*/
                        if ( options.Fact == DOFACT && itran == 0) {
                            /* Not yet factored, and untransposed */

                            zCopy_Dense_Matrix(m, nrhs, rhsb, ldb, solx, ldx);
                            zgssv(&options, &A, perm_c, perm_r, &L, &U, &X,
                                  &stat, &info);

                            if ( info && info != izero ) {
                                printf(FMT3, "zgssv",
                                       info, izero, n, nrhs, imat, nfail);
                            } else {
                                /* Reconstruct matrix from factors and
                                   compute residual. */
                                zgst01(m, n, &A, &L, &U, perm_c, perm_r,
                                         &result[0]);
                                nt = 1;
                                if ( izero == 0 ) {
                                    /* Compute residual of the computed
                                       solution. */
                                    zCopy_Dense_Matrix(m, nrhs, rhsb, ldb,
                                                       wwork, ldb);
                                    zgst02(trans, m, n, nrhs, &A, solx,
                                              ldx, wwork,ldb, &result[1]);
                                    nt = 2;
                                }

                                /* Print information about the tests that
                                   did not pass the threshold.      */
                                for (i = 0; i < nt; ++i) {
                                    if ( result[i] >= THRESH ) {
                                        printf(FMT1, "zgssv", n, i,
                                               result[i]);
                                        ++nfail;
                                    }
                                }
                                nrun += nt;
                            } /* else .. info == 0 */

                            /* Restore perm_c. */
                            for (i = 0; i < n; ++i) perm_c[i] = pc_save[i];

                            if (lwork == 0) {
                                Destroy_SuperNode_Matrix(&L);
                                Destroy_CompCol_Matrix(&U);
                            }
                        } /* if .. end of testing zgssv */

                        /*----------------
                         * Test zgssvx
                         *----------------*/

                        /* Equilibrate the matrix if fact = FACTORED and
                           equed = 'R', 'C', or 'B'.   */
                        if ( options.Fact == FACTORED &&
                             (equil || iequed) && n > 0 ) {
                            zlaqgs(&A, R, C, rowcnd, colcnd, amax, equed);
                        }

                        /* Solve the system and compute the condition number
                           and error bounds using zgssvx.      */
                        zgssvx(&options, &A, perm_c, perm_r, etree,
                               equed, R, C, &L, &U, work, lwork, &B, &X, &rpg,
                               &rcond, ferr, berr, &mem_usage, &stat, &info);

                        if ( info && info != izero ) {
                            printf(FMT3, "zgssvx",
                                   info, izero, n, nrhs, imat, nfail);
                            if ( lwork == -1 ) {
                                printf("** Estimated memory: %.0f bytes\n",
                                        mem_usage.total_needed);
                                exit(0);
                            }
                        } else {
                            if ( !prefact ) {
                                /* Reconstruct matrix from factors and
                                   compute residual. */
                                zgst01(m, n, &A, &L, &U, perm_c, perm_r,
                                         &result[0]);
                                k1 = 0;
                            } else {
                                k1 = 1;
                            }

                            if ( !info ) {
                                /* Compute residual of the computed solution.*/
                                zCopy_Dense_Matrix(m, nrhs, bsav, ldb,
                                                  wwork, ldb);
                                zgst02(trans, m, n, nrhs, &ASAV, solx, ldx,
                                          wwork, ldb, &result[1]);

                                /* Check solution from generated exact
                                   solution. */
                                zgst04(n, nrhs, solx, ldx, xact, ldx, rcond,
                                          &result[2]);

                                /* Check the error bounds from iterative
                                   refinement. */
                                zgst07(trans, n, nrhs, &ASAV, bsav, ldb,
                                          solx, ldx, xact, ldx, ferr, berr,
                                          &result[3]);

                                /* Print information about the tests that did
                                   not pass the threshold.    */
                                for (i = k1; i < NTESTS; ++i) {
                                    if ( result[i] >= THRESH ) {
                                        printf(FMT2, "zgssvx",
                                               options.Fact, trans, *equed,
                                               n, imat, i, result[i]);
                                        ++nfail;
                                    }
                                }
                                nrun += NTESTS;
                            } /* if .. info == 0 */
                        } /* else .. end of testing zgssvx */

                    } /* for itran ... */

                    if ( lwork == 0 ) {
                        Destroy_SuperNode_Matrix(&L);
                        Destroy_CompCol_Matrix(&U);
                    }

                } /* for equil ... */
            } /* for ifact ... */
        } /* for iequed ... */
#if 0
    if ( !info ) {
        PrintPerf(&L, &U, &mem_usage, rpg, rcond, ferr, berr, equed);
    }
#endif

    } /* for imat ... */

    /* Print a summary of the results. */
    PrintSumm("ZGE", nfail, nrun, nerrs);

    SUPERLU_FREE (rhsb);
    SUPERLU_FREE (bsav);
    SUPERLU_FREE (solx);
    SUPERLU_FREE (xact);
    SUPERLU_FREE (etree);
    SUPERLU_FREE (perm_r);
    SUPERLU_FREE (perm_c);
    SUPERLU_FREE (pc_save);
    SUPERLU_FREE (R);
    SUPERLU_FREE (C);
    SUPERLU_FREE (ferr);
    SUPERLU_FREE (berr);
    SUPERLU_FREE (rwork);
    SUPERLU_FREE (wwork);
    Destroy_SuperMatrix_Store(&B);
    Destroy_SuperMatrix_Store(&X);
    Destroy_CompCol_Matrix(&A);
    Destroy_CompCol_Matrix(&ASAV);
    if ( lwork > 0 ) {
        SUPERLU_FREE (work);
        Destroy_SuperMatrix_Store(&L);
        Destroy_SuperMatrix_Store(&U);
    }
    StatFree(&stat);

    return 0;
}
コード例 #11
0
ファイル: zlinsolx2.c プロジェクト: Amanotoko/fem
int main(int argc, char *argv[])
{
/*
 * Purpose
 * =======
 *
 * The driver program ZLINSOLX2.
 *
 * This example illustrates how to use ZGSSVX to solve systems repeatedly
 * with the same sparsity pattern of matrix A.
 * In this case, the column permutation vector perm_c is computed once.
 * The following data structures will be reused in the subsequent call to
 * ZGSSVX: perm_c, etree
 * 
 */
    char           equed[1];
    yes_no_t       equil;
    trans_t        trans;
    SuperMatrix    A, A1, L, U;
    SuperMatrix    B, B1, X;
    NCformat       *Astore;
    NCformat       *Ustore;
    SCformat       *Lstore;
    doublecomplex         *a, *a1;
    int            *asub, *xa, *asub1, *xa1;
    int            *perm_r; /* row permutations from partial pivoting */
    int            *perm_c; /* column permutation vector */
    int            *etree;
    void           *work;
    int            info, lwork, nrhs, ldx;
    int            i, j, m, n, nnz;
    doublecomplex         *rhsb, *rhsb1, *rhsx, *xact;
    double         *R, *C;
    double         *ferr, *berr;
    double         u, rpg, rcond;
    mem_usage_t    mem_usage;
    superlu_options_t options;
    SuperLUStat_t stat;
    extern void    parse_command_line();

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC("Enter main()");
#endif

    /* Defaults */
    lwork = 0;
    nrhs  = 1;
    equil = YES;	
    u     = 1.0;
    trans = NOTRANS;

    /* Set the default input options:
	options.Fact = DOFACT;
        options.Equil = YES;
    	options.ColPerm = COLAMD;
	options.DiagPivotThresh = 1.0;
    	options.Trans = NOTRANS;
    	options.IterRefine = NOREFINE;
    	options.SymmetricMode = NO;
    	options.PivotGrowth = NO;
    	options.ConditionNumber = NO;
    	options.PrintStat = YES;
     */
    set_default_options(&options);

    /* Can use command line input to modify the defaults. */
    parse_command_line(argc, argv, &lwork, &u, &equil, &trans);
    options.Equil = equil;
    options.DiagPivotThresh = u;
    options.Trans = trans;

    if ( lwork > 0 ) {
	work = SUPERLU_MALLOC(lwork);
	if ( !work ) {
	    ABORT("DLINSOLX: cannot allocate work[]");
	}
    }

    /* Read matrix A from a file in Harwell-Boeing format.*/
    zreadhb(&m, &n, &nnz, &a, &asub, &xa);
    if ( !(a1 = doublecomplexMalloc(nnz)) ) ABORT("Malloc fails for a1[].");
    if ( !(asub1 = intMalloc(nnz)) ) ABORT("Malloc fails for asub1[].");
    if ( !(xa1 = intMalloc(n+1)) ) ABORT("Malloc fails for xa1[].");
    for (i = 0; i < nnz; ++i) {
        a1[i] = a[i];
	asub1[i] = asub[i];
    }
    for (i = 0; i < n+1; ++i) xa1[i] = xa[i];
    
    zCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_Z, SLU_GE);
    Astore = A.Store;
    printf("Dimension %dx%d; # nonzeros %d\n", A.nrow, A.ncol, Astore->nnz);
    
    if ( !(rhsb = doublecomplexMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsb[].");
    if ( !(rhsb1 = doublecomplexMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsb1[].");
    if ( !(rhsx = doublecomplexMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsx[].");
    zCreate_Dense_Matrix(&B, m, nrhs, rhsb, m, SLU_DN, SLU_Z, SLU_GE);
    zCreate_Dense_Matrix(&X, m, nrhs, rhsx, m, SLU_DN, SLU_Z, SLU_GE);
    xact = doublecomplexMalloc(n * nrhs);
    ldx = n;
    zGenXtrue(n, nrhs, xact, ldx);
    zFillRHS(trans, nrhs, xact, ldx, &A, &B);
    for (j = 0; j < nrhs; ++j)
        for (i = 0; i < m; ++i) rhsb1[i+j*m] = rhsb[i+j*m];
    
    if ( !(perm_c = intMalloc(n)) ) ABORT("Malloc fails for perm_c[].");
    if ( !(perm_r = intMalloc(m)) ) ABORT("Malloc fails for perm_r[].");
    if ( !(etree = intMalloc(n)) ) ABORT("Malloc fails for etree[].");
    if ( !(R = (double *) SUPERLU_MALLOC(A.nrow * sizeof(double))) ) 
        ABORT("SUPERLU_MALLOC fails for R[].");
    if ( !(C = (double *) SUPERLU_MALLOC(A.ncol * sizeof(double))) )
        ABORT("SUPERLU_MALLOC fails for C[].");
    if ( !(ferr = (double *) SUPERLU_MALLOC(nrhs * sizeof(double))) )
        ABORT("SUPERLU_MALLOC fails for ferr[].");
    if ( !(berr = (double *) SUPERLU_MALLOC(nrhs * sizeof(double))) ) 
        ABORT("SUPERLU_MALLOC fails for berr[].");

    /* Initialize the statistics variables. */
    StatInit(&stat);
    
    /* ------------------------------------------------------------
       WE SOLVE THE LINEAR SYSTEM FOR THE FIRST TIME: AX = B
       ------------------------------------------------------------*/
    zgssvx(&options, &A, perm_c, perm_r, etree, equed, R, C,
           &L, &U, work, lwork, &B, &X, &rpg, &rcond, ferr, berr,
           &mem_usage, &stat, &info);

    printf("First system: zgssvx() returns info %d\n", info);

    if ( info == 0 || info == n+1 ) {

        /* This is how you could access the solution matrix. */
        doublecomplex *sol = (doublecomplex*) ((DNformat*) X.Store)->nzval; 

	if ( options.PivotGrowth ) printf("Recip. pivot growth = %e\n", rpg);
	if ( options.ConditionNumber )
	    printf("Recip. condition number = %e\n", rcond);
        Lstore = (SCformat *) L.Store;
        Ustore = (NCformat *) U.Store;
	printf("No of nonzeros in factor L = %d\n", Lstore->nnz);
    	printf("No of nonzeros in factor U = %d\n", Ustore->nnz);
    	printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n);
    	printf("FILL ratio = %.1f\n", (float)(Lstore->nnz + Ustore->nnz - n)/nnz);

	printf("L\\U MB %.3f\ttotal MB needed %.3f\n",
	       mem_usage.for_lu/1e6, mem_usage.total_needed/1e6);
	if ( options.IterRefine ) {
            printf("Iterative Refinement:\n");
	    printf("%8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR");
	    for (i = 0; i < nrhs; ++i)
	      printf("%8d%8d%16e%16e\n", i+1, stat.RefineSteps, ferr[i], berr[i]);
	}
	fflush(stdout);

    } else if ( info > 0 && lwork == -1 ) {
        printf("** Estimated memory: %d bytes\n", info - n);
    }

    if ( options.PrintStat ) StatPrint(&stat);
    StatFree(&stat);
    Destroy_CompCol_Matrix(&A);
    Destroy_Dense_Matrix(&B);
    if ( lwork >= 0 ) { /* Deallocate storage associated with L and U. */
        Destroy_SuperNode_Matrix(&L);
        Destroy_CompCol_Matrix(&U);
    }

    /* ------------------------------------------------------------
       NOW WE SOLVE ANOTHER LINEAR SYSTEM: A1*X = B1
       ONLY THE SPARSITY PATTERN OF A1 IS THE SAME AS THAT OF A.
       ------------------------------------------------------------*/
    options.Fact = SamePattern;
    StatInit(&stat); /* Initialize the statistics variables. */

    zCreate_CompCol_Matrix(&A1, m, n, nnz, a1, asub1, xa1,
                           SLU_NC, SLU_Z, SLU_GE);
    zCreate_Dense_Matrix(&B1, m, nrhs, rhsb1, m, SLU_DN, SLU_Z, SLU_GE);

    zgssvx(&options, &A1, perm_c, perm_r, etree, equed, R, C,
           &L, &U, work, lwork, &B1, &X, &rpg, &rcond, ferr, berr,
           &mem_usage, &stat, &info);

    printf("\nSecond system: zgssvx() returns info %d\n", info);

    if ( info == 0 || info == n+1 ) {

        /* This is how you could access the solution matrix. */
        doublecomplex *sol = (doublecomplex*) ((DNformat*) X.Store)->nzval; 

	if ( options.PivotGrowth ) printf("Recip. pivot growth = %e\n", rpg);
	if ( options.ConditionNumber )
	    printf("Recip. condition number = %e\n", rcond);
        Lstore = (SCformat *) L.Store;
        Ustore = (NCformat *) U.Store;
	printf("No of nonzeros in factor L = %d\n", Lstore->nnz);
    	printf("No of nonzeros in factor U = %d\n", Ustore->nnz);
    	printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n);
	printf("L\\U MB %.3f\ttotal MB needed %.3f\n",
	       mem_usage.for_lu/1e6, mem_usage.total_needed/1e6);
	if ( options.IterRefine ) {
            printf("Iterative Refinement:\n");
	    printf("%8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR");
	    for (i = 0; i < nrhs; ++i)
	      printf("%8d%8d%16e%16e\n", i+1, stat.RefineSteps, ferr[i], berr[i]);
	}
	fflush(stdout);
    } else if ( info > 0 && lwork == -1 ) {
        printf("** Estimated memory: %d bytes\n", info - n);
    }

    if ( options.PrintStat ) StatPrint(&stat);
    StatFree(&stat);

    SUPERLU_FREE (xact);
    SUPERLU_FREE (etree);
    SUPERLU_FREE (perm_r);
    SUPERLU_FREE (perm_c);
    SUPERLU_FREE (R);
    SUPERLU_FREE (C);
    SUPERLU_FREE (ferr);
    SUPERLU_FREE (berr);
    Destroy_CompCol_Matrix(&A1);
    Destroy_Dense_Matrix(&B1);
    Destroy_Dense_Matrix(&X);
    if ( lwork == 0 ) {
        Destroy_SuperNode_Matrix(&L);
        Destroy_CompCol_Matrix(&U);
    } else if ( lwork > 0 ) {
        SUPERLU_FREE(work);
    }

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC("Exit main()");
#endif
}