Esempio n. 1
0
int
sp_ztrsv(char *uplo, char *trans, char *diag, SuperMatrix *L, 
         SuperMatrix *U, doublecomplex *x, int *info)
{
/*
 *   Purpose
 *   =======
 *
 *   sp_ztrsv() solves one of the systems of equations   
 *       A*x = b,   or   A'*x = b,
 *   where b and x are n element vectors and A is a sparse unit , or   
 *   non-unit, upper or lower triangular matrix.   
 *   No test for singularity or near-singularity is included in this   
 *   routine. Such tests must be performed before calling this routine.   
 *
 *   Parameters   
 *   ==========   
 *
 *   uplo   - (input) char*
 *            On entry, uplo specifies whether the matrix is an upper or   
 *             lower triangular matrix as follows:   
 *                uplo = 'U' or 'u'   A is an upper triangular matrix.   
 *                uplo = 'L' or 'l'   A is a lower triangular matrix.   
 *
 *   trans  - (input) char*
 *             On entry, trans specifies the equations to be solved as   
 *             follows:   
 *                trans = 'N' or 'n'   A*x = b.   
 *                trans = 'T' or 't'   A'*x = b.
 *                trans = 'C' or 'c'   A^H*x = b.   
 *
 *   diag   - (input) char*
 *             On entry, diag specifies whether or not A is unit   
 *             triangular as follows:   
 *                diag = 'U' or 'u'   A is assumed to be unit triangular.   
 *                diag = 'N' or 'n'   A is not assumed to be unit   
 *                                    triangular.   
 *	     
 *   L       - (input) SuperMatrix*
 *	       The factor L from the factorization Pr*A*Pc=L*U. Use
 *             compressed row subscripts storage for supernodes,
 *             i.e., L has types: Stype = SC, Dtype = _Z, Mtype = TRLU.
 *
 *   U       - (input) SuperMatrix*
 *	        The factor U from the factorization Pr*A*Pc=L*U.
 *	        U has types: Stype = NCP, Dtype = _Z, Mtype = TRU.
 *    
 *   x       - (input/output) doublecomplex*
 *             Before entry, the incremented array X must contain the n   
 *             element right-hand side vector b. On exit, X is overwritten 
 *             with the solution vector x.
 *
 *   info    - (output) int*
 *             If *info = -i, the i-th argument had an illegal value.
 *
 */
#if ( MACH==CRAY_PVP )
    _fcd ftcs1, ftcs2, ftcs3;
#endif
    SCPformat *Lstore;
    NCPformat *Ustore;
    doublecomplex   *Lval, *Uval;
    int incx = 1, incy = 1;
    doublecomplex temp;
    doublecomplex alpha = {1.0, 0.0}, beta = {1.0, 0.0};
    doublecomplex comp_zero = {0.0, 0.0};
    register int fsupc, luptr, istart, irow, k, iptr, jcol, nsuper;
    int          nsupr, nsupc, nrow, i;
    doublecomplex *work;
    flops_t solve_ops;

    /* Test the input parameters */
    *info = 0;
    if ( !lsame_(uplo,"L") && !lsame_(uplo, "U") ) *info = -1;
    else if ( !lsame_(trans, "N") && !lsame_(trans, "T") ) *info = -2;
    else if ( !lsame_(diag, "U") && !lsame_(diag, "N") ) *info = -3;
    else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4;
    else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5;
    if ( *info ) {
	i = -(*info);
	xerbla_("sp_ztrsv", &i);
	return 0;
    }

    Lstore = L->Store;
    Lval = Lstore->nzval;
    Ustore = U->Store;
    Uval = Ustore->nzval;
    nsuper = Lstore->nsuper;
    solve_ops = 0;

    if ( !(work = doublecomplexCalloc(L->nrow)) )
	SUPERLU_ABORT("Malloc fails for work in sp_ztrsv().");
    
    if ( lsame_(trans, "N") ) {	/* Form x := inv(A)*x. */
	
	if ( lsame_(uplo, "L") ) {
	    /* Form x := inv(L)*x */
    	    if ( L->nrow == 0 ) return 0; /* Quick return */
	    
	    for (k = 0; k <= nsuper; k++) {
		fsupc = L_FST_SUPC(k);
		istart = L_SUB_START(fsupc);
                nsupr = L_SUB_END(fsupc) - istart;
                nsupc = L_LAST_SUPC(k) - fsupc;
		luptr = L_NZ_START(fsupc);
		nrow = nsupr - nsupc;

                /* 1 z_div costs 10 flops */
	        solve_ops += 4 * nsupc * (nsupc - 1) + 10 * nsupc;
	        solve_ops += 8 * nrow * nsupc;

		if ( nsupc == 1 ) {
		    for (iptr=istart+1; iptr < L_SUB_END(fsupc); ++iptr) {
			irow = L_SUB(iptr);
			++luptr;
			zz_mult(&comp_zero, &x[fsupc], &Lval[luptr]);
			z_sub(&x[irow], &x[irow], &comp_zero);
		    }
		} else {
#ifdef USE_VENDOR_BLAS
#if ( MACH==CRAY_PVP )
                    ftcs1 = _cptofcd("L", strlen("L"));
                    ftcs2 = _cptofcd("N", strlen("N"));
                    ftcs3 = _cptofcd("U", strlen("U"));

		    CTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
		       	&x[fsupc], &incx);
		
		    CGEMV(ftcs2, &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], 
		       	&nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
#else
		    ztrsv_("L", "N", "U", &nsupc, &Lval[luptr], &nsupr,
		       	&x[fsupc], &incx);
		
		    zgemv_("N", &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], 
		       	&nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
#endif
#else
		    zlsolve (nsupr, nsupc, &Lval[luptr], &x[fsupc]);
		
		    zmatvec (nsupr, nsupr-nsupc, nsupc, &Lval[luptr+nsupc],
                             &x[fsupc], &work[0] );
#endif		
		
		    iptr = istart + nsupc;
		    for (i = 0; i < nrow; ++i, ++iptr) {
			irow = L_SUB(iptr);
			z_sub(&x[irow], &x[irow], &work[i]); /* Scatter */
			work[i] = comp_zero;

		    }
	 	}
	    } /* for k ... */
	    
	} else {
	    /* Form x := inv(U)*x */
	    
	    if ( U->nrow == 0 ) return 0; /* Quick return */
	    
	    for (k = nsuper; k >= 0; k--) {
	    	fsupc = L_FST_SUPC(k);
                nsupr = L_SUB_END(fsupc) - L_SUB_START(fsupc);
                nsupc = L_LAST_SUPC(k) - fsupc;
	    	luptr = L_NZ_START(fsupc);
		
                /* 1 z_div costs 10 flops */
    	        solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc;

		if ( nsupc == 1 ) {
		    z_div(&x[fsupc], &x[fsupc], &Lval[luptr]);
		    for (i = U_NZ_START(fsupc); i < U_NZ_END(fsupc); ++i) {
			irow = U_SUB(i);
			zz_mult(&comp_zero, &x[fsupc], &Uval[i]);
			z_sub(&x[irow], &x[irow], &comp_zero);
		    }
		} else {
#ifdef USE_VENDOR_BLAS
#if ( MACH==CRAY_PVP )
                    ftcs1 = _cptofcd("U", strlen("U"));
                    ftcs2 = _cptofcd("N", strlen("N"));
                    ftcs3 = _cptofcd("N", strlen("N"));

		    CTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
		       &x[fsupc], &incx);
#else
		    ztrsv_("U", "N", "N", &nsupc, &Lval[luptr], &nsupr,
                           &x[fsupc], &incx);
#endif
#else		
		    zusolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc] );
#endif		

                    for (jcol = fsupc; jcol < fsupc + nsupc; jcol++) {
		        solve_ops += 8*(U_NZ_END(jcol) - U_NZ_START(jcol));
		    	for (i = U_NZ_START(jcol); i < U_NZ_END(jcol); i++) {
			    irow = U_SUB(i);
			zz_mult(&comp_zero, &x[jcol], &Uval[i]);
			z_sub(&x[irow], &x[irow], &comp_zero);
		    	}
                    }
		}
	    } /* for k ... */
	    
	}
    } else if ( lsame_(trans, "T") ) { /* Form x := inv(A')*x */
	
	if ( lsame_(uplo, "L") ) {
	    /* Form x := inv(L')*x */
    	    if ( L->nrow == 0 ) return 0; /* Quick return */
	    
	    for (k = Lstore->nsuper; k >= 0; --k) {
	    	fsupc = L_FST_SUPC(k);
	    	istart = L_SUB_START(fsupc);
                nsupr = L_SUB_END(fsupc) - istart;
                nsupc = L_LAST_SUPC(k) - fsupc;
	    	luptr = L_NZ_START(fsupc);

		solve_ops += 8 * (nsupr - nsupc) * nsupc;

		for (jcol = fsupc; jcol < L_LAST_SUPC(k); jcol++) {
		    iptr = istart + nsupc;
		    for (i = L_NZ_START(jcol) + nsupc; 
				i < L_NZ_END(jcol); i++) {
			irow = L_SUB(iptr);
			zz_mult(&comp_zero, &x[irow], &Lval[i]);
		    	z_sub(&x[jcol], &x[jcol], &comp_zero);
			iptr++;
		    }
		}
		
		if ( nsupc > 1 ) {
		    solve_ops += 4 * nsupc * (nsupc - 1);
#ifdef _CRAY
                    ftcs1 = _cptofcd("L", strlen("L"));
                    ftcs2 = _cptofcd("T", strlen("T"));
                    ftcs3 = _cptofcd("U", strlen("U"));
		    CTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
			&x[fsupc], &incx);
#else
		    ztrsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr,
			&x[fsupc], &incx);
#endif
		}
	    }
	} else {
	    /* Form x := inv(U')*x */
	    if ( U->nrow == 0 ) return 0; /* Quick return */
	    
	    for (k = 0; k <= nsuper; k++) {
	    	fsupc = L_FST_SUPC(k);
                nsupr = L_SUB_END(fsupc) - L_SUB_START(fsupc);
                nsupc = L_LAST_SUPC(k) - fsupc;
	    	luptr = L_NZ_START(fsupc);

                for (jcol = fsupc; jcol < fsupc + nsupc; jcol++) {
		    solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
                    for (i = U_NZ_START(jcol); i < U_NZ_END(jcol); i++) {
			irow = U_SUB(i);
			zz_mult(&comp_zero, &x[irow], &Uval[i]);
		    	z_sub(&x[jcol], &x[jcol], &comp_zero);
		    }
		}

                /* 1 z_div costs 10 flops */
		solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc;

		if ( nsupc == 1 ) {
		    z_div(&x[fsupc], &x[fsupc], &Lval[luptr]);
		} else {
#ifdef _CRAY
                    ftcs1 = _cptofcd("U", strlen("U"));
                    ftcs2 = _cptofcd("T", strlen("T"));
                    ftcs3 = _cptofcd("N", strlen("N"));
		    CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
			    &x[fsupc], &incx);
#else
		    ztrsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr,
			    &x[fsupc], &incx);
#endif
		}
	    } /* for k ... */
	}
    } else { /* Form x := conj(inv(A'))*x */
	
	if ( lsame_(uplo, "L") ) {
	    /* Form x := conj(inv(L'))*x */
    	    if ( L->nrow == 0 ) return 0; /* Quick return */
	    
	    for (k = Lstore->nsuper; k >= 0; --k) {
	    	fsupc = L_FST_SUPC(k);
	    	istart = L_SUB_START(fsupc);
	    	nsupr = L_SUB_END(fsupc) - istart;
	    	nsupc = L_LAST_SUPC(k) - fsupc;
	    	luptr = L_NZ_START(fsupc);

		solve_ops += 8 * (nsupr - nsupc) * nsupc;

		for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
		    iptr = istart + nsupc;
		    for (i = L_NZ_START(jcol) + nsupc; 
				i < L_NZ_START(jcol+1); i++) {
			irow = L_SUB(iptr);
                        zz_conj(&temp, &Lval[i]);
			zz_mult(&comp_zero, &x[irow], &temp);
		    	z_sub(&x[jcol], &x[jcol], &comp_zero);
			iptr++;
		    }
 		}
 		
 		if ( nsupc > 1 ) {
		    solve_ops += 4 * nsupc * (nsupc - 1);
#ifdef _CRAY
                    ftcs1 = _cptofcd("L", strlen("L"));
                    ftcs2 = _cptofcd(trans, strlen("T"));
                    ftcs3 = _cptofcd("U", strlen("U"));
		    ZTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
			&x[fsupc], &incx);
#else
                    ztrsv_("L", trans, "U", &nsupc, &Lval[luptr], &nsupr,
                           &x[fsupc], &incx);
#endif
		}
	    }
	} else {
	    /* Form x := conj(inv(U'))*x */
	    if ( U->nrow == 0 ) return 0; /* Quick return */
	    
	    for (k = 0; k <= Lstore->nsuper; k++) {
	    	fsupc = L_FST_SUPC(k);
	    	nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
	    	nsupc = L_FST_SUPC(k+1) - fsupc;
	    	luptr = L_NZ_START(fsupc);

		for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
		    solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
		    for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) {
			irow = U_SUB(i);
                        zz_conj(&temp, &Uval[i]);
			zz_mult(&comp_zero, &x[irow], &temp);
		    	z_sub(&x[jcol], &x[jcol], &comp_zero);
		    }
		}

                /* 1 z_div costs 10 flops */
		solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc;
 
		if ( nsupc == 1 ) {
                    zz_conj(&temp, &Lval[luptr]);
		    z_div(&x[fsupc], &x[fsupc], &temp);
		} else {
#ifdef _CRAY
                    ftcs1 = _cptofcd("U", strlen("U"));
                    ftcs2 = _cptofcd(trans, strlen("T"));
                    ftcs3 = _cptofcd("N", strlen("N"));
		    ZTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
			    &x[fsupc], &incx);
#else
                    ztrsv_("U", trans, "N", &nsupc, &Lval[luptr], &nsupr,
                               &x[fsupc], &incx);
#endif
  		}
  	    } /* for k ... */
  	}
    }

    SUPERLU_FREE(work);
    return 0;
}
Esempio n. 2
0
/*! \brief Solves one of the systems of equations A*x = b,   or   A'*x = b
 *
 * <pre>
 *   Purpose
 *   =======
 *
 *   sp_ztrsv() solves one of the systems of equations
 *       A*x = b,   or   A'*x = b,
 *   where b and x are n element vectors and A is a sparse unit , or
 *   non-unit, upper or lower triangular matrix.
 *   No test for singularity or near-singularity is included in this
 *   routine. Such tests must be performed before calling this routine.
 *
 *   Parameters
 *   ==========
 *
 *   uplo   - (input) char*
 *            On entry, uplo specifies whether the matrix is an upper or
 *             lower triangular matrix as follows:
 *                uplo = 'U' or 'u'   A is an upper triangular matrix.
 *                uplo = 'L' or 'l'   A is a lower triangular matrix.
 *
 *   trans  - (input) char*
 *             On entry, trans specifies the equations to be solved as
 *             follows:
 *                trans = 'N' or 'n'   A*x = b.
 *                trans = 'T' or 't'   A'*x = b.
 *                trans = 'C' or 'c'   A^H*x = b.
 *
 *   diag   - (input) char*
 *             On entry, diag specifies whether or not A is unit
 *             triangular as follows:
 *                diag = 'U' or 'u'   A is assumed to be unit triangular.
 *                diag = 'N' or 'n'   A is not assumed to be unit
 *                                    triangular.
 *
 *   L       - (input) SuperMatrix*
 *             The factor L from the factorization Pr*A*Pc=L*U. Use
 *             compressed row subscripts storage for supernodes,
 *             i.e., L has types: Stype = SC, Dtype = SLU_Z, Mtype = TRLU.
 *
 *   U       - (input) SuperMatrix*
 *              The factor U from the factorization Pr*A*Pc=L*U.
 *              U has types: Stype = NC, Dtype = SLU_Z, Mtype = TRU.
 *
 *   x       - (input/output) doublecomplex*
 *             Before entry, the incremented array X must contain the n
 *             element right-hand side vector b. On exit, X is overwritten
 *             with the solution vector x.
 *
 *   info    - (output) int*
 *             If *info = -i, the i-th argument had an illegal value.
 * </pre>
 */
int
sp_ztrsv(char *uplo, char *trans, char *diag, SuperMatrix *L,
         SuperMatrix *U, doublecomplex *x, SuperLUStat_t *stat, int *info)
{
#ifdef _CRAY
    _fcd ftcs1 = _cptofcd("L", strlen("L")),
         ftcs2 = _cptofcd("N", strlen("N")),
         ftcs3 = _cptofcd("U", strlen("U"));
#endif
    SCformat *Lstore;
    NCformat *Ustore;
    doublecomplex   *Lval, *Uval;
    int incx = 1, incy = 1;
    doublecomplex temp;
    doublecomplex alpha = {1.0, 0.0}, beta = {1.0, 0.0};
    doublecomplex comp_zero = {0.0, 0.0};
    int nrow;
    int fsupc, nsupr, nsupc, luptr, istart, irow;
    int i, k, iptr, jcol;
    doublecomplex *work;
    flops_t solve_ops;

    /* Test the input parameters */
    *info = 0;
    if ( !lsame_(uplo,"L") && !lsame_(uplo, "U") ) *info = -1;
    else if ( !lsame_(trans, "N") && !lsame_(trans, "T") &&
              !lsame_(trans, "C")) *info = -2;
    else if ( !lsame_(diag, "U") && !lsame_(diag, "N") ) *info = -3;
    else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4;
    else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5;
    if ( *info ) {
        i = -(*info);
        xerbla_("sp_ztrsv", &i);
        return 0;
    }

    Lstore = L->Store;
    Lval = Lstore->nzval;
    Ustore = U->Store;
    Uval = Ustore->nzval;
    solve_ops = 0;

    if ( !(work = doublecomplexCalloc(L->nrow)) )
        ABORT("Malloc fails for work in sp_ztrsv().");

    if ( lsame_(trans, "N") ) { /* Form x := inv(A)*x. */

        if ( lsame_(uplo, "L") ) {
            /* Form x := inv(L)*x */
            if ( L->nrow == 0 ) return 0; /* Quick return */

            for (k = 0; k <= Lstore->nsuper; k++) {
                fsupc = L_FST_SUPC(k);
                istart = L_SUB_START(fsupc);
                nsupr = L_SUB_START(fsupc+1) - istart;
                nsupc = L_FST_SUPC(k+1) - fsupc;
                luptr = L_NZ_START(fsupc);
                nrow = nsupr - nsupc;

                /* 1 z_div costs 10 flops */
                solve_ops += 4 * nsupc * (nsupc - 1) + 10 * nsupc;
                solve_ops += 8 * nrow * nsupc;

                if ( nsupc == 1 ) {
                    for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); ++iptr) {
                        irow = L_SUB(iptr);
                        ++luptr;
                        zz_mult(&comp_zero, &x[fsupc], &Lval[luptr]);
                        z_sub(&x[irow], &x[irow], &comp_zero);
                    }
                } else {
#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
                    CTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
                        &x[fsupc], &incx);

                    CGEMV(ftcs2, &nrow, &nsupc, &alpha, &Lval[luptr+nsupc],
                        &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
#else
                    ztrsv_("L", "N", "U", &nsupc, &Lval[luptr], &nsupr,
                        &x[fsupc], &incx);

                    zgemv_("N", &nrow, &nsupc, &alpha, &Lval[luptr+nsupc],
                        &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
#endif
#else
                    zlsolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc]);

                    zmatvec ( nsupr, nsupr-nsupc, nsupc, &Lval[luptr+nsupc],
                             &x[fsupc], &work[0] );
#endif

                    iptr = istart + nsupc;
                    for (i = 0; i < nrow; ++i, ++iptr) {
                        irow = L_SUB(iptr);
                        z_sub(&x[irow], &x[irow], &work[i]); /* Scatter */
                        work[i] = comp_zero;

                    }
                }
            } /* for k ... */

        } else {
            /* Form x := inv(U)*x */

            if ( U->nrow == 0 ) return 0; /* Quick return */

            for (k = Lstore->nsuper; k >= 0; k--) {
                fsupc = L_FST_SUPC(k);
                nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
                nsupc = L_FST_SUPC(k+1) - fsupc;
                luptr = L_NZ_START(fsupc);

                /* 1 z_div costs 10 flops */
                solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc;

                if ( nsupc == 1 ) {
                    z_div(&x[fsupc], &x[fsupc], &Lval[luptr]);
                    for (i = U_NZ_START(fsupc); i < U_NZ_START(fsupc+1); ++i) {
                        irow = U_SUB(i);
                        zz_mult(&comp_zero, &x[fsupc], &Uval[i]);
                        z_sub(&x[irow], &x[irow], &comp_zero);
                    }
                } else {
#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
                    CTRSV(ftcs3, ftcs2, ftcs2, &nsupc, &Lval[luptr], &nsupr,
                       &x[fsupc], &incx);
#else
                    ztrsv_("U", "N", "N", &nsupc, &Lval[luptr], &nsupr,
                           &x[fsupc], &incx);
#endif
#else
                    zusolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc] );
#endif

                    for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
                        solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
                        for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1);
                                i++) {
                            irow = U_SUB(i);
                        zz_mult(&comp_zero, &x[jcol], &Uval[i]);
                        z_sub(&x[irow], &x[irow], &comp_zero);
                        }
                    }
                }
            } /* for k ... */

        }
    } else if ( lsame_(trans, "T") ) { /* Form x := inv(A')*x */

        if ( lsame_(uplo, "L") ) {
            /* Form x := inv(L')*x */
            if ( L->nrow == 0 ) return 0; /* Quick return */

            for (k = Lstore->nsuper; k >= 0; --k) {
                fsupc = L_FST_SUPC(k);
                istart = L_SUB_START(fsupc);
                nsupr = L_SUB_START(fsupc+1) - istart;
                nsupc = L_FST_SUPC(k+1) - fsupc;
                luptr = L_NZ_START(fsupc);

                solve_ops += 8 * (nsupr - nsupc) * nsupc;

                for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
                    iptr = istart + nsupc;
                    for (i = L_NZ_START(jcol) + nsupc;
                                i < L_NZ_START(jcol+1); i++) {
                        irow = L_SUB(iptr);
                        zz_mult(&comp_zero, &x[irow], &Lval[i]);
                        z_sub(&x[jcol], &x[jcol], &comp_zero);
                        iptr++;
                    }
                }

                if ( nsupc > 1 ) {
                    solve_ops += 4 * nsupc * (nsupc - 1);
#ifdef _CRAY
                    ftcs1 = _cptofcd("L", strlen("L"));
                    ftcs2 = _cptofcd("T", strlen("T"));
                    ftcs3 = _cptofcd("U", strlen("U"));
                    CTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
                        &x[fsupc], &incx);
#else
                    ztrsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr,
                        &x[fsupc], &incx);
#endif
                }
            }
        } else {
            /* Form x := inv(U')*x */
            if ( U->nrow == 0 ) return 0; /* Quick return */

            for (k = 0; k <= Lstore->nsuper; k++) {
                fsupc = L_FST_SUPC(k);
                nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
                nsupc = L_FST_SUPC(k+1) - fsupc;
                luptr = L_NZ_START(fsupc);

                for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
                    solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
                    for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) {
                        irow = U_SUB(i);
                        zz_mult(&comp_zero, &x[irow], &Uval[i]);
                        z_sub(&x[jcol], &x[jcol], &comp_zero);
                    }
                }

                /* 1 z_div costs 10 flops */
                solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc;

                if ( nsupc == 1 ) {
                    z_div(&x[fsupc], &x[fsupc], &Lval[luptr]);
                } else {
#ifdef _CRAY
                    ftcs1 = _cptofcd("U", strlen("U"));
                    ftcs2 = _cptofcd("T", strlen("T"));
                    ftcs3 = _cptofcd("N", strlen("N"));
                    CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
                            &x[fsupc], &incx);
#else
                    ztrsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr,
                            &x[fsupc], &incx);
#endif
                }
            } /* for k ... */
        }
    } else { /* Form x := conj(inv(A'))*x */

        if ( lsame_(uplo, "L") ) {
            /* Form x := conj(inv(L'))*x */
            if ( L->nrow == 0 ) return 0; /* Quick return */

            for (k = Lstore->nsuper; k >= 0; --k) {
                fsupc = L_FST_SUPC(k);
                istart = L_SUB_START(fsupc);
                nsupr = L_SUB_START(fsupc+1) - istart;
                nsupc = L_FST_SUPC(k+1) - fsupc;
                luptr = L_NZ_START(fsupc);

                solve_ops += 8 * (nsupr - nsupc) * nsupc;

                for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
                    iptr = istart + nsupc;
                    for (i = L_NZ_START(jcol) + nsupc;
                                i < L_NZ_START(jcol+1); i++) {
                        irow = L_SUB(iptr);
                        zz_conj(&temp, &Lval[i]);
                        zz_mult(&comp_zero, &x[irow], &temp);
                        z_sub(&x[jcol], &x[jcol], &comp_zero);
                        iptr++;
                    }
                }

                if ( nsupc > 1 ) {
                    solve_ops += 4 * nsupc * (nsupc - 1);
#ifdef _CRAY
                    ftcs1 = _cptofcd("L", strlen("L"));
                    ftcs2 = _cptofcd(trans, strlen("T"));
                    ftcs3 = _cptofcd("U", strlen("U"));
                    ZTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
                        &x[fsupc], &incx);
#else
                    ztrsv_("L", trans, "U", &nsupc, &Lval[luptr], &nsupr,
                           &x[fsupc], &incx);
#endif
                }
            }
        } else {
            /* Form x := conj(inv(U'))*x */
            if ( U->nrow == 0 ) return 0; /* Quick return */

            for (k = 0; k <= Lstore->nsuper; k++) {
                fsupc = L_FST_SUPC(k);
                nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
                nsupc = L_FST_SUPC(k+1) - fsupc;
                luptr = L_NZ_START(fsupc);

                for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
                    solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
                    for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) {
                        irow = U_SUB(i);
                        zz_conj(&temp, &Uval[i]);
                        zz_mult(&comp_zero, &x[irow], &temp);
                        z_sub(&x[jcol], &x[jcol], &comp_zero);
                    }
                }

                /* 1 z_div costs 10 flops */
                solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc;

                if ( nsupc == 1 ) {
                    zz_conj(&temp, &Lval[luptr]);
                    z_div(&x[fsupc], &x[fsupc], &temp);
                } else {
#ifdef _CRAY
                    ftcs1 = _cptofcd("U", strlen("U"));
                    ftcs2 = _cptofcd(trans, strlen("T"));
                    ftcs3 = _cptofcd("N", strlen("N"));
                    ZTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
                            &x[fsupc], &incx);
#else
                    ztrsv_("U", trans, "N", &nsupc, &Lval[luptr], &nsupr,
                               &x[fsupc], &incx);
#endif
                }
            } /* for k ... */
        }
    }

    stat->ops[SOLVE] += solve_ops;
    SUPERLU_FREE(work);
    return 0;
}