Ejemplo n.º 1
0
void dd (header *hd)
{	header *st=hd,*hd1,*result;
	int c1,c2,i,j,r;
	double *m1,*m2,*mr;
	complex *mc1,*mc2,*mcr,hc1,hc2;
	interval *mi1,*mi2,*mir,hi1,hi2;
	hd1=next_param(st);
	equal_params_2(&hd,&hd1); if (error) return;
	getmatrix(hd,&r,&c1,&m1); if (r!=1) wrong_arg();
	getmatrix(hd1,&r,&c2,&m2); if (r!=1) wrong_arg();
	if (c1!=c2) wrong_arg();
	if (iscomplex(hd)) /* complex values */
	{	mc1=(complex *)m1; mc2=(complex *)m2;
		result=new_cmatrix(1,c1,""); if (error) return;
		mcr=(complex *)matrixof(result);
		memmove((char *)mcr,(char *)mc2,c1*sizeof(complex));
		for (i=1; i<c1; i++)
		{	for (j=c1-1; j>=i; j--)
			{	if (mc1[j][0]==mc1[j-i][0] &&
					mc1[j][1]==mc1[j-i][1]) wrong_arg();
				c_sub(mcr[j],mcr[j-1],hc1);
				c_sub(mc1[j],mc1[j-i],hc2);
				c_div(hc1,hc2,mcr[j]);
			}
		}
	}
	else if (isinterval(hd)) /* complex values */
	{	mi1=(complex *)m1; mi2=(complex *)m2;
		result=new_imatrix(1,c1,""); if (error) return;
		mir=(interval *)matrixof(result);
		memmove((char *)mir,(char *)mi2,c1*sizeof(interval));
		for (i=1; i<c1; i++)
		{	for (j=c1-1; j>=i; j--)
			{	i_sub(mir[j],mir[j-1],hi1);
				if (hi1[0]<=0 && hi1[1]>=0)
				{	output("Interval points coincide\n");
					error=1; return;
				}
				i_sub(mi1[j],mi1[j-i],hi2);
				i_div(hi1,hi2,mir[j]);
			}
		}
	}
	else if (isreal(hd))
	{	result=new_matrix(1,c1,""); if (error) return;
		mr=matrixof(result);
		memmove((char *)mr,(char *)m2,c1*sizeof(double));
		for (i=1; i<c1; i++)
		{	for (j=c1-1; j>=i; j--)
			{	if (m1[j]==m1[j-i]) wrong_arg();
				mr[j]=(mr[j]-mr[j-1])/(m1[j]-m1[j-i]);
			}
		}	
	}
	else wrong_arg();
	moveresult(st,result);
}
Ejemplo n.º 2
0
/**
 * \brief Expand pole product
 * \param c         resulting filter coefficients
 * \param poles     pole locations
 * \param K         number of poles
 * \ingroup vyv_gaussian
 *
 * This routine expands the product to obtain the filter coefficients:
 * \f[ \prod_{k=0}^{K-1}\frac{\mathrm{poles}[k]-1}{\mathrm{poles}[k]-z^{-1}}
 = \frac{c[0]}{1+\sum_{k=1}^K c[k] z^{-k}}. \f]
 */
static void expand_pole_product(double *c, const complex4c *poles, int K)
{
	complex4c denom[VYV_MAX_K + 1];
	int k, j;

	assert(K <= VYV_MAX_K);
	denom[0] = poles[0];
	denom[1] = make_complex(-1, 0);

	for (k = 1; k < K; ++k)
	{
		denom[k + 1] = c_neg(denom[k]);

		for (j = k; j > 0; --j)
			denom[j] = c_sub(c_mul(denom[j], poles[k]), denom[j - 1]);

		denom[0] = c_mul(denom[0], poles[k]);
	}

	for (k = 1; k <= K; ++k)
		c[k] = c_div(denom[k], denom[0]).real;

	for (c[0] = 1, k = 1; k <= K; ++k)
		c[0] += c[k];

	return;
}
Ejemplo n.º 3
0
Archivo: pol.c Proyecto: rforge/muste
static struct polynom *pol_div(struct polynom *p,struct polynom *q,struct polynom *p1,struct polynom *p2)
        {
        int i,j;
        struct complex z,z1;

		z.x=0; z.y=0; // RS 7.2.2013
        p->n=p1->n-p2->n;
        if (p->n<0)
            {
            sur_print("\nDegree of dividend < Degree of divisor");
            WAIT; return(p);
            }
        for (i=0; i<=p->n; ++i)
            p->a[i].x=p->a[i].y=0.0;
        for (i=0; i<=p1->n; ++i)
            { q->a[i].x=p1->a[i].x; q->a[i].y=p1->a[i].y; }

        for (i=p1->n; i>=p2->n; --i)
            {
            if (c_zero(&(q->a[i]))) continue;
            c_div(&z,&(q->a[i]),&(p2->a[p2->n]));
            p->a[i-p2->n].x=z.x; p->a[i-p2->n].y=z.y;
            q->a[i].x=q->a[i].y=0.0;
            for (j=i-1; j>=i-p2->n; --j)
                c_sub(&(q->a[j]),&(q->a[j]),
                      c_mult(&z1,&z,&(p2->a[p2->n-i+j])));
            }
        i=p2->n-1;
        while (c_zero(&(q->a[i])) && i>0) --i;
        q->n=i;
        return(p);
        }
Ejemplo n.º 4
0
void Dn_up(struct c_complex z,long nstop,struct c_complex*D)

/*:16*/
#line 216 "./mie.w"

{
struct c_complex zinv,k_over_z;
long k;

D[0]= c_inv(c_tan(z));
zinv= c_inv(z);

for(k= 1;k<nstop;k++){
k_over_z= c_smul((double)k,zinv);
D[k]= c_sub(c_inv(c_sub(k_over_z,D[k-1])),k_over_z);
}
}
Ejemplo n.º 5
0
    void CholeskySolver<TYPE>::CholeskyDecomposition(const BaseMatrix<TYPE> &bm)
    {
        assert(bm.Nrows() == bm.Ncols());

        int n = lm.Nrows();
        const TYPE &e = SimpleSolver<TYPE>::epsilon;
        TYPE temp;
        for (int i = 1; i <= n; ++i)
        {
            if (i == 1)
            {
                temp = bm(i, i);
            }
            else
            {
                temp = bm(i, i) - (c_sub(lm, i, i, 1, i - 1) * t(c_sub(lm, i, i, 1, i - 1)))(1, 1);
            }
            if (temp <= e)
            {
                LinearEquationSolver<TYPE>::fail = true;
                return;
            }
            else
            {
                lm(i, i) = std::sqrt(temp);
                for (int j = i + 1; j <= n; ++j)
                {
                    if (i == 1)
                    {
                        lm(j, i) = bm(j, i) / lm(i, i);
                    }
                    else
                    {
                        lm(j, i) = (bm(j, i) - (c_sub(lm, i, i, 1, i - 1) * t(c_sub(lm, j, j, 1, i - 1)))(1, 1)) / lm(i, i);
                    }
                }
            }
        }
    }
Ejemplo n.º 6
0
void polydd (header *hd)
{	header *st=hd,*hd1,*result;
	int c1,c2,i,j,r;
	double *m1,*m2,*mr,x;
	complex *mc1,*mc2,*mcr,hc,xc;
	hd1=next_param(st);
	equal_params_2(&hd,&hd1); if (error) return;
	getmatrix(hd,&r,&c1,&m1); if (r!=1) wrong_arg();
	getmatrix(hd1,&r,&c2,&m2); if (r!=1) wrong_arg();
	if (c1!=c2) wrong_arg();
	if (iscomplex(hd)) /* complex values */
	{	mc1=(complex *)m1; mc2=(complex *)m2;
		result=new_cmatrix(1,c1,""); if (error) return;
		mcr=(complex *)matrixof(result);
		c_copy(mcr[c1-1],mc2[c1-1]);
		for (i=c1-2; i>=0; i--)
		{	c_copy(xc,mc1[i]);
			c_mult(xc,mcr[i+1],hc);
			c_sub(mc2[i],hc,mcr[i]);
			for (j=i+1; j<c1-1; j++) 
			{	c_mult(xc,mcr[j+1],hc);
				c_sub(mcr[j],hc,mcr[j]);
			}
		}
	}
	else
	{	result=new_matrix(1,c1,""); if (error) return;
		mr=matrixof(result);
		mr[c1-1]=m2[c1-1];
		for (i=c1-2; i>=0; i--)
		{	x=m1[i];
			mr[i]=m2[i]-x*mr[i+1];
			for (j=i+1; j<c1-1; j++) mr[j]=mr[j]-x*mr[j+1];
		}
	}
	moveresult(st,result);
}
Ejemplo n.º 7
0
void Dn_down(struct c_complex z,long nstop,struct c_complex*D)

/*:19*/
#line 247 "./mie.w"

{
long k;
struct c_complex zinv,k_over_z;

D[nstop-1]= Lentz_Dn(z,nstop);
zinv= c_inv(z);

for(k= nstop-1;k>=1;k--){
k_over_z= c_smul((double)k,zinv);
D[k-1]= c_sub(k_over_z,c_inv(c_add(D[k],k_over_z)));
}
}
Ejemplo n.º 8
0
//傅里叶变化
void fft(int N,complex f[])
{
  complex t,wn;//中间变量
  int i,j,k,m,n,l,r,M;
  int la,lb,lc;
  /*----计算分解的级数M=log2(N)----*/
  for(i=N,M=1;(i=i/2)!=1;M++); 
  /*----按照倒位序重新排列原信号----*/
  for(i=1,j=N/2;i<=N-2;i++)
  {
    if(i<j)
    {
      t=f[j];
      f[j]=f[i];
      f[i]=t;
    }
    k=N/2;
    while(k<=j)
    {
      j=j-k;
      k=k/2;
    }
    j=j+k;
  }

  /*----FFT算法----*/
  for(m=1;m<=M;m++)
  {
    la=pow(2,m); //la=2^m代表第m级每个分组所含节点数		
    lb=la/2;    //lb代表第m级每个分组所含碟形单元数
                 //同时它也表示每个碟形单元上下节点之间的距离
    /*----碟形运算----*/
    for(l=1;l<=lb;l++)
    {
      r=(l-1)*pow(2,M-m);	
      for(n=l-1;n<N-1;n=n+la) //遍历每个分组,分组总数为N/la
      {
        lc=n+lb;  //n,lc分别代表一个碟形单元的上、下节点编号     
        Wn_i(N,r,&wn,1);//wn=Wnr
        c_mul(f[lc],wn,&t);//t = f[lc] * wn复数运算
        c_sub(f[n],t,&(f[lc]));//f[lc] = f[n] - f[lc] * Wnr
        c_plus(f[n],t,&(f[n]));//f[n] = f[n] + f[lc] * Wnr
      }
    }
  }
}
Ejemplo n.º 9
0
/*! \brief Check the inf-norm of the error vector 
 */
void cinf_norm_error(int nrhs, SuperMatrix *X, complex *xtrue)
{
    DNformat *Xstore;
    float err, xnorm;
    complex *Xmat, *soln_work;
    complex temp;
    int i, j;

    Xstore = X->Store;
    Xmat = Xstore->nzval;

    for (j = 0; j < nrhs; j++) {
      soln_work = &Xmat[j*Xstore->lda];
      err = xnorm = 0.0;
      for (i = 0; i < X->nrow; i++) {
        c_sub(&temp, &soln_work[i], &xtrue[i]);
	err = SUPERLU_MAX(err, c_abs(&temp));
	xnorm = SUPERLU_MAX(xnorm, c_abs(&soln_work[i]));
      }
      err = err / xnorm;
      printf("||X - Xtrue||/||X|| = %e\n", err);
    }
}
Ejemplo n.º 10
0
Archivo: pol.c Proyecto: rforge/muste
static struct polynom *pol_sub(struct polynom *p,struct polynom *p1,struct polynom *p2)
        {
        int i;
// RS REM        struct complex tulo;

        p->n=(p1->n>p2->n)? (p1->n):(p2->n);

        for (i=0; i<=p->n; ++i)
            {
            if (i<=p1->n)
                {
                if (i<=p2->n)
                    c_sub(&(p->a[i]),&(p1->a[i]),&(p2->a[i]));
                else
                    { p->a[i].x=p1->a[i].x; p->a[i].y=p1->a[i].y; }
                }
            else
                { p->a[i].x=-(p2->a[i].x); p->a[i].y=-(p2->a[i].y); }
            }
        i=p->n;
        while (c_zero(&(p->a[i])) && i>0) --i;
        p->n=i;
        return(p);
        }
Ejemplo n.º 11
0
/*! \brief Performs numeric block updates within the relaxed snode. 
 */
int
csnode_bmod (
	    const int  jcol,	  /* in */
	    const int  jsupno,    /* in */
	    const int  fsupc,     /* in */
	    complex     *dense,    /* in */
	    complex     *tempv,    /* working array */
	    GlobalLU_t *Glu,      /* modified */
	    SuperLUStat_t *stat   /* output */
	    )
{
#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
    _fcd ftcs1 = _cptofcd("L", strlen("L")),
	 ftcs2 = _cptofcd("N", strlen("N")),
	 ftcs3 = _cptofcd("U", strlen("U"));
#endif
    int            incx = 1, incy = 1;
    complex         alpha = {-1.0, 0.0},  beta = {1.0, 0.0};
#endif

    complex   comp_zero = {0.0, 0.0};
    int            luptr, nsupc, nsupr, nrow;
    int            isub, irow, i, iptr; 
    register int   ufirst, nextlu;
    int            *lsub, *xlsub;
    complex         *lusup;
    int            *xlusup;
    flops_t *ops = stat->ops;

    lsub    = Glu->lsub;
    xlsub   = Glu->xlsub;
    lusup   = (complex *) Glu->lusup;
    xlusup  = Glu->xlusup;

    nextlu = xlusup[jcol];
    
    /*
     *	Process the supernodal portion of L\U[*,j]
     */
    for (isub = xlsub[fsupc]; isub < xlsub[fsupc+1]; isub++) {
  	irow = lsub[isub];
	lusup[nextlu] = dense[irow];
        dense[irow] = comp_zero;
	++nextlu;
    }

    xlusup[jcol + 1] = nextlu;	/* Initialize xlusup for next column */
    
    if ( fsupc < jcol ) {

	luptr = xlusup[fsupc];
	nsupr = xlsub[fsupc+1] - xlsub[fsupc];
	nsupc = jcol - fsupc;	/* Excluding jcol */
	ufirst = xlusup[jcol];	/* Points to the beginning of column
				   jcol in supernode L\U(jsupno). */
	nrow = nsupr - nsupc;

	ops[TRSV] += 4 * nsupc * (nsupc - 1);
	ops[GEMV] += 8 * nrow * nsupc;

#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
	CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr], &nsupr, 
	      &lusup[ufirst], &incx );
	CGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr, 
		&lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
#else
#if SCIPY_FIX
       if (nsupr < nsupc) {
           /* Fail early rather than passing in invalid parameters to TRSV. */
           ABORT("failed to factorize matrix");
       }
#endif
	ctrsv_( "L", "N", "U", &nsupc, &lusup[luptr], &nsupr, 
	      &lusup[ufirst], &incx );
	cgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr, 
		&lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
#endif
#else
	clsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );
	cmatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc], 
			&lusup[ufirst], &tempv[0] );

        /* Scatter tempv[*] into lusup[*] */
	iptr = ufirst + nsupc;
	for (i = 0; i < nrow; i++) {
	    c_sub(&lusup[iptr], &lusup[iptr], &tempv[i]);
            ++iptr;
	    tempv[i] = comp_zero;
	}
#endif

    }

    return 0;
}
Ejemplo n.º 12
0
void LUdecomposition(long n, complex **A, complex *b, complex *x)
{

  long i,j,k,*p;
  complex akk, bk, z, sum; 
  complex *Ak; 


  /* Allocate auxiliary variables */

  p = (long *)Mem(MEM_ALLOC, n, sizeof(long)); 


  /* Gaussian elimination with partial pivoting */

  for (k=0; k<n-1; k++){ /* columns of A */


    /* --- Pivoting --- */

    akk.re = 0.0; 
    akk.im = 0.0; 
    j = k; 
    for (i=k; i<n; i++){
      if ( c_norm(A[i][k]) > c_norm(akk)){
	akk = A[i][k]; 

	CheckValue(FUNCTION_NAME, "A[i][k].re","", A[i][k].re, -INFTY, INFTY);
	CheckValue(FUNCTION_NAME, "A[i][k].im","", A[i][k].im, -INFTY, INFTY);

	j = i; 
      }
    }

    if (j != k){ /* swap rows j and k*/


      Ak   = A[k];  
      A[k] = A[j]; /* swap pointers*/
      A[j] = Ak; 

      bk   = b[k]; 
      b[k] = b[j]; 
      b[j] = bk; 

    }

    p[k] = j;  /* Keep list of permutations */

    /* Sanity check */

    if (akk.re != A[k][k].re && akk.im != A[k][k].im){
      Die(FUNCTION_NAME, "Something went wrong with pivoting?"); 
      return; 
    }

    if (akk.re == 0.0 && akk.im == 0.0){
      Die(FUNCTION_NAME, "Matrix singular?"); 
      return; 
    }

    /* Store L part */

    for (i=k+1;i<n; i++){
      A[i][k] = c_div(A[i][k],A[k][k]); 

      CheckValue(FUNCTION_NAME, "A[i][k].re","", A[i][k].re, -INFTY, INFTY);
      CheckValue(FUNCTION_NAME, "A[i][k].im","", A[i][k].im, -INFTY, INFTY);

    }

    /* Update */

    for (i=k+1; i<n; i++){

      /* update b */

      z = c_mul(A[i][k], b[k]);  
      b[i] = c_sub(b[i], z); 

      CheckValue(FUNCTION_NAME, "b[i].re","", b[i].re, -INFTY, INFTY);
      CheckValue(FUNCTION_NAME, "b[i].im","", b[i].im, -INFTY, INFTY);

      for (j=k+1;  j<n; j++){
	
	/* Update U part of A */
	
	z = c_mul( A[i][k], A[k][j]);
	A[i][j] = c_sub(A[i][j],z);  

	CheckValue(FUNCTION_NAME, "A[i][j].re","", A[i][j].re, -INFTY, INFTY);
	CheckValue(FUNCTION_NAME, "A[i][j].im","", A[i][j].im, -INFTY, INFTY);
      }
    }        
  }


  /* Solve x */

  x[n-1] = c_div(b[n-1],A[n-1][n-1]);

  for (i=n-2; i>=0; i--){
    sum.re = 0.0;
    sum.im = 0.0;
    for (j=i+1; j<n; j++){
      z   = c_mul(A[i][j], x[j]);
      sum = c_add(sum, z);
    }
    z    = c_sub(b[i], sum);
    x[i] = c_div(z, A[i][i]);
  }

  Mem(MEM_FREE, p); 

  /* Return */

  return;   
    
  
}
Ejemplo n.º 13
0
int
sp_ctrsv(char *uplo, char *trans, char *diag, SuperMatrix *L, 
	 SuperMatrix *U, complex *x, int *info)
{
/*
 *   Purpose
 *   =======
 *
 *   sp_ctrsv() solves one of the systems of equations   
 *       A*x = b,   or   A'*x = b,
 *   where b and x are n element vectors and A is a sparse unit , or   
 *   non-unit, upper or lower triangular matrix.   
 *   No test for singularity or near-singularity is included in this   
 *   routine. Such tests must be performed before calling this routine.   
 *
 *   Parameters   
 *   ==========   
 *
 *   uplo   - (input) char*
 *            On entry, uplo specifies whether the matrix is an upper or   
 *             lower triangular matrix as follows:   
 *                uplo = 'U' or 'u'   A is an upper triangular matrix.   
 *                uplo = 'L' or 'l'   A is a lower triangular matrix.   
 *
 *   trans  - (input) char*
 *             On entry, trans specifies the equations to be solved as   
 *             follows:   
 *                trans = 'N' or 'n'   A*x = b.   
 *                trans = 'T' or 't'   A'*x = b.   
 *                trans = 'C' or 'c'   A'*x = b.   
 *
 *   diag   - (input) char*
 *             On entry, diag specifies whether or not A is unit   
 *             triangular as follows:   
 *                diag = 'U' or 'u'   A is assumed to be unit triangular.   
 *                diag = 'N' or 'n'   A is not assumed to be unit   
 *                                    triangular.   
 *	     
 *   L       - (input) SuperMatrix*
 *	       The factor L from the factorization Pr*A*Pc=L*U. Use
 *             compressed row subscripts storage for supernodes,
 *             i.e., L has types: Stype = SC, Dtype = SLU_C, Mtype = TRLU.
 *
 *   U       - (input) SuperMatrix*
 *	        The factor U from the factorization Pr*A*Pc=L*U.
 *	        U has types: Stype = NC, Dtype = SLU_C, Mtype = TRU.
 *    
 *   x       - (input/output) complex*
 *             Before entry, the incremented array X must contain the n   
 *             element right-hand side vector b. On exit, X is overwritten 
 *             with the solution vector x.
 *
 *   info    - (output) int*
 *             If *info = -i, the i-th argument had an illegal value.
 *
 */
#ifdef _CRAY
    _fcd ftcs1 = _cptofcd("L", strlen("L")),
	 ftcs2 = _cptofcd("N", strlen("N")),
	 ftcs3 = _cptofcd("U", strlen("U"));
#endif
    SCformat *Lstore;
    NCformat *Ustore;
    complex   *Lval, *Uval;
    int incx = 1, incy = 1;
    complex alpha = {1.0, 0.0}, beta = {1.0, 0.0};
    complex comp_zero = {0.0, 0.0};
    int nrow;
    int fsupc, nsupr, nsupc, luptr, istart, irow;
    int i, k, iptr, jcol;
    complex *work;
    flops_t solve_ops;
    extern SuperLUStat_t SuperLUStat;

    /* Test the input parameters */
    *info = 0;
    if ( !lsame_(uplo,"L") && !lsame_(uplo, "U") ) *info = -1;
    else if ( !lsame_(trans, "N") && !lsame_(trans, "T") ) *info = -2;
    else if ( !lsame_(diag, "U") && !lsame_(diag, "N") ) *info = -3;
    else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4;
    else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5;
    if ( *info ) {
	i = -(*info);
	xerbla_("sp_ctrsv", &i);
	return 0;
    }

    Lstore = L->Store;
    Lval = Lstore->nzval;
    Ustore = U->Store;
    Uval = Ustore->nzval;
    solve_ops = 0;

    if ( !(work = complexCalloc(L->nrow)) )
	ABORT("Malloc fails for work in sp_ctrsv().");
    
    if ( lsame_(trans, "N") ) {	/* Form x := inv(A)*x. */
	
	if ( lsame_(uplo, "L") ) {
	    /* Form x := inv(L)*x */
    	    if ( L->nrow == 0 ) return 0; /* Quick return */
	    
	    for (k = 0; k <= Lstore->nsuper; k++) {
		fsupc = L_FST_SUPC(k);
		istart = L_SUB_START(fsupc);
		nsupr = L_SUB_START(fsupc+1) - istart;
		nsupc = L_FST_SUPC(k+1) - fsupc;
		luptr = L_NZ_START(fsupc);
		nrow = nsupr - nsupc;

	        solve_ops += 4 * nsupc * (nsupc - 1);
	        solve_ops += 8 * nrow * nsupc;

		if ( nsupc == 1 ) {
		    for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); ++iptr) {
			irow = L_SUB(iptr);
			++luptr;
			cc_mult(&comp_zero, &x[fsupc], &Lval[luptr]);
			c_sub(&x[irow], &x[irow], &comp_zero);
		    }
		} else {
#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
		    CTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
		       	&x[fsupc], &incx);
		
		    CGEMV(ftcs2, &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], 
		       	&nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
#else
		    ctrsv_("L", "N", "U", &nsupc, &Lval[luptr], &nsupr,
		       	&x[fsupc], &incx);
		
		    cgemv_("N", &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], 
		       	&nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
#endif
#else
		    clsolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc]);
		
		    cmatvec ( nsupr, nsupr-nsupc, nsupc, &Lval[luptr+nsupc],
			&x[fsupc], &work[0] );
#endif		
		
		    iptr = istart + nsupc;
		    for (i = 0; i < nrow; ++i, ++iptr) {
			irow = L_SUB(iptr);
			c_sub(&x[irow], &x[irow], &work[i]); /* Scatter */
			work[i] = comp_zero;

		    }
	 	}
	    } /* for k ... */
	    
	} else {
	    /* Form x := inv(U)*x */
	    
	    if ( U->nrow == 0 ) return 0; /* Quick return */
	    
	    for (k = Lstore->nsuper; k >= 0; k--) {
	    	fsupc = L_FST_SUPC(k);
	    	nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
	    	nsupc = L_FST_SUPC(k+1) - fsupc;
	    	luptr = L_NZ_START(fsupc);
		
    	        solve_ops += 4 * nsupc * (nsupc + 1);

		if ( nsupc == 1 ) {
		    c_div(&x[fsupc], &x[fsupc], &Lval[luptr]);
		    for (i = U_NZ_START(fsupc); i < U_NZ_START(fsupc+1); ++i) {
			irow = U_SUB(i);
			cc_mult(&comp_zero, &x[fsupc], &Uval[i]);
			c_sub(&x[irow], &x[irow], &comp_zero);
		    }
		} else {
#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
		    CTRSV(ftcs3, ftcs2, ftcs2, &nsupc, &Lval[luptr], &nsupr,
		       &x[fsupc], &incx);
#else
		    ctrsv_("U", "N", "N", &nsupc, &Lval[luptr], &nsupr,
		       &x[fsupc], &incx);
#endif
#else		
		    cusolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc] );
#endif		

		    for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
		        solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
		    	for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); 
				i++) {
			    irow = U_SUB(i);
			cc_mult(&comp_zero, &x[jcol], &Uval[i]);
			c_sub(&x[irow], &x[irow], &comp_zero);
		    	}
                    }
		}
	    } /* for k ... */
	    
	}
    } else { /* Form x := inv(A')*x */
	
	if ( lsame_(uplo, "L") ) {
	    /* Form x := inv(L')*x */
    	    if ( L->nrow == 0 ) return 0; /* Quick return */
	    
	    for (k = Lstore->nsuper; k >= 0; --k) {
	    	fsupc = L_FST_SUPC(k);
	    	istart = L_SUB_START(fsupc);
	    	nsupr = L_SUB_START(fsupc+1) - istart;
	    	nsupc = L_FST_SUPC(k+1) - fsupc;
	    	luptr = L_NZ_START(fsupc);

		solve_ops += 8 * (nsupr - nsupc) * nsupc;

		for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
		    iptr = istart + nsupc;
		    for (i = L_NZ_START(jcol) + nsupc; 
				i < L_NZ_START(jcol+1); i++) {
			irow = L_SUB(iptr);
			cc_mult(&comp_zero, &x[irow], &Lval[i]);
		    	c_sub(&x[jcol], &x[jcol], &comp_zero);
			iptr++;
		    }
		}
		
		if ( nsupc > 1 ) {
		    solve_ops += 4 * nsupc * (nsupc - 1);
#ifdef _CRAY
                    ftcs1 = _cptofcd("L", strlen("L"));
                    ftcs2 = _cptofcd("T", strlen("T"));
                    ftcs3 = _cptofcd("U", strlen("U"));
		    CTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
			&x[fsupc], &incx);
#else
		    ctrsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr,
			&x[fsupc], &incx);
#endif
		}
	    }
	} else {
	    /* Form x := inv(U')*x */
	    if ( U->nrow == 0 ) return 0; /* Quick return */
	    
	    for (k = 0; k <= Lstore->nsuper; k++) {
	    	fsupc = L_FST_SUPC(k);
	    	nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
	    	nsupc = L_FST_SUPC(k+1) - fsupc;
	    	luptr = L_NZ_START(fsupc);

		for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
		    solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
		    for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) {
			irow = U_SUB(i);
			cc_mult(&comp_zero, &x[irow], &Uval[i]);
		    	c_sub(&x[jcol], &x[jcol], &comp_zero);
		    }
		}

		solve_ops += 4 * nsupc * (nsupc + 1);

		if ( nsupc == 1 ) {
		    c_div(&x[fsupc], &x[fsupc], &Lval[luptr]);
		} else {
#ifdef _CRAY
                    ftcs1 = _cptofcd("U", strlen("U"));
                    ftcs2 = _cptofcd("T", strlen("T"));
                    ftcs3 = _cptofcd("N", strlen("N"));
		    CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
			    &x[fsupc], &incx);
#else
		    ctrsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr,
			    &x[fsupc], &incx);
#endif
		}
	    } /* for k ... */
	}
    }

    SuperLUStat.ops[SOLVE] += solve_ops;
    SUPERLU_FREE(work);
    return 0;
}
Ejemplo n.º 14
0
/*! \brief Solves one of the systems of equations A*x = b,   or   A'*x = b
 * 
 * <pre>
 *   Purpose
 *   =======
 *
 *   sp_ctrsv() solves one of the systems of equations   
 *       A*x = b,   or   A'*x = b,
 *   where b and x are n element vectors and A is a sparse unit , or   
 *   non-unit, upper or lower triangular matrix.   
 *   No test for singularity or near-singularity is included in this   
 *   routine. Such tests must be performed before calling this routine.   
 *
 *   Parameters   
 *   ==========   
 *
 *   uplo   - (input) char*
 *            On entry, uplo specifies whether the matrix is an upper or   
 *             lower triangular matrix as follows:   
 *                uplo = 'U' or 'u'   A is an upper triangular matrix.   
 *                uplo = 'L' or 'l'   A is a lower triangular matrix.   
 *
 *   trans  - (input) char*
 *             On entry, trans specifies the equations to be solved as   
 *             follows:   
 *                trans = 'N' or 'n'   A*x = b.   
 *                trans = 'T' or 't'   A'*x = b.
 *                trans = 'C' or 'c'   A^H*x = b.   
 *
 *   diag   - (input) char*
 *             On entry, diag specifies whether or not A is unit   
 *             triangular as follows:   
 *                diag = 'U' or 'u'   A is assumed to be unit triangular.   
 *                diag = 'N' or 'n'   A is not assumed to be unit   
 *                                    triangular.   
 *	     
 *   L       - (input) SuperMatrix*
 *	       The factor L from the factorization Pr*A*Pc=L*U. Use
 *             compressed row subscripts storage for supernodes,
 *             i.e., L has types: Stype = SC, Dtype = SLU_C, Mtype = TRLU.
 *
 *   U       - (input) SuperMatrix*
 *	        The factor U from the factorization Pr*A*Pc=L*U.
 *	        U has types: Stype = NC, Dtype = SLU_C, Mtype = TRU.
 *    
 *   x       - (input/output) complex*
 *             Before entry, the incremented array X must contain the n   
 *             element right-hand side vector b. On exit, X is overwritten 
 *             with the solution vector x.
 *
 *   info    - (output) int*
 *             If *info = -i, the i-th argument had an illegal value.
 * </pre>
 */
int
sp_ctrsv(char *uplo, char *trans, char *diag, SuperMatrix *L, 
         SuperMatrix *U, complex *x, SuperLUStat_t *stat, int *info)
{
#ifdef _CRAY
    _fcd ftcs1 = _cptofcd("L", strlen("L")),
	 ftcs2 = _cptofcd("N", strlen("N")),
	 ftcs3 = _cptofcd("U", strlen("U"));
#endif
    SCformat *Lstore;
    NCformat *Ustore;
    complex   *Lval, *Uval;
    int incx = 1, incy = 1;
    complex temp;
    complex alpha = {1.0, 0.0}, beta = {1.0, 0.0};
    complex comp_zero = {0.0, 0.0};
    int nrow;
    int fsupc, nsupr, nsupc, luptr, istart, irow;
    int i, k, iptr, jcol;
    complex *work;
    flops_t solve_ops;

    /* Test the input parameters */
    *info = 0;
    if ( strncmp(uplo,"L", 1)!=0 && strncmp(uplo, "U", 1)!=0 ) *info = -1;
    else if ( strncmp(trans, "N", 1)!=0 && strncmp(trans, "T", 1)!=0 && 
              strncmp(trans, "C", 1)!=0) *info = -2;
    else if ( strncmp(diag, "U", 1)!=0 && strncmp(diag, "N", 1)!=0 )
         *info = -3;
    else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4;
    else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5;
    if ( *info ) {
	i = -(*info);
	input_error("sp_ctrsv", &i);
	return 0;
    }

    Lstore = L->Store;
    Lval = Lstore->nzval;
    Ustore = U->Store;
    Uval = Ustore->nzval;
    solve_ops = 0;

    if ( !(work = complexCalloc(L->nrow)) )
	ABORT("Malloc fails for work in sp_ctrsv().");
    
    if ( strncmp(trans, "N", 1)==0 ) {	/* Form x := inv(A)*x. */
	
	if ( strncmp(uplo, "L", 1)==0 ) {
	    /* Form x := inv(L)*x */
    	    if ( L->nrow == 0 ) return 0; /* Quick return */
	    
	    for (k = 0; k <= Lstore->nsuper; k++) {
		fsupc = L_FST_SUPC(k);
		istart = L_SUB_START(fsupc);
		nsupr = L_SUB_START(fsupc+1) - istart;
		nsupc = L_FST_SUPC(k+1) - fsupc;
		luptr = L_NZ_START(fsupc);
		nrow = nsupr - nsupc;

                /* 1 c_div costs 10 flops */
	        solve_ops += 4 * nsupc * (nsupc - 1) + 10 * nsupc;
	        solve_ops += 8 * nrow * nsupc;

		if ( nsupc == 1 ) {
		    for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); ++iptr) {
			irow = L_SUB(iptr);
			++luptr;
			cc_mult(&comp_zero, &x[fsupc], &Lval[luptr]);
			c_sub(&x[irow], &x[irow], &comp_zero);
		    }
		} else {
#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
		    CTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
		       	&x[fsupc], &incx);
		
		    CGEMV(ftcs2, &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], 
		       	&nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
#else
		    ctrsv_("L", "N", "U", &nsupc, &Lval[luptr], &nsupr,
		       	&x[fsupc], &incx);
		
		    cgemv_("N", &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], 
		       	&nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
#endif
#else
		    clsolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc]);
		
		    cmatvec ( nsupr, nsupr-nsupc, nsupc, &Lval[luptr+nsupc],
                             &x[fsupc], &work[0] );
#endif		
		
		    iptr = istart + nsupc;
		    for (i = 0; i < nrow; ++i, ++iptr) {
			irow = L_SUB(iptr);
			c_sub(&x[irow], &x[irow], &work[i]); /* Scatter */
			work[i] = comp_zero;

		    }
	 	}
	    } /* for k ... */
	    
	} else {
	    /* Form x := inv(U)*x */
	    
	    if ( U->nrow == 0 ) return 0; /* Quick return */
	    
	    for (k = Lstore->nsuper; k >= 0; k--) {
	    	fsupc = L_FST_SUPC(k);
	    	nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
	    	nsupc = L_FST_SUPC(k+1) - fsupc;
	    	luptr = L_NZ_START(fsupc);
		
                /* 1 c_div costs 10 flops */
    	        solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc;

		if ( nsupc == 1 ) {
		    c_div(&x[fsupc], &x[fsupc], &Lval[luptr]);
		    for (i = U_NZ_START(fsupc); i < U_NZ_START(fsupc+1); ++i) {
			irow = U_SUB(i);
			cc_mult(&comp_zero, &x[fsupc], &Uval[i]);
			c_sub(&x[irow], &x[irow], &comp_zero);
		    }
		} else {
#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
		    CTRSV(ftcs3, ftcs2, ftcs2, &nsupc, &Lval[luptr], &nsupr,
		       &x[fsupc], &incx);
#else
		    ctrsv_("U", "N", "N", &nsupc, &Lval[luptr], &nsupr,
                           &x[fsupc], &incx);
#endif
#else		
		    cusolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc] );
#endif		

		    for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
		        solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
		    	for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); 
				i++) {
			    irow = U_SUB(i);
			cc_mult(&comp_zero, &x[jcol], &Uval[i]);
			c_sub(&x[irow], &x[irow], &comp_zero);
		    	}
                    }
		}
	    } /* for k ... */
	    
	}
    } else if ( strncmp(trans, "T", 1)==0 ) { /* Form x := inv(A')*x */
	
	if ( strncmp(uplo, "L", 1)==0 ) {
	    /* Form x := inv(L')*x */
    	    if ( L->nrow == 0 ) return 0; /* Quick return */
	    
	    for (k = Lstore->nsuper; k >= 0; --k) {
	    	fsupc = L_FST_SUPC(k);
	    	istart = L_SUB_START(fsupc);
	    	nsupr = L_SUB_START(fsupc+1) - istart;
	    	nsupc = L_FST_SUPC(k+1) - fsupc;
	    	luptr = L_NZ_START(fsupc);

		solve_ops += 8 * (nsupr - nsupc) * nsupc;

		for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
		    iptr = istart + nsupc;
		    for (i = L_NZ_START(jcol) + nsupc; 
				i < L_NZ_START(jcol+1); i++) {
			irow = L_SUB(iptr);
			cc_mult(&comp_zero, &x[irow], &Lval[i]);
		    	c_sub(&x[jcol], &x[jcol], &comp_zero);
			iptr++;
		    }
		}
		
		if ( nsupc > 1 ) {
		    solve_ops += 4 * nsupc * (nsupc - 1);
#ifdef _CRAY
                    ftcs1 = _cptofcd("L", strlen("L"));
                    ftcs2 = _cptofcd("T", strlen("T"));
                    ftcs3 = _cptofcd("U", strlen("U"));
		    CTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
			&x[fsupc], &incx);
#else
		    ctrsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr,
			&x[fsupc], &incx);
#endif
		}
	    }
	} else {
	    /* Form x := inv(U')*x */
	    if ( U->nrow == 0 ) return 0; /* Quick return */
	    
	    for (k = 0; k <= Lstore->nsuper; k++) {
	    	fsupc = L_FST_SUPC(k);
	    	nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
	    	nsupc = L_FST_SUPC(k+1) - fsupc;
	    	luptr = L_NZ_START(fsupc);

		for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
		    solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
		    for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) {
			irow = U_SUB(i);
			cc_mult(&comp_zero, &x[irow], &Uval[i]);
		    	c_sub(&x[jcol], &x[jcol], &comp_zero);
		    }
		}

                /* 1 c_div costs 10 flops */
		solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc;

		if ( nsupc == 1 ) {
		    c_div(&x[fsupc], &x[fsupc], &Lval[luptr]);
		} else {
#ifdef _CRAY
                    ftcs1 = _cptofcd("U", strlen("U"));
                    ftcs2 = _cptofcd("T", strlen("T"));
                    ftcs3 = _cptofcd("N", strlen("N"));
		    CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
			    &x[fsupc], &incx);
#else
		    ctrsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr,
			    &x[fsupc], &incx);
#endif
		}
	    } /* for k ... */
	}
    } else { /* Form x := conj(inv(A'))*x */
	
	if ( strncmp(uplo, "L", 1)==0 ) {
	    /* Form x := conj(inv(L'))*x */
    	    if ( L->nrow == 0 ) return 0; /* Quick return */
	    
	    for (k = Lstore->nsuper; k >= 0; --k) {
	    	fsupc = L_FST_SUPC(k);
	    	istart = L_SUB_START(fsupc);
	    	nsupr = L_SUB_START(fsupc+1) - istart;
	    	nsupc = L_FST_SUPC(k+1) - fsupc;
	    	luptr = L_NZ_START(fsupc);

		solve_ops += 8 * (nsupr - nsupc) * nsupc;

		for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
		    iptr = istart + nsupc;
		    for (i = L_NZ_START(jcol) + nsupc; 
				i < L_NZ_START(jcol+1); i++) {
			irow = L_SUB(iptr);
                        cc_conj(&temp, &Lval[i]);
			cc_mult(&comp_zero, &x[irow], &temp);
		    	c_sub(&x[jcol], &x[jcol], &comp_zero);
			iptr++;
		    }
 		}
 		
 		if ( nsupc > 1 ) {
		    solve_ops += 4 * nsupc * (nsupc - 1);
#ifdef _CRAY
                    ftcs1 = _cptofcd("L", strlen("L"));
                    ftcs2 = _cptofcd(trans, strlen("T"));
                    ftcs3 = _cptofcd("U", strlen("U"));
		    CTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
			&x[fsupc], &incx);
#else
                    ctrsv_("L", trans, "U", &nsupc, &Lval[luptr], &nsupr,
                           &x[fsupc], &incx);
#endif
		}
	    }
	} else {
	    /* Form x := conj(inv(U'))*x */
	    if ( U->nrow == 0 ) return 0; /* Quick return */
	    
	    for (k = 0; k <= Lstore->nsuper; k++) {
	    	fsupc = L_FST_SUPC(k);
	    	nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
	    	nsupc = L_FST_SUPC(k+1) - fsupc;
	    	luptr = L_NZ_START(fsupc);

		for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
		    solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
		    for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) {
			irow = U_SUB(i);
                        cc_conj(&temp, &Uval[i]);
			cc_mult(&comp_zero, &x[irow], &temp);
		    	c_sub(&x[jcol], &x[jcol], &comp_zero);
		    }
		}

                /* 1 c_div costs 10 flops */
		solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc;
 
		if ( nsupc == 1 ) {
                    cc_conj(&temp, &Lval[luptr]);
		    c_div(&x[fsupc], &x[fsupc], &temp);
		} else {
#ifdef _CRAY
                    ftcs1 = _cptofcd("U", strlen("U"));
                    ftcs2 = _cptofcd(trans, strlen("T"));
                    ftcs3 = _cptofcd("N", strlen("N"));
		    CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
			    &x[fsupc], &incx);
#else
                    ctrsv_("U", trans, "N", &nsupc, &Lval[luptr], &nsupr,
                               &x[fsupc], &incx);
#endif
  		}
  	    } /* for k ... */
  	}
    }

    stat->ops[SOLVE] += solve_ops;
    SUPERLU_FREE(work);
    return 0;
}
Ejemplo n.º 15
0
void
cgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U,
        int *perm_c, int *perm_r, SuperMatrix *B,
        SuperLUStat_t *stat, int *info)
{

#ifdef _CRAY
    _fcd ftcs1, ftcs2, ftcs3, ftcs4;
#endif
    int      incx = 1, incy = 1;
#ifdef USE_VENDOR_BLAS
    complex   alpha = {1.0, 0.0}, beta = {1.0, 0.0};
    complex   *work_col;
#endif
    complex   temp_comp;
    DNformat *Bstore;
    complex   *Bmat;
    SCformat *Lstore;
    NCformat *Ustore;
    complex   *Lval, *Uval;
    int      fsupc, nrow, nsupr, nsupc, luptr, istart, irow;
    int      i, j, k, iptr, jcol, n, ldb, nrhs;
    complex   *work, *rhs_work, *soln;
    flops_t  solve_ops;
    void cprint_soln();

    /* Test input parameters ... */
    *info = 0;
    Bstore = B->Store;
    ldb = Bstore->lda;
    nrhs = B->ncol;
    if ( trans != NOTRANS && trans != TRANS && trans != CONJ ) *info = -1;
    else if ( L->nrow != L->ncol || L->nrow < 0 ||
              L->Stype != SLU_SC || L->Dtype != SLU_C || L->Mtype != SLU_TRLU )
        *info = -2;
    else if ( U->nrow != U->ncol || U->nrow < 0 ||
              U->Stype != SLU_NC || U->Dtype != SLU_C || U->Mtype != SLU_TRU )
        *info = -3;
    else if ( ldb < SUPERLU_MAX(0, L->nrow) ||
              B->Stype != SLU_DN || B->Dtype != SLU_C || B->Mtype != SLU_GE )
        *info = -6;
    if ( *info ) {
        i = -(*info);
        xerbla_("cgstrs", &i);
        return;
    }

    n = L->nrow;
    work = complexCalloc(n * nrhs);
    if ( !work ) ABORT("Malloc fails for local work[].");
    soln = complexMalloc(n);
    if ( !soln ) ABORT("Malloc fails for local soln[].");

    Bmat = Bstore->nzval;
    Lstore = L->Store;
    Lval = Lstore->nzval;
    Ustore = U->Store;
    Uval = Ustore->nzval;
    solve_ops = 0;

    if ( trans == NOTRANS ) {
        /* Permute right hand sides to form Pr*B */
        for (i = 0; i < nrhs; i++) {
            rhs_work = &Bmat[i*ldb];
            for (k = 0; k < n; k++) soln[perm_r[k]] = rhs_work[k];
            for (k = 0; k < n; k++) rhs_work[k] = soln[k];
        }

        /* Forward solve PLy=Pb. */
        for (k = 0; k <= Lstore->nsuper; k++) {
            fsupc = L_FST_SUPC(k);
            istart = L_SUB_START(fsupc);
            nsupr = L_SUB_START(fsupc+1) - istart;
            nsupc = L_FST_SUPC(k+1) - fsupc;
            nrow = nsupr - nsupc;

            solve_ops += 4 * nsupc * (nsupc - 1) * nrhs;
            solve_ops += 8 * nrow * nsupc * nrhs;

            if ( nsupc == 1 ) {
                for (j = 0; j < nrhs; j++) {
                    rhs_work = &Bmat[j*ldb];
                    luptr = L_NZ_START(fsupc);
                    for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); iptr++){
                        irow = L_SUB(iptr);
                        ++luptr;
                        cc_mult(&temp_comp, &rhs_work[fsupc], &Lval[luptr]);
                        c_sub(&rhs_work[irow], &rhs_work[irow], &temp_comp);
                    }
                }
            } else {
                luptr = L_NZ_START(fsupc);
#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
                ftcs1 = _cptofcd("L", strlen("L"));
                ftcs2 = _cptofcd("N", strlen("N"));
                ftcs3 = _cptofcd("U", strlen("U"));
                CTRSM( ftcs1, ftcs1, ftcs2, ftcs3, &nsupc, &nrhs, &alpha,
                       &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);

                CGEMM( ftcs2, ftcs2, &nrow, &nrhs, &nsupc, &alpha,
                        &Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb,
                        &beta, &work[0], &n );
#else
                ctrsm_("L", "L", "N", "U", &nsupc, &nrhs, &alpha,
                       &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);

                cgemm_( "N", "N", &nrow, &nrhs, &nsupc, &alpha,
                        &Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb,
                        &beta, &work[0], &n );
#endif
                for (j = 0; j < nrhs; j++) {
                    rhs_work = &Bmat[j*ldb];
                    work_col = &work[j*n];
                    iptr = istart + nsupc;
                    for (i = 0; i < nrow; i++) {
                        irow = L_SUB(iptr);
                        c_sub(&rhs_work[irow], &rhs_work[irow], &work_col[i]);
                        work_col[i].r = 0.0;
                        work_col[i].i = 0.0;
                        iptr++;
                    }
                }
#else
                for (j = 0; j < nrhs; j++) {
                    rhs_work = &Bmat[j*ldb];
                    clsolve (nsupr, nsupc, &Lval[luptr], &rhs_work[fsupc]);
                    cmatvec (nsupr, nrow, nsupc, &Lval[luptr+nsupc],
                            &rhs_work[fsupc], &work[0] );

                    iptr = istart + nsupc;
                    for (i = 0; i < nrow; i++) {
                        irow = L_SUB(iptr);
                        c_sub(&rhs_work[irow], &rhs_work[irow], &work[i]);
                        work[i].r = 0.;
                        work[i].i = 0.;
                        iptr++;
                    }
                }
#endif
            } /* else ... */
        } /* for L-solve */

#ifdef DEBUG
        printf("After L-solve: y=\n");
        cprint_soln(n, nrhs, Bmat);
#endif

        /*
         * Back solve Ux=y.
         */
        for (k = Lstore->nsuper; k >= 0; k--) {
            fsupc = L_FST_SUPC(k);
            istart = L_SUB_START(fsupc);
            nsupr = L_SUB_START(fsupc+1) - istart;
            nsupc = L_FST_SUPC(k+1) - fsupc;
            luptr = L_NZ_START(fsupc);

            solve_ops += 4 * nsupc * (nsupc + 1) * nrhs;

            if ( nsupc == 1 ) {
                rhs_work = &Bmat[0];
                for (j = 0; j < nrhs; j++) {
                    c_div(&rhs_work[fsupc], &rhs_work[fsupc], &Lval[luptr]);
                    rhs_work += ldb;
                }
            } else {
#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
                ftcs1 = _cptofcd("L", strlen("L"));
                ftcs2 = _cptofcd("U", strlen("U"));
                ftcs3 = _cptofcd("N", strlen("N"));
                CTRSM( ftcs1, ftcs2, ftcs3, ftcs3, &nsupc, &nrhs, &alpha,
                       &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
#else
                ctrsm_("L", "U", "N", "N", &nsupc, &nrhs, &alpha,
                       &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
#endif
#else
                for (j = 0; j < nrhs; j++)
                    cusolve ( nsupr, nsupc, &Lval[luptr], &Bmat[fsupc+j*ldb] );
#endif
            }

            for (j = 0; j < nrhs; ++j) {
                rhs_work = &Bmat[j*ldb];
                for (jcol = fsupc; jcol < fsupc + nsupc; jcol++) {
                    solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
                    for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++ ){
                        irow = U_SUB(i);
                        cc_mult(&temp_comp, &rhs_work[jcol], &Uval[i]);
                        c_sub(&rhs_work[irow], &rhs_work[irow], &temp_comp);
                    }
                }
            }

        } /* for U-solve */

#ifdef DEBUG
        printf("After U-solve: x=\n");
        cprint_soln(n, nrhs, Bmat);
#endif

        /* Compute the final solution X := Pc*X. */
        for (i = 0; i < nrhs; i++) {
            rhs_work = &Bmat[i*ldb];
            for (k = 0; k < n; k++) soln[k] = rhs_work[perm_c[k]];
            for (k = 0; k < n; k++) rhs_work[k] = soln[k];
        }

        stat->ops[SOLVE] = solve_ops;

    } else { /* Solve A'*X=B or CONJ(A)*X=B */
        /* Permute right hand sides to form Pc'*B. */
        for (i = 0; i < nrhs; i++) {
            rhs_work = &Bmat[i*ldb];
            for (k = 0; k < n; k++) soln[perm_c[k]] = rhs_work[k];
            for (k = 0; k < n; k++) rhs_work[k] = soln[k];
        }

        stat->ops[SOLVE] = 0;
        if (trans == TRANS) {
            for (k = 0; k < nrhs; ++k) {
                /* Multiply by inv(U'). */
                sp_ctrsv("U", "T", "N", L, U, &Bmat[k*ldb], stat, info);

                /* Multiply by inv(L'). */
                sp_ctrsv("L", "T", "U", L, U, &Bmat[k*ldb], stat, info);
            }
         } else { /* trans == CONJ */
            for (k = 0; k < nrhs; ++k) {
                /* Multiply by conj(inv(U')). */
                sp_ctrsv("U", "C", "N", L, U, &Bmat[k*ldb], stat, info);

                /* Multiply by conj(inv(L')). */
                sp_ctrsv("L", "C", "U", L, U, &Bmat[k*ldb], stat, info);
            }
         }
        /* Compute the final solution X := Pr'*X (=inv(Pr)*X) */
        for (i = 0; i < nrhs; i++) {
            rhs_work = &Bmat[i*ldb];
            for (k = 0; k < n; k++) soln[k] = rhs_work[perm_r[k]];
            for (k = 0; k < n; k++) rhs_work[k] = soln[k];
        }

    }

    SUPERLU_FREE(work);
    SUPERLU_FREE(soln);
}
Ejemplo n.º 16
0
void Mie(double x,struct c_complex m,double*mu,long nangles,struct c_complex*s1,
struct c_complex*s2,double*qext,double*qsca,double*qback,double*g)

/*:34*/
#line 519 "./mie.w"


{
/*36:*/
#line 546 "./mie.w"

struct c_complex*D;
struct c_complex z1,an,bn,bnm1,anm1,qbcalc;
double*pi0,*pi1,*tau;
struct c_complex xi,xi0,xi1;
double psi,psi0,psi1;
double alpha,beta,factor;
long n,k,nstop,sign;
*qext= -1;
*qsca= -1;
*qback= -1;
*g= -1;

/*:36*/
#line 522 "./mie.w"


/*37:*/
#line 559 "./mie.w"

if(m.im> 0.0){
mie_error("This program requires m.im>=0",1);
return;
}
if(x<=0.0){
mie_error("This program requires positive sphere sizes",2);
return;
}
if(nangles<0){
mie_error("This program requires non-negative angle sizes",3);
return;
}
if(nangles<0){
mie_error("This program requires non-negative angle sizes",4);
return;
}
if((nangles> 0)&&(s1==NULL)){
mie_error("Space must be allocated for s1 if nangles!=0",5);
return;
}
if((nangles> 0)&&(s2==NULL)){
mie_error("Space must be allocated for s2if nangles!=0",6);
return;
}
if(x> 20000){
mie_error("Program not validated for spheres with x>20000",7);
return;
}

/*:37*/
#line 524 "./mie.w"

/*38:*/
#line 589 "./mie.w"

if((m.re==0)&&(x<0.1)){
small_conducting_Mie(x,m,mu,nangles,s1,s2,qext,qsca,qback,g);
return;
}

if((m.re> 0.0)&&(c_abs(m)*x<0.1)){
small_Mie(x,m,mu,nangles,s1,s2,qext,qsca,qback,g);
return;
}

/*:38*/
#line 525 "./mie.w"


/*40:*/
#line 616 "./mie.w"

nstop= floor(x+4.05*pow(x,0.33333)+2.0);

/*:40*/
#line 527 "./mie.w"


/*39:*/
#line 600 "./mie.w"

if(nangles> 0){
set_carray(s1,nangles,c_set(0.0,0.0));
set_carray(s2,nangles,c_set(0.0,0.0));

pi0= new_darray(nangles);
pi1= new_darray(nangles);
tau= new_darray(nangles);

set_darray(pi0,nangles,0.0);
set_darray(tau,nangles,0.0);
set_darray(pi1,nangles,1.0);
}

/*:39*/
#line 529 "./mie.w"

if(m.re> 0)
/*41:*/
#line 634 "./mie.w"

{
struct c_complex z;

z= c_smul(x,m);

D= new_carray(nstop+1);
if(D==NULL){
mie_error("Cannot allocate log array",8);
return;
}

if(fabs(m.im*x)<((13.78*m.re-10.8)*m.re+3.9))
Dn_up(z,nstop,D);
else
Dn_down(z,nstop,D);
}

/*:41*/
#line 531 "./mie.w"


/*42:*/
#line 671 "./mie.w"

psi0= sin(x);
psi1= psi0/x-cos(x);
xi0= c_set(psi0,cos(x));
xi1= c_set(psi1,cos(x)/x+sin(x));
*qsca= 0.0;
*g= 0.0;
*qext= 0.0;
sign= 1;
qbcalc= c_set(0.0,0.0);
anm1= c_set(0.0,0.0);
bnm1= c_set(0.0,0.0);

/*:42*/
#line 533 "./mie.w"


for(n= 1;n<=nstop;n++){
/*43:*/
#line 696 "./mie.w"

if(m.re==0.0){
an= c_sdiv(n*psi1/x-psi0,c_sub(c_smul(n/x,xi1),xi0));
bn= c_sdiv(psi1,xi1);
}else if(m.im==0.0){
z1.re= D[n].re/m.re+n/x;
an= c_sdiv(z1.re*psi1-psi0,c_sub(c_smul(z1.re,xi1),xi0));

z1.re= D[n].re*m.re+n/x;
bn= c_sdiv(z1.re*psi1-psi0,c_sub(c_smul(z1.re,xi1),xi0));
}else{
z1= c_div(D[n],m);
z1.re+= n/x;
an= c_div(c_set(z1.re*psi1-psi0,z1.im*psi1),c_sub(c_mul(z1,xi1),xi0));

z1= c_mul(D[n],m);
z1.re+= n/x;
bn= c_div(c_set(z1.re*psi1-psi0,z1.im*psi1),c_sub(c_mul(z1,xi1),xi0));
}

/*:43*/
#line 536 "./mie.w"

/*44:*/
#line 734 "./mie.w"

for(k= 0;k<nangles;k++){
factor= (2.0*n+1.0)/(n+1.0)/n;
tau[k]= n*mu[k]*pi1[k]-(n+1)*pi0[k];
alpha= factor*pi1[k];
beta= factor*tau[k];
s1[k].re+= alpha*an.re+beta*bn.re;
s1[k].im+= alpha*an.im+beta*bn.im;
s2[k].re+= alpha*bn.re+beta*an.re;
s2[k].im+= alpha*bn.im+beta*an.im;
}

for(k= 0;k<nangles;k++){
factor= pi1[k];
pi1[k]= ((2.0*n+1.0)*mu[k]*pi1[k]-(n+1.0)*pi0[k])/n;
pi0[k]= factor;
}

/*:44*/
#line 537 "./mie.w"

/*45:*/
#line 780 "./mie.w"

factor= 2.0*n+1.0;
*g+= (n-1.0/n)*(anm1.re*an.re+anm1.im*an.im+bnm1.re*bn.re+bnm1.im*bn.im);
*g+= factor/n/(n+1.0)*(an.re*bn.re+an.im*bn.im);
*qsca+= factor*(c_norm(an)+c_norm(bn));
*qext+= factor*(an.re+bn.re);
sign*= -1;
qbcalc.re+= sign*factor*(an.re-bn.re);
qbcalc.im+= sign*factor*(an.im-bn.im);

/*:45*/
#line 538 "./mie.w"

/*46:*/
#line 804 "./mie.w"

factor= (2.0*n+1.0)/x;
xi= c_sub(c_smul(factor,xi1),xi0);
xi0= xi1;
xi1= xi;

psi= factor*psi1-psi0;
psi0= psi1;
psi1= xi1.re;

anm1= an;
bnm1= bn;

/*:46*/
#line 539 "./mie.w"

}

/*47:*/
#line 817 "./mie.w"

*qsca*= 2/(x*x);
*qext*= 2/(x*x);
*g*= 4/(*qsca)/(x*x);
*qback= c_norm(qbcalc)/(x*x);

/*:47*/
#line 542 "./mie.w"

/*48:*/
#line 823 "./mie.w"

if(m.re> 0)free_carray(D);

if(nangles> 0){
free_darray(pi0);
free_darray(pi1);
free_darray(tau);
}

/*:48*/
#line 543 "./mie.w"

}
Ejemplo n.º 17
0
/* Return value:   0 - successful return
 *               > 0 - number of bytes allocated when run out of space
 */
int
ccolumn_bmod (
	     const int  jcol,	  /* in */
	     const int  nseg,	  /* in */
	     complex     *dense,	  /* in */
	     complex     *tempv,	  /* working array */
	     int        *segrep,  /* in */
	     int        *repfnz,  /* in */
	     int        fpanelc,  /* in -- first column in the current panel */
	     GlobalLU_t *Glu,     /* modified */
	     SuperLUStat_t *stat  /* output */
	     )
{
/*
 * Purpose:
 * ========
 *    Performs numeric block updates (sup-col) in topological order.
 *    It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
 *    Special processing on the supernodal portion of L\U[*,j]
 *
 */
#ifdef _CRAY
    _fcd ftcs1 = _cptofcd("L", strlen("L")),
         ftcs2 = _cptofcd("N", strlen("N")),
         ftcs3 = _cptofcd("U", strlen("U"));
#endif
    int         incx = 1, incy = 1;
    complex      alpha, beta;
    
    /* krep = representative of current k-th supernode
     * fsupc = first supernodal column
     * nsupc = no of columns in supernode
     * nsupr = no of rows in supernode (used as leading dimension)
     * luptr = location of supernodal LU-block in storage
     * kfnz = first nonz in the k-th supernodal segment
     * no_zeros = no of leading zeros in a supernodal U-segment
     */
    complex       ukj, ukj1, ukj2;
    int          luptr, luptr1, luptr2;
    int          fsupc, nsupc, nsupr, segsze;
    int          nrow;	  /* No of rows in the matrix of matrix-vector */
    int          jcolp1, jsupno, k, ksub, krep, krep_ind, ksupno;
    register int lptr, kfnz, isub, irow, i;
    register int no_zeros, new_next; 
    int          ufirst, nextlu;
    int          fst_col; /* First column within small LU update */
    int          d_fsupc; /* Distance between the first column of the current
			     panel and the first column of the current snode. */
    int          *xsup, *supno;
    int          *lsub, *xlsub;
    complex       *lusup;
    int          *xlusup;
    int          nzlumax;
    complex       *tempv1;
    complex      zero = {0.0, 0.0};
    complex      one = {1.0, 0.0};
    complex      none = {-1.0, 0.0};
    complex	 comp_temp, comp_temp1;
    int          mem_error;
    flops_t      *ops = stat->ops;

    xsup    = Glu->xsup;
    supno   = Glu->supno;
    lsub    = Glu->lsub;
    xlsub   = Glu->xlsub;
    lusup   = Glu->lusup;
    xlusup  = Glu->xlusup;
    nzlumax = Glu->nzlumax;
    jcolp1 = jcol + 1;
    jsupno = supno[jcol];
    
    /* 
     * For each nonz supernode segment of U[*,j] in topological order 
     */
    k = nseg - 1;
    for (ksub = 0; ksub < nseg; ksub++) {

	krep = segrep[k];
	k--;
	ksupno = supno[krep];
	if ( jsupno != ksupno ) { /* Outside the rectangular supernode */

	    fsupc = xsup[ksupno];
	    fst_col = SUPERLU_MAX ( fsupc, fpanelc );

  	    /* Distance from the current supernode to the current panel; 
	       d_fsupc=0 if fsupc > fpanelc. */
  	    d_fsupc = fst_col - fsupc; 

	    luptr = xlusup[fst_col] + d_fsupc;
	    lptr = xlsub[fsupc] + d_fsupc;

	    kfnz = repfnz[krep];
	    kfnz = SUPERLU_MAX ( kfnz, fpanelc );

	    segsze = krep - kfnz + 1;
	    nsupc = krep - fst_col + 1;
	    nsupr = xlsub[fsupc+1] - xlsub[fsupc];	/* Leading dimension */
	    nrow = nsupr - d_fsupc - nsupc;
	    krep_ind = lptr + nsupc - 1;




	    /* 
	     * Case 1: Update U-segment of size 1 -- col-col update 
	     */
	    if ( segsze == 1 ) {
	  	ukj = dense[lsub[krep_ind]];
		luptr += nsupr*(nsupc-1) + nsupc;

		for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
		    irow = lsub[i];
		    cc_mult(&comp_temp, &ukj, &lusup[luptr]);
		    c_sub(&dense[irow], &dense[irow], &comp_temp);
		    luptr++;
		}

	    } else if ( segsze <= 3 ) {
		ukj = dense[lsub[krep_ind]];
		luptr += nsupr*(nsupc-1) + nsupc-1;
		ukj1 = dense[lsub[krep_ind - 1]];
		luptr1 = luptr - nsupr;

		if ( segsze == 2 ) { /* Case 2: 2cols-col update */
		    cc_mult(&comp_temp, &ukj1, &lusup[luptr1]);
		    c_sub(&ukj, &ukj, &comp_temp);
		    dense[lsub[krep_ind]] = ukj;
		    for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
		    	irow = lsub[i];
		    	luptr++;
		    	luptr1++;
			cc_mult(&comp_temp, &ukj, &lusup[luptr]);
			cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
			c_add(&comp_temp, &comp_temp, &comp_temp1);
			c_sub(&dense[irow], &dense[irow], &comp_temp);
		    }
		} else { /* Case 3: 3cols-col update */
		    ukj2 = dense[lsub[krep_ind - 2]];
		    luptr2 = luptr1 - nsupr;
  		    cc_mult(&comp_temp, &ukj2, &lusup[luptr2-1]);
		    c_sub(&ukj1, &ukj1, &comp_temp);

		    cc_mult(&comp_temp, &ukj1, &lusup[luptr1]);
		    cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
		    c_add(&comp_temp, &comp_temp, &comp_temp1);
		    c_sub(&ukj, &ukj, &comp_temp);

		    dense[lsub[krep_ind]] = ukj;
		    dense[lsub[krep_ind-1]] = ukj1;
		    for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
		    	irow = lsub[i];
		    	luptr++;
		    	luptr1++;
			luptr2++;
			cc_mult(&comp_temp, &ukj, &lusup[luptr]);
			cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
			c_add(&comp_temp, &comp_temp, &comp_temp1);
			cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
			c_add(&comp_temp, &comp_temp, &comp_temp1);
			c_sub(&dense[irow], &dense[irow], &comp_temp);
		    }
		}


	    } else {
	  	/*
		 * Case: sup-col update
		 * Perform a triangular solve and block update,
		 * then scatter the result of sup-col update to dense
		 */

		no_zeros = kfnz - fst_col;

	        /* Copy U[*,j] segment from dense[*] to tempv[*] */
	        isub = lptr + no_zeros;
	        for (i = 0; i < segsze; i++) {
	  	    irow = lsub[isub];
		    tempv[i] = dense[irow];
		    ++isub; 
	        }

	        /* Dense triangular solve -- start effective triangle */
		luptr += nsupr * no_zeros + no_zeros; 
		
#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
		CTRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], 
		       &nsupr, tempv, &incx );
#else		
		ctrsv_( "L", "N", "U", &segsze, &lusup[luptr], 
		       &nsupr, tempv, &incx );
#endif		
 		luptr += segsze;  /* Dense matrix-vector */
		tempv1 = &tempv[segsze];
                alpha = one;
                beta = zero;
#ifdef _CRAY
		CGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr], 
		       &nsupr, tempv, &incx, &beta, tempv1, &incy );
#else
		cgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr], 
		       &nsupr, tempv, &incx, &beta, tempv1, &incy );
#endif
#else
		clsolve ( nsupr, segsze, &lusup[luptr], tempv );

 		luptr += segsze;  /* Dense matrix-vector */
		tempv1 = &tempv[segsze];
		cmatvec (nsupr, nrow , segsze, &lusup[luptr], tempv, tempv1);
#endif
		
		
                /* Scatter tempv[] into SPA dense[] as a temporary storage */
                isub = lptr + no_zeros;
                for (i = 0; i < segsze; i++) {
                    irow = lsub[isub];
                    dense[irow] = tempv[i];
                    tempv[i] = zero;
                    ++isub;
                }

		/* Scatter tempv1[] into SPA dense[] */
		for (i = 0; i < nrow; i++) {
		    irow = lsub[isub];
		    c_sub(&dense[irow], &dense[irow], &tempv1[i]);
		    tempv1[i] = zero;
		    ++isub;
		}
	    }
	    
	} /* if jsupno ... */

    } /* for each segment... */

    /*
     *	Process the supernodal portion of L\U[*,j]
     */
    nextlu = xlusup[jcol];
    fsupc = xsup[jsupno];

    /* Copy the SPA dense into L\U[*,j] */
    new_next = nextlu + xlsub[fsupc+1] - xlsub[fsupc];
    while ( new_next > nzlumax ) {
	if (mem_error = cLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, Glu))
	    return (mem_error);
	lusup = Glu->lusup;
	lsub = Glu->lsub;
    }

    for (isub = xlsub[fsupc]; isub < xlsub[fsupc+1]; isub++) {
  	irow = lsub[isub];
	lusup[nextlu] = dense[irow];
        dense[irow] = zero;
	++nextlu;
    }

    xlusup[jcolp1] = nextlu;	/* Close L\U[*,jcol] */

    /* For more updates within the panel (also within the current supernode), 
     * should start from the first column of the panel, or the first column 
     * of the supernode, whichever is bigger. There are 2 cases:
     *    1) fsupc < fpanelc, then fst_col := fpanelc
     *    2) fsupc >= fpanelc, then fst_col := fsupc
     */
    fst_col = SUPERLU_MAX ( fsupc, fpanelc );

    if ( fst_col < jcol ) {

  	/* Distance between the current supernode and the current panel.
	   d_fsupc=0 if fsupc >= fpanelc. */
  	d_fsupc = fst_col - fsupc;

	lptr = xlsub[fsupc] + d_fsupc;
	luptr = xlusup[fst_col] + d_fsupc;
	nsupr = xlsub[fsupc+1] - xlsub[fsupc];	/* Leading dimension */
	nsupc = jcol - fst_col;	/* Excluding jcol */
	nrow = nsupr - d_fsupc - nsupc;

	/* Points to the beginning of jcol in snode L\U(jsupno) */
	ufirst = xlusup[jcol] + d_fsupc;	

	ops[TRSV] += 4 * nsupc * (nsupc - 1);
	ops[GEMV] += 8 * nrow * nsupc;
	
#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
	CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr], 
	       &nsupr, &lusup[ufirst], &incx );
#else
	ctrsv_( "L", "N", "U", &nsupc, &lusup[luptr], 
	       &nsupr, &lusup[ufirst], &incx );
#endif
	
	alpha = none; beta = one; /* y := beta*y + alpha*A*x */

#ifdef _CRAY
	CGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
	       &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
#else
	cgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
	       &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
#endif
#else
	clsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );

	cmatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc],
		&lusup[ufirst], tempv );
	
        /* Copy updates from tempv[*] into lusup[*] */
	isub = ufirst + nsupc;
	for (i = 0; i < nrow; i++) {
	    c_sub(&lusup[isub], &lusup[isub], &tempv[i]);
	    tempv[i] = zero;
	    ++isub;
	}

#endif
	
	
    } /* if fst_col < jcol ... */ 

    return 0;
}
Ejemplo n.º 18
0
void
cpanel_bmod (
            const int  m,          /* in - number of rows in the matrix */
            const int  w,          /* in */
            const int  jcol,       /* in */
            const int  nseg,       /* in */
            complex     *dense,     /* out, of size n by w */
            complex     *tempv,     /* working array */
            int        *segrep,    /* in */
            int        *repfnz,    /* in, of size n by w */
            GlobalLU_t *Glu,       /* modified */
            SuperLUStat_t *stat    /* output */
            )
{


#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
    _fcd ftcs1 = _cptofcd("L", strlen("L")),
         ftcs2 = _cptofcd("N", strlen("N")),
         ftcs3 = _cptofcd("U", strlen("U"));
#endif
    int          incx = 1, incy = 1;
    complex       alpha, beta;
#endif

    register int k, ksub;
    int          fsupc, nsupc, nsupr, nrow;
    int          krep, krep_ind;
    complex       ukj, ukj1, ukj2;
    int          luptr, luptr1, luptr2;
    int          segsze;
    int          block_nrow;  /* no of rows in a block row */
    register int lptr;        /* Points to the row subscripts of a supernode */
    int          kfnz, irow, no_zeros;
    register int isub, isub1, i;
    register int jj;          /* Index through each column in the panel */
    int          *xsup, *supno;
    int          *lsub, *xlsub;
    complex       *lusup;
    int          *xlusup;
    int          *repfnz_col; /* repfnz[] for a column in the panel */
    complex       *dense_col;  /* dense[] for a column in the panel */
    complex       *tempv1;             /* Used in 1-D update */
    complex       *TriTmp, *MatvecTmp; /* used in 2-D update */
    complex      zero = {0.0, 0.0};
    complex      one = {1.0, 0.0};
    complex      comp_temp, comp_temp1;
    register int ldaTmp;
    register int r_ind, r_hi;
    static   int first = 1, maxsuper, rowblk, colblk;
    flops_t  *ops = stat->ops;

    xsup    = Glu->xsup;
    supno   = Glu->supno;
    lsub    = Glu->lsub;
    xlsub   = Glu->xlsub;
    lusup   = Glu->lusup;
    xlusup  = Glu->xlusup;

    if ( first ) {
        maxsuper = SUPERLU_MAX( sp_ienv(3), sp_ienv(7) );
        rowblk   = sp_ienv(4);
        colblk   = sp_ienv(5);
        first = 0;
    }
    ldaTmp = maxsuper + rowblk;

    /*
     * For each nonz supernode segment of U[*,j] in topological order
     */
    k = nseg - 1;
    for (ksub = 0; ksub < nseg; ksub++) { /* for each updating supernode */

        /* krep = representative of current k-th supernode
         * fsupc = first supernodal column
         * nsupc = no of columns in a supernode
         * nsupr = no of rows in a supernode
         */
        krep = segrep[k--];
        fsupc = xsup[supno[krep]];
        nsupc = krep - fsupc + 1;
        nsupr = xlsub[fsupc+1] - xlsub[fsupc];
        nrow = nsupr - nsupc;
        lptr = xlsub[fsupc];
        krep_ind = lptr + nsupc - 1;

        repfnz_col = repfnz;
        dense_col = dense;

        if ( nsupc >= colblk && nrow > rowblk ) { /* 2-D block update */

            TriTmp = tempv;

            /* Sequence through each column in panel -- triangular solves */
            for (jj = jcol; jj < jcol + w; jj++,
                 repfnz_col += m, dense_col += m, TriTmp += ldaTmp ) {

                kfnz = repfnz_col[krep];
                if ( kfnz == EMPTY ) continue;  /* Skip any zero segment */

                segsze = krep - kfnz + 1;
                luptr = xlusup[fsupc];

                ops[TRSV] += 4 * segsze * (segsze - 1);
                ops[GEMV] += 8 * nrow * segsze;

                /* Case 1: Update U-segment of size 1 -- col-col update */
                if ( segsze == 1 ) {
                    ukj = dense_col[lsub[krep_ind]];
                    luptr += nsupr*(nsupc-1) + nsupc;

                    for (i = lptr + nsupc; i < xlsub[fsupc+1]; i++) {
                        irow = lsub[i];
                        cc_mult(&comp_temp, &ukj, &lusup[luptr]);
                        c_sub(&dense_col[irow], &dense_col[irow], &comp_temp);
                        ++luptr;
                    }

                } else if ( segsze <= 3 ) {
                    ukj = dense_col[lsub[krep_ind]];
                    ukj1 = dense_col[lsub[krep_ind - 1]];
                    luptr += nsupr*(nsupc-1) + nsupc-1;
                    luptr1 = luptr - nsupr;

                    if ( segsze == 2 ) {
                        cc_mult(&comp_temp, &ukj1, &lusup[luptr1]);
                        c_sub(&ukj, &ukj, &comp_temp);
                        dense_col[lsub[krep_ind]] = ukj;
                        for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
                            irow = lsub[i];
                            luptr++; luptr1++;
                            cc_mult(&comp_temp, &ukj, &lusup[luptr]);
                            cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
                            c_add(&comp_temp, &comp_temp, &comp_temp1);
                            c_sub(&dense_col[irow], &dense_col[irow], &comp_temp);
                        }
                    } else {
                        ukj2 = dense_col[lsub[krep_ind - 2]];
                        luptr2 = luptr1 - nsupr;
                        cc_mult(&comp_temp, &ukj2, &lusup[luptr2-1]);
                        c_sub(&ukj1, &ukj1, &comp_temp);

                        cc_mult(&comp_temp, &ukj1, &lusup[luptr1]);
                        cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
                        c_add(&comp_temp, &comp_temp, &comp_temp1);
                        c_sub(&ukj, &ukj, &comp_temp);
                        dense_col[lsub[krep_ind]] = ukj;
                        dense_col[lsub[krep_ind-1]] = ukj1;
                        for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
                            irow = lsub[i];
                            luptr++; luptr1++; luptr2++;
                            cc_mult(&comp_temp, &ukj, &lusup[luptr]);
                            cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
                            c_add(&comp_temp, &comp_temp, &comp_temp1);
                            cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
                            c_add(&comp_temp, &comp_temp, &comp_temp1);
                            c_sub(&dense_col[irow], &dense_col[irow], &comp_temp);
                        }
                    }

                } else  {       /* segsze >= 4 */

                    /* Copy U[*,j] segment from dense[*] to TriTmp[*], which
                       holds the result of triangular solves.    */
                    no_zeros = kfnz - fsupc;
                    isub = lptr + no_zeros;
                    for (i = 0; i < segsze; ++i) {
                        irow = lsub[isub];
                        TriTmp[i] = dense_col[irow]; /* Gather */
                        ++isub;
                    }

                    /* start effective triangle */
                    luptr += nsupr * no_zeros + no_zeros;

#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
                    CTRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr],
                           &nsupr, TriTmp, &incx );
#else
                    ctrsv_( "L", "N", "U", &segsze, &lusup[luptr],
                           &nsupr, TriTmp, &incx );
#endif
#else
                    clsolve ( nsupr, segsze, &lusup[luptr], TriTmp );
#endif


                } /* else ... */

            }  /* for jj ... end tri-solves */

            /* Block row updates; push all the way into dense[*] block */
            for ( r_ind = 0; r_ind < nrow; r_ind += rowblk ) {

                r_hi = SUPERLU_MIN(nrow, r_ind + rowblk);
                block_nrow = SUPERLU_MIN(rowblk, r_hi - r_ind);
                luptr = xlusup[fsupc] + nsupc + r_ind;
                isub1 = lptr + nsupc + r_ind;

                repfnz_col = repfnz;
                TriTmp = tempv;
                dense_col = dense;

                /* Sequence through each column in panel -- matrix-vector */
                for (jj = jcol; jj < jcol + w; jj++,
                     repfnz_col += m, dense_col += m, TriTmp += ldaTmp) {

                    kfnz = repfnz_col[krep];
                    if ( kfnz == EMPTY ) continue; /* Skip any zero segment */

                    segsze = krep - kfnz + 1;
                    if ( segsze <= 3 ) continue;   /* skip unrolled cases */

                    /* Perform a block update, and scatter the result of
                       matrix-vector to dense[].                 */
                    no_zeros = kfnz - fsupc;
                    luptr1 = luptr + nsupr * no_zeros;
                    MatvecTmp = &TriTmp[maxsuper];

#ifdef USE_VENDOR_BLAS
                    alpha = one;
                    beta = zero;
#ifdef _CRAY
                    CGEMV(ftcs2, &block_nrow, &segsze, &alpha, &lusup[luptr1],
                           &nsupr, TriTmp, &incx, &beta, MatvecTmp, &incy);
#else
                    cgemv_("N", &block_nrow, &segsze, &alpha, &lusup[luptr1],
                           &nsupr, TriTmp, &incx, &beta, MatvecTmp, &incy);
#endif
#else
                    cmatvec(nsupr, block_nrow, segsze, &lusup[luptr1],
                           TriTmp, MatvecTmp);
#endif

                    /* Scatter MatvecTmp[*] into SPA dense[*] temporarily
                     * such that MatvecTmp[*] can be re-used for the
                     * the next blok row update. dense[] will be copied into
                     * global store after the whole panel has been finished.
                     */
                    isub = isub1;
                    for (i = 0; i < block_nrow; i++) {
                        irow = lsub[isub];
                        c_sub(&dense_col[irow], &dense_col[irow],
                              &MatvecTmp[i]);
                        MatvecTmp[i] = zero;
                        ++isub;
                    }

                } /* for jj ... */

            } /* for each block row ... */

            /* Scatter the triangular solves into SPA dense[*] */
            repfnz_col = repfnz;
            TriTmp = tempv;
            dense_col = dense;

            for (jj = jcol; jj < jcol + w; jj++,
                 repfnz_col += m, dense_col += m, TriTmp += ldaTmp) {
                kfnz = repfnz_col[krep];
                if ( kfnz == EMPTY ) continue; /* Skip any zero segment */

                segsze = krep - kfnz + 1;
                if ( segsze <= 3 ) continue; /* skip unrolled cases */

                no_zeros = kfnz - fsupc;
                isub = lptr + no_zeros;
                for (i = 0; i < segsze; i++) {
                    irow = lsub[isub];
                    dense_col[irow] = TriTmp[i];
                    TriTmp[i] = zero;
                    ++isub;
                }

            } /* for jj ... */

        } else { /* 1-D block modification */


            /* Sequence through each column in the panel */
            for (jj = jcol; jj < jcol + w; jj++,
                 repfnz_col += m, dense_col += m) {

                kfnz = repfnz_col[krep];
                if ( kfnz == EMPTY ) continue;  /* Skip any zero segment */

                segsze = krep - kfnz + 1;
                luptr = xlusup[fsupc];

                ops[TRSV] += 4 * segsze * (segsze - 1);
                ops[GEMV] += 8 * nrow * segsze;

                /* Case 1: Update U-segment of size 1 -- col-col update */
                if ( segsze == 1 ) {
                    ukj = dense_col[lsub[krep_ind]];
                    luptr += nsupr*(nsupc-1) + nsupc;

                    for (i = lptr + nsupc; i < xlsub[fsupc+1]; i++) {
                        irow = lsub[i];
                        cc_mult(&comp_temp, &ukj, &lusup[luptr]);
                        c_sub(&dense_col[irow], &dense_col[irow], &comp_temp);
                        ++luptr;
                    }

                } else if ( segsze <= 3 ) {
                    ukj = dense_col[lsub[krep_ind]];
                    luptr += nsupr*(nsupc-1) + nsupc-1;
                    ukj1 = dense_col[lsub[krep_ind - 1]];
                    luptr1 = luptr - nsupr;

                    if ( segsze == 2 ) {
                        cc_mult(&comp_temp, &ukj1, &lusup[luptr1]);
                        c_sub(&ukj, &ukj, &comp_temp);
                        dense_col[lsub[krep_ind]] = ukj;
                        for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
                            irow = lsub[i];
                            ++luptr;  ++luptr1;
                            cc_mult(&comp_temp, &ukj, &lusup[luptr]);
                            cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
                            c_add(&comp_temp, &comp_temp, &comp_temp1);
                            c_sub(&dense_col[irow], &dense_col[irow], &comp_temp);
                        }
                    } else {
                        ukj2 = dense_col[lsub[krep_ind - 2]];
                        luptr2 = luptr1 - nsupr;
                        cc_mult(&comp_temp, &ukj2, &lusup[luptr2-1]);
                        c_sub(&ukj1, &ukj1, &comp_temp);

                        cc_mult(&comp_temp, &ukj1, &lusup[luptr1]);
                        cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
                        c_add(&comp_temp, &comp_temp, &comp_temp1);
                        c_sub(&ukj, &ukj, &comp_temp);
                        dense_col[lsub[krep_ind]] = ukj;
                        dense_col[lsub[krep_ind-1]] = ukj1;
                        for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
                            irow = lsub[i];
                            ++luptr; ++luptr1; ++luptr2;
                            cc_mult(&comp_temp, &ukj, &lusup[luptr]);
                            cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
                            c_add(&comp_temp, &comp_temp, &comp_temp1);
                            cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
                            c_add(&comp_temp, &comp_temp, &comp_temp1);
                            c_sub(&dense_col[irow], &dense_col[irow], &comp_temp);
                        }
                    }

                } else  { /* segsze >= 4 */
                    /*
                     * Perform a triangular solve and block update,
                     * then scatter the result of sup-col update to dense[].
                     */
                    no_zeros = kfnz - fsupc;

                    /* Copy U[*,j] segment from dense[*] to tempv[*]:
                     *    The result of triangular solve is in tempv[*];
                     *    The result of matrix vector update is in dense_col[*]
                     */
                    isub = lptr + no_zeros;
                    for (i = 0; i < segsze; ++i) {
                        irow = lsub[isub];
                        tempv[i] = dense_col[irow]; /* Gather */
                        ++isub;
                    }

                    /* start effective triangle */
                    luptr += nsupr * no_zeros + no_zeros;

#ifdef USE_VENDOR_BLAS
#ifdef _CRAY
                    CTRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr],
                           &nsupr, tempv, &incx );
#else
                    ctrsv_( "L", "N", "U", &segsze, &lusup[luptr],
                           &nsupr, tempv, &incx );
#endif

                    luptr += segsze;    /* Dense matrix-vector */
                    tempv1 = &tempv[segsze];
                    alpha = one;
                    beta = zero;
#ifdef _CRAY
                    CGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr],
                           &nsupr, tempv, &incx, &beta, tempv1, &incy );
#else
                    cgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr],
                           &nsupr, tempv, &incx, &beta, tempv1, &incy );
#endif
#else
                    clsolve ( nsupr, segsze, &lusup[luptr], tempv );

                    luptr += segsze;        /* Dense matrix-vector */
                    tempv1 = &tempv[segsze];
                    cmatvec (nsupr, nrow, segsze, &lusup[luptr], tempv, tempv1);
#endif

                    /* Scatter tempv[*] into SPA dense[*] temporarily, such
                     * that tempv[*] can be used for the triangular solve of
                     * the next column of the panel. They will be copied into
                     * ucol[*] after the whole panel has been finished.
                     */
                    isub = lptr + no_zeros;
                    for (i = 0; i < segsze; i++) {
                        irow = lsub[isub];
                        dense_col[irow] = tempv[i];
                        tempv[i] = zero;
                        isub++;
                    }

                    /* Scatter the update from tempv1[*] into SPA dense[*] */
                    /* Start dense rectangular L */
                    for (i = 0; i < nrow; i++) {
                        irow = lsub[isub];
                        c_sub(&dense_col[irow], &dense_col[irow], &tempv1[i]);
                        tempv1[i] = zero;
                        ++isub;
                    }

                } /* else segsze>=4 ... */

            } /* for each column in the panel... */

        } /* else 1-D update ... */

    } /* for each updating supernode ... */

}
Ejemplo n.º 19
0
int cgst01(int m, int n, SuperMatrix *A, SuperMatrix *L, 
		SuperMatrix *U, int *perm_c, int *perm_r, float *resid)
{
/* 
    Purpose   
    =======   

    CGST01 reconstructs a matrix A from its L*U factorization and   
    computes the residual   
       norm(L*U - A) / ( N * norm(A) * EPS ),   
    where EPS is the machine epsilon.   

    Arguments   
    ==========   

    M       (input) INT   
            The number of rows of the matrix A.  M >= 0.   

    N       (input) INT   
            The number of columns of the matrix A.  N >= 0.   

    A       (input) SuperMatrix *, dimension (A->nrow, A->ncol)
            The original M x N matrix A.   

    L       (input) SuperMatrix *, dimension (L->nrow, L->ncol)
            The factor matrix L.

    U       (input) SuperMatrix *, dimension (U->nrow, U->ncol)
            The factor matrix U.

    perm_c (input) INT array, dimension (N)
            The column permutation from CGSTRF.   

    perm_r  (input) INT array, dimension (M)
            The pivot indices from CGSTRF.   

    RESID   (output) FLOAT*
            norm(L*U - A) / ( N * norm(A) * EPS )   

    ===================================================================== 
*/  

    /* Local variables */
    complex zero = {0.0, 0.0};
    int i, j, k, arow, lptr,isub,  urow, superno, fsupc, u_part;
    complex utemp, comp_temp;
    float anorm, tnorm, cnorm;
    float eps;
    complex *work;
    SCformat *Lstore;
    NCformat *Astore, *Ustore;
    complex *Aval, *Lval, *Uval;
    int *colbeg, *colend;

    /* Function prototypes */
    extern float clangs(char *, SuperMatrix *);

    /* Quick exit if M = 0 or N = 0. */

    if (m <= 0 || n <= 0) {
	*resid = 0.f;
	return 0;
    }

    work = (complex *)complexCalloc(m);

    Astore = A->Store;
    Aval = Astore->nzval;
    Lstore = L->Store;
    Lval = Lstore->nzval;
    Ustore = U->Store;
    Uval = Ustore->nzval;

    colbeg = intMalloc(n);
    colend = intMalloc(n);

        for (i = 0; i < n; i++) {
            colbeg[perm_c[i]] = Astore->colptr[i]; 
	    colend[perm_c[i]] = Astore->colptr[i+1];
        }
	
    /* Determine EPS and the norm of A. */
    eps = smach("Epsilon");
    anorm = clangs("1", A);
    cnorm = 0.;

    /* Compute the product L*U, one column at a time */
    for (k = 0; k < n; ++k) {

	/* The U part outside the rectangular supernode */
        for (i = U_NZ_START(k); i < U_NZ_START(k+1); ++i) {
	    urow = U_SUB(i);
	    utemp = Uval[i];
            superno = Lstore->col_to_sup[urow];
	    fsupc = L_FST_SUPC(superno);
	    u_part = urow - fsupc + 1;
	    lptr = L_SUB_START(fsupc) + u_part;
            work[L_SUB(lptr-1)].r -= utemp.r;
            work[L_SUB(lptr-1)].i -= utemp.i;
	    for (j = L_NZ_START(urow) + u_part; j < L_NZ_START(urow+1); ++j) {
                isub = L_SUB(lptr);
	        cc_mult(&comp_temp, &utemp, &Lval[j]);
		c_sub(&work[isub], &work[isub], &comp_temp);
	        ++lptr;
	    }
	}

	/* The U part inside the rectangular supernode */
	superno = Lstore->col_to_sup[k];
	fsupc = L_FST_SUPC(superno);
	urow = L_NZ_START(k);
	for (i = fsupc; i <= k; ++i) {
	    utemp = Lval[urow++];
	    u_part = i - fsupc + 1;
	    lptr = L_SUB_START(fsupc) + u_part;
            work[L_SUB(lptr-1)].r -= utemp.r;
            work[L_SUB(lptr-1)].i -= utemp.i;
	    for (j = L_NZ_START(i)+u_part; j < L_NZ_START(i+1); ++j) {
                isub = L_SUB(lptr);
	        cc_mult(&comp_temp, &utemp, &Lval[j]);
		c_sub(&work[isub], &work[isub], &comp_temp);
	        ++lptr;
	    }
	}

	/* Now compute A[k] - (L*U)[k] (Both matrices may be permuted.) */

	for (i = colbeg[k]; i < colend[k]; ++i) {
	    arow = Astore->rowind[i];
	    work[perm_r[arow]].r += Aval[i].r;
	    work[perm_r[arow]].i += Aval[i].i;
        }

	/* Now compute the 1-norm of the column vector work */
        tnorm = 0.;
	for (i = 0; i < m; ++i) {
            tnorm += fabs(work[i].r) + fabs(work[i].i);
	    work[i] = zero;
	}
	cnorm = SUPERLU_MAX(tnorm, cnorm);
    }

    *resid = cnorm;

    if (anorm <= 0.f) {
	if (*resid != 0.f) {
	    *resid = 1.f / eps;
	}
    } else {
	*resid = *resid / (float) n / anorm / eps;
    }

    SUPERLU_FREE(work);
    SUPERLU_FREE(colbeg);
    SUPERLU_FREE(colend);
    return 0;

/*     End of CGST01 */

} /* cgst01_ */
Ejemplo n.º 20
0
void
pcgstrf_bmod1D(
	       const int pnum,  /* process number */
	       const int m,     /* number of rows in the matrix */
	       const int w,     /* current panel width */
	       const int jcol,  /* leading column of the current panel */
	       const int fsupc, /* leading column of the updating supernode */ 
	       const int krep,  /* last column of the updating supernode */ 
	       const int nsupc, /* number of columns in the updating s-node */ 
	       int nsupr, /* number of rows in the updating supernode */  
	       int nrow,  /* number of rows below the diagonal block of
			     the updating supernode */ 
	       int *repfnz,     /* in */
	       int *panel_lsub, /* modified */
	       int *w_lsub_end, /* modified */
	       int *spa_marker, /* modified; size n-by-w */
	       complex *dense,   /* modified */
	       complex *tempv,   /* working array - zeros on entry/exit */
	       GlobalLU_t *Glu, /* modified */
	       Gstat_t *Gstat   /* modified */
	       )
{
/*
 * -- SuperLU MT routine (version 2.0) --
 * Lawrence Berkeley National Lab,  Univ. of California Berkeley,
 * and Xerox Palo Alto Research Center.
 * September 10, 2007
 *
 * Purpose
 * =======
 *
 *    Performs numeric block updates (sup-panel) in topological order.
 *    It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
 *    Results are returned in SPA dense[*,w].
 *
 */
#if ( MACH==CRAY_PVP )
    _fcd ftcs1 = _cptofcd("L", strlen("L")),
         ftcs2 = _cptofcd("N", strlen("N")),
         ftcs3 = _cptofcd("U", strlen("U"));
#endif
#ifdef USE_VENDOR_BLAS
    int          incx = 1, incy = 1;
    complex       alpha, beta;
#endif

    complex       ukj, ukj1, ukj2;
    int          luptr, luptr1, luptr2;
    int          segsze;
    register int lptr; /* start of row subscripts of the updating supernode */
    register int i, krep_ind, kfnz, isub, irow, no_zeros;
    register int jj;	      /* index through each column in the panel */
    int          *repfnz_col; /* repfnz[] for a column in the panel */
    complex       *dense_col;  /* dense[] for a column in the panel */
    complex      *tempv1;     /* used to store matrix-vector result */
    int          *col_marker; /* each column of the spa_marker[*,w] */
    int          *col_lsub;   /* each column of the panel_lsub[*,w] */
    int          *lsub, *xlsub_end;
    complex       *lusup;
    int          *xlusup;
    register float flopcnt;

    complex      zero = {0.0, 0.0};
    complex      one = {1.0, 0.0};
    complex      comp_temp, comp_temp1;
    
#ifdef TIMING
    double *utime = Gstat->utime;
    double f_time;
#endif    
    
    lsub      = Glu->lsub;
    xlsub_end = Glu->xlsub_end;
    lusup     = Glu->lusup;
    xlusup    = Glu->xlusup;
    lptr      = Glu->xlsub[fsupc];
    krep_ind  = lptr + nsupc - 1;

    /* Pointers to each column of the w-wide arrays. */
    repfnz_col= repfnz;
    dense_col = dense;
    col_marker= spa_marker;
    col_lsub  = panel_lsub;

#if ( DEBUGlevel>=2 )
if (jcol == BADPAN && krep == BADREP) {
    printf("(%d) pcgstrf_bmod1D[1] jcol %d, fsupc %d, krep %d, nsupc %d, nsupr %d, nrow %d\n",
	   pnum, jcol, fsupc, krep, nsupc, nsupr, nrow);
    PrintInt10("lsub[xlsub[2774]]", nsupr, &lsub[lptr]);
}    
#endif
    
    /*
     * Sequence through each column in the panel ...
     */
    for (jj = jcol; jj < jcol + w; ++jj, col_marker += m, col_lsub += m,
	 repfnz_col += m, dense_col += m) {

	kfnz = repfnz_col[krep];
	if ( kfnz == EMPTY ) continue;	/* Skip any zero segment */

	segsze = krep - kfnz + 1;
	luptr = xlusup[fsupc];

	/* Calculate flops: tri-solve + mat-vector */
	Gstat->procstat[pnum].fcops += flopcnt;

	/* Case 1: Update U-segment of size 1 -- col-col update */
	if ( segsze == 1 ) {
#ifdef TIMING
	    f_time = SuperLU_timer_();
#endif	    
	    ukj = dense_col[lsub[krep_ind]];
	    luptr += nsupr*(nsupc-1) + nsupc;
#if ( DEBUGlevel>=2 )
if (krep == BADCOL && jj == -1) {
    printf("(%d) pcgstrf_bmod1D[segsze=1]: k %d, j %d, ukj %.10e\n",
	   pnum, lsub[krep_ind], jj, ukj);
    PrintInt10("segsze=1", nsupr, &lsub[lptr]);
}
#endif	    
	    for (i = lptr + nsupc; i < xlsub_end[fsupc]; i++) {
		irow = lsub[i];
                        cc_mult(&comp_temp, &ukj, &lusup[luptr]);
                        c_sub(&dense_col[irow], &dense_col[irow], &comp_temp);
		++luptr;
#ifdef SCATTER_FOUND		
		if ( col_marker[irow] != jj ) {
		    col_marker[irow] = jj;
		    col_lsub[w_lsub_end[jj-jcol]++] = irow;
		}
#endif		
	    }
#ifdef TIMING
	    utime[FLOAT] += SuperLU_timer_() - f_time;
#endif	    
	} else if ( segsze <= 3 ) {
#ifdef TIMING
	    f_time = SuperLU_timer_();
#endif	    
	    ukj = dense_col[lsub[krep_ind]];
	    luptr += nsupr*(nsupc-1) + nsupc-1;
	    ukj1 = dense_col[lsub[krep_ind - 1]];
	    luptr1 = luptr - nsupr;
	    if ( segsze == 2 ) {
                cc_mult(&comp_temp, &ukj1, &lusup[luptr1]);
                c_sub(&ukj, &ukj, &comp_temp);
		dense_col[lsub[krep_ind]] = ukj;
		for (i = lptr + nsupc; i < xlsub_end[fsupc]; ++i) {
		    irow = lsub[i];
		    ++luptr;  ++luptr1;
                            cc_mult(&comp_temp, &ukj, &lusup[luptr]);
                            cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
                            c_add(&comp_temp, &comp_temp, &comp_temp1);
                            c_sub(&dense_col[irow], &dense_col[irow], &comp_temp);
#ifdef SCATTER_FOUND		
		    if ( col_marker[irow] != jj ) {
			col_marker[irow] = jj;
			col_lsub[w_lsub_end[jj-jcol]++] = irow;
		    }
#endif		
		}
	    } else {
		ukj2 = dense_col[lsub[krep_ind - 2]];
		luptr2 = luptr1 - nsupr;
                cc_mult(&comp_temp, &ukj2, &lusup[luptr2-1]);
                c_sub(&ukj1, &ukj1, &comp_temp);

                cc_mult(&comp_temp, &ukj1, &lusup[luptr1]);
                cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
                c_add(&comp_temp, &comp_temp, &comp_temp1);
                c_sub(&ukj, &ukj, &comp_temp);
		dense_col[lsub[krep_ind]] = ukj;
		dense_col[lsub[krep_ind-1]] = ukj1;
		for (i = lptr + nsupc; i < xlsub_end[fsupc]; ++i) {
		    irow = lsub[i];
		    ++luptr; ++luptr1; ++luptr2;
                    cc_mult(&comp_temp, &ukj, &lusup[luptr]);
                    cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
                    c_add(&comp_temp, &comp_temp, &comp_temp1);
                    cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
                    c_add(&comp_temp, &comp_temp, &comp_temp1);
                    c_sub(&dense_col[irow], &dense_col[irow], &comp_temp);
#ifdef SCATTER_FOUND		
		    if ( col_marker[irow] != jj ) {
			col_marker[irow] = jj;
			col_lsub[w_lsub_end[jj-jcol]++] = irow;
		    }
#endif		
		}
	    }
#ifdef TIMING
	    utime[FLOAT] += SuperLU_timer_() - f_time;
#endif	    
	} else { /* segsze >= 4 */
	    /* 
	     * Perform a triangular solve and matrix-vector update,
	     * then scatter the result of sup-col update to dense[*].
	     */
	    no_zeros = kfnz - fsupc;

	    /* Gather U[*,j] segment from dense[*] to tempv[*]: 
	     *   The result of triangular solve is in tempv[*];
	     *   The result of matrix vector update is in dense_col[*]
	     */
	    isub = lptr + no_zeros;
/*#pragma ivdep*/
	    for (i = 0; i < segsze; ++i) {
		irow = lsub[isub];
		tempv[i] = dense_col[irow]; /* Gather */
		++isub;
	    }

	    /* start effective triangle */
	    luptr += nsupr * no_zeros + no_zeros;
#ifdef TIMING
	    f_time = SuperLU_timer_();
#endif
		
#ifdef USE_VENDOR_BLAS
#if ( MACH==CRAY_PVP )
	    CTRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], 
		  &nsupr, tempv, &incx );
#else
	    ctrsv_( "L", "N", "U", &segsze, &lusup[luptr], 
		   &nsupr, tempv, &incx );
#endif
		
	    luptr += segsze;	/* Dense matrix-vector */
	    tempv1 = &tempv[segsze];

            alpha = one;
            beta = zero;
#if ( MACH==CRAY_PVP )
	    CGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr], 
		  &nsupr, tempv, &incx, &beta, tempv1, &incy );
#else
	    cgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr], 
		   &nsupr, tempv, &incx, &beta, tempv1, &incy );
#endif /* _CRAY_PVP */
#else
	    clsolve ( nsupr, segsze, &lusup[luptr], tempv );
	    
	    luptr += segsze;        /* Dense matrix-vector */
	    tempv1 = &tempv[segsze];
	    cmatvec (nsupr, nrow, segsze, &lusup[luptr], tempv, tempv1);
#endif
		
#ifdef TIMING
	    utime[FLOAT] += SuperLU_timer_() - f_time;
#endif	    

	    /* Scatter tempv[*] into SPA dense[*] temporarily, 
	     * such that tempv[*] can be used for the triangular solve of
	     * the next column of the panel. They will be copied into 
	     * ucol[*] after the whole panel has been finished.
	     */
	    isub = lptr + no_zeros;
/*#pragma ivdep*/
	    for (i = 0; i < segsze; i++) {
		irow = lsub[isub];
		dense_col[irow] = tempv[i]; /* Scatter */
		tempv[i] = zero;
		isub++;
#if ( DEBUGlevel>=2 )
	if (jj == -1 && krep == 3423)
	    printf("(%d) pcgstrf_bmod1D[scatter] jj %d, dense_col[%d] %e\n",
		   pnum, jj, irow, dense_col[irow]);
#endif
	    }
		
	    /* Scatter the update from tempv1[*] into SPA dense[*] */
/*#pragma ivdep*/
	    for (i = 0; i < nrow; i++) {
		irow = lsub[isub];
                c_sub(&dense_col[irow], &dense_col[irow],
                              &tempv1[i]); /* Scatter-add */
#ifdef SCATTER_FOUND		
		if ( col_marker[irow] != jj ) {
		    col_marker[irow] = jj;
		    col_lsub[w_lsub_end[jj-jcol]++] = irow;
		}
#endif		
		tempv1[i] = zero;
		isub++;
	    }
		
	} /* else segsze >= 4 ... */
	
    } /* for jj ... */

}
Ejemplo n.º 21
0
Archivo: pol.c Proyecto: rforge/muste
static int polroot(
struct polynom *p,
struct complex *pz,  /* pointer to root */
struct complex *pz0  /* pointer to initial value */
)
        {
        int i;
        int n_iter;
        struct polynom d;
        struct complex v,v0,vd,delta,zmin;
        double y,ymin;
//        extern struct polynom *pol_der();
//        extern struct complex *pol_value();
		delta.x=0; delta.y=0; // RS 7.2.2013
//printf("\np->n=%d",p->n); getch();
        if (p->n==1)
            {
            c_div(pz,&(p->a[0]),&(p->a[1]));
            pz->x=-(pz->x); pz->y=-(pz->y);
            return(1);
            }
//printf("\nr2");
        pol_value(p,pz0,&v0);
//printf("\nr3");
        if (c_zero(&v0))
            {
            pz->x=pz0->x;
            pz->y=pz0->y;
            return(1);
            }
//printf("\nr4");
        zmin.x=pz0->x; zmin.y=pz0->y;
        ymin=v0.x*v0.x+v0.y*v0.y;
        pol_der(&d,p);
//printf("\nr5");
        n_iter=0;
        while (1)
            {
            pol_value(&d,pz0,&vd);

            c_div(&delta,&v0,&vd);
            c_sub(pz,pz0,&delta);
            pol_value(p,pz,&v);
            v0.x=v.x; v0.y=v.y;
            pz0->x=pz->x; pz0->y=pz->y;

            ++n_iter;
            PR_UP;
            sprintf(sbuf,"\npolroot: N=%d  Re=%e Im=%e",n_iter,pz->x,pz->y); sur_print(sbuf); // RS CHA Rprintf

/* Rprintf("zero: %d\n",c_zero(pz)); getch();  */
  /*        if (c_zero(&pz)) break;     */
            if (c_zero(pz)) break;
            y=v.x*v.x+v.y*v.y;
            if (y<ymin)
                { ymin=y; zmin.x=pz->x; zmin.y=pz->y; }

            if (fabs(delta.x)<roots_eps && fabs(delta.y)<roots_eps) break;

            if (n_iter>roots_max_iter)
                {
                pz->x=zmin.x; pz->y=zmin.y;
                break;
                }
            if (sur_kbhit())
                {
                pz->x=zmin.x; pz->y=zmin.y;
                i=sur_getch(); if (i=='.') break;
                }
            }
        return (n_iter);
        }
Ejemplo n.º 22
0
int
pcgstrf_snode_bmod(
		   const int  pnum,   /* process number */
		   const int  jcol,   /* in - current column in the s-node */
		   const int  jsupno, /* in */
		   const int  fsupc,  /* in - first column in the s-node */
		   complex     *dense, /* in */
		   complex     *tempv, /* working array */
		   GlobalLU_t *Glu,   /* modified */
		   Gstat_t *Gstat     /* modified */
		   )
{
/*
 * -- SuperLU MT routine (version 2.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley,
 * and Xerox Palo Alto Research Center.
 * September 10, 2007
 *
 * Performs numeric block updates within the relaxed supernode. 
 */

    complex      zero = {0.0, 0.0};
    complex      one = {1.0, 0.0};
    complex      none = {-1.0, 0.0};

#if ( MACH==CRAY_PVP )
    _fcd ftcs1, ftcs2, ftcs3;
#endif
#ifdef USE_VENDOR_BLAS    
    int            incx = 1, incy = 1;
    complex         alpha = none, beta = one;
#endif
    
    int            luptr, nsupc, nsupr, nrow;
    int            isub, irow, i, iptr; 
    register int   ufirst, nextlu;
    complex         *lusup;
    int            *lsub, *xlsub, *xlsub_end, *xlusup, *xlusup_end;
    register float flopcnt;

    lsub       = Glu->lsub;
    xlsub      = Glu->xlsub;
    xlsub_end  = Glu->xlsub_end;
    lusup      = Glu->lusup;
    xlusup     = Glu->xlusup;
    xlusup_end = Glu->xlusup_end;

    nextlu = xlusup[jcol];
    
    /*
     *	Process the supernodal portion of L\U[*,j]
     */
    for (isub = xlsub[fsupc]; isub < xlsub_end[fsupc]; isub++) {
  	irow = lsub[isub];
	lusup[nextlu] = dense[irow];
	dense[irow] = zero;
	++nextlu;
    }

    xlusup_end[jcol] = nextlu;
    
    if ( fsupc < jcol ) {

	luptr = xlusup[fsupc];
	nsupr = xlsub_end[fsupc] - xlsub[fsupc];
	nsupc = jcol - fsupc;	/* Excluding jcol */
	ufirst = xlusup[jcol];	/* Points to the beginning of column
				   jcol in supernode L\U(jsupno). */
	nrow = nsupr - nsupc;
	
	flopcnt = nsupc * (nsupc - 1) + 2 * nrow * nsupc; //sj
	Gstat->procstat[pnum].fcops += flopcnt;

/*	ops[TRSV] += nsupc * (nsupc - 1);
	ops[GEMV] += 2 * nrow * nsupc;    */

#ifdef USE_VENDOR_BLAS
#if ( MACH==CRAY_PVP )
	ftcs1 = _cptofcd("L", strlen("L"));
	ftcs2 = _cptofcd("N", strlen("N"));
	ftcs3 = _cptofcd("U", strlen("U"));
	CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr], &nsupr, 
	      &lusup[ufirst], &incx );
	CGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr, 
	      &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
#else
	ctrsv_( "L", "N", "U", &nsupc, &lusup[luptr], &nsupr, 
	      &lusup[ufirst], &incx );
	cgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr, 
		&lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
#endif
#else
	clsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );
	cmatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc], 
		 &lusup[ufirst], &tempv[0] );

        /* Scatter tempv[*] into lusup[*] */
	iptr = ufirst + nsupc;
	for (i = 0; i < nrow; i++) {
            c_sub(&lusup[iptr], &lusup[iptr], &tempv[i]);
            ++iptr;
            tempv[i] = zero;
	}
#endif

    }

    return 0;
}
Ejemplo n.º 23
0
void simulation_solve(simulation_t *simulation)
{
    int m,s;
    int i,j,k, i_max, x;
    m = simulation->n_vars;
    int n = m+1;
    s = simulation_context_get_n_samples(simulation->nodelist->netlist->sc);

    for ( k = 0 ; k < m ; ++k )
    {
        //pivot is col k
        //find a line with the maximum k
        i_max = simulation_max(simulation, k,m);

        if ( cell_is_zero(simulation, &simulation->cells[i_max][k]) )
        {
            fprintf(stderr,"singular matrix, zero found @ row %d col %d (mean = %.1f)\n", i_max, k,
                    (double)cell_mean(simulation,&simulation->cells[i_max][k]));
            simulation_dump(simulation);
            assert(0);
        }
        assert(i_max >= k);

        if ( i_max != k )
        {
            cell_t *tmp;
            tmp = simulation->cells[k];
            simulation->cells[k] = simulation->cells[i_max];
            simulation->cells[i_max] = tmp;
        }


        for ( i = k + 1 ; i < m ; i ++ )
        {
            if ( simulation->cells[i][k].state == CELL_ZERO )
            {
                //fprintf(stderr,"continue\n");
                continue;
            }
            assert( i != k);
#ifdef USE_OMP
            #pragma omp parallel for
#endif
            for ( j = n - 1 ;  j > k  ; --j )
            {
                // Fij = Fij - Fkj * Fik/Fkk
                assert( j != k );
                cell_t *line_i = simulation->cells[i];
                cell_t *line_k = simulation->cells[k];
                line_i[j].state = CELL_SET;
                for ( x = 0 ; x < s ; ++x )
                {
                    //assert( line_k[k].state != CELL_ZERO && !cell_is_zero(simulation,&line_k[k]));
                    line_i[j].values[x] = c_let(
                                              c_sub(line_i[j].values[x],
                                                    c_mul(line_k[j].values[x],
                                                          c_div( line_i[k].values[x], line_k[k].values[x] ))));
                    /*
                    line_i[j].values[x] =
                      line_i[j].values[x] - line_k[j].values[x]
                      * ( line_i[k].values[x] / line_k[k].values[x] );
                      */


                    /*
                    assert( i != k );
                    line_i[j].values[x] =
                        line_i[j].values[x] * line_k[k].values[x]
                      - line_k[j].values[x] * line_i[k].values[x];
                    */
                }
            }
            simulation->cells[i][k].state = CELL_ZERO;
#ifdef USE_OMP
            #pragma omp parallel for
#endif
            for ( x = 0 ; x < s ; ++x )
            {
                simulation->cells[i][k].values[x]=0.L;
            }
        }

    }

    //simulation_dump(simulation);
    //backward elimination
    //simulation_dump(simulation);

    if (1)
        for ( k = m - 1 ; k >= 0 ; --k )
        {
            //set pivot to 1
            // Lk = Lk / Fk,k
#ifdef USE_OMP
            #pragma omp parallel for
#endif
            for ( j = k+1 ; j < n ; ++j )
            {
                assert( j != k );
                simulation->cells[k][j].state = CELL_SET;
                for ( x = 0 ; x < s ; ++x )
                {
                    //assert( simulation->cells[k][k].state != CELL_ZERO && !cell_is_zero(simulation, &simulation->cells[k][k]));

                    simulation->cells[k][j].values[x] =
                        c_let(
                            c_div(simulation->cells[k][j].values[x], simulation->cells[k][k].values[x] ));
                    /*
                    simulation->cells[k][j].values[x] =
                    simulation->cells[k][j].values[x] / simulation->cells[k][k].values[x];
                         */
                }
            }
            simulation->cells[k][k].state = CELL_POSITIVE_UNITY;
#ifdef USE_OMP
            #pragma omp parallel for
#endif
            for ( x = 0 ; x < s ; ++x )
            {
                simulation->cells[k][k].values[x] = 1.L + I * 0.L;
            }
        }


    //zeroes pivot col
    for ( k = m - 1 ; k >= 0 ; --k ) // vertical pivot Fk,k
    {
        assert( simulation->cells[k][k].state == CELL_POSITIVE_UNITY );
        // Li = Li - Lk * Fik
#ifdef USE_OMP
        #pragma omp parallel for
#endif
        for ( i = 0 ; i < k ; ++i) // vertical
        {
            assert( i != k );
            for ( j = k + 1 ; j < m ; ++ j )
                assert( simulation->cells[i][j].state == CELL_ZERO );

            // => Fir = Fir - Fik x Fkr
            // => Fi,k  = 0
            int result_col = n - 1;
            simulation->cells[i][result_col].state = CELL_SET;
            for ( x = 0 ; x < s ; ++x )
            {
                simulation->cells[i][result_col].values[x] = c_let(
                            c_sub(simulation->cells[i][result_col].values[x],
                                  c_mul(simulation->cells[i][k].values[x], simulation->cells[k][result_col].values[x])));
                /*
                simulation->cells[i][j].values[x] =
                simulation->cells[i][j].values[x]
                - simulation->cells[i][k].values[x] * simulation->cells[k][j].values[x];
                     */
            }

            simulation->cells[i][k].state = CELL_ZERO;
            for ( x = 0 ; x < s ; ++x )
            {
                simulation->cells[i][k].values[x] = 0.L;
            }
        }

    }

    //sanity check
    assert( m == simulation->n_vars );
    assert( n == simulation->n_vars+1 );
    for ( i = 0 ; i < m ; ++ i )
        for ( j = 0 ; j < m ; ++ j )
            if ( i==j )
                assert(simulation->cells[i][j].state == CELL_POSITIVE_UNITY );
            else
                assert(simulation->cells[i][j].state == CELL_ZERO );
}
Ejemplo n.º 24
0
void schurFactorization(long n, complex **A, complex **T, complex **U)
{

  /* Schur factorization: A = U*T*U', T = upper triangular, U = unitary */
            
  long i,j,iter,maxIter;
  double tol, diff1,diff2; 
  complex T11, T12, T21, T22; 
  complex sigma1, sigma2, sigma; 
  complex z, z1, z2; 
  complex **P, **Q, **R;


  /* Allocate auxiliary matrices */

  P     = (complex **)Mem(MEM_ALLOC,n, sizeof(complex *)); 
  Q     = (complex **)Mem(MEM_ALLOC,n, sizeof(complex *)); 
  R     = (complex **)Mem(MEM_ALLOC,n, sizeof(complex *)); 

  for (i=0; i<n; i++){
    P[i]     = (complex *)Mem(MEM_ALLOC,n, sizeof(complex)); 
    Q[i]     = (complex *)Mem(MEM_ALLOC,n, sizeof(complex)); 
    R[i]     = (complex *)Mem(MEM_ALLOC,n, sizeof(complex)); 
  }

  /* ------------------------------------------------------------*/

  /* Parameters for iteration */

   maxIter = 500;
   tol     = 1E-30; 

  /* ------------------------------------------------------------*/

  /* Init U = eye(n) (identity matrix) */

  for (i=0; i<n; i++){
    U[i][i].re = 1.0; 
    U[i][i].im = 0.0; 
  }  
  
  /* ------------------------------------------------------------*/

  /* Reduce A to Hessenberg form */

  hessFactorization(n,A,P,T); 

  /* ------------------------------------------------------------*/

  /* Compute Schur factorization of Hessenberg matrix T */


   for (j=n-1; j>0; j--){ /* Main loop */

     for (iter=0; iter<maxIter; iter++){ /* Iteration loop */

       sigma.re = T[j][j].re;
       sigma.im = T[j][j].im; 


       /* -- Use Wilkinson shift -- */

       /* submatrix considered in the shift */

       T11 = T[j-1][j-1];
       T12 = T[j-1][j];
       T21 = T[j][j-1];
       T22 = T[j][j];

       /* Compute eigenvalues of submatrix */

       z.re  = 0.0;
       z.im  = 0.0;
       z2.re = 0.0;
       z2.im = 0.0;

       /* z = T11*T11 + T22*T22 - 2*T11*T22 + 4*T12*T21 */

       z1 = c_mul(T11,T11);

       z  = c_add(z ,z1);
       z2 = c_add(z2,z1);
       
       z1 = c_mul(T22,T22);

       z  = c_add(z ,z1);
       z2 = c_add(z2,z1);

       z1 = c_mul(T11,T22);

       z1.re = -2.0 * z1.re;
       z1.im = -2.0 * z1.im;

       z  = c_add(z,z1);

       z1 = c_mul(T12,T21);
       z1.re = 4.0 * z1.re;
       z1.im = 4.0 * z1.im;
       z = c_add(z,z1);

       /* Square root*/

       z = c_sqrt(z);

       /* Eigenvalues */
       
       sigma1 = c_add(z2,z);
       sigma2 = c_sub(z2,z);

/*        printf("sigma1 = %e %e\n", sigma1.re, sigma1.im); */
/*        printf("sigma2 = %e %e\n", sigma2.re, sigma2.im); */

       /* Select eigenvalue for shift*/

       diff1 = c_norm( c_sub(T[j][j], sigma1) );
       diff2 = c_norm( c_sub(T[j][j], sigma2) );

       if (diff1 < diff2){
	 sigma.re = sigma1.re;
	 sigma.im = sigma1.im;
       }else{
	 sigma.re = sigma2.re;
	 sigma.im = sigma2.im;
       }

       /* --- QR step with Wilkinson shift --- */

       /* Shift: T(1:j,1:j) = T(1:j,1:j) - sigma * eye(j) */

       for (i=0; i<j+1; i++){

	 CheckValue(FUNCTION_NAME, "T[i][i].re","", T[i][i].re, -INFTY, INFTY);
	 CheckValue(FUNCTION_NAME, "T[i][i].im","", T[i][i].im, -INFTY, INFTY);	 

	 T[i][i].re = T[i][i].re - sigma.re;   
	 T[i][i].im = T[i][i].im - sigma.im;   
	 
       }

       /* Compute QR factorization of shifted Hessenberg matrix */

       for (i=0; i<n; i++){
	 memset(Q[i], 0, n*sizeof(complex));
	 memset(R[i], 0, n*sizeof(complex));
       }

       QRfactorization(n,T,Q,R); 

       /* T = T_new = R * Q  */

       for (i=0; i<n; i++){
	 memset(T[i], 0, n*sizeof(complex));
       }
       matProduct(n, n, n, R, Q, T);

       /* T(1:j,1:j) = T(1:j,1:j) + sigma * eye(j) */
       for (i=0; i<j+1; i++){
	 T[i][i].re = T[i][i].re + sigma.re;   
	 T[i][i].im = T[i][i].im + sigma.im;   
       }


       /* R =  U_new = U * Q */

       for (i=0; i<n; i++){
	 memset(R[i], 0, n*sizeof(complex));
       }       
       matProduct(n,n,n,U,Q,R); 

       /* U = R */

       for (i=0; i<n; i++){
	 memcpy(U[i],R[i], n*sizeof(complex));
       } 

       /* Check convergence */

       if (c_norm( T[j][j-1] ) <= tol * (c_norm(T[j-1][j-1]) + c_norm(T[j][j]))){
	 T[j][j-1].re = 0.0;
	 T[j][j-1].im = 0.0;
	 break; 
       }
       
   
     }	/* end of iter loop */  
    
   } /* end of main loop */


  /* -------------------------------------------------------------*/

   /* U = P*U */

   for (i=0; i<n; i++){
     memset(U[i], 0, n*sizeof(complex));
   }
   matProduct(n,n,n,P,R,U);
   

  /* -------------------------------------------------------------*/
  /* Free auxiliary variables */

   for (i=0; i<n; i++){
     Mem(MEM_FREE,P[i]); 
     Mem(MEM_FREE,Q[i]); 
     Mem(MEM_FREE,R[i]); 
   }

   Mem(MEM_FREE,P); 
   Mem(MEM_FREE,Q); 
   Mem(MEM_FREE,R); 

  /* Return */

  return;   
    
  
}
int main()
{
	//initialize testing array
	float testVector[] = {0.1f,0.2f,0.3f,0.4f,0.5f};
	
	/*COMMENTED OUT LENGTH PARAM AS IT IS INCLUDED IN HEADER FILE*/
	//get the size of the array
	//int length = sizeof(testVector)/sizeof(float);

	//initiate empty output array of size length
	float outputArrayC[length];
	//initialize the struct at p=r=q 0.1 and x=k=0 
	kalman_state currentState = {0.1f, 0.1f, 0.0f , 0.1f, 0.0f};
	//call function Kalmanfilter_C
	Kalmanfilter_C(measurements, outputArrayC, &currentState, length);
	
	//initiate empty output array of size length
	float outputArrayASM[length];
	
	//reinitialize the struct at p=r=q 0.1 and x=k=0 
	currentState.p = DEF_p; 
	currentState.r = DEF_r; 	
	currentState.k = DEF_k;
	currentState.q = DEF_q; 
	currentState.x = DEF_x; 
	
	//call subroutine Kalmanfilter_asm
	Kalmanfilter_asm(measurements, outputArrayASM, &currentState, length );

	//Check for correctness with a error tolerance of 0.000001
	float errorTolerance = 0.000001f;
	float errorPercentage = 0.01;

	//is_valid(outputArrayC, outputArrayASM, length, errorTolerance, "c vs asm");
	//is_valid_relative(outputArrayC, outputArrayASM, length, errorTolerance, errorPercentage,"c vs asm");

	int p;

	//print KalmanFilter output
	for ( p = 0; p < length;  p++ )
	{
		printf("OutputASM: %f & OutputC %f\n", outputArrayASM[p], outputArrayC[p]); 
	}

	float differenceC[length];
	float differenceCMSIS[length];
	
	//Difference
	arm_sub_f32 (measurements, outputArrayC, differenceCMSIS, length);
	c_sub(measurements, outputArrayC, differenceC, length);
	
	//is_valid(differenceC, differenceCMSIS, length, errorTolerance, "Difference");
	//is_valid_relative(differenceC, differenceCMSIS, length, errorTolerance, errorPercentage,"Difference");
	
	//Print difference vector
	for ( p = 0; p < length;  p++ )
	{
		printf("DifferenceC: %f & DifferenceCMSIS %f \n", differenceC[p], differenceCMSIS[p]); 
	}


	//Mean
	float meanCMSIS;
	float meanC; 
	arm_mean_f32 (differenceCMSIS, length , &meanCMSIS);
	c_mean(differenceC,length, &meanC); 
	//is_valid(&meanC, &meanCMSIS, 1, errorTolerance, "mean"); 
	//is_valid_relative(&meanC, &meanCMSIS, 1, errorTolerance, errorPercentage, "mean"); 
	
	//Print mean values
	printf("MeanC: %f & MeanCMSIS %f \n", meanC, meanCMSIS); 
	
	//STD
	float stdC;
	float stdCMSIS;
	arm_std_f32 (differenceCMSIS, length, &stdCMSIS);
	c_std(differenceC, length, &stdC);
	//is_valid(&stdC, &stdCMSIS, 1, errorTolerance, "STD");
	//is_valid_relative(&stdC, &stdCMSIS, 1, errorTolerance, errorPercentage,"STD");

	//Print std values
	printf("StandardDevC: %f & StandardDevCMSIS %f \n", stdC, stdCMSIS); 

	//correlation
	float corC[2*length-1];
	float corCMSIS[2*length-1];
	arm_correlate_f32 (measurements, length, outputArrayC, length, corCMSIS);
	c_correlate(measurements, outputArrayC, corC, length);
	//is_valid(corC, corCMSIS, 2*length-1, errorTolerance, "correlation"); 
	//is_valid_relative(corC, corCMSIS, 2*length-1, errorTolerance, errorPercentage, "correlation"); 
	
	//convolution
	float convC[2*length-1];
	float convCMSIS[2*length-1];
	arm_conv_f32 (measurements, length, outputArrayC, length, convCMSIS);
	c_conv(measurements, outputArrayC, convC, length);
	//is_valid(convC, convCMSIS, 2*length-1, errorTolerance, "convolution"); 
	//is_valid_relative(convC, convCMSIS, 2*length-1, errorTolerance, errorPercentage, "convolution"); 

	//Print correlation and convolution values
	for ( p = 0; p < (2*length-1);  p++ )
	{
		printf("ConvC: %f & ConvCMSIS: %f \n", convC[p], convCMSIS[p]); 
	}
	for ( p = 0; p < (2*length-1);  p++ )
	{
		printf("CorrelateC: %f & CorrelatCMSIS: %f \n", corC[p], corCMSIS[p]); 
	}

	return 0;
}
Ejemplo n.º 26
0
int
pcgstrf_column_bmod(
		    const int  pnum,   /* process number */
		    const int  jcol,   /* current column in the panel */
		    const int  fpanelc,/* first column in the panel */
		    const int  nseg,   /* number of s-nodes to update jcol */
		    int        *segrep,/* in */
		    int        *repfnz,/* in */
		    complex     *dense, /* modified */
		    complex     *tempv, /* working array */
		    pxgstrf_shared_t *pxgstrf_shared, /* modified */
		    Gstat_t *Gstat     /* modified */
		    )
{
/*
 * -- SuperLU MT routine (version 2.0) --
 * Lawrence Berkeley National Lab, Univ. of California Berkeley,
 * and Xerox Palo Alto Research Center.
 * September 10, 2007
 *
 * Purpose:
 * ========
 *    Performs numeric block updates (sup-col) in topological order.
 *    It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
 *    Special processing on the supernodal portion of L\U[*,j].
 *
 * Return value:
 * =============
 *      0 - successful return
 *    > 0 - number of bytes allocated when run out of space
 *
 */
#if ( MACH==CRAY_PVP )
    _fcd ftcs1 = _cptofcd("L", strlen("L")),
         ftcs2 = _cptofcd("N", strlen("N")),
         ftcs3 = _cptofcd("U", strlen("U"));
#endif
    
#ifdef USE_VENDOR_BLAS    
    int         incx = 1, incy = 1;
    complex      alpha, beta;
#endif
    GlobalLU_t *Glu = pxgstrf_shared->Glu;   /* modified */
    
    /* krep = representative of current k-th supernode
     * fsupc = first supernodal column
     * nsupc = no of columns in supernode
     * nsupr = no of rows in supernode (used as leading dimension)
     * luptr = location of supernodal LU-block in storage
     * kfnz = first nonz in the k-th supernodal segment
     * no_zeros = no of leading zeros in a supernodal U-segment
     */
    complex	  ukj, ukj1, ukj2;
    register int lptr, kfnz, isub, irow, i, no_zeros;
    register int luptr, luptr1, luptr2;
    int          fsupc, nsupc, nsupr, segsze;
    int          nrow;	  /* No of rows in the matrix of matrix-vector */
    int          jsupno, k, ksub, krep, krep_ind, ksupno;
    int          ufirst, nextlu;
    int          fst_col; /* First column within small LU update */
    int          d_fsupc; /* Distance between the first column of the current
			     panel and the first column of the current snode.*/
    int          *xsup, *supno;
    int          *lsub, *xlsub, *xlsub_end;
    complex       *lusup;
    int          *xlusup, *xlusup_end;
    complex       *tempv1;
    int          mem_error;
    register float flopcnt;

    complex      zero = {0.0, 0.0};
    complex      one = {1.0, 0.0};
    complex      none = {-1.0, 0.0};
    complex      comp_temp, comp_temp1;

    xsup       = Glu->xsup;
    supno      = Glu->supno;
    lsub       = Glu->lsub;
    xlsub      = Glu->xlsub;
    xlsub_end  = Glu->xlsub_end;
    lusup      = Glu->lusup;
    xlusup     = Glu->xlusup;
    xlusup_end = Glu->xlusup_end;
    jsupno     = supno[jcol];

    /* 
     * For each nonz supernode segment of U[*,j] in topological order 
     */
    k = nseg - 1;
    for (ksub = 0; ksub < nseg; ksub++) {

	krep = segrep[k];
	k--;
	ksupno = supno[krep];
#if ( DEBUGlvel>=2 )
if (jcol==BADCOL)
printf("(%d) pcgstrf_column_bmod[1]: %d, nseg %d, krep %d, jsupno %d, ksupno %d\n",
       pnum, jcol, nseg, krep, jsupno, ksupno);
#endif    
	if ( jsupno != ksupno ) { /* Outside the rectangular supernode */

	    fsupc = xsup[ksupno];
	    fst_col = SUPERLU_MAX ( fsupc, fpanelc );

  	    /* Distance from the current supernode to the current panel; 
	       d_fsupc=0 if fsupc >= fpanelc. */
  	    d_fsupc = fst_col - fsupc; 

	    luptr = xlusup[fst_col] + d_fsupc;
	    lptr = xlsub[fsupc] + d_fsupc;
	    kfnz = repfnz[krep];
	    kfnz = SUPERLU_MAX ( kfnz, fpanelc );
	    segsze = krep - kfnz + 1;
	    nsupc = krep - fst_col + 1;
	    nsupr = xlsub_end[fsupc] - xlsub[fsupc]; /* Leading dimension */
	    nrow = nsupr - d_fsupc - nsupc;
	    krep_ind = lptr + nsupc - 1;

	flopcnt = segsze * (segsze - 1) + 2 * nrow * segsze;//sj
		Gstat->procstat[pnum].fcops += flopcnt;

#if ( DEBUGlevel>=2 )
if (jcol==BADCOL)	    
printf("(%d) pcgstrf_column_bmod[2]: %d, krep %d, kfnz %d, segsze %d, d_fsupc %d,\
fsupc %d, nsupr %d, nsupc %d\n",
       pnum, jcol, krep, kfnz, segsze, d_fsupc, fsupc, nsupr, nsupc);

#endif



            /*
             * Case 1: Update U-segment of size 1 -- col-col update
             */
            if ( segsze == 1 ) {
                ukj = dense[lsub[krep_ind]];
                luptr += nsupr*(nsupc-1) + nsupc;

                for (i = lptr + nsupc; i < xlsub_end[fsupc]; ++i) {
                    irow = lsub[i];
                    cc_mult(&comp_temp, &ukj, &lusup[luptr]);
                    c_sub(&dense[irow], &dense[irow], &comp_temp);
                    luptr++;
                }

            } else if ( segsze <= 3 ) {
                ukj = dense[lsub[krep_ind]];
                luptr += nsupr*(nsupc-1) + nsupc-1;
                ukj1 = dense[lsub[krep_ind - 1]];
                luptr1 = luptr - nsupr;

                if ( segsze == 2 ) { /* Case 2: 2cols-col update */
                    cc_mult(&comp_temp, &ukj1, &lusup[luptr1]);
                    c_sub(&ukj, &ukj, &comp_temp);
                    dense[lsub[krep_ind]] = ukj;
                    for (i = lptr + nsupc; i < xlsub_end[fsupc]; ++i) {
                        irow = lsub[i];
                        luptr++;
                        luptr1++;
                        cc_mult(&comp_temp, &ukj, &lusup[luptr]);
                        cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
                        c_add(&comp_temp, &comp_temp, &comp_temp1);
                        c_sub(&dense[irow], &dense[irow], &comp_temp);
                    }
                } else { /* Case 3: 3cols-col update */
                    ukj2 = dense[lsub[krep_ind - 2]];
                    luptr2 = luptr1 - nsupr;
                    cc_mult(&comp_temp, &ukj2, &lusup[luptr2-1]);
                    c_sub(&ukj1, &ukj1, &comp_temp);

                    cc_mult(&comp_temp, &ukj1, &lusup[luptr1]);
                    cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
                    c_add(&comp_temp, &comp_temp, &comp_temp1);
                    c_sub(&ukj, &ukj, &comp_temp);

                    dense[lsub[krep_ind]] = ukj;
                    dense[lsub[krep_ind-1]] = ukj1;
                    for (i = lptr + nsupc; i < xlsub_end[fsupc]; ++i) {
                        irow = lsub[i];
                        luptr++;
                        luptr1++;
                        luptr2++;
                        cc_mult(&comp_temp, &ukj, &lusup[luptr]);
                        cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
                        c_add(&comp_temp, &comp_temp, &comp_temp1);
                        cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
                        c_add(&comp_temp, &comp_temp, &comp_temp1);
                        c_sub(&dense[irow], &dense[irow], &comp_temp);
                    }
                }


	    } else {
	  	/*
		 * Case: sup-col update
		 * Perform a triangular solve and block update,
		 * then scatter the result of sup-col update to dense
		 */
		no_zeros = kfnz - fst_col;

	        /* Copy U[*,j] segment from dense[*] to tempv[*] */
	        isub = lptr + no_zeros;
	        for (i = 0; i < segsze; i++) {
	  	    irow = lsub[isub];
		    tempv[i] = dense[irow];
		    ++isub; 
	        }

	        /* Dense triangular solve -- start effective triangle */
		luptr += nsupr * no_zeros + no_zeros; 
#ifdef USE_VENDOR_BLAS
#if ( MACH==CRAY_PVP )
		CTRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], 
		       &nsupr, tempv, &incx );
#else
		ctrsv_( "L", "N", "U", &segsze, &lusup[luptr], 
		       &nsupr, tempv, &incx );
#endif
		
 		luptr += segsze;  /* Dense matrix-vector */
		tempv1 = &tempv[segsze];
		alpha = one;
		beta = zero;
#if ( MACH==CRAY_PVP )
		CGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr], 
		       &nsupr, tempv, &incx, &beta, tempv1, &incy );
#else
		cgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr], 
		       &nsupr, tempv, &incx, &beta, tempv1, &incy );
#endif
#else
		clsolve ( nsupr, segsze, &lusup[luptr], tempv );

 		luptr += segsze;  /* Dense matrix-vector */
		tempv1 = &tempv[segsze];
		cmatvec (nsupr, nrow , segsze, &lusup[luptr], tempv, tempv1);
#endif
                /* Scatter tempv[] into SPA dense[*] */
                isub = lptr + no_zeros;
                for (i = 0; i < segsze; i++) {
                    irow = lsub[isub];
                    dense[irow] = tempv[i]; /* Scatter */
                    tempv[i] = zero;
                    isub++;
                }

		/* Scatter tempv1[] into SPA dense[*] */
		for (i = 0; i < nrow; i++) {
		    irow = lsub[isub];
                    c_sub(&dense[irow], &dense[irow], &tempv1[i]);
		    tempv1[i] = zero;
		    ++isub;
		}
	    } /* else segsze >= 4 */
	    
	} /* if jsupno ... */

    } /* for each segment... */

    
    /* ------------------------------------------
       Process the supernodal portion of L\U[*,j]
       ------------------------------------------ */
    
    fsupc = SUPER_FSUPC (jsupno);
    nsupr = xlsub_end[fsupc] - xlsub[fsupc];
    if ( (mem_error = Glu_alloc(pnum, jcol, nsupr, LUSUP, &nextlu, 
			       pxgstrf_shared)) )
	return mem_error;
    xlusup[jcol] = nextlu;
    lusup = Glu->lusup;
    
    /* Gather the nonzeros from SPA dense[*,j] into L\U[*,j] */
    for (isub = xlsub[fsupc]; isub < xlsub_end[fsupc]; ++isub) {
  	irow = lsub[isub];
	lusup[nextlu] = dense[irow];
	dense[irow] = zero;
#ifdef DEBUG
if (jcol == -1)
    printf("(%d) pcgstrf_column_bmod[lusup] jcol %d, irow %d, lusup %.10e\n",
	   pnum, jcol, irow, lusup[nextlu]);
#endif	
	++nextlu;
    }
    xlusup_end[jcol] = nextlu; /* close L\U[*,jcol] */

#if ( DEBUGlevel>=2 )
if (jcol == -1) {
    nrow = xlusup_end[jcol] - xlusup[jcol];
    print_double_vec("before sup-col update", nrow, &lsub[xlsub[fsupc]],
		     &lusup[xlusup[jcol]]);
}
#endif    
    
    /*
     * For more updates within the panel (also within the current supernode), 
     * should start from the first column of the panel, or the first column 
     * of the supernode, whichever is bigger. There are 2 cases:
     *    (1) fsupc < fpanelc,  then fst_col := fpanelc
     *    (2) fsupc >= fpanelc, then fst_col := fsupc
     */
    fst_col = SUPERLU_MAX ( fsupc, fpanelc );

    if ( fst_col < jcol ) {

  	/* distance between the current supernode and the current panel;
	   d_fsupc=0 if fsupc >= fpanelc. */
  	d_fsupc = fst_col - fsupc;

	lptr = xlsub[fsupc] + d_fsupc;
	luptr = xlusup[fst_col] + d_fsupc;
	nsupr = xlsub_end[fsupc] - xlsub[fsupc]; /* Leading dimension */
	nsupc = jcol - fst_col;	/* Excluding jcol */
	nrow = nsupr - d_fsupc - nsupc;

	/* points to the beginning of jcol in supernode L\U[*,jsupno] */
	ufirst = xlusup[jcol] + d_fsupc;	

#if ( DEBUGlevel>=2 )
if (jcol==BADCOL)
printf("(%d) pcgstrf_column_bmod[3] jcol %d, fsupc %d, nsupr %d, nsupc %d, nrow %d\n",
       pnum, jcol, fsupc, nsupr, nsupc, nrow);
#endif    

	flopcnt = nsupc * (nsupc - 1) + 2 * nrow * nsupc; //sj
	Gstat->procstat[pnum].fcops += flopcnt;

/*	ops[TRSV] += nsupc * (nsupc - 1);
	ops[GEMV] += 2 * nrow * nsupc;    */
	
#ifdef USE_VENDOR_BLAS
	alpha = none; beta = one; /* y := beta*y + alpha*A*x */
#if ( MACH==CRAY_PVP )
	CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr], 
	       &nsupr, &lusup[ufirst], &incx );
	CGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
	       &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
#else
	ctrsv_( "L", "N", "U", &nsupc, &lusup[luptr], 
	       &nsupr, &lusup[ufirst], &incx );
	cgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
	       &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
#endif
#else
	clsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );

	cmatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc],
		 &lusup[ufirst], tempv );
	
        /* Copy updates from tempv[*] into lusup[*] */
	isub = ufirst + nsupc;
	for (i = 0; i < nrow; i++) {
            c_sub(&lusup[isub], &lusup[isub], &tempv[i]);
            tempv[i] = zero;
	    ++isub;
	}
#endif
    } /* if fst_col < jcol ... */ 

    return 0;
}
Ejemplo n.º 27
0
int main(int argc, char *argv[])
{
    void cmatvec_mult(complex alpha, complex x[], complex beta, complex y[]);
    void cpsolve(int n, complex x[], complex y[]);
    extern int cfgmr( int n,
	void (*matvec_mult)(complex, complex [], complex, complex []),
	void (*psolve)(int n, complex [], complex[]),
	complex *rhs, complex *sol, double tol, int restrt, int *itmax,
	FILE *fits);
    extern int cfill_diag(int n, NCformat *Astore);

    char     equed[1] = {'B'};
    yes_no_t equil;
    trans_t  trans;
    SuperMatrix A, L, U;
    SuperMatrix B, X;
    NCformat *Astore;
    NCformat *Ustore;
    SCformat *Lstore;
    complex   *a;
    int      *asub, *xa;
    int      *etree;
    int      *perm_c; /* column permutation vector */
    int      *perm_r; /* row permutations from partial pivoting */
    int      nrhs, ldx, lwork, info, m, n, nnz;
    complex   *rhsb, *rhsx, *xact;
    complex   *work = NULL;
    float   *R, *C;
    float   u, rpg, rcond;
    complex zero = {0.0, 0.0};
    complex one = {1.0, 0.0};
    complex none = {-1.0, 0.0};
    mem_usage_t   mem_usage;
    superlu_options_t options;
    SuperLUStat_t stat;

    int restrt, iter, maxit, i;
    double resid;
    complex *x, *b;

#ifdef DEBUG
    extern int num_drop_L, num_drop_U;
#endif

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC("Enter main()");
#endif

    /* Defaults */
    lwork = 0;
    nrhs  = 1;
    equil = YES;
    u	  = 0.1; /* u=1.0 for complete factorization */
    trans = NOTRANS;

    /* Set the default input options:
	options.Fact = DOFACT;
	options.Equil = YES;
	options.ColPerm = COLAMD;
	options.DiagPivotThresh = 0.1; //different from complete LU
	options.Trans = NOTRANS;
	options.IterRefine = NOREFINE;
	options.SymmetricMode = NO;
	options.PivotGrowth = NO;
	options.ConditionNumber = NO;
	options.PrintStat = YES;
	options.RowPerm = LargeDiag;
	options.ILU_DropTol = 1e-4;
	options.ILU_FillTol = 1e-2;
	options.ILU_FillFactor = 10.0;
	options.ILU_DropRule = DROP_BASIC | DROP_AREA;
	options.ILU_Norm = INF_NORM;
	options.ILU_MILU = SMILU_2;
     */
    ilu_set_default_options(&options);

    /* Modify the defaults. */
    options.PivotGrowth = YES;	  /* Compute reciprocal pivot growth */
    options.ConditionNumber = YES;/* Compute reciprocal condition number */

    if ( lwork > 0 ) {
	work = SUPERLU_MALLOC(lwork);
	if ( !work ) ABORT("Malloc fails for work[].");
    }

    /* Read matrix A from a file in Harwell-Boeing format.*/
    if (argc < 2)
    {
	printf("Usage:\n%s [OPTION] < [INPUT] > [OUTPUT]\nOPTION:\n"
		"-h -hb:\n\t[INPUT] is a Harwell-Boeing format matrix.\n"
		"-r -rb:\n\t[INPUT] is a Rutherford-Boeing format matrix.\n"
		"-t -triplet:\n\t[INPUT] is a triplet format matrix.\n",
		argv[0]);
	return 0;
    }
    else
    {
	switch (argv[1][1])
	{
	    case 'H':
	    case 'h':
		printf("Input a Harwell-Boeing format matrix:\n");
		creadhb(&m, &n, &nnz, &a, &asub, &xa);
		break;
	    case 'R':
	    case 'r':
		printf("Input a Rutherford-Boeing format matrix:\n");
		creadrb(&m, &n, &nnz, &a, &asub, &xa);
		break;
	    case 'T':
	    case 't':
		printf("Input a triplet format matrix:\n");
		creadtriple(&m, &n, &nnz, &a, &asub, &xa);
		break;
	    default:
		printf("Unrecognized format.\n");
		return 0;
	}
    }

    cCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_C, SLU_GE);
    Astore = A.Store;
    cfill_diag(n, Astore);
    printf("Dimension %dx%d; # nonzeros %d\n", A.nrow, A.ncol, Astore->nnz);
    fflush(stdout);

    if ( !(rhsb = complexMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsb[].");
    if ( !(rhsx = complexMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsx[].");
    cCreate_Dense_Matrix(&B, m, nrhs, rhsb, m, SLU_DN, SLU_C, SLU_GE);
    cCreate_Dense_Matrix(&X, m, nrhs, rhsx, m, SLU_DN, SLU_C, SLU_GE);
    xact = complexMalloc(n * nrhs);
    ldx = n;
    cGenXtrue(n, nrhs, xact, ldx);
    cFillRHS(trans, nrhs, xact, ldx, &A, &B);

    if ( !(etree = intMalloc(n)) ) ABORT("Malloc fails for etree[].");
    if ( !(perm_r = intMalloc(m)) ) ABORT("Malloc fails for perm_r[].");
    if ( !(perm_c = intMalloc(n)) ) ABORT("Malloc fails for perm_c[].");
    if ( !(R = (float *) SUPERLU_MALLOC(A.nrow * sizeof(float))) )
	ABORT("SUPERLU_MALLOC fails for R[].");
    if ( !(C = (float *) SUPERLU_MALLOC(A.ncol * sizeof(float))) )
	ABORT("SUPERLU_MALLOC fails for C[].");

    info = 0;
#ifdef DEBUG
    num_drop_L = 0;
    num_drop_U = 0;
#endif

    /* Initialize the statistics variables. */
    StatInit(&stat);

    /* Compute the incomplete factorization and compute the condition number
       and pivot growth using dgsisx. */
    cgsisx(&options, &A, perm_c, perm_r, etree, equed, R, C, &L, &U, work,
	   lwork, &B, &X, &rpg, &rcond, &mem_usage, &stat, &info);

    Lstore = (SCformat *) L.Store;
    Ustore = (NCformat *) U.Store;
    printf("cgsisx(): info %d\n", info);
    if (info > 0 || rcond < 1e-8 || rpg > 1e8)
	printf("WARNING: This preconditioner might be unstable.\n");

    if ( info == 0 || info == n+1 ) {

	if ( options.PivotGrowth == YES )
	    printf("Recip. pivot growth = %e\n", rpg);
	if ( options.ConditionNumber == YES )
	    printf("Recip. condition number = %e\n", rcond);

    } else if ( info > 0 && lwork == -1 ) {
	printf("** Estimated memory: %d bytes\n", info - n);
    }
    printf("n(A) = %d, nnz(A) = %d\n", n, Astore->nnz);
    printf("No of nonzeros in factor L = %d\n", Lstore->nnz);
    printf("No of nonzeros in factor U = %d\n", Ustore->nnz);
    printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n);
    printf("Fill ratio: nnz(F)/nnz(A) = %.3f\n",
	    ((double)(Lstore->nnz) + (double)(Ustore->nnz) - (double)n)
	    / (double)Astore->nnz);
    printf("L\\U MB %.3f\ttotal MB needed %.3f\n",
	   mem_usage.for_lu/1e6, mem_usage.total_needed/1e6);
    fflush(stdout);

    /* Set the global variables. */
    GLOBAL_A = &A;
    GLOBAL_L = &L;
    GLOBAL_U = &U;
    GLOBAL_STAT = &stat;
    GLOBAL_PERM_C = perm_c;
    GLOBAL_PERM_R = perm_r;

    /* Set the variables used by GMRES. */
    restrt = SUPERLU_MIN(n / 3 + 1, 50);
    maxit = 1000;
    iter = maxit;
    resid = 1e-8;
    if (!(b = complexMalloc(m))) ABORT("Malloc fails for b[].");
    if (!(x = complexMalloc(n))) ABORT("Malloc fails for x[].");
    sp_cgemv("N", one, &A, xact, 1, zero, b, 1);

    if (info <= n + 1)
    {
	int i_1 = 1;
	double maxferr = 0.0, nrmA, nrmB, res, t;
        complex temp;
	extern float scnrm2_(int *, complex [], int *);
	extern void caxpy_(int *, complex *, complex [], int *, complex [], int *);

	/* Call GMRES. */
	/*double *sol = (double*) ((DNformat*) X.Store)->nzval;
	for (i = 0; i < n; i++) x[i] = sol[i];*/
	for (i = 0; i < n; i++) x[i] = zero;

	t = SuperLU_timer_();

	cfgmr(n, cmatvec_mult, cpsolve, b, x, resid, restrt, &iter, stdout);

	t = SuperLU_timer_() - t;

	/* Output the result. */
	nrmA = scnrm2_(&(Astore->nnz), (complex *)((DNformat *)A.Store)->nzval,
		&i_1);
	nrmB = scnrm2_(&m, b, &i_1);
	sp_cgemv("N", none, &A, x, 1, one, b, 1);
	res = scnrm2_(&m, b, &i_1);
	resid = res / nrmB;
	printf("||A||_F = %.1e, ||B||_2 = %.1e, ||B-A*X||_2 = %.1e, "
		"relres = %.1e\n", nrmA, nrmB, res, resid);

	if (iter >= maxit)
	{
	    if (resid >= 1.0) iter = -180;
	    else if (resid > 1e-8) iter = -111;
	}
	printf("iteration: %d\nresidual: %.1e\nGMRES time: %.2f seconds.\n",
		iter, resid, t);

	for (i = 0; i < m; i++)
            c_sub(&temp, &x[i], &xact[i]);
            maxferr = SUPERLU_MAX(maxferr, c_abs1(&temp));
	printf("||X-X_true||_oo = %.1e\n", maxferr);
    }
#ifdef DEBUG
    printf("%d entries in L and %d entries in U dropped.\n",
	    num_drop_L, num_drop_U);
#endif
    fflush(stdout);

    if ( options.PrintStat ) StatPrint(&stat);
    StatFree(&stat);

    SUPERLU_FREE (rhsb);
    SUPERLU_FREE (rhsx);
    SUPERLU_FREE (xact);
    SUPERLU_FREE (etree);
    SUPERLU_FREE (perm_r);
    SUPERLU_FREE (perm_c);
    SUPERLU_FREE (R);
    SUPERLU_FREE (C);
    Destroy_CompCol_Matrix(&A);
    Destroy_SuperMatrix_Store(&B);
    Destroy_SuperMatrix_Store(&X);
    if ( lwork >= 0 ) {
	Destroy_SuperNode_Matrix(&L);
	Destroy_CompCol_Matrix(&U);
    }
    SUPERLU_FREE(b);
    SUPERLU_FREE(x);

#if ( DEBUGlevel>=1 )
    CHECK_MALLOC("Exit main()");
#endif

    return 0;
}
Ejemplo n.º 28
0
void polydiv (header *hd)
{	header *st=hd,*hd1,*result,*rest;
	int c1,c2,i,r,j;
	double *m1,*m2,*mr,*mh,x,l;
	complex *mc1,*mc2,*mcr,*mch,xc,lc,hc;
	interval *mi1,*mi2,*mir,*mih,xi,li,hi;
	hd1=next_param(st);
	equal_params_2(&hd,&hd1); if (error) return;
	getmatrix(hd,&r,&c1,&m1); if (r!=1) wrong_arg();
	getmatrix(hd1,&r,&c2,&m2); if (r!=1) wrong_arg();
	if (c1<c2)
	{	result=new_real(0.0,"");
		rest=(header *)newram;
		moveresult(rest,hd1);
	}
	else if (iscomplex(hd))
	{	mc1=(complex *)m1; mc2=(complex *)m2;
		result=new_cmatrix(1,c1-c2+1,""); if (error) return;
		mcr=(complex *)matrixof(result);
		rest=new_cmatrix(1,c2,""); if (error) return;
		mch=(complex *)newram;
		if (!freeram(c1*sizeof(complex)))
		{	output("Out of memory!\n"); error=190; return;
		}
		memmove((char *)mch,(char *)mc1,c1*sizeof(complex));
		c_copy(lc,mc2[c2-1]);
		if (lc[0]==0.0 && lc[1]==0.0) wrong_arg();
		for (i=c1-c2; i>=0; i--)
		{	c_div(mch[c2+i-1],lc,xc); c_copy(mcr[i],xc);
			for(j=0; j<c2; j++)
			{	c_mult(mc2[j],xc,hc);
				c_sub(mch[i+j],hc,mch[i+j]);
			}
		}
		memmove((char *)matrixof(rest),(char *)mch,c2*sizeof(complex));
	}
	else if (isinterval(hd))
	{	mi1=(interval *)m1; mi2=(interval *)m2;
		result=new_imatrix(1,c1-c2+1,""); if (error) return;
		mir=(interval *)matrixof(result);
		rest=new_imatrix(1,c2,""); if (error) return;
		mih=(complex *)newram;
		if (!freeram(c1*sizeof(complex)))
		{	output("Out of memory!\n"); error=190; return;
		}
		memmove((char *)mih,(char *)mi1,c1*sizeof(interval));
		i_copy(li,mi2[c2-1]);
		if (li[0]<=0.0 && li[1]>=0.0) wrong_arg();
		for (i=c1-c2; i>=0; i--)
		{	i_div(mih[c2+i-1],li,xi); c_copy(mir[i],xi);
			for(j=0; j<c2; j++)
			{	i_mult(mi2[j],xi,hi);
				i_sub(mih[i+j],hi,mih[i+j]);
			}
		}
		memmove((char *)matrixof(rest),(char *)mih,c2*sizeof(interval));
	}
	else if (isreal(hd))
	{	result=new_matrix(1,c1-c2+1,""); if (error) return;
		mr=matrixof(result);
		rest=new_matrix(1,c2,""); if (error) return;
		mh=(double *)newram;
		if (!freeram(c1*sizeof(double)))
		{	output("Out of memory!\n"); error=190; return;
		}
		memmove((char *)mh,(char *)m1,c1*sizeof(double));
		l=m2[c2-1];
		if (l==0.0) wrong_arg();
		for (i=c1-c2; i>=0; i--)
		{	x=mh[c2+i-1]/l; mr[i]=x;
			for(j=0; j<c2; j++) mh[i+j]-=m2[j]*x;
		}
		memmove((char *)matrixof(rest),(char *)mh,c2*sizeof(double));
	}
	else wrong_arg();
	moveresult(st,result);
	moveresult(nextof(st),rest);
}
Ejemplo n.º 29
0
     void recalculate (int plane, number_t * x1, number_t * y1)
{
    number_t x = *x1, y = *y1;
    switch (plane) {
        case 1:
            {                   /* 1/mu */
                number_t t;
                if (myabs (x) + myabs (y) < 0.000001)
                    t = INT_MAX, y = INT_MAX;
                else {

                    c_div (1, 0, x, y, t, y);
                }
                x = t;
            }
            break;
        case 2:
            {                   /* 1/(mu + 0.25) */
                number_t t;
                if (myabs (x) + myabs (y) < 0.000001)
                    t = INT_MAX, y = INT_MAX;
                else {

                    c_div (1, 0, x, y, t, y);
                }
                x = t;
                x += 0.25;
            }
            break;
        case 3:                /* lambda */
            {
                number_t tr, ti, mr, mi;

                mr = x, mi = y;
                c_pow2 (x, y, tr, ti);
                c_div (tr, ti, 4, 0, x, y);
                c_div (mr, mi, 2, 0, tr, ti);
                c_sub (tr, ti, x, y, mr, mi);
                x = mr, y = mi;
            }
            break;
        case 4:                /* 1/lambda */
            {
                number_t tr, ti, mr, mi;

                c_div (1, 0, x, y, tr, y);
                x = tr;
                mr = x, mi = y;
                c_pow2 (x, y, tr, ti);
                c_div (tr, ti, 4, 0, x, y);
                c_div (mr, mi, 2, 0, tr, ti);
                c_sub (tr, ti, x, y, mr, mi);
                x = mr, y = mi;
            }
            break;
        case 5:                /* 1/(lambda-1) */
            {
                number_t tr, ti, mr, mi;

                c_div (1, 0, x, y, tr, y);
                x = tr + 1;
                mr = x, mi = y;
                c_pow2 (x, y, tr, ti);
                c_div (tr, ti, 4, 0, x, y);
                c_div (mr, mi, 2, 0, tr, ti);
                c_sub (tr, ti, x, y, mr, mi);
                x = mr, y = mi;
            }
            break;
        case 6:
            {                   /* 1/(mu + 0.25) */
                number_t t;
                if (myabs (x) + myabs (y) < 0.000001)
                    t = INT_MAX, y = INT_MAX;
                else {

                    c_div (1, 0, x, y, t, y);
                }
                x = t;
                x -= 1.40115;
            }
            break;
        default:
            break;
    }
    *x1 = x;
    *y1 = y;
}