mlib_status
mlib_VectorConjSymExt_S32C_S32C_Sat_N(
	mlib_s32 *zz,
	const mlib_s32 *xx,
	mlib_s32 n,
	mlib_s32 n1)
{
	CHECK;

	const mlib_s32 *px = xx;
	mlib_s32 *pz = zz;
	mlib_s32 *pd = zz + n1 + n1;

	mlib_s32 i, ax, az, n2, n3, nstep, c, c0, c1;
	__m128i xbuf, zbuf, mask1, mask2, mask3;
	mask1 = _mm_setr_epi32(0, 0xffffffff, 0, 0xffffffff);
	mask2 = _mm_setr_epi32(0, 0x80000000, 0, 0x80000000);

	ax = (mlib_addr)px & 15;
	az = (mlib_addr)pz & 15;
	nstep = 16 / sizeof (mlib_s32);

	if (ax & 7) {
		for (i = 0; i < n / 2; i++) {
			CONJ_S32C(loadu, storeu);
		}
		for (i = 0; i < n % 2; i++) {
			CONJ(S32);
		}
	} else {
		n1 = ((16 - ax) & 15) / sizeof (mlib_s32);
		n2 = (n + n - n1) / nstep;
		n3 = n + n - n1 - n2 * nstep;

		if (n2 < 1) {
			for (i = 0; i < n; i++) {
				CONJ(S32);
			}
		} else {
			for (i = 0; i < n1 / 2; i++) {
				CONJ(S32);
			}

			if (ax == az) {
				for (i = 0; i < n2; i++) {
					CONJ_S32C(loadu, storeu);
				}
			} else {
				for (i = 0; i < n2; i++) {
					CONJ_S32C(loadu, storeu);
				}
			}
			for (i = 0; i < n3 / 2; i++) {
				CONJ(S32);
			}
		}
	}

	return (MLIB_SUCCESS);
}
Beispiel #2
0
mlib_status
__mlib_VectorConj_S16C_S16C_Sat(
	mlib_s16 *zz,
	const mlib_s16 *xx,
	mlib_s32 n)
{
	CHECK;

	const mlib_s16 *px = xx;
	mlib_s16 *pz = zz;

	mlib_s32 i, ax, az, n1, n2, n3, nstep, c;
	__m128i xbuf, zbuf, mask1, mask2, mask3;
	mask1 = _mm_set1_epi32(0xffff0000);
	mask2 = _mm_set1_epi32(0x80000000);

	ax = (mlib_addr)xx & 15;
	az = (mlib_addr)zz & 15;
	nstep = 16 / sizeof (mlib_s16);

	if (ax & 3) {
		for (i = 0; i < n / 4; i++) {
			CONJ_S16C(loadu, storeu);
		}
		for (i = 0; i < n % 4; i++) {
			CONJ(S16);
		}
	} else {
		n1 = ((16 - ax) & 15) / sizeof (mlib_s16);
		n2 = (n + n - n1) / nstep;
		n3 = n + n - n1 - n2 * nstep;

		if (n2 < 1) {
			for (i = 0; i < n; i++) {
				CONJ(S16);
			}
		} else {
			for (i = 0; i < n1 / 2; i++) {
				CONJ(S16);
			}

			if (ax == az) {
				for (i = 0; i < n2; i++) {
					CONJ_S16C(loadu, storeu);
				}
			} else {
				for (i = 0; i < n2; i++) {
					CONJ_S16C(loadu, storeu);
				}
			}
			for (i = 0; i < n3 / 2; i++) {
				CONJ(S16);
			}
		}
	}

	return (MLIB_SUCCESS);
}
Beispiel #3
0
/** Computes the weighted inner/dot product \f$x^H (w2\odot w2 \odot x)\f$. */
R Y(dot_w2_complex)(C *x, R *w2, INT n)
{
  INT k;
  R dot;

  for (k = 0, dot = K(0.0); k < n; k++)
    dot+=w2[k]*w2[k]*CONJ(x[k])*x[k];

  return dot;
}
Beispiel #4
0
/** Computes the inner/dot product \f$x^H x\f$. */
R Y(dot_complex)(C *x, INT n)
{
  INT k;
  R dot;

  for (k = 0, dot = K(0.0); k < n; k++)
    dot += CONJ(x[k])*x[k];

  return dot;
}
Beispiel #5
0
STATIC int solve_H_Ref_Sprimme(SCALAR *H, int ldH, SCALAR *hVecs, int ldhVecs,
      SCALAR *hU, int ldhU, REAL *hSVals, SCALAR *R, int ldR, SCALAR *QtQ,
      int ldQtQ, SCALAR *VtBV, int ldVtBV, EVAL *hVals, int basisSize,
      int targetShiftIndex, primme_context ctx) {

   primme_params *primme = ctx.primme;
   int i, j; /* Loop variables    */

   (void)targetShiftIndex; /* unused parameter */

   /* Some LAPACK implementations don't like zero-size matrices */
   if (basisSize == 0) return 0;

   /* Copy R into hVecs */
   Num_copy_matrix_Sprimme(R, basisSize, basisSize, ldR, hVecs, ldhVecs, ctx);

   if (QtQ) {
      /* Factorize QtQ */
      Num_copy_matrix_Sprimme(
            QtQ, basisSize, basisSize, ldQtQ, hU, ldhU, ctx);
      CHKERR(Num_potrf_Sprimme("U", basisSize, hU, ldhU, NULL, ctx));
      CHKERR(Num_trmm_Sprimme("L", "U", "N", "N", basisSize, basisSize, 1.0, hU,
            ldhU, hVecs, ldhVecs, ctx));
   }

   SCALAR *U_VtBV=NULL; /* Cholesky factor of VtBV */
   if (VtBV) {
      CHKERR(Num_malloc_Sprimme(basisSize*basisSize, &U_VtBV, ctx));
      Num_copy_matrix_Sprimme(
            VtBV, basisSize, basisSize, ldVtBV, U_VtBV, basisSize, ctx);
      CHKERR(Num_potrf_Sprimme("U", basisSize, U_VtBV, basisSize, NULL, ctx));
      CHKERR(Num_trsm_Sprimme("R", "U", "N", "N", basisSize, basisSize, 1.0,
            U_VtBV, basisSize, hVecs, basisSize, ctx));
   }

   /* Note gesvd returns transpose(V) rather than V and sorted in descending */
   /* order of the singular values */

   CHKERR(Num_gesvd_Sprimme("S", "O", basisSize, basisSize, hVecs, ldhVecs,
         hSVals, hU, ldhU, hVecs, ldhVecs, ctx));

   /* Transpose back V */

   SCALAR *rwork;
   CHKERR(Num_malloc_Sprimme((size_t)basisSize * basisSize, &rwork, ctx));
   for (j = 0; j < basisSize; j++) {
      for (i = 0; i < basisSize; i++) {
         rwork[basisSize * j + i] = CONJ(hVecs[ldhVecs * i + j]);
      }
   }
   Num_copy_matrix_Sprimme(
         rwork, basisSize, basisSize, basisSize, hVecs, ldhVecs, ctx);

   if (VtBV) {
      CHKERR(Num_trsm_Sprimme("L", "U", "N", "N", basisSize, basisSize, 1.0,
            U_VtBV, basisSize, hVecs, ldhVecs, ctx));
      CHKERR(Num_free_Sprimme(U_VtBV, ctx));
   }

   /* Rearrange V, hSVals and hU in ascending order of singular value   */
   /* if target is not largest abs.                                     */

   if (primme->target == primme_closest_abs ||
         primme->target == primme_closest_leq ||
         primme->target == primme_closest_geq) {
      int *perm;
      CHKERR(Num_malloc_iprimme(basisSize, &perm, ctx));
      for (i = 0; i < basisSize; i++) perm[i] = basisSize - 1 - i;
      permute_vecs_Rprimme(hSVals, 1, basisSize, 1, perm, ctx);
      permute_vecs_Sprimme(hVecs, basisSize, basisSize, ldhVecs, perm, ctx);
      permute_vecs_Sprimme(hU, basisSize, basisSize, ldhU, perm, ctx);
      CHKERR(Num_free_iprimme(perm, ctx));
   }

   /* compute Rayleigh quotient lambda_i = x_i'*H*x_i */

   Num_hemm_Sprimme("L", "U", basisSize, basisSize, 1.0, H,
      ldH, hVecs, ldhVecs, 0.0, rwork, basisSize);

   for (i=0; i<basisSize; i++) {
      hVals[i] = KIND(REAL_PART, )(Num_dot_Sprimme(
            basisSize, &hVecs[ldhVecs * i], 1, &rwork[basisSize * i], 1, ctx));
   }
   CHKERR(Num_free_Sprimme(rwork, ctx));

   return 0;
}
Beispiel #6
0
    static void BlockHessenberg(
        MatrixView<T> A, VectorView<T> Ubeta)
    {
        // Much like the block version of Bidiagonalize, we try to maintain
        // the operation of several successive Householder matrices in
        // a block form, where the net Block Householder is I - YZYt.
        //
        // But as with the bidiagonlization algorithm (and unlike a simple
        // block QR decomposition), we update the matrix from both the left 
        // and the right, so we also need to keep track of the product
        // ZYtm in addition.
        //
        // The block update at the end of the block loop is
        // m' = (I-YZYt) m (I-YZtYt)
        //
        // The Y matrix is stored in the first K columns of m,
        // and the Hessenberg portion of these columns is updated as we go.
        // For the right-hand-side update, m -= mYZtYt, the m on the right
        // needs to be the full original matrix m, including the original
        // versions of these K columns.  Therefore, we can't wait until 
        // the end for this calculation.  
        //
        // Instead, we keep track of mYZt as we progress, so the final update
        // is:
        //
        // m' = (I-YZYt) (m - mYZt Y)
        //
        // We also need to do this same calculation for each column as we
        // progress through the block.
        //
        const ptrdiff_t N = A.rowsize();

#ifdef XDEBUG
        Matrix<T> A0(A);
#endif

        TMVAssert(A.rowsize() == A.colsize());
        TMVAssert(N > 0);
        TMVAssert(Ubeta.size() == N-1);
        TMVAssert(!Ubeta.isconj());
        TMVAssert(Ubeta.step()==1);

        ptrdiff_t ncolmax = MIN(HESS_BLOCKSIZE,N-1);
        Matrix<T,RowMajor> mYZt_full(N,ncolmax);
        UpperTriMatrix<T,NonUnitDiag|ColMajor> Z_full(ncolmax);

        T det(0); // Ignore Householder Determinant calculations
        T* Uj = Ubeta.ptr();
        for(ptrdiff_t j1=0;j1<N-1;) {
            ptrdiff_t j2 = MIN(N-1,j1+HESS_BLOCKSIZE);
            ptrdiff_t ncols = j2-j1;
            MatrixView<T> mYZt = mYZt_full.subMatrix(0,N-j1,0,ncols);
            UpperTriMatrixView<T> Z = Z_full.subTriMatrix(0,ncols);

            for(ptrdiff_t j=j1,jj=0;j<j2;++j,++jj,++Uj) { // jj = j-j1

                // Update current column of A
                //
                // m' = (I - YZYt) (m - mYZt Yt)
                // A(0:N,j)' = A(0:N,j) - mYZt(0:N,0:j) Y(j,0:j)t
                A.col(j,j1+1,N) -= mYZt.Cols(0,j) * A.row(j,0,j).Conjugate();
                //
                // A(0:N,j)'' = A(0:N,j) - Y Z Yt A(0:N,j)'
                // 
                // Let Y = (L)     where L is unit-diagonal, lower-triangular,
                //         (M)     and M is rectangular
                //
                LowerTriMatrixView<T> L = 
                    LowerTriMatrixViewOf(A.subMatrix(j1+1,j+1,j1,j),UnitDiag);
                MatrixView<T> M = A.subMatrix(j+1,N,j1,j);
                // Use the last column of Z as temporary storage for Yt A(0:N,j)'
                VectorView<T> YtAj = Z.col(jj,0,jj);
                YtAj = L.adjoint() * A.col(j,j1+1,j+1);
                YtAj += M.adjoint() * A.col(j,j+1,N);
                YtAj = Z.subTriMatrix(0,jj) * YtAj;
                A.col(j,j1+1,j+1) -= L * YtAj;
                A.col(j,j+1,N) -= M * YtAj;

                // Do the Householder reflection 
                VectorView<T> u = A.col(j,j+1,N);
                T bu = Householder_Reflect(u,det);
#ifdef TMVFLDEBUG
                TMVAssert(Uj >= Ubeta._first);
                TMVAssert(Uj < Ubeta._last);
#endif
                *Uj = bu;

                // Save the top of the u vector, which isn't actually part of u
                T& Atemp = *u.cptr();
                TMVAssert(IMAG(Atemp) == RealType(T)(0));
                RealType(T) Aorig = REAL(Atemp);
                Atemp = RealType(T)(1);

                // Update Z
                VectorView<T> Zj = Z.col(jj,0,jj);
                Zj = -bu * M.adjoint() * u;
                Zj = Z * Zj;
                Z(jj,jj) = -bu;

                // Update mYtZt:
                //
                // mYZt(0:N,j) = m(0:N,0:N) Y(0:N,0:j) Zt(0:j,j)
                //             = m(0:N,j+1:N) Y(j+1:N,j) Zt(j,j)
                //             = bu* m(0:N,j+1:N) u 
                //
                mYZt.col(jj) = CONJ(bu) * A.subMatrix(j1,N,j+1,N) * u;

                // Restore Aorig, which is actually part of the Hessenberg matrix.
                Atemp = Aorig;
            }

            // Update the rest of the matrix:
            // A(j2,j2-1) needs to be temporarily changed to 1 for use in Y
            T& Atemp = *(A.ptr() + j2*A.stepi() + (j2-1)*A.stepj());
            TMVAssert(IMAG(Atemp) == RealType(T)(0));
            RealType(T) Aorig = Atemp;
            Atemp = RealType(T)(1);

            // m' = (I-YZYt) (m - mYZt Y)
            MatrixView<T> m = A.subMatrix(j1,N,j2,N);
            ConstMatrixView<T> Y = A.subMatrix(j2+1,N,j1,j2);
            m -= mYZt * Y.adjoint();
            BlockHouseholder_LMult(Y,Z,m);

            // Restore A(j2,j2-1)
            Atemp = Aorig;
            j1 = j2;
        }

#ifdef XDEBUG
        Matrix<T> U(N,N,T(0));
        U.subMatrix(1,N,1,N) = A.subMatrix(1,N,0,N-1);
        U.upperTri().setZero();
        U(0,0) = T(1);
        Vector<T> Ubeta2(N);
        Ubeta2.subVector(1,N) = Ubeta;
        Ubeta2(0) = T(0);
        GetQFromQR(U.view(),Ubeta2);
        Matrix<T> H = A;
        if (N>2) LowerTriMatrixViewOf(H).offDiag(2).setZero();
        Matrix<T> AA = U*H*U.adjoint();
        if (Norm(A0-AA) > 0.001*Norm(A0)) {
            cerr<<"NonBlock Hessenberg: A = "<<Type(A)<<"  "<<A0<<endl;
            cerr<<"A = "<<A<<endl;
            cerr<<"Ubeta = "<<Ubeta<<endl;
            cerr<<"U = "<<U<<endl;
            cerr<<"H = "<<H<<endl;
            cerr<<"UHUt = "<<AA<<endl;
            Matrix<T,ColMajor> A2 = A0;
            Vector<T> Ub2(Ubeta.size());
            NonBlockHessenberg(A2.view(),Ub2.view());
            cerr<<"cf NonBlock: A -> "<<A2<<endl;
            cerr<<"Ubeta = "<<Ub2<<endl;
            abort();
        }
#endif
    }
Beispiel #7
0
int main (void)
{
  TESTIT_COMPLEX_ALLNEG (csin, 0,
			 0, -0.F,
			 CONJ(0), CONJ(-0.F));
  TESTIT_COMPLEX_R_ALLNEG (csin, 3.45678F + 2.34567FI,
			   -1.633059F - 4.917448FI, 1.633059F - 4.917448FI,
			   -1.633059F + 4.917448FI, 1.633059F + 4.917448FI);

  TESTIT_COMPLEX_ALLNEG (ccos, 0,
			 CONJ(1), 1, 1, CONJ(1));
  TESTIT_COMPLEX_R_ALLNEG (ccos, 3.45678F + 2.34567FI,
			   -5.008512F + 1.603367FI, -5.008512F - 1.603367FI,
			   -5.008512F - 1.603367FI, -5.008512F + 1.603367FI);

  TESTIT_COMPLEX_ALLNEG (ctan, 0,
			 0, -0.F, CONJ(0), CONJ(-0.F));
  TESTIT_COMPLEX_R_ALLNEG (ctan, 3.45678F + 2.34567FI,
			   0.010657F + 0.985230FI, -0.010657F + 0.985230FI,
			   0.010657F - 0.985230FI, -0.010657F - 0.985230FI);
  
  TESTIT_COMPLEX_ALLNEG (csinh, 0,
			 0, -0.F, CONJ(0), CONJ(-0.F));
  TESTIT_COMPLEX_R_ALLNEG (csinh, 3.45678F + 2.34567FI,
			   -11.083178F + 11.341487FI, 11.083178F +11.341487FI,
			   -11.083178F - 11.341487FI, 11.083178F -11.341487FI);
  
  TESTIT_COMPLEX_ALLNEG (ccosh, 0,
			 1, CONJ(1), CONJ(1), 1);
  TESTIT_COMPLEX_R_ALLNEG (ccosh, 3.45678F + 2.34567FI,
			   -11.105238F + 11.318958FI,-11.105238F -11.318958FI,
			   -11.105238F - 11.318958FI,-11.105238F +11.318958FI);
  
  TESTIT_COMPLEX_ALLNEG (ctanh, 0,
			 0, -0.F, CONJ(0), CONJ(-0.F));
  TESTIT_COMPLEX_R_ALLNEG (ctanh, 3.45678F + 2.34567FI,
			   1.000040F - 0.001988FI, -1.000040F - 0.001988FI,
			   1.000040F + 0.001988FI, -1.000040F + 0.001988FI);

  TESTIT_COMPLEX (clog, 1, 0);
  TESTIT_COMPLEX_R (clog, -1, 3.141593FI);
  TESTIT_COMPLEX (clog, CONJ(1), CONJ(0)); /* Fails with mpc-0.6.  */
  TESTIT_COMPLEX_R (clog, CONJ(-1), CONJ(3.141593FI)); /* Fails with mpc-0.6.  */
  TESTIT_COMPLEX_R_ALLNEG (clog, 3.45678F + 2.34567FI,
			   1.429713F + 0.596199FI, 1.429713F + 2.545394FI,
			   1.429713F - 0.596199FI, 1.429713F - 2.545394FI);

  TESTIT_COMPLEX_ALLNEG (csqrt, 0,
			 0, 0, CONJ(0), CONJ(0));
  TESTIT_COMPLEX_R_ALLNEG (csqrt, 3.45678F + 2.34567FI,
			   1.953750F + 0.600299FI, 0.600299F + 1.953750FI,
			   1.953750F - 0.600299FI, 0.600299F - 1.953750FI);
  
  TESTIT_COMPLEX2_ALLNEG (cpow, 1, 0,
			  1, 1, CONJ(1), CONJ(1), 1, CONJ(1), 1, 1,
			  CONJ(1), CONJ(1), 1, 1, 1, 1, CONJ(1), 1);
  TESTIT_COMPLEX2_ALLNEG (cpow, 1.FI, 0,
			  1, 1, CONJ(1), 1, 1, CONJ(1), 1, 1,
			  1, CONJ(1), 1, 1, 1, 1, CONJ(1), 1);
  TESTIT_COMPLEX_R2_ALLNEG (cpow, 2, 3,
			    8, 8, CONJ(1/8.F), 1/8.F, CONJ(-8), -8, -1/8.F, -1/8.F,
			    8, CONJ(8), 1/8.F, 1/8.F, -8, -8, -1/8.F, CONJ(-1/8.F));
  TESTIT_COMPLEX_R2_ALLNEG (cpow, 3, 4,
			    81, 81, CONJ(1/81.F), 1/81.F, 81, 81, CONJ(1/81.F), 1/81.F,
			    81, CONJ(81), 1/81.F, 1/81.F, 81, CONJ(81), 1/81.F, 1/81.F);
  TESTIT_COMPLEX_R2_ALLNEG (cpow, 3, 5,
			    243, 243, CONJ(1/243.F), 1/243.F, CONJ(-243), -243, -1/243.F, -1/243.F,
			    243, CONJ(243), 1/243.F, 1/243.F, -243, -243, -1/243.F, CONJ(-1/243.F));
  TESTIT_COMPLEX_R2_ALLNEG (cpow, 4, 2,
			    16, 16, CONJ(1/16.F), 1/16.F, 16, 16, CONJ(1/16.F), 1/16.F,
			    16, CONJ(16), 1/16.F, 1/16.F, 16, CONJ(16), 1/16.F, 1/16.F);
  TESTIT_COMPLEX_R2_ALLNEG (cpow, 1.5, 3,
			    3.375F, 3.375F, CONJ(1/3.375F), 1/3.375F, CONJ(-3.375F), -3.375F, -1/3.375F, -1/3.375F,
			    3.375F, CONJ(3.375F), 1/3.375F, 1/3.375F, -3.375F, -3.375F, -1/3.375F, CONJ(-1/3.375F));
  
  TESTIT_COMPLEX2 (cpow, 16, 0.25F, 2);

  TESTIT_COMPLEX_R2 (cpow, 3.45678F + 2.34567FI, 1.23456 + 4.56789FI, 0.212485F + 0.319304FI);
  TESTIT_COMPLEX_R2 (cpow, 3.45678F - 2.34567FI, 1.23456 + 4.56789FI, 78.576402F + -41.756208FI);
  TESTIT_COMPLEX_R2 (cpow, -1.23456F + 2.34567FI, 2.34567 - 1.23456FI, -110.629847F + -57.021655FI);
  TESTIT_COMPLEX_R2 (cpow, -1.23456F - 2.34567FI, 2.34567 - 1.23456FI, 0.752336F + 0.199095FI);
  
  return 0;
}
Beispiel #8
0
void KLU_utsolve
(
    /* inputs, not modified: */
    Int n,
    Int Uip [ ],
    Int Ulen [ ],
    Unit LU [ ],
    Entry Udiag [ ],
    Int nrhs,
#ifdef COMPLEX
    Int conj_solve,
#endif
    /* right-hand-side on input, solution to Ux=b on output */
    Entry X [ ]
)
{
    Entry x [4], uik, ukk ;
    Int k, p, len, i ;
    Int *Ui ;
    Entry *Ux ;

    switch (nrhs)
    {

        case 1:

            for (k = 0 ; k < n ; k++)
            {
                GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
                x [0] = X [k] ;
                for (p = 0 ; p < len ; p++)
                {
#ifdef COMPLEX
                    if (conj_solve)
                    {
                        /* x [0] -= CONJ (Ux [p]) * X [Ui [p]] ; */
                        MULT_SUB_CONJ (x [0], X [Ui [p]], Ux [p]) ;
                    }
                    else
#endif
                    {
                        /* x [0] -= Ux [p] * X [Ui [p]] ; */
                        MULT_SUB (x [0], Ux [p], X [Ui [p]]) ;
                    }
                }
#ifdef COMPLEX
                if (conj_solve)
                {
                    CONJ (ukk, Udiag [k]) ;
                }
                else
#endif
                {
                    ukk = Udiag [k] ;
                }
                DIV (X [k], x [0], ukk) ;
            }
            break ;

        case 2:

            for (k = 0 ; k < n ; k++)
            {
                GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
                x [0] = X [2*k    ] ;
                x [1] = X [2*k + 1] ;
                for (p = 0 ; p < len ; p++)
                {
                    i = Ui [p] ;
#ifdef COMPLEX
                    if (conj_solve)
                    {
                        CONJ (uik, Ux [p]) ;
                    }
                    else
#endif
                    {
                        uik = Ux [p] ;
                    }
                    MULT_SUB (x [0], uik, X [2*i]) ;
                    MULT_SUB (x [1], uik, X [2*i + 1]) ;
                }
#ifdef COMPLEX
                if (conj_solve)
                {
                    CONJ (ukk, Udiag [k]) ;
                }
                else
#endif
                {
                    ukk = Udiag [k] ;
                }
                DIV (X [2*k], x [0], ukk) ;
                DIV (X [2*k + 1], x [1], ukk) ;
            }
            break ;

        case 3:

            for (k = 0 ; k < n ; k++)
            {
                GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
                x [0] = X [3*k    ] ;
                x [1] = X [3*k + 1] ;
                x [2] = X [3*k + 2] ;
                for (p = 0 ; p < len ; p++)
                {
                    i = Ui [p] ;
#ifdef COMPLEX
                    if (conj_solve)
                    {
                        CONJ (uik, Ux [p]) ;
                    }
                    else
#endif
                    {
                        uik = Ux [p] ;
                    }
                    MULT_SUB (x [0], uik, X [3*i]) ;
                    MULT_SUB (x [1], uik, X [3*i + 1]) ;
                    MULT_SUB (x [2], uik, X [3*i + 2]) ;
                }
#ifdef COMPLEX
                if (conj_solve)
                {
                    CONJ (ukk, Udiag [k]) ;
                }
                else
#endif
                {
                    ukk = Udiag [k] ;
                }
                DIV (X [3*k], x [0], ukk) ;
                DIV (X [3*k + 1], x [1], ukk) ;
                DIV (X [3*k + 2], x [2], ukk) ;
            }
            break ;

        case 4:

            for (k = 0 ; k < n ; k++)
            {
                GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ;
                x [0] = X [4*k    ] ;
                x [1] = X [4*k + 1] ;
                x [2] = X [4*k + 2] ;
                x [3] = X [4*k + 3] ;
                for (p = 0 ; p < len ; p++)
                {
                    i = Ui [p] ;
#ifdef COMPLEX
                    if (conj_solve)
                    {
                        CONJ (uik, Ux [p]) ;
                    }
                    else
#endif
                    {
                        uik = Ux [p] ;
                    }
                    MULT_SUB (x [0], uik, X [4*i]) ;
                    MULT_SUB (x [1], uik, X [4*i + 1]) ;
                    MULT_SUB (x [2], uik, X [4*i + 2]) ;
                    MULT_SUB (x [3], uik, X [4*i + 3]) ;
                }
#ifdef COMPLEX
                if (conj_solve)
                {
                    CONJ (ukk, Udiag [k]) ;
                }
                else
#endif
                {
                    ukk = Udiag [k] ;
                }
                DIV (X [4*k], x [0], ukk) ;
                DIV (X [4*k + 1], x [1], ukk) ;
                DIV (X [4*k + 2], x [2], ukk) ;
                DIV (X [4*k + 3], x [3], ukk) ;
            }
            break ;
    }
}
Beispiel #9
0
void KLU_ltsolve
(
    /* inputs, not modified: */
    Int n,
    Int Lip [ ],
    Int Llen [ ],
    Unit LU [ ],
    Int nrhs,
#ifdef COMPLEX
    Int conj_solve,
#endif
    /* right-hand-side on input, solution to L'x=b on output */
    Entry X [ ]
)
{
    Entry x [4], lik ;
    Int *Li ;
    Entry *Lx ;
    Int k, p, len, i ;

    switch (nrhs)
    {

        case 1:

            for (k = n-1 ; k >= 0 ; k--)
            {
                GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
                x [0] = X [k] ;
                for (p = 0 ; p < len ; p++)
                {
#ifdef COMPLEX
                    if (conj_solve)
                    {
                        /* x [0] -= CONJ (Lx [p]) * X [Li [p]] ; */
                        MULT_SUB_CONJ (x [0], X [Li [p]], Lx [p]) ;
                    }
                    else
#endif
                    {
                        /*x [0] -= Lx [p] * X [Li [p]] ;*/
                        MULT_SUB (x [0], Lx [p], X [Li [p]]) ;
                    }
                }
                X [k] = x [0] ;
            }
            break ;

        case 2:

            for (k = n-1 ; k >= 0 ; k--)
            {
                x [0] = X [2*k    ] ;
                x [1] = X [2*k + 1] ;
                GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
                for (p = 0 ; p < len ; p++)
                {
                    i = Li [p] ;
#ifdef COMPLEX
                    if (conj_solve)
                    {
                        CONJ (lik, Lx [p]) ;
                    }
                    else
#endif
                    {
                        lik = Lx [p] ;
                    }
                    MULT_SUB (x [0], lik, X [2*i]) ;
                    MULT_SUB (x [1], lik, X [2*i + 1]) ;
                }
                X [2*k    ] = x [0] ;
                X [2*k + 1] = x [1] ;
            }
            break ;

        case 3:

            for (k = n-1 ; k >= 0 ; k--)
            {
                x [0] = X [3*k    ] ;
                x [1] = X [3*k + 1] ;
                x [2] = X [3*k + 2] ;
                GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
                for (p = 0 ; p < len ; p++)
                {
                    i = Li [p] ;
#ifdef COMPLEX
                    if (conj_solve)
                    {
                        CONJ (lik, Lx [p]) ;
                    }
                    else
#endif
                    {
                        lik = Lx [p] ;
                    }
                    MULT_SUB (x [0], lik, X [3*i]) ;
                    MULT_SUB (x [1], lik, X [3*i + 1]) ;
                    MULT_SUB (x [2], lik, X [3*i + 2]) ;
                }
                X [3*k    ] = x [0] ;
                X [3*k + 1] = x [1] ;
                X [3*k + 2] = x [2] ;
            }
            break ;

        case 4:

            for (k = n-1 ; k >= 0 ; k--)
            {
                x [0] = X [4*k    ] ;
                x [1] = X [4*k + 1] ;
                x [2] = X [4*k + 2] ;
                x [3] = X [4*k + 3] ;
                GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ;
                for (p = 0 ; p < len ; p++)
                {
                    i = Li [p] ;
#ifdef COMPLEX
                    if (conj_solve)
                    {
                        CONJ (lik, Lx [p]) ;
                    }
                    else
#endif
                    {
                        lik = Lx [p] ;
                    }
                    MULT_SUB (x [0], lik, X [4*i]) ;
                    MULT_SUB (x [1], lik, X [4*i + 1]) ;
                    MULT_SUB (x [2], lik, X [4*i + 2]) ;
                    MULT_SUB (x [3], lik, X [4*i + 3]) ;
                }
                X [4*k    ] = x [0] ;
                X [4*k + 1] = x [1] ;
                X [4*k + 2] = x [2] ;
                X [4*k + 3] = x [3] ;
            }
            break ;
    }
}
Beispiel #10
0
Int KLU_tsolve
(
    /* inputs, not modified */
    KLU_symbolic<Entry, Int> *Symbolic,
    KLU_numeric<Entry, Int> *Numeric,
    Int d,                  /* leading dimension of B */
    Int nrhs,               /* number of right-hand-sides */

    /* right-hand-side on input, overwritten with solution to Ax=b on output */
    double B [ ],           /* size n*nrhs, in column-oriented form, with
                             * leading dimension d. */
#ifdef COMPLEX
    Int conj_solve,         /* TRUE for conjugate transpose solve, FALSE for
                             * array transpose solve.  Used for the complex
                             * case only. */
#endif
    /* --------------- */
    KLU_common<Entry, Int> *Common
)
{
    Entry x [4], offik, s ;
    double rs, *Rs ;
    Entry *Offx, *X, *Bz, *Udiag ;
    Int *Q, *R, *Pnum, *Offp, *Offi, *Lip, *Uip, *Llen, *Ulen ;
    Unit **LUbx ;
    Int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i ;

    /* ---------------------------------------------------------------------- */
    /* check inputs */
    /* ---------------------------------------------------------------------- */

    if (Common == NULL)
    {
        return (FALSE) ;
    }
    if (Numeric == NULL || Symbolic == NULL || d < Symbolic->n || nrhs < 0 ||
        B == NULL)
    {
        Common->status = KLU_INVALID ;
        return (FALSE) ;
    }
    Common->status = KLU_OK ;

    /* ---------------------------------------------------------------------- */
    /* get the contents of the Symbolic object */
    /* ---------------------------------------------------------------------- */

    Bz = (Entry *) B ;
    n = Symbolic->n ;
    nblocks = Symbolic->nblocks ;
    Q = Symbolic->Q ;
    R = Symbolic->R ;

    /* ---------------------------------------------------------------------- */
    /* get the contents of the Numeric object */
    /* ---------------------------------------------------------------------- */

    ASSERT (nblocks == Numeric->nblocks) ;
    Pnum = Numeric->Pnum ;
    Offp = Numeric->Offp ;
    Offi = Numeric->Offi ;
    Offx = (Entry *) Numeric->Offx ;

    Lip  = Numeric->Lip ;
    Llen = Numeric->Llen ;
    Uip  = Numeric->Uip ;
    Ulen = Numeric->Ulen ;
    LUbx = (Unit **) Numeric->LUbx ;
    Udiag = (Entry *) Numeric->Udiag ;

    Rs = Numeric->Rs ;
    X = (Entry *) Numeric->Xwork ;
    ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;

    /* ---------------------------------------------------------------------- */
    /* solve in chunks of 4 columns at a time */
    /* ---------------------------------------------------------------------- */

    for (chunk = 0 ; chunk < nrhs ; chunk += 4)
    {

        /* ------------------------------------------------------------------ */
        /* get the size of the current chunk */
        /* ------------------------------------------------------------------ */

        nr = MIN (nrhs - chunk, 4) ;

        /* ------------------------------------------------------------------ */
        /* permute the right hand side, X = Q'*B */
        /* ------------------------------------------------------------------ */

        switch (nr)
        {

            case 1:

                for (k = 0 ; k < n ; k++)
                {
                    X [k] = Bz  [Q [k]] ;
                }
                break ;

            case 2:

                for (k = 0 ; k < n ; k++)
                {
                    i = Q [k] ;
                    X [2*k    ] = Bz [i      ] ;
                    X [2*k + 1] = Bz [i + d  ] ;
                }
                break ;

            case 3:

                for (k = 0 ; k < n ; k++)
                {
                    i = Q [k] ;
                    X [3*k    ] = Bz [i      ] ;
                    X [3*k + 1] = Bz [i + d  ] ;
                    X [3*k + 2] = Bz [i + d*2] ;
                }
                break ;

            case 4:

                for (k = 0 ; k < n ; k++)
                {
                    i = Q [k] ;
                    X [4*k    ] = Bz [i      ] ;
                    X [4*k + 1] = Bz [i + d  ] ;
                    X [4*k + 2] = Bz [i + d*2] ;
                    X [4*k + 3] = Bz [i + d*3] ;
                }
                break ;

        }

        /* ------------------------------------------------------------------ */
        /* solve X = (L*U + Off)'\X */
        /* ------------------------------------------------------------------ */

        for (block = 0 ; block < nblocks ; block++)
        {

            /* -------------------------------------------------------------- */
            /* the block of size nk is from rows/columns k1 to k2-1 */
            /* -------------------------------------------------------------- */

            k1 = R [block] ;
            k2 = R [block+1] ;
            nk = k2 - k1 ;
            PRINTF (("tsolve %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ;

            /* -------------------------------------------------------------- */
            /* block back-substitution for the off-diagonal-block entries */
            /* -------------------------------------------------------------- */

            if (block > 0)
            {
                switch (nr)
                    {

                    case 1:

                        for (k = k1 ; k < k2 ; k++)
                        {
                            pend = Offp [k+1] ;
                            for (p = Offp [k] ; p < pend ; p++)
                            {
#ifdef COMPLEX
                                if (conj_solve)
                                {
                                    MULT_SUB_CONJ (X [k], X [Offi [p]],
                                            Offx [p]) ;
                                }
                                else
#endif
                                {
                                    MULT_SUB (X [k], Offx [p], X [Offi [p]]) ;
                                }
                            }
                        }
                        break ;

                    case 2:

                        for (k = k1 ; k < k2 ; k++)
                        {
                            pend = Offp [k+1] ;
                            x [0] = X [2*k    ] ;
                            x [1] = X [2*k + 1] ;
                            for (p = Offp [k] ; p < pend ; p++)
                            {
                                i = Offi [p] ;
#ifdef COMPLEX
                                if (conj_solve)
                                {
                                    CONJ (offik, Offx [p]) ;
                                }
                                else
#endif
                                {
                                    offik = Offx [p] ;
                                }
                                MULT_SUB (x [0], offik, X [2*i]) ;
                                MULT_SUB (x [1], offik, X [2*i + 1]) ;
                            }
                            X [2*k    ] = x [0] ;
                            X [2*k + 1] = x [1] ;
                        }
                        break ;

                    case 3:

                        for (k = k1 ; k < k2 ; k++)
                        {
                            pend = Offp [k+1] ;
                            x [0] = X [3*k    ] ;
                            x [1] = X [3*k + 1] ;
                            x [2] = X [3*k + 2] ;
                            for (p = Offp [k] ; p < pend ; p++)
                            {
                                i = Offi [p] ;
#ifdef COMPLEX
                                if (conj_solve)
                                {
                                    CONJ (offik, Offx [p]) ;
                                }
                                else
#endif
                                {
                                    offik = Offx [p] ;
                                }
                                MULT_SUB (x [0], offik, X [3*i]) ;
                                MULT_SUB (x [1], offik, X [3*i + 1]) ;
                                MULT_SUB (x [2], offik, X [3*i + 2]) ;
                            }
                            X [3*k    ] = x [0] ;
                            X [3*k + 1] = x [1] ;
                            X [3*k + 2] = x [2] ;
                        }
                        break ;

                    case 4:

                        for (k = k1 ; k < k2 ; k++)
                        {
                            pend = Offp [k+1] ;
                            x [0] = X [4*k    ] ;
                            x [1] = X [4*k + 1] ;
                            x [2] = X [4*k + 2] ;
                            x [3] = X [4*k + 3] ;
                            for (p = Offp [k] ; p < pend ; p++)
                            {
                                i = Offi [p] ;
#ifdef COMPLEX
                                if (conj_solve)
                                {
                                    CONJ(offik, Offx [p]) ;
                                }
                                else
#endif
                                {
                                    offik = Offx [p] ;
                                }
                                MULT_SUB (x [0], offik, X [4*i]) ;
                                MULT_SUB (x [1], offik, X [4*i + 1]) ;
                                MULT_SUB (x [2], offik, X [4*i + 2]) ;
                                MULT_SUB (x [3], offik, X [4*i + 3]) ;
                            }
                            X [4*k    ] = x [0] ;
                            X [4*k + 1] = x [1] ;
                            X [4*k + 2] = x [2] ;
                            X [4*k + 3] = x [3] ;
                        }
                        break ;
                    }
            }

            /* -------------------------------------------------------------- */
            /* solve the block system */
            /* -------------------------------------------------------------- */

            if (nk == 1)
            {
#ifdef COMPLEX
                if (conj_solve)
                {
                    CONJ (s, Udiag [k1]) ;
                }
                else
#endif
                {
                    s = Udiag [k1] ;
                }
                switch (nr)
                {

                    case 1:
                        DIV (X [k1], X [k1], s) ;
                        break ;

                    case 2:
                        DIV (X [2*k1], X [2*k1], s) ;
                        DIV (X [2*k1 + 1], X [2*k1 + 1], s) ;
                        break ;

                    case 3:
                        DIV (X [3*k1], X [3*k1], s) ;
                        DIV (X [3*k1 + 1], X [3*k1 + 1], s) ;
                        DIV (X [3*k1 + 2], X [3*k1 + 2], s) ;
                        break ;

                    case 4:
                        DIV (X [4*k1], X [4*k1], s) ;
                        DIV (X [4*k1 + 1], X [4*k1 + 1], s) ;
                        DIV (X [4*k1 + 2], X [4*k1 + 2], s) ;
                        DIV (X [4*k1 + 3], X [4*k1 + 3], s) ;
                        break ;

                }
            }
            else
            {
                KLU_utsolve (nk, Uip + k1, Ulen + k1, LUbx [block],
                        Udiag + k1, nr,
#ifdef COMPLEX
                        conj_solve,
#endif
                        X + nr*k1) ;
                KLU_ltsolve (nk, Lip + k1, Llen + k1, LUbx [block], nr,
#ifdef COMPLEX
                        conj_solve,
#endif
                        X + nr*k1) ;
            }
        }

        /* ------------------------------------------------------------------ */
        /* scale and permute the result, Bz  = P'(R\X) */
        /* ------------------------------------------------------------------ */

        if (Rs == NULL)
        {

            /* no scaling */
            switch (nr)
            {

                case 1:

                    for (k = 0 ; k < n ; k++)
                    {
                        Bz  [Pnum [k]] = X [k] ;
                    }
                    break ;

                case 2:

                    for (k = 0 ; k < n ; k++)
                    {
                        i = Pnum [k] ;
                        Bz  [i      ] = X [2*k    ] ;
                        Bz  [i + d  ] = X [2*k + 1] ;
                    }
                    break ;

                case 3:

                    for (k = 0 ; k < n ; k++)
                    {
                        i = Pnum [k] ;
                        Bz  [i      ] = X [3*k    ] ;
                        Bz  [i + d  ] = X [3*k + 1] ;
                        Bz  [i + d*2] = X [3*k + 2] ;
                    }
                    break ;

                case 4:

                    for (k = 0 ; k < n ; k++)
                    {
                        i = Pnum [k] ;
                        Bz  [i      ] = X [4*k    ] ;
                        Bz  [i + d  ] = X [4*k + 1] ;
                        Bz  [i + d*2] = X [4*k + 2] ;
                        Bz  [i + d*3] = X [4*k + 3] ;
                    }
                    break ;
            }

        }
        else
        {

            switch (nr)
            {

                case 1:

                    for (k = 0 ; k < n ; k++)
                    {
                        SCALE_DIV_ASSIGN (Bz [Pnum [k]], X [k], Rs [k]) ;
                    }
                    break ;

                case 2:

                    for (k = 0 ; k < n ; k++)
                    {
                        i = Pnum [k] ;
                        rs = Rs [k] ;
                        SCALE_DIV_ASSIGN (Bz [i], X [2*k], rs) ;
                        SCALE_DIV_ASSIGN (Bz [i + d], X [2*k + 1], rs) ;
                    }
                    break ;

                case 3:

                    for (k = 0 ; k < n ; k++)
                    {
                        i = Pnum [k] ;
                        rs = Rs [k] ;
                        SCALE_DIV_ASSIGN (Bz [i], X [3*k], rs) ;
                        SCALE_DIV_ASSIGN (Bz [i + d], X [3*k + 1], rs) ;
                        SCALE_DIV_ASSIGN (Bz [i + d*2], X [3*k + 2], rs) ;
                    }
                    break ;

                case 4:

                    for (k = 0 ; k < n ; k++)
                    {
                        i = Pnum [k] ;
                        rs = Rs [k] ;
                        SCALE_DIV_ASSIGN (Bz [i], X [4*k], rs) ;
                        SCALE_DIV_ASSIGN (Bz [i + d], X [4*k + 1], rs) ;
                        SCALE_DIV_ASSIGN (Bz [i + d*2], X [4*k + 2], rs) ;
                        SCALE_DIV_ASSIGN (Bz [i + d*3], X [4*k + 3], rs) ;
                    }
                    break ;
            }
        }

        /* ------------------------------------------------------------------ */
        /* go to the next chunk of B */
        /* ------------------------------------------------------------------ */

        Bz  += d*4 ;
    }
    return (TRUE) ;
}
Beispiel #11
0
int
main()
{
  ptridiag  T, Tcopy;
  pamatrix  A, Acopy, Q, U, Vt;
  pavector  work;
  prealavector sigma, lambda;
  real      error;
  uint      rows, cols, mid;
  uint      i, n, iter;
  int       info;

  /* ------------------------------------------------------------
   * Testing symmetric tridiagonal eigenvalue solver
   * ------------------------------------------------------------ */

  n = 6;

  /* Testing symmetric tridiagonal eigenvalue solver */

  (void) printf("==================================================\n"
		"Testing symmetric tridiagonal eigenvalue solver\n"
		"==================================================\n"
		"Setting up %u x %u tridiagonal matrix\n", n, n);
  T = new_tridiag(n);
  for (i = 0; i < n; i++)
    T->d[i] = 2.0;
  for (i = 0; i < n - 1; i++)
    T->l[i] = T->u[i] = -1.0;
  Tcopy = new_tridiag(n);
  copy_tridiag(T, Tcopy);

  A = new_amatrix(n, n);
  clear_amatrix(A);
  for (i = 0; i < n; i++)
    A->a[i + i * A->ld] = T->d[i];
  for (i = 0; i < n - 1; i++) {
    A->a[(i + 1) + i * A->ld] = T->l[i];
    A->a[i + (i + 1) * A->ld] = T->u[i];
  }
  Acopy = clone_amatrix(A);

  Q = new_identity_amatrix(n, n);
  U = new_amatrix(n, n);

  (void) printf("Performing self-made implicit QR iteration\n");
  iter = sb_muleig_tridiag(T, Q, 8 * n);

  (void) printf("  %u iterations\n", iter);

  (void) printf("Checking accuracy\n");
  error = check_ortho_amatrix(false, Q);
  (void) printf("  Orthogonality Q %g, %sokay\n", error,
		(error < tolerance ? "" : "    NOT "));
  if (error >= tolerance)
    problems++;

  copy_amatrix(false, Q, U);
  diageval_tridiag_amatrix(1.0, true, T, true, U);
  addmul_amatrix(-1.0, false, U, true, Q, A);
  error = normfrob_amatrix(A);
  (void) printf("  Accuracy %g. %sokay\n", error,
		(error < tolerance ? "" : "    NOT "));
  if (error >= tolerance)
    problems++;

  (void) printf("Performing default implicit QR iteration\n");
  identity_amatrix(Q);
  i = muleig_tridiag(Tcopy, Q);
  if (i == 0)
    (void) printf("  Success\n");
  else {
    (void) printf("  Failure\n");
    problems++;
  }

  (void) printf("Checking accuracy\n");
  error = check_ortho_amatrix(false, Q);
  (void) printf("  Orthogonality Q %g, %sokay\n", error,
		(error < tolerance ? "" : "    NOT "));
  if (error >= tolerance)
    problems++;

  copy_amatrix(false, Q, U);
  diageval_tridiag_amatrix(1.0, true, Tcopy, true, U);
  addmul_amatrix(-1.0, false, U, true, Q, Acopy);
  error = normfrob_amatrix(Acopy);
  (void) printf("  Accuracy %g. %sokay\n", error,
		(error < tolerance ? "" : "    NOT "));
  if (error >= tolerance)
    problems++;

  del_amatrix(U);
  del_amatrix(Q);
  del_amatrix(Acopy);
  del_amatrix(A);
  del_tridiag(Tcopy);
  del_tridiag(T);

  (void) printf("--------------------------------------------------\n"
		"Setting up random %u x %u tridiagonal matrix\n", n, n);
  T = new_tridiag(n);
  for (i = 0; i < n; i++)
    T->d[i] = 2.0 * rand() / RAND_MAX - 1.0;
  for (i = 0; i < n - 1; i++) {
    T->l[i] = 2.0 * rand() / RAND_MAX - 1.0;
    T->u[i] = CONJ(T->l[i]);
  }
  A = new_amatrix(n, n);
  clear_amatrix(A);
  for (i = 0; i < n; i++)
    A->a[i + i * A->ld] = T->d[i];
  for (i = 0; i < n - 1; i++) {
    A->a[(i + 1) + i * A->ld] = T->l[i];
    A->a[i + (i + 1) * A->ld] = T->u[i];
  }
  Tcopy = new_tridiag(n);
  copy_tridiag(T, Tcopy);
  Acopy = clone_amatrix(A);

  Q = new_identity_amatrix(n, n);
  U = new_amatrix(n, n);

  (void) printf("Performing implicit QR iteration\n");
  iter = sb_muleig_tridiag(T, Q, 8 * n);

  (void) printf("  %u iterations\n", iter);

  (void) printf("Checking accuracy\n");
  error = check_ortho_amatrix(false, Q);
  (void) printf("  Orthogonality Q %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  copy_amatrix(false, Q, U);
  diageval_tridiag_amatrix(1.0, true, T, true, U);
  addmul_amatrix(-1.0, false, U, true, Q, A);
  error = normfrob_amatrix(A);
  (void) printf("  Accuracy %g, %sokay\n", error,
		(error < tolerance ? "" : "    NOT "));
  if (error >= tolerance)
    problems++;

  (void) printf("Using default eigenvalue solver\n");
  identity_amatrix(Q);
  muleig_tridiag(Tcopy, Q);

  (void) printf("Checking accuracy\n");
  error = check_ortho_amatrix(false, Q);
  (void) printf("  Orthogonality Q %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  copy_amatrix(false, Q, U);
  diageval_tridiag_amatrix(1.0, true, Tcopy, true, U);
  addmul_amatrix(-1.0, false, U, true, Q, Acopy);
  error = normfrob_amatrix(Acopy);
  (void) printf("  Accuracy %g, %sokay\n", error,
		(error < tolerance ? "" : "    NOT "));
  if (error >= tolerance)
    problems++;

  del_amatrix(U);
  del_amatrix(Q);
  del_amatrix(Acopy);
  del_amatrix(A);
  del_tridiag(Tcopy);
  del_tridiag(T);

  /* ------------------------------------------------------------
   * Testing self-adjoint matrix eigenvalue solver
   * ------------------------------------------------------------ */

  (void) printf("==================================================\n"
		"Testing self-adjoint matrix eigenvalue solver\n"
		"==================================================\n"
		"Setting up random %u x %u self-adjoint matrix\n", n, n);
  A = new_amatrix(n, n);
  random_selfadjoint_amatrix(A);

  Acopy = new_amatrix(n, n);
  copy_amatrix(false, A, Acopy);

  lambda = new_realavector(n);
  Q = new_identity_amatrix(n, n);

  (void) printf("Performing implicit QR iteration\n");
  iter = sb_eig_amatrix(A, lambda, Q, 8 * n);

  (void) printf("  %u iterations\n", iter);

  (void) printf("Checking accuracy\n");
  error = check_ortho_amatrix(false, Q);
  (void) printf("  Orthogonality Q %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  U = new_amatrix(n, n);
  copy_amatrix(false, Q, U);
  copy_amatrix(false, Acopy, A);
  diageval_realavector_amatrix(1.0, true, lambda, true, U);
  addmul_amatrix(-1.0, false, U, true, Q, A);
  error = normfrob_amatrix(A);
  (void) printf("  Accuracy %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  (void) printf("Using default eigenvalue solver\n");
  copy_amatrix(false, Acopy, A);
  info = eig_amatrix(A, lambda, Q);
  assert(info == 0);

  (void) printf("Checking accuracy\n");
  error = check_ortho_amatrix(false, Q);
  (void) printf("  Orthogonality Q %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  copy_amatrix(false, Q, U);
  diageval_realavector_amatrix(1.0, true, lambda, true, U);
  addmul_amatrix(-1.0, false, U, true, Q, Acopy);
  error = normfrob_amatrix(Acopy);
  (void) printf("  Accuracy %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  del_amatrix(U);
  del_amatrix(Q);
  del_realavector(lambda);
  del_amatrix(Acopy);
  del_amatrix(A);

  /* ------------------------------------------------------------
   * Testing bidiagonal SVD solver
   * ------------------------------------------------------------ */

  (void) printf("==================================================\n"
		"Testing bidiagonal SVD solver\n"
		"==================================================\n"
		"Setting up bidiagonal %u x %u matrix\n", n, n);
  T = new_tridiag(n);
  for (i = 0; i < n; i++)
    T->d[i] = i + 1.0;
  for (i = 0; i < n - 1; i++) {
    T->l[i] = 1.0;
    T->u[i] = 0.0;
  }
  A = new_amatrix(n, n);
  clear_amatrix(A);
  for (i = 0; i < n; i++)
    A->a[i + i * A->ld] = T->d[i];
  for (i = 0; i < n - 1; i++)
    A->a[(i + 1) + i * A->ld] = T->l[i];

  Tcopy = new_tridiag(n);
  copy_tridiag(T, Tcopy);
  Acopy = new_amatrix(n, n);
  copy_amatrix(false, A, Acopy);

  U = new_identity_amatrix(n, n);
  Vt = new_identity_amatrix(n, n);

  (void) printf("Performing self-made implicit SVD iteration\n");
  iter = sb_mulsvd_tridiag(T, U, Vt, 8 * n);
  (void) printf("  %u iterations\n", iter);

  (void) printf("Checking accuracy\n");
  error = check_ortho_amatrix(false, U);
  (void) printf("  Orthogonality U %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  error = check_ortho_amatrix(true, Vt);
  (void) printf("  Orthogonality Vt %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  diageval_tridiag_amatrix(1.0, true, T, true, U);
  addmul_amatrix(-1.0, false, U, false, Vt, A);
  error = normfrob_amatrix(A);
  (void) printf("  Accuracy %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  (void) printf("Using default SVD solver\n");
  copy_tridiag(Tcopy, T);
  identity_amatrix(U);
  identity_amatrix(Vt);
  mulsvd_tridiag(T, U, Vt);

  (void) printf("Checking accuracy\n");
  error = check_ortho_amatrix(false, U);
  (void) printf("  Orthogonality U %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  error = check_ortho_amatrix(true, Vt);
  (void) printf("  Orthogonality Vt %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  copy_amatrix(false, Acopy, A);
  diageval_tridiag_amatrix(1.0, true, T, true, U);
  addmul_amatrix(-1.0, false, U, false, Vt, A);
  error = normfrob_amatrix(A);
  (void) printf("  Accuracy %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  del_amatrix(Vt);
  del_amatrix(U);
  del_amatrix(Acopy);
  del_tridiag(Tcopy);
  del_amatrix(A);
  del_tridiag(T);

  (void) printf("--------------------------------------------------\n"
		"Setting up random bidiagonal %u x %u matrix\n", n, n);
  T = new_tridiag(n);
  for (i = 0; i < n; i++) {
    T->d[i] = 2.0 * rand() / RAND_MAX - 1.0;
  }
  for (i = 0; i < n - 1; i++) {
    T->l[i] = 2.0 * rand() / RAND_MAX - 1.0;
    T->u[i] = 0.0;
  }
  A = new_amatrix(n, n);
  clear_amatrix(A);
  for (i = 0; i < n; i++)
    A->a[i + i * A->ld] = T->d[i];
  for (i = 0; i < n - 1; i++)
    A->a[(i + 1) + i * A->ld] = T->l[i];

  Tcopy = new_tridiag(n);
  copy_tridiag(T, Tcopy);
  Acopy = clone_amatrix(A);

  U = new_identity_amatrix(n, n);
  Vt = new_identity_amatrix(n, n);

  (void) printf("Performing implicit SVD iteration\n");
  iter = sb_mulsvd_tridiag(T, U, Vt, 8 * n);
  (void) printf("  %u iterations\n", iter);

  (void) printf("Checking accuracy\n");
  error = check_ortho_amatrix(false, U);
  (void) printf("  Orthogonality U %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  error = check_ortho_amatrix(true, Vt);
  (void) printf("  Orthogonality Vt %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  diageval_tridiag_amatrix(1.0, true, T, true, U);
  addmul_amatrix(-1.0, false, U, false, Vt, A);
  error = normfrob_amatrix(A);
  (void) printf("  Accuracy %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  (void) printf("Using default SVD solver\n");
  copy_tridiag(Tcopy, T);
  copy_amatrix(false, Acopy, A);
  identity_amatrix(U);
  identity_amatrix(Vt);
  mulsvd_tridiag(T, U, Vt);

  (void) printf("Checking accuracy\n");
  error = check_ortho_amatrix(false, U);
  (void) printf("  Orthogonality U %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  error = check_ortho_amatrix(true, Vt);
  (void) printf("  Orthogonality Vt %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  diageval_tridiag_amatrix(1.0, true, T, true, U);
  addmul_amatrix(-1.0, false, U, false, Vt, A);
  error = normfrob_amatrix(A);
  (void) printf("  Accuracy %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  del_amatrix(Vt);
  del_amatrix(U);
  del_amatrix(Acopy);
  del_tridiag(Tcopy);
  del_amatrix(A);
  del_tridiag(T);

  /* ------------------------------------------------------------
   * Testing Golub-Kahan bidiagonalization
   * ------------------------------------------------------------ */

  rows = 10;
  cols = 7;
  mid = UINT_MIN(rows, cols);
  (void) printf("==================================================\n"
		"Testing Golub-Kahan bidiagonalization\n"
		"==================================================\n"
		"Setting up random %u x %u matrix\n", rows, cols);
  A = new_amatrix(rows, cols);
  random_amatrix(A);
  Acopy = new_amatrix(rows, cols);
  copy_amatrix(false, A, Acopy);
  U = new_amatrix(rows, mid);
  Vt = new_amatrix(mid, cols);
  T = new_tridiag(mid);

  (void) printf("Bidiagonalizing\n");
  bidiagonalize_amatrix(A, T, U, Vt);

  (void) printf("Checking accuracy\n");
  error = check_ortho_amatrix(false, U);
  (void) printf("  Orthogonality U %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  error = check_ortho_amatrix(true, Vt);
  (void) printf("  Orthogonality Vt %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  lowereval_tridiag_amatrix(1.0, true, T, true, U);
  addmul_amatrix(-1.0, false, U, false, Vt, Acopy);
  error = normfrob_amatrix(Acopy);
  (void) printf("  Accuracy %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  del_tridiag(T);
  del_amatrix(Vt);
  del_amatrix(U);
  del_amatrix(Acopy);
  del_amatrix(A);

  rows = 8;
  cols = 15;
  mid = UINT_MIN(rows, cols);
  (void) printf("--------------------------------------------------\n"
		"Setting up %u x %u matrix\n", rows, cols);
  A = new_amatrix(rows, cols);
  random_amatrix(A);
  Acopy = new_amatrix(rows, cols);
  copy_amatrix(false, A, Acopy);
  U = new_amatrix(rows, mid);
  Vt = new_amatrix(mid, cols);
  T = new_tridiag(mid);

  (void) printf("Bidiagonalizing\n");
  bidiagonalize_amatrix(A, T, U, Vt);

  (void) printf("Checking accuracy\n");
  error = check_ortho_amatrix(false, U);
  (void) printf("  Orthogonality U %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  error = check_ortho_amatrix(true, Vt);
  (void) printf("  Orthogonality Vt %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  lowereval_tridiag_amatrix(1.0, true, T, true, U);
  addmul_amatrix(-1.0, false, U, false, Vt, Acopy);
  error = normfrob_amatrix(Acopy);
  (void) printf("  Accuracy %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  del_tridiag(T);
  del_amatrix(Vt);
  del_amatrix(U);
  del_amatrix(Acopy);
  del_amatrix(A);

  /* ------------------------------------------------------------
   * Testing general SVD solver
   * ------------------------------------------------------------ */

  (void) printf("==================================================\n"
		"Testing general SVD solver\n"
		"==================================================\n"
		"Setting up 3 x 4 matrix\n");
  A = new_amatrix(3, 4);
  setentry_amatrix(A, 0, 0, 1.0);
  setentry_amatrix(A, 1, 0, 2.0);
  setentry_amatrix(A, 2, 0, 3.0);
  setentry_amatrix(A, 0, 1, 2.0);
  setentry_amatrix(A, 1, 1, 4.0);
  setentry_amatrix(A, 2, 1, 6.0);
  setentry_amatrix(A, 0, 2, 2.0);
  setentry_amatrix(A, 1, 2, 5.0);
  setentry_amatrix(A, 2, 2, 8.0);
  setentry_amatrix(A, 0, 3, 1.0);
  setentry_amatrix(A, 1, 3, 4.0);
  setentry_amatrix(A, 2, 3, 7.0);

  Acopy = new_amatrix(A->rows, A->cols);
  copy_amatrix(false, A, Acopy);

  U = new_identity_amatrix(3, 3);
  Vt = new_identity_amatrix(3, 4);
  sigma = new_realavector(3);
  work = new_avector(3 * 3);

  (void) printf("Running self-made SVD solver\n");
  iter = sb_svd_amatrix(A, sigma, U, Vt, 24);

  (void) printf("  %u iterations\n", iter);

  (void) printf("Checking accuracy\n");
  error = check_ortho_amatrix(false, U);
  (void) printf("  Orthogonality U %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  error = check_ortho_amatrix(true, Vt);
  (void) printf("  Orthogonality Vt %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  diageval_realavector_amatrix(1.0, true, sigma, true, U);
  addmul_amatrix(-1.0, false, U, false, Vt, Acopy);
  error = normfrob_amatrix(Acopy);
  (void) printf("  Accuracy %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  del_avector(work);
  del_realavector(sigma);
  del_amatrix(Vt);
  del_amatrix(U);
  del_amatrix(Acopy);
  del_amatrix(A);

  (void) printf("--------------------------------------------------\n"
		"Setting up 4 x 3 matrix\n");
  A = new_amatrix(4, 3);
  setentry_amatrix(A, 0, 0, 1.0);
  setentry_amatrix(A, 0, 1, 2.0);
  setentry_amatrix(A, 0, 2, 3.0);
  setentry_amatrix(A, 1, 0, 2.0);
  setentry_amatrix(A, 1, 1, 4.0);
  setentry_amatrix(A, 1, 2, 6.0);
  setentry_amatrix(A, 2, 0, 2.0);
  setentry_amatrix(A, 2, 1, 5.0);
  setentry_amatrix(A, 2, 2, 8.0);
  setentry_amatrix(A, 3, 0, 1.0);
  setentry_amatrix(A, 3, 1, 4.0);
  setentry_amatrix(A, 3, 2, 7.0);

  Acopy = new_amatrix(A->rows, A->cols);
  copy_amatrix(false, A, Acopy);

  U = new_amatrix(4, 3);
  Vt = new_amatrix(3, 3);
  sigma = new_realavector(3);
  work = new_avector(3 * 3);

  (void) printf("Running self-made SVD solver\n");
  iter = sb_svd_amatrix(A, sigma, U, Vt, 24);
  (void) printf("  %u iterations\n", iter);

  (void) printf("Checking accuracy\n");
  error = check_ortho_amatrix(false, U);
  (void) printf("  Orthogonality U %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  error = check_ortho_amatrix(true, Vt);
  (void) printf("  Orthogonality V %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  diageval_realavector_amatrix(1.0, true, sigma, true, U);
  addmul_amatrix(-1.0, false, U, false, Vt, Acopy);
  error = normfrob_amatrix(Acopy);
  (void) printf("  Accuracy %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  del_avector(work);
  del_realavector(sigma);
  del_amatrix(Vt);
  del_amatrix(U);
  del_amatrix(Acopy);
  del_amatrix(A);

  (void) printf("--------------------------------------------------\n"
		"Setting up 4 x 3 matrix\n");
  A = new_amatrix(4, 3);
  setentry_amatrix(A, 0, 0, 1.0);
  setentry_amatrix(A, 0, 1, 2.0);
  setentry_amatrix(A, 0, 2, 3.0);
  setentry_amatrix(A, 1, 0, 2.0);
  setentry_amatrix(A, 1, 1, 4.0);
  setentry_amatrix(A, 1, 2, 6.0);
  setentry_amatrix(A, 2, 0, 2.0);
  setentry_amatrix(A, 2, 1, 5.0);
  setentry_amatrix(A, 2, 2, 8.0);
  setentry_amatrix(A, 3, 0, 1.0);
  setentry_amatrix(A, 3, 1, 4.0);
  setentry_amatrix(A, 3, 2, 7.0);

  Acopy = clone_amatrix(A);

  U = new_amatrix(4, 3);
  Vt = new_amatrix(3, 3);
  sigma = new_realavector(3);
  work = new_avector(3 * 3);

  (void) printf("Running self-made SVD solver\n");
  iter = sb_svd_amatrix(A, sigma, U, Vt, 24);
  (void) printf("  %u iterations\n", iter);

  (void) printf("Checking accuracy\n");
  error = check_ortho_amatrix(false, U);
  (void) printf("  Orthogonality U %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  error = check_ortho_amatrix(true, Vt);
  (void) printf("  Orthogonality Vt %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  diageval_realavector_amatrix(1.0, true, sigma, true, U);
  addmul_amatrix(-1.0, false, U, false, Vt, Acopy);
  error = normfrob_amatrix(Acopy);
  (void) printf("  Accuracy %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  del_avector(work);
  del_realavector(sigma);
  del_amatrix(Vt);
  del_amatrix(U);
  del_amatrix(Acopy);
  del_amatrix(A);

  rows = 9;
  cols = 7;
  mid = UINT_MIN(rows, cols);
  (void) printf("--------------------------------------------------\n"
		"Setting up random %u x %u matrix\n", rows, cols);
  A = new_amatrix(rows, cols);
  random_amatrix(A);

  Acopy = new_amatrix(A->rows, A->cols);
  copy_amatrix(false, A, Acopy);

  U = new_amatrix(rows, mid);
  Vt = new_amatrix(mid, cols);
  sigma = new_realavector(mid);
  work = new_avector(3 * mid);

  (void) printf("Running self-made SVD solver\n");
  iter = sb_svd_amatrix(A, sigma, U, Vt, 10 * mid);
  (void) printf("  %u iterations\n", iter);

  (void) printf("Checking accuracy\n");
  error = check_ortho_amatrix(false, U);
  (void) printf("  Orthogonality U %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  error = check_ortho_amatrix(true, Vt);
  (void) printf("  Orthogonality Vt %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  diageval_realavector_amatrix(1.0, true, sigma, true, U);
  addmul_amatrix(-1.0, false, U, false, Vt, Acopy);
  error = normfrob_amatrix(Acopy);
  (void) printf("  Accuracy %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  del_avector(work);
  del_realavector(sigma);
  del_amatrix(Vt);
  del_amatrix(U);
  del_amatrix(Acopy);
  del_amatrix(A);

  rows = 10;
  cols = 6;
  mid = UINT_MIN(rows, cols);
  (void) printf("--------------------------------------------------\n"
		"Setting up random %u x %u matrix\n", rows, cols);
  A = new_amatrix(rows, cols);
  random_amatrix(A);

  Acopy = new_amatrix(A->rows, A->cols);
  copy_amatrix(false, A, Acopy);

  U = new_amatrix(rows, mid);
  Vt = new_amatrix(mid, cols);
  sigma = new_realavector(mid);
  work = new_avector(3 * mid);

  (void) printf("Running self-made SVD solver\n");
  iter = sb_svd_amatrix(A, sigma, U, Vt, 10 * mid);
  (void) printf("  %u iterations\n", iter);

  (void) printf("Checking accuracy\n");
  error = check_ortho_amatrix(false, U);
  (void) printf("  Orthogonality U %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  error = check_ortho_amatrix(true, Vt);
  (void) printf("  Orthogonality Vt %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  copy_amatrix(false, Acopy, A);
  diageval_realavector_amatrix(1.0, true, sigma, true, U);
  addmul_amatrix(-1.0, false, U, false, Vt, A);
  error = normfrob_amatrix(A);
  (void) printf("  Accuracy %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  (void) printf("Running default SVD solver\n");
  copy_amatrix(false, Acopy, A);
  svd_amatrix(A, sigma, U, Vt);

  (void) printf("Checking accuracy\n");
  error = check_ortho_amatrix(false, U);
  (void) printf("  Orthogonality U %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  error = check_ortho_amatrix(true, Vt);
  (void) printf("  Orthogonality Vt %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  copy_amatrix(false, Acopy, A);
  diageval_realavector_amatrix(1.0, true, sigma, true, U);
  addmul_amatrix(-1.0, false, U, false, Vt, A);
  error = normfrob_amatrix(A);
  (void) printf("  Accuracy %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  del_avector(work);
  del_realavector(sigma);
  del_amatrix(Vt);
  del_amatrix(U);
  del_amatrix(Acopy);
  del_amatrix(A);

  rows = 7;
  cols = 13;
  mid = UINT_MIN(rows, cols);
  (void) printf("--------------------------------------------------\n"
		"Setting up random %u x %u matrix\n", rows, cols);
  A = new_amatrix(rows, cols);
  random_amatrix(A);

  Acopy = new_amatrix(A->rows, A->cols);
  copy_amatrix(false, A, Acopy);

  U = new_amatrix(rows, mid);
  Vt = new_amatrix(mid, cols);
  sigma = new_realavector(mid);
  work = new_avector(3 * mid);

  (void) printf("Running self-made SVD solver\n");
  iter = sb_svd_amatrix(A, sigma, U, Vt, 10 * mid);
  (void) printf("  %u iterations\n", iter);

  (void) printf("Checking accuracy\n");
  error = check_ortho_amatrix(false, U);
  (void) printf("  Orthogonality U %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  error = check_ortho_amatrix(true, Vt);
  (void) printf("  Orthogonality Vt %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  copy_amatrix(false, Acopy, A);
  diageval_realavector_amatrix(1.0, true, sigma, true, U);
  addmul_amatrix(-1.0, false, U, false, Vt, A);
  error = normfrob_amatrix(A);
  (void) printf("  Accuracy %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  (void) printf("Running default SVD solver\n");
  copy_amatrix(false, Acopy, A);
  svd_amatrix(A, sigma, U, Vt);

  (void) printf("Checking accuracy\n");
  error = check_ortho_amatrix(false, U);
  (void) printf("  Orthogonality U %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  error = check_ortho_amatrix(true, Vt);
  (void) printf("  Orthogonality Vt %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  copy_amatrix(false, Acopy, A);
  diageval_realavector_amatrix(1.0, true, sigma, true, U);
  addmul_amatrix(-1.0, false, U, false, Vt, A);
  error = normfrob_amatrix(A);
  (void) printf("  Accuracy %g, %sokay\n", error,
		(error < tolerance ? "" : "NOT "));
  if (error >= tolerance)
    problems++;

  del_avector(work);
  del_realavector(sigma);
  del_amatrix(Vt);
  del_amatrix(U);
  del_amatrix(Acopy);
  del_amatrix(A);

  printf("----------------------------------------\n"
	 "  %u matrices and\n"
	 "  %u vectors still active\n"
	 "  %u errors found\n", getactives_amatrix(), getactives_avector(),
	 problems);

  return problems;
}
Beispiel #12
0
/* Executa o programa */
int Mepa::exec(){

	int op;

    if(detalha)
	{
         p->imprime(); /* Imprime o programa */
    }

    op = p->next();
    while( op != PARA )
	{
        if(detalha) 
		{	
			debug();
		}
        switch(op)
		{
            /* Funcoes MEPA */
            case 0: { AMEM(); break; } 
            case 1: { ARMI(); break; } 
            case 2: { ARMP(); break; }
            case 3: { ARMZ(); break; } 
            case 4: { CHPP(); break; } 
            case 5: { CHPR(); break; }
            case 6: { CMAF(); break; } 
            case 7: { CMAG(); break; } 
            case 8: { CMDF(); break; }
            case 9: { CMDG(); break; } 
            case 10: { CMEF(); break; } 
            case 11: { CMEG(); break; }
            case 12: { CMIF(); break; } 
            case 13: { CMIG(); break; } 
            case 14: { CMMA(); break; } 
            case 15: { CMMF(); break; } 
            case 16: { CMME(); break; } 
            case 17: { CMNF(); break; } 
            case 18: { CONJ(); break; } 
            case 19: { CRCT(); break; } 
            case 20: { CRCF(); break; } 
            case 21: { CREG(); break; }
            case 22: { CREN(); break; }
            case 23: { CRVI(); break; } 
            case 24: { CRVL(); break; }
            case 25: { CRVP(); break; }
            case 26: { DISJ(); break; }
            case 27: { DIVF(); break; } 
            case 28: { DIVI(); break; }
            case 29: { DMEM(); break; } 
            case 30: { DSVF(); break; } 
            case 31: { DSVS(); break; } 
            case 32: { ENTR(); break; } 
            case 33: { IMPC(); break; }
            case 34: { IMPF(); break; }
            case 35: { IMPR(); break; }
            case 36: { INPP(); break; }
            case 37: { INVF(); break; } 
            case 38: { INVR(); break; }
            case 39: { LEIT(); break; }
            case 40: { LEIF(); break; }
            case 41: { MULF(); break; }
            case 42: { MULT(); break; }
            case 43: { NEGA(); break; }
            case 44: { RTPR(); break; }
            case 45: { SOMA(); break; }
            case 46: { SOMF(); break; }
            case 47: { SUBT(); break; }
            case 48: { SUBF(); break; }
            default:{
               cerr << "O programa executou uma operacao invalida." << endl;
               cerr << "i = " << p->getI() << endl; 
               abort();
            }
        }
        op = p->next();
    }
    return 0;
}