mlib_status mlib_VectorConjSymExt_S32C_S32C_Sat_N( mlib_s32 *zz, const mlib_s32 *xx, mlib_s32 n, mlib_s32 n1) { CHECK; const mlib_s32 *px = xx; mlib_s32 *pz = zz; mlib_s32 *pd = zz + n1 + n1; mlib_s32 i, ax, az, n2, n3, nstep, c, c0, c1; __m128i xbuf, zbuf, mask1, mask2, mask3; mask1 = _mm_setr_epi32(0, 0xffffffff, 0, 0xffffffff); mask2 = _mm_setr_epi32(0, 0x80000000, 0, 0x80000000); ax = (mlib_addr)px & 15; az = (mlib_addr)pz & 15; nstep = 16 / sizeof (mlib_s32); if (ax & 7) { for (i = 0; i < n / 2; i++) { CONJ_S32C(loadu, storeu); } for (i = 0; i < n % 2; i++) { CONJ(S32); } } else { n1 = ((16 - ax) & 15) / sizeof (mlib_s32); n2 = (n + n - n1) / nstep; n3 = n + n - n1 - n2 * nstep; if (n2 < 1) { for (i = 0; i < n; i++) { CONJ(S32); } } else { for (i = 0; i < n1 / 2; i++) { CONJ(S32); } if (ax == az) { for (i = 0; i < n2; i++) { CONJ_S32C(loadu, storeu); } } else { for (i = 0; i < n2; i++) { CONJ_S32C(loadu, storeu); } } for (i = 0; i < n3 / 2; i++) { CONJ(S32); } } } return (MLIB_SUCCESS); }
mlib_status __mlib_VectorConj_S16C_S16C_Sat( mlib_s16 *zz, const mlib_s16 *xx, mlib_s32 n) { CHECK; const mlib_s16 *px = xx; mlib_s16 *pz = zz; mlib_s32 i, ax, az, n1, n2, n3, nstep, c; __m128i xbuf, zbuf, mask1, mask2, mask3; mask1 = _mm_set1_epi32(0xffff0000); mask2 = _mm_set1_epi32(0x80000000); ax = (mlib_addr)xx & 15; az = (mlib_addr)zz & 15; nstep = 16 / sizeof (mlib_s16); if (ax & 3) { for (i = 0; i < n / 4; i++) { CONJ_S16C(loadu, storeu); } for (i = 0; i < n % 4; i++) { CONJ(S16); } } else { n1 = ((16 - ax) & 15) / sizeof (mlib_s16); n2 = (n + n - n1) / nstep; n3 = n + n - n1 - n2 * nstep; if (n2 < 1) { for (i = 0; i < n; i++) { CONJ(S16); } } else { for (i = 0; i < n1 / 2; i++) { CONJ(S16); } if (ax == az) { for (i = 0; i < n2; i++) { CONJ_S16C(loadu, storeu); } } else { for (i = 0; i < n2; i++) { CONJ_S16C(loadu, storeu); } } for (i = 0; i < n3 / 2; i++) { CONJ(S16); } } } return (MLIB_SUCCESS); }
/** Computes the weighted inner/dot product \f$x^H (w2\odot w2 \odot x)\f$. */ R Y(dot_w2_complex)(C *x, R *w2, INT n) { INT k; R dot; for (k = 0, dot = K(0.0); k < n; k++) dot+=w2[k]*w2[k]*CONJ(x[k])*x[k]; return dot; }
/** Computes the inner/dot product \f$x^H x\f$. */ R Y(dot_complex)(C *x, INT n) { INT k; R dot; for (k = 0, dot = K(0.0); k < n; k++) dot += CONJ(x[k])*x[k]; return dot; }
STATIC int solve_H_Ref_Sprimme(SCALAR *H, int ldH, SCALAR *hVecs, int ldhVecs, SCALAR *hU, int ldhU, REAL *hSVals, SCALAR *R, int ldR, SCALAR *QtQ, int ldQtQ, SCALAR *VtBV, int ldVtBV, EVAL *hVals, int basisSize, int targetShiftIndex, primme_context ctx) { primme_params *primme = ctx.primme; int i, j; /* Loop variables */ (void)targetShiftIndex; /* unused parameter */ /* Some LAPACK implementations don't like zero-size matrices */ if (basisSize == 0) return 0; /* Copy R into hVecs */ Num_copy_matrix_Sprimme(R, basisSize, basisSize, ldR, hVecs, ldhVecs, ctx); if (QtQ) { /* Factorize QtQ */ Num_copy_matrix_Sprimme( QtQ, basisSize, basisSize, ldQtQ, hU, ldhU, ctx); CHKERR(Num_potrf_Sprimme("U", basisSize, hU, ldhU, NULL, ctx)); CHKERR(Num_trmm_Sprimme("L", "U", "N", "N", basisSize, basisSize, 1.0, hU, ldhU, hVecs, ldhVecs, ctx)); } SCALAR *U_VtBV=NULL; /* Cholesky factor of VtBV */ if (VtBV) { CHKERR(Num_malloc_Sprimme(basisSize*basisSize, &U_VtBV, ctx)); Num_copy_matrix_Sprimme( VtBV, basisSize, basisSize, ldVtBV, U_VtBV, basisSize, ctx); CHKERR(Num_potrf_Sprimme("U", basisSize, U_VtBV, basisSize, NULL, ctx)); CHKERR(Num_trsm_Sprimme("R", "U", "N", "N", basisSize, basisSize, 1.0, U_VtBV, basisSize, hVecs, basisSize, ctx)); } /* Note gesvd returns transpose(V) rather than V and sorted in descending */ /* order of the singular values */ CHKERR(Num_gesvd_Sprimme("S", "O", basisSize, basisSize, hVecs, ldhVecs, hSVals, hU, ldhU, hVecs, ldhVecs, ctx)); /* Transpose back V */ SCALAR *rwork; CHKERR(Num_malloc_Sprimme((size_t)basisSize * basisSize, &rwork, ctx)); for (j = 0; j < basisSize; j++) { for (i = 0; i < basisSize; i++) { rwork[basisSize * j + i] = CONJ(hVecs[ldhVecs * i + j]); } } Num_copy_matrix_Sprimme( rwork, basisSize, basisSize, basisSize, hVecs, ldhVecs, ctx); if (VtBV) { CHKERR(Num_trsm_Sprimme("L", "U", "N", "N", basisSize, basisSize, 1.0, U_VtBV, basisSize, hVecs, ldhVecs, ctx)); CHKERR(Num_free_Sprimme(U_VtBV, ctx)); } /* Rearrange V, hSVals and hU in ascending order of singular value */ /* if target is not largest abs. */ if (primme->target == primme_closest_abs || primme->target == primme_closest_leq || primme->target == primme_closest_geq) { int *perm; CHKERR(Num_malloc_iprimme(basisSize, &perm, ctx)); for (i = 0; i < basisSize; i++) perm[i] = basisSize - 1 - i; permute_vecs_Rprimme(hSVals, 1, basisSize, 1, perm, ctx); permute_vecs_Sprimme(hVecs, basisSize, basisSize, ldhVecs, perm, ctx); permute_vecs_Sprimme(hU, basisSize, basisSize, ldhU, perm, ctx); CHKERR(Num_free_iprimme(perm, ctx)); } /* compute Rayleigh quotient lambda_i = x_i'*H*x_i */ Num_hemm_Sprimme("L", "U", basisSize, basisSize, 1.0, H, ldH, hVecs, ldhVecs, 0.0, rwork, basisSize); for (i=0; i<basisSize; i++) { hVals[i] = KIND(REAL_PART, )(Num_dot_Sprimme( basisSize, &hVecs[ldhVecs * i], 1, &rwork[basisSize * i], 1, ctx)); } CHKERR(Num_free_Sprimme(rwork, ctx)); return 0; }
static void BlockHessenberg( MatrixView<T> A, VectorView<T> Ubeta) { // Much like the block version of Bidiagonalize, we try to maintain // the operation of several successive Householder matrices in // a block form, where the net Block Householder is I - YZYt. // // But as with the bidiagonlization algorithm (and unlike a simple // block QR decomposition), we update the matrix from both the left // and the right, so we also need to keep track of the product // ZYtm in addition. // // The block update at the end of the block loop is // m' = (I-YZYt) m (I-YZtYt) // // The Y matrix is stored in the first K columns of m, // and the Hessenberg portion of these columns is updated as we go. // For the right-hand-side update, m -= mYZtYt, the m on the right // needs to be the full original matrix m, including the original // versions of these K columns. Therefore, we can't wait until // the end for this calculation. // // Instead, we keep track of mYZt as we progress, so the final update // is: // // m' = (I-YZYt) (m - mYZt Y) // // We also need to do this same calculation for each column as we // progress through the block. // const ptrdiff_t N = A.rowsize(); #ifdef XDEBUG Matrix<T> A0(A); #endif TMVAssert(A.rowsize() == A.colsize()); TMVAssert(N > 0); TMVAssert(Ubeta.size() == N-1); TMVAssert(!Ubeta.isconj()); TMVAssert(Ubeta.step()==1); ptrdiff_t ncolmax = MIN(HESS_BLOCKSIZE,N-1); Matrix<T,RowMajor> mYZt_full(N,ncolmax); UpperTriMatrix<T,NonUnitDiag|ColMajor> Z_full(ncolmax); T det(0); // Ignore Householder Determinant calculations T* Uj = Ubeta.ptr(); for(ptrdiff_t j1=0;j1<N-1;) { ptrdiff_t j2 = MIN(N-1,j1+HESS_BLOCKSIZE); ptrdiff_t ncols = j2-j1; MatrixView<T> mYZt = mYZt_full.subMatrix(0,N-j1,0,ncols); UpperTriMatrixView<T> Z = Z_full.subTriMatrix(0,ncols); for(ptrdiff_t j=j1,jj=0;j<j2;++j,++jj,++Uj) { // jj = j-j1 // Update current column of A // // m' = (I - YZYt) (m - mYZt Yt) // A(0:N,j)' = A(0:N,j) - mYZt(0:N,0:j) Y(j,0:j)t A.col(j,j1+1,N) -= mYZt.Cols(0,j) * A.row(j,0,j).Conjugate(); // // A(0:N,j)'' = A(0:N,j) - Y Z Yt A(0:N,j)' // // Let Y = (L) where L is unit-diagonal, lower-triangular, // (M) and M is rectangular // LowerTriMatrixView<T> L = LowerTriMatrixViewOf(A.subMatrix(j1+1,j+1,j1,j),UnitDiag); MatrixView<T> M = A.subMatrix(j+1,N,j1,j); // Use the last column of Z as temporary storage for Yt A(0:N,j)' VectorView<T> YtAj = Z.col(jj,0,jj); YtAj = L.adjoint() * A.col(j,j1+1,j+1); YtAj += M.adjoint() * A.col(j,j+1,N); YtAj = Z.subTriMatrix(0,jj) * YtAj; A.col(j,j1+1,j+1) -= L * YtAj; A.col(j,j+1,N) -= M * YtAj; // Do the Householder reflection VectorView<T> u = A.col(j,j+1,N); T bu = Householder_Reflect(u,det); #ifdef TMVFLDEBUG TMVAssert(Uj >= Ubeta._first); TMVAssert(Uj < Ubeta._last); #endif *Uj = bu; // Save the top of the u vector, which isn't actually part of u T& Atemp = *u.cptr(); TMVAssert(IMAG(Atemp) == RealType(T)(0)); RealType(T) Aorig = REAL(Atemp); Atemp = RealType(T)(1); // Update Z VectorView<T> Zj = Z.col(jj,0,jj); Zj = -bu * M.adjoint() * u; Zj = Z * Zj; Z(jj,jj) = -bu; // Update mYtZt: // // mYZt(0:N,j) = m(0:N,0:N) Y(0:N,0:j) Zt(0:j,j) // = m(0:N,j+1:N) Y(j+1:N,j) Zt(j,j) // = bu* m(0:N,j+1:N) u // mYZt.col(jj) = CONJ(bu) * A.subMatrix(j1,N,j+1,N) * u; // Restore Aorig, which is actually part of the Hessenberg matrix. Atemp = Aorig; } // Update the rest of the matrix: // A(j2,j2-1) needs to be temporarily changed to 1 for use in Y T& Atemp = *(A.ptr() + j2*A.stepi() + (j2-1)*A.stepj()); TMVAssert(IMAG(Atemp) == RealType(T)(0)); RealType(T) Aorig = Atemp; Atemp = RealType(T)(1); // m' = (I-YZYt) (m - mYZt Y) MatrixView<T> m = A.subMatrix(j1,N,j2,N); ConstMatrixView<T> Y = A.subMatrix(j2+1,N,j1,j2); m -= mYZt * Y.adjoint(); BlockHouseholder_LMult(Y,Z,m); // Restore A(j2,j2-1) Atemp = Aorig; j1 = j2; } #ifdef XDEBUG Matrix<T> U(N,N,T(0)); U.subMatrix(1,N,1,N) = A.subMatrix(1,N,0,N-1); U.upperTri().setZero(); U(0,0) = T(1); Vector<T> Ubeta2(N); Ubeta2.subVector(1,N) = Ubeta; Ubeta2(0) = T(0); GetQFromQR(U.view(),Ubeta2); Matrix<T> H = A; if (N>2) LowerTriMatrixViewOf(H).offDiag(2).setZero(); Matrix<T> AA = U*H*U.adjoint(); if (Norm(A0-AA) > 0.001*Norm(A0)) { cerr<<"NonBlock Hessenberg: A = "<<Type(A)<<" "<<A0<<endl; cerr<<"A = "<<A<<endl; cerr<<"Ubeta = "<<Ubeta<<endl; cerr<<"U = "<<U<<endl; cerr<<"H = "<<H<<endl; cerr<<"UHUt = "<<AA<<endl; Matrix<T,ColMajor> A2 = A0; Vector<T> Ub2(Ubeta.size()); NonBlockHessenberg(A2.view(),Ub2.view()); cerr<<"cf NonBlock: A -> "<<A2<<endl; cerr<<"Ubeta = "<<Ub2<<endl; abort(); } #endif }
int main (void) { TESTIT_COMPLEX_ALLNEG (csin, 0, 0, -0.F, CONJ(0), CONJ(-0.F)); TESTIT_COMPLEX_R_ALLNEG (csin, 3.45678F + 2.34567FI, -1.633059F - 4.917448FI, 1.633059F - 4.917448FI, -1.633059F + 4.917448FI, 1.633059F + 4.917448FI); TESTIT_COMPLEX_ALLNEG (ccos, 0, CONJ(1), 1, 1, CONJ(1)); TESTIT_COMPLEX_R_ALLNEG (ccos, 3.45678F + 2.34567FI, -5.008512F + 1.603367FI, -5.008512F - 1.603367FI, -5.008512F - 1.603367FI, -5.008512F + 1.603367FI); TESTIT_COMPLEX_ALLNEG (ctan, 0, 0, -0.F, CONJ(0), CONJ(-0.F)); TESTIT_COMPLEX_R_ALLNEG (ctan, 3.45678F + 2.34567FI, 0.010657F + 0.985230FI, -0.010657F + 0.985230FI, 0.010657F - 0.985230FI, -0.010657F - 0.985230FI); TESTIT_COMPLEX_ALLNEG (csinh, 0, 0, -0.F, CONJ(0), CONJ(-0.F)); TESTIT_COMPLEX_R_ALLNEG (csinh, 3.45678F + 2.34567FI, -11.083178F + 11.341487FI, 11.083178F +11.341487FI, -11.083178F - 11.341487FI, 11.083178F -11.341487FI); TESTIT_COMPLEX_ALLNEG (ccosh, 0, 1, CONJ(1), CONJ(1), 1); TESTIT_COMPLEX_R_ALLNEG (ccosh, 3.45678F + 2.34567FI, -11.105238F + 11.318958FI,-11.105238F -11.318958FI, -11.105238F - 11.318958FI,-11.105238F +11.318958FI); TESTIT_COMPLEX_ALLNEG (ctanh, 0, 0, -0.F, CONJ(0), CONJ(-0.F)); TESTIT_COMPLEX_R_ALLNEG (ctanh, 3.45678F + 2.34567FI, 1.000040F - 0.001988FI, -1.000040F - 0.001988FI, 1.000040F + 0.001988FI, -1.000040F + 0.001988FI); TESTIT_COMPLEX (clog, 1, 0); TESTIT_COMPLEX_R (clog, -1, 3.141593FI); TESTIT_COMPLEX (clog, CONJ(1), CONJ(0)); /* Fails with mpc-0.6. */ TESTIT_COMPLEX_R (clog, CONJ(-1), CONJ(3.141593FI)); /* Fails with mpc-0.6. */ TESTIT_COMPLEX_R_ALLNEG (clog, 3.45678F + 2.34567FI, 1.429713F + 0.596199FI, 1.429713F + 2.545394FI, 1.429713F - 0.596199FI, 1.429713F - 2.545394FI); TESTIT_COMPLEX_ALLNEG (csqrt, 0, 0, 0, CONJ(0), CONJ(0)); TESTIT_COMPLEX_R_ALLNEG (csqrt, 3.45678F + 2.34567FI, 1.953750F + 0.600299FI, 0.600299F + 1.953750FI, 1.953750F - 0.600299FI, 0.600299F - 1.953750FI); TESTIT_COMPLEX2_ALLNEG (cpow, 1, 0, 1, 1, CONJ(1), CONJ(1), 1, CONJ(1), 1, 1, CONJ(1), CONJ(1), 1, 1, 1, 1, CONJ(1), 1); TESTIT_COMPLEX2_ALLNEG (cpow, 1.FI, 0, 1, 1, CONJ(1), 1, 1, CONJ(1), 1, 1, 1, CONJ(1), 1, 1, 1, 1, CONJ(1), 1); TESTIT_COMPLEX_R2_ALLNEG (cpow, 2, 3, 8, 8, CONJ(1/8.F), 1/8.F, CONJ(-8), -8, -1/8.F, -1/8.F, 8, CONJ(8), 1/8.F, 1/8.F, -8, -8, -1/8.F, CONJ(-1/8.F)); TESTIT_COMPLEX_R2_ALLNEG (cpow, 3, 4, 81, 81, CONJ(1/81.F), 1/81.F, 81, 81, CONJ(1/81.F), 1/81.F, 81, CONJ(81), 1/81.F, 1/81.F, 81, CONJ(81), 1/81.F, 1/81.F); TESTIT_COMPLEX_R2_ALLNEG (cpow, 3, 5, 243, 243, CONJ(1/243.F), 1/243.F, CONJ(-243), -243, -1/243.F, -1/243.F, 243, CONJ(243), 1/243.F, 1/243.F, -243, -243, -1/243.F, CONJ(-1/243.F)); TESTIT_COMPLEX_R2_ALLNEG (cpow, 4, 2, 16, 16, CONJ(1/16.F), 1/16.F, 16, 16, CONJ(1/16.F), 1/16.F, 16, CONJ(16), 1/16.F, 1/16.F, 16, CONJ(16), 1/16.F, 1/16.F); TESTIT_COMPLEX_R2_ALLNEG (cpow, 1.5, 3, 3.375F, 3.375F, CONJ(1/3.375F), 1/3.375F, CONJ(-3.375F), -3.375F, -1/3.375F, -1/3.375F, 3.375F, CONJ(3.375F), 1/3.375F, 1/3.375F, -3.375F, -3.375F, -1/3.375F, CONJ(-1/3.375F)); TESTIT_COMPLEX2 (cpow, 16, 0.25F, 2); TESTIT_COMPLEX_R2 (cpow, 3.45678F + 2.34567FI, 1.23456 + 4.56789FI, 0.212485F + 0.319304FI); TESTIT_COMPLEX_R2 (cpow, 3.45678F - 2.34567FI, 1.23456 + 4.56789FI, 78.576402F + -41.756208FI); TESTIT_COMPLEX_R2 (cpow, -1.23456F + 2.34567FI, 2.34567 - 1.23456FI, -110.629847F + -57.021655FI); TESTIT_COMPLEX_R2 (cpow, -1.23456F - 2.34567FI, 2.34567 - 1.23456FI, 0.752336F + 0.199095FI); return 0; }
void KLU_utsolve ( /* inputs, not modified: */ Int n, Int Uip [ ], Int Ulen [ ], Unit LU [ ], Entry Udiag [ ], Int nrhs, #ifdef COMPLEX Int conj_solve, #endif /* right-hand-side on input, solution to Ux=b on output */ Entry X [ ] ) { Entry x [4], uik, ukk ; Int k, p, len, i ; Int *Ui ; Entry *Ux ; switch (nrhs) { case 1: for (k = 0 ; k < n ; k++) { GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ; x [0] = X [k] ; for (p = 0 ; p < len ; p++) { #ifdef COMPLEX if (conj_solve) { /* x [0] -= CONJ (Ux [p]) * X [Ui [p]] ; */ MULT_SUB_CONJ (x [0], X [Ui [p]], Ux [p]) ; } else #endif { /* x [0] -= Ux [p] * X [Ui [p]] ; */ MULT_SUB (x [0], Ux [p], X [Ui [p]]) ; } } #ifdef COMPLEX if (conj_solve) { CONJ (ukk, Udiag [k]) ; } else #endif { ukk = Udiag [k] ; } DIV (X [k], x [0], ukk) ; } break ; case 2: for (k = 0 ; k < n ; k++) { GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ; x [0] = X [2*k ] ; x [1] = X [2*k + 1] ; for (p = 0 ; p < len ; p++) { i = Ui [p] ; #ifdef COMPLEX if (conj_solve) { CONJ (uik, Ux [p]) ; } else #endif { uik = Ux [p] ; } MULT_SUB (x [0], uik, X [2*i]) ; MULT_SUB (x [1], uik, X [2*i + 1]) ; } #ifdef COMPLEX if (conj_solve) { CONJ (ukk, Udiag [k]) ; } else #endif { ukk = Udiag [k] ; } DIV (X [2*k], x [0], ukk) ; DIV (X [2*k + 1], x [1], ukk) ; } break ; case 3: for (k = 0 ; k < n ; k++) { GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ; x [0] = X [3*k ] ; x [1] = X [3*k + 1] ; x [2] = X [3*k + 2] ; for (p = 0 ; p < len ; p++) { i = Ui [p] ; #ifdef COMPLEX if (conj_solve) { CONJ (uik, Ux [p]) ; } else #endif { uik = Ux [p] ; } MULT_SUB (x [0], uik, X [3*i]) ; MULT_SUB (x [1], uik, X [3*i + 1]) ; MULT_SUB (x [2], uik, X [3*i + 2]) ; } #ifdef COMPLEX if (conj_solve) { CONJ (ukk, Udiag [k]) ; } else #endif { ukk = Udiag [k] ; } DIV (X [3*k], x [0], ukk) ; DIV (X [3*k + 1], x [1], ukk) ; DIV (X [3*k + 2], x [2], ukk) ; } break ; case 4: for (k = 0 ; k < n ; k++) { GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ; x [0] = X [4*k ] ; x [1] = X [4*k + 1] ; x [2] = X [4*k + 2] ; x [3] = X [4*k + 3] ; for (p = 0 ; p < len ; p++) { i = Ui [p] ; #ifdef COMPLEX if (conj_solve) { CONJ (uik, Ux [p]) ; } else #endif { uik = Ux [p] ; } MULT_SUB (x [0], uik, X [4*i]) ; MULT_SUB (x [1], uik, X [4*i + 1]) ; MULT_SUB (x [2], uik, X [4*i + 2]) ; MULT_SUB (x [3], uik, X [4*i + 3]) ; } #ifdef COMPLEX if (conj_solve) { CONJ (ukk, Udiag [k]) ; } else #endif { ukk = Udiag [k] ; } DIV (X [4*k], x [0], ukk) ; DIV (X [4*k + 1], x [1], ukk) ; DIV (X [4*k + 2], x [2], ukk) ; DIV (X [4*k + 3], x [3], ukk) ; } break ; } }
void KLU_ltsolve ( /* inputs, not modified: */ Int n, Int Lip [ ], Int Llen [ ], Unit LU [ ], Int nrhs, #ifdef COMPLEX Int conj_solve, #endif /* right-hand-side on input, solution to L'x=b on output */ Entry X [ ] ) { Entry x [4], lik ; Int *Li ; Entry *Lx ; Int k, p, len, i ; switch (nrhs) { case 1: for (k = n-1 ; k >= 0 ; k--) { GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ; x [0] = X [k] ; for (p = 0 ; p < len ; p++) { #ifdef COMPLEX if (conj_solve) { /* x [0] -= CONJ (Lx [p]) * X [Li [p]] ; */ MULT_SUB_CONJ (x [0], X [Li [p]], Lx [p]) ; } else #endif { /*x [0] -= Lx [p] * X [Li [p]] ;*/ MULT_SUB (x [0], Lx [p], X [Li [p]]) ; } } X [k] = x [0] ; } break ; case 2: for (k = n-1 ; k >= 0 ; k--) { x [0] = X [2*k ] ; x [1] = X [2*k + 1] ; GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ; for (p = 0 ; p < len ; p++) { i = Li [p] ; #ifdef COMPLEX if (conj_solve) { CONJ (lik, Lx [p]) ; } else #endif { lik = Lx [p] ; } MULT_SUB (x [0], lik, X [2*i]) ; MULT_SUB (x [1], lik, X [2*i + 1]) ; } X [2*k ] = x [0] ; X [2*k + 1] = x [1] ; } break ; case 3: for (k = n-1 ; k >= 0 ; k--) { x [0] = X [3*k ] ; x [1] = X [3*k + 1] ; x [2] = X [3*k + 2] ; GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ; for (p = 0 ; p < len ; p++) { i = Li [p] ; #ifdef COMPLEX if (conj_solve) { CONJ (lik, Lx [p]) ; } else #endif { lik = Lx [p] ; } MULT_SUB (x [0], lik, X [3*i]) ; MULT_SUB (x [1], lik, X [3*i + 1]) ; MULT_SUB (x [2], lik, X [3*i + 2]) ; } X [3*k ] = x [0] ; X [3*k + 1] = x [1] ; X [3*k + 2] = x [2] ; } break ; case 4: for (k = n-1 ; k >= 0 ; k--) { x [0] = X [4*k ] ; x [1] = X [4*k + 1] ; x [2] = X [4*k + 2] ; x [3] = X [4*k + 3] ; GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ; for (p = 0 ; p < len ; p++) { i = Li [p] ; #ifdef COMPLEX if (conj_solve) { CONJ (lik, Lx [p]) ; } else #endif { lik = Lx [p] ; } MULT_SUB (x [0], lik, X [4*i]) ; MULT_SUB (x [1], lik, X [4*i + 1]) ; MULT_SUB (x [2], lik, X [4*i + 2]) ; MULT_SUB (x [3], lik, X [4*i + 3]) ; } X [4*k ] = x [0] ; X [4*k + 1] = x [1] ; X [4*k + 2] = x [2] ; X [4*k + 3] = x [3] ; } break ; } }
Int KLU_tsolve ( /* inputs, not modified */ KLU_symbolic<Entry, Int> *Symbolic, KLU_numeric<Entry, Int> *Numeric, Int d, /* leading dimension of B */ Int nrhs, /* number of right-hand-sides */ /* right-hand-side on input, overwritten with solution to Ax=b on output */ double B [ ], /* size n*nrhs, in column-oriented form, with * leading dimension d. */ #ifdef COMPLEX Int conj_solve, /* TRUE for conjugate transpose solve, FALSE for * array transpose solve. Used for the complex * case only. */ #endif /* --------------- */ KLU_common<Entry, Int> *Common ) { Entry x [4], offik, s ; double rs, *Rs ; Entry *Offx, *X, *Bz, *Udiag ; Int *Q, *R, *Pnum, *Offp, *Offi, *Lip, *Uip, *Llen, *Ulen ; Unit **LUbx ; Int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i ; /* ---------------------------------------------------------------------- */ /* check inputs */ /* ---------------------------------------------------------------------- */ if (Common == NULL) { return (FALSE) ; } if (Numeric == NULL || Symbolic == NULL || d < Symbolic->n || nrhs < 0 || B == NULL) { Common->status = KLU_INVALID ; return (FALSE) ; } Common->status = KLU_OK ; /* ---------------------------------------------------------------------- */ /* get the contents of the Symbolic object */ /* ---------------------------------------------------------------------- */ Bz = (Entry *) B ; n = Symbolic->n ; nblocks = Symbolic->nblocks ; Q = Symbolic->Q ; R = Symbolic->R ; /* ---------------------------------------------------------------------- */ /* get the contents of the Numeric object */ /* ---------------------------------------------------------------------- */ ASSERT (nblocks == Numeric->nblocks) ; Pnum = Numeric->Pnum ; Offp = Numeric->Offp ; Offi = Numeric->Offi ; Offx = (Entry *) Numeric->Offx ; Lip = Numeric->Lip ; Llen = Numeric->Llen ; Uip = Numeric->Uip ; Ulen = Numeric->Ulen ; LUbx = (Unit **) Numeric->LUbx ; Udiag = (Entry *) Numeric->Udiag ; Rs = Numeric->Rs ; X = (Entry *) Numeric->Xwork ; ASSERT (KLU_valid (n, Offp, Offi, Offx)) ; /* ---------------------------------------------------------------------- */ /* solve in chunks of 4 columns at a time */ /* ---------------------------------------------------------------------- */ for (chunk = 0 ; chunk < nrhs ; chunk += 4) { /* ------------------------------------------------------------------ */ /* get the size of the current chunk */ /* ------------------------------------------------------------------ */ nr = MIN (nrhs - chunk, 4) ; /* ------------------------------------------------------------------ */ /* permute the right hand side, X = Q'*B */ /* ------------------------------------------------------------------ */ switch (nr) { case 1: for (k = 0 ; k < n ; k++) { X [k] = Bz [Q [k]] ; } break ; case 2: for (k = 0 ; k < n ; k++) { i = Q [k] ; X [2*k ] = Bz [i ] ; X [2*k + 1] = Bz [i + d ] ; } break ; case 3: for (k = 0 ; k < n ; k++) { i = Q [k] ; X [3*k ] = Bz [i ] ; X [3*k + 1] = Bz [i + d ] ; X [3*k + 2] = Bz [i + d*2] ; } break ; case 4: for (k = 0 ; k < n ; k++) { i = Q [k] ; X [4*k ] = Bz [i ] ; X [4*k + 1] = Bz [i + d ] ; X [4*k + 2] = Bz [i + d*2] ; X [4*k + 3] = Bz [i + d*3] ; } break ; } /* ------------------------------------------------------------------ */ /* solve X = (L*U + Off)'\X */ /* ------------------------------------------------------------------ */ for (block = 0 ; block < nblocks ; block++) { /* -------------------------------------------------------------- */ /* the block of size nk is from rows/columns k1 to k2-1 */ /* -------------------------------------------------------------- */ k1 = R [block] ; k2 = R [block+1] ; nk = k2 - k1 ; PRINTF (("tsolve %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ; /* -------------------------------------------------------------- */ /* block back-substitution for the off-diagonal-block entries */ /* -------------------------------------------------------------- */ if (block > 0) { switch (nr) { case 1: for (k = k1 ; k < k2 ; k++) { pend = Offp [k+1] ; for (p = Offp [k] ; p < pend ; p++) { #ifdef COMPLEX if (conj_solve) { MULT_SUB_CONJ (X [k], X [Offi [p]], Offx [p]) ; } else #endif { MULT_SUB (X [k], Offx [p], X [Offi [p]]) ; } } } break ; case 2: for (k = k1 ; k < k2 ; k++) { pend = Offp [k+1] ; x [0] = X [2*k ] ; x [1] = X [2*k + 1] ; for (p = Offp [k] ; p < pend ; p++) { i = Offi [p] ; #ifdef COMPLEX if (conj_solve) { CONJ (offik, Offx [p]) ; } else #endif { offik = Offx [p] ; } MULT_SUB (x [0], offik, X [2*i]) ; MULT_SUB (x [1], offik, X [2*i + 1]) ; } X [2*k ] = x [0] ; X [2*k + 1] = x [1] ; } break ; case 3: for (k = k1 ; k < k2 ; k++) { pend = Offp [k+1] ; x [0] = X [3*k ] ; x [1] = X [3*k + 1] ; x [2] = X [3*k + 2] ; for (p = Offp [k] ; p < pend ; p++) { i = Offi [p] ; #ifdef COMPLEX if (conj_solve) { CONJ (offik, Offx [p]) ; } else #endif { offik = Offx [p] ; } MULT_SUB (x [0], offik, X [3*i]) ; MULT_SUB (x [1], offik, X [3*i + 1]) ; MULT_SUB (x [2], offik, X [3*i + 2]) ; } X [3*k ] = x [0] ; X [3*k + 1] = x [1] ; X [3*k + 2] = x [2] ; } break ; case 4: for (k = k1 ; k < k2 ; k++) { pend = Offp [k+1] ; x [0] = X [4*k ] ; x [1] = X [4*k + 1] ; x [2] = X [4*k + 2] ; x [3] = X [4*k + 3] ; for (p = Offp [k] ; p < pend ; p++) { i = Offi [p] ; #ifdef COMPLEX if (conj_solve) { CONJ(offik, Offx [p]) ; } else #endif { offik = Offx [p] ; } MULT_SUB (x [0], offik, X [4*i]) ; MULT_SUB (x [1], offik, X [4*i + 1]) ; MULT_SUB (x [2], offik, X [4*i + 2]) ; MULT_SUB (x [3], offik, X [4*i + 3]) ; } X [4*k ] = x [0] ; X [4*k + 1] = x [1] ; X [4*k + 2] = x [2] ; X [4*k + 3] = x [3] ; } break ; } } /* -------------------------------------------------------------- */ /* solve the block system */ /* -------------------------------------------------------------- */ if (nk == 1) { #ifdef COMPLEX if (conj_solve) { CONJ (s, Udiag [k1]) ; } else #endif { s = Udiag [k1] ; } switch (nr) { case 1: DIV (X [k1], X [k1], s) ; break ; case 2: DIV (X [2*k1], X [2*k1], s) ; DIV (X [2*k1 + 1], X [2*k1 + 1], s) ; break ; case 3: DIV (X [3*k1], X [3*k1], s) ; DIV (X [3*k1 + 1], X [3*k1 + 1], s) ; DIV (X [3*k1 + 2], X [3*k1 + 2], s) ; break ; case 4: DIV (X [4*k1], X [4*k1], s) ; DIV (X [4*k1 + 1], X [4*k1 + 1], s) ; DIV (X [4*k1 + 2], X [4*k1 + 2], s) ; DIV (X [4*k1 + 3], X [4*k1 + 3], s) ; break ; } } else { KLU_utsolve (nk, Uip + k1, Ulen + k1, LUbx [block], Udiag + k1, nr, #ifdef COMPLEX conj_solve, #endif X + nr*k1) ; KLU_ltsolve (nk, Lip + k1, Llen + k1, LUbx [block], nr, #ifdef COMPLEX conj_solve, #endif X + nr*k1) ; } } /* ------------------------------------------------------------------ */ /* scale and permute the result, Bz = P'(R\X) */ /* ------------------------------------------------------------------ */ if (Rs == NULL) { /* no scaling */ switch (nr) { case 1: for (k = 0 ; k < n ; k++) { Bz [Pnum [k]] = X [k] ; } break ; case 2: for (k = 0 ; k < n ; k++) { i = Pnum [k] ; Bz [i ] = X [2*k ] ; Bz [i + d ] = X [2*k + 1] ; } break ; case 3: for (k = 0 ; k < n ; k++) { i = Pnum [k] ; Bz [i ] = X [3*k ] ; Bz [i + d ] = X [3*k + 1] ; Bz [i + d*2] = X [3*k + 2] ; } break ; case 4: for (k = 0 ; k < n ; k++) { i = Pnum [k] ; Bz [i ] = X [4*k ] ; Bz [i + d ] = X [4*k + 1] ; Bz [i + d*2] = X [4*k + 2] ; Bz [i + d*3] = X [4*k + 3] ; } break ; } } else { switch (nr) { case 1: for (k = 0 ; k < n ; k++) { SCALE_DIV_ASSIGN (Bz [Pnum [k]], X [k], Rs [k]) ; } break ; case 2: for (k = 0 ; k < n ; k++) { i = Pnum [k] ; rs = Rs [k] ; SCALE_DIV_ASSIGN (Bz [i], X [2*k], rs) ; SCALE_DIV_ASSIGN (Bz [i + d], X [2*k + 1], rs) ; } break ; case 3: for (k = 0 ; k < n ; k++) { i = Pnum [k] ; rs = Rs [k] ; SCALE_DIV_ASSIGN (Bz [i], X [3*k], rs) ; SCALE_DIV_ASSIGN (Bz [i + d], X [3*k + 1], rs) ; SCALE_DIV_ASSIGN (Bz [i + d*2], X [3*k + 2], rs) ; } break ; case 4: for (k = 0 ; k < n ; k++) { i = Pnum [k] ; rs = Rs [k] ; SCALE_DIV_ASSIGN (Bz [i], X [4*k], rs) ; SCALE_DIV_ASSIGN (Bz [i + d], X [4*k + 1], rs) ; SCALE_DIV_ASSIGN (Bz [i + d*2], X [4*k + 2], rs) ; SCALE_DIV_ASSIGN (Bz [i + d*3], X [4*k + 3], rs) ; } break ; } } /* ------------------------------------------------------------------ */ /* go to the next chunk of B */ /* ------------------------------------------------------------------ */ Bz += d*4 ; } return (TRUE) ; }
int main() { ptridiag T, Tcopy; pamatrix A, Acopy, Q, U, Vt; pavector work; prealavector sigma, lambda; real error; uint rows, cols, mid; uint i, n, iter; int info; /* ------------------------------------------------------------ * Testing symmetric tridiagonal eigenvalue solver * ------------------------------------------------------------ */ n = 6; /* Testing symmetric tridiagonal eigenvalue solver */ (void) printf("==================================================\n" "Testing symmetric tridiagonal eigenvalue solver\n" "==================================================\n" "Setting up %u x %u tridiagonal matrix\n", n, n); T = new_tridiag(n); for (i = 0; i < n; i++) T->d[i] = 2.0; for (i = 0; i < n - 1; i++) T->l[i] = T->u[i] = -1.0; Tcopy = new_tridiag(n); copy_tridiag(T, Tcopy); A = new_amatrix(n, n); clear_amatrix(A); for (i = 0; i < n; i++) A->a[i + i * A->ld] = T->d[i]; for (i = 0; i < n - 1; i++) { A->a[(i + 1) + i * A->ld] = T->l[i]; A->a[i + (i + 1) * A->ld] = T->u[i]; } Acopy = clone_amatrix(A); Q = new_identity_amatrix(n, n); U = new_amatrix(n, n); (void) printf("Performing self-made implicit QR iteration\n"); iter = sb_muleig_tridiag(T, Q, 8 * n); (void) printf(" %u iterations\n", iter); (void) printf("Checking accuracy\n"); error = check_ortho_amatrix(false, Q); (void) printf(" Orthogonality Q %g, %sokay\n", error, (error < tolerance ? "" : " NOT ")); if (error >= tolerance) problems++; copy_amatrix(false, Q, U); diageval_tridiag_amatrix(1.0, true, T, true, U); addmul_amatrix(-1.0, false, U, true, Q, A); error = normfrob_amatrix(A); (void) printf(" Accuracy %g. %sokay\n", error, (error < tolerance ? "" : " NOT ")); if (error >= tolerance) problems++; (void) printf("Performing default implicit QR iteration\n"); identity_amatrix(Q); i = muleig_tridiag(Tcopy, Q); if (i == 0) (void) printf(" Success\n"); else { (void) printf(" Failure\n"); problems++; } (void) printf("Checking accuracy\n"); error = check_ortho_amatrix(false, Q); (void) printf(" Orthogonality Q %g, %sokay\n", error, (error < tolerance ? "" : " NOT ")); if (error >= tolerance) problems++; copy_amatrix(false, Q, U); diageval_tridiag_amatrix(1.0, true, Tcopy, true, U); addmul_amatrix(-1.0, false, U, true, Q, Acopy); error = normfrob_amatrix(Acopy); (void) printf(" Accuracy %g. %sokay\n", error, (error < tolerance ? "" : " NOT ")); if (error >= tolerance) problems++; del_amatrix(U); del_amatrix(Q); del_amatrix(Acopy); del_amatrix(A); del_tridiag(Tcopy); del_tridiag(T); (void) printf("--------------------------------------------------\n" "Setting up random %u x %u tridiagonal matrix\n", n, n); T = new_tridiag(n); for (i = 0; i < n; i++) T->d[i] = 2.0 * rand() / RAND_MAX - 1.0; for (i = 0; i < n - 1; i++) { T->l[i] = 2.0 * rand() / RAND_MAX - 1.0; T->u[i] = CONJ(T->l[i]); } A = new_amatrix(n, n); clear_amatrix(A); for (i = 0; i < n; i++) A->a[i + i * A->ld] = T->d[i]; for (i = 0; i < n - 1; i++) { A->a[(i + 1) + i * A->ld] = T->l[i]; A->a[i + (i + 1) * A->ld] = T->u[i]; } Tcopy = new_tridiag(n); copy_tridiag(T, Tcopy); Acopy = clone_amatrix(A); Q = new_identity_amatrix(n, n); U = new_amatrix(n, n); (void) printf("Performing implicit QR iteration\n"); iter = sb_muleig_tridiag(T, Q, 8 * n); (void) printf(" %u iterations\n", iter); (void) printf("Checking accuracy\n"); error = check_ortho_amatrix(false, Q); (void) printf(" Orthogonality Q %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; copy_amatrix(false, Q, U); diageval_tridiag_amatrix(1.0, true, T, true, U); addmul_amatrix(-1.0, false, U, true, Q, A); error = normfrob_amatrix(A); (void) printf(" Accuracy %g, %sokay\n", error, (error < tolerance ? "" : " NOT ")); if (error >= tolerance) problems++; (void) printf("Using default eigenvalue solver\n"); identity_amatrix(Q); muleig_tridiag(Tcopy, Q); (void) printf("Checking accuracy\n"); error = check_ortho_amatrix(false, Q); (void) printf(" Orthogonality Q %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; copy_amatrix(false, Q, U); diageval_tridiag_amatrix(1.0, true, Tcopy, true, U); addmul_amatrix(-1.0, false, U, true, Q, Acopy); error = normfrob_amatrix(Acopy); (void) printf(" Accuracy %g, %sokay\n", error, (error < tolerance ? "" : " NOT ")); if (error >= tolerance) problems++; del_amatrix(U); del_amatrix(Q); del_amatrix(Acopy); del_amatrix(A); del_tridiag(Tcopy); del_tridiag(T); /* ------------------------------------------------------------ * Testing self-adjoint matrix eigenvalue solver * ------------------------------------------------------------ */ (void) printf("==================================================\n" "Testing self-adjoint matrix eigenvalue solver\n" "==================================================\n" "Setting up random %u x %u self-adjoint matrix\n", n, n); A = new_amatrix(n, n); random_selfadjoint_amatrix(A); Acopy = new_amatrix(n, n); copy_amatrix(false, A, Acopy); lambda = new_realavector(n); Q = new_identity_amatrix(n, n); (void) printf("Performing implicit QR iteration\n"); iter = sb_eig_amatrix(A, lambda, Q, 8 * n); (void) printf(" %u iterations\n", iter); (void) printf("Checking accuracy\n"); error = check_ortho_amatrix(false, Q); (void) printf(" Orthogonality Q %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; U = new_amatrix(n, n); copy_amatrix(false, Q, U); copy_amatrix(false, Acopy, A); diageval_realavector_amatrix(1.0, true, lambda, true, U); addmul_amatrix(-1.0, false, U, true, Q, A); error = normfrob_amatrix(A); (void) printf(" Accuracy %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; (void) printf("Using default eigenvalue solver\n"); copy_amatrix(false, Acopy, A); info = eig_amatrix(A, lambda, Q); assert(info == 0); (void) printf("Checking accuracy\n"); error = check_ortho_amatrix(false, Q); (void) printf(" Orthogonality Q %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; copy_amatrix(false, Q, U); diageval_realavector_amatrix(1.0, true, lambda, true, U); addmul_amatrix(-1.0, false, U, true, Q, Acopy); error = normfrob_amatrix(Acopy); (void) printf(" Accuracy %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; del_amatrix(U); del_amatrix(Q); del_realavector(lambda); del_amatrix(Acopy); del_amatrix(A); /* ------------------------------------------------------------ * Testing bidiagonal SVD solver * ------------------------------------------------------------ */ (void) printf("==================================================\n" "Testing bidiagonal SVD solver\n" "==================================================\n" "Setting up bidiagonal %u x %u matrix\n", n, n); T = new_tridiag(n); for (i = 0; i < n; i++) T->d[i] = i + 1.0; for (i = 0; i < n - 1; i++) { T->l[i] = 1.0; T->u[i] = 0.0; } A = new_amatrix(n, n); clear_amatrix(A); for (i = 0; i < n; i++) A->a[i + i * A->ld] = T->d[i]; for (i = 0; i < n - 1; i++) A->a[(i + 1) + i * A->ld] = T->l[i]; Tcopy = new_tridiag(n); copy_tridiag(T, Tcopy); Acopy = new_amatrix(n, n); copy_amatrix(false, A, Acopy); U = new_identity_amatrix(n, n); Vt = new_identity_amatrix(n, n); (void) printf("Performing self-made implicit SVD iteration\n"); iter = sb_mulsvd_tridiag(T, U, Vt, 8 * n); (void) printf(" %u iterations\n", iter); (void) printf("Checking accuracy\n"); error = check_ortho_amatrix(false, U); (void) printf(" Orthogonality U %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; error = check_ortho_amatrix(true, Vt); (void) printf(" Orthogonality Vt %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; diageval_tridiag_amatrix(1.0, true, T, true, U); addmul_amatrix(-1.0, false, U, false, Vt, A); error = normfrob_amatrix(A); (void) printf(" Accuracy %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; (void) printf("Using default SVD solver\n"); copy_tridiag(Tcopy, T); identity_amatrix(U); identity_amatrix(Vt); mulsvd_tridiag(T, U, Vt); (void) printf("Checking accuracy\n"); error = check_ortho_amatrix(false, U); (void) printf(" Orthogonality U %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; error = check_ortho_amatrix(true, Vt); (void) printf(" Orthogonality Vt %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; copy_amatrix(false, Acopy, A); diageval_tridiag_amatrix(1.0, true, T, true, U); addmul_amatrix(-1.0, false, U, false, Vt, A); error = normfrob_amatrix(A); (void) printf(" Accuracy %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; del_amatrix(Vt); del_amatrix(U); del_amatrix(Acopy); del_tridiag(Tcopy); del_amatrix(A); del_tridiag(T); (void) printf("--------------------------------------------------\n" "Setting up random bidiagonal %u x %u matrix\n", n, n); T = new_tridiag(n); for (i = 0; i < n; i++) { T->d[i] = 2.0 * rand() / RAND_MAX - 1.0; } for (i = 0; i < n - 1; i++) { T->l[i] = 2.0 * rand() / RAND_MAX - 1.0; T->u[i] = 0.0; } A = new_amatrix(n, n); clear_amatrix(A); for (i = 0; i < n; i++) A->a[i + i * A->ld] = T->d[i]; for (i = 0; i < n - 1; i++) A->a[(i + 1) + i * A->ld] = T->l[i]; Tcopy = new_tridiag(n); copy_tridiag(T, Tcopy); Acopy = clone_amatrix(A); U = new_identity_amatrix(n, n); Vt = new_identity_amatrix(n, n); (void) printf("Performing implicit SVD iteration\n"); iter = sb_mulsvd_tridiag(T, U, Vt, 8 * n); (void) printf(" %u iterations\n", iter); (void) printf("Checking accuracy\n"); error = check_ortho_amatrix(false, U); (void) printf(" Orthogonality U %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; error = check_ortho_amatrix(true, Vt); (void) printf(" Orthogonality Vt %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; diageval_tridiag_amatrix(1.0, true, T, true, U); addmul_amatrix(-1.0, false, U, false, Vt, A); error = normfrob_amatrix(A); (void) printf(" Accuracy %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; (void) printf("Using default SVD solver\n"); copy_tridiag(Tcopy, T); copy_amatrix(false, Acopy, A); identity_amatrix(U); identity_amatrix(Vt); mulsvd_tridiag(T, U, Vt); (void) printf("Checking accuracy\n"); error = check_ortho_amatrix(false, U); (void) printf(" Orthogonality U %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; error = check_ortho_amatrix(true, Vt); (void) printf(" Orthogonality Vt %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; diageval_tridiag_amatrix(1.0, true, T, true, U); addmul_amatrix(-1.0, false, U, false, Vt, A); error = normfrob_amatrix(A); (void) printf(" Accuracy %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; del_amatrix(Vt); del_amatrix(U); del_amatrix(Acopy); del_tridiag(Tcopy); del_amatrix(A); del_tridiag(T); /* ------------------------------------------------------------ * Testing Golub-Kahan bidiagonalization * ------------------------------------------------------------ */ rows = 10; cols = 7; mid = UINT_MIN(rows, cols); (void) printf("==================================================\n" "Testing Golub-Kahan bidiagonalization\n" "==================================================\n" "Setting up random %u x %u matrix\n", rows, cols); A = new_amatrix(rows, cols); random_amatrix(A); Acopy = new_amatrix(rows, cols); copy_amatrix(false, A, Acopy); U = new_amatrix(rows, mid); Vt = new_amatrix(mid, cols); T = new_tridiag(mid); (void) printf("Bidiagonalizing\n"); bidiagonalize_amatrix(A, T, U, Vt); (void) printf("Checking accuracy\n"); error = check_ortho_amatrix(false, U); (void) printf(" Orthogonality U %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; error = check_ortho_amatrix(true, Vt); (void) printf(" Orthogonality Vt %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; lowereval_tridiag_amatrix(1.0, true, T, true, U); addmul_amatrix(-1.0, false, U, false, Vt, Acopy); error = normfrob_amatrix(Acopy); (void) printf(" Accuracy %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; del_tridiag(T); del_amatrix(Vt); del_amatrix(U); del_amatrix(Acopy); del_amatrix(A); rows = 8; cols = 15; mid = UINT_MIN(rows, cols); (void) printf("--------------------------------------------------\n" "Setting up %u x %u matrix\n", rows, cols); A = new_amatrix(rows, cols); random_amatrix(A); Acopy = new_amatrix(rows, cols); copy_amatrix(false, A, Acopy); U = new_amatrix(rows, mid); Vt = new_amatrix(mid, cols); T = new_tridiag(mid); (void) printf("Bidiagonalizing\n"); bidiagonalize_amatrix(A, T, U, Vt); (void) printf("Checking accuracy\n"); error = check_ortho_amatrix(false, U); (void) printf(" Orthogonality U %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; error = check_ortho_amatrix(true, Vt); (void) printf(" Orthogonality Vt %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; lowereval_tridiag_amatrix(1.0, true, T, true, U); addmul_amatrix(-1.0, false, U, false, Vt, Acopy); error = normfrob_amatrix(Acopy); (void) printf(" Accuracy %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; del_tridiag(T); del_amatrix(Vt); del_amatrix(U); del_amatrix(Acopy); del_amatrix(A); /* ------------------------------------------------------------ * Testing general SVD solver * ------------------------------------------------------------ */ (void) printf("==================================================\n" "Testing general SVD solver\n" "==================================================\n" "Setting up 3 x 4 matrix\n"); A = new_amatrix(3, 4); setentry_amatrix(A, 0, 0, 1.0); setentry_amatrix(A, 1, 0, 2.0); setentry_amatrix(A, 2, 0, 3.0); setentry_amatrix(A, 0, 1, 2.0); setentry_amatrix(A, 1, 1, 4.0); setentry_amatrix(A, 2, 1, 6.0); setentry_amatrix(A, 0, 2, 2.0); setentry_amatrix(A, 1, 2, 5.0); setentry_amatrix(A, 2, 2, 8.0); setentry_amatrix(A, 0, 3, 1.0); setentry_amatrix(A, 1, 3, 4.0); setentry_amatrix(A, 2, 3, 7.0); Acopy = new_amatrix(A->rows, A->cols); copy_amatrix(false, A, Acopy); U = new_identity_amatrix(3, 3); Vt = new_identity_amatrix(3, 4); sigma = new_realavector(3); work = new_avector(3 * 3); (void) printf("Running self-made SVD solver\n"); iter = sb_svd_amatrix(A, sigma, U, Vt, 24); (void) printf(" %u iterations\n", iter); (void) printf("Checking accuracy\n"); error = check_ortho_amatrix(false, U); (void) printf(" Orthogonality U %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; error = check_ortho_amatrix(true, Vt); (void) printf(" Orthogonality Vt %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; diageval_realavector_amatrix(1.0, true, sigma, true, U); addmul_amatrix(-1.0, false, U, false, Vt, Acopy); error = normfrob_amatrix(Acopy); (void) printf(" Accuracy %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; del_avector(work); del_realavector(sigma); del_amatrix(Vt); del_amatrix(U); del_amatrix(Acopy); del_amatrix(A); (void) printf("--------------------------------------------------\n" "Setting up 4 x 3 matrix\n"); A = new_amatrix(4, 3); setentry_amatrix(A, 0, 0, 1.0); setentry_amatrix(A, 0, 1, 2.0); setentry_amatrix(A, 0, 2, 3.0); setentry_amatrix(A, 1, 0, 2.0); setentry_amatrix(A, 1, 1, 4.0); setentry_amatrix(A, 1, 2, 6.0); setentry_amatrix(A, 2, 0, 2.0); setentry_amatrix(A, 2, 1, 5.0); setentry_amatrix(A, 2, 2, 8.0); setentry_amatrix(A, 3, 0, 1.0); setentry_amatrix(A, 3, 1, 4.0); setentry_amatrix(A, 3, 2, 7.0); Acopy = new_amatrix(A->rows, A->cols); copy_amatrix(false, A, Acopy); U = new_amatrix(4, 3); Vt = new_amatrix(3, 3); sigma = new_realavector(3); work = new_avector(3 * 3); (void) printf("Running self-made SVD solver\n"); iter = sb_svd_amatrix(A, sigma, U, Vt, 24); (void) printf(" %u iterations\n", iter); (void) printf("Checking accuracy\n"); error = check_ortho_amatrix(false, U); (void) printf(" Orthogonality U %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; error = check_ortho_amatrix(true, Vt); (void) printf(" Orthogonality V %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; diageval_realavector_amatrix(1.0, true, sigma, true, U); addmul_amatrix(-1.0, false, U, false, Vt, Acopy); error = normfrob_amatrix(Acopy); (void) printf(" Accuracy %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; del_avector(work); del_realavector(sigma); del_amatrix(Vt); del_amatrix(U); del_amatrix(Acopy); del_amatrix(A); (void) printf("--------------------------------------------------\n" "Setting up 4 x 3 matrix\n"); A = new_amatrix(4, 3); setentry_amatrix(A, 0, 0, 1.0); setentry_amatrix(A, 0, 1, 2.0); setentry_amatrix(A, 0, 2, 3.0); setentry_amatrix(A, 1, 0, 2.0); setentry_amatrix(A, 1, 1, 4.0); setentry_amatrix(A, 1, 2, 6.0); setentry_amatrix(A, 2, 0, 2.0); setentry_amatrix(A, 2, 1, 5.0); setentry_amatrix(A, 2, 2, 8.0); setentry_amatrix(A, 3, 0, 1.0); setentry_amatrix(A, 3, 1, 4.0); setentry_amatrix(A, 3, 2, 7.0); Acopy = clone_amatrix(A); U = new_amatrix(4, 3); Vt = new_amatrix(3, 3); sigma = new_realavector(3); work = new_avector(3 * 3); (void) printf("Running self-made SVD solver\n"); iter = sb_svd_amatrix(A, sigma, U, Vt, 24); (void) printf(" %u iterations\n", iter); (void) printf("Checking accuracy\n"); error = check_ortho_amatrix(false, U); (void) printf(" Orthogonality U %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; error = check_ortho_amatrix(true, Vt); (void) printf(" Orthogonality Vt %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; diageval_realavector_amatrix(1.0, true, sigma, true, U); addmul_amatrix(-1.0, false, U, false, Vt, Acopy); error = normfrob_amatrix(Acopy); (void) printf(" Accuracy %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; del_avector(work); del_realavector(sigma); del_amatrix(Vt); del_amatrix(U); del_amatrix(Acopy); del_amatrix(A); rows = 9; cols = 7; mid = UINT_MIN(rows, cols); (void) printf("--------------------------------------------------\n" "Setting up random %u x %u matrix\n", rows, cols); A = new_amatrix(rows, cols); random_amatrix(A); Acopy = new_amatrix(A->rows, A->cols); copy_amatrix(false, A, Acopy); U = new_amatrix(rows, mid); Vt = new_amatrix(mid, cols); sigma = new_realavector(mid); work = new_avector(3 * mid); (void) printf("Running self-made SVD solver\n"); iter = sb_svd_amatrix(A, sigma, U, Vt, 10 * mid); (void) printf(" %u iterations\n", iter); (void) printf("Checking accuracy\n"); error = check_ortho_amatrix(false, U); (void) printf(" Orthogonality U %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; error = check_ortho_amatrix(true, Vt); (void) printf(" Orthogonality Vt %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; diageval_realavector_amatrix(1.0, true, sigma, true, U); addmul_amatrix(-1.0, false, U, false, Vt, Acopy); error = normfrob_amatrix(Acopy); (void) printf(" Accuracy %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; del_avector(work); del_realavector(sigma); del_amatrix(Vt); del_amatrix(U); del_amatrix(Acopy); del_amatrix(A); rows = 10; cols = 6; mid = UINT_MIN(rows, cols); (void) printf("--------------------------------------------------\n" "Setting up random %u x %u matrix\n", rows, cols); A = new_amatrix(rows, cols); random_amatrix(A); Acopy = new_amatrix(A->rows, A->cols); copy_amatrix(false, A, Acopy); U = new_amatrix(rows, mid); Vt = new_amatrix(mid, cols); sigma = new_realavector(mid); work = new_avector(3 * mid); (void) printf("Running self-made SVD solver\n"); iter = sb_svd_amatrix(A, sigma, U, Vt, 10 * mid); (void) printf(" %u iterations\n", iter); (void) printf("Checking accuracy\n"); error = check_ortho_amatrix(false, U); (void) printf(" Orthogonality U %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; error = check_ortho_amatrix(true, Vt); (void) printf(" Orthogonality Vt %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; copy_amatrix(false, Acopy, A); diageval_realavector_amatrix(1.0, true, sigma, true, U); addmul_amatrix(-1.0, false, U, false, Vt, A); error = normfrob_amatrix(A); (void) printf(" Accuracy %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; (void) printf("Running default SVD solver\n"); copy_amatrix(false, Acopy, A); svd_amatrix(A, sigma, U, Vt); (void) printf("Checking accuracy\n"); error = check_ortho_amatrix(false, U); (void) printf(" Orthogonality U %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; error = check_ortho_amatrix(true, Vt); (void) printf(" Orthogonality Vt %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; copy_amatrix(false, Acopy, A); diageval_realavector_amatrix(1.0, true, sigma, true, U); addmul_amatrix(-1.0, false, U, false, Vt, A); error = normfrob_amatrix(A); (void) printf(" Accuracy %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; del_avector(work); del_realavector(sigma); del_amatrix(Vt); del_amatrix(U); del_amatrix(Acopy); del_amatrix(A); rows = 7; cols = 13; mid = UINT_MIN(rows, cols); (void) printf("--------------------------------------------------\n" "Setting up random %u x %u matrix\n", rows, cols); A = new_amatrix(rows, cols); random_amatrix(A); Acopy = new_amatrix(A->rows, A->cols); copy_amatrix(false, A, Acopy); U = new_amatrix(rows, mid); Vt = new_amatrix(mid, cols); sigma = new_realavector(mid); work = new_avector(3 * mid); (void) printf("Running self-made SVD solver\n"); iter = sb_svd_amatrix(A, sigma, U, Vt, 10 * mid); (void) printf(" %u iterations\n", iter); (void) printf("Checking accuracy\n"); error = check_ortho_amatrix(false, U); (void) printf(" Orthogonality U %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; error = check_ortho_amatrix(true, Vt); (void) printf(" Orthogonality Vt %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; copy_amatrix(false, Acopy, A); diageval_realavector_amatrix(1.0, true, sigma, true, U); addmul_amatrix(-1.0, false, U, false, Vt, A); error = normfrob_amatrix(A); (void) printf(" Accuracy %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; (void) printf("Running default SVD solver\n"); copy_amatrix(false, Acopy, A); svd_amatrix(A, sigma, U, Vt); (void) printf("Checking accuracy\n"); error = check_ortho_amatrix(false, U); (void) printf(" Orthogonality U %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; error = check_ortho_amatrix(true, Vt); (void) printf(" Orthogonality Vt %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; copy_amatrix(false, Acopy, A); diageval_realavector_amatrix(1.0, true, sigma, true, U); addmul_amatrix(-1.0, false, U, false, Vt, A); error = normfrob_amatrix(A); (void) printf(" Accuracy %g, %sokay\n", error, (error < tolerance ? "" : "NOT ")); if (error >= tolerance) problems++; del_avector(work); del_realavector(sigma); del_amatrix(Vt); del_amatrix(U); del_amatrix(Acopy); del_amatrix(A); printf("----------------------------------------\n" " %u matrices and\n" " %u vectors still active\n" " %u errors found\n", getactives_amatrix(), getactives_avector(), problems); return problems; }
/* Executa o programa */ int Mepa::exec(){ int op; if(detalha) { p->imprime(); /* Imprime o programa */ } op = p->next(); while( op != PARA ) { if(detalha) { debug(); } switch(op) { /* Funcoes MEPA */ case 0: { AMEM(); break; } case 1: { ARMI(); break; } case 2: { ARMP(); break; } case 3: { ARMZ(); break; } case 4: { CHPP(); break; } case 5: { CHPR(); break; } case 6: { CMAF(); break; } case 7: { CMAG(); break; } case 8: { CMDF(); break; } case 9: { CMDG(); break; } case 10: { CMEF(); break; } case 11: { CMEG(); break; } case 12: { CMIF(); break; } case 13: { CMIG(); break; } case 14: { CMMA(); break; } case 15: { CMMF(); break; } case 16: { CMME(); break; } case 17: { CMNF(); break; } case 18: { CONJ(); break; } case 19: { CRCT(); break; } case 20: { CRCF(); break; } case 21: { CREG(); break; } case 22: { CREN(); break; } case 23: { CRVI(); break; } case 24: { CRVL(); break; } case 25: { CRVP(); break; } case 26: { DISJ(); break; } case 27: { DIVF(); break; } case 28: { DIVI(); break; } case 29: { DMEM(); break; } case 30: { DSVF(); break; } case 31: { DSVS(); break; } case 32: { ENTR(); break; } case 33: { IMPC(); break; } case 34: { IMPF(); break; } case 35: { IMPR(); break; } case 36: { INPP(); break; } case 37: { INVF(); break; } case 38: { INVR(); break; } case 39: { LEIT(); break; } case 40: { LEIF(); break; } case 41: { MULF(); break; } case 42: { MULT(); break; } case 43: { NEGA(); break; } case 44: { RTPR(); break; } case 45: { SOMA(); break; } case 46: { SOMF(); break; } case 47: { SUBT(); break; } case 48: { SUBF(); break; } default:{ cerr << "O programa executou uma operacao invalida." << endl; cerr << "i = " << p->getI() << endl; abort(); } } op = p->next(); } return 0; }