/** * \brief Cholesky factorization * * Compute the Cholesky factorization of a symmetric positive definite N-by-N matrix A. * * \param[in,out] A * On entry, the symmetric matrix A. If *ARMAS_UPPER* is set the upper triangular part * of A contains the upper triangular part of the matrix A, and strictly * lower part A is not referenced. If *ARMAS_LOWER* is set the lower triangular part * of a contains the lower triangular part of the matrix A. Likewise, the * strictly upper part of A is not referenced. On exit, factor U or L from the * Cholesky factorization \f$ A = U^T U \f$ or \f$ A = L L^T \f$ * \param[in] W * Workspace for pivoting factorization. * \param[out] P * Optional pivot array. If non null then pivoting factorization is computed. * \param[in] flags * The matrix structure indicator, *ARMAS_UPPER* for upper tridiagonal and * *ARMAS_LOWER* for lower tridiagonal matrix. * \param[in,out] conf * Optional blocking configuration. If not provided default blocking configuration * will be used. * \retval 0 Success * \retval -1 Error, `conf.error` holds error code * * Compatible with lapack.DPOTRF * \ingroup lapack */ int armas_x_cholfactor(armas_x_dense_t *A, armas_pivot_t *P, int flags, armas_conf_t *conf) { int err = 0; armas_wbuf_t wb = ARMAS_WBNULL; if (!conf) conf = armas_conf_default(); if (!A) { conf->error = ARMAS_EINVAL; return -1; } if (P != ARMAS_NOPIVOT) { if (!armas_walloc(&wb, A->cols*sizeof(DTYPE))) { conf->error = ARMAS_EMEMORY; return -1; } err = armas_x_cholfactor_w(A, P, flags, &wb, conf); armas_wrelease(&wb); return err; } return armas_x_cholfactor_w(A, ARMAS_NOPIVOT, flags, ARMAS_NOWORK, conf); }
void __trmvf(char *uplo, char *trans, char *diag, int *n, DTYPE *A, int *lda, DTYPE *X, int *incx) { armas_conf_t *conf = armas_conf_default(); armas_x_dense_t a, x; int flags = 0; switch (toupper(*uplo)) { case 'L': flags |= ARMAS_LOWER; break; case 'U': default: flags |= ARMAS_UPPER; break; } if (toupper(*trans) == 'T') flags |= ARMAS_TRANS; if (toupper(*diag) == 'U') flags |= ARMAS_UNIT; armas_x_make(&a, *n, *n, *lda, A); if (*incx == 1) { armas_x_make(&x, *n, 1, *n, X); } else { armas_x_make(&x, 1, *n, *incx, X); } armas_x_mvmult_trm(&x, __ONE, &a, flags, conf); }
void __cblas_symv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, int N, DTYPE alpha, DTYPE *A, int lda, DTYPE *X, int incx, DTYPE beta, DTYPE *Y, int incy) { armas_x_dense_t Aa, x, y; armas_conf_t conf = *armas_conf_default(); int flags; switch (order) { case CblasRowMajor: flags = uplo == CblasUpper ? ARMAS_LOWER : ARMAS_UPPER; break; case CblasColMajor: default: flags = uplo == CblasUpper ? ARMAS_UPPER : ARMAS_LOWER; break; } armas_x_make(&Aa, N, N, lda, A); if (incx == 1) { armas_x_make(&x, N, 1, N, X); } else { armas_x_make(&x, 1, N, incx, X); } if (incy == 1) { armas_x_make(&y, N, 1, N, Y); } else { armas_x_make(&y, 1, N, incy, Y); } armas_x_mvmult_sym(beta, &y, alpha, &Aa, &x, flags, &conf); }
void __cblas_ger(const enum CBLAS_ORDER order, const int M, const int N, const DTYPE alpha, DTYPE *X, const int incx, DTYPE *Y, const int incy, DTYPE *A, const int lda) { armas_conf_t *conf = armas_conf_default(); armas_x_dense_t y, a, x; if (incy == 1) { // column vector armas_x_make(&y, N, 1, N, Y); } else { // row vector armas_x_make(&y, 1, N, incy, Y); } if (incx == 1) { armas_x_make(&x, M, 1, M, X); } else { armas_x_make(&x, 1, M, incx, X); } if (order == CblasRowMajor) { armas_x_make(&a, N, M, lda, A); armas_x_mvupdate(&a, alpha, &y, &x, conf); } else { armas_x_make(&a, M, N, lda, A); armas_x_mvupdate(&a, alpha, &x, &y, conf); } }
/** * @brief Generate orthogonal matrix Q for tridiagonally reduced matrix. * * @param A [in,out] * On entry tridiagonal reduction as returned by trdreduce(). On exit * the orthogonal matrix Q. * @param tau [in] * Scalar coefficients of the elementary reflectors. * @param K [in] * Number of elementary reflector that define the Q matrix, n(A) > K > 0 * @param flags [in] * Flag bits, either upper tridiagonal (ARMAS_UPPER) or lower tridiagonal (ARMAS_LOWER) * @param conf [in] * Optional blocking configuration * * @return * 0 on success, -1 on failure and sets conf.error value. */ int armas_x_trdbuild(armas_x_dense_t *A, const armas_x_dense_t *tau, int K, int flags, armas_conf_t *conf) { if (!conf) conf = armas_conf_default(); armas_wbuf_t *wbs, wb = ARMAS_WBNULL; if (armas_x_trdbuild_w(A, tau, K, flags, &wb, conf) < 0) return -1; wbs = &wb; if (wb.bytes > 0) { if (!armas_walloc(&wb, wb.bytes)) { conf->error = ARMAS_EMEMORY; return -1; } } else wbs = ARMAS_NOWORK; int stat = armas_x_trdbuild_w(A, tau, K, flags, wbs, conf); armas_wrelease(&wb); return stat; }
/** * \brief Compute RQ factorization of a M-by-N matrix A * * \param[in,out] A * On entry, the M-by-N matrix A, M <= N. On exit, upper triangular matrix R * and the orthogonal matrix Q as product of elementary reflectors. * * \param[out] tau * On exit, the scalar factors of the elemenentary reflectors. * * \param[out] W * Workspace, M-by-nb matrix used for work space in blocked invocations. * * \param[in,out] conf * The blocking configuration. If nil then default blocking configuration * is used. Member conf.lb defines blocking size of blocked algorithms. * If it is zero then unblocked algorithm is used. * * \retval 0 Success * \retval -1 Error. * * #### Additional information * * Ortogonal matrix Q is product of elementary reflectors H(k) * * \f$ Q = H_0 H-1,...,H_{K-1} \f$ , where \f$ K = \min M N \f$ * * Elementary reflector H(k) is stored on first N-M+k elements of row k of A. * with implicit unit value on element N-M+k entry. The vector TAU holds scalar * factors of the elementary reflectors. * * Contents of matrix A after factorization is as follow: * * ( v0 v0 r r r r ) M=4, N=6 * ( v1 v1 v1 r r r ) r is element of R * ( v2 v2 v2 v2 r r ) vk is element of H(k) * ( v3 v3 v3 v3 v3 r ) * * rqfactor() is compatible with lapack.DGERQF */ int armas_x_rqfactor(armas_x_dense_t *A, armas_x_dense_t *tau, armas_conf_t *cf) { if (!cf) cf = armas_conf_default(); armas_wbuf_t *wbs, wb = ARMAS_WBNULL; if (armas_x_rqfactor_w(A, tau, &wb, cf) < 0) return -1; wbs = &wb; if (wb.bytes > 0) { if (!armas_walloc(&wb, wb.bytes)) { cf->error = ARMAS_EMEMORY; return -1; } } else wbs = ARMAS_NOWORK; int stat = armas_x_rqfactor_w(A, tau, wbs, cf); armas_wrelease(&wb); return stat; }
/** * @brief Compute QR factorization of a M-by-N matrix A = Q * R. * * Arguments: * A On entry, the M-by-N matrix A, M >= N. On exit, upper triangular matrix R * and the orthogonal matrix Q as product of elementary reflectors. * * T On exit, block reflectors * * W Workspace, N-by-lb matrix used for work space in blocked invocations. * * conf The blocking configuration. If nil then default blocking configuration * is used. Member conf.LB defines blocking size of blocked algorithms. * If it is zero then unblocked algorithm is used. * * @returns: * 0 if succesfull, -1 otherwise * * Additional information * * Ortogonal matrix Q is product of elementary reflectors H(k) * * Q = H(0)H(1),...,H(K-1), where K = min(M,N) * * Elementary reflector H(k) is stored on column k of A below the diagonal with * implicit unit value on diagonal entry. The matrix T holds Householder block * reflectors. * * Contents of matrix A after factorization is as follow: * * ( r r r r ) for M=6, N=4 * ( v1 r r r ) * ( v1 v2 r r ) * ( v1 v2 v3 r ) * ( v1 v2 v3 v4 ) * ( v1 v2 v3 v4 ) * * where r is element of R, vk is element of H(k). * * Compatible with lapack.xGEQRT */ int armas_x_qrtfactor(armas_x_dense_t *A, armas_x_dense_t *T, armas_x_dense_t *W, armas_conf_t *conf) { armas_x_dense_t sT; int wsmin, lb; if (!conf) conf = armas_conf_default(); // must have: M >= N if (A->rows < A->cols || T->cols < A->cols) { conf->error = ARMAS_ESIZE; return -1; } // set blocking factor to number of rows in T lb = T->rows; wsmin = __ws_qrtfactor(A->rows, A->cols, lb); if (! W || armas_x_size(W) < wsmin) { conf->error = ARMAS_EWORK; return -1; } if (lb == 0 || A->cols <= lb) { armas_x_submatrix(&sT, T, 0, 0, A->cols, A->cols); __unblk_qrtfactor(A, &sT, W, conf); } else { __blk_qrtfactor(A, T, W, lb, conf); } return 0; }
void __cblas_trmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const enum CBLAS_TRANSPOSE trans, const enum CBLAS_DIAG diag, int N, DTYPE alpha, DTYPE *A, int lda, DTYPE *X, int incx) { armas_conf_t *conf = armas_conf_default(); armas_x_dense_t Aa, x; int flags = 0; switch (order) { case CblasRowMajor: flags |= uplo == CblasUpper ? ARMAS_LOWER : ARMAS_UPPER; if (trans == CblasNoTrans) flags |= ARMAS_TRANS; if (diag == CblasUnit) flags |= ARMAS_UNIT; break; case CblasColMajor: default: flags |= uplo == CblasUpper ? ARMAS_UPPER : ARMAS_LOWER; if (trans == CblasTrans) flags |= ARMAS_TRANS; if (diag == CblasUnit) flags |= ARMAS_UNIT; break; } if (incx == 1) { armas_x_make(&x, N, 1, N, X); } else { armas_x_make(&x, 1, N, incx, X); } armas_x_make(&Aa, N, N, lda, A); armas_x_mvmult_trm(&x, alpha, &Aa, flags, conf); }
void __cblas_syrk(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const enum CBLAS_TRANSPOSE trans, int N, int K, DTYPE alpha, DTYPE *A, int lda, DTYPE beta, DTYPE *C, int ldc) { armas_conf_t conf = *armas_conf_default(); armas_x_dense_t Ca, Aa; int flags = 0; switch (order) { case CblasRowMajor: flags |= uplo == CblasUpper ? ARMAS_LOWER : ARMAS_UPPER; if (trans == CblasNoTrans) { flags |= ARMAS_TRANS; armas_x_make(&Aa, K, N, lda, A); } else { armas_x_make(&Aa, N, K, lda, A); } break; case CblasColMajor: default: flags |= uplo == CblasUpper ? ARMAS_UPPER : ARMAS_LOWER; if (trans == CblasTrans) { flags |= ARMAS_TRANS; armas_x_make(&Aa, K, N, lda, A); } else { armas_x_make(&Aa, N, K, lda, A); } break; } armas_x_make(&Ca, N, N, ldc, C); armas_x_update_sym(&Ca, &Aa, alpha, beta, flags, conf); }
void __trmmf(char *side, char *uplo, char *transa, char *diag,int *m, int *n, DTYPE *alpha, DTYPE *A, int *lda, DTYPE *B, int *ldb) { armas_conf_t *conf = armas_conf_default(); armas_x_dense_t a, b; int flags = 0; armas_x_make(&b, *m, *n, *ldb, B); switch (toupper(*side)) { case 'R': flags |= ARMAS_RIGHT; armas_x_make(&a, *n, *n, *lda, A); break; case 'L': default: flags |= ARMAS_LEFT; armas_x_make(&a, *m, *m, *lda, A); break; } flags |= toupper(*uplo) == 'L' ? ARMAS_LOWER : ARMAS_UPPER; if (toupper(*transa) == 'T') flags |= ARMAS_TRANS; if (toupper(*diag) == 'U') flags |= ARMAS_UNIT; armas_x_mult_trm(&b, &a, *alpha, flags, conf); }
/** * \brief Solve symmetric positive definite system of linear equations * * Solves a system of linear equations \f$ AX = B \f$ with symmetric positive * definite matrix A using the Cholesky factorization \f$ A = U^TU \f$ or \f$ A = LL^T \f$ * computed by `cholfactor()`. * * \param[in,out] B * On entry, the right hand side matrix B. On exit, the solution matrix X. * \param[in] A * The triangular factor U or L from Cholesky factorization as computed by * `cholfactor().` * \param[in] P * Optional pivot array. If non null then A is pivoted cholesky factorization. * Set to ARMAS_NOPIVOT if normal cholesky factorization used. * \param[in] flags * Indicator of which factor is stored in A. If *ARMAS_UPPER* (*ARMAS_LOWER) then upper * (lower) triangle of A is stored. * \param[in,out] conf * Optional blocking configuration. * * \retval 0 Succes * \retval -1 Error, `conf.error` holds last error code * * Compatible with lapack.DPOTRS. * \ingroup lapack */ int armas_x_cholsolve(armas_x_dense_t *B, const armas_x_dense_t *A, const armas_pivot_t *P, int flags, armas_conf_t *conf) { int ok; if (!conf) conf = armas_conf_default(); if (P != ARMAS_NOPIVOT) { return __cholsolve_pv(B, (armas_x_dense_t *)A, (armas_pivot_t *)P, flags, conf); } ok = B->rows == A->cols && A->rows == A->cols; if (!ok) { conf->error = ARMAS_ESIZE; return -1; } if (flags & ARMAS_LOWER) { // solve A*X = B; X = A.-1*B == (L*L.T).-1*B == L.-T*(L.-1*B) armas_x_solve_trm(B, __ONE, A, ARMAS_LEFT|ARMAS_LOWER, conf); armas_x_solve_trm(B, __ONE, A, ARMAS_LEFT|ARMAS_LOWER|ARMAS_TRANSA, conf); } else { // solve A*X = B; X = A.-1*B == (U.T*U).-1*B == U.-1*(U.-T*B) armas_x_solve_trm(B, __ONE, A, ARMAS_LEFT|ARMAS_UPPER|ARMAS_TRANSA, conf); armas_x_solve_trm(B, __ONE, A, ARMAS_LEFT|ARMAS_UPPER, conf); } return 0; }
void __symvf(char *uplo, int *n, DTYPE *alpha, DTYPE *A, int *lda, DTYPE *X, int *incx, DTYPE *beta, DTYPE *Y, int *incy) { armas_conf_t *conf = armas_conf_default(); armas_x_dense_t y, a, x; int flags = 0; switch (toupper(*uplo)) { case 'L': flags |= ARMAS_LOWER; break; case 'U': default: flags |= ARMAS_UPPER; break; } armas_x_make(&a, *n, *n, *lda, A); if (*incy == 1) { // column vector armas_x_make(&y, *n, 1, *n, Y); } else { // row vector armas_x_make(&y, 1, *n, *incy, Y); } if (*incx == 1) { armas_x_make(&x, *n, 1, *n, X); } else { armas_x_make(&x, 1, *n, *incx, X); } armas_x_mvmult_sym(*beta, &y, *alpha, &a, &x, flags, conf); }
/** * \brief Cholesky factorization * * Compute the Cholesky factorization of a symmetric positive definite N-by-N matrix A. * * \param[in,out] A * On entry, the symmetric matrix A. If *ARMAS_UPPER* is set the upper triangular part * of A contains the upper triangular part of the matrix A, and strictly * lower part A is not referenced. If *ARMAS_LOWER* is set the lower triangular part * of a contains the lower triangular part of the matrix A. Likewise, the * strictly upper part of A is not referenced. On exit, factor U or L from the * Cholesky factorization \f$ A = U^T U \f$ or \f$ A = L L^T \f$ * \param[out] P * Optional pivot array. If non null then pivoting factorization is computed. * Set to ARMAS_NOPIVOT if normal cholesky factorization wanted. * \param[in] flags * The matrix structure indicator, *ARMAS_UPPER* for upper tridiagonal and * *ARMAS_LOWER* for lower tridiagonal matrix. * \param[in,out] wb * Workspace for pivoting factorization. If wb.bytes is zero then work buffer size is * returned in wb.bytes. * \param[in,out] conf * Optional blocking configuration. If not provided default blocking configuration * will be used. * * \retval 0 Success; If pivoting factorized then result matrix is full rank. * \retval >0 Success with pivoting factorization, matrix rank returned. * \retval -1 Error, `conf.error` holds error code * * Pivoting factorization is computed of P is not ARMAS_NOPIVOT. Pivoting factorization * needs workspace of size N elements for blocked version. If no workspace (ARMAS_NOWORK) is provided * or it is too small then unblocked algorithm is used. * * Factorization stops when diagonal element goes small enough. * Default value for stopping criteria is \$ max |diag(A)|*N*epsilon \$ * If value of absolute stopping criteria `conf.stop` is non-zero then it is used. Otherwise * if `conf.smult` (relative stopping criterion multiplier) is non-zero then stopping criteria * is set to \$ max |diag(A)|*smult \$. * * Pivoting factorization returns zero if result matrix is full rank. Return value greater than * zero is rank of result matrix. Negative values indicate error. * * * Compatible with lapack.DPOTRF * \ingroup lapack */ int armas_x_cholfactor_w(armas_x_dense_t *A, armas_pivot_t *P, int flags, armas_wbuf_t *wb, armas_conf_t *conf) { armas_x_dense_t W; int err = 0; if (!conf) conf = armas_conf_default(); if (!A) { conf->error = ARMAS_EINVAL; return -1; } if (P != ARMAS_NOPIVOT) { if (wb && wb->bytes == 0) { wb->bytes = A->cols * sizeof(DTYPE); return 0; } // working space is N elements for blocked factorization if (conf->lb > 0 && A->cols > conf->lb) { if (!wb || armas_wbytes(wb) < A->cols * sizeof(DTYPE)) armas_x_make(&W, 0, 0, 0, (DTYPE *)0); // too small, force unblocked else armas_x_make(&W, A->cols, 1, A->cols, (DTYPE *)armas_wptr(wb)); } else { // force unblocked with zero sized workspace armas_x_make(&W, 0, 0, 0, (DTYPE *)0); } return __cholfactor_pv(A, &W, P, flags, conf); } if (A->rows != A->cols) { conf->error = ARMAS_ESIZE; return -1; } if (conf->lb == 0 || A->cols <= conf->lb) { if (flags & ARMAS_LOWER) { err = __unblk_cholfactor_lower(A, conf); } else { err = __unblk_cholfactor_upper(A, conf); } } else { if (flags & ARMAS_LOWER) { err = __blk_cholfactor_lower(A, conf->lb, conf); } else { err = __blk_cholfactor_upper(A, conf->lb, conf); } } return err; }
// compute ||A - Q*T*Q.T|| int test_mult_trd(int M, int N, int lb, int verbose, int flags) { armas_x_dense_t A0, A1, tau0, T0, T1, e1, e2, d1, d2; armas_conf_t conf = *armas_conf_default(); int ok; DTYPE nrm; char *uplo = flags & ARMAS_UPPER ? "UPPER" : "LOWER"; armas_x_init(&A0, N, N); armas_x_init(&A1, N, N); armas_x_init(&T0, N, N); armas_x_init(&T1, N, N); armas_x_init(&tau0, N, 1); // set source data armas_x_set_values(&A0, unitrand, flags); armas_x_mcopy(&A1, &A0); conf.lb = lb; armas_x_trdreduce(&A0, &tau0, flags, &conf); // make tridiagonal matrix T0 armas_x_diag(&d1, &A0, 0); armas_x_diag(&d2, &T0, 0); armas_x_mcopy(&d2, &d1); if (flags & ARMAS_UPPER) { armas_x_diag(&e1, &A0, 1); } else { armas_x_diag(&e1, &A0, -1); } // copy off-diagonals armas_x_diag(&e2, &T0, 1); armas_x_mcopy(&e2, &e1); armas_x_diag(&e2, &T0, -1); armas_x_mcopy(&e2, &e1); // compute Q*T*Q.T armas_x_trdmult(&T0, &A0, &tau0, flags|ARMAS_LEFT, &conf); armas_x_trdmult(&T0, &A0, &tau0, flags|ARMAS_RIGHT|ARMAS_TRANS, &conf); // make result triangular (original matrix) armas_x_make_trm(&T0, flags); nrm = rel_error((DTYPE *)0, &T0, &A1, ARMAS_NORM_ONE, ARMAS_NONE, &conf); ok = isFINE(nrm, N*__ERROR); printf("%s: [%s] Q*T*Q.T == A\n", PASS(ok), uplo); if (verbose > 0) { printf(" || rel error ||: %e [%d]\n", nrm, ndigits(nrm)); } armas_x_release(&A0); armas_x_release(&A1); armas_x_release(&tau0); armas_x_release(&T0); armas_x_release(&T1); return ok; }
int __cblas_iamax(int N, DTYPE *X, int incx) { armas_conf_t *conf = armas_conf_default(); armas_x_dense_t x; if (incx == 1) { armas_x_make(&x, N, 1, N, X); } else { armas_x_make(&x, 1, N, incx, X); } return armas_x_iamax(&x, conf); }
int __iamaxf(int *n, DTYPE *X, int *incx) { armas_conf_t *conf = armas_conf_default(); armas_x_dense_t x; if (*incx == 1) { armas_x_make(&x, *n, 1, *n, X); } else { armas_x_make(&x, 1, *n, *incx, X); } return armas_x_iamax(&x, conf); }
/** * @brief General triangular/trapezoidial matrix rank update. * * Computes * - \f$ A = A + alpha \times X Y^T \f$ * * where A is upper (lower) triangular or trapezoidial matrix as defined with * flag bits *ARMAS_UPPER* (*ARMAS_LOWER*). * * If option *ARMAS_OEXTPREC* is set in *conf.optflags* then computations * are executed in extended precision. * * @param[in,out] A target matrix * @param[in] alpha scalar multiplier * @param[in] X source vector * @param[in] Y source vector * @param[in] flags flag bits * @param[in] conf configuration block * * @retval 0 Success * @retval <0 Failed * * @ingroup blas2 */ int armas_x_mvupdate_trm(armas_x_dense_t *A, DTYPE alpha, const armas_x_dense_t *X, const armas_x_dense_t *Y, int flags, armas_conf_t *conf) { mvec_t x, y; mdata_t A0; int nx = armas_x_size(X); int ny = armas_x_size(Y); if (armas_x_size(A) == 0 || armas_x_size(X) == 0 || armas_x_size(Y) == 0) return 0; if (!conf) conf = armas_conf_default(); if (X->rows != 1 && X->cols != 1) { conf->error = ARMAS_ENEED_VECTOR; return -1; } if (Y->rows != 1 && Y->cols != 1) { conf->error = ARMAS_ENEED_VECTOR; return -1; } if (A->cols != ny || A->rows != nx) { conf->error = ARMAS_ESIZE; return -1; } x = (mvec_t){X->elems, (X->rows == 1 ? X->step : 1)}; y = (mvec_t){Y->elems, (Y->rows == 1 ? Y->step : 1)}; A0 = (mdata_t){A->elems, A->step}; // if extended precision enabled and requested if (HAVE_EXT_PRECISION && (conf->optflags & ARMAS_OEXTPREC)) { __update_trmv_ext_unb(&A0, &x, &y, alpha, flags, ny, nx); return 0; } // normal precision here switch (conf->optflags & (ARMAS_ONAIVE|ARMAS_ORECURSIVE)) { case ARMAS_ORECURSIVE: __update_trmv_recursive(&A0, &x, &y, alpha, flags, ny, nx); break; case ARMAS_ONAIVE: default: __update_trmv_unb(&A0, &x, &y, alpha, flags, ny, nx); break; } return 0; }
/** * @brief Maximum absolute value of vector. */ ABSTYPE armas_x_amax(const armas_x_dense_t *x, armas_conf_t *conf) { if (!conf) conf = armas_conf_default(); int imax = armas_x_iamax(x, conf); if (imax != -1) { int r = x->rows == 0 ? 0 : imax; int c = x->rows == 0 ? imax : 0; return __ABS(armas_x_get(x, r, c)); } return __ZERO; }
/* * Build block reflector from RQ factorized matrix. * * Elementary reflector stored in matrix A rowwise as descriped below. Result * block reflector matrix is lower triangular with tau-vector on diagonal. * * ( v1 v1 v1 1 . . ) ( t1 ) ( t1 . . ) * ( v2 v2 v2 v2 1 . ) ( t2 ) ( t t2 . ) * ( v3 v3 v3 v3 v3 1 ) ( t3 ) ( t t t3 ) */ int armas_x_rqreflector(armas_x_dense_t *T, armas_x_dense_t *A, armas_x_dense_t *tau, armas_conf_t *conf) { if (!conf) conf = armas_conf_default(); if (T->cols < A->rows || T->rows < A->rows) { conf->error = ARMAS_ESIZE; return -1; } __unblk_rq_reflector(T, A, tau, conf); return 0; }
/** * \brief Cholesky factorization * * Compute the Cholesky factorization of a symmetric positive definite N-by-N matrix A. * */ int armas_x_cholesky(armas_x_dense_t *A, int flags, armas_conf_t *conf) { if (!conf) conf = armas_conf_default(); if (!A) { conf->error = ARMAS_EINVAL; return -1; } return armas_x_cholfactor_w(A, ARMAS_NOPIVOT, flags, ARMAS_NOWORK, conf); }
/** * @brief Index of \f$ \min_{k} |x| \f$ * * @param[in] x vector * @param[in,out] conf configuration block * * @retval >= 0 index of minimum element * @retval -1 error, conf->error holds error code * * @ingroup blas1 */ int armas_x_iamin(const armas_x_dense_t *x, armas_conf_t *conf) { if (!conf) conf = armas_conf_default(); // only for column or row vectors if (x->cols != 1 && x->rows != 1) { conf->error = ARMAS_ENEED_VECTOR; return -1; } // assume column vector mvec_t X = {x->elems, (x->rows == 1 ? x->step : 1)}; return __vec_iamin(&X, armas_x_size(x)); }
void __cblas_gemm(int order, int transa, int transb, int M, int N, int K, DTYPE alpha, DTYPE *A, int lda, DTYPE *B, int ldb, DTYPE beta, DTYPE *C, int ldc) { armas_conf_t conf = *armas_conf_default(); armas_x_dense_t Ca, Aa, Ba; int flags = 0; switch (order) { case CblasColMajor: if (transa == CblasTrans) { flags |= ARMAS_TRANSA; // error: K > lda armas_x_make(&Aa, K, M, lda, A); } else { // error: M > lda armas_x_make(&Aa, M, K, lda, A); } if (transb == CblasTrans) { flags |= ARMAS_TRANSB; armas_x_make(&Ba, N, K, ldb, B); } else { armas_x_make(&Ba, K, N, ldb, B); } armas_x_make(&Ca, M, N, ldc, C); break; case CblasRowMajor: if (transa == CblasNoTrans) { flags |= ARMAS_TRANSA; // error: M > lda armas_x_make(&Aa, M, K, lda, A); } else { // error: K > lda armas_x_make(&Aa, K, M, lda, A); } if (transb == CblasNoTrans) { flags |= ARMAS_TRANSB; armas_x_make(&Ba, K, N, ldb, B); } else { armas_x_make(&Ba, N, K, ldb, B); } armas_x_make(&Ca, N, M, ldc, C); break; default: return; } armas_x_mult(beta, &Ca, alpha, &Aa, &Ba, flags, &conf); }
void __cblas_trmm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo, const enum CBLAS_TRANSPOSE transa, const enum CBLAS_DIAG diag, int M, int N, DTYPE alpha, DTYPE *A, int lda, DTYPE *B, int ldb) { armas_conf_t conf = *armas_conf_default(); armas_x_dense_t Aa, Ba; int flags = 0; switch (order) { case CblasColMajor: flags |= side == CblasRight ? ARMAS_RIGHT : ARMAS_LEFT; flags |= uplo == CblasUpper ? ARMAS_UPPER : ARMAS_LOWER; if (diag == CblasUnit) flags |= ARMAS_UNIT; if (transa == CblasTrans) flags |= ARMAS_TRANSA; // M > ldb --> error armas_x_make(&Ba, M, N, ldb, B); if (side == CblasRight) { // N > lda --> error armas_x_make(&Aa, N, N, lda, A); } else { // M > lda --> error armas_x_make(&Aa, M, M, lda, A); } break; case CblasRowMajor: flags |= side == CblasRight ? ARMAS_LEFT : ARMAS_RIGHT; flags |= uplo == CblasUpper ? ARMAS_LOWER : ARMAS_UPPER; if (diag == CblasUnit) flags |= ARMAS_UNIT; if (transa == CblasNoTrans) flags |= ARMAS_TRANSA; // N > ldb --> error armas_x_make(&Ba, N, M, ldb, B); if (side == CblasRight) { // N > lda --> error armas_x_make(&Aa, M, M, lda, A); } else { // M > lda --> error armas_x_make(&Aa, N, N, lda, A); } break; default: return; } armas_x_mult_trm(&Ba, &Aa, alpha, flags, &conf); }
void __gemmf(char *transa, char *transb, int *m, int *n, int *k, DTYPE *alpha, DTYPE *A, int *lda, DTYPE *B, int *ldb, DTYPE *beta, DTYPE *C, int *ldc) { armas_conf_t *conf = armas_conf_default(); armas_x_dense_t c, a, b; int flags = 0; armas_x_make(&c, *m, *n, *ldc, C); armas_x_make(&a, *m, *k, *lda, A); armas_x_make(&b, *k, *n, *ldb, B); if (toupper(*transa) == 'T') flags |= ARMAS_TRANSA; if (toupper(*transb) == 'T') flags |= ARMAS_TRANSB; armas_x_mult(*beta, &c, *alpha, &a, &b, flags, conf); }
/** * \brief Compute singular values of bidiagonal matrix using the DQDS algorithm. * * \param[in,out] D * On entry, the diagonal elements. On exit singular values in descending order * \param[in,out] E * On entry off-diagonal elements * \param[in] conf * Configuration block */ int armas_x_dqds(armas_x_dense_t *D, armas_x_dense_t *E, armas_conf_t *conf) { int err = 0; armas_wbuf_t *wbs, wb = ARMAS_WBNULL; if (!conf) conf = armas_conf_default(); if (armas_x_dqds_w(D, E, &wb, conf) < 0) return -1; if (wb.bytes > 0 && wb.bytes <= ARMAS_MAX_ONSTACK_WSPACE) { char b[ARMAS_MAX_ONSTACK_WSPACE]; armas_wbuf_t wbt = (armas_wbuf_t){ .buf = b, .offset = 0, .bytes = ARMAS_MAX_ONSTACK_WSPACE }; err = armas_x_dqds_w(D, E, &wbt, conf); } else {
/* ----------------------------------------------------------------------------------- * Test 6: Q(qr(A)).T * Q(qr(A)) == I * OK: ||I - Q.T*Q||_1 ~~ n*eps */ int test_build_identity(int M, int N, int K, int lb, int verbose) { char *blk = lb > 0 ? " blk" : "unblk"; char ct = M == K ? 'M' : 'K'; armas_x_dense_t A0, C0, tau0, D; int ok; DTYPE n0; armas_conf_t conf = *armas_conf_default(); armas_x_init(&A0, M, N); armas_x_init(&C0, M, M); armas_x_init(&tau0, imin(M, N), 1); // set source data armas_x_set_values(&A0, unitrand, ARMAS_ANY); // factorize conf.lb = lb; armas_x_rqfactor(&A0, &tau0, &conf); // compute Q = buildQ(rq(A)), K first columns conf.lb = lb; if (armas_x_rqbuild(&A0, &tau0, K, &conf) < 0) printf("build error: %d\n", conf.error); // C0 = Q.T*Q - I armas_x_mult(0.0, &C0, 1.0, &A0, &A0, ARMAS_TRANSB, &conf); armas_x_diag(&D, &C0, 0); armas_x_add(&D, -1.0, &conf); n0 = armas_x_mnorm(&C0, ARMAS_NORM_ONE, &conf); ok = isOK(n0, N); printf("%s: %s Q(rq(A),%c).T * Q(rq(A),%c) == I\n", PASS(ok), blk, ct, ct); if (verbose > 0) { printf(" || rel error ||_1: %e [%d]\n", n0, ndigits(n0)); } //armas_x_release(&A0); armas_x_release(&C0); armas_x_release(&tau0); return ok; }
void __syrkf(char *uplo, char *trans, int *n, int *k, DTYPE *alpha, DTYPE *A, int *lda, DTYPE *beta, DTYPE *C, int *ldc) { armas_conf_t *conf = armas_conf_default(); armas_x_dense_t c, a; int flags = 0; flags |= toupper(*uplo) == 'L' ? ARMAS_LOWER : ARMAS_UPPER; if (toupper(*trans) == 'T') flags |= ARMAS_TRANS; armas_x_make(&c, *n, *n, *ldc, C); if (flags & ARMAS_TRANS) { armas_x_make(&a, *k, *n, *lda, A); } else { armas_x_make(&a, *n, *k, *lda, A); } armas_x_update_sym(&c, &a, *alpha, *beta, flags, conf); }
void __gerf(int *m, int *n, DTYPE *alpha, DTYPE *X, int *incx, DTYPE *Y, int *incy, DTYPE *A, int *lda) { armas_conf_t *conf = armas_conf_default(); armas_x_dense_t y, a, x; armas_x_make(&a, *m, *n, *lda, A); if (*incy == 1) { // column vector armas_x_make(&y, *n, 1, *n, Y); } else { // row vector armas_x_make(&y, 1, *n, *incy, Y); } if (*incx == 1) { armas_x_make(&x, *m, 1, *m, X); } else { armas_x_make(&x, 1, *m, *incx, X); } armas_x_mvupdate(&a, *alpha, &x, &y, conf); }
/* ----------------------------------------------------------------------------------- * Test 5: unblk.Q(rq(A)) == blk.Q(rq(A)) * OK: ||unblk.Q(rq(A)) - blk.Q(rq(A))||_1 ~~ n*eps */ int test_build(int M, int N, int K, int lb, int verbose) { char ct = N == K ? 'N' : 'K'; armas_x_dense_t A0, A1, tau0; int ok; DTYPE n0; armas_conf_t conf = *armas_conf_default(); armas_x_init(&A0, M, N); armas_x_init(&A1, M, N); armas_x_init(&tau0, imin(M, N), 1); // set source data armas_x_set_values(&A0, unitrand, ARMAS_ANY); // factorize conf.lb = lb; armas_x_rqfactor(&A0, &tau0, &conf); armas_x_mcopy(&A1, &A0); // compute Q = buildQ(rq(A)) conf.lb = 0; armas_x_rqbuild(&A0, &tau0, K, &conf); conf.lb = lb; armas_x_rqbuild(&A1, &tau0, K, &conf); // ||A1 - A0||/||A0|| n0 = rel_error((DTYPE *)0, &A1, &A0, ARMAS_NORM_ONE, ARMAS_NONE, &conf); ok = isOK(n0, N); printf("%s: unblk.Q(rq(A),%c) == blk.Q(rq(A),%c)\n", PASS(ok), ct, ct); if (verbose > 0) { printf(" || rel_error ||_1: %e [%d]\n", n0, ndigits(n0)); } armas_x_release(&A0); armas_x_release(&A1); armas_x_release(&tau0); return ok; }
int test_build(int M, int N, int lb, int K, int verbose, int flags) { armas_x_dense_t A0, tauq0, d0, QQt; armas_conf_t conf = *armas_conf_default(); int ok; DTYPE nrm; int tN = M < N ? M : N; char *uplo = flags & ARMAS_UPPER ? "UPPER" : "LOWER"; armas_x_init(&A0, N, N); armas_x_init(&tauq0, tN, 1); //------------------------------------------------------ // set source data armas_x_set_values(&A0, unitrand, flags); // reduce to tridiagonal matrix conf.lb = lb; armas_x_trdreduce(&A0, &tauq0, flags, &conf); // ---------------------------------------------------------------- // Q-matrix armas_x_trdbuild(&A0, &tauq0, K, flags, &conf); armas_x_init(&QQt, N, N); armas_x_mult(0.0, &QQt, 1.0, &A0, &A0, ARMAS_TRANSB, &conf); armas_x_diag(&d0, &QQt, 0); armas_x_madd(&d0, -1.0, ARMAS_NONE); nrm = armas_x_mnorm(&QQt, ARMAS_NORM_ONE, &conf); ok = isFINE(nrm, N*__ERROR); printf("%s: [%s] I == Q*Q.T\n", PASS(ok), uplo); if (verbose > 0) { printf(" || rel error ||: %e [%d]\n", nrm, ndigits(nrm)); } //------------------------------------------------------ armas_x_release(&A0); armas_x_release(&tauq0); armas_x_release(&QQt); return ok; }