void __symvf(char *uplo, int *n, DTYPE *alpha, DTYPE *A, int *lda, DTYPE *X, int *incx, DTYPE *beta, DTYPE *Y, int *incy) { armas_conf_t *conf = armas_conf_default(); armas_x_dense_t y, a, x; int flags = 0; switch (toupper(*uplo)) { case 'L': flags |= ARMAS_LOWER; break; case 'U': default: flags |= ARMAS_UPPER; break; } armas_x_make(&a, *n, *n, *lda, A); if (*incy == 1) { // column vector armas_x_make(&y, *n, 1, *n, Y); } else { // row vector armas_x_make(&y, 1, *n, *incy, Y); } if (*incx == 1) { armas_x_make(&x, *n, 1, *n, X); } else { armas_x_make(&x, 1, *n, *incx, X); } armas_x_mvmult_sym(*beta, &y, *alpha, &a, &x, flags, conf); }
void __cblas_symv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, int N, DTYPE alpha, DTYPE *A, int lda, DTYPE *X, int incx, DTYPE beta, DTYPE *Y, int incy) { armas_x_dense_t Aa, x, y; armas_conf_t conf = *armas_conf_default(); int flags; switch (order) { case CblasRowMajor: flags = uplo == CblasUpper ? ARMAS_LOWER : ARMAS_UPPER; break; case CblasColMajor: default: flags = uplo == CblasUpper ? ARMAS_UPPER : ARMAS_LOWER; break; } armas_x_make(&Aa, N, N, lda, A); if (incx == 1) { armas_x_make(&x, N, 1, N, X); } else { armas_x_make(&x, 1, N, incx, X); } if (incy == 1) { armas_x_make(&y, N, 1, N, Y); } else { armas_x_make(&y, 1, N, incy, Y); } armas_x_mvmult_sym(beta, &y, alpha, &Aa, &x, flags, &conf); }
void __trmvf(char *uplo, char *trans, char *diag, int *n, DTYPE *A, int *lda, DTYPE *X, int *incx) { armas_conf_t *conf = armas_conf_default(); armas_x_dense_t a, x; int flags = 0; switch (toupper(*uplo)) { case 'L': flags |= ARMAS_LOWER; break; case 'U': default: flags |= ARMAS_UPPER; break; } if (toupper(*trans) == 'T') flags |= ARMAS_TRANS; if (toupper(*diag) == 'U') flags |= ARMAS_UNIT; armas_x_make(&a, *n, *n, *lda, A); if (*incx == 1) { armas_x_make(&x, *n, 1, *n, X); } else { armas_x_make(&x, 1, *n, *incx, X); } armas_x_mvmult_trm(&x, __ONE, &a, flags, conf); }
void __cblas_trmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const enum CBLAS_TRANSPOSE trans, const enum CBLAS_DIAG diag, int N, DTYPE alpha, DTYPE *A, int lda, DTYPE *X, int incx) { armas_conf_t *conf = armas_conf_default(); armas_x_dense_t Aa, x; int flags = 0; switch (order) { case CblasRowMajor: flags |= uplo == CblasUpper ? ARMAS_LOWER : ARMAS_UPPER; if (trans == CblasNoTrans) flags |= ARMAS_TRANS; if (diag == CblasUnit) flags |= ARMAS_UNIT; break; case CblasColMajor: default: flags |= uplo == CblasUpper ? ARMAS_UPPER : ARMAS_LOWER; if (trans == CblasTrans) flags |= ARMAS_TRANS; if (diag == CblasUnit) flags |= ARMAS_UNIT; break; } if (incx == 1) { armas_x_make(&x, N, 1, N, X); } else { armas_x_make(&x, 1, N, incx, X); } armas_x_make(&Aa, N, N, lda, A); armas_x_mvmult_trm(&x, alpha, &Aa, flags, conf); }
void __cblas_syrk(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, const enum CBLAS_TRANSPOSE trans, int N, int K, DTYPE alpha, DTYPE *A, int lda, DTYPE beta, DTYPE *C, int ldc) { armas_conf_t conf = *armas_conf_default(); armas_x_dense_t Ca, Aa; int flags = 0; switch (order) { case CblasRowMajor: flags |= uplo == CblasUpper ? ARMAS_LOWER : ARMAS_UPPER; if (trans == CblasNoTrans) { flags |= ARMAS_TRANS; armas_x_make(&Aa, K, N, lda, A); } else { armas_x_make(&Aa, N, K, lda, A); } break; case CblasColMajor: default: flags |= uplo == CblasUpper ? ARMAS_UPPER : ARMAS_LOWER; if (trans == CblasTrans) { flags |= ARMAS_TRANS; armas_x_make(&Aa, K, N, lda, A); } else { armas_x_make(&Aa, N, K, lda, A); } break; } armas_x_make(&Ca, N, N, ldc, C); armas_x_update_sym(&Ca, &Aa, alpha, beta, flags, conf); }
void __trmmf(char *side, char *uplo, char *transa, char *diag,int *m, int *n, DTYPE *alpha, DTYPE *A, int *lda, DTYPE *B, int *ldb) { armas_conf_t *conf = armas_conf_default(); armas_x_dense_t a, b; int flags = 0; armas_x_make(&b, *m, *n, *ldb, B); switch (toupper(*side)) { case 'R': flags |= ARMAS_RIGHT; armas_x_make(&a, *n, *n, *lda, A); break; case 'L': default: flags |= ARMAS_LEFT; armas_x_make(&a, *m, *m, *lda, A); break; } flags |= toupper(*uplo) == 'L' ? ARMAS_LOWER : ARMAS_UPPER; if (toupper(*transa) == 'T') flags |= ARMAS_TRANS; if (toupper(*diag) == 'U') flags |= ARMAS_UNIT; armas_x_mult_trm(&b, &a, *alpha, flags, conf); }
/** * \brief Cholesky factorization * * Compute the Cholesky factorization of a symmetric positive definite N-by-N matrix A. * * \param[in,out] A * On entry, the symmetric matrix A. If *ARMAS_UPPER* is set the upper triangular part * of A contains the upper triangular part of the matrix A, and strictly * lower part A is not referenced. If *ARMAS_LOWER* is set the lower triangular part * of a contains the lower triangular part of the matrix A. Likewise, the * strictly upper part of A is not referenced. On exit, factor U or L from the * Cholesky factorization \f$ A = U^T U \f$ or \f$ A = L L^T \f$ * \param[out] P * Optional pivot array. If non null then pivoting factorization is computed. * Set to ARMAS_NOPIVOT if normal cholesky factorization wanted. * \param[in] flags * The matrix structure indicator, *ARMAS_UPPER* for upper tridiagonal and * *ARMAS_LOWER* for lower tridiagonal matrix. * \param[in,out] wb * Workspace for pivoting factorization. If wb.bytes is zero then work buffer size is * returned in wb.bytes. * \param[in,out] conf * Optional blocking configuration. If not provided default blocking configuration * will be used. * * \retval 0 Success; If pivoting factorized then result matrix is full rank. * \retval >0 Success with pivoting factorization, matrix rank returned. * \retval -1 Error, `conf.error` holds error code * * Pivoting factorization is computed of P is not ARMAS_NOPIVOT. Pivoting factorization * needs workspace of size N elements for blocked version. If no workspace (ARMAS_NOWORK) is provided * or it is too small then unblocked algorithm is used. * * Factorization stops when diagonal element goes small enough. * Default value for stopping criteria is \$ max |diag(A)|*N*epsilon \$ * If value of absolute stopping criteria `conf.stop` is non-zero then it is used. Otherwise * if `conf.smult` (relative stopping criterion multiplier) is non-zero then stopping criteria * is set to \$ max |diag(A)|*smult \$. * * Pivoting factorization returns zero if result matrix is full rank. Return value greater than * zero is rank of result matrix. Negative values indicate error. * * * Compatible with lapack.DPOTRF * \ingroup lapack */ int armas_x_cholfactor_w(armas_x_dense_t *A, armas_pivot_t *P, int flags, armas_wbuf_t *wb, armas_conf_t *conf) { armas_x_dense_t W; int err = 0; if (!conf) conf = armas_conf_default(); if (!A) { conf->error = ARMAS_EINVAL; return -1; } if (P != ARMAS_NOPIVOT) { if (wb && wb->bytes == 0) { wb->bytes = A->cols * sizeof(DTYPE); return 0; } // working space is N elements for blocked factorization if (conf->lb > 0 && A->cols > conf->lb) { if (!wb || armas_wbytes(wb) < A->cols * sizeof(DTYPE)) armas_x_make(&W, 0, 0, 0, (DTYPE *)0); // too small, force unblocked else armas_x_make(&W, A->cols, 1, A->cols, (DTYPE *)armas_wptr(wb)); } else { // force unblocked with zero sized workspace armas_x_make(&W, 0, 0, 0, (DTYPE *)0); } return __cholfactor_pv(A, &W, P, flags, conf); } if (A->rows != A->cols) { conf->error = ARMAS_ESIZE; return -1; } if (conf->lb == 0 || A->cols <= conf->lb) { if (flags & ARMAS_LOWER) { err = __unblk_cholfactor_lower(A, conf); } else { err = __unblk_cholfactor_upper(A, conf); } } else { if (flags & ARMAS_LOWER) { err = __blk_cholfactor_lower(A, conf->lb, conf); } else { err = __blk_cholfactor_upper(A, conf->lb, conf); } } return err; }
int __cblas_iamax(int N, DTYPE *X, int incx) { armas_conf_t *conf = armas_conf_default(); armas_x_dense_t x; if (incx == 1) { armas_x_make(&x, N, 1, N, X); } else { armas_x_make(&x, 1, N, incx, X); } return armas_x_iamax(&x, conf); }
int __iamaxf(int *n, DTYPE *X, int *incx) { armas_conf_t *conf = armas_conf_default(); armas_x_dense_t x; if (*incx == 1) { armas_x_make(&x, *n, 1, *n, X); } else { armas_x_make(&x, 1, *n, *incx, X); } return armas_x_iamax(&x, conf); }
void __cblas_gemm(int order, int transa, int transb, int M, int N, int K, DTYPE alpha, DTYPE *A, int lda, DTYPE *B, int ldb, DTYPE beta, DTYPE *C, int ldc) { armas_conf_t conf = *armas_conf_default(); armas_x_dense_t Ca, Aa, Ba; int flags = 0; switch (order) { case CblasColMajor: if (transa == CblasTrans) { flags |= ARMAS_TRANSA; // error: K > lda armas_x_make(&Aa, K, M, lda, A); } else { // error: M > lda armas_x_make(&Aa, M, K, lda, A); } if (transb == CblasTrans) { flags |= ARMAS_TRANSB; armas_x_make(&Ba, N, K, ldb, B); } else { armas_x_make(&Ba, K, N, ldb, B); } armas_x_make(&Ca, M, N, ldc, C); break; case CblasRowMajor: if (transa == CblasNoTrans) { flags |= ARMAS_TRANSA; // error: M > lda armas_x_make(&Aa, M, K, lda, A); } else { // error: K > lda armas_x_make(&Aa, K, M, lda, A); } if (transb == CblasNoTrans) { flags |= ARMAS_TRANSB; armas_x_make(&Ba, K, N, ldb, B); } else { armas_x_make(&Ba, N, K, ldb, B); } armas_x_make(&Ca, N, M, ldc, C); break; default: return; } armas_x_mult(beta, &Ca, alpha, &Aa, &Ba, flags, &conf); }
/* * Tridiagonal reduction of LOWER triangular symmetric matrix, zero elements below 1st * subdiagonal: * * A = (1 - tau*u*u.t)*A*(1 - tau*u*u.T) * = (I - tau*( 0 0 )) (a11 a12) (I - tau*( 0 0 )) * ( ( 0 u*u.t)) (a21 A22) ( ( 0 u*u.t)) * * a11, a12, a21 not affected * * from LEFT: * A22 = A22 - tau*u*u.T*A22 * from RIGHT: * A22 = A22 - tau*A22*u.u.T * * LEFT and RIGHT: * A22 = A22 - tau*u*u.T*A22 - tau*(A22 - tau*u*u.T*A22)*u*u.T * = A22 - tau*u*u.T*A22 - tau*A22*u*u.T + tau*tau*u*u.T*A22*u*u.T * [x = tau*A22*u (vector)] (SYMV) * A22 = A22 - u*x.T - x*u.T + tau*u*u.T*x*u.T * [beta = tau*u.T*x (scalar)] (DOT) * = A22 - u*x.T - x*u.T + beta*u*u.T * = A22 - u*(x - 0.5*beta*u).T - (x - 0.5*beta*u)*u.T * [w = x - 0.5*beta*u] (AXPY) * = A22 - u*w.T - w*u.T (SYR2) * * Result of reduction for N = 5: * ( d . . . . ) * ( e d . . . ) * ( v1 e d . . ) * ( v1 v2 e d . ) * ( v1 v2 v3 e d ) */ static int __unblk_trdreduce_lower(armas_x_dense_t *A, armas_x_dense_t *tauq, armas_x_dense_t *W, armas_conf_t *conf) { armas_x_dense_t ATL, ABR, A00, a11, a21, A22; armas_x_dense_t tT, tB, tq0, tq1, tq2, y21; DTYPE v0, beta, tauval; EMPTY(A00); EMPTY(a11); if (armas_x_size(tauq) == 0) return 0; __partition_2x2(&ATL, __nil, __nil, &ABR, /**/ A, 0, 0, ARMAS_PTOPLEFT); __partition_2x1(&tT, &tB, /**/ tauq, 0, ARMAS_PTOP); while (ABR.rows > 0 && ABR.cols > 0) { __repartition_2x2to3x3(&ATL, &A00, __nil, __nil, __nil, &a11, __nil, __nil, &a21, &A22, /**/ A, 1, ARMAS_PBOTTOMRIGHT); __repartition_2x1to3x1(&tT, &tq0, &tq1, &tq2, /**/ tauq, 1, ARMAS_PBOTTOM); // ------------------------------------------------------------------------ // temp vector for this round armas_x_make(&y21, A22.rows, 1, A22.rows, armas_x_data(W)); // compute householder to zero subdiagonal entries __compute_householder_vec(&a21, &tq1, conf); tauval = armas_x_get(&tq1, 0, 0); // set subdiagonal to unit v0 = armas_x_get(&a21, 0, 0); armas_x_set(&a21, 0, 0, 1.0); // y21 := tauq*A22*a21 armas_x_mvmult_sym(__ZERO, &y21, tauval, &A22, &a21, ARMAS_LOWER, conf); // beta := tauq*a21.T*y21 beta = tauval * armas_x_dot(&a21, &y21, conf); // y21 := y21 - 0.5*beta*a21 armas_x_axpy(&y21, -__HALF*beta, &a21, conf); // A22 := A22 - a21*y21.T - y21*a21.T armas_x_mvupdate2_sym(&A22, -__ONE, &a21, &y21, ARMAS_LOWER, conf); // restore subdiagonal armas_x_set(&a21, 0, 0, v0); // ------------------------------------------------------------------------ __continue_3x3to2x2(&ATL, __nil, __nil, &ABR, /**/ &A00, &a11, &A22, /**/ A, ARMAS_PBOTTOMRIGHT); __continue_3x1to2x1(&tT, &tB, /**/ &tq0, &tq1, /**/ tauq, ARMAS_PBOTTOM); } return 0; }
void __gemmf(char *transa, char *transb, int *m, int *n, int *k, DTYPE *alpha, DTYPE *A, int *lda, DTYPE *B, int *ldb, DTYPE *beta, DTYPE *C, int *ldc) { armas_conf_t *conf = armas_conf_default(); armas_x_dense_t c, a, b; int flags = 0; armas_x_make(&c, *m, *n, *ldc, C); armas_x_make(&a, *m, *k, *lda, A); armas_x_make(&b, *k, *n, *ldb, B); if (toupper(*transa) == 'T') flags |= ARMAS_TRANSA; if (toupper(*transb) == 'T') flags |= ARMAS_TRANSB; armas_x_mult(*beta, &c, *alpha, &a, &b, flags, conf); }
void __syrkf(char *uplo, char *trans, int *n, int *k, DTYPE *alpha, DTYPE *A, int *lda, DTYPE *beta, DTYPE *C, int *ldc) { armas_conf_t *conf = armas_conf_default(); armas_x_dense_t c, a; int flags = 0; flags |= toupper(*uplo) == 'L' ? ARMAS_LOWER : ARMAS_UPPER; if (toupper(*trans) == 'T') flags |= ARMAS_TRANS; armas_x_make(&c, *n, *n, *ldc, C); if (flags & ARMAS_TRANS) { armas_x_make(&a, *k, *n, *lda, A); } else { armas_x_make(&a, *n, *k, *lda, A); } armas_x_update_sym(&c, &a, *alpha, *beta, flags, conf); }
void __gerf(int *m, int *n, DTYPE *alpha, DTYPE *X, int *incx, DTYPE *Y, int *incy, DTYPE *A, int *lda) { armas_conf_t *conf = armas_conf_default(); armas_x_dense_t y, a, x; armas_x_make(&a, *m, *n, *lda, A); if (*incy == 1) { // column vector armas_x_make(&y, *n, 1, *n, Y); } else { // row vector armas_x_make(&y, 1, *n, *incy, Y); } if (*incx == 1) { armas_x_make(&x, *m, 1, *m, X); } else { armas_x_make(&x, 1, *m, *incx, X); } armas_x_mvupdate(&a, *alpha, &x, &y, conf); }
void __cblas_ger(const enum CBLAS_ORDER order, const int M, const int N, const DTYPE alpha, DTYPE *X, const int incx, DTYPE *Y, const int incy, DTYPE *A, const int lda) { armas_conf_t *conf = armas_conf_default(); armas_x_dense_t y, a, x; if (incy == 1) { // column vector armas_x_make(&y, N, 1, N, Y); } else { // row vector armas_x_make(&y, 1, N, incy, Y); } if (incx == 1) { armas_x_make(&x, M, 1, M, X); } else { armas_x_make(&x, 1, M, incx, X); } if (order == CblasRowMajor) { armas_x_make(&a, N, M, lda, A); armas_x_mvupdate(&a, alpha, &y, &x, conf); } else { armas_x_make(&a, M, N, lda, A); armas_x_mvupdate(&a, alpha, &x, &y, conf); } }
static void __compat_axpy(const int n, const DTYPE alpha, DTYPE *X, const int incx, DTYPE *Y, const int incy) { armas_conf_t *conf = armas_conf_default(); armas_x_dense_t y, x; int ix, iy, nx, ny, k; DTYPE xv, yv; ix = incx < 0 ? - incx : incx; iy = incy < 0 ? - incy : incy; if (ix == 1) { armas_x_make(&x, n, 1, n, X); } else { armas_x_make(&x, 1, n, ix, X); } if (iy == 1) { armas_x_make(&y, n, 1, n, Y); } else { armas_x_make(&y, 1, n, iy, Y); } if (incx*incy > 0) { armas_x_axpy(&y, &x, alpha, conf); return; } // if not same sign then iteration direction is different (so clever) ix = incx < 0 ? n - 1 : 0; iy = incy < 0 ? n - 1 : 0; nx = ix == 0 ? 1 : -1; ny = iy == 0 ? 1 : -1; for (k = 0; k < n; ix += nx, iy += ny, k++) { xv = armas_x_get_at_unsafe(&x, ix); yv = armas_x_get_at_unsafe(&y, iy); armas_x_set_at_unsafe(&y, iy, yv + alpha*xv); } }
void __cblas_trmm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side, const enum CBLAS_UPLO uplo, const enum CBLAS_TRANSPOSE transa, const enum CBLAS_DIAG diag, int M, int N, DTYPE alpha, DTYPE *A, int lda, DTYPE *B, int ldb) { armas_conf_t conf = *armas_conf_default(); armas_x_dense_t Aa, Ba; int flags = 0; switch (order) { case CblasColMajor: flags |= side == CblasRight ? ARMAS_RIGHT : ARMAS_LEFT; flags |= uplo == CblasUpper ? ARMAS_UPPER : ARMAS_LOWER; if (diag == CblasUnit) flags |= ARMAS_UNIT; if (transa == CblasTrans) flags |= ARMAS_TRANSA; // M > ldb --> error armas_x_make(&Ba, M, N, ldb, B); if (side == CblasRight) { // N > lda --> error armas_x_make(&Aa, N, N, lda, A); } else { // M > lda --> error armas_x_make(&Aa, M, M, lda, A); } break; case CblasRowMajor: flags |= side == CblasRight ? ARMAS_LEFT : ARMAS_RIGHT; flags |= uplo == CblasUpper ? ARMAS_LOWER : ARMAS_UPPER; if (diag == CblasUnit) flags |= ARMAS_UNIT; if (transa == CblasNoTrans) flags |= ARMAS_TRANSA; // N > ldb --> error armas_x_make(&Ba, N, M, ldb, B); if (side == CblasRight) { // N > lda --> error armas_x_make(&Aa, M, M, lda, A); } else { // M > lda --> error armas_x_make(&Aa, N, N, lda, A); } break; default: return; } armas_x_mult_trm(&Ba, &Aa, alpha, flags, &conf); }
/** * @brief Reduce symmetric matrix to tridiagonal form by similiarity transformation A = QTQ^T * * @param[in,out] A * On entry, symmetric matrix with elemets stored in upper (lower) triangular * part. On exit, diagonal and first super (sub) diagonals hold matrix T. * The upper (lower) triangular part above (below) first super(sub)diagonal * is used to store orthogonal matrix Q. * * @param[out] tauq * Scalar coefficients of elementary reflectors. * * @param[in] flags * ARMAS_LOWER or ARMAS_UPPER * * @param[in] wb * Workspace buffer needed for computation. To compute size of the required space call * the function with workspace bytes set to zero. Size of workspace is returned in * `wb.bytes` and no other computation or parameter size checking is done and function * returns with success. * * @param[in] confs * Optional blocking configuration * * @retval 0 success * @retval -1 error and `conf.error` set to last error * * Last error codes returned * - `ARMAS_ESIZE` if n(C) != m(A) for C*op(Q) or m(C) != m(A) for op(Q)*C * - `ARMAS_EINVAL` C or A or tau is null pointer * - `ARMAS_EWORK` if workspace is less than required for unblocked computation * * #### Additional information * * If LOWER, then the matrix Q is represented as product of elementary reflectors * * \f$ Q = H_1 H_2...H_{n-1}. \f$ * * If UPPER, then the matrix Q is represented as product * * \f$ Q = H_{n-1}...H_2 H_1, H_k = I - tau*v_k*v_k^T. \f$ * * The contents of A on exit is as follow for N = 5. * * LOWER UPPER * ( d . . . . ) ( d e v3 v2 v1 ) * ( e d . . . ) ( . d e v2 v1 ) * ( v1 e d . . ) ( . . d e v1 ) * ( v1 v2 e d . ) ( . . . d e ) * ( v1 v2 v3 e d ) ( . . . . d ) */ int armas_x_trdreduce_w(armas_x_dense_t *A, armas_x_dense_t *tauq, int flags, armas_wbuf_t *wb, armas_conf_t *conf) { armas_x_dense_t Y, Wrk; size_t wsmin, wsz; int lb, err = 0; DTYPE *buf; if (!conf) conf = armas_conf_default(); if (!A) { conf->error = ARMAS_EINVAL; return -1; } if (wb && wb->bytes == 0) { if (conf->lb > 0 && A->cols > conf->lb) wb->bytes = (A->cols * conf->lb) * sizeof(DTYPE); else wb->bytes = A->cols * sizeof(DTYPE); return 0; } if (A->rows != A->cols) { conf->error = ARMAS_ESIZE; return -1; } if (!armas_x_isvector(tauq) && armas_x_size(tauq) != A->cols) { conf->error = ARMAS_EINVAL; return -1; } wsmin = A->cols * sizeof(DTYPE); if (!wb || (wsz = armas_wbytes(wb)) < wsmin) { conf->error = ARMAS_EWORK; return -1; } // adjust blocking factor to workspace lb = conf->lb; if (lb > 0 && A->cols > lb) { wsz /= sizeof(DTYPE); if (wsz < A->cols * lb) { lb = (wsz / A->cols) & ~0x3; if (lb < ARMAS_BLOCKING_MIN) lb = 0; } } wsz = armas_wpos(wb); buf = (DTYPE *)armas_wptr(wb); if (lb == 0 || A->cols <= lb) { armas_x_make(&Wrk, A->rows, 1, A->rows, buf); if (flags & ARMAS_UPPER) { err = __unblk_trdreduce_upper(A, tauq, &Wrk, FALSE, conf); } else { err = __unblk_trdreduce_lower(A, tauq, &Wrk, conf); } } else { armas_x_make(&Y, A->rows, lb, A->rows, buf); // Make W = Y in folling. W is used in following subroutine only when reducing // last block to tridiagonal form and Y is not needed anymore // TODO: fix this later if (flags & ARMAS_UPPER) { err = __blk_trdreduce_upper(A, tauq, &Y, &Y, lb, conf); } else { err = __blk_trdreduce_lower(A, tauq, &Y, &Y, lb, conf); } } armas_wsetpos(wb, wsz); return err; }
/** * @brief Multiply with orthogonal matrix Q from LQ factorization * * Multiply and replace C with Q*C or Q.T*C where Q is a real orthogonal matrix * defined as the product of k elementary reflectors. * * Q = H(k)H(k-1)...H(1) * * as returned by armas_x_lqfactor(). * * @param[in,out] C * On entry, the M-by-N matrix C or if flag bit RIGHT is set then * N-by-M matrix. On exit C is overwritten by Q*C or Q.T*C. * If bit RIGHT is set then C is overwritten by C*Q or C*Q.T * * @param[in] A * LQ factorization as returned by lqfactor() where the upper * trapezoidal part holds the elementary reflectors. * * @param[in] tau * The scalar factors of the elementary reflectors. * * @param[in] flags * Indicators. Valid indicators *ARMAS_LEFT*, *ARMAS_RIGHT*, *ARMAS_TRANS* * * @param[out] W * Workspace buffer needed for computation. To compute size of the required space call * the function with workspace bytes set to zero. Size of workspace is returned in * `wb.bytes` and no other computation or parameter size checking is done and function * returns with success. * * @param[in,out] conf * Blocking configuration. Field LB defines block sized. If it is zero * unblocked invocation is assumed. * * @retval 0 Success * @retval -1 Error, `conf.error` holds error code * * Last error codes returned * - `ARMAS_ESIZE` if m(C) != n(A) for C*op(Q) or n(C) != n(A) for op(Q)*C * - `ARMAS_EINVAL` C or A or tau is null pointer * - `ARMAS_EWORK` if workspace is less than required for unblocked computation * * Compatible with lapack.DORMLQ * * #### Notes * m(A) is number of elementary reflectors == A.rows * n(A) is the order of the Q matrix == A.cols * * \cond * LEFT : m(C) == n(A) * RIGHT: n(C) == n(A) * \endcond * \ingroup lapack */ int armas_x_lqmult_w(armas_x_dense_t *C, const armas_x_dense_t *A, const armas_x_dense_t *tau, int flags, armas_wbuf_t *wb, armas_conf_t *conf) { armas_x_dense_t T, Wrk; size_t wsmin, wsz = 0; int lb, K, P; DTYPE *buf; if (!conf) conf = armas_conf_default(); if (!C) { conf->error = ARMAS_EINVAL; return -1; } K = (flags & ARMAS_RIGHT) != 0 ? C->rows : C->cols; if (wb && wb->bytes == 0) { if (conf->lb > 0 && K > conf->lb) wb->bytes = ((conf->lb + K) * conf->lb) * sizeof(DTYPE); else wb->bytes = K * sizeof(DTYPE); return 0; } if (!A || !tau) { conf->error = ARMAS_EINVAL; return -1; } // check sizes; A, tau return from armas_x_qrfactor() P = (flags & ARMAS_RIGHT) != 0 ? C->cols : C->rows; if (P != A->cols || armas_x_size(tau) != A->rows) { conf->error = ARMAS_ESIZE; return -1; } lb = conf->lb; wsmin = K * sizeof(DTYPE); if (! wb || (wsz = armas_wbytes(wb)) < wsmin) { conf->error = ARMAS_EWORK; return -1; } // adjust blocking factor for workspace if (lb > 0 && K > lb) { wsz /= sizeof(DTYPE); if (wsz < (K + lb)*lb) { // ws = (K + lb)*lb => lb^2 + K*lb - wsz = 0 => (sqrt(K^2 + 4*wsz) - K)/2 lb = ((int)(__SQRT((DTYPE)(K*K + 4*wsz))) - K) / 2; lb &= ~0x3; if (lb < ARMAS_BLOCKING_MIN) lb = 0; } } wsz = armas_wpos(wb); buf = (DTYPE *)armas_wptr(wb); if (lb == 0 || K <= lb) { // unblocked armas_x_make(&Wrk, K, 1, K, buf); if ((flags & ARMAS_RIGHT) != 0) { __unblk_lqmult_right(C, (armas_x_dense_t *)A, (armas_x_dense_t *)tau, &Wrk, flags, conf); } else { __unblk_lqmult_left(C, (armas_x_dense_t *)A, (armas_x_dense_t *)tau, &Wrk, flags, conf); } } else { // blocked code; block reflector T and temporary space armas_x_make(&T, lb, lb, lb, buf); armas_x_make(&Wrk, K, lb, K, &buf[armas_x_size(&T)]); if ((flags & ARMAS_RIGHT) != 0) { __blk_lqmult_right(C, (armas_x_dense_t *)A, (armas_x_dense_t *)tau, &T, &Wrk, flags, lb, conf); } else { __blk_lqmult_left(C, (armas_x_dense_t *)A, (armas_x_dense_t *)tau, &T, &Wrk, flags, lb, conf); } } armas_wsetpos(wb, wsz); return 0; }
int armas_x_rqfactor_w(armas_x_dense_t *A, armas_x_dense_t *tau, armas_wbuf_t *wb, armas_conf_t *conf) { armas_x_dense_t T, Wrk; size_t wsmin, wsneed, wsz = 0; int lb; DTYPE *buf; if (!conf) conf = armas_conf_default(); if (!A) { conf->error = ARMAS_EINVAL; return -1; } if (wb && wb->bytes == 0) { if (conf->lb > 0 && A->rows > conf->lb) wb->bytes = (A->rows * conf->lb) * sizeof(DTYPE); else wb->bytes = A->rows * sizeof(DTYPE); return 0; } // must have: M <= N if (A->rows > A->cols) { conf->error = ARMAS_ESIZE; return -1; } if (! armas_x_isvector(tau) || armas_x_size(tau) != A->rows) { conf->error = ARMAS_EINVAL; return -1; } lb = conf->lb; wsmin = A->rows * sizeof(DTYPE); if (! wb || (wsz = armas_wbytes(wb)) < wsmin) { conf->error = ARMAS_EWORK; return -1; } // adjust blocking factor for workspace wsneed = (lb > 0 ? A->rows * lb : A->rows) * sizeof(DTYPE); if (lb > 0 && wsz < wsneed) { lb = ( wsz / (A->rows * sizeof(DTYPE))) & ~0x3; if (lb < ARMAS_BLOCKING_MIN) lb = 0; } wsz = armas_wpos(wb); buf = (DTYPE *)armas_wptr(wb); if (lb == 0 || A->rows <= lb) { armas_x_make(&Wrk, A->rows, 1, A->rows, buf); __unblk_rqfactor(A, tau, &Wrk, conf); } else { // block reflector [lb,lb]; temp space [n(A)-lb, lb] matrix armas_x_make(&T, lb, lb, lb, buf); armas_x_make(&Wrk, A->rows-lb, lb, A->rows-lb, &buf[armas_x_size(&T)]); __blk_rqfactor(A, tau, &T, &Wrk, lb, conf); } armas_wsetpos(wb, wsz); return 0; }