예제 #1
0
파일: symv.c 프로젝트: hrautila/armas
void __symvf(char *uplo, int *n, DTYPE *alpha, DTYPE *A,
             int *lda, DTYPE *X, int *incx, DTYPE *beta, DTYPE *Y, int *incy)
{
    armas_conf_t *conf = armas_conf_default();
    armas_x_dense_t y, a, x;
    int flags = 0;

    switch (toupper(*uplo)) {
    case 'L':
        flags |= ARMAS_LOWER;
        break;
    case 'U':
    default:
        flags |= ARMAS_UPPER;
        break;
    }
    
    armas_x_make(&a, *n, *n, *lda, A);
    if (*incy == 1) {
        // column vector
        armas_x_make(&y, *n, 1, *n, Y);
    } else {
        // row vector
        armas_x_make(&y, 1, *n, *incy, Y);
    }
    if (*incx == 1) {
        armas_x_make(&x, *n, 1, *n, X);
    } else {
        armas_x_make(&x, 1, *n, *incx, X);
    }
    armas_x_mvmult_sym(*beta, &y, *alpha, &a, &x, flags, conf);
}
예제 #2
0
파일: symv.c 프로젝트: hrautila/armas
void __cblas_symv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo,  
                  int N, DTYPE alpha, DTYPE *A, int lda, 
                  DTYPE *X,  int incx, DTYPE beta, DTYPE *Y, int incy)
{
    armas_x_dense_t Aa, x, y;
    armas_conf_t conf = *armas_conf_default();
    int flags;

    switch (order) {
    case CblasRowMajor:
        flags = uplo == CblasUpper ? ARMAS_LOWER : ARMAS_UPPER;
        break;
    case CblasColMajor:
    default:
        flags = uplo == CblasUpper ? ARMAS_UPPER : ARMAS_LOWER;
        break;
    }
    armas_x_make(&Aa, N, N, lda, A);
    if (incx == 1) {
        armas_x_make(&x, N, 1, N, X);
    } else {
        armas_x_make(&x, 1, N, incx, X);
    }
    if (incy == 1) {
        armas_x_make(&y, N, 1, N, Y);
    } else {
        armas_x_make(&y, 1, N, incy, Y);
    }
    armas_x_mvmult_sym(beta, &y, alpha, &Aa, &x, flags, &conf);
}
예제 #3
0
파일: trmv.c 프로젝트: hrautila/armas
void __trmvf(char *uplo, char *trans, char *diag, int *n, DTYPE *A,
             int *lda, DTYPE *X, int *incx)
{
    armas_conf_t *conf = armas_conf_default();
    armas_x_dense_t a, x;
    int flags = 0;

    switch (toupper(*uplo)) {
    case 'L':
        flags |= ARMAS_LOWER;
        break;
    case 'U':
    default:
        flags |= ARMAS_UPPER;
        break;
    }
    if (toupper(*trans) == 'T') 
        flags |= ARMAS_TRANS;
    if (toupper(*diag) == 'U') 
        flags |= ARMAS_UNIT;
    
    armas_x_make(&a, *n, *n, *lda, A);
    if (*incx == 1) {
        armas_x_make(&x, *n, 1, *n, X);
    } else {
        armas_x_make(&x, 1, *n, *incx, X);
    }
    armas_x_mvmult_trm(&x, __ONE, &a, flags, conf);
}
예제 #4
0
파일: trmv.c 프로젝트: hrautila/armas
void __cblas_trmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo, 
                  const enum CBLAS_TRANSPOSE trans,  const enum CBLAS_DIAG diag, int N,
                  DTYPE alpha, DTYPE *A, int lda, DTYPE *X,  int incx)
{
    armas_conf_t *conf = armas_conf_default();
    armas_x_dense_t Aa, x;
    int flags = 0;

    switch (order) {
    case CblasRowMajor:
        flags |= uplo == CblasUpper ? ARMAS_LOWER : ARMAS_UPPER;
        if (trans == CblasNoTrans)
            flags |= ARMAS_TRANS;
        if (diag == CblasUnit)
            flags |= ARMAS_UNIT;
        break;
    case CblasColMajor:
    default:
        flags |= uplo == CblasUpper ? ARMAS_UPPER : ARMAS_LOWER;
        if (trans == CblasTrans)
            flags |= ARMAS_TRANS;
        if (diag == CblasUnit)
            flags |= ARMAS_UNIT;
        break;
    }
    if (incx == 1) {
        armas_x_make(&x, N, 1, N, X);
    } else {
        armas_x_make(&x, 1, N, incx, X);
    }
    armas_x_make(&Aa, N, N, lda, A);
    armas_x_mvmult_trm(&x, alpha, &Aa, flags, conf);
}
예제 #5
0
void __cblas_syrk(const enum CBLAS_ORDER order, const enum CBLAS_UPLO uplo,
                  const enum CBLAS_TRANSPOSE trans, int N, int K, 
                  DTYPE alpha, DTYPE *A, int lda, DTYPE beta, DTYPE *C, int ldc)
{
    armas_conf_t conf = *armas_conf_default();
    armas_x_dense_t Ca, Aa;
    int flags = 0;

    switch (order) {
    case CblasRowMajor:
        flags |= uplo == CblasUpper ? ARMAS_LOWER : ARMAS_UPPER;
        if (trans == CblasNoTrans) {
            flags |= ARMAS_TRANS;
            armas_x_make(&Aa, K, N, lda, A);
        } else {
            armas_x_make(&Aa, N, K, lda, A);
        }
        break;
    case CblasColMajor:
    default:
        flags |= uplo == CblasUpper ? ARMAS_UPPER : ARMAS_LOWER;
        if (trans == CblasTrans) {
            flags |= ARMAS_TRANS;
            armas_x_make(&Aa, K, N, lda, A);
        } else {
            armas_x_make(&Aa, N, K, lda, A);
        }
        break;
    }
    armas_x_make(&Ca, N, N, ldc, C);
    armas_x_update_sym(&Ca, &Aa, alpha, beta, flags, conf);
}
예제 #6
0
void __trmmf(char *side, char *uplo, char *transa, char *diag,int *m, int *n,
             DTYPE *alpha, DTYPE *A, int *lda, DTYPE *B, int *ldb)
{
    armas_conf_t *conf = armas_conf_default();
    armas_x_dense_t a, b;
    int flags = 0;

    armas_x_make(&b, *m, *n, *ldb, B);

    switch (toupper(*side)) {
    case 'R':
        flags |= ARMAS_RIGHT;
        armas_x_make(&a, *n, *n, *lda, A);
        break;
    case 'L':
    default:
        flags |= ARMAS_LEFT;
        armas_x_make(&a, *m, *m, *lda, A);
        break;
    }
    flags |= toupper(*uplo) == 'L' ? ARMAS_LOWER : ARMAS_UPPER;
    if (toupper(*transa) == 'T')
        flags |=  ARMAS_TRANS;
    if (toupper(*diag) == 'U')
        flags |=  ARMAS_UNIT;

    armas_x_mult_trm(&b, &a, *alpha, flags, conf);
}
예제 #7
0
파일: chol.c 프로젝트: hrautila/armas
/**
 * \brief Cholesky factorization
 *
 * Compute the Cholesky factorization of a symmetric positive definite N-by-N matrix A.
 *
 * \param[in,out] A     
 *     On entry, the symmetric matrix A. If *ARMAS_UPPER* is set the upper triangular part
 *     of A contains the upper triangular part of the matrix A, and strictly
 *     lower part A is not referenced. If *ARMAS_LOWER* is set the lower triangular part
 *     of a contains the lower triangular part of the matrix A. Likewise, the
 *     strictly upper part of A is not referenced. On exit, factor U or L from the
 *     Cholesky factorization \f$ A = U^T U \f$ or \f$ A = L L^T \f$
 * \param[out] P
 *     Optional pivot array. If non null then pivoting factorization is computed. 
 *     Set to ARMAS_NOPIVOT if normal cholesky factorization wanted.
 * \param[in] flags 
 *      The matrix structure indicator, *ARMAS_UPPER* for upper tridiagonal and 
 *      *ARMAS_LOWER* for lower tridiagonal matrix.
 * \param[in,out] wb
 *     Workspace for pivoting factorization. If wb.bytes is zero then work buffer size is
 *     returned in wb.bytes. 
 * \param[in,out] conf 
 *     Optional blocking configuration. If not provided default blocking configuration
 *     will be used.
 *
 * \retval  0 Success; If pivoting factorized then result matrix is full rank.
 * \retval >0 Success with pivoting factorization, matrix rank returned.
 * \retval -1 Error, `conf.error` holds error code
 *
 * Pivoting factorization is computed of P is not ARMAS_NOPIVOT. Pivoting factorization
 * needs workspace of size N elements for blocked version. If no workspace (ARMAS_NOWORK) is provided 
 * or it is too small then unblocked algorithm is used. 
 *
 * Factorization stops when diagonal element goes small enough.
 * Default value for stopping criteria is \$ max |diag(A)|*N*epsilon \$
 * If value of absolute stopping criteria `conf.stop` is non-zero then it is used. Otherwise
 * if `conf.smult` (relative stopping criterion multiplier) is non-zero then stopping criteria 
 * is set to \$ max |diag(A)|*smult \$.
 *
 * Pivoting factorization returns zero if result matrix is full rank. Return value greater than
 * zero is rank of result matrix. Negative values indicate error.
 *
 * 
 * Compatible with lapack.DPOTRF
 * \ingroup lapack
 */
int armas_x_cholfactor_w(armas_x_dense_t *A, 
                         armas_pivot_t *P,
                         int flags,
                         armas_wbuf_t *wb,
                         armas_conf_t *conf)
{
  armas_x_dense_t W;
  int err = 0;
  if (!conf)
    conf = armas_conf_default();

  if (!A) {
    conf->error = ARMAS_EINVAL;
    return -1;
  }

  if (P != ARMAS_NOPIVOT) {
    if (wb && wb->bytes == 0) {
      wb->bytes = A->cols * sizeof(DTYPE);
      return 0;
    }

    // working space is N elements for blocked factorization
    if (conf->lb > 0 && A->cols > conf->lb) {
      if (!wb || armas_wbytes(wb) < A->cols * sizeof(DTYPE))
        armas_x_make(&W, 0, 0, 0, (DTYPE *)0); // too small, force unblocked
      else
        armas_x_make(&W, A->cols, 1, A->cols, (DTYPE *)armas_wptr(wb));
    } else {
      // force unblocked with zero sized workspace
      armas_x_make(&W, 0, 0, 0, (DTYPE *)0);
    }
    return __cholfactor_pv(A, &W, P, flags, conf);
  }
  
  if (A->rows != A->cols) {
    conf->error = ARMAS_ESIZE;
    return -1;
  }    

  if (conf->lb == 0 || A->cols <= conf->lb) {
    if (flags & ARMAS_LOWER) {
      err = __unblk_cholfactor_lower(A, conf);
    } else {
      err = __unblk_cholfactor_upper(A, conf);
    }
  } else {
    if (flags & ARMAS_LOWER) {
      err = __blk_cholfactor_lower(A, conf->lb, conf);
    } else {
      err = __blk_cholfactor_upper(A, conf->lb, conf);
    }
  }
  return err;
}
예제 #8
0
파일: iamax.c 프로젝트: hrautila/armas
int __cblas_iamax(int N, DTYPE *X, int incx)
{
    armas_conf_t *conf = armas_conf_default();
    armas_x_dense_t x;

    if (incx == 1) {
        armas_x_make(&x, N, 1, N, X);
    } else {
        armas_x_make(&x, 1, N, incx, X);
    }
    return armas_x_iamax(&x, conf);
}
예제 #9
0
파일: iamax.c 프로젝트: hrautila/armas
int __iamaxf(int *n, DTYPE *X, int *incx)
{
    armas_conf_t *conf = armas_conf_default();
    armas_x_dense_t x;

    if (*incx == 1) {
        armas_x_make(&x, *n, 1, *n, X);
    } else {
        armas_x_make(&x, 1, *n, *incx, X);
    }
    return armas_x_iamax(&x, conf);
}
예제 #10
0
파일: gemm.c 프로젝트: hrautila/armas
void __cblas_gemm(int order, int transa,  int transb, int M, int N,
                  int K, DTYPE alpha, DTYPE *A, int lda, DTYPE *B,  int ldb,
                  DTYPE beta, DTYPE *C, int ldc)
{
    armas_conf_t conf = *armas_conf_default();
    armas_x_dense_t Ca, Aa, Ba;
    int flags = 0;
    
    switch (order) {
    case CblasColMajor:
        if (transa == CblasTrans) {
            flags |= ARMAS_TRANSA;
            // error: K > lda
            armas_x_make(&Aa, K, M, lda, A);
        } else {
            // error: M > lda
            armas_x_make(&Aa, M, K, lda, A);
        }
        if (transb == CblasTrans) {
            flags |= ARMAS_TRANSB;
            armas_x_make(&Ba, N, K, ldb, B);
        } else {
            armas_x_make(&Ba, K, N, ldb, B);
        }
        armas_x_make(&Ca, M, N, ldc, C);
        break;
    case CblasRowMajor:
        if (transa == CblasNoTrans) {
            flags |= ARMAS_TRANSA;
            // error: M > lda
            armas_x_make(&Aa, M, K, lda, A);
        } else {
            // error: K > lda
            armas_x_make(&Aa, K, M, lda, A);
        }
        if (transb == CblasNoTrans) {
            flags |= ARMAS_TRANSB;
            armas_x_make(&Ba, K, N, ldb, B);
        } else {
            armas_x_make(&Ba, N, K, ldb, B);
        }
        armas_x_make(&Ca, N, M, ldc, C);
        break;
    default:
        return;
    }
    armas_x_mult(beta, &Ca, alpha, &Aa, &Ba, flags, &conf);
}
예제 #11
0
파일: trired.c 프로젝트: hrautila/armas
/*
 * Tridiagonal reduction of LOWER triangular symmetric matrix, zero elements below 1st
 * subdiagonal:
 *
 *   A =  (1 - tau*u*u.t)*A*(1 - tau*u*u.T)
 *     =  (I - tau*( 0   0   )) (a11 a12) (I - tau*( 0  0   ))
 *        (        ( 0  u*u.t)) (a21 A22) (        ( 0 u*u.t))
 *
 *  a11, a12, a21 not affected
 *
 *  from LEFT:
 *    A22 = A22 - tau*u*u.T*A22
 *  from RIGHT:
 *    A22 = A22 - tau*A22*u.u.T
 *
 *  LEFT and RIGHT:
 *    A22   = A22 - tau*u*u.T*A22 - tau*(A22 - tau*u*u.T*A22)*u*u.T
 *          = A22 - tau*u*u.T*A22 - tau*A22*u*u.T + tau*tau*u*u.T*A22*u*u.T
 *    [x    = tau*A22*u (vector)]  (SYMV)
 *    A22   = A22 - u*x.T - x*u.T + tau*u*u.T*x*u.T
 *    [beta = tau*u.T*x (scalar)]  (DOT)
 *          = A22 - u*x.T - x*u.T + beta*u*u.T
 *          = A22 - u*(x - 0.5*beta*u).T - (x - 0.5*beta*u)*u.T
 *    [w    = x - 0.5*beta*u]      (AXPY)
 *          = A22 - u*w.T - w*u.T  (SYR2)
 *
 * Result of reduction for N = 5:
 *    ( d  .  .  . . )
 *    ( e  d  .  . . )
 *    ( v1 e  d  . . )
 *    ( v1 v2 e  d . )
 *    ( v1 v2 v3 e d )
 */
static
int __unblk_trdreduce_lower(armas_x_dense_t *A, armas_x_dense_t *tauq,
                            armas_x_dense_t *W, armas_conf_t *conf)
{
  armas_x_dense_t ATL, ABR, A00, a11, a21, A22;
  armas_x_dense_t tT, tB, tq0, tq1, tq2, y21;
  DTYPE v0, beta, tauval;

  EMPTY(A00); EMPTY(a11);

  if (armas_x_size(tauq) == 0)
    return 0;

  __partition_2x2(&ATL,  __nil,
                  __nil, &ABR,   /**/  A, 0, 0, ARMAS_PTOPLEFT);
  __partition_2x1(&tT,
                  &tB,   /**/  tauq, 0, ARMAS_PTOP);

  while (ABR.rows > 0 && ABR.cols > 0) {
    __repartition_2x2to3x3(&ATL,
                           &A00,  __nil, __nil,
                           __nil, &a11,  __nil,
                           __nil, &a21,  &A22,  /**/  A, 1, ARMAS_PBOTTOMRIGHT);
    __repartition_2x1to3x1(&tT,
                           &tq0,
                           &tq1,
                           &tq2,  /**/  tauq, 1, ARMAS_PBOTTOM);
    // ------------------------------------------------------------------------
    // temp vector for this round
    armas_x_make(&y21, A22.rows, 1, A22.rows, armas_x_data(W));

    // compute householder to zero subdiagonal entries
    __compute_householder_vec(&a21, &tq1, conf);
    tauval = armas_x_get(&tq1, 0, 0);
    
    // set subdiagonal to unit
    v0 = armas_x_get(&a21, 0, 0);
    armas_x_set(&a21, 0, 0, 1.0);

    // y21 := tauq*A22*a21
    armas_x_mvmult_sym(__ZERO, &y21, tauval, &A22, &a21, ARMAS_LOWER, conf);
    // beta := tauq*a21.T*y21
    beta = tauval * armas_x_dot(&a21, &y21, conf);
    // y21 := y21 - 0.5*beta*a21
    armas_x_axpy(&y21, -__HALF*beta, &a21, conf);
    // A22 := A22 - a21*y21.T - y21*a21.T
    armas_x_mvupdate2_sym(&A22, -__ONE, &a21, &y21, ARMAS_LOWER, conf);
    // restore subdiagonal
    armas_x_set(&a21, 0, 0, v0);

    // ------------------------------------------------------------------------
    __continue_3x3to2x2(&ATL,  __nil,
                        __nil, &ABR, /**/  &A00, &a11, &A22, /**/  A, ARMAS_PBOTTOMRIGHT);
    __continue_3x1to2x1(&tT,
                        &tB, /**/  &tq0, &tq1,  /**/ tauq, ARMAS_PBOTTOM);
  }
  return 0;
}
예제 #12
0
파일: gemm.c 프로젝트: hrautila/armas
void __gemmf(char *transa, char *transb, int *m, int *n, int *k, DTYPE *alpha, DTYPE *A,
            int *lda, DTYPE *B, int *ldb, DTYPE *beta, DTYPE *C, int *ldc)
{
    armas_conf_t *conf = armas_conf_default();
    armas_x_dense_t c, a, b;
    int flags = 0;

    armas_x_make(&c, *m, *n, *ldc, C);
    armas_x_make(&a, *m, *k, *lda, A);
    armas_x_make(&b, *k, *n, *ldb, B);

    if (toupper(*transa) == 'T') 
        flags |= ARMAS_TRANSA;
    if (toupper(*transb) == 'T')
        flags |= ARMAS_TRANSB;

    armas_x_mult(*beta, &c, *alpha, &a, &b, flags, conf);
}
예제 #13
0
void __syrkf(char *uplo, char *trans, int *n, int *k, DTYPE *alpha, DTYPE *A,
             int *lda, DTYPE *beta, DTYPE *C, int *ldc)
{
    armas_conf_t *conf = armas_conf_default();
    armas_x_dense_t c, a;
    int flags = 0;

    flags |= toupper(*uplo) == 'L' ? ARMAS_LOWER : ARMAS_UPPER;
    if (toupper(*trans) == 'T') 
        flags |= ARMAS_TRANS;

    armas_x_make(&c, *n, *n, *ldc, C);
    if (flags & ARMAS_TRANS) {
        armas_x_make(&a, *k, *n, *lda, A);
    } else {
        armas_x_make(&a, *n, *k, *lda, A);
    }

    armas_x_update_sym(&c, &a, *alpha, *beta, flags, conf);
}
예제 #14
0
파일: ger.c 프로젝트: hrautila/armas
void __gerf(int *m, int *n, DTYPE *alpha, DTYPE *X,
            int *incx, DTYPE *Y, int *incy, DTYPE *A, int *lda)
{
    armas_conf_t *conf = armas_conf_default();
    armas_x_dense_t y, a, x;

    armas_x_make(&a, *m, *n, *lda, A);
    if (*incy == 1) {
        // column vector
        armas_x_make(&y, *n, 1, *n, Y);
    } else {
        // row vector
        armas_x_make(&y, 1, *n, *incy, Y);
    }
    if (*incx == 1) {
        armas_x_make(&x, *m, 1, *m, X);
    } else {
        armas_x_make(&x, 1, *m, *incx, X);
    }
    armas_x_mvupdate(&a, *alpha, &x, &y, conf);
}
예제 #15
0
파일: ger.c 프로젝트: hrautila/armas
void __cblas_ger(const enum CBLAS_ORDER order, const int M,  
                 const int N, const DTYPE alpha, DTYPE *X, const int incx, 
                 DTYPE *Y, const int incy, DTYPE *A, const int lda)
{
    armas_conf_t *conf = armas_conf_default();
    armas_x_dense_t y, a, x;

    if (incy == 1) {
        // column vector
        armas_x_make(&y, N, 1, N, Y);
    } else {
        // row vector
        armas_x_make(&y, 1, N, incy, Y);
    }
    if (incx == 1) {
        armas_x_make(&x, M, 1, M, X);
    } else {
        armas_x_make(&x, 1, M, incx, X);
    }

    if (order == CblasRowMajor) {
        armas_x_make(&a, N, M, lda, A);
        armas_x_mvupdate(&a, alpha, &y, &x, conf);
    } else {
        armas_x_make(&a, M, N, lda, A);
        armas_x_mvupdate(&a, alpha, &x, &y, conf);
    }
}
예제 #16
0
파일: axpy.c 프로젝트: hrautila/armas
static
void __compat_axpy(const int n, const DTYPE alpha, DTYPE *X, const int incx, DTYPE *Y, const int incy)
{
    armas_conf_t *conf = armas_conf_default();
    armas_x_dense_t y, x;
    int ix, iy, nx, ny, k;
    DTYPE xv, yv;

    ix = incx < 0 ? - incx : incx;
    iy = incy < 0 ? - incy : incy;

    if (ix == 1) {
        armas_x_make(&x, n, 1, n, X);
    } else {
        armas_x_make(&x, 1, n, ix, X);
    }
    if (iy == 1) {
        armas_x_make(&y, n, 1, n, Y);
    } else {
        armas_x_make(&y, 1, n, iy, Y);
    }
    if (incx*incy > 0) {
        armas_x_axpy(&y, &x, alpha, conf);
        return;
    }
    // if not same sign then iteration direction is different (so clever)
    ix = incx < 0 ? n - 1 : 0;
    iy = incy < 0 ? n - 1 : 0;
    nx = ix == 0 ? 1 : -1;
    ny = iy == 0 ? 1 : -1;
    for (k = 0; k < n; ix += nx, iy += ny, k++) {
        xv = armas_x_get_at_unsafe(&x, ix);
        yv = armas_x_get_at_unsafe(&y, iy);
        armas_x_set_at_unsafe(&y, iy, yv + alpha*xv);
    }
}
예제 #17
0
void __cblas_trmm(const enum CBLAS_ORDER order, const enum CBLAS_SIDE side,
                  const enum CBLAS_UPLO uplo, const enum CBLAS_TRANSPOSE transa,
                  const enum CBLAS_DIAG diag, int M, int N,
                  DTYPE alpha, DTYPE *A, int lda, DTYPE *B,  int ldb)
{
    armas_conf_t conf = *armas_conf_default();
    armas_x_dense_t Aa, Ba;
    int flags = 0;
    switch (order) {
    case CblasColMajor:
        flags |= side == CblasRight ? ARMAS_RIGHT : ARMAS_LEFT;
        flags |= uplo == CblasUpper ? ARMAS_UPPER : ARMAS_LOWER;
        if (diag == CblasUnit)
            flags |= ARMAS_UNIT;
        if (transa == CblasTrans)
            flags |= ARMAS_TRANSA;
        // M > ldb --> error
        armas_x_make(&Ba, M, N, ldb, B);
        if (side == CblasRight) {
            // N > lda --> error
            armas_x_make(&Aa, N, N, lda, A);
        } else {
            // M > lda --> error
            armas_x_make(&Aa, M, M, lda, A);
        }
        break;
    case CblasRowMajor:
        flags |= side == CblasRight ? ARMAS_LEFT : ARMAS_RIGHT;
        flags |= uplo == CblasUpper ? ARMAS_LOWER : ARMAS_UPPER;
        if (diag == CblasUnit)
            flags |= ARMAS_UNIT;
        if (transa == CblasNoTrans)
            flags |= ARMAS_TRANSA;
        // N > ldb --> error
        armas_x_make(&Ba, N, M, ldb, B);
        if (side == CblasRight) {
            // N > lda --> error
            armas_x_make(&Aa, M, M, lda, A);
        } else {
            // M > lda --> error
            armas_x_make(&Aa, N, N, lda, A);
        }
        break;
    default:
        return;
    }
    armas_x_mult_trm(&Ba, &Aa, alpha, flags, &conf);
}
예제 #18
0
파일: trired.c 프로젝트: hrautila/armas
/**
 * @brief Reduce symmetric matrix to tridiagonal form by similiarity transformation A = QTQ^T
 *
 * @param[in,out]  A
 *      On entry, symmetric matrix with elemets stored in upper (lower) triangular
 *      part. On exit, diagonal and first super (sub) diagonals hold matrix T.
 *      The upper (lower) triangular part above (below) first super(sub)diagonal
 *      is used to store orthogonal matrix Q.
 *
 * @param[out] tauq
 *      Scalar coefficients of elementary reflectors.
 *
 * @param[in] flags
 *      ARMAS_LOWER or ARMAS_UPPER
 *
 * @param[in] wb
 *     Workspace buffer needed for computation. To compute size of the required space call 
 *     the function with workspace bytes set to zero. Size of workspace is returned in 
 *     `wb.bytes` and no other computation or parameter size checking is done and function
 *     returns with success.
 *
 * @param[in] confs
 *      Optional blocking configuration
 *
 *  @retval 0  success
 *  @retval -1 error and `conf.error` set to last error
 *
 *  Last error codes returned
 *   - `ARMAS_ESIZE`  if n(C) != m(A) for C*op(Q) or m(C) != m(A) for op(Q)*C
 *   - `ARMAS_EINVAL` C or A or tau is null pointer
 *   - `ARMAS_EWORK`  if workspace is less than required for unblocked computation
 *
 * #### Additional information
 *
 * If LOWER, then the matrix Q is represented as product of elementary reflectors
 *
 *   \f$ Q = H_1 H_2...H_{n-1}. \f$
 *
 * If UPPER, then the matrix Q is represented as product 
 * 
 *   \f$ Q = H_{n-1}...H_2 H_1,  H_k = I - tau*v_k*v_k^T. \f$
 *
 * The contents of A on exit is as follow for N = 5.
 *
 *    LOWER                    UPPER
 *     ( d  .  .  .  . )         ( d  e  v3 v2 v1 )
 *     ( e  d  .  .  . )         ( .  d  e  v2 v1 )
 *     ( v1 e  d  .  . )         ( .  .  d  e  v1 )
 *     ( v1 v2 e  d  . )         ( .  .  .  d  e  )
 *     ( v1 v2 v3 e  d )         ( .  .  .  .  d  )
 */
int armas_x_trdreduce_w(armas_x_dense_t *A,
                        armas_x_dense_t *tauq,
                        int flags,
                        armas_wbuf_t *wb,
                        armas_conf_t *conf)
{
  armas_x_dense_t Y, Wrk;
  size_t wsmin, wsz;
  int lb, err = 0;
  DTYPE *buf;
  
  if (!conf)
    conf = armas_conf_default();

  if (!A) {
    conf->error = ARMAS_EINVAL;
    return -1;
  }

  if (wb && wb->bytes == 0) {
    if (conf->lb > 0 && A->cols > conf->lb)
      wb->bytes = (A->cols * conf->lb) * sizeof(DTYPE);
    else
      wb->bytes = A->cols * sizeof(DTYPE);
    return 0;
  }

  if (A->rows != A->cols) {
    conf->error = ARMAS_ESIZE;
    return -1;
  }
  if (!armas_x_isvector(tauq) && armas_x_size(tauq) != A->cols) {
    conf->error = ARMAS_EINVAL;
    return -1;
  }
  
  wsmin = A->cols * sizeof(DTYPE);
  if (!wb || (wsz = armas_wbytes(wb)) < wsmin) {
    conf->error = ARMAS_EWORK;
    return -1;
  }
  // adjust blocking factor to workspace
  lb = conf->lb;
  if (lb > 0 && A->cols > lb) {
    wsz /= sizeof(DTYPE);
    if (wsz < A->cols * lb) {
      lb = (wsz / A->cols) & ~0x3;
      if (lb < ARMAS_BLOCKING_MIN)
        lb = 0;
    }
  }

  wsz = armas_wpos(wb);
  buf = (DTYPE *)armas_wptr(wb);
  
  if (lb == 0 || A->cols <= lb) {
    armas_x_make(&Wrk, A->rows, 1, A->rows, buf);
    if (flags & ARMAS_UPPER) {
      err = __unblk_trdreduce_upper(A, tauq, &Wrk, FALSE, conf);
    } else {
      err = __unblk_trdreduce_lower(A, tauq, &Wrk, conf);
    }
  } else {
    armas_x_make(&Y, A->rows, lb, A->rows, buf);
    // Make W = Y in folling. W is used in following subroutine only when reducing
    // last block to tridiagonal form and Y is not needed anymore
    // TODO: fix this later
    if (flags & ARMAS_UPPER) {
      err = __blk_trdreduce_upper(A, tauq, &Y, &Y, lb, conf);
    } else {
      err = __blk_trdreduce_lower(A, tauq, &Y, &Y, lb, conf);
    }
  }
  armas_wsetpos(wb, wsz);
  return err;
}
예제 #19
0
파일: lqmult.c 프로젝트: hrautila/armas
/**
 * @brief Multiply with orthogonal matrix Q from LQ factorization
 *
 * Multiply and replace C with Q*C or Q.T*C where Q is a real orthogonal matrix
 * defined as the product of k elementary reflectors.
 *
 *    Q = H(k)H(k-1)...H(1)
 *
 * as returned by armas_x_lqfactor().
 *
 * @param[in,out] C
 *     On entry, the M-by-N matrix C or if flag bit RIGHT is set then
 *     N-by-M matrix.  On exit C is overwritten by Q*C or Q.T*C.
 *     If bit RIGHT is set then C is  overwritten by C*Q or C*Q.T
 *
 * @param[in] A
 *     LQ factorization as returned by lqfactor() where the upper
 *     trapezoidal part holds the elementary reflectors.
 *
 * @param[in] tau
 *   The scalar factors of the elementary reflectors.
 *
 * @param[in] flags
 *     Indicators. Valid indicators *ARMAS_LEFT*, *ARMAS_RIGHT*, *ARMAS_TRANS*
 *       
 * @param[out] W
 *    Workspace buffer needed for computation. To compute size of the required space call 
 *    the function with workspace bytes set to zero. Size of workspace is returned in 
 *    `wb.bytes` and no other computation or parameter size checking is done and function
 *    returns with success.
 *
 * @param[in,out] conf
 *     Blocking configuration. Field LB defines block sized. If it is zero
 *     unblocked invocation is assumed.
 *
 * @retval  0 Success
 * @retval -1 Error, `conf.error` holds error code
 *
 *  Last error codes returned
 *   - `ARMAS_ESIZE`  if m(C) != n(A) for C*op(Q) or n(C) != n(A) for op(Q)*C
 *   - `ARMAS_EINVAL` C or A or tau is null pointer
 *   - `ARMAS_EWORK`  if workspace is less than required for unblocked computation
 *
 * Compatible with lapack.DORMLQ
 *
 * #### Notes
 *   m(A) is number of elementary reflectors == A.rows
 *   n(A) is the order of the Q matrix == A.cols
 *
 * \cond
 *   LEFT : m(C) == n(A)
 *   RIGHT: n(C) == n(A)
 * \endcond
 * \ingroup lapack
 */
int armas_x_lqmult_w(armas_x_dense_t *C,
                     const armas_x_dense_t *A,
                     const armas_x_dense_t *tau, 
                     int flags,
                     armas_wbuf_t *wb,
                     armas_conf_t *conf)
{
  armas_x_dense_t T, Wrk;
  size_t wsmin, wsz = 0;
  int lb, K, P;
  DTYPE *buf;
  
  if (!conf)
    conf = armas_conf_default();

  if (!C) {
    conf->error = ARMAS_EINVAL;
    return -1;
  }
  K = (flags & ARMAS_RIGHT) != 0 ? C->rows : C->cols;
  if (wb && wb->bytes == 0) {
    if (conf->lb > 0 && K > conf->lb) 
      wb->bytes = ((conf->lb + K) * conf->lb) * sizeof(DTYPE);
    else
      wb->bytes = K * sizeof(DTYPE);
    return 0;
  }

  if (!A || !tau) {
    conf->error = ARMAS_EINVAL;
    return -1;
  }

  // check sizes; A, tau return from armas_x_qrfactor()
  P = (flags & ARMAS_RIGHT) != 0 ? C->cols : C->rows;
  if (P != A->cols || armas_x_size(tau) != A->rows) {
    conf->error = ARMAS_ESIZE;
    return -1;
  }

  lb = conf->lb;
  wsmin = K * sizeof(DTYPE);
  if (! wb || (wsz = armas_wbytes(wb)) < wsmin) {
    conf->error = ARMAS_EWORK;
    return -1;
  }
  // adjust blocking factor for workspace
  if (lb > 0 && K > lb) {
    wsz /= sizeof(DTYPE);
    if (wsz < (K + lb)*lb) {
      // ws = (K + lb)*lb => lb^2 + K*lb - wsz = 0  =>  (sqrt(K^2 + 4*wsz) - K)/2
      lb  = ((int)(__SQRT((DTYPE)(K*K + 4*wsz))) - K) / 2;
      lb &= ~0x3;
      if (lb < ARMAS_BLOCKING_MIN)
        lb = 0;
    }
  }

  wsz = armas_wpos(wb);
  buf = (DTYPE *)armas_wptr(wb);

  if (lb == 0 || K <= lb) {
    // unblocked 
    armas_x_make(&Wrk, K, 1, K, buf);
    if ((flags & ARMAS_RIGHT) != 0) {
      __unblk_lqmult_right(C, (armas_x_dense_t *)A, (armas_x_dense_t *)tau, &Wrk, flags, conf);
    } else {
      __unblk_lqmult_left(C, (armas_x_dense_t *)A, (armas_x_dense_t *)tau, &Wrk, flags, conf);
    }
  } else {
    // blocked code; block reflector T and temporary space
    armas_x_make(&T, lb, lb, lb, buf);
    armas_x_make(&Wrk, K, lb,  K, &buf[armas_x_size(&T)]);

    if ((flags & ARMAS_RIGHT) != 0) {
      __blk_lqmult_right(C, (armas_x_dense_t *)A, (armas_x_dense_t *)tau, &T, &Wrk, flags, lb, conf);
    } else {
      __blk_lqmult_left(C, (armas_x_dense_t *)A, (armas_x_dense_t *)tau, &T, &Wrk, flags, lb, conf);
    }
  }
  armas_wsetpos(wb, wsz);
  return 0;
}
예제 #20
0
파일: rq.c 프로젝트: hrautila/armas
int armas_x_rqfactor_w(armas_x_dense_t *A,
                       armas_x_dense_t *tau,
                       armas_wbuf_t *wb,
                       armas_conf_t *conf)
{
  armas_x_dense_t T, Wrk;
  size_t wsmin, wsneed, wsz = 0;
  int lb;
  DTYPE *buf;
  
  if (!conf)
    conf = armas_conf_default();

  if (!A) {
    conf->error = ARMAS_EINVAL;
    return -1;
  }
  if (wb && wb->bytes == 0) {
    if (conf->lb > 0 && A->rows > conf->lb)
      wb->bytes = (A->rows * conf->lb) * sizeof(DTYPE);
    else 
      wb->bytes = A->rows * sizeof(DTYPE);
    return 0;
  }

  // must have: M <= N
  if (A->rows > A->cols) {
    conf->error = ARMAS_ESIZE;
    return -1;
  }
  if (! armas_x_isvector(tau) || armas_x_size(tau) !=  A->rows) {
    conf->error = ARMAS_EINVAL;
    return -1;
  }
  
  lb = conf->lb;
  wsmin = A->rows * sizeof(DTYPE);
  if (! wb || (wsz = armas_wbytes(wb)) < wsmin) {
    conf->error = ARMAS_EWORK;
    return -1;
  }
  // adjust blocking factor for workspace
  wsneed = (lb > 0 ? A->rows * lb : A->rows) * sizeof(DTYPE);
  if (lb > 0 && wsz < wsneed) {
    lb = ( wsz / (A->rows * sizeof(DTYPE))) & ~0x3;
    if (lb < ARMAS_BLOCKING_MIN)
      lb = 0;
  }
  
  wsz = armas_wpos(wb);
  buf = (DTYPE *)armas_wptr(wb);
  
  if (lb == 0 || A->rows <= lb) {
    armas_x_make(&Wrk, A->rows, 1, A->rows, buf);
    __unblk_rqfactor(A, tau, &Wrk, conf);
  } else {
    // block reflector [lb,lb]; temp space [n(A)-lb, lb] matrix
    armas_x_make(&T, lb, lb, lb, buf);
    armas_x_make(&Wrk, A->rows-lb, lb, A->rows-lb, &buf[armas_x_size(&T)]);

    __blk_rqfactor(A, tau, &T, &Wrk, lb, conf);
  }
  armas_wsetpos(wb, wsz);
  return 0;
}