Exemplo n.º 1
0
/*
 * transform ket, s2 to label AO symmetry
 */
int AO2MOmmm_ket_nr_s2(double *vout, double *vin, double *buf,
                       struct _AO2MOEnvs *envs, int seekdim)
{
        switch (seekdim) {
                case OUTPUTIJ: return envs->nao * envs->ket_count;
                case INPUT_IJ: return envs->nao * (envs->nao+1) / 2;
        }
        const double D0 = 0;
        const double D1 = 1;
        const char SIDE_L = 'L';
        const char UPLO_U = 'U';
        int nao = envs->nao;
        int j_start = envs->ket_start;
        int j_count = envs->ket_count;
        double *mo_coeff = envs->mo_coeff;
        int i, j;

        dsymm_(&SIDE_L, &UPLO_U, &nao, &j_count,
               &D1, vin, &nao, mo_coeff+j_start*nao, &nao,
               &D0, buf, &nao);
        for (j = 0; j < nao; j++) {
                for (i = 0; i < j_count; i++) {
                        vout[i] = buf[i*nao+j];
                }
                vout += j_count;
        }
        return 0;
}
Exemplo n.º 2
0
/*
 * s2-AO integrals to s1-MO integrals, efficient for i_count > j_count
 * shape requirements:
 *      vout[:,bra_count*ket_count], eri[:,nao*(nao+1)/2]
 */
int AO2MOmmm_nr_s2_igtj(double *vout, double *eri, struct _AO2MOEnvs *envs,
                        int seekdim)
{
        switch (seekdim) {
                case 1: return envs->bra_count * envs->ket_count;
                case 2: return envs->nao * (envs->nao+1) / 2;
        }
        const double D0 = 0;
        const double D1 = 1;
        const char SIDE_L = 'L';
        const char UPLO_U = 'U';
        const char TRANS_T = 'T';
        const char TRANS_N = 'N';
        int nao = envs->nao;
        int i_start = envs->bra_start;
        int i_count = envs->bra_count;
        int j_start = envs->ket_start;
        int j_count = envs->ket_count;
        double *mo_coeff = envs->mo_coeff;
        double *buf = malloc(sizeof(double)*nao*j_count);

        // C_qj (pq| = (pj|, where (pq| is in C-order
        dsymm_(&SIDE_L, &UPLO_U, &nao, &j_count,
               &D1, eri, &nao, mo_coeff+j_start*nao, &nao,
               &D0, buf, &nao);
        // C_pi (pj| = (ij|
        dgemm_(&TRANS_T, &TRANS_N, &j_count, &i_count, &nao,
               &D1, buf, &nao, mo_coeff+i_start*nao, &nao,
               &D0, vout, &j_count);
        free(buf);
        return 0;
}
Exemplo n.º 3
0
int
f2c_dsymm(char* side, char* uplo, integer* M, integer* N,
          doublereal* alpha,
          doublereal* A, integer* lda,
          doublereal* B, integer* ldb,
          doublereal* beta,
          doublereal* C, integer* ldc)
{
    dsymm_(side, uplo, M, N,
           alpha, A, lda, B, ldb, beta, C, ldc);
    return 0;
}
Exemplo n.º 4
0
 void dsymm(const SIDE Side,
            const UPLO Uplo,
            const int M,
            const int N,
            const double alpha,
            const double *A,
            const int lda,
            const double *B,
            const int ldb,
            const double beta,
            double *C,
            const int ldc) {
   dsymm_(SideChar[Side], UploChar[Uplo], &M, &N, &alpha,
          A, &lda, B, &ldb, &beta, C, &ldc);
 }
Exemplo n.º 5
0
Arquivo: latools.c Projeto: cran/Bmix
void la_dsymm(int Alhs, int Arow, int Acol, int Brow, int Bcol, int Crow, int Ccol,
		  double **A, double **B, double **C, double alpha, double beta)
{
  int m, n, lda, ldb, ldc;
  char Atri = 'L';  // reference the lower triangle of column major A. 
  char Aside;

  assert(Arow=Acol); 
  
  m = Crow; n = Ccol; lda = Arow; ldb = Brow; ldc = Crow;
  assert(ldb == m);

  if(Alhs){  Aside='L';  assert(lda == m); }
  else{  Aside='R';  assert(lda == n); }
  
  dsymm_(&Aside,&Atri,&m,&n,&alpha,*A,&lda,*B,&ldb,&beta,*C,&ldc);
}
Exemplo n.º 6
0
/*
 * transform bra, s2 to label AO symmetry
 */
int AO2MOmmm_bra_nr_s2(double *vout, double *vin, double *buf,
                       struct _AO2MOEnvs *envs, int seekdim)
{
        switch (seekdim) {
                case OUTPUTIJ: return envs->bra_count * envs->nao;
                case INPUT_IJ: return envs->nao * (envs->nao+1) / 2;
        }
        const double D0 = 0;
        const double D1 = 1;
        const char SIDE_L = 'L';
        const char UPLO_U = 'U';
        int nao = envs->nao;
        int i_start = envs->bra_start;
        int i_count = envs->bra_count;
        double *mo_coeff = envs->mo_coeff;

        dsymm_(&SIDE_L, &UPLO_U, &nao, &i_count,
               &D1, vin, &nao, mo_coeff+i_start*nao, &nao,
               &D0, vout, &nao);
        return 0;
}
Exemplo n.º 7
0
/*
 * s2-AO integrals to s2-MO integrals
 * shape requirements:
 *      vout[:,bra_count*(bra_count+1)/2] and bra_count==ket_count,
 *      eri[:,nao*(nao+1)/2]
 * first s2 is the AO symmetry, second s2 is the MO symmetry
 */
int AO2MOmmm_nr_s2_s2(double *vout, double *eri, struct _AO2MOEnvs *envs,
                        int seekdim)
{
        switch (seekdim) {
                case 1: assert(envs->bra_count == envs->ket_count);
                        return envs->bra_count * (envs->bra_count+1) / 2;
                case 2: return envs->nao * (envs->nao+1) / 2;
        }
        const double D0 = 0;
        const double D1 = 1;
        const char SIDE_L = 'L';
        const char UPLO_U = 'U';
        int nao = envs->nao;
        int i_start = envs->bra_start;
        int i_count = envs->bra_count;
        int j_start = envs->ket_start;
        int j_count = envs->ket_count;
        double *mo_coeff = envs->mo_coeff;
        double *buf = malloc(sizeof(double)*(nao*i_count+i_count*j_count));
        double *buf1 = buf + nao*i_count;
        int i, j, ij;

        // C_pi (pq| = (iq|, where (pq| is in C-order
        dsymm_(&SIDE_L, &UPLO_U, &nao, &i_count,
               &D1, eri, &nao, mo_coeff+i_start*nao, &nao,
               &D0, buf, &nao);
        AO2MOdtriumm_o1(j_count, i_count, nao, 0,
                        mo_coeff+j_start*nao, buf, buf1);

        for (i = 0, ij = 0; i < i_count; i++) {
                for (j = 0; j <= i; j++, ij++) {
                        vout[ij] = buf1[j];
                }
                buf1 += j_count;
        }
        free(buf);
        return 0;
}
Exemplo n.º 8
0
/* Subroutine */ int dpot03_(char *uplo, integer *n, doublereal *a, integer *
	lda, doublereal *ainv, integer *ldainv, doublereal *work, integer *
	ldwork, doublereal *rwork, doublereal *rcond, doublereal *resid)
{
    /* System generated locals */
    integer a_dim1, a_offset, ainv_dim1, ainv_offset, work_dim1, work_offset, 
	    i__1, i__2;

    /* Local variables */
    integer i__, j;
    doublereal eps;
    extern logical lsame_(char *, char *);
    doublereal anorm;
    extern /* Subroutine */ int dsymm_(char *, char *, integer *, integer *, 
	    doublereal *, doublereal *, integer *, doublereal *, integer *, 
	    doublereal *, doublereal *, integer *);
    extern doublereal dlamch_(char *), dlange_(char *, integer *, 
	    integer *, doublereal *, integer *, doublereal *);
    doublereal ainvnm;
    extern doublereal dlansy_(char *, char *, integer *, doublereal *, 
	    integer *, doublereal *);


/*  -- LAPACK test routine (version 3.1) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  DPOT03 computes the residual for a symmetric matrix times its */
/*  inverse: */
/*     norm( I - A*AINV ) / ( N * norm(A) * norm(AINV) * EPS ), */
/*  where EPS is the machine epsilon. */

/*  Arguments */
/*  ========== */

/*  UPLO    (input) CHARACTER*1 */
/*          Specifies whether the upper or lower triangular part of the */
/*          symmetric matrix A is stored: */
/*          = 'U':  Upper triangular */
/*          = 'L':  Lower triangular */

/*  N       (input) INTEGER */
/*          The number of rows and columns of the matrix A.  N >= 0. */

/*  A       (input) DOUBLE PRECISION array, dimension (LDA,N) */
/*          The original symmetric matrix A. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A.  LDA >= max(1,N) */

/*  AINV    (input/output) DOUBLE PRECISION array, dimension (LDAINV,N) */
/*          On entry, the inverse of the matrix A, stored as a symmetric */
/*          matrix in the same format as A. */
/*          In this version, AINV is expanded into a full matrix and */
/*          multiplied by A, so the opposing triangle of AINV will be */
/*          changed; i.e., if the upper triangular part of AINV is */
/*          stored, the lower triangular part will be used as work space. */

/*  LDAINV  (input) INTEGER */
/*          The leading dimension of the array AINV.  LDAINV >= max(1,N). */

/*  WORK    (workspace) DOUBLE PRECISION array, dimension (LDWORK,N) */

/*  LDWORK  (input) INTEGER */
/*          The leading dimension of the array WORK.  LDWORK >= max(1,N). */

/*  RWORK   (workspace) DOUBLE PRECISION array, dimension (N) */

/*  RCOND   (output) DOUBLE PRECISION */
/*          The reciprocal of the condition number of A, computed as */
/*          ( 1/norm(A) ) / norm(AINV). */

/*  RESID   (output) DOUBLE PRECISION */
/*          norm(I - A*AINV) / ( N * norm(A) * norm(AINV) * EPS ) */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Quick exit if N = 0. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    ainv_dim1 = *ldainv;
    ainv_offset = 1 + ainv_dim1;
    ainv -= ainv_offset;
    work_dim1 = *ldwork;
    work_offset = 1 + work_dim1;
    work -= work_offset;
    --rwork;

    /* Function Body */
    if (*n <= 0) {
	*rcond = 1.;
	*resid = 0.;
	return 0;
    }

/*     Exit with RESID = 1/EPS if ANORM = 0 or AINVNM = 0. */

    eps = dlamch_("Epsilon");
    anorm = dlansy_("1", uplo, n, &a[a_offset], lda, &rwork[1]);
    ainvnm = dlansy_("1", uplo, n, &ainv[ainv_offset], ldainv, &rwork[1]);
    if (anorm <= 0. || ainvnm <= 0.) {
	*rcond = 0.;
	*resid = 1. / eps;
	return 0;
    }
    *rcond = 1. / anorm / ainvnm;

/*     Expand AINV into a full matrix and call DSYMM to multiply */
/*     AINV on the left by A. */

    if (lsame_(uplo, "U")) {
	i__1 = *n;
	for (j = 1; j <= i__1; ++j) {
	    i__2 = j - 1;
	    for (i__ = 1; i__ <= i__2; ++i__) {
		ainv[j + i__ * ainv_dim1] = ainv[i__ + j * ainv_dim1];
/* L10: */
	    }
/* L20: */
	}
    } else {
	i__1 = *n;
	for (j = 1; j <= i__1; ++j) {
	    i__2 = *n;
	    for (i__ = j + 1; i__ <= i__2; ++i__) {
		ainv[j + i__ * ainv_dim1] = ainv[i__ + j * ainv_dim1];
/* L30: */
	    }
/* L40: */
	}
    }
    dsymm_("Left", uplo, n, n, &c_b11, &a[a_offset], lda, &ainv[ainv_offset], 
	    ldainv, &c_b12, &work[work_offset], ldwork);

/*     Add the identity matrix to WORK . */

    i__1 = *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
	work[i__ + i__ * work_dim1] += 1.;
/* L50: */
    }

/*     Compute norm(I - A*AINV) / (N * norm(A) * norm(AINV) * EPS) */

    *resid = dlange_("1", n, n, &work[work_offset], ldwork, &rwork[1]);

    *resid = *resid * *rcond / eps / (doublereal) (*n);

    return 0;

/*     End of DPOT03 */

} /* dpot03_ */
Exemplo n.º 9
0
/* Subroutine */ int dlarhs_(char *path, char *xtype, char *uplo, char *trans,
	 integer *m, integer *n, integer *kl, integer *ku, integer *nrhs, 
	doublereal *a, integer *lda, doublereal *x, integer *ldx, doublereal *
	b, integer *ldb, integer *iseed, integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, x_dim1, x_offset, i__1;

    /* Builtin functions   
       Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen);

    /* Local variables */
    static logical band;
    static char diag[1];
    static logical tran;
    static integer j;
    extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, 
	    integer *, doublereal *, doublereal *, integer *, doublereal *, 
	    integer *, doublereal *, doublereal *, integer *),
	     dgbmv_(char *, integer *, integer *, integer *, integer *, 
	    doublereal *, doublereal *, integer *, doublereal *, integer *, 
	    doublereal *, doublereal *, integer *);
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int dsbmv_(char *, integer *, integer *, 
	    doublereal *, doublereal *, integer *, doublereal *, integer *, 
	    doublereal *, doublereal *, integer *), dtbmv_(char *, 
	    char *, char *, integer *, integer *, doublereal *, integer *, 
	    doublereal *, integer *), dtrmm_(char *, 
	    char *, char *, char *, integer *, integer *, doublereal *, 
	    doublereal *, integer *, doublereal *, integer *);
    static char c1[1], c2[2];
    extern /* Subroutine */ int dspmv_(char *, integer *, doublereal *, 
	    doublereal *, doublereal *, integer *, doublereal *, doublereal *,
	     integer *), dsymm_(char *, char *, integer *, integer *, 
	    doublereal *, doublereal *, integer *, doublereal *, integer *, 
	    doublereal *, doublereal *, integer *), dtpmv_(
	    char *, char *, char *, integer *, doublereal *, doublereal *, 
	    integer *);
    static integer mb, nx;
    extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, 
	    doublereal *, integer *, doublereal *, integer *), 
	    xerbla_(char *, integer *);
    extern logical lsamen_(integer *, char *, char *);
    extern /* Subroutine */ int dlarnv_(integer *, integer *, integer *, 
	    doublereal *);
    static logical notran, gen, tri, qrs, sym;


#define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
#define x_ref(a_1,a_2) x[(a_2)*x_dim1 + a_1]


/*  -- LAPACK test routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       February 29, 1992   


    Purpose   
    =======   

    DLARHS chooses a set of NRHS random solution vectors and sets   
    up the right hand sides for the linear system   
       op( A ) * X = B,   
    where op( A ) may be A or A' (transpose of A).   

    Arguments   
    =========   

    PATH    (input) CHARACTER*3   
            The type of the real matrix A.  PATH may be given in any   
            combination of upper and lower case.  Valid types include   
               xGE:  General m x n matrix   
               xGB:  General banded matrix   
               xPO:  Symmetric positive definite, 2-D storage   
               xPP:  Symmetric positive definite packed   
               xPB:  Symmetric positive definite banded   
               xSY:  Symmetric indefinite, 2-D storage   
               xSP:  Symmetric indefinite packed   
               xSB:  Symmetric indefinite banded   
               xTR:  Triangular   
               xTP:  Triangular packed   
               xTB:  Triangular banded   
               xQR:  General m x n matrix   
               xLQ:  General m x n matrix   
               xQL:  General m x n matrix   
               xRQ:  General m x n matrix   
            where the leading character indicates the precision.   

    XTYPE   (input) CHARACTER*1   
            Specifies how the exact solution X will be determined:   
            = 'N':  New solution; generate a random X.   
            = 'C':  Computed; use value of X on entry.   

    UPLO    (input) CHARACTER*1   
            Specifies whether the upper or lower triangular part of the   
            matrix A is stored, if A is symmetric.   
            = 'U':  Upper triangular   
            = 'L':  Lower triangular   

    TRANS   (input) CHARACTER*1   
            Specifies the operation applied to the matrix A.   
            = 'N':  System is  A * x = b   
            = 'T':  System is  A'* x = b   
            = 'C':  System is  A'* x = b   

    M       (input) INTEGER   
            The number or rows of the matrix A.  M >= 0.   

    N       (input) INTEGER   
            The number of columns of the matrix A.  N >= 0.   

    KL      (input) INTEGER   
            Used only if A is a band matrix; specifies the number of   
            subdiagonals of A if A is a general band matrix or if A is   
            symmetric or triangular and UPLO = 'L'; specifies the number   
            of superdiagonals of A if A is symmetric or triangular and   
            UPLO = 'U'.  0 <= KL <= M-1.   

    KU      (input) INTEGER   
            Used only if A is a general band matrix or if A is   
            triangular.   

            If PATH = xGB, specifies the number of superdiagonals of A,   
            and 0 <= KU <= N-1.   

            If PATH = xTR, xTP, or xTB, specifies whether or not the   
            matrix has unit diagonal:   
            = 1:  matrix has non-unit diagonal (default)   
            = 2:  matrix has unit diagonal   

    NRHS    (input) INTEGER   
            The number of right hand side vectors in the system A*X = B.   

    A       (input) DOUBLE PRECISION array, dimension (LDA,N)   
            The test matrix whose type is given by PATH.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.   
            If PATH = xGB, LDA >= KL+KU+1.   
            If PATH = xPB, xSB, xHB, or xTB, LDA >= KL+1.   
            Otherwise, LDA >= max(1,M).   

    X       (input or output) DOUBLE PRECISION array, dimension(LDX,NRHS)   
            On entry, if XTYPE = 'C' (for 'Computed'), then X contains   
            the exact solution to the system of linear equations.   
            On exit, if XTYPE = 'N' (for 'New'), then X is initialized   
            with random values.   

    LDX     (input) INTEGER   
            The leading dimension of the array X.  If TRANS = 'N',   
            LDX >= max(1,N); if TRANS = 'T', LDX >= max(1,M).   

    B       (output) DOUBLE PRECISION array, dimension (LDB,NRHS)   
            The right hand side vector(s) for the system of equations,   
            computed from B = op(A) * X, where op(A) is determined by   
            TRANS.   

    LDB     (input) INTEGER   
            The leading dimension of the array B.  If TRANS = 'N',   
            LDB >= max(1,M); if TRANS = 'T', LDB >= max(1,N).   

    ISEED   (input/output) INTEGER array, dimension (4)   
            The seed vector for the random number generator (used in   
            DLATMS).  Modified on exit.   

    INFO    (output) INTEGER   
            = 0: successful exit   
            < 0: if INFO = -i, the i-th argument had an illegal value   

    =====================================================================   


       Test the input parameters.   

       Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    x_dim1 = *ldx;
    x_offset = 1 + x_dim1 * 1;
    x -= x_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --iseed;

    /* Function Body */
    *info = 0;
    *(unsigned char *)c1 = *(unsigned char *)path;
    s_copy(c2, path + 1, (ftnlen)2, (ftnlen)2);
    tran = lsame_(trans, "T") || lsame_(trans, "C");
    notran = ! tran;
    gen = lsame_(path + 1, "G");
    qrs = lsame_(path + 1, "Q") || lsame_(path + 2, 
	    "Q");
    sym = lsame_(path + 1, "P") || lsame_(path + 1, 
	    "S");
    tri = lsame_(path + 1, "T");
    band = lsame_(path + 2, "B");
    if (! lsame_(c1, "Double precision")) {
	*info = -1;
    } else if (! (lsame_(xtype, "N") || lsame_(xtype, 
	    "C"))) {
	*info = -2;
    } else if ((sym || tri) && ! (lsame_(uplo, "U") || 
	    lsame_(uplo, "L"))) {
	*info = -3;
    } else if ((gen || qrs) && ! (tran || lsame_(trans, "N"))) {
	*info = -4;
    } else if (*m < 0) {
	*info = -5;
    } else if (*n < 0) {
	*info = -6;
    } else if (band && *kl < 0) {
	*info = -7;
    } else if (band && *ku < 0) {
	*info = -8;
    } else if (*nrhs < 0) {
	*info = -9;
    } else if (! band && *lda < max(1,*m) || band && (sym || tri) && *lda < *
	    kl + 1 || band && gen && *lda < *kl + *ku + 1) {
	*info = -11;
    } else if (notran && *ldx < max(1,*n) || tran && *ldx < max(1,*m)) {
	*info = -13;
    } else if (notran && *ldb < max(1,*m) || tran && *ldb < max(1,*n)) {
	*info = -15;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("DLARHS", &i__1);
	return 0;
    }

/*     Initialize X to NRHS random vectors unless XTYPE = 'C'. */

    if (tran) {
	nx = *m;
	mb = *n;
    } else {
	nx = *n;
	mb = *m;
    }
    if (! lsame_(xtype, "C")) {
	i__1 = *nrhs;
	for (j = 1; j <= i__1; ++j) {
	    dlarnv_(&c__2, &iseed[1], n, &x_ref(1, j));
/* L10: */
	}
    }

/*     Multiply X by op( A ) using an appropriate   
       matrix multiply routine. */

    if (lsamen_(&c__2, c2, "GE") || lsamen_(&c__2, c2, 
	    "QR") || lsamen_(&c__2, c2, "LQ") || lsamen_(&c__2, c2, "QL") || 
	    lsamen_(&c__2, c2, "RQ")) {

/*        General matrix */

	dgemm_(trans, "N", &mb, nrhs, &nx, &c_b32, &a[a_offset], lda, &x[
		x_offset], ldx, &c_b33, &b[b_offset], ldb);

    } else if (lsamen_(&c__2, c2, "PO") || lsamen_(&
	    c__2, c2, "SY")) {

/*        Symmetric matrix, 2-D storage */

	dsymm_("Left", uplo, n, nrhs, &c_b32, &a[a_offset], lda, &x[x_offset],
		 ldx, &c_b33, &b[b_offset], ldb);

    } else if (lsamen_(&c__2, c2, "GB")) {

/*        General matrix, band storage */

	i__1 = *nrhs;
	for (j = 1; j <= i__1; ++j) {
	    dgbmv_(trans, &mb, &nx, kl, ku, &c_b32, &a[a_offset], lda, &x_ref(
		    1, j), &c__1, &c_b33, &b_ref(1, j), &c__1);
/* L20: */
	}

    } else if (lsamen_(&c__2, c2, "PB")) {

/*        Symmetric matrix, band storage */

	i__1 = *nrhs;
	for (j = 1; j <= i__1; ++j) {
	    dsbmv_(uplo, n, kl, &c_b32, &a[a_offset], lda, &x_ref(1, j), &
		    c__1, &c_b33, &b_ref(1, j), &c__1);
/* L30: */
	}

    } else if (lsamen_(&c__2, c2, "PP") || lsamen_(&
	    c__2, c2, "SP")) {

/*        Symmetric matrix, packed storage */

	i__1 = *nrhs;
	for (j = 1; j <= i__1; ++j) {
	    dspmv_(uplo, n, &c_b32, &a[a_offset], &x_ref(1, j), &c__1, &c_b33,
		     &b_ref(1, j), &c__1);
/* L40: */
	}

    } else if (lsamen_(&c__2, c2, "TR")) {

/*        Triangular matrix.  Note that for triangular matrices,   
             KU = 1 => non-unit triangular   
             KU = 2 => unit triangular */

	dlacpy_("Full", n, nrhs, &x[x_offset], ldx, &b[b_offset], ldb);
	if (*ku == 2) {
	    *(unsigned char *)diag = 'U';
	} else {
	    *(unsigned char *)diag = 'N';
	}
	dtrmm_("Left", uplo, trans, diag, n, nrhs, &c_b32, &a[a_offset], lda, 
		&b[b_offset], ldb)
		;

    } else if (lsamen_(&c__2, c2, "TP")) {

/*        Triangular matrix, packed storage */

	dlacpy_("Full", n, nrhs, &x[x_offset], ldx, &b[b_offset], ldb);
	if (*ku == 2) {
	    *(unsigned char *)diag = 'U';
	} else {
	    *(unsigned char *)diag = 'N';
	}
	i__1 = *nrhs;
	for (j = 1; j <= i__1; ++j) {
	    dtpmv_(uplo, trans, diag, n, &a[a_offset], &b_ref(1, j), &c__1);
/* L50: */
	}

    } else if (lsamen_(&c__2, c2, "TB")) {

/*        Triangular matrix, banded storage */

	dlacpy_("Full", n, nrhs, &x[x_offset], ldx, &b[b_offset], ldb);
	if (*ku == 2) {
	    *(unsigned char *)diag = 'U';
	} else {
	    *(unsigned char *)diag = 'N';
	}
	i__1 = *nrhs;
	for (j = 1; j <= i__1; ++j) {
	    dtbmv_(uplo, trans, diag, n, kl, &a[a_offset], lda, &b_ref(1, j), 
		    &c__1);
/* L60: */
	}

    } else {

/*        If PATH is none of the above, return with an error code. */

	*info = -1;
	i__1 = -(*info);
	xerbla_("DLARHS", &i__1);
    }

    return 0;

/*     End of DLARHS */

} /* dlarhs_ */
Exemplo n.º 10
0
/* Subroutine */ int dpot06_(char *uplo, integer *n, integer *nrhs, 
	doublereal *a, integer *lda, doublereal *x, integer *ldx, doublereal *
	b, integer *ldb, doublereal *rwork, doublereal *resid)
{
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, x_dim1, x_offset, i__1;
    doublereal d__1, d__2;

    /* Local variables */
    integer j;
    doublereal eps;
    integer ifail;
    doublereal anorm, bnorm;
    extern /* Subroutine */ int dsymm_(char *, char *, integer *, integer *, 
	    doublereal *, doublereal *, integer *, doublereal *, integer *, 
	    doublereal *, doublereal *, integer *);
    doublereal xnorm;
    extern doublereal dlamch_(char *);
    extern integer idamax_(integer *, doublereal *, integer *);
    extern doublereal dlansy_(char *, char *, integer *, doublereal *, 
	    integer *, doublereal *);


/*  -- LAPACK test routine (version 3.1.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     April 2007 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  DPOT06 computes the residual for a solution of a system of linear */
/*  equations  A*x = b : */
/*     RESID = norm(B - A*X,inf) / ( norm(A,inf) * norm(X,inf) * EPS ), */
/*  where EPS is the machine epsilon. */

/*  Arguments */
/*  ========= */

/*  UPLO    (input) CHARACTER*1 */
/*          Specifies whether the upper or lower triangular part of the */
/*          symmetric matrix A is stored: */
/*          = 'U':  Upper triangular */
/*          = 'L':  Lower triangular */

/*  N       (input) INTEGER */
/*          The number of rows and columns of the matrix A.  N >= 0. */

/*  NRHS    (input) INTEGER */
/*          The number of columns of B, the matrix of right hand sides. */
/*          NRHS >= 0. */

/*  A       (input) DOUBLE PRECISION array, dimension (LDA,N) */
/*          The original M x N matrix A. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A.  LDA >= max(1,N). */

/*  X       (input) DOUBLE PRECISION array, dimension (LDX,NRHS) */
/*          The computed solution vectors for the system of linear */
/*          equations. */

/*  LDX     (input) INTEGER */
/*          The leading dimension of the array X.  If TRANS = 'N', */
/*          LDX >= max(1,N); if TRANS = 'T' or 'C', LDX >= max(1,N). */

/*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */
/*          On entry, the right hand side vectors for the system of */
/*          linear equations. */
/*          On exit, B is overwritten with the difference B - A*X. */

/*  LDB     (input) INTEGER */
/*          The leading dimension of the array B.  IF TRANS = 'N', */
/*          LDB >= max(1,M); if TRANS = 'T' or 'C', LDB >= max(1,N). */

/*  RWORK   (workspace) DOUBLE PRECISION array, dimension (N) */

/*  RESID   (output) DOUBLE PRECISION */
/*          The maximum over the number of right hand sides of */
/*          norm(B - A*X) / ( norm(A) * norm(X) * EPS ). */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Quick exit if N = 0 or NRHS = 0 */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    x_dim1 = *ldx;
    x_offset = 1 + x_dim1;
    x -= x_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    --rwork;

    /* Function Body */
    if (*n <= 0 || *nrhs == 0) {
	*resid = 0.;
	return 0;
    }

/*     Exit with RESID = 1/EPS if ANORM = 0. */

    eps = dlamch_("Epsilon");
    anorm = dlansy_("I", uplo, n, &a[a_offset], lda, &rwork[1]);
    if (anorm <= 0.) {
	*resid = 1. / eps;
	return 0;
    }

/*     Compute  B - A*X  and store in B. */
    ifail = 0;

    dsymm_("Left", uplo, n, nrhs, &c_b5, &a[a_offset], lda, &x[x_offset], ldx, 
	     &c_b6, &b[b_offset], ldb);

/*     Compute the maximum over the number of right hand sides of */
/*        norm(B - A*X) / ( norm(A) * norm(X) * EPS ) . */

    *resid = 0.;
    i__1 = *nrhs;
    for (j = 1; j <= i__1; ++j) {
	bnorm = (d__1 = b[idamax_(n, &b[j * b_dim1 + 1], &c__1) + j * b_dim1],
		 abs(d__1));
	xnorm = (d__1 = x[idamax_(n, &x[j * x_dim1 + 1], &c__1) + j * x_dim1],
		 abs(d__1));
	if (xnorm <= 0.) {
	    *resid = 1. / eps;
	} else {
/* Computing MAX */
	    d__1 = *resid, d__2 = bnorm / anorm / xnorm / eps;
	    *resid = max(d__1,d__2);
	}
/* L10: */
    }

    return 0;

/*     End of DPOT06 */

} /* dpot06_ */
Exemplo n.º 11
0
/* Subroutine */ int dpot02_(char *uplo, integer *n, integer *nrhs,
                             doublereal *a, integer *lda, doublereal *x, integer *ldx, doublereal *
                             b, integer *ldb, doublereal *rwork, doublereal *resid)
{
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, x_dim1, x_offset, i__1;
    doublereal d__1, d__2;

    /* Local variables */
    static integer j;
    extern doublereal dasum_(integer *, doublereal *, integer *);
    static doublereal anorm, bnorm;
    extern /* Subroutine */ int dsymm_(char *, char *, integer *, integer *,
                                       doublereal *, doublereal *, integer *, doublereal *, integer *,
                                       doublereal *, doublereal *, integer *);
    static doublereal xnorm;
    extern doublereal dlamch_(char *), dlansy_(char *, char *,
            integer *, doublereal *, integer *, doublereal *);
    static doublereal eps;


#define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
#define x_ref(a_1,a_2) x[(a_2)*x_dim1 + a_1]


    /*  -- LAPACK test routine (version 3.0) --
           Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
           Courant Institute, Argonne National Lab, and Rice University
           February 29, 1992


        Purpose
        =======

        DPOT02 computes the residual for the solution of a symmetric system
        of linear equations  A*x = b:

           RESID = norm(B - A*X) / ( norm(A) * norm(X) * EPS ),

        where EPS is the machine epsilon.

        Arguments
        =========

        UPLO    (input) CHARACTER*1
                Specifies whether the upper or lower triangular part of the
                symmetric matrix A is stored:
                = 'U':  Upper triangular
                = 'L':  Lower triangular

        N       (input) INTEGER
                The number of rows and columns of the matrix A.  N >= 0.

        NRHS    (input) INTEGER
                The number of columns of B, the matrix of right hand sides.
                NRHS >= 0.

        A       (input) DOUBLE PRECISION array, dimension (LDA,N)
                The original symmetric matrix A.

        LDA     (input) INTEGER
                The leading dimension of the array A.  LDA >= max(1,N)

        X       (input) DOUBLE PRECISION array, dimension (LDX,NRHS)
                The computed solution vectors for the system of linear
                equations.

        LDX     (input) INTEGER
                The leading dimension of the array X.   LDX >= max(1,N).

        B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
                On entry, the right hand side vectors for the system of
                linear equations.
                On exit, B is overwritten with the difference B - A*X.

        LDB     (input) INTEGER
                The leading dimension of the array B.  LDB >= max(1,N).

        RWORK   (workspace) DOUBLE PRECISION array, dimension (N)

        RESID   (output) DOUBLE PRECISION
                The maximum over the number of right hand sides of
                norm(B - A*X) / ( norm(A) * norm(X) * EPS ).

        =====================================================================


           Quick exit if N = 0 or NRHS = 0.

           Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    x_dim1 = *ldx;
    x_offset = 1 + x_dim1 * 1;
    x -= x_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --rwork;

    /* Function Body */
    if (*n <= 0 || *nrhs <= 0) {
        *resid = 0.;
        return 0;
    }

    /*     Exit with RESID = 1/EPS if ANORM = 0. */

    eps = dlamch_("Epsilon");
    anorm = dlansy_("1", uplo, n, &a[a_offset], lda, &rwork[1]);
    if (anorm <= 0.) {
        *resid = 1. / eps;
        return 0;
    }

    /*     Compute  B - A*X */

    dsymm_("Left", uplo, n, nrhs, &c_b5, &a[a_offset], lda, &x[x_offset], ldx,
           &c_b6, &b[b_offset], ldb);

    /*     Compute the maximum over the number of right hand sides of
              norm( B - A*X ) / ( norm(A) * norm(X) * EPS ) . */

    *resid = 0.;
    i__1 = *nrhs;
    for (j = 1; j <= i__1; ++j) {
        bnorm = dasum_(n, &b_ref(1, j), &c__1);
        xnorm = dasum_(n, &x_ref(1, j), &c__1);
        if (xnorm <= 0.) {
            *resid = 1. / eps;
        } else {
            /* Computing MAX */
            d__1 = *resid, d__2 = bnorm / anorm / xnorm / eps;
            *resid = max(d__1,d__2);
        }
        /* L10: */
    }

    return 0;

    /*     End of DPOT02 */

} /* dpot02_ */
Exemplo n.º 12
0
int main( int argc, char** argv )
{
	obj_t a, b, c;
	obj_t c_save;
	obj_t alpha, beta;
	dim_t m, n;
	dim_t p;
	dim_t p_begin, p_end, p_inc;
	int   m_input, n_input;
	num_t dt_a, dt_b, dt_c;
	num_t dt_alpha, dt_beta;
	int   r, n_repeats;
	side_t side;
	uplo_t uplo;

	double dtime;
	double dtime_save;
	double gflops;

	bli_init();

	n_repeats = 3;

    if( argc < 7 ) 
    {   
        printf("Usage:\n");
        printf("test_foo.x m n k p_begin p_inc p_end:\n");
        exit;
    }   

    int world_size, world_rank, provided;
    MPI_Init_thread( NULL, NULL, MPI_THREAD_FUNNELED, &provided );
    MPI_Comm_size( MPI_COMM_WORLD, &world_size );
    MPI_Comm_rank( MPI_COMM_WORLD, &world_rank );

    m_input = strtol( argv[1], NULL, 10 );
    n_input = strtol( argv[2], NULL, 10 );
    p_begin = strtol( argv[4], NULL, 10 );
    p_inc   = strtol( argv[5], NULL, 10 );
    p_end   = strtol( argv[6], NULL, 10 );

#if 1
	dt_a = BLIS_DOUBLE;
	dt_b = BLIS_DOUBLE;
	dt_c = BLIS_DOUBLE;
	dt_alpha = BLIS_DOUBLE;
	dt_beta = BLIS_DOUBLE;
#else
	dt_a = dt_b = dt_c = dt_alpha = dt_beta = BLIS_DCOMPLEX;
#endif

	side = BLIS_LEFT;
	//side = BLIS_RIGHT;

	uplo = BLIS_LOWER;
	//uplo = BLIS_UPPER;

    for ( p = p_begin + world_rank * p_inc; p <= p_end; p += p_inc * world_size )
	{

		if ( m_input < 0 ) m = p * ( dim_t )abs(m_input);
		else               m =     ( dim_t )    m_input;
		if ( n_input < 0 ) n = p * ( dim_t )abs(n_input);
		else               n =     ( dim_t )    n_input;


		bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha );
		bli_obj_create( dt_beta,  1, 1, 0, 0, &beta );

		if ( bli_is_left( side ) )
			bli_obj_create( dt_a, m, m, 0, 0, &a );
		else
			bli_obj_create( dt_a, n, n, 0, 0, &a );
		bli_obj_create( dt_b, m, n, 0, 0, &b );
		bli_obj_create( dt_c, m, n, 0, 0, &c );
		bli_obj_create( dt_c, m, n, 0, 0, &c_save );

		bli_randm( &a );
		bli_randm( &b );
		bli_randm( &c );

		bli_obj_set_struc( BLIS_HERMITIAN, &a );
		bli_obj_set_uplo( uplo, &a );

		// Randomize A, make it densely Hermitian, and zero the unstored
		// triangle to ensure the implementation reads only from the stored
		// region.
		bli_randm( &a );
		bli_mkherm( &a );
		bli_mktrim( &a );
/*
		bli_obj_toggle_uplo( &a );
		bli_obj_inc_diag_offset( 1, &a );
		bli_setm( &BLIS_ZERO, &a );
		bli_obj_inc_diag_offset( -1, &a );
		bli_obj_toggle_uplo( &a );
		bli_obj_set_diag( BLIS_NONUNIT_DIAG, &a );
		bli_scalm( &BLIS_TWO, &a );
		bli_scalm( &BLIS_TWO, &a );
*/


		bli_setsc(  (2.0/1.0), 1.0, &alpha );
		bli_setsc( (1.0/1.0), 0.0, &beta );


		bli_copym( &c, &c_save );
	
		dtime_save = 1.0e9;

		for ( r = 0; r < n_repeats; ++r )
		{
			bli_copym( &c_save, &c );


			dtime = bli_clock();

#ifdef PRINT
/*
			obj_t ar, ai;
			bli_obj_alias_to( &a, &ar );
			bli_obj_alias_to( &a, &ai );
			bli_obj_set_dt( BLIS_DOUBLE, &ar ); ar.rs *= 2; ar.cs *= 2;
			bli_obj_set_dt( BLIS_DOUBLE, &ai ); ai.rs *= 2; ai.cs *= 2; ai.buffer = ( double* )ai.buffer + 1;
			bli_printm( "ar", &ar, "%4.1f", "" );
			bli_printm( "ai", &ai, "%4.1f", "" );
*/

			bli_printm( "a", &a, "%4.1f", "" );
			bli_printm( "b", &b, "%4.1f", "" );
			bli_printm( "c", &c, "%4.1f", "" );
#endif

#ifdef BLIS

			//bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING );

			bli_hemm( side,
			//bli_hemm4m( side,
			          &alpha,
			          &a,
			          &b,
			          &beta,
			          &c );
#else

			f77_char side   = 'L';
			f77_char uplo   = 'L';
			f77_int  mm     = bli_obj_length( &c );
			f77_int  nn     = bli_obj_width( &c );
			f77_int  lda    = bli_obj_col_stride( &a );
			f77_int  ldb    = bli_obj_col_stride( &b );
			f77_int  ldc    = bli_obj_col_stride( &c );
			double*  alphap = bli_obj_buffer( &alpha );
			double*  ap     = bli_obj_buffer( &a );
			double*  bp     = bli_obj_buffer( &b );
			double*  betap  = bli_obj_buffer( &beta );
			double*  cp     = bli_obj_buffer( &c );

			dsymm_( &side,
			        &uplo,
			        &mm,
			        &nn,
			        alphap,
			        ap, &lda,
			        bp, &ldb,
			        betap,
			        cp, &ldc );
#endif

#ifdef PRINT
			bli_printm( "c after", &c, "%9.5f", "" );
			exit(1);
#endif

			dtime_save = bli_clock_min_diff( dtime_save, dtime );
		}

		if ( bli_is_left( side ) )
			gflops = ( 2.0 * m * m * n ) / ( dtime_save * 1.0e9 );
		else
			gflops = ( 2.0 * m * n * n ) / ( dtime_save * 1.0e9 );

		if ( bli_is_complex( dt_a ) ) gflops *= 4.0;

#ifdef BLIS
		printf( "data_hemm_blis" );
#else
		printf( "data_hemm_%s", BLAS );
#endif
		printf( "( %2lu, 1:4 ) = [ %4lu %4lu  %10.3e  %6.3f ];\n",
		        ( unsigned long )(p - p_begin + 1)/p_inc + 1,
		        ( unsigned long )m,
		        ( unsigned long )n, dtime_save, gflops );

		bli_obj_free( &alpha );
		bli_obj_free( &beta );

		bli_obj_free( &a );
		bli_obj_free( &b );
		bli_obj_free( &c );
		bli_obj_free( &c_save );
	}

	bli_finalize();

	return 0;
}
Exemplo n.º 13
0
int main( int argc, char** argv )
{
    obj_t a, b, c;
    obj_t x, y;
    obj_t alpha, beta;
    dim_t m;
    num_t dt_a, dt_b, dt_c;
    num_t dt_alpha, dt_beta;
    int   ii;

#ifdef NBLIS
    bli_init();
#endif


    m = 4000;

    dt_a = BLIS_DOUBLE;
    dt_b = BLIS_DOUBLE;
    dt_c = BLIS_DOUBLE;
    dt_alpha = BLIS_DOUBLE;
    dt_beta = BLIS_DOUBLE;

    {


#ifdef NBLIS
        bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha );
        bli_obj_create( dt_beta,  1, 1, 0, 0, &beta );

        bli_obj_create( dt_a, m, 1, 0, 0, &x );
        bli_obj_create( dt_a, m, 1, 0, 0, &y );

        bli_obj_create( dt_a, m, m, 0, 0, &a );
        bli_obj_create( dt_b, m, m, 0, 0, &b );
        bli_obj_create( dt_c, m, m, 0, 0, &c );

        bli_randm( &a );
        bli_randm( &b );
        bli_randm( &c );

        bli_setsc(  (2.0/1.0), 0.0, &alpha );
        bli_setsc( -(1.0/1.0), 0.0, &beta );

#endif

#ifdef NBLAS
        x.buffer     = malloc( m * 1 * sizeof( double ) );
        y.buffer     = malloc( m * 1 * sizeof( double ) );

        alpha.buffer = malloc( 1 * sizeof( double ) );
        beta.buffer  = malloc( 1 * sizeof( double ) );
        a.buffer     = malloc( m * m * sizeof( double ) );
        a.m          = m;
        a.n          = m;
        a.cs         = m;
        b.buffer     = malloc( m * m * sizeof( double ) );
        b.m          = m;
        b.n          = m;
        b.cs         = m;
        c.buffer     = malloc( m * m * sizeof( double ) );
        c.m          = m;
        c.n          = m;
        c.cs         = m;

        *((double*)alpha.buffer) =  2.0;
        *((double*)beta.buffer)  = -1.0;
#endif


#ifdef NBLIS

#if NBLIS >= 1
        for ( ii = 0; ii < 2000000000; ++ii )
        {
            bli_gemm( &BLIS_ONE,
                      &a,
                      &b,
                      &BLIS_ONE,
                      &c );
        }
#endif

#if NBLIS >= 2
        {
            bli_hemm( BLIS_LEFT,
                      &BLIS_ONE,
                      &a,
                      &b,
                      &BLIS_ONE,
                      &c );
        }
#endif

#if NBLIS >= 3
        {
            bli_herk( &BLIS_ONE,
                      &a,
                      &BLIS_ONE,
                      &c );
        }
#endif

#if NBLIS >= 4
        {
            bli_her2k( &BLIS_ONE,
                       &a,
                       &b,
                       &BLIS_ONE,
                       &c );
        }
#endif

#if NBLIS >= 5
        {
            bli_trmm( BLIS_LEFT,
                      &BLIS_ONE,
                      &a,
                      &c );
        }
#endif

#if NBLIS >= 6
        {
            bli_trsm( BLIS_LEFT,
                      &BLIS_ONE,
                      &a,
                      &c );
        }
#endif

#endif



#ifdef NBLAS

#if NBLAS >= 1
        for ( ii = 0; ii < 2000000000; ++ii )
        {
            f77_char transa = 'N';
            f77_char transb = 'N';
            f77_int  mm     = bli_obj_length( c );
            f77_int  kk     = bli_obj_width_after_trans( a );
            f77_int  nn     = bli_obj_width( c );
            f77_int  lda    = bli_obj_col_stride( a );
            f77_int  ldb    = bli_obj_col_stride( b );
            f77_int  ldc    = bli_obj_col_stride( c );
            double*  alphap = bli_obj_buffer( alpha );
            double*  ap     = bli_obj_buffer( a );
            double*  bp     = bli_obj_buffer( b );
            double*  betap  = bli_obj_buffer( beta );
            double*  cp     = bli_obj_buffer( c );

            dgemm_( &transa,
                    &transb,
                    &mm,
                    &nn,
                    &kk,
                    alphap,
                    ap, &lda,
                    bp, &ldb,
                    betap,
                    cp, &ldc );
        }
#endif

#if NBLAS >= 2
        {
            f77_char side   = 'L';
            f77_char uplo   = 'L';
            f77_int  mm     = bli_obj_length( c );
            f77_int  nn     = bli_obj_width( c );
            f77_int  lda    = bli_obj_col_stride( a );
            f77_int  ldb    = bli_obj_col_stride( b );
            f77_int  ldc    = bli_obj_col_stride( c );
            double*  alphap = bli_obj_buffer( alpha );
            double*  ap     = bli_obj_buffer( a );
            double*  bp     = bli_obj_buffer( b );
            double*  betap  = bli_obj_buffer( beta );
            double*  cp     = bli_obj_buffer( c );

            dsymm_( &side,
                    &uplo,
                    &mm,
                    &nn,
                    alphap,
                    ap, &lda,
                    bp, &ldb,
                    betap,
                    cp, &ldc );
        }
#endif

#if NBLAS >= 3
        {
            f77_char uplo   = 'L';
            f77_char trans  = 'N';
            f77_int  mm     = bli_obj_length( c );
            f77_int  kk     = bli_obj_width( a );
            f77_int  lda    = bli_obj_col_stride( a );
            f77_int  ldc    = bli_obj_col_stride( c );
            double*  alphap = bli_obj_buffer( alpha );
            double*  ap     = bli_obj_buffer( a );
            double*  betap  = bli_obj_buffer( beta );
            double*  cp     = bli_obj_buffer( c );

            dsyrk_( &uplo,
                    &trans,
                    &mm,
                    &kk,
                    alphap,
                    ap, &lda,
                    betap,
                    cp, &ldc );
        }
#endif

#if NBLAS >= 4
        {
            f77_char uplo   = 'L';
            f77_char trans  = 'N';
            f77_int  mm     = bli_obj_length( c );
            f77_int  kk     = bli_obj_width( a );
            f77_int  lda    = bli_obj_col_stride( a );
            f77_int  ldb    = bli_obj_col_stride( b );
            f77_int  ldc    = bli_obj_col_stride( c );
            double*  alphap = bli_obj_buffer( alpha );
            double*  ap     = bli_obj_buffer( a );
            double*  bp     = bli_obj_buffer( b );
            double*  betap  = bli_obj_buffer( beta );
            double*  cp     = bli_obj_buffer( c );

            dsyr2k_( &uplo,
                     &trans,
                     &mm,
                     &kk,
                     alphap,
                     ap, &lda,
                     bp, &ldb,
                     betap,
                     cp, &ldc );
        }
#endif

#if NBLAS >= 5
        {
            f77_char side   = 'L';
            f77_char uplo   = 'L';
            f77_char trans  = 'N';
            f77_char diag   = 'N';
            f77_int  mm     = bli_obj_length( c );
            f77_int  nn     = bli_obj_width( c );
            f77_int  lda    = bli_obj_col_stride( a );
            f77_int  ldc    = bli_obj_col_stride( c );
            double*  alphap = bli_obj_buffer( alpha );
            double*  ap     = bli_obj_buffer( a );
            double*  cp     = bli_obj_buffer( c );

            dtrmm_( &side,
                    &uplo,
                    &trans,
                    &diag,
                    &mm,
                    &nn,
                    alphap,
                    ap, &lda,
                    cp, &ldc );
        }
#endif

#if NBLAS >= 6
        {
            f77_char side   = 'L';
            f77_char uplo   = 'L';
            f77_char trans  = 'N';
            f77_char diag   = 'N';
            f77_int  mm     = bli_obj_length( c );
            f77_int  nn     = bli_obj_width( c );
            f77_int  lda    = bli_obj_col_stride( a );
            f77_int  ldc    = bli_obj_col_stride( c );
            double*  alphap = bli_obj_buffer( alpha );
            double*  ap     = bli_obj_buffer( a );
            double*  cp     = bli_obj_buffer( c );

            dtrsm_( &side,
                    &uplo,
                    &trans,
                    &diag,
                    &mm,
                    &nn,
                    alphap,
                    ap, &lda,
                    cp, &ldc );
        }
#endif

#if NBLAS >= 7
        {
            f77_char  transa = 'N';
            f77_char  transb = 'N';
            f77_int   mm     = bli_obj_length( c );
            f77_int   kk     = bli_obj_width_after_trans( a );
            f77_int   nn     = bli_obj_width( c );
            f77_int   lda    = bli_obj_col_stride( a );
            f77_int   ldb    = bli_obj_col_stride( b );
            f77_int   ldc    = bli_obj_col_stride( c );
            dcomplex* alphap = bli_obj_buffer( alpha );
            dcomplex* ap     = bli_obj_buffer( a );
            dcomplex* bp     = bli_obj_buffer( b );
            dcomplex* betap  = bli_obj_buffer( beta );
            dcomplex* cp     = bli_obj_buffer( c );

            zgemm_( &transa,
                    &transb,
                    &mm,
                    &nn,
                    &kk,
                    alphap,
                    ap, &lda,
                    bp, &ldb,
                    betap,
                    cp, &ldc );
        }
#endif

#if NBLAS >= 8
        {
            f77_char  side   = 'L';
            f77_char  uplo   = 'L';
            f77_int   mm     = bli_obj_length( c );
            f77_int   nn     = bli_obj_width( c );
            f77_int   lda    = bli_obj_col_stride( a );
            f77_int   ldb    = bli_obj_col_stride( b );
            f77_int   ldc    = bli_obj_col_stride( c );
            dcomplex* alphap = bli_obj_buffer( alpha );
            dcomplex* ap     = bli_obj_buffer( a );
            dcomplex* bp     = bli_obj_buffer( b );
            dcomplex* betap  = bli_obj_buffer( beta );
            dcomplex* cp     = bli_obj_buffer( c );

            zhemm_( &side,
                    &uplo,
                    &mm,
                    &nn,
                    alphap,
                    ap, &lda,
                    bp, &ldb,
                    betap,
                    cp, &ldc );
        }
#endif

#if NBLAS >= 9
        {
            f77_char  uplo   = 'L';
            f77_char  trans  = 'N';
            f77_int   mm     = bli_obj_length( c );
            f77_int   kk     = bli_obj_width( a );
            f77_int   lda    = bli_obj_col_stride( a );
            f77_int   ldc    = bli_obj_col_stride( c );
            double*   alphap = bli_obj_buffer( alpha );
            dcomplex* ap     = bli_obj_buffer( a );
            double*   betap  = bli_obj_buffer( beta );
            dcomplex* cp     = bli_obj_buffer( c );

            zherk_( &uplo,
                    &trans,
                    &mm,
                    &kk,
                    alphap,
                    ap, &lda,
                    betap,
                    cp, &ldc );
        }
#endif

#if NBLAS >= 10
        {
            f77_char  uplo   = 'L';
            f77_char  trans  = 'N';
            f77_int   mm     = bli_obj_length( c );
            f77_int   kk     = bli_obj_width( a );
            f77_int   lda    = bli_obj_col_stride( a );
            f77_int   ldb    = bli_obj_col_stride( b );
            f77_int   ldc    = bli_obj_col_stride( c );
            dcomplex* alphap = bli_obj_buffer( alpha );
            dcomplex* ap     = bli_obj_buffer( a );
            dcomplex* bp     = bli_obj_buffer( b );
            double*   betap  = bli_obj_buffer( beta );
            dcomplex* cp     = bli_obj_buffer( c );

            zher2k_( &uplo,
                     &trans,
                     &mm,
                     &kk,
                     alphap,
                     ap, &lda,
                     bp, &ldb,
                     betap,
                     cp, &ldc );
        }
#endif

#if NBLAS >= 11
        {
            f77_char  side   = 'L';
            f77_char  uplo   = 'L';
            f77_char  trans  = 'N';
            f77_char  diag   = 'N';
            f77_int   mm     = bli_obj_length( c );
            f77_int   nn     = bli_obj_width( c );
            f77_int   lda    = bli_obj_col_stride( a );
            f77_int   ldc    = bli_obj_col_stride( c );
            dcomplex* alphap = bli_obj_buffer( alpha );
            dcomplex* ap     = bli_obj_buffer( a );
            dcomplex* cp     = bli_obj_buffer( c );

            ztrmm_( &side,
                    &uplo,
                    &trans,
                    &diag,
                    &mm,
                    &nn,
                    alphap,
                    ap, &lda,
                    cp, &ldc );
        }
#endif

#if NBLAS >= 12
        {
            f77_char  side   = 'L';
            f77_char  uplo   = 'L';
            f77_char  trans  = 'N';
            f77_char  diag   = 'N';
            f77_int   mm     = bli_obj_length( c );
            f77_int   nn     = bli_obj_width( c );
            f77_int   lda    = bli_obj_col_stride( a );
            f77_int   ldc    = bli_obj_col_stride( c );
            dcomplex* alphap = bli_obj_buffer( alpha );
            dcomplex* ap     = bli_obj_buffer( a );
            dcomplex* cp     = bli_obj_buffer( c );

            ztrsm_( &side,
                    &uplo,
                    &trans,
                    &diag,
                    &mm,
                    &nn,
                    alphap,
                    ap, &lda,
                    cp, &ldc );
        }
#endif


#endif


#ifdef NBLIS
        bli_obj_free( &x );
        bli_obj_free( &y );

        bli_obj_free( &alpha );
        bli_obj_free( &beta );

        bli_obj_free( &a );
        bli_obj_free( &b );
        bli_obj_free( &c );
#endif

#ifdef NBLAS
        free( x.buffer );
        free( y.buffer );

        free( alpha.buffer );
        free( beta.buffer );

        free( a.buffer );
        free( b.buffer );
        free( c.buffer );
#endif
    }

#ifdef NBLIS
    bli_finalize();
#endif

    return 0;
}
Exemplo n.º 14
0
/* Subroutine */ int dsgt01_(integer *itype, char *uplo, integer *n, integer *
	m, doublereal *a, integer *lda, doublereal *b, integer *ldb, 
	doublereal *z__, integer *ldz, doublereal *d__, doublereal *work, 
	doublereal *result)
{
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, z_dim1, z_offset, i__1;

    /* Local variables */
    static integer i__;
    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, 
	    integer *);
    static doublereal anorm;
    extern /* Subroutine */ int dsymm_(char *, char *, integer *, integer *, 
	    doublereal *, doublereal *, integer *, doublereal *, integer *, 
	    doublereal *, doublereal *, integer *);
    extern doublereal dlamch_(char *), dlange_(char *, integer *, 
	    integer *, doublereal *, integer *, doublereal *), 
	    dlansy_(char *, char *, integer *, doublereal *, integer *, 
	    doublereal *);
    static doublereal ulp;


#define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1]


/*  -- LAPACK test routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       June 30, 1999   

       modified August 1997, a new parameter M is added to the calling   
       sequence.   


    Purpose   
    =======   

    DDGT01 checks a decomposition of the form   

       A Z   =  B Z D or   
       A B Z =  Z D or   
       B A Z =  Z D   

    where A is a symmetric matrix, B is   
    symmetric positive definite, Z is orthogonal, and D is diagonal.   

    One of the following test ratios is computed:   

    ITYPE = 1:  RESULT(1) = | A Z - B Z D | / ( |A| |Z| n ulp )   

    ITYPE = 2:  RESULT(1) = | A B Z - Z D | / ( |A| |Z| n ulp )   

    ITYPE = 3:  RESULT(1) = | B A Z - Z D | / ( |A| |Z| n ulp )   

    Arguments   
    =========   

    ITYPE   (input) INTEGER   
            The form of the symmetric generalized eigenproblem.   
            = 1:  A*z = (lambda)*B*z   
            = 2:  A*B*z = (lambda)*z   
            = 3:  B*A*z = (lambda)*z   

    UPLO    (input) CHARACTER*1   
            Specifies whether the upper or lower triangular part of the   
            symmetric matrices A and B is stored.   
            = 'U':  Upper triangular   
            = 'L':  Lower triangular   

    N       (input) INTEGER   
            The order of the matrix A.  N >= 0.   

    M       (input) INTEGER   
            The number of eigenvalues found.  0 <= M <= N.   

    A       (input) DOUBLE PRECISION array, dimension (LDA, N)   
            The original symmetric matrix A.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    B       (input) DOUBLE PRECISION array, dimension (LDB, N)   
            The original symmetric positive definite matrix B.   

    LDB     (input) INTEGER   
            The leading dimension of the array B.  LDB >= max(1,N).   

    Z       (input) DOUBLE PRECISION array, dimension (LDZ, M)   
            The computed eigenvectors of the generalized eigenproblem.   

    LDZ     (input) INTEGER   
            The leading dimension of the array Z.  LDZ >= max(1,N).   

    D       (input) DOUBLE PRECISION array, dimension (M)   
            The computed eigenvalues of the generalized eigenproblem.   

    WORK    (workspace) DOUBLE PRECISION array, dimension (N*N)   

    RESULT  (output) DOUBLE PRECISION array, dimension (1)   
            The test ratio as described above.   

    =====================================================================   


       Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    z_dim1 = *ldz;
    z_offset = 1 + z_dim1 * 1;
    z__ -= z_offset;
    --d__;
    --work;
    --result;

    /* Function Body */
    result[1] = 0.;
    if (*n <= 0) {
	return 0;
    }

    ulp = dlamch_("Epsilon");

/*     Compute product of 1-norms of A and Z. */

    anorm = dlansy_("1", uplo, n, &a[a_offset], lda, &work[1]) * dlange_("1", n, m, &z__[z_offset], ldz, &work[1]);
    if (anorm == 0.) {
	anorm = 1.;
    }

    if (*itype == 1) {

/*        Norm of AZ - BZD */

	dsymm_("Left", uplo, n, m, &c_b6, &a[a_offset], lda, &z__[z_offset], 
		ldz, &c_b7, &work[1], n);
	i__1 = *m;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    dscal_(n, &d__[i__], &z___ref(1, i__), &c__1);
/* L10: */
	}
	dsymm_("Left", uplo, n, m, &c_b6, &b[b_offset], ldb, &z__[z_offset], 
		ldz, &c_b12, &work[1], n);

	result[1] = dlange_("1", n, m, &work[1], n, &work[1]) / 
		anorm / (*n * ulp);

    } else if (*itype == 2) {

/*        Norm of ABZ - ZD */

	dsymm_("Left", uplo, n, m, &c_b6, &b[b_offset], ldb, &z__[z_offset], 
		ldz, &c_b7, &work[1], n);
	i__1 = *m;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    dscal_(n, &d__[i__], &z___ref(1, i__), &c__1);
/* L20: */
	}
	dsymm_("Left", uplo, n, m, &c_b6, &a[a_offset], lda, &work[1], n, &
		c_b12, &z__[z_offset], ldz);

	result[1] = dlange_("1", n, m, &z__[z_offset], ldz, &work[1]) / anorm / (*n * ulp);

    } else if (*itype == 3) {

/*        Norm of BAZ - ZD */

	dsymm_("Left", uplo, n, m, &c_b6, &a[a_offset], lda, &z__[z_offset], 
		ldz, &c_b7, &work[1], n);
	i__1 = *m;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    dscal_(n, &d__[i__], &z___ref(1, i__), &c__1);
/* L30: */
	}
	dsymm_("Left", uplo, n, m, &c_b6, &b[b_offset], ldb, &work[1], n, &
		c_b12, &z__[z_offset], ldz);

	result[1] = dlange_("1", n, m, &z__[z_offset], ldz, &work[1]) / anorm / (*n * ulp);
    }

    return 0;

/*     End of DDGT01 */

} /* dsgt01_ */
Exemplo n.º 15
0
void
dsymm(char side, char uplo, int m, int n, double alpha, double *a, int lda, double *b, int ldb, double beta, double *c, int ldc)
{
   dsymm_( &side, &uplo, &m, &n, &alpha, a, &lda, b, &ldb, &beta, c, &ldc);
}
Exemplo n.º 16
0
/* Subroutine */ int dsygst_(integer *itype, char *uplo, integer *n, 
	doublereal *a, integer *lda, doublereal *b, integer *ldb, integer *
	info)
{
/*  -- LAPACK routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    DSYGST reduces a real symmetric-definite generalized eigenproblem   
    to standard form.   

    If ITYPE = 1, the problem is A*x = lambda*B*x,   
    and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T)   

    If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or   
    B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L.   

    B must have been previously factorized as U**T*U or L*L**T by DPOTRF.   

    Arguments   
    =========   

    ITYPE   (input) INTEGER   
            = 1: compute inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T);   
            = 2 or 3: compute U*A*U**T or L**T*A*L.   

    UPLO    (input) CHARACTER   
            = 'U':  Upper triangle of A is stored and B is factored as   
                    U**T*U;   
            = 'L':  Lower triangle of A is stored and B is factored as   
                    L*L**T.   

    N       (input) INTEGER   
            The order of the matrices A and B.  N >= 0.   

    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)   
            On entry, the symmetric matrix A.  If UPLO = 'U', the leading   
            N-by-N upper triangular part of A contains the upper   
            triangular part of the matrix A, and the strictly lower   
            triangular part of A is not referenced.  If UPLO = 'L', the   
            leading N-by-N lower triangular part of A contains the lower   
            triangular part of the matrix A, and the strictly upper   
            triangular part of A is not referenced.   

            On exit, if INFO = 0, the transformed matrix, stored in the   
            same format as A.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    B       (input) DOUBLE PRECISION array, dimension (LDB,N)   
            The triangular factor from the Cholesky factorization of B,   
            as returned by DPOTRF.   

    LDB     (input) INTEGER   
            The leading dimension of the array B.  LDB >= max(1,N).   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value   

    =====================================================================   


       Test the input parameters.   

       Parameter adjustments */
    /* Table of constant values */
    static integer c__1 = 1;
    static integer c_n1 = -1;
    static doublereal c_b14 = 1.;
    static doublereal c_b16 = -.5;
    static doublereal c_b19 = -1.;
    static doublereal c_b52 = .5;
    
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;
    /* Local variables */
    static integer k;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int dtrmm_(char *, char *, char *, char *, 
	    integer *, integer *, doublereal *, doublereal *, integer *, 
	    doublereal *, integer *), dsymm_(
	    char *, char *, integer *, integer *, doublereal *, doublereal *, 
	    integer *, doublereal *, integer *, doublereal *, doublereal *, 
	    integer *);
    static logical upper;
    extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, 
	    integer *, integer *, doublereal *, doublereal *, integer *, 
	    doublereal *, integer *), dsygs2_(
	    integer *, char *, integer *, doublereal *, integer *, doublereal 
	    *, integer *, integer *);
    static integer kb;
    extern /* Subroutine */ int dsyr2k_(char *, char *, integer *, integer *, 
	    doublereal *, doublereal *, integer *, doublereal *, integer *, 
	    doublereal *, doublereal *, integer *);
    static integer nb;
    extern /* Subroutine */ int xerbla_(char *, integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *, ftnlen, ftnlen);
#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
#define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]


    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (*itype < 1 || *itype > 3) {
	*info = -1;
    } else if (! upper && ! lsame_(uplo, "L")) {
	*info = -2;
    } else if (*n < 0) {
	*info = -3;
    } else if (*lda < max(1,*n)) {
	*info = -5;
    } else if (*ldb < max(1,*n)) {
	*info = -7;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("DSYGST", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Determine the block size for this environment. */

    nb = ilaenv_(&c__1, "DSYGST", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (
	    ftnlen)1);

    if (nb <= 1 || nb >= *n) {

/*        Use unblocked code */

	dsygs2_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info);
    } else {

/*        Use blocked code */

	if (*itype == 1) {
	    if (upper) {

/*              Compute inv(U')*A*inv(U) */

		i__1 = *n;
		i__2 = nb;
		for (k = 1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) {
/* Computing MIN */
		    i__3 = *n - k + 1;
		    kb = min(i__3,nb);

/*                 Update the upper triangle of A(k:n,k:n) */

		    dsygs2_(itype, uplo, &kb, &a_ref(k, k), lda, &b_ref(k, k),
			     ldb, info);
		    if (k + kb <= *n) {
			i__3 = *n - k - kb + 1;
			dtrsm_("Left", uplo, "Transpose", "Non-unit", &kb, &
				i__3, &c_b14, &b_ref(k, k), ldb, &a_ref(k, k 
				+ kb), lda);
			i__3 = *n - k - kb + 1;
			dsymm_("Left", uplo, &kb, &i__3, &c_b16, &a_ref(k, k),
				 lda, &b_ref(k, k + kb), ldb, &c_b14, &a_ref(
				k, k + kb), lda);
			i__3 = *n - k - kb + 1;
			dsyr2k_(uplo, "Transpose", &i__3, &kb, &c_b19, &a_ref(
				k, k + kb), lda, &b_ref(k, k + kb), ldb, &
				c_b14, &a_ref(k + kb, k + kb), lda);
			i__3 = *n - k - kb + 1;
			dsymm_("Left", uplo, &kb, &i__3, &c_b16, &a_ref(k, k),
				 lda, &b_ref(k, k + kb), ldb, &c_b14, &a_ref(
				k, k + kb), lda);
			i__3 = *n - k - kb + 1;
			dtrsm_("Right", uplo, "No transpose", "Non-unit", &kb,
				 &i__3, &c_b14, &b_ref(k + kb, k + kb), ldb, &
				a_ref(k, k + kb), lda);
		    }
/* L10: */
		}
	    } else {

/*              Compute inv(L)*A*inv(L') */

		i__2 = *n;
		i__1 = nb;
		for (k = 1; i__1 < 0 ? k >= i__2 : k <= i__2; k += i__1) {
/* Computing MIN */
		    i__3 = *n - k + 1;
		    kb = min(i__3,nb);

/*                 Update the lower triangle of A(k:n,k:n) */

		    dsygs2_(itype, uplo, &kb, &a_ref(k, k), lda, &b_ref(k, k),
			     ldb, info);
		    if (k + kb <= *n) {
			i__3 = *n - k - kb + 1;
			dtrsm_("Right", uplo, "Transpose", "Non-unit", &i__3, 
				&kb, &c_b14, &b_ref(k, k), ldb, &a_ref(k + kb,
				 k), lda);
			i__3 = *n - k - kb + 1;
			dsymm_("Right", uplo, &i__3, &kb, &c_b16, &a_ref(k, k)
				, lda, &b_ref(k + kb, k), ldb, &c_b14, &a_ref(
				k + kb, k), lda);
			i__3 = *n - k - kb + 1;
			dsyr2k_(uplo, "No transpose", &i__3, &kb, &c_b19, &
				a_ref(k + kb, k), lda, &b_ref(k + kb, k), ldb,
				 &c_b14, &a_ref(k + kb, k + kb), lda);
			i__3 = *n - k - kb + 1;
			dsymm_("Right", uplo, &i__3, &kb, &c_b16, &a_ref(k, k)
				, lda, &b_ref(k + kb, k), ldb, &c_b14, &a_ref(
				k + kb, k), lda);
			i__3 = *n - k - kb + 1;
			dtrsm_("Left", uplo, "No transpose", "Non-unit", &
				i__3, &kb, &c_b14, &b_ref(k + kb, k + kb), 
				ldb, &a_ref(k + kb, k), lda);
		    }
/* L20: */
		}
	    }
	} else {
	    if (upper) {

/*              Compute U*A*U' */

		i__1 = *n;
		i__2 = nb;
		for (k = 1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) {
/* Computing MIN */
		    i__3 = *n - k + 1;
		    kb = min(i__3,nb);

/*                 Update the upper triangle of A(1:k+kb-1,1:k+kb-1) */

		    i__3 = k - 1;
		    dtrmm_("Left", uplo, "No transpose", "Non-unit", &i__3, &
			    kb, &c_b14, &b[b_offset], ldb, &a_ref(1, k), lda);
		    i__3 = k - 1;
		    dsymm_("Right", uplo, &i__3, &kb, &c_b52, &a_ref(k, k), 
			    lda, &b_ref(1, k), ldb, &c_b14, &a_ref(1, k), lda);
		    i__3 = k - 1;
		    dsyr2k_(uplo, "No transpose", &i__3, &kb, &c_b14, &a_ref(
			    1, k), lda, &b_ref(1, k), ldb, &c_b14, &a[
			    a_offset], lda);
		    i__3 = k - 1;
		    dsymm_("Right", uplo, &i__3, &kb, &c_b52, &a_ref(k, k), 
			    lda, &b_ref(1, k), ldb, &c_b14, &a_ref(1, k), lda);
		    i__3 = k - 1;
		    dtrmm_("Right", uplo, "Transpose", "Non-unit", &i__3, &kb,
			     &c_b14, &b_ref(k, k), ldb, &a_ref(1, k), lda);
		    dsygs2_(itype, uplo, &kb, &a_ref(k, k), lda, &b_ref(k, k),
			     ldb, info);
/* L30: */
		}
	    } else {

/*              Compute L'*A*L */

		i__2 = *n;
		i__1 = nb;
		for (k = 1; i__1 < 0 ? k >= i__2 : k <= i__2; k += i__1) {
/* Computing MIN */
		    i__3 = *n - k + 1;
		    kb = min(i__3,nb);

/*                 Update the lower triangle of A(1:k+kb-1,1:k+kb-1) */

		    i__3 = k - 1;
		    dtrmm_("Right", uplo, "No transpose", "Non-unit", &kb, &
			    i__3, &c_b14, &b[b_offset], ldb, &a_ref(k, 1), 
			    lda);
		    i__3 = k - 1;
		    dsymm_("Left", uplo, &kb, &i__3, &c_b52, &a_ref(k, k), 
			    lda, &b_ref(k, 1), ldb, &c_b14, &a_ref(k, 1), lda);
		    i__3 = k - 1;
		    dsyr2k_(uplo, "Transpose", &i__3, &kb, &c_b14, &a_ref(k, 
			    1), lda, &b_ref(k, 1), ldb, &c_b14, &a[a_offset], 
			    lda);
		    i__3 = k - 1;
		    dsymm_("Left", uplo, &kb, &i__3, &c_b52, &a_ref(k, k), 
			    lda, &b_ref(k, 1), ldb, &c_b14, &a_ref(k, 1), lda);
		    i__3 = k - 1;
		    dtrmm_("Left", uplo, "Transpose", "Non-unit", &kb, &i__3, 
			    &c_b14, &b_ref(k, k), ldb, &a_ref(k, 1), lda);
		    dsygs2_(itype, uplo, &kb, &a_ref(k, k), lda, &b_ref(k, k),
			     ldb, info);
/* L40: */
		}
	    }
	}
    }
    return 0;

/*     End of DSYGST */

} /* dsygst_ */
Exemplo n.º 17
0
/* Right-hand-side equation for density matrix using BLAS */
int RHS_DM_BLAS(realtype t, N_Vector y, N_Vector ydot, void * data) {

#ifdef DEBUGf_DM
  // file for density matrix coeff derivatives in time
  FILE * dmf;
  std::cout << "Creating output file for density matrix coefficient derivatives in time.\n";
  dmf = fopen("dmf.out", "a");
#endif

  // data is a pointer to the params struct
  Params * p;
  p = (Params *) data;

  // extract parameters from p
  double * H = &(p->H)[0];
  int N = p->NEQ;
  int N2 = p->NEQ2;

  // more compact notation for N_Vectors
  realtype * yp = N_VGetArrayPointer(y);
  realtype * ydotp = N_VGetArrayPointer(ydot);

  // update Hamiltonian if it is time-dependent
  if (p->torsion || p->laser_on) {
    // only update if at a new time point
    if ((t > 0.0) && (t != p->lastTime)) {
      updateHamiltonian(p, t);
      // update time point
      p->lastTime = t;
    }
  }

  // initialize ydot

#pragma omp parallel for
  for (int ii = 0; ii < 2*N2; ii++) {
    ydotp[ii] = 0.0;
  }

  char LEFT = 'l';
  char RGHT = 'r';
  char UP = 'l';
  double ONE = 1.0;
  double NEG = -1.0;
  double ZERO = 0.0;

  // Re(\dot{\rho}) += H*Im(\rho)
  dsymm_(&LEFT, &UP, &N, &N, &ONE, &H[0], &N, &yp[N2], &N, &ZERO, &ydotp[0], &N);

  // Re(\dot{\rho}) -= Im(\rho)*H
  dsymm_(&RGHT, &UP, &N, &N, &NEG, &H[0], &N, &yp[N2], &N, &ONE, &ydotp[0], &N);

  // Im(\dot{\rho}) += i*Re(\rho)*H
  dsymm_(&RGHT, &UP, &N, &N, &ONE, &H[0], &N, &yp[0], &N, &ONE, &ydotp[N2], &N);

  // Im(\dot{\rho}) -= i*H*Re(\rho)
  dsymm_(&LEFT, &UP, &N, &N, &NEG, &H[0], &N, &yp[0], &N, &ONE, &ydotp[N2], &N);

#ifdef DEBUGf_DM
  fprintf(dmf, "%+.7e", t);
  for (int ii = 0; ii < N; ii++) {
    for (int jj = 0; jj < N; jj++) {
      fprintf(dmf, " (%+.2e,%+.2e)", ydotp[ii*N + jj], ydotp[ii*N + jj + N2]);
    }
  }
  fprintf(dmf, "\n");

  std::cout << "Closing output file for density matrix coefficients in time.\n";
  fclose(dmf);
#endif

  return 0;
}
Exemplo n.º 18
0
int main( int argc, char** argv )
{
	obj_t a, b, c;
	obj_t c_save;
	obj_t alpha, beta;
	dim_t m, n;
	dim_t p;
	dim_t p_begin, p_end, p_inc;
	int   m_input, n_input;
	num_t dt;
	int   r, n_repeats;
	side_t side;
	uplo_t uploa;
	f77_char f77_side;
	f77_char f77_uploa;

	double dtime;
	double dtime_save;
	double gflops;

	bli_init();

	//bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING );

	n_repeats = 3;

#ifndef PRINT
	p_begin = 200;
	p_end   = 2000;
	p_inc   = 200;

	m_input = -1;
	n_input = -1;
#else
	p_begin = 16;
	p_end   = 16;
	p_inc   = 1;

	m_input = 4;
	n_input = 4;
#endif

#if 1
	//dt = BLIS_FLOAT;
	dt = BLIS_DOUBLE;
#else
	//dt = BLIS_SCOMPLEX;
	dt = BLIS_DCOMPLEX;
#endif

	side = BLIS_LEFT;
	//side = BLIS_RIGHT;

	uploa = BLIS_LOWER;
	//uploa = BLIS_UPPER;

	bli_param_map_blis_to_netlib_side( side, &f77_side );
	bli_param_map_blis_to_netlib_uplo( uploa, &f77_uploa );


	for ( p = p_begin; p <= p_end; p += p_inc )
	{
		if ( m_input < 0 ) m = p * ( dim_t )abs(m_input);
		else               m =     ( dim_t )    m_input;
		if ( n_input < 0 ) n = p * ( dim_t )abs(n_input);
		else               n =     ( dim_t )    n_input;

		bli_obj_create( dt, 1, 1, 0, 0, &alpha );
		bli_obj_create( dt, 1, 1, 0, 0, &beta );

		if ( bli_is_left( side ) )
			bli_obj_create( dt, m, m, 0, 0, &a );
		else
			bli_obj_create( dt, n, n, 0, 0, &a );
		bli_obj_create( dt, m, n, 0, 0, &b );
		bli_obj_create( dt, m, n, 0, 0, &c );
		bli_obj_create( dt, m, n, 0, 0, &c_save );

		bli_randm( &a );
		bli_randm( &b );
		bli_randm( &c );

		bli_obj_set_struc( BLIS_HERMITIAN, a );
		bli_obj_set_uplo( uploa, a );

		// Randomize A, make it densely Hermitian, and zero the unstored
		// triangle to ensure the implementation reads only from the stored
		// region.
		bli_randm( &a );
		bli_mkherm( &a );
		bli_mktrim( &a );
/*
		bli_obj_toggle_uplo( a );
		bli_obj_inc_diag_off( 1, a );
		bli_setm( &BLIS_ZERO, &a );
		bli_obj_inc_diag_off( -1, a );
		bli_obj_toggle_uplo( a );
		bli_obj_set_diag( BLIS_NONUNIT_DIAG, a );
		bli_scalm( &BLIS_TWO, &a );
		bli_scalm( &BLIS_TWO, &a );
*/

		bli_setsc(  (2.0/1.0), 1.0, &alpha );
		bli_setsc( -(1.0/1.0), 0.0, &beta );


		bli_copym( &c, &c_save );
	
		dtime_save = 1.0e9;

		for ( r = 0; r < n_repeats; ++r )
		{
			bli_copym( &c_save, &c );


			dtime = bli_clock();

#ifdef PRINT
			bli_printm( "a", &a, "%4.1f", "" );
			bli_printm( "b", &b, "%4.1f", "" );
			bli_printm( "c", &c, "%4.1f", "" );
#endif

#ifdef BLIS

			bli_hemm( side,
			          &alpha,
			          &a,
			          &b,
			          &beta,
			          &c );
#else

		if ( bli_is_float( dt ) )
		{
			f77_int  mm     = bli_obj_length( c );
			f77_int  nn     = bli_obj_width( c );
			f77_int  lda    = bli_obj_col_stride( a );
			f77_int  ldb    = bli_obj_col_stride( b );
			f77_int  ldc    = bli_obj_col_stride( c );
			float*   alphap = bli_obj_buffer( alpha );
			float*   ap     = bli_obj_buffer( a );
			float*   bp     = bli_obj_buffer( b );
			float*   betap  = bli_obj_buffer( beta );
			float*   cp     = bli_obj_buffer( c );

			ssymm_( &f77_side,
			        &f77_uploa,
			        &mm,
			        &nn,
			        alphap,
			        ap, &lda,
			        bp, &ldb,
			        betap,
			        cp, &ldc );
		}
		else if ( bli_is_double( dt ) )
		{
			f77_int  mm     = bli_obj_length( c );
			f77_int  nn     = bli_obj_width( c );
			f77_int  lda    = bli_obj_col_stride( a );
			f77_int  ldb    = bli_obj_col_stride( b );
			f77_int  ldc    = bli_obj_col_stride( c );
			double*  alphap = bli_obj_buffer( alpha );
			double*  ap     = bli_obj_buffer( a );
			double*  bp     = bli_obj_buffer( b );
			double*  betap  = bli_obj_buffer( beta );
			double*  cp     = bli_obj_buffer( c );

			dsymm_( &f77_side,
			        &f77_uploa,
			        &mm,
			        &nn,
			        alphap,
			        ap, &lda,
			        bp, &ldb,
			        betap,
			        cp, &ldc );
		}
		else if ( bli_is_scomplex( dt ) )
		{
			f77_int  mm     = bli_obj_length( c );
			f77_int  nn     = bli_obj_width( c );
			f77_int  lda    = bli_obj_col_stride( a );
			f77_int  ldb    = bli_obj_col_stride( b );
			f77_int  ldc    = bli_obj_col_stride( c );
			scomplex*  alphap = bli_obj_buffer( alpha );
			scomplex*  ap     = bli_obj_buffer( a );
			scomplex*  bp     = bli_obj_buffer( b );
			scomplex*  betap  = bli_obj_buffer( beta );
			scomplex*  cp     = bli_obj_buffer( c );

			chemm_( &f77_side,
			        &f77_uploa,
			        &mm,
			        &nn,
			        alphap,
			        ap, &lda,
			        bp, &ldb,
			        betap,
			        cp, &ldc );
		}
		else if ( bli_is_dcomplex( dt ) )
		{
			f77_int  mm     = bli_obj_length( c );
			f77_int  nn     = bli_obj_width( c );
			f77_int  lda    = bli_obj_col_stride( a );
			f77_int  ldb    = bli_obj_col_stride( b );
			f77_int  ldc    = bli_obj_col_stride( c );
			dcomplex*  alphap = bli_obj_buffer( alpha );
			dcomplex*  ap     = bli_obj_buffer( a );
			dcomplex*  bp     = bli_obj_buffer( b );
			dcomplex*  betap  = bli_obj_buffer( beta );
			dcomplex*  cp     = bli_obj_buffer( c );

			zhemm_( &f77_side,
			        &f77_uploa,
			        &mm,
			        &nn,
			        alphap,
			        ap, &lda,
			        bp, &ldb,
			        betap,
			        cp, &ldc );
		}
#endif

#ifdef PRINT
			bli_printm( "c after", &c, "%9.5f", "" );
			exit(1);
#endif

			dtime_save = bli_clock_min_diff( dtime_save, dtime );
		}

		if ( bli_is_left( side ) )
			gflops = ( 2.0 * m * m * n ) / ( dtime_save * 1.0e9 );
		else
			gflops = ( 2.0 * m * n * n ) / ( dtime_save * 1.0e9 );

		if ( bli_is_complex( dt ) ) gflops *= 4.0;

#ifdef BLIS
		printf( "data_hemm_blis" );
#else
		printf( "data_hemm_%s", BLAS );
#endif
		printf( "( %2lu, 1:4 ) = [ %4lu %4lu  %10.3e  %6.3f ];\n",
		        ( unsigned long )(p - p_begin + 1)/p_inc + 1,
		        ( unsigned long )m,
		        ( unsigned long )n, dtime_save, gflops );

		bli_obj_free( &alpha );
		bli_obj_free( &beta );

		bli_obj_free( &a );
		bli_obj_free( &b );
		bli_obj_free( &c );
		bli_obj_free( &c_save );
	}

	bli_finalize();

	return 0;
}
Exemplo n.º 19
0
/* Subroutine */
int dsposv_(char *uplo, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * x, integer *ldx, doublereal *work, real *swork, integer *iter, integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, work_dim1, work_offset, x_dim1, x_offset, i__1;
    doublereal d__1;
    /* Builtin functions */
    double sqrt(doublereal);
    /* Local variables */
    integer i__;
    doublereal cte, eps, anrm;
    integer ptsa;
    doublereal rnrm, xnrm;
    integer ptsx;
    extern logical lsame_(char *, char *);
    integer iiter;
    extern /* Subroutine */
    int daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), dsymm_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), dlag2s_(integer *, integer *, doublereal *, integer *, real *, integer *, integer *), slag2d_(integer *, integer *, real *, integer *, doublereal *, integer *, integer *), dlat2s_(char *, integer *, doublereal *, integer *, real *, integer *, integer *);
    extern doublereal dlamch_(char *);
    extern integer idamax_(integer *, doublereal *, integer *);
    extern /* Subroutine */
    int dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), xerbla_(char *, integer *);
    extern doublereal dlansy_(char *, char *, integer *, doublereal *, integer *, doublereal *);
    extern /* Subroutine */
    int dpotrf_(char *, integer *, doublereal *, integer *, integer *), dpotrs_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *), spotrf_(char *, integer *, real *, integer *, integer *), spotrs_(char *, integer *, integer *, real *, integer *, real *, integer *, integer *);
    /* -- LAPACK driver routine (version 3.4.0) -- */
    /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
    /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
    /* November 2011 */
    /* .. Scalar Arguments .. */
    /* .. */
    /* .. Array Arguments .. */
    /* .. */
    /* ===================================================================== */
    /* .. Parameters .. */
    /* .. Local Scalars .. */
    /* .. External Subroutines .. */
    /* .. */
    /* .. External Functions .. */
    /* .. */
    /* .. Intrinsic Functions .. */
    /* .. */
    /* .. Executable Statements .. */
    /* Parameter adjustments */
    work_dim1 = *n;
    work_offset = 1 + work_dim1;
    work -= work_offset;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    x_dim1 = *ldx;
    x_offset = 1 + x_dim1;
    x -= x_offset;
    --swork;
    /* Function Body */
    *info = 0;
    *iter = 0;
    /* Test the input parameters. */
    if (! lsame_(uplo, "U") && ! lsame_(uplo, "L"))
    {
        *info = -1;
    }
    else if (*n < 0)
    {
        *info = -2;
    }
    else if (*nrhs < 0)
    {
        *info = -3;
    }
    else if (*lda < max(1,*n))
    {
        *info = -5;
    }
    else if (*ldb < max(1,*n))
    {
        *info = -7;
    }
    else if (*ldx < max(1,*n))
    {
        *info = -9;
    }
    if (*info != 0)
    {
        i__1 = -(*info);
        xerbla_("DSPOSV", &i__1);
        return 0;
    }
    /* Quick return if (N.EQ.0). */
    if (*n == 0)
    {
        return 0;
    }
    /* Skip single precision iterative refinement if a priori slower */
    /* than double precision factorization. */
    if (FALSE_)
    {
        *iter = -1;
        goto L40;
    }
    /* Compute some constants. */
    anrm = dlansy_("I", uplo, n, &a[a_offset], lda, &work[work_offset]);
    eps = dlamch_("Epsilon");
    cte = anrm * eps * sqrt((doublereal) (*n)) * 1.;
    /* Set the indices PTSA, PTSX for referencing SA and SX in SWORK. */
    ptsa = 1;
    ptsx = ptsa + *n * *n;
    /* Convert B from double precision to single precision and store the */
    /* result in SX. */
    dlag2s_(n, nrhs, &b[b_offset], ldb, &swork[ptsx], n, info);
    if (*info != 0)
    {
        *iter = -2;
        goto L40;
    }
    /* Convert A from double precision to single precision and store the */
    /* result in SA. */
    dlat2s_(uplo, n, &a[a_offset], lda, &swork[ptsa], n, info);
    if (*info != 0)
    {
        *iter = -2;
        goto L40;
    }
    /* Compute the Cholesky factorization of SA. */
    spotrf_(uplo, n, &swork[ptsa], n, info);
    if (*info != 0)
    {
        *iter = -3;
        goto L40;
    }
    /* Solve the system SA*SX = SB. */
    spotrs_(uplo, n, nrhs, &swork[ptsa], n, &swork[ptsx], n, info);
    /* Convert SX back to double precision */
    slag2d_(n, nrhs, &swork[ptsx], n, &x[x_offset], ldx, info);
    /* Compute R = B - AX (R is WORK). */
    dlacpy_("All", n, nrhs, &b[b_offset], ldb, &work[work_offset], n);
    dsymm_("Left", uplo, n, nrhs, &c_b10, &a[a_offset], lda, &x[x_offset], ldx, &c_b11, &work[work_offset], n);
    /* Check whether the NRHS normwise backward errors satisfy the */
    /* stopping criterion. If yes, set ITER=0 and return. */
    i__1 = *nrhs;
    for (i__ = 1;
            i__ <= i__1;
            ++i__)
    {
        xnrm = (d__1 = x[idamax_(n, &x[i__ * x_dim1 + 1], &c__1) + i__ * x_dim1], f2c_abs(d__1));
        rnrm = (d__1 = work[idamax_(n, &work[i__ * work_dim1 + 1], &c__1) + i__ * work_dim1], f2c_abs(d__1));
        if (rnrm > xnrm * cte)
        {
            goto L10;
        }
    }
    /* If we are here, the NRHS normwise backward errors satisfy the */
    /* stopping criterion. We are good to exit. */
    *iter = 0;
    return 0;
L10:
    for (iiter = 1;
            iiter <= 30;
            ++iiter)
    {
        /* Convert R (in WORK) from double precision to single precision */
        /* and store the result in SX. */
        dlag2s_(n, nrhs, &work[work_offset], n, &swork[ptsx], n, info);
        if (*info != 0)
        {
            *iter = -2;
            goto L40;
        }
        /* Solve the system SA*SX = SR. */
        spotrs_(uplo, n, nrhs, &swork[ptsa], n, &swork[ptsx], n, info);
        /* Convert SX back to double precision and update the current */
        /* iterate. */
        slag2d_(n, nrhs, &swork[ptsx], n, &work[work_offset], n, info);
        i__1 = *nrhs;
        for (i__ = 1;
                i__ <= i__1;
                ++i__)
        {
            daxpy_(n, &c_b11, &work[i__ * work_dim1 + 1], &c__1, &x[i__ * x_dim1 + 1], &c__1);
        }
        /* Compute R = B - AX (R is WORK). */
        dlacpy_("All", n, nrhs, &b[b_offset], ldb, &work[work_offset], n);
        dsymm_("L", uplo, n, nrhs, &c_b10, &a[a_offset], lda, &x[x_offset], ldx, &c_b11, &work[work_offset], n);
        /* Check whether the NRHS normwise backward errors satisfy the */
        /* stopping criterion. If yes, set ITER=IITER>0 and return. */
        i__1 = *nrhs;
        for (i__ = 1;
                i__ <= i__1;
                ++i__)
        {
            xnrm = (d__1 = x[idamax_(n, &x[i__ * x_dim1 + 1], &c__1) + i__ * x_dim1], f2c_abs(d__1));
            rnrm = (d__1 = work[idamax_(n, &work[i__ * work_dim1 + 1], &c__1) + i__ * work_dim1], f2c_abs(d__1));
            if (rnrm > xnrm * cte)
            {
                goto L20;
            }
        }
        /* If we are here, the NRHS normwise backward errors satisfy the */
        /* stopping criterion, we are good to exit. */
        *iter = iiter;
        return 0;
L20: /* L30: */
        ;
    }
    /* If we are at this place of the code, this is because we have */
    /* performed ITER=ITERMAX iterations and never satisified the */
    /* stopping criterion, set up the ITER flag accordingly and follow */
    /* up on double precision routine. */
    *iter = -31;
L40: /* Single-precision iterative refinement failed to converge to a */
    /* satisfactory solution, so we resort to double precision. */
    dpotrf_(uplo, n, &a[a_offset], lda, info);
    if (*info != 0)
    {
        return 0;
    }
    dlacpy_("All", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx);
    dpotrs_(uplo, n, nrhs, &a[a_offset], lda, &x[x_offset], ldx, info);
    return 0;
    /* End of DSPOSV. */
}