Ejemplo n.º 1
0
FLA_Error REF_Syrk_ln( FLA_Obj A, FLA_Obj C )
{
  FLA_Datatype datatype;
  int          k, m, ldim_A, ldim_C;

  datatype = FLA_Obj_datatype( A );
  ldim_A   = FLA_Obj_ldim( A );
  ldim_C   = FLA_Obj_ldim( C );
  k        = FLA_Obj_width( A );
  m        = FLA_Obj_length( A );
  
  switch( datatype ){
    case FLA_DOUBLE:
    {
      double *buff_A, *buff_C, d_one=1.0;

      buff_A = ( double * ) FLA_Obj_buffer_at_view( A );
      buff_C = ( double * ) FLA_Obj_buffer_at_view( C );
    
      dsyrk_( "L", "N", &m, &k,
              &d_one, buff_A, &ldim_A, &d_one, buff_C, &ldim_C );
    } break;
  }
  
  return 0;
}
Ejemplo n.º 2
0
/*
 * 2pdm kernel for  beta^i beta_j | ci0 >
 */
void FCIrdm12kern_b(double *rdm1, double *rdm2, double *bra, double *ket,
                    int bcount, int stra_id, int strb_id,
                    int norb, int na, int nb, int nlinka, int nlinkb,
                    _LinkT *clink_indexa, _LinkT *clink_indexb, int symm)
{
        const int INC1 = 1;
        const char UP = 'U';
        const char TRANS_N = 'N';
        const char TRANS_T = 'T';
        const double D1 = 1;
        const int nnorb = norb * norb;
        double csum;
        double *buf = calloc(nnorb*bcount, sizeof(double));

        csum = FCIrdm2_b_t1ci(ket, buf, bcount, stra_id, strb_id,
                              norb, nb, nlinkb, clink_indexb);
        if (csum > CSUMTHR) {
                dgemv_(&TRANS_N, &nnorb, &bcount, &D1, buf, &nnorb,
                       ket+stra_id*nb+strb_id, &INC1, &D1, rdm1, &INC1);
                switch (symm) {
                case BRAKETSYM:
                        dsyrk_(&UP, &TRANS_N, &nnorb, &bcount,
                               &D1, buf, &nnorb, &D1, rdm2, &nnorb);
                        break;
                case PARTICLESYM:
                        tril_particle_symm(rdm2, buf, buf, bcount, norb, 1, 1);
                        break;
                default:
                        dgemm_(&TRANS_N, &TRANS_T, &nnorb, &nnorb, &bcount,
                               &D1, buf, &nnorb, buf, &nnorb,
                               &D1, rdm2, &nnorb);
                }
        }
        free(buf);
}
Ejemplo n.º 3
0
/*
 * _spin0 assumes the strict symmetry on alpha and beta electrons
 */
void FCIrdm12kern_spin0(double *rdm1, double *rdm2, double *bra, double *ket,
                        int bcount, int stra_id, int strb_id,
                        int norb, int na, int nb, int nlinka, int nlinkb,
                        _LinkT *clink_indexa, _LinkT *clink_indexb, int symm)
{
        if (stra_id < strb_id) {
                return;
        }
        const int INC1 = 1;
        const char UP = 'U';
        const char TRANS_N = 'N';
        const char TRANS_T = 'T';
        const double D1 = 1;
        const double D2 = 2;
        const int nnorb = norb * norb;
        int fill0, fill1, i;
        double csum = 0;
        double *buf = calloc(nnorb * na, sizeof(double));

        if (strb_id+bcount <= stra_id) {
                fill0 = bcount;
                fill1 = bcount;
                csum = FCIrdm2_b_t1ci(ket, buf, fill0, stra_id, strb_id,
                                      norb, na, nlinka, clink_indexa)
                     + FCIrdm2_a_t1ci(ket, buf, fill1, stra_id, strb_id,
                                      norb, na, nlinka, clink_indexa);
        } else if (stra_id >= strb_id) {
                fill0 = stra_id - strb_id;
                fill1 = stra_id - strb_id + 1;
                csum = FCIrdm2_b_t1ci(ket, buf, fill0, stra_id, strb_id,
                                      norb, na, nlinka, clink_indexa)
                     + FCIrdm2_a_t1ci(ket, buf, fill1, stra_id, strb_id,
                                      norb, na, nlinka, clink_indexa);
        }
        if (csum > CSUMTHR) {
                dgemv_(&TRANS_N, &nnorb, &fill1, &D2, buf, &nnorb,
                       ket+stra_id*na+strb_id, &INC1, &D1, rdm1, &INC1);

                for (i = fill0*nnorb; i < fill1*nnorb; i++) {
                        buf[i] *= SQRT2;
                }
                switch (symm) {
                case BRAKETSYM:
                        dsyrk_(&UP, &TRANS_N, &nnorb, &fill1,
                               &D2, buf, &nnorb, &D1, rdm2, &nnorb);
                        break;
                case PARTICLESYM:
                        tril_particle_symm(rdm2, buf, buf, fill1, norb, D2, D1);
                        break;
                default:
                        dgemm_(&TRANS_N, &TRANS_T, &nnorb, &nnorb, &fill1,
                               &D2, buf, &nnorb, buf, &nnorb,
                               &D1, rdm2, &nnorb);
                }
        }
        free(buf);
}
Ejemplo n.º 4
0
int
f2c_dsyrk(char* uplo, char* trans, integer* N, integer* K,
          doublereal* alpha,
          doublereal* A, integer* lda,
          doublereal* beta,
          doublereal* C, integer* ldc)
{
    dsyrk_(uplo, trans, N, K,
           alpha, A, lda, beta, C, ldc);
    return 0;
}
Ejemplo n.º 5
0
/* ------------------------------------------------------------------ */
void
mim_ip_span_nullspace(int nf, int nconn, int d,
                      double *C,
                      double *A,
                      double *X,
                      double *work, int nwork)
/* ------------------------------------------------------------------ */
{
    MAT_SIZE_T m, n, k, ldC, ldX, info, lwork;

    int    i, j;
    double a1, a2;

    double tau[3] = { 0.0 };  /* No more than 3 spatial dimensions */

    /* Step 1) X(1:nf, 1:nf) <- I_{nf} */
    for (j = 0; j < nf; j++) {
        for (i = 0; i < nf; i++) {
            X[i + j*nconn] = 0.0;
        }
        X[j * (nconn + 1)] = 1.0;
    }

    /* Step 2) C <- orth(A * C) */
    for (j = 0; j < d; j++) {
        for (i = 0; i < nf; i++) {
            C[i + j*nf] *= A[i];
        }
    }

    m = nf;  n = d;  ldC = nf;  k = d;  lwork = nwork;
    dgeqrf_(&m, &n,     C, &ldC, tau, work, &lwork, &info);
    dorgqr_(&m, &n, &k, C, &ldC, tau, work, &lwork, &info);

    /* Step 3) X <- A * (X - C*C') * A */
    ldX = nconn;
    a1 = -1.0;  a2 = 1.0;
    dsyrk_("Upper Triangular", "No Transpose",
           &m, &n, &a1, C, &ldC, &a2, X, &ldX);
    for (j = 0; j < nf; j++) {
        for (i = 0; i <= j; i++) {
            X[i + j*nconn] *= A[i] * A[j];
        }
    }

    /* Account for DSYRK only assigning upper triangular part. */
    for (j = 0; j < nf; j++) {
        for (i = j + 1; i < nf; i++) {
            X[i + j*nconn] = X[j + i*nconn];
        }
    }
}
Ejemplo n.º 6
0
 void dsyrk(const UPLO Uplo,
            const TRANSPOSE Trans,
            const int N,
            const int K,
            const double alpha,
            const double *A,
            const int lda,
            const double beta,
            double *C,
            const int ldc) {
   dsyrk_(UploChar[Uplo], TransposeChar[Trans],
          &N, &K, &alpha, A, &lda, &beta, C, &ldc);
 }
Ejemplo n.º 7
0
void DenseSymMatrix::atRankkUpdate( double alpha, double beta, DenseGenMatrix& U, int trans)
{
  int n, k; int ldu, lda;
  //-----------------------------------------------
  // setup if the U is stored in column-major form
  // (FORTRAN Style)
  //   char UPLO  = 'U'; char TRANS = trans==0?'N':'T';
  
  //   U.getSize(n,k); ldu=n;
  //   if(trans) k=n;
  
  //   n = mStorage->n; 
  //   lda=n;
  //----------------------------------------------

  // U and 'this' are stored in row-major form -> a little change in passing params to FORTRAN is needed
  char UPLO  = 'U'; //update LOWER triagular part for symmetric matrix 'this'
  //trans=1 -> this += U'*U -> tell BLAS to do U*U'
  //trans=0 -> this += U*U' -> tell BLAS to do U'*U
  char TRANS = trans==0?'T':'N';  

  int m;
  U.getSize(m,k);
  ldu=k; // change leading dim so that U in row-major  in col-major
  if(trans) k=m;

  n = mStorage->n; 
  lda=n;

#ifdef DEBUG
  //TRANS = 'N', k specifies the number of columns of the matrix U
  //we pass U' instead of U, so k should be the number of rows of U
  int r,c; U.getSize(rll,cll);
  if(TRANS=='N') assert(k==r);
  else if(TRANS=='T') assert(k==c);
  else assert(false);
#endif


  dsyrk_(&UPLO, &TRANS,
	 &n, &k,
	 &beta,   &U.getStorageRef().M[0][0], &ldu,
	 &alpha,  &mStorage->M[0][0],       &lda);

}
Ejemplo n.º 8
0
/*
 * dm(pq,rs) * [p(beta)^+ q(alpha) r(alpha)^+ s(beta)]
 */
void FCIdm2_baab_kern(double *rdm1, double *rdm2, double *bra, double *ket,
                      int stra_id, int norb, int na, int nb,
                      int neleca, int nelecb,
                      int *ades_index, int *bcre_index)
{
        const char UP = 'U';
        const char TRANS_N = 'N';
        const double D1 = 1;
        const int nnorb = norb * norb;
        const int instrb = nb * nelecb / (norb-nelecb+1);
        double csum;
        double *buf = calloc(nnorb * instrb, sizeof(double));

        csum = ades_bcre_t1(ket, buf, instrb, stra_id, norb, nb,
                            neleca, nelecb, ades_index, bcre_index);
        if (csum > CSUMTHR) {
                dsyrk_(&UP, &TRANS_N, &nnorb, &instrb,
                       &D1, buf, &nnorb, &D1, rdm2, &nnorb);
        }
        free(buf);
}
Ejemplo n.º 9
0
void syrkBase( double *C, double *A, int n, double alpha ) {
  double *temp = (double*) malloc( n*n*sizeof(double) );
  startTimer(TIMER_REARRANGE_SYRK);
  double *Cp = C;
  for( int c = 0; c < n; c++ )
    for( int r = c; r < n; r++ )
      temp[c*n+r] = *(Cp++);
  char L = 'L', N = 'N';
  double none = -1., one=alpha;
  stopTimer(TIMER_REARRANGE_SYRK);
  startTimer(TIMER_BASE_SYRK);
  dsyrk_(&L, &N, &n, &n, &none, A, &n, &one, temp, &n);
  stopTimer(TIMER_BASE_SYRK);
  startTimer(TIMER_REARRANGE_SYRK);
  Cp = C;
  for( int c = 0; c < n; c++ )
    for( int r = c; r < n; r++ )
      *(Cp++) = temp[c*n+r];
  stopTimer(TIMER_REARRANGE_SYRK);
  free(temp);
}
Ejemplo n.º 10
0
/*
 * dm(pq,rs) * [p(alpha)^+ q(beta) r(beta)^+ s(alpha)]
 */
void FCIdm2_abba_kern(double *rdm1, double *rdm2, double *bra, double *ket,
                      int stra_id, int norb, int na, int nb,
                      int neleca, int nelecb,
                      int *acre_index, int *bdes_index)
{
        const char UP = 'U';
        const char TRANS_N = 'N';
        const double D1 = 1;
        const int nnorb = norb * norb;
        const int instrb = nb * (norb-nelecb) / (nelecb+1);
        double csum;
        double *buf = malloc(sizeof(double) * nnorb * instrb);

        memset(buf, 0, sizeof(double)*nnorb*instrb);
        csum = acre_bdes_t1(ket, buf, instrb, stra_id, norb, nb,
                            neleca, nelecb, acre_index, bdes_index);
        if (csum > CSUMTHR) {
                dsyrk_(&UP, &TRANS_N, &nnorb, &instrb,
                       &D1, buf, &nnorb, &D1, rdm2, &nnorb);
        }
        free(buf);
}
/**
 * Compute the schur complement adjacency matrix.
 * \param[inout] adjacency_matrix The adjacency represenation of the graph.
 */
static double compute_schur_complement (std::vector<double>& adjacency_matrix) {
  int matrix_dimension = static_cast<int>(sqrt (adjacency_matrix.size()));
  int submatrix_dimension = matrix_dimension-2;
  int element_size=2;
  double ONE=1.0;
  double MINUS_ONE=-1.0;

  assert (0<submatrix_dimension);

  char side = 'L';
  char uplo = 'U';
  char transA = 'T';
  char diag = 'N';

  dtrsm_(&side,    /* side */
         &uplo,    /* uplo */
         &transA,  /* transA */
         &diag,    /* diag */
         &submatrix_dimension, /* M */
         &element_size,        /* N */
         &ONE,                 /* alpha */
         &(adjacency_matrix[0]),  /* A */
         &matrix_dimension,       /* lda */
   &(adjacency_matrix[submatrix_dimension*matrix_dimension]), /*B*/
         &matrix_dimension);      /* ldb */

  dsyrk_(&uplo,         /* uplo */
         &transA,       /* trans */
         &element_size, /* N */
         &submatrix_dimension, /* K */
         &MINUS_ONE,           /* alpha */
   &(adjacency_matrix[submatrix_dimension*matrix_dimension]), /* A */
         &matrix_dimension, /* lda */
         &ONE, /* beta */
   &(adjacency_matrix[submatrix_dimension*(matrix_dimension+1)]), /* C */
         &matrix_dimension);   /* ldc */

  return 1.0/(adjacency_matrix[submatrix_dimension*(matrix_dimension+1)]);
}
Ejemplo n.º 12
0
PyObject* rk(PyObject *self, PyObject *args)
{
  double alpha;
  PyArrayObject* a;
  double beta;
  PyArrayObject* c;
  if (!PyArg_ParseTuple(args, "dOdO", &alpha, &a, &beta, &c))
    return NULL;
  int n = PyArray_DIMS(a)[0];
  int k = PyArray_DIMS(a)[1];
  for (int d = 2; d < PyArray_NDIM(a); d++)
    k *= PyArray_DIMS(a)[d];
  int ldc = PyArray_STRIDES(c)[0] / PyArray_STRIDES(c)[1];
  if (PyArray_DESCR(a)->type_num == NPY_DOUBLE)
    dsyrk_("u", "t", &n, &k,
           &alpha, DOUBLEP(a), &k, &beta,
           DOUBLEP(c), &ldc);
  else
    zherk_("u", "c", &n, &k,
           &alpha, (void*)COMPLEXP(a), &k, &beta,
           (void*)COMPLEXP(c), &ldc);
  Py_RETURN_NONE;
}
Ejemplo n.º 13
0
// computes C -= A*A^t, where C is symmetric, half stored, A is general
void syrk( double *C, double *A, int n ) {
  // base case
  if( n <= nmin ) {
    double *temp = (double*) malloc( n*n*sizeof(double) );
    double *Cp = C;
    for( int c = 0; c < n; c++ )
      for( int r = c; r < n; r++ )
	temp[c*n+r] = *(Cp++);
    char L = 'L', N = 'N';
    double none = -1., one = 1.;
    dsyrk_(&L, &N, &n, &n, &none, A, &n, &one, temp, &n);
    Cp = C;
    for( int c = 0; c < n; c++ )
      for( int r = c; r < n; r++ )
	*(Cp++) = temp[c*n+r];
    free(temp);
    //C[0] -= A[0]*A[0];
    return;
  }
  int nhalf = n/2;
  double *C11 = C;
  double *C21 = C + nhalf*(nhalf+1)/2;
  double *C22 = C21 + nhalf*nhalf;
  double *A11 = A;
  double *A21 = A+nhalf*nhalf;
  double *A12 = A21+nhalf*nhalf;
  double *A22 = A12+nhalf*nhalf;
  
  // these can be made independent with the use of some intermediates, and some final additions
  syrk( C11, A11, nhalf );
  syrk( C11, A12, nhalf );
  mult( C21, A21, A11, nhalf ); // This will do C21 = C21-A21*A11^t
  mult( C21, A22, A12, nhalf );
  syrk( C22, A21, nhalf );
  syrk( C22, A22, nhalf );
}
Ejemplo n.º 14
0
int chol_blocked(double *A, int n, int b){

  int i,j,k,m;
  int info;
  const double one = 1.0;
  const double minusone = -1.0;
  double t1, t2;

  for(k = 0; k < n; k += b){
    m=min( n-k, b );
    dpotrf_("L", &m, &A[k*n+k], &n, &info);
    
    if( info < 0 ) {
      fprintf(stderr,"Error in the Cholesky decomposition \nInvalid argument");
      exit(-1);
    }
    if( info > 0 ) {
      fprintf(stderr,"Error in the Cholesky decomposition \nMatrix isn't positive definite");
      exit(-1);
    }

    for(i = k+b; i < n; i += b){
      m=min( n-i, b );  
      dtrsm_("R","L","T","N", &m, &b, &one, &A[k*n+k], &n, &A[i+k*n], &n);
    }
    
    for(i = k + b; i < n; i += b){
      m=min( n-i, b ); 
      for(j = k + b; j <= i - 1; j += b){
       dgemm_("N","T", &m, &b, &b, &minusone, &A[i+k*n], &n, &A[j+k*n], &n, &one, &A[j*n+i], &n);
    }
    dsyrk_("L","N",&m, &b, &minusone, &A[i+k*n], &n, &one, &A[i*n+i], &n);
    }
  }
  return 0;
}
Ejemplo n.º 15
0
/* Subroutine */ int dgqrts_(integer *n, integer *m, integer *p, doublereal *
	a, doublereal *af, doublereal *q, doublereal *r__, integer *lda, 
	doublereal *taua, doublereal *b, doublereal *bf, doublereal *z__, 
	doublereal *t, doublereal *bwk, integer *ldb, doublereal *taub, 
	doublereal *work, integer *lwork, doublereal *rwork, doublereal *
	result)
{
    /* System generated locals */
    integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, bf_dim1, 
	    bf_offset, bwk_dim1, bwk_offset, q_dim1, q_offset, r_dim1, 
	    r_offset, t_dim1, t_offset, z_dim1, z_offset, i__1, i__2;
    doublereal d__1;

    /* Local variables */
    static integer info;
    static doublereal unfl;
    extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, 
	    integer *, doublereal *, doublereal *, integer *, doublereal *, 
	    integer *, doublereal *, doublereal *, integer *);
    static doublereal resid, anorm, bnorm;
    extern /* Subroutine */ int dsyrk_(char *, char *, integer *, integer *, 
	    doublereal *, doublereal *, integer *, doublereal *, doublereal *,
	     integer *);
    extern doublereal dlamch_(char *), dlange_(char *, integer *, 
	    integer *, doublereal *, integer *, doublereal *);
    extern /* Subroutine */ int dggqrf_(integer *, integer *, integer *, 
	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
	    doublereal *, doublereal *, integer *, integer *), dlacpy_(char *,
	     integer *, integer *, doublereal *, integer *, doublereal *, 
	    integer *), dlaset_(char *, integer *, integer *, 
	    doublereal *, doublereal *, doublereal *, integer *);
    extern doublereal dlansy_(char *, char *, integer *, doublereal *, 
	    integer *, doublereal *);
    extern /* Subroutine */ int dorgqr_(integer *, integer *, integer *, 
	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
	    integer *), dorgrq_(integer *, integer *, integer *, doublereal *,
	     integer *, doublereal *, doublereal *, integer *, integer *);
    static doublereal ulp;


#define q_ref(a_1,a_2) q[(a_2)*q_dim1 + a_1]
#define t_ref(a_1,a_2) t[(a_2)*t_dim1 + a_1]
#define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1]
#define af_ref(a_1,a_2) af[(a_2)*af_dim1 + a_1]
#define bf_ref(a_1,a_2) bf[(a_2)*bf_dim1 + a_1]


/*  -- LAPACK test routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    DGQRTS tests DGGQRF, which computes the GQR factorization of an   
    N-by-M matrix A and a N-by-P matrix B: A = Q*R and B = Q*T*Z.   

    Arguments   
    =========   

    N       (input) INTEGER   
            The number of rows of the matrices A and B.  N >= 0.   

    M       (input) INTEGER   
            The number of columns of the matrix A.  M >= 0.   

    P       (input) INTEGER   
            The number of columns of the matrix B.  P >= 0.   

    A       (input) DOUBLE PRECISION array, dimension (LDA,M)   
            The N-by-M matrix A.   

    AF      (output) DOUBLE PRECISION array, dimension (LDA,N)   
            Details of the GQR factorization of A and B, as returned   
            by DGGQRF, see SGGQRF for further details.   

    Q       (output) DOUBLE PRECISION array, dimension (LDA,N)   
            The M-by-M orthogonal matrix Q.   

    R       (workspace) DOUBLE PRECISION array, dimension (LDA,MAX(M,N))   

    LDA     (input) INTEGER   
            The leading dimension of the arrays A, AF, R and Q.   
            LDA >= max(M,N).   

    TAUA    (output) DOUBLE PRECISION array, dimension (min(M,N))   
            The scalar factors of the elementary reflectors, as returned   
            by DGGQRF.   

    B       (input) DOUBLE PRECISION array, dimension (LDB,P)   
            On entry, the N-by-P matrix A.   

    BF      (output) DOUBLE PRECISION array, dimension (LDB,N)   
            Details of the GQR factorization of A and B, as returned   
            by DGGQRF, see SGGQRF for further details.   

    Z       (output) DOUBLE PRECISION array, dimension (LDB,P)   
            The P-by-P orthogonal matrix Z.   

    T       (workspace) DOUBLE PRECISION array, dimension (LDB,max(P,N))   

    BWK     (workspace) DOUBLE PRECISION array, dimension (LDB,N)   

    LDB     (input) INTEGER   
            The leading dimension of the arrays B, BF, Z and T.   
            LDB >= max(P,N).   

    TAUB    (output) DOUBLE PRECISION array, dimension (min(P,N))   
            The scalar factors of the elementary reflectors, as returned   
            by DGGRQF.   

    WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK)   

    LWORK   (input) INTEGER   
            The dimension of the array WORK, LWORK >= max(N,M,P)**2.   

    RWORK   (workspace) DOUBLE PRECISION array, dimension (max(N,M,P))   

    RESULT  (output) DOUBLE PRECISION array, dimension (4)   
            The test ratios:   
              RESULT(1) = norm( R - Q'*A ) / ( MAX(M,N)*norm(A)*ULP)   
              RESULT(2) = norm( T*Z - Q'*B ) / (MAX(P,N)*norm(B)*ULP)   
              RESULT(3) = norm( I - Q'*Q ) / ( M*ULP )   
              RESULT(4) = norm( I - Z'*Z ) / ( P*ULP )   

    =====================================================================   


       Parameter adjustments */
    r_dim1 = *lda;
    r_offset = 1 + r_dim1 * 1;
    r__ -= r_offset;
    q_dim1 = *lda;
    q_offset = 1 + q_dim1 * 1;
    q -= q_offset;
    af_dim1 = *lda;
    af_offset = 1 + af_dim1 * 1;
    af -= af_offset;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --taua;
    bwk_dim1 = *ldb;
    bwk_offset = 1 + bwk_dim1 * 1;
    bwk -= bwk_offset;
    t_dim1 = *ldb;
    t_offset = 1 + t_dim1 * 1;
    t -= t_offset;
    z_dim1 = *ldb;
    z_offset = 1 + z_dim1 * 1;
    z__ -= z_offset;
    bf_dim1 = *ldb;
    bf_offset = 1 + bf_dim1 * 1;
    bf -= bf_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    --taub;
    --work;
    --rwork;
    --result;

    /* Function Body */
    ulp = dlamch_("Precision");
    unfl = dlamch_("Safe minimum");

/*     Copy the matrix A to the array AF. */

    dlacpy_("Full", n, m, &a[a_offset], lda, &af[af_offset], lda);
    dlacpy_("Full", n, p, &b[b_offset], ldb, &bf[bf_offset], ldb);

/* Computing MAX */
    d__1 = dlange_("1", n, m, &a[a_offset], lda, &rwork[1]);
    anorm = max(d__1,unfl);
/* Computing MAX */
    d__1 = dlange_("1", n, p, &b[b_offset], ldb, &rwork[1]);
    bnorm = max(d__1,unfl);

/*     Factorize the matrices A and B in the arrays AF and BF. */

    dggqrf_(n, m, p, &af[af_offset], lda, &taua[1], &bf[bf_offset], ldb, &
	    taub[1], &work[1], lwork, &info);

/*     Generate the N-by-N matrix Q */

    dlaset_("Full", n, n, &c_b9, &c_b9, &q[q_offset], lda);
    i__1 = *n - 1;
    dlacpy_("Lower", &i__1, m, &af_ref(2, 1), lda, &q_ref(2, 1), lda);
    i__1 = min(*n,*m);
    dorgqr_(n, n, &i__1, &q[q_offset], lda, &taua[1], &work[1], lwork, &info);

/*     Generate the P-by-P matrix Z */

    dlaset_("Full", p, p, &c_b9, &c_b9, &z__[z_offset], ldb);
    if (*n <= *p) {
	if (*n > 0 && *n < *p) {
	    i__1 = *p - *n;
	    dlacpy_("Full", n, &i__1, &bf[bf_offset], ldb, &z___ref(*p - *n + 
		    1, 1), ldb);
	}
	if (*n > 1) {
	    i__1 = *n - 1;
	    i__2 = *n - 1;
	    dlacpy_("Lower", &i__1, &i__2, &bf_ref(2, *p - *n + 1), ldb, &
		    z___ref(*p - *n + 2, *p - *n + 1), ldb);
	}
    } else {
	if (*p > 1) {
	    i__1 = *p - 1;
	    i__2 = *p - 1;
	    dlacpy_("Lower", &i__1, &i__2, &bf_ref(*n - *p + 2, 1), ldb, &
		    z___ref(2, 1), ldb);
	}
    }
    i__1 = min(*n,*p);
    dorgrq_(p, p, &i__1, &z__[z_offset], ldb, &taub[1], &work[1], lwork, &
	    info);

/*     Copy R */

    dlaset_("Full", n, m, &c_b19, &c_b19, &r__[r_offset], lda);
    dlacpy_("Upper", n, m, &af[af_offset], lda, &r__[r_offset], lda);

/*     Copy T */

    dlaset_("Full", n, p, &c_b19, &c_b19, &t[t_offset], ldb);
    if (*n <= *p) {
	dlacpy_("Upper", n, n, &bf_ref(1, *p - *n + 1), ldb, &t_ref(1, *p - *
		n + 1), ldb);
    } else {
	i__1 = *n - *p;
	dlacpy_("Full", &i__1, p, &bf[bf_offset], ldb, &t[t_offset], ldb);
	dlacpy_("Upper", p, p, &bf_ref(*n - *p + 1, 1), ldb, &t_ref(*n - *p + 
		1, 1), ldb);
    }

/*     Compute R - Q'*A */

    dgemm_("Transpose", "No transpose", n, m, n, &c_b30, &q[q_offset], lda, &
	    a[a_offset], lda, &c_b31, &r__[r_offset], lda);

/*     Compute norm( R - Q'*A ) / ( MAX(M,N)*norm(A)*ULP ) . */

    resid = dlange_("1", n, m, &r__[r_offset], lda, &rwork[1]);
    if (anorm > 0.) {
/* Computing MAX */
	i__1 = max(1,*m);
	result[1] = resid / (doublereal) max(i__1,*n) / anorm / ulp;
    } else {
	result[1] = 0.;
    }

/*     Compute T*Z - Q'*B */

    dgemm_("No Transpose", "No transpose", n, p, p, &c_b31, &t[t_offset], ldb,
	     &z__[z_offset], ldb, &c_b19, &bwk[bwk_offset], ldb);
    dgemm_("Transpose", "No transpose", n, p, n, &c_b30, &q[q_offset], lda, &
	    b[b_offset], ldb, &c_b31, &bwk[bwk_offset], ldb);

/*     Compute norm( T*Z - Q'*B ) / ( MAX(P,N)*norm(A)*ULP ) . */

    resid = dlange_("1", n, p, &bwk[bwk_offset], ldb, &rwork[1]);
    if (bnorm > 0.) {
/* Computing MAX */
	i__1 = max(1,*p);
	result[2] = resid / (doublereal) max(i__1,*n) / bnorm / ulp;
    } else {
	result[2] = 0.;
    }

/*     Compute I - Q'*Q */

    dlaset_("Full", n, n, &c_b19, &c_b31, &r__[r_offset], lda);
    dsyrk_("Upper", "Transpose", n, n, &c_b30, &q[q_offset], lda, &c_b31, &
	    r__[r_offset], lda);

/*     Compute norm( I - Q'*Q ) / ( N * ULP ) . */

    resid = dlansy_("1", "Upper", n, &r__[r_offset], lda, &rwork[1]);
    result[3] = resid / (doublereal) max(1,*n) / ulp;

/*     Compute I - Z'*Z */

    dlaset_("Full", p, p, &c_b19, &c_b31, &t[t_offset], ldb);
    dsyrk_("Upper", "Transpose", p, p, &c_b30, &z__[z_offset], ldb, &c_b31, &
	    t[t_offset], ldb);

/*     Compute norm( I - Z'*Z ) / ( P*ULP ) . */

    resid = dlansy_("1", "Upper", p, &t[t_offset], ldb, &rwork[1]);
    result[4] = resid / (doublereal) max(1,*p) / ulp;

    return 0;

/*     End of DGQRTS */

} /* dgqrts_ */
Ejemplo n.º 16
0
/* Subroutine */ HYPRE_Int dpotrf_(char *uplo, integer *n, doublereal *a, integer *
	lda, integer *info)
{
/*  -- LAPACK routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       March 31, 1993   


    Purpose   
    =======   

    DPOTRF computes the Cholesky factorization of a real symmetric   
    positive definite matrix A.   

    The factorization has the form   
       A = U**T * U,  if UPLO = 'U', or   
       A = L  * L**T,  if UPLO = 'L',   
    where U is an upper triangular matrix and L is lower triangular.   

    This is the block version of the algorithm, calling Level 3 BLAS.   

    Arguments   
    =========   

    UPLO    (input) CHARACTER*1   
            = 'U':  Upper triangle of A is stored;   
            = 'L':  Lower triangle of A is stored.   

    N       (input) INTEGER   
            The order of the matrix A.  N >= 0.   

    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)   
            On entry, the symmetric matrix A.  If UPLO = 'U', the leading   
            N-by-N upper triangular part of A contains the upper   
            triangular part of the matrix A, and the strictly lower   
            triangular part of A is not referenced.  If UPLO = 'L', the   
            leading N-by-N lower triangular part of A contains the lower   
            triangular part of the matrix A, and the strictly upper   
            triangular part of A is not referenced.   

            On exit, if INFO = 0, the factor U or L from the Cholesky   
            factorization A = U**T*U or A = L*L**T.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value   
            > 0:  if INFO = i, the leading minor of order i is not   
                  positive definite, and the factorization could not be   
                  completed.   

    =====================================================================   


       Test the input parameters.   

       Parameter adjustments */
    /* Table of constant values */
    static integer c__1 = 1;
    static integer c_n1 = -1;
    static doublereal c_b13 = -1.;
    static doublereal c_b14 = 1.;
    
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
    /* Local variables */
    static integer j;
    extern /* Subroutine */ HYPRE_Int dgemm_(char *, char *, integer *, integer *, 
	    integer *, doublereal *, doublereal *, integer *, doublereal *, 
	    integer *, doublereal *, doublereal *, integer *);
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ HYPRE_Int dtrsm_(char *, char *, char *, char *, 
	    integer *, integer *, doublereal *, doublereal *, integer *, 
	    doublereal *, integer *);
    static logical upper;
    extern /* Subroutine */ HYPRE_Int dsyrk_(char *, char *, integer *, integer *, 
	    doublereal *, doublereal *, integer *, doublereal *, doublereal *,
	     integer *), dpotf2_(char *, integer *, 
	    doublereal *, integer *, integer *);
    static integer jb, nb;
    extern /* Subroutine */ HYPRE_Int xerbla_(char *, integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *, ftnlen, ftnlen);
#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]


    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*lda < max(1,*n)) {
	*info = -4;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("DPOTRF", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Determine the block size for this environment. */

    nb = ilaenv_(&c__1, "DPOTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (
	    ftnlen)1);
    if (nb <= 1 || nb >= *n) {

/*        Use unblocked code. */

	dpotf2_(uplo, n, &a[a_offset], lda, info);
    } else {

/*        Use blocked code. */

	if (upper) {

/*           Compute the Cholesky factorization A = U'*U. */

	    i__1 = *n;
	    i__2 = nb;
	    for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {

/*              Update and factorize the current diagonal block and test   
                for non-positive-definiteness.   

   Computing MIN */
		i__3 = nb, i__4 = *n - j + 1;
		jb = min(i__3,i__4);
		i__3 = j - 1;
		dsyrk_("Upper", "Transpose", &jb, &i__3, &c_b13, &a_ref(1, j),
			 lda, &c_b14, &a_ref(j, j), lda)
			;
		dpotf2_("Upper", &jb, &a_ref(j, j), lda, info);
		if (*info != 0) {
		    goto L30;
		}
		if (j + jb <= *n) {

/*                 Compute the current block row. */

		    i__3 = *n - j - jb + 1;
		    i__4 = j - 1;
		    dgemm_("Transpose", "No transpose", &jb, &i__3, &i__4, &
			    c_b13, &a_ref(1, j), lda, &a_ref(1, j + jb), lda, 
			    &c_b14, &a_ref(j, j + jb), lda);
		    i__3 = *n - j - jb + 1;
		    dtrsm_("Left", "Upper", "Transpose", "Non-unit", &jb, &
			    i__3, &c_b14, &a_ref(j, j), lda, &a_ref(j, j + jb)
			    , lda)
			    ;
		}
/* L10: */
	    }

	} else {

/*           Compute the Cholesky factorization A = L*L'. */

	    i__2 = *n;
	    i__1 = nb;
	    for (j = 1; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) {

/*              Update and factorize the current diagonal block and test   
                for non-positive-definiteness.   

   Computing MIN */
		i__3 = nb, i__4 = *n - j + 1;
		jb = min(i__3,i__4);
		i__3 = j - 1;
		dsyrk_("Lower", "No transpose", &jb, &i__3, &c_b13, &a_ref(j, 
			1), lda, &c_b14, &a_ref(j, j), lda);
		dpotf2_("Lower", &jb, &a_ref(j, j), lda, info);
		if (*info != 0) {
		    goto L30;
		}
		if (j + jb <= *n) {

/*                 Compute the current block column. */

		    i__3 = *n - j - jb + 1;
		    i__4 = j - 1;
		    dgemm_("No transpose", "Transpose", &i__3, &jb, &i__4, &
			    c_b13, &a_ref(j + jb, 1), lda, &a_ref(j, 1), lda, 
			    &c_b14, &a_ref(j + jb, j), lda);
		    i__3 = *n - j - jb + 1;
		    dtrsm_("Right", "Lower", "Transpose", "Non-unit", &i__3, &
			    jb, &c_b14, &a_ref(j, j), lda, &a_ref(j + jb, j), 
			    lda);
		}
/* L20: */
	    }
	}
    }
    goto L40;

L30:
    *info = *info + j - 1;

L40:
    return 0;

/*     End of DPOTRF */

} /* dpotrf_ */
Ejemplo n.º 17
0
void
dsyrk(char uplo, char transa, int n, int k, double alpha, double *a, int lda, double beta, double *c, int ldc)
{
    dsyrk_(&uplo, &transa, &n, &k, &alpha, a, &lda, &beta, c, &ldc);
}
Ejemplo n.º 18
0
int main( int argc, char** argv )
{
	obj_t a, c;
	obj_t c_save;
	obj_t alpha, beta;
	dim_t m, k;
	dim_t p;
	dim_t p_begin, p_end, p_inc;
	int   m_input, k_input;
	num_t dt_a, dt_c;
	num_t dt_alpha, dt_beta;
	int   r, n_repeats;
	uplo_t uplo;

	double dtime;
	double dtime_save;
	double gflops;

	bli_init();

	n_repeats = 3;

    if( argc < 7 ) 
    {   
        printf("Usage:\n");
        printf("test_foo.x m n k p_begin p_inc p_end:\n");
        exit;
    }   

    int world_size, world_rank, provided;
    MPI_Init_thread( NULL, NULL, MPI_THREAD_FUNNELED, &provided );
    MPI_Comm_size( MPI_COMM_WORLD, &world_size );
    MPI_Comm_rank( MPI_COMM_WORLD, &world_rank );

    m_input = strtol( argv[1], NULL, 10 );
    k_input = strtol( argv[3], NULL, 10 );
    p_begin = strtol( argv[4], NULL, 10 );
    p_inc   = strtol( argv[5], NULL, 10 );
    p_end   = strtol( argv[6], NULL, 10 );

	dt_a = BLIS_DOUBLE;
	dt_c = BLIS_DOUBLE;
	dt_alpha = BLIS_DOUBLE;
	dt_beta = BLIS_DOUBLE;

	uplo = BLIS_LOWER;

    for ( p = p_begin + world_rank * p_inc; p <= p_end; p += p_inc * world_size )
	{

		if ( m_input < 0 ) m = p * ( dim_t )abs(m_input);
		else               m =     ( dim_t )    m_input;
		if ( k_input < 0 ) k = p * ( dim_t )abs(k_input);
		else               k =     ( dim_t )    k_input;


		bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha );
		bli_obj_create( dt_beta,  1, 1, 0, 0, &beta );

		bli_obj_create( dt_a, m, k, 0, 0, &a );
		bli_obj_create( dt_c, m, m, 0, 0, &c );
		bli_obj_create( dt_c, m, m, 0, 0, &c_save );

		bli_randm( &a );
		bli_randm( &c );

		bli_obj_set_struc( BLIS_HERMITIAN, &c );
		bli_obj_set_uplo( uplo, &c );


		bli_setsc(  (2.0/1.0), 0.0, &alpha );
		bli_setsc(  (1.0/1.0), 0.0, &beta );


		bli_copym( &c, &c_save );
	
		dtime_save = 1.0e9;

		for ( r = 0; r < n_repeats; ++r )
		{
			bli_copym( &c_save, &c );


			dtime = bli_clock();

#ifdef PRINT
			bli_printm( "a", &a, "%4.1f", "" );
			bli_printm( "c", &c, "%4.1f", "" );
#endif

#ifdef BLIS

			//bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING );

			bli_herk( &alpha,
			          &a,
			          &beta,
			          &c );

#else

			f77_char uploa  = 'L';
			f77_char transa = 'N';
			f77_int  mm     = bli_obj_length( &c );
			f77_int  kk     = bli_obj_width_after_trans( &a );
			f77_int  lda    = bli_obj_col_stride( &a );
			f77_int  ldc    = bli_obj_col_stride( &c );
			double*  alphap = bli_obj_buffer( &alpha );
			double*  ap     = bli_obj_buffer( &a );
			double*  betap  = bli_obj_buffer( &beta );
			double*  cp     = bli_obj_buffer( &c );

			dsyrk_( &uploa,
			        &transa,
			        &mm,
			        &kk,
			        alphap,
			        ap, &lda,
			        betap,
			        cp, &ldc );
#endif

#ifdef PRINT
			bli_printm( "c after", &c, "%4.1f", "" );
			exit(1);
#endif


			dtime_save = bli_clock_min_diff( dtime_save, dtime );
		}

		gflops = ( 1.0 * m * k * m ) / ( dtime_save * 1.0e9 );

#ifdef BLIS
		printf( "data_herk_blis" );
#else
		printf( "data_herk_%s", BLAS );
#endif
		printf( "( %2lu, 1:4 ) = [ %4lu %4lu  %10.3e  %6.3f ];\n",
		        ( unsigned long )(p - p_begin + 1)/p_inc + 1,
		        ( unsigned long )m,
		        ( unsigned long )k, dtime_save, gflops );


		bli_obj_free( &alpha );
		bli_obj_free( &beta );

		bli_obj_free( &a );
		bli_obj_free( &c );
		bli_obj_free( &c_save );
	}

	bli_finalize();

	return 0;
}
Ejemplo n.º 19
0
/* Subroutine */ int dlauum_(char *uplo, integer *n, doublereal *a, integer *
	lda, integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;

    /* Local variables */
    integer i__, ib, nb;
    extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, 
	    integer *, doublereal *, doublereal *, integer *, doublereal *, 
	    integer *, doublereal *, doublereal *, integer *);
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int dtrmm_(char *, char *, char *, char *, 
	    integer *, integer *, doublereal *, doublereal *, integer *, 
	    doublereal *, integer *);
    logical upper;
    extern /* Subroutine */ int dsyrk_(char *, char *, integer *, integer *, 
	    doublereal *, doublereal *, integer *, doublereal *, doublereal *, 
	     integer *), dlauu2_(char *, integer *, 
	    doublereal *, integer *, integer *), xerbla_(char *, 
	    integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *);


/*  -- LAPACK auxiliary routine (version 3.2) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  DLAUUM computes the product U * U' or L' * L, where the triangular */
/*  factor U or L is stored in the upper or lower triangular part of */
/*  the array A. */

/*  If UPLO = 'U' or 'u' then the upper triangle of the result is stored, */
/*  overwriting the factor U in A. */
/*  If UPLO = 'L' or 'l' then the lower triangle of the result is stored, */
/*  overwriting the factor L in A. */

/*  This is the blocked form of the algorithm, calling Level 3 BLAS. */

/*  Arguments */
/*  ========= */

/*  UPLO    (input) CHARACTER*1 */
/*          Specifies whether the triangular factor stored in the array A */
/*          is upper or lower triangular: */
/*          = 'U':  Upper triangular */
/*          = 'L':  Lower triangular */

/*  N       (input) INTEGER */
/*          The order of the triangular factor U or L.  N >= 0. */

/*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N) */
/*          On entry, the triangular factor U or L. */
/*          On exit, if UPLO = 'U', the upper triangle of A is */
/*          overwritten with the upper triangle of the product U * U'; */
/*          if UPLO = 'L', the lower triangle of A is overwritten with */
/*          the lower triangle of the product L' * L. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A.  LDA >= max(1,N). */

/*  INFO    (output) INTEGER */
/*          = 0: successful exit */
/*          < 0: if INFO = -k, the k-th argument had an illegal value */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*lda < max(1,*n)) {
	*info = -4;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("DLAUUM", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Determine the block size for this environment. */

    nb = ilaenv_(&c__1, "DLAUUM", uplo, n, &c_n1, &c_n1, &c_n1);

    if (nb <= 1 || nb >= *n) {

/*        Use unblocked code */

	dlauu2_(uplo, n, &a[a_offset], lda, info);
    } else {

/*        Use blocked code */

	if (upper) {

/*           Compute the product U * U'. */

	    i__1 = *n;
	    i__2 = nb;
	    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
/* Computing MIN */
		i__3 = nb, i__4 = *n - i__ + 1;
		ib = min(i__3,i__4);
		i__3 = i__ - 1;
		dtrmm_("Right", "Upper", "Transpose", "Non-unit", &i__3, &ib, 
			&c_b15, &a[i__ + i__ * a_dim1], lda, &a[i__ * a_dim1 
			+ 1], lda)
			;
		dlauu2_("Upper", &ib, &a[i__ + i__ * a_dim1], lda, info);
		if (i__ + ib <= *n) {
		    i__3 = i__ - 1;
		    i__4 = *n - i__ - ib + 1;
		    dgemm_("No transpose", "Transpose", &i__3, &ib, &i__4, &
			    c_b15, &a[(i__ + ib) * a_dim1 + 1], lda, &a[i__ + 
			    (i__ + ib) * a_dim1], lda, &c_b15, &a[i__ * 
			    a_dim1 + 1], lda);
		    i__3 = *n - i__ - ib + 1;
		    dsyrk_("Upper", "No transpose", &ib, &i__3, &c_b15, &a[
			    i__ + (i__ + ib) * a_dim1], lda, &c_b15, &a[i__ + 
			    i__ * a_dim1], lda);
		}
/* L10: */
	    }
	} else {

/*           Compute the product L' * L. */

	    i__2 = *n;
	    i__1 = nb;
	    for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
/* Computing MIN */
		i__3 = nb, i__4 = *n - i__ + 1;
		ib = min(i__3,i__4);
		i__3 = i__ - 1;
		dtrmm_("Left", "Lower", "Transpose", "Non-unit", &ib, &i__3, &
			c_b15, &a[i__ + i__ * a_dim1], lda, &a[i__ + a_dim1], 
			lda);
		dlauu2_("Lower", &ib, &a[i__ + i__ * a_dim1], lda, info);
		if (i__ + ib <= *n) {
		    i__3 = i__ - 1;
		    i__4 = *n - i__ - ib + 1;
		    dgemm_("Transpose", "No transpose", &ib, &i__3, &i__4, &
			    c_b15, &a[i__ + ib + i__ * a_dim1], lda, &a[i__ + 
			    ib + a_dim1], lda, &c_b15, &a[i__ + a_dim1], lda);
		    i__3 = *n - i__ - ib + 1;
		    dsyrk_("Lower", "Transpose", &ib, &i__3, &c_b15, &a[i__ + 
			    ib + i__ * a_dim1], lda, &c_b15, &a[i__ + i__ * 
			    a_dim1], lda);
		}
/* L20: */
	    }
	}
    }

    return 0;

/*     End of DLAUUM */

} /* dlauum_ */
Ejemplo n.º 20
0
/* Subroutine */ int dpstrf_(char *uplo, integer *n, doublereal *a, integer *
                             lda, integer *piv, integer *rank, doublereal *tol, doublereal *work,
                             integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
    doublereal d__1;

    /* Builtin functions */
    double sqrt(doublereal);

    /* Local variables */
    integer i__, j, k, maxlocvar, jb, nb;
    doublereal ajj;
    integer pvt;
    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *,
                                       integer *);
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int dgemv_(char *, integer *, integer *,
                                       doublereal *, doublereal *, integer *, doublereal *, integer *,
                                       doublereal *, doublereal *, integer *);
    doublereal dtemp;
    integer itemp;
    extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *,
                                       doublereal *, integer *);
    doublereal dstop;
    logical upper;
    extern /* Subroutine */ int dsyrk_(char *, char *, integer *, integer *,
                                       doublereal *, doublereal *, integer *, doublereal *, doublereal *,
                                       integer *), dpstf2_(char *, integer *,
                                               doublereal *, integer *, integer *, integer *, doublereal *,
                                               doublereal *, integer *);
    extern doublereal dlamch_(char *);
    extern logical disnan_(doublereal *);
    extern /* Subroutine */ int xerbla_(char *, integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
                           integer *, integer *);
    extern integer dmaxloc_(doublereal *, integer *);


    /*  -- LAPACK routine (version 3.2) -- */
    /*     Craig Lucas, University of Manchester / NAG Ltd. */
    /*     October, 2008 */

    /*     .. Scalar Arguments .. */
    /*     .. */
    /*     .. Array Arguments .. */
    /*     .. */

    /*  Purpose */
    /*  ======= */

    /*  DPSTRF computes the Cholesky factorization with complete */
    /*  pivoting of a real symmetric positive semidefinite matrix A. */

    /*  The factorization has the form */
    /*     P' * A * P = U' * U ,  if UPLO = 'U', */
    /*     P' * A * P = L  * L',  if UPLO = 'L', */
    /*  where U is an upper triangular matrix and L is lower triangular, and */
    /*  P is stored as vector PIV. */

    /*  This algorithm does not attempt to check that A is positive */
    /*  semidefinite. This version of the algorithm calls level 3 BLAS. */

    /*  Arguments */
    /*  ========= */

    /*  UPLO    (input) CHARACTER*1 */
    /*          Specifies whether the upper or lower triangular part of the */
    /*          symmetric matrix A is stored. */
    /*          = 'U':  Upper triangular */
    /*          = 'L':  Lower triangular */

    /*  N       (input) INTEGER */
    /*          The order of the matrix A.  N >= 0. */

    /*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N) */
    /*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading */
    /*          n by n upper triangular part of A contains the upper */
    /*          triangular part of the matrix A, and the strictly lower */
    /*          triangular part of A is not referenced.  If UPLO = 'L', the */
    /*          leading n by n lower triangular part of A contains the lower */
    /*          triangular part of the matrix A, and the strictly upper */
    /*          triangular part of A is not referenced. */

    /*          On exit, if INFO = 0, the factor U or L from the Cholesky */
    /*          factorization as above. */

    /*  LDA     (input) INTEGER */
    /*          The leading dimension of the array A.  LDA >= max(1,N). */

    /*  PIV     (output) INTEGER array, dimension (N) */
    /*          PIV is such that the nonzero entries are P( PIV(K), K ) = 1. */

    /*  RANK    (output) INTEGER */
    /*          The rank of A given by the number of steps the algorithm */
    /*          completed. */

    /*  TOL     (input) DOUBLE PRECISION */
    /*          User defined tolerance. If TOL < 0, then N*U*MAX( A(K,K) ) */
    /*          will be used. The algorithm terminates at the (K-1)st step */
    /*          if the pivot <= TOL. */

    /*  WORK    DOUBLE PRECISION array, dimension (2*N) */
    /*          Work space. */

    /*  INFO    (output) INTEGER */
    /*          < 0: If INFO = -K, the K-th argument had an illegal value, */
    /*          = 0: algorithm completed successfully, and */
    /*          > 0: the matrix A is either rank deficient with computed rank */
    /*               as returned in RANK, or is indefinite.  See Section 7 of */
    /*               LAPACK Working Note #161 for further information. */

    /*  ===================================================================== */

    /*     .. Parameters .. */
    /*     .. */
    /*     .. Local Scalars .. */
    /*     .. */
    /*     .. External Functions .. */
    /*     .. */
    /*     .. External Subroutines .. */
    /*     .. */
    /*     .. Intrinsic Functions .. */
    /*     .. */
    /*     .. Executable Statements .. */

    /*     Test the input parameters. */

    /* Parameter adjustments */
    --work;
    --piv;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
        *info = -1;
    } else if (*n < 0) {
        *info = -2;
    } else if (*lda < max(1,*n)) {
        *info = -4;
    }
    if (*info != 0) {
        i__1 = -(*info);
        xerbla_("DPSTRF", &i__1);
        return 0;
    }

    /*     Quick return if possible */

    if (*n == 0) {
        return 0;
    }

    /*     Get block size */

    nb = ilaenv_(&c__1, "DPOTRF", uplo, n, &c_n1, &c_n1, &c_n1);
    if (nb <= 1 || nb >= *n) {

        /*        Use unblocked code */

        dpstf2_(uplo, n, &a[a_dim1 + 1], lda, &piv[1], rank, tol, &work[1],
                info);
        goto L200;

    } else {

        /*     Initialize PIV */

        i__1 = *n;
        for (i__ = 1; i__ <= i__1; ++i__) {
            piv[i__] = i__;
            /* L100: */
        }

        /*     Compute stopping value */

        pvt = 1;
        ajj = a[pvt + pvt * a_dim1];
        i__1 = *n;
        for (i__ = 2; i__ <= i__1; ++i__) {
            if (a[i__ + i__ * a_dim1] > ajj) {
                pvt = i__;
                ajj = a[pvt + pvt * a_dim1];
            }
        }
        if (ajj == 0. || disnan_(&ajj)) {
            *rank = 0;
            *info = 1;
            goto L200;
        }

        /*     Compute stopping value if not supplied */

        if (*tol < 0.) {
            dstop = *n * dlamch_("Epsilon") * ajj;
        } else {
            dstop = *tol;
        }


        if (upper) {

            /*           Compute the Cholesky factorization P' * A * P = U' * U */

            i__1 = *n;
            i__2 = nb;
            for (k = 1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) {

                /*              Account for last block not being NB wide */

                /* Computing MIN */
                i__3 = nb, i__4 = *n - k + 1;
                jb = min(i__3,i__4);

                /*              Set relevant part of first half of WORK to zero, */
                /*              holds dot products */

                i__3 = *n;
                for (i__ = k; i__ <= i__3; ++i__) {
                    work[i__] = 0.;
                    /* L110: */
                }

                i__3 = k + jb - 1;
                for (j = k; j <= i__3; ++j) {

                    /*              Find pivot, test for exit, else swap rows and columns */
                    /*              Update dot products, compute possible pivots which are */
                    /*              stored in the second half of WORK */

                    i__4 = *n;
                    for (i__ = j; i__ <= i__4; ++i__) {

                        if (j > k) {
                            /* Computing 2nd power */
                            d__1 = a[j - 1 + i__ * a_dim1];
                            work[i__] += d__1 * d__1;
                        }
                        work[*n + i__] = a[i__ + i__ * a_dim1] - work[i__];

                        /* L120: */
                    }

                    if (j > 1) {
                        maxlocvar = (*n << 1) - (*n + j) + 1;
                        itemp = dmaxloc_(&work[*n + j], &maxlocvar);
                        pvt = itemp + j - 1;
                        ajj = work[*n + pvt];
                        if (ajj <= dstop || disnan_(&ajj)) {
                            a[j + j * a_dim1] = ajj;
                            goto L190;
                        }
                    }

                    if (j != pvt) {

                        /*                    Pivot OK, so can now swap pivot rows and columns */

                        a[pvt + pvt * a_dim1] = a[j + j * a_dim1];
                        i__4 = j - 1;
                        dswap_(&i__4, &a[j * a_dim1 + 1], &c__1, &a[pvt *
                                a_dim1 + 1], &c__1);
                        if (pvt < *n) {
                            i__4 = *n - pvt;
                            dswap_(&i__4, &a[j + (pvt + 1) * a_dim1], lda, &a[
                                       pvt + (pvt + 1) * a_dim1], lda);
                        }
                        i__4 = pvt - j - 1;
                        dswap_(&i__4, &a[j + (j + 1) * a_dim1], lda, &a[j + 1
                                + pvt * a_dim1], &c__1);

                        /*                    Swap dot products and PIV */

                        dtemp = work[j];
                        work[j] = work[pvt];
                        work[pvt] = dtemp;
                        itemp = piv[pvt];
                        piv[pvt] = piv[j];
                        piv[j] = itemp;
                    }

                    ajj = sqrt(ajj);
                    a[j + j * a_dim1] = ajj;

                    /*                 Compute elements J+1:N of row J. */

                    if (j < *n) {
                        i__4 = j - k;
                        i__5 = *n - j;
                        dgemv_("Trans", &i__4, &i__5, &c_b22, &a[k + (j + 1) *
                                a_dim1], lda, &a[k + j * a_dim1], &c__1, &
                               c_b24, &a[j + (j + 1) * a_dim1], lda);
                        i__4 = *n - j;
                        d__1 = 1. / ajj;
                        dscal_(&i__4, &d__1, &a[j + (j + 1) * a_dim1], lda);
                    }

                    /* L130: */
                }

                /*              Update trailing matrix, J already incremented */

                if (k + jb <= *n) {
                    i__3 = *n - j + 1;
                    dsyrk_("Upper", "Trans", &i__3, &jb, &c_b22, &a[k + j *
                            a_dim1], lda, &c_b24, &a[j + j * a_dim1], lda);
                }

                /* L140: */
            }

        } else {

            /*        Compute the Cholesky factorization P' * A * P = L * L' */

            i__2 = *n;
            i__1 = nb;
            for (k = 1; i__1 < 0 ? k >= i__2 : k <= i__2; k += i__1) {

                /*              Account for last block not being NB wide */

                /* Computing MIN */
                i__3 = nb, i__4 = *n - k + 1;
                jb = min(i__3,i__4);

                /*              Set relevant part of first half of WORK to zero, */
                /*              holds dot products */

                i__3 = *n;
                for (i__ = k; i__ <= i__3; ++i__) {
                    work[i__] = 0.;
                    /* L150: */
                }

                i__3 = k + jb - 1;
                for (j = k; j <= i__3; ++j) {

                    /*              Find pivot, test for exit, else swap rows and columns */
                    /*              Update dot products, compute possible pivots which are */
                    /*              stored in the second half of WORK */

                    i__4 = *n;
                    for (i__ = j; i__ <= i__4; ++i__) {

                        if (j > k) {
                            /* Computing 2nd power */
                            d__1 = a[i__ + (j - 1) * a_dim1];
                            work[i__] += d__1 * d__1;
                        }
                        work[*n + i__] = a[i__ + i__ * a_dim1] - work[i__];

                        /* L160: */
                    }

                    if (j > 1) {
                        maxlocvar = (*n << 1) - (*n + j) + 1;
                        itemp = dmaxloc_(&work[*n + j], &maxlocvar);
                        pvt = itemp + j - 1;
                        ajj = work[*n + pvt];
                        if (ajj <= dstop || disnan_(&ajj)) {
                            a[j + j * a_dim1] = ajj;
                            goto L190;
                        }
                    }

                    if (j != pvt) {

                        /*                    Pivot OK, so can now swap pivot rows and columns */

                        a[pvt + pvt * a_dim1] = a[j + j * a_dim1];
                        i__4 = j - 1;
                        dswap_(&i__4, &a[j + a_dim1], lda, &a[pvt + a_dim1],
                               lda);
                        if (pvt < *n) {
                            i__4 = *n - pvt;
                            dswap_(&i__4, &a[pvt + 1 + j * a_dim1], &c__1, &a[
                                       pvt + 1 + pvt * a_dim1], &c__1);
                        }
                        i__4 = pvt - j - 1;
                        dswap_(&i__4, &a[j + 1 + j * a_dim1], &c__1, &a[pvt +
                                (j + 1) * a_dim1], lda);

                        /*                    Swap dot products and PIV */

                        dtemp = work[j];
                        work[j] = work[pvt];
                        work[pvt] = dtemp;
                        itemp = piv[pvt];
                        piv[pvt] = piv[j];
                        piv[j] = itemp;
                    }

                    ajj = sqrt(ajj);
                    a[j + j * a_dim1] = ajj;

                    /*                 Compute elements J+1:N of column J. */

                    if (j < *n) {
                        i__4 = *n - j;
                        i__5 = j - k;
                        dgemv_("No Trans", &i__4, &i__5, &c_b22, &a[j + 1 + k
                                * a_dim1], lda, &a[j + k * a_dim1], lda, &
                               c_b24, &a[j + 1 + j * a_dim1], &c__1);
                        i__4 = *n - j;
                        d__1 = 1. / ajj;
                        dscal_(&i__4, &d__1, &a[j + 1 + j * a_dim1], &c__1);
                    }

                    /* L170: */
                }

                /*              Update trailing matrix, J already incremented */

                if (k + jb <= *n) {
                    i__3 = *n - j + 1;
                    dsyrk_("Lower", "No Trans", &i__3, &jb, &c_b22, &a[j + k *
                            a_dim1], lda, &c_b24, &a[j + j * a_dim1], lda);
                }

                /* L180: */
            }

        }
    }

    /*     Ran to completion, A has full rank */

    *rank = *n;

    goto L200;
L190:

    /*     Rank is the number of steps completed.  Set INFO = 1 to signal */
    /*     that the factorization cannot be used to solve a system. */

    *rank = j - 1;
    *info = 1;

L200:
    return 0;

    /*     End of DPSTRF */

} /* dpstrf_ */
Ejemplo n.º 21
0
/* Subroutine */ int dpftri_(char *transr, char *uplo, integer *n, doublereal 
	*a, integer *info)
{
    /* System generated locals */
    integer i__1, i__2;

    /* Local variables */
    integer k, n1, n2;
    logical normaltransr;
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int dtrmm_(char *, char *, char *, char *, 
	    integer *, integer *, doublereal *, doublereal *, integer *, 
	    doublereal *, integer *);
    logical lower;
    extern /* Subroutine */ int dsyrk_(char *, char *, integer *, integer *, 
	    doublereal *, doublereal *, integer *, doublereal *, doublereal *, 
	     integer *), xerbla_(char *, integer *);
    logical nisodd;
    extern /* Subroutine */ int dlauum_(char *, integer *, doublereal *, 
	    integer *, integer *), dtftri_(char *, char *, char *, 
	    integer *, doublereal *, integer *);


/*  -- LAPACK routine (version 3.2)                                    -- */

/*  -- Contributed by Fred Gustavson of the IBM Watson Research Center -- */
/*  -- November 2008                                                   -- */

/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */

/*     .. Scalar Arguments .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  DPFTRI computes the inverse of a (real) symmetric positive definite */
/*  matrix A using the Cholesky factorization A = U**T*U or A = L*L**T */
/*  computed by DPFTRF. */

/*  Arguments */
/*  ========= */

/*  TRANSR    (input) CHARACTER */
/*          = 'N':  The Normal TRANSR of RFP A is stored; */
/*          = 'T':  The Transpose TRANSR of RFP A is stored. */

/*  UPLO    (input) CHARACTER */
/*          = 'U':  Upper triangle of A is stored; */
/*          = 'L':  Lower triangle of A is stored. */

/*  N       (input) INTEGER */
/*          The order of the matrix A.  N >= 0. */

/*  A       (input/output) DOUBLE PRECISION array, dimension ( N*(N+1)/2 ) */
/*          On entry, the symmetric matrix A in RFP format. RFP format is */
/*          described by TRANSR, UPLO, and N as follows: If TRANSR = 'N' */
/*          then RFP A is (0:N,0:k-1) when N is even; k=N/2. RFP A is */
/*          (0:N-1,0:k) when N is odd; k=N/2. IF TRANSR = 'T' then RFP is */
/*          the transpose of RFP A as defined when */
/*          TRANSR = 'N'. The contents of RFP A are defined by UPLO as */
/*          follows: If UPLO = 'U' the RFP A contains the nt elements of */
/*          upper packed A. If UPLO = 'L' the RFP A contains the elements */
/*          of lower packed A. The LDA of RFP A is (N+1)/2 when TRANSR = */
/*          'T'. When TRANSR is 'N' the LDA is N+1 when N is even and N */
/*          is odd. See the Note below for more details. */

/*          On exit, the symmetric inverse of the original matrix, in the */
/*          same storage format. */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value */
/*          > 0:  if INFO = i, the (i,i) element of the factor U or L is */
/*                zero, and the inverse could not be computed. */

/*  Notes */
/*  ===== */

/*  We first consider Rectangular Full Packed (RFP) Format when N is */
/*  even. We give an example where N = 6. */

/*      AP is Upper             AP is Lower */

/*   00 01 02 03 04 05       00 */
/*      11 12 13 14 15       10 11 */
/*         22 23 24 25       20 21 22 */
/*            33 34 35       30 31 32 33 */
/*               44 45       40 41 42 43 44 */
/*                  55       50 51 52 53 54 55 */


/*  Let TRANSR = 'N'. RFP holds AP as follows: */
/*  For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last */
/*  three columns of AP upper. The lower triangle A(4:6,0:2) consists of */
/*  the transpose of the first three columns of AP upper. */
/*  For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first */
/*  three columns of AP lower. The upper triangle A(0:2,0:2) consists of */
/*  the transpose of the last three columns of AP lower. */
/*  This covers the case N even and TRANSR = 'N'. */

/*         RFP A                   RFP A */

/*        03 04 05                33 43 53 */
/*        13 14 15                00 44 54 */
/*        23 24 25                10 11 55 */
/*        33 34 35                20 21 22 */
/*        00 44 45                30 31 32 */
/*        01 11 55                40 41 42 */
/*        02 12 22                50 51 52 */

/*  Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */
/*  transpose of RFP A above. One therefore gets: */


/*           RFP A                   RFP A */

/*     03 13 23 33 00 01 02    33 00 10 20 30 40 50 */
/*     04 14 24 34 44 11 12    43 44 11 21 31 41 51 */
/*     05 15 25 35 45 55 22    53 54 55 22 32 42 52 */


/*  We first consider Rectangular Full Packed (RFP) Format when N is */
/*  odd. We give an example where N = 5. */

/*     AP is Upper                 AP is Lower */

/*   00 01 02 03 04              00 */
/*      11 12 13 14              10 11 */
/*         22 23 24              20 21 22 */
/*            33 34              30 31 32 33 */
/*               44              40 41 42 43 44 */


/*  Let TRANSR = 'N'. RFP holds AP as follows: */
/*  For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last */
/*  three columns of AP upper. The lower triangle A(3:4,0:1) consists of */
/*  the transpose of the first two columns of AP upper. */
/*  For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first */
/*  three columns of AP lower. The upper triangle A(0:1,1:2) consists of */
/*  the transpose of the last two columns of AP lower. */
/*  This covers the case N odd and TRANSR = 'N'. */

/*         RFP A                   RFP A */

/*        02 03 04                00 33 43 */
/*        12 13 14                10 11 44 */
/*        22 23 24                20 21 22 */
/*        00 33 34                30 31 32 */
/*        01 11 44                40 41 42 */

/*  Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */
/*  transpose of RFP A above. One therefore gets: */

/*           RFP A                   RFP A */

/*     02 12 22 00 01             00 10 20 30 40 50 */
/*     03 13 23 33 11             33 11 21 31 41 51 */
/*     04 14 24 34 44             43 44 22 32 42 52 */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input parameters. */

    *info = 0;
    normaltransr = lsame_(transr, "N");
    lower = lsame_(uplo, "L");
    if (! normaltransr && ! lsame_(transr, "T")) {
	*info = -1;
    } else if (! lower && ! lsame_(uplo, "U")) {
	*info = -2;
    } else if (*n < 0) {
	*info = -3;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("DPFTRI", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Invert the triangular Cholesky factor U or L. */

    dtftri_(transr, uplo, "N", n, a, info);
    if (*info > 0) {
	return 0;
    }

/*     If N is odd, set NISODD = .TRUE. */
/*     If N is even, set K = N/2 and NISODD = .FALSE. */

    if (*n % 2 == 0) {
	k = *n / 2;
	nisodd = FALSE_;
    } else {
	nisodd = TRUE_;
    }

/*     Set N1 and N2 depending on LOWER */

    if (lower) {
	n2 = *n / 2;
	n1 = *n - n2;
    } else {
	n1 = *n / 2;
	n2 = *n - n1;
    }

/*     Start execution of triangular matrix multiply: inv(U)*inv(U)^C or */
/*     inv(L)^C*inv(L). There are eight cases. */

    if (nisodd) {

/*        N is odd */

	if (normaltransr) {

/*           N is odd and TRANSR = 'N' */

	    if (lower) {

/*              SRPA for LOWER, NORMAL and N is odd ( a(0:n-1,0:N1-1) ) */
/*              T1 -> a(0,0), T2 -> a(0,1), S -> a(N1,0) */
/*              T1 -> a(0), T2 -> a(n), S -> a(N1) */

		dlauum_("L", &n1, a, n, info);
		dsyrk_("L", "T", &n1, &n2, &c_b11, &a[n1], n, &c_b11, a, n);
		dtrmm_("L", "U", "N", "N", &n2, &n1, &c_b11, &a[*n], n, &a[n1]
, n);
		dlauum_("U", &n2, &a[*n], n, info);

	    } else {

/*              SRPA for UPPER, NORMAL and N is odd ( a(0:n-1,0:N2-1) */
/*              T1 -> a(N1+1,0), T2 -> a(N1,0), S -> a(0,0) */
/*              T1 -> a(N2), T2 -> a(N1), S -> a(0) */

		dlauum_("L", &n1, &a[n2], n, info);
		dsyrk_("L", "N", &n1, &n2, &c_b11, a, n, &c_b11, &a[n2], n);
		dtrmm_("R", "U", "T", "N", &n1, &n2, &c_b11, &a[n1], n, a, n);
		dlauum_("U", &n2, &a[n1], n, info);

	    }

	} else {

/*           N is odd and TRANSR = 'T' */

	    if (lower) {

/*              SRPA for LOWER, TRANSPOSE, and N is odd */
/*              T1 -> a(0), T2 -> a(1), S -> a(0+N1*N1) */

		dlauum_("U", &n1, a, &n1, info);
		dsyrk_("U", "N", &n1, &n2, &c_b11, &a[n1 * n1], &n1, &c_b11, 
			a, &n1);
		dtrmm_("R", "L", "N", "N", &n1, &n2, &c_b11, &a[1], &n1, &a[
			n1 * n1], &n1);
		dlauum_("L", &n2, &a[1], &n1, info);

	    } else {

/*              SRPA for UPPER, TRANSPOSE, and N is odd */
/*              T1 -> a(0+N2*N2), T2 -> a(0+N1*N2), S -> a(0) */

		dlauum_("U", &n1, &a[n2 * n2], &n2, info);
		dsyrk_("U", "T", &n1, &n2, &c_b11, a, &n2, &c_b11, &a[n2 * n2]
, &n2);
		dtrmm_("L", "L", "T", "N", &n2, &n1, &c_b11, &a[n1 * n2], &n2, 
			 a, &n2);
		dlauum_("L", &n2, &a[n1 * n2], &n2, info);

	    }

	}

    } else {

/*        N is even */

	if (normaltransr) {

/*           N is even and TRANSR = 'N' */

	    if (lower) {

/*              SRPA for LOWER, NORMAL, and N is even ( a(0:n,0:k-1) ) */
/*              T1 -> a(1,0), T2 -> a(0,0), S -> a(k+1,0) */
/*              T1 -> a(1), T2 -> a(0), S -> a(k+1) */

		i__1 = *n + 1;
		dlauum_("L", &k, &a[1], &i__1, info);
		i__1 = *n + 1;
		i__2 = *n + 1;
		dsyrk_("L", "T", &k, &k, &c_b11, &a[k + 1], &i__1, &c_b11, &a[
			1], &i__2);
		i__1 = *n + 1;
		i__2 = *n + 1;
		dtrmm_("L", "U", "N", "N", &k, &k, &c_b11, a, &i__1, &a[k + 1]
, &i__2);
		i__1 = *n + 1;
		dlauum_("U", &k, a, &i__1, info);

	    } else {

/*              SRPA for UPPER, NORMAL, and N is even ( a(0:n,0:k-1) ) */
/*              T1 -> a(k+1,0) ,  T2 -> a(k,0),   S -> a(0,0) */
/*              T1 -> a(k+1), T2 -> a(k), S -> a(0) */

		i__1 = *n + 1;
		dlauum_("L", &k, &a[k + 1], &i__1, info);
		i__1 = *n + 1;
		i__2 = *n + 1;
		dsyrk_("L", "N", &k, &k, &c_b11, a, &i__1, &c_b11, &a[k + 1], 
			&i__2);
		i__1 = *n + 1;
		i__2 = *n + 1;
		dtrmm_("R", "U", "T", "N", &k, &k, &c_b11, &a[k], &i__1, a, &
			i__2);
		i__1 = *n + 1;
		dlauum_("U", &k, &a[k], &i__1, info);

	    }

	} else {

/*           N is even and TRANSR = 'T' */

	    if (lower) {

/*              SRPA for LOWER, TRANSPOSE, and N is even (see paper) */
/*              T1 -> B(0,1), T2 -> B(0,0), S -> B(0,k+1), */
/*              T1 -> a(0+k), T2 -> a(0+0), S -> a(0+k*(k+1)); lda=k */

		dlauum_("U", &k, &a[k], &k, info);
		dsyrk_("U", "N", &k, &k, &c_b11, &a[k * (k + 1)], &k, &c_b11, 
			&a[k], &k);
		dtrmm_("R", "L", "N", "N", &k, &k, &c_b11, a, &k, &a[k * (k + 
			1)], &k);
		dlauum_("L", &k, a, &k, info);

	    } else {

/*              SRPA for UPPER, TRANSPOSE, and N is even (see paper) */
/*              T1 -> B(0,k+1),     T2 -> B(0,k),   S -> B(0,0), */
/*              T1 -> a(0+k*(k+1)), T2 -> a(0+k*k), S -> a(0+0)); lda=k */

		dlauum_("U", &k, &a[k * (k + 1)], &k, info);
		dsyrk_("U", "T", &k, &k, &c_b11, a, &k, &c_b11, &a[k * (k + 1)
			], &k);
		dtrmm_("L", "L", "T", "N", &k, &k, &c_b11, &a[k * k], &k, a, &
			k);
		dlauum_("L", &k, &a[k * k], &k, info);

	    }

	}

    }

    return 0;

/*     End of DPFTRI */

} /* dpftri_ */
Ejemplo n.º 22
0
/* Subroutine */ int dpftrf_(char *transr, char *uplo, integer *n, doublereal 
	*a, integer *info)
{
    /* System generated locals */
    integer i__1, i__2;

    /* Local variables */
    integer k, n1, n2;
    logical normaltransr;
    logical lower;
    logical nisodd;

/*  -- LAPACK routine (version 3.2)                                    -- */

/*  -- Contributed by Fred Gustavson of the IBM Watson Research Center -- */
/*  -- November 2008                                                   -- */

/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */

/*  Purpose */
/*  ======= */

/*  DPFTRF computes the Cholesky factorization of a real symmetric */
/*  positive definite matrix A. */

/*  The factorization has the form */
/*     A = U**T * U,  if UPLO = 'U', or */
/*     A = L  * L**T,  if UPLO = 'L', */
/*  where U is an upper triangular matrix and L is lower triangular. */

/*  This is the block version of the algorithm, calling Level 3 BLAS. */

/*  Arguments */
/*  ========= */

/*  TRANSR    (input) CHARACTER */
/*          = 'N':  The Normal TRANSR of RFP A is stored; */
/*          = 'T':  The Transpose TRANSR of RFP A is stored. */

/*  UPLO    (input) CHARACTER */
/*          = 'U':  Upper triangle of RFP A is stored; */
/*          = 'L':  Lower triangle of RFP A is stored. */

/*  N       (input) INTEGER */
/*          The order of the matrix A.  N >= 0. */

/*  A       (input/output) DOUBLE PRECISION array, dimension ( N*(N+1)/2 ); */
/*          On entry, the symmetric matrix A in RFP format. RFP format is */
/*          described by TRANSR, UPLO, and N as follows: If TRANSR = 'N' */
/*          then RFP A is (0:N,0:k-1) when N is even; k=N/2. RFP A is */
/*          (0:N-1,0:k) when N is odd; k=N/2. IF TRANSR = 'T' then RFP is */
/*          the transpose of RFP A as defined when */
/*          TRANSR = 'N'. The contents of RFP A are defined by UPLO as */
/*          follows: If UPLO = 'U' the RFP A contains the NT elements of */
/*          upper packed A. If UPLO = 'L' the RFP A contains the elements */
/*          of lower packed A. The LDA of RFP A is (N+1)/2 when TRANSR = */
/*          'T'. When TRANSR is 'N' the LDA is N+1 when N is even and N */
/*          is odd. See the Note below for more details. */

/*          On exit, if INFO = 0, the factor U or L from the Cholesky */
/*          factorization RFP A = U**T*U or RFP A = L*L**T. */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value */
/*          > 0:  if INFO = i, the leading minor of order i is not */
/*                positive definite, and the factorization could not be */
/*                completed. */

/*  Notes */
/*  ===== */

/*  We first consider Rectangular Full Packed (RFP) Format when N is */
/*  even. We give an example where N = 6. */

/*      AP is Upper             AP is Lower */

/*   00 01 02 03 04 05       00 */
/*      11 12 13 14 15       10 11 */
/*         22 23 24 25       20 21 22 */
/*            33 34 35       30 31 32 33 */
/*               44 45       40 41 42 43 44 */
/*                  55       50 51 52 53 54 55 */

/*  Let TRANSR = 'N'. RFP holds AP as follows: */
/*  For UPLO = 'U' the upper trapezoid A(0:5,0:2) consists of the last */
/*  three columns of AP upper. The lower triangle A(4:6,0:2) consists of */
/*  the transpose of the first three columns of AP upper. */
/*  For UPLO = 'L' the lower trapezoid A(1:6,0:2) consists of the first */
/*  three columns of AP lower. The upper triangle A(0:2,0:2) consists of */
/*  the transpose of the last three columns of AP lower. */
/*  This covers the case N even and TRANSR = 'N'. */

/*         RFP A                   RFP A */

/*        03 04 05                33 43 53 */
/*        13 14 15                00 44 54 */
/*        23 24 25                10 11 55 */
/*        33 34 35                20 21 22 */
/*        00 44 45                30 31 32 */
/*        01 11 55                40 41 42 */
/*        02 12 22                50 51 52 */

/*  Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */
/*  transpose of RFP A above. One therefore gets: */

/*           RFP A                   RFP A */

/*     03 13 23 33 00 01 02    33 00 10 20 30 40 50 */
/*     04 14 24 34 44 11 12    43 44 11 21 31 41 51 */
/*     05 15 25 35 45 55 22    53 54 55 22 32 42 52 */

/*  We first consider Rectangular Full Packed (RFP) Format when N is */
/*  odd. We give an example where N = 5. */

/*     AP is Upper                 AP is Lower */

/*   00 01 02 03 04              00 */
/*      11 12 13 14              10 11 */
/*         22 23 24              20 21 22 */
/*            33 34              30 31 32 33 */
/*               44              40 41 42 43 44 */

/*  Let TRANSR = 'N'. RFP holds AP as follows: */
/*  For UPLO = 'U' the upper trapezoid A(0:4,0:2) consists of the last */
/*  three columns of AP upper. The lower triangle A(3:4,0:1) consists of */
/*  the transpose of the first two columns of AP upper. */
/*  For UPLO = 'L' the lower trapezoid A(0:4,0:2) consists of the first */
/*  three columns of AP lower. The upper triangle A(0:1,1:2) consists of */
/*  the transpose of the last two columns of AP lower. */
/*  This covers the case N odd and TRANSR = 'N'. */

/*         RFP A                   RFP A */

/*        02 03 04                00 33 43 */
/*        12 13 14                10 11 44 */
/*        22 23 24                20 21 22 */
/*        00 33 34                30 31 32 */
/*        01 11 44                40 41 42 */

/*  Now let TRANSR = 'T'. RFP A in both UPLO cases is just the */
/*  transpose of RFP A above. One therefore gets: */

/*           RFP A                   RFP A */

/*     02 12 22 00 01             00 10 20 30 40 50 */
/*     03 13 23 33 11             33 11 21 31 41 51 */
/*     04 14 24 34 44             43 44 22 32 42 52 */

/*  ===================================================================== */

/*     Test the input parameters. */

    *info = 0;
    normaltransr = lsame_(transr, "N");
    lower = lsame_(uplo, "L");
    if (! normaltransr && ! lsame_(transr, "T")) {
	*info = -1;
    } else if (! lower && ! lsame_(uplo, "U")) {
	*info = -2;
    } else if (*n < 0) {
	*info = -3;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("DPFTRF", &i__1);
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     If N is odd, set NISODD = .TRUE. */
/*     If N is even, set K = N/2 and NISODD = .FALSE. */

    if (*n % 2 == 0) {
	k = *n / 2;
	nisodd = FALSE_;
    } else {
	nisodd = TRUE_;
    }

/*     Set N1 and N2 depending on LOWER */

    if (lower) {
	n2 = *n / 2;
	n1 = *n - n2;
    } else {
	n1 = *n / 2;
	n2 = *n - n1;
    }

/*     start execution: there are eight cases */

    if (nisodd) {

/*        N is odd */

	if (normaltransr) {

/*           N is odd and TRANSR = 'N' */

	    if (lower) {

/*             SRPA for LOWER, NORMAL and N is odd ( a(0:n-1,0:n1-1) ) */
/*             T1 -> a(0,0), T2 -> a(0,1), S -> a(n1,0) */
/*             T1 -> a(0), T2 -> a(n), S -> a(n1) */

		dpotrf_("L", &n1, a, n, info);
		if (*info > 0) {
		    return 0;
		}
		dtrsm_("R", "L", "T", "N", &n2, &n1, &c_b12, a, n, &a[n1], n);
		dsyrk_("U", "N", &n2, &n1, &c_b15, &a[n1], n, &c_b12, &a[*n], 
			n);
		dpotrf_("U", &n2, &a[*n], n, info);
		if (*info > 0) {
		    *info += n1;
		}

	    } else {

/*             SRPA for UPPER, NORMAL and N is odd ( a(0:n-1,0:n2-1) */
/*             T1 -> a(n1+1,0), T2 -> a(n1,0), S -> a(0,0) */
/*             T1 -> a(n2), T2 -> a(n1), S -> a(0) */

		dpotrf_("L", &n1, &a[n2], n, info);
		if (*info > 0) {
		    return 0;
		}
		dtrsm_("L", "L", "N", "N", &n1, &n2, &c_b12, &a[n2], n, a, n);
		dsyrk_("U", "T", &n2, &n1, &c_b15, a, n, &c_b12, &a[n1], n);
		dpotrf_("U", &n2, &a[n1], n, info);
		if (*info > 0) {
		    *info += n1;
		}

	    }

	} else {

/*           N is odd and TRANSR = 'T' */

	    if (lower) {

/*              SRPA for LOWER, TRANSPOSE and N is odd */
/*              T1 -> A(0,0) , T2 -> A(1,0) , S -> A(0,n1) */
/*              T1 -> a(0+0) , T2 -> a(1+0) , S -> a(0+n1*n1); lda=n1 */

		dpotrf_("U", &n1, a, &n1, info);
		if (*info > 0) {
		    return 0;
		}
		dtrsm_("L", "U", "T", "N", &n1, &n2, &c_b12, a, &n1, &a[n1 * 
			n1], &n1);
		dsyrk_("L", "T", &n2, &n1, &c_b15, &a[n1 * n1], &n1, &c_b12, &
			a[1], &n1);
		dpotrf_("L", &n2, &a[1], &n1, info);
		if (*info > 0) {
		    *info += n1;
		}

	    } else {

/*              SRPA for UPPER, TRANSPOSE and N is odd */
/*              T1 -> A(0,n1+1), T2 -> A(0,n1), S -> A(0,0) */
/*              T1 -> a(n2*n2), T2 -> a(n1*n2), S -> a(0); lda = n2 */

		dpotrf_("U", &n1, &a[n2 * n2], &n2, info);
		if (*info > 0) {
		    return 0;
		}
		dtrsm_("R", "U", "N", "N", &n2, &n1, &c_b12, &a[n2 * n2], &n2, 
			 a, &n2);
		dsyrk_("L", "N", &n2, &n1, &c_b15, a, &n2, &c_b12, &a[n1 * n2]
, &n2);
		dpotrf_("L", &n2, &a[n1 * n2], &n2, info);
		if (*info > 0) {
		    *info += n1;
		}

	    }

	}

    } else {

/*        N is even */

	if (normaltransr) {

/*           N is even and TRANSR = 'N' */

	    if (lower) {

/*              SRPA for LOWER, NORMAL, and N is even ( a(0:n,0:k-1) ) */
/*              T1 -> a(1,0), T2 -> a(0,0), S -> a(k+1,0) */
/*              T1 -> a(1), T2 -> a(0), S -> a(k+1) */

		i__1 = *n + 1;
		dpotrf_("L", &k, &a[1], &i__1, info);
		if (*info > 0) {
		    return 0;
		}
		i__1 = *n + 1;
		i__2 = *n + 1;
		dtrsm_("R", "L", "T", "N", &k, &k, &c_b12, &a[1], &i__1, &a[k 
			+ 1], &i__2);
		i__1 = *n + 1;
		i__2 = *n + 1;
		dsyrk_("U", "N", &k, &k, &c_b15, &a[k + 1], &i__1, &c_b12, a, 
			&i__2);
		i__1 = *n + 1;
		dpotrf_("U", &k, a, &i__1, info);
		if (*info > 0) {
		    *info += k;
		}

	    } else {

/*              SRPA for UPPER, NORMAL, and N is even ( a(0:n,0:k-1) ) */
/*              T1 -> a(k+1,0) ,  T2 -> a(k,0),   S -> a(0,0) */
/*              T1 -> a(k+1), T2 -> a(k), S -> a(0) */

		i__1 = *n + 1;
		dpotrf_("L", &k, &a[k + 1], &i__1, info);
		if (*info > 0) {
		    return 0;
		}
		i__1 = *n + 1;
		i__2 = *n + 1;
		dtrsm_("L", "L", "N", "N", &k, &k, &c_b12, &a[k + 1], &i__1, 
			a, &i__2);
		i__1 = *n + 1;
		i__2 = *n + 1;
		dsyrk_("U", "T", &k, &k, &c_b15, a, &i__1, &c_b12, &a[k], &
			i__2);
		i__1 = *n + 1;
		dpotrf_("U", &k, &a[k], &i__1, info);
		if (*info > 0) {
		    *info += k;
		}

	    }

	} else {

/*           N is even and TRANSR = 'T' */

	    if (lower) {

/*              SRPA for LOWER, TRANSPOSE and N is even (see paper) */
/*              T1 -> B(0,1), T2 -> B(0,0), S -> B(0,k+1) */
/*              T1 -> a(0+k), T2 -> a(0+0), S -> a(0+k*(k+1)); lda=k */

		dpotrf_("U", &k, &a[k], &k, info);
		if (*info > 0) {
		    return 0;
		}
		dtrsm_("L", "U", "T", "N", &k, &k, &c_b12, &a[k], &n1, &a[k * 
			(k + 1)], &k);
		dsyrk_("L", "T", &k, &k, &c_b15, &a[k * (k + 1)], &k, &c_b12, 
			a, &k);
		dpotrf_("L", &k, a, &k, info);
		if (*info > 0) {
		    *info += k;
		}

	    } else {

/*              SRPA for UPPER, TRANSPOSE and N is even (see paper) */
/*              T1 -> B(0,k+1),     T2 -> B(0,k),   S -> B(0,0) */
/*              T1 -> a(0+k*(k+1)), T2 -> a(0+k*k), S -> a(0+0)); lda=k */

		dpotrf_("U", &k, &a[k * (k + 1)], &k, info);
		if (*info > 0) {
		    return 0;
		}
		dtrsm_("R", "U", "N", "N", &k, &k, &c_b12, &a[k * (k + 1)], &
			k, a, &k);
		dsyrk_("L", "N", &k, &k, &c_b15, a, &k, &c_b12, &a[k * k], &k);
		dpotrf_("L", &k, &a[k * k], &k, info);
		if (*info > 0) {
		    *info += k;
		}

	    }

	}

    }

    return 0;

/*     End of DPFTRF */

} /* dpftrf_ */
Ejemplo n.º 23
0
int main( int argc, char** argv )
{
    obj_t a, b, c;
    obj_t x, y;
    obj_t alpha, beta;
    dim_t m;
    num_t dt_a, dt_b, dt_c;
    num_t dt_alpha, dt_beta;
    int   ii;

#ifdef NBLIS
    bli_init();
#endif


    m = 4000;

    dt_a = BLIS_DOUBLE;
    dt_b = BLIS_DOUBLE;
    dt_c = BLIS_DOUBLE;
    dt_alpha = BLIS_DOUBLE;
    dt_beta = BLIS_DOUBLE;

    {


#ifdef NBLIS
        bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha );
        bli_obj_create( dt_beta,  1, 1, 0, 0, &beta );

        bli_obj_create( dt_a, m, 1, 0, 0, &x );
        bli_obj_create( dt_a, m, 1, 0, 0, &y );

        bli_obj_create( dt_a, m, m, 0, 0, &a );
        bli_obj_create( dt_b, m, m, 0, 0, &b );
        bli_obj_create( dt_c, m, m, 0, 0, &c );

        bli_randm( &a );
        bli_randm( &b );
        bli_randm( &c );

        bli_setsc(  (2.0/1.0), 0.0, &alpha );
        bli_setsc( -(1.0/1.0), 0.0, &beta );

#endif

#ifdef NBLAS
        x.buffer     = malloc( m * 1 * sizeof( double ) );
        y.buffer     = malloc( m * 1 * sizeof( double ) );

        alpha.buffer = malloc( 1 * sizeof( double ) );
        beta.buffer  = malloc( 1 * sizeof( double ) );
        a.buffer     = malloc( m * m * sizeof( double ) );
        a.m          = m;
        a.n          = m;
        a.cs         = m;
        b.buffer     = malloc( m * m * sizeof( double ) );
        b.m          = m;
        b.n          = m;
        b.cs         = m;
        c.buffer     = malloc( m * m * sizeof( double ) );
        c.m          = m;
        c.n          = m;
        c.cs         = m;

        *((double*)alpha.buffer) =  2.0;
        *((double*)beta.buffer)  = -1.0;
#endif


#ifdef NBLIS

#if NBLIS >= 1
        for ( ii = 0; ii < 2000000000; ++ii )
        {
            bli_gemm( &BLIS_ONE,
                      &a,
                      &b,
                      &BLIS_ONE,
                      &c );
        }
#endif

#if NBLIS >= 2
        {
            bli_hemm( BLIS_LEFT,
                      &BLIS_ONE,
                      &a,
                      &b,
                      &BLIS_ONE,
                      &c );
        }
#endif

#if NBLIS >= 3
        {
            bli_herk( &BLIS_ONE,
                      &a,
                      &BLIS_ONE,
                      &c );
        }
#endif

#if NBLIS >= 4
        {
            bli_her2k( &BLIS_ONE,
                       &a,
                       &b,
                       &BLIS_ONE,
                       &c );
        }
#endif

#if NBLIS >= 5
        {
            bli_trmm( BLIS_LEFT,
                      &BLIS_ONE,
                      &a,
                      &c );
        }
#endif

#if NBLIS >= 6
        {
            bli_trsm( BLIS_LEFT,
                      &BLIS_ONE,
                      &a,
                      &c );
        }
#endif

#endif



#ifdef NBLAS

#if NBLAS >= 1
        for ( ii = 0; ii < 2000000000; ++ii )
        {
            f77_char transa = 'N';
            f77_char transb = 'N';
            f77_int  mm     = bli_obj_length( c );
            f77_int  kk     = bli_obj_width_after_trans( a );
            f77_int  nn     = bli_obj_width( c );
            f77_int  lda    = bli_obj_col_stride( a );
            f77_int  ldb    = bli_obj_col_stride( b );
            f77_int  ldc    = bli_obj_col_stride( c );
            double*  alphap = bli_obj_buffer( alpha );
            double*  ap     = bli_obj_buffer( a );
            double*  bp     = bli_obj_buffer( b );
            double*  betap  = bli_obj_buffer( beta );
            double*  cp     = bli_obj_buffer( c );

            dgemm_( &transa,
                    &transb,
                    &mm,
                    &nn,
                    &kk,
                    alphap,
                    ap, &lda,
                    bp, &ldb,
                    betap,
                    cp, &ldc );
        }
#endif

#if NBLAS >= 2
        {
            f77_char side   = 'L';
            f77_char uplo   = 'L';
            f77_int  mm     = bli_obj_length( c );
            f77_int  nn     = bli_obj_width( c );
            f77_int  lda    = bli_obj_col_stride( a );
            f77_int  ldb    = bli_obj_col_stride( b );
            f77_int  ldc    = bli_obj_col_stride( c );
            double*  alphap = bli_obj_buffer( alpha );
            double*  ap     = bli_obj_buffer( a );
            double*  bp     = bli_obj_buffer( b );
            double*  betap  = bli_obj_buffer( beta );
            double*  cp     = bli_obj_buffer( c );

            dsymm_( &side,
                    &uplo,
                    &mm,
                    &nn,
                    alphap,
                    ap, &lda,
                    bp, &ldb,
                    betap,
                    cp, &ldc );
        }
#endif

#if NBLAS >= 3
        {
            f77_char uplo   = 'L';
            f77_char trans  = 'N';
            f77_int  mm     = bli_obj_length( c );
            f77_int  kk     = bli_obj_width( a );
            f77_int  lda    = bli_obj_col_stride( a );
            f77_int  ldc    = bli_obj_col_stride( c );
            double*  alphap = bli_obj_buffer( alpha );
            double*  ap     = bli_obj_buffer( a );
            double*  betap  = bli_obj_buffer( beta );
            double*  cp     = bli_obj_buffer( c );

            dsyrk_( &uplo,
                    &trans,
                    &mm,
                    &kk,
                    alphap,
                    ap, &lda,
                    betap,
                    cp, &ldc );
        }
#endif

#if NBLAS >= 4
        {
            f77_char uplo   = 'L';
            f77_char trans  = 'N';
            f77_int  mm     = bli_obj_length( c );
            f77_int  kk     = bli_obj_width( a );
            f77_int  lda    = bli_obj_col_stride( a );
            f77_int  ldb    = bli_obj_col_stride( b );
            f77_int  ldc    = bli_obj_col_stride( c );
            double*  alphap = bli_obj_buffer( alpha );
            double*  ap     = bli_obj_buffer( a );
            double*  bp     = bli_obj_buffer( b );
            double*  betap  = bli_obj_buffer( beta );
            double*  cp     = bli_obj_buffer( c );

            dsyr2k_( &uplo,
                     &trans,
                     &mm,
                     &kk,
                     alphap,
                     ap, &lda,
                     bp, &ldb,
                     betap,
                     cp, &ldc );
        }
#endif

#if NBLAS >= 5
        {
            f77_char side   = 'L';
            f77_char uplo   = 'L';
            f77_char trans  = 'N';
            f77_char diag   = 'N';
            f77_int  mm     = bli_obj_length( c );
            f77_int  nn     = bli_obj_width( c );
            f77_int  lda    = bli_obj_col_stride( a );
            f77_int  ldc    = bli_obj_col_stride( c );
            double*  alphap = bli_obj_buffer( alpha );
            double*  ap     = bli_obj_buffer( a );
            double*  cp     = bli_obj_buffer( c );

            dtrmm_( &side,
                    &uplo,
                    &trans,
                    &diag,
                    &mm,
                    &nn,
                    alphap,
                    ap, &lda,
                    cp, &ldc );
        }
#endif

#if NBLAS >= 6
        {
            f77_char side   = 'L';
            f77_char uplo   = 'L';
            f77_char trans  = 'N';
            f77_char diag   = 'N';
            f77_int  mm     = bli_obj_length( c );
            f77_int  nn     = bli_obj_width( c );
            f77_int  lda    = bli_obj_col_stride( a );
            f77_int  ldc    = bli_obj_col_stride( c );
            double*  alphap = bli_obj_buffer( alpha );
            double*  ap     = bli_obj_buffer( a );
            double*  cp     = bli_obj_buffer( c );

            dtrsm_( &side,
                    &uplo,
                    &trans,
                    &diag,
                    &mm,
                    &nn,
                    alphap,
                    ap, &lda,
                    cp, &ldc );
        }
#endif

#if NBLAS >= 7
        {
            f77_char  transa = 'N';
            f77_char  transb = 'N';
            f77_int   mm     = bli_obj_length( c );
            f77_int   kk     = bli_obj_width_after_trans( a );
            f77_int   nn     = bli_obj_width( c );
            f77_int   lda    = bli_obj_col_stride( a );
            f77_int   ldb    = bli_obj_col_stride( b );
            f77_int   ldc    = bli_obj_col_stride( c );
            dcomplex* alphap = bli_obj_buffer( alpha );
            dcomplex* ap     = bli_obj_buffer( a );
            dcomplex* bp     = bli_obj_buffer( b );
            dcomplex* betap  = bli_obj_buffer( beta );
            dcomplex* cp     = bli_obj_buffer( c );

            zgemm_( &transa,
                    &transb,
                    &mm,
                    &nn,
                    &kk,
                    alphap,
                    ap, &lda,
                    bp, &ldb,
                    betap,
                    cp, &ldc );
        }
#endif

#if NBLAS >= 8
        {
            f77_char  side   = 'L';
            f77_char  uplo   = 'L';
            f77_int   mm     = bli_obj_length( c );
            f77_int   nn     = bli_obj_width( c );
            f77_int   lda    = bli_obj_col_stride( a );
            f77_int   ldb    = bli_obj_col_stride( b );
            f77_int   ldc    = bli_obj_col_stride( c );
            dcomplex* alphap = bli_obj_buffer( alpha );
            dcomplex* ap     = bli_obj_buffer( a );
            dcomplex* bp     = bli_obj_buffer( b );
            dcomplex* betap  = bli_obj_buffer( beta );
            dcomplex* cp     = bli_obj_buffer( c );

            zhemm_( &side,
                    &uplo,
                    &mm,
                    &nn,
                    alphap,
                    ap, &lda,
                    bp, &ldb,
                    betap,
                    cp, &ldc );
        }
#endif

#if NBLAS >= 9
        {
            f77_char  uplo   = 'L';
            f77_char  trans  = 'N';
            f77_int   mm     = bli_obj_length( c );
            f77_int   kk     = bli_obj_width( a );
            f77_int   lda    = bli_obj_col_stride( a );
            f77_int   ldc    = bli_obj_col_stride( c );
            double*   alphap = bli_obj_buffer( alpha );
            dcomplex* ap     = bli_obj_buffer( a );
            double*   betap  = bli_obj_buffer( beta );
            dcomplex* cp     = bli_obj_buffer( c );

            zherk_( &uplo,
                    &trans,
                    &mm,
                    &kk,
                    alphap,
                    ap, &lda,
                    betap,
                    cp, &ldc );
        }
#endif

#if NBLAS >= 10
        {
            f77_char  uplo   = 'L';
            f77_char  trans  = 'N';
            f77_int   mm     = bli_obj_length( c );
            f77_int   kk     = bli_obj_width( a );
            f77_int   lda    = bli_obj_col_stride( a );
            f77_int   ldb    = bli_obj_col_stride( b );
            f77_int   ldc    = bli_obj_col_stride( c );
            dcomplex* alphap = bli_obj_buffer( alpha );
            dcomplex* ap     = bli_obj_buffer( a );
            dcomplex* bp     = bli_obj_buffer( b );
            double*   betap  = bli_obj_buffer( beta );
            dcomplex* cp     = bli_obj_buffer( c );

            zher2k_( &uplo,
                     &trans,
                     &mm,
                     &kk,
                     alphap,
                     ap, &lda,
                     bp, &ldb,
                     betap,
                     cp, &ldc );
        }
#endif

#if NBLAS >= 11
        {
            f77_char  side   = 'L';
            f77_char  uplo   = 'L';
            f77_char  trans  = 'N';
            f77_char  diag   = 'N';
            f77_int   mm     = bli_obj_length( c );
            f77_int   nn     = bli_obj_width( c );
            f77_int   lda    = bli_obj_col_stride( a );
            f77_int   ldc    = bli_obj_col_stride( c );
            dcomplex* alphap = bli_obj_buffer( alpha );
            dcomplex* ap     = bli_obj_buffer( a );
            dcomplex* cp     = bli_obj_buffer( c );

            ztrmm_( &side,
                    &uplo,
                    &trans,
                    &diag,
                    &mm,
                    &nn,
                    alphap,
                    ap, &lda,
                    cp, &ldc );
        }
#endif

#if NBLAS >= 12
        {
            f77_char  side   = 'L';
            f77_char  uplo   = 'L';
            f77_char  trans  = 'N';
            f77_char  diag   = 'N';
            f77_int   mm     = bli_obj_length( c );
            f77_int   nn     = bli_obj_width( c );
            f77_int   lda    = bli_obj_col_stride( a );
            f77_int   ldc    = bli_obj_col_stride( c );
            dcomplex* alphap = bli_obj_buffer( alpha );
            dcomplex* ap     = bli_obj_buffer( a );
            dcomplex* cp     = bli_obj_buffer( c );

            ztrsm_( &side,
                    &uplo,
                    &trans,
                    &diag,
                    &mm,
                    &nn,
                    alphap,
                    ap, &lda,
                    cp, &ldc );
        }
#endif


#endif


#ifdef NBLIS
        bli_obj_free( &x );
        bli_obj_free( &y );

        bli_obj_free( &alpha );
        bli_obj_free( &beta );

        bli_obj_free( &a );
        bli_obj_free( &b );
        bli_obj_free( &c );
#endif

#ifdef NBLAS
        free( x.buffer );
        free( y.buffer );

        free( alpha.buffer );
        free( beta.buffer );

        free( a.buffer );
        free( b.buffer );
        free( c.buffer );
#endif
    }

#ifdef NBLIS
    bli_finalize();
#endif

    return 0;
}
Ejemplo n.º 24
0
/* Subroutine */ int drqt02_(integer *m, integer *n, integer *k, doublereal *
	a, doublereal *af, doublereal *q, doublereal *r__, integer *lda, 
	doublereal *tau, doublereal *work, integer *lwork, doublereal *rwork, 
	doublereal *result)
{
    /* System generated locals */
    integer a_dim1, a_offset, af_dim1, af_offset, q_dim1, q_offset, r_dim1, 
	    r_offset, i__1, i__2;

    /* Builtin functions */
    /* Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen);

    /* Local variables */
    doublereal eps;
    integer info;
    extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, 
	    integer *, doublereal *, doublereal *, integer *, doublereal *, 
	    integer *, doublereal *, doublereal *, integer *);
    doublereal resid, anorm;
    extern /* Subroutine */ int dsyrk_(char *, char *, integer *, integer *, 
	    doublereal *, doublereal *, integer *, doublereal *, doublereal *, 
	     integer *);
    extern doublereal dlamch_(char *), dlange_(char *, integer *, 
	    integer *, doublereal *, integer *, doublereal *);
    extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, 
	    doublereal *, integer *, doublereal *, integer *), 
	    dlaset_(char *, integer *, integer *, doublereal *, doublereal *, 
	    doublereal *, integer *);
    extern doublereal dlansy_(char *, char *, integer *, doublereal *, 
	    integer *, doublereal *);
    extern /* Subroutine */ int dorgrq_(integer *, integer *, integer *, 
	    doublereal *, integer *, doublereal *, doublereal *, integer *, 
	    integer *);


/*  -- LAPACK test routine (version 3.1) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  DRQT02 tests DORGRQ, which generates an m-by-n matrix Q with */
/*  orthonornmal rows that is defined as the product of k elementary */
/*  reflectors. */

/*  Given the RQ factorization of an m-by-n matrix A, DRQT02 generates */
/*  the orthogonal matrix Q defined by the factorization of the last k */
/*  rows of A; it compares R(m-k+1:m,n-m+1:n) with */
/*  A(m-k+1:m,1:n)*Q(n-m+1:n,1:n)', and checks that the rows of Q are */
/*  orthonormal. */

/*  Arguments */
/*  ========= */

/*  M       (input) INTEGER */
/*          The number of rows of the matrix Q to be generated.  M >= 0. */

/*  N       (input) INTEGER */
/*          The number of columns of the matrix Q to be generated. */
/*          N >= M >= 0. */

/*  K       (input) INTEGER */
/*          The number of elementary reflectors whose product defines the */
/*          matrix Q. M >= K >= 0. */

/*  A       (input) DOUBLE PRECISION array, dimension (LDA,N) */
/*          The m-by-n matrix A which was factorized by DRQT01. */

/*  AF      (input) DOUBLE PRECISION array, dimension (LDA,N) */
/*          Details of the RQ factorization of A, as returned by DGERQF. */
/*          See DGERQF for further details. */

/*  Q       (workspace) DOUBLE PRECISION array, dimension (LDA,N) */

/*  R       (workspace) DOUBLE PRECISION array, dimension (LDA,M) */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the arrays A, AF, Q and L. LDA >= N. */

/*  TAU     (input) DOUBLE PRECISION array, dimension (M) */
/*          The scalar factors of the elementary reflectors corresponding */
/*          to the RQ factorization in AF. */

/*  WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK) */

/*  LWORK   (input) INTEGER */
/*          The dimension of the array WORK. */

/*  RWORK   (workspace) DOUBLE PRECISION array, dimension (M) */

/*  RESULT  (output) DOUBLE PRECISION array, dimension (2) */
/*          The test ratios: */
/*          RESULT(1) = norm( R - A*Q' ) / ( N * norm(A) * EPS ) */
/*          RESULT(2) = norm( I - Q*Q' ) / ( N * EPS ) */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Scalars in Common .. */
/*     .. */
/*     .. Common blocks .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Quick return if possible */

    /* Parameter adjustments */
    r_dim1 = *lda;
    r_offset = 1 + r_dim1;
    r__ -= r_offset;
    q_dim1 = *lda;
    q_offset = 1 + q_dim1;
    q -= q_offset;
    af_dim1 = *lda;
    af_offset = 1 + af_dim1;
    af -= af_offset;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --tau;
    --work;
    --rwork;
    --result;

    /* Function Body */
    if (*m == 0 || *n == 0 || *k == 0) {
	result[1] = 0.;
	result[2] = 0.;
	return 0;
    }

    eps = dlamch_("Epsilon");

/*     Copy the last k rows of the factorization to the array Q */

    dlaset_("Full", m, n, &c_b4, &c_b4, &q[q_offset], lda);
    if (*k < *n) {
	i__1 = *n - *k;
	dlacpy_("Full", k, &i__1, &af[*m - *k + 1 + af_dim1], lda, &q[*m - *k 
		+ 1 + q_dim1], lda);
    }
    if (*k > 1) {
	i__1 = *k - 1;
	i__2 = *k - 1;
	dlacpy_("Lower", &i__1, &i__2, &af[*m - *k + 2 + (*n - *k + 1) * 
		af_dim1], lda, &q[*m - *k + 2 + (*n - *k + 1) * q_dim1], lda);
    }

/*     Generate the last n rows of the matrix Q */

    s_copy(srnamc_1.srnamt, "DORGRQ", (ftnlen)6, (ftnlen)6);
    dorgrq_(m, n, k, &q[q_offset], lda, &tau[*m - *k + 1], &work[1], lwork, &
	    info);

/*     Copy R(m-k+1:m,n-m+1:n) */

    dlaset_("Full", k, m, &c_b10, &c_b10, &r__[*m - *k + 1 + (*n - *m + 1) * 
	    r_dim1], lda);
    dlacpy_("Upper", k, k, &af[*m - *k + 1 + (*n - *k + 1) * af_dim1], lda, &
	    r__[*m - *k + 1 + (*n - *k + 1) * r_dim1], lda);

/*     Compute R(m-k+1:m,n-m+1:n) - A(m-k+1:m,1:n) * Q(n-m+1:n,1:n)' */

    dgemm_("No transpose", "Transpose", k, m, n, &c_b15, &a[*m - *k + 1 + 
	    a_dim1], lda, &q[q_offset], lda, &c_b16, &r__[*m - *k + 1 + (*n - 
	    *m + 1) * r_dim1], lda);

/*     Compute norm( R - A*Q' ) / ( N * norm(A) * EPS ) . */

    anorm = dlange_("1", k, n, &a[*m - *k + 1 + a_dim1], lda, &rwork[1]);
    resid = dlange_("1", k, m, &r__[*m - *k + 1 + (*n - *m + 1) * r_dim1], 
	    lda, &rwork[1]);
    if (anorm > 0.) {
	result[1] = resid / (doublereal) max(1,*n) / anorm / eps;
    } else {
	result[1] = 0.;
    }

/*     Compute I - Q*Q' */

    dlaset_("Full", m, m, &c_b10, &c_b16, &r__[r_offset], lda);
    dsyrk_("Upper", "No transpose", m, n, &c_b15, &q[q_offset], lda, &c_b16, &
	    r__[r_offset], lda);

/*     Compute norm( I - Q*Q' ) / ( N * EPS ) . */

    resid = dlansy_("1", "Upper", m, &r__[r_offset], lda, &rwork[1]);

    result[2] = resid / (doublereal) max(1,*n) / eps;

    return 0;

/*     End of DRQT02 */

} /* drqt02_ */
Ejemplo n.º 25
0
/* Subroutine */ int dpbtrf_(char *uplo, integer *n, integer *kd, doublereal *
                             ab, integer *ldab, integer *info)
{
    /*  -- LAPACK routine (version 3.0) --
           Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
           Courant Institute, Argonne National Lab, and Rice University
           March 31, 1993


        Purpose
        =======

        DPBTRF computes the Cholesky factorization of a real symmetric
        positive definite band matrix A.

        The factorization has the form
           A = U**T * U,  if UPLO = 'U', or
           A = L  * L**T,  if UPLO = 'L',
        where U is an upper triangular matrix and L is lower triangular.

        Arguments
        =========

        UPLO    (input) CHARACTER*1
                = 'U':  Upper triangle of A is stored;
                = 'L':  Lower triangle of A is stored.

        N       (input) INTEGER
                The order of the matrix A.  N >= 0.

        KD      (input) INTEGER
                The number of superdiagonals of the matrix A if UPLO = 'U',
                or the number of subdiagonals if UPLO = 'L'.  KD >= 0.

        AB      (input/output) DOUBLE PRECISION array, dimension (LDAB,N)
                On entry, the upper or lower triangle of the symmetric band
                matrix A, stored in the first KD+1 rows of the array.  The
                j-th column of A is stored in the j-th column of the array AB
                as follows:
                if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
                if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).

                On exit, if INFO = 0, the triangular factor U or L from the
                Cholesky factorization A = U**T*U or A = L*L**T of the band
                matrix A, in the same storage format as A.

        LDAB    (input) INTEGER
                The leading dimension of the array AB.  LDAB >= KD+1.

        INFO    (output) INTEGER
                = 0:  successful exit
                < 0:  if INFO = -i, the i-th argument had an illegal value
                > 0:  if INFO = i, the leading minor of order i is not
                      positive definite, and the factorization could not be
                      completed.

        Further Details
        ===============

        The band storage scheme is illustrated by the following example, when
        N = 6, KD = 2, and UPLO = 'U':

        On entry:                       On exit:

            *    *   a13  a24  a35  a46      *    *   u13  u24  u35  u46
            *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56
           a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66

        Similarly, if UPLO = 'L' the format of A is as follows:

        On entry:                       On exit:

           a11  a22  a33  a44  a55  a66     l11  l22  l33  l44  l55  l66
           a21  a32  a43  a54  a65   *      l21  l32  l43  l54  l65   *
           a31  a42  a53  a64   *    *      l31  l42  l53  l64   *    *

        Array elements marked * are not used by the routine.

        Contributed by
        Peter Mayes and Giuseppe Radicati, IBM ECSEC, Rome, March 23, 1989

        =====================================================================


           Test the input parameters.

           Parameter adjustments */
    /* Table of constant values */
    static integer c__1 = 1;
    static integer c_n1 = -1;
    static doublereal c_b18 = 1.;
    static doublereal c_b21 = -1.;
    static integer c__33 = 33;

    /* System generated locals */
    integer ab_dim1, ab_offset, i__1, i__2, i__3, i__4;
    /* Local variables */
    static doublereal work[1056]	/* was [33][32] */;
    static integer i__, j;
    extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *,
                                       integer *, doublereal *, doublereal *, integer *, doublereal *,
                                       integer *, doublereal *, doublereal *, integer *);
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *,
                                       integer *, integer *, doublereal *, doublereal *, integer *,
                                       doublereal *, integer *);
    static integer i2, i3;
    extern /* Subroutine */ int dsyrk_(char *, char *, integer *, integer *,
                                       doublereal *, doublereal *, integer *, doublereal *, doublereal *,
                                       integer *), dpbtf2_(char *, integer *, integer *,
                                               doublereal *, integer *, integer *), dpotf2_(char *,
                                                       integer *, doublereal *, integer *, integer *);
    static integer ib, nb, ii, jj;
    extern /* Subroutine */ int xerbla_(char *, integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
                           integer *, integer *, ftnlen, ftnlen);
#define work_ref(a_1,a_2) work[(a_2)*33 + a_1 - 34]
#define ab_ref(a_1,a_2) ab[(a_2)*ab_dim1 + a_1]


    ab_dim1 = *ldab;
    ab_offset = 1 + ab_dim1 * 1;
    ab -= ab_offset;

    /* Function Body */
    *info = 0;
    if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) {
        *info = -1;
    } else if (*n < 0) {
        *info = -2;
    } else if (*kd < 0) {
        *info = -3;
    } else if (*ldab < *kd + 1) {
        *info = -5;
    }
    if (*info != 0) {
        i__1 = -(*info);
        xerbla_("DPBTRF", &i__1);
        return 0;
    }

    /*     Quick return if possible */

    if (*n == 0) {
        return 0;
    }

    /*     Determine the block size for this environment */

    nb = ilaenv_(&c__1, "DPBTRF", uplo, n, kd, &c_n1, &c_n1, (ftnlen)6, (
                     ftnlen)1);

    /*     The block size must not exceed the semi-bandwidth KD, and must not
           exceed the limit set by the size of the local array WORK. */

    nb = min(nb,32);

    if (nb <= 1 || nb > *kd) {

        /*        Use unblocked code */

        dpbtf2_(uplo, n, kd, &ab[ab_offset], ldab, info);
    } else {

        /*        Use blocked code */

        if (lsame_(uplo, "U")) {

            /*           Compute the Cholesky factorization of a symmetric band
                         matrix, given the upper triangle of the matrix in band
                         storage.

                         Zero the upper triangle of the work array. */

            i__1 = nb;
            for (j = 1; j <= i__1; ++j) {
                i__2 = j - 1;
                for (i__ = 1; i__ <= i__2; ++i__) {
                    work_ref(i__, j) = 0.;
                    /* L10: */
                }
                /* L20: */
            }

            /*           Process the band matrix one diagonal block at a time. */

            i__1 = *n;
            i__2 = nb;
            for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
                /* Computing MIN */
                i__3 = nb, i__4 = *n - i__ + 1;
                ib = min(i__3,i__4);

                /*              Factorize the diagonal block */

                i__3 = *ldab - 1;
                dpotf2_(uplo, &ib, &ab_ref(*kd + 1, i__), &i__3, &ii);
                if (ii != 0) {
                    *info = i__ + ii - 1;
                    goto L150;
                }
                if (i__ + ib <= *n) {

                    /*                 Update the relevant part of the trailing submatrix.
                                       If A11 denotes the diagonal block which has just been
                                       factorized, then we need to update the remaining
                                       blocks in the diagram:

                                          A11   A12   A13
                                                A22   A23
                                                      A33

                                       The numbers of rows and columns in the partitioning
                                       are IB, I2, I3 respectively. The blocks A12, A22 and
                                       A23 are empty if IB = KD. The upper triangle of A13
                                       lies outside the band.

                       Computing MIN */
                    i__3 = *kd - ib, i__4 = *n - i__ - ib + 1;
                    i2 = min(i__3,i__4);
                    /* Computing MIN */
                    i__3 = ib, i__4 = *n - i__ - *kd + 1;
                    i3 = min(i__3,i__4);

                    if (i2 > 0) {

                        /*                    Update A12 */

                        i__3 = *ldab - 1;
                        i__4 = *ldab - 1;
                        dtrsm_("Left", "Upper", "Transpose", "Non-unit", &ib,
                               &i2, &c_b18, &ab_ref(*kd + 1, i__), &i__3, &
                               ab_ref(*kd + 1 - ib, i__ + ib), &i__4);

                        /*                    Update A22 */

                        i__3 = *ldab - 1;
                        i__4 = *ldab - 1;
                        dsyrk_("Upper", "Transpose", &i2, &ib, &c_b21, &
                               ab_ref(*kd + 1 - ib, i__ + ib), &i__3, &c_b18,
                               &ab_ref(*kd + 1, i__ + ib), &i__4);
                    }

                    if (i3 > 0) {

                        /*                    Copy the lower triangle of A13 into the work array. */

                        i__3 = i3;
                        for (jj = 1; jj <= i__3; ++jj) {
                            i__4 = ib;
                            for (ii = jj; ii <= i__4; ++ii) {
                                work_ref(ii, jj) = ab_ref(ii - jj + 1, jj +
                                                          i__ + *kd - 1);
                                /* L30: */
                            }
                            /* L40: */
                        }

                        /*                    Update A13 (in the work array). */

                        i__3 = *ldab - 1;
                        dtrsm_("Left", "Upper", "Transpose", "Non-unit", &ib,
                               &i3, &c_b18, &ab_ref(*kd + 1, i__), &i__3,
                               work, &c__33);

                        /*                    Update A23 */

                        if (i2 > 0) {
                            i__3 = *ldab - 1;
                            i__4 = *ldab - 1;
                            dgemm_("Transpose", "No Transpose", &i2, &i3, &ib,
                                   &c_b21, &ab_ref(*kd + 1 - ib, i__ + ib),
                                   &i__3, work, &c__33, &c_b18, &ab_ref(ib +
                                           1, i__ + *kd), &i__4);
                        }

                        /*                    Update A33 */

                        i__3 = *ldab - 1;
                        dsyrk_("Upper", "Transpose", &i3, &ib, &c_b21, work, &
                               c__33, &c_b18, &ab_ref(*kd + 1, i__ + *kd), &
                               i__3);

                        /*                    Copy the lower triangle of A13 back into place. */

                        i__3 = i3;
                        for (jj = 1; jj <= i__3; ++jj) {
                            i__4 = ib;
                            for (ii = jj; ii <= i__4; ++ii) {
                                ab_ref(ii - jj + 1, jj + i__ + *kd - 1) =
                                    work_ref(ii, jj);
                                /* L50: */
                            }
                            /* L60: */
                        }
                    }
                }
                /* L70: */
            }
        } else {

            /*           Compute the Cholesky factorization of a symmetric band
                         matrix, given the lower triangle of the matrix in band
                         storage.

                         Zero the lower triangle of the work array. */

            i__2 = nb;
            for (j = 1; j <= i__2; ++j) {
                i__1 = nb;
                for (i__ = j + 1; i__ <= i__1; ++i__) {
                    work_ref(i__, j) = 0.;
                    /* L80: */
                }
                /* L90: */
            }

            /*           Process the band matrix one diagonal block at a time. */

            i__2 = *n;
            i__1 = nb;
            for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
                /* Computing MIN */
                i__3 = nb, i__4 = *n - i__ + 1;
                ib = min(i__3,i__4);

                /*              Factorize the diagonal block */

                i__3 = *ldab - 1;
                dpotf2_(uplo, &ib, &ab_ref(1, i__), &i__3, &ii);
                if (ii != 0) {
                    *info = i__ + ii - 1;
                    goto L150;
                }
                if (i__ + ib <= *n) {

                    /*                 Update the relevant part of the trailing submatrix.
                                       If A11 denotes the diagonal block which has just been
                                       factorized, then we need to update the remaining
                                       blocks in the diagram:

                                          A11
                                          A21   A22
                                          A31   A32   A33

                                       The numbers of rows and columns in the partitioning
                                       are IB, I2, I3 respectively. The blocks A21, A22 and
                                       A32 are empty if IB = KD. The lower triangle of A31
                                       lies outside the band.

                       Computing MIN */
                    i__3 = *kd - ib, i__4 = *n - i__ - ib + 1;
                    i2 = min(i__3,i__4);
                    /* Computing MIN */
                    i__3 = ib, i__4 = *n - i__ - *kd + 1;
                    i3 = min(i__3,i__4);

                    if (i2 > 0) {

                        /*                    Update A21 */

                        i__3 = *ldab - 1;
                        i__4 = *ldab - 1;
                        dtrsm_("Right", "Lower", "Transpose", "Non-unit", &i2,
                               &ib, &c_b18, &ab_ref(1, i__), &i__3, &ab_ref(
                                   ib + 1, i__), &i__4);

                        /*                    Update A22 */

                        i__3 = *ldab - 1;
                        i__4 = *ldab - 1;
                        dsyrk_("Lower", "No Transpose", &i2, &ib, &c_b21, &
                               ab_ref(ib + 1, i__), &i__3, &c_b18, &ab_ref(1,
                                       i__ + ib), &i__4);
                    }

                    if (i3 > 0) {

                        /*                    Copy the upper triangle of A31 into the work array. */

                        i__3 = ib;
                        for (jj = 1; jj <= i__3; ++jj) {
                            i__4 = min(jj,i3);
                            for (ii = 1; ii <= i__4; ++ii) {
                                work_ref(ii, jj) = ab_ref(*kd + 1 - jj + ii,
                                                          jj + i__ - 1);
                                /* L100: */
                            }
                            /* L110: */
                        }

                        /*                    Update A31 (in the work array). */

                        i__3 = *ldab - 1;
                        dtrsm_("Right", "Lower", "Transpose", "Non-unit", &i3,
                               &ib, &c_b18, &ab_ref(1, i__), &i__3, work, &
                               c__33);

                        /*                    Update A32 */

                        if (i2 > 0) {
                            i__3 = *ldab - 1;
                            i__4 = *ldab - 1;
                            dgemm_("No transpose", "Transpose", &i3, &i2, &ib,
                                   &c_b21, work, &c__33, &ab_ref(ib + 1,
                                                                 i__), &i__3, &c_b18, &ab_ref(*kd + 1 - ib,
                                                                         i__ + ib), &i__4);
                        }

                        /*                    Update A33 */

                        i__3 = *ldab - 1;
                        dsyrk_("Lower", "No Transpose", &i3, &ib, &c_b21,
                               work, &c__33, &c_b18, &ab_ref(1, i__ + *kd), &
                               i__3);

                        /*                    Copy the upper triangle of A31 back into place. */

                        i__3 = ib;
                        for (jj = 1; jj <= i__3; ++jj) {
                            i__4 = min(jj,i3);
                            for (ii = 1; ii <= i__4; ++ii) {
                                ab_ref(*kd + 1 - jj + ii, jj + i__ - 1) =
                                    work_ref(ii, jj);
                                /* L120: */
                            }
                            /* L130: */
                        }
                    }
                }
                /* L140: */
            }
        }
    }
    return 0;

L150:
    return 0;

    /*     End of DPBTRF */

} /* dpbtrf_ */
Ejemplo n.º 26
0
/* Subroutine */ int dpotrf_(char *uplo, integer *n, doublereal *a, integer *
                             lda, integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;

    /* Local variables */
    integer j, jb, nb;
    extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *,
                                       integer *, doublereal *, doublereal *, integer *, doublereal *,
                                       integer *, doublereal *, doublereal *, integer *);
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *,
                                       integer *, integer *, doublereal *, doublereal *, integer *,
                                       doublereal *, integer *);
    logical upper;
    extern /* Subroutine */ int dsyrk_(char *, char *, integer *, integer *,
                                       doublereal *, doublereal *, integer *, doublereal *, doublereal *,
                                       integer *), dpotf2_(char *, integer *,
                                               doublereal *, integer *, integer *), xerbla_(char *,
                                                       integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
                           integer *, integer *);


    /*  -- LAPACK routine (version 3.2) -- */
    /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
    /*     November 2006 */

    /*     .. Scalar Arguments .. */
    /*     .. */
    /*     .. Array Arguments .. */
    /*     .. */

    /*  Purpose */
    /*  ======= */

    /*  DPOTRF computes the Cholesky factorization of a real symmetric */
    /*  positive definite matrix A. */

    /*  The factorization has the form */
    /*     A = U**T * U,  if UPLO = 'U', or */
    /*     A = L  * L**T,  if UPLO = 'L', */
    /*  where U is an upper triangular matrix and L is lower triangular. */

    /*  This is the block version of the algorithm, calling Level 3 BLAS. */

    /*  Arguments */
    /*  ========= */

    /*  UPLO    (input) CHARACTER*1 */
    /*          = 'U':  Upper triangle of A is stored; */
    /*          = 'L':  Lower triangle of A is stored. */

    /*  N       (input) INTEGER */
    /*          The order of the matrix A.  N >= 0. */

    /*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N) */
    /*          On entry, the symmetric matrix A.  If UPLO = 'U', the leading */
    /*          N-by-N upper triangular part of A contains the upper */
    /*          triangular part of the matrix A, and the strictly lower */
    /*          triangular part of A is not referenced.  If UPLO = 'L', the */
    /*          leading N-by-N lower triangular part of A contains the lower */
    /*          triangular part of the matrix A, and the strictly upper */
    /*          triangular part of A is not referenced. */

    /*          On exit, if INFO = 0, the factor U or L from the Cholesky */
    /*          factorization A = U**T*U or A = L*L**T. */

    /*  LDA     (input) INTEGER */
    /*          The leading dimension of the array A.  LDA >= max(1,N). */

    /*  INFO    (output) INTEGER */
    /*          = 0:  successful exit */
    /*          < 0:  if INFO = -i, the i-th argument had an illegal value */
    /*          > 0:  if INFO = i, the leading minor of order i is not */
    /*                positive definite, and the factorization could not be */
    /*                completed. */

    /*  ===================================================================== */

    /*     .. Parameters .. */
    /*     .. */
    /*     .. Local Scalars .. */
    /*     .. */
    /*     .. External Functions .. */
    /*     .. */
    /*     .. External Subroutines .. */
    /*     .. */
    /*     .. Intrinsic Functions .. */
    /*     .. */
    /*     .. Executable Statements .. */

    /*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;

    /* Function Body */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
        *info = -1;
    } else if (*n < 0) {
        *info = -2;
    } else if (*lda < max(1,*n)) {
        *info = -4;
    }
    if (*info != 0) {
        i__1 = -(*info);
        xerbla_("DPOTRF", &i__1);
        return 0;
    }

    /*     Quick return if possible */

    if (*n == 0) {
        return 0;
    }

    /*     Determine the block size for this environment. */

    nb = ilaenv_(&c__1, "DPOTRF", uplo, n, &c_n1, &c_n1, &c_n1);
    if (nb <= 1 || nb >= *n) {

        /*        Use unblocked code. */

        dpotf2_(uplo, n, &a[a_offset], lda, info);
    } else {

        /*        Use blocked code. */

        if (upper) {

            /*           Compute the Cholesky factorization A = U'*U. */

            i__1 = *n;
            i__2 = nb;
            for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {

                /*              Update and factorize the current diagonal block and test */
                /*              for non-positive-definiteness. */

                /* Computing MIN */
                i__3 = nb, i__4 = *n - j + 1;
                jb = min(i__3,i__4);
                i__3 = j - 1;
                dsyrk_("Upper", "Transpose", &jb, &i__3, &c_b13, &a[j *
                        a_dim1 + 1], lda, &c_b14, &a[j + j * a_dim1], lda);
                dpotf2_("Upper", &jb, &a[j + j * a_dim1], lda, info);
                if (*info != 0) {
                    goto L30;
                }
                if (j + jb <= *n) {

                    /*                 Compute the current block row. */

                    i__3 = *n - j - jb + 1;
                    i__4 = j - 1;
                    dgemm_("Transpose", "No transpose", &jb, &i__3, &i__4, &
                           c_b13, &a[j * a_dim1 + 1], lda, &a[(j + jb) *
                                   a_dim1 + 1], lda, &c_b14, &a[j + (j + jb) *
                                           a_dim1], lda);
                    i__3 = *n - j - jb + 1;
                    dtrsm_("Left", "Upper", "Transpose", "Non-unit", &jb, &
                           i__3, &c_b14, &a[j + j * a_dim1], lda, &a[j + (j
                                   + jb) * a_dim1], lda);
                }
                /* L10: */
            }

        } else {

            /*           Compute the Cholesky factorization A = L*L'. */

            i__2 = *n;
            i__1 = nb;
            for (j = 1; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) {

                /*              Update and factorize the current diagonal block and test */
                /*              for non-positive-definiteness. */

                /* Computing MIN */
                i__3 = nb, i__4 = *n - j + 1;
                jb = min(i__3,i__4);
                i__3 = j - 1;
                dsyrk_("Lower", "No transpose", &jb, &i__3, &c_b13, &a[j +
                        a_dim1], lda, &c_b14, &a[j + j * a_dim1], lda);
                dpotf2_("Lower", &jb, &a[j + j * a_dim1], lda, info);
                if (*info != 0) {
                    goto L30;
                }
                if (j + jb <= *n) {

                    /*                 Compute the current block column. */

                    i__3 = *n - j - jb + 1;
                    i__4 = j - 1;
                    dgemm_("No transpose", "Transpose", &i__3, &jb, &i__4, &
                           c_b13, &a[j + jb + a_dim1], lda, &a[j + a_dim1],
                           lda, &c_b14, &a[j + jb + j * a_dim1], lda);
                    i__3 = *n - j - jb + 1;
                    dtrsm_("Right", "Lower", "Transpose", "Non-unit", &i__3, &
                           jb, &c_b14, &a[j + j * a_dim1], lda, &a[j + jb +
                                   j * a_dim1], lda);
                }
                /* L20: */
            }
        }
    }
    goto L40;

L30:
    *info = *info + j - 1;

L40:
    return 0;

    /*     End of DPOTRF */

} /* dpotrf_ */
Ejemplo n.º 27
0
int main( int argc, char** argv )
{
	obj_t a, c;
	obj_t c_save;
	obj_t alpha, beta;
	dim_t m, k;
	dim_t p;
	dim_t p_begin, p_end, p_inc;
	int   m_input, k_input;
	num_t dt;
	int   r, n_repeats;
	uplo_t uploc;
	trans_t transa;
	f77_char f77_uploc;
	f77_char f77_transa;

	double dtime;
	double dtime_save;
	double gflops;

	bli_init();

	//bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING );

	n_repeats = 3;

#ifndef PRINT
	p_begin = 200;
	p_end   = 2000;
	p_inc   = 200;

	m_input = -1;
	k_input = -1;
#else
	p_begin = 16;
	p_end   = 16;
	p_inc   = 1;

	m_input = 3;
	k_input = 1;
#endif

#if 1
	//dt = BLIS_FLOAT;
	dt = BLIS_DOUBLE;
#else
	//dt = BLIS_SCOMPLEX;
	dt = BLIS_DCOMPLEX;
#endif

	uploc = BLIS_LOWER;
	//uploc = BLIS_UPPER;

	transa = BLIS_NO_TRANSPOSE;

	bli_param_map_blis_to_netlib_uplo( uploc, &f77_uploc );
	bli_param_map_blis_to_netlib_trans( transa, &f77_transa );


	for ( p = p_begin; p <= p_end; p += p_inc )
	{
		if ( m_input < 0 ) m = p * ( dim_t )abs(m_input);
		else               m =     ( dim_t )    m_input;
		if ( k_input < 0 ) k = p * ( dim_t )abs(k_input);
		else               k =     ( dim_t )    k_input;

		bli_obj_create( dt, 1, 1, 0, 0, &alpha );
		bli_obj_create( dt, 1, 1, 0, 0, &beta );

		if ( bli_does_trans( transa ) )
			bli_obj_create( dt, k, m, 0, 0, &a );
		else
			bli_obj_create( dt, m, k, 0, 0, &a );
		bli_obj_create( dt, m, m, 0, 0, &c );
		bli_obj_create( dt, m, m, 0, 0, &c_save );

		bli_randm( &a );
		bli_randm( &c );

		bli_obj_set_struc( BLIS_HERMITIAN, c );
		bli_obj_set_uplo( uploc, c );

		bli_obj_set_conjtrans( transa, a );


		bli_setsc(  (2.0/1.0), 0.0, &alpha );
		bli_setsc( -(1.0/1.0), 0.0, &beta );


		bli_copym( &c, &c_save );
	
		dtime_save = 1.0e9;

		for ( r = 0; r < n_repeats; ++r )
		{
			bli_copym( &c_save, &c );


			dtime = bli_clock();


#ifdef PRINT
			bli_printm( "a", &a, "%4.1f", "" );
			bli_printm( "c", &c, "%4.1f", "" );
#endif

#ifdef BLIS

			bli_herk( &alpha,
			          &a,
			          &beta,
			          &c );

#else
		if ( bli_is_float( dt ) )
		{
			f77_int  mm     = bli_obj_length( c );
			f77_int  kk     = bli_obj_width_after_trans( a );
			f77_int  lda    = bli_obj_col_stride( a );
			f77_int  ldc    = bli_obj_col_stride( c );
			float*   alphap = bli_obj_buffer( alpha );
			float*   ap     = bli_obj_buffer( a );
			float*   betap  = bli_obj_buffer( beta );
			float*   cp     = bli_obj_buffer( c );

			ssyrk_( &f77_uploc,
			        &f77_transa,
			        &mm,
			        &kk,
			        alphap,
			        ap, &lda,
			        betap,
			        cp, &ldc );
		}
		else if ( bli_is_double( dt ) )
		{
			f77_int  mm     = bli_obj_length( c );
			f77_int  kk     = bli_obj_width_after_trans( a );
			f77_int  lda    = bli_obj_col_stride( a );
			f77_int  ldc    = bli_obj_col_stride( c );
			double*  alphap = bli_obj_buffer( alpha );
			double*  ap     = bli_obj_buffer( a );
			double*  betap  = bli_obj_buffer( beta );
			double*  cp     = bli_obj_buffer( c );

			dsyrk_( &f77_uploc,
			        &f77_transa,
			        &mm,
			        &kk,
			        alphap,
			        ap, &lda,
			        betap,
			        cp, &ldc );
		}
		else if ( bli_is_scomplex( dt ) )
		{
			f77_int  mm     = bli_obj_length( c );
			f77_int  kk     = bli_obj_width_after_trans( a );
			f77_int  lda    = bli_obj_col_stride( a );
			f77_int  ldc    = bli_obj_col_stride( c );
			float*     alphap = bli_obj_buffer( alpha );
			scomplex*  ap     = bli_obj_buffer( a );
			float*     betap  = bli_obj_buffer( beta );
			scomplex*  cp     = bli_obj_buffer( c );

			cherk_( &f77_uploc,
			        &f77_transa,
			        &mm,
			        &kk,
			        alphap,
			        ap, &lda,
			        betap,
			        cp, &ldc );
		}
		else if ( bli_is_dcomplex( dt ) )
		{
			f77_int  mm     = bli_obj_length( c );
			f77_int  kk     = bli_obj_width_after_trans( a );
			f77_int  lda    = bli_obj_col_stride( a );
			f77_int  ldc    = bli_obj_col_stride( c );
			double*    alphap = bli_obj_buffer( alpha );
			dcomplex*  ap     = bli_obj_buffer( a );
			double*    betap  = bli_obj_buffer( beta );
			dcomplex*  cp     = bli_obj_buffer( c );

			zherk_( &f77_uploc,
			        &f77_transa,
			        &mm,
			        &kk,
			        alphap,
			        ap, &lda,
			        betap,
			        cp, &ldc );
		}
#endif

#ifdef PRINT
			bli_printm( "c after", &c, "%4.1f", "" );
			exit(1);
#endif


			dtime_save = bli_clock_min_diff( dtime_save, dtime );
		}

		gflops = ( 1.0 * m * k * m ) / ( dtime_save * 1.0e9 );

		if ( bli_is_complex( dt ) ) gflops *= 4.0;

#ifdef BLIS
		printf( "data_herk_blis" );
#else
		printf( "data_herk_%s", BLAS );
#endif
		printf( "( %2lu, 1:4 ) = [ %4lu %4lu  %10.3e  %6.3f ];\n",
		        ( unsigned long )(p - p_begin + 1)/p_inc + 1,
		        ( unsigned long )m,
		        ( unsigned long )k, dtime_save, gflops );


		bli_obj_free( &alpha );
		bli_obj_free( &beta );

		bli_obj_free( &a );
		bli_obj_free( &c );
		bli_obj_free( &c_save );
	}

	bli_finalize();

	return 0;
}
Ejemplo n.º 28
0
 int dsfrk_(char *transr, char *uplo, char *trans, int *n, 
	 int *k, double *alpha, double *a, int *lda, 
	double *beta, double *c__)
{
    /* System generated locals */
    int a_dim1, a_offset, i__1;

    /* Local variables */
    int j, n1, n2, nk, info;
    int normaltransr;
    extern  int dgemm_(char *, char *, int *, int *, 
	    int *, double *, double *, int *, double *, 
	    int *, double *, double *, int *);
    extern int lsame_(char *, char *);
    int nrowa;
    int lower;
    extern  int dsyrk_(char *, char *, int *, int *, 
	    double *, double *, int *, double *, double *, 
	     int *), xerbla_(char *, int *);
    int nisodd, notrans;


/*  -- LAPACK routine (version 3.2)                                    -- */

/*  -- Contributed by Julien Langou of the Univ. of Colorado Denver    -- */
/*  -- November 2008                                                   -- */

/*  -- LAPACK is a software package provided by Univ. of Tennessee,    -- */
/*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */

/*     .. */
/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  Level 3 BLAS like routine for C in RFP Format. */

/*  DSFRK performs one of the symmetric rank--k operations */

/*     C := alpha*A*A' + beta*C, */

/*  or */

/*     C := alpha*A'*A + beta*C, */

/*  where alpha and beta are float scalars, C is an n--by--n symmetric */
/*  matrix and A is an n--by--k matrix in the first case and a k--by--n */
/*  matrix in the second case. */

/*  Arguments */
/*  ========== */

/*  TRANSR    (input) CHARACTER */
/*          = 'N':  The Normal Form of RFP A is stored; */
/*          = 'T':  The Transpose Form of RFP A is stored. */

/*  UPLO   - (input) CHARACTER */
/*           On  entry, UPLO specifies whether the upper or lower */
/*           triangular part of the array C is to be referenced as */
/*           follows: */

/*              UPLO = 'U' or 'u'   Only the upper triangular part of C */
/*                                  is to be referenced. */

/*              UPLO = 'L' or 'l'   Only the lower triangular part of C */
/*                                  is to be referenced. */

/*           Unchanged on exit. */

/*  TRANS  - (input) CHARACTER */
/*           On entry, TRANS specifies the operation to be performed as */
/*           follows: */

/*              TRANS = 'N' or 'n'   C := alpha*A*A' + beta*C. */

/*              TRANS = 'T' or 't'   C := alpha*A'*A + beta*C. */

/*           Unchanged on exit. */

/*  N      - (input) INTEGER. */
/*           On entry, N specifies the order of the matrix C. N must be */
/*           at least zero. */
/*           Unchanged on exit. */

/*  K      - (input) INTEGER. */
/*           On entry with TRANS = 'N' or 'n', K specifies the number */
/*           of  columns of the matrix A, and on entry with TRANS = 'T' */
/*           or 't', K specifies the number of rows of the matrix A. K */
/*           must be at least zero. */
/*           Unchanged on exit. */

/*  ALPHA  - (input) DOUBLE PRECISION. */
/*           On entry, ALPHA specifies the scalar alpha. */
/*           Unchanged on exit. */

/*  A      - (input) DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where KA */
/*           is K  when TRANS = 'N' or 'n', and is N otherwise. Before */
/*           entry with TRANS = 'N' or 'n', the leading N--by--K part of */
/*           the array A must contain the matrix A, otherwise the leading */
/*           K--by--N part of the array A must contain the matrix A. */
/*           Unchanged on exit. */

/*  LDA    - (input) INTEGER. */
/*           On entry, LDA specifies the first dimension of A as declared */
/*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n' */
/*           then  LDA must be at least  MAX( 1, n ), otherwise  LDA must */
/*           be at least  MAX( 1, k ). */
/*           Unchanged on exit. */

/*  BETA   - (input) DOUBLE PRECISION. */
/*           On entry, BETA specifies the scalar beta. */
/*           Unchanged on exit. */


/*  C      - (input/output) DOUBLE PRECISION array, dimension ( NT ); */
/*           NT = N*(N+1)/2. On entry, the symmetric matrix C in RFP */
/*           Format. RFP Format is described by TRANSR, UPLO and N. */

/*  Arguments */
/*  ========== */

/*     .. */
/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Executable Statements .. */

/*     Test the input parameters. */

    /* Parameter adjustments */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --c__;

    /* Function Body */
    info = 0;
    normaltransr = lsame_(transr, "N");
    lower = lsame_(uplo, "L");
    notrans = lsame_(trans, "N");

    if (notrans) {
	nrowa = *n;
    } else {
	nrowa = *k;
    }

    if (! normaltransr && ! lsame_(transr, "T")) {
	info = -1;
    } else if (! lower && ! lsame_(uplo, "U")) {
	info = -2;
    } else if (! notrans && ! lsame_(trans, "T")) {
	info = -3;
    } else if (*n < 0) {
	info = -4;
    } else if (*k < 0) {
	info = -5;
    } else if (*lda < MAX(1,nrowa)) {
	info = -8;
    }
    if (info != 0) {
	i__1 = -info;
	xerbla_("DSFRK ", &i__1);
	return 0;
    }

/*     Quick return if possible. */

/*     The quick return case: ((ALPHA.EQ.0).AND.(BETA.NE.ZERO)) is not */
/*     done (it is in DSYRK for example) and left in the general case. */

    if (*n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) {
	return 0;
    }

    if (*alpha == 0. && *beta == 0.) {
	i__1 = *n * (*n + 1) / 2;
	for (j = 1; j <= i__1; ++j) {
	    c__[j] = 0.;
	}
	return 0;
    }

/*     C is N-by-N. */
/*     If N is odd, set NISODD = .TRUE., and N1 and N2. */
/*     If N is even, NISODD = .FALSE., and NK. */

    if (*n % 2 == 0) {
	nisodd = FALSE;
	nk = *n / 2;
    } else {
	nisodd = TRUE;
	if (lower) {
	    n2 = *n / 2;
	    n1 = *n - n2;
	} else {
	    n1 = *n / 2;
	    n2 = *n - n1;
	}
    }

    if (nisodd) {

/*        N is odd */

	if (normaltransr) {

/*           N is odd and TRANSR = 'N' */

	    if (lower) {

/*              N is odd, TRANSR = 'N', and UPLO = 'L' */

		if (notrans) {

/*                 N is odd, TRANSR = 'N', UPLO = 'L', and TRANS = 'N' */

		    dsyrk_("L", "N", &n1, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[1], n);
		    dsyrk_("U", "N", &n2, k, alpha, &a[n1 + 1 + a_dim1], lda, 
			    beta, &c__[*n + 1], n);
		    dgemm_("N", "T", &n2, &n1, k, alpha, &a[n1 + 1 + a_dim1], 
			    lda, &a[a_dim1 + 1], lda, beta, &c__[n1 + 1], n);

		} else {

/*                 N is odd, TRANSR = 'N', UPLO = 'L', and TRANS = 'T' */

		    dsyrk_("L", "T", &n1, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[1], n);
		    dsyrk_("U", "T", &n2, k, alpha, &a[(n1 + 1) * a_dim1 + 1], 
			     lda, beta, &c__[*n + 1], n)
			    ;
		    dgemm_("T", "N", &n2, &n1, k, alpha, &a[(n1 + 1) * a_dim1 
			    + 1], lda, &a[a_dim1 + 1], lda, beta, &c__[n1 + 1]
, n);

		}

	    } else {

/*              N is odd, TRANSR = 'N', and UPLO = 'U' */

		if (notrans) {

/*                 N is odd, TRANSR = 'N', UPLO = 'U', and TRANS = 'N' */

		    dsyrk_("L", "N", &n1, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[n2 + 1], n);
		    dsyrk_("U", "N", &n2, k, alpha, &a[n2 + a_dim1], lda, 
			    beta, &c__[n1 + 1], n);
		    dgemm_("N", "T", &n1, &n2, k, alpha, &a[a_dim1 + 1], lda, 
			    &a[n2 + a_dim1], lda, beta, &c__[1], n);

		} else {

/*                 N is odd, TRANSR = 'N', UPLO = 'U', and TRANS = 'T' */

		    dsyrk_("L", "T", &n1, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[n2 + 1], n);
		    dsyrk_("U", "T", &n2, k, alpha, &a[n2 * a_dim1 + 1], lda, 
			    beta, &c__[n1 + 1], n);
		    dgemm_("T", "N", &n1, &n2, k, alpha, &a[a_dim1 + 1], lda, 
			    &a[n2 * a_dim1 + 1], lda, beta, &c__[1], n);

		}

	    }

	} else {

/*           N is odd, and TRANSR = 'T' */

	    if (lower) {

/*              N is odd, TRANSR = 'T', and UPLO = 'L' */

		if (notrans) {

/*                 N is odd, TRANSR = 'T', UPLO = 'L', and TRANS = 'N' */

		    dsyrk_("U", "N", &n1, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[1], &n1);
		    dsyrk_("L", "N", &n2, k, alpha, &a[n1 + 1 + a_dim1], lda, 
			    beta, &c__[2], &n1);
		    dgemm_("N", "T", &n1, &n2, k, alpha, &a[a_dim1 + 1], lda, 
			    &a[n1 + 1 + a_dim1], lda, beta, &c__[n1 * n1 + 1], 
			     &n1);

		} else {

/*                 N is odd, TRANSR = 'T', UPLO = 'L', and TRANS = 'T' */

		    dsyrk_("U", "T", &n1, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[1], &n1);
		    dsyrk_("L", "T", &n2, k, alpha, &a[(n1 + 1) * a_dim1 + 1], 
			     lda, beta, &c__[2], &n1);
		    dgemm_("T", "N", &n1, &n2, k, alpha, &a[a_dim1 + 1], lda, 
			    &a[(n1 + 1) * a_dim1 + 1], lda, beta, &c__[n1 * 
			    n1 + 1], &n1);

		}

	    } else {

/*              N is odd, TRANSR = 'T', and UPLO = 'U' */

		if (notrans) {

/*                 N is odd, TRANSR = 'T', UPLO = 'U', and TRANS = 'N' */

		    dsyrk_("U", "N", &n1, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[n2 * n2 + 1], &n2);
		    dsyrk_("L", "N", &n2, k, alpha, &a[n1 + 1 + a_dim1], lda, 
			    beta, &c__[n1 * n2 + 1], &n2);
		    dgemm_("N", "T", &n2, &n1, k, alpha, &a[n1 + 1 + a_dim1], 
			    lda, &a[a_dim1 + 1], lda, beta, &c__[1], &n2);

		} else {

/*                 N is odd, TRANSR = 'T', UPLO = 'U', and TRANS = 'T' */

		    dsyrk_("U", "T", &n1, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[n2 * n2 + 1], &n2);
		    dsyrk_("L", "T", &n2, k, alpha, &a[(n1 + 1) * a_dim1 + 1], 
			     lda, beta, &c__[n1 * n2 + 1], &n2);
		    dgemm_("T", "N", &n2, &n1, k, alpha, &a[(n1 + 1) * a_dim1 
			    + 1], lda, &a[a_dim1 + 1], lda, beta, &c__[1], &
			    n2);

		}

	    }

	}

    } else {

/*        N is even */

	if (normaltransr) {

/*           N is even and TRANSR = 'N' */

	    if (lower) {

/*              N is even, TRANSR = 'N', and UPLO = 'L' */

		if (notrans) {

/*                 N is even, TRANSR = 'N', UPLO = 'L', and TRANS = 'N' */

		    i__1 = *n + 1;
		    dsyrk_("L", "N", &nk, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[2], &i__1);
		    i__1 = *n + 1;
		    dsyrk_("U", "N", &nk, k, alpha, &a[nk + 1 + a_dim1], lda, 
			    beta, &c__[1], &i__1);
		    i__1 = *n + 1;
		    dgemm_("N", "T", &nk, &nk, k, alpha, &a[nk + 1 + a_dim1], 
			    lda, &a[a_dim1 + 1], lda, beta, &c__[nk + 2], &
			    i__1);

		} else {

/*                 N is even, TRANSR = 'N', UPLO = 'L', and TRANS = 'T' */

		    i__1 = *n + 1;
		    dsyrk_("L", "T", &nk, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[2], &i__1);
		    i__1 = *n + 1;
		    dsyrk_("U", "T", &nk, k, alpha, &a[(nk + 1) * a_dim1 + 1], 
			     lda, beta, &c__[1], &i__1);
		    i__1 = *n + 1;
		    dgemm_("T", "N", &nk, &nk, k, alpha, &a[(nk + 1) * a_dim1 
			    + 1], lda, &a[a_dim1 + 1], lda, beta, &c__[nk + 2]
, &i__1);

		}

	    } else {

/*              N is even, TRANSR = 'N', and UPLO = 'U' */

		if (notrans) {

/*                 N is even, TRANSR = 'N', UPLO = 'U', and TRANS = 'N' */

		    i__1 = *n + 1;
		    dsyrk_("L", "N", &nk, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[nk + 2], &i__1);
		    i__1 = *n + 1;
		    dsyrk_("U", "N", &nk, k, alpha, &a[nk + 1 + a_dim1], lda, 
			    beta, &c__[nk + 1], &i__1);
		    i__1 = *n + 1;
		    dgemm_("N", "T", &nk, &nk, k, alpha, &a[a_dim1 + 1], lda, 
			    &a[nk + 1 + a_dim1], lda, beta, &c__[1], &i__1);

		} else {

/*                 N is even, TRANSR = 'N', UPLO = 'U', and TRANS = 'T' */

		    i__1 = *n + 1;
		    dsyrk_("L", "T", &nk, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[nk + 2], &i__1);
		    i__1 = *n + 1;
		    dsyrk_("U", "T", &nk, k, alpha, &a[(nk + 1) * a_dim1 + 1], 
			     lda, beta, &c__[nk + 1], &i__1);
		    i__1 = *n + 1;
		    dgemm_("T", "N", &nk, &nk, k, alpha, &a[a_dim1 + 1], lda, 
			    &a[(nk + 1) * a_dim1 + 1], lda, beta, &c__[1], &
			    i__1);

		}

	    }

	} else {

/*           N is even, and TRANSR = 'T' */

	    if (lower) {

/*              N is even, TRANSR = 'T', and UPLO = 'L' */

		if (notrans) {

/*                 N is even, TRANSR = 'T', UPLO = 'L', and TRANS = 'N' */

		    dsyrk_("U", "N", &nk, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[nk + 1], &nk);
		    dsyrk_("L", "N", &nk, k, alpha, &a[nk + 1 + a_dim1], lda, 
			    beta, &c__[1], &nk);
		    dgemm_("N", "T", &nk, &nk, k, alpha, &a[a_dim1 + 1], lda, 
			    &a[nk + 1 + a_dim1], lda, beta, &c__[(nk + 1) * 
			    nk + 1], &nk);

		} else {

/*                 N is even, TRANSR = 'T', UPLO = 'L', and TRANS = 'T' */

		    dsyrk_("U", "T", &nk, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[nk + 1], &nk);
		    dsyrk_("L", "T", &nk, k, alpha, &a[(nk + 1) * a_dim1 + 1], 
			     lda, beta, &c__[1], &nk);
		    dgemm_("T", "N", &nk, &nk, k, alpha, &a[a_dim1 + 1], lda, 
			    &a[(nk + 1) * a_dim1 + 1], lda, beta, &c__[(nk + 
			    1) * nk + 1], &nk);

		}

	    } else {

/*              N is even, TRANSR = 'T', and UPLO = 'U' */

		if (notrans) {

/*                 N is even, TRANSR = 'T', UPLO = 'U', and TRANS = 'N' */

		    dsyrk_("U", "N", &nk, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[nk * (nk + 1) + 1], &nk);
		    dsyrk_("L", "N", &nk, k, alpha, &a[nk + 1 + a_dim1], lda, 
			    beta, &c__[nk * nk + 1], &nk);
		    dgemm_("N", "T", &nk, &nk, k, alpha, &a[nk + 1 + a_dim1], 
			    lda, &a[a_dim1 + 1], lda, beta, &c__[1], &nk);

		} else {

/*                 N is even, TRANSR = 'T', UPLO = 'U', and TRANS = 'T' */

		    dsyrk_("U", "T", &nk, k, alpha, &a[a_dim1 + 1], lda, beta, 
			     &c__[nk * (nk + 1) + 1], &nk);
		    dsyrk_("L", "T", &nk, k, alpha, &a[(nk + 1) * a_dim1 + 1], 
			     lda, beta, &c__[nk * nk + 1], &nk);
		    dgemm_("T", "N", &nk, &nk, k, alpha, &a[(nk + 1) * a_dim1 
			    + 1], lda, &a[a_dim1 + 1], lda, beta, &c__[1], &
			    nk);

		}

	    }

	}

    }

    return 0;

/*     End of DSFRK */

} /* dsfrk_ */
Ejemplo n.º 29
0
/* Subroutine */ int dqlt01_(integer *m, integer *n, doublereal *a, 
	doublereal *af, doublereal *q, doublereal *l, integer *lda, 
	doublereal *tau, doublereal *work, integer *lwork, doublereal *rwork, 
	doublereal *result)
{
    /* System generated locals */
    integer a_dim1, a_offset, af_dim1, af_offset, l_dim1, l_offset, q_dim1, 
	    q_offset, i__1, i__2;

    /* Local variables */
    doublereal eps;
    integer info;
    doublereal resid, anorm;
    integer minmn;


/*  -- LAPACK test routine (version 3.1) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*     .. Scalar Arguments .. */
/*     .. */
/*     .. Array Arguments .. */
/*     .. */

/*  Purpose */
/*  ======= */

/*  DQLT01 tests DGEQLF, which computes the QL factorization of an m-by-n */
/*  matrix A, and partially tests DORGQL which forms the m-by-m */
/*  orthogonal matrix Q. */

/*  DQLT01 compares L with Q'*A, and checks that Q is orthogonal. */

/*  Arguments */
/*  ========= */

/*  M       (input) INTEGER */
/*          The number of rows of the matrix A.  M >= 0. */

/*  N       (input) INTEGER */
/*          The number of columns of the matrix A.  N >= 0. */

/*  A       (input) DOUBLE PRECISION array, dimension (LDA,N) */
/*          The m-by-n matrix A. */

/*  AF      (output) DOUBLE PRECISION array, dimension (LDA,N) */
/*          Details of the QL factorization of A, as returned by DGEQLF. */
/*          See DGEQLF for further details. */

/*  Q       (output) DOUBLE PRECISION array, dimension (LDA,M) */
/*          The m-by-m orthogonal matrix Q. */

/*  L       (workspace) DOUBLE PRECISION array, dimension (LDA,max(M,N)) */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the arrays A, AF, Q and R. */
/*          LDA >= max(M,N). */

/*  TAU     (output) DOUBLE PRECISION array, dimension (min(M,N)) */
/*          The scalar factors of the elementary reflectors, as returned */
/*          by DGEQLF. */

/*  WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK) */

/*  LWORK   (input) INTEGER */
/*          The dimension of the array WORK. */

/*  RWORK   (workspace) DOUBLE PRECISION array, dimension (M) */

/*  RESULT  (output) DOUBLE PRECISION array, dimension (2) */
/*          The test ratios: */
/*          RESULT(1) = norm( L - Q'*A ) / ( M * norm(A) * EPS ) */
/*          RESULT(2) = norm( I - Q'*Q ) / ( M * EPS ) */

/*  ===================================================================== */

/*     .. Parameters .. */
/*     .. */
/*     .. Local Scalars .. */
/*     .. */
/*     .. External Functions .. */
/*     .. */
/*     .. External Subroutines .. */
/*     .. */
/*     .. Intrinsic Functions .. */
/*     .. */
/*     .. Scalars in Common .. */
/*     .. */
/*     .. Common blocks .. */
/*     .. */
/*     .. Executable Statements .. */

    /* Parameter adjustments */
    l_dim1 = *lda;
    l_offset = 1 + l_dim1;
    l -= l_offset;
    q_dim1 = *lda;
    q_offset = 1 + q_dim1;
    q -= q_offset;
    af_dim1 = *lda;
    af_offset = 1 + af_dim1;
    af -= af_offset;
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    --tau;
    --work;
    --rwork;
    --result;

    /* Function Body */
    minmn = min(*m,*n);
    eps = dlamch_("Epsilon");

/*     Copy the matrix A to the array AF. */

    dlacpy_("Full", m, n, &a[a_offset], lda, &af[af_offset], lda);

/*     Factorize the matrix A in the array AF. */

    s_copy(srnamc_1.srnamt, "DGEQLF", (ftnlen)32, (ftnlen)6);
    dgeqlf_(m, n, &af[af_offset], lda, &tau[1], &work[1], lwork, &info);

/*     Copy details of Q */

    dlaset_("Full", m, m, &c_b6, &c_b6, &q[q_offset], lda);
    if (*m >= *n) {
	if (*n < *m && *n > 0) {
	    i__1 = *m - *n;
	    dlacpy_("Full", &i__1, n, &af[af_offset], lda, &q[(*m - *n + 1) * 
		    q_dim1 + 1], lda);
	}
	if (*n > 1) {
	    i__1 = *n - 1;
	    i__2 = *n - 1;
	    dlacpy_("Upper", &i__1, &i__2, &af[*m - *n + 1 + (af_dim1 << 1)], 
		    lda, &q[*m - *n + 1 + (*m - *n + 2) * q_dim1], lda);
	}
    } else {
	if (*m > 1) {
	    i__1 = *m - 1;
	    i__2 = *m - 1;
	    dlacpy_("Upper", &i__1, &i__2, &af[(*n - *m + 2) * af_dim1 + 1], 
		    lda, &q[(q_dim1 << 1) + 1], lda);
	}
    }

/*     Generate the m-by-m matrix Q */

    s_copy(srnamc_1.srnamt, "DORGQL", (ftnlen)32, (ftnlen)6);
    dorgql_(m, m, &minmn, &q[q_offset], lda, &tau[1], &work[1], lwork, &info);

/*     Copy L */

    dlaset_("Full", m, n, &c_b13, &c_b13, &l[l_offset], lda);
    if (*m >= *n) {
	if (*n > 0) {
	    dlacpy_("Lower", n, n, &af[*m - *n + 1 + af_dim1], lda, &l[*m - *
		    n + 1 + l_dim1], lda);
	}
    } else {
	if (*n > *m && *m > 0) {
	    i__1 = *n - *m;
	    dlacpy_("Full", m, &i__1, &af[af_offset], lda, &l[l_offset], lda);
	}
	if (*m > 0) {
	    dlacpy_("Lower", m, m, &af[(*n - *m + 1) * af_dim1 + 1], lda, &l[(
		    *n - *m + 1) * l_dim1 + 1], lda);
	}
    }

/*     Compute L - Q'*A */

    dgemm_("Transpose", "No transpose", m, n, m, &c_b20, &q[q_offset], lda, &
	    a[a_offset], lda, &c_b21, &l[l_offset], lda);

/*     Compute norm( L - Q'*A ) / ( M * norm(A) * EPS ) . */

    anorm = dlange_("1", m, n, &a[a_offset], lda, &rwork[1]);
    resid = dlange_("1", m, n, &l[l_offset], lda, &rwork[1]);
    if (anorm > 0.) {
	result[1] = resid / (doublereal) max(1,*m) / anorm / eps;
    } else {
	result[1] = 0.;
    }

/*     Compute I - Q'*Q */

    dlaset_("Full", m, m, &c_b13, &c_b21, &l[l_offset], lda);
    dsyrk_("Upper", "Transpose", m, m, &c_b20, &q[q_offset], lda, &c_b21, &l[
	    l_offset], lda);

/*     Compute norm( I - Q'*Q ) / ( M * EPS ) . */

    resid = dlansy_("1", "Upper", m, &l[l_offset], lda, &rwork[1]);

    result[2] = resid / (doublereal) max(1,*m) / eps;

    return 0;

/*     End of DQLT01 */

} /* dqlt01_ */
Ejemplo n.º 30
0
  /**
   * Refit the coefficients based on the new estimates. This is sequential
   * for now ... we can think about making this parallel later.
   */
  void refit () {

    /** Create some variables for BLAS */
    int DIM = X.size();
    int UNIQ_COLS = shadow_X.size();
    int int_M = static_cast<int>(M);
    int int_K = static_cast<int>(K);

    /* Create all the required buffers */
    RandomAccessContainer A_aug (M * DIM);
    RandomAccessContainer Y_aug (M * UNIQ_COLS);
    RandomAccessContainer A_aug_T_A_aug (DIM*DIM);
    RandomAccessContainer A_aug_T_Y (DIM*K);
    RandomAccessContainer W_effects (DIM*UNIQ_COLS);
    RandomAccessContainer index_vector (DIM);

    /* Create all the pointers */
    double* Y_ptr = const_cast<double*>(&(Y[0]));
    double* A_aug_ptr = &(A_aug[0]);
    double* Y_aug_ptr = &(Y_aug[0]);
    double* A_aug_T_A_aug_ptr = &(A_aug_T_A_aug[0]);
    double* A_aug_T_Y_ptr = &(A_aug_T_Y[0]);
    double* W_effects_ptr = &(W_effects[0]);

    if (ROOT==mpi_rank) {
      /* Create A_aug */
      ColMapIterType outer_iter = shadow_X.begin();
      int col_number = 0; int W_col_index = 0;

      /* Iterate over one column of X at at time */
      while (outer_iter != shadow_X.end()) {

        EdgeListIteratorType inner_iter=(*outer_iter).second.begin();
        const EdgeListIteratorType inner_end = (*outer_iter).second.end();

        /* Add one row at a time to A_aug */
        while (inner_iter != inner_end) {

          const int A_column = (*inner_iter).source;
          const int A_aug_col_index = M*col_number;
          materializer (snp_map(A_column), A_aug.begin()+A_aug_col_index);
          index_vector[col_number] = W_col_index;

          ++col_number;
          ++inner_iter;
        }

        /* Add the corresponding column of Y to Y_aug */
        const int Y_col = (*outer_iter).first;
        dcopy_ (&int_M, 
                Y_ptr+(Y_col*M), 
                &ONE_STEP, 
                Y_aug_ptr+(M*W_col_index), 
                &ONE_STEP);

        ++W_col_index;
        ++outer_iter;
      }
      
      /* Create A_aug'A_aug */
      dsyrk_ (&UPPER,
              &TRANS,
              &DIM,
              &int_M,
              &PLUS_ONE,
              A_aug_ptr,
              &int_M,
              &ZERO,
              A_aug_T_A_aug_ptr,
              &DIM);
      
      /* Create A_aug'Y */
      dgemm_ (&TRANS,
              &NO_TRANS,
              &DIM,
              &int_K,
              &int_M,
              &PLUS_ONE,
              A_aug_ptr,
              &int_M,
              Y_aug_ptr,
              &int_M,
              &ZERO,
              A_aug_T_Y_ptr,
              &DIM);

      /* Create the required columns of W */
      {
        ColMapIterType outer_iter = shadow_X.begin();
        int col=0;
        while (outer_iter != shadow_X.end()) {
          fill_in_W (W_effects.begin()+(DIM*col), (*outer_iter).first);
          ++outer_iter;
          ++col;
        }
      } 
      
      /* Add in the effects of W to A_aug_T_A_aug and A_aug_T_Y */
      for (int i=0; i<DIM; ++i) 
        for (int j=0; j<DIM; ++j) 
          A_aug_T_A_aug [i*DIM+j] = W_effects[(index_vector[i]*DIM)+j];
      
      for (int i=0; i<UNIQ_COLS; ++i) 
        for (int j=0; j<DIM; ++j) 
          A_aug_T_Y [j] += (A_aug_T_Y [(i*DIM)+j] * W_effects[(i*DIM)+j]);
      
      /* Factorize A_aug_T_A_aug */
      int result;
      dpotrf_ (&UPPER, 
               &DIM,
               A_aug_T_A_aug_ptr, 
               &DIM,
               &result);
      
      /* x = forwardsolve (C,x) */
      dtrsm_(&LEFT,
             &UPPER,
             &TRANS,
             &NO_DIAG,
             &DIM,
             &ONE_STEP,
             &PLUS_ONE,
             A_aug_T_A_aug_ptr,
             &DIM,
             A_aug_T_Y_ptr,
             &DIM);
      
      /* x = backsolve (C,x) */
      dtrsm_(&LEFT,
             &UPPER,
             &NO_TRANS,
             &NO_DIAG,
             &DIM,
             &ONE_STEP,
             &PLUS_ONE,
             A_aug_T_A_aug_ptr,
             &DIM,
             A_aug_T_Y_ptr,
             &DIM);
    }

    /* Broadcast coordinates to everyone */
    MPI_Bcast (A_aug_T_Y_ptr, DIM, MPI_DOUBLE, ROOT, MPI_COMM_WORLD);

    /* Re-populate these coordinates into the selected set */
    {
      ColMapIterType outer_iter = shadow_X.begin();
      int col_number = 0;
      while (outer_iter != shadow_X.end()) {
        EdgeListIteratorType inner_iter=(*outer_iter).second.begin();
        EdgeListIteratorType inner_end  = (*outer_iter).second.end();
        while (inner_iter != inner_end) {
          edge_type current_X = (*inner_iter);
          X.erase (current_X);
          current_X.weight = A_aug_T_Y [col_number];
          X.insert (current_X);
          ++col_number;
          ++inner_iter;
        }
        ++outer_iter;
      }
    }
  }