/**
 * Re-evaluates t0 and p0 from the supplied temperature T and pressure p by
 * repeated substitution, holding either the Mach number or the velocity
 * fixed at ref_val.
 *
 * Each pass sets the Mach number, assigns t0 = T/TAU() and p0 = p/PF(),
 * measures the relative mismatch between those values and a fresh evaluation
 * of T/TAU() and p/PF(), and finally sets the velocity through Wg().
 * Iteration stops once both relative mismatches are at or below 1e-4, or
 * after 100 passes.
 *
 * @param T        temperature used as the numerator of t0.
 * @param p        pressure used as the numerator of p0.
 * @param ref_val  Mach number when fv == FV_MACH, velocity when
 *                 fv == FV_VELOCITY.
 * @param fv       selects which quantity is held fixed; any other value
 *                 leaves the object untouched.
 */
void Flow::CorrectFlow(FP T, FP p, FP ref_val, FixedValue fv)
{
    // Kept as double so the comparisons promote exactly like a bare literal.
    const double kTolerance     = 0.0001;
    const int    kMaxIterations = 100;

    FP   res_t = 1.0;
    FP   res_p = 1.0;
    int  iter  = 0;
    bool keep_iterating = true;

    if (fv == FV_MACH) {
        // Mach number is prescribed; velocity follows from the speed of sound.
        while (keep_iterating) {
            MACH(ref_val);
            t0 = T / TAU();
            p0 = p / PF();
            res_p = fabs((p0 - p / PF()) / p0);
            res_t = fabs((t0 - T / TAU()) / t0);
            Wg(ref_val * Asound());
            ++iter;
            keep_iterating = (res_p > kTolerance || res_t > kTolerance)
                             && iter < kMaxIterations;
        }
    } else if (fv == FV_VELOCITY) {
        // Velocity is prescribed; Mach number follows from the speed of sound.
        while (keep_iterating) {
            MACH(ref_val / Asound());
            t0 = T / TAU();
            p0 = p / PF();
            res_p = fabs((p0 - p / PF()) / p0);
            res_t = fabs((t0 - T / TAU()) / t0);
            Wg(ref_val);
            ++iter;
            keep_iterating = (res_p > kTolerance || res_t > kTolerance)
                             && iter < kMaxIterations;
        }
    }
}
extern "C" void magma_ctrdtype1cbHLsym_withQ( magma_int_t N, magma_int_t NB, magmaFloatComplex *A, magma_int_t LDA, magmaFloatComplex *V, magmaFloatComplex *TAU, magma_int_t st, magma_int_t ed, magma_int_t sweep, magma_int_t Vblksiz) { //magma_int_t J1, J2, J3, i, j; magma_int_t len, LDX; magma_int_t IONE=1; magma_int_t blkid, vpos, taupos, tpos; //magmaFloatComplex conjtmp; magmaFloatComplex Z_ONE = MAGMA_C_ONE; magmaFloatComplex *WORK; magma_cmalloc_cpu( &WORK, N ); findVTpos(N,NB,Vblksiz,sweep-1,st-1, &vpos, &taupos, &tpos, &blkid); //printf("voici vpos %d taupos %d tpos %d blkid %d \n", vpos, taupos, tpos, blkid); LDX = LDA-1; len = ed-st+1; *V(vpos) = Z_ONE; memcpy(V(vpos+1), A(st+1, st-1), (len-1)*sizeof(magmaFloatComplex)); memset(A(st+1, st-1), 0, (len-1)*sizeof(magmaFloatComplex)); /* Eliminate the col at st-1 */ lapackf77_clarfg( &len, A(st, st-1), V(vpos+1), &IONE, TAU(taupos) ); /* apply left and right on A(st:ed,st:ed)*/ magma_clarfxsym(len,A(st,st),LDX,V(vpos),TAU(taupos)); //conjtmp = MAGMA_C_CONJ(*TAU(taupos)); //lapackf77_clarfy("L", &len, V(vpos), &IONE, &conjtmp, A(st,st), &LDX, WORK); //&(MAGMA_C_CONJ(*TAU(taupos))) magma_free_cpu(WORK); }
/**
 * Type-1 bulge-chasing kernel (single-precision real, reflectors kept).
 *
 * Generates the Householder reflector that eliminates column st-1 and
 * applies it from both sides to A(st:ed, st:ed).
 *
 * A(), V() and TAU() are function-like macros defined elsewhere in this
 * file; each yields the address of an element.
 *
 * @param n, nb, Vblksiz, ldv  forwarded to magma_bulge_findVTAUpos().
 * @param A, lda               matrix storage; the two-sided update is issued
 *                             with leading dimension lda-1.
 * @param V, TAU               receive the reflector and its scalar factor.
 * @param st, ed               first and last index of the block.
 * @param sweep                sweep number; sweep-1 is used for the lookup.
 * @param work                 workspace handed to magma_slarfxsym_v2()
 *                             (the original note gives its dimension as N).
 */
extern "C" void
magma_strdtype1cbHLsym_withQ_v2(
    magma_int_t n, magma_int_t nb,
    float *A, magma_int_t lda,
    float *V, magma_int_t ldv,
    float *TAU,
    magma_int_t st, magma_int_t ed,
    magma_int_t sweep, magma_int_t Vblksiz,
    float *work)
{
    magma_int_t unit_stride = 1;
    magma_int_t vpos, taupos;
    const float one = MAGMA_S_ONE;

    magma_bulge_findVTAUpos(n, nb, Vblksiz, sweep-1, st-1, ldv, &vpos, &taupos);

    magma_int_t block_len = ed - st + 1;
    magma_int_t tail_len  = block_len - 1;

    /* V := [1; A(st+1:ed, st-1)], then clear the copied entries in A. */
    *V(vpos) = one;
    blasf77_scopy( &tail_len, A(st+1, st-1), &unit_stride, V(vpos+1), &unit_stride );
    memset(A(st+1, st-1), 0, (block_len-1)*sizeof(float));

    /* Eliminate the column at st-1. */
    lapackf77_slarfg( &block_len, A(st, st-1), V(vpos+1), &unit_stride, TAU(taupos) );

    /* Two-sided application on A(st:ed, st:ed). */
    magma_slarfxsym_v2(block_len, A(st,st), lda-1, V(vpos), TAU(taupos), work);
}
extern "C" void magma_ctrdtype2cbHLsym_withQ_v2( magma_int_t n, magma_int_t nb, magmaFloatComplex *A, magma_int_t lda, magmaFloatComplex *V, magma_int_t ldv, magmaFloatComplex *TAU, magma_int_t st, magma_int_t ed, magma_int_t sweep, magma_int_t Vblksiz, magmaFloatComplex *work) { /* WORK (workspace) float complex array, dimension NB */ magma_int_t ione = 1; magma_int_t vpos, taupos; magmaFloatComplex conjtmp; magmaFloatComplex c_one = MAGMA_C_ONE; magma_int_t ldx = lda-1; magma_int_t len = ed - st + 1; magma_int_t lem = min(ed+nb, n) - ed; magma_int_t lem2; if (lem > 0) { magma_bulge_findVTAUpos(n, nb, Vblksiz, sweep-1, st-1, ldv, &vpos, &taupos); /* apply remaining right coming from the top block */ lapackf77_clarfx("R", &lem, &len, V(vpos), TAU(taupos), A(ed+1, st), &ldx, work); } if (lem > 1) { magma_bulge_findVTAUpos(n, nb, Vblksiz, sweep-1, ed, ldv, &vpos, &taupos); /* remove the first column of the created bulge */ *V(vpos) = c_one; //memcpy(V(vpos+1), A(ed+2, st), (lem-1)*sizeof(magmaFloatComplex)); lem2 = lem-1; blasf77_ccopy( &lem2, A(ed+2, st), &ione, V(vpos+1), &ione ); memset(A(ed+2, st),0,(lem-1)*sizeof(magmaFloatComplex)); /* Eliminate the col at st */ lapackf77_clarfg( &lem, A(ed+1, st), V(vpos+1), &ione, TAU(taupos) ); /* apply left on A(J1:J2,st+1:ed) */ len = len-1; /* because we start at col st+1 instead of st. col st is the col that has been removed; */ conjtmp = MAGMA_C_CNJG(*TAU(taupos)); lapackf77_clarfx("L", &lem, &len, V(vpos), &conjtmp, A(ed+1, st+1), &ldx, work); } }
extern "C" void magma_ctrdtype3cbHLsym_withQ( magma_int_t N, magma_int_t NB, magmaFloatComplex *A, magma_int_t LDA, magmaFloatComplex *V, magmaFloatComplex *TAU, magma_int_t st, magma_int_t ed, magma_int_t sweep, magma_int_t Vblksiz) { //magma_int_t J1, J2, J3, i, j; magma_int_t len, LDX; //magma_int_t IONE=1; magma_int_t blkid, vpos, taupos, tpos; //magmaFloatComplex conjtmp; magmaFloatComplex *WORK; magma_cmalloc_cpu( &WORK, N ); findVTpos(N,NB,Vblksiz,sweep-1,st-1, &vpos, &taupos, &tpos, &blkid); LDX = LDA-1; len = ed-st+1; /* apply left and right on A(st:ed,st:ed)*/ magma_clarfxsym(len,A(st,st),LDX,V(vpos),TAU(taupos)); //conjtmp = MAGMA_C_CONJ(*TAU(taupos)); //lapackf77_clarfy("L", &len, V(vpos), &IONE, &(MAGMA_C_CONJ(*TAU(taupos))), A(st,st), &LDX, WORK); magma_free_cpu(WORK); }
/***************************************************************************//**
 *  TYPE 3-BAND Lower-columnwise-Householder
 *
 *  Applies an already stored reflector from the left and the right to
 *  A(st:ed, st:ed) through magma_dlarfy().
 *
 *  When wantz == 0 the reflector slot is computed directly as
 *  (sweep % 2) * n + st for both V and TAU; otherwise it is obtained from
 *  magma_bulge_findVTAUpos().
 *
 *  A(), V() and TAU() are function-like macros defined elsewhere in this
 *  file; each yields the address of an element.
 *
 *  @param n, nb, Vblksiz, ldv  used for the V/TAU position lookup.
 *  @param A, lda               matrix storage; the update uses lda-1.
 *  @param V, TAU               reflector storage.
 *  @param st, ed               first and last index of the block.
 *  @param sweep                sweep number.
 *  @param wantz                0 selects the compact position formula.
 *  @param work                 workspace handed to magma_dlarfy().
 ***************************************************************************/
extern "C" void
magma_dsbtype3cb(
    magma_int_t n, magma_int_t nb,
    double *A, magma_int_t lda,
    double *V, magma_int_t ldv,
    double *TAU,
    magma_int_t st, magma_int_t ed, magma_int_t sweep,
    magma_int_t Vblksiz, magma_int_t wantz,
    double *work)
{
    magma_int_t vpos, taupos;

    if ( wantz != 0 ) {
        magma_bulge_findVTAUpos(n, nb, Vblksiz, sweep, st, ldv, &vpos, &taupos);
    }
    else {
        /* Two alternating length-n slots, selected by the sweep parity. */
        const magma_int_t slot = (sweep%2)*n + st;
        vpos   = slot;
        taupos = slot;
    }

    const magma_int_t block_len = ed - st + 1;

    /* Apply left and right on A(st:ed,st:ed) */
    magma_dlarfy(block_len, A(st,st), lda-1, V(vpos), TAU(taupos), work);
}
/**
 * Computes, for the share of blocks owned by thread my_core_id, the
 * triangular factors T of the block reflectors stored in V/TAU
 * (single precision) by calling slarft on each owned block.
 *
 * Blocks are visited from the last block-column to the first and, inside a
 * block-column, from the last block-row to the first.  For each block the
 * inner k-loop determines how many reflectors it holds (vnb) and their
 * common length (vlen).  Ownership is decided from the block id returned by
 * magma_bulge_findVTAUTpos(): owner = (blkid / blkpercore) % cores_num.
 *
 * V(), TAU() and T() are function-like macros defined elsewhere in this
 * file; each yields the address of an element.
 *
 * Fixes relative to the straightforward formulation:
 *  - blkpercore is clamped to at least 1.  With fewer blocks than cores the
 *    integer quotient blkcnt/cores_num is 0 and blkid/blkpercore would
 *    divide by zero.
 *  - a non-positive cores_num returns immediately instead of dividing by it.
 *  - the diagnostic printf casts its arguments to int so that "%d" is
 *    correct even when magma_int_t is wider than int.
 *
 * @param my_core_id  index of the calling thread.
 * @param cores_num   number of cooperating threads.
 * @param V, ldv      reflector vectors and their leading dimension.
 * @param TAU         scalar factors of the reflectors.
 * @param T, ldt      output triangular factors and their leading dimension.
 * @param n, nb       matrix order and band width.
 * @param Vblksiz     number of reflectors grouped per block.
 */
static void magma_stile_bulge_computeT_parallel(
    magma_int_t my_core_id, magma_int_t cores_num,
    float *V, magma_int_t ldv, float *TAU,
    float *T, magma_int_t ldt,
    magma_int_t n, magma_int_t nb, magma_int_t Vblksiz)
{
    magma_int_t firstcolj;
    magma_int_t rownbm;
    magma_int_t st, ed, fst, vlen, vnb, colj;
    magma_int_t blkid, vpos, taupos, tpos;
    magma_int_t blkpercore, myid;

    if (n <= 0 || cores_num <= 0)
        return;

    magma_int_t blkcnt = magma_bulge_get_blkcnt(n, nb, Vblksiz);
    blkpercore = blkcnt/cores_num;
    if (blkpercore == 0)
        blkpercore = 1;          /* fewer blocks than cores */

    magma_int_t nbGblk = magma_ceildiv(n-1, Vblksiz);

    if (my_core_id == 0)
        printf(" COMPUTE T parallel threads %d with N %d NB %d Vblksiz %d \n",
               (int) cores_num, (int) n, (int) nb, (int) Vblksiz);

    for (magma_int_t bg = nbGblk; bg > 0; bg--)
    {
        firstcolj = (bg-1)*Vblksiz + 1;
        rownbm    = magma_ceildiv(n-(firstcolj+1), nb);
        /* The last block-column has size 1 (used in the real case to
         * handle A(N,N-1)). */
        if (bg == nbGblk)
            rownbm = magma_ceildiv(n-firstcolj, nb);

        for (magma_int_t m = rownbm; m > 0; m--)
        {
            vlen = 0;
            vnb  = 0;
            /* fst is the start row for k == 0; it does not change in the
             * k-loop, so it is computed once here. */
            colj = (bg-1)*Vblksiz;
            fst  = (rownbm-m)*nb + colj + 1;

            for (magma_int_t k = 0; k < Vblksiz; k++)
            {
                colj = (bg-1)*Vblksiz + k;
                st   = (rownbm-m)*nb + colj + 1;
                ed   = min(st+nb-1, n-1);
                if (st > ed)
                    break;
                if ((st == ed) && (colj != n-2))
                    break;
                vlen = ed-fst+1;
                vnb  = k+1;
            }

            colj = (bg-1)*Vblksiz;
            magma_bulge_findVTAUTpos(n, nb, Vblksiz, colj, fst, ldv, ldt,
                                     &vpos, &taupos, &tpos, &blkid);

            myid = blkid/blkpercore;
            if (my_core_id == (myid%cores_num)) {
                if ((vlen > 0) && (vnb > 0))
                    lapackf77_slarft( "F", "C", &vlen, &vnb, V(vpos), &ldv,
                                      TAU(taupos), T(tpos), &ldt);
            }
        }
    }
}
extern "C" void magma_ctrdtype2cbHLsym_withQ( magma_int_t N, magma_int_t NB, magmaFloatComplex *A, magma_int_t LDA, magmaFloatComplex *V, magmaFloatComplex *TAU, magma_int_t st, magma_int_t ed, magma_int_t sweep, magma_int_t Vblksiz) { magma_int_t J1, J2, len, lem, LDX; //magma_int_t i, j; magma_int_t IONE=1; magma_int_t blkid, vpos, taupos, tpos; magmaFloatComplex conjtmp; magmaFloatComplex Z_ONE = MAGMA_C_ONE; //magmaFloatComplex WORK[NB]; magmaFloatComplex *WORK; magma_cmalloc_cpu( &WORK, NB ); findVTpos(N,NB,Vblksiz,sweep-1,st-1, &vpos, &taupos, &tpos, &blkid); LDX = LDA-1; J1 = ed+1; J2 = min(ed+NB,N); len = ed-st+1; lem = J2-J1+1; if (lem > 0) { /* apply remaining right commming from the top block */ lapackf77_clarfx("R", &lem, &len, V(vpos), TAU(taupos), A(J1, st), &LDX, WORK); } if (lem > 1) { findVTpos(N,NB,Vblksiz,sweep-1,J1-1, &vpos, &taupos, &tpos, &blkid); /* remove the first column of the created bulge */ *V(vpos) = Z_ONE; memcpy(V(vpos+1), A(J1+1, st), (lem-1)*sizeof(magmaFloatComplex)); memset(A(J1+1, st),0,(lem-1)*sizeof(magmaFloatComplex)); /* Eliminate the col at st */ lapackf77_clarfg( &lem, A(J1, st), V(vpos+1), &IONE, TAU(taupos) ); /* apply left on A(J1:J2,st+1:ed) */ len = len-1; /* because we start at col st+1 instead of st. col st is the col that has been revomved; */ conjtmp = MAGMA_C_CONJ(*TAU(taupos)); lapackf77_clarfx("L", &lem, &len, V(vpos), &conjtmp, A(J1, st+1), &LDX, WORK); } magma_free_cpu(WORK); }
/**
 * Type-1 band kernel (double precision).
 *
 * Builds the Householder reflector that eliminates column st-1 and applies
 * it from the left and the right to A(st:ed, st:ed) via magma_dlarfy().
 *
 * Where the reflector is stored depends on wantz: when no eigenvectors are
 * required (wantz == 0) V and TAU live in a vector of size n and the slot is
 * (sweep % 2) * n + st; otherwise the slot comes from
 * magma_bulge_findVTAUpos().
 *
 * A(), V() and TAU() are function-like macros defined elsewhere in this
 * file; each yields the address of an element.
 *
 * @param n, nb, Vblksiz, ldv  used for the V/TAU position lookup.
 * @param A, lda               matrix storage; the update uses lda-1.
 * @param V, TAU               receive the reflector and its scalar factor.
 * @param st, ed               first and last index of the block.
 * @param sweep                sweep number.
 * @param wantz                0 selects the compact position formula.
 * @param work                 workspace handed to magma_dlarfy().
 */
extern "C" void
magma_dsbtype1cb(
    magma_int_t n, magma_int_t nb,
    double *A, magma_int_t lda,
    double *V, magma_int_t ldv,
    double *TAU,
    magma_int_t st, magma_int_t ed, magma_int_t sweep,
    magma_int_t Vblksiz, magma_int_t wantz,
    double *work)
{
    magma_int_t unit_stride = 1;
    magma_int_t vpos, taupos;
    const double one = MAGMA_D_ONE;

    if ( wantz != 0 ) {
        magma_bulge_findVTAUpos(n, nb, Vblksiz, sweep, st, ldv, &vpos, &taupos);
    }
    else {
        const magma_int_t slot = (sweep%2)*n + st;
        vpos   = slot;
        taupos = slot;
    }

    magma_int_t block_len = ed - st + 1;

    /* V := [1; A(st+1:ed, st-1)], then clear the copied entries in A. */
    *(V(vpos)) = one;
    memcpy( V(vpos+1), A(st+1, st-1), (block_len-1)*sizeof(double) );
    memset( A(st+1, st-1), 0, (block_len-1)*sizeof(double) );

    /* Eliminate the column at st-1. */
    lapackf77_dlarfg( &block_len, A(st, st-1), V(vpos+1), &unit_stride, TAU(taupos) );

    /* Apply left and right on A(st:ed,st:ed). */
    magma_dlarfy(block_len, A(st,st), lda-1, V(vpos), TAU(taupos), work);
}
extern "C" void magma_ztrdtype1cbHLsym_withQ_v2(magma_int_t n, magma_int_t nb, magmaDoubleComplex *A, magma_int_t lda, magmaDoubleComplex *V, magma_int_t ldv, magmaDoubleComplex *TAU, magma_int_t st, magma_int_t ed, magma_int_t sweep, magma_int_t Vblksiz, magmaDoubleComplex *work) { /* WORK (workspace) double complex array, dimension N */ magma_int_t ione = 1; magma_int_t vpos, taupos, len; magmaDoubleComplex c_one = MAGMA_Z_ONE; magma_bulge_findVTAUpos(n, nb, Vblksiz, sweep-1, st-1, ldv, &vpos, &taupos); //printf("voici vpos %d taupos %d tpos %d blkid %d \n", vpos, taupos, tpos, blkid); len = ed-st+1; *V(vpos) = c_one; cblas_zcopy(len-1, A(st+1, st-1), ione, V(vpos+1), ione); //memcpy(V(vpos+1), A(st+1, st-1), (len-1)*sizeof(magmaDoubleComplex)); memset(A(st+1, st-1), 0, (len-1)*sizeof(magmaDoubleComplex)); /* Eliminate the col at st-1 */ lapackf77_zlarfg( &len, A(st, st-1), V(vpos+1), &ione, TAU(taupos) ); /* apply left and right on A(st:ed,st:ed)*/ magma_zlarfxsym_v2(len, A(st,st), lda-1, V(vpos), TAU(taupos), work); }
int SpIO_H5ReadTau(hid_t h5f_id, Zone *zone)
/* Read the "TAU" table from an HDF5 file and copy row i of the table into
 * the tau array of the i-th child of zone.
 *
 * Each table record holds nrad doubles, where nrad is taken from
 * zone->data->mol.
 *
 * Returns 0 on success, or the value of Err_SETSTRING() if the table could
 * not be read. On a failed read the children's tau arrays are left
 * untouched, so existing values are not overwritten with the zero-filled
 * scratch buffer. */
{
	SpPhys *pp = zone->data;

	/* Just in case the programmer did something stupid */
	Deb_ASSERT(pp->mol != NULL);
	Deb_ASSERT(pp->tau != NULL);

	int status = 0;
	size_t
		i, j,
		nrad = pp->mol->nrad,
		record_size = sizeof(double) * nrad,
		field_sizes[nrad],
		field_offsets[nrad];
	/* Scratch table: one row of nrad values per child */
	double *tau = Mem_CALLOC(zone->nchildren * nrad, tau);
	herr_t hstatus;

	/* Init fields: nrad consecutive doubles per record */
	for(i = 0; i < nrad; i++) {
		field_offsets[i] = i * sizeof(double);
		field_sizes[i] = sizeof(double);
	}

	/* Read tau table */
	hstatus = H5TBread_table(h5f_id, "TAU", record_size, field_offsets, field_sizes, tau);
	if(hstatus < 0)
		status = Err_SETSTRING("Error reading HDF5 `%s' table", "TAU");

	#define TAU(i, j) tau[(j) + nrad * (i)]

	/* Only distribute the data if the read actually succeeded */
	if(hstatus >= 0) {
		for(i = 0; i < zone->nchildren; i++) {
			pp = zone->children[i]->data;

			for(j = 0; j < nrad; j++) {
				pp->tau[j] = TAU(i, j);
			}
		}
	}

	#undef TAU

	free(tau);

	return status;
}
/**
 * Type-3 bulge-chasing kernel (single-precision real, reflectors kept).
 *
 * Looks up the reflector stored for (sweep-1, st-1) and applies it from the
 * left and the right to A(st:ed, st:ed).
 *
 * A(), V() and TAU() are function-like macros defined elsewhere in this
 * file; each yields the address of an element.
 *
 * @param n, nb, Vblksiz, ldv  forwarded to magma_bulge_findVTAUpos().
 * @param A, lda               matrix storage; the update uses lda-1.
 * @param V, TAU               previously stored reflector and scalar factor.
 * @param st, ed               first and last index of the block.
 * @param sweep                sweep number; sweep-1 is used for the lookup.
 * @param work                 workspace handed to magma_slarfxsym_v2()
 *                             (the original note gives its dimension as N).
 */
extern "C" void
magma_strdtype3cbHLsym_withQ_v2(
    magma_int_t n, magma_int_t nb,
    float *A, magma_int_t lda,
    float *V, magma_int_t ldv,
    float *TAU,
    magma_int_t st, magma_int_t ed,
    magma_int_t sweep, magma_int_t Vblksiz,
    float *work)
{
    magma_int_t vpos;
    magma_int_t taupos;
    magma_bulge_findVTAUpos(n, nb, Vblksiz, sweep-1, st-1, ldv, &vpos, &taupos);

    const magma_int_t block_len = ed - st + 1;

    /* Two-sided application on A(st:ed, st:ed). */
    magma_slarfxsym_v2(block_len, A(st,st), lda-1, V(vpos), TAU(taupos), work);
}
/*  DORMBR (LAPACK 2.0, f2c translation)

    Purpose
    =======
    If VECT = 'Q', overwrites the general real M-by-N matrix C with

                    SIDE = 'L'     SIDE = 'R'
    TRANS = 'N':      Q * C          C * Q
    TRANS = 'T':      Q**T * C       C * Q**T

    If VECT = 'P', overwrites C with

                    SIDE = 'L'     SIDE = 'R'
    TRANS = 'N':      P * C          C * P
    TRANS = 'T':      P**T * C       C * P**T

    Q and P**T are the orthogonal matrices determined by DGEBRD when
    reducing a real matrix A to bidiagonal form, A = Q * B * P**T, and are
    products of elementary reflectors H(i) and G(i) respectively.

    Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'; nq is the order of
    the matrix that is applied.

    VECT = 'Q': A is assumed to have been NQ-by-K:
       nq >= k:  Q = H(1) H(2) . . . H(k);
       nq <  k:  Q = H(1) H(2) . . . H(nq-1).
    VECT = 'P': A is assumed to have been K-by-NQ:
       k <  nq:  P = G(1) G(2) . . . G(k);
       k >= nq:  P = G(1) G(2) . . . G(nq-1).

    Arguments
    =========
    VECT   (input)  'Q': apply Q or Q**T;  'P': apply P or P**T.
    SIDE   (input)  'L': apply from the left;  'R': from the right.
    TRANS  (input)  'N': no transpose;  'T': transpose.
    M      (input)  rows of C.  M >= 0.
    N      (input)  columns of C.  N >= 0.
    K      (input)  VECT = 'Q': columns of the matrix reduced by DGEBRD;
                    VECT = 'P': rows of that matrix.  K >= 0.
    A      (input)  dimension (LDA,min(nq,K)) if VECT = 'Q',
                    (LDA,nq) if VECT = 'P'; reflector vectors from DGEBRD.
    LDA    (input)  VECT = 'Q': LDA >= max(1,nq);
                    VECT = 'P': LDA >= max(1,min(nq,K)).
    TAU    (input)  dimension (min(nq,K)); scalar factors of the reflectors
                    (TAUQ or TAUP from DGEBRD).
    C      (in/out) dimension (LDC,N); overwritten by the product.
    LDC    (input)  LDC >= max(1,M).
    WORK   (workspace/output) dimension (LWORK).  This routine stores 1 in
                    WORK(1) before its own quick return; the value left on
                    exit is whatever DORMQR/DORMLQ write there.
    LWORK  (input)  SIDE = 'L': LWORK >= max(1,N);
                    SIDE = 'R': LWORK >= max(1,M).
    INFO   (output) = 0: success;  < 0: -INFO is the index of the illegal
                    argument.

    Always returns 0; errors are reported through INFO and xerbla_.

    Implementation note: all scratch variables are automatic (not static),
    so concurrent calls do not share state.
*/
/* Subroutine */ int dormbr_(char *vect, char *side, char *trans, integer *m,
	integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau,
	doublereal *c, integer *ldc, doublereal *work, integer *lwork,
	integer *info)
{
    /* System generated locals */
    integer i__1, i__2;

    /* Local variables */
    logical left, notran, applyq;
    integer iinfo, i1, i2, mi, ni, nq, nw;
    char transt[1];

    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int xerbla_(char *, integer *), dormlq_(
	    char *, char *, integer *, integer *, integer *, doublereal *,
	    integer *, doublereal *, doublereal *, integer *, doublereal *,
	    integer *, integer *);
    extern /* Subroutine */ int dormqr_(char *, char *, integer *, integer *,
	    integer *, doublereal *, integer *, doublereal *, doublereal *,
	    integer *, doublereal *, integer *, integer *);

    /* 1-based, column-major accessors mirroring the Fortran indexing */
#define TAU(I) tau[(I)-1]
#define WORK(I) work[(I)-1]
#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]
#define C(I,J) c[(I)-1 + ((J)-1)* ( *ldc)]

    /* Test the input arguments */
    *info = 0;
    applyq = lsame_(vect, "Q");
    left = lsame_(side, "L");
    notran = lsame_(trans, "N");

    /* NQ is the order of Q or P and NW is the minimum dimension of WORK */
    if (left) {
	nq = *m;
	nw = *n;
    } else {
	nq = *n;
	nw = *m;
    }

    if (! applyq && ! lsame_(vect, "P")) {
	*info = -1;
    } else if (! left && ! lsame_(side, "R")) {
	*info = -2;
    } else if (! notran && ! lsame_(trans, "T")) {
	*info = -3;
    } else if (*m < 0) {
	*info = -4;
    } else if (*n < 0) {
	*info = -5;
    } else if (*k < 0) {
	*info = -6;
    } else {
	/* Computing MAX */
	i__1 = 1, i__2 = min(nq,*k);
	if ((applyq && *lda < max(1,nq)) ||
	    (! applyq && *lda < max(i__1,i__2))) {
	    *info = -8;
	} else if (*ldc < max(1,*m)) {
	    *info = -11;
	} else if (*lwork < max(1,nw)) {
	    *info = -13;
	}
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("DORMBR", &i__1);
	return 0;
    }

    /* Quick return if possible */
    WORK(1) = 1.;
    if (*m == 0 || *n == 0) {
	return 0;
    }

    if (applyq) {

	/* Apply Q */
	if (nq >= *k) {

	    /* Q was determined by a call to DGEBRD with nq >= k */
	    dormqr_(side, trans, m, n, k, &A(1,1), lda, &TAU(1), &C(1,1),
		    ldc, &WORK(1), lwork, &iinfo);
	} else if (nq > 1) {

	    /* Q was determined by a call to DGEBRD with nq < k:
	       only nq-1 reflectors exist and they act on rows/columns 2.. */
	    if (left) {
		mi = *m - 1;
		ni = *n;
		i1 = 2;
		i2 = 1;
	    } else {
		mi = *m;
		ni = *n - 1;
		i1 = 1;
		i2 = 2;
	    }
	    i__1 = nq - 1;
	    dormqr_(side, trans, &mi, &ni, &i__1, &A(2,1), lda, &TAU(1),
		    &C(i1,i2), ldc, &WORK(1), lwork, &iinfo);
	}
    } else {

	/* Apply P: DORMLQ is called with the opposite transpose flag */
	if (notran) {
	    *(unsigned char *)transt = 'T';
	} else {
	    *(unsigned char *)transt = 'N';
	}
	if (nq > *k) {

	    /* P was determined by a call to DGEBRD with nq > k */
	    dormlq_(side, transt, m, n, k, &A(1,1), lda, &TAU(1), &C(1,1),
		    ldc, &WORK(1), lwork, &iinfo);
	} else if (nq > 1) {

	    /* P was determined by a call to DGEBRD with nq <= k */
	    if (left) {
		mi = *m - 1;
		ni = *n;
		i1 = 2;
		i2 = 1;
	    } else {
		mi = *m;
		ni = *n - 1;
		i1 = 1;
		i2 = 2;
	    }
	    i__1 = nq - 1;
	    dormlq_(side, transt, &mi, &ni, &i__1, &A(1,2), lda, &TAU(1),
		    &C(i1,i2), ldc, &WORK(1), lwork, &iinfo);
	}
    }
    return 0;

    /* End of DORMBR */

} /* dormbr_ */
/*  SSYTD2 (LAPACK 2.0, f2c translation)

    Purpose
    =======
    Reduces a real symmetric matrix A to symmetric tridiagonal form T by an
    orthogonal similarity transformation:  Q' * A * Q = T.

    Arguments
    =========
    UPLO  (input)  'U': upper triangle of A is stored;
                   'L': lower triangle of A is stored.
    N     (input)  order of A.  N >= 0.
    A     (in/out) REAL array, dimension (LDA,N).
                   On entry, the symmetric matrix A; only the triangle named
                   by UPLO is referenced.
                   On exit, if UPLO = 'U', the diagonal and first
                   superdiagonal hold T and the elements above the first
                   superdiagonal, with TAU, represent Q as a product of
                   elementary reflectors; if UPLO = 'L', the diagonal and
                   first subdiagonal hold T and the elements below the first
                   subdiagonal, with TAU, represent Q.
    LDA   (input)  leading dimension of A.  LDA >= max(1,N).
    D     (output) dimension (N); diagonal of T:  D(i) = A(i,i).
    E     (output) dimension (N-1); off-diagonal of T:
                   E(i) = A(i,i+1) if UPLO = 'U', A(i+1,i) if UPLO = 'L'.
    TAU   (output) dimension (N-1); scalar factors of the reflectors.
                   Also used as scratch for the vectors x and w during the
                   reduction.
    INFO  (output) = 0: success;  < 0: -INFO is the index of the illegal
                   argument.

    Further Details
    ===============
    UPLO = 'U':  Q = H(n-1) . . . H(2) H(1),  H(i) = I - tau * v * v',
       v(i+1:n) = 0, v(i) = 1, v(1:i-1) stored in A(1:i-1,i+1), tau in TAU(i).
    UPLO = 'L':  Q = H(1) H(2) . . . H(n-1),  H(i) = I - tau * v * v',
       v(1:i) = 0, v(i+1) = 1, v(i+2:n) stored in A(i+2:n,i), tau in TAU(i).

    Contents of A on exit for n = 5:

    if UPLO = 'U':                       if UPLO = 'L':

      (  d   e   v2  v3  v4 )              (  d                  )
      (      d   e   v3  v4 )              (  e   d              )
      (          d   e   v4 )              (  v1  e   d          )
      (              d   e  )              (  v1  v2  e   d      )
      (                  d  )              (  v1  v2  v3  e   d  )

    where d and e denote diagonal and off-diagonal elements of T, and vi
    denotes an element of the vector defining H(i).

    Always returns 0; errors are reported through INFO and xerbla_.

    Implementation note: all scratch variables are automatic (not static),
    so concurrent calls do not share state.  Only the read-only constants
    passed by address to BLAS remain static.
*/
/* Subroutine */ int ssytd2_(char *uplo, integer *n, real *a, integer *lda,
	real *d, real *e, real *tau, integer *info)
{
    /* Table of constant values (passed by address to BLAS/LAPACK) */
    static integer c__1 = 1;
    static real c_b8 = 0.f;
    static real c_b14 = -1.f;

    /* System generated locals */
    integer i__1, i__2;

    /* Local variables */
    real taui;
    real alpha;
    integer i;
    logical upper;

    extern doublereal sdot_(integer *, real *, integer *, real *, integer *);
    extern /* Subroutine */ int ssyr2_(char *, integer *, real *, real *,
	    integer *, real *, integer *, real *, integer *);
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *,
	    real *, integer *), ssymv_(char *, integer *, real *, real *,
	    integer *, real *, integer *, real *, real *, integer *),
	    xerbla_(char *, integer *), slarfg_(integer *, real *, real *,
	    integer *, real *);

    /* 1-based, column-major accessors mirroring the Fortran indexing */
#define D(I) d[(I)-1]
#define E(I) e[(I)-1]
#define TAU(I) tau[(I)-1]
#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]

    /* Test the input parameters */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
	*info = -1;
    } else if (*n < 0) {
	*info = -2;
    } else if (*lda < max(1,*n)) {
	*info = -4;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("SSYTD2", &i__1);
	return 0;
    }

    /* Quick return if possible */
    if (*n <= 0) {
	return 0;
    }

    if (upper) {

	/* Reduce the upper triangle of A */
	for (i = *n - 1; i >= 1; --i) {

	    /* Generate elementary reflector H(i) = I - tau * v * v'
	       to annihilate A(1:i-1,i+1) */
	    slarfg_(&i, &A(i,i+1), &A(1,i+1), &c__1, &taui);
	    E(i) = A(i,i+1);

	    if (taui != 0.f) {

		/* Apply H(i) from both sides to A(1:i,1:i) */
		A(i,i+1) = 1.f;

		/* Compute  x := tau * A * v  storing x in TAU(1:i) */
		ssymv_(uplo, &i, &taui, &A(1,1), lda, &A(1,i+1), &c__1,
			&c_b8, &TAU(1), &c__1);

		/* Compute  w := x - 1/2 * tau * (x'*v) * v */
		alpha = taui * -.5f * sdot_(&i, &TAU(1), &c__1, &A(1,i+1),
			&c__1);
		saxpy_(&i, &alpha, &A(1,i+1), &c__1, &TAU(1), &c__1);

		/* Apply the transformation as a rank-2 update:
		   A := A - v * w' - w * v' */
		ssyr2_(uplo, &i, &c_b14, &A(1,i+1), &c__1, &TAU(1), &c__1,
			&A(1,1), lda);

		/* Restore the off-diagonal entry overwritten with 1 above */
		A(i,i+1) = E(i);
	    }
	    D(i + 1) = A(i+1,i+1);
	    TAU(i) = taui;
/* L10: */
	}
	D(1) = A(1,1);
    } else {

	/* Reduce the lower triangle of A */
	for (i = 1; i <= *n-1; ++i) {

	    /* Generate elementary reflector H(i) = I - tau * v * v'
	       to annihilate A(i+2:n,i) */
	    i__2 = *n - i;
	    slarfg_(&i__2, &A(i+1,i), &A(min(i+2,*n),i), &c__1, &taui);
	    E(i) = A(i+1,i);

	    if (taui != 0.f) {

		/* Apply H(i) from both sides to A(i+1:n,i+1:n) */
		A(i+1,i) = 1.f;

		/* Compute  x := tau * A * v  storing x in TAU(i:n-1) */
		i__2 = *n - i;
		ssymv_(uplo, &i__2, &taui, &A(i+1,i+1), lda, &A(i+1,i),
			&c__1, &c_b8, &TAU(i), &c__1);

		/* Compute  w := x - 1/2 * tau * (x'*v) * v */
		i__2 = *n - i;
		alpha = taui * -.5f * sdot_(&i__2, &TAU(i), &c__1,
			&A(i+1,i), &c__1);
		i__2 = *n - i;
		saxpy_(&i__2, &alpha, &A(i+1,i), &c__1, &TAU(i), &c__1);

		/* Apply the transformation as a rank-2 update:
		   A := A - v * w' - w * v' */
		i__2 = *n - i;
		ssyr2_(uplo, &i__2, &c_b14, &A(i+1,i), &c__1, &TAU(i),
			&c__1, &A(i+1,i+1), lda);

		/* Restore the off-diagonal entry overwritten with 1 above */
		A(i+1,i) = E(i);
	    }
	    D(i) = A(i,i);
	    TAU(i) = taui;
/* L20: */
	}
	D(*n) = A(*n,*n);
    }

    return 0;

    /* End of SSYTD2 */

} /* ssytd2_ */
/*
 * CORE_shbrce - band-reduction kernel working on a tile descriptor
 * (single precision).
 *
 * For i = ed down to st+1 the kernel performs two passes:
 *
 *   pass 1  applies the 2-element reflector (V(i), TAU(i)) to the elements
 *           lying past index ed - from the right in the lower case, from
 *           the left in the upper case.  When this creates a nonzero
 *           outside the band (J3 > J2), a new 2-element reflector
 *           (V(J3), TAU(J3)) is generated with LAPACKE_slarfg_work to
 *           annihilate it.
 *   pass 2  for every i where such a reflector was generated, applies it
 *           from the opposite side to the remaining elements of the block.
 *
 * Every CORE_slarfx2 application is split at the tile boundary
 * (a multiple of NB = A->mb) into a head part and a tail part, because the
 * leading dimension reported by ELTLDD can differ between tiles.
 *
 * A(), V(), TAU() and ELTLDD() are macros defined elsewhere in this file;
 * A/V/TAU yield element addresses.
 * NOTE(review): the A() macro presumably reads the local descriptor copy
 * `vA` and the `eltsize` parameter - both are kept under their original
 * names for that reason; confirm against the macro definition.
 *
 * uplo    PlasmaLower selects the lower-case path; any other value takes
 *         the upper-case path.
 * N       matrix order; N < 0 is rejected with -2, N == 0 returns at once.
 * A       tile descriptor of the band matrix.
 * V, TAU  reflector storage (read for index i, written for index J3).
 * st, ed  range of the sweep; ed <= st is rejected with -6.
 * eltsize not referenced directly in this body.
 *
 * Returns PLASMA_SUCCESS, or the negative index of the offending argument.
 */
int CORE_shbrce(int uplo, int N,
                PLASMA_desc *A,
                float *V,
                float *TAU,
                int st,
                int ed,
                int eltsize)
{
    int NB, KDM2;
    int J1, J2, J3;          /* first / in-band last / possible fill-in index */
    int bulge_row;           /* index where the fill-in reflector acts       */
    int tile_end, tail_start;/* split point at the tile boundary             */
    int head_len, tail_len;  /* lengths of the two split pieces              */
    int i;
    const float zzero = 0.0;
    PLASMA_desc vA = *A;

    /* Check input arguments */
    if (N < 0) {
        coreblas_error(2, "Illegal value of N");
        return -2;
    }
    if (ed <= st) {
        coreblas_error(6, "Illegal value of st and ed (internal)");
        return -6;
    }

    /* Quick return */
    if (N == 0)
        return PLASMA_SUCCESS;

    NB   = A->mb;
    KDM2 = A->mb - 2;

    if (uplo == PlasmaLower) {
        /* ========================
         *       LOWER CASE
         * ========================*/
        for (i = ed; i >= st+1; i--) {
            /* Apply Householder from the right; this creates a new nonzero
             * outside the band when J3 > J2. */
            J1 = ed + 1;
            J2 = min((i+1+KDM2), N);
            J3 = min((J2+1), N);

            /* Could be removed because A is supposed to be band. */
            if (J3 > J2)
                *A(J3, (i-1)) = zzero;

            tile_end   = (J3/NB)*NB;
            tail_start = max(tile_end+1, J1);
            head_len   = tile_end - J1 + 1;        /* can be negative */
            tail_len   = J3 - tail_start + 1;

            if (head_len > 0)
                CORE_slarfx2(PlasmaRight, head_len, *V(i), *TAU(i),
                             A(J1, i-1), ELTLDD(vA, J1),
                             A(J1, i),   ELTLDD(vA, J1));
            if (tail_len > 0)
                CORE_slarfx2(PlasmaRight, tail_len, *V(i), *TAU(i),
                             A(tail_start, i-1), ELTLDD(vA, tail_start),
                             A(tail_start, i),   ELTLDD(vA, tail_start));

            /* If a nonzero a(j+kd,j-1) has been created outside the band
             * (index < N), eliminate it.  J3-J2 is either 1 or 0. */
            if (J3 - J2 > 0) {
                /* Generate Householder to annihilate a(j+kd,j-1) within
                 * the band. */
                *V(J3)      = *A(J3, i-1);
                *A(J3, i-1) = 0.0;
                LAPACKE_slarfg_work(2, A(J2, i-1), V(J3), 1, TAU(J3));
            }
        }

        /* APPLY LEFT ON THE REMAINING ELEMENT OF KERNEL 2 */
        for (i = ed; i >= st+1; i--) {
            /* Was a nonzero created for this i?  If so apply from the left,
             * otherwise there is nothing to do. */
            J2 = min((i+1+KDM2), N);
            J3 = min((J2+1), N);
            if (J3 - J2 <= 0)
                continue;

            bulge_row  = J2;
            J1         = i;
            J2         = min(ed, N);
            tile_end   = (J2/NB)*NB;
            tail_start = max(tile_end+1, J1);
            head_len   = tile_end - J1 + 1;        /* can be negative */
            tail_len   = J2 - tail_start + 1;

            if (head_len > 0)
                CORE_slarfx2(PlasmaLeft, head_len, *V(J3), (*TAU(J3)),
                             A(bulge_row,     i), ELTLDD(vA, bulge_row),
                             A((bulge_row+1), i), ELTLDD(vA, bulge_row+1));
            if (tail_len > 0)
                CORE_slarfx2(PlasmaLeft, tail_len, *V(J3), (*TAU(J3)),
                             A(bulge_row,     tail_start), ELTLDD(vA, bulge_row),
                             A((bulge_row+1), tail_start), ELTLDD(vA, bulge_row+1));
        }
    }
    else {
        /* ========================
         *       UPPER CASE
         * ========================*/
        for (i = ed; i >= st+1; i--) {
            /* Apply Householder from the left; this creates a new nonzero
             * outside the band when J3 > J2. */
            J1 = ed + 1;
            J2 = min((i+1+KDM2), N);
            J3 = min((J2+1), N);

            /* Could be removed because A is supposed to be band. */
            if (J3 > J2)
                *A((i-1), J3) = zzero;

            tile_end   = (J3/NB)*NB;
            tail_start = max(tile_end+1, J1);
            head_len   = tile_end - J1 + 1;        /* can be negative */
            tail_len   = J3 - tail_start + 1;

            if (head_len > 0)
                CORE_slarfx2(PlasmaLeft, head_len, (*V(i)), *TAU(i),
                             A(i-1, J1), ELTLDD(vA, (i-1)),
                             A(i,   J1), ELTLDD(vA, i));
            if (tail_len > 0)
                CORE_slarfx2(PlasmaLeft, tail_len, (*V(i)), *TAU(i),
                             A(i-1, tail_start), ELTLDD(vA, (i-1)),
                             A(i,   tail_start), ELTLDD(vA, i));

            /* If a nonzero a(j+kd,j-1) has been created outside the band
             * (index < N), eliminate it.  J3-J2 is either 1 or 0. */
            if (J3 - J2 > 0) {
                /* Generate Householder to annihilate a(j+kd,j-1) within
                 * the band. */
                *V(J3)        = *A((i-1), J3);
                *A((i-1), J3) = 0.0;
                LAPACKE_slarfg_work(2, A((i-1), J2), V(J3), 1, TAU(J3));
            }
        }

        /* APPLY RIGHT ON THE REMAINING ELEMENT OF KERNEL 2 */
        for (i = ed; i >= st+1; i--) {
            /* Was a nonzero created for this i?  If so apply from the
             * right, otherwise there is nothing to do. */
            J2 = min((i+1+KDM2), N);
            J3 = min((J2+1), N);
            if (J3 - J2 <= 0)
                continue;

            bulge_row  = J2;
            J1         = i;
            J2         = min(ed, N);
            tile_end   = (J2/NB)*NB;
            tail_start = max(tile_end+1, J1);
            head_len   = tile_end - J1 + 1;        /* can be negative */
            tail_len   = J2 - tail_start + 1;

            if (head_len > 0)
                CORE_slarfx2(PlasmaRight, head_len, (*V(J3)), (*TAU(J3)),
                             A(i, bulge_row),   ELTLDD(vA, i),
                             A(i, bulge_row+1), ELTLDD(vA, i));
            if (tail_len > 0)
                CORE_slarfx2(PlasmaRight, tail_len, (*V(J3)), (*TAU(J3)),
                             A(tail_start, bulge_row),   ELTLDD(vA, tail_start),
                             A(tail_start, bulge_row+1), ELTLDD(vA, tail_start));
        }
    } /* end of else for the upper case */

    return PLASMA_SUCCESS;
}
/*  -- LAPACK routine (version 2.0) --
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
       Courant Institute, Argonne National Lab, and Rice University
       September 30, 1994

    Purpose
    =======

    ZUNGBR generates one of the complex unitary matrices Q or P**H
    determined by ZGEBRD when reducing a complex matrix A to bidiagonal
    form: A = Q * B * P**H.  Q and P**H are defined as products of
    elementary reflectors H(i) or G(i) respectively.

    If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q
    is of order M:
    if m >= k, Q = H(1) H(2) . . . H(k) and ZUNGBR returns the first n
    columns of Q, where m >= n >= k;
    if m < k, Q = H(1) H(2) . . . H(m-1) and ZUNGBR returns Q as an
    M-by-M matrix.

    If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**H
    is of order N:
    if k < n, P**H = G(k) . . . G(2) G(1) and ZUNGBR returns the first m
    rows of P**H, where n >= m >= k;
    if k >= n, P**H = G(n-1) . . . G(2) G(1) and ZUNGBR returns P**H as
    an N-by-N matrix.

    Arguments
    =========

    VECT    (input) CHARACTER*1
            Specifies whether the matrix Q or the matrix P**H is
            required, as defined in the transformation applied by ZGEBRD:
            = 'Q':  generate Q;
            = 'P':  generate P**H.

    M       (input) INTEGER
            The number of rows of the matrix Q or P**H to be returned.
            M >= 0.

    N       (input) INTEGER
            The number of columns of the matrix Q or P**H to be returned.
            N >= 0.
            If VECT = 'Q', M >= N >= min(M,K);
            if VECT = 'P', N >= M >= min(N,K).

    K       (input) INTEGER
            If VECT = 'Q', the number of columns in the original M-by-K
            matrix reduced by ZGEBRD.
            If VECT = 'P', the number of rows in the original K-by-N
            matrix reduced by ZGEBRD.
            K >= 0.

    A       (input/output) COMPLEX*16 array, dimension (LDA,N)
            On entry, the vectors which define the elementary reflectors,
            as returned by ZGEBRD.
            On exit, the M-by-N matrix Q or P**H.

    LDA     (input) INTEGER
            The leading dimension of the array A. LDA >= M.

    TAU     (input) COMPLEX*16 array, dimension
                                  (min(M,K)) if VECT = 'Q'
                                  (min(N,K)) if VECT = 'P'
            TAU(i) must contain the scalar factor of the elementary
            reflector H(i) or G(i), which determines Q or P**H, as
            returned by ZGEBRD in its array argument TAUQ or TAUP.

    WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.

    LWORK   (input) INTEGER
            The dimension of the array WORK. LWORK >= max(1,min(M,N)).
            For optimum performance LWORK >= min(M,N)*NB, where NB
            is the optimal blocksize.

    INFO    (output) INTEGER
            = 0:  successful exit
            < 0:  if INFO = -i, the i-th argument had an illegal value

    =====================================================================
*/
/* Subroutine */ int zungbr_(char *vect, integer *m, integer *n, integer *k,
        doublecomplex *a, integer *lda, doublecomplex *tau,
        doublecomplex *work, integer *lwork, integer *info)
{
    /* External routines */
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int xerbla_(char *, integer *),
            zunglq_(integer *, integer *, integer *, doublecomplex *,
                    integer *, doublecomplex *, doublecomplex *, integer *,
                    integer *),
            zungqr_(integer *, integer *, integer *, doublecomplex *,
                    integer *, doublecomplex *, doublecomplex *, integer *,
                    integer *);

    /* Local variables (automatic storage, so the routine is reentrant) */
    integer i, j, iinfo;
    integer lwkmin;                 /* smallest admissible LWORK */
    integer errarg;                 /* position of the offending argument */
    integer sub_m, sub_n, sub_k;    /* dimensions of the trailing sub-problem */
    logical wantq;

/* 1-based, column-major accessors for the Fortran-style arrays */
#define TAU(I) tau[(I)-1]
#define WORK(I) work[(I)-1]
#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]

    /* Test the input arguments */
    *info = 0;
    wantq = lsame_(vect, "Q");

    lwkmin = min(*m,*n);
    if (lwkmin < 1) {
        lwkmin = 1;
    }

    if (! wantq && ! lsame_(vect, "P")) {
        *info = -1;
    } else if (*m < 0) {
        *info = -2;
    } else if (*n < 0
            || (wantq && (*n > *m || *n < min(*m,*k)))
            || (! wantq && (*m > *n || *m < min(*n,*k)))) {
        *info = -3;
    } else if (*k < 0) {
        *info = -4;
    } else if (*lda < max(1,*m)) {
        *info = -6;
    } else if (*lwork < lwkmin) {
        *info = -9;
    }
    if (*info != 0) {
        errarg = -(*info);
        xerbla_("ZUNGBR", &errarg);
        return 0;
    }

    /* Quick return if possible */
    if (*m == 0 || *n == 0) {
        WORK(1).r = 1., WORK(1).i = 0.;
        return 0;
    }

    if (wantq) {

        /* Form Q, determined by a call to ZGEBRD to reduce an m-by-k
           matrix */
        if (*m >= *k) {

            /* If m >= k, assume m >= n >= k */
            zungqr_(m, n, k, &A(1,1), lda, &TAU(1), &WORK(1), lwork,
                    &iinfo);

        } else {

            /* If m < k, assume m = n.
               Shift the vectors which define the elementary reflectors
               one column to the right, and set the first row and column
               of Q to those of the unit matrix.  Columns are processed
               right to left so no source column is overwritten before it
               has been copied. */
            for (j = *m; j >= 2; --j) {
                A(1,j).r = 0., A(1,j).i = 0.;
                for (i = j + 1; i <= *m; ++i) {
                    A(i,j).r = A(i,j-1).r, A(i,j).i = A(i,j-1).i;
                }
            }
            A(1,1).r = 1., A(1,1).i = 0.;
            for (i = 2; i <= *m; ++i) {
                A(i,1).r = 0., A(i,1).i = 0.;
            }

            if (*m > 1) {
                /* Form Q(2:m,2:m) */
                sub_m = *m - 1;
                sub_n = *m - 1;
                sub_k = *m - 1;
                zungqr_(&sub_m, &sub_n, &sub_k, &A(2,2), lda, &TAU(1),
                        &WORK(1), lwork, &iinfo);
            } else {
                /* Q is the 1-by-1 identity and ZUNGQR is not called, so
                   report the workspace size here to honour the WORK(1)
                   contract. */
                WORK(1).r = 1., WORK(1).i = 0.;
            }
        }

    } else {

        /* Form P', determined by a call to ZGEBRD to reduce a k-by-n
           matrix */
        if (*k < *n) {

            /* If k < n, assume k <= m <= n */
            zunglq_(m, n, k, &A(1,1), lda, &TAU(1), &WORK(1), lwork,
                    &iinfo);

        } else {

            /* If k >= n, assume m = n.
               Shift the vectors which define the elementary reflectors
               one row downward, and set the first row and column of P'
               to those of the unit matrix.  Within a column the rows are
               moved bottom-up so each source entry is read before it is
               overwritten. */
            A(1,1).r = 1., A(1,1).i = 0.;
            for (i = 2; i <= *n; ++i) {
                A(i,1).r = 0., A(i,1).i = 0.;
            }
            for (j = 2; j <= *n; ++j) {
                for (i = j - 1; i >= 2; --i) {
                    A(i,j).r = A(i-1,j).r, A(i,j).i = A(i-1,j).i;
                }
                A(1,j).r = 0., A(1,j).i = 0.;
            }

            if (*n > 1) {
                /* Form P'(2:n,2:n) */
                sub_m = *n - 1;
                sub_n = *n - 1;
                sub_k = *n - 1;
                zunglq_(&sub_m, &sub_n, &sub_k, &A(2,2), lda, &TAU(1),
                        &WORK(1), lwork, &iinfo);
            } else {
                /* P' is the 1-by-1 identity and ZUNGLQ is not called, so
                   report the workspace size here. */
                WORK(1).r = 1., WORK(1).i = 0.;
            }
        }
    }
    return 0;

/*     End of ZUNGBR */

} /* zungbr_ */
/*  -- LAPACK routine (version 2.0) --
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
       Courant Institute, Argonne National Lab, and Rice University
       September 30, 1994

    Purpose
    =======

    ZUNGHR generates a complex unitary matrix Q which is defined as the
    product of IHI-ILO elementary reflectors of order N, as returned by
    ZGEHRD:

    Q = H(ilo) H(ilo+1) . . . H(ihi-1).

    Arguments
    =========

    N       (input) INTEGER
            The order of the matrix Q. N >= 0.

    ILO     (input) INTEGER
    IHI     (input) INTEGER
            ILO and IHI must have the same values as in the previous call
            of ZGEHRD. Q is equal to the unit matrix except in the
            submatrix Q(ilo+1:ihi,ilo+1:ihi).
            1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.

    A       (input/output) COMPLEX*16 array, dimension (LDA,N)
            On entry, the vectors which define the elementary reflectors,
            as returned by ZGEHRD.
            On exit, the N-by-N unitary matrix Q.

    LDA     (input) INTEGER
            The leading dimension of the array A. LDA >= max(1,N).

    TAU     (input) COMPLEX*16 array, dimension (N-1)
            TAU(i) must contain the scalar factor of the elementary
            reflector H(i), as returned by ZGEHRD.

    WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.

    LWORK   (input) INTEGER
            The dimension of the array WORK. LWORK >= max(1,IHI-ILO)
            (this is the bound enforced by the argument check).
            For optimum performance LWORK >= (IHI-ILO)*NB, where NB is
            the optimal blocksize.

    INFO    (output) INTEGER
            = 0:  successful exit
            < 0:  if INFO = -i, the i-th argument had an illegal value

    =====================================================================
*/
/* Subroutine */ int zunghr_(integer *n, integer *ilo, integer *ihi,
        doublecomplex *a, integer *lda, doublecomplex *tau,
        doublecomplex *work, integer *lwork, integer *info)
{
    /* External routines */
    extern /* Subroutine */ int xerbla_(char *, integer *),
            zungqr_(integer *, integer *, integer *, doublecomplex *,
                    integer *, doublecomplex *, doublecomplex *, integer *,
                    integer *);

    /* Local variables (automatic storage, so the routine is reentrant) */
    integer i, j, iinfo;
    integer nh;         /* number of reflectors, IHI - ILO */
    integer lwkmin;     /* smallest admissible LWORK */
    integer errarg;     /* position of the offending argument */

/* 1-based, column-major accessors for the Fortran-style arrays */
#define TAU(I) tau[(I)-1]
#define WORK(I) work[(I)-1]
#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]

    /* Test the input arguments */
    *info = 0;

    lwkmin = *ihi - *ilo;
    if (lwkmin < 1) {
        lwkmin = 1;
    }

    if (*n < 0) {
        *info = -1;
    } else if (*ilo < 1 || *ilo > max(1,*n)) {
        *info = -2;
    } else if (*ihi < min(*ilo,*n) || *ihi > *n) {
        *info = -3;
    } else if (*lda < max(1,*n)) {
        *info = -5;
    } else if (*lwork < lwkmin) {
        *info = -8;
    }
    if (*info != 0) {
        errarg = -(*info);
        xerbla_("ZUNGHR", &errarg);
        return 0;
    }

    /* Quick return if possible */
    if (*n == 0) {
        WORK(1).r = 1., WORK(1).i = 0.;
        return 0;
    }

    /* Shift the vectors which define the elementary reflectors one
       column to the right, and set the first ilo and the last n-ihi
       rows and columns to those of the unit matrix.  Columns are
       processed right to left so column j-1 is still intact when it is
       copied into column j. */
    for (j = *ihi; j >= *ilo + 1; --j) {
        for (i = 1; i <= j - 1; ++i) {
            A(i,j).r = 0., A(i,j).i = 0.;
        }
        for (i = j + 1; i <= *ihi; ++i) {
            A(i,j).r = A(i,j-1).r, A(i,j).i = A(i,j-1).i;
        }
        for (i = *ihi + 1; i <= *n; ++i) {
            A(i,j).r = 0., A(i,j).i = 0.;
        }
    }

    /* Columns 1:ilo become unit vectors */
    for (j = 1; j <= *ilo; ++j) {
        for (i = 1; i <= *n; ++i) {
            A(i,j).r = 0., A(i,j).i = 0.;
        }
        A(j,j).r = 1., A(j,j).i = 0.;
    }

    /* Columns ihi+1:n become unit vectors */
    for (j = *ihi + 1; j <= *n; ++j) {
        for (i = 1; i <= *n; ++i) {
            A(i,j).r = 0., A(i,j).i = 0.;
        }
        A(j,j).r = 1., A(j,j).i = 0.;
    }

    nh = *ihi - *ilo;
    if (nh > 0) {
        /* Generate Q(ilo+1:ihi,ilo+1:ihi) */
        zungqr_(&nh, &nh, &nh, &A(*ilo+1,*ilo+1), lda, &TAU(*ilo),
                &WORK(1), lwork, &iinfo);
    } else {
        /* No reflectors: Q is the identity and ZUNGQR is not called, so
           report the workspace size here to honour the WORK(1)
           contract. */
        WORK(1).r = 1., WORK(1).i = 0.;
    }
    return 0;

/*     End of ZUNGHR */

} /* zunghr_ */
/*  -- LAPACK routine (version 2.0) --
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
       Courant Institute, Argonne National Lab, and Rice University
       September 30, 1994

    Purpose
    =======

    SORMTR overwrites the general real M-by-N matrix C with

                    SIDE = 'L'     SIDE = 'R'
    TRANS = 'N':      Q * C          C * Q
    TRANS = 'T':      Q**T * C       C * Q**T

    where Q is a real orthogonal matrix of order nq, with nq = m if
    SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of
    nq-1 elementary reflectors, as returned by SSYTRD:

    if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1);

    if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1).

    Arguments
    =========

    SIDE    (input) CHARACTER*1
            = 'L': apply Q or Q**T from the Left;
            = 'R': apply Q or Q**T from the Right.

    UPLO    (input) CHARACTER*1
            = 'U': Upper triangle of A contains elementary reflectors
                   from SSYTRD;
            = 'L': Lower triangle of A contains elementary reflectors
                   from SSYTRD.

    TRANS   (input) CHARACTER*1
            = 'N':  No transpose, apply Q;
            = 'T':  Transpose, apply Q**T.

    M       (input) INTEGER
            The number of rows of the matrix C. M >= 0.

    N       (input) INTEGER
            The number of columns of the matrix C. N >= 0.

    A       (input) REAL array, dimension
                                 (LDA,M) if SIDE = 'L'
                                 (LDA,N) if SIDE = 'R'
            The vectors which define the elementary reflectors, as
            returned by SSYTRD.

    LDA     (input) INTEGER
            The leading dimension of the array A.
            LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'.

    TAU     (input) REAL array, dimension
                                 (M-1) if SIDE = 'L'
                                 (N-1) if SIDE = 'R'
            TAU(i) must contain the scalar factor of the elementary
            reflector H(i), as returned by SSYTRD.

    C       (input/output) REAL array, dimension (LDC,N)
            On entry, the M-by-N matrix C.
            On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.

    LDC     (input) INTEGER
            The leading dimension of the array C. LDC >= max(1,M).

    WORK    (workspace/output) REAL array, dimension (LWORK)
            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.

    LWORK   (input) INTEGER
            The dimension of the array WORK.
            If SIDE = 'L', LWORK >= max(1,N);
            if SIDE = 'R', LWORK >= max(1,M).
            For optimum performance LWORK >= N*NB if SIDE = 'L', and
            LWORK >= M*NB if SIDE = 'R', where NB is the optimal
            blocksize.

    INFO    (output) INTEGER
            = 0:  successful exit
            < 0:  if INFO = -i, the i-th argument had an illegal value

    =====================================================================
*/
/* Subroutine */ int sormtr_(char *side, char *uplo, char *trans, int *m,
        int *n, real *a, int *lda, real *tau, real *c, int *ldc,
        real *work, int *lwork, int *info)
{
    /* External routines */
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int xerbla_(char *, int *),
            sormql_(char *, char *, int *, int *, int *, real *, int *,
                    real *, real *, int *, real *, int *, int *),
            sormqr_(char *, char *, int *, int *, int *, real *, int *,
                    real *, real *, int *, real *, int *, int *);

    /* The unused f2c locals (a_dim1, a_offset, c_dim1, c_offset) were
       removed by MDG on 03-09-05. */

    /* Local variables (storage class kept as in the f2c translation) */
    static logical from_left, is_upper;
    static int sub_info;            /* status of the delegated call */
    static int c_row, c_col;        /* top-left corner of the part of C updated */
    static int sub_m, sub_n;        /* dimensions of the part of C updated */
    static int q_order;             /* order of Q */
    static int min_lwork;           /* minimum dimension of WORK */
    int scratch;                    /* integer passed by address to callees */

/* 1-based, column-major accessors for the Fortran-style arrays */
#define TAU(I) tau[(I)-1]
#define WORK(I) work[(I)-1]
#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]
#define C(I,J) c[(I)-1 + ((J)-1)* ( *ldc)]

    /* Test the input arguments */
    *info = 0;
    from_left = lsame_(side, "L");
    is_upper = lsame_(uplo, "U");

    /* Q has order M when applied from the left and order N otherwise;
       the minimum workspace is the other dimension of C. */
    q_order = from_left ? *m : *n;
    min_lwork = from_left ? *n : *m;

    if (! from_left && ! lsame_(side, "R")) {
        *info = -1;
    } else if (! is_upper && ! lsame_(uplo, "L")) {
        *info = -2;
    } else if (! lsame_(trans, "N") && ! lsame_(trans, "T")) {
        *info = -3;
    } else if (*m < 0) {
        *info = -4;
    } else if (*n < 0) {
        *info = -5;
    } else if (*lda < max(1,q_order)) {
        *info = -7;
    } else if (*ldc < max(1,*m)) {
        *info = -10;
    } else if (*lwork < max(1,min_lwork)) {
        *info = -12;
    }

    if (*info != 0) {
        scratch = -(*info);
        xerbla_("SORMTR", &scratch);
        return 0;
    }

    /* Quick return if possible */
    if (*m == 0 || *n == 0 || q_order == 1) {
        WORK(1) = 1.f;
        return 0;
    }

    /* Q acts on nq-1 rows (SIDE = 'L') or nq-1 columns (SIDE = 'R')
       of C. */
    sub_m = *m;
    sub_n = *n;
    if (from_left) {
        --sub_m;
    } else {
        --sub_n;
    }

    scratch = q_order - 1;      /* number of elementary reflectors */

    if (is_upper) {
        /* Q was determined by a call to SSYTRD with UPLO = 'U' */
        sormql_(side, trans, &sub_m, &sub_n, &scratch, &A(1,2), lda,
                &TAU(1), &C(1,1), ldc, &WORK(1), lwork, &sub_info);
        return 0;
    }

    /* Q was determined by a call to SSYTRD with UPLO = 'L': the update
       starts at row 2 of C (SIDE = 'L') or at column 2 (SIDE = 'R'). */
    c_row = from_left ? 2 : 1;
    c_col = from_left ? 1 : 2;
    sormqr_(side, trans, &sub_m, &sub_n, &scratch, &A(2,1), lda, &TAU(1),
            &C(c_row,c_col), ldc, &WORK(1), lwork, &sub_info);
    return 0;

/*     End of SORMTR */

} /* sormtr_ */
/*  -- LAPACK routine (version 2.0) --
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
       Courant Institute, Argonne National Lab, and Rice University
       September 30, 1994

    Purpose
    =======

    CUNGRQ generates an M-by-N complex matrix Q with orthonormal rows,
    which is defined as the last M rows of a product of K elementary
    reflectors of order N

          Q  =  H(1)' H(2)' . . . H(k)'

    as returned by CGERQF.

    Arguments
    =========

    M       (input) INTEGER
            The number of rows of the matrix Q. M >= 0.

    N       (input) INTEGER
            The number of columns of the matrix Q. N >= M.

    K       (input) INTEGER
            The number of elementary reflectors whose product defines the
            matrix Q. M >= K >= 0.

    A       (input/output) COMPLEX array, dimension (LDA,N)
            On entry, the (m-k+i)-th row must contain the vector which
            defines the elementary reflector H(i), for i = 1,2,...,k, as
            returned by CGERQF in the last k rows of its array argument
            A.
            On exit, the M-by-N matrix Q.

    LDA     (input) INTEGER
            The first dimension of the array A. LDA >= max(1,M).

    TAU     (input) COMPLEX array, dimension (K)
            TAU(i) must contain the scalar factor of the elementary
            reflector H(i), as returned by CGERQF.

    WORK    (workspace/output) COMPLEX array, dimension (LWORK)
            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.

    LWORK   (input) INTEGER
            The dimension of the array WORK. LWORK >= max(1,M).
            For optimum performance LWORK >= M*NB, where NB is the
            optimal blocksize.

    INFO    (output) INTEGER
            = 0:  successful exit
            < 0:  if INFO = -i, the i-th argument has an illegal value

    =====================================================================
*/
/* Subroutine */ int cungrq_(integer *m, integer *n, integer *k, complex *a,
        integer *lda, complex *tau, complex *work, integer *lwork,
        integer *info)
{
    /* Table of constant values (passed by address to ILAENV) */
    static integer c__1 = 1;
    static integer c_n1 = -1;
    static integer c__3 = 3;
    static integer c__2 = 2;

    /* External routines */
    extern /* Subroutine */ int cungr2_(integer *, integer *, integer *,
            complex *, integer *, complex *, complex *, integer *);
    extern /* Subroutine */ int clarfb_(char *, char *, char *, char *,
            integer *, integer *, integer *, complex *, integer *,
            complex *, integer *, complex *, integer *, complex *,
            integer *);
    extern /* Subroutine */ int clarft_(char *, char *, integer *,
            integer *, complex *, integer *, complex *, complex *,
            integer *), xerbla_(char *, integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
            integer *, integer *, ftnlen, ftnlen);

    /* Local variables (automatic storage, so the routine is reentrant) */
    integer i, j, l;
    integer ib;         /* number of reflectors in the current block */
    integer ii;         /* first row of A belonging to the current block */
    integer kk;         /* number of trailing reflectors handled blocked */
    integer nb;         /* block size */
    integer nbmin;      /* smallest block size worth using */
    integer nx;         /* crossover point from blocked to unblocked code */
    integer iws;        /* workspace size reported in WORK(1) */
    integer ldwork;     /* leading dimension of the block workspace */
    integer iinfo;
    integer blk_last;   /* largest multiple of nb covering k - nx */
    integer arg1, arg2, arg3;   /* integers passed by address to callees */

/* 1-based, column-major accessors for the Fortran-style arrays */
#define TAU(I) tau[(I)-1]
#define WORK(I) work[(I)-1]
#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]

    /* Test the input arguments */
    *info = 0;
    if (*m < 0) {
        *info = -1;
    } else if (*n < *m) {
        *info = -2;
    } else if (*k < 0 || *k > *m) {
        *info = -3;
    } else if (*lda < max(1,*m)) {
        *info = -5;
    } else if (*lwork < max(1,*m)) {
        *info = -8;
    }
    if (*info != 0) {
        arg1 = -(*info);
        xerbla_("CUNGRQ", &arg1);
        return 0;
    }

    /* Quick return if possible */
    if (*m <= 0) {
        WORK(1).r = 1.f, WORK(1).i = 0.f;
        return 0;
    }

    /* Determine the block size. */
    nb = ilaenv_(&c__1, "CUNGRQ", " ", m, n, k, &c_n1, 6L, 1L);
    nbmin = 2;
    nx = 0;
    iws = *m;
    ldwork = *m;    /* only consumed by the blocked code below */

    if (nb > 1 && nb < *k) {

        /* Determine when to cross over from blocked to unblocked code.
           Each ILAENV result is stored before being clamped so the
           query is issued exactly once. */
        nx = ilaenv_(&c__3, "CUNGRQ", " ", m, n, k, &c_n1, 6L, 1L);
        if (nx < 0) {
            nx = 0;
        }

        if (nx < *k) {

            /* Determine if workspace is large enough for blocked code. */
            iws = ldwork * nb;
            if (*lwork < iws) {

                /* Not enough workspace to use optimal NB: reduce NB and
                   determine the minimum value of NB. */
                nb = *lwork / ldwork;
                nbmin = ilaenv_(&c__2, "CUNGRQ", " ", m, n, k, &c_n1, 6L,
                        1L);
                if (nbmin < 2) {
                    nbmin = 2;
                }
            }
        }
    }

    if (nb >= nbmin && nb < *k && nx < *k) {

        /* Use blocked code after the first block.
           The last kk rows are handled by the block method. */
        blk_last = (*k - nx + nb - 1) / nb * nb;
        kk = min(*k,blk_last);

        /* Set A(1:m-kk,n-kk+1:n) to zero. */
        for (j = *n - kk + 1; j <= *n; ++j) {
            for (i = 1; i <= *m - kk; ++i) {
                A(i,j).r = 0.f, A(i,j).i = 0.f;
            }
        }
    } else {
        kk = 0;
    }

    /* Use unblocked code for the first or only block. */
    arg1 = *m - kk;
    arg2 = *n - kk;
    arg3 = *k - kk;
    cungr2_(&arg1, &arg2, &arg3, &A(1,1), lda, &TAU(1), &WORK(1), &iinfo);

    if (kk > 0) {

        /* Use blocked code.  kk > 0 implies nb >= nbmin >= 2, so the
           block index always advances upwards. */
        for (i = *k - kk + 1; i <= *k; i += nb) {
            ib = min(nb,*k - i + 1);
            ii = *m - *k + i;
            if (ii > 1) {

                /* Form the triangular factor of the block reflector
                   H = H(i+ib-1) . . . H(i+1) H(i) */
                arg1 = *n - *k + i + ib - 1;
                clarft_("Backward", "Rowwise", &arg1, &ib, &A(ii,1), lda,
                        &TAU(i), &WORK(1), &ldwork);

                /* Apply H' to A(1:m-k+i-1,1:n-k+i+ib-1) from the right */
                arg1 = ii - 1;
                arg2 = *n - *k + i + ib - 1;
                clarfb_("Right", "Conjugate transpose", "Backward",
                        "Rowwise", &arg1, &arg2, &ib, &A(ii,1), lda,
                        &WORK(1), &ldwork, &A(1,1), lda, &WORK(ib + 1),
                        &ldwork);
            }

            /* Apply H' to columns 1:n-k+i+ib-1 of current block */
            arg1 = *n - *k + i + ib - 1;
            cungr2_(&ib, &arg1, &ib, &A(ii,1), lda, &TAU(i), &WORK(1),
                    &iinfo);

            /* Set columns n-k+i+ib:n of current block to zero */
            for (l = *n - *k + i + ib; l <= *n; ++l) {
                for (j = ii; j <= ii + ib - 1; ++j) {
                    A(j,l).r = 0.f, A(j,l).i = 0.f;
                }
            }
        }
    }

    WORK(1).r = (real) iws, WORK(1).i = 0.f;
    return 0;

/*     End of CUNGRQ */

} /* cungrq_ */
/* Subroutine */ int cggsvp_(char *jobu, char *jobv, char *jobq, integer *m, integer *p, integer *n, complex *a, integer *lda, complex *b, integer *ldb, real *tola, real *tolb, integer *k, integer *l, complex *u, integer *ldu, complex *v, integer *ldv, complex *q, integer *ldq, integer *iwork, real *rwork, complex *tau, complex *work, integer * info) { /* -- LAPACK routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= CGGSVP computes unitary matrices U, V and Q such that N-K-L K L U'*A*Q = K ( 0 A12 A13 ) if M-K-L >= 0; L ( 0 0 A23 ) M-K-L ( 0 0 0 ) N-K-L K L = K ( 0 A12 A13 ) if M-K-L < 0; M-K ( 0 0 A23 ) N-K-L K L V'*B*Q = L ( 0 0 B13 ) P-L ( 0 0 0 ) where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0, otherwise A23 is (M-K)-by-L upper trapezoidal. K+L = the effective numerical rank of the (M+P)-by-N matrix (A',B')'. Z' denotes the conjugate transpose of Z. This decomposition is the preprocessing step for computing the Generalized Singular Value Decomposition (GSVD), see subroutine CGGSVD. Arguments ========= JOBU (input) CHARACTER*1 = 'U': Unitary matrix U is computed; = 'N': U is not computed. JOBV (input) CHARACTER*1 = 'V': Unitary matrix V is computed; = 'N': V is not computed. JOBQ (input) CHARACTER*1 = 'Q': Unitary matrix Q is computed; = 'N': Q is not computed. M (input) INTEGER The number of rows of the matrix A. M >= 0. P (input) INTEGER The number of rows of the matrix B. P >= 0. N (input) INTEGER The number of columns of the matrices A and B. N >= 0. A (input/output) COMPLEX array, dimension (LDA,N) On entry, the M-by-N matrix A. On exit, A contains the triangular (or trapezoidal) matrix described in the Purpose section. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,M). 
B (input/output) COMPLEX array, dimension (LDB,N) On entry, the P-by-N matrix B. On exit, B contains the triangular matrix described in the Purpose section. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,P). TOLA (input) REAL TOLB (input) REAL TOLA and TOLB are the thresholds to determine the effective numerical rank of matrix B and a subblock of A. Generally, they are set to TOLA = MAX(M,N)*norm(A)*MACHEPS, TOLB = MAX(P,N)*norm(B)*MACHEPS. The size of TOLA and TOLB may affect the size of backward errors of the decomposition. K (output) INTEGER L (output) INTEGER On exit, K and L specify the dimension of the subblocks described in Purpose section. K + L = effective numerical rank of (A',B')'. U (output) COMPLEX array, dimension (LDU,M) If JOBU = 'U', U contains the unitary matrix U. If JOBU = 'N', U is not referenced. LDU (input) INTEGER The leading dimension of the array U. LDU >= max(1,M) if JOBU = 'U'; LDU >= 1 otherwise. V (output) COMPLEX array, dimension (LDV,M) If JOBV = 'V', V contains the unitary matrix V. If JOBV = 'N', V is not referenced. LDV (input) INTEGER The leading dimension of the array V. LDV >= max(1,P) if JOBV = 'V'; LDV >= 1 otherwise. Q (output) COMPLEX array, dimension (LDQ,N) If JOBQ = 'Q', Q contains the unitary matrix Q. If JOBQ = 'N', Q is not referenced. LDQ (input) INTEGER The leading dimension of the array Q. LDQ >= max(1,N) if JOBQ = 'Q'; LDQ >= 1 otherwise. IWORK (workspace) INTEGER array, dimension (N) RWORK (workspace) REAL array, dimension (2*N) TAU (workspace) COMPLEX array, dimension (N) WORK (workspace) COMPLEX array, dimension (max(3*N,M,P)) INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value. Further Details =============== The subroutine uses LAPACK subroutine CGEQPF for the QR factorization with column pivoting to detect the effective numerical rank of the a matrix. It may be replaced by a better rank determination strategy. 
===================================================================== Test the input parameters Parameter adjustments Function Body */ /* Table of constant values */ static complex c_b1 = {0.f,0.f}; static complex c_b2 = {1.f,0.f}; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, q_dim1, q_offset, u_dim1, u_offset, v_dim1, v_offset, i__1, i__2, i__3; real r__1, r__2; /* Builtin functions */ double r_imag(complex *); /* Local variables */ static integer i, j; extern logical lsame_(char *, char *); static logical wantq, wantu, wantv; extern /* Subroutine */ int cgeqr2_(integer *, integer *, complex *, integer *, complex *, complex *, integer *), cgerq2_(integer *, integer *, complex *, integer *, complex *, complex *, integer *), cung2r_(integer *, integer *, integer *, complex *, integer *, complex *, complex *, integer *), cunm2r_(char *, char *, integer *, integer *, integer *, complex *, integer *, complex *, complex *, integer *, complex *, integer *), cunmr2_(char *, char *, integer *, integer *, integer *, complex *, integer *, complex *, complex *, integer *, complex *, integer *), cgeqpf_(integer *, integer *, complex *, integer *, integer *, complex *, complex *, real *, integer *), clacpy_(char *, integer *, integer *, complex *, integer *, complex *, integer *), claset_(char *, integer *, integer *, complex *, complex *, complex *, integer *), xerbla_(char *, integer *), clapmt_(logical *, integer *, integer *, complex *, integer *, integer *); static logical forwrd; #define IWORK(I) iwork[(I)-1] #define RWORK(I) rwork[(I)-1] #define TAU(I) tau[(I)-1] #define WORK(I) work[(I)-1] #define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)] #define B(I,J) b[(I)-1 + ((J)-1)* ( *ldb)] #define U(I,J) u[(I)-1 + ((J)-1)* ( *ldu)] #define V(I,J) v[(I)-1 + ((J)-1)* ( *ldv)] #define Q(I,J) q[(I)-1 + ((J)-1)* ( *ldq)] wantu = lsame_(jobu, "U"); wantv = lsame_(jobv, "V"); wantq = lsame_(jobq, "Q"); forwrd = TRUE_; *info = 0; if (! 
(wantu || lsame_(jobu, "N"))) { *info = -1; } else if (! (wantv || lsame_(jobv, "N"))) { *info = -2; } else if (! (wantq || lsame_(jobq, "N"))) { *info = -3; } else if (*m < 0) { *info = -4; } else if (*p < 0) { *info = -5; } else if (*n < 0) { *info = -6; } else if (*lda < max(1,*m)) { *info = -8; } else if (*ldb < max(1,*p)) { *info = -10; } else if (*ldu < 1 || wantu && *ldu < *m) { *info = -16; } else if (*ldv < 1 || wantv && *ldv < *p) { *info = -18; } else if (*ldq < 1 || wantq && *ldq < *n) { *info = -20; } if (*info != 0) { i__1 = -(*info); xerbla_("CGGSVP", &i__1); return 0; } /* QR with column pivoting of B: B*P = V*( S11 S12 ) ( 0 0 ) */ i__1 = *n; for (i = 1; i <= *n; ++i) { IWORK(i) = 0; /* L10: */ } cgeqpf_(p, n, &B(1,1), ldb, &IWORK(1), &TAU(1), &WORK(1), &RWORK(1), info); /* Update A := A*P */ clapmt_(&forwrd, m, n, &A(1,1), lda, &IWORK(1)); /* Determine the effective rank of matrix B. */ *l = 0; i__1 = min(*p,*n); for (i = 1; i <= min(*p,*n); ++i) { i__2 = i + i * b_dim1; if ((r__1 = B(i,i).r, dabs(r__1)) + (r__2 = r_imag(&B(i,i) ), dabs(r__2)) > *tolb) { ++(*l); } /* L20: */ } if (wantv) { /* Copy the details of V, and form V. 
*/ claset_("Full", p, p, &c_b1, &c_b1, &V(1,1), ldv); if (*p > 1) { i__1 = *p - 1; clacpy_("Lower", &i__1, n, &B(2,1), ldb, &V(2,1), ldv); } i__1 = min(*p,*n); cung2r_(p, p, &i__1, &V(1,1), ldv, &TAU(1), &WORK(1), info); } /* Clean up B */ i__1 = *l - 1; for (j = 1; j <= *l-1; ++j) { i__2 = *l; for (i = j + 1; i <= *l; ++i) { i__3 = i + j * b_dim1; B(i,j).r = 0.f, B(i,j).i = 0.f; /* L30: */ } /* L40: */ } if (*p > *l) { i__1 = *p - *l; claset_("Full", &i__1, n, &c_b1, &c_b1, &B(*l+1,1), ldb); } if (wantq) { /* Set Q = I and Update Q := Q*P */ claset_("Full", n, n, &c_b1, &c_b2, &Q(1,1), ldq); clapmt_(&forwrd, n, n, &Q(1,1), ldq, &IWORK(1)); } if (*p >= *l && *n != *l) { /* RQ factorization of ( S11 S12 ) = ( 0 S12 )*Z */ cgerq2_(l, n, &B(1,1), ldb, &TAU(1), &WORK(1), info); /* Update A := A*Z' */ cunmr2_("Right", "Conjugate transpose", m, n, l, &B(1,1), ldb, & TAU(1), &A(1,1), lda, &WORK(1), info); if (wantq) { /* Update Q := Q*Z' */ cunmr2_("Right", "Conjugate transpose", n, n, l, &B(1,1), ldb, &TAU(1), &Q(1,1), ldq, &WORK(1), info); } /* Clean up B */ i__1 = *n - *l; claset_("Full", l, &i__1, &c_b1, &c_b1, &B(1,1), ldb); i__1 = *n; for (j = *n - *l + 1; j <= *n; ++j) { i__2 = *l; for (i = j - *n + *l + 1; i <= *l; ++i) { i__3 = i + j * b_dim1; B(i,j).r = 0.f, B(i,j).i = 0.f; /* L50: */ } /* L60: */ } } /* Let N-L L A = ( A11 A12 ) M, then the following does the complete QR decomposition of A11: A11 = U*( 0 T12 )*P1' ( 0 0 ) */ i__1 = *n - *l; for (i = 1; i <= *n-*l; ++i) { IWORK(i) = 0; /* L70: */ } i__1 = *n - *l; cgeqpf_(m, &i__1, &A(1,1), lda, &IWORK(1), &TAU(1), &WORK(1), &RWORK( 1), info); /* Determine the effective rank of A11 */ *k = 0; /* Computing MIN */ i__2 = *m, i__3 = *n - *l; i__1 = min(i__2,i__3); for (i = 1; i <= min(*m,*n-*l); ++i) { i__2 = i + i * a_dim1; if ((r__1 = A(i,i).r, dabs(r__1)) + (r__2 = r_imag(&A(i,i) ), dabs(r__2)) > *tola) { ++(*k); } /* L80: */ } /* Update A12 := U'*A12, where A12 = A( 1:M, N-L+1:N ) Computing MIN */ i__2 = *m, 
i__3 = *n - *l; i__1 = min(i__2,i__3); cunm2r_("Left", "Conjugate transpose", m, l, &i__1, &A(1,1), lda, & TAU(1), &A(1,*n-*l+1), lda, &WORK(1), info); if (wantu) { /* Copy the details of U, and form U */ claset_("Full", m, m, &c_b1, &c_b1, &U(1,1), ldu); if (*m > 1) { i__1 = *m - 1; i__2 = *n - *l; clacpy_("Lower", &i__1, &i__2, &A(2,1), lda, &U(2,1) , ldu); } /* Computing MIN */ i__2 = *m, i__3 = *n - *l; i__1 = min(i__2,i__3); cung2r_(m, m, &i__1, &U(1,1), ldu, &TAU(1), &WORK(1), info); } if (wantq) { /* Update Q( 1:N, 1:N-L ) = Q( 1:N, 1:N-L )*P1 */ i__1 = *n - *l; clapmt_(&forwrd, n, &i__1, &Q(1,1), ldq, &IWORK(1)); } /* Clean up A: set the strictly lower triangular part of A(1:K, 1:K) = 0, and A( K+1:M, 1:N-L ) = 0. */ i__1 = *k - 1; for (j = 1; j <= *k-1; ++j) { i__2 = *k; for (i = j + 1; i <= *k; ++i) { i__3 = i + j * a_dim1; A(i,j).r = 0.f, A(i,j).i = 0.f; /* L90: */ } /* L100: */ } if (*m > *k) { i__1 = *m - *k; i__2 = *n - *l; claset_("Full", &i__1, &i__2, &c_b1, &c_b1, &A(*k+1,1), lda); } if (*n - *l > *k) { /* RQ factorization of ( T11 T12 ) = ( 0 T12 )*Z1 */ i__1 = *n - *l; cgerq2_(k, &i__1, &A(1,1), lda, &TAU(1), &WORK(1), info); if (wantq) { /* Update Q( 1:N,1:N-L ) = Q( 1:N,1:N-L )*Z1' */ i__1 = *n - *l; cunmr2_("Right", "Conjugate transpose", n, &i__1, k, &A(1,1), lda, &TAU(1), &Q(1,1), ldq, &WORK(1), info) ; } /* Clean up A */ i__1 = *n - *l - *k; claset_("Full", k, &i__1, &c_b1, &c_b1, &A(1,1), lda); i__1 = *n - *l; for (j = *n - *l - *k + 1; j <= *n-*l; ++j) { i__2 = *k; for (i = j - *n + *l + *k + 1; i <= *k; ++i) { i__3 = i + j * a_dim1; A(i,j).r = 0.f, A(i,j).i = 0.f; /* L110: */ } /* L120: */ } } if (*m > *k) { /* QR factorization of A( K+1:M,N-L+1:N ) */ i__1 = *m - *k; cgeqr2_(&i__1, l, &A(*k+1,*n-*l+1), lda, &TAU(1), & WORK(1), info); if (wantu) { /* Update U(:,K+1:M) := U(:,K+1:M)*U1 */ i__1 = *m - *k; /* Computing MIN */ i__3 = *m - *k; i__2 = min(i__3,*l); cunm2r_("Right", "No transpose", m, &i__1, &i__2, &A(*k+1,*n-*l+1), lda, 
&TAU(1), &U(1,*k+1), ldu, &WORK(1), info); } /* Clean up */ i__1 = *n; for (j = *n - *l + 1; j <= *n; ++j) { i__2 = *m; for (i = j - *n + *k + *l + 1; i <= *m; ++i) { i__3 = i + j * a_dim1; A(i,j).r = 0.f, A(i,j).i = 0.f; /* L130: */ } /* L140: */ } } return 0; /* End of CGGSVP */ } /* cggsvp_ */
/*  -- LAPACK routine (version 2.0) --
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
       Courant Institute, Argonne National Lab, and Rice University
       September 30, 1994

    Purpose
    =======

    SORGTR generates a real orthogonal matrix Q which is defined as the
    product of n-1 elementary reflectors of order N, as returned by
    SSYTRD:

    if UPLO = 'U', Q = H(n-1) . . . H(2) H(1),

    if UPLO = 'L', Q = H(1) H(2) . . . H(n-1).

    Arguments
    =========

    UPLO    (input) CHARACTER*1
            = 'U': Upper triangle of A contains elementary reflectors
                   from SSYTRD;
            = 'L': Lower triangle of A contains elementary reflectors
                   from SSYTRD.

    N       (input) INTEGER
            The order of the matrix Q. N >= 0.

    A       (input/output) REAL array, dimension (LDA,N)
            On entry, the vectors which define the elementary reflectors,
            as returned by SSYTRD.
            On exit, the N-by-N orthogonal matrix Q.

    LDA     (input) INTEGER
            The leading dimension of the array A. LDA >= max(1,N).

    TAU     (input) REAL array, dimension (N-1)
            TAU(i) must contain the scalar factor of the elementary
            reflector H(i), as returned by SSYTRD.

    WORK    (workspace/output) REAL array, dimension (LWORK)
            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.

    LWORK   (input) INTEGER
            The dimension of the array WORK. LWORK >= max(1,N-1).
            For optimum performance LWORK >= (N-1)*NB, where NB is
            the optimal blocksize.

    INFO    (output) INTEGER
            = 0:  successful exit
            < 0:  if INFO = -i, the i-th argument had an illegal value

    =====================================================================
*/
/* Subroutine */ int sorgtr_(char *uplo, integer *n, real *a, integer *lda,
        real *tau, real *work, integer *lwork, integer *info)
{
    /* External routines */
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int xerbla_(char *, integer *),
            sorgql_(integer *, integer *, integer *, real *, integer *,
                    real *, real *, integer *, integer *),
            sorgqr_(integer *, integer *, integer *, real *, integer *,
                    real *, real *, integer *, integer *);

    /* Local variables (automatic storage, so the routine is reentrant) */
    integer i, j, iinfo;
    integer lwkmin;                 /* smallest admissible LWORK */
    integer errarg;                 /* position of the offending argument */
    integer sub_m, sub_n, sub_k;    /* dimensions of the (n-1) sub-problem */
    logical upper;

/* 1-based, column-major accessors for the Fortran-style arrays */
#define TAU(I) tau[(I)-1]
#define WORK(I) work[(I)-1]
#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]

    /* Test the input arguments */
    *info = 0;
    upper = lsame_(uplo, "U");

    lwkmin = *n - 1;
    if (lwkmin < 1) {
        lwkmin = 1;
    }

    if (! upper && ! lsame_(uplo, "L")) {
        *info = -1;
    } else if (*n < 0) {
        *info = -2;
    } else if (*lda < max(1,*n)) {
        *info = -4;
    } else if (*lwork < lwkmin) {
        *info = -7;
    }
    if (*info != 0) {
        errarg = -(*info);
        xerbla_("SORGTR", &errarg);
        return 0;
    }

    /* Quick return if possible */
    if (*n == 0) {
        WORK(1) = 1.f;
        return 0;
    }

    if (upper) {

        /* Q was determined by a call to SSYTRD with UPLO = 'U'.
           Shift the vectors which define the elementary reflectors one
           column to the left, and set the last row and column of Q to
           those of the unit matrix.  Columns are processed left to right
           so column j+1 is still intact when it is copied into
           column j. */
        for (j = 1; j <= *n - 1; ++j) {
            for (i = 1; i <= j - 1; ++i) {
                A(i,j) = A(i,j+1);
            }
            A(*n,j) = 0.f;
        }
        for (i = 1; i <= *n - 1; ++i) {
            A(i,*n) = 0.f;
        }
        A(*n,*n) = 1.f;

        /* Generate Q(1:n-1,1:n-1) */
        sub_m = *n - 1;
        sub_n = *n - 1;
        sub_k = *n - 1;
        sorgql_(&sub_m, &sub_n, &sub_k, &A(1,1), lda, &TAU(1), &WORK(1),
                lwork, &iinfo);

    } else {

        /* Q was determined by a call to SSYTRD with UPLO = 'L'.
           Shift the vectors which define the elementary reflectors one
           column to the right, and set the first row and column of Q to
           those of the unit matrix.  Columns are processed right to left
           so column j-1 is still intact when it is copied into
           column j. */
        for (j = *n; j >= 2; --j) {
            A(1,j) = 0.f;
            for (i = j + 1; i <= *n; ++i) {
                A(i,j) = A(i,j-1);
            }
        }
        A(1,1) = 1.f;
        for (i = 2; i <= *n; ++i) {
            A(i,1) = 0.f;
        }

        if (*n > 1) {
            /* Generate Q(2:n,2:n) */
            sub_m = *n - 1;
            sub_n = *n - 1;
            sub_k = *n - 1;
            sorgqr_(&sub_m, &sub_n, &sub_k, &A(2,2), lda, &TAU(1),
                    &WORK(1), lwork, &iinfo);
        } else {
            /* Q is the 1-by-1 identity and SORGQR is not called, so
               report the workspace size here to honour the WORK(1)
               contract. */
            WORK(1) = 1.f;
        }
    }
    return 0;

/*     End of SORGTR */

} /* sorgtr_ */
/* Subroutine */ int dgehd2_(integer *n, integer *ilo, integer *ihi, 
        doublereal *a, integer *lda, doublereal *tau, doublereal *work, 
        integer *info)
{
/*  -- LAPACK routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       October 31, 1992   

    Purpose   
    =======   

    DGEHD2 reduces a real general matrix A to upper Hessenberg form H by 
    an orthogonal similarity transformation:  Q' * A * Q = H .   

    Arguments   
    =========   

    N       (input) INTEGER   
            The order of the matrix A.  N >= 0.   

    ILO     (input) INTEGER   
    IHI     (input) INTEGER   
            It is assumed that A is already upper triangular in rows   
            and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally   
            set by a previous call to DGEBAL; otherwise they should be   
            set to 1 and N respectively. See Further Details.   
            1 <= ILO <= IHI <= max(1,N).   

    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)   
            On entry, the n by n general matrix to be reduced.   
            On exit, the upper triangle and the first subdiagonal of A   
            are overwritten with the upper Hessenberg matrix H, and the   
            elements below the first subdiagonal, with the array TAU,   
            represent the orthogonal matrix Q as a product of elementary 
            reflectors. See Further Details.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    TAU     (output) DOUBLE PRECISION array, dimension (N-1)   
            The scalar factors of the elementary reflectors (see Further 
            Details).   

    WORK    (workspace) DOUBLE PRECISION array, dimension (N)   

    INFO    (output) INTEGER   
            = 0:  successful exit.   
            < 0:  if INFO = -i, the i-th argument had an illegal value.   

    Return value: always 0; errors are reported through INFO (and   
    XERBLA is called before returning on an illegal argument).   

    Further Details   
    ===============   

    The matrix Q is represented as a product of (ihi-ilo) elementary   
    reflectors   

       Q = H(ilo) H(ilo+1) . . . H(ihi-1).   

    Each H(i) has the form   

       H(i) = I - tau * v * v'   

    where tau is a real scalar, and v is a real vector with   
    v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on   
    exit in A(i+2:ihi,i), and tau in TAU(i).   

    The contents of A are illustrated by the following example, with   
    n = 7, ilo = 2 and ihi = 6:   

    on entry,                        on exit,   

    ( a   a   a   a   a   a   a )    (  a   a   h   h   h   h   a )   
    (     a   a   a   a   a   a )    (      a   h   h   h   h   a )   
    (     a   a   a   a   a   a )    (      h   h   h   h   h   h )   
    (     a   a   a   a   a   a )    (      v2  h   h   h   h   h )   
    (     a   a   a   a   a   a )    (      v2  v3  h   h   h   h )   
    (     a   a   a   a   a   a )    (      v2  v3  v4  h   h   h )   
    (                         a )    (                          a )   

    where a denotes an element of the original matrix A, h denotes a   
    modified element of the upper Hessenberg matrix H, and vi denotes an 
    element of the vector defining H(i).   

    ===================================================================== 
*/
    /* Table of constant values (passed by address to the callees below;
       presumably never written by them) */
    static integer c__1 = 1;

    /* Work locals.  f2c emitted these as `static`, which made the routine
       non-reentrant; they are automatic here because every one of them is
       assigned before it is read within a single call. */
    integer i;
    integer neg_info;   /* positive argument index handed to xerbla_ */
    integer nrefl;      /* ihi - i: order of the current reflector */
    integer ncols;      /* n - i: columns touched by the left update */
    doublereal aii;     /* saved subdiagonal entry A(i+1,i) */

    extern /* Subroutine */ int dlarf_(char *, integer *, integer *, 
            doublereal *, integer *, doublereal *, doublereal *, integer *, 
            doublereal *), dlarfg_(integer *, doublereal *, doublereal *, 
            integer *, doublereal *), xerbla_(char *, integer *);

/* 1-based Fortran-style accessors (kept token-identical to the other
   routines in this file, which define the same macro names) */
#define TAU(I) tau[(I)-1]
#define WORK(I) work[(I)-1]
#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]

    /* Test the input parameters */
    *info = 0;
    if (*n < 0) {
        *info = -1;
    } else if (*ilo < 1 || *ilo > max(1,*n)) {
        *info = -2;
    } else if (*ihi < min(*ilo,*n) || *ihi > *n) {
        *info = -3;
    } else if (*lda < max(1,*n)) {
        *info = -5;
    }
    if (*info != 0) {
        neg_info = -(*info);
        xerbla_("DGEHD2", &neg_info);
        return 0;
    }

    for (i = *ilo; i <= *ihi-1; ++i) {

        /* Compute elementary reflector H(i) to annihilate A(i+2:ihi,i).
           min(i+2,n) keeps the address inside A when i+1 is the last row. */
        nrefl = *ihi - i;
        dlarfg_(&nrefl, &A(i+1,i), &A(min(i+2,*n),i), &c__1, &TAU(i));

        /* Temporarily store the implicit leading 1 of v in A(i+1,i) so the
           column can be passed to dlarf_ as the full reflector vector. */
        aii = A(i+1,i);
        A(i+1,i) = 1.;

        /* Apply H(i) to A(1:ihi,i+1:ihi) from the right */
        nrefl = *ihi - i;
        dlarf_("Right", ihi, &nrefl, &A(i+1,i), &c__1, &TAU(i), 
                &A(1,i+1), lda, &WORK(1));

        /* Apply H(i) to A(i+1:ihi,i+1:n) from the left */
        nrefl = *ihi - i;
        ncols = *n - i;
        dlarf_("Left", &nrefl, &ncols, &A(i+1,i), &c__1, &TAU(i), 
                &A(i+1,i+1), lda, &WORK(1));

        /* Restore the subdiagonal element of H */
        A(i+1,i) = aii;
/* L10: */
    }

    return 0;

/*     End of DGEHD2 */

} /* dgehd2_ */
/* Subroutine */ int zlahrd_(integer *n, integer *k, integer *nb, 
        doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *t, 
        integer *ldt, doublecomplex *y, integer *ldy)
{
/*  -- LAPACK auxiliary routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   

    Purpose   
    =======   

    ZLAHRD reduces the first NB columns of a complex general n-by-(n-k+1) 
    matrix A so that elements below the k-th subdiagonal are zero. The   
    reduction is performed by a unitary similarity transformation   
    Q' * A * Q. The routine returns the matrices V and T which determine 
    Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T. 

    This is an auxiliary routine called by ZGEHRD.   

    Arguments   
    =========   

    N       (input) INTEGER   
            The order of the matrix A.   

    K       (input) INTEGER   
            The offset for the reduction. Elements below the k-th   
            subdiagonal in the first NB columns are reduced to zero.   

    NB      (input) INTEGER   
            The number of columns to be reduced.   

    A       (input/output) COMPLEX*16 array, dimension (LDA,N-K+1)   
            On entry, the n-by-(n-k+1) general matrix A.   
            On exit, the elements on and above the k-th subdiagonal in   
            the first NB columns are overwritten with the corresponding   
            elements of the reduced matrix; the elements below the k-th   
            subdiagonal, with the array TAU, represent the matrix Q as a 
            product of elementary reflectors. The other columns of A are 
            unchanged. See Further Details.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    TAU     (output) COMPLEX*16 array, dimension (NB)   
            The scalar factors of the elementary reflectors. See Further 
            Details.   

    T       (output) COMPLEX*16 array, dimension (NB,NB)   
            The upper triangular matrix T.   

    LDT     (input) INTEGER   
            The leading dimension of the array T.  LDT >= NB.   

    Y       (output) COMPLEX*16 array, dimension (LDY,NB)   
            The n-by-nb matrix Y.   

    LDY     (input) INTEGER   
            The leading dimension of the array Y. LDY >= max(1,N).   

    Return value: always 0.   

    Further Details   
    ===============   

    The matrix Q is represented as a product of nb elementary reflectors 

       Q = H(1) H(2) . . . H(nb).   

    Each H(i) has the form   

       H(i) = I - tau * v * v'   

    where tau is a complex scalar, and v is a complex vector with   
    v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in   
    A(i+k+1:n,i), and tau in TAU(i).   

    The elements of the vectors v together form the (n-k+1)-by-nb matrix 
    V which is needed, with T and Y, to apply the transformation to the   
    unreduced part of the matrix, using an update of the form:   
    A := (I - V*T*V') * (A - Y*V').   

    The contents of A on exit are illustrated by the following example   
    with n = 7, k = 3 and nb = 2:   

       ( a   h   a   a   a )   
       ( a   h   a   a   a )   
       ( a   h   a   a   a )   
       ( h   h   a   a   a )   
       ( v1  h   a   a   a )   
       ( v1  v2  a   a   a )   
       ( v1  v2  a   a   a )   

    where a denotes an element of the original matrix A, h denotes a   
    modified element of the upper Hessenberg matrix H, and vi denotes an 
    element of the vector defining H(i).   

    ===================================================================== 
*/
    /* Table of constant values (passed by address to the BLAS callees;
       presumably never written by them) */
    static doublecomplex c_b1 = {0.,0.};
    static doublecomplex c_b2 = {1.,0.};
    static integer c__1 = 1;

    /* Work locals.  f2c emitted `i` and `ei` as `static` and also computed
       dead linear indices from a_dim1 / t_dim1, which were never assigned
       (an uninitialized read).  Both problems are removed here: the locals
       are automatic and the dead index arithmetic is gone. */
    integer i;
    integer im1;        /* i - 1 */
    integer rows;       /* n - k - i + 1: rows k+i..n of the trailing part */
    doublecomplex neg_one;  /* -1 as a complex BLAS scalar */
    doublecomplex ntau;     /* -tau(i) */
    /* Saved subdiagonal entry A(k+i,i).  Zero-initialised so the final
       store below is deterministic even if the loop body never runs
       (the static original carried over the previous call's value). */
    doublecomplex ei = {0.,0.};

    extern /* Subroutine */ int zscal_(integer *, doublecomplex *, 
            doublecomplex *, integer *), zgemv_(char *, integer *, integer *, 
            doublecomplex *, doublecomplex *, integer *, doublecomplex *, 
            integer *, doublecomplex *, doublecomplex *, integer *), 
            zcopy_(integer *, doublecomplex *, integer *, doublecomplex *, 
            integer *), zaxpy_(integer *, doublecomplex *, doublecomplex *, 
            integer *, doublecomplex *, integer *), ztrmv_(char *, char *, 
            char *, integer *, doublecomplex *, integer *, doublecomplex *, 
            integer *);
    extern /* Subroutine */ int zlarfg_(integer *, doublecomplex *, 
            doublecomplex *, integer *, doublecomplex *), zlacgv_(integer *, 
            doublecomplex *, integer *);

/* 1-based Fortran-style accessors (kept token-identical to the other
   routines in this file, which define the same macro names) */
#define TAU(I) tau[(I)-1]
#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]
#define T(I,J) t[(I)-1 + ((J)-1)* ( *ldt)]
#define Y(I,J) y[(I)-1 + ((J)-1)* ( *ldy)]

    /* Quick return if possible */
    if (*n <= 1) {
        return 0;
    }

    neg_one.r = -1., neg_one.i = 0.;

    for (i = 1; i <= *nb; ++i) {
        im1 = i - 1;
        rows = *n - *k - i + 1;

        if (i > 1) {

            /* Update A(1:n,i)   

               Compute i-th column of A - Y * V'.  Row k+i-1 of V is
               conjugated in place for the product and restored after. */
            zlacgv_(&im1, &A(*k+i-1,1), lda);
            zgemv_("No transpose", n, &im1, &neg_one, &Y(1,1), ldy, 
                    &A(*k+i-1,1), lda, &c_b2, &A(1,i), &c__1);
            zlacgv_(&im1, &A(*k+i-1,1), lda);

            /* Apply I - V * T' * V' to this column (call it b) from the   
               left, using the last column of T as workspace   

               Let  V = ( V1 )   and   b = ( b1 )   (first I-1 rows)   
                        ( V2 )             ( b2 )   

               where V1 is unit lower triangular   

               w := V1' * b1 */
            zcopy_(&im1, &A(*k+1,i), &c__1, &T(1,*nb), &c__1);
            ztrmv_("Lower", "Conjugate transpose", "Unit", &im1, 
                    &A(*k+1,1), lda, &T(1,*nb), &c__1);

            /* w := w + V2'*b2 */
            zgemv_("Conjugate transpose", &rows, &im1, &c_b2, 
                    &A(*k+i,1), lda, &A(*k+i,i), &c__1, &c_b2, 
                    &T(1,*nb), &c__1);

            /* w := T'*w */
            ztrmv_("Upper", "Conjugate transpose", "Non-unit", &im1, 
                    &T(1,1), ldt, &T(1,*nb), &c__1);

            /* b2 := b2 - V2*w */
            zgemv_("No transpose", &rows, &im1, &neg_one, &A(*k+i,1), 
                    lda, &T(1,*nb), &c__1, &c_b2, &A(*k+i,i), &c__1);

            /* b1 := b1 - V1*w */
            ztrmv_("Lower", "No transpose", "Unit", &im1, &A(*k+1,1), 
                    lda, &T(1,*nb), &c__1);
            zaxpy_(&im1, &neg_one, &T(1,*nb), &c__1, &A(*k+1,i), &c__1);

            /* Restore the subdiagonal entry of the previous column, which
               was overwritten with 1 while it served as a reflector. */
            A(*k+i-1,i-1).r = ei.r, A(*k+i-1,i-1).i = ei.i;
        }

        /* Generate the elementary reflector H(i) to annihilate   
           A(k+i+1:n,i).  zlarfg_ receives the pivot through ei; the
           matrix entry itself is then set to the implicit 1 of v. */
        ei.r = A(*k+i,i).r, ei.i = A(*k+i,i).i;
        zlarfg_(&rows, &ei, &A(min(*k+i+1,*n),i), &c__1, &TAU(i));
        A(*k+i,i).r = 1., A(*k+i,i).i = 0.;

        /* Compute  Y(1:n,i) */
        zgemv_("No transpose", n, &rows, &c_b2, &A(1,i+1), lda, 
                &A(*k+i,i), &c__1, &c_b1, &Y(1,i), &c__1);
        zgemv_("Conjugate transpose", &rows, &im1, &c_b2, &A(*k+i,1), 
                lda, &A(*k+i,i), &c__1, &c_b1, &T(1,i), &c__1);
        zgemv_("No transpose", n, &im1, &neg_one, &Y(1,1), ldy, 
                &T(1,i), &c__1, &c_b2, &Y(1,i), &c__1);
        zscal_(n, &TAU(i), &Y(1,i), &c__1);

        /* Compute T(1:i,i) */
        ntau.r = -TAU(i).r, ntau.i = -TAU(i).i;
        zscal_(&im1, &ntau, &T(1,i), &c__1);
        ztrmv_("Upper", "No transpose", "Non-unit", &im1, &T(1,1), ldt, 
                &T(1,i), &c__1);
        T(i,i).r = TAU(i).r, T(i,i).i = TAU(i).i;

/* L10: */
    }

    /* Restore the subdiagonal entry of the last reduced column */
    A(*k+*nb,*nb).r = ei.r, A(*k+*nb,*nb).i = ei.i;

    return 0;

/*     End of ZLAHRD */

} /* zlahrd_ */
/* Subroutine */ int slatrd_(char *uplo, int *n, int *nb, real *a, 
        int *lda, real *e, real *tau, real *w, int *ldw)
{
/*  -- LAPACK auxiliary routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       October 31, 1992   

    Purpose   
    =======   

    SLATRD reduces NB rows and columns of a real symmetric matrix A to   
    symmetric tridiagonal form by an orthogonal similarity   
    transformation Q' * A * Q, and returns the matrices V and W which are 
    needed to apply the transformation to the unreduced part of A.   

    If UPLO = 'U', SLATRD reduces the last NB rows and columns of a   
    matrix, of which the upper triangle is supplied;   
    if UPLO = 'L', SLATRD reduces the first NB rows and columns of a   
    matrix, of which the lower triangle is supplied.   

    This is an auxiliary routine called by SSYTRD.   

    Arguments   
    =========   

    UPLO    (input) CHARACTER   
            Specifies whether the upper or lower triangular part of the   
            symmetric matrix A is stored:   
            = 'U': Upper triangular   
            = 'L': Lower triangular   
            (Any value for which lsame_(uplo, "U") is false takes the   
            lower-triangle path; no argument checking is done here.)   

    N       (input) INTEGER   
            The order of the matrix A.   

    NB      (input) INTEGER   
            The number of rows and columns to be reduced.   

    A       (input/output) REAL array, dimension (LDA,N)   
            On entry, the symmetric matrix A.  If UPLO = 'U', the leading 
            n-by-n upper triangular part of A contains the upper   
            triangular part of the matrix A, and the strictly lower   
            triangular part of A is not referenced.  If UPLO = 'L', the   
            leading n-by-n lower triangular part of A contains the lower 
            triangular part of the matrix A, and the strictly upper   
            triangular part of A is not referenced.   
            On exit:   
            if UPLO = 'U', the last NB columns have been reduced to   
              tridiagonal form, with the diagonal elements overwriting   
              the diagonal elements of A; the elements above the diagonal 
              with the array TAU, represent the orthogonal matrix Q as a 
              product of elementary reflectors;   
            if UPLO = 'L', the first NB columns have been reduced to   
              tridiagonal form, with the diagonal elements overwriting   
              the diagonal elements of A; the elements below the diagonal 
              with the array TAU, represent the  orthogonal matrix Q as a 
              product of elementary reflectors.   
            See Further Details.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    E       (output) REAL array, dimension (N-1)   
            If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal   
            elements of the last NB columns of the reduced matrix;   
            if UPLO = 'L', E(1:nb) contains the subdiagonal elements of   
            the first NB columns of the reduced matrix.   

    TAU     (output) REAL array, dimension (N-1)   
            The scalar factors of the elementary reflectors, stored in   
            TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'. 
            See Further Details.   

    W       (output) REAL array, dimension (LDW,NB)   
            The n-by-nb matrix W required to update the unreduced part   
            of A.   

    LDW     (input) INTEGER   
            The leading dimension of the array W. LDW >= max(1,N).   

    Return value: always 0.   

    Further Details   
    ===============   

    If UPLO = 'U', the matrix Q is represented as a product of elementary 
    reflectors   

       Q = H(n) H(n-1) . . . H(n-nb+1).   

    Each H(i) has the form   

       H(i) = I - tau * v * v'   

    where tau is a real scalar, and v is a real vector with   
    v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i), 
    and tau in TAU(i-1).   

    If UPLO = 'L', the matrix Q is represented as a product of elementary 
    reflectors   

       Q = H(1) H(2) . . . H(nb).   

    Each H(i) has the form   

       H(i) = I - tau * v * v'   

    where tau is a real scalar, and v is a real vector with   
    v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i), 
    and tau in TAU(i).   

    The elements of the vectors v together form the n-by-nb matrix V   
    which is needed, with W, to apply the transformation to the unreduced 
    part of the matrix, using a symmetric rank-2k update of the form:   
    A := A - V*W' - W*V'.   

    The contents of A on exit are illustrated by the following examples   
    with n = 5 and nb = 2:   

    if UPLO = 'U':                       if UPLO = 'L':   

      (  a   a   a   v4  v5 )              (  d                  )   
      (      a   a   v4  v5 )              (  1   d              )   
      (          a   1   v5 )              (  v1  1   a          )   
      (              d   1  )              (  v1  v2  a   a      )   
      (                  d  )              (  v1  v2  a   a   a  )   

    where d denotes a diagonal element of the reduced matrix, a denotes   
    an element of the original matrix that is unchanged, and vi denotes   
    an element of the vector defining H(i).   

    ===================================================================== 
*/
    /* Table of constant values (passed by address to the BLAS callees;
       presumably never written by them) */
    static real c_b5 = -1.f;
    static real c_b6 = 1.f;
    static int c__1 = 1;
    static real c_b16 = 0.f;

    /* Unused variables commented out by MDG on 03-09-05
    int a_dim1, a_offset, w_dim1, w_offset; */

    /* Work locals.  f2c emitted i, alpha and iw as `static`, which made
       the routine non-reentrant; each is assigned before use within a
       call, so they are automatic here. */
    int i;
    int iw;         /* column of W matching column i of A (upper case) */
    int len;        /* length of the current reflector / vector */
    int ndone;      /* number of columns already reduced */
    real alpha;

    extern doublereal sdot_(int *, real *, int *, real *, int *);
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int sscal_(int *, real *, real *, int *), 
            sgemv_(char *, int *, int *, real *, real *, int *, real *, 
            int *, real *, real *, int *), saxpy_(int *, real *, real *, 
            int *, real *, int *), ssymv_(char *, int *, real *, real *, 
            int *, real *, int *, real *, real *, int *);
    extern /* Subroutine */ int slarfg_(int *, real *, real *, int *, 
            real *);

/* 1-based Fortran-style accessors (kept token-identical to the other
   routines in this file, which define the same macro names) */
#define E(I) e[(I)-1]
#define TAU(I) tau[(I)-1]
#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]
#define W(I,J) w[(I)-1 + ((J)-1)* ( *ldw)]

    /* Quick return if possible */
    if (*n <= 0) {
        return 0;
    }

    if (lsame_(uplo, "U")) {

        /* Reduce last NB columns of upper triangle */
        for (i = *n; i >= *n-*nb+1; --i) {
            iw = i - *n + *nb;
            if (i < *n) {

                /* Update A(1:i,i) with the columns already reduced */
                ndone = *n - i;
                sgemv_("No transpose", &i, &ndone, &c_b5, &A(1,i+1), lda, 
                        &W(i,iw+1), ldw, &c_b6, &A(1,i), &c__1);
                sgemv_("No transpose", &i, &ndone, &c_b5, &W(1,iw+1), ldw, 
                        &A(i,i+1), lda, &c_b6, &A(1,i), &c__1);
            }
            if (i > 1) {

                /* Generate elementary reflector H(i) to annihilate   
                   A(1:i-2,i) */
                len = i - 1;
                slarfg_(&len, &A(i-1,i), &A(1,i), &c__1, &TAU(i - 1));
                E(i - 1) = A(i-1,i);
                A(i-1,i) = 1.f;

                /* Compute W(1:i-1,i) */
                ssymv_("Upper", &len, &c_b6, &A(1,1), lda, &A(1,i), &c__1, 
                        &c_b16, &W(1,iw), &c__1);
                if (i < *n) {
                    ndone = *n - i;
                    /* W(i+1:n,iw) is used as scratch for the two
                       intermediate products. */
                    sgemv_("Transpose", &len, &ndone, &c_b6, &W(1,iw+1), 
                            ldw, &A(1,i), &c__1, &c_b16, &W(i+1,iw), &c__1);
                    sgemv_("No transpose", &len, &ndone, &c_b5, 
                            &A(1,i+1), lda, &W(i+1,iw), &c__1, &c_b6, 
                            &W(1,iw), &c__1);
                    sgemv_("Transpose", &len, &ndone, &c_b6, &A(1,i+1), 
                            lda, &A(1,i), &c__1, &c_b16, &W(i+1,iw), &c__1);
                    sgemv_("No transpose", &len, &ndone, &c_b5, 
                            &W(1,iw+1), ldw, &W(i+1,iw), &c__1, &c_b6, 
                            &W(1,iw), &c__1);
                }
                sscal_(&len, &TAU(i - 1), &W(1,iw), &c__1);
                /* Expression kept in the original operand order so the
                   rounding of the product is unchanged. */
                alpha = TAU(i - 1) * -.5f * sdot_(&len, &W(1,iw), &c__1, 
                        &A(1,i), &c__1);
                saxpy_(&len, &alpha, &A(1,i), &c__1, &W(1,iw), &c__1);
            }

/* L10: */
        }
    } else {

        /* Reduce first NB columns of lower triangle */
        for (i = 1; i <= *nb; ++i) {

            /* Update A(i:n,i) with the columns already reduced */
            len = *n - i + 1;
            ndone = i - 1;
            sgemv_("No transpose", &len, &ndone, &c_b5, &A(i,1), lda, 
                    &W(i,1), ldw, &c_b6, &A(i,i), &c__1);
            sgemv_("No transpose", &len, &ndone, &c_b5, &W(i,1), ldw, 
                    &A(i,1), lda, &c_b6, &A(i,i), &c__1);
            if (i < *n) {

                /* Generate elementary reflector H(i) to annihilate   
                   A(i+2:n,i).  min(i+2,n) keeps the address inside A. */
                len = *n - i;
                slarfg_(&len, &A(i+1,i), &A(min(i+2,*n),i), &c__1, 
                        &TAU(i));
                E(i) = A(i+1,i);
                A(i+1,i) = 1.f;

                /* Compute W(i+1:n,i) */
                ssymv_("Lower", &len, &c_b6, &A(i+1,i+1), lda, 
                        &A(i+1,i), &c__1, &c_b16, &W(i+1,i), &c__1);
                /* W(1:i-1,i) is used as scratch for the two
                   intermediate products. */
                sgemv_("Transpose", &len, &ndone, &c_b6, &W(i+1,1), ldw, 
                        &A(i+1,i), &c__1, &c_b16, &W(1,i), &c__1);
                sgemv_("No transpose", &len, &ndone, &c_b5, &A(i+1,1), 
                        lda, &W(1,i), &c__1, &c_b6, &W(i+1,i), &c__1);
                sgemv_("Transpose", &len, &ndone, &c_b6, &A(i+1,1), lda, 
                        &A(i+1,i), &c__1, &c_b16, &W(1,i), &c__1);
                sgemv_("No transpose", &len, &ndone, &c_b5, &W(i+1,1), 
                        ldw, &W(1,i), &c__1, &c_b6, &W(i+1,i), &c__1);
                sscal_(&len, &TAU(i), &W(i+1,i), &c__1);
                /* Expression kept in the original operand order so the
                   rounding of the product is unchanged. */
                alpha = TAU(i) * -.5f * sdot_(&len, &W(i+1,i), &c__1, 
                        &A(i+1,i), &c__1);
                saxpy_(&len, &alpha, &A(i+1,i), &c__1, &W(i+1,i), &c__1);
            }

/* L20: */
        }
    }

    return 0;

/*     End of SLATRD */

} /* slatrd_ */
/* Subroutine */ int cung2l_(integer *m, integer *n, integer *k, complex *a, 
        integer *lda, complex *tau, complex *work, integer *info)
{
/*  -- LAPACK routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   

    Purpose   
    =======   

    CUNG2L generates an m by n complex matrix Q with orthonormal columns, 
    which is defined as the last n columns of a product of k elementary   
    reflectors of order m   

          Q  =  H(k) . . . H(2) H(1)   

    as returned by CGEQLF.   

    Arguments   
    =========   

    M       (input) INTEGER   
            The number of rows of the matrix Q. M >= 0.   

    N       (input) INTEGER   
            The number of columns of the matrix Q. M >= N >= 0.   

    K       (input) INTEGER   
            The number of elementary reflectors whose product defines the 
            matrix Q. N >= K >= 0.   

    A       (input/output) COMPLEX array, dimension (LDA,N)   
            On entry, the (n-k+i)-th column must contain the vector which 
            defines the elementary reflector H(i), for i = 1,2,...,k, as 
            returned by CGEQLF in the last k columns of its array   
            argument A.   
            On exit, the m-by-n matrix Q.   

    LDA     (input) INTEGER   
            The first dimension of the array A. LDA >= max(1,M).   

    TAU     (input) COMPLEX array, dimension (K)   
            TAU(i) must contain the scalar factor of the elementary   
            reflector H(i), as returned by CGEQLF.   

    WORK    (workspace) COMPLEX array, dimension (N)   

    INFO    (output) INTEGER   
            = 0: successful exit   
            < 0: if INFO = -i, the i-th argument has an illegal value   

    Return value: always 0; errors are reported through INFO (and   
    XERBLA is called before returning on an illegal argument).   

    ===================================================================== 
*/
    /* Table of constant values (passed by address to the callees below;
       presumably never written by them) */
    static integer c__1 = 1;

    /* Work locals.  f2c emitted i, j, l and ii as `static` and computed
       dead linear indices from a_dim1, which was never assigned (an
       uninitialized read).  The locals are automatic here and the dead
       index arithmetic is removed. */
    integer i, j, l;
    integer ii;         /* column of A holding the vector of H(i) */
    integer diag;       /* m - n + ii: row of the unit entry of v */
    integer neg_info;   /* positive argument index handed to xerbla_ */
    integer nrows, ncols;
    complex q__1;

    extern /* Subroutine */ int cscal_(integer *, complex *, complex *, 
            integer *), clarf_(char *, integer *, integer *, complex *, 
            integer *, complex *, complex *, integer *, complex *);
    extern /* Subroutine */ int xerbla_(char *, integer *);

/* 1-based Fortran-style accessors (kept token-identical to the other
   routines in this file, which define the same macro names) */
#define TAU(I) tau[(I)-1]
#define WORK(I) work[(I)-1]
#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]

    /* Test the input arguments */
    *info = 0;
    if (*m < 0) {
        *info = -1;
    } else if (*n < 0 || *n > *m) {
        *info = -2;
    } else if (*k < 0 || *k > *n) {
        *info = -3;
    } else if (*lda < max(1,*m)) {
        *info = -5;
    }
    if (*info != 0) {
        neg_info = -(*info);
        xerbla_("CUNG2L", &neg_info);
        return 0;
    }

    /* Quick return if possible */
    if (*n <= 0) {
        return 0;
    }

    /* Initialise columns 1:n-k to columns of the unit matrix */
    for (j = 1; j <= *n-*k; ++j) {
        for (l = 1; l <= *m; ++l) {
            A(l,j).r = 0.f, A(l,j).i = 0.f;
/* L10: */
        }
        A(*m-*n+j,j).r = 1.f, A(*m-*n+j,j).i = 0.f;
/* L20: */
    }

    for (i = 1; i <= *k; ++i) {
        ii = *n - *k + i;
        diag = *m - *n + ii;

        /* Apply H(i) to A(1:m-k+i,1:n-k+i) from the left.  The implicit
           unit entry of v is written into A first so column ii can be
           passed to clarf_ as the full reflector vector. */
        A(diag,ii).r = 1.f, A(diag,ii).i = 0.f;
        nrows = diag;
        ncols = ii - 1;
        clarf_("Left", &nrows, &ncols, &A(1,ii), &c__1, &TAU(i), &A(1,1), 
                lda, &WORK(1));

        /* Column ii of Q is H(i) applied to a unit vector:
           -tau(i)*v above the unit entry, 1 - tau(i) at it. */
        nrows = diag - 1;
        q__1.r = -TAU(i).r, q__1.i = -TAU(i).i;
        cscal_(&nrows, &q__1, &A(1,ii), &c__1);
        q__1.r = 1.f - TAU(i).r, q__1.i = 0.f - TAU(i).i;
        A(diag,ii).r = q__1.r, A(diag,ii).i = q__1.i;

        /* Set A(m-k+i+1:m,n-k+i) to zero */
        for (l = diag + 1; l <= *m; ++l) {
            A(l,ii).r = 0.f, A(l,ii).i = 0.f;
/* L30: */
        }
/* L40: */
    }
    return 0;

/*     End of CUNG2L */

} /* cung2l_ */
/* Subroutine */ int zlarft_(char *direct, char *storev, integer *n, integer *
        k, doublecomplex *v, integer *ldv, doublecomplex *tau, doublecomplex *
        t, integer *ldt)
{
/*  -- LAPACK auxiliary routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   

    Purpose   
    =======   

    ZLARFT forms the triangular factor T of a complex block reflector H   
    of order n, which is defined as a product of k elementary reflectors. 

    If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular; 

    If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular. 

    If STOREV = 'C', the vector which defines the elementary reflector   
    H(i) is stored in the i-th column of the array V, and   

       H  =  I - V * T * V'   

    If STOREV = 'R', the vector which defines the elementary reflector   
    H(i) is stored in the i-th row of the array V, and   

       H  =  I - V' * T * V   

    Arguments   
    =========   

    DIRECT  (input) CHARACTER*1   
            Specifies the order in which the elementary reflectors are   
            multiplied to form the block reflector:   
            = 'F': H = H(1) H(2) . . . H(k) (Forward)   
            = 'B': H = H(k) . . . H(2) H(1) (Backward)   
            (Any value for which lsame_(direct, "F") is false takes the   
            backward path; no argument checking is done here.)   

    STOREV  (input) CHARACTER*1   
            Specifies how the vectors which define the elementary   
            reflectors are stored (see also Further Details):   
            = 'C': columnwise   
            = 'R': rowwise   
            (Any value for which lsame_(storev, "C") is false takes the   
            rowwise path.)   

    N       (input) INTEGER   
            The order of the block reflector H. N >= 0.   

    K       (input) INTEGER   
            The order of the triangular factor T (= the number of   
            elementary reflectors). K >= 1.   

    V       (input/output) COMPLEX*16 array, dimension   
                                 (LDV,K) if STOREV = 'C'   
                                 (LDV,N) if STOREV = 'R'   
            The matrix V. See further details.   

    LDV     (input) INTEGER   
            The leading dimension of the array V.   
            If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K. 

    TAU     (input) COMPLEX*16 array, dimension (K)   
            TAU(i) must contain the scalar factor of the elementary   
            reflector H(i).   

    T       (output) COMPLEX*16 array, dimension (LDT,K)   
            The k by k triangular factor T of the block reflector.   
            If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is 
            lower triangular. The rest of the array is not used.   

    LDT     (input) INTEGER   
            The leading dimension of the array T. LDT >= K.   

    Return value: always 0.   

    Further Details   
    ===============   

    The shape of the matrix V and the storage of the vectors which define 
    the H(i) is best illustrated by the following example with n = 5 and 
    k = 3. The elements equal to 1 are not stored; the corresponding   
    array elements are modified but restored on exit. The rest of the   
    array is not used.   

    DIRECT = 'F' and STOREV = 'C':         DIRECT = 'F' and STOREV = 'R': 

                 V = (  1       )                 V = (  1 v1 v1 v1 v1 )   
                     ( v1  1    )                     (     1 v2 v2 v2 )   
                     ( v1 v2  1 )                     (        1 v3 v3 )   
                     ( v1 v2 v3 )   
                     ( v1 v2 v3 )   

    DIRECT = 'B' and STOREV = 'C':         DIRECT = 'B' and STOREV = 'R': 

                 V = ( v1 v2 v3 )                 V = ( v1 v1  1       )   
                     ( v1 v2 v3 )                     ( v2 v2 v2  1    )   
                     (  1 v2 v3 )                     ( v3 v3 v3 v3  1 )   
                     (     1 v3 )   
                     (        1 )   

    ===================================================================== 
*/
    /* Table of constant values (passed by address to the BLAS callees;
       presumably never written by them) */
    static doublecomplex c_b2 = {0.,0.};
    static integer c__1 = 1;

    /* Work locals.  f2c emitted i, j and vii as `static` and computed
       dead linear indices from t_dim1 / v_dim1, which were never assigned
       (an uninitialized read).  The locals are automatic here and the
       dead index arithmetic is removed. */
    integer i, j;
    integer len1, len2;     /* dimension arguments for the BLAS calls */
    integer last;           /* n - k + i: position of the unit entry ('B') */
    doublecomplex ntau;     /* -tau(i) */
    doublecomplex vii;      /* saved entry of V overwritten by the unit 1 */

    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int zgemv_(char *, integer *, integer *, 
            doublecomplex *, doublecomplex *, integer *, doublecomplex *, 
            integer *, doublecomplex *, doublecomplex *, integer *), 
            ztrmv_(char *, char *, char *, integer *, doublecomplex *, 
            integer *, doublecomplex *, integer *), zlacgv_(integer *, 
            doublecomplex *, integer *);

/* 1-based Fortran-style accessors (kept token-identical to the other
   routines in this file, which define the same macro names) */
#define TAU(I) tau[(I)-1]
#define V(I,J) v[(I)-1 + ((J)-1)* ( *ldv)]
#define T(I,J) t[(I)-1 + ((J)-1)* ( *ldt)]

    /* Quick return if possible */
    if (*n == 0) {
        return 0;
    }

    if (lsame_(direct, "F")) {
        for (i = 1; i <= *k; ++i) {
            if (TAU(i).r == 0. && TAU(i).i == 0.) {

                /* H(i)  =  I */
                for (j = 1; j <= i; ++j) {
                    T(j,i).r = 0., T(j,i).i = 0.;
/* L10: */
                }
            } else {

                /* general case: put the implicit unit entry into V for
                   the product, and restore the saved value afterwards */
                vii.r = V(i,i).r, vii.i = V(i,i).i;
                V(i,i).r = 1., V(i,i).i = 0.;
                ntau.r = -TAU(i).r, ntau.i = -TAU(i).i;
                if (lsame_(storev, "C")) {

                    /* T(1:i-1,i) := - tau(i) * V(i:n,1:i-1)' * V(i:n,i) */
                    len1 = *n - i + 1;
                    len2 = i - 1;
                    zgemv_("Conjugate transpose", &len1, &len2, &ntau, 
                            &V(i,1), ldv, &V(i,i), &c__1, &c_b2, 
                            &T(1,i), &c__1);
                } else {

                    /* T(1:i-1,i) := - tau(i) * V(1:i-1,i:n) * V(i,i:n)'.
                       Row i of V is conjugated in place for the product
                       and conjugated back afterwards. */
                    if (i < *n) {
                        len1 = *n - i;
                        zlacgv_(&len1, &V(i,i+1), ldv);
                    }
                    len1 = i - 1;
                    len2 = *n - i + 1;
                    zgemv_("No transpose", &len1, &len2, &ntau, 
                            &V(1,i), ldv, &V(i,i), ldv, &c_b2, 
                            &T(1,i), &c__1);
                    if (i < *n) {
                        len1 = *n - i;
                        zlacgv_(&len1, &V(i,i+1), ldv);
                    }
                }
                V(i,i).r = vii.r, V(i,i).i = vii.i;

                /* T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */
                len1 = i - 1;
                ztrmv_("Upper", "No transpose", "Non-unit", &len1, 
                        &T(1,1), ldt, &T(1,i), &c__1);
                T(i,i).r = TAU(i).r, T(i,i).i = TAU(i).i;
            }
/* L20: */
        }
    } else {
        for (i = *k; i >= 1; --i) {
            if (TAU(i).r == 0. && TAU(i).i == 0.) {

                /* H(i)  =  I */
                for (j = i; j <= *k; ++j) {
                    T(j,i).r = 0., T(j,i).i = 0.;
/* L30: */
                }
            } else {

                /* general case */
                if (i < *k) {
                    last = *n - *k + i;
                    ntau.r = -TAU(i).r, ntau.i = -TAU(i).i;
                    if (lsame_(storev, "C")) {
                        vii.r = V(last,i).r, vii.i = V(last,i).i;
                        V(last,i).r = 1., V(last,i).i = 0.;

                        /* T(i+1:k,i) :=   
                           - tau(i) * V(1:n-k+i,i+1:k)' * V(1:n-k+i,i) */
                        len1 = last;
                        len2 = *k - i;
                        zgemv_("Conjugate transpose", &len1, &len2, &ntau, 
                                &V(1,i+1), ldv, &V(1,i), &c__1, &c_b2, 
                                &T(i+1,i), &c__1);
                        V(last,i).r = vii.r, V(last,i).i = vii.i;
                    } else {
                        vii.r = V(i,last).r, vii.i = V(i,last).i;
                        V(i,last).r = 1., V(i,last).i = 0.;

                        /* T(i+1:k,i) :=   
                           - tau(i) * V(i+1:k,1:n-k+i) * V(i,1:n-k+i)'.
                           The first n-k+i-1 entries of row i are
                           conjugated in place and conjugated back after. */
                        len1 = last - 1;
                        zlacgv_(&len1, &V(i,1), ldv);
                        len1 = *k - i;
                        len2 = last;
                        zgemv_("No transpose", &len1, &len2, &ntau, 
                                &V(i+1,1), ldv, &V(i,1), ldv, &c_b2, 
                                &T(i+1,i), &c__1);
                        len1 = last - 1;
                        zlacgv_(&len1, &V(i,1), ldv);
                        V(i,last).r = vii.r, V(i,last).i = vii.i;
                    }

                    /* T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */
                    len1 = *k - i;
                    ztrmv_("Lower", "No transpose", "Non-unit", &len1, 
                            &T(i+1,i+1), ldt, &T(i+1,i), &c__1);
                }
                T(i,i).r = TAU(i).r, T(i,i).i = TAU(i).i;
            }
/* L40: */
        }
    }
    return 0;

/*     End of ZLARFT */

} /* zlarft_ */
/* Subroutine */ int chetrd_(char *uplo, integer *n, complex *a, integer *lda,
         real *d, real *e, complex *tau, complex *work, integer *lwork, 
        integer *info)
{
/*  -- LAPACK routine (version 2.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   

    Purpose   
    =======   

    CHETRD reduces a complex Hermitian matrix A to real symmetric   
    tridiagonal form T by a unitary similarity transformation:   
    Q**H * A * Q = T.   

    Arguments   
    =========   

    UPLO    (input) CHARACTER*1   
            = 'U':  Upper triangle of A is stored;   
            = 'L':  Lower triangle of A is stored.   

    N       (input) INTEGER   
            The order of the matrix A.  N >= 0.   

    A       (input/output) COMPLEX array, dimension (LDA,N)   
            On entry, the Hermitian matrix A.  If UPLO = 'U', the leading 
            N-by-N upper triangular part of A contains the upper   
            triangular part of the matrix A, and the strictly lower   
            triangular part of A is not referenced.  If UPLO = 'L', the   
            leading N-by-N lower triangular part of A contains the lower 
            triangular part of the matrix A, and the strictly upper   
            triangular part of A is not referenced.   
            On exit, if UPLO = 'U', the diagonal and first superdiagonal 
            of A are overwritten by the corresponding elements of the   
            tridiagonal matrix T, and the elements above the first   
            superdiagonal, with the array TAU, represent the unitary   
            matrix Q as a product of elementary reflectors; if UPLO   
            = 'L', the diagonal and first subdiagonal of A are over-   
            written by the corresponding elements of the tridiagonal   
            matrix T, and the elements below the first subdiagonal, with 
            the array TAU, represent the unitary matrix Q as a product   
            of elementary reflectors. See Further Details.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    D       (output) REAL array, dimension (N)   
            The diagonal elements of the tridiagonal matrix T:   
            D(i) = A(i,i).   

    E       (output) REAL array, dimension (N-1)   
            The off-diagonal elements of the tridiagonal matrix T:   
            E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. 

    TAU     (output) COMPLEX array, dimension (N-1)   
            The scalar factors of the elementary reflectors (see Further 
            Details).   

    WORK    (workspace/output) COMPLEX array, dimension (LWORK)   
            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.   

    LWORK   (input) INTEGER   
            The dimension of the array WORK.  LWORK >= 1.   
            For optimum performance LWORK >= N*NB, where NB is the   
            optimal blocksize.   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value   

    Return value: always 0; errors are reported through INFO (and   
    XERBLA is called before returning on an illegal argument).  The   
    status returned by CHETD2 is not propagated.   

    Further Details   
    ===============   

    If UPLO = 'U', the matrix Q is represented as a product of elementary 
    reflectors   

       Q = H(n-1) . . . H(2) H(1).   

    Each H(i) has the form   

       H(i) = I - tau * v * v'   

    where tau is a complex scalar, and v is a complex vector with   
    v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in   
    A(1:i-1,i+1), and tau in TAU(i).   

    If UPLO = 'L', the matrix Q is represented as a product of elementary 
    reflectors   

       Q = H(1) H(2) . . . H(n-1).   

    Each H(i) has the form   

       H(i) = I - tau * v * v'   

    where tau is a complex scalar, and v is a complex vector with   
    v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), 
    and tau in TAU(i).   

    The contents of A on exit are illustrated by the following examples   
    with n = 5:   

    if UPLO = 'U':                       if UPLO = 'L':   

      (  d   e   v2  v3  v4 )              (  d                  )   
      (      d   e   v3  v4 )              (  e   d              )   
      (          d   e   v4 )              (  v1  e   d          )   
      (              d   e  )              (  v1  v2  e   d      )   
      (                  d  )              (  v1  v2  v3  e   d  )   

    where d and e denote diagonal and off-diagonal elements of T, and vi 
    denotes an element of the vector defining H(i).   

    ===================================================================== 
*/
    /* Table of constant values (passed by address to the callees below;
       presumably never written by them) */
    static integer c__1 = 1;
    static integer c_n1 = -1;
    static integer c__3 = 3;
    static integer c__2 = 2;
    static real c_b23 = 1.f;

    /* Work locals.  f2c emitted these as `static` and computed dead
       linear indices from a_dim1, which was never assigned (an
       uninitialized read).  The locals are automatic here and the dead
       index arithmetic is removed. */
    integer i, j;
    integer nb;             /* block size in use */
    integer nbmin;          /* smallest block size worth using */
    integer nx;             /* crossover point to unblocked code */
    integer kk;             /* upper case: columns 1:kk go to chetd2_ */
    integer iws;            /* workspace size reported in WORK(1) */
    /* Only passed to clatrd_/cher2k_ inside the blocked loops, which run
       only on the path where it has been set to n; zero-initialised so
       the value is defined on every path. */
    integer ldwork = 0;
    integer iinfo;          /* status from chetd2_ (not propagated) */
    integer neg_info;       /* positive argument index handed to xerbla_ */
    integer crossover, nb_fit, len;
    logical upper;
    complex q__1;

    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int chetd2_(char *, integer *, complex *, 
            integer *, real *, real *, complex *, integer *), cher2k_(char *, 
            char *, integer *, integer *, complex *, complex *, integer *, 
            complex *, integer *, real *, complex *, integer *);
    extern /* Subroutine */ int clatrd_(char *, integer *, integer *, 
            complex *, integer *, real *, complex *, complex *, integer *), 
            xerbla_(char *, integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
            integer *, integer *, ftnlen, ftnlen);

/* 1-based Fortran-style accessors (kept token-identical to the other
   routines in this file, which define the same macro names) */
#define D(I) d[(I)-1]
#define E(I) e[(I)-1]
#define TAU(I) tau[(I)-1]
#define WORK(I) work[(I)-1]
#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]

    /* Test the input parameters */
    *info = 0;
    upper = lsame_(uplo, "U");
    if (! upper && ! lsame_(uplo, "L")) {
        *info = -1;
    } else if (*n < 0) {
        *info = -2;
    } else if (*lda < max(1,*n)) {
        *info = -4;
    } else if (*lwork < 1) {
        *info = -9;
    }
    if (*info != 0) {
        neg_info = -(*info);
        xerbla_("CHETRD", &neg_info);
        return 0;
    }

    /* Quick return if possible */
    if (*n == 0) {
        WORK(1).r = 1.f, WORK(1).i = 0.f;
        return 0;
    }

    /* Determine the block size. */
    nb = ilaenv_(&c__1, "CHETRD", uplo, n, &c_n1, &c_n1, &c_n1, 6L, 1L);
    nx = *n;
    iws = 1;
    if (nb > 1 && nb < *n) {

        /* Determine when to cross over from blocked to unblocked code   
           (last block is always handled by unblocked code).  The result
           is stored in a temporary first because max() is a macro that
           would otherwise evaluate the call twice. */
        crossover = ilaenv_(&c__3, "CHETRD", uplo, n, &c_n1, &c_n1, &c_n1, 
                6L, 1L);
        nx = max(nb,crossover);
        if (nx < *n) {

            /* Determine if workspace is large enough for blocked code. */
            ldwork = *n;
            iws = ldwork * nb;
            if (*lwork < iws) {

                /* Not enough workspace to use optimal NB:  determine the 
                   minimum value of NB, and reduce NB or force use of   
                   unblocked code by setting NX = N. */
                nb_fit = *lwork / ldwork;
                nb = max(nb_fit,1);
                nbmin = ilaenv_(&c__2, "CHETRD", uplo, n, &c_n1, &c_n1, 
                        &c_n1, 6L, 1L);
                if (nb < nbmin) {
                    nx = *n;
                }
            }
        } else {
            nx = *n;
        }
    } else {
        nb = 1;
    }

    /* From here on nb >= 1 on every path, so the blocked loops below step
       in a single known direction (the f2c original selected the loop
       test at run time from the sign of the step). */

    if (upper) {

        /* Reduce the upper triangle of A.   
           Columns 1:kk are handled by the unblocked method. */
        kk = *n - (*n - nx + nb - 1) / nb * nb;
        for (i = *n - nb + 1; i >= kk+1; i -= nb) {

            /* Reduce columns i:i+nb-1 to tridiagonal form and form the   
               matrix W which is needed to update the unreduced part of   
               the matrix */
            len = i + nb - 1;
            clatrd_(uplo, &len, &nb, &A(1,1), lda, &E(1), &TAU(1), 
                    &WORK(1), &ldwork);

            /* Update the unreduced submatrix A(1:i-1,1:i-1), using an   
               update of the form:  A := A - V*W' - W*V' */
            len = i - 1;
            q__1.r = -1.f, q__1.i = 0.f;
            cher2k_(uplo, "No transpose", &len, &nb, &q__1, &A(1,i), lda, 
                    &WORK(1), &ldwork, &c_b23, &A(1,1), lda);

            /* Copy superdiagonal elements back into A, and diagonal   
               elements into D */
            for (j = i; j <= i+nb-1; ++j) {
                A(j-1,j).r = E(j-1), A(j-1,j).i = 0.f;
                D(j) = A(j,j).r;
/* L10: */
            }
/* L20: */
        }

        /* Use unblocked code to reduce the last or only block */
        chetd2_(uplo, &kk, &A(1,1), lda, &D(1), &E(1), &TAU(1), &iinfo);
    } else {

        /* Reduce the lower triangle of A */
        for (i = 1; i <= *n-nx; i += nb) {

            /* Reduce columns i:i+nb-1 to tridiagonal form and form the   
               matrix W which is needed to update the unreduced part of   
               the matrix */
            len = *n - i + 1;
            clatrd_(uplo, &len, &nb, &A(i,i), lda, &E(i), &TAU(i), 
                    &WORK(1), &ldwork);

            /* Update the unreduced submatrix A(i+nb:n,i+nb:n), using   
               an update of the form:  A := A - V*W' - W*V' */
            len = *n - i - nb + 1;
            q__1.r = -1.f, q__1.i = 0.f;
            cher2k_(uplo, "No transpose", &len, &nb, &q__1, &A(i+nb,i), 
                    lda, &WORK(nb + 1), &ldwork, &c_b23, &A(i+nb,i+nb), 
                    lda);

            /* Copy subdiagonal elements back into A, and diagonal   
               elements into D */
            for (j = i; j <= i+nb-1; ++j) {
                A(j+1,j).r = E(j), A(j+1,j).i = 0.f;
                D(j) = A(j,j).r;
/* L30: */
            }
/* L40: */
        }

        /* Use unblocked code to reduce the last or only block; i is the
           first column the blocked loop left untouched. */
        len = *n - i + 1;
        chetd2_(uplo, &len, &A(i,i), lda, &D(i), &E(i), &TAU(i), &iinfo);
    }

    WORK(1).r = (real) iws, WORK(1).i = 0.f;
    return 0;

/*     End of CHETRD */

} /* chetrd_ */
/* Subroutine */ int sorml2_(char *side, char *trans, integer *m, integer *n,
        integer *k, real *a, integer *lda, real *tau, real *c, integer *ldc,
        real *work, integer *info)
{
/*  -- LAPACK routine (version 2.0) --
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
       Courant Institute, Argonne National Lab, and Rice University
       February 29, 1992


    Purpose
    =======

    SORML2 overwrites the general real m by n matrix C with

          Q * C  if SIDE = 'L' and TRANS = 'N', or

          Q'* C  if SIDE = 'L' and TRANS = 'T', or

          C * Q  if SIDE = 'R' and TRANS = 'N', or

          C * Q' if SIDE = 'R' and TRANS = 'T',

    where Q is a real orthogonal matrix defined as the product of k
    elementary reflectors

          Q = H(k) . . . H(2) H(1)

    as returned by SGELQF. Q is of order m if SIDE = 'L' and of order n
    if SIDE = 'R'.

    Arguments
    =========

    SIDE    (input) CHARACTER*1
            = 'L': apply Q or Q' from the Left
            = 'R': apply Q or Q' from the Right

    TRANS   (input) CHARACTER*1
            = 'N': apply Q  (No transpose)
            = 'T': apply Q' (Transpose)

    M       (input) INTEGER
            The number of rows of the matrix C. M >= 0.

    N       (input) INTEGER
            The number of columns of the matrix C. N >= 0.

    K       (input) INTEGER
            The number of elementary reflectors whose product defines
            the matrix Q.
            If SIDE = 'L', M >= K >= 0;
            if SIDE = 'R', N >= K >= 0.

    A       (input) REAL array, dimension
                                 (LDA,M) if SIDE = 'L',
                                 (LDA,N) if SIDE = 'R'
            The i-th row must contain the vector which defines the
            elementary reflector H(i), for i = 1,2,...,k, as returned by
            SGELQF in the first k rows of its array argument A.
            A is modified by the routine but restored on exit.

    LDA     (input) INTEGER
            The leading dimension of the array A. LDA >= max(1,K).

    TAU     (input) REAL array, dimension (K)
            TAU(i) must contain the scalar factor of the elementary
            reflector H(i), as returned by SGELQF.

    C       (input/output) REAL array, dimension (LDC,N)
            On entry, the m by n matrix C.
            On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q.

    LDC     (input) INTEGER
            The leading dimension of the array C. LDC >= max(1,M).

    WORK    (workspace) REAL array, dimension
                                     (N) if SIDE = 'L',
                                     (M) if SIDE = 'R'

    INFO    (output) INTEGER
            = 0: successful exit
            < 0: if INFO = -i, the i-th argument had an illegal value

    Return value
    ============

    Always 0.  Argument errors are reported through INFO and through a
    call to xerbla_("SORML2", ...), not through the return value.

    =====================================================================
*/

    /* External routines */
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *,
            integer *, real *, real *, integer *, real *);
    extern /* Subroutine */ int xerbla_(char *, integer *);

    /* Local variables.  These are deliberately automatic (not static):
       every one of them is assigned before it is read, and keeping them
       on the stack lets the routine be called concurrently. */
    logical left, notran;
    integer i, start, stop, step;
    integer ic, jc, mi, ni, nq, errarg;
    real aii;

#define TAU(I) tau[(I)-1]
#define WORK(I) work[(I)-1]
#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]
#define C(I,J) c[(I)-1 + ((J)-1)* ( *ldc)]

    /* Test the input arguments */
    *info = 0;
    left = lsame_(side, "L");
    notran = lsame_(trans, "N");

    /* NQ is the order of Q */
    nq = left ? *m : *n;

    if (! left && ! lsame_(side, "R")) {
        *info = -1;
    } else if (! notran && ! lsame_(trans, "T")) {
        *info = -2;
    } else if (*m < 0) {
        *info = -3;
    } else if (*n < 0) {
        *info = -4;
    } else if (*k < 0 || *k > nq) {
        *info = -5;
    } else if (*lda < max(1,*k)) {
        *info = -7;
    } else if (*ldc < max(1,*m)) {
        *info = -10;
    }
    if (*info != 0) {
        /* xerbla_ receives the (positive) position of the bad argument */
        errarg = -(*info);
        xerbla_("SORML2", &errarg);
        return 0;
    }

    /* Quick return if possible */
    if (*m == 0 || *n == 0 || *k == 0) {
        return 0;
    }

    /* The reflectors are visited in ascending order for (Left, 'N') and
       (Right, 'T'), and in descending order otherwise. */
    if ((left && notran) || (! left && ! notran)) {
        start = 1;
        stop = *k;
        step = 1;
    } else {
        start = *k;
        stop = 1;
        step = -1;
    }

    /* Start from the full m by n matrix C; inside the loop only the
       dimension and offset belonging to the side Q is applied from
       are updated. */
    mi = *m;
    ic = 1;
    ni = *n;
    jc = 1;

    for (i = start; step < 0 ? i >= stop : i <= stop; i += step) {
        if (left) {
            /* H(i) is applied to C(i:m,1:n) */
            mi = *m - i + 1;
            ic = i;
        } else {
            /* H(i) is applied to C(1:m,i:n) */
            ni = *n - i + 1;
            jc = i;
        }

        /* Apply H(i).  A(i,i) is temporarily set to 1 so that row i of A
           can be handed to slarf_ as the reflector vector; the saved
           value is put back right after the call. */
        aii = A(i,i);
        A(i,i) = 1.f;
        slarf_(side, &mi, &ni, &A(i,i), lda, &TAU(i), &C(ic,jc), ldc,
                &WORK(1));
        A(i,i) = aii;
    }
    return 0;

/*     End of SORML2 */

} /* sorml2_ */
/* Subroutine */ int cgeql2_(integer *m, integer *n, complex *a, integer *lda,
        complex *tau, complex *work, integer *info)
{
/*  -- LAPACK routine (version 2.0) --
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
       Courant Institute, Argonne National Lab, and Rice University
       September 30, 1994


    Purpose
    =======

    CGEQL2 computes a QL factorization of a complex m by n matrix A:
    A = Q * L.

    Arguments
    =========

    M       (input) INTEGER
            The number of rows of the matrix A.  M >= 0.

    N       (input) INTEGER
            The number of columns of the matrix A.  N >= 0.

    A       (input/output) COMPLEX array, dimension (LDA,N)
            On entry, the m by n matrix A.
            On exit, if m >= n, the lower triangle of the subarray
            A(m-n+1:m,1:n) contains the n by n lower triangular matrix L;
            if m <= n, the elements on and below the (n-m)-th
            superdiagonal contain the m by n lower trapezoidal matrix L;
            the remaining elements, with the array TAU, represent the
            unitary matrix Q as a product of elementary reflectors
            (see Further Details).

    LDA     (input) INTEGER
            The leading dimension of the array A.  LDA >= max(1,M).

    TAU     (output) COMPLEX array, dimension (min(M,N))
            The scalar factors of the elementary reflectors (see Further
            Details).

    WORK    (workspace) COMPLEX array, dimension (N)

    INFO    (output) INTEGER
            = 0: successful exit
            < 0: if INFO = -i, the i-th argument had an illegal value

    Return value
    ============

    Always 0.  Argument errors are reported through INFO and through a
    call to xerbla_("CGEQL2", ...), not through the return value.

    Further Details
    ===============

    The matrix Q is represented as a product of elementary reflectors

       Q = H(k) . . . H(2) H(1), where k = min(m,n).

    Each H(i) has the form

       H(i) = I - tau * v * v'

    where tau is a complex scalar, and v is a complex vector with
    v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in
    A(1:m-k+i-1,n-k+i), and tau in TAU(i).

    =====================================================================
*/

    /* Builtin functions */
    void r_cnjg(complex *, complex *);

    /* External routines */
    extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex *
            , integer *, complex *, complex *, integer *, complex *),
            clarfg_(integer *, complex *, complex *, integer *, complex *),
            xerbla_(char *, integer *);

    /* Local variables.  All of them are automatic and assigned before
       use, so the routine keeps no state between calls.  Elements of A
       are addressed only through the A(I,J) macro, which uses *lda
       directly. */
    integer unit_stride = 1;
    integer i, k, row, col;
    integer len, ncols, errarg;
    complex alpha, tau_conj;

#define TAU(I) tau[(I)-1]
#define WORK(I) work[(I)-1]
#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]

    /* Test the input arguments */
    *info = 0;
    if (*m < 0) {
        *info = -1;
    } else if (*n < 0) {
        *info = -2;
    } else if (*lda < max(1,*m)) {
        *info = -4;
    }
    if (*info != 0) {
        /* xerbla_ receives the (positive) position of the bad argument */
        errarg = -(*info);
        xerbla_("CGEQL2", &errarg);
        return 0;
    }

    k = min(*m,*n);

    /* Work from the last reflector back to the first.  When k == 0 the
       loop body never runs and the routine returns immediately. */
    for (i = k; i >= 1; --i) {
        /* Position of the pivot element of step i */
        row = *m - k + i;
        col = *n - k + i;

        /* Generate elementary reflector H(i) to annihilate
           A(1:m-k+i-1,n-k+i) */
        alpha.r = A(row,col).r, alpha.i = A(row,col).i;
        len = row;
        clarfg_(&len, &alpha, &A(1,col), &unit_stride, &TAU(i));

        /* Apply H(i)' to A(1:m-k+i,1:n-k+i-1) from the left.  The pivot
           is set to 1 for the duration of the call so that column col
           can be passed as the reflector vector. */
        A(row,col).r = 1.f, A(row,col).i = 0.f;
        len = row;
        ncols = col - 1;
        r_cnjg(&tau_conj, &TAU(i));
        clarf_("Left", &len, &ncols, &A(1,col), &unit_stride, &tau_conj,
                &A(1,1), lda, &WORK(1));

        /* Store the value clarfg_ left in alpha at the pivot position */
        A(row,col).r = alpha.r, A(row,col).i = alpha.i;
    }
    return 0;

/*     End of CGEQL2 */

} /* cgeql2_ */
/* Subroutine */ int dtzrqf_(integer *m, integer *n, doublereal *a, integer *
        lda, doublereal *tau, integer *info)
{
/*  -- LAPACK routine (version 2.0) --
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
       Courant Institute, Argonne National Lab, and Rice University
       September 30, 1994


    Purpose
    =======

    DTZRQF reduces the M-by-N ( M<=N ) real upper trapezoidal matrix A
    to upper triangular form by means of orthogonal transformations.

    The upper trapezoidal matrix A is factored as

       A = ( R  0 ) * Z,

    where Z is an N-by-N orthogonal matrix and R is an M-by-M upper
    triangular matrix.

    Arguments
    =========

    M       (input) INTEGER
            The number of rows of the matrix A.  M >= 0.

    N       (input) INTEGER
            The number of columns of the matrix A.  N >= M.

    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
            On entry, the leading M-by-N upper trapezoidal part of the
            array A must contain the matrix to be factorized.
            On exit, the leading M-by-M upper triangular part of A
            contains the upper triangular matrix R, and elements M+1 to
            N of the first M rows of A, with the array TAU, represent the
            orthogonal matrix Z as a product of M elementary reflectors.

    LDA     (input) INTEGER
            The leading dimension of the array A.  LDA >= max(1,M).

    TAU     (output) DOUBLE PRECISION array, dimension (M)
            The scalar factors of the elementary reflectors.

    INFO    (output) INTEGER
            = 0: successful exit
            < 0: if INFO = -i, the i-th argument had an illegal value

    Return value
    ============

    Always 0.  Argument errors are reported through INFO and through a
    call to xerbla_("DTZRQF", ...), not through the return value.

    Further Details
    ===============

    The factorization is obtained by Householder's method.  The kth
    transformation matrix, Z( k ), which is used to introduce zeros into
    the ( m - k + 1 )th row of A, is given in the form

       Z( k ) = ( I     0   ),
                ( 0  T( k ) )

    where

       T( k ) = I - tau*u( k )*u( k )',   u( k ) = (   1    ),
                                                   (   0    )
                                                   ( z( k ) )

    tau is a scalar and z( k ) is an ( n - m ) element vector.
    tau and z( k ) are chosen to annihilate the elements of the kth row
    of X.

    The scalar tau is returned in the kth element of TAU and the vector
    u( k ) in the kth row of A, such that the elements of z( k ) are
    in  a( k, m + 1 ), ..., a( k, n ). The elements of R are returned in
    the upper triangular part of A.

    Z is given by

       Z =  Z( 1 ) * Z( 2 ) * ... * Z( m ).

    =====================================================================
*/

    /* External routines */
    extern /* Subroutine */ int dger_(integer *, integer *, doublereal *,
            doublereal *, integer *, doublereal *, integer *, doublereal *,
            integer *);
    extern /* Subroutine */ int dgemv_(char *, integer *, integer *,
            doublereal *, doublereal *, integer *, doublereal *, integer *,
            doublereal *, doublereal *, integer *), dcopy_(integer *,
            doublereal *, integer *, doublereal *, integer *), daxpy_(integer
            *, doublereal *, doublereal *, integer *, doublereal *, integer *);
    extern /* Subroutine */ int dlarfg_(integer *, doublereal *, doublereal *,
            integer *, doublereal *), xerbla_(char *, integer *);

    /* Local variables.  All of them are automatic and assigned before
       use, so the routine keeps no state between calls. */
    integer unit_stride = 1;
    doublereal one = 1.;
    integer i, k, m1;
    integer len, nrows, ncols, mp1, errarg;
    doublereal neg_tau;

#define TAU(I) tau[(I)-1]
#define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)]

    /* Test the input parameters. */
    *info = 0;
    if (*m < 0) {
        *info = -1;
    } else if (*n < *m) {
        *info = -2;
    } else if (*lda < max(1,*m)) {
        *info = -4;
    }
    if (*info != 0) {
        /* xerbla_ receives the (positive) position of the bad argument */
        errarg = -(*info);
        xerbla_("DTZRQF", &errarg);
        return 0;
    }

    /* Perform the factorization. */
    if (*m == 0) {
        return 0;
    }

    if (*m == *n) {
        /* Square case: no reflector is generated, TAU is simply zeroed */
        for (i = 1; i <= *n; ++i) {
            TAU(i) = 0.;
        }
        return 0;
    }

    /* m1 is the first column to the right of the leading M-by-M block */
    mp1 = *m + 1;
    m1 = min(mp1,*n);

    for (k = *m; k >= 1; --k) {
        /* Use a Householder reflection to zero the kth row of A.
           First set up the reflection. */
        len = *n - *m + 1;
        dlarfg_(&len, &A(k,k), &A(k,m1), lda, &TAU(k));

        if (TAU(k) != 0. && k > 1) {
            /* We now perform the operation  A := A*P( k ).

               Use the first ( k - 1 ) elements of TAU to store  a( k ),
               where  a( k ) consists of the first ( k - 1 ) elements of
               the  kth column  of  A.  Also  let  B  denote  the  first
               ( k - 1 ) rows of the last ( n - m ) columns of A.

               The scalar temporaries are re-assigned before each call,
               so no call depends on what the previous callee left in
               them. */
            nrows = k - 1;
            dcopy_(&nrows, &A(1,k), &unit_stride, &TAU(1), &unit_stride);

            /* Form   w = a( k ) + B*z( k )  in TAU. */
            nrows = k - 1;
            ncols = *n - *m;
            dgemv_("No transpose", &nrows, &ncols, &one, &A(1,m1), lda,
                    &A(k,m1), lda, &one, &TAU(1), &unit_stride);

            /* Now form  a( k ) := a( k ) - tau*w
               and       B      := B      - tau*w*z( k )'. */
            nrows = k - 1;
            neg_tau = -TAU(k);
            daxpy_(&nrows, &neg_tau, &TAU(1), &unit_stride, &A(1,k),
                    &unit_stride);

            nrows = k - 1;
            ncols = *n - *m;
            neg_tau = -TAU(k);
            dger_(&nrows, &ncols, &neg_tau, &TAU(1), &unit_stride, &A(k,m1),
                    lda, &A(1,m1), lda);
        }
    }

    return 0;

/*     End of DTZRQF */

} /* dtzrqf_ */